NVIDIA OptiX 7.1 API nvidia_logo_transpbg.gif Up
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
optix_7_device_impl_transformations.h
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2020 NVIDIA Corporation. All rights reserved.
3 *
4 * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
5 * rights in and to this software, related documentation and any modifications thereto.
6 * Any use, reproduction, disclosure or distribution of this software and related
7 * documentation without an express license agreement from NVIDIA Corporation is strictly
8 * prohibited.
9 *
10 * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
11 * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
12 * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
13 * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
14 * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
15 * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
16 * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
17 * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
18 * SUCH DAMAGES
19 */
20 
29 #if !defined( __OPTIX_INCLUDE_INTERNAL_HEADERS__ )
30 #error("optix_7_device_impl_transformations.h is an internal header file and must not be used directly. Please use optix_device.h or optix.h instead.")
31 #endif
32 
33 #ifndef __optix_optix_7_device_impl_transformations_h__
34 #define __optix_optix_7_device_impl_transformations_h__
35 
36 namespace optix_impl {
37 
// Returns the component-wise sum of the vectors a and b.
static __forceinline__ __device__ float4 optixAddFloat4( const float4& a, const float4& b )
{
    float4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
42 
// Returns the vector a with every component multiplied by the scalar b.
static __forceinline__ __device__ float4 optixMulFloat4( const float4& a, float b )
{
    float4 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    scaled.w = a.w * b;
    return scaled;
}
47 
// Loads 16 bytes from the address addr and returns them as four 32-bit words.
// The address is first converted to the global state space (cvta.to.global) and then read with a single
// vectorized ld.global.v4.u32 instruction.
// NOTE(review): the vectorized load presumably requires addr to be 16-byte aligned - confirm against callers.
static __forceinline__ __device__ uint4 optixLdg( unsigned long long addr )
{
    // Translate the incoming address into a global-space pointer.
    const uint4* globalPtr;
    asm volatile( "cvta.to.global.u64 %0, %1;" : "=l"( globalPtr ) : "l"( addr ) );

    // Fetch all four words with one load instruction.
    uint4 words;
    asm volatile( "ld.global.v4.u32 {%0,%1,%2,%3}, [%4];"
                  : "=r"( words.x ), "=r"( words.y ), "=r"( words.z ), "=r"( words.w )
                  : "l"( globalPtr ) );
    return words;
}
58 
// Loads an object of type T from ptr in chunks of 16 bytes using optixLdg and returns the copy.
//
// The copy always proceeds in whole 16-byte steps, so sizeof( T ) must be a multiple of 16; any other size
// would read past the end of *ptr and write past the end of the local copy. This is enforced at compile time.
// NOTE(review): ptr is presumably required to be 16-byte aligned, as the function name suggests.
template <class T>
static __forceinline__ __device__ T optixLoadReadOnlyAlign16( const T* ptr )
{
    static_assert( sizeof( T ) % 16 == 0,
                   "optixLoadReadOnlyAlign16 copies whole 16-byte chunks; sizeof( T ) must be a multiple of 16" );

    T v;
    // Unsigned offset avoids a signed/unsigned comparison against sizeof( T ).
    for( unsigned int ofs = 0; ofs < (unsigned int)sizeof( T ); ofs += 16 )
        *(uint4*)( (char*)&v + ofs ) = optixLdg( (unsigned long long)( (char*)ptr + ofs ) );
    return v;
}
67 
// Multiplies the row vector vec with the 3x4 matrix whose rows are m0, m1, and m2.
// Only vec.x, vec.y, and vec.z are weighted by the matrix rows; vec.w is added unchanged to the w component
// of the result.
static __forceinline__ __device__ float4 optixMultiplyRowMatrix( const float4 vec, const float4 m0, const float4 m1, const float4 m2 )
{
    return make_float4( vec.x * m0.x + vec.y * m1.x + vec.z * m2.x,
                        vec.x * m0.y + vec.y * m1.y + vec.z * m2.y,
                        vec.x * m0.z + vec.y * m1.z + vec.z * m2.z,
                        vec.x * m0.w + vec.y * m1.w + vec.z * m2.w + vec.w );
}
80 
// Converts the SRT transformation srt into a 3x4 matrix with rows m0, m1, and m2.
//
// The result is R * S + T, where
//   R is the rotation built from the quaternion (qx, qy, qz, qw), divided by its squared length so that
//     a non-unit quaternion still yields a pure rotation,
//   S is the 3x4 matrix [ sx a b pvx; 0 sy c pvy; 0 0 sz pvz ],
//   T is the translation (tx, ty, tz), which is added to the last column.
static __forceinline__ __device__ void optixGetMatrixFromSrt( float4& m0, float4& m1, float4& m2, const OptixSRTData& srt )
{
    // Quaternion scaled by the inverse of its squared length
    const float invSqLen = 1.f / ( srt.qx * srt.qx + srt.qy * srt.qy + srt.qz * srt.qz + srt.qw * srt.qw );
    const float nx       = srt.qx * invSqLen;
    const float ny       = srt.qy * invSqLen;
    const float nz       = srt.qz * invSqLen;
    const float nw       = srt.qw * invSqLen;

    // Normalized quaternion products
    const float ww = srt.qw * nw;
    const float xx = srt.qx * nx;
    const float yy = srt.qy * ny;
    const float zz = srt.qz * nz;

    const float xy = srt.qx * ny;
    const float zw = srt.qz * nw;
    const float xz = srt.qx * nz;
    const float yw = srt.qy * nw;
    const float yz = srt.qy * nz;
    const float xw = srt.qx * nw;

    // Rotation matrix
    const float r00 = ( xx - yy - zz + ww );
    const float r01 = 2.0f * ( xy - zw );
    const float r02 = 2.0f * ( xz + yw );

    const float r10 = 2.0f * ( xy + zw );
    const float r11 = ( -xx + yy - zz + ww );
    const float r12 = 2.0f * ( yz - xw );

    const float r20 = 2.0f * ( xz - yw );
    const float r21 = 2.0f * ( yz + xw );
    const float r22 = ( -xx - yy + zz + ww );

    // Each output row is the matching rotation row multiplied with S, plus the translation in w
    m0 = make_float4( r00 * srt.sx,
                      r00 * srt.a + r01 * srt.sy,
                      r00 * srt.b + r01 * srt.c + r02 * srt.sz,
                      r00 * srt.pvx + r01 * srt.pvy + r02 * srt.pvz + srt.tx );
    m1 = make_float4( r10 * srt.sx,
                      r10 * srt.a + r11 * srt.sy,
                      r10 * srt.b + r11 * srt.c + r12 * srt.sz,
                      r10 * srt.pvx + r11 * srt.pvy + r12 * srt.pvz + srt.ty );
    m2 = make_float4( r20 * srt.sx,
                      r20 * srt.a + r21 * srt.sy,
                      r20 * srt.b + r21 * srt.c + r22 * srt.sz,
                      r20 * srt.pvx + r21 * srt.pvy + r22 * srt.pvz + srt.tz );
}
130 
// Inverts the affine 3x4 matrix with rows m0, m1, and m2 in place.
// The 3x3 linear part is inverted through its cofactors and determinant; the new translation column is the
// negated product of that inverse with the old translation column. The determinant is not checked for zero.
static __forceinline__ __device__ void optixInvertMatrix( float4& m0, float4& m1, float4& m2 )
{
    const float det3 =
        m0.x * ( m1.y * m2.z - m1.z * m2.y ) - m0.y * ( m1.x * m2.z - m1.z * m2.x ) + m0.z * ( m1.x * m2.y - m1.y * m2.x );

    const float inv_det3 = 1.0f / det3;

    // Inverse of the linear part, row by row
    const float i00 = inv_det3 * ( m1.y * m2.z - m2.y * m1.z );
    const float i01 = inv_det3 * ( m0.z * m2.y - m2.z * m0.y );
    const float i02 = inv_det3 * ( m0.y * m1.z - m1.y * m0.z );

    const float i10 = inv_det3 * ( m1.z * m2.x - m2.z * m1.x );
    const float i11 = inv_det3 * ( m0.x * m2.z - m2.x * m0.z );
    const float i12 = inv_det3 * ( m0.z * m1.x - m1.z * m0.x );

    const float i20 = inv_det3 * ( m1.x * m2.y - m2.x * m1.y );
    const float i21 = inv_det3 * ( m0.y * m2.x - m2.y * m0.x );
    const float i22 = inv_det3 * ( m0.x * m1.y - m1.x * m0.y );

    // Old translation, captured before the rows are overwritten
    const float tx = m0.w;
    const float ty = m1.w;
    const float tz = m2.w;

    m0 = make_float4( i00, i01, i02, -i00 * tx - i01 * ty - i02 * tz );
    m1 = make_float4( i10, i11, i12, -i10 * tx - i11 * ty - i12 * tz );
    m2 = make_float4( i20, i21, i22, -i20 * tx - i21 * ty - i22 * tz );
}
169 
// Loads the 3x4 matrix key stored at matrix[0..2] into the rows m0, m1, and m2. If t1 is greater than zero,
// the rows are linearly blended with the following key at matrix[3..5], using weight ( 1 - t1 ) for the
// first key and t1 for the second.
// The second key is not touched at all when t1 is not greater than zero.
static __forceinline__ __device__ void optixLoadInterpolatedMatrixKey( float4& m0, float4& m1, float4& m2, const float4* matrix, const float t1 )
{
    m0 = optixLoadReadOnlyAlign16( &matrix[0] );
    m1 = optixLoadReadOnlyAlign16( &matrix[1] );
    m2 = optixLoadReadOnlyAlign16( &matrix[2] );

    // The conditional prevents concurrent loads leading to spills
    if( !( t1 > 0.0f ) )
        return;

    const float t0 = 1.0f - t1;

    const float4 next0 = optixLoadReadOnlyAlign16( &matrix[3] );
    m0                 = optixAddFloat4( optixMulFloat4( m0, t0 ), optixMulFloat4( next0, t1 ) );

    const float4 next1 = optixLoadReadOnlyAlign16( &matrix[4] );
    m1                 = optixAddFloat4( optixMulFloat4( m1, t0 ), optixMulFloat4( next1, t1 ) );

    const float4 next2 = optixLoadReadOnlyAlign16( &matrix[5] );
    m2                 = optixAddFloat4( optixMulFloat4( m2, t0 ), optixMulFloat4( next2, t1 ) );
}
185 
// Loads the SRT key stored at srt[0..3] (16 floats) into srt0..srt3. If t1 is greater than zero, the key is
// linearly blended with the following key at srt[4..7], using weight ( 1 - t1 ) for the first key and t1 for
// the second, and the four components srt2.y, srt2.z, srt2.w, srt3.x are then rescaled to unit length.
// NOTE(review): those four components are presumably the rotation quaternion (qx, qy, qz, qw) of
// OptixSRTData - confirm against the struct layout.
// The second key is not touched at all when t1 is not greater than zero.
static __forceinline__ __device__ void optixLoadInterpolatedSrtKey( float4&       srt0,
                                                                    float4&       srt1,
                                                                    float4&       srt2,
                                                                    float4&       srt3,
                                                                    const float4* srt,
                                                                    const float   t1 )
{
    srt0 = optixLoadReadOnlyAlign16( &srt[0] );
    srt1 = optixLoadReadOnlyAlign16( &srt[1] );
    srt2 = optixLoadReadOnlyAlign16( &srt[2] );
    srt3 = optixLoadReadOnlyAlign16( &srt[3] );

    // The conditional prevents concurrent loads leading to spills
    if( t1 > 0.0f )
    {
        const float t0 = 1.0f - t1;
        srt0 = optixAddFloat4( optixMulFloat4( srt0, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[4] ), t1 ) );
        srt1 = optixAddFloat4( optixMulFloat4( srt1, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[5] ), t1 ) );
        srt2 = optixAddFloat4( optixMulFloat4( srt2, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[6] ), t1 ) );
        srt3 = optixAddFloat4( optixMulFloat4( srt3, t0 ), optixMulFloat4( optixLoadReadOnlyAlign16( &srt[7] ), t1 ) );

        // The component-wise blend does not preserve the length of these four components, so renormalize.
        // sqrtf keeps the computation explicitly in single precision, matching floorf used elsewhere.
        const float inv_length = 1.f / sqrtf( srt2.y * srt2.y + srt2.z * srt2.z + srt2.w * srt2.w + srt3.x * srt3.x );
        srt2.y *= inv_length;
        srt2.z *= inv_length;
        srt2.w *= inv_length;
        srt3.x *= inv_length;
    }
}
214 
// Maps the global time globalt onto the motion keys described by options.
//
// localt   Receives the fractional position between the selected key and the next one.
// key      Receives the index of the selected (left) key.
// options  Motion options providing numKeys, timeBegin, and timeEnd.
// globalt  Time to resolve.
//
// The time is rescaled so that [timeBegin, timeEnd] covers [0, numKeys - 1] and is clamped to that range.
// When the clamped value lands exactly on a key index, including the last key, localt is zero.
static __forceinline__ __device__ void optixResolveMotionKey( float& localt, int& key, const OptixMotionOptions& options, const float globalt )
{
    const float numIntervals = (float)( options.numKeys - 1 );

    // No need to check the motion flags. If data originates from a valid transform list handle, then globalt is in
    // range, or vanish flags are not set.

    const float scaledTime  = ( globalt - options.timeBegin ) * numIntervals / ( options.timeEnd - options.timeBegin );
    const float clampedTime = max( 0.f, min( numIntervals, scaledTime ) );
    const float keyIndex    = floorf( clampedTime );

    key    = (int)keyIndex;
    localt = clampedTime - keyIndex;
}
230 
// Returns the interpolated transformation matrix for a particular matrix motion transformation and point in time.
// The rows of the resulting 3x4 matrix are written to trf0, trf1, and trf2.
static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4&                           trf0,
                                                                           float4&                           trf1,
                                                                           float4&                           trf2,
                                                                           const OptixMatrixMotionTransform* transformData,
                                                                           const float                       time )
{
    // Resolve the left key and the fractional time between it and the next key
    const OptixMotionOptions motionOptions = optixLoadReadOnlyAlign16( transformData ).motionOptions;
    float                    localTime;
    int                      leftKey;
    optixResolveMotionKey( localTime, leftKey, motionOptions, time );

    // The left key is addressed as float4 rows; the loader blends it with the key that follows it in memory
    const float4* keyRows = (const float4*)( &transformData->transform[leftKey][0] );
    optixLoadInterpolatedMatrixKey( trf0, trf1, trf2, keyRows, localTime );
}
249 
// Returns the interpolated transformation matrix for a particular SRT motion transformation and point in time.
// The SRT keys are interpolated first and the result is then converted into the 3x4 matrix with rows trf0,
// trf1, and trf2.
static __forceinline__ __device__ void optixGetInterpolatedTransformation( float4&                        trf0,
                                                                           float4&                        trf1,
                                                                           float4&                        trf2,
                                                                           const OptixSRTMotionTransform* transformData,
                                                                           const float                    time )
{
    // Resolve the left key and the fractional time between it and the next key
    const OptixMotionOptions motionOptions = optixLoadReadOnlyAlign16( transformData ).motionOptions;
    float                    localTime;
    int                      leftKey;
    optixResolveMotionKey( localTime, leftKey, motionOptions, time );

    // Load and interpolate the SRT keys, addressed as four float4 per key
    const float4* keyData = reinterpret_cast<const float4*>( &transformData->srtData[leftKey] );
    float4        k0, k1, k2, k3;
    optixLoadInterpolatedSrtKey( k0, k1, k2, k3, keyData, localTime );

    // Repack the 16 interpolated floats, in order, into an SRT record
    OptixSRTData srt = {k0.x, k0.y, k0.z, k0.w, k1.x, k1.y, k1.z, k1.w,
                        k2.x, k2.y, k2.z, k2.w, k3.x, k3.y, k3.z, k3.w};

    // Convert the SRT into a matrix
    optixGetMatrixFromSrt( trf0, trf1, trf2, srt );
}
275 
// Returns the interpolated transformation matrix for a particular traversable handle and point in time.
//
// trf0, trf1, trf2  Receive the rows of the 3x4 matrix.
// handle            Traversable handle whose transformation is queried.
// time              Point in time used to interpolate motion transforms.
// objectToWorld     If true, the object-to-world matrix is returned, otherwise the world-to-object matrix.
//
// Motion transforms are interpolated to their object-to-world matrix, which is inverted numerically when
// the other direction is requested. Instance and static transforms are loaded from the matrix stored for
// the requested direction. Any other traversable type yields the identity matrix.
static __forceinline__ __device__ void optixGetInterpolatedTransformationFromHandle( float4&                      trf0,
                                                                                     float4&                      trf1,
                                                                                     float4&                      trf2,
                                                                                     const OptixTraversableHandle handle,
                                                                                     const float                  time,
                                                                                     const bool                   objectToWorld )
{
    const OptixTransformType type = optixGetTransformTypeFromHandle( handle );

    if( type == OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM || type == OPTIX_TRANSFORM_TYPE_SRT_MOTION_TRANSFORM )
    {
        if( type == OPTIX_TRANSFORM_TYPE_MATRIX_MOTION_TRANSFORM )
        {
            const OptixMatrixMotionTransform* transformData = optixGetMatrixMotionTransformFromHandle( handle );
            optixGetInterpolatedTransformation( trf0, trf1, trf2, transformData, time );
        }
        else
        {
            const OptixSRTMotionTransform* transformData = optixGetSRTMotionTransformFromHandle( handle );
            optixGetInterpolatedTransformation( trf0, trf1, trf2, transformData, time );
        }

        // Motion transforms only provide the forward matrix; derive the inverse on demand
        if( !objectToWorld )
            optixInvertMatrix( trf0, trf1, trf2 );
    }
    else if( type == OPTIX_TRANSFORM_TYPE_INSTANCE || type == OPTIX_TRANSFORM_TYPE_STATIC_TRANSFORM )
    {
        const float4* transform;

        if( type == OPTIX_TRANSFORM_TYPE_INSTANCE )
        {
            transform = ( objectToWorld ) ? optixGetInstanceTransformFromHandle( handle ) :
                                            optixGetInstanceInverseTransformFromHandle( handle );
        }
        else
        {
            const OptixStaticTransform* traversable = optixGetStaticTransformFromHandle( handle );
            transform = (const float4*)( ( objectToWorld ) ? traversable->transform : traversable->invTransform );
        }

        trf0 = optixLoadReadOnlyAlign16( &transform[0] );
        trf1 = optixLoadReadOnlyAlign16( &transform[1] );
        trf2 = optixLoadReadOnlyAlign16( &transform[2] );
    }
    else
    {
        // Not a transform traversable: identity
        trf0 = {1.0f, 0.0f, 0.0f, 0.0f};
        trf1 = {0.0f, 1.0f, 0.0f, 0.0f};
        trf2 = {0.0f, 0.0f, 1.0f, 0.0f};
    }
}
328 
// Returns the world-to-object transformation matrix resulting from the current transform stack and current ray time.
// The rows of the 3x4 matrix are written to m0, m1, and m2. The transform list is walked from index 0
// upwards and the world-to-object matrix of every entry is multiplied onto the accumulated result from the
// left.
// NOTE: m0, m1, and m2 are not written when the transform list is empty.
static __forceinline__ __device__ void optixGetWorldToObjectTransformMatrix( float4& m0, float4& m1, float4& m2 )
{
    const unsigned int size = optixGetTransformListSize();
    const float        time = optixGetRayTime();

#pragma unroll 1
    for( unsigned int i = 0; i < size; ++i )
    {
        OptixTraversableHandle handle = optixGetTransformListHandle( i );

        float4 trf0, trf1, trf2;
        optixGetInterpolatedTransformationFromHandle( trf0, trf1, trf2, handle, time, /*objectToWorld*/ false );

        if( i == 0 )
        {
            // First entry initializes the accumulated matrix
            m0 = trf0;
            m1 = trf1;
            m2 = trf2;
        }
        else
        {
            // m := trf * m
            float4 tmp0 = m0, tmp1 = m1, tmp2 = m2;
            m0 = optixMultiplyRowMatrix( trf0, tmp0, tmp1, tmp2 );
            m1 = optixMultiplyRowMatrix( trf1, tmp0, tmp1, tmp2 );
            m2 = optixMultiplyRowMatrix( trf2, tmp0, tmp1, tmp2 );
        }
    }
}
359 
// Returns the object-to-world transformation matrix resulting from the current transform stack and current ray time.
// The rows of the 3x4 matrix are written to m0, m1, and m2. The transform list is walked from the last
// index down to 0 and the object-to-world matrix of every entry is multiplied onto the accumulated result
// from the left.
// NOTE: m0, m1, and m2 are not written when the transform list is empty.
static __forceinline__ __device__ void optixGetObjectToWorldTransformMatrix( float4& m0, float4& m1, float4& m2 )
{
    // Signed so that the descending loop can terminate at i < 0
    const int   size = optixGetTransformListSize();
    const float time = optixGetRayTime();

#pragma unroll 1
    for( int i = size - 1; i >= 0; --i )
    {
        OptixTraversableHandle handle = optixGetTransformListHandle( i );

        float4 trf0, trf1, trf2;
        optixGetInterpolatedTransformationFromHandle( trf0, trf1, trf2, handle, time, /*objectToWorld*/ true );

        if( i == size - 1 )
        {
            // First visited entry initializes the accumulated matrix
            m0 = trf0;
            m1 = trf1;
            m2 = trf2;
        }
        else
        {
            // m := trf * m
            float4 tmp0 = m0, tmp1 = m1, tmp2 = m2;
            m0 = optixMultiplyRowMatrix( trf0, tmp0, tmp1, tmp2 );
            m1 = optixMultiplyRowMatrix( trf1, tmp0, tmp1, tmp2 );
            m2 = optixMultiplyRowMatrix( trf2, tmp0, tmp1, tmp2 );
        }
    }
}
390 
// Multiplies the 3x4 matrix with rows m0, m1, m2 with the point p.
// The point is treated as having a homogeneous coordinate of one, so the translation column (w) is added.
static __forceinline__ __device__ float3 optixTransformPoint( const float4& m0, const float4& m1, const float4& m2, const float3& p )
{
    return make_float3( m0.x * p.x + m0.y * p.y + m0.z * p.z + m0.w,
                        m1.x * p.x + m1.y * p.y + m1.z * p.z + m1.w,
                        m2.x * p.x + m2.y * p.y + m2.z * p.z + m2.w );
}
400 
// Multiplies the 3x3 linear submatrix of the 3x4 matrix with rows m0, m1, m2 with the vector v.
// The translation column (w) of the matrix is ignored.
static __forceinline__ __device__ float3 optixTransformVector( const float4& m0, const float4& m1, const float4& m2, const float3& v )
{
    return make_float3( m0.x * v.x + m0.y * v.y + m0.z * v.z,
                        m1.x * v.x + m1.y * v.y + m1.z * v.z,
                        m2.x * v.x + m2.y * v.y + m2.z * v.z );
}
410 
// Multiplies the transpose of the 3x3 linear submatrix of the 3x4 matrix with rows m0, m1, m2 with the normal n.
// Note that the given matrix is supposed to be the inverse of the actual transformation matrix.
// The translation column (w) of the matrix is ignored.
static __forceinline__ __device__ float3 optixTransformNormal( const float4& m0, const float4& m1, const float4& m2, const float3& n )
{
    return make_float3( m0.x * n.x + m1.x * n.y + m2.x * n.z,
                        m0.y * n.x + m1.y * n.y + m2.y * n.z,
                        m0.z * n.x + m1.z * n.y + m2.z * n.z );
}
421 
422 } // namespace optix_impl
423 
424 #endif
425