Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1081 lines
46 KiB

  1. #include "platform.h"
  2. #include "box_buoyancy.h"
  3. #include "mathlib/vector4d.h"
  4. #include "hardware_clock_fast.h"
  5. inline const Vector ToVector( const fltx4 & f4 )
  6. {
  7. return Vector( SubFloat( f4, 0 ), SubFloat( f4, 1 ), SubFloat( f4, 2 ) );
  8. }
  9. #ifdef _X360
  10. FORCEINLINE fltx4 PermYXZW( const fltx4 & a )
  11. {
  12. return __vpermwi( a, 0x4B ); // 01001011b
  13. }
  14. FORCEINLINE fltx4 PermXZYW( const fltx4 & a )
  15. {
  16. return __vpermwi( a, 0x27 ); // 00100111b
  17. }
  18. FORCEINLINE fltx4 PermZYXW( const fltx4 & a )
  19. {
  20. return __vpermwi( a, 0x93 ); // 10010011b
  21. }
  22. FORCEINLINE fltx4 PermXXYW( const fltx4 & a )
  23. {
  24. return __vpermwi( a, 0x07 ); // 00000111b
  25. }
  26. FORCEINLINE fltx4 PermYZZW( const fltx4 & a )
  27. {
  28. return __vpermwi( a, 0x6B ); // 01101011b
  29. }
  30. FORCEINLINE fltx4 Sum3SIMD( const fltx4 &a )
  31. {
  32. return __vmsum3fp( a, Four_Ones );
  33. }
  34. FORCEINLINE fltx4 CombineSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z, const fltx4 & w )
  35. {
  36. fltx4 r0 = __vmrghw(x, z);
  37. fltx4 r1 = __vmrghw(y, w);
  38. return __vmrghw(r0, r1);
  39. }
  40. // Assumes Y(xbox),Z(PC) are splatted
  41. FORCEINLINE fltx4 CombineXYZ_Special( const fltx4 & x, const fltx4 & y, const fltx4 & z )
  42. {
  43. fltx4 r0 = __vmrghw(x, z);
  44. return __vmrghw(r0, y);
  45. }
  46. #elif defined( _PS3 )
  47. const int32 ALIGN16 g_SIMD_YXZW[4] ALIGN16_POST = { 0x04050607, 0x00010203, 0x08090A0B, 0x0C0D0E0F };
  48. const int32 ALIGN16 g_SIMD_XZYW[4] ALIGN16_POST = { 0x00010203, 0x08090A0B, 0x04050607, 0x0C0D0E0F };
  49. const int32 ALIGN16 g_SIMD_ZYXW[4] ALIGN16_POST = { 0x08090A0B, 0x04050607, 0x00010203, 0x0C0D0E0F };
  50. const int32 ALIGN16 g_SIMD_XXYW[4] ALIGN16_POST = { 0x00010203, 0x00010203, 0x04050607, 0x0C0D0E0F };
  51. const int32 ALIGN16 g_SIMD_YZZW[4] ALIGN16_POST = { 0x04050607, 0x08090A0B, 0x08090A0B, 0x0C0D0E0F };
  52. FORCEINLINE fltx4 PermYXZW( const fltx4 & a )
  53. {
  54. return vec_perm( a, a, (vec_uchar16)LoadAlignedIntSIMD( g_SIMD_YXZW ) );
  55. }
  56. FORCEINLINE fltx4 PermXZYW( const fltx4 & a )
  57. {
  58. return vec_perm( a, a, (vec_uchar16)LoadAlignedIntSIMD( g_SIMD_XZYW ) );
  59. }
  60. FORCEINLINE fltx4 PermZYXW( const fltx4 & a )
  61. {
  62. return vec_perm( a, a, (vec_uchar16)LoadAlignedIntSIMD( g_SIMD_ZYXW ) );
  63. }
  64. FORCEINLINE fltx4 PermXXYW( const fltx4 & a )
  65. {
  66. return vec_perm( a, a, (vec_uchar16)LoadAlignedIntSIMD( g_SIMD_XXYW ) );
  67. }
  68. FORCEINLINE fltx4 PermYZZW( const fltx4 & a )
  69. {
  70. return vec_perm( a, a, (vec_uchar16)LoadAlignedIntSIMD( g_SIMD_YZZW ) );
  71. }
  72. FORCEINLINE fltx4 Sum3SIMD( const fltx4 &a )
  73. {
  74. return SplatXSIMD( a ) + SplatYSIMD( a ) + SplatZSIMD( a );
  75. }
  76. const int32 ALIGN16 g_SIMD_XAXA[4] ALIGN16_POST = { 0x00010203, 0x10111213, 0x00010203, 0x10111213 };
  77. const int32 ALIGN16 g_SIMD_XYAB[4] ALIGN16_POST = { 0x00010203, 0x10111213, 0x00010203, 0x10111213 };
  78. FORCEINLINE fltx4 CombineSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z, const fltx4 & w )
  79. {
  80. //fltx4 xy = vec_perm(x, y, LoadAlignedIntSIMD( g_SIMD_XAXA ) );
  81. //fltx4 zw = vec_perm(z, w, LoadAlignedIntSIMD( g_SIMD_XAXA ) );
  82. fltx4 xzxz = vec_mergeh(x, z);
  83. fltx4 ywyw = vec_mergeh(y, w);
  84. return vec_mergeh(xzxz, ywyw);
  85. }
  86. // Assumes Y(xbox),Z(PC) are splatted
  87. FORCEINLINE fltx4 CombineXYZ_Special( const fltx4 & x, const fltx4 & y, const fltx4 & z )
  88. {
  89. fltx4 r0 = vec_mergeh(x, z);
  90. return vec_mergeh(r0, y);
  91. }
  92. #else
  93. FORCEINLINE fltx4 PermYXZW( const fltx4 & a )
  94. {
  95. return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 2, 0, 1 ) );
  96. }
  97. FORCEINLINE fltx4 PermXZYW( const fltx4 & a )
  98. {
  99. return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 1, 2, 0 ) );
  100. }
  101. FORCEINLINE fltx4 PermZYXW( const fltx4 & a )
  102. {
  103. return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 0, 1, 2 ) );
  104. }
  105. FORCEINLINE fltx4 PermXXYW( const fltx4 & a )
  106. {
  107. return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 1, 0, 0 ) );
  108. }
  109. FORCEINLINE fltx4 PermYZZW( const fltx4 & a )
  110. {
  111. return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 2, 2, 1 ) );
  112. }
  113. FORCEINLINE fltx4 Sum3SIMD( const fltx4 &a )
  114. {
  115. return SplatXSIMD( a ) + SplatYSIMD( a ) + SplatZSIMD( a );
  116. }
  117. FORCEINLINE fltx4 CombineSIMD( const fltx4 & row0, const fltx4 & row1, const fltx4 & row2, const fltx4 & row3 )
  118. {
  119. fltx4 tmp0 = _mm_shuffle_ps( row0, row1, 0x44);
  120. fltx4 tmp1 = _mm_shuffle_ps( row2, row3, 0x44);
  121. return _mm_shuffle_ps(tmp0, tmp1, 0x88);
  122. }
  123. // Assumes Y(xbox),Z(PC) are splatted
  124. FORCEINLINE fltx4 CombineXYZ_Special( const fltx4 & x, const fltx4 & y, const fltx4 & z )
  125. {
  126. fltx4 tmp0 = _mm_shuffle_ps( x, y, 0x44);
  127. return _mm_shuffle_ps(tmp0, z, 0x88);
  128. }
  129. #endif
  130. fltx4 GetBoxBuoyancy4x3( const fltx4& f4a, const fltx4& f4b, const fltx4&f4c, const fltx4&f4Origin )
  131. {
  132. FourVectors box;
  133. box.LoadAndSwizzle( f4a, f4b, f4c, f4Origin );
  134. return GetBoxBuoyancy3x4( box );
  135. }
  136. void BenchmarkBoxBuoyancy4x3( const fltx4& f4a, const fltx4& f4b, const fltx4&f4c, const fltx4&f4Origin )
  137. {
  138. FourVectors box;
  139. box.LoadAndSwizzle( f4a, f4b, f4c, f4Origin );
  140. fltx4 result = {0, 0, 0, 0};
  141. int start, end;
  142. const int nIterations = 1000000;
  143. start = GetHardwareClockFast();
  144. for ( int i = 0; i < nIterations; ++i )
  145. {
  146. result = result + GetBoxBuoyancy3x4( box );
  147. box.x = AndSIMD( box.x, box.x );
  148. }
  149. end = GetHardwareClockFast();
  150. Msg( "Box Buoyancy 4x3 Benchmark: %d ticks/box, volume %g \n", int32( ( end - start ) ) / nIterations, SubFloat( result, 3 ) / nIterations );
  151. }
  152. /*
  153. inline fltx4 operator - ( const fltx4 & a, const fltx4 & b )
  154. {
  155. return SubSIMD( a, b );
  156. }
  157. inline fltx4 operator + ( const fltx4 & a, const fltx4 & b )
  158. {
  159. return AddSIMD( a, b );
  160. }
  161. inline fltx4 operator * ( const fltx4 & a, const fltx4 & b )
  162. {
  163. return MulSIMD( a, b );
  164. }
  165. */
  166. inline fltx4 Bound( const fltx4 & a, const fltx4 &low, const fltx4 &high )
  167. {
  168. return MinSIMD( MaxSIMD( a, low ), high );
  169. }
  170. inline fltx4 Limit01( const fltx4 & a )
  171. {
  172. return MinSIMD( MaxSIMD( Four_Zeros, a ), Four_Ones );
  173. }
  174. const fltx4 Four_One6th = { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f };
  175. const fltx4 Four_One4th = { 0.25f, 0.25f, 0.25f, 0.25f };
  176. const fltx4 Four_One12th = { 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f };
  177. // integral A .. 1 of : y (tipZ + (baseZ - tipZ) y) dy
  178. inline fltx4 TriHelperIntegralAto1( const fltx4 &alpha, const fltx4 &tipZ, const fltx4 &baseZ )
  179. {
  180. return MaddSIMD( Four_Thirds, baseZ,
  181. MsubSIMD( alpha * alpha, ( MsubSIMD( Four_Thirds, alpha * ( tipZ - baseZ ), Four_PointFives * tipZ ) ), Four_One6th * tipZ )
  182. );
  183. }
  184. // integral A .. 1 of : y ((tipZ + (baseZ - tipZ) y)^2) dy
  185. inline fltx4 TriHelperZ2IntegralAto1( const fltx4 &alpha, const fltx4 &tipZ, const fltx4 &baseZ )
  186. {
  187. fltx4 alphaSqr = alpha * alpha;
  188. fltx4 alphaMinus1 = alpha - Four_Ones, alphaMinus1Sqr = alphaMinus1 * alphaMinus1;
  189. return Four_One4th*( Four_Ones - alphaSqr ) * ( alphaSqr + Four_Ones ) * baseZ*baseZ + ( Four_One6th + alphaSqr*alpha * ( Four_PointFives * alpha - Four_TwoThirds ) )* baseZ *tipZ - alphaMinus1Sqr * alphaMinus1 * ( Four_One12th + Four_One4th * alpha ) * tipZ * tipZ;
  190. }
  191. // integral A .. 1 of : y (tipZ + (baseZ - tipZ) y) * (tipX + (baseX - tipX) y) dy
  192. inline fltx4 TriHelperZ2IntegralAto1( const fltx4 &alpha, const fltx4 &tipZ, const fltx4 &baseZ, const fltx4 &tipX, const fltx4 &baseX )
  193. {
  194. fltx4 alphaSqr = alpha * alpha;
  195. fltx4 alphaMinus1 = alpha - Four_Ones, alphaMinus1Sqr = alphaMinus1 * alphaMinus1;
  196. return ( alphaMinus1Sqr*tipX*( ( Four_Ones + alpha*( Four_Twos + Four_Threes*alpha ) )*baseZ + tipZ + ( Four_Twos - Four_Threes*alpha )*alpha*tipZ ) +
  197. baseX*( -Four_Threes*( alphaSqr*alphaSqr - Four_Ones )*baseZ + tipZ + alphaSqr*alpha*( Four_Threes*alpha - Four_Fours )*tipZ ) ) * Four_One12th;
  198. }
  199. // integral 0 .. B of : y (tipZ + (baseZ - tipZ) y) dy
  200. inline fltx4 TriHelperIntegral0toB( const fltx4 &beta, const fltx4 &tipZ, const fltx4 &baseZ )
  201. {
  202. return beta * beta * ( MaddSIMD( Four_Thirds, ( baseZ - tipZ ) * beta, Four_PointFives * tipZ ) );
  203. }
  204. /*
  205. double SubDbl( const fltx4& a, int i )
  206. {
  207. return SubFloat( a, i );
  208. }
  209. */
  210. // integral 0 .. B of : y ((tipZ + (baseZ - tipZ) y)^2) dy
  211. inline fltx4 TriHelperZ2Integral0toB( const fltx4 &beta, const fltx4 &tipZ, const fltx4 &baseZ )
  212. {
  213. fltx4 dz = baseZ - tipZ;
  214. fltx4 bdz = beta * dz;
  215. fltx4 f4Integral = beta * beta * ( Four_One4th * bdz * bdz + Four_TwoThirds * bdz * tipZ + Four_PointFives * tipZ * tipZ );
  216. /*
  217. double testIntegral[4];
  218. for ( int i = 0; i < 4; ++i )
  219. {
  220. testIntegral[i] = SubDbl( beta, i ) * SubDbl( beta, i ) * ( SubDbl( Four_One4th, i ) * SubDbl( bdz, i ) * SubDbl( bdz, i ) + SubDbl( Four_TwoThirds, i ) * SubDbl( bdz, i ) * SubDbl( tipZ, i ) + SubDbl( Four_PointFives, i ) * SubDbl( tipZ, i ) * SubDbl( tipZ, i ) );
  221. }
  222. */
  223. return f4Integral;
  224. }
  225. // integral 0 .. B of : y (tipZ + (baseZ - tipZ) y) (tipX + (baseX - tipX) y ) dy
  226. // note: baseX should be the center of the base coordinate
  227. inline fltx4 TriHelperZ2Integral0toB( const fltx4 &beta, const fltx4 &tipZ, const fltx4 &baseZ, const fltx4 &tipX, const fltx4 &baseX )
  228. {
  229. fltx4 dz = baseZ - tipZ, dx = baseX - tipX;
  230. fltx4 betaSqr = beta * beta;
  231. fltx4 f4Integral = betaSqr * ( betaSqr * Four_One4th * dx * dz + Four_PointFives * tipX * tipZ + Four_Thirds * beta * ( baseZ * tipX + ( baseX - Four_Twos * tipX ) * tipZ ) );
  232. return f4Integral;
  233. }
  234. // this is 3 * Integral 0..1 of (z0+(z1-z0)y) dy
  235. inline fltx4 TrplAvgSqrZ( const fltx4& z0, const fltx4 &z1 )
  236. {
  237. return MaddSIMD( z0, z0 + z1, z1 * z1 );
  238. }
  239. inline fltx4 SixAvgSqrZX( const fltx4& z0, const fltx4 &z1, const fltx4& x0, const fltx4 &x1 )
  240. {
  241. return x0 * MaddSIMD( Four_Twos, z0, z1 ) + x1 * MaddSIMD( Four_Twos, z1, z0 ) ;
  242. }
  243. const fltx4 f4Epsilon = {1e-6f, 1e-6f, 1e-6f, 1e-6f};
  244. inline FourVectors Cross( const FourVectors &a, const FourVectors &b )
  245. {
  246. FourVectors ret;
  247. ret.x=MsubSIMD( a.z, b.y, MulSIMD( a.y, b.z ) );
  248. ret.y=MsubSIMD( a.x, b.z, MulSIMD( a.z, b.x ) );
  249. ret.z=MsubSIMD( a.y, b.x, MulSIMD( a.x, b.y ) );
  250. return ret;
  251. }
  252. inline fltx4 CrossZ( const FourVectors &a, const FourVectors &b )
  253. {
  254. return MsubSIMD( a.y, b.x, MulSIMD( a.x, b.y ) );
  255. }
  256. inline fltx4 Sqr( const fltx4 &a )
  257. {
  258. return a * a;
  259. }
  260. inline FourVectors MsubSIMD( const FourVectors &a, const fltx4 &b, const FourVectors &c) // c-a*b
  261. {
  262. FourVectors ret;
  263. ret.x = MsubSIMD(a.x, b, c.x );
  264. ret.y = MsubSIMD(a.y, b, c.y );
  265. ret.z = MsubSIMD(a.z, b, c.z );
  266. return ret;
  267. }
  268. const fltx4 g_f4_11h4 = {1,1,0.5f,4.0f};
  269. const fltx4 g_f4_4424 = {4,4,2,4};
  270. const fltx4 g_f4AlmostInifiniteSlope = {1e+24,1e+24,1e+24,1e+24};
  271. const int32 ALIGN16 g_SIMD_signmask_W[4] ALIGN16_POST = { 0x80000000, 0x80000000, 0x80000000, 0xFFFFFFFF };
  272. const int32 ALIGN16 g_SIMD_signmask_NoW[4] ALIGN16_POST = { 0x80000000, 0x80000000, 0x80000000, 0 };
  273. // physical interpretation: we're integrating the pressure force (pascals) along the submerged surface.
  274. // in other words, we substitute the usual volume integral for surface integral
  275. // Xbox360: 1250 cycles; Core2 Quad: 500 cycles; Core i7: ? cycles ; error: 2e-5
  276. fltx4 GetBoxBuoyancy3x4( const FourVectors &box_in )
  277. {
  278. FourVectors box; // sorted box
  279. // make (a,b,c).z > 0
  280. fltx4 f4SignMask = LoadAlignedSIMD( g_SIMD_signmask );
  281. fltx4 signZ = AndSIMD( box_in.z, f4SignMask );
  282. box.x = XorSIMD( box_in.x, signZ );
  283. box.y = XorSIMD( box_in.y, signZ );
  284. box.z = AndNotSIMD( f4SignMask, box_in.z );
  285. fltx4 boxCenterZ = SplatWSIMD( box_in.z ); // the height of the center of the box above the water level
  286. fltx4 boxCenterXY = AndSIMD( SetYSIMD( SplatWSIMD( box_in.x ), SplatWSIMD( box_in.y ) ), LoadAlignedSIMD( g_SIMD_SkipTailMask[2] ) );
  287. // there are a lot of scheduling holes on this stage, so we might as well precompute something
  288. // high point of the box, a+b+c
  289. fltx4 boxTopX = Sum3SIMD( box.x );
  290. fltx4 boxTopY = Sum3SIMD( box.y );
  291. fltx4 boxTopZrel = Sum3SIMD( box.z );
  292. fltx4 boxTopZabs = boxCenterZ + boxTopZrel, boxBotZ = boxCenterZ - boxTopZrel;
  293. // sort a.z > b.z > c.z > 0; sorting takes 43 cycles on xbox360
  294. bi32x4 swap_a_c = CmpLtSIMD( SplatXSIMD( box.z ), SplatZSIMD( box.z ) );
  295. box.x = MaskedAssign( swap_a_c, PermZYXW( box.x ), box.x );
  296. box.y = MaskedAssign( swap_a_c, PermZYXW( box.y ), box.y );
  297. box.z = MaskedAssign( swap_a_c, PermZYXW( box.z ), box.z );
  298. bi32x4 isBsmaller = CmpLtSIMD( SplatYSIMD( box.z ), box.z );
  299. bi32x4 ordered_a_b = SplatXSIMD( isBsmaller ); // if a > b, they're ordered correctly
  300. box.x = MaskedAssign( ordered_a_b, box.x, PermYXZW( box.x ) );
  301. box.y = MaskedAssign( ordered_a_b, box.y, PermYXZW( box.y ) );
  302. box.z = MaskedAssign( ordered_a_b, box.z, PermYXZW( box.z ) );
  303. bi32x4 swap_b_c = SplatZSIMD( isBsmaller ); // if b < c, we need to swap them
  304. box.x = MaskedAssign( swap_b_c, PermXZYW( box.x ), box.x );
  305. box.y = MaskedAssign( swap_b_c, PermXZYW( box.y ), box.y );
  306. box.z = MaskedAssign( swap_b_c, PermXZYW( box.z ), box.z );
  307. Assert( SubFloat( box.z, 0 ) >= SubFloat( box.z, 1 ) && SubFloat( box.z, 1 ) >= SubFloat( box.z, 2 ) && SubFloat( box.z, 2 ) >= 0 );
  308. // sorted and positive, time to integrate sides: (a,b) (a,c) (b,c)
  309. // (a-b).z > (b-a).z, so the a+b, a-b, b-a, -a-b is the order of corners, top-to-bottom
  310. FourVectors boxA, boxB; // these two represent a and b of each pair of edges defining
  311. boxA.x = PermXXYW( box.x );
  312. boxA.y = PermXXYW( box.y );
  313. boxA.z = PermXXYW( box.z );
  314. boxB.x = PermYZZW( box.x );
  315. boxB.y = PermYZZW( box.y );
  316. boxB.z = PermYZZW( box.z );
  317. FourVectors boxC; // "c" maps to ±c,b,a
  318. boxC.x = PermZYXW( box.x );
  319. boxC.y = PermZYXW( box.y );
  320. boxC.z = PermZYXW( box.z );
  321. // if a.z == 0 , b.z is also 0, so the whole rectangle is parallel to z=const
  322. bi32x4 isSideFlat = CmpLtSIMD( boxA.z, f4Epsilon );
  323. fltx4 rcpAz = AndNotSIMD( isSideFlat, ReciprocalSIMD( boxA.z ) );
  324. fltx4 rcp2Az = Four_PointFives * rcpAz;
  325. // the part of quad along a that's in the triangles cut by z=const surfaces
  326. // this is the same regardless of C
  327. //
  328. // tab size must = 4 spaces for the ASCII art below to make sense
  329. //
  330. // * (a+b) cut = 0 a
  331. // /| | ^
  332. // / | | |
  333. // (a-b) *--+ cut = f4CutPart | |
  334. // | | | |
  335. // | | | | > b=
  336. // | | V | /
  337. // +--* (b-a) cut = 1 cut, | /
  338. // | / level, | /
  339. // |/ fraction, | /
  340. // (-a-b) * cut = 1 + f4CutPart etc. |/
  341. //
  342. //
  343. //               (a+b)-(a-b)    2b    b
  344. // computed as   ----------- == -- == -
  345. //               (a+b)-(b-a)    2a    a
  346. //
  347. fltx4 f4CutPart = MulSIMD( boxB.z, rcpAz ); // this must be between 0 (b is parallel to z=const) and 1 (a and b both have 45' slope)
  348. Assert( IsAllGreaterThanOrEq( Four_Ones + f4Epsilon, SetWToZeroSIMD( f4CutPart ) ) && IsAllGreaterThanOrEq( f4CutPart + f4Epsilon, SetWToZeroSIMD( Four_Zeros ) ) );
  349. //fltx4 rcpCutPart = AndSIMD( ReciprocalSIMD( f4CutPart ), CmpGtSIMD( f4CutPart, f4Epsilon ) );
  350. // integrate the full sides of the box, multiplied by the XY projection areas
  351. fltx4 f4SideProj = fabs( CrossZ( boxA, boxB ) );
  352. // here's the center-of-mass and total volume integral solution:
  353. // {{4/3 (3 x0 z0 + xA zA + xB zB), 4/3 (3 y0 z0 + yA zA + yB zB), 2/3 (3 z0^2 + zA^2 + zB^2), 4 z0},
  354. // {1/24 (4 x0 (3 z0 + zA + zB) + xA (4 z0 + 2 zA + zB) + xB (4 z0 + zA + 2 zB)),
  355. // 1/24 (4 y0 (3 z0 + zA + zB) + yA (4 z0 + 2 zA + zB) + yB (4 z0 + zA + 2 zB)),
  356. // 1/24 (6 z0^2 + zA^2 + zA zB + zB^2 + 4 z0 (zA + zB)),
  357. // 1/6 (3 z0 + zA + zB)}}
  358. //fltx4 f4FullZ0_Cpos = boxCenterZ + boxC.z, f4FullZ0_Cneg = boxCenterZ - boxC.z;
  359. // 4/3 (3 x0 z0 + xA zA + xB zB) type of integral : take x0 z0 + (xA zA + xB zB) / 3
  360. // consider that x0 = ± boxC.x and z0 = boxCenterZ ± boxC.z, we're left with
  361. // ± boxCenterZ boxC.x + boxC.x boxC.z + (xA zA + xB zB) / 3
  362. // Again, the only part that changes is (± boxCenterZ boxC.x)
  363. fltx4 f4Full_X_common = boxC.x * boxC.z + Four_Thirds * ( boxA.x * boxA.z + boxB.x * boxB.z );
  364. fltx4 f4Full_X_Cpos = Four_Fours * (boxCenterZ * boxC.x + f4Full_X_common);
  365. fltx4 f4Full_X_Cneg = Four_Fours * (f4Full_X_common - boxCenterZ * boxC.x);
  366. // y is the same as x
  367. fltx4 f4Full_Y_common = boxC.y * boxC.z + Four_Thirds * ( boxA.y * boxA.z + boxB.y * boxB.z );
  368. fltx4 f4Full_Y_Cpos = Four_Fours * ( boxCenterZ * boxC.y + f4Full_Y_common ) ;
  369. fltx4 f4Full_Y_Cneg = Four_Fours * ( f4Full_Y_common - boxCenterZ * boxC.y ) ;
  370. // z is different: 2/3 (3 z0^2 + zA^2 + zB^2) ; z0 = boxCenterZ ± boxC.z,
  371. // so we can just add the difference of 4 * boxCenterZ * boxC.z to get from Cneg to Cpos
  372. fltx4 f4Full_Z_common = Four_TwoThirds * ( Sqr( boxA.z ) + Sqr( boxB.z ) );
  373. fltx4 f4Full_Z_Cpos = MaddSIMD( Four_Twos, Sqr( boxCenterZ + boxC.z ), f4Full_Z_common );
  374. fltx4 f4Full_Z_Cneg = MaddSIMD( Four_Twos, Sqr( boxCenterZ - boxC.z ), f4Full_Z_common );
  375. fltx4 f4Full_W_Cpos = Four_Fours * ( boxCenterZ + boxC.z ), f4Full_W_Cneg = Four_Fours * ( boxCenterZ - boxC.z );
  376. // this is how we'd compute the center of mass for fully-submerged cube, for validation
  377. #ifdef _DEBUG
  378. fltx4 f4TestVolume = Dot3SIMD( f4Full_W_Cpos - f4Full_W_Cneg, f4SideProj );
  379. fltx4 f4TestSideProjDivVolume = f4SideProj * ReciprocalSIMD( f4TestVolume );
  380. fltx4 f4TestLeverX = Dot3SIMD( f4Full_X_Cpos - f4Full_X_Cneg, f4TestSideProjDivVolume ), f4TestLeverY = Dot3SIMD( f4Full_Y_Cpos - f4Full_Y_Cneg, f4TestSideProjDivVolume );
  381. fltx4 f4TestLeverZ = Dot3SIMD( f4Full_Z_Cpos - f4Full_Z_Cneg, f4TestSideProjDivVolume );
  382. fltx4 f4TestResult = CombineSIMD( f4TestLeverX + SplatWSIMD(box_in.x), f4TestLeverY + SplatWSIMD(box_in.y), f4TestLeverZ, f4TestVolume ); (void)f4TestResult;
  383. #endif
  384. //
  385. //
  386. /////////////////////////////////////////////////////////////////////////////
  387. // Computing Center parallelogram component of the full surface integral
  388. //
  389. // To compute the integral across the submerged part of each of 6 faces, we'll compute these components and then selectively sum them up
  390. // to form the full integral: the top and bottom triangle.
  391. // if the water level is intersecting top triangle ((a-b).z < 0) , we'll subtract top triangle integral from full integral
  392. // if the water level is intersecting bottom triangle ((b-a).z < 0) , we'll select just the bottom triangle integral
  393. // .. and we'll have to compute the middle part because it's not symmetrical ..
  394. // .. on the second thought, we compute the center (parallelogram) , upper tri and lower tri
  395. // for the center computation, we need the point of the middle of the center and m=b-ra parallel to the water
  396. // waterTop is{ 0 = at V0 top; cut = at V1; 1 = at V2; 1+cut = at V3 bottom of the quad }
  397. // waterBot is central-symmetrical, negative
  398. // to find the fraction of right side of rectangle (the +b side) that has z=0
  399. // this is different for +C and -C sides
  400. //
  401. //               (a+b) ± c + p     a+b ± c + p
  402. // computed as   -------------- == ------------     // note: ± is typed by Alt + 0177
  403. //               (a+b)-(b-a)           2 a
  404. //
  405. // Warning: I take special care in cases of flat faces (z=const, when rcpAz is undefined)
  406. // in these cases, submerged faces must have water<=0 and faces above water (z>0) must have water >= 1 + cut
  407. // Note: If I take care not to compute fully-submerged or fully-above-water polytopes, I only need to check
  408. // below-water case for Cneg faces and above-water case for Cpos faces
  409. //
  410. // The trick I'm using here to account for everything is perturb the face's slope slightly to effectively divide by epsilon
  411. fltx4 rcp2AzSpecial = MaskedAssign( isSideFlat, g_f4AlmostInifiniteSlope, rcp2Az );
  412. fltx4 f4WaterPart_Cpos = boxTopZabs * rcp2AzSpecial, f4WaterPart_Cneg = MaddSIMD( boxBotZ, rcp2AzSpecial, f4CutPart ) + Four_Ones;
  413. // on the central piece, we need to integrate along axes (a,m = b - cut*a) and ranges {-1+cut...max(-1+cut,1-max(w,cut)) , -1...1}
  414. // even cut and w have the same denominator: it's cut=2b/2a and water=topZ/2a
  415. //fltx4 f4HighLimit_Cpos = MaxSIMD( f4LowLimit, Four_Ones - MaxSIMD( f4WaterPart_Cpos, f4CutPart ) );
  416. //fltx4 f4HighLimit_Cneg = MaxSIMD( f4LowLimit, Four_Ones - MaxSIMD( f4WaterPart_Cneg, f4CutPart ) );
  417. fltx4 f4TopWaterInCenter_Cpos = MinSIMD( Four_Ones, MaxSIMD( f4CutPart, f4WaterPart_Cpos ) );
  418. fltx4 f4TopWaterInCenter_Cneg = MinSIMD( Four_Ones, MaxSIMD( f4CutPart, f4WaterPart_Cneg ) );
  419. // the range is full (1 means full span of the whole center parallelogram)
  420. // but the origin is to be multiplied by A, so 1 means half of the length (-1 means 0 area)
  421. fltx4 f4CenterRange_Cpos = Four_Ones - f4TopWaterInCenter_Cpos, f4CenterOriginA_Cpos = f4CutPart - f4TopWaterInCenter_Cpos;
  422. fltx4 f4CenterRange_Cneg = Four_Ones - f4TopWaterInCenter_Cneg, f4CenterOriginA_Cneg = f4CutPart - f4TopWaterInCenter_Cneg;
  423. // given the span (we're integrating from -span to +span), we can compute the center point for integration: ((r-1) + (1-max(w,r)))/2
  424. // we can also compute the area of projection, because we reduce the area of the face by 1-max(r,w), i.e. by the span
  425. fltx4 f4CenterProj_Cpos = f4SideProj * f4CenterRange_Cpos, f4CenterProj_Cneg = f4SideProj * f4CenterRange_Cneg;
  426. fltx4 f4CenterRangeSqr_Cpos = f4CenterRange_Cpos * f4CenterRange_Cpos;
  427. fltx4 f4CenterRangeSqr_Cneg = f4CenterRange_Cneg * f4CenterRange_Cneg;
  428. // to integrate the central piece, we need the center point (pos±(c-a*q)), q = ; and m=b-cut a
  429. // because it cancels out lots of terms in the integral
  430. FourVectors boxM = MsubSIMD( boxA, f4CutPart, boxB ); // m=b-ra, replacement for b in the integrals
  431. // here's the center-of-mass and total volume integral solution. M is our B in this case.
  432. // {{4/3 (3 x0 z0 + xA zA + xM zM), 4/3 (3 y0 z0 + yA zA + yM zM), 2/3 (3 z0^2 + zA^2 + zM^2), 4 z0},
  433. //
  434. // and for triangles it would be this:
  435. // {1/24 (4 x0 (3 z0 + zA + zM) + xA (4 z0 + 2 zA + zM) + xM (4 z0 + zA + 2 zM)),
  436. // 1/24 (4 y0 (3 z0 + zA + zM) + yA (4 z0 + 2 zA + zM) + yM (4 z0 + zA + 2 zM)),
  437. // 1/24 (6 z0^2 + zA^2 + zA zM + zM^2 + 4 z0 (zA + zM)),
  438. // 1/6 (3 z0 + zA + zM)}}
  439. // ... but we only use the rectangular integral right now
  440. fltx4 f4CenterX0_Cpos = boxC.x + f4CenterOriginA_Cpos * boxA.x, f4CenterX0_Cneg = f4CenterOriginA_Cneg * boxA.x - boxC.x;
  441. fltx4 f4CenterY0_Cpos = boxC.y + f4CenterOriginA_Cpos * boxA.y, f4CenterY0_Cneg = f4CenterOriginA_Cneg * boxA.y - boxC.y;
  442. fltx4 f4CenterZ0_Cpos = boxCenterZ + boxC.z + f4CenterOriginA_Cpos * boxA.z, f4CenterZ0_Cneg = boxCenterZ + f4CenterOriginA_Cneg * boxA.z - boxC.z;
  443. // 4/3 (3 x0 z0 + xA zA + xB zB) type of integral : take x0 z0 + (xA zA + xB zB) / 3
  444. // xA zA + xB zB is the common part
  445. //fltx4 f4Center_X_common = Four_Thirds * (boxA.x * boxA.z + boxM.x * boxM.z );
  446. fltx4 boxMxz = boxM.x * boxM.z, boxAxz = boxA.x * boxA.z;
  447. fltx4 f4Center_X_Cpos = Four_Fours * MaddSIMD( f4CenterX0_Cpos, f4CenterZ0_Cpos, Four_Thirds * MaddSIMD( boxAxz, f4CenterRangeSqr_Cpos, boxMxz ) );
  448. fltx4 f4Center_X_Cneg = Four_Fours * MaddSIMD( f4CenterX0_Cneg, f4CenterZ0_Cneg, Four_Thirds * MaddSIMD( boxAxz, f4CenterRangeSqr_Cneg, boxMxz ) );
  449. // y is the same as x
  450. //fltx4 f4Center_Y_common = Four_Thirds * (boxA.y * boxA.z + boxM.y * boxM.z );
  451. fltx4 boxMyz = boxM.y * boxM.z, boxAyz = boxA.y * boxA.z;
  452. fltx4 f4Center_Y_Cpos = Four_Fours * MaddSIMD( f4CenterY0_Cpos, f4CenterZ0_Cpos, Four_Thirds * MaddSIMD(boxAyz, f4CenterRangeSqr_Cpos, boxMyz ) );
  453. fltx4 f4Center_Y_Cneg = Four_Fours * MaddSIMD( f4CenterY0_Cneg, f4CenterZ0_Cneg, Four_Thirds * MaddSIMD(boxAyz, f4CenterRangeSqr_Cneg, boxMyz ) );
  454. // z is a bit different: 2/3 (3 z0^2 + zA^2 + zB^2)
  455. // so we can just add the difference of 4 * boxCenterZ * boxC.z to get from Cneg to Cpos
  456. //fltx4 f4Center_Z_common = Four_TwoThirds * ( Sqr( boxA.z ) + Sqr( boxM.z ) );
  457. fltx4 boxMzz = boxM.z * boxM.z, boxAzz = boxA.z * boxA.z;
  458. fltx4 f4Center_Z_Cpos = Four_Twos * MaddSIMD( f4CenterZ0_Cpos, f4CenterZ0_Cpos, Four_Thirds * MaddSIMD( boxAzz, f4CenterRangeSqr_Cpos, boxMzz ) );
  459. fltx4 f4Center_Z_Cneg = Four_Twos * MaddSIMD( f4CenterZ0_Cneg, f4CenterZ0_Cneg, Four_Thirds * MaddSIMD( boxAzz, f4CenterRangeSqr_Cneg, boxMzz ) );
  460. fltx4 f4Center_W_Cpos = Four_Fours * f4CenterZ0_Cpos, f4Center_W_Cneg = Four_Fours * f4CenterZ0_Cneg;
  461. #ifdef _DEBUG
  462. fltx4 f4CenterVolume = Dot3SIMD( f4Center_W_Cpos, f4CenterProj_Cpos ) - Dot3SIMD( f4Center_W_Cneg, f4CenterProj_Cneg );
  463. fltx4 f4CenterLeverX = Dot3SIMD( f4Center_X_Cpos, f4CenterProj_Cpos ) - Dot3SIMD( f4Center_X_Cneg, f4CenterProj_Cneg );
  464. fltx4 f4CenterLeverY = Dot3SIMD( f4Center_Y_Cpos, f4CenterProj_Cpos ) - Dot3SIMD( f4Center_Y_Cneg, f4CenterProj_Cneg );
  465. fltx4 f4CenterLeverZ = Dot3SIMD( f4Center_Z_Cpos, f4CenterProj_Cpos ) - Dot3SIMD( f4Center_Z_Cneg, f4CenterProj_Cneg );
  466. // this is the condensed result of previous integration
  467. fltx4 f4CenterComponent = CombineSIMD( f4CenterLeverX, f4CenterLeverY, f4CenterLeverZ, f4CenterVolume );(void)f4CenterComponent;
  468. #endif
  469. //
  470. //
  471. //////////////////////////////////////////////////////////////////////////
  472. // Computing triangle components
  473. //
  474. // If top triangle is selected , Center and bottom tri are ignored and top tri is subtracted from "Full" side integrals
  475. // top triangle starts with the top vertex, spanning 0..-2*min(water,cut) along A and 0..-2*min(water,cut)/cut along B
  476. // the isTopTri_* selectors will select the top tris out if appropriate
  477. bi32x4 isCutLarge = CmpGtSIMD( f4CutPart, f4Epsilon ); // is the triangle part large enough to even consider it? in most cases it is
  478. bi32x4 isTopTri_Cpos = AndSIMD( CmpLeSIMD( f4WaterPart_Cpos, f4CutPart ), isCutLarge ), isTopTri_Cneg = AndSIMD( CmpLeSIMD( f4WaterPart_Cneg, f4CutPart ), isCutLarge );
  479. //fltx4 isBotTri_Cpos = AndNotSIMD( isTopTri_Cpos, isCutLarge ), isBotTri_Cneg = AndNotSIMD( isTopTri_Cneg, isCutLarge );
  480. // integrate above-water part
  481. fltx4 rcpCutPart = AndSIMD( ReciprocalSIMD( f4CutPart ), isCutLarge ); // when this is Inf, isCutLarge will select it off
  482. fltx4 f4WaterInTop_Cpos = MaxSIMD( Four_Zeros, MinSIMD( f4CutPart, f4WaterPart_Cpos ) );
  483. fltx4 f4WaterInTop_Cneg = MaxSIMD( Four_Zeros, MinSIMD( f4CutPart, f4WaterPart_Cneg ) ); // when water is below the tri, it'll actually be selected off, so the min(cut,water) isn't needed here really
  484. FourVectors boxTopTriB_Cpos = boxB * ( f4WaterInTop_Cpos * rcpCutPart ), boxTopTriB_Cneg = boxB * ( f4WaterInTop_Cneg * rcpCutPart );
  485. FourVectors boxTopTriA_Cpos = boxA * f4WaterInTop_Cpos, boxTopTriA_Cneg = boxA * f4WaterInTop_Cneg;
  486. fltx4 f4TopTriProj_Cpos = fabs( CrossZ( boxTopTriA_Cpos, boxTopTriB_Cpos ) ), f4TopTriProj_Cneg = fabs( CrossZ( boxTopTriA_Cneg, boxTopTriB_Cneg ) );
  487. fltx4 f4WaterInBot_common = Four_Ones + f4CutPart, f4CutPart_neg = -f4CutPart;
  488. // fltx4 f4WaterInBot_Cpos = MaxSIMD( Four_Zeros, MinSIMD( f4CutPart, f4WaterInBot_common - f4WaterPart_Cpos ) );
  489. // fltx4 f4WaterInBot_Cneg = MaxSIMD( Four_Zeros, MinSIMD( f4CutPart, f4WaterInBot_common - f4WaterPart_Cneg ) );
  490. fltx4 f4WaterInBot_Cpos_neg = MinSIMD( Four_Zeros, MaxSIMD( f4CutPart_neg, f4WaterPart_Cpos - f4WaterInBot_common) );
  491. fltx4 f4WaterInBot_Cneg_neg = MinSIMD( Four_Zeros, MaxSIMD( f4CutPart_neg, f4WaterPart_Cneg - f4WaterInBot_common) );
  492. // update: (looks like) for the bottom triangle, we need to integrate (0..+2) and (0..+2) in positive triangle, so we'll just need to flip
  493. // the signs for the bottom triangle A and B vectors
  494. FourVectors boxBotTriB_Cpos = boxB * ( f4WaterInBot_Cpos_neg * rcpCutPart ), boxBotTriB_Cneg = boxB * ( f4WaterInBot_Cneg_neg * rcpCutPart );
  495. FourVectors boxBotTriA_Cpos = boxA * f4WaterInBot_Cpos_neg, boxBotTriA_Cneg = boxA * f4WaterInBot_Cneg_neg;
  496. fltx4 f4BotTriProj_Cpos = fabs( CrossZ( boxBotTriA_Cpos, boxBotTriB_Cpos ) ), f4BotTriProj_Cneg = fabs( CrossZ( boxBotTriA_Cneg, boxBotTriB_Cneg ) );
  497. // let's integrate along topTriA (0..-2) and topTriB (0..-2), a triangle . Here's the solved integral:
  498. // 2/3 (xA (-2 z0 + 2 zA + zB) + xB (-2 z0 + zA + 2 zB) + x0 (3 z0 - 2 (zA + zB))),
  499. // 2/3 (yA (-2 z0 + 2 zA + zB) + yB (-2 z0 + zA + 2 zB) + y0 (3 z0 - 2 (zA + zB))),
  500. // 1/3 (3 z0^2 - 4 z0 (zA + zB) + 2 (zA^2 + zA zB + zB^2)),
  501. // 2/3 (3 z0 - 2 (zA + zB))
  502. //
  503. // here's collected by x0,y0,z0
  504. // 2/3 (-2 xA - 2 xB) z0 + 2/3 (2 xA zA + xB zA + xA zB + 2 xB zB) + x0 (2 z0 - (4 (zA + zB))/3),
  505. // 2/3 (-2 yA - 2 yB) z0 + 2/3 (2 yA zA + yB zA + yA zB + 2 yB zB) + y0 (2 z0 - (4 (zA + zB))/3),
  506. // z0^2 - 4/3 z0 (zA + zB) + 2/3 (zA^2 + zA zB + zB^2),
  507. // 2 z0 - (4 (zA + zB))/3
  508. // x0,y0,z0 are the boxTopZ for Cpos, and boxTopZ - 2 C for Cneg
  509. fltx4 f4TopTriX0_Cneg = MsubSIMD( Four_Twos, boxC.x, boxTopX );
  510. fltx4 f4TopTriY0_Cneg = MsubSIMD( Four_Twos, boxC.y, boxTopY );
  511. fltx4 f4TopTriZ0_Cneg = MsubSIMD( Four_Twos, boxC.z, boxTopZabs );
  512. fltx4 f4TopTri_X_Cpos = Four_TwoThirds * (boxTopTriA_Cpos.x * ( Four_Twos * ( boxTopTriA_Cpos.z - boxTopZabs ) + boxTopTriB_Cpos.z ) +
  513. boxTopTriB_Cpos.x * ( boxTopTriA_Cpos.z +
  514. Four_Twos * ( boxTopTriB_Cpos.z - boxTopZabs ) ) +
  515. boxTopX * (Four_Threes * boxTopZabs - Four_Twos * ( boxTopTriA_Cpos.z + boxTopTriB_Cpos.z ) ) );
  516. fltx4 f4TopTri_Y_Cpos = Four_TwoThirds * (boxTopTriA_Cpos.y * ( Four_Twos * ( boxTopTriA_Cpos.z - boxTopZabs ) + boxTopTriB_Cpos.z ) +
  517. boxTopTriB_Cpos.y * ( boxTopTriA_Cpos.z +
  518. Four_Twos * ( boxTopTriB_Cpos.z - boxTopZabs ) ) +
  519. boxTopY * (Four_Threes * boxTopZabs - Four_Twos * ( boxTopTriA_Cpos.z + boxTopTriB_Cpos.z ) ) );
  520. fltx4 f4TopTri_Z_Cpos = Four_Thirds * (Four_Threes * boxTopZabs * boxTopZabs -
  521. Four_Fours * boxTopZabs * (boxTopTriA_Cpos.z + boxTopTriB_Cpos.z) +
  522. Four_Twos * (boxTopTriA_Cpos.z * boxTopTriA_Cpos.z +
  523. boxTopTriA_Cpos.z * boxTopTriB_Cpos.z + boxTopTriB_Cpos.z*boxTopTriB_Cpos.z));
  524. fltx4 f4TopTri_W_Cpos = Four_TwoThirds * ( Four_Threes * boxTopZabs - Four_Twos * ( boxTopTriA_Cpos.z + boxTopTriB_Cpos.z ) );
  525. fltx4 f4TopTri_X_Cneg = Four_TwoThirds * (boxTopTriA_Cneg.x * ( Four_Twos * ( boxTopTriA_Cneg.z - f4TopTriZ0_Cneg ) + boxTopTriB_Cneg.z ) +
  526. boxTopTriB_Cneg.x * ( boxTopTriA_Cneg.z +
  527. Four_Twos * ( boxTopTriB_Cneg.z - f4TopTriZ0_Cneg ) ) +
  528. f4TopTriX0_Cneg * (Four_Threes * f4TopTriZ0_Cneg - Four_Twos * ( boxTopTriA_Cneg.z + boxTopTriB_Cneg.z ) ) );
  529. fltx4 f4TopTri_Y_Cneg = Four_TwoThirds * (boxTopTriA_Cneg.y * ( Four_Twos * ( boxTopTriA_Cneg.z - f4TopTriZ0_Cneg ) + boxTopTriB_Cneg.z ) +
  530. boxTopTriB_Cneg.y * ( boxTopTriA_Cneg.z +
  531. Four_Twos * ( boxTopTriB_Cneg.z - f4TopTriZ0_Cneg ) ) +
  532. f4TopTriY0_Cneg * (Four_Threes * f4TopTriZ0_Cneg - Four_Twos * ( boxTopTriA_Cneg.z + boxTopTriB_Cneg.z ) ) );
  533. fltx4 f4TopTri_Z_Cneg = Four_Thirds * (Four_Threes * f4TopTriZ0_Cneg * f4TopTriZ0_Cneg -
  534. Four_Fours * f4TopTriZ0_Cneg * (boxTopTriA_Cneg.z + boxTopTriB_Cneg.z) +
  535. Four_Twos * (boxTopTriA_Cneg.z * boxTopTriA_Cneg.z +
  536. boxTopTriA_Cneg.z * boxTopTriB_Cneg.z + boxTopTriB_Cneg.z*boxTopTriB_Cneg.z));
  537. fltx4 f4TopTri_W_Cneg = Four_TwoThirds * ( Four_Threes * f4TopTriZ0_Cneg - Four_Twos * ( boxTopTriA_Cneg.z + boxTopTriB_Cneg.z ) );
  538. fltx4 f4BotTriX0_Cpos = boxC.x - boxA.x - boxB.x;
  539. fltx4 f4BotTriY0_Cpos = boxC.y - boxA.y - boxB.y;
  540. fltx4 f4BotTriZ0_Cpos = boxC.z - boxA.z - boxB.z + boxCenterZ;
  541. fltx4 f4BotTri_X_Cpos = Four_TwoThirds * (boxBotTriA_Cpos.x * ( Four_Twos * ( boxBotTriA_Cpos.z - f4BotTriZ0_Cpos ) + boxBotTriB_Cpos.z ) +
  542. boxBotTriB_Cpos.x * ( boxBotTriA_Cpos.z +
  543. Four_Twos * ( boxBotTriB_Cpos.z - f4BotTriZ0_Cpos ) ) +
  544. f4BotTriX0_Cpos * (Four_Threes * f4BotTriZ0_Cpos - Four_Twos * ( boxBotTriA_Cpos.z + boxBotTriB_Cpos.z ) ) );
  545. fltx4 f4BotTri_Y_Cpos = Four_TwoThirds * (boxBotTriA_Cpos.y * ( Four_Twos * ( boxBotTriA_Cpos.z - f4BotTriZ0_Cpos ) + boxBotTriB_Cpos.z ) +
  546. boxBotTriB_Cpos.y * ( boxBotTriA_Cpos.z +
  547. Four_Twos * ( boxBotTriB_Cpos.z - f4BotTriZ0_Cpos ) ) +
  548. f4BotTriY0_Cpos * (Four_Threes * f4BotTriZ0_Cpos - Four_Twos * ( boxBotTriA_Cpos.z + boxBotTriB_Cpos.z ) ) );
  549. fltx4 f4BotTri_Z_Cpos = Four_Thirds * (Four_Threes * f4BotTriZ0_Cpos * f4BotTriZ0_Cpos -
  550. Four_Fours * f4BotTriZ0_Cpos * (boxBotTriA_Cpos.z + boxBotTriB_Cpos.z) +
  551. Four_Twos * (boxBotTriA_Cpos.z * boxBotTriA_Cpos.z +
  552. boxBotTriA_Cpos.z * boxBotTriB_Cpos.z + boxBotTriB_Cpos.z*boxBotTriB_Cpos.z));
  553. fltx4 f4BotTri_W_Cpos = Four_TwoThirds * ( Four_Threes * f4BotTriZ0_Cpos - Four_Twos * ( boxBotTriA_Cpos.z + boxBotTriB_Cpos.z ) );
  554. fltx4 f4BotTriZ0_Cneg = boxCenterZ - boxTopZrel;
  555. fltx4 f4BotTri_X_Cneg = Four_TwoThirds * (boxBotTriA_Cneg.x * ( Four_Twos * ( boxBotTriA_Cneg.z - f4BotTriZ0_Cneg ) + boxBotTriB_Cneg.z ) +
  556. boxBotTriB_Cneg.x * ( boxBotTriA_Cneg.z +
  557. Four_Twos * ( boxBotTriB_Cneg.z - f4BotTriZ0_Cneg ) )
  558. -boxTopX * (Four_Threes * f4BotTriZ0_Cneg - Four_Twos * ( boxBotTriA_Cneg.z + boxBotTriB_Cneg.z ) ) );
  559. fltx4 f4BotTri_Y_Cneg = Four_TwoThirds * (boxBotTriA_Cneg.y * ( Four_Twos * ( boxBotTriA_Cneg.z - f4BotTriZ0_Cneg ) + boxBotTriB_Cneg.z ) +
  560. boxBotTriB_Cneg.y * ( boxBotTriA_Cneg.z +
  561. Four_Twos * ( boxBotTriB_Cneg.z - f4BotTriZ0_Cneg ) )
  562. -boxTopY * (Four_Threes * f4BotTriZ0_Cneg - Four_Twos * ( boxBotTriA_Cneg.z + boxBotTriB_Cneg.z ) ) );
  563. fltx4 f4BotTri_Z_Cneg = Four_Thirds * (Four_Threes * f4BotTriZ0_Cneg * f4BotTriZ0_Cneg -
  564. Four_Fours * f4BotTriZ0_Cneg * (boxBotTriA_Cneg.z + boxBotTriB_Cneg.z) +
  565. Four_Twos * (boxBotTriA_Cneg.z * boxBotTriA_Cneg.z +
  566. boxBotTriA_Cneg.z * boxBotTriB_Cneg.z + boxBotTriB_Cneg.z*boxBotTriB_Cneg.z));
  567. fltx4 f4BotTri_W_Cneg = Four_TwoThirds * ( Four_Threes * f4BotTriZ0_Cneg - Four_Twos * ( boxBotTriA_Cneg.z + boxBotTriB_Cneg.z ) );
  568. fltx4 f4All_X_Cpos = MaskedAssign( isTopTri_Cpos, f4SideProj * f4Full_X_Cpos - f4TopTriProj_Cpos * f4TopTri_X_Cpos, f4BotTriProj_Cpos * f4BotTri_X_Cpos + f4CenterProj_Cpos * f4Center_X_Cpos );
  569. fltx4 f4All_X_Cneg = MaskedAssign( isTopTri_Cneg, f4SideProj * f4Full_X_Cneg - f4TopTriProj_Cneg * f4TopTri_X_Cneg, f4BotTriProj_Cneg * f4BotTri_X_Cneg + f4CenterProj_Cneg * f4Center_X_Cneg );
  570. fltx4 f4All_Y_Cpos = MaskedAssign( isTopTri_Cpos, f4SideProj * f4Full_Y_Cpos - f4TopTriProj_Cpos * f4TopTri_Y_Cpos, f4BotTriProj_Cpos * f4BotTri_Y_Cpos + f4CenterProj_Cpos * f4Center_Y_Cpos );
  571. fltx4 f4All_Y_Cneg = MaskedAssign( isTopTri_Cneg, f4SideProj * f4Full_Y_Cneg - f4TopTriProj_Cneg * f4TopTri_Y_Cneg, f4BotTriProj_Cneg * f4BotTri_Y_Cneg + f4CenterProj_Cneg * f4Center_Y_Cneg );
  572. fltx4 f4All_Z_Cpos = MaskedAssign( isTopTri_Cpos, f4SideProj * f4Full_Z_Cpos - f4TopTriProj_Cpos * f4TopTri_Z_Cpos, f4BotTriProj_Cpos * f4BotTri_Z_Cpos + f4CenterProj_Cpos * f4Center_Z_Cpos );
  573. fltx4 f4All_Z_Cneg = MaskedAssign( isTopTri_Cneg, f4SideProj * f4Full_Z_Cneg - f4TopTriProj_Cneg * f4TopTri_Z_Cneg, f4BotTriProj_Cneg * f4BotTri_Z_Cneg + f4CenterProj_Cneg * f4Center_Z_Cneg );
  574. fltx4 f4All_W_Cpos = MaskedAssign( isTopTri_Cpos, f4SideProj * f4Full_W_Cpos - f4TopTriProj_Cpos * f4TopTri_W_Cpos, f4BotTriProj_Cpos * f4BotTri_W_Cpos + f4CenterProj_Cpos * f4Center_W_Cpos );
  575. fltx4 f4All_W_Cneg = MaskedAssign( isTopTri_Cneg, f4SideProj * f4Full_W_Cneg - f4TopTriProj_Cneg * f4TopTri_W_Cneg, f4BotTriProj_Cneg * f4BotTri_W_Cneg + f4CenterProj_Cneg * f4Center_W_Cneg );
  576. fltx4 f4All_X = Sum3SIMD( f4All_X_Cpos - f4All_X_Cneg );
  577. fltx4 f4All_Y = Sum3SIMD( f4All_Y_Cpos - f4All_Y_Cneg );
  578. // <Sergiy> to be brutally honest, I don't care about Z integral. It represents the Z of the lever of archimedes force, and
  579. // it affects neither force nor torque exerted by the said force. Not computing it here reduces this routine from 1188 ticks to 900 ticks per run
  580. (void)f4All_Z_Cpos;
  581. (void)f4All_Z_Cneg;
  582. fltx4 f4All_Z = Four_Zeros;//Sum3SIMD( f4All_Z_Cpos - f4All_Z_Cneg );
  583. fltx4 f4All_W = Sum3SIMD( f4All_W_Cpos - f4All_W_Cneg );
  584. #if 1
  585. // <Sergiy> again, to be brutally honest, I don't care about the actual lever of archimedes force.
  586. // I can just as well use lever * displaced_volume to compute the torque, and it'll actually be more precise, although less understandable.
  587. //
  588. // this variant returns XYZ of the center of mass of displaced fluid multiplied by W, and W = volume of displaced fluid
  589. fltx4 f4All = CombineSIMD( f4All_X, f4All_Y, f4All_Z, f4All_W ) + f4All_W * boxCenterXY;
  590. #else
  591. // this variant returns XYZ of the center of mass of displaced fluid, and W = volume of displaced fluid
  592. fltx4 rcpAllW = ReciprocalSIMD( f4All_W );
  593. fltx4 f4All = SetWSIMD( CombineXYZ_Special( f4All_X, f4All_Y, f4All_Z ) * rcpAllW + boxCenterXY, f4All_W );
  594. #endif
  595. return f4All;
  596. }
  597. /*
  598. float GetBoxBuoyancyTest( const matrix3x4_t & tm )
  599. {
  600. }
  601. */
  602. Vector4D GetPyramidBuoyancy( const Vector &pos, const Vector &a, const Vector &b, const Vector &n )
  603. {
  604. Vector verts[5], verts2[10];
  605. uint numVerts = 4, numVerts2 = 0;
  606. verts[0] = pos + n + a + b;
  607. verts[1] = pos + n + a - b;
  608. verts[2] = pos + n - a - b;
  609. verts[3] = pos + n - a + b;
  610. Vector prevVert = verts[3];
  611. for ( uint i = 0; i < numVerts; ++i )
  612. {
  613. if ( prevVert.z * verts[i].z < 0 )
  614. {
  615. // switching sign
  616. float flFraction = prevVert.z / ( prevVert.z - verts[i].z );
  617. verts2[numVerts2] = prevVert * ( 1 - flFraction ) + verts[i] * flFraction;
  618. Assert( fabs( verts2[numVerts2].z ) < 1e-5f );
  619. verts2[numVerts2].z = 0;
  620. numVerts2++;
  621. }
  622. prevVert = verts2[numVerts2++] = verts[i];
  623. }
  624. float flSum = 0, flSign = 1.0f;
  625. Vector vecCenter( 0, 0, 0 );
  626. Vector normal = CrossProduct( a, b );
  627. Assert( DotProduct( normal, n ) >= -1e-6f );
  628. if ( DotProduct( pos + n, normal ) < 0 ) // pos + n is the center of the face
  629. {
  630. flSign = -1.0f;
  631. }
  632. // exclude all z>0 verts
  633. for ( uint i = 0 ; i < numVerts2; )
  634. {
  635. if ( verts2[i].z > 0 )
  636. {
  637. for ( uint j = i + 1 ; j < numVerts2; ++j )
  638. {
  639. verts2[j-1] = verts2[j];
  640. }
  641. --numVerts2;
  642. }
  643. else
  644. {
  645. ++i;
  646. }
  647. }
  648. Vector rootVert = verts2[0];
  649. for ( uint i = 1; i + 1 < numVerts2 ; ++i )
  650. {
  651. Vector curVert = verts2[i], nextVert = verts2[i+1];
  652. {
  653. // this segment is guaranteed to be under water
  654. float flElementVolume = DotProduct( CrossProduct( curVert, rootVert ), nextVert ) / 6;
  655. flElementVolume = fabs( flElementVolume );
  656. flSum += flElementVolume ;
  657. Vector vecElementCenter = ( rootVert + curVert + nextVert ) * 0.25f;
  658. vecCenter += flElementVolume * vecElementCenter;
  659. }
  660. }
  661. Vector4D result;
  662. #if 1
  663. result.Init( vecCenter * flSign, flSum * flSign );
  664. #else
  665. result.Init( flSum > 1e-8f ? vecCenter / flSum : Vector( 0, 0, 0 ), flSum * flSign );
  666. #endif
  667. return result;
  668. }
  669. /*Vector4D GetQuadBuoyancy( const Vector &pos, const Vector &a, const Vector &b, const Vector &n )
  670. {
  671. Vector verts[4], verts2[10];
  672. uint numVerts = 4, numVerts2 = 0;
  673. Vector acrossb = CrossProduct( a, b );
  674. float flAreaXIntegral = acrossb.x * 4 * ( pos.x + n.x );
  675. float flAreaYIntegral = acrossb.y * 4 * ( pos.y + n.y );
  676. float flAreaZIntegral = acrossb.z * 4 * ( pos.z + n.z );
  677. Vector4D vecIntegral;
  678. vecIntegral.w = flAreaZIntegral;
  679. Vector center = pos + n;
  680. Assert(DotProduct(n, acrossb) > 0);
  681. float x0 = center.x, y0 = center.y, z0 = center.z;
  682. float xA = a.x, yA = a.y, zA = a.z;
  683. float xB = b.x, yB = b.y, zB = b.z;
  684. vecIntegral.Init(
  685. 4* x0 *z0 + (xA* zA + xB*zB)/3,
  686. 4* y0 *z0 + (yA* zA + yB*zB)/3,
  687. 2* z0 *z0 + (zA* zA + zB*zB)/3,
  688. 4* z0);
  689. return vecIntegral * acrossb.z;
  690. } */
  691. inline void Swap(Vector&a, Vector&b)
  692. {
  693. Vector t = a;
  694. a = b;
  695. b = t;
  696. }
  697. /*
  698. Vector4D GetBuoyancy( const Vector &pos, Vector box[3] )
  699. {
  700. float rcpZ[3];
  701. for(int i = 0; i < 3; ++i)
  702. {
  703. if( box[i].z < 0 )
  704. box[i] = -box[i];
  705. for(int j = 0; j < i; ++j)
  706. {
  707. if(box[j].z < box[i].z)
  708. Swap(box[i], box[j]);
  709. }
  710. }
  711. for(int i = 0; i < 3; ++i)
  712. rcpZ[i] = box[i].z > 1e-7f? 1 / box[i].z : 0;
  713. uint numVerts = 4, numVerts2 = 0;
  714. Vector acrossb = CrossProduct( a, b );
  715. float flAreaXIntegral = acrossb.x * 4 * ( pos.x + n.x );
  716. float flAreaYIntegral = acrossb.y * 4 * ( pos.y + n.y );
  717. float flAreaZIntegral = acrossb.z * 4 * ( pos.z + n.z );
  718. Vector4D vecIntegral;
  719. vecIntegral.w = flAreaZIntegral;
  720. Vector center = pos + n;
  721. Assert(DotProduct(n, acrossb) > 0);
  722. float x0 = center.x, y0 = center.y, z0 = center.z;
  723. float xA = a.x, yA = a.y, zA = a.z;
  724. float xB = b.x, yB = b.y, zB = b.z;
  725. vecIntegral.Init(
  726. 4* x0 *z0 + (xA* zA + xB*zB)/3,
  727. 4* y0 *z0 + (yA* zA + yB*zB)/3,
  728. 2* z0 *z0 + (zA* zA + zB*zB)/3,
  729. 4* z0);
  730. return vecIntegral * acrossb.z;
  731. }*/
  732. Vector4D operator % ( const Vector4D & a, const Vector4D & b )
  733. {
  734. Vector4D ave;
  735. ave.Init( fabs( a.w + b.w ) > 1e-6f ? ( a.AsVector3D() * a.w + b.AsVector3D() * b.w ) / ( a.w + b.w ) : Vector( 0, 0, 0 ), a.w + b.w );
  736. return ave;
  737. }
  738. Vector4D GetBoxBuoyancy( const Vector& a, const Vector& b, const Vector& c, const Vector& pos )
  739. {
  740. return GetPyramidBuoyancy( pos, a, b, c ) + GetPyramidBuoyancy( pos, b, a, -c ) + GetPyramidBuoyancy( pos, c, a, b ) + GetPyramidBuoyancy( pos, a, c, -b ) + GetPyramidBuoyancy( pos, b, c, a ) + GetPyramidBuoyancy( pos, c, b, -a );
  741. }
  742. void BenchmarkBoxBuoyancy( Vector a, const Vector& b, const Vector& c, const Vector& pos )
  743. {
  744. int start, end;
  745. const int nIterations = 100000;
  746. Vector4D result;
  747. start = GetHardwareClockFast();
  748. result.Init(0,0,0,0);
  749. for ( int i = 0; i < nIterations; ++i )
  750. {
  751. result = result % (GetPyramidBuoyancy( pos, a, b, c ) % GetPyramidBuoyancy( pos, b, a, -c ) % GetPyramidBuoyancy( pos, c, a, b ) % GetPyramidBuoyancy( pos, a, c, -b ) % GetPyramidBuoyancy( pos, b, c, a ) % GetPyramidBuoyancy( pos, c, b, -a )) ;
  752. a += Vector(1e-24f, 1e-25f, 1e-26f);
  753. }
  754. end = GetHardwareClockFast();
  755. Msg( "Box Buoyancy Scalar Benchmark: %d ticks/box, volume %g \n", int32( ( end - start ) ) / nIterations, result.w / nIterations );
  756. }
  757. const Vector RotateZ( const Vector & in, float flDegrees )
  758. {
  759. Vector res;
  760. VectorRotate( in, QAngle(0,flDegrees,0), res );
  761. return res;
  762. }
  763. const Vector RotateY( const Vector & in, float flDegrees )
  764. {
  765. Vector res;
  766. VectorRotate( in, QAngle(flDegrees,0,0), res );
  767. return res;
  768. }
  769. const Vector Rotate( const Vector & in, const QAngle &a )
  770. {
  771. Vector res;
  772. VectorRotate( in, a, res );
  773. return res;
  774. }
  775. struct Test_t
  776. {
  777. void Test()
  778. {
  779. PermTest();
  780. #ifdef _DEBUG
  781. BuoyancyTest();
  782. #else
  783. Benchmark();
  784. #endif
  785. }
  786. bool TestAllEqual( const fltx4 & a, const fltx4 & b )
  787. {
  788. return IsAllEqual( a, b );
  789. }
  790. void PermTest()
  791. {
  792. #ifdef _DEBUG
  793. fltx4 f4Canonical = {0.125f, 1.125f, 2.125f, 3.125f};
  794. float flCanonical[4] = {0.125f, 1.125f, 2.125f, 3.125f};
  795. fltx4 f4CanonicalYXZW = {1.125f, 0.125f, 2.125f, 3.125f};
  796. fltx4 f4CanonicalXZYW = {0.125f, 2.125f, 1.125f, 3.125f};
  797. fltx4 f4CanonicalZYXW = {2.125f, 1.125f, 0.125f, 3.125f};
  798. fltx4 f4CanonicalXXYW = {0.125f, 0.125f, 1.125f, 3.125f};
  799. fltx4 f4CanonicalYZZW = {1.125f, 2.125f, 2.125f, 3.125f};
  800. Assert( TestAllEqual( f4Canonical, LoadUnalignedSIMD( flCanonical ) ) );
  801. for ( int i = 0; i < 4; ++i )
  802. {
  803. float flSubFloat = SubFloat( f4Canonical, i );
  804. Assert( fabs( flSubFloat - float( i ) - 0.125f ) < 1e-6f );
  805. }
  806. Assert( TestAllEqual( PermYXZW( f4Canonical ), ( f4CanonicalYXZW ) ) );
  807. Assert( TestAllEqual( PermXZYW( f4Canonical ), ( f4CanonicalXZYW ) ) );
  808. Assert( TestAllEqual( PermZYXW( f4Canonical ), ( f4CanonicalZYXW ) ) );
  809. Assert( TestAllEqual( PermXXYW( f4Canonical ), ( f4CanonicalXXYW ) ) );
  810. Assert( TestAllEqual( PermYZZW( f4Canonical ), f4CanonicalYZZW ) );
  811. #endif
  812. }
  813. void BuoyancyTest()
  814. {
  815. Vector test[][3] =
  816. {
  817. {Vector( 1, 0, 0 ), Vector( 0, 0, 1 ), Vector( 0, 0, 0.0f )},
  818. {Vector( 1, 0, 1 ), Vector( -1, 0, 1 ), Vector( 0, 0, -0.5f )},
  819. {Vector( 0, 1, 1 ), Vector( 0, -1, 1 ), Vector( 0, 0, 0.0f )},
  820. {Vector( 0, 2, 2 ), Vector( 0, -2, 2 ), Vector( 0, 0, 0.0f )},
  821. {Vector( 5, 0, 5 ), Vector( -1, 0, 1 ), Vector( 0, 0, 0.0f )},
  822. {Vector( 2, 0, 1 ), Vector( -1, 0, 2 ), Vector( 0, 0, 0.0f )},
  823. {RotateZ(Vector( 1, 0, 1 ),45), RotateZ(Vector( -1, 0, 1 ),45), Vector( 0, 0, 0.0f )},
  824. {RotateZ(Vector( 1, 0, 1 ),30), RotateZ(Vector( -1, 0, 1 ),30), Vector( 0, 0, 0.5f )},
  825. {RotateZ(Vector( sqrtf(0.5f), 0, sqrtf(0.5f) ),45), RotateZ(Vector( 0, 1, 0 ),45), Vector( 0, 0, 0.5f )},
  826. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,0)}, // unit cube with tips extended high/low
  827. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,0.01f)}, // unit cube with tips extended high/low
  828. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,0.25f)}, // unit cube with tips extended high/low
  829. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,0.5f)}, // unit cube with tips extended high/low
  830. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,-0.25f)}, // unit cube with tips extended high/low
  831. {RotateY(RotateZ(Vector(1,0,0),45),atan(sqrtf(2))*180/M_PI),RotateY(RotateZ(Vector(0,1,0),45),atan(sqrtf(2))*180/M_PI), Vector(0,0,-0.5f)}, // unit cube with tips extended high/low
  832. {Vector( 2, 1, 1 ), Vector( -1, 1, 1 ), Vector( 0, 0, 0.0f )},
  833. {Vector( 2, 1, 1 ), Vector( -1, 1, 1 ), Vector( 0, 0, 0.5f )},
  834. {Vector( 0, 2, 1 ).Normalized(), Vector( 1, -1, 2 ).Normalized(), Vector( 0, 0, 0 )},
  835. {Vector( -0.804987f, 0.250343f, -0.811212f ), Vector( 0.474009f, -0.625978f,-0.663551f ).Normalized(), Vector( 1, 0, 0 )}
  836. };
  837. float flMaxError = 0;
  838. for ( int nAttempt = 0, numAttempts = 1000000; nAttempt < numAttempts; ++nAttempt )
  839. {
  840. Vector a = RandomVector( -1, 1 ), c = RandomVector( -1, 1 ), b = CrossProduct( a, c ), pos = RandomVector( -2, 2 );
  841. c = CrossProduct( a, b ).Normalized() * RandomVector( 0, 1.75f ).x;
  842. if ( nAttempt < sizeof( test ) / sizeof( test[0] ) )
  843. {
  844. a = test[nAttempt][0];
  845. b = test[nAttempt][1];
  846. c = CrossProduct( a, b ).Normalized() /* a.Length()*/;
  847. pos = test[nAttempt][2];
  848. }
  849. //pos.x = 0;
  850. //pos.y = 0;
  851. //pos.z = 0;
  852. matrix3x4_t tm;
  853. tm.Init( a, b, c, pos );
  854. FourVectors box;
  855. box.LoadAndSwizzle( LoadUnalignedSIMD( &a ), LoadUnalignedSIMD( &b ), LoadUnalignedSIMD( &c ), LoadUnalignedSIMD( &pos ) );
  856. //fltx4 f4Result0 = GetBoxBuoyancy3x4( box );
  857. Vector4D result1 = GetBoxBuoyancy(a,b,c,pos);
  858. fltx4 f4ResultV2 = GetBoxBuoyancy3x4( box );
  859. fltx4 f4Residual = f4ResultV2 - LoadUnalignedSIMD( &result1 );
  860. float flError = sqrtf( SubFloat( Dot4SIMD( f4Residual, f4Residual ), 0 ) );
  861. if( flError > flMaxError )
  862. {
  863. flMaxError = flError;
  864. Msg( "%d. Error %g\n", nAttempt, flError);
  865. }
  866. Assert( IsAllGreaterThan( ReplicateX4( 1e-4f ), fabs( f4Residual ) ) );
  867. float flBoxVolume = a.Length() * b.Length() * c.Length() * 8; (void)(flBoxVolume); // debug only
  868. if ( ( nAttempt % ( numAttempts / 10 ) ) == 0 )
  869. {
  870. DevMsg( "." );
  871. }
  872. }
  873. DevMsg( "Buoyancy test completed, benchmarking\n" );
  874. }
  875. void Benchmark()
  876. {
  877. for ( int i = 0; i < 100; ++i )
  878. {
  879. Vector a = RandomVector( -1, 1 ), c = RandomVector( -1, 1 ), b = CrossProduct( a, c ), pos = RandomVector( -2, 2 );
  880. c = CrossProduct( a, b ).Normalized() * RandomVector( 0, 1.75f ).x;
  881. BenchmarkBoxBuoyancy4x3( LoadUnalignedSIMD( &a ), LoadUnalignedSIMD( &b ), LoadUnalignedSIMD( &c ), LoadUnalignedSIMD( &pos ) );
  882. BenchmarkBoxBuoyancy( a,b,c,pos );
  883. }
  884. }
  885. };
  886. static Test_t s_test;
  887. void TestBuoyancy()
  888. {
  889. s_test.Test();
  890. }