Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1765 lines
74 KiB

  1. //========= Copyright � Valve Corporation, All rights reserved. ============//
  2. //
  3. // Quat-dominant finite element mesh simulation, a distillation of experiments made in femodel-v0
  4. // Multigrid didn't work as well as advertised, so I dropped it - this is a single-level solver without hierarchy
  5. // CG works really well, but is not very stable due to linearization issues, so it may make it here eventually as an auxiliary pass
  6. // Warmstarting works well, but doesn't offer any advantages in extreme cases (it can even add energy sometimes), so I don't see it as high value add-on
  7. // The latest simple axial bend model ( distribute parallel impulses across 4 vertices of 2 adjacent triangles, precompute proportions) worked very well
  8. // 2D LSq shape matching for triangles worked really well, too.
  9. // So I found an fast approximate solution to Wahba's problem, which allows me to quickly shape-match
  10. // more than 3 vertices, with different masses. 2D LSq is still used for 2-vert-constrained elements
  11. //
  12. #include "mathlib/femodel.h"
  13. #include "mathlib/cholesky.h"
  14. #include "mathlib/ssecholesky.h"
  15. #include "tier0/microprofiler.h"
  16. #include "mathlib/svd.h"
  17. #include "mathlib/femodel.inl"
  18. const fltx4 Four_SinCosRotation2DMinWeights = { 1e-14f, 1e-14f, 1e-14f, 1e-14f };
  19. void RelaxRod( const FeRodConstraint_t &c, float flModelScale, VectorAligned &a, VectorAligned &b )
  20. {
  21. Vector vDist = b - a;
  22. float flDist = vDist.Length( );
  23. float flReqDist;
  24. if ( flDist < flModelScale * c.flMinDist )
  25. {
  26. flReqDist = flModelScale * c.flMinDist;
  27. }
  28. else if ( flDist > flModelScale * c.flMaxDist )
  29. {
  30. flReqDist = flModelScale * c.flMaxDist;
  31. }
  32. else
  33. {
  34. return; // no need to adjust the distance
  35. }
  36. Vector vDelta = vDist * ( c.flRelaxationFactor * ( flReqDist / flDist - 1 ) );
  37. a -= vDelta * c.flWeight0;
  38. b += vDelta * ( 1 - c.flWeight0 );
  39. }
  40. //----------------------------------------------------------------------------------------------------------
  41. void CFeModel::RelaxRods( VectorAligned *pPos, float flStiffness, float flModelScale )const
  42. {
  43. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nRodCount - 1 );
  44. for ( uint i = 0; i < m_nRodCount; ++i )
  45. {
  46. const FeRodConstraint_t &c = m_pRods[ i ];
  47. VectorAligned &a = pPos[ c.nNode[ 0 ] ], &b = pPos[ c.nNode[ 1 ] ];
  48. RelaxRod( c, flModelScale, a, b );
  49. }
  50. }
  51. //----------------------------------------------------------------------------------------------------------
  52. void CFeModel::RelaxRods2( VectorAligned *pPos, float flStiffness, float flModelScale )const
  53. {
  54. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nRodCount - 1 );
  55. for ( uint i = 0; i < m_nRodCount; ++i )
  56. {
  57. FeRodConstraint_t c = m_pRods[ i ];
  58. VectorAligned &a = pPos[ c.nNode[ 0 ] ], &b = pPos[ c.nNode[ 1 ] ];
  59. Vector vDist = b - a;
  60. float flDist = vDist.Length();
  61. float flReqDist = Min( Max( flDist, flModelScale * c.flMinDist ), flModelScale * c.flMaxDist );
  62. Vector vDelta = vDist * ( c.flRelaxationFactor * ( flReqDist / flDist - 1 ) );
  63. a -= vDelta * c.flWeight0;
  64. b += vDelta * ( 1 - c.flWeight0 );
  65. }
  66. }
  67. //----------------------------------------------------------------------------------------------------------
  68. void CFeModel::RelaxRods2Ftl( VectorAligned *pPos, float flStiffness, float flModelScale )const
  69. {
  70. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nRodCount - 1 );
  71. for ( uint i = 0; i < m_nRodCount; ++i )
  72. {
  73. FeRodConstraint_t c = m_pRods[ i ];
  74. VectorAligned &a = pPos[ c.nNode[ 0 ] ], &b = pPos[ c.nNode[ 1 ] ];
  75. Vector vDist = b - a;
  76. float flDist = vDist.Length( );
  77. float flReqDist = Min( Max( flDist, flModelScale * c.flMinDist ), flModelScale * c.flMaxDist );
  78. Vector vDelta = vDist * ( c.flRelaxationFactor * ( flReqDist / flDist - 1 ) );
  79. b += vDelta;
  80. }
  81. }
  82. //----------------------------------------------------------------------------------------------------------
  83. void CFeModel::RelaxRodsUninertial( VectorAligned *pPos1, VectorAligned *pPos0, float flStiffness, float flModelScale )const
  84. {
  85. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nRodCount - 1 );
  86. for ( uint i = 0; i < m_nRodCount; ++i )
  87. {
  88. FeRodConstraint_t c = m_pRods[ i ];
  89. VectorAligned &a1 = pPos1[ c.nNode[ 0 ] ], &b1 = pPos1[ c.nNode[ 1 ] ];
  90. Vector vDist = b1 - a1;
  91. float flDist = vDist.Length( ), flMaxDist = flModelScale * c.flMaxDist;
  92. if ( flDist > flMaxDist )
  93. {
  94. VectorAligned &a0 = pPos0[ c.nNode[ 0 ] ], &b0 = pPos0[ c.nNode[ 1 ] ];
  95. Vector vDelta = vDist * ( c.flRelaxationFactor * ( flMaxDist / flDist - 1 ) );
  96. a1 -= vDelta * c.flWeight0;
  97. b1 += vDelta * ( 1 - c.flWeight0 );
  98. a0 -= vDelta * c.flWeight0;
  99. b0 += vDelta * ( 1 - c.flWeight0 );
  100. }
  101. }
  102. }
  103. const fltx4 Four_2ToTheMinus30 = { 1.0f / float( 1 << 30 ), 1.0f / float( 1 << 30 ), 1.0f / float( 1 << 30 ), 1.0f / float( 1 << 30 ) };
  104. const fltx4 Four_MaxWorldSize = { 1 << 20, 1 << 20, 1 << 20, 1 << 20 };
  105. //----------------------------------------------------------------------------------------------------------
  106. // 128 ticks (32 ticks per rod) on an i7
  107. void CFeModel::RelaxSimdRods( fltx4 *pPos, float flStiffness, float flModelScale )const
  108. {
  109. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nSimdRodCount * 4 - 1 );
  110. fltx4 /*f4Stiffness = ReplicateX4( flStiffness ),*/ f4ModelScale = ReplicateX4( flModelScale );
  111. for ( uint i = 0; i < m_nSimdRodCount; ++i )
  112. {
  113. const FeSimdRodConstraint_t &c = m_pSimdRods[ i ];
  114. AssertDbg( c.nNode[ 0 ][ 0 ] < m_nNodeCount && c.nNode[ 1 ][ 0 ] < m_nNodeCount && c.nNode[ 0 ][ 1 ] < m_nNodeCount && c.nNode[ 1 ][ 1 ] < m_nNodeCount && c.nNode[ 0 ][ 2 ] < m_nNodeCount && c.nNode[ 1 ][ 2 ] < m_nNodeCount && c.nNode[ 0 ][ 3 ] < m_nNodeCount && c.nNode[ 1 ][ 3 ] < m_nNodeCount );
  115. FourVectors a, b;
  116. LOAD_NODES( a, c.nNode[ 0 ] );
  117. LOAD_NODES( b, c.nNode[ 1 ] );
  118. FourVectors vDist = b - a;
  119. fltx4 f4DistSqr = vDist.LengthSqr( );
  120. {
  121. // dealing with collapsed nodes, when two nodes have exactly the same coordinates
  122. f4DistSqr = MaxSIMD( f4DistSqr, Four_2ToTheMinus30 );
  123. }
  124. fltx4 f4DistRcp = ReciprocalSqrtSIMD( f4DistSqr );
  125. fltx4 f4Dist = f4DistRcp * f4DistSqr;
  126. fltx4 f4ReqDist = MinSIMD( MaxSIMD( f4Dist, f4ModelScale * c.f4MinDist ), f4ModelScale * c.f4MaxDist );
  127. FourVectors vDelta = vDist * ( c.f4RelaxationFactor * ( f4ReqDist * f4DistRcp - Four_Ones ) );
  128. a -= vDelta * c.f4Weight0;
  129. b += vDelta * ( Four_Ones - c.f4Weight0 );
  130. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, a.Length( ) + b.Length( ) ) );
  131. SAVE_NODES( a, c.nNode[ 0 ] );
  132. SAVE_NODES( b, c.nNode[ 1 ] );
  133. }
  134. }
  135. //----------------------------------------------------------------------------------------------------------
  136. // 128 ticks (32 ticks per rod) on an i7
  137. void CFeModel::RelaxSimdRodsFtl( fltx4 *pPos, float flStiffness, float flModelScale )const
  138. {
  139. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nSimdRodCount * 4 - 1 );
  140. fltx4 /*f4Stiffness = ReplicateX4( flStiffness ),*/ f4ModelScale = ReplicateX4( flModelScale );
  141. for ( uint i = 0; i < m_nSimdRodCount; ++i )
  142. {
  143. const FeSimdRodConstraint_t &c = m_pSimdRods[ i ];
  144. AssertDbg( c.nNode[ 0 ][ 0 ] < m_nNodeCount && c.nNode[ 1 ][ 0 ] < m_nNodeCount && c.nNode[ 0 ][ 1 ] < m_nNodeCount && c.nNode[ 1 ][ 1 ] < m_nNodeCount && c.nNode[ 0 ][ 2 ] < m_nNodeCount && c.nNode[ 1 ][ 2 ] < m_nNodeCount && c.nNode[ 0 ][ 3 ] < m_nNodeCount && c.nNode[ 1 ][ 3 ] < m_nNodeCount );
  145. FourVectors a, b;
  146. LOAD_NODES( a, c.nNode[ 0 ] );
  147. LOAD_NODES( b, c.nNode[ 1 ] );
  148. FourVectors vDist = b - a;
  149. fltx4 f4DistSqr = vDist.LengthSqr( );
  150. {
  151. // dealing with collapsed nodes, when two nodes have exactly the same coordinates
  152. f4DistSqr = MaxSIMD( f4DistSqr, Four_2ToTheMinus30 );
  153. }
  154. fltx4 f4DistRcp = ReciprocalSqrtSIMD( f4DistSqr );
  155. fltx4 f4Dist = f4DistRcp * f4DistSqr;
  156. fltx4 f4ReqDist = MinSIMD( MaxSIMD( f4Dist, f4ModelScale * c.f4MinDist ), f4ModelScale * c.f4MaxDist );
  157. FourVectors vDelta = vDist * ( c.f4RelaxationFactor * ( f4ReqDist * f4DistRcp - Four_Ones ) );
  158. b += vDelta;
  159. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, a.Length( ) + b.Length( ) ) );
  160. SAVE_NODES( b, c.nNode[ 1 ] );
  161. }
  162. }
  163. //----------------------------------------------------------------------------------------------------------
  164. float CFeModel::RelaxTris( VectorAligned *pPos, float flStiffness, float flModelScale )const
  165. {
  166. float flSumError = 0;
  167. // first the simplest case of quads hanging on the edge 0-1: this is a 2D case with 2 moving nodes and only 1 DOF (rotation around edge 0-1)
  168. for ( uint i = 0; i < m_nTriCount[ 2 ]; ++i )
  169. {
  170. const FeTri_t &tri = m_pTris[ i ];
  171. flSumError += RelaxTri2( flStiffness, flModelScale, tri, pPos[ tri.nNode[ 0 ] ], pPos[ tri.nNode[ 1 ] ], pPos[ tri.nNode[ 2 ] ] );
  172. }
  173. for ( uint i = m_nTriCount[ 2 ]; i < m_nTriCount[ 1 ]; ++i )
  174. {
  175. const FeTri_t &tri = m_pTris[ i ];
  176. flSumError += RelaxTri1( flStiffness, flModelScale, tri, pPos[ tri.nNode[ 0 ] ], pPos[ tri.nNode[ 1 ] ], pPos[ tri.nNode[ 2 ] ] );
  177. }
  178. for ( uint i = m_nTriCount[ 1 ]; i < m_nTriCount[ 0 ]; ++i )
  179. {
  180. const FeTri_t &tri = m_pTris[ i ];
  181. flSumError += RelaxTri0( flStiffness, flModelScale, tri, pPos[ tri.nNode[ 0 ] ], pPos[ tri.nNode[ 1 ] ], pPos[ tri.nNode[ 2 ] ] );
  182. }
  183. return flSumError;
  184. }
  185. //----------------------------------------------------------------------------------------------------------
  186. fltx4 CFeModel::RelaxSimdTris( VectorAligned *pPos, float flStiffness, float flModelScale )const
  187. {
  188. fltx4 flSumError = Four_Zeros, fl4Stiffness = ReplicateX4( flStiffness ), fl4ModelScale = ReplicateX4( flModelScale);
  189. // first the simplest case of quads hanging on the edge 0-1: this is a 2D case with 2 moving nodes and only 1 DOF (rotation around edge 0-1)
  190. for ( uint i = 0; i < m_nSimdTriCount[ 2 ]; ++i )
  191. {
  192. const FeSimdTri_t &tri = m_pSimdTris[ i ];
  193. flSumError += RelaxSimdTri2( fl4Stiffness, fl4ModelScale, tri, ( fltx4* ) pPos );
  194. }
  195. for ( uint i = m_nSimdTriCount[ 2 ]; i < m_nSimdTriCount[ 1 ]; ++i )
  196. {
  197. const FeSimdTri_t &tri = m_pSimdTris[ i ];
  198. flSumError += RelaxSimdTri1( fl4Stiffness, fl4ModelScale, tri, ( fltx4* ) pPos );
  199. }
  200. for ( uint i = m_nSimdTriCount[ 1 ]; i < m_nSimdTriCount[ 0 ]; ++i )
  201. {
  202. const FeSimdTri_t &tri = m_pSimdTris[ i ];
  203. flSumError += RelaxSimdTri0( fl4Stiffness, fl4ModelScale, tri, ( fltx4* ) pPos );
  204. }
  205. return flSumError;
  206. }
  207. //----------------------------------------------------------------------------------------------------------
  208. float CFeModel::RelaxQuads( VectorAligned *pPos, float flStiffness, float flModelScale, int nExperimental )const
  209. {
  210. float flSumError = 0;
  211. // first the simplest case of quads hanging on the edge 0-1: this is a 2D case with 2 moving nodes and only 1 DOF (rotation around edge 0-1)
  212. for ( uint i = 0; i < m_nQuadCount[ 2 ]; ++i )
  213. {
  214. const FeQuad_t &quad = m_pQuads[ i ];
  215. flSumError += RelaxQuad2( flStiffness, flModelScale, quad, pPos[ quad.nNode[ 0 ] ], pPos[ quad.nNode[ 1 ] ], pPos[ quad.nNode[ 2 ] ], pPos[ quad.nNode[ 3 ] ] );
  216. }
  217. for ( uint i = m_nQuadCount[ 2 ]; i < m_nQuadCount[ 1 ]; ++i )
  218. {
  219. const FeQuad_t &quad = m_pQuads[ i ];
  220. flSumError += RelaxQuad1( flStiffness, flModelScale, quad, pPos[ quad.nNode[ 0 ] ], pPos[ quad.nNode[ 1 ] ], pPos[ quad.nNode[ 2 ] ], pPos[ quad.nNode[ 3 ] ] );
  221. }
  222. if ( nExperimental )
  223. {
  224. for ( uint i = m_nQuadCount[ 1 ]; i < m_nQuadCount[ 0 ]; ++i )
  225. {
  226. const FeQuad_t &quad = m_pQuads[ i ];
  227. flSumError += RelaxQuad0flat( flStiffness, flModelScale, quad, pPos[ quad.nNode[ 0 ] ], pPos[ quad.nNode[ 1 ] ], pPos[ quad.nNode[ 2 ] ], pPos[ quad.nNode[ 3 ] ] );
  228. }
  229. }
  230. else
  231. {
  232. for ( uint i = m_nQuadCount[ 1 ]; i < m_nQuadCount[ 0 ]; ++i )
  233. {
  234. const FeQuad_t &quad = m_pQuads[ i ];
  235. flSumError += RelaxQuad0( flStiffness, flModelScale, quad, pPos[ quad.nNode[ 0 ] ], pPos[ quad.nNode[ 1 ] ], pPos[ quad.nNode[ 2 ] ], pPos[ quad.nNode[ 3 ] ] );
  236. }
  237. }
  238. return flSumError;
  239. }
  240. //----------------------------------------------------------------------------------------------------------
  241. // Measured on i7-4930K Ivy Bridge: ~600 ticks / 4 quads
  242. fltx4 CFeModel::RelaxSimdQuads( VectorAligned *pPos, float flStiffness, float flModelScale, int nExperimental )const
  243. {
  244. fltx4 flSumError = Four_Zeros, fl4Stiffness = ReplicateX4( flStiffness ), fl4ModelScale = ReplicateX4( flModelScale );
  245. // first the simplest case of quads hanging on the edge 0-1: this is a 2D case with 2 moving nodes and only 1 DOF (rotation around edge 0-1)
  246. for ( uint i = 0; i < m_nSimdQuadCount[ 2 ]; ++i )
  247. {
  248. const FeSimdQuad_t &quad = m_pSimdQuads[ i ];
  249. flSumError += RelaxSimdQuad2( fl4Stiffness, fl4ModelScale, quad, ( fltx4* )pPos );
  250. }
  251. for ( uint i = m_nSimdQuadCount[ 2 ]; i < m_nSimdQuadCount[ 1 ]; ++i )
  252. {
  253. const FeSimdQuad_t &quad = m_pSimdQuads[ i ];
  254. flSumError += RelaxSimdQuad1( fl4Stiffness, fl4ModelScale, quad, ( fltx4* ) pPos );
  255. }
  256. if ( nExperimental )
  257. {
  258. for ( uint i = m_nSimdQuadCount[ 1 ]; i < m_nSimdQuadCount[ 0 ]; ++i )
  259. {
  260. const FeSimdQuad_t &quad = m_pSimdQuads[ i ];
  261. flSumError += RelaxSimdQuad0flat( fl4Stiffness, fl4ModelScale, quad, ( fltx4* ) pPos );
  262. }
  263. }
  264. else
  265. {
  266. for ( uint i = m_nSimdQuadCount[ 1 ]; i < m_nSimdQuadCount[ 0 ]; ++i )
  267. {
  268. const FeSimdQuad_t &quad = m_pSimdQuads[ i ];
  269. flSumError += RelaxSimdQuad0( fl4Stiffness, fl4ModelScale, quad, ( fltx4* ) pPos );
  270. }
  271. }
  272. return flSumError;
  273. }
  274. //----------------------------------------------------------------------------------------------------------
  275. float CFeModel::RelaxTri2( float flStiffness, float flModelScale, const FeTri_t &tri, const VectorAligned &p0, const VectorAligned &p1, VectorAligned &p2 )const
  276. {
  277. CFeTriBasis basis( p1 - p0, p2 - p0 ); // p1-p0 is the best choice for the axis, because we're rotating around it
  278. p2 = p0 + basis.LocalXYToWorld( flModelScale * tri.v2.x + 0.5f * ( basis.v1x - flModelScale * tri.v1x ), flModelScale * tri.v2.y );
  279. return 0;
  280. }
  281. FORCEINLINE float CrossProductZ( float v1x, float v1y, float v2x, float v2y )
  282. {
  283. return v1x * v2y - v1y * v2x;
  284. }
  285. //----------------------------------------------------------------------------------------------------------
  286. float CFeModel::RelaxTri1( float flStiffness, float flModelScale, const FeTri_t &tri, const VectorAligned &p0, VectorAligned &p1, VectorAligned &p2 )const
  287. {
  288. CFeTriBasis basis( p1 - p0, p2 - p0 ); // rotating around p0, because center of mass = p0, because it's the only infinite-mass corner of this triangle
  289. float tri_v1x = flModelScale * tri.v1x;
  290. Vector2D tri_v2 = flModelScale * tri.v2;
  291. CSinCosRotation2D rotation( tri.w2 * DotProduct2D( tri_v2, basis.v2 ) + tri.w1 * ( tri_v1x * basis.v1x ), tri.w2 * CrossProductZ( tri_v2, basis.v2 ) );
  292. // we computed omega (sin, cos), apply it..
  293. p1 = p0 + basis.LocalXYToWorld( rotation.m_flCos * tri_v1x, rotation.m_flSin * tri_v1x );
  294. p2 = p0 + basis.LocalXYToWorld( rotation * tri_v2 );
  295. return 1.0f - rotation.m_flCos;
  296. }
  297. //----------------------------------------------------------------------------------------------------------
  298. float CFeModel::RelaxTri0( float flStiffness, float flModelScale, const FeTri_t &tri, VectorAligned &p0, VectorAligned &p1, VectorAligned &p2 )const
  299. {
  300. CFeTriBasis basis( p1 - p0, p2 - p0 ); // rotating around p0, because center of mass = p0, because it's the only infinite-mass corner of this triangle
  301. float tri_v1x = flModelScale * tri.v1x;
  302. Vector2D tri_v2 = flModelScale * tri.v2;
  303. Vector2D x0neg( basis.v1x * tri.w1 + basis.v2.x * tri.w2, basis.v2.y * tri.w2 ); // center of mass of the deformed triangle p0,p1,p2 in local coordinate system
  304. Vector2D r0neg( tri_v1x * tri.w1 + tri_v2.x * tri.w2, tri_v2.y * tri.w2 ); // center of mass of the target triangle configuration, in its coordinate system
  305. float w0 = 1.0f - ( tri.w1 + tri.w2 );
  306. Vector2D x2 = basis.v2 - x0neg, r2 = tri_v2 - r0neg;
  307. float x1x = basis.v1x - x0neg.x; // x1.y = -x0neg.y
  308. float r1x = tri_v1x - r0neg.x; // r1.y = -r0neg.y
  309. CSinCosRotation2D rotation(
  310. tri.w2 * DotProduct2D( x2, r2 ) + tri.w1 * ( x1x * r1x ) + w0 * ( x0neg.x * r0neg.x ) + ( tri.w1 + w0 ) * ( x0neg.y * r0neg.y ),
  311. tri.w2 * CrossProductZ( r2, x2 ) - tri.w1 * CrossProductZ( r1x, r0neg.y, x1x, x0neg.y ) + w0 * CrossProductZ( r0neg, x0neg )
  312. );
  313. #ifdef _DEBUG
  314. Vector2D x0 = -x0neg, x1 = Vector2D( basis.v1x, 0 ) + x0;
  315. Vector2D r0 = -r0neg, r1 = Vector2D( tri_v1x, 0 ) + r0;
  316. CSinCosRotation2D rotation2(
  317. tri.w2 * DotProduct2D( x2, r2 ) + tri.w1 * DotProduct2D( x1, r1 ) + w0 * DotProduct2D( x0, r0 ),
  318. tri.w2 * CrossProductZ( r2, x2 ) + tri.w1 * CrossProductZ( r1, x1 ) + w0 * CrossProductZ( r0, x0 )
  319. );
  320. Assert( Diff( rotation, rotation2 ) < 1e-4f );
  321. Vector2D y0 = rotation * r0, y1 = rotation * r1, y2 = rotation * r2;
  322. CSinCosRotation2D rotation3(
  323. tri.w2 * DotProduct2D( y2, x2 ) + tri.w1 * DotProduct2D( y1, x1 ) + w0 * DotProduct2D( y0, x0 ),
  324. tri.w2 * CrossProductZ( y2, x2 ) + tri.w1 * CrossProductZ( y1, x1 ) + w0 * CrossProductZ( y0, x0 )
  325. );
  326. Assert( fabsf( rotation3.m_flSin ) < 1e-5f );
  327. #endif
  328. Vector2D dX0 = x0neg - rotation * r0neg;
  329. // we computed omega (sin, cos), apply it..
  330. p0 += basis.LocalXYToWorld( dX0 );
  331. p1 = p0 + basis.LocalXYToWorld( rotation.m_flCos * tri_v1x, rotation.m_flSin * tri_v1x );
  332. p2 = p0 + basis.LocalXYToWorld( rotation * tri_v2 );
  333. return 1.0f - rotation.m_flCos;
  334. }
  335. //----------------------------------------------------------------------------------------------------------
  336. float CFeModel::RelaxQuad2( float flStiffness, float flModelScale, const FeQuad_t &quad, const VectorAligned &p0, const VectorAligned &p1, VectorAligned &p2, VectorAligned &p3 )const
  337. {
  338. Vector vCoM = ( p0 + p1 ) * 0.5f;
  339. CFeBasis basis( p1 - p0, p2 + p3 - 2 * p0 ); // p1-p0 is the best choice for the axis, because we're rotating around it ; Should be synced up with Builder:AdjustQuads
  340. // Numerical stability note: if we choose cross product or some other nice sounding but insanely arbitrary axis to rotate around here,
  341. // not only is it wrong, it'd also make it impossible to do a square element with diagonal static and diagonal dynamic elements,
  342. // because the order of vertices around this quad would not be natural (the diagonal vertices have to go first for the solve because that's our convention here)
  343. // so p2-p0 and p3-p1 tentative axes would be parallel, producing a very unstable solve (since we don't solve full Wahba problem, but just finding an approximate solution;
  344. // although in this very special case of RelaxQuad2, because we make the problem essentially 2D, we solve pretty close)
  345. // assume center of mass = p0, because it doesn't matter where it is along the line p0..p1
  346. // find omega, refer to wahba1d.nb. We only have points 2 and 3 to compute and move, and even that only in the plane YZ (X is fixed, as we rotate around axis X)
  347. Vector x2 = p2 - vCoM, x3 = p3 - vCoM;
  348. Vector2D vLocalP2 = basis.WorldToLocalYZ( x2 ), vLocalP3 = basis.WorldToLocalYZ( x3 );
  349. // Note: e x X = CrossProduct( vAxisX, (?, x1, x2) ) = (0, -x2, x1 ). Note that we rename YZ->XY for Vector2D conversion, so (-x2, x1) -> ( -p.y, p.x )
  350. float flMass2 = quad.vShape[ 2 ].w, flMass3 = quad.vShape[ 3 ].w;
  351. CSinCosRotation2D rotation( ( vLocalP2.x * quad.vShape[ 2 ].y + vLocalP2.y * quad.vShape[ 2 ].z ) * flMass2 + ( vLocalP3.x * quad.vShape[ 3 ].y + vLocalP3.y * quad.vShape[ 3 ].z ) * flMass3,
  352. ( vLocalP2.x * quad.vShape[ 2 ].z - vLocalP2.y * quad.vShape[ 2 ].y ) * flMass2 + ( vLocalP3.x * quad.vShape[ 3 ].z - vLocalP3.y * quad.vShape[ 3 ].y ) * flMass3 );
  353. Vector r2 = basis.LocalToWorld( quad.vShape[ 2 ].x, quad.vShape[ 2 ].y * rotation.GetCosine() - quad.vShape[ 2 ].z * rotation.GetSine(), quad.vShape[ 2 ].z * rotation.GetCosine() + quad.vShape[ 2 ].y * rotation.GetSine() );
  354. Vector r3 = basis.LocalToWorld( quad.vShape[ 3 ].x, quad.vShape[ 3 ].y * rotation.GetCosine() - quad.vShape[ 3 ].z * rotation.GetSine(), quad.vShape[ 3 ].z * rotation.GetCosine() + quad.vShape[ 3 ].y * rotation.GetSine() );
  355. float flError = ( r2 - x2 ).LengthSqr( ) + ( r3 - x3 ).LengthSqr( );
  356. p2 = vCoM + r2 * flModelScale;
  357. p3 = vCoM + r3 * flModelScale;
  358. return flError;
  359. }
  360. //----------------------------------------------------------------------------------------------------------
  361. float CFeModel::RelaxQuad1( float flStiffness, float flModelScale, const FeQuad_t &quad, const VectorAligned &p0, VectorAligned &p1, VectorAligned &p2, VectorAligned &p3 )const
  362. {
  363. Vector vCoM = p0;
  364. CFeBasis basis( p2 - p0, p3 - p1 ); // rotating around p0, because center of mass = p0, because it's the only infinite-mass corner of this quad; Should be synced up with Builder:AdjustQuads
  365. // find omega, refer to wahba.nb. We only have points 1, 2 and 3 to compute and move, but otherwise this is the same as the free-quad version
  366. Vector x1 = p1 - vCoM, x2 = p2 - vCoM, x3 = p3 - vCoM;
  367. Vector r1 = basis.LocalToWorld( quad.vShape[ 1 ] * flModelScale ), r2 = basis.LocalToWorld( quad.vShape[ 2 ] * flModelScale ), r3 = basis.LocalToWorld( quad.vShape[ 3 ] * flModelScale );
  368. float m1 = quad.vShape[ 1 ].w, m2 = quad.vShape[ 2 ].w, m3 = quad.vShape[ 3 ].w;
  369. // refer to wahba.nb
  370. Vector rhs = -(m1 * CrossProduct( x1, r1 ) + m2 * CrossProduct( x2, r2 ) + m3 * CrossProduct( x3, r3 ) );
  371. CovMatrix3 cov;
  372. cov.InitForWahba( m1, x1 );
  373. cov.AddForWahba( m2, x2 );
  374. cov.AddForWahba( m3, x3 );
  375. Cholesky3x3_t chol( cov.m_vDiag.x, cov.m_flXY, cov.m_vDiag.y, cov.m_flXZ, cov.m_flYZ, cov.m_vDiag.z );
  376. CSinCosRotation rotation( chol.Solve( rhs ) );
  377. // yay! we computed omega (sin), apply it..
  378. r1 = rotation * r1; r2 = rotation * r2; r3 = rotation * r3;
  379. float flError = ( r1 - x1 ).LengthSqr( ) + ( r2 - x2 ).LengthSqr( ) + ( r3 - x3 ).LengthSqr( );
  380. p1 = r1 + vCoM; p2 = r2 + vCoM; p3 = r3 + vCoM;
  381. return flError;
  382. }
  383. //----------------------------------------------------------------------------------------------------------
  384. // total theoretical cost is ~300 flops, practically taking 670 ticks (170/single quad) which when SIMDized can be 300 ticks to solve 4 quads, or 8 quads when using AVX
  385. // this is as fast as solving 12 distance constraints, but the quad is solved precisely every time
  386. // considering that a quad requires at least 6 distance constraints to make it rigid, and 2 passes won't solve it precisely, it's a prety good deal
  387. float CFeModel::RelaxQuad0( float flStiffness, float flModelScale, const FeQuad_t &quad, VectorAligned &p0, VectorAligned &p1, VectorAligned &p2, VectorAligned &p3 )const
  388. {
  389. CFeBasis basis( p2 - p0, p3 - p1 ); // the agreed upon local frame of reference of the quad Finite Element. Should be synced up with Builder:AdjustQuads
  390. float m[ 4 ] = { quad.vShape[ 0 ].w, quad.vShape[ 1 ].w, quad.vShape[ 2 ].w, quad.vShape[ 3 ].w };
  391. Vector vCoM = p0 * m[ 0 ] + p1 * m[ 1 ] + p2 * m[ 2 ] + p3 * m[ 3 ]; // ~12 flops
  392. // find omega, refer to wahba.nb. We only have points 1, 2 and 3 to compute and move, but otherwise this is the same as the free-quad version
  393. Vector x[ 4 ] = { p0 - vCoM, p1 - vCoM, p2 - vCoM, p3 - vCoM }; // ~9 flops
  394. Vector r[ 4 ] = { basis.LocalToWorld( flModelScale * quad.vShape[ 0 ] ), basis.LocalToWorld( flModelScale * quad.vShape[ 1 ] ), basis.LocalToWorld( flModelScale * quad.vShape[ 2 ] ), basis.LocalToWorld( flModelScale * quad.vShape[ 3 ] ) }; // ~36 flops
  395. float flError;
  396. // for ( int i = 0; i < 3; ++i )
  397. {
  398. #ifdef _DEBUG
  399. Vector q[ 4 ] = { basis.WorldToLocal( x[ 0 ] ), basis.WorldToLocal( x[ 1 ] ), basis.WorldToLocal( x[ 2 ] ), basis.WorldToLocal( x[ 3 ] ) }; NOTE_UNUSED( q );
  400. #endif
  401. // refer to wahba.nb
  402. Vector rhs = - ( m[ 0 ] * CrossProduct( x[ 0 ], r[ 0 ] ) + m[ 1 ] * CrossProduct( x[ 1 ], r[ 1 ] ) + m[ 2 ] * CrossProduct( x[ 2 ], r[ 2 ] ) + m[ 3 ] * CrossProduct( x[ 3 ], r[ 3 ] ) ); // ~27 flops
  403. // ~84 flops to sum up the covariance matrix
  404. CovMatrix3 cov;
  405. cov.InitForWahba( m[ 0 ], x[ 0 ] );
  406. cov.AddForWahba( m[ 1 ], x[ 1 ] );
  407. cov.AddForWahba( m[ 2 ], x[ 2 ] );
  408. cov.AddForWahba( m[ 3 ], x[ 3 ] );
  409. Cholesky3x3_t chol( cov.m_vDiag.x, cov.m_flXY, cov.m_vDiag.y, cov.m_flXZ, cov.m_flYZ, cov.m_vDiag.z ); // ~30 flops in SIMD
  410. CSinCosRotation rotation( chol.Solve( rhs ) ); // ~20 flops to solve and init rotation
  411. // yay! we computed omega (sin), apply it..
  412. r[ 0 ] = rotation * r[ 0 ]; r[ 1 ] = rotation * r[ 1 ]; r[ 2 ] = rotation * r[ 2 ]; r[ 3 ] = rotation * r[ 3 ];
  413. #ifdef _DEBUG
  414. Vector s[ 4 ] = { basis.WorldToLocal( r[ 0 ] ), basis.WorldToLocal( r[ 1 ] ), basis.WorldToLocal( r[ 2 ] ), basis.WorldToLocal( r[ 3 ] ) }; NOTE_UNUSED( s );
  415. #endif
  416. flError = ( r[ 0 ] - x[ 0 ] ).LengthSqr() + ( r[ 1 ] - x[ 1 ] ).LengthSqr() + ( r[ 2 ] - x[ 2 ] ).LengthSqr() + ( r[ 3 ] - x[ 3 ] ).LengthSqr();
  417. } // ~24 flops
  418. p0 = r[ 0 ] + vCoM; p1 = r[ 1 ] + vCoM; p2 = r[ 2 ] + vCoM; p3 = r[ 3 ] + vCoM; // 33 flops
  419. return flError;
  420. }
  421. // ~121 flops
  422. float CFeModel::RelaxQuad0flat( float flStiffness, float flModelScale, const FeQuad_t &quad, VectorAligned &p0, VectorAligned &p1, VectorAligned &p2, VectorAligned &p3 )const
  423. {
  424. CFeBasis basis( p2 - p0, p3 - p1 ); // ~9 flops; the agreed upon local frame of reference of the quad Finite Element
  425. float m[ 4 ] = { quad.vShape[ 0 ].w, quad.vShape[ 1 ].w, quad.vShape[ 2 ].w, quad.vShape[ 3 ].w };
  426. Vector vCoM = p0 * m[ 0 ] + p1 * m[ 1 ] + p2 * m[ 2 ] + p3 * m[ 3 ]; // ~12 flops
  427. // find omega, refer to wahba.nb. We only have points 1, 2 and 3 to compute and move, but otherwise this is the same as the free-quad version
  428. Vector2D x[ 4 ] = { basis.WorldToLocalXY( p0 - vCoM ), basis.WorldToLocalXY( p1 - vCoM ), basis.WorldToLocalXY( p2 - vCoM ), basis.WorldToLocalXY( p3 - vCoM ) }; // ~24 flops
  429. // refer to wahba.nb
  430. CSinCosRotation2D rotation( m[ 0 ] * DotProduct( quad.vShape[ 0 ], x[ 0 ] ) + m[ 1 ] * DotProduct( quad.vShape[ 1 ], x[ 1 ] ) + m[ 2 ] * DotProduct( quad.vShape[ 2 ], x[ 2 ] ) + m[ 3 ] * DotProduct( quad.vShape[ 3 ], x[ 3 ] ) ,
  431. m[ 0 ] * CrossProductZ( quad.vShape[ 0 ], x[ 0 ] ) + m[ 1 ] * CrossProductZ( quad.vShape[ 1 ], x[ 1 ] ) + m[ 2 ] * CrossProductZ( quad.vShape[ 2 ], x[ 2 ] ) + m[ 3 ] * CrossProductZ( quad.vShape[ 3 ], x[ 3 ] ) ); // flops
  432. basis.UnrotateXY( rotation ); // ~12 flops
  433. Vector r[ 4 ] = { basis.LocalToWorld( flModelScale * quad.vShape[ 0 ].AsVector3D( ) ), basis.LocalToWorld( flModelScale * quad.vShape[ 1 ].AsVector3D( ) ), basis.LocalToWorld( flModelScale * quad.vShape[ 2 ].AsVector3D( ) ), basis.LocalToWorld( flModelScale * quad.vShape[ 3 ].AsVector3D( ) ) }; // ~40 flops
  434. p0 = r[ 0 ] + vCoM; p1 = r[ 1 ] + vCoM; p2 = r[ 2 ] + vCoM; p3 = r[ 3 ] + vCoM; // 12 flops
  435. return 1.0f - rotation.m_flCos; // I really should scale it by the quad's perimeter or something...
  436. }
  437. //----------------------------------------------------------------------------------------------------------
  438. fltx4 CFeModel::RelaxSimdQuad2( const fltx4& fl4Stiffness, const fltx4 &fl4ModelScale, const FeSimdQuad_t &quad, fltx4 *pPos )const
  439. {
  440. FourVectors p0, p1, p2, p3;
  441. LOAD_NODES( p0, quad.nNode[ 0 ] );
  442. LOAD_NODES( p1, quad.nNode[ 1 ] );
  443. LOAD_NODES( p2, quad.nNode[ 2 ] );
  444. LOAD_NODES( p3, quad.nNode[ 3 ] );
  445. FourVectors vCoM = ( p0 + p1 ) * Four_PointFives;
  446. CFeSimdBasis basis( p1 - p0, p2 + p3 - ( p0 + p0 ) ); // p1-p0 is the best choice for the axis, because we're rotating around it
  447. // assume center of mass = p0, because it doesn't matter where it is along the line p0..p1
  448. // find omega, refer to wahba1d.nb. We only have points 2 and 3 to compute and move, and even that only in the plane YZ (X is fixed, as we rotate around axis X)
  449. FourVectors x2 = p2 - vCoM, x3 = p3 - vCoM;
  450. FourVectorsYZ vLocalP2 = basis.WorldToLocalYZ( x2 ), vLocalP3 = basis.WorldToLocalYZ( x3 );
  451. // Note: e x X = CrossProduct( vAxisX, (?, x1, x2) ) = (0, -x2, x1 ). Note that we rename YZ->XY for Vector2D conversion, so (-x2, x1) -> ( -p.y, p.x )
  452. fltx4 flMass2 = quad.f4Weights[ 2 ], flMass3 = quad.f4Weights[ 3 ];
  453. CSimdSinCosRotation2D rotation( ( vLocalP2.y * quad.vShape[ 2 ].y + vLocalP2.z * quad.vShape[ 2 ].z ) * flMass2 + ( vLocalP3.y * quad.vShape[ 3 ].y + vLocalP3.z * quad.vShape[ 3 ].z ) * flMass3,
  454. ( vLocalP2.y * quad.vShape[ 2 ].z - vLocalP2.z * quad.vShape[ 2 ].y ) * flMass2 + ( vLocalP3.y * quad.vShape[ 3 ].z - vLocalP3.z * quad.vShape[ 3 ].y ) * flMass3 );
  455. FourVectors r2 = basis.LocalToWorld( quad.vShape[ 2 ].x, quad.vShape[ 2 ].y * rotation.GetCosine( ) - quad.vShape[ 2 ].z * rotation.GetSine( ), quad.vShape[ 2 ].z * rotation.GetCosine( ) + quad.vShape[ 2 ].y * rotation.GetSine( ) );
  456. FourVectors r3 = basis.LocalToWorld( quad.vShape[ 3 ].x, quad.vShape[ 3 ].y * rotation.GetCosine( ) - quad.vShape[ 3 ].z * rotation.GetSine( ), quad.vShape[ 3 ].z * rotation.GetCosine( ) + quad.vShape[ 3 ].y * rotation.GetSine( ) );
  457. fltx4 flError = ( r2 - x2 ).LengthSqr( ) + ( r3 - x3 ).LengthSqr( );
  458. fltx4 wr = fl4ModelScale * fl4Stiffness, wx = Four_Ones - fl4Stiffness;
  459. p2 = vCoM + r2 * wr + x2 * wx;
  460. p3 = vCoM + r3 * wr + x3 * wx;
  461. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) + p3.Length( ) ) );
  462. SAVE_NODES( p2, quad.nNode[ 2 ] );
  463. SAVE_NODES( p3, quad.nNode[ 3 ] );
  464. return flError;
  465. }
  466. //----------------------------------------------------------------------------------------------------------
  467. fltx4 CFeModel::RelaxSimdQuad1( const fltx4& fl4Stiffness, const fltx4 &fl4ModelScale, const FeSimdQuad_t &quad, fltx4 *pPos )const
  468. {
  469. FourVectors p0, p1, p2, p3;
  470. LOAD_NODES( p0, quad.nNode[ 0 ] );
  471. LOAD_NODES( p1, quad.nNode[ 1 ] );
  472. LOAD_NODES( p2, quad.nNode[ 2 ] );
  473. LOAD_NODES( p3, quad.nNode[ 3 ] );
  474. const FourVectors &vCoM = p0;
  475. CFeSimdBasis basis( p2 - p0, p3 - p1 ); // rotating around p0, because center of mass = p0, because it's the only infinite-mass corner of this quad
  476. // find omega, refer to wahba.nb. We only have points 1, 2 and 3 to compute and move, but otherwise this is the same as the free-quad version
  477. FourVectors x1 = p1 - vCoM, x2 = p2 - vCoM, x3 = p3 - vCoM;
  478. FourVectors r1 = basis.LocalToWorld( fl4ModelScale * quad.vShape[ 1 ] ), r2 = basis.LocalToWorld( fl4ModelScale * quad.vShape[ 2 ] ), r3 = basis.LocalToWorld( fl4ModelScale * quad.vShape[ 3 ] );
  479. fltx4 m1 = quad.f4Weights[ 1 ], m2 = quad.f4Weights[ 2 ], m3 = quad.f4Weights[ 3 ];
  480. // refer to wahba.nb
  481. FourVectors rhs = m1 * CrossProduct( x1, r1 ) + m2 * CrossProduct( x2, r2 ) + m3 * CrossProduct( x3, r3 );
  482. FourCovMatrices3 cov;
  483. cov.InitForWahba( m1, x1 );
  484. cov.AddForWahba( m2, x2 );
  485. cov.AddForWahba( m3, x3 );
  486. SimdCholesky3x3_t chol( cov.m_vDiag.x, cov.m_flXY, cov.m_vDiag.y, cov.m_flXZ, cov.m_flYZ, cov.m_vDiag.z );
  487. CSimdSinCosRotation rotation( AndSIMD( chol.Solve( rhs ), chol.GetValidMask() ) );
  488. // yay! we computed omega (sin), apply it..
  489. r1 = rotation.Unrotate( r1 ); r2 = rotation.Unrotate( r2 ); r3 = rotation.Unrotate( r3 );
  490. fltx4 flError = ( r1 - x1 ).LengthSqr( ) + ( r2 - x2 ).LengthSqr( ) + ( r3 - x3 ).LengthSqr( );
  491. p1 = r1 + vCoM; p2 = r2 + vCoM; p3 = r3 + vCoM;
  492. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) + p3.Length( ) ) );
  493. SAVE_NODES( p1, quad.nNode[ 1 ] );
  494. SAVE_NODES( p2, quad.nNode[ 2 ] );
  495. SAVE_NODES( p3, quad.nNode[ 3 ] );
  496. return flError;
  497. }
  498. fltx4 CFeModel::RelaxSimdQuad0( const fltx4& flStiffness, const fltx4 &fl4ModelScale, const FeSimdQuad_t &quad, fltx4 *pPos )const
  499. {
  500. FourVectors p0, p1, p2, p3;
  501. LOAD_NODES( p0, quad.nNode[ 0 ] );
  502. LOAD_NODES( p1, quad.nNode[ 1 ] );
  503. LOAD_NODES( p2, quad.nNode[ 2 ] );
  504. LOAD_NODES( p3, quad.nNode[ 3 ] );
  505. CFeSimdBasis basis( p2 - p0, p3 - p1 ); // the agreed upon local frame of reference of the quad Finite Element
  506. const fltx4 *m = quad.f4Weights;
  507. FourVectors vCoM = p0 * m[ 0 ] + p1 * m[ 1 ] + p2 * m[ 2 ] + p3 * m[ 3 ]; // ~12 flops
  508. // find omega, refer to wahba.nb. We only have points 1, 2 and 3 to compute and move, but otherwise this is the same as the free-quad version
  509. FourVectors x[ 4 ] = { p0 - vCoM, p1 - vCoM, p2 - vCoM, p3 - vCoM }; // ~9 flops
  510. FourVectors r[ 4 ] = { basis.LocalToWorld( fl4ModelScale * quad.vShape[ 0 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 1 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 2 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 3 ] ) }; // ~48 flops
  511. fltx4 fl4Error;
  512. // for ( int i = 0; i < 3; ++i )
  513. {
  514. #ifdef _DEBUG
  515. //FourVectors q[ 4 ] = { basis.WorldToLocal( x[ 0 ] ), basis.WorldToLocal( x[ 1 ] ), basis.WorldToLocal( x[ 2 ] ), basis.WorldToLocal( x[ 3 ] ) }; NOTE_UNUSED( q );
  516. #endif
  517. // refer to wahba.nb
  518. FourVectors rhs = ( m[ 0 ] * CrossProduct( x[ 0 ], r[ 0 ] ) + m[ 1 ] * CrossProduct( x[ 1 ], r[ 1 ] ) + m[ 2 ] * CrossProduct( x[ 2 ], r[ 2 ] ) + m[ 3 ] * CrossProduct( x[ 3 ], r[ 3 ] ) ); // ~27 flops
  519. // ~84 flops to sum up the covariance matrix
  520. FourCovMatrices3 cov;
  521. cov.InitForWahba( m[ 0 ], x[ 0 ] );
  522. cov.AddForWahba( m[ 1 ], x[ 1 ] );
  523. cov.AddForWahba( m[ 2 ], x[ 2 ] );
  524. cov.AddForWahba( m[ 3 ], x[ 3 ] );
  525. SimdCholesky3x3_t chol( cov.m_vDiag.x, cov.m_flXY, cov.m_vDiag.y, cov.m_flXZ, cov.m_flYZ, cov.m_vDiag.z ); // ~30 flops in SIMD
  526. CSimdSinCosRotation rotation( AndSIMD( chol.Solve( rhs ), chol.GetValidMask( ) ) ); // ~20 + 7 flops to solve and init rotation + mask out invalid columns
  527. // yay! we computed omega (sin), apply it..
  528. r[ 0 ] = rotation.Unrotate( r[ 0 ] ); r[ 1 ] = rotation.Unrotate( r[ 1 ] ); r[ 2 ] = rotation.Unrotate( r[ 2 ] ); r[ 3 ] = rotation.Unrotate( r[ 3 ] );
  529. fl4Error = ( r[ 0 ] - x[ 0 ] ).LengthSqr( ) + ( r[ 1 ] - x[ 1 ] ).LengthSqr( ) + ( r[ 2 ] - x[ 2 ] ).LengthSqr( ) + ( r[ 3 ] - x[ 3 ] ).LengthSqr( );
  530. #ifdef _DEBUG
  531. //FourVectors s[ 4 ] = { basis.WorldToLocal( r[ 0 ] ), basis.WorldToLocal( r[ 1 ] ), basis.WorldToLocal( r[ 2 ] ), basis.WorldToLocal( r[ 3 ] ) }; NOTE_UNUSED( s );
  532. //const fltx4 fl4MaxExpectedError = { 64, 64, 64, 64 };
  533. //AssertDbg( IsAllGreaterThan( fl4MaxExpectedError, fl4Error ) );
  534. #endif
  535. } // ~24 flops
  536. fltx4 flUnStiffness = Four_Ones - flStiffness;
  537. p0 = r[ 0 ] * flStiffness + x[ 0 ] * flUnStiffness + vCoM;
  538. p1 = r[ 1 ] * flStiffness + x[ 1 ] * flUnStiffness + vCoM;
  539. p2 = r[ 2 ] * flStiffness + x[ 2 ] * flUnStiffness + vCoM;
  540. p3 = r[ 3 ] * flStiffness + x[ 3 ] * flUnStiffness + vCoM; // 33 flops
  541. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) + p3.Length() ) );
  542. SAVE_NODES( p0, quad.nNode[ 0 ] );
  543. SAVE_NODES( p1, quad.nNode[ 1 ] );
  544. SAVE_NODES( p2, quad.nNode[ 2 ] );
  545. SAVE_NODES( p3, quad.nNode[ 3 ] );
  546. return fl4Error;
  547. }
  548. FORCEINLINE fltx4 DotProduct( const FourVectors &v1, const FourVectors2D &v2 )
  549. {
  550. return v1.x * v2.x + v1.y * v2.y;
  551. }
  552. fltx4 CFeModel::RelaxSimdQuad0flat( const fltx4& flStiffness, const fltx4 &fl4ModelScale, const FeSimdQuad_t &quad, fltx4 *pPos )const
  553. {
  554. FourVectors p0, p1, p2, p3;
  555. LOAD_NODES( p0, quad.nNode[ 0 ] );
  556. LOAD_NODES( p1, quad.nNode[ 1 ] );
  557. LOAD_NODES( p2, quad.nNode[ 2 ] );
  558. LOAD_NODES( p3, quad.nNode[ 3 ] );
  559. CFeSimdBasis basis( p2 - p0, p3 - p1 ); // the agreed upon local frame of reference of the quad Finite Element
  560. FourVectors vCoM = p0 * quad.f4Weights[ 0 ] + p1 * quad.f4Weights[ 1 ] + p2 * quad.f4Weights[ 2 ] + p3 * quad.f4Weights[ 3 ]; // ~12 flops
  561. FourVectors2D x[ 4 ] = { basis.WorldToLocalXY( p0 - vCoM ), basis.WorldToLocalXY( p1 - vCoM ), basis.WorldToLocalXY( p2 - vCoM ), basis.WorldToLocalXY( p3 - vCoM ) }; // ~24 flops
  562. // refer to wahba.nb
  563. CSimdSinCosRotation2D rotation(
  564. quad.f4Weights[ 0 ] * DotProduct( quad.vShape[ 0 ], x[ 0 ] ) + quad.f4Weights[ 1 ] * DotProduct( quad.vShape[ 1 ], x[ 1 ] ) + quad.f4Weights[ 2 ] * DotProduct( quad.vShape[ 2 ], x[ 2 ] ) + quad.f4Weights[ 3 ] * DotProduct( quad.vShape[ 3 ], x[ 3 ] ),
  565. quad.f4Weights[ 0 ] * CrossProductZ( quad.vShape[ 0 ], x[ 0 ] ) + quad.f4Weights[ 1 ] * CrossProductZ( quad.vShape[ 1 ], x[ 1 ] ) + quad.f4Weights[ 2 ] * CrossProductZ( quad.vShape[ 2 ], x[ 2 ] ) + quad.f4Weights[ 3 ] * CrossProductZ( quad.vShape[ 3 ], x[ 3 ] )
  566. ); // ~flops
  567. basis.UnrotateXY( rotation ); // ~12 flops
  568. FourVectors r[ 4 ] = { basis.LocalToWorld( fl4ModelScale * quad.vShape[ 0 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 1 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 2 ] ), basis.LocalToWorld( fl4ModelScale * quad.vShape[ 3 ] ) }; // ~36 flops
  569. p0 = r[ 0 ] + vCoM; p1 = r[ 1 ] + vCoM; p2 = r[ 2 ] + vCoM; p3 = r[ 3 ] + vCoM; // 33 flops
  570. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) + p3.Length( ) ) );
  571. SAVE_NODES( p0, quad.nNode[ 0 ] );
  572. SAVE_NODES( p1, quad.nNode[ 1 ] );
  573. SAVE_NODES( p2, quad.nNode[ 2 ] );
  574. SAVE_NODES( p3, quad.nNode[ 3 ] );
  575. return Four_Ones - rotation.m_fl4Cos;
  576. }
  577. //----------------------------------------------------------------------------------------------------------
  578. fltx4 CFeModel::RelaxSimdTri2( const fltx4& flStiffness, const fltx4 &fl4ModelScale, const FeSimdTri_t &tri, fltx4 *pPos )const
  579. {
  580. FourVectors p0, p1, p2;
  581. LOAD_NODES( p0, tri.nNode[ 0 ] );
  582. LOAD_NODES( p1, tri.nNode[ 1 ] );
  583. LOAD_NODES( p2, tri.nNode[ 2 ] );
  584. CFeSimdTriBasis basis( p1 - p0, p2 - p0 ); // p1-p0 is the best choice for the axis, because we're rotating around it
  585. p2 = p0 + basis.LocalXYToWorld( fl4ModelScale * ( tri.v2.x + Four_PointFives * ( basis.v1x - tri.v1x ) ), fl4ModelScale * tri.v2.y );
  586. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) ) );
  587. SAVE_NODES( p2, tri.nNode[ 2 ] );
  588. return Four_Zeros;
  589. }
  590. FORCEINLINE fltx4 CrossProductZ( const fltx4& v1x, const fltx4& v1y, const fltx4& v2x, const fltx4& v2y )
  591. {
  592. return v1x * v2y - v1y * v2x;
  593. }
  594. FORCEINLINE fltx4 DotProduct2D( const FourVectors2D &v1, const FourVectors2D &v2 )
  595. {
  596. return v1.x * v2.x + v1.y * v2.y;
  597. }
  598. FORCEINLINE fltx4 CrossProductZ( const FourVectors2D &v1, const FourVectors2D &v2 )
  599. {
  600. return v1.x * v2.y - v1.y * v2.x;
  601. }
  602. //----------------------------------------------------------------------------------------------------------
  603. fltx4 CFeModel::RelaxSimdTri1( const fltx4& flStiffness, const fltx4 &fl4ModelScale, const FeSimdTri_t &tri, fltx4 *pPos )const
  604. {
  605. FourVectors p0, p1, p2;
  606. LOAD_NODES( p0, tri.nNode[ 0 ] );
  607. LOAD_NODES( p1, tri.nNode[ 1 ] );
  608. LOAD_NODES( p2, tri.nNode[ 2 ] );
  609. CFeSimdTriBasis basis( p1 - p0, p2 - p0 ); // rotating around p0, because center of mass = p0, because it's the only infinite-mass corner of this triangle
  610. CSimdSinCosRotation2D rotation( tri.w2 * DotProduct2D( tri.v2, basis.v2 ) + tri.w1 * ( tri.v1x * basis.v1x ), tri.w2 * CrossProductZ( tri.v2, basis.v2 ) );
  611. // we computed omega (sin, cos), apply it..
  612. p1 = p0 + basis.LocalXYToWorld( fl4ModelScale * ( rotation.m_fl4Cos * tri.v1x ), fl4ModelScale * ( rotation.m_fl4Sin * tri.v1x ) );
  613. p2 = p0 + basis.LocalXYToWorld( fl4ModelScale * ( rotation * tri.v2 ) );
  614. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length( ) ) );
  615. SAVE_NODES( p1, tri.nNode[ 1 ] );
  616. SAVE_NODES( p2, tri.nNode[ 2 ] );
  617. return Four_Ones - rotation.m_fl4Cos;
  618. }
  619. //----------------------------------------------------------------------------------------------------------
  620. fltx4 CFeModel::RelaxSimdTri0( const fltx4& flStiffness, const fltx4 &fl4ModelScale, const FeSimdTri_t &tri, fltx4 *pPos )const
  621. {
  622. FourVectors p0, p1, p2;
  623. LOAD_NODES( p0, tri.nNode[ 0 ] );
  624. LOAD_NODES( p1, tri.nNode[ 1 ] );
  625. LOAD_NODES( p2, tri.nNode[ 2 ] );
  626. fltx4 tri_v1x = fl4ModelScale * tri.v1x;
  627. FourVectors2D tri_v2 = fl4ModelScale * tri.v2;
  628. CFeSimdTriBasis basis( p1 - p0, p2 - p0 );
  629. FourVectors2D x0neg( basis.v1x * tri.w1 + basis.v2.x * tri.w2, basis.v2.y * tri.w2 ); // center of mass of the deformed triangle p0,p1,p2 in local coordinate system
  630. FourVectors2D r0neg( tri_v1x * tri.w1 + tri_v2.x * tri.w2, tri_v2.y * tri.w2 ); // center of mass of the target triangle configuration, in its coordinate system
  631. fltx4 w0 = Four_Ones - ( tri.w1 + tri.w2 );
  632. FourVectors2D x2 = basis.v2 - x0neg, r2 = tri_v2 - r0neg;
  633. fltx4 x1x = basis.v1x - x0neg.x; // x1.y = -x0neg.y
  634. fltx4 r1x = tri_v1x - r0neg.x; // r1.y = -r0neg.y
  635. CSimdSinCosRotation2D rotation(
  636. tri.w2 * DotProduct2D( x2, r2 ) + tri.w1 * ( x1x * r1x ) + w0 * ( x0neg.x * r0neg.x ) + ( tri.w1 + w0 ) * ( x0neg.y * r0neg.y ),
  637. tri.w2 * CrossProductZ( r2, x2 ) - tri.w1 * CrossProductZ( r1x, r0neg.y, x1x, x0neg.y ) + w0 * CrossProductZ( r0neg, x0neg )
  638. );
  639. FourVectors2D dX0 = x0neg - rotation * r0neg;
  640. // we computed omega (sin, cos), apply it..
  641. p0 += basis.LocalXYToWorld( dX0 );
  642. p1 = p0 + basis.LocalXYToWorld( rotation.m_fl4Cos * tri_v1x, rotation.m_fl4Sin * tri_v1x );
  643. p2 = p0 + basis.LocalXYToWorld( rotation * tri_v2 );
  644. AssertDbg( IsAllGreaterThan( Four_MaxWorldSize, p0.Length( ) + p1.Length( ) + p2.Length() ) );
  645. SAVE_NODES( p0, tri.nNode[ 0 ] );
  646. SAVE_NODES( p1, tri.nNode[ 1 ] );
  647. SAVE_NODES( p2, tri.nNode[ 2 ] );
  648. return Four_Ones - rotation.m_fl4Cos;
  649. }
  650. //----------------------------------------------------------------------------------------------------------
  651. void CFeModel::RelaxBend( VectorAligned *pPos, float flStiffness )const
  652. {
  653. for ( uint nEdge = 0; nEdge < m_nAxialEdgeCount; ++nEdge )
  654. {
  655. const FeAxialEdgeBend_t& edge = m_pAxialEdges[ nEdge ];
  656. VectorAligned &p0 = pPos[ edge.nNode[ 0 ] ], &p1 = pPos[ edge.nNode[ 1 ] ], &p2 = pPos[ edge.nNode[ 2 ] ], &p3 = pPos[ edge.nNode[ 3 ] ], &p4 = pPos[ edge.nNode[ 4 ] ], &p5 = pPos[ edge.nNode[ 5 ] ];
  657. Vector fe = p0 * ( 1.0f - edge.te ) + p1 * edge.te;
  658. float etvHalf = edge.tv * 0.5f;
  659. Vector fv = ( p2 + p3 ) * ( 0.5f - etvHalf ) + ( p4 + p5 ) * etvHalf;
  660. // alternative axis
  661. Vector vAxis = fv - fe;
  662. float flAxis = vAxis.Length( );
  663. Vector vEdge = p1 - p0, vVirtualEdge = ( p4 + p5 ) - ( p2 + p3 );
  664. Vector vCrossEdges = CrossProduct( vEdge, vVirtualEdge );
  665. float flCorrection;
  666. if ( flAxis > 0.001f ) // this is the upper bound on the distance between edges; I hope edges themselves are never this short, so we can consider them almost-intersecting if they're this close and fallback
  667. {
  668. float flAdjDist = DotProduct( vAxis, vCrossEdges ) > 0 ? -edge.flDist : edge.flDist; // -1.0f: detect flipped edge, flip it back
  669. flCorrection = 1.0f + flAdjDist / flAxis;
  670. }
  671. else
  672. {
  673. // recover from intersecting edges; not necessary for high curvatures if there are rigid rods that enforce some bend, but crucial to maintain low or zero angles of curvature
  674. // generally, rods are better for high curvature and useless for low; bends are better for low curvature and useless for high
  675. float flCrossEdges = vCrossEdges.Length();
  676. if ( flCrossEdges > FLT_EPSILON )
  677. {
  678. vAxis = vCrossEdges;
  679. flCorrection = edge.flDist / flCrossEdges;
  680. }
  681. else
  682. {
  683. vAxis.z = 1; // degenerate case, should probably recover anyway when polygon shape recovers
  684. flCorrection = edge.flDist;
  685. }
  686. }
  687. Vector vDelta = ( flStiffness * flCorrection ) * vAxis;
  688. p0 += vDelta * edge.flWeight[ 0 ];
  689. p1 += vDelta * edge.flWeight[ 1 ];
  690. Vector dw2 = vDelta * edge.flWeight[ 2 ];
  691. // Careful! The same weight will be applied to the same node twice sometimes here
  692. Vector p2new = p2 + dw2;
  693. Vector p3new = p3 + dw2;
  694. p2 = p2new;
  695. p3 = p3new;
  696. Vector dw3 = vDelta * edge.flWeight[ 3 ];
  697. Vector p4new = p4 + dw3;
  698. Vector p5new = p5 + dw3;
  699. p4 = p4new;
  700. p5 = p5new;
  701. }
  702. }
  703. void IntegrateSpring( const FeSpringIntegrator_t &integ, float flTimeStep, float flModelScale, VectorAligned &pos10, VectorAligned &pos11, VectorAligned &pos00, VectorAligned &pos01 )
  704. {
  705. Vector vDeltaOrigin1 = pos10 - pos11;
  706. Vector vDeltaOrigin0 = pos00 - pos01;
  707. Vector vDeltaVelDt = ( vDeltaOrigin1 - vDeltaOrigin0 );
  708. float flDistance = vDeltaOrigin1.Length( );
  709. if ( flDistance < 1.0f )
  710. {
  711. return; // nothing to integrate
  712. }
  713. Vector vSpringDir = vDeltaOrigin1 / flDistance;
  714. float flHTerm = ( flDistance - integ.flSpringRestLength * flModelScale ) * integ.flSpringConstant * flTimeStep;
  715. float flDTerm = ( vDeltaVelDt.Dot( vSpringDir ) * integ.flSpringDamping );
  716. //Vector vSpringDeltaVel = vSpringAcceleration * -( flHTerm + flDTerm );
  717. Vector vIntegral = vSpringDir * ( ( flHTerm + flDTerm ) * ( flTimeStep /** 0.5f*/ ) );
  718. // reference from source1 code:
  719. // if ( ( m_SpringType == CLOTH_SPRING_STRUCTURAL_HORIZONTAL || m_SpringType == CLOTH_SPRING_STRUCTURAL_VERTICAL ) && ( pParticle1->IsFixed( ) == true || pParticle2->IsFixed( ) == true ) )
  720. // {
  721. // m_vSpringForce *= 2.0f; // <--- this is why I'm multiplying by dt^2, not dt^2 / 2
  722. // }
  723. // all working springs are structural horizontal or vertical
  724. // SERGIY TEST: is the sign correct here? the cloth1 cloth uses spring force = -(HTerm+DTerm)*d
  725. pos00 -= vIntegral * integ.flNodeWeight0;
  726. pos01 += vIntegral * ( 1.0f - integ.flNodeWeight0 );
  727. }
  728. //----------------------------------------------------------------------------------------------------------
  729. // for integrating accelerations into positions, pass dt*dt/2 for subsequent verlet integration;
  730. // for integrating accelerations into velocities or velocities into positions, pass dt;
  731. void CFeModel::IntegrateSprings( VectorAligned *pPos0, VectorAligned *pPos1, float flTimeStep, float flModelScale ) const
  732. {
  733. for ( int i = 0; i < m_nSpringIntegratorCount; ++i )
  734. {
  735. const FeSpringIntegrator_t &integ = m_pSpringIntegrator[ i ];
  736. uint n0 = integ.nNode[ 0 ], n1 = integ.nNode[ 1 ];
  737. VectorAligned &pos10 = pPos1[ n0 ], &pos11 = pPos1[ n1 ];
  738. VectorAligned &pos00 = pPos0[ n0 ], &pos01 = pPos0[ n1 ];
  739. IntegrateSpring( integ, flTimeStep, flModelScale, pos10, pos11, pos00, pos01 );
  740. }
  741. }
  742. float CFeModel::ComputeElasticEnergyQuads( const VectorAligned *pPos, float flModelScale )const
  743. {
  744. float flStiffness = 1.0f, flSumEnergy = 0;
  745. #define QUAD_ENERGY(FUNC) \
  746. const FeQuad_t &quad = m_pQuads[ i ]; \
  747. VectorAligned pos[ 4 ] = { pPos[ quad.nNode[ 0 ] ], pPos[ quad.nNode[ 1 ] ], pPos[ quad.nNode[ 2 ] ], pPos[ quad.nNode[ 3 ] ] }; \
  748. FUNC( flStiffness, flModelScale, quad, pos[ 0 ], pos[ 1 ], pos[ 2 ], pos[ 3 ] ); \
  749. for ( int j = 0; j < 4; ++j ) \
  750. if ( m_pNodeInvMasses[ quad.nNode[ j ] ] > 0 ) \
  751. flSumEnergy += ( pos[ j ] - pPos[ quad.nNode[ j ] ] ).LengthSqr( ) * 0.5f / m_pNodeInvMasses[ quad.nNode[ j ] ];
  752. // first the simplest case of quads hanging on the edge 0-1: this is a 2D case with 2 moving nodes and only 1 DOF (rotation around edge 0-1)
  753. for ( uint i = 0; i < m_nQuadCount[ 2 ]; ++i )
  754. {
  755. QUAD_ENERGY( RelaxQuad2 );
  756. }
  757. for ( uint i = m_nQuadCount[ 2 ]; i < m_nQuadCount[ 1 ]; ++i )
  758. {
  759. QUAD_ENERGY( RelaxQuad1 );
  760. }
  761. for ( uint i = m_nQuadCount[ 1 ]; i < m_nQuadCount[ 0 ]; ++i )
  762. {
  763. QUAD_ENERGY( RelaxQuad0 );
  764. }
  765. #undef QUAD_ENERGY
  766. return flSumEnergy;
  767. }
  768. float CFeModel::ComputeElasticEnergyRods( const VectorAligned *pPos, float flModelScale )const
  769. {
  770. float flSumEnergy = 0;
  771. //CMicroProfilerGuard mpg( &g_GlobalStats[ PhysicsGlobalProfiler::RelaxRods ], m_nRodCount - 1 );
  772. for ( uint i = 0; i < m_nRodCount; ++i )
  773. {
  774. const FeRodConstraint_t &c = m_pRods[ i ];
  775. uint n0 = c.nNode[ 0 ], n1 = c.nNode[ 1 ];
  776. VectorAligned a = pPos[ n0 ], b = pPos[ n1 ];
  777. RelaxRod( c, flModelScale, a, b );
  778. if ( m_pNodeInvMasses[ n0 ] > 0 )
  779. flSumEnergy += ( a - pPos[ n0 ] ).LengthSqr( ) * 0.5f / m_pNodeInvMasses[ n0 ];
  780. if ( m_pNodeInvMasses[ n1 ] > 0 )
  781. flSumEnergy += ( b - pPos[ n1 ] ).LengthSqr( ) * 0.5f / m_pNodeInvMasses[ n1 ];
  782. }
  783. return flSumEnergy;
  784. }
  785. float CFeModel::ComputeElasticEnergySprings( const VectorAligned *pPos0, const VectorAligned *pPos1, float flTimeStep, float flModelScale ) const
  786. {
  787. float flSumEnergy = 0, dtSqr = flTimeStep;
  788. for ( int i = 0; i < m_nSpringIntegratorCount; ++i )
  789. {
  790. const FeSpringIntegrator_t &integ = m_pSpringIntegrator[ i ];
  791. uint n0 = integ.nNode[ 0 ], n1 = integ.nNode[ 1 ];
  792. VectorAligned pos10 = pPos1[ n0 ], pos11 = pPos1[ n1 ];
  793. VectorAligned pos00 = pPos0[ n0 ], pos01 = pPos0[ n1 ];
  794. IntegrateSpring( integ, flTimeStep, flModelScale, pos10, pos11, pos00, pos01 );
  795. if ( m_pNodeInvMasses[ n0 ] > 0 )
  796. flSumEnergy += ( pos10 - pPos1[ n0 ] ).LengthSqr( ) / ( m_pNodeInvMasses[ n0 ] * dtSqr );
  797. if ( m_pNodeInvMasses[ n1 ] > 0)
  798. flSumEnergy += ( pos11 - pPos1[ n1 ] ).LengthSqr( ) / ( m_pNodeInvMasses[ n1 ] * dtSqr );
  799. }
  800. return flSumEnergy;
  801. }
  802. int CFeModel::GetComplexity( )const
  803. {
  804. return
  805. ( m_nNodeCount - m_nStaticNodes ) * ( m_pNodeIntegrator ? 6 : 5 ) + m_nStaticNodes +
  806. m_nSimdQuadCount[ 0 ] * 20 +
  807. m_nSimdRodCount * 5 +
  808. m_nSimdTriCount[ 0 ] * 10 +
  809. m_nSimdSpringIntegratorCount * 3;
  810. }
  811. uint CFeModel::ComputeCollisionTreeDepthTopDown() const
  812. {
  813. CUtlVector< uint16 > levels;
  814. return ComputeCollisionTreeDepthTopDown( levels );
  815. }
  816. uint CFeModel::ComputeCollisionTreeDepthTopDown( CUtlVector< uint16 > &levels ) const
  817. {
  818. if ( GetDynamicNodeCount() > 0 )
  819. {
  820. levels.SetCount( GetDynamicNodeCount() * 2 - 1 );
  821. return ComputeCollisionTreeDepthTopDown( levels.Base() );
  822. }
  823. else
  824. {
  825. return 0;
  826. }
  827. }
  828. uint CFeModel::ComputeCollisionTreeDepthTopDown( uint16 *pLevels ) const
  829. {
  830. if ( !m_pTreeChildren )
  831. return 0;
  832. const uint nDynCount = GetDynamicNodeCount();
  833. const uint nInvalidLevel = 0xFFFF;
  834. if ( IsDebug() )
  835. {
  836. for ( int nCluster = 2 * nDynCount - 1; nCluster-- > 0; )
  837. {
  838. pLevels[ nCluster ] = nInvalidLevel;
  839. }
  840. }
  841. uint nLevel = 0;
  842. pLevels[ 2 * nDynCount - 2 ] = nLevel; // top
  843. for ( int nParent = nDynCount - 1; nParent-- > 0; )
  844. {
  845. const FeTreeChildren_t &children = m_pTreeChildren[ nParent ];
  846. uint nNewLevel = 1 + pLevels[ nParent + nDynCount ];
  847. pLevels[ children.nChild[ 0 ] ] = pLevels[ children.nChild[ 1 ] ] = nNewLevel;
  848. nLevel = Max( nNewLevel, nLevel );
  849. }
  850. if ( IsDebug() )
  851. {
  852. // we should've filled out everything
  853. for ( int nCluster = 2 * nDynCount - 1; nCluster-- > 0; )
  854. {
  855. Assert( pLevels[ nCluster ] != nInvalidLevel );
  856. }
  857. for ( uint i = 0; i < 2 * nDynCount - 2; ++i )
  858. {
  859. Assert( pLevels[ i ] == 1 + pLevels[ m_pTreeParents[ i ] ] );
  860. }
  861. }
  862. return nLevel;
  863. }
  864. uint CFeModel::ComputeCollisionTreeHeightBottomUp( CUtlVector< uint16 > &levels ) const
  865. {
  866. if ( GetDynamicNodeCount() > 1 )
  867. {
  868. levels.SetCount( GetDynamicNodeCount() - 1 );
  869. return ComputeCollisionTreeDepthTopDown( levels.Base() );
  870. }
  871. else
  872. {
  873. return 0;
  874. }
  875. }
  876. uint CFeModel::ComputeCollisionTreeHeightBottomUp() const
  877. {
  878. CUtlVector< uint16 > levels;
  879. return ComputeCollisionTreeHeightBottomUp( levels );
  880. }
  881. uint CFeModel::ComputeCollisionTreeHeightBottomUp( uint16 *pLevels ) const
  882. {
  883. if ( !m_pTreeParents )
  884. return 0;
  885. const uint nDynCount = GetDynamicNodeCount();
  886. for ( uint i = nDynCount - 1; i-- > 0; )
  887. {
  888. pLevels[ i ] = 1; // all clusters have at least level 1
  889. }
  890. uint nLevel = 0;
  891. for ( uint i = 0; i < nDynCount - 2; ++i )
  892. {
  893. AssertDbg( m_pTreeParents[ i + nDynCount ] > i + nDynCount && m_pTreeParents[ i + nDynCount ] < 2 * nDynCount - 1 ); // nodes and clusters go strictly child-to-parent
  894. uint16 &refParentLevel = pLevels[ m_pTreeParents[ i + nDynCount ] - nDynCount ];
  895. uint nChildLevel = pLevels[ i ];
  896. refParentLevel = Max< uint >( refParentLevel, nChildLevel + 1 );
  897. nLevel = Max< uint >( refParentLevel, nLevel );
  898. }
  899. if ( IsDebug() )
  900. {
  901. for ( uint i = nDynCount - 1; i-- > 0; )
  902. {
  903. const FeTreeChildren_t &tc = m_pTreeChildren[ i ];
  904. uint nChildLevel[ 2 ];
  905. for ( uint j = 0; j < 2; ++j )
  906. {
  907. nChildLevel[ j ] = tc.nChild[ j ] < nDynCount ? 0 : pLevels[ tc.nChild[ j ] - nDynCount ];
  908. }
  909. Assert( pLevels[ i ] == Max( nChildLevel[ 0 ], nChildLevel[ 1 ] ) + 1 );
  910. }
  911. }
  912. return nLevel;
  913. }
  914. // Careful! pDynPos is the array of DYNAMIC nodes, excluding m_nStaticNodes static nodes at the beginning of the array (don't need static positions to compute dynamic collision)
  915. // tree parents are defined for each dynamic node, and for each compound node. There are always nDynCount - 1 compound nodes. AABB is always defined only for compound nodes.
  916. void CFeModel::ComputeCollisionTreeBounds( const fltx4 *pDynPos, FeAabb_t *pClusters )const
  917. {
  918. const uint nDynCount = GetDynamicNodeCount();
  919. for ( uint i = nDynCount - 1; i-- > 0; )
  920. {
  921. pClusters[ i ].m_vMaxBounds = -Four_FLT_MAX;
  922. pClusters[ i ].m_vMinBounds = Four_FLT_MAX;
  923. }
  924. if ( m_pNodeCollisionRadii )
  925. {
  926. for ( uint i = 0; i < nDynCount; ++i )
  927. {
  928. uint nParent = m_pTreeParents[ i ] - nDynCount;
  929. Assert( nParent < nDynCount - 1 );
  930. pClusters[ nParent ].AddCenterAndExtents( pDynPos[ i ], ReplicateX4( m_pNodeCollisionRadii[ i ] ) );
  931. }
  932. }
  933. else
  934. {
  935. for ( uint i = 0; i < nDynCount; ++i )
  936. {
  937. uint nParent = m_pTreeParents[ i ] - nDynCount;
  938. Assert( nParent < nDynCount - 1 );
  939. pClusters[ nParent ] |= pDynPos[ i ];
  940. }
  941. }
  942. for ( uint i = 0; i < nDynCount - 2; ++i )
  943. {
  944. uint nParent = m_pTreeParents[ i + nDynCount ] - nDynCount;
  945. Assert( nParent < nDynCount - 1 );
  946. pClusters[ nParent ] |= pClusters[ i ];
  947. }
  948. }
  949. float CFeModel::ComputeCollisionTreeBoundsError( const fltx4 *pDynPos, const FeAabb_t *pClusters )const
  950. {
  951. const uint nDynCount = GetDynamicNodeCount();
  952. fltx4 f4Error = Four_Zeros;
  953. if ( m_pNodeCollisionRadii )
  954. {
  955. for ( uint i = 0; i < nDynCount; ++i )
  956. {
  957. uint nParent = m_pTreeParents[ i ] - nDynCount;
  958. Assert( nParent < nDynCount - 1 );
  959. f4Error = MaxSIMD( f4Error, pClusters[ nParent ].GetDistVector( pDynPos[ i ] - ReplicateX4( m_pNodeCollisionRadii[ i ] ) ) );
  960. f4Error = MaxSIMD( f4Error, pClusters[ nParent ].GetDistVector( pDynPos[ i ] + ReplicateX4( m_pNodeCollisionRadii[ i ] ) ) );
  961. }
  962. }
  963. else
  964. {
  965. for ( uint i = 0; i < nDynCount; ++i )
  966. {
  967. uint nParent = m_pTreeParents[ i ] - nDynCount;
  968. Assert( nParent < nDynCount - 1 );
  969. f4Error = MaxSIMD( f4Error, pClusters[ nParent ].GetDistVector( pDynPos[ i ] ) );
  970. }
  971. }
  972. for ( uint i = 0; i < nDynCount - 2; ++i )
  973. {
  974. uint nParent = m_pTreeParents[ i + nDynCount ] - nDynCount;
  975. Assert( nParent < nDynCount - 1 );
  976. f4Error = MaxSIMD( f4Error, pClusters[ nParent ].GetDistVector( pClusters[ i ].m_vMinBounds ) );
  977. f4Error = MaxSIMD( f4Error, pClusters[ nParent ].GetDistVector( pClusters[ i ].m_vMaxBounds ) );
  978. }
  979. return SubFloat( SqrtSIMD( Dot3SIMD( f4Error, f4Error ) ), 0 );
  980. }
  981. uint CFeModel::ComputeCollisionTreeNodeCount( uint16 *pCounts ) const
  982. {
  983. uint nDynCount = GetDynamicNodeCount();
  984. for ( uint i = 0; i < nDynCount - 1; ++i )
  985. {
  986. pCounts[ i ] = 0;
  987. }
  988. for ( uint i = 0; i < nDynCount; ++i )
  989. {
  990. uint nParent = m_pTreeParents[ i ] - nDynCount;
  991. AssertDbg( nParent < nDynCount - 1 );
  992. pCounts[ nParent ]++;
  993. }
  994. for ( uint i = nDynCount; i < 2 * nDynCount - 2; ++i )
  995. {
  996. uint nParent = m_pTreeParents[ i ] - nDynCount;
  997. AssertDbg( nParent < nDynCount - 1 );
  998. pCounts[ nParent ] += pCounts[ i - nDynCount ];
  999. }
  1000. AssertDbg( nDynCount == pCounts[ nDynCount - 2 ] );
  1001. return nDynCount;
  1002. }
  1003. void CFeModel::ApplyAirDrag( VectorAligned *pPos0, const VectorAligned *pPos1, float flExpDrag, float flVelDrag )
  1004. {
  1005. fltx4 f4ExpDrag = ReplicateX4( flExpDrag ), f4VelDrag = ReplicateX4( flVelDrag );
  1006. for ( uint i = m_nStaticNodes; i < m_nNodeCount; ++i )
  1007. {
  1008. fltx4 pos0 = LoadAlignedSIMD( pPos0[ i ].Base() ), pos1 = LoadAlignedSIMD( pPos1[ i ].Base() );
  1009. fltx4 dvdt = pos1 - pos0, lenSqr = Dot3SIMD( dvdt, dvdt );
  1010. fltx4 add = dvdt * MinSIMD( f4ExpDrag + f4VelDrag * SqrtEstSIMD( lenSqr ), Four_Ones );
  1011. StoreAlignedSIMD( pPos0[ i ].Base(), pos0 + add );
  1012. }
  1013. }
  1014. FORCEINLINE FourVectors ComputeQuadAirDrag( const FourVectors &vAn, const FourVectors &d, const fltx4 &f4ExpDrag, const fltx4 &f4VelDrag, const fltx4 &lenSqInvAn )
  1015. {
  1016. //return MinSIMD( AbsSIMD( DotProduct( vAxisZ, q0 - p0 ) * f4VelDrag ), Four_Ones ) * ( q0 - p0 );
  1017. fltx4 An_d = DotProduct( vAn, d );
  1018. return ( MinSIMD( Four_Ones, f4ExpDrag + AbsSIMD( An_d ) * f4VelDrag ) * An_d * lenSqInvAn ) * vAn;
  1019. }
  1020. void CFeModel::ApplyQuadAirDrag( fltx4 *pPos0, const fltx4 *pPos1, float flExpDrag, float flVelDrag )
  1021. {
  1022. fltx4 f4ExpDrag = ReplicateX4( flExpDrag ), f4VelDrag = ReplicateX4( flVelDrag );
  1023. for ( uint nQuad = m_nSimdQuadCount[ 1 ]; nQuad < m_nSimdQuadCount[ 0 ]; ++nQuad )
  1024. {
  1025. const FeSimdQuad_t &quad = m_pSimdQuads[ nQuad ];
  1026. FourVectors p0, p1, p2, p3, q0, q1, q2, q3;
  1027. LOAD_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1028. LOAD_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1029. LOAD_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1030. LOAD_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1031. LOAD_NODES_POS( pPos1, q0, quad.nNode[ 0 ] );
  1032. LOAD_NODES_POS( pPos1, q1, quad.nNode[ 1 ] );
  1033. LOAD_NODES_POS( pPos1, q2, quad.nNode[ 2 ] );
  1034. LOAD_NODES_POS( pPos1, q3, quad.nNode[ 3 ] );
  1035. FourVectors vAn = CrossProduct( p2 - p0, p3 - p1);
  1036. fltx4 lenSqAn = MaxSIMD( vAn.LengthSqr(), Four_2ToTheMinus30 );
  1037. fltx4 lenSqInvAn = ReciprocalEstSIMD( lenSqAn );
  1038. // fltx4 f4ScaledVelDrag = f4VelDrag * f4AxisZlenInv;
  1039. // fltx4 f4AxisZlen = f4AxisZlenSqr * f4AxisZlenInv;
  1040. p0 += ComputeQuadAirDrag( vAn, q0 - p0, f4ExpDrag, f4VelDrag, lenSqInvAn );
  1041. p1 += ComputeQuadAirDrag( vAn, q1 - p1, f4ExpDrag, f4VelDrag, lenSqInvAn );
  1042. p2 += ComputeQuadAirDrag( vAn, q2 - p2, f4ExpDrag, f4VelDrag, lenSqInvAn );
  1043. p3 += ComputeQuadAirDrag( vAn, q3 - p3, f4ExpDrag, f4VelDrag, lenSqInvAn );
  1044. SAVE_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1045. SAVE_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1046. SAVE_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1047. SAVE_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1048. }
  1049. }
  1050. void CFeModel::ApplyRodAirDrag( fltx4 *pPos0, const fltx4 *pPos1, float flExpDrag, float flVelDrag )
  1051. {
  1052. }
  1053. void CFeModel::ApplyQuadWind( VectorAligned *pPos1, const Vector &vWind, float flAirDrag )
  1054. {
  1055. for ( uint nQuad = 0; nQuad < m_nQuadCount[ 0 ]; ++nQuad )
  1056. {
  1057. const FeQuad_t &quad = m_pQuads[ nQuad ];
  1058. VectorAligned &p0 = pPos1[ quad.nNode[ 0 ] ], &p1 = pPos1[ quad.nNode[ 1 ] ], &p2 = pPos1[ quad.nNode[ 2 ] ], &p3 = pPos1[ quad.nNode[ 3 ] ];
  1059. Vector vArea = CrossProduct( p2 - p0, p3 - p1 );
  1060. float flArea = vArea.Length();
  1061. Vector vNormal = vArea / flArea;
  1062. float flNormalFlow = DotProduct( vNormal, vWind );
  1063. Vector vNormalFlow = flNormalFlow * vNormal;
  1064. Vector vTangentFlow = vWind - vNormalFlow;
  1065. Vector vNormalImpulse = vArea * flNormalFlow;// == vNormalFlow * flArea;
  1066. Vector vTangentImpulse = ( flAirDrag * flArea ) * vTangentFlow;
  1067. Vector vQuadImpulse = vNormalImpulse + vTangentImpulse;
  1068. p0 += quad.vShape[ 0 ].w * vQuadImpulse;
  1069. p1 += quad.vShape[ 1 ].w * vQuadImpulse;
  1070. p2 += quad.vShape[ 2 ].w * vQuadImpulse;
  1071. p3 += quad.vShape[ 3 ].w * vQuadImpulse;
  1072. }
  1073. }
  1074. void CFeModel::SmoothQuadVelocityField( fltx4 *pPos0, const fltx4 *pPos1, float flBlendFactor )
  1075. {
  1076. fltx4 f4MulOriginal = ReplicateX4( flBlendFactor ), f4MulTarget = Four_Ones - f4MulOriginal, f4MulDoubleTarget = Four_PointFives * f4MulTarget;
  1077. // with 2 points fixed, the target velocity is controlled by them only (conservation of momentum in massive-light particle interaction, assuming the 2 top particles have the same mass)
  1078. for ( uint nQuad = 0; nQuad < m_nSimdQuadCount[ 2 ]; ++nQuad )
  1079. {
  1080. const FeSimdQuad_t &quad = m_pSimdQuads[ nQuad ];
  1081. FourVectors p0, p1, p2, p3, q0, q1, q2, q3;
  1082. LOAD_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1083. LOAD_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1084. LOAD_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1085. LOAD_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1086. LOAD_NODES_POS( pPos1, q0, quad.nNode[ 0 ] );
  1087. LOAD_NODES_POS( pPos1, q1, quad.nNode[ 1 ] );
  1088. LOAD_NODES_POS( pPos1, q2, quad.nNode[ 2 ] );
  1089. LOAD_NODES_POS( pPos1, q3, quad.nNode[ 3 ] );
  1090. // note: this is negative velocitie (backward-time velocities)
  1091. FourVectors v0 = p0 - q0, v1 = p1 - q1, v2 = p2 - q2, v3 = p3 - q3;
  1092. FourVectors vPremulTarget = ( v0 + v1 ) * f4MulDoubleTarget;
  1093. p2 = q2 + v2 * f4MulOriginal + vPremulTarget;
  1094. p3 = q3 + v3 * f4MulOriginal + vPremulTarget;
  1095. SAVE_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1096. SAVE_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1097. }
  1098. // with 1 point fixed, the target velocity is that point's velocity (conservation of momentum in massive-light particle interaction)
  1099. for ( uint nQuad = m_nSimdQuadCount[ 2 ]; nQuad < m_nSimdQuadCount[ 1 ]; ++nQuad )
  1100. {
  1101. const FeSimdQuad_t &quad = m_pSimdQuads[ nQuad ];
  1102. FourVectors p0, p1, p2, p3, q0, q1, q2, q3;
  1103. LOAD_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1104. LOAD_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1105. LOAD_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1106. LOAD_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1107. LOAD_NODES_POS( pPos1, q0, quad.nNode[ 0 ] );
  1108. LOAD_NODES_POS( pPos1, q1, quad.nNode[ 1 ] );
  1109. LOAD_NODES_POS( pPos1, q2, quad.nNode[ 2 ] );
  1110. LOAD_NODES_POS( pPos1, q3, quad.nNode[ 3 ] );
  1111. // note: this is negative velocitie (backward-time velocities)
  1112. FourVectors v0 = p0 - q0, v1 = p1 - q1, v2 = p2 - q2, v3 = p3 - q3;
  1113. FourVectors vPremulTarget = v0 * f4MulTarget;
  1114. p1 = q1 + v1 * f4MulOriginal + vPremulTarget;
  1115. p2 = q2 + v2 * f4MulOriginal + vPremulTarget;
  1116. p3 = q3 + v3 * f4MulOriginal + vPremulTarget;
  1117. SAVE_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1118. SAVE_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1119. SAVE_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1120. }
  1121. // with no points fixed, we're conserving momentum
  1122. for ( uint nQuad = m_nSimdQuadCount[ 1 ]; nQuad < m_nSimdQuadCount[ 0 ]; ++nQuad )
  1123. {
  1124. const FeSimdQuad_t &quad = m_pSimdQuads[ nQuad ];
  1125. FourVectors p0, p1, p2, p3, q0, q1, q2, q3;
  1126. LOAD_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1127. LOAD_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1128. LOAD_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1129. LOAD_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1130. LOAD_NODES_POS( pPos1, q0, quad.nNode[ 0 ] );
  1131. LOAD_NODES_POS( pPos1, q1, quad.nNode[ 1 ] );
  1132. LOAD_NODES_POS( pPos1, q2, quad.nNode[ 2 ] );
  1133. LOAD_NODES_POS( pPos1, q3, quad.nNode[ 3 ] );
  1134. // note: this is negative velocitie (backward-time velocities)
  1135. FourVectors v0 = p0 - q0, v1 = p1 - q1, v2 = p2 - q2, v3 = p3 - q3;
  1136. const fltx4 *m = quad.f4Weights;
  1137. FourVectors vPremulTarget = ( v0 * m[ 0 ] + v1 * m[ 1 ] + v2 * m[ 2 ] + v3 * m[ 3 ] ) * f4MulTarget; // ~12 flops
  1138. p0 = q0 + v0 * f4MulOriginal + vPremulTarget;
  1139. p1 = q1 + v1 * f4MulOriginal + vPremulTarget;
  1140. p2 = q2 + v2 * f4MulOriginal + vPremulTarget;
  1141. p3 = q3 + v3 * f4MulOriginal + vPremulTarget;
  1142. SAVE_NODES_POS( pPos0, p0, quad.nNode[ 0 ] );
  1143. SAVE_NODES_POS( pPos0, p1, quad.nNode[ 1 ] );
  1144. SAVE_NODES_POS( pPos0, p2, quad.nNode[ 2 ] );
  1145. SAVE_NODES_POS( pPos0, p3, quad.nNode[ 3 ] );
  1146. }
  1147. }
  1148. void CFeModel::SmoothRodVelocityField( fltx4 *pPos0, const fltx4 *pPos1, float flBlendFactor )
  1149. {
  1150. fltx4 f4MulOriginal = ReplicateX4( flBlendFactor ), f4MulTarget = Four_Ones - f4MulOriginal;
  1151. for ( uint i = 0; i < m_nSimdRodCount; ++i )
  1152. {
  1153. const FeSimdRodConstraint_t &rod = m_pSimdRods[ i ];
  1154. AssertDbg(
  1155. rod.nNode[ 0 ][ 0 ] < m_nNodeCount && rod.nNode[ 1 ][ 0 ] < m_nNodeCount
  1156. && rod.nNode[ 0 ][ 1 ] < m_nNodeCount && rod.nNode[ 1 ][ 1 ] < m_nNodeCount
  1157. && rod.nNode[ 0 ][ 2 ] < m_nNodeCount && rod.nNode[ 1 ][ 2 ] < m_nNodeCount
  1158. && rod.nNode[ 0 ][ 3 ] < m_nNodeCount && rod.nNode[ 1 ][ 3 ] < m_nNodeCount
  1159. );
  1160. FourVectors p0, p1, q0, q1;
  1161. LOAD_NODES_POS( pPos0, p0, rod.nNode[ 0 ] );
  1162. LOAD_NODES_POS( pPos0, p1, rod.nNode[ 1 ] );
  1163. LOAD_NODES_POS( pPos1, q0, rod.nNode[ 0 ] );
  1164. LOAD_NODES_POS( pPos1, q1, rod.nNode[ 1 ] );
  1165. // this is conservation of momentum principle, and it works directly with m1 = 1-w0 because: invM1/(invM0+invM1) == m0/(m0+m1)
  1166. FourVectors v0 = p0 - q0, v1 = p1 - q1;
  1167. FourVectors vPremulTarget = ( v0 + ( v1 - v0 ) * rod.f4Weight0 ) * f4MulTarget;// v0 * ( Four_Ones - rod.f4Weight0 ) + v1 * rod.f4Weight0;
  1168. p0 = q0 + v0 * f4MulOriginal + vPremulTarget;
  1169. p1 = q1 + v1 * f4MulOriginal + vPremulTarget;
  1170. SAVE_NODES_POS( pPos0, p0, rod.nNode[ 0 ] );
  1171. SAVE_NODES_POS( pPos0, p1, rod.nNode[ 1 ] );
  1172. }
  1173. }
  1174. SVD::SymMatrix3< float > AsSymMatrix3( const CovMatrix3 &cov )
  1175. {
  1176. SVD::SymMatrix3< float > sym;
  1177. sym.m00() = cov.m_vDiag.x;
  1178. sym.m11() = cov.m_vDiag.y;
  1179. sym.m22() = cov.m_vDiag.z;
  1180. sym.m01() = cov.m_flXY;
  1181. sym.m02() = cov.m_flXZ;
  1182. sym.m12() = cov.m_flYZ;
  1183. return sym;
  1184. }
  1185. matrix3x4_t AsMatrix3x4( SVD::Matrix3< float > &m, const Vector &vOrigin )
  1186. {
  1187. matrix3x4_t res;
  1188. for ( int i = 0; i < 3; ++i )
  1189. for ( int j = 0; j < 3; ++j )
  1190. res.m_flMatVal[ i ][ j ] = m.m[ i ][ j ];
  1191. res.SetOrigin( vOrigin );
  1192. return res;
  1193. }
  1194. matrix3x4_t AsMatrix3x4_Transposed( SVD::Matrix3< float > &m, const Vector &vOrigin )
  1195. {
  1196. matrix3x4_t res;
  1197. for ( int i = 0; i < 3; ++i )
  1198. for ( int j = 0; j < 3; ++j )
  1199. res.m_flMatVal[ i ][ j ] = m.m[ j ][ i ];
  1200. res.SetOrigin( vOrigin );
  1201. return res;
  1202. }
  1203. CovMatrix3 CovMatrix3::GetPseudoInverse()
  1204. {
  1205. SVD::SymMatrix3< float > sym = AsSymMatrix3( *this );
  1206. SVD::SymMatrix3< float > pi = SVD::PseudoInverse( sym );
  1207. CovMatrix3 covPi;
  1208. covPi.m_vDiag.x = pi.m00();
  1209. covPi.m_vDiag.y = pi.m11();
  1210. covPi.m_vDiag.z = pi.m22();
  1211. covPi.m_flXY = pi.m01();
  1212. covPi.m_flYZ = pi.m12();
  1213. covPi.m_flXZ = pi.m02();
  1214. return covPi;
  1215. }
  1216. void CFeModel::FitCenters( fltx4 *pPos )const
  1217. {
  1218. uint nMatrix = 0, nRunningWeight = 0;
  1219. for ( ; nMatrix < m_nFitMatrixCount[ 2 ]; ++nMatrix )
  1220. {
  1221. //AssertDbg( nRunningWeight == fm.nBegin );
  1222. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1223. AssertDbg( nRunningWeight + 2 <= fm.nEnd );
  1224. const FeFitWeight_t *w = m_pFitWeights + nRunningWeight;
  1225. // weights don't matter, because we'll just use the line between the two nodes as a hinge
  1226. pPos[ fm.nNode ] = Four_PointFives * ( pPos[ w[ 0 ].nNode ] + pPos[ w[ 1 ].nNode ] );
  1227. nRunningWeight = fm.nEnd;
  1228. }
  1229. for ( ; nMatrix < m_nFitMatrixCount[ 1 ]; ++nMatrix )
  1230. {
  1231. //AssertDbg( nRunningWeight == fm.nBegin );
  1232. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1233. const FeFitWeight_t &w = m_pFitWeights[ nRunningWeight ];
  1234. pPos[ fm.nNode ] = pPos[ w.nNode ];// we'll hinge around this point
  1235. nRunningWeight = fm.nEnd;
  1236. }
  1237. for ( ; nMatrix < m_nFitMatrixCount[ 0 ]; ++nMatrix )
  1238. {
  1239. //AssertDbg( nRunningWeight == fm.nBegin );
  1240. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1241. const FeFitWeight_t *w = m_pFitWeights;
  1242. fltx4 v4Center = Four_Zeros;
  1243. fltx4 f4Sum = Four_Zeros;
  1244. uint nEndDominant = fm.nBeginDynamic == nRunningWeight ? fm.nEnd : fm.nBeginDynamic; // count only static nodes if there are any static nodes here
  1245. for ( ; nRunningWeight < nEndDominant; ++nRunningWeight )
  1246. {
  1247. fltx4 f4Weight = ReplicateX4( &w[ nRunningWeight ].flWeight );
  1248. f4Sum += f4Weight;
  1249. v4Center += pPos[ w[ nRunningWeight ].nNode ] * f4Weight;
  1250. }
  1251. pPos[ fm.nNode ] = v4Center * ReciprocalSIMD( f4Sum );// we'll hinge around this point
  1252. nRunningWeight = fm.nEnd; // go to the next
  1253. }
  1254. }
  1255. void CFeModel::FitTransforms( const VectorAligned *pPos, matrix3x4a_t *pOut )const
  1256. {
  1257. uint nMatrix = 0;
  1258. const FeFitWeight_t *pWeights = m_pFitWeights;
  1259. matrix3x4a_t tm;
  1260. /*
  1261. for ( ; nMatrix < m_nFitMatrixCount[ 2 ]; ++nMatrix )
  1262. {
  1263. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1264. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1265. //Vector vAxis = pPos[ pWeights[ 0 ].nNode ] - pPos[ pWeights[ 1 ].nNode ];
  1266. FitTransform( &tm, fm, pPos, pWeights + 2, pWeightsEnd );
  1267. pOut[ fm.nCtrl ] = tm * TransformMatrix( fm.bone );
  1268. pWeights = pWeightsEnd;
  1269. }
  1270. for ( ; nMatrix < m_nFitMatrixCount[ 1 ]; ++nMatrix )
  1271. {
  1272. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1273. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1274. AssertDbg( pWeights + 1 <= pWeightsEnd );
  1275. FitTransform( &tm, fm, pPos, pWeights + 1, pWeightsEnd );
  1276. pOut[ fm.nCtrl ] = tm * TransformMatrix( fm.bone );
  1277. pWeights = pWeightsEnd;
  1278. }
  1279. */
  1280. for ( ; nMatrix < m_nFitMatrixCount[ 0 ]; ++nMatrix )
  1281. {
  1282. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1283. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1284. FitTransform( &tm, fm, pPos, pWeights, pWeightsEnd );
  1285. pOut[ fm.nCtrl ] = tm * TransformMatrix( fm.bone );
  1286. pWeights = pWeightsEnd;
  1287. }
  1288. }
  1289. void CFeModel::FeedbackFitTransforms( VectorAligned *pPos, float flStiffness )const
  1290. {
  1291. uint nMatrix = 0;
  1292. matrix3x4a_t tm;
  1293. const FeFitWeight_t *pWeights = m_pFitWeights;
  1294. /*
  1295. for ( ; nMatrix < m_nFitMatrixCount[ 2 ]; ++nMatrix )
  1296. {
  1297. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1298. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1299. FitTransform( &tm, fm, pPos, pWeights + 2, pWeightsEnd );
  1300. FeedbackFitTransform( tm, fm, pPos, m_pFitWeights + fm.nBeginDynamic, pWeightsEnd, flStiffness );
  1301. pWeights = pWeightsEnd;
  1302. }
  1303. for ( ; nMatrix < m_nFitMatrixCount[ 1 ]; ++nMatrix )
  1304. {
  1305. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1306. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1307. AssertDbg( pWeights + 1 <= pWeightsEnd );
  1308. FitTransform( &tm, fm, pPos, pWeights + 1, pWeightsEnd );
  1309. FeedbackFitTransform( tm, fm, pPos, m_pFitWeights + fm.nBeginDynamic, pWeightsEnd, flStiffness );
  1310. pWeights = pWeightsEnd;
  1311. }
  1312. */
  1313. for ( ; nMatrix < m_nFitMatrixCount[ 0 ]; ++nMatrix )
  1314. {
  1315. const FeFitMatrix_t &fm = m_pFitMatrices[ nMatrix ];
  1316. const FeFitWeight_t *pWeightsEnd = m_pFitWeights + fm.nEnd;
  1317. FitTransform( &tm, fm, pPos, pWeights, pWeightsEnd );
  1318. FeedbackFitTransform( tm, fm, pPos, m_pFitWeights + fm.nBeginDynamic, pWeightsEnd, flStiffness );
  1319. pWeights = pWeightsEnd;
  1320. }
  1321. }
  1322. Vector GetColumn( const SVD::Matrix3< float > &m, int c )
  1323. {
  1324. return Vector( m.m[ 0 ][ c ], m.m[ 1 ][ c ], m.m[ 2 ][ c ] );
  1325. }
  1326. void CFeModel::FeedbackFitTransform( const matrix3x4a_t &tm, const FeFitMatrix_t &fm, VectorAligned *pPos, const FeFitWeight_t *pWeights, const FeFitWeight_t *pWeightsEnd, float flStiffness )const
  1327. {
  1328. for ( const FeFitWeight_t *p = pWeights; p < pWeightsEnd; ++p )
  1329. {
  1330. VectorAligned &refPos = pPos[ p->nNode ];
  1331. Vector vQ = m_pInitPose[ p->nNode ].m_vPosition - fm.vCenter;
  1332. refPos = refPos * ( 1.0f - flStiffness ) + VectorTransform( vQ, tm ) * flStiffness;
  1333. }
  1334. }
  1335. void CFeModel::FitTransform( matrix3x4a_t*pOut, const FeFitMatrix_t &fm, const VectorAligned *pPos, const FeFitWeight_t *pWeights, const FeFitWeight_t *pWeightsEnd )const
  1336. {
  1337. const VectorAligned &vDynCenter = pPos[ fm.nNode ];
  1338. SVD::Matrix3<float> Apq;
  1339. Apq.SetZero();
  1340. for ( const FeFitWeight_t *p = pWeights; p < pWeightsEnd; ++p )
  1341. {
  1342. Vector vP = pPos[ p->nNode ] - vDynCenter, vQ = m_pInitPose[ p->nNode ].m_vPosition - fm.vCenter;
  1343. float m = p->flWeight;
  1344. for ( int i = 0; i < 3; ++i )
  1345. for ( int j = 0; j < 3; ++j )
  1346. Apq.m[ i ][ j ] += vP[ i ] * vQ[ j ] * m;
  1347. };
  1348. SVD::SvdIterator<float> svd;
  1349. //SVD::Matrix3<float> a = Apq * SVD::Matrix3<float>( AsSymMatrix3( fm.AqqInv ) ); // optimal but skewed fitting matrix; Aqq is not really necessary except when we clamp the singular values (allow some stretch)
  1350. svd.Init( Apq );
  1351. svd.Iterate( 6, FLT_EPSILON ); // the epsilon is the sum of squares of sines of Givens rotations, I think it's a very good measure of quality in this case
  1352. SVD::Matrix3< float > v = svd.ComputeV();
  1353. // B = US = AV
  1354. SVD::Matrix3< float > us = Apq * v;
  1355. // columns of U*S matrix have length of singular values. Clamp them, negate the smallest if needed (if the matrix is mirror matrix)
  1356. // TODO: measure and maybe rewrite with SIMD, as this seems slow
  1357. // sort by singular values
  1358. int nLarge, nMedium, nSmall;
  1359. float flSmallAxisParity;
  1360. if ( svd.ata.m00() > svd.ata.m11() )
  1361. {
  1362. if ( svd.ata.m11() > svd.ata.m22() )
  1363. {
  1364. nLarge = 0; nMedium = 1; nSmall = 2; flSmallAxisParity = 1.0f;
  1365. }
  1366. else if ( svd.ata.m22() > svd.ata.m00() )
  1367. {
  1368. nLarge = 2; nMedium = 0; nSmall = 1; flSmallAxisParity = 1.0f;
  1369. }
  1370. else
  1371. {
  1372. nLarge = 0; nMedium = 2; nSmall = 1; flSmallAxisParity = -1.0f;
  1373. }
  1374. }
  1375. else
  1376. {
  1377. if ( svd.ata.m00() > svd.ata.m22() )
  1378. {
  1379. nLarge = 1; nMedium = 0; nSmall = 2; flSmallAxisParity = -1.0f;
  1380. }
  1381. else if ( svd.ata.m22() > svd.ata.m11() )
  1382. {
  1383. nLarge = 2; nMedium = 1; nSmall = 0; flSmallAxisParity = -1.0f;
  1384. }
  1385. else
  1386. {
  1387. nLarge = 1; nMedium = 2; nSmall = 0; flSmallAxisParity = 1.0f;
  1388. }
  1389. }
  1390. #if defined( _DEBUG ) && defined( DBGFLAG_ASSERT )
  1391. float s[ 3 ] = { svd.ata.m00(), svd.ata.m11(), svd.ata.m22() };
  1392. Assert( s[ nLarge ] >= s[ nMedium ] && s[ nMedium ] >= s[ nSmall ] );
  1393. #endif
  1394. matrix3x4a_t u;
  1395. Vector vMainAxis( us.m[0][nLarge], us.m[1][nLarge], us.m[2][nLarge ] );
  1396. float flMainAxisLength = vMainAxis.Length();
  1397. if ( flMainAxisLength < FLT_EPSILON )
  1398. {
  1399. // really, really small - very undefined matrix
  1400. pOut->SetToIdentity();
  1401. return;
  1402. }
  1403. vMainAxis /= flMainAxisLength;
  1404. u.SetColumn( vMainAxis, ( MatrixAxisType_t )nLarge );
  1405. Vector vMedAxis( us.m[0][nMedium], us.m[1][nMedium], us.m[2][nMedium ] );
  1406. vMedAxis -= DotProduct( vMainAxis, vMedAxis ) * vMainAxis;
  1407. float flMedAxisLength = vMedAxis.Length();
  1408. if( flMedAxisLength > FLT_EPSILON)
  1409. {
  1410. vMedAxis /= flMedAxisLength;
  1411. }
  1412. else
  1413. {
  1414. // fallback
  1415. vMedAxis = VectorPerpendicularToVector(vMainAxis);
  1416. }
  1417. u.SetColumn(vMedAxis, ( MatrixAxisType_t)nMedium );
  1418. // the third axis doesn't matter if we don't clamp, and we can only recover it up to the sign
  1419. u.SetColumn( CrossProduct( vMainAxis, vMedAxis ) * flSmallAxisParity, ( MatrixAxisType_t )nSmall );
  1420. u.SetOrigin( vec3_origin );
  1421. #if 0
  1422. // U matrix columns are the axes we need; some of them will be Zero
  1423. fltx4 uRow0 = LoadUnaligned3SIMD( us.m[ 0 ] );
  1424. fltx4 uRow1 = LoadUnaligned3SIMD( us.m[ 1 ] );
  1425. fltx4 uRow2 = LoadUnaligned3SIMD( us.m[ 2 ] );
  1426. fltx4 uLengthSqr = uRow0 * uRow0 + uRow1 * uRow1 + uRow2 * uRow2;
  1427. fltx4 uLengthInv = ReciprocalSqrtSIMD( MaxSIMD( uLengthSqr, Four_Epsilons ) );
  1428. // When/if we support non-orthouniform animation matrices, we should clamp the length, not set it to 1. That will make for much nicer softbody
  1429. matrix3x4a_t u;
  1430. u.SIMDRow( 0 ) = /*SetWToZeroSIMD*/( uRow0 * uLengthInv );
  1431. u.SIMDRow( 1 ) = /*SetWToZeroSIMD*/( uRow1 * uLengthInv );
  1432. u.SIMDRow( 2 ) = /*SetWToZeroSIMD*/( uRow2 * uLengthInv );
  1433. #ifdef _DEBUG
  1434. fltx4 sSqr = { svd.ata.m00(), svd.ata.m11(), svd.ata.m22(), 0.0f }; NOTE_UNUSED( sSqr );
  1435. fltx4 sInv = ReciprocalSqrtSIMD( MaxSIMD( sSqr, Four_Epsilons ) ); NOTE_UNUSED( sInv );// reciprocal estimate is actually fine here, but may produce slightly non-orthonormal matrices (error < 1%) which assert in a bunch of places
  1436. // ( SVD::RsqrtEst( Max( us.ColLenSqr( 0 ), 1e-30f ) ), SVD::RsqrtEst( Max( us.ColLenSqr( 1 ), 1e-30f ) ), SVD::RsqrtEst( Max( us.ColLenSqr( 2 ), 1e-30f ) ) );
  1437. // TODO: implement QR or some other recovery from near-0 singular values
  1438. #endif
  1439. float det = u.GetDeterminant();
  1440. if ( det < 0 )
  1441. {
  1442. // this is a mirror matrix;
  1443. int nColumn = AsVector( uLengthSqr ).SmallestComponent(); //smallest sigma, largest sigma^-1
  1444. u[ 0 ][ nColumn ] = -u[ 0 ][ nColumn ];
  1445. u[ 1 ][ nColumn ] = -u[ 1 ][ nColumn ];
  1446. u[ 2 ][ nColumn ] = -u[ 2 ][ nColumn ];
  1447. }
  1448. #endif
  1449. matrix3x4_t vt = AsMatrix3x4_Transposed( v, vec3_origin ), uvt;
  1450. ConcatTransforms( u, vt, uvt );
  1451. uvt.SetOrigin( vDynCenter );
  1452. *pOut = uvt;
  1453. }
  1454. void CFeModel::FitTransform2D( matrix3x4a_t *pOut, const FeFitMatrix_t &fm, const Vector &vAxis, const VectorAligned *pPos, const FeFitWeight_t *pWeights, const FeFitWeight_t *pWeightsEnd )const
  1455. {
  1456. FitTransform( pOut, fm, pPos, pWeights, pWeightsEnd );
  1457. /*
  1458. const VectorAligned &vDynCenter = pPos[ fm.nNode ];
  1459. class CFitTransform2D
  1460. {
  1461. public:
  1462. CFitTransform2D( const Vector &vAxis )
  1463. {
  1464. m_vAxisZ = vAxis.NormalizedSafe( Vector( 0, 0, 1 ) );
  1465. m_vAxisX = VectorPerpendicularToVector( m_vAxisZ );
  1466. m_vAxisY = CrossProduct( m_vAxisZ, m_vAxisX );
  1467. }
  1468. Vector2D ToLocalXY( const Vector &v ) { return Vector2D( DotProduct( m_vAxisX, v ), DotProduct( m_vAxisY, v ) ); }
  1469. Vector m_vAxisX;
  1470. Vector m_vAxisY;
  1471. Vector m_vAxisZ;
  1472. };
  1473. CFitTransform2D fitTm( vAxis );
  1474. float flCos = 0, flSin = 0;
  1475. for ( const FeFitWeight_t *p = pWeights; p < pWeightsEnd; ++p )
  1476. {
  1477. Vector2D p = fitTm.ToLocalXY( pPos[ p->nNode ] - vDynCenter ), q = fitTm.ToLocalXY( m_pInitPose[ p->nNode ].m_vPosition - fm.vCenter );
  1478. }
  1479. CSinCosRotation2D rotation( flCos, flSin );
  1480. */
  1481. }