Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

229 lines
7.9 KiB

  1. //----------------------------------------------------------------------------------------------------------
  // LOAD_NODES_POS( POS, V, IDX )
  // Gathers the four nodes POS[ IDX[ 0 ] ] .. POS[ IDX[ 3 ] ] and transposes them
  // from array-of-structures into structure-of-arrays form:
  //   V.x = ( p0.x, p1.x, p2.x, p3.x )
  //   V.y = ( p0.y, p1.y, p2.y, p3.y )
  //   V.z = ( p0.z, p1.z, p2.z, p3.z )
  // The fourth lane of every node is read but not stored anywhere.
  //   POS - indexable sequence of fltx4 node positions
  //   V   - lvalue whose .x/.y/.z members receive the transposed lanes
  //   IDX - indexable sequence holding at least four node indices
  // Shuffle masks used: 0x44 -> ( a0, a1, b0, b1 ), 0xEE -> ( a2, a3, b2, b3 ),
  //                     0x88 -> ( a0, a2, b0, b2 ), 0xDD -> ( a1, a3, b1, b3 ).
  // Every argument is parenthesized inside the expansion so that expressions such as
  // `pBase + nOffset` or `*pOut` bind as the caller intends. Arguments are evaluated
  // more than once and must not refer to the macro's own temporaries
  // ( _pos0.._pos3, tmp0..tmp3 ). The expansion is a braced block.
  #define LOAD_NODES_POS( POS, V, IDX ) { \
      fltx4 _pos0 = ( POS )[ ( IDX )[ 0 ] ], _pos1 = ( POS )[ ( IDX )[ 1 ] ], _pos2 = ( POS )[ ( IDX )[ 2 ] ], _pos3 = ( POS )[ ( IDX )[ 3 ] ]; \
      __m128 tmp3, tmp2, tmp1, tmp0; \
      tmp0 = _mm_shuffle_ps( ( _pos0 ), ( _pos1 ), 0x44 ); \
      tmp2 = _mm_shuffle_ps( ( _pos0 ), ( _pos1 ), 0xEE ); \
      tmp1 = _mm_shuffle_ps( ( _pos2 ), ( _pos3 ), 0x44 ); \
      tmp3 = _mm_shuffle_ps( ( _pos2 ), ( _pos3 ), 0xEE ); \
      ( V ).x = _mm_shuffle_ps( tmp0, tmp1, 0x88 ); \
      ( V ).y = _mm_shuffle_ps( tmp0, tmp1, 0xDD ); \
      ( V ).z = _mm_shuffle_ps( tmp2, tmp3, 0x88 ); \
  }
  // LOAD_NODES( V, IDX )
  // Convenience form of LOAD_NODES_POS that reads from a position array named
  // `pPos`, which must be visible at the point of use. Gathers pPos[ IDX[ 0..3 ] ]
  // and stores the transposed x/y/z lanes into V.x, V.y, V.z.
  // Delegating keeps a single copy of the shuffle sequence instead of two that can drift apart.
  #define LOAD_NODES( V, IDX ) LOAD_NODES_POS( pPos, V, IDX )
  // SAVE_NODES_POS( POS, V, IDX )
  // Inverse of LOAD_NODES_POS: transposes the structure-of-arrays value V back into
  // four nodes and scatters them to POS[ IDX[ 0 ] ] .. POS[ IDX[ 3 ] ]:
  //   POS[ IDX[ k ] ] = ( V.x[k], V.y[k], V.z[k], 0 )
  // The fourth lane of every written node comes from Four_Zeros, so whatever was
  // stored there before is overwritten with zero.
  //   POS - indexable, assignable sequence of fltx4 node positions
  //   V   - value with .x/.y/.z SIMD members to write out
  //   IDX - indexable sequence holding at least four node indices
  // Shuffle masks used: 0x44 -> ( a0, a1, b0, b1 ), 0xEE -> ( a2, a3, b2, b3 ),
  //                     0x88 -> ( a0, a2, b0, b2 ), 0xDD -> ( a1, a3, b1, b3 ).
  // Every argument is parenthesized inside the expansion so that expressions such as
  // `pBase + nOffset` or `*pIn` bind as the caller intends. Arguments are evaluated
  // more than once and must not refer to the macro's temporaries ( tmp0..tmp3 ).
  // The expansion is a braced block.
  #define SAVE_NODES_POS( POS, V, IDX ) { \
      __m128 tmp3, tmp2, tmp1, tmp0; \
      tmp0 = _mm_shuffle_ps( ( V ).x, ( V ).y, 0x44 ); \
      tmp2 = _mm_shuffle_ps( ( V ).x, ( V ).y, 0xEE ); \
      tmp1 = _mm_shuffle_ps( ( V ).z, Four_Zeros, 0x44 ); \
      tmp3 = _mm_shuffle_ps( ( V ).z, Four_Zeros, 0xEE ); \
      ( POS )[ ( IDX )[ 0 ] ] = _mm_shuffle_ps( tmp0, tmp1, 0x88 ); \
      ( POS )[ ( IDX )[ 1 ] ] = _mm_shuffle_ps( tmp0, tmp1, 0xDD ); \
      ( POS )[ ( IDX )[ 2 ] ] = _mm_shuffle_ps( tmp2, tmp3, 0x88 ); \
      ( POS )[ ( IDX )[ 3 ] ] = _mm_shuffle_ps( tmp2, tmp3, 0xDD ); \
  }
  // SAVE_NODES( V, IDX )
  // Convenience form of SAVE_NODES_POS that writes to a position array named
  // `pPos`, which must be visible at the point of use. Stores
  // ( V.x[k], V.y[k], V.z[k], 0 ) into pPos[ IDX[ k ] ] for k = 0..3.
  // Delegating keeps a single copy of the shuffle sequence instead of two that can drift apart.
  #define SAVE_NODES( V, IDX ) SAVE_NODES_POS( pPos, V, IDX )
  46. inline void CovMatrix3::InitForWahba( float m, const Vector &x )
  47. {
  48. m_vDiag.x = m * ( Sqr( x.y ) + Sqr( x.z ) );
  49. m_vDiag.y = m * ( Sqr( x.x ) + Sqr( x.z ) );
  50. m_vDiag.z = m * ( Sqr( x.x ) + Sqr( x.y ) );
  51. m_flXY = -m * x.x * x.y;
  52. m_flXZ = -m * x.x * x.z;
  53. m_flYZ = -m * x.y * x.z;
  54. }
  55. inline void CovMatrix3::Reset()
  56. {
  57. m_vDiag = vec3_origin;
  58. m_flXY = m_flXZ = m_flYZ = 0;
  59. }
  60. inline void CovMatrix3::AddCov( const Vector &d ) // d is supposedly a vector relatively to the mean of the set; i.e. we assume here that we're actually summing up voth d and -d
  61. {
  62. m_vDiag.x += Sqr( d.x );
  63. m_vDiag.y += Sqr( d.y );
  64. m_vDiag.z += Sqr( d.z );
  65. m_flXY += d.x * d.y;
  66. m_flXZ += d.x * d.z;
  67. m_flYZ += d.y * d.z;
  68. }
  69. inline void CovMatrix3::AddCov( const Vector &d, float m ) // d is supposedly a vector relatively to the mean of the set; i.e. we assume here that we're actually summing up voth d and -d
  70. {
  71. m_vDiag.x += m * Sqr( d.x );
  72. m_vDiag.y += m * Sqr( d.y );
  73. m_vDiag.z += m * Sqr( d.z );
  74. m_flXY += m * d.x * d.y;
  75. m_flXZ += m * d.x * d.z;
  76. m_flYZ += m * d.y * d.z;
  77. }
  78. // the element of the sum on the left side of the approximate solution of Wahba's problem (see wahba.nb for details)
  79. // thi sis essentially Sum[Mi Xi * w * Xi], Mi = weights, "*" means cross product, Xi is a deformed polygon vertex relative to center of mass,
  80. // 21 flops, with madd
  81. inline void CovMatrix3::AddForWahba( float m, const Vector &x )
  82. {
  83. m_vDiag.x += m * ( Sqr( x.y ) + Sqr( x.z ) );
  84. m_vDiag.y += m * ( Sqr( x.x ) + Sqr( x.z ) );
  85. m_vDiag.z += m * ( Sqr( x.x ) + Sqr( x.y ) );
  86. m_flXY -= m * x.x * x.y;
  87. m_flXZ -= m * x.x * x.z;
  88. m_flYZ -= m * x.y * x.z;
  89. }
  90. inline void CovMatrix3::NormalizeEigenvalues( )
  91. {
  92. // trace is the sum of eigenvalues; it's not a perfect way , but it's one way
  93. float flNorm = 1.0f / ( m_vDiag.x + m_vDiag.y + m_vDiag.z );
  94. m_vDiag *= flNorm;
  95. m_flXY *= flNorm;
  96. m_flXZ *= flNorm;
  97. m_flYZ *= flNorm;
  98. }
  99. inline void CovMatrix3::RegularizeEigenvalues( )
  100. {
  101. m_vDiag += Vector( .001f, .001f, .001f );
  102. NormalizeEigenvalues( );
  103. m_vDiag += Vector( .1f, .1f, .1f );
  104. }
  105. inline Vector CovMatrix3::operator * ( const Vector &d )
  106. {
  107. return Vector(
  108. m_vDiag.x * d.x + m_flXY * d.y + m_flXZ * d.z,
  109. m_flXY * d.x + m_vDiag.y * d.y + m_flYZ * d.z,
  110. m_flXZ * d.x + m_flYZ * d.y + m_vDiag.z * d.z
  111. );
  112. }
  113. inline void FourCovMatrices3::InitForWahba( const fltx4 &m, const FourVectors &x )
  114. {
  115. m_vDiag.x = m * ( x.y * x.y + x.z * x.z );
  116. m_vDiag.y = m * ( x.x * x.x + x.z * x.z );
  117. m_vDiag.z = m * ( x.x * x.x + x.y * x.y );
  118. m_flXY = -m * x.x * x.y;
  119. m_flXZ = -m * x.x * x.z;
  120. m_flYZ = -m * x.y * x.z;
  121. }
  122. // the element of the sum on the left side of the approximate solution of Wahba's problem (see wahba.nb for details)
  123. // thi sis essentially Sum[Mi Xi * w * Xi], Mi = weights, "*" means cross product, Xi is a deformed polygon vertex relative to center of mass,
  124. // 21 flops, with madd
  125. inline void FourCovMatrices3::AddForWahba( const fltx4 &m, const FourVectors &x )
  126. {
  127. m_vDiag.x += m * ( x.y * x.y + x.z * x.z );
  128. m_vDiag.y += m * ( x.x * x.x + x.z * x.z );
  129. m_vDiag.z += m * ( x.x * x.x + x.y * x.y );
  130. m_flXY -= m * x.x * x.y;
  131. m_flXZ -= m * x.x * x.z;
  132. m_flYZ -= m * x.y * x.z;
  133. }
  134. inline FourVectors FourCovMatrices3::operator * ( const FourVectors &d )
  135. {
  136. return FourVectors(
  137. m_vDiag.x * d.x + m_flXY * d.y + m_flXZ * d.z,
  138. m_flXY * d.x + m_vDiag.y * d.y + m_flYZ * d.z,
  139. m_flXZ * d.x + m_flYZ * d.y + m_vDiag.z * d.z
  140. );
  141. }
  142. inline float Perimeter( const FeQuad_t &quad )
  143. {
  144. return
  145. ( quad.vShape[ 0 ].AsVector3D( ) - quad.vShape[ 1 ].AsVector3D( ) ).Length( ) +
  146. ( quad.vShape[ 1 ].AsVector3D( ) - quad.vShape[ 2 ].AsVector3D( ) ).Length( ) +
  147. ( quad.vShape[ 2 ].AsVector3D( ) - quad.vShape[ 3 ].AsVector3D( ) ).Length( ) +
  148. ( quad.vShape[ 3 ].AsVector3D( ) - quad.vShape[ 0 ].AsVector3D( ) ).Length( );
  149. }
  150. inline fltx4 Perimeter( const FeSimdQuad_t &quad )
  151. {
  152. return
  153. ( quad.vShape[ 0 ] - quad.vShape[ 1 ] ).Length( ) +
  154. ( quad.vShape[ 1 ] - quad.vShape[ 2 ] ).Length( ) +
  155. ( quad.vShape[ 2 ] - quad.vShape[ 3 ] ).Length( ) +
  156. ( quad.vShape[ 3 ] - quad.vShape[ 0 ] ).Length( );
  157. }
  158. inline float Perimeter( const FeTri_t &tri )
  159. {
  160. return fabsf( tri.v1x ) + tri.v2.Length() + sqrtf( tri.v2.y * tri.v2.y + ( tri.v2.x - tri.v1x ) * ( tri.v2.x - tri.v1x ) );
  161. }
  162. inline fltx4 Perimeter( const FeSimdTri_t &tri )
  163. {
  164. return AbsSIMD( tri.v1x ) + tri.v2.Length( ) + SqrtSIMD( tri.v2.y * tri.v2.y + ( tri.v2.x - tri.v1x ) * ( tri.v2.x - tri.v1x ) );
  165. }
  166. FORCEINLINE float CrossProductZ( const Vector2D &v1, const Vector2D &v2 )
  167. {
  168. return v1.x * v2.y - v1.y * v2.x;
  169. }
  170. FORCEINLINE float CrossProductZ( const Vector2D &v1, const Vector4D &v2 )
  171. {
  172. return v1.x * v2.y - v1.y * v2.x;
  173. }
  174. FORCEINLINE float CrossProductZ( const Vector4D &v1, const Vector2D &v2 )
  175. {
  176. return v1.x * v2.y - v1.y * v2.x;
  177. }
  178. FORCEINLINE fltx4 CrossProductZ( const FourVectors &v1, const FourVectors2D &v2 )
  179. {
  180. return v1.x * v2.y - v1.y * v2.x;
  181. }
  182. FORCEINLINE float DotProduct( const Vector4D &v1, const Vector2D &v2 )
  183. {
  184. return v1.x * v2.x + v1.y * v2.y;
  185. }
  186. FORCEINLINE FourVectors AndSIMD( const FourVectors &left, const fltx4 &right )
  187. {
  188. FourVectors out;
  189. out.x = AndSIMD( left.x, right );
  190. out.y = AndSIMD( left.y, right );
  191. out.z = AndSIMD( left.z, right );
  192. return out;
  193. }