Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

519 lines
18 KiB

  1. //////////////////////////////////////////////////////////////////////////
  2. //
  3. // Implementaiton of iterative 3x3 SVD without branches, using approximate Givens rotations,
  4. // applied sequentially to every off-diagonal element. The same code can compile into scalar, SSE and AVX
  5. // by templetizing on the Float data type.
  6. //
  7. #include "ssemath.h"
  8. namespace SVD
  9. {
  10. inline fltx4 CmpLt( const fltx4 &a, const fltx4 &b ) { return CmpLtSIMD( a, b ); }
  11. inline bool CmpLt( float a, float b ) { return a < b ? true : false; }
  12. inline bool AllLe( const fltx4 &a, const fltx4 &b ){ return IsAllGreaterThanOrEq( b, a ); }
  13. inline bool AllLe( float a, float b ) { return a <= b; }
  14. template <typename Float >
  15. struct FloatTraits
  16. {
  17. typedef Float Bool;
  18. };
  19. template <>
  20. struct FloatTraits < float >
  21. {
  22. typedef bool Bool;
  23. };
  24. template< typename Float > inline Float Replicate( float a );
  25. template <> inline fltx4 Replicate< fltx4 >( float a )
  26. {
  27. return _mm_set1_ps( a );
  28. }
  29. template <> inline float Replicate< float >( float a ){ return a; }
  30. template <typename Float>
  31. class SymMatrix3;
  32. inline fltx4 RsqrtEst( const fltx4 &a ){ return ReciprocalSqrtEstSIMD( a ); }
  33. template <typename Float>
  34. inline Float Rsqrt( const Float &a )
  35. {
  36. fltx4 it = RsqrtEst( a );
  37. // a single Newton iteration; can repeat multiple times
  38. return Replicate<Float>( .5f ) * it * ( Replicate<Float>( 3.0f ) - ( a * it * it ) );
  39. }
  40. inline float RsqrtEst( float a )
  41. {
  42. float res;
  43. StoreUnalignedFloat( &res, RsqrtEst( LoadUnalignedFloatSIMD(&a) ) );
  44. return res;
  45. }
  46. template <>
  47. inline float Rsqrt( const float &a ) { return 1.0f / sqrtf( a ); }
  48. inline fltx4 Select( const fltx4& a /*mask=0*/, const fltx4& b/*mask=1*/, const fltx4& mask )
  49. {
  50. // (((b ^ a) & mask)^a)
  51. return _mm_xor_ps( a, _mm_and_ps( mask, _mm_xor_ps( b, a ) ) );
  52. }
  53. inline float Select( float a, float b, bool mask )
  54. {
  55. return mask ? b : a;
  56. }
  57. template <typename Float>
  58. class Matrix3
  59. {
  60. public:
  61. Matrix3() {}
  62. Float m[ 3 ][ 3 ];
  63. Matrix3 < Float > operator * ( const Matrix3< Float > &other )const
  64. {
  65. Matrix3 < Float > res;
  66. for ( int i = 0; i < 3; ++i )
  67. for ( int j = 0; j < 3; ++j )
  68. res.m[ i ][ j ] = m[ i ][ 0 ] * other.m[ 0 ][ j ] + m[ i ][ 1 ] * other.m[ 1 ][ j ] + m[ i ][ 2 ] * other.m[ 2 ][ j ];
  69. return res;
  70. }
  71. Matrix3 < Float > operator - ( const Matrix3< Float > &other )const
  72. {
  73. Matrix3 < Float > res;
  74. for ( int i = 0; i < 3; ++i )
  75. for ( int j = 0; j < 3; ++j )
  76. res.m[ i ][ j ] = m[ i ][ j ] - other.m[ i ][ j ];
  77. return res;
  78. }
  79. explicit Matrix3< Float >( const SymMatrix3< Float > &other );
  80. Float FrobeniusNorm()const
  81. {
  82. Float sum = Replicate<Float>( 0.0f );
  83. for ( int i = 0; i < 3; ++i )
  84. for ( int j = 0; j < 3; ++j )
  85. sum += m[ i ][ j ] * m[ i ][ j ];
  86. return sum;
  87. }
  88. Float AtA( int i, int j )const
  89. {
  90. return m[ 0 ][ i ] * m[ 0 ][ j ] + m[ 1 ][ i ] * m[ 1 ][ j ] + m[ 2 ][ i ] * m[ 2 ][ j ];
  91. }
  92. void SetIdentity()
  93. {
  94. m[ 0 ][ 0 ] = m[ 1 ][ 1 ] = m[ 2 ][ 2 ] = Replicate<Float>( 1.0f );
  95. m[ 0 ][ 1 ] = m[ 1 ][ 0 ] = m[ 0 ][ 2 ] = m[ 2 ][ 0 ] = m[ 2 ][ 1 ] = m[ 1 ][ 2 ] = Replicate<Float>( 0.0f );
  96. }
  97. void SetZero()
  98. {
  99. memset( this, 0, sizeof( *this ) );
  100. }
  101. Float ColLenSqr( int j )const
  102. {
  103. return m[ 0 ][ j ] * m[ 0 ][ j ] + m[ 1 ][ j ] * m[ 1 ][ j ] + m[ 2 ][ j ] * m[ 2 ][ j ];
  104. }
  105. Float Det()const
  106. {
  107. return -( m[ 0 ][ 2 ]*m[ 1 ][ 1 ]*m[ 2 ][ 0 ] ) + m[ 0 ][ 1 ]*m[ 1 ][ 2 ]*m[ 2 ][ 0 ] + m[ 0 ][ 2 ]*m[ 1 ][ 0 ]*m[ 2 ][ 1 ] - m[ 0 ][ 0 ]*m[ 1 ][ 2 ]*m[ 2 ][ 1 ] - m[ 0 ][ 1 ]*m[ 1 ][ 0 ]*m[ 2 ][ 2 ] + m[ 0 ][ 0 ]*m[ 1 ][ 1 ]*m[ 2 ][ 2 ];
  108. }
  109. };
  110. template <typename Float>
  111. inline Matrix3 < Float > MulT( const Matrix3< Float > &u, const Matrix3< Float > &vt )
  112. {
  113. Matrix3 < Float > res;
  114. for ( int i = 0; i < 3; ++i )
  115. for ( int j = 0; j < 3; ++j )
  116. res.m[ i ][ j ] = u.m[ i ][ 0 ] * vt.m[ j ][ 0 ] + u.m[ i ][ 1 ] * vt.m[ j ][ 1 ] + u.m[ i ][ 2 ] * vt.m[ j ][ 2 ];
  117. return res;
  118. }
  119. template <typename Float>
  120. Float OrthogonalityError( const Matrix3<Float> &m )
  121. {
  122. Float result = Replicate<Float>( 0.0f );
  123. for ( int i = 0; i < 2; ++i )
  124. {
  125. for ( int j = i + 1; j < 3; ++j )
  126. {
  127. Float dot = m.m[ 0 ][ i ] * m.m[ 0 ][ j ] + m.m[ 1 ][ i ] * m.m[ 1 ][ j ] + m.m[ 2 ][ i ] * m.m[ 2 ][ j ];
  128. result += dot * dot;
  129. }
  130. }
  131. return result;
  132. }
  133. template <typename Float >
  134. class DiagMatrix3
  135. {
  136. public:
  137. Float m[ 3 ];
  138. };
  139. template <typename Float>
  140. class SymMatrix3
  141. {
  142. public:
  143. enum Index_t
  144. {
  145. a00, a10, a11, a20, a21, a22, Count, a01 = a10, a02 = a20, a12 = a21
  146. };
  147. Float m[ 6 ];
  148. Float &m00() { return m[ a00 ]; }
  149. Float &m01() { return m[ a01 ]; }
  150. Float &m02() { return m[ a02 ]; }
  151. Float &m11() { return m[ a11 ]; }
  152. Float &m12() { return m[ a12 ]; }
  153. Float &m22() { return m[ a22 ]; }
  154. Float OffDiagNorm()const { return m[ a01 ] * m[ a01 ] + m[ a21 ] * m[ a21 ] + m[ a02 ] * m[ a02 ]; }
  155. Float DiagNorm()const { return m[ a00 ] * m[ a00 ] + m[ a11 ] * m[ a11 ] + m[ a22 ] * m[ a22 ]; }
  156. };
  157. template < typename Float >
  158. Matrix3< Float >::Matrix3( const SymMatrix3< Float > &other )
  159. {
  160. m[ 0 ][ 0 ] = other.m[ other.a00 ];
  161. m[ 0 ][ 1 ] = m[ 1 ][ 0 ] = other.m[ other.a01 ];
  162. m[ 1 ][ 1 ] = other.m[ other.a11 ];
  163. m[ 2 ][ 0 ] = m[ 0 ][ 2 ] = other.m[ other.a01 ];
  164. m[ 2 ][ 1 ] = m[ 1 ][ 2 ] = other.m[ other.a01 ];
  165. m[ 2 ][ 2 ] = other.m[ other.a22 ];
  166. }
  167. template <typename Float >
  168. class SinCos
  169. {
  170. public:
  171. Float s, c;
  172. SinCos() {}
  173. SinCos( const Float &_c, const Float &_s ) : c( _c ), s( _s ) {}
  174. SinCos< Float> DoubleAngle()const
  175. {
  176. SinCos< Float> res;
  177. res.s = Replicate<Float>( 2.0f ) * s * c;
  178. res.c = c * c - s * s;
  179. return res;
  180. }
  181. };
  182. template <typename Float>
  183. class Quaternion
  184. {
  185. public:
  186. Float x, y, z, w;
  187. void SetIdentity()
  188. {
  189. x = y = z = Replicate<float>( 0.0f );
  190. w = Replicate<float>( 1.0f );
  191. }
  192. Quaternion<Float> operator * ( const Float &f )const
  193. {
  194. Quaternion< Float > res;
  195. res.x = x * f;
  196. res.y = y * f;
  197. res.z = z * f;
  198. res.w = w * f;
  199. return res;
  200. }
  201. Float LengthSqr() const
  202. {
  203. return x * x + y * y + z * z + w * w;
  204. }
  205. };
  206. template <typename Float>
  207. Matrix3<Float> QuaternionMatrix( const Quaternion<Float> &q )
  208. {
  209. Matrix3<Float> matrix;
  210. const Float one = Replicate<Float>( 1.0f ), two = Replicate<Float>( 2.0f );
  211. matrix.m[ 0 ][ 0 ] = one - two * q.y * q.y - two * q.z * q.z;
  212. matrix.m[ 1 ][ 0 ] = two * q.x * q.y + two * q.w * q.z;
  213. matrix.m[ 2 ][ 0 ] = two * q.x * q.z - two * q.w * q.y;
  214. matrix.m[ 0 ][ 1 ] = two * q.x * q.y - two * q.w * q.z;
  215. matrix.m[ 1 ][ 1 ] = one - two * q.x * q.x - two * q.z * q.z;
  216. matrix.m[ 2 ][ 1 ] = two * q.y * q.z + two * q.w * q.x;
  217. matrix.m[ 0 ][ 2 ] = two * q.x * q.z + two * q.w * q.y;
  218. matrix.m[ 1 ][ 2 ] = two * q.y * q.z - two * q.w * q.x;
  219. matrix.m[ 2 ][ 2 ] = one - two * q.x * q.x - two * q.y * q.y;
  220. return matrix;
  221. }
  222. template <typename Float >
  223. inline SymMatrix3< Float > AtA( const Matrix3< Float > &a )
  224. {
  225. SymMatrix3< Float > res;
  226. res.m[ res.a00 ] = a.AtA( 0, 0 );
  227. res.m[ res.a10 ] = a.AtA( 1, 0 );
  228. res.m[ res.a11 ] = a.AtA( 1, 1 );
  229. res.m[ res.a20 ] = a.AtA( 2, 0 );
  230. res.m[ res.a21 ] = a.AtA( 2, 1 );
  231. res.m[ res.a22 ] = a.AtA( 2, 2 );
  232. return res;
  233. }
  234. template <typename Float >
  235. inline SymMatrix3< Float > QtAQ( const Matrix3< Float > &q, const SymMatrix3< Float > &a )
  236. {
  237. SymMatrix3< Float > res;
  238. res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
  239. q.m[ 1 ][ 0 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) +
  240. q.m[ 2 ][ 0 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] + a.m[ a.a22 ] * q.m[ 2 ][ 0 ] );
  241. res.m[ res.a01 ] =
  242. q.m[ 0 ][ 1 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
  243. q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] +
  244. a.m[ a.a22 ] * q.m[ 2 ][ 0 ] ) * q.m[ 2 ][ 1 ];
  245. res.m[ res.a02 ] =
  246. q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
  247. q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] +
  248. a.m[ a.a22 ] * q.m[ 2 ][ 0 ] ) * q.m[ 2 ][ 2 ];
  249. res.m[ res.a11 ] =
  250. q.m[ 0 ][ 1 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 1 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 1 ] ) +
  251. q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] ) +
  252. q.m[ 2 ][ 1 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] + a.m[ a.a22 ] * q.m[ 2 ][ 1 ] );
  253. res.m[ res.a12 ] =
  254. q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 1 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 1 ] ) +
  255. q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] +
  256. a.m[ a.a22 ] * q.m[ 2 ][ 1 ] ) * q.m[ 2 ][ 2 ];
  257. res.m[ res.a22 ] =
  258. q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 2 ] + a.m[ a.a01 ] * q.m[ 1 ][ 2 ] + a.m[ a.a02 ] * q.m[ 2 ][ 2 ] ) +
  259. q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 2 ] + a.m[ a.a11 ] * q.m[ 1 ][ 2 ] + a.m[ a.a12 ] * q.m[ 2 ][ 2 ] ) +
  260. q.m[ 2 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 2 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] + a.m[ a.a22 ] * q.m[ 2 ][ 2 ] );
  261. return res;
  262. }
  263. template <typename Float >
  264. inline SymMatrix3< Float > QAQt( const Matrix3< Float > &q, const SymMatrix3< Float > &a )
  265. {
  266. SymMatrix3< Float > res;
  267. res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
  268. q.m[ 0 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) +
  269. q.m[ 0 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] + a.m[ a.a22 ] * q.m[ 0 ][ 2 ] );
  270. res.m[ res.a01 ] =
  271. q.m[ 1 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
  272. q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] +
  273. a.m[ a.a22 ] * q.m[ 0 ][ 2 ] ) * q.m[ 1 ][ 2 ];
  274. res.m[ res.a02 ] =
  275. q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
  276. q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] +
  277. a.m[ a.a22 ] * q.m[ 0 ][ 2 ] ) * q.m[ 2 ][ 2 ];
  278. res.m[ res.a11 ] =
  279. q.m[ 1 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 1 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 1 ][ 2 ] ) +
  280. q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] ) +
  281. q.m[ 1 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] + a.m[ a.a22 ] * q.m[ 1 ][ 2 ] );
  282. res.m[ res.a12 ] =
  283. q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 1 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 1 ][ 2 ] ) +
  284. q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] +
  285. a.m[ a.a22 ] * q.m[ 1 ][ 2 ] ) * q.m[ 2 ][ 2 ];
  286. res.m[ res.a22 ] =
  287. q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 2 ][ 0 ] + a.m[ a.a01 ] * q.m[ 2 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 2 ] ) +
  288. q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 2 ][ 0 ] + a.m[ a.a11 ] * q.m[ 2 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 2 ] ) +
  289. q.m[ 2 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 2 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] + a.m[ a.a22 ] * q.m[ 2 ][ 2 ] );
  290. return res;
  291. }
  292. template <typename Float >
  293. inline SymMatrix3< Float > QAQt( const Matrix3< Float > &q, const DiagMatrix3< Float > &a )
  294. {
  295. SymMatrix3< Float > res;
  296. res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * a.m[ 0 ] * q.m[ 0 ][ 0 ] +
  297. q.m[ 0 ][ 1 ] * a.m[ 1 ] * q.m[ 0 ][ 1 ] +
  298. q.m[ 0 ][ 2 ] * a.m[ 2 ] * q.m[ 0 ][ 2 ] ;
  299. res.m[ res.a01 ] =
  300. q.m[ 1 ][ 0 ] * ( a.m[ 0 ] * q.m[ 0 ][ 0 ]) +
  301. q.m[ 1 ][ 1 ] * ( a.m[ 1 ] * q.m[ 0 ][ 1 ]) + ( a.m[ 2 ] * q.m[ 0 ][ 2 ] ) * q.m[ 1 ][ 2 ];
  302. res.m[ res.a02 ] =
  303. q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 0 ][ 0 ]) +
  304. q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 0 ][ 1 ] ) + ( a.m[ 2 ] * q.m[ 0 ][ 2 ] ) * q.m[ 2 ][ 2 ];
  305. res.m[ res.a11 ] =
  306. q.m[ 1 ][ 0 ] * ( a.m[ 0 ] * q.m[ 1 ][ 0 ]) +
  307. q.m[ 1 ][ 1 ] * ( a.m[ 1 ] * q.m[ 1 ][ 1 ]) +
  308. q.m[ 1 ][ 2 ] * ( a.m[ 2 ] * q.m[ 1 ][ 2 ] );
  309. res.m[ res.a12 ] =
  310. q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 1 ][ 0 ]) +
  311. q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 1 ][ 1 ] ) + ( a.m[ 2 ] * q.m[ 1 ][ 2 ] ) * q.m[ 2 ][ 2 ];
  312. res.m[ res.a22 ] =
  313. q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 2 ][ 0 ] ) +
  314. q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 2 ][ 1 ] ) +
  315. q.m[ 2 ][ 2 ] * ( a.m[ 2 ] * q.m[ 2 ][ 2 ] );
  316. return res;
  317. }
  318. template <typename Float >
  319. void PerformGivensRotation2x2( const SinCos<Float> &res, Float &a11, Float &a12, Float &a22 )
  320. {
  321. const Float two = Replicate<Float>( 2.0f );
  322. Float cc = res.c * res.c, ss = res.s * res.s, cs = res.c * res.s;
  323. Float b11 = cc * a11 + two * cs * a12 + ss * a22;
  324. Float b12 = cs * ( a22 - a11 ) + ( cc - ss ) * a12;
  325. Float b22 = ss * a11 - two * cs * a12 + cc * a22;
  326. a11 = b11;
  327. a12 = b12;
  328. a22 = b22;
  329. }
  330. template <typename Float>
  331. void UnperformGivensRotation3x3( const SinCos<Float> &r, Float &a00, Float &a01, Float &a11, Float &a02, Float &a12 )
  332. {
  333. const Float two = Replicate<Float>( 2.0f );
  334. Float b00 = a00 * r.c * r.c - r.s * ( two * a01 * r.c - a11 * r.s );
  335. Float b01 = r.c*( a01 * r.c + a00 * r.s ) - r.s * ( a11 * r.c + a01 * r.s );
  336. Float b11 = a11 * r.c * r.c + r.s * ( two * a01 * r.c + a00 * r.s );
  337. Float b02 = a02 *r.c - a12 * r.s;
  338. Float b12 = a12 *r.c + a02 * r.s;
  339. a00 = b00;
  340. a01 = b01;
  341. a11 = b11;
  342. a02 = b02;
  343. a12 = b12;
  344. }
  345. template <typename Float>
  346. void PerformGivensRotation3x3( const SinCos<Float> &r, Float &a00, Float &a01, Float &a11, Float &a02, Float &a12 )
  347. {
  348. const Float two = Replicate<Float>( 2.0f );
  349. Float b00 = a00 * r.c * r.c + r.s * ( two * a01 * r.c + a11 * r.s );
  350. Float b01 = r.c*( a01 *r.c - a00 * r.s ) + r.s * ( a11 *r.c - a01 * r.s );
  351. Float b11 = a11 * r.c *r.c - r.s * ( two * a01 * r.c - a00 * r.s );
  352. Float b02 = a02 *r.c + a12 * r.s;
  353. Float b12 = a12 *r.c - a02 * r.s;
  354. a00 = b00;
  355. a01 = b01;
  356. a11 = b11;
  357. a02 = b02;
  358. a12 = b12;
  359. }
  360. inline SinCos< float > ComputeGivensRotation( float a11, float a12, float a22 )
  361. {
  362. float theta = fabsf( a11 - a22 ) > 1e-6f ? 0.5f * atanf( 2 * a12 / ( a11 - a22 ) ) : 3.14159265358979323846f / 4;
  363. SinCos< float >res( cosf( theta ), sinf( theta ) );
  364. #ifdef _DEBUG
  365. PerformGivensRotation2x2( res, a11, a12, a22 );
  366. Assert( fabsf( a12 ) < 0.001f * ( 1 + fabsf( a11 ) + fabsf( a22 ) ) );
  367. #endif
  368. return res;
  369. }
  370. template <typename Float >
  371. inline SinCos< Float> ApproximateGivensRotation( const Float & a11, const Float & a12, const Float & a22 )
  372. {
  373. const Float two = Replicate<Float>( 2.0f );
  374. Float ch = two * ( a11 - a22 );
  375. Float sh = a12;
  376. typename FloatTraits<Float>::Bool b = CmpLt( Replicate<Float>( 5.82842712474619f ) * sh*sh, ch*ch );
  377. Float r2 = ch*ch + sh *sh;
  378. typename FloatTraits<Float>::Bool bZero = CmpLt( r2, Replicate<Float>( 1e-12f ) );
  379. Float omega = RsqrtEst( r2 );
  380. SinCos<Float>res;
  381. res.s = Select( Replicate<Float>( 0.3826834323650897717284599840304f ), omega * sh, b );
  382. res.c = Select( Replicate<Float>( 0.92387953251128675612818318939679f ), omega * ch, b );
  383. res.s = Select( res.s, Replicate<Float>( 0.0f ), bZero ); // todo: replace with And
  384. res.c = Select( res.c, Replicate<Float>( 1.0f ), bZero );
  385. return res;
  386. }
  387. template <typename Float >
  388. void PerformGivensRotationQuaternion( const SinCos<Float> &res, Float &x, Float &y, Float &z, Float &w )
  389. {
  390. //const Float two = Replicate<Float>( 2.0f );
  391. Float xNew = res.c * x + res.s * w, yNew = res.c * y + res.s * z, zNew = res.c * z - res.s * y, wNew = res.c * w - res.s * x;
  392. x = xNew;
  393. y = yNew;
  394. z = zNew;
  395. w = wNew;
  396. }
  397. template <typename Float >
  398. class SvdIterator
  399. {
  400. public:
  401. SvdIterator(){}
  402. Quaternion < Float > q;
  403. SymMatrix3<Float> ata;
  404. void Init( const Matrix3<Float> a )
  405. {
  406. q.SetIdentity();
  407. ata = AtA( a );
  408. }
  409. void Iterate( int nIterations, float flEpsilon = 0.0f )
  410. {
  411. SinCos< Float> r;
  412. //SymMatrix3<Float> inv0 = QAQt( QuaternionMatrix( q ), ata ), origAta = AtA( a );
  413. for ( int i = 0; i < nIterations; ++i )
  414. {
  415. r = ApproximateGivensRotation( ata.m[ ata.a00 ], ata.m[ ata.a10 ], ata.m[ ata.a11 ] );
  416. Float sumErrors = r.s * r.s;
  417. PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a00 ], ata.m[ ata.a10 ], ata.m[ ata.a11 ], ata.m[ ata.a20 ], ata.m[ ata.a21 ] );
  418. PerformGivensRotationQuaternion( r, q.z, q.x, q.y, q.w );
  419. //SymMatrix3<Float> inv1 = QAQt( QuaternionMatrix( q ), ata );
  420. r = ApproximateGivensRotation( ata.m[ ata.a11 ], ata.m[ ata.a21 ], ata.m[ ata.a22 ] );
  421. sumErrors += r.s * r.s;
  422. PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a11 ], ata.m[ ata.a21 ], ata.m[ ata.a22 ], ata.m[ ata.a01 ], ata.m[ ata.a02 ] );
  423. PerformGivensRotationQuaternion( r, q.x, q.y, q.z, q.w );
  424. //SymMatrix3<Float> inv2 = QAQt( QuaternionMatrix( q ), ata );
  425. r = ApproximateGivensRotation( ata.m[ ata.a22 ], ata.m[ ata.a20 ], ata.m[ ata.a00 ] );
  426. sumErrors += r.s * r.s;
  427. PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a22 ], ata.m[ ata.a02 ], ata.m[ ata.a00 ], ata.m[ ata.a12 ], ata.m[ ata.a10 ] );
  428. PerformGivensRotationQuaternion( r, q.y, q.z, q.x, q.w );
  429. //SymMatrix3<Float> inv3 = QAQt( QuaternionMatrix( q ), ata );
  430. if ( AllLe( sumErrors, Replicate<Float>( flEpsilon ) ) )
  431. break; // early out
  432. }
  433. }
  434. Matrix3< Float > ComputeV()const { return QuaternionMatrix( q * Rsqrt( q.LengthSqr() ) ); }
  435. };
  436. inline float PseudoInverse( float fl ) { return fabsf( fl ) < FLT_EPSILON ? 0 : 1.0f / fl; }
  437. inline SymMatrix3< float > PseudoInverse( const SymMatrix3< float > &cov )
  438. {
  439. SvdIterator< float > si;
  440. si.q.SetIdentity();
  441. si.ata = cov;
  442. si.Iterate( 5 );
  443. DiagMatrix3< float > pseudoInverseDiag;
  444. pseudoInverseDiag.m[ 0 ] = PseudoInverse( si.ata.m00() );
  445. pseudoInverseDiag.m[ 1 ] = PseudoInverse( si.ata.m11() );
  446. pseudoInverseDiag.m[ 2 ] = PseudoInverse( si.ata.m22() );
  447. return QAQt( si.ComputeV(), pseudoInverseDiag );
  448. }
  449. }