Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

519 lines
18 KiB

//////////////////////////////////////////////////////////////////////////
//
// Implementaiton of iterative 3x3 SVD without branches, using approximate Givens rotations,
// applied sequentially to every off-diagonal element. The same code can compile into scalar, SSE and AVX
// by templetizing on the Float data type.
//
#include "ssemath.h"
namespace SVD
{
inline fltx4 CmpLt( const fltx4 &a, const fltx4 &b ) { return CmpLtSIMD( a, b ); }
inline bool CmpLt( float a, float b ) { return a < b ? true : false; }
inline bool AllLe( const fltx4 &a, const fltx4 &b ){ return IsAllGreaterThanOrEq( b, a ); }
inline bool AllLe( float a, float b ) { return a <= b; }
template <typename Float >
struct FloatTraits
{
typedef Float Bool;
};
template <>
struct FloatTraits < float >
{
typedef bool Bool;
};
template< typename Float > inline Float Replicate( float a );
template <> inline fltx4 Replicate< fltx4 >( float a )
{
return _mm_set1_ps( a );
}
template <> inline float Replicate< float >( float a ){ return a; }
template <typename Float>
class SymMatrix3;
inline fltx4 RsqrtEst( const fltx4 &a ){ return ReciprocalSqrtEstSIMD( a ); }
template <typename Float>
inline Float Rsqrt( const Float &a )
{
fltx4 it = RsqrtEst( a );
// a single Newton iteration; can repeat multiple times
return Replicate<Float>( .5f ) * it * ( Replicate<Float>( 3.0f ) - ( a * it * it ) );
}
inline float RsqrtEst( float a )
{
float res;
StoreUnalignedFloat( &res, RsqrtEst( LoadUnalignedFloatSIMD(&a) ) );
return res;
}
template <>
inline float Rsqrt( const float &a ) { return 1.0f / sqrtf( a ); }
inline fltx4 Select( const fltx4& a /*mask=0*/, const fltx4& b/*mask=1*/, const fltx4& mask )
{
// (((b ^ a) & mask)^a)
return _mm_xor_ps( a, _mm_and_ps( mask, _mm_xor_ps( b, a ) ) );
}
inline float Select( float a, float b, bool mask )
{
return mask ? b : a;
}
template <typename Float>
class Matrix3
{
public:
Matrix3() {}
Float m[ 3 ][ 3 ];
Matrix3 < Float > operator * ( const Matrix3< Float > &other )const
{
Matrix3 < Float > res;
for ( int i = 0; i < 3; ++i )
for ( int j = 0; j < 3; ++j )
res.m[ i ][ j ] = m[ i ][ 0 ] * other.m[ 0 ][ j ] + m[ i ][ 1 ] * other.m[ 1 ][ j ] + m[ i ][ 2 ] * other.m[ 2 ][ j ];
return res;
}
Matrix3 < Float > operator - ( const Matrix3< Float > &other )const
{
Matrix3 < Float > res;
for ( int i = 0; i < 3; ++i )
for ( int j = 0; j < 3; ++j )
res.m[ i ][ j ] = m[ i ][ j ] - other.m[ i ][ j ];
return res;
}
explicit Matrix3< Float >( const SymMatrix3< Float > &other );
Float FrobeniusNorm()const
{
Float sum = Replicate<Float>( 0.0f );
for ( int i = 0; i < 3; ++i )
for ( int j = 0; j < 3; ++j )
sum += m[ i ][ j ] * m[ i ][ j ];
return sum;
}
Float AtA( int i, int j )const
{
return m[ 0 ][ i ] * m[ 0 ][ j ] + m[ 1 ][ i ] * m[ 1 ][ j ] + m[ 2 ][ i ] * m[ 2 ][ j ];
}
void SetIdentity()
{
m[ 0 ][ 0 ] = m[ 1 ][ 1 ] = m[ 2 ][ 2 ] = Replicate<Float>( 1.0f );
m[ 0 ][ 1 ] = m[ 1 ][ 0 ] = m[ 0 ][ 2 ] = m[ 2 ][ 0 ] = m[ 2 ][ 1 ] = m[ 1 ][ 2 ] = Replicate<Float>( 0.0f );
}
void SetZero()
{
memset( this, 0, sizeof( *this ) );
}
Float ColLenSqr( int j )const
{
return m[ 0 ][ j ] * m[ 0 ][ j ] + m[ 1 ][ j ] * m[ 1 ][ j ] + m[ 2 ][ j ] * m[ 2 ][ j ];
}
Float Det()const
{
return -( m[ 0 ][ 2 ]*m[ 1 ][ 1 ]*m[ 2 ][ 0 ] ) + m[ 0 ][ 1 ]*m[ 1 ][ 2 ]*m[ 2 ][ 0 ] + m[ 0 ][ 2 ]*m[ 1 ][ 0 ]*m[ 2 ][ 1 ] - m[ 0 ][ 0 ]*m[ 1 ][ 2 ]*m[ 2 ][ 1 ] - m[ 0 ][ 1 ]*m[ 1 ][ 0 ]*m[ 2 ][ 2 ] + m[ 0 ][ 0 ]*m[ 1 ][ 1 ]*m[ 2 ][ 2 ];
}
};
template <typename Float>
inline Matrix3 < Float > MulT( const Matrix3< Float > &u, const Matrix3< Float > &vt )
{
Matrix3 < Float > res;
for ( int i = 0; i < 3; ++i )
for ( int j = 0; j < 3; ++j )
res.m[ i ][ j ] = u.m[ i ][ 0 ] * vt.m[ j ][ 0 ] + u.m[ i ][ 1 ] * vt.m[ j ][ 1 ] + u.m[ i ][ 2 ] * vt.m[ j ][ 2 ];
return res;
}
template <typename Float>
Float OrthogonalityError( const Matrix3<Float> &m )
{
Float result = Replicate<Float>( 0.0f );
for ( int i = 0; i < 2; ++i )
{
for ( int j = i + 1; j < 3; ++j )
{
Float dot = m.m[ 0 ][ i ] * m.m[ 0 ][ j ] + m.m[ 1 ][ i ] * m.m[ 1 ][ j ] + m.m[ 2 ][ i ] * m.m[ 2 ][ j ];
result += dot * dot;
}
}
return result;
}
template <typename Float >
class DiagMatrix3
{
public:
Float m[ 3 ];
};
template <typename Float>
class SymMatrix3
{
public:
enum Index_t
{
a00, a10, a11, a20, a21, a22, Count, a01 = a10, a02 = a20, a12 = a21
};
Float m[ 6 ];
Float &m00() { return m[ a00 ]; }
Float &m01() { return m[ a01 ]; }
Float &m02() { return m[ a02 ]; }
Float &m11() { return m[ a11 ]; }
Float &m12() { return m[ a12 ]; }
Float &m22() { return m[ a22 ]; }
Float OffDiagNorm()const { return m[ a01 ] * m[ a01 ] + m[ a21 ] * m[ a21 ] + m[ a02 ] * m[ a02 ]; }
Float DiagNorm()const { return m[ a00 ] * m[ a00 ] + m[ a11 ] * m[ a11 ] + m[ a22 ] * m[ a22 ]; }
};
template < typename Float >
Matrix3< Float >::Matrix3( const SymMatrix3< Float > &other )
{
m[ 0 ][ 0 ] = other.m[ other.a00 ];
m[ 0 ][ 1 ] = m[ 1 ][ 0 ] = other.m[ other.a01 ];
m[ 1 ][ 1 ] = other.m[ other.a11 ];
m[ 2 ][ 0 ] = m[ 0 ][ 2 ] = other.m[ other.a01 ];
m[ 2 ][ 1 ] = m[ 1 ][ 2 ] = other.m[ other.a01 ];
m[ 2 ][ 2 ] = other.m[ other.a22 ];
}
template <typename Float >
class SinCos
{
public:
Float s, c;
SinCos() {}
SinCos( const Float &_c, const Float &_s ) : c( _c ), s( _s ) {}
SinCos< Float> DoubleAngle()const
{
SinCos< Float> res;
res.s = Replicate<Float>( 2.0f ) * s * c;
res.c = c * c - s * s;
return res;
}
};
template <typename Float>
class Quaternion
{
public:
Float x, y, z, w;
void SetIdentity()
{
x = y = z = Replicate<float>( 0.0f );
w = Replicate<float>( 1.0f );
}
Quaternion<Float> operator * ( const Float &f )const
{
Quaternion< Float > res;
res.x = x * f;
res.y = y * f;
res.z = z * f;
res.w = w * f;
return res;
}
Float LengthSqr() const
{
return x * x + y * y + z * z + w * w;
}
};
template <typename Float>
Matrix3<Float> QuaternionMatrix( const Quaternion<Float> &q )
{
Matrix3<Float> matrix;
const Float one = Replicate<Float>( 1.0f ), two = Replicate<Float>( 2.0f );
matrix.m[ 0 ][ 0 ] = one - two * q.y * q.y - two * q.z * q.z;
matrix.m[ 1 ][ 0 ] = two * q.x * q.y + two * q.w * q.z;
matrix.m[ 2 ][ 0 ] = two * q.x * q.z - two * q.w * q.y;
matrix.m[ 0 ][ 1 ] = two * q.x * q.y - two * q.w * q.z;
matrix.m[ 1 ][ 1 ] = one - two * q.x * q.x - two * q.z * q.z;
matrix.m[ 2 ][ 1 ] = two * q.y * q.z + two * q.w * q.x;
matrix.m[ 0 ][ 2 ] = two * q.x * q.z + two * q.w * q.y;
matrix.m[ 1 ][ 2 ] = two * q.y * q.z - two * q.w * q.x;
matrix.m[ 2 ][ 2 ] = one - two * q.x * q.x - two * q.y * q.y;
return matrix;
}
template <typename Float >
inline SymMatrix3< Float > AtA( const Matrix3< Float > &a )
{
SymMatrix3< Float > res;
res.m[ res.a00 ] = a.AtA( 0, 0 );
res.m[ res.a10 ] = a.AtA( 1, 0 );
res.m[ res.a11 ] = a.AtA( 1, 1 );
res.m[ res.a20 ] = a.AtA( 2, 0 );
res.m[ res.a21 ] = a.AtA( 2, 1 );
res.m[ res.a22 ] = a.AtA( 2, 2 );
return res;
}
template <typename Float >
inline SymMatrix3< Float > QtAQ( const Matrix3< Float > &q, const SymMatrix3< Float > &a )
{
SymMatrix3< Float > res;
res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
q.m[ 1 ][ 0 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) +
q.m[ 2 ][ 0 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] + a.m[ a.a22 ] * q.m[ 2 ][ 0 ] );
res.m[ res.a01 ] =
q.m[ 0 ][ 1 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] +
a.m[ a.a22 ] * q.m[ 2 ][ 0 ] ) * q.m[ 2 ][ 1 ];
res.m[ res.a02 ] =
q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a02 ] * q.m[ 2 ][ 0 ] ) +
q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 0 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 0 ] +
a.m[ a.a22 ] * q.m[ 2 ][ 0 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a11 ] =
q.m[ 0 ][ 1 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 1 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 1 ] ) +
q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] ) +
q.m[ 2 ][ 1 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] + a.m[ a.a22 ] * q.m[ 2 ][ 1 ] );
res.m[ res.a12 ] =
q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 1 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 1 ] ) +
q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] +
a.m[ a.a22 ] * q.m[ 2 ][ 1 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a22 ] =
q.m[ 0 ][ 2 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 2 ] + a.m[ a.a01 ] * q.m[ 1 ][ 2 ] + a.m[ a.a02 ] * q.m[ 2 ][ 2 ] ) +
q.m[ 1 ][ 2 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 2 ] + a.m[ a.a11 ] * q.m[ 1 ][ 2 ] + a.m[ a.a12 ] * q.m[ 2 ][ 2 ] ) +
q.m[ 2 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 2 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] + a.m[ a.a22 ] * q.m[ 2 ][ 2 ] );
return res;
}
template <typename Float >
inline SymMatrix3< Float > QAQt( const Matrix3< Float > &q, const SymMatrix3< Float > &a )
{
SymMatrix3< Float > res;
res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
q.m[ 0 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) +
q.m[ 0 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] + a.m[ a.a22 ] * q.m[ 0 ][ 2 ] );
res.m[ res.a01 ] =
q.m[ 1 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] +
a.m[ a.a22 ] * q.m[ 0 ][ 2 ] ) * q.m[ 1 ][ 2 ];
res.m[ res.a02 ] =
q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 0 ][ 0 ] + a.m[ a.a01 ] * q.m[ 0 ][ 1 ] + a.m[ a.a02 ] * q.m[ 0 ][ 2 ] ) +
q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 0 ][ 0 ] + a.m[ a.a11 ] * q.m[ 0 ][ 1 ] + a.m[ a.a12 ] * q.m[ 0 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 0 ][ 0 ] + a.m[ a.a12 ] * q.m[ 0 ][ 1 ] +
a.m[ a.a22 ] * q.m[ 0 ][ 2 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a11 ] =
q.m[ 1 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 1 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 1 ][ 2 ] ) +
q.m[ 1 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] ) +
q.m[ 1 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] + a.m[ a.a22 ] * q.m[ 1 ][ 2 ] );
res.m[ res.a12 ] =
q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 1 ][ 0 ] + a.m[ a.a01 ] * q.m[ 1 ][ 1 ] + a.m[ a.a02 ] * q.m[ 1 ][ 2 ] ) +
q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 1 ][ 0 ] + a.m[ a.a11 ] * q.m[ 1 ][ 1 ] + a.m[ a.a12 ] * q.m[ 1 ][ 2 ] ) + ( a.m[ a.a02 ] * q.m[ 1 ][ 0 ] + a.m[ a.a12 ] * q.m[ 1 ][ 1 ] +
a.m[ a.a22 ] * q.m[ 1 ][ 2 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a22 ] =
q.m[ 2 ][ 0 ] * ( a.m[ a.a00 ] * q.m[ 2 ][ 0 ] + a.m[ a.a01 ] * q.m[ 2 ][ 1 ] + a.m[ a.a02 ] * q.m[ 2 ][ 2 ] ) +
q.m[ 2 ][ 1 ] * ( a.m[ a.a01 ] * q.m[ 2 ][ 0 ] + a.m[ a.a11 ] * q.m[ 2 ][ 1 ] + a.m[ a.a12 ] * q.m[ 2 ][ 2 ] ) +
q.m[ 2 ][ 2 ] * ( a.m[ a.a02 ] * q.m[ 2 ][ 0 ] + a.m[ a.a12 ] * q.m[ 2 ][ 1 ] + a.m[ a.a22 ] * q.m[ 2 ][ 2 ] );
return res;
}
template <typename Float >
inline SymMatrix3< Float > QAQt( const Matrix3< Float > &q, const DiagMatrix3< Float > &a )
{
SymMatrix3< Float > res;
res.m[ res.a00 ] = q.m[ 0 ][ 0 ] * a.m[ 0 ] * q.m[ 0 ][ 0 ] +
q.m[ 0 ][ 1 ] * a.m[ 1 ] * q.m[ 0 ][ 1 ] +
q.m[ 0 ][ 2 ] * a.m[ 2 ] * q.m[ 0 ][ 2 ] ;
res.m[ res.a01 ] =
q.m[ 1 ][ 0 ] * ( a.m[ 0 ] * q.m[ 0 ][ 0 ]) +
q.m[ 1 ][ 1 ] * ( a.m[ 1 ] * q.m[ 0 ][ 1 ]) + ( a.m[ 2 ] * q.m[ 0 ][ 2 ] ) * q.m[ 1 ][ 2 ];
res.m[ res.a02 ] =
q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 0 ][ 0 ]) +
q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 0 ][ 1 ] ) + ( a.m[ 2 ] * q.m[ 0 ][ 2 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a11 ] =
q.m[ 1 ][ 0 ] * ( a.m[ 0 ] * q.m[ 1 ][ 0 ]) +
q.m[ 1 ][ 1 ] * ( a.m[ 1 ] * q.m[ 1 ][ 1 ]) +
q.m[ 1 ][ 2 ] * ( a.m[ 2 ] * q.m[ 1 ][ 2 ] );
res.m[ res.a12 ] =
q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 1 ][ 0 ]) +
q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 1 ][ 1 ] ) + ( a.m[ 2 ] * q.m[ 1 ][ 2 ] ) * q.m[ 2 ][ 2 ];
res.m[ res.a22 ] =
q.m[ 2 ][ 0 ] * ( a.m[ 0 ] * q.m[ 2 ][ 0 ] ) +
q.m[ 2 ][ 1 ] * ( a.m[ 1 ] * q.m[ 2 ][ 1 ] ) +
q.m[ 2 ][ 2 ] * ( a.m[ 2 ] * q.m[ 2 ][ 2 ] );
return res;
}
template <typename Float >
void PerformGivensRotation2x2( const SinCos<Float> &res, Float &a11, Float &a12, Float &a22 )
{
const Float two = Replicate<Float>( 2.0f );
Float cc = res.c * res.c, ss = res.s * res.s, cs = res.c * res.s;
Float b11 = cc * a11 + two * cs * a12 + ss * a22;
Float b12 = cs * ( a22 - a11 ) + ( cc - ss ) * a12;
Float b22 = ss * a11 - two * cs * a12 + cc * a22;
a11 = b11;
a12 = b12;
a22 = b22;
}
template <typename Float>
void UnperformGivensRotation3x3( const SinCos<Float> &r, Float &a00, Float &a01, Float &a11, Float &a02, Float &a12 )
{
const Float two = Replicate<Float>( 2.0f );
Float b00 = a00 * r.c * r.c - r.s * ( two * a01 * r.c - a11 * r.s );
Float b01 = r.c*( a01 * r.c + a00 * r.s ) - r.s * ( a11 * r.c + a01 * r.s );
Float b11 = a11 * r.c * r.c + r.s * ( two * a01 * r.c + a00 * r.s );
Float b02 = a02 *r.c - a12 * r.s;
Float b12 = a12 *r.c + a02 * r.s;
a00 = b00;
a01 = b01;
a11 = b11;
a02 = b02;
a12 = b12;
}
template <typename Float>
void PerformGivensRotation3x3( const SinCos<Float> &r, Float &a00, Float &a01, Float &a11, Float &a02, Float &a12 )
{
const Float two = Replicate<Float>( 2.0f );
Float b00 = a00 * r.c * r.c + r.s * ( two * a01 * r.c + a11 * r.s );
Float b01 = r.c*( a01 *r.c - a00 * r.s ) + r.s * ( a11 *r.c - a01 * r.s );
Float b11 = a11 * r.c *r.c - r.s * ( two * a01 * r.c - a00 * r.s );
Float b02 = a02 *r.c + a12 * r.s;
Float b12 = a12 *r.c - a02 * r.s;
a00 = b00;
a01 = b01;
a11 = b11;
a02 = b02;
a12 = b12;
}
inline SinCos< float > ComputeGivensRotation( float a11, float a12, float a22 )
{
float theta = fabsf( a11 - a22 ) > 1e-6f ? 0.5f * atanf( 2 * a12 / ( a11 - a22 ) ) : 3.14159265358979323846f / 4;
SinCos< float >res( cosf( theta ), sinf( theta ) );
#ifdef _DEBUG
PerformGivensRotation2x2( res, a11, a12, a22 );
Assert( fabsf( a12 ) < 0.001f * ( 1 + fabsf( a11 ) + fabsf( a22 ) ) );
#endif
return res;
}
template <typename Float >
inline SinCos< Float> ApproximateGivensRotation( const Float & a11, const Float & a12, const Float & a22 )
{
const Float two = Replicate<Float>( 2.0f );
Float ch = two * ( a11 - a22 );
Float sh = a12;
typename FloatTraits<Float>::Bool b = CmpLt( Replicate<Float>( 5.82842712474619f ) * sh*sh, ch*ch );
Float r2 = ch*ch + sh *sh;
typename FloatTraits<Float>::Bool bZero = CmpLt( r2, Replicate<Float>( 1e-12f ) );
Float omega = RsqrtEst( r2 );
SinCos<Float>res;
res.s = Select( Replicate<Float>( 0.3826834323650897717284599840304f ), omega * sh, b );
res.c = Select( Replicate<Float>( 0.92387953251128675612818318939679f ), omega * ch, b );
res.s = Select( res.s, Replicate<Float>( 0.0f ), bZero ); // todo: replace with And
res.c = Select( res.c, Replicate<Float>( 1.0f ), bZero );
return res;
}
template <typename Float >
void PerformGivensRotationQuaternion( const SinCos<Float> &res, Float &x, Float &y, Float &z, Float &w )
{
//const Float two = Replicate<Float>( 2.0f );
Float xNew = res.c * x + res.s * w, yNew = res.c * y + res.s * z, zNew = res.c * z - res.s * y, wNew = res.c * w - res.s * x;
x = xNew;
y = yNew;
z = zNew;
w = wNew;
}
template <typename Float >
class SvdIterator
{
public:
SvdIterator(){}
Quaternion < Float > q;
SymMatrix3<Float> ata;
void Init( const Matrix3<Float> a )
{
q.SetIdentity();
ata = AtA( a );
}
void Iterate( int nIterations, float flEpsilon = 0.0f )
{
SinCos< Float> r;
//SymMatrix3<Float> inv0 = QAQt( QuaternionMatrix( q ), ata ), origAta = AtA( a );
for ( int i = 0; i < nIterations; ++i )
{
r = ApproximateGivensRotation( ata.m[ ata.a00 ], ata.m[ ata.a10 ], ata.m[ ata.a11 ] );
Float sumErrors = r.s * r.s;
PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a00 ], ata.m[ ata.a10 ], ata.m[ ata.a11 ], ata.m[ ata.a20 ], ata.m[ ata.a21 ] );
PerformGivensRotationQuaternion( r, q.z, q.x, q.y, q.w );
//SymMatrix3<Float> inv1 = QAQt( QuaternionMatrix( q ), ata );
r = ApproximateGivensRotation( ata.m[ ata.a11 ], ata.m[ ata.a21 ], ata.m[ ata.a22 ] );
sumErrors += r.s * r.s;
PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a11 ], ata.m[ ata.a21 ], ata.m[ ata.a22 ], ata.m[ ata.a01 ], ata.m[ ata.a02 ] );
PerformGivensRotationQuaternion( r, q.x, q.y, q.z, q.w );
//SymMatrix3<Float> inv2 = QAQt( QuaternionMatrix( q ), ata );
r = ApproximateGivensRotation( ata.m[ ata.a22 ], ata.m[ ata.a20 ], ata.m[ ata.a00 ] );
sumErrors += r.s * r.s;
PerformGivensRotation3x3( r.DoubleAngle(), ata.m[ ata.a22 ], ata.m[ ata.a02 ], ata.m[ ata.a00 ], ata.m[ ata.a12 ], ata.m[ ata.a10 ] );
PerformGivensRotationQuaternion( r, q.y, q.z, q.x, q.w );
//SymMatrix3<Float> inv3 = QAQt( QuaternionMatrix( q ), ata );
if ( AllLe( sumErrors, Replicate<Float>( flEpsilon ) ) )
break; // early out
}
}
Matrix3< Float > ComputeV()const { return QuaternionMatrix( q * Rsqrt( q.LengthSqr() ) ); }
};
inline float PseudoInverse( float fl ) { return fabsf( fl ) < FLT_EPSILON ? 0 : 1.0f / fl; }
inline SymMatrix3< float > PseudoInverse( const SymMatrix3< float > &cov )
{
SvdIterator< float > si;
si.q.SetIdentity();
si.ata = cov;
si.Iterate( 5 );
DiagMatrix3< float > pseudoInverseDiag;
pseudoInverseDiag.m[ 0 ] = PseudoInverse( si.ata.m00() );
pseudoInverseDiag.m[ 1 ] = PseudoInverse( si.ata.m11() );
pseudoInverseDiag.m[ 2 ] = PseudoInverse( si.ata.m22() );
return QAQt( si.ComputeV(), pseudoInverseDiag );
}
}