You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
143 lines
3.8 KiB
143 lines
3.8 KiB
//========= Copyright c 1996-2009, Valve Corporation, All rights reserved. ============//
|
|
//
|
|
// Purpose: Cholesky and LDL' decomposition-related code
|
|
//
|
|
//=====================================================================================//
|
|
#include "mathlib/cholesky.h"
|
|
#include "mathlib/ssecholesky.h"
|
|
|
|
inline float ClampNeg( float a )
|
|
{
|
|
return a >= 0.0f ? a : 0.0f ; //( a + fabsf(a) ) * 0.5f;
|
|
}
|
|
|
|
inline float SafeSqrt( float a )
|
|
{
|
|
return sqrtf( ClampNeg( a ) );
|
|
}
|
|
|
|
inline float SafeRecip( float a )
|
|
{
|
|
return 1.0f / ( a > 1e-8f ? a : 1e-8f );
|
|
}
|
|
|
|
bool Cholesky3x3_t::IsValid( )
|
|
{
|
|
return m_inv00 + m_inv11 + m_inv22 < 1e+7f;
|
|
}
|
|
|
|
const fltx4 Four_1e7 = { 1.0e+7, 1.0e+7, 1.0e+7, 1.0e+7 };
|
|
|
|
bool SimdCholesky3x3_t::IsValid( )const
|
|
{
|
|
return IsAllGreaterThan( Four_1e7, m_inv00 + m_inv11 + m_inv22 );
|
|
}
|
|
|
|
fltx4 SimdCholesky3x3_t::GetValidMask( )const
|
|
{
|
|
return CmpGtSIMD( Four_1e7, m_inv00 + m_inv11 + m_inv22 );
|
|
}
|
|
|
|
|
|
// initializes this decomposition; see formula at http://planetmath.org/encyclopedia/CholeskyDecomposition.html
|
|
bool Cholesky3x3_t::Init( float a00, float a10, float a11, float a20, float a21, float a22 )
|
|
{
|
|
#ifdef _DEBUG
|
|
memset( this, 0xCD, sizeof(*this) ); // to see what's changing easily
|
|
#endif
|
|
m_00 = SafeSqrt( a00 ); m_inv00 = SafeRecip( m_00 );
|
|
m_10 = ( a10 ) * m_inv00;
|
|
m_11 = SafeSqrt( a11 - Sqr( m_10 ) ); m_inv11 = SafeRecip( m_11 );
|
|
m_20 = ( a20 ) * m_inv00;
|
|
m_21 = ( a21 - m_20 * m_10) * m_inv11;
|
|
m_22 = SafeSqrt( a22 - Sqr( m_20 ) - Sqr( m_21 ) ); m_inv22 = SafeRecip( m_22 );
|
|
|
|
#ifdef _DEBUG
|
|
if( IsValid() )
|
|
{
|
|
matrix3x4_t l,r, a;
|
|
FillLeft(l);
|
|
FillRight(r);
|
|
MatrixMultiply(l, r, a);
|
|
float flError = Sqr( a00 - a[0][0] ) + Sqr( a10 - a[1][0] ) + Sqr( a20 - a[2][0] ) +
|
|
Sqr( a11 - a[1][1] ) + Sqr( a21 - a[2][1] ) + Sqr( a22 - a[2][2] );
|
|
Assert( flError < 1e-5f );
|
|
}
|
|
#endif
|
|
|
|
return IsValid();
|
|
}
|
|
|
|
|
|
void Cholesky3x3_t::FillLeft( matrix3x4_t & l )
|
|
{
|
|
l[0][0] = m_00;
|
|
l[0][1] = l[0][2] = l[0][3] = 0;
|
|
l[1][0] = m_10; l[1][1] = m_11;
|
|
l[1][2] = l[1][3] = 0;
|
|
l[2][0] = m_20; l[2][1] = m_21; l[2][2] = m_22;
|
|
l[2][3] = 0;
|
|
}
|
|
|
|
void Cholesky3x3_t::FillRight( matrix3x4_t & r )
|
|
{
|
|
r[0][0] = m_00;
|
|
r[1][0] = r[2][0] = 0;
|
|
r[0][1] = m_10; r[1][1] = m_11;
|
|
r[2][1] = 0;
|
|
r[0][2] = m_20; r[1][2] = m_21; r[2][2] = m_22;
|
|
r[0][3] = r[1][3] = r[2][3] = 0;
|
|
}
|
|
|
|
|
|
// solve this : L x = b
|
|
const Vector Cholesky3x3_t::SolveLeft( const Vector &b )
|
|
{
|
|
Vector result;
|
|
result.x = m_inv00 * b.x;
|
|
result.y = m_inv11 * ( b.y - m_10 * result.x );
|
|
result.z = m_inv22 * ( b.z - m_20 * result.x - m_21 * result.y );
|
|
return result;
|
|
}
|
|
|
|
const Vector Cholesky3x3_t::SolveRight( const Vector &b )
|
|
{
|
|
Vector result;
|
|
result.z = m_inv22 * b.z;
|
|
result.y = m_inv11 * ( b.y - m_21 * result.z );
|
|
result.x = m_inv00 * ( b.x - m_20 * result.z - m_10 * result.y );
|
|
return result;
|
|
}
|
|
|
|
|
|
|
|
// initializes this decomposition; see formula at http://planetmath.org/encyclopedia/CholeskyDecomposition.html
|
|
void SimdCholesky3x3_t::Init( const fltx4 & a00, const fltx4 & a10, const fltx4 & a11, const fltx4 & a20, const fltx4 & a21, const fltx4 & a22 )
|
|
{
|
|
m_inv00 = ReciprocalSqrtSIMD( a00 );
|
|
m_10 = ( a10 ) * m_inv00;
|
|
m_inv11 = ReciprocalSqrtSIMD( a11 - ( m_10 * m_10 ) );
|
|
m_20 = ( a20 ) * m_inv00;
|
|
m_21 = ( a21 - m_20 * m_10 ) * m_inv11;
|
|
m_inv22 = ReciprocalSqrtSIMD( a22 - ( m_20 * m_20 ) - ( m_21 * m_21 ) );
|
|
}
|
|
|
|
|
|
// solve this : L x = b
|
|
const FourVectors SimdCholesky3x3_t::SolveLeft( const FourVectors &b )
|
|
{
|
|
FourVectors result;
|
|
result.x = m_inv00 * b.x;
|
|
result.y = m_inv11 * ( b.y - m_10 * result.x );
|
|
result.z = m_inv22 * ( b.z - m_20 * result.x - m_21 * result.y );
|
|
return result;
|
|
}
|
|
|
|
const FourVectors SimdCholesky3x3_t::SolveRight( const FourVectors &b )
|
|
{
|
|
FourVectors result;
|
|
result.z = m_inv22 * b.z;
|
|
result.y = m_inv11 * ( b.y - m_21 * result.z );
|
|
result.x = m_inv00 * ( b.x - m_20 * result.z - m_10 * result.y );
|
|
return result;
|
|
}
|