//========= Copyright c 1996-2009, Valve Corporation, All rights reserved. ============//
//
// Purpose: Cholesky and LDL' decomposition-related code
//
//=====================================================================================//
#include "mathlib/cholesky.h"
#include "mathlib/ssecholesky.h"

inline float ClampNeg( float a )
{
	return a >= 0.0f ? a : 0.0f ; //( a + fabsf(a) ) * 0.5f;
}

inline float SafeSqrt( float a )
{
	return sqrtf( ClampNeg( a ) );
}

inline float SafeRecip( float a )
{
	return 1.0f / ( a > 1e-8f ? a : 1e-8f );
}

bool Cholesky3x3_t::IsValid( )
{
	return m_inv00 + m_inv11 + m_inv22 < 1e+7f;
}

const fltx4 Four_1e7 = { 1.0e+7, 1.0e+7, 1.0e+7, 1.0e+7 };

bool SimdCholesky3x3_t::IsValid( )const 
{
	return IsAllGreaterThan( Four_1e7, m_inv00 + m_inv11 + m_inv22 );
}

fltx4 SimdCholesky3x3_t::GetValidMask( )const
{
	return CmpGtSIMD( Four_1e7, m_inv00 + m_inv11 + m_inv22 );
}


// initializes this decomposition; see formula at http://planetmath.org/encyclopedia/CholeskyDecomposition.html
bool Cholesky3x3_t::Init( float a00, float a10, float a11, float a20, float a21, float a22 )
{
#ifdef _DEBUG
	memset( this, 0xCD, sizeof(*this) ); // to see what's changing easily
#endif
	m_00 = SafeSqrt( a00 ); m_inv00 = SafeRecip( m_00 );
	m_10 = ( a10 ) * m_inv00;
	m_11 = SafeSqrt( a11 - Sqr( m_10 ) ); m_inv11 = SafeRecip( m_11 );
	m_20 = ( a20 ) * m_inv00;
	m_21 = ( a21 - m_20 * m_10) * m_inv11;
	m_22 = SafeSqrt( a22 - Sqr( m_20 ) - Sqr( m_21 ) ); m_inv22 = SafeRecip( m_22 );

#ifdef _DEBUG
	if( IsValid() )
	{
		matrix3x4_t l,r, a;
		FillLeft(l);
		FillRight(r);
		MatrixMultiply(l, r, a);
		float flError = Sqr( a00 - a[0][0] ) + Sqr( a10 - a[1][0] ) + Sqr( a20 - a[2][0] ) + 
			Sqr( a11 - a[1][1] ) + Sqr( a21 - a[2][1] ) + Sqr( a22 - a[2][2] );
		Assert( flError < 1e-5f );
	}
#endif

	return IsValid();
}


void Cholesky3x3_t::FillLeft( matrix3x4_t & l )
{
	l[0][0] = m_00;
	l[0][1] = l[0][2] = l[0][3] = 0;
	l[1][0] = m_10; l[1][1] = m_11;
	l[1][2] = l[1][3] = 0;
	l[2][0] = m_20;	l[2][1] = m_21; l[2][2] = m_22;
	l[2][3] = 0;
}

void Cholesky3x3_t::FillRight( matrix3x4_t & r )
{
	r[0][0] = m_00;
	r[1][0] = r[2][0] = 0;
	r[0][1] = m_10; r[1][1] = m_11;
	r[2][1] = 0;
	r[0][2] = m_20;	r[1][2] = m_21; r[2][2] = m_22;
	r[0][3] = r[1][3] = r[2][3] = 0;
}


// solve this : L x = b
const Vector Cholesky3x3_t::SolveLeft( const Vector &b )
{
	Vector result;
	result.x = m_inv00 * b.x;
	result.y = m_inv11 * ( b.y - m_10 * result.x );
	result.z = m_inv22 * ( b.z - m_20 * result.x - m_21 * result.y );
	return result;
}

const Vector Cholesky3x3_t::SolveRight( const Vector &b )
{
	Vector result;
	result.z = m_inv22 * b.z;
	result.y = m_inv11 * ( b.y - m_21 * result.z );
	result.x = m_inv00 * ( b.x - m_20 * result.z - m_10 * result.y );
	return result;
}



// initializes this decomposition; see formula at http://planetmath.org/encyclopedia/CholeskyDecomposition.html
void SimdCholesky3x3_t::Init( const fltx4 & a00, const fltx4 & a10, const fltx4 & a11, const fltx4 & a20, const fltx4 & a21, const fltx4 & a22 )
{
	m_inv00 = ReciprocalSqrtSIMD( a00 );
	m_10 = ( a10 ) * m_inv00;
	m_inv11 = ReciprocalSqrtSIMD( a11 - ( m_10 * m_10 ) );
	m_20 = ( a20 ) * m_inv00;
	m_21 = ( a21 - m_20 * m_10 ) * m_inv11;
	m_inv22 = ReciprocalSqrtSIMD( a22 - ( m_20 * m_20 ) - ( m_21 * m_21 ) );
}


// solve this : L x = b
const FourVectors SimdCholesky3x3_t::SolveLeft( const FourVectors &b )
{
	FourVectors result;
	result.x = m_inv00 * b.x;
	result.y = m_inv11 * ( b.y - m_10 * result.x );
	result.z = m_inv22 * ( b.z - m_20 * result.x - m_21 * result.y );
	return result;
}

const FourVectors SimdCholesky3x3_t::SolveRight( const FourVectors &b )
{
	FourVectors result;
	result.z = m_inv22 * b.z;
	result.y = m_inv11 * ( b.y - m_21 * result.z );
	result.x = m_inv00 * ( b.x - m_20 * result.z - m_10 * result.y );
	return result;
}