csgo/cstrike15_src/dx10sdk/Include/xnamathmisc.inl


								/*++


								Copyright (c) Microsoft Corporation. All rights reserved.


								Module Name:


								    xnamathmisc.inl


								Abstract:


									XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions.

								--*/


								#if defined(_MSC_VER) && (_MSC_VER > 1000)

								#pragma once

								#endif


								#ifndef __XNAMATHMISC_INL__

								#define __XNAMATHMISC_INL__


								/****************************************************************************

								 *

								 * Quaternion

								 *

								 ****************************************************************************/


								//------------------------------------------------------------------------------

								// Comparison operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Compares two quaternions for equality.
								// Returns whatever XMVector4Equal reports for the pair (Q1, Q2).
								XMFINLINE BOOL XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2)
								{
								    const BOOL bEqual = XMVector4Equal(Q1, Q2);
								    return bEqual;
								}


								//------------------------------------------------------------------------------


								// Compares two quaternions for inequality.
								// Returns whatever XMVector4NotEqual reports for the pair (Q1, Q2).
								XMFINLINE BOOL XMQuaternionNotEqual(FXMVECTOR Q1, FXMVECTOR Q2)
								{
								    const BOOL bDifferent = XMVector4NotEqual(Q1, Q2);
								    return bDifferent;
								}


								//------------------------------------------------------------------------------


								// NaN test for a quaternion; the check is delegated to XMVector4IsNaN.
								XMFINLINE BOOL XMQuaternionIsNaN(FXMVECTOR Q)
								{
								    const BOOL bHasNaN = XMVector4IsNaN(Q);
								    return bHasNaN;
								}


								//------------------------------------------------------------------------------


								// Infinity test for a quaternion; the check is delegated to XMVector4IsInfinite.
								XMFINLINE BOOL XMQuaternionIsInfinite(FXMVECTOR Q)
								{
								    const BOOL bHasInfinity = XMVector4IsInfinite(Q);
								    return bHasInfinity;
								}


								//------------------------------------------------------------------------------


								// Tests whether Q matches the g_XMIdentityR3 constant in all four components.
								XMFINLINE BOOL XMQuaternionIsIdentity(FXMVECTOR Q)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    const BOOL bIdentity = XMVector4Equal(Q, g_XMIdentityR3.v);
								    return bIdentity;
								#elif defined(_XM_SSE_INTRINSICS_)
								    // Per-lane equality compare; the move-mask collects one bit per lane,
								    // so 0x0f means all four lanes compared equal.
								    const int nLaneMask = _mm_movemask_ps(_mm_cmpeq_ps(Q, g_XMIdentityR3));
								    return (nLaneMask == 0x0f) ? true : false;
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------

								// Computation operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Dot product of two quaternions, computed by XMVector4Dot(Q1, Q2).
								XMFINLINE XMVECTOR XMQuaternionDot(FXMVECTOR Q1, FXMVECTOR Q2)
								{
								    XMVECTOR Dot = XMVector4Dot(Q1, Q2);
								    return Dot;
								}


								//------------------------------------------------------------------------------


								// Quaternion product of Q1 and Q2, evaluated in both code paths as
								//   Q1 * Q2.w + Q1WZYX * Q2.x + Q1ZWXY * Q2.y + Q1YXWZ * Q2.z
								// where each Q1???? term is a swizzle of Q1 with some lanes negated:
								//   Q1WZYX = ( w, -z,  y, -x)
								//   Q1ZWXY = ( z,  w, -x, -y)
								//   Q1YXWZ = (-y,  x,  w, -z)
								XMFINLINE XMVECTOR XMQuaternionMultiply

								(

								    FXMVECTOR Q1,

								    FXMVECTOR Q2

								)

								{

								#if defined(_XM_NO_INTRINSICS_)


								    XMVECTOR         NegativeQ1;

								    XMVECTOR         Q2X;

								    XMVECTOR         Q2Y;

								    XMVECTOR         Q2Z;

								    XMVECTOR         Q2W;

								    XMVECTOR         Q1WZYX;

								    XMVECTOR         Q1ZWXY;

								    XMVECTOR         Q1YXWZ;

								    XMVECTOR         Result;

								    // Permute controls: presumably the 0/1 in XM_PERMUTE_0?/1? picks the first
								    // (Q1) or second (-Q1) permute source, which is how the signs get applied.
								    CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};

								    CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};

								    CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};


								    NegativeQ1 = XMVectorNegate(Q1);


								    // Broadcast each component of Q2 across a full vector
								    Q2W = XMVectorSplatW(Q2);

								    Q2X = XMVectorSplatX(Q2);

								    Q2Y = XMVectorSplatY(Q2);

								    Q2Z = XMVectorSplatZ(Q2);


								    // Build the three signed swizzles of Q1
								    Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);

								    Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);

								    Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);


								    // Accumulate the four partial products
								    Result = XMVectorMultiply(Q1, Q2W);

								    Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);

								    Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);

								    Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);


								    return Result;


								#elif defined(_XM_SSE_INTRINSICS_)

								    // Sign multipliers applied after the matching shuffle of Q1
								    static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};

								    static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};

								    static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};

								    // Copy to SSE registers and use as few as possible for x86

								    XMVECTOR Q2X = Q2;

								    XMVECTOR Q2Y = Q2;

								    XMVECTOR Q2Z = Q2;

								    XMVECTOR vResult = Q2;

								    // Splat with one instruction

								    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));

								    Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));

								    Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));

								    Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));

								    // Retire Q1 and perform Q1*Q2W

								    vResult = _mm_mul_ps(vResult,Q1);

								    XMVECTOR Q1Shuffle = Q1;

								    // Shuffle the copies of Q1
								    // (each shuffle below is applied to the previous shuffle's result:
								    //  xyzw -> wzyx -> zwxy -> yxwz)

								    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));

								    // Mul by Q1WZYX

								    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);

								    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));

								    // Flip the signs on y and w (ControlWZYX = {1,-1,1,-1})

								    Q2X = _mm_mul_ps(Q2X,ControlWZYX);

								    // Mul by Q1ZWXY

								    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);

								    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));

								    // Flip the signs on z and w

								    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);

								    // Mul by Q1YXWZ

								    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);

								    vResult = _mm_add_ps(vResult,Q2X);

								    // Flip the signs on x and w

								    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);

								    // Sum the y and z partial products, then fold them into the result
								    Q2Y = _mm_add_ps(Q2Y,Q2Z);

								    vResult = _mm_add_ps(vResult,Q2Y);

								    return vResult;

								#else // _XM_VMX128_INTRINSICS_

								#endif // _XM_VMX128_INTRINSICS_

								}


								//------------------------------------------------------------------------------


								// Squared length of a quaternion, computed by XMVector4LengthSq(Q).
								XMFINLINE XMVECTOR XMQuaternionLengthSq(FXMVECTOR Q)
								{
								    XMVECTOR LengthSq = XMVector4LengthSq(Q);
								    return LengthSq;
								}


								//------------------------------------------------------------------------------


								// Reciprocal length of a quaternion, computed by XMVector4ReciprocalLength(Q).
								XMFINLINE XMVECTOR XMQuaternionReciprocalLength(FXMVECTOR Q)
								{
								    XMVECTOR InvLength = XMVector4ReciprocalLength(Q);
								    return InvLength;
								}


								//------------------------------------------------------------------------------


								// Length of a quaternion, computed by XMVector4Length(Q).
								XMFINLINE XMVECTOR XMQuaternionLength(FXMVECTOR Q)
								{
								    XMVECTOR Length = XMVector4Length(Q);
								    return Length;
								}


								//------------------------------------------------------------------------------


								// Estimated normalization of a quaternion; forwards to XMVector4NormalizeEst(Q).
								XMFINLINE XMVECTOR XMQuaternionNormalizeEst(FXMVECTOR Q)
								{
								    XMVECTOR Normalized = XMVector4NormalizeEst(Q);
								    return Normalized;
								}


								//------------------------------------------------------------------------------


								// Normalization of a quaternion; forwards to XMVector4Normalize(Q).
								XMFINLINE XMVECTOR XMQuaternionNormalize(FXMVECTOR Q)
								{
								    XMVECTOR Normalized = XMVector4Normalize(Q);
								    return Normalized;
								}


								//------------------------------------------------------------------------------


								// Conjugate of Q: x, y and z are negated, w is passed through unchanged.
								XMFINLINE XMVECTOR XMQuaternionConjugate(FXMVECTOR Q)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR Conjugate = { -Q.vector4_f32[0], -Q.vector4_f32[1], -Q.vector4_f32[2], Q.vector4_f32[3] };
								    return Conjugate;
								#elif defined(_XM_SSE_INTRINSICS_)
								    // Multiplying by (-1,-1,-1,1) negates xyz and keeps w
								    static const XMVECTORF32 FlipXYZ = {-1.0f,-1.0f,-1.0f,1.0f};
								    XMVECTOR Conjugate = _mm_mul_ps(Q,FlipXYZ);
								    return Conjugate;
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Inverse of Q: conjugate(Q) divided by the squared length of Q.
								// When the squared length is <= g_XMEpsilon the zero vector is returned instead.
								XMFINLINE XMVECTOR XMQuaternionInverse(FXMVECTOR Q)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    CONST XMVECTOR Zero = XMVectorZero();

								    XMVECTOR LengthSq = XMVector4LengthSq(Q);
								    XMVECTOR Conjugate = XMQuaternionConjugate(Q);

								    // Lanes set where the quaternion is too short to invert
								    XMVECTOR TooShort = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);

								    XMVECTOR InvLengthSq = XMVectorReciprocal(LengthSq);
								    XMVECTOR Inverse = XMVectorMultiply(Conjugate, InvLengthSq);

								    return XMVectorSelect(Inverse, Zero, TooShort);
								#elif defined(_XM_SSE_INTRINSICS_)
								    XMVECTOR Zero = XMVectorZero();

								    XMVECTOR LengthSq = XMVector4LengthSq(Q);
								    XMVECTOR Conjugate = XMQuaternionConjugate(Q);

								    // Lanes set where the quaternion is too short to invert
								    XMVECTOR TooShort = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);

								    XMVECTOR Inverse = _mm_div_ps(Conjugate,LengthSq);

								    return XMVectorSelect(Inverse, Zero, TooShort);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Logarithm of Q.
								// With theta = acos(Q.w), the result is (Q.xyz * theta / sin(theta), 0) while
								// Q.w lies within [-(1 - 0.00001), 1 - 0.00001]; outside that range the
								// result is (Q.xyz, 0) with no scaling applied.
								XMFINLINE XMVECTOR XMQuaternionLn(FXMVECTOR Q)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

								    XMVECTOR QW = XMVectorSplatW(Q);
								    // xyz taken from Q, w taken from the select mask itself
								    XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);

								    XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon);

								    XMVECTOR Theta = XMVectorACos(QW);
								    XMVECTOR SinTheta = XMVectorSin(Theta);

								    // theta / sin(theta)
								    XMVECTOR S = XMVectorMultiply(Theta, XMVectorReciprocal(SinTheta));

								    XMVECTOR Scaled = XMVectorMultiply(Q0, S);

								    return XMVectorSelect(Q0, Scaled, ControlW);
								#elif defined(_XM_SSE_INTRINSICS_)
								    static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
								    static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};

								    // Broadcast W
								    XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
								    // Copy of Q masked with g_XMMask3 (W cleared)
								    XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);

								    // W is usable only when -(1-eps) <= W <= (1-eps)
								    XMVECTOR BelowUpper = _mm_cmple_ps(QW,OneMinusEpsilon);
								    XMVECTOR AboveLower = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
								    XMVECTOR ControlW = _mm_and_ps(BelowUpper,AboveLower);

								    XMVECTOR Theta = XMVectorACos(QW);
								    XMVECTOR SinTheta = XMVectorSin(Theta);

								    // theta / sin(theta), then scale the vector part
								    XMVECTOR Scale = _mm_div_ps(Theta,SinTheta);
								    XMVECTOR Scaled = _mm_mul_ps(Scale,Q0);

								    // Out-of-range W: return the masked input as is
								    return XMVectorSelect(Q0,Scaled,ControlW);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Exponential of Q.
								// With theta = XMVector3Length(Q), the result is
								//   xyz = Q.xyz * sin(theta) / theta   (Q.xyz unscaled when theta is within
								//                                       g_XMEpsilon of zero)
								//   w   = cos(theta)
								XMFINLINE XMVECTOR XMQuaternionExp(FXMVECTOR Q)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR SinTheta;
								    XMVECTOR CosTheta;

								    XMVECTOR Theta = XMVector3Length(Q);
								    XMVectorSinCos(&SinTheta, &CosTheta, Theta);

								    // sin(theta) / theta
								    XMVECTOR S = XMVectorMultiply(SinTheta, XMVectorReciprocal(Theta));

								    XMVECTOR Scaled = XMVectorMultiply(Q, S);

								    // Fall back to the unscaled input where theta is (nearly) zero
								    XMVECTOR Zero = XMVectorZero();
								    XMVECTOR NearZero = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
								    Scaled = XMVectorSelect(Scaled, Q, NearZero);

								    // xyz from the scaled vector, w from cos(theta)
								    return XMVectorSelect(CosTheta, Scaled, g_XMSelect1110.v);
								#elif defined(_XM_SSE_INTRINSICS_)
								    XMVECTOR SinTheta;
								    XMVECTOR CosTheta;

								    XMVECTOR Theta = XMVector3Length(Q);
								    XMVectorSinCos(&SinTheta, &CosTheta, Theta);

								    // sin(theta) / theta
								    XMVECTOR S = _mm_div_ps(SinTheta,Theta);
								    XMVECTOR Scaled = _mm_mul_ps(Q, S);

								    // Fall back to the unscaled input where theta is (nearly) zero
								    XMVECTOR Zero = XMVectorZero();
								    XMVECTOR NearZero = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
								    Scaled = XMVectorSelect(Scaled,Q,NearZero);

								    // Merge: g_XMMask3 lanes from the scaled vector, g_XMMaskW lane from cos(theta)
								    Scaled = _mm_and_ps(Scaled,g_XMMask3);
								    CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
								    return _mm_or_ps(Scaled,CosTheta);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Scalar-parameter front end for XMQuaternionSlerpV: t is replicated into
								// every lane of a vector and the work is handed to the vector version.
								XMINLINE XMVECTOR XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, FLOAT t)
								{
								    return XMQuaternionSlerpV(Q0, Q1, XMVectorReplicate(t));
								}


								//------------------------------------------------------------------------------


								// Spherical interpolation between Q0 and Q1 with the factor replicated in T
								// (asserted: all four lanes of T hold the same value).
								//
								//   Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
								//
								// If dot(Q0, Q1) is negative, both the dot product and the Q1 weight are
								// multiplied by -1. When the (sign-corrected) cosine is not below
								// 1 - 0.00001 the weights (1 - t, t) are used directly instead of the
								// sine-based ones.
								XMINLINE XMVECTOR XMQuaternionSlerpV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR T)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

								    XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));

								    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

								    // Sign is -1 where the dot product is negative, +1 otherwise
								    XMVECTOR Zero = XMVectorZero();
								    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
								    XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);

								    CosOmega = XMVectorMultiply(CosOmega, Sign);

								    // Set where the sine-based weights are to be used
								    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

								    // sin(Omega) = sqrt(1 - cos(Omega)^2)
								    XMVECTOR SinOmega = XMVectorSqrt(XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v));

								    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

								    // Build the weight seed V01; the SSE path's equivalent yields (1 - t, t, 0, 0)
								    XMVECTOR SignMask = XMVectorSplatSignMask();
								    XMVECTOR C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
								    XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
								    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
								    V01 = XMVectorXorInt(V01, SignMask);
								    V01 = XMVectorAdd(C1000, V01);

								    XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);

								    // sin(V01 * Omega) / sin(Omega)
								    XMVECTOR S0 = XMVectorSin(XMVectorMultiply(V01, Omega));
								    S0 = XMVectorMultiply(S0, InvSinOmega);

								    S0 = XMVectorSelect(V01, S0, Control);

								    // Lane x weights Q0, lane y weights Q1
								    XMVECTOR S1 = XMVectorSplatY(S0);
								    S0 = XMVectorSplatX(S0);

								    S1 = XMVectorMultiply(S1, Sign);

								    XMVECTOR Blend = XMVectorMultiply(Q0, S0);
								    return XMVectorMultiplyAdd(Q1, S1, Blend);
								#elif defined(_XM_SSE_INTRINSICS_)
								    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
								    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
								    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};

								    XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

								    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

								    // Sign is -1 where the dot product is negative, +1 otherwise
								    XMVECTOR Zero = XMVectorZero();
								    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
								    XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);

								    CosOmega = _mm_mul_ps(CosOmega, Sign);

								    // Set where the sine-based weights are to be used
								    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

								    // sin(Omega) = sqrt(1 - cos(Omega)^2)
								    XMVECTOR SinOmega = _mm_sqrt_ps(_mm_sub_ps(g_XMOne,_mm_mul_ps(CosOmega,CosOmega)));

								    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

								    // (t, t, 0, 0) -> (-t, t, 0, 0) -> g_XMIdentityR0 + that,
								    // i.e. (1 - t, t, 0, 0) assuming g_XMIdentityR0 is (1, 0, 0, 0)
								    XMVECTOR V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
								    V01 = _mm_and_ps(V01,MaskXY);
								    V01 = _mm_xor_ps(V01,SignMask2);
								    V01 = _mm_add_ps(g_XMIdentityR0, V01);

								    // sin(V01 * Omega) / sin(Omega)
								    XMVECTOR S0 = XMVectorSin(_mm_mul_ps(V01, Omega));
								    S0 = _mm_div_ps(S0, SinOmega);

								    S0 = XMVectorSelect(V01, S0, Control);

								    // Lane x weights Q0, lane y weights Q1
								    XMVECTOR S1 = XMVectorSplatY(S0);
								    S0 = XMVectorSplatX(S0);

								    S1 = _mm_mul_ps(S1, Sign);
								    XMVECTOR Blend = _mm_mul_ps(Q0, S0);
								    S1 = _mm_mul_ps(S1, Q1);
								    return _mm_add_ps(Blend,S1);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Scalar-parameter front end for XMQuaternionSquadV: t is replicated into
								// every lane of a vector and the work is handed to the vector version.
								XMFINLINE XMVECTOR XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, CXMVECTOR Q3, FLOAT t)
								{
								    return XMQuaternionSquadV(Q0, Q1, Q2, Q3, XMVectorReplicate(t));
								}


								//------------------------------------------------------------------------------


								// Spherical quadrangle interpolation with the factor replicated in T
								// (asserted: all four lanes of T hold the same value):
								//   Slerp( Slerp(Q0, Q3, t), Slerp(Q1, Q2, t), 2 * (t - t * t) )
								XMFINLINE XMVECTOR XMQuaternionSquadV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, CXMVECTOR Q3, CXMVECTOR T)
								{
								    XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );

								    XMVECTOR Blend = T;
								    XMVECTOR Two = XMVectorSplatConstant(2, 0);

								    XMVECTOR Outer = XMQuaternionSlerpV(Q0, Q3, T);
								    XMVECTOR Inner = XMQuaternionSlerpV(Q1, Q2, T);

								    // Blend = (t - t * t) * 2
								    Blend = XMVectorNegativeMultiplySubtract(Blend, Blend, Blend);
								    Blend = XMVectorMultiply(Blend, Two);

								    return XMQuaternionSlerpV(Outer, Inner, Blend);
								}


								//------------------------------------------------------------------------------


								// Computes the three setup quaternions written to *pA, *pB and *pC from the
								// four inputs Q0..Q3. All three output pointers are asserted non-null.
								//
								// Q0, Q2 and Q3 are first sign-adjusted (SQ0, SQ2, SQ3): each is replaced by
								// its negation when the squared length of the sum with its neighbour is
								// smaller than the squared length of the difference. Then:
								//   *pA = Q1  * exp(-1/4 * (ln(Q1^-1  * SQ0) + ln(Q1^-1  * SQ2)))
								//   *pB = SQ2 * exp(-1/4 * (ln(SQ2^-1 * Q1 ) + ln(SQ2^-1 * SQ3)))
								//   *pC = SQ2
								// (products written in XMQuaternionMultiply argument order).
								XMINLINE VOID XMQuaternionSquadSetup(XMVECTOR* pA, XMVECTOR* pB, XMVECTOR* pC, FXMVECTOR  Q0, FXMVECTOR  Q1, FXMVECTOR  Q2, CXMVECTOR  Q3)
								{
								    XMASSERT(pA);
								    XMASSERT(pB);
								    XMASSERT(pC);

								    // Sign-adjust Q2 against Q1
								    XMVECTOR SumSq12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
								    XMVECTOR DiffSq12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
								    XMVECTOR SQ2 = XMVectorNegate(Q2);
								    XMVECTOR Flip2 = XMVectorLess(SumSq12, DiffSq12);
								    SQ2 = XMVectorSelect(Q2, SQ2, Flip2);

								    // Sign-adjust Q0 against Q1
								    XMVECTOR SumSq01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
								    XMVECTOR DiffSq01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
								    XMVECTOR SQ0 = XMVectorNegate(Q0);

								    // Sign-adjust Q3 against the already adjusted Q2
								    XMVECTOR SumSq23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
								    XMVECTOR DiffSq23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
								    XMVECTOR SQ3 = XMVectorNegate(Q3);

								    XMVECTOR Flip0 = XMVectorLess(SumSq01, DiffSq01);
								    XMVECTOR Flip3 = XMVectorLess(SumSq23, DiffSq23);

								    SQ0 = XMVectorSelect(Q0, SQ0, Flip0);
								    SQ3 = XMVectorSelect(Q3, SQ3, Flip3);

								    XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
								    XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);

								    // Logs of each neighbour relative to Q1 and to SQ2
								    XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
								    XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
								    XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
								    XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));

								    XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);

								    XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
								    XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
								    ExpQ02 = XMQuaternionExp(ExpQ02);
								    ExpQ13 = XMQuaternionExp(ExpQ13);

								    *pA = XMQuaternionMultiply(Q1, ExpQ02);
								    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
								    *pC = SQ2;
								}


								//------------------------------------------------------------------------------


								// Barycentric blend of Q0, Q1 and Q2 with scalar weights f and g.
								// With s = f + g:
								//   - if -0.00001 < s < 0.00001, Q0 is returned unchanged;
								//   - otherwise Slerp( Slerp(Q0, Q1, s), Slerp(Q0, Q2, s), g / s ).
								XMFINLINE XMVECTOR XMQuaternionBaryCentric(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, FLOAT    f, FLOAT    g)
								{
								    FLOAT s = f + g;

								    // Degenerate weights: avoid dividing by (almost) zero below
								    if ((s < 0.00001f) && (s > -0.00001f))
								    {
								        return Q0;
								    }

								    XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
								    XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);

								    return XMQuaternionSlerp(Q01, Q02, g / s);
								}


								//------------------------------------------------------------------------------


								// Barycentric blend of Q0, Q1 and Q2 with the weights replicated in F and G
								// (asserted: each holds the same value in all four lanes).
								// With S = F + G:
								//   - if XMVector4InBounds(S, XMVectorSplatConstant(1, 16)) holds, Q0 is returned;
								//   - otherwise SlerpV( SlerpV(Q0, Q1, S), SlerpV(Q0, Q2, S), G * (1 / S) ).
								XMFINLINE XMVECTOR XMQuaternionBaryCentricV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, CXMVECTOR F, CXMVECTOR G)
								{
								    XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
								    XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );

								    XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);

								    XMVECTOR S = XMVectorAdd(F, G);

								    // Degenerate weights: skip the reciprocal of a near-zero sum
								    if (XMVector4InBounds(S, Epsilon))
								    {
								        return Q0;
								    }

								    XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
								    XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);

								    // G / S, formed as G * reciprocal(S)
								    XMVECTOR GS = XMVectorMultiply(G, XMVectorReciprocal(S));

								    return XMQuaternionSlerpV(Q01, Q02, GS);
								}


								//------------------------------------------------------------------------------

								// Transformation operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Returns the identity quaternion, i.e. the value of the g_XMIdentityR3 constant.
								XMFINLINE XMVECTOR XMQuaternionIdentity()
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR Identity = g_XMIdentityR3.v;
								    return Identity;
								#elif defined(_XM_SSE_INTRINSICS_)
								    XMVECTOR Identity = g_XMIdentityR3;
								    return Identity;
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Scalar front end for XMQuaternionRotationRollPitchYawFromVector: the three
								// angles are packed as <Pitch, Yaw, Roll, 0> and forwarded.
								XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw(FLOAT Pitch, FLOAT Yaw, FLOAT Roll)
								{
								    return XMQuaternionRotationRollPitchYawFromVector(XMVectorSet(Pitch, Yaw, Roll, 0.0f));
								}


								//------------------------------------------------------------------------------


								// Builds a rotation quaternion from the angles packed in
								// Angles = <Pitch, Yaw, Roll, 0>.
								//
								// The sine and cosine of the half angles are computed once, rearranged by
								// three permute controls into per-axis factor vectors, and combined as
								//   Q = (P1 * Sign * Y1) * R1 + (P0 * Y0 * R0)
								// with Sign = (1, -1, -1, 1).
								XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
								(
								    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
								)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
								    static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
								    static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
								    static CONST XMVECTOR   Sign = {1.0f, -1.0f, -1.0f, 1.0f};

								    XMVECTOR SinAngles;
								    XMVECTOR CosAngles;

								    XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
								    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);

								    // Factor vectors with (sin, cos) as the permute sources ...
								    XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
								    XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
								    XMVECTOR R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
								    // ... and the same controls with the sources swapped
								    XMVECTOR P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
								    XMVECTOR Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
								    XMVECTOR R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);

								    XMVECTOR Q1 = XMVectorMultiply(XMVectorMultiply(P1, Sign), Y1);
								    XMVECTOR Q0 = XMVectorMultiply(XMVectorMultiply(P0, Y0), R0);

								    return XMVectorMultiplyAdd(Q1, R1, Q0);
								#elif defined(_XM_SSE_INTRINSICS_)
								    static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
								    static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
								    static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
								    static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};

								    XMVECTOR SinAngles;
								    XMVECTOR CosAngles;

								    XMVECTOR HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
								    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);

								    // Factor vectors with (sin, cos) as the permute sources ...
								    XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
								    XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
								    XMVECTOR R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
								    // ... and the same controls with the sources swapped
								    XMVECTOR P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
								    XMVECTOR Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
								    XMVECTOR R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);

								    XMVECTOR Q1 = _mm_mul_ps(_mm_mul_ps(P1, Sign), Y1);
								    XMVECTOR Q0 = _mm_mul_ps(_mm_mul_ps(P0, Y0), R0);

								    XMVECTOR Q = _mm_mul_ps(Q1, R1);
								    return _mm_add_ps(Q,Q0);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Builds a rotation quaternion from an axis and an angle:
								//   xyz = NormalAxis.xyz * sin(Angle / 2)
								//   w   = cos(Angle / 2)
								// The axis is used as given (no normalization happens here); see
								// XMQuaternionRotationAxis for the variant that normalizes first.
								XMFINLINE XMVECTOR XMQuaternionRotationNormal(FXMVECTOR NormalAxis, FLOAT    Angle)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR Scale;

								    // xyz from the axis, w from g_XMOne
								    XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);

								    // Sine lands in lane z, cosine in lane w; the sine is then copied to y and x
								    XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
								    Scale.vector4_f32[1] = Scale.vector4_f32[2];
								    Scale.vector4_f32[0] = Scale.vector4_f32[1];

								    return XMVectorMultiply(N, Scale);
								#elif defined(_XM_SSE_INTRINSICS_)
								    // Axis masked with g_XMMask3, then OR-ed with g_XMIdentityR3
								    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
								    N = _mm_or_ps(N,g_XMIdentityR3);

								    XMVECTOR HalfAngle = _mm_set_ps1(0.5f * Angle);
								    XMVECTOR vSine;
								    XMVECTOR vCosine;
								    XMVectorSinCos(&vSine,&vCosine,HalfAngle);

								    // Scale = sine in the g_XMMask3 lanes, cosine in the g_XMMaskW lane
								    XMVECTOR Scale = _mm_and_ps(vSine,g_XMMask3);
								    vCosine = _mm_and_ps(vCosine,g_XMMaskW);
								    Scale = _mm_or_ps(Scale,vCosine);

								    return _mm_mul_ps(N,Scale);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Builds a rotation quaternion from an arbitrary (non-unit) axis and an angle.
								// The axis is normalized with XMVector3Normalize and the result is handed to
								// XMQuaternionRotationNormal. Asserts that Axis is neither the zero vector
								// nor infinite.
								XMFINLINE XMVECTOR XMQuaternionRotationAxis(FXMVECTOR Axis, FLOAT    Angle)
								{
								#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
								    // Both supported code paths share the same implementation.
								    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
								    XMASSERT(!XMVector3IsInfinite(Axis));

								    XMVECTOR UnitAxis = XMVector3Normalize(Axis);

								    return XMQuaternionRotationNormal(UnitAxis, Angle);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Converts the upper-left 3x3 part of M into a quaternion without branching.
								// Four candidate solutions (one per quaternion component) are prepared with
								// vector math; comparison masks then select which candidate's permutation,
								// sign pattern and scale are used to assemble the result.
								// NOTE(review): M is presumably expected to be a pure rotation matrix - the
								// code does not check this; confirm against callers.
								XMINLINE XMVECTOR XMQuaternionRotationMatrix

								(

								    CXMMATRIX M

								)

								{

								#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)


								    XMVECTOR Q0, Q1, Q2;

								    XMVECTOR M00, M11, M22;

								    XMVECTOR CQ0, CQ1, C;

								    XMVECTOR CX, CY, CZ, CW;

								    XMVECTOR SQ1, Scale;

								    XMVECTOR Rsq, Sqrt, VEqualsNaN;

								    XMVECTOR A, B, P;

								    XMVECTOR PermuteSplat, PermuteSplatT;

								    XMVECTOR SignB, SignBT;

								    XMVECTOR PermuteControl, PermuteControlT;

								    XMVECTOR Result;

								    // Note: OneQuarter is declared but not referenced anywhere in this function.
								    static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};

								    // Sign patterns used to combine the diagonal terms (first three) and the
								    // off-diagonal terms (last three) for each candidate.
								    static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};

								    static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};

								    static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};

								    static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};

								    static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};

								    static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};

								    // Permutes that line up the components of Q0 for the pairwise comparisons.
								    static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};

								    static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};

								    // Permute controls that broadcast a single component to all four lanes.
								    static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};

								    static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

								    static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};

								    static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};

								    // Permutes that gather the off-diagonal matrix elements into A and B.
								    static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};

								    static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};

								    static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};

								    // Final merge controls, one per candidate: each takes exactly one lane
								    // from Q1 (first operand) and the other three lanes from Q2 (second operand).
								    static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};

								    static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};

								    static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};

								    static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};


								    // Broadcast the three diagonal elements.
								    M00 = XMVectorSplatX(M.r[0]);

								    M11 = XMVectorSplatY(M.r[1]);

								    M22 = XMVectorSplatZ(M.r[2]);


								    // Q0 = { M00-M11-M22, -M00+M11-M22, -M00-M11+M22, M00+M11+M22 }
								    Q0 = XMVectorMultiply(SignPNNP.v, M00);

								    Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);

								    Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);


								    Q1 = XMVectorAdd(Q0, g_XMOne.v);


								    // Q1 = sqrt(Q1), computed as Q1 * rsqrt(Q1). Lanes whose reciprocal
								    // square root is NaN keep their original Q1 value instead.
								    Rsq = XMVectorReciprocalSqrt(Q1);

								    VEqualsNaN = XMVectorIsNaN(Rsq);

								    Sqrt = XMVectorMultiply(Q1, Rsq);

								    Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);


								    // Q1 = 0.5 * sqrt(Q0 + 1): the directly computed component of each candidate.
								    Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);


								    // SQ1 = 0.5 / sqrt(Q0 + 1): scale for the remaining three components.
								    SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);


								    // C = { Q0.x >= Q0.y, Q0.x >= Q0.z, Q0.y >= Q0.z, Q0.w >= epsilon.w }
								    CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);

								    CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);

								    C = XMVectorGreaterOrEqual(CQ0, CQ1);


								    CX = XMVectorSplatX(C);

								    CY = XMVectorSplatY(C);

								    CZ = XMVectorSplatZ(C);

								    CW = XMVectorSplatW(C);


								    // Selection cascade. Net effect of the four steps below:
								    //   if (x >= y)  pick the X candidate when x >= z, otherwise Z
								    //   else         pick the Y candidate when y >= z, otherwise Z
								    // where x, y, z are the first three lanes of Q0.
								    PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);

								    SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);

								    PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);


								    PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);

								    SignB = XMVectorSelect(SignB, SignNPPP.v, CX);

								    PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);


								    PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);

								    SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);

								    PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);


								    PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);

								    SignB = XMVectorSelect(SignB, SignBT, CX);

								    PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);


								    // The W candidate overrides everything else whenever Q0.w >= epsilon.
								    PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);

								    SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);

								    PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);


								    // Broadcast the scale that belongs to the chosen candidate.
								    Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);


								    P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v);  // {M10, M12, M20, M21}

								    A = XMVectorPermute(M.r[0], P, PermuteA.v);       // {M01, M12, M20, M03}

								    B = XMVectorPermute(M.r[0], P, PermuteB.v);       // {M10, M21, M02, M03}


								    // Q2 = (A +/- B) * Scale, with the per-lane signs taken from SignB.
								    Q2 = XMVectorMultiplyAdd(SignB, B, A);

								    Q2 = XMVectorMultiply(Q2, Scale);


								    // Merge the directly computed component (from Q1) with the three
								    // derived components (from Q2) in the order the chosen candidate needs.
								    Result = XMVectorPermute(Q1, Q2, PermuteControl);


								    return Result;


								#else // _XM_VMX128_INTRINSICS_

								#endif // _XM_VMX128_INTRINSICS_

								}


								//------------------------------------------------------------------------------

								// Conversion operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Splits quaternion Q into an axis and an angle.
								//   pAxis  - receives a plain copy of Q (it is not normalized here).
								//   pAngle - receives 2 * acos(Q.w).
								// Both output pointers must be non-null (asserted).
								XMFINLINE VOID XMQuaternionToAxisAngle(XMVECTOR* pAxis, FLOAT* pAngle, FXMVECTOR Q)
								{
								    FLOAT fHalfAngle;

								    XMASSERT(pAxis);
								    XMASSERT(pAngle);

								    *pAxis = Q;

								    // SSE builds use the CRT arc cosine, every other build the library's own.
								#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
								    fHalfAngle = acosf(XMVectorGetW(Q));
								#else
								    fHalfAngle = XMScalarACos(XMVectorGetW(Q));
								#endif

								    *pAngle = 2.0f * fHalfAngle;
								}


								/****************************************************************************

								 *

								 * Plane

								 *

								 ****************************************************************************/


								//------------------------------------------------------------------------------

								// Comparison operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Compares two planes by forwarding their raw coefficients to XMVector4Equal.
								XMFINLINE BOOL XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2)
								{
								    BOOL bEqual = XMVector4Equal(P1, P2);
								    return bEqual;
								}


								//------------------------------------------------------------------------------


								// Tolerance-based plane comparison. Both planes are normalized with
								// XMPlaneNormalize first, then compared with XMVector4NearEqual using
								// Epsilon as the tolerance vector.
								XMFINLINE BOOL XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon)
								{
								    XMVECTOR Unit1 = XMPlaneNormalize(P1);
								    XMVECTOR Unit2 = XMPlaneNormalize(P2);
								    BOOL bNear = XMVector4NearEqual(Unit1, Unit2, Epsilon);
								    return bNear;
								}


								//------------------------------------------------------------------------------


								// Inequality test for two planes; forwards the raw coefficients to
								// XMVector4NotEqual.
								XMFINLINE BOOL XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2)
								{
								    BOOL bDifferent = XMVector4NotEqual(P1, P2);
								    return bDifferent;
								}


								//------------------------------------------------------------------------------


								// NaN test for a plane; returns whatever XMVector4IsNaN reports for P.
								XMFINLINE BOOL XMPlaneIsNaN(FXMVECTOR P)
								{
								    BOOL bHasNaN = XMVector4IsNaN(P);
								    return bHasNaN;
								}


								//------------------------------------------------------------------------------


								// Infinity test for a plane; returns whatever XMVector4IsInfinite reports for P.
								XMFINLINE BOOL XMPlaneIsInfinite(FXMVECTOR P)
								{
								    BOOL bHasInfinity = XMVector4IsInfinite(P);
								    return bHasInfinity;
								}


								//------------------------------------------------------------------------------

								// Computation operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Four-component dot product of plane P with vector V. The SSE path
								// replicates the sum into every lane of the returned vector.
								XMFINLINE XMVECTOR XMPlaneDot(FXMVECTOR P, FXMVECTOR V)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    return XMVector4Dot(P, V);

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Per-lane products: {Px*Vx, Py*Vy, Pz*Vz, Pw*Vw}
								    __m128 vProduct = _mm_mul_ps(P,V);
								    // Move product.x into lane 2 and product.y into lane 3 (lanes 0/1 are don't-care)
								    __m128 vPairs = _mm_shuffle_ps(V,vProduct,_MM_SHUFFLE(1,0,0,0));
								    // Lane 2 = x+z, lane 3 = y+w
								    vPairs = _mm_add_ps(vPairs,vProduct);
								    // Bring the (y+w) partial sum into lane 2
								    __m128 vSum = _mm_shuffle_ps(vProduct,vPairs,_MM_SHUFFLE(0,3,0,0));
								    // Lane 2 = (y+w) + (x+z)
								    vSum = _mm_add_ps(vSum,vPairs);
								    // Broadcast lane 2 to all lanes
								    return _mm_shuffle_ps(vSum,vSum,_MM_SHUFFLE(2,2,2,2));
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Dot product of plane P with V treated as a point: V's w component is
								// replaced by 1, so the result is P.x*V.x + P.y*V.y + P.z*V.z + P.w.
								XMFINLINE XMVECTOR XMPlaneDotCoord(FXMVECTOR P, FXMVECTOR V)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    // Take x, y, z from V and w from the all-ones constant
								    XMVECTOR Coord = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);

								    return XMVector4Dot(P, Coord);

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Keep x, y, z of V and force w to 1.0f
								    XMVECTOR vCoord = _mm_and_ps(V,g_XMMask3);
								    vCoord = _mm_or_ps(vCoord,g_XMIdentityR3);
								    // Per-lane products: {Px*Vx, Py*Vy, Pz*Vz, Pw}
								    XMVECTOR vProduct = _mm_mul_ps(P,vCoord);
								    // Move product.x into lane 2 and product.y into lane 3 (lanes 0/1 are don't-care)
								    XMVECTOR vPairs = _mm_shuffle_ps(vCoord,vProduct,_MM_SHUFFLE(1,0,0,0));
								    // Lane 2 = x+z, lane 3 = y+w
								    vPairs = _mm_add_ps(vPairs,vProduct);
								    // Bring the (y+w) partial sum into lane 2
								    XMVECTOR vSum = _mm_shuffle_ps(vProduct,vPairs,_MM_SHUFFLE(0,3,0,0));
								    // Lane 2 = (y+w) + (x+z)
								    vSum = _mm_add_ps(vSum,vPairs);
								    // Broadcast lane 2 to all lanes
								    return _mm_shuffle_ps(vSum,vSum,_MM_SHUFFLE(2,2,2,2));
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Dot product of the plane's first three coefficients with V, computed by
								// XMVector3Dot (the plane's w term does not take part).
								XMFINLINE XMVECTOR XMPlaneDotNormal(FXMVECTOR P, FXMVECTOR V)
								{
								    XMVECTOR NormalDot = XMVector3Dot(P, V);
								    return NormalDot;
								}


								//------------------------------------------------------------------------------

								// XMPlaneNormalizeEst uses a reciprocal estimate and

								// returns QNaN on zero and infinite vectors.


								// Scales all four plane coefficients by the reciprocal length of the
								// plane's (x, y, z) part. The SSE path uses the _mm_rsqrt_ps approximation
								// and performs no special handling of zero or infinite lengths.
								XMFINLINE XMVECTOR XMPlaneNormalizeEst(FXMVECTOR P)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR InvLength = XMVector3ReciprocalLength(P);

								    return XMVectorMultiply(P, InvLength);

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Square every component; only x, y and z are summed below
								    XMVECTOR vSquares = _mm_mul_ps(P,P);
								    // Lane 0 = y*y, lane 1 = z*z
								    XMVECTOR vYZ = _mm_shuffle_ps(vSquares,vSquares,_MM_SHUFFLE(2,1,2,1));
								    // Lane 0 = x*x + y*y
								    XMVECTOR vLengthSq = _mm_add_ss(vSquares,vYZ);
								    // Lane 0 = z*z
								    vYZ = _mm_shuffle_ps(vYZ,vYZ,_MM_SHUFFLE(1,1,1,1));
								    // Lane 0 = (x*x + y*y) + z*z
								    vLengthSq = _mm_add_ss(vLengthSq,vYZ);
								    // Broadcast the squared length to all lanes
								    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
								    // Approximate 1/sqrt(lengthSq)
								    XMVECTOR vInvLength = _mm_rsqrt_ps(vLengthSq);
								    // Scale the plane by the reciprocal length
								    return _mm_mul_ps(vInvLength,P);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Normalizes a plane: all four coefficients are divided by the length of
								// the (x, y, z) normal.
								//
								// Degenerate input:
								//   - zero-length normal: every component of the result is zero (both
								//     paths; the SSE path previously divided by zero and produced NaN/Inf
								//     even though its comment promised a zero-length failsafe).
								//   - infinite length (SSE path): every component of the result is zero.
								XMFINLINE XMVECTOR XMPlaneNormalize
								(
								    FXMVECTOR P
								)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    // Length of the plane normal (x, y, z only)
								    FLOAT fLength = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
								    // Stays zero for a zero-length normal, which zeroes the whole result
								    FLOAT fInvLength = 0.0f;
								    XMVECTOR vResult;

								    // Prevent divide by zero
								    if (fLength != 0.0f) {
								        fInvLength = 1.0f/fLength;
								    }

								    vResult.vector4_f32[0] = P.vector4_f32[0]*fInvLength;
								    vResult.vector4_f32[1] = P.vector4_f32[1]*fInvLength;
								    vResult.vector4_f32[2] = P.vector4_f32[2]*fInvLength;
								    vResult.vector4_f32[3] = P.vector4_f32[3]*fInvLength;
								    return vResult;
								#elif defined(_XM_SSE_INTRINSICS_)
								    // Perform the dot product on x,y and z only
								    XMVECTOR vLengthSq = _mm_mul_ps(P,P);
								    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
								    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
								    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
								    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
								    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
								    // Length of the normal, replicated in every lane
								    XMVECTOR vLength = _mm_sqrt_ps(vLengthSq);
								    // All-ones where the length is non-zero. The compare must be done in
								    // floating point so that -0.0 is treated as zero as well.
								    XMVECTOR vNonZero = _mm_cmpneq_ps(_mm_setzero_ps(),vLength);
								    // All-ones where the squared length is not infinity
								    XMVECTOR vFinite = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
								    // Divide to perform the normalization
								    XMVECTOR vResult = _mm_div_ps(P,vLength);
								    // Zero-length planes yield zero instead of NaN/Inf
								    vResult = _mm_and_ps(vResult,vNonZero);
								    // Infinite-length planes yield zero
								    vResult = _mm_and_ps(vResult,vFinite);
								    return vResult;
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Intersects plane P with the line through LinePoint1 and LinePoint2.
								// With D = dot3(P, LinePoint1) - dot3(P, LinePoint2) and
								// t = XMPlaneDotCoord(P, LinePoint1) / D, the result is
								// LinePoint1 + t * (LinePoint2 - LinePoint1).
								// Lanes where D is within g_XMEpsilon of zero are returned as QNaN.
								XMFINLINE XMVECTOR XMPlaneIntersectLine(FXMVECTOR P, FXMVECTOR LinePoint1, FXMVECTOR LinePoint2)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR NormalDot1 = XMVector3Dot(P, LinePoint1);
								    XMVECTOR NormalDot2 = XMVector3Dot(P, LinePoint2);
								    XMVECTOR Denominator = XMVectorSubtract(NormalDot1, NormalDot2);

								    // This path multiplies by the reciprocal instead of dividing
								    XMVECTOR InvDenominator = XMVectorReciprocal(Denominator);
								    XMVECTOR Numerator = XMPlaneDotCoord(P, LinePoint1);
								    XMVECTOR T = XMVectorMultiply(Numerator, InvDenominator);

								    XMVECTOR Direction = XMVectorSubtract(LinePoint2, LinePoint1);
								    XMVECTOR Hit = XMVectorMultiplyAdd(Direction, T, LinePoint1);

								    // Mask that is set where the denominator is (nearly) zero
								    XMVECTOR NoHit = XMVectorNearEqual(Denominator, XMVectorZero(), g_XMEpsilon.v);

								    return XMVectorSelect(Hit, g_XMQNaN.v, NoHit);

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMVECTOR NormalDot1 = XMVector3Dot(P, LinePoint1);
								    XMVECTOR NormalDot2 = XMVector3Dot(P, LinePoint2);
								    XMVECTOR Denominator = _mm_sub_ps(NormalDot1, NormalDot2);

								    XMVECTOR T = XMPlaneDotCoord(P, LinePoint1);
								    T = _mm_div_ps(T, Denominator);

								    XMVECTOR Hit = _mm_sub_ps(LinePoint2, LinePoint1);
								    Hit = _mm_mul_ps(Hit,T);
								    Hit = _mm_add_ps(Hit,LinePoint1);

								    // Mask that is set where the denominator is (nearly) zero
								    XMVECTOR NoHit = XMVectorNearEqual(Denominator, XMVectorZero(), g_XMEpsilon);

								    return XMVectorSelect(Hit, g_XMQNaN, NoHit);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Computes two points on the line where planes P1 and P2 intersect.
								//   pLinePoint1 - receives a point on the line.
								//   pLinePoint2 - receives that point offset by the line direction cross3(P2, P1).
								// When the squared length of the direction is <= g_XMEpsilon both outputs
								// are set to QNaN. Both output pointers must be non-null (asserted).
								XMINLINE VOID XMPlaneIntersectPlane(XMVECTOR* pLinePoint1, XMVECTOR* pLinePoint2, FXMVECTOR P1, FXMVECTOR P2)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR Direction;
								    XMVECTOR DirLengthSq;
								    XMVECTOR InvDirLengthSq;
								    XMVECTOR CrossP2;
								    XMVECTOR CrossP1;
								    XMVECTOR Weighted;
								    XMVECTOR Start;
								    XMVECTOR End;
								    XMVECTOR Degenerate;

								    XMASSERT(pLinePoint1);
								    XMASSERT(pLinePoint2);

								    // Direction of the intersection line and its squared length
								    Direction = XMVector3Cross(P2, P1);
								    DirLengthSq = XMVector3LengthSq(Direction);

								    // Weighted = cross3(P2, Direction) * P1.w + cross3(Direction, P1) * P2.w
								    CrossP2 = XMVector3Cross(P2, Direction);
								    Weighted = XMVectorMultiply(CrossP2, XMVectorSplatW(P1));

								    CrossP1 = XMVector3Cross(Direction, P1);
								    Weighted = XMVectorMultiplyAdd(CrossP1, XMVectorSplatW(P2), Weighted);

								    // This path multiplies by the reciprocal instead of dividing
								    InvDirLengthSq = XMVectorReciprocal(DirLengthSq);
								    Start = XMVectorMultiply(Weighted, InvDirLengthSq);

								    End = XMVectorAdd(Start, Direction);

								    // Mask that is set when the direction is too short to be usable
								    Degenerate = XMVectorLessOrEqual(DirLengthSq, g_XMEpsilon.v);
								    *pLinePoint1 = XMVectorSelect(Start,g_XMQNaN.v, Degenerate);
								    *pLinePoint2 = XMVectorSelect(End,g_XMQNaN.v, Degenerate);

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(pLinePoint1);
								    XMASSERT(pLinePoint2);

								    // Direction of the intersection line and its squared length
								    XMVECTOR Direction = XMVector3Cross(P2, P1);
								    XMVECTOR DirLengthSq = XMVector3LengthSq(Direction);

								    // Weighted = cross3(P2, Direction) * P1.w + cross3(Direction, P1) * P2.w
								    XMVECTOR CrossP2 = XMVector3Cross(P2, Direction);
								    XMVECTOR SplatP1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
								    XMVECTOR Weighted = _mm_mul_ps(CrossP2, SplatP1W);

								    XMVECTOR CrossP1 = XMVector3Cross(Direction, P1);
								    XMVECTOR SplatP2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
								    CrossP1 = _mm_mul_ps(CrossP1,SplatP2W);
								    Weighted = _mm_add_ps(Weighted,CrossP1);

								    XMVECTOR Start = _mm_div_ps(Weighted,DirLengthSq);
								    XMVECTOR End = _mm_add_ps(Start, Direction);

								    // Mask that is set when the direction is too short to be usable
								    XMVECTOR Degenerate = XMVectorLessOrEqual(DirLengthSq, g_XMEpsilon);
								    *pLinePoint1 = XMVectorSelect(Start,g_XMQNaN, Degenerate);
								    *pLinePoint2 = XMVectorSelect(End,g_XMQNaN, Degenerate);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Multiplies plane P (as a four-component row vector) by matrix M:
								// P.x*M.r[0] + P.y*M.r[1] + P.z*M.r[2] + P.w*M.r[3].
								// The order in which the four terms are summed differs between the
								// scalar and SSE paths and is kept exactly as it was.
								XMFINLINE XMVECTOR XMPlaneTransform(FXMVECTOR P, CXMMATRIX M)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    // Accumulate from the w term down to the x term
								    XMVECTOR Sum = XMVectorMultiply(XMVectorSplatW(P), M.r[3]);

								    Sum = XMVectorMultiplyAdd(XMVectorSplatZ(P), M.r[2], Sum);
								    Sum = XMVectorMultiplyAdd(XMVectorSplatY(P), M.r[1], Sum);
								    Sum = XMVectorMultiplyAdd(XMVectorSplatX(P), M.r[0], Sum);

								    return Sum;

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Broadcast each plane coefficient and scale the matching matrix row
								    XMVECTOR vTermX = _mm_mul_ps(_mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0)), M.r[0]);
								    XMVECTOR vTermY = _mm_mul_ps(_mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1)), M.r[1]);
								    XMVECTOR vTermZ = _mm_mul_ps(_mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2)), M.r[2]);
								    XMVECTOR vTermW = _mm_mul_ps(_mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3)), M.r[3]);
								    // Pairwise sums: (x + z) + (y + w)
								    XMVECTOR vSumXZ = _mm_add_ps(vTermX,vTermZ);
								    XMVECTOR vSumYW = _mm_add_ps(vTermY,vTermW);
								    return _mm_add_ps(vSumXZ,vSumYW);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Transforms a stream of PlaneCount planes by M. All arguments are passed
								// unchanged to XMVector4TransformStream, whose return value is returned.
								XMFINLINE XMFLOAT4* XMPlaneTransformStream
								(
								    XMFLOAT4*       pOutputStream,
								    UINT            OutputStride,
								    CONST XMFLOAT4* pInputStream,
								    UINT            InputStride,
								    UINT            PlaneCount,
								    CXMMATRIX     M
								)
								{
								    XMFLOAT4* pResult = XMVector4TransformStream(pOutputStream, OutputStride,
								                                                 pInputStream, InputStride,
								                                                 PlaneCount, M);
								    return pResult;
								}


								//------------------------------------------------------------------------------

								// Conversion operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Builds a plane from a point on it and its normal. The result takes
								// x, y, z from Normal and sets w to -dot3(Point, Normal). Normal is used
								// as given; it is not normalized here.
								XMFINLINE XMVECTOR XMPlaneFromPointNormal(FXMVECTOR Point, FXMVECTOR Normal)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR Distance = XMVectorNegate(XMVector3Dot(Point, Normal));

								    // x, y, z come from Normal, w from the negated dot product
								    return XMVectorSelect(Distance, Normal, g_XMSelect1110.v);

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMVECTOR Distance = XMVector3Dot(Point,Normal);
								    Distance = _mm_mul_ps(Distance,g_XMNegativeOne);
								    // Keep only x, y, z of the normal ...
								    XMVECTOR NormalXYZ = _mm_and_ps(Normal,g_XMMask3);
								    // ... and only w of the negated dot product, then merge the two
								    Distance = _mm_and_ps(Distance,g_XMMaskW);
								    return _mm_or_ps(NormalXYZ,Distance);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Builds a plane through three points. The normal is
								// normalize(cross3(Point1 - Point2, Point1 - Point3)) and w is
								// -dot3(normal, Point1).
								XMFINLINE XMVECTOR XMPlaneFromPoints(FXMVECTOR Point1, FXMVECTOR Point2, FXMVECTOR Point3)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    // Two edge vectors that share Point1
								    XMVECTOR Edge12 = XMVectorSubtract(Point1, Point2);
								    XMVECTOR Edge13 = XMVectorSubtract(Point1, Point3);

								    XMVECTOR UnitNormal = XMVector3Normalize(XMVector3Cross(Edge12, Edge13));

								    XMVECTOR Distance = XMVectorNegate(XMPlaneDotNormal(UnitNormal, Point1));

								    // x, y, z come from the normal, w from the negated distance
								    return XMVectorSelect(Distance, UnitNormal, g_XMSelect1110.v);

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Two edge vectors that share Point1
								    XMVECTOR Edge12 = _mm_sub_ps(Point1, Point2);
								    XMVECTOR Edge13 = _mm_sub_ps(Point1, Point3);

								    XMVECTOR UnitNormal = XMVector3Cross(Edge12, Edge13);
								    UnitNormal = XMVector3Normalize(UnitNormal);

								    XMVECTOR Distance = XMPlaneDotNormal(UnitNormal, Point1);
								    Distance = _mm_mul_ps(Distance,g_XMNegativeOne);
								    // Keep x, y, z of the normal and w of the distance, then merge
								    UnitNormal = _mm_and_ps(UnitNormal,g_XMMask3);
								    Distance = _mm_and_ps(Distance,g_XMMaskW);
								    return _mm_or_ps(Distance,UnitNormal);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								/****************************************************************************

								 *

								 * Color

								 *

								 ****************************************************************************/


								//------------------------------------------------------------------------------

								// Comparison operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Compares two colors by forwarding them to XMVector4Equal.
								XMFINLINE BOOL XMColorEqual(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bEqual = XMVector4Equal(C1, C2);
								    return bEqual;
								}


								//------------------------------------------------------------------------------


								// Inequality test for two colors; forwards them to XMVector4NotEqual.
								XMFINLINE BOOL XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bDifferent = XMVector4NotEqual(C1, C2);
								    return bDifferent;
								}


								//------------------------------------------------------------------------------


								// Returns the result of XMVector4Greater(C1, C2).
								XMFINLINE BOOL XMColorGreater(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bGreater = XMVector4Greater(C1, C2);
								    return bGreater;
								}


								//------------------------------------------------------------------------------


								// Returns the result of XMVector4GreaterOrEqual(C1, C2).
								XMFINLINE BOOL XMColorGreaterOrEqual(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bGreaterOrEqual = XMVector4GreaterOrEqual(C1, C2);
								    return bGreaterOrEqual;
								}


								//------------------------------------------------------------------------------


								// Returns the result of XMVector4Less(C1, C2).
								XMFINLINE BOOL XMColorLess(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bLess = XMVector4Less(C1, C2);
								    return bLess;
								}


								//------------------------------------------------------------------------------


								// Returns the result of XMVector4LessOrEqual(C1, C2).
								XMFINLINE BOOL XMColorLessOrEqual(FXMVECTOR C1, FXMVECTOR C2)
								{
								    BOOL bLessOrEqual = XMVector4LessOrEqual(C1, C2);
								    return bLessOrEqual;
								}


								//------------------------------------------------------------------------------


								// NaN test for a color; returns whatever XMVector4IsNaN reports for C.
								XMFINLINE BOOL XMColorIsNaN(FXMVECTOR C)
								{
								    BOOL bHasNaN = XMVector4IsNaN(C);
								    return bHasNaN;
								}


								//------------------------------------------------------------------------------


								// Infinity test for a color; returns whatever XMVector4IsInfinite reports for C.
								XMFINLINE BOOL XMColorIsInfinite(FXMVECTOR C)
								{
								    BOOL bHasInfinity = XMVector4IsInfinite(C);
								    return bHasInfinity;
								}


								//------------------------------------------------------------------------------

								// Computation operations

								//------------------------------------------------------------------------------


								//------------------------------------------------------------------------------


								// Returns the negative of a color: the first three components become
								// 1 - component, the fourth component is passed through unchanged.
								// The input range is not validated.
								XMFINLINE XMVECTOR XMColorNegative(FXMVECTOR vColor)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR vInverted;

								    vInverted.vector4_f32[0] = 1.0f - vColor.vector4_f32[0];
								    vInverted.vector4_f32[1] = 1.0f - vColor.vector4_f32[1];
								    vInverted.vector4_f32[2] = 1.0f - vColor.vector4_f32[2];
								    vInverted.vector4_f32[3] = vColor.vector4_f32[3];

								    return vInverted;

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Flip the sign of x, y and z only, then add {1,1,1,0}
								    return _mm_add_ps(_mm_xor_ps(vColor,g_XMNegate3),g_XMOne3);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Modulates two colors: returns XMVectorMultiply(C1, C2).
								XMFINLINE XMVECTOR XMColorModulate(FXMVECTOR C1, FXMVECTOR C2)
								{
								    XMVECTOR vProduct = XMVectorMultiply(C1, C2);
								    return vProduct;
								}


								//------------------------------------------------------------------------------


								// Adjusts the saturation of a color.
								//   Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]
								//   Result    = (C - Luminance) * fSaturation + Luminance   (first three components)
								// The fourth component of vColor is passed through unchanged.
								XMFINLINE XMVECTOR XMColorAdjustSaturation
								(
								    FXMVECTOR vColor,
								    FLOAT    fSaturation
								)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
								    FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
								    XMVECTOR vAdjusted;

								    vAdjusted.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance;
								    vAdjusted.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance;
								    vAdjusted.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance;
								    // Leave W untouched
								    vAdjusted.vector4_f32[3] = vColor.vector4_f32[3];

								    return vAdjusted;

								#elif defined(_XM_SSE_INTRINSICS_)
								    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
								    // Weight the channels by the luminance constants
								    XMVECTOR vWeighted = _mm_mul_ps(vColor,gvLuminance);
								    // vLane = {weighted.y, weighted.y, weighted.z, weighted.z}
								    XMVECTOR vLane = _mm_shuffle_ps(vWeighted,vWeighted,_MM_SHUFFLE(2,2,1,1));
								    // Lane 0 = weighted.x + weighted.y
								    XMVECTOR vLuminance = _mm_add_ss(vWeighted,vLane);
								    // Broadcast weighted.z
								    vLane = _mm_shuffle_ps(vLane,vLane,_MM_SHUFFLE(2,2,2,2));
								    // Lane 0 = weighted.x + weighted.y + weighted.z (the dot product)
								    vLuminance = _mm_add_ss(vLuminance,vLane);
								    // Broadcast the luminance to all lanes
								    vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
								    // Broadcast fSaturation
								    XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
								    // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance
								    XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
								    vResult = _mm_mul_ps(vResult,vSaturation);
								    vResult = _mm_add_ps(vResult,vLuminance);
								    // Retain w from the source color:
								    // vMerge = {vResult.z, vResult.z, vColor.z, vColor.w}
								    XMVECTOR vMerge = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));
								    // {vResult.x, vResult.y, vResult.z, vColor.w}
								    return _mm_shuffle_ps(vResult,vMerge,_MM_SHUFFLE(3,0,1,0));
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Adjusts the contrast of a color.
								//   Result = (vColor - 0.5f) * fContrast + 0.5f   (first three components)
								// The fourth component of vColor is passed through unchanged.
								XMFINLINE XMVECTOR XMColorAdjustContrast
								(
								    FXMVECTOR vColor,
								    FLOAT    fContrast
								)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    XMVECTOR vAdjusted;

								    vAdjusted.vector4_f32[0] = ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f;
								    vAdjusted.vector4_f32[1] = ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f;
								    vAdjusted.vector4_f32[2] = ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f;
								    // Leave W untouched
								    vAdjusted.vector4_f32[3] = vColor.vector4_f32[3];

								    return vAdjusted;

								#elif defined(_XM_SSE_INTRINSICS_)
								    // Broadcast the contrast factor
								    XMVECTOR vContrast = _mm_set_ps1(fContrast);
								    // (vColor - 0.5f) * fContrast + 0.5f on all four lanes
								    XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf);
								    vResult = _mm_mul_ps(vResult,vContrast);
								    vResult = _mm_add_ps(vResult,g_XMOneHalf);
								    // Retain w from the source color:
								    // vMerge = {vResult.z, vResult.z, vColor.z, vColor.w}
								    XMVECTOR vMerge = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));
								    // {vResult.x, vResult.y, vResult.z, vColor.w}
								    return _mm_shuffle_ps(vResult,vMerge,_MM_SHUFFLE(3,0,1,0));
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								/****************************************************************************

								 *

								 * Miscellaneous

								 *

								 ****************************************************************************/


								//------------------------------------------------------------------------------


								// Reports whether the processor supports the instruction sets this build
								// of the library uses. Builds without the SSE code paths always report
								// TRUE; SSE builds query the operating system for both feature flags.
								XMINLINE BOOL XMVerifyCPUSupport()
								{
								#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
								    // No SSE code paths are compiled in, so there is nothing to verify
								    return TRUE;
								#else // _XM_SSE_INTRINSICS_
								    // Note that on Windows 2000 or older, SSE2 detection is not supported
								    // so this will always fail there. Detecting SSE2 on those systems
								    // would require using cpuid directly.
								    if ( !IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) )
								    {
								        return FALSE;
								    }
								    return ( IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) ? TRUE : FALSE );
								#endif
								}


								//------------------------------------------------------------------------------


								#define XMASSERT_LINE_STRING_SIZE 16

								// Reports a failed assertion and then breaks into the debugger.
								//
								//   pExpression - text of the expression that failed
								//   pFileName   - name of the source file containing the assertion
								//   LineNumber  - source line of the assertion
								//
								// By default the message is emitted piecewise through OutputDebugStringA.
								// When NO_OUTPUT_DEBUG_STRING is defined it is emitted through DbgPrint
								// instead. In both cases __debugbreak() is executed afterwards.
								XMINLINE VOID XMAssert
								(
								    CONST CHAR* pExpression,
								    CONST CHAR* pFileName,
								    UINT        LineNumber
								)
								{
								#ifndef NO_OUTPUT_DEBUG_STRING
								    CHAR        aLineString[XMASSERT_LINE_STRING_SIZE];
								    CHAR*       pLineString;
								    UINT        Line;

								    // Convert LineNumber to decimal text by hand, filling the buffer from
								    // its end towards its start. The do/while always emits at least one
								    // digit, so a line number of 0 prints as "0" rather than as an empty
								    // string, and pLineString always ends up on the first digit without
								    // ever stepping before the start of the buffer.
								    pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 1;
								    *pLineString = '\0';
								    Line = LineNumber;
								    do
								    {
								        pLineString--;
								        *pLineString = (CHAR)('0' + (Line % 10));
								        Line /= 10;
								    } while (Line != 0 && pLineString > aLineString);

								    OutputDebugStringA("Assertion failed: ");
								    OutputDebugStringA(pExpression);
								    OutputDebugStringA(", file ");
								    OutputDebugStringA(pFileName);
								    OutputDebugStringA(", line ");
								    OutputDebugStringA(pLineString);
								    OutputDebugStringA("\r\n");
								#else
								    // LineNumber is unsigned, so format it with %u
								    DbgPrint("Assertion failed: %s, file %s, line %u\r\n", pExpression, pFileName, LineNumber);
								#endif

								    __debugbreak();
								}


								//------------------------------------------------------------------------------


								// Evaluates the Fresnel term per component:
								//
								//   Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
								//
								// where
								//   c = CosIncidentAngle
								//   g = sqrt(abs(c^2 + RefractionIndex^2 - 1))
								//
								// The result is clamped before it is returned (XMVectorSaturate in the
								// scalar path, max with 0 / min with 1 in the SSE path). Asserts that
								// CosIncidentAngle contains no infinite component.
								XMFINLINE XMVECTOR XMFresnelTerm
								(
								    FXMVECTOR CosIncidentAngle,
								    FXMVECTOR RefractionIndex
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR G;
								    XMVECTOR GPlusC, GMinusC;
								    XMVECTOR FirstTerm, SecondTerm;
								    XMVECTOR Numerator, Denominator;

								    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));

								    // g = sqrt(abs(c^2 + (RefractionIndex^2 - 1)))
								    G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
								    G = XMVectorSqrt(XMVectorAbs(XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G)));

								    GPlusC  = XMVectorAdd(G, CosIncidentAngle);
								    GMinusC = XMVectorSubtract(G, CosIncidentAngle);

								    // FirstTerm = 0.5f * (g - c)^2 * (1 / (g + c)^2)
								    FirstTerm = XMVectorMultiply(g_XMOneHalf.v, XMVectorMultiply(GMinusC, GMinusC));
								    FirstTerm = XMVectorMultiply(FirstTerm, XMVectorReciprocal(XMVectorMultiply(GPlusC, GPlusC)));

								    // SecondTerm = (c * (g + c) - 1)^2 * (1 / (c * (g - c) + 1)^2) + 1
								    Numerator   = XMVectorMultiplyAdd(CosIncidentAngle, GPlusC, g_XMNegativeOne.v);
								    Denominator = XMVectorMultiplyAdd(CosIncidentAngle, GMinusC, g_XMOne.v);
								    Numerator   = XMVectorMultiply(Numerator, Numerator);
								    Denominator = XMVectorMultiply(Denominator, Denominator);
								    SecondTerm  = XMVectorMultiplyAdd(Numerator, XMVectorReciprocal(Denominator), g_XMOne.v);

								    return XMVectorSaturate(XMVectorMultiply(FirstTerm, SecondTerm));

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));

								    // vSum = c^2 + (RefractionIndex^2 - 1)
								    XMVECTOR vSum = _mm_add_ps(_mm_mul_ps(CosIncidentAngle,CosIncidentAngle),
								                               _mm_sub_ps(_mm_mul_ps(RefractionIndex,RefractionIndex),g_XMOne));
								    // abs(vSum) computed as max(0 - vSum, vSum), then the square root.
								    // The abs is needed to deal with refraction and cosine being zero.
								    XMVECTOR vG = _mm_sqrt_ps(_mm_max_ps(_mm_sub_ps(_mm_setzero_ps(),vSum),vSum));

								    XMVECTOR vGPlusC  = _mm_add_ps(vG,CosIncidentAngle);
								    XMVECTOR vGMinusC = _mm_sub_ps(vG,CosIncidentAngle);

								    // vFirst = (0.5f * (g - c)^2) / (g + c)^2
								    XMVECTOR vFirst = _mm_mul_ps(_mm_mul_ps(vGMinusC,vGMinusC),g_XMOneHalf);
								    vFirst = _mm_div_ps(vFirst,_mm_mul_ps(vGPlusC,vGPlusC));

								    // vSecond = (c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1
								    XMVECTOR vNumerator   = _mm_sub_ps(_mm_mul_ps(vGPlusC,CosIncidentAngle),g_XMOne);
								    XMVECTOR vDenominator = _mm_add_ps(_mm_mul_ps(vGMinusC,CosIncidentAngle),g_XMOne);
								    vNumerator   = _mm_mul_ps(vNumerator,vNumerator);
								    vDenominator = _mm_mul_ps(vDenominator,vDenominator);
								    XMVECTOR vSecond = _mm_add_ps(_mm_div_ps(vNumerator,vDenominator),g_XMOne);

								    // Combine the two terms and clamp to 0.0f - 1.0f
								    XMVECTOR vResult = _mm_mul_ps(vFirst,vSecond);
								    vResult = _mm_max_ps(vResult,g_XMZero);
								    return _mm_min_ps(vResult,g_XMOne);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Returns nonzero when S1 and S2 differ by no more than Epsilon, that is
								// when |S1 - S2| <= Epsilon.
								//
								//   S1, S2  - the two values to compare
								//   Epsilon - largest absolute difference still treated as equal
								XMFINLINE BOOL XMScalarNearEqual
								(
								    FLOAT S1,
								    FLOAT S2,
								    FLOAT Epsilon
								)
								{
								    FLOAT Delta = S1 - S2;
								#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
								    // fabsf clears the sign bit just like the former 0x7FFFFFFF mask did,
								    // but without reading a FLOAT through a UINT* (and back), which broke
								    // the strict aliasing rule.
								    return (fabsf(Delta) <= Epsilon);
								#else
								    return (__fabs(Delta) <= Epsilon);
								#endif
								}


								//------------------------------------------------------------------------------

								// Wraps Angle (in radians) by multiples of XM_2PI so that the result is
								// nominally in the range -XM_PI <= result < XM_PI.
								XMFINLINE FLOAT XMScalarModAngle
								(
								    FLOAT Angle
								)
								{
								    // Note: The modulo is performed on the magnitude only (unsigned math)
								    // to work around a precision error on numbers that are close to PI
								    float fWrapped;
								#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
								    // Shift so that the target range becomes 0.0f to XM_2PI
								    Angle = Angle + XM_PI;
								    // Remove whole turns from the magnitude; the quotient is truncated
								    // through an INT conversion
								    fWrapped = fabsf(Angle);
								    fWrapped = fWrapped - (XM_2PI * (FLOAT)((INT)(fWrapped/XM_2PI)));
								    // Shift back to the range of -XM_PI to XM_PI-epsilon
								    fWrapped = fWrapped - XM_PI;
								    // A negative shifted angle gets its sign restored on the way out
								    return (Angle<0.0f) ? -fWrapped : fWrapped;
								#else
								#endif
								}


								//------------------------------------------------------------------------------


								// Computes the sine of Value (radians).
								//
								// The scalar path first wraps the angle with XMScalarModAngle and then
								// evaluates the odd power series
								//   sin(V) ~= V - V^3 / 3! + V^5 / 5! - ... - V^23 / 23! (for -PI <= V < PI)
								// as three 4-wide dot products against the g_XMSinCoefficients tables.
								// The SSE path forwards to sinf.
								XMINLINE FLOAT XMScalarSin
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT                  Angle;
								    FLOAT                  AngleSq;
								    XMVECTOR               Pow0123, Pow0246, Pow1357, Pow9to15, Pow17to23;
								    XMVECTOR               Pow1, Pow8;
								    XMVECTOR               Sum0, Sum1, Sum2;

								    Angle   = XMScalarModAngle(Value);
								    AngleSq = Angle * Angle;

								    // { 1, V, V^2, V^3 } squared gives the even powers { 1, V^2, V^4, V^6 };
								    // multiplying those by V gives the odd powers { V, V^3, V^5, V^7 }.
								    Pow0123 = XMVectorSet(1.0f, Angle, AngleSq, AngleSq * Angle);
								    Pow1    = XMVectorSplatY(Pow0123);
								    Pow0246 = XMVectorMultiply(Pow0123, Pow0123);
								    Pow1357 = XMVectorMultiply(Pow0246, Pow1);

								    // V^8 = V^7 * V, used to step the odd powers up by eight at a time
								    Pow8       = XMVectorMultiply(XMVectorSplatW(Pow1357), Pow1);
								    Pow9to15   = XMVectorMultiply(Pow1357, Pow8);
								    Pow17to23  = XMVectorMultiply(Pow9to15, Pow8);

								    Sum0 = XMVector4Dot(Pow1357, g_XMSinCoefficients0.v);
								    Sum1 = XMVector4Dot(Pow9to15, g_XMSinCoefficients1.v);
								    Sum2 = XMVector4Dot(Pow17to23, g_XMSinCoefficients2.v);

								    return Sum0.vector4_f32[0] + Sum1.vector4_f32[0] + Sum2.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    return sinf( Value );
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Computes the cosine of Value (radians).
								//
								// The scalar path first wraps the angle with XMScalarModAngle and then
								// evaluates the even power series
								//   cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - ... - V^22 / 22! (for -PI <= V < PI)
								// as three 4-wide dot products against the g_XMCosCoefficients tables.
								// The SSE path forwards to cosf.
								XMINLINE FLOAT XMScalarCos
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT                  Angle;
								    FLOAT                  AngleSq;
								    XMVECTOR               Pow0123, Pow0246, Pow8to14, Pow16to22;
								    XMVECTOR               Pow2, Pow8;
								    XMVECTOR               Sum0, Sum1, Sum2;

								    Angle   = XMScalarModAngle(Value);
								    AngleSq = Angle * Angle;

								    // { 1, V, V^2, V^3 } squared gives the even powers { 1, V^2, V^4, V^6 }
								    Pow0123 = XMVectorSet(1.0f, Angle, AngleSq, AngleSq * Angle);
								    Pow0246 = XMVectorMultiply(Pow0123, Pow0123);

								    // V^8 = V^6 * V^2, used to step the even powers up by eight at a time
								    Pow2 = XMVectorSplatZ(Pow0123);
								    Pow8 = XMVectorMultiply(XMVectorSplatW(Pow0246), Pow2);

								    Pow8to14  = XMVectorMultiply(Pow0246, Pow8);
								    Pow16to22 = XMVectorMultiply(Pow8to14, Pow8);

								    Sum0 = XMVector4Dot(Pow0246, g_XMCosCoefficients0.v);
								    Sum1 = XMVector4Dot(Pow8to14, g_XMCosCoefficients1.v);
								    Sum2 = XMVector4Dot(Pow16to22, g_XMCosCoefficients2.v);

								    return Sum0.vector4_f32[0] + Sum1.vector4_f32[0] + Sum2.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    return cosf(Value);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Computes the sine and the cosine of Value (radians) in one call.
								//
								//   pSin  - receives the sine; asserted to be non-null
								//   pCos  - receives the cosine; asserted to be non-null
								//   Value - angle in radians
								//
								// The scalar path wraps the angle with XMScalarModAngle and evaluates
								//   sin(V) ~= V - V^3 / 3! + V^5 / 5! - ... - V^23 / 23! (for -PI <= V < PI)
								//   cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - ... - V^22 / 22! (for -PI <= V < PI)
								// sharing the power vectors between both series. The SSE path forwards
								// to sinf and cosf.
								XMINLINE VOID XMScalarSinCos
								(
								    FLOAT* pSin,
								    FLOAT* pCos,
								    FLOAT  Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT                  Angle;
								    FLOAT                  AngleSq;
								    XMVECTOR               Pow0123, Even0to6, Odd1to7, Even8to14, Odd9to15, Even16to22, Odd17to23;
								    XMVECTOR               Pow1, Pow2, Pow8;
								    XMVECTOR               SinLo, SinMid, SinHi, CosLo, CosMid, CosHi;

								    XMASSERT(pSin);
								    XMASSERT(pCos);

								    Angle   = XMScalarModAngle(Value);
								    AngleSq = Angle * Angle;

								    Pow0123 = XMVectorSet(1.0f, Angle, AngleSq, AngleSq * Angle);
								    Pow1    = XMVectorSplatY(Pow0123);
								    Pow2    = XMVectorSplatZ(Pow0123);

								    // Even powers { 1, V^2, V^4, V^6 } and odd powers { V, V^3, V^5, V^7 }
								    Even0to6 = XMVectorMultiply(Pow0123, Pow0123);
								    Odd1to7  = XMVectorMultiply(Even0to6, Pow1);

								    // V^8 = V^6 * V^2 lifts each power vector by eight
								    Pow8 = XMVectorMultiply(XMVectorSplatW(Even0to6), Pow2);

								    Even8to14  = XMVectorMultiply(Even0to6, Pow8);
								    Odd9to15   = XMVectorMultiply(Odd1to7, Pow8);
								    Even16to22 = XMVectorMultiply(Even8to14, Pow8);
								    Odd17to23  = XMVectorMultiply(Odd9to15, Pow8);

								    CosLo  = XMVector4Dot(Even0to6, g_XMCosCoefficients0.v);
								    SinLo  = XMVector4Dot(Odd1to7, g_XMSinCoefficients0.v);
								    CosMid = XMVector4Dot(Even8to14, g_XMCosCoefficients1.v);
								    SinMid = XMVector4Dot(Odd9to15, g_XMSinCoefficients1.v);
								    CosHi  = XMVector4Dot(Even16to22, g_XMCosCoefficients2.v);
								    SinHi  = XMVector4Dot(Odd17to23, g_XMSinCoefficients2.v);

								    // The cosine is written first, then the sine
								    *pCos = CosLo.vector4_f32[0] + CosMid.vector4_f32[0] + CosHi.vector4_f32[0];
								    *pSin = SinLo.vector4_f32[0] + SinMid.vector4_f32[0] + SinHi.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(pSin);
								    XMASSERT(pCos);

								    // The sine is written first, then the cosine
								    *pSin = sinf(Value);
								    *pCos = cosf(Value);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Computes the arcsine of Value.
								//
								// The scalar path evaluates an approximation built from |Value|, the
								// signed square Value * |Value|, a term divided by
								// sqrtf(1.00000011921f - |Value|), and the g_XMASinCoefficients tables.
								// The constant is slightly above 1.0f, so the square root argument stays
								// positive when |Value| == 1; for |Value| larger than that constant the
								// argument becomes negative. The SSE path forwards to asinf.
								XMINLINE FLOAT XMScalarASin
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT AbsValue, Value2, Value3, D;
								    XMVECTOR AbsV, R0, R1, Result;
								    XMVECTOR V3;

								    // fabsf clears the sign bit just like the former 0x7FFFFFFF mask did,
								    // but without accessing a FLOAT through a UINT*, which broke the
								    // strict aliasing rule.
								    AbsValue = fabsf(Value);

								    Value2 = Value * AbsValue;      // Value squared, keeping the sign of Value
								    Value3 = Value * Value2;        // |Value| cubed
								    D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);

								    AbsV = XMVectorReplicate(AbsValue);

								    V3.vector4_f32[0] = Value3;
								    V3.vector4_f32[1] = 1.0f;
								    V3.vector4_f32[2] = Value3;
								    V3.vector4_f32[3] = 1.0f;

								    // R1 = { D * Value3, D, Value * Value3, Value }
								    R1 = XMVectorSet(D, D, Value, Value);
								    R1 = XMVectorMultiply(R1, V3);

								    // R0 = (|Value| * C0 + C1) * |Value| + C2, per component
								    R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
								    R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);

								    Result = XMVector4Dot(R0, R1);

								    return Result.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    return asinf(Value);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Computes the arccosine of Value.
								//
								// The scalar path derives it from the arcsine as
								// XM_PIDIV2 - XMScalarASin(Value); the SSE path forwards to acosf.
								XMINLINE FLOAT XMScalarACos
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT ASinValue;

								    ASinValue = XMScalarASin(Value);
								    return XM_PIDIV2 - ASinValue;

								#elif defined(_XM_SSE_INTRINSICS_)
								    return acosf(Value);
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Estimates the sine of Value with a four term series:
								//   sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
								//
								// Value is not range reduced here; both paths assert
								// -XM_PI <= Value < XM_PI. The terms are weighted by
								// g_XMSinEstCoefficients.
								XMFINLINE FLOAT XMScalarSinEst
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT                  ValueSq;
								    XMVECTOR               Powers;
								    XMVECTOR               Pow1;
								    XMVECTOR               Sum;

								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);

								    ValueSq = Value * Value;

								    // { 1, V, V^2, V^3 } squared and then times V gives { V, V^3, V^5, V^7 }
								    Powers = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
								    Pow1   = XMVectorSplatY(Powers);
								    Powers = XMVectorMultiply(Powers, Powers);
								    Powers = XMVectorMultiply(Powers, Pow1);

								    Sum = XMVector4Dot(Powers, g_XMSinEstCoefficients.v);

								    return Sum.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);
								    float ValueSq = Value*Value;
								    XMVECTOR vValue = _mm_set_ps1(Value);
								    // vTerms starts as { 1, V, V^2, V^3 }, becomes { V, V^3, V^5, V^7 }
								    // and is then weighted by the coefficients
								    XMVECTOR vTerms = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
								    vTerms = _mm_mul_ps(_mm_mul_ps(vTerms,vTerms),vValue);
								    vTerms = _mm_mul_ps(vTerms,g_XMSinEstCoefficients);
								    // Horizontal add. Move terms X and Y up to the Z and W lanes and add:
								    // Z = X+Z, W = Y+W
								    XMVECTOR vPairs = _mm_add_ps(_mm_shuffle_ps(vValue,vTerms,_MM_SHUFFLE(1,0,0,0)),vTerms);
								    // Move the W pair to the Z lane and add; Z now holds the full sum
								    XMVECTOR vTotal = _mm_add_ps(_mm_shuffle_ps(vTerms,vPairs,_MM_SHUFFLE(0,3,0,0)),vPairs);
								    // Splat Z so the sum can be read from the first lane
								    vTotal = _mm_shuffle_ps(vTotal,vTotal,_MM_SHUFFLE(2,2,2,2));
								#if defined(_MSC_VER) && (_MSC_VER>=1500)
								    return _mm_cvtss_f32(vTotal);
								#else
								    return vTotal.m128_f32[0];
								#endif
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Estimates the cosine of Value with a four term series:
								//   cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
								//
								// Value is not range reduced here; both paths assert
								// -XM_PI <= Value < XM_PI. The terms are weighted by
								// g_XMCosEstCoefficients.
								XMFINLINE FLOAT XMScalarCosEst
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)
								    FLOAT    ValueSq;
								    XMVECTOR Powers;
								    XMVECTOR Sum;

								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);

								    ValueSq = Value * Value;

								    // { 1, V, V^2, V^3 } squared gives { 1, V^2, V^4, V^6 }
								    Powers = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
								    Powers = XMVectorMultiply(Powers, Powers);

								    Sum = XMVector4Dot(Powers, g_XMCosEstCoefficients.v);
								    return Sum.vector4_f32[0];
								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);
								    float ValueSq = Value*Value;
								    // vTerms starts as { 1, V, V^2, V^3 }, becomes { 1, V^2, V^4, V^6 }
								    // and is then weighted by the coefficients
								    XMVECTOR vTerms = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
								    vTerms = _mm_mul_ps(_mm_mul_ps(vTerms,vTerms),g_XMCosEstCoefficients);
								    // Horizontal add. Move terms X and Y up to the Z and W lanes and add:
								    // Z = X+Z, W = Y+W
								    XMVECTOR vPairs = _mm_add_ps(_mm_shuffle_ps(_mm_setzero_ps(),vTerms,_MM_SHUFFLE(1,0,0,0)),vTerms);
								    // Move the W pair to the Z lane and add; Z now holds the full sum
								    XMVECTOR vTotal = _mm_add_ps(_mm_shuffle_ps(vTerms,vPairs,_MM_SHUFFLE(0,3,0,0)),vPairs);
								    // Splat Z so the sum can be read from the first lane
								    vTotal = _mm_shuffle_ps(vTotal,vTotal,_MM_SHUFFLE(2,2,2,2));
								#if defined(_MSC_VER) && (_MSC_VER>=1500)
								    return _mm_cvtss_f32(vTotal);
								#else
								    return vTotal.m128_f32[0];
								#endif
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Estimates the sine and the cosine of Value in one call using four
								// term series:
								//   sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
								//   cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
								//
								//   pSin  - receives the sine estimate; asserted to be non-null
								//   pCos  - receives the cosine estimate; asserted to be non-null
								//   Value - angle; asserted to satisfy -XM_PI <= Value < XM_PI
								//
								// The cosine is stored before the sine.
								XMFINLINE VOID XMScalarSinCosEst
								(
								    FLOAT* pSin,
								    FLOAT* pCos,
								    FLOAT  Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    FLOAT    ValueSq;
								    XMVECTOR Powers, SinTerms, CosTerms;
								    XMVECTOR Pow1;

								    XMASSERT(pSin);
								    XMASSERT(pCos);
								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);

								    ValueSq = Value * Value;
								    Powers = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
								    Pow1 = XMVectorSplatY(Powers);
								    // Even powers { 1, V^2, V^4, V^6 } feed the cosine; multiplying them
								    // by V gives the odd powers { V, V^3, V^5, V^7 } for the sine
								    CosTerms = XMVectorMultiply(Powers, Powers);
								    SinTerms = XMVectorMultiply(CosTerms, Pow1);

								    CosTerms = XMVector4Dot(CosTerms, g_XMCosEstCoefficients.v);
								    SinTerms = XMVector4Dot(SinTerms, g_XMSinEstCoefficients.v);

								    *pCos = CosTerms.vector4_f32[0];
								    *pSin = SinTerms.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    XMASSERT(pSin);
								    XMASSERT(pCos);
								    XMASSERT(Value >= -XM_PI);
								    XMASSERT(Value < XM_PI);
								    float ValueSq = Value * Value;
								    // vCos = { 1, V^2, V^4, V^6 }
								    XMVECTOR vCos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
								    vCos = _mm_mul_ps(vCos,vCos);
								    // vSin = { V, V^3, V^5, V^7 }
								    XMVECTOR vSin = _mm_mul_ps(_mm_set_ps1(Value),vCos);
								    // Weight and sum each series, storing the first lane of the result
								    _mm_store_ss(pCos,XMVector4Dot(vCos,g_XMCosEstCoefficients));
								    _mm_store_ss(pSin,XMVector4Dot(vSin, g_XMSinEstCoefficients));
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Estimates the arcsine of Value.
								//
								// Both paths build the same two 4-wide vectors from Value, |Value|, the
								// signed square Value * |Value| and D = OnePlusEps - |Value|, scale one of
								// them by g_XMASinEstCoefficients, and return their dot product.
								// OnePlusEps is slightly above 1.0f, so sqrtf(D) receives a positive
								// argument when |Value| == 1; for |Value| larger than OnePlusEps the
								// argument becomes negative.
								XMFINLINE FLOAT XMScalarASinEst
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR VR, CR, CS;
								    XMVECTOR Result;
								    FLOAT AbsV, V2, D;
								    CONST FLOAT OnePlusEps = 1.00000011921f;

								    // fabsf, as in the SSE path below, instead of masking the sign bit
								    // through a UINT* alias of the FLOAT (a strict aliasing violation)
								    AbsV = fabsf(Value);
								    V2 = Value * AbsV;          // Square with sign retained
								    D = OnePlusEps - AbsV;

								    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
								    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
								    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);

								    Result = XMVector4Dot(VR, CR);

								    return Result.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    CONST FLOAT OnePlusEps = 1.00000011921f;
								    FLOAT AbsV = fabsf(Value);
								    FLOAT V2 = Value * AbsV;    // Square with sign retained
								    FLOAT D = OnePlusEps - AbsV;

								    // _mm_set_ps takes its arguments from the W lane down to the X lane,
								    // so these match the XMVectorSet calls of the scalar path
								    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
								    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
								    Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
								    Result = XMVector4Dot(VR,Result);
								#if defined(_MSC_VER) && (_MSC_VER>=1500)
								    return _mm_cvtss_f32(Result);
								#else
								    return Result.m128_f32[0];
								#endif
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								//------------------------------------------------------------------------------


								// Estimates the arccosine of Value as XM_PIDIV2 minus an arcsine estimate.
								//
								// The arcsine estimate is computed inline: two 4-wide vectors are built
								// from Value, |Value|, the signed square Value * |Value| and
								// D = OnePlusEps - |Value|, one of them is scaled by
								// g_XMASinEstCoefficients, and their dot product is taken. OnePlusEps is
								// slightly above 1.0f, so sqrtf(D) receives a positive argument when
								// |Value| == 1; for |Value| larger than OnePlusEps the argument becomes
								// negative.
								XMFINLINE FLOAT XMScalarACosEst
								(
								    FLOAT Value
								)
								{
								#if defined(_XM_NO_INTRINSICS_)

								    XMVECTOR VR, CR, CS;
								    XMVECTOR Result;
								    FLOAT AbsV, V2, D;
								    CONST FLOAT OnePlusEps = 1.00000011921f;

								    // fabsf, as in the SSE path below, instead of masking the sign bit
								    // through a UINT* alias of the FLOAT (a strict aliasing violation)
								    AbsV = fabsf(Value);
								    V2 = Value * AbsV;          // Value^2 retaining sign
								    D = OnePlusEps - AbsV;

								    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
								    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
								    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);

								    Result = XMVector4Dot(VR, CR);

								    return XM_PIDIV2 - Result.vector4_f32[0];

								#elif defined(_XM_SSE_INTRINSICS_)
								    CONST FLOAT OnePlusEps = 1.00000011921f;
								    FLOAT AbsV = fabsf(Value);
								    FLOAT V2 = Value * AbsV;    // Value^2 retaining sign
								    FLOAT D = OnePlusEps - AbsV;

								    // _mm_set_ps takes its arguments from the W lane down to the X lane,
								    // so these match the XMVectorSet calls of the scalar path
								    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
								    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
								    Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
								    Result = XMVector4Dot(VR,Result);
								#if defined(_MSC_VER) && (_MSC_VER>=1500)
								    return XM_PIDIV2 - _mm_cvtss_f32(Result);
								#else
								    return XM_PIDIV2 - Result.m128_f32[0];
								#endif
								#else // _XM_VMX128_INTRINSICS_
								#endif // _XM_VMX128_INTRINSICS_
								}


								#endif // __XNAMATHMISC_INL__