//---------------------------------------------------------------------------- // // Copyright (C) Microsoft Corporation, 1997. // // d3dflt.h // // Floating-point constants and operations on FP values. // //---------------------------------------------------------------------------- #ifndef _D3DFLT_H_ #define _D3DFLT_H_ #include #include #ifdef __cplusplus extern "C" { #endif typedef union tagFLOATINT32 { FLOAT f; INT32 i; UINT32 u; } FLOATINT32, *PFLOATINT32; // // Type-forcing macros to access FP as integer and vice-versa. // ATTENTION - VC5's optimizer turns these macros into ftol sometimes, // completely breaking them. // Using FLOATINT32 works around the problem but is not as flexible, // so the old code is kept around for the time when the compiler is fixed. // Note that pointer casting with FLOATINT32 fails just as the direct // pointer casting does, so it's not a remedy. // // Use these macros with extreme care. // #define ASFLOAT(i) (*(FLOAT *)&(i)) #define ASINT32(f) (*(INT32 *)&(f)) #define ASUINT32(f) (*(UINT32 *)&(f)) // // FP constants. // // Powers of two for snap values. These should not be used in code. 
// Integer powers of two, 2^0 through 2^52, spelled as plain decimal
// literals with no suffix.
// NOTE(review): CONST_TWOPOW31 and above do not fit in a signed 32-bit int,
// so the integer type the compiler assigns to them is compiler-dependent -
// confirm before using them in integer arithmetic.
#define CONST_TWOPOW0 1
#define CONST_TWOPOW1 2
#define CONST_TWOPOW2 4
#define CONST_TWOPOW3 8
#define CONST_TWOPOW4 16
#define CONST_TWOPOW5 32
#define CONST_TWOPOW6 64
#define CONST_TWOPOW7 128
#define CONST_TWOPOW8 256
#define CONST_TWOPOW9 512
#define CONST_TWOPOW10 1024
#define CONST_TWOPOW11 2048
#define CONST_TWOPOW12 4096
#define CONST_TWOPOW13 8192
#define CONST_TWOPOW14 16384
#define CONST_TWOPOW15 32768
#define CONST_TWOPOW16 65536
#define CONST_TWOPOW17 131072
#define CONST_TWOPOW18 262144
#define CONST_TWOPOW19 524288
#define CONST_TWOPOW20 1048576
#define CONST_TWOPOW21 2097152
#define CONST_TWOPOW22 4194304
#define CONST_TWOPOW23 8388608
#define CONST_TWOPOW24 16777216
#define CONST_TWOPOW25 33554432
#define CONST_TWOPOW26 67108864
#define CONST_TWOPOW27 134217728
#define CONST_TWOPOW28 268435456
#define CONST_TWOPOW29 536870912
#define CONST_TWOPOW30 1073741824
#define CONST_TWOPOW31 2147483648
#define CONST_TWOPOW32 4294967296
#define CONST_TWOPOW33 8589934592
#define CONST_TWOPOW34 17179869184
#define CONST_TWOPOW35 34359738368
#define CONST_TWOPOW36 68719476736
#define CONST_TWOPOW37 137438953472
#define CONST_TWOPOW38 274877906944
#define CONST_TWOPOW39 549755813888
#define CONST_TWOPOW40 1099511627776
#define CONST_TWOPOW41 2199023255552
#define CONST_TWOPOW42 4398046511104
#define CONST_TWOPOW43 8796093022208
#define CONST_TWOPOW44 17592186044416
#define CONST_TWOPOW45 35184372088832
#define CONST_TWOPOW46 70368744177664
#define CONST_TWOPOW47 140737488355328
#define CONST_TWOPOW48 281474976710656
#define CONST_TWOPOW49 562949953421312
#define CONST_TWOPOW50 1125899906842624
#define CONST_TWOPOW51 2251799813685248
#define CONST_TWOPOW52 4503599627370496

// FLOAT-typed forms of the powers of two: each one is the matching
// CONST_TWOPOWn converted with a FLOAT cast.
#define FLOAT_TWOPOW0 ((FLOAT)(CONST_TWOPOW0))
#define FLOAT_TWOPOW1 ((FLOAT)(CONST_TWOPOW1))
#define FLOAT_TWOPOW2 ((FLOAT)(CONST_TWOPOW2))
#define FLOAT_TWOPOW3 ((FLOAT)(CONST_TWOPOW3))
#define FLOAT_TWOPOW4 ((FLOAT)(CONST_TWOPOW4))
#define FLOAT_TWOPOW5 ((FLOAT)(CONST_TWOPOW5))
#define FLOAT_TWOPOW6 ((FLOAT)(CONST_TWOPOW6))
#define FLOAT_TWOPOW7 ((FLOAT)(CONST_TWOPOW7))
#define FLOAT_TWOPOW8 ((FLOAT)(CONST_TWOPOW8))
#define FLOAT_TWOPOW9 ((FLOAT)(CONST_TWOPOW9))
#define FLOAT_TWOPOW10 ((FLOAT)(CONST_TWOPOW10))
#define FLOAT_TWOPOW11 ((FLOAT)(CONST_TWOPOW11))
#define FLOAT_TWOPOW12 ((FLOAT)(CONST_TWOPOW12))
#define FLOAT_TWOPOW13 ((FLOAT)(CONST_TWOPOW13))
#define FLOAT_TWOPOW14 ((FLOAT)(CONST_TWOPOW14))
#define FLOAT_TWOPOW15 ((FLOAT)(CONST_TWOPOW15))
#define FLOAT_TWOPOW16 ((FLOAT)(CONST_TWOPOW16))
#define FLOAT_TWOPOW17 ((FLOAT)(CONST_TWOPOW17))
#define FLOAT_TWOPOW18 ((FLOAT)(CONST_TWOPOW18))
#define FLOAT_TWOPOW19 ((FLOAT)(CONST_TWOPOW19))
#define FLOAT_TWOPOW20 ((FLOAT)(CONST_TWOPOW20))
#define FLOAT_TWOPOW21 ((FLOAT)(CONST_TWOPOW21))
#define FLOAT_TWOPOW22 ((FLOAT)(CONST_TWOPOW22))
#define FLOAT_TWOPOW23 ((FLOAT)(CONST_TWOPOW23))
#define FLOAT_TWOPOW24 ((FLOAT)(CONST_TWOPOW24))
#define FLOAT_TWOPOW25 ((FLOAT)(CONST_TWOPOW25))
#define FLOAT_TWOPOW26 ((FLOAT)(CONST_TWOPOW26))
#define FLOAT_TWOPOW27 ((FLOAT)(CONST_TWOPOW27))
#define FLOAT_TWOPOW28 ((FLOAT)(CONST_TWOPOW28))
#define FLOAT_TWOPOW29 ((FLOAT)(CONST_TWOPOW29))
#define FLOAT_TWOPOW30 ((FLOAT)(CONST_TWOPOW30))
#define FLOAT_TWOPOW31 ((FLOAT)(CONST_TWOPOW31))
#define FLOAT_TWOPOW32 ((FLOAT)(CONST_TWOPOW32))
#define FLOAT_TWOPOW33 ((FLOAT)(CONST_TWOPOW33))
#define FLOAT_TWOPOW34 ((FLOAT)(CONST_TWOPOW34))
#define FLOAT_TWOPOW35 ((FLOAT)(CONST_TWOPOW35))
#define FLOAT_TWOPOW36 ((FLOAT)(CONST_TWOPOW36))
#define FLOAT_TWOPOW37 ((FLOAT)(CONST_TWOPOW37))
#define FLOAT_TWOPOW38 ((FLOAT)(CONST_TWOPOW38))
#define FLOAT_TWOPOW39 ((FLOAT)(CONST_TWOPOW39))
#define FLOAT_TWOPOW40 ((FLOAT)(CONST_TWOPOW40))
#define FLOAT_TWOPOW41 ((FLOAT)(CONST_TWOPOW41))
#define FLOAT_TWOPOW42 ((FLOAT)(CONST_TWOPOW42))
#define FLOAT_TWOPOW43 ((FLOAT)(CONST_TWOPOW43))
#define FLOAT_TWOPOW44 ((FLOAT)(CONST_TWOPOW44))
#define FLOAT_TWOPOW45 ((FLOAT)(CONST_TWOPOW45))
#define FLOAT_TWOPOW46 ((FLOAT)(CONST_TWOPOW46))
// Remaining FLOAT-typed powers of two (2^47 through 2^52), each a FLOAT cast
// of the matching CONST_TWOPOWn.
#define FLOAT_TWOPOW47 ((FLOAT)(CONST_TWOPOW47))
#define FLOAT_TWOPOW48 ((FLOAT)(CONST_TWOPOW48))
#define FLOAT_TWOPOW49 ((FLOAT)(CONST_TWOPOW49))
#define FLOAT_TWOPOW50 ((FLOAT)(CONST_TWOPOW50))
#define FLOAT_TWOPOW51 ((FLOAT)(CONST_TWOPOW51))
#define FLOAT_TWOPOW52 ((FLOAT)(CONST_TWOPOW52))

// Values that are smaller than the named value by the smallest
// representable amount.  Since this depends on the type used
// there is no CONST form.
// NOTE(review): the decimal literals are not themselves exactly
// representable in a 32-bit float; they appear to be chosen so that the
// FLOAT cast rounds to the largest value below the power of two - confirm
// with the target compiler's constant rounding.
#define FLOAT_NEARTWOPOW31 ((FLOAT)2147483583)
#define FLOAT_NEARTWOPOW32 ((FLOAT)4294967167)

// Value close enough to zero to consider zero.  This can't be too small
// but it can't be too large.  In other words, it's picked by guessing.
#define FLOAT_NEARZERO (1e-5f)

// General FP constants.
#define FLOAT_E ((FLOAT)2.7182818284590452354)

// Integer value of first exponent bit in a float.  Provides a scaling factor
// for exponent values extracted directly from float representation.
#define FLOAT_EXPSCALE ((FLOAT)0x00800000)

// Integer representation of 1.0f.
#define INT32_FLOAT_ONE 0x3f800000

//
// Named FP values.  The same g_f* names exist in both configurations: on
// x86 they are extern FLOAT variables defined in some other translation
// unit; elsewhere they are macros that expand to constant expressions.
// In the names, "Oo" marks a reciprocal (g_fOoTwoPow15 is 1.0 / 2^15) and
// "255oTwoPow15" is 255.0 / 2^15, as the macro forms below show.
//

#ifdef _X86_

// All FP values are loaded from memory so declare them all as global
// variables.
extern FLOAT g_fE;
extern FLOAT g_fZero;
extern FLOAT g_fNearZero;
extern FLOAT g_fHalf;
extern FLOAT g_fp95;
extern FLOAT g_fOne;
extern FLOAT g_fOneMinusEps;
extern FLOAT g_fExpScale;
extern FLOAT g_fOoExpScale;
extern FLOAT g_f255oTwoPow15;
extern FLOAT g_fOo255;
extern FLOAT g_fOo256;
extern FLOAT g_fTwoPow7;
extern FLOAT g_fTwoPow8;
extern FLOAT g_fTwoPow11;
extern FLOAT g_fTwoPow15;
extern FLOAT g_fOoTwoPow15;
extern FLOAT g_fTwoPow16;
extern FLOAT g_fOoTwoPow16;
extern FLOAT g_fTwoPow20;
extern FLOAT g_fOoTwoPow20;
extern FLOAT g_fTwoPow27;
extern FLOAT g_fOoTwoPow27;
extern FLOAT g_fTwoPow30;
extern FLOAT g_fTwoPow31;
extern FLOAT g_fNearTwoPow31;
extern FLOAT g_fOoTwoPow31;
extern FLOAT g_fOoNearTwoPow31;
extern FLOAT g_fTwoPow32;
extern FLOAT g_fNearTwoPow32;
extern FLOAT g_fTwoPow39;
extern FLOAT g_fTwoPow47;

#else

// Leave FP values as constants.
// Reciprocals are computed in double and then narrowed with a FLOAT cast.
#define g_fE FLOAT_E
#define g_fNearZero FLOAT_NEARZERO
#define g_fZero (0.0f)
#define g_fHalf (0.5f)
#define g_fp95 (0.95f)
#define g_fOne (1.0f)
#define g_fOneMinusEps (1.0f - FLT_EPSILON)
#define g_fExpScale FLOAT_EXPSCALE
#define g_fOoExpScale ((FLOAT)(1.0 / (double)FLOAT_EXPSCALE))
#define g_f255oTwoPow15 ((FLOAT)(255.0 / (double)CONST_TWOPOW15))
#define g_fOo255 ((FLOAT)(1.0 / 255.0))
#define g_fOo256 ((FLOAT)(1.0 / 256.0))
#define g_fTwoPow7 FLOAT_TWOPOW7
#define g_fTwoPow8 FLOAT_TWOPOW8
#define g_fTwoPow11 FLOAT_TWOPOW11
#define g_fTwoPow15 FLOAT_TWOPOW15
#define g_fOoTwoPow15 ((FLOAT)(1.0 / (double)CONST_TWOPOW15))
#define g_fTwoPow16 FLOAT_TWOPOW16
#define g_fOoTwoPow16 ((FLOAT)(1.0 / (double)CONST_TWOPOW16))
#define g_fTwoPow20 FLOAT_TWOPOW20
#define g_fOoTwoPow20 ((FLOAT)(1.0 / (double)CONST_TWOPOW20))
#define g_fTwoPow27 FLOAT_TWOPOW27
#define g_fOoTwoPow27 ((FLOAT)(1.0 / (double)CONST_TWOPOW27))
#define g_fTwoPow30 FLOAT_TWOPOW30
#define g_fTwoPow31 FLOAT_TWOPOW31
#define g_fNearTwoPow31 FLOAT_NEARTWOPOW31
#define g_fOoTwoPow31 ((FLOAT)(1.0 / (double)CONST_TWOPOW31))
#define g_fOoNearTwoPow31 ((FLOAT)(1.0 / ((double)FLOAT_NEARTWOPOW31)))
#define g_fTwoPow32 FLOAT_TWOPOW32
#define g_fNearTwoPow32 FLOAT_NEARTWOPOW32
#define g_fTwoPow39 FLOAT_TWOPOW39
#define g_fTwoPow47 FLOAT_TWOPOW47

#endif // _X86_

//
// Conversion tables.
//

// Takes an unsigned byte to a float in [0.0, 1.0].  The 257th entry is
// also one to allow overflow.
extern FLOAT g_fUInt8ToFloat[257];

// Floating-point pinning values for float-int conversion.
extern double g_dSnap[33];

//
// x86 FP control for optimized FTOI and single-precision divides.
//

#ifdef _X86_

// Read (fnstcw) or load (fldcw) the FPU control word from/to the 16-bit
// variable uMode.  The SAFE variant issues fnclex before the load.
// These expand to bare __asm statements, so uMode must be a plain
// addressable variable and each use should sit on its own statement.
#define FPU_GET_MODE(uMode) \
    __asm fnstcw WORD PTR uMode
#define FPU_SET_MODE(uMode) \
    __asm fldcw WORD PTR uMode
#define FPU_SAFE_SET_MODE(uMode) \
    __asm fnclex \
    __asm fldcw WORD PTR uMode

// Pure expressions that derive a new control-word value from uMode without
// touching the FPU: set bits 0xc00, clear bits 0x300, and set bits 0x3f
// respectively.  The macro names describe the intended effect of each mask.
#define FPU_MODE_CHOP_ROUND(uMode) \
    ((uMode) | 0xc00)
#define FPU_MODE_LOW_PRECISION(uMode) \
    ((uMode) & 0xfcff)
#define FPU_MODE_MASK_EXCEPTIONS(uMode) \
    ((uMode) | 0x3f)

#if 0 || DBG

// Debug-only check that both 0xc00 bits of the current control word are
// set, i.e. the state FPU_MODE_CHOP_ROUND produces.
// NOTE(review): after macro expansion the bare "__asm fnstcw cw" and the
// DDASSERT call share one logical line; confirm the compiler ends the __asm
// statement before DDASSERT (the usual macro-safe form wraps the
// instruction in "__asm { ... }").
#define ASSERT_CHOP_ROUND() \
    { \
        WORD cw; \
        __asm fnstcw cw \
        DDASSERT((cw & 0xc00) == 0xc00); \
    }

#else

#define ASSERT_CHOP_ROUND()

#endif // DBG

#else

// Non-x86 stand-ins: there is no control word to manipulate, so the
// setters expand to nothing and the mode expressions evaluate to 0 without
// evaluating their argument.
// Initialize with zero to avoid use-before-set errors.
#define FPU_GET_MODE(uMode) \
    ((uMode) = 0)
#define FPU_SET_MODE(uMode)
#define FPU_SAFE_SET_MODE(uMode)

#define FPU_MODE_CHOP_ROUND(uMode) 0
#define FPU_MODE_LOW_PRECISION(uMode) 0
#define FPU_MODE_MASK_EXCEPTIONS(uMode) 0

#define ASSERT_CHOP_ROUND()

#endif // _X86_

//
// Single-precision FP functions.
// May produce invalid results for exceptional or denormal values.
// ATTENTION - Alpha exposes float math routines and they may be a small win.
//
// Each wrapper widens its argument(s) to double, calls the double-precision
// C library routine, and narrows the result back to FLOAT.
//

#define COSF(fV) ((FLOAT)cos((double)(fV)))
#define SINF(fV) ((FLOAT)sin((double)(fV)))
#define SQRTF(fV) ((FLOAT)sqrt((double)(fV)))
#define POWF(fV, fE) ((FLOAT)pow((double)(fV), (double)(fE)))

// Approximate log and power functions using Jim Blinn's CG&A technique.
// Only work for positive values.
#ifdef POINTER_CASTING __inline FLOAT APPXLG2F(FLOAT f) { return (FLOAT)(ASINT32(f) - INT32_FLOAT_ONE) * g_fOoExpScale; } __inline FLOAT APPXPOW2F(FLOAT f) { INT32 i = (INT32)(f * g_fExpScale) + INT32_FLOAT_ONE; return ASFLOAT(i); } __inline FLOAT APPXINVF(FLOAT f) { INT32 i = (INT32_FLOAT_ONE << 1) - ASINT32(f); return ASFLOAT(i); } __inline FLOAT APPXSQRTF(FLOAT f) { INT32 i = (ASINT32(f) >> 1) + (INT32_FLOAT_ONE >> 1); return ASFLOAT(i); } __inline FLOAT APPXISQRTF(FLOAT f) { INT32 i = INT32_FLOAT_ONE + (INT32_FLOAT_ONE >> 1) - (ASINT32(f) >> 1); return ASFLOAT(i); } __inline FLOAT APPXPOWF(FLOAT f, FLOAT exp) { INT32 i = (INT32)(exp * (ASINT32(f) - INT32_FLOAT_ONE)) + INT32_FLOAT_ONE; return ASFLOAT(i); } #else __inline FLOAT APPXLG2F(FLOAT f) { FLOATINT32 fi; fi.f = f; return (FLOAT)(fi.i - INT32_FLOAT_ONE) * g_fOoExpScale; } __inline FLOAT APPXPOW2F(FLOAT f) { FLOATINT32 fi; fi.i = (INT32)(f * g_fExpScale) + INT32_FLOAT_ONE; return fi.f; } __inline FLOAT APPXINVF(FLOAT f) { FLOATINT32 fi; fi.f = f; fi.i = (INT32_FLOAT_ONE << 1) - fi.i; return fi.f; } __inline FLOAT APPXSQRTF(FLOAT f) { FLOATINT32 fi; fi.f = f; fi.i = (fi.i >> 1) + (INT32_FLOAT_ONE >> 1); return fi.f; } __inline FLOAT APPXISQRTF(FLOAT f) { FLOATINT32 fi; fi.f = f; fi.i = INT32_FLOAT_ONE + (INT32_FLOAT_ONE >> 1) - (fi.i >> 1); return fi.f; } __inline FLOAT APPXPOWF(FLOAT f, FLOAT exp) { FLOATINT32 fi; fi.f = f; fi.i = (INT32)(exp * (fi.i - INT32_FLOAT_ONE)) + INT32_FLOAT_ONE; return fi.f; } #endif #ifdef _X86_ // Uses a table float __fastcall TableInvSqrt(float value); // Uses Jim Blinn's floating point trick float __fastcall JBInvSqrt(float value); #define ISQRTF(fV) TableInvSqrt(fV); #ifdef POINTER_CASTING // Strip sign bit in integer. __inline FLOAT ABSF(FLOAT f) { UINT32 i = ASUINT32(f) & 0x7fffffff; return ASFLOAT(i); } // Toggle sign bit in integer. __inline FLOAT NEGF(FLOAT f) { UINT32 i = ASUINT32(f) ^ 0x80000000; return ASFLOAT(i); } #else // Strip sign bit in integer. 
__inline FLOAT ABSF(FLOAT f) { FLOATINT32 fi; fi.f = f; fi.u &= 0x7fffffff; return fi.f; } // Toggle sign bit in integer. __inline FLOAT NEGF(FLOAT f) { FLOATINT32 fi; fi.f = f; fi.u ^= 0x80000000; return fi.f; } #endif // POINTER_CASTING // Requires chop rounding. __inline INT32 SCALED_FRACTION(FLOAT f) { LARGE_INTEGER i; __asm { fld f fmul g_fTwoPow31 fistp i } return i.LowPart; } // Requires chop rounding. __inline INT FTOI(FLOAT f) { LARGE_INTEGER i; __asm { fld f fistp i } return i.LowPart; } // Requires chop rounding. #define ICEILF(f) (FLOAT_LEZ(f) ? FTOI(f) : FTOI((f) + g_fOneMinusEps)) #define CEILF(f) ((FLOAT)ICEILF(f)) #define IFLOORF(f) (FLOAT_LTZ(f) ? FTOI((f) - g_fOneMinusEps) : FTOI(f)) #define FLOORF(f) ((FLOAT)IFLOORF(f)) #else // _X86_ #define ISQRTF(fV) (1.0f / (FLOAT)sqrt((double)(fV))) #define ABSF(f) ((FLOAT)fabs((double)(f))) #define NEGF(f) (-(f)) #define SCALED_FRACTION(f) ((INT32)((f) * g_fTwoPow31)) #define FTOI(f) ((INT)(f)) #define CEILF(f) ((FLOAT)ceil((double)(f))) #define ICEILF(f) ((INT)CEILF(f)) #define FLOORF(f) ((FLOAT)floor((double)(f))) #define IFLOORF(f) ((INT)FLOORF(f)) #endif // _X86_ // // Overlapped divide support. // #ifdef _X86_ // Starts a divide directly from memory. Result field is provided for // compatibility with non-x86 code that does the divide immediately. #define FLD_BEGIN_DIVIDE(Num, Den, Res) { __asm fld Num __asm fdiv Den } #define FLD_BEGIN_IDIVIDE(Num, Den, Res) { __asm fld Num __asm fidiv Den } // Store a divide result directly to memory. #define FSTP_END_DIVIDE(Res) { __asm fstp Res } #else // _X86_ #define FLD_BEGIN_DIVIDE(Num, Den, Res) ((Res) = (Num) / (Den)) #define FLD_BEGIN_IDIVIDE(Num, Den, Res) ((Res) = (Num) / (FLOAT)(Den)) #define FSTP_END_DIVIDE(Res) #endif // _X86_ // // Specialized FP comparison functions. // // On the x86, it's faster to do compares with an integer cast // than it is to do the fcom. // // The zero operations work for all normalized FP numbers, -0 included. 
// #ifdef _X86_ #define FLOAT_CMP_POS(fa, op, fb) (ASINT32(fa) op ASINT32(fb)) #define FLOAT_CMP_PONE(flt, op) (ASINT32(flt) op INT32_FLOAT_ONE) #ifdef POINTER_CASTING #define FLOAT_GTZ(flt) (ASINT32(flt) > 0) #define FLOAT_LTZ(flt) (ASUINT32(flt) > 0x80000000) #define FLOAT_GEZ(flt) (ASUINT32(flt) <= 0x80000000) #define FLOAT_LEZ(flt) (ASINT32(flt) <= 0) #define FLOAT_EQZ(flt) ((ASUINT32(flt) & 0x7fffffff) == 0) #define FLOAT_NEZ(flt) ((ASUINT32(flt) & 0x7fffffff) != 0) #else __inline int FLOAT_GTZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return fi.i > 0; } __inline int FLOAT_LTZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return fi.u > 0x80000000; } __inline int FLOAT_GEZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return fi.u <= 0x80000000; } __inline int FLOAT_LEZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return fi.i <= 0; } __inline int FLOAT_EQZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return (fi.u & 0x7fffffff) == 0; } __inline int FLOAT_NEZ(FLOAT f) { FLOATINT32 fi; fi.f = f; return (fi.u & 0x7fffffff) != 0; } #endif // POINTER_CASTING #else #define FLOAT_GTZ(flt) ((flt) > g_fZero) #define FLOAT_LTZ(flt) ((flt) < g_fZero) #define FLOAT_GEZ(flt) ((flt) >= g_fZero) #define FLOAT_LEZ(flt) ((flt) <= g_fZero) #define FLOAT_EQZ(flt) ((flt) == g_fZero) #define FLOAT_NEZ(flt) ((flt) != g_fZero) #define FLOAT_CMP_POS(fa, op, fb) ((fa) op (fb)) #define FLOAT_CMP_PONE(flt, op) ((flt) op g_fOne) #endif // _X86_ #ifdef __cplusplus } #endif #endif // #ifndef _D3DFLT_H_