|
|
//----------------------------------------------------------------------------
//
// Copyright (C) Microsoft Corporation, 1997.
//
// d3dflt.h
//
// Floating-point constants and operations on FP values.
//
//----------------------------------------------------------------------------
#ifndef _D3DFLT_H_
#define _D3DFLT_H_
#include <math.h>
#include <float.h>
// Give the declarations in this header C linkage when it is compiled as C++.
// The matching closing brace is at the end of the file.
// Each directive sits on its own line: a '#' that is not the first token on
// a line is not a preprocessor directive, which would leave the #ifdef open.
#ifdef __cplusplus
extern "C" {
#endif
// Overlays a FLOAT with a signed and an unsigned integer member so that the
// same storage can be read or written through any of the three types.
union tagFLOATINT32
{
    FLOAT  f;   // Floating-point member.
    INT32  i;   // Signed integer member.
    UINT32 u;   // Unsigned integer member.
};
typedef union tagFLOATINT32 FLOATINT32;
typedef union tagFLOATINT32 *PFLOATINT32;
//
// Type-forcing macros to access FP as integer and vice-versa.
// ATTENTION - VC5's optimizer turns these macros into ftol sometimes,
// completely breaking them.
// Using FLOATINT32 works around the problem but is not as flexible,
// so the old code is kept around for the time when the compiler is fixed.
// Note that pointer casting with FLOATINT32 fails just as the direct
// pointer casting does, so it's not a remedy.
//
// Use these macros with extreme care.
//
// Each macro takes the address of its argument, so the argument must be an
// addressable lvalue; the expansion is itself an lvalue of the target type.
// Access the storage of (i) as a FLOAT.
#define ASFLOAT(i) (*(FLOAT *)&(i))
// Access the storage of (f) as an INT32.
#define ASINT32(f) (*(INT32 *)&(f))
// Access the storage of (f) as a UINT32.
#define ASUINT32(f) (*(UINT32 *)&(f))
//
// FP constants.
//
// Powers of two for snap values. These should not be used in code.
// CONST_TWOPOWn is the plain integer literal 2^n, for n = 0 through 52.
// The literals carry no suffix, so the type of each one is whatever the
// compiler assigns to an unsuffixed decimal constant of that magnitude.
#define CONST_TWOPOW0 1
#define CONST_TWOPOW1 2
#define CONST_TWOPOW2 4
#define CONST_TWOPOW3 8
#define CONST_TWOPOW4 16
#define CONST_TWOPOW5 32
#define CONST_TWOPOW6 64
#define CONST_TWOPOW7 128
#define CONST_TWOPOW8 256
#define CONST_TWOPOW9 512
#define CONST_TWOPOW10 1024
#define CONST_TWOPOW11 2048
#define CONST_TWOPOW12 4096
#define CONST_TWOPOW13 8192
#define CONST_TWOPOW14 16384
#define CONST_TWOPOW15 32768
#define CONST_TWOPOW16 65536
#define CONST_TWOPOW17 131072
#define CONST_TWOPOW18 262144
#define CONST_TWOPOW19 524288
#define CONST_TWOPOW20 1048576
#define CONST_TWOPOW21 2097152
#define CONST_TWOPOW22 4194304
#define CONST_TWOPOW23 8388608
#define CONST_TWOPOW24 16777216
#define CONST_TWOPOW25 33554432
#define CONST_TWOPOW26 67108864
#define CONST_TWOPOW27 134217728
#define CONST_TWOPOW28 268435456
#define CONST_TWOPOW29 536870912
#define CONST_TWOPOW30 1073741824
#define CONST_TWOPOW31 2147483648
#define CONST_TWOPOW32 4294967296
#define CONST_TWOPOW33 8589934592
#define CONST_TWOPOW34 17179869184
#define CONST_TWOPOW35 34359738368
#define CONST_TWOPOW36 68719476736
#define CONST_TWOPOW37 137438953472
#define CONST_TWOPOW38 274877906944
#define CONST_TWOPOW39 549755813888
#define CONST_TWOPOW40 1099511627776
#define CONST_TWOPOW41 2199023255552
#define CONST_TWOPOW42 4398046511104
#define CONST_TWOPOW43 8796093022208
#define CONST_TWOPOW44 17592186044416
#define CONST_TWOPOW45 35184372088832
#define CONST_TWOPOW46 70368744177664
#define CONST_TWOPOW47 140737488355328
#define CONST_TWOPOW48 281474976710656
#define CONST_TWOPOW49 562949953421312
#define CONST_TWOPOW50 1125899906842624
#define CONST_TWOPOW51 2251799813685248
#define CONST_TWOPOW52 4503599627370496
// FLOAT_TWOPOWn is CONST_TWOPOWn cast to FLOAT, for n = 0 through 52.
// The inner parentheses keep the cast applied to the whole constant
// regardless of how the CONST_ macro is spelled.
#define FLOAT_TWOPOW0 ((FLOAT)(CONST_TWOPOW0))
#define FLOAT_TWOPOW1 ((FLOAT)(CONST_TWOPOW1))
#define FLOAT_TWOPOW2 ((FLOAT)(CONST_TWOPOW2))
#define FLOAT_TWOPOW3 ((FLOAT)(CONST_TWOPOW3))
#define FLOAT_TWOPOW4 ((FLOAT)(CONST_TWOPOW4))
#define FLOAT_TWOPOW5 ((FLOAT)(CONST_TWOPOW5))
#define FLOAT_TWOPOW6 ((FLOAT)(CONST_TWOPOW6))
#define FLOAT_TWOPOW7 ((FLOAT)(CONST_TWOPOW7))
#define FLOAT_TWOPOW8 ((FLOAT)(CONST_TWOPOW8))
#define FLOAT_TWOPOW9 ((FLOAT)(CONST_TWOPOW9))
#define FLOAT_TWOPOW10 ((FLOAT)(CONST_TWOPOW10))
#define FLOAT_TWOPOW11 ((FLOAT)(CONST_TWOPOW11))
#define FLOAT_TWOPOW12 ((FLOAT)(CONST_TWOPOW12))
#define FLOAT_TWOPOW13 ((FLOAT)(CONST_TWOPOW13))
#define FLOAT_TWOPOW14 ((FLOAT)(CONST_TWOPOW14))
#define FLOAT_TWOPOW15 ((FLOAT)(CONST_TWOPOW15))
#define FLOAT_TWOPOW16 ((FLOAT)(CONST_TWOPOW16))
#define FLOAT_TWOPOW17 ((FLOAT)(CONST_TWOPOW17))
#define FLOAT_TWOPOW18 ((FLOAT)(CONST_TWOPOW18))
#define FLOAT_TWOPOW19 ((FLOAT)(CONST_TWOPOW19))
#define FLOAT_TWOPOW20 ((FLOAT)(CONST_TWOPOW20))
#define FLOAT_TWOPOW21 ((FLOAT)(CONST_TWOPOW21))
#define FLOAT_TWOPOW22 ((FLOAT)(CONST_TWOPOW22))
#define FLOAT_TWOPOW23 ((FLOAT)(CONST_TWOPOW23))
#define FLOAT_TWOPOW24 ((FLOAT)(CONST_TWOPOW24))
#define FLOAT_TWOPOW25 ((FLOAT)(CONST_TWOPOW25))
#define FLOAT_TWOPOW26 ((FLOAT)(CONST_TWOPOW26))
#define FLOAT_TWOPOW27 ((FLOAT)(CONST_TWOPOW27))
#define FLOAT_TWOPOW28 ((FLOAT)(CONST_TWOPOW28))
#define FLOAT_TWOPOW29 ((FLOAT)(CONST_TWOPOW29))
#define FLOAT_TWOPOW30 ((FLOAT)(CONST_TWOPOW30))
#define FLOAT_TWOPOW31 ((FLOAT)(CONST_TWOPOW31))
#define FLOAT_TWOPOW32 ((FLOAT)(CONST_TWOPOW32))
#define FLOAT_TWOPOW33 ((FLOAT)(CONST_TWOPOW33))
#define FLOAT_TWOPOW34 ((FLOAT)(CONST_TWOPOW34))
#define FLOAT_TWOPOW35 ((FLOAT)(CONST_TWOPOW35))
#define FLOAT_TWOPOW36 ((FLOAT)(CONST_TWOPOW36))
#define FLOAT_TWOPOW37 ((FLOAT)(CONST_TWOPOW37))
#define FLOAT_TWOPOW38 ((FLOAT)(CONST_TWOPOW38))
#define FLOAT_TWOPOW39 ((FLOAT)(CONST_TWOPOW39))
#define FLOAT_TWOPOW40 ((FLOAT)(CONST_TWOPOW40))
#define FLOAT_TWOPOW41 ((FLOAT)(CONST_TWOPOW41))
#define FLOAT_TWOPOW42 ((FLOAT)(CONST_TWOPOW42))
#define FLOAT_TWOPOW43 ((FLOAT)(CONST_TWOPOW43))
#define FLOAT_TWOPOW44 ((FLOAT)(CONST_TWOPOW44))
#define FLOAT_TWOPOW45 ((FLOAT)(CONST_TWOPOW45))
#define FLOAT_TWOPOW46 ((FLOAT)(CONST_TWOPOW46))
#define FLOAT_TWOPOW47 ((FLOAT)(CONST_TWOPOW47))
#define FLOAT_TWOPOW48 ((FLOAT)(CONST_TWOPOW48))
#define FLOAT_TWOPOW49 ((FLOAT)(CONST_TWOPOW49))
#define FLOAT_TWOPOW50 ((FLOAT)(CONST_TWOPOW50))
#define FLOAT_TWOPOW51 ((FLOAT)(CONST_TWOPOW51))
#define FLOAT_TWOPOW52 ((FLOAT)(CONST_TWOPOW52))
// Values that are smaller than the named value by the smallest
// representable amount. Since this depends on the type used
// there is no CONST form.
// NOTE(review): the integer literals below are 2^31 - 65 and 2^32 - 129, not
// the target values themselves. This presumably relies on FLOAT having a
// 24-bit significand and on round-to-nearest integer-to-float conversion,
// which would yield 2^31 - 128 and 2^32 - 256 respectively - confirm.
#define FLOAT_NEARTWOPOW31 ((FLOAT)2147483583)
#define FLOAT_NEARTWOPOW32 ((FLOAT)4294967167)
// Value close enough to zero to consider zero. This can't be too small
// but it can't be too large. In other words, it's picked by guessing.
#define FLOAT_NEARZERO (1e-5f)
// General FP constants.
// e, given as a double literal and cast to FLOAT.
#define FLOAT_E ((FLOAT)2.7182818284590452354)
// Integer value of first exponent bit in a float. Provides a scaling factor
// for exponent values extracted directly from float representation.
// 0x00800000 is 2^23.
#define FLOAT_EXPSCALE ((FLOAT)0x00800000)
// Integer representation of 1.0f.
#define INT32_FLOAT_ONE 0x3f800000
#ifdef _X86_

// On x86 every FP value is loaded from memory, so each constant is declared
// here as a global variable.
extern FLOAT g_fE;
extern FLOAT g_fZero;
extern FLOAT g_fNearZero;
extern FLOAT g_fHalf;
extern FLOAT g_fp95;
extern FLOAT g_fOne;
extern FLOAT g_fOneMinusEps;
extern FLOAT g_fExpScale;
extern FLOAT g_fOoExpScale;
extern FLOAT g_f255oTwoPow15;
extern FLOAT g_fOo255;
extern FLOAT g_fOo256;
extern FLOAT g_fTwoPow7;
extern FLOAT g_fTwoPow8;
extern FLOAT g_fTwoPow11;
extern FLOAT g_fTwoPow15;
extern FLOAT g_fOoTwoPow15;
extern FLOAT g_fTwoPow16;
extern FLOAT g_fOoTwoPow16;
extern FLOAT g_fTwoPow20;
extern FLOAT g_fOoTwoPow20;
extern FLOAT g_fTwoPow27;
extern FLOAT g_fOoTwoPow27;
extern FLOAT g_fTwoPow30;
extern FLOAT g_fTwoPow31;
extern FLOAT g_fNearTwoPow31;
extern FLOAT g_fOoTwoPow31;
extern FLOAT g_fOoNearTwoPow31;
extern FLOAT g_fTwoPow32;
extern FLOAT g_fNearTwoPow32;
extern FLOAT g_fTwoPow39;
extern FLOAT g_fTwoPow47;

#else

// Elsewhere the same names are plain compile-time constant expressions.
// "Oo" in a name means "one over" (the reciprocal is computed in double
// and then cast to FLOAT).
#define g_fE                FLOAT_E
#define g_fNearZero         FLOAT_NEARZERO
#define g_fZero             (0.0f)
#define g_fHalf             (0.5f)
#define g_fp95              (0.95f)
#define g_fOne              (1.0f)
#define g_fOneMinusEps      (1.0f - FLT_EPSILON)
#define g_fExpScale         FLOAT_EXPSCALE
#define g_fOoExpScale       ((FLOAT)(1.0 / (double)FLOAT_EXPSCALE))
#define g_f255oTwoPow15     ((FLOAT)(255.0 / (double)CONST_TWOPOW15))
#define g_fOo255            ((FLOAT)(1.0 / 255.0))
#define g_fOo256            ((FLOAT)(1.0 / 256.0))
#define g_fTwoPow7          FLOAT_TWOPOW7
#define g_fTwoPow8          FLOAT_TWOPOW8
#define g_fTwoPow11         FLOAT_TWOPOW11
#define g_fTwoPow15         FLOAT_TWOPOW15
#define g_fOoTwoPow15       ((FLOAT)(1.0 / (double)CONST_TWOPOW15))
#define g_fTwoPow16         FLOAT_TWOPOW16
#define g_fOoTwoPow16       ((FLOAT)(1.0 / (double)CONST_TWOPOW16))
#define g_fTwoPow20         FLOAT_TWOPOW20
#define g_fOoTwoPow20       ((FLOAT)(1.0 / (double)CONST_TWOPOW20))
#define g_fTwoPow27         FLOAT_TWOPOW27
#define g_fOoTwoPow27       ((FLOAT)(1.0 / (double)CONST_TWOPOW27))
#define g_fTwoPow30         FLOAT_TWOPOW30
#define g_fTwoPow31         FLOAT_TWOPOW31
#define g_fNearTwoPow31     FLOAT_NEARTWOPOW31
#define g_fOoTwoPow31       ((FLOAT)(1.0 / (double)CONST_TWOPOW31))
#define g_fOoNearTwoPow31   ((FLOAT)(1.0 / ((double)FLOAT_NEARTWOPOW31)))
#define g_fTwoPow32         FLOAT_TWOPOW32
#define g_fNearTwoPow32     FLOAT_NEARTWOPOW32
#define g_fTwoPow39         FLOAT_TWOPOW39
#define g_fTwoPow47         FLOAT_TWOPOW47

#endif // _X86_
//
// Conversion tables.
//
// Maps an unsigned byte to a float in [0.0, 1.0]. A 257th entry, also
// one, is provided to allow overflow.
// Declaration only; the table is defined outside this header.
extern FLOAT g_fUInt8ToFloat[257];
// Floating-point pinning values for float-int conversion.
// Declaration only; the 33-entry table is defined outside this header.
extern DOUBLE g_dSnap[33];
//
// x86 FP control for optimized FTOI and single-precision divides.
//
#ifdef _X86_

// Store the current FPU control word into uMode (fnstcw).
#define FPU_GET_MODE(uMode) \
    __asm fnstcw WORD PTR uMode
// Load the FPU control word from uMode (fldcw).
#define FPU_SET_MODE(uMode) \
    __asm fldcw WORD PTR uMode
// Clear pending FP exceptions (fnclex), then load the control word.
// Each instruction carries its own __asm keyword because the macro expands
// to a single logical line.
#define FPU_SAFE_SET_MODE(uMode) \
    __asm fnclex \
    __asm fldcw WORD PTR uMode

// Control-word transforms; each yields a new value and leaves uMode alone.
// Sets bits 10-11 (mask 0xc00).
#define FPU_MODE_CHOP_ROUND(uMode) \
    ((uMode) | 0xc00)
// Clears bits 8-9 (mask 0xfcff).
#define FPU_MODE_LOW_PRECISION(uMode) \
    ((uMode) & 0xfcff)
// Sets bits 0-5 (mask 0x3f).
#define FPU_MODE_MASK_EXCEPTIONS(uMode) \
    ((uMode) | 0x3f)

#if DBG
// Debug check that both bits of mask 0xc00 are set in the live control word.
// The instruction is wrapped in a braced __asm block so that the C statement
// following it on the same logical line is not parsed as assembly.
#define ASSERT_CHOP_ROUND() \
    { \
        WORD cw; \
        __asm { fnstcw cw } \
        DDASSERT((cw & 0xc00) == 0xc00); \
    }
#else
#define ASSERT_CHOP_ROUND()
#endif // DBG

#else

// No FPU control on other targets: the setters expand to nothing and the
// transforms yield 0.
// Initialize with zero to avoid use-before-set errors.
#define FPU_GET_MODE(uMode) \
    ((uMode) = 0)
#define FPU_SET_MODE(uMode)
#define FPU_SAFE_SET_MODE(uMode)
#define FPU_MODE_CHOP_ROUND(uMode) 0
#define FPU_MODE_LOW_PRECISION(uMode) 0
#define FPU_MODE_MASK_EXCEPTIONS(uMode) 0
#define ASSERT_CHOP_ROUND()

#endif // _X86_
//
// Single-precision FP functions.
// May produce invalid results for exceptional or denormal values.
// ATTENTION - Alpha exposes float math routines and they may be a small win.
//
// Each wrapper widens its argument(s) to double, calls the <math.h> routine
// and narrows the result back to FLOAT.
#define COSF(fAngle)        ((FLOAT)cos((double)(fAngle)))
#define SINF(fAngle)        ((FLOAT)sin((double)(fAngle)))
#define SQRTF(fValue)       ((FLOAT)sqrt((double)(fValue)))
#define POWF(fBase, fExp)   ((FLOAT)pow((double)(fBase), (double)(fExp)))
// Approximate log, power, reciprocal and square-root functions using
// Jim Blinn's CG&A technique. They only work for positive values.
#ifdef POINTER_CASTING

// Variant that reads and writes the float's bits through the AS* macros.

// Bit pattern of f minus the bit pattern of 1.0f, scaled by g_fOoExpScale.
__inline FLOAT APPXLG2F(FLOAT f)
{
    INT32 iOffset = ASINT32(f) - INT32_FLOAT_ONE;
    return (FLOAT)iOffset * g_fOoExpScale;
}

// f scaled by g_fExpScale, truncated to an integer, offset by the bit
// pattern of 1.0f and returned as a float.
__inline FLOAT APPXPOW2F(FLOAT f)
{
    INT32 iBits = (INT32)(f * g_fExpScale) + INT32_FLOAT_ONE;
    return ASFLOAT(iBits);
}

// Twice the bit pattern of 1.0f minus the bit pattern of f.
__inline FLOAT APPXINVF(FLOAT f)
{
    INT32 iBits = (INT32_FLOAT_ONE << 1) - ASINT32(f);
    return ASFLOAT(iBits);
}

// Half the bit pattern of f plus half the bit pattern of 1.0f.
__inline FLOAT APPXSQRTF(FLOAT f)
{
    INT32 iBits = (ASINT32(f) >> 1) + (INT32_FLOAT_ONE >> 1);
    return ASFLOAT(iBits);
}

// One and a half times the bit pattern of 1.0f minus half that of f.
__inline FLOAT APPXISQRTF(FLOAT f)
{
    INT32 iBits = INT32_FLOAT_ONE + (INT32_FLOAT_ONE >> 1) - (ASINT32(f) >> 1);
    return ASFLOAT(iBits);
}

// Offset of f's bit pattern from that of 1.0f, multiplied by exp, truncated
// and re-offset.
__inline FLOAT APPXPOWF(FLOAT f, FLOAT exp)
{
    INT32 iBits = (INT32)(exp * (ASINT32(f) - INT32_FLOAT_ONE)) + INT32_FLOAT_ONE;
    return ASFLOAT(iBits);
}

#else

// Variant that reads and writes the float's bits through a FLOATINT32 union.

// Bit pattern of f minus the bit pattern of 1.0f, scaled by g_fOoExpScale.
__inline FLOAT APPXLG2F(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return (FLOAT)(bits.i - INT32_FLOAT_ONE) * g_fOoExpScale;
}

// f scaled by g_fExpScale, truncated to an integer, offset by the bit
// pattern of 1.0f and returned as a float.
__inline FLOAT APPXPOW2F(FLOAT f)
{
    FLOATINT32 bits;

    bits.i = (INT32)(f * g_fExpScale) + INT32_FLOAT_ONE;
    return bits.f;
}

// Twice the bit pattern of 1.0f minus the bit pattern of f.
__inline FLOAT APPXINVF(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.i = (INT32_FLOAT_ONE << 1) - bits.i;
    return bits.f;
}

// Half the bit pattern of f plus half the bit pattern of 1.0f.
__inline FLOAT APPXSQRTF(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.i = (bits.i >> 1) + (INT32_FLOAT_ONE >> 1);
    return bits.f;
}

// One and a half times the bit pattern of 1.0f minus half that of f.
__inline FLOAT APPXISQRTF(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.i = INT32_FLOAT_ONE + (INT32_FLOAT_ONE >> 1) - (bits.i >> 1);
    return bits.f;
}

// Offset of f's bit pattern from that of 1.0f, multiplied by exp, truncated
// and re-offset.
__inline FLOAT APPXPOWF(FLOAT f, FLOAT exp)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.i = (INT32)(exp * (bits.i - INT32_FLOAT_ONE)) + INT32_FLOAT_ONE;
    return bits.f;
}

#endif
#ifdef _X86_
// Uses a table.
float __fastcall TableInvSqrt(float value);
// Uses Jim Blinn's floating point trick.
float __fastcall JBInvSqrt(float value);
// ISQRTF forwards to the table-based routine. The expansion deliberately has
// no trailing semicolon so the macro can be used inside a larger expression,
// the same way the non-x86 definition of ISQRTF can.
#define ISQRTF(fV) TableInvSqrt(fV)
#ifdef POINTER_CASTING

// Absolute value: clear the top bit of the float's integer image.
__inline FLOAT ABSF(FLOAT f)
{
    UINT32 uBits = ASUINT32(f) & 0x7fffffff;
    return ASFLOAT(uBits);
}

// Negation: flip the top bit of the float's integer image.
__inline FLOAT NEGF(FLOAT f)
{
    UINT32 uBits = ASUINT32(f) ^ 0x80000000;
    return ASFLOAT(uBits);
}

#else

// Absolute value: clear the top bit of the float's integer image.
__inline FLOAT ABSF(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.u = bits.u & 0x7fffffff;
    return bits.f;
}

// Negation: flip the top bit of the float's integer image.
__inline FLOAT NEGF(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    bits.u = bits.u ^ 0x80000000;
    return bits.f;
}

#endif // POINTER_CASTING
// Requires chop rounding.
// Multiplies f by g_fTwoPow31 on the FPU, stores the product as an integer
// into a LARGE_INTEGER with fistp, and returns its low part.
__inline INT32 SCALED_FRACTION(FLOAT f)
{
    LARGE_INTEGER i;

    // One instruction per line: inside a braced __asm block a newline (or a
    // further __asm keyword) is what separates instructions.
    __asm
    {
        fld f
        fmul g_fTwoPow31
        fistp i
    }

    return i.LowPart;
}
// Requires chop rounding.
// Loads f onto the FPU stack, stores it as an integer into a LARGE_INTEGER
// with fistp, and returns its low part.
__inline INT FTOI(FLOAT f)
{
    LARGE_INTEGER i;

    // One instruction per line: inside a braced __asm block a newline (or a
    // further __asm keyword) is what separates instructions.
    __asm
    {
        fld f
        fistp i
    }

    return i.LowPart;
}
// Requires chop rounding.
// NOTE: ICEILF and IFLOORF mention f more than once in their expansion (once
// in the sign test and once in the selected branch), so arguments with side
// effects should be avoided. CEILF and FLOORF inherit this.
// ICEILF: FTOI(f) when FLOAT_LEZ(f), otherwise FTOI(f + g_fOneMinusEps).
#define ICEILF(f) (FLOAT_LEZ(f) ? FTOI(f) : FTOI((f) + g_fOneMinusEps))
// CEILF: ICEILF converted back to FLOAT.
#define CEILF(f) ((FLOAT)ICEILF(f))
// IFLOORF: FTOI(f - g_fOneMinusEps) when FLOAT_LTZ(f), otherwise FTOI(f).
#define IFLOORF(f) (FLOAT_LTZ(f) ? FTOI((f) - g_fOneMinusEps) : FTOI(f))
// FLOORF: IFLOORF converted back to FLOAT.
#define FLOORF(f) ((FLOAT)IFLOORF(f))
#else // _X86_
// Portable versions built on plain C casts and the <math.h> routines.
#define ISQRTF(fV)              (1.0f / (FLOAT)sqrt((double)(fV)))
#define ABSF(fV)                ((FLOAT)fabs((double)(fV)))
#define NEGF(fV)                (-(fV))
// Product of the argument and g_fTwoPow31, converted to INT32.
#define SCALED_FRACTION(fV)     ((INT32)((fV) * g_fTwoPow31))
// Plain C conversion to INT.
#define FTOI(fV)                ((INT)(fV))
#define CEILF(fV)               ((FLOAT)ceil((double)(fV)))
#define ICEILF(fV)              ((INT)CEILF(fV))
#define FLOORF(fV)              ((FLOAT)floor((double)(fV)))
#define IFLOORF(fV)             ((INT)FLOORF(fV))
#endif // _X86_
//
// Overlapped divide support.
//
#ifdef _X86_

// Begin a divide straight from memory: load the numerator onto the FPU stack
// and issue the divide. The result argument is unused here; it exists so the
// call has the same shape as the non-x86 form, which divides immediately.
#define FLD_BEGIN_DIVIDE(numer, denom, result) \
    { __asm fld numer __asm fdiv denom }
// Same, with fidiv in place of fdiv.
#define FLD_BEGIN_IDIVIDE(numer, denom, result) \
    { __asm fld numer __asm fidiv denom }
// Pop the pending quotient straight into memory.
#define FSTP_END_DIVIDE(result) \
    { __asm fstp result }

#else // _X86_

// No overlap: the quotient is computed and assigned at the "begin" step and
// the "end" step expands to nothing.
#define FLD_BEGIN_DIVIDE(numer, denom, result) \
    ((result) = (numer) / (denom))
#define FLD_BEGIN_IDIVIDE(numer, denom, result) \
    ((result) = (numer) / (FLOAT)(denom))
#define FSTP_END_DIVIDE(result)

#endif // _X86_
//
// Specialized FP comparison functions.
//
// On the x86, it's faster to do compares with an integer cast
// than it is to do the fcom.
//
// The zero operations work for all normalized FP numbers, -0 included.
//
#ifdef _X86_

// Compare two floats, or a float against 1.0f, through their integer images.
// op is the comparison operator token. Arguments must be lvalues because
// ASINT32 takes their address.
#define FLOAT_CMP_POS(fa, op, fb)   (ASINT32(fa) op ASINT32(fb))
#define FLOAT_CMP_PONE(flt, op)     (ASINT32(flt) op INT32_FLOAT_ONE)

#ifdef POINTER_CASTING

// Zero tests on the integer image; the unsigned forms treat 0x80000000
// (negative zero) as zero.
#define FLOAT_GTZ(flt)  (ASINT32(flt) > 0)
#define FLOAT_LTZ(flt)  (ASUINT32(flt) > 0x80000000)
#define FLOAT_GEZ(flt)  (ASUINT32(flt) <= 0x80000000)
#define FLOAT_LEZ(flt)  (ASINT32(flt) <= 0)
#define FLOAT_EQZ(flt)  ((ASUINT32(flt) & 0x7fffffff) == 0)
#define FLOAT_NEZ(flt)  ((ASUINT32(flt) & 0x7fffffff) != 0)

#else

// Same zero tests as inline functions going through a FLOATINT32 union.

// Greater than zero: signed image is positive.
__inline int FLOAT_GTZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return bits.i > 0;
}

// Less than zero: unsigned image is above that of negative zero.
__inline int FLOAT_LTZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return bits.u > 0x80000000;
}

// Greater than or equal to zero: unsigned image is at most negative zero.
__inline int FLOAT_GEZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return bits.u <= 0x80000000;
}

// Less than or equal to zero: signed image is not positive.
__inline int FLOAT_LEZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return bits.i <= 0;
}

// Equal to zero: every bit below the top bit is clear.
__inline int FLOAT_EQZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return (bits.u & 0x7fffffff) == 0;
}

// Not equal to zero: some bit below the top bit is set.
__inline int FLOAT_NEZ(FLOAT f)
{
    FLOATINT32 bits;

    bits.f = f;
    return (bits.u & 0x7fffffff) != 0;
}

#endif // POINTER_CASTING

#else

// Other targets use ordinary floating-point comparisons.
#define FLOAT_GTZ(flt)              ((flt) > g_fZero)
#define FLOAT_LTZ(flt)              ((flt) < g_fZero)
#define FLOAT_GEZ(flt)              ((flt) >= g_fZero)
#define FLOAT_LEZ(flt)              ((flt) <= g_fZero)
#define FLOAT_EQZ(flt)              ((flt) == g_fZero)
#define FLOAT_NEZ(flt)              ((flt) != g_fZero)
#define FLOAT_CMP_POS(fa, op, fb)   ((fa) op (fb))
#define FLOAT_CMP_PONE(flt, op)     ((flt) op g_fOne)

#endif // _X86_
// Close the extern "C" linkage block opened at the top of this header.
// Each directive sits on its own line: a '#' that is not the first token on
// a line is not a preprocessor directive, which would leave the #ifdef open.
#ifdef __cplusplus
}
#endif
#endif // #ifndef _D3DFLT_H_
|