Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

528 lines
15 KiB

  1. #ifndef __glcpu_h_
  2. #define __glcpu_h_
  3. /*
  4. ** Copyright 1991, Silicon Graphics, Inc.
  5. ** All Rights Reserved.
  6. **
  7. ** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
  8. ** the contents of this file may not be disclosed to third parties, copied or
  9. ** duplicated in any form, in whole or in part, without the prior written
  10. ** permission of Silicon Graphics, Inc.
  11. **
  12. ** RESTRICTED RIGHTS LEGEND:
  13. ** Use, duplication or disclosure by the Government is subject to restrictions
  14. ** as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
  15. ** and Computer Software clause at DFARS 252.227-7013, and/or in similar or
  16. ** successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
  17. ** rights reserved under the Copyright Laws of the United States.
  18. **
  19. ** CPU dependent constants.
  20. */
  21. #include <float.h>
  22. #include <math.h>
  /* Width of a byte, in bits. */
  #define __GL_BITS_PER_BYTE          8

  /* NOTE(review): stipple bit-order flag; its consumers live outside this header. */
  #define __GL_STIPPLE_MSB            1

  /*
  ** Bit-field layout of a 32-bit float as assumed by the bit-twiddling
  ** macros in this header: a 23-bit mantissa starting at bit 0, an 8-bit
  ** exponent (bias 127) starting at bit 23, and the sign in bit 31.
  */
  #define __GL_FLOAT_MANTISSA_BITS    23
  #define __GL_FLOAT_MANTISSA_SHIFT   0
  #define __GL_FLOAT_EXPONENT_BIAS    127
  #define __GL_FLOAT_EXPONENT_BITS    8
  #define __GL_FLOAT_EXPONENT_SHIFT   23
  #define __GL_FLOAT_SIGN_SHIFT       31

  /* In-place masks for the mantissa and exponent fields. */
  #define __GL_FLOAT_MANTISSA_MASK \
      (((1 << __GL_FLOAT_MANTISSA_BITS) - 1) << __GL_FLOAT_MANTISSA_SHIFT)
  #define __GL_FLOAT_EXPONENT_MASK \
      (((1 << __GL_FLOAT_EXPONENT_BITS) - 1) << __GL_FLOAT_EXPONENT_SHIFT)
  // When the position of a float's most significant bit is known in
  // advance, converting it to an integer needs no FPU work: the integer
  // is sitting in the mantissa and can be pulled out with a shift and a
  // mask.
  //
  // flt   - an lvalue float (its address is taken); must be positive
  // shift - number of low mantissa bits to discard
  //
  // The result is the top (MANTISSA_BITS - shift) mantissa bits with the
  // implicit leading one OR-ed back in just above them.
  #define __GL_FIXED_FLOAT_TO_INT(flt, shift) \
      ((((*(LONG *)&(flt)) >> (shift)) & \
        ((1 << (__GL_FLOAT_MANTISSA_BITS - (shift))) - 1)) | \
       (1 << (__GL_FLOAT_MANTISSA_BITS - (shift))))
  // Variant of __GL_FIXED_FLOAT_TO_INT that leaves the implicit leading
  // one out of the result. Handy for recovering an unbiased number when
  // the bias that was added consists of nothing but that leading bit.
  //
  // flt   - an lvalue float (its address is taken); must be positive
  // shift - number of low mantissa bits to discard
  #define __GL_FIXED_FLOAT_TO_INT_NO_MSB(flt, shift) \
      (((*(LONG *)&(flt)) >> (shift)) & \
       ((1 << (__GL_FLOAT_MANTISSA_BITS - (shift))) - 1))
  // Yields the fixed-point form of a float: the complete mantissa field
  // with the implicit leading one set in the bit directly above it.
  //
  // flt - an lvalue float (its address is taken); must be positive
  #define __GL_FIXED_FLOAT_TO_FIXED(flt) \
      (((*(LONG *)&(flt)) & ((1 << (__GL_FLOAT_MANTISSA_BITS)) - 1)) | \
       (1 << (__GL_FLOAT_MANTISSA_BITS)))
  // Fixed-point form without the implicit leading one: just the raw
  // mantissa field of the float.
  //
  // flt - an lvalue float (its address is taken)
  #define __GL_FIXED_FLOAT_TO_FIXED_NO_MSB(flt) \
      ((*(LONG *)&(flt)) & ((1 << (__GL_FLOAT_MANTISSA_BITS)) - 1))
  // Extracts the fractional part of the fixed-point value held in a
  // float's mantissa, returned as an integer: the low `shift` bits of the
  // raw float representation.
  //
  // flt   - an lvalue float (its address is taken); must be positive
  // shift - number of fraction bits
  #define __GL_FIXED_FLOAT_FRACTION(flt, shift) \
      ((*(LONG *)&(flt)) & ((1 << (shift)) - 1))
  // Rebuilds the bit pattern of an IEEE float from its fixed-point form.
  // The result stays integer-typed on purpose: casting to float would
  // make the compiler emit an int-to-float conversion rather than reuse
  // the bits.
  //
  // fxed  - fixed-point value; only its low MANTISSA_BITS bits are kept
  // shift - added to the exponent bias to form the exponent field
  //
  // The value must be positive (no sign bit is produced).
  #define __GL_FIXED_TO_FIXED_FLOAT(fxed, shift) \
      (((fxed) & ((1 << (__GL_FLOAT_MANTISSA_BITS)) - 1)) | \
       ((__GL_FLOAT_EXPONENT_BIAS + (shift)) << __GL_FLOAT_EXPONENT_SHIFT))
  // Comparisons of a float against zero and against +1.0.
  //
  // On the x86 it is faster to reinterpret the float as an integer and
  // compare that than to issue an fcomp, so the x86 versions below work
  // on the raw bits and therefore need an lvalue argument.  They are
  // valid for all normalized floats, -0 included: EQZ/NEZ mask the sign
  // bit away before testing, and the ordered tests treat the -0 pattern
  // (0x80000000) as "not less than" and "not greater than" zero.
  //
  // Other targets fall back to ordinary floating-point compares against
  // __glZero / __glOne.
  #ifdef _X86_
  #define __GL_FLOAT_GTZ(f)               (*(LONG *)&(f) > 0)
  #define __GL_FLOAT_LTZ(f)               (*(ULONG *)&(f) > 0x80000000)
  #define __GL_FLOAT_GEZ(f)               (*(ULONG *)&(f) <= 0x80000000)
  #define __GL_FLOAT_LEZ(f)               (*(LONG *)&(f) <= 0)
  #define __GL_FLOAT_EQZ(f)               ((*(ULONG *)&(f) & 0x7fffffff) == 0)
  #define __GL_FLOAT_NEZ(f)               ((*(ULONG *)&(f) & 0x7fffffff) != 0)
  // 0x3f800000 is the bit pattern of 1.0f; `op` is any relational operator.
  #define __GL_FLOAT_COMPARE_PONE(f, op)  (*(LONG *)&(f) op 0x3f800000)
  #else
  #define __GL_FLOAT_GTZ(f)               ((f) > __glZero)
  #define __GL_FLOAT_LTZ(f)               ((f) < __glZero)
  #define __GL_FLOAT_GEZ(f)               ((f) >= __glZero)
  #define __GL_FLOAT_LEZ(f)               ((f) <= __glZero)
  #define __GL_FLOAT_EQZ(f)               ((f) == __glZero)
  #define __GL_FLOAT_NEZ(f)               ((f) != __glZero)
  #define __GL_FLOAT_COMPARE_PONE(f, op)  ((f) op __glOne)
  #endif // _X86_
  // Equality tests.  The x86 versions compare raw bit patterns, so they
  // need lvalue arguments and the zero tests recognise positive zero
  // only: a value of -0 is NOT reported as zero by EQPZ/NEPZ.
  #ifdef _X86_
  #define __GL_FLOAT_EQPZ(f)      (*(LONG *)&(f) == 0)
  #define __GL_FLOAT_NEPZ(f)      (*(LONG *)&(f) != 0)
  #define __GL_FLOAT_EQ(a, b)     (*(LONG *)&(a) == *(LONG *)&(b))
  #define __GL_FLOAT_NE(a, b)     (*(LONG *)&(a) != *(LONG *)&(b))
  #else
  #define __GL_FLOAT_EQPZ(f)      ((f) == __glZero)
  #define __GL_FLOAT_NEPZ(f)      ((f) != __glZero)
  #define __GL_FLOAT_EQ(a, b)     ((a) == (b))
  #define __GL_FLOAT_NE(a, b)     ((a) != (b))
  #endif // _X86_
  // Split floating-point divide.
  //
  // BEGIN issues the divide and leaves the quotient on the FPU stack so
  // that integer work can be overlapped with it; END pops the quotient
  // into its destination.  Plain C cannot express this because it would
  // store the result immediately.
  //
  // The SIMPLE forms take the destination itself, the function forms
  // take a pointer to it.  On x86 the `result` argument of both BEGIN
  // forms is not used.  Targets without _X86_ perform the whole divide
  // in BEGIN and make END a no-op.
  #ifdef _X86_
  #define __GL_FLOAT_SIMPLE_BEGIN_DIVIDE(num, den, result) \
      __asm fld num \
      __asm fdiv den
  #define __GL_FLOAT_SIMPLE_END_DIVIDE(result) \
      __asm fstp DWORD PTR result
  __inline void __GL_FLOAT_BEGIN_DIVIDE(__GLfloat num, __GLfloat den,
                                        __GLfloat *result)
  {
      __asm {
          fld     num
          fdiv    den
      }
  }
  __inline void __GL_FLOAT_END_DIVIDE(__GLfloat *result)
  {
      __asm {
          mov     eax, result
          fstp    DWORD PTR [eax]
      }
  }
  #else
  #define __GL_FLOAT_SIMPLE_BEGIN_DIVIDE(num, den, result) \
      ((result) = (num)/(den))
  #define __GL_FLOAT_SIMPLE_END_DIVIDE(result)
  #define __GL_FLOAT_BEGIN_DIVIDE(num, den, result) (*(result) = (num)/(den))
  #define __GL_FLOAT_END_DIVIDE(result)
  #endif // _X86_
  //**********************************************************************
  //
  // Math helper functions and macros
  //
  //**********************************************************************

  // Reinterpret the storage of an lvalue without converting its value.
  //   CASTFIX / CASTINT - view the object's bits as a LONG
  //   CASTFLOAT         - view the object's bits as a __GLfloat
  #define CASTFIX(x)      (*((LONG *)&(x)))
  #define CASTINT(x)      CASTFIX(x)
  #define CASTFLOAT(x)    (*((__GLfloat *)&(x)))
  // Pack a floating-point color into a ULONG.
  //
  //   ul     - lvalue that receives the packed value
  //   pColor - pointer to a color with float members r, g, b and a
  //
  // Each component is scaled by the matching GENACCEL(gc).?AccelPrimScale
  // factor and narrowed to a UCHAR.  Layout of the result:
  //   bits 31-24 alpha, 23-16 red, 15-8 green, 7-0 blue.
  //
  // A variable named `gc` must be in scope wherever the macro is expanded;
  // it is not a macro parameter.
  //
  // `pColor` is parenthesized so that arguments such as `&color` or
  // `base + i` bind correctly, and the whole assignment is wrapped so the
  // macro behaves as a single expression.
  #define FLT_TO_RGBA(ul, pColor) \
      ((ul) = \
       (((ULONG)(FLT_TO_UCHAR_SCALE((pColor)->a, GENACCEL(gc).aAccelPrimScale)) << 24) | \
        ((ULONG)(FLT_TO_UCHAR_SCALE((pColor)->r, GENACCEL(gc).rAccelPrimScale)) << 16) | \
        ((ULONG)(FLT_TO_UCHAR_SCALE((pColor)->g, GENACCEL(gc).gAccelPrimScale)) << 8) | \
        ((ULONG)(FLT_TO_UCHAR_SCALE((pColor)->b, GENACCEL(gc).bAccelPrimScale)))))

  // Color-index counterpart of FLT_TO_RGBA: only the `r` member is used,
  // scaled by GENACCEL(gc).rAccelPrimScale and placed in bits 23-16.
  #define FLT_TO_CINDEX(ul, pColor) \
      ((ul) = \
       ((ULONG)(FLT_TO_UCHAR_SCALE((pColor)->r, GENACCEL(gc).rAccelPrimScale)) << 16))
  148. #ifdef _X86_
  149. #pragma warning(disable:4035) // Function doesn't return a value
  150. // Convert float to int 15.16
  151. __inline LONG __fastcall FLT_TO_FIX(
  152. float a)
  153. {
  154. LARGE_INTEGER li;
  155. __asm {
  156. mov eax, a
  157. test eax, 07fffffffh
  158. jz RetZero
  159. add eax, 08000000h
  160. mov a, eax
  161. fld a
  162. fistp li
  163. mov eax, DWORD PTR li
  164. jmp Done
  165. RetZero:
  166. xor eax, eax
  167. Done:
  168. }
  169. }
  170. // Convert float to int 15.16, can cause overflow exceptions
  171. __inline LONG __fastcall UNSAFE_FLT_TO_FIX(
  172. float a)
  173. {
  174. LONG l;
  175. __asm {
  176. mov eax, a
  177. test eax, 07fffffffh
  178. jz RetZero
  179. add eax, 08000000h
  180. mov a, eax
  181. fld a
  182. fistp l
  183. mov eax, l
  184. jmp Done
  185. RetZero:
  186. xor eax, eax
  187. Done:
  188. }
  189. }
  190. // Convert float to int 0.31
  191. __inline LONG __fastcall FLT_FRACTION(
  192. float a)
  193. {
  194. LARGE_INTEGER li;
  195. __asm {
  196. mov eax, a
  197. test eax, 07fffffffh
  198. jz RetZero
  199. add eax, 0f800000h
  200. mov a, eax
  201. fld a
  202. fistp li
  203. mov eax, DWORD PTR li
  204. jmp Done
  205. RetZero:
  206. xor eax, eax
  207. Done:
  208. }
  209. }
  210. // Convert float to int 0.31, can cause overflow exceptions
  211. __inline LONG __fastcall UNSAFE_FLT_FRACTION(
  212. float a)
  213. {
  214. LONG l;
  215. __asm {
  216. mov eax, a
  217. test eax, 07fffffffh
  218. jz RetZero
  219. add eax, 0f800000h
  220. mov a, eax
  221. fld a
  222. fistp l
  223. mov eax, l
  224. jmp Done
  225. RetZero:
  226. xor eax, eax
  227. Done:
  228. }
  229. }
  230. #pragma warning(default:4035) // Function doesn't return a value
  231. // Convert float*scale to int
  232. __inline LONG __fastcall FLT_TO_FIX_SCALE(
  233. float a,
  234. float b)
  235. {
  236. LARGE_INTEGER li;
  237. __asm {
  238. fld a
  239. fmul b
  240. fistp li
  241. }
  242. return li.LowPart;
  243. }
  244. #define FLT_TO_UCHAR_SCALE(value_in, scale) \
  245. ((UCHAR)FLT_TO_FIX_SCALE(value_in, scale))
  246. __inline LONG __fastcall FTOL(
  247. float a)
  248. {
  249. LARGE_INTEGER li;
  250. _asm {
  251. fld a
  252. fistp li
  253. }
  254. return li.LowPart;
  255. }
  256. // Can cause overflow exceptions
  257. __inline LONG __fastcall UNSAFE_FTOL(
  258. float a)
  259. {
  260. LONG l;
  261. _asm {
  262. fld a
  263. fistp l
  264. }
  265. return l;
  266. }
  // Scale three colour components that are already on the FPU stack by
  // __glVal65536 and store them as integers into the gc.
  //
  // Preconditions:
  //   - R, G and B are in st(2), st(1) and st(0) respectively
  //   - edx holds the gc pointer
  //
  // Sequence: the scale factor is pushed (moving R, G, B to st(3), st(2),
  // st(1)); R and G are multiplied in place; the final multiply scales B
  // and pops the factor; the three fistp stores then write and pop B, G
  // and R in that order, so all three inputs are consumed.
  //
  // rOffset/gOffset/bOffset are the offsets from edx of the destination
  // DWORDs.
  #define FLT_STACK_RGB_TO_GC_FIXED(rOffset, gOffset, bOffset) \
      __asm fld __glVal65536 \
      __asm fmul st(3), st(0) \
      __asm fmul st(2), st(0) \
      __asm fmulp st(1), st(0) \
      __asm fistp DWORD PTR [edx+bOffset] \
      __asm fistp DWORD PTR [edx+gOffset] \
      __asm fistp DWORD PTR [edx+rOffset]
  // Capture the FPU control word.
  //
  // Declares two locals in the enclosing scope, so it has to appear where
  // declarations are allowed:
  //   cwSave - the control word as it was on entry (used by the RESTORE
  //            macros)
  //   cwTemp - working copy that the FPU_xxx mode macros modify and load
  // Only the low WORD of cwSave is written by fnstcw. Clobbers eax.
  #define FPU_SAVE_MODE() \
      DWORD cwSave; \
      DWORD cwTemp; \
      \
      __asm { \
          __asm fnstcw WORD PTR cwSave \
          __asm mov eax, cwSave \
          __asm mov cwTemp, eax \
      }

  // Reload the control word captured by FPU_SAVE_MODE.
  #define FPU_RESTORE_MODE() \
      __asm { \
          __asm fldcw WORD PTR cwSave \
      }

  // As FPU_RESTORE_MODE, but pending FPU exceptions are cleared (fnclex)
  // before the saved control word is reloaded.
  #define FPU_RESTORE_MODE_NO_EXCEPTIONS() \
      __asm { \
          __asm fnclex \
          __asm fldcw WORD PTR cwSave \
      }
  // FPU control-word setters.
  //
  // Every macro below edits cwTemp (declared by FPU_SAVE_MODE) and loads
  // it into the FPU with fldcw; eax is clobbered.  In the x87 control
  // word, bits 11-10 select the rounding mode (both set = chop, both
  // clear = round to nearest), bits 9-8 select the precision (both clear
  // = single, 10b = double) and bits 5-0 are the exception masks.

  // Rounding: chop (sets bits 11-10).
  #define FPU_CHOP_ON() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm or eax, 0x0c00 \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Rounding: nearest (clears bits 11-10).
  #define FPU_ROUND_ON() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm and eax,0xf3ff \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Rounding: nearest, precision field set to 10b (clears bits 11-8,
  // then sets bit 9).
  #define FPU_ROUND_ON_PREC_HI() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm and eax,0xf0ff \
          __asm or eax,0x0200 \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Precision: low (clears bits 9-8); rounding mode untouched.
  #define FPU_PREC_LOW() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm and eax, 0xfcff \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Precision: low, and all six exception-mask bits set (0x3f).
  #define FPU_PREC_LOW_MASK_EXCEPTIONS() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm and eax, 0xfcff \
          __asm or eax, 0x3f \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Rounding: chop, precision: low.
  #define FPU_CHOP_ON_PREC_LOW() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm or eax, 0x0c00 \
          __asm and eax, 0xfcff \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }

  // Rounding: nearest, precision field 10b.  Unlike FPU_ROUND_ON_PREC_HI
  // this overwrites the entire high byte of the control word with 2, so
  // bits 15-12 are cleared as well.
  #define FPU_CHOP_OFF_PREC_HI() \
      __asm { \
          __asm mov eax, cwTemp \
          __asm mov ah, 2 \
          __asm mov cwTemp, eax \
          __asm fldcw WORD PTR cwTemp \
      }
  // CHOP_ROUND_ON / CHOP_ROUND_OFF expand to nothing.
  #define CHOP_ROUND_ON()
  #define CHOP_ROUND_OFF()

  // Debug builds: read the FPU control word and assert that both
  // rounding-control bits (0xc00) are set, i.e. that chop rounding is
  // active.  Other builds: expands to nothing.
  #if DBG
  #define ASSERT_CHOP_ROUND() \
      { \
          WORD fpuControl; \
          __asm { \
              __asm fnstcw fpuControl \
          } \
          ASSERTOPENGL((fpuControl & 0xc00) == 0xc00, "Chop round must be on\n"); \
      }
  #else
  #define ASSERT_CHOP_ROUND()
  #endif
  361. #else // _X86_
  // Portable (non-x86) versions of the float conversion helpers,
  // implemented with ordinary C casts.

  // Float to integer via a C cast.
  #define FTOL(value) \
      ((GLint)(value))
  #define UNSAFE_FTOL(value) \
      FTOL(value)

  // value_in * scale, converted to an integer.  `scale` is parenthesized
  // so that an expression argument (e.g. `a + b`) is multiplied as a
  // whole, matching the x86 function form, which evaluates each argument
  // before multiplying.
  #define FLT_TO_FIX_SCALE(value_in, scale) \
      ((GLint)((__GLfloat)(value_in) * (scale)))

  // As FLT_TO_FIX_SCALE, narrowed to an unsigned char.
  #define FLT_TO_UCHAR_SCALE(value_in, scale) \
      ((UCHAR)((GLint)((__GLfloat)(value_in) * (scale))))

  // Float to fixed point: multiply by FIX_SCALEFACT and convert.
  #define FLT_TO_FIX(value_in) \
      ((GLint)((__GLfloat)(value_in) * FIX_SCALEFACT))
  #define UNSAFE_FLT_TO_FIX(value_in) \
      FLT_TO_FIX(value_in)

  // Float to 0.31 fixed point: multiply by __glVal2147483648 and convert.
  #define FLT_FRACTION(f) \
      FTOL((f) * __glVal2147483648)
  #define UNSAFE_FLT_FRACTION(f) \
      FLT_FRACTION(f)
  // Without _X86_ there is no FPU control word to manage here, so every
  // mode macro expands to nothing.
  #define FPU_SAVE_MODE()                     /* no-op */
  #define FPU_RESTORE_MODE()                  /* no-op */
  #define FPU_RESTORE_MODE_NO_EXCEPTIONS()    /* no-op */
  #define FPU_CHOP_ON()                       /* no-op */
  #define FPU_ROUND_ON()                      /* no-op */
  #define FPU_ROUND_ON_PREC_HI()              /* no-op */
  #define FPU_PREC_LOW()                      /* no-op */
  #define FPU_PREC_LOW_MASK_EXCEPTIONS()      /* no-op */
  #define FPU_CHOP_ON_PREC_LOW()              /* no-op */
  #define FPU_CHOP_OFF_PREC_HI()              /* no-op */
  #define CHOP_ROUND_ON()                     /* no-op */
  #define CHOP_ROUND_OFF()                    /* no-op */
  #define ASSERT_CHOP_ROUND()                 /* no-op */
  391. #endif //_X86_
  392. //**********************************************************************
  393. //
  394. // Fast math routines/macros. These may assume that the FPU is in
  395. // single-precision, truncation mode as defined by the CPU_XXX macros.
  396. //
  397. //**********************************************************************
  398. #ifdef _X86_
  399. __inline float __gl_fast_ceilf(float f)
  400. {
  401. LONG i;
  402. ASSERT_CHOP_ROUND();
  403. i = FTOL(f + ((float)1.0 - (float)FLT_EPSILON));
  404. return (float)i;
  405. }
  406. __inline float __gl_fast_floorf(float f)
  407. {
  408. LONG i;
  409. ASSERT_CHOP_ROUND();
  410. if (__GL_FLOAT_LTZ(f)) {
  411. i = FTOL(f - ((float)1.0 - (float)FLT_EPSILON));
  412. } else {
  413. i = FTOL(f);
  414. }
  415. return (float)i;
  416. }
  417. __inline LONG __gl_fast_floorf_i(float f)
  418. {
  419. ASSERT_CHOP_ROUND();
  420. if (__GL_FLOAT_LTZ(f)) {
  421. return FTOL(f - ((float)1.0 - (float)FLT_EPSILON));
  422. } else {
  423. return FTOL(f);
  424. }
  425. }
  // x86: route the fast-math entry points to the inline FPU helpers.
  #define __GL_FAST_FLOORF_I(x)   __gl_fast_floorf_i(x)
  #define __GL_FAST_FLOORF(x)     __gl_fast_floorf(x)
  #define __GL_FAST_CEILF(x)      __gl_fast_ceilf(x)
  429. #else
  // Portable fallbacks built on the C library's double-precision
  // floor() / ceil().
  #define __GL_FAST_FLOORF_I(x)   ((GLint)floor((double) (x)))
  #define __GL_FAST_FLOORF(x)     ((__GLfloat)floor((double) (x)))
  #define __GL_FAST_CEILF(x)      ((__GLfloat)ceil((double) (x)))
  433. #endif
  //**********************************************************************
  //
  // Other various macros:
  //
  //**********************************************************************

  // Depth values use the same scale as the fixed-point conversion
  // (Z16_SCALE is the same as FIX_SCALEFACT), so this simply forwards to
  // FLT_TO_FIX.
  #define FLT_TO_Z16_SCALE(value)     FLT_TO_FIX(value)

  /*
  ** Top bit of a depth value: z shifted right by the bit width of
  ** __GLzValue minus one.
  ** NOTE: __GLzValue better be unsigned.
  */
  #define __GL_Z_SIGN_BIT(z) \
      ((z) >> (sizeof(__GLzValue) * __GL_BITS_PER_BYTE - 1))

  #ifdef NT
  #define __GL_STIPPLE_MSB 1
  #endif /* NT */
  447. #endif /* __glcpu_h_ */