Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

831 lines
23 KiB

  1. ;---------------------------Module-Header------------------------------;
  2. ; Module Name: math.asm
  3. ;
  4. ; Fast math routines.
  5. ;
  6. ; Created: 11/1/1996
  7. ; Author: Otto Berkes [ottob]
  8. ;
  9. ; Copyright (c) 1996 Microsoft Corporation
  10. ;----------------------------------------------------------------------;
  11. .386 ;; accept 80386 instructions (32-bit registers are used throughout)
  12. .model small,pascal ;; small memory model, PASCAL as the default language type
  13. assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT ;; cs/ds/es/ss are all assumed to address the FLAT group
  14. assume fs:nothing,gs:nothing ;; no assumptions are made about fs and gs
  15. .xlist ;; stop listing output while the include file is read
  ;; NOTE(review): gli386.inc is not visible here; presumably it supplies
  ;; __FLOAT_ONE, which @__glNormalize@8 compares against -- confirm.
  16. include gli386.inc
  17. .list ;; resume listing output
  18. PROFILE = 0 ;; NOTE(review): presumably consumed by profile.inc to disable profiling -- confirm
  19. include profile.inc
  20. ;
  21. ;
  22. ; We're trying to solve:
  23. ;
  24. ; 1/sqrt(x)
  25. ;
  26. ; which in IEEE float is:
  27. ;
  28. ; 1/sqrt(M * 2^[E-127])
  29. ;
  30. ; To simplify, substitute e = [E-127]
  31. ;
  32. ; We can simplify this by pulling a large portion of the exponent out
  33. ; by using only that portion of the exponent divisible by two (so we can
  34. ; pull it out of the sqrt term):
  35. ;
  36. ; 1/sqrt(M * 2^(2*[e div 2]) * 2^[e MOD 2])
  37. ;
  38. ; which is:
  39. ;
  40. ; 1/ (2^[e div 2] * sqrt(M * 2^[e MOD 2]))
  41. ;
  42. ; or
  43. ;
  44. ; (2^[e div 2])^(-1) * 1/sqrt(M * 2^[e MOD 2])
  45. ;
  46. ; =
  47. ; 2^-[e div 2] * 1/sqrt(M * 2^[e MOD 2])
  48. ;
  49. ; substitute back for e = [E-127]:
  50. ;
  51. ; 2^-[(E - 127) div 2] * 1/sqrt(M * 2^[(E - 127) MOD 2])
  52. ;
  53. ; =
  54. ; 2^-[(E div 2) - 63] * 1/sqrt(M * 2^[(E - 1) MOD 2])
  55. ;
  56. ; =
  57. ; 2^[63 - (E div 2)] * 1/sqrt(M * 2^[(E - 1) MOD 2])
  58. ;
  59. ; As a floating-point number, 2^[63 - (E div 2)] is just the exponent value:
  60. ;
  61. ; [63 - (E div 2)] + 127
  62. ;
  63. ; or
  64. ; [(127+63) - (E div 2)]
  65. ;
  66. ; Remembering to account for the implicit '1' in the mantissa of IEEE floating-
  67. ; point numbers, the range of (M * 2^[(E - 1) MOD 2]) is 0.800000 to
  68. ; 0.ffffff*2, which is the interval [0.5, 2.0). We can use the fact that this
  69. ; is a relatively small range, and therefore can use a table lookup near the
  70. ; actual value. The table will contain values for the piece-wise approximation
  71. ; to the curve 1/sqrt(M * 2^[(E - 1) MOD 2]) using an acceptable interval.
  72. ; These values can then be used to approximate the desired inverse square root
  73. ; value. At this point, all that remains is to apply the correct exponent
  74. ; for the number, which is simply [(127+63) - (E div 2)] from the above
  75. ; equations.
  76. ;
  77. ; To do the piecewise-linear approximation, we can store a table of values at
  78. ; the appropriate intervals, and the deltas between them. However, this
  79. ; will require calculating the difference between the interval value and
  80. ; x. We can do a bit better by using slope-intercept (y = mx + b), so the
  81. ; table will store (m, b).
  82. ;
  83. ; With a 512-entry table, we will get at least 16 bits of precision. This
  84. ; result was obtained using simulations.
  85. .data
  86. ; The following 'C' code generates the table below:
  87. ;#define SQRT_TAB_SIZE_LOG2 9 // 512-entry table
  88. ;
  89. ;#define MANTISSA_SIZE 24
  90. ;#define SQRT_TAB_SIZE (1 << SQRT_TAB_SIZE_LOG2)
  91. ;#define SQRT_INC (1 << (MANTISSA_SIZE - SQRT_TAB_SIZE_LOG2))
  92. ;#define CASTFIX(a) (*((LONG *)&(a)))
  93. ;
  94. ;void genTable()
  95. ;{
  96. ; int i;
  97. ; float x;
  98. ; float xNext;
  99. ; float y;
  100. ; float yNext;
  101. ; float xInterval;
  102. ;
  103. ; // We will start our table with the case where the exponent is even.
  104. ;
  105. ; CASTFIX(x) = 0x3f000000;
  106. ;
  107. ; // We will use the current and next values to generate the piece-wise
  108. ; // data for the curve. The interval between 'current' and 'next' is
  109. ; // based on the smallest change possible in the floating-point value
  110. ; // that also represents a difference of one table-lookup entry.
  111. ;
  112. ; // When we switch to the odd-exponent case (at 1.0), we have to adjust
  113. ; // for the fact that effective interval between successive values
  114. ; // is doubled.
  115. ;
  116. ; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
  117. ; y = (float)1.0 / sqrt((double)x);
  118. ;
  119. ; // Calculate 1.0 / (piece-wise approximation interval).
  120. ;
  121. ; xInterval = xNext - x;
  122. ;
  123. ; xInterval = (float)1.0 / xInterval;
  124. ;
  125. ; // Now, generate the table:
  126. ;
  127. ; for (i = 0; i < SQRT_TAB_SIZE; i++) {
  128. ; float m;
  129. ; float b;
  130. ;
  131. ; // We increment our floating-point values using integer operations
  132. ; // to ensure accuracy:
  133. ;
  134. ; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
  135. ;
  136. ; // Find next point on curve:
  137. ;
  138. ; yNext = (float)1.0 / sqrt((double)xNext);
  139. ;
  140. ; // Test for odd-exponent case:
  141. ;
  142. ; if (CASTFIX(x) == 0x3f800000)
  143. ; xInterval *= (float)0.5;
  144. ;
  145. ; m = (yNext - y) * xInterval;
  146. ; b = y - (m * x);
  147. ;
  148. ; printf("\t\tdd\t0%8xh, 0%8xh\n", CASTFIX(m), CASTFIX(b));
  149. ;
  150. ; y = yNext;
  151. ; x = xNext;
  152. ; }
  153. ;}
  ; invSqrtTab: slope/intercept lookup table for the piecewise-linear
  ; approximation of 1/sqrt described in the file header.
  ;
  ; Each entry is two DWORDs, m followed by b, holding single-precision bit
  ; patterns in the order the generator above prints them
  ; (CASTFIX(m), CASTFIX(b)). The generator emits SQRT_TAB_SIZE (512)
  ; entries, starting at the bit pattern 0x3f000000 (0.5) and advancing the
  ; bit pattern of x by SQRT_INC per entry.
  ;
  ; The routines in this file read m at [invSqrtTab+offset] and b at
  ; [invSqrtTab+offset+4], where offset is a multiple of 8.
  154. invSqrtTab dd 0bfb47e00h, 04007a1fah
  155. dd 0bfb37000h, 040075e36h
  156. dd 0bfb26600h, 040071b31h
  157. dd 0bfb16000h, 04006d8ech
  158. dd 0bfb05800h, 0400695e4h
  159. dd 0bfaf5800h, 0400654a4h
  160. dd 0bfae5600h, 0400612a2h
  161. dd 0bfad5800h, 04005d165h
  162. dd 0bfac5e00h, 0400590f1h
  163. dd 0bfab6400h, 04005503eh
  164. dd 0bfaa6e00h, 040051058h
  165. dd 0bfa97800h, 04004d033h
  166. dd 0bfa88800h, 040049163h
  167. dd 0bfa79600h, 0400451d0h
  168. dd 0bfa6aa00h, 040041396h
  169. dd 0bfa5be00h, 04003d522h
  170. dd 0bfa4d400h, 0400396fah
  171. dd 0bfa3ee00h, 0400359a8h
  172. dd 0bfa30800h, 040031c1dh
  173. dd 0bfa22400h, 04002dee2h
  174. dd 0bfa14400h, 04002a282h
  175. dd 0bfa06600h, 040026674h
  176. dd 0bf9f8800h, 040022a30h
  177. dd 0bf9eae00h, 04001eecah
  178. dd 0bf9dd400h, 04001b32eh
  179. dd 0bf9cfc00h, 0400177e8h
  180. dd 0bf9c2800h, 040013d86h
  181. dd 0bf9b5400h, 0400102efh
  182. dd 0bf9a8400h, 04000c93fh
  183. dd 0bf99b400h, 040008f5bh
  184. dd 0bf98e600h, 0400055d2h
  185. dd 0bf981800h, 040001c16h
  186. dd 0bf975000h, 03fffc7abh
  187. dd 0bf968600h, 03fff55a6h
  188. dd 0bf95c000h, 03ffee580h
  189. dd 0bf94fc00h, 03ffe761ah
  190. dd 0bf943800h, 03ffe0652h
  191. dd 0bf937400h, 03ffd9628h
  192. dd 0bf92b600h, 03ffd290eh
  193. dd 0bf91f800h, 03ffcbb95h
  194. dd 0bf913a00h, 03ffc4dbdh
  195. dd 0bf907e00h, 03ffbe0afh
  196. dd 0bf8fc600h, 03ffb7597h
  197. dd 0bf8f0c00h, 03ffb08f8h
  198. dd 0bf8e5800h, 03ffa9f80h
  199. dd 0bf8da000h, 03ffa3354h
  200. dd 0bf8cee00h, 03ff9ca56h
  201. dd 0bf8c3c00h, 03ff960ffh
  202. dd 0bf8b8a00h, 03ff8f74fh
  203. dd 0bf8adc00h, 03ff88fa8h
  204. dd 0bf8a2e00h, 03ff827aah
  205. dd 0bf898000h, 03ff7bf55h
  206. dd 0bf88d600h, 03ff75911h
  207. dd 0bf882e00h, 03ff6f3adh
  208. dd 0bf878400h, 03ff68cbfh
  209. dd 0bf86de00h, 03ff627eah
  210. dd 0bf863600h, 03ff5c18ah
  211. dd 0bf859400h, 03ff55e81h
  212. dd 0bf84f000h, 03ff4f9edh
  213. dd 0bf845000h, 03ff4977dh
  214. dd 0bf83ae00h, 03ff43381h
  215. dd 0bf831000h, 03ff3d1aeh
  216. dd 0bf827200h, 03ff36f8ch
  217. dd 0bf81d400h, 03ff30d1bh
  218. dd 0bf813a00h, 03ff2acdbh
  219. dd 0bf809e00h, 03ff24b0dh
  220. dd 0bf800600h, 03ff1eb75h
  221. dd 0bf7edc00h, 03ff18b91h
  222. dd 0bf7db000h, 03ff12ca5h
  223. dd 0bf7c8400h, 03ff0cd6eh
  224. dd 0bf7b5c00h, 03ff06f32h
  225. dd 0bf7a3400h, 03ff010ach
  226. dd 0bf791000h, 03fefb324h
  227. dd 0bf77f000h, 03fef569ch
  228. dd 0bf76d000h, 03feef9cch
  229. dd 0bf75b000h, 03fee9cb4h
  230. dd 0bf749400h, 03fee40a0h
  231. dd 0bf737c00h, 03fede592h
  232. dd 0bf726800h, 03fed8b8ch
  233. dd 0bf714c00h, 03fed2ea3h
  234. dd 0bf704000h, 03fecd6b3h
  235. dd 0bf6f2800h, 03fec7a8dh
  236. dd 0bf6e1c00h, 03fec2217h
  237. dd 0bf6d1000h, 03febc95eh
  238. dd 0bf6c0400h, 03feb7062h
  239. dd 0bf6afc00h, 03feb1878h
  240. dd 0bf69f400h, 03feac04ch
  241. dd 0bf68ec00h, 03fea67deh
  242. dd 0bf67ec00h, 03fea11deh
  243. dd 0bf66e800h, 03fe9ba45h
  244. dd 0bf65e800h, 03fe963c5h
  245. dd 0bf64ec00h, 03fe90e60h
  246. dd 0bf63f000h, 03fe8b8bch
  247. dd 0bf62f400h, 03fe862d9h
  248. dd 0bf620000h, 03fe80f73h
  249. dd 0bf610400h, 03fe7b912h
  250. dd 0bf601000h, 03fe76532h
  251. dd 0bf5f2000h, 03fe71276h
  252. dd 0bf5e2c00h, 03fe6be1ch
  253. dd 0bf5d3c00h, 03fe66ae8h
  254. dd 0bf5c5000h, 03fe618dch
  255. dd 0bf5b6000h, 03fe5c530h
  256. dd 0bf5a7800h, 03fe57414h
  257. dd 0bf598c00h, 03fe52157h
  258. dd 0bf58a800h, 03fe4d12fh
  259. dd 0bf57c000h, 03fe47f65h
  260. dd 0bf56dc00h, 03fe42ecbh
  261. dd 0bf55f800h, 03fe3ddf8h
  262. dd 0bf551800h, 03fe38e58h
  263. dd 0bf543800h, 03fe33e80h
  264. dd 0bf535c00h, 03fe2efdeh
  265. dd 0bf527c00h, 03fe29f96h
  266. dd 0bf51a000h, 03fe25086h
  267. dd 0bf50c800h, 03fe202b0h
  268. dd 0bf4ff000h, 03fe1b4a4h
  269. dd 0bf4f1c00h, 03fe167d5h
  270. dd 0bf4e4400h, 03fe1195dh
  271. dd 0bf4d7000h, 03fe0cc24h
  272. dd 0bf4c9c00h, 03fe07eb6h
  273. dd 0bf4bcc00h, 03fe0328ah
  274. dd 0bf4afc00h, 03fdfe62ah
  275. dd 0bf4a3000h, 03fdf9b0fh
  276. dd 0bf496000h, 03fdf4e47h
  277. dd 0bf489800h, 03fdf0441h
  278. dd 0bf47c800h, 03fdeb711h
  279. dd 0bf470400h, 03fde6e24h
  280. dd 0bf463c00h, 03fde2388h
  281. dd 0bf457400h, 03fddd8bah
  282. dd 0bf44b000h, 03fdd8f3ah
  283. dd 0bf43ec00h, 03fdd4589h
  284. dd 0bf432800h, 03fdcfba7h
  285. dd 0bf426800h, 03fdcb317h
  286. dd 0bf41a800h, 03fdc6a57h
  287. dd 0bf40e800h, 03fdc2167h
  288. dd 0bf402c00h, 03fdbd9cdh
  289. dd 0bf3f6c00h, 03fdb907dh
  290. dd 0bf3eb400h, 03fdb4a0dh
  291. dd 0bf3dfc00h, 03fdb036fh
  292. dd 0bf3d4000h, 03fdabb19h
  293. dd 0bf3c8800h, 03fda741fh
  294. dd 0bf3bd400h, 03fda2e83h
  295. dd 0bf3b2000h, 03fd9e8bah
  296. dd 0bf3a6800h, 03fd9a136h
  297. dd 0bf39b400h, 03fd95b13h
  298. dd 0bf390800h, 03fd917e3h
  299. dd 0bf385000h, 03fd8cfd5h
  300. dd 0bf37a400h, 03fd88c4fh
  301. dd 0bf36f800h, 03fd8489eh
  302. dd 0bf364400h, 03fd8019ah
  303. dd 0bf359c00h, 03fd7bf28h
  304. dd 0bf34f000h, 03fd77af6h
  305. dd 0bf344400h, 03fd73699h
  306. dd 0bf339c00h, 03fd6f3a9h
  307. dd 0bf32f400h, 03fd6b08fh
  308. dd 0bf324c00h, 03fd66d4bh
  309. dd 0bf31a800h, 03fd62b78h
  310. dd 0bf310000h, 03fd5e7e0h
  311. dd 0bf305c00h, 03fd5a5bbh
  312. dd 0bf2fb800h, 03fd5636dh
  313. dd 0bf2f1800h, 03fd52295h
  314. dd 0bf2e7400h, 03fd4dff5h
  315. dd 0bf2dd800h, 03fd4a06eh
  316. dd 0bf2d3400h, 03fd45d7ch
  317. dd 0bf2c9800h, 03fd41da7h
  318. dd 0bf2bf800h, 03fd3dc07h
  319. dd 0bf2b6000h, 03fd39d89h
  320. dd 0bf2ac000h, 03fd35b99h
  321. dd 0bf2a2800h, 03fd31ccfh
  322. dd 0bf298c00h, 03fd2dc37h
  323. dd 0bf28f400h, 03fd29d21h
  324. dd 0bf285c00h, 03fd25de5h
  325. dd 0bf27c400h, 03fd21e83h
  326. dd 0bf273000h, 03fd1e0a7h
  327. dd 0bf269800h, 03fd1a0f9h
  328. dd 0bf260400h, 03fd162d3h
  329. dd 0bf257000h, 03fd12488h
  330. dd 0bf24e000h, 03fd0e7c8h
  331. dd 0bf244c00h, 03fd0a933h
  332. dd 0bf23bc00h, 03fd06c2bh
  333. dd 0bf232800h, 03fd02d4ch
  334. dd 0bf229c00h, 03fcff1b0h
  335. dd 0bf220c00h, 03fcfb43ch
  336. dd 0bf218000h, 03fcf785ah
  337. dd 0bf20f400h, 03fcf3c55h
  338. dd 0bf206400h, 03fcefe75h
  339. dd 0bf1fdc00h, 03fcec3e3h
  340. dd 0bf1f4c00h, 03fce85bbh
  341. dd 0bf1ec800h, 03fce4ca0h
  342. dd 0bf1e3c00h, 03fce0fech
  343. dd 0bf1db400h, 03fcdd4d2h
  344. dd 0bf1d2c00h, 03fcd9996h
  345. dd 0bf1ca800h, 03fcd5ff7h
  346. dd 0bf1c2000h, 03fcd2477h
  347. dd 0bf1b9800h, 03fcce8d5h
  348. dd 0bf1b1800h, 03fccb095h
  349. dd 0bf1a9400h, 03fcc7672h
  350. dd 0bf1a0c00h, 03fcc3a6ah
  351. dd 0bf199000h, 03fcc038fh
  352. dd 0bf190800h, 03fcbc743h
  353. dd 0bf188c00h, 03fcb902ah
  354. dd 0bf180800h, 03fcb5562h
  355. dd 0bf178c00h, 03fcb1e0bh
  356. dd 0bf170c00h, 03fcae4cbh
  357. dd 0bf168c00h, 03fcaab6bh
  358. dd 0bf161000h, 03fca73b7h
  359. dd 0bf159400h, 03fca3be4h
  360. dd 0bf151800h, 03fca03f2h
  361. dd 0bf149800h, 03fc9ca12h
  362. dd 0bf142400h, 03fc99582h
  363. dd 0bf13a400h, 03fc95b62h
  364. dd 0bf133000h, 03fc92698h
  365. dd 0bf12b400h, 03fc8ee0bh
  366. dd 0bf123c00h, 03fc8b733h
  367. dd 0bf11c400h, 03fc8803dh
  368. dd 0bf114c00h, 03fc84929h
  369. dd 0bf10d800h, 03fc813ceh
  370. dd 0bf106400h, 03fc7de56h
  371. dd 0bf0fec00h, 03fc7a6e8h
  372. dd 0bf0f7800h, 03fc77136h
  373. dd 0bf0f0400h, 03fc73b67h
  374. dd 0bf0e9000h, 03fc7057bh
  375. dd 0bf0e2000h, 03fc6d14fh
  376. dd 0bf0dac00h, 03fc69b29h
  377. dd 0bf0d3c00h, 03fc666c5h
  378. dd 0bf0ccc00h, 03fc63245h
  379. dd 0bf0c5800h, 03fc5fbc8h
  380. dd 0bf0bec00h, 03fc5c8f2h
  381. dd 0bf0b7c00h, 03fc5941eh
  382. dd 0bf0b0c00h, 03fc55f2eh
  383. dd 0bf0aa000h, 03fc52c07h
  384. dd 0bf0a3000h, 03fc4f6dfh
  385. dd 0bf09c400h, 03fc4c382h
  386. dd 0bf095c00h, 03fc491f2h
  387. dd 0bf08ec00h, 03fc45c76h
  388. dd 0bf088000h, 03fc428c8h
  389. dd 0bf081800h, 03fc3f6eah
  390. dd 0bf07b000h, 03fc3c4f2h
  391. dd 0bf074000h, 03fc38f06h
  392. dd 0bf06dc00h, 03fc35ec8h
  393. dd 0bf067400h, 03fc32c82h
  394. dd 0bf060800h, 03fc2f832h
  395. dd 0bf05a400h, 03fc2c7a9h
  396. dd 0bf053c00h, 03fc29515h
  397. dd 0bf04d800h, 03fc2645ah
  398. dd 0bf047000h, 03fc23192h
  399. dd 0bf040800h, 03fc1feb0h
  400. dd 0bf03a800h, 03fc1cfa0h
  401. dd 0bf034000h, 03fc19c8ah
  402. dd 0bf02dc00h, 03fc16b52h
  403. dd 0bf027c00h, 03fc13bfah
  404. dd 0bf021800h, 03fc10a90h
  405. dd 0bf01b400h, 03fc0d90dh
  406. dd 0bf015000h, 03fc0a771h
  407. dd 0bf00f400h, 03fc079b6h
  408. dd 0bf009000h, 03fc047e8h
  409. dd 0bf003000h, 03fc01800h
  410. dd 0beff4000h, 03fbfd000h
  411. dd 0befdc400h, 03fbf70a1h
  412. dd 0befc4c00h, 03fbf11e5h
  413. dd 0befad800h, 03fbeb3ceh
  414. dd 0bef96400h, 03fbe555ah
  415. dd 0bef7f800h, 03fbdf893h
  416. dd 0bef68e00h, 03fbd9bf4h
  417. dd 0bef52600h, 03fbd3f7eh
  418. dd 0bef3c200h, 03fbce3b6h
  419. dd 0bef26200h, 03fbc889eh
  420. dd 0bef10600h, 03fbc2e38h
  421. dd 0beefac00h, 03fbbd400h
  422. dd 0beee5400h, 03fbb79f8h
  423. dd 0beed0200h, 03fbb212eh
  424. dd 0beebb200h, 03fbac896h
  425. dd 0beea6600h, 03fba70b9h
  426. dd 0bee91a00h, 03fba1889h
  427. dd 0bee7d400h, 03fb9c1a0h
  428. dd 0bee69000h, 03fb96aeeh
  429. dd 0bee54e00h, 03fb91474h
  430. dd 0bee41200h, 03fb8bf48h
  431. dd 0bee2d400h, 03fb86942h
  432. dd 0bee19e00h, 03fb8151ah
  433. dd 0bee06600h, 03fb7c018h
  434. dd 0bedf3400h, 03fb76c6ch
  435. dd 0bede0400h, 03fb71900h
  436. dd 0bedcd600h, 03fb6c5d4h
  437. dd 0bedbac00h, 03fb67379h
  438. dd 0beda8400h, 03fb62161h
  439. dd 0bed95e00h, 03fb5cf8eh
  440. dd 0bed83a00h, 03fb57e00h
  441. dd 0bed71a00h, 03fb52d48h
  442. dd 0bed5fc00h, 03fb4dcd8h
  443. dd 0bed4e000h, 03fb48cb0h
  444. dd 0bed3c800h, 03fb43d64h
  445. dd 0bed2b000h, 03fb3edd2h
  446. dd 0bed19c00h, 03fb39f1eh
  447. dd 0bed08a00h, 03fb350b8h
  448. dd 0becf7c00h, 03fb30333h
  449. dd 0bece6c00h, 03fb2b4d7h
  450. dd 0becd6200h, 03fb267f3h
  451. dd 0becc5a00h, 03fb21b61h
  452. dd 0becb5200h, 03fb1ce8dh
  453. dd 0beca4e00h, 03fb182a2h
  454. dd 0bec94c00h, 03fb1370ch
  455. dd 0bec84a00h, 03fb0eb36h
  456. dd 0bec74e00h, 03fb0a0e4h
  457. dd 0bec65200h, 03fb05652h
  458. dd 0bec55800h, 03fb00c1ah
  459. dd 0bec45e00h, 03fafc1a4h
  460. dd 0bec36a00h, 03faf78bah
  461. dd 0bec27600h, 03faf2f93h
  462. dd 0bec18400h, 03faee6c9h
  463. dd 0bec09600h, 03fae9ef8h
  464. dd 0bebfa600h, 03fae5650h
  465. dd 0bebeba00h, 03fae0ea2h
  466. dd 0bebdd000h, 03fadc756h
  467. dd 0bebce800h, 03fad806ch
  468. dd 0bebc0000h, 03fad3948h
  469. dd 0bebb1e00h, 03facf3c3h
  470. dd 0beba3a00h, 03facad67h
  471. dd 0beb95800h, 03fac6770h
  472. dd 0beb87a00h, 03fac2280h
  473. dd 0beb79c00h, 03fabdd57h
  474. dd 0beb6c000h, 03fab9897h
  475. dd 0beb5e600h, 03fab5440h
  476. dd 0beb50e00h, 03fab1054h
  477. dd 0beb43600h, 03faacc32h
  478. dd 0beb36200h, 03faa891eh
  479. dd 0beb28e00h, 03faa45d6h
  480. dd 0beb1bc00h, 03faa02fah
  481. dd 0beb0ec00h, 03fa9c08eh
  482. dd 0beb01e00h, 03fa97e92h
  483. dd 0beaf5000h, 03fa93c63h
  484. dd 0beae8600h, 03fa8fb4ah
  485. dd 0beadba00h, 03fa8b959h
  486. dd 0beacf400h, 03fa87927h
  487. dd 0beac2a00h, 03fa83776h
  488. dd 0beab6600h, 03fa7f788h
  489. dd 0beaaa200h, 03fa7b76ah
  490. dd 0bea9e000h, 03fa777c2h
  491. dd 0bea91e00h, 03fa737e9h
  492. dd 0bea85e00h, 03fa6f889h
  493. dd 0bea7a000h, 03fa6b9a2h
  494. dd 0bea6e400h, 03fa67b36h
  495. dd 0bea62800h, 03fa63c9ch
  496. dd 0bea56e00h, 03fa5fe7ch
  497. dd 0bea4b400h, 03fa5c02fh
  498. dd 0bea3fe00h, 03fa5830bh
  499. dd 0bea34600h, 03fa5450dh
  500. dd 0bea29400h, 03fa508e8h
  501. dd 0bea1de00h, 03fa4cb3ch
  502. dd 0bea12c00h, 03fa48ebeh
  503. dd 0bea07c00h, 03fa452c2h
  504. dd 0be9fcc00h, 03fa4169ah
  505. dd 0be9f1e00h, 03fa3daf5h
  506. dd 0be9e7000h, 03fa39f25h
  507. dd 0be9dc400h, 03fa363dah
  508. dd 0be9d1a00h, 03fa32915h
  509. dd 0be9c7000h, 03fa2ee26h
  510. dd 0be9bc800h, 03fa2b3beh
  511. dd 0be9b2000h, 03fa2792ch
  512. dd 0be9a7a00h, 03fa23f22h
  513. dd 0be99d600h, 03fa205a4h
  514. dd 0be993200h, 03fa1cbfch
  515. dd 0be989000h, 03fa192dfh
  516. dd 0be97ec00h, 03fa158e5h
  517. dd 0be974e00h, 03fa120e2h
  518. dd 0be96ae00h, 03fa0e802h
  519. dd 0be961000h, 03fa0afb1h
  520. dd 0be957200h, 03fa07738h
  521. dd 0be94d800h, 03fa04006h
  522. dd 0be943a00h, 03fa0073eh
  523. dd 0be93a200h, 03f9fd078h
  524. dd 0be930a00h, 03f9f998ch
  525. dd 0be927000h, 03f9f61c1h
  526. dd 0be91da00h, 03f9f2b43h
  527. dd 0be914400h, 03f9ef4a0h
  528. dd 0be90b000h, 03f9ebe92h
  529. dd 0be901a00h, 03f9e87a3h
  530. dd 0be8f8a00h, 03f9e52c3h
  531. dd 0be8ef600h, 03f9e1c46h
  532. dd 0be8e6600h, 03f9de71eh
  533. dd 0be8dd600h, 03f9db1d2h
  534. dd 0be8d4600h, 03f9d7c62h
  535. dd 0be8cb800h, 03f9d478ch
  536. dd 0be8c2c00h, 03f9d1352h
  537. dd 0be8b9e00h, 03f9cde36h
  538. dd 0be8b1400h, 03f9caa76h
  539. dd 0be8a8a00h, 03f9c7694h
  540. dd 0be8a0000h, 03f9c428eh
  541. dd 0be897600h, 03f9c0e67h
  542. dd 0be88f000h, 03f9bdba1h
  543. dd 0be886800h, 03f9ba7f7h
  544. dd 0be87e200h, 03f9b74eeh
  545. dd 0be875e00h, 03f9b4287h
  546. dd 0be86d800h, 03f9b0f3bh
  547. dd 0be865600h, 03f9add56h
  548. dd 0be85d200h, 03f9aaa8ch
  549. dd 0be855200h, 03f9a792ch
  550. dd 0be84d000h, 03f9a46e6h
  551. dd 0be844e00h, 03f9a1480h
  552. dd 0be83d000h, 03f99e387h
  553. dd 0be835200h, 03f99b26eh
  554. dd 0be82d400h, 03f998136h
  555. dd 0be825600h, 03f994fdfh
  556. dd 0be81da00h, 03f991f31h
  557. dd 0be816000h, 03f98ef2eh
  558. dd 0be80e400h, 03f98be42h
  559. dd 0be806a00h, 03f988e01h
  560. dd 0be7fe000h, 03f985da2h
  561. dd 0be7ef400h, 03f982ebch
  562. dd 0be7e0000h, 03f97fe20h
  563. dd 0be7d1400h, 03f97cefeh
  564. dd 0be7c2400h, 03f979ef2h
  565. dd 0be7b3c00h, 03f977063h
  566. dd 0be7a5400h, 03f9741b7h
  567. dd 0be796800h, 03f971220h
  568. dd 0be788400h, 03f96e408h
  569. dd 0be779c00h, 03f96b506h
  570. dd 0be76b800h, 03f9686b6h
  571. dd 0be75d800h, 03f96591ah
  572. dd 0be74f400h, 03f962a90h
  573. dd 0be741400h, 03f95fcbch
  574. dd 0be733400h, 03f95cecch
  575. dd 0be725800h, 03f95a193h
  576. dd 0be717c00h, 03f95743eh
  577. dd 0be70a400h, 03f9547a1h
  578. dd 0be6fc800h, 03f951a15h
  579. dd 0be6ef000h, 03f94ed42h
  580. dd 0be6e1800h, 03f94c054h
  581. dd 0be6d4000h, 03f94934bh
  582. dd 0be6c7000h, 03f9467d3h
  583. dd 0be6b9c00h, 03f943b6ah
  584. dd 0be6ac800h, 03f940ee8h
  585. dd 0be69f800h, 03f93e322h
  586. dd 0be692800h, 03f93b742h
  587. dd 0be685c00h, 03f938c20h
  588. dd 0be678c00h, 03f93600ch
  589. dd 0be66c000h, 03f9334b8h
  590. dd 0be65f800h, 03f930a24h
  591. dd 0be652c00h, 03f92de9ch
  592. dd 0be646400h, 03f92b3d6h
  593. dd 0be639c00h, 03f9288f7h
  594. dd 0be62d400h, 03f925dffh
  595. dd 0be621000h, 03f9233cah
  596. dd 0be615000h, 03f920a5ah
  597. dd 0be608800h, 03f91df18h
  598. dd 0be5fc800h, 03f91b578h
  599. dd 0be5f0800h, 03f918bc0h
  600. dd 0be5e4800h, 03f9161f0h
  601. dd 0be5d8800h, 03f913808h
  602. dd 0be5ccc00h, 03f910ee8h
  603. dd 0be5c0c00h, 03f90e4d0h
  604. dd 0be5b5400h, 03f90bc62h
  605. dd 0be5a9800h, 03f9092fbh
  606. dd 0be59e000h, 03f906a5fh
  607. dd 0be592800h, 03f9041ach
  608. dd 0be587000h, 03f9018e2h
  609. dd 0be57b800h, 03f8ff001h
  610. dd 0be570400h, 03f8fc7edh
  611. dd 0be565000h, 03f8f9fc2h
  612. dd 0be559c00h, 03f8f7782h
  613. dd 0be54e800h, 03f8f4f2ah
  614. dd 0be543800h, 03f8f27a2h
  615. dd 0be538800h, 03f8f0004h
  616. dd 0be52d800h, 03f8ed850h
  617. dd 0be522c00h, 03f8eb16eh
  618. dd 0be517c00h, 03f8e898eh
  619. dd 0be50d000h, 03f8e6280h
  620. dd 0be502400h, 03f8e3b5dh
  621. dd 0be4f7800h, 03f8e1424h
  622. dd 0be4ecc00h, 03f8decd6h
  623. dd 0be4e2800h, 03f8dc748h
  624. dd 0be4d7c00h, 03f8d9fcfh
  625. dd 0be4cd800h, 03f8d7a18h
  626. dd 0be4c3000h, 03f8d5360h
  627. dd 0be4b8800h, 03f8d2c92h
  628. dd 0be4ae800h, 03f8d078ah
  629. dd 0be4a4000h, 03f8ce094h
  630. dd 0be49a000h, 03f8cbb64h
  631. dd 0be48fc00h, 03f8c9531h
  632. dd 0be485c00h, 03f8c6fd9h
  633. dd 0be47bc00h, 03f8c4a6dh
  634. dd 0be471c00h, 03f8c24edh
  635. dd 0be467c00h, 03f8bff59h
  636. dd 0be45e000h, 03f8bdaa2h
  637. dd 0be454000h, 03f8bb4e6h
  638. dd 0be44a800h, 03f8b90fah
  639. dd 0be440800h, 03f8b6b16h
  640. dd 0be437000h, 03f8b4704h
  641. dd 0be42d800h, 03f8b22dfh
  642. dd 0be423c00h, 03f8afdb3h
  643. dd 0be41a400h, 03f8ad968h
  644. dd 0be410c00h, 03f8ab50ah
  645. dd 0be407800h, 03f8a918eh
  646. dd 0be3fe000h, 03f8a6d0ah
  647. dd 0be3f4c00h, 03f8a496ah
  648. dd 0be3eb400h, 03f8a24c0h
  649. dd 0be3e2400h, 03f8a01f2h
  650. dd 0be3d9000h, 03f89de1ah
  651. dd 0be3d0000h, 03f89bb28h
  652. dd 0be3c6c00h, 03f89972bh
  653. dd 0be3bd800h, 03f89731ch
  654. dd 0be3b4c00h, 03f8950eeh
  655. dd 0be3abc00h, 03f892db4h
  656. dd 0be3a3000h, 03f890b62h
  657. dd 0be399c00h, 03f88e709h
  658. dd 0be391400h, 03f88c591h
  659. dd 0be388400h, 03f88a20fh
  660. dd 0be37fc00h, 03f888075h
  661. dd 0be377000h, 03f885dcch
  662. dd 0be36e400h, 03f883b12h
  663. dd 0be365800h, 03f881847h
  664. dd 0be35d400h, 03f87f768h
  665. dd 0be354800h, 03f87d47ah
  ;----------------------------------------------------------------------;
  ; Code segment start and the assembly-time constants used by
  ; @__FastInvSqrt@4 and @__glNormalize@8 to index invSqrtTab and to
  ; rebuild the exponent of the result.
  ;
  ; Bit numbering below refers to the 32-bit single-precision pattern:
  ; bit 31 = sign, bits 30..23 = exponent E, bits 22..0 = stored mantissa.
  ;----------------------------------------------------------------------;
  .code

  ;; log2 of the number of lookup-table entries (2^9 = 512)
  SQRT_TAB_LOG2       equ 9

  ;; number of mantissa bits in the fp value, counting the hidden leading one
  MANTISSA_SIZE       equ 24

  ;; number of mantissa bits actually stored (one less than the total
  ;; because the leading one is implicit)
  MANTISSA_BITS       equ (MANTISSA_SIZE - 1)

  ;; log2 of the size of one table entry (8 bytes: m and b)
  ELEMENT_SIZE_LOG2   equ 3

  ;; right shift that moves the low exponent bit and the top
  ;; (SQRT_TAB_LOG2 - 1) mantissa bits -- bits 23..15 -- down to bits 11..3,
  ;; so the shifted value is already a byte offset into the table
  EXPONENT_SHIFT      equ (MANTISSA_BITS - (SQRT_TAB_LOG2 - 1) - ELEMENT_SIZE_LOG2)

  ;; keeps the stored mantissa plus the low exponent bit (bits 23..0);
  ;; used when clamping the value to [0.5, 2.0)
  CLAMP_MASK          equ ((1 SHL (MANTISSA_BITS+1)) - 1)

  ;; mask for the stored mantissa bits (bits 22..0)
  MANTISSA_MASK       equ ((1 SHL MANTISSA_BITS) - 1)

  ;; mask for the sign/exponent bits (bits 31..23)
  EXPONENT_MASK       equ (-1 AND (NOT MANTISSA_MASK))

  ;; mask for the table offset: SQRT_TAB_LOG2 index bits sitting above
  ;; ELEMENT_SIZE_LOG2 zero bits
  TABLE_MASK          equ ((1 SHL (SQRT_TAB_LOG2 + ELEMENT_SIZE_LOG2)) - 1) AND (NOT((1 SHL ELEMENT_SIZE_LOG2) - 1))

  ;; OR-ed into the clamped bits so the exponent field becomes 126 or 127,
  ;; i.e. the clamped value lies in [0.5, 2.0)
  EXPONENT_BIAS_EVEN  equ 3f000000h

  ;; bias used for the final exponent computation: the routines compute
  ;; (LARGE_EXPONENT_BIAS - value) SHR 1 and mask with EXPONENT_MASK, which
  ;; leaves [(127+63) - (E div 2)] in the exponent field (see the derivation
  ;; in the file header). The low CLAMP_MASK bits are all ones so that the
  ;; subtraction of the mantissa never borrows from the exponent.
  LARGE_EXPONENT_BIAS equ (((127 + 127/2) SHL (MANTISSA_BITS+1)) OR CLAMP_MASK)
  ;----------------------------------------------------------------------;
  ; @__FastInvSqrt@4
  ;
  ; Approximates 1/sqrt(x) for a single-precision value using one lookup
  ; in invSqrtTab, a linear evaluation m*x' + b on the value clamped to
  ; [0.5, 2.0), and a final multiply by a power of two that restores the
  ; exponent.
  ;
  ; Entry:  x   - DWORD at 8[ebp] once the frame is set up, i.e. the single
  ;               4-byte stack argument. RET 4 removes it.
  ;               NOTE(review): the @name@4 decoration suggests a __fastcall
  ;               export with one 4-byte argument -- confirm against callers.
  ; Exit:   ST(0) = approximate 1/sqrt(x).
  ; Uses:   eax, flags. ecx is saved and restored. 8 bytes of stack are
  ;         reserved; only the DWORD at -8[ebp] (num) is used.
  ; Notes:  There are no tests or branches in this routine, so zero,
  ;         negative, infinite and NaN inputs receive no special handling.
  ;         Integer and x87 instructions are interleaved on purpose; keep
  ;         the order when editing.
  ;----------------------------------------------------------------------;
  x       equ     DWORD PTR 8[ebp]
  num     equ     DWORD PTR -8[ebp]

  @__FastInvSqrt@4 PROC NEAR
          push    ebp
          mov     ebp, esp
          sub     esp, 8
          push    ecx

          mov     eax, x                      ;; eax = raw bits of x
          mov     ecx, eax
          shr     ecx, EXPONENT_SHIFT         ;; low exponent bit + top 8 mantissa bits -> bits 11..3
          and     eax, CLAMP_MASK             ;; keep mantissa and low exponent bit
          and     ecx, TABLE_MASK             ;; ecx = byte offset of the (m, b) entry (8 bytes each)
          or      eax, EXPONENT_BIAS_EVEN     ;; clamped value x' now lies in [0.5, 2.0)
          mov     num, eax
          fld     num                         ;; x'
          fmul    [invSqrtTab+ecx]            ;; m*x'
          mov     eax, LARGE_EXPONENT_BIAS    ;; bias that yields (127+63) - (E div 2) below
          sub     eax, x                      ;; bias - x (low bits are all ones: no borrow)
          fadd    [invSqrtTab+ecx+4]          ;; m*x' + b
          shr     eax, 1                      ;; halve: this is the "divide exponent by 2" step
          and     eax, EXPONENT_MASK          ;; discard mantissa bits -> a pure power of two
          mov     num, eax
          fmul    num                         ;; apply the power-of-two scale

          pop     ecx
          mov     esp, ebp
          pop     ebp
          ret     4
  @__FastInvSqrt@4 endp
  ;----------------------------------------------------------------------;
  ; @__glNormalize@8
  ;
  ; Scales the three single-precision components at [edx] by an approximate
  ; 1/sqrt(x*x + y*y + z*z) and stores the result at [ecx]. The reciprocal
  ; square root uses the same table-lookup sequence as @__FastInvSqrt@4,
  ; expanded inline.
  ;
  ; Entry:  ecx -> destination, three DWORD floats
  ;         edx -> source, three DWORD floats; may be the same address as
  ;                ecx (all source loads happen before the stores)
  ;         NOTE(review): the @name@8 decoration suggests a __fastcall
  ;         export with two 4-byte arguments -- confirm against callers.
  ; Exit:   Destination written as described below. No return value is
  ;         set up; every exit is RET 0.
  ; Uses:   eax, flags. edi is saved and restored on the general path.
  ;         The x87 stack is returned at the depth it was entered with.
  ;
  ; Special cases, decided on the raw bits of the squared length:
  ;   all zero bits  -> destination is filled with zero bits
  ;   __FLOAT_ONE    -> source is copied to destination unchanged; nothing
  ;                     is written when ecx == edx
  ;                     NOTE(review): __FLOAT_ONE is defined outside this
  ;                     file; presumably the bit pattern of 1.0f -- confirm.
  ;----------------------------------------------------------------------;
  len     equ     DWORD PTR -4[ebp]
  num     equ     DWORD PTR -8[ebp]

  @__glNormalize@8 PROC NEAR
          push    ebp
          mov     ebp, esp
          sub     esp, 8

          ;; squared length -> len            ;; x87 stack, top first
          fld     DWORD PTR [edx]
          fmul    DWORD PTR [edx]             ;; xx
          fld     DWORD PTR [edx+4]
          fmul    DWORD PTR [edx+4]           ;; yy xx
          fld     DWORD PTR [edx+8]
          fmul    DWORD PTR [edx+8]           ;; zz yy xx
          fxch    ST(2)                       ;; xx yy zz
          faddp   ST(1), ST                   ;; xx+yy zz
          faddp   ST(1), ST                   ;; xx+yy+zz
          fstp    len                         ;; (empty)

          mov     eax, len                    ;; examine the squared length as an integer
          test    eax, eax
          jne     notZeroLen

          ;; squared length is zero: store zero bits (eax == 0) in all three slots
          mov     [ecx], eax
          mov     [ecx+4], eax
          mov     [ecx+8], eax
          mov     esp, ebp
          pop     ebp
          ret     0

  notZeroLen:
          cmp     eax, __FLOAT_ONE
          jne     notOneLen

          ;; squared length equals __FLOAT_ONE: plain copy, skipped when in place
          cmp     ecx, edx
          je      normExit
          mov     eax, [edx]
          mov     [ecx], eax
          mov     eax, [edx+4]
          mov     [ecx+4], eax
          mov     eax, [edx+8]
          mov     [ecx+8], eax
          mov     esp, ebp
          pop     ebp
          ret     0

  notOneLen:
          ;; general case: eax still holds the bits of the squared length
          push    edi
          mov     edi, eax
          shr     edi, EXPONENT_SHIFT         ;; low exponent bit + top 8 mantissa bits -> bits 11..3
          and     eax, CLAMP_MASK             ;; keep mantissa and low exponent bit
          and     edi, TABLE_MASK             ;; edi = byte offset of the (m, b) entry (8 bytes each)
          or      eax, EXPONENT_BIAS_EVEN     ;; clamped value now lies in [0.5, 2.0)
          mov     num, eax
          fld     num                         ;; clamped value
          fmul    [invSqrtTab+edi]            ;; m * clamped
          mov     eax, LARGE_EXPONENT_BIAS    ;; bias that yields (127+63) - (E div 2) below
          sub     eax, len                    ;; bias - len (low bits are all ones: no borrow)
          fadd    [invSqrtTab+edi+4]          ;; m * clamped + b
          shr     eax, 1                      ;; halve: this is the "divide exponent by 2" step
          and     eax, EXPONENT_MASK          ;; discard mantissa bits -> a pure power of two
          mov     num, eax
          fmul    num                         ;; s = approximate 1/sqrt(len)

          ;; scale each component by s        ;; x87 stack, top first
          fld     DWORD PTR [edx]             ;; x s
          fmul    ST, ST(1)                   ;; xs s
          fld     DWORD PTR [edx+4]           ;; y xs s
          fmul    ST, ST(2)                   ;; ys xs s
          fld     DWORD PTR [edx+8]           ;; z ys xs s
          fmul    ST, ST(3)                   ;; zs ys xs s
          fxch    ST(2)                       ;; xs ys zs s
          fstp    DWORD PTR [ecx]             ;; ys zs s
          fstp    DWORD PTR [ecx+4]           ;; zs s
          fstp    DWORD PTR [ecx+8]           ;; s
          fstp    ST(0)                       ;; discard s; stack is balanced again

          pop     edi
          mov     esp, ebp
          pop     ebp
          ret     0

  normExit:
          mov     esp, ebp
          pop     ebp
          ret     0
  @__glNormalize@8 ENDP
  798. END