Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

918 lines
27 KiB

  1. ;---------------------------Module-Header------------------------------;
  2. ; Module Name: math.asm
  3. ;
  4. ; Fast math routines.
  5. ;
  6. ; Created: 11/1/1996
  7. ; Author: Otto Berkes [ottob]
  8. ;
  9. ; Copyright (c) 1996 Microsoft Corporation
  10. ;----------------------------------------------------------------------;
  11. .386
  12. .model small,pascal
  13. assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
  14. assume fs:nothing,gs:nothing
  15. .list
  16. ; float __fastcall TableInvSqrt(float value);
  17. ;
  18. ; void __fastcall TableVecNormalize(float *resultNormal, float *normal);
  19. ;
  20. ; resultNormal and normal could have the same address
  21. ;
  22. ;
  23. ;
  24. ; We're trying to solve:
  25. ;
  26. ; 1/sqrt(x)
  27. ;
  28. ; which in IEEE float is:
  29. ;
  30. ; 1/sqrt(M * 2^[E-127])
  31. ;
  32. ; To simplify, substitute e = [E-127]
  33. ;
  34. ; We can simplify this by pulling a large portion of the exponent out
  35. ; by using only that portion of the exponent divisible by two (so we can
  36. ; pull it out of the sqrt term):
  37. ;
  38. ; 1/sqrt(M * 2^(2*[e div 2]) * 2^[e MOD 2])
  39. ;
  40. ; which is:
  41. ;
  42. ; 1/ (2^[e div 2] * sqrt(M * 2^[e MOD 2]))
  43. ;
  44. ; or
  45. ;
  46. ; (2^[e div 2])^(-1) * 1/sqrt(M * 2^[e MOD 2])
  47. ;
  48. ; =
  49. ; 2^-[e div 2] * 1/sqrt(M * 2^[e MOD 2])
  50. ;
  51. ; substitute back for e = [E-127]:
  52. ;
  53. ; 2^-[(E - 127) div 2] * 1/sqrt(M * 2^[(E - 127) MOD 2])
  54. ;
  55. ; =
  56. ; 2^-[(E div 2) - 63] * 1/sqrt(M * 2^[(E - 1) MOD 2])
  57. ;
  58. ; =
  59. ; 2^[63 - (E div 2)] * 1/sqrt(M * 2^[(E - 1) MOD 2])
  60. ;
  61. ; As a floating-point number, 2^[63 - (E div 2)] is just the exponent value:
  62. ;
  63. ; [63 - (E div 2)] + 127
  64. ;
  65. ; or
  66. ; [(127+63) - (E div 2)]
  67. ;
  68. ; Remembering to account for the implicit '1' in the mantissa of IEEE floating-
  69. ; point numbers, the range of (M * 2^[(E - 1) MOD 2]) is 0.800000 to
  70. ; 0.ffffff*2, which is the interval [0.5, 2.0). We can use the fact that this
  71. ; is a relatively small range, and therefore can use a table lookup near the
  72. ; actual value. The table will contain values for the piece-wise approximation
  73. ; to the curve 1/sqrt(M * 2^[(E - 1) MOD 2]) using an acceptable interval.
  74. ; These values can then be used to approximate the desired inverse square root
  75. ; value. At this point, all that remains is to apply the correct exponent
  76. ; for the number, which is simply [(127+63) - (E div 2)] from the above
  77. ; equations.
  78. ;
  79. ; To do the piecewise-linear approximation, we can store a table of values at
  80. ; the appropriate intervals, and the deltas between them. However, this
  81. ; will require calculating the difference between the interval value and
  82. ; x. We can do a bit better by using slope-intercept (y = mx + b), so the
  83. ; table will store (m, b).
  84. ;
  85. ; With a 512-entry table, we will get at least 16 bits of precision. This
  86. ; result was obtained using simulations.
  87. .data
  88. ; The following 'C' code generates the table below:
  89. ;#define SQRT_TAB_SIZE_LOG2 9 // 512-entry table
  90. ;
  91. ;#define MANTISSA_SIZE 24
  92. ;#define SQRT_TAB_SIZE (1 << SQRT_TAB_SIZE_LOG2)
  93. ;#define SQRT_INC (1 << (MANTISSA_SIZE - SQRT_TAB_SIZE_LOG2))
  94. ;#define CASTFIX(a) (*((LONG *)&(a)))
  95. ;
  96. ;void genTable()
  97. ;{
  98. ; int i;
  99. ; float x;
  100. ; float xNext;
  101. ; float y;
  102. ; float yNext;
  103. ; float xInterval;
  104. ;
  105. ; // We will start our table with the case where the exponent is even.
  106. ;
  107. ; CASTFIX(x) = 0x3f000000;
  108. ;
  109. ; // We will use the current and next values to generate the piece-wise
  110. ; // data for the curve. The interval between 'current' and 'next' is
  111. ; // based on the smallest change possible in the floating-point value
  112. ; // that also represents a difference of one table-lookup entry.
  113. ;
  114. ; // When we switch to the odd-exponent case (at 1.0), we have to adjust
  115. ; // for the fact that effective interval between successive values
  116. ; // is doubled.
  117. ;
  118. ; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
  119. ; y = (float)1.0 / sqrt((double)x);
  120. ;
  121. ; // Calculate 1.0 / (piece-wise approximation interval).
  122. ;
  123. ; xInterval = xNext - x;
  124. ;
  125. ; xInterval = (float)1.0 / xInterval;
  126. ;
  127. ; // Now, generate the table:
  128. ;
  129. ; for (i = 0; i < SQRT_TAB_SIZE; i++) {
  130. ; float m;
  131. ; float b;
  132. ;
  133. ; // We increment our floating-point values using integer operations
  134. ; // to ensure accuracy:
  135. ;
  136. ; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
  137. ;
  138. ; // Find next point on curve:
  139. ;
  140. ; yNext = (float)1.0 / sqrt((double)xNext);
  141. ;
  142. ; // Test for odd-exponent case:
  143. ;
  144. ; if (CASTFIX(x) == 0x3f800000)
  145. ; xInterval *= (float)0.5;
  146. ;
  147. ; m = (yNext - y) * xInterval;
  148. ; b = y - (m * x);
  149. ;
  150. ; printf("\t\tdd\t0%8xh, 0%8xh\n", CASTFIX(m), CASTFIX(b));
  151. ;
  152. ; y = yNext;
  153. ; x = xNext;
  154. ; }
  155. ;}
  156. invSqrtTab dd 0bfb47e00h, 04007a1fah ; 512 rows of (m, b) floats, 8 bytes each: 1/sqrt(x) ~ m*x + b per 2^15-step of the bit pattern of x, starting at 3f000000h (0.5)
  157. dd 0bfb37000h, 040075e36h
  158. dd 0bfb26600h, 040071b31h
  159. dd 0bfb16000h, 04006d8ech
  160. dd 0bfb05800h, 0400695e4h
  161. dd 0bfaf5800h, 0400654a4h
  162. dd 0bfae5600h, 0400612a2h
  163. dd 0bfad5800h, 04005d165h
  164. dd 0bfac5e00h, 0400590f1h
  165. dd 0bfab6400h, 04005503eh
  166. dd 0bfaa6e00h, 040051058h
  167. dd 0bfa97800h, 04004d033h
  168. dd 0bfa88800h, 040049163h
  169. dd 0bfa79600h, 0400451d0h
  170. dd 0bfa6aa00h, 040041396h
  171. dd 0bfa5be00h, 04003d522h
  172. dd 0bfa4d400h, 0400396fah
  173. dd 0bfa3ee00h, 0400359a8h
  174. dd 0bfa30800h, 040031c1dh
  175. dd 0bfa22400h, 04002dee2h
  176. dd 0bfa14400h, 04002a282h
  177. dd 0bfa06600h, 040026674h
  178. dd 0bf9f8800h, 040022a30h
  179. dd 0bf9eae00h, 04001eecah
  180. dd 0bf9dd400h, 04001b32eh
  181. dd 0bf9cfc00h, 0400177e8h
  182. dd 0bf9c2800h, 040013d86h
  183. dd 0bf9b5400h, 0400102efh
  184. dd 0bf9a8400h, 04000c93fh
  185. dd 0bf99b400h, 040008f5bh
  186. dd 0bf98e600h, 0400055d2h
  187. dd 0bf981800h, 040001c16h
  188. dd 0bf975000h, 03fffc7abh
  189. dd 0bf968600h, 03fff55a6h
  190. dd 0bf95c000h, 03ffee580h
  191. dd 0bf94fc00h, 03ffe761ah
  192. dd 0bf943800h, 03ffe0652h
  193. dd 0bf937400h, 03ffd9628h
  194. dd 0bf92b600h, 03ffd290eh
  195. dd 0bf91f800h, 03ffcbb95h
  196. dd 0bf913a00h, 03ffc4dbdh
  197. dd 0bf907e00h, 03ffbe0afh
  198. dd 0bf8fc600h, 03ffb7597h
  199. dd 0bf8f0c00h, 03ffb08f8h
  200. dd 0bf8e5800h, 03ffa9f80h
  201. dd 0bf8da000h, 03ffa3354h
  202. dd 0bf8cee00h, 03ff9ca56h
  203. dd 0bf8c3c00h, 03ff960ffh
  204. dd 0bf8b8a00h, 03ff8f74fh
  205. dd 0bf8adc00h, 03ff88fa8h
  206. dd 0bf8a2e00h, 03ff827aah
  207. dd 0bf898000h, 03ff7bf55h
  208. dd 0bf88d600h, 03ff75911h
  209. dd 0bf882e00h, 03ff6f3adh
  210. dd 0bf878400h, 03ff68cbfh
  211. dd 0bf86de00h, 03ff627eah
  212. dd 0bf863600h, 03ff5c18ah
  213. dd 0bf859400h, 03ff55e81h
  214. dd 0bf84f000h, 03ff4f9edh
  215. dd 0bf845000h, 03ff4977dh
  216. dd 0bf83ae00h, 03ff43381h
  217. dd 0bf831000h, 03ff3d1aeh
  218. dd 0bf827200h, 03ff36f8ch
  219. dd 0bf81d400h, 03ff30d1bh
  220. dd 0bf813a00h, 03ff2acdbh
  221. dd 0bf809e00h, 03ff24b0dh
  222. dd 0bf800600h, 03ff1eb75h
  223. dd 0bf7edc00h, 03ff18b91h
  224. dd 0bf7db000h, 03ff12ca5h
  225. dd 0bf7c8400h, 03ff0cd6eh
  226. dd 0bf7b5c00h, 03ff06f32h
  227. dd 0bf7a3400h, 03ff010ach
  228. dd 0bf791000h, 03fefb324h
  229. dd 0bf77f000h, 03fef569ch
  230. dd 0bf76d000h, 03feef9cch
  231. dd 0bf75b000h, 03fee9cb4h
  232. dd 0bf749400h, 03fee40a0h
  233. dd 0bf737c00h, 03fede592h
  234. dd 0bf726800h, 03fed8b8ch
  235. dd 0bf714c00h, 03fed2ea3h
  236. dd 0bf704000h, 03fecd6b3h
  237. dd 0bf6f2800h, 03fec7a8dh
  238. dd 0bf6e1c00h, 03fec2217h
  239. dd 0bf6d1000h, 03febc95eh
  240. dd 0bf6c0400h, 03feb7062h
  241. dd 0bf6afc00h, 03feb1878h
  242. dd 0bf69f400h, 03feac04ch
  243. dd 0bf68ec00h, 03fea67deh
  244. dd 0bf67ec00h, 03fea11deh
  245. dd 0bf66e800h, 03fe9ba45h
  246. dd 0bf65e800h, 03fe963c5h
  247. dd 0bf64ec00h, 03fe90e60h
  248. dd 0bf63f000h, 03fe8b8bch
  249. dd 0bf62f400h, 03fe862d9h
  250. dd 0bf620000h, 03fe80f73h
  251. dd 0bf610400h, 03fe7b912h
  252. dd 0bf601000h, 03fe76532h
  253. dd 0bf5f2000h, 03fe71276h
  254. dd 0bf5e2c00h, 03fe6be1ch
  255. dd 0bf5d3c00h, 03fe66ae8h
  256. dd 0bf5c5000h, 03fe618dch
  257. dd 0bf5b6000h, 03fe5c530h
  258. dd 0bf5a7800h, 03fe57414h
  259. dd 0bf598c00h, 03fe52157h
  260. dd 0bf58a800h, 03fe4d12fh
  261. dd 0bf57c000h, 03fe47f65h
  262. dd 0bf56dc00h, 03fe42ecbh
  263. dd 0bf55f800h, 03fe3ddf8h
  264. dd 0bf551800h, 03fe38e58h
  265. dd 0bf543800h, 03fe33e80h
  266. dd 0bf535c00h, 03fe2efdeh
  267. dd 0bf527c00h, 03fe29f96h
  268. dd 0bf51a000h, 03fe25086h
  269. dd 0bf50c800h, 03fe202b0h
  270. dd 0bf4ff000h, 03fe1b4a4h
  271. dd 0bf4f1c00h, 03fe167d5h
  272. dd 0bf4e4400h, 03fe1195dh
  273. dd 0bf4d7000h, 03fe0cc24h
  274. dd 0bf4c9c00h, 03fe07eb6h
  275. dd 0bf4bcc00h, 03fe0328ah
  276. dd 0bf4afc00h, 03fdfe62ah
  277. dd 0bf4a3000h, 03fdf9b0fh
  278. dd 0bf496000h, 03fdf4e47h
  279. dd 0bf489800h, 03fdf0441h
  280. dd 0bf47c800h, 03fdeb711h
  281. dd 0bf470400h, 03fde6e24h
  282. dd 0bf463c00h, 03fde2388h
  283. dd 0bf457400h, 03fddd8bah
  284. dd 0bf44b000h, 03fdd8f3ah
  285. dd 0bf43ec00h, 03fdd4589h
  286. dd 0bf432800h, 03fdcfba7h
  287. dd 0bf426800h, 03fdcb317h
  288. dd 0bf41a800h, 03fdc6a57h
  289. dd 0bf40e800h, 03fdc2167h
  290. dd 0bf402c00h, 03fdbd9cdh
  291. dd 0bf3f6c00h, 03fdb907dh
  292. dd 0bf3eb400h, 03fdb4a0dh
  293. dd 0bf3dfc00h, 03fdb036fh
  294. dd 0bf3d4000h, 03fdabb19h
  295. dd 0bf3c8800h, 03fda741fh
  296. dd 0bf3bd400h, 03fda2e83h
  297. dd 0bf3b2000h, 03fd9e8bah
  298. dd 0bf3a6800h, 03fd9a136h
  299. dd 0bf39b400h, 03fd95b13h
  300. dd 0bf390800h, 03fd917e3h
  301. dd 0bf385000h, 03fd8cfd5h
  302. dd 0bf37a400h, 03fd88c4fh
  303. dd 0bf36f800h, 03fd8489eh
  304. dd 0bf364400h, 03fd8019ah
  305. dd 0bf359c00h, 03fd7bf28h
  306. dd 0bf34f000h, 03fd77af6h
  307. dd 0bf344400h, 03fd73699h
  308. dd 0bf339c00h, 03fd6f3a9h
  309. dd 0bf32f400h, 03fd6b08fh
  310. dd 0bf324c00h, 03fd66d4bh
  311. dd 0bf31a800h, 03fd62b78h
  312. dd 0bf310000h, 03fd5e7e0h
  313. dd 0bf305c00h, 03fd5a5bbh
  314. dd 0bf2fb800h, 03fd5636dh
  315. dd 0bf2f1800h, 03fd52295h
  316. dd 0bf2e7400h, 03fd4dff5h
  317. dd 0bf2dd800h, 03fd4a06eh
  318. dd 0bf2d3400h, 03fd45d7ch
  319. dd 0bf2c9800h, 03fd41da7h
  320. dd 0bf2bf800h, 03fd3dc07h
  321. dd 0bf2b6000h, 03fd39d89h
  322. dd 0bf2ac000h, 03fd35b99h
  323. dd 0bf2a2800h, 03fd31ccfh
  324. dd 0bf298c00h, 03fd2dc37h
  325. dd 0bf28f400h, 03fd29d21h
  326. dd 0bf285c00h, 03fd25de5h
  327. dd 0bf27c400h, 03fd21e83h
  328. dd 0bf273000h, 03fd1e0a7h
  329. dd 0bf269800h, 03fd1a0f9h
  330. dd 0bf260400h, 03fd162d3h
  331. dd 0bf257000h, 03fd12488h
  332. dd 0bf24e000h, 03fd0e7c8h
  333. dd 0bf244c00h, 03fd0a933h
  334. dd 0bf23bc00h, 03fd06c2bh
  335. dd 0bf232800h, 03fd02d4ch
  336. dd 0bf229c00h, 03fcff1b0h
  337. dd 0bf220c00h, 03fcfb43ch
  338. dd 0bf218000h, 03fcf785ah
  339. dd 0bf20f400h, 03fcf3c55h
  340. dd 0bf206400h, 03fcefe75h
  341. dd 0bf1fdc00h, 03fcec3e3h
  342. dd 0bf1f4c00h, 03fce85bbh
  343. dd 0bf1ec800h, 03fce4ca0h
  344. dd 0bf1e3c00h, 03fce0fech
  345. dd 0bf1db400h, 03fcdd4d2h
  346. dd 0bf1d2c00h, 03fcd9996h
  347. dd 0bf1ca800h, 03fcd5ff7h
  348. dd 0bf1c2000h, 03fcd2477h
  349. dd 0bf1b9800h, 03fcce8d5h
  350. dd 0bf1b1800h, 03fccb095h
  351. dd 0bf1a9400h, 03fcc7672h
  352. dd 0bf1a0c00h, 03fcc3a6ah
  353. dd 0bf199000h, 03fcc038fh
  354. dd 0bf190800h, 03fcbc743h
  355. dd 0bf188c00h, 03fcb902ah
  356. dd 0bf180800h, 03fcb5562h
  357. dd 0bf178c00h, 03fcb1e0bh
  358. dd 0bf170c00h, 03fcae4cbh
  359. dd 0bf168c00h, 03fcaab6bh
  360. dd 0bf161000h, 03fca73b7h
  361. dd 0bf159400h, 03fca3be4h
  362. dd 0bf151800h, 03fca03f2h
  363. dd 0bf149800h, 03fc9ca12h
  364. dd 0bf142400h, 03fc99582h
  365. dd 0bf13a400h, 03fc95b62h
  366. dd 0bf133000h, 03fc92698h
  367. dd 0bf12b400h, 03fc8ee0bh
  368. dd 0bf123c00h, 03fc8b733h
  369. dd 0bf11c400h, 03fc8803dh
  370. dd 0bf114c00h, 03fc84929h
  371. dd 0bf10d800h, 03fc813ceh
  372. dd 0bf106400h, 03fc7de56h
  373. dd 0bf0fec00h, 03fc7a6e8h
  374. dd 0bf0f7800h, 03fc77136h
  375. dd 0bf0f0400h, 03fc73b67h
  376. dd 0bf0e9000h, 03fc7057bh
  377. dd 0bf0e2000h, 03fc6d14fh
  378. dd 0bf0dac00h, 03fc69b29h
  379. dd 0bf0d3c00h, 03fc666c5h
  380. dd 0bf0ccc00h, 03fc63245h
  381. dd 0bf0c5800h, 03fc5fbc8h
  382. dd 0bf0bec00h, 03fc5c8f2h
  383. dd 0bf0b7c00h, 03fc5941eh
  384. dd 0bf0b0c00h, 03fc55f2eh
  385. dd 0bf0aa000h, 03fc52c07h
  386. dd 0bf0a3000h, 03fc4f6dfh
  387. dd 0bf09c400h, 03fc4c382h
  388. dd 0bf095c00h, 03fc491f2h
  389. dd 0bf08ec00h, 03fc45c76h
  390. dd 0bf088000h, 03fc428c8h
  391. dd 0bf081800h, 03fc3f6eah
  392. dd 0bf07b000h, 03fc3c4f2h
  393. dd 0bf074000h, 03fc38f06h
  394. dd 0bf06dc00h, 03fc35ec8h
  395. dd 0bf067400h, 03fc32c82h
  396. dd 0bf060800h, 03fc2f832h
  397. dd 0bf05a400h, 03fc2c7a9h
  398. dd 0bf053c00h, 03fc29515h
  399. dd 0bf04d800h, 03fc2645ah
  400. dd 0bf047000h, 03fc23192h
  401. dd 0bf040800h, 03fc1feb0h
  402. dd 0bf03a800h, 03fc1cfa0h
  403. dd 0bf034000h, 03fc19c8ah
  404. dd 0bf02dc00h, 03fc16b52h
  405. dd 0bf027c00h, 03fc13bfah
  406. dd 0bf021800h, 03fc10a90h
  407. dd 0bf01b400h, 03fc0d90dh
  408. dd 0bf015000h, 03fc0a771h
  409. dd 0bf00f400h, 03fc079b6h
  410. dd 0bf009000h, 03fc047e8h
  411. dd 0bf003000h, 03fc01800h
  412. dd 0beff4000h, 03fbfd000h
  413. dd 0befdc400h, 03fbf70a1h
  414. dd 0befc4c00h, 03fbf11e5h
  415. dd 0befad800h, 03fbeb3ceh
  416. dd 0bef96400h, 03fbe555ah
  417. dd 0bef7f800h, 03fbdf893h
  418. dd 0bef68e00h, 03fbd9bf4h
  419. dd 0bef52600h, 03fbd3f7eh
  420. dd 0bef3c200h, 03fbce3b6h
  421. dd 0bef26200h, 03fbc889eh
  422. dd 0bef10600h, 03fbc2e38h
  423. dd 0beefac00h, 03fbbd400h
  424. dd 0beee5400h, 03fbb79f8h
  425. dd 0beed0200h, 03fbb212eh
  426. dd 0beebb200h, 03fbac896h
  427. dd 0beea6600h, 03fba70b9h
  428. dd 0bee91a00h, 03fba1889h
  429. dd 0bee7d400h, 03fb9c1a0h
  430. dd 0bee69000h, 03fb96aeeh
  431. dd 0bee54e00h, 03fb91474h
  432. dd 0bee41200h, 03fb8bf48h
  433. dd 0bee2d400h, 03fb86942h
  434. dd 0bee19e00h, 03fb8151ah
  435. dd 0bee06600h, 03fb7c018h
  436. dd 0bedf3400h, 03fb76c6ch
  437. dd 0bede0400h, 03fb71900h
  438. dd 0bedcd600h, 03fb6c5d4h
  439. dd 0bedbac00h, 03fb67379h
  440. dd 0beda8400h, 03fb62161h
  441. dd 0bed95e00h, 03fb5cf8eh
  442. dd 0bed83a00h, 03fb57e00h
  443. dd 0bed71a00h, 03fb52d48h
  444. dd 0bed5fc00h, 03fb4dcd8h
  445. dd 0bed4e000h, 03fb48cb0h
  446. dd 0bed3c800h, 03fb43d64h
  447. dd 0bed2b000h, 03fb3edd2h
  448. dd 0bed19c00h, 03fb39f1eh
  449. dd 0bed08a00h, 03fb350b8h
  450. dd 0becf7c00h, 03fb30333h
  451. dd 0bece6c00h, 03fb2b4d7h
  452. dd 0becd6200h, 03fb267f3h
  453. dd 0becc5a00h, 03fb21b61h
  454. dd 0becb5200h, 03fb1ce8dh
  455. dd 0beca4e00h, 03fb182a2h
  456. dd 0bec94c00h, 03fb1370ch
  457. dd 0bec84a00h, 03fb0eb36h
  458. dd 0bec74e00h, 03fb0a0e4h
  459. dd 0bec65200h, 03fb05652h
  460. dd 0bec55800h, 03fb00c1ah
  461. dd 0bec45e00h, 03fafc1a4h
  462. dd 0bec36a00h, 03faf78bah
  463. dd 0bec27600h, 03faf2f93h
  464. dd 0bec18400h, 03faee6c9h
  465. dd 0bec09600h, 03fae9ef8h
  466. dd 0bebfa600h, 03fae5650h
  467. dd 0bebeba00h, 03fae0ea2h
  468. dd 0bebdd000h, 03fadc756h
  469. dd 0bebce800h, 03fad806ch
  470. dd 0bebc0000h, 03fad3948h
  471. dd 0bebb1e00h, 03facf3c3h
  472. dd 0beba3a00h, 03facad67h
  473. dd 0beb95800h, 03fac6770h
  474. dd 0beb87a00h, 03fac2280h
  475. dd 0beb79c00h, 03fabdd57h
  476. dd 0beb6c000h, 03fab9897h
  477. dd 0beb5e600h, 03fab5440h
  478. dd 0beb50e00h, 03fab1054h
  479. dd 0beb43600h, 03faacc32h
  480. dd 0beb36200h, 03faa891eh
  481. dd 0beb28e00h, 03faa45d6h
  482. dd 0beb1bc00h, 03faa02fah
  483. dd 0beb0ec00h, 03fa9c08eh
  484. dd 0beb01e00h, 03fa97e92h
  485. dd 0beaf5000h, 03fa93c63h
  486. dd 0beae8600h, 03fa8fb4ah
  487. dd 0beadba00h, 03fa8b959h
  488. dd 0beacf400h, 03fa87927h
  489. dd 0beac2a00h, 03fa83776h
  490. dd 0beab6600h, 03fa7f788h
  491. dd 0beaaa200h, 03fa7b76ah
  492. dd 0bea9e000h, 03fa777c2h
  493. dd 0bea91e00h, 03fa737e9h
  494. dd 0bea85e00h, 03fa6f889h
  495. dd 0bea7a000h, 03fa6b9a2h
  496. dd 0bea6e400h, 03fa67b36h
  497. dd 0bea62800h, 03fa63c9ch
  498. dd 0bea56e00h, 03fa5fe7ch
  499. dd 0bea4b400h, 03fa5c02fh
  500. dd 0bea3fe00h, 03fa5830bh
  501. dd 0bea34600h, 03fa5450dh
  502. dd 0bea29400h, 03fa508e8h
  503. dd 0bea1de00h, 03fa4cb3ch
  504. dd 0bea12c00h, 03fa48ebeh
  505. dd 0bea07c00h, 03fa452c2h
  506. dd 0be9fcc00h, 03fa4169ah
  507. dd 0be9f1e00h, 03fa3daf5h
  508. dd 0be9e7000h, 03fa39f25h
  509. dd 0be9dc400h, 03fa363dah
  510. dd 0be9d1a00h, 03fa32915h
  511. dd 0be9c7000h, 03fa2ee26h
  512. dd 0be9bc800h, 03fa2b3beh
  513. dd 0be9b2000h, 03fa2792ch
  514. dd 0be9a7a00h, 03fa23f22h
  515. dd 0be99d600h, 03fa205a4h
  516. dd 0be993200h, 03fa1cbfch
  517. dd 0be989000h, 03fa192dfh
  518. dd 0be97ec00h, 03fa158e5h
  519. dd 0be974e00h, 03fa120e2h
  520. dd 0be96ae00h, 03fa0e802h
  521. dd 0be961000h, 03fa0afb1h
  522. dd 0be957200h, 03fa07738h
  523. dd 0be94d800h, 03fa04006h
  524. dd 0be943a00h, 03fa0073eh
  525. dd 0be93a200h, 03f9fd078h
  526. dd 0be930a00h, 03f9f998ch
  527. dd 0be927000h, 03f9f61c1h
  528. dd 0be91da00h, 03f9f2b43h
  529. dd 0be914400h, 03f9ef4a0h
  530. dd 0be90b000h, 03f9ebe92h
  531. dd 0be901a00h, 03f9e87a3h
  532. dd 0be8f8a00h, 03f9e52c3h
  533. dd 0be8ef600h, 03f9e1c46h
  534. dd 0be8e6600h, 03f9de71eh
  535. dd 0be8dd600h, 03f9db1d2h
  536. dd 0be8d4600h, 03f9d7c62h
  537. dd 0be8cb800h, 03f9d478ch
  538. dd 0be8c2c00h, 03f9d1352h
  539. dd 0be8b9e00h, 03f9cde36h
  540. dd 0be8b1400h, 03f9caa76h
  541. dd 0be8a8a00h, 03f9c7694h
  542. dd 0be8a0000h, 03f9c428eh
  543. dd 0be897600h, 03f9c0e67h
  544. dd 0be88f000h, 03f9bdba1h
  545. dd 0be886800h, 03f9ba7f7h
  546. dd 0be87e200h, 03f9b74eeh
  547. dd 0be875e00h, 03f9b4287h
  548. dd 0be86d800h, 03f9b0f3bh
  549. dd 0be865600h, 03f9add56h
  550. dd 0be85d200h, 03f9aaa8ch
  551. dd 0be855200h, 03f9a792ch
  552. dd 0be84d000h, 03f9a46e6h
  553. dd 0be844e00h, 03f9a1480h
  554. dd 0be83d000h, 03f99e387h
  555. dd 0be835200h, 03f99b26eh
  556. dd 0be82d400h, 03f998136h
  557. dd 0be825600h, 03f994fdfh
  558. dd 0be81da00h, 03f991f31h
  559. dd 0be816000h, 03f98ef2eh
  560. dd 0be80e400h, 03f98be42h
  561. dd 0be806a00h, 03f988e01h
  562. dd 0be7fe000h, 03f985da2h
  563. dd 0be7ef400h, 03f982ebch
  564. dd 0be7e0000h, 03f97fe20h
  565. dd 0be7d1400h, 03f97cefeh
  566. dd 0be7c2400h, 03f979ef2h
  567. dd 0be7b3c00h, 03f977063h
  568. dd 0be7a5400h, 03f9741b7h
  569. dd 0be796800h, 03f971220h
  570. dd 0be788400h, 03f96e408h
  571. dd 0be779c00h, 03f96b506h
  572. dd 0be76b800h, 03f9686b6h
  573. dd 0be75d800h, 03f96591ah
  574. dd 0be74f400h, 03f962a90h
  575. dd 0be741400h, 03f95fcbch
  576. dd 0be733400h, 03f95cecch
  577. dd 0be725800h, 03f95a193h
  578. dd 0be717c00h, 03f95743eh
  579. dd 0be70a400h, 03f9547a1h
  580. dd 0be6fc800h, 03f951a15h
  581. dd 0be6ef000h, 03f94ed42h
  582. dd 0be6e1800h, 03f94c054h
  583. dd 0be6d4000h, 03f94934bh
  584. dd 0be6c7000h, 03f9467d3h
  585. dd 0be6b9c00h, 03f943b6ah
  586. dd 0be6ac800h, 03f940ee8h
  587. dd 0be69f800h, 03f93e322h
  588. dd 0be692800h, 03f93b742h
  589. dd 0be685c00h, 03f938c20h
  590. dd 0be678c00h, 03f93600ch
  591. dd 0be66c000h, 03f9334b8h
  592. dd 0be65f800h, 03f930a24h
  593. dd 0be652c00h, 03f92de9ch
  594. dd 0be646400h, 03f92b3d6h
  595. dd 0be639c00h, 03f9288f7h
  596. dd 0be62d400h, 03f925dffh
  597. dd 0be621000h, 03f9233cah
  598. dd 0be615000h, 03f920a5ah
  599. dd 0be608800h, 03f91df18h
  600. dd 0be5fc800h, 03f91b578h
  601. dd 0be5f0800h, 03f918bc0h
  602. dd 0be5e4800h, 03f9161f0h
  603. dd 0be5d8800h, 03f913808h
  604. dd 0be5ccc00h, 03f910ee8h
  605. dd 0be5c0c00h, 03f90e4d0h
  606. dd 0be5b5400h, 03f90bc62h
  607. dd 0be5a9800h, 03f9092fbh
  608. dd 0be59e000h, 03f906a5fh
  609. dd 0be592800h, 03f9041ach
  610. dd 0be587000h, 03f9018e2h
  611. dd 0be57b800h, 03f8ff001h
  612. dd 0be570400h, 03f8fc7edh
  613. dd 0be565000h, 03f8f9fc2h
  614. dd 0be559c00h, 03f8f7782h
  615. dd 0be54e800h, 03f8f4f2ah
  616. dd 0be543800h, 03f8f27a2h
  617. dd 0be538800h, 03f8f0004h
  618. dd 0be52d800h, 03f8ed850h
  619. dd 0be522c00h, 03f8eb16eh
  620. dd 0be517c00h, 03f8e898eh
  621. dd 0be50d000h, 03f8e6280h
  622. dd 0be502400h, 03f8e3b5dh
  623. dd 0be4f7800h, 03f8e1424h
  624. dd 0be4ecc00h, 03f8decd6h
  625. dd 0be4e2800h, 03f8dc748h
  626. dd 0be4d7c00h, 03f8d9fcfh
  627. dd 0be4cd800h, 03f8d7a18h
  628. dd 0be4c3000h, 03f8d5360h
  629. dd 0be4b8800h, 03f8d2c92h
  630. dd 0be4ae800h, 03f8d078ah
  631. dd 0be4a4000h, 03f8ce094h
  632. dd 0be49a000h, 03f8cbb64h
  633. dd 0be48fc00h, 03f8c9531h
  634. dd 0be485c00h, 03f8c6fd9h
  635. dd 0be47bc00h, 03f8c4a6dh
  636. dd 0be471c00h, 03f8c24edh
  637. dd 0be467c00h, 03f8bff59h
  638. dd 0be45e000h, 03f8bdaa2h
  639. dd 0be454000h, 03f8bb4e6h
  640. dd 0be44a800h, 03f8b90fah
  641. dd 0be440800h, 03f8b6b16h
  642. dd 0be437000h, 03f8b4704h
  643. dd 0be42d800h, 03f8b22dfh
  644. dd 0be423c00h, 03f8afdb3h
  645. dd 0be41a400h, 03f8ad968h
  646. dd 0be410c00h, 03f8ab50ah
  647. dd 0be407800h, 03f8a918eh
  648. dd 0be3fe000h, 03f8a6d0ah
  649. dd 0be3f4c00h, 03f8a496ah
  650. dd 0be3eb400h, 03f8a24c0h
  651. dd 0be3e2400h, 03f8a01f2h
  652. dd 0be3d9000h, 03f89de1ah
  653. dd 0be3d0000h, 03f89bb28h
  654. dd 0be3c6c00h, 03f89972bh
  655. dd 0be3bd800h, 03f89731ch
  656. dd 0be3b4c00h, 03f8950eeh
  657. dd 0be3abc00h, 03f892db4h
  658. dd 0be3a3000h, 03f890b62h
  659. dd 0be399c00h, 03f88e709h
  660. dd 0be391400h, 03f88c591h
  661. dd 0be388400h, 03f88a20fh
  662. dd 0be37fc00h, 03f888075h
  663. dd 0be377000h, 03f885dcch
  664. dd 0be36e400h, 03f883b12h
  665. dd 0be365800h, 03f881847h
  666. dd 0be35d400h, 03f87f768h
  667. dd 0be354800h, 03f87d47ah
  668. a1 dd 0.47 ; coefficient of x*y*y in the JB refinement y*(1.47 - 0.47*x*y*y)
  669. a2 dd 1.47 ; constant term of the same refinement
  670. .code
  671. SQRT_TAB_LOG2 equ 9 ;; log2 of the lookup-table entry count (512)
  672. MANTISSA_SIZE equ 24 ;; number of mantissa bits in fp value
  673. ;; number of represented mantissa bits
  674. ;; (one less than total due to hidden
  675. ;; leading one); evaluates to 23.
  676. MANTISSA_BITS equ (MANTISSA_SIZE - 1)
  677. ELEMENT_SIZE_LOG2 equ 3 ;; log2 of the size of each table entry (8 bytes)
  678. ;; shift required to get bits in value
  679. ;; in the correct place to use as a byte
  680. ;; offset for the table lookup (= 12)
  681. EXPONENT_SHIFT equ (MANTISSA_BITS - (SQRT_TAB_LOG2 - 1)\
  682. - ELEMENT_SIZE_LOG2)
  683. ;; keeps mantissa + low exponent bit (00FFFFFFh) for clamping to [.5..2)
  684. CLAMP_MASK equ ((1 SHL (MANTISSA_BITS+1)) - 1)
  685. ;; mask for the mantissa bits (007FFFFFh)
  686. MANTISSA_MASK equ ((1 SHL MANTISSA_BITS) - 1)
  687. ;; mask for sign/exponent bits (0FF800000h)
  688. EXPONENT_MASK equ (-1 AND (NOT MANTISSA_MASK))
  689. ;; mask for table lookup: 9 index bits scaled by 8 (0FF8h)
  690. TABLE_MASK equ ((1 SHL (SQRT_TAB_LOG2 + ELEMENT_SIZE_LOG2)) - 1) \
  691. AND (NOT((1 SHL ELEMENT_SIZE_LOG2) - 1))
  692. ;; exponent bits OR-ed into the clamped value (bit pattern of 0.5)
  693. EXPONENT_BIAS_EVEN equ 3f000000h
  694. ;; bias value used for final exponent computation:
  695. ;; (127+63) in bits 24..31, low 24 bits all ones (0BEFFFFFFh)
  696. LARGE_EXPONENT_BIAS equ (((127 + 127/2) SHL (MANTISSA_BITS+1)) OR CLAMP_MASK)
  697. __FLOAT_ONE equ 03F800000h ;; bit pattern of 1.0
  698. ;----------------------------------------------------------------------
  699. ;
  700. ; float __fastcall JBInvSqrt(float x);
  701. ;
  702. ; Input:
  703. ; esp + 4 = x (removed from the stack by 'ret 4')
  704. ; Output:
  705. ; st(0) = approximation of 1/sqrt(x); eax is overwritten
  706. ; Algorithm:
  707. ; The floating point trick, described in IEEE Computer Graphics and
  708. ; Applications v.17 number 4 in Jim Blinn's article, is used.
  709. ;
  710. ; ONE_AS_INTEGER = 0x3F800000;
  711. ; unsigned tmp = ((ONE_AS_INTEGER << 1) + ONE_AS_INTEGER - *(unsigned*)&x) >> 1;
  712. ; float y = *(float*)&tmp; // shift is logical: the difference has bit 31 set for x <= 0.25
  713. ; result = y*(1.47f - 0.47f*x*y*y);
  714. ;
  715. @JBInvSqrt@4 PROC NEAR
  716. mov eax, 07F000000h+03F800000h ; (ONE_AS_INTEGER<<1) + ONE_AS_INTEGER
  717. sub eax, [esp+4] ; minus the bit pattern of x
  718. sub esp, 4 ; place for temporary variable "y"
  719. shr eax, 1 ; logical shift: 'sar' would copy bit 31 into the sign and negate y for x <= 0.25
  720. mov [esp], eax ; y
  721. fld a1 ; 0.47
  722. fmul DWORD PTR [esp+8] ; x*0.47 (x is at esp+8 after the 'sub esp, 4')
  723. fld DWORD PTR [esp] ; y x*0.47
  724. fld st(0) ; y y x*0.47
  725. fmul st(0), st(1) ; y*y y x*0.47
  726. fld a2 ; 1.47 y*y y x*0.47
  727. fxch st(3) ; x*0.47 y*y y 1.47
  728. fmulp st(1), st(0) ; x*0.47*y*y y 1.47
  729. fsubp st(2), st(0) ; y 1.47-x*0.47*y*y
  730. fmulp st(1), st(0) ; result
  731. add esp, 4 ; release the temporary
  732. ret 4
  733. @JBInvSqrt@4 endp
  734. ;----------------------------------------------------------------------
  735. ; void __fastcall JBVecNormalize(float *result, float *normal);
  736. ;
  737. ; Input:
  738. ; ecx = address of the result (3 floats; may be the same address as edx)
  739. ; edx = address of the normal (3 floats)
  740. ; Scales the normal by the JBInvSqrt approximation of 1/sqrt(x*x+y*y+z*z).
  741. ; Overwrites eax. A zero-length normal is not special-cased.
  742. @JBVecNormalize@8 PROC NEAR
  743. fld DWORD PTR [edx]
  744. fmul st(0), st(0) ; x*x
  745. fld DWORD PTR [edx + 4]
  746. fmul st(0), st(0) ; y*y x*x
  747. fld DWORD PTR [edx + 8]
  748. fmul st(0), st(0) ; z*z y*y x*x
  749. fxch st(2) ; x*x y*y z*z
  750. faddp st(1), st ; x*x+y*y z*z
  751. faddp st(1), st ; len2 = x*x+y*y+z*z
  752. sub esp, 4 ; Place for temporary variable "y"
  753. mov eax, 07F000000h+03F800000h ; (ONE_AS_INTEGER<<1) + ONE_AS_INTEGER
  754. fst DWORD PTR [esp] ; store len2 (it also stays in st(0))
  755. sub eax, [esp] ; minus the bit pattern of len2
  756. shr eax, 1 ; logical shift: 'sar' would negate y (and flip the result) for len2 <= 0.25
  757. mov [esp], eax ; y
  758. fmul a1 ; len2*0.47
  759. fld DWORD PTR [esp] ; y len2*0.47
  760. fld st(0) ; y y len2*0.47
  761. fmul st(0), st(1) ; y*y y len2*0.47
  762. fld a2 ; 1.47 y*y y len2*0.47
  763. fxch st(3) ; len2*0.47 y*y y 1.47
  764. fmulp st(1), st(0) ; len2*0.47*y*y y 1.47
  765. fsubp st(2), st(0) ; y 1.47-len2*0.47*y*y
  766. fmulp st(1), st(0) ; r = 1/sqrt(len2)
  767. fld DWORD PTR [edx] ; Start normalizing the normal
  768. fmul st, st(1) ; x*r r
  769. fld DWORD PTR [edx + 4]
  770. fmul st, st(2) ; y*r x*r r
  771. fld DWORD PTR [edx + 8]
  772. fmulp st(3), st(0) ; y*r x*r z*r (r is replaced by z*r)
  773. fxch st(1) ; x*r y*r z*r
  774. add esp, 4 ; release the temporary
  775. fstp DWORD PTR [ecx] ; all loads from [edx] are done, so ecx may alias edx
  776. fstp DWORD PTR [ecx + 4]
  777. fstp DWORD PTR [ecx + 8]
  778. ret
  779. @JBVecNormalize@8 endp
  780. ;----------------------------------------------------------------------
  781. ; float __fastcall TableInvSqrt(float value);
  782. ; Input: [esp+4] = x (popped by 'ret 4'). Output: st(0) ~ 1/sqrt(x).
  783. ; ecx is saved and restored; eax is overwritten. 'x' and 'num' below are
  784. ; esp-relative and only valid after the 'push ecx' / 'sub esp, 4' pair.
  785. x equ DWORD PTR [esp + 12]
  786. num equ DWORD PTR [esp]
  787. @TableInvSqrt@4 PROC NEAR
  788. mov eax, [esp + 4] ; bit pattern of x
  789. push ecx
  790. mov ecx, eax
  791. sub esp, 4 ; Place for num
  792. shr ecx, EXPONENT_SHIFT ;; move low exponent bit + top 8 mantissa bits into the offset field
  793. and eax, CLAMP_MASK ;; keep mantissa and low exponent bit
  794. and ecx, TABLE_MASK ;; ecx = byte offset of the table entry (8 bytes each)
  795. or eax, EXPONENT_BIAS_EVEN ;; clamped number now lies in [0.5, 2.0)
  796. mov num, eax
  797. fld num ;; clamped x
  798. fmul [invSqrtTab+ecx] ;; find mx
  799. mov eax, LARGE_EXPONENT_BIAS;; (127+63) in bits 24..31, low bits set so no borrow reaches them
  800. sub eax, x ;; bits 24..31 = (127+63) - (E div 2)
  801. fadd [invSqrtTab+ecx+4] ;; get mx + b
  802. shr eax, 1 ;; move that value into the exponent field
  803. and eax, EXPONENT_MASK ;; mask exponent: eax is now a power-of-two float
  804. mov num, eax
  805. fmul num ;; now adjust for exponent
  806. add esp, 4 ;; release num
  807. pop ecx
  808. ret 4
  809. @TableInvSqrt@4 endp
  810. ;----------------------------------------------------------------------
  811. ; void __fastcall TableVecNormalize(float *resultNormal, float *normal); len/num are ebp-frame temporaries
  812. len equ DWORD PTR -4[ebp]
  813. num equ DWORD PTR -8[ebp]
  814. @TableVecNormalize@8 PROC NEAR ;; ecx = resultNormal, edx = normal (3 floats each)
  815. push ebp
  816. mov ebp, esp
  817. sub esp, 8 ;; room for len and num
  818. fld DWORD PTR [edx]
  819. fmul DWORD PTR [edx] ;; x*x
  820. fld DWORD PTR [edx+4]
  821. fmul DWORD PTR [edx+4] ;; y*y x*x
  822. fld DWORD PTR [edx+8]
  823. fmul DWORD PTR [edx+8] ;; z*z y*y x*x
  824. fxch ST(2) ;; x*x y*y z*z
  825. faddp ST(1), ST ;; x*x+y*y z*z
  826. faddp ST(1), ST ;; x*x+y*y+z*z
  827. fstp len ;; len = squared length; x87 stack is empty again
  828. mov eax, len
  829. test eax, eax ;; bit pattern 0 means the squared length is +0.0
  830. jne notZeroLen
  831. mov [ecx], eax ;; zero-length input: store a zero vector
  832. mov [ecx+4], eax
  833. mov [ecx+8], eax
  834. mov esp, ebp
  835. pop ebp
  836. ret 0
  837. notZeroLen:
  838. cmp eax, __FLOAT_ONE ;; squared length exactly 1.0: no scaling needed
  839. jne notOneLen
  840. cmp ecx, edx ;; result aliases the input: nothing to copy
  841. je normExit
  842. mov eax, [edx] ;; otherwise copy the three components unchanged
  843. mov [ecx], eax
  844. mov eax, [edx+4]
  845. mov [ecx+4], eax
  846. mov eax, [edx+8]
  847. mov [ecx+8], eax
  848. mov esp, ebp
  849. pop ebp
  850. ret 0
  851. notOneLen:
  852. ;; eax already has length
  853. push edi ;; edi is saved here and restored before the final return
  854. mov edi, eax
  855. shr edi, EXPONENT_SHIFT ;; move low exponent bit + top 8 mantissa bits into the offset field
  856. and eax, CLAMP_MASK ;; keep mantissa and low exponent bit
  857. and edi, TABLE_MASK ;; edi = byte offset of the table entry (8 bytes each)
  858. or eax, EXPONENT_BIAS_EVEN ;; clamped number now lies in [0.5, 2.0)
  859. mov num, eax
  860. fld num ;; clamped squared length
  861. fmul [invSqrtTab+edi] ;; find mx
  862. mov eax, LARGE_EXPONENT_BIAS;; (127+63) in bits 24..31, low bits set so no borrow reaches them
  863. sub eax, len ;; bits 24..31 = (127+63) - (E div 2)
  864. fadd [invSqrtTab+edi+4] ;; get mx + b
  865. shr eax, 1 ;; move that value into the exponent field
  866. and eax, EXPONENT_MASK ;; mask exponent: eax is now a power-of-two float
  867. mov num, eax
  868. fmul num ;; now adjust for exponent: r = 1/sqrt(len)
  869. fld DWORD PTR [edx] ;; r = 1/sqrt(len) is on the stack
  870. fmul ST, ST(1) ;; x*r r
  871. fld DWORD PTR [edx+4]
  872. fmul ST, ST(2) ;; y*r x*r r
  873. fld DWORD PTR [edx+8]
  874. fmul ST, ST(3) ;; z*r y*r x*r r
  875. fxch ST(2) ;; x*r y*r z*r r
  876. fstp DWORD PTR [ecx] ;; all loads from [edx] are done, so ecx may alias edx
  877. fstp DWORD PTR [ecx+4]
  878. fstp DWORD PTR [ecx+8]
  879. fstp ST(0) ;; pop r
  880. pop edi
  881. mov esp, ebp
  882. pop ebp
  883. ret 0
  884. normExit:
  885. mov esp, ebp
  886. pop ebp
  887. ret 0
  888. @TableVecNormalize@8 ENDP
  889. END