Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

832 lines
24 KiB

;---------------------------Module-Header------------------------------;
; Module Name: math.asm
;
; Fast math routines.
;
; Created: 11/1/1996
; Author: Otto Berkes [ottob]
;
; Copyright (c) 1996 Microsoft Corporation
;----------------------------------------------------------------------;
.386
.model small,pascal
assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
assume fs:nothing,gs:nothing
.xlist
include gli386.inc
.list
PROFILE = 0
include profile.inc
;
;
; We're trying to solve:
;
; 1/sqrt(x)
;
; which in IEEE float is:
;
; 1/sqrt(M * 2^[E-127])
;
; To simplify, substitute e = [E-127]
;
; We can simplify this by pulling a large portion of the exponent out
; by using only that portion of the exponent divisible by two (so we can
; pull it out of the sqrt term):
;
; 1/sqrt(M * 2^(2*[e div 2]) * 2^[e MOD 2])
;
; which is:
;
; 1/ (2^[e div 2] * sqrt(M * 2^[e MOD 2]))
;
; or
;
; (2^[e div 2])^(-1) * 1/sqrt(M * 2^[e MOD 2])
;
; =
; 2^-[e div 2] * 1/sqrt(M * 2^[e MOD 2])
;
; substitute back for e = [E-127]:
;
; 2^-[(E - 127) div 2] * 1/sqrt(M * 2^[(E - 127) MOD 2])
;
; =
; 2^-[(E div 2) - 63] * 1/sqrt(M * 2^[(E - 1) MOD 2])
;
; =
; 2^[63 - (E div 2)] * 1/sqrt(M * 2^[(E - 1) MOD 2])
;
; As a floating-point number, 2^[63 - (E div 2)] is just the exponent value:
;
; [63 - (E div 2)] + 127
;
; or
; [(127+63) - (E div 2)]
;
; Remembering to account for the implicit '1' in the mantissa of IEEE floating-
; point numbers, the range of (M * 2^[(E - 1) MOD 2]) is 0.800000 to
; 0.ffffff*2, which is the interval [0.5, 2.0). We can use the fact that this
; is a relatively small range, and therefore can use a table lookup near the
; actual value. The table will contain values for the piece-wise approximation
; to the curve 1/sqrt(M * 2^[(E - 1) MOD 2]) using an acceptable interval.
; These values can then be used to approximate the desired inverse square root
; value. At this point, all that remains is to apply the correct exponent
; for the number, which is simply [(127+63) - (E div 2)] from the above
; equations.
;
; To do the piecewise-linear approximation, we can store a table of values at
; the appropriate intervals, and the deltas between them. However, this
; will require calculating the difference between the interval value and
; x. We can do a bit better by using slope-intercept (y = mx + b), so the
; table will store (m, b).
;
; With a 512-entry table, we will get at least 16 bits of precision. This
; result was obtained using simulations.
.data
; The following 'C' code generates the table below:
;#define SQRT_TAB_SIZE_LOG2 9 // 512-entry table
;
;#define MANTISSA_SIZE 24
;#define SQRT_TAB_SIZE (1 << SQRT_TAB_SIZE_LOG2)
;#define SQRT_INC (1 << (MANTISSA_SIZE - SQRT_TAB_SIZE_LOG2))
;#define CASTFIX(a) (*((LONG *)&(a)))
;
;void genTable()
;{
; int i;
; float x;
; float xNext;
; float y;
; float yNext;
; float xInterval;
;
; // We will start our table with the case where the exponent is even.
;
; CASTFIX(x) = 0x3f000000;
;
; // We will use the current and next values to generate the piece-wise
; // data for the curve. The interval between 'current' and 'next' is
; // based on the smallest change possible in the floating-point value
; // that also represents a difference of one table-lookup entry.
;
; // When we switch to the odd-exponent case (at 1.0), we have to adjust
; // for the fact that the effective interval between successive values
; // is doubled.
;
; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
; y = (float)1.0 / sqrt((double)x);
;
; // Calculate 1.0 / (piece-wise approximation interval).
;
; xInterval = xNext - x;
;
; xInterval = (float)1.0 / xInterval;
;
; // Now, generate the table:
;
; for (i = 0; i < SQRT_TAB_SIZE; i++) {
; float m;
; float b;
;
; // We increment our floating-point values using integer operations
; // to ensure accuracy:
;
; CASTFIX(xNext) = CASTFIX(x) + SQRT_INC;
;
; // Find next point on curve:
;
; yNext = (float)1.0 / sqrt((double)xNext);
;
; // Test for odd-exponent case:
;
; if (CASTFIX(x) == 0x3f800000)
; xInterval *= (float)0.5;
;
; m = (yNext - y) * xInterval;
; b = y - (m * x);
;
; printf("\t\tdd\t0%8xh, 0%8xh\n", CASTFIX(m), CASTFIX(b));
;
; y = yNext;
; x = xNext;
; }
;}
;; invSqrtTab: 512 entries of two DWORDs each (8 bytes per entry), laid out
;; as "dd m, b" in the format printed by the genTable() listing above.  m and
;; b are the IEEE single-precision bit patterns of the slope and intercept of
;; one line segment approximating 1/sqrt(x).  Following that generator,
;; entries 0..255 cover x in [0.5, 1.0) and entries 256..511 cover x in
;; [1.0, 2.0).  The code below indexes the table with a byte offset taken
;; from bits 23..15 of the input value (low exponent bit + top 8 mantissa
;; bits).
invSqrtTab dd 0bfb47e00h, 04007a1fah
dd 0bfb37000h, 040075e36h
dd 0bfb26600h, 040071b31h
dd 0bfb16000h, 04006d8ech
dd 0bfb05800h, 0400695e4h
dd 0bfaf5800h, 0400654a4h
dd 0bfae5600h, 0400612a2h
dd 0bfad5800h, 04005d165h
dd 0bfac5e00h, 0400590f1h
dd 0bfab6400h, 04005503eh
dd 0bfaa6e00h, 040051058h
dd 0bfa97800h, 04004d033h
dd 0bfa88800h, 040049163h
dd 0bfa79600h, 0400451d0h
dd 0bfa6aa00h, 040041396h
dd 0bfa5be00h, 04003d522h
dd 0bfa4d400h, 0400396fah
dd 0bfa3ee00h, 0400359a8h
dd 0bfa30800h, 040031c1dh
dd 0bfa22400h, 04002dee2h
dd 0bfa14400h, 04002a282h
dd 0bfa06600h, 040026674h
dd 0bf9f8800h, 040022a30h
dd 0bf9eae00h, 04001eecah
dd 0bf9dd400h, 04001b32eh
dd 0bf9cfc00h, 0400177e8h
dd 0bf9c2800h, 040013d86h
dd 0bf9b5400h, 0400102efh
dd 0bf9a8400h, 04000c93fh
dd 0bf99b400h, 040008f5bh
dd 0bf98e600h, 0400055d2h
dd 0bf981800h, 040001c16h
dd 0bf975000h, 03fffc7abh
dd 0bf968600h, 03fff55a6h
dd 0bf95c000h, 03ffee580h
dd 0bf94fc00h, 03ffe761ah
dd 0bf943800h, 03ffe0652h
dd 0bf937400h, 03ffd9628h
dd 0bf92b600h, 03ffd290eh
dd 0bf91f800h, 03ffcbb95h
dd 0bf913a00h, 03ffc4dbdh
dd 0bf907e00h, 03ffbe0afh
dd 0bf8fc600h, 03ffb7597h
dd 0bf8f0c00h, 03ffb08f8h
dd 0bf8e5800h, 03ffa9f80h
dd 0bf8da000h, 03ffa3354h
dd 0bf8cee00h, 03ff9ca56h
dd 0bf8c3c00h, 03ff960ffh
dd 0bf8b8a00h, 03ff8f74fh
dd 0bf8adc00h, 03ff88fa8h
dd 0bf8a2e00h, 03ff827aah
dd 0bf898000h, 03ff7bf55h
dd 0bf88d600h, 03ff75911h
dd 0bf882e00h, 03ff6f3adh
dd 0bf878400h, 03ff68cbfh
dd 0bf86de00h, 03ff627eah
dd 0bf863600h, 03ff5c18ah
dd 0bf859400h, 03ff55e81h
dd 0bf84f000h, 03ff4f9edh
dd 0bf845000h, 03ff4977dh
dd 0bf83ae00h, 03ff43381h
dd 0bf831000h, 03ff3d1aeh
dd 0bf827200h, 03ff36f8ch
dd 0bf81d400h, 03ff30d1bh
dd 0bf813a00h, 03ff2acdbh
dd 0bf809e00h, 03ff24b0dh
dd 0bf800600h, 03ff1eb75h
dd 0bf7edc00h, 03ff18b91h
dd 0bf7db000h, 03ff12ca5h
dd 0bf7c8400h, 03ff0cd6eh
dd 0bf7b5c00h, 03ff06f32h
dd 0bf7a3400h, 03ff010ach
dd 0bf791000h, 03fefb324h
dd 0bf77f000h, 03fef569ch
dd 0bf76d000h, 03feef9cch
dd 0bf75b000h, 03fee9cb4h
dd 0bf749400h, 03fee40a0h
dd 0bf737c00h, 03fede592h
dd 0bf726800h, 03fed8b8ch
dd 0bf714c00h, 03fed2ea3h
dd 0bf704000h, 03fecd6b3h
dd 0bf6f2800h, 03fec7a8dh
dd 0bf6e1c00h, 03fec2217h
dd 0bf6d1000h, 03febc95eh
dd 0bf6c0400h, 03feb7062h
dd 0bf6afc00h, 03feb1878h
dd 0bf69f400h, 03feac04ch
dd 0bf68ec00h, 03fea67deh
dd 0bf67ec00h, 03fea11deh
dd 0bf66e800h, 03fe9ba45h
dd 0bf65e800h, 03fe963c5h
dd 0bf64ec00h, 03fe90e60h
dd 0bf63f000h, 03fe8b8bch
dd 0bf62f400h, 03fe862d9h
dd 0bf620000h, 03fe80f73h
dd 0bf610400h, 03fe7b912h
dd 0bf601000h, 03fe76532h
dd 0bf5f2000h, 03fe71276h
dd 0bf5e2c00h, 03fe6be1ch
dd 0bf5d3c00h, 03fe66ae8h
dd 0bf5c5000h, 03fe618dch
dd 0bf5b6000h, 03fe5c530h
dd 0bf5a7800h, 03fe57414h
dd 0bf598c00h, 03fe52157h
dd 0bf58a800h, 03fe4d12fh
dd 0bf57c000h, 03fe47f65h
dd 0bf56dc00h, 03fe42ecbh
dd 0bf55f800h, 03fe3ddf8h
dd 0bf551800h, 03fe38e58h
dd 0bf543800h, 03fe33e80h
dd 0bf535c00h, 03fe2efdeh
dd 0bf527c00h, 03fe29f96h
dd 0bf51a000h, 03fe25086h
dd 0bf50c800h, 03fe202b0h
dd 0bf4ff000h, 03fe1b4a4h
dd 0bf4f1c00h, 03fe167d5h
dd 0bf4e4400h, 03fe1195dh
dd 0bf4d7000h, 03fe0cc24h
dd 0bf4c9c00h, 03fe07eb6h
dd 0bf4bcc00h, 03fe0328ah
dd 0bf4afc00h, 03fdfe62ah
dd 0bf4a3000h, 03fdf9b0fh
dd 0bf496000h, 03fdf4e47h
dd 0bf489800h, 03fdf0441h
dd 0bf47c800h, 03fdeb711h
dd 0bf470400h, 03fde6e24h
dd 0bf463c00h, 03fde2388h
dd 0bf457400h, 03fddd8bah
dd 0bf44b000h, 03fdd8f3ah
dd 0bf43ec00h, 03fdd4589h
dd 0bf432800h, 03fdcfba7h
dd 0bf426800h, 03fdcb317h
dd 0bf41a800h, 03fdc6a57h
dd 0bf40e800h, 03fdc2167h
dd 0bf402c00h, 03fdbd9cdh
dd 0bf3f6c00h, 03fdb907dh
dd 0bf3eb400h, 03fdb4a0dh
dd 0bf3dfc00h, 03fdb036fh
dd 0bf3d4000h, 03fdabb19h
dd 0bf3c8800h, 03fda741fh
dd 0bf3bd400h, 03fda2e83h
dd 0bf3b2000h, 03fd9e8bah
dd 0bf3a6800h, 03fd9a136h
dd 0bf39b400h, 03fd95b13h
dd 0bf390800h, 03fd917e3h
dd 0bf385000h, 03fd8cfd5h
dd 0bf37a400h, 03fd88c4fh
dd 0bf36f800h, 03fd8489eh
dd 0bf364400h, 03fd8019ah
dd 0bf359c00h, 03fd7bf28h
dd 0bf34f000h, 03fd77af6h
dd 0bf344400h, 03fd73699h
dd 0bf339c00h, 03fd6f3a9h
dd 0bf32f400h, 03fd6b08fh
dd 0bf324c00h, 03fd66d4bh
dd 0bf31a800h, 03fd62b78h
dd 0bf310000h, 03fd5e7e0h
dd 0bf305c00h, 03fd5a5bbh
dd 0bf2fb800h, 03fd5636dh
dd 0bf2f1800h, 03fd52295h
dd 0bf2e7400h, 03fd4dff5h
dd 0bf2dd800h, 03fd4a06eh
dd 0bf2d3400h, 03fd45d7ch
dd 0bf2c9800h, 03fd41da7h
dd 0bf2bf800h, 03fd3dc07h
dd 0bf2b6000h, 03fd39d89h
dd 0bf2ac000h, 03fd35b99h
dd 0bf2a2800h, 03fd31ccfh
dd 0bf298c00h, 03fd2dc37h
dd 0bf28f400h, 03fd29d21h
dd 0bf285c00h, 03fd25de5h
dd 0bf27c400h, 03fd21e83h
dd 0bf273000h, 03fd1e0a7h
dd 0bf269800h, 03fd1a0f9h
dd 0bf260400h, 03fd162d3h
dd 0bf257000h, 03fd12488h
dd 0bf24e000h, 03fd0e7c8h
dd 0bf244c00h, 03fd0a933h
dd 0bf23bc00h, 03fd06c2bh
dd 0bf232800h, 03fd02d4ch
dd 0bf229c00h, 03fcff1b0h
dd 0bf220c00h, 03fcfb43ch
dd 0bf218000h, 03fcf785ah
dd 0bf20f400h, 03fcf3c55h
dd 0bf206400h, 03fcefe75h
dd 0bf1fdc00h, 03fcec3e3h
dd 0bf1f4c00h, 03fce85bbh
dd 0bf1ec800h, 03fce4ca0h
dd 0bf1e3c00h, 03fce0fech
dd 0bf1db400h, 03fcdd4d2h
dd 0bf1d2c00h, 03fcd9996h
dd 0bf1ca800h, 03fcd5ff7h
dd 0bf1c2000h, 03fcd2477h
dd 0bf1b9800h, 03fcce8d5h
dd 0bf1b1800h, 03fccb095h
dd 0bf1a9400h, 03fcc7672h
dd 0bf1a0c00h, 03fcc3a6ah
dd 0bf199000h, 03fcc038fh
dd 0bf190800h, 03fcbc743h
dd 0bf188c00h, 03fcb902ah
dd 0bf180800h, 03fcb5562h
dd 0bf178c00h, 03fcb1e0bh
dd 0bf170c00h, 03fcae4cbh
dd 0bf168c00h, 03fcaab6bh
dd 0bf161000h, 03fca73b7h
dd 0bf159400h, 03fca3be4h
dd 0bf151800h, 03fca03f2h
dd 0bf149800h, 03fc9ca12h
dd 0bf142400h, 03fc99582h
dd 0bf13a400h, 03fc95b62h
dd 0bf133000h, 03fc92698h
dd 0bf12b400h, 03fc8ee0bh
dd 0bf123c00h, 03fc8b733h
dd 0bf11c400h, 03fc8803dh
dd 0bf114c00h, 03fc84929h
dd 0bf10d800h, 03fc813ceh
dd 0bf106400h, 03fc7de56h
dd 0bf0fec00h, 03fc7a6e8h
dd 0bf0f7800h, 03fc77136h
dd 0bf0f0400h, 03fc73b67h
dd 0bf0e9000h, 03fc7057bh
dd 0bf0e2000h, 03fc6d14fh
dd 0bf0dac00h, 03fc69b29h
dd 0bf0d3c00h, 03fc666c5h
dd 0bf0ccc00h, 03fc63245h
dd 0bf0c5800h, 03fc5fbc8h
dd 0bf0bec00h, 03fc5c8f2h
dd 0bf0b7c00h, 03fc5941eh
dd 0bf0b0c00h, 03fc55f2eh
dd 0bf0aa000h, 03fc52c07h
dd 0bf0a3000h, 03fc4f6dfh
dd 0bf09c400h, 03fc4c382h
dd 0bf095c00h, 03fc491f2h
dd 0bf08ec00h, 03fc45c76h
dd 0bf088000h, 03fc428c8h
dd 0bf081800h, 03fc3f6eah
dd 0bf07b000h, 03fc3c4f2h
dd 0bf074000h, 03fc38f06h
dd 0bf06dc00h, 03fc35ec8h
dd 0bf067400h, 03fc32c82h
dd 0bf060800h, 03fc2f832h
dd 0bf05a400h, 03fc2c7a9h
dd 0bf053c00h, 03fc29515h
dd 0bf04d800h, 03fc2645ah
dd 0bf047000h, 03fc23192h
dd 0bf040800h, 03fc1feb0h
dd 0bf03a800h, 03fc1cfa0h
dd 0bf034000h, 03fc19c8ah
dd 0bf02dc00h, 03fc16b52h
dd 0bf027c00h, 03fc13bfah
dd 0bf021800h, 03fc10a90h
dd 0bf01b400h, 03fc0d90dh
dd 0bf015000h, 03fc0a771h
dd 0bf00f400h, 03fc079b6h
dd 0bf009000h, 03fc047e8h
dd 0bf003000h, 03fc01800h
dd 0beff4000h, 03fbfd000h
dd 0befdc400h, 03fbf70a1h
dd 0befc4c00h, 03fbf11e5h
dd 0befad800h, 03fbeb3ceh
dd 0bef96400h, 03fbe555ah
dd 0bef7f800h, 03fbdf893h
dd 0bef68e00h, 03fbd9bf4h
dd 0bef52600h, 03fbd3f7eh
dd 0bef3c200h, 03fbce3b6h
dd 0bef26200h, 03fbc889eh
dd 0bef10600h, 03fbc2e38h
dd 0beefac00h, 03fbbd400h
dd 0beee5400h, 03fbb79f8h
dd 0beed0200h, 03fbb212eh
dd 0beebb200h, 03fbac896h
dd 0beea6600h, 03fba70b9h
dd 0bee91a00h, 03fba1889h
dd 0bee7d400h, 03fb9c1a0h
dd 0bee69000h, 03fb96aeeh
dd 0bee54e00h, 03fb91474h
dd 0bee41200h, 03fb8bf48h
dd 0bee2d400h, 03fb86942h
dd 0bee19e00h, 03fb8151ah
dd 0bee06600h, 03fb7c018h
dd 0bedf3400h, 03fb76c6ch
dd 0bede0400h, 03fb71900h
dd 0bedcd600h, 03fb6c5d4h
dd 0bedbac00h, 03fb67379h
dd 0beda8400h, 03fb62161h
dd 0bed95e00h, 03fb5cf8eh
dd 0bed83a00h, 03fb57e00h
dd 0bed71a00h, 03fb52d48h
dd 0bed5fc00h, 03fb4dcd8h
dd 0bed4e000h, 03fb48cb0h
dd 0bed3c800h, 03fb43d64h
dd 0bed2b000h, 03fb3edd2h
dd 0bed19c00h, 03fb39f1eh
dd 0bed08a00h, 03fb350b8h
dd 0becf7c00h, 03fb30333h
dd 0bece6c00h, 03fb2b4d7h
dd 0becd6200h, 03fb267f3h
dd 0becc5a00h, 03fb21b61h
dd 0becb5200h, 03fb1ce8dh
dd 0beca4e00h, 03fb182a2h
dd 0bec94c00h, 03fb1370ch
dd 0bec84a00h, 03fb0eb36h
dd 0bec74e00h, 03fb0a0e4h
dd 0bec65200h, 03fb05652h
dd 0bec55800h, 03fb00c1ah
dd 0bec45e00h, 03fafc1a4h
dd 0bec36a00h, 03faf78bah
dd 0bec27600h, 03faf2f93h
dd 0bec18400h, 03faee6c9h
dd 0bec09600h, 03fae9ef8h
dd 0bebfa600h, 03fae5650h
dd 0bebeba00h, 03fae0ea2h
dd 0bebdd000h, 03fadc756h
dd 0bebce800h, 03fad806ch
dd 0bebc0000h, 03fad3948h
dd 0bebb1e00h, 03facf3c3h
dd 0beba3a00h, 03facad67h
dd 0beb95800h, 03fac6770h
dd 0beb87a00h, 03fac2280h
dd 0beb79c00h, 03fabdd57h
dd 0beb6c000h, 03fab9897h
dd 0beb5e600h, 03fab5440h
dd 0beb50e00h, 03fab1054h
dd 0beb43600h, 03faacc32h
dd 0beb36200h, 03faa891eh
dd 0beb28e00h, 03faa45d6h
dd 0beb1bc00h, 03faa02fah
dd 0beb0ec00h, 03fa9c08eh
dd 0beb01e00h, 03fa97e92h
dd 0beaf5000h, 03fa93c63h
dd 0beae8600h, 03fa8fb4ah
dd 0beadba00h, 03fa8b959h
dd 0beacf400h, 03fa87927h
dd 0beac2a00h, 03fa83776h
dd 0beab6600h, 03fa7f788h
dd 0beaaa200h, 03fa7b76ah
dd 0bea9e000h, 03fa777c2h
dd 0bea91e00h, 03fa737e9h
dd 0bea85e00h, 03fa6f889h
dd 0bea7a000h, 03fa6b9a2h
dd 0bea6e400h, 03fa67b36h
dd 0bea62800h, 03fa63c9ch
dd 0bea56e00h, 03fa5fe7ch
dd 0bea4b400h, 03fa5c02fh
dd 0bea3fe00h, 03fa5830bh
dd 0bea34600h, 03fa5450dh
dd 0bea29400h, 03fa508e8h
dd 0bea1de00h, 03fa4cb3ch
dd 0bea12c00h, 03fa48ebeh
dd 0bea07c00h, 03fa452c2h
dd 0be9fcc00h, 03fa4169ah
dd 0be9f1e00h, 03fa3daf5h
dd 0be9e7000h, 03fa39f25h
dd 0be9dc400h, 03fa363dah
dd 0be9d1a00h, 03fa32915h
dd 0be9c7000h, 03fa2ee26h
dd 0be9bc800h, 03fa2b3beh
dd 0be9b2000h, 03fa2792ch
dd 0be9a7a00h, 03fa23f22h
dd 0be99d600h, 03fa205a4h
dd 0be993200h, 03fa1cbfch
dd 0be989000h, 03fa192dfh
dd 0be97ec00h, 03fa158e5h
dd 0be974e00h, 03fa120e2h
dd 0be96ae00h, 03fa0e802h
dd 0be961000h, 03fa0afb1h
dd 0be957200h, 03fa07738h
dd 0be94d800h, 03fa04006h
dd 0be943a00h, 03fa0073eh
dd 0be93a200h, 03f9fd078h
dd 0be930a00h, 03f9f998ch
dd 0be927000h, 03f9f61c1h
dd 0be91da00h, 03f9f2b43h
dd 0be914400h, 03f9ef4a0h
dd 0be90b000h, 03f9ebe92h
dd 0be901a00h, 03f9e87a3h
dd 0be8f8a00h, 03f9e52c3h
dd 0be8ef600h, 03f9e1c46h
dd 0be8e6600h, 03f9de71eh
dd 0be8dd600h, 03f9db1d2h
dd 0be8d4600h, 03f9d7c62h
dd 0be8cb800h, 03f9d478ch
dd 0be8c2c00h, 03f9d1352h
dd 0be8b9e00h, 03f9cde36h
dd 0be8b1400h, 03f9caa76h
dd 0be8a8a00h, 03f9c7694h
dd 0be8a0000h, 03f9c428eh
dd 0be897600h, 03f9c0e67h
dd 0be88f000h, 03f9bdba1h
dd 0be886800h, 03f9ba7f7h
dd 0be87e200h, 03f9b74eeh
dd 0be875e00h, 03f9b4287h
dd 0be86d800h, 03f9b0f3bh
dd 0be865600h, 03f9add56h
dd 0be85d200h, 03f9aaa8ch
dd 0be855200h, 03f9a792ch
dd 0be84d000h, 03f9a46e6h
dd 0be844e00h, 03f9a1480h
dd 0be83d000h, 03f99e387h
dd 0be835200h, 03f99b26eh
dd 0be82d400h, 03f998136h
dd 0be825600h, 03f994fdfh
dd 0be81da00h, 03f991f31h
dd 0be816000h, 03f98ef2eh
dd 0be80e400h, 03f98be42h
dd 0be806a00h, 03f988e01h
dd 0be7fe000h, 03f985da2h
dd 0be7ef400h, 03f982ebch
dd 0be7e0000h, 03f97fe20h
dd 0be7d1400h, 03f97cefeh
dd 0be7c2400h, 03f979ef2h
dd 0be7b3c00h, 03f977063h
dd 0be7a5400h, 03f9741b7h
dd 0be796800h, 03f971220h
dd 0be788400h, 03f96e408h
dd 0be779c00h, 03f96b506h
dd 0be76b800h, 03f9686b6h
dd 0be75d800h, 03f96591ah
dd 0be74f400h, 03f962a90h
dd 0be741400h, 03f95fcbch
dd 0be733400h, 03f95cecch
dd 0be725800h, 03f95a193h
dd 0be717c00h, 03f95743eh
dd 0be70a400h, 03f9547a1h
dd 0be6fc800h, 03f951a15h
dd 0be6ef000h, 03f94ed42h
dd 0be6e1800h, 03f94c054h
dd 0be6d4000h, 03f94934bh
dd 0be6c7000h, 03f9467d3h
dd 0be6b9c00h, 03f943b6ah
dd 0be6ac800h, 03f940ee8h
dd 0be69f800h, 03f93e322h
dd 0be692800h, 03f93b742h
dd 0be685c00h, 03f938c20h
dd 0be678c00h, 03f93600ch
dd 0be66c000h, 03f9334b8h
dd 0be65f800h, 03f930a24h
dd 0be652c00h, 03f92de9ch
dd 0be646400h, 03f92b3d6h
dd 0be639c00h, 03f9288f7h
dd 0be62d400h, 03f925dffh
dd 0be621000h, 03f9233cah
dd 0be615000h, 03f920a5ah
dd 0be608800h, 03f91df18h
dd 0be5fc800h, 03f91b578h
dd 0be5f0800h, 03f918bc0h
dd 0be5e4800h, 03f9161f0h
dd 0be5d8800h, 03f913808h
dd 0be5ccc00h, 03f910ee8h
dd 0be5c0c00h, 03f90e4d0h
dd 0be5b5400h, 03f90bc62h
dd 0be5a9800h, 03f9092fbh
dd 0be59e000h, 03f906a5fh
dd 0be592800h, 03f9041ach
dd 0be587000h, 03f9018e2h
dd 0be57b800h, 03f8ff001h
dd 0be570400h, 03f8fc7edh
dd 0be565000h, 03f8f9fc2h
dd 0be559c00h, 03f8f7782h
dd 0be54e800h, 03f8f4f2ah
dd 0be543800h, 03f8f27a2h
dd 0be538800h, 03f8f0004h
dd 0be52d800h, 03f8ed850h
dd 0be522c00h, 03f8eb16eh
dd 0be517c00h, 03f8e898eh
dd 0be50d000h, 03f8e6280h
dd 0be502400h, 03f8e3b5dh
dd 0be4f7800h, 03f8e1424h
dd 0be4ecc00h, 03f8decd6h
dd 0be4e2800h, 03f8dc748h
dd 0be4d7c00h, 03f8d9fcfh
dd 0be4cd800h, 03f8d7a18h
dd 0be4c3000h, 03f8d5360h
dd 0be4b8800h, 03f8d2c92h
dd 0be4ae800h, 03f8d078ah
dd 0be4a4000h, 03f8ce094h
dd 0be49a000h, 03f8cbb64h
dd 0be48fc00h, 03f8c9531h
dd 0be485c00h, 03f8c6fd9h
dd 0be47bc00h, 03f8c4a6dh
dd 0be471c00h, 03f8c24edh
dd 0be467c00h, 03f8bff59h
dd 0be45e000h, 03f8bdaa2h
dd 0be454000h, 03f8bb4e6h
dd 0be44a800h, 03f8b90fah
dd 0be440800h, 03f8b6b16h
dd 0be437000h, 03f8b4704h
dd 0be42d800h, 03f8b22dfh
dd 0be423c00h, 03f8afdb3h
dd 0be41a400h, 03f8ad968h
dd 0be410c00h, 03f8ab50ah
dd 0be407800h, 03f8a918eh
dd 0be3fe000h, 03f8a6d0ah
dd 0be3f4c00h, 03f8a496ah
dd 0be3eb400h, 03f8a24c0h
dd 0be3e2400h, 03f8a01f2h
dd 0be3d9000h, 03f89de1ah
dd 0be3d0000h, 03f89bb28h
dd 0be3c6c00h, 03f89972bh
dd 0be3bd800h, 03f89731ch
dd 0be3b4c00h, 03f8950eeh
dd 0be3abc00h, 03f892db4h
dd 0be3a3000h, 03f890b62h
dd 0be399c00h, 03f88e709h
dd 0be391400h, 03f88c591h
dd 0be388400h, 03f88a20fh
dd 0be37fc00h, 03f888075h
dd 0be377000h, 03f885dcch
dd 0be36e400h, 03f883b12h
dd 0be365800h, 03f881847h
dd 0be35d400h, 03f87f768h
dd 0be354800h, 03f87d47ah
.code
;; Bit-layout constants used by the inverse-square-root code below.
;; They describe an IEEE single: bit 31 sign, bits 30..23 exponent,
;; bits 22..0 stored mantissa.
SQRT_TAB_LOG2 equ 9 ;; log2 of the lookup-table size (2^9 = 512 entries)
MANTISSA_SIZE equ 24 ;; number of mantissa bits in fp value (hidden one included)
;; number of represented mantissa bits
;; (one less than total due to hidden
;; leading one).
MANTISSA_BITS equ (MANTISSA_SIZE - 1)
ELEMENT_SIZE_LOG2 equ 3 ;; log2 of each table entry (8 bytes)
;; shift required to get bits in value
;; in the correct place to use as an
;; index for the table lookup.
;; Evaluates to 23 - 8 - 3 = 12: after the shift, bits 23..15 of the value
;; sit in bits 11..3, i.e. the result is already (entry index * 8).
EXPONENT_SHIFT equ (MANTISSA_BITS - (SQRT_TAB_LOG2 - 1)\
- ELEMENT_SIZE_LOG2)
;; mask value for clamping to [.5..2): keeps the 23 mantissa bits plus the
;; lowest exponent bit (00FFFFFFh)
CLAMP_MASK equ ((1 SHL (MANTISSA_BITS+1)) - 1)
;; mask for the stored mantissa bits (007FFFFFh)
MANTISSA_MASK equ ((1 SHL MANTISSA_BITS) - 1)
;; mask for sign/exponent bits (0FF800000h)
EXPONENT_MASK equ (-1 AND (NOT MANTISSA_MASK))
;; mask for table lookup: 9 index bits positioned above the 3 low
;; "byte within entry" bits (0FF8h)
TABLE_MASK equ ((1 SHL (SQRT_TAB_LOG2 + ELEMENT_SIZE_LOG2)) - 1) \
AND (NOT((1 SHL ELEMENT_SIZE_LOG2) - 1))
;; bias used to represent clamped value: the bit pattern of 0.5f (exponent
;; field 126).  OR-ing it onto a CLAMP_MASK'ed value gives an exponent field
;; of 126 or 127 depending on the retained low exponent bit.
EXPONENT_BIAS_EVEN equ 3f000000h
;; bias value used for final exponent
;; computation.  127/2 is an integer division (63), so this is
;; ((127+63) SHL 24) OR 00FFFFFFh = 0BEFFFFFFh.  The code computes
;; ((LARGE_EXPONENT_BIAS - value) SHR 1) AND EXPONENT_MASK; for a positive
;; value with exponent field E that leaves an exponent field of
;; (381 - E) SHR 1 and a zero mantissa.
LARGE_EXPONENT_BIAS equ (((127 + 127/2) SHL (MANTISSA_BITS+1)) OR CLAMP_MASK)
;----------------------------------------------------------------------;
; @__FastInvSqrt@4
;
; Table-driven approximation of 1/sqrt(x) for one single-precision value.
;
; In:    DWORD at 8[ebp] (the first stack argument) = bit pattern of x.
; Out:   ST(0) = (m * xc + b) * s, where
;          xc     = x with its exponent replaced so it lies in [0.5, 2.0),
;          (m, b) = the invSqrtTab entry selected by bits 23..15 of x,
;          s      = the float whose bits are
;                   ((LARGE_EXPONENT_BIAS - x) SHR 1) AND EXPONENT_MASK.
; Regs:  ecx is saved and restored; eax and EFLAGS are overwritten.
; Exit:  "ret 4" removes the 4-byte argument from the stack.
;
; There is no special handling of zero, negative, denormal, infinite or
; NaN inputs; they go through the same bit manipulation as any other value.
;----------------------------------------------------------------------;
;; caller's argument, accessed as raw bits
isqArg equ DWORD PTR 8[ebp]
;; frame slot: power-of-two scale factor s
isqScale equ DWORD PTR -4[ebp]
;; frame slot: argument clamped into [0.5, 2.0)
isqClamped equ DWORD PTR -8[ebp]
@__FastInvSqrt@4 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                  ;; two DWORD frame slots
        push    ecx

        ;; xc: keep mantissa + low exponent bit, then force the remaining
        ;; exponent bits so the value lies in [0.5, 2.0).
        mov     ecx, isqArg
        mov     eax, ecx
        and     eax, CLAMP_MASK
        or      eax, EXPONENT_BIAS_EVEN
        mov     isqClamped, eax

        ;; ecx = byte offset of the (m, b) pair inside invSqrtTab.
        shr     ecx, EXPONENT_SHIFT
        and     ecx, TABLE_MASK

        ;; s: subtract the input bits from the bias, halve, and keep only
        ;; the sign/exponent field.
        mov     eax, LARGE_EXPONENT_BIAS
        sub     eax, isqArg
        shr     eax, 1
        and     eax, EXPONENT_MASK
        mov     isqScale, eax

        ;; ST(0) = (m * xc + b) * s
        fld     isqClamped
        fmul    DWORD PTR [invSqrtTab+ecx]
        fadd    DWORD PTR [invSqrtTab+ecx+4]
        fmul    isqScale

        pop     ecx
        leave                           ;; mov esp, ebp / pop ebp
        ret     4
@__FastInvSqrt@4 endp
;----------------------------------------------------------------------;
; @__glNormalize@8
;
; Scales a vector of three single-precision floats by an approximate
; 1/sqrt of its squared length, using the same invSqrtTab lookup as
; @__FastInvSqrt@4.
;
; In:    ecx = destination (three DWORDs are written)
;        edx = source      (three DWORDs are read)
;        The two may be the same address: every source component is
;        loaded before the first destination store.
; Out:   nothing in registers; "ret 0" (no stack arguments).
; Regs:  eax and EFLAGS are overwritten; edi is saved and restored;
;        the x87 stack is left at its entry depth.
;
; Special cases, decided on the bit pattern of len = (x*x + y*y) + z*z:
;   len bits == 0            three zero DWORDs are stored
;   len bits == __FLOAT_ONE  source is copied unchanged (skipped entirely
;                            when ecx == edx).  __FLOAT_ONE is defined
;                            outside this file - presumably the bit
;                            pattern of 1.0f; confirm in gli386.inc.
;----------------------------------------------------------------------;
;; frame slot: squared length
nrmLenSq equ DWORD PTR -4[ebp]
;; frame slot: scratch for the clamped value, then for the scale factor
nrmWork equ DWORD PTR -8[ebp]
@__glNormalize@8 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                  ;; two DWORD frame slots

        ;; len = (x*x + y*y) + z*z
        fld     DWORD PTR [edx]
        fmul    ST, ST(0)               ;; xx
        fld     DWORD PTR [edx+4]
        fmul    ST, ST(0)               ;; yy  xx
        faddp   ST(1), ST               ;; xx+yy
        fld     DWORD PTR [edx+8]
        fmul    ST, ST(0)               ;; zz  xx+yy
        faddp   ST(1), ST               ;; len
        fstp    nrmLenSq

        mov     eax, nrmLenSq
        test    eax, eax
        jne     nrmNonZero

        ;; Zero length: eax is 0 here, store it to all three components.
        mov     [ecx], eax
        mov     [ecx+4], eax
        mov     [ecx+8], eax
        jmp     nrmDone

nrmNonZero:
        cmp     eax, __FLOAT_ONE
        jne     nrmScale

        ;; Length bits equal __FLOAT_ONE: plain copy, unless in place.
        cmp     ecx, edx
        je      nrmDone
        mov     eax, [edx]
        mov     [ecx], eax
        mov     eax, [edx+4]
        mov     [ecx+4], eax
        mov     eax, [edx+8]
        mov     [ecx+8], eax
        jmp     nrmDone

nrmScale:
        ;; General case.  eax still holds the bits of len.
        push    edi
        mov     edi, eax

        ;; Clamp len into [0.5, 2.0) and park it for the FPU.
        and     eax, CLAMP_MASK
        or      eax, EXPONENT_BIAS_EVEN
        mov     nrmWork, eax

        ;; edi = byte offset of the (m, b) pair inside invSqrtTab.
        shr     edi, EXPONENT_SHIFT
        and     edi, TABLE_MASK

        ;; ST(0) = m * clamped + b
        fld     nrmWork
        fmul    DWORD PTR [invSqrtTab+edi]
        fadd    DWORD PTR [invSqrtTab+edi+4]

        ;; Power-of-two scale factor from the exponent of len.
        mov     eax, LARGE_EXPONENT_BIAS
        sub     eax, nrmLenSq
        shr     eax, 1
        and     eax, EXPONENT_MASK
        mov     nrmWork, eax
        fmul    nrmWork                 ;; inv  (approx. 1/sqrt(len))

        ;; Scale all three components, then store them.
        fld     DWORD PTR [edx+8]
        fmul    ST, ST(1)               ;; z'  inv
        fld     DWORD PTR [edx+4]
        fmul    ST, ST(2)               ;; y'  z'  inv
        fld     DWORD PTR [edx]
        fmul    ST, ST(3)               ;; x'  y'  z'  inv
        fstp    DWORD PTR [ecx]
        fstp    DWORD PTR [ecx+4]
        fstp    DWORD PTR [ecx+8]
        fstp    ST(0)                   ;; discard inv
        pop     edi

nrmDone:
        leave                           ;; mov esp, ebp / pop ebp
        ret     0
@__glNormalize@8 ENDP
END