You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
471 lines
18 KiB
471 lines
18 KiB
;//
|
|
;// INTEL CORPORATION PROPRIETARY INFORMATION
|
|
;// This software is supplied under the terms of a license agreement or
|
|
;// nondisclosure agreement with Intel Corporation and may not be copied
|
|
;// or disclosed except in accordance with the terms of that agreement.
|
|
;// Copyright (c) 2000 Intel Corporation. All Rights Reserved.
|
|
;//
|
|
;//
|
|
; log10_wmt.asm
|
|
;
|
|
; double log10(double);
|
|
;
|
|
; Initial version: 11/30/2000
|
|
;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; This is a second implementation of the branch-free log10 function ;;
|
|
;; In this version, we use a trick to get the index j = 0,1,...,64 that ;;
|
|
;; does not require integer testing. By adding 2^(42)-1 (FP) to Y, the ;;
|
|
;; least 4 bits contain don't cares and the 7 bits following that have ;;
|
|
;; the range 0 to 64. Hence obtaining, say, the 16 lsb and masking off ;;
|
|
;; everything except bit 4 through 10 gives the shifted index. ;;
|
|
;; This saves an integer shift as well. ;;
|
|
;; ;;
|
|
;; Another important feature is that we use the table of log(1/B) ;;
|
|
;; throughout. To ensure numerical accuracy, we only need to ensure that ;;
|
|
;; T(0)_hi = B(last)_hi, T(0)_lo = B(last)_lo. This ensures W_hi = 0 and ;;
|
|
;; W_lo = 0 exactly in the case of |X-1| <= 2^(-7). ;;
|
|
;; Finally, we do away with the need for extra-precision addition by the ;;
|
|
;; following observation. The three pieces at the end are ;;
|
|
;; A = W_hi + r_hi; B = r_lo; C = P + W_lo. ;;
|
|
;; When W_hi = W_lo = 0, the addition sequence (A+B) + C is accurate as ;;
|
|
;; the sum A+B is exact. ;;
|
|
;; Otherwise, A + (B+C) is accurate as B is going to be largely shifted ;;
|
|
;; off compared to the final result. ;;
|
|
;; Hence if we use compare and mask operations to ;;
|
|
;; create alpha = (r_lo or 0), beta = (0 or r_lo), Res_hi <- W_hi+alpha, ;;
|
|
;; Res_lo <- C + beta, then result is accurately computed as ;;
|
|
;; Res_hi+Res_lo. ;;
|
|
;; ;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
.686P
|
|
.387
|
|
.XMM
|
|
.MODEL FLAT,C
|
|
|
|
EXTRN C __libm_error_support : NEAR
|
|
|
|
CONST SEGMENT PARA PUBLIC USE32 'CONST'
|
|
ALIGN 16
|
|
|
|
emask DQ 000FFFFFFFFFFFFFH, 000FFFFFFFFFFFFFH ; mask off sign/expo field
|
|
CC DQ 3FDBC00000000000H, 3FDBC00000000000H ; pair of C
|
|
Magic DQ 428FFFFFFFFFF810H, 428FFFFFFFFFF810H ; 2^(42)-1+2^(-7)
|
|
hi_mask DQ 7FFFFFFF80000000H, 7FFFFFFF80000000H ; masks off the sign bit and the bottom 31 bits (keeps the high part)
|
|
LOG10_2 DQ 3FD34413509F7800H, 3D1FEF311F12B358H ; L_hi,L_lo -> [L_lo|L_hi]
|
|
place_L DQ 0000000000000000H,0FFFFFFFFFFFFFFFFH ; 0,1 -> [FF..FF|00..00]
|
|
DQ 0FFFFFFFFFFFFFFFFH, 0000000000000000H ; 1,0 -> [00..00|FF..FF]
|
|
One DQ 3ff0000000000000H, 3ff0000000000000H ; 1,1
|
|
Zero DQ 0000000000000000H, 0000000000000000H ; 0,0
|
|
Two52 DQ 4330000000000000H, 4330000000000000H ; 2^52 for normalization
|
|
Infs DQ 0FFF0000000000000H, 7FF0000000000000H ; -inf,+inf --> [+inf|-inf]
|
|
NaN DQ 7FF0000000000001H, 7FF0000000000001H ; NaN for log(-ve), log(Nan)
|
|
|
|
coeff DQ 40358914C697CEF9H, 0C00893096429813DH ; p6,p3 ->[p3|p6]
|
|
DQ 0C025C9806A358455H, 3FFC6A02DC9635D2H ; p5,p2 ->[p2|p5]
|
|
DQ 4016AB9F7E1899F7H, 0BFF27AF2DC77B135H ; p4,p1 ->[p1|p4]
|
|
DQ 3F5A7A6CBF2E4108H, 0000000000000000H ; p0,0 -> [0|p0]
|
|
|
|
;-------Table T_hi,T_lo (65 entries, j = 0..64) so that movapd gives [ T_lo | T_hi ]; entry 0 is zero and the last entry equals LOG10_2
|
|
T_Tbl DQ 0000000000000000H, 0000000000000000H
|
|
DQ 3F7C03A80AE40000H, 3D3E05382D51F71BH
|
|
DQ 3F8B579DB6DE0000H, 3D386B09FEFB3005H
|
|
DQ 3F9470AEDE968000H, 3D39FC780C91E11DH
|
|
DQ 3F9ADA2E8E3E0000H, 3D351BD19E6E701AH
|
|
DQ 3FA0ADD8F759C000H, 3D1B2A51090000A1H
|
|
DQ 3FA3FAF7C6630000H, 3D083662F181F53FH
|
|
DQ 3FA7171E59EFC000H, 3D16BD1A3FCF54DBH
|
|
DQ 3FAA3E9002C70000H, 3D21D257C8D0D386H
|
|
DQ 3FAD32332DC34000H, 3D1B7ADBF8D9441FH
|
|
DQ 3FB0281170D6A000H, 3D1BF38B28AF5076H
|
|
DQ 3FB19C1FECF16000H, 3D3EE03F1E5355D4H
|
|
DQ 3FB3151BFD65C000H, 3D37E280048C6795H
|
|
DQ 3FB4932780C56000H, 3D2FC4ACCD62A5F3H
|
|
DQ 3FB605735EE98000H, 3D17C3CF23A17D9FH
|
|
DQ 3FB76B778D4AA000H, 3D1C03E812A06E7AH
|
|
DQ 3FB8D60B4EE4C000H, 3D3900E5CC4E4C82H
|
|
DQ 3FBA33B422244000H, 3D36F17034675735H
|
|
DQ 3FBB95B654A78000H, 3D290E5E24764EC7H
|
|
DQ 3FBCEA2602E9E000H, 3CEBD129822ECBCBH
|
|
DQ 3FBE42B4C16CA000H, 3D25E50FF38D4DE9H
|
|
DQ 3FBF8D05B16A6000H, 3D2A8EA5A2B777A7H
|
|
DQ 3FC06D9BC53C2000H, 3D32818DEEE1FA45H
|
|
DQ 3FC10D3EACDE0000H, 3D1E8A45CB83F0AEH
|
|
DQ 3FC1B83F1574D000H, 3D010B19F193FFD4H
|
|
DQ 3FC251FE054FD000H, 3CFEAC09402877C0H
|
|
DQ 3FC2F7301CF4E000H, 3D30F5C70D1A6341H
|
|
DQ 3FC394700F795000H, 3D1FE93F791A7264H
|
|
DQ 3FC4297453B4A000H, 3D3ECE09C5BC4B34H
|
|
DQ 3FC4CA24FAFEC000H, 3D2E204342E66851H
|
|
DQ 3FC5627512093000H, 3D30DFECB3AA172DH
|
|
DQ 3FC5F21A1AF60000H, 3D3FEF1B2D3E6113H
|
|
DQ 3FC68DA216900000H, 3CED942CFC9699D0H
|
|
DQ 3FC720586C280000H, 3D3D20A8624054CDH
|
|
DQ 3FC7B495FF1C5000H, 3D25012C689133C5H
|
|
DQ 3FC83FA266CEA000H, 3D20C6C18687239FH
|
|
DQ 3FC8CC0E0C56F000H, 3D36E3B4A1CFA0DFH
|
|
DQ 3FC959DFEFE7D000H, 3D2420027AFFE0E5H
|
|
DQ 3FC9E91F47D2C000H, 3D35330E6CF22420H
|
|
DQ 3FCA6EA48B034000H, 3D33EBACB92B5B7FH
|
|
DQ 3FCB00B7C552F000H, 3D3DF4694C64AA73H
|
|
DQ 3FCB88E67CF97000H, 3D32FF232278A072H
|
|
DQ 3FCC06E3BA2E4000H, 3D32CB15CD55BD7CH
|
|
DQ 3FCC919DD46C0000H, 3D0EB64694E6AC72H
|
|
DQ 3FCD11FB61139000H, 3D1A34DB91AE960BH
|
|
DQ 3FCD9F59ABFD1000H, 3D207B23BCD76C73H
|
|
DQ 3FCE163D527E6000H, 3D319D69F22E93E4H
|
|
DQ 3FCE9A2CDC02A000H, 3D20EBF59081F187H
|
|
DQ 3FCF1F5876949000H, 3D07AFEBEA179000H
|
|
DQ 3FCF99801FDB7000H, 3D22737DF7F29668H
|
|
DQ 3FD00A5B4509D000H, 3D1F6B5B2353257FH
|
|
DQ 3FD0488037FBE800H, 3D1B6A93B9B912C6H
|
|
DQ 3FD087315621A800H, 3D3261DA7DBFF3AEH
|
|
DQ 3FD0C6711D6AB800H, 3D35E94A8D30C132H
|
|
DQ 3FD0FFD9CDD2A800H, 3D16350EF6F19D80H
|
|
DQ 3FD1402FBEC27800H, 3D313C204222BA8BH
|
|
DQ 3FD17A9719699000H, 3D21F279212D5C99H
|
|
DQ 3FD1B57A30AC5800H, 3D3DCF3E62FF847EH
|
|
DQ 3FD1F0DB153AB800H, 3D27582E230C0EDFH
|
|
DQ 3FD2260E4F424800H, 3D157E1028A41FF9H
|
|
DQ 3FD26262A6117800H, 3D12B01A2E0C1912H
|
|
DQ 3FD29871C043D800H, 3D2B3969AC9E3779H
|
|
DQ 3FD2D5C1760B8000H, 3D3AEADEBE0F08BFH
|
|
DQ 3FD30CB3A7BB3000H, 3D38929919B6D832H
|
|
DQ 3FD34413509F7800H, 3D1FEF311F12B358H
|
|
|
|
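;-------Table CB_Tbl (65 entries, j = 0..64): each value is stored twice so that movapd gives [CB|CB]; entry 0 equals CC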
|
|
CB_Tbl DQ 3FDBC00000000000H, 3FDBC00000000000H
|
|
DQ 3FDB510000000000H, 3FDB510000000000H
|
|
DQ 3FDAE8F000000000H, 3FDAE8F000000000H
|
|
DQ 3FDA80E000000000H, 3FDA80E000000000H
|
|
DQ 3FDA1FC000000000H, 3FDA1FC000000000H
|
|
DQ 3FD9BEA000000000H, 3FD9BEA000000000H
|
|
DQ 3FD95D8000000000H, 3FD95D8000000000H
|
|
DQ 3FD9035000000000H, 3FD9035000000000H
|
|
DQ 3FD8A92000000000H, 3FD8A92000000000H
|
|
DQ 3FD855E000000000H, 3FD855E000000000H
|
|
DQ 3FD7FF2800000000H, 3FD7FF2800000000H
|
|
DQ 3FD7AF6000000000H, 3FD7AF6000000000H
|
|
DQ 3FD75F9800000000H, 3FD75F9800000000H
|
|
DQ 3FD70FD000000000H, 3FD70FD000000000H
|
|
DQ 3FD6C38000000000H, 3FD6C38000000000H
|
|
DQ 3FD67AA800000000H, 3FD67AA800000000H
|
|
DQ 3FD631D000000000H, 3FD631D000000000H
|
|
DQ 3FD5EC7000000000H, 3FD5EC7000000000H
|
|
DQ 3FD5A71000000000H, 3FD5A71000000000H
|
|
DQ 3FD5652800000000H, 3FD5652800000000H
|
|
DQ 3FD5234000000000H, 3FD5234000000000H
|
|
DQ 3FD4E4D000000000H, 3FD4E4D000000000H
|
|
DQ 3FD4A66000000000H, 3FD4A66000000000H
|
|
DQ 3FD46B6800000000H, 3FD46B6800000000H
|
|
DQ 3FD42CF800000000H, 3FD42CF800000000H
|
|
DQ 3FD3F57800000000H, 3FD3F57800000000H
|
|
DQ 3FD3BA8000000000H, 3FD3BA8000000000H
|
|
DQ 3FD3830000000000H, 3FD3830000000000H
|
|
DQ 3FD34EF800000000H, 3FD34EF800000000H
|
|
DQ 3FD3177800000000H, 3FD3177800000000H
|
|
DQ 3FD2E37000000000H, 3FD2E37000000000H
|
|
DQ 3FD2B2E000000000H, 3FD2B2E000000000H
|
|
DQ 3FD27ED800000000H, 3FD27ED800000000H
|
|
DQ 3FD24E4800000000H, 3FD24E4800000000H
|
|
DQ 3FD21DB800000000H, 3FD21DB800000000H
|
|
DQ 3FD1F0A000000000H, 3FD1F0A000000000H
|
|
DQ 3FD1C38800000000H, 3FD1C38800000000H
|
|
DQ 3FD1967000000000H, 3FD1967000000000H
|
|
DQ 3FD1695800000000H, 3FD1695800000000H
|
|
DQ 3FD13FB800000000H, 3FD13FB800000000H
|
|
DQ 3FD112A000000000H, 3FD112A000000000H
|
|
DQ 3FD0E90000000000H, 3FD0E90000000000H
|
|
DQ 3FD0C2D800000000H, 3FD0C2D800000000H
|
|
DQ 3FD0993800000000H, 3FD0993800000000H
|
|
DQ 3FD0731000000000H, 3FD0731000000000H
|
|
DQ 3FD0497000000000H, 3FD0497000000000H
|
|
DQ 3FD026C000000000H, 3FD026C000000000H
|
|
DQ 3FD0009800000000H, 3FD0009800000000H
|
|
DQ 3FCFB4E000000000H, 3FCFB4E000000000H
|
|
DQ 3FCF6F8000000000H, 3FCF6F8000000000H
|
|
DQ 3FCF2A2000000000H, 3FCF2A2000000000H
|
|
DQ 3FCEE4C000000000H, 3FCEE4C000000000H
|
|
DQ 3FCE9F6000000000H, 3FCE9F6000000000H
|
|
DQ 3FCE5A0000000000H, 3FCE5A0000000000H
|
|
DQ 3FCE1B9000000000H, 3FCE1B9000000000H
|
|
DQ 3FCDD63000000000H, 3FCDD63000000000H
|
|
DQ 3FCD97C000000000H, 3FCD97C000000000H
|
|
DQ 3FCD595000000000H, 3FCD595000000000H
|
|
DQ 3FCD1AE000000000H, 3FCD1AE000000000H
|
|
DQ 3FCCE36000000000H, 3FCCE36000000000H
|
|
DQ 3FCCA4F000000000H, 3FCCA4F000000000H
|
|
DQ 3FCC6D7000000000H, 3FCC6D7000000000H
|
|
DQ 3FCC2F0000000000H, 3FCC2F0000000000H
|
|
DQ 3FCBF78000000000H, 3FCBF78000000000H
|
|
DQ 3FCBC00000000000H, 3FCBC00000000000H
|
|
|
|
ALIGN 16
|
|
CONST ENDS
|
|
|
|
;-----------------------------------------------------------------------
; $cmpsd op1, op2, op3
; Emits a scalar-double compare by assembling the packed form (cmppd)
; and then patching the first byte of the emitted instruction to 0F2h.
; cmppd is encoded 66 0F C2 /r ib and cmpsd is F2 0F C2 /r ib, so only
; the leading prefix byte differs.
; op1 = destination xmm register, op2 = source, op3 = compare predicate
; immediate (the uses in this file pass 0).
; NOTE(review): presumably needed because the assembler in use did not
; accept the SSE2 cmpsd mnemonic directly -- confirm before replacing.
;-----------------------------------------------------------------------
$cmpsd MACRO op1, op2, op3
|
|
LOCAL begin_cmpsd, end_cmpsd ; labels are unique per expansion
|
|
begin_cmpsd:
|
|
cmppd op1, op2, op3 ; packed encoding, first byte will be overwritten
|
|
end_cmpsd:
|
|
org begin_cmpsd ; move the location counter back to the first byte
|
|
db 0F2h ; overwrite that byte with the F2 prefix
|
|
org end_cmpsd ; restore the location counter past the instruction
|
|
ENDM
|
|
|
|
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
|
|
ALIGN 16
|
|
|
|
PUBLIC _log10_pentium4, _CIlog10_pentium4
|
|
;-----------------------------------------------------------------------
; _CIlog10_pentium4 -- entry point that takes its argument on the x87
; register stack.
; In:  ST(0) = X (popped here by fstp)
; Out: result left in ST(0) by the shared body reached through `start`
; X is spilled to a 16-byte-aligned stack slot and copied into xmm0.
; `call start` then pushes a return address, so inside the shared body
; the spilled X sits at [esp+4] -- the same offset at which the
; stack-argument entry (_log10_pentium4) finds its parameter, and from
; which the special-case code reloads X.
;-----------------------------------------------------------------------
_CIlog10_pentium4 PROC NEAR
|
|
push ebp
|
|
mov ebp, esp ; frame pointer, lets `leave` undo the alignment below
|
|
sub esp, 8 ; for argument DBLSIZE
|
|
and esp, 0fffffff0h ; round esp down to a 16-byte boundary
|
|
fstp qword ptr [esp] ; pop X from the x87 stack into the slot
|
|
movq xmm0, qword ptr [esp] ; low half of xmm0 = X
|
|
call start ; shared body; X is at [esp+4] after the push of the return address
|
|
leave ; esp = ebp, pop ebp
|
|
ret
|
|
|
|
|
|
;----------------------;
|
|
;--Argument Reduction--;
|
|
;----------------------;
|
|
|
|
;-----------------------------------------------------------------------
; _log10_pentium4 -- entry point that takes X as a double at [esp+4].
; The result is returned in ST(0) (every exit path ends with fld).
; Register roles in the argument reduction:
;   edx  = exponent adjustment (0 here, -52 when INPUT_DENORM jumps
;          back to DENORMAL_RETRY with a rescaled X)
;   ecx  = low 16 bits of (X >> 52), i.e. sign and biased exponent
;   eax  = 16 * table index j, used to address CB_Tbl and T_Tbl
;   xmm0 = [R|R] on exit, xmm4 = [R_lo|R_lo], xmm7 = [T_lo|T_hi+R_hi]
;-----------------------------------------------------------------------
_log10_pentium4 label proc
|
|
movlpd xmm0, QWORD PTR [4+esp] ;... load X to low part of xmm0
|
|
start:
|
|
mov edx,0 ;... set edx to 0
|
|
|
|
DENORMAL_RETRY:
|
|
|
|
movapd xmm5,xmm0
|
|
unpcklpd xmm0,xmm0 ;... [X|X]
|
|
|
|
psrlq xmm5,52 ;... shift sign/exponent field down to bit 0
|
|
pextrw ecx,xmm5,0 ;... ecx = sign and biased exponent (masked later)
|
|
|
|
movapd xmm1, QWORD PTR [emask] ;... pair of 000FF...FF
|
|
movapd xmm2, QWORD PTR [CC] ;... pair of CC
|
|
movapd xmm3, QWORD PTR [One] ;... pair of 3FF000...000
|
|
movapd xmm4, QWORD PTR [Magic] ;... pair of 2^(42)-1+2^(-7)
|
|
movapd xmm6, QWORD PTR [hi_mask] ;... pair of 7FFFFFFF8000..00
|
|
andpd xmm0,xmm1 ;... keep the mantissa bits only
|
|
orpd xmm0,xmm3 ;... [Y|Y]
|
|
addpd xmm4,xmm0 ;... 11 lsb contains the index to CB
|
|
;... the last 4 lsb are don't cares, the
|
|
;... 7 bits following that is the index
|
|
;... Hence by masking, we already have index*16
|
|
|
|
pextrw eax,xmm4,0
|
|
and eax,000007F0H ;... eax is offset
|
|
movapd xmm4, QWORD PTR [eax+CB_Tbl] ;... [CB|CB]
|
|
movapd xmm7, QWORD PTR [eax+T_Tbl] ;... [T_lo|T_hi]
|
|
|
|
andpd xmm6,xmm0 ;... [Y_hi|Y_hi]
|
|
subpd xmm0,xmm6 ;... [Y_lo|Y_lo]
|
|
mulpd xmm6,xmm4 ;... [CB*Y_hi|CB*Y_hi]
|
|
subpd xmm6,xmm2 ;... [R_hi|R_hi]
|
|
addsd xmm7,xmm6 ;... [T_lo|T_hi+R_hi]
|
|
mulpd xmm0,xmm4 ;... [R_lo|R_lo]
|
|
movapd xmm4,xmm0 ;... [R_lo|R_lo]
|
|
addpd xmm0,xmm6 ;... [R|R]
|
|
|
|
;-----------------------------------------;
|
|
;--Approx and Reconstruction in parallel--;
|
|
;-----------------------------------------;
|
|
|
|
; Polynomial evaluation and reconstruction of the result.
; On entry: ecx = raw sign/exponent word, eax = 16*j, edx = exponent
; adjustment, xmm0 = [R|R], xmm4 = [R_lo|R_lo], xmm7 = [T_lo|T_hi+R_hi].
; Inputs that are not positive normal numbers branch to SPECIAL_CASES.
; The result is written to a 16-byte stack scratch area and reloaded
; with fld so that it is returned in ST(0).
;...m is in ecx, [T_lo,T_hi+R_hi] in xmm7
|
|
;...xmm4 through xmm6 will be used
|
|
and ecx,00000FFFH ;... note we need sign and biased exponent
|
|
sub ecx,1
|
|
cmp ecx,2045 ;... the largest biased exponent 2046-1
|
|
;... if ecx is ABOVE (unsigned) this, either
|
|
;... the sign is +ve and biased exponent is 7FF
|
|
;... or the sign is +ve and exponent is 0, or
|
|
;... the sign is -ve (i.e. sign bit 1)
|
|
ja SPECIAL_CASES
|
|
|
|
sub ecx,1022 ;... m in integer format
|
|
add ecx,edx ;... this is the denormal adjustment
|
|
|
|
cvtsi2sd xmm6,ecx
|
|
unpcklpd xmm6,xmm6 ;... [m | m] in FP format
|
|
|
|
shl ecx,10 ;... 1024*m = 16*64*m
|
|
add eax,ecx ;16*(64*m + j) 0 <=> (m=-1 & j=64) or (m=0 & j=0)
|
|
mov ecx,16
|
|
mov edx,0
|
|
cmp eax,0
|
|
cmove edx,ecx ;this is the index into the mask table (place_{L,R}): 16 when eax = 0, else 0
|
|
|
|
movapd xmm1, QWORD PTR [coeff] ;... loading [p3|p6]
|
|
movapd xmm3,xmm0
|
|
movapd xmm2, QWORD PTR [coeff+16] ;... loading [p2|p5]
|
|
mulpd xmm1,xmm0 ;... [p3 R | p6 R]
|
|
mulpd xmm3,xmm3 ;... [R^2|R^2]
|
|
addpd xmm1,xmm2 ;... [p2+p3 R |p5+p6 R]
|
|
movapd xmm2, QWORD PTR [coeff+32] ;... [p1|p4]
|
|
mulsd xmm3,xmm3 ;... [R^2|R^4]
|
|
|
|
movapd xmm5, QWORD PTR [LOG10_2] ;... loading [L_lo|L_hi]
|
|
;... [T_lo|T_hi+R_hi] already in xmm7
|
|
mulpd xmm6,xmm5 ;... [m L_lo | m L_hi]
|
|
movapd xmm5, QWORD PTR [edx+place_L] ;... [FF..FF|00.00] or [00..00|FF..FF]
|
|
andpd xmm4,xmm5 ;... [R_lo|0] or [0|R_lo]
|
|
addpd xmm7,xmm6 ;... [W_lo|W_hi]
|
|
addpd xmm7,xmm4 ;... [A_lo|A_hi]
|
|
|
|
mulpd xmm1,xmm0 ;... [p2 R+p3 R^2|p5 R+p6 R^2]
|
|
mulsd xmm3,xmm0 ;... [R^2|R^5]
|
|
addpd xmm1,xmm2 ;... [p1+.. | p4+...]
|
|
movapd xmm2, QWORD PTR [coeff+48] ;... [0|p0]
|
|
mulpd xmm2,xmm0 ;... [0|p0 R]
|
|
|
|
|
|
movapd xmm6,xmm7
|
|
unpckhpd xmm6,xmm6 ;... [*|A_lo]
|
|
|
|
mulpd xmm1,xmm3 ;... [P_hi|P_lo]
|
|
sub esp, 16 ; scratch area used to move the result to the x87 stack
|
|
movapd xmm0,xmm1 ;... copy of [P_hi|P_lo]
|
|
addpd xmm1,xmm2 ;... [P_hi|P_lo]
|
|
unpckhpd xmm0,xmm0 ;... [P_hi|P_hi]
|
|
|
|
;...[P_hi|P_lo] in xmm1 at this point
|
|
addsd xmm0,xmm1 ;... [*|P]
|
|
addsd xmm0,xmm6 ;... add the low part A_lo
|
|
addsd xmm0,xmm7 ;... add the high part A_hi last
|
|
|
|
movlpd QWORD PTR [esp+4], xmm0 ; return result
|
|
fld QWORD PTR [esp+4] ; result is returned in ST(0)
|
|
add esp, 16
|
|
ret
|
|
|
|
; Dispatcher for every input that is not a positive normal number.
; On entry ecx = (sign and biased exponent of X) - 1 and the original X
; is at [esp+4]. Tests are made in this order:
;   X == +-0          -> INPUT_ZERO
;   ecx == -1         -> INPUT_DENORM   (positive denormal)
;   ecx >  7FEh       -> INPUT_NEGATIVE (sign bit set)
;   otherwise the biased exponent is 7FFh with a clear sign bit:
;   mantissa == 0     -> INPUT_INF (fall through), else INPUT_NaN
SPECIAL_CASES:
|
|
movlpd xmm0, QWORD PTR [4+esp] ;... load X again
|
|
movapd xmm1, QWORD PTR [Zero]
|
|
$cmpsd xmm1,xmm0,0 ;... predicate 0 = compare equal
|
|
pextrw eax,xmm1,0 ;... ones if X = +-0.0
|
|
cmp eax,0
|
|
ja INPUT_ZERO
|
|
|
|
cmp ecx,-1 ;... ecx = -1 iff X is positive denormal (+0 was dispatched above)
|
|
je INPUT_DENORM
|
|
|
|
cmp ecx,000007FEH ;... anything above 7FEh has the sign bit set
|
|
ja INPUT_NEGATIVE
|
|
|
|
movlpd xmm0, QWORD PTR [4+esp]
|
|
movapd xmm1, QWORD PTR [emask]
|
|
movapd xmm2, QWORD PTR [One]
|
|
andpd xmm0,xmm1 ;... keep the mantissa bits only
|
|
orpd xmm0,xmm2 ;... xmm0 is 1 iff the input argument was +inf
|
|
$cmpsd xmm2,xmm0,0
|
|
pextrw eax,xmm2,0 ;... 0 if X is NaN
|
|
cmp eax, 0
|
|
je INPUT_NaN
|
|
|
|
; X = +inf: return +inf in ST(0). Infs+8 addresses the second quadword
; of Infs, which holds 7FF0000000000000H.
INPUT_INF:
|
|
;....Input is +Inf
|
|
fld QWORD PTR [Infs+8] ;
|
|
ret
|
|
|
|
; X is a NaN with the sign bit clear: hand off to the common error path
; with tag 1001 in edx. The commented-out sequence below is an earlier
; inline version that returned X+X directly; it is kept unchanged.
; NOTE(review): on this path xmm1 still holds emask (loaded in
; SPECIAL_CASES), and CALL_LIBM_ERROR stores xmm1 as the initial retval.
; Presumably __libm_error_support overwrites retval for this tag --
; confirm against its implementation.
INPUT_NaN:
|
|
|
|
; movlpd xmm0, [esp+4]
|
|
; addsd xmm0, xmm0
|
|
; sub esp, 16
|
|
; movlpd [esp+4], xmm0
|
|
; fld QWORD PTR [esp+4]
|
|
; add esp, 16
|
|
; ret
|
|
mov edx, 1001 ; tag passed as the 4th argument of __libm_error_support
|
|
jmp CALL_LIBM_ERROR
|
|
|
|
; X = +-0 (xmm0 was reloaded with X in SPECIAL_CASES). Computes 1/X only
; for its divide-by-zero exception side effect; the quotient in xmm2 is
; not used afterwards. The initial return value is the low quadword of
; Infs (0FFF0000000000000H = -inf) and the error tag is 8.
INPUT_ZERO:
|
|
|
|
; raise Divide by Zero
|
|
movlpd xmm2, QWORD PTR [One]
|
|
divsd xmm2, xmm0
|
|
movlpd xmm1, QWORD PTR [Infs] ; xmm1 = -inf, stored as retval by CALL_LIBM_ERROR
|
|
mov edx, 8 ; tag passed as the 4th argument of __libm_error_support
|
|
jmp CALL_LIBM_ERROR
|
|
|
|
; X is a positive denormal (xmm0 was reloaded with X in SPECIAL_CASES).
INPUT_DENORM:
|
|
|
|
;....Only positive denormals reach this label: +-0 is dispatched to
|
|
;....INPUT_ZERO by the first compare in SPECIAL_CASES, so no zero check
|
|
;....is needed here. Scale X by 2^52 and compensate in the exponent.
|
|
|
|
movlpd xmm1,Two52
|
|
mulsd xmm0,xmm1 ;...xmm0 = X * 2^52
|
|
mov edx,-52 ;...set adjustment to exponent
|
|
jmp DENORMAL_RETRY ;...branch back
|
|
|
|
|
|
; X has its sign bit set and is not -0. On entry ecx = (sign and biased
; exponent) - 1. The biased exponent is recovered; 7FFh means -inf or a
; negative-signed NaN and is sorted out in NEG_INF_NAN, everything else
; is a negative finite number.
INPUT_NEGATIVE:
|
|
|
|
add ecx,1 ; undo the earlier "sub ecx,1"
|
|
and ecx, 7ffH ; drop the sign bit, keep the biased exponent
|
|
cmp ecx, 7ffH
|
|
jae NEG_INF_NAN
|
|
|
|
; Negative finite input (fall through) or -inf (jumped to from
; NEG_INF_NAN): produce a NaN via 0/0, which also raises Invalid, then
; fall through into CALL_LIBM_ERROR with tag 9.
NEG_NORMAL_INFINITY:
|
|
|
|
; xmm1=0
|
|
xorpd xmm1, xmm1
|
|
; raise Invalid
|
|
divsd xmm1, xmm1 ; xmm1 = 0/0, stored as retval by CALL_LIBM_ERROR
|
|
mov edx, 9 ; tag passed as the 4th argument of __libm_error_support
|
|
|
|
; Common error exit.
; In:  edx  = error tag, xmm1 (low quadword) = initial return value,
;      [esp] = return address, [esp+4] = original X
; Out: ST(0) = the retval slot reloaded after the call
; Stack layout after "sub esp, 28":
;   [esp+0]  arg1      -> address of X
;   [esp+4]  arg2      -> address of X (log10 has a single argument)
;   [esp+8]  retval    -> address of the slot at [esp+16]
;   [esp+12] input_tag
;   [esp+16] 8-byte retval slot
;   [esp+28] return address, [esp+32] X
CALL_LIBM_ERROR:
|
|
|
|
;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
|
|
sub esp, 28
|
|
movlpd QWORD PTR [esp+16], xmm1 ; initial retval
|
|
mov DWORD PTR [esp+12],edx ; input_tag
|
|
mov edx, esp
|
|
add edx,16 ; edx = address of the retval slot
|
|
mov DWORD PTR [esp+8],edx
|
|
add edx,16 ; edx = esp+32 = address of X
|
|
mov DWORD PTR [esp+4],edx
|
|
mov DWORD PTR [esp],edx
|
|
call NEAR PTR __libm_error_support
|
|
fld QWORD PTR [esp+16] ; return the retval slot in ST(0)
|
|
add esp,28
|
|
ret
|
|
|
|
|
|
|
|
; X has its sign bit set and a biased exponent of 7FFh. ORs the low 32
; bits of X with the low 20 bits of its high dword (the 52 mantissa
; bits): zero means -inf, which is handled like a negative finite
; input; nonzero means a NaN, reported with tag 1001. The commented-out
; sequence is an earlier inline version that returned X+X directly; it
; is kept unchanged.
; NOTE(review): on the NaN path xmm1 holds the result of the compare
; against Zero made in SPECIAL_CASES (all zero bits in the low quadword
; for a nonzero X), and that is what CALL_LIBM_ERROR stores as the
; initial retval. Presumably __libm_error_support overwrites it --
; confirm against its implementation.
NEG_INF_NAN:
|
|
|
|
movlpd xmm2, QWORD PTR [esp+4]
|
|
movlpd xmm0, QWORD PTR [esp+4]
|
|
movd eax, xmm2 ; eax = low 32 bits of X
|
|
psrlq xmm2, 32
|
|
movd ecx, xmm2 ; ecx = high 32 bits of X
|
|
and ecx, 0fffffH ; eliminate sign/exponent
|
|
or eax, ecx ; zero iff all 52 mantissa bits are zero
|
|
cmp eax,0
|
|
jz NEG_NORMAL_INFINITY ; negative infinity
|
|
|
|
; addsd xmm0, xmm0
|
|
; sub esp,16
|
|
; movlpd QWORD PTR [esp+4], xmm0
|
|
; fld QWORD PTR [esp+4]
|
|
; add esp, 16
|
|
; ret
|
|
mov edx, 1001 ; tag passed as the 4th argument of __libm_error_support
|
|
jmp CALL_LIBM_ERROR
|
|
|
|
|
|
_CIlog10_pentium4 ENDP
|
|
|
|
ALIGN 16
|
|
_TEXT ENDS
|
|
|
|
END
|
|
|