;//
;//               INTEL CORPORATION PROPRIETARY INFORMATION
;//  This software is supplied under the terms of a license agreement or
;//  nondisclosure agreement with Intel Corporation and may not be copied
;//  or disclosed except in accordance with the terms of that agreement.
;//        Copyright (c) 2000 Intel Corporation. All Rights Reserved.
;//
;//
;  log_wmt.asm
;
;  double log(double);
;
;  Initial version: 12/15/2000
;  Updated with bug fixes: 2/20/2001
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                       ;;
;; Another important feature is that we use the table of log(1/B)        ;;
;; throughout. To ensure numerical accuracy, we only need to ensure that ;;
;; T(0)_hi = B(last)_hi, T(0)_lo = B(last)_lo. This ensures W_hi = 0 and ;;
;; W_lo = 0 exactly in the case of |X-1| <= 2^(-7).                      ;;
;; Finally, we do away with the need for extra-precision addition by the ;;
;; following observation. The three pieces at the end are                ;;
;; A = W_hi + r_hi; B = r_lo; C = P + W_lo.                              ;;
;; When W_hi = W_lo = 0, the addition sequence (A+B) + C is accurate as  ;;
;; the sum A+B is exact.                                                 ;;
;; Otherwise, A + (B+C) is accurate as B is going to be largely shifted  ;;
;; off compared to the final result.                                     ;;
;; Hence if we use compare and mask operations to                        ;;
;; create alpha = (r_lo or 0), beta = (0 or r_lo), Res_hi <- W_hi+alpha, ;;
;; Res_lo <- C + beta, then result is accurately computed as             ;;
;; Res_hi+Res_lo.                                                        ;;
;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;---------------------------------------------------------------------------
; Assembler setup (MASM syntax, 32-bit flat model).
;---------------------------------------------------------------------------
        .686P                           ; Pentium Pro instruction set (cmove is used below)
        .387                            ; x87 FPU instructions (fld / fstp)
        .XMM                            ; SSE/SSE2 instructions and xmm registers
        .MODEL  FLAT,C                  ; flat memory model, C language type

; Error-reporting routine defined outside this file; called from
; CALL_LIBM_ERROR with (arg1 ptr, arg2 ptr, retval ptr, tag) on the stack.
EXTRN   C __libm_error_support : NEAR

;---------------------------------------------------------------------------
; Read-only constants for log().
; Every named constant is a pair of quadwords (16 bytes, 16-byte aligned).
; When loaded with movapd the first DQ lands in the low lane and the second
; in the high lane; the bracket notation in the comments is [high|low].
;---------------------------------------------------------------------------
CONST   SEGMENT PARA PUBLIC USE32 'CONST'
        ALIGN   16

emask   DQ 000FFFFFFFFFFFFFH, 000FFFFFFFFFFFFFH  ; mask off sign/expo field
Magic   DQ 428FFFFFFFFFF80FH, 428FFFFFFFFFF80FH  ; 2^(42)-1+2^(-7)
hi_mask DQ 7FFFFFFFFFE00000H, 7FFFFFFFFFE00000H  ; clears sign bit and bottom 21 bits
LOG_2   DQ 3FE62E42FEFA3800H, 3D2EF35793C76730H  ; L_hi,L_lo -> [L_lo|L_hi]

; Two lane-select masks, addressed as [edx+place_L] with edx = 0 or 16.
place_L DQ 0000000000000000H,0FFFFFFFFFFFFFFFFH  ; 0,1 -> [FF..FF|00..00]
        DQ 0FFFFFFFFFFFFFFFFH, 0000000000000000H ; 1,0 -> [00..00|FF..FF]

One     DQ 3ff0000000000000H, 3ff0000000000000H  ; 1,1
Zero    DQ 0000000000000000H, 0000000000000000H  ; 0,0
Two52   DQ 4330000000000000H, 4330000000000000H  ; 2^52 for normalization
Infs    DQ 0FFF0000000000000H, 7FF0000000000000H ; -inf,+inf --> [+inf|-inf]
NaN     DQ 7FF0000000000001H, 7FF0000000000001H  ; NaN for log(-ve), log(Nan)
                                                 ; (not referenced by the code in this file)

; Polynomial coefficients, three 16-byte rows read as coeff, coeff+16, coeff+32.
coeff   DQ 3FC24998090DC555H, 0BFCFFFFFFF201E13H ; p6,p3 ->[p3|p6]
        DQ 0BFC555C54DD57D75H, 3FD55555555555A7H ; p5,p2 ->[p2|p5]
        DQ 3FC9999998867A53H, 0BFE000000000001CH ; p4,p1 ->[p1|p4]

;-------Table B-----------
; 65 entries of 16 bytes each; every entry holds the same value in both
; lanes so that movapd yields [B|B]. The code addresses it as [eax+B_Tbl]
; where eax is already index*16. Values run from 1.0 (entry 0) down to
; 0.5 (entry 64).
B_Tbl   DQ 3FF0000000000000H, 3FF0000000000000H
        DQ 3FEF820000000000H, 3FEF820000000000H
        DQ 3FEF080000000000H, 3FEF080000000000H
        DQ 3FEE920000000000H, 3FEE920000000000H
        DQ 3FEE1E0000000000H, 3FEE1E0000000000H
        DQ 3FEDAE0000000000H, 3FEDAE0000000000H
        DQ 3FED420000000000H, 3FED420000000000H
        DQ 3FECD80000000000H, 3FECD80000000000H
        DQ 3FEC720000000000H, 3FEC720000000000H
        DQ 3FEC0E0000000000H, 3FEC0E0000000000H
        DQ 3FEBAC0000000000H, 3FEBAC0000000000H
        DQ 3FEB4E0000000000H, 3FEB4E0000000000H
        DQ 3FEAF20000000000H, 3FEAF20000000000H
        DQ 3FEA980000000000H, 3FEA980000000000H
        DQ 3FEA420000000000H, 3FEA420000000000H
        DQ 3FE9EC0000000000H, 3FE9EC0000000000H
        DQ 3FE99A0000000000H, 3FE99A0000000000H
        DQ 3FE9480000000000H, 3FE9480000000000H
        DQ 3FE8FA0000000000H, 3FE8FA0000000000H
        DQ 3FE8AC0000000000H, 3FE8AC0000000000H
        DQ 3FE8620000000000H, 3FE8620000000000H
        DQ 3FE8180000000000H, 3FE8180000000000H
        DQ 3FE7D00000000000H, 3FE7D00000000000H
        DQ 3FE78A0000000000H, 3FE78A0000000000H
        DQ 3FE7460000000000H, 3FE7460000000000H
        DQ 3FE7020000000000H, 3FE7020000000000H
        DQ 3FE6C20000000000H, 3FE6C20000000000H
        DQ 3FE6820000000000H, 3FE6820000000000H
        DQ 3FE6420000000000H, 3FE6420000000000H
        DQ 3FE6060000000000H, 3FE6060000000000H
        DQ 3FE5CA0000000000H, 3FE5CA0000000000H
        DQ 3FE58E0000000000H, 3FE58E0000000000H
        DQ 3FE5560000000000H, 3FE5560000000000H
        DQ 3FE51E0000000000H, 3FE51E0000000000H
        DQ 3FE4E60000000000H, 3FE4E60000000000H
        DQ 3FE4B00000000000H, 3FE4B00000000000H
        DQ 3FE47A0000000000H, 3FE47A0000000000H
        DQ 3FE4460000000000H, 3FE4460000000000H
        DQ 3FE4140000000000H, 3FE4140000000000H
        DQ 3FE3E20000000000H, 3FE3E20000000000H
        DQ 3FE3B20000000000H, 3FE3B20000000000H
        DQ 3FE3820000000000H, 3FE3820000000000H
        DQ 3FE3520000000000H, 3FE3520000000000H
        DQ 3FE3240000000000H, 3FE3240000000000H
        DQ 3FE2F60000000000H, 3FE2F60000000000H
        DQ 3FE2CA0000000000H, 3FE2CA0000000000H
        DQ 3FE29E0000000000H, 3FE29E0000000000H
        DQ 3FE2740000000000H, 3FE2740000000000H
        DQ 3FE24A0000000000H, 3FE24A0000000000H
        DQ 3FE2200000000000H, 3FE2200000000000H
        DQ 3FE1F80000000000H, 3FE1F80000000000H
        DQ 3FE1D00000000000H, 3FE1D00000000000H
        DQ 3FE1A80000000000H, 3FE1A80000000000H
        DQ 3FE1820000000000H, 3FE1820000000000H
        DQ 3FE15C0000000000H, 3FE15C0000000000H
        DQ 3FE1360000000000H, 3FE1360000000000H
        DQ 3FE1120000000000H, 3FE1120000000000H
        DQ 3FE0EC0000000000H, 3FE0EC0000000000H
        DQ 3FE0CA0000000000H, 3FE0CA0000000000H
        DQ 3FE0A60000000000H, 3FE0A60000000000H
        DQ 3FE0840000000000H, 3FE0840000000000H
        DQ 3FE0620000000000H, 3FE0620000000000H
        DQ 3FE0420000000000H, 3FE0420000000000H
        DQ 3FE0200000000000H, 3FE0200000000000H
        DQ 3FE0000000000000H, 3FE0000000000000H

;-------Table T_hi,T_lo so that movapd gives [ T_lo | T_hi ]
; 65 entries of 16 bytes each, addressed as [eax+T_Tbl] with the same byte
; offset used for B_Tbl. Entry 0 is exactly zero in both halves and the
; last entry carries the same bit patterns as LOG_2 (L_hi, L_lo).
; NOTE(review): per the file header these are presumably log(1/B) split
; into high and low parts -- confirm against the table generator.
T_Tbl   DQ 0000000000000000H, 0000000000000000H
        DQ 3F8FBEA8B13C0000H, 3CDEC927B17E4E13H
        DQ 3F9F7A9B16780000H, 3D242AD9271BE7D7H
        DQ 3FA766D923C20000H, 3D1FF0A82F1C24C1H
        DQ 3FAF0C30C1114000H, 3D31A88653BA4140H
        DQ 3FB345179B63C000H, 3D3D4203D36150D0H
        DQ 3FB6EF528C056000H, 3D24573A51306A44H
        DQ 3FBA956D3ECAC000H, 3D3E63794C02C4AFH
        DQ 3FBE2507702AE000H, 3D303B433FD6EEDCH
        DQ 3FC0D79E7CD48000H, 3D3CB422847849E4H
        DQ 3FC299D30C606000H, 3D3D4D0079DC08D9H
        DQ 3FC44F8B726F8000H, 3D3DF6A4432B9BB4H
        DQ 3FC601B076E7A000H, 3D3152D7D4DFC8E5H
        DQ 3FC7B00916515000H, 3D146280D3E606A3H
        DQ 3FC9509AA0044000H, 3D3F1E675B4D35C6H
        DQ 3FCAF6895610D000H, 3D375BEBBA042B64H
        DQ 3FCC8DF7CB9A8000H, 3D3EEE42F58E1E6EH
        DQ 3FCE2A877A6B2000H, 3D3823817787081AH
        DQ 3FCFB7D86EEE3000H, 3D371FCF1923FB43H
        DQ 3FD0A504E97BB000H, 3D303094E6690C44H
        DQ 3FD1661CAECB9800H, 3D2D1C000C076A8BH
        DQ 3FD22981FBEF7800H, 3D17AF7A7DA9FC99H
        DQ 3FD2E9E2BCE12000H, 3D24300C128D1DC2H
        DQ 3FD3A71C56BB4800H, 3D08C46FB5A88483H
        DQ 3FD4610BC29C5800H, 3D385F4D833BCDC7H
        DQ 3FD51D1D93104000H, 3D35B0FAA20D9C8EH
        DQ 3FD5D01DC49FF000H, 3D2740AB8CFA5ED3H
        DQ 3FD68518244CF800H, 3D28722FF88BF119H
        DQ 3FD73C1800DC0800H, 3D3320DBF75476C0H
        DQ 3FD7E9883FA49800H, 3D3FAFF96743F289H
        DQ 3FD898D38A893000H, 3D31F666071E2F57H
        DQ 3FD94A0428036000H, 3D30E7BCB08C6B44H
        DQ 3FD9F123F4BF6800H, 3D36892015F2401FH
        DQ 3FDA99FCABDB8000H, 3D11E89C5F87A311H
        DQ 3FDB44977C148800H, 3D3C6A343FB526DBH
        DQ 3FDBEACD9E271800H, 3D268A6EDB879B51H
        DQ 3FDC92B7D6BB0800H, 3D10FE9FFF876CC2H
        DQ 3FDD360E90C38000H, 3D342CDB58440FD6H
        DQ 3FDDD4AA04E1C000H, 3D32D8512DF01AFDH
        DQ 3FDE74D262788800H, 3CFEB945ED9457BCH
        DQ 3FDF100F6C2EB000H, 3D2CCE779D37F3D8H
        DQ 3FDFACC89C9A9800H, 3D163E0D100EC76CH
        DQ 3FE02582A5C9D000H, 3D222C6C4E98E18CH
        DQ 3FE0720E5C40DC00H, 3D38E27400B03FBEH
        DQ 3FE0BF52E7353800H, 3D19B5899CD387D3H
        DQ 3FE109EB9E2E4C00H, 3D12DA67293E0BE7H
        DQ 3FE15533D3B8D400H, 3D3D981CA8B0D3C3H
        DQ 3FE19DB6BA0BA400H, 3D2B675885A4A268H
        DQ 3FE1E6DF676FF800H, 3D1A58BA81B983AAH
        DQ 3FE230B0D8BEBC00H, 3D12FC066E48667BH
        DQ 3FE2779E1EC93C00H, 3D36523373359B79H
        DQ 3FE2BF29F9841C00H, 3CFD8A3861D3B7ECH
        DQ 3FE30757344F0C00H, 3D309BE85662F034H
        DQ 3FE34C80A8958000H, 3D1D4093FCAC34BDH
        DQ 3FE39240DDE5CC00H, 3D3493DBEAB758B3H
        DQ 3FE3D89A6B1A5400H, 3D28C7CD5FA81E3EH
        DQ 3FE41BCFF4860000H, 3D076FD6B90E2A84H
        DQ 3FE4635BCF40DC00H, 3D2CE8D5D412CAADH
        DQ 3FE4A3E862342400H, 3D224FA993F78464H
        DQ 3FE4E8D015786C00H, 3D38B1C0D0303623H
        DQ 3FE52A6D269BC400H, 3D30022268F689C9H
        DQ 3FE56C91D71CF800H, 3CE07BAFD1366E9EH
        DQ 3FE5AB505B390400H, 3CD5627AF66563FAH
        DQ 3FE5EE82AA241800H, 3D2202380CDA46BEH
        DQ 3FE62E42FEFA3800H, 3D2EF35793C76730H

        ALIGN   16
CONST   ENDS

;---------------------------------------------------------------------------
; $cmpsd op1, op2, op3
;
; Emits a scalar-double compare for assemblers that lack the cmpsd
; mnemonic: it assembles `cmppd op1, op2, op3`, then moves the location
; counter back to the start of that instruction and overwrites its first
; byte with 0F2h, and finally restores the location counter to the end of
; the instruction. op1/op2/op3 are passed through to cmppd unchanged
; (destination xmm register, source operand, comparison predicate).
;---------------------------------------------------------------------------
$cmpsd  MACRO   op1, op2, op3
        LOCAL   begin_cmpsd, end_cmpsd
begin_cmpsd:
        cmppd   op1, op2, op3           ; packed form supplies opcode + operands
end_cmpsd:
        org     begin_cmpsd             ; rewind to the first byte of the cmppd
        db      0F2h                    ; replace the leading prefix byte
        org     end_cmpsd               ; continue assembling after the instruction
        ENDM

_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
        ALIGN   16

PUBLIC  _log_pentium4, _CIlog_pentium4

;---------------------------------------------------------------------------
; double log(double X) -- SSE2 implementation with two entry points.
;
;   _CIlog_pentium4  X is popped from ST(0), spilled to an aligned stack
;                    slot, and the shared body is entered via `call start`.
;   _log_pentium4    X is read from [esp+4].
;
; Out:     result in ST(0).
; Uses (without saving): eax, ecx, edx, xmm0-xmm7, flags.
; Layout:  at `start` the return address is at [esp] and a copy of X is at
;          [esp+4] for BOTH entry points. SPECIAL_CASES, NEG_INF_NAN and
;          CALL_LIBM_ERROR rely on this to re-read X or take its address.
; edx:     exponent adjustment, 0 on entry and -52 after a denormal input
;          has been rescaled by 2^52 (see INPUT_DENORM).
;---------------------------------------------------------------------------
_CIlog_pentium4 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                          ; for argument DBLSIZE
        and     esp, 0fffffff0h                 ; 16-byte align the spill slot
        fstp    qword ptr [esp]                 ; pop X from the x87 stack
        movq    xmm0, qword ptr [esp]
        call    start                           ; pushes ret addr => X is at [esp+4]
        leave
        ret

;----------------------;
;--Argument Reduction--;
;----------------------;
_log_pentium4 label proc
        movlpd  xmm0, QWORD PTR [4+esp]         ;... load X to low part of xmm0
start:
        mov     edx,0                           ;... set edx to 0

DENORMAL_RETRY:

        movapd  xmm5,xmm0
        unpcklpd xmm0,xmm0                      ;... [X|X]

        psrlq   xmm5,52
        pextrw  ecx,xmm5,0                      ;... sign and biased exponent of X

        movapd  xmm1, QWORD PTR [emask]         ;... pair of 000FF...FF
        movapd  xmm3, QWORD PTR [One]           ;... pair of 3FF000...000
        movapd  xmm4, QWORD PTR [Magic]         ;... pair of 2^(42)-1+2^(-7)
        movapd  xmm6, QWORD PTR [hi_mask]       ;... pair of 7FFFFFFF..FE00000
        andpd   xmm0,xmm1
        orpd    xmm0,xmm3                       ;... [Y|Y]
        addpd   xmm4,xmm0                       ;... 11 lsb contains the index to B
                                                ;... the last 4 lsb are don't cares, the
                                                ;... 7 bits following that is the index
                                                ;... Hence by masking, we already have index*16

        pextrw  eax,xmm4,0
        and     eax,000007F0H                   ;... eax is offset
        movapd  xmm4, QWORD PTR [eax+B_Tbl]     ;... [B|B]
        movapd  xmm7, QWORD PTR [eax+T_Tbl]     ;... [T_lo|T_hi]

        andpd   xmm6,xmm0                       ;... [Y_hi|Y_hi]
        subpd   xmm0,xmm6                       ;... [Y_lo|Y_lo]
        mulpd   xmm6,xmm4                       ;... [B*Y_hi|B*Y_hi]
        subpd   xmm6,xmm3                       ;... [R_hi|R_hi]
        addsd   xmm7,xmm6                       ;... [T_lo|T_hi+R_hi]
        mulpd   xmm0,xmm4                       ;... [R_lo|R_lo]
        movapd  xmm4,xmm0                       ;... [R_lo|R_lo]
        addpd   xmm0,xmm6                       ;... [R|R]

;-----------------------------------------;
;--Approx and Reconstruction in parallel--;
;-----------------------------------------;

        ;...m is in ecx, [T_lo,T_hi+R_hi] in xmm7
        ;...xmm4 through xmm6 will be used
        and     ecx,00000FFFH                   ;... note we need sign and biased exponent
        sub     ecx,1
        cmp     ecx,2045                        ;... the largest biased exponent 2046-1
                                                ;... if ecx is ABOVE (unsigned) this, either
                                                ;... the sign is +ve and biased exponent is 7FF
                                                ;... or the sign is +ve and exponent is 0, or
                                                ;... the sign is -ve (i.e. sign bit 1)
        ja      SPECIAL_CASES

        sub     ecx,1022                        ;... m in integer format
        add     ecx,edx                         ;... this is the denormal adjustment

        cvtsi2sd xmm6,ecx
        unpcklpd xmm6,xmm6                      ;... [m | m] in FP format

        shl     ecx,10
        add     eax,ecx                         ;16*(64*m + j) 0 <=> (m=-1 & j=64) or (m=0 & j=0)
        mov     ecx,16
        mov     edx,0
        cmp     eax,0
        cmove   edx,ecx                         ;byte offset (0 or 16) into the place_L mask pair

        movapd  xmm1, QWORD PTR [coeff]         ;... loading [p3|p6]
        movapd  xmm3,xmm0
        movapd  xmm2, QWORD PTR [coeff+16]      ;... loading [p2|p5]
        mulpd   xmm1,xmm0                       ;... [p3 R | p6 R]
        mulpd   xmm3,xmm3                       ;... [R^2|R^2]
        addpd   xmm1,xmm2                       ;... [p2+p3 R |p5+p6 R]
        movapd  xmm2, QWORD PTR [coeff+32]      ;... [p1|p4]
        mulsd   xmm3,xmm3                       ;... [R^2|R^4]

        movapd  xmm5, QWORD PTR [LOG_2]         ;... loading [L_lo|L_hi]
                                                ;... [T_lo|T_hi+R_hi] already in xmm7
        mulpd   xmm6,xmm5                       ;... [m L_lo | m L_hi]
        movapd  xmm5, QWORD PTR [edx+place_L]   ;... [FF..FF|00.00] or [00..00|FF..FF]
        andpd   xmm4,xmm5                       ;... [R_lo|0] or [0|R_lo]
        addpd   xmm7,xmm6                       ;... [W_lo|W_hi]
        addpd   xmm7,xmm4                       ;... [A_lo|A_hi]

        mulpd   xmm1,xmm0                       ;... [p2 R+p3 R^2|p5 R+p6 R^2]
        mulsd   xmm3,xmm0                       ;... [R^2|R^5]
        addpd   xmm1,xmm2                       ;... [p1+.. | p4+...]

        movapd  xmm6,xmm7
        unpckhpd xmm6,xmm6                      ;... [*|A_lo]

        mulpd   xmm1,xmm3                       ;... [P_hi|P_lo]
        sub     esp, 16                         ; scratch space for the xmm -> x87 transfer
        movapd  xmm0,xmm1                       ;... copy of [P_hi|P_lo]
        unpckhpd xmm1,xmm1                      ;... [P_hi|P_hi]

        ;...[P_hi|P_lo] in xmm1 at this point
        addsd   xmm0,xmm1                       ;... [*|P]
        addsd   xmm0,xmm6
        addsd   xmm0,xmm7

        movlpd  QWORD PTR [esp+4], xmm0         ; return result
        fld     QWORD PTR [esp+4]               ; result goes back in ST(0)
        add     esp, 16
        ret

;---------------------------------------------------------------------------
; Special inputs. On entry ecx = (sign:biased exponent of X) - 1 and the
; stack still has X at [esp+4].
;---------------------------------------------------------------------------
SPECIAL_CASES:
        movlpd  xmm0, QWORD PTR [4+esp]         ;... load X again
        movapd  xmm1, QWORD PTR [Zero]
        $cmpsd  xmm1,xmm0,0
        pextrw  eax,xmm1,0                      ;... ones if X = +-0.0
        cmp     eax,0
        ja      INPUT_ZERO

        cmp     ecx,-1                          ;... ecx = -1 iff X is positive denormal
        je      INPUT_DENORM

        cmp     ecx,000007FEH
        ja      INPUT_NEGATIVE

        ; Remaining case: sign +ve, biased exponent 7FF (+inf or NaN).
        ; xmm0 still holds X in its low lane from the load at SPECIAL_CASES
        ; (nothing in between writes xmm0), so no reload is needed.
        movapd  xmm1, QWORD PTR [emask]
        movapd  xmm2, QWORD PTR [One]
        andpd   xmm0,xmm1
        orpd    xmm0,xmm2                       ;... xmm0 is 1 iff the input argument was +inf
        $cmpsd  xmm2,xmm0,0
        pextrw  eax,xmm2,0                      ;... 0 if X is NaN
        cmp     eax, 0
        je      INPUT_NaN

INPUT_INF:

        ;....Input is +Inf
        fld     QWORD PTR [Infs+8]              ; second quadword of Infs = +inf
        ret

INPUT_NaN:

;       movlpd  xmm0, QWORD PTR [esp+4]
;       addsd   xmm0, xmm0
;       sub     esp, 16
;       movlpd  QWORD PTR [esp+4], xmm0         ; return result
;       fld     QWORD PTR [esp+4]               ;
;       add     esp, 16
;       ret
        mov     edx, 1000                       ; tag passed to __libm_error_support
        jmp     CALL_LIBM_ERROR

INPUT_ZERO:

        ; raise Divide by Zero
        movlpd  xmm2, QWORD PTR [One]
        divsd   xmm2, xmm0
        movlpd  xmm1, QWORD PTR [Infs]          ; preset return value: -inf
        mov     edx, 2                          ; tag passed to __libm_error_support
        jmp     CALL_LIBM_ERROR

INPUT_DENORM:

        ;....X is a positive denormal here: +-0.0 was already dispatched to
        ;....INPUT_ZERO by the compare at the top of SPECIAL_CASES.
        ;....Scale X by 2^52 and rerun the main path with the exponent
        ;....adjustment in edx compensating for the scaling.

        movlpd  xmm1,Two52
        mulsd   xmm0,xmm1
        mov     edx,-52                         ;...set adjustment to exponent
        jmp     DENORMAL_RETRY                  ;...branch back

INPUT_NEGATIVE:

        add     ecx,1                           ; undo the earlier `sub ecx,1`
        and     ecx, 7ffH                       ; keep the biased exponent only
        cmp     ecx, 7ffH
        jae     NEG_INF_NAN                     ; exponent all ones: -inf or a NaN with sign bit set

NEG_NORMAL_INFINITY:

        ; xmm1=0
        xorpd   xmm1, xmm1
        ; raise Invalid
        divsd   xmm1, xmm1                      ; 0/0; the quotient is the preset return value
        mov     edx, 3                          ; tag passed to __libm_error_support

CALL_LIBM_ERROR:

        ;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
        ; In: xmm1 (low lane) = preset return value, edx = input_tag.
        ; Frame after `sub esp,28`:
        ;   [esp]    arg1   -> X (caller's [esp+4], i.e. esp+32 here)
        ;   [esp+4]  arg2   -> X (same address)
        ;   [esp+8]  retval -> qword at [esp+16]
        ;   [esp+12] input_tag
        ;   [esp+16] 8-byte return-value slot
        sub     esp, 28
        movlpd  QWORD PTR [esp+16], xmm1
        mov     DWORD PTR [esp+12],edx
        mov     edx, esp
        add     edx,16
        mov     DWORD PTR [esp+8],edx
        add     edx,16
        mov     DWORD PTR [esp+4],edx
        mov     DWORD PTR [esp],edx
        call    NEAR PTR __libm_error_support
;       movlpd  xmm0, QWORD PTR [esp+16]
;       movlpd  QWORD PTR [esp+16], xmm0        ; return result
        fld     QWORD PTR [esp+16]              ; return whatever is in the retval slot
        add     esp,28
        ret


NEG_INF_NAN:

        ; Distinguish -inf (mantissa == 0) from a NaN with the sign bit set.
        ; xmm0 already holds X from SPECIAL_CASES and is not read on this
        ; path, so only the working copy in xmm2 is loaded.
        movlpd  xmm2, QWORD PTR [esp+4]
        movd    eax, xmm2                       ; low 32 bits of X
        psrlq   xmm2, 32
        movd    ecx, xmm2                       ; high 32 bits of X
        and     ecx, 0fffffH                    ; eliminate sign/exponent
        or      eax, ecx                        ; eax = 0 iff all 52 mantissa bits are 0
        cmp     eax,0
        jz      NEG_NORMAL_INFINITY             ; negative infinity

;       addsd   xmm0, xmm0
;       sub     esp,16
;       movlpd  QWORD PTR [esp+4], xmm0
;       fld     QWORD PTR [esp+4]
;       add     esp, 16
;       ret
        mov     edx, 1000                       ; tag passed to __libm_error_support
        jmp     CALL_LIBM_ERROR


_CIlog_pentium4 ENDP

        ALIGN   16
_TEXT   ENDS

        END