Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

299 lines
12 KiB

;/* File: "atan_wmt.asm". */
;//
;// INTEL CORPORATION PROPRIETARY INFORMATION
;// This software is supplied under the terms of a license agreement or
;// nondisclosure agreement with Intel Corporation and may not be copied
;// or disclosed except in accordance with the terms of that agreement.
;// Copyright (c) 2000 Intel Corporation. All Rights Reserved.
;//
;//
;// Contents: atan.
;//
;// Purpose: Libm
;//
; Assembler setup: instruction sets, memory model, external symbols.
.686P                                   ; accept the Pentium Pro (P6) instruction set
.387                                    ; accept x87 coprocessor instructions (fld/fstp/fmul/fadd are used below)
.XMM                                    ; accept the XMM (SSE) instructions used throughout this file
.MODEL FLAT,C                           ; flat 32-bit model, C naming/calling convention by default
EXTRN C __libm_error_support : NEAR     ; error reporter, called from the NaN path of atan
CONST SEGMENT PARA PUBLIC USE32 'CONST'
ALIGN 16                                ; constants below are read with 16-byte (movapd/andpd/addpd) accesses
EXTRN C _atan_table:QWORD               ; table of qwords defined in another module (see atan_table.c per the notes below)
_atn TEXTEQU <_atan_table>              ; short alias used when indexing the table
;/*
;// FUNCTION: double atan(double x)
;//
;// DESCRIPTION:
;//
;// 1. For |x| < 2^(-27), where atan(x) ~= x, return x.
;// 2. For |x| >= 0.1633123935319536975596774e+17, where atan(x) ~= +-Pi/2, return +-Pi/2.
;// 3. In interval [0.0,0.03125] polynomial approximation of atan(x)=x-x*P(x^2).
;// 4. In interval [0.03125,0.375] polynomial approximation of atan(x)=x-x*D(x^2).
;// 5. In interval [0.375,8.0] we compute ind and eps such that x=0.03125*ind+eps and 0.0<eps<0.03125.
;// Let s=0.03125*ind, then atan(x)=atan(s)+atan(t), where t=((x-s)/(1+x*s)). The hi and lo parts of
;// atan(s) are stored in a table (see file atan_table.c): atn[ind]+atn[ind+1]=atan(s).
;// atan(t) is approximated as atan(t)=t-t*P(t^2).
;// 6. In interval [8.0,0.1633123935319536975596774e+17] atan(x)=Pi/2+atan(-1/x).
;// atan(-1/x) is approximated atan(t)=t-t*P(t^2), where t=-1/x.
;// 7. For x < 0.0 atan(x) = -atan(|x|).
;// 8. Special cases:
;// atan(+0) = +0;
;// atan(-0) = -0;
;// atan(+INF) = +Pi/2;
;// atan(-INF) = -Pi/2;
;// atan(NaN) = NaN.
;//
;// KEYS OF COMPILER: -c -w -Zl -Di386 /QIfdiv-
;*/
; ---------------------------------------------------------------------
; Constants. Except for libm_small, every entry is a pair of qwords
; (16 bytes), so each one stays 16-byte aligned after the ALIGN 16 above.
; ---------------------------------------------------------------------

; Bit masks.
_mexp DQ 07ff0000000000000H, 07ff0000000000000H ; exponent-field mask (same bit pattern as +Inf)
_mabs DQ 07fffffffffffffffH, 07fffffffffffffffH ; everything except the sign bit: x AND _mabs = |x|

; Saturated result, indexed by the sign bit of x: [0] = +Pi/2, [1] = -Pi/2.
_pi_2d DQ 03ff921fb54442d18H, 0bff921fb54442d18H

; Integer bias subtracted from ((|x| + 8.0) >> 44) when forming the table index.
_cntshf DQ 00000000000040201H, 00000000000040201H

; Coefficient pairs for D(x^2), used for 0.03125 <= |x| < 0.375.
; The code loads _d01 first and folds in the others in the order
; _d23, _d45, _d67, _d89, _d1011, _d1213, _d1400 (Horner steps in x^4).
; Each pair feeds two independent lanes that are summed at the end.
_d1400 DQ 03fd5555555555552H, 00000000000000000H
_d1213 DQ 03fc249249246aa76H, 0bfc99999999992acH
_d1011 DQ 03fb745d15933de8aH, 0bfbc71c71b835923H
_d89 DQ 03fb110f5eeb76ecaH, 0bfb3b1390a3b9899H
_d67 DQ 03faae4492fe3a600H, 0bfae1c1704144b68H
_d45 DQ 03fa51fa164891abeH, 0bfa8171d55d53138H
_d23 DQ 03f974721481ca2a2H, 0bfa124ce2388f2cbH
_d01 DQ 03f66107c30e0b8a5H, 0bf866e5652b14bbdH

; Coefficient pairs for P(x^2), used for 2^(-27) <= |x| < 0.03125 and for
; the reduced argument t. Loaded in the order _p01, _p23, _p45, _p60.
_p60 DQ 03fd55555555554ebH, 00000000000000000H
_p45 DQ 03fc249249014497eH, 0bfc9999999976718H
_p23 DQ 03fb7453ba342480fH, 0bfbc71c4eebfb10eH
_p01 DQ 03fae9be97b0f8d08H, 0bfb39ad683f878c6H

; Simple numeric constants.
_zero DQ 00000000000000000H, 00000000000000000H ; 0.0
_onen DQ 0bff0000000000000H, 0bff0000000000000H ; -1.0
_one DQ 03ff0000000000000H, 03ff0000000000000H ; +1.0
_cnst8 DQ 04020000000000000H, 04020000000000000H ; 8.0, added to |x| before extracting the table index

; Interval boundaries compared against |x|.
_in3 DQ 04020000000000000H, 04020000000000000H ; 8.0
_in2 DQ 03fd8000000000000H, 03fd8000000000000H ; 0.375
_in1 DQ 03fa0000000000000H, 03fa0000000000000H ; 0.03125 = 2^(-5)
_in0 DQ 03e40000000000000H, 03e40000000000000H ; 2^(-27)
_in DQ 0434d02967c31cdb5H, 0434d02967c31cdb5H ; 0.1633123935319536975596774e+17 (see DESCRIPTION above)
_minval DQ 00010000000000000H, 00010000000000000H ; 2^(-1022), smallest positive normal double

; 2^(-991). Squared or added on the x87 stack to provoke the
; underflow/inexact conditions noted in the code comments.
libm_small DQ 00200000000000000H
CONST ENDS
; _x expands textually to esp+4. At _atan_pentium4 that is the argument
; slot just above the return address; the _CIatan_pentium4 entry stores x
; at [esp] and then CALLs the shared body so that the same expression
; addresses its copy. Code that adjusts esp must restore it before
; using [_x] again.
_x TEXTEQU <esp+4>
; Lets the source write XMMWORD PTR for 16-byte operands (assembled as OWORD).
XMMWORD TEXTEQU <OWORD>
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
ALIGN 4
; Both entry points are exported with C naming.
PUBLIC C _atan_pentium4, _CIatan_pentium4
;-----------------------------------------------------------------------
; double atan(double x) -- SSE2 implementation, 32-bit flat model.
;
; Entry points
;   _CIatan_pentium4  x arrives in ST(0). It is popped into a 16-byte
;                     aligned stack slot and the shared body is entered
;                     with CALL, so that [_x] (= [esp+4]) addresses it.
;   _atan_pentium4    x is read from [_x] (= [esp+4]) on entry.
;
; Result   ST(0) on every path.
; Memory   the polynomial/table paths store the result into [_x] and
;          reload it with fld, so the argument slot is overwritten.
; Writes   eax, edx, xmm0-xmm7, EFLAGS, x87 stack top. The NaN path also
;          calls __libm_error_support.
;
; Register roles in the shared body
;   xmm7  x in both lanes
;   xmm2  |x| in both lanes; later the reduced argument t in both lanes
;   xmm6  sign bit of x in the low lane (table paths only)
;   eax   3 * table index (table paths), or sign bit of x (bigx)
;
; Polynomial evaluation scheme (P and D alike)
;   Coefficients are stored in pairs and evaluated with packed Horner
;   steps in x^4. Only the low lane is then multiplied by x^2 (mulsd);
;   the two lanes are added to give the polynomial value.
;-----------------------------------------------------------------------
_CIatan_pentium4 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                          ; room for one double
        and     esp, 0fffffff0h                 ; align the slot to 16 bytes
        fstp    qword ptr [esp]                 ; pop x from ST(0) into the slot
        movq    xmm7, qword ptr [esp]
        call    start                           ; pushes return address: slot is now [esp+4] = [_x]
        leave                                   ; undo alignment and frame
        ret

_atan_pentium4 label proc
        movq    xmm7, QWORD PTR [_x]            ; x
start:
        unpcklpd xmm7, xmm7                     ; x in both lanes
        movapd  xmm2, xmm7
        andpd   xmm2, XMMWORD PTR _mabs         ; |x|
        ; ucomisd sets the same flags as comisd but signals Invalid only
        ; for signaling NaNs, so a quiet NaN reaches x_nan without
        ; raising (or trapping on) an SSE invalid-operation exception.
        ucomisd xmm2, XMMWORD PTR _in           ; |x| < 0.1633123935319536975596774e+17 ?
        jp      x_nan                           ; unordered => x is NaN
        jae     bigx
        ; From here on x is known not to be NaN.
        comisd  xmm2, XMMWORD PTR _in1          ; |x| < 0.03125 ?
        jae     xge0_03125
        comisd  xmm2, XMMWORD PTR _in0          ; |x| < 2^(-27) ?
        jb      retx                            ; atan(x) ~= x

        ; 2^(-27) <= |x| < 0.03125: atan(x) = x - x*P(x^2)
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; x^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; x^4
        movapd  xmm5, XMMWORD PTR _p01          ; packed Horner in x^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane only: * x^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; P(x^2) = low lane + high lane
        mulsd   xmm5, xmm7                      ; x * P(x^2)
        subsd   xmm7, xmm5                      ; x - x * P(x^2)
        movq    QWORD PTR [_x], xmm7            ; move result to ST(0) via memory
        fld     QWORD PTR [_x]
        ret

xge0_03125:                                     ; |x| >= 0.03125
        comisd  xmm2, XMMWORD PTR _in2          ; |x| < 0.375 ?
        jae     xge0_375

        ; 0.03125 <= |x| < 0.375: atan(x) = x - x*D(x^2)
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; x^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; x^4
        movapd  xmm5, XMMWORD PTR _d01          ; packed Horner in x^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d67
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d89
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1011
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1213
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1400
        mulsd   xmm5, xmm1                      ; low lane only: * x^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; D(x^2) = low lane + high lane
        mulsd   xmm5, xmm7                      ; x * D(x^2)
        subsd   xmm7, xmm5                      ; x - x * D(x^2)
        movq    QWORD PTR [_x], xmm7            ; move result to ST(0) via memory
        fld     QWORD PTR [_x]
        ret

xge0_375:                                       ; |x| >= 0.375
        movq    xmm6, xmm7                      ; x
        xorpd   xmm6, xmm2                      ; x XOR |x| = sign bit of x (low lane)
        comisd  xmm2, XMMWORD PTR _in3          ; |x| < 8.0 ?
        jae     xge8_0

        ; 0.375 <= |x| < 8.0:
        ; atan(|x|) = atan(s) + atan(t), t = (|x|-s)/(1+|x|*s)
        movq    xmm0, XMMWORD PTR _cnst8
        movq    xmm5, XMMWORD PTR _cntshf
        movq    xmm3, xmm2                      ; |x|
        addsd   xmm3, xmm0                      ; |x| + 8.0 fixes the exponent field
        psrlq   xmm3, 44                        ; keep exponent + top 8 mantissa bits
        psubd   xmm3, xmm5                      ; subtract the _cntshf bias
        movd    eax, xmm3                       ; ind
        lea     eax, DWORD PTR [eax+eax*2]      ; ind*3: table is addressed in groups of three qwords
        movq    xmm5, QWORD PTR _atn[eax*8+16]  ; s (third qword of the group)
        movq    xmm3, xmm2                      ; |x|
        subsd   xmm2, xmm5                      ; |x|-s
        mulsd   xmm3, xmm5                      ; |x|*s
        addsd   xmm3, XMMWORD PTR _one          ; 1+|x|*s
        divsd   xmm2, xmm3                      ; t = (|x|-s)/(1+|x|*s)
        unpcklpd xmm2, xmm2                     ; t in both lanes
        jmp     clcpol

xge8_0:                                         ; |x| >= 8.0
        ; 8.0 <= |x| < 0.1633123935319536975596774e+17:
        ; atan(|x|) = Pi/2 + atan(t), t = -1/|x|
        mov     eax, 768                        ; ind*3 - entry point in table, where lo and hi part of Pi/2
        movq    xmm0, xmm2                      ; |x|
        movq    xmm2, XMMWORD PTR _onen
        divsd   xmm2, xmm0                      ; t = -1/|x|
        unpcklpd xmm2, xmm2                     ; t in both lanes

clcpol:
        ; Common tail: result = hi + lo + t - t*P(t^2), then apply sign of x.
        movq    xmm0, QWORD PTR _atn[0+eax*8]   ; hi part of atan(s) or Pi/2
        movq    xmm4, QWORD PTR _atn[8+eax*8]   ; lo part of atan(s) or Pi/2
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; t^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; t^4
        movapd  xmm5, XMMWORD PTR _p01          ; packed Horner in t^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane only: * t^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; P(t^2) = low lane + high lane
        ; atan(|x|) = hi - ((t*P(t^2) - lo) - t)
        mulsd   xmm5, xmm2                      ; t*P(t^2)
        subsd   xmm5, xmm4                      ; t*P(t^2) - lo
        subsd   xmm5, xmm2                      ; (t*P(t^2) - lo) - t
        subsd   xmm0, xmm5                      ; hi - ((t*P(t^2) - lo) - t)
        orpd    xmm0, xmm6                      ; atan(x) = sign(x) * atan(|x|)
        movq    QWORD PTR [_x], xmm0            ; move result to ST(0) via memory
        fld     QWORD PTR [_x]
        ret

retx:                                           ; |x| < 2^(-27): atan(x) ~= x
        comisd  xmm2, XMMWORD PTR _zero         ; x == 0 ?
        jne     notzero
        fld     QWORD PTR [_x]                  ; return x itself, keeping the sign of zero
        ret
notzero:
        comisd  xmm2, XMMWORD PTR _minval       ; |x| < minval ?
        jae     ge_minval
        ; 0 < |x| < minval: return x + tiny, going through a double-sized
        ; store of tiny first.
        fld     QWORD PTR libm_small
        fmul    QWORD PTR libm_small            ; tiny = libm_small^2
        sub     esp, 8                          ; scratch slot ([_x] is off by 8 until restored)
        fstp    QWORD PTR [esp]                 ; should be flag UNDERFLOW
        fld     QWORD PTR [esp]
        add     esp, 8                          ; [_x] is valid again
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret
ge_minval:                                      ; minval <= |x| < 2^(-27)
        fld     QWORD PTR libm_small
        fmul    QWORD PTR libm_small            ; tiny = libm_small^2
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret

bigx:                                           ; |x| >= 0.1633123935319536975596774e+17 (includes +-INF)
        ; NaNs have already branched to x_nan at the first compare, so
        ; no further NaN test is needed here.
        mov     eax, DWORD PTR [_x+4]           ; high dword of x
        shr     eax, 31                         ; sign of x: 0 or 1
        fld     QWORD PTR libm_small
        fadd    QWORD PTR _pi_2d[eax*8]         ; should be inexact result
        ret                                     ; return +-Pi/2

x_nan:
        mov     edx, 1003                       ; input_tag passed to the error handler
        ;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
        sub     esp, 16                         ; four dword arguments
        mov     DWORD PTR [esp+12], edx         ; input_tag
        mov     edx, esp
        add     edx, 16+4                       ; address of x (the [_x] slot before the sub above)
        mov     DWORD PTR [esp+8], edx          ; retval -> x
        mov     DWORD PTR [esp+4], edx          ; arg2   -> x
        mov     DWORD PTR [esp], edx            ; arg1   -> x
        call    NEAR PTR __libm_error_support
        add     esp, 16                         ; caller pops the arguments
        fld     QWORD PTR [_x]                  ; load whatever is now in the x/retval slot
        ret                                     ; return same nan
ALIGN 4
_CIatan_pentium4 ENDP
_TEXT ENDS
END