You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
299 lines
12 KiB
299 lines
12 KiB
;/* File: "atan_wmt.asm". */
|
|
;//
|
|
;// INTEL CORPORATION PROPRIETARY INFORMATION
|
|
;// This software is supplied under the terms of a license agreement or
|
|
;// nondisclosure agreement with Intel Corporation and may not be copied
|
|
;// or disclosed except in accordance with the terms of that agreement.
|
|
;// Copyright (c) 2000 Intel Corporation. All Rights Reserved.
|
|
;//
|
|
;//
|
|
;// Contents: atan.
|
|
;//
|
|
;// Purpose: Libm
|
|
;//
|
|
|
|
;// Assembler set-up: instruction sets, memory model and external helper.
        .686P                                   ; accept Pentium Pro instructions (privileged ones included)
        .387                                    ; accept x87 coprocessor instructions (fld/fstp/fadd/fmul)
        .XMM                                    ; accept instructions that use the XMM registers
        .MODEL  FLAT,C                          ; flat memory model, C language type for names and calls

;// Error-reporting helper, called on the NaN path with
;// (void *arg1, void *arg2, void *retval, error_types input_tag).
        EXTRN   C __libm_error_support : NEAR
|
|
|
|
CONST SEGMENT PARA PUBLIC USE32 'CONST'
        ALIGN   16

        EXTRN   C _atan_table:QWORD

_atn    TEXTEQU <_atan_table>                   ; short alias for the external table

;//
;// FUNCTION: double atan(double x)
;//
;// DESCRIPTION:
;//
;//  1. |x| < 2^(-27): atan(x) ~= x, return x.
;//  2. |x| >= 0.1633123935319536975596774e+17: atan(x) ~= +-Pi/2,
;//     return +-Pi/2.
;//  3. 2^(-27) <= |x| < 0.03125: polynomial approximation
;//     atan(x) = x - x*P(x^2).
;//  4. 0.03125 <= |x| < 0.375: polynomial approximation
;//     atan(x) = x - x*D(x^2).
;//  5. 0.375 <= |x| < 8.0: compute ind and eps such that
;//     x = 0.03125*ind + eps and 0.0 <= eps < 0.03125.
;//     Let s = 0.03125*ind, then atan(x) = atan(s) + atan(t), where
;//     t = (x-s)/(1+x*s).  atan(s) is read from a table as a hi and a
;//     lo part (see file atan_table.c): atn[ind] + atn[ind+1] = atan(s).
;//     The code reads each table entry as three consecutive qwords:
;//     hi part, lo part, s.
;//     atan(t) is approximated as atan(t) = t - t*P(t^2).
;//  6. 8.0 <= |x| < 0.1633123935319536975596774e+17:
;//     atan(x) = Pi/2 + atan(-1/x), with atan(-1/x) approximated as
;//     atan(t) = t - t*P(t^2), where t = -1/x.
;//  7. For x < 0.0, atan(x) = -atan(|x|).
;//  8. Special cases:
;//       atan(+0)   = +0;
;//       atan(-0)   = -0;
;//       atan(+INF) = +Pi/2;
;//       atan(-INF) = -Pi/2;
;//       atan(NaN)  = NaN.
;//
;// KEYS OF COMPILER: -c -w -Zl -Di386 /QIfdiv-
;//

;// Every constant below except libm_small is a 16-byte pair so it can be
;// used as a packed-double memory operand.  The first qword is the low
;// lane, the second qword is the high lane.

;// ---- masks, selectors and index bias ----
_mexp   DQ 2 DUP (07ff0000000000000H)                   ; exponent-field mask (bit pattern of +Inf)
_mabs   DQ 2 DUP (07fffffffffffffffH)                   ; everything but the sign bit: x AND _mabs = |x|
_pi_2d  DQ 03ff921fb54442d18H, 0bff921fb54442d18H       ; [0] = +Pi/2, [1] = -Pi/2 (selected by the sign bit of x)
_cntshf DQ 2 DUP (00000000000040201H)                   ; subtracted from ((|x|+8.0) >> 44) to obtain ind

;// ---- coefficients of D, used for 0.03125 <= |x| < 0.375 ----
;// Both lanes are evaluated by Horner's rule in x^4, starting at _d01 and
;// ending at _d1400; the low lane is then multiplied by x^2 and the two
;// lanes are added to give D(x^2).
_d1400  DQ 03fd5555555555552H, 00000000000000000H       ; low lane ~ 1/3, high lane 0
_d1213  DQ 03fc249249246aa76H, 0bfc99999999992acH
_d1011  DQ 03fb745d15933de8aH, 0bfbc71c71b835923H
_d89    DQ 03fb110f5eeb76ecaH, 0bfb3b1390a3b9899H
_d67    DQ 03faae4492fe3a600H, 0bfae1c1704144b68H
_d45    DQ 03fa51fa164891abeH, 0bfa8171d55d53138H
_d23    DQ 03f974721481ca2a2H, 0bfa124ce2388f2cbH
_d01    DQ 03f66107c30e0b8a5H, 0bf866e5652b14bbdH

;// ---- coefficients of P, used for small arguments and for atan(t) ----
;// Same two-lane layout; evaluation starts at _p01 and ends at _p60.
_p60    DQ 03fd55555555554ebH, 00000000000000000H       ; low lane ~ 1/3, high lane 0
_p45    DQ 03fc249249014497eH, 0bfc9999999976718H
_p23    DQ 03fb7453ba342480fH, 0bfbc71c4eebfb10eH
_p01    DQ 03fae9be97b0f8d08H, 0bfb39ad683f878c6H

;// ---- plain values ----
_zero   DQ 2 DUP (00000000000000000H)                   ; 0.0
_onen   DQ 2 DUP (0bff0000000000000H)                   ; -1.0
_one    DQ 2 DUP (03ff0000000000000H)                   ; 1.0
_cnst8  DQ 2 DUP (04020000000000000H)                   ; 8.0, added to |x| when computing ind

;// ---- interval boundaries compared against |x| ----
_in3    DQ 2 DUP (04020000000000000H)                   ; 8.0
_in2    DQ 2 DUP (03fd8000000000000H)                   ; 0.375
_in1    DQ 2 DUP (03fa0000000000000H)                   ; 0.03125 = 2^(-5)
_in0    DQ 2 DUP (03e40000000000000H)                   ; 2^(-27)
_in     DQ 2 DUP (0434d02967c31cdb5H)                   ; 0.1633123935319536975596774e+17
_minval DQ 2 DUP (00010000000000000H)                   ; 2^(-1022), smallest normal double

;// 2^(-1021); squared or added on the x87 stack to raise the
;// underflow / inexact flags on the trivial-result paths.
libm_small DQ 00200000000000000H
CONST ENDS
|
|
|
|
;// Text macros used by the code segment.
_x      TEXTEQU <esp+4>                         ; address expression of the double operand slot, relative to esp
XMMWORD TEXTEQU <OWORD>                         ; lets the code write XMMWORD PTR for 16-byte (OWORD) operands
|
|
|
|
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
        ALIGN   4

        PUBLIC  C _atan_pentium4, _CIatan_pentium4

;// ---------------------------------------------------------------------
;// double atan(double x) -- two entry points sharing one body.
;//
;//   _CIatan_pentium4 : pops x from x87 st(0) into a 16-byte aligned
;//                      scratch slot and CALLs the shared body, so the
;//                      slot sits at [_x] exactly as a stack argument
;//                      would.
;//   _atan_pentium4   : reads x from [_x].
;//
;// Result: left in x87 st(0) on every path.
;// The slot at [_x] doubles as scratch for moving the result from an XMM
;// register to the x87 stack, so it is overwritten on the computed paths.
;//
;// Register roles in the shared body:
;//   xmm7      x in both lanes
;//   xmm2      |x| in both lanes; later the reduced argument t
;//   xmm6      sign bit of x (low lane)
;//   xmm1      t^2            xmm3  t^4
;//   xmm5      two-lane polynomial accumulator
;//   xmm0/xmm4 hi / lo part of atan(s) or of Pi/2 from the table
;//   eax       3*ind, index into the table in qwords
;// ---------------------------------------------------------------------
_CIatan_pentium4 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                          ; room for one double
        and     esp, 0fffffff0h                 ; align the slot to 16 bytes
        fstp    QWORD PTR [esp]                 ; pop x from st(0) into the slot
        movq    xmm7, QWORD PTR [esp]           ; xmm7 = x
        call    start                           ; pushes a return address: slot is now at [_x]
        leave                                   ; drop the slot, restore ebp
        ret

_atan_pentium4 label proc
        movq    xmm7, QWORD PTR [_x]            ; xmm7 = x

start:
        unpcklpd xmm7, xmm7                     ; x in both lanes
        movapd  xmm2, xmm7
        andpd   xmm2, XMMWORD PTR _mabs         ; xmm2 = |x|
        comisd  xmm2, XMMWORD PTR _in           ; |x| < 0.1633123935319536975596774e+17 ?
        jp      x_nan                           ; unordered compare: x is NaN
        jae     bigx
        comisd  xmm2, XMMWORD PTR _in1          ; |x| < 0.03125 ?
        jae     xge0_03125
        comisd  xmm2, XMMWORD PTR _in0          ; |x| < 2^(-27) ?
        jb      retx                            ; atan(x) ~= x

; 2^(-27) <= |x| < 0.03125: atan(x) = x - x*P(x^2)

        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; |x|^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; |x|^4
        movapd  xmm5, XMMWORD PTR _p01          ; two-lane Horner evaluation in |x|^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane *= |x|^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; P(x^2) = low lane + high lane
        mulsd   xmm5, xmm7                      ; x * P(x^2)
        subsd   xmm7, xmm5                      ; x - x * P(x^2)
        movq    QWORD PTR [_x], xmm7            ; move the result to st(0) through memory
        fld     QWORD PTR [_x]
        ret

xge0_03125:                                     ; |x| >= 0.03125
        comisd  xmm2, XMMWORD PTR _in2          ; |x| < 0.375 ?
        jae     xge0_375

; 0.03125 <= |x| < 0.375: atan(x) = x - x*D(x^2)

        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; |x|^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; |x|^4
        movapd  xmm5, XMMWORD PTR _d01          ; two-lane Horner evaluation in |x|^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d67
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d89
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1011
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1213
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1400
        mulsd   xmm5, xmm1                      ; low lane *= |x|^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; D(x^2) = low lane + high lane
        mulsd   xmm5, xmm7                      ; x * D(x^2)
        subsd   xmm7, xmm5                      ; x - x * D(x^2)
        movq    QWORD PTR [_x], xmm7            ; move the result to st(0) through memory
        fld     QWORD PTR [_x]
        ret

xge0_375:                                       ; |x| >= 0.375
        movq    xmm6, xmm7                      ; x
        xorpd   xmm6, xmm2                      ; x XOR |x| leaves only the sign bit of x
        comisd  xmm2, XMMWORD PTR _in3          ; |x| < 8.0 ?
        jae     xge8_0

; 0.375 <= |x| < 8.0:
; atan(|x|) = atan(s) + atan(t), s = ind*0.03125, t = (|x|-s)/(1+|x|*s)

        movq    xmm0, XMMWORD PTR _cnst8
        movq    xmm5, XMMWORD PTR _cntshf
        movq    xmm3, xmm2                      ; |x|
        addsd   xmm3, xmm0                      ; |x| + 8.0
        psrlq   xmm3, 44                        ; keep the exponent and the top 8 fraction bits
        psubd   xmm3, xmm5                      ; remove the bias -> ind
        movd    eax, xmm3                       ; ind
        lea     eax, DWORD PTR [eax+eax*2]      ; ind*3: three qwords per table entry
        movq    xmm5, QWORD PTR _atn[eax*8+16]  ; s
        movq    xmm3, xmm2                      ; |x|
        subsd   xmm2, xmm5                      ; |x|-s
        mulsd   xmm3, xmm5                      ; |x|*s
        addsd   xmm3, XMMWORD PTR _one          ; 1+|x|*s
        divsd   xmm2, xmm3                      ; t = (|x|-s)/(1+|x|*s)
        unpcklpd xmm2, xmm2                     ; t in both lanes
        jmp     clcpol

xge8_0:                                         ; |x| >= 8.0

; 8.0 <= |x| < 0.1633123935319536975596774e+17:
; atan(|x|) = Pi/2 + atan(-1/|x|)

        mov     eax, 768                        ; ind*3 of the table entry holding the hi and lo part of Pi/2
        movq    xmm0, xmm2                      ; |x|
        movq    xmm2, XMMWORD PTR _onen
        divsd   xmm2, xmm0                      ; t = -1/|x|
        unpcklpd xmm2, xmm2                     ; t in both lanes

clcpol:                                         ; xmm2 = t, eax = ind*3, xmm6 = sign of x
        movq    xmm0, QWORD PTR _atn[0+eax*8]   ; atn[ind+0] - hi part of atan(s) or Pi/2
        movq    xmm4, QWORD PTR _atn[8+eax*8]   ; atn[ind+1] - lo part of atan(s) or Pi/2
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; t^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; t^4
        movapd  xmm5, XMMWORD PTR _p01          ; two-lane Horner evaluation in t^4
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane *= t^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down
        addsd   xmm5, xmm3                      ; P(t^2) = low lane + high lane

; atan(|x|) = atn[ind+0] - ((t*P(t^2) - atn[ind+1]) - t)

        mulsd   xmm5, xmm2                      ; t*P(t^2)
        subsd   xmm5, xmm4                      ; t*P(t^2) - atn[ind+1]
        subsd   xmm5, xmm2                      ; (t*P(t^2) - atn[ind+1]) - t
        subsd   xmm0, xmm5                      ; atn[ind+0] - ((t*P(t^2) - atn[ind+1]) - t)
        orpd    xmm0, xmm6                      ; give the result the sign of x
        movq    QWORD PTR [_x], xmm0            ; move the result to st(0) through memory
        fld     QWORD PTR [_x]
        ret

retx:                                           ; |x| < 2^(-27): atan(x) ~= x
        comisd  xmm2, XMMWORD PTR _zero         ; x == 0 ?
        jne     notzero
        fld     QWORD PTR [_x]                  ; x == +0.0 or -0.0: return it unchanged
        ret

notzero:
        comisd  xmm2, XMMWORD PTR _minval       ; |x| < minval ?
        jae     ge_minval
        fld     QWORD PTR libm_small            ; 0 < |x| < minval
        fmul    QWORD PTR libm_small
        sub     esp, 8
        fstp    QWORD PTR [esp]                 ; storing the product as a double should raise UNDERFLOW
        fld     QWORD PTR [esp]
        add     esp, 8                          ; esp restored, so _x is valid again
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret

ge_minval:                                      ; minval <= |x| < 2^(-27)
        fld     QWORD PTR libm_small
        fmul    QWORD PTR libm_small
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret

bigx:                                           ; |x| >= 0.1633123935319536975596774e+17 (includes +-INF)
        ; NaN never reaches this label: the jp that follows the first
        ; comisd has already sent it to x_nan, so no NaN test is needed.
        mov     eax, DWORD PTR [_x+4]           ; high dword of x
        shr     eax, 31                         ; eax = sign bit of x (0 or 1)
        fld     QWORD PTR libm_small
        fadd    QWORD PTR _pi_2d[eax*8]         ; should be inexact result
        ret                                     ; return +-Pi/2

x_nan:
        mov     edx, 1003                       ; input_tag passed to the error handler
        ;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
        sub     esp, 16                         ; four dword arguments
        mov     DWORD PTR [esp+12], edx         ; input_tag
        mov     edx, esp
        add     edx, 16+4                       ; address of x: _x as it was before the sub
        mov     DWORD PTR [esp+8], edx          ; retval -> x
        mov     DWORD PTR [esp+4], edx          ; arg2   -> x
        mov     DWORD PTR [esp], edx            ; arg1   -> x
        call    NEAR PTR __libm_error_support
        add     esp, 16

        fld     QWORD PTR [_x]
        ret                                     ; return same nan

        ALIGN   4

_CIatan_pentium4 ENDP

_TEXT ENDS
|
|
|
|
END
|