;/* File: "atan_wmt.asm". */
;//
;//               INTEL CORPORATION PROPRIETARY INFORMATION
;//  This software is supplied under the terms of a license agreement or
;//  nondisclosure agreement with Intel Corporation and may not be copied
;//  or disclosed except in accordance with the terms of that agreement.
;//        Copyright (c) 2000 Intel Corporation. All Rights Reserved.
;//
;//
;//  Contents:      atan.
;//
;//  Purpose:       Libm
;//

        .686P
        .387
        .XMM
        .MODEL FLAT,C

EXTRN C __libm_error_support : NEAR

CONST SEGMENT PARA PUBLIC USE32 'CONST'
        ALIGN 16

EXTRN C _atan_table:QWORD
_atn TEXTEQU <_atan_table>

;/*
;// FUNCTION:    double atan(double x)
;//
;// DESCRIPTION:
;//
;//   1. For |x| < 2^(-27), where atan(x) ~= x, return x.
;//   2. For |x| >= 0.1633123935319536975596774e+17, where atan(x) ~= +-Pi/2, return +-Pi/2.
;//   3. In interval [0.0,0.03125] polynomial approximation of atan(x)=x-x*P(x^2).
;//   4. In interval [0.03125,0.375] polynomial approximation of atan(x)=x-x*D(x^2).
;//   5. In interval [0.375,8.0] we compute ind and eps such, that x=0.03125*ind+eps and 0.0
;//
;// NOTE(review): the text of this file appears to be truncated between the
;// "0.0" that ends item 5 above and the XMMWORD TEXTEQU line below.  Nothing
;// visible here closes the CONST segment or defines _x, _mabs, _in, _in0.._in3,
;// _p01.._p60, _d01.._d1400, _cnst8, _cntshf, _one, _onen, _zero, _minval,
;// _mexp, _pi_2d or libm_small, and the TEXTEQU below has no operand.
;// Restore the missing text from the original source before assembling;
;// none of it has been reconstructed here.
XMMWORD TEXTEQU

_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
        ALIGN 4

PUBLIC C _atan_pentium4, _CIatan_pentium4

;// ---------------------------------------------------------------------------
;// _CIatan_pentium4 : entry that takes x in ST(0).  It spills ST(0) to an
;//                    aligned stack slot, loads it into xmm7 and CALLs the
;//                    shared body at "start".
;// _atan_pentium4   : entry that loads x from [_x] and falls through to "start".
;//
;// Every return path of the shared body ends with "fld ... / ret", i.e. the
;// result is left in ST(0).
;//
;// NOTE(review): _x is not defined in the visible text.  The x_nan path
;// computes the argument address as (esp at "start") + 4, so _x presumably
;// expands to esp+4 - confirm against the original source.
;//
;// Register roles inside the shared body (from the code below):
;//   xmm7 = x (both lanes)      xmm2 = |x|, later the reduced argument t
;//   xmm6 = sign bit of x       xmm0/xmm4 = hi/lo table values (clcpol)
;//   xmm1 = arg^2, xmm3 = arg^4, xmm5 = polynomial accumulator
;//   eax  = ind*3 (qword index into _atn), edx = scratch in x_nan
;// ---------------------------------------------------------------------------
_CIatan_pentium4 PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 8                          ; for argument DBLSIZE
        and     esp, 0fffffff0h                 ; round esp down to a 16-byte boundary
        fstp    qword ptr [esp]                 ; pop x from the x87 stack into the slot
        movq    xmm7, qword ptr [esp]           ; xmm7 = x
        call    start                           ; return address now sits below the stored x
        leave                                   ; esp = ebp, pop ebp
        ret

_atan_pentium4 label proc
        movq    xmm7, QWORD PTR [_x]            ; x

start:
        unpcklpd xmm7, xmm7                     ; copy x into both lanes
        movapd  xmm2, xmm7
        andpd   xmm2, XMMWORD PTR _mabs         ; |x|
        comisd  xmm2, XMMWORD PTR _in           ; |x| < 0.1633123935319536975596774e+17 ?
        jp      x_nan                           ; unordered compare (PF=1): x is NaN
        jae     bigx
        comisd  xmm2, XMMWORD PTR _in1          ; |x| < 0.03125 ?
        jae     xge0_03125
        comisd  xmm2, XMMWORD PTR _in0          ; |x| < 2^(-27) ?
        jb      retx                            ; atan(x) ~= x

; 2^(-27) < |x| < 0.03125, atan(x)=x-x*P(x^2)
; Both lanes run Horner steps in |x|^4 on packed coefficient pairs; the low
; lane is then multiplied by |x|^2 and the high lane is added to it.
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; |x|^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; |x|^4
        movapd  xmm5, XMMWORD PTR _p01          ; calculate P(x^2)
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane *= |x|^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down to the low lane
        addsd   xmm5, xmm3                      ; P(x^2)
        mulsd   xmm5, xmm7                      ; x * P(x^2)
        subsd   xmm7, xmm5                      ; x - x * P(x^2)
        movq    QWORD PTR [_x], xmm7            ; store the result over the argument slot
        fld     QWORD PTR [_x]                  ; ... and reload it into ST(0)
        ret

xge0_03125:                                     ; |x| >= 0.03125
        comisd  xmm2, XMMWORD PTR _in2          ; |x| < 0.375 ?
        jae     xge0_375

; 0.03125 < |x| < 0.375, atan(x)=x-x*D(x^2)
; Same two-lane evaluation scheme as for P above, with eight coefficient pairs.
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; |x|^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; |x|^4
        movapd  xmm5, XMMWORD PTR _d01          ; calculate D(x^2)
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d67
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d89
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1011
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1213
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _d1400
        mulsd   xmm5, xmm1                      ; low lane *= |x|^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down to the low lane
        addsd   xmm5, xmm3                      ; D(x^2)
        mulsd   xmm5, xmm7                      ; x * D(x^2)
        subsd   xmm7, xmm5                      ; x - x * D(x^2)
        movq    QWORD PTR [_x], xmm7            ; store the result over the argument slot
        fld     QWORD PTR [_x]                  ; ... and reload it into ST(0)
        ret

xge0_375:                                       ; |x| >= 0.375
        movq    xmm6, xmm7                      ; x
        xorpd   xmm6, xmm2                      ; sign x (x XOR |x| leaves the sign bit in the low lane)
        comisd  xmm2, XMMWORD PTR _in3          ; |x| < 8.0 ?
        jae     xge8_0

; 0.375 < |x| < 8.0:
; atan(|x|)=atan(s)+atan(t), s=ind*0.03125, t=(|x|-s)/(1+|x|*s)
; ind is taken from the bit pattern of |x|+_cnst8: shift right by 44 and
; subtract _cntshf (the values of both constants are not visible here).
        movq    xmm0, XMMWORD PTR _cnst8
        movq    xmm5, XMMWORD PTR _cntshf
        movq    xmm3, xmm2                      ; calculate ind
        addsd   xmm3, xmm0
        psrlq   xmm3, 44
        psubd   xmm3, xmm5
        movd    eax, xmm3                       ; ind
        lea     eax, DWORD PTR [eax+eax*2]      ; ind*3 (three qwords are read per table entry)
        movq    xmm5, QWORD PTR _atn[eax*8+16]  ; s
        movq    xmm3, xmm2                      ; |x|
        subsd   xmm2, xmm5                      ; |x|-s
        mulsd   xmm3, xmm5                      ; |x|*s
        addsd   xmm3, XMMWORD PTR _one          ; 1+|x|*s
        divsd   xmm2, xmm3                      ; (|x|-s)/(1+|x|*s)
        unpcklpd xmm2, xmm2                     ; t in both lanes for the packed polynomial
        jmp     clcpol

xge8_0:                                         ; |x| > 8.0
; 8.0 < |x| < 0.1633123935319536975596774e+17:
; atan(|x|)=Pi/2+atan(-1/|x|)
        mov     eax, 768                        ; ind*3 - entry point in table, where lo and hi part of Pi/2
        movq    xmm0, xmm2                      ; |x|
        movq    xmm2, XMMWORD PTR _onen
        divsd   xmm2, xmm0                      ;-1/|x|
        unpcklpd xmm2, xmm2                     ; t in both lanes for the packed polynomial

; Shared tail.  On entry: xmm2 = t in both lanes, where t is (|x|-s)/(1+|x|*s)
; or -1/|x|; eax = ind*3; xmm6 = sign of x.
clcpol:
        movq    xmm0, QWORD PTR _atn[0+eax*8]   ; atn[ind+0] - hi part of atan(s) or Pi/2
        movq    xmm4, QWORD PTR _atn[8+eax*8]   ; atn[ind+1] - lo part of atan(s) or Pi/2
        movapd  xmm1, xmm2
        mulpd   xmm1, xmm2                      ; t^2
        movapd  xmm3, xmm1
        mulpd   xmm3, xmm1                      ; t^4
        movapd  xmm5, XMMWORD PTR _p01          ; calculate P(t^2)
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p23
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p45
        mulpd   xmm5, xmm3
        addpd   xmm5, XMMWORD PTR _p60
        mulsd   xmm5, xmm1                      ; low lane *= t^2
        movapd  xmm3, xmm5
        shufpd  xmm3, xmm3, 1                   ; bring the high lane down to the low lane
        addsd   xmm5, xmm3                      ; P(t^2)

; atan(|x|) = atn[ind+0]-((t*P(t^2)-atn[ind+1])-t)
        mulsd   xmm5, xmm2                      ; t*P(t^2)
        subsd   xmm5, xmm4                      ; t*P(t^2)-atn[ind+1]
        subsd   xmm5, xmm2                      ; (t*P(t^2)-atn[ind+1])-t
        subsd   xmm0, xmm5                      ; atn[ind+0]-((t*P(t^2)-atn[ind+1])-t)
        orpd    xmm0, xmm6                      ; sign x
        movq    QWORD PTR [_x], xmm0            ; store the result over the argument slot
        fld     QWORD PTR [_x]                  ; ... and reload it into ST(0)
        ret

retx:                                           ; |x| < 2^(-27): atan(x) ~= x
        comisd  xmm2, XMMWORD PTR _zero         ; x == 0 ?
        jne     notzero
        fld     QWORD PTR [_x]                  ; x == +0.0 or -0.0
        ret

notzero:
        comisd  xmm2, XMMWORD PTR _minval       ; x < minval ?
        jae     ge_minval
; |x| < minval: square libm_small and round the product to double through a
; temporary stack slot before adding x.
        fld     QWORD PTR libm_small
        fmul    QWORD PTR libm_small
        sub     esp, 8
        fstp    QWORD PTR [esp]                 ; should be flag UNDERFLOW
        fld     QWORD PTR [esp]
        add     esp, 8                          ; esp restored before [_x] is used again
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret

ge_minval:                                      ; minval < x < 2^(-27)
        fld     QWORD PTR libm_small
        fmul    QWORD PTR libm_small
        fadd    QWORD PTR [_x]                  ; should be inexact result
        ret

bigx:                                           ; |x| > 0.1633123935319536975596774e+17
        movq    xmm0, xmm2                      ; |x|
        movq    xmm3, QWORD PTR _mexp
        andpd   xmm0, xmm3                      ; keep only the bits selected by _mexp
        ucomisd xmm0, xmm3
        jp      x_nan                           ; unordered compare -> NaN path
; NOTE(review): NaN inputs already branch to x_nan at the first comisd in
; "start"; confirm whether this second check can ever be taken.
        mov     eax, DWORD PTR [_x+4]           ; x (high dword)
        shr     eax, 31                         ; sign x
        fld     QWORD PTR libm_small
        fadd    QWORD PTR _pi_2d[eax*8]         ; should be inexact result
        ret                                     ; return +-Pi/2

x_nan:
        mov     edx, 1003                       ; input_tag passed to the error handler
;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
        sub     esp, 16                         ; room for the four arguments
        mov     DWORD PTR [esp+12],edx          ; input_tag
        mov     edx, esp
        add     edx, 16+4                       ; edx = (esp before the sub) + 4
        mov     DWORD PTR [esp+8],edx           ; retval
        mov     DWORD PTR [esp+4],edx           ; arg2
        mov     DWORD PTR [esp],edx             ; arg1
        call    NEAR PTR __libm_error_support
        add     esp, 16                         ; caller removes the arguments
        fld     QWORD PTR [_x]
        ret                                     ; return same nan

        ALIGN 4
_CIatan_pentium4 ENDP

_TEXT ENDS

        END