Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

299 lines
12 KiB

  1. ;/* File: "atan_wmt.asm". */
  2. ;//
  3. ;// INTEL CORPORATION PROPRIETARY INFORMATION
  4. ;// This software is supplied under the terms of a license agreement or
  5. ;// nondisclosure agreement with Intel Corporation and may not be copied
  6. ;// or disclosed except in accordance with the terms of that agreement.
  7. ;// Copyright (c) 2000 Intel Corporation. All Rights Reserved.
  8. ;//
  9. ;//
  10. ;// Contents: atan.
  11. ;//
  12. ;// Purpose: Libm
  13. ;//
  14. .686P ; accept the P6 (Pentium Pro) instruction set, privileged instructions included
  15. .387 ; accept x87 FPU instructions (fld/fstp/fmul/fadd are used below)
  16. .XMM ; accept SSE/SSE2 instructions (movapd, mulpd, comisd, ... are used below)
  17. .MODEL FLAT,C ; flat memory model, C language type for names declared below
  18. EXTRN C __libm_error_support : NEAR ; defined elsewhere; called only from the x_nan path
  19. CONST SEGMENT PARA PUBLIC USE32 'CONST'
  ; Read-only data for atan: the external lookup table alias, the algorithm
  ; description, and the constants used by the code segment below.
  ; Most constants are stored as two qwords (16 bytes) so that they can be
  ; used as 128-bit operands of the packed SSE2 instructions.
  20. ALIGN 16
  21. EXTRN C _atan_table:QWORD ; table of doubles defined in another module
  22. _atn TEXTEQU <_atan_table> ; short alias used by the table loads
  23. ;/*
  24. ;// FUNCTION: double atan(double x)
  25. ;//
  26. ;// DESCRIPTION:
  27. ;//
  28. ;// 1. For |x| < 2^(-27), where atan(x) ~= x, return x.
  29. ;// 2. For |x| >= 0.1633123935319536975596774e+17, where atan(x) ~= +-Pi/2, return +-Pi/2.
  30. ;// 3. In interval [0.0,0.03125] polynomial approximation of atan(x)=x-x*P(x^2).
  31. ;// 4. In interval [0.03125,0.375] polynomial approximation of atan(x)=x-x*D(x^2).
  32. ;// 5. In interval [0.375,8.0] we compute ind and eps such that x=0.03125*ind+eps and 0.0<eps<0.03125.
  33. ;// Let s=0.03125*ind, then atan(x)=atan(s)+atan(t), where t=((x-s)/(1+x*s)). For the lo and hi parts of
  34. ;// atan(s) we have a table (see file atan_table.c): atn[ind]+atn[ind+1]=atan(s).
  ;// NOTE(review): the code reads three qwords per table entry, at byte offsets
  ;// 0 (hi), 8 (lo) and 16 (s) from _atn[ind*3*8] - confirm this layout against atan_table.c.
  35. ;// atan(t) is approximated atan(t)=t-t*P(t^2).
  36. ;// 6. In interval [8.0,0.1633123935319536975596774e+17] atan(x)=Pi/2+atan(-1/x).
  37. ;// atan(-1/x) is approximated atan(t)=t-t*P(t^2), where t=-1/x.
  38. ;// 7. For x < 0.0 atan(x) = -atan(|x|).
  39. ;// 8. Special cases:
  40. ;// atan(+0) = +0;
  41. ;// atan(-0) = -0;
  42. ;// atan(+INF) = +Pi/2;
  43. ;// atan(-INF) = -Pi/2;
  44. ;// atan(NaN) = NaN.
  45. ;//
  46. ;// KEYS OF COMPILER: -c -w -Zl -Di386 /QIfdiv-
  47. ;*/
  ; --- bit masks and special values ---
  48. _mexp DQ 07ff0000000000000H, 07ff0000000000000H ; exponent-field bits of a double; used on the bigx path
  49. _mabs DQ 07fffffffffffffffH, 07fffffffffffffffH ; every bit except the sign; ANDed with x to form |x|
  50. _pi_2d DQ 03ff921fb54442d18H, 0bff921fb54442d18H ; [0] = +Pi/2, [1] = -Pi/2; indexed by the sign bit of x on the bigx path
  51. _cntshf DQ 00000000000040201H, 00000000000040201H ; subtracted (psubd) from (|x|+8.0)>>44 to obtain the table index
  ; --- coefficient pairs of D(x^2), range 0.03125 <= |x| < 0.375 ---
  ; The code starts from _d01 and works up to _d1400, multiplying by x^4 between
  ; additions; afterwards the low qword lane is multiplied by x^2 and the two
  ; lanes are added together.
  52. _d1400 DQ 03fd5555555555552H, 00000000000000000H ; last pair added; high lane is zero
  53. _d1213 DQ 03fc249249246aa76H, 0bfc99999999992acH
  54. _d1011 DQ 03fb745d15933de8aH, 0bfbc71c71b835923H
  55. _d89 DQ 03fb110f5eeb76ecaH, 0bfb3b1390a3b9899H
  56. _d67 DQ 03faae4492fe3a600H, 0bfae1c1704144b68H
  57. _d45 DQ 03fa51fa164891abeH, 0bfa8171d55d53138H
  58. _d23 DQ 03f974721481ca2a2H, 0bfa124ce2388f2cbH
  59. _d01 DQ 03f66107c30e0b8a5H, 0bf866e5652b14bbdH ; first pair loaded
  ; --- coefficient pairs of P(x^2), used for |x| < 0.03125 and for atan(t) in clcpol ---
  ; Same evaluation scheme as D: start at _p01, finish with _p60.
  60. _p60 DQ 03fd55555555554ebH, 00000000000000000H ; last pair added; high lane is zero
  61. _p45 DQ 03fc249249014497eH, 0bfc9999999976718H
  62. _p23 DQ 03fb7453ba342480fH, 0bfbc71c4eebfb10eH
  63. _p01 DQ 03fae9be97b0f8d08H, 0bfb39ad683f878c6H ; first pair loaded
  ; --- plain values and interval bounds ---
  64. _zero DQ 00000000000000000H, 00000000000000000H ; 0.0; compared against |x| on the retx path
  65. _onen DQ 0bff0000000000000H, 0bff0000000000000H ; -1.0; numerator of -1/|x|
  66. _one DQ 03ff0000000000000H, 03ff0000000000000H ; +1.0; added to form 1+|x|*s
  67. _cnst8 DQ 04020000000000000H, 04020000000000000H ; 8.0; added to |x| before the index is extracted
  68. _in3 DQ 04020000000000000H, 04020000000000000H ; 8.0; bound between the table path and the -1/|x| path
  69. _in2 DQ 03fd8000000000000H, 03fd8000000000000H ; 0.375; bound between the D polynomial and the table path
  70. _in1 DQ 03fa0000000000000H, 03fa0000000000000H ; 0.03125; bound between the P and D polynomials
  71. _in0 DQ 03e40000000000000H, 03e40000000000000H ; 2^(-27); below this atan(x) is returned as x
  72. _in DQ 0434d02967c31cdb5H, 0434d02967c31cdb5H ; 0.1633123935319536975596774e+17; at or above this +-Pi/2 is returned
  73. _minval DQ 00010000000000000H, 00010000000000000H ; 2^(-1022), smallest normal double; selects the underflow path
  74. libm_small DQ 00200000000000000H ; 2^(-991) (bits 0200000000000000h); used in x87 operations whose purpose is to raise flags
  75. CONST ENDS
  76. _x TEXTEQU <esp+4> ; location of the double argument x: right above the return address; only valid while esp has its value at label start
  77. XMMWORD TEXTEQU <OWORD> ; spell 128-bit memory operands as XMMWORD PTR
  78. _TEXT SEGMENT PARA PUBLIC USE32 'CODE'
  79. ALIGN 4
  80. PUBLIC C _atan_pentium4, _CIatan_pentium4
  ;-----------------------------------------------------------------------
  ; double atan(double x) - two entry points that share one body:
  ;   _CIatan_pentium4 : x is popped from the x87 stack into a temporary
  ;                      stack slot, then the shared body is called.
  ;   _atan_pentium4   : x is read from the stack at [_x] (= [esp+4]).
  ; Result : left on the x87 stack by the fld/fadd that precedes each ret.
  ; Written by this code: eax, edx, xmm0-xmm7, x87 stack, flags.
  ; The slot [_x] is overwritten with the result on the polynomial and
  ; table paths.
  ; Register roles in the shared body: xmm7 = x (both lanes), xmm2 = |x|
  ; (both lanes) and later t, xmm6 = sign bit of x (low lane).
  ;-----------------------------------------------------------------------
  81. _CIatan_pentium4 PROC NEAR
  82. push ebp
  83. mov ebp, esp ; frame pointer, so that leave can undo the alignment below
  84. sub esp, 8 ; for argument DBLSIZE
  85. and esp, 0fffffff0h ; round esp down to a multiple of 16
  86. fstp qword ptr [esp] ; pop x from the x87 stack into the temporary slot
  87. movq xmm7, qword ptr [esp] ; xmm7 = x
  88. call start ; pushes a return address, so the slot is at [esp+4] = [_x] inside the body
  89. leave ; restore esp and ebp
  90. ret ; result stays on the x87 stack
  91. _atan_pentium4 label proc
  92. movq xmm7, QWORD PTR [_x] ; x
  93. start:
  ; --- classify |x| ---
  94. unpcklpd xmm7, xmm7 ; x in both lanes
  95. movapd xmm2, xmm7
  96. andpd xmm2, XMMWORD PTR _mabs ; |x|
  97. comisd xmm2, XMMWORD PTR _in ; |x| < 0.1633123935319536975596774e+17 ?
  98. jp x_nan ; unordered compare: x is NaN
  99. jae bigx ; |x| at or above the threshold
  100. comisd xmm2, XMMWORD PTR _in1 ; |x| < 0.03125 ?
  101. jae xge0_03125
  102. comisd xmm2, XMMWORD PTR _in0 ; |x| < 2^(-27) ?
  103. jb retx ; atan(x) ~= x
  104. ; 2^(-27) < |x| < 0.03125, atan(x)=x-x*P(x^2)
  ; P is evaluated in two lanes at once with x^4 as the step; at the end the
  ; low lane is multiplied by x^2 and the high lane is added to it.
  105. movapd xmm1, xmm2
  106. mulpd xmm1, xmm2 ; |x|^2
  107. movapd xmm3, xmm1
  108. mulpd xmm3, xmm1 ; |x|^4
  109. movapd xmm5, XMMWORD PTR _p01 ; calculate P(x^2)
  110. mulpd xmm5, xmm3
  111. addpd xmm5, XMMWORD PTR _p23
  112. mulpd xmm5, xmm3
  113. addpd xmm5, XMMWORD PTR _p45
  114. mulpd xmm5, xmm3
  115. addpd xmm5, XMMWORD PTR _p60
  116. mulsd xmm5, xmm1 ; low lane *= x^2
  117. movapd xmm3, xmm5
  118. shufpd xmm3, xmm3, 1 ; swap lanes: high lane of the sum moves to the low lane
  119. addsd xmm5, xmm3 ; P(x^2)
  120. mulsd xmm5, xmm7 ; x * P(x^2)
  121. subsd xmm7, xmm5 ; x - x * P(x^2)
  122. movq QWORD PTR [_x], xmm7 ; store the result over the argument slot
  123. fld QWORD PTR [_x] ; return it on the x87 stack
  124. ret
  125. xge0_03125: ; |x| >= 0.03125
  126. comisd xmm2, XMMWORD PTR _in2 ; |x| < 0.375 ?
  127. jae xge0_375
  128. ; 0.03125 < |x| < 0.375, atan(x)=x-x*D(x^2)
  ; Same two-lane scheme as for P, with more coefficient pairs.
  129. movapd xmm1, xmm2
  130. mulpd xmm1, xmm2 ; |x|^2
  131. movapd xmm3, xmm1
  132. mulpd xmm3, xmm1 ; |x|^4
  133. movapd xmm5, XMMWORD PTR _d01 ; calculate D(x^2)
  134. mulpd xmm5, xmm3
  135. addpd xmm5, XMMWORD PTR _d23
  136. mulpd xmm5, xmm3
  137. addpd xmm5, XMMWORD PTR _d45
  138. mulpd xmm5, xmm3
  139. addpd xmm5, XMMWORD PTR _d67
  140. mulpd xmm5, xmm3
  141. addpd xmm5, XMMWORD PTR _d89
  142. mulpd xmm5, xmm3
  143. addpd xmm5, XMMWORD PTR _d1011
  144. mulpd xmm5, xmm3
  145. addpd xmm5, XMMWORD PTR _d1213
  146. mulpd xmm5, xmm3
  147. addpd xmm5, XMMWORD PTR _d1400
  148. mulsd xmm5, xmm1 ; low lane *= x^2
  149. movapd xmm3, xmm5
  150. shufpd xmm3, xmm3, 1 ; swap lanes: high lane of the sum moves to the low lane
  151. addsd xmm5, xmm3 ; D(x^2)
  152. mulsd xmm5, xmm7 ; x * D(x^2)
  153. subsd xmm7, xmm5 ; x - x * D(x^2)
  154. movq QWORD PTR [_x], xmm7 ; store the result over the argument slot
  155. fld QWORD PTR [_x] ; return it on the x87 stack
  156. ret
  157. xge0_375: ; |x| >= 0.375
  158. movq xmm6, xmm7 ; x
  159. xorpd xmm6, xmm2 ; low lane = x XOR |x| = sign bit of x only
  160. comisd xmm2, XMMWORD PTR _in3 ; |x| < 8.0 ?
  161. jae xge8_0
  162. ; 0.375 < |x| < 8.0:
  163. ; atan(|x|)=atan(s)+atan(t), s=ind*0.03125, t=(|x|-s)/(1+|x|*s)
  ; The index is taken from the bit pattern of |x|+8.0: the shift discards the
  ; low 44 bits, and _cntshf is then subtracted from what remains.
  164. movq xmm0, XMMWORD PTR _cnst8 ; 8.0
  165. movq xmm5, XMMWORD PTR _cntshf ; 40201h
  166. movq xmm3, xmm2 ; calculate ind
  167. addsd xmm3, xmm0 ; |x| + 8.0
  168. psrlq xmm3, 44 ; keep sign, exponent and the top 8 mantissa bits
  169. psubd xmm3, xmm5 ; subtract 40201h
  170. movd eax, xmm3 ; ind
  171. lea eax, DWORD PTR [eax+eax*2] ; ind*3
  172. movq xmm5, QWORD PTR _atn[eax*8+16] ; s
  173. movq xmm3, xmm2 ; |x|
  174. subsd xmm2, xmm5 ; |x|-s
  175. mulsd xmm3, xmm5 ; |x|*s
  176. addsd xmm3, XMMWORD PTR _one ; 1+|x|*s
  177. divsd xmm2, xmm3 ; (|x|-s)/(1+|x|*s)
  178. unpcklpd xmm2, xmm2 ; t in both lanes
  179. jmp clcpol
  180. xge8_0: ; |x| > 8.0
  181. ; 8.0 < |x| < 0.1633123935319536975596774e+17:
  182. ; atan(|x|)=Pi/2+atan(-1/|x|)
  183. mov eax, 768 ; ind*3 - entry point in table, where lo and hi part of Pi/2
  184. movq xmm0, xmm2 ; |x|
  185. movq xmm2, XMMWORD PTR _onen ; -1.0
  186. divsd xmm2, xmm0 ;-1/|x|
  187. unpcklpd xmm2, xmm2 ; t in both lanes
  188. clcpol:
  ; Common tail of the two paths above. On entry: eax = ind*3, xmm2 = t in
  ; both lanes, xmm6 = sign bit of x. Computes base + (t - t*P(t^2)).
  189. movq xmm0, QWORD PTR _atn[0+eax*8] ; atn[ind+0] - hi part of atan(s) or Pi/2
  190. movq xmm4, QWORD PTR _atn[8+eax*8] ; atn[ind+1] - lo part of atan(s) or Pi/2
  191. movapd xmm1, xmm2
  192. mulpd xmm1, xmm2 ; t^2
  193. movapd xmm3, xmm1
  194. mulpd xmm3, xmm1 ; t^4
  195. movapd xmm5, XMMWORD PTR _p01 ; calculate P(t^2)
  196. mulpd xmm5, xmm3
  197. addpd xmm5, XMMWORD PTR _p23
  198. mulpd xmm5, xmm3
  199. addpd xmm5, XMMWORD PTR _p45
  200. mulpd xmm5, xmm3
  201. addpd xmm5, XMMWORD PTR _p60
  202. mulsd xmm5, xmm1 ; low lane *= t^2
  203. movapd xmm3, xmm5
  204. shufpd xmm3, xmm3, 1 ; swap lanes: high lane of the sum moves to the low lane
  205. addsd xmm5, xmm3 ; P(t^2)
  206. ; atan(|x|) = atn[ind+0]-((t*P(t^2)-atn[ind+1])-t)
  207. mulsd xmm5, xmm2 ; t*P(t^2)
  208. subsd xmm5, xmm4 ; t*P(t^2)-atn[ind+1]
  209. subsd xmm5, xmm2 ; (t*P(t^2)-atn[ind+1])-t
  210. subsd xmm0, xmm5 ; atn[ind+0]-((t*P(t^2)-atn[ind+1])-t)
  211. orpd xmm0, xmm6 ; sign x
  212. movq QWORD PTR [_x], xmm0 ; store the result over the argument slot
  213. fld QWORD PTR [_x] ; return it on the x87 stack
  214. ret
  215. retx: ; |x| < 2^(-27): atan(x) ~= x
  216. comisd xmm2, XMMWORD PTR _zero ; x == 0 ?
  217. jne notzero
  218. fld QWORD PTR [_x] ; x == +0.0 or -0.0
  219. ret
  220. notzero:
  221. comisd xmm2, XMMWORD PTR _minval ; x < minval ?
  222. jae ge_minval
  ; |x| < minval: form libm_small*libm_small and round it to double through a
  ; scratch stack slot, then add x to it.
  223. fld QWORD PTR libm_small
  224. fmul QWORD PTR libm_small
  225. sub esp, 8 ; scratch qword; [_x] is not valid until esp is restored
  226. fstp QWORD PTR [esp] ; should be flag UNDERFLOW
  227. fld QWORD PTR [esp]
  228. add esp, 8 ; esp restored, [_x] addresses x again
  229. fadd QWORD PTR [_x] ; should be inexact result
  230. ret
  231. ge_minval: ; minval < x < 2^(-27)
  232. fld QWORD PTR libm_small
  233. fmul QWORD PTR libm_small
  234. fadd QWORD PTR [_x] ; should be inexact result
  235. ret
  236. bigx: ; |x| > 0.1633123935319536975596774e+17
  237. movq xmm0, xmm2 ; |x|
  238. movq xmm3, QWORD PTR _mexp
  239. andpd xmm0, xmm3 ; keep only the exponent field of |x|
  240. ucomisd xmm0, xmm3
  241. jp x_nan ; NOTE(review): both operands have all mantissa bits clear here, and NaN already branched at the first jp - this branch looks unreachable; confirm
  242. mov eax, DWORD PTR [_x+4] ; x
  243. shr eax, 31 ; sign x
  244. fld QWORD PTR libm_small
  245. fadd QWORD PTR _pi_2d[eax*8] ; should be inexact result
  246. ret ; return +-Pi/2
  247. x_nan:
  ; Build the four stack arguments of __libm_error_support; arg1, arg2 and
  ; retval all receive the address of x.
  248. mov edx, 1003 ; value passed as input_tag
  249. ;call libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
  250. sub esp, 16 ; room for four dword arguments
  251. mov DWORD PTR [esp+12],edx ; input_tag
  252. mov edx, esp
  253. add edx, 16+4 ; edx = address of x (esp before the sub, plus 4)
  254. mov DWORD PTR [esp+8],edx ; retval
  255. mov DWORD PTR [esp+4],edx ; arg2
  256. mov DWORD PTR [esp],edx ; arg1
  257. call NEAR PTR __libm_error_support
  258. add esp, 16 ; drop the arguments; [_x] addresses x again
  259. fld QWORD PTR [_x] ; load the value now at [_x] (retval pointed to this slot)
  260. ret ; return same nan
  261. ALIGN 4
  262. _CIatan_pentium4 ENDP
  263. _TEXT ENDS
  264. END