Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1061 lines
26 KiB

  1. .file "atan2.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00 Initial version
  29. // 4/04/00 Unwind support added
  30. // 8/15/00 Bundle added after call to __libm_error_support to properly
  31. // set [the previously overwritten] GR_Parameter_RESULT.
  32. // 8/17/00 Changed predicate register macro-usage to direct predicate
  33. // names due to an assembler bug.
  34. // 9/28/00 Updated to set invalid on SNaN inputs
  35. //
  36. // API
  37. //==============================================================
  38. // double atan2(double Y, double X)
  39. //
  40. // Overview of operation
  41. //==============================================================
  42. //
  43. // There are two basic paths: swap true and swap false.
  44. // atan2(Y,X) ==> atan(V/U) where U = max(|X|,|Y|) >= V = min(|X|,|Y|). If |Y| > |X|, we must swap.
  45. //
  46. // p6 swap True
  47. // p7 swap False
  48. // p8 X+ (If swap=True p8=p9=0)
  49. // p9 X-
  50. //
  51. // all the other predicates p10 thru p15 are false for the main path
  52. //
  53. // Special values
  54. //==============================================================
  55. // Y x Result
  56. // +number +inf +0
  57. // -number +inf -0
  58. // +number -inf +pi
  59. // -number -inf -pi
  60. //
  61. // +inf +number +pi/2
  62. // -inf +number -pi/2
  63. // +inf -number +pi/2
  64. // -inf -number -pi/2
  65. //
  66. // +inf +inf +pi/4
  67. // -inf +inf -pi/4
  68. // +inf -inf +3pi/4
  69. // -inf -inf -3pi/4
  70. //
  71. // +1 +1 +pi/4
  72. // -1 +1 -pi/4
  73. // +1 -1 +3pi/4
  74. // -1 -1 -3pi/4
  75. //
  76. // +number +0 +pi/2
  77. // -number +0 -pi/2
  78. // +number -0 +pi/2
  79. // -number -0 -pi/2
  80. //
  81. // +0 +number +0
  82. // -0 +number -0
  83. // +0 -number +pi
  84. // -0 -number -pi
  85. //
  86. // +0 +0 +0
  87. // -0 +0 -0
  88. // +0 -0 +pi
  89. // -0 -0 -pi
  90. //
  91. // Nan anything quiet Y
  92. // anything NaN quiet X
  93. // atan2(+-0,+-0) sets double error tag to 37
  94. // atan2(+-0,+-0) sets single error tag to 38
  95. // Assembly macros
  96. //==============================================================
  97. EXP_AD_P1 = r33 // local: pointer into atan2_tb1, advanced by 16 on every ldfe
  98. EXP_AD_P2 = r34 // local: pointer into atan2_tb2, advanced by 16 on every ldfe
  99. GR_SAVE_B0 = r35 // local: holds b0 across the __libm_error_support call
  100. GR_SAVE_GP = r36 // local: holds gp across the __libm_error_support call
  101. GR_SAVE_PFS = r37 // local: holds ar.pfs across the __libm_error_support call
  102. GR_Parameter_X = r38 // output: address of the stack slot that receives f9 (X)
  103. GR_Parameter_Y = r39 // output: address of the stack slot that receives f8 (Y)
  104. GR_Parameter_RESULT = r40 // output: address of the stack slot holding the result (f10 in, f8 out)
  105. atan2_GR_tag = r41 // output: error tag passed to __libm_error_support (37 on the 0,0 path)
  106. atan2_X = f9 // second argument X
  107. atan2_Y = f8 // first argument Y; f8 also receives the return value
  108. atan2_u1_X = f32 // frcpa approximation of 1/X
  109. atan2_u1_Y = f33 // frcpa approximation of 1/Y
  110. atan2_Umax = f34 // operand of larger magnitude, later replaced by its absolute value
  111. atan2_Vmin = f35 // operand of smaller magnitude, later replaced by its absolute value
  112. atan2_two = f36 // constant 2.0 (built as 1*1+1)
  113. atan2_absX = f37 // |X|
  114. atan2_z1_X = f38 // Y * u1_X
  115. atan2_z1_Y = f39 // X * u1_Y
  116. atan2_B1X = f40 // 2 - X*u1_X
  117. atan2_B1Y = f41 // 2 - Y*u1_Y
  118. atan2_wp = f42 // z1^2 for the selected (swap / no swap) path
  119. atan2_B1sq = f43 // B1^2 for the selected path
  120. atan2_z = f44 // z1*B1 for the selected path, then |z|
  121. atan2_w = f45 // wp*B1sq = z^2
  122. atan2_P0 = f46 // P0..P22: polynomial coefficients loaded from atan2_tb1 / atan2_tb2
  123. atan2_P1 = f47
  124. atan2_P2 = f48
  125. atan2_P3 = f49
  126. atan2_P4 = f50
  127. atan2_P5 = f51
  128. atan2_P6 = f52
  129. atan2_P7 = f53
  130. atan2_P8 = f54
  131. atan2_P9 = f55
  132. atan2_P10 = f56
  133. atan2_P11 = f57
  134. atan2_P12 = f58
  135. atan2_P13 = f59
  136. atan2_P14 = f60
  137. atan2_P15 = f61
  138. atan2_P16 = f62
  139. atan2_P17 = f63
  140. atan2_P18 = f64
  141. atan2_P19 = f65
  142. atan2_P20 = f66
  143. atan2_P21 = f67
  144. atan2_P22 = f68
  145. atan2_Pi_by_2 = f69 // pi/2, loaded from atan2_tb2
  146. atan2_V13 = f70 // V*/W*: partial sums of the polynomial in w
  147. atan2_W11 = f71
  148. atan2_E = f72 // Vmin*z + Umax
  149. atan2_gamma = f73 // 1 - Umax*z
  150. atan2_V11 = f74
  151. atan2_V12 = f75 // w^2
  152. atan2_V7 = f76
  153. atan2_V8 = f77
  154. atan2_W7 = f78
  155. atan2_W8 = f79
  156. atan2_W3 = f80
  157. atan2_W4 = f81
  158. atan2_V3 = f82
  159. atan2_V4 = f83
  160. atan2_F = f84 // frcpa approximation of 1/E
  161. atan2_gV = f85 // Vmin - Umax*z
  162. atan2_V10 = f86
  163. atan2_zcub = f87 // z*w = z^3
  164. atan2_V6 = f88
  165. atan2_V9 = f89 // V12^2 = w^4
  166. atan2_W10 = f90
  167. atan2_W6 = f91
  168. atan2_W2 = f92
  169. atan2_V2 = f93
  170. atan2_alpha = f94 // 1 - E*F
  171. atan2_alpha_1 = f95 // 2 - E*F
  172. atan2_gVF = f96 // gV*F
  173. atan2_V5 = f97
  174. atan2_W12 = f98 // V9^2, then V9^3
  175. atan2_W5 = f99
  176. atan2_alpha_sq = f100
  177. atan2_Cp = f101 // 1 + alpha*alpha_1
  178. atan2_V1 = f102
  179. // atan2_NORM_X = f103 -- disabled: duplicate definition; atan2_NORM_X is defined once, as f115, further down this list
  180. atan2_W1 = f104
  181. atan2_alpha_cub = f105
  182. atan2_C = f106 // gVF*Cp
  183. atan2_P = f107 // additive term of the final fma: sign of Y on pi/2, 0 or pi
  184. atan2_d = f108 // C + alpha_cub*C
  185. atan2_A_hi = f109 // z + zcub*Pp
  186. atan2_dsq = f110
  187. atan2_pd = f111 // P0*d
  188. atan2_A_lo = f112 // d + pd*dsq
  189. atan2_A = f113 // A_hi + A_lo
  190. atan2_Pp = f114 // W12*W1 + V1
  191. atan2_NORM_X = f115 // X*1+0 computed in status field s0
  192. atan2_sgnY = f116 // 1.0 carrying the sign of Y
  193. atan2_pi = f117 // pi, loaded from atan2_tb2
  194. atan2_sgnX = f118 // 1.0 carrying the sign of X
  195. atan2_sgnXY = f119 // sgnX*sgnY, negated on the swap path
  196. atan2_3pi_by_4 = f120 // 3*pi/4, loaded from atan2_tb2
  197. atan2_pi_by_4 = f121 // pi/4, loaded from atan2_tb1
  198. atan2_NORM_Y = f122 // Y*1+0 computed in status field s0
  199. //atan2_sF = p7
  200. //atan2_sT = p6
  201. // These coefficients are for atan2.
  202. // You can also use this set to substitute those used in the |X| <= 1 case for atan;
  203. // BUT NOT vice versa.
  204. /////////////////////////////////////////////////////////////
  205. .data
  206. .align 16
  207. atan2_tb1: // read sequentially through EXP_AD_P1 (ldfe ...,16); entry order below is the load order in atan2
  208. data8 0xB199DD6D2675C40F , 0x0000BFFA // P10 (each entry: 64-bit significand, then sign+exponent in the low 16 bits)
  209. data8 0xA21922DC45605EA1 , 0x00003FFA // P11
  210. data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
  211. data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
  212. data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
  213. data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
  214. data8 0xF396268151CFB11C , 0x00003FF7 // P17
  215. data8 0x9D3436AABE218776 , 0x00003FF5 // P19
  216. data8 0x80D601879218B53A , 0x00003FFA // P13
  217. data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
  218. data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
  219. data8 0xE38E38E320A8A098 , 0x00003FFB // P3
  220. data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
  221. data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
  222. atan2_tb2: // read sequentially through EXP_AD_P2 (ldfe ...,16); entry order below is the load order in atan2
  223. data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
  224. data8 0xCE585A259BD8374C , 0x00003FF0 // P21
  225. data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
  226. data8 0x88887EBB209E3543 , 0x0000BFFB // P6
  227. data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
  228. data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
  229. data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
  230. data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
  231. data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
  232. data8 0x9249249247E37913 , 0x0000BFFC // P2
  233. data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
  234. data8 0xC90FDAA22168C235 , 0x00004000 // pi
  235. data8 0x96cbe3f9990e91a8 , 0x00004000 // 3pi/4
  236. .align 32
  237. .global atan2#
  238. ////////////////////////////////////////////////////////
  239. .section .text
  240. .proc atan2#
  241. .align 32
  242. atan2: // double atan2(double Y, double X): Y arrives in f8, X in f9, result is written to f8
  243. // fclass mask columns: qnan snan inf norm unorm 0 -+
  244. // sample row (0x23 = +-inf): 0 0 1 0 0 0 11 (0xc3 = any NaN and 0x07 = +-0 are used the same way)
  245. // Y NaN? -> p10 (yes), p11 (no)
  246. // p10 ==> quiet Y and return
  247. // p11 ==> X NaN? -> p12 (yes), p13 (no)
  248. // p12 ==> quiet X and return
  249. { .mfi
  250. alloc r32 = ar.pfs,1,5,4,0 // 1 input (r32), 5 locals (r33-r37), 4 outputs (r38-r41)
  251. frcpa.s1 atan2_u1_X,p6 = f1,atan2_X // u1_X ~ 1/X; p6 is rewritten by the swap test further down
  252. addl EXP_AD_P2 = @ltoff(atan2_tb2), gp // address of the linkage-table slot for atan2_tb2
  253. }
  254. { .mfi
  255. addl EXP_AD_P1 = @ltoff(atan2_tb1), gp // address of the linkage-table slot for atan2_tb1
  256. fclass.m.unc p10,p11 = f8, 0xc3 // p10 = Y is a NaN, p11 = Y is not a NaN
  257. nop.i 999
  258. ;;
  259. }
  260. { .mfi
  261. ld8 EXP_AD_P1 = [EXP_AD_P1] // EXP_AD_P1 = &atan2_tb1
  262. frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y // u1_Y ~ 1/Y; p7 is rewritten by the swap test further down
  263. nop.i 999
  264. }
  265. { .mfi
  266. nop.m 999
  267. fma.s1 atan2_two = f1,f1,f1 // two = 1*1 + 1
  268. nop.i 999
  269. ;;
  270. }
  271. { .mfi
  272. ld8 EXP_AD_P2 = [ EXP_AD_P2] // EXP_AD_P2 = &atan2_tb2
  273. famax.s1 atan2_Umax = f8,f9 // operand with the larger magnitude (sign still attached)
  274. nop.i 999
  275. }
  276. ;;
  277. { .mfi
  278. nop.m 999
  279. fmerge.s atan2_absX = f0,atan2_X // absX = |X|; NOTE(review): no later read of atan2_absX in this listing
  280. nop.i 999
  281. }
  282. ;;
  283. // p10 Y NaN, quiet and return
  284. { .mfi
  285. ldfe atan2_P10 = [EXP_AD_P1],16
  286. fmerge.s atan2_sgnY = atan2_Y,f1 // sgnY = 1.0 with the sign of Y
  287. nop.i 999
  288. }
  289. { .mfb
  290. nop.m 999
  291. (p10) fma.d f8 = f8,f9,f0 // result = Y*X + 0, i.e. the NaN propagated through an arithmetic op
  292. (p10) br.ret.spnt b0
  293. ;;
  294. }
  295. { .mmf
  296. ldfe atan2_P11 = [EXP_AD_P1],16
  297. ldfe atan2_P20 = [EXP_AD_P2],16
  298. fmerge.s atan2_sgnX = atan2_X,f1 // sgnX = 1.0 with the sign of X
  299. ;;
  300. }
  301. { .mfi
  302. ldfe atan2_P8 = [EXP_AD_P1],16
  303. fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0 // z1_X = Y * (1/X approximation)
  304. nop.i 999
  305. }
  306. { .mfi
  307. ldfe atan2_P21 = [EXP_AD_P2],16
  308. fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0 // z1_Y = X * (1/Y approximation)
  309. nop.i 999
  310. ;;
  311. }
  312. { .mfi
  313. ldfe atan2_P9 = [EXP_AD_P1],16
  314. fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two // B1X = 2 - X*u1_X
  315. nop.i 999
  316. }
  317. { .mfi
  318. ldfe atan2_P4 = [EXP_AD_P2],16
  319. fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two // B1Y = 2 - Y*u1_Y
  320. nop.i 999
  321. ;;
  322. }
  323. // p6 (atan2_sT) true if swap
  324. // p7 (atan2_sF) true if no swap
  325. // p11 ==> Y !NaN; X NaN?
  326. { .mfi
  327. ldfe atan2_P5 = [EXP_AD_P1],16
  328. // fcmp.eq.unc.s1 atan2_sF,atan2_sT = atan2_Umax, atan2_X
  329. fcmp.eq.unc.s1 p7,p6 = atan2_Umax, atan2_X // p7 = (Umax == X): X is the larger-magnitude operand, no swap
  330. nop.i 999
  331. }
  332. { .mfi
  333. ldfe atan2_P6 = [EXP_AD_P2],16
  334. (p11) fclass.m.unc p12,p13 = f9, 0xc3 // p12 = X is a NaN
  335. nop.i 999
  336. ;;
  337. }
  338. { .mmf
  339. ldfe atan2_P7 = [EXP_AD_P1],16
  340. ldfe atan2_P16 = [EXP_AD_P2],16
  341. famin.s1 atan2_Vmin = f8,f9 // operand with the smaller magnitude (sign still attached)
  342. ;;
  343. }
  344. // p8 true if X positive
  345. // p9 true if X negative
  346. // both are false if swap is true
  347. { .mfi
  348. ldfe atan2_P17 = [EXP_AD_P1],16
  349. //(atan2_sF) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
  350. (p7) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
  351. nop.i 999
  352. }
  353. { .mfi
  354. ldfe atan2_P18 = [EXP_AD_P2],16
  355. fma.s1 atan2_sgnXY = atan2_sgnX, atan2_sgnY, f0 // sgnXY = sgnX * sgnY
  356. nop.i 999
  357. ;;
  358. }
  359. { .mfi
  360. ldfe atan2_P19 = [EXP_AD_P1],16
  361. //(atan2_sF) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0
  362. (p7) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0 // no swap: wp = z1_X^2
  363. nop.i 999
  364. }
  365. { .mfi
  366. ldfe atan2_P12 = [EXP_AD_P2],16
  367. //(atan2_sT) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0
  368. (p6) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0 // swap: wp = z1_Y^2
  369. nop.i 999
  370. ;;
  371. }
  372. { .mfi
  373. ldfe atan2_P13 = [EXP_AD_P1],16
  374. //(atan2_sF) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
  375. (p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0 // no swap: z = z1_X * B1X
  376. nop.i 999
  377. }
  378. { .mfi
  379. ldfe atan2_P14 = [EXP_AD_P2],16
  380. //(atan2_sT) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
  381. (p6) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0 // swap: z = z1_Y * B1Y
  382. nop.i 999
  383. ;;
  384. }
  385. { .mfi
  386. ldfe atan2_P15 = [EXP_AD_P1],16
  387. //(atan2_sF) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0
  388. (p7) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0 // no swap: B1sq = B1X^2
  389. nop.i 999
  390. }
  391. { .mfi
  392. ldfe atan2_P0 = [EXP_AD_P2],16
  393. //(atan2_sT) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
  394. (p6) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0 // swap: B1sq = B1Y^2
  395. nop.i 999
  396. ;;
  397. }
  398. // p12 ==> X NaN, quiet and return
  399. { .mfi
  400. ldfe atan2_P1 = [EXP_AD_P1],16
  401. fmerge.s atan2_Umax = f0,atan2_Umax // Umax = |Umax|
  402. nop.i 999
  403. }
  404. { .mfb
  405. ldfe atan2_P2 = [EXP_AD_P2],16
  406. (p12) fma.d f8 = f9,f8,f0 // result = X*Y + 0, i.e. the NaN propagated through an arithmetic op
  407. (p12) br.ret.spnt b0
  408. ;;
  409. }
  410. // p10 ==> x inf y ?
  411. // p11 ==> x !inf y ?
  412. { .mfi
  413. ldfe atan2_P3 = [EXP_AD_P1],16
  414. fmerge.s atan2_Vmin = f0,atan2_Vmin // Vmin = |Vmin|
  415. nop.i 999
  416. }
  417. { .mfi
  418. ldfe atan2_Pi_by_2 = [EXP_AD_P2],16
  419. fclass.m.unc p10,p11 = f9, 0x23 // p10 = X is +-inf
  420. nop.i 999
  421. ;;
  422. }
  423. { .mmf
  424. ldfe atan2_P22 = [EXP_AD_P1],16
  425. ldfe atan2_pi = [EXP_AD_P2],16
  426. nop.f 999
  427. ;;
  428. }
  429. { .mfi
  430. nop.m 999
  431. fma.s0 atan2_NORM_X = f9,f1,f0 // X*1+0 in s0; result is not read later in this listing - presumably run for its s0 flag side effects, TODO confirm
  432. nop.i 999
  433. }
  434. { .mfi
  435. nop.m 999
  436. fma.s0 atan2_NORM_Y = f8,f1,f0 // Y*1+0 in s0; same remark as for NORM_X
  437. nop.i 999
  438. ;;
  439. }
  440. { .mfi
  441. ldfe atan2_pi_by_4 = [EXP_AD_P1],16
  442. fma.s1 atan2_w = atan2_wp, atan2_B1sq,f0 // w = wp * B1sq = z^2
  443. nop.i 999
  444. }
  445. { .mfi
  446. ldfe atan2_3pi_by_4 = [EXP_AD_P2],16
  447. //(atan2_sT) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY
  448. (p6) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY // swap: sgnXY = -sgnXY
  449. nop.i 999
  450. ;;
  451. }
  452. // p12 ==> x inf y inf
  453. // p13 ==> x inf y !inf
  454. { .mfi
  455. nop.m 999
  456. fmerge.s atan2_z = f0, atan2_z // z = |z|
  457. nop.i 999
  458. }
  459. { .mfi
  460. nop.m 99
  461. (p10) fclass.m.unc p12,p13 = f8, 0x23
  462. nop.i 999
  463. }
  464. { .mfi
  465. nop.m 99
  466. (p11) fclass.m.unc p14,p15 = f8, 0x23 // p14 = X finite and Y is +-inf
  467. nop.i 999
  468. ;;
  469. }
  470. { .mfi
  471. nop.m 999
  472. (p12) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1 // both inf: p10 = X positive, p11 = X negative
  473. nop.i 99
  474. ;;
  475. }
  476. { .mfb
  477. nop.m 999
  478. (p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0 // Y inf, X finite: result = sgnY * pi/2
  479. (p14) br.ret.spnt b0
  480. ;;
  481. }
  482. { .mfi
  483. nop.m 999
  484. fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10 // V13 = P10 + P11*w
  485. nop.i 999
  486. }
  487. { .mfi
  488. nop.m 999
  489. fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20 // W11 = P20 + P21*w
  490. nop.i 999
  491. ;;
  492. }
  493. { .mfi
  494. nop.m 999
  495. fma.s1 atan2_E = atan2_Vmin, atan2_z, atan2_Umax // E = Umax + Vmin*z
  496. nop.i 999
  497. }
  498. { .mfi
  499. nop.m 999
  500. fnma.s1 atan2_gamma = atan2_Umax, atan2_z, f1 // gamma = 1 - Umax*z; NOTE(review): no later read of atan2_gamma in this listing
  501. nop.i 999
  502. ;;
  503. }
  504. { .mfi
  505. nop.m 999
  506. fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8 // V11 = P8 + P9*w
  507. nop.i 999
  508. }
  509. { .mfi
  510. nop.m 999
  511. fma.s1 atan2_V12 = atan2_w, atan2_w, f0 // V12 = w^2
  512. nop.i 999
  513. ;;
  514. }
  515. { .mfi
  516. nop.m 999
  517. fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4 // V7 = P4 + P5*w
  518. nop.i 999
  519. }
  520. { .mfi
  521. nop.m 999
  522. fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6 // V8 = P6 + P7*w
  523. nop.i 999
  524. ;;
  525. }
  526. { .mfi
  527. nop.m 999
  528. fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16 // W7 = P16 + P17*w
  529. nop.i 999
  530. }
  531. { .mfi
  532. nop.m 999
  533. fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18 // W8 = P18 + P19*w
  534. nop.i 999
  535. ;;
  536. }
  537. { .mfi
  538. nop.m 999
  539. fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12 // W3 = P12 + P13*w
  540. nop.i 999
  541. }
  542. { .mfi
  543. nop.m 999
  544. fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14 // W4 = P14 + P15*w
  545. nop.i 999
  546. ;;
  547. }
  548. { .mfi
  549. nop.m 999
  550. fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0 // V3 = P0 + P1*w
  551. nop.i 999
  552. }
  553. { .mfi
  554. nop.m 999
  555. fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2 // V4 = P2 + P3*w
  556. nop.i 999
  557. ;;
  558. }
  559. { .mfi
  560. nop.m 999
  561. fma.s1 atan2_zcub = atan2_z, atan2_w, f0 // zcub = z*w = z^3
  562. nop.i 999
  563. }
  564. { .mfi
  565. nop.m 999
  566. fnma.s1 atan2_gV = atan2_Umax, atan2_z, atan2_Vmin // gV = Vmin - Umax*z
  567. nop.i 999
  568. ;;
  569. }
  570. { .mfi
  571. nop.m 999
  572. frcpa.s1 atan2_F,p15 = f1, atan2_E // F ~ 1/E; overwrites p15
  573. nop.i 999
  574. }
  575. { .mfi
  576. nop.m 999
  577. fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11 // V10 = V11 + V13*w^2
  578. nop.i 999
  579. ;;
  580. }
  581. { .mfi
  582. nop.m 999
  583. fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7 // V6 = V7 + V8*w^2
  584. nop.i 999
  585. }
  586. { .mfi
  587. nop.m 999
  588. fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0 // V9 = w^4
  589. nop.i 999
  590. ;;
  591. }
  592. { .mfi
  593. nop.m 999
  594. fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11 // W10 = W11 + P22*w^2
  595. nop.i 999
  596. }
  597. { .mfi
  598. nop.m 999
  599. fma.s1 atan2_W6 = atan2_V12, atan2_W8 , atan2_W7 // W6 = W7 + W8*w^2
  600. nop.i 999
  601. ;;
  602. }
  603. { .mfi
  604. nop.m 999
  605. fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3 // W2 = W3 + W4*w^2
  606. nop.i 999
  607. }
  608. { .mfi
  609. nop.m 999
  610. fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3 // V2 = V3 + V4*w^2
  611. nop.i 999
  612. ;;
  613. }
  614. // Both X and Y are INF (p10/p11 come from the fcmp predicated on p12; both are clear otherwise)
  615. // p10 ==> X +
  616. // p11 ==> X -
  617. .pred.rel "mutex",p10,p11
  618. { .mfb
  619. nop.m 999
  620. (p10) fma.d f8 = atan2_sgnY, atan2_pi_by_4, f0 // result = sgnY * pi/4
  621. (p10) br.ret.spnt b0
  622. }
  623. { .mfb
  624. nop.m 999
  625. (p11) fma.d f8 = atan2_sgnY, atan2_3pi_by_4, f0 // result = sgnY * 3pi/4
  626. (p11) br.ret.spnt b0
  627. ;;
  628. }
  629. .pred.rel "mutex",p8,p9,p6
  630. { .mfi
  631. nop.m 999
  632. fnma.s1 atan2_alpha = atan2_E, atan2_F, f1 // alpha = 1 - E*F
  633. nop.i 999
  634. }
  635. { .mfi
  636. nop.m 999
  637. fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two // alpha_1 = 2 - E*F
  638. nop.i 999
  639. ;;
  640. }
  641. { .mfi
  642. nop.m 999
  643. //(atan2_sT) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2
  644. (p6) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2 // swap: P = pi/2 with the sign of Y
  645. nop.i 999
  646. }
  647. { .mfi
  648. nop.m 999
  649. fma.s1 atan2_gVF = atan2_gV, atan2_F, f0 // gVF = gV * F
  650. nop.i 999
  651. ;;
  652. }
  653. { .mfi
  654. nop.m 999
  655. fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6 // V5 = V6 + V10*w^4
  656. nop.i 999
  657. }
  658. { .mfi
  659. nop.m 999
  660. fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0 // W12 = V9^2
  661. nop.i 999
  662. ;;
  663. }
  664. { .mfi
  665. nop.m 999
  666. (p8) fmerge.s atan2_P = atan2_sgnY, f0 // no swap, X positive: P = 0 with the sign of Y
  667. nop.i 999
  668. }
  669. { .mfi
  670. nop.m 999
  671. fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6 // W5 = W6 + W10*w^4
  672. nop.i 999
  673. ;;
  674. }
  675. { .mfi
  676. nop.m 999
  677. (p9) fmerge.s atan2_P = atan2_sgnY, atan2_pi // no swap, X negative: P = pi with the sign of Y
  678. nop.i 999
  679. ;;
  680. }
  681. { .mfi
  682. nop.m 999
  683. fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0 // alpha_sq = alpha^2
  684. nop.i 999
  685. }
  686. { .mfi
  687. nop.m 999
  688. fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1 // Cp = 1 + alpha*alpha_1
  689. nop.i 999
  690. ;;
  691. }
  692. { .mfi
  693. nop.m 999
  694. fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2 // V1 = V2 + V5*w^4
  695. nop.i 999
  696. }
  697. { .mfi
  698. nop.m 999
  699. fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0 // W12 = V9^3
  700. nop.i 999
  701. ;;
  702. }
  703. // p13 ==> x inf y !inf
  704. { .mfi
  705. nop.m 999
  706. fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2 // W1 = W2 + W5*w^4
  707. nop.i 999
  708. }
  709. { .mfi
  710. nop.m 999
  711. (p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1 // p10 = X is +inf, p11 = X is -inf (both clear when p13 is clear)
  712. nop.i 999
  713. ;;
  714. }
  715. { .mfi
  716. nop.m 999
  717. fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0 // alpha_cub = alpha^3
  718. nop.i 999
  719. }
  720. { .mfi
  721. nop.m 999
  722. fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0 // C = gVF * Cp
  723. nop.i 999
  724. ;;
  725. }
  726. .pred.rel "mutex",p10,p11
  727. // x inf y !inf
  728. { .mfb
  729. nop.m 999
  730. (p10) fmerge.s f8 = atan2_sgnY, f0 // X = +inf: result = 0 with the sign of Y
  731. (p10) br.ret.spnt b0
  732. }
  733. { .mfb
  734. nop.m 999
  735. (p11) fma.d f8 = atan2_sgnY, atan2_pi, f0 // X = -inf: result = sgnY * pi
  736. (p11) br.ret.spnt b0
  737. ;;
  738. }
  739. // p10 ==> y 0 x?
  740. // p11 ==> y !0 x?
  741. { .mfi
  742. nop.m 999
  743. fclass.m.unc p10,p11 = f8, 0x07 // p10 = Y is +-0
  744. nop.i 999
  745. ;;
  746. }
  747. { .mfi
  748. nop.m 999
  749. fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1 // Pp = V1 + W1*W12
  750. nop.i 999
  751. }
  752. { .mfi
  753. nop.m 999
  754. fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C // d = C + alpha_cub*C
  755. nop.i 999
  756. ;;
  757. }
  758. // p12 ==> y0 x0
  759. // p13 ==> y0 x!0
  760. // p14 ==> y!0 x0
  761. // p15 ==> y!0 x!0
  762. { .mfi
  763. nop.m 999
  764. (p10) fclass.m.unc p12,p13 = f9, 0x07
  765. nop.i 999
  766. }
  767. { .mfi
  768. nop.m 999
  769. (p11) fclass.m.unc p14,p15 = f9, 0x07
  770. nop.i 999
  771. ;;
  772. }
  773. { .mfb
  774. nop.m 999
  775. (p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1 // Y zero, X nonzero: p10 = X positive, p11 = X negative
  776. (p12) br.spnt ATAN2_ERROR // both arguments are zero
  777. ;;
  778. }
  779. { .mfi
  780. nop.m 999
  781. fma.s1 atan2_pd = atan2_P0, atan2_d, f0 // pd = P0 * d
  782. nop.i 999
  783. }
  784. { .mfi
  785. nop.m 999
  786. fma.s1 atan2_dsq = atan2_d, atan2_d, f0 // dsq = d^2
  787. nop.i 999
  788. ;;
  789. }
  790. { .mfi
  791. nop.m 999
  792. fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z // A_hi = z + z^3 * Pp
  793. nop.i 999
  794. }
  795. { .mfb
  796. nop.m 999
  797. (p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0 // Y nonzero, X zero: result = sgnY * pi/2
  798. (p14) br.ret.spnt b0
  799. ;;
  800. }
  801. { .mfb
  802. nop.m 999
  803. (p10) fmerge.s f8 = atan2_sgnY, f0 // Y zero, X positive: result = 0 with the sign of Y
  804. (p10) br.ret.spnt b0
  805. }
  806. { .mfb
  807. nop.m 999
  808. (p11) fma.d f8 = atan2_sgnY, atan2_pi, f0 // Y zero, X negative: result = sgnY * pi
  809. (p11) br.ret.spnt b0
  810. ;;
  811. }
  812. { .mfi
  813. nop.m 999
  814. fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d // A_lo = d + P0*d^3
  815. nop.i 999
  816. ;;
  817. }
  818. { .mfi
  819. nop.m 999
  820. fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo // A = A_hi + A_lo
  821. nop.i 999
  822. ;;
  823. }
  824. { .mfb
  825. nop.m 999
  826. fma.d f8 = atan2_sgnXY, atan2_A, atan2_P // main-path result = sgnXY*A + P, rounded to double
  827. br.ret.sptk b0
  828. }
  829. ATAN2_ERROR: // reached when both Y and X are zero; builds the default result in f10 and falls through past .endp
  830. { .mfi
  831. nop.m 999
  832. fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1 // p10 = X is +0, p11 = X is -0
  833. nop.i 999
  834. }
  835. ;;
  836. { .mfi
  837. mov atan2_GR_tag = 37 // error tag for double-precision atan2 (see header)
  838. (p10) fmerge.s f10 = atan2_sgnY, f0 // X = +0: f10 = 0 with the sign of Y
  839. nop.i 999
  840. }
  841. { .mfi
  842. nop.m 999
  843. (p11) fma.d f10 = atan2_sgnY, atan2_pi, f0 // X = -0: f10 = sgnY * pi
  844. nop.i 999
  845. ;;
  846. }
  847. .endp atan2#
  848. // Stack operations when calling error support.
  849. // (1) (2) (3) (call) (4)
  850. // sp -> + psp -> + psp -> + sp -> +
  851. // | | | |
  852. // | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
  853. // | | | |
  854. // | <-GR_Y Y2->| Y2 ->| <- GR_Y |
  855. // | | | |
  856. // | | <- GR_X X1 ->| |
  857. // | | | |
  858. // sp-64 -> + sp -> + sp -> + +
  859. // save ar.pfs save b0 restore gp
  860. // save gp restore ar.pfs
  861. .proc __libm_error_region
  862. __libm_error_region: // builds a 64-byte frame holding X, Y and the default result, calls __libm_error_support, then returns the stored result in f8
  863. .prologue
  864. // (1) reserve the frame; GR_Parameter_Y is computed from the old sp, so it equals new sp + 32
  865. { .mfi
  866. add GR_Parameter_Y=-32,sp // Parameter 2 slot address (old sp - 32)
  867. nop.f 999
  868. .save ar.pfs,GR_SAVE_PFS
  869. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  870. }
  871. { .mfi
  872. .fframe 64
  873. add sp=-64,sp // Create new stack
  874. nop.f 0
  875. mov GR_SAVE_GP=gp // Save gp
  876. };;
  877. // (2)
  878. { .mmi
  879. stfd [GR_Parameter_Y] = f8,16 // STORE f8 (Y) at sp+32; pointer then advances to sp+48
  880. add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
  881. .save b0, GR_SAVE_B0
  882. mov GR_SAVE_B0=b0 // Save b0
  883. };;
  884. .body
  885. // (3)
  886. { .mib
  887. stfd [GR_Parameter_X] = f9 // STORE f9 (X) at sp+16
  888. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
  889. nop.b 0
  890. }
  891. { .mib
  892. stfd [GR_Parameter_Y] = f10 // STORE Parameter 3 (default result) at sp+48
  893. add GR_Parameter_Y = -16,GR_Parameter_Y // point GR_Parameter_Y back at the f8 slot (sp+32)
  894. br.call.sptk b0=__libm_error_support# // Call error handling function
  895. };;
  896. { .mmi
  897. nop.m 0
  898. nop.m 0
  899. add GR_Parameter_RESULT = 48,sp // recompute the result slot address after the call
  900. };;
  901. // (4)
  902. { .mmi
  903. ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
  904. .restore
  905. add sp = 64,sp // Restore stack pointer
  906. mov b0 = GR_SAVE_B0 // Restore return address
  907. };;
  908. { .mib
  909. mov gp = GR_SAVE_GP // Restore gp
  910. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  911. br.ret.sptk b0 // Return
  912. };;
  913. .endp __libm_error_region
  914. .type __libm_error_support#,@function
  915. .global __libm_error_support#