Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

918 lines
26 KiB

  1. .file "atan2f.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. // History
  26. //==============================================================
  27. // ?/??/00 Initial version
  28. // 8/15/00 Bundle added after call to __libm_error_support to properly
  29. // set [the previously overwritten] GR_Parameter_RESULT.
  30. // 8/17/00 Changed predicate register macro-usage to direct predicate
  31. // names due to an assembler bug.
  32. // Description
  33. //=========================================
  34. // The atan2 function computes the principal value of the arc tangent of y/x using
  35. // the signs of both arguments to determine the quadrant of the return value.
  36. // A domain error may occur if both arguments are zero.
  37. // The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
  38. // Special values
  39. //==============================================================
  40. // Y x Result
  41. // +number +inf +0
  42. // -number +inf -0
  43. // +number -inf +pi
  44. // -number -inf -pi
  45. //
  46. // +inf +number +pi/2
  47. // -inf +number -pi/2
  48. // +inf -number +pi/2
  49. // -inf -number -pi/2
  50. //
  51. // +inf +inf +pi/4
  52. // -inf +inf -pi/4
  53. // +inf -inf +3pi/4
  54. // -inf -inf -3pi/4
  55. //
  56. // +1 +1 +pi/4
  57. // -1 +1 -pi/4
  58. // +1 -1 +3pi/4
  59. // -1 -1 -3pi/4
  60. //
  61. // +number +0 +pi/2 // does not raise DBZ
  62. // -number +0 -pi/2 // does not raise DBZ
  63. // +number -0 +pi/2 // does not raise DBZ
  64. // -number -0 -pi/2 // does not raise DBZ
  65. //
  66. // +0 +number +0
  67. // -0 +number -0
  68. // +0 -number +pi
  69. // -0 -number -pi
  70. //
  71. // +0 +0 +0 // does not raise invalid
  72. // -0 +0 -0 // does not raise invalid
  73. // +0 -0 +pi // does not raise invalid
  74. // -0 -0 -pi // does not raise invalid
  75. //
  76. // NaN anything quiet Y
  77. // anything NaN quiet X
  78. // atan(+-0/+-0) sets double error tag to 37
  79. // atan(+-0/+-0) sets single error tag to 38
  80. // These are domain errors.
  81. //
  82. // Assembly macros
  83. //=========================================
  // Symbolic names for the registers used by atan2f and __libm_error_region.
  // Every name below is a plain assembler symbol assignment; no code is emitted.
  84. // integer registers
  // r33/r34 : addresses of atan2f_coef_table1 / atan2f_coef_table2.
  // r35-r37 : copies of b0, ar.pfs and gp kept across the call made in
  //           __libm_error_region.
  // r38-r41 : the four outgoing registers of the frame created by
  //           "alloc r32 = ar.pfs,1,5,4,0" (1 input, 5 locals, 4 outputs);
  //           they carry the arguments passed to __libm_error_support.
  85. atan2f_GR_Addr_1 = r33
  86. atan2f_GR_Addr_2 = r34
  87. GR_SAVE_B0 = r35
  88. GR_SAVE_PFS = r36
  89. GR_SAVE_GP = r37
  90. GR_Parameter_X = r38
  91. GR_Parameter_Y = r39
  92. GR_Parameter_RESULT = r40
  93. GR_Parameter_TAG = r41
  94. // floating point registers
  // f32-f41 : polynomial coefficients p1..p10 loaded from the two tables.
  //           The register order follows the ldfpd pair order, not the
  //           coefficient index. f42 is not assigned a name here.
  95. atan2f_coef_p1 = f32
  96. atan2f_coef_p7 = f33
  97. atan2f_coef_p8 = f34
  98. atan2f_coef_p3 = f35
  99. atan2f_coef_p4 = f36
  100. atan2f_coef_p9 = f37
  101. atan2f_coef_p10 = f38
  102. atan2f_coef_p2 = f39
  103. atan2f_coef_p5 = f40
  104. atan2f_coef_p6 = f41
  // const_1 is the multiplier applied to pi when forming the quadrant
  // constant C (sgn_Y*0 when x is positive, sgn_Y when x is negative).
  105. atan2f_const_1 = f43
  106. atan2f_const_pi = f44
  // |y|, |x| and +-1.0 carrying the sign of y / x (built with fmerge.s).
  107. atan2f_abs_Y = f45
  108. atan2f_abs_X = f46
  109. atan2f_sgn_Y = f47
  110. atan2f_sgn_X = f48
  // "A" family: A = frcpa(x)*y (approximation of y/x), its powers, and the
  // partial sums of the odd polynomial evaluated at A.
  111. atan2f_A = f49
  112. atan2f_Asq = f50
  113. atan2f_Acub = f51
  114. atan2f_A4 = f52
  115. atan2f_A5 = f53
  116. atan2f_A6 = f54
  117. atan2f_A11 = f55
  118. atan2f_poly_A1 = f56
  119. atan2f_poly_A2 = f57
  120. atan2f_poly_A3 = f58
  121. atan2f_poly_A4 = f59
  122. atan2f_poly_A5 = f60
  123. atan2f_poly_atan_A = f61
  // answer = C +/- polynomial; C = quadrant constant (see atan2f body).
  124. atan2f_answer = f62
  125. atan2f_C = f63
  // "G" family: correction term for the A path.
  //   G_numer = y - A*x, G_denom = x + A*y, H1..H3 = successively refined
  //   reciprocal of G_denom, g = H1*G_numer.
  126. atan2f_G_numer = f64
  127. atan2f_G_denom = f65
  128. atan2f_H1 = f66
  129. atan2f_H_beta = f67
  130. atan2f_H2 = f68
  131. atan2f_H_beta2 = f69
  132. atan2f_H3 = f70
  133. atan2f_g = f71
  134. atan2f_gsq = f72
  135. atan2f_poly_atan_G = f73
  // "Z" family: Z = frcpa(y)*x (approximation of x/y), its powers, and the
  // partial sums of the same odd polynomial evaluated at Z.
  136. atan2f_Z = f74
  137. atan2f_Zsq = f75
  138. atan2f_Zcub = f76
  139. atan2f_Z4 = f77
  140. atan2f_Z5 = f78
  141. atan2f_Z6 = f79
  142. atan2f_Z11 = f80
  143. atan2f_poly_Z1 = f81
  144. atan2f_poly_Z2 = f82
  145. atan2f_poly_Z3 = f83
  146. atan2f_poly_Z4 = f84
  147. atan2f_poly_Z5 = f85
  // "T" family: correction term for the Z path.
  //   T_numer = x - Z*y, T_denom = y + Z*x, S1..S3 = successively refined
  //   reciprocal of T_denom, t = S1*T_numer.
  148. atan2f_T_numer = f86
  149. atan2f_T_denom = f87
  150. atan2f_S1 = f88
  151. atan2f_S_beta = f89
  152. atan2f_S2 = f90
  153. atan2f_S_beta2 = f91
  154. atan2f_S3 = f92
  155. atan2f_t = f93
  156. atan2f_tsq = f94
  157. atan2f_poly_atan_T = f95
  158. atan2f_poly_atan_Z = f96
  // Multiples of pi loaded from the tails of the two coefficient tables.
  159. atan2f_const_piby4 = f97
  160. atan2f_const_3piby4 = f98
  161. atan2f_const_piby2 = f99
  162. // predicate registers
  // These names are left commented out: the code uses p6-p9 directly (see
  // the 8/17/00 history entry about an assembler bug with predicate macros).
  //   p6/p7 = |y| > |x| / otherwise,  p8/p9 = x positive / x negative.
  163. //atan2f_Pred_Swap = p6
  164. //atan2f_Pred_noSwap = p7
  165. //atan2f_Pred_Xpos = p8
  166. //atan2f_Pred_Xneg = p9
  // Constant tables for atan2f. Every entry is an 8-byte (data8) bit pattern
  // that the code loads as a double-precision value with ldfpd, two entries
  // (16 bytes) at a time. The entry order therefore mirrors the ldfpd pair
  // order in atan2f, not the coefficient index:
  //   table1: (p1,p7) (p4,p9) (p5,p6) (pi/4,3pi/4)
  //   table2: (p8,p3) (p10,p2) (pi/2,pi)
  // Reordering or inserting entries requires matching changes to the loads.
  167. .data
  168. .align 16
  169. atan2f_coef_table1:
  170. data8 0xBFD5555512191621 // p1
  171. data8 0xBFA6E10BA401393F // p7
  172. data8 0x3FBC4F512B1865F5 // p4
  173. data8 0xBF7DEAADAA336451 // p9
  174. data8 0xBFB68EED6A8CFA32 // p5
  175. data8 0x3FB142A73D7C54E3 // p6
  176. data8 0x3fe921fb54442d18 // pi/4
  177. data8 0x4002d97c7f3321d2 // 3pi/4
  // Second table; read through atan2f_GR_Addr_2 in parallel with table1.
  178. atan2f_coef_table2:
  179. data8 0x3F97105B4160F86B // p8
  180. data8 0xBFC2473C5145EE38 // p3
  181. data8 0x3F522E5D33BC9BAA // p10
  182. data8 0x3FC9997E7AFBFF4E // p2
  183. data8 0x3ff921fb54442d18 // pi/2
  184. data8 0x400921fb54442d18 // pi
  //--------------------------------------------------------------------------
  // atan2f -- main (finite, non-zero operands) path.
  //
  // In:   f8 = y, f9 = x           Out: f8 = result, rounded with fma.s/fms.s
  //
  // Method, as implemented below (P is the odd polynomial
  //   P(u) = u + p1*u^3 + p2*u^5 + ... + p10*u^21):
  //   A = frcpa(x)*y                 (approximation of y/x)
  //   Z = frcpa(y)*x                 (approximation of x/y)
  //   g = (y - A*x) / (x + A*y)      correction for the error in A
  //   t = (x - Z*y) / (y + Z*x)      correction for the error in Z
  //   |y| <= |x| (p7): result = C + P(A) + (g + p1*g^3), C = const_1*pi
  //   |y| >  |x| (p6): result = C - P(Z) - (t + p1*t^3), C = sgn_Y*(pi/2)
  // Both the A and the Z variants are computed unconditionally; only the last
  // few operations are predicated on p6/p7.
  //
  // Operands that are zero, infinite or NaN branch to ATAN2F_XY_INF_NAN_ZERO.
  // Intermediate operations use the .s1 completer; the two final operations
  // that write f8 use .s.
  //--------------------------------------------------------------------------
  185. .global atan2f
  186. .text
  187. .proc atan2f
  188. .align 32
  189. atan2f:
  // Frame: 1 input, 5 locals, 4 outputs, 0 rotating. Start the reciprocal
  // of y and fetch the linkage-table entries for the two constant tables.
  190. { .mfi
  191. alloc r32 = ar.pfs,1,5,4,0
  192. frcpa.s1 atan2f_Z,p0 = f1,f8
  193. addl atan2f_GR_Addr_2 = @ltoff(atan2f_coef_table2),gp
  194. }
  195. { .mfi
  196. addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
  197. nop.f 999
  198. nop.i 999;;
  199. }
  200. { .mfi
  201. nop.m 999
  202. frcpa.s1 atan2f_A,p0 = f1,f9
  203. nop.i 999;;
  204. }
  // Replace each linkage-table address with the table address it holds.
  // sgn_X / sgn_Y = 1.0 carrying the sign of x / y.
  205. { .mfi
  206. ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1]
  207. fmerge.s atan2f_sgn_X = f9,f1
  208. nop.i 999
  209. }
  210. { .mfi
  211. ld8 atan2f_GR_Addr_2 = [atan2f_GR_Addr_2]
  212. nop.f 999
  213. nop.i 999;;
  214. }
  215. { .mfi
  216. nop.m 999
  217. fmerge.s atan2f_sgn_Y = f8,f1
  218. nop.i 999;;
  219. }
  // abs_X / abs_Y = x / y with the sign taken from f0 (i.e. cleared).
  220. { .mfi
  221. nop.m 999
  222. fmerge.s atan2f_abs_X = f0,f9
  223. nop.i 999;;
  224. }
  // Coefficient loads (two doubles per ldfpd) are interleaved with the start
  // of the arithmetic. Z = frcpa(y)*x, A = frcpa(x)*y.
  225. { .mfi
  226. ldfpd atan2f_coef_p1,atan2f_coef_p7 = [atan2f_GR_Addr_1],16
  227. fmerge.s atan2f_abs_Y = f0,f8
  228. nop.i 999
  229. }
  230. { .mfi
  231. ldfpd atan2f_coef_p8,atan2f_coef_p3 = [atan2f_GR_Addr_2],16
  232. fma.s1 atan2f_Z = atan2f_Z,f9,f0
  233. nop.i 999;;
  234. }
  // p10 = x is zero, infinite or NaN (tested here, branched on further down).
  235. { .mfi
  236. ldfpd atan2f_coef_p4,atan2f_coef_p9 = [atan2f_GR_Addr_1],16
  237. fclass.m p10,p0 = f9,0xe7 // @inf|@snan|@qnan|@zero
  238. nop.i 999
  239. }
  240. { .mfi
  241. ldfpd atan2f_coef_p10,atan2f_coef_p2 = [atan2f_GR_Addr_2],16
  242. fma.s1 atan2f_A = atan2f_A,f8,f0
  243. nop.i 999;;
  244. }
  // p8 = (sgn_X >= 1.0), i.e. x carries a positive sign; p9 = the complement.
  245. { .mfi
  246. ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2]
  247. // fcmp.ge.s1 atan2f_Pred_Xpos,atan2f_Pred_Xneg = atan2f_sgn_X,f1
  248. fcmp.ge.s1 p8,p9 = atan2f_sgn_X,f1
  249. nop.i 999
  250. }
  251. { .mfi
  252. ldfpd atan2f_coef_p5,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
  253. nop.f 999
  254. nop.i 999;;
  255. }
  // p11 = y is zero, infinite or NaN.
  256. { .mfi
  257. ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_1]
  258. fclass.m p11,p0 = f8,0xe7 // @inf|@snan|@qnan|@zero
  259. nop.i 999;;
  260. }
  // const_1 = sgn_Y*0 when x is positive, sgn_Y*1 when x is negative. It is
  // later multiplied by pi to form the quadrant constant of the no-swap case.
  261. { .mfi
  262. nop.m 999
  263. //(atan2f_Pred_Xpos) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
  264. (p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
  265. nop.i 999
  266. }
  267. { .mfi
  268. nop.m 999
  269. //(atan2f_Pred_Xneg) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
  270. (p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
  271. nop.i 999;;
  272. }
  // Leave the main path if either operand is special. All table constants,
  // sgn_X and sgn_Y have already been set up at this point, so the special
  // path can use them.
  273. { .mfb
  274. nop.m 999
  275. fma.s1 atan2f_Zsq = atan2f_Z,atan2f_Z,f0
  276. (p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO
  277. }
  // T_denom = y + Z*x
  278. { .mfb
  279. nop.m 999
  280. fma.s1 atan2f_T_denom = atan2f_Z,f9,f8
  281. (p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO
  282. }
  283. ;;
  284. { .mfi
  285. nop.m 999
  286. fma.s1 atan2f_Asq = atan2f_A,atan2f_A,f0
  287. nop.i 999
  288. }
  // G_denom = x + A*y
  289. { .mfi
  290. nop.m 999
  291. fma.s1 atan2f_G_denom = atan2f_A,f8,f9
  292. nop.i 999;;
  293. }
  // G_numer = y - A*x,  T_numer = x - Z*y
  294. { .mfi
  295. nop.m 999
  296. fnma.s1 atan2f_G_numer = atan2f_A,f9,f8
  297. nop.i 999
  298. }
  299. { .mfi
  300. nop.m 999
  301. fnma.s1 atan2f_T_numer = atan2f_Z,f8,f9
  302. nop.i 999;;
  303. }
  // p6 ("swap") = |y| > |x|: the Z (x/y) variant supplies the result.
  // p7 ("noSwap") = otherwise: the A (y/x) variant supplies the result.
  304. { .mfi
  305. nop.m 999
  306. // fcmp.gt.s1 atan2f_Pred_Swap,atan2f_Pred_noSwap = atan2f_abs_Y,atan2f_abs_X
  307. fcmp.gt.s1 p6,p7 = atan2f_abs_Y,atan2f_abs_X
  308. nop.i 999;;
  309. }
  // Polynomial evaluation. The partial sums are built up in parallel:
  //   poly_*4 accumulates u + p1*u^3 + u^5*(p2 + p3*u^2 + p4*u^4)
  //   poly_*1 accumulates p5 + p6*u^2 + p7*u^4 + p8*u^6 + p9*u^8 + p10*u^10
  // and are finally combined as u^11*poly_*1 + poly_*4 (u = A or Z).
  310. { .mfi
  311. nop.m 999
  312. fma.s1 atan2f_poly_A4 = atan2f_A,atan2f_coef_p1,f0
  313. nop.i 999
  314. }
  315. { .mfi
  316. nop.m 999
  317. fma.s1 atan2f_poly_Z4 = atan2f_Z,atan2f_coef_p1,f0
  318. nop.i 999;;
  319. }
  320. { .mfi
  321. nop.m 999
  322. fma.s1 atan2f_poly_Z2 = atan2f_Zsq,atan2f_coef_p8,atan2f_coef_p7
  323. nop.i 999
  324. }
  325. { .mfi
  326. nop.m 999
  327. fma.s1 atan2f_poly_Z5 = atan2f_Zsq,atan2f_coef_p4,atan2f_coef_p3
  328. nop.i 999;;
  329. }
  330. { .mfi
  331. nop.m 999
  332. fma.s1 atan2f_Z4 = atan2f_Zsq,atan2f_Zsq,f0
  333. nop.i 999
  334. }
  335. { .mfi
  336. nop.m 999
  337. fma.s1 atan2f_poly_Z1 = atan2f_Zsq,atan2f_coef_p10,atan2f_coef_p9
  338. nop.i 999;;
  339. }
  // S1 = initial reciprocal approximation of T_denom.
  340. { .mfi
  341. nop.m 999
  342. frcpa.s1 atan2f_S1,p0 = f1,atan2f_T_denom
  343. nop.i 999
  344. }
  345. { .mfi
  346. nop.m 999
  347. fma.s1 atan2f_Zcub = atan2f_Z,atan2f_Zsq,f0
  348. nop.i 999;;
  349. }
  // H1 = initial reciprocal approximation of G_denom.
  350. { .mfi
  351. nop.m 999
  352. frcpa.s1 atan2f_H1,p0 = f1,atan2f_G_denom
  353. nop.i 999
  354. }
  355. { .mfi
  356. nop.m 999
  357. fma.s1 atan2f_poly_A5 = atan2f_Asq,atan2f_coef_p4,atan2f_coef_p3
  358. nop.i 999;;
  359. }
  360. { .mfi
  361. nop.m 999
  362. fma.s1 atan2f_poly_A1 = atan2f_Asq,atan2f_coef_p10,atan2f_coef_p9
  363. nop.i 999
  364. }
  365. { .mfi
  366. nop.m 999
  367. fma.s1 atan2f_poly_A2 = atan2f_Asq,atan2f_coef_p8,atan2f_coef_p7
  368. nop.i 999;;
  369. }
  370. { .mfi
  371. nop.m 999
  372. fma.s1 atan2f_Acub = atan2f_A,atan2f_Asq,f0
  373. nop.i 999
  374. }
  375. { .mfi
  376. nop.m 999
  377. fma.s1 atan2f_A4 = atan2f_Asq,atan2f_Asq,f0
  378. nop.i 999;;
  379. }
  380. { .mfi
  381. nop.m 999
  382. fma.s1 atan2f_poly_Z4 = atan2f_Zsq,atan2f_poly_Z4,atan2f_Z
  383. nop.i 999
  384. }
  385. { .mfi
  386. nop.m 999
  387. fma.s1 atan2f_poly_Z5 = atan2f_Zsq,atan2f_poly_Z5,atan2f_coef_p2
  388. nop.i 999;;
  389. }
  // S_beta = 1 - S1*T_denom (error of the reciprocal), t = S1*T_numer.
  390. { .mfi
  391. nop.m 999
  392. fnma.s1 atan2f_S_beta = atan2f_S1,atan2f_T_denom,f1
  393. nop.i 999
  394. }
  395. { .mfi
  396. nop.m 999
  397. fma.s1 atan2f_t = atan2f_S1,atan2f_T_numer,f0
  398. nop.i 999;;
  399. }
  400. { .mfi
  401. nop.m 999
  402. fma.s1 atan2f_poly_Z3 = atan2f_Zsq,atan2f_coef_p6,atan2f_coef_p5
  403. nop.i 999
  404. }
  405. { .mfi
  406. nop.m 999
  407. fma.s1 atan2f_poly_Z1 = atan2f_Z4,atan2f_poly_Z1,atan2f_poly_Z2
  408. nop.i 999;;
  409. }
  410. { .mfi
  411. nop.m 999
  412. fma.s1 atan2f_Z5 = atan2f_Zsq,atan2f_Zcub,f0
  413. nop.i 999
  414. }
  415. { .mfi
  416. nop.m 999
  417. fma.s1 atan2f_Z6 = atan2f_Zsq,atan2f_Z4,f0
  418. nop.i 999;;
  419. }
  // H_beta = 1 - H1*G_denom (error of the reciprocal), g = H1*G_numer.
  420. { .mfi
  421. nop.m 999
  422. fnma.s1 atan2f_H_beta = atan2f_H1,atan2f_G_denom,f1
  423. nop.i 999
  424. }
  425. { .mfi
  426. nop.m 999
  427. fma.s1 atan2f_g = atan2f_H1,atan2f_G_numer,f0
  428. nop.i 999;;
  429. }
  430. { .mfi
  431. nop.m 999
  432. fma.s1 atan2f_poly_A4 = atan2f_Asq,atan2f_poly_A4,atan2f_A
  433. nop.i 999
  434. }
  435. { .mfi
  436. nop.m 999
  437. fma.s1 atan2f_poly_A5 = atan2f_Asq,atan2f_poly_A5,atan2f_coef_p2
  438. nop.i 999;;
  439. }
  440. { .mfi
  441. nop.m 999
  442. fma.s1 atan2f_poly_A3 = atan2f_Asq,atan2f_coef_p6,atan2f_coef_p5
  443. nop.i 999
  444. }
  445. { .mfi
  446. nop.m 999
  447. fma.s1 atan2f_poly_A1 = atan2f_A4,atan2f_poly_A1,atan2f_poly_A2
  448. nop.i 999;;
  449. }
  450. { .mfi
  451. nop.m 999
  452. fma.s1 atan2f_A5 = atan2f_Asq,atan2f_Acub,f0
  453. nop.i 999
  454. }
  455. { .mfi
  456. nop.m 999
  457. fma.s1 atan2f_A6 = atan2f_Asq,atan2f_A4,f0
  458. nop.i 999;;
  459. }
  // Cubic part of the t correction: poly_atan_T = p1*t, later times t^2.
  460. { .mfi
  461. nop.m 999
  462. fma.s1 atan2f_tsq = atan2f_t,atan2f_t,f0
  463. nop.i 999
  464. }
  465. { .mfi
  466. nop.m 999
  467. fma.s1 atan2f_poly_atan_T = atan2f_t,atan2f_coef_p1,f0
  468. nop.i 999;;
  469. }
  // Refine the reciprocal of T_denom: S2 = S1 + S1*beta, beta2 = beta^2.
  470. { .mfi
  471. nop.m 999
  472. fma.s1 atan2f_S2 = atan2f_S1,atan2f_S_beta,atan2f_S1
  473. nop.i 999
  474. }
  475. { .mfi
  476. nop.m 999
  477. fma.s1 atan2f_S_beta2 = atan2f_S_beta,atan2f_S_beta,f0
  478. nop.i 999;;
  479. }
  480. { .mfi
  481. nop.m 999
  482. fma.s1 atan2f_poly_Z1 = atan2f_Z4,atan2f_poly_Z1,atan2f_poly_Z3
  483. nop.i 999
  484. }
  485. { .mfi
  486. nop.m 999
  487. fma.s1 atan2f_poly_Z4 = atan2f_Z5,atan2f_poly_Z5,atan2f_poly_Z4
  488. nop.i 999;;
  489. }
  // Cubic part of the g correction: poly_atan_G = p1*g, later times g^2.
  490. { .mfi
  491. nop.m 999
  492. fma.s1 atan2f_poly_atan_G = atan2f_g,atan2f_coef_p1,f0
  493. nop.i 999
  494. }
  495. { .mfi
  496. nop.m 999
  497. fma.s1 atan2f_Z11 = atan2f_Z5,atan2f_Z6,f0
  498. nop.i 999;;
  499. }
  500. { .mfi
  501. nop.m 999
  502. fma.s1 atan2f_H_beta2 = atan2f_H_beta,atan2f_H_beta,f0
  503. nop.i 999
  504. }
  505. { .mfi
  506. nop.m 999
  507. fma.s1 atan2f_gsq = atan2f_g,atan2f_g,f0
  508. nop.i 999;;
  509. }
  // Refine the reciprocal of G_denom: H2 = H1 + H1*beta.
  510. { .mfi
  511. nop.m 999
  512. fma.s1 atan2f_poly_A4 = atan2f_A5,atan2f_poly_A5,atan2f_poly_A4
  513. nop.i 999
  514. }
  515. { .mfi
  516. nop.m 999
  517. fma.s1 atan2f_H2 = atan2f_H1,atan2f_H_beta,atan2f_H1
  518. nop.i 999;;
  519. }
  520. { .mfi
  521. nop.m 999
  522. fma.s1 atan2f_A11 = atan2f_A5,atan2f_A6,f0
  523. nop.i 999
  524. }
  525. { .mfi
  526. nop.m 999
  527. fma.s1 atan2f_poly_A1 = atan2f_A4,atan2f_poly_A1,atan2f_poly_A3
  528. nop.i 999;;
  529. }
  // S3 = S2 + S2*beta^2 (second refinement); poly_atan_T = p1*t^3.
  530. { .mfi
  531. nop.m 999
  532. fma.s1 atan2f_S3 = atan2f_S2,atan2f_S_beta2,atan2f_S2
  533. nop.i 999
  534. }
  535. { .mfi
  536. nop.m 999
  537. fma.s1 atan2f_poly_atan_T = atan2f_tsq,atan2f_poly_atan_T,f0
  538. nop.i 999;;
  539. }
  // Quadrant constant, swap case: C = sgn_Y * pi/2.
  540. { .mfi
  541. nop.m 999
  542. //(atan2f_Pred_Swap) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
  543. (p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
  544. nop.i 999
  545. }
  // poly_atan_Z = Z^11*poly_Z1 + poly_Z4 = P(Z)
  546. { .mfi
  547. nop.m 999
  548. fma.s1 atan2f_poly_atan_Z = atan2f_Z11,atan2f_poly_Z1,atan2f_poly_Z4
  549. nop.i 999;;
  550. }
  // Quadrant constant, no-swap case: C = const_1 * pi.
  551. { .mfi
  552. nop.m 999
  553. //(atan2f_Pred_noSwap) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
  554. (p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
  555. nop.i 999
  556. }
  557. { .mfi
  558. nop.m 999
  559. fma.s1 atan2f_poly_atan_G = atan2f_gsq,atan2f_poly_atan_G,f0
  560. nop.i 999;;
  561. }
  // H3 = H2 + H2*beta^2 (second refinement of 1/G_denom).
  562. { .mfi
  563. nop.m 999
  564. fma.s1 atan2f_H3 = atan2f_H2,atan2f_H_beta2,atan2f_H2
  565. nop.i 999;;
  566. }
  // poly_atan_A = A^11*poly_A1 + poly_A4 = P(A)
  567. { .mfi
  568. nop.m 999
  569. fma.s1 atan2f_poly_atan_A = atan2f_A11,atan2f_poly_A1,atan2f_poly_A4
  570. nop.i 999;;
  571. }
  // poly_atan_T = T_numer*S3 + p1*t^3 (linear term uses the refined quotient)
  572. { .mfi
  573. nop.m 999
  574. fma.s1 atan2f_poly_atan_T = atan2f_T_numer,atan2f_S3,atan2f_poly_atan_T
  575. nop.i 999;;
  576. }
  // Swap case: answer = C - P(Z)
  577. { .mfi
  578. nop.m 999
  579. //(atan2f_Pred_Swap) fms.s1 atan2f_answer = f1,atan2f_C,atan2f_poly_atan_Z
  580. (p6) fms.s1 atan2f_answer = f1,atan2f_C,atan2f_poly_atan_Z
  581. nop.i 999;;
  582. }
  // poly_atan_G = G_numer*H3 + p1*g^3 (linear term uses the refined quotient)
  583. { .mfi
  584. nop.m 999
  585. fma.s1 atan2f_poly_atan_G = atan2f_G_numer,atan2f_H3,atan2f_poly_atan_G
  586. nop.i 999;;
  587. }
  // No-swap case: answer = C + P(A)
  588. { .mfi
  589. nop.m 999
  590. //(atan2f_Pred_noSwap) fma.s1 atan2f_answer = f1,atan2f_C,atan2f_poly_atan_A
  591. (p7) fma.s1 atan2f_answer = f1,atan2f_C,atan2f_poly_atan_A
  592. nop.i 999;;
  593. }
  // Final results, written to f8 with the .s completer:
  //   swap:    f8 = answer - poly_atan_T
  //   no swap: f8 = answer + poly_atan_G
  594. { .mfi
  595. nop.m 999
  596. //(atan2f_Pred_Swap) fms.s f8 = f1,atan2f_answer,atan2f_poly_atan_T
  597. (p6) fms.s f8 = f1,atan2f_answer,atan2f_poly_atan_T
  598. nop.i 999;;
  599. }
  600. { .mfb
  601. nop.m 999
  602. //(atan2f_Pred_noSwap) fma.s f8 = f1,atan2f_answer,atan2f_poly_atan_G
  603. (p7) fma.s f8 = f1,atan2f_answer,atan2f_poly_atan_G
  604. br.ret.sptk b0
  605. }
  //--------------------------------------------------------------------------
  // Special-operand path of atan2f: reached when x (f9) or y (f8) is zero,
  // infinite or NaN. atan2f_sgn_Y and the pi constants were set up on the
  // main path before the branch here. The cases are tested in this order and
  // each one returns as soon as it matches:
  //   1. y is NaN                -> quieted y
  //   2. x is NaN                -> quieted x
  //   3. x is +inf               -> sgn_Y*pi/4 (y infinite) or signed zero
  //   4. x is -inf               -> sgn_Y*3pi/4 (y infinite) or sgn_Y*pi
  //   5. x finite, y infinite    -> sgn_Y*pi/2
  //   6. x is zero, y non-zero   -> sgn_Y*pi/2
  //   7. x is zero, y is zero    -> __libm_error_region
  //   8. x non-zero, y is zero   -> signed zero (x > 0) or sgn_Y*pi (x < 0)
  //--------------------------------------------------------------------------
  606. ATAN2F_XY_INF_NAN_ZERO:
  607. // p10 = (y is NAN)
  608. // answer is quiet y
  609. // p11 = (y is not NAN)
  610. // p12 = (X is NAN)
  611. // answer is quiet x
  612. fclass.m p10,p11 = f8,0xc3 // @snan | @qnan
  613. ;;
  // When y is NaN, x is also run through fnorm.s into the scratch register
  // f10; that value is not used afterwards in this routine.
  // NOTE(review): presumably done only so that a signaling-NaN x still
  // raises the invalid flag -- confirm.
  614. (p10) fnorm.s f10 = f9
  615. (p10) fnorm.s f8 = f8
  616. (p10) br.ret.spnt b0
  617. (p11) fclass.m p12,p0 = f9,0xc3 // @snan | @qnan
  618. ;;
  619. (p12) fnorm.s f8 = f9
  620. (p12) br.ret.spnt b0
  621. // p10 = x is +inf
  622. // p12 = (x is +inf) AND (y is +- inf)
  623. // answer is (sign of y)pi/4
  624. // p13 = (x is +inf) AND (y is +- number)
  625. // answer is (sign of y)0
  626. fclass.m p10,p0 = f9,0x21 // @inf| @pos
  627. ;;
  // The .unc form clears both p12 and p13 when p10 is false, so neither of
  // the returns below can fire for an x that is not +inf.
  628. (p10) fclass.m.unc p12,p13 = f8,0x23 // @inf
  629. ;;
  630. (p12) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0
  631. (p12) br.ret.spnt b0
  632. ;;
  // Signed zero: sign taken from y (f8), magnitude taken from f0.
  633. (p13) fmerge.s f8 = f8,f0
  634. (p13) br.ret.spnt b0
  635. // p11 = x is -inf
  636. // p14 = (x is -inf) AND (y is +- inf)
  637. // answer is (sign of y)3pi/4
  638. // p15 = (x is -inf) AND (y is +- number)
  639. // answer is (sign of y)pi
  640. // p12 = x is +- number
  641. // p13 = (x is +- number) AND (y is +- inf)
  642. // answer is (sign of y)pi/2
  643. fclass.m.unc p11,p12 = f9,0x22 // @inf | @neg
  644. ;;
  645. (p11) fclass.m.unc p14,p15 = f8,0x23 // @inf
  646. ;;
  647. (p14) fma.s f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0
  648. (p14) br.ret.spnt b0
  649. (p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0
  650. (p15) br.ret.spnt b0
  651. (p12) fclass.m.unc p13,p0 = f8,0x23 // @inf
  652. ;;
  653. (p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0
  654. (p13) br.ret.spnt b0
  655. // p10 = (x is +-0)
  656. // p13 = (x is +-0) AND (y is +-number)
  657. // answer is (sign of y) pi/2
  658. // p12 = (x is +-0) AND (y is +-0)
  659. // answer is goto error_region
  660. // p11 = (x is +- number)
  661. // p12 = (x is +- number) AND (y is +- 0)
  662. // p13 = (x is + number) AND (y is +- 0)
  663. // answer is (sign of y)0
  664. // p14 = NOT (x is + number) AND (y is +- 0)
  665. // p15 = (x is - number) AND (y is +- 0)
  666. // answer is (sign of y)pi
  // From here on both operands are finite and at least one of them is zero.
  667. fclass.m p10,p11 = f9,0x7 // @zero
  668. ;;
  669. (p10) fclass.m.unc p12,p13 = f8,0x7 // @zero
  670. ;;
  671. (p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0
  672. (p13) br.ret.spnt b0
  // Both operands are zero: hand over to the error-reporting stub.
  673. (p12) br.cond.spnt __libm_error_region
  674. ;;
  // p12 is reused here for "x non-zero and y zero".
  675. (p11) fclass.m.unc p12,p0 = f8,0x7 // @zero
  676. ;;
  677. (p12) fclass.m.unc p13, p14 = f9,0x19 // @norm| @unorm | @pos
  678. ;;
  679. (p13) fmerge.s f8 = f8, f0
  680. (p13) br.ret.spnt b0
  681. (p14) fclass.m.unc p15, p0 = f9,0x1a // @norm| @unorm | @neg
  682. ;;
  683. (p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0
  684. (p15) br.ret.spnt b0
  // There is no unconditional return after this point: if none of the
  // predicated returns above fired, execution would continue into
  // __libm_error_region, which follows immediately.
  685. .endp atan2f
  //--------------------------------------------------------------------------
  // __libm_error_region -- error stub for atan2f(+-0, +-0).
  //
  // Entered by branch from the special-operand path with f8 = y and f9 = x
  // (both zero). It runs in the register frame allocated at the top of
  // atan2f, so GR_SAVE_* and GR_Parameter_* refer to that frame.
  //
  // Steps:
  //   1. Compute the default result in f10: signed zero (sign of y) when x
  //      is +0, otherwise sgn_Y*pi.
  //   2. Open a 64-byte stack frame and store the operands and the default
  //      result in it (offsets relative to the new sp):
  //        sp+16 : f8   (addressed by GR_Parameter_X)
  //        sp+32 : f9   (addressed by GR_Parameter_Y)
  //        sp+48 : f10  (addressed by GR_Parameter_RESULT)
  //   3. Call __libm_error_support with those three addresses and tag 38 in
  //      the outgoing registers.
  //   4. Reload f8 from sp+48, restore sp, b0, gp and ar.pfs, and return to
  //      atan2f's caller.
  //--------------------------------------------------------------------------
  686. .proc __libm_error_region
  687. __libm_error_region:
  688. .prologue
  // Tag 38 is the single-precision error tag listed in the file header.
  689. mov GR_Parameter_TAG = 38
  // p10 = x is +0, p11 = otherwise (x is -0 on this path).
  690. fclass.m p10,p11 = f9,0x5 // @zero | @pos
  691. ;;
  // Default result: sign of y on a zero magnitude, or sgn_Y*pi.
  692. (p10) fmerge.s f10 = f8, f0
  693. (p11) fma.s f10 = atan2f_sgn_Y, atan2f_const_pi,f0
  694. ;;
  // GR_Parameter_Y = old sp - 32, which is new sp + 32 once the frame exists.
  695. { .mfi
  696. add GR_Parameter_Y=-32,sp // Parameter 2 value
  697. nop.f 999
  698. .save ar.pfs,GR_SAVE_PFS
  699. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  700. }
  701. { .mfi
  702. .fframe 64
  703. add sp=-64,sp // Create new stack
  704. nop.f 0
  705. mov GR_SAVE_GP=gp // Save gp
  706. }
  707. ;;
  // The post-increment leaves GR_Parameter_Y at sp+48 after storing f9 at
  // sp+32.
  708. { .mmi
  709. stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack
  710. add GR_Parameter_X = 16,sp // Parameter 1 address
  711. .save b0, GR_SAVE_B0
  712. mov GR_SAVE_B0=b0 // Save b0
  713. }
  714. ;;
  715. .body
  // The next two bundles have no stop between them, so they form a single
  // instruction group: GR_Parameter_RESULT and the f10 store both use the old
  // GR_Parameter_Y (sp+48), and the add that rewinds GR_Parameter_Y to sp+32
  // takes effect only afterwards.
  716. { .mib
  717. stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
  718. add GR_Parameter_RESULT = 0,GR_Parameter_Y
  719. nop.b 0 // Parameter 3 address
  720. }
  721. { .mib
  722. stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
  723. add GR_Parameter_Y = -16,GR_Parameter_Y
  724. br.call.sptk b0=__libm_error_support# // Call error handling function
  725. }
  726. ;;
  // Recompute the result address after the call (see the 8/15/00 history
  // entry: GR_Parameter_RESULT was previously overwritten by the call).
  727. { .mmi
  728. nop.m 0
  729. nop.m 0
  730. add GR_Parameter_RESULT = 48,sp
  731. };;
  732. { .mmi
  733. ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
  734. .restore
  735. add sp = 64,sp // Restore stack pointer
  736. mov b0 = GR_SAVE_B0 // Restore return address
  737. }
  738. ;;
  739. { .mib
  740. mov gp = GR_SAVE_GP // Restore gp
  741. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  742. br.ret.sptk b0 // Return
  743. }
  744. ;;
  745. .endp __libm_error_region
  // External error handler, declared here and defined elsewhere.
  746. .type __libm_error_support#,@function
  747. .global __libm_error_support#