Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

942 lines
25 KiB

  1. .file "atan.s"
  2. // Copyright (c) 2000, 2001, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00: Initial version
  29. // 4/13/00: Improved speed
  30. // 4/19/00: Removed the qualifying predicate from the fmerge.s that
  31. // takes the absolute value.
  32. // 6/16/00: Reassigned FP registers to eliminate stalls on loads
  33. // 8/30/00: Saved 5 cycles in main path by rearranging large argument logic
  34. // and delaying use of result of fcmp in load by 1 group
  35. //
  36. // API
  37. //==============================================================
  38. // double atan( double x);
  39. //
  40. // Overview of operation
  41. //==============================================================
  42. // atan(x) = sign(X)pi/2 - atan(1/x)
  43. //
  44. // We have two paths: |x| > 1 and |x| <= 1
  45. //
  46. // |x| > 1
  47. // ==========================================
  48. //
  49. // c = frcpa(x) which is approximately 1/x
  50. //
  51. // xc = 1- B
  52. // B = 1-xc
  53. //
  54. // Approximate 1/(1-B)^k by a polynomial in B, poly(B)
  55. // k is 45.
  56. //
  57. // poly(B) = 1 + r1 B + r2 B^2 + ...+ r10 B^10
  58. //
  59. // c^k = (1-B)^k/x^k
  60. // c^k/(1-B)^k = 1/x^k
  61. // c^k poly(B) = 1/x^k
  62. // poly(x) = series(atan(1/x)) = 1/x - 1/3x^3 + 1/5x^5 - 1/7x^7 .... + 1/45 x^45
  63. // = 1/x^45 ( x^44 - x^42/3 + x^40/5 - x^38/7 ... +1)
  64. // = 1/x^45 ( y^22 - y^21/3 + y^20/5 - y^19/7 ... +1)
  65. //
  66. // = c^45 poly(B) poly(x)
  67. // = c^45 r(B) q(y)
  68. // q(y) = q0 + q1 y + q2 y^2 + ... + q22 y^22
  69. // where q22 is 1.0
  70. // atan(x) = sign(X)pi/2 - c^45 r(B) q(y)
  71. // |x| <= 1
  72. // ==========================================
  73. // poly(x) = series(atan(x)) = x - x^3/3 + x^5/5 + .....
  74. // poly(x) = series(atan(x)) = x + x^3(- 1/3 + x^2/5 + ..... + x^44/47)
  75. // poly(x) = series(atan(x)) = x + x^3(p0 + x^2/5 + ..... + x^44/47)
  76. // poly(x) = series(atan(x)) = x + x^3(p0 + y/5 + ..... + y^22/47)
  77. // where p0 is about -1/3.
  78. // atan(x) = poly(x)
  79. // Special Values
  80. //==============================================================
  81. // atan(QNAN) = QNAN
  82. // atan(SNAN) = quieted SNAN
  83. // atan(+-inf) = +- pi/2
  84. // atan(+-0) = +-0
  85. // Registers used
  86. //==============================================================
  87. // predicate registers used:
  88. // p6 -> p11
  89. // floating-point registers used:
  90. // f8 (argument and result), f10 -> f14, f32 -> f127
  91. // general registers used
  92. // r14 -> r20
  93. // Assembly macros
  94. //==============================================================
  // Symbolic register names used by atan.  Each "name = reg" line is an
  // assembler equate; apart from f0, f1, f8, r0, gp, b0 and the predicate
  // registers, the procedure refers to registers only through these names.
  //
  // atan_Pi_by_2 is not referenced by the code visible in this file.
  95. atan_Pi_by_2 = f32
  // atan_S_PI receives the pi/2 table entry and is then given the sign of x.
  96. atan_S_PI = f33
  // atan_ABS_f8 holds |x| (f8 merged with the sign of f0).
  97. atan_ABS_f8 = f34
  // R coefficients (|x| > 1 path).  atan_R0 is not referenced by the code
  // visible in this file; R1..R10 are loaded from double_atan_constants_R.
  98. atan_R0 = f35
  99. atan_R1 = f36
  100. atan_R2 = f37
  101. atan_R3 = f38
  102. atan_R4 = f39
  103. atan_R5 = f40
  104. atan_R6 = f41
  105. atan_R7 = f42
  106. atan_R8 = f43
  107. atan_R9 = f44
  108. atan_R10 = f45
  // Q coefficients (|x| > 1 path), loaded from double_atan_constants_Q.
  109. atan_Q0 = f46
  110. atan_Q1 = f47
  111. atan_Q2 = f48
  112. atan_Q3 = f49
  113. atan_Q4 = f50
  114. atan_Q5 = f51
  115. atan_Q6 = f52
  116. atan_Q7 = f53
  117. atan_Q8 = f54
  118. atan_Q9 = f55
  119. atan_Q10 = f56
  120. atan_Q11 = f57
  121. atan_Q12 = f58
  122. atan_Q13 = f59
  123. atan_Q14 = f60
  124. atan_Q15 = f61
  125. atan_Q16 = f62
  126. atan_Q17 = f63
  127. atan_Q18 = f64
  128. atan_Q19 = f65
  129. atan_Q20 = f66
  130. atan_Q21 = f67
  131. atan_Q22 = f68
  132. // P and Q constants are mutually exclusive
  133. // so they can share macro definitions
  // Only P0-P3, P12-P19 and P22 actually reuse the register of the Q
  // coefficient with the same index.  P4-P7 and P20 use f10-f14, P8/P9 use
  // f126/f127 (defined near the end of this list), and P10, P11 and P21 use
  // the same registers as the temporaries atan_G2 (f103), atan_Z6 (f114)
  // and atan_V2 (f99) respectively.
  134. atan_P0 = f46
  135. atan_P1 = f47
  136. atan_P2 = f48
  137. atan_P3 = f49
  138. atan_P4 = f10
  139. atan_P5 = f11
  140. atan_P6 = f12
  141. atan_P7 = f13
  142. atan_P10 = f103
  143. atan_P11 = f114
  144. atan_P12 = f58
  145. atan_P13 = f59
  146. atan_P14 = f60
  147. atan_P15 = f61
  148. atan_P16 = f62
  149. atan_P17 = f63
  150. atan_P18 = f64
  151. atan_P19 = f65
  152. atan_P20 = f14
  153. atan_P21 = f99
  154. atan_P22 = f68
  155. // end of P constant macros
  // Working values: C is the frcpa result, Y = x*x, B = 1 - C*x.
  156. atan_C = f69
  157. atan_Y = f70
  158. atan_B = f71
  // NOTE(review): atan_Z is assigned twice, f72 here and f119 further down.
  // Presumably the later assignment is the one in effect when the procedure
  // is assembled -- confirm with the assembler in use.
  159. atan_Z = f72
  // V*, W*, G*, Z*, Y* below are intermediate results of the polynomial
  // evaluations in the procedure.
  160. atan_V11 = f73
  161. atan_V12 = f74
  162. atan_V7 = f75
  163. atan_V8 = f76
  164. atan_W13 = f77
  165. atan_W11 = f78
  166. atan_V3 = f79
  167. atan_V4 = f80
  168. atan_G11 = f81
  169. atan_G12 = f82
  170. atan_G7 = f83
  171. atan_G8 = f84
  172. atan_Z1 = f85
  173. atan_W7 = f86
  174. atan_G3 = f87
  175. atan_W8 = f88
  176. atan_V9 = f89
  177. atan_V10 = f90
  178. atan_G10 = f91
  179. atan_W3 = f92
  180. atan_G4 = f93
  181. atan_G9 = f94
  182. atan_G6 = f95
  183. atan_W4 = f96
  184. atan_Z2 = f97
  185. atan_V6 = f98
  186. atan_V2 = f99
  187. atan_W6 = f100
  188. atan_W10 = f101
  189. atan_Y3 = f102
  190. atan_G2 = f103
  191. atan_Y8 = f104
  192. atan_G5 = f105
  193. atan_Z3 = f106
  194. atan_Z4 = f107
  195. atan_W2 = f108
  196. atan_V5 = f109
  197. atan_W5 = f110
  198. atan_G1 = f111
  199. atan_Y11 = f112
  200. atan_Z5 = f113
  201. atan_Z6 = f114
  202. atan_V1 = f115
  203. atan_W1 = f116
  204. atan_Z7 = f117
  205. atan_Q = f118
  // Second assignment of atan_Z (see the note at the first one, f72).
  206. atan_Z = f119
  // atan_abs_f8 (lower case) is not referenced by the code visible in this
  // file; the procedure uses atan_ABS_f8 (f34) for |x|.
  207. atan_abs_f8 = f120
  208. atan_V13 = f121
  // Xcub = x * Y = x^3; P is the |x| <= 1 polynomial value.
  209. atan_Xcub = f122
  210. atan_Y12 = f123
  211. atan_P = f124
  // Result of fnorm on x; only its sign/exponent field is read (getf.exp).
  212. atan_NORM_f8 = f125
  213. atan_P8 = f126
  214. atan_P9 = f127
  // General registers: table pointers for R, Q and P, the large-argument
  // exponent threshold (0x10172), and the extracted exponent / mask values.
  215. atan_GR_AD_R = r14
  216. atan_GR_AD_Q = r15
  217. atan_GR_AD_P = r16
  218. atan_GR_10172 = r17
  219. atan_GR_exp_f8 = r18
  220. atan_GR_signexp_f8 = r19
  221. atan_GR_exp_mask = r20
  222. /////////////////////////////////////////////////////////////
  // Coefficient tables read by atan with ldfe and a 16-byte post-increment.
  // Each entry is two data8 words (16 bytes).  The first word looks like the
  // 64-bit significand and the second like the sign/exponent field of an
  // extended-precision value (e.g. the pi/2 entry is 0xC90FDAA22168C235 with
  // exponent word 0x3FFF; entries whose second word is 0x0000BFxx are the
  // negative ones, such as P0, which the overview says is about -1/3).
  //
  // IMPORTANT: entries are stored in the order in which the procedure loads
  // them, not in index order.  Reordering a table requires reordering the
  // matching ldfe sequence in atan, and vice versa.
  223. .data
  224. .align 16
  // R1..R10 (used on the |x| > 1 path), followed by pi/2 as the 11th entry.
  225. double_atan_constants_R:
  226. data8 0xB36B46B9C5443CED, 0x0000401C //R8
  227. data8 0x842633E0D126261F, 0x0000401F //R9
  228. data8 0xBE04FFFFFFFF46E0, 0x00004010 //R4
  229. data8 0xE8C62000244D66E2, 0x00004013 //R5
  230. data8 0xF2790C001E3789B3, 0x00004016 //R6
  231. data8 0xDCD2CCF97D7C764F, 0x00004019 //R7
  232. data8 0xB40000000000000B, 0x00004004 //R1
  233. data8 0xB265F3D38F5EE28F, 0x00004021 //R10
  234. data8 0x8160000000000001, 0x00004009 //R2
  235. data8 0xFD5BFFFFFFFE55CD, 0x0000400C //R3
  236. data8 0xC90FDAA22168C235, 0x00003FFF // pi/2
  // Q0..Q22 (used on the |x| > 1 path).  The first seven entries
  // (Q8, Q9, Q4, Q5, Q6, Q7, Q21) are loaded before the path is known.
  237. double_atan_constants_Q:
  238. data8 0xEBD602FA7761BC33, 0x00003FF9 //Q8
  239. data8 0x8CB1CABD6A91913C, 0x0000BFFA //Q9
  240. data8 0x84C665C37D623CD2, 0x00003FF7 //Q4
  241. data8 0x8DE0D1673DAEA9BC, 0x0000BFF8 //Q5
  242. data8 0xF658ADBE2C6E6FCC, 0x00003FF8 //Q6
  243. data8 0xB56307BE1DD3FFB6, 0x0000BFF9 //Q7
  244. data8 0xAAAAAAAAAAAA8000, 0x0000BFFD //Q21
  245. data8 0x8000000000000000, 0x00003FFF //Q22
  246. data8 0x924924923A9D710C, 0x0000BFFC //Q19
  247. data8 0xCCCCCCCCCC9380E7, 0x00003FFC //Q20
  248. data8 0xA644DC250EFA2800, 0x00003FED //Q0
  249. data8 0x83DEAE24EEBF5E44, 0x0000BFF1 //Q1
  250. data8 0xC758CCC64793D4EC, 0x00003FF3 //Q2
  251. data8 0xBFDC0B54E7C89DCE, 0x0000BFF5 //Q3
  252. data8 0x888855199D1290AF, 0x0000BFFB //Q15
  253. data8 0x9D89D3BE514B0178, 0x00003FFB //Q16
  254. data8 0xBA2E8B4DEC70282A, 0x0000BFFB //Q17
  255. data8 0xE38E38DF9E9FC83B, 0x00003FFB //Q18
  256. data8 0x9F8781CC990029D9, 0x00003FFA //Q10
  257. data8 0xB0B39472DEBA3C79, 0x0000BFFA //Q11
  258. data8 0xC2AFAEF8C85B0BC6, 0x00003FFA //Q12
  259. data8 0xD780E539797525DD, 0x0000BFFA //Q13
  260. data8 0xF0EDC449AC786DF9, 0x00003FFA //Q14
  // P0..P22 (used on the |x| <= 1 path).  The first seven entries
  // (P10, P11, P20, P21, P8, P9, P4) are loaded before the path is known.
  261. double_atan_constants_P:
  262. data8 0xB1899EC590CDB8DF, 0x0000BFFA //P10
  263. data8 0xA1E79850A67D59B0, 0x00003FFA //P11
  264. data8 0x911D8B30C2A96E6D, 0x0000BFF3 //P20
  265. data8 0xB87233C68A640706, 0x00003FF0 //P21
  266. data8 0xD78E4B82F3C29D7A, 0x0000BFFA //P8
  267. data8 0xC2EBE37AF932C14F, 0x00003FFA //P9
  268. data8 0xBA2E8B94AA104DD6, 0x0000BFFB //P4
  269. data8 0x9D89D7A640B71D38, 0x00003FFB //P5
  270. data8 0x88887CA2CE9B2A40, 0x0000BFFB //P6
  271. data8 0xF0F017D57A919C1E, 0x00003FFA //P7
  272. data8 0xD0D635F230C80E06, 0x0000BFF8 //P16
  273. data8 0xE847BECA7209B479, 0x00003FF7 //P17
  274. data8 0xD14C6A2AAE0D5B07, 0x0000BFF6 //P18
  275. data8 0x915F612A5C469117, 0x00003FF5 //P19
  276. data8 0x921EDE5FD0DBBBE2, 0x0000BFFA //P12
  277. data8 0xFFD303C2C8535445, 0x00003FF9 //P13
  278. data8 0xD30DF50E295386F7, 0x0000BFF9 //P14
  279. data8 0x9E81F2B1BBD210A8, 0x00003FF9 //P15
  280. data8 0xAAAAAAAAAAAAA800, 0x0000BFFD //P0
  281. data8 0xCCCCCCCCCCC7D476, 0x00003FFC //P1
  282. data8 0x9249249247838066, 0x0000BFFC //P2
  283. data8 0xE38E38E302290D68, 0x00003FFB //P3
  284. data8 0xDF7F0A816F7E5025, 0x0000BFEC //P22
  285. .align 32
  286. .global atan#
  287. ////////////////////////////////////////////////////////
  // double atan(double x)
  //   In:   f8 = x
  //   Out:  f8 = atan(x), produced by an fma.d / fnma.d with status field s0
  //   Uses: the atan_* registers defined above, predicates p6-p9 and p11,
  //         and the three coefficient tables reached through gp.
  //
  // Paths (selected by predicates, all in straight-line code):
  //   p9 (early)  x is NaN or +-0      -> return x passed through fma.d
  //   p6          |x| <= 1             -> x + x^3 * P(y),  y = x^2
  //   p7          |x| >  1             -> sign(x)*pi/2 - Z*Q
  //   p11         biased exponent of x >= 0x10172 -> sign(x)*pi/2
  // The P (p6) and Q/R (p7) evaluations are interleaved instruction by
  // instruction; the order of the ldfe instructions must match the order of
  // the entries in the data tables.
  288. .section .text
  289. .proc atan#
  290. .align 32
  291. atan:
  // Form the linkage-table addresses of the P and Q tables; |x| -> atan_ABS_f8.
  292. { .mmf
  293. addl atan_GR_AD_P = @ltoff(double_atan_constants_P), gp
  294. addl atan_GR_AD_Q = @ltoff(double_atan_constants_Q), gp
  295. fmerge.s atan_ABS_f8 = f0,f8
  296. }
  297. ;;
  // Load the table addresses; C = frcpa(x), approximately 1/x (see overview).
  // The p8 written by frcpa is not consumed; p8 is rewritten further down.
  298. { .mmf
  299. ld8 atan_GR_AD_P = [atan_GR_AD_P]
  300. ld8 atan_GR_AD_Q = [atan_GR_AD_Q]
  301. frcpa.s1 atan_C,p8 = f1,f8
  302. }
  303. ;;
  // 0x1ffff is later ANDed with the getf.exp result to keep the exponent
  // field and drop the sign.  Y = x*x.
  304. { .mmf
  305. addl atan_GR_AD_R = @ltoff(double_atan_constants_R), gp
  306. addl atan_GR_exp_mask = 0x1ffff, r0
  307. fma.s1 atan_Y = f8,f8,f0
  308. }
  309. ;;
  310. // This fnorm takes faults or sets fault flags
  // 0x10172 is the exponent threshold for the "large argument" path (p11).
  311. { .mmf
  312. mov atan_GR_10172 = 0x10172
  313. ld8 atan_GR_AD_R = [atan_GR_AD_R]
  314. fnorm atan_NORM_f8 = f8
  315. }
  316. ;;
  317. // qnan snan inf norm unorm 0 -+
  318. // 1 1 0 0 0 1 11
  319. // c 7
  320. // p9 set if we have a NAN or +-0
  // From here on the first seven Q and P coefficients are loaded
  // unconditionally, before the |x| <= 1 test has produced p6/p7.
  321. { .mmf
  322. ldfe atan_Q8 = [atan_GR_AD_Q],16
  323. ldfe atan_P10 = [atan_GR_AD_P],16
  324. fclass.m.unc p9, p0 = f8, 0xc7
  325. }
  326. ;;
  327. { .mmi
  328. ldfe atan_Q9 = [atan_GR_AD_Q],16
  329. ldfe atan_P11 = [atan_GR_AD_P],16
  330. nop.i 999
  331. }
  332. ;;
  // NaN or +-0: the result is x*1 + 0 computed by fma.d.s0.
  333. { .mmf
  334. ldfe atan_Q4 = [atan_GR_AD_Q],16
  335. ldfe atan_P20 = [atan_GR_AD_P],16
  336. (p9) fma.d.s0 f8 = f8,f1,f0
  337. ;;
  338. }
  339. // Exit if we have a NAN or +-0
  340. { .mmb
  341. ldfe atan_Q5 = [atan_GR_AD_Q],16
  342. ldfe atan_P21 = [atan_GR_AD_P],16
  343. (p9) br.ret.spnt b0
  344. ;;
  345. }
  346. // p6 is TRUE if |x| <= 1
  347. // p7 is TRUE if |x| > 1
  348. { .mmf
  349. ldfe atan_Q6 = [atan_GR_AD_Q],16
  350. ldfe atan_P8 = [atan_GR_AD_P],16
  351. fcmp.le.unc p6,p7 = atan_ABS_f8, f1
  352. ;;
  353. }
  // Z = C*C;  B = 1 - C*x.
  354. { .mfi
  355. ldfe atan_Q7 = [atan_GR_AD_Q],16
  356. fma.s1 atan_Z = atan_C, atan_C, f0
  357. nop.i 999
  358. }
  359. { .mfi
  360. ldfe atan_P9 = [atan_GR_AD_P],16
  361. fnma.s1 atan_B = atan_C,f8, f1
  362. nop.i 999 ;;
  363. }
  // V12 = Y^2 (x^4);  Xcub = x*Y (x^3).
  364. { .mfi
  365. ldfe atan_Q21 = [atan_GR_AD_Q],16
  366. fma.s1 atan_V12 = atan_Y, atan_Y, f0
  367. nop.i 999
  368. }
  369. { .mfi
  370. ldfe atan_P4 = [atan_GR_AD_P],16
  371. fma.s1 atan_Xcub = f8, atan_Y , f0
  372. nop.i 999
  373. ;;
  374. }
  // From here the remaining loads are predicated: Q and R under p7, P under
  // p6.  A predicated-off load does not advance its table pointer.
  // p8 is set exactly when p6 is set (copy of p6).
  375. { .mmi
  376. (p7) ldfe atan_Q22 = [atan_GR_AD_Q],16
  377. (p6) ldfe atan_P5 = [atan_GR_AD_P],16
  378. (p6) cmp.eq.unc p8,p0 = r0,r0
  379. ;;
  380. }
  // p9 is set exactly when p7 is set (copy of p7).
  381. { .mmi
  382. (p7) ldfe atan_Q19 = [atan_GR_AD_Q],16
  383. (p6) ldfe atan_P6 = [atan_GR_AD_P],16
  384. (p7) cmp.eq.unc p9,p0 = r0,r0
  385. ;;
  386. }
  387. { .mmi
  388. (p7) ldfe atan_Q20 = [atan_GR_AD_Q],16
  389. (p6) ldfe atan_P7 = [atan_GR_AD_P],16
  390. nop.i 999
  391. ;;
  392. }
  // Start of the interleaved polynomial evaluations.  Most steps have the
  // form  t = Y*c(k+1) + c(k)  or combine two such terms with a power of Y.
  393. { .mfi
  394. (p7) ldfe atan_Q0 = [atan_GR_AD_Q],16
  395. (p6) fma.s1 atan_V13 = atan_Y, atan_P11, atan_P10
  396. nop.i 999
  397. }
  398. { .mfi
  399. (p6) ldfe atan_P16 = [atan_GR_AD_P],16
  400. (p7) fma.s1 atan_V11 = atan_Y, atan_Q9, atan_Q8
  401. nop.i 999 ;;
  402. }
  // G12 = B^2 (p7 only);  V9 = V12^2 = Y^4 (both paths).
  403. { .mfi
  404. (p7) ldfe atan_Q1 = [atan_GR_AD_Q],16
  405. (p7) fma.s1 atan_G12 = atan_B, atan_B, f0
  406. nop.i 999
  407. }
  408. { .mfi
  409. (p6) ldfe atan_P17 = [atan_GR_AD_P],16
  410. fma.s1 atan_V9 = atan_V12, atan_V12, f0
  411. nop.i 999 ;;
  412. }
  413. { .mfi
  414. (p7) ldfe atan_Q2 = [atan_GR_AD_Q],16
  415. (p6) fma.s1 atan_W11 = atan_Y, atan_P21, atan_P20
  416. nop.i 999
  417. }
  418. { .mfi
  419. (p6) ldfe atan_P18 = [atan_GR_AD_P],16
  420. (p7) fma.s1 atan_V7 = atan_Y, atan_Q5, atan_Q4
  421. nop.i 999 ;;
  422. }
  // Z1 = Z^2 = C^4;  Y3 = Y*V12 = Y^3.
  423. { .mfi
  424. (p7) ldfe atan_Q3 = [atan_GR_AD_Q],16
  425. (p7) fma.s1 atan_Z1 = atan_Z, atan_Z, f0
  426. nop.i 999
  427. }
  428. { .mfi
  429. (p6) ldfe atan_P19 = [atan_GR_AD_P],16
  430. (p7) fma.s1 atan_Y3 = atan_Y , atan_V12, f0
  431. nop.i 999 ;;
  432. }
  // First of the ten R-coefficient loads (all under p7).
  433. { .mfi
  434. (p7) ldfe atan_R8 = [atan_GR_AD_R],16
  435. (p6) fma.s1 atan_V11 = atan_Y, atan_P9, atan_P8
  436. nop.i 999
  437. }
  438. { .mfi
  439. (p6) ldfe atan_P12 = [atan_GR_AD_P],16
  440. (p7) fma.s1 atan_V8 = atan_Y, atan_Q7, atan_Q6
  441. nop.i 999 ;;
  442. }
  443. { .mmi
  444. (p7) ldfe atan_R9 = [atan_GR_AD_R],16
  445. (p6) ldfe atan_P13 = [atan_GR_AD_P],16
  446. nop.i 999
  447. ;;
  448. }
  449. { .mfi
  450. (p7) ldfe atan_R4 = [atan_GR_AD_R],16
  451. (p6) fma.s1 atan_V7 = atan_Y, atan_P5, atan_P4
  452. nop.i 999
  453. }
  454. { .mfi
  455. (p6) ldfe atan_P14 = [atan_GR_AD_P],16
  456. (p7) fma.s1 atan_W13 = atan_Y, atan_Q22, atan_Q21
  457. nop.i 999 ;;
  458. }
  // V9*V9 = Y^8: stored as Y12 on the p6 path (multiplied by V9 again
  // later to give Y^12) and as Y8 on the p7 path.
  459. { .mfi
  460. (p7) ldfe atan_R5 = [atan_GR_AD_R],16
  461. (p6) fma.s1 atan_Y12 = atan_V9 , atan_V9 , f0
  462. nop.i 999
  463. }
  464. { .mfi
  465. (p6) ldfe atan_P15 = [atan_GR_AD_P],16
  466. (p7) fma.s1 atan_Y8 = atan_V9 , atan_V9 , f0
  467. nop.i 999 ;;
  468. }
  469. { .mfi
  470. (p7) ldfe atan_R6 = [atan_GR_AD_R],16
  471. (p6) fma.s1 atan_V8 = atan_Y, atan_P7, atan_P6
  472. nop.i 999
  473. }
  474. { .mfi
  475. (p6) ldfe atan_P0 = [atan_GR_AD_P],16
  476. (p7) fma.s1 atan_W11 = atan_Y, atan_Q20, atan_Q19
  477. nop.i 999 ;;
  478. }
  // Z2 = Z1^2 = C^8.
  479. { .mfi
  480. (p7) ldfe atan_R7 = [atan_GR_AD_R],16
  481. (p7) fma.s1 atan_Z2 = atan_Z1 , atan_Z1, f0
  482. nop.i 999
  483. }
  484. { .mfi
  485. (p6) ldfe atan_P1 = [atan_GR_AD_P],16
  486. (p6) fma.s1 atan_V10 = atan_V12, atan_V13, atan_V11
  487. nop.i 999 ;;
  488. }
  489. { .mfi
  490. (p7) ldfe atan_Q15 = [atan_GR_AD_Q],16
  491. (p6) fma.s1 atan_W7 = atan_Y, atan_P17, atan_P16
  492. nop.i 999
  493. }
  494. { .mfi
  495. (p6) ldfe atan_P2 = [atan_GR_AD_P],16
  496. (p7) fma.s1 atan_V3 = atan_Y, atan_Q1 , atan_Q0
  497. nop.i 999 ;;
  498. }
  // G9 = G12^2 = B^4.
  499. { .mfi
  500. (p7) ldfe atan_Q16 = [atan_GR_AD_Q],16
  501. (p7) fma.s1 atan_G9 = atan_G12, atan_G12, f0
  502. nop.i 999
  503. }
  504. { .mfi
  505. (p6) ldfe atan_P3 = [atan_GR_AD_P],16
  506. (p7) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
  507. nop.i 999 ;;
  508. }
  509. { .mfi
  510. (p7) ldfe atan_R1 = [atan_GR_AD_R],16
  511. (p6) fma.s1 atan_W8 = atan_Y, atan_P19, atan_P18
  512. nop.i 999
  513. }
  // Last P load (P22 is the final entry of the P table).
  514. { .mfi
  515. (p6) ldfe atan_P22 = [atan_GR_AD_P],16
  516. (p7) fma.s1 atan_V4 = atan_Y, atan_Q3 , atan_Q2
  517. nop.i 999 ;;
  518. }
  // Fetch the sign/exponent field of the normalized argument for the
  // large-argument test below.  Y11 = Y3*Y8 = Y^11.
  519. { .mfi
  520. getf.exp atan_GR_signexp_f8 = atan_NORM_f8
  521. (p7) fma.s1 atan_Y11 = atan_Y3 , atan_Y8 , f0
  522. nop.i 999
  523. }
  524. { .mfi
  525. (p7) ldfe atan_Q17 = [atan_GR_AD_Q],16
  526. (p6) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
  527. nop.i 999 ;;
  528. }
  529. { .mfi
  530. (p7) ldfe atan_Q18 = [atan_GR_AD_Q],16
  531. (p6) fma.s1 atan_W3 = atan_Y, atan_P13, atan_P12
  532. nop.i 999
  533. }
  534. { .mfi
  535. (p7) ldfe atan_R10 = [atan_GR_AD_R],16
  536. (p7) fma.s1 atan_G11 = atan_B, atan_R9 , atan_R8
  537. nop.i 999 ;;
  538. }
  // exp_f8 = signexp & 0x1ffff (sign removed).  Z3 = Z1*Z2 = C^12.
  539. { .mfi
  540. (p7) ldfe atan_Q10 = [atan_GR_AD_Q],16
  541. (p7) fma.s1 atan_Z3 = atan_Z1 , atan_Z2 , f0
  542. and atan_GR_exp_f8 = atan_GR_signexp_f8,atan_GR_exp_mask
  543. }
  // Z4 = Z2^2 = C^16.
  544. { .mfi
  545. (p7) ldfe atan_R2 = [atan_GR_AD_R],16
  546. (p7) fma.s1 atan_Z4 = atan_Z2 , atan_Z2 , f0
  547. nop.i 999 ;;
  548. }
  549. { .mfi
  550. (p7) ldfe atan_Q11 = [atan_GR_AD_Q],16
  551. (p6) fma.s1 atan_W4 = atan_Y, atan_P15, atan_P14
  552. nop.i 999
  553. }
  // p11 = (0x10172 <= exponent of x): the large-argument case.
  554. { .mfi
  555. (p7) ldfe atan_R3 = [atan_GR_AD_R],16
  556. (p7) fma.s1 atan_G7 = atan_B, atan_R5 , atan_R4
  557. cmp.le.unc p11,p0 = atan_GR_10172,atan_GR_exp_f8
  558. ;;
  559. }
  // The next two instruction groups use p8/p9 (the copies of p6/p7 made
  // earlier) -- presumably because p6/p7 are rewritten by the (p11) compare
  // in the second of these groups; confirm before changing predicates here.
  //
  // The atan_S_PI load is unconditional.  On the p7 path the ten preceding
  // R loads have advanced atan_GR_AD_R to the pi/2 entry.  On the p6 path
  // none of them executed, so this loads the first table entry instead; the
  // p6 result does not use atan_S_PI.
  560. { .mmf
  561. (p9) ldfe atan_Q12 = [atan_GR_AD_Q],16
  562. ldfe atan_S_PI = [atan_GR_AD_R],16
  563. (p8) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
  564. ;;
  565. }
  // Large argument: r0 != r0 is false, so the .and compare clears both p6
  // and p7 when p11 is set; only the (p11) result write remains active.
  566. { .mfi
  567. (p9) ldfe atan_Q13 = [atan_GR_AD_Q],16
  568. (p8) fma.s1 atan_V3 = atan_Y, atan_P1 , atan_P0
  569. (p11) cmp.ne.and p6,p7 = r0,r0
  570. }
  571. { .mfi
  572. nop.m 999
  573. (p8) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
  574. nop.i 999 ;;
  575. }
  576. .pred.rel "mutex",p6,p7,p11
  // Last Q load (Q14 is the final entry of the Q table).
  // p6: Y12 = V9 * Y12 = Y^12.
  577. { .mfi
  578. (p7) ldfe atan_Q14 = [atan_GR_AD_Q],16
  579. (p6) fma.s1 atan_Y12 = atan_V9 , atan_Y12, f0
  580. nop.i 999
  581. }
  582. { .mfi
  583. nop.m 999
  584. (p7) fma.s1 atan_G8 = atan_B, atan_R7 , atan_R6
  585. nop.i 999 ;;
  586. }
  587. { .mfi
  588. nop.m 999
  589. (p6) fma.s1 atan_V4 = atan_Y, atan_P3 , atan_P2
  590. nop.i 999
  591. }
  592. { .mfi
  593. nop.m 999
  594. (p7) fma.s1 atan_W7 = atan_Y, atan_Q16, atan_Q15
  595. nop.i 999 ;;
  596. }
  597. { .mfi
  598. nop.m 999
  599. (p6) fma.s1 atan_W10 = atan_V12, atan_P22, atan_W11
  600. nop.i 999
  601. }
  // G3 = B*R1 + 1 (the "1 + r1 B" start of poly(B) in the overview).
  602. { .mfi
  603. nop.m 999
  604. (p7) fma.s1 atan_G3 = atan_B, atan_R1 , f1
  605. nop.i 999 ;;
  606. }
  607. { .mfi
  608. nop.m 999
  609. (p6) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
  610. nop.i 999
  611. }
  612. { .mfi
  613. nop.m 999
  614. (p7) fma.s1 atan_V2 = atan_V12, atan_V4 , atan_V3
  615. nop.i 999 ;;
  616. }
  617. { .mfi
  618. nop.m 999
  619. (p7) fma.s1 atan_W8 = atan_Y, atan_Q18, atan_Q17
  620. nop.i 999
  621. }
  622. { .mfi
  623. nop.m 999
  624. (p7) fma.s1 atan_G10 = atan_G12, atan_R10, atan_G11
  625. nop.i 999 ;;
  626. }
  627. { .mfi
  628. nop.m 999
  629. (p7) fma.s1 atan_V10 = atan_V12, atan_Q10, atan_V11
  630. nop.i 999
  631. }
  632. { .mfi
  633. nop.m 999
  634. (p7) fma.s1 atan_G6 = atan_G12, atan_G8 , atan_G7
  635. nop.i 999 ;;
  636. }
  637. { .mfi
  638. nop.m 999
  639. (p6) fma.s1 atan_V2 = atan_V12, atan_V4, atan_V3
  640. nop.i 999
  641. }
  642. { .mfi
  643. nop.m 999
  644. (p7) fma.s1 atan_G4 = atan_B , atan_R3 , atan_R2
  645. nop.i 999 ;;
  646. }
  647. { .mfi
  648. nop.m 999
  649. (p6) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
  650. nop.i 999
  651. }
  652. { .mfi
  653. nop.m 999
  654. (p7) fma.s1 atan_W3 = atan_Y , atan_Q12, atan_Q11
  655. nop.i 999 ;;
  656. }
  // Z5 = Z3*Z4 = C^28.
  657. { .mfi
  658. nop.m 999
  659. (p7) fma.s1 atan_Z5 = atan_Z3 , atan_Z4 , f0
  660. nop.i 999
  661. }
  662. { .mfi
  663. nop.m 999
  664. (p7) fma.s1 atan_W10 = atan_V12, atan_W13, atan_W11
  665. nop.i 999 ;;
  666. }
  667. { .mfi
  668. nop.m 999
  669. (p7) fma.s1 atan_W4 = atan_Y , atan_Q14, atan_Q13
  670. nop.i 999
  671. }
  672. { .mfi
  673. nop.m 999
  674. (p7) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
  675. nop.i 999 ;;
  676. }
  677. { .mfi
  678. nop.m 999
  679. (p7) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
  680. nop.i 999
  681. }
  682. { .mfi
  683. nop.m 999
  684. (p7) fma.s1 atan_G5 = atan_G9 , atan_G10, atan_G6
  685. nop.i 999 ;;
  686. }
  687. { .mfi
  688. nop.m 999
  689. (p6) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
  690. nop.i 999
  691. }
  692. { .mfi
  693. nop.m 999
  694. (p7) fma.s1 atan_G2 = atan_G12, atan_G4 , atan_G3
  695. nop.i 999 ;;
  696. }
  697. { .mfi
  698. nop.m 999
  699. (p6) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
  700. nop.i 999
  701. }
  // Z6 = Z4*C = C^17.
  702. { .mfi
  703. nop.m 999
  704. (p7) fma.s1 atan_Z6 = atan_Z4 , atan_C , f0
  705. nop.i 999 ;;
  706. }
  // Give the loaded constant the sign of x: S_PI = sign(x) * |S_PI|
  // (sign(x)*pi/2 on the p7 and p11 paths).
  707. { .mfi
  708. nop.m 999
  709. fmerge.s atan_S_PI = f8, atan_S_PI
  710. nop.i 999 ;;
  711. }
  712. { .mfi
  713. nop.m 999
  714. (p7) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
  715. nop.i 999
  716. }
  717. { .mfi
  718. nop.m 999
  719. (p7) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
  720. nop.i 999 ;;
  721. }
  // G1 is the complete polynomial in B for the p7 path.
  722. { .mfi
  723. nop.m 999
  724. (p7) fma.s1 atan_G1 = atan_G9 , atan_G5 , atan_G2
  725. nop.i 999
  726. }
  727. { .mfi
  728. nop.m 999
  729. (p7) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
  730. nop.i 999 ;;
  731. }
  // p6: P = Y^12 * W1 + V1, the full polynomial in Y for |x| <= 1.
  732. { .mfi
  733. nop.m 999
  734. (p6) fma.s1 atan_P = atan_Y12, atan_W1 , atan_V1
  735. nop.i 999
  736. }
  // Z7 = Z5*Z6 = C^45.
  737. { .mfi
  738. nop.m 999
  739. (p7) fma.s1 atan_Z7 = atan_Z5 , atan_Z6 , f0
  740. nop.i 999 ;;
  741. }
  742. { .mfi
  743. nop.m 999
  744. (p7) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
  745. nop.i 999 ;;
  746. }
  // Large argument result: f8 = sign(x)*pi/2, rounded to double.
  747. { .mfi
  748. nop.m 999
  749. (p11) fma.d.s0 f8 = atan_S_PI,f1,f0
  750. nop.i 999
  751. }
  // Z = G1 * C^45  (the "c^45 r(B)" factor of the overview).
  752. { .mfi
  753. nop.m 999
  754. (p7) fma.s1 atan_Z = atan_G1 , atan_Z7 , f0
  755. nop.i 999 ;;
  756. }
  // Q = Y^11 * W1 + V1, the full polynomial q(y) for |x| > 1.
  757. { .mfi
  758. nop.m 999
  759. (p7) fma.s1 atan_Q = atan_Y11, atan_W1 , atan_V1
  760. nop.i 999 ;;
  761. }
  // Final results:
  //   p6: f8 = x + x^3 * P
  //   p7: f8 = sign(x)*pi/2 - Z*Q
  762. { .mfi
  763. nop.m 999
  764. (p6) fma.d.s0 f8 = atan_P , atan_Xcub , f8
  765. nop.i 999
  766. }
  767. { .mfb
  768. nop.m 999
  769. (p7) fnma.d.s0 f8 = atan_Z , atan_Q , atan_S_PI
  770. br.ret.sptk b0 ;;
  771. }
  772. .endp atan