Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

655 lines
18 KiB

  1. .file "sincosf.s"
  2. // Copyright (c) 2000, 2001, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. // History
  26. //==============================================================
  27. // 2/02/00 Initial version
  28. // 4/02/00 Unwind support added.
  29. // 5/10/00 Improved speed with new algorithm.
  30. // 8/08/00 Improved speed by avoiding SIR flush.
  31. // 8/17/00 Changed predicate register macro-usage to direct predicate
  32. // names due to an assembler bug.
  33. // 8/30/00 Put sin_of_r before sin_tbl_S_cos_of_r to gain a cycle
  34. // 1/02/01 Fixed flag settings, improved speed.
  35. //
  36. // API
  37. //==============================================================
  38. // float sinf( float x);
  39. // float cosf( float x);
  40. //
  41. // Assembly macros
  42. //==============================================================
  43. // SIN_Sin_Flag = p6
  44. // SIN_Cos_Flag = p7
  45. // integer registers used (all inside the r32-r39 frame created by alloc)
  46. SIN_AD_PQ_1 = r33 // Table pointer: steps through the coefficient pairs with post-increment loads
  47. SIN_AD_PQ_2 = r33 // Same register as SIN_AD_PQ_1; later holds the address of the selected sin/cos table entry
  48. sin_GR_sincos_flag = r34 // 1 when entered through sinf, 0 when entered through cosf
  49. sin_GR_Mint = r35 // Integer M obtained from x (fcvt.fx, then getf.sig)
  50. sin_GR_index = r36 // (M + 32) * 16: byte offset of the table entry for M
  51. gr_tmp = r37 // Holds -1, which is copied into fp_tmp
  52. GR_SAVE_B0 = r37 // Shares r37 with gr_tmp; keeps b0 across the call in SIN_DOUBLE/COS_DOUBLE
  53. GR_SAVE_GP = r38 // Keeps gp across the call in SIN_DOUBLE/COS_DOUBLE
  54. GR_SAVE_PFS = r39 // Keeps ar.pfs across the call in SIN_DOUBLE/COS_DOUBLE
  55. // floating point registers used
  56. sin_coeff_P1 = f32 // P1..P5: coefficients of the sin(r) polynomial
  57. sin_coeff_P2 = f33
  58. sin_coeff_Q1 = f34 // Q1..Q6: coefficients of the cos(r) polynomial
  59. sin_coeff_Q2 = f35
  60. sin_coeff_P4 = f36
  61. sin_coeff_P5 = f37
  62. sin_coeff_Q3 = f38
  63. sin_coeff_Q4 = f39
  64. sin_Mx = f40 // Result of fcvt.fx on x: the integer M held in an FP register
  65. sin_Mfloat = f41 // M converted back to floating point
  66. sin_tbl_S = f42 // Table value paired with the cos(r) polynomial: sin(M) for sinf, cos(M) for cosf
  67. sin_tbl_C = f43 // Table value paired with the sin(r) polynomial: cos(M) for sinf, sin(M) for cosf
  68. sin_r = f44 // r = x - M
  69. sin_rcube = f45 // r^3
  70. sin_tsq = f46 // t^2 = r^4
  71. sin_r7 = f47 // r^7
  72. sin_t = f48 // t = r^2
  73. sin_poly_p2 = f49 // P1 + P2*t
  74. sin_poly_p1 = f50 // P4 + P5*t
  75. fp_tmp = f51 // Operand of the dummy multiply that raises the inexact flag
  76. sin_poly_p3 = f52 // P3 + t*sin_poly_p1
  77. sin_poly_p4 = f53 // r + r^3*sin_poly_p2
  78. sin_of_r = f54 // sin(r) polynomial: r^7*sin_poly_p3 + sin_poly_p4
  79. sin_S_t = f55 // sin_tbl_S * t
  80. sin_poly_q2 = f56 // Not referenced by the code visible in this file
  81. sin_poly_q1 = f57 // Q1 + Q2*t
  82. sin_S_tcube = f58 // Not referenced by the code visible in this file
  83. sin_poly_q3 = f59 // Q3 + Q4*t
  84. sin_poly_q4 = f60 // Not referenced by the code visible in this file
  85. sin_tbl_S_tcube = f61 // sin_tbl_S * t^3
  86. sin_tbl_S_cos_of_r = f62 // sin_tbl_S multiplied by the cos(r) polynomial
  87. sin_coeff_Q5 = f63
  88. sin_coeff_Q6 = f64
  89. sin_coeff_P3 = f65
  90. sin_poly_q5 = f66 // Q5 + Q6*t
  91. sin_poly_q12 = f67 // sin_tbl_S + sin_S_t*sin_poly_q1
  92. sin_poly_q3456 = f68 // sin_poly_q3 + t^2*sin_poly_q5
  93. fp_tmp2 = f69 // Discarded result of the dummy multiply that raises underflow
  94. SIN_NORM_f8 = f70 // Normalized copy of the input x
  95. .data
  96. .align 16 // 16-byte alignment: the code reads this table in 16-byte steps
  97. sin_coeff_1_table: // Six 16-byte coefficient slots, then sin/cos pairs for M = -32..32
  98. data8 0xBF56C16C16BF6462 // q3
  99. data8 0x3EFA01A0128B9EBC // q4
  100. data8 0xBE927E42FDF33FFE // q5
  101. data8 0x3E21DA5C72A446F3 // q6
  102. data8 0x3EC71DD1D5E421A4 // p4
  103. data8 0xBE5AC5C9D0ACF95A // p5
  104. data8 0xBFC55555555554CA // p1
  105. data8 0x3F811111110F2395 // p2
  106. data8 0xBFE0000000000000 // q1
  107. data8 0x3FA55555555554EF // q2
  108. data8 0xBF2A01A011232913 // p3
  109. data8 0x0000000000000000 // pad (skipped by the 16-byte post-increment of the p3 load)
  110. ///////////////////////////////////////// sin(M), cos(M) as doubles, 16 bytes per integer M
  111. data8 0xBFE1A54991426566 //sin(-32)
  112. data8 0x3FEAB1F5305DE8E5 //cos(-32)
  113. data8 0x3FD9DBC0B640FC81 //sin(-31)
  114. data8 0x3FED4591C3E12A20 //cos(-31)
  115. data8 0x3FEF9DF47F1C903D //sin(-30)
  116. data8 0x3FC3BE82F2505A52 //cos(-30)
  117. data8 0x3FE53C7D20A6C9E7 //sin(-29)
  118. data8 0xBFE7F01658314E47 //cos(-29)
  119. data8 0xBFD156853B4514D6 //sin(-28)
  120. data8 0xBFEECDAAD1582500 //cos(-28)
  121. data8 0xBFEE9AA1B0E5BA30 //sin(-27)
  122. data8 0xBFD2B266F959DED5 //cos(-27)
  123. data8 0xBFE866E0FAC32583 //sin(-26)
  124. data8 0x3FE4B3902691A9ED //cos(-26)
  125. data8 0x3FC0F0E6F31E809D //sin(-25)
  126. data8 0x3FEFB7EEF59504FF //cos(-25)
  127. data8 0x3FECFA7F7919140F //sin(-24)
  128. data8 0x3FDB25BFB50A609A //cos(-24)
  129. data8 0x3FEB143CD0247D02 //sin(-23)
  130. data8 0xBFE10CF7D591F272 //cos(-23)
  131. data8 0x3F8220A29F6EB9F4 //sin(-22)
  132. data8 0xBFEFFFADD8D4ACDA //cos(-22)
  133. data8 0xBFEAC5E20BB0D7ED //sin(-21)
  134. data8 0xBFE186FF83773759 //cos(-21)
  135. data8 0xBFED36D8F55D3CE0 //sin(-20)
  136. data8 0x3FDA1E043964A83F //cos(-20)
  137. data8 0xBFC32F2D28F584CF //sin(-19)
  138. data8 0x3FEFA377DE108258 //cos(-19)
  139. data8 0x3FE8081668131E26 //sin(-18)
  140. data8 0x3FE52150815D2470 //cos(-18)
  141. data8 0x3FEEC3C4AC42882B //sin(-17)
  142. data8 0xBFD19C46B07F58E7 //cos(-17)
  143. data8 0x3FD26D02085F20F8 //sin(-16)
  144. data8 0xBFEEA5257E962F74 //cos(-16)
  145. data8 0xBFE4CF2871CEC2E8 //sin(-15)
  146. data8 0xBFE84F5D069CA4F3 //cos(-15)
  147. data8 0xBFEFB30E327C5E45 //sin(-14)
  148. data8 0x3FC1809AEC2CA0ED //cos(-14)
  149. data8 0xBFDAE4044881C506 //sin(-13)
  150. data8 0x3FED09CDD5260CB7 //cos(-13)
  151. data8 0x3FE12B9AF7D765A5 //sin(-12)
  152. data8 0x3FEB00DA046B65E3 //cos(-12)
  153. data8 0x3FEFFFEB762E93EB //sin(-11)
  154. data8 0x3F7220AE41EE2FDF //cos(-11)
  155. data8 0x3FE1689EF5F34F52 //sin(-10)
  156. data8 0xBFEAD9AC890C6B1F //cos(-10)
  157. data8 0xBFDA6026360C2F91 //sin( -9)
  158. data8 0xBFED27FAA6A6196B //cos( -9)
  159. data8 0xBFEFA8D2A028CF7B //sin( -8)
  160. data8 0xBFC29FBEBF632F94 //cos( -8)
  161. data8 0xBFE50608C26D0A08 //sin( -7)
  162. data8 0x3FE81FF79ED92017 //cos( -7)
  163. data8 0x3FD1E1F18AB0A2C0 //sin( -6)
  164. data8 0x3FEEB9B7097822F5 //cos( -6)
  165. data8 0x3FEEAF81F5E09933 //sin( -5)
  166. data8 0x3FD22785706B4AD9 //cos( -5)
  167. data8 0x3FE837B9DDDC1EAE //sin( -4)
  168. data8 0xBFE4EAA606DB24C1 //cos( -4)
  169. data8 0xBFC210386DB6D55B //sin( -3)
  170. data8 0xBFEFAE04BE85E5D2 //cos( -3)
  171. data8 0xBFED18F6EAD1B446 //sin( -2)
  172. data8 0xBFDAA22657537205 //cos( -2)
  173. data8 0xBFEAED548F090CEE //sin( -1)
  174. data8 0x3FE14A280FB5068C //cos( -1)
  175. data8 0x0000000000000000 //sin( 0)
  176. data8 0x3FF0000000000000 //cos( 0)
  177. data8 0x3FEAED548F090CEE //sin( 1)
  178. data8 0x3FE14A280FB5068C //cos( 1)
  179. data8 0x3FED18F6EAD1B446 //sin( 2)
  180. data8 0xBFDAA22657537205 //cos( 2)
  181. data8 0x3FC210386DB6D55B //sin( 3)
  182. data8 0xBFEFAE04BE85E5D2 //cos( 3)
  183. data8 0xBFE837B9DDDC1EAE //sin( 4)
  184. data8 0xBFE4EAA606DB24C1 //cos( 4)
  185. data8 0xBFEEAF81F5E09933 //sin( 5)
  186. data8 0x3FD22785706B4AD9 //cos( 5)
  187. data8 0xBFD1E1F18AB0A2C0 //sin( 6)
  188. data8 0x3FEEB9B7097822F5 //cos( 6)
  189. data8 0x3FE50608C26D0A08 //sin( 7)
  190. data8 0x3FE81FF79ED92017 //cos( 7)
  191. data8 0x3FEFA8D2A028CF7B //sin( 8)
  192. data8 0xBFC29FBEBF632F94 //cos( 8)
  193. data8 0x3FDA6026360C2F91 //sin( 9)
  194. data8 0xBFED27FAA6A6196B //cos( 9)
  195. data8 0xBFE1689EF5F34F52 //sin( 10)
  196. data8 0xBFEAD9AC890C6B1F //cos( 10)
  197. data8 0xBFEFFFEB762E93EB //sin( 11)
  198. data8 0x3F7220AE41EE2FDF //cos( 11)
  199. data8 0xBFE12B9AF7D765A5 //sin( 12)
  200. data8 0x3FEB00DA046B65E3 //cos( 12)
  201. data8 0x3FDAE4044881C506 //sin( 13)
  202. data8 0x3FED09CDD5260CB7 //cos( 13)
  203. data8 0x3FEFB30E327C5E45 //sin( 14)
  204. data8 0x3FC1809AEC2CA0ED //cos( 14)
  205. data8 0x3FE4CF2871CEC2E8 //sin( 15)
  206. data8 0xBFE84F5D069CA4F3 //cos( 15)
  207. data8 0xBFD26D02085F20F8 //sin( 16)
  208. data8 0xBFEEA5257E962F74 //cos( 16)
  209. data8 0xBFEEC3C4AC42882B //sin( 17)
  210. data8 0xBFD19C46B07F58E7 //cos( 17)
  211. data8 0xBFE8081668131E26 //sin( 18)
  212. data8 0x3FE52150815D2470 //cos( 18)
  213. data8 0x3FC32F2D28F584CF //sin( 19)
  214. data8 0x3FEFA377DE108258 //cos( 19)
  215. data8 0x3FED36D8F55D3CE0 //sin( 20)
  216. data8 0x3FDA1E043964A83F //cos( 20)
  217. data8 0x3FEAC5E20BB0D7ED //sin( 21)
  218. data8 0xBFE186FF83773759 //cos( 21)
  219. data8 0xBF8220A29F6EB9F4 //sin( 22)
  220. data8 0xBFEFFFADD8D4ACDA //cos( 22)
  221. data8 0xBFEB143CD0247D02 //sin( 23)
  222. data8 0xBFE10CF7D591F272 //cos( 23)
  223. data8 0xBFECFA7F7919140F //sin( 24)
  224. data8 0x3FDB25BFB50A609A //cos( 24)
  225. data8 0xBFC0F0E6F31E809D //sin( 25)
  226. data8 0x3FEFB7EEF59504FF //cos( 25)
  227. data8 0x3FE866E0FAC32583 //sin( 26)
  228. data8 0x3FE4B3902691A9ED //cos( 26)
  229. data8 0x3FEE9AA1B0E5BA30 //sin( 27)
  230. data8 0xBFD2B266F959DED5 //cos( 27)
  231. data8 0x3FD156853B4514D6 //sin( 28)
  232. data8 0xBFEECDAAD1582500 //cos( 28)
  233. data8 0xBFE53C7D20A6C9E7 //sin( 29)
  234. data8 0xBFE7F01658314E47 //cos( 29)
  235. data8 0xBFEF9DF47F1C903D //sin( 30)
  236. data8 0x3FC3BE82F2505A52 //cos( 30)
  237. data8 0xBFD9DBC0B640FC81 //sin( 31)
  238. data8 0x3FED4591C3E12A20 //cos( 31)
  239. data8 0x3FE1A54991426566 //sin( 32)
  240. data8 0x3FEAB1F5305DE8E5 //cos( 32)
  241. ////////////////////////////////////////// end of sin_coeff_1_table
  242. .global sinf // Export the sinf entry point
  243. .global cosf // Export the cosf entry point
  244. .text
  245. .proc cosf // cosf: sets the cos predicates and flag, then continues in SINCOSF_COMMON
  246. .align 32
  247. cosf:
  248. { .mfi
  249. alloc r32 = ar.pfs,1,7,0,0 // Frame r32-r39: 1 input, 7 locals, no outputs; r32 receives ar.pfs
  250. fcvt.fx.s1 sin_Mx = f8 // Convert x to an integer M, kept in the significand of sin_Mx
  251. cmp.ne p6,p7 = r0,r0 // p7 set if cos
  252. }
  253. { .mfi
  254. addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp // Address of the linkage-table slot that holds &sin_coeff_1_table
  255. fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
  256. mov sin_GR_sincos_flag = 0x0 // Flag 0: an out-of-range M is routed to COS_DOUBLE
  257. }
  258. ;;
  259. { .mfi
  260. ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1] // Fetch the table address itself
  261. fclass.m.unc p9,p0 = f8, 0x07 // p9 if x is zero: cos(0) early exit
  262. cmp.ne p8,p0 = r0,r0 // Clear p8: the sin(0) exit must not trigger for cos
  263. }
  264. { .mfb
  265. nop.m 999
  266. nop.f 999
  267. br.sptk SINCOSF_COMMON // Continue in the code shared with sinf
  268. }
  269. ;;
  270. .endp cosf
  271. .text
  272. .proc sinf // sinf entry; the code from SINCOSF_COMMON onward is shared with cosf
  273. .align 32
  274. sinf:
  275. { .mfi
  276. alloc r32 = ar.pfs,1,7,0,0 // Frame r32-r39: 1 input, 7 locals, no outputs; r32 receives ar.pfs
  277. fcvt.fx.s1 sin_Mx = f8 // Convert x to an integer M, kept in the significand of sin_Mx
  278. cmp.eq p6,p7 = r0,r0 // p6 set if sin
  279. }
  280. { .mfi
  281. addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp // Address of the linkage-table slot that holds &sin_coeff_1_table
  282. fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
  283. mov sin_GR_sincos_flag = 0x1 // Flag 1: an out-of-range M is routed to SIN_DOUBLE
  284. }
  285. ;;
  286. { .mfi
  287. ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1] // Fetch the table address itself
  288. fclass.m.unc p8,p0 = f8, 0x07 // p8 if x is zero: sin(0) returns x unchanged
  289. cmp.ne p9,p0 = r0,r0 // Clear p9: the cos(0) exit must not trigger for sin
  290. }
  291. { .mfb
  292. nop.m 999
  293. nop.f 999
  294. br.sptk SINCOSF_COMMON // Join the shared code (the label follows directly)
  295. }
  296. ;;
  297. SINCOSF_COMMON:
  298. // Here with p6 if sin, p7 if cos, p8 if sin(0), p9 if cos(0)
  299. { .mmf
  300. ldfpd sin_coeff_Q3, sin_coeff_Q4 = [SIN_AD_PQ_1], 16 // Load q3,q4 and advance to the next pair
  301. nop.m 999
  302. fclass.m.unc p11,p0 = f8, 0x23 // Test for x=inf
  303. }
  304. ;;
  305. { .mfb
  306. ldfpd sin_coeff_Q5, sin_coeff_Q6 = [SIN_AD_PQ_1], 16 // Load q5,q6
  307. fclass.m.unc p10,p0 = f8, 0xc3 // Test for x=nan
  308. (p8) br.ret.spnt b0 // Exit for sin(0)
  309. }
  310. { .mfb
  311. nop.m 999
  312. (p9) fma.s f8 = f1,f1,f0 // cos(0): result is 1.0 (1*1 + 0)
  313. (p9) br.ret.spnt b0 // Exit for cos(0)
  314. }
  315. ;;
  316. { .mmf
  317. ldfpd sin_coeff_P4, sin_coeff_P5 = [SIN_AD_PQ_1], 16 // Load p4,p5
  318. addl gr_tmp = -1,r0 // gr_tmp = -1, later used to build the inexact-raising operand
  319. fcvt.xf sin_Mfloat = sin_Mx // M as a floating-point value
  320. }
  321. ;;
  322. { .mfi
  323. getf.sig sin_GR_Mint = sin_Mx // M as an integer in a general register
  324. (p11) frcpa.s0 f8,p13 = f0,f0 // qnan indef if x=inf
  325. nop.i 999
  326. }
  327. { .mfb
  328. ldfpd sin_coeff_P1, sin_coeff_P2 = [SIN_AD_PQ_1], 16 // Load p1,p2
  329. nop.f 999
  330. (p11) br.ret.spnt b0 // Exit for x=inf
  331. }
  332. ;;
  333. { .mfi
  334. ldfpd sin_coeff_Q1, sin_coeff_Q2 = [SIN_AD_PQ_1], 16 // Load q1,q2
  335. nop.f 999
  336. cmp.ge p8,p9 = -33,sin_GR_Mint // p8 if M <= -33, otherwise p9
  337. }
  338. { .mfb
  339. add sin_GR_index = 32,sin_GR_Mint // Bias M so that M = -32 selects table entry 0
  340. (p10) fma.s f8 = f8,f1,f0 // Force qnan if x=nan
  341. (p10) br.ret.spnt b0 // Exit for x=nan
  342. }
  343. ;;
  344. { .mmi
  345. ldfd sin_coeff_P3 = [SIN_AD_PQ_1], 16 // Load p3; the increment skips the pad, leaving the pointer at the sin(-32) entry
  346. (p9) cmp.le p8,p0 = 33, sin_GR_Mint // p8 also if M >= 33, so p8 now means M is outside -32..32
  347. shl sin_GR_index = sin_GR_index,4 // Scale by 16 bytes per (sin,cos) entry
  348. }
  349. ;;
  350. { .mfi
  351. setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
  352. fnma.s1 sin_r = f1,sin_Mfloat,SIN_NORM_f8 // r = x - M
  353. (p8) cmp.eq.unc p11,p12=sin_GR_sincos_flag,r0 // p11 if must call dbl cos
  354. // p12 if must call dbl sin
  355. }
  356. { .mbb
  357. add SIN_AD_PQ_2 = sin_GR_index,SIN_AD_PQ_1 // Address of the table entry for M
  358. (p11) br.cond.spnt COS_DOUBLE // M outside the table: fall back to double-precision cos
  359. (p12) br.cond.spnt SIN_DOUBLE // M outside the table: fall back to double-precision sin
  360. }
  361. ;;
  362. .pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
  363. { .mmi
  364. (p6) ldfpd sin_tbl_S,sin_tbl_C = [SIN_AD_PQ_2] // sin: S = sin(M), C = cos(M)
  365. (p7) ldfpd sin_tbl_C,sin_tbl_S = [SIN_AD_PQ_2] // cos: targets swapped, so S = cos(M), C = sin(M)
  366. nop.i 999
  367. }
  368. ;;
  369. { .mfi
  370. nop.m 999
  371. (p6) fclass.m.unc p8,p0 = f8, 0x0b // If sin, note denormal input to set uflow
  372. nop.i 999
  373. }
  374. { .mfi
  375. nop.m 999
  376. fma.s1 sin_t = sin_r,sin_r,f0 // t = r^2
  377. nop.i 999
  378. }
  379. ;;
  380. { .mfi
  381. nop.m 999
  382. fma.s1 sin_rcube = sin_t,sin_r,f0 // r^3
  383. nop.i 999
  384. }
  385. { .mfi
  386. nop.m 999
  387. fma.s1 sin_tsq = sin_t,sin_t,f0 // t^2 = r^4
  388. nop.i 999
  389. }
  390. ;;
  391. { .mfi
  392. nop.m 999
  393. fma.s1 sin_poly_q3 = sin_t,sin_coeff_Q4,sin_coeff_Q3 // Q3 + Q4*t
  394. nop.i 999
  395. }
  396. { .mfi
  397. nop.m 999
  398. fma.s1 sin_poly_q5 = sin_t,sin_coeff_Q6,sin_coeff_Q5 // Q5 + Q6*t
  399. nop.i 999
  400. }
  401. ;;
  402. { .mfi
  403. nop.m 999
  404. fma.s1 sin_poly_p1 = sin_t,sin_coeff_P5,sin_coeff_P4 // P4 + P5*t
  405. nop.i 999
  406. }
  407. { .mfi
  408. nop.m 999
  409. fma.s1 sin_poly_p2 = sin_t,sin_coeff_P2,sin_coeff_P1 // P1 + P2*t
  410. nop.i 999
  411. }
  412. ;;
  413. { .mfi
  414. nop.m 999
  415. fma.s1 sin_poly_q1 = sin_t,sin_coeff_Q2,sin_coeff_Q1 // Q1 + Q2*t
  416. nop.i 999
  417. }
  418. { .mfi
  419. nop.m 999
  420. fma.s1 sin_S_t = sin_t,sin_tbl_S,f0 // S*t
  421. nop.i 999
  422. }
  423. ;;
  424. { .mfi
  425. nop.m 999
  426. (p8) fmpy.s.s0 fp_tmp2 = f8,f8 // Dummy mult to set underflow if sin(denormal)
  427. nop.i 999
  428. }
  429. { .mfi
  430. nop.m 999
  431. fma.s1 sin_r7 = sin_rcube,sin_tsq,f0 // r^3 * r^4 = r^7
  432. nop.i 999
  433. }
  434. ;;
  435. { .mfi
  436. nop.m 999
  437. fma.s1 sin_poly_q3456 = sin_tsq,sin_poly_q5,sin_poly_q3 // Q3 + Q4*t + Q5*t^2 + Q6*t^3
  438. nop.i 999
  439. }
  440. ;;
  441. { .mfi
  442. nop.m 999
  443. fma.s1 sin_poly_p3 = sin_t,sin_poly_p1,sin_coeff_P3 // P3 + P4*t + P5*t^2
  444. nop.i 999
  445. }
  446. { .mfi
  447. nop.m 999
  448. fma.s1 sin_poly_p4 = sin_rcube,sin_poly_p2,sin_r // r + r^3*(P1 + P2*t)
  449. nop.i 999
  450. }
  451. ;;
  452. { .mfi
  453. nop.m 999
  454. fma.s1 sin_tbl_S_tcube = sin_S_t,sin_tsq,f0 // S*t^3
  455. nop.i 999
  456. }
  457. { .mfi
  458. nop.m 999
  459. fma.s1 sin_poly_q12 = sin_S_t,sin_poly_q1,sin_tbl_S // S*(1 + Q1*t + Q2*t^2)
  460. nop.i 999
  461. }
  462. ;;
  463. { .mfi
  464. nop.m 999
  465. fma.d.s1 sin_of_r = sin_r7,sin_poly_p3,sin_poly_p4 // sin(r) polynomial: r + P1*r^3 + ... + P5*r^11
  466. nop.i 999
  467. }
  468. ;;
  469. { .mfi
  470. nop.m 999
  471. fma.d.s1 sin_tbl_S_cos_of_r = sin_tbl_S_tcube,sin_poly_q3456,sin_poly_q12 // S * (1 + Q1*t + ... + Q6*t^6)
  472. nop.i 999
  473. }
  474. { .mfi
  475. nop.m 999
  476. fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
  477. nop.i 999
  478. }
  479. ;;
  480. .pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
  481. { .mfi
  482. nop.m 999
  483. //(SIN_Sin_Flag) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
  484. (p6) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r // sin(M+r) = cos(M)*sin(r) + sin(M)*cos(r)
  485. nop.i 999
  486. }
  487. { .mfb
  488. nop.m 999
  489. //(SIN_Cos_Flag) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
  490. (p7) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r // cos(M+r) = cos(M)*cos(r) - sin(M)*sin(r)
  491. br.ret.sptk b0 // Return the single-precision result in f8
  492. }
  493. .endp sinf
  494. .proc SIN_DOUBLE // Fallback for sinf when M is outside -32..32: call sin and round to single
  495. SIN_DOUBLE:
  496. .prologue
  497. { .mfi
  498. nop.m 0
  499. nop.f 0
  500. .save ar.pfs,GR_SAVE_PFS // Unwind info: ar.pfs is kept in GR_SAVE_PFS
  501. mov GR_SAVE_PFS=ar.pfs // Keep ar.pfs, which br.call overwrites
  502. }
  503. ;;
  504. { .mfi
  505. mov GR_SAVE_GP=gp // Keep gp so it can be restored after the call
  506. nop.f 0
  507. .save b0, GR_SAVE_B0 // Unwind info: b0 is kept in GR_SAVE_B0
  508. mov GR_SAVE_B0=b0 // Keep the return address, which br.call overwrites
  509. }
  510. .body
  511. { .mmb
  512. nop.m 999
  513. nop.m 999
  514. br.call.sptk.many b0=sin // Call the double-precision sin; the argument is in f8
  515. }
  516. ;;
  517. { .mfi
  518. mov gp = GR_SAVE_GP // Restore gp
  519. nop.f 999
  520. mov b0 = GR_SAVE_B0 // Restore the return address
  521. }
  522. ;;
  523. { .mfi
  524. nop.m 999
  525. fma.s f8 = f8,f1,f0 // Round the result to single precision (f8*1 + 0)
  526. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs before returning
  527. }
  528. { .mib
  529. nop.m 999
  530. nop.i 999
  531. br.ret.sptk b0 // Return to the caller of sinf
  532. }
  533. ;;
  534. .endp SIN_DOUBLE
  535. .proc COS_DOUBLE // Fallback for cosf when M is outside -32..32: call cos and round to single
  536. COS_DOUBLE:
  537. .prologue
  538. { .mfi
  539. nop.m 0
  540. nop.f 0
  541. .save ar.pfs,GR_SAVE_PFS // Unwind info: ar.pfs is kept in GR_SAVE_PFS
  542. mov GR_SAVE_PFS=ar.pfs // Keep ar.pfs, which br.call overwrites
  543. }
  544. ;;
  545. { .mfi
  546. mov GR_SAVE_GP=gp // Keep gp so it can be restored after the call
  547. nop.f 0
  548. .save b0, GR_SAVE_B0 // Unwind info: b0 is kept in GR_SAVE_B0
  549. mov GR_SAVE_B0=b0 // Keep the return address, which br.call overwrites
  550. }
  551. .body
  552. { .mmb
  553. nop.m 999
  554. nop.m 999
  555. br.call.sptk.many b0=cos // Call the double-precision cos; the argument is in f8
  556. }
  557. ;;
  558. { .mfi
  559. mov gp = GR_SAVE_GP // Restore gp
  560. nop.f 999
  561. mov b0 = GR_SAVE_B0 // Restore the return address
  562. }
  563. ;;
  564. { .mfi
  565. nop.m 999
  566. fma.s f8 = f8,f1,f0 // Round the result to single precision (f8*1 + 0)
  567. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs before returning
  568. }
  569. { .mib
  570. nop.m 999
  571. nop.i 999
  572. br.ret.sptk b0 // Return to the caller of cosf
  573. }
  574. ;;
  575. .endp COS_DOUBLE
  576. .type sin,@function // sin is a function symbol; it is the call target in SIN_DOUBLE
  577. .global sin // Not defined in the code visible here; presumably resolved at link time
  578. .type cos,@function // cos is a function symbol; it is the call target in COS_DOUBLE
  579. .global cos // Not defined in the code visible here; presumably resolved at link time