Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1237 lines
36 KiB

  1. .file "sinhf.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00 Initial version
  29. // 4/04/00 Unwind support added
  30. // 8/15/00 Bundle added after call to __libm_error_support to properly
  31. // set [the previously overwritten] GR_Parameter_RESULT.
  32. // 10/12/00 Update to set denormal operand and underflow flags
  33. //
  34. // API
  35. //==============================================================
  36. // float = sinhf(float)
  37. // input floating point f8
  38. // output floating point f8
  39. //
  40. // Registers used
  41. //==============================================================
  42. // general registers:
  43. // r32 -> r47
  44. //
  45. // predicate registers used:
  46. // p6 p7 p8 p9
  47. //
  48. // floating-point registers used:
  49. // f9 -> f15; f32 -> f44;
  50. // f8 has input, then output
  51. //
  52. // Overview of operation
  53. //==============================================================
  54. // There are four paths
  55. // 1. |x| < 0.25 SINH_BY_POLY
  56. // 2. |x| < 32 SINH_BY_TBL
  57. // 3. |x| < 2^14 SINH_BY_EXP
  58. // 4. |x| >= 2^14 SINH_HUGE
  59. //
  60. // For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
  61. // >= 1.0110001.... x 2^13
  62. // >= 11357.2166
  63. //
  64. // But for double we get infinity for x >= 408633ce8fb9f87e
  65. // >= 1.0110...x 2^9
  66. // >= +7.10476e+002
  67. //
  68. // And for single we get infinity for x >= 42b3a496
  69. // >= 1.0110... 2^6
  70. // >= 89.8215
  71. //
  72. // SAFE: If there is danger of overflow set SAFE to 0
  73. // NOT implemented: if there is danger of underflow, set SAFE to 0
  74. // SAFE for all paths listed below
  75. //
  76. // 1. SINH_BY_POLY
  77. // ===============
  78. // If |x| is less than the tiny threshold, then clear SAFE
  79. // For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
  80. // register-biased, this is fc01
  81. // For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
  82. // If |x| < tiny threshold, set SAFE = 0
  83. //
  84. // 2. SINH_BY_TBL
  85. // =============
  86. // SAFE: SAFE is always 1 for TBL;
  87. //
  88. // 3. SINH_BY_EXP
  89. // ==============
  90. // There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
  91. // r34 has N-1; 16382 is in register biased form, 0x13ffd
  92. // There is danger of double overflow if N-1 > 0x3fe
  93. // in register biased form, 0x103fd
  94. // Analogously, there is danger of single overflow if N-1 > 0x7e
  95. // in register biased form, 0x1007d
  96. // SAFE: If there is danger of overflow set SAFE to 0
  97. //
  98. // 4. SINH_HUGE
  99. // ============
  100. // SAFE: SAFE is always 0 for HUGE
  101. //
  102. // Assembly macros
  103. //==============================================================
  104. sinhf_FR_X = f44 // ax = |x| (built with fmerge.s f0,f8); same register as FR_RESULT
  105. sinhf_FR_X2 = f9 // POLY: ax^2
  106. sinhf_FR_X4 = f10 // POLY: ax^4
  107. sinhf_FR_SGNX = f40 // sign of x on the magnitude 1.0 (fmerge.s f8,f1)
  108. sinhf_FR_Inv_log2by64 = f9 // f9-f11 hold the reduction constants first, then are reloaded as A1-A3
  109. sinhf_FR_log2by64_lo = f11
  110. sinhf_FR_log2by64_hi = f10
  111. sinhf_FR_A1 = f9 // A1-A3: coefficients of p_odd
  112. sinhf_FR_A2 = f10
  113. sinhf_FR_A3 = f11
  114. sinhf_FR_Rcub = f12 // R^3
  115. sinhf_FR_M_temp = f13 // M as the integer produced by fcvt.fx
  116. sinhf_FR_R_temp = f13 // ax - M*log2by64_hi
  117. sinhf_FR_Rsq = f13 // R^2
  118. sinhf_FR_R = f14 // reduced argument R
  119. sinhf_FR_M = f38 // M as a floating-point value
  120. sinhf_FR_B1 = f15 // B1-B3: coefficients of p_even
  121. sinhf_FR_B2 = f32
  122. sinhf_FR_B3 = f33
  123. sinhf_FR_peven_temp1 = f34 // peven_* and podd_* temporaries share f34/f35
  124. sinhf_FR_peven_temp2 = f35
  125. sinhf_FR_peven = f36
  126. sinhf_FR_podd_temp1 = f34
  127. sinhf_FR_podd_temp2 = f35
  128. sinhf_FR_podd = f37
  129. sinhf_FR_poly_podd_temp1 = f11 // POLY-path temporaries (f11/f13 shared by odd and even parts)
  130. sinhf_FR_poly_podd_temp2 = f13
  131. sinhf_FR_poly_peven_temp1 = f11
  132. sinhf_FR_poly_peven_temp2 = f13
  133. sinhf_FR_J_temp = f9
  134. sinhf_FR_J = f10
  135. sinhf_FR_Mmj = f39
  136. sinhf_FR_N_temp1 = f11 // receives biased exponent 0xfffe + N via setf.exp
  137. sinhf_FR_N_temp2 = f12 // receives biased exponent 0xfffe - N via setf.exp
  138. sinhf_FR_N = f13
  139. sinhf_FR_spos = f14 // 2^(N-1); reuses the register of sinhf_FR_R
  140. sinhf_FR_sneg = f15 // 2^(-N-1); reuses the register of sinhf_FR_B1
  141. sinhf_FR_Tjhi = f32 // T(j) high part (ldfe from the j table)
  142. sinhf_FR_Tjlo = f33 // T(j) low part (ldfs from the j table)
  143. sinhf_FR_Tmjhi = f34 // T(-j) high part
  144. sinhf_FR_Tmjlo = f35 // T(-j) low part
  145. sinhf_GR_mJ = r35 // byte offset of the -j entry in the j table
  146. sinhf_GR_J = r36 // byte offset of the +j entry in the j table
  147. sinhf_AD_mJ = r38 // address of the -j entry
  148. sinhf_AD_J = r39 // address of the +j entry
  149. sinhf_FR_S_hi = f9 // sinh(B) high part
  150. sinhf_FR_S_hi_temp = f10
  151. sinhf_FR_S_lo_temp1 = f11
  152. sinhf_FR_S_lo_temp2 = f12
  153. sinhf_FR_S_lo_temp3 = f13
  154. sinhf_FR_S_lo = f38 // sinh(B) low part
  155. sinhf_FR_C_hi = f39 // cosh(B)
  156. sinhf_FR_C_hi_temp1 = f10
  157. sinhf_FR_Y_hi = f11
  158. sinhf_FR_Y_lo_temp = f12
  159. sinhf_FR_Y_lo = f13
  160. sinhf_FR_SINH = f9 // |sinh(x)| before the sign is applied
  161. sinhf_FR_P1 = f14 // P1-P6: POLY-path coefficients from single_sinhf_p_table
  162. sinhf_FR_P2 = f15
  163. sinhf_FR_P3 = f32
  164. sinhf_FR_P4 = f33
  165. sinhf_FR_P5 = f34
  166. sinhf_FR_P6 = f35
  167. sinhf_FR_TINY_THRESH = f9
  168. sinhf_FR_SINH_temp = f10 // EXP path: Y_hi + Y_lo before scaling
  169. sinhf_FR_SCALE = f11 // EXP path: sign(x) * 2^(N-1)
  170. sinhf_FR_signed_hi_lo = f10
  171. GR_SAVE_B0 = r42 // NOTE(review): GR_SAVE_* and GR_Parameter_* are not referenced before SINH_HUGE;
  172. GR_SAVE_PFS = r41 // presumably used by the error-support call sequence - confirm
  173. GR_SAVE_GP = r43
  174. GR_Parameter_X = r44
  175. GR_Parameter_Y = r45
  176. GR_Parameter_RESULT = r46
  177. //GR_Parameter_TAG = r47 (the overflow tag is written to r47 directly in SINH_BY_EXP)
  178. FR_X = f8
  179. FR_Y = f0
  180. FR_RESULT = f44 // same register as sinhf_FR_X
  181. // Data tables
  182. //==============================================================
  183. .data
  184. .align 16
  185. single_sinhf_arg_reduction: // three 16-byte entries, read in order with ldfe (post-increment 16)
  186. data8 0xB8AA3B295C17F0BC, 0x00004005 // -> sinhf_FR_Inv_log2by64
  187. data8 0xB17217F7D1000000, 0x00003FF8 // -> sinhf_FR_log2by64_hi
  188. data8 0xCF79ABC9E3B39804, 0x00003FD0 // -> sinhf_FR_log2by64_lo
  189. single_sinhf_p_table: // POLY-path coefficients P1..P6, 16 bytes each, read in order with ldfe
  190. data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // P1 (close to, but not exactly, 1/3!)
  191. data8 0x8888888888888412, 0x00003FF8 // P2 (~1/5!)
  192. data8 0xD00D00D00D4D39F2, 0x00003FF2 // P3 (~1/7!)
  193. data8 0xB8EF1D28926D8891, 0x00003FEC // P4 (~1/9!)
  194. data8 0xD732377688025BE9, 0x00003FE5 // P5 (~1/11!)
  195. data8 0xB08AF9AE78C1239F, 0x00003FDE // P6 (~1/13!)
  196. single_sinhf_ab_table: // TBL/EXP-path coefficients, 16 bytes each, read in order with ldfe
  197. data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1 (p_odd, ~1/3!)
  198. data8 0x88888888884ECDD5, 0x00003FF8 // A2 (p_odd, ~1/5!)
  199. data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3 (p_odd, ~1/7!)
  200. data8 0x8000000000000002, 0x00003FFE // B1 (p_even, ~1/2!)
  201. data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2 (p_even, ~1/4!)
  202. data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3 (p_even, ~1/6!)
  203. single_sinhf_j_table: // 65 entries of 32 bytes: T_hi (ldfe at +0) and T_lo (ldfs at +16); entry k is at offset k*32, k = j+32
  204. data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // j = -32
  205. data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
  206. data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
  207. data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
  208. data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
  209. data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
  210. data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
  211. data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
  212. data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
  213. data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
  214. data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
  215. data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
  216. data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
  217. data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
  218. data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
  219. data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
  220. data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
  221. data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
  222. data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
  223. data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
  224. data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
  225. data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
  226. data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
  227. data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
  228. data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
  229. data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
  230. data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
  231. data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
  232. data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
  233. data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
  234. data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
  235. data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
  236. data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // j = 0 (T = 1.0)
  237. data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
  238. data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
  239. data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
  240. data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
  241. data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
  242. data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
  243. data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
  244. data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
  245. data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
  246. data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
  247. data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
  248. data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
  249. data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
  250. data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
  251. data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
  252. data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
  253. data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
  254. data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
  255. data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
  256. data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
  257. data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
  258. data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
  259. data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
  260. data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
  261. data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
  262. data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
  263. data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
  264. data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
  265. data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
  266. data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
  267. data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
  268. data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // j = +32
  269. .align 32
  270. .global sinhf#
  271. .section .text
  272. .proc sinhf#
  273. .align 32
  274. sinhf: // entry: f8 = x; NaN/Inf return here, |x| >= 0.25 goes to SINH_BY_TBL, otherwise falls into SINH_BY_POLY
  275. // X infinity or NAN?
  276. // Take invalid fault if enabled
  277. { .mfi
  278. alloc r32 = ar.pfs,0,12,4,0 // frame: 0 inputs, 12 locals, 4 outputs; r32 is overwritten as scratch below
  279. (p0) fclass.m.unc p6,p0 = f8, 0xe3 // p6 = x is NaN or infinity
  280. nop.i 0;;
  281. }
  282. { .mfb
  283. nop.m 999
  284. (p6) fma.s.s0 f8 = f8,f1,f8 // NaN/Inf: return x*1 + x in single precision
  285. (p6) br.ret.spnt b0 ;;
  286. }
  287. // Put 0.25 in f9; p7 true if |x| >= 0.25 (the compare below discards the "less than" result into p0)
  288. { .mlx
  289. nop.m 999
  290. (p0) movl r32 = 0x000000000000fffd ;; // register-biased exponent of 2^-2
  291. }
  292. { .mfi
  293. (p0) setf.exp f9 = r32 // f9 = 0.25
  294. (p0) fmerge.s sinhf_FR_SGNX = f8,f1 // sign of x, magnitude 1.0
  295. nop.i 999 ;;
  296. }
  297. { .mfi
  298. nop.m 999
  299. (p0) fmerge.s sinhf_FR_X = f0,f8 // ax = |x|
  300. nop.i 999
  301. }
  302. // Identify denormal operands. p10/p11 are consumed at the top of SINH_BY_POLY.
  303. { .mfi
  304. nop.m 999
  305. fclass.m.unc p10,p0 = f8, 0x09 // + denorm
  306. nop.i 999
  307. };;
  308. { .mfi
  309. nop.m 999
  310. fclass.m.unc p11,p0 = f8, 0x0a // - denorm
  311. nop.i 999
  312. }
  313. { .mfi
  314. nop.m 999
  315. (p0) fcmp.lt.unc.s1 p0,p7 = sinhf_FR_X,f9 // p7 = !(ax < 0.25)
  316. nop.i 999 ;;
  317. }
  318. { .mib
  319. nop.m 999
  320. nop.i 999
  321. (p7) br.cond.sptk SINH_BY_TBL ;; // otherwise fall through to SINH_BY_POLY
  322. }
  323. SINH_BY_POLY: // reached by fall-through when |x| < 0.25
  324. // POLY cannot overflow so there is no need to call __libm_error_support
  325. // Set tiny_SAFE (p7) to 1(0) if answer is not tiny
  326. // Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
  327. // commented out.
  328. //(p0) movl r32 = 0x000000000000fc01
  329. //(p0) setf.exp f10 = r32
  330. //(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
  331. // Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order
  332. // of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
  333. // Note that ax = |x|
  334. // sinhf(x) = sign * (series(e^x) - series(e^-x))/2
  335. // = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
  336. // = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
  337. // + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
  338. // = sign * (ax + ax*p_odd + (ax*p_even))
  339. // = sign * (ax + Y_lo)
  340. // sinhf(x) = sign * (Y_hi + Y_lo)
  341. // Get the values of P_x from the table
  342. { .mfb
  343. addl r34 = @ltoff(single_sinhf_p_table), gp // r34 = linkage-table slot for the P table
  344. (p10) fma.s.s0 f8 = f8,f8,f8 // +denormal input: return x + x*x
  345. (p10) br.ret.spnt b0
  346. }
  347. ;;
  348. { .mfb
  349. ld8 r34 = [r34] // r34 = address of single_sinhf_p_table
  350. (p11) fnma.s.s0 f8 = f8,f8,f8 // -denormal input: return x - x*x
  351. (p11) br.ret.spnt b0
  352. }
  353. ;;
  354. // Calculate sinhf_FR_X2 = ax*ax and sinhf_FR_X4 = ax*ax*ax*ax
  355. { .mmf
  356. nop.m 999
  357. (p0) ldfe sinhf_FR_P1 = [r34],16 // P1..P6 are read sequentially, 16 bytes apart
  358. (p0) fma.s1 sinhf_FR_X2 = sinhf_FR_X, sinhf_FR_X, f0 ;;
  359. }
  360. { .mmi
  361. (p0) ldfe sinhf_FR_P2 = [r34],16 ;;
  362. (p0) ldfe sinhf_FR_P3 = [r34],16
  363. nop.i 999 ;;
  364. }
  365. { .mmi
  366. (p0) ldfe sinhf_FR_P4 = [r34],16 ;;
  367. (p0) ldfe sinhf_FR_P5 = [r34],16
  368. nop.i 999 ;;
  369. }
  370. { .mfi
  371. (p0) ldfe sinhf_FR_P6 = [r34],16
  372. (p0) fma.s1 sinhf_FR_X4 = sinhf_FR_X2, sinhf_FR_X2, f0
  373. nop.i 999 ;;
  374. }
  375. // Calculate sinhf_FR_podd = p_odd and sinhf_FR_peven = p_even
  376. { .mfi
  377. nop.m 999
  378. (p0) fma.s1 sinhf_FR_poly_podd_temp1 = sinhf_FR_X4, sinhf_FR_P5, sinhf_FR_P3 // P3 + ax^4*P5
  379. nop.i 999 ;;
  380. }
  381. { .mfi
  382. nop.m 999
  383. (p0) fma.s1 sinhf_FR_poly_podd_temp2 = sinhf_FR_X4, sinhf_FR_poly_podd_temp1, sinhf_FR_P1 // P1 + ax^4*(P3 + ax^4*P5)
  384. nop.i 999
  385. }
  386. { .mfi
  387. nop.m 999
  388. (p0) fma.s1 sinhf_FR_poly_peven_temp1 = sinhf_FR_X4, sinhf_FR_P6, sinhf_FR_P4 // P4 + ax^4*P6
  389. nop.i 999 ;;
  390. }
  391. { .mfi
  392. nop.m 999
  393. (p0) fma.s1 sinhf_FR_podd = sinhf_FR_X2, sinhf_FR_poly_podd_temp2, f0 // p_odd = ax^2 * (...)
  394. nop.i 999
  395. }
  396. { .mfi
  397. nop.m 999
  398. (p0) fma.s1 sinhf_FR_poly_peven_temp2 = sinhf_FR_X4, sinhf_FR_poly_peven_temp1, sinhf_FR_P2 // P2 + ax^4*(P4 + ax^4*P6)
  399. nop.i 999 ;;
  400. }
  401. { .mfi
  402. nop.m 999
  403. (p0) fma.s1 sinhf_FR_peven = sinhf_FR_X4, sinhf_FR_poly_peven_temp2, f0 // p_even = ax^4 * (...)
  404. nop.i 999 ;;
  405. }
  406. // Calculate sinhf_FR_Y_lo = ax*p_odd + (ax*p_even)
  407. { .mfi
  408. nop.m 999
  409. (p0) fma.s1 sinhf_FR_Y_lo_temp = sinhf_FR_X, sinhf_FR_peven, f0
  410. nop.i 999 ;;
  411. }
  412. { .mfi
  413. nop.m 999
  414. (p0) fma.s1 sinhf_FR_Y_lo = sinhf_FR_X, sinhf_FR_podd, sinhf_FR_Y_lo_temp
  415. nop.i 999 ;;
  416. }
  417. // Calculate sinhf_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
  418. { .mfi
  419. nop.m 999
  420. (p0) fma.s1 sinhf_FR_SINH = sinhf_FR_X, f1, sinhf_FR_Y_lo
  421. nop.i 999 ;;
  422. }
  423. // Calculate f8 = sign * (Y_hi + Y_lo)
  424. // Go to return
  425. { .mfb
  426. nop.m 999
  427. (p0) fma.s.s0 f8 = sinhf_FR_SGNX,sinhf_FR_SINH,f0 // final rounding to single under the user status field s0
  428. (p0) br.ret.sptk b0 ;;
  429. }
  430. SINH_BY_TBL:
  431. // Now that we are at TBL; so far all we know is that |x| >= 0.25.
  432. // The first two steps are the same for TBL and EXP, but if we are HUGE
  433. // we want to leave now.
  434. // Double-extended:
  435. // Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
  436. // Double
  437. // Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
  438. // Single
  439. // Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
  440. { .mlx
  441. nop.m 999
  442. (p0) movl r32 = 0x0000000000010006 ;;
  443. }
  444. { .mfi
  445. (p0) setf.exp f9 = r32
  446. nop.f 999
  447. nop.i 999 ;;
  448. }
  449. { .mfi
  450. nop.m 999
  451. (p0) fcmp.ge.unc.s1 p6,p7 = sinhf_FR_X,f9
  452. nop.i 999 ;;
  453. }
  454. { .mib
  455. nop.m 999
  456. nop.i 999
  457. (p6) br.cond.spnt SINH_HUGE ;;
  458. }
  459. // r32 = 1
  460. // r34 = N-1
  461. // r35 = N
  462. // r36 = j
  463. // r37 = N+1
  464. // TBL can never overflow
  465. // sinhf(x) = sinhf(B+R)
  466. // = sinhf(B)cosh(R) + cosh(B)sinhf(R)
  467. //
  468. // ax = |x| = M*log2/64 + R
  469. // B = M*log2/64
  470. // M = 64*N + j
  471. // We will calculate M and get N as (M-j)/64
  472. // The division is a shift.
  473. // exp(B) = exp(N*log2 + j*log2/64)
  474. // = 2^N * 2^(j/64)
  475. // sinhf(B) = 1/2(e^B -e^-B)
  476. // = 1/2(2^N * 2^(j/64) - 2^-N * 2^(-j/64))
  477. // sinhf(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
  478. // cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
  479. // 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
  480. // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
  481. // R = ax - M*log2/64
  482. // R = ax - M*log2_by_64_hi - M*log2_by_64_lo
  483. // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
  484. // = 1 + p_odd + p_even
  485. // where p_odd uses the A coefficients and p_even uses the B coefficients
  486. // So sinhf(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
  487. // cosh(R) = 1 + p_even
  488. // sinhf(B) = S_hi + S_lo
  489. // cosh(B) = C_hi
  490. // sinhf(x) = sinhf(B)cosh(R) + cosh(B)sinhf(R)
  491. // ******************************************************
  492. // STEP 1 (TBL and EXP)
  493. // ******************************************************
  494. // Get the following constants.
  495. // f9 = Inv_log2by64
  496. // f10 = log2by64_hi
  497. // f11 = log2by64_lo
  498. { .mmi
  499. (p0) adds r32 = 0x1,r0
  500. (p0) addl r34 = @ltoff(single_sinhf_arg_reduction), gp
  501. nop.i 999
  502. }
  503. ;;
  504. { .mmi
  505. ld8 r34 = [r34]
  506. nop.m 999
  507. nop.i 999
  508. }
  509. ;;
  510. // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
  511. // put them in an exponent.
  512. // sinhf_FR_spos = 2^(N-1) and sinhf_FR_sneg = 2^(-N-1)
  513. // r39 = 0xffff + (N-1) = 0xffff +N -1
  514. // r40 = 0xffff - (N +1) = 0xffff -N -1
  515. { .mlx
  516. nop.m 999
  517. (p0) movl r38 = 0x000000000000fffe ;;
  518. }
  519. { .mmi
  520. (p0) ldfe sinhf_FR_Inv_log2by64 = [r34],16 ;;
  521. (p0) ldfe sinhf_FR_log2by64_hi = [r34],16
  522. nop.i 999 ;;
  523. }
  524. { .mbb
  525. (p0) ldfe sinhf_FR_log2by64_lo = [r34],16
  526. nop.b 999
  527. nop.b 999 ;;
  528. }
  529. // Get the A coefficients
  530. // f9 = A_1
  531. // f10 = A_2
  532. // f11 = A_3
  533. { .mmi
  534. nop.m 999
  535. (p0) addl r34 = @ltoff(single_sinhf_ab_table), gp
  536. nop.i 999
  537. }
  538. ;;
  539. { .mmi
  540. ld8 r34 = [r34]
  541. nop.m 999
  542. nop.i 999
  543. }
  544. ;;
  545. // Calculate M and keep it as integer and floating point.
  546. // f38 = M = round-to-integer(ax*Inv_log2by64)
  547. // sinhf_FR_M = M = round-to-integer(ax/(log2/64)) (fcvt.fx is used, not fcvt.fx.trunc)
  548. // Put the significand of M in r35
  549. // and the floating point representation of M in sinhf_FR_M
  550. { .mfi
  551. nop.m 999
  552. (p0) fma.s1 sinhf_FR_M = sinhf_FR_X, sinhf_FR_Inv_log2by64, f0
  553. nop.i 999
  554. }
  555. { .mfi
  556. (p0) ldfe sinhf_FR_A1 = [r34],16
  557. nop.f 999
  558. nop.i 999 ;;
  559. }
  560. { .mfi
  561. nop.m 999
  562. (p0) fcvt.fx.s1 sinhf_FR_M_temp = sinhf_FR_M
  563. nop.i 999 ;;
  564. }
  565. { .mfi
  566. nop.m 999
  567. (p0) fnorm.s1 sinhf_FR_M = sinhf_FR_M_temp
  568. nop.i 999 ;;
  569. }
  570. { .mfi
  571. (p0) getf.sig r35 = sinhf_FR_M_temp
  572. nop.f 999
  573. nop.i 999 ;;
  574. }
  575. // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
  576. // has a range of -32 thru 31.
  577. // r35 = M
  578. // r36 = j
  579. { .mii
  580. nop.m 999
  581. nop.i 999 ;;
  582. (p0) and r36 = 0x3f, r35 ;;
  583. }
  584. // Calculate R
  585. // f13 = f44 - f38*f10 = ax - M*log2by64_hi
  586. // f14 = f13 - f38*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
  587. { .mfi
  588. nop.m 999
  589. (p0) fnma.s1 sinhf_FR_R_temp = sinhf_FR_M, sinhf_FR_log2by64_hi, sinhf_FR_X
  590. nop.i 999
  591. }
  592. { .mfi
  593. (p0) ldfe sinhf_FR_A2 = [r34],16
  594. nop.f 999
  595. nop.i 999 ;;
  596. }
  597. { .mfi
  598. nop.m 999
  599. (p0) fnma.s1 sinhf_FR_R = sinhf_FR_M, sinhf_FR_log2by64_lo, sinhf_FR_R_temp
  600. nop.i 999
  601. }
  602. // Get the B coefficients
  603. // f15 = B_1
  604. // f32 = B_2
  605. // f33 = B_3
  606. { .mmi
  607. (p0) ldfe sinhf_FR_A3 = [r34],16 ;;
  608. (p0) ldfe sinhf_FR_B1 = [r34],16
  609. nop.i 999 ;;
  610. }
  611. { .mmi
  612. (p0) ldfe sinhf_FR_B2 = [r34],16 ;;
  613. (p0) ldfe sinhf_FR_B3 = [r34],16
  614. nop.i 999 ;;
  615. }
  616. { .mii
  617. nop.m 999
  618. (p0) shl r34 = r36, 0x2 ;;
  619. (p0) sxt1 r37 = r34 ;;
  620. }
  621. // ******************************************************
  622. // STEP 2 (TBL and EXP)
  623. // ******************************************************
  624. // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
  625. // f12 = R*R*R
  626. // f13 = R*R
  627. // f14 = R <== from above
  628. { .mfi
  629. nop.m 999
  630. (p0) fma.s1 sinhf_FR_Rsq = sinhf_FR_R, sinhf_FR_R, f0
  631. (p0) shr r36 = r37, 0x2 ;;
  632. }
  633. // r34 = M-j = r35 - r36
  634. // r35 = N = (M-j)/64
  635. { .mii
  636. (p0) sub r34 = r35, r36
  637. nop.i 999 ;;
  638. (p0) shr r35 = r34, 0x6 ;;
  639. }
  640. { .mii
  641. (p0) sub r40 = r38, r35
  642. (p0) adds r37 = 0x1, r35
  643. (p0) add r39 = r38, r35 ;;
  644. }
  645. // Get the address of the J table, add the offset,
  646. // addresses are sinhf_AD_mJ and sinhf_AD_J, get the T value
  647. // f32 = T(j)_hi
  648. // f33 = T(j)_lo
  649. // f34 = T(-j)_hi
  650. // f35 = T(-j)_lo
  651. { .mmi
  652. (p0) sub r34 = r35, r32
  653. (p0) addl r37 = @ltoff(single_sinhf_j_table), gp
  654. nop.i 999
  655. }
  656. ;;
  657. { .mmi
  658. ld8 r37 = [r37]
  659. nop.m 999
  660. nop.i 999
  661. }
  662. ;;
  663. { .mfi
  664. nop.m 999
  665. (p0) fma.s1 sinhf_FR_Rcub = sinhf_FR_Rsq, sinhf_FR_R, f0
  666. nop.i 999
  667. }
  668. // ******************************************************
  669. // STEP 3 Now decide if we need to branch to EXP
  670. // ******************************************************
  671. // Put 32 in f9; p7 true if |x| >= 32
  672. // Go to EXP if |x| >= 32
  673. { .mlx
  674. nop.m 999
  675. (p0) movl r32 = 0x0000000000010004 ;;
  676. }
  677. // Calculate p_even
  678. // f34 = B_2 + Rsq *B_3
  679. // f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
  680. // f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
  681. { .mfi
  682. nop.m 999
  683. (p0) fma.s1 sinhf_FR_peven_temp1 = sinhf_FR_Rsq, sinhf_FR_B3, sinhf_FR_B2
  684. nop.i 999 ;;
  685. }
  686. { .mfi
  687. nop.m 999
  688. (p0) fma.s1 sinhf_FR_peven_temp2 = sinhf_FR_Rsq, sinhf_FR_peven_temp1, sinhf_FR_B1
  689. nop.i 999
  690. }
  691. // Calculate p_odd
  692. // f34 = A_2 + Rsq *A_3
  693. // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
  694. // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
  695. { .mfi
  696. nop.m 999
  697. (p0) fma.s1 sinhf_FR_podd_temp1 = sinhf_FR_Rsq, sinhf_FR_A3, sinhf_FR_A2
  698. nop.i 999 ;;
  699. }
  700. { .mfi
  701. (p0) setf.exp sinhf_FR_N_temp1 = r39
  702. nop.f 999
  703. nop.i 999 ;;
  704. }
  705. { .mfi
  706. nop.m 999
  707. (p0) fma.s1 sinhf_FR_peven = sinhf_FR_Rsq, sinhf_FR_peven_temp2, f0
  708. nop.i 999
  709. }
  710. { .mfi
  711. nop.m 999
  712. (p0) fma.s1 sinhf_FR_podd_temp2 = sinhf_FR_Rsq, sinhf_FR_podd_temp1, sinhf_FR_A1
  713. nop.i 999 ;;
  714. }
  715. { .mfi
  716. (p0) setf.exp f9 = r32
  717. nop.f 999
  718. nop.i 999 ;;
  719. }
  720. { .mfi
  721. nop.m 999
  722. (p0) fma.s1 sinhf_FR_podd = sinhf_FR_podd_temp2, sinhf_FR_Rcub, sinhf_FR_R
  723. nop.i 999
  724. }
  725. // sinhf_GR_mJ contains the table offset for -j: (32 - j) * 32 bytes
  726. // sinhf_GR_J contains the table offset for +j: (32 + j) * 32 bytes
  727. // (no predicate is set here; both table entries are always loaded)
  728. { .mlx
  729. (p0) setf.exp sinhf_FR_N_temp2 = r40
  730. (p0) movl r40 = 0x0000000000000020 ;;
  731. }
  732. { .mfi
  733. (p0) sub sinhf_GR_mJ = r40, r36
  734. (p0) fmerge.se sinhf_FR_spos = sinhf_FR_N_temp1, f1
  735. (p0) adds sinhf_GR_J = 0x20, r36 ;;
  736. }
  737. { .mii
  738. nop.m 999
  739. (p0) shl sinhf_GR_mJ = sinhf_GR_mJ, 5 ;;
  740. (p0) add sinhf_AD_mJ = r37, sinhf_GR_mJ ;;
  741. }
  742. { .mmi
  743. nop.m 999
  744. (p0) ldfe sinhf_FR_Tmjhi = [sinhf_AD_mJ],16
  745. (p0) shl sinhf_GR_J = sinhf_GR_J, 5 ;;
  746. }
  747. { .mfi
  748. (p0) ldfs sinhf_FR_Tmjlo = [sinhf_AD_mJ],16
  749. (p0) fcmp.lt.unc.s1 p0,p7 = sinhf_FR_X,f9
  750. (p0) add sinhf_AD_J = r37, sinhf_GR_J ;;
  751. }
  752. { .mmi
  753. (p0) ldfe sinhf_FR_Tjhi = [sinhf_AD_J],16 ;;
  754. (p0) ldfs sinhf_FR_Tjlo = [sinhf_AD_J],16
  755. nop.i 999 ;;
  756. }
  757. { .mfb
  758. nop.m 999
  759. (p0) fmerge.se sinhf_FR_sneg = sinhf_FR_N_temp2, f1
  760. (p7) br.cond.spnt SINH_BY_EXP ;;
  761. }
  762. { .mfi
  763. nop.m 999
  764. nop.f 999
  765. nop.i 999 ;;
  766. }
  767. // ******************************************************
  768. // If NOT branch to EXP
  769. // ******************************************************
  770. // Calculate S_hi and S_lo
  771. // sinhf_FR_S_hi_temp = sinhf_FR_sneg * sinhf_FR_Tmjhi
  772. // sinhf_FR_S_hi = sinhf_FR_spos * sinhf_FR_Tjhi - sinhf_FR_S_hi_temp
  773. // sinhf_FR_S_hi = sinhf_FR_spos * sinhf_FR_Tjhi - (sinhf_FR_sneg * sinhf_FR_Tmjhi)
  774. { .mfi
  775. nop.m 999
  776. (p0) fma.s1 sinhf_FR_S_hi_temp = sinhf_FR_sneg, sinhf_FR_Tmjhi, f0
  777. nop.i 999 ;;
  778. }
  779. { .mfi
  780. nop.m 999
  781. (p0) fms.s1 sinhf_FR_S_hi = sinhf_FR_spos, sinhf_FR_Tjhi, sinhf_FR_S_hi_temp
  782. nop.i 999
  783. }
  784. // Calculate C_hi
  785. // sinhf_FR_C_hi_temp1 = sinhf_FR_sneg * sinhf_FR_Tmjhi
  786. // sinhf_FR_C_hi = sinhf_FR_spos * sinhf_FR_Tjhi + sinhf_FR_C_hi_temp1
  787. { .mfi
  788. nop.m 999
  789. (p0) fma.s1 sinhf_FR_C_hi_temp1 = sinhf_FR_sneg, sinhf_FR_Tmjhi, f0
  790. nop.i 999 ;;
  791. }
  792. // sinhf_FR_S_lo_temp1 = sinhf_FR_spos * sinhf_FR_Tjhi - sinhf_FR_S_hi
  793. // sinhf_FR_S_lo_temp2 = -sinhf_FR_sneg * sinhf_FR_Tmjhi + (sinhf_FR_spos * sinhf_FR_Tjhi - sinhf_FR_S_hi)
  794. // sinhf_FR_S_lo_temp2 = -sinhf_FR_sneg * sinhf_FR_Tmjhi + (sinhf_FR_S_lo_temp1 )
  795. { .mfi
  796. nop.m 999
  797. (p0) fms.s1 sinhf_FR_S_lo_temp1 = sinhf_FR_spos, sinhf_FR_Tjhi, sinhf_FR_S_hi
  798. nop.i 999
  799. }
  800. { .mfi
  801. nop.m 999
  802. (p0) fma.s1 sinhf_FR_C_hi = sinhf_FR_spos, sinhf_FR_Tjhi, sinhf_FR_C_hi_temp1
  803. nop.i 999 ;;
  804. }
  805. { .mfi
  806. nop.m 999
  807. (p0) fnma.s1 sinhf_FR_S_lo_temp2 = sinhf_FR_sneg, sinhf_FR_Tmjhi, sinhf_FR_S_lo_temp1
  808. nop.i 999
  809. }
  810. // sinhf_FR_S_lo_temp1 = sinhf_FR_sneg * sinhf_FR_Tmjlo
  811. // sinhf_FR_S_lo_temp3 = sinhf_FR_spos * sinhf_FR_Tjlo - sinhf_FR_S_lo_temp1
  812. // (T(-j) enters sinh(B) with a minus sign, so this step must be fms, not fma)
  813. // sinhf_FR_S_lo = sinhf_FR_S_lo_temp3 + sinhf_FR_S_lo_temp2
  814. { .mfi
  815. nop.m 999
  816. (p0) fma.s1 sinhf_FR_S_lo_temp1 = sinhf_FR_sneg, sinhf_FR_Tmjlo, f0
  817. nop.i 999 ;;
  818. }
  819. { .mfi
  820. nop.m 999
  821. (p0) fms.s1 sinhf_FR_S_lo_temp3 = sinhf_FR_spos, sinhf_FR_Tjlo, sinhf_FR_S_lo_temp1
  822. nop.i 999 ;;
  823. }
  824. { .mfi
  825. nop.m 999
  826. (p0) fma.s1 sinhf_FR_S_lo = sinhf_FR_S_lo_temp3, f1, sinhf_FR_S_lo_temp2
  827. nop.i 999 ;;
  828. }
  829. // Y_hi = S_hi
  830. // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
  831. // sinhf_FR_Y_lo_temp = sinhf_FR_S_hi * sinhf_FR_peven + sinhf_FR_S_lo
  832. // sinhf_FR_Y_lo = sinhf_FR_C_hi * sinhf_FR_podd + sinhf_FR_Y_lo_temp
  833. { .mfi
  834. nop.m 999
  835. (p0) fma.s1 sinhf_FR_Y_lo_temp = sinhf_FR_S_hi, sinhf_FR_peven, sinhf_FR_S_lo
  836. nop.i 999 ;;
  837. }
  838. { .mfi
  839. nop.m 999
  840. (p0) fma.s1 sinhf_FR_Y_lo = sinhf_FR_C_hi, sinhf_FR_podd, sinhf_FR_Y_lo_temp
  841. nop.i 999 ;;
  842. }
  843. // sinhf_FR_SINH = Y_hi + Y_lo
  844. // f8 = answer = sinhf_FR_SGNX * sinhf_FR_SINH
  845. { .mfi
  846. nop.m 999
  847. (p0) fma.s1 sinhf_FR_SINH = sinhf_FR_S_hi, f1, sinhf_FR_Y_lo
  848. nop.i 999 ;;
  849. }
  850. { .mfb
  851. nop.m 999
  852. (p0) fma.s.s0 f8 = sinhf_FR_SGNX, sinhf_FR_SINH,f0
  853. (p0) br.ret.sptk b0 ;;
  854. }
  855. SINH_BY_EXP:
  // Summary of this path (from the code below):
  //   f44 = sinhf_FR_SCALE * (sinhf_FR_Tjhi + sinhf_FR_Y_lo), rounded to
  //   single precision under status field s0.
  //   * r34 <= 0x7e (p7 set): f44 is copied to f8 and the routine returns.
  //   * otherwise the same product is recomputed under s2 into f43 and
  //     compared with +/-2^128; if |f43| >= 2^128, r47 is set to 128 and
  //     control goes to __libm_error_region, else f44 is returned in f8.
  // The inputs (sinhf_FR_peven, sinhf_FR_podd, sinhf_FR_Tjhi, sinhf_FR_Tjlo,
  // sinhf_FR_SGNX, sinhf_FR_spos, r34) are set up before this label.
  856. // When p7 is true, we know that an overflow is not going to happen
  857. // When p7 is false, we must check for possible overflow
  858. // p7 is the over_SAFE flag
  859. // Y_hi = Tjhi
  860. // Y_lo = Tjhi * (p_odd + p_even) + Tjlo
  861. // Scale = sign * 2^(N-1)
  862. // sinhf_FR_Y_lo_temp = sinhf_FR_peven + sinhf_FR_podd
  863. // sinhf_FR_Y_lo = sinhf_FR_Tjhi * sinhf_FR_Y_lo_temp + sinhf_FR_Tjlo
  864. { .mfi
  865. nop.m 999
  866. (p0) fma.s1 sinhf_FR_Y_lo_temp = sinhf_FR_peven, f1, sinhf_FR_podd // Y_lo_temp = peven*1.0 + podd
  867. nop.i 999
  868. }
  869. // Now we are in EXP. This is the only path where an overflow is possible
  870. // but not for certain. So this is the only path where over_SAFE has any use.
  871. // r34 still has N-1
  872. // There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
  873. // There is a danger of double overflow if N-1 > 0x3fe = 1022
  874. // There is a danger of single overflow if N-1 > 0x7e = 126
  875. { .mlx
  876. nop.m 999
  877. (p0) movl r32 = 0x000000000000007e ;; // single-precision threshold for N-1
  878. }
  879. { .mfi
  880. (p0) cmp.gt.unc p0,p7 = r34, r32 // p7 = !(r34 > r32), i.e. N-1 <= 0x7e
  881. (p0) fmerge.s sinhf_FR_SCALE = sinhf_FR_SGNX, sinhf_FR_spos // sign of SGNX, magnitude of spos
  882. nop.i 999 ;;
  883. }
  884. { .mfi
  885. nop.m 999
  886. (p0) fma.s1 sinhf_FR_Y_lo = sinhf_FR_Tjhi, sinhf_FR_Y_lo_temp, sinhf_FR_Tjlo // Y_lo = Tjhi*Y_lo_temp + Tjlo
  887. nop.i 999 ;;
  888. }
  889. // f8 = answer = scale * (Y_hi + Y_lo)
  890. { .mfi
  891. nop.m 999
  892. (p0) fma.s1 sinhf_FR_SINH_temp = sinhf_FR_Y_lo, f1, sinhf_FR_Tjhi // SINH_temp = Y_lo + Y_hi
  893. nop.i 999 ;;
  894. }
  895. { .mfi
  896. nop.m 999
  897. (p0) fma.s.s0 f44 = sinhf_FR_SCALE, sinhf_FR_SINH_temp, f0 // f44 = scale * SINH_temp, single, status field s0
  898. nop.i 999 ;;
  899. }
  900. // If over_SAFE is set, return
  901. { .mfb
  902. nop.m 999
  903. (p7) fmerge.s f8 = f44,f44 // copy f44 into the return register f8
  904. (p7) br.ret.sptk b0 ;;
  905. }
  906. // Else see if we overflowed
  907. // S0 user supplied status
  908. // S2 user supplied status + WRE + TD (Overflows)
  909. // If WRE is set then an overflow will not occur in EXP.
  910. // The input value that would cause a register (WRE) value to overflow is about 2^15
  911. // and this input would go into the HUGE path.
  912. // Answer with WRE is in f43.
  913. { .mfi
  914. nop.m 999
  915. (p0) fsetc.s2 0x7F,0x42 // s2 controls: add WRE + TD (see comment above)
  916. nop.i 999;;
  917. }
  918. { .mfi
  919. nop.m 999
  920. (p0) fma.s.s2 f43 = sinhf_FR_SCALE, sinhf_FR_SINH_temp, f0 // same product as f44, computed under s2
  921. nop.i 999 ;;
  922. }
  923. // 1 more than the exponent of the largest double (7FE) = 7FF
  924. // 7FF - 3FF = 400 (true); 400 + FFFF = 103FF (register-biased)
  925. // So 0 103FF 8000000000000000 is one ulp more than
  926. // largest double in register bias
  927. // 1 more than the exponent of the largest single (FE) = FF
  928. // FF - 7F = 80 (true); 80 + FFFF = 1007F (register-biased)
  929. // Now set p8 if the answer with WRE is greater than or equal to this value
  930. // Also set p9 if the answer with WRE is less than or equal to negative this value
  // (The code below uses the single-precision value 0x1007F.)
  931. { .mlx
  932. nop.m 999
  933. (p0) movl r32 = 0x0000000001007F ;; // register-biased exponent for 2^128
  934. }
  935. { .mmf
  936. nop.m 999
  937. (p0) setf.exp f41 = r32 // f41 = +2^128
  938. (p0) fsetc.s2 0x7F,0x40 ;; // s2 controls back to TD only (0x42 -> 0x40)
  939. }
  940. { .mfi
  941. nop.m 999
  942. (p0) fcmp.ge.unc.s1 p8, p0 = f43, f41 // p8 = (f43 >= +2^128)
  943. nop.i 999
  944. }
  945. { .mfi
  946. nop.m 999
  947. (p0) fmerge.ns f42 = f41, f41 // f42 = -f41 = -2^128
  948. nop.i 999 ;;
  949. }
  950. // The error tag for overflow is 128
  951. { .mii
  952. nop.m 999
  953. nop.i 999 ;;
  954. (p8) mov r47 = 128 ;;
  955. }
  956. { .mfb
  957. nop.m 999
  958. (p0) fcmp.le.unc.s1 p9, p0 = f43, f42 // p9 = (f43 <= -2^128)
  959. (p8) br.cond.spnt __libm_error_region ;; // positive overflow: go to error handling
  960. }
  961. { .mii
  962. nop.m 999
  963. nop.i 999 ;;
  964. (p9) mov r47 = 128
  965. }
  966. { .mib
  967. nop.m 999
  968. nop.i 999
  969. (p9) br.cond.spnt __libm_error_region ;; // negative overflow: go to error handling
  970. }
  // Neither p8 nor p9 was set: no overflow, return the s0 result.
  971. { .mfb
  972. nop.m 999
  973. (p0) fmerge.s f8 = f44,f44 // copy f44 into the return register f8
  974. (p0) br.ret.sptk b0 ;;
  975. }
  976. SINH_HUGE:
  // Summary of this path (from the code below):
  //   f9  = 2^24000 (register-biased exponent 0x15dbf = 0xffff + 24000)
  //   sinhf_FR_signed_hi_lo = sinhf_FR_SGNX * f9 + 1.0
  //   f44 = sinhf_FR_signed_hi_lo * f9, rounded to single under status field s0
  //   r47 = 128 (the overflow error tag)
  // No branch ends this section: the next instructions in the file are those
  // of __libm_error_region, so execution continues there.
  // NOTE(review): sinhf_FR_SGNX is set before this label; presumably it carries
  // the sign of the input as +/-1.0 -- confirm against the earlier code.
  977. // for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
  978. // SAFE: SAFE is always 0 for HUGE
  979. { .mlx
  980. nop.m 999
  981. (p0) movl r32 = 0x0000000000015dbf ;; // 0x15dbf - 0xffff = 24000
  982. }
  983. { .mfi
  984. (p0) setf.exp f9 = r32 // f9 = 2^24000
  985. nop.f 999
  986. nop.i 999 ;;
  987. }
  988. { .mfi
  989. nop.m 999
  990. (p0) fma.s1 sinhf_FR_signed_hi_lo = sinhf_FR_SGNX, f9, f1 // SGNX * 2^24000 + 1.0
  991. nop.i 999 ;;
  992. }
  993. { .mfi
  994. nop.m 999
  995. (p0) fma.s.s0 f44 = sinhf_FR_signed_hi_lo, f9, f0 // single-precision result under s0
  996. (p0) mov r47 = 128 // overflow error tag
  997. }
  998. .endp sinhf
  999. .proc __libm_error_region
  // __libm_error_region: common error exit.
  // Builds a 64-byte stack frame, stores FR_X, FR_Y and FR_RESULT in it as
  // single-precision values, calls __libm_error_support, then reloads f8 from
  // the result slot, tears the frame down and returns through the b0 value
  // that was live on entry.
  // Frame layout after "add sp=-64,sp":
  //   sp+16 : FR_X       (address in GR_Parameter_X)
  //   sp+32 : FR_Y       (address in GR_Parameter_Y at the call)
  //   sp+48 : FR_RESULT  (address in GR_Parameter_RESULT)
  // NOTE(review): the GR_* / FR_* names are register aliases defined outside
  // this section; presumably the three GR_Parameter_* registers and the error
  // tag placed in r47 before branching here are the arguments of
  // __libm_error_support -- confirm against the alias definitions.
  1000. __libm_error_region:
  1001. .prologue
  1002. { .mii
  1003. add GR_Parameter_Y=-32,sp // Parameter 2 address = old sp - 32 (= new sp + 32)
  1004. nop.i 0
  1005. .save ar.pfs,GR_SAVE_PFS
  1006. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  1007. }
  1008. { .mfi
  1009. .fframe 64
  1010. add sp=-64,sp // Create new 64-byte stack frame
  1011. nop.f 0
  1012. mov GR_SAVE_GP=gp // Save gp
  1013. };;
  1014. { .mmi
  1015. stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; GR_Parameter_Y becomes sp+48
  1016. add GR_Parameter_X = 16,sp // Parameter 1 address = sp+16
  1017. .save b0, GR_SAVE_B0
  1018. mov GR_SAVE_B0=b0 // Save b0
  1019. };;
  1020. .body
  1021. { .mib
  1022. stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
  1023. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address = sp+48
  1024. nop.b 0
  1025. }
  1026. { .mib
  1027. stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (at sp+48)
  1028. add GR_Parameter_Y = -16,GR_Parameter_Y // Parameter 2 address back to sp+32
  1029. br.call.sptk b0=__libm_error_support# // Call error handling function
  1030. };;
  // GR_Parameter_RESULT is recomputed from sp after the call rather than
  // reused (see the 8/15/00 history entry: it was previously overwritten).
  1031. { .mmi
  1032. nop.m 0
  1033. nop.m 0
  1034. add GR_Parameter_RESULT = 48,sp // Address of the result slot
  1035. };;
  1036. { .mmi
  1037. ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
  1038. .restore
  1039. add sp = 64,sp // Restore stack pointer
  1040. mov b0 = GR_SAVE_B0 // Restore return address
  1041. };;
  1042. { .mib
  1043. mov gp = GR_SAVE_GP // Restore gp
  1044. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  1045. br.ret.sptk b0 // Return
  1046. };;
  1047. .endp __libm_error_region
  1048. .type __libm_error_support#,@function
  1049. .global __libm_error_support#