Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1254 lines
36 KiB

  1. .file "sinh.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00 Initial version
  29. // 4/04/00 Unwind support added
  30. // 8/15/00 Bundle added after call to __libm_error_support to properly
  31. // set [the previously overwritten] GR_Parameter_RESULT.
  32. // 10/12/00 Update to set denormal operand and underflow flags
  33. //
  34. // API
  35. //==============================================================
  36. // double = sinh(double)
  37. // input floating point f8
  38. // output floating point f8
  39. //
  40. // Registers used
  41. //==============================================================
  42. // general registers:
  43. // r32 -> r47
  44. // predicate registers used:
  45. // p6 p7 p8 p9 p10 p11
  46. // floating-point registers used:
  47. // f9 -> f15; f32 -> f44;
  48. // f8 has input, then output
  49. //
  50. // Overview of operation
  51. //==============================================================
  52. // There are four paths
  53. // 1. |x| < 0.25 SINH_BY_POLY
  54. // 2. |x| < 32 SINH_BY_TBL
  55. // 3. |x| < 2^14 SINH_BY_EXP
  56. // 4. |x| >= 2^14 SINH_HUGE
  57. //
  58. // For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
  59. // >= 1.0110001.... x 2^13
  60. // >= 11357.2166
  61. //
  62. // But for double we get infinity for x >= 408633ce8fb9f87e
  63. // >= 1.0110...x 2^9
  64. // >= +7.10476e+002
  65. //
  66. // And for single we get infinity for x >= 42b3a496
  67. // >= 1.0110... 2^6
  68. // >= 89.8215
  69. //
  70. // SAFE: If there is danger of overflow set SAFE to 0
  71. // NOT implemented: if there is danger of underflow, set SAFE to 0
  72. // SAFE for all paths listed below
  73. //
  74. // 1. SINH_BY_POLY
  75. // ===============
  76. // If |x| is less than the tiny threshold, then clear SAFE
  77. // For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
  78. // register-biased, this is fc01
  79. // For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
  80. // If |x| < tiny threshold, set SAFE = 0
  81. //
  82. // 2. SINH_BY_TBL
  83. // =============
  84. // SAFE: SAFE is always 1 for TBL;
  85. //
  86. // 3. SINH_BY_EXP
  87. // ==============
  88. // There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
  89. // r34 has N-1; 16382 is in register biased form, 0x13ffd
  90. // There is danger of double overflow if N-1 > 0x3fe
  91. // in register biased form, 0x103fd
  92. // Analogously, there is danger of single overflow if N-1 > 0x7e
  93. // in register biased form, 0x1007d
  94. // SAFE: If there is danger of overflow set SAFE to 0
  95. //
  96. // 4. SINH_HUGE
  97. // ============
  98. // SAFE: SAFE is always 0 for HUGE
  99. //
  100. // Assembly macros: symbolic register names. Several names alias one physical register (f9..f15, f32..f35, f38, f39), so a name is only meaningful on the path and stage that writes it.
  101. //==============================================================
  102. sinh_FR_X = f44 // |x| (built at entry with fmerge.s from f0 and f8)
  103. sinh_FR_X2 = f9 // POLY path: ax*ax
  104. sinh_FR_X4 = f10 // POLY path: ax*ax*ax*ax
  105. sinh_FR_SGNX = f40 // 1.0 carrying the sign of x
  106. sinh_FR_Inv_log2by64 = f9 // first entry of double_sinh_arg_reduction
  107. sinh_FR_log2by64_lo = f11 // third entry of double_sinh_arg_reduction
  108. sinh_FR_log2by64_hi = f10 // second entry of double_sinh_arg_reduction
  109. sinh_FR_A1 = f9 // A1..A3: p_odd coefficients (double_sinh_ab_table)
  110. sinh_FR_A2 = f10
  111. sinh_FR_A3 = f11
  112. sinh_FR_Rcub = f12 // R*R*R
  113. sinh_FR_M_temp = f13 // integer image of M produced by fcvt.fx
  114. sinh_FR_R_temp = f13 // ax - M*log2by64_hi
  115. sinh_FR_Rsq = f13 // R*R
  116. sinh_FR_R = f14 // reduced argument
  117. sinh_FR_M = f38 // M as a floating-point value
  118. sinh_FR_B1 = f15 // B1..B3: p_even coefficients (double_sinh_ab_table)
  119. sinh_FR_B2 = f32
  120. sinh_FR_B3 = f33
  121. sinh_FR_peven_temp1 = f34
  122. sinh_FR_peven_temp2 = f35
  123. sinh_FR_peven = f36
  124. sinh_FR_podd_temp1 = f34
  125. sinh_FR_podd_temp2 = f35
  126. sinh_FR_podd = f37
  127. sinh_FR_poly_podd_temp1 = f11
  128. sinh_FR_poly_podd_temp2 = f13
  129. sinh_FR_poly_peven_temp1 = f11
  130. sinh_FR_poly_peven_temp2 = f13
  131. sinh_FR_J_temp = f9
  132. sinh_FR_J = f10
  133. sinh_FR_Mmj = f39
  134. sinh_FR_N_temp1 = f11 // receives biased exponent 0xffff+N-1 via setf.exp
  135. sinh_FR_N_temp2 = f12 // receives biased exponent 0xffff-N-1 via setf.exp
  136. sinh_FR_N = f13
  137. sinh_FR_spos = f14 // 2^(N-1)
  138. sinh_FR_sneg = f15 // 2^(-N-1)
  139. sinh_FR_Tjhi = f32 // T(j) high part, loaded with ldfe
  140. sinh_FR_Tjlo = f33 // T(j) low part, loaded with ldfs
  141. sinh_FR_Tmjhi = f34 // T(-j) high part, loaded with ldfe
  142. sinh_FR_Tmjlo = f35 // T(-j) low part, loaded with ldfs
  143. sinh_GR_mJ = r35 // table byte offset for -j
  144. sinh_GR_J = r36 // table byte offset for +j
  145. sinh_AD_mJ = r38 // address of the -j table entry
  146. sinh_AD_J = r39 // address of the +j table entry
  147. sinh_FR_S_hi = f9
  148. sinh_FR_S_hi_temp = f10
  149. sinh_FR_S_lo_temp1 = f11
  150. sinh_FR_S_lo_temp2 = f12
  151. sinh_FR_S_lo_temp3 = f13
  152. sinh_FR_S_lo = f38
  153. sinh_FR_C_hi = f39
  154. sinh_FR_C_hi_temp1 = f10
  155. sinh_FR_Y_hi = f11
  156. sinh_FR_Y_lo_temp = f12
  157. sinh_FR_Y_lo = f13
  158. sinh_FR_SINH = f9 // unsigned result before the sign is applied
  159. sinh_FR_P1 = f14 // P1..P6: POLY coefficients (double_sinh_p_table)
  160. sinh_FR_P2 = f15
  161. sinh_FR_P3 = f32
  162. sinh_FR_P4 = f33
  163. sinh_FR_P5 = f34
  164. sinh_FR_P6 = f35
  165. sinh_FR_TINY_THRESH = f9
  166. sinh_FR_SINH_temp = f10
  167. sinh_FR_SCALE = f11 // EXP path: 2^(N-1) with the sign of x
  168. sinh_FR_signed_hi_lo = f10
  169. GR_SAVE_PFS = r41 // GR_SAVE_* / GR_Parameter_* are not referenced in the code visible here; presumably used by the error-support call sequence - confirm
  170. GR_SAVE_B0 = r42
  171. GR_SAVE_GP = r43
  172. GR_Parameter_X = r44
  173. GR_Parameter_Y = r45
  174. GR_Parameter_RESULT = r46
  175. // Data tables: every constant is read with ldfe (or ldfs for the low T parts) using a 16-byte post-increment.
  176. //==============================================================
  177. .data
  178. .align 16
  179. double_sinh_arg_reduction: // loaded in order as Inv_log2by64, log2by64_hi, log2by64_lo
  180. data8 0xB8AA3B295C17F0BC, 0x00004005
  181. data8 0xB17217F7D1000000, 0x00003FF8
  182. data8 0xCF79ABC9E3B39804, 0x00003FD0
  183. double_sinh_p_table: // loaded in order as P1..P6 by SINH_BY_POLY
  184. data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
  185. data8 0x8888888888888412, 0x00003FF8
  186. data8 0xD00D00D00D4D39F2, 0x00003FF2
  187. data8 0xB8EF1D28926D8891, 0x00003FEC
  188. data8 0xD732377688025BE9, 0x00003FE5
  189. data8 0xB08AF9AE78C1239F, 0x00003FDE
  190. double_sinh_ab_table: // loaded in order as A1, A2, A3 (p_odd) then B1, B2, B3 (p_even)
  191. data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
  192. data8 0x88888888884ECDD5, 0x00003FF8
  193. data8 0xD00D0C6DCC26A86B, 0x00003FF2
  194. data8 0x8000000000000002, 0x00003FFE
  195. data8 0xAAAAAAAAAA402C77, 0x00003FFA
  196. data8 0xB60B6CC96BDB144D, 0x00003FF5
  197. double_sinh_j_table: // 32-byte entries: high part at +0 (ldfe), low part at +16 (ldfs); the code indexes entry (32+j) and entry (32-j)
  198. data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
  199. data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
  200. data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
  201. data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
  202. data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
  203. data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
  204. data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
  205. data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
  206. data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
  207. data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
  208. data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
  209. data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
  210. data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
  211. data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
  212. data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
  213. data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
  214. data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
  215. data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
  216. data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
  217. data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
  218. data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
  219. data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
  220. data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
  221. data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
  222. data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
  223. data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
  224. data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
  225. data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
  226. data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
  227. data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
  228. data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
  229. data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
  230. data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
  231. data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
  232. data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
  233. data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
  234. data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
  235. data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
  236. data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
  237. data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
  238. data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
  239. data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
  240. data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
  241. data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
  242. data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
  243. data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
  244. data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
  245. data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
  246. data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
  247. data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
  248. data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
  249. data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
  250. data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
  251. data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
  252. data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
  253. data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
  254. data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
  255. data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
  256. data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
  257. data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
  258. data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
  259. data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
  260. data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
  261. data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
  262. data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
  263. .align 32
  264. .global sinh#
  265. .section .text
  266. .proc sinh#
  267. .align 32
  268. sinh:
  269. // Entry: argument and result are both in f8. First test: X infinity or NAN?
  270. // Take invalid fault if enabled
  271. { .mfi
  272. alloc r32 = ar.pfs,0,12,4,0
  273. (p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
  274. nop.i 999
  275. }
  276. ;;
  277. { .mfb
  278. nop.m 999
  279. (p6) fma.d.s0 f8 = f8,f1,f8 // NaN/Inf input: result is x*1 + x, computed in s0
  280. (p6) br.ret.spnt b0 ;;
  281. }
  282. // Put 0.25 in f9 (0xfffd is the biased exponent of 2^-2); p7 true if |x| >= 0.25
  283. { .mlx
  284. nop.m 999
  285. (p0) movl r32 = 0x000000000000fffd ;;
  286. }
  287. { .mfi
  288. (p0) setf.exp f9 = r32
  289. nop.f 999
  290. nop.i 999 ;;
  291. }
  292. { .mfi
  293. nop.m 999
  294. (p0) fmerge.s sinh_FR_X = f0,f8 // sinh_FR_X = |x|
  295. nop.i 999
  296. }
  297. // Identify denormal operands (p10/p11 are consumed at the top of SINH_BY_POLY).
  298. { .mfi
  299. nop.m 999
  300. fclass.m.unc p10,p0 = f8, 0x09 // + denorm
  301. nop.i 999
  302. };;
  303. { .mfi
  304. nop.m 999
  305. fclass.m.unc p11,p0 = f8, 0x0a // - denorm
  306. nop.i 999
  307. }
  308. { .mfi
  309. nop.m 999
  310. (p0) fmerge.s sinh_FR_SGNX = f8,f1 // sinh_FR_SGNX = 1.0 with the sign of x
  311. nop.i 999 ;;
  312. }
  313. { .mfi
  314. nop.m 999
  315. (p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9 // p7 = !(|x| < 0.25)
  316. nop.i 999 ;;
  317. }
  318. { .mib
  319. nop.m 999
  320. nop.i 999
  321. (p7) br.cond.sptk SINH_BY_TBL ;; // otherwise fall through into SINH_BY_POLY
  322. }
  323. SINH_BY_POLY:
  324. // POLY cannot overflow so there is no need to call __libm_error_support
  325. // Set tiny_SAFE (p7) to 1(0) if answer is not tiny
  326. // Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
  327. // commented out.
  328. //(p0) movl r32 = 0x000000000000fc01
  329. //(p0) setf.exp f10 = r32
  330. //(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
  331. // Here is essentially the algorithm for SINH_BY_POLY. Care is taken for the order
  332. // of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
  333. // Note that ax = |x|
  334. // sinh(x) = sign * (series(e^x) - series(e^-x))/2
  335. // = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
  336. // = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
  337. // + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
  338. // = sign * (ax + ax*p_odd + (ax*p_even))
  339. // = sign * (ax + Y_lo)
  340. // sinh(x) = sign * (Y_hi + Y_lo)
  341. // Get the values of P_x from the table. Denormal inputs (p10/p11 from entry) return early here.
  342. { .mfb
  343. (p0) addl r34 = @ltoff(double_sinh_p_table), gp
  344. (p10) fma.d.s0 f8 = f8,f8,f8 // + denorm: result is x*x + x, computed in s0
  345. (p10) br.ret.spnt b0
  346. }
  347. ;;
  348. { .mfb
  349. ld8 r34 = [r34]
  350. (p11) fnma.d.s0 f8 = f8,f8,f8 // - denorm: result is -(x*x) + x, computed in s0
  351. (p11) br.ret.spnt b0
  352. }
  353. ;;
  354. // Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
  355. { .mmf
  356. nop.m 999
  357. (p0) ldfe sinh_FR_P1 = [r34],16
  358. (p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
  359. }
  360. { .mmi
  361. (p0) ldfe sinh_FR_P2 = [r34],16 ;;
  362. (p0) ldfe sinh_FR_P3 = [r34],16
  363. nop.i 999 ;;
  364. }
  365. { .mmi
  366. (p0) ldfe sinh_FR_P4 = [r34],16 ;;
  367. (p0) ldfe sinh_FR_P5 = [r34],16
  368. nop.i 999 ;;
  369. }
  370. { .mfi
  371. (p0) ldfe sinh_FR_P6 = [r34],16
  372. (p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
  373. nop.i 999 ;;
  374. }
  375. // Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
  376. { .mfi
  377. nop.m 999
  378. (p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3 // P3 + X4*P5
  379. nop.i 999 ;;
  380. }
  381. { .mfi
  382. nop.m 999
  383. (p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1 // P1 + X4*(P3 + X4*P5)
  384. nop.i 999
  385. }
  386. { .mfi
  387. nop.m 999
  388. (p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4 // P4 + X4*P6 (reuses f11; the preceding fma reads it in the same instruction group)
  389. nop.i 999 ;;
  390. }
  391. { .mfi
  392. nop.m 999
  393. (p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0 // p_odd = X2 * (...)
  394. nop.i 999
  395. }
  396. { .mfi
  397. nop.m 999
  398. (p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 // P2 + X4*(P4 + X4*P6)
  399. nop.i 999 ;;
  400. }
  401. { .mfi
  402. nop.m 999
  403. (p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0 // p_even = X4 * (...)
  404. nop.i 999 ;;
  405. }
  406. // Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
  407. { .mfi
  408. nop.m 999
  409. (p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
  410. nop.i 999 ;;
  411. }
  412. { .mfi
  413. nop.m 999
  414. (p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
  415. nop.i 999 ;;
  416. }
  417. // Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
  418. { .mfi
  419. nop.m 999
  420. (p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
  421. nop.i 999 ;;
  422. }
  423. // Calculate f8 = sign * (Y_hi + Y_lo)
  424. // Go to return
  425. { .mfb
  426. nop.m 999
  427. (p0) fma.d.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
  428. (p0) br.ret.sptk b0 ;;
  429. }
  430. SINH_BY_TBL:
  431. // Now that we are at TBL; so far all we know is that |x| >= 0.25.
  432. // The first two steps are the same for TBL and EXP, but if we are HUGE
  433. // we want to leave now.
  434. // Double-extended:
  435. // Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
  436. // Double (the case used here: 0x10009 is loaded below)
  437. // Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
  438. // Single
  439. // Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
  440. { .mlx
  441. nop.m 999
  442. (p0) movl r32 = 0x0000000000010009 ;;
  443. }
  444. { .mfi
  445. (p0) setf.exp f9 = r32
  446. nop.f 999
  447. nop.i 999 ;;
  448. }
  449. { .mfi
  450. nop.m 999
  451. (p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
  452. nop.i 999 ;;
  453. }
  454. { .mib
  455. nop.m 999
  456. nop.i 999
  457. (p6) br.cond.spnt SINH_HUGE ;;
  458. }
  459. // r32 = 1
  460. // r34 = N-1
  461. // r35 = N
  462. // r36 = j
  463. // r37 = N+1 (r37 is later overwritten with the address of the j table)
  464. // TBL can never overflow
  465. // sinh(x) = sinh(B+R)
  466. // = sinh(B)cosh(R) + cosh(B)sinh(R)
  467. //
  468. // ax = |x| = M*log2/64 + R
  469. // B = M*log2/64
  470. // M = 64*N + j
  471. // We will calculate M and get N as (M-j)/64
  472. // The division is a shift.
  473. // exp(B) = exp(N*log2 + j*log2/64)
  474. // = 2^N * 2^(j*log2/64)
  475. // sinh(B) = 1/2(e^B -e^-B)
  476. // = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
  477. // sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
  478. // cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
  479. // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
  480. // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
  481. // R = ax - M*log2/64
  482. // R = ax - M*log2_by_64_hi - M*log2_by_64_lo
  483. // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
  484. // = 1 + p_odd + p_even
  485. // where the p_odd uses the A coefficients and the p_even uses the B coefficients
  486. // So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
  487. // cosh(R) = 1 + p_even
  488. // sinh(B) = S_hi + S_lo
  489. // cosh(B) = C_hi
  490. // sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
  491. // ******************************************************
  492. // STEP 1 (TBL and EXP)
  493. // ******************************************************
  494. // Get the following constants.
  495. // f9 = Inv_log2by64
  496. // f10 = log2by64_hi
  497. // f11 = log2by64_lo
  498. { .mmi
  499. (p0) adds r32 = 0x1,r0
  500. (p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
  501. nop.i 999
  502. }
  503. ;;
  504. { .mmi
  505. ld8 r34 = [r34]
  506. nop.m 999
  507. nop.i 999
  508. }
  509. ;;
  510. // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
  511. // put them in an exponent.
  512. // sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
  513. // r39 = 0xffff + (N-1) = 0xffff +N -1
  514. // r40 = 0xffff - (N +1) = 0xffff -N -1
  515. { .mlx
  516. nop.m 999
  517. (p0) movl r38 = 0x000000000000fffe ;; // r38 = 0xffff - 1, the common part of both biased exponents
  518. }
  519. { .mmi
  520. (p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
  521. (p0) ldfe sinh_FR_log2by64_hi = [r34],16
  522. nop.i 999 ;;
  523. }
  524. { .mbb
  525. (p0) ldfe sinh_FR_log2by64_lo = [r34],16
  526. nop.b 999
  527. nop.b 999 ;;
  528. }
  529. // Get the A coefficients
  530. // f9 = A_1
  531. // f10 = A_2
  532. // f11 = A_3
  533. { .mmi
  534. nop.m 999
  535. (p0) addl r34 = @ltoff(double_sinh_ab_table), gp
  536. nop.i 999
  537. }
  538. ;;
  539. { .mmi
  540. ld8 r34 = [r34]
  541. nop.m 999
  542. nop.i 999
  543. }
  544. ;;
  545. // Calculate M and keep it as integer and floating point.
  546. // f38 = M = round-to-integer(ax*Inv_log2by64)
  547. // sinh_FR_M = M = ax/(log2/64) converted to integer by fcvt.fx (no .trunc completer is used)
  548. // Put the significand of M in r35
  549. // and the floating point representation of M in sinh_FR_M
  550. { .mfi
  551. nop.m 999
  552. (p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
  553. nop.i 999
  554. }
  555. { .mfi
  556. (p0) ldfe sinh_FR_A1 = [r34],16 // overwrites f9 (Inv_log2by64); the fma above reads f9 in the same instruction group
  557. nop.f 999
  558. nop.i 999 ;;
  559. }
  560. { .mfi
  561. nop.m 999
  562. (p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
  563. nop.i 999 ;;
  564. }
  565. { .mfi
  566. nop.m 999
  567. (p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
  568. nop.i 999 ;;
  569. }
  570. { .mfi
  571. (p0) getf.sig r35 = sinh_FR_M_temp
  572. nop.f 999
  573. nop.i 999 ;;
  574. }
  575. // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
  576. // has a range of -32 thru 31.
  577. // r35 = M
  578. // r36 = j
  579. { .mii
  580. nop.m 999
  581. nop.i 999 ;;
  582. (p0) and r36 = 0x3f, r35 ;; // keep the six low bits of M
  583. }
  584. // Calculate R
  585. // f13 = f44 - f38*f10 = ax - M*log2by64_hi
  586. // f14 = f13 - f38*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
  587. { .mfi
  588. nop.m 999
  589. (p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
  590. nop.i 999
  591. }
  592. { .mfi
  593. (p0) ldfe sinh_FR_A2 = [r34],16 // overwrites f10 (log2by64_hi); the fnma above reads f10 in the same instruction group
  594. nop.f 999
  595. nop.i 999 ;;
  596. }
  597. { .mfi
  598. nop.m 999
  599. (p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
  600. nop.i 999
  601. }
  602. // Get the B coefficients
  603. // f15 = B_1
  604. // f32 = B_2
  605. // f33 = B_3
  606. { .mmi
  607. (p0) ldfe sinh_FR_A3 = [r34],16 ;;
  608. (p0) ldfe sinh_FR_B1 = [r34],16
  609. nop.i 999 ;;
  610. }
  611. { .mmi
  612. (p0) ldfe sinh_FR_B2 = [r34],16 ;;
  613. (p0) ldfe sinh_FR_B3 = [r34],16
  614. nop.i 999 ;;
  615. }
  616. { .mii
  617. nop.m 999
  618. (p0) shl r34 = r36, 0x2 ;; // move the 6-bit field up so its top bit is bit 7
  619. (p0) sxt1 r37 = r34 ;; // sign-extend the low byte
  620. }
  621. // ******************************************************
  622. // STEP 2 (TBL and EXP)
  623. // ******************************************************
  624. // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
  625. // f12 = R*R*R
  626. // f13 = R*R
  627. // f14 = R <== from above
  628. { .mfi
  629. nop.m 999
  630. (p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
  631. (p0) shr r36 = r37, 0x2 ;; // shift back: r36 = sign-extended j
  632. }
  633. // r34 = M-j = r35 - r36
  634. // r35 = N = (M-j)/64
  635. { .mii
  636. (p0) sub r34 = r35, r36
  637. nop.i 999 ;;
  638. (p0) shr r35 = r34, 0x6 ;;
  639. }
  640. { .mii
  641. (p0) sub r40 = r38, r35 // r40 = 0xffff - N - 1
  642. (p0) adds r37 = 0x1, r35 // r37 = N+1 (overwritten by the j-table address below)
  643. (p0) add r39 = r38, r35 ;; // r39 = 0xffff + N - 1
  644. }
  645. // Get the address of the J table, add the offset,
  646. // addresses are sinh_AD_mJ and sinh_AD_J, get the T value
  647. // f32 = T(j)_hi
  648. // f33 = T(j)_lo
  649. // f34 = T(-j)_hi
  650. // f35 = T(-j)_lo
  651. { .mmi
  652. (p0) sub r34 = r35, r32 // r34 = N - 1 (r32 holds 1)
  653. (p0) addl r37 = @ltoff(double_sinh_j_table), gp
  654. nop.i 999
  655. }
  656. ;;
  657. { .mmi
  658. ld8 r37 = [r37]
  659. nop.m 999
  660. nop.i 999
  661. }
  662. ;;
  663. { .mfi
  664. nop.m 999
  665. (p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
  666. nop.i 999
  667. }
  668. // ******************************************************
  669. // STEP 3 Now decide if we need to branch to EXP
  670. // ******************************************************
  671. // Put 32 in f9 (the setf.exp and the compare are issued further down); p7 true if |x| >= 32
  672. // Go to EXP if |x| >= 32
  673. { .mlx
  674. nop.m 999
  675. (p0) movl r32 = 0x0000000000010004 ;;
  676. }
  677. // Calculate p_even
  678. // f34 = B_2 + Rsq *B_3
  679. // f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
  680. // f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
  681. { .mfi
  682. nop.m 999
  683. (p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
  684. nop.i 999 ;;
  685. }
  686. { .mfi
  687. nop.m 999
  688. (p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
  689. nop.i 999
  690. }
  691. // Calculate p_odd
  692. // f34 = A_2 + Rsq *A_3
  693. // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
  694. // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
  695. { .mfi
  696. nop.m 999
  697. (p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
  698. nop.i 999 ;;
  699. }
  700. { .mfi
  701. (p0) setf.exp sinh_FR_N_temp1 = r39 // f11 (A3) is reused here after its last read above
  702. nop.f 999
  703. nop.i 999 ;;
  704. }
  705. { .mfi
  706. nop.m 999
  707. (p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
  708. nop.i 999
  709. }
  710. { .mfi
  711. nop.m 999
  712. (p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
  713. nop.i 999 ;;
  714. }
  715. { .mfi
  716. (p0) setf.exp f9 = r32 // f9 = 32.0; f9 (A1) is reused after its last read above
  717. nop.f 999
  718. nop.i 999 ;;
  719. }
  720. { .mfi
  721. nop.m 999
  722. (p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
  723. nop.i 999
  724. }
  725. // sinh_GR_mj contains the table offset for -j
  726. // sinh_GR_j contains the table offset for +j
  727. // NOTE: no predicate on the sign of j is set in this code; both offsets are always computed
  728. { .mlx
  729. (p0) setf.exp sinh_FR_N_temp2 = r40 // writes f12 (Rcub) in the same instruction group as its last read above
  730. (p0) movl r40 = 0x0000000000000020 ;; // r40 = 32, the index of the j = 0 table entry
  731. }
  732. { .mfi
  733. (p0) sub sinh_GR_mJ = r40, r36 // entry index 32 - j
  734. (p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1 // 2^(N-1): exponent from N_temp1, significand of 1.0
  735. (p0) adds sinh_GR_J = 0x20, r36 ;; // entry index 32 + j
  736. }
  737. { .mii
  738. nop.m 999
  739. (p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;; // times 32 bytes per table entry
  740. (p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
  741. }
  742. { .mmi
  743. nop.m 999
  744. (p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
  745. (p0) shl sinh_GR_J = sinh_GR_J, 5 ;; // times 32 bytes per table entry
  746. }
  747. { .mfi
  748. (p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
  749. (p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9 // p7 = !(|x| < 32)
  750. (p0) add sinh_AD_J = r37, sinh_GR_J ;;
  751. }
  752. { .mmi
  753. (p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
  754. (p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
  755. nop.i 999 ;;
  756. }
  757. { .mfb
  758. nop.m 999
  759. (p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1 // 2^(-N-1): exponent from N_temp2, significand of 1.0
  760. (p7) br.cond.spnt SINH_BY_EXP ;;
  761. }
  762. { .mfi
  763. nop.m 999
  764. nop.f 999
  765. nop.i 999 ;;
  766. }
  767. // ******************************************************
  768. // If NOT branch to EXP (0.25 <= |x| < 32): combine table values and polynomials
  769. // ******************************************************
  770. // Calculate S_hi and S_lo, where sinh(B) = S_hi + S_lo
  771. // sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
  772. // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
  773. // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
  774. { .mfi
  775. nop.m 999
  776. (p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
  777. nop.i 999 ;;
  778. }
  779. { .mfi
  780. nop.m 999
  781. (p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
  782. nop.i 999
  783. }
  784. // Calculate C_hi
  785. // sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
  786. // sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
  787. { .mfi
  788. nop.m 999
  789. (p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
  790. nop.i 999 ;;
  791. }
  792. // sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
  793. // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
  794. // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1 ), the part of the hi products not held in S_hi
  795. { .mfi
  796. nop.m 999
  797. (p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
  798. nop.i 999
  799. }
  800. { .mfi
  801. nop.m 999
  802. (p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
  803. nop.i 999 ;;
  804. }
  805. { .mfi
  806. nop.m 999
  807. (p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
  808. nop.i 999
  809. }
  810. // sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
  811. // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
  812. // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
  813. // sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
  814. { .mfi
  815. nop.m 999
  816. (p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
  817. nop.i 999 ;;
  818. }
  819. { .mfi
  820. nop.m 999
  821. (p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1 // fms, not fma: the -j term of sinh(B) is subtracted for the lo parts as well
  822. nop.i 999 ;;
  823. }
  824. { .mfi
  825. nop.m 999
  826. (p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
  827. nop.i 999 ;;
  828. }
  829. // Y_hi = S_hi
  830. // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
  831. // sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
  832. // sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
  833. { .mfi
  834. nop.m 999
  835. (p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
  836. nop.i 999 ;;
  837. }
  838. { .mfi
  839. nop.m 999
  840. (p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
  841. nop.i 999 ;;
  842. }
  843. // sinh_FR_SINH = Y_hi + Y_lo
  844. // f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
  845. { .mfi
  846. nop.m 999
  847. (p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
  848. nop.i 999 ;;
  849. }
  850. { .mfb
  851. nop.m 999
  852. (p0) fma.d.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
  853. (p0) br.ret.sptk b0 ;;
  854. }
  855. SINH_BY_EXP:
  856. // When p7 is true, we know that an overflow is not going to happen
  857. // When p7 is false, we must check for possible overflow
  858. // p7 is the over_SAFE flag
  859. // Y_hi = Tjhi
  860. // Y_lo = Tjhi * (p_odd + p_even) +Tjlo
  861. // Scale = sign * 2^(N-1)
  862. // sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
  863. // sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
  864. { .mfi
  865. nop.m 999
  866. (p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
  867. nop.i 999
  868. }
  869. // Now we are in EXP. This is the only path where an overflow is possible
  870. // but not for certain. So this is the only path where over_SAFE has any use.
  871. // r34 still has N-1
  872. // There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
  873. // There is a danger of double overflow if N-1 > 0x3fe = 1022
  874. { .mlx
  875. nop.m 999
  876. (p0) movl r32 = 0x00000000000003fe ;;
  877. }
  878. { .mfi
  879. (p0) cmp.gt.unc p0,p7 = r34, r32
  880. (p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
  881. nop.i 999 ;;
  882. }
  883. { .mfi
  884. nop.m 999
  885. (p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
  886. nop.i 999 ;;
  887. }
  888. // f8 = answer = scale * (Y_hi + Y_lo)
  889. { .mfi
  890. nop.m 999
  891. (p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
  892. nop.i 999 ;;
  893. }
  894. { .mfi
  895. nop.m 999
  896. (p0) fma.d.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
  897. nop.i 999 ;;
  898. }
  899. // If over_SAFE is set, return
  900. { .mfb
  901. nop.m 999
  902. (p7) fmerge.s f8 = f44,f44
  903. (p7) br.ret.sptk b0 ;;
  904. }
  905. // Else see if we overflowed
  906. // S0 user supplied status
  907. // S2 user supplied status + WRE + TD (Overflows)
  908. // If WRE is set then an overflow will not occur in EXP.
  909. // The input value that would cause a register (WRE) value to overflow is about 2^15
  910. // and this input would go into the HUGE path.
  911. // Answer with WRE is in f43.
  912. { .mfi
  913. nop.m 999
  914. (p0) fsetc.s2 0x7F,0x42
  915. nop.i 999;;
  916. }
  917. { .mfi
  918. nop.m 999
  919. (p0) fma.d.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
  920. nop.i 999 ;;
  921. }
  922. // 103FF => 103FF -FFFF = 400(true)
  923. // 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
  924. // double (7FE). So 0 103FF 8000000000000000 is one ulp more than
  925. // largest double in register bias
  926. // Now set p8 if the answer with WRE is greater than or equal to this value
  927. // Also set p9 if the answer with WRE is less than or equal to negative this value
  928. { .mlx
  929. nop.m 999
  930. (p0) movl r32 = 0x000000000103FF ;;
  931. }
  932. { .mmf
  933. nop.m 999
  934. (p0) setf.exp f41 = r32
  935. (p0) fsetc.s2 0x7F,0x40 ;;
  936. }
  937. { .mfi
  938. nop.m 999
  939. (p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
  940. nop.i 999
  941. }
  942. { .mfi
  943. nop.m 999
  944. (p0) fmerge.ns f42 = f41, f41
  945. nop.i 999 ;;
  946. }
  947. // The error tag for overflow is 127
  948. { .mii
  949. nop.m 999
  950. nop.i 999 ;;
  951. (p8) mov r47 = 127 ;;
  952. }
  953. { .mfb
  954. nop.m 999
  955. (p0) fcmp.le.unc.s1 p9, p0 = f43, f42
  956. (p8) br.cond.spnt SINH_ERROR_SUPPORT ;;
  957. }
  958. { .mii
  959. nop.m 999
  960. nop.i 999 ;;
  961. (p9) mov r47 = 127
  962. }
  963. { .mib
  964. nop.m 999
  965. nop.i 999
  966. (p9) br.cond.spnt SINH_ERROR_SUPPORT ;;
  967. }
  968. { .mfb
  969. nop.m 999
  970. (p0) fmerge.s f8 = f44,f44
  971. (p0) br.ret.sptk b0 ;;
  972. }
  973. SINH_HUGE:
  974. // for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
  975. // SAFE: SAFE is always 0 for HUGE
  976. { .mlx
  977. nop.m 999
  978. (p0) movl r32 = 0x0000000000015dbf ;;
  979. }
  980. { .mfi
  981. (p0) setf.exp f9 = r32
  982. nop.f 999
  983. nop.i 999 ;;
  984. }
  985. { .mfi
  986. nop.m 999
  987. (p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
  988. nop.i 999 ;;
  989. }
  990. { .mfi
  991. nop.m 999
  992. (p0) fma.d.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
  993. (p0) mov r47 = 127
  994. }
  995. .endp sinh
  996. // Stack operations when calling error support.
  997. //        (1)               (2)                       (3) (call)                   (4)
  998. //  sp  -> +          psp -> +                     psp -> +                   sp -> +
  999. //         |                 |                            |                         |
  1000. //         |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
  1001. //         |                 |                            |                         |
  1002. //         | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
  1003. //         |                 |                            |                         |
  1004. //         |                 | <- GR_X               X1 ->|                         |
  1005. //         |                 |                            |                         |
  1006. //sp-64 -> +           sp -> +                      sp -> +                         +
  1007. //  save ar.pfs          save b0                                                restore gp
  1008. //  save gp                                                                     restore ar.pfs
  1009. .proc __libm_error_region // Error path: spill f8, 0.0 and f44 to a 64-byte frame, call __libm_error_support, reload f8 from the result slot
  1010. __libm_error_region: // The caller paths set r47 (error tag) before arriving; this block never touches it - presumably consumed by the callee, confirm
  1011. SINH_ERROR_SUPPORT: // Reached by br.cond from the overflow checks and by fall-through from the end of SINH_HUGE
  1012. .prologue
  1013. // (1) Take the Parameter 2 slot address from the incoming sp, save ar.pfs and gp, then open a 64-byte frame
  1014. { .mfi
  1015. add GR_Parameter_Y=-32,sp // Address of the Parameter 2 slot: old sp-32, i.e. new sp+32
  1016. nop.f 0
  1017. .save ar.pfs,GR_SAVE_PFS
  1018. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs (restored just before the return)
  1019. }
  1020. { .mfi
  1021. .fframe 64
  1022. add sp=-64,sp // Create new 64-byte stack frame
  1023. nop.f 0
  1024. mov GR_SAVE_GP=gp // Save gp (restored after the call)
  1025. };;
  1026. // (2) Fill the Parameter 2 slot, form the Parameter 1 address, save the return address
  1027. { .mmi
  1028. stfd [GR_Parameter_Y] = f0,16 // STORE f0 (reads as +0.0 on IA-64) at sp+32, then post-increment GR_Parameter_Y to sp+48
  1029. add GR_Parameter_X = 16,sp // Parameter 1 address = sp+16
  1030. .save b0, GR_SAVE_B0
  1031. mov GR_SAVE_B0=b0 // Save b0; the br.call below overwrites it
  1032. };;
  1033. .body
  1034. // (3) Store Parameter 1 and the computed result, point the three GR_Parameter_* registers at their slots, call the handler
  1035. { .mib
  1036. stfd [GR_Parameter_X] = f8 // STORE f8 at sp+16 as Parameter 1 (presumably still the original argument - confirm)
  1037. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = sp+48
  1038. nop.b 0
  1039. }
  1040. { .mib
  1041. stfd [GR_Parameter_Y] = f44 // STORE f44 (result computed by the caller path) at sp+48, the Parameter 3 slot
  1042. add GR_Parameter_Y = -16,GR_Parameter_Y // Step GR_Parameter_Y back to sp+32, the Parameter 2 slot
  1043. br.call.sptk b0=__libm_error_support# // Call error handling function
  1044. };;
  1045. { .mmi
  1046. nop.m 0
  1047. nop.m 0
  1048. add GR_Parameter_RESULT = 48,sp // Recompute the result slot address; per the file history the call overwrote this register
  1049. };;
  1050. // (4) Load the final result into f8, release the frame, restore b0/gp/ar.pfs and return
  1051. { .mmi
  1052. ldfd f8 = [GR_Parameter_RESULT] // f8 = contents of the result slot (sp+48) after the call
  1053. .restore
  1054. add sp = 64,sp // Restore stack pointer (undo the 64-byte frame)
  1055. mov b0 = GR_SAVE_B0 // Restore return address
  1056. };;
  1057. { .mib
  1058. mov gp = GR_SAVE_GP // Restore gp
  1059. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  1060. br.ret.sptk b0 // Return to the caller of sinh with the result in f8
  1061. };;
  1062. .endp __libm_error_region
  1063. .type __libm_error_support#,@function
  1064. .global __libm_error_support#