Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1104 lines
35 KiB

  1. .file "log.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00 Initial version
  29. // 4/04/00 Unwind support added
  30. // 6/16/00 Updated table to be rounded correctly
  31. // 8/15/00 Bundle added after call to __libm_error_support to properly
  32. // set [the previously overwritten] GR_Parameter_RESULT.
  33. // 8/17/00 Improved speed of main path by 5 cycles
  34. // Shortened path for x=1.0
  35. //
  36. //
  37. // API
  38. //==============================================================
  39. // double log(double)
  40. // double log10(double)
  41. //
  42. // Overview of operation
  43. //==============================================================
  44. // Background
  45. //
  46. // Consider x = 2^N 1.f1 f2 f3 f4...f63
  47. // Log(x) = log(frcpa(x) x/frcpa(x))
  48. // = log(1/frcpa(x)) + log(frcpa(x) x)
  49. // = -log(frcpa(x)) + log(frcpa(x) x)
  50. //
  51. // frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
  52. //
  53. // -log(frcpa(x)) = -log(C)
  54. // = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
  55. //
  56. // -log(frcpa(x)) = -log(C)
  57. // = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
  58. //
  59. // -log(frcpa(x)) = -log(C)
  60. // = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
  61. //
  62. // Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
  63. // Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
  64. // Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
  65. // Log(x) = +Nlog2 + T + log(frcpa(x) x)
  66. //
  67. // Log(x) = +Nlog2 + T + log(C x)
  68. //
  69. // Cx = 1 + r
  70. //
  71. // Log(x) = +Nlog2 + T + log(1+r)
  72. // Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
  73. //
  74. // 1.f1 f2 ... f8 has 256 entries.
  75. // They are 1 + k/2^8, k = 0 ... 255
  76. // These 256 values are the table entries.
  77. //
  78. // Implementation
  79. //===============
  80. // input = x
  81. // C = frcpa(x)
  82. // r = C * x - 1
  83. //
  84. // Form the series P1 + P2 * r + P_3 * r^2 + P_4 * r^3 + P_5 * r^4
  85. // P_series = r(r(r(r * P5 +P4) + P3) + P2) + P1
  86. // x = f * 2^n where f is 1.f_1f_2f_3....f_63
  87. // Nfloat = float(n) where n is the true unbiased exponent
  88. // pre-index = f_1f_2....f_8
  89. // index = pre_index * 16
  90. // get the dxt table entry at index + offset = T
  91. // Yhi = (T + Nfloat * log(2)) + r
  92. // answer = P_series * r*r + Yhi
  93. // The T table is calculated as follows
  94. // Form x_k = 1 + k/2^8 where k goes from 0... 255
  95. // y_k = frcpa(x_k)
  96. // log(1/y_k) in quad and round to double-extended
  97. // Special values
  98. //==============================================================
  99. // log(+0) = -inf
  100. // log(-0) = -inf
  101. // log(+qnan) = +qnan
  102. // log(-qnan) = -qnan
  103. // log(+snan) = +qnan
  104. // log(-snan) = -qnan
  105. // log(-n) = QNAN Indefinite
  106. // log(-inf) = QNAN Indefinite
  107. // log(+inf) = +inf
  108. // Registers used
  109. //==============================================================
  110. // Floating Point registers used:
  111. // f8, input
  112. // f9 -> f15, f32 -> f68
  113. // General registers used:
  114. // r32 -> r54
  115. // Predicate registers used:
  116. // p6 -> p14
  117. // p8 log base e
  118. // p6 log base e special
  119. // p9 used in the frcpa
  120. // p13 log base e large W
  121. // p14 log base e small w
  122. // p7 log base 10
  123. // p10 log base 10 large W
  124. // p11 log base 10 small w
  125. // p12 log base 10 special
  126. // Assembly macros
  127. //==============================================================
  128. log_int_Nfloat = f9
  129. log_Nfloat = f10
  130. log_P5 = f11
  131. log_P4 = f12
  132. log_P3 = f13
  133. log_P2 = f14
  134. log_P1 = f15
  135. log_log2 = f32
  136. log_T = f33
  137. log_rp_p4 = f34
  138. log_rp_p3 = f35
  139. log_rp_p2 = f36
  140. log_pseries = f37
  141. log_rsq = f40
  142. log_T_plus_Nlog2 = f41
  143. log_Yhi = f42
  144. log_r = f43
  145. log_C = f44
  146. log_w = f45
  147. log_Q8 = f46
  148. log_Q7 = f47
  149. log_Q3 = f48
  150. log_Q4 = f49
  151. log_Q6 = f50
  152. log_Q2 = f51
  153. log_Q5 = f52
  154. log_Q1 = f53
  155. log_rp_q7 = f54
  156. log_rp_q6 = f55
  157. log_Qlo = f56
  158. log_rp_q3 = f57
  159. log_rp_q2 = f58
  160. log_Qhi = f59
  161. log_wsq = f60
  162. log_w4 = f61
  163. log_Q = f62
  164. log_inv_ln10 = f63
  165. log_log10_hi = f64
  166. log_log10_lo = f65
  167. log_neg_one = f66
  168. log_NORM_f8 = f67
  169. log_r2P_r = f68
  170. // ===================================
  171. log_GR_exp_17_ones = r33
  172. log_GR_exp_16_ones = r34
  173. log_GR_exp_f8 = r35
  174. log_GR_signexp_f8 = r36
  175. log_GR_true_exp_f8 = r37
  176. log_GR_significand_f8 = r38
  177. log_GR_sig_no_explicit = r39
  178. log_GR_sig_f1f8 = r39
  179. log_GR_sig_f1f8_times_16 = r39
  180. log_GR_AD_p_table = r40
  181. log_GR_signexp_w = r41
  182. log_GR_fff9 = r42
  183. //////////////////////////////////////////////////////////////
  184. log_GR_AD_q_table = r43
  185. log_GR_AD_inv_ln_10 = r44
  186. // r45 unused
  187. log_GR_exp_w = r46
  188. // r47 is unused
  189. GR_SAVE_B0 = r48
  190. GR_SAVE_GP = r49
  191. GR_SAVE_PFS = r50
  192. GR_Parameter_X = r51
  193. GR_Parameter_Y = r52
  194. GR_Parameter_RESULT = r53
  195. log_GR_tag = r54
  196. // Data tables
  197. //==============================================================
  198. .data
  199. .align 16
  // log_q_table: eight IEEE double coefficients of the polynomial in w = x - 1
  // that is used when x is close to 1:
  //   result = w + w^2 * (Q1 + Q2*w + Q3*w^2 + ... + Q8*w^7)
  // The storage order (Q8,Q7,Q3,Q4,Q6,Q2,Q5,Q1) is the order in which the
  // post-incrementing ldfpd/ldfd loads at LOG_LN_X consume the table, so the
  // entries must not be reordered.
  200. log_q_table:
  201. data8 0x3FBC756AC654273B // Q8 (near +1/9)
  202. data8 0xBFC001A42489AB4D // Q7 (near -1/8)
  203. data8 0xBFD00000000019AC // Q3 (near -1/4)
  204. data8 0x3FC99999999A169B // Q4 (near +1/5)
  205. data8 0x3FC2492479AA0DF8 // Q6 (near +1/7)
  206. data8 0x3FD5555555555555 // Q2 (+1/3)
  207. data8 0xBFC5555544986F52 // Q5 (near -1/6)
  208. data8 0xBFE0000000000000 // Q1 (-1/2)
  // log_p_table: five IEEE double coefficients of the polynomial in r = C*x - 1
  // (P_series = P1 + P2*r + P3*r^2 + P4*r^3 + P5*r^4), followed by one pad
  // quadword, then 16-byte double-extended constants (significand quadword
  // first, sign/exponent quadword second) that are read with ldfe.
  // The code walks this table with post-increment loads, so the layout
  // (5 doubles + pad, 1/ln(10), log2, T table) must stay exactly as it is.
  209. log_p_table:
  210. data8 0xBFC5555DA7212371 // P5 (near -1/6)
  211. data8 0x3FC999A19EEF5826 // P4 (near +1/5)
  212. data8 0xBFCFFFFFFFFEF009 // P3 (near -1/4)
  213. data8 0x3FD555555554ECB2 // P2 (near +1/3)
  214. data8 0xBFE0000000000000 // P1 (-1/2)
  215. data8 0x0000000000000000 // pad so that the double-extended values fall on 16-byte boundaries
  216. data8 0xde5bd8a937287195, 0x00003ffd // double-extended 1/ln(10)
  217. data8 0xb17217f7d1cf79ac, 0x00003ffe // double-extended log(2) (natural log of 2)
  218. // log(2) to more digits: b17217f7d1cf79ab c9e3b39803f2f6a (the entry above is this value rounded to 64 bits)
  // What follows is the 256-entry T table, 16 bytes per entry, indexed by the
  // leading eight fraction bits of x: T[k] = log(1/frcpa(1 + k/2^8)), k = 0..255.
  // NOTE: the per-entry comments write the denominator as "2^-8"; 2^8 is meant.
  219. data8 0x80200aaeac44ef38 , 0x00003ff6 // log(1/frcpa(1+ 0/2^-8))
  220. data8 0xc09090a2c35aa070 , 0x00003ff7 // log(1/frcpa(1+ 1/2^-8))
  221. data8 0xa0c94fcb41977c75 , 0x00003ff8 // log(1/frcpa(1+ 2/2^-8))
  222. data8 0xe18b9c263af83301 , 0x00003ff8 // log(1/frcpa(1+ 3/2^-8))
  223. data8 0x8d35c8d6399c30ea , 0x00003ff9 // log(1/frcpa(1+ 4/2^-8))
  224. data8 0xadd4d2ecd601cbb8 , 0x00003ff9 // log(1/frcpa(1+ 5/2^-8))
  225. data8 0xce95403a192f9f01 , 0x00003ff9 // log(1/frcpa(1+ 6/2^-8))
  226. data8 0xeb59392cbcc01096 , 0x00003ff9 // log(1/frcpa(1+ 7/2^-8))
  227. data8 0x862c7d0cefd54c5d , 0x00003ffa // log(1/frcpa(1+ 8/2^-8))
  228. data8 0x94aa63c65e70d499 , 0x00003ffa // log(1/frcpa(1+ 9/2^-8))
  229. data8 0xa54a696d4b62b382 , 0x00003ffa // log(1/frcpa(1+ 10/2^-8))
  230. data8 0xb3e4a796a5dac208 , 0x00003ffa // log(1/frcpa(1+ 11/2^-8))
  231. data8 0xc28c45b1878340a9 , 0x00003ffa // log(1/frcpa(1+ 12/2^-8))
  232. data8 0xd35c55f39d7a6235 , 0x00003ffa // log(1/frcpa(1+ 13/2^-8))
  233. data8 0xe220f037b954f1f5 , 0x00003ffa // log(1/frcpa(1+ 14/2^-8))
  234. data8 0xf0f3389b036834f3 , 0x00003ffa // log(1/frcpa(1+ 15/2^-8))
  235. data8 0xffd3488d5c980465 , 0x00003ffa // log(1/frcpa(1+ 16/2^-8))
  236. data8 0x87609ce2ed300490 , 0x00003ffb // log(1/frcpa(1+ 17/2^-8))
  237. data8 0x8ede9321e8c85927 , 0x00003ffb // log(1/frcpa(1+ 18/2^-8))
  238. data8 0x96639427f2f8e2f4 , 0x00003ffb // log(1/frcpa(1+ 19/2^-8))
  239. data8 0x9defad3e8f73217b , 0x00003ffb // log(1/frcpa(1+ 20/2^-8))
  240. data8 0xa582ebd50097029c , 0x00003ffb // log(1/frcpa(1+ 21/2^-8))
  241. data8 0xac06dbe75ab80fee , 0x00003ffb // log(1/frcpa(1+ 22/2^-8))
  242. data8 0xb3a78449b2d3ccca , 0x00003ffb // log(1/frcpa(1+ 23/2^-8))
  243. data8 0xbb4f79635ab46bb2 , 0x00003ffb // log(1/frcpa(1+ 24/2^-8))
  244. data8 0xc2fec93a83523f3f , 0x00003ffb // log(1/frcpa(1+ 25/2^-8))
  245. data8 0xc99af2eaca4c4571 , 0x00003ffb // log(1/frcpa(1+ 26/2^-8))
  246. data8 0xd1581106472fa653 , 0x00003ffb // log(1/frcpa(1+ 27/2^-8))
  247. data8 0xd8002560d4355f2e , 0x00003ffb // log(1/frcpa(1+ 28/2^-8))
  248. data8 0xdfcb43b4fe508632 , 0x00003ffb // log(1/frcpa(1+ 29/2^-8))
  249. data8 0xe67f6dff709d4119 , 0x00003ffb // log(1/frcpa(1+ 30/2^-8))
  250. data8 0xed393b1c22351280 , 0x00003ffb // log(1/frcpa(1+ 31/2^-8))
  251. data8 0xf5192bff087bcc35 , 0x00003ffb // log(1/frcpa(1+ 32/2^-8))
  252. data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb // log(1/frcpa(1+ 33/2^-8))
  253. data8 0x81559a97f92f9cc7 , 0x00003ffc // log(1/frcpa(1+ 34/2^-8))
  254. data8 0x84be72bce90266e8 , 0x00003ffc // log(1/frcpa(1+ 35/2^-8))
  255. data8 0x88bc74113f23def2 , 0x00003ffc // log(1/frcpa(1+ 36/2^-8))
  256. data8 0x8c2ba3edf6799d11 , 0x00003ffc // log(1/frcpa(1+ 37/2^-8))
  257. data8 0x8f9dc92f92ea08b1 , 0x00003ffc // log(1/frcpa(1+ 38/2^-8))
  258. data8 0x9312e8f36efab5a7 , 0x00003ffc // log(1/frcpa(1+ 39/2^-8))
  259. data8 0x968b08643409ceb6 , 0x00003ffc // log(1/frcpa(1+ 40/2^-8))
  260. data8 0x9a062cba08a1708c , 0x00003ffc // log(1/frcpa(1+ 41/2^-8))
  261. data8 0x9d845b3abf95485c , 0x00003ffc // log(1/frcpa(1+ 42/2^-8))
  262. data8 0xa06fd841bc001bb4 , 0x00003ffc // log(1/frcpa(1+ 43/2^-8))
  263. data8 0xa3f3a74652fbe0db , 0x00003ffc // log(1/frcpa(1+ 44/2^-8))
  264. data8 0xa77a8fb2336f20f5 , 0x00003ffc // log(1/frcpa(1+ 45/2^-8))
  265. data8 0xab0497015d28b0a0 , 0x00003ffc // log(1/frcpa(1+ 46/2^-8))
  266. data8 0xae91c2be6ba6a615 , 0x00003ffc // log(1/frcpa(1+ 47/2^-8))
  267. data8 0xb189d1b99aebb20b , 0x00003ffc // log(1/frcpa(1+ 48/2^-8))
  268. data8 0xb51cced5de9c1b2c , 0x00003ffc // log(1/frcpa(1+ 49/2^-8))
  269. data8 0xb819bee9e720d42f , 0x00003ffc // log(1/frcpa(1+ 50/2^-8))
  270. data8 0xbbb2a0947b093a5d , 0x00003ffc // log(1/frcpa(1+ 51/2^-8))
  271. data8 0xbf4ec1505811684a , 0x00003ffc // log(1/frcpa(1+ 52/2^-8))
  272. data8 0xc2535bacfa8975ff , 0x00003ffc // log(1/frcpa(1+ 53/2^-8))
  273. data8 0xc55a3eafad187eb8 , 0x00003ffc // log(1/frcpa(1+ 54/2^-8))
  274. data8 0xc8ff2484b2c0da74 , 0x00003ffc // log(1/frcpa(1+ 55/2^-8))
  275. data8 0xcc0b1a008d53ab76 , 0x00003ffc // log(1/frcpa(1+ 56/2^-8))
  276. data8 0xcfb6203844b3209b , 0x00003ffc // log(1/frcpa(1+ 57/2^-8))
  277. data8 0xd2c73949a47a19f5 , 0x00003ffc // log(1/frcpa(1+ 58/2^-8))
  278. data8 0xd5daae18b49d6695 , 0x00003ffc // log(1/frcpa(1+ 59/2^-8))
  279. data8 0xd8f08248cf7e8019 , 0x00003ffc // log(1/frcpa(1+ 60/2^-8))
  280. data8 0xdca7749f1b3e540e , 0x00003ffc // log(1/frcpa(1+ 61/2^-8))
  281. data8 0xdfc28e033aaaf7c7 , 0x00003ffc // log(1/frcpa(1+ 62/2^-8))
  282. data8 0xe2e012a5f91d2f55 , 0x00003ffc // log(1/frcpa(1+ 63/2^-8))
  283. data8 0xe600064ed9e292a8 , 0x00003ffc // log(1/frcpa(1+ 64/2^-8))
  284. data8 0xe9226cce42b39f60 , 0x00003ffc // log(1/frcpa(1+ 65/2^-8))
  285. data8 0xec4749fd97a28360 , 0x00003ffc // log(1/frcpa(1+ 66/2^-8))
  286. data8 0xef6ea1bf57780495 , 0x00003ffc // log(1/frcpa(1+ 67/2^-8))
  287. data8 0xf29877ff38809091 , 0x00003ffc // log(1/frcpa(1+ 68/2^-8))
  288. data8 0xf5c4d0b245cb89be , 0x00003ffc // log(1/frcpa(1+ 69/2^-8))
  289. data8 0xf8f3afd6fcdef3aa , 0x00003ffc // log(1/frcpa(1+ 70/2^-8))
  290. data8 0xfc2519756be1abc7 , 0x00003ffc // log(1/frcpa(1+ 71/2^-8))
  291. data8 0xff59119f503e6832 , 0x00003ffc // log(1/frcpa(1+ 72/2^-8))
  292. data8 0x8147ce381ae0e146 , 0x00003ffd // log(1/frcpa(1+ 73/2^-8))
  293. data8 0x82e45f06cb1ad0f2 , 0x00003ffd // log(1/frcpa(1+ 74/2^-8))
  294. data8 0x842f5c7c573cbaa2 , 0x00003ffd // log(1/frcpa(1+ 75/2^-8))
  295. data8 0x85ce471968c8893a , 0x00003ffd // log(1/frcpa(1+ 76/2^-8))
  296. data8 0x876e8305bc04066d , 0x00003ffd // log(1/frcpa(1+ 77/2^-8))
  297. data8 0x891012678031fbb3 , 0x00003ffd // log(1/frcpa(1+ 78/2^-8))
  298. data8 0x8a5f1493d766a05f , 0x00003ffd // log(1/frcpa(1+ 79/2^-8))
  299. data8 0x8c030c778c56fa00 , 0x00003ffd // log(1/frcpa(1+ 80/2^-8))
  300. data8 0x8da85df17e31d9ae , 0x00003ffd // log(1/frcpa(1+ 81/2^-8))
  301. data8 0x8efa663e7921687e , 0x00003ffd // log(1/frcpa(1+ 82/2^-8))
  302. data8 0x90a22b6875c6a1f8 , 0x00003ffd // log(1/frcpa(1+ 83/2^-8))
  303. data8 0x91f62cc8f5d24837 , 0x00003ffd // log(1/frcpa(1+ 84/2^-8))
  304. data8 0x93a06cfc3857d980 , 0x00003ffd // log(1/frcpa(1+ 85/2^-8))
  305. data8 0x94f66d5e6fd01ced , 0x00003ffd // log(1/frcpa(1+ 86/2^-8))
  306. data8 0x96a330156e6772f2 , 0x00003ffd // log(1/frcpa(1+ 87/2^-8))
  307. data8 0x97fb3582754ea25b , 0x00003ffd // log(1/frcpa(1+ 88/2^-8))
  308. data8 0x99aa8259aad1bbf2 , 0x00003ffd // log(1/frcpa(1+ 89/2^-8))
  309. data8 0x9b0492f6227ae4a8 , 0x00003ffd // log(1/frcpa(1+ 90/2^-8))
  310. data8 0x9c5f8e199bf3a7a5 , 0x00003ffd // log(1/frcpa(1+ 91/2^-8))
  311. data8 0x9e1293b9998c1daa , 0x00003ffd // log(1/frcpa(1+ 92/2^-8))
  312. data8 0x9f6fa31e0b41f308 , 0x00003ffd // log(1/frcpa(1+ 93/2^-8))
  313. data8 0xa0cda11eaf46390e , 0x00003ffd // log(1/frcpa(1+ 94/2^-8))
  314. data8 0xa22c8f029cfa45aa , 0x00003ffd // log(1/frcpa(1+ 95/2^-8))
  315. data8 0xa3e48badb7856b34 , 0x00003ffd // log(1/frcpa(1+ 96/2^-8))
  316. data8 0xa5459a0aa95849f9 , 0x00003ffd // log(1/frcpa(1+ 97/2^-8))
  317. data8 0xa6a79c84480cfebd , 0x00003ffd // log(1/frcpa(1+ 98/2^-8))
  318. data8 0xa80a946d0fcb3eb2 , 0x00003ffd // log(1/frcpa(1+ 99/2^-8))
  319. data8 0xa96e831a3ea7b314 , 0x00003ffd // log(1/frcpa(1+100/2^-8))
  320. data8 0xaad369e3dc544e3b , 0x00003ffd // log(1/frcpa(1+101/2^-8))
  321. data8 0xac92e9588952c815 , 0x00003ffd // log(1/frcpa(1+102/2^-8))
  322. data8 0xadfa035aa1ed8fdc , 0x00003ffd // log(1/frcpa(1+103/2^-8))
  323. data8 0xaf6219eae1ad6e34 , 0x00003ffd // log(1/frcpa(1+104/2^-8))
  324. data8 0xb0cb2e6d8160f753 , 0x00003ffd // log(1/frcpa(1+105/2^-8))
  325. data8 0xb2354249ad950f72 , 0x00003ffd // log(1/frcpa(1+106/2^-8))
  326. data8 0xb3a056e98ef4a3b4 , 0x00003ffd // log(1/frcpa(1+107/2^-8))
  327. data8 0xb50c6dba52c6292a , 0x00003ffd // log(1/frcpa(1+108/2^-8))
  328. data8 0xb679882c33876165 , 0x00003ffd // log(1/frcpa(1+109/2^-8))
  329. data8 0xb78c07429785cedc , 0x00003ffd // log(1/frcpa(1+110/2^-8))
  330. data8 0xb8faeb8dc4a77d24 , 0x00003ffd // log(1/frcpa(1+111/2^-8))
  331. data8 0xba6ad77eb36ae0d6 , 0x00003ffd // log(1/frcpa(1+112/2^-8))
  332. data8 0xbbdbcc915e9bee50 , 0x00003ffd // log(1/frcpa(1+113/2^-8))
  333. data8 0xbd4dcc44f8cf12ef , 0x00003ffd // log(1/frcpa(1+114/2^-8))
  334. data8 0xbec0d81bf5b531fa , 0x00003ffd // log(1/frcpa(1+115/2^-8))
  335. data8 0xc034f19c139186f4 , 0x00003ffd // log(1/frcpa(1+116/2^-8))
  336. data8 0xc14cb69f7c5e55ab , 0x00003ffd // log(1/frcpa(1+117/2^-8))
  337. data8 0xc2c2abbb6e5fd56f , 0x00003ffd // log(1/frcpa(1+118/2^-8))
  338. data8 0xc439b2c193e6771e , 0x00003ffd // log(1/frcpa(1+119/2^-8))
  339. data8 0xc553acb9d5c67733 , 0x00003ffd // log(1/frcpa(1+120/2^-8))
  340. data8 0xc6cc96e441272441 , 0x00003ffd // log(1/frcpa(1+121/2^-8))
  341. data8 0xc8469753eca88c30 , 0x00003ffd // log(1/frcpa(1+122/2^-8))
  342. data8 0xc962cf3ce072b05c , 0x00003ffd // log(1/frcpa(1+123/2^-8))
  343. data8 0xcadeba8771f694aa , 0x00003ffd // log(1/frcpa(1+124/2^-8))
  344. data8 0xcc5bc08d1f72da94 , 0x00003ffd // log(1/frcpa(1+125/2^-8))
  345. data8 0xcd7a3f99ea035c29 , 0x00003ffd // log(1/frcpa(1+126/2^-8))
  346. data8 0xcef93860c8a53c35 , 0x00003ffd // log(1/frcpa(1+127/2^-8))
  347. data8 0xd0192f68a7ed23df , 0x00003ffd // log(1/frcpa(1+128/2^-8))
  348. data8 0xd19a201127d3c645 , 0x00003ffd // log(1/frcpa(1+129/2^-8))
  349. data8 0xd2bb92f4061c172c , 0x00003ffd // log(1/frcpa(1+130/2^-8))
  350. data8 0xd43e80b2ee8cc8fc , 0x00003ffd // log(1/frcpa(1+131/2^-8))
  351. data8 0xd56173601fc4ade4 , 0x00003ffd // log(1/frcpa(1+132/2^-8))
  352. data8 0xd6e6637efb54086f , 0x00003ffd // log(1/frcpa(1+133/2^-8))
  353. data8 0xd80ad9f58f3c8193 , 0x00003ffd // log(1/frcpa(1+134/2^-8))
  354. data8 0xd991d1d31aca41f8 , 0x00003ffd // log(1/frcpa(1+135/2^-8))
  355. data8 0xdab7d02231484a93 , 0x00003ffd // log(1/frcpa(1+136/2^-8))
  356. data8 0xdc40d532cde49a54 , 0x00003ffd // log(1/frcpa(1+137/2^-8))
  357. data8 0xdd685f79ed8b265e , 0x00003ffd // log(1/frcpa(1+138/2^-8))
  358. data8 0xde9094bbc0e17b1d , 0x00003ffd // log(1/frcpa(1+139/2^-8))
  359. data8 0xe01c91b78440c425 , 0x00003ffd // log(1/frcpa(1+140/2^-8))
  360. data8 0xe14658f26997e729 , 0x00003ffd // log(1/frcpa(1+141/2^-8))
  361. data8 0xe270cdc2391e0d23 , 0x00003ffd // log(1/frcpa(1+142/2^-8))
  362. data8 0xe3ffce3a2aa64922 , 0x00003ffd // log(1/frcpa(1+143/2^-8))
  363. data8 0xe52bdb274ed82887 , 0x00003ffd // log(1/frcpa(1+144/2^-8))
  364. data8 0xe6589852e75d7df6 , 0x00003ffd // log(1/frcpa(1+145/2^-8))
  365. data8 0xe786068c79937a7d , 0x00003ffd // log(1/frcpa(1+146/2^-8))
  366. data8 0xe91903adad100911 , 0x00003ffd // log(1/frcpa(1+147/2^-8))
  367. data8 0xea481236f7d35bb0 , 0x00003ffd // log(1/frcpa(1+148/2^-8))
  368. data8 0xeb77d48c692e6b14 , 0x00003ffd // log(1/frcpa(1+149/2^-8))
  369. data8 0xeca84b83d7297b87 , 0x00003ffd // log(1/frcpa(1+150/2^-8))
  370. data8 0xedd977f4962aa158 , 0x00003ffd // log(1/frcpa(1+151/2^-8))
  371. data8 0xef7179a22f257754 , 0x00003ffd // log(1/frcpa(1+152/2^-8))
  372. data8 0xf0a450d139366ca7 , 0x00003ffd // log(1/frcpa(1+153/2^-8))
  373. data8 0xf1d7e0524ff9ffdb , 0x00003ffd // log(1/frcpa(1+154/2^-8))
  374. data8 0xf30c29036a8b6cae , 0x00003ffd // log(1/frcpa(1+155/2^-8))
  375. data8 0xf4412bc411ea8d92 , 0x00003ffd // log(1/frcpa(1+156/2^-8))
  376. data8 0xf576e97564c8619d , 0x00003ffd // log(1/frcpa(1+157/2^-8))
  377. data8 0xf6ad62fa1b5f172f , 0x00003ffd // log(1/frcpa(1+158/2^-8))
  378. data8 0xf7e499368b55c542 , 0x00003ffd // log(1/frcpa(1+159/2^-8))
  379. data8 0xf91c8d10abaffe22 , 0x00003ffd // log(1/frcpa(1+160/2^-8))
  380. data8 0xfa553f7018c966f3 , 0x00003ffd // log(1/frcpa(1+161/2^-8))
  381. data8 0xfb8eb13e185d802c , 0x00003ffd // log(1/frcpa(1+162/2^-8))
  382. data8 0xfcc8e3659d9bcbed , 0x00003ffd // log(1/frcpa(1+163/2^-8))
  383. data8 0xfe03d6d34d487fd2 , 0x00003ffd // log(1/frcpa(1+164/2^-8))
  384. data8 0xff3f8c7581e9f0ae , 0x00003ffd // log(1/frcpa(1+165/2^-8))
  385. data8 0x803e029e280173ae , 0x00003ffe // log(1/frcpa(1+166/2^-8))
  386. data8 0x80dca10cc52d0757 , 0x00003ffe // log(1/frcpa(1+167/2^-8))
  387. data8 0x817ba200632755a1 , 0x00003ffe // log(1/frcpa(1+168/2^-8))
  388. data8 0x821b05f3b01d6774 , 0x00003ffe // log(1/frcpa(1+169/2^-8))
  389. data8 0x82bacd623ff19d06 , 0x00003ffe // log(1/frcpa(1+170/2^-8))
  390. data8 0x835af8c88e7a8f47 , 0x00003ffe // log(1/frcpa(1+171/2^-8))
  391. data8 0x83c5f8299e2b4091 , 0x00003ffe // log(1/frcpa(1+172/2^-8))
  392. data8 0x8466cb43f3d87300 , 0x00003ffe // log(1/frcpa(1+173/2^-8))
  393. data8 0x850803a67c80ca4b , 0x00003ffe // log(1/frcpa(1+174/2^-8))
  394. data8 0x85a9a1d11a23b461 , 0x00003ffe // log(1/frcpa(1+175/2^-8))
  395. data8 0x864ba644a18e6e05 , 0x00003ffe // log(1/frcpa(1+176/2^-8))
  396. data8 0x86ee1182dcc432f7 , 0x00003ffe // log(1/frcpa(1+177/2^-8))
  397. data8 0x875a925d7e48c316 , 0x00003ffe // log(1/frcpa(1+178/2^-8))
  398. data8 0x87fdaa109d23aef7 , 0x00003ffe // log(1/frcpa(1+179/2^-8))
  399. data8 0x88a129ed4becfaf2 , 0x00003ffe // log(1/frcpa(1+180/2^-8))
  400. data8 0x89451278ecd7f9cf , 0x00003ffe // log(1/frcpa(1+181/2^-8))
  401. data8 0x89b29295f8432617 , 0x00003ffe // log(1/frcpa(1+182/2^-8))
  402. data8 0x8a572ac5a5496882 , 0x00003ffe // log(1/frcpa(1+183/2^-8))
  403. data8 0x8afc2d0ce3b2dadf , 0x00003ffe // log(1/frcpa(1+184/2^-8))
  404. data8 0x8b6a69c608cfd3af , 0x00003ffe // log(1/frcpa(1+185/2^-8))
  405. data8 0x8c101e106e899a83 , 0x00003ffe // log(1/frcpa(1+186/2^-8))
  406. data8 0x8cb63de258f9d626 , 0x00003ffe // log(1/frcpa(1+187/2^-8))
  407. data8 0x8d2539c5bd19e2b1 , 0x00003ffe // log(1/frcpa(1+188/2^-8))
  408. data8 0x8dcc0e064b29e6f1 , 0x00003ffe // log(1/frcpa(1+189/2^-8))
  409. data8 0x8e734f45d88357ae , 0x00003ffe // log(1/frcpa(1+190/2^-8))
  410. data8 0x8ee30cef034a20db , 0x00003ffe // log(1/frcpa(1+191/2^-8))
  411. data8 0x8f8b0515686d1d06 , 0x00003ffe // log(1/frcpa(1+192/2^-8))
  412. data8 0x90336bba039bf32f , 0x00003ffe // log(1/frcpa(1+193/2^-8))
  413. data8 0x90a3edd23d1c9d58 , 0x00003ffe // log(1/frcpa(1+194/2^-8))
  414. data8 0x914d0de2f5d61b32 , 0x00003ffe // log(1/frcpa(1+195/2^-8))
  415. data8 0x91be0c20d28173b5 , 0x00003ffe // log(1/frcpa(1+196/2^-8))
  416. data8 0x9267e737c06cd34a , 0x00003ffe // log(1/frcpa(1+197/2^-8))
  417. data8 0x92d962ae6abb1237 , 0x00003ffe // log(1/frcpa(1+198/2^-8))
  418. data8 0x9383fa6afbe2074c , 0x00003ffe // log(1/frcpa(1+199/2^-8))
  419. data8 0x942f0421651c1c4e , 0x00003ffe // log(1/frcpa(1+200/2^-8))
  420. data8 0x94a14a3845bb985e , 0x00003ffe // log(1/frcpa(1+201/2^-8))
  421. data8 0x954d133857f861e7 , 0x00003ffe // log(1/frcpa(1+202/2^-8))
  422. data8 0x95bfd96468e604c4 , 0x00003ffe // log(1/frcpa(1+203/2^-8))
  423. data8 0x9632d31cafafa858 , 0x00003ffe // log(1/frcpa(1+204/2^-8))
  424. data8 0x96dfaabd86fa1647 , 0x00003ffe // log(1/frcpa(1+205/2^-8))
  425. data8 0x9753261fcbb2a594 , 0x00003ffe // log(1/frcpa(1+206/2^-8))
  426. data8 0x9800c11b426b996d , 0x00003ffe // log(1/frcpa(1+207/2^-8))
  427. data8 0x9874bf4d45ae663c , 0x00003ffe // log(1/frcpa(1+208/2^-8))
  428. data8 0x99231f5ee9a74f79 , 0x00003ffe // log(1/frcpa(1+209/2^-8))
  429. data8 0x9997a18a56bcad28 , 0x00003ffe // log(1/frcpa(1+210/2^-8))
  430. data8 0x9a46c873a3267e79 , 0x00003ffe // log(1/frcpa(1+211/2^-8))
  431. data8 0x9abbcfc621eb6cb6 , 0x00003ffe // log(1/frcpa(1+212/2^-8))
  432. data8 0x9b310cb0d354c990 , 0x00003ffe // log(1/frcpa(1+213/2^-8))
  433. data8 0x9be14cf9e1b3515c , 0x00003ffe // log(1/frcpa(1+214/2^-8))
  434. data8 0x9c5710b8cbb73a43 , 0x00003ffe // log(1/frcpa(1+215/2^-8))
  435. data8 0x9ccd0abd301f399c , 0x00003ffe // log(1/frcpa(1+216/2^-8))
  436. data8 0x9d7e67f3bdce8888 , 0x00003ffe // log(1/frcpa(1+217/2^-8))
  437. data8 0x9df4ea81a99daa01 , 0x00003ffe // log(1/frcpa(1+218/2^-8))
  438. data8 0x9e6ba405a54514ba , 0x00003ffe // log(1/frcpa(1+219/2^-8))
  439. data8 0x9f1e21c8c7bb62b3 , 0x00003ffe // log(1/frcpa(1+220/2^-8))
  440. data8 0x9f956593f6b6355c , 0x00003ffe // log(1/frcpa(1+221/2^-8))
  441. data8 0xa00ce1092e5498c3 , 0x00003ffe // log(1/frcpa(1+222/2^-8))
  442. data8 0xa0c08309c4b912c1 , 0x00003ffe // log(1/frcpa(1+223/2^-8))
  443. data8 0xa1388a8c6faa2afa , 0x00003ffe // log(1/frcpa(1+224/2^-8))
  444. data8 0xa1b0ca7095b5f985 , 0x00003ffe // log(1/frcpa(1+225/2^-8))
  445. data8 0xa22942eb47534a00 , 0x00003ffe // log(1/frcpa(1+226/2^-8))
  446. data8 0xa2de62326449d0a3 , 0x00003ffe // log(1/frcpa(1+227/2^-8))
  447. data8 0xa357690f88bfe345 , 0x00003ffe // log(1/frcpa(1+228/2^-8))
  448. data8 0xa3d0a93f45169a4b , 0x00003ffe // log(1/frcpa(1+229/2^-8))
  449. data8 0xa44a22f7ffe65f30 , 0x00003ffe // log(1/frcpa(1+230/2^-8))
  450. data8 0xa500c5e5b4c1aa36 , 0x00003ffe // log(1/frcpa(1+231/2^-8))
  451. data8 0xa57ad064eb2ebbc2 , 0x00003ffe // log(1/frcpa(1+232/2^-8))
  452. data8 0xa5f5152dedf4384e , 0x00003ffe // log(1/frcpa(1+233/2^-8))
  453. data8 0xa66f9478856233ec , 0x00003ffe // log(1/frcpa(1+234/2^-8))
  454. data8 0xa6ea4e7cca02c32e , 0x00003ffe // log(1/frcpa(1+235/2^-8))
  455. data8 0xa765437325341ccf , 0x00003ffe // log(1/frcpa(1+236/2^-8))
  456. data8 0xa81e21e6c75b4020 , 0x00003ffe // log(1/frcpa(1+237/2^-8))
  457. data8 0xa899ab333fe2b9ca , 0x00003ffe // log(1/frcpa(1+238/2^-8))
  458. data8 0xa9157039c51ebe71 , 0x00003ffe // log(1/frcpa(1+239/2^-8))
  459. data8 0xa991713433c2b999 , 0x00003ffe // log(1/frcpa(1+240/2^-8))
  460. data8 0xaa0dae5cbcc048b3 , 0x00003ffe // log(1/frcpa(1+241/2^-8))
  461. data8 0xaa8a27ede5eb13ad , 0x00003ffe // log(1/frcpa(1+242/2^-8))
  462. data8 0xab06de228a9e3499 , 0x00003ffe // log(1/frcpa(1+243/2^-8))
  463. data8 0xab83d135dc633301 , 0x00003ffe // log(1/frcpa(1+244/2^-8))
  464. data8 0xac3fb076adc7fe7a , 0x00003ffe // log(1/frcpa(1+245/2^-8))
  465. data8 0xacbd3cbbe47988f1 , 0x00003ffe // log(1/frcpa(1+246/2^-8))
  466. data8 0xad3b06b1a5dc57c3 , 0x00003ffe // log(1/frcpa(1+247/2^-8))
  467. data8 0xadb90e94af887717 , 0x00003ffe // log(1/frcpa(1+248/2^-8))
  468. data8 0xae3754a218f7c816 , 0x00003ffe // log(1/frcpa(1+249/2^-8))
  469. data8 0xaeb5d9175437afa2 , 0x00003ffe // log(1/frcpa(1+250/2^-8))
  470. data8 0xaf349c322e9c7cee , 0x00003ffe // log(1/frcpa(1+251/2^-8))
  471. data8 0xafb39e30d1768d1c , 0x00003ffe // log(1/frcpa(1+252/2^-8))
  472. data8 0xb032df51c2c93116 , 0x00003ffe // log(1/frcpa(1+253/2^-8))
  473. data8 0xb0b25fd3e6035ad9 , 0x00003ffe // log(1/frcpa(1+254/2^-8))
  474. data8 0xb1321ff67cba178c , 0x00003ffe // log(1/frcpa(1+255/2^-8))
  475. .align 32
  // Both entry points are exported; they share the body at LOG_LN_X and are
  // told apart only by the predicate pair p7/p8 set in their first bundle.
  476. .global log#
  477. .global log10#
  478. // log10 has p7 true, p8 false
  479. // log has p8 true, p7 false
  480. .section .text
  // double log10(double): input and result are in f8.
  // This entry only starts the work that is common to both functions and then
  // branches into the shared code at LOG_LN_X.
  481. .proc log10#
  482. .align 32
  483. log10:
  484. { .mfi
  // Register frame: 1 input, 18 locals, 4 outputs (r32..r54); the caller's
  // ar.pfs is written to r32.
  485. alloc r32=ar.pfs,1,18,4,0
  // log_C = frcpa(x), the reciprocal approximation C used to form r = C*x - 1
  486. (p0) frcpa.s1 log_C,p9 = f1,f8
  // r0 == r0 is always true: p7 = 1, p8 = 0 selects the log10 variant
  487. (p0) cmp.eq.unc p7,p8 = r0, r0
  488. }
  489. { .mfb
  // r40 = address of the linkage-table slot for log_p_table; the table
  // address itself is loaded from that slot at LOG_LN_X
  490. (p0) addl r40 = @ltoff(log_p_table), gp
  // normalized copy of x, later used to extract the exponent and significand
  491. (p0) fnorm log_NORM_f8 = f8
  492. (p0) br.sptk LOG_LN_X
  493. }
  494. ;;
  495. .endp log10
  496. .section .text
  // double log(double): natural logarithm, input and result are in f8.
  // Same start-up work as log10, but with p8 = 1 / p7 = 0, and execution
  // falls straight through into LOG_LN_X instead of branching.
  497. .proc log#
  498. .align 32
  499. log:
  500. { .mfi
  // Register frame: 1 input, 18 locals, 4 outputs (r32..r54); the caller's
  // ar.pfs is written to r32.
  501. alloc r32=ar.pfs,1,18,4,0
  // log_C = frcpa(x), the reciprocal approximation C used to form r = C*x - 1
  502. (p0) frcpa.s1 log_C,p9 = f1,f8
  // r0 == r0 is always true: p8 = 1, p7 = 0 selects the natural-log variant
  503. (p0) cmp.eq.unc p8,p7 = r0, r0
  504. }
  505. { .mfi
  // r40 = address of the linkage-table slot for log_p_table; the table
  // address itself is loaded from that slot at LOG_LN_X
  506. (p0) addl r40 = @ltoff(log_p_table), gp
  // normalized copy of x, later used to extract the exponent and significand
  507. (p0) fnorm log_NORM_f8 = f8
  508. nop.i 999
  509. }
  510. ;;
  511. LOG_LN_X:
  512. { .mmf
  513. (p0) addl r43 = @ltoff(log_q_table), gp
  514. ld8 r40 = [r40]
  515. (p0) fms.s1 log_w = f8,f1,f1
  516. }
  517. ;;
  518. { .mmi
  519. ld8 r43 = [r43]
  520. (p0) mov log_GR_exp_16_ones = 0xffff
  521. (p0) mov log_GR_exp_17_ones = 0x1ffff
  522. }
  523. ;;
  524. {.mfi
  525. (p0) ldfpd log_P5,log_P4 = [r40],16
  526. nop.f 999
  527. (p0) mov log_GR_fff9 = 0xfff9 ;;
  528. }
  529. { .mmi
  530. (p0) ldfpd log_Q8,log_Q7 = [r43],16
  531. nop.m 999
  532. nop.i 999 ;;
  533. }
  534. { .mmi
  535. (p0) ldfpd log_Q3,log_Q4 = [r43],16
  536. (p0) ldfd log_P3 = [r40],8
  537. nop.i 999 ;;
  538. }
  539. { .mmf
  540. (p0) ldfd log_Q6 = [r43],8
  541. (p0) ldfd log_P2 = [r40],8
  542. (p0) fms.s1 log_r = log_C,f8,f1 ;;
  543. }
  544. { .mmf
  545. (p0) ldfd log_Q2 = [r43],8
  546. (p0) ldfd log_P1 = [r40],16
  547. (p0) fma.s1 log_wsq = log_w, log_w, f0
  548. ;;
  549. }
  550. { .mmi
  551. (p0) getf.sig log_GR_significand_f8 = log_NORM_f8
  552. (p0) ldfe log_inv_ln10 = [r40],16
  553. nop.i 999 ;;
  554. }
  555. { .mmf
  556. (p0) getf.exp log_GR_signexp_f8 = log_NORM_f8
  557. (p0) ldfd log_Q5 = [r43],8
  558. (p0) fcmp.eq.s1 p10,p0 = log_NORM_f8, f1
  559. ;;
  560. }
  561. { .mmf
  562. (p0) getf.exp log_GR_signexp_w = log_w
  563. (p0) ldfe log_log2 = [r40],16
  564. (p0) fclass.m.unc p6,p0 = f8, 0xa7
  565. ;;
  566. }
  567. { .mfi
  568. (p0) ldfd log_Q1 = [r43],16
  569. (p0) fclass.m.unc p12,p0 = f8, 0x1a
  570. (p0) shl r39 = log_GR_significand_f8,1
  571. }
  572. { .mfi
  573. nop.m 999
  574. (p0) fma.s1 log_rsq = log_r, log_r, f0
  575. (p0) and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
  576. ;;
  577. }
  578. { .mfi
  579. (p0) sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
  580. (p0) fma.s1 log_rp_p4 = log_P5, log_r, log_P4 // u1
  581. (p0) shr.u r39 = r39,56
  582. }
  583. { .mfi
  584. nop.m 999
  585. (p0) fma.s1 log_w4 = log_wsq, log_wsq, f0
  586. (p0) and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w
  587. ;;
  588. }
  589. { .mfi
  590. nop.m 999
  591. (p0) fma.s1 log_rp_q7 = log_Q8, log_w, log_Q7 // v1
  592. shl r39 = r39,4
  593. ;;
  594. }
  595. // p13 <== Large W ln
  596. // p14 <== small w ln
  597. { .mfi
  598. (p0) setf.sig log_int_Nfloat = log_GR_true_exp_f8
  599. (p10) fmerge.s f8 = f0, f0
  600. (p0) add r40 = r39,r40
  601. }
  602. { .mfb
  603. (p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff9
  604. (p0) fma.s1 log_rp_q3 = log_Q4, log_w, log_Q3 // v2
  605. (p10) br.ret.spnt b0 // Exit for x=1.0
  606. ;;
  607. }
  608. { .mbb
  609. nop.m 999
  610. (p6) br.cond.spnt LOG_NAN_ZERO_INF_PN
  611. (p12) br.cond.spnt LOG_NAN_ZERO_INF_PN
  612. ;;
  613. }
  614. { .mbb
  615. (p0) ldfe log_T = [r40]
  616. nop.b 999
  617. nop.b 999
  618. ;;
  619. }
  620. { .mfi
  621. nop.m 999
  622. (p0) fma.s1 log_rp_p3 = log_rp_p4, log_r, log_P3 // u2
  623. (p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff9
  624. ;;
  625. }
  626. { .mfi
  627. nop.m 999
  628. (p0) fma.s1 log_rp_q6 = log_rp_q7, log_w, log_Q6 // v3
  629. nop.i 999 ;;
  630. }
  631. { .mfi
  632. nop.m 999
  633. (p0) fma.s1 log_rp_q2 = log_rp_q3, log_w, log_Q2 // v4
  634. nop.i 999 ;;
  635. }
  636. { .mfi
  637. nop.m 999
  638. (p0) fma.s1 log_rp_p2 = log_rp_p3, log_r, log_P2 // u3
  639. nop.i 999 ;;
  640. }
  641. { .mfi
  642. nop.m 999
  643. (p0) fma.s1 log_Qlo = log_rp_q6, log_w, log_Q5
  644. nop.i 999
  645. }
  646. { .mfi
  647. nop.m 999
  648. (p0) fcvt.xf log_Nfloat = log_int_Nfloat
  649. nop.i 999 ;;
  650. }
  651. { .mfi
  652. nop.m 999
  653. (p0) fma.s1 log_Qhi = log_rp_q2, log_w, log_Q1
  654. nop.i 999 ;;
  655. }
  656. { .mfi
  657. nop.m 999
  658. (p0) fma.s1 log_pseries = log_rp_p2, log_r, log_P1
  659. nop.i 999 ;;
  660. }
  661. { .mfi
  662. nop.m 999
  663. (p0) fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T
  664. nop.i 999
  665. }
  666. { .mfi
  667. nop.m 999
  668. (p0) fma.s1 log_Q = log_Qlo, log_w4, log_Qhi
  669. nop.i 999 ;;
  670. }
  671. // small w, ln <== p13
  672. // small w, log10 <== p10
  673. .pred.rel "mutex",p13,p10
  674. { .mfi
  675. nop.m 999
  676. (p13) fma.s1 log_r2P_r = log_rsq, log_pseries, log_r
  677. nop.i 999
  678. }
  679. { .mfi
  680. nop.m 999
  681. (p10) fma.s1 log_r2P_r = log_rsq, log_pseries, log_r
  682. nop.i 999 ;;
  683. }
  684. // small w, ln <== p14
  685. // small w, log10 <== p11
  686. .pred.rel "mutex",p14,p11
  687. { .mfi
  688. nop.m 999
  689. (p14) fma.d f8 = log_wsq, log_Q, log_w
  690. nop.i 999
  691. }
  692. { .mfi
  693. nop.m 999
  694. (p11) fma.s1 f8 = log_wsq, log_Q, log_w
  695. nop.i 999 ;;
  696. }
  697. { .mfi
  698. nop.m 999
  699. (p10) fma.s1 log_log10_hi = log_T_plus_Nlog2, log_inv_ln10,f0
  700. nop.i 999 ;;
  701. }
  702. // large w, ln <== p13
  703. // large w, log10 <== p10
  704. .pred.rel "mutex",p13,p10
  705. { .mfi
  706. nop.m 999
  707. (p13) fadd.d f8 = log_T_plus_Nlog2, log_r2P_r
  708. nop.i 999
  709. }
  710. { .mfi
  711. nop.m 999
  712. (p10) fma.s1 log_log10_lo = log_inv_ln10, log_r2P_r,f0
  713. nop.i 999 ;;
  714. }
  715. { .mfi
  716. nop.m 999
  717. (p11) fma.d f8 = log_inv_ln10,f8,f0
  718. (p0) nop.i 999
  719. ;;
  720. }
  721. { .mfb
  722. nop.m 999
  723. (p10) fma.d f8 = log_log10_hi, f1, log_log10_lo
  724. (p0) br.ret.sptk b0
  725. ;;
  726. }
  727. LOG_NAN_ZERO_INF_PN:
  728. // qnan snan inf norm unorm 0 -+
  729. // 1 1 0 0 0 0 11 0xc3
  730. // 0 0 0 0 0 1 11 0x7
  731. // 0 0 1 1 1 0 10 0x3a
  732. // 0 0 1 0 0 0 01 0x21
  733. // Save x (f8) in f10
  734. { .mfi
  735. nop.m 999
  736. (p0) fmerge.s f10 = f8,f8
  737. nop.i 999
  738. }
  739. // p11 NAN
  740. // p12 means log(+inf)
  741. // Later p11, p12 used for frcpa
  742. // p8 p9 means ln(+-0) = -inf
  743. // p7 p10 means log(+-0) = -inf
  744. // p13 means ln(-)
  745. // p14 means log(-)
  746. // Log(+nan, -nan)
  747. // At this point we know it is quiet
  748. // So return
  749. { .mfi
  750. nop.m 999
  751. (p0) fclass.m.unc p11,p0 = f8, 0xc3
  752. nop.i 999
  753. }
  754. { .mfi
  755. nop.m 999
  756. (p0) fmerge.ns f6 = f1,f1
  757. nop.i 999 ;;
  758. }
  759. { .mfb
  760. nop.m 999
  761. (p0) fclass.m.unc p12,p0 = f8, 0x21
  762. nop.b 999 ;;
  763. }
  764. { .mbb
  765. nop.m 999
  766. (p11) br.ret.sptk b0
  767. (p12) br.ret.sptk b0 ;;
  768. }
  769. // We stay in ERROR RETURN.
  770. // Input is not a NAN
  771. // p9 means ln(+-0) = -inf
  772. // p10 means log(+-0) = -inf
  773. // Log(+-0) = -inf
  774. { .mfi
  775. nop.m 999
  776. (p8) fclass.m.unc p9,p0 = f10, 0x07
  777. nop.i 999
  778. }
  779. { .mfi
  780. nop.m 999
  781. (p7) fclass.m.unc p10,p0 = f10, 0x07
  782. nop.i 999 ;;
  783. }
  784. // p13 ln(-)
  785. // p14 log(-)
  786. // Log(-inf, -normal, -unnormal) = QNAN indefinite
  787. { .mfi
  788. nop.m 999
  789. (p8) fclass.m.unc p13,p0 = f10, 0x3a
  790. nop.i 999
  791. }
  792. { .mfi
  793. nop.m 999
  794. (p7) fclass.m.unc p14,p0 = f10, 0x3a
  795. nop.i 999 ;;
  796. }
  797. .pred.rel "mutex",p9,p10
  798. { .mmi
  799. (p9) mov log_GR_tag = 2
  800. (p10) mov log_GR_tag = 8
  801. nop.i 999 ;;
  802. }
  803. .pred.rel "mutex",p13,p14
  804. { .mmi
  805. (p13) mov log_GR_tag = 3
  806. (p14) mov log_GR_tag = 9
  807. nop.i 999 ;;
  808. }
  809. .pred.rel "mutex",p9,p10
  810. { .mfi
  811. nop.m 999
  812. (p9) frcpa f8,p11 = f6,f0
  813. nop.i 999
  814. }
  815. { .mfi
  816. nop.m 999
  817. (p10) frcpa f8,p12 = f6,f0
  818. nop.i 999 ;;
  819. }
  820. { .mfi
  821. nop.m 999
  822. (p13) frcpa f8,p11 = f0,f0
  823. nop.i 999
  824. }
  825. { .mfb
  826. nop.m 999
  827. (p14) frcpa f8,p12 = f0,f0
  828. br.cond.sptk __libm_error_region ;;
  829. }
  830. .endp log
  831. // Stack operations when calling error support.
  832. //         (1)               (2)                          (3) (call)                (4)
  833. //    sp -> +          psp -> +                     psp -> +                   sp -> +
  834. //          |                 |                            |                         |
  835. //          |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
  836. //          |                 |                            |                         |
  837. //          | <- GR_Y    Y2 ->|                       Y2 ->| <- GR_Y                 |
  838. //          |                 |                            |                         |
  839. //          |                 | <- GR_X               X1 ->|                         |
  840. //          |                 |                            |                         |
  841. // sp-64 -> +           sp -> +                      sp -> +                         +
  842. //     save ar.pfs         save b0                                              restore gp
  843. //     save gp                                                                  restore ar.pfs
  844. .proc __libm_error_region // Error-return tail: puts x (f10), a second parameter (f1) and the provisional result (f8) on the stack, calls __libm_error_support, then returns the stored result in f8
  845. __libm_error_region: // Entered by br.cond from the special-case path of log, so b0 is not changed on the way in
  846. .prologue // Unwind info: prologue region starts here
  847. // (1) Compute the Parameter 2 slot address from the old sp, save ar.pfs and gp, allocate a 64-byte frame
  848. { .mfi
  849. add GR_Parameter_Y=-32,sp // GR_Parameter_Y = old sp - 32, which is new sp + 32 once the frame is allocated
  850. nop.f 0
  851. .save ar.pfs,GR_SAVE_PFS // Unwind info: ar.pfs is kept in GR_SAVE_PFS
  852. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  853. }
  854. { .mfi
  855. .fframe 64 // Unwind info: fixed frame size of 64 bytes
  856. add sp=-64,sp // Create new stack frame: sp = old sp - 64
  857. nop.f 0
  858. mov GR_SAVE_GP=gp // Save gp, restored after the call in step (4)
  859. };;
  860. // (2) Store Parameter 2, form the Parameter 1 address from the new sp, save b0
  861. { .mmi
  862. stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (f1) at sp+32, then post-increment GR_Parameter_Y by 16 to sp+48
  863. add GR_Parameter_X = 16,sp // Parameter 1 address = sp+16 (sp already lowered by the stop above)
  864. .save b0, GR_SAVE_B0 // Unwind info: b0 is kept in GR_SAVE_B0
  865. mov GR_SAVE_B0=b0 // Save b0, because the br.call below writes b0
  866. };;
  867. .body // Unwind info: prologue ends, body region starts
  868. // (3) Store Parameter 1 and the provisional result, finalize the three pointers, call the error handler
  869. { .mib
  870. stfd [GR_Parameter_X] = f10 // STORE Parameter 1 on stack at sp+16 (f10 is the copy of x made at LOG_NAN_ZERO_INF_PN)
  871. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address = sp+48 (GR_Parameter_Y as left by the post-increment in step 2)
  872. nop.b 0
  873. } // No stop here: this bundle and the next form one instruction group, so both reads of GR_Parameter_Y see sp+48
  874. { .mib
  875. stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack at sp+48 (f8 is the provisional result produced by frcpa in log)
  876. add GR_Parameter_Y = -16,GR_Parameter_Y // Move GR_Parameter_Y back to the Parameter 2 slot at sp+32
  877. br.call.sptk b0=__libm_error_support# // Call error handling function
  878. };;
  879. { .mmi
  880. nop.m 0
  881. nop.m 0
  882. add GR_Parameter_RESULT = 48,sp // Recompute the result address from sp after the call (per the file history, the register was being overwritten)
  883. };;
  884. // (4) Reload the result from the Parameter 3 slot, release the frame, restore b0, gp and ar.pfs, return
  885. { .mmi
  886. ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack (sp+48) into f8
  887. .restore // Unwind info: marks the stack pointer restore that follows
  888. add sp = 64,sp // Restore stack pointer (release the 64-byte frame)
  889. mov b0 = GR_SAVE_B0 // Restore return address saved in step (2)
  890. };;
  891. { .mib
  892. mov gp = GR_SAVE_GP // Restore gp
  893. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  894. br.ret.sptk b0 // Return through the restored b0
  895. };;
  896. .endp __libm_error_region
  897. .type __libm_error_support#,@function
  898. .global __libm_error_support#