Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

298 lines
7.4 KiB

  1. .file "logbf.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
  6. // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
  7. //
  8. // WARRANTY DISCLAIMER
  9. //
  10. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  11. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  12. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  13. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  14. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  15. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  16. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  17. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  18. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  19. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  20. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  21. //
  22. // Intel Corporation is the author of this code, and requests that all
  23. // problem reports or change requests be submitted to it directly at
  24. // http://developer.intel.com/opensource.
  25. //
  26. // History
  27. //==============================================================
  28. // 2/02/00 Initial version
  29. // 2/16/00 Modified to conform to C9X
  30. // 3/16/00 Improved speed
  31. // 4/04/00 Unwind support added
  32. // 5/30/00 Fixed bug when x double-extended denormal
  33. // 8/15/00 Bundle added after call to __libm_error_support to properly
  34. // set [the previously overwritten] GR_Parameter_RESULT.
  35. //
  36. // API
  37. //==============================================================
  38. // float _logbf( float x);
  39. //
  40. // Overview of operation
  41. //==============================================================
  42. // The logbf function extracts the exponent of x as an integer in
  43. // floating-point format.
  44. // logbf returns the unbiased binary exponent of x, i.e. floor(log2(|x|)), as a float
  45. // logbf is similar to ilogbf but differs in the following ways:
  46. // +-inf
  47. // ilogbf: returns INT_MAX
  48. // logbf: returns +inf
  49. // NaN returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
  50. // ilogbf: returns INT_MAX (7fffffff)
  51. // logbf: returns QNAN (quietized SNAN)
  52. // 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
  53. // ilogbf: returns -INT_MAX (80000001)
  54. // logbf: returns -inf, raises the divide-by-zero exception,
  55. // and calls libm_error_support to set domain error
  56. //
  57. // Registers used
  58. //==============================================================
  59. // general registers used:
  60. // ar.pfs r32
  61. // r33 -> r37
  62. // r38 -> r41 used as parameters to error path
  63. //
  64. // predicate registers used:
  65. // p6, p7, p8
  66. //
  67. // floating-point registers used:
  68. // f9, f10, f11
  69. // f8, input
  70. GR_SAVE_B0 = r34 // Error path: holds b0 across the error-support call (r34 is also the exponent mask in _logbf)
  71. // NOTE: r40 is GR_Parameter_RESULT, the address of the result slot used by the error path
  72. GR_SAVE_PFS = r32 // Holds ar.pfs (written by alloc in _logbf and again in the error region)
  73. GR_SAVE_GP = r35 // Error path: holds gp across the error-support call (r35 is scratch in _logbf)
  74. GR_Parameter_X = r38 // Error path: address of the stored input x
  75. GR_Parameter_Y = r39 // Error path: address of the stored second operand
  76. GR_Parameter_RESULT = r40 // Error path: address of the stored result
  77. GR_Parameter_TAG = r41 // Error path: tag value handed to the error-support routine
  78. FR_X = f8 // Input x, also the register the result is returned in
  79. FR_Y = f0 // Value stored as the second operand on the error path
  80. FR_RESULT = f10 // Result computed by the zero path before the error path runs
  81. .align 32
  82. .global _logbf#
  83. .section .text
  84. .proc _logbf# // float _logbf(float x): x arrives in f8, result is returned in f8
  85. .align 32
  86. _logbf: // Classify x, then take the normal, denormal, NaN/Inf or zero path
  87. // fclass mask for the unnormal test (p8):
  88. // qnan snan inf norm unorm 0 -+
  89. // 0 0 0 0 1 0 11 = 0x0b
  90. { .mfi
  91. alloc r32=ar.pfs,1,5,4,0 // r32 = ar.pfs, new frame of 1 input, 5 locals, 4 outputs
  92. (p0) fclass.m.unc p8,p0 = f8, 0x0b // p8 = x is unnormal (either sign)
  93. nop.i 999
  94. }
  95. // X NORMAL
  96. // r37 = exp(f8) - 0xffff (biased exponent minus the bias)
  97. // sig(f9) = r37
  98. // f8 = convert_to_fp(sig(f9)), rounded to single
  99. { .mfi
  100. (p0) getf.exp r35 = f8 // r35 = sign/exponent field of x, masked below
  101. (p0) fnorm f10=f8 // f10 = normalized copy of x, consumed by LOGB_DENORM
  102. nop.i 999 ;;
  103. }
  104. // fclass mask for the NaN/infinity test (p6):
  105. // qnan snan inf norm unorm 0 -+
  106. // 1 1 1 0 0 0 11 = 0xe3
  107. { .mmf
  108. (p0) mov r33 = 0xffff // r33 = exponent bias to subtract
  109. (p0) mov r34 = 0x1ffff // r34 = mask applied to the getf.exp result
  110. (p0) fclass.m.unc p6,p0 = f8, 0xe3 ;; // p6 = x is NaN or infinity
  111. }
  112. { .mfb
  113. (p0) and r36 = r35, r34 // r36 = biased exponent of x
  114. (p0) fclass.m.unc p7,p0 = f8, 0x07 // p7 = x is zero (either sign)
  115. (p8) br.cond.spnt LOGB_DENORM ;; // unnormal input is handled out of line
  116. }
  117. { .mib
  118. (p0) sub r37 = r36, r33 // r37 = unbiased exponent
  119. nop.i 999
  120. (p6) br.cond.spnt LOGB_NAN_INF ;;
  121. }
  122. { .mib
  123. (p0) setf.sig f9 = r37 // place the integer exponent in the significand of f9
  124. nop.i 999
  125. (p7) br.cond.spnt LOGB_ZERO ;;
  126. }
  127. { .mfi
  128. nop.m 999
  129. (p0) fcvt.xf f10 = f9 // f10 = integer in f9 converted to floating point
  130. nop.i 999 ;;
  131. }
  132. { .mfb
  133. nop.m 999
  134. (p0) fnorm.s f8 = f10 // result rounded to single precision in f8
  135. (p0) br.ret.sptk b0 ;;
  136. }
  137. LOGB_DENORM:
  138. // Form signexp of 2^64 in case we need to scale a denormal
  139. // Check whether the normalized copy in f10 is still unnormal (double-extended denormal)
  140. { .mfi
  141. (p0) mov r38 = 0x1003f // 0xffff + 64, r38 is scratch on this path
  142. (p0) fclass.m.unc p8,p0 = f10, 0x0b // p8 = f10 is still unnormal after fnorm
  143. nop.i 999 ;;
  144. }
  145. // Form 2^64 in case we need to scale a denormal
  146. { .mfi
  147. (p0) setf.exp f11 = r38 // f11 = 2^64
  148. nop.f 999
  149. nop.i 999 ;;
  150. }
  151. // If double-extended denormal add 64 to exponent bias for scaling
  152. // If double-extended denormal form x * 2^64 which is normal
  153. { .mfi
  154. (p8) add r33 = 64, r33 // larger bias cancels the 2^64 scale factor
  155. (p8) fmpy f10 = f10, f11 // f10 = x * 2^64
  156. nop.i 999 ;;
  157. }
  158. // Logic is the same as the normal path but uses the normalized input in f10
  159. { .mmi
  160. (p0) getf.exp r35 = f10 ;; // r35 = sign/exponent field of normalized x
  161. nop.m 999
  162. nop.i 999 ;;
  163. }
  164. { .mmi
  165. (p0) and r36 = r35, r34 ;; // r36 = biased exponent
  166. (p0) sub r37 = r36, r33 // r37 = unbiased exponent, corrected for any scaling
  167. nop.i 999 ;;
  168. }
  169. { .mmi
  170. (p0) setf.sig f9 = r37 // place the integer exponent in the significand of f9
  171. nop.m 999
  172. nop.i 999 ;;
  173. }
  174. { .mfi
  175. nop.m 999
  176. (p0) fcvt.xf f10 = f9 // f10 = integer in f9 converted to floating point
  177. nop.i 999 ;;
  178. }
  179. { .mfb
  180. nop.m 999
  181. (p0) fnorm.s f8 = f10 // result rounded to single precision in f8
  182. (p0) br.ret.sptk b0 ;;
  183. }
  184. LOGB_NAN_INF:
  185. // X NAN or INFINITY, return f8 * f8 (+inf for either infinity, quiet NaN for NaN)
  186. { .mfb
  187. nop.m 999
  188. (p0) fma.s f8= f8,f8,f0 // f8 = x * x + 0
  189. (p0) br.ret.sptk b0 ;;
  190. }
  191. LOGB_ZERO:
  192. // X ZERO
  193. // compute 1.0/(-fabs(f8)) = -inf, which sets the divide-by-zero flag, then run on into the error-support code
  194. { .mfi
  195. nop.m 999
  196. (p0) fmerge.s f9 = f0,f8 // f9 = fabs(x)
  197. nop.i 999 ;;
  198. }
  199. { .mfi
  200. nop.m 999
  201. (p0) fmerge.ns f10 = f0,f9 // f10 = -fabs(x)
  202. nop.i 999 ;;
  203. }
  204. { .mfi
  205. nop.m 999
  206. (p0) frcpa f10,p6 = f1,f10 // f10 (FR_RESULT) = 1.0 / f10, p6 is not used afterwards
  207. nop.i 999 ;;
  208. }
  209. .endp _logbf // no branch or return here: the zero path continues into the code that follows
  210. .proc __libm_error_region // Stores x, the second operand and the result on the stack, calls __libm_error_support, returns the stored result in f8
  211. __libm_error_region: // Expects FR_X, FR_Y and FR_RESULT to be set on entry
  212. .prologue
  213. { .mii
  214. add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp - 32, which is new sp + 32
  215. (p0) mov GR_Parameter_TAG = 152 // Tag value passed to the error-support routine
  216. .save ar.pfs,GR_SAVE_PFS
  217. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  218. }
  219. { .mfi
  220. .fframe 64
  221. add sp=-64,sp // Allocate a 64-byte stack frame
  222. nop.f 0
  223. mov GR_SAVE_GP=gp // Save gp
  224. };;
  225. { .mmi
  226. stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp + 32, then advance the pointer to sp + 48
  227. add GR_Parameter_X = 16,sp // Parameter 1 address: sp + 16
  228. .save b0, GR_SAVE_B0
  229. mov GR_SAVE_B0=b0 // Save b0
  230. };;
  231. .body
  232. { .mib
  233. stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
  234. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address: sp + 48
  235. nop.b 0
  236. }
  237. { .mib
  238. stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (the result) at sp + 48
  239. add GR_Parameter_Y = -16,GR_Parameter_Y // Point Parameter 2 address back at sp + 32
  240. br.call.sptk b0=__libm_error_support# // Call error handling function
  241. };;
  242. { .mmi
  243. nop.m 0
  244. nop.m 0
  245. add GR_Parameter_RESULT = 48,sp // Recompute the result address, the register may have been overwritten by the call
  246. };;
  247. { .mmi
  248. ldfs f8 = [GR_Parameter_RESULT] // Load the return value from the result slot
  249. .restore
  250. add sp = 64,sp // Release the 64-byte frame
  251. mov b0 = GR_SAVE_B0 // Restore return address
  252. };;
  253. { .mib
  254. mov gp = GR_SAVE_GP // Restore gp
  255. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  256. br.ret.sptk b0 // Return to the caller of _logbf
  257. };;
  258. .endp __libm_error_region
  259. .type __libm_error_support#,@function // Error handler is not defined in the code shown here
  260. .global __libm_error_support#