Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

532 lines
11 KiB

  1. .file "fmod.s"
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
  6. // Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
  7. // Software Lab, Intel Corporation.
  8. //
  9. // WARRANTY DISCLAIMER
  10. //
  11. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  12. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  13. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  14. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  15. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  16. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  17. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  18. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  19. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  20. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. //
  23. // Intel Corporation is the author of this code, and requests that all
  24. // problem reports or change requests be submitted to it directly at
  25. // http://developer.intel.com/opensource.
  26. //
  27. // History
  28. //====================================================================
  29. // 2/02/00 Initial version
  30. // 3/02/00 New Algorithm
  31. // 4/04/00 Unwind support added
  32. // 8/15/00 Bundle added after call to __libm_error_support to properly
  33. // set [the previously overwritten] GR_Parameter_RESULT.
  34. //
  35. // API
  36. //====================================================================
  37. // double fmod(double,double);
  38. //
  39. // Overview of operation
  40. //====================================================================
  41. // fmod(a,b)=a-i*b,
  42. // where i is an integer such that, if b!=0,
  43. // |i|<=|a/b| and |a/b-i|<1 (i.e. i is a/b truncated toward zero)
  44. //
  45. // Algorithm
  46. //====================================================================
  47. // a). if |a|<|b|, return a
  48. // b). get quotient and reciprocal overestimates accurate to
  49. // 33 bits (q2,y2)
  50. // c). if the exponent difference (exponent(a)-exponent(b))
  51. // is less than 32, truncate quotient to integer and
  52. // finish in one iteration
  53. // d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
  54. // round quotient estimate to single precision (k=RN(q2)),
  55. // calculate partial remainder (a'=a-k*b),
  56. // get quotient estimate (a'*y2), and repeat from c).
  57. //
  58. // Special cases
  59. //====================================================================
  60. // b=+/-0: return NaN, call libm_error_support
  61. // a=+/-Inf, a=NaN or b=NaN: return NaN
  62. //
  63. // Registers used
  64. //====================================================================
  65. // Predicate registers: p6-p11
  66. // General registers: r2,r32 (ar.pfs), r33-r40
  67. // Floating point registers: f6-f15
  68. .section .text
  69. GR_SAVE_B0 = r33 // holds b0 across the call in __libm_error_region
  70. GR_SAVE_PFS = r34 // holds ar.pfs across the call in __libm_error_region
  71. GR_SAVE_GP = r35 // holds gp across the call in __libm_error_region
  72. GR_SAVE_SP = r36 // NOTE(review): not referenced by any instruction in this file
  73. GR_Parameter_X = r37 // address of the stack copy of x passed to the error handler
  74. GR_Parameter_Y = r38 // address of the stack copy of y passed to the error handler
  75. GR_Parameter_RESULT = r39 // address of the stack slot holding the result
  76. GR_Parameter_TAG = r40 // error tag (fmod sets it to 121 for y=0)
  77. FR_X = f10 // copy of x stored by the error path (f8 is reused for the result)
  78. FR_Y = f9 // y as received
  79. FR_RESULT = f8 // result register, also the first input
  80. .proc fmod#
  81. .align 32
  82. .global fmod#
  83. .align 32
  84. fmod:
  85. // double fmod(double a, double b): inputs a in f8, b in f9
  86. // result in f8; the b=+/-0 case exits through __libm_error_region with tag 121
  87. { .mfi
  88. alloc r32=ar.pfs,1,4,4,0 // frame: 1 input (r32), 4 locals (r33-r36), 4 outputs (r37-r40)
  89. // f6=|a|
  90. fmerge.s f6=f0,f8
  91. mov r2 = 0x0ffdd // biased exponent of 2^-34 (bias 0xffff), moved into f11 below
  92. }
  93. {.mfi
  94. nop.m 0
  95. // f7=|b|
  96. fmerge.s f7=f0,f9
  97. nop.i 0;;
  98. }
  99. { .mfi
  100. setf.exp f11 = r2 // f11=2^-34, the constant added in step (5)
  101. // (1) y0 ~ 1/|b|; p6 guards the refinement steps (2)-(8)
  102. frcpa.s1 f10,p6=f6,f7
  103. nop.i 0
  104. }
  105. // Y +-NAN, +-inf, +-0? p7
  106. { .mfi
  107. nop.m 999
  108. (p0) fclass.m.unc p7,p0 = f9, 0xe7
  109. nop.i 999;;
  110. }
  111. // fclass mask bits: qnan snan inf norm unorm 0 -+
  112. // 1 1 1 0 0 0 11
  113. // e 3 (= 0xe3, the mask used for X below)
  114. // X +-NAN, +-inf, ? p9
  115. { .mfi
  116. nop.m 999
  117. (p0) fclass.m.unc p9,p0 = f8, 0xe3
  118. nop.i 999
  119. }
  120. // |x| < |y|? Return x p8
  121. { .mfi
  122. nop.m 999
  123. (p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
  124. nop.i 999 ;;
  125. }
  126. { .mfi
  127. nop.m 0
  128. // normalize y (if |x|<|y|); done in status field s0, unlike the s1 work below
  129. (p8) fma.s0 f9=f9,f1,f0
  130. nop.i 0;;
  131. }
  132. { .mfi
  133. mov r2=0x1001f // biased exponent of 2^32 (bias 0xffff), moved into f15 below
  134. // (2) q0=a*y0 (computed on |a|, held in f6)
  135. (p6) fma.s1 f13=f6,f10,f0
  136. nop.i 0
  137. }
  138. { .mfi
  139. nop.m 0
  140. // (3) e0 = 1 - b * y0 (computed on |b|, held in f7)
  141. (p6) fnma.s1 f12=f7,f10,f1
  142. nop.i 0;;
  143. }
  144. {.mfi
  145. nop.m 0
  146. // normalize x (if |x|<|y|): this is the value returned by the p8 early exit
  147. (p8) fma.d.s0 f8=f8,f1,f0
  148. nop.i 0
  149. }
  150. {.bbb
  151. (p9) br.cond.spnt FMOD_X_NAN_INF // x is NaN or Inf (tested first)
  152. (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO // y is NaN, Inf or zero
  153. // if |x|<|y|, return
  154. (p8) br.ret.spnt b0;;
  155. }
  156. {.mfi
  157. nop.m 0
  158. // normalize x (working copy |a| in f6)
  159. fma.s0 f6=f6,f1,f0
  160. nop.i 0
  161. }
  162. {.mfi
  163. nop.m 0
  164. // normalize y (working copy |b| in f7)
  165. fma.s0 f7=f7,f1,f0
  166. nop.i 0;;
  167. }
  168. {.mfi
  169. // f15=2^32
  170. setf.exp f15=r2
  171. // (4) q1=q0+e0*q0
  172. (p6) fma.s1 f13=f12,f13,f13
  173. nop.i 0
  174. }
  175. { .mfi
  176. nop.m 0
  177. // (5) e1 = e0 * e0 + 2^-34
  178. (p6) fma.s1 f14=f12,f12,f11
  179. nop.i 0;;
  180. }
  181. {.mlx
  182. nop.m 0
  183. movl r2=0x33a00000;; // IEEE single bit pattern of 1.25*2^-24, moved into f12 below
  184. }
  185. { .mfi
  186. nop.m 0
  187. // (6) y1 = y0 + e0 * y0
  188. (p6) fma.s1 f10=f12,f10,f10
  189. nop.i 0;;
  190. }
  191. {.mfi
  192. // set f12=1.25*2^{-24}
  193. setf.s f12=r2
  194. // (7) q2=q1+e1*q1
  195. (p6) fma.s1 f13=f13,f14,f13
  196. nop.i 0;;
  197. }
  198. {.mfi
  199. nop.m 0
  200. fmerge.s f9=f8,f9 // f9 = magnitude of b with the sign of a (used by the final correction)
  201. nop.i 0
  202. }
  203. { .mfi
  204. nop.m 0
  205. // (8) y2 = y1 + e1 * y1
  206. (p6) fma.s1 f10=f14,f10,f10
  207. // set p6=0, p10=0 (r0!=r0 is false, so the and-type compare clears both)
  208. cmp.ne.and p6,p10=r0,r0;;
  209. }
  210. .align 32
  211. loop53:
  212. {.mfi
  213. nop.m 0
  214. // compare q2, 2^32: p8 = final pass (q<2^32), p7 = another reduction pass needed
  215. fcmp.lt.unc.s1 p8,p7=f13,f15
  216. nop.i 0
  217. }
  218. {.mfi
  219. nop.m 0
  220. // will truncate quotient to integer, if exponent<32 (in advance)
  221. fcvt.fx.trunc.s1 f11=f13
  222. nop.i 0;;
  223. }
  224. {.mfi
  225. nop.m 0
  226. // if exponent>32, round quotient to single precision (perform in advance)
  227. fma.s.s1 f13=f13,f1,f0
  228. nop.i 0;;
  229. }
  230. {.mfi
  231. nop.m 0
  232. // set f12=sgn(a) (final pass only; replaces the 1.25*2^-24 constant)
  233. (p8) fmerge.s f12=f8,f1
  234. nop.i 0
  235. }
  236. {.mfi
  237. nop.m 0
  238. // normalize truncated quotient (integer in f11 back to floating point in f13)
  239. (p8) fcvt.xf f13=f11
  240. nop.i 0;;
  241. }
  242. { .mfi
  243. nop.m 0
  244. // calculate remainder (assuming f13=RZ(Q)): f14 = f6 - f13*f7
  245. (p7) fnma.s1 f14=f13,f7,f6
  246. nop.i 0
  247. }
  248. {.mfi
  249. nop.m 0
  250. // also if exponent>32, round quotient to single precision
  251. // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
  252. (p7) fnma.s.s1 f11=f13,f12,f13
  253. nop.i 0;;
  254. }
  255. {.mfi
  256. nop.m 0
  257. // (p8) calculate remainder (82-bit format): f11 = f6 - f13*f7
  258. (p8) fnma.s1 f11=f13,f7,f6
  259. nop.i 0
  260. }
  261. {.mfi
  262. nop.m 0
  263. // (p7) calculate remainder (assuming f11=RZ(Q)): f6 = f6 - f11*f7
  264. (p7) fnma.s1 f6=f11,f7,f6
  265. nop.i 0;;
  266. }
  267. {.mfi
  268. nop.m 0
  269. // Final iteration (p8): is f11 the correct remainder (p10), or is it negative because the quotient was overestimated (p6) ?
  270. (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
  271. nop.i 0;;
  272. }
  273. {.mfi
  274. nop.m 0
  275. // get new quotient estimation: a'*y2 (a' = f14)
  276. (p7) fma.s1 f13=f14,f10,f0
  277. nop.i 0
  278. }
  279. {.mfb
  280. nop.m 0
  281. // was f13=RZ(Q) ? (then new remainder f14>=0): p9 if f14>=0, p7 stays set if f14<0
  282. (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
  283. nop.b 0;;
  284. }
  285. .pred.rel "mutex",p6,p10
  286. {.mfb
  287. nop.m 0
  288. // add b to estimated remainder (to cover the case when the quotient was overestimated)
  289. // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
  290. (p6) fma.d.s0 f8=f11,f12,f9
  291. nop.b 0
  292. }
  293. {.mfb
  294. nop.m 0
  295. // quotient was not overestimated: final remainder, rounded to double precision (fma.d)
  296. // set correct sign of result before returning
  297. (p10) fma.d.s0 f8=f11,f12,f0
  298. (p8) br.ret.sptk b0;;
  299. }
  300. {.mfi
  301. nop.m 0
  302. // if f13!=RZ(Q), get alternative quotient estimation: a''*y2 (a'' = f6)
  303. (p7) fma.s1 f13=f6,f10,f0
  304. nop.i 0
  305. }
  306. {.mfb
  307. nop.m 0
  308. // if f13 was RZ(Q), set remainder to f14
  309. (p9) mov f6=f14
  310. br.cond.sptk loop53;;
  311. }
  312. FMOD_X_NAN_INF: // reached when x is NaN or +/-Inf
  313. // Y zero ?
  314. {.mfi
  315. nop.m 0
  316. fma.s1 f10=f9,f1,f0 // f10 = normalized copy of y for the zero test
  317. nop.i 0;;
  318. }
  319. {.mfi
  320. nop.m 0
  321. fcmp.eq.unc.s1 p11,p0=f10,f0
  322. nop.i 0;;
  323. }
  324. {.mib
  325. nop.m 0
  326. nop.i 0
  327. // if Y zero
  328. (p11) br.cond.spnt FMOD_Y_ZERO;;
  329. }
  330. // X infinity? Return QNAN indefinite (p8 = x is +/-Inf, p9 = x is not Inf, i.e. NaN on this path)
  331. { .mfi
  332. nop.m 999
  333. (p0) fclass.m.unc p8,p9 = f8, 0x23
  334. nop.i 999;;
  335. }
  336. // Y NaN ? (only asked when x is Inf: p9 = y is NaN, p8 = y is not NaN)
  337. {.mfi
  338. nop.m 999
  339. (p8) fclass.m p9,p8=f9,0xc3
  340. nop.i 0;;
  341. }
  342. {.mfi
  343. nop.m 999
  344. (p8) frcpa.s0 f8,p0 = f8,f8 // x Inf, y not NaN: Inf/Inf yields the NaN result in f8
  345. nop.i 0
  346. }
  347. { .mfi
  348. nop.m 999
  349. // also set Denormal flag if necessary
  350. (p8) fma.s0 f9=f9,f1,f0
  351. nop.i 999 ;;
  352. }
  353. { .mfb
  354. nop.m 999
  355. (p8) fma.d f8=f8,f1,f0 // round the result in f8 to double
  356. nop.b 999 ;;
  357. }
  358. { .mfb
  359. nop.m 999
  360. (p9) frcpa.s0 f8,p7=f8,f9 // a NaN operand is present: result comes from frcpa on (x,y)
  361. br.ret.sptk b0 ;;
  362. }
  363. FMOD_Y_NAN_INF_ZERO: // reached when y is NaN, Inf or zero and the x NaN/Inf branch was not taken
  364. // Y INF: return x
  365. { .mfi
  366. nop.m 999
  367. (p0) fclass.m.unc p7,p0 = f9, 0x23
  368. nop.i 999 ;;
  369. }
  370. { .mfb
  371. nop.m 999
  372. (p7) fma.d f8=f8,f1,f0
  373. (p7) br.ret.spnt b0 ;;
  374. }
  375. // Y NAN? return y passed through fma.d
  376. { .mfi
  377. nop.m 999
  378. (p0) fclass.m.unc p9,p0 = f9, 0xc3
  379. nop.i 999 ;;
  380. }
  381. { .mfb
  382. nop.m 999
  383. (p9) fma.d f8=f9,f1,f0
  384. (p9) br.ret.spnt b0 ;;
  385. }
  386. FMOD_Y_ZERO: // entered by fall-through above or by branch from FMOD_X_NAN_INF
  387. // Y zero? Must be zero at this point
  388. // because it is the only choice left.
  389. // Return QNAN indefinite
  390. {.mfi
  391. nop.m 0
  392. // set Invalid (0/0; the f12 result itself is not used afterwards)
  393. frcpa f12,p0=f0,f0
  394. nop.i 0
  395. }
  396. // X NAN? (p9 = x is NaN, p10 = x is not NaN)
  397. { .mfi
  398. nop.m 999
  399. (p0) fclass.m.unc p9,p10 = f8, 0xc3
  400. nop.i 999 ;;
  401. }
  402. { .mfi
  403. nop.m 999
  404. (p10) fclass.nm p9,p10 = f8, 0xff // p9 if x matches none of the 0xff classes (presumably NaTVal -- confirm)
  405. nop.i 999 ;;
  406. }
  407. {.mfi
  408. nop.m 999
  409. (p9) frcpa f11,p7=f8,f0 // x NaN (or no class matched): result f11 derived from x/0
  410. nop.i 0;;
  411. }
  412. { .mfi
  413. nop.m 999
  414. (p10) frcpa f11,p7 = f9,f9 // otherwise: result f11 derived from y/y with y zero
  415. (p0) mov GR_Parameter_TAG = 121 ;; // error tag handed to __libm_error_support (meaning defined there)
  416. }
  417. { .mfi
  418. nop.m 999
  419. (p0) fmerge.s f10 = f8, f8 // FR_X (f10) = copy of x, since f8 is overwritten next
  420. nop.i 999
  421. }
  422. { .mfb
  423. nop.m 999
  424. (p0) fma.d f8=f11,f1,f0 // FR_RESULT (f8) = provisional result, rounded to double
  425. (p0) br.sptk __libm_error_region;;
  426. }
  427. .endp fmod
  428. .proc __libm_error_region // spills FR_X, FR_Y, FR_RESULT to the stack, calls __libm_error_support, reloads f8
  429. __libm_error_region:
  430. .prologue
  431. { .mfi
  432. add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32 (= new sp+32 after the frame is created)
  433. nop.f 0
  434. .save ar.pfs,GR_SAVE_PFS
  435. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  436. }
  437. { .mfi
  438. .fframe 64
  439. add sp=-64,sp // Create new 64-byte stack frame
  440. nop.f 0
  441. mov GR_SAVE_GP=gp // Save gp
  442. };;
  443. { .mmi
  444. stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 at sp+32; pointer post-increments to sp+48
  445. add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
  446. .save b0, GR_SAVE_B0
  447. mov GR_SAVE_B0=b0 // Save b0
  448. };;
  449. .body
  450. { .mib
  451. stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack (sp+16)
  452. add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
  453. nop.b 0
  454. }
  455. { .mib
  456. stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (sp+48)
  457. add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
  458. br.call.sptk b0=__libm_error_support# // Call error handling function
  459. };;
  460. { .mmi
  461. nop.m 0
  462. nop.m 0
  463. add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
  464. };;
  465. { .mmi
  466. ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
  467. .restore
  468. add sp = 64,sp // Restore stack pointer
  469. mov b0 = GR_SAVE_B0 // Restore return address
  470. };;
  471. { .mib
  472. mov gp = GR_SAVE_GP // Restore gp
  473. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  474. br.ret.sptk b0 // Return to the caller of fmod (b0 as it was on entry to this region)
  475. };;
  476. .endp __libm_error_region
  477. .type __libm_error_support#,@function // handler called from __libm_error_region; not defined in this file
  478. .global __libm_error_support#