Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

546 lines
12 KiB

  1. .file "fmodf.s"
  2. // Copyright (c) 2000, 2001, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
  6. // Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
  7. // Software Lab, Intel Corporation.
  8. //
  9. // WARRANTY DISCLAIMER
  10. //
  11. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  12. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  13. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  14. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  15. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  16. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  17. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  18. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  19. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  20. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. //
  23. // Intel Corporation is the author of this code, and requests that all
  24. // problem reports or change requests be submitted to it directly at
  25. // http://developer.intel.com/opensource.
  26. //
  27. // History
  28. //====================================================================
  29. // 2/02/00 Initial version
  30. // 3/02/00 New Algorithm
  31. // 4/04/00 Unwind support added
  32. // 8/15/00 Bundle added after call to __libm_error_support to properly
  33. // set [the previously overwritten] GR_Parameter_RESULT.
  34. //11/28/00 Set FR_Y to f9
  35. //
  36. // API
  37. //====================================================================
  38. // float fmodf(float,float);
  39. //
  40. // Overview of operation
  41. //====================================================================
  42. // fmod(a,b)=a-i*b,
  43. // where i is an integer such that, if b!=0,
  44. // |i|<|a/b| and |a/b-i|<1
  45. // Algorithm
  46. //====================================================================
  47. // a). if |a|<|b|, return a
  48. // b). get quotient and reciprocal overestimates accurate to
  49. // 33 bits (q2,y2)
  50. // c). if the exponent difference (exponent(a)-exponent(b))
  51. // is less than 32, truncate quotient to integer and
  52. // finish in one iteration
  53. // d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
  54. // round quotient estimate to single precision (k=RN(q2)),
  55. // calculate partial remainder (a'=a-k*b),
  56. // get quotient estimate (a'*y2), and repeat from c).
  57. // Special cases
  58. //====================================================================
  59. // b=+/-0: return NaN, call libm_error_support
  60. // a=+/-Inf, a=NaN or b=NaN: return NaN
  61. // Registers used
  62. //====================================================================
  63. // Predicate registers: p6-p11
  64. // General registers: r2,r29,r32 (ar.pfs), r33-r39
  65. // Floating point registers: f6-f15
  66. .section .text
  67. GR_SAVE_B0 = r33
  68. GR_SAVE_PFS = r34
  69. GR_SAVE_GP = r35
  70. GR_SAVE_SP = r36
  71. GR_Parameter_X = r37
  72. GR_Parameter_Y = r38
  73. GR_Parameter_RESULT = r39
  74. GR_Parameter_TAG = r40
  75. FR_X = f10
  76. FR_Y = f9
  77. FR_RESULT = f8
  78. .proc fmodf#
  79. .align 32
  80. .global fmodf#
  81. .align 32
  82. fmodf:
  83. // inputs in f8, f9
  84. // result in f8
  85. { .mfi
  86. alloc r32=ar.pfs,1,4,4,0
  87. // f6=|a|
  88. fmerge.s f6=f0,f8
  89. mov r2 = 0x0ffdd
  90. }
  91. {.mfi
  92. nop.m 0
  93. // f7=|b|
  94. fmerge.s f7=f0,f9
  95. nop.i 0;;
  96. }
  97. { .mfi
  98. setf.exp f11 = r2
  99. // (1) y0
  100. frcpa.s1 f10,p6=f6,f7
  101. nop.i 0
  102. }
  103. // eliminate special cases
  104. // Y +-NAN, +-inf, +-0? p7
  105. { .mfi
  106. nop.m 999
  107. fclass.m.unc p7,p0 = f9, 0xe7
  108. nop.i 999;;
  109. }
  110. // qnan snan inf norm unorm 0 -+
  111. // 1 1 1 0 0 0 11
  112. // e 3
  113. // X +-NAN, +-inf, ? p9
  114. { .mfi
  115. nop.m 999
  116. fclass.m.unc p9,p0 = f8, 0xe3
  117. nop.i 999
  118. }
  119. // |x| < |y|? Return x p8
  120. { .mfi
  121. nop.m 999
  122. fcmp.lt.unc.s1 p8,p0 = f6,f7
  123. nop.i 999 ;;
  124. }
  125. { .mfi
  126. nop.m 0
  127. // normalize y (if |x|<|y|)
  128. (p8) fma.s0 f9=f9,f1,f0
  129. nop.i 0;;
  130. }
  131. { .mfi
  132. mov r2=0x1001f
  133. // (2) q0=a*y0
  134. (p6) fma.s1 f13=f6,f10,f0
  135. nop.i 0
  136. }
  137. { .mfi
  138. nop.m 0
  139. // (3) e0 = 1 - b * y0
  140. (p6) fnma.s1 f12=f7,f10,f1
  141. nop.i 0;;
  142. }
  143. {.mfi
  144. nop.m 0
  145. // normalize x (if |x|<|y|)
  146. (p8) fma.s.s0 f8=f8,f1,f0
  147. nop.i 0
  148. }
  149. {.bbb
  150. (p9) br.cond.spnt FMOD_X_NAN_INF
  151. (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
  152. // if |x|<|y|, return
  153. (p8) br.ret.spnt b0;;
  154. }
  155. {.mfi
  156. nop.m 0
  157. // normalize x
  158. fma.s0 f6=f6,f1,f0
  159. nop.i 0
  160. }
  161. {.mfi
  162. nop.m 0
  163. // normalize y
  164. fma.s0 f7=f7,f1,f0
  165. nop.i 0;;
  166. }
  167. {.mfi
  168. // f15=2^32
  169. setf.exp f15=r2
  170. // (4) q1=q0+e0*q0
  171. (p6) fma.s1 f13=f12,f13,f13
  172. nop.i 0
  173. }
  174. { .mfi
  175. nop.m 0
  176. // (5) e1 = e0 * e0 + 2^-34
  177. (p6) fma.s1 f14=f12,f12,f11
  178. nop.i 0;;
  179. }
  180. {.mlx
  181. nop.m 0
  182. movl r2=0x33a00000;;
  183. }
  184. { .mfi
  185. nop.m 0
  186. // (6) y1 = y0 + e0 * y0
  187. (p6) fma.s1 f10=f12,f10,f10
  188. nop.i 0;;
  189. }
  190. {.mfi
  191. // set f12=1.25*2^{-24}
  192. setf.s f12=r2
  193. // (7) q2=q1+e1*q1
  194. (p6) fma.s1 f13=f13,f14,f13
  195. nop.i 0;;
  196. }
  197. {.mfi
  198. nop.m 0
  199. fmerge.s f9=f8,f9
  200. nop.i 0
  201. }
  202. { .mfi
  203. nop.m 0
  204. // (8) y2 = y1 + e1 * y1
  205. (p6) fma.s1 f10=f14,f10,f10
  206. // set p6=0, p10=0
  207. cmp.ne.and p6,p10=r0,r0;;
  208. }
  209. .align 32
  210. loop24:
  211. {.mfi
  212. nop.m 0
  213. // compare q2, 2^32
  214. fcmp.lt.unc.s1 p8,p7=f13,f15
  215. nop.i 0
  216. }
  217. {.mfi
  218. nop.m 0
  219. // will truncate quotient to integer, if exponent<32 (in advance)
  220. fcvt.fx.trunc.s1 f11=f13
  221. nop.i 0;;
  222. }
  223. {.mfi
  224. nop.m 0
  225. // if exponent>32, round quotient to single precision (perform in advance)
  226. fma.s.s1 f13=f13,f1,f0
  227. nop.i 0;;
  228. }
  229. {.mfi
  230. nop.m 0
  231. // set f12=sgn(a)
  232. (p8) fmerge.s f12=f8,f1
  233. nop.i 0
  234. }
  235. {.mfi
  236. nop.m 0
  237. // normalize truncated quotient
  238. (p8) fcvt.xf f13=f11
  239. nop.i 0;;
  240. }
  241. { .mfi
  242. nop.m 0
  243. // calculate remainder (assuming f13=RZ(Q))
  244. (p7) fnma.s1 f14=f13,f7,f6
  245. nop.i 0
  246. }
  247. {.mfi
  248. nop.m 0
  249. // also if exponent>32, round quotient to single precision
  250. // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
  251. (p7) fnma.s.s1 f11=f13,f12,f13
  252. nop.i 0;;
  253. }
  254. {.mfi
  255. nop.m 0
  256. // (p8) calculate remainder (82-bit format)
  257. (p8) fnma.s1 f11=f13,f7,f6
  258. nop.i 0
  259. }
  260. {.mfi
  261. nop.m 0
  262. // (p7) calculate remainder (assuming f11=RZ(Q))
  263. (p7) fnma.s1 f6=f11,f7,f6
  264. nop.i 0;;
  265. }
  266. {.mfi
  267. nop.m 0
  268. // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
  269. (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
  270. nop.i 0;;
  271. }
  272. {.mfi
  273. nop.m 0
  274. // get new quotient estimation: a'*y2
  275. (p7) fma.s1 f13=f14,f10,f0
  276. nop.i 0
  277. }
  278. {.mfb
  279. nop.m 0
  280. // was f14=RZ(Q) ? (then new remainder f14>=0)
  281. (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
  282. nop.b 0;;
  283. }
  284. .pred.rel "mutex",p6,p10
  285. {.mfb
  286. nop.m 0
  287. // add b to estimated remainder (to cover the case when the quotient was overestimated)
  288. // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
  289. (p6) fma.s.s0 f8=f11,f12,f9
  290. nop.b 0
  291. }
  292. {.mfb
  293. nop.m 0
  294. // calculate remainder (single precision)
  295. // set correct sign of result before returning
  296. (p10) fma.s.s0 f8=f11,f12,f0
  297. (p8) br.ret.sptk b0;;
  298. }
  299. {.mfi
  300. nop.m 0
  301. // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
  302. (p7) fma.s1 f13=f6,f10,f0
  303. nop.i 0
  304. }
  305. {.mfb
  306. nop.m 0
  307. // if f14 was RZ(Q), set remainder to f14
  308. (p9) mov f6=f14
  309. br.cond.sptk loop24;;
  310. }
  311. { .mmb
  312. nop.m 0
  313. nop.m 0
  314. br.ret.sptk b0;;
  315. }
  316. FMOD_X_NAN_INF:
  317. // Y zero ?
  318. {.mfi
  319. nop.m 0
  320. fma.s1 f10=f9,f1,f0
  321. nop.i 0;;
  322. }
  323. {.mfi
  324. nop.m 0
  325. fcmp.eq.unc.s1 p11,p0=f10,f0
  326. nop.i 0;;
  327. }
  328. {.mib
  329. nop.m 0
  330. nop.i 0
  331. // if Y zero
  332. (p11) br.cond.spnt FMOD_Y_ZERO;;
  333. }
  334. // X infinity? Return QNAN indefinite
  335. { .mfi
  336. nop.m 999
  337. fclass.m.unc p8,p9 = f8, 0x23
  338. nop.i 999;;
  339. }
  340. // Y NaN ?
  341. {.mfi
  342. nop.m 999
  343. (p8) fclass.m p9,p8=f9,0xc3
  344. nop.i 0;;
  345. }
  346. {.mfi
  347. nop.m 999
  348. (p8) frcpa.s0 f8,p0 = f8,f8
  349. nop.i 0
  350. }
  351. { .mfi
  352. nop.m 999
  353. // also set Denormal flag if necessary
  354. (p8) fma.s0 f9=f9,f1,f0
  355. nop.i 999 ;;
  356. }
  357. { .mfb
  358. nop.m 999
  359. (p8) fma.s f8=f8,f1,f0
  360. nop.b 999 ;;
  361. }
  362. { .mfb
  363. nop.m 999
  364. (p9) frcpa.s0 f8,p7=f8,f9
  365. br.ret.sptk b0 ;;
  366. }
  367. FMOD_Y_NAN_INF_ZERO:
  368. // Y INF
  369. { .mfi
  370. nop.m 999
  371. fclass.m.unc p7,p0 = f9, 0x23
  372. nop.i 999 ;;
  373. }
  374. { .mfb
  375. nop.m 999
  376. (p7) fma.s f8=f8,f1,f0
  377. (p7) br.ret.spnt b0 ;;
  378. }
  379. // Y NAN?
  380. { .mfi
  381. nop.m 999
  382. fclass.m.unc p9,p0 = f9, 0xc3
  383. nop.i 999 ;;
  384. }
  385. { .mfb
  386. nop.m 999
  387. (p9) fma.s f8=f9,f1,f0
  388. (p9) br.ret.spnt b0 ;;
  389. }
  390. FMOD_Y_ZERO:
  391. // Y zero? Must be zero at this point
  392. // because it is the only choice left.
  393. // Return QNAN indefinite
  394. {.mfi
  395. nop.m 0
  396. // set Invalid
  397. frcpa f12,p0=f0,f0
  398. nop.i 999
  399. }
  400. // X NAN?
  401. { .mfi
  402. nop.m 999
  403. fclass.m.unc p9,p10 = f8, 0xc3
  404. nop.i 999 ;;
  405. }
  406. { .mfi
  407. nop.m 999
  408. (p10) fclass.nm p9,p10 = f8, 0xff
  409. nop.i 999 ;;
  410. }
  411. {.mfi
  412. nop.m 999
  413. (p9) frcpa f11,p7=f8,f0
  414. nop.i 0;;
  415. }
  416. { .mfi
  417. nop.m 999
  418. (p10) frcpa f11,p7 = f0,f0
  419. nop.i 999;;
  420. }
  421. { .mfi
  422. nop.m 999
  423. fmerge.s f10 = f8, f8
  424. nop.i 999
  425. }
  426. { .mfi
  427. nop.m 999
  428. fma.s f8=f11,f1,f0
  429. nop.i 999;;
  430. }
  431. EXP_ERROR_RETURN:
  432. { .mib
  433. nop.m 0
  434. mov GR_Parameter_TAG=122
  435. br.sptk __libm_error_region;;
  436. }
  437. .endp fmodf
  438. .proc __libm_error_region
  439. __libm_error_region:
  440. .prologue
  441. { .mfi
  442. add GR_Parameter_Y=-32,sp // Parameter 2 value
  443. nop.f 0
  444. .save ar.pfs,GR_SAVE_PFS
  445. mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
  446. }
  447. { .mfi
  448. .fframe 64
  449. add sp=-64,sp // Create new stack
  450. nop.f 0
  451. mov GR_SAVE_GP=gp // Save gp
  452. };;
  453. { .mmi
  454. stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
  455. add GR_Parameter_X = 16,sp // Parameter 1 address
  456. .save b0, GR_SAVE_B0
  457. mov GR_SAVE_B0=b0 // Save b0
  458. };;
  459. .body
  460. { .mib
  461. stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
  462. add GR_Parameter_RESULT = 0,GR_Parameter_Y
  463. nop.b 0 // Parameter 3 address
  464. }
  465. { .mib
  466. stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
  467. add GR_Parameter_Y = -16,GR_Parameter_Y
  468. br.call.sptk b0=__libm_error_support#;; // Call error handling function
  469. }
  470. { .mmi
  471. nop.m 0
  472. nop.m 0
  473. add GR_Parameter_RESULT = 48,sp
  474. };;
  475. { .mmi
  476. ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
  477. .restore
  478. add sp = 64,sp // Restore stack pointer
  479. mov b0 = GR_SAVE_B0 // Restore return address
  480. };;
  481. { .mib
  482. mov gp = GR_SAVE_GP // Restore gp
  483. mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
  484. br.ret.sptk b0 // Return
  485. };;
  486. .endp __libm_error_region
  487. .type __libm_error_support#,@function
  488. .global __libm_error_support#