Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1659 lines
46 KiB

  1. title adj_fdiv - routines to compensate for incorrect Pentium FDIV
  2. ;***
  3. ;adj_fdiv - routines to compensate for incorrect Pentium FDIV
  4. ;
  5. ; Copyright (c) 1994-2001, Microsoft Corporation. All rights reserved.
  6. ;
  7. ;Purpose:
  8. ; Workarounds to correct for broken FDIV
  9. ;
  10. ;Revision History:
  11. ;
  12. ; 12/06/94 Jamie MacCalman
  13. ; initial version, based on Intel fix
  14. ; 12/09/94 Jamie MacCalman
  15. ; added _adj_fpremX & _safe_fdivX entry points
  16. ; 12/13/94 Jamie MacCalman
  17. ; upgraded to V.3 of Intel's workarounds
  18. ; 12/19/94 Jamie MacCalman
  19. ; upgraded to V.4 of Intel's workarounds
  20. ; 12/27/94 Jamie MacCalman
  21. ; upgraded to V.5 (aka "V1.0") of Intel's workarounds
  22. ; 1/13/95 Jamie MacCalman
  23. ; added underscores to fdivp_sti_st & fdivrp_sti_st for ANSI conformance
  24. ;
  25. ; The following code is a PRELIMINARY IMPLEMENTATION of a
  26. ; software patch for the floating point divide instructions.
  27. ;
  28. ;
  29. include cruntime.inc
  30. include mrt386.inc
  31. include elem87.inc
  32. ;
  33. ; Stack variables for divide routines.
  34. ;
  ; Offsets into the STACK_SIZE-byte scratch frame that the register entry
  ; points reserve with sub esp, STACK_SIZE. Unless noted, an offset is
  ; relative to esp in the code that reserved the frame.
  35. DENOM EQU 0 ; tbyte denominator slot
  36. NUMER EQU 12 ; tbyte numerator slot
  37. PREV_CW EQU 28 ; saved control word; used inside fdiv_main_routine, so relative to esp after the call
  38. PATCH_CW EQU 32 ; patched control word; used inside fdiv_main_routine, so relative to esp after the call
  39. DENOM_SAVE EQU 32 ; tbyte copy of the original denominator (fdiv_st and fdiv_sti macros)
  40. MAIN_DENOM EQU 4 ; DENOM as addressed inside fdiv_main_routine (DENOM + 4-byte return address)
  41. MAIN_NUMER EQU 16 ; NUMER as addressed inside fdiv_main_routine (NUMER + 4-byte return address)
  42. SPILL_SIZE EQU 12 ; bytes reserved by the memory-operand entry points to spill one tbyte
  43. MEM_OPERAND EQU 8 ; offset of the memory operand after push eax (saved eax + return address)
  44. STACK_SIZE EQU 44 ; size of the scratch frame
  45. SPILL_MEM_OPERAND EQU 20 ; MEM_OPERAND + SPILL_SIZE: the operand once the spill area is reserved
  46. ONESMASK EQU 0e000000h ; the three mantissa bits that must all be one, positioned as they sit after add eax,eax in fdiv_main_routine
  47. SINGLE_NAN EQU 07f800000h ; exponent field of an m32real, all ones
  48. DOUBLE_NAN EQU 07ff00000h ; exponent field in the high dword of an m64real, all ones
  49. ILLEGAL_OPC EQU 6 ; interrupt vector raised by _adj_fdiv_r for dispatch indexes with no divide form
  50. ;
  51. ; FPREM constants
  52. ;
  ; Frame layout for the FPREM workaround. The plain names are offsets from
  ; the start of the frame; the FPREM_MAIN_* names add FPREM_MAIN_FUDGE so
  ; that they can be used with esp inside _fprem_common.
  53. FPREM_FLT_SIZE EQU 12 ; bytes reserved per tbyte slot
  54. FPREM_DENOM EQU 0 ; denominator slot
  55. FPREM_DENOM_SAVE EQU FPREM_DENOM + FPREM_FLT_SIZE ; saved copy of the denominator
  56. FPREM_NUMER EQU FPREM_DENOM_SAVE + FPREM_FLT_SIZE ; numerator slot
  57. FPREM_PREV_CW EQU FPREM_NUMER + FPREM_FLT_SIZE ; saved control word
  58. FPREM_PATCH_CW EQU FPREM_PREV_CW + 4 ; patched control word
  59. FPREM_SW EQU FPREM_PATCH_CW + 4 ; presumably a status word slot - confirm against its users
  60. FPREM_STACK_SIZE EQU FPREM_SW + 4 ; total frame size
  61. FPREM_RET_SIZE EQU 4 ; bytes taken by a near return address
  62. FPREM_PUSH_SIZE EQU 4 ; bytes taken by one pushed register
  63. FPREM_MAIN_FUDGE EQU FPREM_RET_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE ; return address + the eax, ebx, ecx pushes at the top of _fprem_common
  64. FPREM_MAIN_DENOM EQU FPREM_DENOM + FPREM_MAIN_FUDGE
  65. FPREM_MAIN_DENOM_SAVE EQU FPREM_DENOM_SAVE + FPREM_MAIN_FUDGE
  66. FPREM_MAIN_NUMER EQU FPREM_NUMER + FPREM_MAIN_FUDGE
  67. FPREM_MAIN_PREV_CW EQU FPREM_PREV_CW + FPREM_MAIN_FUDGE
  68. FPREM_MAIN_PATCH_CW EQU FPREM_PATCH_CW + FPREM_MAIN_FUDGE
  69. FPREM_MAIN_FPREM_SW EQU FPREM_SW + FPREM_MAIN_FUDGE
  70. FPREM_ONESMASK EQU 700h ; bits that must all be one in the dword read at denominator offset 6 by _fprem_common
  71. .data
  ; Lookup tables indexed by four mantissa bits of the denominator. A nonzero
  ; entry (indexes 1, 4, 7, 10 and 13) marks a bit pattern that can expose
  ; the divider flaw.
  72. fdiv_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
  73. fdiv_scale_1 DD 03f700000h ;0.9375 (15/16), single precision
  74. fdiv_scale_2 DD 03f880000h ;1.0625 (17/16), single precision
  75. one_shl_63 DD 05f000000h ; 2^63, single precision - used to normalize a denormal denominator
  76. fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
  ; The constants below are raw little-endian byte images.
  77. fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh ; double 0.9375
  78. one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h ; double 2^64
  79. one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh ; double 2^-64
  80. one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh ; double 1.0
  81. half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh ; double 0.5
  82. big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh ; 10-byte extended value with exponent 7ffeh
  ; DEBUG builds only: two exported word variables.
  ; NOTE(review): presumably snapshots of the FPU control and status words -
  ; their users are not visible in this part of the file.
  83. ifdef DEBUG
  84. public fpcw
  85. public fpsw
  86. fpcw dw 0
  87. fpsw dw 0
  88. endif
  ; FPU_STATE - field layout of a 28-byte FPU environment image.
  ; The field sizes below add up to ENV_SIZE.
  ; NOTE(review): looks like the 32-bit protected-mode FNSTENV layout, with
  ; STATUS_WORD declared as a full dword - confirm against the users.
  89. FPU_STATE STRUC
  90. CONTROL_WORD DW ?
  91. reserved_1 DW ?
  92. STATUS_WORD DD ?
  93. TAG_WORD DW ?
  94. reserved_3 DW ?
  95. IP_OFFSET DD ?
  96. CS_SLCT DW ?
  97. OPCODE DW ?
  98. DATA_OFFSET DD ?
  99. OPERAND_SLCT DW ?
  100. reserved_4 DW ?
  101. FPU_STATE ENDS
  102. ENV_SIZE EQU 28 ; total size in bytes of FPU_STATE
  ; dispatch_table - 64 code addresses used by _adj_fdiv_r, which jumps
  ; through dispatch_table[eax*4] after masking eax to 6 bits. Entry N is
  ; the address of labelN inside _adj_fdiv_r.
  103. dispatch_table DD offset FLAT:label0
  104. DD offset FLAT:label1
  105. DD offset FLAT:label2
  106. DD offset FLAT:label3
  107. DD offset FLAT:label4
  108. DD offset FLAT:label5
  109. DD offset FLAT:label6
  110. DD offset FLAT:label7
  111. DD offset FLAT:label8
  112. DD offset FLAT:label9
  113. DD offset FLAT:label10
  114. DD offset FLAT:label11
  115. DD offset FLAT:label12
  116. DD offset FLAT:label13
  117. DD offset FLAT:label14
  118. DD offset FLAT:label15
  119. DD offset FLAT:label16
  120. DD offset FLAT:label17
  121. DD offset FLAT:label18
  122. DD offset FLAT:label19
  123. DD offset FLAT:label20
  124. DD offset FLAT:label21
  125. DD offset FLAT:label22
  126. DD offset FLAT:label23
  127. DD offset FLAT:label24
  128. DD offset FLAT:label25
  129. DD offset FLAT:label26
  130. DD offset FLAT:label27
  131. DD offset FLAT:label28
  132. DD offset FLAT:label29
  133. DD offset FLAT:label30
  134. DD offset FLAT:label31
  135. DD offset FLAT:label32
  136. DD offset FLAT:label33
  137. DD offset FLAT:label34
  138. DD offset FLAT:label35
  139. DD offset FLAT:label36
  140. DD offset FLAT:label37
  141. DD offset FLAT:label38
  142. DD offset FLAT:label39
  143. DD offset FLAT:label40
  144. DD offset FLAT:label41
  145. DD offset FLAT:label42
  146. DD offset FLAT:label43
  147. DD offset FLAT:label44
  148. DD offset FLAT:label45
  149. DD offset FLAT:label46
  150. DD offset FLAT:label47
  151. DD offset FLAT:label48
  152. DD offset FLAT:label49
  153. DD offset FLAT:label50
  154. DD offset FLAT:label51
  155. DD offset FLAT:label52
  156. DD offset FLAT:label53
  157. DD offset FLAT:label54
  158. DD offset FLAT:label55
  159. DD offset FLAT:label56
  160. DD offset FLAT:label57
  161. DD offset FLAT:label58
  162. DD offset FLAT:label59
  163. DD offset FLAT:label60
  164. DD offset FLAT:label61
  165. DD offset FLAT:label62
  166. DD offset FLAT:label63
  ; Word-sized variable placed after dispatch_table.
  ; The ifdef DEBUG block earlier in this data section already defines (and
  ; exports) fpcw, so define it here only when DEBUG is off. Defining it
  ; twice is a symbol redefinition and stops a DEBUG build from assembling.
  ; NOTE(review): presumably scratch storage for an FPU control word - its
  ; users are not visible in this part of the file.
  ifndef DEBUG
  167. fpcw dw 0
  endif
  168. CODESEG
  169. ;
  170. ; PRELIMINARY VERSION for register-register divides.
  171. ;
  172. ; In this implementation the
  173. ; fdiv_main_routine is called,
  174. ; therefore all the stack frame
  175. ; locations are adjusted for the
  176. ; return pointer.
  ; fdiv_main_routine - divide the stored numerator by the stored
  ; denominator, scaling both operands first when the denominator has a bit
  ; pattern that can expose the FDIV flaw.
  ;
  ; In:  tbyte numerator at [esp+MAIN_NUMER] and tbyte denominator at
  ;      [esp+MAIN_DENOM], stored by the caller in its scratch frame.
  ; Out: quotient in ST(0) - every exit path ends in fdivp st(1), st.
  ; Clobbers: eax and flags. May overwrite the denominator slot (denormal
  ;      path) and writes the PREV_CW and PATCH_CW slots when it scales.
  177. fdiv_main_routine PROC NEAR
  178. fld tbyte ptr [esp+MAIN_NUMER] ; load the numerator
  179. fld tbyte ptr [esp+MAIN_DENOM] ; load the denominator
  180. retry:
  181. ; The following three lines test for denormals and zeros.
  182. ; A denormal or zero has a 0 in the explicit digit to the left of the
  183. ; binary point. Since that bit is the high bit of the word, adding
  184. ; it to itself will produce a carry if and only if the number is not
  185. ; denormal or zero.
  186. ;
  187. mov eax, [esp+MAIN_DENOM+4] ; get mantissa bits 32-63
  188. add eax,eax ; shift the one's bit onto carry
  189. jnc denormal ; if no carry, we're denormal
  190. ; The following three lines test the three bits after the four bit
  191. ; pattern (1,4,7,a,d). If these three bits are not all one, then
  192. ; the denominator cannot expose the flaw. This condition is tested by
  193. ; inverting the bits and testing that all are equal to zero afterward.
  194. xor eax, ONESMASK ; invert the bits that must be ones
  195. test eax, ONESMASK ; and make sure they are all ones
  196. jz scale_if_needed ; if all are one scale numbers
  197. fdivp st(1), st ; use of hardware is OK.
  198. ret
  199. ;
  200. ; Now we test the four bits for one of the five patterns.
  201. ;
  202. scale_if_needed:
  203. shr eax, 28 ; keep first 4 bits after point
  204. cmp byte ptr fdiv_risc_table[eax], 0 ; check for (1,4,7,a,d)
  205. jnz divide_scaled ; are in potential problem area
  206. fdivp st(1), st ; use of hardware is OK.
  207. ret
  208. divide_scaled:
  209. mov eax, [esp + MAIN_DENOM+8] ; test denominator exponent
  210. and eax, 07fffh ; if pseudodenormal ensure that only
  211. jz invalid_denom ; invalid exception flag is set
  212. cmp eax, 07fffh ; if NaN or infinity ensure that only
  213. je invalid_denom ; invalid exception flag is set
  214. ;
  215. ; The following six lines turn off exceptions and set the
  216. ; precision control to 80 bits. The former is necessary to
  217. ; force any traps to be taken at the divide instead of the scaling
  218. ; code. The latter is necessary in order to get full precision for
  219. ; codes with incoming 32 and 64 bit precision settings. If
  220. ; it can be guaranteed that before reaching this point, the underflow
  221. ; exception is masked and the precision control is at 80 bits, these
  222. ; six lines can be omitted.
  223. ;
  224. fnstcw [esp+PREV_CW] ; save caller's control word
  225. mov eax, [esp+PREV_CW] ; dword read; only the low word was just stored
  226. or eax, 033fh ; mask exceptions, pc=80
  227. and eax, 0f3ffh ; set rounding mode to nearest
  228. mov [esp+PATCH_CW], eax
  229. fldcw [esp+PATCH_CW] ; mask exceptions & pc=80
  230. ; The following lines check the numerator exponent before scaling.
  231. ; This is done to prevent underflow when scaling the numerator,
  232. ; which will cause a denormal exception flag to be set when the
  233. ; actual divide is performed. This flag would not have been set
  234. ; normally. If there is a risk of underflow, the scale factor is
  235. ; 17/16 instead of 15/16.
  236. ;
  237. mov eax, [esp+MAIN_NUMER+8] ; test numerator exponent
  238. and eax, 07fffh
  239. cmp eax, 00001h ; smallest normal exponent
  240. je small_numer
  241. fmul fdiv_scale_1 ; scale denominator by 15/16
  242. fxch
  243. fmul fdiv_scale_1 ; scale numerator by 15/16
  244. fxch
  245. ;
  246. ; The next line restores the users control word. If the incoming
  247. ; control word had the underflow exception masked and precision
  248. ; control set to 80 bits, this line can be omitted.
  249. ;
  250. fldcw [esp+PREV_CW] ; restore caller's control word
  251. fdivp st(1), st ; use of hardware is OK.
  252. ret
  253. small_numer:
  254. fmul fdiv_scale_2 ; scale denominator by 17/16
  255. fxch
  256. fmul fdiv_scale_2 ; scale numerator by 17/16
  257. fxch
  258. ;
  259. ; The next line restores the users control word. If the incoming
  260. ; control word had the underflow exception masked and precision
  261. ; control set to 80 bits, this line can be omitted.
  262. ;
  263. fldcw [esp+PREV_CW] ; restore caller's control word
  264. fdivp st(1), st ; use of hardware is OK.
  265. ret
  266. denormal:
  267. mov eax, [esp+MAIN_DENOM] ; test for whole mantissa == 0
  268. or eax, [esp+MAIN_DENOM+4] ; test for whole mantissa == 0
  269. jnz denormal_divide_scaled ; denominator is not zero
  270. invalid_denom: ; zero or invalid denominator
  271. fdivp st(1), st ; use of hardware is OK.
  272. ret
  273. denormal_divide_scaled:
  274. mov eax, [esp + MAIN_DENOM + 8] ; get exponent
  275. and eax, 07fffh ; check for zero exponent
  276. jnz invalid_denom ; nonzero exponent without the explicit bit: let the hardware handle it
  277. ;
  278. ; The following six lines turn off exceptions and set the
  279. ; precision control to 80 bits. The former is necessary to
  280. ; force any traps to be taken at the divide instead of the scaling
  281. ; code. The latter is necessary in order to get full precision for
  282. ; codes with incoming 32 and 64 bit precision settings. If
  283. ; it can be guaranteed that before reaching this point, the underflow
  284. ; exception is masked and the precision control is at 80 bits, these
  285. ; six lines can be omitted.
  286. ;
  287. fnstcw [esp+PREV_CW] ; save caller's control word
  288. mov eax, [esp+PREV_CW] ; dword read; only the low word was just stored
  289. or eax, 033fh ; mask exceptions, pc=80
  290. and eax, 0f3ffh ; set rounding mode to nearest
  291. mov [esp+PATCH_CW], eax
  292. fldcw [esp+PATCH_CW] ; mask exceptions & pc=80
  293. mov eax, [esp + MAIN_NUMER +8] ; test numerator exponent
  294. and eax, 07fffh ; check for denormal numerator
  295. je denormal_numer
  296. cmp eax, 07fffh ; NaN or infinity
  297. je invalid_numer
  298. mov eax, [esp + MAIN_NUMER + 4] ; get bits 32..63 of mantissa
  299. add eax, eax ; shift the first bit into carry
  300. jnc invalid_numer ; if there is no carry, we have an
  301. ; invalid numer
  302. jmp numer_ok
  303. denormal_numer:
  304. mov eax, [esp + MAIN_NUMER + 4] ; get bits 32..63 of mantissa
  305. add eax, eax ; shift the first bit into carry
  306. jc invalid_numer ; if there is a carry, we have an
  307. ; invalid numer
  ; Only the in-memory copy of the denominator is scaled below. The FPU
  ; stack keeps the original operands; the scaled copy is what the bit
  ; tests at retry read.
  308. numer_ok:
  309. fxch
  310. fstp st ; pop numerator
  311. fld st ; make copy of denominator
  312. fmul dword ptr[one_shl_63] ; make denominator not denormal
  313. fstp tbyte ptr [esp+MAIN_DENOM] ; save modified denominator
  314. fld tbyte ptr [esp+MAIN_NUMER] ; load numerator
  315. fxch ; restore proper order
  316. fwait
  317. ; The next line restores the users control word. If the incoming
  318. ; control word had the underflow exception masked and precision
  319. ; control set to 80 bits, this line can be omitted.
  320. ;
  321. fldcw [esp+PREV_CW] ; restore caller's control word
  322. jmp retry ; start the whole thing over
  323. invalid_numer:
  324. ;
  325. ; The next line restores the users control word. If the incoming
  326. ; control word had the underflow exception masked and precision
  327. ; control set to 80 bits, this line can be omitted.
  328. ;
  329. fldcw [esp + PREV_CW] ; restore caller's control word
  330. fdivp st(1), st ; use of hardware is OK.
  331. ret
  332. fdiv_main_routine ENDP
  ; fdivr_st - safe form of FDIVR ST, ST(reg_index): ST = ST(reg_index) / ST.
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end. ST(reg_index) is put back
  ; from the NUMER slot, which fdiv_main_routine does not modify.
  333. fdivr_st MACRO reg_index, reg_index_minus1
  334. fstp tbyte ptr [esp+DENOM] ; ST is the denominator
  335. IF reg_index_minus1 GE 1
  336. fxch st(reg_index_minus1) ; bring the old ST(reg_index) to the top
  337. ENDIF
  338. fstp tbyte ptr [esp+NUMER] ; old ST(reg_index) is the numerator
  339. call fdiv_main_routine ; quotient is pushed as ST
  340. IF reg_index_minus1 GE 1
  341. fxch st(reg_index_minus1) ; undo the earlier exchange
  342. ENDIF
  343. fld tbyte ptr [esp+NUMER] ; reload the old ST(reg_index)
  344. fxch st(reg_index) ; quotient to ST, old value back in ST(reg_index)
  345. add esp, STACK_SIZE
  346. ENDM
  ; fdivr_sti - safe form of FDIVR ST(reg_index), ST:
  ; ST(reg_index) = ST / ST(reg_index), ST unchanged.
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end.
  347. fdivr_sti MACRO reg_index, reg_index_minus1
  348. fstp tbyte ptr [esp+NUMER] ; ST is the numerator
  349. IF reg_index_minus1 GE 1
  350. fxch st(reg_index_minus1) ; bring the old ST(reg_index) to the top
  351. ENDIF
  352. fstp tbyte ptr [esp+DENOM] ; old ST(reg_index) is the denominator
  353. call fdiv_main_routine ; quotient is pushed as ST
  354. IF reg_index_minus1 GE 1
  355. fxch st(reg_index_minus1) ; move the quotient into its final slot
  356. ENDIF
  357. fld tbyte ptr [esp+NUMER] ; restore the original ST from the untouched NUMER slot
  358. add esp, STACK_SIZE
  359. ENDM
  ; fdivrp_sti - safe form of FDIVRP ST(reg_index), ST:
  ; ST(reg_index) = ST / ST(reg_index), then the old ST is popped.
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end.
  360. fdivrp_sti MACRO reg_index, reg_index_minus1
  361. fstp tbyte ptr [esp+NUMER] ; ST is the numerator
  362. IF reg_index_minus1 GE 1
  363. fxch st(reg_index_minus1) ; bring the old ST(reg_index) to the top
  364. ENDIF
  365. fstp tbyte ptr [esp+DENOM] ; old ST(reg_index) is the denominator
  366. call fdiv_main_routine ; quotient is pushed as ST
  367. IF reg_index_minus1 GE 1
  368. fxch st(reg_index_minus1) ; move the quotient into its final slot
  369. ENDIF
  370. add esp, STACK_SIZE
  371. ENDM
  ; fdiv_st - safe form of FDIV ST, ST(reg_index): ST = ST / ST(reg_index).
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end. The denominator is kept
  ; twice because fdiv_main_routine may overwrite the DENOM slot.
  372. fdiv_st MACRO reg_index, reg_index_minus1
  373. fstp tbyte ptr [esp+NUMER] ; ST is the numerator
  374. IF reg_index_minus1 GE 1
  375. fxch st(reg_index_minus1) ; bring the old ST(reg_index) to the top
  376. ENDIF
  377. fld st ; duplicate the denominator
  378. fstp tbyte ptr [esp+DENOM] ; working copy for fdiv_main_routine
  379. fstp tbyte ptr [esp+DENOM_SAVE] ; save original denom,
  380. call fdiv_main_routine ; quotient is pushed as ST
  381. IF reg_index_minus1 GE 1
  382. fxch st(reg_index_minus1) ; undo the earlier exchange
  383. ENDIF
  384. fld tbyte ptr [esp+DENOM_SAVE] ; reload the old ST(reg_index)
  385. fxch st(reg_index) ; quotient to ST, old value back in ST(reg_index)
  386. add esp, STACK_SIZE
  387. ENDM
  ; fdiv_sti - safe form of FDIV ST(reg_index), ST:
  ; ST(reg_index) = ST(reg_index) / ST, ST unchanged.
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end. The denominator is kept
  ; twice because fdiv_main_routine may overwrite the DENOM slot.
  388. fdiv_sti MACRO reg_index, reg_index_minus1
  389. fxch st(reg_index) ; numerator to the top
  390. fstp tbyte ptr [esp+NUMER]
  391. IF reg_index_minus1 GE 1
  392. fxch st(reg_index_minus1) ; bring the original ST (denominator) to the top
  393. ENDIF
  394. fld st ; duplicate the denominator
  395. fstp tbyte ptr [esp+DENOM] ; working copy for fdiv_main_routine
  396. fstp tbyte ptr [esp+DENOM_SAVE] ; save original denom,
  397. call fdiv_main_routine ; quotient is pushed as ST
  398. IF reg_index_minus1 GE 1
  399. fxch st(reg_index_minus1) ; move the quotient into its final slot
  400. ENDIF
  401. fld tbyte ptr [esp+DENOM_SAVE] ; restore the original ST
  402. add esp, STACK_SIZE
  403. ENDM
  ; fdivp_sti - safe form of FDIVP ST(reg_index), ST:
  ; ST(reg_index) = ST(reg_index) / ST, then the old ST is popped.
  ; reg_index_minus1 must be reg_index - 1. Expects esp already lowered by
  ; STACK_SIZE and releases that frame at the end.
  404. fdivp_sti MACRO reg_index, reg_index_minus1
  405. fstp tbyte ptr [esp+DENOM] ; ST is the denominator
  406. IF reg_index_minus1 GE 1
  407. fxch st(reg_index_minus1) ; bring the old ST(reg_index) to the top
  408. ENDIF
  409. fstp tbyte ptr [esp+NUMER] ; old ST(reg_index) is the numerator
  410. call fdiv_main_routine ; quotient is pushed as ST
  411. IF reg_index_minus1 GE 1
  412. fxch st(reg_index_minus1) ; move the quotient into its final slot
  413. ENDIF
  414. add esp, STACK_SIZE
  415. ENDM
  ; _adj_fdiv_r - register-register divide dispatcher.
  ;
  ; In:  eax = dispatch index. Only the low 6 bits are used. As laid out
  ;      below, bits 3-5 select the register i and bits 0-2 the form:
  ;      0 FDIV ST,ST(i)    2 FDIVR ST,ST(i)   4 FDIV ST(i),ST
  ;      5 FDIVP ST(i),ST   6 FDIVR ST(i),ST   7 FDIVRP ST(i),ST
  ;      Forms 1 and 3 raise int ILLEGAL_OPC.
  ; Out: FPU stack as the selected instruction would leave it.
  ; Clobbers: eax and flags (the index is masked in place and
  ;      fdiv_main_routine uses eax).
  ;
  ; The i = 0 cases run the hardware instruction directly. All other cases
  ; go through the macros, which call fdiv_main_routine and release the
  ; STACK_SIZE frame reserved here.
  ; NOTE(review): no ret follows int ILLEGAL_OPC. If the interrupt ever
  ; returned, execution would fall into the next label with the frame
  ; already released.
  416. public _adj_fdiv_r
  417. _adj_fdiv_r PROC NEAR
  418. sub esp, STACK_SIZE ; added back at end of fdiv_x macros
  419. and eax, 0000003FH ; upper 26 bits could be anything
  420. jmp dword ptr dispatch_table[eax*4]
  ; ---- register 0: both operands are ST ----
  421. label0::
  422. fdiv st,st(0) ; D8 F0 FDIV ST,ST(0)
  423. add esp, STACK_SIZE
  424. ret
  425. label1::
  426. add esp, STACK_SIZE
  427. int ILLEGAL_OPC
  428. label2::
  429. fdivr st,st(0) ; D8 F8 FDIVR ST,ST(0)
  430. add esp, STACK_SIZE
  431. ret
  432. label3::
  433. add esp, STACK_SIZE
  434. int ILLEGAL_OPC
  435. label4::
  436. fdiv st(0),st ; DC F8/D8 F0 FDIV ST(0),ST
  437. add esp, STACK_SIZE
  438. ret
  439. label5::
  440. fdivp st(0),st ; DE F8 FDIVP ST(0),ST
  441. add esp, STACK_SIZE
  442. ret
  443. label6::
  444. fdivr st(0),st ; DC F0/DE F0 FDIVR ST(0),ST
  445. add esp, STACK_SIZE
  446. ret
  447. label7::
  448. fdivrp st(0),st ; DE F0 FDIVRP ST(0),ST
  449. add esp, STACK_SIZE
  450. ret
  ; ---- register 1 ----
  451. label8::
  452. fdiv_st 1,0
  453. ret
  454. label9::
  455. add esp, STACK_SIZE
  456. int ILLEGAL_OPC
  457. label10::
  458. fdivr_st 1,0
  459. ret
  460. label11::
  461. add esp, STACK_SIZE
  462. int ILLEGAL_OPC
  463. label12::
  464. fdiv_sti 1,0
  465. ret
  466. label13::
  467. fdivp_sti 1,0
  468. ret
  469. label14::
  470. fdivr_sti 1,0
  471. ret
  472. label15::
  473. fdivrp_sti 1,0
  474. ret
  ; ---- register 2 ----
  475. label16::
  476. fdiv_st 2,1
  477. ret
  478. label17::
  479. add esp, STACK_SIZE
  480. int ILLEGAL_OPC
  481. label18::
  482. fdivr_st 2,1
  483. ret
  484. label19::
  485. add esp, STACK_SIZE
  486. int ILLEGAL_OPC
  487. label20::
  488. fdiv_sti 2,1
  489. ret
  490. label21::
  491. fdivp_sti 2,1
  492. ret
  493. label22::
  494. fdivr_sti 2,1
  495. ret
  496. label23::
  497. fdivrp_sti 2,1
  498. ret
  ; ---- register 3 ----
  499. label24::
  500. fdiv_st 3,2
  501. ret
  502. label25::
  503. add esp, STACK_SIZE
  504. int ILLEGAL_OPC
  505. label26::
  506. fdivr_st 3,2
  507. ret
  508. label27::
  509. add esp, STACK_SIZE
  510. int ILLEGAL_OPC
  511. label28::
  512. fdiv_sti 3,2
  513. ret
  514. label29::
  515. fdivp_sti 3,2
  516. ret
  517. label30::
  518. fdivr_sti 3,2
  519. ret
  520. label31::
  521. fdivrp_sti 3,2
  522. ret
  ; ---- register 4 ----
  523. label32::
  524. fdiv_st 4,3
  525. ret
  526. label33::
  527. add esp, STACK_SIZE
  528. int ILLEGAL_OPC
  529. label34::
  530. fdivr_st 4,3
  531. ret
  532. label35::
  533. add esp, STACK_SIZE
  534. int ILLEGAL_OPC
  535. label36::
  536. fdiv_sti 4,3
  537. ret
  538. label37::
  539. fdivp_sti 4,3
  540. ret
  541. label38::
  542. fdivr_sti 4,3
  543. ret
  544. label39::
  545. fdivrp_sti 4,3
  546. ret
  ; ---- register 5 ----
  547. label40::
  548. fdiv_st 5,4
  549. ret
  550. label41::
  551. add esp, STACK_SIZE
  552. int ILLEGAL_OPC
  553. label42::
  554. fdivr_st 5,4
  555. ret
  556. label43::
  557. add esp, STACK_SIZE
  558. int ILLEGAL_OPC
  559. label44::
  560. fdiv_sti 5,4
  561. ret
  562. label45::
  563. fdivp_sti 5,4
  564. ret
  565. label46::
  566. fdivr_sti 5,4
  567. ret
  568. label47::
  569. fdivrp_sti 5,4
  570. ret
  ; ---- register 6 ----
  571. label48::
  572. fdiv_st 6,5
  573. ret
  574. label49::
  575. add esp, STACK_SIZE
  576. int ILLEGAL_OPC
  577. label50::
  578. fdivr_st 6,5
  579. ret
  580. label51::
  581. add esp, STACK_SIZE
  582. int ILLEGAL_OPC
  583. label52::
  584. fdiv_sti 6,5
  585. ret
  586. label53::
  587. fdivp_sti 6,5
  588. ret
  589. label54::
  590. fdivr_sti 6,5
  591. ret
  592. label55::
  593. fdivrp_sti 6,5
  594. ret
  ; ---- register 7 ----
  595. label56::
  596. fdiv_st 7,6
  597. ret
  598. label57::
  599. add esp, STACK_SIZE
  600. int ILLEGAL_OPC
  601. label58::
  602. fdivr_st 7,6
  603. ret
  604. label59::
  605. add esp, STACK_SIZE
  606. int ILLEGAL_OPC
  607. label60::
  608. fdiv_sti 7,6
  609. ret
  610. label61::
  611. fdivp_sti 7,6
  612. ret
  613. label62::
  614. fdivr_sti 7,6
  615. ret
  616. label63::
  617. fdivrp_sti 7,6
  618. ret
  619. _adj_fdiv_r ENDP
  ; _fdivp_sti_st - safe FDIVP ST(1), ST: ST(1) = ST(1) / ST, then pop.
  ; Reserves its own STACK_SIZE frame and releases it before returning.
  ; Clobbers eax and flags (through fdiv_main_routine).
  620. _fdivp_sti_st PROC NEAR
  621. ; for calling from mem routines
  622. sub esp, STACK_SIZE ; added back at end of fdivp_sti macro
  623. fdivp_sti 1, 0
  624. ret
  625. _fdivp_sti_st ENDP
  ; _fdivrp_sti_st - safe FDIVRP ST(1), ST: ST(1) = ST / ST(1), then pop.
  ; Reserves its own STACK_SIZE frame and releases it before returning.
  ; Clobbers eax and flags (through fdiv_main_routine).
  626. _fdivrp_sti_st PROC NEAR
  627. ; for calling from mem routines
  628. sub esp, STACK_SIZE ; added back at end of fdivrp_sti macro
  629. fdivrp_sti 1, 0
  630. ret
  631. _fdivrp_sti_st ENDP
  632. ;;; _adj_fdiv_m32 - FDIV m32real FIX
  633. ;;
  634. ;; Input : m32real divisor on the CPU stack (at [esp+4] on entry), dividend in ST
  635. ;;
  636. ;; Output: Result of FDIV in ST
  ;; The operand is removed from the CPU stack on return (ret 4). eax is preserved.
  637. PUBLIC _adj_fdiv_m32
  638. _adj_fdiv_m32 PROC NEAR
  639. push eax ; save eax
  640. mov eax, [esp + MEM_OPERAND] ; check for an all-ones
  641. and eax, SINGLE_NAN ; exponent (NaN or infinity)
  642. cmp eax, SINGLE_NAN ;
  643. je memory_divide_m32 ; yes: plain hardware divide
  644. fnstsw ax ; get status word
  645. and eax, 3800h ; get top of stack
  646. je spill_fpstack ; TOP field zero: treated as FP stack full
  647. fld dword ptr[esp + MEM_OPERAND] ; load m32real in ST
  648. call _fdivp_sti_st ; do actual divide
  649. pop eax
  650. ret 4
  651. spill_fpstack:
  652. fxch ; user's ST(1) to the top
  653. sub esp, SPILL_SIZE ; make temp space
  654. fstp tbyte ptr[esp ] ; save user's ST(1)
  655. fld dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
  656. call _fdivp_sti_st ; do actual divide
  657. fld tbyte ptr[esp] ; restore user's ST(1)
  658. ; _fdivp_sti_st has already released its own scratch frame
  659. fxch ; quotient back to ST
  660. add esp, SPILL_SIZE
  661. pop eax
  662. ret 4
  663. memory_divide_m32:
  664. fdiv dword ptr[esp + MEM_OPERAND] ; do actual divide
  665. pop eax
  666. ret 4
  667. _adj_fdiv_m32 ENDP
  668. ;;; _adj_fdiv_m64 - FDIV m64real FIX
  669. ;;
  670. ;; Input : m64real divisor on the CPU stack (at [esp+4] on entry), dividend in ST
  671. ;;
  672. ;; Output: Result of FDIV in ST
  ;; The operand is removed from the CPU stack on return (ret 8). eax is preserved.
  673. PUBLIC _adj_fdiv_m64
  674. _adj_fdiv_m64 PROC NEAR
  675. push eax ; save eax
  676. mov eax, [esp + MEM_OPERAND + 4] ; high dword: check for an all-ones
  677. and eax, DOUBLE_NAN ; exponent (NaN or infinity)
  678. cmp eax, DOUBLE_NAN ;
  679. je memory_divide_m64 ; yes: plain hardware divide
  680. fnstsw ax ; get status word
  681. and eax, 3800h ; get top of stack
  682. je spill_fpstack_m64 ; TOP field zero: treated as FP stack full
  683. fld qword ptr[esp + MEM_OPERAND] ; load m64real in ST
  684. call _fdivp_sti_st ; do actual divide
  685. pop eax
  686. ret 8
  687. spill_fpstack_m64:
  688. fxch ; user's ST(1) to the top
  689. sub esp, SPILL_SIZE ; make temp space
  690. fstp tbyte ptr[esp] ; save user's ST(1)
  691. fld qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
  692. call _fdivp_sti_st ; do actual divide
  693. fld tbyte ptr[esp] ; restore user's ST(1)
  694. ; _fdivp_sti_st has already released its own scratch frame
  695. fxch ; quotient back to ST
  696. add esp, SPILL_SIZE
  697. pop eax
  698. ret 8
  699. memory_divide_m64:
  700. fdiv qword ptr[esp + MEM_OPERAND] ; do actual divide
  701. pop eax
  702. ret 8
  703. _adj_fdiv_m64 ENDP
  704. ;;; _adj_fdiv_m16i - FDIV m16int FIX
  705. ;;
  706. ;; Input : m16int divisor on the CPU stack (at [esp+4] on entry), dividend in ST
  707. ;;
  708. ;; Output: Result of FDIV in ST
  ;; The operand takes a 4-byte stack slot that is removed on return (ret 4).
  ;; eax is preserved.
  709. PUBLIC _adj_fdiv_m16i
  710. _adj_fdiv_m16i PROC NEAR
  711. push eax ; save eax
  712. fnstsw ax ; get status word
  713. and eax, 3800h ; get top of stack
  714. je spill_fpstack_m16i ; TOP field zero: treated as FP stack full
  715. fild word ptr[esp + MEM_OPERAND] ; load m16int in ST
  716. call _fdivp_sti_st ; do actual divide
  717. pop eax
  718. ret 4
  719. spill_fpstack_m16i:
  720. fxch ; user's ST(1) to the top
  721. sub esp, SPILL_SIZE ; make temp space
  722. fstp tbyte ptr[esp ] ; save user's ST(1)
  723. fild word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
  724. call _fdivp_sti_st ; do actual divide
  725. fld tbyte ptr[esp] ; restore user's ST(1)
  726. ; _fdivp_sti_st has already released its own scratch frame
  727. fxch ; quotient back to ST
  728. add esp, SPILL_SIZE
  729. pop eax
  730. ret 4
  731. _adj_fdiv_m16i ENDP
  732. ;;; _adj_fdiv_m32i - FDIV m32int FIX
  733. ;;
  734. ;; Input : m32int divisor on the CPU stack (at [esp+4] on entry), dividend in ST
  735. ;;
  736. ;; Output: Result of FDIV in ST
  ;; The operand is removed from the CPU stack on return (ret 4). eax is preserved.
  737. PUBLIC _adj_fdiv_m32i
  738. _adj_fdiv_m32i PROC NEAR
  739. push eax ; save eax
  740. fnstsw ax ; get status word
  741. and eax, 3800h ; get top of stack
  742. je spill_fpstack_m32i ; TOP field zero: treated as FP stack full
  743. fild dword ptr[esp + MEM_OPERAND] ; load m32int in ST
  744. call _fdivp_sti_st ; do actual divide
  745. pop eax
  746. ret 4
  747. spill_fpstack_m32i:
  748. fxch ; user's ST(1) to the top
  749. sub esp, SPILL_SIZE ; make temp space
  750. fstp tbyte ptr[esp ] ; save user's ST(1)
  751. fild dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
  752. call _fdivp_sti_st ; do actual divide
  753. fld tbyte ptr[esp] ; restore user's ST(1)
  754. ; _fdivp_sti_st has already released its own scratch frame
  755. fxch ; quotient back to ST
  756. add esp, SPILL_SIZE
  757. pop eax
  758. ret 4
  759. _adj_fdiv_m32i ENDP
  760. ;;; _adj_fdivr_m32 - FDIVR m32real FIX
  761. ;;
  762. ;; Input : m32real dividend on the CPU stack (at [esp+4] on entry), divisor in ST
  763. ;;
  764. ;; Output: Result of FDIVR in ST
  ;; The operand is removed from the CPU stack on return (ret 4). eax is preserved.
  765. PUBLIC _adj_fdivr_m32
  766. _adj_fdivr_m32 PROC NEAR
  767. push eax ; save eax
  768. mov eax, [esp + MEM_OPERAND] ; check for an all-ones
  769. and eax, SINGLE_NAN ; exponent (NaN or infinity)
  770. cmp eax, SINGLE_NAN ;
  771. je memory_divide_m32r ; yes: plain hardware divide
  772. fnstsw ax ; get status word
  773. and eax, 3800h ; get top of stack
  774. je spill_fpstack_m32r ; TOP field zero: treated as FP stack full
  775. fld dword ptr[esp + MEM_OPERAND] ; load m32real in ST
  776. call _fdivrp_sti_st ; do actual divide
  777. pop eax
  778. ret 4
  779. spill_fpstack_m32r:
  780. fxch ; user's ST(1) to the top
  781. sub esp, SPILL_SIZE ; make temp space
  782. fstp tbyte ptr[esp ] ; save user's ST(1)
  783. fld dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
  784. call _fdivrp_sti_st ; do actual divide
  785. fld tbyte ptr[esp] ; restore user's ST(1)
  786. ; _fdivrp_sti_st has already released its own scratch frame
  787. fxch ; quotient back to ST
  788. add esp, SPILL_SIZE
  789. pop eax
  790. ret 4
  791. memory_divide_m32r:
  792. fdivr dword ptr[esp + MEM_OPERAND] ; do actual divide
  793. pop eax
  794. ret 4
  795. _adj_fdivr_m32 ENDP
  796. ;;; _adj_fdivr_m64 - FDIVR m64real FIX
  797. ;;
  798. ;; Input : m64real dividend on the CPU stack (at [esp+4] on entry), divisor in ST
  799. ;;
  800. ;; Output: Result of FDIVR in ST
  ;; The operand is removed from the CPU stack on return (ret 8). eax is preserved.
  801. PUBLIC _adj_fdivr_m64
  802. _adj_fdivr_m64 PROC NEAR
  803. push eax ; save eax
  804. mov eax, [esp + MEM_OPERAND + 4] ; high dword: check for an all-ones
  805. and eax, DOUBLE_NAN ; exponent (NaN or infinity)
  806. cmp eax, DOUBLE_NAN ;
  807. je memory_divide_m64r ; yes: plain hardware divide
  808. fnstsw ax ; get status word
  809. and eax, 3800h ; get top of stack
  810. je spill_fpstack_m64r ; TOP field zero: treated as FP stack full
  811. fld qword ptr[esp + MEM_OPERAND] ; load m64real in ST
  812. call _fdivrp_sti_st ; do actual divide
  813. pop eax
  814. ret 8
  815. spill_fpstack_m64r:
  816. fxch ; user's ST(1) to the top
  817. sub esp, SPILL_SIZE ; make temp space
  818. fstp tbyte ptr[esp ] ; save user's ST(1)
  819. fld qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
  820. call _fdivrp_sti_st ; do actual divide
  821. fld tbyte ptr[esp] ; restore user's ST(1)
  822. ; _fdivrp_sti_st has already released its own scratch frame
  823. fxch ; quotient back to ST
  824. add esp, SPILL_SIZE
  825. pop eax
  826. ret 8
  827. memory_divide_m64r:
  828. fdivr qword ptr[esp + MEM_OPERAND] ; do actual divide
  829. pop eax
  830. ret 8
  831. _adj_fdivr_m64 ENDP
  832. ;;; _adj_fdivr_m16i - FDIVR m16int FIX
  833. ;;
  834. ;; Input : m16int dividend on the CPU stack (at [esp+4] on entry), divisor in ST
  835. ;;
  836. ;; Output: Result of FDIVR in ST
  ;; The operand takes a 4-byte stack slot that is removed on return (ret 4).
  ;; eax is preserved.
  837. PUBLIC _adj_fdivr_m16i
  838. _adj_fdivr_m16i PROC NEAR
  839. push eax ; save eax
  840. fnstsw ax ; get status word
  841. and eax, 3800h ; get top of stack
  842. je spill_fpstack_m16ir ; TOP field zero: treated as FP stack full
  843. fild word ptr[esp + MEM_OPERAND] ; load m16int in ST
  844. call _fdivrp_sti_st ; do actual divide
  845. pop eax
  846. ret 4
  847. spill_fpstack_m16ir:
  848. fxch ; user's ST(1) to the top
  849. sub esp, SPILL_SIZE ; make temp space
  850. fstp tbyte ptr[esp ] ; save user's ST(1)
  851. fild word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
  852. call _fdivrp_sti_st ; do actual divide
  853. fld tbyte ptr[esp] ; restore user's ST(1)
  854. ; _fdivrp_sti_st has already released its own scratch frame
  855. fxch ; quotient back to ST
  856. add esp, SPILL_SIZE
  857. pop eax
  858. ret 4
  859. _adj_fdivr_m16i ENDP
  860. ;;; _adj_fdivr_m32i - FDIVR m32int FIX
  861. ;;
  862. ;; Input : m32int dividend on the CPU stack (at [esp+4] on entry), divisor in ST
  863. ;;
  864. ;; Output: Result of FDIVR in ST
  ;; The operand is removed from the CPU stack on return (ret 4). eax is preserved.
  865. PUBLIC _adj_fdivr_m32i
  866. _adj_fdivr_m32i PROC NEAR
  867. push eax ; save eax
  868. fnstsw ax ; get status word
  869. and eax, 3800h ; get top of stack
  870. je spill_fpstack_m32ir ; TOP field zero: treated as FP stack full
  871. fild dword ptr[esp + MEM_OPERAND] ; load m32int in ST
  872. call _fdivrp_sti_st ; do actual divide
  873. pop eax
  874. ret 4
  875. spill_fpstack_m32ir:
  876. fxch ; user's ST(1) to the top
  877. sub esp, SPILL_SIZE ; make temp space
  878. fstp tbyte ptr[esp ] ; save user's ST(1)
  879. fild dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
  880. call _fdivrp_sti_st ; do actual divide
  881. fld tbyte ptr[esp] ; restore user's ST(1)
  882. ; _fdivrp_sti_st has already released its own scratch frame
  883. fxch ; quotient back to ST
  884. add esp, SPILL_SIZE
  885. pop eax
  886. ret 4
  887. _adj_fdivr_m32i ENDP
  888. ;;; _safe_fdiv - FDIV fix
  889. ;;
  890. ;; Pentium-safe version of FDIV, aka FDIVP ST(1),ST(0)
  891. ;;
  892. ;; Input : Numerator in ST(1), Denominator in ST(0)
  893. ;;
  894. ;; Output: Result of FDIV in ST(0)
  ;; Both inputs are popped and the quotient is pushed. eax is preserved.
  895. PUBLIC _safe_fdiv
  896. _safe_fdiv PROC NEAR
  897. push eax ; fdiv_main_routine uses eax
  898. sub esp, STACK_SIZE ; scratch frame for fdiv_main_routine
  899. fstp tbyte ptr [esp+DENOM] ; ST(0) is the denominator
  900. fstp tbyte ptr [esp+NUMER] ; old ST(1) is the numerator
  901. call fdiv_main_routine ; quotient is pushed as ST
  902. add esp, STACK_SIZE
  903. pop eax
  904. ret
  905. _safe_fdiv ENDP
  906. ;;; _safe_fdivr - FDIVR fix
  907. ;;
  908. ;; Pentium-safe version of FDIVR, aka FDIVRP ST(1),ST(0)
  909. ;;
  910. ;; Input : Numerator in ST(0), Denominator in ST(1)
  911. ;;
  912. ;; Output: Result of FDIVR in ST(0)
  ;; Both inputs are popped and the quotient is pushed. eax is preserved.
  913. public _safe_fdivr
  914. _safe_fdivr PROC NEAR
  915. push eax ; fdiv_main_routine uses eax
  916. sub esp, STACK_SIZE ; scratch frame for fdiv_main_routine
  917. fstp tbyte ptr [esp+NUMER] ; ST(0) is the numerator
  918. fstp tbyte ptr [esp+DENOM] ; old ST(1) is the denominator
  919. call fdiv_main_routine ; quotient is pushed as ST
  920. add esp, STACK_SIZE
  921. pop eax
  922. ret
  923. _safe_fdivr ENDP
  924. ;;; _adj_fprem - FPREM FIX
  925. ;;
  926. ;; Based on PRELIMINARY Intel code.
  927. _fprem_common PROC NEAR
  928. push eax
  929. push ebx
  930. push ecx
  931. mov eax, [FPREM_MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  932. xor eax, FPREM_ONESMASK ; invert bits that have to be one
  933. test eax, FPREM_ONESMASK ; check bits that have to be one
  934. jnz remainder_hardware_ok
  935. shr eax, 11
  936. and eax, 0fh
  937. cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
  938. jz remainder_hardware_ok
  939. ; The denominator has the bit pattern. Weed out the funny cases like NaNs
  940. ; before applying the software version. Our caller guarantees that the
  941. ; denominator is not a denormal. Here we check for:
  942. ; denominator inf, NaN, unnormal
  943. ; numerator inf, NaN, unnormal, denormal
  944. mov eax, [FPREM_MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  945. and eax, 07fff0000h ; mask the exponent only
  946. cmp eax, 07fff0000h ; check for INF or NaN
  947. je remainder_hardware_ok
  948. mov eax, [FPREM_MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
  949. and eax, 07fff0000h ; mask the exponent only
  950. jz remainder_hardware_ok ; jif numerator denormal
  951. cmp eax, 07fff0000h ; check for INF or NaN
  952. je remainder_hardware_ok
  953. mov eax, [esp + FPREM_MAIN_NUMER + 4] ; high mantissa bits - numerator
  954. add eax, eax ; set carry if explicit bit set
  955. jnz remainder_hardware_ok ; jmp if numerator is unnormal
  956. mov eax, [esp + FPREM_MAIN_DENOM + 4] ; high mantissa bits - denominator
  957. add eax, eax ; set carry if explicit bit set
  958. jnz remainder_hardware_ok ; jmp if denominator is unnormal
  959. rem_patch:
  960. mov eax, [FPREM_MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  961. and eax, 07fffh ; clear sy
  962. add eax, 63 ; evaluate ey + 63
  963. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  964. and ebx, 07fffh ; clear sx
  965. sub ebx, eax ; evaluate the exponent difference (ex - ey)
  966. ja rem_large ; if ex > ey + 63, case of large arguments
  967. rem_patch_loop:
  968. mov eax, [FPREM_MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  969. and eax, 07fffh ; clear sy
  970. add eax, 10 ; evaluate ey + 10
  971. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  972. and ebx, 07fffh ; clear sx
  973. sub ebx, eax ; evaluate the exponent difference (ex - ey)
  974. js remainder_hardware_ok ; safe if ey + 10 > ex
  975. fld tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
  976. mov eax, [FPREM_MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  977. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  978. and ebx, 07fffh ; clear sx
  979. mov ecx, ebx
  980. sub ebx, eax
  981. and ebx, 07h
  982. or ebx, 04h
  983. sub ecx, ebx
  984. mov ebx, eax
  985. and ebx, 08000h ; keep sy
  986. or ecx, ebx ; merge the sign of y
  987. mov dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
  988. fld tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the shifted denominator
  989. mov dword ptr [FPREM_MAIN_DENOM+8+esp], eax ; restore the initial denominator
  990. fxch
  991. fprem ; this rem is safe
  992. fstp tbyte ptr [FPREM_MAIN_NUMER+esp] ; update the numerator
  993. fstp st(0) ; pop the stack
  994. jmp rem_patch_loop
  995. rem_large:
  996. test edx, 02h ; is denominator already saved
  997. jnz already_saved
  998. fld tbyte ptr[esp + FPREM_MAIN_DENOM]
  999. fstp tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE] ; save denominator
  1000. already_saved:
  1001. ; Save user's precision control and institute 80. The fp ops in
  1002. ; rem_large_loop must not round to user's precision (if it is less
  1003. ; than 80) because the hardware would not have done so. We are
  1004. ; aping the hardware here, which is all extended.
  1005. fnstcw [esp+FPREM_MAIN_PREV_CW] ; save caller's control word
  1006. mov eax, dword ptr[esp + FPREM_MAIN_PREV_CW]
  1007. or eax, 033fh ; mask exceptions, pc=80
  1008. mov [esp + FPREM_MAIN_PATCH_CW], eax
  1009. fldcw [esp + FPREM_MAIN_PATCH_CW]
  1010. mov eax, [FPREM_MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  1011. and eax, 07fffh ; clear sy
  1012. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  1013. and ebx, 07fffh ; clear sx
  1014. sub ebx, eax ; evaluate the exponent difference
  1015. and ebx, 03fh
  1016. or ebx, 020h
  1017. add ebx, 1
  1018. mov ecx, ebx
  1019. mov eax, [FPREM_MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  1020. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  1021. and ebx, 07fffh ; clear sx
  1022. and eax, 08000h ; keep sy
  1023. or ebx, eax ; merge the sign of y
  1024. mov dword ptr[FPREM_MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
  1025. fld tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the scaled denominator
  1026. fabs
  1027. fld tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
  1028. fabs
  1029. rem_large_loop:
  1030. fcom
  1031. fnstsw ax
  1032. and eax, 00100h
  1033. jnz rem_no_sub
  1034. fsub st, st(1)
  1035. rem_no_sub:
  1036. fxch
  1037. fmul qword ptr half
  1038. fxch
  1039. sub ecx, 1 ; decrement the loop counter
  1040. jnz rem_large_loop
  1041. mov ebx, [FPREM_MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  1042. fstp tbyte ptr[esp + FPREM_MAIN_NUMER] ; save result
  1043. fstp st ; toss modified denom
  1044. fld tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
  1045. fld tbyte ptr[big_number] ; force C2 to be set
  1046. fprem
  1047. fstp st
  1048. fld tbyte ptr[esp + FPREM_MAIN_NUMER] ; restore saved result
  1049. fldcw [esp + FPREM_MAIN_PREV_CW] ; restore caller's control word
  1050. and ebx, 08000h ; keep sx
  1051. jz rem_done
  1052. fchs
  1053. jmp rem_done
  1054. remainder_hardware_ok:
  1055. fld tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the denominator
  1056. fld tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
  1057. fprem ; and finally do a remainder
  1058. ; prem_main_routine end
  1059. rem_done:
  1060. test edx, 03h
  1061. jz rem_exit
  1062. fnstsw [esp + FPREM_MAIN_FPREM_SW] ; save Q0 Q1 and Q2
  1063. test edx, 01h
  1064. jz do_not_de_scale
  1065. ; De-scale the result. Go to pc=80 to prevent from fmul
  1066. ; from user precision (fprem does not round the result).
  1067. fnstcw [esp + FPREM_MAIN_PREV_CW] ; save callers control word
  1068. mov eax, [esp + FPREM_MAIN_PREV_CW]
  1069. or eax, 0300h ; pc = 80
  1070. mov [esp + FPREM_MAIN_PATCH_CW], eax
  1071. fldcw [esp + FPREM_MAIN_PATCH_CW]
  1072. fmul qword ptr one_shr_64
  1073. fldcw [esp + FPREM_MAIN_PREV_CW] ; restore callers CW
  1074. do_not_de_scale:
  1075. mov eax, [esp + FPREM_MAIN_FPREM_SW]
  1076. fxch
  1077. fstp st
  1078. fld tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
  1079. fxch
  1080. and eax, 04300h ; restore saved Q0, Q1, Q2
  1081. sub esp, ENV_SIZE
  1082. fnstenv [esp]
  1083. and [esp].STATUS_WORD, 0bcffh
  1084. or [esp].STATUS_WORD, eax
  1085. fldenv [esp]
  1086. add esp, ENV_SIZE
  1087. rem_exit:
  1088. pop ecx
  1089. pop ebx
  1090. pop eax
  1091. ret
  1092. _fprem_common ENDP
  ;-----------------------------------------------------------------------
  ; _adj_fprem - FPREM entry point with the software workaround applied.
  ;
  ; In:    ST(0) = numerator, ST(1) = denominator.  Both are popped and
  ;        spilled to the local frame (FPREM_NUMER / FPREM_DENOM).
  ; Out:   FPU stack as left by _fprem_common, or, when the denominator
  ;        is zero, by a plain FPREM (ST(0) = result, ST(1) = denominator).
  ; Regs:  edx is preserved; eax is overwritten.
  ;
  ; edx carries flags into _fprem_common:
  ;   01h - both operands were multiplied by one_shl_64; the result must
  ;         be de-scaled
  ;   02h - the original denominator has been stored at FPREM_DENOM_SAVE
  ;-----------------------------------------------------------------------
  PUBLIC  _adj_fprem
  _adj_fprem PROC NEAR
          push    edx
          sub     esp, FPREM_STACK_SIZE
          fstp    tbyte ptr [FPREM_NUMER+esp]     ; spill numerator
          fstp    tbyte ptr [FPREM_DENOM+esp]     ; spill denominator
          xor     edx, edx                        ; no flags set yet

  ; prem_main_routine begin
          mov     eax, [FPREM_DENOM+6+esp]        ; exponent and high 16 bits of mantissa
          test    eax, 07fff0000h                 ; check for denormal
          jz      fprem_denormal
          call    _fprem_common
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret

  fprem_denormal:
          fld     tbyte ptr [FPREM_DENOM+esp]     ; load the denominator
          fld     tbyte ptr [FPREM_NUMER+esp]     ; load the numerator
          mov     eax, [FPREM_DENOM+esp]          ; test for whole mantissa == 0
          or      eax, [FPREM_DENOM+4+esp]        ; test for whole mantissa == 0
          jz      remainder_hardware_ok_l         ; denominator is zero
          fxch
          fstp    tbyte ptr [esp + FPREM_DENOM_SAVE] ; save org denominator
          fld     tbyte ptr [esp + FPREM_DENOM]
          fxch
          or      edx, 02h                        ; flag: denominator saved
  ;
  ; For this we need pc=80.  Also, mask exceptions so we don't take any
  ; denormal operand exceptions.  It is guaranteed that the descaling
  ; later on will take underflow, which is what the hardware would have
  ; done on a normal fprem.
  ;
          fnstcw  [FPREM_PREV_CW+esp]             ; save caller's control word
          mov     eax, [FPREM_PREV_CW+esp]
          or      eax, 0033fh                     ; mask exceptions, pc=80
          mov     [FPREM_PATCH_CW+esp], eax
          fldcw   [FPREM_PATCH_CW+esp]            ; mask exceptions & pc=80

  ; The denominator is a denormal.  For most numerators, scale both
  ; numerator and denominator to get rid of denormals.  Then execute the
  ; common code with the flag set to indicate that the result must be
  ; de-scaled.  For large numerators this won't work because the scaling
  ; would cause overflow.  In this case we know the numerator is large,
  ; the denominator is small (denormal), so the exponent difference is
  ; also large.  This means the rem_large code will be used and this code
  ; depends on the difference in exponents modulo 64.  Adding 64 to the
  ; denominator's exponent doesn't change the modulo 64 difference.  So we
  ; can scale the denominator by 64, making it not denormal, and this
  ; won't affect the result.
  ;
  ; To start with, figure out if numerator is large
          mov     eax, [esp + FPREM_NUMER + 8]    ; load numerator exponent
          and     eax, 7fffh                      ; isolate numerator exponent
          cmp     eax, 7fbeh                      ; compare Nexp to Maxexp-64
          ja      big_numer_rem_de                ; jif big numerator

  ; So the numerator is not large: scale both numerator and denominator
          or      edx, 1                          ; flag: result must be de-scaled
          fmul    qword ptr one_shl_64            ; make numerator not denormal
          fstp    tbyte ptr [esp + FPREM_NUMER]
          fmul    qword ptr one_shl_64            ; make denominator not denormal
          fstp    tbyte ptr [esp + FPREM_DENOM]
          jmp     scaling_done

  ; The numerator is large.  Scale only the denominator, which will not
  ; change the result which we know will be partial.  The scale flag
  ; (01h) stays clear.
  big_numer_rem_de:
  ; We must do this with pc=80 to avoid rounding to single/double.
  ; In this case we do not mask exceptions so that we will take
  ; denormal operand, as would the hardware.
  ;
  ; FPREM_PREV_CW already holds the caller's control word (stored above,
  ; before the masked word was loaded).  The control word must NOT be
  ; stored again here: the FPU currently holds the patched word, and
  ; re-saving it would make scaling_done reload the patched word instead
  ; of the caller's.  Build the pc=80 word from the saved copy.
          mov     eax, [FPREM_PREV_CW+esp]
          or      eax, 00300h                     ; pc=80
          mov     [FPREM_PATCH_CW+esp], eax
          fldcw   [FPREM_PATCH_CW+esp]            ; pc=80
          fstp    st                              ; Toss numerator
          fmul    qword ptr one_shl_64            ; make denominator not denormal
          fstp    tbyte ptr [esp + FPREM_DENOM]

  ; Restore the control word which was fiddled to scale at 80-bit
  ; precision.  Then call the common code.
  scaling_done:
          fldcw   [esp + FPREM_PREV_CW]           ; restore caller's control word
          call    _fprem_common
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret

  ; Zero denominator: both operands are already on the FPU stack, let the
  ; hardware instruction produce the result.
  remainder_hardware_ok_l:
          fprem                                   ; and finally do a remainder
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret
  _adj_fprem ENDP
  1182. ;
  1183. ; FPREM1 code begins here
  1184. ;
  ;-----------------------------------------------------------------------
  ; _fprem1_common - common FPREM1 code shared by the _adj_fprem1 paths.
  ;
  ; In:    numerator and denominator stored as 80-bit values in the
  ;        caller's frame, addressed here through the FPREM_MAIN_* offsets
  ;        (defined elsewhere in this file) after the three pushes below.
  ;        edx = flags set by the caller:
  ;          01h - operands were scaled, result must be multiplied by
  ;                one_shr_64
  ;          02h - original denominator is already in FPREM_MAIN_DENOM_SAVE
  ; Out:   ST(0) = remainder, ST(1) = denominator.
  ; Regs:  eax, ebx, ecx are saved and restored; edx is only read.
  ;-----------------------------------------------------------------------
  _fprem1_common PROC NEAR
          push    eax
          push    ebx
          push    ecx

  ; Does the denominator mantissa match one of the table patterns?
          mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
          xor     eax, FPREM_ONESMASK             ; invert bits that have to be one
          test    eax, FPREM_ONESMASK             ; check bits that have to be one
          jnz     remainder1_hardware_ok
          shr     eax, 11
          and     eax, 0fh
          cmp     byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
          jz      remainder1_hardware_ok

  ; The denominator has the bit pattern.  Weed out the funny cases like
  ; NaNs before applying the software version.  Our caller guarantees that
  ; the denominator is not a denormal.  Here we check for:
  ;       denominator     inf, NaN, unnormal
  ;       numerator       inf, NaN, unnormal, denormal
          mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
          and     eax, 07fff0000h                 ; mask the exponent only
          cmp     eax, 07fff0000h                 ; check for INF or NaN
          je      remainder1_hardware_ok
          mov     eax, [FPREM_MAIN_NUMER+6+esp]   ; exponent and high 16 bits of mantissa
          and     eax, 07fff0000h                 ; mask the exponent only
          jz      remainder1_hardware_ok          ; jif numerator denormal
          cmp     eax, 07fff0000h                 ; check for INF or NaN
          je      remainder1_hardware_ok

  ; Unnormal = explicit integer bit (top bit of the high mantissa dword)
  ; clear.  The add shifts that bit into carry, so the test has to be on
  ; carry; testing ZF would send every mantissa with any other high bit
  ; set - including every denominator that matched the pattern above -
  ; to the hardware path.
          mov     eax, [esp + FPREM_MAIN_NUMER + 4] ; high mantissa bits - numerator
          add     eax, eax                        ; set carry if explicit bit set
          jnc     remainder1_hardware_ok          ; jmp if numerator is unnormal
          mov     eax, [esp + FPREM_MAIN_DENOM + 4] ; high mantissa bits - denominator
          add     eax, eax                        ; set carry if explicit bit set
          jnc     remainder1_hardware_ok          ; jmp if denominator is unnormal

  rem1_patch:
          mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
          and     eax, 07fffh                     ; clear sy
          add     eax, 63                         ; evaluate ey + 63
          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
          and     ebx, 07fffh                     ; clear sx
          sub     ebx, eax                        ; evaluate the exponent difference (ex - ey)
          ja      rem1_large                      ; if ex > ey + 63, case of large arguments

  ; Reduce the numerator step by step against a denominator whose exponent
  ; has been raised to within 4..7 of the numerator's, until the exponent
  ; difference drops below 10.
  rem1_patch_loop:
          mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
          and     eax, 07fffh                     ; clear sy
          add     eax, 10                         ; evaluate ey + 10
          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
          and     ebx, 07fffh                     ; clear sx
          sub     ebx, eax                        ; evaluate the exponent difference (ex - ey)
          js      remainder1_hardware_ok          ; safe if ey + 10 > ex
          fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
          mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
          and     ebx, 07fffh                     ; clear sx
          mov     ecx, ebx
          sub     ebx, eax
          and     ebx, 07h
          or      ebx, 04h                        ; ebx = 4..7
          sub     ecx, ebx                        ; shifted exponent = ex - (4..7)
          mov     ebx, eax
          and     ebx, 08000h                     ; keep sy
          or      ecx, ebx                        ; merge the sign of y
          mov     dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
          fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the shifted denominator
          mov     dword ptr [FPREM_MAIN_DENOM+8+esp], eax ; restore the initial denominator
          fxch
          fprem                                   ; this rem is safe
          fstp    tbyte ptr [FPREM_MAIN_NUMER+esp] ; update the numerator
          fstp    st(0)                           ; pop the stack
          jmp     rem1_patch_loop

  rem1_large:
  ; The "denominator already saved" flag lives in edx (bit 02h, set by the
  ; caller).  ebx holds the exponent difference at this point and must
  ; not be used for this test.
          test    edx, 02h                        ; is denominator already saved
          jnz     already_saved1
          fld     tbyte ptr [esp + FPREM_MAIN_DENOM]
          fstp    tbyte ptr [esp + FPREM_MAIN_DENOM_SAVE] ; save denominator
  already_saved1:
  ; Save user's precision control and institute 80.  The fp ops in
  ; rem1_large_loop must not round to user's precision (if it is less
  ; than 80) because the hardware would not have done so.  We are
  ; aping the hardware here, which is all extended.
          fnstcw  [esp+FPREM_MAIN_PREV_CW]        ; save caller's control word
          mov     eax, dword ptr [esp + FPREM_MAIN_PREV_CW]
          or      eax, 033fh                      ; mask exceptions, pc=80
          mov     [esp + FPREM_MAIN_PATCH_CW], eax
          fldcw   [esp + FPREM_MAIN_PATCH_CW]

  ; Loop count: ((ex - ey) and 3fh or 20h) + 1, i.e. 33..64 iterations.
          mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
          and     eax, 07fffh                     ; clear sy
          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
          and     ebx, 07fffh                     ; clear sx
          sub     ebx, eax                        ; evaluate the exponent difference
          and     ebx, 03fh
          or      ebx, 020h
          add     ebx, 1
          mov     ecx, ebx
          mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
          and     ebx, 07fffh                     ; clear sx
          and     eax, 08000h                     ; keep sy
          or      ebx, eax                        ; merge the sign of y
          mov     dword ptr [FPREM_MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
          fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the scaled denominator
          fabs
          fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
          fabs

  ; Each pass: subtract the denominator when numerator >= denominator,
  ; then halve the denominator.
  rem1_large_loop:
          fcom
          fnstsw  ax
          and     eax, 00100h                     ; C0 set: numerator < denominator
          jnz     rem1_no_sub
          fsub    st, st(1)
  rem1_no_sub:
          fxch
          fmul    qword ptr half
          fxch
          sub     ecx, 1                          ; decrement the loop counter
          jnz     rem1_large_loop

          mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
                                                  ; (read before the slot is overwritten)
          fstp    tbyte ptr [esp + FPREM_MAIN_NUMER] ; save result
          fstp    st                              ; toss modified denom
          fld     tbyte ptr [esp + FPREM_MAIN_DENOM_SAVE]
          fld     tbyte ptr [big_number]          ; force C2 to be set
          fprem1
          fstp    st
          fld     tbyte ptr [esp + FPREM_MAIN_NUMER] ; restore saved result
          fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's control word
          and     ebx, 08000h                     ; keep sx
          jz      rem1_done
          fchs                                    ; result takes the numerator's sign
          jmp     rem1_done

  remainder1_hardware_ok:
          fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the denominator
          fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
          fprem1                                  ; and finally do a remainder

  ; prem1_main_routine end
  rem1_done:
          test    edx, 03h                        ; any caller flag set?
          jz      rem1_exit
          fnstsw  [esp + FPREM_MAIN_FPREM_SW]     ; save Q0 Q1 and Q2
          test    edx, 01h
          jz      do_not_de_scale1
  ; De-scale the result.  Go to pc=80 so that the fmul does not round to
  ; the user's precision (fprem does not round the result).
          fnstcw  [esp + FPREM_MAIN_PREV_CW]      ; save caller's control word
          mov     eax, [esp + FPREM_MAIN_PREV_CW]
          or      eax, 0300h                      ; pc = 80
          mov     [esp + FPREM_MAIN_PATCH_CW], eax
          fldcw   [esp + FPREM_MAIN_PATCH_CW]
          fmul    qword ptr one_shr_64
          fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's CW
  do_not_de_scale1:
  ; Put the original denominator back in ST(1) and patch the saved
  ; condition bits into the status word through the FPU environment.
          mov     eax, [esp + FPREM_MAIN_FPREM_SW]
          fxch
          fstp    st
          fld     tbyte ptr [esp + FPREM_MAIN_DENOM_SAVE]
          fxch
          and     eax, 04300h                     ; restore saved Q0, Q1, Q2
          sub     esp, ENV_SIZE
          fnstenv [esp]
          and     [esp].STATUS_WORD, 0bcffh
          or      [esp].STATUS_WORD, eax
          fldenv  [esp]
          add     esp, ENV_SIZE
  rem1_exit:
          pop     ecx
          pop     ebx
          pop     eax
          ret
  _fprem1_common ENDP
  ;-----------------------------------------------------------------------
  ; _adj_fprem1 - FPREM1 entry point with the software workaround applied.
  ;
  ; In:    ST(0) = numerator, ST(1) = denominator.  Both are popped and
  ;        spilled to the local frame (FPREM_NUMER / FPREM_DENOM).
  ; Out:   FPU stack as left by _fprem1_common, or, when the denominator
  ;        is zero, by a plain FPREM1 (ST(0) = result, ST(1) = denominator).
  ; Regs:  edx is preserved; eax is overwritten.
  ;
  ; edx carries flags into _fprem1_common:
  ;   01h - both operands were multiplied by one_shl_64; the result must
  ;         be de-scaled
  ;   02h - the original denominator has been stored at FPREM_DENOM_SAVE
  ;-----------------------------------------------------------------------
  PUBLIC  _adj_fprem1
  _adj_fprem1 PROC NEAR
          push    edx
          sub     esp, FPREM_STACK_SIZE
          fstp    tbyte ptr [FPREM_NUMER+esp]     ; spill numerator
          fstp    tbyte ptr [FPREM_DENOM+esp]     ; spill denominator
          xor     edx, edx                        ; no flags set yet

  ; prem1_main_routine begin
          mov     eax, [FPREM_DENOM+6+esp]        ; exponent and high 16 bits of mantissa
          test    eax, 07fff0000h                 ; check for denormal
          jz      denormal1
          call    _fprem1_common
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret

  denormal1:
          fld     tbyte ptr [FPREM_DENOM+esp]     ; load the denominator
          fld     tbyte ptr [FPREM_NUMER+esp]     ; load the numerator
          mov     eax, [FPREM_DENOM+esp]          ; test for whole mantissa == 0
          or      eax, [FPREM_DENOM+4+esp]        ; test for whole mantissa == 0
          jz      remainder1_hardware_ok_l        ; denominator is zero
          fxch
          fstp    tbyte ptr [esp + FPREM_DENOM_SAVE] ; save org denominator
          fld     tbyte ptr [esp + FPREM_DENOM]
          fxch
          or      edx, 02h                        ; flag: denominator saved
  ;
  ; For this we need pc=80.  Also, mask exceptions so we don't take any
  ; denormal operand exceptions.  It is guaranteed that the descaling
  ; later on will take underflow, which is what the hardware would have
  ; done on a normal fprem.
  ;
          fnstcw  [FPREM_PREV_CW+esp]             ; save caller's control word
          mov     eax, [FPREM_PREV_CW+esp]
          or      eax, 0033fh                     ; mask exceptions, pc=80
          mov     [FPREM_PATCH_CW+esp], eax
          fldcw   [FPREM_PATCH_CW+esp]            ; mask exceptions & pc=80

  ; The denominator is a denormal.  For most numerators, scale both
  ; numerator and denominator to get rid of denormals.  Then execute the
  ; common code with the flag set to indicate that the result must be
  ; de-scaled.  For large numerators this won't work because the scaling
  ; would cause overflow.  In this case we know the numerator is large,
  ; the denominator is small (denormal), so the exponent difference is
  ; also large.  This means the rem1_large code will be used and this code
  ; depends on the difference in exponents modulo 64.  Adding 64 to the
  ; denominator's exponent doesn't change the modulo 64 difference.  So we
  ; can scale the denominator by 64, making it not denormal, and this
  ; won't affect the result.
  ;
  ; To start with, figure out if numerator is large
          mov     eax, [esp + FPREM_NUMER + 8]    ; load numerator exponent
          and     eax, 7fffh                      ; isolate numerator exponent
          cmp     eax, 7fbeh                      ; compare Nexp to Maxexp-64
          ja      big_numer_rem1_de               ; jif big numerator

  ; So the numerator is not large: scale both numerator and denominator
          or      edx, 1                          ; flag: result must be de-scaled
          fmul    qword ptr one_shl_64            ; make numerator not denormal
          fstp    tbyte ptr [esp + FPREM_NUMER]
          fmul    qword ptr one_shl_64            ; make denominator not denormal
          fstp    tbyte ptr [esp + FPREM_DENOM]
          jmp     scaling_done1

  ; The numerator is large.  Scale only the denominator, which will not
  ; change the result which we know will be partial.  The scale flag
  ; (01h) stays clear.
  big_numer_rem1_de:
  ; We must do this with pc=80 to avoid rounding to single/double.
  ; In this case we do not mask exceptions so that we will take
  ; denormal operand, as would the hardware.
  ;
  ; FPREM_PREV_CW already holds the caller's control word (stored above,
  ; before the masked word was loaded).  The control word must NOT be
  ; stored again here: the FPU currently holds the patched word, and
  ; re-saving it would make scaling_done1 reload the patched word instead
  ; of the caller's.  Build the pc=80 word from the saved copy.
          mov     eax, [FPREM_PREV_CW+esp]
          or      eax, 00300h                     ; pc=80
          mov     [FPREM_PATCH_CW+esp], eax
          fldcw   [FPREM_PATCH_CW+esp]            ; pc=80
          fstp    st                              ; Toss numerator
          fmul    qword ptr one_shl_64            ; make denominator not denormal
          fstp    tbyte ptr [esp + FPREM_DENOM]

  ; Restore the control word which was fiddled to scale at 80-bit
  ; precision.  Then call the common code.
  scaling_done1:
          fldcw   [esp + FPREM_PREV_CW]           ; restore caller's control word
          call    _fprem1_common
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret

  ; Zero denominator: both operands are already on the FPU stack, let the
  ; hardware instruction produce the result.  This entry point stands in
  ; for FPREM1, so FPREM1 (not FPREM) is the instruction to execute.
  remainder1_hardware_ok_l:
          fprem1                                  ; and finally do a remainder
          add     esp, FPREM_STACK_SIZE
          pop     edx
          ret
  _adj_fprem1 ENDP
  ;-----------------------------------------------------------------------
  ; _safe_fprem - alternate public name for the adjusted FPREM.
  ; Simply calls _adj_fprem and returns; operands and results are
  ; whatever _adj_fprem takes and leaves on the FPU stack.
  ;-----------------------------------------------------------------------
  PUBLIC  _safe_fprem
  _safe_fprem PROC NEAR
          call    _adj_fprem
          ret
  _safe_fprem ENDP
  ;-----------------------------------------------------------------------
  ; _safe_fprem1 - alternate public name for the adjusted FPREM1.
  ; Simply calls _adj_fprem1 and returns; operands and results are
  ; whatever _adj_fprem1 takes and leaves on the FPU stack.
  ;-----------------------------------------------------------------------
  PUBLIC  _safe_fprem1
  _safe_fprem1 PROC NEAR
          call    _adj_fprem1
          ret
  _safe_fprem1 ENDP
  ;;; _adj_fpatan - FPATAN FIX
  ;;
  ;; Dummy entry point: no software adjustment is applied here, the
  ;; routine executes the hardware FPATAN on the caller's FPU stack
  ;; and returns.
  PUBLIC  _adj_fpatan
  _adj_fpatan PROC NEAR
          fpatan
          ret
  _adj_fpatan ENDP
  ;;; _adj_fptan - FPTAN FIX
  ;;
  ;; Dummy entry point: no software adjustment is applied here, the
  ;; routine executes the hardware FPTAN on the caller's FPU stack
  ;; and returns.
  PUBLIC  _adj_fptan
  _adj_fptan PROC NEAR
          fptan
          ret
  _adj_fptan ENDP
  1466. end