Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

407 lines
12 KiB

  1. subttl emfprem.asm - FPREM and FPREM1 instructions
  2. page
  3. ;*******************************************************************************
  4. ;emfprem.asm - FPREM and FPREM1 instructions
  5. ; by Tim Paterson
  6. ;
  7. ; Microsoft Confidential
  8. ;
  9. ; Copyright (c) Microsoft Corporation 1991
  10. ; All Rights Reserved
  11. ;
  12. ;Inputs:
  13. ; edi = [CURstk]
  14. ; ST(1) loaded into ebx:esi & ecx
  15. ;
  16. ;Revision History:
  17. ;
  18. ; [] 09/05/91 TP Initial 32-bit version.
  19. ;
  20. ;*******************************************************************************
  21. ;Dispatch table for remainder (FPREM and FPREM1)
  22. ;
  23. ;One operand has been loaded into ecx:ebx:esi ("source"), the other is
  24. ;pointed to by edi ("dest").
  ;Per the column headings on the table label below, the source is ST(1), the
  ;divisor, and the dest is ST(0), the dividend.
  25. ;
  26. ;Tag of source is shifted. Tag values are as follows:
  27. .erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
  28. .erre TAG_VALID eq 1
  29. .erre TAG_ZERO eq 2
  30. .erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
  ;The entries below are listed in groups of four: one group per source tag,
  ;and within each group one entry per dest tag, both in the tag order asserted
  ;above.
  31. ;Any special case routines not found in this file are in emarith.asm
  32. ;Divisor Dividend
  33. tFpremDisp label dword ;Source(ST(1)) Dest (ST(0))
  34. dd PremDouble ;single single
  35. dd PremDouble ;single double
  36. dd PremX ;single zero (zero dividend: dest returned unchanged)
  37. dd PremSpclDest ;single special
  38. dd PremDouble ;double single
  39. dd PremDouble ;double double
  40. dd PremX ;double zero (zero dividend: dest returned unchanged)
  41. dd PremSpclDest ;double special
  42. dd ReturnIndefinite ;zero single (zero divisor)
  43. dd ReturnIndefinite ;zero double (zero divisor)
  44. dd ReturnIndefinite ;zero zero
  45. dd PremSpclDest ;zero special
  46. dd PremSpclSource ;special single
  47. dd PremSpclSource ;special double
  48. dd PremSpclSource ;special zero
  49. dd TwoOpBothSpcl ;special special
  50. dd ReturnIndefinite ;Two infinities (17th entry, outside the 4x4 grid; NOTE(review): presumably selected by TwoOpBothSpcl -- confirm in emarith.asm)
  ;***
  ;PremSpclDone - discard the "normal" continuation and return.
  ;
  ;On entry the top of stack holds the 4-byte return address of the normal
  ;(non-special) continuation, with the outer return address beneath it.
  ;NOTE(review): presumably this label is the return target used by the
  ;special-case routines -- confirm against the FPREM entry code.
  ;
  ;The full 32-bit stack pointer is adjusted. The 16-bit form (add sp,4) only
  ;updates the low word of ESP and would wrap without carry when SP = 0FFFCh,
  ;moving the stack pointer down by 64K instead of up by 4.
  PremSpclDone:
          add     esp,4                   ;Clean off return address for normal
          ret
  54. ;***
  ;PremSpclDest - dispatch target used when the dest (ST(0), the dividend) is
  ;tagged special and the source is not.
  ;Input: edi = pointer to the dest stack entry.
  ;An infinite dividend is routed to ReturnIndefinite; every other special
  ;dest is handed to SpclDest.
  55. PremSpclDest:
  56. mov al,EMSEG:[edi].bTag ;Pick up tag of dest (al is overwritten)
  57. cmp al,bTAG_INF ;Dividing infinity?
  58. jz ReturnIndefinite ;Invalid operation if so
  59. jmp SpclDest ;In emarith.asm
  60. ;***
  ;PremSpclSource - dispatch target used when the source (ST(1), the divisor)
  ;is tagged special and the dest is not.
  ;Input: cl = tag of source.
  ;Anything other than an infinite divisor goes to SpclSource; an infinite
  ;divisor falls through into PremX.
  61. PremSpclSource:
  62. cmp cl,bTAG_INF ;Dividing by infinity?
  63. jnz SpclSource ;in emarith.asm
  ;PremX is also a dispatch target in its own right: the table uses it for a
  ;zero dividend with a single or double divisor.
  64. PremX:
  65. ;Return Dest unchanged, quotient = 0
  66. mov EMSEG:[SWcc],0 ;Zero the condition codes (quotient bits all clear)
  67. ret
  68. ;*******************************************************************************
  ;Quotient-to-condition-code map.
  ;
  ;MapQuo is an 8-entry byte table indexed by the low three bits of the
  ;quotient. Entry n is the OR of the condition-code masks for the bits set
  ;in n: quotient bit 0 maps to C1, bit 1 to C3 and bit 2 to C0.
  Q0      equ     C1                      ;quotient bit 0
  Q1      equ     C3                      ;quotient bit 1
  Q2      equ     C0                      ;quotient bit 2
  MapQuo  label   byte
          db      0,  Q0,    Q1,    Q1+Q0         ;quotient low bits 0..3
          db      Q2, Q2+Q0, Q2+Q1, Q2+Q1+Q0      ;quotient low bits 4..7
  ;Prem1Cont - FPREM1 fix-up of the remainder produced by the division code.
  ;
  ;Inputs:
  ;       edx:eax = remainder, normalized
  ;       ebx:esi = divisor
  ;       ebp     = quotient
  ;       edi     = exponent difference (remainder - divisor), zero or less
  ;       ecx     = 0 (positive sign)
  ;Outputs (falls through into, or jumps to, PremCont):
  ;       edx:eax = remainder magnitude, normalized
  ;       ebp     = quotient, incremented if the negative remainder was chosen
  ;       edi     = exponent difference, zero or less
  ;       ch      = bSign if the remainder's sign must be flipped, else 0
  ;
  ;At this point the remainder is non-negative and smaller than the divisor.
  ;However, for FPREM1 we need -divisor/2 .le. remainder .le. divisor/2. If
  ;remainder = divisor/2, whether we choose + or - depends on whichever gives
  ;us an even quotient (the usual IEEE rounding rule). The quotient must be
  ;incremented if we use the negative remainder.
  ;
  ;The exponent difference is a signed quantity and is tested with signed
  ;jumps. The mantissas are unsigned 64-bit quantities, so they are tested
  ;with unsigned jumps (jb/ja). A signed test on the low halves would give
  ;the wrong answer whenever bit 31 of eax and esi differ.
  Prem1Cont:
          cmp     edi,-1
          jl      PremCont                ;Remainder below divisor/2
          jg      NegRemainExp0           ;Remainder above divisor/2
  ;Exponent difference is exactly -1, so remainder vs. divisor/2 is decided
  ;by comparing the two mantissas directly.
          cmp     edx,ebx                 ;High halves
          jb      PremCont                ;Remainder below divisor/2
          ja      NegRemain               ;Remainder above divisor/2
          cmp     eax,esi                 ;High halves equal: low halves decide
          jb      PremCont                ;Remainder below divisor/2
          ja      NegRemain               ;Remainder above divisor/2
  ;Remainder = divisor/2. Ensure quotient is even
          test    ebp,1                   ;Even?
          jz      PremCont
  NegRemain:
  ;Theoretically we subtract divisor from remainder once more, leaving us
  ;with a negative remainder. But since we use sign/magnitude representation,
  ;we want the abs() of that with sign bit set--so subtract remainder from
  ;(larger) divisor. Note that exponent difference is -1, so we must align
  ;binary points first. The bit shifted out of ebx is dropped on purpose:
  ;the subtraction below is done modulo 2^64 and its true result fits.
          add     esi,esi
          adc     ebx,ebx                 ;Double divisor to align binary points
  NegRemainExp0:
          sub     esi,eax
          sbb     ebx,edx                 ;Subtract remainder
          mov     eax,esi
          mov     edx,ebx                 ;Result in edx:eax
          inc     ebp                     ;Increase quotient
  ;Must normalize result of subtraction. bsr overwrites all of ecx, so the
  ;sign is loaded into ch only after the shift count is no longer needed.
          bsr     ecx,edx                 ;Look for 1 bit
          jnz     @F
          sub     edi,32
          xchg    edx,eax                 ;Shift left 32 bits
          bsr     ecx,edx
  @@:
          lea     edi,[edi+ecx-31]        ;Fix up exponent for normalization
          not     cl                      ;Low 5 bits = 31 - bit number
          shld    edx,eax,cl
          shl     eax,cl
          mov     ch,bSign                ;Flip sign of remainder
  ;Falls through into PremCont
  134. PremCont:
  ;Store the remainder into the current stack entry and report the low three
  ;quotient bits in the status-word condition codes.
  135. ;edx:eax = remainder, normalized
  136. ;ebp = quotient
  137. ;edi = exponent difference, zero or less
  138. ;ch = sign
  ;The result tag is derived from the low mantissa half: the two assertions
  ;below guarantee that the 0/1 produced by setnz is bTAG_SNGL/bTAG_VALID.
  139. or eax,eax ;Low bits zero?
  140. .erre bTAG_VALID eq 1
  141. .erre bTAG_SNGL eq 0
  142. setnz cl ;if low half==0 then cl=0 else cl=1
  143. mov esi,EMSEG:[CURstk] ;esi = entry that receives the result
  144. mov ebx,esi
  145. NextStackElem ebx,Prem ;NOTE(review): presumably steps ebx to ST(1), the divisor -- macro not shown here
  146. add di,EMSEG:[ebx].wExp ;Compute result exponent
  147. cmp di,IexpMin-IexpBias
  148. jle PremUnderflow
  149. SavePremResult:
  150. mov EMSEG:[esi].lManLo,eax
  151. xor EMSEG:[esi].bSgn,ch ;Toggles the stored sign when ch = bSign
  152. mov EMSEG:[esi].lManHi,edx
  153. and ebp,7 ;Keep last 3 bits of quotient only
  154. ; and give write buffers a break
  155. mov EMSEG:[esi].wExp,di ;Only the low 16 bits of edi are stored
  156. mov EMSEG:[esi].bTag,cl
  157. mov al,MapQuo[ebp] ;Get cond. codes for this quotient
  158. mov EMSEG:[SWcc],al
  159. ret
  160. NextStackWrap ebx,Prem ;Tied to NextStackElem above
  ;Reached from PremCont when the result exponent is at or below
  ;IexpMin-IexpBias; both paths rejoin SavePremResult.
  161. PremUnderflow:
  162. test EMSEG:[CWmask],Underflow ;Is exception unmasked?
  163. jz UnmaskedPremUnder
  164. mov cl,bTAG_DEN ;Masked: store the result with the denormal tag
  165. jmp SavePremResult
  166. UnmaskedPremUnder:
  167. add edi,UnderBias ;Additional exp. bias for unmasked resp.
  168. or EMSEG:[CURerr],Underflow ;Record the underflow in CURerr
  169. jmp SavePremResult
  170. ;*******************************************************************************
  ;PremDouble - remainder of two valid (single/double tagged) operands.
  ;
  ;Inputs:
  ;       edi     = [CURstk], the dest entry (ST(0), the dividend)
  ;       ebx:esi = ST(1) mantissa (the divisor)
  ;       ecx     = ExpSgn of ST(1); the exponent is in the upper 16 bits
  ;       [esp]   = 4-byte return address for the special-case path (discarded)
  ;Outputs, at the ret that ends the division code:
  ;       edx:eax = remainder, normalized
  ;       ebp     = quotient
  ;       edi     = exponent difference, zero or less
  ;       ecx     = 0 (positive sign)
  ;NOTE(review): that ret presumably lands in PremCont or Prem1Cont, whose
  ;documented inputs match these outputs -- confirm against the FPREM entry code.
  ;
  ;The full 32-bit stack pointer is adjusted below. The 16-bit form
  ;(add sp,4) would not carry into the upper half of ESP when SP = 0FFFCh.
  PremDouble:
          add     esp,4                   ;Clean off return address for special
          mov     eax,EMSEG:[edi].lManLo
          mov     edx,EMSEG:[edi].lManHi
          movsx   edi,EMSEG:[edi].wExp
          xor     ebp,ebp                 ;Quotient, in case we skip stage 1
          sar     ecx,16                  ;Bring exponent down
          sub     edi,ecx                 ;Get exponent difference
          jl      ExitPremLoop            ;If dividend is smaller, return it.
  ;FPREM is performed in two stages. The first stage is used only if the
  ;exponent difference is greater than 31. It reduces the exponent difference
  ;by 32, and repeats until the difference is less than 32. Note that
  ;unlike the hardware FPREM instruction, we are not limited to reducing
  ;the exponent by only 63--we just keep looping until it's done.
  ;
  ;The second stage performs ordinary 1-bit-at-a-time long division.
  ;It stops when the exponent difference is zero, meaning we have an
  ;integer quotient and the final remainder.
  ;
  ;edx:eax = dividend
  ;ebx:esi = divisor
  ;edi = exponent difference
  ;ebp = 0 (initial quotient)
          cmp     edi,32                  ;Do we need to do stage 1?
          jl      FitDivisor              ;No, start stage 2
  ;FPREM stage 1
  ;
  ;Exponent difference is at least 32. Use 32-bit division to compute
  ;quotient and exact remainder, reducing exponent difference by 32.
  ;DIV instruction will overflow if dividend >= divisor. In this case,
  ;subtract divisor from dividend to ensure no overflow. This will change
  ;the quotient, but that doesn't matter because we only need the last
  ;3 bits of the quotient (and we're about to calculate 32 quotient bits).
  ;This subtraction will not affect the remainder.
  ;
  ;FpremStage1 is also the re-entry point after a remainder whose high half
  ;was zero has been shifted left 32 bits. The shifted value can be any
  ;nonzero 32-bit number in edx, so it may again be >= divisor and needs the
  ;same pre-subtraction. One subtraction always suffices: both operands fit
  ;in 64 bits and the divisor is normalized, so dividend is below 2*divisor.
  FpremStage1:
          sub     eax,esi
          sbb     edx,ebx
          jnc     FpremReduce32           ;Was dividend big?
          add     eax,esi                 ;Restore dividend, it was smaller
          adc     edx,ebx
  ;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
  ;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
  ;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
  ;this guess will be no more than 2 larger than the correct value of that
  ;quotient digit (and never smaller). Divisor meets magnitude condition
  ;because it's normalized.
  ;
  ;This loop typically takes 117 clocks.
  ;edx:eax = dividend
  ;ebx:esi = divisor
  ;edi = exponent difference
  ;ebp = quotient (zero)
  FpremReduce32:
  ;We know that dividend is below divisor, but it is still possible that
  ;high dividend == high divisor, which will cause the DIV instruction
  ;to overflow.
          cmp     edx,ebx                 ;Will DIV instruction overflow?
          jae     PremOvfl
          div     ebx                     ;Guess a quotient "digit"
  ;Currently, remainder in edx = dividend - (quotient * high half divisor).
  ;The definition of remainder is dividend - (quotient * all divisor). So
  ;if we subtract (quotient * low half divisor) from edx, we'll get
  ;the true remainder. If it's negative, our guess was too big.
          mov     ebp,eax                 ;Save quotient
          mov     ecx,edx                 ;Save remainder
          mul     esi                     ;Quotient * low half divisor
          neg     eax                     ;Subtract from dividend extended with 0
          sbb     ecx,edx                 ;Subtract from remainder
          mov     edx,ecx                 ;Remainder back to edx:eax (flags kept)
          jnc     HavPremQuo              ;Was quotient OK?
  FpremCorrect:
          dec     ebp                     ;Quotient was too big (dec leaves CF alone)
          add     eax,esi                 ;Add divisor back into remainder
          adc     edx,ebx
          jnc     FpremCorrect            ;Repeat if quotient is still too big
  HavPremQuo:
          sub     edi,32                  ;Exponent reduced
          cmp     edi,32                  ;Exponent difference within 31?
          jl      PremNormalize           ;Do it a bit a time
          or      edx,edx                 ;Check for zero remainder
          jnz     FpremReduce32           ;Remainder is below divisor: safe to divide
          or      eax,eax                 ;Remainder 0?
          jz      ExactPrem
          xchg    edx,eax                 ;Shift left 32 bits
          sub     edi,32                  ;Another 32 bits reduced
          cmp     edi,32
          jge     FpremStage1             ;Shifted value may be >= divisor
          xor     ebp,ebp                 ;No quotient bits are valid
          jmp     PremNormalize
  261. PremOvfl:
  ;Stage 1 helper: produce a quotient digit without executing DIV.
  262. ;edx:eax = dividend
  263. ;ebx:esi = divisor
  264. ;On exit, ebp = second quotient "digit"
  265. ;
  266. ;Come here if divide instruction would overflow. This must mean that edx == ebx,
  267. ;i.e., the high halves of the dividend and divisor are equal. Assume a result
  268. ;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
  269. ; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
  270. ;and divisor are equal, dividend - divisor * 2^32 can be computed by
  271. ;subtracting only the low halves. When adding divisor (in ebx) to this, note
  272. ;that edx == ebx, and we want the result in edx anyway.
  273. ;
  274. ;Note also that since dividend < divisor, the
  275. ;dividend - divisor * 2^32 calculation must always be negative. Thus the
  276. ;addition of divisor back to it should generate a carry if it goes positive.
  ;NOTE(review): the branch that reaches this label tests jae, so the
  ;derivation above holds only if every path into FpremReduce32 guarantees
  ;dividend < divisor; it does not cover edx > ebx.
  277. mov ebp,-1 ;Max quotient digit
  278. sub eax,esi ;Calculate correct remainder
  279. add edx,eax ;Should set CY if quotient fit
  280. mov eax,esi ;edx:eax has new remainder (mov leaves CY intact)
  281. jc HavPremQuo ;Remainder was positive
  282. jmp FpremCorrect ;Otherwise step the quotient digit down
  ;ExactPrem - the remainder is exactly zero: store a zero result and return
  ;past the pending continuation.
  ;
  ;Input: eax = 0
  ;Writes a zero mantissa, zero exponent and bTAG_ZERO to the entry at
  ;[CURstk]. The stored sign byte is not touched, and SWcc is not written
  ;here.
  ;
  ;The full 32-bit stack pointer is adjusted. The 16-bit form (add sp,4)
  ;would not carry into the upper half of ESP when SP = 0FFFCh.
  ExactPrem:
  ;eax = 0
          mov     esi,EMSEG:[CURstk]
          mov     EMSEG:[esi].lManLo,eax
          mov     EMSEG:[esi].lManHi,eax
          add     esp,4                   ;Clean off first return address
          mov     EMSEG:[esi].wExp,ax
          mov     EMSEG:[esi].bTag,bTAG_ZERO
          ret
  292. ;FPREM stage 2
  293. ;
  294. ;Exponent difference is less than 32. Use restoring long division to
  295. ;compute quotient bits until exponent difference is zero. Note that we
  296. ;often get more than one bit/loop: BSR is used to scan off leading
  297. ;zeros each time around. Since the divisor is normalized, we can
  298. ;instantly compute a zero quotient bit for each leading zero bit.
  299. ;
  300. ;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
  301. ;plus 3 clocks/bit (BSR time). If we had to use this for 32-bit reductions
  302. ;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
  303. ;instead of the 112 required by stage 1!
  ;
  ;Entry points: FitDivisor (from PremDouble when stage 1 is skipped),
  ;PremNormalize (from stage 1) and FpremLoop (loop back edge).
  304. FpremLoop:
  305. ;edx:eax = dividend (remainder) minus divisor
  306. ;ebx:esi = divisor
  307. ;ebp = quotient
  308. ;edi = exponent difference, less than 32
  309. ;
  310. ;If R is current remainder and d is divisor, then we have edx:eax = R - d,
  311. ;which is negative. We want 2*R - d, which is positive.
  312. ;2*R - d = 2*(R - d) + d.
  313. add eax,eax ;2*(R - d)
  314. adc edx,edx
  315. add eax,esi ;2*(R-d) + d = 2*R - d
  316. adc edx,ebx
  317. add ebp,ebp ;Double quotient too
  318. dec edi ;Decrement exponent difference
  319. DivisorFit:
  320. inc ebp ;Count one in quotient
  321. PremNormalize:
  322. bsr ecx,edx ;Find first 1 bit
  323. jz PremHighZero ;High half is zero: handled separately
  324. not cl
  325. and cl,1FH ;Convert bit no. to shift count
  326. shld edx,eax,cl ;Normalize
  327. shl eax,cl
  328. sub edi,ecx ;Reduce exponent difference
  329. jl PremTooFar ;Shifted past exponent difference zero
  330. shl ebp,cl ;Shift quotient
  331. FitDivisor:
  332. ;Dividend could be larger or smaller than divisor
  333. sub eax,esi
  334. sbb edx,ebx
  335. jnc DivisorFit ;No borrow: divisor fit, count a 1 bit
  336. ;Couldn't subtract divisor from dividend.
  337. or edi,edi ;Is exponent difference zero or less?
  338. jg FpremLoop ;No, more quotient bits to compute
  339. add eax,esi ;Restore dividend
  340. adc edx,ebx
  341. xor ecx,ecx ;Sign is positive
  342. ret
  343. PremTooFar:
  ;Reached from PremNormalize with the remainder already normalized.
  344. ;Exponent difference in edi went negative when reduced by shift count in ecx.
  345. ;We need a quotient corresponding to exponent difference of zero.
  346. add ecx,edi ;Restore exponent difference (the value it had before the shift)
  347. shl ebp,cl ;Fix up quotient
  ;ExitPremLoop is also jumped to directly from PremDouble when the dividend's
  ;exponent is below the divisor's; the dividend is then returned as the
  ;remainder with quotient 0.
  348. ExitPremLoop:
  349. ;edx:eax = remainder, normalized
  350. ;ebp = quotient
  351. ;edi = exponent difference, zero or less
  352. xor ecx,ecx ;Sign is positive
  353. ret
  354. PremHighZero:
  ;Reached from PremNormalize when bsr finds no set bit in edx.
  355. ;High half of remainder is all zero, so we've reduced exponent difference
  356. ;by 32 bits and overshot. We need a quotient corresponding to exponent
  357. ;difference of zero, so we just shift it by the original difference. Then
  358. ;we need to normalize the low half remainder.
  359. mov ecx,edi
  360. shl ebp,cl ;Fix up quotient
  361. bsr ecx,eax ;Find first 1 bit in low half
  362. jz ExactPrem ;Low half zero as well: remainder is exactly zero
  363. lea edi,[edi+ecx-63] ;Fix up exponent for normalization
  364. xchg eax,edx ;Shift by 32 bits
  365. not cl ;Low 5 bits of cl = 31 - bit number
  366. shl edx,cl ;Normalize remainder
  367. xor ecx,ecx ;Sign is positive
  368. ret