mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
407 lines
12 KiB
407 lines
12 KiB
subttl emfprem.asm - FPREM and FPREM1 instructions
|
|
page
|
|
;*******************************************************************************
|
|
;emfprem.asm - FPREM and FPREM1 instructions
|
|
; by Tim Paterson
|
|
;
|
|
; Microsoft Confidential
|
|
;
|
|
; Copyright (c) Microsoft Corporation 1991
|
|
; All Rights Reserved
|
|
;
|
|
;Inputs:
|
|
; edi = [CURstk]
|
|
; ST(1) loaded into ebx:esi & ecx
|
|
;
|
|
;Revision History:
|
|
;
|
|
; [] 09/05/91 TP Initial 32-bit version.
|
|
;
|
|
;*******************************************************************************
|
|
|
|
;Dispatch table for the remainder instructions.
;
;On entry to any routine reached through this table, one operand (the
;"source") has already been loaded into ecx:ebx:esi and the other (the
;"dest") is pointed to by edi.
;
;The source tag is shifted before being used; the rows below are laid out
;four per source tag, in dest-tag order.  The assembly-time checks pin the
;tag values that this row order relies on:

.erre   TAG_SNGL  eq 0                  ;SINGLE: low 32 bits are zero
.erre   TAG_VALID eq 1
.erre   TAG_ZERO  eq 2
.erre   TAG_SPCL  eq 3                  ;NAN, Infinity, Denormal, Empty

;Any special-case routine not defined in this file is in emarith.asm.

;                                        Divisor         Dividend
;                                        Source (ST(1))  Dest (ST(0))
tFpremDisp      label   dword
        dd      PremDouble              ;single          single
        dd      PremDouble              ;single          double
        dd      PremX                   ;single          zero
        dd      PremSpclDest            ;single          special
        dd      PremDouble              ;double          single
        dd      PremDouble              ;double          double
        dd      PremX                   ;double          zero
        dd      PremSpclDest            ;double          special
        dd      ReturnIndefinite        ;zero            single
        dd      ReturnIndefinite        ;zero            double
        dd      ReturnIndefinite        ;zero            zero
        dd      PremSpclDest            ;zero            special
        dd      PremSpclSource          ;special         single
        dd      PremSpclSource          ;special         double
        dd      PremSpclSource          ;special         zero
        dd      TwoOpBothSpcl           ;special         special
        dd      ReturnIndefinite        ;Two infinites
|
|
|
|
|
|
;PremSpclDone - exit that skips one return address.
;
;Discards the dword on top of the stack (the return address used by the
;normal, non-special path) and returns through the one beneath it.
;
;The adjustment is done on the full 32-bit ESP.  A 16-bit "add sp,4" in
;32-bit code changes only the low word of the stack pointer, so it loses the
;carry into the upper word whenever the low word wraps past 0FFFFh.
PremSpclDone:
        add     esp,4                   ;Clean off return address for normal
        ret
|
|
|
|
;***
|
|
;PremSpclDest - the dest operand (the dividend, at [edi]) carries a special tag.
;
;An infinite dividend is an invalid operation and yields the indefinite
;result.  Every other special dest is passed on to SpclDest (emarith.asm).
;The tag byte is left in al for the routine jumped to.
PremSpclDest:
        mov     al,EMSEG:[edi].bTag     ;al = tag of dest
        cmp     al,bTAG_INF             ;Is the dividend infinity?
        je      ReturnIndefinite        ;Yes: invalid operation
        jmp     SpclDest                ;No: generic handler in emarith.asm
|
|
|
|
;***
|
|
;PremSpclSource - the source operand (the divisor) carries a special tag.
;
;If cl does not hold the infinity tag, control goes to SpclSource
;(emarith.asm).  Dividing by infinity falls through into PremX.
PremSpclSource:
        cmp     cl,bTAG_INF             ;Is the divisor infinity?
        jne     SpclSource              ;No: generic handler in emarith.asm

;PremX - the dest is already the answer.
;
;Leaves the dest operand untouched and reports a quotient of zero by
;clearing the condition-code byte.
PremX:
        mov     EMSEG:[SWcc],0          ;Quotient = 0, dest unchanged
        ret
|
|
;*******************************************************************************
|
|
|
|
;Translation of quotient bits into condition codes.
;
;Quotient bit 0 is reported in C1, bit 1 in C3 and bit 2 in C0.

Q0      equ     C1
Q1      equ     C3
Q2      equ     C0

;MapQuo is indexed by the low three bits of the quotient (0..7) and gives the
;condition-code byte to store for that value.

MapQuo  label   byte
        db      0                       ;quotient bits 000
        db      Q0                      ;quotient bits 001
        db      Q1                      ;quotient bits 010
        db      Q1+Q0                   ;quotient bits 011
        db      Q2                      ;quotient bits 100
        db      Q2+Q0                   ;quotient bits 101
        db      Q2+Q1                   ;quotient bits 110
        db      Q2+Q1+Q0                ;quotient bits 111
|
|
|
|
Prem1Cont:

;FPREM1 fix-up.  Entered with the FPREM-style result:
;
;  edx:eax = remainder, normalized
;  ebx:esi = divisor
;  ebp     = quotient
;  edi     = exponent difference, zero or less
;  ecx     = 0 (positive sign)
;
;At this point, 0 <= remainder < divisor.  However, for FPREM1 we need
; -divisor/2 <= remainder <= divisor/2.  If remainder = divisor/2, whether
;we choose + or - depends on whichever gives us an even quotient (the usual
;IEEE rounding rule).  The quotient must be incremented if we use the
;negative remainder.
;
;Falls through into PremCont with ch holding the sign to apply.

        cmp     edi,-1
        jl      PremCont                ;Remainder < divisor/2
        jg      NegRemainExp0           ;Remainder > divisor/2

;Exponent difference is exactly -1, so the mantissas decide.  Mantissa
;words are unsigned, so the unsigned conditions (jb/ja) are required.  The
;high words are both normalized (bit 31 set), but the low words are
;arbitrary, and a signed compare misorders them whenever they differ in
;bit 31.

        cmp     edx,ebx
        jb      PremCont                ;Remainder < divisor/2
        ja      NegRemain               ;Remainder > divisor/2
        cmp     eax,esi
        jb      PremCont                ;Remainder < divisor/2
        ja      NegRemain               ;Remainder > divisor/2

;Remainder = divisor/2.  Ensure quotient is even

        test    ebp,1                   ;Even?
        jz      PremCont

NegRemain:

;Theoretically we subtract divisor from remainder once more, leaving us
;with a negative remainder.  But since we use sign/magnitude representation,
;we want the abs() of that with sign bit set--so subtract remainder from
;(larger) divisor.  Note that exponent difference is -1, so we must align
;binary points first.

        add     esi,esi
        adc     ebx,ebx                 ;Double divisor to align binary points

NegRemainExp0:
        sub     esi,eax
        sbb     ebx,edx                 ;Subtract remainder
        mov     eax,esi
        mov     edx,ebx                 ;Result in edx:eax
        inc     ebp                     ;Increase quotient

;Must normalize result of subtraction.  ecx is used as scratch by BSR, so
;the sign is loaded into ch only after normalization is finished.

        bsr     ecx,edx                 ;Look for 1 bit
        jnz     @F
        sub     edi,32
        xchg    edx,eax                 ;Shift left 32 bits
        bsr     ecx,edx
@@:
        lea     edi,[edi+ecx-31]        ;Fix up exponent for normalization
        not     cl                      ;Low 5 bits of cl = 31 - bit number
        shld    edx,eax,cl
        shl     eax,cl
        mov     ch,bSign                ;Flip sign of remainder
|
|
|
|
PremCont:

;Store the remainder and the quotient condition codes.
;
;  edx:eax = remainder, normalized
;  ebp     = quotient
;  edi     = exponent difference, zero or less
;  ch      = sign
;
;The result tag is derived from the low mantissa dword: cl = 0 when it is
;zero, cl = 1 otherwise.  The two checks below guarantee that these values
;are the SNGL and VALID tags respectively.

.erre   bTAG_VALID eq 1
.erre   bTAG_SNGL  eq 0

        test    eax,eax                 ;Low bits zero?
        setnz   cl                      ;cl = 0 if low half is zero, else 1
        mov     esi,EMSEG:[CURstk]      ;esi = current stack element
        mov     ebx,esi
        NextStackElem   ebx,Prem        ;Step ebx to the next stack element
        add     di,EMSEG:[ebx].wExp     ;Compute result exponent
        cmp     di,IexpMin-IexpBias
        jle     PremUnderflow

SavePremResult:
        mov     EMSEG:[esi].lManLo,eax
        xor     EMSEG:[esi].bSgn,ch     ;Apply sign in ch to the stored sign
        mov     EMSEG:[esi].lManHi,edx
        and     ebp,7                   ;Keep last 3 bits of quotient only
                                        ;  and give write buffers a break
        mov     EMSEG:[esi].wExp,di
        mov     EMSEG:[esi].bTag,cl
        mov     al,MapQuo[ebp]          ;Get cond. codes for this quotient
        mov     EMSEG:[SWcc],al
        ret

        NextStackWrap   ebx,Prem        ;Tied to NextStackElem above

;Result exponent is at or below IexpMin-IexpBias.

PremUnderflow:
        test    EMSEG:[CWmask],Underflow ;Is exception unmasked?
        jz      UnmaskedPremUnder
        mov     cl,bTAG_DEN             ;Masked: store result tagged denormal
        jmp     SavePremResult

UnmaskedPremUnder:
        or      EMSEG:[CURerr],Underflow ;Record the underflow exception
        add     edi,UnderBias           ;Additional exp. bias for unmasked resp.
        jmp     SavePremResult
|
|
|
|
;*******************************************************************************
|
|
|
|
PremDouble:

;Remainder of two ordinary (single/double tagged) operands.
;
;On entry:
;  edi     = [CURstk]
;  ebx:esi = ST(1) mantissa, ecx = ExpSgn
;
;The dword on top of the stack is the return address used by the special-
;case routines; it is discarded here so that the final RET goes to the
;address beneath it.  The adjustment uses the full 32-bit ESP: a 16-bit
;"add sp,4" in 32-bit code would lose the carry into the upper word of the
;stack pointer when the low word wraps.

        add     esp,4                   ;Clean off return address for special
        mov     eax,EMSEG:[edi].lManLo
        mov     edx,EMSEG:[edi].lManHi  ;edx:eax = dividend mantissa
        movsx   edi,EMSEG:[edi].wExp    ;edi = dividend exponent
        xor     ebp,ebp                 ;Quotient, in case we skip stage 1
        sar     ecx,16                  ;Bring exponent down
        sub     edi,ecx                 ;Get exponent difference
        jl      ExitPremLoop            ;If dividend is smaller, return it.

;FPREM is performed in two stages.  The first stage is used only if the
;exponent difference is greater than 31.  It reduces the exponent difference
;by 32, and repeats until the difference is less than 32.  Note that
;unlike the hardware FPREM instruction, we are not limited to reducing
;the exponent by only 63--we just keep looping until it's done.
;
;The second stage performs ordinary 1-bit-at-a-time long division.
;It stops when the exponent difference is zero, meaning we have an
;integer quotient and the final remainder.
;
;  edx:eax = dividend
;  ebx:esi = divisor
;  edi     = exponent difference
;  ebp     = 0 (initial quotient)

        cmp     edi,32                  ;Do we need to do stage 1?
        jl      FitDivisor              ;No, start stage 2
|
|
|
|
;FPREM stage 1
;
;The exponent difference is at least 32.  Each pass uses a 32-bit divide to
;produce 32 quotient bits and the exact remainder, lowering the exponent
;difference by 32.
;
;DIV overflows if dividend >= divisor, so the divisor is subtracted once up
;front when that is the case.  That alters the quotient, but only its last
;three bits are ever needed and 32 fresh quotient bits are about to be
;computed.  The remainder is not affected by this subtraction.

        sub     eax,esi
        sbb     edx,ebx
        jnc     FpremReduce32           ;Dividend was >= divisor: keep difference
        add     eax,esi                 ;Dividend was smaller: restore it
        adc     edx,ebx

;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
;guess a quotient digit by dividing the two most significant digits of the
;dividend by the most significant digit of the divisor.  Provided the
;divisor is >= 1/2 the radix (radix = 2^32 here), the guess is never too
;small and at most 2 too large.  The divisor satisfies this because it is
;normalized.
;
;This loop typically takes 117 clocks.
;
;  edx:eax = dividend
;  ebx:esi = divisor
;  edi     = exponent difference
;  ebp     = quotient (zero)

FpremReduce32:

;Dividend < divisor is known, but the high halves may still be equal, and
;that alone makes DIV overflow.

        cmp     edx,ebx                 ;Will DIV instruction overflow?
        jae     PremOvfl
        div     ebx                     ;Guess a quotient "digit"

;edx now holds dividend - (quotient * high half of divisor).  The true
;remainder is dividend - (quotient * whole divisor), so (quotient * low half
;of divisor) still has to be subtracted.  A borrow out of that subtraction
;means the guess was too large.

        mov     ebp,eax                 ;Save quotient
        mov     ecx,edx                 ;Save remainder
        mul     esi                     ;Quotient * low half divisor
        neg     eax                     ;Subtract from dividend extended with 0
        sbb     ecx,edx                 ;Subtract from remainder
        mov     edx,ecx                 ;Remainder back to edx:eax
        jnc     HavPremQuo              ;No borrow: quotient digit was right

FpremCorrect:
        dec     ebp                     ;Quotient was too big
        add     eax,esi                 ;Add divisor back into remainder
        adc     edx,ebx
        jnc     FpremCorrect            ;Repeat if quotient is still too big

HavPremQuo:
        sub     edi,32                  ;Exponent reduced
        cmp     edi,32                  ;Exponent difference within 31?
        jl      PremNormalize           ;Yes: finish a bit at a time
        test    edx,edx                 ;High half of remainder zero?
        jnz     FpremReduce32           ;No: go round again
        test    eax,eax                 ;Whole remainder zero?
        jz      ExactPrem
        xchg    edx,eax                 ;Shift left 32 bits
        sub     edi,32                  ;Another 32 bits reduced
        cmp     edi,32
        jge     FpremReduce32
        xor     ebp,ebp                 ;No quotient bits are valid
        jmp     PremNormalize
|
|
|
|
PremOvfl:

;Reached when the DIV in stage 1 would overflow.
;
;  edx:eax = dividend
;  ebx:esi = divisor
;  On exit, ebp = second quotient "digit"
;
;Overflow here must mean edx == ebx, i.e. the high halves of dividend and
;divisor are equal.  Assume a quotient digit of 2^32-1, so that
;
;  remainder = dividend - divisor * (2^32-1)
;            = dividend - divisor * 2^32 + divisor
;
;With equal high halves, dividend - divisor * 2^32 reduces to a subtraction
;of the low halves only.  When the divisor's high half (ebx) is then added
;in, note that edx == ebx and the result is wanted in edx anyway.
;
;Since dividend < divisor, dividend - divisor * 2^32 is always negative, so
;adding the divisor back produces a carry exactly when the sum goes
;positive.

        sub     eax,esi                 ;Calculate correct remainder
        add     edx,eax                 ;Should set CY if quotient fit
        mov     eax,esi                 ;edx:eax has new remainder
        mov     ebp,-1                  ;Max quotient digit (flags untouched)
        jc      HavPremQuo              ;Remainder was positive
        jmp     FpremCorrect
|
|
|
|
ExactPrem:

;The remainder is exactly zero.  Entered with eax = 0.
;
;Stores a zero mantissa and zero exponent into the current stack element,
;tags it as zero, discards one more return address and returns through the
;one beneath it.
;
;The stack adjustment uses the full 32-bit ESP: a 16-bit "add sp,4" in
;32-bit code would lose the carry into the upper word of the stack pointer
;when the low word wraps.
;
;NOTE(review): this exit does not write SWcc, unlike PremX and
;SavePremResult -- confirm the caller establishes the condition codes for
;the exact-result case.

        mov     esi,EMSEG:[CURstk]
        mov     EMSEG:[esi].lManLo,eax
        mov     EMSEG:[esi].lManHi,eax
        add     esp,4                   ;Clean off first return address
        mov     EMSEG:[esi].wExp,ax
        mov     EMSEG:[esi].bTag,bTAG_ZERO
        ret
|
|
|
|
|
|
;FPREM stage 2
;
;The exponent difference is below 32.  Restoring long division produces
;quotient bits until the exponent difference reaches zero.  More than one
;bit is often produced per pass: BSR scans off leading zeros each time
;around, and because the divisor is normalized every leading zero bit of the
;remainder is immediately a zero quotient bit.
;
;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
;plus 3 clocks/bit (BSR time).  If we had to use this for 32-bit reductions
;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
;instead of the 112 required by stage 1!

FpremLoop:

;  edx:eax = dividend (remainder) minus divisor
;  ebx:esi = divisor
;  ebp     = quotient
;  edi     = exponent difference, less than 32
;
;With R the current remainder and d the divisor, edx:eax holds R - d, which
;is negative.  The value wanted is 2*R - d, which is positive, and
;2*R - d = 2*(R - d) + d.

        add     eax,eax                 ;2*(R - d)
        adc     edx,edx
        add     eax,esi                 ;2*(R-d) + d = 2*R - d
        adc     edx,ebx
        add     ebp,ebp                 ;Double quotient too
        dec     edi                     ;Decrement exponent difference

DivisorFit:
        inc     ebp                     ;Count one in quotient

PremNormalize:
        bsr     ecx,edx                 ;Find first 1 bit
        jz      PremHighZero            ;High half is entirely zero
        not     cl
        and     cl,1FH                  ;Convert bit no. to shift count
        shld    edx,eax,cl              ;Normalize
        shl     eax,cl
        sub     edi,ecx                 ;Reduce exponent difference
        jl      PremTooFar              ;Shifted past exponent difference 0
        shl     ebp,cl                  ;Shift quotient

FitDivisor:

;Dividend could be larger or smaller than divisor

        sub     eax,esi
        sbb     edx,ebx
        jnc     DivisorFit              ;It fit: count a one bit

;Couldn't subtract divisor from dividend.

        test    edi,edi                 ;Is exponent difference zero or less?
        jg      FpremLoop               ;No: more quotient bits to go
        add     eax,esi                 ;Restore dividend
        adc     edx,ebx
        xor     ecx,ecx                 ;Sign is positive
        ret
|
|
|
|
PremTooFar:

;Normalizing by the shift count in ecx drove the exponent difference in edi
;below zero.  The quotient has to correspond to an exponent difference of
;exactly zero, so it is shifted by the pre-normalization difference, which
;is rebuilt here as (shift count + new difference).

        add     ecx,edi                 ;Restore exponent difference
        shl     ebp,cl                  ;Fix up quotient

ExitPremLoop:

;  edx:eax = remainder, normalized
;  ebp     = quotient
;  edi     = exponent difference, zero or less

        xor     ecx,ecx                 ;Sign is positive
        ret
|
|
|
|
PremHighZero:

;The high half of the remainder is entirely zero, which means the exponent
;difference has been reduced by 32 bits and overshot.  The quotient must
;correspond to an exponent difference of zero, so it is simply shifted by
;the original difference.  After that the low half of the remainder is
;normalized; if the low half is zero as well, the result is exact.

        mov     ecx,edi
        shl     ebp,cl                  ;Fix up quotient
        bsr     ecx,eax                 ;Find first 1 bit in low half
        jz      ExactPrem               ;Whole remainder is zero
        lea     edi,[edi+ecx-63]        ;Fix up exponent for normalization
        xchg    eax,edx                 ;Shift by 32 bits
        not     cl                      ;Low 5 bits of cl = 31 - bit number
        shl     edx,cl                  ;Normalize remainder
        xor     ecx,ecx                 ;Sign is positive
        ret
|