subttl emfprem.asm - FPREM and FPREM1 instructions page ;******************************************************************************* ;emfprem.asm - FPREM and FPREM1 instructions ; by Tim Paterson ; ; Microsoft Confidential ; ; Copyright (c) Microsoft Corporation 1991 ; All Rights Reserved ; ;Inputs: ; edi = [CURstk] ; ST(1) loaded into ebx:esi & ecx ; ;Revision History: ; ; [] 09/05/91 TP Initial 32-bit version. ; ;******************************************************************************* ;Dispatch table for remainder ; ;One operand has been loaded into ecx:ebx:esi ("source"), the other is ;pointed to by edi ("dest"). ; ;Tag of source is shifted. Tag values are as follows: .erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero .erre TAG_VALID eq 1 .erre TAG_ZERO eq 2 .erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty ;Any special case routines not found in this file are in emarith.asm ;Divisor Dividend tFpremDisp label dword ;Source(ST(1)) Dest (ST(0)) dd PremDouble ;single single dd PremDouble ;single double dd PremX ;single zero dd PremSpclDest ;single special dd PremDouble ;double single dd PremDouble ;double double dd PremX ;double zero dd PremSpclDest ;double special dd ReturnIndefinite ;zero single dd ReturnIndefinite ;zero double dd ReturnIndefinite ;zero zero dd PremSpclDest ;zero special dd PremSpclSource ;special single dd PremSpclSource ;special double dd PremSpclSource ;special zero dd TwoOpBothSpcl ;special special dd ReturnIndefinite ;Two infinites PremSpclDone: add sp,4 ;Clean off return address for normal ret ;*** PremSpclDest: mov al,EMSEG:[edi].bTag ;Pick up tag cmp al,bTAG_INF ;Dividing infinity? jz ReturnIndefinite ;Invalid operation if so jmp SpclDest ;In emarith.asm ;*** PremSpclSource: cmp cl,bTAG_INF ;Dividing by infinity? jnz SpclSource ;in emarith.asm PremX: ;Return Dest unchanged, quotient = 0 mov EMSEG:[SWcc],0 ret ;******************************************************************************* ;Map quotient bits to condition codes Q0 equ C1 Q1 equ C3 Q2 equ C0 MapQuo label byte db 0 db Q0 db Q1 db Q1+Q0 db Q2 db Q2+Q0 db Q2+Q1 db Q2+Q1+Q0 Prem1Cont: ;edx:eax = remainder, normalized ;ebx:esi = divisor ;ebp = quotient ;edi = exponent difference, zero or less ;ecx = 0 (positive sign) ; ;At this point, 0 <= remainder < divisor. However, for FPREM1 we need ; -divisor/2 <= remainder <= divisor/2. If remainder = divisor/2, whether ;we choose + or - is dependent on whichever gives us an even quotient ;(the usual IEEE rounding rule). Quotient must be incremented if we ;use negative remainder. cmp edi,-1 jl PremCont ;Remainder < divisor/2 jg NegRemainExp0 ;Remainder > divisor/2 ;Exponent is -1 cmp edx,ebx jl PremCont ;Remainder < divisor/2 jg NegRemain ;Remainder > divisor/2 cmp eax,esi jl PremCont ;Remainder < divisor/2 jg NegRemain ;Remainder > divisor/2 ;Remainder = divisor/2. Ensure quotient is even test ebp,1 ;Even? jz PremCont NegRemain: ;Theoretically we subtract divisor from remainder once more, leaving us ;with a negative remainder. But since we use sign/magnitude representation, ;we want the abs() of that with sign bit set--so subtract remainder from ;(larger) divisor. Note that exponent difference is -1, so we must align ;binary points first. add esi,esi adc ebx,ebx ;Double divisor to align binary points NegRemainExp0: sub esi,eax sbb ebx,edx ;Subtract remainder mov eax,esi mov edx,ebx ;Result in edx:eax mov ch,bSign ;Flip sign of remainder inc ebp ;Increase quotient ;Must normalize result of subtraction bsr ecx,edx ;Look for 1 bit jnz @F sub edi,32 xchg edx,eax ;Shift left 32 bits bsr ecx,edx @@: lea edi,[edi+ecx-31] ;Fix up exponent for normalization not cl shld edx,eax,cl shl eax,cl mov ch,bSign ;Flip sign of remainder PremCont: ;edx:eax = remainder, normalized ;ebp = quotient ;edi = exponent difference, zero or less ;ch = sign or eax,eax ;Low bits zero? .erre bTAG_VALID eq 1 .erre bTAG_SNGL eq 0 setnz cl ;if low half==0 then cl=0 else cl=1 mov esi,EMSEG:[CURstk] mov ebx,esi NextStackElem ebx,Prem add di,EMSEG:[ebx].wExp ;Compute result exponent cmp di,IexpMin-IexpBias jle PremUnderflow SavePremResult: mov EMSEG:[esi].lManLo,eax xor EMSEG:[esi].bSgn,ch mov EMSEG:[esi].lManHi,edx and ebp,7 ;Keep last 3 bits of quotient only ; and give write buffers a break mov EMSEG:[esi].wExp,di mov EMSEG:[esi].bTag,cl mov al,MapQuo[ebp] ;Get cond. codes for this quotient mov EMSEG:[SWcc],al ret NextStackWrap ebx,Prem ;Tied to NextStackElem above PremUnderflow: test EMSEG:[CWmask],Underflow ;Is exception unmasked? jz UnmaskedPremUnder mov cl,bTAG_DEN jmp SavePremResult UnmaskedPremUnder: add edi,UnderBias ;Additional exp. bias for unmasked resp. or EMSEG:[CURerr],Underflow jmp SavePremResult ;******************************************************************************* PremDouble: ;edi = [CURstk] ;ebx:esi = ST(1) mantissa, ecx = ExpSgn add sp,4 ;Clean off return address for special mov eax,EMSEG:[edi].lManLo mov edx,EMSEG:[edi].lManHi movsx edi,EMSEG:[edi].wExp xor ebp,ebp ;Quotient, in case we skip stage 1 sar ecx,16 ;Bring exponent down sub edi,ecx ;Get exponent difference jl ExitPremLoop ;If dividend is smaller, return it. ;FPREM is performed in two stages. The first stage is used only if the ;exponent difference is greater than 31. It reduces the exponent difference ;by 32, and repeats until the difference is less than 32. Note that ;unlike the hardware FPREM instruction, we are not limited to reducing ;the exponent by only 63--we just keep looping until it's done. ; ;The second stage performs ordinary 1-bit-at-a-time long division. ;It stops when the exponent difference is zero, meaning we have an ;integer quotient and the final remainder. ; ;edx:eax = dividend ;ebx:esi = divisor ;edi = exponent difference ;ebp = 0 (initial quotient) cmp edi,32 ;Do we need to do stage 1? jl FitDivisor ;No, start stage 2 ;FPREM stage 1 ; ;Exponent difference is at least 32. Use 32-bit division to compute ;quotient and exact remainder, reducing exponent difference by 32. ;DIV instruction will overflow if dividend >= divisor. In this case, ;subtract divisor from dividend to ensure no overflow. This will change ;the quotient, but that doesn't matter because we only need the last ;3 bits of the quotient (and we're about to calculate 32 quotient bits). ;This subtraction will not affect the remainder. sub eax,esi sbb edx,ebx jnc FpremReduce32 ;Was dividend big? add eax,esi ;Restore dividend, it was smaller adc edx,ebx ;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits": ;Guess a quotient digit by dividing two MSDs of dividend by the MSD of ;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then ;this guess will be no more than 2 larger than the correct value of that ;quotient digit (and never smaller). Divisor meets magnitude condition ;because it's normalized. ; ;This loop typically takes 117 clocks. ;edx:eax = dividend ;ebx:esi = divisor ;edi = exponent difference ;ebp = quotient (zero) FpremReduce32: ;We know that dividend < divisor, but it is still possible that ;high dividend == high divisor, which will cause the DIV instruction ;to overflow. cmp edx,ebx ;Will DIV instruction overflow? jae PremOvfl div ebx ;Guess a quotient "digit" ;Currently, remainder in edx = dividend - (quotient * high half divisor). ;The definition of remainder is dividend - (quotient * all divisor). So ;if we subtract (quotient * low half divisor) from edx, we'll get ;the true remainder. If it's negative, our guess was too big. mov ebp,eax ;Save quotient mov ecx,edx ;Save remainder mul esi ;Quotient * low half divisor neg eax ;Subtract from dividend extended with 0 sbb ecx,edx ;Subtract from remainder mov edx,ecx ;Remainder back to edx:eax jnc HavPremQuo ;Was quotient OK? FpremCorrect: dec ebp ;Quotient was too big add eax,esi ;Add divisor back into remainder adc edx,ebx jnc FpremCorrect ;Repeat if quotient is still too big HavPremQuo: sub edi,32 ;Exponent reduced cmp edi,32 ;Exponent difference within 31? jl PremNormalize ;Do it a bit a time or edx,edx ;Check for zero remainder jnz FpremReduce32 or eax,eax ;Remainder 0? jz ExactPrem xchg edx,eax ;Shift left 32 bits sub edi,32 ;Another 32 bits reduced cmp edi,32 jge FpremReduce32 xor ebp,ebp ;No quotient bits are valid jmp PremNormalize PremOvfl: ;edx:eax = dividend ;ebx:esi = divisor ;On exit, ebp = second quotient "digit" ; ;Come here if divide instruction would overflow. This must mean that edx == ebx, ;i.e., the high halves of the dividend and divisor are equal. Assume a result ;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) ) ; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend ;and divisor are equal, dividend - divisor * 2^32 can be computed by ;subtracting only the low halves. When adding divisor (in ebx) to this, note ;that edx == ebx, and we want the result in edx anyway. ; ;Note also that since dividend < divisor, the ;dividend - divisor * 2^32 calculation must always be negative. Thus the ;addition of divisor back to it should generate a carry if it goes positive. mov ebp,-1 ;Max quotient digit sub eax,esi ;Calculate correct remainder add edx,eax ;Should set CY if quotient fit mov eax,esi ;edx:eax has new remainder jc HavPremQuo ;Remainder was positive jmp FpremCorrect ExactPrem: ;eax = 0 mov esi,EMSEG:[CURstk] mov EMSEG:[esi].lManLo,eax mov EMSEG:[esi].lManHi,eax add sp,4 ;Clean off first return address mov EMSEG:[esi].wExp,ax mov EMSEG:[esi].bTag,bTAG_ZERO ret ;FPREM stage 2 ; ;Exponent difference is less than 32. Use restoring long division to ;compute quotient bits until exponent difference is zero. Note that we ;often get more than one bit/loop: BSR is used to scan off leading ;zeros each time around. Since the divisor is normalized, we can ;instantly compute a zero quotient bit for each leading zero bit. ; ;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks ;plus 3 clocks/bit (BSR time). If we had to use this for 32-bit reductions ;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop) ;instead of the 112 required by stage 1! FpremLoop: ;edx:eax = dividend (remainder) minus divisor ;ebx:esi = divisor ;ebp = quotient ;edi = exponent difference, less than 32 ; ;If R is current remainder and d is divisor, then we have edx:eax = R - d, ;which is negative. We want 2*R - d, which is positive. ;2*R - d = 2*(R - d) + d. add eax,eax ;2*(R - d) adc edx,edx add eax,esi ;2*(R-d) + d = 2*R - d adc edx,ebx add ebp,ebp ;Double quotient too dec edi ;Decrement exponent difference DivisorFit: inc ebp ;Count one in quotient PremNormalize: bsr ecx,edx ;Find first 1 bit jz PremHighZero not cl and cl,1FH ;Convert bit no. to shift count shld edx,eax,cl ;Normalize shl eax,cl sub edi,ecx ;Reduce exponent difference jl PremTooFar shl ebp,cl ;Shift quotient FitDivisor: ;Dividend could be larger or smaller than divisor sub eax,esi sbb edx,ebx jnc DivisorFit ;Couldn't subtract divisor from dividend. or edi,edi ;Is exponent difference zero or less? jg FpremLoop add eax,esi ;Restore dividend adc edx,ebx xor ecx,ecx ;Sign is positive ret PremTooFar: ;Exponent difference in edi went negative when reduced by shift count in ecx. ;We need a quotient corresponding to exponent difference of zero. add ecx,edi ;Restore exponent difference shl ebp,cl ;Fix up quotient ExitPremLoop: ;edx:eax = remainder, normalized ;ebp = quotient ;edi = exponent difference, zero or less xor ecx,ecx ;Sign is positive ret PremHighZero: ;High half of remainder is all zero, so we've reduced exponent difference ;by 32 bits and overshot. We need a quotient corresponding to exponent ;difference of zero, so we just shift it by the original difference. Then ;we need to normalize the low half remainder. mov ecx,edi shl ebp,cl ;Fix up quotient bsr ecx,eax jz ExactPrem lea edi,[edi+ecx-63] ;Fix up exponent for normalization xchg eax,edx ;Shift by 32 bits not cl shl edx,cl ;Normalize remainder xor ecx,ecx ;Sign is positive ret