;------------------------------------------------------------------------
; LibHlprs.a
;
; Library helper routines needed by the code generator: 32-bit multiply,
; divide and modulo built from the 16-bit MULU/DIVU instructions, plus
; single-instruction 68020+ equivalents.
;
; Register convention shared by every public routine in this file:
;
;       Entry:  d0 = first operand, d1 = second operand
;       Exit:   d0 = result, d1 = trashed
;       All other registers are preserved.
;

#include "assert.a"

;------------------------------------------------------------------------

#ifdef DEBUG

; This testing macro creates a function that calls both the 68000 and 68020
; versions of a LibHlpr routine and asserts if the results differ.
;
; The generated wrapper takes over the public name; the 68000 body that
; follows the macro invocation is emitted under the same name with an _00
; suffix.  The wrapper:
;
;       1. saves d0, d1 and d2 on the stack,
;       2. runs the _00 (68000) version and parks its result in d2,
;       3. reloads the original d0 and d1 (d2 stays on the stack),
;       4. runs the _20 (68020) version and compares the two results,
;       5. pops the saved d2 and returns the _20 result in d0.
;
; NOTE(review): the register lists on the two movem.l lines were missing
; from the copy of this file under review and have been reconstructed from
; the push/pop balance described above -- confirm against the original.
;
; NOTE(review): every body line ends in a backslash pair.  The second one
; is the preprocessor line continuation; the first is passed through to
; the assembler, presumably as a statement separator -- confirm.

#define Cat(A,B) A##B

#define VerifyLibHlpr(FUNC)     cBegin  nogen                           \ \
        movem.l <d0-d2>,-(sp)                                           \ \
        jbsr    Cat(FUNC,_00)                                           \ \
        move.l  d0,d2                                                   \ \
        movem.l (sp)+,<d0-d1>                                           \ \
        jbsr    Cat(FUNC,_20)                                           \ \
        cmp.l   d0,d2                                                   \ \
        AssertEq("Bad libhlpr result")                                  \ \
        move.l  (sp)+,d2                                                \ \
        rts                                                             \ \
cEnd    nogen                                                           \ \
cProc   Cat(FUNC,_00),PUBLIC+SYSCALL

#else

#define VerifyLibHlpr(FUNC)

#endif

;------------------------------------------------------------------------

        code

;------------------------------------------------------------------------
; ULDivT
;
; This function performs an unsigned 32/32 divide.
;
; Entry: d0 = 32-bit dividend (high/low words denoted as a:b)
;        d1 = 32-bit divisor  (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit quotient
;        d1 = trashed
;
; Note:  All other registers must be preserved!
;
;        Since the actual operands will often have only 16 significant
;        bits, we optimize for speed in these cases.  First, if the divisor
;        is a 16-bit value, we can use two DIVU instructions to quickly
;        compute the 32-bit quotient.  If the dividend is also a 16-bit
;        value, we can use a single DIVU without worrying about overflow.
;        These checks also simplify the full 32/32 case (see UDivMod)
;        since we know the quotient must be a 16-bit value in that case.
;
; Terms: For the optimized cases we know c==0, and thus:
;
;        (a:b) / (0:d) == q1:q2  where:
;
;        q1 = (0:a)  / d  (remainder r1 - used below)
;        q2 = (r1:b) / d  (remainder r2 - discarded)
;
;        r1 < d, so the second DIVU cannot overflow its 16-bit quotient.

cProc   ULDivT,PUBLIC+SYSCALL
VerifyLibHlpr(ULDivT)
cBegin  nogen

        AssertCode("tst.l d1")
        AssertNe("ULDivT: div by 0")

                                        ;   d0       d1
                                        ; ------   ------
        swap    d1                      ;  a:b      d:c
        tst.w   d1                      ;           c == 0 ?
        ifeq                            ;  a:b      d:0   (divisor is 16 bits)
         move.w d0,d1                   ;  a:b      d:b
         clr.w  d0                      ;  a:0      d:b
         swap   d0                      ;  0:a      d:b   (SWAP sets Z if a==0)
         ifne                           ;                 (skip q1 when a==0)
          swap  d1                      ;  0:a      b:d
          divu  d1,d0                   ; r1:q1     b:d
          swap  d1                      ; r1:q1     d:b
         endif                          ;                 (a==0: r1:q1 is 0:0)
         eor.w  d0,d1                   ; three EORs exchange the low words
         eor.w  d1,d0                   ;  of d0 and d1 without a scratch reg
         eor.w  d0,d1                   ; r1:b      d:q1
         swap   d1                      ; r1:b     q1:d
         divu   d1,d0                   ; r2:q2    q1:d
         move.w d0,d1                   ; r2:q2    q1:q2
         move.l d1,d0                   ; q1:q2    q1:q2
         rts                            ;
        endif

        swap    d1                      ; Unswap divisor
        jbsr    UDivMod                 ; Quotient comes back in d1 so
        move.l  d1,d0                   ;  move it to d0 before returning
        rts
cEnd    nogen

;------------------------------------------------------------------------
; LDivT
;
; This function performs a signed 32/32 divide.  Both operands are made
; non-negative, ULDivT does the work, and the quotient is negated when
; exactly one of the operands was negative.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = quotient
;        d1 = trashed
;
; Note:  All other registers must be preserved!

cProc   LDivT,PUBLIC+SYSCALL
VerifyLibHlpr(LDivT)
cBegin  nogen

        tst.l   d0                      ; Test sign of dividend
        ifmi                            ;
         neg.l  d0                      ; Make dividend positive
         tst.l  d1                      ; Test sign of divisor
         jpl    10$                     ; -/+ : divide, then negate result
         neg.l  d1                      ; Make divisor positive
         jra    ULDivT                  ; Do the divide (-/- yields +)
        endif                           ;
        tst.l   d1                      ; Test sign of divisor
        jpl     ULDivT                  ; Do the divide (+/+ yields +)
        neg.l   d1                      ; Make divisor positive
10$:    jbsr    ULDivT                  ; Do the divide (-/+ or +/- yields -)
        neg.l   d0                      ; Make result negative
        rts                             ; All done!
cEnd    nogen

;------------------------------------------------------------------------
; ULModT
;
; This function performs an unsigned 32/32 modulo operation.
;
; Entry: d0 = 32-bit dividend (high/low words denoted as a:b)
;        d1 = 32-bit divisor  (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit remainder
;        d1 = trashed
;
; Notes: All other registers must be preserved!
;
;        Since the actual operands will often have only 16 significant
;        bits, we optimize for speed in these cases.  First, if the divisor
;        is a 16-bit value, we can use two DIVU instructions to quickly
;        compute the modulo (which will fit in 16 bits since it will be
;        less than the divisor).  If the dividend is also a 16-bit value,
;        we can use a single DIVU without worrying about the quotient
;        overflowing.  These checks also simplify the full 32/32 case (see
;        UDivMod) since we know the quotient must be a 16-bit value in
;        that case (although in general the modulo will require 32 bits).
;
; Terms: For the optimized cases we know c==0, and thus:
;
;        (a:b) % (0:d) == 0:r2  where:
;
;        r1 = (0:a)  % d
;        r2 = (r1:b) % d

cProc   ULModT,PUBLIC+SYSCALL
VerifyLibHlpr(ULModT)
cBegin  nogen

        AssertCode("tst.l d1")
        AssertNe("ULModT: mod by 0")

                                        ;   d0       d1
                                        ; ------   ------
        swap    d1                      ;  a:b      d:c
        tst.w   d1                      ;           c == 0 ?
        ifeq                            ;  a:b      d:0   (divisor is 16 bits)
         move.w d0,d1                   ;  a:b      d:b
         clr.w  d0                      ;  a:0      d:b
         swap   d0                      ;  0:a      d:b   (SWAP sets Z if a==0)
         ifne                           ;                 (skip r1 when a==0)
          swap  d1                      ;  0:a      b:d
          divu  d1,d0                   ; r1:q1     b:d
          swap  d1                      ; r1:q1     d:b
         endif                          ;                 (a==0: r1 is 0)
         move.w d1,d0                   ; r1:b      d:b
         swap   d1                      ; r1:b      b:d
         divu   d1,d0                   ; r2:q2     b:d
         clr.w  d0                      ; r2:0
         swap   d0                      ;  0:r2
         rts                            ;
        endif

        swap    d1                      ; Unswap divisor
        jbsr    UDivMod                 ; Do full 32/32 modulo
        rts                             ; 32-bit remainder comes back in d0
cEnd    nogen

;------------------------------------------------------------------------
; LModT
;
; This function performs a signed 32/32 mod operation.  The sign of the
; divisor is simply dropped; the remainder takes the sign of the dividend.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = remainder (has same sign as dividend)
;        d1 = trashed
;
; Note:  All other registers must be preserved!

cProc   LModT,PUBLIC+SYSCALL
VerifyLibHlpr(LModT)
cBegin  nogen

        tst.l   d1                      ; Test sign of divisor
        ifmi                            ;
         neg.l  d1                      ; Make divisor positive
        endif                           ;
        tst.l   d0                      ; Test sign of dividend
        jpl     ULModT                  ; Compute modulus (positive result)
        neg.l   d0                      ; Make dividend positive
        jbsr    ULModT                  ; Compute modulus
        neg.l   d0                      ; Make result negative
        rts                             ; All done!
cEnd    nogen

;------------------------------------------------------------------------
; UDivMod
;
; This routine implements a 32/32 unsigned divide algorithm which assumes
; that the divisor is greater than 65535.  With this restriction, we know
; that overflow conditions cannot occur.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = 32-bit remainder
;        d1 = 32-bit quotient (note: upper 16 bits are guaranteed zero)
;
; Uses:  d2 = loop counter, d3 = divisor (both saved and restored here)
;
; Note:  All other registers must be preserved!
;
; NOTE(review): the movem.l register lists were missing from the copy of
; this file under review.  d2 and d3 are the only scratch registers the
; body writes, so the list was reconstructed as d2-d3 -- confirm.

UDivMod:
        movem.l <d2-d3>,-(a7)

        AssertCode("cmp.l #0xffff,d1")
        AssertHi("UDivMod: divisor < 64K")

        move.l  d1,d3                   ; d3 = divisor for the whole loop

; Preshift the d0:d1 pseudo register by 16 bits since we know that
; actually doing the first 16 iterations won't do any real work
; (we know the quotient must be all zeros in its upper 16 bits).

        moveq   #0,d1                   ;            d1 = 0:0
        move.w  d0,d1                   ;            d1 = 0:b
        swap    d1                      ;            d1 = b:0
        clr.w   d0                      ; d0 = a:0
        swap    d0                      ; d0 = 0:a

; Now loop through using the classic shift and subtract algorithm
; for the remaining 16 iterations.  Quotient bits accumulate in the low
; word of d1 while the dividend bits are shifted out of its high word.

        moveq   #15,d2                  ; 16 passes (count runs 15 down to 0)
        do
         add.l  d1,d1                   ; Shift d0:d1 left one bit; the bit
         addx.l d0,d0                   ;  leaving d1 enters d0 through X
         cmp.l  d3,d0                   ; Does the divisor fit?
         ifhs                           ;
          sub.l d3,d0                   ; Yes - take it out of the remainder
          addq.w #1,d1                  ;  and set this quotient bit
         endif                          ;
        until_dec d2

        AssertCode("cmp.l #0xffff,d1")
        AssertLs("UDivMod: quotient >= 64k")

        movem.l (a7)+,<d2-d3>
        rts

;------------------------------------------------------------------------
; ULMulT
;
; This function performs a signed or unsigned 32*32 multiplication.
;
; Note:  The fact that the operands could be signed or unsigned doesn't
;        matter since we return only the low 32 bits of the full 64-bit
;        result.
;
; Entry: d0 = 32-bit operand1 (high/low words denoted as a:b)
;        d1 = 32-bit operand2 (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit result
;        d1 = trashed
;
; Note:  All other registers must be preserved!
;
;        (a:b) * (c:d) == b*d + ((a*d + b*c) << 16) + ((a*c) << 32)
;
;        The a*c term lies entirely above bit 31 and is never computed;
;        only the low word of (a*d + b*c) can reach the result.

cProc   ULMulT,PUBLIC+SYSCALL
VerifyLibHlpr(ULMulT)
cBegin  nogen

        move.w  d0,-(sp)                ; Push b
        move.w  d1,-(sp)                ; Push d
        swap    d1                      ; d1   = d:c
        mulu    d0,d1                   ; d1.l = b*c
        swap    d0                      ; d0   = b:a
        mulu    (sp),d0                 ; d0.l = a*d
        add.w   d0,d1                   ; d1.w = LOWORD(a*d + b*c)
        move.w  (sp)+,d0                ; d0.w = d  (pop d)
        mulu    (sp)+,d0                ; d0.l = b*d  (pop b)
        swap    d0                      ;
        add.w   d1,d0                   ; Add cross terms into the high word
        swap    d0                      ; d0.l = b*d + LOWORD(a*d+b*c) << 16
        rts
cEnd    nogen

;========================================================================
;
; The following are 68020+ versions of the lib-helper math routines.  They
; use identical reg conventions as above, so the comments are not repeated.
;
; The 68020 long multiply/divide instructions are emitted with dc.w pairs
; (opcode word, then extension word); the intended mnemonic is given in
; the comment above each pair.
;
; REVIEW: these routines can be used for testing the accuracy of the 68000
; versions above.  Also, some slightly tricky run-time init code could
; check if the cpu is a 68020+ and rewrite the LibHlpr thunks to use the
; 68020 routines for optimal speed.  This would even work if this segment
; was swappable given how the swapper updates thunks.  (pretty cool, huh?!)
;
;========================================================================

;------------------------------------------------------------------------
; ULDivT_20

cProc   ULDivT_20,PUBLIC+SYSCALL
cBegin  nogen

;       divu.l  d1,d0                   ; d0 = d0 / d1 (unsigned)
        dc.w    $4c41
        dc.w    $0000
        rts
cEnd    nogen

;------------------------------------------------------------------------
; LDivT_20

cProc   LDivT_20,PUBLIC+SYSCALL
cBegin  nogen

;       divs.l  d1,d0                   ; d0 = d0 / d1 (signed)
        dc.w    $4c41
        dc.w    $0800
        rts
cEnd    nogen

;------------------------------------------------------------------------
; ULModT_20

cProc   ULModT_20,PUBLIC+SYSCALL
cBegin  nogen

;       divul.l d1,d1:d0                ; d1 = remainder, d0 = quotient
        dc.w    $4c41
        dc.w    $0001
        move.l  d1,d0                   ; Return the remainder
        rts
cEnd    nogen

;------------------------------------------------------------------------
; LModT_20

cProc   LModT_20,PUBLIC+SYSCALL
cBegin  nogen

;       divsl.l d1,d1:d0                ; d1 = remainder, d0 = quotient
;                                       ;  (signed: extension word bit 11 set)
        dc.w    $4c41
        dc.w    $0801
        move.l  d1,d0                   ; Return the remainder
        rts
cEnd    nogen

;------------------------------------------------------------------------
; ULMulT_20

cProc   ULMulT_20,PUBLIC+SYSCALL
cBegin  nogen

;       mulu.l  d1,d0                   ; d0 = low 32 bits of d0 * d1
        dc.w    $4c01
        dc.w    $0000
        rts
cEnd    nogen