mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
452 lines
9.8 KiB
452 lines
9.8 KiB
;------------------------------------------------------------------------
|
|
; LibHlprs.a
|
|
;
|
|
; Library helper routines needed by the code generator.
|
|
;
|
|
|
|
|
|
#include "assert.a"
|
|
|
|
|
|
;------------------------------------------------------------------------
|
|
|
|
#ifdef DEBUG

; VerifyLibHlpr(FUNC) -- DEBUG-only cross-check of the 68000 and 68020
; implementations of a library helper.
;
; The macro is invoked right after the cProc line of FUNC.  It supplies a
; complete nogen body for FUNC and then opens a new cProc named FUNC_00,
; so the code that follows the invocation is assembled under the FUNC_00
; name.  The generated FUNC body does the following:
;
;   1. pushes d0, d1 and d2
;   2. calls FUNC_00 and keeps its d0 result in d2
;   3. pops the original d0 and d1 operands and calls FUNC_20
;   4. compares both d0 results and asserts that they are equal
;   5. pops the saved d2 and returns (d0 holds the FUNC_20 result)
;
; The macro body is a single logical line built from continuations, so
; no comments may be placed inside it.

#define Cat(A,B) A##B

#define VerifyLibHlpr(FUNC) cBegin nogen \ \
        movem.l <d0,d1,d2>,-(sp) \ \
        jbsr Cat(FUNC,_00) \ \
        move.l d0,d2 \ \
        movem.l (sp)+,<d0,d1> \ \
        jbsr Cat(FUNC,_20) \ \
        cmp.l d0,d2 \ \
        AssertEq("Bad libhlpr result") \ \
        move.l (sp)+,d2 \ \
        rts \ \
        cEnd nogen \ \
        cProc Cat(FUNC,_00),PUBLIC+SYSCALL

#else

; Non-DEBUG builds: the macro expands to nothing, so the code following
; each invocation is assembled directly as the body of FUNC.

#define VerifyLibHlpr(FUNC)

#endif
|
|
|
|
;------------------------------------------------------------------------
|
|
|
|
|
|
code
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULDivT -- unsigned 32-bit by 32-bit division (68000 version)
;
; In:   d0 = dividend, written a:b (high word : low word)
;       d1 = divisor,  written c:d (must be non-zero; asserted)
;
; Out:  d0 = 32-bit quotient
;       d1 = destroyed
;
; All other registers are preserved.
;
; Method:
;   Operands frequently have only 16 significant bits, so those cases
;   are handled with DIVU:
;
;     * c == 0 (divisor fits in 16 bits): the quotient q1:q2 is built
;       from at most two DIVU steps
;
;           q1 = (0:a)  / d     remainder r1 feeds the next step
;           q2 = (r1:b) / d     remainder r2 is thrown away
;
;       Because r1 < d, the second DIVU cannot overflow.
;
;     * c == 0 and a == 0: the first DIVU is skipped (q1 = r1 = 0).
;
;     * c != 0: UDivMod performs the general divide.  It returns the
;       quotient in d1, which is then copied to d0.
;
; In DEBUG builds VerifyLibHlpr turns ULDivT into a checking wrapper and
; the code below is assembled as ULDivT_00.
;------------------------------------------------------------------------

cProc ULDivT,PUBLIC+SYSCALL

VerifyLibHlpr(ULDivT)

cBegin nogen

        AssertCode("tst.l d1")
        AssertNe("ULDivT: div by 0")

        ; Trailing comments show the register contents after each step.

        swap    d1                      ; d0 = a:b    d1 = d:c
        tst.w   d1                      ; examine c, the divisor high word
        ifeq                            ; c == 0: divisor is a 16-bit value
            move.w  d0,d1               ; d0 = a:b    d1 = d:b
            clr.w   d0                  ; d0 = a:0
            swap    d0                  ; d0 = 0:a    (sets flags from a)
            ifne                        ; a != 0: high word needs a divide
                swap    d1              ;             d1 = b:d
                divu    d1,d0           ; d0 = r1:q1
                swap    d1              ;             d1 = d:b
            endif                       ; if skipped, d0 = 0:0 (r1 = q1 = 0)
            eor.w   d0,d1               ; three EORs exchange the low
            eor.w   d1,d0               ;   words of d0 and d1
            eor.w   d0,d1               ; d0 = r1:b   d1 = d:q1
            swap    d1                  ;             d1 = q1:d
            divu    d1,d0               ; d0 = r2:q2
            move.w  d0,d1               ;             d1 = q1:q2
            move.l  d1,d0               ; d0 = q1:q2  (the quotient)
            rts
        endif

        ; General case: the divisor needs more than 16 bits.

        swap    d1                      ; restore divisor to c:d
        jbsr    UDivMod                 ; remainder in d0, quotient in d1
        move.l  d1,d0                   ; return the quotient in d0

        rts

cEnd nogen
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; LDivT -- signed 32-bit by 32-bit division (68000 version)
;
; In:   d0 = dividend
;       d1 = divisor
;
; Out:  d0 = quotient
;       d1 = destroyed
;
; All other registers are preserved.
;
; Method:
;   Both operands are replaced by their magnitudes and handed to ULDivT.
;   When the operand signs match, the unsigned quotient is already the
;   answer, so ULDivT is entered with a jump and returns straight to our
;   caller.  When the signs differ, ULDivT is called and the quotient is
;   negated afterwards.
;
; In DEBUG builds VerifyLibHlpr turns LDivT into a checking wrapper and
; the code below is assembled as LDivT_00.
;------------------------------------------------------------------------

cProc LDivT,PUBLIC+SYSCALL

VerifyLibHlpr(LDivT)

cBegin nogen

        tst.l   d0                      ; negative dividend?
        ifmi
            neg.l   d0                  ; yes: take its magnitude
            tst.l   d1                  ; negative divisor as well?
            jpl     10$                 ; no: signs differ, negate result
            neg.l   d1                  ; yes: take its magnitude
            jra     ULDivT              ; (-)/(-) is positive: tail-jump
        endif

        ; The dividend is non-negative from here on.

        tst.l   d1                      ; negative divisor?
        jpl     ULDivT                  ; no: (+)/(+) is positive: tail-jump
        neg.l   d1                      ; yes: take its magnitude

10$:    jbsr    ULDivT                  ; signs differ: divide magnitudes
        neg.l   d0                      ;   and negate the quotient
        rts

cEnd nogen
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULModT -- unsigned 32-bit by 32-bit modulo (68000 version)
;
; In:   d0 = dividend, written a:b (high word : low word)
;       d1 = divisor,  written c:d (must be non-zero; asserted)
;
; Out:  d0 = 32-bit remainder
;       d1 = destroyed
;
; All other registers are preserved.
;
; Method:
;   Operands frequently have only 16 significant bits, so those cases
;   are handled with DIVU:
;
;     * c == 0 (divisor fits in 16 bits): the remainder is smaller than
;       the divisor, so it fits in 16 bits and the result is 0:r2 with
;
;           r1 = (0:a)  % d
;           r2 = (r1:b) % d
;
;       Because r1 < d, the second DIVU cannot overflow.
;
;     * c == 0 and a == 0: the first DIVU is skipped (r1 = 0).
;
;     * c != 0: UDivMod performs the general divide and already leaves
;       the (possibly full 32-bit) remainder in d0.
;
; In DEBUG builds VerifyLibHlpr turns ULModT into a checking wrapper and
; the code below is assembled as ULModT_00.
;------------------------------------------------------------------------

cProc ULModT,PUBLIC+SYSCALL

VerifyLibHlpr(ULModT)

cBegin nogen

        AssertCode("tst.l d1")
        AssertNe("ULModT: mod by 0")

        ; Trailing comments show the register contents after each step.

        swap    d1                      ; d0 = a:b    d1 = d:c
        tst.w   d1                      ; examine c, the divisor high word
        ifeq                            ; c == 0: divisor is a 16-bit value
            move.w  d0,d1               ; d0 = a:b    d1 = d:b
            clr.w   d0                  ; d0 = a:0
            swap    d0                  ; d0 = 0:a    (sets flags from a)
            ifne                        ; a != 0: high word needs a divide
                swap    d1              ;             d1 = b:d
                divu    d1,d0           ; d0 = r1:q1
                swap    d1              ;             d1 = d:b
            endif                       ; if skipped, d0 = 0:0 (r1 = 0)
            move.w  d1,d0               ; d0 = r1:b
            swap    d1                  ;             d1 = b:d
            divu    d1,d0               ; d0 = r2:q2
            clr.w   d0                  ; d0 = r2:0   (drop the quotient)
            swap    d0                  ; d0 = 0:r2   (the remainder)
            rts
        endif

        ; General case: the divisor needs more than 16 bits.

        swap    d1                      ; restore divisor to c:d
        jbsr    UDivMod                 ; leaves the remainder in d0

        rts

cEnd nogen
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; LModT -- signed 32-bit by 32-bit modulo (68000 version)
;
; In:   d0 = dividend
;       d1 = divisor
;
; Out:  d0 = remainder; its sign follows the sign of the dividend
;       d1 = destroyed
;
; All other registers are preserved.
;
; Method:
;   The sign of the divisor has no influence on the result, so the
;   divisor is simply replaced by its magnitude.  A non-negative
;   dividend is passed to ULModT with a jump, so ULModT returns straight
;   to our caller.  A negative dividend is negated, reduced by ULModT,
;   and the remainder is negated again.
;
; In DEBUG builds VerifyLibHlpr turns LModT into a checking wrapper and
; the code below is assembled as LModT_00.
;------------------------------------------------------------------------

cProc LModT,PUBLIC+SYSCALL

VerifyLibHlpr(LModT)

cBegin nogen

        tst.l   d1                      ; negative divisor?
        ifmi
            neg.l   d1                  ; yes: take its magnitude
        endif

        tst.l   d0                      ; negative dividend?
        jpl     ULModT                  ; no: remainder is positive, tail-jump

        neg.l   d0                      ; yes: take its magnitude,
        jbsr    ULModT                  ;   reduce it,
        neg.l   d0                      ;   and give the remainder its sign
        rts

cEnd nogen
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; UDivMod -- general unsigned 32-bit by 32-bit divide with remainder
;
; In:   d0 = dividend
;       d1 = divisor, required to be greater than 65535 (asserted)
;
; Out:  d0 = 32-bit remainder
;       d1 = 32-bit quotient (its upper 16 bits are always zero)
;
; All other registers are preserved; d2 and d3 are used as scratch and
; are saved and restored on the stack.
;
; Method:
;   Classic shift-and-subtract division on the 64-bit pair d0:d1, with
;   d0 collecting the running remainder and d1 collecting quotient bits.
;   Because the divisor exceeds 65535, the quotient fits in 16 bits, so
;   the first 16 of the 32 iterations could never produce a quotient
;   bit.  They are replaced by shifting the pair left 16 bits up front,
;   leaving 16 real iterations.
;------------------------------------------------------------------------

UDivMod:

        movem.l <d2,d3>,-(sp)           ; save scratch registers

        AssertCode("cmp.l #0xffff,d1")
        AssertHi("UDivMod: divisor < 64K")

        move.l  d1,d3                   ; d3 = divisor for the whole loop

        ; Pre-shift: with the dividend written a:b, set up
        ; d0 = 0:a and d1 = b:0.

        moveq   #0,d1
        move.w  d0,d1                   ; d1 = 0:b
        swap    d1                      ; d1 = b:0

        clr.w   d0                      ; d0 = a:0
        swap    d0                      ; d0 = 0:a

        ; Remaining iterations.  NOTE(review): until_dec is defined
        ; elsewhere; the count of 15 is presumably a DBRA-style
        ; countdown giving 16 passes -- confirm against its definition.

        moveq   #15,d2
        do
            add.l   d1,d1               ; shift d0:d1 left by one bit;
            addx.l  d0,d0               ;   X carries d1 bit 31 into d0
            cmp.l   d3,d0               ; remainder >= divisor (unsigned)?
            ifhs
                sub.l   d3,d0           ; yes: subtract the divisor
                addq.w  #1,d1           ;   and record a 1 quotient bit
            endif
        until_dec d2

        AssertCode("cmp.l #0xffff,d1")
        AssertLs("UDivMod: quotient >= 64k")

        movem.l (sp)+,<d2,d3>           ; restore scratch registers
        rts
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULMulT -- 32-bit by 32-bit multiplication, low 32 bits of the product
;           (68000 version)
;
; The same routine serves signed and unsigned operands: only the low 32
; bits of the full 64-bit product are returned, and those bits do not
; depend on the signedness of the operands.
;
; In:   d0 = operand 1, written a:b (high word : low word)
;       d1 = operand 2, written c:d
;
; Out:  d0 = low 32 bits of the product
;       d1 = destroyed
;
; All other registers are preserved.
;
; Method:
;   (a:b) * (c:d) = b*d + ((a*d + b*c) << 16) + ((a*c) << 32)
;   The a*c term lies entirely above bit 31 and is never computed.  Of
;   the cross terms only the low word matters, since it is shifted up by
;   16 bits.  The words b and d are parked on the stack so that no
;   register other than d0 and d1 is needed.
;
; In DEBUG builds VerifyLibHlpr turns ULMulT into a checking wrapper and
; the code below is assembled as ULMulT_00.
;------------------------------------------------------------------------

cProc ULMulT,PUBLIC+SYSCALL

VerifyLibHlpr(ULMulT)

cBegin nogen

        move.w  d0,-(sp)                ; stack: b
        move.w  d1,-(sp)                ; stack: d (top), b
        swap    d1                      ; d1 = d:c
        mulu    d0,d1                   ; d1 = b*c
        swap    d0                      ; d0 = b:a
        mulu    (sp),d0                 ; d0 = a*d  (d read, not popped)
        add.w   d0,d1                   ; d1.w = low word of (a*d + b*c)
        move.w  (sp)+,d0                ; d0.w = d  (popped)
        mulu    (sp)+,d0                ; d0 = b*d  (b popped)
        swap    d0                      ; high word of b*d into low half
        add.w   d1,d0                   ; add the cross-term word to it
        swap    d0                      ; d0 = b*d + (cross word << 16)
        rts

cEnd nogen
|
|
|
|
|
|
|
|
;========================================================================
|
|
;
|
|
; The following are 68020+ versions of the lib-helper math routines. They
|
|
; use identical reg conventions as above, so the comments are not repeated.
|
|
;
|
|
; REVIEW: these routines can be used for testing the accuracy of the 68000
|
|
; versions above. Also, some slightly tricky run-time init code could
|
|
; check if the cpu is a 68020+ and rewrite the LibHlpr thunks to use the
|
|
; 68020 routines for optimal speed. This would even work if this segment
|
|
; was swappable given how the swapper updates thunks. (pretty cool, huh?!)
|
|
;
|
|
;========================================================================
|
|
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULDivT_20 -- unsigned 32/32 divide, 68020+ version
;
; In:   d0 = dividend, d1 = divisor
; Out:  d0 = quotient
;
; The instruction is emitted as raw opcode words.  NOTE(review):
; presumably because the assembler does not accept the 68020 mnemonic --
; confirm.
;------------------------------------------------------------------------

cProc ULDivT_20,PUBLIC+SYSCALL
cBegin nogen

        ; Opcode words for:  divu.l d1,d0
        dc.w    $4c41
        dc.w    $0000

        rts

cEnd nogen
|
|
|
|
|
|
;------------------------------------------------------------------------
; LDivT_20 -- signed 32/32 divide, 68020+ version
;
; In:   d0 = dividend, d1 = divisor
; Out:  d0 = quotient
;
; The instruction is emitted as raw opcode words.  NOTE(review):
; presumably because the assembler does not accept the 68020 mnemonic --
; confirm.
;------------------------------------------------------------------------

cProc LDivT_20,PUBLIC+SYSCALL
cBegin nogen

        ; Opcode words for:  divs.l d1,d0
        dc.w    $4c41
        dc.w    $0800

        rts

cEnd nogen
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULModT_20 -- unsigned 32/32 modulo, 68020+ version
;
; In:   d0 = dividend, d1 = divisor
; Out:  d0 = remainder
;       d1 = destroyed (also holds the remainder on return)
;
; The instruction is emitted as raw opcode words.  NOTE(review):
; presumably because the assembler does not accept the 68020 mnemonic --
; confirm.
;------------------------------------------------------------------------

cProc ULModT_20,PUBLIC+SYSCALL
cBegin nogen

        ; Opcode words for:  divul.l d1,d1:d0
        ; (quotient goes to d0, remainder goes to d1)
        dc.w    $4c41
        dc.w    $0001

        move.l  d1,d0                   ; return the remainder in d0
        rts

cEnd nogen
|
|
|
|
|
|
;------------------------------------------------------------------------
; LModT_20 -- signed 32/32 modulo, 68020+ version
;
; In:   d0 = dividend, d1 = divisor
; Out:  d0 = remainder
;       d1 = destroyed (also holds the remainder on return)
;
; The instruction is emitted as raw opcode words.  NOTE(review):
; presumably because the assembler does not accept the 68020 mnemonic --
; confirm.
;------------------------------------------------------------------------

cProc LModT_20,PUBLIC+SYSCALL
cBegin nogen

        ; Opcode words for:  divsl.l d1,d1:d0
        ; This is the SIGNED form: bit 11 of the extension word $0801
        ; is set.  Quotient goes to d0, remainder goes to d1.
        dc.w    $4c41
        dc.w    $0801

        move.l  d1,d0                   ; return the remainder in d0
        rts

cEnd nogen
|
|
|
|
|
|
;------------------------------------------------------------------------
; ULMulT_20 -- 32*32 multiply returning the low 32 bits, 68020+ version
;
; In:   d0 = operand 1, d1 = operand 2
; Out:  d0 = low 32 bits of the product
;
; The instruction is emitted as raw opcode words.  NOTE(review):
; presumably because the assembler does not accept the 68020 mnemonic --
; confirm.
;------------------------------------------------------------------------

cProc ULMulT_20,PUBLIC+SYSCALL
cBegin nogen

        ; Opcode words for:  mulu.l d1,d0
        dc.w    $4c01
        dc.w    $0000

        rts

cEnd nogen
|
|
|