Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

452 lines
9.8 KiB

;------------------------------------------------------------------------
; LibHlprs.a
;
; Library helper routines needed by the code generator.
;
#include "assert.a"
;------------------------------------------------------------------------
#ifdef DEBUG
; This testing macro creates a function that calls both the 68000 and 68020
; versions of a LibHlpr routine and asserts if the results differ.
;
; The macro is invoked right after the cProc line of a helper, so the
; wrapper it generates is assembled under the public name FUNC. The cProc
; at the end of the macro then opens a second procedure, FUNC_00, and the
; 68000 body that follows the invocation is assembled under that name.
; The 68020 reference routine must exist under the name FUNC_20.
;
; Wrapper flow:
;   1. push d0 and d1 (the operands) and d2 (scratch)
;   2. call FUNC_00 and park its d0 result in d2
;   3. reload the original d0 and d1, then call FUNC_20
;   4. compare both results; AssertEq reports a mismatch
;   5. restore d2 and return with the FUNC_20 result still in d0
#define Cat(A,B) A##B
#define VerifyLibHlpr(FUNC) cBegin nogen \ \
movem.l <d0,d1,d2>,-(sp) \ \
jbsr Cat(FUNC,_00) \ \
move.l d0,d2 \ \
movem.l (sp)+,<d0,d1> \ \
jbsr Cat(FUNC,_20) \ \
cmp.l d0,d2 \ \
AssertEq("Bad libhlpr result") \ \
move.l (sp)+,d2 \ \
rts \ \
cEnd nogen \ \
cProc Cat(FUNC,_00),PUBLIC+SYSCALL
#else
; Without the debug define the macro expands to nothing, so each helper
; body is assembled directly under its public name.
#define VerifyLibHlpr(FUNC)
#endif
;------------------------------------------------------------------------
code
;------------------------------------------------------------------------
; ULDivT
;
; This function performs an unsigned 32/32 divide.
;
; Entry: d0 = 32-bit dividend (high/low words denoted as a:b)
;        d1 = 32-bit divisor (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit quotient
;        d1 = trashed
;
; Note: All other registers must be preserved!
;
; Since the actual operands will often have only 16 significant
; bits, we optimize for speed in these cases. First, if the divisor
; is a 16-bit value, we can use two DIVU instructions to quickly
; compute the 32-bit quotient. If the dividend is also a 16-bit
; value, we can use a single DIVU without worrying about overflow.
; These checks also simplify the full 32/32 case (see UDivMod)
; since we know the quotient must be a 16-bit value in that case.
;
; Terms: For the optimized cases we know c==0, and thus:
;
;   (a:b) / (0:d) == q1:q2 where:
;
;   q1 = (0:a) / d (remainder r1 - used below)
;   q2 = (r1:b) / d (remainder r2 - discarded)
;
; A zero divisor is only caught by the assertion at the top of the
; routine; no other check for it is made here.
;
; In the register traces below x:y means high word x, low word y.
cProc ULDivT,PUBLIC+SYSCALL
VerifyLibHlpr(ULDivT)
cBegin nogen
AssertCode("tst.l d1")
AssertNe("ULDivT: div by 0")
; Fast path test: swap the divisor so its high word c can be tested
; with a word-sized tst.
swap d1 ; d0=a:b d1=d:c
tst.w d1 ; is c (high word of the divisor) zero?
ifeq ; yes: the divisor is a 16-bit value
move.w d0,d1 ; d0=a:b d1=d:b
clr.w d0 ; d0=a:0
swap d0 ; d0=0:a (condition codes now reflect a)
ifne ; a != 0: the high word needs its own divide
swap d1 ; d1=b:d
divu d1,d0 ; d0=r1:q1 (0:a divided by d)
swap d1 ; d1=d:b
endif ; when skipped, d0=0:0 already stands for r1:q1
eor.w d0,d1 ; <3 eor's swap low words of d0,d1>
eor.w d1,d0 ;
eor.w d0,d1 ; d0=r1:b d1=d:q1
swap d1 ; d1=q1:d
divu d1,d0 ; d0=r2:q2 (r1 < d, so q2 fits in 16 bits)
move.w d0,d1 ; d1=q1:q2
move.l d1,d0 ; d0=q1:q2, the full quotient
rts ;
endif
; General path: the divisor needs more than 16 bits.
swap d1 ; Unswap divisor
jbsr UDivMod ; Quotient comes back in d1 so
move.l d1,d0 ; move it to d0 before returning
rts
cEnd nogen
;------------------------------------------------------------------------
; LDivT
;
; Signed 32/32 divide, layered on top of the unsigned ULDivT.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = quotient
;        d1 = trashed
;
; Note: All other registers must be preserved!
;
; Both operands are replaced by their magnitudes and handed to ULDivT.
; When the operand signs agree the quotient is already correct, so
; ULDivT is entered with a jump and returns straight to our caller.
; When they differ ULDivT is called and the quotient negated afterwards.
cProc LDivT,PUBLIC+SYSCALL
VerifyLibHlpr(LDivT)
cBegin nogen
	tst.l	d1		; Divisor negative?
	ifmi			;
	neg.l	d1		; Yes: continue with its magnitude
	tst.l	d0		; Dividend sign decides the quotient sign
	jpl	10$		; Positive dividend: signs differ
	neg.l	d0		; Negative dividend: take its magnitude
	jra	ULDivT		; Signs agree, quotient is positive
	endif			;
	tst.l	d0		; Divisor is not negative; check dividend
	jpl	ULDivT		; Signs agree, quotient is positive
	neg.l	d0		; Negative dividend: take its magnitude
10$:	jbsr	ULDivT		; Signs differ: divide the magnitudes
	neg.l	d0		; and negate the quotient
	rts			; Done
cEnd nogen
;------------------------------------------------------------------------
; ULModT
;
; This function performs an unsigned 32/32 modulo operation.
;
; Entry: d0 = 32-bit dividend (high/low words denoted as a:b)
;        d1 = 32-bit divisor (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit remainder
;        d1 = trashed
;
; Notes: All other registers must be preserved!
;
; Since the actual operands will often have only 16 significant
; bits, we optimize for speed in these cases. First, if the divisor
; is a 16-bit value, we can use two DIVU instructions to quickly
; compute the modulo (which will fit in 16 bits since it will be
; less than the divisor). If the dividend is also a 16-bit value,
; we can use a single DIVU without worrying about the quotient
; overflowing. These checks also simplify the full 32/32 case (see
; UDivMod) since we know the quotient must be a 16-bit value in
; that case (although in general the modulo will require 32 bits).
;
; Terms: For the optimized cases we know c==0, and thus:
;
;   (a:b) % (0:d) == 0:r2 where:
;
;   r1 = (0:a) % d
;   r2 = (r1:b) % d
;
; A zero divisor is only caught by the assertion at the top of the
; routine; no other check for it is made here.
;
; In the register traces below x:y means high word x, low word y.
cProc ULModT,PUBLIC+SYSCALL
VerifyLibHlpr(ULModT)
cBegin nogen
	AssertCode("tst.l d1")
	AssertNe("ULModT: mod by 0")
	swap	d1		; d0=a:b d1=d:c
	tst.w	d1		; is c (high word of the divisor) zero?
	ifeq			; yes: the divisor is a 16-bit value
	move.w	d0,d1		; d0=a:b d1=d:b
	clr.w	d0		; d0=a:0
	swap	d0		; d0=0:a (condition codes now reflect a)
	ifne			; a != 0: reduce the high word first
	swap	d1		; d1=b:d
	divu	d1,d0		; d0=r1:q1 (0:a divided by d)
	swap	d1		; d1=d:b
	endif			; when skipped, the high word of d0 is 0 = r1
	move.w	d1,d0		; d0=r1:b
	swap	d1		; d1=b:d
	divu	d1,d0		; d0=r2:q2 (r1 < d, so q2 fits in 16 bits)
	clr.w	d0		; d0=r2:0, quotient is not wanted
	swap	d0		; d0=0:r2
	rts			;
	endif
	; General path: the divisor needs more than 16 bits. UDivMod leaves
	; the 32-bit remainder in d0, which is exactly our result, so it is
	; entered with a jump and returns directly to our caller.
	swap	d1		; Unswap divisor
	jra	UDivMod		; Do full 32/32 modulo (tail call)
cEnd nogen
;------------------------------------------------------------------------
; LModT
;
; Signed 32/32 modulo, layered on top of the unsigned ULModT.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = remainder (has same sign as dividend)
;        d1 = trashed
;
; Note: All other registers must be preserved!
;
; The sign of the divisor has no influence on the result, so it is
; simply dropped. The sign of the dividend is reapplied to the
; remainder after ULModT has worked on the magnitudes.
cProc LModT,PUBLIC+SYSCALL
VerifyLibHlpr(LModT)
cBegin nogen
	tst.l	d1		; Divisor negative?
	ifmi			;
	neg.l	d1		; Yes: continue with its magnitude
	endif			;
	tst.l	d0		; Dividend negative?
	ifmi			;
	neg.l	d0		; Yes: take its magnitude,
	jbsr	ULModT		; compute the unsigned remainder
	neg.l	d0		; and give it the sign of the dividend
	rts			; Done
	endif			;
	jra	ULModT		; Remainder is already the final result
cEnd nogen
;------------------------------------------------------------------------
; UDivMod
;
; This routine implements a 32/32 unsigned divide algorithm which assumes
; that the divisor is greater than 65535. With this restriction, we know
; that overflow conditions cannot occur.
;
; Entry: d0 = 32-bit dividend
;        d1 = 32-bit divisor
;
; Exit:  d0 = 32-bit remainder
;        d1 = 32-bit quotient (note: upper 16 bits are guaranteed zero)
;
; Note: All other registers must be preserved!
;
; Register use inside the routine:
;   d3 = copy of the divisor
;   d0 = partial remainder (starts out as the high word of the dividend)
;   d1 = low word of the dividend in its upper half; quotient bits are
;        collected in its lower half while the dividend bits shift out
;   d2 = loop counter
; d2 and d3 are saved on entry and restored before returning.
UDivMod:
movem.l <d2,d3>,-(a7)
AssertCode("cmp.l #0xffff,d1")
AssertHi("UDivMod: divisor < 64K")
move.l d1,d3 ; d3 = divisor, frees d1 for the quotient
; Preshift the d0:d1 pseudo register by 16 bits since we know that
; actually doing the first 16 iterations won't do any real work
; (we know the quotient must be all zeros in its upper 16 bits).
moveq #0,d1 ; d1 = 0:0
move.w d0,d1 ; d1 = 0:low word of dividend
swap d1 ; d1 = low word of dividend:0
clr.w d0 ; d0 = high word of dividend:0
swap d0 ; d0 = 0:high word of dividend
; Now loop through using the classic shift and subtract algorithm
; for the remaining 16 iterations.
moveq #15,d2 ; counter for the 16 passes
do
add.l d1,d1 ; shift d1 left; the bit shifted out lands in X
addx.l d0,d0 ; shift d0 left and pull that bit in at the bottom
cmp.l d3,d0 ; partial remainder >= divisor?
ifhs
sub.l d3,d0 ; yes: subtract the divisor
addq.w #1,d1 ; and set this quotient bit (bit 0 is clear after the shift)
endif
until_dec d2
AssertCode("cmp.l #0xffff,d1")
AssertLs("UDivMod: quotient >= 64k")
movem.l (a7)+,<d2,d3>
rts
;------------------------------------------------------------------------
; ULMulT
;
; This function performs a signed or unsigned 32*32 multiplication.
;
; (Note: the fact that the operands could be signed or unsigned does not
; matter since we return only the low 32 bits of the full 64-bit result.)
;
; Entry: d0 = 32-bit operand1 (high/low words denoted as a:b)
;        d1 = 32-bit operand2 (high/low words denoted as c:d)
;
; Exit:  d0 = 32-bit result
;        d1 = trashed
;
; Note: All other registers must be preserved!
;
; Method: the low 32 bits of the product are
;   b*d + (LOWORD(a*d + b*c) << 16)
; The a*c term only affects bits 32 and up, so it is never computed.
; Copies of b and d are kept on the stack (two words, both popped again
; before returning) so that only d0 and d1 are modified.
cProc ULMulT,PUBLIC+SYSCALL
VerifyLibHlpr(ULMulT)
cBegin nogen
move.w d0,-(sp) ; Push b
move.w d1,-(sp) ; Push d (d is now on top, b beneath it)
swap d1 ; d1 = d:c
mulu d0,d1 ; d1.l = b*c
swap d0 ; d0 = b:a
mulu (sp),d0 ; d0.l = a*d (d is read but stays on the stack)
add.w d0,d1 ; d1.w = LOWORD(a*d + b*c)
move.w (sp)+,d0 ; Pop d into d0.w
mulu (sp)+,d0 ; d0.l = b*d (pops b)
swap d0 ; Bring the high word of b*d into the low word
add.w d1,d0 ; Add the cross-term word to it
swap d0 ; d0.l = b*d + LOWORD(a*d+b*c) << 16
rts
cEnd nogen
;========================================================================
;
; The following are 68020+ versions of the lib-helper math routines. They
; use identical reg conventions as above, so the comments are not repeated.
;
; REVIEW: these routines can be used for testing the accuracy of the 68000
; versions above. Also, some slightly tricky run-time init code could
; check if the cpu is a 68020+ and rewrite the LibHlpr thunks to use the
; 68020 routines for optimal speed. This would even work if this segment
; was swappable given how the swapper updates thunks. (pretty cool, huh?!)
;
;========================================================================
;------------------------------------------------------------------------
; ULDivT_20
;
; 68020+ unsigned 32/32 divide: quotient of d0 by d1 is left in d0.
; The instruction is emitted as two raw opcode words instead of a
; mnemonic (presumably because the assembler in use does not accept
; 68020 instructions - confirm before replacing them with the mnemonic).
cProc ULDivT_20,PUBLIC+SYSCALL
cBegin nogen
; divu.l d1,d0
dc.w $4c41
dc.w $0000
rts
cEnd nogen
;------------------------------------------------------------------------
; LDivT_20
;
; 68020+ signed 32/32 divide: quotient of d0 by d1 is left in d0.
; Same opcode word as ULDivT_20; the extension word differs only in the
; $0800 bit, which selects the signed form of the divide.
cProc LDivT_20,PUBLIC+SYSCALL
cBegin nogen
; divs.l d1,d0
dc.w $4c41
dc.w $0800
rts
cEnd nogen
;------------------------------------------------------------------------
; ULModT_20
;
; 68020+ unsigned 32/32 modulo. The divide leaves the quotient in d0 and
; the remainder in d1 (the divisor register is overwritten), so the
; remainder is copied to d0 to match the ULModT exit convention.
cProc ULModT_20,PUBLIC+SYSCALL
cBegin nogen
; divul.l d1,d1:d0
dc.w $4c41
dc.w $0001
move.l d1,d0 ; return the remainder in d0
rts
cEnd nogen
;------------------------------------------------------------------------
; LModT_20
;
; 68020+ signed 32/32 modulo. The divide leaves the quotient in d0 and
; the remainder in d1 (the divisor register is overwritten), so the
; remainder is copied to d0 to match the LModT exit convention.
;
; The extension word carries the $0800 bit (compare LDivT_20 with
; ULDivT_20), so the instruction encoded here is the signed divsl.l,
; not the unsigned divul.l used by ULModT_20.
;
; NOTE(review): if the signed divide overflows (most negative dividend
; with a divisor of -1) the 68020 is documented to leave both operands
; unchanged; d1 would then still hold the divisor and be returned as the
; remainder, whereas LModT yields 0. Confirm whether this case matters.
cProc LModT_20,PUBLIC+SYSCALL
cBegin nogen
; divsl.l d1,d1:d0
dc.w $4c41
dc.w $0801
move.l d1,d0 ; return the remainder in d0
rts
cEnd nogen
;------------------------------------------------------------------------
; ULMulT_20
;
; 68020+ 32*32 multiply: the low 32 bits of d0 times d1 are left in d0.
; As with ULMulT, the same routine serves signed and unsigned operands
; because only the low 32 bits of the product are returned.
cProc ULMulT_20,PUBLIC+SYSCALL
cBegin nogen
; mulu.l d1,d0
dc.w $4c01
dc.w $0000
rts
cEnd nogen