Windows NT 4.0 source code leak
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

164 lines
4.1 KiB

// Export the entry label so other objects can reference it.
.globl ..__u64div
// unsigned 64 bit divide
// divide r4:r3 by r6:r5 with the result in r4:r3
// (r4 and r6 are the high words, r3 and r5 the low words; the routine
// falls back to a single 32 bit divide when both high words are zero)
//
// One five-word table entry in the .pdata section: the entry label, the
// end label placed after the last instruction, two zero words, and the
// label that marks the start of the body.
// NOTE(review): presumably the NT function-table layout (begin, end,
// exception handler, handler data, end of prologue) - confirm against
// the toolchain documentation.
.pdata
.align 2
.ualong ..__u64div,__u64div.e,0,0,__u64div.b
// Code follows in the .text section.
.text
.align 2
// ..__u64div - unsigned 64 bit by 64 bit divide, quotient only.
//
// In:    r4:r3 = dividend (r4 high word, r3 low word)
//        r6:r5 = divisor  (r6 high word, r5 low word)
// Out:   r4:r3 = quotient
// Traps: a twi instruction traps when the divisor is zero
// Uses:  r0 and r5-r12 as scratch, CR0, CR1 and the carry bit.
//        No loads or stores are executed and nothing is called; the
//        routine returns with blr / beqlr.
//
// Method: shift-and-subtract long division. The divisor is left justified
// so that cntlzw on the dividend can skip a whole run of zero quotient
// bits in one step instead of iterating once per bit.
..__u64div:
.function ..__u64div
// no prologue
__u64div.b:
// r0 = r4 | r6; CR0 records whether either high word is nonzero
or. r0,r4,r6
bne __u64div.1
// if both r4 and r6 are zero, use 32 bit divide
// (r4 is already zero on this path, so r4:r3 is the complete quotient)
divwu r3,r3,r5
// TO = 0x6 selects "equal" and "unsigned less than"; compared against
// an immediate of 0 only the "equal" case can fire.
twi 0x6,r5,0 // trap if division by zero
blr
__u64div.1:
// start by left justifying divisor
// this allows us to use cntlzw to speed
// calculations
//
// Note: slw/srw with a shift amount of 32 produce zero. The r9 == 32 and
// r10 == 32 cases below rely on that.
li r12,32 // r12 = 32 for rest of routine
cntlzw r9,r6 // r9 = leading zeros of divisor high word (32 if r6 == 0)
subf. r10,r9,r12 // r10 = 32 - r9; CR0 is consumed by the bne below
slw r6,r6,r9
srw r11,r5,r10 // bits of r5 that move up into r6
slw r5,r5,r9
or r6,r6,r11
// r0 = number of quotient bit positions still to be processed
li r0,32 // if divisor >= 2^32, quotient is at most 32 bits
bne __u64div.2
// r10 == 0 iff r6 == 0, so may need to shift again
// (the shift by 32 above has already moved the old r5 into r6 and
// cleared r5, so r6 now holds the whole divisor)
twi 0x6,r6,0 // trap if division by zero
cntlzw r9,r6
subf r10,r9,r12
slw r6,r6,r9
li r0,64 // quotient is up to 64 bits long
__u64div.2:
// now need to shift dividend by same amount
// note that first 32 bits are taken care of by
// changing the shift count.
//
// The dividend is widened into r8:r7:r4 (shifted left by r9) and r3 is
// cleared so it can collect quotient bits.
srw r8,r4,r10 // bits shifted out of the top of r4
slw r7,r4,r9
srw r11,r3,r10
slw r4,r3,r9
or r7,r7,r11
li r3,0
__u64div.3:
// main loop.
//
// Register roles from here on:
//   r6:r5        left justified divisor
//   r8:r7        part of the dividend that is compared with the divisor
//   r4:r3        remaining dividend bits leave through the top while
//                quotient bits enter at the bottom of r3
//   r0           shift steps still to perform
//   r12          constant 32
//   r9,r10,r11   shift scratch
//
// begin by left justifying dividend
// and adjusting shift count appropriately
cntlzw r9,r8
subf. r0,r9,r0 // r0 -= r9
blt __u64div.6 // done when count goes < 0
subf. r10,r9,r12 // r10 = 32 - r9; CR0 is consumed by the beq below
// shift r8:r7:r4:r3 left by r9 as one 128 bit quantity
slw r8,r8,r9
srw r11,r7,r10
or r8,r8,r11
slw r7,r7,r9
srw r11,r4,r10
or r7,r7,r11
slw r4,r4,r9
srw r11,r3,r10
or r4,r4,r11
slw r3,r3,r9
// if r10 == 0, high bit may not be one - try again
// (r9 was 32, i.e. r8 was zero, so this pass moved every word up by one)
beq __u64div.3
// assume dividend >= divisor
// r8:r7 -= r6:r5, with the borrow passed through the carry bit
subfc r7,r5,r7
subfe. r8,r6,r8 // CR0 = sign of the high word of the difference
ori r3,r3,1 // tentatively record a 1 quotient bit
// Both r8 and r6 had their top bit set, so a non-negative high word
// means the subtraction did not underflow and the guess was right.
bge __u64div.3
// The guess was wrong: this quotient bit is really 0.
// check for last quotient bit
cmpwi cr1,r0,0
// shift dividend/quotient left one bit
beq cr1,__u64div.5 // exit if last quotient bit
// rlwinm x,x,1,0,31 rotates left by one; each rlwimi then replaces the
// low bit with the bit that left the top of the next lower word.
rlwinm r8,r8,1,0,31
rlwinm r7,r7,1,0,31
rlwinm r4,r4,1,0,31
rlwimi r8,r7,0,31,31
rlwimi r7,r4,0,31,31
rlwimi r4,r3,1,31,31
// the 0..29 mask drops the two low bits: the wrapped-around top bit and
// the tentative quotient bit that has just moved up one place
rlwinm r3,r3,1,0,29 // also clears previous quotient bit
// add in divisor (now worth 1/2 subtracted value)
// i.e. 2*(x - d) + d = 2*x - d: the divisor subtracted one bit lower
addc r7,r7,r5
adde r8,r8,r6
__u64div.4:
// decrement count
addic. r0,r0,-1
// set quotient bit
ori r3,r3,1 // does not alter CR0, so beqlr still tests the addic. result
beqlr // if count has gone to zero, return
// shift dividend/quotient left one bit
srwi. r9,r8,31 // test upper bit of new dividend
beq __u64div.3 // if upper bit is 0 go back to original loop
// Upper bit is 1: shift left one bit and subtract without comparing.
rlwinm r8,r8,1,0,31
rlwinm r7,r7,1,0,31
rlwinm r4,r4,1,0,31
rlwimi r8,r7,0,31,31
rlwimi r7,r4,0,31,31
rlwimi r4,r3,1,31,31
slwi r3,r3,1 // keeps the bit just set; the new low bit is set at .4
// subtract divisor from dividend and repeat
subfc r7,r5,r7
subfe r8,r6,r8
b __u64div.4
__u64div.5:
// eliminate last quotient bit
// (reached when the tentative subtraction failed and no steps remain)
rlwinm r3,r3,0,0,30 // clears quotient bit
blr
__u64div.6:
// get correct shift count
// r0 went negative when r9 was subtracted from it; adding r9 back gives
// the number of steps that were still outstanding
add r9,r9,r0
// and shift quotient by it
// (only r4:r3 is shifted; zeros enter at the bottom)
subf r10,r9,r12
slw r4,r4,r9
srw r11,r3,r10
slw r3,r3,r9
or r4,r4,r11
blr
// End-of-function label, referenced by the .pdata entry.
__u64div.e:
// Symbol records emitted into the .debug$S section.
// After the leading 32 bit word, each record is a 16 bit length, a 16 bit
// record type, then the payload. The length counts every byte that
// follows the length field itself.
// NOTE(review): the leading 1 is presumably the CodeView signature /
// version word - confirm against the debug format specification.
.debug$S
.ualong 1
// Record 1: 17 bytes = 2 (type) + 4 (.ualong) + 1 (string length) + 10
.uashort 17
.uashort 0x9 # S_OBJNAME
.ualong 0
.byte 10, "u64div.obj"
// Record 2: 24 bytes = 2 (type) + 4 (single bytes) + 1 (string length) + 17
.uashort 24
.uashort 0x1 # S_COMPILE
.byte 0x42 # Target processor = PPC 604
.byte 3 # Language = ASM
// NOTE(review): the two zero bytes are presumably the remaining
// S_COMPILE flag bytes - confirm.
.byte 0
.byte 0
.byte 17, "PowerPC Assembler"