Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1440 lines
51 KiB

// TITLE("Compare, Move, Zero, and Fill Memory Support")
//++
//
// Copyright (c) 1990 Microsoft Corporation
//
// Module Name:
//
// xxmvmem.s
//
// Abstract:
//
// This module implements functions to compare, move, zero, and fill
// blocks of memory. If the memory is aligned, then these functions
// are very efficient.
//
// N.B. These routines MUST preserve all floating state since they are
// frequently called from interrupt service routines that normally
// do not save or restore floating state.
//
// Author:
//
// David N. Cutler (davec) 11-Apr-1990
//
// Environment:
//
// User or Kernel mode.
//
// Revision History:
//
//--
#include "ksmips.h"
SBTTL("Compare Memory")
//++
//
// ULONG
// RtlCompareMemory (
// IN PVOID Source1,
// IN PVOID Source2,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function compares two blocks of memory and returns the number
// of bytes that compared equal.
//
// Arguments:
//
// Source1 (a0) - Supplies a pointer to the first block of memory to
// compare.
//
// Source2 (a1) - Supplies a pointer to the second block of memory to
// compare.
//
// Length (a2) - Supplies the length, in bytes, of the memory to be
// compared.
//
// Return Value:
//
// The number of bytes that compared equal is returned as the function
// value. If all bytes compared equal, then the length of the orginal
// block of memory is returned.
//
//--
LEAF_ENTRY(RtlCompareMemory)
addu a3,a0,a2 // compute ending address of source1
move v0,a2 // save length of comparison
and t0,a2,32 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t4,a0,t1 // compute ending block address
beq zero,t1,100f // if eq, no 32-byte block to compare
or t0,a0,a1 // merge and isolate alignment bits
and t0,t0,0x3 //
bne zero,t0,CompareUnaligned // if ne, unalignment comparison
//
// Compare memory aligned.
//
CompareAligned: //
.set noreorder
.set noat
10: lw t0,0(a0) // compare 32-byte block
lw t1,0(a1) //
lw t2,4(a0) //
bne t0,t1,90f // if ne, first word not equal
lw t3,4(a1) //
lw t0,8(a0) //
bne t2,t3,20f // if ne, second word not equal
lw t1,8(a1) //
lw t2,12(a0) //
bne t0,t1,30f // if ne, third word not equal
lw t3,12(a1) //
lw t0,16(a0) //
bne t2,t3,40f // if ne, fourth word not equal
lw t1,16(a1) //
lw t2,20(a0) //
bne t0,t1,50f // if ne, fifth word not equal
lw t3,20(a1) //
lw t0,24(a0) //
bne t2,t3,60f // if ne, sixth word not equal
lw t1,24(a1) //
lw t2,28(a0) //
bne t0,t1,70f // if ne, seventh word not equal
lw t3,28(a1) //
addu a0,a0,32 // advance source1 to next block
bne t2,t3,80f // if ne, eighth word not equal
nop //
bne a0,t4,10b // if ne, more 32-byte blocks to compare
addu a1,a1,32 // update source2 address
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 100f //
//
// Compare memory unaligned.
//
CompareUnaligned: //
and t0,a0,0x3 // isolate source1 alignment
bne zero,t0,CompareUnalignedS1 // if ne, source1 unaligned
//
// Source1 is aligned and Source2 is unaligned.
//
CompareUnalignedS2: //
.set noreorder
.set noat
10: lw t0,0(a0) // compare 32-byte block
lwr t1,0(a1) //
lwl t1,3(a1) //
lw t2,4(a0) //
bne t0,t1,90f // if ne, first word not equal
lwr t3,4(a1) //
lwl t3,7(a1) //
lw t0,8(a0) //
bne t2,t3,20f // if ne, second word not equal
lwr t1,8(a1) //
lwl t1,11(a1) //
lw t2,12(a0) //
bne t0,t1,30f // if ne, third word not equal
lwr t3,12(a1) //
lwl t3,15(a1) //
lw t0,16(a0) //
bne t2,t3,40f // if ne, fourth word not equal
lwr t1,16(a1) //
lwl t1,19(a1) //
lw t2,20(a0) //
bne t0,t1,50f // if ne, fifth word not equal
lwr t3,20(a1) //
lwl t3,23(a1) //
lw t0,24(a0) //
bne t2,t3,60f // if ne, sixth word not equal
lwr t1,24(a1) //
lwl t1,27(a1) //
lw t2,28(a0) //
bne t0,t1,70f // if ne, seventh word not equal
lwr t3,28(a1) //
lwl t3,31(a1) //
addu a0,a0,32 // advance source1 to next block
bne t2,t3,80f // if ne, eighth word not equal
nop //
bne a0,t4,10b // if ne, more 32-byte blocks to compare
addu a1,a1,32 // update source2 address
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 100f //
//
// Source1 is unaligned, check Source2 alignment.
//
CompareUnalignedS1: //
and t0,a1,0x3 // isolate Source2 alignment
bne zero,t0,CompareUnalignedS1AndS2 // if ne, Source2 unaligned
//
// Source1 is unaligned and Source2 is aligned.
//
.set noreorder
.set noat
10: lwr t0,0(a0) // compare 32-byte block
lwl t0,3(a0) //
lw t1,0(a1) //
lwr t2,4(a0) //
lwl t2,7(a0) //
bne t0,t1,90f // if ne, first word not equal
lw t3,4(a1) //
lwr t0,8(a0) //
lwl t0,11(a0) //
bne t2,t3,20f // if ne, second word not equal
lw t1,8(a1) //
lwr t2,12(a0) //
lwl t2,15(a0) //
bne t0,t1,30f // if ne, third word not equal
lw t3,12(a1) //
lwr t0,16(a0) //
lwl t0,19(a0) //
bne t2,t3,40f // if ne, fourth word not equal
lw t1,16(a1) //
lwr t2,20(a0) //
lwl t2,23(a0) //
bne t0,t1,50f // if ne, fifth word not equal
lw t3,20(a1) //
lwr t0,24(a0) //
lwl t0,27(a0) //
bne t2,t3,60f // if ne, sixth word not equal
lw t1,24(a1) //
lwr t2,28(a0) //
lwl t2,31(a0) //
bne t0,t1,70f // if ne, seventh word not equal
lw t3,28(a1) //
addu a0,a0,32 // advance source1 to next block
bne t2,t3,80f // if ne, eighth word not equal
nop //
bne a0,t4,10b // if ne, more 32-byte blocks to compare
addu a1,a1,32 // update source2 address
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 100f //
//
// Source1 and Source2 are unaligned.
//
CompareUnalignedS1AndS2: //
.set noreorder
.set noat
10: lwr t0,0(a0) // compare 32-byte block
lwl t0,3(a0) //
lwr t1,0(a1) //
lwl t1,3(a1) //
lwr t2,4(a0) //
lwl t2,7(a0) //
bne t0,t1,90f // if ne, first word not equal
lwr t3,4(a1) //
lwl t3,7(a1) //
lwr t0,8(a0) //
lwl t0,11(a0) //
bne t2,t3,20f // if ne, second word not equal
lwr t1,8(a1) //
lwl t1,11(a1) //
lwr t2,12(a0) //
lwl t2,15(a0) //
bne t0,t1,30f // if ne, third word not equal
lwr t3,12(a1) //
lwl t3,15(a1) //
lwr t0,16(a0) //
lwl t0,19(a0) //
bne t2,t3,40f // if ne, fourth word not equal
lwr t1,16(a1) //
lwl t1,19(a1) //
lwr t2,20(a0) //
lwl t2,23(a0) //
bne t0,t1,50f // if ne, fifth word not equal
lwr t3,20(a1) //
lwl t3,23(a1) //
lwr t0,24(a0) //
lwl t0,27(a0) //
bne t2,t3,60f // if ne, sixth word not equal
lwr t1,24(a1) //
lwl t1,27(a1) //
lwr t2,28(a0) //
lwl t2,31(a0) //
bne t0,t1,70f // if ne, seventh word not equal
lwr t3,28(a1) //
lwl t3,31(a1) //
addu a0,a0,32 // advance source1 to next block
bne t2,t3,80f // if ne, eighth word not equal
nop //
bne a0,t4,10b // if ne, more 32-byte blocks to compare
addu a1,a1,32 // update source2 address
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 100f //
//
// Adjust source1 and source2 pointers dependent on position of miscompare in
// block.
//
20: addu a0,a0,4 // mismatch on second word
addu a1,a1,4 //
b 90f //
30: addu a0,a0,8 // mismatch on third word
addu a1,a1,8 //
b 90f //
40: addu a0,a0,12 // mistmatch on fourth word
addu a1,a1,12 //
b 90f //
50: addu a0,a0,16 // mismatch on fifth word
addu a1,a1,16 //
b 90f //
60: addu a0,a0,20 // mismatch on sixth word
addu a1,a1,20 //
b 90f //
70: addu a0,a0,24 // mismatch on seventh word
addu a1,a1,24 //
b 90f //
80: subu a0,a0,4 // mismatch on eighth word
addu a1,a1,28 //
90: subu a2,a3,a0 // compute remaining bytes
//
// Compare 1-byte blocks.
//
100: addu t2,a0,a2 // compute ending block address
beq zero,a2,120f // if eq, no bytes to zero
110: lb t0,0(a0) // compare 1-byte block
lb t1,0(a1) //
addu a1,a1,1 // advance pointers to next block
bne t0,t1,120f // if ne, byte not equal
addu a0,a0,1 //
bne a0,t2,110b // if ne, more 1-byte block to zero
120: subu t0,a3,a0 // compute number of bytes not compared
subu v0,v0,t0 // compute number of byte that matched
j ra // return
.end RtlCompareMemory
SBTTL("Equal Memory")
//++
//
// ULONG
// RtlEqualMemory (
// IN PVOID Source1,
// IN PVOID Source2,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function compares two blocks of memory for equality.
//
// Arguments:
//
// Source1 (a0) - Supplies a pointer to the first block of memory to
// compare.
//
// Source2 (a1) - Supplies a pointer to the second block of memory to
// compare.
//
// Length (a2) - Supplies the length, in bytes, of the memory to be
// compared.
//
// Return Value:
//
// If all bytes in the source strings match, then a value of TRUE is
// returned. Otherwise, FALSE is returned.
//
//--
LEAF_ENTRY(RtlEqualMemory)
li v0,FALSE // set return value FALSE
addu a3,a0,a2 // compute ending address of source1
and t0,a2,16 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t4,a0,t1 // compute ending block address
beq zero,t1,20f // if eq, no 16-byte block to compare
or t0,a0,a1 // merge and isolate alignment bits
and t0,t0,0x3 //
bne zero,t0,EqualUnaligned // if ne, unalignment comparison
//
// Compare memory aligned.
//
EqualAligned: //
.set noreorder
.set noat
10: lw t0,0(a0) // compare 16-byte block
lw t1,0(a1) //
lw t2,4(a0) //
bne t0,t1,50f // if ne, first word not equal
lw t3,4(a1) //
lw t0,8(a0) //
bne t2,t3,50f // if ne, second word not equal
lw t1,8(a1) //
lw t2,12(a0) //
bne t0,t1,50f // if ne, third word not equal
lw t3,12(a1) //
bne t2,t3,50f // if ne, eighth word not equal
addu a0,a0,16 // advance source1 to next block
bne a0,t4,10b // if ne, more blocks to compare
addu a1,a1,16 // advance source2 to next block
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 20f //
//
// Compare memory unaligned.
//
EqualUnaligned: //
and t0,a0,0x3 // isolate source1 alignment
bne zero,t0,EqualUnalignedS1 // if ne, source1 unaligned
//
// Source1 is aligned and Source2 is unaligned.
//
EqualUnalignedS2: //
.set noreorder
.set noat
10: lw t0,0(a0) // compare 16-byte block
lwr t1,0(a1) //
lwl t1,3(a1) //
lw t2,4(a0) //
bne t0,t1,50f // if ne, first word not equal
lwr t3,4(a1) //
lwl t3,7(a1) //
lw t0,8(a0) //
bne t2,t3,50f // if ne, second word not equal
lwr t1,8(a1) //
lwl t1,11(a1) //
lw t2,12(a0) //
bne t0,t1,50f // if ne, third word not equal
lwr t3,12(a1) //
lwl t3,15(a1) //
bne t2,t3,50f // if ne, fourth word not equal
addu a0,a0,16 // advance source1 to next block
bne a0,t4,10b // if ne, more blocks to compare
addu a1,a1,16 // advance source2 to next block
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 20f //
//
// Source1 is unaligned, check Source2 alignment.
//
EqualUnalignedS1: //
and t0,a1,0x3 // isolate Source2 alignment
bne zero,t0,EqualUnalignedS1AndS2 // if ne, Source2 unaligned
//
// Source1 is unaligned and Source2 is aligned.
//
.set noreorder
.set noat
10: lwr t0,0(a0) // compare 16-byte block
lwl t0,3(a0) //
lw t1,0(a1) //
lwr t2,4(a0) //
lwl t2,7(a0) //
bne t0,t1,50f // if ne, first word not equal
lw t3,4(a1) //
lwr t0,8(a0) //
lwl t0,11(a0) //
bne t2,t3,50f // if ne, second word not equal
lw t1,8(a1) //
lwr t2,12(a0) //
lwl t2,15(a0) //
bne t0,t1,50f // if ne, third word not equal
lw t3,12(a1) //
bne t2,t3,50f // if ne, fourth word not equal
addu a0,a0,16 // advance source1 to next block
bne a0,t4,10b // if ne, more blocks to compare
addu a1,a1,16 // advance source2 to next block
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
b 20f //
//
// Source1 and Source2 are unaligned.
//
EqualUnalignedS1AndS2: //
.set noreorder
.set noat
10: lwr t0,0(a0) // compare 16-byte block
lwl t0,3(a0) //
lwr t1,0(a1) //
lwl t1,3(a1) //
lwr t2,4(a0) //
lwl t2,7(a0) //
bne t0,t1,50f // if ne, first word not equal
lwr t3,4(a1) //
lwl t3,7(a1) //
lwr t0,8(a0) //
lwl t0,11(a0) //
bne t2,t3,50f // if ne, second word not equal
lwr t1,8(a1) //
lwl t1,11(a1) //
lwr t2,12(a0) //
lwl t2,15(a0) //
bne t0,t1,50f // if ne, third word not equal
lwr t3,12(a1) //
lwl t3,15(a1) //
bne t2,t3,50f // if ne, fourth word not equal
addu a0,a0,16 // advance source1 to next block
bne a0,t4,10b // if ne, more blocks to compare
addu a1,a1,16 // advance source2 to next block
.set at
.set reorder
subu a2,a3,a0 // compute remaining bytes
//
// Compare 1-byte blocks.
//
20: addu t2,a0,a2 // compute ending block address
beq zero,a2,40f // if eq, no bytes to zero
30: lb t0,0(a0) // compare 1-byte block
lb t1,0(a1) //
addu a1,a1,1 // advance pointers to next block
bne t0,t1,50f // if ne, byte not equal
addu a0,a0,1 //
bne a0,t2,30b // if ne, more 1-byte block to zero
40: li v0,TRUE // set return value TRUE
50: j ra // return
.end RtlEqualMemory
SBTTL("Move Memory")
//++
//
// PVOID
// RtlMoveMemory (
// IN PVOID Destination,
// IN PVOID Source,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function moves memory either forward or backward, aligned or
// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
// by any remaining bytes.
//
// Arguments:
//
// Destination (a0) - Supplies a pointer to the destination address of
// the move operation.
//
// Source (a1) - Supplies a pointer to the source address of the move
// operation.
//
// Length (a2) - Supplies the length, in bytes, of the memory to be moved.
//
// Return Value:
//
// The Destination address is returned as the function value.
//
// N.B. The C runtime entry points memmove and memcpy are equivalent to
// RtlMoveMemory htus alternate entry points are provided for these
// routines.
//--
LEAF_ENTRY(RtlMoveMemory)
ALTERNATE_ENTRY(memcpy)
ALTERNATE_ENTRY(memmove)
move v0,a0 // set return value
//
// If the source address is less than the destination address and source
// address plus the length of the move is greater than the destination
// address, then the source and destination overlap such that the move
// must be performed backwards.
//
10: bgeu a1,a0,MoveForward // if geu, no overlap possible
addu t0,a1,a2 // compute source ending address
bgtu t0,a0,MoveBackward // if gtu, source and destination overlap
//
// Move memory forward aligned and unaligned.
//
MoveForward: //
sltu t0,a2,8 // check if less than eight bytes
bne zero,t0,50f // if ne, less than eight bytes to move
xor t0,a0,a1 // compare alignment bits
and t0,t0,0x7 // isolate alignment comparison
bne zero,t0,MoveForwardUnaligned // if ne, incompatible alignment
//
// Move memory forward aligned.
//
MoveForwardAligned: //
subu t0,zero,a0 // compute bytes until aligned
and t0,t0,0x7 // isolate residual byte count
subu a2,a2,t0 // reduce number of bytes to move
beq zero,t0,10f // if eq, already aligned
ldr t1,0(a1) // move unaligned bytes
sdr t1,0(a0) //
addu a0,a0,t0 // align destination address
addu a1,a1,t0 // align source address
//
// Check for 32-byte blocks to move.
//
10: and t0,a2,32 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t8,a0,t1 // compute ending block address
beq zero,t1,30f // if eq, no 32-byte block to zero
move a2,t0 // set residual number of bytes
//
// Check for odd number of 32-byte blocks to move.
//
and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,20f // if eq, even number of 32-byte blocks
//
// Move one 32-byte block quadword aligned.
//
.set noreorder
ld t0,0(a1) // move 32-byte block
ld t1,8(a1) //
ld t2,16(a1) //
ld t3,24(a1) //
sd t0,0(a0) //
sd t1,8(a0) //
sd t2,16(a0) //
sd t3,24(a0) //
addu a0,a0,32 // advance pointers to next block
beq a0,t8,30f // if eq, end of block
addu a1,a1,32 //
.set reorder
//
// Move 64-byte blocks quadword aligned.
//
.set noreorder
20: ld t0,0(a1) // move 64-byte block
ld t1,8(a1) //
ld t2,16(a1) //
ld t3,24(a1) //
ld t4,32(a1) //
ld t5,40(a1) //
ld t6,48(a1) //
ld t7,56(a1) //
sd t0,0(a0) //
sd t1,8(a0) //
sd t2,16(a0) //
sd t3,24(a0) //
sd t4,32(a0) //
sd t5,40(a0) //
sd t6,48(a0) //
sd t7,56(a0) //
addu a0,a0,64 // advance pointers to next block
bne a0,t8,20b // if ne, more 64-byte blocks to zero
addu a1,a1,64 //
.set reorder
//
// Check for 4-byte blocks to move.
//
30: and t0,a2,4 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t2,a0,t1 // compute ending block address
beq zero,t1,50f // if eq, no 4-byte block to zero
move a2,t0 // set residual number of bytes
//
// Move 4-byte block.
//
.set noreorder
40: lw t0,0(a1) // move 4-byte block
addu a0,a0,4 // advance pointers to next block
sw t0,-4(a0) //
bne a0,t2,40b // if ne, more 4-byte blocks to zero
addu a1,a1,4 //
.set reorder
//
// Move 1-byte blocks.
//
50: addu t2,a0,a2 // compute ending block address
beq zero,a2,70f // if eq, no bytes to zero
.set noreorder
60: lb t0,0(a1) // move 1-byte block
addu a0,a0,1 // advance pointers to next block
sb t0,-1(a0) //
bne a0,t2,60b // if ne, more 1-byte block to zero
addu a1,a1,1 //
.set reorder
70: j ra // return
//
// Move memory forward unaligned.
//
MoveForwardUnaligned: //
subu t0,zero,a0 // compute bytes until aligned
and t0,t0,0x7 // isolate residual byte count
subu a2,a2,t0 // reduce number of bytes to move
beq zero,t0,10f // if eq, already aligned
ldr t1,0(a1) // move unaligned bytes
ldl t1,7(a1) //
sdr t1,0(a0) //
addu a0,a0,t0 // align destination address
addu a1,a1,t0 // update source address
//
// Check for 32-byte blocks to move.
//
10: and t0,a2,32 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t8,a0,t1 // compute ending block address
beq zero,t1,30f // if eq, no 32-byte block to zero
move a2,t0 // set residual number of bytes
//
// Check for odd number of 32-byte blocks to move.
//
and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,20f // if eq, even number of 32-byte blocks
//
// Move one 32-byte block quadword aligned.
//
.set noreorder
ldr t0,0(a1) // move 32-byte block
ldl t0,7(a1) //
ldr t1,8(a1) //
ldl t1,15(a1) //
ldr t2,16(a1) //
ldl t2,23(a1) //
ldr t3,24(a1) //
ldl t3,31(a1) //
sd t0,0(a0) //
sd t1,8(a0) //
sd t2,16(a0) //
sd t3,24(a0) //
addu a0,a0,32 // advance pointers to next block
beq a0,t8,30f // if eq, end of block
addu a1,a1,32 //
.set reorder
//
// Move 64-byte block.
//
.set noreorder
20: ldr t0,0(a1) // move 64-byte block
ldl t0,7(a1) //
ldr t1,8(a1) //
ldl t1,15(a1) //
ldr t2,16(a1) //
ldl t2,23(a1) //
ldr t3,24(a1) //
ldl t3,31(a1) //
ldr t4,32(a1) //
ldl t4,39(a1) //
ldr t5,40(a1) //
ldl t5,47(a1) //
ldr t6,48(a1) //
ldl t6,55(a1) //
ldr t7,56(a1) //
ldl t7,63(a1) //
sd t0,0(a0) //
sd t1,8(a0) //
sd t2,16(a0) //
sd t3,24(a0) //
sd t4,32(a0) //
sd t5,40(a0) //
sd t6,48(a0) //
sd t7,56(a0) //
addu a0,a0,64 // advance pointers to next block
bne a0,t8,20b // if ne, more 32-byte blocks to zero
addu a1,a1,64 //
.set reorder
//
// Check for 4-byte blocks to move.
//
30: and t0,a2,4 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
addu t2,a0,t1 // compute ending block address
beq zero,t1,50f // if eq, no 4-byte block to zero
move a2,t0 // set residual number of bytes
//
// Move 4-byte block.
//
.set noreorder
40: lwr t0,0(a1) // move 4-byte block
lwl t0,3(a1) //
addu a0,a0,4 // advance pointers to next block
sw t0,-4(a0) //
bne a0,t2,40b // if ne, more 4-byte blocks to zero
addu a1,a1,4 //
.set reorder
//
// Move 1-byte blocks.
//
50: addu t2,a0,a2 // compute ending block address
beq zero,a2,70f // if eq, no bytes to zero
.set noreorder
60: lb t0,0(a1) // move 1-byte block
addu a0,a0,1 // advance pointers to next block
sb t0,-1(a0) //
bne a0,t2,60b // if ne, more 1-byte block to zero
addu a1,a1,1 //
.set reorder
70: j ra // return
//
// Move memory backward.
//
MoveBackward: //
addu a0,a0,a2 // compute ending destination address
addu a1,a1,a2 // compute ending source address
sltu t0,a2,8 // check if less than eight bytes
bne zero,t0,50f // if ne, less than eight bytes to move
xor t0,a0,a1 // compare alignment bits
and t0,t0,0x7 // isolate alignment comparison
bne zero,t0,MoveBackwardUnaligned // if ne, incompatible alignment
//
// Move memory backward aligned.
//
MoveBackwardAligned: //
and t0,a0,0x7 // isolate residual byte count
subu a2,a2,t0 // reduce number of bytes to move
beq zero,t0,10f // if eq, already aligned
ldl t1,-1(a1) // move unaligned bytes
sdl t1,-1(a0) //
subu a0,a0,t0 // align destination address
subu a1,a1,t0 // align source address
//
// Check for 32-byte blocks to move.
//
10: and t0,a2,32 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
subu t8,a0,t1 // compute ending block address
beq zero,t1,30f // if eq, no 32-byte block to zero
move a2,t0 // set residual number of bytes
//
// Check for odd number of 32-byte blocks to move.
//
and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,20f // if eq, even number of 32-byte blocks
//
// Move one 32-byte block quadword aligned.
//
.set noreorder
ld t0,-8(a1) // move 32-byte block
ld t1,-16(a1) //
ld t2,-24(a1) //
ld t3,-32(a1) //
sd t0,-8(a0) //
sd t1,-16(a0) //
sd t2,-24(a0) //
sd t3,-32(a0) //
subu a0,a0,32 // advance pointers to next block
beq a0,t8,30f // if eq, end of block
subu a1,a1,32 //
.set reorder
//
// Move 64-byte blocks quadword aligned.
//
.set noreorder
20: ld t0,-8(a1) // move 64-byte block
ld t1,-16(a1) //
ld t2,-24(a1) //
ld t3,-32(a1) //
ld t4,-40(a1) //
ld t5,-48(a1) //
ld t6,-56(a1) //
ld t7,-64(a1) //
sd t0,-8(a0) //
sd t1,-16(a0) //
sd t2,-24(a0) //
sd t3,-32(a0) //
sd t4,-40(a0) //
sd t5,-48(a0) //
sd t6,-56(a0) //
sd t7,-64(a0) //
subu a0,a0,64 // advance pointers to next block
bne a0,t8,20b // if ne, more 64-byte blocks to zero
subu a1,a1,64 //
.set reorder
//
// Check for 4-byte blocks to move.
//
30: and t0,a2,4 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
subu t2,a0,t1 // compute ending block address
beq zero,t1,50f // if eq, no 4-byte block to zero
move a2,t0 // set residual number of bytes
//
// Move 4-byte block.
//
.set noreorder
40: lw t0,-4(a1) // move 4-byte block
subu a0,a0,4 // advance pointers to next block
sw t0,0(a0) //
bne a0,t2,40b // if ne, more 4-byte blocks to zero
subu a1,a1,4 //
.set reorder
//
// Move 1-byte blocks.
//
50: subu t2,a0,a2 // compute ending block address
beq zero,a2,70f // if eq, no bytes to zero
.set noreorder
60: lb t0,-1(a1) // move 1-byte block
subu a0,a0,1 // advance pointers to next block
sb t0,0(a0) //
bne a0,t2,60b // if ne, more 1-byte block to zero
subu a1,a1,1 //
.set reorder
70: j ra // return
//
// Move memory backward unaligned.
//
MoveBackwardUnaligned: //
and t0,a0,0x7 // isolate residual byte count
subu a2,a2,t0 // reduce number of bytes to move
beq zero,t0,10f // if eq, already aligned
ldl t1,-1(a1) // move unaligned bytes
ldr t1,-8(a1) //
sdl t1,-1(a0) //
subu a0,a0,t0 // align destination address
subu a1,a1,t0 // update source address
//
// Check for 32-byte blocks to move.
//
10: and t0,a2,32 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
subu t8,a0,t1 // compute ending block address
beq zero,t1,30f // if eq, no 32-byte block to zero
move a2,t0 // set residual number of bytes
//
// Check for odd number of 32-byte blocks to move.
//
and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,20f // if eq, even number of 32-byte blocks
//
// Move one 32-byte block.
//
.set noreorder
ldr t0,-8(a1) // move 32-byte block
ldl t0,-1(a1) //
ldr t1,-16(a1) //
ldl t1,-9(a1) //
ldr t2,-24(a1) //
ldl t2,-17(a1) //
ldr t3,-32(a1) //
ldl t3,-25(a1) //
sd t0,-8(a0) //
sd t1,-16(a0) //
sd t2,-24(a0) //
sd t3,-32(a0) //
subu a0,a0,32 // advance pointers to next block
beq a0,t8,30f // if eq, end of block
subu a1,a1,32 //
.set reorder
//
// Move 32-byte block.
//
.set noreorder
20: ldr t0,-8(a1) // move 32-byte block
ldl t0,-1(a1) //
ldr t1,-16(a1) //
ldl t1,-9(a1) //
ldr t2,-24(a1) //
ldl t2,-17(a1) //
ldr t3,-32(a1) //
ldl t3,-25(a1) //
ldr t4,-40(a1) //
ldl t4,-33(a1) //
ldr t5,-48(a1) //
ldl t5,-41(a1) //
ldr t6,-56(a1) //
ldl t6,-49(a1) //
ldr t7,-64(a1) //
ldl t7,-57(a1) //
sd t0,-8(a0) //
sd t1,-16(a0) //
sd t2,-24(a0) //
sd t3,-32(a0) //
sd t4,-40(a0) //
sd t5,-48(a0) //
sd t6,-56(a0) //
sd t7,-64(a0) //
subu a0,a0,64 // advance pointers to next block
bne a0,t8,20b // if ne, more 64-byte blocks to zero
subu a1,a1,64 //
.set reorder
//
// Check for 4-byte blocks to move.
//
30: and t0,a2,4 - 1 // isolate residual bytes
subu t1,a2,t0 // subtract out residual bytes
subu t2,a0,t1 // compute ending block address
beq zero,t1,50f // if eq, no 4-byte block to zero
move a2,t0 // set residual number of bytes
//
// Move 4-byte block.
//
.set noreorder
40: lwr t0,-4(a1) // move 4-byte block
lwl t0,-1(a1) //
subu a0,a0,4 // advance pointers to next block
sw t0,0(a0) //
bne a0,t2,40b // if ne, more 4-byte blocks to zero
subu a1,a1,4 //
.set reorder
//
// Move 1-byte blocks.
//
50: subu t2,a0,a2 // compute ending block address
beq zero,a2,70f // if eq, no bytes to zero
.set noreorder
60: lb t0,-1(a1) // move 1-byte block
subu a0,a0,1 // advance pointers to next block
sb t0,0(a0) //
bne a0,t2,60b // if ne, more 1-byte block to zero
subu a1,a1,1 //
.set reorder
70: j ra // return
.end RtlMoveMemory
SBTTL("Zero Memory")
//++
//
// PVOID
// RtlZeroMemory (
// IN PVOID Destination,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function zeros memory by first aligning the destination address to
// a longword boundary, and then zeroing 32-byte blocks, followed by 4-byte
// blocks, followed by any remaining bytes.
//
// Arguments:
//
// Destination (a0) - Supplies a pointer to the memory to zero.
//
// Length (a1) - Supplies the length, in bytes, of the memory to be zeroed.
//
// Return Value:
//
// The destination address is returned as the function value.
//
//--
LEAF_ENTRY(RtlZeroMemory)
move a2,zero // set fill pattern
b RtlpFillMemory //
SBTTL("Fill Memory")
//++
//
// PVOID
// RtlFillMemory (
// IN PVOID Destination,
// IN ULONG Length,
// IN UCHAR Fill
// )
//
// Routine Description:
//
// This function fills memory by first aligning the destination address to
// a longword boundary, and then filling 32-byte blocks, followed by 4-byte
// blocks, followed by any remaining bytes.
//
// Arguments:
//
// Destination (a0) - Supplies a pointer to the memory to fill.
//
// Length (a1) - Supplies the length, in bytes, of the memory to be filled.
//
// Fill (a2) - Supplies the fill byte.
//
// N.B. The alternate entry memset expects the length and fill arguments
// to be reversed.
//
// Return Value:
//
// The destination address is returned as the function value.
//
//--
ALTERNATE_ENTRY(memset)
move a3,a1 // swap length and fill arguments
move a1,a2 //
move a2,a3 //
ALTERNATE_ENTRY(RtlFillMemory)
and a2,a2,0xff // clear excess bits
sll t0,a2,8 // duplicate fill byte
or a2,a2,t0 // generate fill word
sll t0,a2,16 // duplicate fill word
or a2,a2,t0 // generate fill longword
//
// Fill memory with the pattern specified in register a2.
//
RtlpFillMemory: //
move v0,a0 // set return value
dsll a2,a2,32 // duplicate pattern to 64-bits
dsrl t0,a2,32 //
or a2,a2,t0 //
subu t0,zero,a0 // compute bytes until aligned
and t0,t0,0x7 // isolate residual byte count
subu t1,a1,t0 // reduce number of bytes to fill
blez t1,60f // if lez, less than 8 bytes to fill
move a1,t1 // set number of bytes to fill
beq zero,t0,10f // if eq, already aligned
sdr a2,0(a0) // fill unaligned bytes
addu a0,a0,t0 // align destination address
//
// Check for 32-byte blocks to fill.
//
10: and t0,a1,32 - 1 // isolate residual bytes
subu t1,a1,t0 // subtract out residual bytes
addu t2,a0,t1 // compute ending block address
beq zero,t1,40f // if eq, no 32-byte blocks to fill
move a1,t0 // set residual number of bytes
//
// Fill 32-byte blocks.
//
and t0,a0,1 << 2 // check if destintion quadword aligned
beq zero,t0,20f // if eq, yes
sw a2,0(a0) // store destination longword
addu a0,a0,4 // align destination address
addu a1,a1,t1 // recompute bytes to fill
subu a1,a1,4 // reduce count by 4
b 10b //
//
// The destination is quadword aligned.
//
20: and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,30f // if eq, even number of 32-byte blocks
//
// Fill one 32-byte block.
//
.set noreorder
.set noat
sd a2,0(a0) // fill 32-byte block
sd a2,8(a0) //
sd a2,16(a0) //
addu a0,a0,32 // advance pointer to next block
beq a0,t2,40f // if ne, no 64-byte blocks to fill
sd a2,-8(a0) //
.set at
.set reorder
//
// Fill 64-byte block.
//
.set noreorder
.set noat
30: sd a2,0(a0) // fill 32-byte block
sd a2,8(a0) //
sd a2,16(a0) //
sd a2,24(a0) //
sd a2,32(a0) //
sd a2,40(a0) //
sd a2,48(a0) //
addu a0,a0,64 // advance pointer to next block
bne a0,t2,30b // if ne, more 32-byte blocks to fill
sd a2,-8(a0) //
.set at
.set reorder
//
// Check for 4-byte blocks to fill.
//
40: and t0,a1,4 - 1 // isolate residual bytes
subu t1,a1,t0 // subtract out residual bytes
addu t2,a0,t1 // compute ending block address
beq zero,t1,60f // if eq, no 4-byte block to fill
move a1,t0 // set residual number of bytes
//
// Fill 4-byte blocks.
//
.set noreorder
50: addu a0,a0,4 // advance pointer to next block
bne a0,t2,50b // if ne, more 4-byte blocks to fill
sw a2,-4(a0) // fill 4-byte block
.set reorder
//
// Check for 1-byte blocks to fill.
//
60: addu t2,a0,a1 // compute ending block address
beq zero,a1,80f // if eq, no bytes to fill
//
// Fill 1-byte blocks.
//
.set noreorder
70: addu a0,a0,1 // advance pointer to next block
bne a0,t2,70b // if ne, more 1-byte block to fill
sb a2,-1(a0) // fill 1-byte block
.set reorder
80: j ra // return
.end RtlZeroMemory
SBTTL("Fill Memory Ulong")
//++
//
// PVOID
// RtlFillMemoryUlong (
// IN PVOID Destination,
// IN ULONG Length,
// IN ULONG Pattern
// )
//
// Routine Description:
//
// This function fills memory with the specified longowrd pattern by
// filling 32-byte blocks followed by 4-byte blocks.
//
// N.B. This routine assumes that the destination address is aligned
// on a longword boundary and that the length is an even multiple
// of longwords.
//
// Arguments:
//
// Destination (a0) - Supplies a pointer to the memory to fill.
//
// Length (a1) - Supplies the length, in bytes, of the memory to be filled.
//
// Pattern (a2) - Supplies the fill pattern.
//
// Return Value:
//
// The destination address is returned as the function value.
//
//--
LEAF_ENTRY(RtlFillMemoryUlong)
move v0,a0 // set function value
srl a1,a1,2 // make sure length is an even number
sll a1,a1,2 // of longwords
dsll a2,a2,32 // duplicate pattern to 64-bits
dsrl t0,a2,32 //
or a2,a2,t0 //
//
// Check for 32-byte blocks to fill.
//
10: and t0,a1,32 - 1 // isolate residual bytes
subu t1,a1,t0 // subtract out residual bytes
addu t2,a0,t1 // compute ending block address
beq zero,t1,40f // if eq, no 32-byte blocks to fill
move a1,t0 // set residual number of bytes
//
// Fill 32-byte blocks.
//
and t0,a0,1 << 2 // check if destintion quadword aligned
beq zero,t0,20f // if eq, yes
sw a2,0(a0) // store destination longword
addu a0,a0,4 // align destination address
addu a1,a1,t1 // recompute bytes to fill
subu a1,a1,4 // reduce count by 4
b 10b //
//
// The destination is quadword aligned.
//
20: and t0,t1,1 << 5 // test if even number of 32-byte blocks
beq zero,t0,30f // if eq, even number of 32-byte blocks
//
// Fill one 32-byte block.
//
.set noreorder
.set noat
sd a2,0(a0) // fill 32-byte block
sd a2,8(a0) //
sd a2,16(a0) //
addu a0,a0,32 // advance pointer to next block
beq a0,t2,40f // if ne, no 64-byte blocks to fill
sd a2,-8(a0) //
.set at
.set reorder
//
// Fill 64-byte block.
//
.set noreorder
.set noat
30: sd a2,0(a0) // fill 32-byte block
sd a2,8(a0) //
sd a2,16(a0) //
sd a2,24(a0) //
sd a2,32(a0) //
sd a2,40(a0) //
sd a2,48(a0) //
addu a0,a0,64 // advance pointer to next block
bne a0,t2,30b // if ne, more 32-byte blocks to fill
sd a2,-8(a0) //
.set at
.set reorder
//
// Check for 4-byte blocks to fill.
//
40: addu t2,a1,a0 // compute ending block address
beq zero,a1,60f // if eq, no 4-byte block to fill
//
// Fill 4-byte blocks.
//
.set noreorder
50: addu a0,a0,4 // advance pointer to next block
bne a0,t2,50b // if ne, more 4-byte blocks to fill
sw a2,-4(a0) // fill 4-byte block
.set reorder
60: j ra // return
.end RtlFillMemoryUlong