mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1218 lines
43 KiB
1218 lines
43 KiB
// TITLE("Compare, Move, and Fill Memory Support")
|
|
//++
|
|
//
|
|
// Copyright (c) 1990 Microsoft Corporation
|
|
//
|
|
// Module Name:
|
|
//
|
|
// memory.s
|
|
//
|
|
// Abstract:
|
|
//
|
|
// This module implements functions to compare, move, zero, and fill
|
|
// blocks of memory. If the memory is aligned, then these functions
|
|
// are very efficient.
|
|
//
|
|
// N.B. These routines MUST preserve all floating state since they are
|
|
// frequently called from interrupt service routines that normally
|
|
// do not save or restore floating state.
|
|
//
|
|
// Author:
|
|
//
|
|
// David N. Cutler (davec) 11-Apr-1990
|
|
//
|
|
// Environment:
|
|
//
|
|
// User or Kernel mode.
|
|
//
|
|
// Revision History:
|
|
// 02/02/94 RDL This is a cloned version of ntos\rtl\mips\xxmvmem.s
|
|
// Used RtlMoveMemory and RtlFillMemory.
|
|
// 02/15/94 RDL Used RtlCompareMemory, changed return code for memcmp.
|
|
// 02/22/94 RDL Fixed memcmp, zero length and equal aligned 32-byte
|
|
// buffers return wrong code.
|
|
//
|
|
//--
|
|
|
|
#include "ksmips.h"
|
|
SBTTL("Compare Memory")
|
|
|
|
//++
|
|
//
|
|
// ULONG
|
|
// RtlCompareMemory (
|
|
// IN PVOID Source1,
|
|
// IN PVOID Source2,
|
|
// IN ULONG Length
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function compares two blocks of memory and returns the number
|
|
// of bytes that compared equal.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// Source1 (a0) - Supplies a pointer to the first block of memory to
|
|
// compare.
|
|
//
|
|
// Source2 (a1) - Supplies a pointer to the second block of memory to
|
|
// compare.
|
|
//
|
|
// Length (a2) - Supplies the length, in bytes, of the memory to be
|
|
// compared.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// zero if source1 == source2
|
|
// -1 if source1 < source2
|
|
// 1 if source1 > source2
|
|
// value. If all bytes compared equal, then the length of the orginal
|
|
// block of memory is returned.
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(memcmp)
|
|
|
|
addu a3,a0,a2 // compute ending address of source1
|
|
move v0,a2 // save length of comparison
|
|
and t0,a2,32 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
addu t4,a0,t1 // compute ending block address
|
|
beq zero,t1,100f // if eq, no 32-byte block to compare
|
|
or t0,a0,a1 // merge and isolate alignment bits
|
|
and t0,t0,0x3 //
|
|
bne zero,t0,CompareUnaligned // if ne, unalignment comparison
|
|
|
|
//
|
|
// Compare memory aligned.
|
|
//
|
|
|
|
CompareAligned: //
|
|
|
|
.set noreorder
|
|
10: lw t0,0(a0) // compare 32-byte block
|
|
lw t1,0(a1) //
|
|
lw t2,4(a0) //
|
|
bne t0,t1,90f // if ne, first word not equal
|
|
lw t3,4(a1) //
|
|
lw t0,8(a0) //
|
|
bne t2,t3,20f // if ne, second word not equal
|
|
lw t1,8(a1) //
|
|
lw t2,12(a0) //
|
|
bne t0,t1,30f // if ne, third word not equal
|
|
lw t3,12(a1) //
|
|
lw t0,16(a0) //
|
|
bne t2,t3,40f // if ne, fourth word not equal
|
|
lw t1,16(a1) //
|
|
lw t2,20(a0) //
|
|
bne t0,t1,50f // if ne, fifth word not equal
|
|
lw t3,20(a1) //
|
|
lw t0,24(a0) //
|
|
bne t2,t3,60f // if ne, sixth word not equal
|
|
lw t1,24(a1) //
|
|
lw t2,28(a0) //
|
|
bne t0,t1,70f // if ne, seventh word not equal
|
|
lw t3,28(a1) //
|
|
addu a0,a0,32 // advance source1 to next block
|
|
bne t2,t3,80f // if ne, eighth word not equal
|
|
nop //
|
|
bne a0,t4,10b // if ne, more 32-byte blocks to compare
|
|
addu a1,a1,32 // update source2 address
|
|
.set reorder
|
|
|
|
subu a2,a3,a0 // compute remaining bytes
|
|
b 100f //
|
|
|
|
//
|
|
// Compare memory unaligned.
|
|
//
|
|
|
|
CompareUnaligned: //
|
|
and t0,a0,0x3 // isolate source1 alignment
|
|
bne zero,t0,CompareUnalignedS1 // if ne, source1 unaligned
|
|
|
|
//
|
|
// Source1 is aligned and Source2 is unaligned.
|
|
//
|
|
|
|
CompareUnalignedS2: //
|
|
|
|
.set noreorder
|
|
10: lw t0,0(a0) // compare 32-byte block
|
|
lwr t1,0(a1) //
|
|
lwl t1,3(a1) //
|
|
lw t2,4(a0) //
|
|
bne t0,t1,90f // if ne, first word not equal
|
|
lwr t3,4(a1) //
|
|
lwl t3,7(a1) //
|
|
lw t0,8(a0) //
|
|
bne t2,t3,20f // if ne, second word not equal
|
|
lwr t1,8(a1) //
|
|
lwl t1,11(a1) //
|
|
lw t2,12(a0) //
|
|
bne t0,t1,30f // if ne, third word not equal
|
|
lwr t3,12(a1) //
|
|
lwl t3,15(a1) //
|
|
lw t0,16(a0) //
|
|
bne t2,t3,40f // if ne, fourth word not equal
|
|
lwr t1,16(a1) //
|
|
lwl t1,19(a1) //
|
|
lw t2,20(a0) //
|
|
bne t0,t1,50f // if ne, fifth word not equal
|
|
lwr t3,20(a1) //
|
|
lwl t3,23(a1) //
|
|
lw t0,24(a0) //
|
|
bne t2,t3,60f // if ne, sixth word not equal
|
|
lwr t1,24(a1) //
|
|
lwl t1,27(a1) //
|
|
lw t2,28(a0) //
|
|
bne t0,t1,70f // if ne, seventh word not equal
|
|
lwr t3,28(a1) //
|
|
lwl t3,31(a1) //
|
|
addu a0,a0,32 // advance source1 to next block
|
|
bne t2,t3,80f // if ne, eighth word not equal
|
|
nop //
|
|
bne a0,t4,10b // if ne, more 32-byte blocks to compare
|
|
addu a1,a1,32 // update source2 address
|
|
.set reorder
|
|
|
|
subu a2,a3,a0 // compute remaining bytes
|
|
b 100f //
|
|
|
|
//
|
|
// Source1 is unaligned, check Source2 alignment.
|
|
//
|
|
|
|
CompareUnalignedS1: //
|
|
and t0,a1,0x3 // isolate Source2 alignment
|
|
bne zero,t0,CompareUnalignedS1AndS2 // if ne, Source2 unaligned
|
|
|
|
//
|
|
// Source1 is unaligned and Source2 is aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
10: lwr t0,0(a0) // compare 32-byte block
|
|
lwl t0,3(a0) //
|
|
lw t1,0(a1) //
|
|
lwr t2,4(a0) //
|
|
lwl t2,7(a0) //
|
|
bne t0,t1,90f // if ne, first word not equal
|
|
lw t3,4(a1) //
|
|
lwr t0,8(a0) //
|
|
lwl t0,11(a0) //
|
|
bne t2,t3,20f // if ne, second word not equal
|
|
lw t1,8(a1) //
|
|
lwr t2,12(a0) //
|
|
lwl t2,15(a0) //
|
|
bne t0,t1,30f // if ne, third word not equal
|
|
lw t3,12(a1) //
|
|
lwr t0,16(a0) //
|
|
lwl t0,19(a0) //
|
|
bne t2,t3,40f // if ne, fourth word not equal
|
|
lw t1,16(a1) //
|
|
lwr t2,20(a0) //
|
|
lwl t2,23(a0) //
|
|
bne t0,t1,50f // if ne, fifth word not equal
|
|
lw t3,20(a1) //
|
|
lwr t0,24(a0) //
|
|
lwl t0,27(a0) //
|
|
bne t2,t3,60f // if ne, sixth word not equal
|
|
lw t1,24(a1) //
|
|
lwr t2,28(a0) //
|
|
lwl t2,31(a0) //
|
|
bne t0,t1,70f // if ne, seventh word not equal
|
|
lw t3,28(a1) //
|
|
addu a0,a0,32 // advance source1 to next block
|
|
bne t2,t3,80f // if ne, eighth word not equal
|
|
nop //
|
|
bne a0,t4,10b // if ne, more 32-byte blocks to compare
|
|
addu a1,a1,32 // update source2 address
|
|
.set reorder
|
|
|
|
subu a2,a3,a0 // compute remaining bytes
|
|
b 100f //
|
|
|
|
//
|
|
// Source1 and Source2 are unaligned.
|
|
//
|
|
|
|
CompareUnalignedS1AndS2: //
|
|
|
|
.set noreorder
|
|
10: lwr t0,0(a0) // compare 32-byte block
|
|
lwl t0,3(a0) //
|
|
lwr t1,0(a1) //
|
|
lwl t1,3(a1) //
|
|
lwr t2,4(a0) //
|
|
lwl t2,7(a0) //
|
|
bne t0,t1,90f // if ne, first word not equal
|
|
lwr t3,4(a1) //
|
|
lwl t3,7(a1) //
|
|
lwr t0,8(a0) //
|
|
lwl t0,11(a0) //
|
|
bne t2,t3,20f // if ne, second word not equal
|
|
lwr t1,8(a1) //
|
|
lwl t1,11(a1) //
|
|
lwr t2,12(a0) //
|
|
lwl t2,15(a0) //
|
|
bne t0,t1,30f // if ne, third word not equal
|
|
lwr t3,12(a1) //
|
|
lwl t3,15(a1) //
|
|
lwr t0,16(a0) //
|
|
lwl t0,19(a0) //
|
|
bne t2,t3,40f // if ne, fourth word not equal
|
|
lwr t1,16(a1) //
|
|
lwl t1,19(a1) //
|
|
lwr t2,20(a0) //
|
|
lwl t2,23(a0) //
|
|
bne t0,t1,50f // if ne, fifth word not equal
|
|
lwr t3,20(a1) //
|
|
lwl t3,23(a1) //
|
|
lwr t0,24(a0) //
|
|
lwl t0,27(a0) //
|
|
bne t2,t3,60f // if ne, sixth word not equal
|
|
lwr t1,24(a1) //
|
|
lwl t1,27(a1) //
|
|
lwr t2,28(a0) //
|
|
lwl t2,31(a0) //
|
|
bne t0,t1,70f // if ne, seventh word not equal
|
|
lwr t3,28(a1) //
|
|
lwl t3,31(a1) //
|
|
addu a0,a0,32 // advance source1 to next block
|
|
bne t2,t3,80f // if ne, eighth word not equal
|
|
nop //
|
|
bne a0,t4,10b // if ne, more 32-byte blocks to compare
|
|
addu a1,a1,32 // update source2 address
|
|
.set reorder
|
|
|
|
subu a2,a3,a0 // compute remaining bytes
|
|
b 100f //
|
|
|
|
//
|
|
// Adjust source1 and source2 pointers dependent on position of miscompare in
|
|
// block.
|
|
//
|
|
|
|
20: addu a0,a0,4 // mismatch on second word
|
|
addu a1,a1,4 //
|
|
b 90f //
|
|
|
|
30: addu a0,a0,8 // mismatch on third word
|
|
addu a1,a1,8 //
|
|
b 90f //
|
|
|
|
40: addu a0,a0,12 // mistmatch on fourth word
|
|
addu a1,a1,12 //
|
|
b 90f //
|
|
|
|
50: addu a0,a0,16 // mismatch on fifth word
|
|
addu a1,a1,16 //
|
|
b 90f //
|
|
|
|
60: addu a0,a0,20 // mismatch on sixth word
|
|
addu a1,a1,20 //
|
|
b 90f //
|
|
|
|
70: addu a0,a0,24 // mismatch on seventh word
|
|
addu a1,a1,24 //
|
|
b 90f //
|
|
|
|
80: subu a0,a0,4 // mismatch on eighth word
|
|
addu a1,a1,28 //
|
|
90: subu a2,a3,a0 // compute remaining bytes
|
|
|
|
//
|
|
// Compare 1-byte blocks.
|
|
//
|
|
|
|
100: addu t2,a0,a2 // compute ending block address
|
|
beq zero,a2,120f // if eq, buffers equal
|
|
110: lb t0,0(a0) // compare 1-byte block
|
|
lb t1,0(a1) //
|
|
addu a1,a1,1 // advance pointers to next block
|
|
bne t0,t1,130f // if ne, byte not equal
|
|
addu a0,a0,1 //
|
|
bne a0,t2,110b // if ne, more 1-byte block to zero
|
|
|
|
120: move v0,zero // source1 == source2
|
|
j ra // return
|
|
|
|
130: sltu v0,t1,t0 // compare source1 to source2
|
|
beq v0,zero,140f
|
|
j ra // return, source1 > source2
|
|
140:
|
|
li v0,-1
|
|
j ra // return, source1 < source2
|
|
|
|
.end memcmp
|
|
|
|
SBTTL("Move Memory")
|
|
//++
|
|
//
|
|
// VOID
|
|
// RtlMoveMemory (
|
|
// IN PVOID Destination,
|
|
// IN PVOID Source,
|
|
// IN ULONG Length
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function moves memory either forward or backward, aligned or
|
|
// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
|
|
// by any remaining bytes.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// Destination (a0) - Supplies a pointer to the destination address of
|
|
// the move operation.
|
|
//
|
|
// Source (a1) - Supplies a pointer to the source address of the move
|
|
// operation.
|
|
//
|
|
// Length (a2) - Supplies the length, in bytes, of the memory to be moved.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
// N.B. The C runtime entry points memmove and memcpy are equivalent to
|
|
// RtlMoveMemory thus alternate entry points are provided for these
|
|
// routines.
|
|
//--
|
|
|
|
LEAF_ENTRY(memmove)
|
|
j memcpy
|
|
.end memmove
|
|
|
|
LEAF_ENTRY(memcpy)
|
|
|
|
move v0,a0 // return destination
|
|
|
|
//
|
|
// If the source address is less than the destination address and source
|
|
// address plus the length of the move is greater than the destination
|
|
// address, then the source and destination overlap such that the move
|
|
// must be performed backwards.
|
|
//
|
|
|
|
10: bgeu a1,a0,MoveForward // if geu, no overlap possible
|
|
addu t0,a1,a2 // compute source ending address
|
|
bgtu t0,a0,MoveBackward // if gtu, source and destination overlap
|
|
|
|
//
|
|
// Move memory forward aligned and unaligned.
|
|
//
|
|
|
|
MoveForward: //
|
|
sltu t0,a2,4 // check if less than four bytes
|
|
bne zero,t0,50f // if ne, less than four bytes to move
|
|
xor t0,a0,a1 // compare alignment bits
|
|
and t0,t0,0x3 // isolate alignment comparison
|
|
bne zero,t0,MoveForwardUnaligned // if ne, incompatible alignment
|
|
|
|
//
|
|
// Move memory forward aligned.
|
|
//
|
|
|
|
MoveForwardAligned: //
|
|
subu t0,zero,a0 // compute bytes until aligned
|
|
and t0,t0,0x3 // isolate residual byte count
|
|
subu a2,a2,t0 // reduce number of bytes to move
|
|
beq zero,t0,10f // if eq, already aligned
|
|
lwr t1,0(a1) // move unaligned bytes
|
|
swr t1,0(a0) //
|
|
addu a0,a0,t0 // align destination address
|
|
addu a1,a1,t0 // align source address
|
|
|
|
//
|
|
// Check for 32-byte blocks to move.
|
|
//
|
|
|
|
10: and t0,a2,32 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
addu t8,a0,t1 // compute ending block address
|
|
beq zero,t1,30f // if eq, no 32-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 32-byte blocks.
|
|
//
|
|
|
|
#if defined(R4000)
|
|
|
|
and t0,a0,1 << 2 // check if destination quadword aligned
|
|
beq zero,t0,15f // if eq, destination quadword aligned
|
|
lw t0,0(a1) // get source longword
|
|
addu a1,a1,4 // align source address
|
|
sw t0,0(a0) // store destination longword
|
|
addu a0,a0,4 // align destination address
|
|
addu a2,a2,t1 // recompute bytes to move
|
|
subu a2,a2,4 // reduce count by 4
|
|
b 10b //
|
|
|
|
//
|
|
// The destination is quadword aligned, check the source operand.
|
|
//
|
|
|
|
15: and t0,a1,1 << 2 // check if source quadword aligned
|
|
beq zero,t0,22f // if eq, source quadword aligned
|
|
|
|
//
|
|
// The source is longword aligned and the destination is quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
20: lwc1 f0,0(a1) // move 32-byte block
|
|
lwc1 f1,4(a1) //
|
|
lwc1 f2,8(a1) //
|
|
lwc1 f3,12(a1) //
|
|
lwc1 f4,16(a1) //
|
|
lwc1 f5,20(a1) //
|
|
lwc1 f6,24(a1) //
|
|
lwc1 f7,28(a1) //
|
|
sdc1 f0,0(a0) //
|
|
sdc1 f2,8(a0) //
|
|
sdc1 f4,16(a0) //
|
|
sdc1 f6,24(a0) //
|
|
addu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
addu a1,a1,32 //
|
|
.set reorder
|
|
|
|
b 30f //
|
|
|
|
//
|
|
// Both the source and the destination are quadword aligned.
|
|
//
|
|
|
|
22: and t0,t1,1 << 5 // test if even number of 32-byte blocks
|
|
beq zero,t0,26f // if eq, even number of 32-byte blocks
|
|
|
|
//
|
|
// Move one 32-byte block quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
ldc1 f0,0(a1) // move 32-byte block
|
|
ldc1 f2,8(a1) //
|
|
ldc1 f4,16(a1) //
|
|
ldc1 f6,24(a1) //
|
|
sdc1 f0,0(a0) //
|
|
sdc1 f2,8(a0) //
|
|
sdc1 f4,16(a0) //
|
|
sdc1 f6,24(a0) //
|
|
addu a0,a0,32 // advance pointers to next block
|
|
beq a0,t8,30f // if eq, end of block
|
|
addu a1,a1,32 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 64-byte blocks quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
26: ldc1 f0,0(a1) // move 64-byte block
|
|
ldc1 f2,8(a1) //
|
|
ldc1 f4,16(a1) //
|
|
ldc1 f6,24(a1) //
|
|
ldc1 f8,32(a1) //
|
|
ldc1 f10,40(a1) //
|
|
ldc1 f12,48(a1) //
|
|
ldc1 f14,56(a1) //
|
|
sdc1 f0,0(a0) //
|
|
sdc1 f2,8(a0) //
|
|
sdc1 f4,16(a0) //
|
|
sdc1 f6,24(a0) //
|
|
sdc1 f8,32(a0) //
|
|
sdc1 f10,40(a0) //
|
|
sdc1 f12,48(a0) //
|
|
sdc1 f14,56(a0) //
|
|
addu a0,a0,64 // advance pointers to next block
|
|
bne a0,t8,26b // if ne, more 64-byte blocks to zero
|
|
addu a1,a1,64 //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// The source is longword aligned and the destination is longword aligned.
|
|
//
|
|
|
|
#if defined(R3000)
|
|
|
|
.set noreorder
|
|
20: lw t0,0(a1) // move 32-byte block
|
|
lw t1,4(a1) //
|
|
lw t2,8(a1) //
|
|
lw t3,12(a1) //
|
|
lw t4,16(a1) //
|
|
lw t5,20(a1) //
|
|
lw t6,24(a1) //
|
|
lw t7,28(a1) //
|
|
sw t0,0(a0) //
|
|
sw t1,4(a0) //
|
|
sw t2,8(a0) //
|
|
sw t3,12(a0) //
|
|
sw t4,16(a0) //
|
|
sw t5,20(a0) //
|
|
sw t6,24(a0) //
|
|
sw t7,28(a0) //
|
|
addu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
addu a1,a1,32 //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// Check for 4-byte blocks to move.
|
|
//
|
|
|
|
30: and t0,a2,4 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
addu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,50f // if eq, no 4-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 4-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
40: lw t0,0(a1) // move 4-byte block
|
|
addu a0,a0,4 // advance pointers to next block
|
|
sw t0,-4(a0) //
|
|
bne a0,t2,40b // if ne, more 4-byte blocks to zero
|
|
addu a1,a1,4 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 1-byte blocks.
|
|
//
|
|
|
|
50: addu t2,a0,a2 // compute ending block address
|
|
beq zero,a2,70f // if eq, no bytes to zero
|
|
|
|
.set noreorder
|
|
60: lb t0,0(a1) // move 1-byte block
|
|
addu a0,a0,1 // advance pointers to next block
|
|
sb t0,-1(a0) //
|
|
bne a0,t2,60b // if ne, more 1-byte block to zero
|
|
addu a1,a1,1 //
|
|
.set reorder
|
|
|
|
70: j ra // return
|
|
|
|
//
|
|
// Move memory forward unaligned.
|
|
//
|
|
|
|
MoveForwardUnaligned: //
|
|
subu t0,zero,a0 // compute bytes until aligned
|
|
and t0,t0,0x3 // isolate residual byte count
|
|
subu a2,a2,t0 // reduce number of bytes to move
|
|
beq zero,t0,10f // if eq, already aligned
|
|
lwr t1,0(a1) // move unaligned bytes
|
|
lwl t1,3(a1) //
|
|
swr t1,0(a0) //
|
|
addu a0,a0,t0 // align destination address
|
|
addu a1,a1,t0 // update source address
|
|
|
|
//
|
|
// Check for 32-byte blocks to move.
|
|
//
|
|
|
|
10: and t0,a2,32 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
addu t8,a0,t1 // compute ending block address
|
|
beq zero,t1,30f // if eq, no 32-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 32-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
20: lwr t0,0(a1) // move 32-byte block
|
|
lwl t0,3(a1) //
|
|
lwr t1,4(a1) //
|
|
lwl t1,7(a1) //
|
|
lwr t2,8(a1) //
|
|
lwl t2,11(a1) //
|
|
lwr t3,12(a1) //
|
|
lwl t3,15(a1) //
|
|
lwr t4,16(a1) //
|
|
lwl t4,19(a1) //
|
|
lwr t5,20(a1) //
|
|
lwl t5,23(a1) //
|
|
lwr t6,24(a1) //
|
|
lwl t6,27(a1) //
|
|
lwr t7,28(a1) //
|
|
lwl t7,31(a1) //
|
|
sw t0,0(a0) //
|
|
sw t1,4(a0) //
|
|
sw t2,8(a0) //
|
|
sw t3,12(a0) //
|
|
sw t4,16(a0) //
|
|
sw t5,20(a0) //
|
|
sw t6,24(a0) //
|
|
sw t7,28(a0) //
|
|
addu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
addu a1,a1,32 //
|
|
.set reorder
|
|
|
|
//
|
|
// Check for 4-byte blocks to move.
|
|
//
|
|
|
|
30: and t0,a2,4 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
addu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,50f // if eq, no 4-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 4-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
40: lwr t0,0(a1) // move 4-byte block
|
|
lwl t0,3(a1) //
|
|
addu a0,a0,4 // advance pointers to next block
|
|
sw t0,-4(a0) //
|
|
bne a0,t2,40b // if ne, more 4-byte blocks to zero
|
|
addu a1,a1,4 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 1-byte blocks.
|
|
//
|
|
|
|
50: addu t2,a0,a2 // compute ending block address
|
|
beq zero,a2,70f // if eq, no bytes to zero
|
|
|
|
.set noreorder
|
|
60: lb t0,0(a1) // move 1-byte block
|
|
addu a0,a0,1 // advance pointers to next block
|
|
sb t0,-1(a0) //
|
|
bne a0,t2,60b // if ne, more 1-byte block to zero
|
|
addu a1,a1,1 //
|
|
.set reorder
|
|
|
|
70: j ra // return
|
|
|
|
//
|
|
// Move memory backward.
|
|
//
|
|
|
|
MoveBackward: //
|
|
addu a0,a0,a2 // compute ending destination address
|
|
addu a1,a1,a2 // compute ending source address
|
|
sltu t0,a2,4 // check if less than four bytes
|
|
bne zero,t0,50f // if ne, less than four bytes to move
|
|
xor t0,a0,a1 // compare alignment bits
|
|
and t0,t0,0x3 // isolate alignment comparison
|
|
bne zero,t0,MoveBackwardUnaligned // if ne, incompatible alignment
|
|
|
|
//
|
|
// Move memory backward aligned.
|
|
//
|
|
|
|
MoveBackwardAligned: //
|
|
and t0,a0,0x3 // isolate residual byte count
|
|
subu a2,a2,t0 // reduce number of bytes to move
|
|
beq zero,t0,10f // if eq, already aligned
|
|
lwl t1,-1(a1) // move unaligned bytes
|
|
swl t1,-1(a0) //
|
|
subu a0,a0,t0 // align destination address
|
|
subu a1,a1,t0 // align source address
|
|
|
|
//
|
|
// Check for 32-byte blocks to move.
|
|
//
|
|
|
|
10: and t0,a2,32 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
subu t8,a0,t1 // compute ending block address
|
|
beq zero,t1,30f // if eq, no 32-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 32-byte block.
|
|
//
|
|
|
|
#if defined(R4000)
|
|
|
|
and t0,a0,1 << 2 // check if destination quadword aligned
|
|
beq zero,t0,15f // if eq, destination quadword aligned
|
|
lw t0,-4(a1) // get source longword
|
|
subu a1,a1,4 // align source address
|
|
sw t0,-4(a0) // store destination longword
|
|
subu a0,a0,4 // align destination address
|
|
addu a2,a2,t1 // recompute byte to move
|
|
subu a2,a2,4 // reduce count by 4
|
|
b 10b //
|
|
|
|
//
|
|
// The destination is quadword aligned, check the source operand.
|
|
//
|
|
|
|
15: and t0,a1,1 << 2 // check if source quadword aligned
|
|
beq zero,t0,22f // if eq, source quadword aligned
|
|
|
|
//
|
|
// The source is longword aligned and the destination is quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
20: lwc1 f1,-4(a1) // move 32-byte block
|
|
lwc1 f0,-8(a1) //
|
|
lwc1 f3,-12(a1) //
|
|
lwc1 f2,-16(a1) //
|
|
lwc1 f5,-20(a1) //
|
|
lwc1 f4,-24(a1) //
|
|
lwc1 f7,-28(a1) //
|
|
lwc1 f6,-32(a1) //
|
|
sdc1 f0,-8(a0) //
|
|
sdc1 f2,-16(a0) //
|
|
sdc1 f4,-24(a0) //
|
|
sdc1 f6,-32(a0) //
|
|
subu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
subu a1,a1,32 //
|
|
.set reorder
|
|
|
|
b 30f //
|
|
|
|
//
|
|
// Both the source and the destination are quadword aligned.
|
|
//
|
|
|
|
22: and t0,t1,1 << 5 // test if even number of 32-byte blocks
|
|
beq zero,t0,26f // if eq, even number of 32-byte blocks
|
|
|
|
//
|
|
// Move one 32-byte block quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
ldc1 f0,-8(a1) // move 32-byte block
|
|
ldc1 f2,-16(a1) //
|
|
ldc1 f4,-24(a1) //
|
|
ldc1 f6,-32(a1) //
|
|
sdc1 f0,-8(a0) //
|
|
sdc1 f2,-16(a0) //
|
|
sdc1 f4,-24(a0) //
|
|
sdc1 f6,-32(a0) //
|
|
subu a0,a0,32 // advance pointers to next block
|
|
beq a0,t8,30f // if eq, end of block
|
|
subu a1,a1,32 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 64-byte blocks quadword aligned.
|
|
//
|
|
|
|
.set noreorder
|
|
26: ldc1 f0,-8(a1) // move 64-byte block
|
|
ldc1 f2,-16(a1) //
|
|
ldc1 f4,-24(a1) //
|
|
ldc1 f6,-32(a1) //
|
|
ldc1 f8,-40(a1) //
|
|
ldc1 f10,-48(a1) //
|
|
ldc1 f12,-56(a1) //
|
|
ldc1 f14,-64(a1) //
|
|
sdc1 f0,-8(a0) //
|
|
sdc1 f2,-16(a0) //
|
|
sdc1 f4,-24(a0) //
|
|
sdc1 f6,-32(a0) //
|
|
sdc1 f8,-40(a0) //
|
|
sdc1 f10,-48(a0) //
|
|
sdc1 f12,-56(a0) //
|
|
sdc1 f14,-64(a0) //
|
|
subu a0,a0,64 // advance pointers to next block
|
|
bne a0,t8,26b // if ne, more 64-byte blocks to zero
|
|
subu a1,a1,64 //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// The source is longword aligned and the destination is longword aligned.
|
|
//
|
|
|
|
#if defined(R3000)
|
|
|
|
.set noreorder
|
|
20: lw t0,-4(a1) // move 32-byte block
|
|
lw t1,-8(a1) //
|
|
lw t2,-12(a1) //
|
|
lw t3,-16(a1) //
|
|
lw t4,-20(a1) //
|
|
lw t5,-24(a1) //
|
|
lw t6,-28(a1) //
|
|
lw t7,-32(a1) //
|
|
sw t0,-4(a0) //
|
|
sw t1,-8(a0) //
|
|
sw t2,-12(a0) //
|
|
sw t3,-16(a0) //
|
|
sw t4,-20(a0) //
|
|
sw t5,-24(a0) //
|
|
sw t6,-28(a0) //
|
|
sw t7,-32(a0) //
|
|
subu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
subu a1,a1,32 //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// Check for 4-byte blocks to move.
|
|
//
|
|
|
|
30: and t0,a2,4 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
subu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,50f // if eq, no 4-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 4-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
40: lw t0,-4(a1) // move 4-byte block
|
|
subu a0,a0,4 // advance pointers to next block
|
|
sw t0,0(a0) //
|
|
bne a0,t2,40b // if ne, more 4-byte blocks to zero
|
|
subu a1,a1,4 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 1-byte blocks.
|
|
//
|
|
|
|
50: subu t2,a0,a2 // compute ending block address
|
|
beq zero,a2,70f // if eq, no bytes to zero
|
|
|
|
.set noreorder
|
|
60: lb t0,-1(a1) // move 1-byte block
|
|
subu a0,a0,1 // advance pointers to next block
|
|
sb t0,0(a0) //
|
|
bne a0,t2,60b // if ne, more 1-byte block to zero
|
|
subu a1,a1,1 //
|
|
.set reorder
|
|
|
|
70: j ra // return
|
|
|
|
//
|
|
// Move memory backward unaligned.
|
|
//
|
|
|
|
MoveBackwardUnaligned: //
|
|
and t0,a0,0x3 // isolate residual byte count
|
|
subu a2,a2,t0 // reduce number of bytes to move
|
|
beq zero,t0,10f // if eq, already aligned
|
|
lwl t1,-1(a1) // move unaligned bytes
|
|
lwr t1,-4(a1) //
|
|
swl t1,-1(a0) //
|
|
subu a0,a0,t0 // align destination address
|
|
subu a1,a1,t0 // update source address
|
|
|
|
//
|
|
// Check for 32-byte blocks to move.
|
|
//
|
|
|
|
10: and t0,a2,32 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
subu t8,a0,t1 // compute ending block address
|
|
beq zero,t1,30f // if eq, no 32-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 32-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
20: lwr t0,-4(a1) // move 32-byte block
|
|
lwl t0,-1(a1) //
|
|
lwr t1,-8(a1) //
|
|
lwl t1,-5(a1) //
|
|
lwr t2,-12(a1) //
|
|
lwl t2,-9(a1) //
|
|
lwr t3,-16(a1) //
|
|
lwl t3,-13(a1) //
|
|
lwr t4,-20(a1) //
|
|
lwl t4,-17(a1) //
|
|
lwr t5,-24(a1) //
|
|
lwl t5,-21(a1) //
|
|
lwr t6,-28(a1) //
|
|
lwl t6,-25(a1) //
|
|
lwr t7,-32(a1) //
|
|
lwl t7,-29(a1) //
|
|
sw t0,-4(a0) //
|
|
sw t1,-8(a0) //
|
|
sw t2,-12(a0) //
|
|
sw t3,-16(a0) //
|
|
sw t4,-20(a0) //
|
|
sw t5,-24(a0) //
|
|
sw t6,-28(a0) //
|
|
sw t7,-32(a0) //
|
|
subu a0,a0,32 // advance pointers to next block
|
|
bne a0,t8,20b // if ne, more 32-byte blocks to zero
|
|
subu a1,a1,32 //
|
|
.set reorder
|
|
|
|
//
|
|
// Check for 4-byte blocks to move.
|
|
//
|
|
|
|
30: and t0,a2,4 - 1 // isolate residual bytes
|
|
subu t1,a2,t0 // subtract out residual bytes
|
|
subu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,50f // if eq, no 4-byte block to zero
|
|
move a2,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Move 4-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
40: lwr t0,-4(a1) // move 4-byte block
|
|
lwl t0,-1(a1) //
|
|
subu a0,a0,4 // advance pointers to next block
|
|
sw t0,0(a0) //
|
|
bne a0,t2,40b // if ne, more 4-byte blocks to zero
|
|
subu a1,a1,4 //
|
|
.set reorder
|
|
|
|
//
|
|
// Move 1-byte blocks.
|
|
//
|
|
|
|
50: subu t2,a0,a2 // compute ending block address
|
|
beq zero,a2,70f // if eq, no bytes to zero
|
|
|
|
.set noreorder
|
|
60: lb t0,-1(a1) // move 1-byte block
|
|
subu a0,a0,1 // advance pointers to next block
|
|
sb t0,0(a0) //
|
|
bne a0,t2,60b // if ne, more 1-byte block to zero
|
|
subu a1,a1,1 //
|
|
.set reorder
|
|
|
|
70: j ra // return
|
|
|
|
.end memcpy
|
|
|
|
SBTTL("Fill Memory")
|
|
//++
|
|
//
|
|
// VOID
|
|
// RtlFillMemory (
|
|
// IN PVOID Destination,
|
|
// IN ULONG Length,
|
|
// IN UCHAR Fill
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function fills memory by first aligning the destination address to
|
|
// a longword boundary, and then filling 32-byte blocks, followed by 4-byte
|
|
// blocks, followed by any remaining bytes.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// Destination (a0) - Supplies a pointer to the memory to fill.
|
|
//
|
|
// Length (a1) - Supplies the length, in bytes, of the memory to be filled.
|
|
//
|
|
// Fill (a2) - Supplies the fill byte.
|
|
//
|
|
// N.B. The alternate entry memset expects the length and fill arguments
|
|
// to be reversed.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(memset)
|
|
|
|
move a3,a1 // swap length and fill arguments
|
|
move a1,a2 //
|
|
move a2,a3 //
|
|
move v0,a0 // return destination
|
|
|
|
and a2,a2,0xff // clear excess bits
|
|
sll t0,a2,8 // duplicate fill byte
|
|
or a2,a2,t0 // generate fill word
|
|
sll t0,a2,16 // duplicate fill word
|
|
or a2,a2,t0 // generate fill longword
|
|
|
|
//
|
|
// Fill memory with the pattern specified in register a2.
|
|
//
|
|
|
|
#if DBG
|
|
|
|
mtc1 a2,f0 // set pattern to store
|
|
mtc1 a2,f1 //
|
|
|
|
#endif
|
|
|
|
subu t0,zero,a0 // compute bytes until aligned
|
|
and t0,t0,0x3 // isolate residual byte count
|
|
subu t1,a1,t0 // reduce number of bytes to fill
|
|
blez t1,60f // if lez, less than 4 bytes to fill
|
|
move a1,t1 // set number of bytes to fill
|
|
beq zero,t0,10f // if eq, already aligned
|
|
swr a2,0(a0) // fill unaligned bytes
|
|
addu a0,a0,t0 // align destination address
|
|
|
|
//
|
|
// Check for 32-byte blocks to fill.
|
|
//
|
|
|
|
10: and t0,a1,32 - 1 // isolate residual bytes
|
|
subu t1,a1,t0 // subtract out residual bytes
|
|
addu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,40f // if eq, no 32-byte blocks to fill
|
|
move a1,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Fill 32-byte blocks.
|
|
//
|
|
|
|
#if defined(R4000)
|
|
|
|
and t0,a0,1 << 2 // check if destintion quadword aligned
|
|
beq zero,t0,20f // if eq, yes
|
|
sw a2,0(a0) // store destination longword
|
|
addu a0,a0,4 // align destination address
|
|
addu a1,a1,t1 // recompute bytes to fill
|
|
subu a1,a1,4 // reduce count by 4
|
|
b 10b //
|
|
|
|
//
|
|
// The destination is quadword aligned.
|
|
//
|
|
|
|
20: mtc1 a2,f0 // set pattern value
|
|
mtc1 a2,f1 //
|
|
and t0,t1,1 << 5 // test if even number of 32-byte blocks
|
|
beq zero,t0,30f // if eq, even number of 32-byte blocks
|
|
|
|
//
|
|
// Fill one 32-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
sdc1 f0,0(a0) // fill 32-byte block
|
|
sdc1 f0,8(a0) //
|
|
sdc1 f0,16(a0) //
|
|
addu a0,a0,32 // advance pointer to next block
|
|
beq a0,t2,40f // if ne, no 64-byte blocks to fill
|
|
sdc1 f0,-8(a0) //
|
|
.set reorder
|
|
|
|
//
|
|
// Fill 64-byte block.
|
|
//
|
|
|
|
.set noreorder
|
|
30: sdc1 f0,0(a0) // fill 32-byte block
|
|
sdc1 f0,8(a0) //
|
|
sdc1 f0,16(a0) //
|
|
sdc1 f0,24(a0) //
|
|
sdc1 f0,32(a0) //
|
|
sdc1 f0,40(a0) //
|
|
sdc1 f0,48(a0) //
|
|
addu a0,a0,64 // advance pointer to next block
|
|
bne a0,t2,30b // if ne, more 32-byte blocks to fill
|
|
sdc1 f0,-8(a0) //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// Fill 32-byte blocks.
|
|
//
|
|
|
|
#if defined(R3000)
|
|
|
|
.set noreorder
|
|
20: sw a2,0(a0) // fill 32-byte block
|
|
sw a2,4(a0) //
|
|
sw a2,8(a0) //
|
|
sw a2,12(a0) //
|
|
addu a0,a0,32 // advance pointer to next block
|
|
sw a2,-4(a0) //
|
|
sw a2,-8(a0) //
|
|
sw a2,-12(a0) //
|
|
bne a0,t2,20b // if ne, more 32-byte blocks to fill
|
|
sw a2,-16(a0) //
|
|
.set reorder
|
|
|
|
#endif
|
|
|
|
//
|
|
// Check for 4-byte blocks to fill.
|
|
//
|
|
|
|
40: and t0,a1,4 - 1 // isolate residual bytes
|
|
subu t1,a1,t0 // subtract out residual bytes
|
|
addu t2,a0,t1 // compute ending block address
|
|
beq zero,t1,60f // if eq, no 4-byte block to fill
|
|
move a1,t0 // set residual number of bytes
|
|
|
|
//
|
|
// Fill 4-byte blocks.
|
|
//
|
|
|
|
.set noreorder
|
|
50: addu a0,a0,4 // advance pointer to next block
|
|
bne a0,t2,50b // if ne, more 4-byte blocks to fill
|
|
sw a2,-4(a0) // fill 4-byte block
|
|
.set reorder
|
|
|
|
//
|
|
// Check for 1-byte blocks to fill.
|
|
//
|
|
|
|
60: addu t2,a0,a1 // compute ending block address
|
|
beq zero,a1,80f // if eq, no bytes to fill
|
|
|
|
//
|
|
// Fill 1-byte blocks.
|
|
//
|
|
|
|
.set noreorder
|
|
70: addu a0,a0,1 // advance pointer to next block
|
|
bne a0,t2,70b // if ne, more 1-byte block to fill
|
|
sb a2,-1(a0) // fill 1-byte block
|
|
.set reorder
|
|
|
|
#if DBG
|
|
|
|
80: mfc1 t0,f0 // get fill pattern
|
|
mfc1 t1,f1 //
|
|
bne t0,a2,90f // if ne, pattern altered
|
|
bne t1,a2,90f // if ne, pattern altered
|
|
j ra // return
|
|
|
|
90: break KERNEL_BREAKPOINT //
|
|
|
|
#else
|
|
|
|
80: j ra // return
|
|
|
|
#endif
|
|
|
|
.end memset
|