mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1185 lines
56 KiB
1185 lines
56 KiB
// TITLE("Move, Zero, and Fill Memory Support")
|
|
//++
|
|
//
|
|
// Copyright (c) 1993 IBM Corporation
|
|
//
|
|
// Module Name:
|
|
//
|
|
// mvmem.s
|
|
//
|
|
// Abstract:
|
|
//
|
|
// This module implements functions to move, zero, and fill blocks of
|
|
// memory. If the memory is aligned, then these functions are very
|
|
// efficient.
|
|
//
|
|
// Author:
|
|
//
|
|
// Curt Fawcett (crf) 10-Aug-1993
|
|
//
|
|
// Environment:
|
|
//
|
|
// User or Kernel mode.
|
|
//
|
|
// Revision History:
|
|
//
|
|
// Curt Fawcett 11-Jan-1994 Removed register definitions
|
|
// and fixed for new assembler
|
|
//
|
|
// Curt Fawcett 27-May-1994 Updated to match new mips
|
|
// code
|
|
//
|
|
//--
|
|
|
|
#include <kxppc.h>
|
|
|
|
//
|
|
// Define local constants
|
|
//
|
|
.set BLKLN,32
|
|
//++
|
|
//
|
|
// VOID
|
|
// RtlMoveMemory (
|
|
// IN PVOID Destination,
|
|
// IN PVOID Source,
|
|
// IN ULONG Length
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function moves memory either forward or backward, aligned or
|
|
// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
|
|
// by any remaining bytes.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// DEST (r.3) - Supplies a pointer to the destination address of
|
|
// the move operation.
|
|
//
|
|
// SRC (r.4) - Supplies a pointer to the source address of the move
|
|
// operation.
|
|
//
|
|
// LNGTH (r.5) - Supplies the length, in bytes, of the memory to be
|
|
// moved.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
// N.B. The C runtime entry points memmove and memcpy are equivalent
|
|
// to RtlMoveMemory thus alternate entry points are provided for
|
|
// these routines.
|
|
//--
|
|
//
|
|
// Define the routine entry point
|
|
//
|
|
LEAF_ENTRY(RtlMoveMemory)
|
|
//
|
|
ALTERNATE_ENTRY(memcpy)
|
|
ALTERNATE_ENTRY(memmove)
|
|
//
|
|
// Check to see if destination block overlaps the source block
|
|
// If so, jump to a backward move to preserve source block from
|
|
// being corrupted.
|
|
//
|
|
cmpw r.4,r.3 // Check to see if DEST > SRC
|
|
bge+ MoveForward // Jump if no overlap possible
|
|
add r.10,r.4,r.5 // Get ending SRC address
|
|
cmpw r.10,r.3 // Check for overlap
|
|
bgt- MoveBackward // Jump for overlap
|
|
//
|
|
// Move Memory Forward
|
|
//
|
|
// Check alignment
|
|
//
|
|
|
|
MoveForward:
|
|
cmpwi r.5,4 // Check for less than 4 bytes
|
|
blt- FwdMoveByByte // Jump if single byte moves
|
|
xor r.9,r.4,r.3 // Check for same alignment
|
|
andi. r.9,r.9,3 // Isolate alignment bits
|
|
bne- MvFwdUnaligned // Jump if different alignments
|
|
//
|
|
// Move Memory Forward - Same SRC and DEST alignment
|
|
//
|
|
// Load and store extra bytes until a word boundary is reached
|
|
//
|
|
|
|
MvFwdAligned:
|
|
andi. r.6,r.3,3 // Check alignment type
|
|
beq+ FwdBlkDiv // Jump to process 32-Byte blocks
|
|
cmpwi r.6,3 // Check for 1 byte unaligned
|
|
bne+ FwdChkFor2 // If not, check next case
|
|
lbz r.7,0(r.4) // Get unaligned byte
|
|
li r.6,1 // Set byte move count
|
|
stb r.7,0(r.3) // Store unaligned byte
|
|
b UpdateAddrs // Jump to update addresses
|
|
FwdChkFor2:
|
|
cmpwi r.6,2 // Check for halfword aligned
|
|
bne+ FwdChkFor1 // If not, check next case
|
|
lhz r.7,0(r.4) // Get unaligned halfword
|
|
li r.6,2 // Set byte move count
|
|
sth r.7,0(r.3) // Store unaligned halfword
|
|
b UpdateAddrs // Jump to update addresses
|
|
FwdChkFor1:
|
|
lbz r.8,0(r.4) // Get unaligned byte
|
|
lhz r.7,1(r.4) // Get unaligned halfword
|
|
stb r.8,0(r.3) // Store unaligned byte
|
|
sth r.7,1(r.3) // Store unaligned halfword
|
|
li r.6,3 // Set byte move count
|
|
UpdateAddrs:
|
|
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
|
add r.4,r.4,r.6 // Update the SRC address
|
|
add r.3,r.3,r.6 // Update the DEST address
|
|
//
|
|
// Divide the block to process into 32-byte blocks
|
|
//
|
|
|
|
FwdBlkDiv:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMoveBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Move 32-byte blocks
|
|
//
|
|
FwdMvFullBlks:
|
|
lwz r.6,0(r.4) // Get 1st SRC word
|
|
lwz r.7,4(r.4) // Get 2nd SRC word
|
|
stw r.6,0(r.3) // Store 1st DEST word
|
|
stw r.7,4(r.3) // Store 2nd DEST word
|
|
lwz r.6,8(r.4) // Get 3rd SRC word
|
|
lwz r.7,12(r.4) // Get 4th SRC word
|
|
stw r.6,8(r.3) // Store 3rd DEST word
|
|
stw r.7,12(r.3) // Store 4th DEST word
|
|
lwz r.6,16(r.4) // Get 5th SRC word
|
|
lwz r.7,20(r.4) // Get 6th SRC word
|
|
stw r.6,16(r.3) // Store 5th DEST word
|
|
stw r.7,20(r.3) // Store 6th DEST word
|
|
lwz r.6,24(r.4) // Get 7th SRC word
|
|
lwz r.7,28(r.4) // Get 8th SRC word
|
|
stw r.6,24(r.3) // Store 7th DEST word
|
|
stw r.7,28(r.3) // Store 8th DEST word
|
|
addi r.4,r.4,32 // Update SRC pointer
|
|
addi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ FwdMvFullBlks // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks
|
|
//
|
|
|
|
FwdMoveBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
FwdLpOn4Bytes:
|
|
lwz r.6,0(r.4) // Load next set of 4 bytes
|
|
addi r.4,r.4,4 // Get pointer to next SRC block
|
|
stw r.6,0(r.3) // Store next DEST block
|
|
addi r.3,r.3,4 // Get pointer to next DEST block
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ FwdLpOn4Bytes // Jump if more blocks
|
|
//
|
|
// Move 1-byte blocks
|
|
//
|
|
|
|
FwdMoveByByte:
|
|
cmpwi r.5,0 // Check for no bytes left
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,0(r.4) // Get 1st SRC byte
|
|
cmpwi r.5,1 // Check for no bytes left
|
|
stb r.6,0(r.3) // Store 1st DEST byte
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,1(r.4) // Get 2nd SRC byte
|
|
cmpwi r.5,2 // Check for no bytes left
|
|
stb r.6,1(r.3) // Store 2nd DEST byte
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,2(r.4) // Get 3rd SRC byte
|
|
stb r.6,2(r.3) // Store 3rd byte word
|
|
b MvExit // Jump to return
|
|
//
|
|
// Forward Move - SRC and DEST have different alignments
|
|
//
|
|
|
|
MvFwdUnaligned:
|
|
or r.9,r.4,r.3 // Check if either byte unaligned
|
|
andi. r.9,r.9,3 // Isolate alignment
|
|
cmpwi r.9,2 // Check for even result
|
|
bne+ FwdMvByteUnaligned // Jump for byte unaligned
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
|
|
FwdBlkDivUnaligned:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMvHWrdBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Forward Move - SRC or DEST is halfword aligned, the other is by word
|
|
//
|
|
FwdMvByHWord:
|
|
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
|
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
|
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
|
|
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
|
lhz r.6,4(r.4) // Get 1st 2 bytes of 2nd SRC wrd
|
|
lhz r.7,6(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
|
|
sth r.6,4(r.3) // Put 1st 2 bytes of 2nd DST wrd
|
|
sth r.7,6(r.3) // Put 2nd 2 bytes of 2nd DST wrd
|
|
lhz r.6,8(r.4) // Get 1st 2 bytes of 3rd SRC wrd
|
|
lhz r.7,10(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
|
|
sth r.6,8(r.3) // Put 1st 2 bytes of 3rd DST wrd
|
|
sth r.7,10(r.3) // Put 2nd 2 bytes of 3rd DST wrd
|
|
lhz r.6,12(r.4) // Get 1st 2 bytes of 4th SRC wrd
|
|
lhz r.7,14(r.4) // Get 2nd 2 bytes of 4th SRC wrd
|
|
sth r.6,12(r.3) // Put 1st 2 bytes of 4th DST wrd
|
|
sth r.7,14(r.3) // Put 2nd 2 bytes of 4th DST wrd
|
|
lhz r.6,16(r.4) // Get 1st 2 bytes of 5th SRC wrd
|
|
lhz r.7,18(r.4) // Get 2nd 2 bytes of 5th SRC wrd
|
|
sth r.6,16(r.3) // Put 1st 2 bytes of 5th DST wrd
|
|
sth r.7,18(r.3) // Put 2nd 2 bytes of 5th DST wrd
|
|
lhz r.6,20(r.4) // Get 1st 2 bytes of 6th SRC wrd
|
|
lhz r.7,22(r.4) // Get 2nd 2 bytes of 6th SRC wrd
|
|
sth r.6,20(r.3) // Put 1st 2 bytes of 6th DST wrd
|
|
sth r.7,22(r.3) // Put 2nd 2 bytes of 6th DST wrd
|
|
lhz r.6,24(r.4) // Get 1st 2 bytes of 7th SRC wrd
|
|
lhz r.7,26(r.4) // Get 2nd 2 bytes of 7th SRC wrd
|
|
sth r.6,24(r.3) // Put 1st 2 bytes of 7th DST wrd
|
|
sth r.7,26(r.3) // Put 2nd 2 bytes of 7th DST wrd
|
|
lhz r.6,28(r.4) // Get 1st 2 bytes of 8th SRC wrd
|
|
lhz r.7,30(r.4) // Get 2nd 2 bytes of 8th SRC wrd
|
|
sth r.6,28(r.3) // Put 1st 2 bytes of 8th DST wrd
|
|
sth r.7,30(r.3) // Put 2nd 2 bytes of 8th DST wrd
|
|
addi r.4,r.4,32 // Update SRC pointer
|
|
addi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ FwdMvByHWord // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST Halfword unaligned
|
|
//
|
|
|
|
FwdMvHWrdBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
FwdHWrdLpOn4Bytes:
|
|
lhz r.6,0(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
|
lhz r.7,2(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
|
sth r.6,0(r.3) // Put 1st 2 bytes of 1st DST wrd
|
|
sth r.7,2(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
|
addi r.4,r.4,4 // Update SRC pointer
|
|
addi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ FwdHWrdLpOn4Bytes // Jump if more blocks
|
|
|
|
b FwdMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Forward Move - DEST is byte unaligned - Check SRC
|
|
//
|
|
|
|
FwdMvByteUnaligned:
|
|
and r.9,r.3,r.4 // Check for both byte aligned
|
|
and r.9,r.9,1 // Isolate alignment bits
|
|
bne- FwdBlksByByte // Jump if both not byte aligned
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMvByteBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Forward Move - Both DEST and SRC are byte unaligned, but differently
|
|
//
|
|
|
|
FwdMvByByte:
|
|
lbz r.6,0(r.4) // Get first byte of 1st SRC word
|
|
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
|
|
lbz r.8,3(r.4) // Get last byte of 1st SRC word
|
|
stb r.6,0(r.3) // Put first byte of 1st DEST wd
|
|
sth r.7,1(r.3) // Put mid-h-word of 1st DEST wd
|
|
stb r.8,3(r.3) // Put last byte of 1st DEST wrd
|
|
lbz r.6,4(r.4) // Get first byte of 2nd SRC word
|
|
lhz r.7,5(r.4) // Get mid-h-word of 2nd SRC word
|
|
lbz r.8,7(r.4) // Get last byte of 2nd SRC word
|
|
stb r.6,4(r.3) // Put first byte of 2nd DEST wd
|
|
sth r.7,5(r.3) // Put mid-h-word of 2nd DEST wd
|
|
stb r.8,7(r.3) // Put last byte of 2nd DEST wrd
|
|
lbz r.6,8(r.4) // Get first byte of 3rd SRC word
|
|
lhz r.7,9(r.4) // Get mid-h-word of 3rd SRC word
|
|
lbz r.8,11(r.4) // Get last byte of 3rd SRC word
|
|
stb r.6,8(r.3) // Put first byte of 3rd DEST wd
|
|
sth r.7,9(r.3) // Put mid-h-word of 3rd DEST wd
|
|
stb r.8,11(r.3) // Put last byte of 3rd DEST wrd
|
|
lbz r.6,12(r.4) // Get first byte of 4th SRC word
|
|
lhz r.7,13(r.4) // Get mid-h-word of 4th SRC word
|
|
lbz r.8,15(r.4) // Get last byte of 4th SRC word
|
|
stb r.6,12(r.3) // Put first byte of 4th DEST wd
|
|
sth r.7,13(r.3) // Put mid-h-word of 4th DEST wd
|
|
stb r.8,15(r.3) // Put last byte of 4th DEST wrd
|
|
lbz r.6,16(r.4) // Get first byte of 5th SRC word
|
|
lhz r.7,17(r.4) // Get mid-h-word of 5th SRC word
|
|
lbz r.8,19(r.4) // Get last byte of 5th SRC word
|
|
stb r.6,16(r.3) // Put first byte of 5th DEST wd
|
|
sth r.7,17(r.3) // Put mid-h-word of 5th DEST wd
|
|
stb r.8,19(r.3) // Put last byte of 5th DEST wrd
|
|
lbz r.6,20(r.4) // Get first byte of 6th SRC word
|
|
lhz r.7,21(r.4) // Get mid-h-word of 6th SRC word
|
|
lbz r.8,23(r.4) // Get last byte of 6th SRC word
|
|
stb r.6,20(r.3) // Put first byte of 6th DEST wd
|
|
sth r.7,21(r.3) // Put mid-h-word of 6th DEST wd
|
|
stb r.8,23(r.3) // Put last byte of 6th DEST wrd
|
|
lbz r.6,24(r.4) // Get first byte of 7th SRC word
|
|
lhz r.7,25(r.4) // Get mid-h-word of 7th SRC word
|
|
lbz r.8,27(r.4) // Get last byte of 7th SRC word
|
|
stb r.6,24(r.3) // Put first byte of 7th DEST wd
|
|
sth r.7,25(r.3) // Put mid-h-word of 7th DEST wd
|
|
stb r.8,27(r.3) // Put last byte of 7th DEST wrd
|
|
lbz r.6,28(r.4) // Get first byte of 8th SRC word
|
|
lhz r.7,29(r.4) // Get mid-h-word of 8th SRC word
|
|
lbz r.8,31(r.4) // Get last byte of 8th SRC word
|
|
stb r.6,28(r.3) // Put first byte of 8th DEST wd
|
|
sth r.7,29(r.3) // Put mid-h-word of 8th DEST wd
|
|
stb r.8,31(r.3) // Put last byte of 8th DEST wrd
|
|
addi r.4,r.4,32 // Update SRC pointer
|
|
addi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ FwdMvByByte // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST or SRC byte aligned
|
|
//
|
|
|
|
FwdMvByteBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
FwdByteLpOn4Bytes:
|
|
lbz r.6,0(r.4) // Get first byte of 1st SRC word
|
|
lhz r.7,1(r.4) // Get mid-h-word of 1st SRC word
|
|
lbz r.8,3(r.4) // Get last byte of 1st SRC word
|
|
stb r.6,0(r.3) // Put first byte of 1st DEST wd
|
|
sth r.7,1(r.3) // Put mid-h-word of 1st DEST wd
|
|
stb r.8,3(r.3) // Put last byte of 1st DEST wrd
|
|
addi r.4,r.4,4 // Update SRC pointer
|
|
addi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ FwdByteLpOn4Bytes // Jump if more blocks
|
|
|
|
b FwdMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Forward Move - Either SRC or DEST are byte unaligned but not both
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
|
|
FwdBlksByByte:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMvBlksOf4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
FwdMvBlksByByte:
|
|
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
|
|
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
|
|
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
|
|
stb r.7,1(r.3) // Put second byte of 1st DST wrd
|
|
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
|
|
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
|
|
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
|
|
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
|
|
lbz r.6,4(r.4) // Get first byte of 2nd SRC wrd
|
|
lbz r.7,5(r.4) // Get 2nd byte of 2nd SRC wrd
|
|
stb r.6,4(r.3) // Put first byte of 2nd DEST wrd
|
|
stb r.7,5(r.3) // Put second byte of 2nd DST wrd
|
|
lbz r.6,6(r.4) // Get third byte of 2nd SRC wrd
|
|
lbz r.7,7(r.4) // Get fourth byte of 2nd SRC wrd
|
|
stb r.6,6(r.3) // Put third byte of 2nd DEST wrd
|
|
stb r.7,7(r.3) // Put fourth byte of 2nd DST wrd
|
|
lbz r.6,8(r.4) // Get first byte of 3rd SRC wrd
|
|
lbz r.7,9(r.4) // Get second byte of 3rd SRC wrd
|
|
stb r.6,8(r.3) // Put first byte of 3rd DEST wrd
|
|
stb r.7,9(r.3) // Put second byte of 3rd DST wrd
|
|
lbz r.6,10(r.4) // Get third byte of 3rd SRC wrd
|
|
lbz r.7,11(r.4) // Get fourth byte of 3rd SRC wrd
|
|
stb r.6,10(r.3) // Put third byte of 3rd DEST wrd
|
|
stb r.7,11(r.3) // Put fourth byte of 3rd DST wrd
|
|
lbz r.6,12(r.4) // Get first byte of 4th SRC wrd
|
|
lbz r.7,13(r.4) // Get second byte of 4th SRC wrd
|
|
stb r.6,12(r.3) // Put first byte of 4th DEST wrd
|
|
stb r.7,13(r.3) // Put second byte of 4th DST wrd
|
|
lbz r.6,14(r.4) // Get third byte of 4th SRC wrd
|
|
lbz r.7,15(r.4) // Get fourth byte of 4th SRC wrd
|
|
stb r.6,14(r.3) // Put third byte of 4th DEST wrd
|
|
stb r.7,15(r.3) // Put fourth byte of 4th DST wrd
|
|
lbz r.6,16(r.4) // Get first byte of 5th SRC wrd
|
|
lbz r.7,17(r.4) // Get second byte of 5th SRC wrd
|
|
stb r.6,16(r.3) // Put first byte of 5th DEST wrd
|
|
stb r.7,17(r.3) // Put second byte of 5th DST wrd
|
|
lbz r.6,18(r.4) // Get third byte of 5th SRC wrd
|
|
lbz r.7,19(r.4) // Get fourth byte of 5th SRC wrd
|
|
stb r.6,18(r.3) // Put third byte of 5th DEST wrd
|
|
stb r.7,19(r.3) // Put fourth byte of 5th DST wrd
|
|
lbz r.6,20(r.4) // Get first byte of 6th SRC wrd
|
|
lbz r.7,21(r.4) // Get second byte of 6th SRC wrd
|
|
stb r.6,20(r.3) // Put first byte of 6th DEST wrd
|
|
stb r.7,21(r.3) // Put second byte of 6th DST wrd
|
|
lbz r.6,22(r.4) // Get third byte of 6th SRC wrd
|
|
lbz r.7,23(r.4) // Get fourth byte of 6th SRC wrd
|
|
stb r.6,22(r.3) // Put third byte of 6th DEST wrd
|
|
stb r.7,23(r.3) // Put fourth byte of 6th DST wrd
|
|
lbz r.6,24(r.4) // Get first byte of 7th SRC wrd
|
|
lbz r.7,25(r.4) // Get second byte of 7th SRC wrd
|
|
stb r.6,24(r.3) // Put first byte of 7th DEST wrd
|
|
stb r.7,25(r.3) // Put second byte of 7th DST wrd
|
|
lbz r.6,26(r.4) // Get third byte of 7th SRC wrd
|
|
lbz r.7,27(r.4) // Get fourth byte of 7th SRC wrd
|
|
stb r.6,26(r.3) // Put third byte of 7th DEST wrd
|
|
stb r.7,27(r.3) // Put fourth byte of 7th DST wrd
|
|
lbz r.6,28(r.4) // Get first byte of 8th SRC wrd
|
|
lbz r.7,29(r.4) // Get second byte of 8th SRC wrd
|
|
stb r.6,28(r.3) // Put first byte of 8th DEST wrd
|
|
stb r.7,29(r.3) // Put second byte of 8th DST wrd
|
|
lbz r.6,30(r.4) // Get third byte of 8th SRC wrd
|
|
lbz r.7,31(r.4) // Get fourth byte of 8th SRC wrd
|
|
stb r.6,30(r.3) // Put third byte of 8th DEST wrd
|
|
stb r.7,31(r.3) // Put fourth byte of 8th DST wrd
|
|
addi r.4,r.4,32 // Update SRC pointer
|
|
addi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ FwdMvBlksByByte // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST or SRC Byte aligned
|
|
//
|
|
|
|
FwdMvBlksOf4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
add r.10,r.4,r.7 // Get address of last full block
|
|
beq- FwdMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
FwdBlksLpOn4Bytes:
|
|
lbz r.6,0(r.4) // Get first byte of 1st SRC wrd
|
|
lbz r.7,1(r.4) // Get second byte of 1st SRC wrd
|
|
stb r.6,0(r.3) // Put first byte of 1st DEST wrd
|
|
stb r.7,1(r.3) // Put second byte of 1st DST wrd
|
|
lbz r.6,2(r.4) // Get third byte of 1st SRC wrd
|
|
lbz r.7,3(r.4) // Get fourth byte of 1st SRC wrd
|
|
stb r.6,2(r.3) // Put third byte of 1st DEST wrd
|
|
stb r.7,3(r.3) // Put fourth byte of 1st DST wrd
|
|
addi r.4,r.4,4 // Update SRC pointer
|
|
addi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ FwdBlksLpOn4Bytes // Jump if more blocks
|
|
|
|
b FwdMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Move Memory Backward
|
|
//
|
|
// Check alignment
|
|
//
|
|
|
|
MoveBackward:
|
|
add r.4,r.4,r.5 // Compute ending SRC address
|
|
add r.3,r.3,r.5 // Compute ending DEST address
|
|
cmpwi r.5,4 // Check for less than 4 bytes
|
|
blt- BckMoveByByte // Jump if single byte moves
|
|
xor r.9,r.4,r.3 // Check for same alignment
|
|
andi. r.9,r.9,3 // Isolate alignment bits
|
|
bne- MvBckUnaligned // Jump if different alignments
|
|
//
|
|
// Move Memory Backword - Same SRC and DEST alignment
|
|
//
|
|
// Load and store extra bytes until a word boundary is reached
|
|
//
|
|
|
|
MvBckAligned:
|
|
andi. r.6,r.3,3 // Check alignment type
|
|
beq+ BckBlkDiv // Jump to process 32-Byte blocks
|
|
cmpwi r.6,1 // Check for 1 byte unaligned
|
|
bne+ BckChkFor2 // If not, check next case
|
|
lbz r.7,-1(r.4) // Get unaligned byte
|
|
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
|
stb r.7,-1(r.3) // Store unaligned byte
|
|
b BckUpdateAddrs // Jump to update addresses
|
|
BckChkFor2:
|
|
cmpwi r.6,2 // Check for halfword aligned
|
|
bne+ BckChkFor3 // If not, check next case
|
|
lhz r.7,-2(r.4) // Get unaligned halfword
|
|
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
|
sth r.7,-2(r.3) // Store unaligned halfword
|
|
b BckUpdateAddrs // Jump to update addresses
|
|
BckChkFor3:
|
|
lbz r.8,-1(r.4) // Get unaligned byte
|
|
lhz r.7,-3(r.4) // Get unaligned halfword
|
|
stb r.8,-1(r.3) // Store unaligned byte
|
|
sth r.7,-3(r.3) // Store unaligned halfword
|
|
sub r.5,r.5,r.6 // Decrement LNGTH by unaligned
|
|
BckUpdateAddrs:
|
|
sub r.4,r.4,r.6 // Update the SRC address
|
|
sub r.3,r.3,r.6 // Update the DEST address
|
|
//
|
|
// Divide the block to process into 32-byte blocks
|
|
//
|
|
|
|
BckBlkDiv:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMoveBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Move 32-byte blocks
|
|
//
|
|
BckMvFullBlks:
|
|
lwz r.6,-4(r.4) // Get 1st SRC word
|
|
lwz r.7,-8(r.4) // Get 2nd SRC word
|
|
stw r.6,-4(r.3) // Store 1st DEST word
|
|
stw r.7,-8(r.3) // Store 2nd DEST word
|
|
lwz r.6,-12(r.4) // Get 3rd SRC word
|
|
lwz r.7,-16(r.4) // Get 4th SRC word
|
|
stw r.6,-12(r.3) // Store 3rd DEST word
|
|
stw r.7,-16(r.3) // Store 4th DEST word
|
|
lwz r.6,-20(r.4) // Get 5th SRC word
|
|
lwz r.7,-24(r.4) // Get 6th SRC word
|
|
stw r.6,-20(r.3) // Store 5th DEST word
|
|
stw r.7,-24(r.3) // Store 6th DEST word
|
|
lwz r.6,-28(r.4) // Get 7th SRC word
|
|
lwz r.7,-32(r.4) // Get 8th SRC word
|
|
stw r.6,-28(r.3) // Store 7th DEST word
|
|
stw r.7,-32(r.3) // Store 8th DEST word
|
|
subi r.4,r.4,32 // Update SRC pointer
|
|
subi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ BckMvFullBlks // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks
|
|
//
|
|
|
|
BckMoveBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
BckLpOn4Bytes:
|
|
lwz r.6,-4(r.4) // Load next set of 4 bytes
|
|
subi r.4,r.4,4 // Get pointer to next SRC block
|
|
stw r.6,-4(r.3) // Store next DEST block
|
|
subi r.3,r.3,4 // Get pointer to next DEST block
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ BckLpOn4Bytes // Jump if more blocks
|
|
//
|
|
// Move 1-byte blocks
|
|
//
|
|
|
|
BckMoveByByte:
|
|
cmpwi r.5,0 // Check for no bytes left
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,-1(r.4) // Get 1st SRC byte
|
|
cmpwi r.5,1 // Check for no bytes left
|
|
stb r.6,-1(r.3) // Store 1st DEST byte
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,-2(r.4) // Get 2nd SRC byte
|
|
cmpwi r.5,2 // Check for no bytes left
|
|
stb r.6,-2(r.3) // Store 2nd DEST byte
|
|
beq+ MvExit // Jump to return if done
|
|
lbz r.6,-3(r.4) // Get 3rd SRC byte
|
|
stb r.6,-3(r.3) // Store 3rd byte word
|
|
b MvExit // Jump to return
|
|
//
|
|
// Backward Move - SRC and DEST have different alignments
|
|
//
|
|
|
|
MvBckUnaligned:
|
|
or r.9,r.4,r.3 // Check for either byte unaligned
|
|
andi. r.9,r.9,3 // Isolate alignment
|
|
cmpwi r.9,2 // Check for even result
|
|
bne+ BckMvByteUnaligned // Jump for byte unaligned
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
|
|
BckBlkDivUnaligned:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMvHWrdBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Backward Move - SRC or DEST is halfword aligned, the other is by word
|
|
//
|
|
BckMvByHWord:
|
|
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
|
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
|
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
|
|
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
|
lhz r.6,-6(r.4) // Get 1st 2 bytes of 2nd SRC wrd
|
|
lhz r.7,-8(r.4) // Get 2nd 2 bytes of 2nd SRC wrd
|
|
sth r.6,-6(r.3) // Put 1st 2 bytes of 2nd DST wrd
|
|
sth r.7,-8(r.3) // Put 2nd 2 bytes of 2nd DST wrd
|
|
lhz r.6,-10(r.4) // Get 1st 2 bytes of 3rd SRC wrd
|
|
lhz r.7,-12(r.4) // Get 2nd 2 bytes of 3rd SRC wrd
|
|
sth r.6,-10(r.3) // Put 1st 2 bytes of 3rd DST wrd
|
|
sth r.7,-12(r.3) // Put 2nd 2 bytes of 3rd DST wrd
|
|
lhz r.6,-14(r.4) // Get 1st 2 bytes of 4th SRC wrd
|
|
lhz r.7,-16(r.4) // Get 2nd 2 bytes of 4th SRC wrd
|
|
sth r.6,-14(r.3) // Put 1st 2 bytes of 4th DST wrd
|
|
sth r.7,-16(r.3) // Put 2nd 2 bytes of 4th DST wrd
|
|
lhz r.6,-18(r.4) // Get 1st 2 bytes of 5th SRC wrd
|
|
lhz r.7,-20(r.4) // Get 2nd 2 bytes of 5th SRC wrd
|
|
sth r.6,-18(r.3) // Put 1st 2 bytes of 5th DST wrd
|
|
sth r.7,-20(r.3) // Put 2nd 2 bytes of 5th DST wrd
|
|
lhz r.6,-22(r.4) // Get 1st 2 bytes of 6th SRC wrd
|
|
lhz r.7,-24(r.4) // Get 2nd 2 bytes of 6th SRC wrd
|
|
sth r.6,-22(r.3) // Put 1st 2 bytes of 6th DST wrd
|
|
sth r.7,-24(r.3) // Put 2nd 2 bytes of 6th DST wrd
|
|
lhz r.6,-26(r.4) // Get 1st 2 bytes of 7th SRC wrd
|
|
lhz r.7,-28(r.4) // Get 2nd 2 bytes of 7th SRC wrd
|
|
sth r.6,-26(r.3) // Put 1st 2 bytes of 7th DST wrd
|
|
sth r.7,-28(r.3) // Put 2nd 2 bytes of 7th DST wrd
|
|
lhz r.6,-30(r.4) // Get 1st 2 bytes of 8th SRC wrd
|
|
lhz r.7,-32(r.4) // Get 2nd 2 bytes of 8th SRC wrd
|
|
sth r.6,-30(r.3) // Put 1st 2 bytes of 8th DST wrd
|
|
sth r.7,-32(r.3) // Put 2nd 2 bytes of 8th DST wrd
|
|
subi r.4,r.4,32 // Update SRC pointer
|
|
subi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ BckMvByHWord // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST Halfword unaligned
|
|
//
|
|
|
|
BckMvHWrdBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
BckHWrdLpOn4Bytes:
|
|
lhz r.6,-2(r.4) // Get 1st 2 bytes of 1st SRC wrd
|
|
lhz r.7,-4(r.4) // Get 2nd 2 bytes of 1st SRC wrd
|
|
sth r.6,-2(r.3) // Put 1st 2 bytes of 1st DST wrd
|
|
sth r.7,-4(r.3) // Put 2nd 2 bytes of 1st DST wrd
|
|
subi r.4,r.4,4 // Update SRC pointer
|
|
subi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ BckHWrdLpOn4Bytes // Jump if more blocks
|
|
|
|
b BckMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Check for both byte unaligned
|
|
//
|
|
|
|
BckMvByteUnaligned:
|
|
and r.9,r.3,r.4 // Check for both byte aligned
|
|
and r.9,r.9,1 // Isolate alignment bits
|
|
bne- BckBlksByByte // Jump if both not byte aligned
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMvByteBy4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
//
|
|
// Backward Move - Both SRC and DEST are byte unaligned, but differently
|
|
//
|
|
|
|
BckMvByByte:
|
|
lbz r.6,-1(r.4) // Get first byte of 1st SRC word
|
|
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
|
|
lbz r.8,-4(r.4) // Get last byte of 1st SRC word
|
|
stb r.6,-1(r.3) // Put first byte of 1st DEST wd
|
|
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST wd
|
|
stb r.8,-4(r.3) // Put last byte of 1st DEST wrd
|
|
lbz r.6,-5(r.4) // Get first byte of 2nd SRC word
|
|
lhz r.7,-7(r.4) // Get mid-h-word of 2nd SRC word
|
|
lbz r.8,-8(r.4) // Get last byte of 2nd SRC word
|
|
stb r.6,-5(r.3) // Put first byte of 2nd DEST wd
|
|
sth r.7,-7(r.3) // Put mid-h-word of 2nd DEST wd
|
|
stb r.8,-8(r.3) // Put last byte of 2nd DEST wrd
|
|
lbz r.6,-9(r.4) // Get first byte of 3rd SRC word
|
|
lhz r.7,-11(r.4) // Get mid-h-word of 3rd SRC word
|
|
lbz r.8,-12(r.4) // Get last byte of 3rd SRC word
|
|
stb r.6,-9(r.3) // Put first byte of 3rd DEST wd
|
|
sth r.7,-11(r.3) // Put mid-h-word of 3rd DEST wd
|
|
stb r.8,-12(r.3) // Put last byte of 3rd DEST wrd
|
|
lbz r.6,-13(r.4) // Get first byte of 4th SRC word
|
|
lhz r.7,-15(r.4) // Get mid-h-word of 4th SRC word
|
|
lbz r.8,-16(r.4) // Get last byte of 4th SRC word
|
|
stb r.6,-13(r.3) // Put first byte of 4th DEST wd
|
|
sth r.7,-15(r.3) // Put mid-h-word of 4th DEST wd
|
|
stb r.8,-16(r.3) // Put last byte of 4th DEST wrd
|
|
lbz r.6,-17(r.4) // Get first byte of 5th SRC word
|
|
lhz r.7,-19(r.4) // Get mid-h-word of 5th SRC word
|
|
lbz r.8,-20(r.4) // Get last byte of 5th SRC word
|
|
stb r.6,-17(r.3) // Put first byte of 5th DEST wd
|
|
sth r.7,-19(r.3) // Put mid-h-word of 5th DEST wd
|
|
stb r.8,-20(r.3) // Put last byte of 5th DEST wrd
|
|
lbz r.6,-21(r.4) // Get first byte of 6th SRC word
|
|
lhz r.7,-23(r.4) // Get mid-h-word of 6th SRC word
|
|
lbz r.8,-24(r.4) // Get last byte of 6th SRC word
|
|
stb r.6,-21(r.3) // Put first byte of 6th DEST wd
|
|
sth r.7,-23(r.3) // Put mid-h-word of 6th DEST wd
|
|
stb r.8,-24(r.3) // Put last byte of 6th DEST wrd
|
|
lbz r.6,-25(r.4) // Get first byte of 7th SRC word
|
|
lhz r.7,-27(r.4) // Get mid-h-word of 7th SRC word
|
|
lbz r.8,-28(r.4) // Get last byte of 7th SRC word
|
|
stb r.6,-25(r.3) // Put first byte of 7th DEST wd
|
|
sth r.7,-27(r.3) // Put mid-h-word of 7th DEST wd
|
|
stb r.8,-28(r.3) // Put last byte of 7th DEST wrd
|
|
lbz r.6,-29(r.4) // Get first byte of 8th SRC word
|
|
lhz r.7,-31(r.4) // Get mid-h-word of 8th SRC word
|
|
lbz r.8,-32(r.4) // Get last byte of 8th SRC word
|
|
stb r.6,-29(r.3) // Put first byte of 8th DEST wd
|
|
sth r.7,-31(r.3) // Put mid-h-word of 8th DEST wd
|
|
stb r.8,-32(r.3) // Put last byte of 8th DEST wrd
|
|
subi r.4,r.4,32 // Update SRC pointer
|
|
subi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ BckMvByByte // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST and SRC Byte aligned
|
|
//
|
|
|
|
BckMvByteBy4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
BckByteLpOn4Bytes:
|
|
lbz r.6,-1(r.4) // Get first byte of 1st SRC word
|
|
lhz r.7,-3(r.4) // Get mid-h-word of 1st SRC word
|
|
lbz r.8,-4(r.4) // Get last byte of 1st SRC word
|
|
stb r.6,-1(r.3) // Put first byte of 1st DEST wd
|
|
sth r.7,-3(r.3) // Put mid-h-word of 1st DEST wd
|
|
stb r.8,-4(r.3) // Put last byte of 1st DEST wrd
|
|
subi r.4,r.4,4 // Update SRC pointer
|
|
subi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ BckByteLpOn4Bytes // Jump if more blocks
|
|
|
|
b BckMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Backward Move - Either DEST or SRC byte unaligned but not both
|
|
//
|
|
// Divide the blocks to process into 32-byte blocks
|
|
//
|
|
|
|
BckBlksByByte:
|
|
andi. r.6,r.5,BLKLN-1 // Isolate remainder of LNGTH/32
|
|
sub. r.7,r.5,r.6 // Get full block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMvBlksOf4Bytes // Jump if no full blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
BckMvBlksByByte:
|
|
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
|
|
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
|
|
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
|
|
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
|
|
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
|
|
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
|
|
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
|
|
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
|
|
lbz r.6,-5(r.4) // Get first byte of 2nd SRC wrd
|
|
lbz r.7,-6(r.4) // Get second byte of 2nd SRC wrd
|
|
stb r.6,-5(r.3) // Put first byte of 2nd DEST wrd
|
|
stb r.7,-6(r.3) // Put second byte of 2nd DST wrd
|
|
lbz r.6,-7(r.4) // Get third byte of 2nd SRC wrd
|
|
lbz r.7,-8(r.4) // Get fourth byte of 2nd SRC wrd
|
|
stb r.6,-7(r.3) // Put third byte of 2nd DEST wrd
|
|
stb r.7,-8(r.3) // Put fourth byte of 2nd DST wrd
|
|
lbz r.6,-9(r.4) // Get first byte of 3rd SRC wrd
|
|
lbz r.7,-10(r.4) // Get second byte of 3rd SRC wrd
|
|
stb r.6,-9(r.3) // Put first byte of 3rd DST wrd
|
|
stb r.7,-10(r.3) // Put second byte of 3rd DST wrd
|
|
lbz r.6,-11(r.4) // Get third byte of 3rd SRC wrd
|
|
lbz r.7,-12(r.4) // Get fourth byte of 3rd SRC wrd
|
|
stb r.6,-11(r.3) // Put third byte of 3rd DEST wrd
|
|
stb r.7,-12(r.3) // Put fourth byte of 3rd DST wrd
|
|
lbz r.6,-13(r.4) // Get first byte of 4th SRC wrd
|
|
lbz r.7,-14(r.4) // Get second byte of 4th SRC wrd
|
|
stb r.6,-13(r.3) // Put first byte of 4th DEST wrd
|
|
stb r.7,-14(r.3) // Put second byte of 4th DST wrd
|
|
lbz r.6,-15(r.4) // Get third byte of 4th SRC wrd
|
|
lbz r.7,-16(r.4) // Get fourth byte of 4th SRC wrd
|
|
stb r.6,-15(r.3) // Put third byte of 4th DEST wrd
|
|
stb r.7,-16(r.3) // Put fourth byte of 4th DST wrd
|
|
lbz r.6,-17(r.4) // Get first byte of 5th SRC wrd
|
|
lbz r.7,-18(r.4) // Get second byte of 5th SRC wrd
|
|
stb r.6,-17(r.3) // Put first byte of 5th DEST wrd
|
|
stb r.7,-18(r.3) // Put second byte of 5th DST wrd
|
|
lbz r.6,-19(r.4) // Get third byte of 5th SRC wrd
|
|
lbz r.7,-20(r.4) // Get fourth byte of 5th SRC wrd
|
|
stb r.6,-19(r.3) // Put third byte of 5th DEST wrd
|
|
stb r.7,-20(r.3) // Put fourth byte of 5th DST wrd
|
|
lbz r.6,-21(r.4) // Get first byte of 6th SRC wrd
|
|
lbz r.7,-22(r.4) // Get second byte of 6th SRC wrd
|
|
stb r.6,-21(r.3) // Put first byte of 6th DEST wrd
|
|
stb r.7,-22(r.3) // Put second byte of 6th DST wrd
|
|
lbz r.6,-23(r.4) // Get third byte of 6th SRC wrd
|
|
lbz r.7,-24(r.4) // Get fourth byte of 6th SRC wrd
|
|
stb r.6,-23(r.3) // Put third byte of 6th DEST wrd
|
|
stb r.7,-24(r.3) // Put fourth byte of 6th DST wrd
|
|
lbz r.6,-25(r.4) // Get first byte of 7th SRC wrd
|
|
lbz r.7,-26(r.4) // Get second byte of 7th SRC wrd
|
|
stb r.6,-25(r.3) // Put first byte of 7th DEST wrd
|
|
stb r.7,-26(r.3) // Put second byte of 7th DST wrd
|
|
lbz r.6,-27(r.4) // Get third byte of 7th SRC wrd
|
|
lbz r.7,-28(r.4) // Get fourth byte of 7th SRC wrd
|
|
stb r.6,-27(r.3) // Put third byte of 7th DEST wrd
|
|
stb r.7,-28(r.3) // Put fourth byte of 7th DST wrd
|
|
lbz r.6,-29(r.4) // Get first byte of 8th SRC wrd
|
|
lbz r.7,-30(r.4) // Get second byte of 8th SRC wrd
|
|
stb r.6,-29(r.3) // Put first byte of 8th DEST wrd
|
|
stb r.7,-30(r.3) // Put second byte of 8th DST wrd
|
|
lbz r.6,-31(r.4) // Get third byte of 8th SRC wrd
|
|
lbz r.7,-32(r.4) // Get fourth byte of 8th SRC wrd
|
|
stb r.6,-31(r.3) // Put third byte of 8th DEST wrd
|
|
stb r.7,-32(r.3) // Put fourth byte of 8th DST wrd
|
|
subi r.4,r.4,32 // Update SRC pointer
|
|
subi r.3,r.3,32 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for all blocks done
|
|
bne+ BckMvBlksByByte // Jump if more blocks
|
|
//
|
|
// Move 4-byte blocks with DEST or SRC Byte aligned, but not the other
|
|
//
|
|
|
|
BckMvBlksOf4Bytes:
|
|
andi. r.6,r.5,4-1 // Isolate remainder of LNGTH/4
|
|
sub. r.7,r.5,r.6 // Get 4-byte block count
|
|
sub r.10,r.4,r.7 // Get address of last full block
|
|
beq- BckMoveByByte // Jump if no 4-byte blocks
|
|
mr r.5,r.6 // Set Length = remainder
|
|
|
|
BckBlksLpOn4Bytes:
|
|
lbz r.6,-1(r.4) // Get first byte of 1st SRC wrd
|
|
lbz r.7,-2(r.4) // Get second byte of 1st SRC wrd
|
|
stb r.6,-1(r.3) // Put first byte of 1st DEST wrd
|
|
stb r.7,-2(r.3) // Put second byte of 1st DST wrd
|
|
lbz r.6,-3(r.4) // Get third byte of 1st SRC wrd
|
|
lbz r.7,-4(r.4) // Get fourth byte of 1st SRC wrd
|
|
stb r.6,-3(r.3) // Put third byte of 1st DEST wrd
|
|
stb r.7,-4(r.3) // Put fourth byte of 1st DST wrd
|
|
subi r.4,r.4,4 // Update SRC pointer
|
|
subi r.3,r.3,4 // Update DEST pointer
|
|
cmpw r.4,r.10 // Check for last block
|
|
bne+ BckBlksLpOn4Bytes // Jump if more blocks
|
|
|
|
b BckMoveByByte // Jump to complete last bytes
|
|
//
|
|
// Exit the routine
|
|
//
|
|
MvExit:
|
|
LEAF_EXIT(RtlMoveMemory)
|
|
//
|
|
//
|
|
// .end memcpy
|
|
// .end memmove
|
|
//++
|
|
//
|
|
// VOID
|
|
// RtlZeroMemory (
|
|
// IN PVOID Destination,
|
|
// IN ULONG Length
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function zeros memory by first aligning the destination
|
|
// address to a longword boundary, and then zeroing 32-byte blocks,
|
|
// followed by 4-byte blocks, followed by any remaining bytes.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// DEST (r.3) - Supplies a pointer to the memory to zero.
|
|
//
|
|
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
|
|
// zeroed.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
//
|
|
// Define the entry point
|
|
//
|
|
LEAF_ENTRY(RtlZeroMemory)
|
|
//
|
|
// Set Fill pattern as zero
|
|
//
|
|
li r.5,0 // Set pattern as 0
|
|
//
|
|
// Fill Memory with the zeros
|
|
//
|
|
//
|
|
// Zero extra bytes until a word boundary is reached
|
|
//
|
|
cmpwi r.4,4 // Check for less than 3 bytes
|
|
blt- ZeroByByte // Jump to handle small cases
|
|
andi. r.6,r.3,3 // Check alignment type
|
|
beq+ ZBlkDiv // Jump to process 32-Byte blocks
|
|
cmpwi r.6,3 // Check for 1 byte unaligned
|
|
bne+ Zero2 // If not, check next case
|
|
stb r.5,0(r.3) // Store unaligned byte
|
|
li r.6,1
|
|
b ZUpdteAddr // Jump to update addresses
|
|
Zero2:
|
|
cmpwi r.6,2 // Check for halfword aligned
|
|
bne+ Zero3 // If not, check next case
|
|
sth r.5,0(r.3) // Get unaligned halfword
|
|
li r.6,2
|
|
b ZUpdteAddr // Jump to update addresses
|
|
Zero3:
|
|
stb r.5,0(r.3) // Store unaligned byte
|
|
sth r.5,1(r.3) // Store unaligned halfword
|
|
li r.6,3
|
|
ZUpdteAddr:
|
|
sub r.4,r.4,r.6 // Decrement LENGTH by unaligned
|
|
add r.3,r.3,r.6 // Update the DEST address
|
|
//
|
|
// Divide the block to process into 32-byte blocks
|
|
//
|
|
|
|
ZBlkDiv:
|
|
andi. r.6,r.4,BLKLN-1 // Isolate remainder of LENGTH/32
|
|
sub. r.7,r.4,r.6 // Get full block count
|
|
add r.10,r.3,r.7 // Get address of last full block
|
|
beq- ZeroBy4Bytes // Jump if no full blocks
|
|
mr r.4,r.6 // Set Length = Remainder
|
|
//
|
|
// Zero 32-Byte Blocks
|
|
//
|
|
// Check for 32-Byte Boundary, if so use the cache zero
|
|
//
|
|
andi. r.9,r.3,31 // Check for cache boundary
|
|
bne+ BlkZero // Jump if not on cache boundary
|
|
li r.6,0 // Set offset=0
|
|
//
|
|
// Zero using the cache
|
|
//
|
|
|
|
BlkZeroC:
|
|
dcbz r.6,r.3 // Zero 32-byte cache block
|
|
addi r.3,r.3,32 // Increment the DEST address
|
|
cmpw r.3,r.10 // Check for completion
|
|
bne+ BlkZeroC // Jump if more 32-Byte Blk fills
|
|
|
|
b ZeroBy4Bytes // Jump to finish
|
|
//
|
|
// Zero using normal stores
|
|
//
|
|
BlkZero:
|
|
stw r.5,0(r.3) // Store the 1st DEST word
|
|
stw r.5,4(r.3) // Store the 2nd DEST word
|
|
stw r.5,8(r.3) // Store the 3rd DEST word
|
|
stw r.5,12(r.3) // Store the 4th DEST word
|
|
stw r.5,16(r.3) // Store the 5th DEST word
|
|
stw r.5,20(r.3) // Store the 6th DEST word
|
|
stw r.5,24(r.3) // Store the 7th DEST word
|
|
stw r.5,28(r.3) // Store the 8th DEST word
|
|
addi r.3,r.3,32 // Increment the DEST address
|
|
cmpw r.3,r.10 // Check for completion
|
|
bne+ BlkZero // Jump if more 32-Byte Blk fills
|
|
//
|
|
// Zero 4-Byte Blocks
|
|
//
|
|
ZeroBy4Bytes:
|
|
andi. r.6,r.4,3 // Isolate remainder of LENGTH/4
|
|
sub. r.7,r.4,r.6 // Get full block count
|
|
add r.10,r.3,r.7 // Get address of last full block
|
|
beq- ZeroByByte // Jump if no full blocks
|
|
mr r.4,r.6 // Set Length = Remainder
|
|
//
|
|
Zero4Bytes:
|
|
stw r.5,0(r.3)
|
|
addi r.3,r.3,4 // Increment the DEST address
|
|
cmpw r.3,r.10 // Check for completion
|
|
bne+ Zero4Bytes // Jump if more 4-Byte Blk fills
|
|
//
|
|
// Zero 1-Byte Blocks
|
|
//
|
|
ZeroByByte:
|
|
cmpwi r.4,0 // Check for completion
|
|
beq+ ZeroExit // Jump if done
|
|
//
|
|
Zero1Byte:
|
|
stb r.5,0(r.3) // Zero 1 byte
|
|
cmpwi r.4,1 // Check for done
|
|
beq+ ZeroExit // Jump if done
|
|
stb r.5,1(r.3) // Zero 1 byte
|
|
cmpwi r.4,2 // Check for done
|
|
beq+ ZeroExit // Jump if done
|
|
stb r.5,2(r.3) // Zero 1 Byte
|
|
//
|
|
// Exit
|
|
//
|
|
ZeroExit:
|
|
LEAF_EXIT(RtlZeroMemory)
|
|
//
|
|
//++
|
|
//
|
|
// VOID
|
|
// RtlFillMemory (
|
|
// IN PVOID Destination,
|
|
// IN ULONG Length,
|
|
// IN UCHAR Fill
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This function fills memory by first aligning the destination
|
|
// address to a longword boundary, and then filling 32-byte blocks,
|
|
// followed by 4-byte blocks, followed by any remaining bytes.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// DEST (r.3) - Supplies a pointer to the memory to fill.
|
|
//
|
|
// LENGTH (r.4) - Supplies the length, in bytes, of the memory to be
|
|
// filled.
|
|
//
|
|
// PTTRN (r.5) - Supplies the fill byte.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
// N.B. The entry memset expects the length and fill arguments
|
|
// to be reversed.
|
|
//
|
|
//--
|
|
//
|
|
// Define the entry point
|
|
//
|
|
LEAF_ENTRY(memset)
|
|
//
|
|
// Reverse length and fill arguments
|
|
//
|
|
mr r.6,r.4 // Temporary save PTTRN
|
|
mr r.4,r.5 // Move LENGTH to correct reg
|
|
mr r.5,r.6 // Move PTTRN to correct reg
|
|
|
|
ALTERNATE_ENTRY(RtlFillMemory)
|
|
//
|
|
// Initialize a register with the fill byte duplicated
|
|
//
|
|
andi. r.5,r.5,0xff // Clear leftmost bits
|
|
slwi r.6,r.5,8 // Shift the pattern left
|
|
or r.5,r.6,r.5 // Create 1st copy of the byte
|
|
slwi r.6,r.5,16 // Shift the pattern left
|
|
or r.5,r.6,r.5 // Create the filled register
|
|
//
|
|
// Fill Memory with the pattern
|
|
//
|
|
//
|
|
// Fill extra bytes until a word boundary is reached
|
|
//
|
|
|
|
RtlpFillMemory:
|
|
cmpwi r.4,4 // Check for less than 3 bytes
|
|
blt- FillByByte // Jump to handle small cases
|
|
andi. r.6,r.3,3 // Check alignment type
|
|
beq+ BlkDiv // Jump to process 32-Byte blocks
|
|
cmpwi r.6,3 // Check for 1 byte unaligned
|
|
bne+ Fill2 // If not, check next case
|
|
stb r.5,0(r.3) // Store unaligned byte
|
|
li r.6,1
|
|
b UpdteAddr // Jump to update addresses
|
|
Fill2:
|
|
cmpwi r.6,2 // Check for halfword aligned
|
|
bne+ Fill3 // If not, check next case
|
|
sth r.5,0(r.3) // Get unaligned halfword
|
|
li r.6,2
|
|
b UpdteAddr // Jump to update addresses
|
|
Fill3:
|
|
stb r.5,0(r.3) // Store unaligned byte
|
|
sth r.5,1(r.3) // Store unaligned halfword
|
|
li r.6,3
|
|
UpdteAddr:
|
|
sub r.4,r.4,r.6 // Decrement LENGTH by unaligned
|
|
add r.3,r.3,r.6 // Update the DEST address
|
|
//
|
|
// Divide the block to process into 32-byte blocks
|
|
//
|
|
|
|
BlkDiv:
|
|
andi. r.6,r.4,BLKLN-1 // Isolate remainder of LENGTH/32
|
|
sub. r.7,r.4,r.6 // Get full block count
|
|
add r.10,r.3,r.7 // Get address of last full block
|
|
beq- FillBy4Bytes // Jump if no full blocks
|
|
mr r.4,r.6 // Set Length = Remainder
|
|
//
|
|
// Fill 32-Byte Blocks
|
|
//
|
|
BlkFill:
|
|
stw r.5,0(r.3) // Store the 1st DEST word
|
|
stw r.5,4(r.3) // Store the 2nd DEST word
|
|
stw r.5,8(r.3) // Store the 3rd DEST word
|
|
stw r.5,12(r.3) // Store the 4th DEST word
|
|
stw r.5,16(r.3) // Store the 5th DEST word
|
|
stw r.5,20(r.3) // Store the 6th DEST word
|
|
stw r.5,24(r.3) // Store the 7th DEST word
|
|
stw r.5,28(r.3) // Store the 8th DEST word
|
|
addi r.3,r.3,32 // Increment the DEST address
|
|
cmpw r.3,r.10 // Check for completion
|
|
bne+ BlkFill // Jump if more 32-Byte Blk fills
|
|
//
|
|
// Fill 4-Byte Blocks
|
|
//
|
|
FillBy4Bytes:
|
|
andi. r.6,r.4,3 // Isolate remainder of LENGTH/4
|
|
sub. r.7,r.4,r.6 // Get full block count
|
|
add r.10,r.3,r.7 // Get address of last full block
|
|
beq- FillByByte // Jump if no full blocks
|
|
mr r.4,r.6 // Set Length = Remainder
|
|
//
|
|
Fill4Bytes:
|
|
stw r.5,0(r.3)
|
|
addi r.3,r.3,4 // Increment the DEST address
|
|
cmpw r.3,r.10 // Check for completion
|
|
bne+ Fill4Bytes // Jump if more 4-Byte Blk fills
|
|
//
|
|
// Fill 1-Byte Blocks
|
|
//
|
|
FillByByte:
|
|
cmpwi r.4,0 // Check for completion
|
|
beq+ FillExit // Jump if done
|
|
//
|
|
Fill1Byte:
|
|
stb r.5,0(r.3) // Fill 1 byte
|
|
cmpwi r.4,1 // Check for done
|
|
beq+ FillExit // Jump if done
|
|
stb r.5,1(r.3) // Fill 1 byte
|
|
cmpwi r.4,2 // Check for done
|
|
beq+ FillExit // Jump if done
|
|
stb r.5,2(r.3) // Fill 1 Byte
|
|
//
|
|
// Exit
|
|
//
|
|
FillExit:
|
|
LEAF_EXIT(memset)
|
|
// .end RtlFillMemory
|