/*
 * Fast bcopy code which supports overlapped copies.
 * Not fully optimized yet.
 *
 * Written by: Kipp Hickman
 *
 * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $
 * $Revision: 1.7 $
 * $Date: 1993/11/20 19:23:11 $
 */

#include <kxmips.h>

/*
 * char *bcopy(from, to, count);
 * unsigned char *from, *to;
 * unsigned long count;
 *
 * OR
 *
 * void *memcpy/memmove(to, from, count);
 * void *to, *from;
 * unsigned long count;
 *
 * Both functions return "to"
 */
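
/*
 * As a reading aid, a rough C sketch (not part of the original source) of
 * the overall algorithm implemented below.  Byte-at-a-time only; the
 * assembly adds alignment fixups and unrolled word copies:
 *
 *      void *memmove_sketch(void *to, const void *from, unsigned long count)
 *      {
 *              unsigned char *d = to;
 *              const unsigned char *s = from;
 *
 *              if (d < s || d >= s + count) {  // no destructive overlap
 *                      while (count--)
 *                              *d++ = *s++;    // copy forwards
 *              } else {                        // "to" overlaps tail of "from"
 *                      d += count;
 *                      s += count;
 *                      while (count--)
 *                              *--d = *--s;    // copy backwards
 *              }
 *              return to;
 *      }
 */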

#define MINCOPY 16

/* registers used */

#define to      a0
#define from    a1
#define count   a2

LEAF_ENTRY(memcpy)
ALTERNATE_ENTRY(memmove)
        move    a3,to                   # Save to in a3
        beq     count,zero,ret          # Test for zero count
        beq     from,to,ret             # Test for from == to

/* use backwards copying code if the from and to regions overlap */
        blt     to,from,goforwards      # If to < from then use forwards copy
        add     v0,from,count           # v0 := from + count
        bge     to,v0,goforwards        # If to >= from + count; no overlap
        b       gobackwards             # Oh well, go backwards
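
/*
 * In C terms, the three branches above select the copy direction (a sketch,
 * using the register #define names; the labels exist below):
 *
 *      if (to < from || to >= from + count)
 *              goto goforwards;        // ascending copy is safe
 *      goto gobackwards;               // "to" overlaps the tail of "from"
 */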

/*****************************************************************************/

/*
 * Forward copy code. Check for pointer alignment and try to get both
 * pointers aligned on a long boundary.
 */
goforwards:

/* small byte counts use byte at a time copy */
        blt     count,MINCOPY,forwards_bytecopy
        and     v0,from,3               # v0 := from & 3
        and     v1,to,3                 # v1 := to & 3
        beq     v0,v1,forwalignable     # low bits are identical

/*
 * Byte at a time copy code. This is used when the pointers are not
 * alignable, when the byte count is small, or when cleaning up any
 * remaining bytes on a larger transfer.
 */
forwards_bytecopy:
        beq     count,zero,ret          # If count is zero, then we are done
        addu    v1,from,count           # v1 := from + count

99:     lb      v0,0(from)              # v0 = *from
        addu    from,1                  # advance pointer
        sb      v0,0(to)                # Store byte
        addu    to,1                    # advance pointer
        bne     from,v1,99b             # Loop until done

ret:    move    v0,a3                   # Set v0 to old "to" pointer
        j       ra                      # return to caller

/*
 * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
 * check what value v0 has to see how to get aligned. Also, since we have
 * eliminated tiny copies, we know that the count is large enough to
 * encompass the alignment copies.
 */
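
/*
 * A C sketch of the dispatch below (an aid, not original source); both
 * pointers share their low two bits, so (4 - (from & 3)) & 3 bytes bring
 * them onto a 4-byte boundary before the word loops:
 *
 *      unsigned long pre = (4 - ((unsigned long)from & 3)) & 3;  // 1->3, 2->2, 3->1
 *      count -= pre;
 *      while (pre--)
 *              *to++ = *from++;        // done below with byte/halfword copies
 */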

forwalignable:
        beq     v0,zero,forwards        # If v0==v1 && v0==0 then aligned
        beq     v0,1,forw_copy3         # Need to copy 3 bytes to get aligned
        beq     v0,2,forw_copy2         # Need to copy 2 bytes to get aligned

/* need to copy 1 byte */
        lb      v0,0(from)              # get one byte
        addu    from,1                  # advance pointer
        sb      v0,0(to)                # store one byte
        addu    to,1                    # advance pointer
        subu    count,1                 # and reduce count
        b       forwards                # Now pointers are aligned

/* need to copy 2 bytes */
forw_copy2:
        lh      v0,0(from)              # get one short
        addu    from,2                  # advance pointer
        sh      v0,0(to)                # store one short
        addu    to,2                    # advance pointer
        subu    count,2                 # and reduce count
        b       forwards

/* need to copy 3 bytes */
forw_copy3:
        lb      v0,0(from)              # get one byte
        lh      v1,1(from)              # and one short
        addu    from,3                  # advance pointer
        sb      v0,0(to)                # store one byte
        sh      v1,1(to)                # and one short
        addu    to,3                    # advance pointer
        subu    count,3                 # and reduce count
        /* FALLTHROUGH */

/*
 * Once we are here, the pointers are aligned on long boundaries.
 * Begin copying in large chunks.
 */
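
/*
 * The three loops that follow amount to (hedged C sketch, not original
 * source; "d"/"s" stand for the now word-aligned to/from pointers):
 *
 *      unsigned int *d = (unsigned int *)to, *s = (unsigned int *)from;
 *      while (count >= 32) {                   // forwards_32
 *              d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
 *              d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
 *              d += 8; s += 8; count -= 32;
 *      }
 *      while (count >= 16) {                   // forwards_16
 *              d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
 *              d += 4; s += 4; count -= 16;
 *      }
 *      while (count >= 4) {                    // forwards_4
 *              *d++ = *s++; count -= 4;
 *      }
 *      // remaining 0..3 bytes are finished in forwards_bytecopy
 */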

forwards:

/* 32 byte at a time loop */
forwards_32:
        blt     count,32,forwards_16    # do 16 bytes at a time
        lw      v0,0(from)
        lw      v1,4(from)
        lw      t0,8(from)
        lw      t1,12(from)
        lw      t2,16(from)
        lw      t3,20(from)
        lw      t4,24(from)
        lw      t5,28(from)             # Fetch 8*4 bytes
        addu    from,32                 # advance from pointer now
        sw      v0,0(to)
        sw      v1,4(to)
        sw      t0,8(to)
        sw      t1,12(to)
        sw      t2,16(to)
        sw      t3,20(to)
        sw      t4,24(to)
        sw      t5,28(to)               # Store 8*4 bytes
        addu    to,32                   # advance to pointer now
        subu    count,32                # Reduce count
        b       forwards_32             # Try some more

/* 16 byte at a time loop */
forwards_16:
        blt     count,16,forwards_4     # Do rest in words
        lw      v0,0(from)
        lw      v1,4(from)
        lw      t0,8(from)
        lw      t1,12(from)
        addu    from,16                 # advance from pointer now
        sw      v0,0(to)
        sw      v1,4(to)
        sw      t0,8(to)
        sw      t1,12(to)
        addu    to,16                   # advance to pointer now
        subu    count,16                # Reduce count
        b       forwards_16             # Try some more

/* 4 byte at a time loop */
forwards_4:
        blt     count,4,forwards_bytecopy # Do rest
        lw      v0,0(from)
        addu    from,4                  # advance pointer
        sw      v0,0(to)
        addu    to,4                    # advance pointer
        subu    count,4
        b       forwards_4

/*****************************************************************************/

/*
 * Backward copy code. Check for pointer alignment and try to get both
 * pointers aligned on a long boundary.
 */
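
/*
 * Sketch of the backward path in C (illustration only): both pointers are
 * first advanced past the last byte, then the copy walks downward so the
 * overlapping tail of "from" is read before it can be overwritten:
 *
 *      unsigned char *s = from + count, *d = to + count;
 *      while (count--)
 *              *--d = *--s;
 */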

gobackwards:
        add     from,count              # Advance to end + 1
        add     to,count                # Advance to end + 1

/* small byte counts use byte at a time copy */
        blt     count,MINCOPY,backwards_bytecopy
        and     v0,from,3               # v0 := from & 3
        and     v1,to,3                 # v1 := to & 3
        beq     v0,v1,backalignable     # low bits are identical

/*
 * Byte at a time copy code. This is used when the pointers are not
 * alignable, when the byte count is small, or when cleaning up any
 * remaining bytes on a larger transfer.
 */
backwards_bytecopy:
        beq     count,zero,ret          # If count is zero quit
        subu    from,1                  # Reduce by one (point at byte)
        subu    to,1                    # Reduce by one (point at byte)
        subu    v1,from,count           # v1 := original from - 1

99:     lb      v0,0(from)              # v0 = *from
        subu    from,1                  # backup pointer
        sb      v0,0(to)                # Store byte
        subu    to,1                    # backup pointer
        bne     from,v1,99b             # Loop until done

        move    v0,a3                   # Set v0 to old "to" pointer
        j       ra                      # return to caller

/*
 * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
 * check what value v0 has to see how to get aligned. Also, since we have
 * eliminated tiny copies, we know that the count is large enough to
 * encompass the alignment copies.
 */
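
/*
 * Mirror of the forward case (C sketch, illustration only): both end
 * pointers share their low two bits, so (from & 3) bytes copied downward
 * bring them onto a 4-byte boundary before the word loops:
 *
 *      unsigned long pre = (unsigned long)from & 3;    // 3->3, 2->2, 1->1
 *      count -= pre;
 *      while (pre--)
 *              *--to = *--from;        // done below with byte/halfword copies
 */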

backalignable:
        beq     v0,zero,backwards       # If v0==v1 && v0==0 then aligned
        beq     v0,3,back_copy3         # Need to copy 3 bytes to get aligned
        beq     v0,2,back_copy2         # Need to copy 2 bytes to get aligned

/* need to copy 1 byte */
        lb      v0,-1(from)             # get one byte
        subu    from,1                  # backup pointer
        sb      v0,-1(to)               # store one byte
        subu    to,1                    # backup pointer
        subu    count,1                 # and reduce count
        b       backwards               # Now pointers are aligned

/* need to copy 2 bytes */
back_copy2:
        lh      v0,-2(from)             # get one short
        subu    from,2                  # backup pointer
        sh      v0,-2(to)               # store one short
        subu    to,2                    # backup pointer
        subu    count,2                 # and reduce count
        b       backwards

/* need to copy 3 bytes */
back_copy3:
        lb      v0,-1(from)             # get one byte
        lh      v1,-3(from)             # and one short
        subu    from,3                  # backup pointer
        sb      v0,-1(to)               # store one byte
        sh      v1,-3(to)               # and one short
        subu    to,3                    # backup pointer
        subu    count,3                 # and reduce count
        /* FALLTHROUGH */

/*
 * Once we are here, the pointers are aligned on long boundaries.
 * Begin copying in large chunks.
 */
backwards:

/* 32 byte at a time loop */
backwards_32:
        blt     count,32,backwards_16   # do 16 bytes at a time
        lw      v0,-4(from)
        lw      v1,-8(from)
        lw      t0,-12(from)
        lw      t1,-16(from)
        lw      t2,-20(from)
        lw      t3,-24(from)
        lw      t4,-28(from)
        lw      t5,-32(from)            # Fetch 8*4 bytes
        subu    from,32                 # backup from pointer now
        sw      v0,-4(to)
        sw      v1,-8(to)
        sw      t0,-12(to)
        sw      t1,-16(to)
        sw      t2,-20(to)
        sw      t3,-24(to)
        sw      t4,-28(to)
        sw      t5,-32(to)              # Store 8*4 bytes
        subu    to,32                   # backup to pointer now
        subu    count,32                # Reduce count
        b       backwards_32            # Try some more

/* 16 byte at a time loop */
backwards_16:
        blt     count,16,backwards_4    # Do rest in words
        lw      v0,-4(from)
        lw      v1,-8(from)
        lw      t0,-12(from)
        lw      t1,-16(from)
        subu    from,16                 # backup from pointer now
        sw      v0,-4(to)
        sw      v1,-8(to)
        sw      t0,-12(to)
        sw      t1,-16(to)
        subu    to,16                   # backup to pointer now
        subu    count,16                # Reduce count
        b       backwards_16            # Try some more

/* 4 byte at a time loop */
backwards_4:
        blt     count,4,backwards_bytecopy # Do rest
        lw      v0,-4(from)
        subu    from,4                  # backup from pointer
        sw      v0,-4(to)
        subu    to,4                    # backup to pointer
        subu    count,4                 # Reduce count
        b       backwards_4

        .end    memcpy