Windows NT 4.0 source code leak
/*
* Fast bcopy code which supports overlapped copies.
* Not fully optimized yet.
*
* Written by: Kipp Hickman
*
* $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $
* $Revision: 1.7 $
* $Date: 1993/11/20 19:23:11 $
*/
#include <kxmips.h>
/*
* char *bcopy(from, to, count);
* unsigned char *from, *to;
* unsigned long count;
*
* OR
*
* void *memcpy/memmove(to, from, count);
* void *to, *from;
* unsigned long count;
*
* Both functions return "to"
*/
#define MINCOPY 16
/* registers used */
#define to a0
#define from a1
#define count a2
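/*
* In the MIPS calling convention, a0/a1/a2 carry the first three
* arguments, so the aliases above match the memcpy/memmove argument
* order (dest, src, count).  Only memcpy and memmove entry points are
* defined below; the bcopy prototype in the header comment appears to
* be carried over from the original IRIX bcopy.s.
*/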
LEAF_ENTRY(memcpy)
ALTERNATE_ENTRY(memmove)
move a3,to # Save to in a3
beq count,zero,ret # Test for zero count
beq from,to,ret # Test for from == to
/* use backwards copying code if the from and to regions overlap */
blt to,from,goforwards # If to < from then use forwards copy
add v0,from,count # v0 := from + count
bge to,v0,goforwards # If to >= from + count then no overlap
b gobackwards # Oh well, go backwards
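/*
* The dispatch above amounts to the following C sketch (names reuse
* the register aliases, purely for illustration):
*
*	if (to < from || to >= from + count)
*		copy forwards;		// no destructive overlap
*	else
*		copy backwards;		// "to" overlaps the tail of "from"
*/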
/*****************************************************************************/
/*
* Forward copy code. Check for pointer alignment and try to get both
* pointers aligned on a long boundary.
*/
goforwards:
/* small byte counts use byte at a time copy */
blt count,MINCOPY,forwards_bytecopy
and v0,from,3 # v0 := from & 3
and v1,to,3 # v1 := to & 3
beq v0,v1,forwalignable # low bits are identical
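/*
* Word-sized lw/sw accesses must be 4-byte aligned on MIPS, so the
* word-at-a-time loops are usable only when both pointers share the
* same offset within a word; otherwise fall through to the byte loop.
*/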
/*
* Byte at a time copy code. This is used when the pointers are not
* alignable, when the byte count is small, or when cleaning up any
* remaining bytes on a larger transfer.
*/
forwards_bytecopy:
beq count,zero,ret # If count is zero, then we are done
addu v1,from,count # v1 := from + count
99: lb v0,0(from) # v0 = *from
addu from,1 # advance pointer
sb v0,0(to) # Store byte
addu to,1 # advance pointer
bne from,v1,99b # Loop until done
ret: move v0,a3 # Set v0 to old "to" pointer
j ra # return to caller
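/*
* v0 is the return-value register and a3 holds the destination pointer
* saved at entry, so both memcpy and memmove return the original "to".
*/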
/*
* Pointers are alignable, and may be aligned. Since v0 == v1, we need only
* check what value v0 has to see how to get aligned. Also, since we have
* eliminated tiny copies, we know that the count is large enough to
* encompass the alignment copies.
*/
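/*
* For example, with from == 0x1003 and to == 0x2003 (v0 == v1 == 3),
* the branches below fall through to the single-byte copy, after which
* both pointers are word aligned.
*/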
forwalignable:
beq v0,zero,forwards # If v0==v1 && v0==0 then aligned
beq v0,1,forw_copy3 # Need to copy 3 bytes to get aligned
beq v0,2,forw_copy2 # Need to copy 2 bytes to get aligned
/* need to copy 1 byte */
lb v0,0(from) # get one byte
addu from,1 # advance pointer
sb v0,0(to) # store one byte
addu to,1 # advance pointer
subu count,1 # and reduce count
b forwards # Now pointers are aligned
/* need to copy 2 bytes */
forw_copy2:
lh v0,0(from) # get one short
addu from,2 # advance pointer
sh v0,0(to) # store one short
addu to,2 # advance pointer
subu count,2 # and reduce count
b forwards
/* need to copy 3 bytes */
forw_copy3:
lb v0,0(from) # get one byte
lh v1,1(from) # and one short
addu from,3 # advance pointer
sb v0,0(to) # store one byte
sh v1,1(to) # and one short
addu to,3 # advance pointer
subu count,3 # and reduce count
/* FALLTHROUGH */
/*
* Once we are here, the pointers are aligned on long boundaries.
* Begin copying in large chunks.
*/
forwards:
/* 32 byte at a time loop */
forwards_32:
blt count,32,forwards_16 # do 16 bytes at a time
lw v0,0(from)
lw v1,4(from)
lw t0,8(from)
lw t1,12(from)
lw t2,16(from)
lw t3,20(from)
lw t4,24(from)
lw t5,28(from) # Fetch 8*4 bytes
addu from,32 # advance from pointer now
sw v0,0(to)
sw v1,4(to)
sw t0,8(to)
sw t1,12(to)
sw t2,16(to)
sw t3,20(to)
sw t4,24(to)
sw t5,28(to) # Store 8*4 bytes
addu to,32 # advance to pointer now
subu count,32 # Reduce count
b forwards_32 # Try some more
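/*
* The eight loads are issued ahead of the eight stores, presumably so
* that no load result is consumed by the very next instruction (the
* MIPS load delay slot) and so reads and writes each stay sequential.
*/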
/* 16 byte at a time loop */
forwards_16:
blt count,16,forwards_4 # Do rest in words
lw v0,0(from)
lw v1,4(from)
lw t0,8(from)
lw t1,12(from)
addu from,16 # advance from pointer now
sw v0,0(to)
sw v1,4(to)
sw t0,8(to)
sw t1,12(to)
addu to,16 # advance to pointer now
subu count,16 # Reduce count
b forwards_16 # Try some more
/* 4 bytes at a time loop */
forwards_4:
blt count,4,forwards_bytecopy # Do rest
lw v0,0(from)
addu from,4 # advance pointer
sw v0,0(to)
addu to,4 # advance pointer
subu count,4
b forwards_4
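/*
* Any remaining 1-3 bytes are finished off by forwards_bytecopy above.
*/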
/*****************************************************************************/
/*
* Backward copy code. Check for pointer alignment and try to get both
* pointers aligned on a long boundary.
*/
gobackwards:
add from,count # Advance to end + 1
add to,count # Advance to end + 1
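/*
* Both pointers now point one byte past the end of their regions; the
* copy runs from high addresses to low so overlapping source bytes are
* read before they are overwritten.  Roughly, in C:
*
*	from += count;  to += count;
*	while (count--) *--to = *--from;
*/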
/* small byte counts use byte at a time copy */
blt count,MINCOPY,backwards_bytecopy
and v0,from,3 # v0 := from & 3
and v1,to,3 # v1 := to & 3
beq v0,v1,backalignable # low bits are identical
/*
* Byte at a time copy code. This is used when the pointers are not
* alignable, when the byte count is small, or when cleaning up any
* remaining bytes on a larger transfer.
*/
backwards_bytecopy:
beq count,zero,ret # If count is zero quit
subu from,1 # Reduce by one (point at byte)
subu to,1 # Reduce by one (point at byte)
subu v1,from,count # v1 := original from - 1
99: lb v0,0(from) # v0 = *from
subu from,1 # backup pointer
sb v0,0(to) # Store byte
subu to,1 # backup pointer
bne from,v1,99b # Loop until done
move v0,a3 # Set v0 to old "to" pointer
j ra # return to caller
/*
* Pointers are alignable, and may be aligned. Since v0 == v1, we need only
* check what value v0 has to see how to get aligned. Also, since we have
* eliminated tiny copies, we know that the count is large enough to
* encompass the alignment copies.
*/
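/*
* Unlike the forward case, the pointers address one past the end, so
* v0 (from & 3) is exactly the number of bytes to copy back down to a
* word boundary: v0 == 3 copies 3 bytes, v0 == 2 copies 2, and the
* fallthrough below handles v0 == 1.
*/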
backalignable:
beq v0,zero,backwards # If v0==v1 && v0==0 then aligned
beq v0,3,back_copy3 # Need to copy 3 bytes to get aligned
beq v0,2,back_copy2 # Need to copy 2 bytes to get aligned
/* need to copy 1 byte */
lb v0,-1(from) # get one byte
subu from,1 # backup pointer
sb v0,-1(to) # store one byte
subu to,1 # backup pointer
subu count,1 # and reduce count
b backwards # Now pointers are aligned
/* need to copy 2 bytes */
back_copy2:
lh v0,-2(from) # get one short
subu from,2 # backup pointer
sh v0,-2(to) # store one short
subu to,2 # backup pointer
subu count,2 # and reduce count
b backwards
/* need to copy 3 bytes */
back_copy3:
lb v0,-1(from) # get one byte
lh v1,-3(from) # and one short
subu from,3 # backup pointer
sb v0,-1(to) # store one byte
sh v1,-3(to) # and one short
subu to,3 # backup pointer
subu count,3 # and reduce count
/* FALLTHROUGH */
/*
* Once we are here, the pointers are aligned on long boundaries.
* Begin copying in large chunks.
*/
backwards:
/* 32 byte at a time loop */
backwards_32:
blt count,32,backwards_16 # do 16 bytes at a time
lw v0,-4(from)
lw v1,-8(from)
lw t0,-12(from)
lw t1,-16(from)
lw t2,-20(from)
lw t3,-24(from)
lw t4,-28(from)
lw t5,-32(from) # Fetch 8*4 bytes
subu from,32 # backup from pointer now
sw v0,-4(to)
sw v1,-8(to)
sw t0,-12(to)
sw t1,-16(to)
sw t2,-20(to)
sw t3,-24(to)
sw t4,-28(to)
sw t5,-32(to) # Store 8*4 bytes
subu to,32 # backup to pointer now
subu count,32 # Reduce count
b backwards_32 # Try some more
/* 16 byte at a time loop */
backwards_16:
blt count,16,backwards_4 # Do rest in words
lw v0,-4(from)
lw v1,-8(from)
lw t0,-12(from)
lw t1,-16(from)
subu from,16 # backup from pointer now
sw v0,-4(to)
sw v1,-8(to)
sw t0,-12(to)
sw t1,-16(to)
subu to,16 # backup to pointer now
subu count,16 # Reduce count
b backwards_16 # Try some more
/* 4 byte at a time loop */
backwards_4:
blt count,4,backwards_bytecopy # Do rest
lw v0,-4(from)
subu from,4 # backup from pointer
sw v0,-4(to)
subu to,4 # backup to pointer
subu count,4 # Reduce count
b backwards_4
.end memcpy