mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1327 lines
28 KiB
1327 lines
28 KiB
title "User Mode Zero and Move Memory functions"
|
|
;++
|
|
;
|
|
; Copyright (c) 1989 Microsoft Corporation
|
|
;
|
|
; Module Name:
|
|
;
|
|
; movemem.asm
|
|
;
|
|
; Abstract:
|
|
;
|
|
; This module implements functions to zero and copy blocks of memory
|
|
;
|
|
;
|
|
; Author:
|
|
;
|
|
; Steven R. Wood (stevewo) 25-May-1990
|
|
;
|
|
; Environment:
|
|
;
|
|
; User mode only.
|
|
;
|
|
; Revision History:
|
|
;
|
|
;--
|
|
.386p
|
|
.xlist
|
|
include ks386.inc
|
|
include callconv.inc ; calling convention macros
|
|
.list
|
|
|
|
if DBG

;
; Debug-only data and imports.  Everything in this block is assembled only
; when DBG is non-zero.
;

_DATA   SEGMENT DWORD PUBLIC 'DATA'

        public  _RtlpZeroCount
        public  _RtlpZeroBytes

;
; Statistics maintained by _RtlMoveMemory in DBG builds: number of calls
; and the running total of the Length argument.
;

_RtlpZeroCount  dd      0
_RtlpZeroBytes  dd      0

ifndef BLDR_KERNEL_RUNTIME

;
; DbgPrint format strings.  MASM does not interpret C escape sequences
; inside quoted strings, so the line terminator is emitted as an explicit
; line-feed byte (0ah) rather than as the two characters '\' and 'n'.
;

_MsgUnalignedPtr    db  'RTL: RtlCompare/FillMemoryUlong called with unaligned pointer (%x)',0ah,0
_MsgUnalignedCount  db  'RTL: RtlCompare/FillMemoryUlong called with unaligned count (%x)',0ah,0

endif

_DATA   ENDS

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME
        extrn   _KdDebuggerEnabled:BYTE ; tested before breaking into the debugger
endif
        EXTRNP  _DbgBreakPoint,0
        extrn   _DbgPrint:near
endif

endif
|
|
|
|
;
; Alignment parameters for zeroing and moving memory.  The LOG2 value is
; the primary definition; the size and mask are derived from it so the
; three can never disagree.
;

ZERO_MEMORY_ALIGNMENT_LOG2  = 2
ZERO_MEMORY_ALIGNMENT       = 1 SHL ZERO_MEMORY_ALIGNMENT_LOG2
ZERO_MEMORY_ALIGNMENT_MASK  = ZERO_MEMORY_ALIGNMENT - 1

MEMORY_ALIGNMENT_LOG2       = 2
MEMORY_ALIGNMENT            = 1 SHL MEMORY_ALIGNMENT_LOG2
MEMORY_ALIGNMENT_MASK       = MEMORY_ALIGNMENT - 1

;
; Alignment applied in front of the functions in this module.
;

CODE_ALIGNMENT macro
        align   16
endm
|
|
|
|
|
|
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
|
|
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
|
|
|
page , 132
subttl  "RtlCompareMemory"
;++
;
; ULONG
; RtlCompareMemory (
;    IN PVOID Source1,
;    IN PVOID Source2,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Compares two blocks of memory from the lowest address upward and
;    reports how many leading bytes are identical.
;
; Arguments:
;
;    Source1 (esp+4) - Supplies a pointer to the first block of memory to
;       compare.
;
;    Source2 (esp+8) - Supplies a pointer to the second block of memory to
;       compare.
;
;    Length (esp+12) - Supplies the length, in bytes, of the memory to be
;       compared.
;
; Return Value:
;
;    eax = number of bytes that compared equal.  If every byte compared
;    equal, the original Length is returned.
;
;--

;
; Argument offsets are relative to esp AFTER the two register pushes in
; the prologue (return address + esi + edi = 12 bytes).
;

RcmSource1      equ     [esp+12]
RcmSource2      equ     [esp+16]
RcmLength       equ     [esp+20]

        CODE_ALIGNMENT
cPublicProc _RtlCompareMemory,3
cPublicFpo 3,0

        push    esi                     ; esi/edi are callee-saved
        push    edi
        mov     edi,RcmSource2          ; (edi) -> second block
        mov     esi,RcmSource1          ; (esi) -> first block
        cld                             ; string ops auto-increment

;
; Phase 1: compare whole dwords.
;

        mov     ecx,RcmLength
        shr     ecx,2                   ; (ecx) = number of dwords
        jz      RcmTailBytes            ; fewer than 4 bytes in total
        repe    cmpsd
        jnz     RcmDwordDiffers         ; a dword mismatched, locate the byte

;
; Phase 2: compare the 0-3 bytes left after the last whole dword.
;

RcmTailBytes:
        mov     ecx,RcmLength
        and     ecx,3                   ; (ecx) = Length mod 4
        jz      RcmAllEqual             ; no odd bytes, the blocks are equal
        repe    cmpsb
        jnz     RcmByteDiffers          ; mismatch in the tail

;
; Every byte matched: the answer is simply Length.
;

RcmAllEqual:
        mov     eax,RcmLength

RcmExit:
        pop     edi
        pop     esi
        stdRET  _RtlCompareMemory

;
; A dword mismatched.  cmpsd has already stepped esi/edi past it, so back
; both pointers up by one dword and rescan it byte by byte.  At least one
; of those four bytes differs, so a count of 5 can never run out before
; the mismatch is found.
;

RcmDwordDiffers:
        sub     esi,4
        sub     edi,4
        mov     ecx,5
        repe    cmpsb

;
; esi now points one byte past the mismatching byte, so the number of
; matching bytes is (esi - 1) - Source1.
;

RcmByteDiffers:
        lea     eax,[esi-1]
        sub     eax,RcmSource1
        jmp     short RcmExit

stdENDP _RtlCompareMemory
|
|
|
|
|
|
subttl  "RtlCompareMemoryUlong"
;++
;
; ULONG
; RtlCompareMemoryUlong (
;    IN PVOID Source,
;    IN ULONG Length,
;    IN ULONG Pattern
;    )
;
; Routine Description:
;
;    Scans a block of memory, one dword at a time, for the first dword
;    that differs from Pattern.  Length is converted to a dword count by
;    shifting right two bits, so its low-order two bits are ignored.
;
;    In DBG builds (except the boot loader runtime) a Source pointer that
;    is not dword aligned is reported through DbgPrint and, if a debugger
;    is present, a breakpoint is taken.
;
; Arguments:
;
;    Source - Supplies a pointer to the block of memory to scan.
;
;    Length - Supplies the length, in bytes, of the block.
;
;    Pattern - Supplies the 32-bit value each dword is compared against.
;
; Return Value:
;
;    eax = number of bytes, starting at Source, that matched Pattern
;    (always a multiple of 4).
;
;--

;
; Argument offsets are relative to esp after the single push of edi.
;

EcmlSource      equ     [esp + 4 + 4]
EcmlLength      equ     [esp + 4 + 8]
EcmlPattern     equ     [esp + 4 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlCompareMemoryUlong  ,3

        push    edi                     ; callee-saved, used as scan pointer

        mov     edi,EcmlSource          ; (edi) -> memory to scan

if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; dword aligned?
        jz      Ecml10                  ; yes, nothing to report
        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4              ; cdecl: caller removes arguments
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      Ecml10                  ; no debugger, do not break
        call    _DbgBreakPoint@0
Ecml10:
endif
endif

;
; eax and ecx are loaded only now because the debug code above may have
; destroyed them.
;
; The shift must be the last flag-setting instruction before the scan:
; when the dword count is zero, repe scasd executes no iterations and
; leaves the flags alone, so the ZF=1 produced by the shift makes the
; branch below treat the empty scan as "no mismatch".
;

        mov     eax,EcmlPattern         ; (eax) = value to look for
        mov     ecx,EcmlLength
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2 ; (ecx) = dwords to scan

        repe    scasd
        je      Ecml20                  ; every dword matched
        sub     edi,4                   ; step back onto the mismatching dword

Ecml20:
        mov     eax,edi
        sub     eax,EcmlSource          ; (eax) = bytes that matched
        pop     edi
        stdRET  _RtlCompareMemoryUlong

stdENDP _RtlCompareMemoryUlong
|
|
|
|
|
|
subttl  "RtlFillMemory"
;++
;
; VOID
; RtlFillMemory (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN UCHAR Fill
;    )
;
; Routine Description:
;
;    Fills a block of memory with a byte value.  The bulk of the block is
;    written a dword at a time; the remaining 0-3 bytes are written
;    individually.
;
; Arguments:
;
;    Destination - Supplies a pointer to the memory to fill.
;
;    Length - Supplies the length, in bytes, of the memory to be filled.
;
;    Fill - Supplies the byte value to fill memory with.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets are relative to esp after the single push of edi.
;

EfmDestination  equ     [esp + 4 + 4]
EfmLength       equ     [esp + 4 + 8]
EfmFill         equ     byte ptr [esp + 4 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemory  ,3
cPublicFpo 3,1

        push    edi                     ; callee-saved, used as store pointer

;
; Replicate the fill byte into all four bytes of eax.  Multiplying a
; value below 256 by 01010101h cannot carry between byte lanes.
;

        movzx   eax,EfmFill             ; (eax) = 000000bb
        imul    eax,eax,01010101h       ; (eax) = bbbbbbbb

        mov     edi,EfmDestination      ; (edi) -> memory to fill
        mov     ecx,EfmLength
        cld                             ; store upward

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; (ecx) = whole dwords
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK  ; (edx) = trailing bytes (0-3)

        rep     stosd                   ; fill the dwords

;
; Fill the trailing bytes.  A rep prefix with a zero count stores
; nothing, so no separate test for "no trailing bytes" is needed.
;

        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlFillMemory

stdENDP _RtlFillMemory
|
|
|
|
subttl  "RtlFillMemoryUlonglong"
;++
;
; VOID
; RtlFillMemoryUlonglong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONGLONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 64-bit value.  The first 8 bytes are stored
;    directly; the rest of the block is produced by an overlapping forward
;    dword copy from Destination to Destination+8, which "ripples" the
;    pattern through the block.
;
;    Length is expected to be a multiple of 8.  It is converted to a dword
;    count by shifting right two bits, so its low-order two bits are
;    ignored.  A Length below 8 fills nothing.
;
;    The routine relies on the direction flag being clear on entry.
;
;    In DBG builds (except the boot loader runtime) a Length that is not a
;    multiple of 8, or a Destination that is not dword aligned, is
;    reported through DbgPrint and, if a debugger is present, a breakpoint
;    is taken.
;
; Arguments:
;
;    Destination - Supplies a pointer to the memory to fill.
;
;    Length - Supplies the length, in bytes, of the memory to be filled.
;
;    Fill - Supplies the 64-bit value to fill memory with.  It occupies
;       two argument slots: low dword first, then high dword.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets are relative to esp after the pushes of esi and edi
; (return address + 2 registers = 0ch bytes).
;

EfmlDestination equ     [esp + 0ch]
EfmlLength      equ     [esp + 10h]
EfmlFillLow     equ     [esp + 14h]
EfmlFillHigh    equ     [esp + 18h]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlonglong  ,4
cPublicFpo 4,1

        push    esi                     ; callee-saved registers used below
        push    edi

        mov     ecx,EfmlLength          ; (ecx) = length in bytes
        mov     esi,EfmlDestination     ; (esi) -> memory to fill

if DBG
ifndef BLDR_KERNEL_RUNTIME

;
; Report a byte count that is not a multiple of 8.
;

        test    ecx,7
        jz      @F
        push    ecx
        push    offset FLAT:_MsgUnalignedCount
        call    _DbgPrint
        add     esp, 2 * 4              ; cdecl: caller removes arguments
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

;
; Report a destination pointer that is not dword aligned.
;

        test    esi,3
        jz      @F
        push    esi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

;
; ecx is a volatile register and may have been destroyed by either of
; the calls above; reload the byte count before it is used.  esi is
; callee-saved and therefore still valid.
;

        mov     ecx,EfmlLength

endif
endif

;
; Nothing to do unless there is room for at least one complete 64-bit
; value.  Without this check a short Length would make the dword count
; below wrap to a huge value after 8 bytes had already been stored.
;

        cmp     ecx,8
        jb      short EfmlDone

        mov     eax,EfmlFillLow         ; (eax) = low dword of the pattern
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; convert bytes to dwords
        sub     ecx,2                   ; first two dwords are stored by hand
        mov     [esi],eax               ; store 1st low part
        mov     eax,EfmlFillHigh        ; (eax) = high dword of the pattern
        lea     edi,[esi+08]            ; (edi) -> second 64-bit slot
        mov     [esi+04],eax            ; store 1st high part

        rep     movsd                   ; ripple the pattern through the rest

EfmlDone:
        pop     edi
        pop     esi
        stdRET  _RtlFillMemoryUlonglong

stdENDP _RtlFillMemoryUlonglong
|
|
|
|
subttl  "RtlFillMemoryUlong"
;++
;
; VOID
; RtlFillMemoryUlong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 32-bit value.  Length is converted to a dword
;    count by shifting right two bits, so its low-order two bits are
;    ignored.  The Destination pointer is expected to be dword aligned.
;
;    The routine relies on the direction flag being clear on entry.
;
;    In DBG builds (except the boot loader runtime) a Destination that is
;    not dword aligned is reported through DbgPrint and, if a debugger is
;    present, a breakpoint is taken.
;
; Arguments:
;
;    Destination - Supplies a pointer to the memory to fill.
;
;    Length - Supplies the length, in bytes, of the memory to be filled.
;
;    Fill - Supplies the 32-bit value to fill memory with.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets are relative to esp after the single push of edi.
;

EfmuDestination equ     [esp + 4 + 4]
EfmuLength      equ     [esp + 4 + 8]
EfmuFill        equ     [esp + 4 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlong  ,3
cPublicFpo 3,1

        push    edi                     ; callee-saved, used as store pointer

        mov     edi,EfmuDestination     ; (edi) -> memory to fill

if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; dword aligned?
        jz      Efmu10                  ; yes, nothing to report
        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4              ; cdecl: caller removes arguments
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      Efmu10                  ; no debugger, do not break
        call    _DbgBreakPoint@0
Efmu10:
endif
endif

;
; eax and ecx are loaded only now because the debug code above may have
; destroyed them.
;

        mov     eax,EfmuFill            ; (eax) = value to store
        mov     ecx,EfmuLength
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; (ecx) = dwords to store

        rep     stosd                   ; a zero count stores nothing

        pop     edi
        stdRET  _RtlFillMemoryUlong

stdENDP _RtlFillMemoryUlong
|
|
|
|
subttl  "RtlZeroMemory"
;++
;
; VOID
; RtlZeroMemory (
;    IN PVOID Destination,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Zeroes a block of memory.  The bulk of the block is cleared a dword
;    at a time; the remaining 0-3 bytes are cleared individually.
;
; Arguments:
;
;    Destination - Supplies a pointer to the memory to zero.
;
;    Length - Supplies the length, in bytes, of the memory to be zeroed.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets are relative to esp after the single push of edi.
;

EzmDestination  equ     [esp + 4 + 4]
EzmLength       equ     [esp + 4 + 8]

        CODE_ALIGNMENT
cPublicProc _RtlZeroMemory  ,2
cPublicFpo 2,1

        push    edi                     ; callee-saved, used as store pointer

        xor     eax,eax                 ; (eax) = 0, the value to store
        mov     edi,EzmDestination      ; (edi) -> memory to zero
        mov     ecx,EzmLength
        cld                             ; store upward

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; (ecx) = whole dwords
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK  ; (edx) = trailing bytes (0-3)

        rep     stosd                   ; clear the dwords

;
; Clear the trailing bytes.  A rep prefix with a zero count stores
; nothing, so no separate test for "no trailing bytes" is needed.
;

        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlZeroMemory

stdENDP _RtlZeroMemory
|
|
|
|
page , 132
subttl  "RtlMoveMemory"
;++
;
; VOID
; RtlMoveMemory (
;    IN PVOID Destination,
;    IN PVOID Source OPTIONAL,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes from Source to Destination and produces the
;    correct result even when the two ranges overlap.
;
;    The copy runs forward (dwords, then 0-3 trailing bytes) unless the
;    destination starts inside the source range, i.e. Source < Destination
;    and Destination - Source < Length.  In that case it runs backward,
;    one byte at a time, so that source bytes are read before they are
;    overwritten.  If Source == Destination nothing is copied.
;
;    NOTE(review): the prototype marks Source as OPTIONAL, but this code
;    has no special handling for a NULL Source - confirm with callers.
;
; Arguments:
;
;    Destination - Supplies a pointer to the destination of the move.
;
;    Source - Supplies a pointer to the memory to move.
;
;    Length - Supplies the length, in bytes, of the memory to be moved.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets are relative to esp after the pushes of esi and edi.
;

EmmDestination  equ     [esp + 8 + 4]
EmmSource       equ     [esp + 8 + 8]
EmmLength       equ     [esp + 8 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlMoveMemory  ,3
cPublicFpo 3,2

        push    esi                     ; callee-saved registers used below
        push    edi

        mov     edi,EmmDestination      ; (edi) -> destination
        mov     esi,EmmSource           ; (esi) -> source
        mov     ecx,EmmLength           ; (ecx) = bytes to move
if DBG
        inc     _RtlpZeroCount          ; debug statistics: calls ...
        add     _RtlpZeroBytes,ecx      ; ... and total bytes requested
endif
        cld                             ; default direction is upward

;
; Choose the copy direction.
;

        cmp     esi,edi
        je      EmmDone                 ; same address, nothing to move
        ja      EmmForward              ; source above destination

        mov     eax,edi
        sub     eax,esi                 ; (eax) = Destination - Source
        cmp     ecx,eax
        ja      EmmBackward             ; destination starts inside source

;
; Forward copy: whole dwords, then the 0-3 trailing bytes.  A rep prefix
; with a zero count moves nothing, so no test is needed for the tail.
;

EmmForward:
        mov     edx,ecx
        shr     ecx,MEMORY_ALIGNMENT_LOG2   ; (ecx) = whole dwords
        and     edx,MEMORY_ALIGNMENT_MASK   ; (edx) = trailing bytes (0-3)
        rep     movsd
        mov     ecx,edx
        rep     movsb

EmmDone:
        pop     edi
        pop     esi
        stdRET  _RtlMoveMemory

;
; Backward copy: point both registers at the LAST byte of their range and
; copy downward, a byte at a time.  The direction flag is restored to
; clear before returning.
;

EmmBackward:
        lea     esi,[esi+ecx-1]
        lea     edi,[edi+ecx-1]
        std
        rep     movsb
        cld
        jmp     short EmmDone

stdENDP _RtlMoveMemory
|
|
|
|
subttl  "RtlCopyMemoryNonTemporal"

;
; Register numbers as they appear in the r/m field of a ModRM byte.  They
; are used by the hand-encoded instruction macros below.
;

rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7

;
; Split of a byte count into 64-byte blocks plus remainder ...
;

MEMORY_ALIGNMENT_MASK0  = 63
MEMORY_ALIGNMENT_LOG2_0 = 6

;
; ... and of a remainder into dwords plus trailing bytes.
;

MEMORY_ALIGNMENT_MASK1  = 3
MEMORY_ALIGNMENT_LOG2_1 = 2

;
; sfence
;

sfence macro
        db      0FH, 0AEH, 0F8H
endm

;
; prefetchnta [GeneralReg + Offset]
;
; The compact form carries an 8-bit displacement that the processor
; SIGN-EXTENDS, so it can only express offsets up to 127: an offset of
; 128 would be executed as -128.  Larger offsets are therefore emitted
; with the 32-bit displacement form.  GeneralReg must not be rESP (that
; encoding requires a SIB byte).
;

prefetchnta_short macro GeneralReg, Offset
if (Offset) gt 127
        db      0FH, 018H, 080H + GeneralReg
        dd      Offset
else
        db      0FH, 018H, 040H + GeneralReg, Offset
endif
endm

;
; prefetchnta [GeneralReg + Offset], always with a 32-bit displacement.
; GeneralReg must not be rESP.
;

prefetchnta_long macro GeneralReg, Offset
        db      0FH, 018H, 080h + GeneralReg
        dd      Offset
endm

;
; movnti [GeneralReg + Offset], eax     (8-bit displacement, so Offset
; must not exceed 127; GeneralReg must not be rESP)
;

movnti_eax macro GeneralReg, Offset
        db      0FH, 0C3H, 040H + GeneralReg, Offset
endm

;
; movnti [GeneralReg], eax              (no displacement; GeneralReg must
; not be rESP or rEBP, whose codes select other addressing forms here)
;

movnti_eax_0_disp macro GeneralReg
        db      0FH, 0C3H, 000H + GeneralReg
endm

;
; movnti [GeneralReg + Offset], ebx     (8-bit displacement, so Offset
; must not exceed 127; GeneralReg must not be rESP)
;

movnti_ebx macro GeneralReg, Offset
        db      0FH, 0C3H, 058H + GeneralReg, Offset
endm
|
|
|
|
;
; movnticopy64bytes
;
; Copies 64 bytes from [esi] to [edi] using non-temporal stores, eight
; bytes per step through eax and ebx.  esi and edi are not advanced;
; eax and ebx are destroyed.
;

movnticopy64bytes macro

;
; Bytes 0-7.  The first store uses the displacement-free encoding.
;

        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4

;
; Bytes 8-63, one 8-byte pair per iteration.
;

        irp     PairOfs, <8, 16, 24, 32, 40, 48, 56>
        mov     eax, [esi + PairOfs]
        mov     ebx, [esi + PairOfs + 4]
        movnti_eax rEDI, PairOfs
        movnti_ebx rEDI, PairOfs + 4
        endm

endm
|
|
|
|
|
|
;++
;
; VOID
; RtlCopyMemoryNonTemporal(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies a nonoverlapping block from one buffer to another using
;   non-temporal stores (movnti), so the destination does not pollute the
;   cache.  The source is prefetched ahead of the copy with prefetchnta.
;
;   The copy proceeds in 64-byte blocks, then dwords, then single bytes.
;   An sfence is issued before returning so that all stores are globally
;   visible.
;
;   The instructions are hand-encoded, so the routine must only be called
;   on processors that implement prefetchnta, movnti and sfence.
;
; Arguments:
;
;   Destination - Supplies a pointer to the destination of the move.
;
;   Source - Supplies a pointer to the memory to move.
;
;   Length - Supplies the length, in bytes, of the memory to be moved.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlCopyMemoryNonTemporal  ,3

;
; Argument offsets relative to the frame pointer (saved ebp + return
; address = 8 bytes).  These names are also used by the prefetching copy
; routines that follow in this module.
;

CPNDestination  equ     [ebp + 4 + 4]
CPNSource       equ     [ebp + 4 + 8]
CPNLength       equ     [ebp + 4 + 12]

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx                     ; used by movnticopy64bytes

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte
; is read so that a Length below 4 never reads past the source buffer.
;

        mov     al, [esi]

        cld

;
; Split the length: (edx) = number of 64-byte blocks, (ecx) = remainder.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      Copy4                   ; less than 64 bytes in total
        dec     edx
        je      copy64                  ; exactly one block

;
; The prefetches below use the 32-bit displacement form: an 8-bit
; displacement is sign-extended and cannot express +128 or +192.
;

        prefetchnta_long rESI, 128
        dec     edx
        je      copy128                 ; exactly two blocks

        prefetchnta_long rESI, 192
        dec     edx
        je      copy192                 ; exactly three blocks

;
; Four or more blocks: (edx) blocks are copied in the loop while
; prefetching 256 bytes ahead; the last three fall through below.
;

copyLoop:

        prefetchnta_long rESI, 256

        movnticopy64bytes
        lea     esi, [esi + 64]         ; lea leaves the flags untouched
        lea     edi, [edi + 64]

        dec     edx
        jnz     copyLoop

copy192:

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy128:

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy64:

        movnticopy64bytes

        or      ecx, ecx                ; anything less than 64 to do?
        jz      ExitRoutine

;
; Prefetch the remainder, which starts 64 bytes past the block just
; copied (esi has not been advanced yet).
;

        prefetchnta_short rESI, 64

;
; Update pointers for the last copy.
;

        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

;
; Handle the remaining bytes in 32-bit chunks and then 8-bit bytes.
; (edx) = number of dwords, (ecx) = number of trailing bytes.
;

Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1
        shr     edx, MEMORY_ALIGNMENT_LOG2_1

;
; If the number of 32-bit words to move is non-zero, then do it.
;

        jz      RemainingBytes

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        or      ecx, ecx
        jz      ExitRoutine
        rep     movsb

ExitRoutine:

        sfence                          ; make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlCopyMemoryNonTemporal

stdENDP _RtlCopyMemoryNonTemporal
|
|
|
|
;++
;
; VOID
; RtlPrefetchCopyMemory(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies a nonoverlapping block from one buffer to another with
;   ordinary string moves, prefetching the source (prefetchnta) up to
;   256 bytes ahead of the copy.  One prefetch is issued per 64-byte
;   block.
;
;   The prefetch instructions are hand-encoded, so the routine must only
;   be called on processors that implement prefetchnta.
;
; Arguments:
;
;   Destination - Supplies a pointer to the destination of the move.
;
;   Source - Supplies a pointer to the memory to move.
;
;   Length - Supplies the length, in bytes, of the memory to be moved.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlPrefetchCopyMemory,3

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      pcmExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte
; is read so that a Length below 4 never reads past the source buffer.
;

        mov     al, [esi]

        cld

;
; Split the length: (edx) = number of 64-byte blocks, (ecx) = remainder.
; The remainder is parked on the stack while ecx serves as the rep count;
; push does not alter the flags set by the preceding dec.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcmCopy4          ; less than 64 bytes in total
        dec     edx
        push    ecx
        je      short pcmcopy64         ; exactly one block

;
; The prefetches below use the 32-bit displacement form: an 8-bit
; displacement is sign-extended and cannot express +128 or +192.
;

        prefetchnta_long rESI, 128
        dec     edx
        je      short pcmcopy128        ; exactly two blocks

        prefetchnta_long rESI, 192
        dec     edx
        je      short pcmcopy192        ; exactly three blocks

;
; Four or more blocks: (edx) blocks are copied in the loop while
; prefetching 256 bytes ahead; the last three fall through below.
;

pcmcopyLoop:

        prefetchnta_long rESI, 256

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd

        dec     edx
        jnz     short pcmcopyLoop

pcmcopy192:

        mov     ecx, 16
        rep     movsd

pcmcopy128:

        mov     ecx, 16
        rep     movsd

pcmcopy64:

        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; (ecx) = remainder
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcmExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the remainder

;
; Copy last part byte by byte.
;

pcmCopy4:
        or      ecx, ecx
        jz      short pcmExitRoutine
        rep     movsb

pcmExitRoutine:

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory

stdENDP _RtlPrefetchCopyMemory
|
|
|
|
;++
;
; VOID
; RtlPrefetchCopyMemory32(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies a nonoverlapping block from one buffer to another with
;   ordinary string moves, prefetching the source (prefetchnta) up to
;   256 bytes ahead of the copy.  Two prefetches, 32 bytes apart, are
;   issued per 64-byte block - presumably for processors whose prefetch
;   granularity is 32 bytes (confirm against the code that selects this
;   variant).
;
;   The prefetch instructions are hand-encoded, so the routine must only
;   be called on processors that implement prefetchnta.
;
; Arguments:
;
;   Destination - Supplies a pointer to the destination of the move.
;
;   Source - Supplies a pointer to the memory to move.
;
;   Length - Supplies the length, in bytes, of the memory to be moved.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlPrefetchCopyMemory32,3

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      pcm32ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte
; is read so that a Length below 4 never reads past the source buffer.
;

        mov     al, [esi]

        cld

;
; Split the length: (edx) = number of 64-byte blocks, (ecx) = remainder.
; The remainder is parked on the stack while ecx serves as the rep count.
; Neither the prefetch nor the push alters the flags set by the
; preceding dec, so the branch after them still tests its result.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcm32Copy4        ; less than 64 bytes in total
        dec     edx
        prefetchnta_short rESI, 32
        push    ecx
        je      short pcm32copy64       ; exactly one block

;
; The prefetches below use the 32-bit displacement form: an 8-bit
; displacement is sign-extended and cannot express offsets above +127.
;

        prefetchnta_long rESI, 128
        prefetchnta_long rESI, 160
        dec     edx
        je      short pcm32copy128      ; exactly two blocks

        prefetchnta_long rESI, 192
        prefetchnta_long rESI, 224      ; second half of the block at +192
        dec     edx
        je      short pcm32copy192      ; exactly three blocks

;
; Four or more blocks: (edx) blocks are copied in the loop while
; prefetching 256 bytes ahead; the last three fall through below.
;

pcm32copyLoop:

        prefetchnta_long rESI, 256
        prefetchnta_long rESI, 288

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd

        dec     edx
        jnz     short pcm32copyLoop

pcm32copy192:

        mov     ecx, 16
        rep     movsd

pcm32copy128:

        mov     ecx, 16
        rep     movsd

pcm32copy64:

        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; (ecx) = remainder
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcm32ExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the remainder

;
; Copy last part byte by byte.
;

pcm32Copy4:
        or      ecx, ecx
        jz      short pcm32ExitRoutine
        rep     movsb

pcm32ExitRoutine:

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory32

stdENDP _RtlPrefetchCopyMemory32
|
|
|
|
subttl  "RtlPrefetchMemoryNonTemporal"

;++
;
; VOID
; FASTCALL
; RtlPrefetchMemoryNonTemporal(
;   IN PVOID Source,
;   IN SIZE_T Length
;   )
;
; Routine Description:
;
;   Issues a non-temporal prefetch (prefetchnta) for the memory starting
;   at Source, stepping through it in units of _KePrefetchNTAGranularity
;   bytes.  The first unit is always prefetched; the loop then continues
;   for as long as the remaining length, compared as an unsigned value,
;   is greater than zero.
;
;   As assembled, the routine begins with a ret and therefore does
;   nothing; per the comment at that instruction it is patched out at
;   boot when the processor supports prefetch.
;
; Arguments:
;
;   Source (ecx) - Supplies a pointer to the memory to prefetch.
;
;   Length (edx) - Supplies the length, in bytes, of the memory to be
;       prefetched.
;
; Return Value:
;
;   None.
;
;--

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME

        extrn   _KePrefetchNTAGranularity:DWORD

cPublicFastCall RtlPrefetchMemoryNonTemporal  ,2

;
; The following instruction will be patched out at boot time if this
; processor supports the prefetch instruction.  It must stay the first
; instruction of the routine.
;

        ret                             ; patched out at boot.

        mov     eax, _KePrefetchNTAGranularity  ; (eax) = bytes per prefetch

RpmnNextLine:
        prefetchnta_short rECX, 0       ; prefetch the line at [ecx]
        add     ecx, eax                ; advance to the next line
        sub     edx, eax                ; account for the line just done
        ja      short RpmnNextLine      ; loop while length remains (unsigned)

        fstRET  RtlPrefetchMemoryNonTemporal

fstENDP RtlPrefetchMemoryNonTemporal

endif
endif
|
|
|
|
_TEXT$00 ends
|
|
end
|