|
|
title "User Mode Zero and Move Memory functions" ;++ ; ; Copyright (c) 1989 Microsoft Corporation ; ; Module Name: ; ; movemem.asm ; ; Abstract: ; ; This module implements functions to zero and copy blocks of memory ; ; ; Author: ; ; Steven R. Wood (stevewo) 25-May-1990 ; ; Environment: ; ; User mode only. ; ; Revision History: ; ;-- .386p .xlist include ks386.inc include callconv.inc ; calling convention macros .list
if DBG

;
; Debug-only data. This segment and the externs below are assembled only
; when DBG is non-zero.
;

_DATA   SEGMENT DWORD PUBLIC 'DATA'

;
; Counters updated by RtlMoveMemory in DBG builds: one increment per call
; and the running total of the Length arguments.
;

        public  _RtlpZeroCount
        public  _RtlpZeroBytes

_RtlpZeroCount dd 0
_RtlpZeroBytes dd 0

;
; Format strings handed to DbgPrint by the debug alignment checks.
; NOTE(review): MASM does not translate C escapes, so '\n' is stored as the
; two characters '\' and 'n' -- confirm that is what the consumer expects.
;

ifndef BLDR_KERNEL_RUNTIME
_MsgUnalignedPtr    db  'RTL: RtlCompare/FillMemoryUlong called with unaligned pointer (%x)\n',0
_MsgUnalignedCount  db  'RTL: RtlCompare/FillMemoryUlong called with unaligned count (%x)\n',0
endif

_DATA   ENDS

;
; External routines/data used by the debug alignment checks. The kernel
; build tests _KdDebuggerEnabled; other builds test the PEB instead.
;

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME
        extrn   _KdDebuggerEnabled:BYTE
endif
        EXTRNP  _DbgBreakPoint,0
        extrn   _DbgPrint:near
endif
endif
;
; Alignment parameters for zeroing and moving memory.
;
; *_ALIGNMENT      - size in bytes of the unit used by the bulk string
;                    instructions (one 32-bit word).
; *_ALIGNMENT_LOG2 - shift count that converts a byte count to a unit count.
; *_ALIGNMENT_MASK - mask that extracts the residual byte count (0 - 3).
;

ZERO_MEMORY_ALIGNMENT = 4
ZERO_MEMORY_ALIGNMENT_LOG2 = 2
ZERO_MEMORY_ALIGNMENT_MASK = ZERO_MEMORY_ALIGNMENT - 1

MEMORY_ALIGNMENT = 4
MEMORY_ALIGNMENT_LOG2 = 2
MEMORY_ALIGNMENT_MASK = MEMORY_ALIGNMENT - 1

;
; Alignment for functions in this module: each routine that invokes
; CODE_ALIGNMENT starts on a 16-byte boundary.
;

CODE_ALIGNMENT macro
        align   16
        endm

;
; Everything that follows is assembled into the flat code segment.
;

_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
        ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
page , 132
        subttl  "RtlCompareMemory"
;++
;
; ULONG
; RtlCompareMemory (
;    IN PVOID Source1,
;    IN PVOID Source2,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Compares two blocks of memory and reports how many leading bytes are
;    identical in both.
;
; Arguments:
;
;    Source1 (esp+4)  - Pointer to the first block.
;
;    Source2 (esp+8)  - Pointer to the second block.
;
;    Length  (esp+12) - Number of bytes to compare.
;
; Return Value:
;
;    eax = number of bytes that compared equal. When the blocks are
;    identical this is Length.
;
;--

;
; Argument offsets as seen after the two register pushes in the prologue.
;

RcmSource1      equ     [esp+12]
RcmSource2      equ     [esp+16]
RcmLength       equ     [esp+20]

CODE_ALIGNMENT
cPublicProc _RtlCompareMemory,3
cPublicFpo 3,0

        push    esi                     ; preserve non-volatile registers
        push    edi
        cld                             ; string ops walk upward
        mov     edi,RcmSource2          ; edi -> second block
        mov     esi,RcmSource1          ; esi -> first block

;
; Phase 1: compare whole dwords.
;

        mov     ecx,RcmLength
        shr     ecx,2                   ; ecx = dword count, ZF set if none
        jz      rcmTail                 ; shorter than one dword
        repe    cmpsd
        jnz     rcmDwordDiffers         ; stopped on an unequal dword

;
; Phase 2: compare the 0 - 3 bytes left after the dwords.
;

rcmTail:
        mov     ecx,RcmLength
        and     ecx,3                   ; ecx = Length mod 4
        jz      rcmIdentical            ; no residual bytes: all matched
        repe    cmpsb
        jnz     rcmByteDiffers          ; stopped on an unequal byte

;
; Every byte matched: the answer is Length itself.
;

rcmIdentical:
        mov     eax,RcmLength
        pop     edi
        pop     esi
        stdRET  _RtlCompareMemory

;
; A dword differed. esi/edi have already stepped past it, so rewind one
; dword and rescan it bytewise. One of its four bytes must differ, so a
; count of 5 can never run out before the scan stops.
;

rcmDwordDiffers:
        sub     esi,4
        sub     edi,4
        mov     ecx,5
        repe    cmpsb

;
; esi is one past the first unequal byte, so (esi - 1) - Source1 is the
; number of bytes that matched.
;

rcmByteDiffers:
        lea     eax,[esi-1]
        sub     eax,RcmSource1
        pop     edi
        pop     esi
        stdRET  _RtlCompareMemory

stdENDP _RtlCompareMemory
subttl  "RtlCompareMemoryUlong"
;++
;
; ULONG
; RtlCompareMemoryUlong (
;    IN PVOID Source,
;    IN ULONG Length,
;    IN ULONG Pattern
;    )
;
; Routine Description:
;
;    Scans the 32-bit words starting at Source and counts how many bytes
;    match Pattern before the first 32-bit word that differs. The low
;    order two bits of Length are ignored.
;
; Arguments:
;
;    Source  - Pointer to the memory to scan. DBG builds report a pointer
;              that is not 4-byte aligned.
;
;    Length  - Length, in bytes, of the memory to scan.
;
;    Pattern - 32-bit value each word is compared against.
;
; Return Value:
;
;    eax = number of bytes (a multiple of 4) that matched Pattern. If
;    every word matched, this is Length rounded down to a multiple of 4.
;
;--

; definitions for arguments (offsets valid after the single push below)
; (TOS) = Return address

EcmlSource       equ     [esp + 4 + 4]
EcmlLength       equ     [esp + 4 + 8]
EcmlPattern      equ     [esp + 4 + 12]

; end of arguments

CODE_ALIGNMENT
cPublicProc _RtlCompareMemoryUlong  ,3
cPublicFpo 3,1

;
; Save the non-volatile registers that we will use, without the benefit of
; a frame pointer. No exception handling in this routine.
;

        push    edi

;
; Setup the registers for the REPE SCASD scan.
;
;   edi -> memory to scan
;   ecx = number of 32-bit words to scan
;   eax = pattern to compare against
;   direction flag is cleared for auto-increment
;

        mov     edi,EcmlSource
if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; pointer 4-byte aligned?
        jz      @F

        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F                      ; no debugger: just continue
        call    _DbgBreakPoint@0
@@:
endif
endif
        cld                             ; scan upward, like the sibling routines
        mov     ecx,EcmlLength          ; loaded after the debug calls, which
        mov     eax,EcmlPattern         ; are free to clobber eax/ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2

;
; Scan while words equal the pattern. If ecx is zero the scan does not
; execute and ZF is still set from the SHR above, so the JE is taken and
; zero is returned.
;

        repe    scasd
        je      @F                      ; all words matched
        sub     edi,4                   ; back up to the mismatching word
@@:
        sub     edi,EcmlSource          ; bytes that matched
        mov     eax,edi
        pop     edi
        stdRET  _RtlCompareMemoryUlong

stdENDP _RtlCompareMemoryUlong
subttl  "RtlFillMemory"
;++
;
; VOID
; RtlFillMemory (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN UCHAR Fill
;    )
;
; Routine Description:
;
;    Stores the byte value Fill into every byte of a block of memory.
;
; Arguments:
;
;    Destination - Pointer to the memory to fill.
;
;    Length      - Length, in bytes, of the memory to fill.
;
;    Fill        - Byte value to store.
;
; Return Value:
;
;    None.
;
;--

; definitions for arguments (offsets valid after the single push below)
; (TOS) = Return address

EfmDestination  equ     [esp + 4 + 4]
EfmLength       equ     [esp + 4 + 8]
EfmFill         equ     byte ptr [esp + 4 + 12]

; end of arguments

CODE_ALIGNMENT
cPublicProc _RtlFillMemory  ,3
cPublicFpo 3,1

;
; Only edi needs preserving; no frame pointer is established.
;

        push    edi

;
; Register roles for the fill:
;
;   eax = fill byte replicated into all four byte lanes
;   edi -> next byte to store
;   ecx = count for the current REP STOS
;   edx = residual byte count (0 - 3) left after the dword stores
;

        movzx   eax,EfmFill             ; eax = 000000bb
        imul    eax,eax,01010101h       ; eax = bbbbbbbb
        mov     edi,EfmDestination
        mov     ecx,EfmLength
        cld                             ; store upward

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK

;
; Store the dwords, then the residual bytes. A REP with a zero count
; stores nothing, so neither step needs a guard.
;

        rep     stosd
        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlFillMemory

stdENDP _RtlFillMemory
subttl  "RtlFillMemoryUlonglong"
;++
;
; VOID
; RtlFillMemoryUlonglong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONGLONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 64-bit pattern. The pattern is written once at
;    Destination and then propagated forward with an overlapping
;    REP MOVSD. Length is processed in 32-bit words, so its low order two
;    bits are ignored. Callers are expected to pass a multiple of 8; DBG
;    builds report a Length that is not.
;
; Arguments:
;
;    Destination - Pointer to the memory to fill. DBG builds report a
;                  pointer that is not at least 4-byte aligned.
;
;    Length      - Length, in bytes, of the memory to fill.
;
;    Fill        - 64-bit value to fill memory with (low dword at the
;                  lower address).
;
; Return Value:
;
;    None.
;
;--

; definitions for arguments (offsets valid after the two pushes below)
; (TOS) = Return address

EfmlDestination equ     [esp + 0ch]
EfmlLength      equ     [esp + 10h]
EfmlFillLow     equ     [esp + 14h]
EfmlFillHigh    equ     [esp + 18h]

; end of arguments

CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlonglong  ,4
cPublicFpo 4,1

;
; Save the non-volatile registers that we will use, without the benefit of
; a frame pointer. No exception handling in this routine.
;

        push    esi
        push    edi

;
; Register roles:
;
;   esi -> start of the destination (source of the ripple copy)
;   edi -> destination + 8 (target of the ripple copy)
;   ecx = number of 32-bit words still to write
;   eax = scratch for the two halves of the pattern
;   direction flag is cleared for auto-increment
;

        mov     esi,EfmlDestination     ; Destination pointer (non-volatile)

if DBG
ifndef BLDR_KERNEL_RUNTIME
        mov     ecx,EfmlLength          ; # of bytes
        test    ecx,7                   ; whole number of 64-bit units?
        jz      @F

        push    ecx
        push    offset FLAT:_MsgUnalignedCount
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

        test    esi,3                   ; pointer 4-byte aligned?
        jz      @F

        push    esi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:
endif
endif

;
; Load the length only now: the debug calls above are free to clobber
; the volatile registers, ecx included.
;

        cld
        mov     ecx,EfmlLength          ; # of bytes
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; convert bytes to dwords
        jz      fmullDone               ; nothing to fill

;
; Seed the first 64-bit unit by hand, never writing more dwords than the
; length allows. This also keeps the count from wrapping below zero.
;

        mov     eax,EfmlFillLow         ; get low portion of the fill arg
        mov     [esi],eax               ; fill 1st lowpart
        dec     ecx
        jz      fmullDone               ; length covered a single dword

        mov     eax,EfmlFillHigh        ; get high portion of the fill arg
        mov     [esi+04],eax            ; fill 1st highpart
        dec     ecx                     ; the 1st unit is done
        lea     edi,[esi+08]            ; initialize the dest pointer

;
; Each dword copied comes from 8 bytes behind the write pointer, so the
; seeded unit repeats through the rest of the buffer.
;

        rep     movsd                   ; ripple the rest

fmullDone:
        pop     edi
        pop     esi
        stdRET  _RtlFillMemoryUlonglong

stdENDP _RtlFillMemoryUlonglong
subttl  "RtlFillMemoryUlong"
;++
;
; VOID
; RtlFillMemoryUlong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 32-bit value. The Destination pointer must be
;    aligned on a 4 byte boundary (DBG builds report it when it is not)
;    and the low order two bits of the Length parameter are ignored.
;
; Arguments:
;
;    Destination - Pointer to the memory to fill.
;
;    Length      - Length, in bytes, of the memory to fill.
;
;    Fill        - 32-bit value to fill memory with.
;
; Return Value:
;
;    None.
;
;--

; definitions for arguments (offsets valid after the single push below)
; (TOS) = Return address

EfmlDestination equ     [esp + 4 + 4]
EfmlLength      equ     [esp + 4 + 8]
EfmlFill        equ     [esp + 4 + 12]

; end of arguments

CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlong  ,3
cPublicFpo 3,1

;
; Save the non-volatile registers that we will use, without the benefit of
; a frame pointer. No exception handling in this routine.
;

        push    edi

;
; Setup the registers for the REP STOSD fill.
;
;   edi -> memory to fill
;   ecx = number of 32-bit words to fill
;   eax = value to store in destination
;   direction flag is cleared for auto-increment
;

        mov     edi,EfmlDestination
if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; pointer 4-byte aligned?
        jz      @F

        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F                      ; no debugger: just continue
        call    _DbgBreakPoint@0
@@:
endif
endif
        cld                             ; store upward, like RtlFillMemory
        mov     ecx,EfmlLength          ; loaded after the debug calls, which
        mov     eax,EfmlFill            ; are free to clobber eax/ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2

;
; Store the 32-bit words; a zero count stores nothing.
;

        rep     stosd

        pop     edi
        stdRET  _RtlFillMemoryUlong

stdENDP _RtlFillMemoryUlong
subttl  "RtlZeroMemory"
;++
;
; VOID
; RtlZeroMemory (
;    IN PVOID Destination,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Clears a block of memory to zero.
;
; Arguments:
;
;    Destination - Pointer to the memory to clear.
;
;    Length      - Length, in bytes, of the memory to clear.
;
; Return Value:
;
;    None.
;
;--

; definitions for arguments (offsets valid after the single push below)
; (TOS) = Return address

EzmDestination  equ     [esp + 4 + 4]
EzmLength       equ     [esp + 4 + 8]

; end of arguments

CODE_ALIGNMENT
cPublicProc _RtlZeroMemory  ,2
cPublicFpo 2,1

;
; Only edi needs preserving; no frame pointer is established.
;

        push    edi

;
; Register roles for the clear:
;
;   eax = 0, the value stored
;   edi -> next byte to clear
;   ecx = count for the current REP STOS
;   edx = residual byte count (0 - 3) left after the dword stores
;

        mov     ecx,EzmLength
        mov     edi,EzmDestination
        xor     eax,eax
        cld                             ; store upward

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK

;
; Clear the dwords, then the residual bytes. A REP with a zero count
; stores nothing, so neither step needs a guard.
;

        rep     stosd
        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlZeroMemory

stdENDP _RtlZeroMemory
page , 132
        subttl  "RtlMoveMemory"
;++
;
; VOID
; RtlMoveMemory (
;    IN PVOID Destination,
;    IN PVOID Source,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes from Source to Destination and copes with
;    overlapping buffers. The normal path copies upward in 4-byte units
;    followed by the residual bytes. When the destination starts inside
;    the source range, the copy runs downward one byte at a time so that
;    source bytes are read before they are overwritten.
;
; Arguments:
;
;    Destination - Pointer to the destination of the move.
;
;    Source      - Pointer to the memory to move.
;
;    Length      - Length, in bytes, of the memory to move.
;
; Return Value:
;
;    None.
;
;--

; Definitions of arguments (offsets valid after the two pushes below)
; (TOS) = Return address

EmmDestination  equ     [esp + 8 + 4]
EmmSource       equ     [esp + 8 + 8]
EmmLength       equ     [esp + 8 + 12]

; End of arguments

CODE_ALIGNMENT
cPublicProc _RtlMoveMemory  ,3
cPublicFpo 3,2

;
; esi and edi are the only non-volatile registers used; no frame pointer.
;

        push    esi
        push    edi

;
; Register roles:
;
;   esi -> source, edi -> destination
;   ecx = count for the current REP MOVS
;   edx = residual byte count (0 - 3) on the upward path
;   eax = Destination - Source when the overlap test is made
;

        mov     edi,EmmDestination
        mov     esi,EmmSource
        mov     ecx,EmmLength
if DBG
        inc     _RtlpZeroCount
        add     _RtlpZeroBytes,ecx
endif
        cld

;
; Pick the direction. JA leaves the flags alone, so the JE below still
; tests the result of the same CMP.
;

        cmp     esi,edi
        ja      mmUpward                ; source above destination
        je      mmExit                  ; same address: nothing to move
        mov     eax,edi
        sub     eax,esi                 ; eax = distance from source to dest
        cmp     ecx,eax
        ja      mmDownward              ; length exceeds distance: overlap

;
; Upward copy: dwords first, then the residual bytes. A REP with a zero
; count moves nothing, so neither step needs a guard.
;

mmUpward:
        mov     edx,ecx
        shr     ecx,MEMORY_ALIGNMENT_LOG2
        and     edx,MEMORY_ALIGNMENT_MASK
        rep     movsd
        mov     ecx,edx
        rep     movsb

mmExit:
        pop     edi
        pop     esi
        stdRET  _RtlMoveMemory

;
; Downward copy: point both registers at the last byte of their block,
; move byte by byte with the direction flag set, then clear it again
; before returning.
;

mmDownward:
        std
        add     esi,ecx
        add     edi,ecx
        dec     esi
        dec     edi
        rep     movsb
        cld
        jmp     short mmExit

stdENDP _RtlMoveMemory
subttl  "RtlCopyMemoryNonTemporal"
;
; Register Definitions (for instruction macros).
;
; These are the 3-bit register numbers used in the ModR/M byte of the
; hand-assembled instructions below.
;

rEAX            equ     0
rECX            equ     1
rEDX            equ     2
rEBX            equ     3
rESP            equ     4
rEBP            equ     5
rESI            equ     6
rEDI            equ     7

;
; Chunking parameters for the copy routines: set 0 splits a length into
; 64-byte blocks, set 1 splits the remainder into 32-bit words.
;

MEMORY_ALIGNMENT_MASK0 = 63
MEMORY_ALIGNMENT_LOG2_0 = 6

MEMORY_ALIGNMENT_MASK1 = 3
MEMORY_ALIGNMENT_LOG2_1 = 2

;
; The macros below emit opcode bytes directly with DB/DD, presumably
; because the assembler in use lacked these mnemonics.
;

;
; sfence - store fence.
;

sfence macro
        db      0FH, 0AEH, 0F8H
        endm

;
; prefetchnta [GeneralReg + Offset], with Offset emitted as an 8-bit
; displacement. The processor sign-extends an 8-bit displacement, so
; Offset must be in the range 0 - 127 to address forward; 128 - 255
; address memory *below* GeneralReg. Use prefetchnta_long for larger
; offsets. Not valid with rESP as the base (that encoding needs a SIB
; byte).
;

prefetchnta_short macro GeneralReg, Offset
        db      0FH, 018H, 040H + GeneralReg, Offset
        endm

;
; prefetchnta [GeneralReg + Offset], with Offset emitted as a 32-bit
; displacement.
;

prefetchnta_long macro GeneralReg, Offset
        db      0FH, 018H, 080h + GeneralReg
        dd      Offset
        endm

;
; movnti [GeneralReg + Offset], eax - 8-bit displacement (same range
; caveat as prefetchnta_short).
;

movnti_eax macro GeneralReg, Offset
        db      0FH, 0C3H, 040H + GeneralReg, Offset
        endm

;
; movnti [GeneralReg], eax - no displacement. Not valid with rESP or
; rEBP as the base (those encodings mean something else).
;

movnti_eax_0_disp macro GeneralReg
        db      0FH, 0C3H, 000H + GeneralReg
        endm

;
; movnti [GeneralReg + Offset], ebx - 8-bit displacement.
;

movnti_ebx macro GeneralReg, Offset
        db      0FH, 0C3H, 058H + GeneralReg, Offset
        endm

;
; movnticopy64bytes - copies the 64 bytes at [esi] to [edi] using
; ordinary loads and movnti (non-temporal) stores, two dwords at a time.
;
; In:       esi -> source, edi -> destination
; Clobbers: eax, ebx
; Note:     esi and edi are NOT advanced; the caller steps them.
;

movnticopy64bytes macro
        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4

        mov     eax, [esi + 8]
        mov     ebx, [esi + 12]
        movnti_eax rEDI, 8
        movnti_ebx rEDI, 12

        mov     eax, [esi + 16]
        mov     ebx, [esi + 20]
        movnti_eax rEDI, 16
        movnti_ebx rEDI, 20

        mov     eax, [esi + 24]
        mov     ebx, [esi + 28]
        movnti_eax rEDI, 24
        movnti_ebx rEDI, 28

        mov     eax, [esi + 32]
        mov     ebx, [esi + 36]
        movnti_eax rEDI,32
        movnti_ebx rEDI, 36

        mov     eax, [esi + 40]
        mov     ebx, [esi + 44]
        movnti_eax rEDI, 40
        movnti_ebx rEDI, 44

        mov     eax, [esi + 48]
        mov     ebx, [esi + 52]
        movnti_eax rEDI,48
        movnti_ebx rEDI, 52

        mov     eax, [esi + 56]
        mov     ebx, [esi + 60]
        movnti_eax rEDI, 56
        movnti_ebx rEDI, 60
        endm
;++
;
; VOID
; RtlCopyMemoryNonTemporal(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies one buffer to another, non-overlapping buffer using
;   non-temporal stores (movnti) that do not pollute the cache. The bulk
;   of the data moves in 64-byte blocks with the source prefetched up to
;   256 bytes ahead; the remainder moves as 32-bit words and then bytes.
;   An sfence is issued before returning.
;
; Arguments:
;
;   Destination - Pointer to the destination of the copy.
;
;   Source      - Pointer to the memory to copy.
;
;   Length      - Length, in bytes, of the memory to copy.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlCopyMemoryNonTemporal  ,3

; Definitions of arguments (relative to the ebp frame set up below)
; (TOS) = Return address

CPNDestination  equ     [ebp + 4 + 4]
CPNSource       equ     [ebp + 4 + 8]
CPNLength       equ     [ebp + 4 + 12]

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx                     ; ebx is clobbered by movnticopy64bytes
        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A one-byte read
; is enough and never reads past a source shorter than four bytes.
;

        mov     al, [esi]

        cld

;
; Split the length: edx = number of 64-byte blocks, ecx = leftover bytes.
; Fewer than 64 bytes goes straight to the tail copy.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      Copy4
        dec     edx
        je      copy64

;
; Offsets of 128 and above need the 32-bit displacement form: an 8-bit
; displacement is sign-extended and would address *behind* esi.
;

        prefetchnta_long rESI, 128
        dec     edx
        je      copy128

        prefetchnta_long rESI, 192
        dec     edx
        je      copy192

;
; Main loop: runs (blocks - 3) times, then falls into the three unrolled
; blocks below, which need no further prefetch.
;

copyLoop:

        prefetchnta_long rESI, 256

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        dec     edx
        jnz     copyLoop

copy192:

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy128:

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy64:

        movnticopy64bytes

        or      ecx, ecx                ; anything less than 64 to do?
        jz      ExitRoutine

;
; Update pointers for the last copy (LEA leaves the flags alone), then
; prefetch the line that holds the leftover bytes.
;

        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        prefetchnta_short rESI, 0

;
; Handle the leftover bytes in 32-bit chunks and then 8-bit bytes.
;

Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1
        shr     edx, MEMORY_ALIGNMENT_LOG2_1

;
; If the number of 32-bit words to move is non-zero, then do it.
;

        jz      RemainingBytes

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        or      ecx, ecx
        jz      ExitRoutine
        rep     movsb

ExitRoutine:

        sfence                          ; Make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlCopyMemoryNonTemporal

stdENDP _RtlCopyMemoryNonTemporal
;++
;
; VOID
; RtlPrefetchCopyMemory(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies one buffer to another, non-overlapping buffer in 64-byte
;   blocks (REP MOVSD), prefetching the source up to 256 bytes ahead.
;   The leftover bytes (fewer than 64) are copied byte by byte.
;
; Arguments:
;
;   Destination - Pointer to the destination of the copy.
;
;   Source      - Pointer to the memory to copy.
;
;   Length      - Length, in bytes, of the memory to copy.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlPrefetchCopyMemory  ,3

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx
        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      pcmExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A one-byte read
; is enough and never reads past a source shorter than four bytes.
;

        mov     al, [esi]

        cld

;
; Split the length: edx = number of 64-byte blocks, ecx = leftover bytes.
; The leftover count is parked on the stack while ecx serves as the
; REP MOVSD counter. PUSH does not alter the flags, so the JE after it
; still tests the DEC.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcmCopy4
        dec     edx
        push    ecx
        je      short pcmcopy64

;
; Offsets of 128 and above need the 32-bit displacement form: an 8-bit
; displacement is sign-extended and would address *behind* esi.
;

        prefetchnta_long rESI, 128
        dec     edx
        je      short pcmcopy128

        prefetchnta_long rESI, 192
        dec     edx
        je      short pcmcopy192

;
; Main loop: runs (blocks - 3) times, then falls into the three unrolled
; blocks below, which need no further prefetch.
;

pcmcopyLoop:

        prefetchnta_long rESI, 256

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd
        dec     edx
        jnz     short pcmcopyLoop

pcmcopy192:

        mov     ecx, 16
        rep     movsd

pcmcopy128:

        mov     ecx, 16
        rep     movsd

pcmcopy64:

        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; recover the leftover byte count
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcmExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.
;

pcmCopy4:
        or      ecx, ecx
        jz      short pcmExitRoutine
        rep     movsb

pcmExitRoutine:

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory

stdENDP _RtlPrefetchCopyMemory
;++
;
; VOID
; RtlPrefetchCopyMemory32(
;   IN PVOID Destination,
;   IN PVOID Source,
;   IN ULONG Length
;   )
;
; Routine Description:
;
;   Copies one buffer to another, non-overlapping buffer in 64-byte
;   blocks (REP MOVSD), prefetching the source up to 256 bytes ahead.
;   Unlike RtlPrefetchCopyMemory, two prefetches 32 bytes apart are
;   issued per 64-byte block (presumably for processors whose prefetch
;   granularity is 32 bytes -- confirm against the callers). The leftover
;   bytes (fewer than 64) are copied byte by byte.
;
; Arguments:
;
;   Destination - Pointer to the destination of the copy.
;
;   Source      - Pointer to the memory to copy.
;
;   Length      - Length, in bytes, of the memory to copy.
;
; Return Value:
;
;   None.
;
;--

cPublicProc _RtlPrefetchCopyMemory32  ,3

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx
        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; Nothing to copy: do not touch the source at all.
;

        test    ecx, ecx
        jz      pcm32ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A one-byte read
; is enough and never reads past a source shorter than four bytes.
;

        mov     al, [esi]

        cld

;
; Split the length: edx = number of 64-byte blocks, ecx = leftover bytes.
; The leftover count is parked on the stack while ecx serves as the
; REP MOVSD counter. Neither the prefetch nor the PUSH alters the flags,
; so the JE after them still tests the DEC.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcm32Copy4
        dec     edx
        prefetchnta_short rESI, 32
        push    ecx
        je      short pcm32copy64

;
; Offsets of 128 and above need the 32-bit displacement form: an 8-bit
; displacement is sign-extended and would address *behind* esi.
;

        prefetchnta_long rESI, 128
        prefetchnta_long rESI, 160
        dec     edx
        je      short pcm32copy128

        prefetchnta_long rESI, 192
        prefetchnta_long rESI, 224      ; second half of the block at +192
        dec     edx
        je      short pcm32copy192

;
; Main loop: runs (blocks - 3) times, then falls into the three unrolled
; blocks below, which need no further prefetch.
;

pcm32copyLoop:

        prefetchnta_long rESI, 256
        prefetchnta_long rESI, 288

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd
        dec     edx
        jnz     short pcm32copyLoop

pcm32copy192:

        mov     ecx, 16
        rep     movsd

pcm32copy128:

        mov     ecx, 16
        rep     movsd

pcm32copy64:

        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; recover the leftover byte count
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcm32ExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.
;

pcm32Copy4:
        or      ecx, ecx
        jz      short pcm32ExitRoutine
        rep     movsb

pcm32ExitRoutine:

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory32

stdENDP _RtlPrefetchCopyMemory32
subttl  "RtlPrefetchMemoryNonTemporal"

;++
;
; VOID
; FASTCALL
; RtlPrefetchMemoryNonTemporal(
;   IN PVOID Source,
;   IN SIZE_T Length
;   )
;
; Routine Description:
;
;   This function issues non-temporal prefetch hints (prefetchnta) for
;   the memory at Source, stepping through Length bytes in units of
;   KePrefetchNTAGranularity.
;
;   As assembled, the routine returns immediately: its first instruction
;   is a RET that is patched out at boot time on processors that support
;   the prefetch instruction. The instruction layout therefore must not
;   be changed.
;
; Arguments:
;
;   Source (ecx) - Supplies a pointer to the memory to prefetch.
;
;   Length (edx) - Supplies the Length, in bytes, of the memory to be
;                  prefetched.
;
; Return Value:
;
;   None.
;
;--

;
; Only built for the kernel runtime (not for the boot loader).
;

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME

        extrn   _KePrefetchNTAGranularity:DWORD

cPublicFastCall RtlPrefetchMemoryNonTemporal  ,2

;
; The following instruction will be patched out at boot time if
; this processor supports the prefetch instruction.
;

        ret                             ; patched out at boot.

        mov     eax, _KePrefetchNTAGranularity  ; get d-cache line size
@@:     prefetchnta_short rECX, 0       ; prefetch line
        add     ecx, eax                ; bump prefetch address
        sub     edx, eax                ; decrement length
        ja      short @b                ; loop if more to get (unsigned:
                                        ; remaining length still above zero)

        fstRET  RtlPrefetchMemoryNonTemporal    ; return

fstENDP RtlPrefetchMemoryNonTemporal

endif
endif
_TEXT$00 ends end
|