|
|
title "Hal Copy using Movnti"
;++
;
; Copyright (c) 2000 Microsoft Corporation
;
; Module Name:
;
;     ixmovnti.asm
;
; Abstract:
;
;     HAL routine that uses the movnti instruction to copy a buffer.
;     It is similar to RtlMoveMemory, but it does not support backward
;     or overlapping moves.
;     Based on a previously tested fast copy by Jim Crossland.
;
; Author:
;
;     Gautham Chinya
;     Intel Corp
;
; Revision History:
;
;--
.386p
.xlist include callconv.inc ; calling convention macros include mac386.inc .list ; ; Register Definitions (for instruction macros). ;
;
; Register numbers handed to the instruction-encoding macros, which add
; them into the ModRM byte they emit.
;
rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7
;
; Block sizes used to split the copy length.
;   *_0 : 64-byte blocks (shift count 6, remainder mask 63)
;   *_1 : 4-byte dwords  (shift count 2, remainder mask 3)
; Each mask is derived from its shift count as (1 SHL log2) - 1.
;
MEMORY_ALIGNMENT_LOG2_0 =       6
MEMORY_ALIGNMENT_MASK0  =       (1 SHL MEMORY_ALIGNMENT_LOG2_0) - 1

MEMORY_ALIGNMENT_LOG2_1 =       2
MEMORY_ALIGNMENT_MASK1  =       (1 SHL MEMORY_ALIGNMENT_LOG2_1) - 1
;
; sfence - emits the three opcode bytes of the SFENCE instruction
; (0F AE F8).  Takes no operands.
;
sfence  macro
        db      0Fh, 0AEh, 0F8h
        endm
;
; prefetchnta_short - emits PREFETCHNTA [BaseReg + Disp].
;
;   BaseReg - register number (rEAX..rEDI) placed in the ModRM r/m field.
;             rESP cannot be used: that r/m value selects a SIB byte,
;             which this macro does not emit.
;   Disp    - displacement from BaseReg.
;
; The compact encoding stores Disp in one byte that the CPU sign-extends,
; so it can only express -128..127.  A value such as 128 would be emitted
; as 80h and decoded as [BaseReg - 128].  To always prefetch the address
; the caller asked for, displacements above 127 are emitted with the
; 32-bit displacement encoding instead.
;
prefetchnta_short macro BaseReg, Disp
    if (Disp) gt 127
        db      0FH, 018H, 080H + BaseReg       ; mod=10: disp32 follows
        dd      Disp
    else
        db      0FH, 018H, 040H + BaseReg, Disp ; mod=01: disp8 follows
    endif
        endm
;
; prefetchnta_long - emits PREFETCHNTA [BaseReg + Disp32].
;
;   BaseReg - register number (rEAX..rEDI) placed in the ModRM r/m field.
;             rESP cannot be used: that r/m value selects a SIB byte,
;             which this macro does not emit.
;   Disp32  - displacement, stored as a full dword after the ModRM byte.
;
prefetchnta_long macro BaseReg, Disp32
        db      0Fh, 18h, 80h + BaseReg         ; mod=10: disp32 follows
        dd      Disp32
        endm
;
; movnti_eax - emits MOVNTI [BaseReg + Disp8], eax.
;
;   BaseReg - register number (rEAX..rEDI) placed in the ModRM r/m field.
;             rESP cannot be used: that r/m value selects a SIB byte,
;             which this macro does not emit.
;   Disp8   - displacement stored in a single byte; the CPU sign-extends
;             it, so keep it within -128..127.
;
movnti_eax macro BaseReg, Disp8
        db      0Fh, 0C3h, 40h + BaseReg, Disp8 ; mod=01, reg=eax
        endm
;
; movnti_eax_0_disp - emits MOVNTI [BaseReg], eax (no displacement byte).
;
;   BaseReg - register number placed in the ModRM r/m field.  With no
;             displacement (mod=00) the r/m values for rESP and rEBP
;             select other addressing forms, so neither can be used here.
;
movnti_eax_0_disp macro BaseReg
        db      0Fh, 0C3h, BaseReg              ; mod=00, reg=eax
        endm
;
; movnti_ebx - emits MOVNTI [BaseReg + Disp8], ebx.
;
;   BaseReg - register number (rEAX..rEDI) placed in the ModRM r/m field.
;             rESP cannot be used: that r/m value selects a SIB byte,
;             which this macro does not emit.
;   Disp8   - displacement stored in a single byte; the CPU sign-extends
;             it, so keep it within -128..127.
;
movnti_ebx macro BaseReg, Disp8
        db      0Fh, 0C3h, 58h + BaseReg, Disp8 ; mod=01, reg=ebx
        endm
;
; movnticopy64bytes - copies 64 bytes (one cache line) from [esi] to
; [edi] as sixteen dwords, two at a time: a pair is loaded into eax and
; ebx and then stored with movnti.
;
;   In:       esi = source, edi = destination
;   Clobbers: eax, ebx
;   esi and edi are not advanced; the caller steps them.
;
movnticopy64bytes macro
        ; First pair: the store through eax uses the no-displacement form.
        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4

        ; Remaining seven pairs at byte offsets 8, 16, ... 56.
    irp PairOfs, <8, 16, 24, 32, 40, 48, 56>
        mov     eax, [esi + PairOfs]
        mov     ebx, [esi + PairOfs+4]
        movnti_eax rEDI, PairOfs
        movnti_ebx rEDI, PairOfs+4
    endm
        endm
_TEXT$03 SEGMENT DWORD PUBLIC 'CODE' ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING page ,132 subttl "HalpMovntiCopyBuffer" ;++ ; ; VOID ; HalpMovntiCopyBuffer( ; IN PVOID Destination, ; IN PVOID Source , ; IN ULONG Length ; ) ; ; Routine Description: ; ; This function copies buffers ; in 4-byte blocks using movnti. ; ; Arguments: ; ; Destination - Supplies a pointer to the destination of the move. ; ; Source - Supplies a pointer to the memory to move. ; ; Length - Supplies the Length, in bytes, of the memory to be moved. ; ; Return Value: ; ; None. ; ;--
cPublicProc _HalpMovntiCopyBuffer ,3

;
; Copies Length bytes from Source to Destination using movnti stores:
; first in 64-byte blocks, then in dwords, then the last 0..3 bytes with
; rep movsb.  An sfence is issued before returning.
;
; Stack frame after the prologue:
;   [ebp]       saved ebp
;   [ebp + 4]   return address
;   [ebp + 8]   Destination
;   [ebp + 12]  Source
;   [ebp + 16]  Length
;
; Register roles:
;   esi - source cursor             edi - destination cursor
;   edx - 64-byte block / dword counter
;   ecx - bytes left over after the current granularity
;   eax, ebx - data in flight (ebx is saved and restored)
;

EmmDestination  equ     [ebp + 4 + 4]
EmmSource       equ     [ebp + 4 + 8]
EmmLength       equ     [ebp + 4 + 12]

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, EmmSource
        mov     edi, EmmDestination
        mov     ecx, EmmLength

;
; Nothing to copy: leave without reading from the source buffer at all.
;
        test    ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  One byte is
; enough to touch the page and, unlike a dword read, cannot run past the
; end of a source buffer that is shorter than four bytes.
;
        mov     al, byte ptr [esi]

        cld                             ; rep movsb below must count upward

;
; edx = number of whole 64-byte blocks, ecx = leftover bytes (0..63).
;
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      Copy4                   ; fewer than 64 bytes in total

        dec     edx
        je      copy64                  ; exactly one block

;
; The displacements 128 and 192 do not fit in a sign-extended byte (they
; would decode as -128 and -64), so the 32-bit displacement form is used.
;
        prefetchnta_long rESI, 128
        dec     edx
        je      copy128                 ; exactly two blocks

        prefetchnta_long rESI, 192
        dec     edx
        je      copy192                 ; exactly three blocks

;
; Four or more blocks: edx now holds (blocks - 3).  Each pass copies one
; block while prefetching the line 256 bytes ahead; the last three blocks
; are handled by the fall-through code below.
;
copyLoop:
        prefetchnta_long rESI, 256
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        dec     edx
        jnz     copyLoop

copy192:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy128:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy64:
        movnticopy64bytes

        test    ecx, ecx                ; anything less than 64 to do?
        jz      ExitRoutine

;
; Update pointers for the last copy, then prefetch the tail they now
; point at (prefetching before the update would only hit the cache line
; that was just copied).
;
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        prefetchnta_short rESI, 0

;
; Handle extra bytes here in 32-bit chunks and then 8-bit bytes.
; edx = number of whole dwords, ecx = leftover bytes (0..3).
;
Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1
        shr     edx, MEMORY_ALIGNMENT_LOG2_1

;
; If the number of 32-bit words to move is non-zero, then do it.
;
        jz      RemainingBytes

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi + 4]
        lea     edi, [edi + 4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        test    ecx, ecx
        jz      ExitRoutine
        rep movsb

ExitRoutine:
        sfence                          ; Make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _HalpMovntiCopyBuffer

stdENDP _HalpMovntiCopyBuffer
_TEXT$03 ends end
|