title "Memory functions"
;++
;
; Copyright (c) 2000 Microsoft Corporation
;
; Module Name:
;
;   movemem.asm
;
; Abstract:
;
;   This module implements functions to fill, copy, and compare blocks of
;   memory.
;
; Author:
;
;   David N. Cutler (davec) 6-Jul-2000
;
; Environment:
;
;   Any mode.
;
;--

include ksamd64.inc

        altentry RtlCopyMemoryAlternate

        subttl  "Compare Memory"
;++
;
; SIZE_T
; RtlCompareMemory (
;     IN PVOID Source1,
;     IN PVOID Source2,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   Compares two blocks of memory of arbitrary alignment and reports how
;   many leading bytes are identical.
;
;   When both addresses share the same offset within a quadword and at
;   least 8 bytes are supplied, the leading bytes up to the next 8-byte
;   boundary are compared singly, then whole quadwords, then the remaining
;   bytes. Otherwise the entire comparison is done a byte at a time.
;
; Arguments:
;
;   Source1 (rcx) - Supplies a pointer to the first block of memory to
;       compare.
;
;   Source2 (rdx) - Supplies a pointer to the second block of memory to
;       compare.
;
;   Length (r8) - Supplies the length, in bytes, of the memory to be
;       compared.
;
; Return Value:
;
;   The number of bytes that compared equal is returned as the function
;   value. If all bytes compared equal, then the length of the original
;   block of memory is returned.
;
;--

        NESTED_ENTRY RtlCompareMemory, _TEXT$00

        push_reg rdi                    ; rdi and rsi are nonvolatile and are
        push_reg rsi                    ;   needed by the string instructions

        END_PROLOGUE

        mov     rsi, rcx                ; rsi -> first block
        mov     rdi, rdx                ; rdi -> second block
        xor     edx, ecx                ; isolate the address bits that differ
        and     edx, 07h                ;   within a quadword
        jnz     short RlCmBytes         ; if nz, offsets differ - byte compare
        cmp     r8, 8                   ; at least one quadword supplied?
        jb      short RlCmBytes         ; if b, too short to bother aligning

;
; Both blocks have the same quadword offset and there are at least 8 bytes.
; r9 keeps the start of the second block so the match count can be derived
; from how far rdi has advanced.
;

        mov     r9, rdi                 ; remember start of second block
        neg     ecx                     ; ecx = bytes up to the next 8-byte
        and     ecx, 07h                ;   boundary (0..7)
        jz      short RlCmQuads         ; if z, already on a boundary
        sub     r8, rcx                 ; take the leading bytes off the total
        repe    cmpsb                   ; compare the leading bytes
        jnz     short RlCmMiss          ; if nz, stopped on a difference

;
; Compare whole quadwords.
;

RlCmQuads:
        mov     rcx, r8                 ; rcx = bytes covered by whole
        and     rcx, -8                 ;   quadwords
        jz      short RlCmTail          ; if z, no whole quadword remains
        sub     r8, rcx                 ; r8 = bytes left after the quadwords
        shr     rcx, 3                  ; convert byte count to quadwords
        repe    cmpsq                   ; compare quadwords
        jz      short RlCmTail          ; if z, every quadword matched (rcx = 0)

;
; A quadword differed. Step both pointers back onto it and hand it, together
; with any quadwords not yet examined, to the byte compare below so that the
; exact differing byte is located.
;

        inc     rcx                     ; include the differing quadword
        sub     rsi, 8                  ; back up first block pointer
        sub     rdi, 8                  ; back up second block pointer
        shl     rcx, 3                  ; quadwords -> bytes

;
; Compare whatever is left a byte at a time.
;

RlCmTail:
        add     r8, rcx                 ; r8 = bytes still to be compared
        jz      short RlCmDone          ; if z, nothing left - all matched
        mov     rcx, r8                 ; set byte count
        repe    cmpsb                   ; compare bytes
        jz      short RlCmDone          ; if z, all remaining bytes matched

RlCmMiss:
        dec     rdi                     ; cmpsb advanced past the differing byte

RlCmDone:
        sub     rdi, r9                 ; distance advanced = bytes matched
        mov     rax, rdi                ; set return value
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

;
; The blocks do not share a quadword offset, or fewer than 8 bytes were
; supplied. Compare a byte at a time; the match count is derived from the
; count left in rcx rather than from the pointers.
;

RlCmBytes:
        test    r8, r8                  ; anything to compare?
        jz      short RlCmBytesDone     ; if z, return zero
        mov     rcx, r8                 ; set byte count
        repe    cmpsb                   ; compare bytes
        jz      short RlCmBytesDone     ; if z, everything matched - return Length
        inc     rcx                     ; count the differing byte as unmatched
        sub     r8, rcx                 ; r8 = bytes that matched

RlCmBytesDone:
        mov     rax, r8                 ; set return value
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlCompareMemory, _TEXT$00

        subttl  "Compare Memory 32-bits"
;++
;
; SIZE_T
; RtlCompareMemoryUlong (
;     IN PVOID Source,
;     IN SIZE_T Length,
;     IN ULONG Pattern
;     )
;
; Routine Description:
;
;   This function compares a block of dword aligned memory with a specified
;   pattern 32-bits at a time.
;
;   N.B. The low two bits of the length are assumed to be zero and are
;        ignored.
;
; Arguments:
;
;   Source (rcx) - Supplies a pointer to the block of memory to compare.
;
;   Length (rdx) - Supplies the length, in bytes, of the memory to compare.
;
;   Pattern (r8d) - Supplies the pattern to be compared against.
;
; Return Value:
;
;   The number of bytes that compared equal is returned as the function
;   value. If all bytes compared equal, then the length of the original
;   block of memory is returned.
;
;--

        NESTED_ENTRY RtlCompareMemoryUlong, _TEXT$00

        push_reg rdi                    ; rdi is nonvolatile and is used by scasd

        END_PROLOGUE

        mov     rdi, rcx                ; rdi -> memory to scan
        shr     rdx, 2                  ; rdx = number of dwords
        jz      short RlCuDone          ; if z, nothing to compare - return zero
        mov     rcx, rdx                ; set dword count
        mov     eax, r8d                ; eax = pattern to scan for
        repe    scasd                   ; scan while dwords equal the pattern
        jz      short RlCuDone          ; if z, every dword matched
        inc     rcx                     ; count the differing dword as unmatched
        sub     rdx, rcx                ; rdx = number of dwords that matched

RlCuDone:
        lea     rax, [rdx*4]            ; return matched dwords as a byte count
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

        NESTED_END RtlCompareMemoryUlong, _TEXT$00

        subttl  "Copy Memory"
;++
;
; VOID
; RtlCopyMemory (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   Copies a nonoverlapping block of bytes from one buffer to another.
;   Neither buffer needs any particular alignment.
;
;   The widest unit for which the two addresses share an offset (quadword,
;   then doubleword, then word) is used for the bulk of the copy; leading
;   bytes up to that boundary and any trailing bytes are moved singly.
;
;   RtlCopyMemoryAlternate is a second entry point placed after the
;   prologue. RtlMoveMemory jumps to it after pushing rdi and rsi itself.
;
; Arguments:
;
;   Destination (rcx) - Supplies a pointer to the destination buffer.
;
;   Sources (rdx) - Supplies a pointer to the source buffer.
;
;   Length (r8) - Supplies the length, in bytes, of the copy operation.
;
; Return Value:
;
;   None.
;
;--

        NESTED_ENTRY RtlCopyMemory, _TEXT$00

        push_reg rdi                    ; rdi and rsi are nonvolatile and are
        push_reg rsi                    ;   needed by the string instructions

        END_PROLOGUE

        ALTERNATE_ENTRY RtlCopyMemoryAlternate

        mov     rdi, rcx                ; rdi -> destination
        mov     rsi, rdx                ; rsi -> source

;
; Try quadword moves first: both addresses must have the same offset within
; a quadword. edx keeps the differing address bits for the narrower checks.
;

        xor     edx, ecx                ; isolate the address bits that differ
        and     edx, 07h                ;   within a quadword
        jnz     short RlCpTryDwords     ; if nz, offsets differ - try dwords
        cmp     r8, 8                   ; at least 8 bytes to move?
        jb      short RlCpTail          ; if b, just move bytes

        neg     ecx                     ; ecx = bytes up to the next 8-byte
        and     ecx, 07h                ;   boundary (0..7)
        jz      short RlCpQuads         ; if z, already on a boundary
        sub     r8, rcx                 ; take the leading bytes off the total
        rep     movsb                   ; move the leading bytes

RlCpQuads:
        mov     rcx, r8                 ; rcx = bytes covered by whole
        and     rcx, -8                 ;   quadwords
        jz      short RlCpTail          ; if z, no whole quadword
        sub     r8, rcx                 ; r8 = bytes left after the quadwords
        shr     rcx, 3                  ; convert byte count to quadwords
        rep     movsq                   ; move quadwords

;
; Common finish: move the remaining r8 bytes singly and return.
;

RlCpTail:
        test    r8, r8                  ; anything left?
        jz      short RlCpDone          ; if z, finished
        mov     rcx, r8                 ; set byte count
        rep     movsb                   ; move trailing bytes

RlCpDone:
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

;
; Quadword offsets differ. Try doubleword moves.
;

RlCpTryDwords:
        and     edx, 03h                ; do the offsets agree within a dword?
        jnz     short RlCpTryWords      ; if nz, no - try words
        cmp     r8, 4                   ; at least 4 bytes to move?
        jb      short RlCpTail          ; if b, just move bytes

        neg     ecx                     ; ecx = bytes up to the next 4-byte
        and     ecx, 03h                ;   boundary (0..3)
        jz      short RlCpDwords        ; if z, already on a boundary
        sub     r8, rcx                 ; take the leading bytes off the total
        rep     movsb                   ; move the leading bytes

RlCpDwords:
        mov     rcx, r8                 ; rcx = bytes covered by whole
        and     rcx, -4                 ;   doublewords
        jz      short RlCpTail          ; if z, no whole doubleword
        sub     r8, rcx                 ; r8 = bytes left after the doublewords
        shr     rcx, 2                  ; convert byte count to doublewords
        rep     movsd                   ; move doublewords
        jmp     short RlCpTail          ; finish in common code

;
; Doubleword offsets differ. Try word moves.
;

RlCpTryWords:
        and     edx, 01h                ; do the offsets agree within a word?
        jnz     short RlCpTail          ; if nz, no - move everything as bytes
        cmp     r8, 2                   ; at least 2 bytes to move?
        jb      short RlCpTail          ; if b, just move bytes

        neg     ecx                     ; ecx = bytes up to the next 2-byte
        and     ecx, 01h                ;   boundary (0..1)
        jz      short RlCpWords         ; if z, already on a boundary
        sub     r8, rcx                 ; take the leading byte off the total
        rep     movsb                   ; move the leading byte

RlCpWords:
        mov     rcx, r8                 ; rcx = bytes covered by whole
        and     rcx, -2                 ;   words
        jz      short RlCpTail          ; if z, no whole word
        sub     r8, rcx                 ; r8 = bytes left after the words
        shr     rcx, 1                  ; convert byte count to words
        rep     movsw                   ; move words
        jmp     short RlCpTail          ; finish in common code

        NESTED_END RtlCopyMemory, _TEXT$00

        subttl  "Copy Memory NonTemporal"
;++
;
; VOID
; RtlCopyMemoryNonTemporal (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   Copies a nonoverlapping block of bytes from one buffer to another,
;   writing the bulk of the destination with nontemporal stores (movntdq)
;   so that the copy does not pollute the cache.
;
;   The destination is first advanced to a 16-byte boundary with byte
;   moves. Data is then moved in 64-byte and 16-byte units using unaligned
;   loads and nontemporal stores; trailing bytes are moved singly. A store
;   fence is issued before returning.
;
; Arguments:
;
;   Destination (rcx) - Supplies a pointer to the destination buffer.
;
;   Sources (rdx) - Supplies a pointer to the source buffer.
;
;   Length (r8) - Supplies the length, in bytes, of the copy operation.
;
; Return Value:
;
;   None.
;
;--

        NESTED_ENTRY RtlCopyMemoryNonTemporal, _TEXT$00

        push_reg rdi                    ; rdi and rsi are nonvolatile and are
        push_reg rsi                    ;   needed by the string instructions

        END_PROLOGUE

        mov     rdi, rcx                ; rdi -> destination
        mov     rsi, rdx                ; rsi -> source
        cmp     r8, 16                  ; at least 16 bytes to move?
        jb      RlNtTail                ; if b, just move bytes

;
; Bring the destination up to a 16-byte boundary.
;

        neg     ecx                     ; ecx = bytes up to the next 16-byte
        and     ecx, 0fh                ;   boundary of the destination (0..15)
        jz      short RlNtBlocks64      ; if z, already on a boundary
        sub     r8, rcx                 ; take the leading bytes off the total
        rep     movsb                   ; move the leading bytes

;
; Move 64-byte blocks. rax counts the bytes still to be moved this way.
;

RlNtBlocks64:
        mov     rax, r8                 ; rax = bytes covered by whole
        and     rax, -64                ;   64-byte blocks
        jz      short RlNtBlocks16      ; if z, no whole 64-byte block
        sub     r8, rax                 ; r8 = bytes left after those blocks

RlNtLoop64:
        prefetchnta 0[rsi]              ; prefetch first byte of source block
        prefetchnta 63[rsi]             ; prefetch last byte of source block
        movdqu  xmm0, [rsi]             ; load 64 bytes, source may be unaligned
        movdqu  xmm1, 16[rsi]           ;
        movdqu  xmm2, 32[rsi]           ;
        movdqu  xmm3, 48[rsi]           ;
        movntdq [rdi], xmm0             ; store 64 bytes with nontemporal hint
        movntdq 16[rdi], xmm1           ;
        movntdq 32[rdi], xmm2           ;
        movntdq 48[rdi], xmm3           ;
        add     rdi, 64                 ; advance destination
        add     rsi, 64                 ; advance source
        sub     rax, 64                 ; one block fewer to move
        jnz     short RlNtLoop64        ; if nz, more 64-byte blocks

;
; Move 16-byte blocks.
;

RlNtBlocks16:
        mov     rax, r8                 ; rax = bytes covered by whole
        and     rax, -16                ;   16-byte blocks
        jz      short RlNtTail          ; if z, no whole 16-byte block
        sub     r8, rax                 ; r8 = bytes left after those blocks

RlNtLoop16:
        movdqu  xmm0, [rsi]             ; load 16 bytes, source may be unaligned
        movntdq [rdi], xmm0             ; store 16 bytes with nontemporal hint
        add     rdi, 16                 ; advance destination
        add     rsi, 16                 ; advance source
        sub     rax, 16                 ; one block fewer to move
        jnz     short RlNtLoop16        ; if nz, more 16-byte blocks

;
; Move the trailing bytes.
;

RlNtTail:
        test    r8, r8                  ; anything left?
        jz      short RlNtDone          ; if z, finished
        mov     rcx, r8                 ; set byte count
        rep     movsb                   ; move trailing bytes

RlNtDone:
        sfence                          ; make sure all stores complete
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlCopyMemoryNonTemporal, _TEXT$00

        subttl  "Fill Memory"
;++
;
; VOID
; RtlFillMemory (
;     IN VOID UNALIGNED *Destination,
;     IN SIZE_T Length,
;     IN UCHAR Fill
;     )
;
; Routine Description:
;
;   This function fills a block of unaligned memory with a specified pattern.
;
; Arguments:
;
;   Destination (rcx) - Supplies a pointer to the memory to fill.
;
;   Length (rdx) - Supplies the length, in bytes, of the memory to fill.
;
;   Fill (r8d) - Supplies the value to fill memory with.
;
; Return Value:
;
;   None.
;
;--

        NESTED_ENTRY RtlFillMemory, _TEXT$00

        push_reg rdi                    ; rdi is nonvolatile and is used by stos

        END_PROLOGUE

        mov     rdi, rcx                ; rdi -> memory to fill
        mov     eax, r8d                ; al = fill byte
        cmp     rdx, 8                  ; at least 8 bytes to fill?
        jb      short RlFmTail          ; if b, just store bytes

;
; Bring the destination up to an 8-byte boundary.
;

        neg     ecx                     ; ecx = bytes up to the next 8-byte
        and     ecx, 07h                ;   boundary (0..7)
        jz      short RlFmQuads         ; if z, already on a boundary
        sub     rdx, rcx                ; take the leading bytes off the total
        rep     stosb                   ; store the leading bytes

;
; Store whole quadwords. The fill byte is first replicated into all eight
; bytes of rax; al still holds the fill byte afterwards for the byte stores
; that follow.
;

RlFmQuads:
        mov     rcx, rdx                ; rcx = bytes covered by whole
        and     rcx, -8                 ;   quadwords
        jz      short RlFmTail          ; if z, no whole quadword
        sub     rdx, rcx                ; rdx = bytes left after the quadwords
        shr     rcx, 3                  ; convert byte count to quadwords
        mov     ah, al                  ; ax = fill byte in both bytes
        shl     eax, 16                 ; move that pair to the upper word of eax
        mov     al, r8b                 ; rebuild the pair in the lower word
        mov     ah, al                  ;
        mov     r9, rax                 ; r9 = fill byte in all four dword bytes
        shl     rax, 32                 ; copy the dword into the upper half
        or      rax, r9                 ; rax = fill byte in all eight bytes
        rep     stosq                   ; store quadwords

;
; Store the trailing bytes.
;

RlFmTail:
        test    rdx, rdx                ; anything left?
        jz      short RlFmDone          ; if z, finished
        mov     rcx, rdx                ; set byte count
        rep     stosb                   ; store trailing bytes

RlFmDone:
        pop     rdi                     ; restore nonvolatile register
        ret                             ; return

        NESTED_END RtlFillMemory, _TEXT$00

        subttl  "Move Memory"
;++
;
; VOID
; RtlMoveMemory (
;     OUT VOID UNALIGNED *Destination,
;     IN CONST VOID UNALIGNED * Sources,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   This function copies from one unaligned buffer to another.
;
; Arguments:
;
;   Destination (rcx) - Supplies a pointer to the destination buffer.
;
;   Sources (rdx) - Supplies a pointer to the source buffer.
;
;   Length (r8) - Supplies the length, in bytes, of the copy operation.
;
; Return Value:
;
;   None.
;
;--

        NESTED_ENTRY RtlMoveMemory, _TEXT$00

        push_reg rdi                    ; rdi and rsi are nonvolatile and are
        push_reg rsi                    ;   needed by the string instructions

        END_PROLOGUE

;
; A forward copy is safe when the destination starts at or below the source,
; or when it starts beyond the last source byte. Both cases are handed to
; RtlCopyMemoryAlternate, which expects rdi and rsi to be on the stack
; already and pops them itself.
;

        cmp     rcx, rdx                ; destination at or below source?
        jbe     RtlCopyMemoryAlternate  ; if be, forward copy is safe
        mov     rsi, rdx                ; rsi -> last byte of source
        add     rsi, r8                 ;
        dec     rsi                     ;
        cmp     rcx, rsi                ; destination beyond the source?
        ja      RtlCopyMemoryAlternate  ; if a, forward copy is safe

;
; The destination starts inside the source. Copy from the last byte toward
; the first so that source bytes are read before they are overwritten.
;

        mov     rdi, rcx                ; rdi -> last byte of destination
        add     rdi, r8                 ;
        dec     rdi                     ;
        mov     rcx, r8                 ; set byte count
        std                             ; string ops step downward
        rep     movsb                   ; move bytes, last to first
        cld                             ; restore forward direction
        pop     rsi                     ; restore nonvolatile registers
        pop     rdi                     ;
        ret                             ; return

        NESTED_END RtlMoveMemory, _TEXT$00

        subttl  "Prefetch Memory NonTemporal"
;++
;
; VOID
; RtlPrefetchMemoryNonTemporal (
;     IN CONST PVOID Source,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   This function issues a nontemporal prefetch hint (prefetchnta) for
;   each 64-byte block touched by the Length bytes starting at Source.
;
;   The byte count is measured from the start of the 64-byte block that
;   contains Source, so that a range that is not 64-byte aligned and spills
;   into a further block also has that final block prefetched.
;
;   N.B. One prefetch is always issued, even when Length is zero.
;
; Arguments:
;
;   Source (rcx) - Supplies a pointer to the memory to be prefetched.
;
;   Length (rdx) - Supplies the length, in bytes, of the operation.
;
; Return Value:
;
;   None.
;
;--

        LEAF_ENTRY RtlPrefetchMemoryNonTemporal, _TEXT$00

        mov     eax, ecx                ; rax = offset of Source within its
        and     eax, 63                 ;   64-byte block (0..63)
        add     rdx, rax                ; count bytes from the start of that block

RlPF10: prefetchnta 0[rcx]              ; prefetch the block holding this address
        add     rcx, 64                 ; step into the next 64-byte block
        sub     rdx, 64                 ; one block of bytes accounted for
        ja      RlPF10                  ; if a, bytes remain in a further block
        ret                             ; return

        LEAF_END RtlPrefetchMemoryNonTemporal, _TEXT$00

        subttl  "Zero Memory"
;++
;
; VOID
; RtlZeroMemory (
;     IN VOID UNALIGNED *Destination,
;     IN SIZE_T Length
;     )
;
; Routine Description:
;
;   This function fills a block of unaligned memory with zero. It sets the
;   fill value to zero and transfers to RtlFillMemory, which receives
;   Destination and Length unchanged in rcx and rdx and returns directly to
;   the caller.
;
; Arguments:
;
;   Destination (rcx) - Supplies a pointer to the memory to fill.
;
;   Length (rdx) - Supplies the length, in bytes, of the memory to fill.
;
; Return Value:
;
;   None.
;
;--

        LEAF_ENTRY RtlZeroMemory, _TEXT$00

        xor     r8, r8                  ; fill value = 0
        jmp     RtlFillMemory           ; finish in common code

        LEAF_END RtlZeroMemory, _TEXT$00

        end