|
|
        title  "Zero memory pages using fastest means available"
;++
;
; Copyright (c) 1998  Microsoft Corporation
;
; Module Name:
;
;    zero.asm
;
; Abstract:
;
;    Zero memory pages using the fastest means available.
;
; Author:
;
;    Peter Johnston (peterj) 20-Jun-1998.
;       Critical sections of Katmai code adapted from in-line
;       assembly version by Shiv Kaushik of Intel Corp.
;
; Environment:
;
;    x86
;
; Revision History:
;
;--
.386p .xlist include ks386.inc include callconv.inc include mac386.inc .list
;
; Numeric register codes.  The instruction macros in this module add
; these values into the encoding bytes they emit, so a register is
; passed to a macro as rEAX, rECX, ... rather than by its mnemonic.
;

rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7
; ; Define SIMD instructions used in this module. ;
if 0

;
; Fixed-operand encodings, retained for reference only (never assembled).
; In theory the parameterized macros that follow generate the same bytes.
;

xorps_xmm0_xmm0 macro
        db      0FH, 057H, 0C0H
        endm

movntps_edx macro Offset
        db      0FH, 02BH, 042H, Offset
        endm

movaps_esp_xmm0 macro
        db      0FH, 029H, 004H, 024H
        endm

movaps_xmm0_esp macro
        db      0FH, 028H, 004H, 024H
        endm

endif
;
; xorps XMMReg1, XMMReg2
;
;   Emits XORPS xmmXMMReg1, xmmXMMReg2 (register to register form).
;   Both arguments are XMM register numbers (0-7).
;

xorps   macro   XMMReg1, XMMReg2
        db      0FH, 057H, 0C0H + (XMMReg1 * 8) + XMMReg2
        endm

;
; movntps GeneralReg, Offset, XMMReg
;
;   Emits MOVNTPS [GeneralReg + Offset], xmmXMMReg using the
;   base + 8-bit displacement form.  GeneralReg is one of the rExx
;   register codes and Offset must fit in a signed byte.  rESP cannot be
;   used as the base here because that encoding requires a SIB byte,
;   which this macro does not emit.
;
;   N.B. The parameter is spelled XMMReg in both the parameter list and
;        the body so the macro also assembles when the assembler is
;        preserving identifier case.
;

movntps macro   GeneralReg, Offset, XMMReg
        db      0FH, 02BH, 040H + (XMMReg * 8) + GeneralReg, Offset
        endm

;
; sfence
;
;   Emits SFENCE (store fence).
;

sfence  macro
        db      0FH, 0AEH, 0F8H
        endm

;
; movaps_load XMMReg, GeneralReg
;
;   Emits MOVAPS xmmXMMReg, [GeneralReg] using a ModRM + SIB encoding
;   (no index, base = GeneralReg, no displacement).  In this module it
;   is only used with rESP.  rEBP is not usable as the base: with no
;   displacement that base code selects a 32-bit absolute address.
;

movaps_load macro XMMReg, GeneralReg
        db      0FH, 028H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
        endm

;
; movaps_store GeneralReg, XMMReg
;
;   Emits MOVAPS [GeneralReg], xmmXMMReg.  Same encoding form and the
;   same base register restriction as movaps_load.
;

movaps_store macro GeneralReg, XMMReg
        db      0FH, 029H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
        endm
;
; NPX (floating point / SIMD) state save and restore.
;
; Each macro takes one of the rExx register codes and emits the
; instruction with [Register] as its memory operand.  The register code
; is placed directly in the ModRM byte with no SIB byte or displacement;
; this module only passes rEAX and rEBX.
;

fxsave  macro   Register
        db      0FH, 0AEH, Register             ; fxsave  [Register]
        endm

fxrstor macro   Register
        db      0FH, 0AEH, 8+Register           ; fxrstor [Register]
        endm
_TEXT   SEGMENT DWORD PUBLIC 'CODE'
        ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
;++
;
; VOID
; KeZeroPages (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     KeZeroPages is really just a function pointer that points at
;     either KiZeroPages or KiXMMIZeroPages depending on whether or
;     not XMMI instructions are available.
;
; Arguments:
;
;     (ecx) PageBase       Base address of pages to be zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.
;
; Return Value:
;
;     None.
;
;--
page ,132
        subttl  "KiXMMIZeroPagesNoSave - Use XMMI to zero memory (XMMI owned)"
;++
;
; VOID
; KiXMMIZeroPagesNoSave (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Use XMMI to zero memory 16 bytes at a time while at the same
;     time minimizing cache pollution.  Memory is written in
;     INNER_LOOP_BYTES (64 byte) units using non-temporal stores.
;
;     Note: The XMMI register set belongs to this thread.  It is neither
;     saved nor restored by this procedure; xmm0 is zero on return.
;
; Arguments:
;
;     (ecx) PageBase       Virtual address of the base of the page to be
;                          zeroed.  It is advanced past the zeroed
;                          region on return.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.  Any remainder below
;                          INNER_LOOP_BYTES is not written.  A count
;                          smaller than INNER_LOOP_BYTES (including 0)
;                          writes nothing.
;
; Return Value:
;
;     None.
;
;--

INNER_LOOP_BYTES equ 64
INNER_LOOP_SHIFT equ 6

cPublicFastCall KiXMMIZeroPagesNoSave,2
cPublicFpo 0, 1

        xorps   0, 0                    ; zero xmm0 (128 bits)
        shr     edx, INNER_LOOP_SHIFT   ; edx = number of iterations

;       A zero iteration count must not enter the loop: the loop tests
;       its counter after decrementing it, so 0 would wrap around to
;       2^32 iterations.

        jz      short kxzn90            ; jif nothing to zero

inner:

        movntps rECX, 0,  0             ; store bytes  0 - 15
        movntps rECX, 16, 0             ;             16 - 31
        movntps rECX, 32, 0             ;             32 - 47
        movntps rECX, 48, 0             ;             48 - 63

        add     ecx, INNER_LOOP_BYTES   ; increment base
        dec     edx                     ; decrement loop count
        jnz     short inner

;       Force all stores to complete before any other
;       stores from this processor.

        sfence

ifndef SFENCE_IS_NOT_BUSTED

;       ERRATA the next uncached write to this processor's APIC
;       may fail unless the store pipes have drained.  sfence by
;       itself is not enough.  Force drainage now by doing an
;       interlocked exchange.  The dword just below the stack pointer
;       is used as the scratch target; the value exchanged into edx is
;       not used.

        xchg    [esp-4], edx

endif

kxzn90:
        fstRET  KiXMMIZeroPagesNoSave

fstENDP KiXMMIZeroPagesNoSave
page ,132
        subttl  "KiXMMIZeroPages - Use XMMI to zero memory"
;++
;
; VOID
; KiXMMIZeroPages (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Zero memory with XMMI, 16 bytes at a time, on behalf of a caller
;     whose XMMI context must be preserved.  This routine wraps
;     KiXMMIZeroPagesNoSave: it makes sure the current thread's NPX
;     state is loaded on this processor, saves xmm0 on the stack,
;     calls the worker and restores xmm0 afterwards.  If floating point
;     is explicitly disabled for the thread (CR0_EM set in its saved
;     Cr0NpxState) the request is passed to KiZeroPages instead.
;
; Arguments:
;
;     (ecx) PageBase       Virtual address of the base of the page to be
;                          zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.
;
; Return Value:
;
;     None.
;
;--

cPublicFastCall KiXMMIZeroPages,2
cPublicFpo 0, 2

        push    ebp
        push    ebx
        mov     ebx, PCR[PcPrcbData+PbCurrentThread] ; ebx = current thread
        mov     eax, [ebx].ThInitialStack
        sub     eax, NPX_FRAME_LENGTH   ; eax = thread's NPX save area
        mov     ebp, esp                ; remember incoming stack pointer
        sub     esp, 16                 ; room to save xmm0
        and     esp, 0FFFFFFF0H         ; on a 16 byte boundary
        cli                             ; no context switches from here
        test    [eax].FpCr0NpxState, CR0_EM ; FP explicitly disabled?
        jnz     short kxzpNoFp          ; jif yes, use generic routine
        cmp     byte ptr [ebx].ThNpxState, NPX_STATE_LOADED
        je      short kxzpZero          ; jif NPX state already loaded

;       The NPX state is not loaded for this thread; it will be by
;       the time context switching is enabled again.

        mov     byte ptr [ebx].ThNpxState, NPX_STATE_LOADED

;       Allow FP instructions to execute.

        mov     ebx, cr0
        and     ebx, NOT (CR0_MP+CR0_TS+CR0_EM)
        mov     cr0, ebx                ; enable NPX

ifdef NT_UP

;       On a UP machine the NPX may currently hold the state of some
;       other thread, in which case that state must be saved first.

        mov     ebx, PCR[PcPrcbData+PbNpxThread] ; owner of NPX state
        test    ebx, ebx                ; any owner?
        jz      short kxzpLoad          ; jif none, nothing to save

        mov     byte ptr [ebx].ThNpxState, NPX_STATE_NOT_LOADED
        mov     ebx, [ebx].ThInitialStack ; locate the owner's
        sub     ebx, NPX_FRAME_LENGTH   ;   save area
        fxsave  rEBX                    ; save owner's NPX state
kxzpLoad:

endif

;       Load this thread's NPX context.  If the thread is switched out
;       the live state is written back to this same save area, so it
;       must be loaded now rather than be destroyed by that save.

        fxrstor rEAX

        mov     eax, PCR[PcPrcbData+PbCurrentThread]
        mov     PCR[PcPrcbData+PbNpxThread], eax ; this thread owns the NPX

kxzpZero:
        sti                             ; context switches allowed again
        movaps_store rESP, 0            ; save caller's xmm0
        fstCall KiXMMIZeroPagesNoSave   ; zero the memory
        movaps_load  0, rESP            ; restore caller's xmm0

;       Restore the stack pointer and non-volatile registers, return.

        mov     esp, ebp
        pop     ebx
        pop     ebp
        fstRET  KiXMMIZeroPages

;       FP is explicitly disabled for this thread (probably a VDM
;       thread).  Restore the stack pointer and non-volatile registers,
;       then jump into KiZeroPages to do the work the old fashioned
;       way.  ecx and edx have not been modified on this path.

kxzpNoFp:
        sti
        mov     esp, ebp
        pop     ebx
        pop     ebp
        jmp     short @KiZeroPages@8

fstENDP KiXMMIZeroPages
page ,132
        subttl  "KiZeroPages - Available to all X86 processors"
;++
;
; VOID
; KiZeroPages (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Generic page zeroing routine for processors that have no more
;     efficient way to zero large blocks of memory.  Memory is filled
;     one dword at a time with a string store (same as RtlZeroMemory).
;
; Arguments:
;
;     (ecx) PageBase       Base address of page to be zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.  The count is divided by
;                          four, so any remainder below a dword is not
;                          written.
;
; Return Value:
;
;     None.
;
;--

cPublicFastCall KiZeroPages,2
cPublicFpo 0, 0

        push    edi                     ; edi is non-volatile, preserve it
        mov     edi, ecx                ; edi = destination for the store
        xor     eax, eax                ; eax = fill value (zero)
        mov     ecx, edx                ; ecx = byte count
        shr     ecx, 2                  ;     = dword count

;       Store eax at edi, ecx times.

        rep stosd

        pop     edi                     ; recover caller's edi
        fstRET  KiZeroPages

fstENDP KiZeroPages
_TEXT   ends
        end
|