You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
369 lines
9.5 KiB
369 lines
9.5 KiB
title "Zero memory pages using fastest means available"
|
|
;++
|
|
;
|
|
; Copyright (c) 1998 Microsoft Corporation
|
|
;
|
|
; Module Name:
|
|
;
|
|
; zero.asm
|
|
;
|
|
; Abstract:
|
|
;
|
|
; Zero memory pages using the fastest means available.
|
|
;
|
|
; Author:
|
|
;
|
|
; Peter Johnston (peterj) 20-Jun-1998.
|
|
; Critical sections of Katmai code adapted from in-line
|
|
; assembly version by Shiv Kaushik of Intel Corp.
|
|
;
|
|
; Environment:
|
|
;
|
|
; x86
|
|
;
|
|
; Revision History:
|
|
;
|
|
;--
|
|
|
|
.386p
|
|
.xlist
|
|
include ks386.inc
|
|
include callconv.inc
|
|
include mac386.inc
|
|
.list
|
|
|
|
;
; Register numbers, passed as operands to the hand-encoded instruction
; macros in this module.  Each macro folds the number directly into the
; ModR/M (or SIB) byte it emits.
;

rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7
|
|
|
|
;
|
|
; Define SIMD instructions used in this module.
|
|
;
|
|
|
|
if 0

;
; Fixed-operand encodings, retained for reference only; this conditional
; is never assembled.  The parameterized macros that follow it in this
; file are expected to generate the same bytes.
;

; xorps xmm0, xmm0

xorps_xmm0_xmm0 macro
        db      0FH, 057H, 0C0H
        endm

; movntps [edx+Offset], xmm0   (Offset is an 8 bit displacement)

movntps_edx     macro   Offset
        db      0FH, 02BH, 042H, Offset
        endm

; movaps [esp], xmm0

movaps_esp_xmm0 macro
        db      0FH, 029H, 004H, 024H
        endm

; movaps xmm0, [esp]

movaps_xmm0_esp macro
        db      0FH, 028H, 004H, 024H
        endm

endif
|
|
|
|
;
; xorps DstXmm, SrcXmm
;
; Emits 0F 57 followed by a register-direct ModR/M byte (mod = 11) with
; DstXmm in the reg field and SrcXmm in the r/m field.  Both operands
; are XMM register numbers.
;

xorps   macro   DstXmm, SrcXmm
        db      0FH, 057H, 0C0H + (DstXmm * 8) + SrcXmm
        endm
|
|
|
|
;
; movntps [GeneralReg+Offset], XMMReg
;
; Emits 0F 2B followed by a ModR/M byte with mod = 01 (8 bit
; displacement), XMMReg in the reg field and GeneralReg in the r/m
; field, then the displacement byte itself.
;
;   GeneralReg  Base register number (rEAX..rEDI).  rESP is rejected:
;               r/m = 100 selects a SIB byte, which this macro does not
;               emit, so the instruction would be mis-encoded.
;   Offset      Displacement; must fit in a single byte.
;   XMMReg      XMM register number of the source.
;
; The body now spells the parameter exactly as declared (XMMReg); the
; previous spelling (XmmReg) only resolved when the assembler folds case.
;

movntps macro   GeneralReg, Offset, XMMReg
        .errnz  (GeneralReg EQ rESP), <movntps macro cannot encode an ESP base>
        db      0FH, 02BH, 040H + (XMMReg * 8) + GeneralReg, Offset
        endm
|
|
|
|
;
; sfence
;
; Emits the three byte store fence encoding (0F AE F8).  Takes no
; operands.
;

sfence  macro
        db      0FH, 0AEH, 0F8H
        endm
|
|
|
|
;
; movaps XMMReg, [GeneralReg]
;
; Emits 0F 28, a ModR/M byte with mod = 00 and r/m = 100 (SIB follows)
; carrying XMMReg in the reg field, and a SIB byte with no index and
; GeneralReg as the base.  With (0, rESP) this yields 0F 28 04 24.
;
;   XMMReg      XMM register number of the destination.
;   GeneralReg  Base register number.  rEBP is rejected: with mod = 00 a
;               SIB base of 101 means "32 bit displacement, no base", so
;               the bytes emitted here would not address [ebp].
;

movaps_load     macro   XMMReg, GeneralReg
        .errnz  (GeneralReg EQ rEBP), <movaps_load macro cannot encode an EBP base>
        db      0FH, 028H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
        endm
|
|
|
|
;
; movaps [GeneralReg], XMMReg
;
; Emits 0F 29, a ModR/M byte with mod = 00 and r/m = 100 (SIB follows)
; carrying XMMReg in the reg field, and a SIB byte with no index and
; GeneralReg as the base.  With (rESP, 0) this yields 0F 29 04 24.
;
;   GeneralReg  Base register number.  rEBP is rejected: with mod = 00 a
;               SIB base of 101 means "32 bit displacement, no base", so
;               the bytes emitted here would not address [ebp].
;   XMMReg      XMM register number of the source.
;

movaps_store    macro   GeneralReg, XMMReg
        .errnz  (GeneralReg EQ rEBP), <movaps_store macro cannot encode an EBP base>
        db      0FH, 029H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
        endm
|
|
|
|
|
|
;
|
|
; NPX Save and Restore
|
|
;
|
|
|
|
;
; fxsave [Register]
;
; Emits 0F AE followed by a ModR/M byte with mod = 00, reg = 0 and
; Register in the r/m field.
;
;   Register    Register number holding the address of the save area.
;               rESP and rEBP are rejected: with mod = 00, r/m = 100
;               requires a SIB byte and r/m = 101 means a bare 32 bit
;               displacement, neither of which this macro emits.
;

fxsave  macro   Register
        .errnz  (Register EQ rESP) OR (Register EQ rEBP), <fxsave macro cannot encode ESP or EBP>
        db      0FH, 0AEH, Register
        endm
|
|
|
|
;
; fxrstor [Register]
;
; Emits 0F AE followed by a ModR/M byte with mod = 00, reg = 1 (the
; "8 +" term) and Register in the r/m field.
;
;   Register    Register number holding the address of the save area.
;               rESP and rEBP are rejected: with mod = 00, r/m = 100
;               requires a SIB byte and r/m = 101 means a bare 32 bit
;               displacement, neither of which this macro emits.
;

fxrstor macro   Register
        .errnz  (Register EQ rESP) OR (Register EQ rEBP), <fxrstor macro cannot encode ESP or EBP>
        db      0FH, 0AEH, 8+Register
        endm
|
|
|
|
|
|
_TEXT SEGMENT DWORD PUBLIC 'CODE'
|
|
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
|
|
|
;++
|
|
;
|
|
; VOID
|
|
; KeZeroPages (
|
|
; IN PVOID PageBase,
|
|
; IN SIZE_T NumberOfBytes
|
|
; )
|
|
;
|
|
; Routine Description:
|
|
;
|
|
; KeZeroPages is really just a function pointer that points at
|
|
; either KiZeroPages or KiXMMIZeroPages depending on whether or
|
|
; not XMMI instructions are available.
|
|
;
|
|
; Arguments:
|
|
;
|
|
; (ecx) PageBase Base address of pages to be zeroed.
|
|
;
|
|
; (edx) NumberOfBytes Number of bytes to be zeroed. Always a PAGE_SIZE multiple.
|
|
;
|
|
;
|
|
; Return Value:
|
|
;
|
|
;--
|
|
|
|
|
|
page ,132
|
|
subttl "KiXMMIZeroPagesNoSave - Use XMMI to zero memory (XMMI owned)"
|
|
|
|
;++
;
; VOID
; KiXMMIZeroPagesNoSave (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Use XMMI to zero memory 16 bytes at a time, INNER_LOOP_BYTES per
;     loop iteration, while at the same time minimizing cache pollution
;     (the stores are issued with movntps).
;
;     Note: The XMMI register set belongs to this thread.  xmm0 is
;           overwritten; it is neither saved nor restored by this
;           procedure.
;
; Arguments:
;
;     (ecx) PageBase       Virtual address of the base of the page to be
;                          zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.  Only whole
;                          INNER_LOOP_BYTES units are zeroed; a count
;                          smaller than that zeroes nothing.
;
; Return Value:
;
;     None.  On exit ecx points just past the last byte zeroed and edx
;     is zero.
;
;--

INNER_LOOP_BYTES equ    64
INNER_LOOP_SHIFT equ    6

; The shift below converts a byte count into an iteration count, so the
; two constants must describe the same quantity.

        .errnz  ((1 SHL INNER_LOOP_SHIFT) - INNER_LOOP_BYTES), <INNER_LOOP_SHIFT does not match INNER_LOOP_BYTES>

cPublicFastCall KiXMMIZeroPagesNoSave,2
cPublicFpo 0, 1

        xorps   0, 0                            ; zero xmm0 (128 bits)
        shr     edx, INNER_LOOP_SHIFT           ; edx = number of iterations

; A zero iteration count must not enter the loop: dec/jnz would wrap
; and run 2^32 times.  shr leaves ZF set when the result is zero.

        jz      short kxzn20

kxzn10:

; Four 16 byte stores cover one INNER_LOOP_BYTES unit.

        movntps rECX, 0,  0                     ; store bytes 0 - 15
        movntps rECX, 16, 0                     ;            16 - 31
        movntps rECX, 32, 0                     ;            32 - 47
        movntps rECX, 48, 0                     ;            48 - 63

        add     ecx, INNER_LOOP_BYTES           ; increment base
        dec     edx                             ; decrement loop count
        jnz     short kxzn10

kxzn20:

; Force all stores to complete before any other
; stores from this processor.

        sfence

ifndef SFENCE_IS_NOT_BUSTED

; ERRATA the next uncached write to this processor's APIC
; may fail unless the store pipes have drained.  sfence by
; itself is not enough.  Force drainage now by doing an
; interlocked exchange.  The target is the dword just below
; the stack pointer; edx is zero at this point.

        xchg    [esp-4], edx

endif

        fstRET  KiXMMIZeroPagesNoSave

fstENDP KiXMMIZeroPagesNoSave
|
|
|
|
|
|
page ,132
|
|
subttl "KiXMMIZeroPages - Use XMMI to zero memory"
|
|
|
|
;++
;
; VOID
; KiXMMIZeroPages (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Zero memory using XMMI on behalf of a caller that does not own the
;     XMMI register set.  This is a wrapper around KiXMMIZeroPagesNoSave
;     which first makes sure the NPX state is loaded for the current
;     thread and then preserves xmm0 across the call.  If floating point
;     is explicitly disabled for the thread (CR0_EM set in its saved
;     Cr0NpxState) the work is handed to KiZeroPages instead.
;
; Arguments:
;
;     (ecx) PageBase       Virtual address of the base of the page to be
;                          zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.
;
;     ecx and edx are not modified here; they are passed through
;     unchanged to whichever routine does the zeroing.
;
; Return Value:
;
;     None.
;
;--

cPublicFastCall KiXMMIZeroPages,2
cPublicFpo 0, 2

        push    ebp                             ; save non-volatiles
        push    ebx
        mov     ebx, PCR[PcPrcbData+PbCurrentThread] ; ebx = current thread
        mov     eax, [ebx].ThInitialStack
        sub     eax, NPX_FRAME_LENGTH           ; eax = thread's NPX save area
        mov     ebp, esp                        ; remember stack pointer
        sub     esp, 16                         ; room for xmm0 ...
        and     esp, 0FFFFFFF0H                 ; ... on a 16 byte boundary
        cli                                     ; no context switch from here
        test    [eax].FpCr0NpxState, CR0_EM     ; FP explicitly disabled?
        jnz     short kxzp90                    ; yes, use the generic routine
        cmp     byte ptr [ebx].ThNpxState, NPX_STATE_LOADED
        je      short kxzp80                    ; jif NPX state already loaded

; The NPX state is not loaded for this thread.  It will be by the
; time context switching is reenabled, so mark it loaded now.

        mov     byte ptr [ebx].ThNpxState, NPX_STATE_LOADED

; Clear MP, TS and EM in CR0 so that FP instructions can be used.

        mov     ebx, cr0
        and     ebx, NOT (CR0_MP+CR0_TS+CR0_EM)
        mov     cr0, ebx                        ; enable NPX

ifdef NT_UP

; On a UP machine the NPX may currently hold the state of another
; thread, in which case that state must be saved to its owner's
; save area first.

        mov     ebx, PCR[PcPrcbData+PbNpxThread] ; ebx = owner of NPX state
        or      ebx, ebx                        ; no owner?
        jz      short kxzp20                    ; none, nothing to save

        mov     byte ptr [ebx].ThNpxState, NPX_STATE_NOT_LOADED
        mov     ebx, [ebx].ThInitialStack       ; ebx = owner's NPX
        sub     ebx, NPX_FRAME_LENGTH           ;       save area
        fxsave  rEBX                            ; save NPX
kxzp20:

endif

; Load this thread's NPX context from its save area.  If we are
; switched away from, the live state gets written back to that same
; save area, so it has to be the thread's real context and not
; whatever happened to be in the registers.

        fxrstor rEAX

        mov     eax, PCR[PcPrcbData+PbCurrentThread]
        mov     PCR[PcPrcbData+PbNpxThread], eax ; this thread now owns NPX

kxzp80:
        sti                                     ; context switching allowed again
        movaps_store rESP, 0                    ; save xmm0 in the aligned slot
        fstCall KiXMMIZeroPagesNoSave           ; zero the page
        movaps_load  0, rESP                    ; restore xmm0

; Restore the stack pointer and non-volatiles, then return.

        mov     esp, ebp
        pop     ebx
        pop     ebp
        fstRET  KiXMMIZeroPages

; FP is explicitly disabled for this thread (probably a VDM
; thread).  Undo the stack changes made above and jump into
; KiZeroPages, which does the work the old fashioned way.

kxzp90:
        sti
        mov     esp, ebp
        pop     ebx
        pop     ebp
        jmp     short @KiZeroPages@8

fstENDP KiXMMIZeroPages
|
|
|
|
|
|
page ,132
|
|
subttl "KiZeroPages - Available to all X86 processors"
|
|
|
|
;++
;
; VOID
; KiZeroPages (
;     IN PVOID PageBase,
;     IN SIZE_T NumberOfBytes
;     )
;
; Routine Description:
;
;     Generic zero page routine for processors that have no more
;     efficient way to zero large blocks of memory.  The range is
;     filled one dword at a time with rep stosd (same as
;     RtlZeroMemory).
;
; Arguments:
;
;     (ecx) PageBase       Base address of page to be zeroed.
;
;     (edx) NumberOfBytes  Number of bytes to be zeroed.  Always a
;                          PAGE_SIZE multiple.  NumberOfBytes / 4
;                          dwords are written.
;
; Return Value:
;
;     None.  eax and ecx are zero on exit; edi is preserved.
;
;--

cPublicFastCall KiZeroPages,2
cPublicFpo 0, 0

        push    edi                             ; edi is non-volatile

        mov     edi, ecx                        ; edi = destination
        mov     ecx, edx                        ; ecx = byte count
        xor     eax, eax                        ; eax = fill value (0)
        shr     ecx, 2                          ; ecx = dword count

; Store eax at edi, ecx times.

        rep stosd

        pop     edi                             ; restore edi and return
        fstRET  KiZeroPages

fstENDP KiZeroPages
|
|
|
|
|
|
_TEXT ends
|
|
end
|