Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1642 lines
47 KiB

title "Context Swap"
;++
;
; Copyright (c) 1989, 2000 Microsoft Corporation
;
; Module Name:
;
; ctxswap.asm
;
; Abstract:
;
; This module implements the code necessary to field the dispatch
; interrupt and to perform kernel initiated context switching.
;
; Author:
;
; Shie-Lin Tzong (shielint) 14-Jan-1990
;
; Environment:
;
; Kernel mode only.
;
; Revision History:
;
; 22-feb-90 bryanwi
; write actual swap context procedure
;
;--
.586p
.xlist
include ks386.inc
include i386\kimacro.inc
include mac386.inc
include callconv.inc
include irqli386.inc
FPOFRAME macro a, b
.FPO ( a, b, 0, 0, 0, 0 )
endm
.list
EXTRNP KefAcquireSpinLockAtDpcLevel,1,,FASTCALL
EXTRNP KefReleaseSpinLockFromDpcLevel,1,,FASTCALL
EXTRNP HalClearSoftwareInterrupt,1,IMPORT,FASTCALL
EXTRNP HalRequestSoftwareInterrupt,1,IMPORT,FASTCALL
ifndef NT_UP
EXTRNP KiIdleSchedule,1,,FASTCALL
endif
EXTRNP KiQueueReadyThread,2,,FASTCALL
EXTRNP KiRetireDpcList,1,,FASTCALL
EXTRNP _KiQuantumEnd,0
EXTRNP _KiTimerExpiration,4
EXTRNP _KeBugCheckEx,5
extrn _KiTrap13:PROC
extrn _KeFeatureBits:DWORD
extrn __imp__KeRaiseIrqlToSynchLevel@0:DWORD
extrn _KiIdleSummary:DWORD
EXTRNP WmiTraceContextSwap,2,,FASTCALL
if DBG
extrn _KdDebuggerEnabled:BYTE
EXTRNP _DbgBreakPoint,0
EXTRNP _KdPollBreakIn,0
extrn _DbgPrint:near
extrn _MsgDpcTrashedEsp:BYTE
extrn _MsgDpcTimeout:BYTE
extrn _KiDPCTimeout:DWORD
endif
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
;++
;
; KiRDTSC
;
; Routine Description:
;
;    Reads the processor time stamp counter and stores the 64-bit result
;    through the pointer supplied by the caller.
;
; Arguments:
;
;    ecx - Address of an 8-byte buffer. The low dword of the counter is
;          written at offset 0 and the high dword at offset 4.
;
; Return Value:
;
;    None. eax and edx hold the counter halves on exit.
;
;--
cPublicFastCall KiRDTSC, 1
        rdtsc                                   ; edx:eax = time stamp counter
        mov     dword ptr [ecx+4], edx          ; store the upper 32 bits
        mov     dword ptr [ecx], eax            ; store the lower 32 bits
        fstRET  KiRDTSC
fstENDP KiRDTSC
page ,132
subttl "Swap Context"
;++
;
; BOOLEAN
; KiSwapContext (
; IN PKTHREAD OldThread,
; IN PKTHREAD NewThread
; )
;
; Routine Description:
;
; This function is a small wrapper, callable from C code, that marshalls
; arguments and calls the actual swap context routine.
;
; Arguments:
;
; OldThread (ecx) - Supplies the address of the old thread
; NewThread (edx) - Supplies the address of the new thread.
;
; Return Value:
;
; If a kernel APC is pending, then a value of TRUE is returned. Otherwise,
; a value of FALSE is returned.
;
;--
cPublicFastCall KiSwapContext, 2
.fpo (0, 0, 0, 4, 1, 0)
;
; Preserve the nonvolatile registers on the stack.
;
; N.B. ebp MUST be the last register stored (lowest address). The debugger
;      relies on this to locate the saved ebp of a thread that is not
;      currently running. The resulting layout is:
;
;           [esp+12] = ebx, [esp+8] = esi, [esp+4] = edi, [esp+0] = ebp
;
        push    ebx                             ; save nonvolatile registers
        push    esi                             ;
        push    edi                             ;
        push    ebp                             ; ebp goes last
;
; Marshal the arguments into the registers SwapContext expects:
; ebx = PCR, edi = old thread, esi = new thread, cl = APC bypass disable.
;
        mov     esi, edx                        ; esi = next thread
        mov     edi, ecx                        ; edi = old thread
        mov     ebx, PCR[PcSelfPcr]             ; ebx = address of PCR
        movzx   ecx, byte ptr [edi].ThWaitirql  ; cl = old thread's wait IRQL,
                                                ; used as APC bypass disable
        CAPSTART <@KiSwapContext@8,SwapContext>
        call    SwapContext                     ; perform the switch; al = result
        CAPEND <@KiSwapContext@8>
;
; Recover the nonvolatile registers in the reverse order and return the
; value SwapContext left in al.
;
        pop     ebp                             ; restore nonvolatile registers
        pop     edi                             ;
        pop     esi                             ;
        pop     ebx                             ;
        fstRET  KiSwapContext
fstENDP KiSwapContext
page ,132
subttl "Dispatch Interrupt"
;++
;
; Routine Description:
;
; This routine is entered as the result of a software interrupt generated
; at DISPATCH_LEVEL. Its function is to process the Deferred Procedure Call
; (DPC) list, and then perform a context switch if a new thread has been
; selected for execution on the processor.
;
; This routine is entered at IRQL DISPATCH_LEVEL with the dispatcher
; database unlocked. When a return to the caller finally occurs, the
; IRQL remains at DISPATCH_LEVEL, and the dispatcher database is still
; unlocked.
;
; Arguments:
;
; None
;
; Return Value:
;
; None.
;
;--
align 16
cPublicProc _KiDispatchInterrupt ,0
cPublicFpo 0, 0
;
; N.B. ebx is overwritten here and is not saved or restored anywhere in
;      this routine. NOTE(review): presumably the caller preserves ebx on
;      behalf of the interrupted code - confirm against the call sites.
;
mov ebx, PCR[PcSelfPcr] ; get address of PCR
;
; Disable interrupts and check if there is any work in the DPC list
; of the current processor.
;
; The DPC queue depth, the timer request and (on MP builds) the deferred
; ready list head are OR'ed together; a zero result means none of them
; requires service.
;
kdi00: cli ; disable interrupts
mov eax, [ebx]+PcPrcbData+PbDpcQueueDepth ; get DPC queue depth
or eax, [ebx]+PcPrcbData+PbTimerRequest ; merge timer request
ifndef NT_UP
or eax, [ebx]+PcPrcbData+PbDeferredReadyListHead ; merge deferred list head
endif
jz short kdi40 ; if z, no DPC's or timers to process
push ebp ; save register
;
; Exceptions occurring in DPCs are unrelated to any exception handlers
; in the interrupted thread. Terminate the exception list.
;
push [ebx].PcExceptionList ; save the thread's exception list head
mov [ebx].PcExceptionList, EXCEPTION_CHAIN_END
;
; Switch to the DPC stack for this processor.
;
; The thread stack pointer is kept in edx across the switch and then pushed
; as the first item on the DPC stack so that it can be popped back into esp
; once the DPC list has been retired.
;
mov edx, esp ; remember thread stack pointer
mov esp, [ebx].PcPrcbData.PbDpcStack ; run on the DPC stack
push edx ; save thread stack pointer on the DPC stack
.fpo (0, 0, 0, 1, 1, 0)
mov ecx, [ebx].PcPrcb ; get current PRCB address
CAPSTART <_KiDispatchInterrupt,@KiRetireDpcList@4>
fstCall KiRetireDpcList ; process the current DPC list
CAPEND <_KiDispatchInterrupt>
;
; Switch back to the current thread stack, restore the exception list
; and saved EBP.
;
pop esp ; back on the thread stack
pop [ebx].PcExceptionList ; restore thread exception list head
pop ebp ; restore register
.fpo (0, 0, 0, 0, 0, 0)
;
; Check to determine if quantum end is requested.
;
; N.B. If a new thread is selected as a result of processing the quantum
; end request, then the new thread is returned with the dispatcher
; database locked. Otherwise, NULL is returned with the dispatcher
; database unlocked.
;
; NOTE(review): the code at kdi90 does not examine the value returned by
; the quantum end routine; the N.B. above may describe an earlier
; implementation - confirm.
;
kdi40: sti ; enable interrupts
cmp byte ptr [ebx].PcPrcbData.PbQuantumEnd, 0 ; quantum end requested
jne kdi90 ; if neq, quantum end request
;
; Check to determine if a new thread has been selected for execution on this
; processor.
;
cmp dword ptr [ebx].PcPrcbData.PbNextThread, 0 ; check if next thread
je kdi70 ; if eq, then no new thread
;
; N.B. The following registers MUST be saved such that ebp is saved last.
; This is done so the debugger can find the saved ebp for a thread
; that is not currently in the running state.
;
.fpo (0, 0, 0, 3, 1, 0)
sub esp, 3*4 ; room for esi, edi and ebp
mov [esp+8], esi ; save registers
mov [esp+4], edi ;
mov [esp+0], ebp ; ebp stored last (lowest address)
mov edi, [ebx].PcPrcbData.PbCurrentThread ; get current thread address (as old thread)
;
; Raise IRQL to SYNCH level, set context swap busy for the old thread, and
; acquire the current PRCB lock.
;
; The lock is first attempted inline with a locked bit-test-and-set of bit
; zero; only if the bit was already set is the out-of-line acquire routine
; called (ecx = lock address).
;
ifndef NT_UP
call dword ptr [__imp__KeRaiseIrqlToSynchLevel@0] ; raise IRQL to SYNCH
mov byte ptr [edi].ThSwapBusy, 1 ; set context swap busy
lea ecx, [ebx].PcPrcbData.PbPrcbLock ; get PRCB lock address
lock bts dword ptr [ecx], 0 ; try to acquire PRCB lock
jnc short kdi50 ; if nc, PRCB lock acquired
fstCall KefAcquireSpinLockAtDpcLevel ; acquire current PRCB lock
endif
;
; Get the next thread address, set the thread state to running, queue the old
; running thread, and swap context to the next thread.
;
; On entry to SwapContext: ebx = PCR, edi = old thread, esi = new thread and
; cl = APC_LEVEL (nonzero, i.e. APC interrupt bypass disabled).
;
kdi50: mov esi, [ebx].PcPrcbData.PbNextThread ; get next thread address
and dword ptr [ebx].PcPrcbData.PbNextThread, 0 ; clear next thread address
mov [ebx].PcPrcbData.PbCurrentThread, esi ; set current thread address
mov byte ptr [esi]+ThState, Running ; set thread state to running
mov byte ptr [edi].ThWaitReason, WrDispatchInt ; set wait reason
mov ecx, edi ; set address of current thread
lea edx, [ebx].PcPrcbData ; set address of PRCB
fstCall KiQueueReadyThread ; ready thread for execution
CAPSTART <_KiDispatchInterrupt,SwapContext>
mov cl, APC_LEVEL ; set APC interrupt bypass disable
call SwapContext ; swap context
CAPEND <_KiDispatchInterrupt>
mov ebp, [esp+0] ; restore registers
mov edi, [esp+4] ;
mov esi, [esp+8] ;
add esp, 3*4 ; release the register save area
kdi70: stdRET _KiDispatchInterrupt ; return
;
; Process quantum end event.
;
; N.B. If the quantum end code returns a NULL value, then no next thread
; has been selected for execution. Otherwise, a next thread has been
; selected and the source thread lock has been acquired.
;
; NOTE(review): the return value is not inspected below - the routine
; returns to its caller immediately after the call. Verify whether the
; N.B. above is still accurate.
;
kdi90: mov byte ptr [ebx].PcPrcbData.PbQuantumEnd, 0 ; clear quantum end indicator
CAPSTART <_KiDispatchInterrupt,_KiQuantumEnd@0>
stdCall _KiQuantumEnd ; process quantum end
CAPEND <_KiDispatchInterrupt>
stdRET _KiDispatchInterrupt ; return
stdENDP _KiDispatchInterrupt
page ,132
subttl "Swap Context to Next Thread"
;++
;
; Routine Description:
;
; This routine is called to swap context from one thread to the next.
; It swaps context, flushes the data, instruction, and translation
; buffer caches, restores nonvolatile integer registers, and returns
; to its caller.
;
; N.B. It is assumed that the caller (only callers are within this
; module) saved the nonvolatile registers, ebx, esi, edi, and
; ebp. This enables the caller to have more registers available.
;
; Arguments:
;
; cl - APC interrupt bypass disable (zero enable, nonzero disable).
; edi - Address of previous thread.
; esi - Address of next thread.
; ebx - Address of PCR.
;
; Return value:
;
; al - Kernel APC pending.
; ebx - Address of PCR.
; esi - Address of current thread object.
;
;--
;
; NOTE: The ES: override on the move to ThState is part of the
; lazy-segment load system. It assures that ES has a valid
; selector in it, thus preventing us from propagating a bad
; ES across a context switch.
;
; Note that if segments, other than the standard flat segments,
; with limits above 2 gig exist, neither this nor the rest of
; lazy segment loads are reliable.
;
; Note that ThState must be set before the dispatcher lock is released
; to prevent KiSetPriorityThread from seeing a stale value.
;
ifndef NT_UP
public _ScPatchFxb
public _ScPatchFxe
endif
public SwapContext
align 16
SwapContext proc
;
; Save the APC disable flag.
;
; N.B. This value and the exception list head pushed below are pushed on
;      the OLD thread's stack. The matching pops near the end of this
;      routine execute after esp has been switched and therefore read the
;      values found on the NEW thread's stack.
;
push ecx ; save APC bypass disable
cPublicFpo 0, 1
;
; Wait for context to be swapped for the target thread.
;
; On MP builds spin until the new thread's swap busy flag is clear. The
; flag is cleared at sc23 below when a thread has been switched away from.
;
ifndef NT_UP
sc00: cmp byte ptr [esi].ThSwapBusy, 0 ; check if context swap busy
je short sc01 ; if e, context swap idle
YIELD ; yield execution for SMT system
jmp short sc00 ;
endif
;
; Increment the number of context switches on this processor.
;
; N.B. This increment is done here to force the cache block containing the
; context switch count into the cache as write exclusive. There are
; several other references to this cache block in the following code.
;
sc01: inc dword ptr [ebx]+PcContextSwitches ; processor count
;
; Save the thread exception list head.
;
push [ebx]+PcExceptionList ; save thread exception list head
cPublicFpo 0, 2
;
; Check for context swap logging.
;
; A nonzero group mask pointer only means tracing may be on; the detailed
; check and the trace call are performed out of line at sc92, which rejoins
; the main path at sc03.
;
cmp [ebx]+PcPerfGlobalGroupMask, 0 ; check if logging enable
jne sc92 ; if ne, mask pointer present - check flag out of line
sc03:
ifndef NT_UP
if DBG
mov cl, [esi]+ThNextProcessor ; get current processor number
cmp cl, [ebx]+PcPrcbData+PbNumber ; same as running processor?
jne sc_error2 ; if ne, processor number mismatch
endif
endif
;
; Accumulate the total time spent in a thread.
;
; edx:eax = cycles elapsed since the recorded thread start count. The delta
; is added to the old thread's performance count and also to the start
; count, which makes the start count equal to the value just read.
;
ifdef PERF_DATA
test _KeFeatureBits, KF_RDTSC ; feature supported?
jz short @f ; if z, feature not present
rdtsc ; read cycle counter
sub eax, [ebx].PcPrcbData.PbThreadStartCount.LiLowPart ; sub off thread
sbb edx, [ebx].PcPrcbData.PbThreadStartCount.LiHighPart ; starting time
add [edi].EtPerformanceCountLow, eax ; accumulate thread run time
adc [edi].EtPerformanceCountHigh, edx ;
add [ebx].PcPrcbData.PbThreadStartCount.LiLowPart, eax ; set new thread
adc [ebx].PcPrcbData.PbThreadStartCount.LiHighPart, edx ; starting time
@@: ;
endif
;
; On a uniprocessor system the NPX state is swapped in a lazy manner.
; If a thread whose state is not in the coprocessor attempts to perform
; a coprocessor operation, the current NPX state is swapped out (if needed),
; and the new state is swapped in during the fault. (KiTrap07)
;
; On a multiprocessor system we still fault in the NPX state on demand, but
; we save the state when the thread switches out (assuming the NPX state
; was loaded). This is because it could be difficult to obtain the thread's
; NPX in the trap handler if it was loaded into a different processor's
; coprocessor.
;
mov ebp, cr0 ; get current CR0
mov edx, ebp ; working copy; ebp keeps the original for comparison
ifndef NT_UP
cmp byte ptr [edi]+ThNpxState, NPX_STATE_LOADED ; check if NPX state
je sc_save_npx_state ; if e, NPX state is loaded - save it
endif
;
; Save the old stack pointer and compute the new stack limits.
;
sc05: mov [edi]+ThKernelStack, esp ; save old kernel stack pointer
mov eax, [esi]+ThInitialStack ; get new initial stack pointer
;
; (eax) = Initial Stack
; (ebx) = PCR
; (edi) = OldThread
; (esi) = NewThread
; (ebp) = Current CR0
; (edx) = Current CR0
;
; The two assertions below allow the thread's NpxState byte to be OR'ed
; directly into the CR0 image: the "not loaded" value equals CR0_TS+CR0_MP
; and the "loaded" value is zero.
;
.errnz (NPX_STATE_NOT_LOADED - CR0_TS - CR0_MP)
.errnz (NPX_STATE_LOADED - 0)
ifdef NT_UP
;
; On UP systems floating point state might be being changed by an ISR so we
; block interrupts.
;
cli
endif
;
; Build the CR0 value for the new thread in ecx: the current CR0 with the
; MP, EM and TS bits cleared, merged with the new thread's NpxState and with
; the Cr0NpxState field of its NPX save area (located NPX_FRAME_LENGTH below
; the initial stack address). CR0 is only written if the value differs.
;
movzx ecx, byte ptr [esi]+ThNpxState ; new NPX state is (or is not) loaded
and edx, NOT (CR0_MP+CR0_EM+CR0_TS) ; clear thread settable NPX bits
or ecx, edx ; or in new thread's cr0
or ecx, [eax]+FpCr0NpxState-NPX_FRAME_LENGTH ; merge new thread settable state
cmp ebp, ecx ; check if old and new CR0 match
jne sc_reload_cr0 ; if ne, change in CR0
sc06:
ifdef NT_UP
sti
endif
;
; Switch to the new thread's kernel stack.
;
; On checked builds the dword just below the new stack pointer is expected
; to contain the new thread's own address (breakpoint if not), and the old
; thread's address (edi) is stored just below the old stack pointer.
;
if DBG
mov eax, [esi]+ThKernelStack ; set new stack pointer
cmp esi, dword ptr [eax-4]
je @f
int 3
@@:
xchg esp, eax ; esp = new stack, eax = old stack pointer
mov [eax-4], edi ; Save thread address on stack below stack pointer
else
mov esp, [esi]+ThKernelStack ; set new stack pointer
endif
;
; Check if the old process is the same as the new process.
;
mov ebp, [esi].ThApcState.AsProcess ; get new process address
mov eax, [edi].ThApcState.AsProcess ; get old process address
cmp ebp, eax ; check if process match
jz short sc23 ; if z, process match
;
; Set the processor bit in the new process and clear the old.
;
; XOR toggles the bit; this sets it in the new process and clears it in the
; old one provided the bit was clear and set respectively beforehand, which
; is what the checked-build tests below verify afterwards.
;
ifndef NT_UP
mov ecx, [ebx]+PcSetMemberCopy ; get processor set member
lock xor [ebp]+PrActiveProcessors, ecx ; set bit in new processor set
lock xor [eax]+PrActiveProcessors, ecx ; clear bit in old processor set
if DBG
test [ebp]+PrActiveProcessors, ecx ; test if bit set in new set
jz sc_error5 ; if z, bit not set in new set
test [eax]+PrActiveProcessors, ecx ; test if bit clear in old set
jnz sc_error4 ; if nz, bit not clear in old set
endif
endif
;
; LDT switch, If either the target or source process have an LDT we need to
; load the ldt
;
mov ecx, [ebp]+PrLdtDescriptor ; low dword of new process LDT descriptor
or ecx, [eax]+PrLdtDescriptor ; merge low dword of old process LDT descriptor
jnz sc_load_ldt ; if nz, at least one process has an LDT
sc_load_ldt_ret:
;
; Load the new CR3 and as a side effect flush non-global TB entries.
;
mov eax, [ebp]+PrDirectoryTableBase ; get new directory base
mov cr3, eax ; and flush TB
;
; Set context swap idle for the old thread.
;
sc23: ;
ifndef NT_UP
and byte ptr [edi].ThSwapBusy, 0 ; clear old thread swap busy
endif
;
; Load a null selector into gs.
;
xor eax, eax
mov gs, eax
;
; Set the TEB descriptor to point to the thread TEB and set the TEB address
; in the PCR. The es override here is to force lazy segment loading to occur.
;
; The 32-bit TEB address is split across the descriptor's base fields:
; bits 0-15 into BaseLow, bits 16-23 into BaseMid and bits 24-31 into BaseHi.
;
mov eax, es:[esi]+ThTeb ; get user TEB address
mov [ebx]+PcTeb, eax ; set user TEB address
mov ecx, [ebx]+PcGdt ; get GDT address
mov [ecx]+(KGDT_R3_TEB+KgdtBaseLow), ax ; base bits 0-15
shr eax, 16 ; al = base bits 16-23, ah = base bits 24-31
mov [ecx]+(KGDT_R3_TEB+KgdtBaseMid), al ; base bits 16-23
mov [ecx]+(KGDT_R3_TEB+KgdtBaseHi), ah ; base bits 24-31
;
; Adjust the initial stack address, if necessary, and store in the TSS so V86
; mode threads and 32 bit threads can share a common trapframe structure and
; the NPX save area will be accessible in the same manner on all threads.
;
; The V86 flag is tested in the third byte of the EFlags image of the trap
; frame that sits directly below the NPX save area; the .errnz guarantees
; that the mask lies entirely within that byte.
;
mov eax, [esi].ThInitialStack ; get initial stack address
sub eax, NPX_FRAME_LENGTH ; step below the NPX save area
.errnz (EFLAGS_V86_MASK AND 0FF00FFFFh)
test byte ptr [eax] - KTRAP_FRAME_LENGTH + TsEFlags + 2, EFLAGS_V86_MASK / 10000h
jnz short sc24 ; if nz, V86 frame, no adjustment
sub eax, TsV86Gs - TsHardwareSegSs ; bias for missing fields
sc24: mov ecx, [ebx]+PcTssCopy ; get TSS address
mov [ecx]+TssEsp0, eax ; set initial kernel stack address
;
; Set the IOPM map offset value.
;
; N.B. This may be a redundant load of this value if the process did not
; change during the context switch. However, always reloading this
; value saves several instructions under the context swap lock.
;
mov ax, [ebp]+PrIopmOffset ; set IOPM offset
mov [ecx]+TssIoMapBase, ax ;
;
; Update context switch counters.
;
inc dword ptr [esi]+ThContextSwitches ; thread count
;
; Restore thread exception list head and get APC bypass disable.
;
; N.B. esp now addresses the new thread's stack, so these are the values
;      found on that stack rather than the ones pushed on entry to this
;      invocation.
;
pop [ebx].PcExceptionList ; restore thread exception list head
pop ecx ; get APC bypass disable
;
; Check if an attempt is being made to context switch while in a DPC routine.
;
cmp word ptr [ebx]+PcPrcbData+PbDpcRoutineActive, 0 ; check if DPC active
jne sc91 ; bugcheck if DPC active.
;
; If the new thread has a kernel mode APC pending, then request an APC
; interrupt.
;
cmp byte ptr [esi].ThApcState.AsKernelApcPending, 0 ; APC pending?
jne short sc80 ; if ne, kernel APC pending
xor eax, eax ; set return value
ret ; return
;
; The new thread has an APC interrupt pending.
;
; If the special APC disable count is nonzero, then return no kernel APC
; pending. An APC will be requested when the special APC disable count reaches
; zero.
;
; If APC interrupt bypass is not enabled, then request a software interrupt
; at APC_LEVEL and return no kernel APC pending. Otherwise, return kernel APC
; pending.
;
; The return value is derived from ZF at sc90:
;
;   special APC disable nonzero  - ZF clear from the cmp,  al = 0
;   cl == 0 (bypass enabled)     - ZF set from the test,   al = 1
;   interrupt requested          - ZF cleared by the or,   al = 0
;
; N.B. Only al is written at sc90; the upper bits of eax are not cleared on
;      this path.
;
sc80: cmp word ptr [esi].ThSpecialApcDisable, 0 ; check if special APC disable
jne short sc90 ; if ne, special APC disable
test cl, cl ; test for APC bypass disable
jz short sc90 ; if z, APC bypass enabled
mov cl, APC_LEVEL ; request software interrupt level
fstCall HalRequestSoftwareInterrupt ;
or eax, esp ; clear ZF flag (esp is nonzero, so the result is nonzero)
sc90: setz al ; set return value
ret ; return
;
; Set for new LDT value
;
; (ebp) = new process, (ebx) = PCR
;
; If the low dword of the new process LDT descriptor is zero, eax is zero at
; the lldt below and a null selector is loaded. Otherwise the 8-byte
; descriptor is copied into the KGDT_LDT slot of the GDT, the process's
; int 21 descriptor is copied into the IDT, and KGDT_LDT is loaded.
;
sc_load_ldt:
mov eax, [ebp+PrLdtDescriptor] ; low dword of new LDT descriptor
test eax, eax ; does the new process have an LDT?
je @f ; if e, no - load null selector (eax = 0)
mov ecx, [ebx]+PcGdt ; get GDT address
mov [ecx+KGDT_LDT], eax ; copy descriptor low dword
mov eax, [ebp+PrLdtDescriptor+4] ;
mov [ecx+KGDT_LDT+4], eax ; copy descriptor high dword
;
; Set up int 21 descriptor of IDT. If the process does not have an Ldt, it
; should never make any int 21 calls. If it does, an exception is generated. If
; the process has an Ldt, we need to update int21 entry of LDT for the process.
; Note the Int21Descriptor of the process may simply indicate an invalid
; entry. In which case, the int 21 will be trapped to the kernel.
;
mov ecx, [ebx]+PcIdt ; get IDT address
mov eax, [ebp+PrInt21Descriptor] ;
mov [ecx+21h*8], eax ; copy int 21 descriptor low dword
mov eax, [ebp+PrInt21Descriptor+4] ;
mov [ecx+21h*8+4], eax ; copy int 21 descriptor high dword
mov eax, KGDT_LDT ; selector of the LDT descriptor in the GDT
@@: lldt ax ; load LDT register (null selector if no LDT)
jmp sc_load_ldt_ret
;
; Cr0 has changed (ie, floating point processor present), load the new value.
;
; (ecx) = new CR0 value, (eax) = new thread initial stack
;
; On checked builds verify that the thread's NpxState and the saved
; Cr0NpxState contain no bits outside the expected sets.
;
sc_reload_cr0:
if DBG
test byte ptr [esi]+ThNpxState, NOT (CR0_TS+CR0_MP)
jnz sc_error ;
test dword ptr [eax]+FpCr0NpxState-NPX_FRAME_LENGTH, NOT (CR0_PE+CR0_MP+CR0_EM+CR0_TS)
jnz sc_error3 ;
endif
mov cr0,ecx ; set new CR0 NPX state
jmp sc06
;
; Save coprocessor's current context. FpCr0NpxState is the current thread's
; CR0 state. The following bits are valid: CR0_MP, CR0_EM, CR0_TS. MVDMs
; may set and clear MP & EM as they please and the settings will be reloaded
; on a context switch (but they will not be saved from CR0 to Cr0NpxState).
; The kernel sets and clears TS as required.
;
; (ebp) = Current CR0
; (edx) = Current CR0
;
ifndef NT_UP
sc_save_npx_state:
and edx, NOT (CR0_MP+CR0_EM+CR0_TS) ; we need access to the NPX state
mov ecx, [edi].ThInitialStack ; get NPX save area address
sub ecx, NPX_FRAME_LENGTH ; save area is just below the initial stack
cmp ebp, edx ; Does CR0 need reloading?
je short sc_npx10
mov cr0, edx ; set new cr0
mov ebp, edx ; (ebp) = (edx) = current cr0 state
sc_npx10:
;
; The fwait following the fnsave is to make sure that the fnsave has stored the
; data into the save area before this coprocessor state could possibly be
; context switched in and used on a different (co)processor. I've added the
; clocks from when the dispatcher lock is released and don't believe it's a
; possibility. I've also timed the impact this fwait seems to have on a 486
; when performing lots of numeric calculations. It appears as if there is
; nothing to wait for after the fnsave (although the 486 manual says there is)
; and therefore the calculation time far outweighed the 3clk fwait and it
; didn't make a noticeable difference.
;
;
; If FXSR feature is NOT present on the processor, the fxsave instruction is
; patched at boot time to start using fnsave instead
;
; N.B. The instruction is emitted as raw bytes between the public labels
;      _ScPatchFxb and _ScPatchFxe; the patching code itself is not part
;      of this module.
;
_ScPatchFxb:
; fxsave [ecx] ; save NPX state
db 0FH, 0AEH, 01
_ScPatchFxe:
mov byte ptr [edi]+ThNpxState, NPX_STATE_NOT_LOADED ; set no NPX state
mov dword ptr [ebx].PcPrcbData+PbNpxThread, 0 ; clear npx owner
jmp sc05
endif
;
; This code is out of line to optimize the normal case where tracing is off.
;
; The group mask pointer is reloaded and rechecked because it may have been
; cleared since the test in the main path. The trace routine is called with
; ecx = old thread and edx = new thread only if the context swap flag is set.
;
sc92: mov eax, [ebx]+PcPerfGlobalGroupMask ; Load the ptr into eax
cmp eax, 0 ; catch race condition on pointer here
jz sc03 ; instead of above in mainline code
mov edx, esi ; pass the new ETHREAD object
mov ecx, edi ; pass the old ETHREAD object
test dword ptr [eax+PERF_CONTEXTSWAP_OFFSET], PERF_CONTEXTSWAP_FLAG
jz sc03 ; return if our flag is not set
fstCall WmiTraceContextSwap ; call the Wmi context swap trace
jmp sc03 ;
;
; A context switch was attempted while executing a DPC - bug check.
;
; The old thread (edi) and new thread (esi) addresses are passed as the
; first two bugcheck parameters.
;
.fpo (2, 0, 0, 0, 0, 0)
sc91: stdCall _KeBugCheckEx <ATTEMPTED_SWITCH_FROM_DPC, edi, esi, 0, 0>
ret ; return
;
; Checked-build failure targets. Each label is the target of one of the
; consistency checks above; execution falls through the remaining
; breakpoints.
;
if DBG
sc_error5: int 3
sc_error4: int 3
sc_error3: int 3
sc_error2: int 3
sc_error: int 3
endif
SwapContext endp
page , 132
subttl "Flush EntireTranslation Buffer"
;++
;
; VOID
; KeFlushCurrentTb (
; )
;
; Routine Description:
;
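; This function flushes the entire translation buffer (TB) on the current
; processor by reloading CR3. When global page support is enabled, the
; body is replaced at initialization time with a version that flushes the
; TB by toggling CR4_PGE instead (see Ki386EnableGlobalPage).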
;
; Arguments:
;
; Return Value:
;
; None.
;
;--
;
; N.B. The byte layout of this routine matters. Ki386EnableGlobalPage copies
;      the bytes from ktb_gb through the first byte at ktb_eb over the code
;      starting at ktb00, and uses the byte at ktb_eb as a "patch complete"
;      flag. Do not insert, remove or reorder instructions or labels here
;      without updating that routine.
;
cPublicProc _KeFlushCurrentTb ,0
;
; Default version: rewriting CR3 with its current value flushes the TB.
;
ktb00: mov eax, cr3 ; (eax) = directory table base
mov cr3, eax ; flush TLB
stdRET _KeFlushCurrentTb
;
; Global page version: clear CR4_PGE and then set it again. This code is
; not reached by falling through from above; it is the source image that is
; copied to ktb00.
;
ktb_gb: mov eax, cr4 ; *** see Ki386EnableGlobalPage ***
and eax, not CR4_PGE ; This FlushCurrentTb version gets copied into
mov cr4, eax ; ktb00 at initialization time if needed.
or eax, CR4_PGE ; turn global pages back on
mov cr4, eax ;
ktb_eb: stdRET _KeFlushCurrentTb
stdENDP _KeFlushCurrentTb
;;
;; moved KiFlushDcache below KeFlushCurrentTb for BBT purposes. BBT
;; needs an end label to treat KeFlushCurrentTb as data and to keep together.
;;
page , 132
subttl "Flush Data Cache"
;++
;
; VOID
; KiFlushDcache (
; )
;
; VOID
; KiFlushIcache (
; )
;
; Routine Description:
;
; This routine does nothing on i386 and i486 systems. Why? Because
; (a) their caches are completely transparent, (b) they don't have
; instructions to flush their caches.
;
; Arguments:
;
; None.
;
; Return Value:
;
; None.
;
;--
;
; Both entry points share a single body that returns immediately.
; _KiFlushIcache is declared inside _KiFlushDcache, so the two names refer
; to the same return instruction.
;
cPublicProc _KiFlushDcache ,0
cPublicProc _KiFlushIcache ,0
stdRET _KiFlushIcache ; nothing to flush - just return
stdENDP _KiFlushIcache
stdENDP _KiFlushDcache
_TEXT$00 ends
INIT SEGMENT DWORD PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
;++
;
; VOID
; Ki386EnableGlobalPage (
; IN volatile PLONG Number
; )
;
; /*++
;
; Routine Description:
;
; This routine enables the global page PDE/PTE support in the system,
; and stalls until complete and then sets the current processor's cr4
; register to enable global page support.
;
; Arguments:
;
; Number - Supplies a pointer to the count of the number of processors in
; the configuration.
;
; Return Value:
;
; None.
;--
cPublicProc _Ki386EnableGlobalPage,1
;
; esi, edi and ecx are used by the rep movsb below. ebx is saved and
; restored but is not otherwise referenced in this routine.
;
push esi
push edi
push ebx
mov edx, [esp+16] ; pointer to Number (3 saved registers + return address)
pushfd ; save caller's interrupt flag state
cli ; run with interrupts disabled
;
; Wait for all processors
;
; Each processor decrements the shared count once and then spins until the
; count reaches zero, i.e. until every processor has arrived here.
;
lock dec dword ptr [edx] ; count down
egp10: YIELD
cmp dword ptr [edx], 0 ; wait for all processors to signal
jnz short egp10
cmp byte ptr PCR[PcNumber], 0 ; processor 0?
jne short egp20
;
; Install proper KeFlushCurrentTb function.
;
; Processor 0 copies the global page version of the flush code (ktb_gb up to
; and including the first byte at ktb_eb) over the default version at ktb00,
; and then stores zero in the byte at ktb_eb to signal completion.
;
; NOTE(review): the copy relies on the direction flag being clear on entry
; (no cld is issued here), and the completion flag relies on the byte at
; ktb_eb being nonzero before this store - presumably it is the opcode of
; the return instruction there. Confirm both.
;
mov edi, ktb00 ; destination: default flush code
mov esi, ktb_gb ; source: global page flush code
mov ecx, ktb_eb - ktb_gb + 1 ; byte count, including first byte at ktb_eb
rep movsb
mov byte ptr [ktb_eb], 0 ; signal that the copy is complete
;
; Wait for P0 to signal that proper flush TB handlers have been installed
;
egp20: cmp byte ptr [ktb_eb], 0
jnz short egp20
;
; Flush TB, and enable global page support
; (note load of CR4 is explicitly done before the load of CR3
; to work around P6 step B0 errata 11)
;
mov eax, cr4
and eax, not CR4_PGE ; should not be set, but let's be safe
mov ecx, cr3
mov cr4, eax ; CR4 written first (see errata note above)
mov cr3, ecx ; Flush TB
or eax, CR4_PGE ; enable global TBs
mov cr4, eax
popfd ; restore caller's interrupt flag state
pop ebx
pop edi
pop esi
stdRET _Ki386EnableGlobalPage
stdENDP _Ki386EnableGlobalPage
;++
;
; VOID
; Ki386EnableDE (
;     IN volatile PLONG Number
;     )
;
; Routine Description:
;
;     Sets the DE bit (CR4_DE) in the CR4 register of the processor that
;     executes this routine, leaving all other CR4 bits unchanged. This
;     enables I/O breakpoints.
;
; Arguments:
;
;     Number - Supplies a pointer to the count of the number of processors
;         in the configuration. The routine is declared with this single
;         argument but does not reference it.
;
; Return Value:
;
;     None.
;
;--
cPublicProc _Ki386EnableDE,1
        mov     eax, cr4                        ; read current CR4
        or      eax, CR4_DE                     ; add the DE bit
        mov     cr4, eax                        ; write CR4 back
        stdRET  _Ki386EnableDE
stdENDP _Ki386EnableDE
;++
;
; VOID
; Ki386EnableFxsr (
;     IN volatile PLONG Number
;     )
;
; Routine Description:
;
;     Sets the OSFXSR bit (CR4_FXSR) in the CR4 register of the processor
;     that executes this routine, leaving all other CR4 bits unchanged.
;     The bit indicates that the OS supports FXSAVE/FXRSTOR for use during
;     context switches.
;
; Arguments:
;
;     Number - Supplies a pointer to the count of the number of processors
;         in the configuration. The routine is declared with this single
;         argument but does not reference it.
;
; Return Value:
;
;     None.
;
;--
cPublicProc _Ki386EnableFxsr,1
        mov     eax, cr4                        ; read current CR4
        or      eax, CR4_FXSR                   ; add the OSFXSR bit
        mov     cr4, eax                        ; write CR4 back
        stdRET  _Ki386EnableFxsr
stdENDP _Ki386EnableFxsr
;++
;
; VOID
; Ki386EnableXMMIExceptions (
;     IN volatile PLONG Number
;     )
;
; Routine Description:
;
;     Installs the int 19 (XMMI unmasked numeric exception) handler in the
;     IDT of the current processor and then sets the OSXMMEXCPT bit
;     (CR4_XMMEXCPT) in CR4 to indicate that the OS supports unmasked
;     Katmai New Instruction technology exceptions.
;
;     The IDT entry is filled in field by field: access byte (offset 5),
;     selector (offset 2), handler offset bits 0-15 (offset 0) and handler
;     offset bits 16-31 (offset 6). The byte at offset 4 is not written.
;
; Arguments:
;
;     Number - Supplies a pointer to the count of the number of processors
;         in the configuration. The routine is declared with this single
;         argument but does not reference it.
;
; Return Value:
;
;     None.
;
;--
cPublicProc _Ki386EnableXMMIExceptions,1
;
; Locate the IDT entry for vector 19 (13h); each entry is 8 bytes.
;
        mov     eax, PCR[PcIdt]                 ; eax = IDT base
        add     eax, 13h * 8                    ; eax -> entry for int 19
;
; Fill in the gate.
;
        mov     byte ptr [eax + 5], 08eh        ; P=1, DPL=0, Type=0Eh
        mov     word ptr [eax + 2], KGDT_R0_CODE ; kernel code selector
        mov     edx, offset FLAT:_KiTrap13      ; edx = int 19 handler address
        mov     word ptr [eax], dx              ; handler offset, low word
        mov     ecx, edx                        ;
        shr     ecx, 16                         ; cx = handler offset, high word
        mov     word ptr [eax + 6], cx          ;
;
; Enable XMMI exception handling.
;
        mov     eax, cr4                        ; read current CR4
        or      eax, CR4_XMMEXCPT               ; add the OSXMMEXCPT bit
        mov     cr4, eax                        ; write CR4 back
        stdRET  _Ki386EnableXMMIExceptions
stdENDP _Ki386EnableXMMIExceptions
;++
;
; VOID
; Ki386EnableCurrentLargePage (
; IN ULONG IdentityAddr,
; IN ULONG IdentityCr3
; )
;
; /*++
;
; Routine Description:
;
; This routine enables the large page PDE support in the processor.
;
; Arguments:
;
; IdentityAddr - Supplies the linear address of the beginning of this
; function where (linear == physical).
;
; IdentityCr3 - Supplies a pointer to the temporary page directory and
; page tables that provide both the kernel (virtual ->physical) and
; identity (linear->physical) mappings needed for this function.
;
; Return Value:
;
; None.
;--
;
; N.B. This routine is position dependent: part of it executes from an
;      identity-mapped address with paging disabled. The public label
;      _Ki386EnableCurrentLargePageEnd marks the end of the routine.
;      NOTE(review): presumably the End label lets the caller determine how
;      much code must be covered by the identity mapping - confirm.
;
public _Ki386EnableCurrentLargePageEnd
cPublicProc _Ki386EnableCurrentLargePage,2
mov ecx,[esp]+4 ; (ecx)-> IdentityAddr
mov edx,[esp]+8 ; (edx)-> IdentityCr3
pushfd ; save current IF state
cli ; disable interrupts
mov eax, cr3 ; (eax)-> original Cr3
mov cr3, edx ; load Cr3 with Identity mapping
;
; Compute the identity-mapped address of _Ki386LargePageIdentityLabel:
; IdentityAddr + (label - start of this function).
;
sub ecx, offset _Ki386EnableCurrentLargePage
add ecx, offset _Ki386LargePageIdentityLabel
jmp ecx ; jump to (linear == physical)
;
; Executing at linear == physical from here until the jmp edx below.
;
_Ki386LargePageIdentityLabel:
mov ecx, cr0
and ecx, NOT CR0_PG ; clear PG bit to disable paging
mov cr0, ecx ; disable paging
jmp $+2 ; jump to the next instruction
mov edx, cr4
or edx, CR4_PSE ; enable Page Size Extensions
mov cr4, edx
mov edx, offset OriginalMapping ; address of the label in the original (kernel) mapping
or ecx, CR0_PG ; set PG bit to enable paging
mov cr0, ecx ; enable paging
jmp edx ; Return to original mapping.
;
; Back at the original virtual address. CR3 still holds IdentityCr3 at this
; point; (eax) still holds the original CR3 saved above.
;
OriginalMapping:
mov cr3, eax ; restore original Cr3
popfd ; restore interrupts to previous
stdRET _Ki386EnableCurrentLargePage
_Ki386EnableCurrentLargePageEnd:
stdENDP _Ki386EnableCurrentLargePage
INIT ends
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
page , 132
subttl "Swap Process"
;++
;
; VOID
; KiSwapProcess (
; IN PKPROCESS NewProcess,
; IN PKPROCESS OldProcess
; )
;
; Routine Description:
;
; This function swaps the address space to another process by flushing
; the data cache, the instruction cache, the translation buffer, and
; establishes a new directory table base.
;
; It also swaps in the LDT and IOPM of the new process. This is necessary
; to avoid bogus mismatches in SwapContext.
;
; NOTE: keep in sync with process switch part of SwapContext
;
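; Arguments:
;
; NewProcess - Supplies a pointer to the control object of the process
; whose address space is being switched to.
;
; OldProcess - Supplies a pointer to the control object of the process
; whose address space is being switched from.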
;
; Return Value:
;
; None.
;
;--
cPublicProc _KiSwapProcess ,2
cPublicFpo 2, 0
mov edx,[esp]+4 ; (edx)-> New process
mov eax,[esp]+8 ; (eax)-> Old Process
;
; Set the processor number in the new process and clear it in the old.
;
; XOR toggles the bit; this sets it in the new process and clears it in the
; old one provided the bit was clear and set respectively beforehand, which
; is what the checked-build tests below verify afterwards.
;
ifndef NT_UP
mov ecx, PCR[PcSetMember] ; get processor set member
lock xor [edx]+PrActiveProcessors,ecx ; set bit in new processor set
lock xor [eax]+PrActiveProcessors,ecx ; clear bit in old processor set
if DBG
test [edx]+PrActiveProcessors,ecx ; test if bit set in new set
jz kisp_error1 ; if z, bit not set in new set
test [eax]+PrActiveProcessors,ecx ; test if bit clear in old set
jnz kisp_error ; if nz, bit not clear in old set
endif
endif
;
; Change LDT, If either the source or target process has an LDT we need to
; load the new one.
;
mov ecx, [edx]+PrLdtDescriptor ; low dword of new process LDT descriptor
or ecx, [eax]+PrLdtDescriptor ; merge low dword of old process LDT descriptor
jnz kisp_load_ldt ; if nz, at least one process has an LDT
kisp_load_ldt_ret: ; LDT handling complete; (edx) still -> new process
;
; Load the new CR3 and as a side effect flush non-global TB entries.
;
mov eax,[edx]+PrDirectoryTableBase ; get new directory base
mov cr3,eax ; and flush TB
mov ecx,PCR[PcTssCopy] ; (ecx)-> TSS
;
; Clear gs so it can't leak across processes
;
xor eax,eax ; (eax) = 0, the null selector
mov gs,ax ; Clear gs.
;
; Change IOPM
;
mov ax,[edx]+PrIopmOffset ; get new process IOPM offset
mov [ecx]+TssIoMapBase,ax ; set it in the TSS
stdRET _KiSwapProcess
kisp_load_ldt:
;
; Edit LDT descriptor
;
; If the low dword of the new process LDT descriptor is zero, eax is zero at
; the lldt below and a null selector is loaded. Otherwise the 8-byte
; descriptor is copied into the KGDT_LDT slot of the GDT, the process's
; int 21 descriptor is copied into the IDT, and KGDT_LDT is loaded.
;
mov eax,[edx+PrLdtDescriptor] ; low dword of new LDT descriptor
test eax, eax ; does the new process have an LDT?
je @f ; if e, no - load null selector (eax = 0)
mov ecx,PCR[PcGdt] ; get GDT address
mov [ecx+KGDT_LDT],eax ; copy descriptor low dword
mov eax,[edx+PrLdtDescriptor+4]
mov [ecx+KGDT_LDT+4],eax ; copy descriptor high dword
;
; Set up int 21 descriptor of IDT. If the process does not have Ldt, it
; should never make any int 21 call. If it does, an exception is generated.
; If the process has Ldt, we need to update int21 entry of LDT for the process.
; Note the Int21Descriptor of the process may simply indicate an invalid
; entry. In which case, the int 21 will be trapped to the kernel.
;
mov ecx, PCR[PcIdt] ; get IDT address
mov eax, [edx+PrInt21Descriptor]
mov [ecx+21h*8], eax ; copy int 21 descriptor low dword
mov eax, [edx+PrInt21Descriptor+4]
mov [ecx+21h*8+4], eax ; copy int 21 descriptor high dword
mov eax,KGDT_LDT ;@@32-bit op to avoid prefix
@@: lldt ax ; load LDT register (null selector if no LDT)
jmp kisp_load_ldt_ret
;
; Checked-build failure targets for the processor set consistency checks
; above.
;
if DBG
kisp_error1: int 3
kisp_error: int 3
endif
stdENDP _KiSwapProcess
page ,132
subttl "Idle Loop"
;++
;
; VOID
; KiIdleLoop(
; VOID
; )
;
; Routine Description:
;
; This routine continuously executes the idle loop and never returns.
;
; Arguments:
;
; ebx - Address of the current processor's PCR.
;
; Return value:
;
; None - routine never returns.
;
;--
cPublicFastCall KiIdleLoop ,0
cPublicFpo 0, 0
;
; Idle loop body. Each pass: briefly open an interrupt window, retire any
; pending DPC/timer (and, on MP, deferred ready) work, then either swap to
; the thread selected for this processor or call the power-state idle
; function and start over. The routine has no return path.
;
; Register roles: (ebx)-> PCR throughout. On checked builds edi is the
; debugger poll countdown while idling; once a thread switch is attempted
; edi is reused for the idle thread address, and every such path returns
; through kid30, which reloads the countdown.
;
if DBG
xor edi, edi ; reset poll breakin counter
endif
jmp short kid20 ; Skip HalIdleProcessor on first iteration
;
; There are no entries in the DPC list and a thread has not been selected
; for execution on this processor. Call the HAL so power management can be
; performed.
;
; N.B. The HAL is called with interrupts disabled. The HAL will return
; with interrupts enabled.
;
; N.B. Use a call instruction instead of a push-jmp, as the call instruction
; executes faster and won't invalidate the processor's call-return stack
; cache.
;
; The idle function is reached indirectly through the PpIdleFunction
; pointer in this processor's power state, with the power state address as
; its argument.
;
kid10: lea ecx, [ebx].PcPrcbData.PbPowerState
call dword ptr [ecx].PpIdleFunction ; (ecx) = Arg0
;
; Give the debugger an opportunity to gain control on debug systems.
;
; N.B. On an MP system the lowest numbered idle processor is the only
; processor that polls for a breakin request.
;
; (SetMember - 1) is a mask of all lower-numbered processors; if any of
; them is present in the idle summary, this processor skips the poll.
;
kid20:
if DBG
ifndef NT_UP
mov eax, _KiIdleSummary ; get idle summary
mov ecx, [ebx].PcSetMember ; get set member
dec ecx ; compute right bit mask
and eax, ecx ; check if any lower bits set
jnz short CheckDpcList ; if nz, not lowest numbered
endif
dec edi ; decrement poll counter
jg short CheckDpcList ; if g, not time to poll
POLL_DEBUGGER ; check if break in requested
endif
;
; Common re-entry point after a poll, a context swap, or an idle schedule
; attempt. Checked builds reload the poll countdown here.
;
kid30:
if DBG
ifndef NT_UP
mov edi, 20 * 1000 ; set breakin poll interval
else
mov edi, 100 ; UP idle loop has a HLT in it
endif
endif
;
; N.B. Nothing in this routine branches to CheckDpcList0; it is reached
; only by falling through from kid30.
;
CheckDpcList0: ;
YIELD
;
; Disable interrupts and check if there is any work in the DPC list of the
; current processor or a target processor.
;
CheckDpcList:
;
; N.B. The following code enables interrupts for a few cycles, then
; disables them again for the subsequent DPC and next thread
; checks.
;
sti ; enable interrupts
nop ;
nop ;
cli ; disable interrupts
;
; Process the deferred procedure call list for the current processor.
;
; The queue depth, timer request and (MP) deferred ready list head are
; OR'd together so one branch tests all of them for zero.
;
mov eax, [ebx]+PcPrcbData+PbDpcQueueDepth ; get DPC queue depth
or eax, [ebx]+PcPrcbData+PbTimerRequest ; merge timer request
ifndef NT_UP
or eax, [ebx]+PcPrcbData+PbDeferredReadyListHead ; merge deferred list head
endif
jz short CheckNextThread ; if z, no DPC's or timers to process
mov cl, DISPATCH_LEVEL ; set interrupt level
fstCall HalClearSoftwareInterrupt ; clear software interrupt
lea ecx, [ebx].PcPrcbData ; set current PRCB address
CAPSTART <@KiIdleLoop@0,@KiRetireDpcList@4>
fstCall KiRetireDpcList ; process the current DPC list
CAPEND <@KiIdleLoop@0>
if DBG
xor edi, edi ; clear breakin poll interval
endif
;
; Check if a thread has been selected to run on the current processor.
;
; With no thread selected, UP builds go straight to the idle function
; (kid10); MP builds first check for an idle schedule request (kid40).
;
CheckNextThread: ;
cmp dword ptr [ebx].PcPrcbData.PbNextThread, 0 ; thread selected?
ifdef NT_UP
je short kid10 ; if eq, no thread selected
else
je kid40 ; if eq, no thread selected.
endif
;
; Raise IRQL to synchronization level and enable interrupts.
;
ifndef NT_UP
RaiseIrql SYNCH_LEVEL, NoOld ; raise IRQL to synchronization level
endif
sti ; enable interrupts
mov edi, [ebx].PcPrcbData.PbCurrentThread ; get idle thread address
;
; Set context swap busy for idle thread and acquire the PRCB lock.
;
; Fast path: one interlocked bit-test-and-set of bit 0 of the lock. Only
; if the bit was already set is the spin lock acquire routine called.
;
ifndef NT_UP
mov byte ptr [edi].ThSwapBusy, 1 ; set context swap busy
lock bts dword ptr [ebx].PcPrcbData.PbPrcbLock, 0 ; try to acquire PRCB Lock
jnc short kid33 ; if nc, PRCB lock acquired
lea ecx, [ebx].PcPrcbData.PbPrcbLock ; get PRCB lock address
fstCall KefAcquireSpinLockAtDpcLevel ; acquire current PRCB lock
endif
;
; If a thread had been scheduled for this processor but was removed from
; eligibility (e.g., an affinity change), then the new thread could be the
; idle thread.
;
kid33: mov esi, [ebx].PcPrcbData.PbNextThread ; get next thread address
ifndef NT_UP
cmp esi, edi ; check if idle thread
je short kisame ; if e, processor idle again
endif
and dword ptr [ebx].PcPrcbData.PbNextThread, 0 ; clear next thread
mov [ebx].PcPrcbData.PbCurrentThread, esi ; set new thread address
mov byte ptr [esi]+ThState, Running ; set thread state running
;
; Clear idle schedule since a new thread has been selected for execution on
; this processor and release the PRCB lock.
;
ifndef NT_UP
and byte ptr [ebx].PcPrcbData.PbIdleSchedule, 0 ; clear idle schedule
and dword ptr [ebx].PcPrcbData.PbPrcbLock, 0 ; release current PRCB lock
endif
;
; Swap to the new thread. At this point (esi)-> new thread and
; (edi)-> idle thread; SwapContext is defined elsewhere in this module and
; presumably consumes them in those registers - confirm against its header.
; Control comes back here when the idle thread is next swapped in.
;
kid35: ;
CAPSTART <@KiIdleLoop@0,SwapContext>
mov ecx, APC_LEVEL ; set APC bypass disable
call SwapContext ; swap context
CAPEND <@KiIdleLoop@0>
ifndef NT_UP
LowerIrql DISPATCH_LEVEL ; lower IRQL to dispatch level
endif
jmp kid30 ;
;
; The new thread is the Idle thread (same as old thread). This can happen
; rarely when a thread scheduled for this processor is made unable to run
; on this processor. As this processor has again been marked idle, other
; processors may unconditionally assign new threads to this processor.
;
ifndef NT_UP
kisame: and dword ptr [ebx].PcPrcbData.PbNextThread, 0 ; clear next thread
and dword ptr [ebx].PcPrcbData.PbPrcbLock, 0 ; release current PRCB lock
and byte ptr [edi].ThSwapBusy, 0 ; set idle thread context swap idle
jmp kid30 ;
;
; Call idle schedule if requested.
;
; KiIdleSchedule returns the selected thread in eax, or zero if none. The
; two mov instructions after the test do not alter flags, so the final jnz
; still acts on the test of eax.
;
kid40: cmp byte ptr [ebx].PcPrcbData.PbIdleSchedule, 0 ; check if idle schedule
je kid10 ; if e, idle schedule not requested
sti ; enable interrupts
lea ecx, [ebx].PcPrcbData ; get current PRCB address
fstCall KiIdleSchedule ; attempt to schedule thread
test eax, eax ; test if new thread schedule
mov esi, eax ; set new thread address
mov edi, [ebx].PcPrcbData.PbIdleThread ; get idle thread address
jnz short kid35 ; if nz, new thread scheduled
jmp kid30 ;
endif
fstENDP KiIdleLoop
;
; _KiPollDebugger - assembled only when DBGMP is defined.
;
; Wraps the POLL_DEBUGGER macro in a procedure with no arguments. eax, ecx
; and edx are saved before the macro and restored after it, in reverse
; order, so callers see those three registers unchanged.
;
; NOTE(review): the "3" in cPublicFpo presumably records the three dwords
; pushed below; keep the two in step if the saved register set changes.
;
ifdef DBGMP
cPublicProc _KiPollDebugger,0
cPublicFpo 0,3
push eax
push ecx
push edx
POLL_DEBUGGER
pop edx
pop ecx
pop eax
stdRET _KiPollDebugger
stdENDP _KiPollDebugger
endif
page , 132
subttl "Adjust TSS ESP0 value"
;++
;
; VOID
; Ki386AdjustEsp0 (
; IN PKTRAP_FRAME TrapFrame
; )
;
; Routine Description:
;
; This routine puts the appropriate ESP0 value in the esp0 field of the
; TSS. This allows protected mode and V86 mode to use the same stack
; frame. The ESP0 value for protected mode is 16 bytes lower than
; for V86 mode to compensate for the missing segment registers.
;
; Arguments:
;
; TrapFrame - Supplies a pointer to the TrapFrame.
;
; Return Value:
;
; None.
;
;--
cPublicProc _Ki386AdjustEsp0 ,1

if DBG

;
; Checked builds only: break into the debugger if the current IRQL is
; below APC_LEVEL, since the trap frame could then be edited by a
; SetContextThread while it is being examined here.
;

        CurrentIrql
        cmp     al, APC_LEVEL
        jge     @f
        int     3
@@:
endif

;
; Compute the ESP0 value in eax. Start at the current thread's initial
; stack and step down over the NPX save area, which is present for both
; frame types.
;

        mov     edx, [esp + 4]                  ; (edx)-> trap frame
        mov     eax, PCR[PcPrcbData.PbCurrentThread] ; (eax)-> current thread
        mov     eax, [eax]+ThInitialStack       ; (eax) = initial stack address
        sub     eax, NPX_FRAME_LENGTH           ; step over NPX save area

;
; A frame that is not a V86 frame does not carry the V86 segment register
; slots at its top, so ESP0 moves down by their combined size as well.
;

        test    dword ptr [edx]+TsEFlags, EFLAGS_V86_MASK ; V86 frame?
        jnz     short ae_esp0_ready             ; if nz, V86 - no adjustment
        sub     eax, TsV86Gs - TsHardwareSegSS  ; compensate for missing regs

;
; Store the result in this processor's TSS. Interrupts are disabled around
; the PCR lookup and the store so that the thread cannot move to another
; processor between the two; popfd then restores the caller's interrupt
; state.
;

ae_esp0_ready:
        pushfd                                  ; save interrupt flag state
        cli                                     ; stay on this processor
        mov     edx, PCR[PcTssCopy]             ; (edx)-> TSS
        mov     [edx]+TssEsp0, eax              ; set Esp0 value
        popfd                                   ; restore interrupt flag state

        stdRET  _Ki386AdjustEsp0

stdENDP _Ki386AdjustEsp0
_TEXT$00 ends
end