Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1457 lines
55 KiB

// TITLE("Context Swap")
//++
//
// Copyright (c) 1991 Microsoft Corporation
// Copyright (c) 1992 Digital Equipment Corporation
//
// Module Name:
//
// ctxsw.s
//
// Abstract:
//
// This module implements the ALPHA machine dependent code necessary to
// field the dispatch interrupt and to perform kernel initiated context
// switching.
//
// Author:
//
// David N. Cutler (davec) 1-Apr-1991
// Joe Notarangelo 05-Jun-1992
//
// Environment:
//
// Kernel mode only, IRQL DISPATCH_LEVEL.
//
// Revision History:
//
//--
#include "ksalpha.h"
// #define _COLLECT_SWITCH_DATA_ 1
SBTTL("Switch To Thread ")
// NTSTATUS
// KiSwitchToThread (
// IN PKTHREAD NextThread,
// IN ULONG WaitReason,
// IN ULONG WaitMode,
// IN PKEVENT WaitObject
// )
//
// Routine Description:
//
// This function performs an optimal switch to the specified target thread
// if possible. No timeout is associated with the wait, thus the issuing
// thread will wait until the wait event is signaled or an APC is delivered.
//
// N.B. This routine is called with the dispatcher database locked.
//
// N.B. The wait IRQL is assumed to be set for the current thread and the
// wait status is assumed to be set for the target thread.
//
// N.B. It is assumed that if a queue is associated with the target thread,
// then the concurrency count has been incremented.
//
// N.B. Control is returned from this function with the dispatcher database
// unlocked.
//
// Arguments:
//
// NextThread - Supplies a pointer to a dispatcher object of type thread.
//
// WaitReason - supplies the reason for the wait operation.
//
// WaitMode - Supplies the processor wait mode.
//
// WaitObject - Supplies a pointer to a dispatcher object of type event
// or semaphore.
//
// Return Value:
//
// The wait completion status. A value of STATUS_SUCCESS is returned if
// the specified object satisfied the wait. A value of STATUS_USER_APC is
// returned if the wait was aborted to deliver a user APC to the current
// thread.
//--
NESTED_ENTRY(KiSwitchToThread, ExceptionFrameLength, zero)
//
// Register usage on the direct-dispatch path (established below):
//
// s0 - address of the PRCB
// s1 - current (client) thread
// s2 - target (server) thread
// s4 - target thread's process
// t4 - client thread priority
// t5 - server thread priority
//
lda sp, -ExceptionFrameLength(sp) // allocate context frame
stq ra, ExIntRa(sp) // save return address
stq s0, ExIntS0(sp) // save non-volatile integer registers
stq s1, ExIntS1(sp) //
stq s2, ExIntS2(sp) //
stq s3, ExIntS3(sp) //
stq s4, ExIntS4(sp) //
stq s5, ExIntS5(sp) //
stq fp, ExIntFp(sp) //
PROLOGUE_END
//
// Save the wait reason, the wait mode, and the wait object address.
//
// N.B. - The three longwords following ExPsr in the exception frame are
// used as scratch storage for the wait reason (ExPsr + 4), the wait
// mode (ExPsr + 8), and the wait object address (ExPsr + 12). They
// are reloaded at ContinueWait.
//
stl a1, ExPsr + 4(sp) // save wait reason
stl a2, ExPsr + 8(sp) // save wait mode
stl a3, ExPsr +12(sp) // save wait object address
//
// If the target thread's kernel stack is resident, the target thread's
// process is in the balance set, the target thread can run on the
// current processor, and another thread has not already been selected
// to run on the current processor, then do a direct dispatch to the
// target thread bypassing all the general wait logic, thread priorities
// permitting.
//
ldl s4, ThApcState + AsProcess(a0) // get target process address
ldq_u t0, ThKernelStackResident(a0) // get kernel stack resident
extbl t0, ThKernelStackResident % 8, t7 // extract byte field
GET_PROCESSOR_CONTROL_BLOCK_BASE // get address of PRCB
bis v0, zero, s0 // save PRCB in s0
ldq_u t1, PrState(s4) // get target process state
extbl t1, PrState % 8, t8 // extract byte field
ldl s1, PbCurrentThread(v0) // get current thread address
beq t7, LongWay // if eq, kernel stack not resident
xor t8, ProcessInMemory, t6 // check if process in memory
bis a0, zero, s2 // set target thread address
bne t6, LongWay // if ne, process not in memory
#if !defined(NT_UP)
ldl t0,PbNextThread(s0) // get address of next thread
ldl t1,PbSetMember(s0) // get processor set member
ldl t2,ThAffinity(s2) // get target thread affinity
bne t0, LongWay // if ne, next thread selected
and t1,t2,t3 // check for compatible affinity
beq t3, LongWay // if eq, affinity not compatible
#endif
//
// Compute the new thread priority.
//
// N.B. This code takes advantage of the fact that ThPriorityDecrement and
// ThBasePriority are contained in the same dword of the KTHREAD object.
//
#if ((ThBasePriority / 4) != (ThPriorityDecrement / 4))
#error "ThBasePriority and ThPriorityDecrement have moved"
#endif
ldq_u t12, ThPriority(s1) // get client thread priority
extbl t12, ThPriority % 8, t4 // extract byte field
ldq_u t11, ThPriority(s2) // get server thread priority
extbl t11, ThPriority % 8, t5 // extract byte field
cmpult t4, LOW_REALTIME_PRIORITY, v0 // check if realtime client
cmpult t5, LOW_REALTIME_PRIORITY, t10 // check if realtime server
beq v0, 60f // if eq, realtime client
ldq_u t9, ThPriorityDecrement(s2) // get priority decrement value
extbl t9, ThPriorityDecrement % 8, t6 // extract priority decrement byte
extbl t9, ThBasePriority % 8, t7 // extract base priority byte
beq t10, 65f // if eq, realtime server
addq t7, 1, t8 // compute boosted priority
bne t6, 30f // if ne, server boost active
//
// Both the client and the server are not realtime and a priority boost
// is not currently active for the server. Under these conditions an
// optimal switch to the server can be performed if the base priority
// of the server is above a minimum threshold or the boosted priority
// of the server is not less than the client priority.
//
// The new priority byte is merged into the quadword fetched above (t11)
// and the aligned longword that contains it is written back.
//
cmpult t8, t4, v0 // check if high enough boost
cmpult t8, LOW_REALTIME_PRIORITY, t10 // check if less than realtime
lda t12, ThPriority(s2) // get address of thread priority byte
bne v0, 20f // if ne, boosted priority less
mskbl t11, t12, t11 // clear priority byte
cmoveq t10,LOW_REALTIME_PRIORITY-1,t8 // set maximum server priority
insbl t8, t12, t10 // get priority byte into position
bis t10, t11, t11 // merge
bic t12, 3, t12 // get longword address
extll t11, t12, t9 // extract stored longword
stl t9, 0(t12) // store new priority
br zero, 70f
//
// The boosted priority of the server is less than the current priority of
// the client. If the server base priority is above the required threshold,
// then an optimal switch to the server can be performed by temporarily
// raising the priority of the server to that of the client.
//
//
// N.B. This code takes advantage of the fact that ThPriorityDecrement,
// ThBasePriority, ThDecrementCount, and ThQuantum are contained in
// the same dword of the KTHREAD object.
//
#if ((ThBasePriority / 4) != (ThPriorityDecrement / 4))
#error "ThBasePriority and ThPriorityDecrement have moved"
#endif
#if ((ThBasePriority / 4) != (ThDecrementCount / 4))
#error "ThBasePriority and ThDecrementCount have moved"
#endif
#if ((ThBasePriority / 4) != (ThQuantum / 4))
#error "ThBasePriority and ThQuantum have moved"
#endif
20:
cmpult t7, BASE_PRIORITY_THRESHOLD, v0 // check if above threshold
subq t4, t7, t11 // compute priority decrement value
bne v0, LongWay // if ne[TRUE], priority below threshold
lda t10, ROUND_TRIP_DECREMENT_COUNT(zero) // get system decrement
mskbl t9, ThPriorityDecrement % 8, t9 // zero ThPriorityDecrement in source
mskbl t9, ThDecrementCount % 8, t9 // zero ThDecrementCount in source
insbl t11, ThPriorityDecrement % 8, t11 // position new priority decrement byte
insbl t10, ThDecrementCount % 8, t10 // position new decrement count byte
bis t9, t11, t9 // merge previous and priority decrement
bis t9, t10, t9 // merge ThDecrementCount
lda t12, ThBasePriority(s2) // get address to store result
bic t12, 3, t12 // make longword address
extll t9, t12, t10 // extract stored longword
stl t10, 0(t12) // store updated values
StoreByte(t4, ThPriority(s2)) // raise server priority to client priority
br zero, 70f //
//
// A server boost has previously been applied to the server thread. Count
// down the decrement count to determine if another optimal server switch
// is allowed.
//
//
// N.B. This code takes advantage of the fact that ThPriorityDecrement and
// ThDecrementCount are contained in the same dword of the KTHREAD object.
//
#if ((ThDecrementCount / 4) != (ThPriorityDecrement / 4))
#error "ThDecrementCount and ThPriorityDecrement have moved"
#endif
30:
extbl t9, ThDecrementCount % 8, a5 // get original count
lda t12, ThDecrementCount(s2) //
mskbl t9, t12, t11 // clear count byte
subq a5, 1, a5 // decrement original count
insbl a5, t12, a5 // get new count into position
bic t12, 3, t12 // get the longword address
bis t11, a5, t11 // merge in new count
extll t11, t12, t11 // get the longword to store
stl t11, 0(t12) // store updated count
beq a5, 40f // optimal switches exhausted
//
// Another optimal switch to the server is allowed provided that the
// server priority is not less than the client priority.
//
cmpult t5, t4, v0 // check if server lower priority
beq v0, 70f // if eq[FALSE], server not lower
br zero, LongWay //
//
// The server has exhausted the number of times an optimal switch may
// be performed without reducing its priority. Clear the priority
// decrement and set the server priority back to its base priority (t7).
//
// NOTE(review): the original comment here said "unboosted value minus
// one", but the code below stores the base priority byte unchanged -
// confirm which is intended.
//
40:
StoreByte( zero, ThPriorityDecrement(s2) ) // clear server priority decr
StoreByte( t7, ThPriority(s2) ) // set server priority to base
br zero, LongWay //
//
// The client is realtime. In order for an optimal switch to occur, the
// server must also be realtime and run at a higher or equal priority.
//
60:
cmpult t5, t4, v0 // check if server is lower priority
bne v0, LongWay // if ne, server is lower priority
//
// Realtime server: reload the server thread quantum from its process.
//
65:
ldq_u t12, PrThreadQuantum(s4)
extbl t12, PrThreadQuantum % 8, t11 // get process quantum value
StoreByte( t11, ThQuantum(s2) ) // set server thread quantum
//
// An optimal switch to the server can be executed.
//
//
// Set the next processor for the server thread.
//
70:
#if !defined(NT_UP)
ldl t1, PbNumber(s0) // set server next processor number
StoreByte(t1, ThNextProcessor(s2))
#endif
//
// Set the address of the wait block list in the client thread, complete
// the initialization of the builtin event wait block, and insert the wait
// block in client event wait list.
//
// N.B. a1, a2, and a3 are still expected to hold the wait reason, wait
// mode, and wait object address passed by the caller. This assumes
// the macros used above (StoreByte, GET_PROCESSOR_CONTROL_BLOCK_BASE)
// do not modify the argument registers - TODO confirm.
//
lda t3, EVENT_WAIT_BLOCK_OFFSET(s1) // compute wait block address
stl t3, ThWaitBlockList(s1) // set address of wait block list
stl zero, ThWaitStatus(s1) // set initial wait status
stl a3, WbObject(t3) // set address of wait object
stl t3, WbNextWaitBlock(t3) // set next wait block address
ldah t1, WaitAny(zero) // get wait type and wait key
stl t1, WbWaitKey(t3) // set wait type and wait key
lda t2, EvWaitListHead(a3) // compute event wait listhead address
ldl t5, LsBlink(t2) // get backward link of listhead
lda t6, WbWaitListEntry(t3) // compute wait block list entry address
stl t6, LsBlink(t2) // set backward link of listhead
stl t6, LsFlink(t5) // set forward link in last entry
stl t2, LsFlink(t6) // set forward link in wait entry
stl t5, LsBlink(t6) // set backward link in wait entry
//
// Set the client thread wait parameters, set the thread state to Waiting,
// and insert the thread in the wait list.
//
// The thread is placed on KiWaitInListHead only when the wait mode is
// nonzero, kernel stack swapping is enabled, and the client priority (t4)
// is below LOW_REALTIME_PRIORITY + 9; otherwise KiWaitOutListHead is used.
//
StoreByte( zero, ThAlertable(s1) ) // set alertable FALSE
StoreByte( a1, ThWaitReason(s1) ) // set the wait reason
StoreByte( a2, ThWaitMode(s1) ) // set the wait mode
ldq_u t7, ThEnableStackSwap(s1) // get kernel stack swap enable
extbl t7, ThEnableStackSwap % 8, a3 // extract byte field (a3 is reused)
ldl t6, KeTickCount // get low part of tick count
stl t6, ThWaitTime(s1) // set thread wait time
ldil t3, Waiting // set thread state
StoreByte( t3, ThState(s1) ) //
lda t1, KiWaitInListHead // get address of wait in listhead
beq a2, 75f // if eq, wait mode is kernel
beq a3, 75f // if eq, kernel stack swap disabled
cmpult t4, LOW_REALTIME_PRIORITY + 9, v0 // check if priority in range
bne v0, 76f // if ne, thread priority in range
75: lda t1, KiWaitOutListHead // get address of wait out listhead
76: ldl t5, LsBlink(t1) // get backlink of wait listhead
lda t6, ThWaitListEntry(s1) // compute client wait list entry addr
stl t6, LsBlink(t1) // set backward link of listhead
stl t6, LsFlink(t5) // set forward link in last entry
stl t1, LsFlink(t6) // set forward link in wait entry
stl t5, LsBlink(t6) // set backward link in wait entry
//
// If the current thread is processing a queue entry, then attempt to
// activate another thread that is blocked on the queue object.
//
// N.B. The next thread address can change if the routine to activate
// a queue waiter is called. The target thread is therefore parked
// in PbNextThread across the call and read back afterwards.
//
77: ldl a0, ThQueue(s1) // get queue object address
beq a0, 78f // if eq, no queue object attached
stl s2, PbNextThread(s0) // publish target as next thread
bsr ra, KiActivateWaiterQueue // attempt to activate a blocked thread
ldl s2, PbNextThread(s0) // get next thread address
stl zero, PbNextThread(s0) // set next thread address to NULL
78: stl s2, PbCurrentThread(s0) // set address of current thread object
bsr ra, SwapContext // swap context
//
// On return from SwapContext, s2 is pointer to thread object.
//
ldq_u v0, ThWaitIrql(s2)
extbl v0, ThWaitIrql % 8, a0 // get original Irql
ldl t0, ThWaitStatus(s2) // get wait completion status
//
// Lower IRQL to its previous level.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. t0 is read after SWAP_IRQL below, so this sequence relies on
// SWAP_IRQL leaving t0 intact - TODO confirm against the macro.
//
SWAP_IRQL // v0 = previous Irql
//
// If the wait was not interrupted to deliver a kernel APC, then return the
// completion status.
//
bis t0, zero, v0 // v0 = wait completion status
xor t0, STATUS_KERNEL_APC, t1 // check if awakened for kernel APC
bne t1, 90f // if ne, normal wait completion
//
// Raise IRQL to synchronization level and acquire the dispatcher database lock.
//
// N.B. The raise IRQL code is duplicated here to avoid any extra overhead
// since this is such a common operation.
//
ldl a0, KiSynchIrql // get new IRQL level
SWAP_IRQL // v0 = previous Irql
StoreByte( v0, ThWaitIrql(s2) ) // set client wait Irql
//
// Acquire the dispatcher database lock.
//
// The lock word is claimed with a load-locked/store-conditional pair; the
// current thread address (s2) is stored as the ownership value. If the
// lock is held or the store conditional fails, control goes to 85 below.
//
#if !defined(NT_UP)
lda t2, KiDispatcherLock // get current lock value address
80:
ldl_l t3, 0(t2) // get current lock value
bis s2, zero, t4 // set ownership value
bne t3, 85f // if ne, spin lock owned
stl_c t4, 0(t2) // set spin lock owned
beq t4, 85f // if eq, store conditional failed
mb // synchronize subsequent reads after
// the lock is acquired
#endif
//
// NOTE(review): the two loads below fetch the saved wait reason and wait
// mode into t1 and t2, but ContinueWait reloads a0-a2 from the frame and
// no visible instruction reads t1 or t2 afterwards. They appear to be
// redundant unless KiContinueClientWait consumes t1/t2 - confirm.
//
ldl t1, ExPsr + 4(sp) // load saved wait reason
ldl t2, ExPsr + 8(sp) // load saved wait mode
br zero, ContinueWait //
//
// Ready the target thread for execution and wait on the specified wait
// object.
//
// N.B. No instruction on any path to LongWay writes a0, so a0 is expected
// to still hold the NextThread argument (assuming the macros used
// above preserve it - TODO confirm).
//
LongWay:
bsr ra, KiReadyThread // ready thread for execution
//
// Continue and return the wait completion status.
//
// N.B. The wait continuation routine is called with the dispatcher
// database locked.
//
ContinueWait:
ldl a0, ExPsr+12(sp) // get wait object address
ldl a1, ExPsr+4(sp) // get wait reason
ldl a2, ExPsr+8(sp) // get wait mode
bsr ra, KiContinueClientWait // continue client wait
//
// Common exit: v0 holds the wait completion status.
//
90:
ldq s0, ExIntS0(sp) // restore registers s0 - fp
ldq s1, ExIntS1(sp) //
ldq s2, ExIntS2(sp) //
ldq s3, ExIntS3(sp) //
ldq s4, ExIntS4(sp) //
ldq s5, ExIntS5(sp) //
ldq fp, ExIntFp(sp) //
ldq ra, ExIntRa(sp) // restore return address
lda sp, ExceptionFrameLength(sp) // deallocate context frame
ret zero, (ra) // return
//
// Dispatcher lock contention path. IRQL is dropped back to the level that
// was in effect before the raise (v0), the lock word is polled with plain
// loads until it reads zero, and then IRQL is raised again and the
// interlocked acquire at 80 is retried.
//
#if !defined(NT_UP)
85:
bis v0, zero, a0 // lower back down to old IRQL
SWAP_IRQL
86:
ldl t3, 0(t2) // read current lock value
bne t3, 86b // loop in cache until lock available
ldl a0, KiSynchIrql // raise back to sync level to retry acquire
SWAP_IRQL // v0 = previous IRQL
br zero, 80b // retry spinlock acquisition
#endif //NT_UP
.end KiSwitchToThread
SBTTL("Unlock Dispatcher Database")
//++
//
// VOID
// KiUnlockDispatcherDatabase (
// IN KIRQL OldIrql
// )
//
// Routine Description:
//
// This routine is entered at IRQL DISPATCH_LEVEL with the dispatcher
// database locked. Its function is to either unlock the dispatcher
// database and return or initiate a context switch if another thread
// has been selected for execution.
//
// N.B. A context switch CANNOT be initiated if the previous IRQL
// is DISPATCH_LEVEL.
//
// N.B. This routine is carefully written to be a leaf function. If,
// however, a context swap should be performed, the routine is
// switched to a nested function.
//
// Arguments:
//
// OldIrql (a0) - Supplies the IRQL when the dispatcher database
// lock was acquired.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(KiUnlockDispatcherDatabase)
//
// Check if a thread has been scheduled to execute on the current processor
//
// On the path that reaches label 30 and then falls out of this routine,
// v0 (PRCB), t2 (next thread), and a0 (OldIrql) are live and are consumed
// by KxUnlockDispatcherDatabase, which immediately follows.
//
GET_PROCESSOR_CONTROL_BLOCK_BASE // v0 = PRCB
cmpult a0, DISPATCH_LEVEL, t1 // check if IRQL below dispatch level
ldl t2, PbNextThread(v0) // get next thread address
bne t2, 30f // if ne, next thread selected
//
// Release dispatcher database lock, restore IRQL to its previous level
// and return
//
10:
#if !defined(NT_UP)
mb
stl zero, KiDispatcherLock
#endif
SWAP_IRQL // restore IRQL from a0 (OldIrql)
ret zero, (ra)
//
// A new thread has been selected to run on the current processor, but
// the new IRQL is not below dispatch level. If the current processor is
// not executing a DPC, then request a dispatch interrupt on the current
// processor before releasing the dispatcher lock and restoring IRQL.
//
20:
ldl t2, PbDpcRoutineActive(v0)
bne t2,10b // if ne, DPC routine active
#if !defined(NT_UP)
mb
stl zero, KiDispatcherLock
#endif
SWAP_IRQL // restore IRQL from a0 (OldIrql)
ldil a0, DISPATCH_LEVEL // set interrupt request level
REQUEST_SOFTWARE_INTERRUPT // request DPC interrupt
ret zero, (ra)
//
// A new thread has been selected to run on the current processor.
//
// If the new IRQL is less than dispatch level, then switch to the new
// thread by falling through the end of this routine into
// KxUnlockDispatcherDatabase.
//
30: beq t1, 20b // if eq, not below dispatch level
.end KiUnlockDispatcherDatabase
//
// N.B. This routine is carefully written as a nested function.
// Control only reaches this routine from above.
//
// v0 contains the address of PRCB
// t2 contains the next thread
//
NESTED_ENTRY(KxUnlockDispatcherDatabase, ExceptionFrameLength, zero)
//
// Entry state (supplied by the fall-through from KiUnlockDispatcherDatabase):
//
// a0 - previous IRQL
// v0 - address of the PRCB
// t2 - address of the next thread
//
lda sp, -ExceptionFrameLength(sp) // allocate context frame
stq ra, ExIntRa(sp) // save return address
stq s0, ExIntS0(sp) // save integer registers
stq s1, ExIntS1(sp)
stq s2, ExIntS2(sp)
stq s3, ExIntS3(sp)
stq s4, ExIntS4(sp)
stq s5, ExIntS5(sp)
stq fp, ExIntFp(sp)
PROLOGUE_END
bis v0, zero, s0 // set address of PRCB
GET_CURRENT_THREAD // get current thread address
bis v0, zero, s1 // s1 = current thread
bis t2, zero, s2 // set next thread address
StoreByte(a0, ThWaitIrql(s1)) // save previous IRQL
stl zero, PbNextThread(s0) // clear next thread address
//
// Reready current thread for execution and swap context to the selected thread.
//
// N.B. The return from the call to swap context is directly to the swap
// thread exit. ra is loaded with the address of KiSwapThreadExit
// and SwapContext is entered with a jump rather than a call, so the
// frame allocated above is torn down by the KiSwapThreadExit code.
//
bis s1, zero, a0 // set address of previous thread object
stl s2, PbCurrentThread(s0) // set address of current thread object
bsr ra, KiReadyThread // reready thread for execution
lda ra, KiSwapThreadExit // set return address
jmp SwapContext // swap context
.end KxUnlockDispatcherDatabase
SBTTL("Swap Thread")
//++
//
// VOID
// KiSwapThread (
// VOID
// )
//
// Routine Description:
//
// This routine is called to select the next thread to run on the
// current processor and to perform a context switch to the thread.
//
// Arguments:
//
// None.
//
// Return Value:
//
// Wait completion status (v0).
//
//--
NESTED_ENTRY(KiSwapThread, ExceptionFrameLength, zero)
//
// Register usage:
//
// s0 - address of the PRCB
// s1 - current thread (reused for the wait status at KiSwapThreadExit)
// s2 - thread selected to run next
// s3 - address of the PCR
// s4 - scratch, then address of the ready queue listhead being scanned
// s5 - copy of KiReadySummary read on entry
// fp - processor set member (multiprocessor builds only)
// t0 - ready summary, shifted so the bit under test is bit 63
// t2 - priority (bit number) of the ready queue under test
// t4 - list entry of the candidate thread
//
lda sp, -ExceptionFrameLength(sp) // allocate context frame
stq ra, ExIntRa(sp) // save return address
stq s0, ExIntS0(sp) // save integer registers s0 - s5
stq s1, ExIntS1(sp) //
stq s2, ExIntS2(sp) //
stq s3, ExIntS3(sp) //
stq s4, ExIntS4(sp) //
stq s5, ExIntS5(sp) //
stq fp, ExIntFp(sp) // save fp
PROLOGUE_END
GET_PROCESSOR_CONTROL_REGION_BASE //
bis v0, zero, s3 // get PCR in s3
ldl s0, PcPrcb(s3) // get address of PRCB
ldl s5, KiReadySummary // get ready summary in s5
zapnot s5, 0x0f, t0 // clear high 32 bits.
GET_CURRENT_THREAD
bis v0, zero, s1 // get current thread address
ldl s2, PbNextThread(s0) // get next thread address
#if !defined(NT_UP)
ldl fp, PcSetMember(s3) // get processor affinity mask
#endif
stl zero, PbNextThread(s0) // zero next thread address
bne s2, 120f // if ne, next thread selected
//
// Find the highest nibble in the ready summary that contains a set bit
// and left justify so the nibble is in bits <63:60>.
//
// cmpbge against zero sets one result bit per byte of t0 that is zero, so
// a clear bit marks a nonzero byte. The cmovlbc chain leaves t2 holding
// the top bit number of the highest nonzero byte among bytes 0-3.
//
cmpbge zero, t0, s4 // generate mask with clear
// bits representing nonzero bytes.
ldil t2, 7 // initial bit number
srl s4, 1, t5 // check bits <15:8>
cmovlbc t5, 15, t2 // if bit clear, bit number = 15
srl s4, 2, t6 // check bits <23:16>
cmovlbc t6, 23, t2 // if bit clear, bit number = 23
srl s4, 3, t7 // check bits <31:24>
cmovlbc t7, 31, t2 // if bit clear, bit number = 31
bic t2, 7, t3 // get byte shift from priority
srl t0, t3, s4 // isolate highest nonzero byte
and s4, 0xf0, t4 // check if high nibble nonzero
subq t2, 4, t1 // compute bit number if high nibble zero
cmoveq t4, t1, t2 // if eq, high nibble zero
ornot zero, t2, t4 // compute left justify shift count
sll t0, t4, t0 // left justify ready summary to nibble
//
// If the next bit is set in the ready summary, then scan the corresponding
// dispatcher ready queue.
//
30:
blt t0, 50f // if ltz, queue contains an entry
31:
sll t0, 1, t0 // position next ready summary bit
subq t2, 1, t2 // decrement ready queue priority
bne t0, 30b // if ne, more queues to scan
//
// All ready queues were scanned without finding a runnable thread so
// default to the idle thread and set the appropriate bit in idle summary.
//
#if defined(_COLLECT_SWITCH_DATA_)
lda t0, KeThreadSwitchCounters // get switch counters address
ldl v0, TwSwitchToIdle(t0) // increment switch to idle
addq v0, 1, v0 //
stl v0, TwSwitchToIdle(t0) //
#endif
#if defined(NT_UP)
ldil t0, 1 // get current idle summary
#else
ldl t0, KiIdleSummary // get current idle summary
bis t0, fp, t0 // set member bit in idle summary
#endif
stl t0, KiIdleSummary // set new idle summary
ldl s2, PbIdleThread(s0) // set address of idle thread
br zero, 120f // swap context
//
// The ready queue for priority t2 is nonempty. Each listhead occupies
// eight bytes, so the queue address is base + 8 * priority.
//
50:
lda t1, KiDispatcherReadyListHead // get ready list head base address
s8addq t2, t1, s4 // compute ready queue address
ldl t4, LsFlink(s4) // get address of next queue entry
55:
subq t4, ThWaitListEntry, s2 // compute address of thread object
#if !defined(NT_UP)
//
// If the thread can execute on the current processor, then remove it from
// the dispatcher ready queue. Otherwise advance to the next entry; when
// the end of this queue is reached, resume scanning at the next lower
// priority.
//
ldl t5, ThAffinity(s2) // get thread affinity
and t5, fp, t6 // the current processor
bne t6, 60f // if ne, thread affinity compatible
ldl t4, LsFlink(t4) // get address of next entry
cmpeq t4, s4, t1 // check for end of list
beq t1, 55b // if eq, not end of list
br zero, 31b //
60:
//
// If the thread last ran on the current processor, the processor is the
// ideal processor for the thread, the thread has been waiting for longer
// than a quantum, or its priority is greater than low realtime plus 9,
// then select the thread. Otherwise, an attempt is made to find a more
// appropriate candidate.
//
// t12 - current processor number
// t11 - quadword containing the selected thread's ThNextProcessor byte
//
ldq_u t1, PcNumber(s3) // get current processor number
extbl t1, PcNumber % 8, t12 //
ldq_u t11, ThNextProcessor(s2) // get thread's last processor number
extbl t11, ThNextProcessor % 8, t9 //
cmpeq t9, t12, t5 // check thread's last processor
bne t5, 110f // if ne, last processor match
ldq_u t6, ThIdealProcessor(s2) // get thread's ideal processor number
extbl t6, ThIdealProcessor % 8, a3 //
cmpeq a3, t12, t8 // check thread's ideal processor
bne t8, 100f // if ne, ideal processor match
ldl t6, KeTickCount // get low part of tick count
ldl t7, ThWaitTime(s2) // get time of thread ready
subq t6, t7, t8 // compute length of wait
cmpult t8, READY_SKIP_QUANTUM+1, t1 // check if wait time exceeded
cmpult t2, LOW_REALTIME_PRIORITY+9, t3 // check if priority in range
and t1, t3, v0 // check if priority and time match
beq v0, 100f // if eq, select this thread
//
// Search forward in the ready queue until the end of the list is reached
// or a more appropriate thread is found.
//
// t7 - list entry of the alternative candidate
// a0 - thread object of the alternative candidate
//
ldl t7, LsFlink(t4) // get address of next entry
80: cmpeq t7, s4, t1 // if eq, end of list
bne t1, 100f // select original thread
subq t7, ThWaitListEntry, a0 // compute address of thread object
ldl a2, ThAffinity(a0) // get thread affinity
and a2, fp, t1 // check for compatible thread affinity
beq t1, 85f // if eq, thread affinity not compatible
ldq_u t5, ThNextProcessor(a0) // get last processor number
extbl t5, ThNextProcessor % 8, t9 //
cmpeq t9, t12, t10 // if eq, processor number match
bne t10, 90f //
ldq_u a1, ThIdealProcessor(a0) // get ideal processor number
extbl a1, ThIdealProcessor % 8, a3
cmpeq a3, t12, t10 // if eq, ideal processor match
bne t10, 90f
85: ldl t8, ThWaitTime(a0) // get time of thread ready
ldl t7, LsFlink(t7) // get address of next entry
subq t6, t8, t8 // compute length of wait
cmpult t8, READY_SKIP_QUANTUM+1, t5 //
bne t5, 80b // if ne, wait time not exceeded
br zero, 100f // select original thread
//
// A better candidate was found; it replaces the original selection.
//
90: bis a0, zero, s2 // set thread address
bis t7, zero, t4 // set list entry address
bis t5, zero, t11 // copy last processor data
//
// Record the current processor as the selected thread's next processor.
//
100: insbl t12, ThNextProcessor % 8, t8 // move next processor into position
mskbl t11, ThNextProcessor % 8, t5 // mask next processor position
bis t8, t5, t6 // merge
stq_u t6, ThNextProcessor(s2) // update next processor
110:
#if defined(_COLLECT_SWITCH_DATA_)
ldq_u t5, ThNextProcessor(s2) // get last processor number
extbl t5, ThNextProcessor % 8, t9 //
ldq_u a1, ThIdealProcessor(s2) // get ideal processor number
extbl a1, ThIdealProcessor % 8, a3
lda t0, KeThreadSwitchCounters + TwFindAny // compute address of Any counter
addq t0, TwFindIdeal-TwFindAny, t1 // compute address of Ideal counter
cmpeq t9, t12, t7 // if eq, last processor match
addq t0, TwFindLast-TwFindAny, t6 // compute address of Last counter
cmpeq a3, t12, t5 // if eq, ideal processor match
cmovne t7, t6, t0 // if last match, use last counter
cmovne t5, t1, t0 // if ideal match, use ideal counter
ldl v0, 0(t0) // increment counter
addq v0, 1, v0 //
stl v0, 0(t0) //
#endif
#endif
//
// Unlink the selected thread from its ready queue. If the queue becomes
// empty (the entry's forward and backward links were equal), clear the
// queue's bit in the ready summary.
//
ldl t5, LsFlink(t4) // get list entry forward link
ldl t6, LsBlink(t4) // get list entry backward link
stl t5, LsFlink(t6) // set forward link in previous entry
stl t6, LsBlink(t5) // set backward link in next entry
cmpeq t6, t5, t7 // if eq, list is empty
beq t7, 120f //
ldil t1, 1 // compute ready summary set member
sll t1, t2, t1 //
xor t1, s5, t1 // clear member bit in ready summary
stl t1, KiReadySummary //
//
// Swap context to the next thread
//
120:
stl s2, PbCurrentThread(s0) // set address of current thread object
bsr ra, SwapContext // swap context
ALTERNATE_ENTRY(KiSwapThreadExit)
//
// Lower IRQL, deallocate context frame, and return wait completion status.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. The register v0 contains the complement of the kernel APC pending state.
//
// N.B. The register s2 contains the address of the new thread.
//
ldl s1, ThWaitStatus(s2) // get wait completion status
ldq_u t1, ThWaitIrql(s2) // get original IRQL
extbl t1, ThWaitIrql % 8, a0 //
bis v0, a0, t3 // check if APC pending and IRQL is zero
bne t3, 10f // if ne, no APC pending or IRQL nonzero
//
// Lower IRQL to APC level and dispatch APC interrupt.
//
ldil a0, APC_LEVEL
SWAP_IRQL
ldil a0, APC_LEVEL
DEASSERT_SOFTWARE_INTERRUPT
GET_PROCESSOR_CONTROL_BLOCK_BASE // get PRCB in v0
ldl t1, PbApcBypassCount(v0) // increment the APC bypass count
addl t1, 1, t2
stl t2, PbApcBypassCount(v0) // store result
bis zero, zero, a0 // set previous mode to kernel
bis zero, zero, a1 // set exception frame address
bis zero, zero, a2 // set trap frame address
bsr ra, KiDeliverApc // deliver kernel mode APC
bis zero, zero, a0 // set original wait IRQL
//
// Lower IRQL to wait level, set return status, restore registers, and
// return.
//
10:
SWAP_IRQL
bis s1, zero, v0 // v0 = wait completion status
ldq ra, ExIntRa(sp) // restore return address
ldq s0, ExIntS0(sp) // restore int regs S0-S5
ldq s1, ExIntS1(sp) //
ldq s2, ExIntS2(sp) //
ldq s3, ExIntS3(sp) //
ldq s4, ExIntS4(sp) //
ldq s5, ExIntS5(sp) //
ldq fp, ExIntFp(sp) // restore fp
lda sp, ExceptionFrameLength(sp) // deallocate context frame
ret zero, (ra) // return
.end KiSwapThread
SBTTL("Dispatch Interrupt")
//++
//
// Routine Description:
//
// This routine is entered as the result of a software interrupt generated
// at DISPATCH_LEVEL. Its function is to process the Deferred Procedure Call
// (DPC) list, and then perform a context switch if a new thread has been
// selected for execution on the processor.
//
// This routine is entered at IRQL DISPATCH_LEVEL with the dispatcher
// database unlocked. When a return to the caller finally occurs, the
// IRQL remains at DISPATCH_LEVEL, and the dispatcher database is still
// unlocked.
//
// N.B. On entry to this routine only the volatile integer registers have
// been saved. The volatile floating point registers have not been saved.
//
// Arguments:
//
// fp - Supplies a pointer to the base of a trap frame.
//
// Return Value:
//
// None.
//
//--
.struct 0
DpSp: .space 8 // saved stack pointer
DpBs: .space 8 // base of previous stack
DpcFrameLength: // DPC frame length
NESTED_ENTRY(KiDispatchInterrupt, ExceptionFrameLength, zero)
lda sp, -ExceptionFrameLength(sp) // allocate context frame
stq ra, ExIntRa(sp) // save return address
//
// Save the saved registers in case we context switch to a new thread.
//
// N.B. - If we don't context switch then we need only restore those
// registers that we use in this routine, currently those registers
// are s0, s1
//
stq s0, ExIntS0(sp) // save integer registers s0-s6
stq s1, ExIntS1(sp) //
stq s2, ExIntS2(sp) //
stq s3, ExIntS3(sp) //
stq s4, ExIntS4(sp) //
stq s5, ExIntS5(sp) //
stq fp, ExIntFp(sp) //
PROLOGUE_END
//
// Increment the dispatch interrupt count
//
GET_PROCESSOR_CONTROL_BLOCK_BASE //
bis v0, zero, s0 // s0 = base address of PRCB
ldl t2, PbDispatchInterruptCount(s0) // get old dispatch interrupt count
addl t2, 1, t3 // increment dispatch interrupt count
stl t3, PbDispatchInterruptCount(s0) // set new dispatch interrupt count
//
// Process the DPC List with interrupts off.
//
ldl t0, PbDpcQueueDepth(s0) // get current queue depth
beq t0, 20f // no DPCs, check quantum end
PollDpcList:
DISABLE_INTERRUPTS
//
// Save current initial stack address and set new initial stack address.
//
GET_PROCESSOR_CONTROL_REGION_BASE // v0 = PCR address
ldl a0, PcDpcStack(v0) // get address of DPC stack
lda t0, -DpcFrameLength(a0) // allocate DPC frame
stq sp, DpSp(t0) // save old stack pointer
bis t0, t0, sp // set new stack pointer
SET_INITIAL_KERNEL_STACK // a = new, v0 = previous
stq v0, DpBs(sp) // save current initial stack
bsr ra, KiRetireDpcList // process the DPC list
//
// Switch back to previous stack and restore the initial stack limit.
//
ldq a0, DpBs(sp) // get previous initial stack address
SET_INITIAL_KERNEL_STACK // set current initial stack
ldq sp, DpSp(sp) // restore stack pointer
ENABLE_INTERRUPTS
//
// Check to determine if quantum end has occured.
//
20:
ldl t0, PbQuantumEnd(s0) // get quantum end indicator
beq t0, 25f // if eq, no quantum end request
stl zero, PbQuantumEnd(s0) // clear quantum end indicator
bsr ra, KiQuantumEnd // process quantum end request
beq v0, 50f // if eq, no next thread, return
bis v0, zero, s2 // set next thread
br zero, 40f // else restore interrupts and return
//
// Determine if a new thread has been selected for execution on
// this processor.
//
25: ldl v0, PbNextThread(s0) // get address of next thread object
beq v0, 50f // if eq, no new thread selected
//
// Lock dispatcher database and reread address of next thread object
// since it is possible for it to change in mp sysytem
//
#if !defined(NT_UP)
lda s1, KiDispatcherLock // get dispatcher base lock address
#endif
30:
ldl a0, KiSynchIrql
SWAP_IRQL
#if !defined(NT_UP)
ldl_l t0, 0(s1) // get current lock value
bis s1, zero, t1 // t1 = lock ownership value
bne t0, 45f // ne => spin lock owned
stl_c t1, 0(s1) // set lock to owned
beq t1, 45f // zero => stl_c failed
mb // synchronize subsequent reads after
// the spinlock is acquired
#endif
//
// Reready current thread for execution and swap context to the selected thread.
//
ldl s2, PbNextThread(s0) // get addr of next thread
40:
GET_CURRENT_THREAD // v0 = address of current thread
bis v0, zero, s1 // s1 = address of current thread
stl zero, PbNextThread(s0) // clear address of next thread
bis s1, zero, a0 // parameter to KiReadyThread
stl s2, PbCurrentThread(s0) // set address of current thread
bsr ra, KiReadyThread // reready thread for execution
bsr ra, KiSaveVolatileFloatState
bsr ra, SwapContext // swap context
//
// Restore the saved integer registers that were changed for a context
// switch only.
//
// N.B. - The frame pointer must be restored before the volatile floating
// state because it is the pointer to the trap frame.
//
ldq s2, ExIntS2(sp) // restore s2 - s5
ldq s3, ExIntS3(sp) //
ldq s4, ExIntS4(sp) //
ldq s5, ExIntS5(sp) //
ldq fp, ExIntFp(sp) // restore the frame pointer
bsr ra, KiRestoreVolatileFloatState
//
// Restore the remaining saved integer registers and return.
//
50:
ldq s0, ExIntS0(sp) // restore s0 - s1
ldq s1, ExIntS1(sp) //
ldq ra, ExIntRa(sp) // get return address
lda sp, ExceptionFrameLength(sp) // deallocate context frame
ret zero, (ra) // return
#if !defined(NT_UP)
45:
//
// Dispatcher lock is owned; spin on both the dispatcher lock and
// the DPC queue becoming non-empty.
//
bis v0, zero, a0 // lower back to original IRQL to wait for locks
SWAP_IRQL
48:
ldl t0, 0(s1) // read current dispatcher lock value
beq t0, 30b // lock available. retry spinlock
ldl t1, PbDpcQueueDepth(s0) // get current DPC queue depth
bne t1, PollDpcList // if nez, list not empty
br zero, 48b // loop in cache until lock available
#endif
.end KiDispatchInterrupt
SBTTL("Swap Context to Next Thread")
//++
//
// Routine Description:
//
// This routine is called to swap context from one thread to the next.
//
// In order, it: marks the next thread as Running; (MP builds only)
// acquires KiContextSwapLock and releases KiDispatcherLock; calls
// KiSaveNonVolatileFloatState; records the outgoing kernel stack pointer
// in the previous thread; builds the a0-a5 arguments for
// SWAP_THREAD_CONTEXT (choosing a new address space number when the
// process changes); (MP builds only) releases KiContextSwapLock; invokes
// SWAP_THREAD_CONTEXT; loads sp from the next thread; requests an APC
// interrupt if the next thread has a kernel APC pending; bumps the
// per-processor and per-thread context switch counters; and calls
// KiRestoreNonVolatileFloatState.
//
// N.B. The return address is stored at ExSwapReturn(sp) on the incoming
// stack and reloaded from ExSwapReturn(sp) after sp has been replaced
// with the next thread's saved kernel stack pointer, so the routine
// returns to the address saved in the next thread's frame.
//
// Arguments:
//
// s0 - Address of Processor Control Block (PRCB).
// s1 - Address of previous thread object.
// s2 - Address of next thread object.
// sp - Pointer to an exception frame.
//
// Return value:
//
// v0 - complement of Kernel APC pending: set to 1 when no kernel APC is
// pending for the new thread, and to 0 when one is pending (in which
// case an APC interrupt has been requested).
// s2 - Address of current thread object (s2 is not modified here; the
// thread it addresses is the one that has just been switched to).
//
//--
NESTED_ENTRY(SwapContext, 0, zero)
stq ra, ExSwapReturn(sp) // save return address in the outgoing frame
PROLOGUE_END
//
// Set new thread's state to running. Note this must be done
// under the dispatcher lock so that KiSetPriorityThread sees
// the correct state.
//
ldil t0, Running // set state of new thread to running
StoreByte( t0, ThState(s2) ) // ThState(next thread) = Running
#if !defined(NT_UP)
//
// Acquire the context swap lock so the address space of the old thread
// cannot be deleted and then release the dispatcher database lock.
//
// N.B. This lock is used to protect the address space until the context
// switch has sufficiently progressed to the point where the address
// space is no longer needed. This lock is also acquired by the reaper
// thread before it finishes thread termination.
//
// The lock is taken with a load-locked/store-conditional sequence. The
// value written as "owned" is the address of the lock itself. If the lock
// is already held, or the store-conditional fails, control goes to label
// 11 (at the end of the routine), which spins with ordinary loads until
// the lock reads as zero and then retries at label 10.
//
lda t0, KiContextSwapLock // get context swap lock value address
10:
ldl_l t1, 0(t0) // get current lock value
bis t0, zero, t2 // set ownership value (lock address)
bne t1, 11f // if ne, lock already owned
stl_c t2, 0(t0) // set lock ownership value
beq t2, 11f // if eq, store conditional failed
mb // synchronize reads and writes
stl zero, KiDispatcherLock // set dispatcher lock not owned
#endif
#if defined(PERF_DATA)
//
// Accumulate the total time spent in a thread.
//
// The previous thread's 64-bit accumulated count is rebuilt from its
// separately stored high and low longwords, the cycles elapsed since
// PbStartCount are added, and the sum is stored back as two longwords.
// PbStartCount is reset to the current counter value.
//
bis zero,zero,a0 // optional frequency not required
bsr ra, KeQueryPerformanceCounter // 64-bit cycle count in v0
ldq t0, PbStartCount(s0) // get starting cycle count
stq v0, PbStartCount(s0) // set starting cycle count
ldl t1, EtPerformanceCountHigh(s1) // get accumulated cycle count high
sll t1, 32, t2 // position high longword in bits 63:32
ldl t3, EtPerformanceCountLow(s1) // get accumulated cycle count low
zap t3, 0xf0, t4 // zero out high dword sign extension
bis t2, t4, t3 // t3 = 64-bit accumulated cycle count
subq v0, t0, t5 // compute elapsed cycle count
addq t5, t3, t4 // compute new cycle count
stl t4, EtPerformanceCountLow(s1) // set new cycle count in thread
srl t4, 32, t2 // extract high longword of new count
stl t2, EtPerformanceCountHigh(s1) // set new cycle count high in thread
#endif
bsr ra, KiSaveNonVolatileFloatState // save nv floating state
ALTERNATE_ENTRY(SwapFromIdle)
//
// NOTE(review): code entering at SwapFromIdle bypasses the context swap
// lock acquisition and the dispatcher lock release above, yet on MP
// builds still reaches the release of KiContextSwapLock at label 40.
// Presumably such callers already hold KiContextSwapLock - confirm
// against the caller.
//
//
// Get address of old and new process objects.
//
ldl s5, ThApcState + AsProcess(s1) // get address of old process
ldl s4, ThApcState + AsProcess(s2) // get address of new process
//
// With interrupts disabled: store the kernel stack pointer in the previous
// thread object, and load the arguments for SWAP_THREAD_CONTEXT - the new
// thread's initial kernel stack pointer (a0), the new thread address (a1),
// and the new thread's TEB address (a2). The address space arguments
// (a3 - a5) are set up below.
//
// NOTE(review): an earlier version of this comment also described saving
// the current PSR in the context frame and restoring it afterwards; no
// store to ExPsr is visible in this routine - confirm whether that is
// done inside a macro or is no longer performed.
//
DISABLE_INTERRUPTS // disable interrupts
// v0 = current psr
ldl a0, ThInitialStack(s2) // get initial kernel stack pointer
stl sp, ThKernelStack(s1) // save old kernel stack pointer
bis s2, zero, a1 // new thread address
ldl a2, ThTeb(s2) // get address of user TEB
#ifdef NT_UP
//
// On uni-processor systems keep the global current thread address
// up to date.
//
stl a1, KiCurrentThread // save new current thread
#endif //NT_UP
//
// If the old process is the same as the new process, then there is no need
// to change the address space. The a3 parameter indicates that the address
// space is not to be swapped if it is less than zero. Otherwise, a3 will
// contain the pfn of the PDR for the new address space.
//
// The defaults loaded here (a3 = -1, a4 = 0, a5 = 1) are what reach
// SWAP_THREAD_CONTEXT when the processes match. When the processes
// differ but KiMaximumPid is zero, a3 has been replaced by the new pfn
// and a4/a5 keep these defaults.
//
ldil a3, -1 // assume no address space change
bis zero, zero, a4 // assume ASN = 0
bis zero, 1, a5 // assume ASN wrap
bis zero, zero, t3 // show MAX ASN=0
// NOTE(review): this t3 value is overwritten
// before any visible use - confirm whether
// SWAP_THREAD_CONTEXT reads t3
cmpeq s5, s4, t0 // old process = new process?
bne t0, 40f // if ne[true], no address space swap
#if !defined(NT_UP)
//
// Update the processor set masks. Clear the processor set member
// number in the old process and set the processor member number in the
// new process.
//
GET_PROCESSOR_CONTROL_REGION_BASE // get PCR pointer in v0
ldl t0, PcSetMember(v0) // get processor set mask
ldl t1, PrActiveProcessors(s5) // get old active processor set
ldl t2, PrActiveProcessors(s4) // get new active processor set
bic t1, t0, t3 // clear processor member in set
bis t2, t0, t4 // set processor member in set
stl t3, PrActiveProcessors(s5) // set old active processor set
stl t4, PrActiveProcessors(s4) // set new active processor set
#endif
ldl a3, PrDirectoryTableBase(s4) // get page directory PDE
srl a3, PTE_PFN, a3 // pass pfn only
//
// If the maximum address space number is zero, then we know to assign
// ASN of zero to this process, just do it.
//
ldl t3, KiMaximumPid // get MAX ASN
beq t3, 40f // if eq, only ASN=0
//
// Allocate the next address space number for this processor: increment
// the processor's current PID. If the incremented value is less than or
// equal to the maximum, use it and leave the tbiap indicator clear.
// Otherwise wrap the PID to zero and set the tbiap indicator (a5) to a
// nonzero value (the maximum PID). The chosen PID is stored back as the
// processor's current PID.
//
bis zero, zero, a5 // assume tbiap = FALSE
GET_PROCESSOR_CONTROL_REGION_BASE // get PCR pointer in v0
ldl t4, PcCurrentPid(v0) // get current processor PID
addl t4, 1, a4 // increment PID
cmpule a4, t3, t6 // is new PID le max?
cmoveq t6, t3, a5 // if eq[false], set tbiap indicator
cmoveq t6, zero, a4 // if eq[false], new PID is zero
stl a4, PcCurrentPid(v0) // set current processor PID
40:
//
// Release the context swap lock, swap context, and enable interrupts
//
#if !defined(NT_UP)
mb // synchronize all previous writes
// before releasing the spinlock
stl zero, KiContextSwapLock // set spin lock not owned
#endif
// a0 = initial ksp of new thread
// a1 = new thread address
// a2 = new TEB
// a3 = PDR of new address space or -1
// a4 = new ASN
// a5 = ASN wrap indicator
SWAP_THREAD_CONTEXT // swap thread
ldl sp, ThKernelStack(s2) // get new kernel stack pointer
ENABLE_INTERRUPTS // turn on interrupts
//
// If the new thread has a kernel mode APC pending, then request an
// APC interrupt.
//
ldil v0, 1 // set no apc pending
LoadByte(t0, ThApcState + AsKernelApcPending(s2)) // get kernel APC pending
ldl t2, ExPsr(sp) // get previous processor status
// NOTE(review): t2 is not referenced again
// in the visible code of this routine
beq t0, 50f // if eq no apc pending
ldil a0, APC_INTERRUPT // request an apc interrupt
REQUEST_SOFTWARE_INTERRUPT //
bis zero, zero, v0 // set APC pending
50:
//
// Count number of context switches
//
ldl t1, PbContextSwitches(s0) // increment number of switches
addl t1, 1, t1 // for this processor
stl t1, PbContextSwitches(s0) // store result
ldl t0, ThContextSwitches(s2) // increment number of context
addq t0, 1, t0 // switches for thread (quadword add, but
// only the low longword is stored below)
stl t0, ThContextSwitches(s2) // store result
//
// Restore the nonvolatile floating state.
//
// NOTE(review): v0 was set above and is this routine's return value, so
// KiRestoreNonVolatileFloatState is presumably expected to preserve
// v0 - confirm.
//
bsr ra, KiRestoreNonVolatileFloatState
//
// load RA and return with address of current thread in s2
//
// sp now addresses the new thread's frame, so this is the return address
// that was saved when the new thread was last switched out.
//
ldq ra, ExSwapReturn(sp) // get return address
ret zero, (ra) // return
//
// Context swap lock is owned or the store-conditional failed (reached only
// from the MP-only acquisition code at label 10): spin with ordinary loads
// until the lock reads as free, then retry the acquisition.
//
11:
ldl t1, 0(t0) // spin in cache until lock looks free
beq t1, 10b // lock is unowned, retry acquisition
br zero, 11b // lock still owned, keep spinning
.end SwapContext
SBTTL("Swap Process")
//++
//
// BOOLEAN
// KiSwapProcess (
// IN PKPROCESS NewProcess,
// IN PKPROCESS OldProcess
// )
//
// Routine Description:
//
// This function swaps the address space from one process to another by
// assigning a new ASN if necessary and calling the palcode to swap
// the privileged portion of the process context (the page directory
// base pointer and the ASN). On multiprocessor builds (NT_UP not
// defined) this function also maintains the processor set for both
// processes in the switch, under the context swap lock.
//
// Arguments:
//
// NewProcess (a0) - Supplies a pointer to a control object of type process
// which represents the new process to switch to.
//
// OldProcess (a1) - Supplies a pointer to a control object of type process
// which represents the old process to switch from. It is read only on
// multiprocessor builds, to update its active processor set.
//
// Return Value:
//
// v0 is loaded with the tbiap (ASN wrap) indicator immediately before
// SWAP_PROCESS_CONTEXT is invoked: TRUE when the maximum ASN is zero or
// the processor's PID wrapped to zero, zero otherwise.
//
// NOTE(review): the prototype above declares BOOLEAN while this header
// previously stated "None"; confirm whether callers consume v0 and
// whether SWAP_PROCESS_CONTEXT preserves it.
//
//--
LEAF_ENTRY(KiSwapProcess)
//
// Acquire the context swap lock, clear the processor set member in the old
// process, set the processor member in the new process, and release the
// context swap lock.
//
// The PCR pointer is fetched first, on all builds; it is used for
// PcSetMember (MP builds) and PcCurrentPid below.
//
GET_PROCESSOR_CONTROL_REGION_BASE // get PCR pointer in v0
#if !defined(NT_UP)
//
// The lock is taken with a load-locked/store-conditional sequence; the
// value written as "owned" is the address of the lock itself. If the lock
// is already held, or the store-conditional fails, control goes to label
// 15 (after the return), which spins with ordinary loads until the lock
// reads as zero and then retries at label 10.
//
lda t7, KiContextSwapLock // get context swap lock address
10:
ldl_l t0, 0(t7) // get current lock value
bis t7, zero, t1 // set ownership value (lock address)
bne t0, 15f // if ne, lock already owned
stl_c t1, 0(t7) // set lock ownership value
beq t1, 15f // if eq, store conditional failed
mb // synchronize subsequent reads
ldl t0, PcSetMember(v0) // get processor set mask
ldl t1, PrActiveProcessors(a1) // get old active processor set
ldl t2, PrActiveProcessors(a0) // get new active processor set
bic t1, t0, t1 // clear processor member in set
bis t2, t0, t2 // set processor member in set
stl t1, PrActiveProcessors(a1) // set old active processor set
stl t2, PrActiveProcessors(a0) // set new active processor set
mb // synchronize previous writes before
// releasing the spinlock
stl zero, 0(t7) // clear lock value
#endif
//
// If the maximum address space number is zero, then we know to assign
// ASN of zero to this process, just do it.
//
// N.B. a1 (OldProcess) is no longer needed and is reused from here on to
// hold the new ASN.
//
bis zero, zero, a1 // assume ASN = 0
ldil a2, TRUE // assume tbiap = TRUE
ldl t3, KiMaximumPid // get MAX ASN
beq t3, 30f // if eq, only ASN=0
//
// Allocate the next address space number for this processor: increment
// the processor's current PID. If the incremented value is less than or
// equal to the maximum, use it and clear the tbiap indicator. Otherwise
// wrap the PID to zero and leave the tbiap indicator TRUE. The chosen PID
// is stored back as the processor's current PID.
//
ldl t4, PcCurrentPid(v0) // get current processor PID
addl t4, 1, a1 // increment PID
cmpule a1, t3, t6 // is new PID le max?
cmovne t6, zero, a2 // if ne[true], clear tbiap indicator
cmoveq t6, zero, a1 // if eq[false], new PID is zero
stl a1, PcCurrentPid(v0) // set current processor PID
30:
ldl a0, PrDirectoryTableBase(a0) // get page directory PDE
srl a0, PTE_PFN, a0 // pass pfn only
bis a2, zero, v0 // set wrap indicator return value
// (overwrites the PCR pointer in v0)
// a0 = pfn of new page directory base
// a1 = new address space number
// a2 = tbiap indicator
SWAP_PROCESS_CONTEXT // swap address space
ret zero, (ra) // return
#if !defined(NT_UP)
//
// Context swap lock is owned or the store-conditional failed: spin with
// ordinary loads until the lock reads as free, then retry the acquisition.
//
15:
ldl t0, 0(t7) // spin in cache until lock looks free
beq t0, 10b // lock is unowned, retry acquisition
br zero, 15b // lock still owned, keep spinning
#endif
.end KiSwapProcess