|
|
//++ // // Copyright (c) 1989-2000 Microsoft Corporation // // Component Name: // // NT / KE // // Module Name: // // ctxswap.s // // Abstract: // // This module implements the IA64 Process and Thread Context Swaps. // // Author: // // David N. Cutler (davec) 5-Mar-1989 // // Environment: // // Kernel mode only // // Revision History: // // Bernard Lint Jul-12-1995 // // Initial IA64 version // //--
#include "ksia64.h"
.file "ctxswap.s" .text
// // Globals imported: //
.global KiReadySummary .global KiIdleSummary .global KiDispatcherReadyListHead .global KeTickCount .global KiMasterSequence .global KiMasterRid .global PPerfGlobalGroupMask
PublicFunction(KiDeliverApc) PublicFunction(KiSaveExceptionFrame) PublicFunction(KiRestoreExceptionFrame) PublicFunction(KiActivateWaiterQueue) PublicFunction(KiReadyThread) PublicFunction(KeFlushEntireTb) PublicFunction(KiQuantumEnd) PublicFunction(KiSyncNewRegionId) PublicFunction(KiCheckForSoftwareInterrupt) PublicFunction(KiSaveHigherFPVolatileAtDispatchLevel) PublicFunction(KeAcquireQueuedSpinLockAtDpcLevel) PublicFunction(KeReleaseQueuedSpinLockFromDpcLevel) PublicFunction(KeTryToAcquireQueuedSpinLockRaiseToSynch) PublicFunction(WmiTraceContextSwap)
#if DBG
PublicFunction(KeBugCheckEx) #endif // DBG
SBTTL("Unlock Dispatcher Database")
//++
//--------------------------------------------------------------------
//
// VOID
// KiUnlockDispatcherDatabase (
//    IN KIRQL OldIrql
//    )
//
// Routine Description:
//
//    This routine is entered at synchronization level with the dispatcher
//    database locked. Its function is to either unlock the dispatcher
//    database and return or initiate a context switch if another thread
//    has been selected for execution.
//
//    N.B. A context switch CANNOT be initiated if the previous IRQL
//         is greater than or equal to DISPATCH_LEVEL.
//
//    N.B. This routine is carefully written to be a leaf function. If,
//         however, a context swap should be performed, the routine is
//         switched to a nested function (KxUnlockDispatcherDatabase).
//
//    N.B. In the multiprocessor build (NT_UP not defined) this routine
//         does make one call, to KeReleaseQueuedSpinLockFromDpcLevel;
//         rDPC lives in a local stacked register (loc2) so that it
//         survives that call.
//
// Arguments:
//
//    OldIrql (a0) - Supplies the IRQL when the dispatcher database
//        lock was acquired (in low order byte, not zero extended).
//
// Return Value:
//
//    None.
//
//--------------------------------------------------------------------
//--

        NESTED_ENTRY(KiUnlockDispatcherDatabase)
        NESTED_SETUP(1,3,1,0)

//
// Register aliases
//
// N.B. pNotNl and pNoAPC share pt2. pNotNl is used only in this routine;
//      pNoAPC and pAPC are used only in KxUnlockDispatcherDatabase.
//

        rDPC      = loc2                        // DPC active flag

        rpT1      = t1                          // temp pointer
        rpT2      = t2                          // temp pointer
        rpT3      = t3                          // temp pointer
        rT1       = t5                          // temp regs
        rT2       = t6
        rPrcb     = t8                          // PRCB pointer

        pNotNl    = pt2                         // true if next thread not NULL
        pIRQGE    = pt3                         // true if DISPATCH_LEVEL <= old irql
        pIRQLT    = pt4                         // true if DISPATCH_LEVEL > old irql
        pDPC      = pt5                         // true if rDPC == 0, i.e. no DPC is
                                                // active and a dispatch interrupt
                                                // must be requested
        pNoAPC    = pt2                         // do not dispatch APC
        pAPC      = pt9                         // kernel APC pending flag is non-zero

        PROLOGUE_END

//
// Check if a thread has been scheduled to execute on the current processor
//

        movl        rPrcb = KiPcr + PcPrcb
        ;;

        LDPTR       (rPrcb, rPrcb)              // rPrcb -> PRCB
        ;;

        add         rpT1 = PbNextThread, rPrcb  // -> next thread
        add         rpT2 = PbDpcRoutineActive,rPrcb // -> DPC active flag
        ;;

        LDPTR       (v0, rpT1)                  // v0 = next thread
        ;;

        cmp.ne      pNotNl = zero, v0           // pNotNl = next thread is not NULL
        zxt1        a0 = a0                     // isolate old IRQL
        ;;

//
// The .unc compare clears both pIRQGE and pIRQLT when pNotNl is false, so
// with no next thread neither the branch below nor the DPC flag load is
// taken and rDPC keeps the value 1 (no dispatch interrupt is requested).
//

(pNotNl) cmp.leu.unc pIRQGE, pIRQLT = DISPATCH_LEVEL, a0
        mov         rDPC = 1                    // speculate that DPC is active
(pIRQLT) br.spnt    KxUnlockDispatcherDatabase
        ;;

//
// Case 1:
// Next thread is NULL:
// Release dispatcher database lock, restore IRQL to its previous level
// and return
//

//
// Case 2:
// A new thread has been selected to run on the current processor, but
// the new IRQL is not below dispatch level. Release the dispatcher
// lock and restore IRQL. If the current processor is
// not executing a DPC, then request a dispatch interrupt on the current
// processor.
//
// At this point pNotNl = 1 if thread not NULL, 0 if NULL
//

(pIRQGE) ld4        rDPC = [rpT2]               // rDPC.4 = DPC active flag

#if !defined(NT_UP)
        add         out0 = (LockQueueDispatcherLock * 16) + PbLockQueue, rPrcb
        br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
#endif // !defined(NT_UP)
        ;;

        LOWER_IRQL(a0)
        cmp4.eq     pDPC = rDPC, zero           // pDPC = request DPC intr
        REQUEST_DISPATCH_INT(pDPC)              // request DPC interrupt

        NESTED_RETURN
        NESTED_EXIT(KiUnlockDispatcherDatabase)
//
// N.B. This routine is carefully written as a nested function.
//    Control only reaches this routine from above (by a branch from
//    KiUnlockDispatcherDatabase, not by a call).
//
//    rPrcb contains the address of PRCB
//    v0 contains the next thread
//    a0 contains the old IRQL (already zero extended by the code above)
//
//    The prologue below uses only t0, t1, t4 and t10-t16 as scratch, so
//    rPrcb (t8) and v0 are still intact when they are copied into s0/s2.
//

        NESTED_ENTRY(KxUnlockDispatcherDatabase)
        PROLOGUE_BEGIN

//
// Allocate the register frame and the switch frame on the memory stack.
// brp is kept in loc0 and ar.unat in loc1 for the life of the routine.
//

        .regstk     1, 2, 1, 0
        alloc       t16 = ar.pfs, 1, 2, 1, 0
        .save       rp, loc0
        mov         loc0 = brp
        .fframe     SwitchFrameLength
        add         sp = -SwitchFrameLength, sp
        ;;

        .save       ar.unat, loc1
        mov         loc1 = ar.unat
        add         t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
        add         t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
        ;;

//
// Spill fs19..fs0 into the exception frame. t0 and t1 walk the frame in
// an interleaved fashion (odd/even slots) using post-increment; the branch
// registers and ar.lc are read into temporaries while the spills proceed.
//

        .save.gf    0x0, 0xC0000
        stf.spill   [t0] = fs19, ExFltS17-ExFltS19
        stf.spill   [t1] = fs18, ExFltS16-ExFltS18
        ;;

        .save.gf    0x0, 0x30000
        stf.spill   [t0] = fs17, ExFltS15-ExFltS17
        stf.spill   [t1] = fs16, ExFltS14-ExFltS16
        mov         t10 = bs4
        ;;

        .save.gf    0x0, 0xC000
        stf.spill   [t0] = fs15, ExFltS13-ExFltS15
        stf.spill   [t1] = fs14, ExFltS12-ExFltS14
        mov         t11 = bs3
        ;;

        .save.gf    0x0, 0x3000
        stf.spill   [t0] = fs13, ExFltS11-ExFltS13
        stf.spill   [t1] = fs12, ExFltS10-ExFltS12
        mov         t12 = bs2
        ;;

        .save.gf    0x0, 0xC00
        stf.spill   [t0] = fs11, ExFltS9-ExFltS11
        stf.spill   [t1] = fs10, ExFltS8-ExFltS10
        mov         t13 = bs1
        ;;

        .save.gf    0x0, 0x300
        stf.spill   [t0] = fs9, ExFltS7-ExFltS9
        stf.spill   [t1] = fs8, ExFltS6-ExFltS8
        mov         t14 = bs0
        ;;

        .save.gf    0x0, 0xC0
        stf.spill   [t0] = fs7, ExFltS5-ExFltS7
        stf.spill   [t1] = fs6, ExFltS4-ExFltS6
        mov         t15 = ar.lc
        ;;

        .save.gf    0x0, 0x30
        stf.spill   [t0] = fs5, ExFltS3-ExFltS5
        stf.spill   [t1] = fs4, ExFltS2-ExFltS4
        ;;

        .save.f     0xC
        stf.spill   [t0] = fs3, ExFltS1-ExFltS3 // save fs3
        stf.spill   [t1] = fs2, ExFltS0-ExFltS2 // save fs2
        ;;

        .save.f     0x3
        stf.spill   [t0] = fs1, ExBrS4-ExFltS1  // save fs1
        stf.spill   [t1] = fs0, ExBrS3-ExFltS0  // save fs0
        ;;

//
// Save the preserved branch registers bs4..bs0.
//

        .save.b     0x18
        st8         [t0] = t10, ExBrS2-ExBrS4   // save bs4
        st8         [t1] = t11, ExBrS1-ExBrS3   // save bs3
        ;;

        .save.b     0x6
        st8         [t0] = t12, ExBrS0-ExBrS2   // save bs2
        st8         [t1] = t13, ExIntS2-ExBrS1  // save bs1
        ;;

        .save.b     0x1
        st8         [t0] = t14, ExIntS3-ExBrS0  // save bs0
        movl        t12 = KiPcr + PcCurrentThread
        ;;

//
// Spill the preserved integer registers s3..s0; their NaT bits are
// collected in ar.unat and stored separately below.
//

        .save.gf    0xC, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s3, ExIntS1-ExIntS3  // save s3
        .mem.offset 8,0
        st8.spill   [t1] = s2, ExIntS0-ExIntS2  // save s2
        ;;

        .save.gf    0x3, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s1, ExApLC-ExIntS1   // save s1
        .mem.offset 8,0
        st8.spill   [t1] = s0, ExApEC-ExIntS0   // save s0
        ;;

//
// Save ar.pfs (captured in t16 by the alloc), ar.lc and the NaT collection.
// With s0-s2 saved, load them with the SwapContext arguments:
// s0 = PRCB, s1 = current (old) thread, s2 = next thread.
//

        .savepsp    ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
        st8         [t1] = t16, ExIntNats-ExApEC
        mov         t4 = ar.unat                // captured Nats of s0-s3
        mov         s0 = rPrcb

        LDPTR       (s1, t12)                   // current thread
        ;;

        .savepsp    ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
        st8         [t0] = t15
        .savepsp    @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
        st8         [t1] = t4                   // save Nats of s0-s3
        mov         s2 = v0

        PROLOGUE_END

        add         rpT2 = PbNextThread, s0     // -> next thread
        add         out0 = ThWaitIrql, s1       // -> previous IRQL
        ;;

        STPTRINC    (rpT2, zero,PbCurrentThread-PbNextThread) // clear NextThread
        st1         [out0] = a0, ThIdleSwapBlock-ThWaitIrql   // save old IRQL
        mov         rpT3 = 1
        ;;

//
// Reready current thread for execution and swap context to the selected
// thread.
//
// Note: Set IdleSwapBlock in the current thread so no idle processor
// can switch to this processor before it is removed from the current
// processor.
//
// The post-decrement on the second store leaves out0 pointing at the base
// of the previous thread object, which is the argument to KiReadyThread.
//

        STPTR       (rpT2, s2)                  // set current thread object
        st1         [out0] = rpT3, -ThIdleSwapBlock // out0 -> previous thread
        br.call.sptk brp = KiReadyThread
        ;;

        br.call.sptk brp = SwapContext
        ;;

//
// Lower IRQL, deallocate exception/switch frame.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. v0 contains the kernel APC pending state on return.
//
// N.B. s2 contains the address of the new thread on return.
//

        add         rpT2 = ThWaitIrql, s2       // -> ThWaitIrql
        cmp.ne      pAPC, pNoAPC = zero, v0     // pAPC = APC pending
        ;;

        ld1         a0 = [rpT2]                 // a0 = original wait IRQL
        ;;

//
// Deliver the APC only if one is pending AND the wait IRQL is zero. The
// compare below rewrites pNoAPC only when pAPC is set; otherwise pNoAPC
// keeps the (true) value produced by the compare above.
//

(pAPC)  cmp.ne      pNoAPC = zero, a0           // APC pending and IRQL == 0
(pNoAPC) br.spnt    Kudd_Exit
        ;;

//
// Grow the output area to three registers for the KiDeliverApc call and
// raise IRQL to APC_LEVEL. t16 is overwritten here; ar.pfs is restored
// from the exception frame at exit.
//

        .regstk     1, 2, 3, 0
        alloc       t16 = ar.pfs, 1, 2, 3, 0
        mov         rT2 = APC_LEVEL
        ;;

        SET_IRQL(rT2)

        mov         out0 = KernelMode
        mov         out1 = zero
        mov         out2 = zero
        br.call.sptk brp = KiDeliverApc
        ;;

//
// Lower IRQL to wait level, set return status, restore registers, and return.
//

Kudd_Exit:

        LOWER_IRQL(a0)                          // a0 = new irql

        add         out0 = STACK_SCRATCH_AREA+SwExFrame, sp
        br.call.sptk brp = KiRestoreExceptionFrame
        ;;

        add         rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
        ;;

        ld8         rT1 = [rpT1]                // rT1 = saved ar.pfs
        mov         brp = loc0
        ;;

        mov         ar.unat = loc1
        nop.f       0
        mov         ar.pfs = rT1

        .restore
        add         sp = SwitchFrameLength, sp
        nop.i       0
        br.ret.sptk brp
        ;;

        NESTED_EXIT(KxUnlockDispatcherDatabase)
SBTTL("Swap Thread")
//++
//--------------------------------------------------------------------
//
// BOOLEAN
// KiSwapContext (
//    IN PKTHREAD Thread
//    )
//
// Routine Description:
//
//    This routine saves the non-volatile registers, marshals the
//    arguments for SwapContext and calls SwapContext to perform
//    the actual thread switch.
//
// Arguments:
//
//    Thread - Supplies the address of the new thread.
//
// Return Value:
//
//    If a kernel APC is pending, then a value of TRUE is returned.
//    Otherwise, FALSE is returned. The value is forced to zero when the
//    new thread's wait IRQL is non-zero.
//
// Notes:
//
//    GP valid on entry -- GP is not switched, just use kernel GP
//--------------------------------------------------------------------
//--

        NESTED_ENTRY(KiSwapContext)

//
// Register aliases
//

        pNoAPC    = pt2                         // do not dispatch APC

        rpT1      = t0                          // temp pointer
        rpT2      = t1                          // temp pointer
        rT1       = t10                         // temp regs

        PROLOGUE_BEGIN

//
// Allocate the register frame and the switch frame on the memory stack.
// brp is kept in loc0 and ar.unat in loc1 for the life of the routine.
//

        .regstk     1, 2, 1, 0
        alloc       t16 = ar.pfs, 1, 2, 1, 0
        .save       rp, loc0
        mov         loc0 = brp
        .fframe     SwitchFrameLength
        add         sp = -SwitchFrameLength, sp
        ;;

        .save       ar.unat, loc1
        mov         loc1 = ar.unat
        add         t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
        add         t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
        ;;

//
// Spill fs19..fs0 into the exception frame. t0 and t1 walk the frame in
// an interleaved fashion using post-increment; the branch registers and
// ar.lc are read into temporaries while the spills proceed.
//

        .save.gf    0x0, 0xC0000
        stf.spill   [t0] = fs19, ExFltS17-ExFltS19
        stf.spill   [t1] = fs18, ExFltS16-ExFltS18
        ;;

        .save.gf    0x0, 0x30000
        stf.spill   [t0] = fs17, ExFltS15-ExFltS17
        stf.spill   [t1] = fs16, ExFltS14-ExFltS16
        mov         t10 = bs4
        ;;

        .save.gf    0x0, 0xC000
        stf.spill   [t0] = fs15, ExFltS13-ExFltS15
        stf.spill   [t1] = fs14, ExFltS12-ExFltS14
        mov         t11 = bs3
        ;;

        .save.gf    0x0, 0x3000
        stf.spill   [t0] = fs13, ExFltS11-ExFltS13
        stf.spill   [t1] = fs12, ExFltS10-ExFltS12
        mov         t12 = bs2
        ;;

        .save.gf    0x0, 0xC00
        stf.spill   [t0] = fs11, ExFltS9-ExFltS11
        stf.spill   [t1] = fs10, ExFltS8-ExFltS10
        mov         t13 = bs1
        ;;

        .save.gf    0x0, 0x300
        stf.spill   [t0] = fs9, ExFltS7-ExFltS9
        stf.spill   [t1] = fs8, ExFltS6-ExFltS8
        mov         t14 = bs0
        ;;

        .save.gf    0x0, 0xC0
        stf.spill   [t0] = fs7, ExFltS5-ExFltS7
        stf.spill   [t1] = fs6, ExFltS4-ExFltS6
        mov         t15 = ar.lc
        ;;

        .save.gf    0x0, 0x30
        stf.spill   [t0] = fs5, ExFltS3-ExFltS5
        stf.spill   [t1] = fs4, ExFltS2-ExFltS4
        ;;

        .save.f     0xC
        stf.spill   [t0] = fs3, ExFltS1-ExFltS3 // save fs3
        stf.spill   [t1] = fs2, ExFltS0-ExFltS2 // save fs2
        ;;

        .save.f     0x3
        stf.spill   [t0] = fs1, ExBrS4-ExFltS1  // save fs1
        stf.spill   [t1] = fs0, ExBrS3-ExFltS0  // save fs0
        ;;

//
// Save the preserved branch registers bs4..bs0.
//

        .save.b     0x18
        st8         [t0] = t10, ExBrS2-ExBrS4   // save bs4
        st8         [t1] = t11, ExBrS1-ExBrS3   // save bs3
        ;;

        .save.b     0x6
        st8         [t0] = t12, ExBrS0-ExBrS2   // save bs2
        st8         [t1] = t13, ExIntS2-ExBrS1  // save bs1
        ;;

        .save.b     0x1
        st8         [t0] = t14, ExIntS3-ExBrS0  // save bs0
        ;;

//
// Spill the preserved integer registers s3..s0; their NaT bits are
// collected in ar.unat and stored separately below.
//

        .save.gf    0xC, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s3, ExIntS1-ExIntS3  // save s3
        .mem.offset 8,0
        st8.spill   [t1] = s2, ExIntS0-ExIntS2  // save s2
        ;;

        .save.gf    0x3, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s1, ExApLC-ExIntS1   // save s1
        .mem.offset 8,0
        st8.spill   [t1] = s0, ExApEC-ExIntS0   // save s0
        ;;

//
// Save ar.pfs (captured in t16 by the alloc), ar.lc and the NaT collection.
//

        .savepsp    ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
        st8         [t1] = t16, ExIntNats-ExApEC
        mov         t4 = ar.unat                // captured Nats of s0-s3
        ;;

        .savepsp    ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
        st8         [t0] = t15
        .savepsp    @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
        st8         [t1] = t4                   // save Nats of s0-s3

        PROLOGUE_END

//
// For the call to SwapContext-
//
//      s0                                      // Prcb address
//      s1                                      // old thread address
//      s2                                      // new thread address
//      pt0 = 1
//

        mov         s2 = a0                     // s2 <- New Thread
        movl        rpT1 = KiPcr + PcPrcb
        ;;

        LDPTRINC    (s0, rpT1, PcCurrentThread-PcPrcb)// s0 <- Prcb
        ;;

        LDPTR       (s1, rpT1)                  // s1 <- Old Thread
        add         rpT2 = PbCurrentThread, s0
        ;;

//
// Swap context to the next thread.
//

        STPTR       (rpT2, a0)                  // Set new thread current
        cmp.eq      pt0 = zero, zero            // indicate lock context swap
        br.call.sptk brp = SwapContext          // call SwapContext(prcb, OldTh, NewTh)
        ;;

//
// Deallocate exception/switch frame.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. v0 contains the kernel APC pending state on return, ie, 0 if
//      no APC pending, 1 if APC pending.  v0 will be forced to 0 if
//      the new IRQL doesn't allow APCs.
//
// N.B. KiRestoreExceptionFrame doesn't touch v0, t21 or t22.
//
// The wait IRQL and the saved ar.pfs are therefore loaded into t21/t22
// before the call so that they are available after the frame is restored.
//

        add         rpT2 = ThWaitIrql, s2       // -> ThWaitIrql
        add         rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
        add         out0 = STACK_SCRATCH_AREA+SwExFrame, sp
        ;;

        ld1         t21 = [rpT2]                // t21 = original wait IRQL
        ld8         t22 = [rpT1]                // t22 = PFS
        br.call.sptk brp = KiRestoreExceptionFrame
        ;;

        mov         brp = loc0
        cmp.ne      pNoAPC = zero, t21          // no APC if IRQL != 0
        ;;

        mov         ar.unat = loc1
        nop.f       0
        mov         ar.pfs = t22

        .restore
        add         sp = SwitchFrameLength, sp
(pNoAPC) mov        v0 = zero                   // suppress APC indication
        br.ret.sptk brp
        ;;

        NESTED_EXIT(KiSwapContext)
SBTTL("Swap Context to Next Thread")
//++
//--------------------------------------------------------------------
// Routine:
//
//       SwapContext
//
// Routine Description:
//
//       This routine is called to swap context from one thread to the next.
//
// Arguments:
//
//       s0 - Address of Processor Control Block (PRCB).
//       s1 - Address of previous thread object.
//       s2 - Address of next thread object.
//
// Return value:
//
//       v0 - Kernel APC pending flag
//       s0 - Address of Processor Control Block (PRCB).
//       s1 - Address of previous thread object.
//       s2 - Address of current thread object.
//
// Note:
//       Kernel GP is not saved and restored across context switch
//
//       !!WARNING!! - Thierry. 03/01/2000.
//       Be aware that this implementation is a result of performance analysis.
//       Please consider this when you are making changes...
//
//       The alternate entry point SwapFromIdle enters the routine after the
//       old thread's state has been saved.
//
//--------------------------------------------------------------------
//--

        NESTED_ENTRY(SwapContext)

//
// Register aliases
//
// N.B. rNewBSP/rOldBSP share t16 and rNewRNAT/rOldRNAT share t17; the
//      "old" values are stored before the "new" values are loaded.
//      pt4 (pUsTh) is also used as a scratch predicate in the tracing,
//      HighFpOwner and interruption logging sequences below.
//

        rT1       = t1                          // temp
        rT2       = t2                          // temp
        rT3       = t3                          // temp
        rNewproc  = t4                          // next process object
        rOldproc  = t5                          // previous process object
        rpThBSL   = t6                          // pointer to new thread backing store limit
        rpT1      = t7                          // temp pointer
        rpT2      = t8                          // temp pointer
        rpT3      = t9                          // temp pointer
        rAr1      = t10
        rAr2      = t11
        rAr3      = t12
        rAr4      = t13

        rNewIKS   = t14                         // new initial kernel stack
        rNewKSL   = t15                         // new kernel stack limit
        rNewBSP   = t16                         // new thread BSP/BSPSTORE
        rOldBSP   = t16                         // old thread BSP
        rOldRNAT  = t17                         // old thread RNAT
        rNewRNAT  = t17                         // new thread RNAT
        rOldSbase = t18                         // old thread kstack base

        pUsTh     = pt4                         // is user thread? (teb != 0)
        pKrTh     = pt5                         // is kernel thread? (teb == 0)
        pSave     = pt7                         // is high fp set dirty?
        pDiff     = ps4                         // if new and old process different
        pSame     = ps5                         // if new and old process same

//
// Set new thread's state to running. Note this must be done
// under the dispatcher lock so that KiSetPriorityThread sees
// the correct state.
//

        PROLOGUE_BEGIN

#if !defined(NT_UP)

        alloc       rT2 = ar.pfs, 0, 0, 4, 0
        mov         rT1 = brp                   // move from brp takes 2 cycles
        add         rpT3 = ThState, s2
        ;;

        lfetch.excl [rpT3]
        mov         rAr1 = Running
        add         rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
        ;;

        add         out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, s0
        .savesp     ar.pfs, SwPFS+STACK_SCRATCH_AREA
        st8.nta     [rpT2] = rT2, SwRp-SwPFS    // save pfs
        ;;

//
// The return link and pfs are stored in the switch frame before the first
// call below overwrites brp. The context swap lock is acquired here, while
// the dispatcher lock is still held.
//

        .savesp     brp, SwRp+STACK_SCRATCH_AREA
        st8.nta     [rpT2] = rT1                // save return link
        st1.nta     [rpT3] = rAr1               // set thread state to Running
        br.call.sptk brp = KeAcquireQueuedSpinLockAtDpcLevel
        ;;

//
// Release DispatcherLock.
//

        add         out0 = (LockQueueDispatcherLock * 16) + PbLockQueue, s0
        br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
        ;;

        mov         out0 = ar.fpsr              // move from ar.fpsr takes 12 cycles
        movl        rpT1 = KiPcr+PcHighFpOwner  // setup for prefetching
        ;;

//
// NOTE(review): the bundle opened on the next line has no closing "}"
// before the following "{ .mmi" in this copy of the source. Confirm
// against the original file whether a "}" was lost or whether the
// assembler tolerates this form. Code is left exactly as found.
//

{ .mmi
        lfetch      [rpT1]
        cmp.ne      pUsTh = zero, teb           // test for ia32 save required
                                                // must not have a nop.f for next 10 cycles--
                                                // Using temporarily the explicit templating
                                                // for the next cycles.
        add         out1 = ThStackBase, s1      // move early to start access for rOldSbase
{ .mmi
        add         rpT1 = SwFPSR+STACK_SCRATCH_AREA, sp
        add         rpT2 = SwPreds+STACK_SCRATCH_AREA, sp
        nop.i       0x0
}
        ;;

{ .mmi
        ld8.nta     rOldSbase = [out1]          // speculative start early for ia32 saves
        lfetch.excl [rpT1]
        add         out2 = ThNumber, s2         // setup for prefetching
}
{ .mmi
        mov.m       ar.rsc = r0                 // put RSE in lazy mode
        mov         rOldBSP = ar.bsp            // move from ar.bsp takes 12 cycles
        nop.i       0x0
}
        ;;

{ .mmi
        lfetch      [out2]
        nop.m       0x0
        mov         rT1 = pr                    // move from pr takes 2 cycles
}
        ;;

{ .mmi
        flushrs
        mov         rT3 = psr.um                // move from psr.um takes 12 cycles
        nop.i       0x0
}
        ;;

{ .mmi
        lfetch.excl [rpT2]
        mov.m       rOldRNAT = ar.rnat          // move from ar.rnat takes 5 cycles
        add         out2 = @gprel(PPerfGlobalGroupMask), gp
}
        ;;

{ .mli
        lfetch      [out2]
        movl        out3 = KiPcr + PcInterruptionCount
                    // INTERRUPTION_LOGGING on or off, we are prefetching this line.
                    // If any real performance problem is detected, we will undef these lines.
}
        ;;

{ .mmi
        lfetch      [out3]
        add         rpT3 = SwRnat+STACK_SCRATCH_AREA, sp
}
        ;;

#else  // NT_UP

        alloc       rT2 = ar.pfs, 0, 0, 4, 0
        cmp.ne      pUsTh = zero, teb           // test for ia32 save required
        ;;

        mov.m       ar.rsc = r0                 // put RSE in lazy mode
        add         out1 = ThStackBase, s1      // move early to start access for rOldSbase
        mov         out0 = ar.fpsr              // move from ar.fpsr takes 12 cycles
                                                // must not have a nop.f for next 10 cycles--
                                                // Using temporarily the explicit templating
                                                // for the next cycles.
        ;;

{ .mmi
        ld8.nta     rOldSbase = [out1]          // speculative start early for ia32 saves
        mov         rOldBSP = ar.bsp            // move from ar.bsp takes 12 cycles
        add         rpT1 = SwRp+STACK_SCRATCH_AREA, sp
}
        ;;

        flushrs
        mov         rT3 = psr.um                // move from psr.um takes 12 cycles
        add         rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
        ;;

        mov.m       rOldRNAT = ar.rnat          // move from ar.rnat takes 5 cycles
        mov         rT1 = brp                   // move from brp takes 2 cycles
        add         rpT3 = ThState, s2
        ;;

{ .mmi
        mov         rAr1 = Running
        .savesp     brp, SwRp+STACK_SCRATCH_AREA
        st8.nta     [rpT1] = rT1, SwFPSR-SwRp   // save return link
        nop.i       0x0
}
        ;;

{ .mii
        st1.nta     [rpT3] = rAr1               // set thread state to Running
        mov         rT1 = pr                    // move from pr takes 2 cycles
        nop.i       0x0
}
        ;;

{ .mii
        .savesp     ar.pfs, SwPFS+STACK_SCRATCH_AREA
        st8.nta     [rpT2] = rT2, SwPreds-SwPFS // save pfs
        add         rpT3 = SwRnat+STACK_SCRATCH_AREA, sp
        nop.i       0x0
}
        ;;

#endif // NT_UP

//
// Common to both builds. On arrival here:
//      rpT1 -> SwFPSR slot, rpT2 -> SwPreds slot, rpT3 -> SwRnat slot,
//      out0 = ar.fpsr, rT1 = pr, rT3 = psr.um,
//      rOldBSP = ar.bsp, rOldRNAT = ar.rnat, rOldSbase = old ThStackBase.
//

{ .mmi
        st8.nta     [rpT3] = rOldRNAT
        nop.m       0x0
        nop.i       0x0
}
        st8         [rpT1] = out0, SwBsp-SwFPSR // save kernel FPSR
        st8         [rpT2] = rT1                // save preserved predicates
        ;;

        st8.nta     [rpT1] = rOldBSP
        add         rpT3 = ThKernelBStore, s1
        tbit.nz     pSave = rT3, PSR_MFH        // check mfh bit
(pUsTh) br.call.spnt brp = SwapContextIA32Save
        ;;

        st8.nta     [rpT3] = rOldBSP
(pSave) add         out0 = -ThreadStateSaveAreaLength+TsHigherFPVolatile, rOldSbase
(pSave) br.call.spnt brp = KiSaveHigherFPVolatileAtDispatchLevel
        ;;

//
// Acquire the context swap lock so the address space of the old process
// cannot be deleted and then release the dispatcher database lock.
//
// N.B. This lock is used to protect the address space until the context
//      switch has sufficiently progressed to the point where the address
//      space is no longer needed. This lock is also acquired by the reaper
//      thread before it finishes thread termination.
//
// N.B. In this version the acquire of the context swap lock and the
//      release of the dispatcher lock are performed in the multiprocessor
//      prologue above; no lock operation takes place at this point.
//

        PROLOGUE_END

//
// ***** TBD ****** Save performance counters? (user vs. kernel)
//

//
// Accumulate the total time spent in a thread.
//
// NOTE(review): the PERF_DATA section below is unported MIPS code (MIPS
// mnemonics, and a non-comment "TBD" marker line). It will not assemble
// for IA64 if PERF_DATA is ever defined.
//

#if defined(PERF_DATA)

 **** TBD **** MIPS code

        addu    a0,sp,ExFltF20                  // compute address of result
        move    a1,zero                         // set address of optional frequency
        jal     KeQueryPerformanceCounter       // query performance counter
        lw      t0,ExFltF20(sp)                 // get current cycle count
        lw      t1,ExFltF20 + 4(sp)             //
        lw      t2,PbStartCount(s0)             // get starting cycle count
        lw      t3,PbStartCount + 4(s0)         //
        sw      t0,PbStartCount(s0)             // set starting cycle count
        sw      t1,PbStartCount + 4(s0)         //
        lw      t4,EtPerformanceCountLow(s1)    // get accumulated cycle count
        lw      t5,EtPerformanceCountHigh(s1)   //
        subu    t6,t0,t2                        // subtract low parts
        subu    t7,t1,t3                        // subtract high parts
        sltu    v0,t0,t2                        // generate borrow from high part
        subu    t7,t7,v0                        // subtract borrow
        addu    t6,t6,t4                        // add low parts
        addu    t7,t7,t5                        // add high parts
        sltu    v0,t6,t4                        // generate carry into high part
        addu    t7,t7,v0                        // add carry
        sw      t6,EtPerformanceCountLow(s1)    // set accumulated cycle count
        sw      t7,EtPerformanceCountHigh(s1)   //

#endif // defined(PERF_DATA)

//
// The following entry point is used to switch from the idle thread to
// another thread.
//

        ;;

        ALTERNATE_ENTRY(SwapFromIdle)

        alloc       rT1 = ar.pfs, 2, 0, 2, 0

//
// Check if we are tracing context swaps
//
// WmiTraceContextSwap(old thread, new thread) is called only when
// PPerfGlobalGroupMask is non-NULL and the PERF_CONTEXTSWAP_FLAG bit is
// set in the word at PERF_CONTEXTSWAP_OFFSET within it.
//

        mov         out0 = s1                   // assign out0 to old ethread pointer
        add         rpT3 = @gprel(PPerfGlobalGroupMask), gp
        ;;

        ld8.nta     rpT3 = [rpT3]               // get value of PperfGlobalGroupMask
        mov         out1 = s2                   // assign out1 to new ethread pointer
        ;;

        add         rpT2 = PERF_CONTEXTSWAP_OFFSET, rpT3
        cmp.ne      pt3 = zero, rpT3            // if it's non-zero, then trace on
        ;;

(pt3)   ld4.nta     rpT2 = [rpT2]
        ;;

(pt3)   and         rpT2 = PERF_CONTEXTSWAP_FLAG, rpT2
        ;;

(pt3)   cmp.ne.unc  pt4 = zero, rpT2
(pt4)   br.call.spnt brp = WmiTraceContextSwap  // optimize for no tracing case
        ;;

//
// Get address of old and new process objects.
//

        add         rpT2 = ThApcState+AsProcess,s2 // -> new thread AsProcess
        add         rpT1 = ThApcState+AsProcess,s1 // -> old thread AsProcess
        ;;

        LDPTR       (rOldproc, rpT1)            // old process
        LDPTR       (rNewproc, rpT2)            // new process

#if !defined(NT_UP)

//
// In an MP system, a thread address may be recycled and the thread may
// migrate to a processor that still holds stale values in the high fp
// register set. If KiPcr->HighFpOwner equals the new thread but the
// thread's processor number differs from this processor's number (i.e.
// when pt4 is set to TRUE), set KiPcr->HighFpOwner to zero.
//

        add         rpT1 = ThNumber, s2
        movl        rpT2 = KiPcr+PcHighFpOwner
        ;;

        ld1         rT1 = [rpT1]                // rT1 = new thread's ThNumber
        ld8         rT2 = [rpT2], PcNumber-PcHighFpOwner // rT2 = HighFpOwner
        add         out0 = ThIdleSwapBlock, s1
        ;;

        ld1         rT3 = [rpT2], PcHighFpOwner-PcNumber // rT3 = PcNumber
        st1         [out0] = zero               // clear OldThread->IdleSwapBlock
        cmp.eq      pt3 = rT2, s2
        ;;

(pt3)   cmp.ne.unc  pt4 = rT1, rT3
        ;;

(pt4)   st8         [rpT2] = zero               // rpT2 is back at PcHighFpOwner

#endif // !defined(NT_UP)
        ;;

        flushrs
        FAST_DISABLE_INTERRUPTS
        ;;

//
// Thierry - 03/29/2000
// It should be noticed that the performance analysis for SwapContext
// was done with INTERRUPTION_LOGGING defined as 1.
//

#define INTERRUPTION_LOGGING 1

#if defined(INTERRUPTION_LOGGING)

// For Conditional Interrupt Logging
#define ContextSwitchBit 63

        .global     KiVectorLogMask

//
// Logging is skipped unless bit ContextSwitchBit of KiVectorLogMask is set.
// gp is temporarily pointed at _gp to address KiVectorLogMask and then
// restored from rT3.
//

        mov         rT3 = gp
        ;;

        movl        gp = _gp
        ;;

        add         rpT1 = @gprel(KiVectorLogMask), gp
        ;;

        ld8         rT1 = [rpT1]
        mov         gp = rT3
        ;;

        tbit.z      pt4 = rT1, ContextSwitchBit
(pt4)   br.cond.sptk EndOfLogging0

//
// Append a four-quadword record (switch type, new thread, old thread,
// old sp) to the interruption history. Records are 32 bytes (index << 5)
// and the index is the count masked with
// MAX_NUMBER_OF_IHISTORY_RECORDS - 1.
//

        movl        rpT1 = KiPcr+PcInterruptionCount
        mov         rT3 = MAX_NUMBER_OF_IHISTORY_RECORDS - 1
        cmp.ne      pDiff,pSame=rOldproc,rNewproc
        ;;

(pDiff) mov         rT1 = 0x91                  // process switch
        ld4.nt1     rT2 = [rpT1]                // get current count
        ;;

(pSame) mov         rT1 = 0x90                  // thread switch
        add         rpT3 = 1, rT2               // incr count
        and         rT2 = rT3, rT2              // index of current entry
        add         rpT2 = 0x1000-PcInterruptionCount, rpT1 // base of history
        ;;

        st4.nta     [rpT1] = rpT3               // save count
        shl         rT2 = rT2, 5                // offset of current entry
        ;;

        add         rpT2 = rpT2, rT2            // address of current entry
        ;;

        st8         [rpT2] = rT1, 8             // save switch type
        ;;

        st8         [rpT2] = s2, 8              // save new thread pointer
        ;;

        st8         [rpT2] = s1, 8              // save old thread
        ;;

        st8         [rpT2] = sp                 // save old sp
        ;;

// For Conditional Interrupt Logging
EndOfLogging0:

#endif // INTERRUPTION_LOGGING

        mov         ar.rsc = r0                 // put RSE in lazy mode
        add         rpT1 = ThInitialStack, s2
        add         rpT2 = ThKernelStack, s1
        ;;

//
// Store the kernel stack pointer in the previous thread object,
// load the new kernel stack pointer from the new thread object,
// switch backing store pointers, select new process id and swap
// to the new process.
//
// rpT1 walks the new thread object (ThInitialStack, ThKernelStack,
// ThStackLimit, ThInitialBStore, ThBStoreLimit, ThDebugActive, ThTeb,
// ThApcState+AsKernelApcPending) and rpT2 walks the PCR (PcInitialStack,
// PcStackLimit, PcInitialBStore, PcBStoreLimit, PcDebugActive,
// PcCurrentThread), both by post-increment.
//

        ld8.nta     rNewIKS = [rpT1], ThKernelStack-ThInitialStack
        st8.nta     [rpT2] = sp                 // save current sp
        ;;

        ld8.nta     sp = [rpT1], ThStackLimit-ThKernelStack // sp = new thread's sp
        movl        rpT2 = KiPcr + PcInitialStack
        ;;

        alloc       rT1 = 0,0,0,0               // make current frame 0 size
        ld8.nta     rNewKSL = [rpT1], ThInitialBStore-ThStackLimit
        ;;

        loadrs                                  // invalidate RSE and ALAT
        ld8.nta     rT1 = [rpT1], ThBStoreLimit-ThInitialBStore
        ;;

        ld8.nta     rT2 = [rpT1], ThDebugActive-ThBStoreLimit
        st8         [rpT2] = rNewIKS, PcStackLimit-PcInitialStack
        ;;

        // get debugger active state
        ld1.nta     rT3 = [rpT1], ThTeb-ThDebugActive
        st8         [rpT2] = rNewKSL, PcInitialBStore-PcStackLimit
        add         rpT3 = SwBsp+STACK_SCRATCH_AREA, sp // new thread's switch frame
        ;;

        ld8         rNewBSP = [rpT3], SwRnat-SwBsp
        st8         [rpT2] = rT1, PcBStoreLimit-PcInitialBStore
        ;;

        ld8         rNewRNAT = [rpT3]
        st8         [rpT2] = rT2, PcDebugActive-PcBStoreLimit
        ;;

        // load new teb
        ld8         teb = [rpT1], ThApcState+AsKernelApcPending-ThTeb
        // set new debugger active state
        st1         [rpT2] = rT3, PcCurrentThread-PcDebugActive
        invala

//
// Setup PCR initial kernel BSP and BSTORE limit
//

        mov         ar.bspstore = rNewBSP       // load new bspstore
        cmp.ne      pDiff,pSame=rOldproc,rNewproc // if ne, switch process
        ;;

        mov         ar.rnat = rNewRNAT          // load new RNATs
        ;;

        mov         ar.rsc = RSC_KERNEL         // enable RSE
        ;;

//
// If the new process is not the same as the old process, then swap the
// address space to the new process.
//
// N.B. The context swap lock cannot be dropped until all references to the
//      old process address space are complete. This includes any possible
//      TB Misses that could occur referencing the new address space while
//      still executing in the old address space.
//
// N.B. The process address space swap is executed with interrupts disabled.
//

        alloc       rT1 = 0,4,2,0
        STPTR       (rpT2, s2)                  // PcCurrentThread = new thread
        ;;

        mov         kteb = teb                  // update kernel TEB
        FAST_ENABLE_INTERRUPTS
        ld1         loc0 = [rpT1]               // load the ApcPending flag

#if !defined(NT_UP)

//
// Release the context swap lock
// N.B. ContextSwapLock is always released in KxSwapProcess, if called
//

        add         out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, s0
        add         loc1 = PcApcInterrupt-PcCurrentThread, rpT2
(pSame) br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
        ;;

#else // !defined(NT_UP)

        add         loc1 = PcApcInterrupt-PcCurrentThread, rpT2
        ;;

#endif // !defined(NT_UP)

        mov         out0 = rNewproc             // set address of new process
        mov         out1 = rOldproc             // set address of old process
(pDiff) br.call.sptk brp = KxSwapProcess        // call swap address space(NewProc, OldProc)
        ;;

//
// In new address space, if changed.
//
// If the new thread has a kernel mode APC pending, then request an APC
// interrupt: the thread's KernelApcPending flag (loc0) is copied to
// PcApcInterrupt (loc1 holds its address).
//

        st1         [loc1] = loc0               // request (or clear) APC pend.
        add         rpT1 = PbContextSwitches, s0
        add         rpT2 = ThContextSwitches, s2
        ;;

//
// Increment context switch counters (per-processor and per-thread).
//

        ld4         loc1 = [rpT1]
        ld4         loc2 = [rpT2]
        ;;

        cmp.ne      pUsTh, pKrTh = zero, teb
        add         loc1 = loc1, zero, 1
        add         loc2 = loc2, zero, 1
        ;;

        st4         [rpT1] = loc1               // increment # of context switches

        st4         [rpT2] = loc2               // increment # of context switches

//
// Reload the new thread's saved FPSR, PFS, return link and predicates from
// its switch frame, restoring IA32 state first when teb != 0.
//

        add         rpT1 = SwFPSR+STACK_SCRATCH_AREA, sp
        add         rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
        ;;

        ld8         loc1 = [rpT1], SwRp-SwFPSR  // restore brp and pfs
        ld8         loc2 = [rpT2], SwPreds-SwPFS
        ;;

        ld8         rT3 = [rpT1]                // rT3 = saved return link
        ld8         rT2 = [rpT2]                // rT2 = saved predicates

        mov         v0 = loc0                   // set v0 = apc pending
(pUsTh) br.call.spnt brp = SwapContextIA32Restore
        ;;

//
// Note: at this point s0 = Prcb, s1 = previous thread, s2 = current thread
//

        mov         ar.fpsr = loc1
        mov         ar.pfs = loc2
        mov         brp = rT3

        mov         pr = rT2                    // Restore preserved preds

#if 0

//
// Thierry 03/22/2000:
//
// The following memory synchronization of the local processor
// I-cache and D-cache because of I-stream modifications is not
// required if the modifying code is written following the NT
// Core Team specifications:
//   - [Allocate VA]
//   - Modify the code
//   - Call FlushInstructionCache()
//       -> calls KiSweepIcache[Range]()
//   - Execute the code.
//
// The removal of this instruction eliminates a "> 100 cycle" stall.
//

        sync.i

#endif // 0

        ;;

        srlz.i

        br.ret.sptk brp

        NESTED_EXIT(SwapContext)
//++
//--------------------------------------------------------------------
// Routine:
//
//       SwapContextIA32Save
//
// Routine Description:
//
//       This function saves the IA32 context on the kernel stack.
//       Called from SwapContext.
//
//       ar21, ar24, ar25, ar26, ar28, ar29 and ar30 are stored into the
//       TsAppRegisters area located ThreadStateSaveAreaLength bytes below
//       rOldSbase. ar27 is not saved.
//
// Arguments:
//
//       rOldSbase : old thread kstack base.
//
// Return value:
//
//       None.
//
// Note:
//
//       SwapContext registers context (uses the register aliases defined
//       in SwapContext).
//
//       Overwrites rAr1-rAr4, rT1-rT3, rpT1 and rpT2.
//
//--------------------------------------------------------------------
//--
        LEAF_ENTRY(SwapContextIA32Save)

        mov         rAr1 = ar21                 // IA32 FP control register FCR
        ;;

        mov         rAr2 = ar24                 // IA32 EFLAG register
        ;;

        mov         rAr3 = ar25
        ;;

        mov         rAr4 = ar26
        ;;

        //
        // we may skip saving ar27 because it cannot be modified by user code
        //
        mov         rT1 = ar30
        ;;

        mov         rT2 = ar28
        ;;

        mov         rT3 = ar29
        ;;

        // these are separated out due to cache miss potential
        add         rpT1 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr21, rOldSbase
        add         rpT2 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr24, rOldSbase
        ;;

        //
        // rpT1 visits TsAr21, TsAr25, TsAr29; rpT2 visits TsAr24, TsAr26,
        // TsAr28, TsAr30 (post-increment addressing).
        //
        st8         [rpT1] = rAr1, TsAr25-TsAr21 // save ar21
        st8         [rpT2] = rAr2, TsAr26-TsAr24 // save ar24
        ;;

        st8         [rpT1] = rAr3, TsAr29-TsAr25 // save ar25
        st8         [rpT2] = rAr4, TsAr28-TsAr26 // save ar26
        ;;

        st8         [rpT2] = rT2, TsAr30-TsAr28  // save ar28
        ;;

        st8         [rpT2] = rT1                 // save ar30
        st8         [rpT1] = rT3                 // save ar29

        br.ret.sptk.few.clr brp
        LEAF_EXIT(SwapContextIA32Save)
//++
//--------------------------------------------------------------------
// Routine:
//
//       SwapContextIA32Restore
//
// Routine Description:
//
//       This function restores the IA32 registers context.
//       Called from SwapContext.
//
//       ar21 and ar24-ar30 are loaded from the TsAppRegisters area located
//       ThreadStateSaveAreaLength bytes below the new thread's ThStackBase.
//
//       N.B. ar27 is restored here although SwapContextIA32Save does not
//            save it; the value loaded is whatever the save area holds.
//
// Arguments:
//
//       s2 - Address of next thread object.
//
// Return value:
//
//       None.
//
// Note:
//
//       SwapContext registers context (uses the register aliases defined
//       in SwapContext).
//
//       Overwrites rpT1-rpT3 and rAr1-rAr4.
//
//--------------------------------------------------------------------
//--
        LEAF_ENTRY(SwapContextIA32Restore)

        add         rpT1 = ThStackBase, s2
        ;;

        ld8.nta     rpT1 = [rpT1]               // rpT1 = new thread kstack base
        ;;

        add         rpT2 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr21, rpT1
        add         rpT3 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr24, rpT1
        ;;

        //
        // rpT2 visits TsAr21, TsAr25, TsAr27, TsAr29; rpT3 visits TsAr24,
        // TsAr26, TsAr28, TsAr30 (post-increment addressing).
        //
        ld8.nta     rAr1 = [rpT2], TsAr25-TsAr21 // saved ar21
        ld8.nta     rAr2 = [rpT3], TsAr26-TsAr24 // saved ar24
        ;;

        ld8.nta     rAr3 = [rpT2], TsAr27-TsAr25 // saved ar25
        ld8.nta     rAr4 = [rpT3], TsAr28-TsAr26 // saved ar26
        ;;

        mov         ar21 = rAr1
        mov         ar24 = rAr2

        mov         ar25 = rAr3
        mov         ar26 = rAr4

        //
        // rAr1-rAr4 are reused for the second group of four registers.
        //
        ld8.nta     rAr1 = [rpT2], TsAr29-TsAr27 // saved ar27
        ld8.nta     rAr2 = [rpT3], TsAr30-TsAr28 // saved ar28
        ;;

        ld8.nta     rAr3 = [rpT2]                // saved ar29
        ld8.nta     rAr4 = [rpT3]                // saved ar30
        ;;

        mov         ar27 = rAr1
        mov         ar28 = rAr2

        mov         ar29 = rAr3
        mov         ar30 = rAr4

        br.ret.sptk.few.clr brp
        LEAF_EXIT(SwapContextIA32Restore)
SBTTL("Swap Process")
//++
//--------------------------------------------------------------------
//
// VOID
// KiSwapProcess (
//    IN PKPROCESS NewProcess,
//    IN PKPROCESS OldProcess
//    )
//
// Routine Description:
//
//    This function swaps the address space from one process to another by
//    assigning a new region id, if necessary, and loading the fixed entry
//    in the TB that maps the process page directory page. This routine follows
//    the PowerPC design for handling RID wrap.
//
//    The routine has two entry points:
//
//      KiSwapProcess - on MP builds acquires the context swap queued spin
//                      lock and then joins the common path.
//      KxSwapProcess - alternate entry that goes straight to the common
//                      path; used when the caller already holds the lock.
//
//    The common path performs, in order:
//
//      1. (MP only) Update the active processor sets of both processes.
//      2. Call KiSyncNewRegionId for the new process.
//      3. With interrupts disabled and PSR.ic = 0, purge and re-insert the
//         two data translation registers that map the user page directory
//         (DTR_UTBASE_INDEX) and the session page directory
//         (DTR_STBASE_INDEX).
//      4. Optionally reprogram a model specific register that controls
//         branch history, depending on whether the new process is Wow64.
//      5. (MP only) Release the context swap lock.
//
//    N.B. On MP builds the lock is released unconditionally at the end,
//         regardless of which entry point was used.
//
// On entry/exit:
//
//    Interrupt enabled.
//
// Arguments:
//
//    NewProcess (a0) - Supplies a pointer to a control object of type process
//      which represents the new process that is switched to (32-bit address).
//
//    OldProcess (a1) - Supplies a pointer to a control object of type process
//      which represents the old process that is switched from (32-bit address).
//
// Return Value:
//
//    None.
//
//--------------------------------------------------------------------
//--

        NESTED_ENTRY(KiSwapProcess)
        NESTED_SETUP(2,3,3,0)
        PROLOGUE_END

//
// Register aliases
//
// N.B. rpCSLock, rMasterSeq, rNewSeq, rOldPsrL and rSessionInfo are declared
//      here but are not referenced anywhere in this routine.
//

        rNewProc  = a0
        rOldProc  = a1

        rpCSLock  = loc2

        rpT1      = t0
        rpT2      = t1
        rProcSet  = t2
        rNewActive= t3
        rOldActive= t4
        rMasterSeq= t5
        rNewSeq   = t6
        rOldPsrL  = t7
        rVa       = t8
        rPDE0     = t9                          // PDE for page directory page 0
        rVa2      = t10
        rSessionBase = t11
        rSessionInfo = t12
        rT1       = t13
        rT2       = t14

//
// KiSwapProcess must get the context swap lock
// KxSwapProcess is called from SwapContext with the lock held
//
// The lock queue entry address passed to the acquire routine is
// Prcb + PbLockQueue + (LockQueueContextSwapLock * 16).
//

#if !defined(NT_UP)

        movl        rpT1 = KiPcr+PcPrcb         // -> PCR field holding the PRCB address
        ;;

        ld8         rpT1 = [rpT1]               // rpT1 = PRCB address
        ;;

        add         out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, rpT1
        br.call.sptk brp = KeAcquireQueuedSpinLockAtDpcLevel
        ;;

        br.sptk     Ksp_Continue                // skip the alternate entry prologue
#endif // !defined(NT_UP)

        ;;

        ALTERNATE_ENTRY(KxSwapProcess)
        NESTED_SETUP(2,3,3,0)
        PROLOGUE_END

//
// Clear the processor set member number in the old process and set the
// processor member number in the new process.
//

Ksp_Continue:

#if !defined(NT_UP)

        add         rpT2 = PrActiveProcessors, rOldProc // -> old active processor set
        movl        rpT1 = KiPcr + PcSetMember  // -> processor set member
        ;;

        ld4         rProcSet= [rpT1]            // rProcSet.4 = processor set member
        add         rpT1 = PrActiveProcessors, rNewProc // -> new active processor set
        ;;

        ld4         rNewActive = [rpT1]         // rNewActive.4 = new active processor set
        ld4         rOldActive = [rpT2]         // rOldActive.4 = old active processor set
        ;;

        //
        // N.B. xor toggles the member bit, so it only clears the bit if it
        //      is currently set in the old set (assumed: this processor is
        //      a member of the old process's active set - confirm with callers).
        //

        or          rNewActive = rNewActive,rProcSet // set processor member in new set
        xor         rOldActive = rOldActive,rProcSet // clear processor member in old set
        ;;

        st4         [rpT1] = rNewActive         // set new active processor set
        st4         [rpT2] = rOldActive         // set old active processor set

#endif // !defined(NT_UP)

//
// If the process sequence number matches the system sequence number, then
// use the process RID. Otherwise, allocate a new process RID.
//
// N.B. KiMasterRid, KiMasterSequence are changed only when holding the
//      KiContextSwapLock.
//
// The work is delegated to KiSyncNewRegionId:
//      out0 = address of NewProcess->ProcessRegion
//      out1 = 8-byte value loaded from NewProcess->SessionMapInfo
//

        add         rT2 = PrSessionMapInfo, rNewProc
        add         out0 = PrProcessRegion, rNewProc
        ;;

        ld8         out1 = [rT2]
        br.call.sptk brp = KiSyncNewRegionId
        ;;

//
// Switch address space to new process
// v0 = rRid = new process rid
//

        fwb                                     // hint to flush write buffers

        FAST_DISABLE_INTERRUPTS

        //
        // Fetch everything needed for the translation register update
        // before PSR.ic is cleared: the two page directory entries to
        // insert and the two virtual addresses (read from the PCR) at
        // which they are mapped.
        //

        add         rpT1 = PrDirectoryTableBase, rNewProc
        movl        rVa = KiPcr+PcPdeUtbase
        add         rpT2 = PrSessionParentBase, rNewProc
        movl        rVa2 = KiPcr+PcPdeStbase
        ;;

        ld8.nta     rPDE0 = [rpT1]              // rPDE0 = Page directory page 0
        ld8.nta     rSessionBase = [rpT2]       // rSessionBase = session space root entry
        ld8.nta     rVa = [rVa]                 // rVa  = virtual address for user dirbase
        ld8.nta     rVa2 = [rVa2]               // rVa2 = virtual address for session dirbase
        ;;

//
// To access IFA, ITIR registers, PSR.ic bit must be 0. Otherwise,
// it causes an illegal operation fault. While PSR.ic=0, any
// interruption can not be afforded. Make sure there will be no
// TLB miss and no interrupt coming in during this period.
//

        rsm         1 << PSR_IC                 // PSR.ic=0
        ;;

        srlz.d                                  // must serialize
        mov         rT1 = PAGE_SHIFT << IDTR_PS // load page size field for ITIR
        ;;

        mov         cr.itir = rT1               // set up ITIR (page size) for dirbase
        ptr.d       rVa, rT1                    // remove DTR for user space
        ;;

        mov         cr.ifa = rVa                // set up IFA for dirbase vaddr
        mov         rT2 = DTR_UTBASE_INDEX
        ;;

        itr.d       dtr[rT2] = rPDE0            // insert PDE0 to DTR
        ;;

        ptr.d       rVa2, rT1                   // remove DTR for session
        ;;                                      // to avoid an overlapping error

        mov         cr.ifa = rVa2               // set up IFA for session dirbase vaddr
        mov         rT2 = DTR_STBASE_INDEX
        ;;

        itr.d       dtr[rT2] = rSessionBase     // insert the root for session space
        ;;

        ssm         1 << PSR_IC                 // PSR.ic=1
        ;;

        srlz.i                                  // must I serialize

#if DBG

        //
        // Checked builds only: record the two entries just inserted in the
        // PRCB processor state area, at
        // PbProcessorState+KpsSpecialRegisters+KsTrD0 + 8 * (DTR index).
        //
        // N.B. t0, t1 and t3 are used directly here; they alias rpT1, rpT2
        //      and rNewActive, none of which is read again afterwards.
        //

        mov         t0 = PbProcessorState+KpsSpecialRegisters+KsTrD0+(8*DTR_UTBASE_INDEX)
        movl        t3 = KiPcr + PcPrcb
        ;;

        ld8         t3 = [t3]                   // t3 = PRCB address
        mov         t1 = PbProcessorState+KpsSpecialRegisters+KsTrD0+(8*DTR_STBASE_INDEX)
        ;;

        add         t0 = t3, t0                 // -> saved slot for the user dirbase DTR
        add         t1 = t3, t1                 // -> saved slot for the session dirbase DTR
        ;;

        st8         [t0] = rPDE0
        st8         [t1] = rSessionBase
        ;;

#endif

        FAST_ENABLE_INTERRUPTS

//
// Now make sure branch history is enabled for non wow processes
// and disabled for wow processes
//
// The whole section is skipped when KiVectorLogMask is zero, and also when
// bits 31:24 of CPUID register 3 are not equal to 7 (presumably the
// processor family field - confirm against the architecture manual).
// 675 is the index of the model specific register that is modified; its
// meaning is not established by this file.
//

        add         t1 = @gprel(KiVectorLogMask), gp
        ;;

        ld8         t1 = [t1]
        ;;

        cmp.eq      pt0 = t1, r0                // mask == 0 => nothing to do
(pt0)   br.cond.sptk SkipBranchHistory

        mov         t1 = 3                      // CPUID register number
        ;;

        mov         t2 = cpuid[t1]
        add         t3 = PrWow64Process, rNewProc
        ;;

        extr.u      t2 = t2, 24, 8              // t2 = CPUID[3] bits 31:24
        ld4         t4 = [t3];                  // t4 = NewProcess->Wow64Process

        ;;

        cmp.ne      pt1 = 7, t2                 // pt1 = field is not 7
        ;;

        mov         t1 = 675                    // MSR index
(pt1)   br.dpnt     SkipBranchHistory
        ;;

        mov         t2 = msr[t1]                // t2 = current MSR value
        cmp.eq      pt1,pt2 = zero, t4          // Wow64 is non-zero
        ;;

(pt1)   mov         t3 = 2                      // Enable the HB for ia64 procs
(pt2)   mov         t3 = 256                    // Disable the HB for wow64 procs
        ;;

        dep         t2 = t3, t2, 0, 9           // replace the low 9 bits with the selected value
        ;;

        mov         msr[t1] = t2;               // write the updated MSR value back

        ;;

SkipBranchHistory:

#if !defined(NT_UP)

//
// Can now release the context swap lock
//

        movl        rpT1 = KiPcr+PcPrcb
        ;;

        ld8         rpT1 = [rpT1]               // rpT1 = PRCB address
        ;;

        add         out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, rpT1
        br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
        ;;

#endif // !defined(NT_UP)

        NESTED_RETURN
        NESTED_EXIT(KiSwapProcess)
SBTTL("Retire Deferred Procedure Call List")
//++
// Routine:
//
//    VOID
//    KiRetireDpcList (
//      PKPRCB Prcb,
//      )
//
// Routine Description:
//
//    This routine is called to retire the specified deferred procedure
//    call list. DPC routines are called using the idle thread (current)
//    stack.
//
//    Each pass removes the entry at the head of the PRCB DPC list, captures
//    its routine and arguments, decrements the queue depth, drops the DPC
//    lock (MP builds), and calls the routine with interrupts enabled. The
//    loop repeats until the queue depth is observed to be zero, at which
//    point DpcRoutineActive and DpcInterruptRequested are cleared and the
//    depth is checked one final time.
//
//    N.B. Interrupts must be disabled on entry to this routine. Control is returned
//         to the caller with the same conditions true.
//
//    N.B. gp is loaded from the DPC routine's function descriptor before the
//         call and is not restored by this routine.
//
// Arguments:
//
//    a0 - Address of the current PRCB.
//
// Return value:
//
//    None.
//
//--

        NESTED_ENTRY(KiRetireDpcList)
        NESTED_SETUP(1,2,4,0)

        PROLOGUE_END

Krdl_Restart:

        add         t0 = PbDpcQueueDepth, a0        // t0 -> Prcb->DpcQueueDepth
        add         t1 = PbDpcRoutineActive, a0     // t1 -> Prcb->DpcRoutineActive
        add         t2 = PbDpcLock, a0              // t2 -> Prcb->DpcLock
        ;;

        ld4         t4 = [t0]                       // t4 = current queue depth (unlocked read)
        add         t3 = PbDpcListHead+LsFlink, a0  // t3 -> Prcb->DpcListHead.Flink
        ;;

Krdl_Restart2:

        //
        // The queue depth doubles as the routine-active indicator: a
        // non-zero depth marks DPC processing active, and a zero depth
        // stores zero and leaves through the exit path.
        //

        cmp4.eq     pt1 = zero, t4
        st4         [t1] = t4                       // DpcRoutineActive = queue depth
(pt1)   br.spnt     Krdl_Exit
        ;;

#if !defined(NT_UP)
        ACQUIRE_SPINLOCK(t2, a0, Krdl_20)
#endif // !defined(NT_UP)

        //
        // Re-read the depth now that the lock is held; the list may have
        // been emptied since the unlocked read above.
        //

        ld4         t4 = [t0]
        LDPTR       (t5, t3)                        // -> first DPC entry
        ;;

        cmp4.eq     pt1, pt2 = zero, t4             // pt1 = empty, pt2 = not empty
        ;;

(pt2)   add         t10 = LsFlink, t5               // t10 -> first entry's Flink
(pt2)   add         out0 = -DpDpcListEntry, t5      // out0 = DPC object containing the entry
(pt1)   br.spnt     Krdl_Unlock
        ;;

        LDPTR       (t6, t10)                       // t6 -> entry following the first
        add         t11 = DpDeferredRoutine, out0   // t11 -> Dpc->DeferredRoutine
        add         t12 = DpSystemArgument1, out0   // t12 -> Dpc->SystemArgument1
        ;;

//
// Setup call to DPC routine
//
// arguments are:
//      dpc object address (out0)
//      deferred context   (out1)
//      system argument 1  (out2)
//      system argument 2  (out3)
//
// N.B. the arguments must be loaded from the DPC object BEFORE
//      the inserted flag is cleared to prevent the object being
//      overwritten before its time.
//
// t11 and t12 are walked through the DPC object with post-increment loads:
//      t11: DeferredRoutine -> DeferredContext -> DpLock
//      t12: SystemArgument1 -> SystemArgument2
//
// The DeferredRoutine value (t13) is dereferenced as a two word function
// descriptor: the first word is the branch target and the second word is
// loaded into gp.
//

        ld8.nt1     t13 = [t11], DpDeferredContext-DpDeferredRoutine
        ld8.nt1     out2 = [t12], DpSystemArgument2-DpSystemArgument1
        ;;

        ld8.nt1     out1 = [t11], DpLock-DpDeferredContext
        ld8.nt1     out3 = [t12]
        add         t4 = -1, t4                     // one fewer queued DPC

        STPTRINC    (t3, t6, -LsFlink)              // ListHead.Flink = next; t3 -> ListHead
        ld8.nt1     t14 = [t13], 8                  // t14 = routine entry point
        add         t15 = LsBlink, t6               // t15 -> next entry's Blink
        ;;

        ld8.nt1     gp = [t13]                      // gp for the DPC routine
        STPTR       (t15, t3)                       // next->Blink = list head

        STPTR       (t11, zero)                     // clear the DPC object's DpLock field
        st4         [t0] = t4                       // store decremented queue depth

#if !defined(NT_UP)
        RELEASE_SPINLOCK(t2)                        // set spin lock not owned
#endif //!defined(NT_UP)

        FAST_ENABLE_INTERRUPTS
        mov         bt0 = t14
        br.call.sptk.few.clr brp = bt0              // call DPC routine
        ;;

//
// Check to determine if any more DPCs are available to process.
//
// All temporaries are recomputed at Krdl_Restart since the call above may
// have destroyed them; only a0 (an input register) is relied upon.
//

        FAST_DISABLE_INTERRUPTS
        br          Krdl_Restart
        ;;

//
// The DPC list became empty while we were acquiring the DPC queue lock.
// Clear DPC routine active.  The race condition mentioned above doesn't
// exist here because we hold the DPC queue lock.
//

Krdl_Unlock:

#if !defined(NT_UP)
        add         t2 = PbDpcLock, a0
        ;;
        RELEASE_SPINLOCK(t2)
#endif // !defined(NT_UP)

Krdl_Exit:

        //
        // Clear the active and interrupt-requested indicators, then look at
        // the queue depth once more. If a DPC was queued in the meantime,
        // resume processing at Krdl_Restart2 with t0-t4 set up as that
        // label expects.
        //

        add         t0 = PbDpcQueueDepth, a0
        add         t1 = PbDpcRoutineActive, a0
        add         out0 = PbDpcInterruptRequested, a0
        ;;

        st4.nta     [t1] = zero                     // DpcRoutineActive = 0
        st4.rel.nta [out0] = zero                   // DpcInterruptRequested = 0
        add         t2 = PbDpcLock, a0

        ld4         t4 = [t0]                       // t4 = queue depth after clearing
        add         t3 = PbDpcListHead+LsFlink, a0
        ;;

        cmp4.eq     pt1, pt2 = zero, t4
(pt2)   br.spnt     Krdl_Restart2                   // more work arrived; go around again
        ;;

        NESTED_RETURN
        NESTED_EXIT(KiRetireDpcList)
SBTTL("Dispatch Interrupt")
//++
//--------------------------------------------------------------------
// Routine:
//
//    KiDispatchInterrupt
//
// Routine Description:
//
//    This routine is entered as the result of a software interrupt generated
//    at DISPATCH_LEVEL. Its function is to process the Deferred Procedure Call
//    (DPC) list, and then perform a context switch if a new thread has been
//    selected for execution on the processor.
//
//    This routine is entered at IRQL DISPATCH_LEVEL with the dispatcher
//    database unlocked. When a return to the caller finally occurs, the
//    IRQL remains at DISPATCH_LEVEL, and the dispatcher database is still
//    unlocked.
//
//    N.B. On entry to this routine the volatile states (excluding high
//    floating point register set) have been saved.
//
//    The prologue builds a switch frame on the stack and saves fs0-fs19,
//    bs0-bs4, s0-s3 (with their NaT bits), ar.pfs and ar.lc into the
//    exception frame portion of it. These are reloaded through
//    KiRestoreExceptionFrame only on the path that actually swaps context;
//    the other paths exit without having modified them.
//
// On entry:
//
//    sp - points to stack scratch area.
//
// Arguments:
//
//    None
//
// Return Value:
//
//    None.
//--------------------------------------------------------------------
//--

        NESTED_ENTRY(KiDispatchInterrupt)
        PROLOGUE_BEGIN

        .regstk     0, 4, 2, 0
        alloc       t16 = ar.pfs, 0, 4, 2, 0
        .save       rp, loc0
        mov         loc0 = brp                      // loc0 = return address
        .fframe     SwitchFrameLength
        add         sp = -SwitchFrameLength, sp     // allocate the switch frame
        ;;

        .save       ar.unat, loc1
        mov         loc1 = ar.unat                  // loc1 = caller's ar.unat
        add         t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
        add         t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
        ;;

        //
        // Save fs19..fs0, two per group, walking t0 and t1 down through
        // the frame with post-increment. The branch registers and ar.lc
        // are copied to temporaries along the way and stored afterwards.
        //

        .save.gf    0x0, 0xC0000
        stf.spill   [t0] = fs19, ExFltS17-ExFltS19
        stf.spill   [t1] = fs18, ExFltS16-ExFltS18
        ;;

        .save.gf    0x0, 0x30000
        stf.spill   [t0] = fs17, ExFltS15-ExFltS17
        stf.spill   [t1] = fs16, ExFltS14-ExFltS16
        mov         t10 = bs4
        ;;

        .save.gf    0x0, 0xC000
        stf.spill   [t0] = fs15, ExFltS13-ExFltS15
        stf.spill   [t1] = fs14, ExFltS12-ExFltS14
        mov         t11 = bs3
        ;;

        .save.gf    0x0, 0x3000
        stf.spill   [t0] = fs13, ExFltS11-ExFltS13
        stf.spill   [t1] = fs12, ExFltS10-ExFltS12
        mov         t12 = bs2
        ;;

        .save.gf    0x0, 0xC00
        stf.spill   [t0] = fs11, ExFltS9-ExFltS11
        stf.spill   [t1] = fs10, ExFltS8-ExFltS10
        mov         t13 = bs1
        ;;

        .save.gf    0x0, 0x300
        stf.spill   [t0] = fs9, ExFltS7-ExFltS9
        stf.spill   [t1] = fs8, ExFltS6-ExFltS8
        mov         t14 = bs0
        ;;

        .save.gf    0x0, 0xC0
        stf.spill   [t0] = fs7, ExFltS5-ExFltS7
        stf.spill   [t1] = fs6, ExFltS4-ExFltS6
        mov         t15 = ar.lc
        ;;

        .save.gf    0x0, 0x30
        stf.spill   [t0] = fs5, ExFltS3-ExFltS5
        stf.spill   [t1] = fs4, ExFltS2-ExFltS4
        ;;

        .save.f     0xC
        stf.spill   [t0] = fs3, ExFltS1-ExFltS3     // save fs3
        stf.spill   [t1] = fs2, ExFltS0-ExFltS2     // save fs2
        ;;

        .save.f     0x3
        stf.spill   [t0] = fs1, ExBrS4-ExFltS1      // save fs1
        stf.spill   [t1] = fs0, ExBrS3-ExFltS0      // save fs0
        ;;

        .save.b     0x18
        st8         [t0] = t10, ExBrS2-ExBrS4       // save bs4
        st8         [t1] = t11, ExBrS1-ExBrS3       // save bs3
        ;;

        .save.b     0x6
        st8         [t0] = t12, ExBrS0-ExBrS2       // save bs2
        st8         [t1] = t13, ExIntS2-ExBrS1      // save bs1
        ;;

        .save.b     0x1
        st8         [t0] = t14, ExIntS3-ExBrS0      // save bs0
        ;;

        .save.gf    0xC, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s3, ExIntS1-ExIntS3      // save s3
        .mem.offset 8,0
        st8.spill   [t1] = s2, ExIntS0-ExIntS2      // save s2
        ;;

        .save.gf    0x3, 0x0
        .mem.offset 0,0
        st8.spill   [t0] = s1, ExApLC-ExIntS1       // save s1
        .mem.offset 8,0
        st8.spill   [t1] = s0, ExApEC-ExIntS0       // save s0
        ;;

        .savepsp    ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
        st8         [t1] = t16, ExIntNats-ExApEC    // save ar.pfs captured by alloc
        mov         t4 = ar.unat                    // captured Nats of s0-s3
        ;;

        .savepsp    ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
        st8         [t0] = t15                      // save ar.lc
        .savepsp    @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
        st8         [t1] = t4                       // save Nats of s0-s3

        PROLOGUE_END

//
// Register aliases
//

        rPrcb     = loc2
        rKerGP    = loc3

        rpT1      = t0
        rpT2      = t1
        rT1       = t2
        rT2       = t3
        rpDPLock  = t4                              // pointer to dispatcher lock

        pNoTh     = pt1                             // No next thread to run
        pNext     = pt2                             // next thread not null
        pNull     = pt3                             // no thread available
        pOwned    = pt4                             // dispatcher lock already owned
        pNotOwned = pt5
        pQEnd     = pt6                             // quantum end request pending
        pNoQEnd   = pt7                             // no quantum end request pending

//
// Increment the dispatch interrupt count
//

        mov         rKerGP = gp                     // save gp
        movl        rPrcb = KiPcr + PcPrcb
        ;;

        LDPTR       (rPrcb, rPrcb)                  // rPrcb -> Prcb
        ;;

        add         rpT1 = PbDispatchInterruptCount, rPrcb
        ;;

        ld4         rT1 = [rpT1]
        ;;

        add         rT1 = rT1, zero, 1              // rT1 = rT1 + 0 + 1
        ;;

        st4         [rpT1] = rT1

// **** TBD **** use alpha optimization to first check Dpc Q depth

//
// Process the DPC list
//

Kdi_PollDpcList:

//
// Process the deferred procedure call list.
//
// Interrupts are briefly enabled and then disabled again before calling
// KiRetireDpcList, which requires interrupts disabled on entry.
//

        FAST_ENABLE_INTERRUPTS
        ;;

        srlz.d

//
// **** TBD ***** No stack switch as in alpha, mips...
// Save current initial stack address and set new initial stack address.
//

        FAST_DISABLE_INTERRUPTS
        mov         out0 = rPrcb
        br.call.sptk brp = KiRetireDpcList
        ;;

//
// Check to determine if quantum end has occurred.
//
// N.B. If a new thread is selected as a result of processing a quantum
//      end request, then the new thread is returned with the dispatcher
//      database locked. Otherwise, NULL is returned with the dispatcher
//      database unlocked.
//

        FAST_ENABLE_INTERRUPTS
        add         rpT1 = PbQuantumEnd, rPrcb
        ;;

        ld4         rT1 = [rpT1]                    // get quantum end indicator
        ;;

        cmp4.ne     pQEnd, pNoQEnd = rT1, zero      // if zero, no quantum end reqs
        mov         gp = rKerGP                     // restore gp
        ;;

(pQEnd) st4         [rpT1] = zero                   // clear quantum end indicator
(pNoQEnd) br.cond.sptk Kdi_NoQuantumEnd
(pQEnd) br.call.spnt brp = KiQuantumEnd             // call KiQuantumEnd (C code)
        ;;

        //
        // v0 is the thread pointer (or NULL) returned by KiQuantumEnd.
        // It is tested with a full 64-bit compare, the same way the
        // PbNextThread pointer is tested below; a 4-byte compare would look
        // only at the low 32 bits and could mistake a valid thread address
        // whose low half is zero for NULL, leaving the dispatcher database
        // locked with no context switch performed.
        //

        cmp.eq      pNoTh, pNext = v0, zero         // pNoTh = no next thread
(pNoTh) br.dpnt     Kdi_Exit                        // br to exit if no next thread
(pNext) br.dpnt     Kdi_Swap                        // br to swap to next thread

//
// If no quantum end requests:
// Check to determine if a new thread has been selected for execution on
// this processor.
//

Kdi_NoQuantumEnd:
        add         rpT2 = PbNextThread, rPrcb
        ;;

        LDPTR       (rT1, rpT2)                     // rT1 = address of next thread object
        ;;

        cmp.eq      pNull = rT1, zero               // pNull => no thread selected
(pNull) br.dpnt     Kdi_Exit                        // exit if no thread selected

#if !defined(NT_UP)

//
// try to acquire the dispatcher database lock.
//
// If the lock cannot be taken, go back and poll the DPC list again
// rather than spinning here.
//

        mov         out0 = LockQueueDispatcherLock
        movl        out1 = KiPcr+PcSystemReserved+8
        br.call.sptk brp = KeTryToAcquireQueuedSpinLockRaiseToSynch
        ;;

        cmp.ne      pOwned, pNotOwned = TRUE, v0    // pOwned = 1 if not free
(pOwned) br.dpnt    Kdi_PollDpcList                 // br out if owned
        ;;

#else

        mov         rT1 = SYNCH_LEVEL
        ;;

        SET_IRQL    (rT1)

#endif // !defined(NT_UP)

//
// Reread address of next thread object since it is possible for it to
// change in a multiprocessor system.
//

Kdi_Swap:

        add         rpT2 = PbNextThread, rPrcb      // -> next thread
        movl        rpT1 = KiPcr + PcCurrentThread
        ;;

        LDPTR       (s1, rpT1)                      // current thread object
        LDPTR       (s2, rpT2)                      // next thread object
        add         rpT1 = PbCurrentThread, rPrcb
        ;;

//
// Reready current thread for execution and swap context to the selected
// thread.
//
// Note: Set IdleSwapBlock in the current thread so no idle processor
// can switch to this processor before it is removed from the current
// processor.
//

        STPTR       (rpT2, zero)                    // clear addr of next thread
        add         out0 = ThIdleSwapBlock, s1      // block swap from idle
        mov         rT1 = 1
        ;;

        STPTR       (rpT1, s2)                      // set addr of current thread
        st1         [out0] = rT1, -ThIdleSwapBlock  // IdleSwapBlock = 1; the post-decrement
                                                    // leaves out0 = current (old) thread
        br.call.sptk brp = KiReadyThread            // call KiReadyThread(OldTh)
        ;;

        mov         s0 = rPrcb                      // setup call
        cmp.ne      pt0 = zero, zero                // no need to lock context swap
        br.call.sptk brp = SwapContext              // call SwapContext(Prcb, OldTh, NewTh)
        ;;

//
// Restore saved registers, and return.
//

        add         out0 = STACK_SCRATCH_AREA+SwExFrame, sp
        br.call.sptk brp = KiRestoreExceptionFrame
        ;;

Kdi_Exit:

        //
        // Common epilogue: reload ar.pfs from the frame slot written in the
        // prologue, recover the return address and ar.unat from the locals,
        // and pop the switch frame.
        //

        add         rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
        ;;

        ld8         rT1 = [rpT1]                    // rT1 = saved ar.pfs
        mov         brp = loc0
        ;;

        mov         ar.unat = loc1
        mov         ar.pfs = rT1
        .restore
        add         sp = SwitchFrameLength, sp
        br.ret.sptk brp

        NESTED_EXIT(KiDispatchInterrupt)
|