title  "Context Swap"
;++
;
; Copyright (c) 2000  Microsoft Corporation
;
; Module Name:
;
;   ctxswap.asm
;
; Abstract:
;
;   This module implements the code necessary to field the dispatch interrupt
;   and perform context switching.
;
; Author:
;
;   David N. Cutler (davec) 26-Aug-2000
;
; Environment:
;
;    Kernel mode only.
;
;--

include ksamd64.inc

        extern  KeAcquireQueuedSpinLockAtDpcLevel:proc
        extern  KeAcquireQueuedSpinLockRaiseToSynch:proc
        extern  KeBugCheck:proc
        extern  KeReleaseQueuedSpinLock:proc
        extern  KeReleaseQueuedSpinLockFromDpcLevel:proc
        extern  KiDeliverApc:proc
        extern  KiDispatcherLock:qword
        extern  KiQuantumEnd:proc
        extern  KiReadyThread:proc
        extern  KiRetireDpcList:proc
        extern  __imp_HalRequestSoftwareInterrupt:qword

        subttl  "Unlock Dispatcher Database"
;++
;
; VOID
; KiUnlockDispatcherDatabase (
;     IN KIRQL OldIrql
;     )
;
; Routine Description:
;
;   This routine is entered at SYNCH_LEVEL with the dispatcher database
;   locked. Its function is to either unlock the dispatcher database and
;   return or initiate a context switch if another thread has been selected
;   for execution.
;
; Arguments:
;
;   OldIrql (cl) - Supplies the IRQL when the dispatcher database lock was
;       acquired.
;
; Return Value:
;
;   None.
;
;--

;
; Stack frame allocated by KiUnlockDispatcherDatabase. The frame holds two
; parameter home slots followed by a one byte save area for the previous
; IRQL, and is padded so its total size is 24 bytes (8 mod 16).
;

UdFrame struct
        P1Home  dq ?                    ; queued spin lock number parameter
        P2Home  dq ?                    ; previous IRQL parameter
        SavedIrql db ?                  ; saved previous IRQL
        Fill    db 7 dup (?)            ; fill to 8 mod 16
UdFrame ends

        NESTED_ENTRY KiUnlockDispatcherDatabase, _TEXT$00

        alloc_stack (sizeof UdFrame)    ; allocate stack frame

        END_PROLOGUE

;
; Check if a new thread is scheduled for execution.
;
; N.B. The previous IRQL is held in cl for the whole routine. It is only
;      spilled to the frame around the one call that is made before it is
;      consumed.
;

        cmp     qword ptr gs:[PcNextThread], 0 ; check if thread scheduled
        jne     short KiUD30            ; if ne, new thread scheduled

;
; Release dispatcher database lock, lower IRQL to its previous level,
; and return.
;

ifndef NT_UP

KiUD10: mov     dl, cl                  ; set old IRQL value
        mov     ecx, LockQueueDispatcherLock ; set lock queue number
        call    KeReleaseQueuedSpinLock ; release dispatcher lock

else

KiUD10: movzx   ecx, cl                 ; set IRQL to previous level

        SetIrql                         ;

endif

        add     rsp, sizeof UdFrame     ; deallocate stack frame
        ret                             ; return

;
; A new thread has been selected to run on the current processor, but the new
; IRQL is not below dispatch level. If the current processor is not executing
; a DPC, then request a dispatch interrupt on the current processor.
;
; N.B. The DPC routine active indicator is tested as a 32-bit value, which
;      matches the way the same indicator is tested in SwapContext. A wider
;      compare would also examine the bytes that follow the indicator and
;      could incorrectly suppress the dispatch interrupt request.
;

KiUD20: cmp     dword ptr gs:[PcDpcRoutineActive], 0 ; check if DPC routine active
        jne     short KiUD10            ; if ne, DPC routine is active
        mov     UdFrame.SavedIrql[rsp], cl ; save previous IRQL across call
        mov     cl, DISPATCH_LEVEL      ; request dispatch interrupt
        call    __imp_HalRequestSoftwareInterrupt ;
        mov     cl, UdFrame.SavedIrql[rsp] ; restore previous IRQL
        jmp     short KiUD10            ; release lock and lower IRQL

;
; Check if the previous IRQL is less than dispatch level.
;
; N.B. IRQL is an unsigned quantity, so an unsigned condition is used for
;      the comparison.
;
; N.B. The frame is deallocated before control is transferred so that the
;      common code is entered with the stack exactly as it was when this
;      routine was called.
;

KiUD30: cmp     cl, DISPATCH_LEVEL      ; check if IRQL below dispatch level
        jae     short KiUD20            ; if ae, not below dispatch level
        add     rsp, sizeof UdFrame     ; deallocate stack frame
        jmp     short KxUnlockDispatcherDatabase ; finish in common code

        NESTED_END KiUnlockDispatcherDatabase, _TEXT$00

;
; There is a new thread scheduled for execution and the previous IRQL is
; less than dispatch level. Context switch to the new thread immediately.
;
; N.B. The following routine is entered by a jump from the above code.
;
; N.B. The following routine is carefully written as a nested function that
;      appears to have been called directly by the caller of the above
;      function which unlocks the dispatcher database.
;
; Arguments:
;
;   OldIrql (cl) - Supplies the IRQL when the dispatcher database lock was
;       acquired.
;

        NESTED_ENTRY KxUnlockDispatcherDatabase, _TEXT$00

        GENERATE_EXCEPTION_FRAME        ; generate exception frame

;
; Promote the next thread to current thread in the PRCB and record the
; previous IRQL in the thread that is being switched out.
;
;   rbx - current PRCB
;   rdi - thread being switched out
;   rsi - thread being switched in
;

        mov     rbx, gs:[PcCurrentPrcb] ; rbx = current PRCB
        mov     rdi, PbCurrentThread[rbx] ; rdi = outgoing thread
        mov     rsi, PbNextThread[rbx]  ; rsi = incoming thread
        and     qword ptr PbNextThread[rbx], 0 ; no next thread pending
        mov     PbCurrentThread[rbx], rsi ; incoming thread is now current
        mov     ThWaitIrql[rdi], cl     ; remember previous IRQL in old thread

ifndef NT_UP

        mov     byte ptr ThIdleSwapBlock[rdi], 1 ; block swap from idle

endif

;
; Reready the outgoing thread, then set up the SwapContext register
; arguments (al, cl, and on MP systems edx) and switch threads.
;

        mov     rcx, rdi                ; parameter = outgoing thread
        call    KiReadyThread           ; reready thread for execution
        mov     cl, ThWaitIrql[rdi]     ; cl = APC interrupt bypass disable
        xor     eax, eax                ; al = FALSE, NPX save not required

ifndef NT_UP

        xor     edx, edx                ; edx = 0, not a swap from idle

endif

        call    SwapContext             ; swap context

;
; On return from SwapContext, rsi addresses the current thread and al is
; the kernel APC pending flag. If a kernel APC is pending, deliver it at
; APC level and then lower IRQL to zero. Otherwise, lower IRQL to the wait
; IRQL saved in the current thread.
;

        movzx   ecx, byte ptr ThWaitIrql[rsi] ; ecx = original wait IRQL
        test    al, al                  ; check if kernel APC pending
        jz      short KxUD10            ; if z, no kernel APC pending
        mov     ecx, APC_LEVEL          ; set IRQL to APC level

        SetIrql                         ;

        xor     r8d, r8d                ; clear trap frame address
        xor     edx, edx                ; clear exception frame address
        xor     ecx, ecx                ; set previous mode to kernel
        call    KiDeliverApc            ; deliver kernel mode APC
        xor     ecx, ecx                ; IRQL to restore is zero
KxUD10:                                 ; ecx = IRQL to restore

        SetIrql                         ; set IRQL to previous level

        RESTORE_EXCEPTION_STATE         ; restore exception state/deallocate

        ret                             ; return

        NESTED_END KxUnlockDispatcherDatabase, _TEXT$00

        subttl  "Swap Context"
;++
;
; BOOLEAN
; KiSwapContext (
;    IN PKTHREAD Thread
;    )
;
; Routine Description:
;
;   This function is a small wrapper that marshalls arguments and calls the
;   actual swap context routine.
;
; Arguments:
;
;   Thread (rcx) - Supplies the address of the new thread.
;
; Return Value:
;
;   If a kernel APC is pending, then a value of TRUE is returned. Otherwise,
;   a value of FALSE is returned.
;
;--

        NESTED_ENTRY KiSwapContext, _TEXT$00

        GENERATE_EXCEPTION_FRAME        ; generate exception frame

;
; Load the register arguments used by SwapContext:
;
;   al  - FALSE, full NPX save not required
;   cl  - wait IRQL of the thread being switched out
;   edx - zero, not a swap from idle (MP systems only)
;   rbx - current PRCB
;   rdi - thread being switched out
;   rsi - thread being switched in
;

        mov     rsi, rcx                ; rsi = new thread (Thread argument)
        mov     rbx, gs:[PcCurrentPrcb] ; rbx = current PRCB
        mov     rdi, PbCurrentThread[rbx] ; rdi = thread being switched out
        mov     PbCurrentThread[rbx], rsi ; new thread becomes current thread
        mov     cl, ThWaitIrql[rdi]     ; cl = APC interrupt bypass disable
        xor     eax, eax                ; al = FALSE, NPX save not required

ifndef NT_UP

        xor     edx, edx                ; edx = 0, not a swap from idle

endif

        call    SwapContext             ; swap context

        RESTORE_EXCEPTION_STATE         ; restore exception state/deallocate

        ret                             ; return

        NESTED_END KiSwapContext, _TEXT$00

        subttl  "Dispatch Interrupt"
;++
;
; Routine Description:
;
;   This routine is entered as the result of a software interrupt generated
;   at DISPATCH_LEVEL. Its function is to process the DPC list, and then
;   perform a context switch if a new thread has been selected for execution
;   on the current processor.
;
;   This routine is entered at DISPATCH_LEVEL with the dispatcher database
;   unlocked.
;
; Arguments:
;
;   None
;
; Return Value:
;
;   None.
;
;--

;
; Stack frame used by KiDispatchInterrupt. The SavedRbx slot is filled by the
; push of rbx in the prologue; the remaining (sizeof DiFrame - 8) bytes are
; allocated explicitly. The total size is 24 bytes (8 mod 16).
;

DiFrame struct
        P1Home  dq ?                    ; PRCB address parameter
        Fill    dq ?                    ; fill to 8 mod 16
        SavedRbx dq ?                   ; saved RBX
DiFrame ends

        NESTED_ENTRY KiDispatchInterrupt, _TEXT$00

        push_reg rbx                    ; save nonvolatile register
        alloc_stack (sizeof DiFrame - 8) ; allocate remainder of stack frame

        END_PROLOGUE

        mov     rbx, gs:[PcCurrentPrcb] ; get current PRCB address
        and     dword ptr PbDpcInterruptRequested[rbx], 0 ; clear request

;
; Check if the DPC queue has any entries to process.
;
; N.B. Interrupts are disabled before the DPC queue depth and timer hand
;      are examined, and are explicitly enabled again at KiDI20.
;
; N.B. The DPC list is retired on the DPC stack recorded in the PRCB. The
;      current stack pointer is saved in the PRCB for the duration of the
;      call and reloaded from there afterwards.
;

KiDI10: cli                             ; disable interrupts
        mov     eax, PbDpcQueueDepth[rbx] ; get DPC queue depth
        or      eax, PbTimerHand[rbx]   ; merge timer hand value
        jz      short KiDI20            ; if z, no DPCs to process
        mov     PbSavedRsp[rbx], rsp    ; save current stack pointer
        mov     rsp, PbDpcStack[rbx]    ; set DPC stack pointer
        mov     rcx, rbx                ; set PRCB address parameter
        call    KiRetireDpcList         ; process the DPC list
        mov     rsp, PbSavedRsp[rbx]    ; restore current stack pointer

;
; Check to determine if quantum end is requested.
;

KiDI20: sti                             ; enable interrupts
        cmp     dword ptr PbQuantumEnd[rbx], 0 ; check if quantum end request
        je      short KiDI50            ; if e, quantum end not requested

;
; Process quantum end event.
;
; N.B. If a new thread is selected as a result of processing the quantum end
;      request, then the new thread is returned with the dispatcher database
;      locked. Otherwise, NULL is returned with the dispatcher database
;      unlocked.
;

        and     dword ptr PbQuantumEnd[rbx], 0 ; clear quantum end indicator
        call    KiQuantumEnd            ; process quantum end
        test    rax, rax                ; test if new thread selected
        jnz     short KiDI60            ; if ne, new thread selected (in rax)

;
; A new thread has not been selected for execution. Restore nonvolatile
; registers, deallocate stack frame, and return.
;
; N.B. RBX is reloaded from its frame slot and the whole frame, including
;      that slot, is then deallocated with a single stack adjustment.
;

KiDI30: mov     rbx, DiFrame.SavedRbx[rsp] ; restore nonvolatile register
        add     rsp, sizeof DiFrame     ; deallocate stack frame
        ret                             ; return

;
; The dispatch lock could not be acquired. Lower IRQL to dispatch level, and
; loop processing the DPC list and quantum end events.
;

KiDI40: mov     ecx, DISPATCH_LEVEL     ; set IRQL to DISPATCH_LEVEL

        SetIrql                         ;

        jmp short KiDI10                ; try again

;
; Check to determine if a new thread has been selected for execution on this
; processor.
;

KiDI50: cmp     qword ptr PbNextThread[rbx], 0 ; check if new thread selected
        je      short KiDI30            ; if eq, then no new thread

;
; On MP systems, raise IRQL to SYNCH_LEVEL and make a single attempt to
; acquire the dispatcher lock: if the lock value is NULL, it is atomically
; replaced with the address of this processor's dispatcher lock queue entry.
; If the attempt fails, the routine does not wait for the lock. It drops
; back to DISPATCH_LEVEL and starts over at the DPC list check.
;

ifndef NT_UP

        mov     ecx, SYNCH_LEVEL        ; set IRQL to SYNCH_LEVEL

        SetIrql                         ;

        lea     rcx, KiDispatcherLock   ; get dispatcher database lock address
        lea     rdx, PbLockQueue + (16 * LockQueueDispatcherLock)[rbx] ; lock queue
        xor     eax, eax                ; set comparand value to NULL
   lock cmpxchg [rcx], rdx              ; try to acquire dispatcher lock
        jnz     short KiDI40            ; if nz, dispatcher lock not acquired

endif

        mov     rax, PbNextThread[rbx]  ; get next thread address

;
; Swap context to a new thread.
;
; N.B. The frame is deallocated and RBX is restored before control is
;      transferred, so the common code is entered with the stack exactly as
;      it was when this routine was called. The next thread address is
;      passed in rax.
;

KiDI60: add     rsp, sizeof DiFrame - 8 ; deallocate stack frame
        pop     rbx                     ; restore nonvolatile register
        jmp     short KxDispatchInterrupt ; finish in common code

        NESTED_END KiDispatchInterrupt, _TEXT$00

;
; There is a new thread scheduled for execution and the dispatcher lock
; has been acquired. Context switch to the new thread immediately.
;
; N.B. The following routine is entered by a jump from the above
;      routine.
;
; N.B. The following routine is carefully written as a nested function that
;      appears to have been called directly by the caller of the above
;      function which processes the dispatch interrupt.
;
; Arguments:
;
;   Thread (rax) - Supplies the address of the next thread to run on the
;       current processor.
;

        NESTED_ENTRY KxDispatchInterrupt, _TEXT$00

        GENERATE_EXCEPTION_FRAME        ; generate exception frame

;
; Promote the next thread (passed in rax) to current thread in the PRCB.
;
;   rbx - current PRCB
;   rdi - thread being switched out
;   rsi - thread being switched in
;

        mov     rsi, rax                ; rsi = incoming thread
        mov     rbx, gs:[PcCurrentPrcb] ; rbx = current PRCB
        mov     rdi, PbCurrentThread[rbx] ; rdi = outgoing thread
        and     qword ptr PbNextThread[rbx], 0 ; no next thread pending
        mov     PbCurrentThread[rbx], rsi ; incoming thread is now current

ifndef NT_UP

        mov     byte ptr ThIdleSwapBlock[rdi], 1 ; block swap from idle

endif

;
; Reready the outgoing thread, then set up the SwapContext register
; arguments (al, cl, and on MP systems edx) and switch threads.
;

        mov     rcx, rdi                ; parameter = outgoing thread
        call    KiReadyThread           ; reready thread for execution
        mov     cl, APC_LEVEL           ; cl = APC interrupt bypass disable
        mov     eax, TRUE               ; al = TRUE, full NPX save required

ifndef NT_UP

        xor     edx, edx                ; edx = 0, not a swap from idle

endif

        call    SwapContext             ; call context swap routine

        RESTORE_EXCEPTION_STATE         ; restore exception state/deallocate

        ret                             ; return

        NESTED_END KxDispatchInterrupt, _TEXT$00

        subttl  "Swap Context"
;++
;
; Routine Description:
;
;   This routine is called to swap context from one thread to the next. It
;   swaps context, flushes the translation buffer, swaps the process address
;   space if necessary, and returns to its caller.
;
;   N.B. This routine is only called by code within this module and the idle
;        thread code and uses special register calling conventions.
;
; Arguments:
;
;   al - Supplies a boolean value that determines whether the full legacy
;       floating state needs to be saved.
;
;   cl - Supplies the APC interrupt bypass disable IRQL value.
;
;   edx - Supplies a logical value that specifies whether the context swap
;       is being called from the idle thread (MP systems only).
;
;   rbx - Supplies the address of the current PRCB.
;
;   rdi - Supplies the address of previous thread.
;
;   rsi - Supplies the address of next thread.
;
; Return value:
;
;   al - Supplies the kernel APC pending flag.
;
;   rbx - Supplies the address of the current PRCB.
;
;   rsi - Supplies the address of current thread.
;
;--

        NESTED_ENTRY SwapContext, _TEXT$00

        push_reg rbp                    ; save nonvolatile register
        alloc_stack (KSWITCH_FRAME_LENGTH - (2 * 8)) ; allocate stack frame

        END_PROLOGUE

;
; Record the register arguments in the switch frame of the previous thread.
;
; N.B. These values stay on the previous thread's kernel stack. After the
;      stack switch below, the same frame offsets address the values that
;      were recorded when the new thread was itself switched out.
;

        mov     SwNpxSave[rsp], al      ; save NPX save
        mov     SwApcBypass[rsp], cl    ; save APC bypass disable

;
; Set the new thread state to running.
;
; N.B. The state of the new thread MUST be set to running before releasing
;      the dispatcher lock.
;

        mov     byte ptr ThState[rsi], Running ; set thread state to running

;
; Acquire the context swap lock so the address space of the previous process
; cannot be deleted, then release the dispatcher database lock.
;
; N.B. The context swap lock is used to protect the address space until the
;      context switch has sufficiently progressed to the point where the
;      previous process address space is no longer needed. This lock is also
;      acquired by the reaper thread before it finishes thread termination.
;
; N.B. When called from the idle thread (edx nonzero), neither lock operation
;      is performed here.
;


ifndef NT_UP

        test    edx, edx                ; test if call from idle thread
        jnz     short KiSC05            ; if nz, call from idle thread
        lea     rcx, PbLockQueue + (16 * LockQueueContextSwapLock)[rbx] ; lock queue
        call    KeAcquireQueuedSpinLockAtDpcLevel ; acquire context swap lock
        lea     rcx, PbLockQueue + (16 * LockQueueDispatcherLock)[rbx] ; lock queue
        call    KeReleaseQueuedSpinLockFromDpcLevel ; release dispatcher lock

endif

;
; Check if an attempt is being made to context switch while in a DPC routine.
;

KiSC05: cmp     dword ptr PbDpcRoutineActive[rbx], 0 ; check if DPC active
        jne     KiSC60                  ; if ne, DPC is active

;
; Accumulate the total time spent in a thread.
;
; N.B. The elapsed time (current cycle count minus thread start count) is
;      added to the previous thread's run time and then added back to the
;      thread start count, which makes the start count equal to the current
;      cycle count. The start count is a 64-bit value whose low and high
;      halves are at offsets 0 and 4; the update must use the same offsets
;      that were used to read it.
;

ifdef PERF_DATA

        rdtsc                           ; read cycle counter
        sub     eax, PbThreadStartCount + 0[rbx] ; sub out thread start time
        sbb     edx, PbThreadStartCount + 4[rbx] ;
        add     EtPerformanceCountLow[rdi], eax ; accumulate thread run time
        adc     EtPerformanceCountHigh[rdi], edx ;
        add     PbThreadStartCount + 0[rbx], eax ; set new thread start time
        adc     PbThreadStartCount + 4[rbx], edx ;

endif

;
; Save the kernel mode XMM control/status register. If the current thread
; executes in user mode, then also save the legacy floating point state.
;

        stmxcsr SwMxCsr[rsp]            ; save kernel mode XMM control/status
        cmp     byte ptr ThNpxState[rdi], UserMode ; check if user mode thread
        jne     short KiSC10            ; if ne, not user mode thread
        mov     rbp, ThInitialStack[rdi] ; get previous thread initial stack
        cmp     byte ptr SwNpxSave[rsp], TRUE ; check if full save required
        jne     short KiSC07            ; if ne, full save not required
        fnsaved [rbp]                   ; save full legacy floating point state
        jmp     short KiSC10            ;

;
; Full floating save not required.
;

KiSC07: fnstenvd [rbp]                  ; save legacy floating environment

;
; Switch kernel stacks.
;
; N.B. From this point on, frame relative references (SwNpxSave, SwMxCsr,
;      SwApcBypass) address the switch frame on the new thread's stack.
;

KiSC10: mov     ThKernelStack[rdi], rsp ; save old kernel stack pointer
        mov     rsp, ThKernelStack[rsi] ; get new kernel stack pointer

;
; Swap the process address space if the new process is not the same as the
; previous process.
;

        mov     rax, ThApcState + AsProcess[rdi] ; get previous process address
        cmp     rax, ThApcState + AsProcess[rsi] ; check if process address match
        je      short KiSC20            ; if e, process addresses match
        mov     r14, ThApcState + AsProcess[rsi] ; get new process address

;
; Update the processor set masks.
;

ifndef NT_UP

        mov     rcx, PbSetMember[rbx]   ; get processor set member
        xor     PrActiveProcessors[rax], rcx ; clear bit in previous set
        xor     PrActiveProcessors[r14], rcx ; set bit in new set

if DBG

        test    PrActiveProcessors[rax], rcx ; test if bit clear in previous set
        jnz     short @f                ; if nz, bit not clear in previous set
        test    PrActiveProcessors[r14], rcx ; test if bit set in new set
        jnz     short KiSC15            ; if nz, bit set in new set
@@:     int     3                       ; debug break - incorrect active mask

endif

endif

;
; Load new CR3 value which will flush the TB and set the IOPM map offset in
; the TSS.
;

KiSC15: mov     r15, gs:[PcTss]         ; get processor TSS address
        mov     cx, PrIopmOffset[r14]   ; get process IOPM offset
        mov     TssIoMapBase[r15], cx   ; set TSS IOPM offset
        mov     rax, PrDirectoryTableBase[r14] ; get new directory base
        mov     cr3, rax                ; flush TLB and set new directory base

;
; Release the context swap lock.
;

KiSC20: mov     byte ptr ThIdleSwapBlock[rdi], 0 ; unblock swap from idle

ifndef NT_UP

        lea     rcx, PbLockQueue + (16 * LockQueueContextSwapLock)[rbx] ; lock queue
        call    KeReleaseQueuedSpinLockFromDpcLevel ; release context swap lock

endif

;
; Set the new kernel stack base in the TSS.
;

        mov     r15, gs:[PcTss]         ; get processor TSS address
        mov     rbp, ThInitialStack[rsi] ; get new stack base address
        mov     TssRsp0[r15], rbp       ; set stack base address in TSS

;
; If the new thread executes in user mode, then restore the legacy floating
; state, load the compatibility mode TEB address, load the native user mode
; TEB address, and reload the segment registers if needed.
;
; N.B. The upper 32-bits of the compatibility mode TEB address are always
;      zero.
;
; N.B. For a full restore, the control word in the save area is temporarily
;      replaced with one that masks all exceptions, and the original control
;      word is put back and loaded after the state has been restored.
;

        cmp     byte ptr ThNpxState[rsi], UserMode ; check if user mode thread
        jne     KiSC30                  ; if ne, not user mode thread
        cmp     byte ptr SwNpxSave[rsp], TRUE ; check if full restore required
        jne     short KiSC22            ; if ne, full restore not required
        mov     cx, LfControlWord[rbp]  ; save current control word
        mov     word ptr LfControlWord[rbp], 03fh ; set to mask all exceptions
        frstord [rbp]                   ; restore legacy floating point state
        mov     LfControlWord[rbp], cx  ; restore control word
        fldcw   word ptr LfControlWord[rbp] ; load legacy control word
        jmp     short KiSC24            ;

;
; Full legacy floating restore not required.
;

KiSC22: fldenv  [rbp]                   ; restore legacy floating environment

;
; Set base of compatibility mode TEB.
;

KiSC24: mov     eax, ThTeb[rsi]         ; compute compatibility mode TEB address
        add     eax, CmThreadEnvironmentBlockOffset ;
        mov     rcx, gs:[PcGdt]         ; get GDT base address
        mov     KgdtBaseLow + KGDT64_R3_CMTEB[rcx], ax ; set CMTEB base address
        shr     eax, 16                 ;
        mov     KgdtBaseMiddle + KGDT64_R3_CMTEB[rcx], al ;
        mov     KgdtBaseHigh + KGDT64_R3_CMTEB[rcx], ah   ;

;
; If the user segment selectors have been changed, then reload them with
; their canonical values.
;
; N.B. The check sums the DS, ES, and GS selectors and compares the sum with
;      three times the expected user data selector value.
;

        mov     ax, ds                  ; compute sum of segment selectors
        mov     cx, es                  ;
        add     ax, cx                  ;
        mov     cx, gs                  ;
        add     ax, cx                  ;
        cmp     ax, ((KGDT64_R3_DATA or RPL_MASK) * 3) ; check if sum matches
        je      short KiSC25            ; if e, sum matches expected value
        mov     cx, KGDT64_R3_DATA or RPL_MASK ; reload user segment selectors
        mov     ds, cx                  ;
        mov     es, cx                  ;

;
; N.B. The following reload of the GS selector destroys the system MSR_GS_BASE
;      register. Thus this sequence must be done with interrupt off.
;
; N.B. The PCR address is read into edx:eax before the selector is reloaded
;      so that it can be written back to MSR_GS_BASE afterwards.
;

        mov     eax, gs:[PcSelf]        ; get current PCR address
        mov     edx, gs:[PcSelf + 4]    ;
        cli                             ; disable interrupts
        mov     gs, cx                  ; reload GS segment selector
        mov     ecx, MSR_GS_BASE        ; get GS base MSR number
        wrmsr                           ; write system PCR base address
        sti                             ; enable interrupts
KiSC25: mov     ax, KGDT64_R3_CMTEB or RPL_MASK ; reload FS segment selector
        mov     fs, ax                  ;
        mov     eax, ThTeb[rsi]         ; get low part of user TEB address
        mov     edx, ThTeb + 4[rsi]     ; get high part of user TEB address
        mov     gs:[PcTeb], eax         ; set user TEB address in PCR
        mov     gs:[PcTeb + 4], edx     ;
        mov     ecx, MSR_GS_SWAP        ; get GS base swap MSR number
        wrmsr                           ; write user TEB base address

;
; Restore kernel mode XMM control/status and update context switch counters.
;

KiSC30: ldmxcsr SwMxCsr[rsp]            ; kernel mode XMM control/status
        inc     dword ptr ThContextSwitches[rsi] ; thread count
        inc     dword ptr PbContextSwitches[rbx] ; processor count

;
; If the new thread has a kernel mode APC pending, then request an APC
; interrupt if APC bypass is disabled.
;
; N.B. The carry flag from the bypass comparison is the return value. If the
;      saved bypass IRQL is below APC_LEVEL, carry is set and TRUE is
;      returned. Otherwise an APC interrupt is requested, carry is cleared,
;      and FALSE is returned. If no kernel APC is pending, al is already
;      zero.
;

        mov     al, ThApcState + AsKernelApcPending[rsi] ; get APC pending
        test    al, al                  ; test if kernel APC pending
        jz      short KiSC50            ; if z, kernel APC not pending
        cmp     byte ptr SwApcBypass[rsp], APC_LEVEL ; check if APC bypass enabled
        jb      short KiSC40            ; if b, APC bypass is enabled
        mov     cl, APC_LEVEL           ; request APC interrupt
        call    __imp_HalRequestSoftwareInterrupt ;
        clc                             ; clear carry flag
KiSC40: setb    al                      ; set return value
KiSC50: add     rsp, KSWITCH_FRAME_LENGTH - (2 * 8) ; deallocate stack frame
        pop     rbp                     ; restore nonvolatile register
        ret                             ; return

;
; An attempt is being made to context switch while in a DPC routine. This is
; most likely caused by a DPC routine calling one of the wait functions.
;

KiSC60: mov     ecx, ATTEMPTED_SWITCH_FROM_DPC ; set bug check code
        call    KeBugCheck              ; bug check system - no return
        ret                             ; return

        NESTED_END SwapContext, _TEXT$00

        end