Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

553 lines
19 KiB

  1. title "Context Swap"
  2. ;++
  3. ;
  4. ; Copyright (c) 2000 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; ctxswap.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements the code necessary to field the dispatch interrupt
  13. ; and perform context switching.
  14. ;
  15. ; Author:
  16. ;
  17. ; David N. Cutler (davec) 26-Aug-2000
  18. ;
  19. ; Environment:
  20. ;
  21. ; Kernel mode only.
  22. ;
  23. ;--
  24. include ksamd64.inc
  25. extern KeBugCheckEx:proc
  26. extern KiDeliverApc:proc
  27. extern KeRaiseIrqlToSynchLevel:proc
  28. extern KiQuantumEnd:proc
  29. extern KiQueueReadyThread:proc
  30. extern KiRetireDpcList:proc
  31. extern WmiTraceContextSwap:proc
  32. extern __imp_HalRequestSoftwareInterrupt:qword
  33. subttl "Swap Context"
  34. ;++
  35. ;
  36. ; BOOLEAN
  37. ; KiSwapContext (
  38. ; IN PKTHREAD OldThread,
  39. ; IN PKTHREAD NewThread
  40. ; )
  41. ;
  42. ; Routine Description:
  43. ;
  44. ; This function is a small wrapper that marshals its arguments into the
  45. ; register convention used by SwapContext (rbx, rdi, rsi, cl) and calls it.
  46. ;
  47. ; N.B. The old thread lock has been acquired and the dispatcher lock dropped
  48. ; before this routine is called.
  49. ;
  50. ; N.B. The current thread address and the new thread state have been set
  51. ; before this routine is called.
  52. ;
  53. ; Arguments:
  54. ;
  55. ; OldThread (rcx) - Supplies the address of the old thread.
  56. ;
  57. ; NewThread (rdx) - Supplies the address of the new thread.
  58. ;
  59. ; Return Value:
  60. ;
  61. ; If a kernel APC is pending, then a value of TRUE is returned. Otherwise,
  62. ; a value of FALSE is returned. The value is the one SwapContext leaves in al.
  63. ;
  64. ;--
  65. NESTED_ENTRY KiSwapContext, _TEXT$00
  66. GENERATE_EXCEPTION_FRAME ; generate exception frame
  67. mov rbx, gs:[PcCurrentPrcb] ; rbx = current PRCB address (SwapContext input)
  68. mov rdi, rcx ; rdi = old thread address (SwapContext input)
  69. mov rsi, rdx ; rsi = new thread address (SwapContext input)
  70. mov cl, ThWaitIrql[rdi] ; cl = old thread wait IRQL, used as APC bypass disable value
  71. call SwapContext ; swap context; al = kernel APC pending flag
  72. RESTORE_EXCEPTION_STATE ; restore exception state/deallocate
  73. ret ; return
  74. NESTED_END KiSwapContext, _TEXT$00
  75. subttl "Dispatch Interrupt"
  76. ;++
  77. ;
  78. ; Routine Description:
  79. ;
  80. ; This routine is entered as the result of a software interrupt generated
  81. ; at DISPATCH_LEVEL. Its function is to process the DPC list, then process
  82. ; a quantum end request if one is set, and otherwise transfer to the context
  83. ; switch code if a new thread has been selected on the current processor.
  84. ;
  85. ; This routine is entered at DISPATCH_LEVEL with the dispatcher database
  86. ; unlocked.
  87. ;
  88. ; Arguments:
  89. ;
  90. ; None.
  91. ;
  92. ; Return Value:
  93. ;
  94. ; None.
  95. ;
  96. ;--
  97. DiFrame struct
  98. P1Home dq ? ; PRCB address parameter
  99. Fill dq ? ; fill to 8 mod 16
  100. SavedRbx dq ? ; saved RBX (slot filled by the push_reg in the prologue)
  101. DiFrame ends
  102. NESTED_ENTRY KiDispatchInterrupt, _TEXT$00
  103. push_reg rbx ; save nonvolatile register
  104. alloc_stack (sizeof DiFrame - 8) ; allocate rest of frame (SavedRbx already pushed)
  105. END_PROLOGUE
  106. mov rbx, gs:[PcCurrentPrcb] ; get current PRCB address
  107. and byte ptr PbDpcInterruptRequested[rbx], 0 ; clear request
  108. ;
  109. ; Check if the DPC queue, timer request, or (MP only) deferred ready list has any work.
  110. ;
  111. KiDI10: cli ; disable interrupts
  112. mov eax, PbDpcQueueDepth[rbx] ; get DPC queue depth (zero extends into rax)
  113. or rax, PbTimerRequest[rbx] ; merge timer request value
  114. ifndef NT_UP
  115. or rax, PbDeferredReadyListHead[rbx] ; merge deferred ready list
  116. endif
  117. jz short KiDI20 ; if z, no DPCs to process
  118. mov PbSavedRsp[rbx], rsp ; save current stack pointer
  119. mov rsp, PbDpcStack[rbx] ; switch to the DPC stack
  120. mov rcx, rbx ; set PRCB address parameter
  121. call KiRetireDpcList ; process the DPC list
  122. mov rsp, PbSavedRsp[rbx] ; restore current stack pointer
  123. ;
  124. ; Check to determine if quantum end is requested.
  125. ;
  126. KiDI20: sti ; enable interrupts
  127. cmp byte ptr PbQuantumEnd[rbx], 0 ; check if quantum end request
  128. je short KiDI40 ; if e, quantum end not requested
  129. and byte ptr PbQuantumEnd[rbx], 0 ; clear quantum end indicator
  130. call KiQuantumEnd ; process quantum end, then fall into the return path
  131. ;
  132. ; Restore nonvolatile registers, deallocate stack frame, and return.
  133. ;
  134. KiDI30: add rsp, sizeof DiFrame - 8 ; deallocate stack frame
  135. pop rbx ; restore nonvolatile register
  136. ret ; return
  137. ;
  138. ; Check to determine if a new thread has been selected for execution on this
  139. ; processor.
  140. ;
  141. KiDI40: cmp qword ptr PbNextThread[rbx], 0 ; check if new thread selected
  142. je short KiDI30 ; if eq, then no new thread
  143. ;
  144. ; Swap context to a new thread as the result of new thread being scheduled
  145. ; by the dispatch interrupt. The frame is torn down first, then control jumps.
  146. ;
  147. add rsp, sizeof DiFrame - 8 ; deallocate stack frame
  148. pop rbx ; restore nonvolatile register
  149. jmp short KxDispatchInterrupt ; transfer to context switch code (no return here)
  150. NESTED_END KiDispatchInterrupt, _TEXT$00
  151. ;
  152. ; There is a new thread scheduled for execution on the current processor.
  153. ; Context switch to the new thread immediately.
  154. ;
  155. ; N.B. The following routine is entered by a jump from KiDispatchInterrupt
  156. ; after that routine has deallocated its own stack frame.
  157. ;
  158. ; N.B. The following routine is carefully written as a nested function that
  159. ; appears to have been called directly by the caller of the above
  160. ; function which processes the dispatch interrupt.
  161. ;
  162. ; Arguments:
  163. ;
  164. ; None.
  165. ;
  166. NESTED_ENTRY KxDispatchInterrupt, _TEXT$00
  167. GENERATE_EXCEPTION_FRAME ; generate exception frame
  168. mov rbx, gs:[PcCurrentPrcb] ; rbx = current PRCB address (SwapContext input)
  169. mov rdi, PbCurrentThread[rbx] ; rdi = old thread address (SwapContext input)
  170. ;
  171. ; Raise IRQL to SYNCH level, set context swap busy for the old thread, and
  172. ; acquire the current PRCB lock (multiprocessor builds only).
  173. ;
  174. ifndef NT_UP
  175. call KeRaiseIrqlToSynchLevel ; raise IRQL to SYNCH Level
  176. mov byte ptr ThSwapBusy[rdi], 1 ; set context swap busy
  177. lea r11, PbPrcbLock[rbx] ; set address of current PRCB lock
  178. AcquireSpinLock r11 ; acquire current PRCB lock
  179. endif
  180. ;
  181. ; Get the next thread address, set the thread state to running, queue the old
  182. ; running thread, and swap context to the next thread.
  183. ;
  184. mov rsi, PbNextThread[rbx] ; rsi = next thread address (SwapContext input)
  185. and qword ptr PbNextThread[rbx], 0 ; clear next thread address
  186. mov PbCurrentThread[rbx], rsi ; set current thread address
  187. mov byte ptr ThState[rsi], Running ; set new thread state
  188. mov byte ptr ThWaitReason[rdi], WrDispatchInt ; set wait reason
  189. mov rcx, rdi ; set address of old thread
  190. mov rdx, rbx ; set address of current PRCB
  191. call KiQueueReadyThread ; queue ready thread for execution
  192. mov cl, APC_LEVEL ; cl = APC interrupt bypass disable value
  193. call SwapContext ; call context swap routine
  194. RESTORE_EXCEPTION_STATE ; restore exception state/deallocate
  195. ret ; return
  196. NESTED_END KxDispatchInterrupt, _TEXT$00
  197. subttl "Swap Context"
  198. ;++
  199. ;
  200. ; Routine Description:
  201. ;
  202. ; This routine is called to swap context from one thread to the next. It
  203. ; swaps context, flushes the translation buffer, swaps the process address
  204. ; space if necessary, and returns to its caller.
  205. ;
  206. ; N.B. This routine is only called by code within this module and the idle
  207. ; thread code and uses special register calling conventions.
  208. ;
  209. ; Arguments:
  210. ;
  211. ; al - Not examined by this routine. The legacy floating state save decision
  212. ; is made from ThNpxState of the previous thread instead.
  213. ;
  214. ; cl - Supplies the APC interrupt bypass disable IRQL value.
  215. ;
  216. ; rbx - Supplies the address of the current PRCB.
  217. ;
  218. ; rdi - Supplies the address of previous thread.
  219. ;
  220. ; rsi - Supplies the address of next thread.
  221. ;
  222. ; Return value:
  223. ;
  224. ; al - Supplies the kernel APC pending flag. It is 1 only when a kernel APC is
  225. ; pending, special APCs are enabled, and the bypass IRQL is PASSIVE_LEVEL.
  226. ; rbx - Supplies the address of the current PRCB.
  227. ;
  228. ; rsi - Supplies the address of current thread.
  229. ;
  230. ;--
  231. NESTED_ENTRY SwapContext, _TEXT$00
  232. push_reg rbp ; save nonvolatile register
  233. alloc_stack (KSWITCH_FRAME_LENGTH - (2 * 8)) ; allocate stack frame
  234. END_PROLOGUE
  235. mov SwApcBypass[rsp], cl ; save APC bypass disable
  236. ;
  237. ; Wait for new thread lock to be dropped.
  238. ;
  239. ; N.B. It is necessary to wait for the new thread context to be swapped so
  240. ; that any context switch away from the new thread on another processor
  241. ; is completed before attempting to swap context to the thread.
  242. ;
  243. ifndef NT_UP
  244. KiSC00: cmp byte ptr ThSwapBusy[rsi], 0 ; check if swap busy for new thread
  245. jne short KiSC00 ; if ne, context busy for new thread
  246. endif
  247. ;
  248. ; Increment the number of context switches on this processor.
  249. ;
  250. ; N.B. This increment is done here to force the cache block containing the
  251. ; context switch count into the cache as write exclusive. There are
  252. ; several other references to this cache block in the following code.
  253. ;
  254. inc dword ptr (PcContextSwitches - PcPrcb)[rbx] ; processor count
  255. ;
  256. ; Accumulate the total time spent in a thread.
  257. ;
  258. ifdef PERF_DATA
  259. rdtsc ; read cycle counter
  260. sub eax, PbThreadStartCount + 0[rbx] ; sub out thread start time (low)
  261. sbb edx, PbThreadStartCount + 4[rbx] ; sub out thread start time (high)
  262. add EtPerformanceCountLow[rdi], eax ; accumulate thread run time
  263. adc EtPerformanceCountHigh[rdi], edx ;
  264. add PbThreadStartCount + 0[rbx], eax ; set new thread start time (low)
  265. adc PbThreadStartCount + 4[rbx], edx ; set new thread start time (high)
  266. endif
  267. ;
  268. ; Check for context swap logging.
  269. ;
  270. cmp qword ptr (PcPerfGlobalGroupMask - PcPrcb)[rbx], 0 ; check if logging enable
  271. je short KiSC05 ; if eq, logging not enabled
  272. mov rax, (PcPerfGlobalGroupMask - PcPrcb)[rbx] ; get global mask address
  273. mov rdx, rdi ; set address of old thread
  274. mov rcx, rsi ; set address of new thread
  275. test dword ptr PERF_CONTEXTSWAP_OFFSET[rax], PERF_CONTEXTSWAP_FLAG ; check flag
  276. jz short KiSC05 ; if z, context swap events not enabled
  277. call WmiTraceContextSwap ; call trace routine
  278. ;
  279. ; Save the kernel mode XMM control/status register. If the current thread
  280. ; NPX state is switch, then save the legacy floating point state.
  281. ;
  282. KiSC05: stmxcsr SwMxCsr[rsp] ; save kernel mode XMM control/status
  283. cmp byte ptr ThNpxState[rdi], LEGACY_STATE_SWITCH ; check if switched
  284. jne short KiSC10 ; if ne, legacy state not switched
  285. mov rbp, ThInitialStack[rdi] ; get previous thread initial stack
  286. fnsaved [rbp] ; save full legacy floating point state
  287. ;
  288. ; Switch kernel stacks.
  289. ;
  290. KiSC10: mov ThKernelStack[rdi], rsp ; save old kernel stack pointer
  291. mov rsp, ThKernelStack[rsi] ; get new kernel stack pointer
  292. ;
  293. ; Swap the process address space if the new process is not the same as the
  294. ; previous process.
  295. ;
  296. mov r14, ThApcState + AsProcess[rsi] ; get new process address
  297. cmp r14, ThApcState + AsProcess[rdi] ; check if process match
  298. je short KiSC20 ; if e, process addresses match
  299. ;
  300. ; Clear the processor bit in the old process.
  301. ;
  302. ifndef NT_UP
  303. mov rax, ThApcState + AsProcess[rdi] ; get old process address
  304. mov rcx, (PcSetMember - PcPrcb)[rbx] ; get processor set member
  305. lock xor PrActiveProcessors[rax], rcx ; clear bit in previous set
  306. if DBG
  307. test PrActiveProcessors[rax], rcx ; test if bit clear in previous set
  308. jz short @f ; if z, bit clear in previous set
  309. int 3 ; debug break - incorrect active mask
  310. @@: ; reference label
  311. endif
  312. endif
  313. ;
  314. ; Set the processor bit in the new process.
  315. ;
  316. ifndef NT_UP
  317. lock xor PrActiveProcessors[r14], rcx ; set bit in new set
  318. if DBG
  319. test PrActiveProcessors[r14], rcx ; test if bit set in new set
  320. jnz short @f ; if nz, bit set in new set
  321. int 3 ; debug break - incorrect active mask
  322. @@: ; reference label
  323. endif
  324. endif
  325. ;
  326. ; Load new CR3 value which will flush the TB.
  327. ;
  328. mov rax, PrDirectoryTableBase[r14] ; get new directory base
  329. mov cr3, rax ; flush TLB and set new directory base
  330. ;
  331. ; Set context swap idle for the old thread lock.
  332. ;
  333. KiSC20: ;
  334. ifndef NT_UP
  335. and byte ptr ThSwapBusy[rdi], 0 ; set context swap idle
  336. endif
  337. ;
  338. ; Set the new kernel stack base in the TSS.
  339. ;
  340. mov r15, (PcTss - PcPrcb)[rbx] ; get processor TSS address
  341. mov rbp, ThInitialStack[rsi] ; get new stack base address
  342. mov TssRsp0[r15], rbp ; set stack base address in TSS
  343. ;
  344. ; If the new thread executes in user mode, then restore the legacy floating
  345. ; state, load the compatibility mode TEB address, load the native user mode
  346. ; TEB address, and reload the segment registers if needed.
  347. ;
  348. ; N.B. The upper 32-bits of the compatibility mode TEB address are always
  349. ; zero.
  350. ;
  351. cmp byte ptr ThNpxState[rsi], LEGACY_STATE_UNUSED ; check if kernel thread
  352. je KiSC30 ; if e, kernel thread
  353. mov cx, LfControlWord[rbp] ; save current control word
  354. mov word ptr LfControlWord[rbp], 03fh ; set to mask all exceptions
  355. frstord [rbp] ; restore legacy floating point state
  356. mov LfControlWord[rbp], cx ; restore control word in save area
  357. fldcw word ptr LfControlWord[rbp] ; load legacy control word
  358. ;
  359. ; Set base of compatibility mode TEB.
  360. ;
  361. mov eax, ThTeb[rsi] ; compute compatibility mode TEB address
  362. add eax, CmThreadEnvironmentBlockOffset ;
  363. mov rcx, (PcGdt - PcPrcb)[rbx] ; get GDT base address
  364. mov KgdtBaseLow + KGDT64_R3_CMTEB[rcx], ax ; set CMTEB base address
  365. shr eax, 16 ; move base bits 16..31 into ax
  366. mov KgdtBaseMiddle + KGDT64_R3_CMTEB[rcx], al ;
  367. mov KgdtBaseHigh + KGDT64_R3_CMTEB[rcx], ah ;
  368. ;
  369. ; If the user segment selectors have been changed, then reload them with
  370. ; their canonical values.
  371. ;
  372. ; N.B. The following code depends on the values defined in ntamd64.w that
  373. ; can be loaded in ds, es, fs, and gs. In particular an "and" operation
  374. ; is used for the below comparison.
  375. ;
  376. mov ax, ds ; combine (and) the segment selectors
  377. mov cx, es ;
  378. and ax, cx ;
  379. mov cx, gs ;
  380. and ax, cx ;
  381. cmp ax, (KGDT64_R3_DATA or RPL_MASK) ; check if combined value matches
  382. je short KiSC25 ; if e, combined value matches expected value
  383. mov cx, KGDT64_R3_DATA or RPL_MASK ; reload user segment selectors
  384. mov ds, cx ;
  385. mov es, cx ;
  386. ;
  387. ; N.B. The following reload of the GS selector destroys the system MSR_GS_BASE
  388. ; register. Thus this sequence must be done with interrupt off.
  389. ;
  390. mov eax, (PcSelf - PcPrcb)[rbx] ; get current PCR address
  391. mov edx, (PcSelf - PcPrcb + 4)[rbx] ;
  392. cli ; disable interrupts
  393. mov gs, cx ; reload GS segment selector
  394. mov ecx, MSR_GS_BASE ; get GS base MSR number
  395. wrmsr ; write system PCR base address
  396. sti ; enable interrupts
  397. KiSC25: mov ax, KGDT64_R3_CMTEB or RPL_MASK ; reload FS segment selector
  398. mov fs, ax ;
  399. mov eax, ThTeb[rsi] ; get low part of user TEB address
  400. mov edx, ThTeb + 4[rsi] ; get high part of user TEB address
  401. mov (PcTeb - PcPrcb)[rbx], eax ; set user TEB address in PCR
  402. mov (PcTeb - PcPrcb + 4)[rbx], edx ;
  403. mov ecx, MSR_GS_SWAP ; get GS base swap MSR number
  404. wrmsr ; write user TEB base address
  405. ;
  406. ; Restore kernel mode XMM control/status and update context switch counters.
  407. ;
  408. KiSC30: ldmxcsr SwMxCsr[rsp] ; kernel mode XMM control/status
  409. inc dword ptr ThContextSwitches[rsi] ; thread count
  410. ;
  411. ; Check if an attempt is being made to context switch while in a DPC routine.
  412. ;
  413. cmp word ptr PbDpcRoutineActive[rbx], 0 ; check if DPC active
  414. jne short KiSC50 ; if ne, DPC is active
  415. ;
  416. ; If the new thread has a kernel mode APC pending, then request an APC
  417. ; interrupt if APC bypass is disabled.
  418. ;
  419. cmp byte ptr ThApcState + AsKernelApcPending[rsi], TRUE ; check if APC pending
  420. jne short KiSC40 ; if ne, kernel APC not pending (ZF clear, al = 0)
  421. cmp word ptr ThSpecialApcDisable[rsi], 0 ; check if special APC disable
  422. jne short KiSC40 ; if ne, special APC disable (ZF clear, al = 0)
  423. cmp byte ptr SwApcBypass[rsp], PASSIVE_LEVEL ; check if APC bypass enabled
  424. je short KiSC40 ; if e, APC bypass enabled (ZF set, al = 1)
  425. mov cl, APC_LEVEL ; request APC interrupt
  426. call __imp_HalRequestSoftwareInterrupt ;
  427. or rax, rsp ; clear ZF flag (rsp is nonzero) so al = 0
  428. KiSC40: setz al ; set return value from ZF
  429. add rsp, KSWITCH_FRAME_LENGTH - (2 * 8) ; deallocate stack frame
  430. pop rbp ; restore nonvolatile register
  431. ret ; return
  432. ;
  433. ; An attempt is being made to context switch while in a DPC routine. This is
  434. ; most likely caused by a DPC routine calling one of the wait functions.
  435. ;
  436. KiSC50: xor r9, r9 ; clear register (parameter 4)
  437. mov SwP5Home[rsp], r9 ; set parameter 5
  438. mov r8, rsi ; set new thread address
  439. mov rdx, rdi ; set old thread address
  440. mov ecx, ATTEMPTED_SWITCH_FROM_DPC ; set bug check code
  441. call KeBugCheckEx ; bug check system - no return
  442. ret ; return
  443. NESTED_END SwapContext, _TEXT$00
  444. end