Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

439 lines
11 KiB

  1. TITLE "AMD64 Support Routines"
  2. ;++
  3. ;
  4. ; Copyright (c) 2000 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; miscs.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements various routines for the AMD64 that must be
  13. ; written in assembler.
  14. ;
  15. ; Author:
  16. ;
  17. ; Forrest Foltz (forrestf) 14-Oct-2000
  18. ;
  19. ; Environment:
  20. ;
  21. ; Kernel mode only.
  22. ;
  23. ;--
  24. include kxamd64.inc
  25. include ksamd64.inc
  26. extern HalpHiberInProgress:byte
  ;
  ; FLUSH_TB
  ;
  ; Clears CR4_PGE in CR4, rewrites CR3 with the value it already holds,
  ; and then sets CR4_PGE in CR4 again. HalpLMStub expands this macro on
  ; both sides of its PAT write to invalidate the TLB.
  ;
  ; Note that CR4_PGE is set on exit regardless of its state on entry.
  ;
  ; Registers modified:
  ;
  ;     rax   - left holding the CR3 value
  ;     rcx   - left holding the final CR4 value
  ;     flags - modified by the and/or
  ;
  FLUSH_TB macro

          mov     rcx, cr4                ; rcx = current CR4
          and     rcx, NOT CR4_PGE        ; strip the PGE bit
          mov     cr4, rcx                ; CR4 <- value with PGE clear

          mov     rax, cr3                ; rewrite CR3 with the value
          mov     cr3, rax                ;   it already contains

          or      rcx, CR4_PGE            ; put the PGE bit back
          mov     cr4, rcx                ; CR4 <- value with PGE set

          endm
  ;++
  ;
  ; VOID
  ; HalProcessorIdle(
  ;     VOID
  ;     )
  ;
  ; Routine Description:
  ;
  ;     This function is called when the current processor is idle.
  ;
  ;     It is called with interrupts disabled, and the processor is idle
  ;     until it receives an interrupt. The function does not need to
  ;     return until an interrupt is received by the current processor.
  ;
  ;     This is the lowest level of processor idle. It occurs frequently,
  ;     and this function (alone) should not put the processor into a
  ;     power savings mode which requires a large amount of time to enter
  ;     and exit.
  ;
  ; Arguments:
  ;
  ;     None.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ;--

  LEAF_ENTRY HalProcessorIdle, _TEXT$00

  ;
  ; The "sti / hlt" pair puts the processor into a halted state with
  ; interrupts enabled, without an interrupt being processed before the
  ; halt. STI has a delay slot: it does not take effect until after the
  ; instruction that follows it, so the processor halts first and
  ; interrupts become enabled while it is halted.
  ;
  ; The hlt is assembled only when NT_UP is defined. On an MP HAL the
  ; processor is not stopped, since that causes the SNOOP to slow down
  ; as well; in that case interrupts are simply enabled.
  ;

          sti
  ifdef NT_UP
          hlt
  endif

  ;
  ; Return to the system. If there is still no work to do, it will call
  ; back here to idle again.
  ;

          ret

  LEAF_END HalProcessorIdle, _TEXT$00
  ;++
  ;
  ; VOID
  ; HalpHalt (
  ;     VOID
  ;     );
  ;
  ; Routine Description:
  ;
  ;     Executes a hlt instruction. Once execution continues past the
  ;     hlt, control is returned to the caller.
  ;
  ;     This routine does not change the interrupt flag; whether the
  ;     processor can leave the halted state through a maskable interrupt
  ;     therefore depends on the caller's interrupt state.
  ;
  ; Arguments:
  ;
  ;     None.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ;--

  ;
  ; The routine is placed in _TEXT$00, the same section every other
  ; routine in this module uses.
  ;

  LEAF_ENTRY HalpHalt, _TEXT$00

          hlt
          ret

  LEAF_END HalpHalt, _TEXT$00
  ;++
  ;
  ; VOID
  ; HalpIoDelay (
  ;     VOID
  ;     );
  ;
  ; Routine Description:
  ;
  ;     Generates a delay after port I/O by executing two jumps, each of
  ;     which targets the instruction immediately following it.
  ;
  ;     No registers or flags are modified.
  ;
  ; Arguments:
  ;
  ;     None.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ;--

  LEAF_ENTRY HalpIoDelay, _TEXT$00

          jmp     short @f                ; jump to the next instruction
  @@:     jmp     short @f                ; and once more
  @@:     ret

  LEAF_END HalpIoDelay, _TEXT$00
  ;++
  ;
  ; VOID
  ; HalpSerialize (
  ;     VOID
  ;     )
  ;
  ; Routine Description:
  ;
  ;     This function implements the fence operation for out-of-order
  ;     execution by executing a cpuid instruction.
  ;
  ;     eax is not initialized before the cpuid, so the leaf that is
  ;     queried is whatever the caller left in eax. The values cpuid
  ;     returns are not used.
  ;
  ; Arguments:
  ;
  ;     None
  ;
  ; Return Value:
  ;
  ;     None
  ;
  ;     rbx is preserved. rax, rcx and rdx are overwritten by cpuid.
  ;
  ;--

  NESTED_ENTRY HalpSerialize, _TEXT$00

          push_reg rbx                    ; cpuid overwrites ebx; preserve rbx

          END_PROLOGUE

          cpuid

          pop     rbx                     ; restore the caller's rbx
          ret

  NESTED_END HalpSerialize, _TEXT$00
  ;++
  ;
  ; HalpLMIdentityStub
  ;
  ;     This routine is entered during startup of a secondary processor.
  ;     The contents of this routine are actually copied into the startup
  ;     block (see mpsproca.c). Its purpose is to give StartPx_PMStub a
  ;     32-bit addressable target.
  ;
  ;     The act of jumping here causes the processor to begin execution in
  ;     long mode. Therefore, a 64-bit jump to HalpLMStub can now be
  ;     performed.
  ;
  ;     HalpLMIdentityStubEnd labels the first byte past the stub.
  ;
  ; Arguments:
  ;
  ;     rdi -> identity-mapped address of PROCESSOR_START_BLOCK. Only the
  ;            low 32 bits are used; the stub zero-extends them itself.
  ;
  ; Return Value:
  ;
  ;     None. Control leaves through a jump to the address stored in the
  ;     PsbLmTarget field, with:
  ;
  ;     rax = value of the PsbProcessorState + PsCr3 field
  ;     rcx = jump target (PsbLmTarget field)
  ;     rdi = value of the PsbSelfMap field
  ;
  ;--

  LEAF_ENTRY HalpLMIdentityStub, _TEXT$00

          mov     edi, edi                ; discard the high 32 bits of rdi

  ;
  ; rdi is reloaded last because it is the base for the other loads.
  ;

          mov     rax, qword ptr [rdi + PsbProcessorState + PsCr3]
          mov     rcx, qword ptr [rdi + PsbLmTarget]
          mov     rdi, qword ptr [rdi + PsbSelfMap]

          jmp     rcx

          public  HalpLMIdentityStubEnd
  HalpLMIdentityStubEnd::

  LEAF_END HalpLMIdentityStub, _TEXT$00
  ;
  ; RESTORE_SAVED_MSR Field, Msr
  ;
  ; Reads the 64-bit value stored at
  ; [rdi] + PsbProcessorState + PsSpecialRegisters + Field, splits it into
  ; edx:eax and writes it to the MSR whose index is Msr.
  ;
  ; Registers modified: rax, rcx, rdx and the flags (shr).
  ;
  RESTORE_SAVED_MSR macro Field, Msr
          mov     rdx, [rdi] + PsbProcessorState + PsSpecialRegisters + Field
          mov     eax, edx                ; eax = low 32 bits
          shr     rdx, 32                 ; edx = high 32 bits
          mov     ecx, Msr
          wrmsr
          endm

  ;++
  ;
  ; HalpLMStub
  ;
  ;     This routine is entered during startup of a secondary processor.
  ;     StartPx_PMStub (xmstub.asm) has just been left and the processor
  ;     is running in an identity-mapped address space.
  ;
  ;     The routine loads the final CR3, the PAT, the GDT/IDT, the segment
  ;     registers, the task register and the debug registers from the
  ;     PROCESSOR_START_BLOCK. When HalpHiberInProgress is nonzero it also
  ;     restores CR8, the LDT and the saved MSRs. It then loads the saved
  ;     stack pointer, flags and general registers, increments
  ;     PsbCompletionFlag and transfers to the saved CxRip.
  ;
  ; Arguments:
  ;
  ;     rax == Final CR3 to be used
  ;     rdi -> identity-mapped address of PROCESSOR_START_BLOCK
  ;
  ; Return Value:
  ;
  ;     None. The routine does not return to a caller; the final ret
  ;     pops the saved CxRip that was pushed on the restored stack.
  ;
  ;--

  LEAF_ENTRY HalpLMStub, _TEXT$00

  ;
  ; Set the final CR3 value. We are now executing in image-loaded
  ; code, rather than code that has been copied to low memory.
  ;
  ; LEAF_ENTRY ensures 16-byte alignment, so the following two
  ; instructions are guaranteed to be on the same page.
  ;

          mov     cr3, rax
          jmp     $+2

  ;
  ; Load the PAT and invalidate the TLB. FLUSH_TB overwrites rax and rcx,
  ; so the PAT value and MSR index are loaded after the first flush.
  ;

          FLUSH_TB
          mov     eax, [rdi] + PsbMsrPat
          mov     edx, [rdi] + PsbMsrPat + 4
          mov     ecx, MSR_PAT
          wbinvd
          wrmsr
          wbinvd
          FLUSH_TB

  ;
  ; Load this processor's GDT and IDT. Because PSB_GDDT32_CODE64 is
  ; identical to KGDT64_R0_CODE (asserted in mpsproca.c), no far jump
  ; is necessary to load a new CS.
  ;

          lgdt    fword ptr [rdi] + PsbProcessorState + PsGdtr
          lidt    fword ptr [rdi] + PsbProcessorState + PsIdtr

  ;
  ; Load the segment registers from the saved context frame.
  ;

          mov     ds, [rdi] + PsbProcessorState + PsContextFrame + CxSegDS
          mov     es, [rdi] + PsbProcessorState + PsContextFrame + CxSegES
          mov     fs, [rdi] + PsbProcessorState + PsContextFrame + CxSegFS
          mov     gs, [rdi] + PsbProcessorState + PsContextFrame + CxSegGS
          mov     ss, [rdi] + PsbProcessorState + PsContextFrame + CxSegSS

  ;
  ; Force the TSS descriptor into a non-busy state, so we don't fault
  ; when we load the TR.
  ;
  ; NOTE(review): the selector is used directly as a byte offset into the
  ; GDT, which presumes its low three (TI/RPL) bits are zero - confirm.
  ;

          movzx   eax, word ptr [rdi] + PsbProcessorState + SrTr ; get TSS selector
          add     rax, [rdi] + PsbProcessorState + PsGdtr + 2 ; add GDT base -> TSS descriptor
          and     byte ptr [rax+5], NOT 2 ; clear the busy bit

  ;
  ; Load the task register
  ;

          ltr     WORD PTR [rdi] + PsbProcessorState + SrTr

  ;
  ; Check if it is a fresh startup or a resume from hibernate. On a
  ; fresh startup the control register / MSR restore below is skipped.
  ;

          mov     al, HalpHiberInProgress
          test    al, al
          jz      LmsLoadDebugRegisters

  ;
  ; We are waking up from a lower power state. Restore CR8, the LDT
  ; register and the saved MSRs here.
  ;

          mov     rax, [rdi] + PsbProcessorState + PsSpecialRegisters + PsCr8
          mov     cr8, rax

          mov     ax, word ptr [rdi] + PsbProcessorState + PsLdtr
          lldt    ax

          RESTORE_SAVED_MSR SrMsrGsBase, MSR_GS_BASE
          RESTORE_SAVED_MSR SrMsrGsSwap, MSR_GS_SWAP
          RESTORE_SAVED_MSR SrMsrStar, MSR_STAR
          RESTORE_SAVED_MSR SrMsrLStar, MSR_LSTAR
          RESTORE_SAVED_MSR SrMsrCStar, MSR_CSTAR
          RESTORE_SAVED_MSR SrMsrSyscallMask, MSR_SYSCALL_MASK

  ;
  ; Load the debug registers. DR7 is zeroed first and is loaded with its
  ; saved value only after DR0-DR3 and DR6 have been loaded.
  ;
  ; The .errnz checks assert that the six saved values are contiguous
  ; quadwords in the order DR0, DR1, DR2, DR3, DR6, DR7, which is what
  ; the lodsq sequence walks through.
  ;
  ; NOTE(review): lodsq advancing rsi upward presumes the direction flag
  ; is clear at this point - confirm.
  ;

  LmsLoadDebugRegisters:
          xor     eax, eax                ; rax = 0
          mov     dr7, rax

          lea     rsi, [rdi] + PsbProcessorState + SrKernelDr0

          .errnz  (SrKernelDr1 - SrKernelDr0 - 1 * 8)
          .errnz  (SrKernelDr2 - SrKernelDr0 - 2 * 8)
          .errnz  (SrKernelDr3 - SrKernelDr0 - 3 * 8)
          .errnz  (SrKernelDr6 - SrKernelDr0 - 4 * 8)
          .errnz  (SrKernelDr7 - SrKernelDr0 - 5 * 8)

          lodsq
          mov     dr0, rax
          lodsq
          mov     dr1, rax
          lodsq
          mov     dr2, rax
          lodsq
          mov     dr3, rax
          lodsq
          mov     dr6, rax
          lodsq
          mov     dr7, rax

  ;
  ; Load the stack pointer and eflags, and store the new IP in a return
  ; frame. Also push the saved rdi: rdi itself must keep pointing at the
  ; start block until the very end, so its final value is popped last.
  ;
  ; Note that up to this point, no stack is available.
  ;

          mov     rsp, [rdi] + PsbProcessorState + PsContextFrame + CxRsp
          pushq   [rdi] + PsbProcessorState + PsContextFrame + CxEflags
          popfq
          pushq   [rdi] + PsbProcessorState + PsContextFrame + CxRip
          pushq   [rdi] + PsbProcessorState + PsContextFrame + CxRdi

  ;
  ; Load the remaining general registers from the context frame.
  ;

          mov     rax, [rdi] + PsbProcessorState + PsContextFrame + CxRax
          mov     rbx, [rdi] + PsbProcessorState + PsContextFrame + CxRbx
          mov     rcx, [rdi] + PsbProcessorState + PsContextFrame + CxRcx
          mov     rdx, [rdi] + PsbProcessorState + PsContextFrame + CxRdx
          mov     rsi, [rdi] + PsbProcessorState + PsContextFrame + CxRsi
          mov     rbp, [rdi] + PsbProcessorState + PsContextFrame + CxRbp
          mov     r8, [rdi] + PsbProcessorState + PsContextFrame + CxR8
          mov     r9, [rdi] + PsbProcessorState + PsContextFrame + CxR9
          mov     r10, [rdi] + PsbProcessorState + PsContextFrame + CxR10
          mov     r11, [rdi] + PsbProcessorState + PsContextFrame + CxR11
          mov     r12, [rdi] + PsbProcessorState + PsContextFrame + CxR12
          mov     r13, [rdi] + PsbProcessorState + PsContextFrame + CxR13
          mov     r14, [rdi] + PsbProcessorState + PsContextFrame + CxR14
          mov     r15, [rdi] + PsbProcessorState + PsContextFrame + CxR15

  ;
  ; Indicate that we've started, pop the correct value for rdi
  ; and return. The inc modifies the arithmetic flags that popfq
  ; loaded above.
  ;

          inc     DWORD PTR [rdi] + PsbCompletionFlag
          pop     rdi
          ret

  LEAF_END HalpLMStub, _TEXT$00
  358. END