Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

793 lines
18 KiB

  1. /*++
  2. Copyright (c) 1991-2000 Microsoft Corporation
  3. Module Name:
  4. amd64.c
  5. Abstract:
  6. This module contains routines necessary to support loading and
  7. transitioning into an AMD64 kernel. The code in this module has
  8. access to amd64-specific defines found in amd64.h but not to i386-
  9. specific declarations found in i386.h.
  10. Author:
  11. Forrest Foltz (forrestf) 20-Apr-2000
  12. Environment:
  13. Revision History:
  14. --*/
  15. //
  16. // Here, we want the header files to be processed as though building for
  17. // AMD64. Make appropriate definitions and undefs.
  18. //
  19. #define _AMD64_
  20. #define _M_AMD64
  21. #define _WIN64
  22. #undef _X86_
  23. #undef X86
  24. #undef _M_X86_
  25. #undef _M_IX86
  26. //
  27. // Warning 4163 is "function unavailable as intrinsic"
  28. //
  29. #pragma warning(disable:4163)
  30. //
  31. // Warning 4235 is "nonstandard extension used", referring to __unaligned
  32. //
  33. #pragma warning(disable:4235)
  34. //
  35. // Warning 4391 is "incorrect return type for intrinsi function"
  36. //
  37. #pragma warning(disable:4391)
  38. #include "bootx86.h"
  39. #include "amd64prv.h"
  40. #include <pcmp.inc>
  41. #include <ntapic.inc>
  42. //
// GDT: function to convert a selector to a GDT entry pointer.
  44. //
  45. PKGDTENTRY64
  46. __inline
  47. GDT_PTR (
  48. PGDT_64 Gdt,
  49. ULONG Selector
  50. )
  51. {
  52. ULONG index;
  53. PKGDTENTRY64 entry;
  54. index = Selector & ~7;
  55. entry = (PKGDTENTRY64)((PUCHAR)Gdt + index);
  56. return entry;
  57. }
//
// Interrupt descriptor table: one KIDTENTRY64 per possible vector.
//

typedef struct _IDT_64 {
    KIDTENTRY64 Entries[ MAXIMUM_IDTVECTOR + 1 ];
} IDT_64, *PIDT_64;

//
// AMD64 canonical virtual addresses carry 48 significant bits; the
// mask strips the sign-extended upper bits before address decode.
//

#define VIRTUAL_ADDRESS_BITS 48
#define VIRTUAL_ADDRESS_MASK (((ULONGLONG)1 << VIRTUAL_ADDRESS_BITS) - 1)

//
// Longmode ring0 code selector. This actually lives in the middle of the
// transition data stream in amd64s.asm
//

extern USHORT BlAmd64_KGDT64_R0_CODE;

//
// Data values exported to amd64x86.c
//

const ULONG BlAmd64DoubleFaultStackSize = DOUBLE_FAULT_STACK_SIZE;
const ULONG BlAmd64KernelStackSize = KERNEL_STACK_SIZE;
const ULONG BlAmd64McaExceptionStackSize = KERNEL_MCA_EXCEPTION_STACK_SIZE;
const ULONG BlAmd64GdtSize = KGDT64_LAST;
const ULONG BlAmd64IdtSize = sizeof(IDT_64);

//
// CONST_EXPORT(x) re-exports the amd64 constant x as a ULONG named
// BlAmd64_<x> so its value is visible to the i386-compiled modules,
// which cannot include the amd64 headers.
//

#define CONST_EXPORT(x) const ULONG BlAmd64_##x = x;

CONST_EXPORT(MSR_FS_BASE)
CONST_EXPORT(MSR_GS_BASE)
CONST_EXPORT(KGDT64_SYS_TSS)
CONST_EXPORT(MSR_EFER)
CONST_EXPORT(TSS_IST_PANIC)
CONST_EXPORT(TSS_IST_MCA)

const ULONG64 BlAmd64_LOCALAPIC = LOCALAPIC;
const ULONG64 BlAmd64UserSharedData = KI_USER_SHARED_DATA;

//
// Flags to be enabled in the EFER MSR before transitioning to long mode:
// long mode enable (MSR_LME) and syscall/sysret enable (MSR_SCE).
//

const ULONG BlAmd64_MSR_EFER_Flags = MSR_LME | MSR_SCE;

//
// Array of address bit decode counts and recursive mapping bases,
// one for each level of the mapping.  Index 0 describes the leaf (PTE)
// level; the highest index describes the root (PML4) level.
//

AMD64_MAPPING_INFO BlAmd64MappingLevels[ AMD64_MAPPING_LEVELS ] =
{
    { PTE_BASE, PTI_MASK_AMD64, PTI_SHIFT },
    { PDE_BASE, PDI_MASK_AMD64, PDI_SHIFT },
    { PPE_BASE, PPI_MASK, PPI_SHIFT },
    { PXE_BASE, PXI_MASK, PXI_SHIFT }
};

//
// BlAmd64TopLevelPte refers to the physical page number of the Page Map
// Level 4 (PML4) table.
//
// BlAmd64TopLevelPte is not really a page table entry and so does not
// actually exist as an element within a page table. It exists only as
// a convenience to BlAmd64CreateAmd64Mapping().
//

HARDWARE_PTE BlAmd64TopLevelPte;

//
// PAGE_MAP_LEVEL_4 yields the identity-mapped (physical) address of the
// PML4 table.
//

#define PAGE_MAP_LEVEL_4 \
    ((PPAGE_TABLE)(BlAmd64TopLevelPte.PageFrameNumber << PAGE_SHIFT))

//
// Special PFN for BlAmd64CreateMapping: build all intermediate page
// tables for a VA but do not fill in the leaf (level 0) PTE.
//

#define PFN_NO_MAP ((PFN_NUMBER)-1)

//
// Size of the VA mapped by a level 0 page table
//

#define PAGE_TABLE_VA ((POINTER64)(PTES_PER_PAGE * PAGE_SIZE))

//
// Prototypes for local functions
//

ARC_STATUS
BlAmd64CreateMappingWorker(
    IN ULONGLONG Va,
    IN PFN_NUMBER Pfn,
    IN ULONG MappingLevel,
    IN OUT PHARDWARE_PTE UpperLevelPte
    );

VOID
BlAmd64MakePteValid(
    IN PHARDWARE_PTE Pte,
    IN PFN_NUMBER Pfn
    );
  141. VOID
  142. BlAmd64ClearTopLevelPte(
  143. VOID
  144. )
  145. /*++
  146. Routine Description:
  147. This routine simply clears BlAmd64TopLevelPte.
  148. Arguments:
  149. None.
  150. Return Value:
  151. None.
  152. --*/
  153. {
  154. *(PULONG64)&BlAmd64TopLevelPte = 0;
  155. }
  156. ARC_STATUS
  157. BlAmd64CreateMapping(
  158. IN ULONGLONG Va,
  159. IN PFN_NUMBER Pfn
  160. )
  161. /*++
  162. Routine Description:
  163. This function maps a virtual address into a 4-level AMD64 page mapping
  164. structure.
  165. Arguments:
  166. Va - Supplies the 64-bit virtual address to map
  167. Pfn - Supplies the 64-bit physical page number to map the address to
  168. Return Value:
  169. ARC_STATUS - Status of operation.
  170. --*/
  171. {
  172. ARC_STATUS status;
  173. status = BlAmd64CreateMappingWorker( Va & VIRTUAL_ADDRESS_MASK,
  174. Pfn,
  175. AMD64_MAPPING_LEVELS - 1,
  176. &BlAmd64TopLevelPte );
  177. return status;
  178. }
ARC_STATUS
BlAmd64CreateMappingWorker(
    IN ULONGLONG Va,
    IN PFN_NUMBER Pfn,
    IN ULONG MappingLevel,
    IN OUT PHARDWARE_PTE UpperLevelPte
    )

/*++

Routine Description:

    This function creates an address mapping in a single level of an AMD64
    4-level mapping structure.  It is called only by BlAmd64CreateMapping
    and by itself, recursively.

Arguments:

    Va - Supplies the 64-bit virtual address to map. This address has already
        had any insignificant upper bits masked via VIRTUAL_ADDRESS_MASK.

    Pfn - Supplies the 64-bit physical page number to map the address to. If
        Pfn == PFN_NO_MAP, then all of the page tables are put into
        place to support the mapping but the level 0 pte itself is not
        actually filled in. This is used to create the HAL's VA mapping
        area.

    MappingLevel - The mapping level in which to create the appropriate
        mapping. Must be 0, 1, 2 or 3.

    UpperLevelPte - A pointer to the parent PTE that refers to the page
        at this mapping level. If no page exists at this level for this
        address, then this routine will allocate one and modify
        UpperLevelPte appropriately.

Return Value:

    ARC_STATUS - Status of operation.

--*/

{
    PFN_NUMBER pageTablePfn;
    PAMD64_PAGE_TABLE pageTable;
    ULONGLONG va;
    PAMD64_MAPPING_INFO mappingInfo;
    ULONG pteIndex;
    PHARDWARE_PTE pte;
    ARC_STATUS status;
    BOOLEAN newPage;

    mappingInfo = &BlAmd64MappingLevels[ MappingLevel ];
    if (UpperLevelPte->Valid == 0) {

        //
        // A new page table must be allocated.
        //

        newPage = TRUE;
        pageTable = BlAmd64AllocatePageTable();
        if (pageTable == NULL) {
            return ENOMEM;
        }

        //
        // Reference the new page table with the parent PTE.
        //
        // NOTE(review): the page table address is truncated to 32 bits
        // before shifting; presumably loader page tables are allocated
        // identity-mapped below 4GB -- confirm against
        // BlAmd64AllocatePageTable().
        //

        pageTablePfn = (ULONG)pageTable >> PAGE_SHIFT;
        BlAmd64MakePteValid( UpperLevelPte, pageTablePfn );
        if (MappingLevel == (AMD64_MAPPING_LEVELS - 1)) {

            //
            // We have just allocated the top-level (PML4) page.  Insert a
            // recursive mapping here: the PML4 entry at the index derived
            // from RecursiveMappingBase points back at the PML4 itself.
            //

            pteIndex = (ULONG)((mappingInfo->RecursiveMappingBase >>
                                mappingInfo->AddressShift) &
                               mappingInfo->AddressMask);
            pte = &pageTable->PteArray[ pteIndex ];
            BlAmd64MakePteValid( pte, pageTablePfn );
        }
    } else {

        //
        // A page table structure already exists for this level; recover
        // its (identity-mapped) address from the parent PTE's PFN.
        //

        newPage = FALSE;
        pageTablePfn = UpperLevelPte->PageFrameNumber;
        pageTable = (PAMD64_PAGE_TABLE)((ULONG)pageTablePfn << PAGE_SHIFT);
    }

    //
    // Derive a pointer to the appropriate PTE within the page table.
    //

    pteIndex =
        (ULONG)(Va >> mappingInfo->AddressShift) & mappingInfo->AddressMask;
    pte = &pageTable->PteArray[ pteIndex ];
    if (MappingLevel == 0) {
        if (Pfn != PFN_NO_MAP) {

            //
            // This is an actual level 0, or PTE, entry. Just set it with
            // the Pfn that was passed in.
            //

            BlAmd64MakePteValid( pte, Pfn );
        } else {

            //
            // This is a special HAL mapping, one that ensures that all
            // levels of page table are in place to support this mapping
            // but doesn't actually fill in the level 0 PTE.
            //
            // So do nothing here except break the recursion.
            //

        }
    } else {

        //
        // More mapping levels to go, call this function recursively and
        // process the next level.
        //

        status = BlAmd64CreateMappingWorker( Va,
                                             Pfn,
                                             MappingLevel - 1,
                                             pte );
        if (status != ESUCCESS) {
            return status;
        }
    }

    if (newPage != FALSE) {

        //
        // A new page table was allocated, above.  Recursively map it
        // within the PTE_BASE region: compute the VA of this page table's
        // self-map slot and create a full mapping for it.
        //

        va = (Va >> mappingInfo->AddressShift);
        va *= sizeof(HARDWARE_PTE);
        va += mappingInfo->RecursiveMappingBase;
        status = BlAmd64CreateMapping( va, pageTablePfn );
        if (status != ESUCCESS) {
            return status;
        }
    }
    return ESUCCESS;
}
VOID
BlAmd64MakePteValid(
    IN OUT PHARDWARE_PTE Pte,
    IN PFN_NUMBER Pfn
    )

/*++

Routine Description:

    This routine fills an AMD64 Pte with the supplied Pfn and makes it
    valid, writable, accessed and dirty.

Arguments:

    Pte - Supplies a pointer to the Pte to make valid.

    Pfn - Supplies the page frame number to set in the Pte.

Return Value:

    None.

--*/

{
    //
    // Make sure we're not just overwriting a PFN in an already
    // valid PTE.
    //

    ASSERT( Pte->Valid == 0 || Pte->PageFrameNumber == Pfn );

    Pte->PageFrameNumber = Pfn;
    Pte->Valid = 1;
    Pte->Write = 1;

    //
    // NOTE(review): Accessed/Dirty are pre-set here, presumably so the
    // processor never has to update them on these boot-time mappings --
    // confirm.
    //

    Pte->Accessed = 1;
    Pte->Dirty = 1;
}
  328. VOID
  329. BlAmd64BuildGdtEntry(
  330. IN PGDT_64 Gdt,
  331. IN USHORT Selector,
  332. IN POINTER64 Base,
  333. IN ULONGLONG Limit,
  334. IN ULONG Type,
  335. IN ULONG Dpl,
  336. IN BOOLEAN LongMode,
  337. IN BOOLEAN DefaultBig
  338. )
  339. /*++
  340. Routine Description:
  341. This routine fills in an AMD64 GDT entry.
  342. Arguments:
  343. Gdt - Supplies a pointer to the GDT.
  344. Selector - Segment selector of the GDT entry within Gdt.
  345. Base - Base address value of the descriptor.
  346. Limit - Limit value of the descriptor.
  347. Type - 5-bit type value of the descriptor.
  348. Dpl - Priviledge value of the descriptor.
  349. LongMode - Indicates whether this is a longmode descriptor (valid only
  350. for code segment descriptors).
  351. DefaultBig - Supplies the value for the default/big field in the
  352. descriptor.
  353. Return Value:
  354. None.
  355. --*/
  356. {
  357. ULONG limit20;
  358. PKGDTENTRY64 gdtEntry;
  359. KGDT_BASE gdtBase;
  360. KGDT_LIMIT gdtLimit;
  361. gdtEntry = GDT_PTR(Gdt,Selector);
  362. //
  363. // Set the Base and LongMode fields
  364. //
  365. gdtBase.Base = Base;
  366. gdtEntry->BaseLow = gdtBase.BaseLow;
  367. gdtEntry->Bits.BaseMiddle = gdtBase.BaseMiddle;
  368. gdtEntry->Bits.BaseHigh = gdtBase.BaseHigh;
  369. gdtEntry->Bits.LongMode = 0;
  370. if ((LongMode != FALSE) || (Type == TYPE_TSS64)) {
  371. //
  372. // All long GDT entries use a 64-bit base and have the longmode bit
  373. // set.
  374. //
  375. // In addition, a TSS GDT entry uses a 64-bit but does *not* set the
  376. // longmode bit. This applies to an LDT entry as well, which is not
  377. // used in this OS.
  378. //
  379. if (Type != TYPE_TSS64) {
  380. gdtEntry->Bits.LongMode = 1;
  381. }
  382. gdtEntry->MustBeZero = 0;
  383. gdtEntry->BaseUpper = gdtBase.BaseUpper;
  384. }
  385. //
  386. // Set the Limit and Granularity fields
  387. //
  388. if (Limit > (1 << 20)) {
  389. limit20 = (ULONG)(Limit / PAGE_SIZE);
  390. gdtEntry->Bits.Granularity = 1;
  391. } else {
  392. limit20 = (ULONG)Limit;
  393. gdtEntry->Bits.Granularity = 0;
  394. }
  395. gdtLimit.Limit = limit20;
  396. gdtEntry->LimitLow = gdtLimit.LimitLow;
  397. gdtEntry->Bits.LimitHigh = gdtLimit.LimitHigh;
  398. //
  399. // Set Present = 1 unless this is a NULL descriptor
  400. //
  401. if (Type == 0) {
  402. gdtEntry->Bits.Present = 0;
  403. } else {
  404. gdtEntry->Bits.Present = 1;
  405. }
  406. //
  407. // Set remaining fields
  408. //
  409. gdtEntry->Bits.Type = Type;
  410. gdtEntry->Bits.Dpl = Dpl;
  411. gdtEntry->Bits.DefaultBig = DefaultBig;
  412. gdtEntry->Bits.System = 0;
  413. }
VOID
BlAmd64BuildAmd64GDT(
    IN PVOID SysTss,
    OUT PVOID Gdt
    )

/*++

Routine Description:

    This routine initializes the longmode Global Descriptor Table.

Arguments:

    SysTss - Supplies a 32-bit KSEG0_X86 pointer to the system TSS.

    Gdt - Supplies a 32-bit pointer to the Gdt to fill in.

Return Value:

    None.

--*/

{
    PGDT_64 gdt64;
    POINTER64 sysTss64;

    gdt64 = (PGDT_64)Gdt;

    //
    // KGDT64_NULL: NULL descriptor
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_NULL,
                         0,0,0,0,0,0); // Null selector, all zeros

    //
    // KGDT64_R0_CODE: Kernel mode code
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R0_CODE,
                         0,          // Base and limit are ignored
                         0,          // in a long-mode code selector
                         TYPE_CODE,  // Code segment: Execute/Read
                         DPL_SYSTEM, // Kernel only
                         TRUE,       // Longmode
                         FALSE);     // Not 32-bit default

    //
    // KGDT64_R0_DATA: Kernel mode data/stack
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R0_DATA,
                         0,          // Base and limit are ignored
                         0,          // when in long-mode
                         TYPE_DATA,  // Data segment: Read/Write
                         DPL_SYSTEM, // Kernel only
                         FALSE,      // Not longmode
                         TRUE);      // 32-bit default

    //
    // KGDT64_SYS_TSS: Kernel mode system task state
    //

    sysTss64 = PTR_64(SysTss);
    BlAmd64BuildGdtEntry(gdt64,KGDT64_SYS_TSS,
                         sysTss64,   // Base to be filled in at runtime
                         FIELD_OFFSET(KTSS64,IoMap)-1, // Contains only a KTSS64
                         TYPE_TSS64, // Not busy TSS
                         DPL_SYSTEM, // Kernel only
                         FALSE,      // Not longmode
                         FALSE);     // Not 32-bit default

    //
    // KGDT64_R3_CODE: User mode 64-bit code
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CODE,
                         0,          // Base and limit are ignored
                         0,          // in a long-mode code selector
                         TYPE_CODE,  // Code segment: Execute/Read
                         DPL_USER,   // User mode
                         TRUE,       // Longmode
                         FALSE);     // Not 32-bit default

    //
    // KGDT64_R3_CMCODE: User-mode 32-bit code. Flat 2 gig.
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CMCODE,
                         0,          // Base
                         0x7FFFFFFF, // 2G limit
                         TYPE_CODE,  // Code segment: Execute/Read
                         DPL_USER,   // User mode
                         FALSE,      // Not longmode
                         TRUE);      // 32-bit default

    //
    // KGDT64_R3_DATA: User-mode 32-bit data. Flat 2 gig.
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_DATA,
                         0,          // Base
                         0x7FFFFFFF, // 2G limit
                         TYPE_DATA,  // Data segment: Read/Write
                         DPL_USER,   // User mode
                         FALSE,      // Not longmode
                         TRUE);      // 32-bit default

    //
    // KGDT64_R3_CMTEB: User-mode 32-bit TEB. Flat 4K.
    //

    BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CMTEB,
                         0,          // Base
                         0x0FFF,     // 4K limit
                         TYPE_DATA,  // Data segment: Read/Write
                         DPL_USER,   // User mode
                         FALSE,      // Not longmode
                         TRUE);      // 32-bit default

    //
    // Export the ring0 code selector for the transition code in amd64s.asm.
    //

    BlAmd64_KGDT64_R0_CODE = KGDT64_R0_CODE;
}
  513. ARC_STATUS
  514. BlAmd64MapHalVaSpace(
  515. VOID
  516. )
  517. /*++
  518. Routine Description:
  519. This routine initializes the VA space reserved for the HAL. This
  520. involves building all page tables necessary to support the mappings
  521. but not actually filling in any level 0 PTEs.
  522. Arguments:
  523. None.
  524. Return Value:
  525. ARC_STATUS - Status of operation.
  526. --*/
  527. {
  528. POINTER64 va;
  529. ULONG round;
  530. ULONG vaRemaining;
  531. ARC_STATUS status;
  532. //
  533. // The hal has a piece of VA space reserved for it, from HAL_VA_START to
  534. // HAL_VA_START + HAL_VA_SIZE - 1.
  535. //
  536. // This routine ensures that all necessary levels of page tables are
  537. // present to support any mappings that the hal might put there.
  538. //
  539. vaRemaining = HAL_VA_SIZE;
  540. va = HAL_VA_START;
  541. //
  542. // Round VA down to a page table boundary.
  543. //
  544. round = (ULONG)(va & (PAGE_TABLE_VA-1));
  545. va -= round;
  546. vaRemaining += round;
  547. while (TRUE) {
  548. //
  549. // Perform a "mapping". The special PFN_NUMBER sets up all of
  550. // the page tables necessary to support the mapping, without
  551. // actually filling in a level 0 PTE.
  552. //
  553. status = BlAmd64CreateMapping(va, PFN_NO_MAP);
  554. if (status != ESUCCESS) {
  555. return status;
  556. }
  557. if (vaRemaining <= PAGE_TABLE_VA) {
  558. break;
  559. }
  560. vaRemaining -= PAGE_TABLE_VA;
  561. va += PAGE_TABLE_VA;
  562. }
  563. return ESUCCESS;
  564. }