Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

804 lines
19 KiB

  1. /*++
  2. Copyright (c) 1991-2000 Microsoft Corporation
  3. Module Name:
  4. amd64.c
  5. Abstract:
  6. This module contains routines necessary to support loading and
  7. transitioning into an AMD64 kernel. The code in this module has
  8. access to amd64-specific defines found in amd64.h but not to i386-
  9. specific declarations found in i386.h.
  10. Author:
  11. Forrest Foltz (forrestf) 20-Apr-2000
  12. Environment:
  13. Revision History:
  14. --*/
  15. //
  16. // Here, we want the header files to be processed as though building for
  17. // AMD64. Make appropriate definitions and undefs.
  18. //
  19. #define _AMD64_
  20. #define _M_AMD64
  21. #define _WIN64
  22. #undef _X86_
  23. #undef X86
  24. #undef _M_X86_
  25. #undef _M_IX86
  26. //
  27. // Warning 4163 is "function unavailable as intrinsic"
  28. //
  29. #pragma warning(disable:4163)
  30. //
  31. // Warning 4235 is "nonstandard extension used", referring to __unaligned
  32. //
  33. #pragma warning(disable:4235)
  34. //
  35. // Warning 4391 is "incorrect return type for intrinsic function"
  36. //
  37. #pragma warning(disable:4391)
  38. //
  39. // Warning 4305 is "type cast truncation". This stuff is compiled for 32
  40. // bit, but requires 64 bit structures. We should build
  41. // a separate header for amd64 that includes only the
  42. // 64 bit structures required and include them only for
  43. // amd64.
  44. //
  45. #pragma warning(disable:4305)
  46. #include "bootx86.h"
  47. #include "amd64prv.h"
  48. #include <pcmp.inc>
  49. #include <ntapic.inc>
  50. //
  51. // GDT: functino to convert selector to GDT entry.
  52. //
  53. PKGDTENTRY64
  54. __inline
  55. GDT_PTR (
  56. PGDT_64 Gdt,
  57. ULONG Selector
  58. )
  59. {
  60. ULONG index;
  61. PKGDTENTRY64 entry;
  62. index = Selector & ~7;
  63. entry = (PKGDTENTRY64)((PUCHAR)Gdt + index);
  64. return entry;
  65. }
  66. //
  67. // Interrupt descriptor table
  68. //
  69. typedef struct _IDT_64 {
  70. KIDTENTRY64 Entries[ MAXIMUM_IDTVECTOR + 1 ];
  71. } IDT_64, *PIDT_64;
  72. #define VIRTUAL_ADDRESS_BITS 48
  73. #define VIRTUAL_ADDRESS_MASK (((ULONGLONG)1 << VIRTUAL_ADDRESS_BITS) - 1)
  74. //
  75. // Longmode ring0 code selector. This actually lives in the middle of the
  76. // transition data stream in amd64s.asm
  77. //
  78. extern USHORT BlAmd64_KGDT64_R0_CODE;
  79. //
  80. // Data values exported to amd64x86.c
  81. //
  82. const ULONG BlAmd64DoubleFaultStackSize = DOUBLE_FAULT_STACK_SIZE;
  83. const ULONG BlAmd64KernelStackSize = KERNEL_STACK_SIZE;
  84. const ULONG BlAmd64McaExceptionStackSize = KERNEL_MCA_EXCEPTION_STACK_SIZE;
  85. const ULONG BlAmd64GdtSize = KGDT64_LAST;
  86. const ULONG BlAmd64IdtSize = sizeof(IDT_64);
  87. #define CONST_EXPORT(x) const ULONG BlAmd64_##x = x;
  88. CONST_EXPORT(MSR_FS_BASE)
  89. CONST_EXPORT(MSR_GS_BASE)
  90. CONST_EXPORT(KGDT64_SYS_TSS)
  91. CONST_EXPORT(MSR_EFER)
  92. CONST_EXPORT(TSS_IST_PANIC)
  93. CONST_EXPORT(TSS_IST_MCA)
  94. const ULONG64 BlAmd64_LOCALAPIC = LOCALAPIC;
  95. const ULONG64 BlAmd64UserSharedData = KI_USER_SHARED_DATA;
  96. //
  97. // Flags to be enabled in the EFER MSR before transitioning to long mode
  98. //
  99. const ULONG BlAmd64_MSR_EFER_Flags = MSR_LME | MSR_SCE | MSR_NXE;
  100. //
  101. // Array of address bit decode counts and recursive mapping bases,
  102. // one for each level of the mapping
  103. //
  104. AMD64_MAPPING_INFO BlAmd64MappingLevels[ AMD64_MAPPING_LEVELS ] =
  105. {
  106. { PTE_BASE, PTI_MASK_AMD64, PTI_SHIFT },
  107. { PDE_BASE, PDI_MASK_AMD64, PDI_SHIFT },
  108. { PPE_BASE, PPI_MASK, PPI_SHIFT },
  109. { PXE_BASE, PXI_MASK, PXI_SHIFT }
  110. };
  111. //
  112. // BlAmd64TopLevelPte refers to the physical page number of the Page Map
  113. // Level 4 (PML4) table.
  114. //
  115. // BlAmd64TopLevelPte is not really a page table entry and so does not
  116. // actually exist as an element within a page table. It exists only as
  117. // a convenience to BlAmd64CreateAmd64Mapping().
  118. //
  119. HARDWARE_PTE BlAmd64TopLevelPte;
  120. //
  121. // PAGE_MAP_LEVEL_4 yields the identity-mapped (physical) address of the
  122. // PML4 table.
  123. //
  124. #define PAGE_MAP_LEVEL_4 \
  125. ((PPAGE_TABLE)(BlAmd64TopLevelPte.PageFrameNumber << PAGE_SHIFT))
  126. //
  127. // Special PFN for BlAmd64CreateMapping
  128. //
  129. #define PFN_NO_MAP ((PFN_NUMBER)-1)
  130. //
  131. // Size of the VA mapped by a level 0 page table
  132. //
  133. #define PAGE_TABLE_VA ((POINTER64)(PTES_PER_PAGE * PAGE_SIZE))
  134. //
  135. // Prototypes for local functions
  136. //
  137. ARC_STATUS
  138. BlAmd64CreateMappingWorker(
  139. IN ULONGLONG Va,
  140. IN PFN_NUMBER Pfn,
  141. IN ULONG MappingLevel,
  142. IN OUT PHARDWARE_PTE UpperLevelPte
  143. );
  144. VOID
  145. BlAmd64MakePteValid(
  146. IN PHARDWARE_PTE Pte,
  147. IN PFN_NUMBER Pfn
  148. );
  149. VOID
  150. BlAmd64ClearTopLevelPte(
  151. VOID
  152. )
  153. /*++
  154. Routine Description:
  155. This routine simply clears BlAmd64TopLevelPte.
  156. Arguments:
  157. None.
  158. Return Value:
  159. None.
  160. --*/
  161. {
  162. *(PULONG64)&BlAmd64TopLevelPte = 0;
  163. }
  164. ARC_STATUS
  165. BlAmd64CreateMapping(
  166. IN ULONGLONG Va,
  167. IN PFN_NUMBER Pfn
  168. )
  169. /*++
  170. Routine Description:
  171. This function maps a virtual address into a 4-level AMD64 page mapping
  172. structure.
  173. Arguments:
  174. Va - Supplies the 64-bit virtual address to map
  175. Pfn - Supplies the 64-bit physical page number to map the address to
  176. Return Value:
  177. ARC_STATUS - Status of operation.
  178. --*/
  179. {
  180. ARC_STATUS status;
  181. status = BlAmd64CreateMappingWorker( Va & VIRTUAL_ADDRESS_MASK,
  182. Pfn,
  183. AMD64_MAPPING_LEVELS - 1,
  184. &BlAmd64TopLevelPte );
  185. return status;
  186. }
  187. ARC_STATUS
  188. BlAmd64CreateMappingWorker(
  189. IN ULONGLONG Va,
  190. IN PFN_NUMBER Pfn,
  191. IN ULONG MappingLevel,
  192. IN OUT PHARDWARE_PTE UpperLevelPte
  193. )
  194. /*++
  195. Routine Description:
  196. This function creates an address mapping in a single level of an AMD64
  197. 4-level mapping structure. It is called only by BlCreateMapping
  198. and by itself, recursively.
  199. Arguments:
  200. Va - Supplies the 64-bit virtual address to map. This address has already
  201. had any insignificant upper bits masked via VIRTUAL_ADDRESS_MASK.
  202. Pfn - Supplies the 64-bit physical page number to map the address to. If
  203. Pfn == PFN_NO_MAP, then all of the page tables are put into
  204. place to support the mapping but the level 0 pte itself is not
  205. actually filled in. This is used to create the HAL's VA mapping
  206. area.
  207. MappingLevel - The mapping level in which to create the appropriate
  208. mapping. Must be 0, 1, 2 or 3.
  209. UpperLevelPte - A pointer to the parent PTE that refers to the page
  210. at this mapping level. If no page exists at this level for this
  211. address, then this routine will allocate one and modify
  212. UpperLevelPte appropriately.
  213. Return Value:
  214. ARC_STATUS - Status of operation.
  215. --*/
  216. {
  217. PFN_NUMBER pageTablePfn;
  218. PAMD64_PAGE_TABLE pageTable;
  219. ULONGLONG va;
  220. PAMD64_MAPPING_INFO mappingInfo;
  221. ULONG pteIndex;
  222. PHARDWARE_PTE pte;
  223. ARC_STATUS status;
  224. BOOLEAN newPage;
  225. mappingInfo = &BlAmd64MappingLevels[ MappingLevel ];
  226. if (UpperLevelPte->Valid == 0) {
  227. //
  228. // A new page table must be allocated.
  229. //
  230. newPage = TRUE;
  231. pageTable = BlAmd64AllocatePageTable();
  232. if (pageTable == NULL) {
  233. return ENOMEM;
  234. }
  235. //
  236. // Reference the new page table with the parent PTE
  237. //
  238. pageTablePfn = (ULONG)pageTable >> PAGE_SHIFT;
  239. BlAmd64MakePteValid( UpperLevelPte, pageTablePfn );
  240. if (MappingLevel == (AMD64_MAPPING_LEVELS - 1)) {
  241. //
  242. // We have just allocated the top-level page. Insert a
  243. // recursive mapping here.
  244. //
  245. pteIndex = (ULONG)((mappingInfo->RecursiveMappingBase >>
  246. mappingInfo->AddressShift) &
  247. mappingInfo->AddressMask);
  248. pte = &pageTable->PteArray[ pteIndex ];
  249. BlAmd64MakePteValid( pte, pageTablePfn );
  250. }
  251. } else {
  252. //
  253. // A page table structure already exists for this level.
  254. //
  255. newPage = FALSE;
  256. pageTablePfn = UpperLevelPte->PageFrameNumber;
  257. pageTable = (PAMD64_PAGE_TABLE)((ULONG)pageTablePfn << PAGE_SHIFT);
  258. }
  259. //
  260. // Derive a pointer to the appropriate PTE within the page table
  261. //
  262. pteIndex =
  263. (ULONG)(Va >> mappingInfo->AddressShift) & mappingInfo->AddressMask;
  264. pte = &pageTable->PteArray[ pteIndex ];
  265. if (MappingLevel == 0) {
  266. if (Pfn != PFN_NO_MAP) {
  267. //
  268. // This is an actual level 0, or PTE, entry. Just set it with
  269. // the Pfn that was passed in.
  270. //
  271. BlAmd64MakePteValid( pte, Pfn );
  272. } else {
  273. //
  274. // This is a special HAL mapping, one that ensures that all
  275. // levels of page table are in place to support this mapping
  276. // but doesn't actually fill in the level 0 PTE.
  277. //
  278. // So do nothing here except break the recursion.
  279. //
  280. }
  281. } else {
  282. //
  283. // More mapping levels to go, call this function recursively and
  284. // process the next level.
  285. //
  286. status = BlAmd64CreateMappingWorker( Va,
  287. Pfn,
  288. MappingLevel - 1,
  289. pte );
  290. if (status != ESUCCESS) {
  291. return status;
  292. }
  293. }
  294. if (newPage != FALSE) {
  295. //
  296. // A new page table was allocated, above. Recursively map
  297. // it within the PTE_BASE region.
  298. //
  299. va = (Va >> mappingInfo->AddressShift);
  300. va *= sizeof(HARDWARE_PTE);
  301. va += mappingInfo->RecursiveMappingBase;
  302. status = BlAmd64CreateMapping( va, pageTablePfn );
  303. if (status != ESUCCESS) {
  304. return status;
  305. }
  306. }
  307. return ESUCCESS;
  308. }
  309. VOID
  310. BlAmd64MakePteValid(
  311. IN OUT PHARDWARE_PTE Pte,
  312. IN PFN_NUMBER Pfn
  313. )
  314. /*++
  315. Routine Description:
  316. This routine fills an AMD64 Pte with the supplied Pfn and makes it
  317. valid.
  318. Arguments:
  319. Pte - Supplies a pointer to the Pte to make valid.
  320. Pfn - Supplies the page frame number to set in the Pte.
  321. Return Value:
  322. ARC_STATUS - Status of operation.
  323. --*/
  324. {
  325. //
  326. // Make sure we're not just overwriting a PFN in an already
  327. // valid PTE.
  328. //
  329. ASSERT( Pte->Valid == 0 || Pte->PageFrameNumber == Pfn );
  330. Pte->PageFrameNumber = Pfn;
  331. Pte->Valid = 1;
  332. Pte->Write = 1;
  333. Pte->Accessed = 1;
  334. Pte->Dirty = 1;
  335. }
  336. VOID
  337. BlAmd64BuildGdtEntry(
  338. IN PGDT_64 Gdt,
  339. IN USHORT Selector,
  340. IN POINTER64 Base,
  341. IN ULONGLONG Limit,
  342. IN ULONG Type,
  343. IN ULONG Dpl,
  344. IN BOOLEAN LongMode,
  345. IN BOOLEAN DefaultBig
  346. )
  347. /*++
  348. Routine Description:
  349. This routine fills in an AMD64 GDT entry.
  350. Arguments:
  351. Gdt - Supplies a pointer to the GDT.
  352. Selector - Segment selector of the GDT entry within Gdt.
  353. Base - Base address value of the descriptor.
  354. Limit - Limit value of the descriptor.
  355. Type - 5-bit type value of the descriptor.
  356. Dpl - Priviledge value of the descriptor.
  357. LongMode - Indicates whether this is a longmode descriptor (valid only
  358. for code segment descriptors).
  359. DefaultBig - Supplies the value for the default/big field in the
  360. descriptor.
  361. Return Value:
  362. None.
  363. --*/
  364. {
  365. ULONG limit20;
  366. PKGDTENTRY64 gdtEntry;
  367. KGDT_BASE gdtBase;
  368. KGDT_LIMIT gdtLimit;
  369. gdtEntry = GDT_PTR(Gdt,Selector);
  370. //
  371. // Set the Base and LongMode fields
  372. //
  373. gdtBase.Base = Base;
  374. gdtEntry->BaseLow = gdtBase.BaseLow;
  375. gdtEntry->Bits.BaseMiddle = gdtBase.BaseMiddle;
  376. gdtEntry->Bits.BaseHigh = gdtBase.BaseHigh;
  377. gdtEntry->Bits.LongMode = 0;
  378. if ((LongMode != FALSE) || (Type == TYPE_TSS64)) {
  379. //
  380. // All long GDT entries use a 64-bit base and have the longmode bit
  381. // set.
  382. //
  383. // In addition, a TSS GDT entry uses a 64-bit but does *not* set the
  384. // longmode bit. This applies to an LDT entry as well, which is not
  385. // used in this OS.
  386. //
  387. if (Type != TYPE_TSS64) {
  388. gdtEntry->Bits.LongMode = 1;
  389. }
  390. gdtEntry->MustBeZero = 0;
  391. gdtEntry->BaseUpper = gdtBase.BaseUpper;
  392. }
  393. //
  394. // Set the Limit and Granularity fields
  395. //
  396. if (Limit > (1 << 20)) {
  397. limit20 = (ULONG)(Limit / PAGE_SIZE);
  398. gdtEntry->Bits.Granularity = 1;
  399. } else {
  400. limit20 = (ULONG)Limit;
  401. gdtEntry->Bits.Granularity = 0;
  402. }
  403. gdtLimit.Limit = limit20;
  404. gdtEntry->LimitLow = gdtLimit.LimitLow;
  405. gdtEntry->Bits.LimitHigh = gdtLimit.LimitHigh;
  406. //
  407. // Set Present = 1 unless this is a NULL descriptor
  408. //
  409. if (Type == 0) {
  410. gdtEntry->Bits.Present = 0;
  411. } else {
  412. gdtEntry->Bits.Present = 1;
  413. }
  414. //
  415. // Set remaining fields
  416. //
  417. gdtEntry->Bits.Type = Type;
  418. gdtEntry->Bits.Dpl = Dpl;
  419. gdtEntry->Bits.DefaultBig = DefaultBig;
  420. gdtEntry->Bits.System = 0;
  421. }
  422. VOID
  423. BlAmd64BuildAmd64GDT(
  424. IN PVOID SysTss,
  425. OUT PVOID Gdt
  426. )
  427. /*++
  428. Routine Description:
  429. This routine initializes the longmode Global Descriptor Table.
  430. Arguments:
  431. SysTss - Supplies a 32-bit KSEG0_X86 pointer to the system TSS.
  432. Gdt - Supplies a 32-bit pointer to the Gdt to fill in.
  433. Return Value:
  434. None.
  435. --*/
  436. {
  437. PGDT_64 gdt64;
  438. POINTER64 sysTss64;
  439. gdt64 = (PGDT_64)Gdt;
  440. //
  441. // KGDT64_NULL: NULL descriptor
  442. //
  443. BlAmd64BuildGdtEntry(gdt64,KGDT64_NULL,
  444. 0,0,0,0,0,0); // Null selector, all zeros
  445. //
  446. // KGDT_R0_CODE: Kernel mode code
  447. //
  448. BlAmd64BuildGdtEntry(gdt64,KGDT64_R0_CODE,
  449. 0, // Base and limit are ignored
  450. 0, // in a long-mode code selector
  451. TYPE_CODE, // Code segment: Execute/Read
  452. DPL_SYSTEM, // Kernel only
  453. TRUE, // Longmode
  454. FALSE); // Not 32-bit default
  455. //
  456. // KGDT_R0_STACK: Kernel mode stack
  457. //
  458. BlAmd64BuildGdtEntry(gdt64,KGDT64_R0_DATA,
  459. 0, // Base and limit are ignored
  460. 0, // when in long-mode
  461. TYPE_DATA, // Data segment: Read/Write
  462. DPL_SYSTEM, // Kernel only
  463. FALSE, // Not longmode
  464. TRUE); // 32-bit default
  465. //
  466. // KDT_SYS_TSS: Kernel mode system task state
  467. //
  468. sysTss64 = PTR_64(SysTss);
  469. BlAmd64BuildGdtEntry(gdt64,KGDT64_SYS_TSS,
  470. sysTss64, // Base to be filled in at runtime
  471. sizeof(KTSS64), // Contains only a KTSS64
  472. TYPE_TSS64, // Not busy TSS
  473. DPL_SYSTEM, // Kernel only
  474. FALSE, // Not longmode
  475. FALSE); // Not 32-bit default
  476. //
  477. // KGDT64_R3_CODE: User mode 64-bit code
  478. //
  479. BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CODE,
  480. 0, // Base and limit are ignored
  481. 0, // in a long-mode code selector
  482. TYPE_CODE, // Code segment: Execute/Read
  483. DPL_USER, // User mode
  484. TRUE, // Longmode
  485. FALSE); // Not 32-bit default
  486. //
  487. // KGDT64_R3_CMCODE: User-mode 32-bit code. Flat 4 gig. Note that we may
  488. // give Wow64 apps almost the entire 4GB address space.
  489. //
  490. BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CMCODE,
  491. 0, // Base
  492. 0xFFFFFFFF, // 4G limit
  493. TYPE_CODE, // Code segment: Execute/Read
  494. DPL_USER, // User mode
  495. FALSE, // Not longmode
  496. TRUE); // 32-bit default
  497. //
  498. // KGDT64_R3_DATA: User-mode 32-bit data. Flat 4 gig.
  499. //
  500. BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_DATA,
  501. 0, // Base
  502. 0xFFFFFFFF, // 4G limit
  503. TYPE_DATA, // Data segment: Read/Write
  504. DPL_USER, // User mode
  505. FALSE, // Not longmode
  506. TRUE); // 32-bit default
  507. //
  508. // KGDT64_R3_CMTEB: User-mode 32-bit TEB. Flat 4K.
  509. //
  510. BlAmd64BuildGdtEntry(gdt64,KGDT64_R3_CMTEB,
  511. 0, // Base
  512. 0x0FFF, // 4K limit
  513. TYPE_DATA, // Data segment: Read/Write
  514. DPL_USER, // User mode
  515. FALSE, // Not longmode
  516. TRUE); // 32-bit default
  517. //
  518. // Set the code selector
  519. //
  520. BlAmd64_KGDT64_R0_CODE = KGDT64_R0_CODE;
  521. }
  522. ARC_STATUS
  523. BlAmd64MapHalVaSpace(
  524. VOID
  525. )
  526. /*++
  527. Routine Description:
  528. This routine initializes the VA space reserved for the HAL. This
  529. involves building all page tables necessary to support the mappings
  530. but not actually filling in any level 0 PTEs.
  531. Arguments:
  532. None.
  533. Return Value:
  534. ARC_STATUS - Status of operation.
  535. --*/
  536. {
  537. POINTER64 va;
  538. ULONG round;
  539. ULONG vaRemaining;
  540. ARC_STATUS status;
  541. //
  542. // The hal has a piece of VA space reserved for it, from HAL_VA_START to
  543. // HAL_VA_START + HAL_VA_SIZE - 1.
  544. //
  545. // This routine ensures that all necessary levels of page tables are
  546. // present to support any mappings that the hal might put there.
  547. //
  548. vaRemaining = HAL_VA_SIZE;
  549. va = HAL_VA_START;
  550. //
  551. // Round VA down to a page table boundary.
  552. //
  553. round = (ULONG)(va & (PAGE_TABLE_VA-1));
  554. va -= round;
  555. vaRemaining += round;
  556. while (TRUE) {
  557. //
  558. // Perform a "mapping". The special PFN_NUMBER sets up all of
  559. // the page tables necessary to support the mapping, without
  560. // actually filling in a level 0 PTE.
  561. //
  562. status = BlAmd64CreateMapping(va, PFN_NO_MAP);
  563. if (status != ESUCCESS) {
  564. return status;
  565. }
  566. if (vaRemaining <= PAGE_TABLE_VA) {
  567. break;
  568. }
  569. vaRemaining -= PAGE_TABLE_VA;
  570. va += PAGE_TABLE_VA;
  571. }
  572. return ESUCCESS;
  573. }