Leaked source code of Windows Server 2003

/*++

Copyright (c) 1989  Microsoft Corporation

Module Name:

    physical.c

Abstract:

    This module contains the routines to manipulate physical memory from
    user space.

    There are restrictions on how user controlled physical memory can be used.
    Realize that all this memory is nonpaged and hence applications should
    allocate this with care as it represents a very real system resource.

    Virtual memory which maps user controlled physical memory pages must be:

    1. Private memory only (ie: cannot be shared between processes).

    2. The same physical page cannot be mapped at 2 different virtual
       addresses.

    3. Callers must have LOCK_VM privilege to create these VADs.

    4. Device drivers cannot call MmSecureVirtualMemory on it - this means
       that applications should not expect to use this memory for win32k.sys
       calls.

    5. NtProtectVirtualMemory only allows read-write protection on this
       memory.  No other protection (no access, guard pages, readonly, etc)
       is allowed.

    6. NtFreeVirtualMemory allows only MEM_RELEASE and NOT MEM_DECOMMIT on
       these VADs.  Even MEM_RELEASE is only allowed on entire VAD ranges -
       that is, splitting of these VADs is not allowed.

    7. fork() style child processes don't inherit physical VADs.

    8. The physical pages in these VADs are not subject to job limits.

Author:

    Landy Wang (landyw) 25-Jan-1999

Revision History:

--*/
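
//
// For orientation, these are the kernel-side services behind the user-mode
// AWE API exposed by kernel32 (AllocateUserPhysicalPages,
// MapUserPhysicalPages, FreeUserPhysicalPages).  The sketch below is
// illustrative only - it is not part of this module, error handling is
// elided, the names are invented for the example and a fixed 4k page size
// is assumed (a real caller would use GetSystemInfo):
//
//     #include <windows.h>
//
//     ULONG_PTR PageCount = 64;
//     ULONG_PTR Pfns[64];
//     SIZE_T Bytes = PageCount * 4096;
//
//     // Reserve a range that can only be backed by AWE frames.
//     PVOID Va = VirtualAlloc (NULL, Bytes,
//                              MEM_RESERVE | MEM_PHYSICAL,
//                              PAGE_READWRITE);
//
//     // Allocate nonpaged frames (requires SeLockMemoryPrivilege),
//     // then window them into the reservation.
//     AllocateUserPhysicalPages (GetCurrentProcess (), &PageCount, Pfns);
//     MapUserPhysicalPages (Va, PageCount, Pfns);
//
//     // ... use Va ...
//
//     // A NULL PFN array sets the range back to no access.
//     MapUserPhysicalPages (Va, PageCount, NULL);
//     FreeUserPhysicalPages (GetCurrentProcess (), &PageCount, Pfns);
//     VirtualFree (Va, 0, MEM_RELEASE);
//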
#include "mi.h"

#ifdef ALLOC_PRAGMA
#pragma alloc_text(PAGE,NtMapUserPhysicalPages)
#pragma alloc_text(PAGE,NtMapUserPhysicalPagesScatter)
#pragma alloc_text(PAGE,MiRemoveUserPhysicalPagesVad)
#pragma alloc_text(PAGE,MiAllocateAweInfo)
#pragma alloc_text(PAGE,MiCleanPhysicalProcessPages)
#pragma alloc_text(PAGE,NtAllocateUserPhysicalPages)
#pragma alloc_text(PAGE,NtFreeUserPhysicalPages)
#pragma alloc_text(PAGE,MiAweViewInserter)
#pragma alloc_text(PAGE,MiAweViewRemover)
#pragma alloc_text(PAGE,MmSetPhysicalPagesLimit)
#pragma alloc_text(PAGELK,MiAllocateLargePages)
#pragma alloc_text(PAGELK,MiFreeLargePages)
#endif
//
// This local stack size definition is deliberately large as ISVs have told
// us they expect to typically do up to this amount.
//

#define COPY_STACK_SIZE                 1024

#define SMALL_COPY_STACK_SIZE           512

#define BITS_IN_ULONG                   ((sizeof (ULONG)) * 8)

#define LOWEST_USABLE_PHYSICAL_ADDRESS  (16 * 1024 * 1024)
#define LOWEST_USABLE_PHYSICAL_PAGE     (LOWEST_USABLE_PHYSICAL_ADDRESS >> PAGE_SHIFT)

#define LOWEST_BITMAP_PHYSICAL_PAGE     0
#define MI_FRAME_TO_BITMAP_INDEX(x)     ((ULONG)(x))
#define MI_BITMAP_INDEX_TO_FRAME(x)     ((ULONG)(x))

PFN_NUMBER MmVadPhysicalPages;

#if DBG
LOGICAL MiUsingLowPagesForAwe = FALSE;
extern ULONG MiShowStuckPages;
#endif
NTSTATUS
NtMapUserPhysicalPages (
    IN PVOID VirtualAddress,
    IN ULONG_PTR NumberOfPages,
    IN PULONG_PTR UserPfnArray OPTIONAL
    )

/*++

Routine Description:

    This function maps the specified nonpaged physical pages into the
    specified user address range.

    Note no WSLEs are maintained for this range as it is all nonpaged.

Arguments:

    VirtualAddress - Supplies a user virtual address within a
                     UserPhysicalPages Vad.

    NumberOfPages - Supplies the number of pages to map.

    UserPfnArray - Supplies a pointer to the page frame numbers to map in.
                   If this is zero, then the virtual addresses are set to
                   NO_ACCESS.

Return Value:

    Various NTSTATUS codes.

--*/

{
    ULONG Processor;
    ULONG_PTR OldValue;
    ULONG_PTR NewValue;
    PAWEINFO AweInfo;
    PULONG BitBuffer;
    PEPROCESS Process;
    PMMPTE PointerPte;
    PMMPTE LastPte;
    PVOID EndAddress;
    PFN_NUMBER PageFrameIndex;
    PMMPFN Pfn1;
    NTSTATUS Status;
    MMPTE_FLUSH_LIST PteFlushList;
    PVOID PoolArea;
    PVOID PoolAreaEnd;
    PPFN_NUMBER FrameList;
    ULONG BitMapIndex;
    ULONG_PTR StackArray[COPY_STACK_SIZE];
    MMPTE OldPteContents;
    MMPTE OriginalPteContents;
    MMPTE NewPteContents;
    ULONG_PTR NumberOfBytes;
    ULONG SizeOfBitMap;
    PRTL_BITMAP BitMap;
    PMI_PHYSICAL_VIEW PhysicalView;
    PEX_PUSH_LOCK PushLock;
    PKTHREAD CurrentThread;
    TABLE_SEARCH_RESULT SearchResult;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    if (NumberOfPages > (MAXULONG_PTR / PAGE_SIZE)) {
        return STATUS_INVALID_PARAMETER_2;
    }

    VirtualAddress = PAGE_ALIGN(VirtualAddress);
    EndAddress = (PVOID)((PCHAR)VirtualAddress + (NumberOfPages << PAGE_SHIFT) - 1);

    if (EndAddress <= VirtualAddress) {
        return STATUS_INVALID_PARAMETER_2;
    }

    //
    // Carefully probe and capture all user parameters.
    //

    FrameList = NULL;
    PoolArea = (PVOID)&StackArray[0];

    if (ARGUMENT_PRESENT(UserPfnArray)) {

        //
        // Check for zero pages here so the loops further down can be
        // optimized taking into account this can never happen.
        //

        if (NumberOfPages == 0) {
            return STATUS_SUCCESS;
        }

        NumberOfBytes = NumberOfPages * sizeof(ULONG_PTR);

        if (NumberOfPages > COPY_STACK_SIZE) {
            PoolArea = ExAllocatePoolWithTag (NonPagedPool,
                                              NumberOfBytes,
                                              'wRmM');

            if (PoolArea == NULL) {
                return STATUS_INSUFFICIENT_RESOURCES;
            }
        }

        //
        // Capture the specified page frame numbers.
        //

        try {
            ProbeForRead (UserPfnArray,
                          NumberOfBytes,
                          sizeof(ULONG_PTR));

            RtlCopyMemory (PoolArea, UserPfnArray, NumberOfBytes);

        } except(EXCEPTION_EXECUTE_HANDLER) {
            if (PoolArea != (PVOID)&StackArray[0]) {
                ExFreePool (PoolArea);
            }
            return GetExceptionCode();
        }

        FrameList = (PPFN_NUMBER)PoolArea;
    }

    PoolAreaEnd = (PVOID)((PULONG_PTR)PoolArea + NumberOfPages);

    PointerPte = MiGetPteAddress (VirtualAddress);
    LastPte = PointerPte + NumberOfPages;

    Process = PsGetCurrentProcess ();

    PageFrameIndex = 0;

    //
    // Initialize as much as possible before acquiring any locks.
    //

    MI_MAKE_VALID_PTE (NewPteContents,
                       PageFrameIndex,
                       MM_READWRITE,
                       PointerPte);

    MI_SET_PTE_DIRTY (NewPteContents);

    PteFlushList.Count = 0;

    //
    // A memory barrier is needed to read the EPROCESS AweInfo field
    // in order to ensure the writes to the AweInfo structure fields are
    // visible in correct order.  This avoids the need to acquire any
    // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
    // of best performance.
    //

    KeMemoryBarrier ();

    AweInfo = (PAWEINFO) Process->AweInfo;
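
    //
    // The barrier above pairs with the interlocked publish of the
    // EPROCESS AweInfo pointer in MiAllocateAweInfo below: the pointer
    // is only set after all AweInfo fields are initialized, so a lock
    // free reader that sees a non-NULL pointer also sees initialized
    // contents.
    //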

    //
    // The physical pages bitmap must exist.
    //

    if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
        if (PoolArea != (PVOID)&StackArray[0]) {
            ExFreePool (PoolArea);
        }
        return STATUS_INVALID_PARAMETER_1;
    }

    //
    // Block APCs to prevent recursive pushlock scenarios as this is not
    // supported.
    //

    CurrentThread = KeGetCurrentThread ();
    KeEnterGuardedRegionThread (CurrentThread);

    //
    // Pushlock protection protects insertion/removal of Vads into each
    // process' AweVadList.  It also protects creation/deletion and
    // adds/removes of the VadPhysicalPagesBitMap.  Finally, it protects
    // the PFN modifications for pages in the bitmap.
    //

    PushLock = ExAcquireCacheAwarePushLockShared (AweInfo->PushLock);

    BitMap = AweInfo->VadPhysicalPagesBitMap;

    ASSERT (BitMap != NULL);

    Processor = KeGetCurrentProcessorNumber ();
    PhysicalView = AweInfo->PhysicalViewHint[Processor];

    if ((PhysicalView != NULL) &&
        (PhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE) &&
        (VirtualAddress >= MI_VPN_TO_VA (PhysicalView->StartingVpn)) &&
        (EndAddress <= MI_VPN_TO_VA_ENDING (PhysicalView->EndingVpn))) {

        NOTHING;
    }
    else {

        //
        // Lookup the element and save the result.
        //
        // Note that the push lock is sufficient to traverse this list.
        //

        SearchResult = MiFindNodeOrParent (&AweInfo->AweVadRoot,
                                           MI_VA_TO_VPN (VirtualAddress),
                                           (PMMADDRESS_NODE *) &PhysicalView);

        if ((SearchResult == TableFoundNode) &&
            (PhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE) &&
            (VirtualAddress >= MI_VPN_TO_VA (PhysicalView->StartingVpn)) &&
            (EndAddress <= MI_VPN_TO_VA_ENDING (PhysicalView->EndingVpn))) {

            AweInfo->PhysicalViewHint[Processor] = PhysicalView;
        }
        else {
            Status = STATUS_INVALID_PARAMETER_1;
            goto ErrorReturn;
        }
    }

    //
    // Ensure the PFN element corresponding to each specified page is owned
    // by the specified VAD.
    //
    // Since this ownership can only be changed while holding this process'
    // working set lock, the PFN can be scanned here without holding the PFN
    // lock.
    //
    // Note the PFN lock is not needed because any race with MmProbeAndLockPages
    // can only result in the I/O going to the old page or the new page.
    // If the user breaks the rules, the PFN database (and any pages being
    // windowed here) are still protected because of the reference counts
    // on the pages with inprogress I/O.  This is possible because NO pages
    // are actually freed here - they are just windowed.
    //

    if (ARGUMENT_PRESENT(UserPfnArray)) {

        //
        // By keeping the PFN bitmap in the VAD (instead of in the PFN
        // database itself), a few benefits are realized:
        //
        // 1. No need to acquire the PFN lock here.
        // 2. Faster handling of PFN databases with holes.
        // 3. Transparent support for dynamic PFN database growth.
        // 4. Less nonpaged memory is used (for the bitmap vs adding a
        //    field to the PFN) on systems with no unused pack space in
        //    the PFN database, presuming not many of these VADs get
        //    allocated.
        //

        //
        // The first pass here ensures all the frames are secure.
        //

        //
        // N.B.  This implies that PFN_NUMBER is always ULONG_PTR in width
        //       as PFN_NUMBER is not exposed to application code today.
        //

        SizeOfBitMap = BitMap->SizeOfBitMap;

        BitBuffer = BitMap->Buffer;

        do {

            PageFrameIndex = *FrameList;

            //
            // Frames past the end of the bitmap are not allowed.
            //

            BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);

#if defined (_WIN64)
            //
            // Ensure the frame is a 32-bit number.
            //

            if (BitMapIndex != PageFrameIndex) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }
#endif

            if (BitMapIndex >= SizeOfBitMap) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }

            //
            // Frames not in the bitmap are not allowed.
            //

            if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }

            //
            // The frame must not be already mapped anywhere.
            // Or be passed in twice in different spots in the array.
            //

            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

            ASSERT (MI_PFN_IS_AWE (Pfn1));

            OldValue = Pfn1->u2.ShareCount;

            if (OldValue != 1) {
                Status = STATUS_INVALID_PARAMETER_3;
                goto ErrorReturn0;
            }

            NewValue = OldValue + 2;

            //
            // Mark the frame as "about to be mapped".
            //

#if defined (_WIN64)
            OldValue = InterlockedCompareExchange64 ((PLONGLONG)&Pfn1->u2.ShareCount,
                                                     (LONGLONG)NewValue,
                                                     (LONGLONG)OldValue);
#else
            OldValue = InterlockedCompareExchange ((PLONG)&Pfn1->u2.ShareCount,
                                                   NewValue,
                                                   OldValue);
#endif

            if (OldValue != 1) {
                Status = STATUS_INVALID_PARAMETER_3;
                goto ErrorReturn0;
            }

            ASSERT (MI_PFN_IS_AWE (Pfn1));
            ASSERT (Pfn1->u2.ShareCount == 3);

            ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
                    (MiUsingLowPagesForAwe == TRUE));

            FrameList += 1;

        } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
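
        //
        // Every frame in the request has now been claimed via the compare
        // exchange above.  The ShareCount values form a small state
        // machine for AWE frames: 1 means owned by the process but
        // unmapped, 2 means mapped at exactly one virtual address, and 3
        // is the transient "about to be mapped" claim taken above (dropped
        // to 2 once the PTE is filled, or back to 1 on the error path).
        //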

        //
        // This pass actually inserts them all into the page table pages and
        // the TBs now that we know the frames are good.  Check the PTEs and
        // PFNs carefully as a malicious user may issue more than one remap
        // request for all or portions of the same region simultaneously.
        //

        FrameList = (PPFN_NUMBER)PoolArea;

        do {

            PageFrameIndex = *FrameList;

            NewPteContents.u.Hard.PageFrameNumber = PageFrameIndex;

            do {
                OldPteContents = *PointerPte;

                OriginalPteContents.u.Long = InterlockedCompareExchangePte (
                                                PointerPte,
                                                NewPteContents.u.Long,
                                                OldPteContents.u.Long);

            } while (OriginalPteContents.u.Long != OldPteContents.u.Long);

            //
            // The PTE is now pointing at the new frame.  Note that another
            // thread can immediately access the page contents via this PTE
            // even though they're not supposed to until this API returns.
            // Thus, the page frames are handled carefully so that malicious
            // apps cannot corrupt frames they don't really still or yet own.
            //

            if (OldPteContents.u.Hard.Valid == 1) {

                //
                // The old frame was mapped so the TB entry must be flushed.
                // Note the app could maliciously dirty data in the old frame
                // until the TB flush completes, so don't allow frame reuse
                // till then (although allowing remapping within this process
                // is ok).
                //

                Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
                ASSERT (Pfn1->PteAddress != NULL);
                ASSERT (Pfn1->u2.ShareCount == 2);

                //
                // Carefully clear the PteAddress before decrementing the
                // share count.
                //

                Pfn1->PteAddress = NULL;

                InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);

                if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
                    PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
                    PteFlushList.Count += 1;
                }
            }

            //
            // Update counters for the new frame we just put in the PTE
            // and TB.
            //

            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

            ASSERT (Pfn1->PteAddress == NULL);
            ASSERT (Pfn1->u2.ShareCount == 3);

            Pfn1->PteAddress = PointerPte;

            InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);

            VirtualAddress = (PVOID)((PCHAR)VirtualAddress + PAGE_SIZE);
            PointerPte += 1;
            FrameList += 1;

        } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
    }
    else {

        //
        // Set the specified virtual address range to no access.
        //

        while (PointerPte < LastPte) {

            do {
                OldPteContents = *PointerPte;

                OriginalPteContents.u.Long = InterlockedCompareExchangePte (
                                                PointerPte,
                                                ZeroPte.u.Long,
                                                OldPteContents.u.Long);

            } while (OriginalPteContents.u.Long != OldPteContents.u.Long);

            //
            // The PTE has been cleared.  Note that another thread can still
            // be accessing the page contents via the stale PTE until the TB
            // entry is flushed even though they're not supposed to.
            // Thus, the page frames are handled carefully so that malicious
            // apps cannot corrupt frames they don't still own.
            //

            if (OldPteContents.u.Hard.Valid == 1) {

                //
                // The old frame was mapped so the TB entry must be flushed.
                // Note the app could maliciously dirty data in the old frame
                // until the TB flush completes, so don't allow frame reuse
                // till then (although allowing remapping within this process
                // is ok).
                //

                Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
                ASSERT (MI_PFN_IS_AWE (Pfn1));
                ASSERT (Pfn1->PteAddress != NULL);
                ASSERT (Pfn1->u2.ShareCount == 2);

                Pfn1->PteAddress = NULL;

                InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);

                if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
                    PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
                    PteFlushList.Count += 1;
                }
            }

            VirtualAddress = (PVOID)((PCHAR)VirtualAddress + PAGE_SIZE);
            PointerPte += 1;
        }
    }

    ExReleaseCacheAwarePushLockShared (PushLock);

    KeLeaveGuardedRegionThread (CurrentThread);

    //
    // Flush the TB entries for any relevant pages.  Note this can be done
    // without holding the AWE push lock because the PTEs have already been
    // filled so any concurrent (bogus) map/unmap call will see the right
    // entries.  AND any free of the physical pages will also see the right
    // entries (although the free must do a TB flush while holding the AWE
    // push lock exclusive to ensure no thread gets to continue using a
    // stale mapping to the page being freed prior to the flush below).
    //

    if (PteFlushList.Count != 0) {
        MiFlushPteList (&PteFlushList, FALSE);
    }

    if (PoolArea != (PVOID)&StackArray[0]) {
        ExFreePool (PoolArea);
    }

    return STATUS_SUCCESS;
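
    //
    // The error paths below unwind any "about to be mapped" claims taken
    // in the first pass: each claimed frame (ShareCount 3) is returned to
    // the owned-but-unmapped state (ShareCount 1) before the push lock
    // is released.
    //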

ErrorReturn0:

    while (FrameList > (PPFN_NUMBER)PoolArea) {
        FrameList -= 1;
        PageFrameIndex = *FrameList;
        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
        ASSERT (Pfn1->u2.ShareCount == 3);
        Pfn1->u2.ShareCount = 1;
    }

ErrorReturn:

    ExReleaseCacheAwarePushLockShared (PushLock);

    KeLeaveGuardedRegionThread (CurrentThread);

    if (PoolArea != (PVOID)&StackArray[0]) {
        ExFreePool (PoolArea);
    }

    return Status;
}
NTSTATUS
NtMapUserPhysicalPagesScatter (
    IN PVOID *VirtualAddresses,
    IN ULONG_PTR NumberOfPages,
    IN PULONG_PTR UserPfnArray OPTIONAL
    )

/*++

Routine Description:

    This function maps the specified nonpaged physical pages into the
    specified user address range.

    Note no WSLEs are maintained for this range as it is all nonpaged.

Arguments:

    VirtualAddresses - Supplies a pointer to an array of user virtual
                       addresses within UserPhysicalPages Vads.  Each array
                       entry is presumed to map a single page.

    NumberOfPages - Supplies the number of pages to map.

    UserPfnArray - Supplies a pointer to the page frame numbers to map in.
                   If this is zero, then the virtual addresses are set to
                   NO_ACCESS.  If the array entry is zero then just the
                   corresponding virtual address is set to NO_ACCESS.

Return Value:

    Various NTSTATUS codes.

--*/

{
    ULONG Processor;
    ULONG_PTR OldValue;
    ULONG_PTR NewValue;
    PULONG BitBuffer;
    PAWEINFO AweInfo;
    PEPROCESS Process;
    PMMPTE PointerPte;
    PFN_NUMBER PageFrameIndex;
    PMMPFN Pfn1;
    NTSTATUS Status;
    MMPTE_FLUSH_LIST PteFlushList;
    PVOID PoolArea;
    PVOID PoolAreaEnd;
    PVOID *PoolVirtualArea;
    PVOID *PoolVirtualAreaBase;
    PVOID *PoolVirtualAreaEnd;
    PPFN_NUMBER FrameList;
    ULONG BitMapIndex;
    PVOID StackVirtualArray[SMALL_COPY_STACK_SIZE];
    ULONG_PTR StackArray[SMALL_COPY_STACK_SIZE];
    MMPTE OriginalPteContents;
    MMPTE OldPteContents;
    MMPTE NewPteContents0;
    MMPTE NewPteContents;
    ULONG_PTR NumberOfBytes;
    PRTL_BITMAP BitMap;
    PMI_PHYSICAL_VIEW PhysicalView;
    PMI_PHYSICAL_VIEW LocalPhysicalView;
    PMI_PHYSICAL_VIEW NewPhysicalViewHint;
    PVOID VirtualAddress;
    ULONG SizeOfBitMap;
    PEX_PUSH_LOCK PushLock;
    PKTHREAD CurrentThread;
    TABLE_SEARCH_RESULT SearchResult;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    if (NumberOfPages > (MAXULONG_PTR / PAGE_SIZE)) {
        return STATUS_INVALID_PARAMETER_2;
    }

    //
    // Carefully probe and capture the user virtual address array.
    //

    PoolArea = (PVOID)&StackArray[0];
    PoolVirtualAreaBase = (PVOID)&StackVirtualArray[0];

    NumberOfBytes = NumberOfPages * sizeof(PVOID);

    if (NumberOfPages > SMALL_COPY_STACK_SIZE) {
        PoolVirtualAreaBase = ExAllocatePoolWithTag (NonPagedPool,
                                                     NumberOfBytes,
                                                     'wRmM');

        if (PoolVirtualAreaBase == NULL) {
            return STATUS_INSUFFICIENT_RESOURCES;
        }
    }

    PoolVirtualArea = PoolVirtualAreaBase;

    try {
        ProbeForRead (VirtualAddresses,
                      NumberOfBytes,
                      sizeof(PVOID));

        RtlCopyMemory (PoolVirtualArea, VirtualAddresses, NumberOfBytes);

    } except(EXCEPTION_EXECUTE_HANDLER) {
        Status = GetExceptionCode();
        goto ErrorReturn;
    }

    //
    // Check for zero pages here so the loops further down can be optimized
    // taking into account this can never happen.
    //

    if (NumberOfPages == 0) {
        return STATUS_SUCCESS;
    }

    //
    // Carefully probe and capture the user PFN array.
    //

    if (ARGUMENT_PRESENT(UserPfnArray)) {

        NumberOfBytes = NumberOfPages * sizeof(ULONG_PTR);

        if (NumberOfPages > SMALL_COPY_STACK_SIZE) {
            PoolArea = ExAllocatePoolWithTag (NonPagedPool,
                                              NumberOfBytes,
                                              'wRmM');

            if (PoolArea == NULL) {
                PoolArea = (PVOID)&StackArray[0];
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto ErrorReturn;
            }
        }

        //
        // Capture the specified page frame numbers.
        //

        try {
            ProbeForRead (UserPfnArray,
                          NumberOfBytes,
                          sizeof(ULONG_PTR));

            RtlCopyMemory (PoolArea, UserPfnArray, NumberOfBytes);

        } except(EXCEPTION_EXECUTE_HANDLER) {
            Status = GetExceptionCode();
            goto ErrorReturn;
        }
    }

    PoolAreaEnd = (PVOID)((PULONG_PTR)PoolArea + NumberOfPages);

    Process = PsGetCurrentProcess();

    //
    // Initialize as much as possible before acquiring any locks.
    //

    PageFrameIndex = 0;

    PhysicalView = NULL;

    PteFlushList.Count = 0;

    FrameList = (PPFN_NUMBER)PoolArea;

    ASSERT (NumberOfPages != 0);

    PoolVirtualAreaEnd = PoolVirtualAreaBase + NumberOfPages;

    MI_MAKE_VALID_PTE (NewPteContents0,
                       PageFrameIndex,
                       MM_READWRITE,
                       MiGetPteAddress(PoolVirtualArea[0]));

    MI_SET_PTE_DIRTY (NewPteContents0);

    Status = STATUS_SUCCESS;

    NewPhysicalViewHint = NULL;

    //
    // A memory barrier is needed to read the EPROCESS AweInfo field
    // in order to ensure the writes to the AweInfo structure fields are
    // visible in correct order.  This avoids the need to acquire any
    // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
    // of best performance.
    //

    KeMemoryBarrier ();

    AweInfo = (PAWEINFO) Process->AweInfo;

    //
    // The physical pages bitmap must exist.
    //

    if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
        Status = STATUS_INVALID_PARAMETER_1;
        goto ErrorReturn;
    }

    //
    // Block APCs to prevent recursive pushlock scenarios as this is not
    // supported.
    //

    CurrentThread = KeGetCurrentThread ();
    KeEnterGuardedRegionThread (CurrentThread);

    //
    // Pushlock protection protects insertion/removal of Vads into each
    // process' AweVadList.  It also protects creation/deletion and
    // adds/removes of the VadPhysicalPagesBitMap.  Finally, it protects
    // the PFN modifications for pages in the bitmap.
    //

    PushLock = ExAcquireCacheAwarePushLockShared (AweInfo->PushLock);

    BitMap = AweInfo->VadPhysicalPagesBitMap;

    ASSERT (BitMap != NULL);

    //
    // Note that the PFN lock is not needed to traverse this list (even
    // though MmProbeAndLockPages uses it), because the pushlock has been
    // acquired.
    //

    Processor = KeGetCurrentProcessorNumber ();
    LocalPhysicalView = AweInfo->PhysicalViewHint[Processor];

    if ((LocalPhysicalView != NULL) &&
        ((LocalPhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE) == 0)) {
        LocalPhysicalView = NULL;
    }

    do {

        VirtualAddress = *PoolVirtualArea;

        //
        // First check the last physical view this processor used.
        //

        if (LocalPhysicalView != NULL) {

            ASSERT (LocalPhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE);
            ASSERT (LocalPhysicalView->Vad->u.VadFlags.UserPhysicalPages == 1);

            if ((VirtualAddress >= MI_VPN_TO_VA (LocalPhysicalView->StartingVpn)) &&
                (VirtualAddress <= MI_VPN_TO_VA_ENDING (LocalPhysicalView->EndingVpn))) {

                //
                // The virtual address is within the hint so it's good.
                //

                PoolVirtualArea += 1;
                NewPhysicalViewHint = LocalPhysicalView;
                continue;
            }
        }

        //
        // Check the last physical view this loop used.
        //

        if (PhysicalView != NULL) {

            ASSERT (PhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE);
            ASSERT (PhysicalView->Vad->u.VadFlags.UserPhysicalPages == 1);

            if ((VirtualAddress >= MI_VPN_TO_VA (PhysicalView->StartingVpn)) &&
                (VirtualAddress <= MI_VPN_TO_VA_ENDING (PhysicalView->EndingVpn))) {

                //
                // The virtual address is within the hint so it's good.
                //

                PoolVirtualArea += 1;
                NewPhysicalViewHint = PhysicalView;
                continue;
            }
        }

        //
        // Lookup the element and save the result.
        //
        // Note that the push lock is sufficient to traverse this list.
        //

        SearchResult = MiFindNodeOrParent (&AweInfo->AweVadRoot,
                                           MI_VA_TO_VPN (VirtualAddress),
                                           (PMMADDRESS_NODE *) &PhysicalView);

        if ((SearchResult == TableFoundNode) &&
            (PhysicalView->u.LongFlags & MI_PHYSICAL_VIEW_AWE) &&
            (VirtualAddress >= MI_VPN_TO_VA (PhysicalView->StartingVpn)) &&
            (VirtualAddress <= MI_VPN_TO_VA_ENDING (PhysicalView->EndingVpn))) {

            NewPhysicalViewHint = PhysicalView;
        }
        else {

            //
            // No virtual address is reserved at the specified base address,
            // return an error.
            //

            ExReleaseCacheAwarePushLockShared (PushLock);
            KeLeaveGuardedRegionThread (CurrentThread);
            Status = STATUS_INVALID_PARAMETER_1;
            goto ErrorReturn;
        }

        PoolVirtualArea += 1;

    } while (PoolVirtualArea < PoolVirtualAreaEnd);

    ASSERT (NewPhysicalViewHint != NULL);

    if (AweInfo->PhysicalViewHint[Processor] != NewPhysicalViewHint) {
        AweInfo->PhysicalViewHint[Processor] = NewPhysicalViewHint;
    }

    //
    // Ensure the PFN element corresponding to each specified page is owned
    // by the specified VAD.
    //
    // Since this ownership can only be changed while holding this process'
    // working set lock, the PFN can be scanned here without holding the PFN
    // lock.
    //
    // Note the PFN lock is not needed because any race with MmProbeAndLockPages
    // can only result in the I/O going to the old page or the new page.
    // If the user breaks the rules, the PFN database (and any pages being
    // windowed here) are still protected because of the reference counts
    // on the pages with inprogress I/O.  This is possible because NO pages
    // are actually freed here - they are just windowed.
    //

    PoolVirtualArea = PoolVirtualAreaBase;

    if (ARGUMENT_PRESENT(UserPfnArray)) {

        //
        // By keeping the PFN bitmap in the process (instead of in the PFN
        // database itself), a few benefits are realized:
        //
        // 1. No need to acquire the PFN lock here.
        // 2. Faster handling of PFN databases with holes.
        // 3. Transparent support for dynamic PFN database growth.
        // 4. Less nonpaged memory is used (for the bitmap vs adding a
        //    field to the PFN) on systems with no unused pack space in
        //    the PFN database.
        //

        //
        // The first pass here ensures all the frames are secure.
        //

        //
        // N.B.  This implies that PFN_NUMBER is always ULONG_PTR in width
        //       as PFN_NUMBER is not exposed to application code today.
        //

        SizeOfBitMap = BitMap->SizeOfBitMap;

        BitBuffer = BitMap->Buffer;

        do {

            PageFrameIndex = *FrameList;

            //
            // Zero entries are treated as a command to unmap.
            //

            if (PageFrameIndex == 0) {
                FrameList += 1;
                continue;
            }

            //
            // Frames past the end of the bitmap are not allowed.
            //

            BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);

#if defined (_WIN64)
            //
            // Ensure the frame is a 32-bit number.
            //

            if (BitMapIndex != PageFrameIndex) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }
#endif

            if (BitMapIndex >= SizeOfBitMap) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }

            //
            // Frames not in the bitmap are not allowed.
            //

            if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
                Status = STATUS_CONFLICTING_ADDRESSES;
                goto ErrorReturn0;
            }

            //
            // The frame must not be already mapped anywhere.
            // Or be passed in twice in different spots in the array.
            //

            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

            ASSERT (MI_PFN_IS_AWE (Pfn1));

            OldValue = Pfn1->u2.ShareCount;

            if (OldValue != 1) {
                Status = STATUS_INVALID_PARAMETER_3;
                goto ErrorReturn0;
            }

            NewValue = OldValue + 2;

            //
            // Mark the frame as "about to be mapped".
            //

#if defined (_WIN64)
            OldValue = InterlockedCompareExchange64 ((PLONGLONG)&Pfn1->u2.ShareCount,
                                                     (LONGLONG)NewValue,
                                                     (LONGLONG)OldValue);
#else
            OldValue = InterlockedCompareExchange ((PLONG)&Pfn1->u2.ShareCount,
                                                   NewValue,
                                                   OldValue);
#endif

            if (OldValue != 1) {
                Status = STATUS_INVALID_PARAMETER_3;
                goto ErrorReturn0;
            }

            ASSERT (MI_PFN_IS_AWE (Pfn1));
            ASSERT (Pfn1->u2.ShareCount == 3);

            ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
                    (MiUsingLowPagesForAwe == TRUE));

            FrameList += 1;

        } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);

        //
        // This pass actually inserts them all into the page table pages and
        // the TBs now that we know the frames are good.  Check the PTEs and
        // PFNs carefully as a malicious user may issue more than one remap
        // request for all or portions of the same region simultaneously.
        //

        FrameList = (PPFN_NUMBER)PoolArea;

        do {

            PageFrameIndex = *FrameList;

            if (PageFrameIndex != 0) {
                NewPteContents = NewPteContents0;
                NewPteContents.u.Hard.PageFrameNumber = PageFrameIndex;
            }
            else {
                NewPteContents.u.Long = ZeroPte.u.Long;
            }

            VirtualAddress = *PoolVirtualArea;
            PoolVirtualArea += 1;

            PointerPte = MiGetPteAddress (VirtualAddress);

            do {
                OldPteContents = *PointerPte;

                OriginalPteContents.u.Long = InterlockedCompareExchangePte (
                                                PointerPte,
                                                NewPteContents.u.Long,
                                                OldPteContents.u.Long);

            } while (OriginalPteContents.u.Long != OldPteContents.u.Long);

            //
            // The PTE is now pointing at the new frame.  Note that another
            // thread can immediately access the page contents via this PTE
            // even though they're not supposed to until this API returns.
            // Thus, the page frames are handled carefully so that malicious
            // apps cannot corrupt frames they don't really still or yet own.
            //

            if (OldPteContents.u.Hard.Valid == 1) {

                //
                // The old frame was mapped so the TB entry must be flushed.
                // Note the app could maliciously dirty data in the old frame
                // until the TB flush completes, so don't allow frame reuse
                // till then (although allowing remapping within this process
                // is ok).
                //

                Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
                ASSERT (Pfn1->PteAddress != NULL);
                ASSERT (Pfn1->u2.ShareCount == 2);
                ASSERT (MI_PFN_IS_AWE (Pfn1));

                Pfn1->PteAddress = NULL;

                InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);

                if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
                    PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
                    PteFlushList.Count += 1;
                }
            }

            if (PageFrameIndex != 0) {
                Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
                ASSERT (Pfn1->PteAddress == NULL);
                ASSERT (Pfn1->u2.ShareCount == 3);

                Pfn1->PteAddress = PointerPte;

                InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
            }

            FrameList += 1;

        } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
    }
    else {

        //
        // Set the specified virtual address range to no access.
        //

        do {

            VirtualAddress = *PoolVirtualArea;
            PointerPte = MiGetPteAddress (VirtualAddress);

            do {
                OldPteContents = *PointerPte;

                OriginalPteContents.u.Long = InterlockedCompareExchangePte (
                                                PointerPte,
                                                ZeroPte.u.Long,
                                                OldPteContents.u.Long);

            } while (OriginalPteContents.u.Long != OldPteContents.u.Long);

            //
            // The PTE is now zeroed.  Note that another thread can still
            // access the old frame via the stale TB entry, and the app
            // could maliciously dirty data in the old frame until the TB
            // flush completes, so don't allow frame reuse till then
            // (although allowing remapping within this process is ok) to
            // prevent the app from corrupting frames it doesn't really
            // still own.
            //

            if (OldPteContents.u.Hard.Valid == 1) {

                //
                // The old frame was mapped so the TB entry must be flushed.
                //

                Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
                ASSERT (Pfn1->PteAddress != NULL);
                ASSERT (Pfn1->u2.ShareCount == 2);
                ASSERT (MI_PFN_IS_AWE (Pfn1));

                Pfn1->PteAddress = NULL;

                InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);

                if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
                    PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
                    PteFlushList.Count += 1;
                }
            }

            PoolVirtualArea += 1;

        } while (PoolVirtualArea < PoolVirtualAreaEnd);
    }

    ExReleaseCacheAwarePushLockShared (PushLock);

    KeLeaveGuardedRegionThread (CurrentThread);

    //
    // Flush the TB entries for any relevant pages.  Note this can be done
    // without holding the AWE push lock because the PTEs have already been
    // filled so any concurrent (bogus) map/unmap call will see the right
    // entries.  AND any free of the physical pages will also see the right
    // entries (although the free must do a TB flush while holding the AWE
    // push lock exclusive to ensure no thread gets to continue using a
    // stale mapping to the page being freed prior to the flush below).
    //

    if (PteFlushList.Count != 0) {
        MiFlushPteList (&PteFlushList, FALSE);
    }

ErrorReturn:

    if (PoolArea != (PVOID)&StackArray[0]) {
        ExFreePool (PoolArea);
    }

    if (PoolVirtualAreaBase != (PVOID)&StackVirtualArray[0]) {
        ExFreePool (PoolVirtualAreaBase);
    }

    return Status;

ErrorReturn0:

    while (FrameList > (PPFN_NUMBER)PoolArea) {
        FrameList -= 1;
        PageFrameIndex = *FrameList;

        if (PageFrameIndex != 0) {
            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
            ASSERT (Pfn1->u2.ShareCount == 3);
            ASSERT (MI_PFN_IS_AWE (Pfn1));
            InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -2);
        }
    }

    ExReleaseCacheAwarePushLockShared (PushLock);

    KeLeaveGuardedRegionThread (CurrentThread);

    goto ErrorReturn;
}
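
//
// (The scatter service above backs the documented user-mode entry point
// MapUserPhysicalPagesScatter, which takes one virtual address per page
// rather than a single contiguous range; otherwise the calling pattern
// matches the sketch at the top of this module.)
//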
PVOID
MiAllocateAweInfo (
    VOID
    )

/*++

Routine Description:

    This function allocates an AWE structure for the current process.  Note
    this structure is never destroyed while the process is alive in order to
    allow various checks to occur lock free.

Arguments:

    None.

Return Value:

    A non-NULL AweInfo pointer on success, NULL on failure.

Environment:

    Kernel mode, PASSIVE_LEVEL, no locks held.

--*/

{
    PAWEINFO AweInfo;
    PEPROCESS Process;

    AweInfo = ExAllocatePoolWithTag (NonPagedPool,
                                     sizeof (AWEINFO),
                                     'wAmM');

    if (AweInfo != NULL) {

        AweInfo->VadPhysicalPagesBitMap = NULL;
        AweInfo->VadPhysicalPages = 0;
        AweInfo->VadPhysicalPagesLimit = 0;

        RtlZeroMemory (&AweInfo->PhysicalViewHint,
                       MAXIMUM_PROCESSORS * sizeof(PMI_PHYSICAL_VIEW));

        RtlZeroMemory (&AweInfo->AweVadRoot,
                       sizeof(MM_AVL_TABLE));

        ASSERT (AweInfo->AweVadRoot.NumberGenericTableElements == 0);

        AweInfo->AweVadRoot.BalancedRoot.u1.Parent = &AweInfo->AweVadRoot.BalancedRoot;

        AweInfo->PushLock = ExAllocateCacheAwarePushLock ();

        if (AweInfo->PushLock == NULL) {
            ExFreePool (AweInfo);
            return NULL;
        }

        Process = PsGetCurrentProcess();

        //
        // A memory barrier is needed to ensure the writes initializing the
        // AweInfo fields are visible prior to setting the EPROCESS AweInfo
        // pointer.  This is because the reads from these fields are done
        // lock free for improved performance.  There is no need to
        // explicitly add one here as the InterlockedCompare already has one.
        //

        if (InterlockedCompareExchangePointer (&Process->AweInfo,
                                               AweInfo,
                                               NULL) != NULL) {

            ExFreeCacheAwarePushLock (AweInfo->PushLock);
            ExFreePool (AweInfo);
            AweInfo = Process->AweInfo;
            ASSERT (AweInfo != NULL);
        }
    }

    return (PVOID) AweInfo;
}
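
//
// MiAllocateAweInfo above is a lock free "initialize then publish"
// pattern: the structure is fully constructed and only then installed
// with a single interlocked compare exchange, so concurrent allocators
// race harmlessly - the loser frees its copy and adopts the winner's.
// A minimal sketch of the same idea with invented names (readers that
// dereference the published pointer lock free still need the read
// barrier shown at the top of this file):
//
//     PVOID PublishedContext;     // initially NULL
//
//     PVOID GetContext (VOID)
//     {
//         PVOID Context = BuildContext ();    // fully initialize first
//
//         if (InterlockedCompareExchangePointer (&PublishedContext,
//                                                Context,
//                                                NULL) != NULL) {
//             DestroyContext (Context);       // another thread won
//         }
//         return PublishedContext;
//     }
//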
NTSTATUS
NtAllocateUserPhysicalPages (
    IN HANDLE ProcessHandle,
    IN OUT PULONG_PTR NumberOfPages,
    OUT PULONG_PTR UserPfnArray
    )

/*++

Routine Description:

    This function allocates nonpaged physical pages for the specified
    subject process.

    No WSLEs are maintained for this range.

    The caller must check the NumberOfPages returned to determine how many
    pages were actually allocated (this number may be less than the
    requested amount).

    On success, the user array is filled with the allocated physical page
    frame numbers (only up to the returned NumberOfPages is filled in).

    No PTEs are filled here - this gives the application the flexibility
    to order the address space with no metadata structure imposed by the
    Mm.  Applications do this via NtMapUserPhysicalPages - ie:

    - Each physical page allocated is set in the process's bitmap.
      This provides remap, free and unmap a way to validate and rundown
      these frames.

      Unmaps may result in a walk of the entire bitmap, but that's ok as
      unmaps should be less frequent.  The win is it saves us from
      using up system virtual address space to manage these frames.

    - Note that the same physical frame may NOT be mapped at two different
      virtual addresses in the process.  This makes frees and unmaps
      substantially faster as no checks for aliasing need be performed.

Arguments:

    ProcessHandle - Supplies an open handle to a process object.

    NumberOfPages - Supplies a pointer to a variable that supplies the
                    desired size in pages of the allocation.  This is
                    filled with the actual number of pages allocated.

    UserPfnArray - Supplies a pointer to user memory to store the
                   allocated frame numbers into.

Return Value:

    Various NTSTATUS codes.

--*/

{
    PAWEINFO AweInfo;
    ULONG i;
    KAPC_STATE ApcState;
    PEPROCESS Process;
    KPROCESSOR_MODE PreviousMode;
    NTSTATUS Status;
    LOGICAL Attached;
    LOGICAL WsHeld;
    ULONG_PTR CapturedNumberOfPages;
    ULONG_PTR AllocatedPages;
    ULONG_PTR MdlRequestInPages;
    ULONG_PTR TotalAllocatedPages;
    PMDL MemoryDescriptorList;
    PMDL MemoryDescriptorList2;
    PMDL MemoryDescriptorHead;
    PPFN_NUMBER MdlPage;
    PRTL_BITMAP BitMap;
    ULONG BitMapSize;
    ULONG BitMapIndex;
    PMMPFN Pfn1;
    PHYSICAL_ADDRESS LowAddress;
    PHYSICAL_ADDRESS HighAddress;
    PHYSICAL_ADDRESS SkipBytes;
    ULONG SizeOfBitMap;
    PFN_NUMBER HighestPossiblePhysicalPage;
    PETHREAD CurrentThread;
    PEPROCESS CurrentProcess;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    Attached = FALSE;
    WsHeld = FALSE;

    //
    // Check the allocation type field.
    //

    CurrentThread = PsGetCurrentThread ();

    CurrentProcess = PsGetCurrentProcessByThread (CurrentThread);

    PreviousMode = KeGetPreviousModeByThread(&CurrentThread->Tcb);

    //
    // Establish an exception handler, probe the specified addresses
    // for write access and capture the initial values.
    //

    try {

        //
        // Capture the number of pages.
        //

        if (PreviousMode != KernelMode) {

            ProbeForWritePointer (NumberOfPages);

            CapturedNumberOfPages = *NumberOfPages;

            if (CapturedNumberOfPages == 0) {
                return STATUS_SUCCESS;
            }

            if (CapturedNumberOfPages > (MAXULONG_PTR / sizeof(ULONG_PTR))) {
                return STATUS_INVALID_PARAMETER_2;
            }

            ProbeForWrite (UserPfnArray,
                           CapturedNumberOfPages * sizeof (ULONG_PTR),
                           sizeof(PULONG_PTR));
        }
        else {
            CapturedNumberOfPages = *NumberOfPages;
        }

    } except (ExSystemExceptionFilter()) {

        //
        // If an exception occurs during the probe or capture
        // of the initial values, then handle the exception and
        // return the exception code as the status value.
        //

        return GetExceptionCode();
    }

    //
    // Reference the specified process handle for VM_OPERATION access.
    //

    if (ProcessHandle == NtCurrentProcess()) {
        Process = CurrentProcess;
    }
    else {
        Status = ObReferenceObjectByHandle (ProcessHandle,
                                            PROCESS_VM_OPERATION,
                                            PsProcessType,
                                            PreviousMode,
                                            (PVOID *)&Process,
                                            NULL);

        if (!NT_SUCCESS(Status)) {
            return Status;
        }
    }

    //
    // LockMemory privilege is required.
    //

    if (!SeSinglePrivilegeCheck (SeLockMemoryPrivilege, PreviousMode)) {
        if (ProcessHandle != NtCurrentProcess()) {
            ObDereferenceObject (Process);
        }
        return STATUS_PRIVILEGE_NOT_HELD;
    }

    //
    // If the specified process is not the current process, attach
    // to the specified process.
    //

    if (CurrentProcess != Process) {
        KeStackAttachProcess (&Process->Pcb, &ApcState);
        Attached = TRUE;
    }

    BitMapSize = 0;
    TotalAllocatedPages = 0;

    //
    // Get the working set mutex to synchronize.  This also blocks APCs so
    // an APC which takes a page fault does not corrupt various structures.
    //

    WsHeld = TRUE;
    LOCK_WS (Process);

    //
    // Make sure the address space was not deleted.  If so, return an error.
    //

    if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
        Status = STATUS_PROCESS_IS_TERMINATING;
        goto ErrorReturn;
    }

    AweInfo = Process->AweInfo;

    if (AweInfo == NULL) {
        AweInfo = (PAWEINFO) MiAllocateAweInfo ();

        if (AweInfo == NULL) {
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto ErrorReturn;
        }

        ASSERT (AweInfo == Process->AweInfo);
    }

    if (AweInfo->VadPhysicalPagesLimit != 0) {

        if (AweInfo->VadPhysicalPages >= AweInfo->VadPhysicalPagesLimit) {
            Status = STATUS_COMMITMENT_LIMIT;
            goto ErrorReturn;
        }

        if (CapturedNumberOfPages > AweInfo->VadPhysicalPagesLimit - AweInfo->VadPhysicalPages) {
            CapturedNumberOfPages = AweInfo->VadPhysicalPagesLimit - AweInfo->VadPhysicalPages;
        }
    }

    //
    // Create the physical pages bitmap if it does not already exist.
    //

    BitMap = AweInfo->VadPhysicalPagesBitMap;

    if (BitMap == NULL) {

        HighestPossiblePhysicalPage = MmHighestPossiblePhysicalPage;

#if defined (_WIN64)
        //
        // Force a 32-bit maximum on any page allocation because the bitmap
        // package is currently 32-bit.
        //

        if (HighestPossiblePhysicalPage + 1 >= _4gb) {
            HighestPossiblePhysicalPage = _4gb - 2;
        }
#endif

        BitMapSize = sizeof(RTL_BITMAP) + (ULONG)((((HighestPossiblePhysicalPage + 1) + 31) / 32) * 4);

        BitMap = ExAllocatePoolWithTag (NonPagedPool, BitMapSize, 'LdaV');

        if (BitMap == NULL) {
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto ErrorReturn;
        }

        RtlInitializeBitMap (BitMap,
                             (PULONG)(BitMap + 1),
                             (ULONG)(HighestPossiblePhysicalPage + 1));

        RtlClearAllBits (BitMap);
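
        //
        // For scale: the bitmap needs one bit per possible physical frame,
        // rounded up to whole ULONGs.  On a 4gb x86 machine with 4k pages
        // that is 0x100000 frames, so the buffer is 0x100000 / 32 = 32768
        // ULONGs (128k) plus the RTL_BITMAP header.
        //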

        //
        // Charge quota for the nonpaged pool for the bitmap.  This is
        // done here rather than by using ExAllocatePoolWithQuota
        // so the process object is not referenced by the quota charge.
        //

        Status = PsChargeProcessNonPagedPoolQuota (Process, BitMapSize);

        if (!NT_SUCCESS(Status)) {
            UNLOCK_WS (Process);
            WsHeld = FALSE;
            ExFreePool (BitMap);
            goto ErrorReturn;
        }

        AweInfo->VadPhysicalPagesBitMap = BitMap;

        UNLOCK_WS (Process);
        WsHeld = FALSE;

        SizeOfBitMap = BitMap->SizeOfBitMap;
    }
    else {
        SizeOfBitMap = AweInfo->VadPhysicalPagesBitMap->SizeOfBitMap;

        UNLOCK_WS (Process);
        WsHeld = FALSE;
    }

    AllocatedPages = 0;
    MemoryDescriptorHead = NULL;

    SkipBytes.QuadPart = 0;

    //
    // Don't use the low 16mb of memory so that at least some low pages are
    // left for 32/24-bit device drivers.  Just under 4gb is the maximum
    // allocation per MDL so the ByteCount field does not overflow.
    //

    HighAddress.QuadPart = ((ULONGLONG)(SizeOfBitMap - 1)) << PAGE_SHIFT;

    LowAddress.QuadPart = LOWEST_USABLE_PHYSICAL_ADDRESS;

    if (LowAddress.QuadPart >= HighAddress.QuadPart) {

        //
        // If there's less than 16mb of RAM, just take pages from anywhere.
        //

#if DBG
        MiUsingLowPagesForAwe = TRUE;
#endif

        LowAddress.QuadPart = 0;
    }

    Status = STATUS_SUCCESS;

    do {

        MdlRequestInPages = CapturedNumberOfPages - TotalAllocatedPages;

        if (MdlRequestInPages > (ULONG_PTR)((MAXULONG - PAGE_SIZE) >> PAGE_SHIFT)) {
            MdlRequestInPages = (ULONG_PTR)((MAXULONG - PAGE_SIZE) >> PAGE_SHIFT);
        }

        //
        // Note this allocation returns zeroed pages.
        //

        MemoryDescriptorList = MmAllocatePagesForMdl (LowAddress,
                                                      HighAddress,
                                                      SkipBytes,
                                                      MdlRequestInPages << PAGE_SHIFT);

        if (MemoryDescriptorList == NULL) {

            //
            // No (more) pages available.  If this becomes a common
            // situation, all the working sets could be flushed here.
            //
            // Make do with what we've gotten so far.
            //

            if (TotalAllocatedPages == 0) {
                Status = STATUS_INSUFFICIENT_RESOURCES;
            }

            break;
        }

        AllocatedPages = MemoryDescriptorList->ByteCount >> PAGE_SHIFT;

        //
        // The per-process WS lock guards updates to AweInfo->VadPhysicalPages.
        //

        LOCK_WS (Process);

        //
        // Make sure the address space was not deleted.  If so, return an
        // error.  Note any prior MDLs allocated in this loop have already
        // had their pages freed by the exiting thread, but this thread is
        // still responsible for freeing the pool containing the MDLs
        // themselves.
        //

        if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
            UNLOCK_WS (Process);
            WsHeld = FALSE;
            MmFreePagesFromMdl (MemoryDescriptorList);
            ExFreePool (MemoryDescriptorList);
            Status = STATUS_PROCESS_IS_TERMINATING;
            break;
        }

        //
        // Recheck the process and job limits as they may have changed
        // when the working set mutex was released above.
        //

        if (AweInfo->VadPhysicalPagesLimit != 0) {

            if ((AweInfo->VadPhysicalPages >= AweInfo->VadPhysicalPagesLimit) ||
                (AllocatedPages > AweInfo->VadPhysicalPagesLimit - AweInfo->VadPhysicalPages)) {

                UNLOCK_WS (Process);
                WsHeld = FALSE;
                MmFreePagesFromMdl (MemoryDescriptorList);
                ExFreePool (MemoryDescriptorList);

                if (TotalAllocatedPages == 0) {
                    Status = STATUS_COMMITMENT_LIMIT;
                }

                break;
            }
        }

        if (Process->JobStatus & PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES) {

            if (PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
                                        AllocatedPages) == FALSE) {

                UNLOCK_WS (Process);
                WsHeld = FALSE;
                MmFreePagesFromMdl (MemoryDescriptorList);
                ExFreePool (MemoryDescriptorList);

                if (TotalAllocatedPages == 0) {
                    Status = STATUS_COMMITMENT_LIMIT;
                }

                break;
            }
        }

        ASSERT ((AweInfo->VadPhysicalPages + AllocatedPages <= AweInfo->VadPhysicalPagesLimit) ||
                (AweInfo->VadPhysicalPagesLimit == 0));

        AweInfo->VadPhysicalPages += AllocatedPages;

        //
        // Update the allocation bitmap for each allocated frame.
        // Note the PFN lock is not needed to modify the PteAddress below.
        // In fact, even the AWE push lock is not needed as these pages
        // are brand new.
        //

        MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);

        for (i = 0; i < AllocatedPages; i += 1) {

            ASSERT ((*MdlPage >= LOWEST_USABLE_PHYSICAL_PAGE) ||
                    (MiUsingLowPagesForAwe == TRUE));

            BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(*MdlPage);

            ASSERT (BitMapIndex < BitMap->SizeOfBitMap);
            ASSERT (MI_CHECK_BIT (BitMap->Buffer, BitMapIndex) == 0);

            ASSERT64 (*MdlPage < _4gb);

            Pfn1 = MI_PFN_ELEMENT (*MdlPage);
            ASSERT (MI_PFN_IS_AWE (Pfn1));
            Pfn1->PteAddress = NULL;
            Pfn1->AweReferenceCount = 1;
            ASSERT (Pfn1->u4.AweAllocation == 0);
            Pfn1->u4.AweAllocation = 1;
            ASSERT (Pfn1->u2.ShareCount == 1);

            //
            // Once this bit is set (and the mutex released below), a rogue
            // thread that is passing random frame numbers to
            // NtFreeUserPhysicalPages can free this frame.  This means NO
            // references can be made to it by this routine after this
            // point without first re-checking the bitmap.
            //

            MI_SET_BIT (BitMap->Buffer, BitMapIndex);

            MdlPage += 1;
        }

        UNLOCK_WS (Process);

        MemoryDescriptorList->Next = MemoryDescriptorHead;
        MemoryDescriptorHead = MemoryDescriptorList;

        InterlockedExchangeAddSizeT (&MmVadPhysicalPages, AllocatedPages);

        TotalAllocatedPages += AllocatedPages;

        ASSERT (TotalAllocatedPages <= CapturedNumberOfPages);

        if (TotalAllocatedPages == CapturedNumberOfPages) {
            break;
        }

        //
        // Try the same memory range again - there might be more pages
        // left in it that can be claimed as a truncated MDL had to be
        // used for the last request.
        //

    } while (TRUE);

    WsHeld = FALSE;

    if (Attached == TRUE) {
        KeUnstackDetachProcess (&ApcState);
        Attached = FALSE;
    }

    //
    // Establish an exception handler and carefully write out the
    // number of pages and the frame numbers.
    //

    try {

        ASSERT (TotalAllocatedPages <= CapturedNumberOfPages);

        //
        // Deliberately only write out the number of pages if the operation
        // succeeded.  This is because this was the behavior on Windows 2000.
        // And an app may be calling like this:
        //
        //     PagesNo = SOMETHING_BIG;
        //
        //     do
        //     {
        //         Success = AllocateUserPhysicalPages (&PagesNo);
        //
        //         if (Success == TRUE) {
        //             break;
        //         }
        //
        //         PagesNo = PagesNo / 2;
        //         continue;
        //     } while (PagesNo > 0);
        //

        if (NT_SUCCESS (Status)) {
            *NumberOfPages = TotalAllocatedPages;
        }

        MemoryDescriptorList = MemoryDescriptorHead;

        while (MemoryDescriptorList != NULL) {

            MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
            AllocatedPages = MemoryDescriptorList->ByteCount >> PAGE_SHIFT;

            for (i = 0; i < AllocatedPages; i += 1) {

                *UserPfnArray = *(PULONG_PTR)MdlPage;

#if 0
                //
                // The bitmap entry for this page was set above, so a rogue
                // thread that is passing random frame numbers to
                // NtFreeUserPhysicalPages may have already freed this
                // frame.  This means the ASSERT below cannot be made
                // without first re-checking the bitmap to see if the page
                // is still in it.  It's not worth reacquiring the mutex
                // just for this, so turn the assert off for now.
                //

                ASSERT (MI_PFN_ELEMENT(*MdlPage)->u2.ShareCount == 1);
#endif

                UserPfnArray += 1;
                MdlPage += 1;
            }

            MemoryDescriptorList = MemoryDescriptorList->Next;
        }

    } except (ExSystemExceptionFilter()) {

        //
        // If anything went wrong communicating the pages back to the user
        // then the user has really hurt himself because these addresses
        // passed the probe tests at the beginning of the service.  Rather
        // than carrying around extensive recovery code, just return back
        // success as this scenario is the same as if the user scribbled
        // over the output parameters after the service returned anyway.
        // You can't stop someone who's determined to lose their values !
        //
        // Fall through...
        //
    }

    //
    // Free the space consumed by the MDLs now that the page frame numbers
    // have been saved in the bitmap and copied to the user.
    //

    MemoryDescriptorList = MemoryDescriptorHead;

    while (MemoryDescriptorList != NULL) {
        MemoryDescriptorList2 = MemoryDescriptorList->Next;
        ExFreePool (MemoryDescriptorList);
        MemoryDescriptorList = MemoryDescriptorList2;
    }

ErrorReturn:

    if (WsHeld == TRUE) {
        UNLOCK_WS (Process);
    }

    ASSERT (TotalAllocatedPages <= CapturedNumberOfPages);

    if (Attached == TRUE) {
        KeUnstackDetachProcess (&ApcState);
    }

    if (ProcessHandle != NtCurrentProcess()) {
        ObDereferenceObject (Process);
    }

    return Status;
}
  1464. NTSTATUS
  1465. NtFreeUserPhysicalPages (
  1466. IN HANDLE ProcessHandle,
  1467. IN OUT PULONG_PTR NumberOfPages,
  1468. IN PULONG_PTR UserPfnArray
  1469. )
  1470. /*++
  1471. Routine Description:
  1472. This function frees the nonpaged physical pages for the specified
  1473. subject process. Any PTEs referencing these pages are also invalidated.
  1474. Note there is no need to walk the entire VAD tree to clear the PTEs that
  1475. match each page as each physical page can only be mapped at a single
  1476. virtual address (alias addresses within the VAD are not allowed).
  1477. Arguments:
  1478. ProcessHandle - Supplies an open handle to a process object.
  1479. NumberOfPages - Supplies the size in pages of the allocation to delete.
  1480. Returns the actual number of pages deleted.
  1481. UserPfnArray - Supplies a pointer to memory to retrieve the page frame
  1482. numbers from.
  1483. Return Value:
  1484. Various NTSTATUS codes.
  1485. --*/
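//
// An illustrative user-mode counterpart to this service (a sketch only,
// using the Win32 FreeUserPhysicalPages wrapper; PagesNo and PfnArray
// are assumed to be the count and frame array returned by an earlier
// AllocateUserPhysicalPages call):
//
// if (!FreeUserPhysicalPages (GetCurrentProcess (),
//                             &PagesNo,
//                             PfnArray)) {
//     //
//     // PagesNo reports how many frames were freed before the error.
//     //
// }
//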
  1486. {
  1487. PAWEINFO AweInfo;
  1488. PULONG BitBuffer;
  1489. KAPC_STATE ApcState;
  1490. ULONG_PTR CapturedNumberOfPages;
  1491. PMDL MemoryDescriptorList;
  1492. PPFN_NUMBER MdlPage;
  1493. PPFN_NUMBER LastMdlPage;
  1494. PFN_NUMBER PagesInMdl;
  1495. PFN_NUMBER PageFrameIndex;
  1496. PRTL_BITMAP BitMap;
  1497. ULONG BitMapIndex;
  1498. ULONG_PTR PagesProcessed;
  1499. PFN_NUMBER MdlHack[(sizeof(MDL) / sizeof(PFN_NUMBER)) + COPY_STACK_SIZE];
  1500. ULONG_PTR MdlPages;
  1501. ULONG_PTR NumberOfBytes;
  1502. PEPROCESS Process;
  1503. KPROCESSOR_MODE PreviousMode;
  1504. NTSTATUS Status;
  1505. LOGICAL Attached;
  1506. PMMPFN Pfn1;
  1507. LOGICAL OnePassComplete;
  1508. LOGICAL ProcessReferenced;
  1509. MMPTE_FLUSH_LIST PteFlushList;
  1510. PMMPTE PointerPte;
  1511. MMPTE OldPteContents;
  1512. PETHREAD CurrentThread;
  1513. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1514. //
  1515. // Establish an exception handler, probe the specified addresses
  1516. // for read access and capture the page frame numbers.
  1517. //
  1518. CurrentThread = PsGetCurrentThread ();
  1519. PreviousMode = KeGetPreviousModeByThread (&CurrentThread->Tcb);
  1520. if (PreviousMode != KernelMode) {
  1521. try {
  1522. ProbeForWritePointer (NumberOfPages);
  1523. CapturedNumberOfPages = *NumberOfPages;
  1524. //
  1525. // Initialize the NumberOfPages freed to zero so the user can be
  1526. // reasonably informed about errors that occur midway through
  1527. // the transaction.
  1528. //
  1529. *NumberOfPages = 0;
  1530. } except (ExSystemExceptionFilter()) {
  1531. //
  1532. // If an exception occurs during the probe or capture
  1533. // of the initial values, then handle the exception and
  1534. // return the exception code as the status value.
  1535. //
  1536. return GetExceptionCode();
  1537. }
  1538. }
  1539. else {
  1540. CapturedNumberOfPages = *NumberOfPages;
  1541. }
  1542. if (CapturedNumberOfPages == 0) {
  1543. return STATUS_INVALID_PARAMETER_2;
  1544. }
  1545. OnePassComplete = FALSE;
  1546. PagesProcessed = 0;
  1547. MemoryDescriptorList = NULL;
  1548. SATISFY_OVERZEALOUS_COMPILER (MdlPages = 0);
  1549. if (CapturedNumberOfPages > COPY_STACK_SIZE) {
  1550. //
  1551. // Ensure the number of pages can fit into an MDL's ByteCount.
  1552. //
  1553. if (CapturedNumberOfPages > ((ULONG)MAXULONG >> PAGE_SHIFT)) {
  1554. MdlPages = (ULONG_PTR)((ULONG)MAXULONG >> PAGE_SHIFT);
  1555. }
  1556. else {
  1557. MdlPages = CapturedNumberOfPages;
  1558. }
  1559. while (MdlPages > COPY_STACK_SIZE) {
  1560. MemoryDescriptorList = MmCreateMdl (NULL,
  1561. 0,
  1562. MdlPages << PAGE_SHIFT);
  1563. if (MemoryDescriptorList != NULL) {
  1564. break;
  1565. }
  1566. MdlPages >>= 1;
  1567. }
  1568. }
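//
// Sizing note (an illustration, assuming 4KB pages): an MDL ByteCount
// is a ULONG, so one MDL can describe at most MAXULONG >> PAGE_SHIFT ==
// 0xFFFFF frames; larger requests are handled in multiple passes via
// the repeat label below. Each halving of MdlPages above also halves
// the pool needed for the MDL's PFN array (sizeof(PFN_NUMBER) bytes
// per frame).
//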
  1569. if (MemoryDescriptorList == NULL) {
  1570. MdlPages = COPY_STACK_SIZE;
  1571. MemoryDescriptorList = (PMDL)&MdlHack[0];
  1572. }
  1573. ProcessReferenced = FALSE;
  1574. Process = PsGetCurrentProcessByThread (CurrentThread);
  1575. repeat:
  1576. if (CapturedNumberOfPages < MdlPages) {
  1577. MdlPages = CapturedNumberOfPages;
  1578. }
  1579. MmInitializeMdl (MemoryDescriptorList, 0, MdlPages << PAGE_SHIFT);
  1580. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1581. NumberOfBytes = MdlPages * sizeof(ULONG_PTR);
  1582. Attached = FALSE;
  1583. //
  1584. // Establish an exception handler, probe the specified addresses
  1585. // for read access and capture the page frame numbers.
  1586. //
  1587. if (PreviousMode != KernelMode) {
  1588. try {
  1589. //
1590. // Update the user's count so that if anything goes wrong, the user
1591. // can be reasonably informed about how far into the transaction the
1592. // failure occurred.
  1593. //
  1594. *NumberOfPages = PagesProcessed;
  1595. ProbeForRead (UserPfnArray,
  1596. NumberOfBytes,
  1597. sizeof(PULONG_PTR));
  1598. RtlCopyMemory ((PVOID)MdlPage,
  1599. UserPfnArray,
  1600. NumberOfBytes);
  1601. } except (ExSystemExceptionFilter()) {
  1602. //
  1603. // If an exception occurs during the probe or capture
  1604. // of the initial values, then handle the exception and
  1605. // return the exception code as the status value.
  1606. //
  1607. Status = GetExceptionCode();
  1608. goto ErrorReturn;
  1609. }
  1610. }
  1611. else {
  1612. RtlCopyMemory ((PVOID)MdlPage,
  1613. UserPfnArray,
  1614. NumberOfBytes);
  1615. }
  1616. if (OnePassComplete == FALSE) {
  1617. //
  1618. // Reference the specified process handle for VM_OPERATION access.
  1619. //
  1620. if (ProcessHandle == NtCurrentProcess()) {
  1621. Process = PsGetCurrentProcessByThread(CurrentThread);
  1622. }
  1623. else {
  1624. Status = ObReferenceObjectByHandle ( ProcessHandle,
  1625. PROCESS_VM_OPERATION,
  1626. PsProcessType,
  1627. PreviousMode,
  1628. (PVOID *)&Process,
  1629. NULL );
  1630. if (!NT_SUCCESS(Status)) {
  1631. goto ErrorReturn;
  1632. }
  1633. ProcessReferenced = TRUE;
  1634. }
  1635. }
  1636. //
  1637. // If the specified process is not the current process, attach
  1638. // to the specified process.
  1639. //
  1640. if (PsGetCurrentProcessByThread(CurrentThread) != Process) {
  1641. KeStackAttachProcess (&Process->Pcb, &ApcState);
  1642. Attached = TRUE;
  1643. }
  1644. //
  1645. // A memory barrier is needed to read the EPROCESS AweInfo field
  1646. // in order to ensure the writes to the AweInfo structure fields are
  1647. // visible in correct order. This avoids the need to acquire any
  1648. // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
  1649. // of best performance.
  1650. //
  1651. KeMemoryBarrier ();
  1652. AweInfo = (PAWEINFO) Process->AweInfo;
  1653. //
  1654. // The physical pages bitmap must exist.
  1655. //
  1656. if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
  1657. Status = STATUS_INVALID_PARAMETER_1;
  1658. goto ErrorReturn;
  1659. }
  1660. PteFlushList.Count = 0;
  1661. Status = STATUS_SUCCESS;
  1662. //
  1663. // Get the address creation mutex to block multiple threads from
  1664. // creating or deleting address space at the same time and
  1665. // get the working set mutex so virtual address descriptors can
  1666. // be inserted and walked. Block APCs so an APC which takes a page
  1667. // fault does not corrupt various structures.
  1668. //
  1669. LOCK_WS (Process);
  1670. //
  1671. // Make sure the address space was not deleted, if so, return an error.
  1672. //
  1673. if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
  1674. UNLOCK_WS (Process);
  1675. Status = STATUS_PROCESS_IS_TERMINATING;
  1676. goto ErrorReturn;
  1677. }
  1678. BitMap = AweInfo->VadPhysicalPagesBitMap;
  1679. ASSERT (BitMap != NULL);
  1680. BitBuffer = BitMap->Buffer;
  1681. LastMdlPage = MdlPage + MdlPages;
  1682. //
  1683. // Flush the entire TB for this process while holding its AWE push lock
  1684. // exclusive so that if this free is occurring prior to any pending
  1685. // flushes at the end of an in-progress map/unmap, the app is not left
  1686. // with a stale TB entry that would allow him to corrupt pages that no
  1687. // longer belong to him.
  1688. //
  1689. //
  1690. // Block APCs to prevent recursive pushlock scenarios as this is not
  1691. // supported.
  1692. //
  1693. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  1694. KeFlushProcessTb (FALSE);
  1695. while (MdlPage < LastMdlPage) {
  1696. PageFrameIndex = *MdlPage;
  1697. BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);
  1698. #if defined (_WIN64)
  1699. //
  1700. // Ensure the frame is a 32-bit number.
  1701. //
  1702. if (BitMapIndex != PageFrameIndex) {
  1703. Status = STATUS_CONFLICTING_ADDRESSES;
  1704. break;
  1705. }
  1706. #endif
  1707. //
  1708. // Frames past the end of the bitmap are not allowed.
  1709. //
  1710. if (BitMapIndex >= BitMap->SizeOfBitMap) {
  1711. Status = STATUS_CONFLICTING_ADDRESSES;
  1712. break;
  1713. }
  1714. //
  1715. // Frames not in the bitmap are not allowed.
  1716. //
  1717. if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
  1718. Status = STATUS_CONFLICTING_ADDRESSES;
  1719. break;
  1720. }
  1721. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1722. (MiUsingLowPagesForAwe == TRUE));
  1723. PagesProcessed += 1;
  1724. ASSERT64 (PageFrameIndex < _4gb);
  1725. MI_CLEAR_BIT (BitBuffer, BitMapIndex);
  1726. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1727. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1728. ASSERT (Pfn1->u4.AweAllocation == 1);
  1729. #if DBG
  1730. if (Pfn1->u2.ShareCount == 1) {
  1731. ASSERT (Pfn1->PteAddress == NULL);
  1732. }
  1733. else if (Pfn1->u2.ShareCount == 2) {
  1734. ASSERT (Pfn1->PteAddress != NULL);
  1735. }
  1736. else {
  1737. ASSERT (FALSE);
  1738. }
  1739. #endif
  1740. //
  1741. // If the frame is currently mapped in the Vad then the PTE must
  1742. // be cleared and the TB entry flushed.
  1743. //
  1744. if (Pfn1->u2.ShareCount != 1) {
  1745. //
  1746. // Note the exclusive hold of the AWE push lock prevents
  1747. // any other concurrent threads from mapping or unmapping
  1748. // right now. This also eliminates the need to update the PFN
  1749. // sharecount with an interlocked sequence as well.
  1750. //
  1751. Pfn1->u2.ShareCount -= 1;
  1752. PointerPte = Pfn1->PteAddress;
  1753. Pfn1->PteAddress = NULL;
  1754. OldPteContents = *PointerPte;
  1755. ASSERT (OldPteContents.u.Hard.Valid == 1);
  1756. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  1757. PteFlushList.FlushVa[PteFlushList.Count] =
  1758. MiGetVirtualAddressMappedByPte (PointerPte);
  1759. PteFlushList.Count += 1;
  1760. }
  1761. MI_WRITE_INVALID_PTE (PointerPte, ZeroPte);
  1762. }
  1763. MI_SET_PFN_DELETED (Pfn1);
  1764. MdlPage += 1;
  1765. }
  1766. //
  1767. // Flush the TB entries for any relevant pages.
  1768. //
  1769. MiFlushPteList (&PteFlushList, FALSE);
  1770. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  1771. //
  1772. // Free the actual pages (this may be a partially filled MDL).
  1773. //
  1774. PagesInMdl = MdlPage - (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1775. //
  1776. // Set the ByteCount to the actual number of validated pages - the caller
  1777. // may have lied and we have to sync up here to account for any bogus
  1778. // frames.
  1779. //
  1780. MemoryDescriptorList->ByteCount = (ULONG)(PagesInMdl << PAGE_SHIFT);
  1781. if (PagesInMdl != 0) {
  1782. AweInfo->VadPhysicalPages -= PagesInMdl;
  1783. UNLOCK_WS (Process);
  1784. InterlockedExchangeAddSizeT (&MmVadPhysicalPages, 0 - PagesInMdl);
  1785. MmFreePagesFromMdl (MemoryDescriptorList);
  1786. if (Process->JobStatus & PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES) {
  1787. PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  1788. -(SSIZE_T)PagesInMdl);
  1789. }
  1790. }
  1791. else {
  1792. UNLOCK_WS (Process);
  1793. }
  1794. CapturedNumberOfPages -= PagesInMdl;
  1795. if ((Status == STATUS_SUCCESS) && (CapturedNumberOfPages != 0)) {
  1796. if (Attached == TRUE) {
  1797. KeUnstackDetachProcess (&ApcState);
  1798. Attached = FALSE;
  1799. }
  1800. OnePassComplete = TRUE;
  1801. ASSERT (MdlPages == PagesInMdl);
  1802. UserPfnArray += MdlPages;
  1803. //
  1804. // Do it all again until all the pages are freed or an error occurs.
  1805. //
  1806. goto repeat;
  1807. }
  1808. //
  1809. // Fall through.
  1810. //
  1811. ErrorReturn:
  1812. //
  1813. // Free any pool acquired for holding MDLs.
  1814. //
  1815. if (MemoryDescriptorList != (PMDL)&MdlHack[0]) {
  1816. ExFreePool (MemoryDescriptorList);
  1817. }
  1818. if (Attached == TRUE) {
  1819. KeUnstackDetachProcess (&ApcState);
  1820. }
  1821. //
  1822. // Establish an exception handler and carefully write out the
  1823. // number of pages actually processed.
  1824. //
  1825. try {
  1826. *NumberOfPages = PagesProcessed;
  1827. } except (EXCEPTION_EXECUTE_HANDLER) {
  1828. //
  1829. // Return success at this point even if the results
  1830. // cannot be written.
  1831. //
  1832. NOTHING;
  1833. }
  1834. if (ProcessReferenced == TRUE) {
  1835. ObDereferenceObject (Process);
  1836. }
  1837. return Status;
  1838. }
  1839. VOID
  1840. MiRemoveUserPhysicalPagesVad (
  1841. IN PMMVAD_SHORT Vad
  1842. )
  1843. /*++
  1844. Routine Description:
  1845. This function removes the user-physical-pages mapped region from the
  1846. current process's address space. This mapped region is private memory.
  1847. The physical pages of this Vad are unmapped here, but not freed.
  1848. Pagetable pages are freed and their use/commitment counts/quotas are
  1849. managed by our caller.
  1850. Arguments:
  1851. Vad - Supplies the VAD which manages the address space.
  1852. Return Value:
  1853. None.
  1854. Environment:
  1855. APC level, working set mutex and address creation mutex held.
  1856. --*/
  1857. {
  1858. PMMPFN Pfn1;
  1859. PEPROCESS Process;
  1860. PFN_NUMBER PageFrameIndex;
  1861. MMPTE_FLUSH_LIST PteFlushList;
  1862. PMMPTE PointerPte;
  1863. MMPTE PteContents;
  1864. PMMPTE EndingPte;
  1865. PAWEINFO AweInfo;
  1866. PKTHREAD CurrentThread;
  1867. #if DBG
  1868. ULONG_PTR ActualPages;
  1869. ULONG_PTR ExpectedPages;
  1870. PMI_PHYSICAL_VIEW PhysicalView;
  1871. PVOID RestartKey;
  1872. #endif
  1873. ASSERT (KeAreAllApcsDisabled () == TRUE);
  1874. ASSERT (Vad->u.VadFlags.UserPhysicalPages == 1);
  1875. Process = PsGetCurrentProcess ();
  1876. AweInfo = (PAWEINFO) Process->AweInfo;
  1877. ASSERT (AweInfo != NULL);
  1878. //
  1879. // If the physical pages count is zero, nothing needs to be done.
  1880. // On checked systems, verify the list anyway.
  1881. //
  1882. #if DBG
  1883. ActualPages = 0;
  1884. ExpectedPages = AweInfo->VadPhysicalPages;
  1885. #else
  1886. if (AweInfo->VadPhysicalPages == 0) {
  1887. return;
  1888. }
  1889. #endif
  1890. PointerPte = MiGetPteAddress (MI_VPN_TO_VA (Vad->StartingVpn));
  1891. EndingPte = MiGetPteAddress (MI_VPN_TO_VA_ENDING (Vad->EndingVpn));
  1892. PteFlushList.Count = 0;
  1893. //
  1894. // The caller must have removed this Vad from the physical view list,
  1895. // otherwise another thread could immediately remap pages back into this
  1896. // same Vad.
  1897. //
  1898. CurrentThread = KeGetCurrentThread ();
  1899. KeEnterGuardedRegionThread (CurrentThread);
  1900. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  1901. #if DBG
  1902. RestartKey = NULL;
  1903. do {
  1904. PhysicalView = (PMI_PHYSICAL_VIEW) MiEnumerateGenericTableWithoutSplayingAvl (&AweInfo->AweVadRoot, &RestartKey);
  1905. if (PhysicalView == NULL) {
  1906. break;
  1907. }
  1908. ASSERT (PhysicalView->Vad != (PMMVAD)Vad);
  1909. } while (TRUE);
  1910. #endif
  1911. while (PointerPte <= EndingPte) {
  1912. PteContents = *PointerPte;
  1913. if (PteContents.u.Hard.Valid == 0) {
  1914. PointerPte += 1;
  1915. continue;
  1916. }
  1917. //
  1918. // The frame is currently mapped in this Vad so the PTE must
  1919. // be cleared and the TB entry flushed.
  1920. //
  1921. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerPte);
  1922. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1923. (MiUsingLowPagesForAwe == TRUE));
  1924. ASSERT (ExpectedPages != 0);
  1925. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1926. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1927. ASSERT (Pfn1->u2.ShareCount == 2);
  1928. ASSERT (Pfn1->PteAddress == PointerPte);
  1929. //
  1930. // Note the AWE/PFN locks are not needed here because we have acquired
  1931. // the pushlock exclusive so no one can be mapping or unmapping
  1932. // right now. In fact, the PFN sharecount doesn't even have to be
  1933. // updated with an interlocked sequence because the pushlock is held
  1934. // exclusive.
  1935. //
  1936. Pfn1->u2.ShareCount -= 1;
  1937. Pfn1->PteAddress = NULL;
  1938. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  1939. PteFlushList.FlushVa[PteFlushList.Count] =
  1940. MiGetVirtualAddressMappedByPte (PointerPte);
  1941. PteFlushList.Count += 1;
  1942. }
  1943. MI_WRITE_INVALID_PTE (PointerPte, ZeroPte);
  1944. PointerPte += 1;
  1945. #if DBG
  1946. ActualPages += 1;
  1947. #endif
  1948. ASSERT (ActualPages <= ExpectedPages);
  1949. }
  1950. //
  1951. // Flush the TB entries for any relevant pages.
  1952. //
  1953. MiFlushPteList (&PteFlushList, FALSE);
  1954. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  1955. KeLeaveGuardedRegionThread (CurrentThread);
  1956. return;
  1957. }
  1958. VOID
  1959. MiCleanPhysicalProcessPages (
  1960. IN PEPROCESS Process
  1961. )
  1962. /*++
  1963. Routine Description:
  1964. This routine frees the VadPhysicalBitMap, any remaining physical pages (as
  1965. they may not have been currently mapped into any Vads) and returns the
  1966. bitmap quota.
  1967. Arguments:
  1968. Process - Supplies the process to clean.
  1969. Return Value:
  1970. None.
  1971. Environment:
  1972. Kernel mode, APC level, working set mutex held. Called only on process
  1973. exit, so the AWE push lock is not needed here.
  1974. --*/
  1975. {
  1976. PMMPFN Pfn1;
  1977. PAWEINFO AweInfo;
  1978. ULONG BitMapSize;
  1979. ULONG BitMapIndex;
  1980. ULONG BitMapHint;
  1981. PRTL_BITMAP BitMap;
  1982. PPFN_NUMBER MdlPage;
  1983. PFN_NUMBER MdlHack[(sizeof(MDL) / sizeof(PFN_NUMBER)) + COPY_STACK_SIZE];
  1984. ULONG_PTR MdlPages;
  1985. ULONG_PTR NumberOfPages;
  1986. ULONG_PTR TotalFreedPages;
  1987. PMDL MemoryDescriptorList;
  1988. PFN_NUMBER PageFrameIndex;
  1989. PFN_NUMBER HighestPossiblePhysicalPage;
  1990. #if DBG
  1991. ULONG_PTR ActualPages = 0;
  1992. ULONG_PTR ExpectedPages = 0;
  1993. #endif
  1994. ASSERT (KeAreAllApcsDisabled () == TRUE);
  1995. AweInfo = (PAWEINFO) Process->AweInfo;
  1996. if (AweInfo == NULL) {
  1997. return;
  1998. }
  1999. TotalFreedPages = 0;
  2000. BitMap = AweInfo->VadPhysicalPagesBitMap;
  2001. if (BitMap == NULL) {
  2002. goto Finish;
  2003. }
  2004. #if DBG
  2005. ExpectedPages = AweInfo->VadPhysicalPages;
  2006. #else
  2007. if (AweInfo->VadPhysicalPages == 0) {
  2008. goto Finish;
  2009. }
  2010. #endif
  2011. MdlPages = COPY_STACK_SIZE;
  2012. MemoryDescriptorList = (PMDL)&MdlHack[0];
  2013. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  2014. NumberOfPages = 0;
  2015. BitMapHint = 0;
  2016. while (TRUE) {
  2017. BitMapIndex = RtlFindSetBits (BitMap, 1, BitMapHint);
  2018. if (BitMapIndex < BitMapHint) {
  2019. break;
  2020. }
  2021. if (BitMapIndex == NO_BITS_FOUND) {
  2022. break;
  2023. }
  2024. PageFrameIndex = MI_BITMAP_INDEX_TO_FRAME(BitMapIndex);
  2025. ASSERT64 (PageFrameIndex < _4gb);
  2026. //
2027. // A wrapped bitmap search was already handled above, so any frame
2028. // found here is at or beyond the hint. Note PFN 0 is illegal.
  2029. //
  2030. ASSERT (PageFrameIndex != 0);
  2031. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  2032. (MiUsingLowPagesForAwe == TRUE));
  2033. ASSERT (ExpectedPages != 0);
  2034. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  2035. ASSERT (Pfn1->u4.AweAllocation == 1);
  2036. ASSERT (Pfn1->u2.ShareCount == 1);
  2037. ASSERT (Pfn1->PteAddress == NULL);
  2038. ASSERT (MI_PFN_IS_AWE (Pfn1));
  2039. MI_SET_PFN_DELETED(Pfn1);
  2040. *MdlPage = PageFrameIndex;
  2041. MdlPage += 1;
  2042. NumberOfPages += 1;
  2043. #if DBG
  2044. ActualPages += 1;
  2045. #endif
  2046. if (NumberOfPages == COPY_STACK_SIZE) {
  2047. //
  2048. // Free the pages in the full MDL.
  2049. //
  2050. MmInitializeMdl (MemoryDescriptorList,
  2051. 0,
  2052. NumberOfPages << PAGE_SHIFT);
  2053. MmFreePagesFromMdl (MemoryDescriptorList);
  2054. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  2055. AweInfo->VadPhysicalPages -= NumberOfPages;
  2056. TotalFreedPages += NumberOfPages;
  2057. NumberOfPages = 0;
  2058. }
  2059. BitMapHint = BitMapIndex + 1;
  2060. if (BitMapHint >= BitMap->SizeOfBitMap) {
  2061. break;
  2062. }
  2063. }
  2064. //
  2065. // Free any straggling MDL pages here.
  2066. //
  2067. if (NumberOfPages != 0) {
  2068. MmInitializeMdl (MemoryDescriptorList,
  2069. 0,
  2070. NumberOfPages << PAGE_SHIFT);
  2071. MmFreePagesFromMdl (MemoryDescriptorList);
  2072. AweInfo->VadPhysicalPages -= NumberOfPages;
  2073. TotalFreedPages += NumberOfPages;
  2074. }
  2075. Finish:
  2076. ASSERT (ExpectedPages == ActualPages);
  2077. HighestPossiblePhysicalPage = MmHighestPossiblePhysicalPage;
  2078. #if defined (_WIN64)
  2079. //
  2080. // Force a 32-bit maximum on any page allocation because the bitmap
  2081. // package is currently 32-bit.
  2082. //
  2083. if (HighestPossiblePhysicalPage + 1 >= _4gb) {
  2084. HighestPossiblePhysicalPage = _4gb - 2;
  2085. }
  2086. #endif
  2087. ASSERT (AweInfo->VadPhysicalPages == 0);
  2088. if (BitMap != NULL) {
  2089. BitMapSize = sizeof(RTL_BITMAP) + (ULONG)((((HighestPossiblePhysicalPage + 1) + 31) / 32) * 4);
  2090. ExFreePool (BitMap);
  2091. PsReturnProcessNonPagedPoolQuota (Process, BitMapSize);
  2092. }
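//
// For illustration (assumed values): with a highest possible physical
// page of 0x1FFFF (512MB of 4KB pages), the expression above yields
// sizeof(RTL_BITMAP) + ((0x20000 + 31) / 32) * 4, i.e. a 16KB bit
// buffer plus the header - and exactly that quota amount is returned.
//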
  2093. ExFreeCacheAwarePushLock (AweInfo->PushLock);
  2094. ExFreePool (AweInfo);
  2095. Process->AweInfo = NULL;
  2096. ASSERT (ExpectedPages == ActualPages);
  2097. if (TotalFreedPages != 0) {
  2098. InterlockedExchangeAddSizeT (&MmVadPhysicalPages, 0 - TotalFreedPages);
  2099. if (Process->JobStatus & PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES) {
  2100. PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  2101. -(SSIZE_T)TotalFreedPages);
  2102. }
  2103. }
  2104. return;
  2105. }
  2106. VOID
  2107. MiAweViewInserter (
  2108. IN PEPROCESS Process,
  2109. IN PMI_PHYSICAL_VIEW PhysicalView
  2110. )
  2111. /*++
  2112. Routine Description:
  2113. This function inserts a new AWE or large page view into the specified
  2114. process' AWE chain.
  2115. Arguments:
  2116. Process - Supplies the process to add the AWE VAD to.
  2117. PhysicalView - Supplies the physical view data to link in.
  2118. Return Value:
2119. None.
  2120. Environment:
2121. Kernel mode, APC_LEVEL, working set and address space mutexes held.
  2122. --*/
  2123. {
  2124. PAWEINFO AweInfo;
  2125. AweInfo = (PAWEINFO) Process->AweInfo;
  2126. ASSERT (AweInfo != NULL);
  2127. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  2128. MiInsertNode ((PMMADDRESS_NODE)PhysicalView, &AweInfo->AweVadRoot);
  2129. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  2130. }
  2131. VOID
  2132. MiAweViewRemover (
  2133. IN PEPROCESS Process,
  2134. IN PMMVAD Vad
  2135. )
  2136. /*++
  2137. Routine Description:
  2138. This function removes an AWE or large page Vad from the specified
  2139. process' AWE chain.
  2140. Arguments:
  2141. Process - Supplies the process to remove the AWE VAD from.
  2142. Vad - Supplies the Vad to remove.
  2143. Return Value:
  2144. None.
  2145. Environment:
  2146. Kernel mode, APC_LEVEL, working set and address space mutexes held.
  2147. --*/
  2148. {
  2149. PAWEINFO AweInfo;
  2150. PMI_PHYSICAL_VIEW AweView;
  2151. TABLE_SEARCH_RESULT SearchResult;
  2152. AweInfo = (PAWEINFO) Process->AweInfo;
  2153. ASSERT (AweInfo != NULL);
  2154. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  2155. //
  2156. // Lookup the element and save the result.
  2157. //
  2158. SearchResult = MiFindNodeOrParent (&AweInfo->AweVadRoot,
  2159. Vad->StartingVpn,
  2160. (PMMADDRESS_NODE *) &AweView);
  2161. ASSERT (SearchResult == TableFoundNode);
  2162. ASSERT (AweView->Vad == Vad);
  2163. MiRemoveNode ((PMMADDRESS_NODE)AweView, &AweInfo->AweVadRoot);
  2164. if ((AweView->u.LongFlags == MI_PHYSICAL_VIEW_AWE) ||
  2165. (AweView->u.LongFlags == MI_PHYSICAL_VIEW_LARGE)) {
  2166. RtlZeroMemory (&AweInfo->PhysicalViewHint,
  2167. MAXIMUM_PROCESSORS * sizeof(PMI_PHYSICAL_VIEW));
  2168. }
  2169. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  2170. ExFreePool (AweView);
  2171. return;
  2172. }
  2173. typedef struct _MI_LARGEPAGE_MEMORY_RUN {
  2174. LIST_ENTRY ListEntry;
  2175. PFN_NUMBER BasePage;
  2176. PFN_NUMBER PageCount;
  2177. } MI_LARGEPAGE_MEMORY_RUN, *PMI_LARGEPAGE_MEMORY_RUN;
  2178. NTSTATUS
  2179. MiAllocateLargePages (
  2180. IN PVOID StartingAddress,
  2181. IN PVOID EndingAddress
  2182. )
  2183. /*++
  2184. Routine Description:
  2185. This routine allocates contiguous physical memory and then initializes
  2186. page directory and page table pages to map it with large pages.
  2187. Arguments:
  2188. StartingAddress - Supplies the starting address of the range.
  2189. EndingAddress - Supplies the ending address of the range.
  2190. Return Value:
  2191. NTSTATUS.
  2192. Environment:
  2193. Kernel mode, APCs disabled, AddressCreation mutex held.
  2194. --*/
  2195. {
  2196. PFN_NUMBER PdeFrame;
  2197. PLIST_ENTRY NextEntry;
  2198. PMI_LARGEPAGE_MEMORY_RUN LargePageInfo;
  2199. PFN_NUMBER ZeroCount;
  2200. PFN_NUMBER ZeroSize;
  2201. ULONG Color;
  2202. PCOLORED_PAGE_INFO ColoredPageInfoBase;
  2203. LIST_ENTRY LargePageListHead;
  2204. PMMPFN Pfn1;
  2205. PMMPFN EndPfn;
  2206. LOGICAL ChargedJob;
  2207. ULONG i;
  2208. PAWEINFO AweInfo;
  2209. MMPTE TempPde;
  2210. PEPROCESS Process;
  2211. SIZE_T NumberOfBytes;
  2212. PFN_NUMBER NewPage;
  2213. PFN_NUMBER PageFrameIndexLarge;
  2214. PFN_NUMBER NumberOfPages;
  2215. PFN_NUMBER ChunkSize;
  2216. PFN_NUMBER PagesSoFar;
  2217. PFN_NUMBER PagesLeft;
  2218. PMMPTE LastPde;
  2219. PMMPTE LastPpe;
  2220. PMMPTE LastPxe;
  2221. PMMPTE PointerPde;
  2222. PMMPTE PointerPpe;
  2223. PMMPTE PointerPxe;
  2224. KIRQL OldIrql;
  2225. #if (_MI_PAGING_LEVELS >= 3)
  2226. PFN_NUMBER PagesNeeded;
  2227. MMPTE PteContents;
  2228. PVOID UsedPageTableHandle;
  2229. #endif
  2230. ASSERT (KeAreAllApcsDisabled () == TRUE);
  2231. NumberOfBytes = (PCHAR)EndingAddress + 1 - (PCHAR)StartingAddress;
  2232. NumberOfPages = BYTES_TO_PAGES (NumberOfBytes);
  2233. ChargedJob = FALSE;
  2234. Process = PsGetCurrentProcess ();
  2235. AweInfo = (PAWEINFO) Process->AweInfo;
  2236. LOCK_WS_UNSAFE (Process);
  2237. if (AweInfo->VadPhysicalPagesLimit != 0) {
  2238. if (AweInfo->VadPhysicalPages >= AweInfo->VadPhysicalPagesLimit) {
  2239. UNLOCK_WS_UNSAFE (Process);
  2240. return STATUS_COMMITMENT_LIMIT;
  2241. }
  2242. if (NumberOfPages > AweInfo->VadPhysicalPagesLimit - AweInfo->VadPhysicalPages) {
  2243. UNLOCK_WS_UNSAFE (Process);
  2244. return STATUS_COMMITMENT_LIMIT;
  2245. }
  2246. ASSERT ((AweInfo->VadPhysicalPages + NumberOfPages <= AweInfo->VadPhysicalPagesLimit) || (AweInfo->VadPhysicalPagesLimit == 0));
  2247. if (Process->JobStatus & PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES) {
  2248. if (PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  2249. NumberOfPages) == FALSE) {
  2250. UNLOCK_WS_UNSAFE (Process);
  2251. return STATUS_COMMITMENT_LIMIT;
  2252. }
  2253. ChargedJob = TRUE;
  2254. }
  2255. }
  2256. AweInfo->VadPhysicalPages += NumberOfPages;
  2257. UNLOCK_WS_UNSAFE (Process);
  2258. PointerPxe = MiGetPxeAddress (StartingAddress);
  2259. PointerPpe = MiGetPpeAddress (StartingAddress);
  2260. PointerPde = MiGetPdeAddress (StartingAddress);
  2261. LastPxe = MiGetPxeAddress (EndingAddress);
  2262. LastPpe = MiGetPpeAddress (EndingAddress);
  2263. LastPde = MiGetPdeAddress (EndingAddress);
  2264. MmLockPagableSectionByHandle (ExPageLockHandle);
  2265. #if (_MI_PAGING_LEVELS >= 3)
  2266. //
  2267. // Charge resident available pages for all of the page directory
  2268. // pages as they will not be paged until the VAD is freed.
  2269. //
  2270. // Note that commitment is not charged here because the VAD insertion
  2271. // charges commit for the entire paging hierarchy (including the
  2272. // nonexistent page tables).
  2273. //
  2274. PagesNeeded = LastPpe - PointerPpe + 1;
  2275. #if (_MI_PAGING_LEVELS >= 4)
  2276. PagesNeeded += LastPxe - PointerPxe + 1;
  2277. #endif
  2278. ASSERT (PagesNeeded != 0);
  2279. LOCK_PFN (OldIrql);
  2280. if ((SPFN_NUMBER)PagesNeeded > MI_NONPAGABLE_MEMORY_AVAILABLE() - 20) {
  2281. UNLOCK_PFN (OldIrql);
  2282. MmUnlockPagableImageSection (ExPageLockHandle);
  2283. LOCK_WS_UNSAFE (Process);
  2284. ASSERT (AweInfo->VadPhysicalPages >= NumberOfPages);
  2285. AweInfo->VadPhysicalPages -= NumberOfPages;
  2286. UNLOCK_WS_UNSAFE (Process);
  2287. if (ChargedJob == TRUE) {
  2288. PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  2289. -(SSIZE_T)NumberOfPages);
  2290. }
  2291. return STATUS_INSUFFICIENT_RESOURCES;
  2292. }
  2293. MI_DECREMENT_RESIDENT_AVAILABLE (PagesNeeded, MM_RESAVAIL_ALLOCATE_USER_PAGE_TABLE);
  2294. UNLOCK_PFN (OldIrql);
  2295. #endif
  2296. i = 3;
  2297. ChunkSize = NumberOfPages;
  2298. PagesSoFar = 0;
  2299. LargePageInfo = NULL;
  2300. ZeroCount = 0;
  2301. InitializeListHead (&LargePageListHead);
  2302. //
  2303. // Allocate a list of colored anchors.
  2304. //
  2305. ColoredPageInfoBase = (PCOLORED_PAGE_INFO) ExAllocatePoolWithTag (
  2306. NonPagedPool,
  2307. MmSecondaryColors * sizeof (COLORED_PAGE_INFO),
  2308. 'ldmM');
  2309. if (ColoredPageInfoBase == NULL) {
  2310. goto bail;
  2311. }
  2312. for (Color = 0; Color < MmSecondaryColors; Color += 1) {
  2313. ColoredPageInfoBase[Color].PagesQueued = 0;
  2314. ColoredPageInfoBase[Color].PfnAllocation = (PMMPFN) MM_EMPTY_LIST;
  2316. }
  2317. //
  2318. // Try for the actual contiguous memory.
  2319. //
  2320. InterlockedIncrement (&MiDelayPageFaults);
  2321. do {
  2322. ASSERT (i <= 3);
  2323. if (LargePageInfo == NULL) {
  2324. LargePageInfo = ExAllocatePoolWithTag (NonPagedPool,
  2325. sizeof (MI_LARGEPAGE_MEMORY_RUN),
  2326. 'lLmM');
  2327. if (LargePageInfo == NULL) {
  2328. PageFrameIndexLarge = 0;
  2329. break;
  2330. }
  2331. }
  2332. PageFrameIndexLarge = MiFindLargePageMemory (ColoredPageInfoBase,
  2333. ChunkSize,
  2334. &ZeroSize);
  2335. if (PageFrameIndexLarge != 0) {
  2336. //
  2337. // Save the start and length of each run for subsequent
  2338. // zeroing and PDE filling.
  2339. //
  2340. LargePageInfo->BasePage = PageFrameIndexLarge;
  2341. LargePageInfo->PageCount = ChunkSize;
  2342. InsertTailList (&LargePageListHead, &LargePageInfo->ListEntry);
  2343. LargePageInfo = NULL;
  2344. ASSERT (ZeroSize <= ChunkSize);
  2345. ZeroCount += ZeroSize;
  2346. ASSERT ((ChunkSize == NumberOfPages) || (i == 0));
  2347. PagesSoFar += ChunkSize;
  2348. if (PagesSoFar == NumberOfPages) {
  2349. break;
  2350. }
  2351. else {
  2352. ASSERT (NumberOfPages > PagesSoFar);
  2353. PagesLeft = NumberOfPages - PagesSoFar;
  2354. if (ChunkSize > PagesLeft) {
  2355. ChunkSize = PagesLeft;
  2356. }
  2357. }
  2358. continue;
  2359. }
  2360. switch (i) {
  2361. case 3:
  2362. MmEmptyAllWorkingSets ();
  2363. #if DBG
  2364. if (MiShowStuckPages != 0) {
  2365. MiFlushAllPages ();
  2366. KeDelayExecutionThread (KernelMode,
  2367. FALSE,
  2368. (PLARGE_INTEGER)&MmHalfSecond);
  2369. }
  2370. #endif
  2371. i -= 1;
  2372. break;
  2373. case 2:
  2374. #if DBG
  2375. if (MiShowStuckPages != 0) {
  2376. MmEmptyAllWorkingSets ();
  2377. }
  2378. #endif
  2379. MiFlushAllPages ();
  2380. KeDelayExecutionThread (KernelMode,
  2381. FALSE,
  2382. (PLARGE_INTEGER)&MmHalfSecond);
  2383. i -= 1;
  2384. break;
  2385. case 1:
  2386. MmEmptyAllWorkingSets ();
  2387. MiFlushAllPages ();
  2388. KeDelayExecutionThread (KernelMode,
  2389. FALSE,
  2390. (PLARGE_INTEGER)&MmOneSecond);
  2391. i -= 1;
  2392. break;
  2393. case 0:
  2394. //
2395. // Halve the request size, rounding down to the next page
2396. // directory multiple if needed. Then retry.
  2397. //
  2398. ChunkSize >>= 1;
  2399. ChunkSize &= ~((MM_MINIMUM_VA_FOR_LARGE_PAGE >> PAGE_SHIFT) - 1);
  2400. break;
  2401. }
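//
// Worked example for case 0 above (architecture-dependent values
// assumed purely for illustration): with 4KB pages and a 4MB minimum
// large-page mapping, MM_MINIMUM_VA_FOR_LARGE_PAGE >> PAGE_SHIFT is
// 1024 pages, so a failed 3000-page chunk halves to 1500 and rounds
// down to 1024; once ChunkSize drops below 1024 the check below ends
// the retry loop.
//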
  2402. if (ChunkSize < (MM_MINIMUM_VA_FOR_LARGE_PAGE >> PAGE_SHIFT)) {
  2403. ASSERT (i == 0);
  2404. break;
  2405. }
  2406. } while (TRUE);
  2407. InterlockedDecrement (&MiDelayPageFaults);
  2408. if (LargePageInfo != NULL) {
  2409. ExFreePool (LargePageInfo);
  2410. LargePageInfo = NULL;
  2411. }
  2412. if (PageFrameIndexLarge == 0) {
  2413. bail:
  2414. //
  2415. // The entire region could not be allocated.
  2416. // Free any large page subchunks that might have been allocated.
  2417. //
  2418. NextEntry = LargePageListHead.Flink;
  2419. while (NextEntry != &LargePageListHead) {
  2420. LargePageInfo = CONTAINING_RECORD (NextEntry,
  2421. MI_LARGEPAGE_MEMORY_RUN,
  2422. ListEntry);
  2423. NextEntry = NextEntry->Flink;
  2424. RemoveEntryList (&LargePageInfo->ListEntry);
  2425. NewPage = LargePageInfo->BasePage;
  2426. ChunkSize = LargePageInfo->PageCount;
  2427. ASSERT (ChunkSize != 0);
  2428. Pfn1 = MI_PFN_ELEMENT (LargePageInfo->BasePage);
  2429. LOCK_PFN (OldIrql);
  2430. MI_INCREMENT_RESIDENT_AVAILABLE (ChunkSize, MM_RESAVAIL_FREE_LARGE_PAGES);
  2431. do {
  2432. ASSERT (Pfn1->u2.ShareCount == 1);
  2433. ASSERT (Pfn1->u3.e1.PageLocation == ActiveAndValid);
  2434. ASSERT (Pfn1->u3.e1.CacheAttribute == MiCached);
  2435. ASSERT (Pfn1->u3.e1.LargeSessionAllocation == 0);
  2436. ASSERT (Pfn1->u3.e1.PrototypePte == 0);
  2437. ASSERT (Pfn1->u3.e2.ReferenceCount == 1);
  2438. ASSERT (Pfn1->u4.VerifierAllocation == 0);
  2439. ASSERT (Pfn1->u4.AweAllocation == 1);
  2440. Pfn1->u3.e1.StartOfAllocation = 0;
  2441. Pfn1->u3.e1.EndOfAllocation = 0;
  2442. Pfn1->u3.e2.ReferenceCount = 0;
  2443. #if DBG
  2444. Pfn1->u3.e1.PageLocation = StandbyPageList;
  2445. #endif
  2446. MiInsertPageInFreeList (NewPage);
  2447. Pfn1 += 1;
  2448. NewPage += 1;
  2449. ChunkSize -= 1;
  2450. } while (ChunkSize != 0);
  2451. UNLOCK_PFN (OldIrql);
  2452. ExFreePool (LargePageInfo);
  2453. }
  2454. if (ColoredPageInfoBase != NULL) {
  2455. ExFreePool (ColoredPageInfoBase);
  2456. }
  2457. #if (_MI_PAGING_LEVELS >= 3)
  2458. if (PagesNeeded != 0) {
  2459. MI_INCREMENT_RESIDENT_AVAILABLE (PagesNeeded, MM_RESAVAIL_FREE_USER_PAGE_TABLE);
  2460. }
  2461. #endif
  2462. LOCK_WS_UNSAFE (Process);
  2463. ASSERT (AweInfo->VadPhysicalPages >= NumberOfPages);
  2464. AweInfo->VadPhysicalPages -= NumberOfPages;
  2465. UNLOCK_WS_UNSAFE (Process);
  2466. if (ChargedJob == TRUE) {
  2467. PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  2468. -(SSIZE_T)NumberOfPages);
  2469. }
  2470. MmUnlockPagableImageSection (ExPageLockHandle);
  2471. return STATUS_INSUFFICIENT_RESOURCES;
  2472. }
  2473. #if (_MI_PAGING_LEVELS >= 3)
  2474. LOCK_WS_UNSAFE (Process);
  2475. while (PointerPpe <= LastPpe) {
  2476. //
  2477. // Pointing to the next page directory page, make
  2478. // it exist and make it valid.
  2479. //
2480. // Note this ripples sharecounts through the paging hierarchy, so
2481. // there is no need to bump sharecounts to prevent trimming of the
2482. // page directory parent page - making the page directory valid
2483. // below does this automatically.
  2484. //
  2485. MiMakePdeExistAndMakeValid (PointerPpe, Process, MM_NOIRQL);
  2486. //
  2487. // Up the sharecount so the page directory page will not get
  2488. // trimmed even if it has no currently valid entries.
  2489. //
  2490. PteContents = *PointerPpe;
  2491. ASSERT (PteContents.u.Hard.Valid == 1);
  2492. Pfn1 = MI_PFN_ELEMENT (PteContents.u.Hard.PageFrameNumber);
  2493. LOCK_PFN (OldIrql);
  2494. Pfn1->u2.ShareCount += 1;
  2495. UNLOCK_PFN (OldIrql);
  2496. UsedPageTableHandle = (PVOID) Pfn1;
  2497. //
  2498. // Increment the count of non-zero page directory entries
  2499. // for this page directory - even though this entry is still zero,
  2500. // this is a special case.
  2501. //
  2502. MI_INCREMENT_USED_PTES_BY_HANDLE (UsedPageTableHandle);
  2503. PointerPpe += 1;
  2504. }
  2505. UNLOCK_WS_UNSAFE (Process);
  2506. #endif
  2507. if (ZeroCount != 0) {
  2508. //
  2509. // Zero all the free & standby pages, fanning out the work. This
  2510. // is done even on UP machines because the worker thread code maps
  2511. // large MDLs and is thus better performing than zeroing a single
  2512. // page at a time.
  2513. //
  2514. MiZeroInParallel (ColoredPageInfoBase);
  2515. //
  2516. // Denote that no pages are left to be zeroed because in addition
  2517. // to zeroing them, we have reset all their OriginalPte fields
  2518. // to demand zero so they cannot be walked by the zeroing loop
  2519. // below.
  2520. //
  2521. ZeroCount = 0;
  2522. }
  2523. //
  2524. // Map the now zeroed pages into the caller's user address space.
  2525. //
  2526. MI_MAKE_VALID_PTE (TempPde,
  2527. 0,
  2528. MM_READWRITE,
  2529. MiGetPteAddress (StartingAddress));
  2530. MI_SET_PTE_DIRTY (TempPde);
  2531. MI_SET_ACCESSED_IN_PTE (&TempPde, 1);
  2532. MI_MAKE_PDE_MAP_LARGE_PAGE (&TempPde);
  2533. NextEntry = LargePageListHead.Flink;
  2534. while (NextEntry != &LargePageListHead) {
  2535. LargePageInfo = CONTAINING_RECORD (NextEntry,
  2536. MI_LARGEPAGE_MEMORY_RUN,
  2537. ListEntry);
  2538. NextEntry = NextEntry->Flink;
  2539. RemoveEntryList (&LargePageInfo->ListEntry);
  2540. TempPde.u.Hard.PageFrameNumber = LargePageInfo->BasePage;
  2541. ChunkSize = LargePageInfo->PageCount;
  2542. ASSERT (ChunkSize != 0);
  2543. //
  2544. // Initialize each page directory page. Lock the PFN database to
  2545. // prevent races with concurrent MmProbeAndLockPages calls.
  2546. //
  2547. LastPde = PointerPde + (ChunkSize / (MM_VA_MAPPED_BY_PDE >> PAGE_SHIFT));
  2548. Pfn1 = MI_PFN_ELEMENT (LargePageInfo->BasePage);
  2549. EndPfn = Pfn1 + ChunkSize;
  2550. ASSERT (MiGetPteAddress (PointerPde)->u.Hard.Valid == 1);
  2551. PdeFrame = (PFN_NUMBER) (MiGetPteAddress (PointerPde)->u.Hard.PageFrameNumber);
  2552. LOCK_WS_UNSAFE (Process);
  2553. LOCK_PFN (OldIrql);
  2554. do {
  2555. ASSERT (Pfn1->u4.AweAllocation == 1);
  2556. Pfn1->AweReferenceCount = 1;
  2557. Pfn1->PteAddress = PointerPde; // Point at the allocation base
  2558. MI_SET_PFN_DELETED (Pfn1);
  2559. Pfn1->u4.PteFrame = PdeFrame; // Point at the allocation base
  2560. Pfn1 += 1;
  2561. } while (Pfn1 < EndPfn);
  2562. while (PointerPde < LastPde) {
  2563. ASSERT (PointerPde->u.Long == 0);
  2564. MI_WRITE_VALID_PTE (PointerPde, TempPde);
  2565. TempPde.u.Hard.PageFrameNumber += (MM_VA_MAPPED_BY_PDE >> PAGE_SHIFT);
  2566. PointerPde += 1;
  2567. }
  2568. UNLOCK_PFN (OldIrql);
  2569. UNLOCK_WS_UNSAFE (Process);
  2570. ExFreePool (LargePageInfo);
  2571. }
  2572. MmUnlockPagableImageSection (ExPageLockHandle);
  2573. ExFreePool (ColoredPageInfoBase);
  2574. #if 0
  2575. //
  2576. // Make sure the range really is zero.
  2577. //
  2578. try {
  2579. ASSERT (RtlCompareMemoryUlong (StartingAddress, NumberOfBytes, 0) == NumberOfBytes);
  2580. } except (EXCEPTION_EXECUTE_HANDLER) {
  2581. DbgPrint ("MM: Exception during large page zero compare!\n");
  2582. }
  2583. #endif
  2584. return STATUS_SUCCESS;
  2585. }
  2586. VOID
  2587. MiFreeLargePages (
  2588. IN PVOID StartingAddress,
  2589. IN PVOID EndingAddress
  2590. )
  2591. /*++
  2592. Routine Description:
  2593. This routine deletes page directory and page table pages for a
  2594. user-controlled large page range.
  2595. Arguments:
  2596. StartingAddress - Supplies the starting address of the range.
  2597. EndingAddress - Supplies the ending address of the range.
  2598. Return Value:
  2599. None.
  2600. Environment:
  2601. Kernel mode, APCs disabled, WorkingSetMutex and AddressCreation mutexes
  2602. held.
  2603. --*/
  2604. {
  2605. PAWEINFO AweInfo;
  2606. PMMPTE PointerPde;
  2607. PMMPTE LastPde;
  2608. MMPTE PteContents;
  2609. PEPROCESS CurrentProcess;
  2610. PVOID UsedPageTableHandle;
  2611. PMMPFN Pfn1;
  2612. PFN_NUMBER PageFrameIndex;
  2613. PFN_NUMBER NumberOfPages;
  2614. KIRQL OldIrql;
  2615. #if (_MI_PAGING_LEVELS >= 3)
  2616. PMMPTE LastPpe;
  2617. PMMPTE LastPxe;
  2618. PMMPTE PointerPpe;
  2619. PMMPTE PointerPxe;
  2620. PFN_NUMBER PagesNeeded;
  2621. PVOID TempVa;
  2622. #endif
  2623. CurrentProcess = PsGetCurrentProcess ();
  2624. PointerPde = MiGetPdeAddress (StartingAddress);
  2625. LastPde = MiGetPdeAddress (EndingAddress);
  2626. UsedPageTableHandle = MI_GET_USED_PTES_HANDLE (StartingAddress);
  2627. #if (_MI_PAGING_LEVELS >= 3)
  2628. PointerPxe = MiGetPxeAddress (StartingAddress);
  2629. PointerPpe = MiGetPpeAddress (StartingAddress);
  2630. LastPxe = MiGetPxeAddress (EndingAddress);
  2631. LastPpe = MiGetPpeAddress (EndingAddress);
  2632. //
  2633. // Return resident available pages for all of the page directory
  2634. // pages as they can now be paged again.
  2635. //
  2636. // Note that commitment is not returned here because the VAD release
  2637. // returns commit for the entire paging hierarchy (including the
  2638. // nonexistent page tables).
  2639. //
  2640. PagesNeeded = LastPpe - PointerPpe + 1;
  2641. #if (_MI_PAGING_LEVELS >= 4)
  2642. PagesNeeded += LastPxe - PointerPxe + 1;
  2643. #endif
  2644. ASSERT (PagesNeeded != 0);
  2645. #endif
  2646. MmLockPagableSectionByHandle (ExPageLockHandle);
  2647. //
  2648. // Delete the range mapped by each page directory page.
  2649. //
  2650. while (PointerPde <= LastPde) {
  2651. PteContents = *PointerPde;
  2652. ASSERT (PteContents.u.Hard.Valid == 1);
  2653. ASSERT (MI_PDE_MAPS_LARGE_PAGE (&PteContents) == 1);
  2654. PageFrameIndex = (PFN_NUMBER) PteContents.u.Hard.PageFrameNumber;
  2655. ASSERT (PageFrameIndex != 0);
  2656. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  2657. LOCK_PFN (OldIrql);
  2658. MI_WRITE_INVALID_PTE (PointerPde, ZeroPte);
  2659. UNLOCK_PFN (OldIrql);
  2660. //
  2661. // Flush the mapping so the pages can be immediately reused
  2662. // without any possibility of conflicting TB entries.
  2663. //
  2664. KeFlushProcessTb (FALSE);
  2665. MiFreeLargePageMemory (PageFrameIndex,
  2666. MM_VA_MAPPED_BY_PDE >> PAGE_SHIFT);
  2667. PointerPde += 1;
  2668. }
  2669. #if (_MI_PAGING_LEVELS >= 3)
  2670. LOCK_PFN (OldIrql);
  2671. do {
  2672. //
  2673. // Down the sharecount on the finished page directory page.
  2674. //
  2675. PteContents = *PointerPpe;
  2676. ASSERT (PteContents.u.Hard.Valid == 1);
  2677. Pfn1 = MI_PFN_ELEMENT (PteContents.u.Hard.PageFrameNumber);
  2678. ASSERT (Pfn1->u2.ShareCount > 1);
  2679. Pfn1->u2.ShareCount -= 1;
  2680. UsedPageTableHandle = (PVOID) Pfn1;
  2681. MI_DECREMENT_USED_PTES_BY_HANDLE (UsedPageTableHandle);
  2682. PointerPpe += 1;
  2683. //
  2684. // If all the entries have been eliminated from the previous
  2685. // page directory page, delete the page directory page itself.
  2686. //
  2687. if (MI_GET_USED_PTES_FROM_HANDLE (UsedPageTableHandle) == 0) {
  2688. ASSERT ((PointerPpe - 1)->u.Long != 0);
  2689. #if (_MI_PAGING_LEVELS >= 4)
  2690. UsedPageTableHandle = (PVOID) Pfn1->u4.PteFrame;
  2691. MI_DECREMENT_USED_PTES_BY_HANDLE (UsedPageTableHandle);
  2692. #endif
  2693. TempVa = MiGetVirtualAddressMappedByPte (PointerPpe - 1);
  2694. MiDeletePte (PointerPpe - 1,
  2695. TempVa,
  2696. FALSE,
  2697. CurrentProcess,
  2698. NULL,
  2699. NULL,
  2700. OldIrql);
  2701. #if (_MI_PAGING_LEVELS >= 4)
  2702. if ((MiIsPteOnPdeBoundary(PointerPpe)) || (PointerPpe > LastPpe)) {
  2703. if (MI_GET_USED_PTES_FROM_HANDLE (UsedPageTableHandle) == 0) {
  2704. PointerPxe = MiGetPteAddress (PointerPpe - 1);
  2705. ASSERT (PointerPxe->u.Long != 0);
  2706. TempVa = MiGetVirtualAddressMappedByPte (PointerPxe);
  2707. MiDeletePte (PointerPxe,
  2708. TempVa,
  2709. FALSE,
  2710. CurrentProcess,
  2711. NULL,
  2712. NULL,
  2713. OldIrql);
  2714. }
  2715. }
  2716. #endif
  2717. }
  2718. } while (PointerPpe <= LastPpe);
  2719. UNLOCK_PFN (OldIrql);
  2720. MI_INCREMENT_RESIDENT_AVAILABLE (PagesNeeded, MM_RESAVAIL_FREE_USER_PAGE_TABLE);
  2721. #endif
  2722. MmUnlockPagableImageSection (ExPageLockHandle);
  2723. NumberOfPages = BYTES_TO_PAGES ((PCHAR)EndingAddress + 1 - (PCHAR)StartingAddress);
  2724. //
  2725. // The per-process WS lock guards updates to AweInfo->VadPhysicalPages.
  2726. //
  2727. AweInfo = (PAWEINFO) CurrentProcess->AweInfo;
  2728. ASSERT (AweInfo->VadPhysicalPages >= NumberOfPages);
  2729. AweInfo->VadPhysicalPages -= NumberOfPages;
  2730. if (CurrentProcess->JobStatus & PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES) {
  2731. PsChangeJobMemoryUsage (PS_JOB_STATUS_REPORT_PHYSICAL_PAGE_CHANGES,
  2732. -(SSIZE_T)NumberOfPages);
  2733. }
  2734. //
  2735. // All done, return.
  2736. //
  2737. return;
  2738. }
  2739. PFN_NUMBER
  2740. MmSetPhysicalPagesLimit (
  2741. IN PFN_NUMBER NewPhysicalPagesLimit
  2742. )
  2743. /*++
  2744. Routine Description:
  2745. This routine sets a physical page allocation limit for the current process.
  2746. This is the limit of AWE and large page allocations.
  2747. Note the process may already be over the new limit at the time this routine
  2748. is called. If so, no new AWE or large page allocations will succeed until
  2749. existing allocations are freed such that the process satisfies the
  2750. new limit.
  2751. Arguments:
  2752. NewPhysicalPagesLimit - Supplies the new limit to be enforced or zero if the
  2753. caller is simply querying for an existing limit.
  2754. Return Value:
  2755. The physical pages limit in effect upon return from this routine.
  2756. Environment:
  2757. Kernel mode, APC_LEVEL or below.
  2758. --*/
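//
// A short usage sketch of both modes (the figures are examples only):
//
// PFN_NUMBER OldLimit;
//
// OldLimit = MmSetPhysicalPagesLimit (0);    // Query without changing.
//
// MmSetPhysicalPagesLimit (64 * 1024);       // Cap AWE/large page usage
//                                            // at 64K pages (256MB with
//                                            // 4KB pages).
//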
  2759. {
  2760. PAWEINFO AweInfo;
  2761. PEPROCESS Process;
  2762. Process = PsGetCurrentProcess ();
  2763. PAGED_CODE ();
  2764. LOCK_WS (Process);
  2765. AweInfo = (PAWEINFO) Process->AweInfo;
  2766. if (AweInfo != NULL) {
  2767. if (NewPhysicalPagesLimit != 0) {
  2768. AweInfo->VadPhysicalPagesLimit = NewPhysicalPagesLimit;
  2769. }
  2770. else {
  2771. NewPhysicalPagesLimit = AweInfo->VadPhysicalPagesLimit;
  2772. }
  2773. }
  2774. else {
  2775. NewPhysicalPagesLimit = 0;
  2776. }
  2777. UNLOCK_WS (Process);
  2778. return NewPhysicalPagesLimit;
  2779. }