Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2846 lines
78 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. physical.c
  5. Abstract:
  6. This module contains the routines to manipulate physical memory from
  7. user space.
  8. There are restrictions on how user controlled physical memory can be used.
  9. Realize that all this memory is nonpaged and hence applications should
  10. allocate this with care as it represents a very real system resource.
  11. Virtual memory which maps user controlled physical memory pages must be :
  12. 1. Private memory only (ie: cannot be shared between processes).
  13. 2. The same physical page cannot be mapped at 2 different virtual
  14. addresses.
  15. 3. Callers must have LOCK_VM privilege to create these VADs.
  16. 4. Device drivers cannot call MmSecureVirtualMemory on it - this means
  17. that applications should not expect to use this memory for win32k.sys
  18. calls.
  19. 5. NtProtectVirtualMemory only allows read-write protection on this
  20. memory. No other protection (no access, guard pages, readonly, etc)
  21. are allowed.
  22. 6. NtFreeVirtualMemory allows only MEM_RELEASE and NOT MEM_DECOMMIT on
  23. these VADs. Even MEM_RELEASE is only allowed on entire VAD ranges -
  24. that is, splitting of these VADs is not allowed.
  25. 7. fork() style child processes don't inherit physical VADs.
  26. 8. The physical pages in these VADs are not subject to job limits.
  27. Author:
  28. Landy Wang (landyw) 25-Jan-1999
  29. Revision History:
  30. --*/
  31. #include "mi.h"
  32. #ifdef ALLOC_PRAGMA
      //
      // Place each routine listed below in the PAGE code section.
      // NOTE(review): PAGE is presumably the pageable-code section; the two
      // Nt* entry points named here assert PASSIVE_LEVEL on entry - confirm.
      //
  33. #pragma alloc_text(PAGE,NtMapUserPhysicalPages)
  34. #pragma alloc_text(PAGE,NtMapUserPhysicalPagesScatter)
  35. #pragma alloc_text(PAGE,MiRemoveUserPhysicalPagesVad)
  36. #pragma alloc_text(PAGE,MiAllocateAweInfo)
  37. #pragma alloc_text(PAGE,MiCleanPhysicalProcessPages)
  38. #pragma alloc_text(PAGE,NtAllocateUserPhysicalPages)
  39. #pragma alloc_text(PAGE,NtFreeUserPhysicalPages)
  40. #pragma alloc_text(PAGE,MiAweViewInserter)
  41. #pragma alloc_text(PAGE,MiAweViewRemover)
  42. #endif
  43. //
  44. // This local stack size definition is deliberately large as ISVs have told
  45. // us they expect to typically do up to this amount.
  46. //
      // COPY_STACK_SIZE is the entry count of the on-stack capture buffer in
      // NtMapUserPhysicalPages. SMALL_COPY_STACK_SIZE is the entry count of
      // each of the two on-stack capture buffers in
      // NtMapUserPhysicalPagesScatter. Requests for more pages than this are
      // captured into nonpaged pool instead.
      //
  47. #define COPY_STACK_SIZE 1024
  48. #define SMALL_COPY_STACK_SIZE 512
      //
      // Number of bits in a ULONG (assumes 8-bit bytes).
      //
  49. #define BITS_IN_ULONG ((sizeof (ULONG)) * 8)
      //
      // 16MB expressed in bytes, and the same boundary expressed as a page
      // frame number. The mapping routines ASSERT that every frame they map is
      // at or above LOWEST_USABLE_PHYSICAL_PAGE unless MiUsingLowPagesForAwe
      // is TRUE.
      //
  50. #define LOWEST_USABLE_PHYSICAL_ADDRESS (16 * 1024 * 1024)
  51. #define LOWEST_USABLE_PHYSICAL_PAGE (LOWEST_USABLE_PHYSICAL_ADDRESS >> PAGE_SHIFT)
      //
      // NOTE(review): not referenced in the visible portion of this file;
      // presumably the page frame number that corresponds to bit 0 of the AWE
      // bitmap - confirm against its users.
      //
  52. #define LOWEST_BITMAP_PHYSICAL_PAGE 0
      //
      // Convert between page frame numbers and AWE bitmap bit indices. Both
      // are plain casts to ULONG, so a frame number wider than 32 bits is
      // truncated; the _WIN64 code paths compare the converted index against
      // the original frame number to detect and reject that case.
      //
  53. #define MI_FRAME_TO_BITMAP_INDEX(x) ((ULONG)(x))
  54. #define MI_BITMAP_INDEX_TO_FRAME(x) ((ULONG)(x))
      //
      // NOTE(review): not referenced in the visible portion of this file;
      // presumably a system-wide count of pages held by user physical page
      // VADs - confirm.
      //
  55. PFN_NUMBER MmVadPhysicalPages;
  56. #if DBG
      //
      // Only defined when DBG is set. It is consulted solely by the ASSERTs
      // that would otherwise reject frames below LOWEST_USABLE_PHYSICAL_PAGE.
      //
  57. LOGICAL MiUsingLowPagesForAwe = FALSE;
  58. #endif
  59. NTSTATUS
  60. NtMapUserPhysicalPages (
  61. IN PVOID VirtualAddress,
  62. IN ULONG_PTR NumberOfPages,
  63. IN PULONG_PTR UserPfnArray OPTIONAL
  64. )
  65. /*++
  66. Routine Description:
  67. This function maps the specified nonpaged physical pages into the specified
  68. user address range.
  69. Note no WSLEs are maintained for this range as it is all nonpaged.
      The work is done in two passes while holding the process AWE pushlock
      shared: the first pass validates and claims every frame, the second
      pass writes the PTEs. Nothing is mapped unless every frame passes
      the first pass. TB entries for replaced mappings are flushed after
      the pushlock has been released.
  70. Arguments:
  71. VirtualAddress - Supplies a user virtual address within a UserPhysicalPages
  72. Vad.
      The address is passed through PAGE_ALIGN before use. The whole
      range of NumberOfPages pages must lie inside a single AWE view.
  73. NumberOfPages - Supplies the number of pages to map.
  74. UserPfnArray - Supplies a pointer to the page frame numbers to map in.
  75. If this is zero, then the virtual addresses are set to
  76. NO_ACCESS.
      When supplied, NumberOfPages entries are probed and captured
      from user space before any lock is taken.
  77. Return Value:
  78. Various NTSTATUS codes.
      STATUS_SUCCESS - the range was mapped or unmapped, or UserPfnArray was
      supplied with NumberOfPages == 0.
      STATUS_INVALID_PARAMETER_1 - the process has no AWE info or bitmap, or
      no AWE view contains the entire requested range.
      STATUS_INVALID_PARAMETER_2 - NumberOfPages exceeds MAXULONG_PTR / PAGE_SIZE,
      or the computed end address is not above the start address.
      STATUS_INVALID_PARAMETER_3 - a frame's ShareCount was not 1 when it was
      claimed (already mapped, or listed more than once in the array).
      STATUS_CONFLICTING_ADDRESSES - a frame is beyond the end of the AWE
      bitmap, is not set in the bitmap, or (on _WIN64) does not fit in 32 bits.
      STATUS_INSUFFICIENT_RESOURCES - the pool capture buffer could not be
      allocated.
      Any exception code raised while probing or copying UserPfnArray.
  79. --*/
  80. {
  81. KIRQL OldIrql;
  82. ULONG_PTR OldValue;
  83. ULONG_PTR NewValue;
  84. PAWEINFO AweInfo;
  85. PULONG BitBuffer;
  86. PEPROCESS Process;
  87. PMMPTE PointerPte;
  88. PMMPTE LastPte;
  89. PVOID EndAddress;
  90. PFN_NUMBER PageFrameIndex;
  91. PMMPFN Pfn1;
  92. NTSTATUS Status;
  93. MMPTE_FLUSH_LIST PteFlushList;
  94. PVOID PoolArea;
  95. PVOID PoolAreaEnd;
  96. PPFN_NUMBER FrameList;
  97. ULONG BitMapIndex;
  98. ULONG_PTR StackArray[COPY_STACK_SIZE];
  99. MMPTE OldPteContents;
  100. MMPTE OriginalPteContents;
  101. MMPTE NewPteContents;
  102. MMPTE JunkPte;
  103. ULONG_PTR NumberOfBytes;
  104. ULONG SizeOfBitMap;
  105. PRTL_BITMAP BitMap;
  106. PLIST_ENTRY NextEntry;
  107. PMI_PHYSICAL_VIEW PhysicalView;
  108. PEX_PUSH_LOCK PushLock;
  109. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
       //
       // Bound the page count so the byte-length computations below (the
       // shift by PAGE_SHIFT and the multiply by sizeof(ULONG_PTR)) cannot
       // wrap - assuming PAGE_SIZE == (1 << PAGE_SHIFT) and
       // sizeof(ULONG_PTR) <= PAGE_SIZE.
       //
  110. if (NumberOfPages > (MAXULONG_PTR / PAGE_SIZE)) {
  111. return STATUS_INVALID_PARAMETER_2;
  112. }
  113. VirtualAddress = PAGE_ALIGN(VirtualAddress);
       //
       // EndAddress is the last byte of the range (inclusive). The compare
       // below rejects address wraparound. For a nonzero VirtualAddress it
       // also rejects NumberOfPages == 0, since EndAddress then falls one
       // byte below VirtualAddress.
       //
  114. EndAddress = (PVOID)((PCHAR)VirtualAddress + (NumberOfPages << PAGE_SHIFT) -1);
  115. if (EndAddress <= VirtualAddress) {
  116. return STATUS_INVALID_PARAMETER_2;
  117. }
  118. //
  119. // Carefully probe and capture all user parameters.
  120. //
       // The capture buffer defaults to the on-stack array. Pool is used only
       // when the request has more than COPY_STACK_SIZE entries, so every
       // exit path frees PoolArea only if it no longer points at StackArray.
       //
  121. FrameList = NULL;
  122. PoolArea = (PVOID)&StackArray[0];
  123. if (ARGUMENT_PRESENT(UserPfnArray)) {
  124. //
  125. // Check for zero pages here so the loops further down can be optimized
  126. // taking into account this can never happen.
  127. //
  128. if (NumberOfPages == 0) {
  129. return STATUS_SUCCESS;
  130. }
  131. NumberOfBytes = NumberOfPages * sizeof(ULONG_PTR);
  132. if (NumberOfPages > COPY_STACK_SIZE) {
  133. PoolArea = ExAllocatePoolWithTag (NonPagedPool,
  134. NumberOfBytes,
  135. 'wRmM');
  136. if (PoolArea == NULL) {
  137. return STATUS_INSUFFICIENT_RESOURCES;
  138. }
  139. }
  140. //
  141. // Capture the specified page frame numbers.
  142. //
       // All later validation runs against this private copy, never against
       // the user buffer.
       //
  143. try {
  144. ProbeForRead (UserPfnArray,
  145. NumberOfBytes,
  146. sizeof(ULONG_PTR));
  147. RtlCopyMemory (PoolArea, UserPfnArray, NumberOfBytes);
  148. } except(EXCEPTION_EXECUTE_HANDLER) {
  149. if (PoolArea != (PVOID)&StackArray[0]) {
  150. ExFreePool (PoolArea);
  151. }
  152. return GetExceptionCode();
  153. }
  154. FrameList = (PPFN_NUMBER)PoolArea;
  155. }
       //
       // PoolAreaEnd is one past the last captured entry. It is only used as
       // a loop bound in the map path (UserPfnArray present).
       //
  156. PoolAreaEnd = (PVOID)((PULONG_PTR)PoolArea + NumberOfPages);
  157. PointerPte = MiGetPteAddress (VirtualAddress);
  158. LastPte = PointerPte + NumberOfPages;
  159. Process = PsGetCurrentProcess ();
  160. PageFrameIndex = 0;
  161. //
  162. // Initialize as much as possible before acquiring any locks.
  163. //
       // NewPteContents is a valid, read-write, dirty PTE template built with
       // frame 0. The real frame number is patched in for each page in the
       // second pass below.
       //
  164. MI_MAKE_VALID_PTE (NewPteContents,
  165. PageFrameIndex,
  166. MM_READWRITE,
  167. PointerPte);
  168. MI_SET_PTE_DIRTY (NewPteContents);
  169. PteFlushList.Count = 0;
  170. //
  171. // A memory barrier is needed to read the EPROCESS AweInfo field
  172. // in order to ensure the writes to the AweInfo structure fields are
  173. // visible in correct order. This avoids the need to acquire any
  174. // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
  175. // of best performance.
  176. //
  177. KeMemoryBarrier ();
  178. AweInfo = (PAWEINFO) Process->AweInfo;
  179. //
  180. // The physical pages bitmap must exist.
  181. //
  182. if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
  183. if (PoolArea != (PVOID)&StackArray[0]) {
  184. ExFreePool (PoolArea);
  185. }
  186. return STATUS_INVALID_PARAMETER_1;
  187. }
  188. //
  189. // Block APCs to prevent recursive pushlock scenarios as this is not
  190. // supported.
  191. //
  192. KeRaiseIrql (APC_LEVEL, &OldIrql);
  193. //
  194. // Pushlock protection protects insertion/removal of Vads into each process'
  195. // AweVadList. It also protects creation/deletion and adds/removes
  196. // of the VadPhysicalPagesBitMap. Finally, it protects the PFN
  197. // modifications for pages in the bitmap.
  198. //
       // The lock is taken shared here, so concurrent map/unmap calls in the
       // same process can run in parallel. That is why the frame and PTE
       // updates below use interlocked operations.
       //
  199. PushLock = ExAcquireCacheAwarePushLockShared (AweInfo->PushLock);
  200. BitMap = AweInfo->VadPhysicalPagesBitMap;
  201. ASSERT (BitMap != NULL);
  202. //
  203. // Note that the push lock is sufficient to traverse this list.
  204. //
  205. NextEntry = AweInfo->AweVadList.Flink;
  206. //
  207. // Note the compiler generates much better code with the syntax below
  208. // than with "while (NextEntry != &AweInfo->AweVadList) {"
  209. //
       // Find a single AWE view that contains the whole range
       // [VirtualAddress, EndAddress]. Reaching the list head again means no
       // such view exists. No frame has been claimed at that point, so the
       // error path skips the ShareCount rollback.
       //
  210. do {
  211. if (NextEntry == &AweInfo->AweVadList) {
  212. //
  213. // No virtual address is reserved at the specified base address,
  214. // return an error.
  215. //
  216. Status = STATUS_INVALID_PARAMETER_1;
  217. goto ErrorReturn;
  218. }
  219. PhysicalView = CONTAINING_RECORD(NextEntry,
  220. MI_PHYSICAL_VIEW,
  221. ListEntry);
  222. ASSERT (PhysicalView->u.LongFlags == MI_PHYSICAL_VIEW_AWE);
  223. ASSERT (PhysicalView->Vad->u.VadFlags.UserPhysicalPages == 1);
  224. if ((VirtualAddress >= (PVOID)PhysicalView->StartVa) &&
  225. (EndAddress <= (PVOID)PhysicalView->EndVa)) {
  226. break;
  227. }
  228. NextEntry = NextEntry->Flink;
  229. } while (TRUE);
  230. //
  231. // Ensure the PFN element corresponding to each specified page is owned
  232. // by the specified VAD.
  233. //
  234. // Since this ownership can only be changed while holding this process'
  235. // working set lock, the PFN can be scanned here without holding the PFN
  236. // lock.
  237. //
  238. // Note the PFN lock is not needed because any race with MmProbeAndLockPages
  239. // can only result in the I/O going to the old page or the new page.
  240. // If the user breaks the rules, the PFN database (and any pages being
  241. // windowed here) are still protected because of the reference counts
  242. // on the pages with inprogress I/O. This is possible because NO pages
  243. // are actually freed here - they are just windowed.
  244. //
       // NOTE(review): the paragraph above names the working set lock, but the
       // only lock this routine takes is the AWE pushlock (shared) - confirm
       // which lock actually guards frame ownership.
       //
  245. if (ARGUMENT_PRESENT(UserPfnArray)) {
  246. //
  247. // By keeping the PFN bitmap in the VAD (instead of in the PFN
  248. // database itself), a few benefits are realized:
  249. //
  250. // 1. No need to acquire the PFN lock here.
  251. // 2. Faster handling of PFN databases with holes.
  252. // 3. Transparent support for dynamic PFN database growth.
  253. // 4. Less nonpaged memory is used (for the bitmap vs adding a
  254. // field to the PFN) on systems with no unused pack space in
  255. // the PFN database, presuming not many of these VADs get
  256. // allocated.
  257. //
  258. //
  259. // The first pass here ensures all the frames are secure.
  260. //
  261. //
  262. // N.B. This implies that PFN_NUMBER is always ULONG_PTR in width
  263. // as PFN_NUMBER is not exposed to application code today.
  264. //
       // ShareCount values used by this routine, as established by the
       // compare-exchange and ASSERTs below:
       // 1 - frame is in the bitmap and not mapped,
       // 3 - frame has been claimed by this call and is about to be mapped,
       // 2 - frame is mapped by a PTE.
       //
  265. SizeOfBitMap = BitMap->SizeOfBitMap;
  266. BitBuffer = BitMap->Buffer;
  267. do {
  268. PageFrameIndex = *FrameList;
  269. //
  270. // Frames past the end of the bitmap are not allowed.
  271. //
  272. BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);
  273. #if defined (_WIN64)
  274. //
  275. // Ensure the frame is a 32-bit number.
  276. //
  277. if (BitMapIndex != PageFrameIndex) {
  278. Status = STATUS_CONFLICTING_ADDRESSES;
  279. goto ErrorReturn0;
  280. }
  281. #endif
  282. if (BitMapIndex >= SizeOfBitMap) {
  283. Status = STATUS_CONFLICTING_ADDRESSES;
  284. goto ErrorReturn0;
  285. }
  286. //
  287. // Frames not in the bitmap are not allowed.
  288. //
  289. if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
  290. Status = STATUS_CONFLICTING_ADDRESSES;
  291. goto ErrorReturn0;
  292. }
  293. //
  294. // The frame must not be already mapped anywhere.
  295. // Or be passed in twice in different spots in the array.
  296. //
       // The plain read is only a fast reject. The compare-exchange below is
       // what claims the frame (1 -> 3), so only one of any set of racing
       // callers can win a given frame. A duplicate entry in this array fails
       // the same way, because the first occurrence already moved the count
       // to 3.
       //
  297. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  298. ASSERT (MI_PFN_IS_AWE (Pfn1));
  299. OldValue = Pfn1->u2.ShareCount;
  300. if (OldValue != 1) {
  301. Status = STATUS_INVALID_PARAMETER_3;
  302. goto ErrorReturn0;
  303. }
  304. NewValue = OldValue + 2;
  305. //
  306. // Mark the frame as "about to be mapped".
  307. //
  308. #if defined (_WIN64)
  309. OldValue = InterlockedCompareExchange64 ((PLONGLONG)&Pfn1->u2.ShareCount,
  310. (LONGLONG)NewValue,
  311. (LONGLONG)OldValue);
  312. #else
  313. OldValue = InterlockedCompareExchange ((PLONG)&Pfn1->u2.ShareCount,
  314. NewValue,
  315. OldValue);
  316. #endif
  317. if (OldValue != 1) {
  318. Status = STATUS_INVALID_PARAMETER_3;
  319. goto ErrorReturn0;
  320. }
  321. ASSERT (MI_PFN_IS_AWE (Pfn1));
  322. ASSERT (Pfn1->u2.ShareCount == 3);
  323. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  324. (MiUsingLowPagesForAwe == TRUE));
       //
       // Advance only after a successful claim. On any failure above,
       // FrameList still points at the unclaimed entry, which is the state
       // the ErrorReturn0 rollback relies on.
       //
  325. FrameList += 1;
  326. } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
  327. //
  328. // This pass actually inserts them all into the page table pages and
  329. // the TBs now that we know the frames are good. Check the PTEs and
  330. // PFNs carefully as a malicious user may issue more than one remap
  331. // request for all or portions of the same region simultaneously.
  332. //
  333. FrameList = (PPFN_NUMBER)PoolArea;
  334. do {
  335. PageFrameIndex = *FrameList;
  336. NewPteContents.u.Hard.PageFrameNumber = PageFrameIndex;
       //
       // Retry until the exchange succeeds against an unchanged snapshot, so
       // OldPteContents is exactly the value this call replaced.
       //
  337. do {
  338. OldPteContents = *PointerPte;
  339. OriginalPteContents.u.Long = InterlockedCompareExchangePte (
  340. PointerPte,
  341. NewPteContents.u.Long,
  342. OldPteContents.u.Long);
  343. } while (OriginalPteContents.u.Long != OldPteContents.u.Long);
  344. //
  345. // The PTE is now pointing at the new frame. Note that another
  346. // thread can immediately access the page contents via this PTE
  347. // even though they're not supposed to until this API returns.
  348. // Thus, the page frames are handled carefully so that malicious
  349. // apps cannot corrupt frames they don't really still or yet own.
  350. //
  351. if (OldPteContents.u.Hard.Valid == 1) {
  352. //
  353. // The old frame was mapped so the TB entry must be flushed.
  354. // Note the app could maliciously dirty data in the old frame
  355. // until the TB flush completes, so don't allow frame reuse
  356. // till then (although allowing remapping within this process
  357. // is ok).
  358. //
  359. Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
  360. ASSERT (Pfn1->PteAddress != NULL);
  361. ASSERT (Pfn1->u2.ShareCount == 2);
  362. //
  363. // Carefully clear the PteAddress before decrementing the share
  364. // count.
  365. //
       // The decrement (2 -> 1) returns the old frame to the unmapped state.
       //
  366. Pfn1->PteAddress = NULL;
  367. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
       //
       // Record the address for the deferred TB flush. Once the list is full
       // no further addresses are recorded.
       // NOTE(review): presumably MiFlushPteList treats a full list as a
       // request to flush the entire TB, and JunkPte is a dummy target so the
       // flush routine does not touch the real PTE - confirm.
       //
  368. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  369. PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
  370. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  371. PteFlushList.Count += 1;
  372. }
  373. }
  374. //
  375. // Update counters for the new frame we just put in the PTE and
  376. // TB.
  377. //
       // Record the owning PTE first, then drop the claim (3 -> 2) so the
       // frame reads as mapped.
       //
  378. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  379. ASSERT (Pfn1->PteAddress == NULL);
  380. ASSERT (Pfn1->u2.ShareCount == 3);
  381. Pfn1->PteAddress = PointerPte;
  382. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
  383. VirtualAddress = (PVOID)((PCHAR)VirtualAddress + PAGE_SIZE);
  384. PointerPte += 1;
  385. FrameList += 1;
  386. } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
  387. }
  388. else {
  389. //
  390. // Set the specified virtual address range to no access.
  391. //
       // Each PTE in the range is replaced with ZeroPte using the same
       // compare-exchange retry loop as the map path. Frames that were mapped
       // are released (2 -> 1) and queued for the TB flush.
       //
  392. while (PointerPte < LastPte) {
  393. do {
  394. OldPteContents = *PointerPte;
  395. OriginalPteContents.u.Long = InterlockedCompareExchangePte (
  396. PointerPte,
  397. ZeroPte.u.Long,
  398. OldPteContents.u.Long);
  399. } while (OriginalPteContents.u.Long != OldPteContents.u.Long);
  400. //
  401. // The PTE has been cleared. Note that another thread can still
  402. // be accessing the page contents via the stale PTE until the TB
  403. // entry is flushed even though they're not supposed to.
  404. // Thus, the page frames are handled carefully so that malicious
  405. // apps cannot corrupt frames they don't still own.
  406. //
  407. if (OldPteContents.u.Hard.Valid == 1) {
  408. //
  409. // The old frame was mapped so the TB entry must be flushed.
  410. // Note the app could maliciously dirty data in the old frame
  411. // until the TB flush completes, so don't allow frame reuse
  412. // till then (although allowing remapping within this process
  413. // is ok).
  414. //
  415. Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
  416. ASSERT (MI_PFN_IS_AWE (Pfn1));
  417. ASSERT (Pfn1->PteAddress != NULL);
  418. ASSERT (Pfn1->u2.ShareCount == 2);
  419. Pfn1->PteAddress = NULL;
  420. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
  421. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  422. PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
  423. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  424. PteFlushList.Count += 1;
  425. }
  426. }
  427. VirtualAddress = (PVOID)((PCHAR)VirtualAddress + PAGE_SIZE);
  428. PointerPte += 1;
  429. }
  430. }
  431. ExReleaseCacheAwarePushLockShared (PushLock);
  432. KeLowerIrql (OldIrql);
  433. //
  434. // Flush the TB entries for any relevant pages. Note this can be done
  435. // without holding the AWE push lock because the PTEs have already been
  436. // filled so any concurrent (bogus) map/unmap call will see the right
  437. // entries. AND any free of the physical pages will also see the right
  438. // entries (although the free must do a TB flush while holding the AWE
  439. // push lock exclusive to ensure no thread gets to continue using a
  440. // stale mapping to the page being freed prior to the flush below).
  441. //
  442. if (PteFlushList.Count != 0) {
  443. MiFlushPteList (&PteFlushList, FALSE, ZeroPte);
  444. }
  445. if (PoolArea != (PVOID)&StackArray[0]) {
  446. ExFreePool (PoolArea);
  447. }
  448. return STATUS_SUCCESS;
       //
       // ErrorReturn0 is reached only from the first pass, with FrameList
       // pointing at the entry that failed and was therefore not claimed.
       // Walk back over the entries before it and undo their claims (3 -> 1).
       // A plain store is used; the ASSERT documents the expectation that
       // these frames are still in the claimed state set by this call.
       //
  449. ErrorReturn0:
  450. while (FrameList > (PPFN_NUMBER)PoolArea) {
  451. FrameList -= 1;
  452. PageFrameIndex = *FrameList;
  453. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  454. ASSERT (Pfn1->u2.ShareCount == 3);
  455. Pfn1->u2.ShareCount = 1;
  456. }
       //
       // ErrorReturn is the common failure exit once the pushlock is held:
       // release the lock, restore the IRQL and free any pool capture buffer.
       //
  457. ErrorReturn:
  458. ExReleaseCacheAwarePushLockShared (PushLock);
  459. KeLowerIrql (OldIrql);
  460. if (PoolArea != (PVOID)&StackArray[0]) {
  461. ExFreePool (PoolArea);
  462. }
  463. return Status;
  464. }
  465. NTSTATUS
  466. NtMapUserPhysicalPagesScatter (
  467. IN PVOID *VirtualAddresses,
  468. IN ULONG_PTR NumberOfPages,
  469. IN PULONG_PTR UserPfnArray OPTIONAL
  470. )
  471. /*++
  472. Routine Description:
  473. This function maps the specified nonpaged physical pages into the specified
  474. user address range.
  475. Note no WSLEs are maintained for this range as it is all nonpaged.
  476. Arguments:
  477. VirtualAddresses - Supplies a pointer to an array of user virtual addresses
  478. within UserPhysicalPages Vads. Each array entry is
  479. presumed to map a single page.
  480. NumberOfPages - Supplies the number of pages to map.
  481. UserPfnArray - Supplies a pointer to the page frame numbers to map in.
  482. If this is zero, then the virtual addresses are set to
  483. NO_ACCESS. If the array entry is zero then just the
  484. corresponding virtual address is set to NO_ACCESS.
  485. Return Value:
  486. Various NTSTATUS codes.
  487. --*/
  488. {
  489. KIRQL OldIrql;
  490. ULONG_PTR OldValue;
  491. ULONG_PTR NewValue;
  492. PULONG BitBuffer;
  493. PAWEINFO AweInfo;
  494. PEPROCESS Process;
  495. PMMPTE PointerPte;
  496. PFN_NUMBER PageFrameIndex;
  497. PMMPFN Pfn1;
  498. NTSTATUS Status;
  499. MMPTE_FLUSH_LIST PteFlushList;
  500. PVOID PoolArea;
  501. PVOID PoolAreaEnd;
  502. PVOID *PoolVirtualArea;
  503. PVOID *PoolVirtualAreaBase;
  504. PVOID *PoolVirtualAreaEnd;
  505. PPFN_NUMBER FrameList;
  506. ULONG BitMapIndex;
  507. PVOID StackVirtualArray[SMALL_COPY_STACK_SIZE];
  508. ULONG_PTR StackArray[SMALL_COPY_STACK_SIZE];
  509. MMPTE OriginalPteContents;
  510. MMPTE OldPteContents;
  511. MMPTE NewPteContents0;
  512. MMPTE NewPteContents;
  513. MMPTE JunkPte;
  514. ULONG_PTR NumberOfBytes;
  515. PRTL_BITMAP BitMap;
  516. PLIST_ENTRY NextEntry;
  517. PLIST_ENTRY FirstEntry;
  518. PMI_PHYSICAL_VIEW PhysicalView;
  519. PVOID VirtualAddress;
  520. ULONG SizeOfBitMap;
  521. PEX_PUSH_LOCK PushLock;
  522. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  523. if (NumberOfPages > (MAXULONG_PTR / PAGE_SIZE)) {
  524. return STATUS_INVALID_PARAMETER_2;
  525. }
  526. //
  527. // Carefully probe and capture the user virtual address array.
  528. //
  529. PoolArea = (PVOID)&StackArray[0];
  530. PoolVirtualAreaBase = (PVOID)&StackVirtualArray[0];
  531. NumberOfBytes = NumberOfPages * sizeof(PVOID);
  532. if (NumberOfPages > SMALL_COPY_STACK_SIZE) {
  533. PoolVirtualAreaBase = ExAllocatePoolWithTag (NonPagedPool,
  534. NumberOfBytes,
  535. 'wRmM');
  536. if (PoolVirtualAreaBase == NULL) {
  537. return STATUS_INSUFFICIENT_RESOURCES;
  538. }
  539. }
  540. PoolVirtualArea = PoolVirtualAreaBase;
  541. try {
  542. ProbeForRead (VirtualAddresses,
  543. NumberOfBytes,
  544. sizeof(PVOID));
  545. RtlCopyMemory (PoolVirtualArea, VirtualAddresses, NumberOfBytes);
  546. } except(EXCEPTION_EXECUTE_HANDLER) {
  547. Status = GetExceptionCode();
  548. goto ErrorReturn;
  549. }
  550. //
  551. // Check for zero pages here so the loops further down can be optimized
  552. // taking into account this can never happen.
  553. //
  554. if (NumberOfPages == 0) {
  555. return STATUS_SUCCESS;
  556. }
  557. //
  558. // Carefully probe and capture the user PFN array.
  559. //
  560. if (ARGUMENT_PRESENT(UserPfnArray)) {
  561. NumberOfBytes = NumberOfPages * sizeof(ULONG_PTR);
  562. if (NumberOfPages > SMALL_COPY_STACK_SIZE) {
  563. PoolArea = ExAllocatePoolWithTag (NonPagedPool,
  564. NumberOfBytes,
  565. 'wRmM');
  566. if (PoolArea == NULL) {
  567. PoolArea = (PVOID)&StackArray[0];
  568. Status = STATUS_INSUFFICIENT_RESOURCES;
  569. goto ErrorReturn;
  570. }
  571. }
  572. //
  573. // Capture the specified page frame numbers.
  574. //
  575. try {
  576. ProbeForRead (UserPfnArray,
  577. NumberOfBytes,
  578. sizeof(ULONG_PTR));
  579. RtlCopyMemory (PoolArea, UserPfnArray, NumberOfBytes);
  580. } except(EXCEPTION_EXECUTE_HANDLER) {
  581. Status = GetExceptionCode();
  582. goto ErrorReturn;
  583. }
  584. }
  585. PoolAreaEnd = (PVOID)((PULONG_PTR)PoolArea + NumberOfPages);
  586. Process = PsGetCurrentProcess();
  587. //
  588. // Initialize as much as possible before acquiring any locks.
  589. //
  590. PageFrameIndex = 0;
  591. PhysicalView = NULL;
  592. PteFlushList.Count = 0;
  593. FrameList = (PPFN_NUMBER)PoolArea;
  594. ASSERT (NumberOfPages != 0);
  595. PoolVirtualAreaEnd = PoolVirtualAreaBase + NumberOfPages;
  596. MI_MAKE_VALID_PTE (NewPteContents0,
  597. PageFrameIndex,
  598. MM_READWRITE,
  599. MiGetPteAddress(PoolVirtualArea[0]));
  600. MI_SET_PTE_DIRTY (NewPteContents0);
  601. Status = STATUS_SUCCESS;
  602. //
  603. // A memory barrier is needed to read the EPROCESS AweInfo field
  604. // in order to ensure the writes to the AweInfo structure fields are
  605. // visible in correct order. This avoids the need to acquire any
  606. // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
  607. // of best performance.
  608. //
  609. KeMemoryBarrier ();
  610. AweInfo = (PAWEINFO) Process->AweInfo;
  611. //
  612. // The physical pages bitmap must exist.
  613. //
  614. if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
  615. Status = STATUS_INVALID_PARAMETER_1;
  616. goto ErrorReturn;
  617. }
  618. //
  619. // Block APCs to prevent recursive pushlock scenarios as this is not
  620. // supported.
  621. //
  622. KeRaiseIrql (APC_LEVEL, &OldIrql);
  623. //
  624. // Pushlock protection protects insertion/removal of Vads into each process'
  625. // AweVadList. It also protects creation/deletion and adds/removes
  626. // of the VadPhysicalPagesBitMap. Finally, it protects the PFN
  627. // modifications for pages in the bitmap.
  628. //
  629. PushLock = ExAcquireCacheAwarePushLockShared (AweInfo->PushLock);
  630. BitMap = AweInfo->VadPhysicalPagesBitMap;
  631. ASSERT (BitMap != NULL);
  632. //
  633. // Note that the PFN lock is not needed to traverse this list (even though
  634. // MmProbeAndLockPages uses it), because the pushlock has been acquired.
  635. //
  636. // The AweVadList should typically have just one entry - the view
  637. // we're looking for, so this traverse should be quick.
  638. //
  639. //
  640. // Snap the first entry now so compares in the loop save an indirect
  641. // reference as we know it can't change. Check it for being empty now
  642. // so that also doesn't need to be checked in the loop.
  643. //
  644. FirstEntry = AweInfo->AweVadList.Flink;
  645. if (FirstEntry == &AweInfo->AweVadList) {
  646. //
  647. // No AWE Vads exist - return an error.
  648. //
  649. ExReleaseCacheAwarePushLockShared (PushLock);
  650. KeLowerIrql (OldIrql);
  651. Status = STATUS_INVALID_PARAMETER_1;
  652. goto ErrorReturn;
  653. }
  654. PhysicalView = CONTAINING_RECORD (FirstEntry, MI_PHYSICAL_VIEW, ListEntry);
  655. do {
  656. VirtualAddress = *PoolVirtualArea;
  657. //
  658. // Check the last physical view interrogated (hint) first.
  659. //
  660. ASSERT (PhysicalView->u.LongFlags == MI_PHYSICAL_VIEW_AWE);
  661. ASSERT (PhysicalView->Vad->u.VadFlags.UserPhysicalPages == 1);
  662. if ((VirtualAddress >= (PVOID)PhysicalView->StartVa) &&
  663. (VirtualAddress <= (PVOID)PhysicalView->EndVa)) {
  664. //
  665. // The virtual address is within the hint so it's good.
  666. //
  667. PoolVirtualArea += 1;
  668. continue;
  669. }
  670. NextEntry = FirstEntry;
  671. //
  672. // Note the compiler generates much better code with the syntax below
  673. // than with "while (NextEntry != &AweInfo->AweVadList) {"
  674. //
  675. do {
  676. if (NextEntry == &AweInfo->AweVadList) {
  677. //
  678. // No virtual address is reserved at the specified base address,
  679. // return an error.
  680. //
  681. ExReleaseCacheAwarePushLockShared (PushLock);
  682. KeLowerIrql (OldIrql);
  683. Status = STATUS_INVALID_PARAMETER_1;
  684. goto ErrorReturn;
  685. }
  686. PhysicalView = CONTAINING_RECORD (NextEntry,
  687. MI_PHYSICAL_VIEW,
  688. ListEntry);
  689. ASSERT (PhysicalView->Vad->u.VadFlags.UserPhysicalPages == 1);
  690. ASSERT (PhysicalView->u.LongFlags == MI_PHYSICAL_VIEW_AWE);
  691. if ((VirtualAddress >= (PVOID)PhysicalView->StartVa) &&
  692. (VirtualAddress <= (PVOID)PhysicalView->EndVa)) {
  693. break;
  694. }
  695. NextEntry = NextEntry->Flink;
  696. } while (TRUE);
  697. PoolVirtualArea += 1;
  698. } while (PoolVirtualArea < PoolVirtualAreaEnd);
  699. //
  700. // Ensure the PFN element corresponding to each specified page is owned
  701. // by the specified VAD.
  702. //
  703. // Since this ownership can only be changed while holding this process'
  704. // working set lock, the PFN can be scanned here without holding the PFN
  705. // lock.
  706. //
  707. // Note the PFN lock is not needed because any race with MmProbeAndLockPages
  708. // can only result in the I/O going to the old page or the new page.
  709. // If the user breaks the rules, the PFN database (and any pages being
  710. // windowed here) are still protected because of the reference counts
  711. // on the pages with inprogress I/O. This is possible because NO pages
  712. // are actually freed here - they are just windowed.
  713. //
  714. PoolVirtualArea = PoolVirtualAreaBase;
  715. if (ARGUMENT_PRESENT(UserPfnArray)) {
  716. //
  717. // By keeping the PFN bitmap in the process (instead of in the PFN
  718. // database itself), a few benefits are realized:
  719. //
  720. // 1. No need to acquire the PFN lock here.
  721. // 2. Faster handling of PFN databases with holes.
  722. // 3. Transparent support for dynamic PFN database growth.
  723. // 4. Less nonpaged memory is used (for the bitmap vs adding a
  724. // field to the PFN) on systems with no unused pack space in
  725. // the PFN database.
  726. //
  727. //
  728. // The first pass here ensures all the frames are secure.
  729. //
  730. //
  731. // N.B. This implies that PFN_NUMBER is always ULONG_PTR in width
  732. // as PFN_NUMBER is not exposed to application code today.
  733. //
  734. SizeOfBitMap = BitMap->SizeOfBitMap;
  735. BitBuffer = BitMap->Buffer;
  736. do {
  737. PageFrameIndex = *FrameList;
  738. //
  739. // Zero entries are treated as a command to unmap.
  740. //
  741. if (PageFrameIndex == 0) {
  742. FrameList += 1;
  743. continue;
  744. }
  745. //
  746. // Frames past the end of the bitmap are not allowed.
  747. //
  748. BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);
  749. #if defined (_WIN64)
  750. //
  751. // Ensure the frame is a 32-bit number.
  752. //
  753. if (BitMapIndex != PageFrameIndex) {
  754. Status = STATUS_CONFLICTING_ADDRESSES;
  755. goto ErrorReturn0;
  756. }
  757. #endif
  758. if (BitMapIndex >= SizeOfBitMap) {
  759. Status = STATUS_CONFLICTING_ADDRESSES;
  760. goto ErrorReturn0;
  761. }
  762. //
  763. // Frames not in the bitmap are not allowed.
  764. //
  765. if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
  766. Status = STATUS_CONFLICTING_ADDRESSES;
  767. goto ErrorReturn0;
  768. }
  769. //
  770. // The frame must not be already mapped anywhere.
  771. // Or be passed in twice in different spots in the array.
  772. //
  773. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  774. ASSERT (MI_PFN_IS_AWE (Pfn1));
  775. OldValue = Pfn1->u2.ShareCount;
  776. if (OldValue != 1) {
  777. Status = STATUS_INVALID_PARAMETER_3;
  778. goto ErrorReturn0;
  779. }
  780. NewValue = OldValue + 2;
  781. //
  782. // Mark the frame as "about to be mapped".
  783. //
  784. #if defined (_WIN64)
  785. OldValue = InterlockedCompareExchange64 ((PLONGLONG)&Pfn1->u2.ShareCount,
  786. (LONGLONG)NewValue,
  787. (LONGLONG)OldValue);
  788. #else
  789. OldValue = InterlockedCompareExchange ((PLONG)&Pfn1->u2.ShareCount,
  790. NewValue,
  791. OldValue);
  792. #endif
  793. if (OldValue != 1) {
  794. Status = STATUS_INVALID_PARAMETER_3;
  795. goto ErrorReturn0;
  796. }
  797. ASSERT (MI_PFN_IS_AWE (Pfn1));
  798. ASSERT (Pfn1->u2.ShareCount == 3);
  799. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  800. (MiUsingLowPagesForAwe == TRUE));
  801. FrameList += 1;
  802. } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
  803. //
  804. // This pass actually inserts them all into the page table pages and
  805. // the TBs now that we know the frames are good. Check the PTEs and
  806. // PFNs carefully as a malicious user may issue more than one remap
  807. // request for all or portions of the same region simultaneously.
  808. //
  809. FrameList = (PPFN_NUMBER)PoolArea;
  810. do {
  811. PageFrameIndex = *FrameList;
  812. if (PageFrameIndex != 0) {
  813. NewPteContents = NewPteContents0;
  814. NewPteContents.u.Hard.PageFrameNumber = PageFrameIndex;
  815. }
  816. else {
  817. NewPteContents.u.Long = ZeroPte.u.Long;
  818. }
  819. VirtualAddress = *PoolVirtualArea;
  820. PoolVirtualArea += 1;
  821. PointerPte = MiGetPteAddress (VirtualAddress);
  822. do {
  823. OldPteContents = *PointerPte;
  824. OriginalPteContents.u.Long = InterlockedCompareExchangePte (
  825. PointerPte,
  826. NewPteContents.u.Long,
  827. OldPteContents.u.Long);
  828. } while (OriginalPteContents.u.Long != OldPteContents.u.Long);
  829. //
  830. // The PTE is now pointing at the new frame. Note that another
  831. // thread can immediately access the page contents via this PTE
  832. // even though they're not supposed to until this API returns.
  833. // Thus, the page frames are handled carefully so that malicious
  834. // apps cannot corrupt frames they don't really still or yet own.
  835. //
  836. if (OldPteContents.u.Hard.Valid == 1) {
  837. //
  838. // The old frame was mapped so the TB entry must be flushed.
  839. // Note the app could maliciously dirty data in the old frame
  840. // until the TB flush completes, so don't allow frame reuse
  841. // till then (although allowing remapping within this process
  842. // is ok).
  843. //
  844. Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
  845. ASSERT (Pfn1->PteAddress != NULL);
  846. ASSERT (Pfn1->u2.ShareCount == 2);
  847. ASSERT (MI_PFN_IS_AWE (Pfn1));
  848. Pfn1->PteAddress = NULL;
  849. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
  850. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  851. PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
  852. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  853. PteFlushList.Count += 1;
  854. }
  855. }
  856. if (PageFrameIndex != 0) {
  857. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  858. ASSERT (Pfn1->PteAddress == NULL);
  859. ASSERT (Pfn1->u2.ShareCount == 3);
  860. Pfn1->PteAddress = PointerPte;
  861. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
  862. }
  863. FrameList += 1;
  864. } while (FrameList < (PPFN_NUMBER) PoolAreaEnd);
  865. }
  866. else {
  867. //
  868. // Set the specified virtual address range to no access.
  869. //
  870. do {
  871. VirtualAddress = *PoolVirtualArea;
  872. PointerPte = MiGetPteAddress (VirtualAddress);
  873. do {
  874. OldPteContents = *PointerPte;
  875. OriginalPteContents.u.Long = InterlockedCompareExchangePte (
  876. PointerPte,
  877. ZeroPte.u.Long,
  878. OldPteContents.u.Long);
  879. } while (OriginalPteContents.u.Long != OldPteContents.u.Long);
  880. //
  881. // The PTE is now zeroed. Note that another thread can still
  882. // Note the app could maliciously dirty data in the old frame
  883. // until the TB flush completes, so don't allow frame reuse
  884. // till then (although allowing remapping within this process
  885. // is ok) to prevent the app from corrupting frames it doesn't
  886. // really still own.
  887. //
  888. if (OldPteContents.u.Hard.Valid == 1) {
  889. //
  890. // The old frame was mapped so the TB entry must be flushed.
  891. //
  892. Pfn1 = MI_PFN_ELEMENT (OldPteContents.u.Hard.PageFrameNumber);
  893. ASSERT (Pfn1->PteAddress != NULL);
  894. ASSERT (Pfn1->u2.ShareCount == 2);
  895. ASSERT (MI_PFN_IS_AWE (Pfn1));
  896. Pfn1->PteAddress = NULL;
  897. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -1);
  898. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  899. PteFlushList.FlushVa[PteFlushList.Count] = VirtualAddress;
  900. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  901. PteFlushList.Count += 1;
  902. }
  903. }
  904. PoolVirtualArea += 1;
  905. } while (PoolVirtualArea < PoolVirtualAreaEnd);
  906. }
  907. ExReleaseCacheAwarePushLockShared (PushLock);
  908. KeLowerIrql (OldIrql);
  909. //
  910. // Flush the TB entries for any relevant pages. Note this can be done
  911. // without holding the AWE push lock because the PTEs have already been
  912. // filled so any concurrent (bogus) map/unmap call will see the right
  913. // entries. AND any free of the physical pages will also see the right
  914. // entries (although the free must do a TB flush while holding the AWE
  915. // push lock exclusive to ensure no thread gets to continue using a
  916. // stale mapping to the page being freed prior to the flush below).
  917. //
  918. if (PteFlushList.Count != 0) {
  919. MiFlushPteList (&PteFlushList, FALSE, ZeroPte);
  920. }
  921. ErrorReturn:
  922. if (PoolArea != (PVOID)&StackArray[0]) {
  923. ExFreePool (PoolArea);
  924. }
  925. if (PoolVirtualAreaBase != (PVOID)&StackVirtualArray[0]) {
  926. ExFreePool (PoolVirtualAreaBase);
  927. }
  928. return Status;
  929. ErrorReturn0:
  930. while (FrameList > (PPFN_NUMBER)PoolArea) {
  931. FrameList -= 1;
  932. PageFrameIndex = *FrameList;
  933. if (PageFrameIndex != 0) {
  934. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  935. ASSERT (Pfn1->u2.ShareCount == 3);
  936. ASSERT (MI_PFN_IS_AWE (Pfn1));
  937. InterlockedExchangeAddSizeT (&Pfn1->u2.ShareCount, -2);
  938. }
  939. }
  940. ExReleaseCacheAwarePushLockShared (PushLock);
  941. KeLowerIrql (OldIrql);
  942. goto ErrorReturn;
  943. }
  944. PVOID
  945. MiAllocateAweInfo (
  946. VOID
  947. )
  948. /*++
  949. Routine Description:
  950. This function allocates an AWE structure for the current process. Note
  951. this structure is never destroyed while the process is alive in order to
  952. allow various checks to occur lock free.
  953. Arguments:
  954. None.
  955. Return Value:
  956. A non-NULL AweInfo pointer on success, NULL on failure.
  957. Environment:
  958. Kernel mode, PASSIVE_LEVEL, no locks held.
  959. --*/
  960. {
  961. PAWEINFO AweInfo;
  962. PEPROCESS Process;
  963. AweInfo = ExAllocatePoolWithTag (NonPagedPool,
  964. sizeof (AWEINFO),
  965. 'wAmM');
  966. if (AweInfo != NULL) {
  967. AweInfo->VadPhysicalPagesBitMap = NULL;
  968. AweInfo->VadPhysicalPages = 0;
  969. InitializeListHead (&AweInfo->AweVadList);
  970. AweInfo->PushLock = ExAllocateCacheAwarePushLock ();
  971. if (AweInfo->PushLock == NULL) {
  972. ExFreePool (AweInfo);
  973. return NULL;
  974. }
  975. Process = PsGetCurrentProcess();
  976. //
  977. // A memory barrier is needed to ensure the writes initializing the
  978. // AweInfo fields are visible prior to setting the EPROCESS AweInfo
  979. // pointer. This is because the reads from these fields are done
  980. // lock free for improved performance.
  981. //
  982. KeMemoryBarrier ();
  983. if (InterlockedCompareExchangePointer (&Process->AweInfo,
  984. AweInfo,
  985. NULL) != NULL) {
  986. ExFreeCacheAwarePushLock (AweInfo->PushLock);
  987. ExFreePool (AweInfo);
  988. AweInfo = Process->AweInfo;
  989. ASSERT (AweInfo != NULL);
  990. }
  991. }
  992. return (PVOID) AweInfo;
  993. }
  994. NTSTATUS
  995. NtAllocateUserPhysicalPages (
  996. IN HANDLE ProcessHandle,
  997. IN OUT PULONG_PTR NumberOfPages,
  998. OUT PULONG_PTR UserPfnArray
  999. )
  1000. /*++
  1001. Routine Description:
  1002. This function allocates nonpaged physical pages for the specified
  1003. subject process.
  1004. No WSLEs are maintained for this range.
  1005. The caller must check the NumberOfPages returned to determine how many
  1006. pages were actually allocated (this number may be less than the requested
  1007. amount).
  1008. On success, the user array is filled with the allocated physical page
  1009. frame numbers (only up to the returned NumberOfPages is filled in).
  1010. No PTEs are filled here - this gives the application the flexibility
  1011. to order the address space with no metadata structure imposed by the Mm.
  1012. Applications do this via NtMapUserPhysicalPages - ie:
  1013. - Each physical page allocated is set in the process's bitmap.
  1014. This provides remap, free and unmap a way to validate and rundown
  1015. these frames.
  1016. Unmaps may result in a walk of the entire bitmap, but that's ok as
  1017. unmaps should be less frequent. The win is it saves us from
  1018. using up system virtual address space to manage these frames.
  1019. - Note that the same physical frame may NOT be mapped at two different
  1020. virtual addresses in the process. This makes frees and unmaps
  1021. substantially faster as no checks for aliasing need be performed.
  1022. Arguments:
  1023. ProcessHandle - Supplies an open handle to a process object.
  1024. NumberOfPages - Supplies a pointer to a variable that supplies the
  1025. desired size in pages of the allocation. This is filled
  1026. with the actual number of pages allocated.
  1027. UserPfnArray - Supplies a pointer to user memory to store the allocated
  1028. frame numbers into.
  1029. Return Value:
  1030. Various NTSTATUS codes.
  The codes visible in this routine are:
  STATUS_SUCCESS - at least one page was allocated, or a user mode
  caller asked for zero pages. Success is also returned if writing the
  results back to the caller faults after the pages were allocated.
  STATUS_INVALID_PARAMETER_2 - a user mode page count so large that the
  byte size of the frame array cannot be computed without overflow.
  STATUS_PRIVILEGE_NOT_HELD - the caller lacks SeLockMemoryPrivilege.
  STATUS_PROCESS_IS_TERMINATING - the target address space was deleted.
  STATUS_INSUFFICIENT_RESOURCES - the AWE info or the bitmap could not
  be allocated, or not even one page could be obtained.
  The exception code raised while probing or capturing the arguments,
  or the failure status from ObReferenceObjectByHandle or
  PsChargeProcessNonPagedPoolQuota.
  Environment:
  Kernel mode, PASSIVE_LEVEL (asserted on entry).
  1031. --*/
  1032. {
  1033. PAWEINFO AweInfo;
  1034. ULONG i;
  1035. KAPC_STATE ApcState;
  1036. PEPROCESS Process;
  1037. KPROCESSOR_MODE PreviousMode;
  1038. NTSTATUS Status;
  1039. LOGICAL Attached;
  1040. LOGICAL WsHeld;
  1041. ULONG_PTR CapturedNumberOfPages;
  1042. ULONG_PTR AllocatedPages;
  1043. ULONG_PTR MdlRequestInPages;
  1044. ULONG_PTR TotalAllocatedPages;
  1045. PMDL MemoryDescriptorList;
  1046. PMDL MemoryDescriptorList2;
  1047. PMDL MemoryDescriptorHead;
  1048. PPFN_NUMBER MdlPage;
  1049. PRTL_BITMAP BitMap;
  1050. ULONG BitMapSize;
  1051. ULONG BitMapIndex;
  1052. PMMPFN Pfn1;
  1053. PHYSICAL_ADDRESS LowAddress;
  1054. PHYSICAL_ADDRESS HighAddress;
  1055. PHYSICAL_ADDRESS SkipBytes;
  1056. ULONG SizeOfBitMap;
  1057. PFN_NUMBER HighestPossiblePhysicalPage;
  1058. PETHREAD CurrentThread;
  1059. PEPROCESS CurrentProcess;
  1060. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1061. Attached = FALSE;
  1062. WsHeld = FALSE;
  1063. //
  1064. // Snap the current thread, its process and the previous processor mode.
  1065. //
  1066. CurrentThread = PsGetCurrentThread ();
  1067. CurrentProcess = PsGetCurrentProcessByThread (CurrentThread);
  1068. PreviousMode = KeGetPreviousModeByThread(&CurrentThread->Tcb);
  1069. //
  1070. // Establish an exception handler, probe the specified addresses
  1071. // for write access and capture the initial values.
  1072. //
  1073. try {
  1074. //
  1075. // Capture the number of pages.
  1076. //
  1077. if (PreviousMode != KernelMode) {
  1078. ProbeForWritePointer (NumberOfPages);
  1079. CapturedNumberOfPages = *NumberOfPages;
  //
  // A zero page request from user mode succeeds without doing anything.
  // Note this check is only made on the user mode path.
  //
  1080. if (CapturedNumberOfPages == 0) {
  1081. return STATUS_SUCCESS;
  1082. }
  //
  // Reject counts whose frame array size in bytes would overflow the
  // multiplication passed to the probe below.
  //
  1083. if (CapturedNumberOfPages > (MAXULONG_PTR / sizeof(ULONG_PTR))) {
  1084. return STATUS_INVALID_PARAMETER_2;
  1085. }
  1086. ProbeForWrite (UserPfnArray,
  1087. CapturedNumberOfPages * sizeof (ULONG_PTR),
  1088. sizeof(PULONG_PTR));
  1089. }
  1090. else {
  1091. CapturedNumberOfPages = *NumberOfPages;
  1092. }
  1093. } except (ExSystemExceptionFilter()) {
  1094. //
  1095. // If an exception occurs during the probe or capture
  1096. // of the initial values, then handle the exception and
  1097. // return the exception code as the status value.
  1098. //
  1099. return GetExceptionCode();
  1100. }
  1101. //
  1102. // Reference the specified process handle for VM_OPERATION access.
  1103. //
  1104. if (ProcessHandle == NtCurrentProcess()) {
  1105. Process = CurrentProcess;
  1106. }
  1107. else {
  1108. Status = ObReferenceObjectByHandle ( ProcessHandle,
  1109. PROCESS_VM_OPERATION,
  1110. PsProcessType,
  1111. PreviousMode,
  1112. (PVOID *)&Process,
  1113. NULL );
  1114. if (!NT_SUCCESS(Status)) {
  1115. return Status;
  1116. }
  1117. }
  //
  // The caller must hold the lock memory privilege. Drop the process
  // reference taken above (if any) before failing.
  //
  1118. if (!SeSinglePrivilegeCheck (SeLockMemoryPrivilege, PreviousMode)) {
  1119. if (ProcessHandle != NtCurrentProcess()) {
  1120. ObDereferenceObject (Process);
  1121. }
  1122. return STATUS_PRIVILEGE_NOT_HELD;
  1123. }
  1124. //
  1125. // If the specified process is not the current process, attach
  1126. // to the specified process.
  1127. //
  1128. if (CurrentProcess != Process) {
  1129. KeStackAttachProcess (&Process->Pcb, &ApcState);
  1130. Attached = TRUE;
  1131. }
  //
  // BitMapSize stays zero unless this call creates the bitmap, so it also
  // serves below as the "bitmap was created by this call" indicator.
  //
  1132. BitMapSize = 0;
  1133. //
  1134. // Get the working set mutex to synchronize. This also blocks APCs so
  1135. // an APC which takes a page fault does not corrupt various structures.
  1136. //
  1137. WsHeld = TRUE;
  1138. LOCK_WS (Process);
  1139. //
  1140. // Make sure the address space was not deleted. If so, return an error.
  1141. //
  1142. if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
  1143. Status = STATUS_PROCESS_IS_TERMINATING;
  1144. goto ErrorReturn;
  1145. }
  //
  // Create the per-process AWE info on first use.
  //
  1146. AweInfo = Process->AweInfo;
  1147. if (AweInfo == NULL) {
  1148. AweInfo = (PAWEINFO) MiAllocateAweInfo ();
  1149. if (AweInfo == NULL) {
  1150. Status = STATUS_INSUFFICIENT_RESOURCES;
  1151. goto ErrorReturn;
  1152. }
  1153. ASSERT (AweInfo == Process->AweInfo);
  1154. }
  1155. //
  1156. // Create the physical pages bitmap if it does not already exist.
  1157. // LockMemory privilege is required (this was checked above).
  1158. //
  1159. BitMap = AweInfo->VadPhysicalPagesBitMap;
  1160. if (BitMap == NULL) {
  1161. HighestPossiblePhysicalPage = MmHighestPossiblePhysicalPage;
  1162. #if defined (_WIN64)
  1163. //
  1164. // Force a 32-bit maximum on any page allocation because the bitmap
  1165. // package is currently 32-bit.
  1166. //
  1167. if (HighestPossiblePhysicalPage + 1 >= _4gb) {
  1168. HighestPossiblePhysicalPage = _4gb - 2;
  1169. }
  1170. #endif
  //
  // The allocation holds the RTL_BITMAP header followed by one bit per
  // possible physical page, rounded up to a whole number of ULONGs.
  //
  1171. BitMapSize = sizeof(RTL_BITMAP) + (ULONG)((((HighestPossiblePhysicalPage + 1) + 31) / 32) * 4);
  1172. BitMap = ExAllocatePoolWithTag (NonPagedPool, BitMapSize, 'LdaV');
  1173. if (BitMap == NULL) {
  1174. Status = STATUS_INSUFFICIENT_RESOURCES;
  1175. goto ErrorReturn;
  1176. }
  1177. RtlInitializeBitMap (BitMap,
  1178. (PULONG)(BitMap + 1),
  1179. (ULONG)(HighestPossiblePhysicalPage + 1));
  1180. RtlClearAllBits (BitMap);
  1181. //
  1182. // Charge quota for the nonpaged pool for the bitmap. This is
  1183. // done here rather than by using ExAllocatePoolWithQuota
  1184. // so the process object is not referenced by the quota charge.
  1185. //
  1186. Status = PsChargeProcessNonPagedPoolQuota (Process, BitMapSize);
  1187. if (!NT_SUCCESS(Status)) {
  1188. ExFreePool (BitMap);
  1189. goto ErrorReturn;
  1190. }
  1191. SizeOfBitMap = BitMap->SizeOfBitMap;
  1192. }
  1193. else {
  1194. //
  1195. // It's ok to snap this without a lock.
  1196. //
  1197. SizeOfBitMap = AweInfo->VadPhysicalPagesBitMap->SizeOfBitMap;
  1198. }
  1199. AllocatedPages = 0;
  1200. TotalAllocatedPages = 0;
  1201. MemoryDescriptorHead = NULL;
  1202. SkipBytes.QuadPart = 0;
  1203. //
  1204. // Don't use the low 16mb of memory so that at least some low pages are left
  1205. // for 32/24-bit device drivers. Just under 4gb is the maximum allocation
  1206. // per MDL so the ByteCount field does not overflow.
  1207. //
  // The high address is derived from the bitmap size so that the request
  // does not ask for frames beyond the last bit in the bitmap.
  //
  1208. HighAddress.QuadPart = ((ULONGLONG)(SizeOfBitMap - 1)) << PAGE_SHIFT;
  1209. LowAddress.QuadPart = LOWEST_USABLE_PHYSICAL_ADDRESS;
  1210. if (LowAddress.QuadPart >= HighAddress.QuadPart) {
  1211. //
  1212. // If there's less than 16mb of RAM, just take pages from anywhere.
  1213. //
  1214. #if DBG
  1215. MiUsingLowPagesForAwe = TRUE;
  1216. #endif
  1217. LowAddress.QuadPart = 0;
  1218. }
  //
  // Allocate in MDL sized pieces until the request is satisfied or no
  // more pages can be obtained.
  //
  1219. do {
  1220. MdlRequestInPages = CapturedNumberOfPages - TotalAllocatedPages;
  1221. if (MdlRequestInPages > (ULONG_PTR)((MAXULONG - PAGE_SIZE) >> PAGE_SHIFT)) {
  1222. MdlRequestInPages = (ULONG_PTR)((MAXULONG - PAGE_SIZE) >> PAGE_SHIFT);
  1223. }
  1224. //
  1225. // Note this allocation returns zeroed pages.
  1226. //
  1227. MemoryDescriptorList = MmAllocatePagesForMdl (LowAddress,
  1228. HighAddress,
  1229. SkipBytes,
  1230. MdlRequestInPages << PAGE_SHIFT);
  1231. if (MemoryDescriptorList == NULL) {
  1232. //
  1233. // No (more) pages available. If this becomes a common situation,
  1234. // all the working sets could be flushed here.
  1235. //
  1236. if (TotalAllocatedPages == 0) {
  //
  // Nothing at all was obtained. If this call created the bitmap, undo
  // both the allocation and the quota charge before failing.
  //
  1237. if (BitMapSize) {
  1238. ExFreePool (BitMap);
  1239. PsReturnProcessNonPagedPoolQuota (Process, BitMapSize);
  1240. }
  1241. Status = STATUS_INSUFFICIENT_RESOURCES;
  1242. goto ErrorReturn;
  1243. }
  1244. //
  1245. // Make do with what we've gotten so far.
  1246. //
  1247. break;
  1248. }
  //
  // Push this MDL on the head of the private list. The frame numbers are
  // copied out to the caller by walking this list from the head below, so
  // the most recently allocated MDL is reported first.
  //
  1249. MemoryDescriptorList->Next = MemoryDescriptorHead;
  1250. MemoryDescriptorHead = MemoryDescriptorList;
  1251. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1252. AllocatedPages = MemoryDescriptorList->ByteCount >> PAGE_SHIFT;
  1253. TotalAllocatedPages += AllocatedPages;
  1254. InterlockedExchangeAddSizeT (&MmVadPhysicalPages, AllocatedPages);
  1255. //
  1256. // The per-process WS lock guards updates to AweInfo->VadPhysicalPages.
  1257. //
  1258. AweInfo->VadPhysicalPages += AllocatedPages;
  1259. //
  1260. // Update the allocation bitmap for each allocated frame.
  1261. // Note the PFN lock is not needed to modify the PteAddress below.
  1262. // In fact, even the AWE push lock is not needed as these pages
  1263. // are brand new.
  1264. //
  1265. for (i = 0; i < AllocatedPages; i += 1) {
  1266. ASSERT ((*MdlPage >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1267. (MiUsingLowPagesForAwe == TRUE));
  1268. BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(*MdlPage);
  1269. ASSERT (BitMapIndex < BitMap->SizeOfBitMap);
  1270. ASSERT (MI_CHECK_BIT (BitMap->Buffer, BitMapIndex) == 0);
  1271. ASSERT64 (*MdlPage < _4gb);
  1272. Pfn1 = MI_PFN_ELEMENT (*MdlPage);
  1273. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1274. Pfn1->PteAddress = NULL;
  1275. ASSERT (Pfn1->u2.ShareCount == 1);
  1276. //
  1277. // Once this bit is set (and the mutex released below), a rogue
  1278. // thread that is passing random frame numbers to
  1279. // NtFreeUserPhysicalPages can free this frame. This means NO
  1280. // references can be made to it by this routine after this point
  1281. // without first re-checking the bitmap.
  1282. //
  1283. MI_SET_BIT (BitMap->Buffer, BitMapIndex);
  1284. MdlPage += 1;
  1285. }
  1286. ASSERT (TotalAllocatedPages <= CapturedNumberOfPages);
  1287. if (TotalAllocatedPages == CapturedNumberOfPages) {
  1288. break;
  1289. }
  1290. //
  1291. // Try the same memory range again - there might be more pages
  1292. // left in it that can be claimed as a truncated MDL had to be
  1293. // used for the last request.
  1294. //
  1295. } while (TRUE);
  1296. ASSERT (TotalAllocatedPages != 0);
  1297. if (BitMapSize != 0) {
  1298. //
  1299. // If this API resulted in the creation of the bitmap, then set it
  1300. // in the process structure now. No need for locking around this.
  1301. //
  1302. AweInfo->VadPhysicalPagesBitMap = BitMap;
  1303. }
  //
  // Release the working set mutex and detach before touching the caller's
  // output buffers.
  //
  1304. UNLOCK_WS (Process);
  1305. WsHeld = FALSE;
  1306. if (Attached == TRUE) {
  1307. KeUnstackDetachProcess (&ApcState);
  1308. Attached = FALSE;
  1309. }
  1310. //
  1311. // Establish an exception handler and carefully write out the
  1312. // number of pages and the frame numbers.
  1313. //
  1314. Status = STATUS_SUCCESS;
  1315. try {
  1316. ASSERT (TotalAllocatedPages <= CapturedNumberOfPages);
  1317. *NumberOfPages = TotalAllocatedPages;
  1318. MemoryDescriptorList = MemoryDescriptorHead;
  1319. while (MemoryDescriptorList != NULL) {
  1320. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1321. AllocatedPages = MemoryDescriptorList->ByteCount >> PAGE_SHIFT;
  1322. for (i = 0; i < AllocatedPages; i += 1) {
  1323. *UserPfnArray = *(PULONG_PTR)MdlPage;
  1324. #if 0
  1325. //
  1326. // The bitmap entry for this page was set above, so a rogue
  1327. // thread that is passing random frame numbers to
  1328. // NtFreeUserPhysicalPages may have already freed this frame.
  1329. // This means the ASSERT below cannot be made without first
  1330. // re-checking the bitmap to see if the page is still in it.
  1331. // It's not worth reacquiring the mutex just for this, so turn
  1332. // the assert off for now.
  1333. //
  1334. ASSERT (MI_PFN_ELEMENT(*MdlPage)->u2.ShareCount == 1);
  1335. #endif
  1336. UserPfnArray += 1;
  1337. MdlPage += 1;
  1338. }
  1339. MemoryDescriptorList = MemoryDescriptorList->Next;
  1340. }
  1341. } except (ExSystemExceptionFilter()) {
  1342. //
  1343. // If anything went wrong communicating the pages back to the user
  1344. // then the user has really hurt himself because these addresses
  1345. // passed the probe tests at the beginning of the service. Rather
  1346. // than carrying around extensive recovery code, just return back
  1347. // success as this scenario is the same as if the user scribbled
  1348. // over the output parameters after the service returned anyway.
  1349. // You can't stop someone who's determined to lose their values !
  1350. //
  1351. // Fall through...
  1352. //
  1353. }
  1354. //
  1355. // Free the space consumed by the MDLs now that the page frame numbers
  1356. // have been saved in the bitmap and copied to the user.
  1357. //
  1358. MemoryDescriptorList = MemoryDescriptorHead;
  1359. while (MemoryDescriptorList != NULL) {
  1360. MemoryDescriptorList2 = MemoryDescriptorList->Next;
  1361. ExFreePool (MemoryDescriptorList);
  1362. MemoryDescriptorList = MemoryDescriptorList2;
  1363. }
  //
  // Common exit. On the success path the mutex and the attach have already
  // been released above, so only the process dereference remains. Failure
  // paths arrive here with WsHeld and Attached describing what is still
  // held.
  //
  1364. ErrorReturn:
  1365. if (WsHeld == TRUE) {
  1366. UNLOCK_WS (Process);
  1367. }
  1368. if (Attached == TRUE) {
  1369. KeUnstackDetachProcess (&ApcState);
  1370. }
  1371. if (ProcessHandle != NtCurrentProcess()) {
  1372. ObDereferenceObject (Process);
  1373. }
  1374. return Status;
  1375. }
  1376. NTSTATUS
  1377. NtFreeUserPhysicalPages (
  1378. IN HANDLE ProcessHandle,
  1379. IN OUT PULONG_PTR NumberOfPages,
  1380. IN PULONG_PTR UserPfnArray
  1381. )
  1382. /*++
  1383. Routine Description:
  1384. This function frees the nonpaged physical pages for the specified
  1385. subject process. Any PTEs referencing these pages are also invalidated.
  1386. Note there is no need to walk the entire VAD tree to clear the PTEs that
  1387. match each page as each physical page can only be mapped at a single
  1388. virtual address (alias addresses within the VAD are not allowed).
  The caller's frame array is processed in chunks: each chunk is copied
  into a private MDL (from pool if one can be allocated, otherwise a
  stack based one), validated against the process's AWE bitmap, and the
  validated frames are then freed. Processing stops at the first frame
  that fails validation.
  1389. Arguments:
  1390. ProcessHandle - Supplies an open handle to a process object.
  1391. NumberOfPages - Supplies the size in pages of the allocation to delete.
  1392. Returns the actual number of pages deleted.
  1393. UserPfnArray - Supplies a pointer to memory to retrieve the page frame
  1394. numbers from.
  1395. Return Value:
  1396. Various NTSTATUS codes.
  The codes visible in this routine are:
  STATUS_SUCCESS - every frame supplied was freed.
  STATUS_INVALID_PARAMETER_2 - the page count supplied was zero.
  STATUS_INVALID_PARAMETER_1 - the target process has no AWE info or no
  physical pages bitmap.
  STATUS_PROCESS_IS_TERMINATING - the target address space was deleted.
  STATUS_CONFLICTING_ADDRESSES - a frame was beyond the bitmap, not set
  in the bitmap, or (64-bit only) not representable as a bitmap index.
  Frames ahead of it in the array have still been freed.
  The exception code raised while probing or capturing the arguments,
  or the failure status from ObReferenceObjectByHandle.
  Environment:
  Kernel mode, PASSIVE_LEVEL (asserted on entry).
  1397. --*/
  1398. {
  1399. PAWEINFO AweInfo;
  1400. PULONG BitBuffer;
  1401. KAPC_STATE ApcState;
  1402. ULONG_PTR CapturedNumberOfPages;
  1403. PMDL MemoryDescriptorList;
  1404. PPFN_NUMBER MdlPage;
  1405. PPFN_NUMBER LastMdlPage;
  1406. PFN_NUMBER PagesInMdl;
  1407. PFN_NUMBER PageFrameIndex;
  1408. PRTL_BITMAP BitMap;
  1409. ULONG BitMapIndex;
  1410. ULONG_PTR PagesProcessed;
  1411. PFN_NUMBER MdlHack[(sizeof(MDL) / sizeof(PFN_NUMBER)) + COPY_STACK_SIZE];
  1412. ULONG_PTR MdlPages;
  1413. ULONG_PTR NumberOfBytes;
  1414. PEPROCESS Process;
  1415. KPROCESSOR_MODE PreviousMode;
  1416. NTSTATUS Status;
  1417. LOGICAL Attached;
  1418. PMMPFN Pfn1;
  1419. LOGICAL WsHeld;
  1420. LOGICAL OnePassComplete;
  1421. LOGICAL ProcessReferenced;
  1422. MMPTE_FLUSH_LIST PteFlushList;
  1423. PMMPTE PointerPte;
  1424. MMPTE OldPteContents;
  1425. MMPTE JunkPte;
  1426. PETHREAD CurrentThread;
  1427. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1428. //
  1429. // Establish an exception handler, probe the page count for write
  1430. // access and capture it.
  1431. //
  1432. CurrentThread = PsGetCurrentThread ();
  1433. PreviousMode = KeGetPreviousModeByThread (&CurrentThread->Tcb);
  1434. if (PreviousMode != KernelMode) {
  1435. try {
  1436. ProbeForWritePointer (NumberOfPages);
  1437. CapturedNumberOfPages = *NumberOfPages;
  1438. //
  1439. // Initialize the NumberOfPages freed to zero so the user can be
  1440. // reasonably informed about errors that occur midway through
  1441. // the transaction.
  1442. //
  1443. *NumberOfPages = 0;
  1444. } except (ExSystemExceptionFilter()) {
  1445. //
  1446. // If an exception occurs during the probe or capture
  1447. // of the initial values, then handle the exception and
  1448. // return the exception code as the status value.
  1449. //
  1450. return GetExceptionCode();
  1451. }
  1452. }
  1453. else {
  1454. CapturedNumberOfPages = *NumberOfPages;
  1455. }
  1456. if (CapturedNumberOfPages == 0) {
  1457. return STATUS_INVALID_PARAMETER_2;
  1458. }
  1459. OnePassComplete = FALSE;
  1460. PagesProcessed = 0;
  1461. //
  1462. // Initializing MdlPages is not needed for
  1463. // correctness but without it the compiler cannot compile this code
  1464. // W4 to check for use of uninitialized variables.
  1465. //
  1466. MdlPages = 0;
  1467. MemoryDescriptorList = NULL;
  //
  // If the request is larger than the stack based MDL can hold, try to
  // get an MDL from pool instead, halving the size after each failed
  // attempt. If none can be obtained, fall back to the stack based MDL
  // and process the request COPY_STACK_SIZE frames at a time.
  //
  1468. if (CapturedNumberOfPages > COPY_STACK_SIZE) {
  1469. //
  1470. // Ensure the number of pages can fit into an MDL's ByteCount.
  1471. //
  1472. if (CapturedNumberOfPages > ((ULONG)MAXULONG >> PAGE_SHIFT)) {
  1473. MdlPages = (ULONG_PTR)((ULONG)MAXULONG >> PAGE_SHIFT);
  1474. }
  1475. else {
  1476. MdlPages = CapturedNumberOfPages;
  1477. }
  1478. while (MdlPages > COPY_STACK_SIZE) {
  1479. MemoryDescriptorList = MmCreateMdl (NULL,
  1480. 0,
  1481. MdlPages << PAGE_SHIFT);
  1482. if (MemoryDescriptorList != NULL) {
  1483. break;
  1484. }
  1485. MdlPages >>= 1;
  1486. }
  1487. }
  1488. if (MemoryDescriptorList == NULL) {
  1489. MdlPages = COPY_STACK_SIZE;
  1490. MemoryDescriptorList = (PMDL)&MdlHack[0];
  1491. }
  1492. WsHeld = FALSE;
  1493. ProcessReferenced = FALSE;
  1494. Process = PsGetCurrentProcessByThread (CurrentThread);
  //
  // Each pass from here handles the next chunk of (at most MdlPages)
  // frames from the caller's array.
  //
  1495. repeat:
  1496. if (CapturedNumberOfPages < MdlPages) {
  1497. MdlPages = CapturedNumberOfPages;
  1498. }
  1499. MmInitializeMdl (MemoryDescriptorList, 0, MdlPages << PAGE_SHIFT);
  1500. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1501. NumberOfBytes = MdlPages * sizeof(ULONG_PTR);
  1502. Attached = FALSE;
  1503. //
  1504. // Establish an exception handler, probe the specified addresses
  1505. // for read access and capture the page frame numbers.
  1506. //
  1507. if (PreviousMode != KernelMode) {
  1508. try {
  1509. //
  1510. // Update the user's count so if anything goes wrong, the user can
  1511. // be reasonably informed about how far into the transaction it
  1512. // occurred.
  1513. //
  1514. *NumberOfPages = PagesProcessed;
  1515. ProbeForRead (UserPfnArray,
  1516. NumberOfBytes,
  1517. sizeof(PULONG_PTR));
  1518. RtlCopyMemory ((PVOID)MdlPage,
  1519. UserPfnArray,
  1520. NumberOfBytes);
  1521. } except (ExSystemExceptionFilter()) {
  1522. //
  1523. // If an exception occurs during the probe or capture
  1524. // of the initial values, then handle the exception and
  1525. // return the exception code as the status value.
  1526. //
  1527. Status = GetExceptionCode();
  1528. goto ErrorReturn;
  1529. }
  1530. }
  1531. else {
  1532. RtlCopyMemory ((PVOID)MdlPage,
  1533. UserPfnArray,
  1534. NumberOfBytes);
  1535. }
  //
  // The process handle is only resolved on the first pass. Later passes
  // reuse the Process pointer (and any reference) obtained here.
  //
  1536. if (OnePassComplete == FALSE) {
  1537. //
  1538. // Reference the specified process handle for VM_OPERATION access.
  1539. //
  1540. if (ProcessHandle == NtCurrentProcess()) {
  1541. Process = PsGetCurrentProcessByThread(CurrentThread);
  1542. }
  1543. else {
  1544. Status = ObReferenceObjectByHandle ( ProcessHandle,
  1545. PROCESS_VM_OPERATION,
  1546. PsProcessType,
  1547. PreviousMode,
  1548. (PVOID *)&Process,
  1549. NULL );
  1550. if (!NT_SUCCESS(Status)) {
  1551. goto ErrorReturn;
  1552. }
  1553. ProcessReferenced = TRUE;
  1554. }
  1555. }
  1556. //
  1557. // If the specified process is not the current process, attach
  1558. // to the specified process.
  1559. //
  1560. if (PsGetCurrentProcessByThread(CurrentThread) != Process) {
  1561. KeStackAttachProcess (&Process->Pcb, &ApcState);
  1562. Attached = TRUE;
  1563. }
  1564. //
  1565. // A memory barrier is needed to read the EPROCESS AweInfo field
  1566. // in order to ensure the writes to the AweInfo structure fields are
  1567. // visible in correct order. This avoids the need to acquire any
  1568. // stronger synchronization (ie: spinlock/pushlock, etc) in the interest
  1569. // of best performance.
  1570. //
  1571. KeMemoryBarrier ();
  1572. AweInfo = (PAWEINFO) Process->AweInfo;
  1573. //
  1574. // The physical pages bitmap must exist.
  1575. //
  1576. if ((AweInfo == NULL) || (AweInfo->VadPhysicalPagesBitMap == NULL)) {
  1577. Status = STATUS_INVALID_PARAMETER_1;
  1578. goto ErrorReturn;
  1579. }
  1580. PteFlushList.Count = 0;
  1581. Status = STATUS_SUCCESS;
  1582. //
  1583. // Get the working set mutex to synchronize with other threads
  1584. // allocating or freeing this process's physical pages (the allocate
  1585. // routine updates the bitmap and page counts under the same mutex).
  1586. // Per the allocate routine, this also blocks APCs so an APC which takes
  1587. // a page fault does not corrupt various structures.
  1588. //
  1589. WsHeld = TRUE;
  1590. LOCK_WS (Process);
  1591. //
  1592. // Make sure the address space was not deleted, if so, return an error.
  1593. //
  1594. if (Process->Flags & PS_PROCESS_FLAGS_VM_DELETED) {
  1595. Status = STATUS_PROCESS_IS_TERMINATING;
  1596. goto ErrorReturn;
  1597. }
  1598. BitMap = AweInfo->VadPhysicalPagesBitMap;
  1599. ASSERT (BitMap != NULL);
  1600. BitBuffer = BitMap->Buffer;
  1601. LastMdlPage = MdlPage + MdlPages;
  1602. //
  1603. // Flush the entire TB for this process while holding its AWE push lock
  1604. // exclusive so that if this free is occurring prior to any pending
  1605. // flushes at the end of an in-progress map/unmap, the app is not left
  1606. // with a stale TB entry that would allow him to corrupt pages that no
  1607. // longer belong to him.
  1608. //
  1609. //
  1610. // APCs must be blocked to prevent recursive pushlock scenarios as this
  1611. // is not supported. NOTE(review): no explicit APC disable is visible
  // here - presumably the working set mutex held above provides it; confirm.
  1612. //
  1613. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  1614. KeFlushEntireTb (TRUE, FALSE);
  //
  // Validate each captured frame in turn. The loop stops at the first bad
  // frame, leaving MdlPage pointing at it, so only the frames before it
  // are freed below.
  //
  1615. while (MdlPage < LastMdlPage) {
  1616. PageFrameIndex = *MdlPage;
  1617. BitMapIndex = MI_FRAME_TO_BITMAP_INDEX(PageFrameIndex);
  1618. #if defined (_WIN64)
  1619. //
  1620. // Ensure the frame is a 32-bit number.
  1621. //
  1622. if (BitMapIndex != PageFrameIndex) {
  1623. Status = STATUS_CONFLICTING_ADDRESSES;
  1624. break;
  1625. }
  1626. #endif
  1627. //
  1628. // Frames past the end of the bitmap are not allowed.
  1629. //
  1630. if (BitMapIndex >= BitMap->SizeOfBitMap) {
  1631. Status = STATUS_CONFLICTING_ADDRESSES;
  1632. break;
  1633. }
  1634. //
  1635. // Frames not in the bitmap are not allowed.
  1636. //
  1637. if (MI_CHECK_BIT (BitBuffer, BitMapIndex) == 0) {
  1638. Status = STATUS_CONFLICTING_ADDRESSES;
  1639. break;
  1640. }
  1641. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1642. (MiUsingLowPagesForAwe == TRUE));
  1643. PagesProcessed += 1;
  1644. ASSERT64 (PageFrameIndex < _4gb);
  //
  // Clearing the bit removes the frame from the process's set of owned
  // frames, so a repeat of the same frame later in the array fails the
  // bitmap check above.
  //
  1645. MI_CLEAR_BIT (BitBuffer, BitMapIndex);
  1646. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1647. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1648. #if DBG
  1649. if (Pfn1->u2.ShareCount == 1) {
  1650. ASSERT (Pfn1->PteAddress == NULL);
  1651. }
  1652. else if (Pfn1->u2.ShareCount == 2) {
  1653. ASSERT (Pfn1->PteAddress != NULL);
  1654. }
  1655. else {
  1656. ASSERT (FALSE);
  1657. }
  1658. #endif
  1659. //
  1660. // If the frame is currently mapped in the Vad then the PTE must
  1661. // be cleared and the TB entry flushed.
  1662. //
  1663. if (Pfn1->u2.ShareCount != 1) {
  1664. //
  1665. // Note the exclusive hold of the AWE push lock prevents
  1666. // any other concurrent threads from mapping or unmapping
  1667. // right now. This also eliminates the need to update the PFN
  1668. // sharecount with an interlocked sequence as well.
  1669. //
  1670. Pfn1->u2.ShareCount -= 1;
  1671. PointerPte = Pfn1->PteAddress;
  1672. Pfn1->PteAddress = NULL;
  1673. OldPteContents = *PointerPte;
  1674. ASSERT (OldPteContents.u.Hard.Valid == 1);
  1675. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  1676. PteFlushList.FlushVa[PteFlushList.Count] =
  1677. MiGetVirtualAddressMappedByPte (PointerPte);
  1678. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  1679. PteFlushList.Count += 1;
  1680. }
  1681. MI_WRITE_INVALID_PTE (PointerPte, ZeroPte);
  1682. }
  1683. MI_SET_PFN_DELETED (Pfn1);
  1684. MdlPage += 1;
  1685. }
  1686. //
  1687. // Flush the TB entries for any relevant pages.
  1688. //
  1689. MiFlushPteList (&PteFlushList, FALSE, ZeroPte);
  1690. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  1691. //
  1692. // Free the actual pages (this may be a partially filled MDL).
  1693. //
  1694. PagesInMdl = MdlPage - (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1695. //
  1696. // Set the ByteCount to the actual number of validated pages - the caller
  1697. // may have lied and we have to sync up here to account for any bogus
  1698. // frames.
  1699. //
  1700. MemoryDescriptorList->ByteCount = (ULONG)(PagesInMdl << PAGE_SHIFT);
  1701. if (PagesInMdl != 0) {
  1702. AweInfo->VadPhysicalPages -= PagesInMdl;
  1703. InterlockedExchangeAddSizeT (&MmVadPhysicalPages, 0 - PagesInMdl);
  1704. MmFreePagesFromMdl (MemoryDescriptorList);
  1705. }
  1706. CapturedNumberOfPages -= PagesInMdl;
  //
  // If this chunk was clean and frames remain, drop the mutex and the
  // attach, advance past the frames just freed and go around again.
  //
  1707. if ((Status == STATUS_SUCCESS) && (CapturedNumberOfPages != 0)) {
  1708. UNLOCK_WS (Process);
  1709. WsHeld = FALSE;
  1710. if (Attached == TRUE) {
  1711. KeUnstackDetachProcess (&ApcState);
  1712. Attached = FALSE;
  1713. }
  1714. OnePassComplete = TRUE;
  1715. ASSERT (MdlPages == PagesInMdl);
  1716. UserPfnArray += MdlPages;
  1717. //
  1718. // Do it all again until all the pages are freed or an error occurs.
  1719. //
  1720. goto repeat;
  1721. }
  1722. //
  1723. // Fall through.
  1724. //
  //
  // Common exit for both completion and failure. WsHeld, Attached and
  // ProcessReferenced describe what still has to be released.
  //
  1725. ErrorReturn:
  1726. if (WsHeld == TRUE) {
  1727. UNLOCK_WS (Process);
  1728. }
  1729. //
  1730. // Free any pool acquired for holding MDLs.
  1731. //
  1732. if (MemoryDescriptorList != (PMDL)&MdlHack[0]) {
  1733. ExFreePool (MemoryDescriptorList);
  1734. }
  1735. if (Attached == TRUE) {
  1736. KeUnstackDetachProcess (&ApcState);
  1737. }
  1738. //
  1739. // Establish an exception handler and carefully write out the
  1740. // number of pages actually processed.
  1741. //
  1742. try {
  1743. *NumberOfPages = PagesProcessed;
  1744. } except (EXCEPTION_EXECUTE_HANDLER) {
  1745. //
  1746. // Leave Status unchanged at this point even if the results
  1747. // cannot be written.
  1748. //
  1749. NOTHING;
  1750. }
  1751. if (ProcessReferenced == TRUE) {
  1752. ObDereferenceObject (Process);
  1753. }
  1754. return Status;
  1755. }
  1756. VOID
  1757. MiRemoveUserPhysicalPagesVad (
  1758. IN PMMVAD_SHORT Vad
  1759. )
  1760. /*++
  1761. Routine Description:
  1762. This function removes the user-physical-pages mapped region from the
  1763. current process's address space. This mapped region is private memory.
  1764. The physical pages of this Vad are unmapped here, but not freed.
  1765. Pagetable pages are freed and their use/commitment counts/quotas are
  1766. managed by our caller.
  1767. Arguments:
  1768. Vad - Supplies the VAD which manages the address space.
  1769. Return Value:
  1770. None.
  1771. Environment:
  1772. APC level, working set mutex and address creation mutex held.
  1773. --*/
  1774. {
  1775. KIRQL OldIrql;
  1776. PMMPFN Pfn1;
  1777. PEPROCESS Process;
  1778. PFN_NUMBER PageFrameIndex;
  1779. MMPTE_FLUSH_LIST PteFlushList;
  1780. PMMPTE PointerPte;
  1781. MMPTE PteContents;
  1782. MMPTE JunkPte;
  1783. PMMPTE EndingPte;
  1784. PAWEINFO AweInfo;
  1785. #if DBG
  1786. ULONG_PTR ActualPages;
  1787. ULONG_PTR ExpectedPages;
  1788. PLIST_ENTRY NextEntry;
  1789. PMI_PHYSICAL_VIEW PhysicalView;
  1790. #endif
  1791. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  1792. ASSERT (Vad->u.VadFlags.UserPhysicalPages == 1);
  1793. Process = PsGetCurrentProcess();
  1794. AweInfo = (PAWEINFO) Process->AweInfo;
  1795. ASSERT (AweInfo != NULL);
  1796. //
  1797. // If the physical pages count is zero, nothing needs to be done.
  1798. // On checked systems, verify the list anyway.
  1799. //
  1800. #if DBG
  1801. ActualPages = 0;
  1802. ExpectedPages = AweInfo->VadPhysicalPages;
  1803. #else
  1804. if (AweInfo->VadPhysicalPages == 0) {
  1805. return;
  1806. }
  1807. #endif
  1808. PointerPte = MiGetPteAddress (MI_VPN_TO_VA (Vad->StartingVpn));
  1809. EndingPte = MiGetPteAddress (MI_VPN_TO_VA_ENDING (Vad->EndingVpn));
  1810. PteFlushList.Count = 0;
  1811. //
  1812. // The caller must have removed this Vad from the physical view list,
  1813. // otherwise another thread could immediately remap pages back into this
  1814. // same Vad.
  1815. //
  1816. KeRaiseIrql (APC_LEVEL, &OldIrql);
  1817. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  1818. #if DBG
  1819. NextEntry = AweInfo->AweVadList.Flink;
  1820. while (NextEntry != &AweInfo->AweVadList) {
  1821. PhysicalView = CONTAINING_RECORD(NextEntry,
  1822. MI_PHYSICAL_VIEW,
  1823. ListEntry);
  1824. ASSERT (PhysicalView->Vad != (PMMVAD)Vad);
  1825. NextEntry = NextEntry->Flink;
  1826. }
  1827. #endif
  1828. while (PointerPte <= EndingPte) {
  1829. PteContents = *PointerPte;
  1830. if (PteContents.u.Hard.Valid == 0) {
  1831. PointerPte += 1;
  1832. continue;
  1833. }
  1834. //
  1835. // The frame is currently mapped in this Vad so the PTE must
  1836. // be cleared and the TB entry flushed.
  1837. //
  1838. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerPte);
  1839. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1840. (MiUsingLowPagesForAwe == TRUE));
  1841. ASSERT (ExpectedPages != 0);
  1842. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1843. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1844. ASSERT (Pfn1->u2.ShareCount == 2);
  1845. ASSERT (Pfn1->PteAddress == PointerPte);
  1846. //
  1847. // Note the AWE/PFN locks are not needed here because we have acquired
  1848. // the pushlock exclusive so no one can be mapping or unmapping
  1849. // right now. In fact, the PFN sharecount doesn't even have to be
  1850. // updated with an interlocked sequence because the pushlock is held
  1851. // exclusive.
  1852. //
  1853. Pfn1->u2.ShareCount -= 1;
  1854. Pfn1->PteAddress = NULL;
  1855. if (PteFlushList.Count != MM_MAXIMUM_FLUSH_COUNT) {
  1856. PteFlushList.FlushVa[PteFlushList.Count] =
  1857. MiGetVirtualAddressMappedByPte (PointerPte);
  1858. PteFlushList.FlushPte[PteFlushList.Count] = &JunkPte;
  1859. PteFlushList.Count += 1;
  1860. }
  1861. MI_WRITE_INVALID_PTE (PointerPte, ZeroPte);
  1862. PointerPte += 1;
  1863. #if DBG
  1864. ActualPages += 1;
  1865. #endif
  1866. ASSERT (ActualPages <= ExpectedPages);
  1867. }
  1868. //
  1869. // Flush the TB entries for any relevant pages.
  1870. //
  1871. MiFlushPteList (&PteFlushList, FALSE, ZeroPte);
  1872. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  1873. KeLowerIrql (OldIrql);
  1874. return;
  1875. }
  1876. VOID
  1877. MiCleanPhysicalProcessPages (
  1878. IN PEPROCESS Process
  1879. )
  1880. /*++
  1881. Routine Description:
  1882. This routine frees the VadPhysicalBitMap, any remaining physical pages (as
  1883. they may not have been currently mapped into any Vads) and returns the
  1884. bitmap quota.
  1885. Arguments:
  1886. Process - Supplies the process to clean.
  1887. Return Value:
  1888. None.
  1889. Environment:
  1890. Kernel mode, APC level, working set mutex held. Called only on process
  1891. exit, so the AWE push lock is not needed here.
  1892. --*/
  1893. {
  1894. PMMPFN Pfn1;
  1895. PAWEINFO AweInfo;
  1896. ULONG BitMapSize;
  1897. ULONG BitMapIndex;
  1898. ULONG BitMapHint;
  1899. PRTL_BITMAP BitMap;
  1900. PPFN_NUMBER MdlPage;
  1901. PFN_NUMBER MdlHack[(sizeof(MDL) / sizeof(PFN_NUMBER)) + COPY_STACK_SIZE];
  1902. ULONG_PTR MdlPages;
  1903. ULONG_PTR NumberOfPages;
  1904. ULONG_PTR TotalFreedPages;
  1905. PMDL MemoryDescriptorList;
  1906. PFN_NUMBER PageFrameIndex;
  1907. PFN_NUMBER HighestPossiblePhysicalPage;
  1908. #if DBG
  1909. ULONG_PTR ActualPages = 0;
  1910. ULONG_PTR ExpectedPages = 0;
  1911. #endif
  1912. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  1913. AweInfo = (PAWEINFO) Process->AweInfo;
  1914. if (AweInfo == NULL) {
  1915. return;
  1916. }
  1917. TotalFreedPages = 0;
  1918. BitMap = AweInfo->VadPhysicalPagesBitMap;
  1919. if (BitMap == NULL) {
  1920. goto Finish;
  1921. }
  1922. #if DBG
  1923. ExpectedPages = AweInfo->VadPhysicalPages;
  1924. #else
  1925. if (AweInfo->VadPhysicalPages == 0) {
  1926. goto Finish;
  1927. }
  1928. #endif
  1929. MdlPages = COPY_STACK_SIZE;
  1930. MemoryDescriptorList = (PMDL)&MdlHack[0];
  1931. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1932. NumberOfPages = 0;
  1933. BitMapHint = 0;
  1934. while (TRUE) {
  1935. BitMapIndex = RtlFindSetBits (BitMap, 1, BitMapHint);
  1936. if (BitMapIndex < BitMapHint) {
  1937. break;
  1938. }
  1939. if (BitMapIndex == NO_BITS_FOUND) {
  1940. break;
  1941. }
  1942. PageFrameIndex = MI_BITMAP_INDEX_TO_FRAME(BitMapIndex);
  1943. ASSERT64 (PageFrameIndex < _4gb);
  1944. //
  1945. // The bitmap search wraps, so handle it here.
  1946. // Note PFN 0 is illegal.
  1947. //
  1948. ASSERT (PageFrameIndex != 0);
  1949. ASSERT ((PageFrameIndex >= LOWEST_USABLE_PHYSICAL_PAGE) ||
  1950. (MiUsingLowPagesForAwe == TRUE));
  1951. ASSERT (ExpectedPages != 0);
  1952. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1953. ASSERT (Pfn1->u2.ShareCount == 1);
  1954. ASSERT (Pfn1->PteAddress == NULL);
  1955. ASSERT (MI_PFN_IS_AWE (Pfn1));
  1956. MI_SET_PFN_DELETED(Pfn1);
  1957. *MdlPage = PageFrameIndex;
  1958. MdlPage += 1;
  1959. NumberOfPages += 1;
  1960. #if DBG
  1961. ActualPages += 1;
  1962. #endif
  1963. if (NumberOfPages == COPY_STACK_SIZE) {
  1964. //
  1965. // Free the pages in the full MDL.
  1966. //
  1967. MmInitializeMdl (MemoryDescriptorList,
  1968. 0,
  1969. NumberOfPages << PAGE_SHIFT);
  1970. MmFreePagesFromMdl (MemoryDescriptorList);
  1971. MdlPage = (PPFN_NUMBER)(MemoryDescriptorList + 1);
  1972. AweInfo->VadPhysicalPages -= NumberOfPages;
  1973. TotalFreedPages += NumberOfPages;
  1974. NumberOfPages = 0;
  1975. }
  1976. BitMapHint = BitMapIndex + 1;
  1977. if (BitMapHint >= BitMap->SizeOfBitMap) {
  1978. break;
  1979. }
  1980. }
  1981. //
  1982. // Free any straggling MDL pages here.
  1983. //
  1984. if (NumberOfPages != 0) {
  1985. MmInitializeMdl (MemoryDescriptorList,
  1986. 0,
  1987. NumberOfPages << PAGE_SHIFT);
  1988. MmFreePagesFromMdl (MemoryDescriptorList);
  1989. AweInfo->VadPhysicalPages -= NumberOfPages;
  1990. TotalFreedPages += NumberOfPages;
  1991. }
  1992. Finish:
  1993. ASSERT (ExpectedPages == ActualPages);
  1994. HighestPossiblePhysicalPage = MmHighestPossiblePhysicalPage;
  1995. #if defined (_WIN64)
  1996. //
  1997. // Force a 32-bit maximum on any page allocation because the bitmap
  1998. // package is currently 32-bit.
  1999. //
  2000. if (HighestPossiblePhysicalPage + 1 >= _4gb) {
  2001. HighestPossiblePhysicalPage = _4gb - 2;
  2002. }
  2003. #endif
  2004. ASSERT (AweInfo->VadPhysicalPages == 0);
  2005. if (BitMap != NULL) {
  2006. BitMapSize = sizeof(RTL_BITMAP) + (ULONG)((((HighestPossiblePhysicalPage + 1) + 31) / 32) * 4);
  2007. ExFreePool (BitMap);
  2008. PsReturnProcessNonPagedPoolQuota (Process, BitMapSize);
  2009. }
  2010. ExFreeCacheAwarePushLock (AweInfo->PushLock);
  2011. ExFreePool (AweInfo);
  2012. Process->AweInfo = NULL;
  2013. ASSERT (ExpectedPages == ActualPages);
  2014. if (TotalFreedPages != 0) {
  2015. InterlockedExchangeAddSizeT (&MmVadPhysicalPages, 0 - TotalFreedPages);
  2016. }
  2017. return;
  2018. }
  2019. VOID
  2020. MiAweViewInserter (
  2021. IN PEPROCESS Process,
  2022. IN PMI_PHYSICAL_VIEW PhysicalView
  2023. )
  2024. /*++
  2025. Routine Description:
  2026. This function inserts a new AWE view into the specified process' AWE chain.
  2027. Arguments:
  2028. Process - Supplies the process to add the AWE VAD to.
  2029. PhysicalView - Supplies the physical view data to link in.
  2030. Return Value:
  2031. TRUE if the view was inserted, FALSE if not.
  2032. Environment:
  2033. Kernel mode. APC_LEVEL, working set and address space mutexes held.
  2034. --*/
  2035. {
  2036. PAWEINFO AweInfo;
  2037. AweInfo = (PAWEINFO) Process->AweInfo;
  2038. ASSERT (AweInfo != NULL);
  2039. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  2040. InsertTailList (&AweInfo->AweVadList, &PhysicalView->ListEntry);
  2041. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  2042. }
  2043. VOID
  2044. MiAweViewRemover (
  2045. IN PEPROCESS Process,
  2046. IN PMMVAD Vad
  2047. )
  2048. /*++
  2049. Routine Description:
  2050. This function removes an AWE Vad from the specified process' AWE chain.
  2051. Arguments:
  2052. Process - Supplies the process to remove the AWE VAD from.
  2053. Vad - Supplies the Vad to remove.
  2054. Return Value:
  2055. None.
  2056. Environment:
  2057. Kernel mode, APC_LEVEL, working set and address space mutexes held.
  2058. --*/
  2059. {
  2060. PAWEINFO AweInfo;
  2061. PLIST_ENTRY NextEntry;
  2062. PMI_PHYSICAL_VIEW AweView;
  2063. AweInfo = (PAWEINFO) Process->AweInfo;
  2064. ASSERT (AweInfo != NULL);
  2065. ExAcquireCacheAwarePushLockExclusive (AweInfo->PushLock);
  2066. NextEntry = AweInfo->AweVadList.Flink;
  2067. while (NextEntry != &AweInfo->AweVadList) {
  2068. AweView = CONTAINING_RECORD (NextEntry,
  2069. MI_PHYSICAL_VIEW,
  2070. ListEntry);
  2071. if (AweView->Vad == Vad) {
  2072. RemoveEntryList (NextEntry);
  2073. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  2074. ExFreePool (AweView);
  2075. return;
  2076. }
  2077. NextEntry = NextEntry->Flink;
  2078. }
  2079. ASSERT (FALSE);
  2080. ExReleaseCacheAwarePushLockExclusive (AweInfo->PushLock);
  2081. }