Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

5641 lines
157 KiB

  1. /*++
  2. Copyright (c) 1989 Microsoft Corporation
  3. Module Name:
  4. pagfault.c
  5. Abstract:
  6. This module contains the pager for memory management.
  7. Author:
  8. Lou Perazzoli (loup) 10-Apr-1989
  9. Landy Wang (landyw) 02-June-1997
  10. Revision History:
  11. --*/
  12. #include "mi.h"
  13. #if defined ( _WIN64)
  14. #if DBGXX
  15. VOID
  16. MiCheckPageTableInPage(
  17. IN PMMPFN Pfn,
  18. IN PMMINPAGE_SUPPORT Support
  19. );
  20. #endif
  21. #endif
  22. #define STATUS_PTE_CHANGED 0x87303000
  23. #define STATUS_REFAULT 0xC7303001
  24. ULONG MmInPageSupportMinimum = 4;
  25. ULONG MiInPageSinglePages;
  26. extern PMMPTE MmSharedUserDataPte;
  27. extern PVOID MmSpecialPoolStart;
  28. extern PVOID MmSpecialPoolEnd;
  29. ULONG MiFaultRetries;
  30. ULONG MiUserFaultRetries;
  31. ULONG MmClusterPageFileReads;
  32. #define MI_PROTOTYPE_WSINDEX ((ULONG)-1)
  33. VOID
  34. MiHandleBankedSection (
  35. IN PVOID VirtualAddress,
  36. IN PMMVAD Vad
  37. );
  38. NTSTATUS
  39. MiCompleteProtoPteFault (
  40. IN ULONG_PTR StoreInstruction,
  41. IN PVOID FaultingAddress,
  42. IN PMMPTE PointerPte,
  43. IN PMMPTE PointerProtoPte
  44. );
  45. NTSTATUS
  46. MiDispatchFault (
  47. IN ULONG_PTR FaultStatus,
  48. IN PVOID VirtualAddress,
  49. IN PMMPTE PointerPte,
  50. IN PMMPTE PointerProtoPte,
  51. IN PEPROCESS Process,
  52. OUT PLOGICAL ApcNeeded
  53. )
  54. /*++
  55. Routine Description:
  56. This routine dispatches a page fault to the appropriate
  57. routine to complete the fault.
  58. Arguments:
  59. FaultStatus - Supplies fault status information bits.
  60. VirtualAddress - Supplies the faulting address.
  61. PointerPte - Supplies the PTE for the faulting address.
  62. PointerProtoPte - Supplies a pointer to the prototype PTE to fault in,
  63. NULL if no prototype PTE exists.
  64. Process - Supplies a pointer to the process object. If this
  65. parameter is NULL, then the fault is for system
  66. space and the process's working set lock is not held.
  67. If this parameter is HYDRA_PROCESS, then the fault is for session
  68. space and the process's working set lock is not held - rather
  69. the session space's working set lock is held.
  70. ApcNeeded - Supplies a pointer to a location set to TRUE if an I/O
  71. completion APC is needed to complete partial IRPs that
  72. collided.
  73. It is the caller's responsibility to initialize this (usually
  74. to FALSE) on entry. However, since this routine may be called
  75. multiple times for a single fault (for the page directory,
  76. page table and the page itself), it is possible for it to
  77. occasionally be TRUE on entry.
  78. If it is FALSE on exit, no completion APC is needed.
  79. Return Value:
  80. status.
  81. Environment:
  82. Kernel mode, working set lock held.
  83. --*/
  84. {
  85. MMPTE TempPte;
  86. NTSTATUS status;
  87. PMMINPAGE_SUPPORT ReadBlock;
  88. MMPTE SavedPte;
  89. PMMINPAGE_SUPPORT CapturedEvent;
  90. KIRQL OldIrql;
  91. PPFN_NUMBER Page;
  92. PFN_NUMBER PageFrameIndex;
  93. LONG NumberOfBytes;
  94. PMMPTE CheckPte;
  95. PMMPTE ReadPte;
  96. PMMPFN PfnClusterPage;
  97. PMMPFN Pfn1;
  98. LOGICAL WsLockChanged;
  99. PETHREAD CurrentThread;
  100. PERFINFO_HARDPAGEFAULT_INFORMATION HardFaultEvent;
  101. LARGE_INTEGER IoStartTime;
  102. LARGE_INTEGER IoCompleteTime;
  103. LOGICAL PerfInfoLogHardFault;
  104. PETHREAD Thread;
  105. ULONG_PTR StoreInstruction;
  106. WsLockChanged = FALSE;
  107. StoreInstruction = MI_FAULT_STATUS_INDICATES_WRITE (FaultStatus);
  108. //
  109. // Initializing ReadBlock & ReadPte is not needed for correctness, but
  110. // without it the compiler cannot compile this code W4 to check for use of
  111. // uninitialized variables.
  112. //
  113. ReadPte = NULL;
  114. ReadBlock = NULL;
  115. if (PointerProtoPte != NULL) {
  116. ASSERT (!MI_IS_PHYSICAL_ADDRESS(PointerProtoPte));
  117. CheckPte = MiGetPteAddress (PointerProtoPte);
  118. //
  119. // Acquire the PFN lock to synchronize access to prototype PTEs.
  120. // This is required as the working set lock will not prevent
  121. // multiple processes from operating on the same prototype PTE.
  122. //
  123. LOCK_PFN (OldIrql);
  124. //
  125. // Make sure the prototype PTEs are in memory. For
  126. // user mode faults, this should already be the case.
  127. //
  128. if (CheckPte->u.Hard.Valid == 0) {
  129. ASSERT ((Process == NULL) || (Process == HYDRA_PROCESS));
  130. UNLOCK_PFN (OldIrql);
  131. VirtualAddress = PointerProtoPte;
  132. PointerPte = CheckPte;
  133. PointerProtoPte = NULL;
  134. //
  135. // The page that contains the prototype PTE is not in memory.
  136. //
  137. if (Process == HYDRA_PROCESS) {
  138. //
  139. // We were called while holding this session space's
  140. // working set lock. But we need to fault in a
  141. // prototype PTE which is in system paged pool. This
  142. // must be done under the system working set lock.
  143. //
  144. // So we release the session space WSL lock and get
  145. // the system working set lock. When done
  146. // we return STATUS_MORE_PROCESSING_REQUIRED
  147. // so our caller will call us again to handle the
  148. // actual prototype PTE fault.
  149. //
  150. UNLOCK_SESSION_SPACE_WS (APC_LEVEL);
  151. //
  152. // Clear Process as the system working set is now held.
  153. //
  154. Process = NULL;
  155. WsLockChanged = TRUE;
  156. ASSERT (MI_IS_SESSION_ADDRESS (VirtualAddress) == FALSE);
  157. LOCK_SYSTEM_WS_UNSAFE (PsGetCurrentThread ());
  158. }
  159. goto NonProtoFault;
  160. }
  161. if (PointerPte->u.Hard.Valid == 1) {
  162. //
  163. // PTE was already made valid by the cache manager support
  164. // routines.
  165. //
  166. UNLOCK_PFN (OldIrql);
  167. return STATUS_SUCCESS;
  168. }
  169. ReadPte = PointerProtoPte;
  170. status = MiResolveProtoPteFault (StoreInstruction,
  171. VirtualAddress,
  172. PointerPte,
  173. PointerProtoPte,
  174. &ReadBlock,
  175. Process,
  176. ApcNeeded);
  177. //
  178. // Returns with PFN lock released.
  179. //
  180. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  181. }
  182. else {
  183. NonProtoFault:
  184. TempPte = *PointerPte;
  185. ASSERT (TempPte.u.Long != 0);
  186. if (TempPte.u.Soft.Transition != 0) {
  187. //
  188. // This is a transition page.
  189. //
  190. CapturedEvent = NULL;
  191. status = MiResolveTransitionFault (VirtualAddress,
  192. PointerPte,
  193. Process,
  194. FALSE,
  195. ApcNeeded,
  196. &CapturedEvent);
  197. if (CapturedEvent != NULL) {
  198. MiFreeInPageSupportBlock (CapturedEvent);
  199. }
  200. }
  201. else if (TempPte.u.Soft.PageFileHigh == 0) {
  202. //
  203. // Demand zero fault.
  204. //
  205. status = MiResolveDemandZeroFault (VirtualAddress,
  206. PointerPte,
  207. Process,
  208. FALSE);
  209. }
  210. else {
  211. //
  212. // Page resides in paging file.
  213. //
  214. ReadPte = PointerPte;
  215. LOCK_PFN (OldIrql);
  216. status = MiResolvePageFileFault (VirtualAddress,
  217. PointerPte,
  218. &ReadBlock,
  219. Process);
  220. }
  221. }
  222. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  223. if (NT_SUCCESS(status)) {
  224. if (WsLockChanged == TRUE) {
  225. UNLOCK_SYSTEM_WS (APC_LEVEL);
  226. LOCK_SESSION_SPACE_WS (OldIrql, PsGetCurrentThread ());
  227. }
  228. return status;
  229. }
  230. if (status == STATUS_ISSUE_PAGING_IO) {
  231. ASSERT (ReadPte != NULL);
  232. ASSERT (ReadBlock != NULL);
  233. SavedPte = *ReadPte;
  234. CapturedEvent = (PMMINPAGE_SUPPORT)ReadBlock->Pfn->u1.Event;
  235. CurrentThread = NULL;
  236. if (Process == HYDRA_PROCESS) {
  237. UNLOCK_SESSION_SPACE_WS(APC_LEVEL);
  238. }
  239. else if (Process != NULL) {
  240. //
  241. // APCs must be explicitly disabled to prevent suspend APCs from
  242. // interrupting this thread before the I/O has been issued.
  243. // Otherwise a shared page I/O can stop any other thread that
  244. // references it indefinitely until the suspend is released.
  245. //
  246. CurrentThread = PsGetCurrentThread();
  247. ASSERT (CurrentThread->NestedFaultCount <= 2);
  248. CurrentThread->NestedFaultCount += 1;
  249. KeEnterCriticalRegionThread (&CurrentThread->Tcb);
  250. UNLOCK_WS (Process);
  251. }
  252. else {
  253. UNLOCK_SYSTEM_WS(APC_LEVEL);
  254. }
  255. #if DBG
  256. if (MmDebug & MM_DBG_PAGEFAULT) {
  257. DbgPrint ("MMFAULT: va: %p size: %lx process: %s file: %Z\n",
  258. VirtualAddress,
  259. ReadBlock->Mdl.ByteCount,
  260. Process == HYDRA_PROCESS ? (PUCHAR)"Session Space" : (Process ? Process->ImageFileName : (PUCHAR)"SystemVa"),
  261. &ReadBlock->FilePointer->FileName
  262. );
  263. }
  264. #endif //DBG
  265. if (PERFINFO_IS_GROUP_ON(PERF_FILE_IO)) {
  266. PerfInfoLogHardFault = TRUE;
  267. PerfTimeStamp (IoStartTime);
  268. }
  269. else {
  270. PerfInfoLogHardFault = FALSE;
  271. //
  272. // Initializing these is not needed for correctness, but
  273. // without it the compiler cannot compile this code W4 to check
  274. // for use of uninitialized variables.
  275. //
  276. IoStartTime.QuadPart = 0;
  277. }
  278. IoCompleteTime.QuadPart = 0;
  279. //
  280. // Assert no reads issued here are marked as prefetched.
  281. //
  282. ASSERT (ReadBlock->u1.e1.PrefetchMdlHighBits == 0);
  283. //
  284. // Issue the read request.
  285. //
  286. status = IoPageRead (ReadBlock->FilePointer,
  287. &ReadBlock->Mdl,
  288. &ReadBlock->ReadOffset,
  289. &ReadBlock->Event,
  290. &ReadBlock->IoStatus);
  291. if (!NT_SUCCESS(status)) {
  292. //
  293. // Set the event as the I/O system doesn't set it on errors.
  294. //
  295. ReadBlock->IoStatus.Status = status;
  296. ReadBlock->IoStatus.Information = 0;
  297. KeSetEvent (&ReadBlock->Event, 0, FALSE);
  298. }
  299. //
  300. // Initializing PageFrameIndex is not needed for correctness, but
  301. // without it the compiler cannot compile this code W4 to check
  302. // for use of uninitialized variables.
  303. //
  304. PageFrameIndex = (PFN_NUMBER)-1;
  305. //
  306. // Wait for the I/O operation.
  307. //
  308. status = MiWaitForInPageComplete (ReadBlock->Pfn,
  309. ReadPte,
  310. VirtualAddress,
  311. &SavedPte,
  312. CapturedEvent,
  313. Process);
  314. if (CurrentThread != NULL) {
  315. KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);
  316. ASSERT (CurrentThread->NestedFaultCount <= 3);
  317. ASSERT (CurrentThread->NestedFaultCount != 0);
  318. CurrentThread->NestedFaultCount -= 1;
  319. if ((CurrentThread->ApcNeeded == 1) &&
  320. (CurrentThread->NestedFaultCount == 0)) {
  321. *ApcNeeded = TRUE;
  322. CurrentThread->ApcNeeded = 0;
  323. }
  324. }
  325. if (PerfInfoLogHardFault) {
  326. PerfTimeStamp (IoCompleteTime);
  327. }
  328. //
  329. // MiWaitForInPageComplete RETURNS WITH THE WORKING SET LOCK
  330. // AND PFN LOCK HELD!!!
  331. //
  332. //
  333. // This is the thread which owns the event, clear the event field
  334. // in the PFN database.
  335. //
  336. Pfn1 = ReadBlock->Pfn;
  337. Page = &ReadBlock->Page[0];
  338. NumberOfBytes = (LONG)ReadBlock->Mdl.ByteCount;
  339. CheckPte = ReadBlock->BasePte;
  340. while (NumberOfBytes > 0) {
  341. //
  342. // Don't remove the page we just brought in to
  343. // satisfy this page fault.
  344. //
  345. if (CheckPte != ReadPte) {
  346. PfnClusterPage = MI_PFN_ELEMENT (*Page);
  347. MI_SNAP_DATA (PfnClusterPage, PfnClusterPage->PteAddress, 0xB);
  348. ASSERT (PfnClusterPage->u4.PteFrame == Pfn1->u4.PteFrame);
  349. #if DBG
  350. if (PfnClusterPage->u4.InPageError) {
  351. ASSERT (status != STATUS_SUCCESS);
  352. }
  353. #endif
  354. if (PfnClusterPage->u3.e1.ReadInProgress != 0) {
  355. ASSERT (PfnClusterPage->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);
  356. PfnClusterPage->u3.e1.ReadInProgress = 0;
  357. if (PfnClusterPage->u4.InPageError == 0) {
  358. PfnClusterPage->u1.Event = NULL;
  359. }
  360. }
  361. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnClusterPage, 9);
  362. }
  363. else {
  364. PageFrameIndex = *Page;
  365. MI_SNAP_DATA (MI_PFN_ELEMENT (PageFrameIndex),
  366. MI_PFN_ELEMENT (PageFrameIndex)->PteAddress,
  367. 0xC);
  368. }
  369. CheckPte += 1;
  370. Page += 1;
  371. NumberOfBytes -= PAGE_SIZE;
  372. }
  373. if (status != STATUS_SUCCESS) {
  374. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(MI_PFN_ELEMENT(PageFrameIndex), 9);
  375. if (status == STATUS_PTE_CHANGED) {
  376. //
  377. // State of PTE changed during I/O operation, just
  378. // return success and refault.
  379. //
  380. UNLOCK_PFN (APC_LEVEL);
  381. if (WsLockChanged == TRUE) {
  382. UNLOCK_SYSTEM_WS (APC_LEVEL);
  383. LOCK_SESSION_SPACE_WS (OldIrql, PsGetCurrentThread ());
  384. }
  385. MiFreeInPageSupportBlock (CapturedEvent);
  386. return STATUS_SUCCESS;
  387. }
  388. //
  389. // An I/O error occurred during the page read
  390. // operation. All the pages which were just
  391. // put into transition should be put onto the
  392. // free list if InPageError is set, and their
  393. // PTEs restored to the proper contents.
  394. //
  395. Page = &ReadBlock->Page[0];
  396. NumberOfBytes = ReadBlock->Mdl.ByteCount;
  397. while (NumberOfBytes > 0) {
  398. PfnClusterPage = MI_PFN_ELEMENT (*Page);
  399. if (PfnClusterPage->u4.InPageError == 1) {
  400. if (PfnClusterPage->u3.e2.ReferenceCount == 0) {
  401. PfnClusterPage->u4.InPageError = 0;
  402. //
  403. // Only restore the transition PTE if the address
  404. // space still exists. Another thread may have
  405. // deleted the VAD while this thread waited for the
  406. // fault to complete - in this case, the frame
  407. // will be marked as free already.
  408. //
  409. if (PfnClusterPage->u3.e1.PageLocation != FreePageList) {
  410. ASSERT (PfnClusterPage->u3.e1.PageLocation ==
  411. StandbyPageList);
  412. MiUnlinkPageFromList (PfnClusterPage);
  413. MiRestoreTransitionPte (*Page);
  414. MiInsertPageInFreeList (*Page);
  415. }
  416. }
  417. }
  418. Page += 1;
  419. NumberOfBytes -= PAGE_SIZE;
  420. }
  421. UNLOCK_PFN (APC_LEVEL);
  422. if (WsLockChanged == TRUE) {
  423. UNLOCK_SYSTEM_WS (APC_LEVEL);
  424. LOCK_SESSION_SPACE_WS (OldIrql, PsGetCurrentThread ());
  425. }
  426. MiFreeInPageSupportBlock (CapturedEvent);
  427. if (status == STATUS_REFAULT) {
  428. //
  429. // The I/O operation to bring in a system page failed
  430. // due to insufficent resources. Delay a bit, then
  431. // return success and refault.
  432. //
  433. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  434. return STATUS_SUCCESS;
  435. }
  436. return status;
  437. }
  438. //
  439. // PTE is still in transition state, same protection, etc.
  440. //
  441. ASSERT (Pfn1->u4.InPageError == 0);
  442. if (Pfn1->u2.ShareCount == 0) {
  443. MI_REMOVE_LOCKED_PAGE_CHARGE (Pfn1, 9);
  444. }
  445. Pfn1->u2.ShareCount += 1;
  446. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  447. Pfn1->u3.e1.CacheAttribute = MiCached;
  448. MI_MAKE_TRANSITION_PTE_VALID (TempPte, ReadPte);
  449. if (StoreInstruction && TempPte.u.Hard.Write) {
  450. MI_SET_PTE_DIRTY (TempPte);
  451. }
  452. MI_WRITE_VALID_PTE (ReadPte, TempPte);
  453. if (PointerProtoPte != NULL) {
  454. #if DBG
  455. NTSTATUS oldstatus = status;
  456. #endif
  457. //
  458. // The prototype PTE has been made valid, now make the
  459. // original PTE valid. The original PTE must still be invalid
  460. // otherwise MiWaitForInPageComplete would have returned
  461. // a collision status.
  462. //
  463. ASSERT (PointerPte->u.Hard.Valid == 0);
  464. //
  465. // PTE is not valid, continue with operation.
  466. //
  467. status = MiCompleteProtoPteFault (StoreInstruction,
  468. VirtualAddress,
  469. PointerPte,
  470. PointerProtoPte);
  471. //
  472. // Returns with PFN lock released!
  473. //
  474. #if DBG
  475. if (PointerPte->u.Hard.Valid == 0) {
  476. DbgPrint ("MM:PAGFAULT - va %p %p %p status:%lx\n",
  477. VirtualAddress, PointerPte, PointerProtoPte, oldstatus);
  478. }
  479. #endif
  480. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  481. }
  482. else {
  483. if (Pfn1->u1.Event == 0) {
  484. Pfn1->u1.Event = (PVOID)PsGetCurrentThread();
  485. }
  486. UNLOCK_PFN (APC_LEVEL);
  487. MiAddValidPageToWorkingSet (VirtualAddress,
  488. ReadPte,
  489. Pfn1,
  490. 0);
  491. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  492. }
  493. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  494. if (PerfInfoLogHardFault) {
  495. Thread = PsGetCurrentThread();
  496. HardFaultEvent.ReadOffset = ReadBlock->ReadOffset;
  497. HardFaultEvent.IoTime.QuadPart = IoCompleteTime.QuadPart - IoStartTime.QuadPart;
  498. HardFaultEvent.VirtualAddress = VirtualAddress;
  499. HardFaultEvent.FileObject = ReadBlock->FilePointer;
  500. HardFaultEvent.ThreadId = HandleToUlong(Thread->Cid.UniqueThread);
  501. HardFaultEvent.ByteCount = ReadBlock->Mdl.ByteCount;
  502. PerfInfoLogBytes(PERFINFO_LOG_TYPE_HARDFAULT, &HardFaultEvent, sizeof(HardFaultEvent));
  503. }
  504. MiFreeInPageSupportBlock (CapturedEvent);
  505. if (status == STATUS_SUCCESS) {
  506. status = STATUS_PAGE_FAULT_PAGING_FILE;
  507. }
  508. }
  509. //
  510. // Stop high priority threads from consuming the CPU on collided
  511. // faults for pages that are still marked with inpage errors. All
  512. // the threads must let go of the page so it can be freed and the
  513. // inpage I/O reissued to the filesystem.
  514. //
  515. if (MmIsRetryIoStatus(status)) {
  516. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  517. status = STATUS_SUCCESS;
  518. }
  519. if ((status == STATUS_REFAULT) ||
  520. (status == STATUS_PTE_CHANGED)) {
  521. status = STATUS_SUCCESS;
  522. }
  523. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  524. if (WsLockChanged == TRUE) {
  525. UNLOCK_SYSTEM_WS (APC_LEVEL);
  526. LOCK_SESSION_SPACE_WS (OldIrql, PsGetCurrentThread ());
  527. }
  528. return status;
  529. }
  530. NTSTATUS
  531. MiResolveDemandZeroFault (
  532. IN PVOID VirtualAddress,
  533. IN PMMPTE PointerPte,
  534. IN PEPROCESS Process,
  535. IN ULONG PrototypePte
  536. )
  537. /*++
  538. Routine Description:
  539. This routine resolves a demand zero page fault.
  540. If the PrototypePte argument is TRUE, the PFN lock is
  541. held, the lock cannot be dropped, and the page should
  542. not be added to the working set at this time.
  543. Arguments:
  544. VirtualAddress - Supplies the faulting address.
  545. PointerPte - Supplies the PTE for the faulting address.
  546. Process - Supplies a pointer to the process object. If this
  547. parameter is NULL, then the fault is for system
  548. space and the process's working set lock is not held.
  549. PrototypePte - Supplies TRUE if this is a prototype PTE.
  550. Return Value:
  551. status, either STATUS_SUCCESS or STATUS_REFAULT.
  552. Environment:
  553. Kernel mode, PFN lock held conditionally.
  554. --*/
  555. {
  556. PMMPFN Pfn1;
  557. PFN_NUMBER PageFrameIndex;
  558. MMPTE TempPte;
  559. ULONG PageColor;
  560. KIRQL OldIrql;
  561. LOGICAL NeedToZero;
  562. LOGICAL BarrierNeeded;
  563. ULONG BarrierStamp;
  564. NeedToZero = FALSE;
  565. BarrierNeeded = FALSE;
  566. PERFINFO_PRIVATE_PAGE_DEMAND_ZERO(VirtualAddress);
  567. //
  568. // Check to see if a page is available, if a wait is
  569. // returned, do not continue, just return success.
  570. //
  571. if (!PrototypePte) {
  572. LOCK_PFN (OldIrql);
  573. }
  574. MM_PFN_LOCK_ASSERT();
  575. if (PointerPte->u.Hard.Valid == 0) {
  576. if (!MiEnsureAvailablePageOrWait (Process,
  577. VirtualAddress)) {
  578. //
  579. // Initializing BarrierStamp is not needed for
  580. // correctness but without it the compiler cannot compile this code
  581. // W4 to check for use of uninitialized variables.
  582. //
  583. BarrierStamp = 0;
  584. if (Process != NULL && Process != HYDRA_PROCESS && (!PrototypePte)) {
  585. //
  586. // If a fork operation is in progress and the faulting thread
  587. // is not the thread performing the fork operation, block until
  588. // the fork is completed.
  589. //
  590. if (Process->ForkInProgress != NULL) {
  591. if (MiWaitForForkToComplete (Process, TRUE) == TRUE) {
  592. UNLOCK_PFN (APC_LEVEL);
  593. return STATUS_REFAULT;
  594. }
  595. }
  596. Process->NumberOfPrivatePages += 1;
  597. PageColor = MI_PAGE_COLOR_VA_PROCESS (VirtualAddress,
  598. &Process->NextPageColor);
  599. ASSERT (MI_IS_PAGE_TABLE_ADDRESS(PointerPte));
  600. PageFrameIndex = MiRemoveZeroPageIfAny (PageColor);
  601. if (PageFrameIndex) {
  602. //
  603. // This barrier check is needed after zeroing the page
  604. // and before setting the PTE valid. Note since the PFN
  605. // database entry is used to hold the sequence timestamp,
  606. // it must be captured now. Check it at the last possible
  607. // moment.
  608. //
  609. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  610. BarrierStamp = (ULONG)Pfn1->u4.PteFrame;
  611. }
  612. else {
  613. PageFrameIndex = MiRemoveAnyPage (PageColor);
  614. NeedToZero = TRUE;
  615. }
  616. BarrierNeeded = TRUE;
  617. }
  618. else {
  619. PageColor = MI_PAGE_COLOR_VA_PROCESS (VirtualAddress,
  620. &MI_SYSTEM_PAGE_COLOR);
  621. //
  622. // As this is a system page, there is no need to
  623. // remove a page of zeroes, it must be initialized by
  624. // the system before being used.
  625. //
  626. if (PrototypePte) {
  627. PageFrameIndex = MiRemoveZeroPage (PageColor);
  628. }
  629. else {
  630. PageFrameIndex = MiRemoveAnyPage (PageColor);
  631. }
  632. }
  633. MmInfoCounters.DemandZeroCount += 1;
  634. MiInitializePfn (PageFrameIndex, PointerPte, 1);
  635. if (!PrototypePte) {
  636. UNLOCK_PFN (APC_LEVEL);
  637. }
  638. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  639. if (NeedToZero) {
  640. MiZeroPhysicalPage (PageFrameIndex, PageColor);
  641. //
  642. // Note the stamping must occur after the page is zeroed.
  643. //
  644. MI_BARRIER_STAMP_ZEROED_PAGE (&BarrierStamp);
  645. }
  646. //
  647. // As this page is demand zero, set the modified bit in the
  648. // PFN database element and set the dirty bit in the PTE.
  649. //
  650. PERFINFO_SOFTFAULT(Pfn1, VirtualAddress, PERFINFO_LOG_TYPE_DEMANDZEROFAULT)
  651. MI_SNAP_DATA (Pfn1, PointerPte, 5);
  652. MI_MAKE_VALID_PTE (TempPte,
  653. PageFrameIndex,
  654. PointerPte->u.Soft.Protection,
  655. PointerPte);
  656. if (TempPte.u.Hard.Write != 0) {
  657. MI_SET_PTE_DIRTY (TempPte);
  658. }
  659. if (BarrierNeeded) {
  660. MI_BARRIER_SYNCHRONIZE (BarrierStamp);
  661. }
  662. MI_WRITE_VALID_PTE (PointerPte, TempPte);
  663. if (!PrototypePte) {
  664. ASSERT (Pfn1->u1.Event == 0);
  665. Pfn1->u1.Event = (PVOID)PsGetCurrentThread();
  666. MiAddValidPageToWorkingSet (VirtualAddress,
  667. PointerPte,
  668. Pfn1,
  669. 0);
  670. }
  671. return STATUS_PAGE_FAULT_DEMAND_ZERO;
  672. }
  673. }
  674. if (!PrototypePte) {
  675. UNLOCK_PFN (APC_LEVEL);
  676. }
  677. return STATUS_REFAULT;
  678. }
  679. NTSTATUS
  680. MiResolveTransitionFault (
  681. IN PVOID FaultingAddress,
  682. IN PMMPTE PointerPte,
  683. IN PEPROCESS CurrentProcess,
  684. IN ULONG PfnLockHeld,
  685. OUT PLOGICAL ApcNeeded,
  686. OUT PMMINPAGE_SUPPORT *InPageBlock
  687. )
  688. /*++
  689. Routine Description:
  690. This routine resolves a transition page fault.
  691. Arguments:
  692. FaultingAddress - Supplies the faulting address.
  693. PointerPte - Supplies the PTE for the faulting address.
  694. CurrentProcess - Supplies a pointer to the process object. If this
  695. parameter is NULL, then the fault is for system
  696. space and the process's working set lock is not held.
  697. PfnLockHeld - Supplies TRUE if the PFN lock is held, FALSE if not.
  698. ApcNeeded - Supplies a pointer to a location set to TRUE if an I/O
  699. completion APC is needed to complete partial IRPs that
  700. collided.
  701. It is the caller's responsibility to initialize this (usually
  702. to FALSE) on entry. However, since this routine may be called
  703. multiple times for a single fault (for the page directory,
  704. page table and the page itself), it is possible for it to
  705. occasionally be TRUE on entry.
  706. If it is FALSE on exit, no completion APC is needed.
  707. InPageBlock - Supplies a pointer to an inpage block pointer. The caller
  708. must initialize this to NULL on entry. This routine
  709. sets this to a non-NULL value to signify an inpage block
  710. the caller must free when the caller releases the PFN lock.
  711. Return Value:
  712. status, either STATUS_SUCCESS, STATUS_REFAULT or an I/O status
  713. code.
  714. Environment:
  715. Kernel mode, PFN lock may optionally be held.
  716. --*/
  717. {
  718. MMPFNENTRY PfnFlags;
  719. PFN_NUMBER PageFrameIndex;
  720. PMMPFN Pfn1;
  721. PMMPFN Pfn2;
  722. MMPTE TempPte;
  723. NTSTATUS status;
  724. NTSTATUS PfnStatus;
  725. PMMINPAGE_SUPPORT CapturedEvent;
  726. KIRQL OldIrql;
  727. PETHREAD CurrentThread;
  728. PMMPTE PointerToPteForProtoPage;
  729. //
  730. // ***********************************************************
  731. // Transition PTE.
  732. // ***********************************************************
  733. //
  734. //
  735. // A transition PTE is either on the free or modified list,
  736. // on neither list because of its ReferenceCount
  737. // or currently being read in from the disk (read in progress).
  738. // If the page is read in progress, this is a collided page
  739. // and must be handled accordingly.
  740. //
  741. ASSERT (*InPageBlock == NULL);
  742. if (!PfnLockHeld) {
  743. LOCK_PFN (OldIrql);
  744. }
  745. TempPte = *PointerPte;
  746. if ((TempPte.u.Soft.Valid == 0) &&
  747. (TempPte.u.Soft.Prototype == 0) &&
  748. (TempPte.u.Soft.Transition == 1)) {
  749. //
  750. // Still in transition format.
  751. //
  752. MmInfoCounters.TransitionCount += 1;
  753. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&TempPte);
  754. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  755. if (Pfn1->u4.InPageError) {
  756. //
  757. // There was an in-page read error and there are other
  758. // threads colliding for this page, delay to let the
  759. // other threads complete and return. Snap relevant PFN fields
  760. // before releasing the lock as the page may immediately get
  761. // reused.
  762. //
  763. PfnFlags = Pfn1->u3.e1;
  764. status = Pfn1->u1.ReadStatus;
  765. if (!PfnLockHeld) {
  766. UNLOCK_PFN (APC_LEVEL);
  767. }
  768. if (PfnFlags.ReadInProgress) {
  769. //
  770. // This only occurs when the page is being reclaimed by the
  771. // compression reaper. In this case, the page is still on the
  772. // transition list (so the ReadStatus is really a flink) so
  773. // substitute a retry status which will induce a delay so the
  774. // compression reaper can finish taking the page (and PTE).
  775. //
  776. return STATUS_NO_MEMORY;
  777. }
  778. ASSERT (!NT_SUCCESS(status));
  779. return status;
  780. }
  781. if (Pfn1->u3.e1.ReadInProgress) {
  782. //
  783. // Collided page fault.
  784. //
  785. #if DBG
  786. if (MmDebug & MM_DBG_COLLIDED_PAGE) {
  787. DbgPrint("MM:collided page fault\n");
  788. }
  789. #endif
  790. CapturedEvent = (PMMINPAGE_SUPPORT)Pfn1->u1.Event;
  791. CurrentThread = PsGetCurrentThread();
  792. if (CapturedEvent->Thread == CurrentThread) {
  793. //
  794. // This detects MmCopyToCachedPage deadlocks where both the
  795. // user and system address point at the same physical page.
  796. //
  797. // It also detects when the Io APC completion routine accesses
  798. // the same user page (ie: during an overlapped I/O) that
  799. // the user thread has already faulted on.
  800. //
  801. // Both cases above can result in fatal deadlocks and so must
  802. // be detected here. Return a unique status code so the
  803. // (legitimate) callers know this has happened so it can be
  804. // handled properly. In the first case above this means
  805. // restarting the entire operation immediately. In the second
  806. // case above it means requesting a callback from the Mm
  807. // once the first fault has completed.
  808. //
  809. // Note that non-legitimate callers must get back a failure
  810. // status so the thread can be terminated.
  811. //
  812. ASSERT ((CurrentThread->NestedFaultCount == 1) ||
  813. (CurrentThread->NestedFaultCount == 2));
  814. CurrentThread->ApcNeeded = 1;
  815. if (!PfnLockHeld) {
  816. UNLOCK_PFN (APC_LEVEL);
  817. }
  818. return STATUS_MULTIPLE_FAULT_VIOLATION;
  819. }
  820. //
  821. // Increment the reference count for the page so it won't be
  822. // reused until all collisions have been completed.
  823. //
  824. ASSERT (Pfn1->u2.ShareCount == 0);
  825. ASSERT (Pfn1->u3.e2.ReferenceCount != 0);
  826. ASSERT (Pfn1->u3.e1.LockCharged == 1);
  827. Pfn1->u3.e2.ReferenceCount += 1;
  828. //
  829. // Careful synchronization is applied to the WaitCount field so
  830. // that freeing of the inpage block can occur lock-free. Note
  831. // that the ReadInProgress bit on each PFN is set and cleared while
  832. // holding the PFN lock. Inpage blocks are always (and must be)
  833. // freed _AFTER_ the ReadInProgress bit is cleared.
  834. //
  835. InterlockedIncrement(&CapturedEvent->WaitCount);
  836. UNLOCK_PFN (APC_LEVEL);
  837. if (CurrentProcess == HYDRA_PROCESS) {
  838. CurrentThread = NULL;
  839. UNLOCK_SESSION_SPACE_WS (APC_LEVEL);
  840. }
  841. else if (CurrentProcess != NULL) {
  842. //
  843. // APCs must be explicitly disabled to prevent suspend APCs from
  844. // interrupting this thread before the wait has been issued.
  845. // Otherwise the APC can result in this page being locked
  846. // indefinitely until the suspend is released.
  847. //
  848. ASSERT (CurrentThread->NestedFaultCount <= 2);
  849. CurrentThread->NestedFaultCount += 1;
  850. KeEnterCriticalRegionThread (&CurrentThread->Tcb);
  851. UNLOCK_WS (CurrentProcess);
  852. }
  853. else {
  854. CurrentThread = NULL;
  855. UNLOCK_SYSTEM_WS (APC_LEVEL);
  856. }
  857. //
  858. // Set the inpage block address as the waitcount was incremented
  859. // above and therefore the free must be done by our caller.
  860. //
  861. *InPageBlock = CapturedEvent;
  862. status = MiWaitForInPageComplete (Pfn1,
  863. PointerPte,
  864. FaultingAddress,
  865. &TempPte,
  866. CapturedEvent,
  867. CurrentProcess);
  868. //
  869. // MiWaitForInPageComplete RETURNS WITH THE WORKING SET LOCK
  870. // AND PFN LOCK HELD!!!
  871. //
  872. if (CurrentThread != NULL) {
  873. KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);
  874. ASSERT (CurrentThread->NestedFaultCount <= 3);
  875. ASSERT (CurrentThread->NestedFaultCount != 0);
  876. CurrentThread->NestedFaultCount -= 1;
  877. if ((CurrentThread->ApcNeeded == 1) &&
  878. (CurrentThread->NestedFaultCount == 0)) {
  879. *ApcNeeded = TRUE;
  880. CurrentThread->ApcNeeded = 0;
  881. }
  882. }
  883. ASSERT (Pfn1->u3.e1.ReadInProgress == 0);
  884. if (status != STATUS_SUCCESS) {
  885. PfnStatus = Pfn1->u1.ReadStatus;
  886. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(Pfn1, 9);
  887. //
  888. // Check to see if an I/O error occurred on this page.
  889. // If so, try to free the physical page, wait a
  890. // half second and return a status of PTE_CHANGED.
  891. // This will result in success being returned to
  892. // the user and the fault will occur again and should
  893. // not be a transition fault this time.
  894. //
  895. if (Pfn1->u4.InPageError == 1) {
  896. ASSERT (!NT_SUCCESS(PfnStatus));
  897. status = PfnStatus;
  898. if (Pfn1->u3.e2.ReferenceCount == 0) {
  899. Pfn1->u4.InPageError = 0;
  900. //
  901. // Only restore the transition PTE if the address
  902. // space still exists. Another thread may have
  903. // deleted the VAD while this thread waited for the
  904. // fault to complete - in this case, the frame
  905. // will be marked as free already.
  906. //
  907. if (Pfn1->u3.e1.PageLocation != FreePageList) {
  908. ASSERT (Pfn1->u3.e1.PageLocation ==
  909. StandbyPageList);
  910. MiUnlinkPageFromList (Pfn1);
  911. MiRestoreTransitionPte (PageFrameIndex);
  912. MiInsertPageInFreeList (PageFrameIndex);
  913. }
  914. }
  915. }
  916. #if DBG
  917. if (MmDebug & MM_DBG_COLLIDED_PAGE) {
  918. DbgPrint("MM:decrement ref count - PTE changed\n");
  919. MiFormatPfn(Pfn1);
  920. }
  921. #endif
  922. if (!PfnLockHeld) {
  923. UNLOCK_PFN (APC_LEVEL);
  924. }
  925. //
  926. // Instead of returning status, always return STATUS_REFAULT.
  927. // This is to support filesystems that save state in the
  928. // ETHREAD of the thread that serviced the fault ! Since
  929. // collided threads never enter the filesystem, their ETHREADs
  930. // haven't been hacked up. Since this only matters when
  931. // errors occur (specifically STATUS_VERIFY_REQUIRED today),
  932. // retry any failed I/O in the context of each collider
  933. // to give the filesystems ample opportunity.
  934. //
  935. return STATUS_REFAULT;
  936. }
  937. }
  938. else {
  939. //
  940. // PTE refers to a normal transition PTE.
  941. //
  942. ASSERT ((SPFN_NUMBER)MmAvailablePages >= 0);
  943. ASSERT (Pfn1->u3.e1.CacheAttribute == MiCached);
  944. if (MmAvailablePages == 0) {
  945. //
  946. // This can only happen if the system is utilizing a hardware
  947. // compression cache. This ensures that only a safe amount
  948. // of the compressed virtual cache is directly mapped so that
  949. // if the hardware gets into trouble, we can bail it out.
  950. //
  951. if (!PfnLockHeld) {
  952. UNLOCK_PFN (APC_LEVEL);
  953. }
  954. //
  955. // Note our caller will delay execution after releasing the
  956. // working set mutex in order to make pages available.
  957. //
  958. return STATUS_NO_MEMORY;
  959. }
  960. ASSERT (Pfn1->u4.InPageError == 0);
  961. if (Pfn1->u3.e1.PageLocation == ActiveAndValid) {
  962. //
  963. // This page must contain an MmSt allocation of prototype PTEs.
  964. // Because these types of pages reside in paged pool (or special
  965. // pool) and are part of the system working set, they can be
  966. // trimmed at any time regardless of the share count. However,
  967. // if the share count is nonzero, then the page state will
  968. // remain active and the page will remain in memory - but the
  969. // PTE will be set to the transition state. Make the page
  970. // valid without incrementing the reference count, but
  971. // increment the share count.
  972. //
  973. ASSERT (((Pfn1->PteAddress >= MiGetPteAddress(MmPagedPoolStart)) &&
  974. (Pfn1->PteAddress <= MiGetPteAddress(MmPagedPoolEnd))) ||
  975. ((Pfn1->PteAddress >= MiGetPteAddress(MmSpecialPoolStart)) &&
  976. (Pfn1->PteAddress <= MiGetPteAddress(MmSpecialPoolEnd))));
  977. //
  978. // Don't increment the valid PTE count for the
  979. // page table page.
  980. //
  981. ASSERT (Pfn1->u2.ShareCount != 0);
  982. ASSERT (Pfn1->u3.e2.ReferenceCount != 0);
  983. }
  984. else {
  985. MiUnlinkPageFromList (Pfn1);
  986. ASSERT (Pfn1->u3.e1.CacheAttribute == MiCached);
  987. //
  988. // Update the PFN database - the reference count must be
  989. // incremented as the share count is going to go from zero to 1.
  990. //
  991. ASSERT (Pfn1->u2.ShareCount == 0);
  992. //
  993. // The PFN reference count will be 1 already here if the
  994. // modified writer has begun a write of this page. Otherwise
  995. // it's ordinarily 0.
  996. //
  997. MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1, 8);
  998. Pfn1->u3.e2.ReferenceCount += 1;
  999. }
  1000. }
  1001. //
  1002. // Join with collided page fault code to handle updating
  1003. // the transition PTE.
  1004. //
  1005. ASSERT (Pfn1->u4.InPageError == 0);
  1006. if (Pfn1->u2.ShareCount == 0) {
  1007. MI_REMOVE_LOCKED_PAGE_CHARGE (Pfn1, 9);
  1008. }
  1009. Pfn1->u2.ShareCount += 1;
  1010. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  1011. ASSERT (Pfn1->u3.e1.CacheAttribute == MiCached);
  1012. //
  1013. // Paged pool is trimmed without regard to sharecounts.
  1014. // This means a paged pool PTE can be in transition while
  1015. // the page is still marked active.
  1016. //
  1017. // Note this check only needs to be done for system space addresses
  1018. // as user space address faults lock down the page containing the
  1019. // prototype PTE entries before processing the fault.
  1020. //
  1021. // One example is a system cache fault - the FaultingAddress is a
  1022. // system cache virtual address, the PointerPte points at the pool
  1023. // allocation containing the relevant prototype PTEs. This page
  1024. // may have been trimmed because it isn't locked down during
  1025. // processing of system space virtual address faults.
  1026. //
  1027. if (FaultingAddress >= MmSystemRangeStart) {
  1028. PointerToPteForProtoPage = MiGetPteAddress (PointerPte);
  1029. TempPte = *PointerToPteForProtoPage;
  1030. if ((TempPte.u.Hard.Valid == 0) &&
  1031. (TempPte.u.Soft.Transition == 1)) {
  1032. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&TempPte);
  1033. Pfn2 = MI_PFN_ELEMENT (PageFrameIndex);
  1034. ASSERT ((Pfn2->u3.e1.ReadInProgress == 0) &&
  1035. (Pfn2->u4.InPageError));
  1036. ASSERT (Pfn2->u3.e1.PageLocation == ActiveAndValid);
  1037. ASSERT (((Pfn2->PteAddress >= MiGetPteAddress(MmPagedPoolStart)) &&
  1038. (Pfn2->PteAddress <= MiGetPteAddress(MmPagedPoolEnd))) ||
  1039. ((Pfn2->PteAddress >= MiGetPteAddress(MmSpecialPoolStart)) &&
  1040. (Pfn2->PteAddress <= MiGetPteAddress(MmSpecialPoolEnd))));
  1041. //
  1042. // Don't increment the valid PTE count for the
  1043. // paged pool page.
  1044. //
  1045. ASSERT (Pfn2->u2.ShareCount != 0);
  1046. ASSERT (Pfn2->u3.e2.ReferenceCount != 0);
  1047. Pfn2->u3.e1.PageLocation = ActiveAndValid;
  1048. ASSERT (Pfn2->u3.e1.CacheAttribute == MiCached);
  1049. MI_MAKE_VALID_PTE (TempPte,
  1050. PageFrameIndex,
  1051. Pfn2->OriginalPte.u.Soft.Protection,
  1052. PointerToPteForProtoPage);
  1053. MI_WRITE_VALID_PTE (PointerToPteForProtoPage, TempPte);
  1054. }
  1055. }
  1056. MI_MAKE_TRANSITION_PTE_VALID (TempPte, PointerPte);
  1057. //
  1058. // If the modified field is set in the PFN database and this
  1059. // page is not copy on modify, then set the dirty bit.
  1060. // This can be done as the modified page will not be
  1061. // written to the paging file until this PTE is made invalid.
  1062. //
  1063. if (Pfn1->u3.e1.Modified && TempPte.u.Hard.Write &&
  1064. (TempPte.u.Hard.CopyOnWrite == 0)) {
  1065. MI_SET_PTE_DIRTY (TempPte);
  1066. }
  1067. else {
  1068. MI_SET_PTE_CLEAN (TempPte);
  1069. }
  1070. MI_WRITE_VALID_PTE (PointerPte, TempPte);
  1071. if (!PfnLockHeld) {
  1072. if (Pfn1->u1.Event == 0) {
  1073. Pfn1->u1.Event = (PVOID)PsGetCurrentThread();
  1074. }
  1075. UNLOCK_PFN (APC_LEVEL);
  1076. PERFINFO_SOFTFAULT(Pfn1, FaultingAddress, PERFINFO_LOG_TYPE_TRANSITIONFAULT)
  1077. MiAddValidPageToWorkingSet (FaultingAddress,
  1078. PointerPte,
  1079. Pfn1,
  1080. 0);
  1081. }
  1082. return STATUS_PAGE_FAULT_TRANSITION;
  1083. }
  1084. else {
  1085. if (!PfnLockHeld) {
  1086. UNLOCK_PFN (APC_LEVEL);
  1087. }
  1088. }
  1089. return STATUS_REFAULT;
  1090. }
  1091. NTSTATUS
  1092. MiResolvePageFileFault (
  1093. IN PVOID FaultingAddress,
  1094. IN PMMPTE PointerPte,
  1095. OUT PMMINPAGE_SUPPORT *ReadBlock,
  1096. IN PEPROCESS Process
  1097. )
  1098. /*++
  1099. Routine Description:
  1100. This routine builds the MDL and other structures to allow a
  1101. read operation on a page file for a page fault.
  1102. Arguments:
  1103. FaultingAddress - Supplies the faulting address.
  1104. PointerPte - Supplies the PTE for the faulting address.
  1105. ReadBlock - Supplies a pointer to put the address of the read block which
  1106. needs to be completed before an I/O can be issued.
  1107. Process - Supplies a pointer to the process object. If this
  1108. parameter is NULL, then the fault is for system
  1109. space and the process's working set lock is not held.
  1110. Return Value:
  1111. status. A status value of STATUS_ISSUE_PAGING_IO is returned
  1112. if this function completes successfully.
  1113. Environment:
  1114. Kernel mode, PFN lock held.
  1115. --*/
  1116. {
  1117. PMDL Mdl;
  1118. ULONG i;
  1119. PMMPTE BasePte;
  1120. PMMPTE CheckPte;
  1121. PMMPTE FirstPte;
  1122. PMMPTE LastPte;
  1123. PSUBSECTION Subsection;
  1124. ULONG ReadSize;
  1125. LARGE_INTEGER StartingOffset;
  1126. PFN_NUMBER PageFrameIndex;
  1127. PPFN_NUMBER MdlPage;
  1128. ULONG PageFileNumber;
  1129. ULONG ClusterSize;
  1130. ULONG BackwardPageCount;
  1131. ULONG ForwardPageCount;
  1132. ULONG MaxForwardPageCount;
  1133. ULONG MaxBackwardPageCount;
  1134. WSLE_NUMBER WorkingSetIndex;
  1135. ULONG PageColor;
  1136. MMPTE TempPte;
  1137. MMPTE ComparePte;
  1138. PMMINPAGE_SUPPORT ReadBlockLocal;
  1139. PETHREAD CurrentThread;
  1140. PMMVAD Vad;
  1141. // **************************************************
  1142. // Page File Read
  1143. // **************************************************
  1144. //
  1145. // Calculate the VBN for the in-page operation.
  1146. //
  1147. TempPte = *PointerPte;
  1148. if (TempPte.u.Hard.Valid == 1) {
  1149. UNLOCK_PFN (APC_LEVEL);
  1150. return STATUS_REFAULT;
  1151. }
  1152. ASSERT (TempPte.u.Soft.Prototype == 0);
  1153. ASSERT (TempPte.u.Soft.Transition == 0);
  1154. MM_PFN_LOCK_ASSERT();
  1155. if (MiEnsureAvailablePageOrWait (Process, FaultingAddress)) {
  1156. //
  1157. // A wait operation was performed which dropped the locks,
  1158. // repeat this fault.
  1159. //
  1160. UNLOCK_PFN (APC_LEVEL);
  1161. return STATUS_REFAULT;
  1162. }
  1163. ReadBlockLocal = MiGetInPageSupportBlock (TRUE, Process);
  1164. if (ReadBlockLocal == NULL) {
  1165. UNLOCK_PFN (APC_LEVEL);
  1166. return STATUS_REFAULT;
  1167. }
  1168. MmInfoCounters.PageReadCount += 1;
  1169. MmInfoCounters.PageReadIoCount += 1;
  1170. //
  1171. // Transition collisions rely on the entire PFN (including the event field)
  1172. // being initialized, the ReadBlockLocal's event being not-signaled,
  1173. // and the ReadBlockLocal's thread and waitcount being initialized.
  1174. //
  1175. // All of this has been done by MiGetInPageSupportBlock already except
  1176. // the PFN settings. The PFN lock can be safely released once
  1177. // this is done.
  1178. //
  1179. ReadSize = 1;
  1180. BasePte = NULL;
  1181. if (MI_IS_PAGE_TABLE_ADDRESS(PointerPte)) {
  1182. WorkingSetIndex = 1;
  1183. }
  1184. else {
  1185. WorkingSetIndex = MI_PROTOTYPE_WSINDEX;
  1186. }
  1187. //
  1188. // Capture the desired cluster size.
  1189. //
  1190. ClusterSize = MmClusterPageFileReads;
  1191. ASSERT (ClusterSize <= MM_MAXIMUM_READ_CLUSTER_SIZE);
  1192. if (MiInPageSinglePages != 0) {
  1193. MiInPageSinglePages -= 1;
  1194. }
  1195. else if ((ClusterSize > 1) && (MmAvailablePages > 256)) {
  1196. //
  1197. // Maybe this condition should be only on free+zeroed pages (ie: don't
  1198. // include standby). Maybe it should look at the recycle rate of
  1199. // the standby list, etc, etc.
  1200. //
  1201. ASSERT (ClusterSize <= MmAvailablePages);
  1202. //
  1203. // Attempt to cluster ahead and behind.
  1204. //
  1205. MaxForwardPageCount = PTE_PER_PAGE - (BYTE_OFFSET (PointerPte) / sizeof (MMPTE));
  1206. ASSERT (MaxForwardPageCount != 0);
  1207. MaxBackwardPageCount = PTE_PER_PAGE - MaxForwardPageCount;
  1208. MaxForwardPageCount -= 1;
  1209. if (WorkingSetIndex == MI_PROTOTYPE_WSINDEX) {
  1210. //
  1211. // This is a pagefile read for a shared memory (prototype PTE)
  1212. // backed section. Stay within the prototype PTE pool allocation.
  1213. //
  1214. // The prototype PTE pool start and end must be carefully
  1215. // calculated (remember the user's view may be smaller or larger
  1216. // than this). Don't bother walking the entire VAD tree if it is
  1217. // very large as this can take a significant amount of time.
  1218. //
  1219. if ((FaultingAddress <= MM_HIGHEST_USER_ADDRESS) &&
  1220. (Process->NumberOfVads < 128)) {
  1221. Vad = MiLocateAddress (FaultingAddress);
  1222. if (Vad != NULL) {
  1223. Subsection = MiLocateSubsection (Vad,
  1224. MI_VA_TO_VPN(FaultingAddress));
  1225. if (Subsection != NULL) {
  1226. FirstPte = &Subsection->SubsectionBase[0];
  1227. LastPte = &Subsection->SubsectionBase[Subsection->PtesInSubsection];
  1228. if ((ULONG)(LastPte - PointerPte - 1) < MaxForwardPageCount) {
  1229. MaxForwardPageCount = (ULONG)(LastPte - PointerPte - 1);
  1230. }
  1231. if ((ULONG)(PointerPte - FirstPte) < MaxBackwardPageCount) {
  1232. MaxBackwardPageCount = (ULONG)(PointerPte - FirstPte);
  1233. }
  1234. }
  1235. else {
  1236. ClusterSize = 0;
  1237. }
  1238. }
  1239. else {
  1240. ClusterSize = 0;
  1241. }
  1242. }
  1243. else {
  1244. ClusterSize = 0;
  1245. }
  1246. }
  1247. CurrentThread = PsGetCurrentThread();
  1248. if (CurrentThread->ForwardClusterOnly) {
  1249. MaxBackwardPageCount = 0;
  1250. if (MaxForwardPageCount == 0) {
  1251. //
  1252. // This PTE is the last one in the page table page and
  1253. // no backwards clustering is enabled for this thread so
  1254. // no clustering can be done.
  1255. //
  1256. ClusterSize = 0;
  1257. }
  1258. }
  1259. if (ClusterSize != 0) {
  1260. if (MaxForwardPageCount > ClusterSize) {
  1261. MaxForwardPageCount = ClusterSize;
  1262. }
  1263. ComparePte = TempPte;
  1264. CheckPte = PointerPte + 1;
  1265. ForwardPageCount = MaxForwardPageCount;
  1266. //
  1267. // Try to cluster forward within the page of PTEs.
  1268. //
  1269. while (ForwardPageCount != 0) {
  1270. ASSERT (MiIsPteOnPdeBoundary (CheckPte) == 0);
  1271. ComparePte.u.Soft.PageFileHigh += 1;
  1272. if (CheckPte->u.Long != ComparePte.u.Long) {
  1273. break;
  1274. }
  1275. ForwardPageCount -= 1;
  1276. CheckPte += 1;
  1277. }
  1278. ReadSize += (MaxForwardPageCount - ForwardPageCount);
  1279. //
  1280. // Try to cluster backward within the page of PTEs. Donate
  1281. // any unused forward cluster space to the backwards gathering
  1282. // but keep the entire transfer within the MDL.
  1283. //
  1284. ClusterSize -= (MaxForwardPageCount - ForwardPageCount);
  1285. if (MaxBackwardPageCount > ClusterSize) {
  1286. MaxBackwardPageCount = ClusterSize;
  1287. }
  1288. ComparePte = TempPte;
  1289. BasePte = PointerPte;
  1290. CheckPte = PointerPte;
  1291. BackwardPageCount = MaxBackwardPageCount;
  1292. while (BackwardPageCount != 0) {
  1293. ASSERT (MiIsPteOnPdeBoundary(CheckPte) == 0);
  1294. CheckPte -= 1;
  1295. ComparePte.u.Soft.PageFileHigh -= 1;
  1296. if (CheckPte->u.Long != ComparePte.u.Long) {
  1297. break;
  1298. }
  1299. BackwardPageCount -= 1;
  1300. }
  1301. ReadSize += (MaxBackwardPageCount - BackwardPageCount);
  1302. BasePte -= (MaxBackwardPageCount - BackwardPageCount);
  1303. }
  1304. }
  1305. if (ReadSize == 1) {
  1306. //
  1307. // Get a page and put the PTE into the transition state with the
  1308. // read-in-progress flag set.
  1309. //
  1310. if (Process == HYDRA_PROCESS) {
  1311. PageColor = MI_GET_PAGE_COLOR_FROM_SESSION (MmSessionSpace);
  1312. }
  1313. else if (Process == NULL) {
  1314. PageColor = MI_GET_PAGE_COLOR_FROM_VA(FaultingAddress);
  1315. }
  1316. else {
  1317. PageColor = MI_PAGE_COLOR_VA_PROCESS (FaultingAddress,
  1318. &Process->NextPageColor);
  1319. }
  1320. PageFrameIndex = MiRemoveAnyPage (PageColor);
  1321. MiInitializeReadInProgressSinglePfn (PageFrameIndex,
  1322. PointerPte,
  1323. &ReadBlockLocal->Event,
  1324. WorkingSetIndex);
  1325. MI_RETRIEVE_USED_PAGETABLE_ENTRIES_FROM_PTE (ReadBlockLocal, &TempPte);
  1326. }
  1327. else {
  1328. Mdl = &ReadBlockLocal->Mdl;
  1329. MdlPage = &ReadBlockLocal->Page[0];
  1330. ASSERT (ReadSize <= MmAvailablePages);
  1331. for (i = 0; i < ReadSize; i += 1) {
  1332. //
  1333. // Get a page and put the PTE into the transition state with the
  1334. // read-in-progress flag set.
  1335. //
  1336. if (Process == HYDRA_PROCESS) {
  1337. PageColor = MI_GET_PAGE_COLOR_FROM_SESSION (MmSessionSpace);
  1338. }
  1339. else if (Process == NULL) {
  1340. PageColor = MI_GET_PAGE_COLOR_FROM_VA(FaultingAddress);
  1341. }
  1342. else {
  1343. PageColor = MI_PAGE_COLOR_VA_PROCESS (FaultingAddress,
  1344. &Process->NextPageColor);
  1345. }
  1346. *MdlPage = MiRemoveAnyPage (PageColor);
  1347. MdlPage += 1;
  1348. }
  1349. ReadSize *= PAGE_SIZE;
  1350. //
  1351. // Note PageFrameIndex is the actual frame that was requested by
  1352. // this caller. All the other frames will be put in transition
  1353. // when the inpage completes (provided there are no colliding threads).
  1354. //
  1355. MdlPage = &ReadBlockLocal->Page[0];
  1356. PageFrameIndex = *(MdlPage + (PointerPte - BasePte));
  1357. //
  1358. // Initialize the MDL for this request.
  1359. //
  1360. MmInitializeMdl (Mdl,
  1361. MiGetVirtualAddressMappedByPte (BasePte),
  1362. ReadSize);
  1363. Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
  1364. //
  1365. // Set PointerPte and TempPte to the base of the cluster so the
  1366. // correct starting offset can be calculated below. Note this must
  1367. // be done before MiInitializeReadInProgressPfn overwrites the PTEs.
  1368. //
  1369. PointerPte = BasePte;
  1370. TempPte = *PointerPte;
  1371. ASSERT (TempPte.u.Soft.Prototype == 0);
  1372. ASSERT (TempPte.u.Soft.Transition == 0);
  1373. //
  1374. // Put the PTEs into the transition state with the
  1375. // read-in-progress flag set.
  1376. //
  1377. MiInitializeReadInProgressPfn (Mdl,
  1378. BasePte,
  1379. &ReadBlockLocal->Event,
  1380. WorkingSetIndex);
  1381. MI_ZERO_USED_PAGETABLE_ENTRIES_IN_INPAGE_SUPPORT(ReadBlockLocal);
  1382. }
  1383. UNLOCK_PFN (APC_LEVEL);
  1384. *ReadBlock = ReadBlockLocal;
  1385. PageFileNumber = GET_PAGING_FILE_NUMBER (TempPte);
  1386. StartingOffset.LowPart = GET_PAGING_FILE_OFFSET (TempPte);
  1387. ASSERT (StartingOffset.LowPart <= MmPagingFile[PageFileNumber]->Size);
  1388. StartingOffset.HighPart = 0;
  1389. StartingOffset.QuadPart = StartingOffset.QuadPart << PAGE_SHIFT;
  1390. ReadBlockLocal->FilePointer = MmPagingFile[PageFileNumber]->File;
  1391. #if DBG
  1392. if (((StartingOffset.QuadPart >> PAGE_SHIFT) < 8192) && (PageFileNumber == 0)) {
  1393. if ((MmPagingFileDebug[StartingOffset.QuadPart >> PAGE_SHIFT] & ~0x1f) !=
  1394. ((ULONG_PTR)PointerPte << 3)) {
  1395. if ((MmPagingFileDebug[StartingOffset.QuadPart >> PAGE_SHIFT] & ~0x1f) !=
  1396. ((ULONG_PTR)(MiGetPteAddress(FaultingAddress)) << 3)) {
  1397. DbgPrint("MMINPAGE: Mismatch PointerPte %p Offset %I64X info %p\n",
  1398. PointerPte,
  1399. StartingOffset.QuadPart >> PAGE_SHIFT,
  1400. MmPagingFileDebug[StartingOffset.QuadPart >> PAGE_SHIFT]);
  1401. DbgBreakPoint();
  1402. }
  1403. }
  1404. }
  1405. #endif //DBG
  1406. ReadBlockLocal->ReadOffset = StartingOffset;
  1407. ReadBlockLocal->BasePte = PointerPte;
  1408. //
  1409. // Build a single page MDL for the request unless it was a cluster -
  1410. // clustered MDLs have already been constructed.
  1411. //
  1412. if (ReadSize == 1) {
  1413. MmInitializeMdl (&ReadBlockLocal->Mdl, PAGE_ALIGN(FaultingAddress), PAGE_SIZE);
  1414. ReadBlockLocal->Mdl.MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
  1415. ReadBlockLocal->Page[0] = PageFrameIndex;
  1416. }
  1417. ReadBlockLocal->Pfn = MI_PFN_ELEMENT (PageFrameIndex);
  1418. return STATUS_ISSUE_PAGING_IO;
  1419. }
  1420. NTSTATUS
  1421. MiResolveProtoPteFault (
  1422. IN ULONG_PTR StoreInstruction,
  1423. IN PVOID FaultingAddress,
  1424. IN PMMPTE PointerPte,
  1425. IN PMMPTE PointerProtoPte,
  1426. OUT PMMINPAGE_SUPPORT *ReadBlock,
  1427. IN PEPROCESS Process,
  1428. OUT PLOGICAL ApcNeeded
  1429. )
  1430. /*++
  1431. Routine Description:
  1432. This routine resolves a prototype PTE fault.
  1433. Arguments:
  1434. StoreInstruction - Supplies nonzero if the instruction is trying
  1435. to modify the faulting address (i.e. write
  1436. access required).
  1437. FaultingAddress - Supplies the faulting address.
  1438. PointerPte - Supplies the PTE for the faulting address.
  1439. PointerProtoPte - Supplies a pointer to the prototype PTE to fault in.
  1440. ReadBlock - Supplies a pointer to put the address of the read block which
  1441. needs to be completed before an I/O can be issued.
  1442. Process - Supplies a pointer to the process object. If this
  1443. parameter is NULL, then the fault is for system
  1444. space and the process's working set lock is not held.
  1445. ApcNeeded - Supplies a pointer to a location set to TRUE if an I/O
  1446. completion APC is needed to complete partial IRPs that
  1447. collided.
  1448. Return Value:
  1449. status, either STATUS_SUCCESS, STATUS_REFAULT, or an I/O status
  1450. code.
  1451. Environment:
  1452. Kernel mode, PFN lock held.
  1453. --*/
  1454. {
  1455. MMPTE TempPte;
  1456. PFN_NUMBER PageFrameIndex;
  1457. PMMPFN Pfn1;
  1458. NTSTATUS status;
  1459. ULONG CopyOnWrite;
  1460. LOGICAL PfnHeld;
  1461. PMMINPAGE_SUPPORT CapturedEvent;
  1462. CapturedEvent = NULL;
  1463. //
  1464. // Note the PFN lock must be held as the routine to locate a working
  1465. // set entry decrements the share count of PFN elements.
  1466. //
  1467. MM_PFN_LOCK_ASSERT();
  1468. #if DBG
  1469. if (MmDebug & MM_DBG_PTE_UPDATE) {
  1470. DbgPrint("MM:actual fault %p va %p\n",PointerPte, FaultingAddress);
  1471. MiFormatPte(PointerPte);
  1472. }
  1473. #endif //DBG
  1474. ASSERT (PointerPte->u.Soft.Prototype == 1);
  1475. TempPte = *PointerProtoPte;
  1476. //
  1477. // The page containing the prototype PTE is resident,
  1478. // handle the fault referring to the prototype PTE.
  1479. // If the prototype PTE is already valid, make this
  1480. // PTE valid and up the share count etc.
  1481. //
  1482. if (TempPte.u.Hard.Valid) {
  1483. //
  1484. // Prototype PTE is valid.
  1485. //
  1486. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&TempPte);
  1487. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1488. Pfn1->u2.ShareCount += 1;
  1489. status = STATUS_SUCCESS;
  1490. //
  1491. // Count this as a transition fault.
  1492. //
  1493. MmInfoCounters.TransitionCount += 1;
  1494. PfnHeld = TRUE;
  1495. PERFINFO_SOFTFAULT(Pfn1, FaultingAddress, PERFINFO_LOG_TYPE_ADDVALIDPAGETOWS)
  1496. }
  1497. else {
  1498. //
  1499. // Check to make sure the prototype PTE is committed.
  1500. //
  1501. if (TempPte.u.Long == 0) {
  1502. #if DBG
  1503. if (MmDebug & MM_DBG_STOP_ON_ACCVIO) {
  1504. DbgPrint("MM:access vio2 - %p\n",FaultingAddress);
  1505. MiFormatPte(PointerPte);
  1506. DbgBreakPoint();
  1507. }
  1508. #endif //DEBUG
  1509. UNLOCK_PFN (APC_LEVEL);
  1510. return STATUS_ACCESS_VIOLATION;
  1511. }
  1512. //
  1513. // If the PTE indicates that the protection field to be
  1514. // checked is in the prototype PTE, check it now.
  1515. //
  1516. CopyOnWrite = FALSE;
  1517. if (PointerPte->u.Soft.PageFileHigh != MI_PTE_LOOKUP_NEEDED) {
  1518. if (PointerPte->u.Proto.ReadOnly == 0) {
  1519. //
  1520. // Check for kernel mode access, we have already verified
  1521. // that the user has access to the virtual address.
  1522. //
  1523. #if 0 // removed this assert since mapping drivers via MmMapViewInSystemSpace
  1524. // file violates the assert.
  1525. {
  1526. PSUBSECTION Sub;
  1527. if (PointerProtoPte->u.Soft.Prototype == 1) {
  1528. Sub = MiGetSubsectionAddress (PointerProtoPte);
  1529. ASSERT (Sub->u.SubsectionFlags.Protection ==
  1530. PointerProtoPte->u.Soft.Protection);
  1531. }
  1532. }
  1533. #endif //DBG
  1534. status = MiAccessCheck (PointerProtoPte,
  1535. StoreInstruction,
  1536. KernelMode,
  1537. MI_GET_PROTECTION_FROM_SOFT_PTE (PointerProtoPte),
  1538. TRUE);
  1539. if (status != STATUS_SUCCESS) {
  1540. #if DBG
  1541. if (MmDebug & MM_DBG_STOP_ON_ACCVIO) {
  1542. DbgPrint("MM:access vio3 - %p\n",FaultingAddress);
  1543. MiFormatPte(PointerPte);
  1544. MiFormatPte(PointerProtoPte);
  1545. DbgBreakPoint();
  1546. }
  1547. #endif
  1548. UNLOCK_PFN (APC_LEVEL);
  1549. return status;
  1550. }
  1551. if ((PointerProtoPte->u.Soft.Protection & MM_COPY_ON_WRITE_MASK) ==
  1552. MM_COPY_ON_WRITE_MASK) {
  1553. CopyOnWrite = TRUE;
  1554. }
  1555. }
  1556. }
  1557. else {
  1558. if ((PointerPte->u.Soft.Protection & MM_COPY_ON_WRITE_MASK) ==
  1559. MM_COPY_ON_WRITE_MASK) {
  1560. CopyOnWrite = TRUE;
  1561. }
  1562. }
  1563. if ((!IS_PTE_NOT_DEMAND_ZERO(TempPte)) && (CopyOnWrite)) {
  1564. //
  1565. // The prototype PTE is demand zero and copy on
  1566. // write. Make this PTE a private demand zero PTE.
  1567. //
  1568. ASSERT (Process != NULL);
  1569. PointerPte->u.Long = MM_DEMAND_ZERO_WRITE_PTE;
  1570. UNLOCK_PFN (APC_LEVEL);
  1571. status = MiResolveDemandZeroFault (FaultingAddress,
  1572. PointerPte,
  1573. Process,
  1574. FALSE);
  1575. return status;
  1576. }
  1577. //
  1578. // Make the prototype PTE valid, the prototype PTE is in
  1579. // one of these 4 states:
  1580. //
  1581. // demand zero
  1582. // transition
  1583. // paging file
  1584. // mapped file
  1585. //
  1586. if (TempPte.u.Soft.Prototype == 1) {
  1587. //
  1588. // Mapped File.
  1589. //
  1590. status = MiResolveMappedFileFault (FaultingAddress,
  1591. PointerProtoPte,
  1592. ReadBlock,
  1593. Process);
  1594. //
  1595. // Returns with PFN lock held.
  1596. //
  1597. PfnHeld = TRUE;
  1598. }
  1599. else if (TempPte.u.Soft.Transition == 1) {
  1600. //
  1601. // Transition.
  1602. //
  1603. status = MiResolveTransitionFault (FaultingAddress,
  1604. PointerProtoPte,
  1605. Process,
  1606. TRUE,
  1607. ApcNeeded,
  1608. &CapturedEvent);
  1609. //
  1610. // Returns with PFN lock held.
  1611. //
  1612. PfnHeld = TRUE;
  1613. }
  1614. else if (TempPte.u.Soft.PageFileHigh == 0) {
  1615. //
  1616. // Demand Zero
  1617. //
  1618. status = MiResolveDemandZeroFault (FaultingAddress,
  1619. PointerProtoPte,
  1620. Process,
  1621. TRUE);
  1622. //
  1623. // Returns with PFN lock held.
  1624. //
  1625. PfnHeld = TRUE;
  1626. }
  1627. else {
  1628. //
  1629. // Paging file.
  1630. //
  1631. status = MiResolvePageFileFault (FaultingAddress,
  1632. PointerProtoPte,
  1633. ReadBlock,
  1634. Process);
  1635. //
  1636. // Returns with PFN lock released.
  1637. //
  1638. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  1639. PfnHeld = FALSE;
  1640. }
  1641. }
  1642. if (NT_SUCCESS(status)) {
  1643. ASSERT (PointerPte->u.Hard.Valid == 0);
  1644. MiCompleteProtoPteFault (StoreInstruction,
  1645. FaultingAddress,
  1646. PointerPte,
  1647. PointerProtoPte);
  1648. if (CapturedEvent != NULL) {
  1649. MiFreeInPageSupportBlock (CapturedEvent);
  1650. }
  1651. }
  1652. else {
  1653. if (PfnHeld == TRUE) {
  1654. UNLOCK_PFN (APC_LEVEL);
  1655. }
  1656. ASSERT (KeGetCurrentIrql() == APC_LEVEL);
  1657. if (CapturedEvent != NULL) {
  1658. MiFreeInPageSupportBlock (CapturedEvent);
  1659. }
  1660. //
  1661. // Stop high priority threads from consuming the CPU on collided
  1662. // faults for pages that are still marked with inpage errors. All
  1663. // the threads must let go of the page so it can be freed and the
  1664. // inpage I/O reissued to the filesystem.
  1665. //
  1666. if (MmIsRetryIoStatus(status)) {
  1667. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  1668. status = STATUS_REFAULT;
  1669. }
  1670. }
  1671. return status;
  1672. }
  1673. NTSTATUS
  1674. MiCompleteProtoPteFault (
  1675. IN ULONG_PTR StoreInstruction,
  1676. IN PVOID FaultingAddress,
  1677. IN PMMPTE PointerPte,
  1678. IN PMMPTE PointerProtoPte
  1679. )
  1680. /*++
  1681. Routine Description:
  1682. This routine completes a prototype PTE fault. It is invoked
  1683. after a read operation has completed bringing the data into
  1684. memory.
  1685. Arguments:
  1686. StoreInstruction - Supplies nonzero if the instruction is trying
  1687. to modify the faulting address (i.e. write
  1688. access required).
  1689. FaultingAddress - Supplies the faulting address.
  1690. PointerPte - Supplies the PTE for the faulting address.
  1691. PointerProtoPte - Supplies a pointer to the prototype PTE to fault in,
  1692. NULL if no prototype PTE exists.
  1693. Return Value:
  1694. status.
  1695. Environment:
  1696. Kernel mode, PFN lock held.
  1697. --*/
  1698. {
  1699. MMPTE TempPte;
  1700. MMWSLE ProtoProtect;
  1701. PFN_NUMBER PageFrameIndex;
  1702. PMMPFN Pfn1;
  1703. PMMPFN Pfn2;
  1704. PMMPTE ContainingPageTablePointer;
  1705. PFILE_OBJECT FileObject;
  1706. LONGLONG FileOffset;
  1707. PSUBSECTION Subsection;
  1708. MMSECTION_FLAGS ControlAreaFlags;
  1709. ULONG Flags;
  1710. MM_PFN_LOCK_ASSERT();
  1711. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerProtoPte);
  1712. Pfn1 = MI_PFN_ELEMENT(PageFrameIndex);
  1713. Pfn1->u3.e1.PrototypePte = 1;
  1714. //
  1715. // Capture prefetch fault information.
  1716. //
  1717. Subsection = NULL;
  1718. if (CCPF_IS_PREFETCHER_ACTIVE()) {
  1719. TempPte = Pfn1->OriginalPte;
  1720. if (TempPte.u.Soft.Prototype == 1) {
  1721. Subsection = MiGetSubsectionAddress (&TempPte);
  1722. }
  1723. }
  1724. //
  1725. // Prototype PTE is now valid, make the PTE valid.
  1726. //
  1727. ASSERT (PointerProtoPte->u.Hard.Valid == 1);
  1728. //
  1729. // A PTE just went from not present, not transition to
  1730. // present. The share count and valid count must be
  1731. // updated in the page table page which contains this PTE.
  1732. //
  1733. ContainingPageTablePointer = MiGetPteAddress(PointerPte);
  1734. Pfn2 = MI_PFN_ELEMENT(ContainingPageTablePointer->u.Hard.PageFrameNumber);
  1735. Pfn2->u2.ShareCount += 1;
  1736. ProtoProtect.u1.Long = 0;
  1737. if (PointerPte->u.Soft.PageFileHigh == MI_PTE_LOOKUP_NEEDED) {
  1738. //
  1739. // The protection code for the real PTE comes from the real PTE as
  1740. // it was placed there earlier during the handling of this fault.
  1741. //
  1742. ProtoProtect.u1.e1.Protection = MI_GET_PROTECTION_FROM_SOFT_PTE(PointerPte);
  1743. }
  1744. else if ((MI_IS_SESSION_IMAGE_ADDRESS (FaultingAddress)) &&
  1745. (PointerPte->u.Proto.ReadOnly == 0)) {
  1746. //
  1747. // Session image addresses must be treated specially. This is
  1748. // because we only encode the readonly bit in the PTEs in the
  1749. // native pagetables (ie: not in the prototype PTEs themselves).
  1750. //
  1751. // Normally MiWaitForInPageComplete checks to make sure that collided
  1752. // faults are processed properly by seeing if the prototype PTE
  1753. // state before and after the fault is the same. This is not enough
  1754. // because for the session image range, the readonly attribute of the
  1755. // native PTE must also be taken into account because it must be
  1756. // preserved here. Consider:
  1757. //
  1758. // Thread A faults on a session image address that is *data* (ie:
  1759. // readwrite bss). The native PTE is set to LOOKUP_NEEDED and execute-
  1760. // writecopy is set in the native PTE as well at the start of the
  1761. // fault. The prototype PTE is put in transition and an inpage
  1762. // initiated.
  1763. //
  1764. // Then thread B collides on the same address, eventually racing
  1765. // ahead of thread A after the inpage completes and makes both the
  1766. // prototype PTE and the hardware PTE valid, and puts the session
  1767. // image VA into the session working set list.
  1768. //
  1769. // Now the working set trimmer executes and trims the newly inserted
  1770. // session image VA. The hardware PTE is repointed back to the
  1771. // prototype PTE *WITHOUT* the readonly bit set (this is correct),
  1772. // and the prototype PTE continues to point at the same transition
  1773. // page because the reference count on the PFN is still held by
  1774. // thread A.
  1775. //
  1776. // Then thread A resumes to process the initial fault, unaware
  1777. // that thread B and the trimmer thread ran while thread A was waiting
  1778. // for the inpage to complete. The first check above will see the
  1779. // hardware PTE is not encoded with lookup needed and thus assume
  1780. // that the protection should be set to the prototype PTE below.
  1781. // This would be wrong as the session address referred to data !
  1782. //
  1783. // This is the edge condition the code in this if statement handles.
  1784. //
  1785. ProtoProtect.u1.e1.Protection = MM_EXECUTE_WRITECOPY;
  1786. }
  1787. else {
  1788. //
  1789. // Use the protection in the prototype PTE to initialize the real PTE.
  1790. //
  1791. ProtoProtect.u1.e1.Protection = MI_GET_PROTECTION_FROM_SOFT_PTE(&Pfn1->OriginalPte);
  1792. ProtoProtect.u1.e1.SameProtectAsProto = 1;
  1793. MI_ASSERT_NOT_SESSION_DATA (PointerPte);
  1794. if ((StoreInstruction != 0) &&
  1795. ((ProtoProtect.u1.e1.Protection & MM_PROTECTION_WRITE_MASK) == 0)) {
  1796. //
  1797. // This is the errant case where the user is trying to write
  1798. // to a readonly subsection in the image. Since we're more than
  1799. // halfway through the fault, take the easy way to clean this up -
  1800. // treat the access as a read for the rest of this trip through
  1801. // the fault. We'll then immediately refault when the instruction
  1802. // is rerun (because it's really a write), and then we'll notice
  1803. // that the user's PTE is not copy-on-write (or even writable!)
  1804. // and return a clean access violation.
  1805. //
  1806. #if DBG
  1807. DbgPrint("MM: user tried to write to a readonly subsection in the image! %p %p %p\n",
  1808. FaultingAddress,
  1809. PointerPte,
  1810. PointerProtoPte);
  1811. #endif
  1812. StoreInstruction = 0;
  1813. }
  1814. }
  1815. MI_SNAP_DATA (Pfn1, PointerProtoPte, 6);
  1816. MI_MAKE_VALID_PTE (TempPte,
  1817. PageFrameIndex,
  1818. ProtoProtect.u1.e1.Protection,
  1819. PointerPte);
  1820. //
  1821. // If this is a store instruction and the page is not copy on
  1822. // write, then set the modified bit in the PFN database and
  1823. // the dirty bit in the PTE. The PTE is not set dirty even
  1824. // if the modified bit is set so writes to the page can be
  1825. // tracked for FlushVirtualMemory.
  1826. //
  1827. if ((StoreInstruction != 0) && (TempPte.u.Hard.CopyOnWrite == 0)) {
  1828. #if DBG
  1829. PVOID Va;
  1830. MMPTE TempPte2;
  1831. PSUBSECTION Subsection2;
  1832. PCONTROL_AREA ControlArea;
  1833. Va = MiGetVirtualAddressMappedByPte (PointerPte);
  1834. //
  1835. // Session space backed by the filesystem is not writable.
  1836. //
  1837. ASSERT (!MI_IS_SESSION_IMAGE_ADDRESS (Va));
  1838. TempPte2 = Pfn1->OriginalPte;
  1839. if (TempPte2.u.Soft.Prototype == 1) {
  1840. Subsection2 = MiGetSubsectionAddress (&TempPte2);
  1841. ControlArea = Subsection2->ControlArea;
  1842. if (ControlArea->DereferenceList.Flink != NULL) {
  1843. if (!KdDebuggerNotPresent) {
  1844. DbgPrint ("MM: page fault completing to dereferenced CA %p %p %p\n",
  1845. ControlArea, Pfn1, PointerPte);
  1846. DbgBreakPoint ();
  1847. }
  1848. }
  1849. }
  1850. #endif
  1851. MI_SET_MODIFIED (Pfn1, 1, 0xA);
  1852. MI_SET_PTE_DIRTY (TempPte);
  1853. if ((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
  1854. (Pfn1->u3.e1.WriteInProgress == 0)) {
  1855. MiReleasePageFileSpace (Pfn1->OriginalPte);
  1856. Pfn1->OriginalPte.u.Soft.PageFileHigh = 0;
  1857. }
  1858. }
  1859. MI_WRITE_VALID_PTE (PointerPte, TempPte);
  1860. if (Pfn1->u1.Event == NULL) {
  1861. Pfn1->u1.Event = (PVOID)PsGetCurrentThread();
  1862. }
  1863. UNLOCK_PFN (APC_LEVEL);
  1864. PERFINFO_SOFTFAULT(Pfn1, FaultingAddress, PERFINFO_LOG_TYPE_PROTOPTEFAULT);
  1865. MiAddValidPageToWorkingSet (FaultingAddress,
  1866. PointerPte,
  1867. Pfn1,
  1868. (ULONG) ProtoProtect.u1.Long);
  1869. //
  1870. // Log prefetch fault information now that the PFN lock has been released
  1871. // and the PTE has been made valid. This minimizes PFN lock contention,
  1872. // allows CcPfLogPageFault to allocate (and fault on) pool, and allows other
  1873. // threads in this process to execute without faulting on this address.
  1874. //
  1875. // Note that the process' working set mutex is still held so any other
  1876. // faults or operations on user addresses by other threads in this process
  1877. // will block for the duration of this call.
  1878. //
  1879. if (Subsection != NULL) {
  1880. FileObject = Subsection->ControlArea->FilePointer;
  1881. FileOffset = MiStartingOffset (Subsection, PointerProtoPte);
  1882. ControlAreaFlags = Subsection->ControlArea->u.Flags;
  1883. Flags = 0;
  1884. if (ControlAreaFlags.Image) {
  1885. if ((Subsection->StartingSector == 0) &&
  1886. (Subsection->SubsectionBase != Subsection->ControlArea->Segment->PrototypePte)) {
  1887. //
  1888. // This is an image that was built with a linker pre-1995
  1889. // (version 2.39 is one example) that put bss into a separate
  1890. // subsection with zero as a starting file offset field
  1891. // in the on-disk image. The prefetcher will fetch from the
  1892. // wrong offset trying to satisfy these ranges (which are
  1893. // actually demand zero when the fault occurs) so don't let
  1894. // the prefetcher know about ANY access within this subsection.
  1895. //
  1896. goto Finish;
  1897. }
  1898. Flags |= CCPF_TYPE_IMAGE;
  1899. }
  1900. if (ControlAreaFlags.Rom) {
  1901. Flags |= CCPF_TYPE_ROM;
  1902. }
  1903. CcPfLogPageFault (FileObject, FileOffset, Flags);
  1904. }
  1905. Finish:
  1906. ASSERT (PointerPte == MiGetPteAddress(FaultingAddress));
  1907. return STATUS_SUCCESS;
  1908. }
  1909. NTSTATUS
  1910. MiResolveMappedFileFault (
  1911. IN PVOID FaultingAddress,
  1912. IN PMMPTE PointerPte,
  1913. OUT PMMINPAGE_SUPPORT *ReadBlock,
  1914. IN PEPROCESS Process
  1915. )
  1916. /*++
  1917. Routine Description:
  1918. This routine builds the MDL and other structures to allow a
  1919. read operation on a mapped file for a page fault.
  1920. Arguments:
  1921. FaultingAddress - Supplies the faulting address.
  1922. PointerPte - Supplies the PTE for the faulting address.
  1923. ReadBlock - Supplies a pointer to put the address of the read block which
  1924. needs to be completed before an I/O can be issued.
  1925. Process - Supplies a pointer to the process object. If this
  1926. parameter is NULL, then the fault is for system
  1927. space and the process's working set lock is not held.
  1928. Return Value:
  1929. status. A status value of STATUS_ISSUE_PAGING_IO is returned
  1930. if this function completes successfully.
  1931. Environment:
  1932. Kernel mode, PFN lock held.
  1933. --*/
  1934. {
  1935. MMPTE TempPte;
  1936. PFN_NUMBER PageFrameIndex;
  1937. PMMPFN Pfn1;
  1938. PMMPFN Pfn2;
  1939. PSUBSECTION Subsection;
  1940. PCONTROL_AREA ControlArea;
  1941. PMDL Mdl;
  1942. ULONG ReadSize;
  1943. PETHREAD CurrentThread;
  1944. PPFN_NUMBER Page;
  1945. PPFN_NUMBER EndPage;
  1946. PMMPTE BasePte;
  1947. PMMPTE CheckPte;
  1948. LARGE_INTEGER StartingOffset;
  1949. LARGE_INTEGER TempOffset;
  1950. PPFN_NUMBER FirstMdlPage;
  1951. PMMINPAGE_SUPPORT ReadBlockLocal;
  1952. ULONG PageColor;
  1953. ULONG ClusterSize;
  1954. ULONG Result;
  1955. PFN_COUNT AvailablePages;
  1956. PMMPTE PteFramePointer;
  1957. PFN_NUMBER PteFramePage;
  1958. ClusterSize = 0;
  1959. ASSERT (PointerPte->u.Soft.Prototype == 1);
  1960. // *********************************************
  1961. // Mapped File (subsection format)
  1962. // *********************************************
  1963. Result = MiEnsureAvailablePageOrWait (Process, FaultingAddress);
  1964. if (Result) {
  1965. //
  1966. // A wait operation was performed which dropped the locks,
  1967. // repeat this fault.
  1968. //
  1969. return STATUS_REFAULT;
  1970. }
  1971. #if DBG
  1972. if (MmDebug & MM_DBG_PTE_UPDATE) {
  1973. MiFormatPte (PointerPte);
  1974. }
  1975. #endif
  1976. //
  1977. // Calculate address of subsection for this prototype PTE.
  1978. //
  1979. Subsection = MiGetSubsectionAddress (PointerPte);
  1980. #ifdef LARGE_PAGES
  1981. //
  1982. // Check to see if this subsection maps a large page, if
  1983. // so, just fill the TB and return a status of PTE_CHANGED.
  1984. //
  1985. if (Subsection->u.SubsectionFlags.LargePages == 1) {
  1986. KeFlushEntireTb (TRUE, TRUE);
  1987. KeFillLargeEntryTb ((PHARDWARE_PTE)(Subsection + 1),
  1988. FaultingAddress,
  1989. Subsection->StartingSector);
  1990. return STATUS_REFAULT;
  1991. }
  1992. #endif
  1993. ControlArea = Subsection->ControlArea;
  1994. if (ControlArea->u.Flags.FailAllIo) {
  1995. return STATUS_IN_PAGE_ERROR;
  1996. }
  1997. if (PointerPte >= &Subsection->SubsectionBase[Subsection->PtesInSubsection]) {
  1998. //
  1999. // Attempt to read past the end of this subsection.
  2000. //
  2001. return STATUS_ACCESS_VIOLATION;
  2002. }
  2003. if (ControlArea->u.Flags.Rom == 1) {
  2004. ASSERT (XIPConfigured == TRUE);
  2005. //
  2006. // Calculate the offset to read into the file.
  2007. // offset = base + ((thispte - basepte) << PAGE_SHIFT)
  2008. //
  2009. StartingOffset.QuadPart = MiStartingOffset (Subsection, PointerPte);
  2010. TempOffset = MiEndingOffset(Subsection);
  2011. ASSERT (StartingOffset.QuadPart < TempOffset.QuadPart);
  2012. //
  2013. // Check to see if the read will go past the end of the file,
  2014. // If so, correct the read size and get a zeroed page instead.
  2015. //
  2016. if ((ControlArea->u.Flags.Image) &&
  2017. (((UINT64)StartingOffset.QuadPart + PAGE_SIZE) > (UINT64)TempOffset.QuadPart)) {
  2018. ReadBlockLocal = MiGetInPageSupportBlock (TRUE, Process);
  2019. if (ReadBlockLocal == NULL) {
  2020. return STATUS_REFAULT;
  2021. }
  2022. *ReadBlock = ReadBlockLocal;
  2023. CurrentThread = PsGetCurrentThread();
  2024. //
  2025. // Build an MDL for the request.
  2026. //
  2027. Mdl = &ReadBlockLocal->Mdl;
  2028. FirstMdlPage = &ReadBlockLocal->Page[0];
  2029. Page = FirstMdlPage;
  2030. #if DBG
  2031. RtlFillMemoryUlong (Page,
  2032. (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof(PFN_NUMBER),
  2033. 0xf1f1f1f1);
  2034. #endif
  2035. ReadSize = PAGE_SIZE;
  2036. BasePte = PointerPte;
  2037. ClusterSize = 1;
  2038. goto UseSingleRamPage;
  2039. }
  2040. PageFrameIndex = (PFN_NUMBER) (StartingOffset.QuadPart >> PAGE_SHIFT);
  2041. PageFrameIndex += ((PLARGE_CONTROL_AREA)ControlArea)->StartingFrame;
  2042. //
  2043. // Increment the PFN reference count in the control area for
  2044. // the subsection (the PFN lock is required to modify this field).
  2045. //
  2046. ControlArea->NumberOfPfnReferences += 1;
  2047. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  2048. ASSERT (Pfn1->u3.e1.Rom == 1);
  2049. if (Pfn1->u3.e1.PageLocation != 0) {
  2050. ASSERT (Pfn1->u3.e1.PageLocation == StandbyPageList);
  2051. MiUnlinkPageFromList (Pfn1);
  2052. //
  2053. // Update the PFN database - the reference count must be
  2054. // incremented as the share count is going to go from zero to 1.
  2055. //
  2056. ASSERT (Pfn1->u2.ShareCount == 0);
  2057. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  2058. ASSERT (Pfn1->u3.e1.CacheAttribute == MiCached);
  2059. Pfn1->u3.e2.ReferenceCount += 1;
  2060. Pfn1->u2.ShareCount += 1;
  2061. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  2062. Pfn1->u3.e1.CacheAttribute = MiCached;
  2063. ASSERT (Pfn1->PteAddress == PointerPte);
  2064. ASSERT (Pfn1->u1.Event == NULL);
  2065. //
  2066. // Determine the page frame number of the page table page which
  2067. // contains this PTE.
  2068. //
  2069. PteFramePointer = MiGetPteAddress (PointerPte);
  2070. if (PteFramePointer->u.Hard.Valid == 0) {
  2071. #if (_MI_PAGING_LEVELS < 3)
  2072. if (!NT_SUCCESS(MiCheckPdeForPagedPool (PointerPte))) {
  2073. #endif
  2074. KeBugCheckEx (MEMORY_MANAGEMENT,
  2075. 0x61940,
  2076. (ULONG_PTR)PointerPte,
  2077. (ULONG_PTR)PteFramePointer->u.Long,
  2078. 0);
  2079. #if (_MI_PAGING_LEVELS < 3)
  2080. }
  2081. #endif
  2082. }
  2083. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  2084. ASSERT (Pfn1->u4.PteFrame == PteFramePage);
  2085. //
  2086. // Increment the share count for the page table page containing
  2087. // this PTE as the PTE is going to be made valid.
  2088. //
  2089. ASSERT (PteFramePage != 0);
  2090. Pfn2 = MI_PFN_ELEMENT (PteFramePage);
  2091. Pfn2->u2.ShareCount += 1;
  2092. }
  2093. else {
  2094. ASSERT (Pfn1->u4.InPageError == 0);
  2095. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  2096. ASSERT (Pfn1->u1.Event == NULL);
  2097. MiInitializePfn (PageFrameIndex, PointerPte, 0);
  2098. }
  2099. //
  2100. // Put the prototype PTE into the valid state.
  2101. //
  2102. MI_MAKE_VALID_PTE (TempPte,
  2103. PageFrameIndex,
  2104. PointerPte->u.Soft.Protection,
  2105. PointerPte);
  2106. MI_WRITE_VALID_PTE (PointerPte, TempPte);
  2107. return STATUS_PAGE_FAULT_TRANSITION;
  2108. }
  2109. CurrentThread = PsGetCurrentThread();
  2110. ReadBlockLocal = MiGetInPageSupportBlock (TRUE, Process);
  2111. if (ReadBlockLocal == NULL) {
  2112. return STATUS_REFAULT;
  2113. }
  2114. *ReadBlock = ReadBlockLocal;
  2115. //
  2116. // Build an MDL for the request.
  2117. //
  2118. Mdl = &ReadBlockLocal->Mdl;
  2119. FirstMdlPage = &ReadBlockLocal->Page[0];
  2120. Page = FirstMdlPage;
  2121. #if DBG
  2122. RtlFillMemoryUlong (Page, (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof(PFN_NUMBER), 0xf1f1f1f1);
  2123. #endif //DBG
  2124. ReadSize = PAGE_SIZE;
  2125. BasePte = PointerPte;
  2126. //
  2127. // Should we attempt to perform page fault clustering?
  2128. //
  2129. AvailablePages = MmAvailablePages;
  2130. if (MiInPageSinglePages != 0) {
  2131. AvailablePages = 0;
  2132. MiInPageSinglePages -= 1;
  2133. }
  2134. if ((!CurrentThread->DisablePageFaultClustering) &&
  2135. (PERFINFO_DO_PAGEFAULT_CLUSTERING()) &&
  2136. (ControlArea->u.Flags.NoModifiedWriting == 0)) {
  2137. if ((AvailablePages > (MmFreeGoal * 2))
  2138. ||
  2139. (((ControlArea->u.Flags.Image != 0) ||
  2140. (CurrentThread->ForwardClusterOnly)) &&
  2141. (AvailablePages > (MM_MAXIMUM_READ_CLUSTER_SIZE + 16)))) {
  2142. //
  2143. // Cluster up to n pages. This one + n-1.
  2144. //
  2145. if (ControlArea->u.Flags.Image == 0) {
  2146. ASSERT (CurrentThread->ReadClusterSize <=
  2147. MM_MAXIMUM_READ_CLUSTER_SIZE);
  2148. ClusterSize = CurrentThread->ReadClusterSize;
  2149. }
  2150. else {
  2151. ClusterSize = MmDataClusterSize;
  2152. if (Subsection->u.SubsectionFlags.Protection &
  2153. MM_PROTECTION_EXECUTE_MASK ) {
  2154. ClusterSize = MmCodeClusterSize;
  2155. }
  2156. }
  2157. EndPage = Page + ClusterSize;
  2158. CheckPte = PointerPte + 1;
  2159. //
  2160. // Try to cluster within the page of PTEs.
  2161. //
  2162. while ((MiIsPteOnPdeBoundary(CheckPte) == 0) &&
  2163. (Page < EndPage) &&
  2164. (CheckPte <
  2165. &Subsection->SubsectionBase[Subsection->PtesInSubsection])
  2166. && (CheckPte->u.Long == BasePte->u.Long)) {
  2167. ControlArea->NumberOfPfnReferences += 1;
  2168. ReadSize += PAGE_SIZE;
  2169. Page += 1;
  2170. CheckPte += 1;
  2171. }
  2172. if ((Page < EndPage) && (!CurrentThread->ForwardClusterOnly)) {
  2173. //
  2174. // Attempt to cluster going backwards from the PTE.
  2175. //
  2176. CheckPte = PointerPte - 1;
  2177. while ((((ULONG_PTR)CheckPte & (PAGE_SIZE - 1)) !=
  2178. (PAGE_SIZE - sizeof(MMPTE))) &&
  2179. (Page < EndPage) &&
  2180. (CheckPte >= Subsection->SubsectionBase) &&
  2181. (CheckPte->u.Long == BasePte->u.Long)) {
  2182. ControlArea->NumberOfPfnReferences += 1;
  2183. ReadSize += PAGE_SIZE;
  2184. Page += 1;
  2185. CheckPte -= 1;
  2186. }
  2187. BasePte = CheckPte + 1;
  2188. }
  2189. }
  2190. }
  2191. //
  2192. //
  2193. // Calculate the offset to read into the file.
  2194. // offset = base + ((thispte - basepte) << PAGE_SHIFT)
  2195. //
  2196. StartingOffset.QuadPart = MiStartingOffset (Subsection, BasePte);
  2197. TempOffset = MiEndingOffset(Subsection);
  2198. ASSERT (StartingOffset.QuadPart < TempOffset.QuadPart);
  2199. UseSingleRamPage:
  2200. //
  2201. // Remove pages to fill in the MDL. This is done here as the
  2202. // base PTE has been determined and can be used for virtual
  2203. // aliasing checks.
  2204. //
  2205. EndPage = FirstMdlPage;
  2206. CheckPte = BasePte;
  2207. while (EndPage < Page) {
  2208. if (Process == HYDRA_PROCESS) {
  2209. PageColor = MI_GET_PAGE_COLOR_FROM_SESSION (MmSessionSpace);
  2210. }
  2211. else if (Process == NULL) {
  2212. PageColor = MI_GET_PAGE_COLOR_FROM_PTE (CheckPte);
  2213. }
  2214. else {
  2215. PageColor = MI_PAGE_COLOR_PTE_PROCESS (CheckPte,
  2216. &Process->NextPageColor);
  2217. }
  2218. *EndPage = MiRemoveAnyPage (PageColor);
  2219. EndPage += 1;
  2220. CheckPte += 1;
  2221. }
  2222. if (Process == HYDRA_PROCESS) {
  2223. PageColor = MI_GET_PAGE_COLOR_FROM_SESSION (MmSessionSpace);
  2224. }
  2225. else if (Process == NULL) {
  2226. PageColor = MI_GET_PAGE_COLOR_FROM_PTE (CheckPte);
  2227. }
  2228. else {
  2229. PageColor = MI_PAGE_COLOR_PTE_PROCESS (CheckPte,
  2230. &Process->NextPageColor);
  2231. }
  2232. //
  2233. // Check to see if the read will go past the end of the file,
  2234. // If so, correct the read size and get a zeroed page.
  2235. //
  2236. MmInfoCounters.PageReadIoCount += 1;
  2237. MmInfoCounters.PageReadCount += ReadSize >> PAGE_SHIFT;
  2238. if ((ControlArea->u.Flags.Image) &&
  2239. (((UINT64)StartingOffset.QuadPart + ReadSize) > (UINT64)TempOffset.QuadPart)) {
  2240. ASSERT ((ULONG)(TempOffset.QuadPart - StartingOffset.QuadPart)
  2241. > (ReadSize - PAGE_SIZE));
  2242. ReadSize = (ULONG)(TempOffset.QuadPart - StartingOffset.QuadPart);
  2243. //
  2244. // Round the offset to a 512-byte offset as this will help filesystems
  2245. // optimize the transfer. Note that filesystems will always zero fill
  2246. // the remainder between VDL and the next 512-byte multiple and we have
  2247. // already zeroed the whole page.
  2248. //
  2249. ReadSize = ((ReadSize + MMSECTOR_MASK) & ~MMSECTOR_MASK);
  2250. PageFrameIndex = MiRemoveZeroPage (PageColor);
  2251. }
  2252. else {
  2253. //
  2254. // We are reading a complete page, no need to get a zeroed page.
  2255. //
  2256. PageFrameIndex = MiRemoveAnyPage (PageColor);
  2257. }
  2258. //
  2259. // Increment the PFN reference count in the control area for
  2260. // the subsection (the PFN lock is required to modify this field).
  2261. //
  2262. ControlArea->NumberOfPfnReferences += 1;
  2263. *Page = PageFrameIndex;
  2264. PageFrameIndex = *(FirstMdlPage + (PointerPte - BasePte));
  2265. //
  2266. // Get a page and put the PTE into the transition state with the
  2267. // read-in-progress flag set.
  2268. //
  2269. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  2270. //
  2271. // Initialize MDL for request.
  2272. //
  2273. MmInitializeMdl (Mdl,
  2274. MiGetVirtualAddressMappedByPte (BasePte),
  2275. ReadSize);
  2276. Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
  2277. #if DBG
  2278. if (ReadSize > ((ClusterSize + 1) << PAGE_SHIFT)) {
  2279. KeBugCheckEx (MEMORY_MANAGEMENT, 0x777,(ULONG_PTR)Mdl, (ULONG_PTR)Subsection,
  2280. (ULONG)TempOffset.LowPart);
  2281. }
  2282. #endif //DBG
  2283. MiInitializeReadInProgressPfn (Mdl,
  2284. BasePte,
  2285. &ReadBlockLocal->Event,
  2286. MI_PROTOTYPE_WSINDEX);
  2287. MI_ZERO_USED_PAGETABLE_ENTRIES_IN_INPAGE_SUPPORT(ReadBlockLocal);
  2288. ReadBlockLocal->ReadOffset = StartingOffset;
  2289. ReadBlockLocal->FilePointer = ControlArea->FilePointer;
  2290. ReadBlockLocal->BasePte = BasePte;
  2291. ReadBlockLocal->Pfn = Pfn1;
  2292. return STATUS_ISSUE_PAGING_IO;
  2293. }
  2294. NTSTATUS
  2295. MiWaitForInPageComplete (
  2296. IN PMMPFN Pfn2,
  2297. IN PMMPTE PointerPte,
  2298. IN PVOID FaultingAddress,
  2299. IN PMMPTE PointerPteContents,
  2300. IN PMMINPAGE_SUPPORT InPageSupport,
  2301. IN PEPROCESS CurrentProcess
  2302. )
  2303. /*++
  2304. Routine Description:
  2305. Waits for a page read to complete.
  2306. Arguments:
  2307. Pfn - Supplies a pointer to the PFN element for the page being read.
  2308. PointerPte - Supplies a pointer to the PTE that is in the transition
  2309. state. This can be a prototype PTE address.
  2310. FaultingAddress - Supplies the faulting address.
  2311. PointerPteContents - Supplies the contents of the PTE before the
  2312. working set lock was released.
  2313. InPageSupport - Supplies a pointer to the inpage support structure
  2314. for this read operation.
  2315. Return Value:
  2316. Returns the status of the inpage operation.
  2317. Note that the working set mutex and PFN lock are held upon return !!!
  2318. Environment:
  2319. Kernel mode, APCs disabled. Neither the working set lock nor
  2320. the PFN lock may be held.
  2321. --*/
  2322. {
  2323. PMMPTE NewPointerPte;
  2324. PMMPTE ProtoPte;
  2325. PMMPFN Pfn1;
  2326. PMMPFN Pfn;
  2327. PULONG Va;
  2328. PPFN_NUMBER Page;
  2329. PPFN_NUMBER LastPage;
  2330. ULONG Offset;
  2331. ULONG Protection;
  2332. PMDL Mdl;
  2333. KIRQL OldIrql;
  2334. NTSTATUS status;
  2335. NTSTATUS status2;
  2336. PEPROCESS Process;
  2337. //
  2338. // Wait for the I/O to complete. Note that we can't wait for all
  2339. // the objects simultaneously as other threads/processes could be
  2340. // waiting for the same event. The first thread which completes
  2341. // the wait and gets the PFN lock may reuse the event for another
  2342. // fault before this thread completes its wait.
  2343. //
  2344. KeWaitForSingleObject( &InPageSupport->Event,
  2345. WrPageIn,
  2346. KernelMode,
  2347. FALSE,
  2348. NULL);
  2349. if (CurrentProcess == HYDRA_PROCESS) {
  2350. LOCK_SESSION_SPACE_WS (OldIrql, PsGetCurrentThread ());
  2351. }
  2352. else if (CurrentProcess == PREFETCH_PROCESS) {
  2353. NOTHING;
  2354. }
  2355. else if (CurrentProcess != NULL) {
  2356. LOCK_WS (CurrentProcess);
  2357. }
  2358. else {
  2359. LOCK_SYSTEM_WS (OldIrql, PsGetCurrentThread ());
  2360. }
  2361. LOCK_PFN (OldIrql);
  2362. ASSERT (Pfn2->u3.e2.ReferenceCount != 0);
  2363. //
  2364. // Check to see if this is the first thread to complete the in-page
  2365. // operation.
  2366. //
  2367. Pfn = InPageSupport->Pfn;
  2368. if (Pfn2 != Pfn) {
  2369. ASSERT (Pfn2->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);
  2370. Pfn2->u3.e1.ReadInProgress = 0;
  2371. }
  2372. //
  2373. // Another thread has already serviced the read, check the
  2374. // io-error flag in the PFN database to ensure the in-page
  2375. // was successful.
  2376. //
  2377. if (Pfn2->u4.InPageError == 1) {
  2378. ASSERT (!NT_SUCCESS(Pfn2->u1.ReadStatus));
  2379. if (MmIsRetryIoStatus(Pfn2->u1.ReadStatus)) {
  2380. return STATUS_REFAULT;
  2381. }
  2382. return Pfn2->u1.ReadStatus;
  2383. }
  2384. if (InPageSupport->u1.e1.Completed == 0) {
  2385. //
  2386. // The ReadInProgress bit for the dummy page is constantly cleared
  2387. // below as there are generally multiple inpage blocks pointing to
  2388. // the same dummy page.
  2389. //
  2390. ASSERT ((Pfn->u3.e1.ReadInProgress == 1) ||
  2391. (Pfn->PteAddress == MI_PF_DUMMY_PAGE_PTE));
  2392. InPageSupport->u1.e1.Completed = 1;
  2393. Mdl = &InPageSupport->Mdl;
  2394. if (InPageSupport->u1.e1.PrefetchMdlHighBits != 0) {
  2395. //
  2396. // This is a prefetcher-issued read.
  2397. //
  2398. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  2399. }
  2400. if (Mdl->MdlFlags & MDL_MAPPED_TO_SYSTEM_VA) {
  2401. MmUnmapLockedPages (Mdl->MappedSystemVa, Mdl);
  2402. }
  2403. ASSERT (Pfn->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);
  2404. Pfn->u3.e1.ReadInProgress = 0;
  2405. Pfn->u1.Event = NULL;
  2406. #if defined (_WIN64)
  2407. //
  2408. // Page directory and page table pages are never clustered,
  2409. // ensure this is never violated as only one UsedPageTableEntries
  2410. // is kept in the inpage support block.
  2411. //
  2412. if (InPageSupport->UsedPageTableEntries) {
  2413. Page = (PPFN_NUMBER)(Mdl + 1);
  2414. LastPage = Page + ((Mdl->ByteCount - 1) >> PAGE_SHIFT);
  2415. ASSERT (Page == LastPage);
  2416. }
  2417. #if DBGXX
  2418. MiCheckPageTableInPage (Pfn, InPageSupport);
  2419. #endif
  2420. #endif
  2421. MI_INSERT_USED_PAGETABLE_ENTRIES_IN_PFN(Pfn, InPageSupport);
  2422. //
  2423. // Check the IO_STATUS_BLOCK to ensure the in-page completed successfully.
  2424. //
  2425. if (!NT_SUCCESS(InPageSupport->IoStatus.Status)) {
  2426. if (InPageSupport->IoStatus.Status == STATUS_END_OF_FILE) {
  2427. //
  2428. // An attempt was made to read past the end of file
  2429. // zero all the remaining bytes in the read.
  2430. //
  2431. Page = (PPFN_NUMBER)(Mdl + 1);
  2432. LastPage = Page + ((Mdl->ByteCount - 1) >> PAGE_SHIFT);
  2433. while (Page <= LastPage) {
  2434. MiZeroPhysicalPage (*Page, 0);
  2435. MI_ZERO_USED_PAGETABLE_ENTRIES_IN_PFN(MI_PFN_ELEMENT(*Page));
  2436. Page += 1;
  2437. }
  2438. }
  2439. else {
  2440. //
  2441. // In page io error occurred.
  2442. //
  2443. status = InPageSupport->IoStatus.Status;
  2444. status2 = InPageSupport->IoStatus.Status;
  2445. if (status != STATUS_VERIFY_REQUIRED) {
  2446. LOGICAL Retry;
  2447. Retry = FALSE;
  2448. #if DBG
  2449. DbgPrint ("MM: inpage I/O error %X\n",
  2450. InPageSupport->IoStatus.Status);
  2451. #endif
  2452. //
  2453. // If this page is for paged pool or for paged
  2454. // kernel code or page table pages, bugcheck.
  2455. //
  2456. if ((FaultingAddress > MM_HIGHEST_USER_ADDRESS) &&
  2457. (!MI_IS_SYSTEM_CACHE_ADDRESS(FaultingAddress))) {
  2458. if (MmIsRetryIoStatus(status)) {
  2459. if (MiInPageSinglePages == 0) {
  2460. MiInPageSinglePages = 30;
  2461. }
  2462. MiFaultRetries -= 1;
  2463. if (MiFaultRetries & MiFaultRetryMask) {
  2464. Retry = TRUE;
  2465. }
  2466. }
  2467. if (Retry == FALSE) {
  2468. ULONG_PTR PteContents;
  2469. //
  2470. // The prototype PTE resides in paged pool which may
  2471. // not be resident at this point. Check first.
  2472. //
  2473. if (MmIsAddressValid (PointerPte) == TRUE) {
  2474. PteContents = *(PULONG_PTR)PointerPte;
  2475. }
  2476. else {
  2477. PteContents = (ULONG_PTR)-1;
  2478. }
  2479. KeBugCheckEx (KERNEL_DATA_INPAGE_ERROR,
  2480. (ULONG_PTR)PointerPte,
  2481. status,
  2482. (ULONG_PTR)FaultingAddress,
  2483. PteContents);
  2484. }
  2485. status2 = STATUS_REFAULT;
  2486. }
  2487. else {
  2488. if (MmIsRetryIoStatus(status)) {
  2489. if (MiInPageSinglePages == 0) {
  2490. MiInPageSinglePages = 30;
  2491. }
  2492. MiUserFaultRetries -= 1;
  2493. if (MiUserFaultRetries & MiUserFaultRetryMask) {
  2494. Retry = TRUE;
  2495. }
  2496. }
  2497. if (Retry == TRUE) {
  2498. status2 = STATUS_REFAULT;
  2499. }
  2500. }
  2501. }
  2502. Page = (PPFN_NUMBER)(Mdl + 1);
  2503. LastPage = Page + ((Mdl->ByteCount - 1) >> PAGE_SHIFT);
  2504. #if DBG
  2505. Process = PsGetCurrentProcess ();
  2506. #endif
  2507. while (Page <= LastPage) {
  2508. Pfn1 = MI_PFN_ELEMENT (*Page);
  2509. ASSERT (Pfn1->u3.e2.ReferenceCount != 0);
  2510. Pfn1->u4.InPageError = 1;
  2511. Pfn1->u1.ReadStatus = status;
  2512. #if DBG
  2513. Va = (PULONG)MiMapPageInHyperSpaceAtDpc (Process, *Page);
  2514. RtlFillMemoryUlong (Va, PAGE_SIZE, 0x50444142);
  2515. MiUnmapPageInHyperSpaceFromDpc (Process, Va);
  2516. #endif
  2517. Page += 1;
  2518. }
  2519. return status2;
  2520. }
  2521. }
  2522. else {
  2523. MiFaultRetries = 0;
  2524. MiUserFaultRetries = 0;
  2525. if (InPageSupport->IoStatus.Information != Mdl->ByteCount) {
  2526. ASSERT (InPageSupport->IoStatus.Information != 0);
  2527. //
  2528. // Less than a full page was read - zero the remainder
  2529. // of the page.
  2530. //
  2531. Page = (PPFN_NUMBER)(Mdl + 1);
  2532. LastPage = Page + ((Mdl->ByteCount - 1) >> PAGE_SHIFT);
  2533. Page += ((InPageSupport->IoStatus.Information - 1) >> PAGE_SHIFT);
  2534. Offset = BYTE_OFFSET (InPageSupport->IoStatus.Information);
  2535. if (Offset != 0) {
  2536. Process = PsGetCurrentProcess ();
  2537. Va = (PULONG)((PCHAR)MiMapPageInHyperSpaceAtDpc (Process, *Page)
  2538. + Offset);
  2539. RtlZeroMemory (Va, PAGE_SIZE - Offset);
  2540. MiUnmapPageInHyperSpaceFromDpc (Process, Va);
  2541. }
  2542. //
  2543. // Zero any remaining pages within the MDL.
  2544. //
  2545. Page += 1;
  2546. while (Page <= LastPage) {
  2547. MiZeroPhysicalPage (*Page, 0);
  2548. Page += 1;
  2549. }
  2550. }
  2551. //
  2552. // If any filesystem return non-zeroed data for any slop
  2553. // after the VDL but before the next 512-byte offset then this
  2554. // non-zeroed data will overwrite our zeroed page. This would
  2555. // need to be checked for and cleaned up here. Note that the only
  2556. // reason Mm even rounds the MDL request up to a 512-byte offset
  2557. // is so filesystems receive a transfer they can handle optimally,
  2558. // but any transfer size has always worked (although non-512 byte
  2559. // multiples end up getting posted by the filesystem).
  2560. //
  2561. }
  2562. }
  2563. //
  2564. // Prefetcher-issued reads only put prototype PTEs into transition and
  2565. // never fill actual hardware PTEs so these can be returned now.
  2566. //
  2567. if (CurrentProcess == PREFETCH_PROCESS) {
  2568. return STATUS_SUCCESS;
  2569. }
  2570. //
  2571. // Check to see if the faulting PTE has changed.
  2572. //
  2573. NewPointerPte = MiFindActualFaultingPte (FaultingAddress);
  2574. //
  2575. // If this PTE is in prototype PTE format, make the pointer to the
  2576. // PTE point to the prototype PTE.
  2577. //
  2578. if (NewPointerPte == NULL) {
  2579. return STATUS_PTE_CHANGED;
  2580. }
  2581. if (NewPointerPte != PointerPte) {
  2582. //
  2583. // Check to make sure the NewPointerPte is not a prototype PTE
  2584. // which refers to the page being made valid.
  2585. //
  2586. if (NewPointerPte->u.Soft.Prototype == 1) {
  2587. if (NewPointerPte->u.Soft.PageFileHigh == MI_PTE_LOOKUP_NEEDED) {
  2588. ProtoPte = MiCheckVirtualAddress (FaultingAddress,
  2589. &Protection);
  2590. }
  2591. else {
  2592. ProtoPte = MiPteToProto (NewPointerPte);
  2593. }
  2594. //
  2595. // Make sure the prototype PTE refers to the PTE made valid.
  2596. //
  2597. if (ProtoPte != PointerPte) {
  2598. return STATUS_PTE_CHANGED;
  2599. }
  2600. //
  2601. // If the only difference is the owner mask, everything is okay.
  2602. //
  2603. if (ProtoPte->u.Long != PointerPteContents->u.Long) {
  2604. return STATUS_PTE_CHANGED;
  2605. }
  2606. }
  2607. else {
  2608. return STATUS_PTE_CHANGED;
  2609. }
  2610. }
  2611. else {
  2612. if (NewPointerPte->u.Long != PointerPteContents->u.Long) {
  2613. return STATUS_PTE_CHANGED;
  2614. }
  2615. }
  2616. return STATUS_SUCCESS;
  2617. }
  2618. PMMPTE
  2619. MiFindActualFaultingPte (
  2620. IN PVOID FaultingAddress
  2621. )
  2622. /*++
  2623. Routine Description:
  2624. This routine locates the actual PTE which must be made resident in order
  2625. to complete this fault. Note that for certain cases multiple faults
  2626. are required to make the final page resident.
  2627. Arguments:
  2628. FaultingAddress - Supplies the virtual address which caused the fault.
  2629. Return Value:
  2630. The PTE to be made valid to finish the fault, NULL if the fault should
  2631. be retried.
  2632. Environment:
  2633. Kernel mode, APCs disabled, working set mutex held.
  2634. --*/
  2635. {
  2636. PMMPTE ProtoPteAddress;
  2637. PMMPTE PointerPte;
  2638. PMMPTE PointerFaultingPte;
  2639. ULONG Protection;
  2640. if (MI_IS_PHYSICAL_ADDRESS(FaultingAddress)) {
  2641. return NULL;
  2642. }
  2643. #if (_MI_PAGING_LEVELS >= 4)
  2644. PointerPte = MiGetPxeAddress (FaultingAddress);
  2645. if (PointerPte->u.Hard.Valid == 0) {
  2646. //
  2647. // Page directory parent page is not valid.
  2648. //
  2649. return PointerPte;
  2650. }
  2651. #endif
  2652. #if (_MI_PAGING_LEVELS >= 3)
  2653. PointerPte = MiGetPpeAddress (FaultingAddress);
  2654. if (PointerPte->u.Hard.Valid == 0) {
  2655. //
  2656. // Page directory page is not valid.
  2657. //
  2658. return PointerPte;
  2659. }
  2660. #endif
  2661. PointerPte = MiGetPdeAddress (FaultingAddress);
  2662. if (PointerPte->u.Hard.Valid == 0) {
  2663. //
  2664. // Page table page is not valid.
  2665. //
  2666. return PointerPte;
  2667. }
  2668. PointerPte = MiGetPteAddress (FaultingAddress);
  2669. if (PointerPte->u.Hard.Valid == 1) {
  2670. //
  2671. // Page is already valid, no need to fault it in.
  2672. //
  2673. return NULL;
  2674. }
  2675. if (PointerPte->u.Soft.Prototype == 0) {
  2676. //
  2677. // Page is not a prototype PTE, make this PTE valid.
  2678. //
  2679. return PointerPte;
  2680. }
  2681. //
  2682. // Check to see if the PTE which maps the prototype PTE is valid.
  2683. //
  2684. if (PointerPte->u.Soft.PageFileHigh == MI_PTE_LOOKUP_NEEDED) {
  2685. //
  2686. // Protection is here, PTE must be located in VAD.
  2687. //
  2688. ProtoPteAddress = MiCheckVirtualAddress (FaultingAddress,
  2689. &Protection);
  2690. if (ProtoPteAddress == NULL) {
  2691. //
  2692. // No prototype PTE means another thread has deleted the VAD while
  2693. // this thread waited for the inpage to complete. Certainly NULL
  2694. // must be returned so a stale PTE is not modified - the instruction
  2695. // will then be reexecuted and an access violation delivered.
  2696. //
  2697. return NULL;
  2698. }
  2699. }
  2700. else {
  2701. //
  2702. // Protection is in ProtoPte.
  2703. //
  2704. ProtoPteAddress = MiPteToProto (PointerPte);
  2705. }
  2706. PointerFaultingPte = MiFindActualFaultingPte (ProtoPteAddress);
  2707. if (PointerFaultingPte == (PMMPTE)NULL) {
  2708. return PointerPte;
  2709. }
  2710. return PointerFaultingPte;
  2711. }
  2712. PMMPTE
  2713. MiCheckVirtualAddress (
  2714. IN PVOID VirtualAddress,
  2715. OUT PULONG ProtectCode
  2716. )
  2717. /*++
  2718. Routine Description:
  2719. This function examines the virtual address descriptors to see
  2720. if the specified virtual address is contained within any of
  2721. the descriptors. If a virtual address descriptor is found
  2722. which contains the specified virtual address, a PTE is built
  2723. from information within the virtual address descriptor and
  2724. returned to the caller.
  2725. Arguments:
  2726. VirtualAddress - Supplies the virtual address to locate within
  2727. a virtual address descriptor.
  2728. Return Value:
  2729. Returns the PTE which corresponds to the supplied virtual address.
  2730. If no virtual address descriptor is found, a zero PTE is returned.
  2731. Environment:
  2732. Kernel mode, APCs disabled, working set mutex held.
  2733. --*/
  2734. {
  2735. PMMVAD Vad;
  2736. PMMPTE PointerPte;
  2737. PLIST_ENTRY NextEntry;
  2738. PIMAGE_ENTRY_IN_SESSION Image;
  2739. if (VirtualAddress <= MM_HIGHEST_USER_ADDRESS) {
  2740. #if defined(MM_SHARED_USER_DATA_VA)
  2741. if (PAGE_ALIGN(VirtualAddress) == (PVOID) MM_SHARED_USER_DATA_VA) {
  2742. //
  2743. // This is the page that is double mapped between
  2744. // user mode and kernel mode. Map in as read only.
  2745. // On MIPS this is hardwired in the TB.
  2746. //
  2747. *ProtectCode = MM_READONLY;
  2748. #if defined(_X86PAE_)
  2749. if (MmPaeMask != 0) {
  2750. //
  2751. // For some 32 bit architectures, the fast system call
  2752. // instruction sequence lives in this page hence we must
  2753. // ensure it is executable.
  2754. //
  2755. *ProtectCode = MM_EXECUTE_READ;
  2756. }
  2757. #endif
  2758. return MmSharedUserDataPte;
  2759. }
  2760. #endif
  2761. Vad = MiLocateAddress (VirtualAddress);
  2762. if (Vad == (PMMVAD)NULL) {
  2763. *ProtectCode = MM_NOACCESS;
  2764. return NULL;
  2765. }
  2766. //
  2767. // A virtual address descriptor which contains the virtual address
  2768. // has been located. Build the PTE from the information within
  2769. // the virtual address descriptor.
  2770. //
  2771. #ifdef LARGE_PAGES
  2772. if (Vad->u.VadFlags.LargePages == 1) {
  2773. KIRQL OldIrql;
  2774. PSUBSECTION Subsection;
  2775. //
  2776. // The first prototype PTE points to the subsection for the
  2777. // large page mapping.
  2778. Subsection = (PSUBSECTION)Vad->FirstPrototypePte;
  2779. ASSERT (Subsection->u.SubsectionFlags.LargePages == 1);
  2780. KeRaiseIrql (DISPATCH_LEVEL, &OldIrql);
  2781. KeFlushEntireTb (TRUE, TRUE);
  2782. KeFillLargeEntryTb ((PHARDWARE_PTE)(Subsection + 1),
  2783. VirtualAddress,
  2784. Subsection->StartingSector);
  2785. KeLowerIrql (OldIrql);
  2786. *ProtectCode = MM_LARGE_PAGES;
  2787. return NULL;
  2788. }
  2789. #endif //LARGE_PAGES
  2790. if (Vad->u.VadFlags.PhysicalMapping == 1) {
  2791. #if defined(_IA64_)
  2792. //
  2793. // This is a banked section for all platforms except IA64. This
  2794. // is because only IA64 (in the MmX86Fault handler for 32-bit apps)
  2795. // calls this routine without first checking for a valid PTE and
  2796. // just returning.
  2797. //
  2798. if (((PMMVAD_LONG)Vad)->u4.Banked == NULL) {
  2799. //
  2800. // This is a physical (non-banked) section which is allowed to
  2801. // take a TB miss, but never a legitimate call to this routine
  2802. // because the corresponding PPE/PDE/PTE must always be valid.
  2803. //
  2804. ASSERT (MiGetPpeAddress(VirtualAddress)->u.Hard.Valid == 1);
  2805. ASSERT (MiGetPdeAddress(VirtualAddress)->u.Hard.Valid == 1);
  2806. PointerPte = MiGetPteAddress(VirtualAddress);
  2807. ASSERT (PointerPte->u.Hard.Valid == 1);
  2808. KeFillEntryTb ((PHARDWARE_PTE)PointerPte, VirtualAddress, FALSE);
  2809. *ProtectCode = MM_NOACCESS;
  2810. return NULL;
  2811. }
  2812. #endif
  2813. //
  2814. // This is definitely a banked section.
  2815. //
  2816. MiHandleBankedSection (VirtualAddress, Vad);
  2817. *ProtectCode = MM_NOACCESS;
  2818. return NULL;
  2819. }
  2820. if (Vad->u.VadFlags.PrivateMemory == 1) {
  2821. //
  2822. // This is a private region of memory. Check to make
  2823. // sure the virtual address has been committed. Note that
  2824. // addresses are dense from the bottom up.
  2825. //
  2826. if (Vad->u.VadFlags.UserPhysicalPages == 1) {
  2827. //
  2828. // These mappings only fault if the access is bad.
  2829. //
  2830. #if 0
  2831. //
  2832. // Note the PTE can only be checked if the PXE, PPE and PDE are
  2833. // all valid, so just comment out the assert for now.
  2834. //
  2835. ASSERT (MiGetPteAddress(VirtualAddress)->u.Long == ZeroPte.u.Long);
  2836. #endif
  2837. *ProtectCode = MM_NOACCESS;
  2838. return NULL;
  2839. }
  2840. if (Vad->u.VadFlags.MemCommit == 1) {
  2841. *ProtectCode = MI_GET_PROTECTION_FROM_VAD(Vad);
  2842. return NULL;
  2843. }
  2844. //
  2845. // The address is reserved but not committed.
  2846. //
  2847. *ProtectCode = MM_NOACCESS;
  2848. return NULL;
  2849. }
  2850. else {
  2851. //
  2852. // This virtual address descriptor refers to a
  2853. // section, calculate the address of the prototype PTE
  2854. // and construct a pointer to the PTE.
  2855. //
  2856. //*******************************************************
  2857. //*******************************************************
  2858. // well here's an interesting problem, how do we know
  2859. // how to set the attributes on the PTE we are creating
  2860. // when we can't look at the prototype PTE without
  2861. // potentially incurring a page fault. In this case
  2862. // PteTemplate would be zero.
  2863. //*******************************************************
  2864. //*******************************************************
  2865. //
  2866. if (Vad->u.VadFlags.ImageMap == 1) {
  2867. //
  2868. // PTE and proto PTEs have the same protection for images.
  2869. //
  2870. *ProtectCode = MM_UNKNOWN_PROTECTION;
  2871. }
  2872. else {
  2873. *ProtectCode = MI_GET_PROTECTION_FROM_VAD(Vad);
  2874. }
  2875. PointerPte = (PMMPTE)MiGetProtoPteAddress(Vad,
  2876. MI_VA_TO_VPN (VirtualAddress));
  2877. if (PointerPte == NULL) {
  2878. *ProtectCode = MM_NOACCESS;
  2879. }
  2880. if (Vad->u2.VadFlags2.ExtendableFile) {
  2881. //
  2882. // Make sure the data has been committed.
  2883. //
  2884. if ((MI_VA_TO_VPN (VirtualAddress) - Vad->StartingVpn) >
  2885. (ULONG_PTR)((((PMMVAD_LONG)Vad)->u4.ExtendedInfo->CommittedSize - 1)
  2886. >> PAGE_SHIFT)) {
  2887. *ProtectCode = MM_NOACCESS;
  2888. }
  2889. }
  2890. return PointerPte;
  2891. }
  2892. }
  2893. else if (MI_IS_PAGE_TABLE_ADDRESS(VirtualAddress)) {
  2894. //
  2895. // The virtual address is within the space occupied by PDEs,
  2896. // make the PDE valid.
  2897. //
  2898. if (((PMMPTE)VirtualAddress >= MiGetPteAddress (MM_PAGED_POOL_START)) &&
  2899. ((PMMPTE)VirtualAddress <= MmPagedPoolInfo.LastPteForPagedPool)) {
  2900. *ProtectCode = MM_NOACCESS;
  2901. return NULL;
  2902. }
  2903. *ProtectCode = MM_READWRITE;
  2904. return NULL;
  2905. }
  2906. else if (MI_IS_SESSION_ADDRESS (VirtualAddress) == TRUE) {
  2907. //
  2908. // See if the session space address is copy on write.
  2909. //
  2910. MM_SESSION_SPACE_WS_LOCK_ASSERT ();
  2911. PointerPte = NULL;
  2912. *ProtectCode = MM_NOACCESS;
  2913. NextEntry = MmSessionSpace->ImageList.Flink;
  2914. while (NextEntry != &MmSessionSpace->ImageList) {
  2915. Image = CONTAINING_RECORD(NextEntry, IMAGE_ENTRY_IN_SESSION, Link);
  2916. if ((VirtualAddress >= Image->Address) && (VirtualAddress <= Image->LastAddress)) {
  2917. PointerPte = Image->PrototypePtes +
  2918. (((PCHAR)VirtualAddress - (PCHAR)Image->Address) >> PAGE_SHIFT);
  2919. *ProtectCode = MM_EXECUTE_WRITECOPY;
  2920. break;
  2921. }
  2922. NextEntry = NextEntry->Flink;
  2923. }
  2924. return PointerPte;
  2925. }
  2926. //
  2927. // Address is in system space.
  2928. //
  2929. *ProtectCode = MM_NOACCESS;
  2930. return NULL;
  2931. }
  2932. #if (_MI_PAGING_LEVELS < 3)
  2933. NTSTATUS
  2934. FASTCALL
  2935. MiCheckPdeForPagedPool (
  2936. IN PVOID VirtualAddress
  2937. )
  2938. /*++
  2939. Routine Description:
  2940. This function copies the Page Table Entry for the corresponding
  2941. virtual address from the system process's page directory.
  2942. This allows page table pages to be lazily evaluated for things
  2943. like paged pool and per-session mappings.
  2944. Arguments:
  2945. VirtualAddress - Supplies the virtual address in question.
  2946. Return Value:
  2947. Either success or access violation.
  2948. Environment:
  2949. Kernel mode, DISPATCH level or below.
  2950. --*/
  2951. {
  2952. PMMPTE PointerPde;
  2953. PMMPTE PointerPte;
  2954. NTSTATUS status;
  2955. if (MI_IS_SESSION_ADDRESS (VirtualAddress) == TRUE) {
  2956. //
  2957. // Virtual address in the session space range.
  2958. //
  2959. return MiCheckPdeForSessionSpace (VirtualAddress);
  2960. }
  2961. if (MI_IS_SESSION_PTE (VirtualAddress) == TRUE) {
  2962. //
  2963. // PTE for the session space range.
  2964. //
  2965. return MiCheckPdeForSessionSpace (VirtualAddress);
  2966. }
  2967. status = STATUS_SUCCESS;
  2968. if (MI_IS_KERNEL_PAGE_TABLE_ADDRESS(VirtualAddress)) {
  2969. //
  2970. // PTE for paged pool.
  2971. //
  2972. PointerPde = MiGetPteAddress (VirtualAddress);
  2973. status = STATUS_WAIT_1;
  2974. }
  2975. else if (VirtualAddress < MmSystemRangeStart) {
  2976. return STATUS_ACCESS_VIOLATION;
  2977. }
  2978. else {
  2979. //
  2980. // Virtual address in paged pool range.
  2981. //
  2982. PointerPde = MiGetPdeAddress (VirtualAddress);
  2983. }
  2984. //
  2985. // Locate the PDE for this page and make it valid.
  2986. //
  2987. if (PointerPde->u.Hard.Valid == 0) {
  2988. PointerPte = MiGetVirtualAddressMappedByPte (PointerPde);
  2989. MI_WRITE_VALID_PTE (PointerPde,
  2990. MmSystemPagePtes [((ULONG_PTR)PointerPde &
  2991. (PD_PER_SYSTEM * (sizeof(MMPTE) * PDE_PER_PAGE) - 1)) / sizeof(MMPTE)]);
  2992. KeFillEntryTb ((PHARDWARE_PTE)PointerPde, PointerPte, FALSE);
  2993. }
  2994. return status;
  2995. }
  2996. NTSTATUS
  2997. FASTCALL
  2998. MiCheckPdeForSessionSpace(
  2999. IN PVOID VirtualAddress
  3000. )
  3001. /*++
  3002. Routine Description:
  3003. This function copies the Page Table Entry for the corresponding
  3004. session virtual address from the current session's data structures.
  3005. This allows page table pages to be lazily evaluated for session mappings.
  3006. The caller must check for the current process having a session space.
  3007. Arguments:
  3008. VirtualAddress - Supplies the virtual address in question.
  3009. Return Value:
  3010. STATUS_WAIT_1 - The mapping has been made valid, retry the fault.
  3011. STATUS_SUCCESS - Did not handle the fault, continue further processing.
  3012. !STATUS_SUCCESS - An access violation has occurred - raise an exception.
  3013. Environment:
  3014. Kernel mode, DISPATCH level or below.
  3015. --*/
  3016. {
  3017. PMMPTE PointerPde;
  3018. PVOID SessionVirtualAddress;
  3019. ULONG Index;
  3020. //
  3021. // First check whether the reference was to a page table page which maps
  3022. // session space. If so, the PDE is retrieved from the session space
  3023. // data structure and made valid.
  3024. //
  3025. if (MI_IS_SESSION_PTE (VirtualAddress) == TRUE) {
  3026. //
  3027. // Verify that the current process has a session space.
  3028. //
  3029. PointerPde = MiGetPdeAddress (MmSessionSpace);
  3030. if (PointerPde->u.Hard.Valid == 0) {
  3031. #if DBG
  3032. DbgPrint("MiCheckPdeForSessionSpace: No current session for PTE %p\n",
  3033. VirtualAddress);
  3034. DbgBreakPoint();
  3035. #endif
  3036. return STATUS_ACCESS_VIOLATION;
  3037. }
  3038. SessionVirtualAddress = MiGetVirtualAddressMappedByPte ((PMMPTE) VirtualAddress);
  3039. PointerPde = MiGetPteAddress (VirtualAddress);
  3040. if (PointerPde->u.Hard.Valid == 1) {
  3041. //
  3042. // The PDE is already valid - another thread must have
  3043. // won the race. Just return.
  3044. //
  3045. return STATUS_WAIT_1;
  3046. }
  3047. //
  3048. // Calculate the session space PDE index and load the
  3049. // PDE from the session space table for this session.
  3050. //
  3051. Index = MiGetPdeSessionIndex (SessionVirtualAddress);
  3052. PointerPde->u.Long = MmSessionSpace->PageTables[Index].u.Long;
  3053. if (PointerPde->u.Hard.Valid == 1) {
  3054. KeFillEntryTb ((PHARDWARE_PTE)PointerPde, VirtualAddress, FALSE);
  3055. return STATUS_WAIT_1;
  3056. }
  3057. #if DBG
  3058. DbgPrint("MiCheckPdeForSessionSpace: No Session PDE for PTE %p, %p\n",
  3059. PointerPde->u.Long, SessionVirtualAddress);
  3060. DbgBreakPoint();
  3061. #endif
  3062. return STATUS_ACCESS_VIOLATION;
  3063. }
  3064. if (MI_IS_SESSION_ADDRESS (VirtualAddress) == FALSE) {
  3065. //
  3066. // Not a session space fault - tell the caller to try other handlers.
  3067. //
  3068. return STATUS_SUCCESS;
  3069. }
  3070. //
  3071. // Handle PDE faults for references in the session space.
  3072. // Verify that the current process has a session space.
  3073. //
  3074. PointerPde = MiGetPdeAddress (MmSessionSpace);
  3075. if (PointerPde->u.Hard.Valid == 0) {
  3076. #if DBG
  3077. DbgPrint("MiCheckPdeForSessionSpace: No current session for VA %p\n",
  3078. VirtualAddress);
  3079. DbgBreakPoint();
  3080. #endif
  3081. return STATUS_ACCESS_VIOLATION;
  3082. }
  3083. PointerPde = MiGetPdeAddress (VirtualAddress);
  3084. if (PointerPde->u.Hard.Valid == 0) {
  3085. //
  3086. // Calculate the session space PDE index and load the
  3087. // PDE from the session space table for this session.
  3088. //
  3089. Index = MiGetPdeSessionIndex (VirtualAddress);
  3090. PointerPde->u.Long = MmSessionSpace->PageTables[Index].u.Long;
  3091. if (PointerPde->u.Hard.Valid == 1) {
  3092. KeFillEntryTb ((PHARDWARE_PTE)PointerPde,
  3093. MiGetPteAddress(VirtualAddress),
  3094. FALSE);
  3095. return STATUS_WAIT_1;
  3096. }
  3097. #if DBG
  3098. DbgPrint("MiCheckPdeForSessionSpace: No Session PDE for VA %p, %p\n",
  3099. PointerPde->u.Long, VirtualAddress);
  3100. DbgBreakPoint();
  3101. #endif
  3102. return STATUS_ACCESS_VIOLATION;
  3103. }
  3104. //
  3105. // Tell the caller to continue with other fault handlers.
  3106. //
  3107. return STATUS_SUCCESS;
  3108. }
  3109. #endif
  3110. VOID
  3111. MiInitializePfn (
  3112. IN PFN_NUMBER PageFrameIndex,
  3113. IN PMMPTE PointerPte,
  3114. IN ULONG ModifiedState
  3115. )
  3116. /*++
  3117. Routine Description:
  3118. This function initializes the specified PFN element to the
  3119. active and valid state.
  3120. Arguments:
  3121. PageFrameIndex - Supplies the page frame number to initialize.
  3122. PointerPte - Supplies the pointer to the PTE which caused the
  3123. page fault.
  3124. ModifiedState - Supplies the state to set the modified field in the PFN
  3125. element for this page, either 0 or 1.
  3126. Return Value:
  3127. None.
  3128. Environment:
  3129. Kernel mode, APCs disabled, PFN lock held.
  3130. --*/
  3131. {
  3132. PMMPFN Pfn1;
  3133. PMMPFN Pfn2;
  3134. PMMPTE PteFramePointer;
  3135. PFN_NUMBER PteFramePage;
  3136. MM_PFN_LOCK_ASSERT();
  3137. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  3138. Pfn1->PteAddress = PointerPte;
  3139. //
  3140. // If the PTE is currently valid, an address space is being built,
  3141. // just make the original PTE demand zero.
  3142. //
  3143. if (PointerPte->u.Hard.Valid == 1) {
  3144. Pfn1->OriginalPte.u.Long = MM_DEMAND_ZERO_WRITE_PTE;
  3145. #if defined(_X86PAE_)
  3146. if (MmPaeMask != 0) {
  3147. if ((PointerPte->u.Long & MmPaeMask) == 0) {
  3148. Pfn1->OriginalPte.u.Soft.Protection = MM_EXECUTE_READWRITE;
  3149. }
  3150. }
  3151. #endif
  3152. #if defined(_IA64_)
  3153. if (PointerPte->u.Hard.Execute == 1) {
  3154. Pfn1->OriginalPte.u.Soft.Protection = MM_EXECUTE_READWRITE;
  3155. }
  3156. #endif
  3157. if (MI_IS_CACHING_DISABLED (PointerPte)) {
  3158. Pfn1->OriginalPte.u.Soft.Protection = MM_READWRITE | MM_NOCACHE;
  3159. }
  3160. }
  3161. else {
  3162. Pfn1->OriginalPte = *PointerPte;
  3163. ASSERT (!((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
  3164. (Pfn1->OriginalPte.u.Soft.Transition == 1)));
  3165. }
  3166. Pfn1->u3.e2.ReferenceCount += 1;
  3167. #if DBG
  3168. if (Pfn1->u3.e2.ReferenceCount > 1) {
  3169. DbgPrint("MM:incrementing ref count > 1 \n");
  3170. MiFormatPfn(Pfn1);
  3171. MiFormatPte(PointerPte);
  3172. }
  3173. #endif
  3174. Pfn1->u2.ShareCount += 1;
  3175. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  3176. Pfn1->u3.e1.CacheAttribute = MiCached;
  3177. if (ModifiedState == 1) {
  3178. MI_SET_MODIFIED (Pfn1, 1, 0xB);
  3179. }
  3180. else {
  3181. MI_SET_MODIFIED (Pfn1, 0, 0x26);
  3182. }
  3183. #if defined (_WIN64)
  3184. Pfn1->UsedPageTableEntries = 0;
  3185. #endif
  3186. //
  3187. // Determine the page frame number of the page table page which
  3188. // contains this PTE.
  3189. //
  3190. PteFramePointer = MiGetPteAddress(PointerPte);
  3191. if (PteFramePointer->u.Hard.Valid == 0) {
  3192. #if (_MI_PAGING_LEVELS < 3)
  3193. if (!NT_SUCCESS(MiCheckPdeForPagedPool (PointerPte))) {
  3194. #endif
  3195. KeBugCheckEx (MEMORY_MANAGEMENT,
  3196. 0x61940,
  3197. (ULONG_PTR)PointerPte,
  3198. (ULONG_PTR)PteFramePointer->u.Long,
  3199. (ULONG_PTR)MiGetVirtualAddressMappedByPte(PointerPte));
  3200. #if (_MI_PAGING_LEVELS < 3)
  3201. }
  3202. #endif
  3203. }
  3204. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  3205. ASSERT (PteFramePage != 0);
  3206. Pfn1->u4.PteFrame = PteFramePage;
  3207. //
  3208. // Increment the share count for the page table page containing
  3209. // this PTE.
  3210. //
  3211. Pfn2 = MI_PFN_ELEMENT (PteFramePage);
  3212. Pfn2->u2.ShareCount += 1;
  3213. return;
  3214. }
  3215. VOID
  3216. MiInitializeReadInProgressSinglePfn (
  3217. IN PFN_NUMBER PageFrameIndex,
  3218. IN PMMPTE BasePte,
  3219. IN PKEVENT Event,
  3220. IN WSLE_NUMBER WorkingSetIndex
  3221. )
  3222. /*++
  3223. Routine Description:
  3224. This function initializes the specified PFN element to the
  3225. transition / read-in-progress state for an in-page operation.
  3226. Arguments:
  3227. PageFrameIndex - Supplies the page frame to initialize.
  3228. BasePte - Supplies the pointer to the PTE for the page frame.
  3229. Event - Supplies the event which is to be set when the I/O operation
  3230. completes.
  3231. WorkingSetIndex - Supplies the working set index flag, a value of
  3232. -1 indicates no WSLE is required because
  3233. this is a prototype PTE.
  3234. Return Value:
  3235. None.
  3236. Environment:
  3237. Kernel mode, APCs disabled, PFN lock held.
  3238. --*/
  3239. {
  3240. PMMPFN Pfn1;
  3241. PMMPTE PteFramePointer;
  3242. PFN_NUMBER PteFramePage;
  3243. MMPTE TempPte;
  3244. MM_PFN_LOCK_ASSERT();
  3245. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  3246. Pfn1->u1.Event = Event;
  3247. Pfn1->PteAddress = BasePte;
  3248. Pfn1->OriginalPte = *BasePte;
  3249. if (WorkingSetIndex == MI_PROTOTYPE_WSINDEX) {
  3250. Pfn1->u3.e1.PrototypePte = 1;
  3251. }
  3252. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  3253. MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1, 10);
  3254. Pfn1->u3.e2.ReferenceCount += 1;
  3255. Pfn1->u2.ShareCount = 0;
  3256. Pfn1->u3.e1.ReadInProgress = 1;
  3257. Pfn1->u3.e1.CacheAttribute = MiCached;
  3258. Pfn1->u4.InPageError = 0;
  3259. //
  3260. // Determine the page frame number of the page table page which
  3261. // contains this PTE.
  3262. //
  3263. PteFramePointer = MiGetPteAddress(BasePte);
  3264. if (PteFramePointer->u.Hard.Valid == 0) {
  3265. #if (_MI_PAGING_LEVELS < 3)
  3266. if (!NT_SUCCESS(MiCheckPdeForPagedPool (BasePte))) {
  3267. #endif
  3268. KeBugCheckEx (MEMORY_MANAGEMENT,
  3269. 0x61940,
  3270. (ULONG_PTR)BasePte,
  3271. (ULONG_PTR)PteFramePointer->u.Long,
  3272. (ULONG_PTR)MiGetVirtualAddressMappedByPte(BasePte));
  3273. #if (_MI_PAGING_LEVELS < 3)
  3274. }
  3275. #endif
  3276. }
  3277. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  3278. Pfn1->u4.PteFrame = PteFramePage;
  3279. //
  3280. // Put the PTE into the transition state, no cache flush needed as
  3281. // PTE is still not valid.
  3282. //
  3283. MI_MAKE_TRANSITION_PTE (TempPte,
  3284. PageFrameIndex,
  3285. BasePte->u.Soft.Protection,
  3286. BasePte);
  3287. MI_WRITE_INVALID_PTE (BasePte, TempPte);
  3288. //
  3289. // Increment the share count for the page table page containing
  3290. // this PTE as the PTE just went into the transition state.
  3291. //
  3292. ASSERT (PteFramePage != 0);
  3293. Pfn1 = MI_PFN_ELEMENT (PteFramePage);
  3294. Pfn1->u2.ShareCount += 1;
  3295. return;
  3296. }
  3297. VOID
  3298. MiInitializeReadInProgressPfn (
  3299. IN PMDL Mdl,
  3300. IN PMMPTE BasePte,
  3301. IN PKEVENT Event,
  3302. IN WSLE_NUMBER WorkingSetIndex
  3303. )
  3304. /*++
  3305. Routine Description:
  3306. This function initializes the specified PFN element to the
  3307. transition / read-in-progress state for an in-page operation.
  3308. Arguments:
  3309. Mdl - Supplies a pointer to the MDL.
  3310. BasePte - Supplies the pointer to the PTE which the first page in
  3311. the MDL maps.
  3312. Event - Supplies the event which is to be set when the I/O operation
  3313. completes.
  3314. WorkingSetIndex - Supplies the working set index flag, a value of
  3315. -1 indicates no WSLE is required because
  3316. this is a prototype PTE.
  3317. Return Value:
  3318. None.
  3319. Environment:
  3320. Kernel mode, APCs disabled, PFN lock held.
  3321. --*/
  3322. {
  3323. PMMPFN Pfn1;
  3324. PMMPFN Pfn2;
  3325. PMMPTE PteFramePointer;
  3326. PFN_NUMBER PteFramePage;
  3327. MMPTE TempPte;
  3328. LONG NumberOfBytes;
  3329. PPFN_NUMBER Page;
  3330. MM_PFN_LOCK_ASSERT();
  3331. Page = (PPFN_NUMBER)(Mdl + 1);
  3332. NumberOfBytes = Mdl->ByteCount;
  3333. while (NumberOfBytes > 0) {
  3334. Pfn1 = MI_PFN_ELEMENT (*Page);
  3335. Pfn1->u1.Event = Event;
  3336. Pfn1->PteAddress = BasePte;
  3337. Pfn1->OriginalPte = *BasePte;
  3338. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  3339. if (WorkingSetIndex == MI_PROTOTYPE_WSINDEX) {
  3340. Pfn1->u3.e1.PrototypePte = 1;
  3341. }
  3342. MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1, 10);
  3343. Pfn1->u3.e2.ReferenceCount += 1;
  3344. Pfn1->u2.ShareCount = 0;
  3345. Pfn1->u3.e1.ReadInProgress = 1;
  3346. Pfn1->u3.e1.CacheAttribute = MiCached;
  3347. Pfn1->u4.InPageError = 0;
  3348. //
  3349. // Determine the page frame number of the page table page which
  3350. // contains this PTE.
  3351. //
  3352. PteFramePointer = MiGetPteAddress(BasePte);
  3353. if (PteFramePointer->u.Hard.Valid == 0) {
  3354. #if (_MI_PAGING_LEVELS < 3)
  3355. if (!NT_SUCCESS(MiCheckPdeForPagedPool (BasePte))) {
  3356. #endif
  3357. KeBugCheckEx (MEMORY_MANAGEMENT,
  3358. 0x61940,
  3359. (ULONG_PTR)BasePte,
  3360. (ULONG_PTR)PteFramePointer->u.Long,
  3361. (ULONG_PTR)MiGetVirtualAddressMappedByPte(BasePte));
  3362. #if (_MI_PAGING_LEVELS < 3)
  3363. }
  3364. #endif
  3365. }
  3366. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  3367. Pfn1->u4.PteFrame = PteFramePage;
  3368. //
  3369. // Put the PTE into the transition state, no cache flush needed as
  3370. // PTE is still not valid.
  3371. //
  3372. MI_MAKE_TRANSITION_PTE (TempPte,
  3373. *Page,
  3374. BasePte->u.Soft.Protection,
  3375. BasePte);
  3376. MI_WRITE_INVALID_PTE (BasePte, TempPte);
  3377. //
  3378. // Increment the share count for the page table page containing
  3379. // this PTE as the PTE just went into the transition state.
  3380. //
  3381. ASSERT (PteFramePage != 0);
  3382. Pfn2 = MI_PFN_ELEMENT (PteFramePage);
  3383. Pfn2->u2.ShareCount += 1;
  3384. NumberOfBytes -= PAGE_SIZE;
  3385. Page += 1;
  3386. BasePte += 1;
  3387. }
  3388. return;
  3389. }
  3390. VOID
  3391. MiInitializeTransitionPfn (
  3392. IN PFN_NUMBER PageFrameIndex,
  3393. IN PMMPTE PointerPte
  3394. )
  3395. /*++
  3396. Routine Description:
  3397. This function initializes the specified PFN element to the
  3398. transition state. Main use is by MapImageFile to make the
  3399. page which contains the image header transition in the
  3400. prototype PTEs.
  3401. Arguments:
  3402. PageFrameIndex - Supplies the page frame index to be initialized.
  3403. PointerPte - Supplies an invalid, non-transition PTE to initialize.
  3404. Return Value:
  3405. None.
  3406. Environment:
  3407. Kernel mode, APCs disabled, PFN lock held.
  3408. --*/
  3409. {
  3410. PMMPFN Pfn1;
  3411. PMMPFN Pfn2;
  3412. PMMPTE PteFramePointer;
  3413. PFN_NUMBER PteFramePage;
  3414. MMPTE TempPte;
  3415. MM_PFN_LOCK_ASSERT();
  3416. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  3417. Pfn1->u1.Event = NULL;
  3418. Pfn1->PteAddress = PointerPte;
  3419. Pfn1->OriginalPte = *PointerPte;
  3420. ASSERT (!((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
  3421. (Pfn1->OriginalPte.u.Soft.Transition == 1)));
  3422. //
  3423. // Don't change the reference count (it should already be 1).
  3424. //
  3425. Pfn1->u2.ShareCount = 0;
  3426. //
  3427. // No WSLE is required because this is a prototype PTE.
  3428. //
  3429. Pfn1->u3.e1.PrototypePte = 1;
  3430. Pfn1->u3.e1.PageLocation = TransitionPage;
  3431. Pfn1->u3.e1.CacheAttribute = MiCached;
  3432. //
  3433. // Determine the page frame number of the page table page which
  3434. // contains this PTE.
  3435. //
  3436. PteFramePointer = MiGetPteAddress(PointerPte);
  3437. if (PteFramePointer->u.Hard.Valid == 0) {
  3438. #if (_MI_PAGING_LEVELS < 3)
  3439. if (!NT_SUCCESS(MiCheckPdeForPagedPool (PointerPte))) {
  3440. #endif
  3441. KeBugCheckEx (MEMORY_MANAGEMENT,
  3442. 0x61940,
  3443. (ULONG_PTR)PointerPte,
  3444. (ULONG_PTR)PteFramePointer->u.Long,
  3445. (ULONG_PTR)MiGetVirtualAddressMappedByPte(PointerPte));
  3446. #if (_MI_PAGING_LEVELS < 3)
  3447. }
  3448. #endif
  3449. }
  3450. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  3451. Pfn1->u4.PteFrame = PteFramePage;
  3452. //
  3453. // Put the PTE into the transition state, no cache flush needed as
  3454. // PTE is still not valid.
  3455. //
  3456. MI_MAKE_TRANSITION_PTE (TempPte,
  3457. PageFrameIndex,
  3458. PointerPte->u.Soft.Protection,
  3459. PointerPte);
  3460. MI_WRITE_INVALID_PTE (PointerPte, TempPte);
  3461. //
  3462. // Increment the share count for the page table page containing
  3463. // this PTE as the PTE just went into the transition state.
  3464. //
  3465. Pfn2 = MI_PFN_ELEMENT (PteFramePage);
  3466. ASSERT (PteFramePage != 0);
  3467. Pfn2->u2.ShareCount += 1;
  3468. return;
  3469. }
  3470. VOID
  3471. MiInitializeCopyOnWritePfn (
  3472. IN PFN_NUMBER PageFrameIndex,
  3473. IN PMMPTE PointerPte,
  3474. IN WSLE_NUMBER WorkingSetIndex,
  3475. IN PVOID SessionPointer
  3476. )
  3477. /*++
  3478. Routine Description:
  3479. This function initializes the specified PFN element to the
  3480. active and valid state for a copy on write operation.
  3481. In this case the page table page which contains the PTE has
  3482. the proper ShareCount.
  3483. Arguments:
  3484. PageFrameIndex - Supplies the page frame number to initialize.
  3485. PointerPte - Supplies the pointer to the PTE which caused the
  3486. page fault.
  3487. WorkingSetIndex - Supplies the working set index for the corresponding
  3488. virtual address.
  3489. SessionPointer - Supplies the session space pointer if this fault is for
  3490. a session space page or NULL if this is for a user page.
  3491. Return Value:
  3492. None.
  3493. Environment:
  3494. Kernel mode, APCs disabled, PFN lock held.
  3495. --*/
  3496. {
  3497. PMMPFN Pfn1;
  3498. PMMPTE PteFramePointer;
  3499. PFN_NUMBER PteFramePage;
  3500. PVOID VirtualAddress;
  3501. PMM_SESSION_SPACE SessionSpace;
  3502. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  3503. Pfn1->PteAddress = PointerPte;
  3504. //
  3505. // Get the protection for the page.
  3506. //
  3507. VirtualAddress = MiGetVirtualAddressMappedByPte (PointerPte);
  3508. Pfn1->OriginalPte.u.Long = 0;
  3509. if (SessionPointer) {
  3510. Pfn1->OriginalPte.u.Soft.Protection = MM_EXECUTE_READWRITE;
  3511. SessionSpace = (PMM_SESSION_SPACE) SessionPointer;
  3512. SessionSpace->Wsle[WorkingSetIndex].u1.e1.Protection =
  3513. MM_EXECUTE_READWRITE;
  3514. }
  3515. else {
  3516. Pfn1->OriginalPte.u.Soft.Protection =
  3517. MI_MAKE_PROTECT_NOT_WRITE_COPY (
  3518. MmWsle[WorkingSetIndex].u1.e1.Protection);
  3519. }
  3520. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  3521. Pfn1->u3.e2.ReferenceCount += 1;
  3522. Pfn1->u2.ShareCount += 1;
  3523. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  3524. Pfn1->u3.e1.CacheAttribute = MiCached;
  3525. Pfn1->u1.WsIndex = WorkingSetIndex;
  3526. //
  3527. // Determine the page frame number of the page table page which
  3528. // contains this PTE.
  3529. //
  3530. PteFramePointer = MiGetPteAddress(PointerPte);
  3531. if (PteFramePointer->u.Hard.Valid == 0) {
  3532. #if (_MI_PAGING_LEVELS < 3)
  3533. if (!NT_SUCCESS(MiCheckPdeForPagedPool (PointerPte))) {
  3534. #endif
  3535. KeBugCheckEx (MEMORY_MANAGEMENT,
  3536. 0x61940,
  3537. (ULONG_PTR)PointerPte,
  3538. (ULONG_PTR)PteFramePointer->u.Long,
  3539. (ULONG_PTR)MiGetVirtualAddressMappedByPte(PointerPte));
  3540. #if (_MI_PAGING_LEVELS < 3)
  3541. }
  3542. #endif
  3543. }
  3544. PteFramePage = MI_GET_PAGE_FRAME_FROM_PTE (PteFramePointer);
  3545. ASSERT (PteFramePage != 0);
  3546. Pfn1->u4.PteFrame = PteFramePage;
  3547. //
  3548. // Set the modified flag in the PFN database as we are writing
  3549. // into this page and the dirty bit is already set in the PTE.
  3550. //
  3551. MI_SET_MODIFIED (Pfn1, 1, 0xC);
  3552. return;
  3553. }
  3554. BOOLEAN
  3555. MmIsAddressValid (
  3556. IN PVOID VirtualAddress
  3557. )
  3558. /*++
  3559. Routine Description:
  3560. For a given virtual address this function returns TRUE if no page fault
  3561. will occur for a read operation on the address, FALSE otherwise.
  3562. Note that after this routine was called, if appropriate locks are not
  3563. held, a non-faulting address could fault.
  3564. Arguments:
  3565. VirtualAddress - Supplies the virtual address to check.
  3566. Return Value:
  3567. TRUE if no page fault would be generated reading the virtual address,
  3568. FALSE otherwise.
  3569. Environment:
  3570. Kernel mode.
  3571. --*/
  3572. {
  3573. PMMPTE PointerPte;
  3574. #if defined(_IA64_)
  3575. ULONG Region;
  3576. Region = (ULONG)(((ULONG_PTR) VirtualAddress & VRN_MASK) >> 61);
  3577. if ((Region == 0) || (Region == 1) || (Region == 4) || (Region == 7)) {
  3578. NOTHING;
  3579. }
  3580. else {
  3581. return FALSE;
  3582. }
  3583. if (MiIsVirtualAddressMappedByTr (VirtualAddress) == TRUE) {
  3584. return TRUE;
  3585. }
  3586. if (MiMappingsInitialized == FALSE) {
  3587. return FALSE;
  3588. }
  3589. #endif
  3590. #if defined (_AMD64_)
  3591. //
  3592. // If this is within the physical addressing range, just return TRUE.
  3593. //
  3594. if (MI_IS_PHYSICAL_ADDRESS(VirtualAddress)) {
  3595. PFN_NUMBER PageFrameIndex;
  3596. //
  3597. // Only bound with MmHighestPhysicalPage once Mm has initialized.
  3598. //
  3599. if (MmHighestPhysicalPage != 0) {
  3600. PageFrameIndex = MI_CONVERT_PHYSICAL_TO_PFN(VirtualAddress);
  3601. if (PageFrameIndex > MmHighestPhysicalPage) {
  3602. return FALSE;
  3603. }
  3604. }
  3605. return TRUE;
  3606. }
  3607. #endif
  3608. //
  3609. // If the address is not canonical then return FALSE as the caller (which
  3610. // may be the kernel debugger) is not expecting to get an unimplemented
  3611. // address bit fault.
  3612. //
  3613. if (MI_RESERVED_BITS_CANONICAL(VirtualAddress) == FALSE) {
  3614. return FALSE;
  3615. }
  3616. #if (_MI_PAGING_LEVELS >= 4)
  3617. PointerPte = MiGetPxeAddress (VirtualAddress);
  3618. if (PointerPte->u.Hard.Valid == 0) {
  3619. return FALSE;
  3620. }
  3621. #endif
  3622. #if (_MI_PAGING_LEVELS >= 3)
  3623. PointerPte = MiGetPpeAddress (VirtualAddress);
  3624. if (PointerPte->u.Hard.Valid == 0) {
  3625. return FALSE;
  3626. }
  3627. #endif
  3628. PointerPte = MiGetPdeAddress (VirtualAddress);
  3629. if (PointerPte->u.Hard.Valid == 0) {
  3630. return FALSE;
  3631. }
  3632. #ifdef _X86_
  3633. if (PointerPte->u.Hard.LargePage == 1) {
  3634. return TRUE;
  3635. }
  3636. #endif //_X86_
  3637. PointerPte = MiGetPteAddress (VirtualAddress);
  3638. if (PointerPte->u.Hard.Valid == 0) {
  3639. return FALSE;
  3640. }
  3641. #ifdef _X86_
  3642. //
  3643. // Make sure we're not treating a page directory as a page table here for
  3644. // the case where the page directory is mapping a large page. This is
  3645. // because the large page bit is valid in PDE formats, but reserved in
  3646. // PTE formats and will cause a trap. A virtual address like c0200000
  3647. // triggers this case. It's not enough to just check the large page bit
  3648. // in the PTE below because of course that bit's been reused by other
  3649. // steppings of the processor so we have to look at the address too.
  3650. //
  3651. if (PointerPte->u.Hard.LargePage == 1) {
  3652. PVOID Va;
  3653. Va = MiGetVirtualAddressMappedByPde (PointerPte);
  3654. if (MI_IS_PHYSICAL_ADDRESS(Va)) {
  3655. return FALSE;
  3656. }
  3657. }
  3658. #endif
  3659. #if defined(_IA64_)
  3660. if (MI_GET_ACCESSED_IN_PTE (PointerPte) == 0) {
  3661. //
  3662. // Even though the address is valid, the access bit is off so a
  3663. // reference would cause a fault so return FALSE. We may want to
  3664. // rethink this later to instead update the PTE accessed bit if the
  3665. // PFN lock and relevant working set mutex are not currently held.
  3666. //
  3667. return FALSE;
  3668. }
  3669. #endif
  3670. return TRUE;
  3671. }
  3672. VOID
  3673. MiInitializePfnForOtherProcess (
  3674. IN PFN_NUMBER PageFrameIndex,
  3675. IN PMMPTE PointerPte,
  3676. IN PFN_NUMBER ContainingPageFrame
  3677. )
  3678. /*++
  3679. Routine Description:
  3680. This function initializes the specified PFN element to the
  3681. active and valid state with the dirty bit on in the PTE and
  3682. the PFN database marked as modified.
  3683. As this PTE is not visible from the current process, the containing
  3684. page frame must be supplied at the PTE contents field for the
  3685. PFN database element are set to demand zero.
  3686. Arguments:
  3687. PageFrameIndex - Supplies the page frame number of which to initialize.
  3688. PointerPte - Supplies the pointer to the PTE which caused the
  3689. page fault.
  3690. ContainingPageFrame - Supplies the page frame number of the page
  3691. table page which contains this PTE.
  3692. If the ContainingPageFrame is 0, then
  3693. the ShareCount for the
  3694. containing page is not incremented.
  3695. Return Value:
  3696. None.
  3697. Environment:
  3698. Kernel mode, APCs disabled, PFN lock held.
  3699. --*/
  3700. {
  3701. PMMPFN Pfn1;
  3702. PMMPFN Pfn2;
  3703. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  3704. Pfn1->PteAddress = PointerPte;
  3705. Pfn1->OriginalPte.u.Long = MM_DEMAND_ZERO_WRITE_PTE;
  3706. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  3707. Pfn1->u3.e2.ReferenceCount += 1;
  3708. #if DBG
  3709. if (Pfn1->u3.e2.ReferenceCount > 1) {
  3710. DbgPrint("MM:incrementing ref count > 1 \n");
  3711. MiFormatPfn(Pfn1);
  3712. MiFormatPte(PointerPte);
  3713. }
  3714. #endif
  3715. Pfn1->u2.ShareCount += 1;
  3716. Pfn1->u3.e1.PageLocation = ActiveAndValid;
  3717. //
  3718. // Set the page attribute to cached even though it isn't really mapped
  3719. // into a TB entry yet - it will be when the I/O completes and in the
  3720. // future, may get paged in and out multiple times and will be marked
  3721. // as cached in those transactions also. If in fact the driver stack
  3722. // wants to map it some other way for the transfer, the correct mapping
  3723. // will get used regardless.
  3724. //
  3725. Pfn1->u3.e1.CacheAttribute = MiCached;
  3726. MI_SET_MODIFIED (Pfn1, 1, 0xD);
  3727. Pfn1->u4.InPageError = 0;
  3728. //
  3729. // Increment the share count for the page table page containing
  3730. // this PTE.
  3731. //
  3732. if (ContainingPageFrame != 0) {
  3733. Pfn1->u4.PteFrame = ContainingPageFrame;
  3734. Pfn2 = MI_PFN_ELEMENT (ContainingPageFrame);
  3735. Pfn2->u2.ShareCount += 1;
  3736. }
  3737. return;
  3738. }
  3739. VOID
  3740. MiAddValidPageToWorkingSet (
  3741. IN PVOID VirtualAddress,
  3742. IN PMMPTE PointerPte,
  3743. IN PMMPFN Pfn1,
  3744. IN ULONG WsleMask
  3745. )
  3746. /*++
  3747. Routine Description:
  3748. This routine adds the specified virtual address into the
  3749. appropriate working set list.
  3750. Arguments:
  3751. VirtualAddress - Supplies the address to add to the working set list.
  3752. PointerPte - Supplies a pointer to the PTE that is now valid.
  3753. Pfn1 - Supplies the PFN database element for the physical page
  3754. mapped by the virtual address.
  3755. WsleMask - Supplies a mask (protection and flags) to OR into the
  3756. working set list entry.
  3757. Return Value:
  3758. None.
  3759. Environment:
  3760. Kernel mode, APCs disabled, working set lock. PFN lock NOT held.
  3761. --*/
  3762. {
  3763. WSLE_NUMBER WorkingSetIndex;
  3764. PEPROCESS Process;
  3765. PMMSUPPORT WsInfo;
  3766. PMMWSLE Wsle;
  3767. #if !DBG
  3768. UNREFERENCED_PARAMETER (PointerPte);
  3769. #endif
  3770. ASSERT (MI_IS_PAGE_TABLE_ADDRESS(PointerPte));
  3771. ASSERT (PointerPte->u.Hard.Valid == 1);
  3772. if (MI_IS_SESSION_ADDRESS (VirtualAddress) || MI_IS_SESSION_PTE (VirtualAddress)) {
  3773. //
  3774. // Current process's session space working set.
  3775. //
  3776. WsInfo = &MmSessionSpace->Vm;
  3777. Wsle = MmSessionSpace->Wsle;
  3778. }
  3779. else if (MI_IS_PROCESS_SPACE_ADDRESS(VirtualAddress)) {
  3780. //
  3781. // Per process working set.
  3782. //
  3783. Process = PsGetCurrentProcess();
  3784. WsInfo = &Process->Vm;
  3785. Wsle = MmWsle;
  3786. PERFINFO_ADDTOWS(Pfn1, VirtualAddress, Process->UniqueProcessId)
  3787. }
  3788. else {
  3789. //
  3790. // System cache working set.
  3791. //
  3792. WsInfo = &MmSystemCacheWs;
  3793. Wsle = MmSystemCacheWsle;
  3794. PERFINFO_ADDTOWS(Pfn1, VirtualAddress, (HANDLE) -1);
  3795. }
  3796. WorkingSetIndex = MiLocateAndReserveWsle (WsInfo);
  3797. MiUpdateWsle (&WorkingSetIndex,
  3798. VirtualAddress,
  3799. WsInfo->VmWorkingSetList,
  3800. Pfn1);
  3801. Wsle[WorkingSetIndex].u1.Long |= WsleMask;
  3802. #if DBG
  3803. if (MI_IS_SYSTEM_CACHE_ADDRESS(VirtualAddress)) {
  3804. ASSERT (MmSystemCacheWsle[WorkingSetIndex].u1.e1.SameProtectAsProto);
  3805. }
  3806. #endif //DBG
  3807. MI_SET_PTE_IN_WORKING_SET (PointerPte, WorkingSetIndex);
  3808. KeFillEntryTb ((PHARDWARE_PTE)PointerPte, VirtualAddress, FALSE);
  3809. return;
  3810. }
  3811. PMMINPAGE_SUPPORT
  3812. MiGetInPageSupportBlock (
  3813. IN LOGICAL PfnHeld,
  3814. IN PEPROCESS Process
  3815. )
  3816. /*++
  3817. Routine Description:
  3818. This routine acquires an inpage support block. If none are available,
  3819. the PFN lock will be released and reacquired to add an entry to the list.
  3820. NULL will then be returned.
  3821. Arguments:
  3822. PfnHeld - Supplies TRUE if the caller holds the PFN lock, FALSE if not.
  3823. Process - Supplies context if the working set mutex needs to be released
  3824. and reacquired.
  3825. Return Value:
  3826. A non-null pointer to an inpage block if one is already available.
  3827. The PFN lock is not released in this path.
  3828. NULL is returned if no inpage blocks were available. In this path, the
  3829. PFN lock is released and an entry is added - but NULL is still returned
  3830. so the caller is aware that the state has changed due to the lock release
  3831. and reacquisition.
  3832. Environment:
  3833. Kernel mode, PFN lock may optionally be held.
  3834. --*/
  3835. {
  3836. KIRQL OldIrql;
  3837. KIRQL Ignore;
  3838. ULONG Relock;
  3839. LOGICAL WsHeldSafe;
  3840. PMMINPAGE_SUPPORT Support;
  3841. PSINGLE_LIST_ENTRY SingleListEntry;
  3842. #if DBG
  3843. if (PfnHeld == TRUE) {
  3844. MM_PFN_LOCK_ASSERT();
  3845. }
  3846. else {
  3847. ASSERT (KeGetCurrentIrql() < DISPATCH_LEVEL);
  3848. }
  3849. #endif
  3850. if (ExQueryDepthSList (&MmInPageSupportSListHead) != 0) {
  3851. SingleListEntry = InterlockedPopEntrySList (&MmInPageSupportSListHead);
  3852. if (SingleListEntry != NULL) {
  3853. Support = CONTAINING_RECORD (SingleListEntry,
  3854. MMINPAGE_SUPPORT,
  3855. ListEntry);
  3856. returnok:
  3857. ASSERT (Support->WaitCount == 1);
  3858. ASSERT (Support->u1.e1.PrefetchMdlHighBits == 0);
  3859. ASSERT (Support->u1.LongFlags == 0);
  3860. ASSERT (KeReadStateEvent (&Support->Event) == 0);
  3861. ASSERT64 (Support->UsedPageTableEntries == 0);
  3862. Support->Thread = PsGetCurrentThread();
  3863. #if DBG
  3864. Support->ListEntry.Next = NULL;
  3865. #endif
  3866. return Support;
  3867. }
  3868. }
  3869. if (PfnHeld == TRUE) {
  3870. UNLOCK_PFN (APC_LEVEL);
  3871. }
  3872. Support = ExAllocatePoolWithTag (NonPagedPool,
  3873. sizeof(MMINPAGE_SUPPORT),
  3874. 'nImM');
  3875. if (Support != NULL) {
  3876. KeInitializeEvent (&Support->Event, NotificationEvent, FALSE);
  3877. Support->WaitCount = 1;
  3878. Support->u1.LongFlags = 0;
  3879. ASSERT (Support->u1.PrefetchMdl == NULL);
  3880. ASSERT (KeReadStateEvent (&Support->Event) == 0);
  3881. #if defined (_WIN64)
  3882. Support->UsedPageTableEntries = 0;
  3883. #endif
  3884. #if DBG
  3885. Support->Thread = NULL;
  3886. #endif
  3887. if (PfnHeld == FALSE) {
  3888. goto returnok;
  3889. }
  3890. InterlockedPushEntrySList (&MmInPageSupportSListHead,
  3891. (PSINGLE_LIST_ENTRY)&Support->ListEntry);
  3892. }
  3893. else {
  3894. //
  3895. // Initializing WsHeldSafe is not needed for
  3896. // correctness but without it the compiler cannot compile this code
  3897. // W4 to check for use of uninitialized variables.
  3898. //
  3899. WsHeldSafe = FALSE;
  3900. //
  3901. // No pool is available - don't let a high priority thread consume
  3902. // the machine in a continuous refault stream. This delay allows
  3903. // other system threads to run which will try to free up more pool.
  3904. // Release the relevant working set mutex (if any) so the current
  3905. // process can be trimmed for pages also.
  3906. //
  3907. Relock = FALSE;
  3908. if (Process == HYDRA_PROCESS) {
  3909. UNLOCK_SESSION_SPACE_WS (APC_LEVEL);
  3910. }
  3911. else if (Process == PREFETCH_PROCESS) {
  3912. //
  3913. // No mutex is held in this instance.
  3914. //
  3915. NOTHING;
  3916. }
  3917. else if (Process != NULL) {
  3918. //
  3919. // The working set lock may have been acquired safely or unsafely
  3920. // by our caller. Handle both cases here and below.
  3921. //
  3922. UNLOCK_WS_REGARDLESS (Process, WsHeldSafe);
  3923. }
  3924. else {
  3925. if (MmSystemLockOwner == PsGetCurrentThread()) {
  3926. UNLOCK_SYSTEM_WS (APC_LEVEL);
  3927. Relock = TRUE;
  3928. }
  3929. else {
  3930. }
  3931. }
  3932. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmShortTime);
  3933. if (Process == HYDRA_PROCESS) {
  3934. LOCK_SESSION_SPACE_WS (Ignore, PsGetCurrentThread ());
  3935. }
  3936. else if (Process == PREFETCH_PROCESS) {
  3937. NOTHING;
  3938. }
  3939. else if (Process != NULL) {
  3940. //
  3941. // The working set lock may have been acquired safely or unsafely
  3942. // by our caller. Reacquire it in the same manner our caller did.
  3943. //
  3944. LOCK_WS_REGARDLESS (Process, WsHeldSafe);
  3945. }
  3946. else {
  3947. if (Relock) {
  3948. LOCK_SYSTEM_WS (Ignore, PsGetCurrentThread ());
  3949. }
  3950. }
  3951. }
  3952. if (PfnHeld == TRUE) {
  3953. LOCK_PFN (OldIrql);
  3954. }
  3955. return NULL;
  3956. }
  3957. VOID
  3958. MiFreeInPageSupportBlock (
  3959. IN PMMINPAGE_SUPPORT Support
  3960. )
  3961. /*++
  3962. Routine Description:
  3963. This routine returns the inpage support block to a list of freed blocks.
  3964. Arguments:
  3965. Support - Supplies the inpage support block to put on the free list.
  3966. Return Value:
  3967. None.
  3968. Environment:
  3969. Kernel mode, APC_LEVEL or below.
  3970. --*/
  3971. {
  3972. ASSERT (KeGetCurrentIrql() < DISPATCH_LEVEL);
  3973. ASSERT (Support->Thread != NULL);
  3974. ASSERT (Support->WaitCount != 0);
  3975. ASSERT ((Support->ListEntry.Next == NULL) ||
  3976. (Support->u1.e1.PrefetchMdlHighBits != 0));
  3977. //
  3978. // An interlock is needed for the WaitCount decrement as an inpage support
  3979. // block can be simultaneously freed by any number of threads.
  3980. //
  3981. // Careful synchronization is applied to the WaitCount field so
  3982. // that freeing of the inpage block can occur lock-free. Note
  3983. // that the ReadInProgress bit on each PFN is set and cleared while
  3984. // holding the PFN lock. Inpage blocks are always (and must be)
  3985. // freed _AFTER_ the ReadInProgress bit is cleared.
  3986. //
  3987. if (InterlockedDecrement (&Support->WaitCount) == 0) {
  3988. if (Support->u1.e1.PrefetchMdlHighBits != 0) {
  3989. PMDL Mdl;
  3990. Mdl = MI_EXTRACT_PREFETCH_MDL (Support);
  3991. if (Mdl != &Support->Mdl) {
  3992. ExFreePool (Mdl);
  3993. }
  3994. }
  3995. if (ExQueryDepthSList (&MmInPageSupportSListHead) < MmInPageSupportMinimum) {
  3996. Support->WaitCount = 1;
  3997. Support->u1.LongFlags = 0;
  3998. KeClearEvent (&Support->Event);
  3999. #if defined (_WIN64)
  4000. Support->UsedPageTableEntries = 0;
  4001. #endif
  4002. #if DBG
  4003. Support->Thread = NULL;
  4004. #endif
  4005. InterlockedPushEntrySList (&MmInPageSupportSListHead,
  4006. (PSINGLE_LIST_ENTRY)&Support->ListEntry);
  4007. return;
  4008. }
  4009. ExFreePool (Support);
  4010. }
  4011. return;
  4012. }
  4013. VOID
  4014. MiHandleBankedSection (
  4015. IN PVOID VirtualAddress,
  4016. IN PMMVAD Vad
  4017. )
  4018. /*++
  4019. Routine Description:
  4020. This routine invalidates a bank of video memory, calls out to the
  4021. video driver and then enables the next bank of video memory.
  4022. Arguments:
  4023. VirtualAddress - Supplies the address of the faulting page.
  4024. Vad - Supplies the VAD which maps the range.
  4025. Return Value:
  4026. None.
  4027. Environment:
  4028. Kernel mode, PFN lock held.
  4029. --*/
  4030. {
  4031. PMMBANKED_SECTION Bank;
  4032. PMMPTE PointerPte;
  4033. ULONG BankNumber;
  4034. ULONG size;
  4035. Bank = ((PMMVAD_LONG) Vad)->u4.Banked;
  4036. size = Bank->BankSize;
  4037. RtlFillMemory (Bank->CurrentMappedPte,
  4038. size >> (PAGE_SHIFT - PTE_SHIFT),
  4039. (UCHAR)ZeroPte.u.Long);
  4040. //
  4041. // Flush the TB as we have invalidated all the PTEs in this range.
  4042. //
  4043. KeFlushEntireTb (TRUE, TRUE);
  4044. //
  4045. // Calculate new bank address and bank number.
  4046. //
  4047. PointerPte = MiGetPteAddress (
  4048. (PVOID)((ULONG_PTR)VirtualAddress & ~((LONG)size - 1)));
  4049. Bank->CurrentMappedPte = PointerPte;
  4050. BankNumber = (ULONG)(((PCHAR)PointerPte - (PCHAR)Bank->BasedPte) >> Bank->BankShift);
  4051. (Bank->BankedRoutine) (BankNumber, BankNumber, Bank->Context);
  4052. //
  4053. // Set the new range valid.
  4054. //
  4055. RtlCopyMemory (PointerPte,
  4056. &Bank->BankTemplate[0],
  4057. size >> (PAGE_SHIFT - PTE_SHIFT));
  4058. return;
  4059. }
  4060. NTSTATUS
  4061. MiSessionCopyOnWrite (
  4062. IN PVOID FaultingAddress,
  4063. IN PMMPTE PointerPte
  4064. )
  4065. /*++
  4066. Routine Description:
  4067. This function handles copy on write for image mapped session space.
  4068. Arguments:
  4069. FaultingAddress - Supplies the address which caused the page fault.
  4070. PointerPte - Supplies the pointer to the PTE which caused the page fault.
  4071. Return Value:
  4072. STATUS_SUCCESS.
  4073. Environment:
  4074. Kernel mode, APCs disabled, session WSL held.
  4075. --*/
  4076. {
  4077. MMPTE TempPte;
  4078. MMPTE PreviousPte;
  4079. PFN_NUMBER PageFrameIndex;
  4080. PFN_NUMBER NewPageIndex;
  4081. PULONG CopyTo;
  4082. KIRQL OldIrql;
  4083. PMMPFN Pfn1;
  4084. WSLE_NUMBER WorkingSetIndex;
  4085. PEPROCESS Process;
  4086. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerPte);
  4087. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  4088. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  4089. WorkingSetIndex = MiLocateWsle (FaultingAddress,
  4090. MmSessionSpace->Vm.VmWorkingSetList,
  4091. Pfn1->u1.WsIndex);
  4092. LOCK_PFN (OldIrql);
  4093. //
  4094. // The page must be copied into a new page.
  4095. //
  4096. if (MiEnsureAvailablePageOrWait(HYDRA_PROCESS, NULL)) {
  4097. //
  4098. // A wait operation was performed to obtain an available
  4099. // page and the working set mutex and PFN lock have
  4100. // been released and various things may have changed for
  4101. // the worse. Rather than examine all the conditions again,
  4102. // return and if things are still proper, the fault will
  4103. // be taken again.
  4104. //
  4105. UNLOCK_PFN (OldIrql);
  4106. return STATUS_SUCCESS;
  4107. }
  4108. TempPte = *PointerPte;
  4109. ASSERT ((TempPte.u.Hard.Valid == 1) && (TempPte.u.Hard.Write == 0));
  4110. //
  4111. // Increment the number of private pages.
  4112. //
  4113. MmInfoCounters.CopyOnWriteCount += 1;
  4114. MmSessionSpace->CopyOnWriteCount += 1;
  4115. //
  4116. // A page is being copied and made private, the global state of
  4117. // the shared page does not need to be updated at this point because
  4118. // it is guaranteed to be clean - no POSIX-style forking is allowed on
  4119. // session addresses.
  4120. //
  4121. #if 0
  4122. //
  4123. // This ASSERT is triggered if the session image came from removable media
  4124. // (ie: a special CD install, etc) so it cannot be enabled.
  4125. //
  4126. ASSERT (Pfn1->u3.e1.Modified == 0);
  4127. #endif
  4128. ASSERT (!MI_IS_PTE_DIRTY(TempPte));
  4129. //
  4130. // Get a new page to copy this one into.
  4131. //
  4132. NewPageIndex = MiRemoveAnyPage(MI_GET_PAGE_COLOR_FROM_SESSION(MmSessionSpace));
  4133. MiInitializeCopyOnWritePfn (NewPageIndex,
  4134. PointerPte,
  4135. WorkingSetIndex,
  4136. MmSessionSpace);
  4137. UNLOCK_PFN (OldIrql);
  4138. //
  4139. // Copy the accessed readonly page into the newly allocated writable page.
  4140. //
  4141. Process = PsGetCurrentProcess ();
  4142. CopyTo = (PULONG)MiMapPageInHyperSpace (Process, NewPageIndex, &OldIrql);
  4143. RtlCopyMemory (CopyTo, PAGE_ALIGN (FaultingAddress), PAGE_SIZE);
  4144. MiUnmapPageInHyperSpace (Process, CopyTo, OldIrql);
  4145. //
  4146. // Since the page was a copy on write page, make it
  4147. // accessed, dirty and writable. Also clear the copy-on-write
  4148. // bit in the PTE.
  4149. //
  4150. MI_SET_PTE_DIRTY (TempPte);
  4151. TempPte.u.Hard.Write = 1;
  4152. MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
  4153. TempPte.u.Hard.CopyOnWrite = 0;
  4154. TempPte.u.Hard.PageFrameNumber = NewPageIndex;
  4155. ASSERT (TempPte.u.Hard.Valid == 1);
  4156. LOCK_PFN (OldIrql);
  4157. //
  4158. // Flush the TB entry for this page.
  4159. //
  4160. MI_FLUSH_SINGLE_SESSION_TB (FaultingAddress,
  4161. TRUE,
  4162. TRUE,
  4163. (PHARDWARE_PTE)PointerPte,
  4164. TempPte.u.Flush,
  4165. PreviousPte);
  4166. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  4167. //
  4168. // Decrement the share count for the page which was copied
  4169. // as this PTE no longer refers to it.
  4170. //
  4171. MiDecrementShareCount (PageFrameIndex);
  4172. UNLOCK_PFN (OldIrql);
  4173. return STATUS_SUCCESS;
  4174. }
  4175. #if DBG
  4176. VOID
  4177. MiCheckFileState (
  4178. IN PMMPFN Pfn
  4179. )
  4180. {
  4181. PSUBSECTION Subsection;
  4182. LARGE_INTEGER StartingOffset;
  4183. if (Pfn->u3.e1.PrototypePte == 0) {
  4184. return;
  4185. }
  4186. if (Pfn->OriginalPte.u.Soft.Prototype == 0) {
  4187. return;
  4188. }
  4189. Subsection = MiGetSubsectionAddress (&(Pfn->OriginalPte));
  4190. if (Subsection->ControlArea->u.Flags.NoModifiedWriting) {
  4191. return;
  4192. }
  4193. StartingOffset.QuadPart = MiStartingOffset (Subsection,
  4194. Pfn->PteAddress);
  4195. DbgPrint("file: %lx offset: %I64X\n",
  4196. Subsection->ControlArea->FilePointer,
  4197. StartingOffset.QuadPart);
  4198. return;
  4199. }
  4200. #endif //DBG