Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2581 lines
74 KiB

  1. /*++
  2. Copyright (c) 1999 Microsoft Corporation
  3. Module Name:
  4. pfsup.c
  5. Abstract:
  6. This module contains the Mm support routines for prefetching groups of pages
  7. from secondary storage.
  8. The caller builds a list of various file objects and logical block offsets,
  9. passing them to MmPrefetchPages. The code here then examines the
  10. internal pages, reading in those that are not already valid or in
  11. transition. These pages are read with a single read, using a dummy page
  12. to bridge small gaps. If the gap is "large", then separate reads are
  13. issued.
  14. Upon conclusion of all the I/Os, control is returned to the calling
  15. thread, and any pages that needed to be read are placed in transition
  16. within the prototype PTE-managed segments. Thus any future references
  17. to these pages should result in soft faults only, provided these pages
  18. do not themselves get trimmed under memory pressure.
  19. Author:
  20. Landy Wang (landyw) 09-Jul-1999
  21. Revision History:
  22. --*/
  23. #include "mi.h"
  24. #if DBG
  25. ULONG MiPfDebug;
  26. #define MI_PF_FORCE_PREFETCH 0x1 // Trim all user pages to force prefetch
  27. #define MI_PF_DELAY 0x2 // Delay hoping to trigger collisions
  28. #define MI_PF_VERBOSE 0x4 // Verbose printing
  29. #define MI_PF_PRINT_ERRORS 0x8 // Print to debugger on errors
  30. #endif
  31. //
  32. // If an MDL contains DUMMY_RATIO times as many dummy pages as real pages
  33. // then don't bother with the read.
  34. //
  35. #define DUMMY_RATIO 16
  36. //
  37. // If two consecutive read-list entries are more than "seek threshold"
  38. // distance apart, the read-list is split between these entries. Otherwise
  39. // the dummy page is used for the gap and only one MDL is used.
  40. //
  41. #define SEEK_THRESHOLD ((128 * 1024) / PAGE_SIZE)
  42. //
  43. // Minimum number of pages to prefetch per section.
  44. //
  45. #define MINIMUM_READ_LIST_PAGES 1
  46. //
  47. // Read-list structures.
  48. //
  49. typedef struct _RLETYPE {
  50. ULONG_PTR Partial : 1; // This entry is a partial page.
  51. ULONG_PTR NewSubsection : 1; // This entry starts in the next subsection.
  52. ULONG_PTR DontUse : 30;
  53. } RLETYPE;
  54. typedef struct _MI_READ_LIST_ENTRY {
  55. union {
  56. PMMPTE PrototypePte;
  57. RLETYPE e1;
  58. } u1;
  59. } MI_READ_LIST_ENTRY, *PMI_READ_LIST_ENTRY;
  60. #define MI_RLEPROTO_BITS 3
  61. #define MI_RLEPROTO_TO_PROTO(ProtoPte) ((PMMPTE)((ULONG_PTR)ProtoPte & ~MI_RLEPROTO_BITS))
  62. typedef struct _MI_READ_LIST {
  63. PCONTROL_AREA ControlArea;
  64. PFILE_OBJECT FileObject;
  65. ULONG LastPteOffsetReferenced;
  66. //
  67. // Note that entries are chained through the inpage support blocks from
  68. // this listhead. This list is not protected by interlocks because it is
  69. // only accessed by the owning thread. Inpage blocks _ARE_ accessed with
  70. // interlocks when they are inserted or removed from the memory management
  71. // freelists, but by the time they get to this module they are decoupled.
  72. //
  73. SINGLE_LIST_ENTRY InPageSupportHead;
  74. MI_READ_LIST_ENTRY List[ANYSIZE_ARRAY];
  75. } MI_READ_LIST, *PMI_READ_LIST;
  76. VOID
  77. MiPfReleaseSubsectionReferences (
  78. IN PMI_READ_LIST MiReadList
  79. );
  80. VOID
  81. MiPfFreeDummyPage (
  82. IN PMMPFN DummyPagePfn
  83. );
  84. NTSTATUS
  85. MiPfPrepareReadList (
  86. IN PREAD_LIST ReadList,
  87. OUT PMI_READ_LIST *OutMiReadList
  88. );
  89. NTSTATUS
  90. MiPfPutPagesInTransition (
  91. IN PMI_READ_LIST ReadList,
  92. IN OUT PMMPFN *DummyPagePfn
  93. );
  94. VOID
  95. MiPfExecuteReadList (
  96. IN PMI_READ_LIST ReadList
  97. );
  98. VOID
  99. MiPfCompletePrefetchIos (
  100. PMI_READ_LIST ReadList
  101. );
  102. #if DBG
  103. VOID
  104. MiPfDbgDumpReadList (
  105. IN PMI_READ_LIST ReadList
  106. );
  107. VOID
  108. MiRemoveUserPages (
  109. VOID
  110. );
  111. #endif
  112. #ifdef ALLOC_PRAGMA
  113. #pragma alloc_text (PAGE, MmPrefetchPages)
  114. #pragma alloc_text (PAGE, MiPfPrepareReadList)
  115. #pragma alloc_text (PAGE, MiPfExecuteReadList)
  116. #pragma alloc_text (PAGE, MiPfReleaseSubsectionReferences)
  117. #endif
  118. NTSTATUS
  119. MmPrefetchPages (
  120. IN ULONG NumberOfLists,
  121. IN PREAD_LIST *ReadLists
  122. )
  123. /*++
  124. Routine Description:
  125. This routine reads pages described in the read-lists in the optimal fashion.
  126. This is the only externally callable prefetch routine. No component
  127. should use this interface except the cache manager.
  128. Arguments:
  129. NumberOfLists - Supplies the number of read-lists.
  130. ReadLists - Supplies an array of read-lists.
  131. Return Value:
  132. NTSTATUS codes.
  133. Environment:
  134. Kernel mode. PASSIVE_LEVEL.
  135. --*/
  136. {
  137. PMI_READ_LIST *MiReadLists;
  138. PMMPFN DummyPagePfn;
  139. NTSTATUS status;
  140. ULONG i;
  141. LOGICAL ReadBuilt;
  142. LOGICAL ApcNeeded;
  143. PETHREAD CurrentThread;
  144. NTSTATUS CauseOfReadBuildFailures;
  145. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  146. //
  147. // Allocate memory for internal Mi read-lists.
  148. //
  149. MiReadLists = (PMI_READ_LIST *) ExAllocatePoolWithTag (
  150. NonPagedPool,
  151. sizeof (PMI_READ_LIST) * NumberOfLists,
  152. 'lRmM'
  153. );
  154. if (MiReadLists == NULL) {
  155. return STATUS_INSUFFICIENT_RESOURCES;
  156. }
  157. ReadBuilt = FALSE;
  158. CauseOfReadBuildFailures = STATUS_SUCCESS;
  159. //
  160. // Prepare read-lists (determine runs and allocate MDLs).
  161. //
  162. for (i = 0; i < NumberOfLists; i += 1) {
  163. //
  164. // Note any non-null list is referenced by this call so this routine
  165. // must dereference it when done to re-enable dynamic prototype PTEs.
  166. //
  167. status = MiPfPrepareReadList (ReadLists[i], &MiReadLists[i]);
  168. //
  169. // MiPfPrepareReadList never returns half-formed inpage support
  170. // blocks and MDLs. Either nothing is returned, partial lists are
  171. // returned or a complete list is returned. Any non-null list
  172. // can therefore be processed.
  173. //
  174. if (NT_SUCCESS (status)) {
  175. if (MiReadLists[i] != NULL) {
  176. ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);
  177. ReadBuilt = TRUE;
  178. }
  179. }
  180. else {
  181. CauseOfReadBuildFailures = status;
  182. }
  183. }
  184. if (ReadBuilt == FALSE) {
  185. //
  186. // No lists were created so nothing further needs to be done.
  187. // CauseOfReadBuildFailures tells us whether this was due to all
  188. // the desired pages already being resident or that resources to
  189. // build the request could not be allocated.
  190. //
  191. ExFreePool (MiReadLists);
  192. if (CauseOfReadBuildFailures != STATUS_SUCCESS) {
  193. return CauseOfReadBuildFailures;
  194. }
  195. //
  196. // All the pages the caller asked for are already resident.
  197. //
  198. return STATUS_SUCCESS;
  199. }
  200. //
  201. // APCs must be disabled once we put a page in transition. Otherwise
  202. // a thread suspend will stop us from issuing the I/O - this will hang
  203. // any other threads that need the same page.
  204. //
  205. CurrentThread = PsGetCurrentThread();
  206. ApcNeeded = FALSE;
  207. ASSERT ((PKTHREAD)CurrentThread == KeGetCurrentThread ());
  208. KeEnterCriticalRegionThread ((PKTHREAD)CurrentThread);
  209. //
  210. // The nested fault count protects this thread from deadlocks where a
  211. // special kernel APC fires and references the same user page(s) we are
  212. // putting in transition.
  213. //
  214. KeEnterGuardedRegionThread (&CurrentThread->Tcb);
  215. ASSERT (CurrentThread->NestedFaultCount == 0);
  216. CurrentThread->NestedFaultCount += 1;
  217. KeLeaveGuardedRegionThread (&CurrentThread->Tcb);
  218. //
  219. // Allocate physical memory.
  220. //
  221. DummyPagePfn = NULL;
  222. ReadBuilt = FALSE;
  223. CauseOfReadBuildFailures = STATUS_SUCCESS;
  224. #if DBG
  225. status = 0xC0033333;
  226. #endif
  227. for (i = 0; i < NumberOfLists; i += 1) {
  228. if ((MiReadLists[i] != NULL) &&
  229. (MiReadLists[i]->InPageSupportHead.Next != NULL)) {
  230. status = MiPfPutPagesInTransition (MiReadLists[i], &DummyPagePfn);
  231. if (NT_SUCCESS (status)) {
  232. if (MiReadLists[i]->InPageSupportHead.Next != NULL) {
  233. ReadBuilt = TRUE;
  234. //
  235. // Issue I/Os.
  236. //
  237. MiPfExecuteReadList (MiReadLists[i]);
  238. }
  239. else {
  240. MiPfReleaseSubsectionReferences (MiReadLists[i]);
  241. ExFreePool (MiReadLists[i]);
  242. MiReadLists[i] = NULL;
  243. }
  244. }
  245. else {
  246. CauseOfReadBuildFailures = status;
  247. //
  248. // If not even a single page is available then don't bother
  249. // trying to prefetch anything else.
  250. //
  251. for (; i < NumberOfLists; i += 1) {
  252. if (MiReadLists[i] != NULL) {
  253. MiPfReleaseSubsectionReferences (MiReadLists[i]);
  254. ExFreePool (MiReadLists[i]);
  255. MiReadLists[i] = NULL;
  256. }
  257. }
  258. break;
  259. }
  260. }
  261. }
  262. //
  263. // At least one call to MiPfPutPagesInTransition was made, which
  264. // sets status properly.
  265. //
  266. ASSERT (status != 0xC0033333);
  267. if (ReadBuilt == TRUE) {
  268. status = STATUS_SUCCESS;
  269. //
  270. // Wait for I/Os to complete. Note APCs must remain disabled.
  271. //
  272. for (i = 0; i < NumberOfLists; i += 1) {
  273. if (MiReadLists[i] != NULL) {
  274. ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);
  275. MiPfCompletePrefetchIos (MiReadLists[i]);
  276. MiPfReleaseSubsectionReferences (MiReadLists[i]);
  277. }
  278. }
  279. }
  280. else {
  281. //
  282. // No reads were issued.
  283. //
  284. // CauseOfReadBuildFailures tells us whether this was due to all
  285. // the desired pages already being resident or that resources to
  286. // build the request could not be allocated.
  287. //
  288. status = CauseOfReadBuildFailures;
  289. }
  290. //
  291. // Put DummyPage back on the free list.
  292. //
  293. if (DummyPagePfn != NULL) {
  294. MiPfFreeDummyPage (DummyPagePfn);
  295. }
  296. //
  297. // Only when all the I/Os have been completed (not just issued) can
  298. // APCs be re-enabled. This prevents a user-issued suspend APC from
  299. // keeping a shared page in transition forever.
  300. //
  301. KeEnterGuardedRegionThread (&CurrentThread->Tcb);
  302. ASSERT (CurrentThread->NestedFaultCount == 1);
  303. CurrentThread->NestedFaultCount -= 1;
  304. if (CurrentThread->ApcNeeded == 1) {
  305. ApcNeeded = TRUE;
  306. CurrentThread->ApcNeeded = 0;
  307. }
  308. KeLeaveGuardedRegionThread (&CurrentThread->Tcb);
  309. KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);
  310. for (i = 0; i < NumberOfLists; i += 1) {
  311. if (MiReadLists[i] != NULL) {
  312. ExFreePool (MiReadLists[i]);
  313. }
  314. }
  315. ExFreePool (MiReadLists);
  316. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  317. ASSERT (CurrentThread->NestedFaultCount == 0);
  318. ASSERT (CurrentThread->ApcNeeded == 0);
  319. if (ApcNeeded == TRUE) {
  320. IoRetryIrpCompletions ();
  321. }
  322. return status;
  323. }
  324. VOID
  325. MiPfFreeDummyPage (
  326. IN PMMPFN DummyPagePfn
  327. )
  328. /*++
  329. Routine Description:
  330. This nonpaged wrapper routine frees the dummy page PFN.
  331. Arguments:
  332. DummyPagePfn - Supplies the dummy page PFN.
  333. Return Value:
  334. None.
  335. Environment:
  336. Kernel mode.
  337. --*/
  338. {
  339. KIRQL OldIrql;
  340. PFN_NUMBER PageFrameIndex;
  341. PageFrameIndex = MI_PFN_ELEMENT_TO_INDEX (DummyPagePfn);
  342. LOCK_PFN (OldIrql);
  343. ASSERT (DummyPagePfn->u2.ShareCount == 1);
  344. ASSERT (DummyPagePfn->u3.e1.PrototypePte == 0);
  345. ASSERT (DummyPagePfn->OriginalPte.u.Long == MM_DEMAND_ZERO_WRITE_PTE);
  346. ASSERT (DummyPagePfn->u3.e2.ReferenceCount == 2);
  347. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(DummyPagePfn, 17);
  348. //
  349. // Clear the read in progress bit as this page may never have used for an
  350. // I/O after all. The inpage error bit must also be cleared as any number
  351. // of errors may have occurred during reads of pages (that were immaterial
  352. // anyway).
  353. //
  354. DummyPagePfn->u3.e1.ReadInProgress = 0;
  355. DummyPagePfn->u4.InPageError = 0;
  356. MI_SET_PFN_DELETED (DummyPagePfn);
  357. MiDecrementShareCount (DummyPagePfn, PageFrameIndex);
  358. UNLOCK_PFN (OldIrql);
  359. }
  360. VOID
  361. MiMovePageToEndOfStandbyList (
  362. IN PMMPTE PointerPte
  363. )
  364. /*++
  365. Routine Description:
  366. This nonpaged routine obtains the PFN lock and moves a page to the end of
  367. the standby list (if the page is still in transition).
  368. Arguments:
  369. PointerPte - Supplies the prototype PTE to examine.
  370. Return Value:
  371. None.
  372. Environment:
  373. Kernel mode, PFN lock not held.
  374. --*/
  375. {
  376. KIRQL OldIrql;
  377. PMMPFN Pfn1;
  378. MMPTE PteContents;
  379. PFN_NUMBER PageFrameIndex;
  380. LOCK_PFN (OldIrql);
  381. if (!MiIsAddressValid (PointerPte, TRUE)) {
  382. //
  383. // If the paged pool containing the prototype PTE is not resident
  384. // then the actual page itself may still be transition or not. This
  385. // should be so rare it's not worth making the pool resident so the
  386. // proper checks can be applied. Just bail.
  387. //
  388. UNLOCK_PFN (OldIrql);
  389. return;
  390. }
  391. PteContents = *PointerPte;
  392. if ((PteContents.u.Hard.Valid == 0) &&
  393. (PteContents.u.Soft.Prototype == 0) &&
  394. (PteContents.u.Soft.Transition == 1)) {
  395. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents);
  396. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  397. //
  398. // The page is still in transition, move it to the end to protect it
  399. // from possible cannibalization. Note that if the page is currently
  400. // being written to disk it will be on the modified list and when the
  401. // write completes it will automatically go to the end of the standby
  402. // list anyway so skip those.
  403. //
  404. if (Pfn1->u3.e1.PageLocation == StandbyPageList) {
  405. MiUnlinkPageFromList (Pfn1);
  406. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  407. MiInsertPageInList (&MmStandbyPageListHead, PageFrameIndex);
  408. }
  409. }
  410. UNLOCK_PFN (OldIrql);
  411. }
  412. VOID
  413. MiPfReleaseSubsectionReferences (
  414. IN PMI_READ_LIST MiReadList
  415. )
  416. /*++
  417. Routine Description:
  418. This routine releases reference counts on subsections examined by the
  419. prefetch scanner.
  420. Arguments:
  421. MiReadList - Supplies a read-list entry.
  422. Return Value:
  423. None.
  424. Environment:
  425. Kernel mode, PASSIVE_LEVEL.
  426. --*/
  427. {
  428. PMSUBSECTION MappedSubsection;
  429. PCONTROL_AREA ControlArea;
  430. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  431. ControlArea = MiReadList->ControlArea;
  432. ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
  433. ASSERT (ControlArea->FilePointer != NULL);
  434. //
  435. // Image files don't have dynamic prototype PTEs.
  436. //
  437. if (ControlArea->u.Flags.Image == 1) {
  438. return;
  439. }
  440. ASSERT (ControlArea->u.Flags.GlobalOnlyPerSession == 0);
  441. MappedSubsection = (PMSUBSECTION)(ControlArea + 1);
  442. MiRemoveViewsFromSectionWithPfn (MappedSubsection,
  443. MiReadList->LastPteOffsetReferenced);
  444. }
  445. NTSTATUS
  446. MiPfPrepareReadList (
  447. IN PREAD_LIST ReadList,
  448. OUT PMI_READ_LIST *OutMiReadList
  449. )
  450. /*++
  451. Routine Description:
  452. This routine constructs MDLs that describe the pages in the argument
  453. read-list. The caller will then issue the I/Os on return.
  454. Arguments:
  455. ReadList - Supplies the read-list.
  456. OutMiReadList - Supplies a pointer to receive the Mi readlist.
  457. Return Value:
  458. Various NTSTATUS codes.
  459. If STATUS_SUCCESS is returned, OutMiReadList is set to a pointer to an Mi
  460. readlist to be used for prefetching or NULL if no prefetching is needed.
  461. If OutMireadList is non-NULL (on success only) then the caller must call
  462. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffsetReferenced) for data files.
  463. Environment:
  464. Kernel mode, PASSIVE_LEVEL.
  465. --*/
  466. {
  467. ULONG LastPteOffset;
  468. NTSTATUS Status;
  469. MMPTE PteContents;
  470. PMMPTE LocalPrototypePte;
  471. PMMPTE LastPrototypePte;
  472. PMMPTE StartPrototypePte;
  473. PMMPTE EndPrototypePte;
  474. PMI_READ_LIST MiReadList;
  475. PMI_READ_LIST_ENTRY Rle;
  476. PMI_READ_LIST_ENTRY StartRleRun;
  477. PMI_READ_LIST_ENTRY EndRleRun;
  478. PMI_READ_LIST_ENTRY RleMax;
  479. PMI_READ_LIST_ENTRY FirstRleInRun;
  480. PCONTROL_AREA ControlArea;
  481. PSUBSECTION Subsection;
  482. PSUBSECTION PreviousSubsection;
  483. PMSUBSECTION VeryFirstSubsection;
  484. PMSUBSECTION VeryLastSubsection;
  485. UINT64 StartOffset;
  486. LARGE_INTEGER EndQuad;
  487. UINT64 EndOffset;
  488. UINT64 FileOffset;
  489. PMMINPAGE_SUPPORT InPageSupport;
  490. PMDL Mdl;
  491. ULONG i;
  492. PFN_NUMBER NumberOfPages;
  493. UINT64 StartingOffset;
  494. UINT64 TempOffset;
  495. ULONG ReadSize;
  496. ULONG NumberOfEntries;
  497. #if DBG
  498. PPFN_NUMBER Page;
  499. #endif
  500. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  501. *OutMiReadList = NULL;
  502. //
  503. // Create an Mi readlist from the argument Cc readlist.
  504. //
  505. NumberOfEntries = ReadList->NumberOfEntries;
  506. MiReadList = (PMI_READ_LIST) ExAllocatePoolWithTag (
  507. NonPagedPool,
  508. sizeof (MI_READ_LIST) + NumberOfEntries * sizeof (MI_READ_LIST_ENTRY),
  509. 'lRmM');
  510. if (MiReadList == NULL) {
  511. return STATUS_INSUFFICIENT_RESOURCES;
  512. }
  513. //
  514. // Translate the section object into the relevant control area.
  515. //
  516. if (ReadList->IsImage) {
  517. ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->ImageSectionObject;
  518. ASSERT (ControlArea != NULL );
  519. ASSERT (ControlArea->u.Flags.Image == 1);
  520. }
  521. else {
  522. ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->DataSectionObject;
  523. }
  524. //
  525. // If the section is backed by a ROM, then there's no need to prefetch
  526. // anything as it would waste RAM.
  527. //
  528. if (ControlArea->u.Flags.Rom == 1) {
  529. ExFreePool (MiReadList);
  530. return STATUS_SUCCESS;
  531. }
  532. //
  533. // Make sure the section is really prefetchable - physical and
  534. // pagefile-backed sections are not.
  535. //
  536. if ((ControlArea->u.Flags.PhysicalMemory) ||
  537. (ControlArea->FilePointer == NULL)) {
  538. ExFreePool (MiReadList);
  539. return STATUS_INVALID_PARAMETER_1;
  540. }
  541. //
  542. // Initialize the internal Mi readlist.
  543. //
  544. MiReadList->ControlArea = ControlArea;
  545. MiReadList->FileObject = ReadList->FileObject;
  546. MiReadList->InPageSupportHead.Next = NULL;
  547. RtlZeroMemory (MiReadList->List,
  548. sizeof (MI_READ_LIST_ENTRY) * NumberOfEntries);
  549. //
  550. // Copy pages from the Cc readlists to the internal Mi readlists.
  551. //
  552. NumberOfPages = 0;
  553. FirstRleInRun = NULL;
  554. VeryFirstSubsection = NULL;
  555. VeryLastSubsection = NULL;
  556. LastPteOffset = 0;
  557. if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
  558. Subsection = (PSUBSECTION)(ControlArea + 1);
  559. //
  560. // Ensure all prototype PTE bases are valid for all subsections of the
  561. // requested file so the traversal code doesn't have to check
  562. // everywhere. As long as the files are not too large this should
  563. // be a cheap operation.
  564. //
  565. if (ControlArea->u.Flags.Image == 0) {
  566. ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
  567. ASSERT (ControlArea->FilePointer != NULL);
  568. VeryFirstSubsection = (PMSUBSECTION) Subsection;
  569. VeryLastSubsection = (PMSUBSECTION) Subsection;
  570. do {
  571. //
  572. // A memory barrier is needed to read the subsection chains
  573. // in order to ensure the writes to the actual individual
  574. // subsection data structure fields are visible in correct
  575. // order. This avoids the need to acquire any stronger
  576. // synchronization (ie: PFN lock), thus yielding better
  577. // performance and pagability.
  578. //
  579. KeMemoryBarrier ();
  580. LastPteOffset += VeryLastSubsection->PtesInSubsection;
  581. if (VeryLastSubsection->NextSubsection == NULL) {
  582. break;
  583. }
  584. VeryLastSubsection = (PMSUBSECTION) VeryLastSubsection->NextSubsection;
  585. } while (TRUE);
  586. MiReadList->LastPteOffsetReferenced = LastPteOffset;
  587. Status = MiAddViewsForSectionWithPfn (VeryFirstSubsection,
  588. LastPteOffset);
  589. if (!NT_SUCCESS (Status)) {
  590. ExFreePool (MiReadList);
  591. return Status;
  592. }
  593. }
  594. }
  595. else {
  596. Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
  597. }
  598. StartOffset = (UINT64) MiStartingOffset (Subsection, Subsection->SubsectionBase);
  599. EndQuad = MiEndingOffset (Subsection);
  600. EndOffset = (UINT64)EndQuad.QuadPart;
  601. //
  602. // If the file is bigger than the subsection, truncate the subsection range
  603. // checks.
  604. //
  605. if ((StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
  606. EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT);
  607. }
  608. TempOffset = EndOffset;
  609. PreviousSubsection = NULL;
  610. LastPrototypePte = NULL;
  611. Rle = MiReadList->List;
  612. #if DBG
  613. if (MiPfDebug & MI_PF_FORCE_PREFETCH) {
  614. MiRemoveUserPages ();
  615. }
  616. //
  617. // Initializing FileOffset is not needed for correctness, but without it
  618. // the compiler cannot compile this code W4 to check for use of
  619. // uninitialized variables.
  620. //
  621. FileOffset = 0;
  622. #endif
  623. for (i = 0; i < NumberOfEntries; i += 1, Rle += 1) {
  624. ASSERT ((i == 0) || (ReadList->List[i].Alignment > FileOffset));
  625. FileOffset = ReadList->List[i].Alignment;
  626. ASSERT (Rle->u1.PrototypePte == NULL);
  627. //
  628. // Calculate which PTE maps the given logical block offset.
  629. //
  630. // Since our caller always passes ordered lists of logical block offsets
  631. // within a given file, always look forwards (as an optimization) in the
  632. // subsection chain.
  633. //
  634. // A quick check is made first to avoid recalculations and loops where
  635. // possible.
  636. //
  637. if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
  638. ASSERT (Subsection->SubsectionBase != NULL);
  639. LocalPrototypePte = Subsection->SubsectionBase +
  640. ((FileOffset - StartOffset) >> PAGE_SHIFT);
  641. ASSERT (TempOffset != 0);
  642. ASSERT (EndOffset != 0);
  643. }
  644. else {
  645. LocalPrototypePte = NULL;
  646. do {
  647. ASSERT (Subsection->SubsectionBase != NULL);
  648. if ((Subsection->StartingSector == 0) &&
  649. (ControlArea->u.Flags.Image == 1) &&
  650. (Subsection->SubsectionBase != ControlArea->Segment->PrototypePte)) {
  651. //
  652. // This is an image that was built with a linker pre-1995
  653. // (version 2.39 is one example) that put bss into a
  654. // separate subsection with zero as a starting file offset
  655. // field in the on-disk image. Ignore any prefetch as it
  656. // would read from the wrong offset trying to satisfy these
  657. // ranges (which are actually demand zero when the fault
  658. // occurs).
  659. //
  660. // This can also happen for an image (built with a current
  661. // linker) that has no initialized data (ie: it's data
  662. // is all bss). Just skip the subsection.
  663. //
  664. Subsection = Subsection->NextSubsection;
  665. continue;
  666. }
  667. StartOffset = (UINT64) MiStartingOffset (Subsection, Subsection->SubsectionBase);
  668. EndQuad = MiEndingOffset (Subsection);
  669. EndOffset = (UINT64)EndQuad.QuadPart;
  670. //
  671. // If the file is bigger than the subsection, truncate the
  672. // subsection range checks.
  673. //
  674. if ((StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
  675. EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT);
  676. }
  677. //
  678. // Always set TempOffset here even without a match. This is
  679. // because the truncation above may have resulted in skipping
  680. // the last straddling page of a subsection. After that,
  681. // the Subsection is set to Subsection->Next below and we
  682. // loop. Falling to the below again, we'd see that the
  683. // FileOffset is less than the StartOffset of the next
  684. // subsection, so we'd goto SkipPage and then compare the
  685. // next FileOffset which might be a match at the very top of
  686. // the loop. Hence, TempOffset must be right even in this
  687. // case, so set it here unconditionally.
  688. //
  689. TempOffset = EndOffset;
  690. if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
  691. LocalPrototypePte = Subsection->SubsectionBase +
  692. ((FileOffset - StartOffset) >> PAGE_SHIFT);
  693. break;
  694. }
  695. if (FileOffset < StartOffset) {
  696. //
  697. // Skip this page of the prefetch as it must be referring
  698. // to bss in the previous subsection - ie: this makes
  699. // no sense to prefetch as it is all demand zero. Moreover,
  700. // there is no disk block address for these at all !
  701. //
  702. goto SkipPage;
  703. }
  704. if ((VeryLastSubsection != NULL) &&
  705. ((PMSUBSECTION)Subsection == VeryLastSubsection)) {
  706. //
  707. // The requested block is beyond the size the section
  708. // was on entry. Reject it as this subsection is not
  709. // referenced.
  710. //
  711. Subsection = NULL;
  712. break;
  713. }
  714. Subsection = Subsection->NextSubsection;
  715. } while (Subsection != NULL);
  716. }
  717. if ((Subsection == NULL) || (LocalPrototypePte == LastPrototypePte)) {
  718. //
  719. // Illegal offsets are not prefetched. Either the file has
  720. // been replaced since the scenario was logged or Cc is passing
  721. // trash. Either way, this prefetch is over.
  722. //
  723. #if DBG
  724. if (MiPfDebug & MI_PF_PRINT_ERRORS) {
  725. DbgPrint ("MiPfPrepareReadList: Illegal readlist passed %p, %p, %p\n", ReadList, LocalPrototypePte, LastPrototypePte);
  726. }
  727. #endif
  728. if (VeryFirstSubsection != NULL) {
  729. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
  730. LastPteOffset);
  731. }
  732. ExFreePool (MiReadList);
  733. return STATUS_INVALID_PARAMETER_1;
  734. }
  735. PteContents = *LocalPrototypePte;
  736. //
  737. // See if this page needs to be read in. Note that these reads
  738. // are done without the PFN or system cache working set locks.
  739. // This is ok because later before we make the final decision on
  740. // whether to read each page, we'll look again.
  741. // If the page is in tranisition, make the call to (possibly) move
  742. // it to the end of the standby list to prevent cannibalization.
  743. //
  744. if (PteContents.u.Hard.Valid == 1) {
  745. SkipPage:
  746. continue;
  747. }
  748. if (PteContents.u.Soft.Prototype == 0) {
  749. if (PteContents.u.Soft.Transition == 1) {
  750. MiMovePageToEndOfStandbyList (LocalPrototypePte);
  751. }
  752. else {
  753. //
  754. // Demand zero or pagefile-backed, don't prefetch from the
  755. // file or we'd lose the contents. Note this can happen for
  756. // session-space images as we back modified (ie: for relocation
  757. // fixups or IAT updated) portions from the pagefile.
  758. //
  759. NOTHING;
  760. }
  761. continue;
  762. }
  763. Rle->u1.PrototypePte = LocalPrototypePte;
  764. LastPrototypePte = LocalPrototypePte;
  765. //
  766. // Check for partial pages as they require further processing later.
  767. //
  768. StartingOffset = (UINT64) MiStartingOffset (Subsection, LocalPrototypePte);
  769. ASSERT (StartingOffset < TempOffset);
  770. if ((StartingOffset + PAGE_SIZE) > TempOffset) {
  771. Rle->u1.e1.Partial = 1;
  772. }
  773. //
  774. // The NewSubsection marker is used to delimit the beginning of a new
  775. // subsection because RLE chunks must be split to accomodate inpage
  776. // completion so that proper zeroing (based on subsection alignment)
  777. // is done in MiWaitForInPageComplete.
  778. //
  779. if (FirstRleInRun == NULL) {
  780. FirstRleInRun = Rle;
  781. Rle->u1.e1.NewSubsection = 1;
  782. PreviousSubsection = Subsection;
  783. }
  784. else {
  785. if (Subsection != PreviousSubsection) {
  786. Rle->u1.e1.NewSubsection = 1;
  787. PreviousSubsection = Subsection;
  788. }
  789. }
  790. NumberOfPages += 1;
  791. }
  792. //
  793. // If the number of pages to read in is extremely small, don't bother.
  794. //
  795. if (NumberOfPages < MINIMUM_READ_LIST_PAGES) {
  796. if (VeryFirstSubsection != NULL) {
  797. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
  798. LastPteOffset);
  799. }
  800. ExFreePool (MiReadList);
  801. return STATUS_SUCCESS;
  802. }
  803. RleMax = MiReadList->List + NumberOfEntries;
  804. ASSERT (FirstRleInRun != RleMax);
  805. Status = STATUS_SUCCESS;
  806. //
  807. // Walk the readlists to determine runs. Cross-subsection runs are split
  808. // here so the completion code can zero the proper amount for any
  809. // non-aligned files.
  810. //
  811. EndRleRun = NULL;
  812. Rle = FirstRleInRun;
  813. //
  814. // Initializing StartRleRun & EndPrototypePte is not needed for correctness
  815. // but without it the compiler cannot compile this code
  816. // W4 to check for use of uninitialized variables.
  817. //
  818. StartRleRun = NULL;
  819. EndPrototypePte = NULL;
  820. while (Rle < RleMax) {
  821. if (Rle->u1.PrototypePte != NULL) {
  822. if (EndRleRun != NULL) {
  823. StartPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
  824. if (StartPrototypePte - EndPrototypePte > SEEK_THRESHOLD) {
  825. Rle -= 1;
  826. goto BuildMdl;
  827. }
  828. }
  829. if (Rle->u1.e1.NewSubsection == 1) {
  830. if (EndRleRun != NULL) {
  831. Rle -= 1;
  832. goto BuildMdl;
  833. }
  834. }
  835. if (EndRleRun == NULL) {
  836. StartRleRun = Rle;
  837. }
  838. EndRleRun = Rle;
  839. EndPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
  840. if (Rle->u1.e1.Partial == 1) {
  841. //
  842. // This must be the last RLE in this subsection as it is a
  843. // partial page. Split this run now.
  844. //
  845. goto BuildMdl;
  846. }
  847. }
  848. Rle += 1;
  849. //
  850. // Handle any straggling last run as well.
  851. //
  852. if (Rle == RleMax) {
  853. if (EndRleRun != NULL) {
  854. Rle -= 1;
  855. goto BuildMdl;
  856. }
  857. }
  858. continue;
  859. BuildMdl:
  860. //
  861. // Note no preceding or trailing dummy pages are possible as they are
  862. // trimmed immediately each time when the first real page of a run
  863. // is discovered above.
  864. //
  865. ASSERT (Rle >= StartRleRun);
  866. ASSERT (StartRleRun->u1.PrototypePte != NULL);
  867. ASSERT (EndRleRun->u1.PrototypePte != NULL);
  868. StartPrototypePte = MI_RLEPROTO_TO_PROTO(StartRleRun->u1.PrototypePte);
  869. EndPrototypePte = MI_RLEPROTO_TO_PROTO(EndRleRun->u1.PrototypePte);
  870. NumberOfPages = (EndPrototypePte - StartPrototypePte) + 1;
  871. //
  872. // Allocate and initialize an inpage support block for this run.
  873. //
  874. InPageSupport = MiGetInPageSupportBlock (MM_NOIRQL, &Status);
  875. if (InPageSupport == NULL) {
  876. ASSERT (!NT_SUCCESS (Status));
  877. break;
  878. }
  879. //
  880. // Use the MDL embedded in the inpage support block if it's big enough.
  881. // Otherwise allocate and initialize an MDL for this run.
  882. //
  883. if (NumberOfPages <= MM_MAXIMUM_READ_CLUSTER_SIZE + 1) {
  884. Mdl = &InPageSupport->Mdl;
  885. MmInitializeMdl (Mdl, NULL, NumberOfPages << PAGE_SHIFT);
  886. }
  887. else {
  888. Mdl = MmCreateMdl (NULL, NULL, NumberOfPages << PAGE_SHIFT);
  889. if (Mdl == NULL) {
  890. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  891. #if DBG
  892. InPageSupport->ListEntry.Next = NULL;
  893. #endif
  894. MiFreeInPageSupportBlock (InPageSupport);
  895. Status = STATUS_INSUFFICIENT_RESOURCES;
  896. break;
  897. }
  898. }
  899. #if DBG
  900. if (MiPfDebug & MI_PF_VERBOSE) {
  901. DbgPrint ("MiPfPrepareReadList: Creating INPAGE/MDL %p %p for %x pages\n", InPageSupport, Mdl, NumberOfPages);
  902. }
  903. Page = (PPFN_NUMBER)(Mdl + 1);
  904. *Page = MM_EMPTY_LIST;
  905. #endif
  906. //
  907. // Find the subsection for the start RLE. From this the file offset
  908. // can be derived.
  909. //
  910. ASSERT (StartPrototypePte != NULL);
  911. if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
  912. Subsection = (PSUBSECTION)(ControlArea + 1);
  913. }
  914. else {
  915. Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
  916. }
  917. do {
  918. ASSERT (Subsection->SubsectionBase != NULL);
  919. if ((StartPrototypePte >= Subsection->SubsectionBase) &&
  920. (StartPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection)) {
  921. break;
  922. }
  923. Subsection = Subsection->NextSubsection;
  924. } while (Subsection != NULL);
  925. //
  926. // Start the read at the proper file offset.
  927. //
  928. StartingOffset = (UINT64) MiStartingOffset (Subsection,
  929. StartPrototypePte);
  930. InPageSupport->ReadOffset = *((PLARGE_INTEGER)(&StartingOffset));
  931. //
  932. // Since the RLE is not always valid here, only walk the remaining
  933. // subsections for valid partial RLEs as only they need truncation.
  934. //
  935. // Note only image file reads need truncation as the filesystem cannot
  936. // blindly zero the rest of the page for these reads as they are packed
  937. // by memory management on a 512-byte sector basis. Data reads use
  938. // the whole page and the filesystems zero fill any remainder beyond
  939. // valid data length. It is important to specify the entire page where
  940. // possible so the filesystem won't post this which will hurt perf.
  941. //
  942. if ((EndRleRun->u1.e1.Partial == 1) && (ReadList->IsImage)) {
  943. ASSERT ((EndPrototypePte >= Subsection->SubsectionBase) &&
  944. (EndPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection));
  945. //
  946. // The read length for a partial RLE must be truncated correctly.
  947. //
  948. EndQuad = MiEndingOffset(Subsection);
  949. TempOffset = (UINT64)EndQuad.QuadPart;
  950. if ((ULONG)(TempOffset - StartingOffset) <= Mdl->ByteCount) {
  951. ReadSize = (ULONG)(TempOffset - StartingOffset);
  952. //
  953. // Round the offset to a 512-byte offset as this will help
  954. // filesystems optimize the transfer. Note that filesystems
  955. // will always zero fill the remainder between VDL and the
  956. // next 512-byte multiple and we have already zeroed the
  957. // whole page.
  958. //
  959. ReadSize = ((ReadSize + MMSECTOR_MASK) & ~MMSECTOR_MASK);
  960. Mdl->ByteCount = ReadSize;
  961. }
  962. }
  963. //
  964. // Stash these in the inpage block so we can walk it quickly later
  965. // in pass 2.
  966. //
  967. InPageSupport->BasePte = (PMMPTE)StartRleRun;
  968. InPageSupport->FilePointer = (PFILE_OBJECT)EndRleRun;
  969. ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
  970. InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
  971. PushEntryList (&MiReadList->InPageSupportHead,
  972. &InPageSupport->ListEntry);
  973. Rle += 1;
  974. EndRleRun = NULL;
  975. }
  976. //
  977. // Check for the entire list being full (or empty).
  978. //
  979. // Status is STATUS_INSUFFICIENT_RESOURCES if an MDL or inpage block
  980. // allocation failed. If any allocations succeeded, then set STATUS_SUCCESS
  981. // as pass2 must occur.
  982. //
  983. if (MiReadList->InPageSupportHead.Next != NULL) {
  984. Status = STATUS_SUCCESS;
  985. }
  986. else {
  987. if (VeryFirstSubsection != NULL) {
  988. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffset);
  989. }
  990. ExFreePool (MiReadList);
  991. MiReadList = NULL;
  992. }
  993. //
  994. // Note that a nonzero *OutMiReadList return value means that the caller
  995. // needs to remove the views for the section.
  996. //
  997. *OutMiReadList = MiReadList;
  998. return Status;
  999. }
NTSTATUS
MiPfPutPagesInTransition (
    IN PMI_READ_LIST ReadList,
    IN OUT PMMPFN *DummyPagePfn
    )

/*++

Routine Description:

    This routine allocates physical memory for the specified read-list and
    puts all the pages in transition.  On return the caller must issue I/Os
    for the list not only because of this thread, but also to satisfy
    collided faults from other threads for these same pages.

Arguments:

    ReadList - Supplies a pointer to the read-list.

    DummyPagePfn - If this points at a NULL pointer, then a dummy page is
                   allocated and placed in this pointer.  Otherwise this points
                   at a PFN to use as a dummy page.

Return Value:

    STATUS_SUCCESS

    STATUS_INSUFFICIENT_RESOURCES

Environment:

    Kernel mode.  PASSIVE_LEVEL.

--*/

{
    PVOID StartingVa;
    PFN_NUMBER MdlPages;
    KIRQL OldIrql;
    MMPTE PteContents;
    PMMPTE RlePrototypePte;
    PMMPTE FirstRlePrototypeInRun;
    PFN_NUMBER PageFrameIndex;
    PPFN_NUMBER Page;
    PPFN_NUMBER DestinationPage;
    ULONG PageColor;
    PMI_READ_LIST_ENTRY Rle;
    PMI_READ_LIST_ENTRY RleMax;
    PMI_READ_LIST_ENTRY FirstRleInRun;
    PFN_NUMBER DummyPage;
    PMDL Mdl;
    PMDL FreeMdl;
    PMMPFN PfnProto;
    PMMPFN Pfn1;
    PMMPFN DummyPfn1;
    ULONG i;
    PFN_NUMBER DummyTrim;
    PFN_NUMBER DummyReferences;
    ULONG NumberOfPages;
    MMPTE TempPte;
    PMMPTE PointerPde;
    PEPROCESS CurrentProcess;
    PSINGLE_LIST_ENTRY PrevEntry;
    PSINGLE_LIST_ENTRY NextEntry;
    PMMINPAGE_SUPPORT InPageSupport;
    SINGLE_LIST_ENTRY ReversedInPageSupportHead;
    LOGICAL Waited;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    //
    // Reverse the singly linked list of inpage support blocks so the
    // blocks are read in the same order requested for better performance
    // (ie: keep the disk heads seeking in the same direction).
    //
    // (Pass 1 pushed them onto the list, so they currently sit in
    // reverse request order.)
    //

    ReversedInPageSupportHead.Next = NULL;

    do {
        NextEntry = PopEntryList (&ReadList->InPageSupportHead);
        if (NextEntry == NULL) {
            break;
        }
        PushEntryList (&ReversedInPageSupportHead, NextEntry);
    } while (TRUE);

    ASSERT (ReversedInPageSupportHead.Next != NULL);

    ReadList->InPageSupportHead.Next = ReversedInPageSupportHead.Next;

    DummyReferences = 0;
    FreeMdl = NULL;
    CurrentProcess = PsGetCurrentProcess();
    PfnProto = NULL;
    PointerPde = NULL;

    //
    // Allocate a dummy page that will map discarded pages that aren't skipped.
    // Do it only if it's not already allocated.  The dummy page lets a single
    // contiguous I/O span small gaps of already-resident pages.
    //

    if (*DummyPagePfn == NULL) {

        LOCK_PFN (OldIrql);

        //
        // Do a quick sanity check to avoid doing unnecessary work.
        // If memory is short, bail out and free every queued inpage block.
        //

        if ((MmAvailablePages < MM_HIGH_LIMIT) ||
            (MI_NONPAGABLE_MEMORY_AVAILABLE() < MM_HIGH_LIMIT)) {

            UNLOCK_PFN (OldIrql);

            do {
                NextEntry = PopEntryList(&ReadList->InPageSupportHead);
                if (NextEntry == NULL) {
                    break;
                }

                InPageSupport = CONTAINING_RECORD(NextEntry,
                                                  MMINPAGE_SUPPORT,
                                                  ListEntry);

#if DBG
                InPageSupport->ListEntry.Next = NULL;
#endif

                MiFreeInPageSupportBlock (InPageSupport);
            } while (TRUE);

            return STATUS_INSUFFICIENT_RESOURCES;
        }

        DummyPage = MiRemoveAnyPage (0);
        Pfn1 = MI_PFN_ELEMENT (DummyPage);

        ASSERT (Pfn1->u2.ShareCount == 0);
        ASSERT (Pfn1->u3.e2.ReferenceCount == 0);

        MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0);

        //
        // Give the page a containing frame so MiIdentifyPfn won't crash.
        //

        Pfn1->u4.PteFrame = PsInitialSystemProcess->Pcb.DirectoryTableBase[0] >> PAGE_SHIFT;

        //
        // Always bias the reference count by 1 and charge for this locked page
        // up front so the myriad increments and decrements don't get slowed
        // down with needless checking.
        //

        Pfn1->u3.e1.PrototypePte = 0;

        MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, TRUE, 11);
        Pfn1->u3.e2.ReferenceCount += 1;

        Pfn1->u3.e1.ReadInProgress = 1;

        UNLOCK_PFN (OldIrql);

        *DummyPagePfn = Pfn1;
    }
    else {
        Pfn1 = *DummyPagePfn;
        DummyPage = MI_PFN_ELEMENT_TO_INDEX (Pfn1);
    }

    DummyPfn1 = Pfn1;

    //
    // Walk the inpage support blocks, turning each overloaded RLE run
    // into a real MDL of allocated transition pages.  PrevEntry trails
    // NextEntry so entries can be unlinked in place.
    //

    PrevEntry = NULL;
    NextEntry = ReadList->InPageSupportHead.Next;

    while (NextEntry != NULL) {

        InPageSupport = CONTAINING_RECORD (NextEntry,
                                           MMINPAGE_SUPPORT,
                                           ListEntry);

        //
        // Pass 1 stashed the start/end RLE pointers in BasePte/FilePointer.
        //

        Rle = (PMI_READ_LIST_ENTRY) InPageSupport->BasePte;
        RleMax = (PMI_READ_LIST_ENTRY) InPageSupport->FilePointer;

        ASSERT (Rle->u1.PrototypePte != NULL);
        ASSERT (RleMax->u1.PrototypePte != NULL);

        //
        // Properly initialize the inpage support block fields we overloaded.
        //

        InPageSupport->BasePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
        InPageSupport->FilePointer = ReadList->FileObject;

        FirstRleInRun = Rle;
        FirstRlePrototypeInRun = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
        RleMax += 1;

        Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
        Page = (PPFN_NUMBER)(Mdl + 1);

        StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

        MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
                                                  Mdl->ByteCount);

        //
        // Default the MDL entry to the dummy page as the RLE PTEs may
        // be noncontiguous and we have no way to distinguish the jumps.
        //

        for (i = 0; i < MdlPages; i += 1) {
            *Page = DummyPage;
            Page += 1;
        }

        DummyReferences += MdlPages;

        //
        // The dummy PFN's ReferenceCount is a USHORT - if charging this
        // run's pages would overflow it, abandon this and all remaining
        // runs rather than wrap the count.
        //

        if (DummyPfn1->u3.e2.ReferenceCount + MdlPages >= MAXUSHORT) {

            //
            // The USHORT ReferenceCount wrapped.
            //
            // Dequeue all remaining inpage blocks.
            //

            if (PrevEntry != NULL) {
                PrevEntry->Next = NULL;
            }
            else {
                ReadList->InPageSupportHead.Next = NULL;
            }

            do {
                InPageSupport = CONTAINING_RECORD(NextEntry,
                                                  MMINPAGE_SUPPORT,
                                                  ListEntry);

#if DBG
                InPageSupport->ListEntry.Next = NULL;
#endif

                NextEntry = NextEntry->Next;
                MiFreeInPageSupportBlock (InPageSupport);
            } while (NextEntry != NULL);

            break;
        }

        NumberOfPages = 0;
        Waited = FALSE;

        //
        // Build the proper InPageSupport and MDL to describe this run.
        // The PFN lock is held from here until the run is fully in
        // transition (except possibly inside MiMakeSystemAddressValidPfn,
        // which sets Waited when it releases the lock).
        //

        LOCK_PFN (OldIrql);

        DummyPfn1->u3.e2.ReferenceCount =
            (USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages);

        for (; Rle < RleMax; Rle += 1) {

            //
            // Fill the MDL entry for this RLE.
            //

            RlePrototypePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);

            if (RlePrototypePte == NULL) {
                continue;
            }

            //
            // The RlePrototypePte better be inside a prototype PTE allocation
            // so that subsequent page trims update the correct PTEs.
            //

            ASSERT (((RlePrototypePte >= (PMMPTE)MmPagedPoolStart) &&
                    (RlePrototypePte <= (PMMPTE)MmPagedPoolEnd)) ||
                    ((RlePrototypePte >= (PMMPTE)MmSpecialPoolStart) && (RlePrototypePte <= (PMMPTE)MmSpecialPoolEnd)));

            //
            // This is a page that our first pass which ran lock-free decided
            // needed to be read.  Here this must be rechecked as the page
            // state could have changed.  Note this check is final as the
            // PFN lock is held.  The PTE must be put in transition with
            // read in progress before the PFN lock is released.
            //

            //
            // Lock page containing prototype PTEs in memory by
            // incrementing the reference count for the page.
            // Unlock any page locked earlier containing prototype PTEs if
            // the containing page is not the same for both.
            //

            if (PfnProto != NULL) {
                if (PointerPde != MiGetPteAddress (RlePrototypePte)) {
                    ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
                    MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
                    PfnProto = NULL;
                }
            }

            if (PfnProto == NULL) {

                ASSERT (!MI_IS_PHYSICAL_ADDRESS (RlePrototypePte));

                PointerPde = MiGetPteAddress (RlePrototypePte);

                if (PointerPde->u.Hard.Valid == 0) {

                    //
                    // Set Waited to TRUE if we ever release the PFN lock as
                    // that means a release path below must factor this in.
                    //

                    if (MiMakeSystemAddressValidPfn (RlePrototypePte, OldIrql) == TRUE) {
                        Waited = TRUE;
                    }
                }

                PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber);

                MI_ADD_LOCKED_PAGE_CHARGE(PfnProto, TRUE, 4);
                PfnProto->u3.e2.ReferenceCount += 1;

                ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
            }

            PteContents = *(RlePrototypePte);

            if (PteContents.u.Hard.Valid == 1) {

                //
                // The page has become resident since the last pass.  Don't
                // include it.
                //

                NOTHING;
            }
            else if (PteContents.u.Soft.Prototype == 0) {

                //
                // The page is either in transition (so don't prefetch it).
                //
                //                  - OR -
                //
                // it is now pagefile (or demand zero) backed - in which case
                // prefetching it from the file here would cause us to lose
                // the contents.  Note this can happen for session-space images
                // as we back modified (ie: for relocation fixups or IAT
                // updated) portions from the pagefile.
                //

                NOTHING;
            }
            else if ((MmAvailablePages >= MM_HIGH_LIMIT) &&
                     (MI_NONPAGABLE_MEMORY_AVAILABLE() >= MM_HIGH_LIMIT)) {

                NumberOfPages += 1;

                //
                // Allocate a physical page.
                //

                PageColor = MI_PAGE_COLOR_VA_PROCESS (
                                MiGetVirtualAddressMappedByPte (RlePrototypePte),
                                &CurrentProcess->NextPageColor
                                );

                if (Rle->u1.e1.Partial == 1) {

                    //
                    // This read crosses the end of a subsection, get a zeroed
                    // page and correct the read size.
                    //

                    PageFrameIndex = MiRemoveZeroPage (PageColor);
                }
                else {
                    PageFrameIndex = MiRemoveAnyPage (PageColor);
                }

                Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
                ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
                ASSERT (Pfn1->u2.ShareCount == 0);
                ASSERT (RlePrototypePte->u.Hard.Valid == 0);

                //
                // Initialize read-in-progress PFN.
                //

                MiInitializePfn (PageFrameIndex, RlePrototypePte, 0);

                //
                // These pieces of MiInitializePfn initialization are overridden
                // here as these pages are only going into prototype
                // transition and not into any page tables.
                //

                Pfn1->u3.e1.PrototypePte = 1;

                MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, TRUE, 38);

                Pfn1->u2.ShareCount -= 1;
                Pfn1->u3.e1.PageLocation = ZeroedPageList;

                //
                // Initialize the I/O specific fields.
                //

                ASSERT (FirstRleInRun->u1.PrototypePte != NULL);
                Pfn1->u1.Event = &InPageSupport->Event;
                Pfn1->u3.e1.ReadInProgress = 1;
                ASSERT (Pfn1->u4.InPageError == 0);

                //
                // Increment the PFN reference count in the control area for
                // the subsection.
                //

                ReadList->ControlArea->NumberOfPfnReferences += 1;

                //
                // Put the PTE into the transition state.
                // No TB flush needed as the PTE is still not valid.
                //

                MI_MAKE_TRANSITION_PTE (TempPte,
                                        PageFrameIndex,
                                        RlePrototypePte->u.Soft.Protection,
                                        RlePrototypePte);

                MI_WRITE_INVALID_PTE (RlePrototypePte, TempPte);

                //
                // Replace the dummy-page default for this slot with the
                // frame just allocated.  The slot index is the PTE's
                // distance from the first prototype PTE of the run.
                //

                Page = (PPFN_NUMBER)(Mdl + 1);
                ASSERT ((ULONG)(RlePrototypePte - FirstRlePrototypeInRun) < MdlPages);
                *(Page + (RlePrototypePte - FirstRlePrototypeInRun)) = PageFrameIndex;
            }
            else {

                //
                // Failed allocation - this concludes prefetching for this run.
                //

                break;
            }
        }

        //
        // If all the pages were resident, dereference the dummy page references
        // now and notify our caller that I/Os are not necessary.  Note that
        // STATUS_SUCCESS must still be returned so our caller knows to continue
        // on to the next readlist.
        //

        if (NumberOfPages == 0) {

            ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);

            DummyPfn1->u3.e2.ReferenceCount =
                (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);

            UNLOCK_PFN (OldIrql);

            if (PrevEntry != NULL) {
                PrevEntry->Next = NextEntry->Next;
            }
            else {
                ReadList->InPageSupportHead.Next = NextEntry->Next;
            }

            NextEntry = NextEntry->Next;

#if DBG
            InPageSupport->ListEntry.Next = NULL;
#endif

            MiFreeInPageSupportBlock (InPageSupport);
            continue;
        }

        //
        // Carefully trim leading dummy pages.
        //

        Page = (PPFN_NUMBER)(Mdl + 1);

        DummyTrim = 0;
        for (i = 0; i < MdlPages - 1; i += 1) {
            if (*Page == DummyPage) {
                DummyTrim += 1;
                Page += 1;
            }
            else {
                break;
            }
        }

        if (DummyTrim != 0) {

            //
            // Shrink the MDL and advance the file offset past the
            // trimmed leading pages.
            //

            Mdl->Size =
                (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
            Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
            ASSERT (Mdl->ByteCount != 0);
            InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE);
            DummyPfn1->u3.e2.ReferenceCount =
                (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);

            //
            // Shuffle down the PFNs in the MDL.
            // Recalculate BasePte to adjust for the shuffle.
            //

            Pfn1 = MI_PFN_ELEMENT (*Page);

            ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
            ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
                    (Pfn1->PteAddress->u.Soft.Transition == 1));

            InPageSupport->BasePte = Pfn1->PteAddress;

            DestinationPage = (PPFN_NUMBER)(Mdl + 1);

            do {
                *DestinationPage = *Page;
                DestinationPage += 1;
                Page += 1;
                i += 1;
            } while (i < MdlPages);

            MdlPages -= DummyTrim;
        }

        //
        // Carefully trim trailing dummy pages.
        //

        StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

        MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
                                                  Mdl->ByteCount);

        ASSERT (MdlPages != 0);

        Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;

        if (*Page == DummyPage) {

            ASSERT (MdlPages >= 2);

            //
            // Trim the last page specially as it may be a partial page.
            //

            Mdl->Size -= sizeof(PFN_NUMBER);
            if (BYTE_OFFSET(Mdl->ByteCount) != 0) {
                Mdl->ByteCount &= ~(PAGE_SIZE - 1);
            }
            else {
                Mdl->ByteCount -= PAGE_SIZE;
            }
            ASSERT (Mdl->ByteCount != 0);
            DummyPfn1->u3.e2.ReferenceCount -= 1;

            //
            // Now trim any other trailing pages.
            //

            Page -= 1;
            DummyTrim = 0;
            while (Page != ((PPFN_NUMBER)(Mdl + 1))) {
                if (*Page != DummyPage) {
                    break;
                }
                DummyTrim += 1;
                Page -= 1;
            }
            if (DummyTrim != 0) {
                ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER)));
                Mdl->Size =
                    (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
                Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
                DummyPfn1->u3.e2.ReferenceCount =
                    (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
            }

            ASSERT (MdlPages > DummyTrim + 1);
            MdlPages -= (DummyTrim + 1);

#if DBG
            StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

            ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
                                                               Mdl->ByteCount));
#endif
        }

        //
        // If the MDL is not already embedded in the inpage block, see if its
        // final size qualifies it - if so, embed it now.
        //

        if ((Mdl != &InPageSupport->Mdl) &&
            (Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){

#if DBG
            RtlFillMemoryUlong (&InPageSupport->Page[0],
                                (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER),
                                0xf1f1f1f1);
#endif

            RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size);

            //
            // Queue the pool MDL for freeing after the PFN lock is dropped.
            //

            Mdl->Next = FreeMdl;
            FreeMdl = Mdl;

            Mdl = &InPageSupport->Mdl;

            ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
            InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
        }

        //
        // If the MDL contains a large number of dummy pages to real pages
        // then just discard it.  Only check large MDLs as embedded ones are
        // always worth the I/O.
        //
        // The PFN lock may have been released above during the
        // MiMakeSystemAddressValidPfn call.  If so, other threads may
        // have collided on the pages in the prefetch MDL and if so,
        // this I/O must be issued regardless of the inefficiency of
        // dummy pages within it.  Otherwise the other threads will
        // hang in limbo forever.
        //

        ASSERT (MdlPages != 0);

#if DBG
        StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

        ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
                                                           Mdl->ByteCount));
#endif

        if ((Mdl != &InPageSupport->Mdl) &&
            (Waited == FALSE) &&
            ((MdlPages - NumberOfPages) / DUMMY_RATIO >= NumberOfPages)) {

            if (PrevEntry != NULL) {
                PrevEntry->Next = NextEntry->Next;
            }
            else {
                ReadList->InPageSupportHead.Next = NextEntry->Next;
            }

            NextEntry = NextEntry->Next;

            ASSERT (MI_EXTRACT_PREFETCH_MDL(InPageSupport) == Mdl);

            //
            // Note the pages are individually freed here (rather than just
            // "completing" the I/O with an error) as the PFN lock has
            // never been released since the pages were put in transition.
            // So no collisions on these pages are possible.
            //

            ASSERT (InPageSupport->WaitCount == 1);

            Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;

            do {
                if (*Page != DummyPage) {

                    Pfn1 = MI_PFN_ELEMENT (*Page);

                    ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
                    ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
                            (Pfn1->PteAddress->u.Soft.Transition == 1));
                    ASSERT (Pfn1->u3.e1.ReadInProgress == 1);
                    ASSERT (Pfn1->u3.e1.PrototypePte == 1);
                    ASSERT (Pfn1->u3.e2.ReferenceCount == 1);
                    ASSERT (Pfn1->u2.ShareCount == 0);

                    Pfn1->u3.e1.PageLocation = StandbyPageList;
                    Pfn1->u3.e1.ReadInProgress = 0;

                    MiRestoreTransitionPte (Pfn1);

                    MI_SET_PFN_DELETED (Pfn1);

                    MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(Pfn1, 39);
                }
                Page -= 1;
            } while (Page >= (PPFN_NUMBER)(Mdl + 1));

            ASSERT (InPageSupport->WaitCount == 1);

            ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);

            DummyPfn1->u3.e2.ReferenceCount =
                (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);

            UNLOCK_PFN (OldIrql);

#if DBG
            InPageSupport->ListEntry.Next = NULL;
#endif

            MiFreeInPageSupportBlock (InPageSupport);
            continue;
        }

#if DBG
        MiPfDbgDumpReadList (ReadList);
#endif

        ASSERT ((USHORT)Mdl->Size - sizeof(MDL) == BYTES_TO_PAGES(Mdl->ByteCount) * sizeof(PFN_NUMBER));

        //
        // Only the real (non-dummy) page references were consumed, so give
        // back that many dummy references before releasing the lock.
        //

        DummyPfn1->u3.e2.ReferenceCount =
            (USHORT)(DummyPfn1->u3.e2.ReferenceCount - NumberOfPages);

        UNLOCK_PFN (OldIrql);

        InterlockedIncrement ((PLONG) &MmInfoCounters.PageReadIoCount);
        InterlockedExchangeAdd ((PLONG) &MmInfoCounters.PageReadCount,
                                (LONG) NumberOfPages);

        //
        // March on to the next run and its InPageSupport and MDL.
        //

        PrevEntry = NextEntry;
        NextEntry = NextEntry->Next;
    }

    //
    // Unlock page containing prototype PTEs.
    //

    if (PfnProto != NULL) {
        LOCK_PFN (OldIrql);
        ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
        MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
        UNLOCK_PFN (OldIrql);
    }

#if DBG
    if (MiPfDebug & MI_PF_DELAY) {

        //
        // This delay provides a window to increase the chance of collided
        // faults.
        //

        KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
    }
#endif

    //
    // Free any collapsed MDLs that are no longer needed.
    //

    while (FreeMdl != NULL) {
        Mdl = FreeMdl->Next;
        ExFreePool (FreeMdl);
        FreeMdl = Mdl;
    }

    return STATUS_SUCCESS;
}
  1577. VOID
  1578. MiPfExecuteReadList (
  1579. IN PMI_READ_LIST ReadList
  1580. )
  1581. /*++
  1582. Routine Description:
  1583. This routine executes the read list by issuing paging I/Os for all
  1584. runs described in the read-list.
  1585. Arguments:
  1586. ReadList - Pointer to the read-list.
  1587. Return Value:
  1588. None.
  1589. Environment:
  1590. Kernel mode, PASSIVE_LEVEL.
  1591. --*/
  1592. {
  1593. PMDL Mdl;
  1594. NTSTATUS status;
  1595. PMMPFN Pfn1;
  1596. PMMPTE LocalPrototypePte;
  1597. PFN_NUMBER PageFrameIndex;
  1598. PSINGLE_LIST_ENTRY NextEntry;
  1599. PMMINPAGE_SUPPORT InPageSupport;
  1600. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1601. NextEntry = ReadList->InPageSupportHead.Next;
  1602. while (NextEntry != NULL) {
  1603. InPageSupport = CONTAINING_RECORD(NextEntry,
  1604. MMINPAGE_SUPPORT,
  1605. ListEntry);
  1606. //
  1607. // Initialize the prefetch MDL.
  1608. //
  1609. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  1610. ASSERT ((Mdl->MdlFlags & MDL_MAPPED_TO_SYSTEM_VA) == 0);
  1611. Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
  1612. ASSERT (InPageSupport->u1.e1.Completed == 0);
  1613. ASSERT (InPageSupport->Thread == PsGetCurrentThread());
  1614. ASSERT64 (InPageSupport->UsedPageTableEntries == 0);
  1615. ASSERT (InPageSupport->WaitCount >= 1);
  1616. ASSERT (InPageSupport->u1.e1.PrefetchMdlHighBits != 0);
  1617. //
  1618. // Initialize the inpage support block fields we overloaded.
  1619. //
  1620. ASSERT (InPageSupport->FilePointer == ReadList->FileObject);
  1621. LocalPrototypePte = InPageSupport->BasePte;
  1622. ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
  1623. ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
  1624. (LocalPrototypePte->u.Soft.Transition == 1));
  1625. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE(LocalPrototypePte);
  1626. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  1627. InPageSupport->Pfn = Pfn1;
  1628. status = IoAsynchronousPageRead (InPageSupport->FilePointer,
  1629. Mdl,
  1630. &InPageSupport->ReadOffset,
  1631. &InPageSupport->Event,
  1632. &InPageSupport->IoStatus);
  1633. if (!NT_SUCCESS (status)) {
  1634. //
  1635. // Set the event as the I/O system doesn't set it on errors.
  1636. //
  1637. InPageSupport->IoStatus.Status = status;
  1638. InPageSupport->IoStatus.Information = 0;
  1639. KeSetEvent (&InPageSupport->Event, 0, FALSE);
  1640. }
  1641. NextEntry = NextEntry->Next;
  1642. }
  1643. #if DBG
  1644. if (MiPfDebug & MI_PF_DELAY) {
  1645. //
  1646. // This delay provides a window to increase the chance of collided
  1647. // faults.
  1648. //
  1649. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
  1650. }
  1651. #endif
  1652. }
VOID
MiPfCompletePrefetchIos (
    IN PMI_READ_LIST ReadList
    )

/*++

Routine Description:

    This routine waits for a series of page reads to complete
    and completes the requests.

Arguments:

    ReadList - Pointer to the read-list.

Return Value:

    None.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    PMDL Mdl;
    PMMPFN Pfn1;
    PMMPFN PfnClusterPage;
    PPFN_NUMBER Page;
    NTSTATUS status;
    LONG NumberOfBytes;
    PMMINPAGE_SUPPORT InPageSupport;
    PSINGLE_LIST_ENTRY NextEntry;

    extern ULONG MmFrontOfList;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    //
    // Pop each queued inpage support block, wait for its read to finish
    // and then release the per-page references taken in pass 2.
    //

    do {

        NextEntry = PopEntryList(&ReadList->InPageSupportHead);
        if (NextEntry == NULL) {
            break;
        }

        InPageSupport = CONTAINING_RECORD(NextEntry,
                                          MMINPAGE_SUPPORT,
                                          ListEntry);

        ASSERT (InPageSupport->Pfn != 0);

        //
        // Pfn1 is the PFN of the run's first page - it owns the event
        // that the read completion signals.
        //

        Pfn1 = InPageSupport->Pfn;
        Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
        Page = (PPFN_NUMBER)(Mdl + 1);

        status = MiWaitForInPageComplete (InPageSupport->Pfn,
                                          InPageSupport->BasePte,
                                          NULL,
                                          InPageSupport->BasePte,
                                          InPageSupport,
                                          PREFETCH_PROCESS);

        //
        // MiWaitForInPageComplete RETURNS WITH THE PFN LOCK HELD!!!
        //

        //
        // If we are prefetching for boot, insert prefetched pages to the front
        // of the list.  Otherwise the pages prefetched first end up susceptible
        // at the front of the list as we prefetch more.  We prefetch pages in
        // the order they will be used.  When there is a spike in memory usage
        // and there is no free memory, we lose these pages before we can
        // get cache-hits on them.  Thus boot gets ahead and starts discarding
        // prefetched pages that it could use just a little later.
        //

        if (CCPF_IS_PREFETCHING_FOR_BOOT()) {
            MmFrontOfList = TRUE;
        }

        NumberOfBytes = (LONG)Mdl->ByteCount;

        while (NumberOfBytes > 0) {

            //
            // Decrement all reference counts.
            //

            PfnClusterPage = MI_PFN_ELEMENT (*Page);

#if DBG
            if (PfnClusterPage->u4.InPageError) {

                //
                // If the page is marked with an error, then the whole transfer
                // must be marked as not successful as well.  The only exception
                // is the prefetch dummy page which is used in multiple
                // transfers concurrently and thus may have the inpage error
                // bit set at any time (due to another transaction besides
                // the current one).
                //

                ASSERT ((status != STATUS_SUCCESS) ||
                        (PfnClusterPage->PteAddress == MI_PF_DUMMY_PAGE_PTE));
            }
#endif
            if (PfnClusterPage->u3.e1.ReadInProgress != 0) {

                ASSERT (PfnClusterPage->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);

                PfnClusterPage->u3.e1.ReadInProgress = 0;

                //
                // Clear the event pointer only on success - on error it is
                // needed so faulters can discover the inpage error.
                //

                if (PfnClusterPage->u4.InPageError == 0) {
                    PfnClusterPage->u1.Event = NULL;
                }
            }

            MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnClusterPage, 39);

            Page += 1;
            NumberOfBytes -= PAGE_SIZE;
        }

        //
        // If we were inserting prefetched pages to front of standby list
        // for boot prefetching, stop it before we release the pfn lock.
        //

        MmFrontOfList = FALSE;

        if (status != STATUS_SUCCESS) {

            //
            // An I/O error occurred during the page read
            // operation.  All the pages which were just
            // put into transition must be put onto the
            // free list if InPageError is set, and their
            // PTEs restored to the proper contents.
            //

            Page = (PPFN_NUMBER)(Mdl + 1);
            NumberOfBytes = (LONG)Mdl->ByteCount;

            while (NumberOfBytes > 0) {

                PfnClusterPage = MI_PFN_ELEMENT (*Page);

                if (PfnClusterPage->u4.InPageError == 1) {

                    if (PfnClusterPage->u3.e2.ReferenceCount == 0) {

                        ASSERT (PfnClusterPage->u3.e1.PageLocation ==
                                                        StandbyPageList);

                        MiUnlinkPageFromList (PfnClusterPage);
                        MiRestoreTransitionPte (PfnClusterPage);
                        MiInsertPageInFreeList (*Page);
                    }
                }
                Page += 1;
                NumberOfBytes -= PAGE_SIZE;
            }
        }

        //
        // All the relevant prototype PTEs should be in transition state.
        //

        //
        // We took out an extra reference on the inpage block to prevent
        // MiWaitForInPageComplete from freeing it (and the MDL), since we
        // needed to process the MDL above.  Now let it go for good.
        //

        ASSERT (InPageSupport->WaitCount >= 1);

        UNLOCK_PFN (PASSIVE_LEVEL);

#if DBG
        InPageSupport->ListEntry.Next = NULL;
#endif

        MiFreeInPageSupportBlock (InPageSupport);

    } while (TRUE);
}
  1789. #if DBG
  1790. VOID
  1791. MiPfDbgDumpReadList (
  1792. IN PMI_READ_LIST ReadList
  1793. )
  1794. /*++
  1795. Routine Description:
  1796. This routine dumps the given read-list range to the debugger.
  1797. Arguments:
  1798. ReadList - Pointer to the read-list.
  1799. Return Value:
  1800. None.
  1801. Environment:
  1802. Kernel mode.
  1803. --*/
  1804. {
  1805. ULONG i;
  1806. PMDL Mdl;
  1807. PMMPFN Pfn1;
  1808. PMMPTE LocalPrototypePte;
  1809. PFN_NUMBER PageFrameIndex;
  1810. PMMINPAGE_SUPPORT InPageSupport;
  1811. PSINGLE_LIST_ENTRY NextEntry;
  1812. PPFN_NUMBER Page;
  1813. PVOID StartingVa;
  1814. PFN_NUMBER MdlPages;
  1815. LARGE_INTEGER ReadOffset;
  1816. if ((MiPfDebug & MI_PF_VERBOSE) == 0) {
  1817. return;
  1818. }
  1819. DbgPrint ("\nPF: Dumping read-list %x (FileObject %x ControlArea %x)\n\n",
  1820. ReadList, ReadList->FileObject, ReadList->ControlArea);
  1821. DbgPrint ("\tFileOffset | Pte | Pfn \n"
  1822. "\t-----------+---------------+----------\n");
  1823. NextEntry = ReadList->InPageSupportHead.Next;
  1824. while (NextEntry != NULL) {
  1825. InPageSupport = CONTAINING_RECORD(NextEntry,
  1826. MMINPAGE_SUPPORT,
  1827. ListEntry);
  1828. ReadOffset = InPageSupport->ReadOffset;
  1829. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  1830. Page = (PPFN_NUMBER)(Mdl + 1);
  1831. #if DBG
  1832. //
  1833. // MDL isn't filled in yet, skip it.
  1834. //
  1835. if (*Page == MM_EMPTY_LIST) {
  1836. NextEntry = NextEntry->Next;
  1837. continue;
  1838. }
  1839. #endif
  1840. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1841. MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1842. Mdl->ByteCount);
  1843. //
  1844. // Default the MDL entry to the dummy page as the RLE PTEs may
  1845. // be noncontiguous and we have no way to distinguish the jumps.
  1846. //
  1847. for (i = 0; i < MdlPages; i += 1) {
  1848. PageFrameIndex = *Page;
  1849. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  1850. LocalPrototypePte = Pfn1->PteAddress;
  1851. if (LocalPrototypePte != MI_PF_DUMMY_PAGE_PTE) {
  1852. ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
  1853. ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
  1854. (LocalPrototypePte->u.Soft.Transition == 1));
  1855. }
  1856. DbgPrint ("\t %8x | %8x | %8x\n",
  1857. ReadOffset.LowPart,
  1858. LocalPrototypePte,
  1859. PageFrameIndex);
  1860. Page += 1;
  1861. ReadOffset.LowPart += PAGE_SIZE;
  1862. }
  1863. NextEntry = NextEntry->Next;
  1864. }
  1865. DbgPrint ("\t\n");
  1866. }
VOID
MiRemoveUserPages (
    VOID
    )

/*++

Routine Description:

    This routine removes user space pages by emptying every process
    working set and flushing all pages, then purging the transition
    list so the trimmed pages cannot be soft-faulted back in.  DBG-only
    helper used to force subsequent references to be satisfied by
    prefetch reads (see MI_PF_FORCE_PREFETCH).

Arguments:

    None.

Return Value:

    None.

Environment:

    Kernel mode.

--*/

{
    PKTHREAD CurrentThread;

    CurrentThread = KeGetCurrentThread ();

    //
    // Disable kernel APC delivery for the duration of the trim so the
    // empty/flush sequence is not interrupted.
    //

    KeEnterCriticalRegionThread (CurrentThread);

    //
    // NOTE(review): MiDelayPageFaults is presumably consulted by the
    // fault path to throttle faults while working sets are being
    // emptied and pages flushed - confirm against the fault handler.
    //

    InterlockedIncrement (&MiDelayPageFaults);

    MmEmptyAllWorkingSets ();
    MiFlushAllPages ();

    InterlockedDecrement (&MiDelayPageFaults);

    KeLeaveCriticalRegionThread (CurrentThread);

    //
    // Run the transition list and free all the entries so transition
    // faults are not satisfied for any of the non modified pages that were
    // freed.
    //

    MiPurgeTransitionList ();
}
  1897. #endif