Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2559 lines
71 KiB

  1. /*++
  2. Copyright (c) 1999 Microsoft Corporation
  3. Module Name:
  4. pfsup.c
  5. Abstract:
  6. This module contains the Mm support routines for prefetching groups of pages
  7. from secondary storage.
  8. The caller builds a list of various file objects and logical block offsets,
  9. passing them to MmPrefetchPages. The code here then examines the
  10. internal pages, reading in those that are not already valid or in
  11. transition. These pages are read with a single read, using a dummy page
  12. to bridge small gaps. If the gap is "large", then separate reads are
  13. issued.
  14. Upon conclusion of all the I/Os, control is returned to the calling
  15. thread, and any pages that needed to be read are placed in transition
  16. within the prototype PTE-managed segments. Thus any future references
  17. to these pages should result in soft faults only, provided these pages
  18. do not themselves get trimmed under memory pressure.
  19. Author:
  20. Landy Wang (landyw) 09-Jul-1999
  21. Revision History:
  22. --*/
  23. #include "mi.h"
  24. #if DBG
  25. ULONG MiPfDebug;
  26. #define MI_PF_FORCE_PREFETCH 0x1 // Trim all user pages to force prefetch
  27. #define MI_PF_DELAY 0x2 // Delay hoping to trigger collisions
  28. #define MI_PF_VERBOSE 0x4 // Verbose printing
  29. #define MI_PF_PRINT_ERRORS 0x8 // Print to debugger on errors
  30. #endif
  31. //
  32. // If an MDL contains DUMMY_RATIO times as many dummy pages as real pages
  33. // then don't bother with the read.
  34. //
  35. #define DUMMY_RATIO 16
  36. //
  37. // If two consecutive read-list entries are more than "seek threshold"
  38. // distance apart, the read-list is split between these entries. Otherwise
  39. // the dummy page is used for the gap and only one MDL is used.
  40. //
  41. #define SEEK_THRESHOLD ((128 * 1024) / PAGE_SIZE)
  42. //
  43. // Minimum number of pages to prefetch per section.
  44. //
  45. #define MINIMUM_READ_LIST_PAGES 1
  46. //
  47. // If at least this many available physical pages, then attempt prefetch.
  48. //
  49. #define MINIMUM_AVAILABLE_PAGES MM_HIGH_LIMIT
  50. //
  51. // Read-list structures.
  52. //
typedef struct _RLETYPE {
    ULONG_PTR Partial : 1;          // This entry is a partial page.
    ULONG_PTR NewSubsection : 1;    // This entry starts in the next subsection.
    ULONG_PTR DontUse : 30;         // Unused pad bits.
} RLETYPE;

//
// Each entry overlays a prototype PTE pointer with the RLETYPE flag bits,
// which are packed into the low-order bits of the pointer (see
// MI_RLEPROTO_BITS below).
//

typedef struct _MI_READ_LIST_ENTRY {
    union {
        PMMPTE PrototypePte;
        RLETYPE e1;
    } u1;
} MI_READ_LIST_ENTRY, *PMI_READ_LIST_ENTRY;

//
// Mask of the low-order flag bits; MI_RLEPROTO_TO_PROTO strips them to
// recover the actual prototype PTE pointer from an entry.
//

#define MI_RLEPROTO_BITS 3

#define MI_RLEPROTO_TO_PROTO(ProtoPte) ((PMMPTE)((ULONG_PTR)ProtoPte & ~MI_RLEPROTO_BITS))

typedef struct _MI_READ_LIST {
    PCONTROL_AREA ControlArea;      // Section backing every entry in this list.
    PFILE_OBJECT FileObject;        // File the pages are read from.
    ULONG LastPteOffsetReferenced;  // Passed back when subsection references are released.

    //
    // Note that entries are chained through the inpage support blocks from
    // this listhead. This list is not protected by interlocks because it is
    // only accessed by the owning thread. Inpage blocks _ARE_ accessed with
    // interlocks when they are inserted or removed from the memory management
    // freelists, but by the time they get to this module they are decoupled.
    //

    SINGLE_LIST_ENTRY InPageSupportHead;

    MI_READ_LIST_ENTRY List[ANYSIZE_ARRAY]; // Variable-length array of entries.
} MI_READ_LIST, *PMI_READ_LIST;
  80. VOID
  81. MiPfReleaseSubsectionReferences (
  82. IN PMI_READ_LIST MiReadList
  83. );
  84. VOID
  85. MiPfFreeDummyPage (
  86. IN PMMPFN DummyPagePfn
  87. );
  88. NTSTATUS
  89. MiPfPrepareReadList (
  90. IN PREAD_LIST ReadList,
  91. OUT PMI_READ_LIST *OutMiReadList
  92. );
  93. NTSTATUS
  94. MiPfPutPagesInTransition (
  95. IN PMI_READ_LIST ReadList,
  96. IN OUT PMMPFN *DummyPagePfn
  97. );
  98. VOID
  99. MiPfExecuteReadList (
  100. IN PMI_READ_LIST ReadList
  101. );
  102. VOID
  103. MiPfCompletePrefetchIos (
  104. PMI_READ_LIST ReadList
  105. );
  106. #if DBG
  107. VOID
  108. MiPfDbgDumpReadList (
  109. IN PMI_READ_LIST ReadList
  110. );
  111. VOID
  112. MiRemoveUserPages (
  113. VOID
  114. );
  115. #endif
  116. #ifdef ALLOC_PRAGMA
  117. #pragma alloc_text (PAGE, MmPrefetchPages)
  118. #pragma alloc_text (PAGE, MiPfPrepareReadList)
  119. #pragma alloc_text (PAGE, MiPfExecuteReadList)
  120. #pragma alloc_text (PAGE, MiPfReleaseSubsectionReferences)
  121. #endif
NTSTATUS
MmPrefetchPages (
    IN ULONG NumberOfLists,
    IN PREAD_LIST *ReadLists
    )

/*++

Routine Description:

    This routine reads pages described in the read-lists in the optimal fashion.

    This is the only externally callable prefetch routine. No component
    should use this interface except the cache manager.

Arguments:

    NumberOfLists - Supplies the number of read-lists.

    ReadLists - Supplies an array of read-lists.

Return Value:

    NTSTATUS codes.

Environment:

    Kernel mode. PASSIVE_LEVEL.

--*/

{
    PMI_READ_LIST *MiReadLists;
    PMMPFN DummyPagePfn;
    NTSTATUS status;
    ULONG i;
    KIRQL OldIrql;
    LOGICAL ReadBuilt;
    LOGICAL ApcNeeded;
    PETHREAD CurrentThread;
    NTSTATUS CauseOfReadBuildFailures;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    //
    // Allocate memory for internal Mi read-lists (one pointer per caller
    // supplied read-list).
    //

    MiReadLists = (PMI_READ_LIST *) ExAllocatePoolWithTag (
                        NonPagedPool,
                        sizeof (PMI_READ_LIST) * NumberOfLists,
                        'lRmM'
                        );

    if (MiReadLists == NULL) {
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    ReadBuilt = FALSE;
    CauseOfReadBuildFailures = STATUS_SUCCESS;

    //
    // Prepare read-lists (determine runs and allocate MDLs).
    //

    for (i = 0; i < NumberOfLists; i += 1) {

        //
        // Note any non-null list is referenced by this call so this routine
        // must dereference it when done to re-enable dynamic prototype PTEs.
        //

        status = MiPfPrepareReadList (ReadLists[i], &MiReadLists[i]);

        //
        // MiPfPrepareReadList never returns half-formed inpage support
        // blocks and MDLs. Either nothing is returned, partial lists are
        // returned or a complete list is returned. Any non-null list
        // can therefore be processed.
        //

        if (NT_SUCCESS (status)) {
            if (MiReadLists[i] != NULL) {
                ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);
                ReadBuilt = TRUE;
            }
        }
        else {
            // Remember the most recent failure; preparation of the other
            // lists continues regardless.
            CauseOfReadBuildFailures = status;
        }
    }

    if (ReadBuilt == FALSE) {

        //
        // No lists were created so nothing further needs to be done.
        // CauseOfReadBuildFailures tells us whether this was due to all
        // the desired pages already being resident or that resources to
        // build the request could not be allocated.
        //

        ExFreePool (MiReadLists);

        if (CauseOfReadBuildFailures != STATUS_SUCCESS) {
            return CauseOfReadBuildFailures;
        }

        //
        // All the pages the caller asked for are already resident.
        //

        return STATUS_SUCCESS;
    }

    //
    // APCs must be disabled once we put a page in transition. Otherwise
    // a thread suspend will stop us from issuing the I/O - this will hang
    // any other threads that need the same page.
    //

    CurrentThread = PsGetCurrentThread();
    ApcNeeded = FALSE;

    ASSERT ((PKTHREAD)CurrentThread == KeGetCurrentThread ());
    KeEnterCriticalRegionThread ((PKTHREAD)CurrentThread);

    //
    // The nested fault count protects this thread from deadlocks where a
    // special kernel APC fires and references the same user page(s) we are
    // putting in transition.
    //

    KeRaiseIrql (APC_LEVEL, &OldIrql);
    ASSERT (CurrentThread->NestedFaultCount == 0);
    CurrentThread->NestedFaultCount += 1;
    KeLowerIrql (OldIrql);

    //
    // Allocate physical memory.
    //

    DummyPagePfn = NULL;
    ReadBuilt = FALSE;
    CauseOfReadBuildFailures = STATUS_SUCCESS;

#if DBG
    // Poison value: the ASSERT below proves at least one call to
    // MiPfPutPagesInTransition overwrote it.
    status = 0xC0033333;
#endif

    for (i = 0; i < NumberOfLists; i += 1) {

        if ((MiReadLists[i] != NULL) &&
            (MiReadLists[i]->InPageSupportHead.Next != NULL)) {

            status = MiPfPutPagesInTransition (MiReadLists[i], &DummyPagePfn);

            if (NT_SUCCESS (status)) {

                if (MiReadLists[i]->InPageSupportHead.Next != NULL) {

                    ReadBuilt = TRUE;

                    //
                    // Issue I/Os.
                    //

                    MiPfExecuteReadList (MiReadLists[i]);
                }
                else {

                    // Every page in this list turned out to be resident
                    // after all - release and free the list now.

                    MiPfReleaseSubsectionReferences (MiReadLists[i]);
                    ExFreePool (MiReadLists[i]);
                    MiReadLists[i] = NULL;
                }
            }
            else {

                CauseOfReadBuildFailures = status;

                //
                // If not even a single page is available then don't bother
                // trying to prefetch anything else.
                //

                for (; i < NumberOfLists; i += 1) {
                    if (MiReadLists[i] != NULL) {
                        MiPfReleaseSubsectionReferences (MiReadLists[i]);
                        ExFreePool (MiReadLists[i]);
                        MiReadLists[i] = NULL;
                    }
                }

                break;
            }
        }
    }

    //
    // At least one call to MiPfPutPagesInTransition was made, which
    // sets status properly.
    //

    ASSERT (status != 0xC0033333);

    if (ReadBuilt == TRUE) {

        status = STATUS_SUCCESS;

        //
        // Wait for I/Os to complete. Note APCs must remain disabled.
        //

        for (i = 0; i < NumberOfLists; i += 1) {
            if (MiReadLists[i] != NULL) {
                ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);

                MiPfCompletePrefetchIos (MiReadLists[i]);

                MiPfReleaseSubsectionReferences (MiReadLists[i]);
            }
        }
    }
    else {

        //
        // No reads were issued.
        //
        // CauseOfReadBuildFailures tells us whether this was due to all
        // the desired pages already being resident or that resources to
        // build the request could not be allocated.
        //

        status = CauseOfReadBuildFailures;
    }

    //
    // Put DummyPage back on the free list.
    //

    if (DummyPagePfn != NULL) {
        MiPfFreeDummyPage (DummyPagePfn);
    }

    //
    // Only when all the I/Os have been completed (not just issued) can
    // APCs be re-enabled. This prevents a user-issued suspend APC from
    // keeping a shared page in transition forever.
    //

    KeRaiseIrql (APC_LEVEL, &OldIrql);
    ASSERT (CurrentThread->NestedFaultCount == 1);
    CurrentThread->NestedFaultCount -= 1;

    // Capture (and clear) any APC completion that was deferred while the
    // nested fault count was raised; it is retried below after the
    // critical region is exited.
    if (CurrentThread->ApcNeeded == 1) {
        ApcNeeded = TRUE;
        CurrentThread->ApcNeeded = 0;
    }
    KeLowerIrql (OldIrql);

    KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);

    for (i = 0; i < NumberOfLists; i += 1) {
        if (MiReadLists[i] != NULL) {
            ExFreePool (MiReadLists[i]);
        }
    }

    ExFreePool (MiReadLists);

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
    ASSERT (CurrentThread->NestedFaultCount == 0);
    ASSERT (CurrentThread->ApcNeeded == 0);

    if (ApcNeeded == TRUE) {
        KeRaiseIrql (APC_LEVEL, &OldIrql);
        IoRetryIrpCompletions ();
        KeLowerIrql (OldIrql);
    }

    return status;
}
VOID
MiPfFreeDummyPage (
    IN PMMPFN DummyPagePfn
    )

/*++

Routine Description:

    This nonpaged wrapper routine frees the dummy page PFN that was used
    to bridge gaps in prefetch MDLs.

Arguments:

    DummyPagePfn - Supplies the dummy page PFN.

Return Value:

    None.

Environment:

    Kernel mode.

--*/

{
    KIRQL OldIrql;
    PFN_NUMBER PageFrameIndex;

    // Recover the frame number from the PFN database element address.
    PageFrameIndex = DummyPagePfn - MmPfnDatabase;

    LOCK_PFN (OldIrql);

    // The dummy page is expected to still carry exactly the references
    // taken when it was set up: one share and two references.
    ASSERT (DummyPagePfn->u2.ShareCount == 1);
    ASSERT (DummyPagePfn->u3.e1.PrototypePte == 0);
    ASSERT (DummyPagePfn->OriginalPte.u.Long == MM_DEMAND_ZERO_WRITE_PTE);
    ASSERT (DummyPagePfn->u3.e2.ReferenceCount == 2);

    MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(DummyPagePfn, 17);

    //
    // Clear the read in progress bit as this page may never have been used
    // for an I/O after all. The inpage error bit must also be cleared as
    // any number of errors may have occurred during reads of pages (that
    // were immaterial anyway).
    //

    DummyPagePfn->u3.e1.ReadInProgress = 0;
    DummyPagePfn->u4.InPageError = 0;

    MI_SET_PFN_DELETED (DummyPagePfn);

    MiDecrementShareCount (PageFrameIndex);

    UNLOCK_PFN (OldIrql);
}
VOID
MiMovePageToEndOfStandbyList(
    IN PMMPTE PointerPte
    )

/*++

Routine Description:

    This nonpaged routine obtains the PFN lock and moves a page to the end of
    the standby list (if the page is still in transition).

Arguments:

    PointerPte - Supplies the prototype PTE to examine.

Return Value:

    None.

Environment:

    Kernel mode, PFN lock not held.

--*/

{
    KIRQL OldIrql;
    PMMPFN Pfn1;
    MMPTE PteContents;
    PFN_NUMBER PageFrameIndex;

    LOCK_PFN (OldIrql);

    // The validity check is made under the PFN lock, before the PTE is
    // dereferenced below.
    if (!MmIsAddressValid (PointerPte)) {

        //
        // If the paged pool containing the prototype PTE is not resident
        // then the actual page itself may still be transition or not. This
        // should be so rare it's not worth making the pool resident so the
        // proper checks can be applied. Just bail.
        //

        UNLOCK_PFN (OldIrql);
        return;
    }

    // Snapshot the PTE once and decide from the snapshot.
    PteContents = *PointerPte;

    if ((PteContents.u.Hard.Valid == 0) &&
        (PteContents.u.Soft.Prototype == 0) &&
        (PteContents.u.Soft.Transition == 1)) {

        PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents);
        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

        //
        // The page is still in transition, move it to the end to protect it
        // from possible cannibalization. Note that if the page is currently
        // being written to disk it will be on the modified list and when the
        // write completes it will automatically go to the end of the standby
        // list anyway so skip those.
        //

        if (Pfn1->u3.e1.PageLocation == StandbyPageList) {
            MiUnlinkPageFromList (Pfn1);
            MiInsertPageInList (&MmStandbyPageListHead, PageFrameIndex);
        }
    }

    UNLOCK_PFN (OldIrql);
}
  418. VOID
  419. MiPfReleaseSubsectionReferences (
  420. IN PMI_READ_LIST MiReadList
  421. )
  422. /*++
  423. Routine Description:
  424. This routine releases reference counts on subsections examined by the
  425. prefetch scanner.
  426. Arguments:
  427. MiReadList - Supplies a read-list entry.
  428. Return Value:
  429. None.
  430. Environment:
  431. Kernel mode, PASSIVE_LEVEL.
  432. --*/
  433. {
  434. PMSUBSECTION MappedSubsection;
  435. PCONTROL_AREA ControlArea;
  436. ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
  437. ControlArea = MiReadList->ControlArea;
  438. ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
  439. ASSERT (ControlArea->FilePointer != NULL);
  440. //
  441. // Image files don't have dynamic prototype PTEs.
  442. //
  443. if (ControlArea->u.Flags.Image == 1) {
  444. return;
  445. }
  446. ASSERT (ControlArea->u.Flags.GlobalOnlyPerSession == 0);
  447. MappedSubsection = (PMSUBSECTION)(ControlArea + 1);
  448. MiRemoveViewsFromSectionWithPfn (MappedSubsection,
  449. MiReadList->LastPteOffsetReferenced);
  450. }
  451. NTSTATUS
  452. MiPfPrepareReadList (
  453. IN PREAD_LIST ReadList,
  454. OUT PMI_READ_LIST *OutMiReadList
  455. )
  456. /*++
  457. Routine Description:
  458. This routine constructs MDLs that describe the pages in the argument
  459. read-list. The caller will then issue the I/Os on return.
  460. Arguments:
  461. ReadList - Supplies the read-list.
  462. OutMiReadList - Supplies a pointer to receive the Mi readlist.
  463. Return Value:
  464. Various NTSTATUS codes.
  465. If STATUS_SUCCESS is returned, OutMiReadList is set to a pointer to an Mi
  466. readlist to be used for prefetching or NULL if no prefetching is needed.
  467. If OutMireadList is non-NULL (on success only) then the caller must call
  468. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffsetReferenced) for data files.
  469. Environment:
  470. Kernel mode, PASSIVE_LEVEL.
  471. --*/
  472. {
  473. ULONG LastPteOffset;
  474. NTSTATUS Status;
  475. MMPTE PteContents;
  476. PMMPTE LocalPrototypePte;
  477. PMMPTE LastPrototypePte;
  478. PMMPTE StartPrototypePte;
  479. PMMPTE EndPrototypePte;
  480. PMI_READ_LIST MiReadList;
  481. PMI_READ_LIST_ENTRY Rle;
  482. PMI_READ_LIST_ENTRY StartRleRun;
  483. PMI_READ_LIST_ENTRY EndRleRun;
  484. PMI_READ_LIST_ENTRY RleMax;
  485. PMI_READ_LIST_ENTRY FirstRleInRun;
  486. PCONTROL_AREA ControlArea;
  487. PSUBSECTION Subsection;
  488. PSUBSECTION PreviousSubsection;
  489. PMSUBSECTION VeryFirstSubsection;
  490. PMSUBSECTION VeryLastSubsection;
  491. UINT64 StartOffset;
  492. LARGE_INTEGER EndQuad;
  493. UINT64 EndOffset;
  494. UINT64 FileOffset;
  495. PMMINPAGE_SUPPORT InPageSupport;
  496. PMDL Mdl;
  497. ULONG i;
  498. PFN_NUMBER NumberOfPages;
  499. UINT64 StartingOffset;
  500. UINT64 TempOffset;
  501. ULONG ReadSize;
  502. ULONG NumberOfEntries;
  503. #if DBG
  504. PPFN_NUMBER Page;
  505. #endif
  506. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  507. *OutMiReadList = NULL;
  508. //
  509. // Create an Mi readlist from the argument Cc readlist.
  510. //
  511. NumberOfEntries = ReadList->NumberOfEntries;
  512. MiReadList = (PMI_READ_LIST) ExAllocatePoolWithTag (
  513. NonPagedPool,
  514. sizeof (MI_READ_LIST) + NumberOfEntries * sizeof (MI_READ_LIST_ENTRY),
  515. 'lRmM');
  516. if (MiReadList == NULL) {
  517. return STATUS_INSUFFICIENT_RESOURCES;
  518. }
  519. //
  520. // Translate the section object into the relevant control area.
  521. //
  522. if (ReadList->IsImage) {
  523. ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->ImageSectionObject;
  524. ASSERT (ControlArea != NULL );
  525. ASSERT (ControlArea->u.Flags.Image == 1);
  526. }
  527. else {
  528. ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->DataSectionObject;
  529. }
  530. //
  531. // If the section is backed by a ROM, then there's no need to prefetch
  532. // anything as it would waste RAM.
  533. //
  534. if (ControlArea->u.Flags.Rom == 1) {
  535. ExFreePool (MiReadList);
  536. return STATUS_SUCCESS;
  537. }
  538. //
  539. // Make sure the section is really prefetchable - physical and
  540. // pagefile-backed sections are not.
  541. //
  542. if ((ControlArea->u.Flags.PhysicalMemory) ||
  543. (ControlArea->FilePointer == NULL)) {
  544. ExFreePool (MiReadList);
  545. return STATUS_INVALID_PARAMETER_1;
  546. }
  547. //
  548. // Initialize the internal Mi readlist.
  549. //
  550. MiReadList->ControlArea = ControlArea;
  551. MiReadList->FileObject = ReadList->FileObject;
  552. MiReadList->InPageSupportHead.Next = NULL;
  553. RtlZeroMemory (MiReadList->List,
  554. sizeof (MI_READ_LIST_ENTRY) * NumberOfEntries);
  555. //
  556. // Copy pages from the Cc readlists to the internal Mi readlists.
  557. //
  558. NumberOfPages = 0;
  559. FirstRleInRun = NULL;
  560. VeryFirstSubsection = NULL;
  561. VeryLastSubsection = NULL;
  562. LastPteOffset = 0;
  563. if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
  564. Subsection = (PSUBSECTION)(ControlArea + 1);
  565. //
  566. // Ensure all prototype PTE bases are valid for all subsections of the
  567. // requested file so the traversal code doesn't have to check
  568. // everywhere. As long as the files are not too large this should
  569. // be a cheap operation.
  570. //
  571. if (ControlArea->u.Flags.Image == 0) {
  572. ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
  573. ASSERT (ControlArea->FilePointer != NULL);
  574. VeryFirstSubsection = (PMSUBSECTION) Subsection;
  575. VeryLastSubsection = (PMSUBSECTION) Subsection;
  576. do {
  577. //
  578. // A memory barrier is needed to read the subsection chains
  579. // in order to ensure the writes to the actual individual
  580. // subsection data structure fields are visible in correct
  581. // order. This avoids the need to acquire any stronger
  582. // synchronization (ie: PFN lock), thus yielding better
  583. // performance and pagability.
  584. //
  585. KeMemoryBarrier ();
  586. LastPteOffset += VeryLastSubsection->PtesInSubsection;
  587. if (VeryLastSubsection->NextSubsection == NULL) {
  588. break;
  589. }
  590. VeryLastSubsection = (PMSUBSECTION) VeryLastSubsection->NextSubsection;
  591. } while (TRUE);
  592. MiReadList->LastPteOffsetReferenced = LastPteOffset;
  593. Status = MiAddViewsForSectionWithPfn (VeryFirstSubsection,
  594. LastPteOffset);
  595. if (!NT_SUCCESS (Status)) {
  596. ExFreePool (MiReadList);
  597. return Status;
  598. }
  599. }
  600. }
  601. else {
  602. Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
  603. }
  604. StartOffset = (UINT64)MiStartingOffset (Subsection, Subsection->SubsectionBase);
  605. EndQuad = MiEndingOffset(Subsection);
  606. EndOffset = (UINT64)EndQuad.QuadPart;
  607. //
  608. // If the file is bigger than the subsection, truncate the subsection range
  609. // checks.
  610. //
  611. if ((StartOffset & ~(PAGE_SIZE - 1)) + (Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
  612. EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + (Subsection->PtesInSubsection << PAGE_SHIFT);
  613. }
  614. TempOffset = EndOffset;
  615. PreviousSubsection = NULL;
  616. LastPrototypePte = NULL;
  617. Rle = MiReadList->List;
  618. #if DBG
  619. if (MiPfDebug & MI_PF_FORCE_PREFETCH) {
  620. MiRemoveUserPages ();
  621. }
  622. #endif
  623. //
  624. // Initializing FileOffset is not needed for correctness, but without it
  625. // the compiler cannot compile this code W4 to check for use of
  626. // uninitialized variables.
  627. //
  628. FileOffset = 0;
  629. for (i = 0; i < NumberOfEntries; i += 1, Rle += 1) {
  630. ASSERT ((i == 0) || (ReadList->List[i].Alignment > FileOffset));
  631. FileOffset = ReadList->List[i].Alignment;
  632. ASSERT (Rle->u1.PrototypePte == NULL);
  633. //
  634. // Calculate which PTE maps the given logical block offset.
  635. //
  636. // Since our caller always passes ordered lists of logical block offsets
  637. // within a given file, always look forwards (as an optimization) in the
  638. // subsection chain.
  639. //
  640. // A quick check is made first to avoid recalculations and loops where
  641. // possible.
  642. //
  643. if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
  644. ASSERT (Subsection->SubsectionBase != NULL);
  645. LocalPrototypePte = Subsection->SubsectionBase +
  646. ((FileOffset - StartOffset) >> PAGE_SHIFT);
  647. ASSERT (TempOffset != 0);
  648. ASSERT (EndOffset != 0);
  649. }
  650. else {
  651. LocalPrototypePte = NULL;
  652. do {
  653. ASSERT (Subsection->SubsectionBase != NULL);
  654. if ((Subsection->StartingSector == 0) &&
  655. (ControlArea->u.Flags.Image == 1) &&
  656. (Subsection->SubsectionBase != ControlArea->Segment->PrototypePte)) {
  657. //
  658. // This is an image that was built with a linker pre-1995
  659. // (version 2.39 is one example) that put bss into a
  660. // separate subsection with zero as a starting file offset
  661. // field in the on-disk image. Ignore any prefetch as it
  662. // would read from the wrong offset trying to satisfy these
  663. // ranges (which are actually demand zero when the fault
  664. // occurs).
  665. //
  666. // We could be clever here and just ignore this particular
  667. // file offset, but for now just don't prefetch this file
  668. // at all. Note that this offset would only be present in
  669. // a prefetch database that was constructed without the
  670. // accompanying fix just before the call to
  671. // CcPfLogPageFault.
  672. //
  673. Subsection = NULL;
  674. break;
  675. }
  676. StartOffset = (UINT64)MiStartingOffset (Subsection, Subsection->SubsectionBase);
  677. EndQuad = MiEndingOffset(Subsection);
  678. EndOffset = (UINT64)EndQuad.QuadPart;
  679. //
  680. // If the file is bigger than the subsection, truncate the
  681. // subsection range checks.
  682. //
  683. if ((StartOffset & ~(PAGE_SIZE - 1)) + (Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
  684. EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + (Subsection->PtesInSubsection << PAGE_SHIFT);
  685. }
  686. if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
  687. LocalPrototypePte = Subsection->SubsectionBase +
  688. ((FileOffset - StartOffset) >> PAGE_SHIFT);
  689. TempOffset = EndOffset;
  690. break;
  691. }
  692. if ((VeryLastSubsection != NULL) &&
  693. ((PMSUBSECTION)Subsection == VeryLastSubsection)) {
  694. //
  695. // The requested block is beyond the size the section
  696. // was on entry. Reject it as this subsection is not
  697. // referenced.
  698. //
  699. Subsection = NULL;
  700. break;
  701. }
  702. Subsection = Subsection->NextSubsection;
  703. } while (Subsection != NULL);
  704. }
  705. if ((Subsection == NULL) || (LocalPrototypePte == LastPrototypePte)) {
  706. //
  707. // Illegal offsets are not prefetched. Either the file has
  708. // been replaced since the scenario was logged or Cc is passing
  709. // trash. Either way, this prefetch is over.
  710. //
  711. #if DBG
  712. if (MiPfDebug & MI_PF_PRINT_ERRORS) {
  713. DbgPrint ("MiPfPrepareReadList: Illegal readlist passed %p, %p, %p\n", ReadList, LocalPrototypePte, LastPrototypePte);
  714. }
  715. #endif
  716. if (VeryFirstSubsection != NULL) {
  717. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
  718. LastPteOffset);
  719. }
  720. ExFreePool (MiReadList);
  721. return STATUS_INVALID_PARAMETER_1;
  722. }
  723. PteContents = *LocalPrototypePte;
  724. //
  725. // See if this page needs to be read in. Note that these reads
  726. // are done without the PFN or system cache working set locks.
  727. // This is ok because later before we make the final decision on
  728. // whether to read each page, we'll look again.
  729. // If the page is in transition, make the call to (possibly) move
  730. // it to the end of the standby list to prevent cannibalization.
  731. //
  732. if (PteContents.u.Hard.Valid == 1) {
  733. continue;
  734. }
  735. if (PteContents.u.Soft.Prototype == 0) {
  736. if (PteContents.u.Soft.Transition == 1) {
  737. MiMovePageToEndOfStandbyList (LocalPrototypePte);
  738. }
  739. else {
  740. //
  741. // Demand zero or pagefile-backed, don't prefetch from the
  742. // file or we'd lose the contents. Note this can happen for
  743. // session-space images as we back modified (ie: for relocation
  744. // fixups or IAT updated) portions from the pagefile.
  745. //
  746. NOTHING;
  747. }
  748. continue;
  749. }
  750. Rle->u1.PrototypePte = LocalPrototypePte;
  751. LastPrototypePte = LocalPrototypePte;
  752. //
  753. // Check for partial pages as they require further processing later.
  754. //
  755. StartingOffset = (UINT64) MiStartingOffset (Subsection, LocalPrototypePte);
  756. ASSERT (StartingOffset < TempOffset);
  757. if ((StartingOffset + PAGE_SIZE) > TempOffset) {
  758. Rle->u1.e1.Partial = 1;
  759. }
  760. //
  761. // The NewSubsection marker is used to delimit the beginning of a new
  762. // subsection because RLE chunks must be split to accommodate inpage
  763. // completion so that proper zeroing (based on subsection alignment)
  764. // is done in MiWaitForInPageComplete.
  765. //
  766. if (FirstRleInRun == NULL) {
  767. FirstRleInRun = Rle;
  768. Rle->u1.e1.NewSubsection = 1;
  769. PreviousSubsection = Subsection;
  770. }
  771. else {
  772. if (Subsection != PreviousSubsection) {
  773. Rle->u1.e1.NewSubsection = 1;
  774. PreviousSubsection = Subsection;
  775. }
  776. }
  777. NumberOfPages += 1;
  778. }
  779. //
  780. // If the number of pages to read in is extremely small, don't bother.
  781. //
  782. if (NumberOfPages < MINIMUM_READ_LIST_PAGES) {
  783. if (VeryFirstSubsection != NULL) {
  784. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
  785. LastPteOffset);
  786. }
  787. ExFreePool (MiReadList);
  788. return STATUS_SUCCESS;
  789. }
  790. RleMax = MiReadList->List + NumberOfEntries;
  791. ASSERT (FirstRleInRun != RleMax);
  792. Status = STATUS_SUCCESS;
  793. //
  794. // Walk the readlists to determine runs. Cross-subsection runs are split
  795. // here so the completion code can zero the proper amount for any
  796. // non-aligned files.
  797. //
  798. EndRleRun = NULL;
  799. Rle = FirstRleInRun;
  800. //
  801. // Initializing StartRleRun & EndPrototypePte is not needed for correctness
  802. // but without it the compiler cannot compile this code
  803. // W4 to check for use of uninitialized variables.
  804. //
  805. StartRleRun = NULL;
  806. EndPrototypePte = NULL;
  807. while (Rle < RleMax) {
  808. if (Rle->u1.PrototypePte != NULL) {
  809. if (EndRleRun != NULL) {
  810. StartPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
  811. if (StartPrototypePte - EndPrototypePte > SEEK_THRESHOLD) {
  812. Rle -= 1;
  813. goto BuildMdl;
  814. }
  815. }
  816. if (Rle->u1.e1.NewSubsection == 1) {
  817. if (EndRleRun != NULL) {
  818. Rle -= 1;
  819. goto BuildMdl;
  820. }
  821. }
  822. if (EndRleRun == NULL) {
  823. StartRleRun = Rle;
  824. }
  825. EndRleRun = Rle;
  826. EndPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
  827. if (Rle->u1.e1.Partial == 1) {
  828. //
  829. // This must be the last RLE in this subsection as it is a
  830. // partial page. Split this run now.
  831. //
  832. goto BuildMdl;
  833. }
  834. }
  835. Rle += 1;
  836. //
  837. // Handle any straggling last run as well.
  838. //
  839. if (Rle == RleMax) {
  840. if (EndRleRun != NULL) {
  841. Rle -= 1;
  842. goto BuildMdl;
  843. }
  844. }
  845. continue;
  846. BuildMdl:
  847. //
  848. // Note no preceding or trailing dummy pages are possible as they are
  849. // trimmed immediately each time when the first real page of a run
  850. // is discovered above.
  851. //
  852. ASSERT (Rle >= StartRleRun);
  853. ASSERT (StartRleRun->u1.PrototypePte != NULL);
  854. ASSERT (EndRleRun->u1.PrototypePte != NULL);
  855. StartPrototypePte = MI_RLEPROTO_TO_PROTO(StartRleRun->u1.PrototypePte);
  856. EndPrototypePte = MI_RLEPROTO_TO_PROTO(EndRleRun->u1.PrototypePte);
  857. NumberOfPages = (EndPrototypePte - StartPrototypePte) + 1;
  858. //
  859. // Allocate and initialize an inpage support block for this run.
  860. //
  861. InPageSupport = MiGetInPageSupportBlock (FALSE, PREFETCH_PROCESS);
  862. if (InPageSupport == NULL) {
  863. Status = STATUS_INSUFFICIENT_RESOURCES;
  864. break;
  865. }
  866. //
  867. // Use the MDL embedded in the inpage support block if it's big enough.
  868. // Otherwise allocate and initialize an MDL for this run.
  869. //
  870. if (NumberOfPages <= MM_MAXIMUM_READ_CLUSTER_SIZE + 1) {
  871. Mdl = &InPageSupport->Mdl;
  872. MmInitializeMdl (Mdl, NULL, NumberOfPages << PAGE_SHIFT);
  873. }
  874. else {
  875. Mdl = MmCreateMdl (NULL, NULL, NumberOfPages << PAGE_SHIFT);
  876. if (Mdl == NULL) {
  877. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  878. #if DBG
  879. InPageSupport->ListEntry.Next = NULL;
  880. #endif
  881. MiFreeInPageSupportBlock (InPageSupport);
  882. Status = STATUS_INSUFFICIENT_RESOURCES;
  883. break;
  884. }
  885. }
  886. #if DBG
  887. if (MiPfDebug & MI_PF_VERBOSE) {
  888. DbgPrint ("MiPfPrepareReadList: Creating INPAGE/MDL %p %p for %x pages\n", InPageSupport, Mdl, NumberOfPages);
  889. }
  890. Page = (PPFN_NUMBER)(Mdl + 1);
  891. *Page = MM_EMPTY_LIST;
  892. #endif
  893. //
  894. // Find the subsection for the start RLE. From this the file offset
  895. // can be derived.
  896. //
  897. ASSERT (StartPrototypePte != NULL);
  898. if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
  899. Subsection = (PSUBSECTION)(ControlArea + 1);
  900. }
  901. else {
  902. Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
  903. }
  904. do {
  905. ASSERT (Subsection->SubsectionBase != NULL);
  906. if ((StartPrototypePte >= Subsection->SubsectionBase) &&
  907. (StartPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection)) {
  908. break;
  909. }
  910. Subsection = Subsection->NextSubsection;
  911. } while (Subsection != NULL);
  912. //
  913. // Start the read at the proper file offset.
  914. //
  915. StartingOffset = (UINT64) MiStartingOffset (Subsection,
  916. StartPrototypePte);
  917. InPageSupport->ReadOffset = *((PLARGE_INTEGER)(&StartingOffset));
  918. //
  919. // Since the RLE is not always valid here, only walk the remaining
  920. // subsections for valid partial RLEs as only they need truncation.
  921. //
  922. // Note only image file reads need truncation as the filesystem cannot
  923. // blindly zero the rest of the page for these reads as they are packed
  924. // by memory management on a 512-byte sector basis. Data reads use
  925. // the whole page and the filesystems zero fill any remainder beyond
  926. // valid data length. It is important to specify the entire page where
  927. // possible so the filesystem won't post this which will hurt perf.
  928. //
  929. if ((EndRleRun->u1.e1.Partial == 1) && (ReadList->IsImage)) {
  930. ASSERT ((EndPrototypePte >= Subsection->SubsectionBase) &&
  931. (EndPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection));
  932. //
  933. // The read length for a partial RLE must be truncated correctly.
  934. //
  935. EndQuad = MiEndingOffset(Subsection);
  936. TempOffset = (UINT64)EndQuad.QuadPart;
  937. if ((ULONG)(TempOffset - StartingOffset) <= Mdl->ByteCount) {
  938. ReadSize = (ULONG)(TempOffset - StartingOffset);
  939. //
  940. // Round the offset to a 512-byte offset as this will help
  941. // filesystems optimize the transfer. Note that filesystems
  942. // will always zero fill the remainder between VDL and the
  943. // next 512-byte multiple and we have already zeroed the
  944. // whole page.
  945. //
  946. ReadSize = ((ReadSize + MMSECTOR_MASK) & ~MMSECTOR_MASK);
  947. Mdl->ByteCount = ReadSize;
  948. }
  949. else {
  950. ASSERT ((StartingOffset & ~(PAGE_SIZE - 1)) + (Subsection->PtesInSubsection << PAGE_SHIFT) < TempOffset);
  951. }
  952. }
  953. //
  954. // Stash these in the inpage block so we can walk it quickly later
  955. // in pass 2.
  956. //
  957. InPageSupport->BasePte = (PMMPTE)StartRleRun;
  958. InPageSupport->FilePointer = (PFILE_OBJECT)EndRleRun;
  959. ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
  960. InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
  961. PushEntryList (&MiReadList->InPageSupportHead,
  962. &InPageSupport->ListEntry);
  963. Rle += 1;
  964. EndRleRun = NULL;
  965. }
  966. //
  967. // Check for the entire list being full (or empty).
  968. //
  969. // Status is STATUS_INSUFFICIENT_RESOURCES if an MDL or inpage block
  970. // allocation failed. If any allocations succeeded, then set STATUS_SUCCESS
  971. // as pass2 must occur.
  972. //
  973. if (MiReadList->InPageSupportHead.Next != NULL) {
  974. Status = STATUS_SUCCESS;
  975. }
  976. else {
  977. if (VeryFirstSubsection != NULL) {
  978. MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffset);
  979. }
  980. ExFreePool (MiReadList);
  981. MiReadList = NULL;
  982. }
  983. //
  984. // Note that a nonzero *OutMiReadList return value means that the caller
  985. // needs to remove the views for the section.
  986. //
  987. *OutMiReadList = MiReadList;
  988. return Status;
  989. }
  990. NTSTATUS
  991. MiPfPutPagesInTransition (
  992. IN PMI_READ_LIST ReadList,
  993. IN OUT PMMPFN *DummyPagePfn
  994. )
  995. /*++
  996. Routine Description:
  997. This routine allocates physical memory for the specified read-list and
  998. puts all the pages in transition. On return the caller must issue I/Os
  999. for the list not only because of this thread, but also to satisfy
  1000. collided faults from other threads for these same pages.
  1001. Arguments:
  1002. ReadList - Supplies a pointer to the read-list.
  1003. DummyPagePfn - If this points at a NULL pointer, then a dummy page is
  1004. allocated and placed in this pointer. Otherwise this points
  1005. at a PFN to use as a dummy page.
  1006. Return Value:
  1007. STATUS_SUCCESS
  1008. STATUS_INSUFFICIENT_RESOURCES
  1009. Environment:
  1010. Kernel mode. PASSIVE_LEVEL.
  1011. --*/
  1012. {
  1013. LOGICAL Waited;
  1014. PVOID StartingVa;
  1015. PFN_NUMBER MdlPages;
  1016. KIRQL OldIrql;
  1017. MMPTE PteContents;
  1018. PMMPTE RlePrototypePte;
  1019. PMMPTE FirstRlePrototypeInRun;
  1020. PFN_NUMBER PageFrameIndex;
  1021. PPFN_NUMBER Page;
  1022. PPFN_NUMBER DestinationPage;
  1023. ULONG PageColor;
  1024. PMI_READ_LIST_ENTRY Rle;
  1025. PMI_READ_LIST_ENTRY RleMax;
  1026. PMI_READ_LIST_ENTRY FirstRleInRun;
  1027. PFN_NUMBER DummyPage;
  1028. PMDL Mdl;
  1029. PMDL FreeMdl;
  1030. PMMPFN PfnProto;
  1031. PMMPFN Pfn1;
  1032. PMMPFN DummyPfn1;
  1033. ULONG i;
  1034. PFN_NUMBER DummyTrim;
  1035. PFN_NUMBER DummyReferences;
  1036. ULONG NumberOfPages;
  1037. MMPTE TempPte;
  1038. PMMPTE PointerPde;
  1039. PEPROCESS CurrentProcess;
  1040. PSINGLE_LIST_ENTRY PrevEntry;
  1041. PSINGLE_LIST_ENTRY NextEntry;
  1042. PMMINPAGE_SUPPORT InPageSupport;
  1043. SINGLE_LIST_ENTRY ReversedInPageSupportHead;
  1044. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1045. //
  1046. // Reverse the singly linked list of inpage support blocks so the
  1047. // blocks are read in the same order requested for better performance
  1048. // (ie: keep the disk heads seeking in the same direction).
  1049. //
  1050. ReversedInPageSupportHead.Next = NULL;
  1051. do {
  1052. NextEntry = PopEntryList (&ReadList->InPageSupportHead);
  1053. if (NextEntry == NULL) {
  1054. break;
  1055. }
  1056. PushEntryList (&ReversedInPageSupportHead, NextEntry);
  1057. } while (TRUE);
  1058. ASSERT (ReversedInPageSupportHead.Next != NULL);
  1059. ReadList->InPageSupportHead.Next = ReversedInPageSupportHead.Next;
  1060. DummyReferences = 0;
  1061. FreeMdl = NULL;
  1062. CurrentProcess = PsGetCurrentProcess();
  1063. PfnProto = NULL;
  1064. PointerPde = NULL;
  1065. LOCK_PFN (OldIrql);
  1066. //
  1067. // Do a quick sanity check to avoid doing unnecessary work.
  1068. //
  1069. if ((MmAvailablePages < MINIMUM_AVAILABLE_PAGES) ||
  1070. (MI_NONPAGABLE_MEMORY_AVAILABLE() < MINIMUM_AVAILABLE_PAGES)) {
  1071. UNLOCK_PFN (OldIrql);
  1072. do {
  1073. NextEntry = PopEntryList(&ReadList->InPageSupportHead);
  1074. if (NextEntry == NULL) {
  1075. break;
  1076. }
  1077. InPageSupport = CONTAINING_RECORD(NextEntry,
  1078. MMINPAGE_SUPPORT,
  1079. ListEntry);
  1080. #if DBG
  1081. InPageSupport->ListEntry.Next = NULL;
  1082. #endif
  1083. MiFreeInPageSupportBlock (InPageSupport);
  1084. } while (TRUE);
  1085. return STATUS_INSUFFICIENT_RESOURCES;
  1086. }
  1087. //
  1088. // Allocate a dummy page that will map discarded pages that aren't skipped.
  1089. // Do it only if it's not already allocated.
  1090. //
  1091. if (*DummyPagePfn == NULL) {
  1092. MiEnsureAvailablePageOrWait (NULL, NULL);
  1093. DummyPage = MiRemoveAnyPage (0);
  1094. Pfn1 = MI_PFN_ELEMENT (DummyPage);
  1095. ASSERT (Pfn1->u2.ShareCount == 0);
  1096. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  1097. MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0);
  1098. //
  1099. // Give the page a containing frame so MiIdentifyPfn won't crash.
  1100. //
  1101. Pfn1->u4.PteFrame = PsInitialSystemProcess->Pcb.DirectoryTableBase[0] >> PAGE_SHIFT;
  1102. //
  1103. // Always bias the reference count by 1 and charge for this locked page
  1104. // up front so the myriad increments and decrements don't get slowed
  1105. // down with needless checking.
  1106. //
  1107. Pfn1->u3.e1.PrototypePte = 0;
  1108. MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, 11);
  1109. Pfn1->u3.e2.ReferenceCount += 1;
  1110. Pfn1->u3.e1.ReadInProgress = 1;
  1111. *DummyPagePfn = Pfn1;
  1112. }
  1113. else {
  1114. Pfn1 = *DummyPagePfn;
  1115. DummyPage = Pfn1 - MmPfnDatabase;
  1116. }
  1117. DummyPfn1 = Pfn1;
  1118. PrevEntry = NULL;
  1119. NextEntry = ReadList->InPageSupportHead.Next;
  1120. while (NextEntry != NULL) {
  1121. InPageSupport = CONTAINING_RECORD(NextEntry,
  1122. MMINPAGE_SUPPORT,
  1123. ListEntry);
  1124. Rle = (PMI_READ_LIST_ENTRY)InPageSupport->BasePte;
  1125. RleMax = (PMI_READ_LIST_ENTRY)InPageSupport->FilePointer;
  1126. ASSERT (Rle->u1.PrototypePte != NULL);
  1127. ASSERT (RleMax->u1.PrototypePte != NULL);
  1128. //
  1129. // Properly initialize the inpage support block fields we overloaded.
  1130. //
  1131. InPageSupport->BasePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
  1132. InPageSupport->FilePointer = ReadList->FileObject;
  1133. FirstRleInRun = Rle;
  1134. FirstRlePrototypeInRun = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
  1135. RleMax += 1;
  1136. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  1137. Page = (PPFN_NUMBER)(Mdl + 1);
  1138. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1139. MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1140. Mdl->ByteCount);
  1141. //
  1142. // Default the MDL entry to the dummy page as the RLE PTEs may
  1143. // be noncontiguous and we have no way to distinguish the jumps.
  1144. //
  1145. for (i = 0; i < MdlPages; i += 1) {
  1146. *Page = DummyPage;
  1147. Page += 1;
  1148. }
  1149. DummyReferences += MdlPages;
  1150. if (DummyPfn1->u3.e2.ReferenceCount + MdlPages >= MAXUSHORT) {
  1151. //
  1152. // The USHORT ReferenceCount wrapped.
  1153. //
  1154. // Dequeue all remaining inpage blocks.
  1155. //
  1156. UNLOCK_PFN (OldIrql);
  1157. if (PrevEntry != NULL) {
  1158. PrevEntry->Next = NULL;
  1159. }
  1160. else {
  1161. ReadList->InPageSupportHead.Next = NULL;
  1162. }
  1163. do {
  1164. InPageSupport = CONTAINING_RECORD(NextEntry,
  1165. MMINPAGE_SUPPORT,
  1166. ListEntry);
  1167. #if DBG
  1168. InPageSupport->ListEntry.Next = NULL;
  1169. #endif
  1170. NextEntry = NextEntry->Next;
  1171. MiFreeInPageSupportBlock (InPageSupport);
  1172. } while (NextEntry != NULL);
  1173. LOCK_PFN (OldIrql);
  1174. break;
  1175. }
  1176. DummyPfn1->u3.e2.ReferenceCount =
  1177. (USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages);
  1178. NumberOfPages = 0;
  1179. Waited = FALSE;
  1180. //
  1181. // Build the proper InPageSupport and MDL to describe this run.
  1182. //
  1183. for (; Rle < RleMax; Rle += 1) {
  1184. //
  1185. // Fill the MDL entry for this RLE.
  1186. //
  1187. RlePrototypePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
  1188. if (RlePrototypePte == NULL) {
  1189. continue;
  1190. }
  1191. //
  1192. // The RlePrototypePte better be inside a prototype PTE allocation
  1193. // so that subsequent page trims update the correct PTEs.
  1194. //
  1195. ASSERT (((RlePrototypePte >= (PMMPTE)MmPagedPoolStart) &&
  1196. (RlePrototypePte <= (PMMPTE)MmPagedPoolEnd)) ||
  1197. ((RlePrototypePte >= (PMMPTE)MmSpecialPoolStart) && (RlePrototypePte <= (PMMPTE)MmSpecialPoolEnd)));
  1198. //
  1199. // This is a page that our first pass which ran lock-free decided
  1200. // needed to be read. Here this must be rechecked as the page
  1201. // state could have changed. Note this check is final as the
  1202. // PFN lock is held. The PTE must be put in transition with
  1203. // read in progress before the PFN lock is released.
  1204. //
  1205. //
  1206. // Lock page containing prototype PTEs in memory by
  1207. // incrementing the reference count for the page.
  1208. // Unlock any page locked earlier containing prototype PTEs if
  1209. // the containing page is not the same for both.
  1210. //
  1211. if (PfnProto != NULL) {
  1212. if (PointerPde != MiGetPteAddress (RlePrototypePte)) {
  1213. ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
  1214. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
  1215. PfnProto = NULL;
  1216. }
  1217. }
  1218. if (PfnProto == NULL) {
  1219. ASSERT (!MI_IS_PHYSICAL_ADDRESS (RlePrototypePte));
  1220. PointerPde = MiGetPteAddress (RlePrototypePte);
  1221. if (PointerPde->u.Hard.Valid == 0) {
  1222. //
  1223. // Set Waited to TRUE if we ever release the PFN lock as
  1224. // that means a release path below must factor this in.
  1225. //
  1226. if (MiMakeSystemAddressValidPfn (RlePrototypePte) == TRUE) {
  1227. Waited = TRUE;
  1228. }
  1229. MiMakeSystemAddressValidPfn (RlePrototypePte);
  1230. }
  1231. PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber);
  1232. MI_ADD_LOCKED_PAGE_CHARGE(PfnProto, 4);
  1233. PfnProto->u3.e2.ReferenceCount += 1;
  1234. ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
  1235. }
  1236. PteContents = *(RlePrototypePte);
  1237. if (PteContents.u.Hard.Valid == 1) {
  1238. //
  1239. // The page has become resident since the last pass. Don't
  1240. // include it.
  1241. //
  1242. NOTHING;
  1243. }
  1244. else if (PteContents.u.Soft.Prototype == 0) {
  1245. //
  1246. // The page is either in transition (so don't prefetch it).
  1247. //
  1248. // - OR -
  1249. //
  1250. // it is now pagefile (or demand zero) backed - in which case
  1251. // prefetching it from the file here would cause us to lose
  1252. // the contents. Note this can happen for session-space images
  1253. // as we back modified (ie: for relocation fixups or IAT
  1254. // updated) portions from the pagefile.
  1255. //
  1256. NOTHING;
  1257. }
  1258. else if ((MmAvailablePages >= MINIMUM_AVAILABLE_PAGES) &&
  1259. (MI_NONPAGABLE_MEMORY_AVAILABLE() >= MINIMUM_AVAILABLE_PAGES)) {
  1260. NumberOfPages += 1;
  1261. //
  1262. // Allocate a physical page.
  1263. //
  1264. PageColor = MI_PAGE_COLOR_VA_PROCESS (
  1265. MiGetVirtualAddressMappedByPte (RlePrototypePte),
  1266. &CurrentProcess->NextPageColor
  1267. );
  1268. if (Rle->u1.e1.Partial == 1) {
  1269. //
  1270. // This read crosses the end of a subsection, get a zeroed
  1271. // page and correct the read size.
  1272. //
  1273. PageFrameIndex = MiRemoveZeroPage (PageColor);
  1274. }
  1275. else {
  1276. PageFrameIndex = MiRemoveAnyPage (PageColor);
  1277. }
  1278. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  1279. ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
  1280. ASSERT (Pfn1->u2.ShareCount == 0);
  1281. ASSERT (RlePrototypePte->u.Hard.Valid == 0);
  1282. //
  1283. // Initialize read-in-progress PFN.
  1284. //
  1285. MiInitializePfn (PageFrameIndex, RlePrototypePte, 0);
  1286. //
  1287. // These pieces of MiInitializePfn initialization are overridden
  1288. // here as these pages are only going into prototype
  1289. // transition and not into any page tables.
  1290. //
  1291. Pfn1->u3.e1.PrototypePte = 1;
  1292. MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, 38);
  1293. Pfn1->u2.ShareCount -= 1;
  1294. Pfn1->u3.e1.PageLocation = ZeroedPageList;
  1295. //
  1296. // Initialize the I/O specific fields.
  1297. //
  1298. ASSERT (FirstRleInRun->u1.PrototypePte != NULL);
  1299. Pfn1->u1.Event = &InPageSupport->Event;
  1300. Pfn1->u3.e1.ReadInProgress = 1;
  1301. ASSERT (Pfn1->u4.InPageError == 0);
  1302. //
  1303. // Increment the PFN reference count in the control area for
  1304. // the subsection.
  1305. //
  1306. ReadList->ControlArea->NumberOfPfnReferences += 1;
  1307. //
  1308. // Put the PTE into the transition state.
  1309. // No TB flush needed as the PTE is still not valid.
  1310. //
  1311. MI_MAKE_TRANSITION_PTE (TempPte,
  1312. PageFrameIndex,
  1313. RlePrototypePte->u.Soft.Protection,
  1314. RlePrototypePte);
  1315. MI_WRITE_INVALID_PTE (RlePrototypePte, TempPte);
  1316. Page = (PPFN_NUMBER)(Mdl + 1);
  1317. ASSERT ((ULONG)(RlePrototypePte - FirstRlePrototypeInRun) < MdlPages);
  1318. *(Page + (RlePrototypePte - FirstRlePrototypeInRun)) = PageFrameIndex;
  1319. }
  1320. else {
  1321. //
  1322. // Failed allocation - this concludes prefetching for this run.
  1323. //
  1324. break;
  1325. }
  1326. }
  1327. //
  1328. // If all the pages were resident, dereference the dummy page references
  1329. // now and notify our caller that I/Os are not necessary. Note that
  1330. // STATUS_SUCCESS must still be returned so our caller knows to continue
  1331. // on to the next readlist.
  1332. //
  1333. if (NumberOfPages == 0) {
  1334. ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);
  1335. DummyPfn1->u3.e2.ReferenceCount =
  1336. (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);
  1337. UNLOCK_PFN (OldIrql);
  1338. if (PrevEntry != NULL) {
  1339. PrevEntry->Next = NextEntry->Next;
  1340. }
  1341. else {
  1342. ReadList->InPageSupportHead.Next = NextEntry->Next;
  1343. }
  1344. NextEntry = NextEntry->Next;
  1345. #if DBG
  1346. InPageSupport->ListEntry.Next = NULL;
  1347. #endif
  1348. MiFreeInPageSupportBlock (InPageSupport);
  1349. LOCK_PFN (OldIrql);
  1350. continue;
  1351. }
  1352. //
  1353. // Carefully trim leading dummy pages.
  1354. //
  1355. Page = (PPFN_NUMBER)(Mdl + 1);
  1356. DummyTrim = 0;
  1357. for (i = 0; i < MdlPages - 1; i += 1) {
  1358. if (*Page == DummyPage) {
  1359. DummyTrim += 1;
  1360. Page += 1;
  1361. }
  1362. else {
  1363. break;
  1364. }
  1365. }
  1366. if (DummyTrim != 0) {
  1367. Mdl->Size =
  1368. (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
  1369. Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
  1370. ASSERT (Mdl->ByteCount != 0);
  1371. InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE);
  1372. DummyPfn1->u3.e2.ReferenceCount =
  1373. (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
  1374. //
  1375. // Shuffle down the PFNs in the MDL.
  1376. // Recalculate BasePte to adjust for the shuffle.
  1377. //
  1378. Pfn1 = MI_PFN_ELEMENT (*Page);
  1379. ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
  1380. ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
  1381. (Pfn1->PteAddress->u.Soft.Transition == 1));
  1382. InPageSupport->BasePte = Pfn1->PteAddress;
  1383. DestinationPage = (PPFN_NUMBER)(Mdl + 1);
  1384. do {
  1385. *DestinationPage = *Page;
  1386. DestinationPage += 1;
  1387. Page += 1;
  1388. i += 1;
  1389. } while (i < MdlPages);
  1390. MdlPages -= DummyTrim;
  1391. }
  1392. //
  1393. // Carefully trim trailing dummy pages.
  1394. //
  1395. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1396. MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1397. Mdl->ByteCount);
  1398. ASSERT (MdlPages != 0);
  1399. Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;
  1400. if (*Page == DummyPage) {
  1401. ASSERT (MdlPages >= 2);
  1402. //
  1403. // Trim the last page specially as it may be a partial page.
  1404. //
  1405. Mdl->Size -= sizeof(PFN_NUMBER);
  1406. if (BYTE_OFFSET(Mdl->ByteCount) != 0) {
  1407. Mdl->ByteCount &= ~(PAGE_SIZE - 1);
  1408. }
  1409. else {
  1410. Mdl->ByteCount -= PAGE_SIZE;
  1411. }
  1412. ASSERT (Mdl->ByteCount != 0);
  1413. DummyPfn1->u3.e2.ReferenceCount -= 1;
  1414. //
  1415. // Now trim any other trailing pages.
  1416. //
  1417. Page -= 1;
  1418. DummyTrim = 0;
  1419. while (Page != ((PPFN_NUMBER)(Mdl + 1))) {
  1420. if (*Page != DummyPage) {
  1421. break;
  1422. }
  1423. DummyTrim += 1;
  1424. Page -= 1;
  1425. }
  1426. if (DummyTrim != 0) {
  1427. ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER)));
  1428. Mdl->Size =
  1429. (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
  1430. Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
  1431. DummyPfn1->u3.e2.ReferenceCount =
  1432. (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
  1433. }
  1434. ASSERT (MdlPages > DummyTrim + 1);
  1435. MdlPages -= (DummyTrim + 1);
  1436. #if DBG
  1437. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1438. ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1439. Mdl->ByteCount));
  1440. #endif
  1441. }
  1442. //
  1443. // If the MDL is not already embedded in the inpage block, see if its
  1444. // final size qualifies it - if so, embed it now.
  1445. //
  1446. if ((Mdl != &InPageSupport->Mdl) &&
  1447. (Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){
  1448. #if DBG
  1449. RtlFillMemoryUlong (&InPageSupport->Page[0],
  1450. (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER),
  1451. 0xf1f1f1f1);
  1452. #endif
  1453. RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size);
  1454. Mdl->Next = FreeMdl;
  1455. FreeMdl = Mdl;
  1456. Mdl = &InPageSupport->Mdl;
  1457. ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
  1458. InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
  1459. }
  1460. //
  1461. // If the MDL contains a large number of dummy pages to real pages
  1462. // then just discard it. Only check large MDLs as embedded ones are
  1463. // always worth the I/O.
  1464. //
  1465. // The PFN lock may have been released above during the
  1466. // MiMakeSystemAddressValidPfn call. If so, other threads may
  1467. // have collided on the pages in the prefetch MDL and if so,
  1468. // this I/O must be issued regardless of the inefficiency of
  1469. // dummy pages within it. Otherwise the other threads will
  1470. // hang in limbo forever.
  1471. //
  1472. ASSERT (MdlPages != 0);
  1473. #if DBG
  1474. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1475. ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1476. Mdl->ByteCount));
  1477. #endif
  1478. if ((Mdl != &InPageSupport->Mdl) &&
  1479. (Waited == FALSE) &&
  1480. ((MdlPages - NumberOfPages) / DUMMY_RATIO >= NumberOfPages)) {
  1481. if (PrevEntry != NULL) {
  1482. PrevEntry->Next = NextEntry->Next;
  1483. }
  1484. else {
  1485. ReadList->InPageSupportHead.Next = NextEntry->Next;
  1486. }
  1487. NextEntry = NextEntry->Next;
  1488. ASSERT (MI_EXTRACT_PREFETCH_MDL(InPageSupport) == Mdl);
  1489. //
  1490. // Note the pages are individually freed here (rather than just
  1491. // "completing" the I/O with an error) as the PFN lock has
  1492. // never been released since the pages were put in transition.
  1493. // So no collisions on these pages are possible.
  1494. //
  1495. ASSERT (InPageSupport->WaitCount == 1);
  1496. Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;
  1497. do {
  1498. if (*Page != DummyPage) {
  1499. Pfn1 = MI_PFN_ELEMENT (*Page);
  1500. ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
  1501. ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
  1502. (Pfn1->PteAddress->u.Soft.Transition == 1));
  1503. ASSERT (Pfn1->u3.e1.ReadInProgress == 1);
  1504. ASSERT (Pfn1->u3.e1.PrototypePte == 1);
  1505. ASSERT (Pfn1->u3.e2.ReferenceCount == 1);
  1506. ASSERT (Pfn1->u2.ShareCount == 0);
  1507. Pfn1->u3.e1.PageLocation = StandbyPageList;
  1508. Pfn1->u3.e1.ReadInProgress = 0;
  1509. MiRestoreTransitionPte (*Page);
  1510. MI_SET_PFN_DELETED (Pfn1);
  1511. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(Pfn1, 39);
  1512. }
  1513. Page -= 1;
  1514. } while (Page >= (PPFN_NUMBER)(Mdl + 1));
  1515. ASSERT (InPageSupport->WaitCount == 1);
  1516. ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);
  1517. DummyPfn1->u3.e2.ReferenceCount =
  1518. (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);
  1519. UNLOCK_PFN (OldIrql);
  1520. #if DBG
  1521. InPageSupport->ListEntry.Next = NULL;
  1522. #endif
  1523. MiFreeInPageSupportBlock (InPageSupport);
  1524. LOCK_PFN (OldIrql);
  1525. continue;
  1526. }
  1527. #if DBG
  1528. MiPfDbgDumpReadList (ReadList);
  1529. #endif
  1530. ASSERT ((USHORT)Mdl->Size - sizeof(MDL) == BYTES_TO_PAGES(Mdl->ByteCount) * sizeof(PFN_NUMBER));
  1531. DummyPfn1->u3.e2.ReferenceCount =
  1532. (USHORT)(DummyPfn1->u3.e2.ReferenceCount - NumberOfPages);
  1533. MmInfoCounters.PageReadIoCount += 1;
  1534. MmInfoCounters.PageReadCount += NumberOfPages;
  1535. //
  1536. // March on to the next run and its InPageSupport and MDL.
  1537. //
  1538. PrevEntry = NextEntry;
  1539. NextEntry = NextEntry->Next;
  1540. }
  1541. //
  1542. // Unlock page containing prototype PTEs.
  1543. //
  1544. if (PfnProto != NULL) {
  1545. ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
  1546. MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
  1547. }
  1548. UNLOCK_PFN (OldIrql);
  1549. #if DBG
  1550. if (MiPfDebug & MI_PF_DELAY) {
  1551. //
  1552. // This delay provides a window to increase the chance of collided
  1553. // faults.
  1554. //
  1555. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
  1556. }
  1557. #endif
  1558. //
  1559. // Free any collapsed MDLs that are no longer needed.
  1560. //
  1561. while (FreeMdl != NULL) {
  1562. Mdl = FreeMdl->Next;
  1563. ExFreePool (FreeMdl);
  1564. FreeMdl = Mdl;
  1565. }
  1566. return STATUS_SUCCESS;
  1567. }
  1568. VOID
  1569. MiPfExecuteReadList (
  1570. IN PMI_READ_LIST ReadList
  1571. )
  1572. /*++
  1573. Routine Description:
  1574. This routine executes the read list by issuing paging I/Os for all
  1575. runs described in the read-list.
  1576. Arguments:
  1577. ReadList - Pointer to the read-list.
  1578. Return Value:
  1579. None.
  1580. Environment:
  1581. Kernel mode, PASSIVE_LEVEL.
  1582. --*/
  1583. {
  1584. PMDL Mdl;
  1585. NTSTATUS status;
  1586. PMMPFN Pfn1;
  1587. PMMPTE LocalPrototypePte;
  1588. PFN_NUMBER PageFrameIndex;
  1589. PSINGLE_LIST_ENTRY NextEntry;
  1590. PMMINPAGE_SUPPORT InPageSupport;
  1591. ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
  1592. NextEntry = ReadList->InPageSupportHead.Next;
  1593. while (NextEntry != NULL) {
  1594. InPageSupport = CONTAINING_RECORD(NextEntry,
  1595. MMINPAGE_SUPPORT,
  1596. ListEntry);
  1597. //
  1598. // Initialize the prefetch MDL.
  1599. //
  1600. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  1601. ASSERT ((Mdl->MdlFlags & MDL_MAPPED_TO_SYSTEM_VA) == 0);
  1602. Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
  1603. ASSERT (InPageSupport->u1.e1.Completed == 0);
  1604. ASSERT (InPageSupport->Thread == PsGetCurrentThread());
  1605. ASSERT64 (InPageSupport->UsedPageTableEntries == 0);
  1606. ASSERT (InPageSupport->WaitCount >= 1);
  1607. ASSERT (InPageSupport->u1.e1.PrefetchMdlHighBits != 0);
  1608. //
  1609. // Initialize the inpage support block fields we overloaded.
  1610. //
  1611. ASSERT (InPageSupport->FilePointer == ReadList->FileObject);
  1612. LocalPrototypePte = InPageSupport->BasePte;
  1613. ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
  1614. ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
  1615. (LocalPrototypePte->u.Soft.Transition == 1));
  1616. PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE(LocalPrototypePte);
  1617. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  1618. InPageSupport->Pfn = Pfn1;
  1619. status = IoAsynchronousPageRead (InPageSupport->FilePointer,
  1620. Mdl,
  1621. &InPageSupport->ReadOffset,
  1622. &InPageSupport->Event,
  1623. &InPageSupport->IoStatus);
  1624. if (!NT_SUCCESS (status)) {
  1625. //
  1626. // Set the event as the I/O system doesn't set it on errors.
  1627. //
  1628. InPageSupport->IoStatus.Status = status;
  1629. InPageSupport->IoStatus.Information = 0;
  1630. KeSetEvent (&InPageSupport->Event, 0, FALSE);
  1631. }
  1632. NextEntry = NextEntry->Next;
  1633. }
  1634. #if DBG
  1635. if (MiPfDebug & MI_PF_DELAY) {
  1636. //
  1637. // This delay provides a window to increase the chance of collided
  1638. // faults.
  1639. //
  1640. KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
  1641. }
  1642. #endif
  1643. }
VOID
MiPfCompletePrefetchIos (
    IN PMI_READ_LIST ReadList
    )

/*++

Routine Description:

    This routine waits for a series of page reads to complete
    and completes the requests.

Arguments:

    ReadList - Pointer to the read-list.

Return Value:

    None.

Environment:

    Kernel mode, PASSIVE_LEVEL.

--*/

{
    PMDL Mdl;
    PMMPFN Pfn1;
    PMMPFN PfnClusterPage;
    PPFN_NUMBER Page;
    NTSTATUS status;
    LONG NumberOfBytes;
    PMMINPAGE_SUPPORT InPageSupport;
    PSINGLE_LIST_ENTRY NextEntry;

    extern ULONG MmFrontOfList;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    //
    // Pop and complete each inpage support block queued on the read-list.
    // The list is consumed destructively - on return the list is empty.
    //

    do {

        NextEntry = PopEntryList(&ReadList->InPageSupportHead);
        if (NextEntry == NULL) {
            break;
        }

        InPageSupport = CONTAINING_RECORD(NextEntry,
                                          MMINPAGE_SUPPORT,
                                          ListEntry);

        ASSERT (InPageSupport->Pfn != 0);

        Pfn1 = InPageSupport->Pfn;
        Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
        Page = (PPFN_NUMBER)(Mdl + 1);

        status = MiWaitForInPageComplete (InPageSupport->Pfn,
                                          InPageSupport->BasePte,
                                          NULL,
                                          InPageSupport->BasePte,
                                          InPageSupport,
                                          PREFETCH_PROCESS);

        //
        // MiWaitForInPageComplete RETURNS WITH THE PFN LOCK HELD!!!
        //

        //
        // If we are prefetching for boot, insert prefetched pages to the front
        // of the list. Otherwise the pages prefetched first end up susceptible
        // at the front of the list as we prefetch more. We prefetch pages in
        // the order they will be used. When there is a spike in memory usage
        // and there is no free memory, we lose these pages before we can
        // get cache-hits on them. Thus boot gets ahead and starts discarding
        // prefetched pages that it could use just a little later.
        //

        if (CCPF_IS_PREFETCHING_FOR_BOOT()) {
            MmFrontOfList = TRUE;
        }

        //
        // Walk every frame in the MDL (real pages and dummy-page slots
        // alike) and drop the transfer's reference on each.
        //

        NumberOfBytes = (LONG)Mdl->ByteCount;

        while (NumberOfBytes > 0) {

            //
            // Decrement all reference counts.
            //

            PfnClusterPage = MI_PFN_ELEMENT (*Page);

#if DBG
            if (PfnClusterPage->u4.InPageError) {

                //
                // If the page is marked with an error, then the whole transfer
                // must be marked as not successful as well.  The only exception
                // is the prefetch dummy page which is used in multiple
                // transfers concurrently and thus may have the inpage error
                // bit set at any time (due to another transaction besides
                // the current one).
                //

                ASSERT ((status != STATUS_SUCCESS) ||
                        (PfnClusterPage->PteAddress == MI_PF_DUMMY_PAGE_PTE));
            }
#endif
            if (PfnClusterPage->u3.e1.ReadInProgress != 0) {

                ASSERT (PfnClusterPage->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);

                PfnClusterPage->u3.e1.ReadInProgress = 0;

                if (PfnClusterPage->u4.InPageError == 0) {
                    PfnClusterPage->u1.Event = NULL;
                }
            }

            MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnClusterPage, 39);

            Page += 1;
            NumberOfBytes -= PAGE_SIZE;
        }

        //
        // If we were inserting prefetched pages to front of standby list
        // for boot prefetching, stop it before we release the pfn lock.
        //

        MmFrontOfList = FALSE;

        if (status != STATUS_SUCCESS) {

            //
            // An I/O error occurred during the page read
            // operation.  All the pages which were just
            // put into transition must be put onto the
            // free list if InPageError is set, and their
            // PTEs restored to the proper contents.
            //

            Page = (PPFN_NUMBER)(Mdl + 1);
            NumberOfBytes = (LONG)Mdl->ByteCount;

            while (NumberOfBytes > 0) {

                PfnClusterPage = MI_PFN_ELEMENT (*Page);

                if (PfnClusterPage->u4.InPageError == 1) {

                    // Only reclaim frames whose last reference was dropped
                    // above; a nonzero count means another thread still
                    // holds the page.
                    if (PfnClusterPage->u3.e2.ReferenceCount == 0) {

                        ASSERT (PfnClusterPage->u3.e1.PageLocation ==
                                                        StandbyPageList);

                        MiUnlinkPageFromList (PfnClusterPage);
                        MiRestoreTransitionPte (*Page);
                        MiInsertPageInFreeList (*Page);
                    }
                }
                Page += 1;
                NumberOfBytes -= PAGE_SIZE;
            }
        }

        //
        // All the relevant prototype PTEs should be in transition state.
        //

        //
        // We took out an extra reference on the inpage block to prevent
        // MiWaitForInPageComplete from freeing it (and the MDL), since we
        // needed to process the MDL above.  Now let it go for good.
        //

        ASSERT (InPageSupport->WaitCount >= 1);

        UNLOCK_PFN (PASSIVE_LEVEL);

#if DBG
        InPageSupport->ListEntry.Next = NULL;
#endif

        MiFreeInPageSupportBlock (InPageSupport);

    } while (TRUE);
}
  1780. #if DBG
  1781. VOID
  1782. MiPfDbgDumpReadList (
  1783. IN PMI_READ_LIST ReadList
  1784. )
  1785. /*++
  1786. Routine Description:
  1787. This routine dumps the given read-list range to the debugger.
  1788. Arguments:
  1789. ReadList - Pointer to the read-list.
  1790. Return Value:
  1791. None.
  1792. Environment:
  1793. Kernel mode.
  1794. --*/
  1795. {
  1796. ULONG i;
  1797. PMDL Mdl;
  1798. PMMPFN Pfn1;
  1799. PMMPTE LocalPrototypePte;
  1800. PFN_NUMBER PageFrameIndex;
  1801. PMMINPAGE_SUPPORT InPageSupport;
  1802. PSINGLE_LIST_ENTRY NextEntry;
  1803. PPFN_NUMBER Page;
  1804. PVOID StartingVa;
  1805. PFN_NUMBER MdlPages;
  1806. LARGE_INTEGER ReadOffset;
  1807. if ((MiPfDebug & MI_PF_VERBOSE) == 0) {
  1808. return;
  1809. }
  1810. DbgPrint ("\nPF: Dumping read-list %x (FileObject %x ControlArea %x)\n\n",
  1811. ReadList, ReadList->FileObject, ReadList->ControlArea);
  1812. DbgPrint ("\tFileOffset | Pte | Pfn \n"
  1813. "\t-----------+---------------+----------\n");
  1814. NextEntry = ReadList->InPageSupportHead.Next;
  1815. while (NextEntry != NULL) {
  1816. InPageSupport = CONTAINING_RECORD(NextEntry,
  1817. MMINPAGE_SUPPORT,
  1818. ListEntry);
  1819. ReadOffset = InPageSupport->ReadOffset;
  1820. Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
  1821. Page = (PPFN_NUMBER)(Mdl + 1);
  1822. #if DBG
  1823. //
  1824. // MDL isn't filled in yet, skip it.
  1825. //
  1826. if (*Page == MM_EMPTY_LIST) {
  1827. NextEntry = NextEntry->Next;
  1828. continue;
  1829. }
  1830. #endif
  1831. StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
  1832. MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
  1833. Mdl->ByteCount);
  1834. //
  1835. // Default the MDL entry to the dummy page as the RLE PTEs may
  1836. // be noncontiguous and we have no way to distinguish the jumps.
  1837. //
  1838. for (i = 0; i < MdlPages; i += 1) {
  1839. PageFrameIndex = *Page;
  1840. Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
  1841. LocalPrototypePte = Pfn1->PteAddress;
  1842. if (LocalPrototypePte != MI_PF_DUMMY_PAGE_PTE) {
  1843. ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
  1844. ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
  1845. (LocalPrototypePte->u.Soft.Transition == 1));
  1846. }
  1847. DbgPrint ("\t %8x | %8x | %8x\n",
  1848. ReadOffset.LowPart,
  1849. LocalPrototypePte,
  1850. PageFrameIndex);
  1851. Page += 1;
  1852. ReadOffset.LowPart += PAGE_SIZE;
  1853. }
  1854. NextEntry = NextEntry->Next;
  1855. }
  1856. DbgPrint ("\t\n");
  1857. }
VOID
MiPfDbgDumpReadListDummy (
    VOID
    );

VOID
MiRemoveUserPages (
    VOID
    )

/*++

Routine Description:

    This routine removes user space pages by emptying all working sets
    and flushing modified pages, then purging the transition list.
    It is a DBG-only helper (used to force subsequent prefetch reads
    by trimming resident pages).

Arguments:

    None.

Return Value:

    None.  (The routine is VOID - the previous comment incorrectly
    claimed it returned the number of pages removed.)

Environment:

    Kernel mode.

--*/

{
    //
    // Bump MiDelayPageFaults around the trim/flush - presumably this
    // causes concurrent page-fault paths to back off while the working
    // sets are emptied; confirm against the fault handler's use of the
    // counter.
    //
    InterlockedIncrement (&MiDelayPageFaults);

    MmEmptyAllWorkingSets ();

    MiFlushAllPages ();

    InterlockedDecrement (&MiDelayPageFaults);

    //
    // Run the transition list and free all the entries so transition
    // faults are not satisfied for any of the non modified pages that were
    // freed.
    //
    MiPurgeTransitionList ();
}
  1884. #endif