You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2581 lines
74 KiB
2581 lines
74 KiB
/*++

Copyright (c) 1999  Microsoft Corporation

Module Name:

    pfsup.c

Abstract:

    This module contains the Mm support routines for prefetching groups of pages
    from secondary storage.

    The caller builds a list of various file objects and logical block offsets,
    passing them to MmPrefetchPages.  The code here then examines the
    internal pages, reading in those that are not already valid or in
    transition.  These pages are read with a single read, using a dummy page
    to bridge small gaps.  If the gap is "large", then separate reads are
    issued.

    Upon conclusion of all the I/Os, control is returned to the calling
    thread, and any pages that needed to be read are placed in transition
    within the prototype PTE-managed segments.  Thus any future references
    to these pages should result in soft faults only, provided these pages
    do not themselves get trimmed under memory pressure.

Author:

    Landy Wang (landyw) 09-Jul-1999

Revision History:

--*/
|
|
|
|
#include "mi.h"
|
|
|
|
#if DBG

//
// MiPfDebug is a bitmask of the MI_PF_* options below.  It only exists in
// DBG builds and is tested by the prefetch code with expressions of the
// form (MiPfDebug & MI_PF_xxx).
//

ULONG MiPfDebug;

#define MI_PF_FORCE_PREFETCH    0x1     // Trim all user pages to force prefetch
#define MI_PF_DELAY             0x2     // Delay hoping to trigger collisions
#define MI_PF_VERBOSE           0x4     // Verbose printing
#define MI_PF_PRINT_ERRORS      0x8     // Print to debugger on errors

#endif
|
|
|
|
//
// If an MDL contains DUMMY_RATIO times as many dummy pages as real pages
// then don't bother with the read.
//

#define DUMMY_RATIO 16

//
// If two consecutive read-list entries are more than "seek threshold"
// distance apart, the read-list is split between these entries.  Otherwise
// the dummy page is used for the gap and only one MDL is used.
//
// The threshold is expressed in pages: it is the number of pages that
// make up 128KB.
//

#define SEEK_THRESHOLD ((128 * 1024) / PAGE_SIZE)

//
// Minimum number of pages to prefetch per section.
//

#define MINIMUM_READ_LIST_PAGES 1
|
|
|
|
//
|
|
// Read-list structures.
|
|
//
|
|
|
|
typedef struct _RLETYPE {
|
|
ULONG_PTR Partial : 1; // This entry is a partial page.
|
|
ULONG_PTR NewSubsection : 1; // This entry starts in the next subsection.
|
|
ULONG_PTR DontUse : 30;
|
|
} RLETYPE;
|
|
|
|
typedef struct _MI_READ_LIST_ENTRY {
|
|
|
|
union {
|
|
PMMPTE PrototypePte;
|
|
RLETYPE e1;
|
|
} u1;
|
|
|
|
} MI_READ_LIST_ENTRY, *PMI_READ_LIST_ENTRY;
|
|
|
|
//
// MI_RLEPROTO_BITS is the mask of the flag bits that share storage with a
// read-list entry's prototype PTE pointer.  MI_RLEPROTO_TO_PROTO strips
// those bits to recover the prototype PTE pointer itself.
//
// The macro argument is parenthesized so that any expression can be
// passed, and the mask is widened to ULONG_PTR before it is inverted so
// that the upper pointer bits are always preserved.
//

#define MI_RLEPROTO_BITS        3

#define MI_RLEPROTO_TO_PROTO(ProtoPte) \
    ((PMMPTE)((ULONG_PTR)(ProtoPte) & ~(ULONG_PTR)MI_RLEPROTO_BITS))
|
|
|
|
typedef struct _MI_READ_LIST {
|
|
|
|
PCONTROL_AREA ControlArea;
|
|
PFILE_OBJECT FileObject;
|
|
ULONG LastPteOffsetReferenced;
|
|
|
|
//
|
|
// Note that entries are chained through the inpage support blocks from
|
|
// this listhead. This list is not protected by interlocks because it is
|
|
// only accessed by the owning thread. Inpage blocks _ARE_ accessed with
|
|
// interlocks when they are inserted or removed from the memory management
|
|
// freelists, but by the time they get to this module they are decoupled.
|
|
//
|
|
|
|
SINGLE_LIST_ENTRY InPageSupportHead;
|
|
|
|
MI_READ_LIST_ENTRY List[ANYSIZE_ARRAY];
|
|
|
|
} MI_READ_LIST, *PMI_READ_LIST;
|
|
|
|
VOID
|
|
MiPfReleaseSubsectionReferences (
|
|
IN PMI_READ_LIST MiReadList
|
|
);
|
|
|
|
VOID
|
|
MiPfFreeDummyPage (
|
|
IN PMMPFN DummyPagePfn
|
|
);
|
|
|
|
NTSTATUS
|
|
MiPfPrepareReadList (
|
|
IN PREAD_LIST ReadList,
|
|
OUT PMI_READ_LIST *OutMiReadList
|
|
);
|
|
|
|
NTSTATUS
|
|
MiPfPutPagesInTransition (
|
|
IN PMI_READ_LIST ReadList,
|
|
IN OUT PMMPFN *DummyPagePfn
|
|
);
|
|
|
|
VOID
|
|
MiPfExecuteReadList (
|
|
IN PMI_READ_LIST ReadList
|
|
);
|
|
|
|
VOID
|
|
MiPfCompletePrefetchIos (
|
|
PMI_READ_LIST ReadList
|
|
);
|
|
|
|
#if DBG
|
|
VOID
|
|
MiPfDbgDumpReadList (
|
|
IN PMI_READ_LIST ReadList
|
|
);
|
|
|
|
VOID
|
|
MiRemoveUserPages (
|
|
VOID
|
|
);
|
|
#endif
|
|
|
|
//
// The routines listed here are placed in the PAGE section.
//

#ifdef ALLOC_PRAGMA
#pragma alloc_text (PAGE, MmPrefetchPages)
#pragma alloc_text (PAGE, MiPfPrepareReadList)
#pragma alloc_text (PAGE, MiPfExecuteReadList)
#pragma alloc_text (PAGE, MiPfReleaseSubsectionReferences)
#endif
|
|
|
|
|
|
NTSTATUS
|
|
MmPrefetchPages (
|
|
IN ULONG NumberOfLists,
|
|
IN PREAD_LIST *ReadLists
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine reads pages described in the read-lists in the optimal fashion.
|
|
|
|
This is the only externally callable prefetch routine. No component
|
|
should use this interface except the cache manager.
|
|
|
|
Arguments:
|
|
|
|
NumberOfLists - Supplies the number of read-lists.
|
|
|
|
ReadLists - Supplies an array of read-lists.
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS codes.
|
|
|
|
Environment:
|
|
|
|
Kernel mode. PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
PMI_READ_LIST *MiReadLists;
|
|
PMMPFN DummyPagePfn;
|
|
NTSTATUS status;
|
|
ULONG i;
|
|
LOGICAL ReadBuilt;
|
|
LOGICAL ApcNeeded;
|
|
PETHREAD CurrentThread;
|
|
NTSTATUS CauseOfReadBuildFailures;
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
//
|
|
// Allocate memory for internal Mi read-lists.
|
|
//
|
|
|
|
MiReadLists = (PMI_READ_LIST *) ExAllocatePoolWithTag (
|
|
NonPagedPool,
|
|
sizeof (PMI_READ_LIST) * NumberOfLists,
|
|
'lRmM'
|
|
);
|
|
|
|
if (MiReadLists == NULL) {
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
ReadBuilt = FALSE;
|
|
CauseOfReadBuildFailures = STATUS_SUCCESS;
|
|
|
|
//
|
|
// Prepare read-lists (determine runs and allocate MDLs).
|
|
//
|
|
|
|
for (i = 0; i < NumberOfLists; i += 1) {
|
|
|
|
//
|
|
// Note any non-null list is referenced by this call so this routine
|
|
// must dereference it when done to re-enable dynamic prototype PTEs.
|
|
//
|
|
|
|
status = MiPfPrepareReadList (ReadLists[i], &MiReadLists[i]);
|
|
|
|
//
|
|
// MiPfPrepareReadList never returns half-formed inpage support
|
|
// blocks and MDLs. Either nothing is returned, partial lists are
|
|
// returned or a complete list is returned. Any non-null list
|
|
// can therefore be processed.
|
|
//
|
|
|
|
if (NT_SUCCESS (status)) {
|
|
if (MiReadLists[i] != NULL) {
|
|
ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);
|
|
ReadBuilt = TRUE;
|
|
}
|
|
}
|
|
else {
|
|
CauseOfReadBuildFailures = status;
|
|
}
|
|
}
|
|
|
|
if (ReadBuilt == FALSE) {
|
|
|
|
//
|
|
// No lists were created so nothing further needs to be done.
|
|
// CauseOfReadBuildFailures tells us whether this was due to all
|
|
// the desired pages already being resident or that resources to
|
|
// build the request could not be allocated.
|
|
//
|
|
|
|
ExFreePool (MiReadLists);
|
|
|
|
if (CauseOfReadBuildFailures != STATUS_SUCCESS) {
|
|
return CauseOfReadBuildFailures;
|
|
}
|
|
|
|
//
|
|
// All the pages the caller asked for are already resident.
|
|
//
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// APCs must be disabled once we put a page in transition. Otherwise
|
|
// a thread suspend will stop us from issuing the I/O - this will hang
|
|
// any other threads that need the same page.
|
|
//
|
|
|
|
CurrentThread = PsGetCurrentThread();
|
|
ApcNeeded = FALSE;
|
|
|
|
ASSERT ((PKTHREAD)CurrentThread == KeGetCurrentThread ());
|
|
KeEnterCriticalRegionThread ((PKTHREAD)CurrentThread);
|
|
|
|
//
|
|
// The nested fault count protects this thread from deadlocks where a
|
|
// special kernel APC fires and references the same user page(s) we are
|
|
// putting in transition.
|
|
//
|
|
|
|
KeEnterGuardedRegionThread (&CurrentThread->Tcb);
|
|
|
|
ASSERT (CurrentThread->NestedFaultCount == 0);
|
|
CurrentThread->NestedFaultCount += 1;
|
|
|
|
KeLeaveGuardedRegionThread (&CurrentThread->Tcb);
|
|
|
|
//
|
|
// Allocate physical memory.
|
|
//
|
|
|
|
DummyPagePfn = NULL;
|
|
ReadBuilt = FALSE;
|
|
CauseOfReadBuildFailures = STATUS_SUCCESS;
|
|
|
|
#if DBG
|
|
status = 0xC0033333;
|
|
#endif
|
|
|
|
for (i = 0; i < NumberOfLists; i += 1) {
|
|
|
|
if ((MiReadLists[i] != NULL) &&
|
|
(MiReadLists[i]->InPageSupportHead.Next != NULL)) {
|
|
|
|
status = MiPfPutPagesInTransition (MiReadLists[i], &DummyPagePfn);
|
|
|
|
if (NT_SUCCESS (status)) {
|
|
if (MiReadLists[i]->InPageSupportHead.Next != NULL) {
|
|
ReadBuilt = TRUE;
|
|
|
|
//
|
|
// Issue I/Os.
|
|
//
|
|
|
|
MiPfExecuteReadList (MiReadLists[i]);
|
|
}
|
|
else {
|
|
MiPfReleaseSubsectionReferences (MiReadLists[i]);
|
|
ExFreePool (MiReadLists[i]);
|
|
MiReadLists[i] = NULL;
|
|
}
|
|
}
|
|
else {
|
|
|
|
CauseOfReadBuildFailures = status;
|
|
|
|
//
|
|
// If not even a single page is available then don't bother
|
|
// trying to prefetch anything else.
|
|
//
|
|
|
|
for (; i < NumberOfLists; i += 1) {
|
|
if (MiReadLists[i] != NULL) {
|
|
MiPfReleaseSubsectionReferences (MiReadLists[i]);
|
|
ExFreePool (MiReadLists[i]);
|
|
MiReadLists[i] = NULL;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// At least one call to MiPfPutPagesInTransition was made, which
|
|
// sets status properly.
|
|
//
|
|
|
|
ASSERT (status != 0xC0033333);
|
|
|
|
if (ReadBuilt == TRUE) {
|
|
|
|
status = STATUS_SUCCESS;
|
|
|
|
//
|
|
// Wait for I/Os to complete. Note APCs must remain disabled.
|
|
//
|
|
|
|
for (i = 0; i < NumberOfLists; i += 1) {
|
|
|
|
if (MiReadLists[i] != NULL) {
|
|
|
|
ASSERT (MiReadLists[i]->InPageSupportHead.Next != NULL);
|
|
|
|
MiPfCompletePrefetchIos (MiReadLists[i]);
|
|
|
|
MiPfReleaseSubsectionReferences (MiReadLists[i]);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
|
|
//
|
|
// No reads were issued.
|
|
//
|
|
// CauseOfReadBuildFailures tells us whether this was due to all
|
|
// the desired pages already being resident or that resources to
|
|
// build the request could not be allocated.
|
|
//
|
|
|
|
status = CauseOfReadBuildFailures;
|
|
}
|
|
|
|
//
|
|
// Put DummyPage back on the free list.
|
|
//
|
|
|
|
if (DummyPagePfn != NULL) {
|
|
MiPfFreeDummyPage (DummyPagePfn);
|
|
}
|
|
|
|
//
|
|
// Only when all the I/Os have been completed (not just issued) can
|
|
// APCs be re-enabled. This prevents a user-issued suspend APC from
|
|
// keeping a shared page in transition forever.
|
|
//
|
|
|
|
KeEnterGuardedRegionThread (&CurrentThread->Tcb);
|
|
|
|
ASSERT (CurrentThread->NestedFaultCount == 1);
|
|
|
|
CurrentThread->NestedFaultCount -= 1;
|
|
|
|
if (CurrentThread->ApcNeeded == 1) {
|
|
ApcNeeded = TRUE;
|
|
CurrentThread->ApcNeeded = 0;
|
|
}
|
|
|
|
KeLeaveGuardedRegionThread (&CurrentThread->Tcb);
|
|
|
|
KeLeaveCriticalRegionThread ((PKTHREAD)CurrentThread);
|
|
|
|
for (i = 0; i < NumberOfLists; i += 1) {
|
|
if (MiReadLists[i] != NULL) {
|
|
ExFreePool (MiReadLists[i]);
|
|
}
|
|
}
|
|
|
|
ExFreePool (MiReadLists);
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
ASSERT (CurrentThread->NestedFaultCount == 0);
|
|
ASSERT (CurrentThread->ApcNeeded == 0);
|
|
|
|
if (ApcNeeded == TRUE) {
|
|
IoRetryIrpCompletions ();
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
VOID
|
|
MiPfFreeDummyPage (
|
|
IN PMMPFN DummyPagePfn
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This nonpaged wrapper routine frees the dummy page PFN.
|
|
|
|
Arguments:
|
|
|
|
DummyPagePfn - Supplies the dummy page PFN.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode.
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL OldIrql;
|
|
PFN_NUMBER PageFrameIndex;
|
|
|
|
PageFrameIndex = MI_PFN_ELEMENT_TO_INDEX (DummyPagePfn);
|
|
|
|
LOCK_PFN (OldIrql);
|
|
|
|
ASSERT (DummyPagePfn->u2.ShareCount == 1);
|
|
ASSERT (DummyPagePfn->u3.e1.PrototypePte == 0);
|
|
ASSERT (DummyPagePfn->OriginalPte.u.Long == MM_DEMAND_ZERO_WRITE_PTE);
|
|
|
|
ASSERT (DummyPagePfn->u3.e2.ReferenceCount == 2);
|
|
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(DummyPagePfn, 17);
|
|
|
|
//
|
|
// Clear the read in progress bit as this page may never have used for an
|
|
// I/O after all. The inpage error bit must also be cleared as any number
|
|
// of errors may have occurred during reads of pages (that were immaterial
|
|
// anyway).
|
|
//
|
|
|
|
DummyPagePfn->u3.e1.ReadInProgress = 0;
|
|
DummyPagePfn->u4.InPageError = 0;
|
|
|
|
MI_SET_PFN_DELETED (DummyPagePfn);
|
|
|
|
MiDecrementShareCount (DummyPagePfn, PageFrameIndex);
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
}
|
|
|
|
VOID
|
|
MiMovePageToEndOfStandbyList (
|
|
IN PMMPTE PointerPte
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This nonpaged routine obtains the PFN lock and moves a page to the end of
|
|
the standby list (if the page is still in transition).
|
|
|
|
Arguments:
|
|
|
|
PointerPte - Supplies the prototype PTE to examine.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode, PFN lock not held.
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL OldIrql;
|
|
PMMPFN Pfn1;
|
|
MMPTE PteContents;
|
|
PFN_NUMBER PageFrameIndex;
|
|
|
|
LOCK_PFN (OldIrql);
|
|
|
|
if (!MiIsAddressValid (PointerPte, TRUE)) {
|
|
|
|
//
|
|
// If the paged pool containing the prototype PTE is not resident
|
|
// then the actual page itself may still be transition or not. This
|
|
// should be so rare it's not worth making the pool resident so the
|
|
// proper checks can be applied. Just bail.
|
|
//
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
return;
|
|
}
|
|
|
|
PteContents = *PointerPte;
|
|
|
|
if ((PteContents.u.Hard.Valid == 0) &&
|
|
(PteContents.u.Soft.Prototype == 0) &&
|
|
(PteContents.u.Soft.Transition == 1)) {
|
|
|
|
PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents);
|
|
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
|
|
|
|
//
|
|
// The page is still in transition, move it to the end to protect it
|
|
// from possible cannibalization. Note that if the page is currently
|
|
// being written to disk it will be on the modified list and when the
|
|
// write completes it will automatically go to the end of the standby
|
|
// list anyway so skip those.
|
|
//
|
|
|
|
if (Pfn1->u3.e1.PageLocation == StandbyPageList) {
|
|
MiUnlinkPageFromList (Pfn1);
|
|
ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
|
|
MiInsertPageInList (&MmStandbyPageListHead, PageFrameIndex);
|
|
}
|
|
}
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
}
|
|
|
|
VOID
|
|
MiPfReleaseSubsectionReferences (
|
|
IN PMI_READ_LIST MiReadList
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine releases reference counts on subsections examined by the
|
|
prefetch scanner.
|
|
|
|
Arguments:
|
|
|
|
MiReadList - Supplies a read-list entry.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode, PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
PMSUBSECTION MappedSubsection;
|
|
PCONTROL_AREA ControlArea;
|
|
|
|
ASSERT (KeGetCurrentIrql () == PASSIVE_LEVEL);
|
|
|
|
ControlArea = MiReadList->ControlArea;
|
|
|
|
ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
|
|
ASSERT (ControlArea->FilePointer != NULL);
|
|
|
|
//
|
|
// Image files don't have dynamic prototype PTEs.
|
|
//
|
|
|
|
if (ControlArea->u.Flags.Image == 1) {
|
|
return;
|
|
}
|
|
|
|
ASSERT (ControlArea->u.Flags.GlobalOnlyPerSession == 0);
|
|
|
|
MappedSubsection = (PMSUBSECTION)(ControlArea + 1);
|
|
|
|
MiRemoveViewsFromSectionWithPfn (MappedSubsection,
|
|
MiReadList->LastPteOffsetReferenced);
|
|
}
|
|
|
|
|
|
NTSTATUS
|
|
MiPfPrepareReadList (
|
|
IN PREAD_LIST ReadList,
|
|
OUT PMI_READ_LIST *OutMiReadList
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine constructs MDLs that describe the pages in the argument
|
|
read-list. The caller will then issue the I/Os on return.
|
|
|
|
Arguments:
|
|
|
|
ReadList - Supplies the read-list.
|
|
|
|
OutMiReadList - Supplies a pointer to receive the Mi readlist.
|
|
|
|
Return Value:
|
|
|
|
Various NTSTATUS codes.
|
|
|
|
If STATUS_SUCCESS is returned, OutMiReadList is set to a pointer to an Mi
|
|
readlist to be used for prefetching or NULL if no prefetching is needed.
|
|
|
|
If OutMireadList is non-NULL (on success only) then the caller must call
|
|
MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffsetReferenced) for data files.
|
|
|
|
Environment:
|
|
|
|
Kernel mode, PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG LastPteOffset;
|
|
NTSTATUS Status;
|
|
MMPTE PteContents;
|
|
PMMPTE LocalPrototypePte;
|
|
PMMPTE LastPrototypePte;
|
|
PMMPTE StartPrototypePte;
|
|
PMMPTE EndPrototypePte;
|
|
PMI_READ_LIST MiReadList;
|
|
PMI_READ_LIST_ENTRY Rle;
|
|
PMI_READ_LIST_ENTRY StartRleRun;
|
|
PMI_READ_LIST_ENTRY EndRleRun;
|
|
PMI_READ_LIST_ENTRY RleMax;
|
|
PMI_READ_LIST_ENTRY FirstRleInRun;
|
|
PCONTROL_AREA ControlArea;
|
|
PSUBSECTION Subsection;
|
|
PSUBSECTION PreviousSubsection;
|
|
PMSUBSECTION VeryFirstSubsection;
|
|
PMSUBSECTION VeryLastSubsection;
|
|
UINT64 StartOffset;
|
|
LARGE_INTEGER EndQuad;
|
|
UINT64 EndOffset;
|
|
UINT64 FileOffset;
|
|
PMMINPAGE_SUPPORT InPageSupport;
|
|
PMDL Mdl;
|
|
ULONG i;
|
|
PFN_NUMBER NumberOfPages;
|
|
UINT64 StartingOffset;
|
|
UINT64 TempOffset;
|
|
ULONG ReadSize;
|
|
ULONG NumberOfEntries;
|
|
#if DBG
|
|
PPFN_NUMBER Page;
|
|
#endif
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
*OutMiReadList = NULL;
|
|
|
|
//
|
|
// Create an Mi readlist from the argument Cc readlist.
|
|
//
|
|
|
|
NumberOfEntries = ReadList->NumberOfEntries;
|
|
|
|
MiReadList = (PMI_READ_LIST) ExAllocatePoolWithTag (
|
|
NonPagedPool,
|
|
sizeof (MI_READ_LIST) + NumberOfEntries * sizeof (MI_READ_LIST_ENTRY),
|
|
'lRmM');
|
|
|
|
if (MiReadList == NULL) {
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
//
|
|
// Translate the section object into the relevant control area.
|
|
//
|
|
|
|
if (ReadList->IsImage) {
|
|
ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->ImageSectionObject;
|
|
ASSERT (ControlArea != NULL );
|
|
ASSERT (ControlArea->u.Flags.Image == 1);
|
|
}
|
|
else {
|
|
ControlArea = (PCONTROL_AREA)ReadList->FileObject->SectionObjectPointer->DataSectionObject;
|
|
}
|
|
|
|
//
|
|
// If the section is backed by a ROM, then there's no need to prefetch
|
|
// anything as it would waste RAM.
|
|
//
|
|
|
|
if (ControlArea->u.Flags.Rom == 1) {
|
|
ExFreePool (MiReadList);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// Make sure the section is really prefetchable - physical and
|
|
// pagefile-backed sections are not.
|
|
//
|
|
|
|
if ((ControlArea->u.Flags.PhysicalMemory) ||
|
|
(ControlArea->FilePointer == NULL)) {
|
|
ExFreePool (MiReadList);
|
|
return STATUS_INVALID_PARAMETER_1;
|
|
}
|
|
|
|
//
|
|
// Initialize the internal Mi readlist.
|
|
//
|
|
|
|
MiReadList->ControlArea = ControlArea;
|
|
MiReadList->FileObject = ReadList->FileObject;
|
|
MiReadList->InPageSupportHead.Next = NULL;
|
|
|
|
RtlZeroMemory (MiReadList->List,
|
|
sizeof (MI_READ_LIST_ENTRY) * NumberOfEntries);
|
|
|
|
//
|
|
// Copy pages from the Cc readlists to the internal Mi readlists.
|
|
//
|
|
|
|
NumberOfPages = 0;
|
|
FirstRleInRun = NULL;
|
|
VeryFirstSubsection = NULL;
|
|
VeryLastSubsection = NULL;
|
|
LastPteOffset = 0;
|
|
|
|
if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
|
|
Subsection = (PSUBSECTION)(ControlArea + 1);
|
|
|
|
//
|
|
// Ensure all prototype PTE bases are valid for all subsections of the
|
|
// requested file so the traversal code doesn't have to check
|
|
// everywhere. As long as the files are not too large this should
|
|
// be a cheap operation.
|
|
//
|
|
|
|
if (ControlArea->u.Flags.Image == 0) {
|
|
ASSERT (ControlArea->u.Flags.PhysicalMemory == 0);
|
|
ASSERT (ControlArea->FilePointer != NULL);
|
|
|
|
VeryFirstSubsection = (PMSUBSECTION) Subsection;
|
|
VeryLastSubsection = (PMSUBSECTION) Subsection;
|
|
|
|
do {
|
|
|
|
//
|
|
// A memory barrier is needed to read the subsection chains
|
|
// in order to ensure the writes to the actual individual
|
|
// subsection data structure fields are visible in correct
|
|
// order. This avoids the need to acquire any stronger
|
|
// synchronization (ie: PFN lock), thus yielding better
|
|
// performance and pagability.
|
|
//
|
|
|
|
KeMemoryBarrier ();
|
|
|
|
LastPteOffset += VeryLastSubsection->PtesInSubsection;
|
|
if (VeryLastSubsection->NextSubsection == NULL) {
|
|
break;
|
|
}
|
|
VeryLastSubsection = (PMSUBSECTION) VeryLastSubsection->NextSubsection;
|
|
} while (TRUE);
|
|
|
|
MiReadList->LastPteOffsetReferenced = LastPteOffset;
|
|
|
|
Status = MiAddViewsForSectionWithPfn (VeryFirstSubsection,
|
|
LastPteOffset);
|
|
|
|
if (!NT_SUCCESS (Status)) {
|
|
ExFreePool (MiReadList);
|
|
return Status;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
|
|
}
|
|
|
|
StartOffset = (UINT64) MiStartingOffset (Subsection, Subsection->SubsectionBase);
|
|
EndQuad = MiEndingOffset (Subsection);
|
|
EndOffset = (UINT64)EndQuad.QuadPart;
|
|
|
|
//
|
|
// If the file is bigger than the subsection, truncate the subsection range
|
|
// checks.
|
|
//
|
|
|
|
if ((StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
|
|
EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT);
|
|
}
|
|
|
|
TempOffset = EndOffset;
|
|
|
|
PreviousSubsection = NULL;
|
|
LastPrototypePte = NULL;
|
|
|
|
Rle = MiReadList->List;
|
|
|
|
#if DBG
|
|
if (MiPfDebug & MI_PF_FORCE_PREFETCH) {
|
|
MiRemoveUserPages ();
|
|
}
|
|
|
|
//
|
|
// Initializing FileOffset is not needed for correctness, but without it
|
|
// the compiler cannot compile this code W4 to check for use of
|
|
// uninitialized variables.
|
|
//
|
|
|
|
FileOffset = 0;
|
|
#endif
|
|
|
|
for (i = 0; i < NumberOfEntries; i += 1, Rle += 1) {
|
|
|
|
ASSERT ((i == 0) || (ReadList->List[i].Alignment > FileOffset));
|
|
|
|
FileOffset = ReadList->List[i].Alignment;
|
|
|
|
ASSERT (Rle->u1.PrototypePte == NULL);
|
|
|
|
//
|
|
// Calculate which PTE maps the given logical block offset.
|
|
//
|
|
// Since our caller always passes ordered lists of logical block offsets
|
|
// within a given file, always look forwards (as an optimization) in the
|
|
// subsection chain.
|
|
//
|
|
// A quick check is made first to avoid recalculations and loops where
|
|
// possible.
|
|
//
|
|
|
|
if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
|
|
ASSERT (Subsection->SubsectionBase != NULL);
|
|
LocalPrototypePte = Subsection->SubsectionBase +
|
|
((FileOffset - StartOffset) >> PAGE_SHIFT);
|
|
ASSERT (TempOffset != 0);
|
|
ASSERT (EndOffset != 0);
|
|
}
|
|
else {
|
|
LocalPrototypePte = NULL;
|
|
do {
|
|
|
|
ASSERT (Subsection->SubsectionBase != NULL);
|
|
|
|
if ((Subsection->StartingSector == 0) &&
|
|
(ControlArea->u.Flags.Image == 1) &&
|
|
(Subsection->SubsectionBase != ControlArea->Segment->PrototypePte)) {
|
|
|
|
//
|
|
// This is an image that was built with a linker pre-1995
|
|
// (version 2.39 is one example) that put bss into a
|
|
// separate subsection with zero as a starting file offset
|
|
// field in the on-disk image. Ignore any prefetch as it
|
|
// would read from the wrong offset trying to satisfy these
|
|
// ranges (which are actually demand zero when the fault
|
|
// occurs).
|
|
//
|
|
// This can also happen for an image (built with a current
|
|
// linker) that has no initialized data (ie: it's data
|
|
// is all bss). Just skip the subsection.
|
|
//
|
|
|
|
Subsection = Subsection->NextSubsection;
|
|
continue;
|
|
}
|
|
|
|
StartOffset = (UINT64) MiStartingOffset (Subsection, Subsection->SubsectionBase);
|
|
|
|
EndQuad = MiEndingOffset (Subsection);
|
|
EndOffset = (UINT64)EndQuad.QuadPart;
|
|
|
|
//
|
|
// If the file is bigger than the subsection, truncate the
|
|
// subsection range checks.
|
|
//
|
|
|
|
if ((StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT) < EndOffset) {
|
|
EndOffset = (StartOffset & ~(PAGE_SIZE - 1)) + ((UINT64)Subsection->PtesInSubsection << PAGE_SHIFT);
|
|
}
|
|
|
|
//
|
|
// Always set TempOffset here even without a match. This is
|
|
// because the truncation above may have resulted in skipping
|
|
// the last straddling page of a subsection. After that,
|
|
// the Subsection is set to Subsection->Next below and we
|
|
// loop. Falling to the below again, we'd see that the
|
|
// FileOffset is less than the StartOffset of the next
|
|
// subsection, so we'd goto SkipPage and then compare the
|
|
// next FileOffset which might be a match at the very top of
|
|
// the loop. Hence, TempOffset must be right even in this
|
|
// case, so set it here unconditionally.
|
|
//
|
|
|
|
TempOffset = EndOffset;
|
|
|
|
if ((StartOffset <= FileOffset) && (FileOffset < EndOffset)) {
|
|
|
|
LocalPrototypePte = Subsection->SubsectionBase +
|
|
((FileOffset - StartOffset) >> PAGE_SHIFT);
|
|
|
|
break;
|
|
}
|
|
|
|
if (FileOffset < StartOffset) {
|
|
|
|
//
|
|
// Skip this page of the prefetch as it must be referring
|
|
// to bss in the previous subsection - ie: this makes
|
|
// no sense to prefetch as it is all demand zero. Moreover,
|
|
// there is no disk block address for these at all !
|
|
//
|
|
|
|
goto SkipPage;
|
|
}
|
|
|
|
if ((VeryLastSubsection != NULL) &&
|
|
((PMSUBSECTION)Subsection == VeryLastSubsection)) {
|
|
|
|
//
|
|
// The requested block is beyond the size the section
|
|
// was on entry. Reject it as this subsection is not
|
|
// referenced.
|
|
//
|
|
|
|
Subsection = NULL;
|
|
break;
|
|
}
|
|
|
|
Subsection = Subsection->NextSubsection;
|
|
|
|
} while (Subsection != NULL);
|
|
}
|
|
|
|
if ((Subsection == NULL) || (LocalPrototypePte == LastPrototypePte)) {
|
|
|
|
//
|
|
// Illegal offsets are not prefetched. Either the file has
|
|
// been replaced since the scenario was logged or Cc is passing
|
|
// trash. Either way, this prefetch is over.
|
|
//
|
|
|
|
#if DBG
|
|
if (MiPfDebug & MI_PF_PRINT_ERRORS) {
|
|
DbgPrint ("MiPfPrepareReadList: Illegal readlist passed %p, %p, %p\n", ReadList, LocalPrototypePte, LastPrototypePte);
|
|
}
|
|
#endif
|
|
|
|
if (VeryFirstSubsection != NULL) {
|
|
MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
|
|
LastPteOffset);
|
|
}
|
|
ExFreePool (MiReadList);
|
|
return STATUS_INVALID_PARAMETER_1;
|
|
}
|
|
|
|
PteContents = *LocalPrototypePte;
|
|
|
|
//
|
|
// See if this page needs to be read in. Note that these reads
|
|
// are done without the PFN or system cache working set locks.
|
|
// This is ok because later before we make the final decision on
|
|
// whether to read each page, we'll look again.
|
|
// If the page is in tranisition, make the call to (possibly) move
|
|
// it to the end of the standby list to prevent cannibalization.
|
|
//
|
|
|
|
if (PteContents.u.Hard.Valid == 1) {
|
|
SkipPage:
|
|
continue;
|
|
}
|
|
|
|
if (PteContents.u.Soft.Prototype == 0) {
|
|
if (PteContents.u.Soft.Transition == 1) {
|
|
MiMovePageToEndOfStandbyList (LocalPrototypePte);
|
|
}
|
|
else {
|
|
|
|
//
|
|
// Demand zero or pagefile-backed, don't prefetch from the
|
|
// file or we'd lose the contents. Note this can happen for
|
|
// session-space images as we back modified (ie: for relocation
|
|
// fixups or IAT updated) portions from the pagefile.
|
|
//
|
|
|
|
NOTHING;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
Rle->u1.PrototypePte = LocalPrototypePte;
|
|
LastPrototypePte = LocalPrototypePte;
|
|
|
|
//
|
|
// Check for partial pages as they require further processing later.
|
|
//
|
|
|
|
StartingOffset = (UINT64) MiStartingOffset (Subsection, LocalPrototypePte);
|
|
|
|
ASSERT (StartingOffset < TempOffset);
|
|
|
|
if ((StartingOffset + PAGE_SIZE) > TempOffset) {
|
|
Rle->u1.e1.Partial = 1;
|
|
}
|
|
|
|
//
|
|
// The NewSubsection marker is used to delimit the beginning of a new
|
|
// subsection because RLE chunks must be split to accomodate inpage
|
|
// completion so that proper zeroing (based on subsection alignment)
|
|
// is done in MiWaitForInPageComplete.
|
|
//
|
|
|
|
if (FirstRleInRun == NULL) {
|
|
FirstRleInRun = Rle;
|
|
Rle->u1.e1.NewSubsection = 1;
|
|
PreviousSubsection = Subsection;
|
|
}
|
|
else {
|
|
if (Subsection != PreviousSubsection) {
|
|
Rle->u1.e1.NewSubsection = 1;
|
|
PreviousSubsection = Subsection;
|
|
}
|
|
}
|
|
|
|
NumberOfPages += 1;
|
|
}
|
|
|
|
//
|
|
// If the number of pages to read in is extremely small, don't bother.
|
|
//
|
|
|
|
if (NumberOfPages < MINIMUM_READ_LIST_PAGES) {
|
|
if (VeryFirstSubsection != NULL) {
|
|
MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection,
|
|
LastPteOffset);
|
|
}
|
|
ExFreePool (MiReadList);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
RleMax = MiReadList->List + NumberOfEntries;
|
|
ASSERT (FirstRleInRun != RleMax);
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
//
|
|
// Walk the readlists to determine runs. Cross-subsection runs are split
|
|
// here so the completion code can zero the proper amount for any
|
|
// non-aligned files.
|
|
//
|
|
|
|
EndRleRun = NULL;
|
|
Rle = FirstRleInRun;
|
|
|
|
//
|
|
// Initializing StartRleRun & EndPrototypePte is not needed for correctness
|
|
// but without it the compiler cannot compile this code
|
|
// W4 to check for use of uninitialized variables.
|
|
//
|
|
|
|
StartRleRun = NULL;
|
|
EndPrototypePte = NULL;
|
|
|
|
while (Rle < RleMax) {
|
|
|
|
if (Rle->u1.PrototypePte != NULL) {
|
|
|
|
if (EndRleRun != NULL) {
|
|
|
|
StartPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
|
|
|
|
if (StartPrototypePte - EndPrototypePte > SEEK_THRESHOLD) {
|
|
Rle -= 1;
|
|
goto BuildMdl;
|
|
}
|
|
}
|
|
|
|
if (Rle->u1.e1.NewSubsection == 1) {
|
|
if (EndRleRun != NULL) {
|
|
Rle -= 1;
|
|
goto BuildMdl;
|
|
}
|
|
}
|
|
|
|
if (EndRleRun == NULL) {
|
|
StartRleRun = Rle;
|
|
}
|
|
|
|
EndRleRun = Rle;
|
|
EndPrototypePte = MI_RLEPROTO_TO_PROTO(Rle->u1.PrototypePte);
|
|
|
|
if (Rle->u1.e1.Partial == 1) {
|
|
|
|
//
|
|
// This must be the last RLE in this subsection as it is a
|
|
// partial page. Split this run now.
|
|
//
|
|
|
|
goto BuildMdl;
|
|
}
|
|
}
|
|
|
|
Rle += 1;
|
|
|
|
//
|
|
// Handle any straggling last run as well.
|
|
//
|
|
|
|
if (Rle == RleMax) {
|
|
if (EndRleRun != NULL) {
|
|
Rle -= 1;
|
|
goto BuildMdl;
|
|
}
|
|
}
|
|
|
|
continue;
|
|
|
|
BuildMdl:
|
|
|
|
//
|
|
// Note no preceding or trailing dummy pages are possible as they are
|
|
// trimmed immediately each time when the first real page of a run
|
|
// is discovered above.
|
|
//
|
|
|
|
ASSERT (Rle >= StartRleRun);
|
|
ASSERT (StartRleRun->u1.PrototypePte != NULL);
|
|
ASSERT (EndRleRun->u1.PrototypePte != NULL);
|
|
|
|
StartPrototypePte = MI_RLEPROTO_TO_PROTO(StartRleRun->u1.PrototypePte);
|
|
EndPrototypePte = MI_RLEPROTO_TO_PROTO(EndRleRun->u1.PrototypePte);
|
|
|
|
NumberOfPages = (EndPrototypePte - StartPrototypePte) + 1;
|
|
|
|
//
|
|
// Allocate and initialize an inpage support block for this run.
|
|
//
|
|
|
|
InPageSupport = MiGetInPageSupportBlock (MM_NOIRQL, &Status);
|
|
|
|
if (InPageSupport == NULL) {
|
|
ASSERT (!NT_SUCCESS (Status));
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Use the MDL embedded in the inpage support block if it's big enough.
|
|
// Otherwise allocate and initialize an MDL for this run.
|
|
//
|
|
|
|
if (NumberOfPages <= MM_MAXIMUM_READ_CLUSTER_SIZE + 1) {
|
|
Mdl = &InPageSupport->Mdl;
|
|
MmInitializeMdl (Mdl, NULL, NumberOfPages << PAGE_SHIFT);
|
|
}
|
|
else {
|
|
Mdl = MmCreateMdl (NULL, NULL, NumberOfPages << PAGE_SHIFT);
|
|
if (Mdl == NULL) {
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
break;
|
|
}
|
|
}
|
|
|
|
#if DBG
|
|
if (MiPfDebug & MI_PF_VERBOSE) {
|
|
DbgPrint ("MiPfPrepareReadList: Creating INPAGE/MDL %p %p for %x pages\n", InPageSupport, Mdl, NumberOfPages);
|
|
}
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
*Page = MM_EMPTY_LIST;
|
|
#endif
|
|
//
|
|
// Find the subsection for the start RLE. From this the file offset
|
|
// can be derived.
|
|
//
|
|
|
|
ASSERT (StartPrototypePte != NULL);
|
|
|
|
if (ControlArea->u.Flags.GlobalOnlyPerSession == 0) {
|
|
Subsection = (PSUBSECTION)(ControlArea + 1);
|
|
}
|
|
else {
|
|
Subsection = (PSUBSECTION)((PLARGE_CONTROL_AREA)ControlArea + 1);
|
|
}
|
|
|
|
do {
|
|
ASSERT (Subsection->SubsectionBase != NULL);
|
|
|
|
if ((StartPrototypePte >= Subsection->SubsectionBase) &&
|
|
(StartPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection)) {
|
|
break;
|
|
}
|
|
Subsection = Subsection->NextSubsection;
|
|
|
|
} while (Subsection != NULL);
|
|
|
|
//
|
|
// Start the read at the proper file offset.
|
|
//
|
|
|
|
StartingOffset = (UINT64) MiStartingOffset (Subsection,
|
|
StartPrototypePte);
|
|
|
|
InPageSupport->ReadOffset = *((PLARGE_INTEGER)(&StartingOffset));
|
|
|
|
//
|
|
// Since the RLE is not always valid here, only walk the remaining
|
|
// subsections for valid partial RLEs as only they need truncation.
|
|
//
|
|
// Note only image file reads need truncation as the filesystem cannot
|
|
// blindly zero the rest of the page for these reads as they are packed
|
|
// by memory management on a 512-byte sector basis. Data reads use
|
|
// the whole page and the filesystems zero fill any remainder beyond
|
|
// valid data length. It is important to specify the entire page where
|
|
// possible so the filesystem won't post this which will hurt perf.
|
|
//
|
|
|
|
if ((EndRleRun->u1.e1.Partial == 1) && (ReadList->IsImage)) {
|
|
|
|
ASSERT ((EndPrototypePte >= Subsection->SubsectionBase) &&
|
|
(EndPrototypePte < Subsection->SubsectionBase + Subsection->PtesInSubsection));
|
|
|
|
//
|
|
// The read length for a partial RLE must be truncated correctly.
|
|
//
|
|
|
|
EndQuad = MiEndingOffset(Subsection);
|
|
TempOffset = (UINT64)EndQuad.QuadPart;
|
|
|
|
if ((ULONG)(TempOffset - StartingOffset) <= Mdl->ByteCount) {
|
|
ReadSize = (ULONG)(TempOffset - StartingOffset);
|
|
|
|
//
|
|
// Round the offset to a 512-byte offset as this will help
|
|
// filesystems optimize the transfer. Note that filesystems
|
|
// will always zero fill the remainder between VDL and the
|
|
// next 512-byte multiple and we have already zeroed the
|
|
// whole page.
|
|
//
|
|
|
|
ReadSize = ((ReadSize + MMSECTOR_MASK) & ~MMSECTOR_MASK);
|
|
|
|
Mdl->ByteCount = ReadSize;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Stash these in the inpage block so we can walk it quickly later
|
|
// in pass 2.
|
|
//
|
|
|
|
InPageSupport->BasePte = (PMMPTE)StartRleRun;
|
|
InPageSupport->FilePointer = (PFILE_OBJECT)EndRleRun;
|
|
|
|
ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
|
|
InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
|
|
|
|
PushEntryList (&MiReadList->InPageSupportHead,
|
|
&InPageSupport->ListEntry);
|
|
|
|
Rle += 1;
|
|
EndRleRun = NULL;
|
|
}
|
|
|
|
//
|
|
// Check for the entire list being full (or empty).
|
|
//
|
|
// Status is STATUS_INSUFFICIENT_RESOURCES if an MDL or inpage block
|
|
// allocation failed. If any allocations succeeded, then set STATUS_SUCCESS
|
|
// as pass2 must occur.
|
|
//
|
|
|
|
if (MiReadList->InPageSupportHead.Next != NULL) {
|
|
|
|
Status = STATUS_SUCCESS;
|
|
}
|
|
else {
|
|
if (VeryFirstSubsection != NULL) {
|
|
MiRemoveViewsFromSectionWithPfn (VeryFirstSubsection, LastPteOffset);
|
|
}
|
|
ExFreePool (MiReadList);
|
|
MiReadList = NULL;
|
|
}
|
|
|
|
//
|
|
// Note that a nonzero *OutMiReadList return value means that the caller
|
|
// needs to remove the views for the section.
|
|
//
|
|
|
|
*OutMiReadList = MiReadList;
|
|
|
|
return Status;
|
|
}
|
|
|
|
NTSTATUS
|
|
MiPfPutPagesInTransition (
|
|
IN PMI_READ_LIST ReadList,
|
|
IN OUT PMMPFN *DummyPagePfn
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine allocates physical memory for the specified read-list and
|
|
puts all the pages in transition. On return the caller must issue I/Os
|
|
for the list not only because of this thread, but also to satisfy
|
|
collided faults from other threads for these same pages.
|
|
|
|
Arguments:
|
|
|
|
ReadList - Supplies a pointer to the read-list.
|
|
|
|
DummyPagePfn - If this points at a NULL pointer, then a dummy page is
|
|
allocated and placed in this pointer. Otherwise this points
|
|
at a PFN to use as a dummy page.
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS
|
|
STATUS_INSUFFICIENT_RESOURCES
|
|
|
|
Environment:
|
|
|
|
Kernel mode. PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
PVOID StartingVa;
|
|
PFN_NUMBER MdlPages;
|
|
KIRQL OldIrql;
|
|
MMPTE PteContents;
|
|
PMMPTE RlePrototypePte;
|
|
PMMPTE FirstRlePrototypeInRun;
|
|
PFN_NUMBER PageFrameIndex;
|
|
PPFN_NUMBER Page;
|
|
PPFN_NUMBER DestinationPage;
|
|
ULONG PageColor;
|
|
PMI_READ_LIST_ENTRY Rle;
|
|
PMI_READ_LIST_ENTRY RleMax;
|
|
PMI_READ_LIST_ENTRY FirstRleInRun;
|
|
PFN_NUMBER DummyPage;
|
|
PMDL Mdl;
|
|
PMDL FreeMdl;
|
|
PMMPFN PfnProto;
|
|
PMMPFN Pfn1;
|
|
PMMPFN DummyPfn1;
|
|
ULONG i;
|
|
PFN_NUMBER DummyTrim;
|
|
PFN_NUMBER DummyReferences;
|
|
ULONG NumberOfPages;
|
|
MMPTE TempPte;
|
|
PMMPTE PointerPde;
|
|
PEPROCESS CurrentProcess;
|
|
PSINGLE_LIST_ENTRY PrevEntry;
|
|
PSINGLE_LIST_ENTRY NextEntry;
|
|
PMMINPAGE_SUPPORT InPageSupport;
|
|
SINGLE_LIST_ENTRY ReversedInPageSupportHead;
|
|
LOGICAL Waited;
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
//
|
|
// Reverse the singly linked list of inpage support blocks so the
|
|
// blocks are read in the same order requested for better performance
|
|
// (ie: keep the disk heads seeking in the same direction).
|
|
//
|
|
|
|
ReversedInPageSupportHead.Next = NULL;
|
|
|
|
do {
|
|
|
|
NextEntry = PopEntryList (&ReadList->InPageSupportHead);
|
|
|
|
if (NextEntry == NULL) {
|
|
break;
|
|
}
|
|
|
|
PushEntryList (&ReversedInPageSupportHead, NextEntry);
|
|
|
|
} while (TRUE);
|
|
|
|
ASSERT (ReversedInPageSupportHead.Next != NULL);
|
|
ReadList->InPageSupportHead.Next = ReversedInPageSupportHead.Next;
|
|
|
|
DummyReferences = 0;
|
|
FreeMdl = NULL;
|
|
CurrentProcess = PsGetCurrentProcess();
|
|
|
|
PfnProto = NULL;
|
|
PointerPde = NULL;
|
|
|
|
//
|
|
// Allocate a dummy page that will map discarded pages that aren't skipped.
|
|
// Do it only if it's not already allocated.
|
|
//
|
|
|
|
if (*DummyPagePfn == NULL) {
|
|
|
|
LOCK_PFN (OldIrql);
|
|
|
|
//
|
|
// Do a quick sanity check to avoid doing unnecessary work.
|
|
//
|
|
|
|
if ((MmAvailablePages < MM_HIGH_LIMIT) ||
|
|
(MI_NONPAGABLE_MEMORY_AVAILABLE() < MM_HIGH_LIMIT)) {
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
|
|
do {
|
|
|
|
NextEntry = PopEntryList(&ReadList->InPageSupportHead);
|
|
if (NextEntry == NULL) {
|
|
break;
|
|
}
|
|
|
|
InPageSupport = CONTAINING_RECORD(NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
} while (TRUE);
|
|
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
DummyPage = MiRemoveAnyPage (0);
|
|
Pfn1 = MI_PFN_ELEMENT (DummyPage);
|
|
|
|
ASSERT (Pfn1->u2.ShareCount == 0);
|
|
ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
|
|
|
|
MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0);
|
|
|
|
//
|
|
// Give the page a containing frame so MiIdentifyPfn won't crash.
|
|
//
|
|
|
|
Pfn1->u4.PteFrame = PsInitialSystemProcess->Pcb.DirectoryTableBase[0] >> PAGE_SHIFT;
|
|
|
|
//
|
|
// Always bias the reference count by 1 and charge for this locked page
|
|
// up front so the myriad increments and decrements don't get slowed
|
|
// down with needless checking.
|
|
//
|
|
|
|
Pfn1->u3.e1.PrototypePte = 0;
|
|
MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, TRUE, 11);
|
|
Pfn1->u3.e2.ReferenceCount += 1;
|
|
|
|
Pfn1->u3.e1.ReadInProgress = 1;
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
|
|
*DummyPagePfn = Pfn1;
|
|
}
|
|
else {
|
|
Pfn1 = *DummyPagePfn;
|
|
DummyPage = MI_PFN_ELEMENT_TO_INDEX (Pfn1);
|
|
}
|
|
|
|
DummyPfn1 = Pfn1;
|
|
|
|
PrevEntry = NULL;
|
|
NextEntry = ReadList->InPageSupportHead.Next;
|
|
|
|
while (NextEntry != NULL) {
|
|
|
|
InPageSupport = CONTAINING_RECORD (NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
Rle = (PMI_READ_LIST_ENTRY) InPageSupport->BasePte;
|
|
RleMax = (PMI_READ_LIST_ENTRY) InPageSupport->FilePointer;
|
|
|
|
ASSERT (Rle->u1.PrototypePte != NULL);
|
|
ASSERT (RleMax->u1.PrototypePte != NULL);
|
|
|
|
//
|
|
// Properly initialize the inpage support block fields we overloaded.
|
|
//
|
|
|
|
InPageSupport->BasePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
|
|
InPageSupport->FilePointer = ReadList->FileObject;
|
|
|
|
FirstRleInRun = Rle;
|
|
FirstRlePrototypeInRun = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
|
|
RleMax += 1;
|
|
|
|
Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
|
|
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
|
|
|
|
MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
|
|
Mdl->ByteCount);
|
|
|
|
//
|
|
// Default the MDL entry to the dummy page as the RLE PTEs may
|
|
// be noncontiguous and we have no way to distinguish the jumps.
|
|
//
|
|
|
|
for (i = 0; i < MdlPages; i += 1) {
|
|
*Page = DummyPage;
|
|
Page += 1;
|
|
}
|
|
|
|
DummyReferences += MdlPages;
|
|
|
|
if (DummyPfn1->u3.e2.ReferenceCount + MdlPages >= MAXUSHORT) {
|
|
|
|
//
|
|
// The USHORT ReferenceCount wrapped.
|
|
//
|
|
// Dequeue all remaining inpage blocks.
|
|
//
|
|
|
|
if (PrevEntry != NULL) {
|
|
PrevEntry->Next = NULL;
|
|
}
|
|
else {
|
|
ReadList->InPageSupportHead.Next = NULL;
|
|
}
|
|
|
|
do {
|
|
|
|
InPageSupport = CONTAINING_RECORD(NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
|
|
NextEntry = NextEntry->Next;
|
|
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
|
|
} while (NextEntry != NULL);
|
|
|
|
break;
|
|
}
|
|
|
|
NumberOfPages = 0;
|
|
Waited = FALSE;
|
|
|
|
//
|
|
// Build the proper InPageSupport and MDL to describe this run.
|
|
//
|
|
|
|
LOCK_PFN (OldIrql);
|
|
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages);
|
|
|
|
for (; Rle < RleMax; Rle += 1) {
|
|
|
|
//
|
|
// Fill the MDL entry for this RLE.
|
|
//
|
|
|
|
RlePrototypePte = MI_RLEPROTO_TO_PROTO (Rle->u1.PrototypePte);
|
|
|
|
if (RlePrototypePte == NULL) {
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// The RlePrototypePte better be inside a prototype PTE allocation
|
|
// so that subsequent page trims update the correct PTEs.
|
|
//
|
|
|
|
ASSERT (((RlePrototypePte >= (PMMPTE)MmPagedPoolStart) &&
|
|
(RlePrototypePte <= (PMMPTE)MmPagedPoolEnd)) ||
|
|
((RlePrototypePte >= (PMMPTE)MmSpecialPoolStart) && (RlePrototypePte <= (PMMPTE)MmSpecialPoolEnd)));
|
|
|
|
//
|
|
// This is a page that our first pass which ran lock-free decided
|
|
// needed to be read. Here this must be rechecked as the page
|
|
// state could have changed. Note this check is final as the
|
|
// PFN lock is held. The PTE must be put in transition with
|
|
// read in progress before the PFN lock is released.
|
|
//
|
|
|
|
//
|
|
// Lock page containing prototype PTEs in memory by
|
|
// incrementing the reference count for the page.
|
|
// Unlock any page locked earlier containing prototype PTEs if
|
|
// the containing page is not the same for both.
|
|
//
|
|
|
|
if (PfnProto != NULL) {
|
|
|
|
if (PointerPde != MiGetPteAddress (RlePrototypePte)) {
|
|
|
|
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
|
|
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
|
|
PfnProto = NULL;
|
|
}
|
|
}
|
|
|
|
if (PfnProto == NULL) {
|
|
|
|
ASSERT (!MI_IS_PHYSICAL_ADDRESS (RlePrototypePte));
|
|
|
|
PointerPde = MiGetPteAddress (RlePrototypePte);
|
|
|
|
if (PointerPde->u.Hard.Valid == 0) {
|
|
|
|
//
|
|
// Set Waited to TRUE if we ever release the PFN lock as
|
|
// that means a release path below must factor this in.
|
|
//
|
|
|
|
if (MiMakeSystemAddressValidPfn (RlePrototypePte, OldIrql) == TRUE) {
|
|
Waited = TRUE;
|
|
}
|
|
}
|
|
|
|
PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber);
|
|
MI_ADD_LOCKED_PAGE_CHARGE(PfnProto, TRUE, 4);
|
|
PfnProto->u3.e2.ReferenceCount += 1;
|
|
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
|
|
}
|
|
|
|
PteContents = *(RlePrototypePte);
|
|
|
|
if (PteContents.u.Hard.Valid == 1) {
|
|
|
|
//
|
|
// The page has become resident since the last pass. Don't
|
|
// include it.
|
|
//
|
|
|
|
NOTHING;
|
|
}
|
|
else if (PteContents.u.Soft.Prototype == 0) {
|
|
|
|
//
|
|
// The page is either in transition (so don't prefetch it).
|
|
//
|
|
// - OR -
|
|
//
|
|
// it is now pagefile (or demand zero) backed - in which case
|
|
// prefetching it from the file here would cause us to lose
|
|
// the contents. Note this can happen for session-space images
|
|
// as we back modified (ie: for relocation fixups or IAT
|
|
// updated) portions from the pagefile.
|
|
//
|
|
|
|
NOTHING;
|
|
}
|
|
else if ((MmAvailablePages >= MM_HIGH_LIMIT) &&
|
|
(MI_NONPAGABLE_MEMORY_AVAILABLE() >= MM_HIGH_LIMIT)) {
|
|
|
|
NumberOfPages += 1;
|
|
|
|
//
|
|
// Allocate a physical page.
|
|
//
|
|
|
|
PageColor = MI_PAGE_COLOR_VA_PROCESS (
|
|
MiGetVirtualAddressMappedByPte (RlePrototypePte),
|
|
&CurrentProcess->NextPageColor
|
|
);
|
|
|
|
if (Rle->u1.e1.Partial == 1) {
|
|
|
|
//
|
|
// This read crosses the end of a subsection, get a zeroed
|
|
// page and correct the read size.
|
|
//
|
|
|
|
PageFrameIndex = MiRemoveZeroPage (PageColor);
|
|
}
|
|
else {
|
|
PageFrameIndex = MiRemoveAnyPage (PageColor);
|
|
}
|
|
|
|
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
|
|
|
|
ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
|
|
ASSERT (Pfn1->u2.ShareCount == 0);
|
|
ASSERT (RlePrototypePte->u.Hard.Valid == 0);
|
|
|
|
//
|
|
// Initialize read-in-progress PFN.
|
|
//
|
|
|
|
MiInitializePfn (PageFrameIndex, RlePrototypePte, 0);
|
|
|
|
//
|
|
// These pieces of MiInitializePfn initialization are overridden
|
|
// here as these pages are only going into prototype
|
|
// transition and not into any page tables.
|
|
//
|
|
|
|
Pfn1->u3.e1.PrototypePte = 1;
|
|
MI_ADD_LOCKED_PAGE_CHARGE(Pfn1, TRUE, 38);
|
|
Pfn1->u2.ShareCount -= 1;
|
|
Pfn1->u3.e1.PageLocation = ZeroedPageList;
|
|
|
|
//
|
|
// Initialize the I/O specific fields.
|
|
//
|
|
|
|
ASSERT (FirstRleInRun->u1.PrototypePte != NULL);
|
|
Pfn1->u1.Event = &InPageSupport->Event;
|
|
Pfn1->u3.e1.ReadInProgress = 1;
|
|
ASSERT (Pfn1->u4.InPageError == 0);
|
|
|
|
//
|
|
// Increment the PFN reference count in the control area for
|
|
// the subsection.
|
|
//
|
|
|
|
ReadList->ControlArea->NumberOfPfnReferences += 1;
|
|
|
|
//
|
|
// Put the PTE into the transition state.
|
|
// No TB flush needed as the PTE is still not valid.
|
|
//
|
|
|
|
MI_MAKE_TRANSITION_PTE (TempPte,
|
|
PageFrameIndex,
|
|
RlePrototypePte->u.Soft.Protection,
|
|
RlePrototypePte);
|
|
MI_WRITE_INVALID_PTE (RlePrototypePte, TempPte);
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
|
|
ASSERT ((ULONG)(RlePrototypePte - FirstRlePrototypeInRun) < MdlPages);
|
|
|
|
*(Page + (RlePrototypePte - FirstRlePrototypeInRun)) = PageFrameIndex;
|
|
}
|
|
else {
|
|
|
|
//
|
|
// Failed allocation - this concludes prefetching for this run.
|
|
//
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If all the pages were resident, dereference the dummy page references
|
|
// now and notify our caller that I/Os are not necessary. Note that
|
|
// STATUS_SUCCESS must still be returned so our caller knows to continue
|
|
// on to the next readlist.
|
|
//
|
|
|
|
if (NumberOfPages == 0) {
|
|
ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
|
|
if (PrevEntry != NULL) {
|
|
PrevEntry->Next = NextEntry->Next;
|
|
}
|
|
else {
|
|
ReadList->InPageSupportHead.Next = NextEntry->Next;
|
|
}
|
|
|
|
NextEntry = NextEntry->Next;
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Carefully trim leading dummy pages.
|
|
//
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
|
|
DummyTrim = 0;
|
|
for (i = 0; i < MdlPages - 1; i += 1) {
|
|
if (*Page == DummyPage) {
|
|
DummyTrim += 1;
|
|
Page += 1;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (DummyTrim != 0) {
|
|
|
|
Mdl->Size =
|
|
(USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
|
|
Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
|
|
ASSERT (Mdl->ByteCount != 0);
|
|
InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE);
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
|
|
|
|
//
|
|
// Shuffle down the PFNs in the MDL.
|
|
// Recalculate BasePte to adjust for the shuffle.
|
|
//
|
|
|
|
Pfn1 = MI_PFN_ELEMENT (*Page);
|
|
|
|
ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
|
|
ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
|
|
(Pfn1->PteAddress->u.Soft.Transition == 1));
|
|
|
|
InPageSupport->BasePte = Pfn1->PteAddress;
|
|
|
|
DestinationPage = (PPFN_NUMBER)(Mdl + 1);
|
|
|
|
do {
|
|
*DestinationPage = *Page;
|
|
DestinationPage += 1;
|
|
Page += 1;
|
|
i += 1;
|
|
} while (i < MdlPages);
|
|
|
|
MdlPages -= DummyTrim;
|
|
}
|
|
|
|
//
|
|
// Carefully trim trailing dummy pages.
|
|
//
|
|
|
|
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
|
|
|
|
MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
|
|
Mdl->ByteCount);
|
|
|
|
ASSERT (MdlPages != 0);
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;
|
|
|
|
if (*Page == DummyPage) {
|
|
|
|
ASSERT (MdlPages >= 2);
|
|
|
|
//
|
|
// Trim the last page specially as it may be a partial page.
|
|
//
|
|
|
|
Mdl->Size -= sizeof(PFN_NUMBER);
|
|
if (BYTE_OFFSET(Mdl->ByteCount) != 0) {
|
|
Mdl->ByteCount &= ~(PAGE_SIZE - 1);
|
|
}
|
|
else {
|
|
Mdl->ByteCount -= PAGE_SIZE;
|
|
}
|
|
ASSERT (Mdl->ByteCount != 0);
|
|
DummyPfn1->u3.e2.ReferenceCount -= 1;
|
|
|
|
//
|
|
// Now trim any other trailing pages.
|
|
//
|
|
|
|
Page -= 1;
|
|
DummyTrim = 0;
|
|
while (Page != ((PPFN_NUMBER)(Mdl + 1))) {
|
|
if (*Page != DummyPage) {
|
|
break;
|
|
}
|
|
DummyTrim += 1;
|
|
Page -= 1;
|
|
}
|
|
if (DummyTrim != 0) {
|
|
ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER)));
|
|
Mdl->Size =
|
|
(USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
|
|
Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
|
|
}
|
|
|
|
ASSERT (MdlPages > DummyTrim + 1);
|
|
MdlPages -= (DummyTrim + 1);
|
|
|
|
#if DBG
|
|
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
|
|
|
|
ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
|
|
Mdl->ByteCount));
|
|
#endif
|
|
}
|
|
|
|
//
|
|
// If the MDL is not already embedded in the inpage block, see if its
|
|
// final size qualifies it - if so, embed it now.
|
|
//
|
|
|
|
if ((Mdl != &InPageSupport->Mdl) &&
|
|
(Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){
|
|
|
|
#if DBG
|
|
RtlFillMemoryUlong (&InPageSupport->Page[0],
|
|
(MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER),
|
|
0xf1f1f1f1);
|
|
#endif
|
|
|
|
RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size);
|
|
|
|
Mdl->Next = FreeMdl;
|
|
FreeMdl = Mdl;
|
|
|
|
Mdl = &InPageSupport->Mdl;
|
|
|
|
ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
|
|
InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
|
|
}
|
|
|
|
//
|
|
// If the MDL contains a large number of dummy pages to real pages
|
|
// then just discard it. Only check large MDLs as embedded ones are
|
|
// always worth the I/O.
|
|
//
|
|
// The PFN lock may have been released above during the
|
|
// MiMakeSystemAddressValidPfn call. If so, other threads may
|
|
// have collided on the pages in the prefetch MDL and if so,
|
|
// this I/O must be issued regardless of the inefficiency of
|
|
// dummy pages within it. Otherwise the other threads will
|
|
// hang in limbo forever.
|
|
//
|
|
|
|
ASSERT (MdlPages != 0);
|
|
|
|
#if DBG
|
|
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
|
|
|
|
ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
|
|
Mdl->ByteCount));
|
|
#endif
|
|
|
|
if ((Mdl != &InPageSupport->Mdl) &&
|
|
(Waited == FALSE) &&
|
|
((MdlPages - NumberOfPages) / DUMMY_RATIO >= NumberOfPages)) {
|
|
|
|
if (PrevEntry != NULL) {
|
|
PrevEntry->Next = NextEntry->Next;
|
|
}
|
|
else {
|
|
ReadList->InPageSupportHead.Next = NextEntry->Next;
|
|
}
|
|
|
|
NextEntry = NextEntry->Next;
|
|
|
|
ASSERT (MI_EXTRACT_PREFETCH_MDL(InPageSupport) == Mdl);
|
|
|
|
//
|
|
// Note the pages are individually freed here (rather than just
|
|
// "completing" the I/O with an error) as the PFN lock has
|
|
// never been released since the pages were put in transition.
|
|
// So no collisions on these pages are possible.
|
|
//
|
|
|
|
ASSERT (InPageSupport->WaitCount == 1);
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;
|
|
|
|
do {
|
|
if (*Page != DummyPage) {
|
|
Pfn1 = MI_PFN_ELEMENT (*Page);
|
|
|
|
ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
|
|
ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
|
|
(Pfn1->PteAddress->u.Soft.Transition == 1));
|
|
ASSERT (Pfn1->u3.e1.ReadInProgress == 1);
|
|
ASSERT (Pfn1->u3.e1.PrototypePte == 1);
|
|
ASSERT (Pfn1->u3.e2.ReferenceCount == 1);
|
|
ASSERT (Pfn1->u2.ShareCount == 0);
|
|
|
|
Pfn1->u3.e1.PageLocation = StandbyPageList;
|
|
Pfn1->u3.e1.ReadInProgress = 0;
|
|
MiRestoreTransitionPte (Pfn1);
|
|
|
|
MI_SET_PFN_DELETED (Pfn1);
|
|
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(Pfn1, 39);
|
|
}
|
|
|
|
Page -= 1;
|
|
} while (Page >= (PPFN_NUMBER)(Mdl + 1));
|
|
|
|
ASSERT (InPageSupport->WaitCount == 1);
|
|
|
|
ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
|
|
continue;
|
|
}
|
|
|
|
#if DBG
|
|
MiPfDbgDumpReadList (ReadList);
|
|
#endif
|
|
|
|
ASSERT ((USHORT)Mdl->Size - sizeof(MDL) == BYTES_TO_PAGES(Mdl->ByteCount) * sizeof(PFN_NUMBER));
|
|
|
|
DummyPfn1->u3.e2.ReferenceCount =
|
|
(USHORT)(DummyPfn1->u3.e2.ReferenceCount - NumberOfPages);
|
|
|
|
UNLOCK_PFN (OldIrql);
|
|
|
|
InterlockedIncrement ((PLONG) &MmInfoCounters.PageReadIoCount);
|
|
|
|
InterlockedExchangeAdd ((PLONG) &MmInfoCounters.PageReadCount,
|
|
(LONG) NumberOfPages);
|
|
|
|
//
|
|
// March on to the next run and its InPageSupport and MDL.
|
|
//
|
|
|
|
PrevEntry = NextEntry;
|
|
NextEntry = NextEntry->Next;
|
|
}
|
|
|
|
//
|
|
// Unlock page containing prototype PTEs.
|
|
//
|
|
|
|
if (PfnProto != NULL) {
|
|
LOCK_PFN (OldIrql);
|
|
ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
|
|
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnProto, 5);
|
|
UNLOCK_PFN (OldIrql);
|
|
}
|
|
|
|
#if DBG
|
|
|
|
if (MiPfDebug & MI_PF_DELAY) {
|
|
|
|
//
|
|
// This delay provides a window to increase the chance of collided
|
|
// faults.
|
|
//
|
|
|
|
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
|
|
}
|
|
|
|
#endif
|
|
|
|
//
|
|
// Free any collapsed MDLs that are no longer needed.
|
|
//
|
|
|
|
while (FreeMdl != NULL) {
|
|
Mdl = FreeMdl->Next;
|
|
ExFreePool (FreeMdl);
|
|
FreeMdl = Mdl;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
VOID
|
|
MiPfExecuteReadList (
|
|
IN PMI_READ_LIST ReadList
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine executes the read list by issuing paging I/Os for all
|
|
runs described in the read-list.
|
|
|
|
Arguments:
|
|
|
|
ReadList - Pointer to the read-list.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode, PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
PMDL Mdl;
|
|
NTSTATUS status;
|
|
PMMPFN Pfn1;
|
|
PMMPTE LocalPrototypePte;
|
|
PFN_NUMBER PageFrameIndex;
|
|
PSINGLE_LIST_ENTRY NextEntry;
|
|
PMMINPAGE_SUPPORT InPageSupport;
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
NextEntry = ReadList->InPageSupportHead.Next;
|
|
while (NextEntry != NULL) {
|
|
|
|
InPageSupport = CONTAINING_RECORD(NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
//
|
|
// Initialize the prefetch MDL.
|
|
//
|
|
|
|
Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
|
|
|
|
ASSERT ((Mdl->MdlFlags & MDL_MAPPED_TO_SYSTEM_VA) == 0);
|
|
Mdl->MdlFlags |= (MDL_PAGES_LOCKED | MDL_IO_PAGE_READ);
|
|
|
|
ASSERT (InPageSupport->u1.e1.Completed == 0);
|
|
ASSERT (InPageSupport->Thread == PsGetCurrentThread());
|
|
ASSERT64 (InPageSupport->UsedPageTableEntries == 0);
|
|
ASSERT (InPageSupport->WaitCount >= 1);
|
|
ASSERT (InPageSupport->u1.e1.PrefetchMdlHighBits != 0);
|
|
|
|
//
|
|
// Initialize the inpage support block fields we overloaded.
|
|
//
|
|
|
|
ASSERT (InPageSupport->FilePointer == ReadList->FileObject);
|
|
LocalPrototypePte = InPageSupport->BasePte;
|
|
|
|
ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
|
|
ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
|
|
(LocalPrototypePte->u.Soft.Transition == 1));
|
|
|
|
PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE(LocalPrototypePte);
|
|
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
|
|
|
|
InPageSupport->Pfn = Pfn1;
|
|
|
|
status = IoAsynchronousPageRead (InPageSupport->FilePointer,
|
|
Mdl,
|
|
&InPageSupport->ReadOffset,
|
|
&InPageSupport->Event,
|
|
&InPageSupport->IoStatus);
|
|
|
|
if (!NT_SUCCESS (status)) {
|
|
|
|
//
|
|
// Set the event as the I/O system doesn't set it on errors.
|
|
//
|
|
|
|
InPageSupport->IoStatus.Status = status;
|
|
InPageSupport->IoStatus.Information = 0;
|
|
KeSetEvent (&InPageSupport->Event, 0, FALSE);
|
|
}
|
|
|
|
NextEntry = NextEntry->Next;
|
|
}
|
|
|
|
#if DBG
|
|
|
|
if (MiPfDebug & MI_PF_DELAY) {
|
|
|
|
//
|
|
// This delay provides a window to increase the chance of collided
|
|
// faults.
|
|
//
|
|
|
|
KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond);
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
VOID
|
|
MiPfCompletePrefetchIos (
|
|
IN PMI_READ_LIST ReadList
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine waits for a series of page reads to complete
|
|
and completes the requests.
|
|
|
|
Arguments:
|
|
|
|
ReadList - Pointer to the read-list.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode, PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
PMDL Mdl;
|
|
PMMPFN Pfn1;
|
|
PMMPFN PfnClusterPage;
|
|
PPFN_NUMBER Page;
|
|
NTSTATUS status;
|
|
LONG NumberOfBytes;
|
|
PMMINPAGE_SUPPORT InPageSupport;
|
|
PSINGLE_LIST_ENTRY NextEntry;
|
|
extern ULONG MmFrontOfList;
|
|
|
|
ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
|
|
|
|
do {
|
|
|
|
NextEntry = PopEntryList(&ReadList->InPageSupportHead);
|
|
if (NextEntry == NULL) {
|
|
break;
|
|
}
|
|
|
|
InPageSupport = CONTAINING_RECORD(NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
ASSERT (InPageSupport->Pfn != 0);
|
|
|
|
Pfn1 = InPageSupport->Pfn;
|
|
Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
|
|
status = MiWaitForInPageComplete (InPageSupport->Pfn,
|
|
InPageSupport->BasePte,
|
|
NULL,
|
|
InPageSupport->BasePte,
|
|
InPageSupport,
|
|
PREFETCH_PROCESS);
|
|
|
|
//
|
|
// MiWaitForInPageComplete RETURNS WITH THE PFN LOCK HELD!!!
|
|
//
|
|
|
|
//
|
|
// If we are prefetching for boot, insert prefetched pages to the front
|
|
// of the list. Otherwise the pages prefetched first end up susceptible
|
|
// at the front of the list as we prefetch more. We prefetch pages in
|
|
// the order they will be used. When there is a spike in memory usage
|
|
// and there is no free memory, we lose these pages before we can
|
|
// get cache-hits on them. Thus boot gets ahead and starts discarding
|
|
// prefetched pages that it could use just a little later.
|
|
//
|
|
|
|
if (CCPF_IS_PREFETCHING_FOR_BOOT()) {
|
|
MmFrontOfList = TRUE;
|
|
}
|
|
|
|
NumberOfBytes = (LONG)Mdl->ByteCount;
|
|
|
|
while (NumberOfBytes > 0) {
|
|
|
|
//
|
|
// Decrement all reference counts.
|
|
//
|
|
|
|
PfnClusterPage = MI_PFN_ELEMENT (*Page);
|
|
|
|
#if DBG
|
|
if (PfnClusterPage->u4.InPageError) {
|
|
|
|
//
|
|
// If the page is marked with an error, then the whole transfer
|
|
// must be marked as not successful as well. The only exception
|
|
// is the prefetch dummy page which is used in multiple
|
|
// transfers concurrently and thus may have the inpage error
|
|
// bit set at any time (due to another transaction besides
|
|
// the current one).
|
|
//
|
|
|
|
ASSERT ((status != STATUS_SUCCESS) ||
|
|
(PfnClusterPage->PteAddress == MI_PF_DUMMY_PAGE_PTE));
|
|
}
|
|
#endif
|
|
if (PfnClusterPage->u3.e1.ReadInProgress != 0) {
|
|
|
|
ASSERT (PfnClusterPage->u4.PteFrame != MI_MAGIC_AWE_PTEFRAME);
|
|
PfnClusterPage->u3.e1.ReadInProgress = 0;
|
|
|
|
if (PfnClusterPage->u4.InPageError == 0) {
|
|
PfnClusterPage->u1.Event = NULL;
|
|
}
|
|
}
|
|
|
|
MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF(PfnClusterPage, 39);
|
|
|
|
Page += 1;
|
|
NumberOfBytes -= PAGE_SIZE;
|
|
}
|
|
|
|
//
|
|
// If we were inserting prefetched pages to front of standby list
|
|
// for boot prefetching, stop it before we release the pfn lock.
|
|
//
|
|
|
|
MmFrontOfList = FALSE;
|
|
|
|
if (status != STATUS_SUCCESS) {
|
|
|
|
//
|
|
// An I/O error occurred during the page read
|
|
// operation. All the pages which were just
|
|
// put into transition must be put onto the
|
|
// free list if InPageError is set, and their
|
|
// PTEs restored to the proper contents.
|
|
//
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
NumberOfBytes = (LONG)Mdl->ByteCount;
|
|
|
|
while (NumberOfBytes > 0) {
|
|
|
|
PfnClusterPage = MI_PFN_ELEMENT (*Page);
|
|
|
|
if (PfnClusterPage->u4.InPageError == 1) {
|
|
|
|
if (PfnClusterPage->u3.e2.ReferenceCount == 0) {
|
|
|
|
ASSERT (PfnClusterPage->u3.e1.PageLocation ==
|
|
StandbyPageList);
|
|
|
|
MiUnlinkPageFromList (PfnClusterPage);
|
|
MiRestoreTransitionPte (PfnClusterPage);
|
|
MiInsertPageInFreeList (*Page);
|
|
}
|
|
}
|
|
Page += 1;
|
|
NumberOfBytes -= PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// All the relevant prototype PTEs should be in transition state.
|
|
//
|
|
|
|
//
|
|
// We took out an extra reference on the inpage block to prevent
|
|
// MiWaitForInPageComplete from freeing it (and the MDL), since we
|
|
// needed to process the MDL above. Now let it go for good.
|
|
//
|
|
|
|
ASSERT (InPageSupport->WaitCount >= 1);
|
|
UNLOCK_PFN (PASSIVE_LEVEL);
|
|
|
|
#if DBG
|
|
InPageSupport->ListEntry.Next = NULL;
|
|
#endif
|
|
|
|
MiFreeInPageSupportBlock (InPageSupport);
|
|
|
|
} while (TRUE);
|
|
}
|
|
|
|
#if DBG
|
|
VOID
|
|
MiPfDbgDumpReadList (
|
|
IN PMI_READ_LIST ReadList
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine dumps the given read-list range to the debugger.
|
|
|
|
Arguments:
|
|
|
|
ReadList - Pointer to the read-list.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
Environment:
|
|
|
|
Kernel mode.
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG i;
|
|
PMDL Mdl;
|
|
PMMPFN Pfn1;
|
|
PMMPTE LocalPrototypePte;
|
|
PFN_NUMBER PageFrameIndex;
|
|
PMMINPAGE_SUPPORT InPageSupport;
|
|
PSINGLE_LIST_ENTRY NextEntry;
|
|
PPFN_NUMBER Page;
|
|
PVOID StartingVa;
|
|
PFN_NUMBER MdlPages;
|
|
LARGE_INTEGER ReadOffset;
|
|
|
|
if ((MiPfDebug & MI_PF_VERBOSE) == 0) {
|
|
return;
|
|
}
|
|
|
|
DbgPrint ("\nPF: Dumping read-list %x (FileObject %x ControlArea %x)\n\n",
|
|
ReadList, ReadList->FileObject, ReadList->ControlArea);
|
|
|
|
DbgPrint ("\tFileOffset | Pte | Pfn \n"
|
|
"\t-----------+---------------+----------\n");
|
|
|
|
NextEntry = ReadList->InPageSupportHead.Next;
|
|
while (NextEntry != NULL) {
|
|
|
|
InPageSupport = CONTAINING_RECORD(NextEntry,
|
|
MMINPAGE_SUPPORT,
|
|
ListEntry);
|
|
|
|
ReadOffset = InPageSupport->ReadOffset;
|
|
Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
|
|
|
|
Page = (PPFN_NUMBER)(Mdl + 1);
|
|
#if DBG
|
|
//
|
|
// MDL isn't filled in yet, skip it.
|
|
//
|
|
|
|
if (*Page == MM_EMPTY_LIST) {
|
|
NextEntry = NextEntry->Next;
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);
|
|
|
|
MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
|
|
Mdl->ByteCount);
|
|
|
|
//
|
|
// Default the MDL entry to the dummy page as the RLE PTEs may
|
|
// be noncontiguous and we have no way to distinguish the jumps.
|
|
//
|
|
|
|
for (i = 0; i < MdlPages; i += 1) {
|
|
PageFrameIndex = *Page;
|
|
Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
|
|
LocalPrototypePte = Pfn1->PteAddress;
|
|
|
|
if (LocalPrototypePte != MI_PF_DUMMY_PAGE_PTE) {
|
|
ASSERT (LocalPrototypePte->u.Hard.Valid == 0);
|
|
ASSERT ((LocalPrototypePte->u.Soft.Prototype == 0) &&
|
|
(LocalPrototypePte->u.Soft.Transition == 1));
|
|
}
|
|
|
|
DbgPrint ("\t %8x | %8x | %8x\n",
|
|
ReadOffset.LowPart,
|
|
LocalPrototypePte,
|
|
PageFrameIndex);
|
|
|
|
Page += 1;
|
|
ReadOffset.LowPart += PAGE_SIZE;
|
|
}
|
|
|
|
NextEntry = NextEntry->Next;
|
|
}
|
|
|
|
DbgPrint ("\t\n");
|
|
}
|
|
|
|
VOID
|
|
MiRemoveUserPages (
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine removes user space pages.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
Number of pages removed.
|
|
|
|
Environment:
|
|
|
|
Kernel mode.
|
|
|
|
--*/
|
|
|
|
{
|
|
PKTHREAD CurrentThread;
|
|
|
|
CurrentThread = KeGetCurrentThread ();
|
|
|
|
KeEnterCriticalRegionThread (CurrentThread);
|
|
|
|
InterlockedIncrement (&MiDelayPageFaults);
|
|
|
|
MmEmptyAllWorkingSets ();
|
|
MiFlushAllPages ();
|
|
|
|
InterlockedDecrement (&MiDelayPageFaults);
|
|
|
|
KeLeaveCriticalRegionThread (CurrentThread);
|
|
|
|
//
|
|
// Run the transition list and free all the entries so transition
|
|
// faults are not satisfied for any of the non modified pages that were
|
|
// freed.
|
|
//
|
|
|
|
MiPurgeTransitionList ();
|
|
}
|
|
#endif
|