mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
6686 lines
199 KiB
6686 lines
199 KiB
/*++

Copyright (c) 1990  Microsoft Corporation

Module Name:

    cachesub.c

Abstract:

    This module implements the common subroutines for the Cache subsystem.

Author:

    Tom Miller      [TomM]      4-May-1990

Revision History:

--*/
|
|
|
|
#include "cc.h"
|
|
|
|
//
|
|
// The Bug check file id for this module
|
|
//
|
|
|
|
#define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB)
|
|
|
|
//
|
|
// Define our debug constant
|
|
//
|
|
|
|
#define me 0x00000002
|
|
|
|
//
|
|
// Define those errors which should be retried
|
|
//
|
|
|
|
#define RetryError(STS) (((STS) == STATUS_VERIFY_REQUIRED) || ((STS) == STATUS_FILE_LOCK_CONFLICT))
|
|
|
|
ULONG CcMaxDirtyWrite = 0x10000;
|
|
|
|
//
|
|
// Local support routines
|
|
//
|
|
|
|
BOOLEAN
|
|
CcFindBcb (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN OUT PLARGE_INTEGER BeyondLastByte,
|
|
OUT PBCB *Bcb
|
|
);
|
|
|
|
PBCB
|
|
CcAllocateInitializeBcb (
|
|
IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
|
|
IN OUT PBCB AfterBcb,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN PLARGE_INTEGER Length
|
|
);
|
|
|
|
NTSTATUS
|
|
CcSetValidData (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER ValidDataLength
|
|
);
|
|
|
|
BOOLEAN
|
|
CcAcquireByteRangeForWrite (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER TargetOffset OPTIONAL,
|
|
IN ULONG TargetLength,
|
|
OUT PLARGE_INTEGER FileOffset,
|
|
OUT PULONG Length,
|
|
OUT PBCB *FirstBcb
|
|
);
|
|
|
|
VOID
|
|
CcReleaseByteRangeFromWrite (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN PBCB FirstBcb,
|
|
IN BOOLEAN VerifyRequired
|
|
);
|
|
|
|
PBITMAP_RANGE
|
|
CcFindBitmapRangeToDirty (
|
|
IN PMBCB Mbcb,
|
|
IN LONGLONG Page,
|
|
IN PULONG *FreePageForSetting
|
|
);
|
|
|
|
PBITMAP_RANGE
|
|
CcFindBitmapRangeToClean (
|
|
IN PMBCB Mbcb,
|
|
IN LONGLONG Page
|
|
);
|
|
|
|
BOOLEAN
|
|
CcLogError(
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PUNICODE_STRING FileName,
|
|
IN NTSTATUS Error,
|
|
IN NTSTATUS DeviceError,
|
|
IN UCHAR IrpMajorCode
|
|
);
|
|
|
|
|
|
|
|
//
|
|
// Internal support routine
|
|
//
|
|
|
|
BOOLEAN
|
|
CcPinFileData (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN BOOLEAN ReadOnly,
|
|
IN BOOLEAN WriteOnly,
|
|
IN ULONG Flags,
|
|
OUT PBCB *Bcb,
|
|
OUT PVOID *BaseAddress,
|
|
OUT PLARGE_INTEGER BeyondLastByte
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine locks the specified range of file data into memory.
|
|
|
|
Note that the data desired by the caller (or the first part of it)
|
|
may be in one of three states:
|
|
|
|
No Bcb exists which describes the data
|
|
|
|
A Bcb exists describing the data, but it is not mapped
|
|
(BcbOut->BaseAddress == NULL)
|
|
|
|
A Bcb exists describing the data, and it is mapped
|
|
|
|
Given the above three states, and given that the caller may call
|
|
with either Wait == FALSE or Wait == TRUE, this routine has basically
|
|
six cases. What has to be done, and the order in which things must be
|
|
done varies quite a bit with each of these six cases. The most
|
|
straight-forward implementation of this routine, with the least amount
|
|
of branching, is achieved by determining which of the six cases applies,
|
|
and dispatching fairly directly to that case. The handling of the
|
|
cases is summarized in the following table:
|
|
|
|
Wait == TRUE Wait == FALSE
|
|
------------ -------------
|
|
|
|
no Bcb Case 1: Case 2:
|
|
|
|
CcAllocateInitializeBcb CcMapAndRead (exit if FALSE)
|
|
Acquire Bcb Exclusive CcAllocateInitializeBcb
|
|
Release BcbList SpinLock Acquire Bcb Shared if not ReadOnly
|
|
CcMapAndRead w/ Wait Release BcbList SpinLock
|
|
Convert/Release Bcb Resource
|
|
|
|
Bcb not Case 3: Case 4:
|
|
mapped
|
|
Increment PinCount Acquire Bcb Exclusive (exit if FALSE)
|
|
Release BcbList SpinLock CcMapAndRead (exit if FALSE)
|
|
Acquire Bcb Excl. w/ Wait Increment PinCount
|
|
if still not mapped Convert/Release Bcb Resource
|
|
CcMapAndRead w/ Wait Release BcbList SpinLock
|
|
Convert/Release Bcb Resource
|
|
|
|
Bcb mapped Case 5: Case 6:
|
|
|
|
Increment PinCount if not ReadOnly
|
|
Release BcbList SpinLock Acquire Bcb shared (exit if FALSE)
|
|
if not ReadOnly Increment PinCount
|
|
Acquire Bcb Shared Release BcbList SpinLock
|
|
|
|
It is important to note that most changes to this routine will affect
|
|
multiple cases from above.
|
|
|
|
Arguments:
|
|
|
|
FileObject - Pointer to File Object for file
|
|
|
|
FileOffset - Offset in file at which map should begin
|
|
|
|
Length - Length of desired map in bytes
|
|
|
|
ReadOnly - Supplies TRUE if caller will only read the mapped data (i.e.,
|
|
TRUE for CcCopyRead, CcMapData and CcMdlRead and FALSE for
|
|
everyone else)
|
|
|
|
WriteOnly - The specified range of bytes will only be written.
|
|
|
|
Flags - (PIN_WAIT, PIN_EXCLUSIVE, PIN_NO_READ, etc. as defined in cache.h)
|
|
|
|
Bcb - Returns a pointer to the Bcb representing the pinned data.
|
|
|
|
BaseAddress - Returns base address of desired data
|
|
|
|
BeyondLastByte - Returns the File Offset of the first byte beyond the
|
|
last accessible byte.
|
|
|
|
Return Value:
|
|
|
|
FALSE - if PIN_WAIT was set, and it was impossible to lock all
|
|
of the data without blocking
|
|
TRUE - if the desired data, is being returned
|
|
|
|
Raises:
|
|
|
|
STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
|
|
This can only occur if Wait was specified as TRUE. (If Wait is
|
|
specified as FALSE, and an allocation failure occurs, this
|
|
routine simply returns FALSE.)
|
|
|
|
--*/
|
|
|
|
{
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
LARGE_INTEGER TrialBound;
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
PBCB BcbOut = NULL;
|
|
ULONG ZeroFlags = 0;
|
|
LOGICAL SpinLockAcquired = FALSE;
|
|
BOOLEAN Result = FALSE;
|
|
|
|
ULONG ReceivedLength;
|
|
ULONG ActivePage;
|
|
ULONG PageIsDirty;
|
|
PVACB Vacb = NULL;
|
|
|
|
DebugTrace(+1, me, "CcPinFileData:\n", 0 );
|
|
DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
|
DebugTrace( 0, me, " Flags = %02lx\n", Flags );
|
|
|
|
//
|
|
// Get pointer to SharedCacheMap via File Object.
|
|
//
|
|
|
|
SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
|
|
+ sizeof(PVOID));
|
|
|
|
//
|
|
// See if we have an active Vacb, that we need to free.
|
|
//
|
|
|
|
GetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage, PageIsDirty );
|
|
|
|
//
|
|
// If there is an end of a page to be zeroed, then free that page now,
|
|
// so it does not cause our data to get zeroed. If there is an active
|
|
// page, free it so we have the correct ValidDataGoal.
|
|
//
|
|
|
|
if ((Vacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {
|
|
|
|
CcFreeActiveVacb( SharedCacheMap, Vacb, ActivePage, PageIsDirty );
|
|
Vacb = NULL;
|
|
}
|
|
|
|
//
|
|
// Make sure the calling file system is not asking to map beyond the
|
|
// end of the section, for example, that it did not forget to do
|
|
// CcExtendCacheSection.
|
|
//
|
|
|
|
ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length ) <=
|
|
SharedCacheMap->SectionSize.QuadPart );
|
|
|
|
//
|
|
// Initially clear output
|
|
//
|
|
|
|
*Bcb = NULL;
|
|
*BaseAddress = NULL;
|
|
|
|
if (!FlagOn(Flags, PIN_NO_READ)) {
|
|
|
|
*BaseAddress = CcGetVirtualAddress( SharedCacheMap,
|
|
*FileOffset,
|
|
&Vacb,
|
|
&ReceivedLength );
|
|
|
|
} else {
|
|
|
|
//
|
|
// In the PIN_NO_READ case, we simply need to make sure that the
|
|
// sparse structure containing the Bcb listheads is expanded in the
|
|
// region of the file we are interested in.
|
|
//
|
|
// Fake a ReceivedLength that matches the remaining bytes in the view.
|
|
//
|
|
|
|
ReceivedLength = VACB_MAPPING_GRANULARITY -
|
|
(ULONG)(FileOffset->QuadPart & (VACB_MAPPING_GRANULARITY - 1));
|
|
|
|
//
|
|
// Now simply cause a reference that will expand a multilevel Vacb.
|
|
//
|
|
|
|
CcReferenceFileOffset( SharedCacheMap, *FileOffset );
|
|
}
|
|
|
|
//
|
|
// Acquire Bcb List Exclusive to look for Bcb
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
SpinLockAcquired = TRUE;
|
|
|
|
//
|
|
// Use try to guarantee cleanup on the way out.
|
|
//
|
|
|
|
try {
|
|
|
|
LOGICAL Found;
|
|
LARGE_INTEGER FOffset;
|
|
LARGE_INTEGER TLength;
|
|
|
|
//
|
|
// Search for Bcb describing the largest matching "prefix" byte range,
|
|
// or where to insert it.
|
|
//
|
|
|
|
TrialBound.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
|
|
Found = CcFindBcb( SharedCacheMap, FileOffset, &TrialBound, &BcbOut );
|
|
|
|
|
|
//
|
|
// Cases 1 and 2 - Bcb was not found.
|
|
//
|
|
// First caculate data to pin down.
|
|
//
|
|
|
|
if (!Found) {
|
|
|
|
//
|
|
// Get out if the user specified PIN_IF_BCB.
|
|
//
|
|
|
|
if (FlagOn(Flags, PIN_IF_BCB)) {
|
|
|
|
//
|
|
// We need to zap BcbOut since this is a hint to the cleanup code
|
|
// to remove the Bcb if we are returning FALSE.
|
|
//
|
|
|
|
BcbOut = NULL;
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
//
|
|
// Not found, calculate data to pin down.
|
|
//
|
|
// Round local copy of FileOffset down to page boundary, and
|
|
// round copies of size and minimum size up. Also make sure that
|
|
// we keep the length from crossing the end of the SharedCacheMap.
|
|
//
|
|
|
|
FOffset = *FileOffset;
|
|
TLength.QuadPart = TrialBound.QuadPart - FOffset.QuadPart;
|
|
|
|
TLength.LowPart += FOffset.LowPart & (PAGE_SIZE - 1);
|
|
ReceivedLength += FOffset.LowPart & (PAGE_SIZE - 1);
|
|
|
|
//
|
|
// At this point we can calculate the ReadOnly flag for
|
|
// the purposes of whether to use the Bcb resource, and
|
|
// we can calculate the ZeroFlags.
|
|
//
|
|
|
|
if ((!ReadOnly && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) || WriteOnly) {
|
|
|
|
//
|
|
// We can always zero middle pages, if any.
|
|
//
|
|
|
|
ZeroFlags = ZERO_MIDDLE_PAGES;
|
|
|
|
if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) &&
|
|
(Length >= PAGE_SIZE)) {
|
|
ZeroFlags |= ZERO_FIRST_PAGE;
|
|
}
|
|
|
|
if ((TLength.LowPart & (PAGE_SIZE - 1)) == 0) {
|
|
ZeroFlags |= ZERO_LAST_PAGE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// We treat Bcbs as ReadOnly (do not acquire resource) if they
|
|
// are in sections for which we have not disabled modified writing.
|
|
//
|
|
|
|
if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
|
ReadOnly = TRUE;
|
|
}
|
|
|
|
TLength.LowPart = (ULONG) ROUND_TO_PAGES( TLength.LowPart );
|
|
|
|
//
|
|
// Round BaseAddress and FOffset down to the bottom of a page.
|
|
//
|
|
|
|
*BaseAddress = ((PCHAR)*BaseAddress - (FileOffset->LowPart & (PAGE_SIZE - 1)));
|
|
FOffset.LowPart &= ~(PAGE_SIZE - 1);
|
|
|
|
//
|
|
// Even if we are readonly, we can still zero pages entirely
|
|
// beyond valid data length.
|
|
//
|
|
|
|
if (FOffset.QuadPart >= SharedCacheMap->ValidDataGoal.QuadPart) {
|
|
|
|
ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
|
|
|
|
} else if ((FOffset.QuadPart + (LONGLONG)PAGE_SIZE) >=
|
|
SharedCacheMap->ValidDataGoal.QuadPart) {
|
|
|
|
ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
|
|
}
|
|
|
|
//
|
|
// We will get into trouble if we try to read more than we
|
|
// can map by one Vacb. So make sure that our lengths stay
|
|
// within a Vacb.
|
|
//
|
|
|
|
if (TLength.LowPart > ReceivedLength) {
|
|
TLength.LowPart = ReceivedLength;
|
|
}
|
|
|
|
|
|
//
|
|
// Case 1 - Bcb was not found and Wait is TRUE.
|
|
//
|
|
// Note that it is important to minimize the time that the Bcb
|
|
// List spin lock is held, as well as guarantee we do not take
|
|
// any faults while holding this lock.
|
|
//
|
|
// If we can (and perhaps will) wait, then it is important to
|
|
// allocate the Bcb acquire it exclusive and free the Bcb List.
|
|
// We then procede to read in the data, and anyone else finding
|
|
// our Bcb will have to wait shared to insure that the data is
|
|
// in.
|
|
//
|
|
|
|
if (FlagOn(Flags, PIN_WAIT)) {
|
|
|
|
BcbOut = CcAllocateInitializeBcb( SharedCacheMap,
|
|
BcbOut,
|
|
&FOffset,
|
|
&TLength );
|
|
|
|
if (BcbOut == NULL) {
|
|
DebugTrace( 0, 0, "Bcb allocation failure\n", 0 );
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
|
|
}
|
|
|
|
//
|
|
// Now just acquire the newly-allocated Bcb shared, and
|
|
// release the spin lock.
|
|
//
|
|
|
|
if (!ReadOnly) {
|
|
if (FlagOn(Flags, PIN_EXCLUSIVE)) {
|
|
(VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
|
|
} else {
|
|
(VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
|
|
}
|
|
}
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
|
|
//
|
|
// Now read in the data.
|
|
//
|
|
|
|
if (!FlagOn(Flags, PIN_NO_READ)) {
|
|
|
|
(VOID)CcMapAndRead( SharedCacheMap,
|
|
&FOffset,
|
|
TLength.LowPart,
|
|
ZeroFlags,
|
|
TRUE,
|
|
*BaseAddress );
|
|
|
|
//
|
|
// Now we have to reacquire the Bcb List spinlock to load
|
|
// up the mapping if we are the first one, else we collided
|
|
// with someone else who loaded the mapping first, and we
|
|
// will just free our mapping. It is guaranteed that the
|
|
// data will be mapped to the same place.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
if (BcbOut->BaseAddress == NULL) {
|
|
|
|
BcbOut->BaseAddress = *BaseAddress;
|
|
BcbOut->Vacb = Vacb;
|
|
Vacb = NULL;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Calculate Base Address of the data we want.
|
|
//
|
|
|
|
*BaseAddress = (PCHAR)BcbOut->BaseAddress +
|
|
(ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
|
|
}
|
|
|
|
//
|
|
// Success!
|
|
//
|
|
|
|
try_return( Result = TRUE );
|
|
}
|
|
|
|
|
|
//
|
|
// Case 2 - Bcb was not found and Wait is FALSE
|
|
//
|
|
// If we cannot wait, then we go immediately see if the data is
|
|
// there (CcMapAndRead), and then only set up the Bcb and release
|
|
// the spin lock if the data is there. Note here we call
|
|
// CcMapAndRead while holding the spin lock, because we know we
|
|
// will not fault and not block before returning.
|
|
//
|
|
|
|
else {
|
|
|
|
//
|
|
// Now try to allocate and initialize the Bcb. If we
|
|
// fail to allocate one, then return FALSE, since we know that
|
|
// Wait = FALSE. The caller may get lucky if he calls
|
|
// us back with Wait = TRUE.
|
|
//
|
|
|
|
BcbOut = CcAllocateInitializeBcb( SharedCacheMap,
|
|
BcbOut,
|
|
&FOffset,
|
|
&TLength );
|
|
|
|
if (BcbOut == NULL) {
|
|
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
//
|
|
// If we are not ReadOnly, we must acquire the newly-allocated
|
|
// resource shared, and then we can free the spin lock.
|
|
//
|
|
|
|
if (!ReadOnly) {
|
|
ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
|
|
}
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
|
|
//
|
|
// Note that since this call has Wait = FALSE, it cannot
|
|
// get an exception (see procedure header).
|
|
//
|
|
|
|
ASSERT( !FlagOn(Flags, PIN_NO_READ) );
|
|
if (!CcMapAndRead( SharedCacheMap,
|
|
&FOffset,
|
|
TLength.LowPart,
|
|
ZeroFlags,
|
|
FALSE,
|
|
*BaseAddress )) {
|
|
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
//
|
|
// Now we have to reacquire the Bcb List spinlock to load
|
|
// up the mapping if we are the first one, else we collided
|
|
// with someone else who loaded the mapping first, and we
|
|
// will just free our mapping. It is guaranteed that the
|
|
// data will be mapped to the same place.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
if (BcbOut->BaseAddress == NULL) {
|
|
|
|
BcbOut->BaseAddress = *BaseAddress;
|
|
BcbOut->Vacb = Vacb;
|
|
Vacb = NULL;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Calculate Base Address of the data we want.
|
|
//
|
|
|
|
*BaseAddress = (PCHAR)BcbOut->BaseAddress +
|
|
(ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
|
|
|
|
//
|
|
// Success!
|
|
//
|
|
|
|
try_return( Result = TRUE );
|
|
}
|
|
|
|
} else {
|
|
|
|
//
|
|
// We treat Bcbs as ReadOnly (do not acquire resource) if they
|
|
// are in sections for which we have not disabled modified writing.
|
|
//
|
|
|
|
if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
|
ReadOnly = TRUE;
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Cases 3 and 4 - Bcb is there but not mapped
|
|
//
|
|
|
|
if (BcbOut->BaseAddress == NULL) {
|
|
|
|
//
|
|
// It is too complicated to attempt to calculate any ZeroFlags in this
|
|
// case, because we have to not only do the tests above, but also
|
|
// compare to the byte range in the Bcb since we will be passing
|
|
// those parameters to CcMapAndRead. Also, the probability of hitting
|
|
// some window where zeroing is of any advantage is quite small.
|
|
//
|
|
|
|
//
|
|
// Set up to just reread the Bcb exactly as the data in it is
|
|
// described.
|
|
//
|
|
|
|
*BaseAddress = ((PCHAR)*BaseAddress - (FileOffset->LowPart - BcbOut->FileOffset.LowPart));
|
|
FOffset = BcbOut->FileOffset;
|
|
TLength.QuadPart = (LONGLONG)BcbOut->ByteLength;
|
|
|
|
//
|
|
// Case 3 - Bcb is there but not mapped and Wait is TRUE
|
|
//
|
|
// Increment the PinCount, and then release the BcbList
|
|
// SpinLock so that we can wait to acquire the Bcb exclusive.
|
|
// Once we have the Bcb exclusive, map and read it in if no
|
|
// one beats us to it. Someone may have beat us to it since
|
|
// we had to release the SpinLock above.
|
|
//
|
|
|
|
if (FlagOn(Flags, PIN_WAIT)) {
|
|
|
|
BcbOut->PinCount += 1;
|
|
|
|
//
|
|
// Now we have to release the BcbList SpinLock in order to
|
|
// acquire the Bcb shared.
|
|
//
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
if (!ReadOnly) {
|
|
if (FlagOn(Flags, PIN_EXCLUSIVE)) {
|
|
(VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
|
|
} else {
|
|
(VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now procede to map and read the data in.
|
|
//
|
|
// Now read in the data.
|
|
//
|
|
|
|
if (!FlagOn(Flags, PIN_NO_READ)) {
|
|
|
|
(VOID)CcMapAndRead( SharedCacheMap,
|
|
&FOffset,
|
|
TLength.LowPart,
|
|
ZeroFlags,
|
|
TRUE,
|
|
*BaseAddress );
|
|
|
|
//
|
|
// Now we have to reacquire the Bcb List spinlock to load
|
|
// up the mapping if we are the first one, else we collided
|
|
// with someone else who loaded the mapping first, and we
|
|
// will just free our mapping. It is guaranteed that the
|
|
// data will be mapped to the same place.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
if (BcbOut->BaseAddress == NULL) {
|
|
|
|
BcbOut->BaseAddress = *BaseAddress;
|
|
BcbOut->Vacb = Vacb;
|
|
Vacb = NULL;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
//
|
|
// Calculate Base Address of the data we want.
|
|
//
|
|
|
|
*BaseAddress = (PCHAR)BcbOut->BaseAddress +
|
|
(ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
|
|
}
|
|
|
|
//
|
|
// Success!
|
|
//
|
|
|
|
try_return( Result = TRUE );
|
|
}
|
|
|
|
|
|
//
|
|
// Case 4 - Bcb is there but not mapped, and Wait is FALSE
|
|
//
|
|
// Since we cannot wait, we go immediately see if the data is
|
|
// there (CcMapAndRead), and then only set up the Bcb and release
|
|
// the spin lock if the data is there. Note here we call
|
|
// CcMapAndRead while holding the spin lock, because we know we
|
|
// will not fault and not block before returning.
|
|
//
|
|
|
|
else {
|
|
|
|
if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {
|
|
|
|
//
|
|
// If we cannot get the resource and have not incremented PinCount, then
|
|
// suppress the unpin on cleanup.
|
|
//
|
|
|
|
BcbOut = NULL;
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
BcbOut->PinCount += 1;
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
|
|
//
|
|
// Note that since this call has Wait = FALSE, it cannot
|
|
// get an exception (see procedure header).
|
|
//
|
|
|
|
ASSERT( !FlagOn(Flags, PIN_NO_READ) );
|
|
if (!CcMapAndRead( SharedCacheMap,
|
|
&BcbOut->FileOffset,
|
|
BcbOut->ByteLength,
|
|
ZeroFlags,
|
|
FALSE,
|
|
*BaseAddress )) {
|
|
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
//
|
|
// Now we have to reacquire the Bcb List spinlock to load
|
|
// up the mapping if we are the first one, else we collided
|
|
// with someone else who loaded the mapping first, and we
|
|
// will just free our mapping. It is guaranteed that the
|
|
// data will be mapped to the same place.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
if (BcbOut->BaseAddress == NULL) {
|
|
|
|
BcbOut->BaseAddress = *BaseAddress;
|
|
BcbOut->Vacb = Vacb;
|
|
Vacb = NULL;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Calculate Base Address of the data we want.
|
|
//
|
|
|
|
*BaseAddress = (PCHAR)BcbOut->BaseAddress +
|
|
(ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
|
|
|
|
//
|
|
// Success!
|
|
//
|
|
|
|
try_return( Result = TRUE );
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Cases 5 and 6 - Bcb is there and it is mapped
|
|
//
|
|
|
|
else {
|
|
|
|
//
|
|
// Case 5 - Bcb is there and mapped, and Wait is TRUE
|
|
//
|
|
// We can just increment the PinCount, release the SpinLock
|
|
// and then acquire the Bcb Shared if we are not ReadOnly.
|
|
//
|
|
|
|
if (FlagOn(Flags, PIN_WAIT)) {
|
|
|
|
BcbOut->PinCount += 1;
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
|
|
//
|
|
// Acquire Bcb Resource shared to insure that it is in memory.
|
|
//
|
|
|
|
if (!ReadOnly) {
|
|
if (FlagOn(Flags, PIN_EXCLUSIVE)) {
|
|
(VOID)ExAcquireResourceExclusiveLite( &BcbOut->Resource, TRUE );
|
|
} else {
|
|
(VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Case 6 - Bcb is there and mapped, and Wait is FALSE
|
|
//
|
|
// If we are not ReadOnly, we have to first see if we can
|
|
// acquire the Bcb shared before incrmenting the PinCount,
|
|
// since we will have to return FALSE if we cannot acquire the
|
|
// resource.
|
|
//
|
|
|
|
else {
|
|
|
|
//
|
|
// Acquire Bcb Resource shared to insure that it is in memory.
|
|
//
|
|
|
|
if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {
|
|
|
|
//
|
|
// If we cannot get the resource and have not incremented PinCount, then
|
|
// suppress the unpin on cleanup.
|
|
//
|
|
|
|
BcbOut = NULL;
|
|
try_return( Result = FALSE );
|
|
}
|
|
|
|
BcbOut->PinCount += 1;
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
SpinLockAcquired = FALSE;
|
|
}
|
|
|
|
//
|
|
// Calculate Base Address of the data we want.
|
|
//
|
|
|
|
*BaseAddress = (PCHAR)BcbOut->BaseAddress +
|
|
(ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
|
|
|
|
//
|
|
// Success!
|
|
//
|
|
|
|
try_return( Result = TRUE );
|
|
}
|
|
|
|
|
|
try_exit: NOTHING;
|
|
|
|
if (FlagOn(Flags, PIN_NO_READ) &&
|
|
FlagOn(Flags, PIN_EXCLUSIVE) &&
|
|
(BcbOut != NULL) &&
|
|
(BcbOut->BaseAddress != NULL)) {
|
|
|
|
//
|
|
// Unmap the Vacb and free the resource if the Bcb is still
|
|
// dirty. We have to free the resource before dropping the
|
|
// spinlock, and we want to hold the resource until the
|
|
// virtual address is freed.
|
|
//
|
|
|
|
CcFreeVirtualAddress( BcbOut->Vacb );
|
|
|
|
BcbOut->BaseAddress = NULL;
|
|
BcbOut->Vacb = NULL;
|
|
}
|
|
|
|
} finally {
|
|
|
|
//
|
|
// Release the spinlock if it is acquired.
|
|
//
|
|
|
|
if (SpinLockAcquired) {
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
//
|
|
// If the Vacb was not used for any reason (error or not needed), then free it here.
|
|
//
|
|
|
|
if (Vacb != NULL) {
|
|
CcFreeVirtualAddress( Vacb );
|
|
}
|
|
|
|
//
|
|
// If we referenced a piece of a multilevel structure, release here.
|
|
//
|
|
|
|
if (FlagOn(Flags, PIN_NO_READ)) {
|
|
|
|
CcDereferenceFileOffset( SharedCacheMap, *FileOffset );
|
|
}
|
|
|
|
if (Result) {
|
|
|
|
*Bcb = BcbOut;
|
|
*BeyondLastByte = BcbOut->BeyondLastByte;
|
|
|
|
//
|
|
// An abnormal termination can occur on an allocation failure,
|
|
// or on a failure to map and read the buffer.
|
|
//
|
|
|
|
} else {
|
|
|
|
*BaseAddress = NULL;
|
|
if (BcbOut != NULL) {
|
|
CcUnpinFileData( BcbOut, ReadOnly, UNPIN );
|
|
}
|
|
}
|
|
|
|
DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
|
|
DebugTrace( 0, me, " <BaseAddress = %08lx\n", *BaseAddress );
|
|
DebugTrace(-1, me, "CcPinFileData -> %02lx\n", Result );
|
|
}
|
|
|
|
return Result;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
VOID
|
|
FASTCALL
|
|
CcUnpinFileData (
|
|
IN OUT PBCB Bcb,
|
|
IN BOOLEAN ReadOnly,
|
|
IN UNMAP_ACTIONS UnmapAction
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine umaps and unlocks the specified buffer, which was previously
|
|
locked and mapped by calling CcPinFileData.
|
|
|
|
Arguments:
|
|
|
|
Bcb - Pointer previously returned from CcPinFileData. As may be
|
|
seen above, this pointer may be either a Bcb or a Vacb.
|
|
|
|
ReadOnly - must specify same value as when data was mapped
|
|
|
|
UnmapAction - UNPIN or SET_CLEAN
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
|
|
DebugTrace(+1, me, "CcUnpinFileData >Bcb = %08lx\n", Bcb );
|
|
|
|
//
|
|
// Note, since we have to allocate so many Vacbs, we do not use
|
|
// a node type code. However, the Vacb starts with a BaseAddress,
|
|
// so we assume that the low byte of the Bcb node type code has
|
|
// some bits set, which a page-aligned Base Address cannot.
|
|
//
|
|
|
|
ASSERT( (CACHE_NTC_BCB & 0xFF) != 0 );
|
|
|
|
if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {
|
|
|
|
ASSERT(((PVACB)Bcb >= CcVacbs) && ((PVACB)Bcb < CcBeyondVacbs));
|
|
ASSERT(((PVACB)Bcb)->SharedCacheMap->NodeTypeCode == CACHE_NTC_SHARED_CACHE_MAP);
|
|
|
|
CcFreeVirtualAddress( (PVACB)Bcb );
|
|
|
|
DebugTrace(-1, me, "CcUnpinFileData -> VOID (simple release)\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
SharedCacheMap = Bcb->SharedCacheMap;
|
|
|
|
//
|
|
// We treat Bcbs as ReadOnly (do not acquire resource) if they
|
|
// are in sections for which we have not disabled modified writing, or
|
|
// in this special case if this action is a dereferencing of the BCB.
|
|
//
|
|
|
|
if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) ||
|
|
UnmapAction == UNREF) {
|
|
ReadOnly = TRUE;
|
|
}
|
|
|
|
//
|
|
// Synchronize
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
switch (UnmapAction) {
|
|
|
|
case UNPIN:
|
|
case UNREF:
|
|
|
|
ASSERT( Bcb->PinCount > 0 );
|
|
|
|
Bcb->PinCount -= 1;
|
|
break;
|
|
|
|
case SET_CLEAN:
|
|
|
|
if (Bcb->Dirty) {
|
|
|
|
ULONG Pages = Bcb->ByteLength >> PAGE_SHIFT;
|
|
|
|
//
|
|
// Reverse the rest of the actions taken when the Bcb was set dirty.
|
|
//
|
|
|
|
Bcb->Dirty = FALSE;
|
|
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
CcDeductDirtyPages( SharedCacheMap, Pages );
|
|
|
|
//
|
|
// Normally we need to reduce CcPagesYetToWrite appropriately.
|
|
//
|
|
|
|
if (CcPagesYetToWrite > Pages) {
|
|
CcPagesYetToWrite -= Pages;
|
|
} else {
|
|
CcPagesYetToWrite = 0;
|
|
}
|
|
|
|
//
|
|
// Remove SharedCacheMap from dirty list if nothing more dirty,
|
|
// and someone still has the cache map opened.
|
|
//
|
|
|
|
if ((SharedCacheMap->DirtyPages == 0) &&
|
|
(SharedCacheMap->OpenCount != 0)) {
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcCleanSharedCacheMapList,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
}
|
|
|
|
CcReleaseMasterLockFromDpcLevel();
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
CcBugCheck( UnmapAction, 0, 0 );
|
|
}
|
|
|
|
//
|
|
// If we brought it to 0, then we have to kill it.
|
|
//
|
|
|
|
if (Bcb->PinCount == 0) {
|
|
|
|
//
|
|
// If the Bcb is Dirty, we only release the resource and unmap now.
|
|
//
|
|
|
|
if (Bcb->Dirty) {
|
|
|
|
if (Bcb->BaseAddress != NULL) {
|
|
|
|
//
|
|
// Unmap the Vacb and free the resource if the Bcb is still
|
|
// dirty. We have to free the resource before dropping the
|
|
// spinlock, and we want to hold the resource until the
|
|
// virtual address is freed.
|
|
//
|
|
|
|
CcFreeVirtualAddress( Bcb->Vacb );
|
|
|
|
Bcb->BaseAddress = NULL;
|
|
Bcb->Vacb = NULL;
|
|
}
|
|
|
|
if (!ReadOnly) {
|
|
ExReleaseResourceLite( &Bcb->Resource );
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
//
|
|
// Otherwise, we also delete the Bcb.
|
|
//
|
|
|
|
else {
|
|
|
|
//
|
|
// Since CcCalculateVacbLockCount has to be able to walk
|
|
// the BcbList with only the VacbSpinLock, we take that one
|
|
// out to change the list and decrement the level.
|
|
//
|
|
|
|
CcAcquireVacbLockAtDpcLevel();
|
|
RemoveEntryList( &Bcb->BcbLinks );
|
|
|
|
//
|
|
// For large metadata streams we unlock the Vacb level.
|
|
//
|
|
|
|
CcUnlockVacbLevel( SharedCacheMap, Bcb->FileOffset.QuadPart );
|
|
CcReleaseVacbLockFromDpcLevel();
|
|
|
|
//
|
|
// Debug routines used to remove Bcbs from the global list
|
|
//
|
|
|
|
#if LIST_DBG
|
|
|
|
KeAcquireQueuedSpinLockAtDpcLevel( KeQueuedSpinLockContext(LockQueueBcbLock) );
|
|
|
|
if (Bcb->CcBcbLinks.Flink != NULL) {
|
|
|
|
RemoveEntryList( &Bcb->CcBcbLinks );
|
|
CcBcbCount -= 1;
|
|
}
|
|
|
|
KeReleaseQueuedSpinLockFromDpcLevel( KeQueuedSpinLockContext(LockQueueBcbLock) );
|
|
|
|
#endif
|
|
|
|
if (Bcb->BaseAddress != NULL) {
|
|
|
|
CcFreeVirtualAddress( Bcb->Vacb );
|
|
}
|
|
#if DBG
|
|
if (!ReadOnly) {
|
|
ExReleaseResourceLite( &Bcb->Resource );
|
|
}
|
|
|
|
//
|
|
// ASSERT that the resource is unowned.
|
|
//
|
|
|
|
ASSERT( Bcb->Resource.ActiveCount == 0 );
|
|
#endif
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
CcDeallocateBcb( Bcb );
|
|
}
|
|
}
|
|
|
|
//
|
|
// Else we just have to release our Shared access, if we are not
|
|
// readonly. We don't need to do this above, since we deallocate
|
|
// the entire Bcb there.
|
|
//
|
|
|
|
else {
|
|
|
|
if (!ReadOnly) {
|
|
ExReleaseResourceLite( &Bcb->Resource );
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcUnpinFileData -> VOID\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
VOID
|
|
CcSetReadAheadGranularity (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN ULONG Granularity
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to set the read ahead granularity used by
|
|
the Cache Manager. The default is PAGE_SIZE. The number is decremented
|
|
and stored as a mask.
|
|
|
|
Arguments:
|
|
|
|
FileObject - File Object for which granularity shall be set
|
|
|
|
Granularity - new granularity, which must be an even power of 2 and
|
|
>= PAGE_SIZE
|
|
|
|
Return Value:
|
|
|
|
None
|
|
--*/
|
|
|
|
{
|
|
((PPRIVATE_CACHE_MAP)FileObject->PrivateCacheMap)->ReadAheadMask = Granularity - 1;
|
|
}
|
|
|
|
|
|
VOID
|
|
CcScheduleReadAhead (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by Copy Read and Mdl Read file system routines to
|
|
perform common Read Ahead processing. The input parameters describe
|
|
the current read which has just been completed, or perhaps only started
|
|
in the case of Mdl Reads. Based on these parameters, an
|
|
assessment is made on how much data should be read ahead, and whether
|
|
that data has already been read ahead.
|
|
|
|
The processing is divided into two parts:
|
|
|
|
CALCULATE READ AHEAD REQUIREMENTS (CcScheduleReadAhead)
|
|
|
|
PERFORM READ AHEAD (CcPerformReadAhead)
|
|
|
|
File systems should always call CcReadAhead, which will conditionally
|
|
call CcScheduleReadAhead (if the read is large enough). If such a call
|
|
determines that there is read ahead work to do, and no read ahead is
|
|
currently active, then it will set ReadAheadActive and schedule read
|
|
ahead to be performed by the Lazy Writer, who will call CcPerformReadAhead.
|
|
|
|
Arguments:
|
|
|
|
FileObject - supplies pointer to FileObject on which readahead should be
|
|
considered.
|
|
|
|
FileOffset - supplies the FileOffset at which the last read just occurred.
|
|
|
|
Length - supplies the length of the last read.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
--*/
|
|
|
|
{
|
|
LARGE_INTEGER NewOffset;
|
|
LARGE_INTEGER NewBeyond;
|
|
LARGE_INTEGER FileOffset1, FileOffset2;
|
|
KIRQL OldIrql;
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
PPRIVATE_CACHE_MAP PrivateCacheMap;
|
|
PWORK_QUEUE_ENTRY WorkQueueEntry;
|
|
ULONG ReadAheadSize;
|
|
LOGICAL Changed = FALSE;
|
|
|
|
DebugTrace(+1, me, "CcScheduleReadAhead:\n", 0 );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
|
|
|
SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
|
|
+ sizeof(PVOID));
|
|
PrivateCacheMap = FileObject->PrivateCacheMap;
|
|
|
|
if ((PrivateCacheMap == NULL) ||
|
|
(SharedCacheMap == NULL) ||
|
|
FlagOn(SharedCacheMap->Flags, DISABLE_READ_AHEAD)) {
|
|
|
|
DebugTrace(-1, me, "CcScheduleReadAhead -> VOID (Nooped)\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Round boundaries of transfer up to some greater granularity, so that
|
|
// sequential reads will be recognized even if a few bytes are skipped
|
|
// between records.
|
|
//
|
|
|
|
NewOffset = *FileOffset;
|
|
NewBeyond.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
|
|
|
|
//
|
|
// Find the next read ahead boundary beyond the current read.
|
|
//
|
|
|
|
ReadAheadSize = (Length + PrivateCacheMap->ReadAheadMask) & ~PrivateCacheMap->ReadAheadMask;
|
|
FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;
|
|
FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
|
|
|
|
//
|
|
// CALCULATE READ AHEAD REQUIREMENTS
|
|
//
|
|
|
|
//
|
|
// Take out the ReadAhead spinlock to synchronize our read ahead decision.
|
|
//
|
|
|
|
ExAcquireSpinLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );
|
|
|
|
//
|
|
// Read Ahead Case 0.
|
|
//
|
|
// Sequential-only hint in the file object. For this case we will
|
|
// try and always keep two read ahead granularities read ahead from
|
|
// and including the end of the current transfer. This case has the
|
|
// lowest overhead, and the code is completely immune to how the
|
|
// caller skips around. Sequential files use ReadAheadOffset[1] in
|
|
// the PrivateCacheMap as their "high water mark".
|
|
//
|
|
|
|
if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
|
|
|
|
//
|
|
// If the next boundary is greater than or equal to the high-water mark,
|
|
// then read ahead.
|
|
//
|
|
|
|
if (FileOffset2.QuadPart >= PrivateCacheMap->ReadAheadOffset[1].QuadPart) {
|
|
|
|
//
|
|
// On the first read if we are using a large read ahead granularity,
|
|
// and the read did not get it all, we will just get the rest of the
|
|
// first data we want.
|
|
//
|
|
|
|
if ((FileOffset->QuadPart == 0)
|
|
|
|
&&
|
|
|
|
(PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))
|
|
|
|
&&
|
|
|
|
((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {
|
|
|
|
FileOffset1.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
|
|
PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize - FileOffset1.LowPart;
|
|
FileOffset2.QuadPart = (LONGLONG)ReadAheadSize;
|
|
|
|
//
|
|
// Calculate the next read ahead boundary.
|
|
//
|
|
|
|
} else {
|
|
|
|
FileOffset1.QuadPart = PrivateCacheMap->ReadAheadOffset[1].QuadPart +
|
|
(LONGLONG)ReadAheadSize;
|
|
|
|
//
|
|
// If the end of the current read is actually beyond where we would
|
|
// normally do our read ahead, then we have fallen behind, and we must
|
|
// advance to that spot.
|
|
//
|
|
|
|
if (FileOffset2.QuadPart > FileOffset1.QuadPart) {
|
|
FileOffset1 = FileOffset2;
|
|
}
|
|
PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize;
|
|
FileOffset2.QuadPart = FileOffset1.QuadPart + (LONGLONG)ReadAheadSize;
|
|
}
|
|
|
|
//
|
|
// Now issue the next two read aheads.
|
|
//
|
|
|
|
PrivateCacheMap->ReadAheadOffset[0] = FileOffset1;
|
|
|
|
PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
|
|
PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;
|
|
|
|
Changed = TRUE;
|
|
}
|
|
|
|
//
|
|
// Read Ahead Case 1.
|
|
//
|
|
// If this is the third of three sequential reads, then we will see if
|
|
// we can read ahead. Note that if the first read to a file is to
|
|
// offset 0, it passes this test.
|
|
//
|
|
|
|
} else if ((NewOffset.HighPart == PrivateCacheMap->BeyondLastByte2.HighPart)
|
|
|
|
&&
|
|
|
|
((NewOffset.LowPart & ~NOISE_BITS)
|
|
== (PrivateCacheMap->BeyondLastByte2.LowPart & ~NOISE_BITS))
|
|
|
|
&&
|
|
|
|
(PrivateCacheMap->FileOffset2.HighPart
|
|
== PrivateCacheMap->BeyondLastByte1.HighPart)
|
|
|
|
&&
|
|
|
|
((PrivateCacheMap->FileOffset2.LowPart & ~NOISE_BITS)
|
|
== (PrivateCacheMap->BeyondLastByte1.LowPart & ~NOISE_BITS))) {
|
|
|
|
//
|
|
// On the first read if we are using a large read ahead granularity,
|
|
// and the read did not get it all, we will just get the rest of the
|
|
// first data we want.
|
|
//
|
|
|
|
if ((FileOffset->QuadPart == 0)
|
|
|
|
&&
|
|
|
|
(PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))
|
|
|
|
&&
|
|
|
|
((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {
|
|
|
|
FileOffset2.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
|
|
}
|
|
|
|
//
|
|
// Round read offset to next read ahead boundary.
|
|
//
|
|
|
|
else {
|
|
FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;
|
|
|
|
FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
|
|
}
|
|
|
|
//
|
|
// Set read ahead length to be the same as for the most recent read,
|
|
// up to our max.
|
|
//
|
|
|
|
if (FileOffset2.QuadPart != PrivateCacheMap->ReadAheadOffset[1].QuadPart) {
|
|
|
|
ASSERT( FileOffset2.HighPart >= 0 );
|
|
|
|
Changed = TRUE;
|
|
PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
|
|
PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Read Ahead Case 2.
|
|
//
|
|
// If this is the third read following a particular stride, then we
|
|
// will see if we can read ahead. One example of an application that
|
|
// might do this is a spreadsheet. Note that this code even works
|
|
// for negative strides.
|
|
//
|
|
|
|
else if ( ( NewOffset.QuadPart -
|
|
PrivateCacheMap->FileOffset2.QuadPart ) ==
|
|
( PrivateCacheMap->FileOffset2.QuadPart -
|
|
PrivateCacheMap->FileOffset1.QuadPart )) {
|
|
|
|
//
|
|
// According to the current stride, the next offset will be:
|
|
//
|
|
// NewOffset + (NewOffset - FileOffset2)
|
|
//
|
|
// which is the same as:
|
|
//
|
|
// (NewOffset * 2) - FileOffset2
|
|
//
|
|
|
|
FileOffset2.QuadPart = ( NewOffset.QuadPart << 1 ) - PrivateCacheMap->FileOffset2.QuadPart;
|
|
|
|
//
|
|
// If our stride is going backwards through the file, we
|
|
// have to detect the case where the next step would wrap.
|
|
//
|
|
|
|
if (FileOffset2.HighPart >= 0) {
|
|
|
|
//
|
|
// The read ahead length must be extended by the same amount that
|
|
// we will round the PrivateCacheMap->ReadAheadOffset down.
|
|
//
|
|
|
|
Length += FileOffset2.LowPart & (PAGE_SIZE - 1);
|
|
|
|
//
|
|
// Now round the PrivateCacheMap->ReadAheadOffset down.
|
|
//
|
|
|
|
FileOffset2.LowPart &= ~(PAGE_SIZE - 1);
|
|
PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
|
|
|
|
//
|
|
// Round to page boundary.
|
|
//
|
|
|
|
PrivateCacheMap->ReadAheadLength[1] = (ULONG) ROUND_TO_PAGES(Length);
|
|
Changed = TRUE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Get out if the ReadAhead requirements did not change.
|
|
//
|
|
|
|
if (!Changed || PrivateCacheMap->Flags.ReadAheadActive) {
|
|
|
|
DebugTrace( 0, me, "Read ahead already in progress or no change\n", 0 );
|
|
|
|
ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Otherwise, we will proceed and try to schedule the read ahead
|
|
// ourselves.
|
|
//
|
|
|
|
CC_SET_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);
|
|
|
|
//
|
|
// Release spin lock on way out
|
|
//
|
|
|
|
ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
|
|
|
|
//
|
|
// Queue the read ahead request to the Lazy Writer's work queue.
|
|
//
|
|
|
|
DebugTrace( 0, me, "Queueing read ahead to worker thread\n", 0 );
|
|
|
|
WorkQueueEntry = CcAllocateWorkQueueEntry();
|
|
|
|
//
|
|
// If we failed to allocate a work queue entry, then, we will
|
|
// quietly bag it. Read ahead is only an optimization, and
|
|
// no one ever requires that it occur.
|
|
//
|
|
|
|
if (WorkQueueEntry != NULL) {
|
|
|
|
//
|
|
// We must reference this file object so that it cannot go away
|
|
// until we finish Read Ahead processing in the Worker Thread.
|
|
//
|
|
|
|
ObReferenceObject ( FileObject );
|
|
|
|
//
|
|
// Increment open count to make sure the SharedCacheMap stays around.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
CcIncrementOpenCount( SharedCacheMap, 'adRQ' );
|
|
CcReleaseMasterLock( OldIrql );
|
|
|
|
WorkQueueEntry->Function = (UCHAR)ReadAhead;
|
|
WorkQueueEntry->Parameters.Read.FileObject = FileObject;
|
|
|
|
CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue );
|
|
}
|
|
|
|
//
|
|
// If we failed to allocate a Work Queue Entry, or all of the pages
|
|
// are resident we must set the active flag false.
|
|
//
|
|
|
|
else {
|
|
|
|
ExAcquireFastLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );
|
|
CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);
|
|
ExReleaseFastLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcScheduleReadAhead -> VOID\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
VOID
|
|
FASTCALL
|
|
CcPerformReadAhead (
|
|
IN PFILE_OBJECT FileObject
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by the Lazy Writer to perform read ahead which
|
|
has been scheduled for this file by CcScheduleReadAhead.
|
|
|
|
Arguments:
|
|
|
|
FileObject - supplies pointer to FileObject on which readahead should be
|
|
considered.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
--*/
|
|
|
|
{
|
|
KIRQL OldIrql;
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
PPRIVATE_CACHE_MAP PrivateCacheMap;
|
|
ULONG i;
|
|
LARGE_INTEGER ReadAheadOffset[2];
|
|
ULONG ReadAheadLength[2];
|
|
PCACHE_MANAGER_CALLBACKS Callbacks;
|
|
PVOID Context;
|
|
ULONG SavedState;
|
|
LOGICAL Done;
|
|
LOGICAL HitEof = FALSE;
|
|
LOGICAL ReadAheadPerformed = FALSE;
|
|
ULONG FaultOccurred = 0;
|
|
PETHREAD Thread = PsGetCurrentThread();
|
|
PVACB Vacb = NULL;
|
|
|
|
LOGICAL ResourceHeld = FALSE;
|
|
|
|
DebugTrace(+1, me, "CcPerformReadAhead:\n", 0 );
|
|
DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
|
|
|
|
MmSavePageFaultReadAhead( Thread, &SavedState );
|
|
|
|
try {
|
|
|
|
//
|
|
// Since we have the open count biased, we can safely access the
|
|
// SharedCacheMap.
|
|
//
|
|
|
|
SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
|
|
|
|
Callbacks = SharedCacheMap->Callbacks;
|
|
Context = SharedCacheMap->LazyWriteContext;
|
|
|
|
//
|
|
// After the first time, keep looping as long as there are new
|
|
// read ahead requirements. (We will skip out below.)
|
|
//
|
|
|
|
while (TRUE) {
|
|
|
|
//
|
|
// Get SharedCacheMap and PrivateCacheMap. If either are now NULL, get
|
|
// out.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
|
|
PrivateCacheMap = FileObject->PrivateCacheMap;
|
|
|
|
//
|
|
// Now capture the information that we need, so that we can drop the
|
|
// SharedList Resource. This information is advisory only anyway, and
|
|
// the caller must guarantee that the FileObject is referenced.
|
|
//
|
|
|
|
if (PrivateCacheMap != NULL) {
|
|
|
|
ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
|
|
|
|
//
|
|
// We are done when the lengths are 0
|
|
//
|
|
|
|
Done = ((PrivateCacheMap->ReadAheadLength[0] |
|
|
PrivateCacheMap->ReadAheadLength[1]) == 0);
|
|
|
|
ReadAheadOffset[0] = PrivateCacheMap->ReadAheadOffset[0];
|
|
ReadAheadOffset[1] = PrivateCacheMap->ReadAheadOffset[1];
|
|
ReadAheadLength[0] = PrivateCacheMap->ReadAheadLength[0];
|
|
ReadAheadLength[1] = PrivateCacheMap->ReadAheadLength[1];
|
|
PrivateCacheMap->ReadAheadLength[0] = 0;
|
|
PrivateCacheMap->ReadAheadLength[1] = 0;
|
|
|
|
ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
|
|
}
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
|
|
//
|
|
// Acquire the file shared.
|
|
//
|
|
|
|
ResourceHeld = (*Callbacks->AcquireForReadAhead)( Context, TRUE );
|
|
|
|
if ((PrivateCacheMap == NULL) || Done || !ResourceHeld) {
|
|
|
|
try_return( NOTHING );
|
|
}
|
|
|
|
//
|
|
// PERFORM READ AHEAD
|
|
//
|
|
//
|
|
// Now loop until everything is read in. The Read ahead is accomplished
|
|
// by touching the pages with an appropriate ReadAhead parameter in MM.
|
|
//
|
|
|
|
i = 0;
|
|
|
|
do {
|
|
|
|
LARGE_INTEGER Offset, SavedOffset;
|
|
ULONG Length, SavedLength;
|
|
|
|
Offset = ReadAheadOffset[i];
|
|
Length = ReadAheadLength[i];
|
|
SavedOffset = Offset;
|
|
SavedLength = Length;
|
|
|
|
if ((Length != 0)
|
|
|
|
&&
|
|
|
|
( Offset.QuadPart <= SharedCacheMap->FileSize.QuadPart )) {
|
|
|
|
ReadAheadPerformed = TRUE;
|
|
|
|
//
|
|
// Keep length within file and MAX_READ_AHEAD
|
|
//
|
|
|
|
if ( ( Offset.QuadPart + (LONGLONG)Length ) >= SharedCacheMap->FileSize.QuadPart ) {
|
|
|
|
Length = (ULONG)( SharedCacheMap->FileSize.QuadPart - Offset.QuadPart );
|
|
HitEof = TRUE;
|
|
|
|
}
|
|
if (Length > MAX_READ_AHEAD) {
|
|
Length = MAX_READ_AHEAD;
|
|
}
|
|
|
|
//
|
|
// Now loop to read all of the desired data in. This loop
|
|
// is more or less like the same loop to read data in
|
|
// CcCopyRead, except that we do not copy anything, just
|
|
// unmap as soon as it is in.
|
|
//
|
|
|
|
while (Length != 0) {
|
|
|
|
ULONG ReceivedLength;
|
|
PVOID CacheBuffer;
|
|
ULONG PagesToGo;
|
|
|
|
//
|
|
// Call local routine to Map or Access the file data.
|
|
// If we cannot map the data because of a Wait condition,
|
|
// return FALSE.
|
|
//
|
|
// Since this routine is intended to be called from
|
|
// the finally handler from file system read modules,
|
|
// it is imperative that it not raise any exceptions.
|
|
// Therefore, if any expected exception is raised, we
|
|
// will simply get out.
|
|
//
|
|
|
|
CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
|
|
Offset,
|
|
&Vacb,
|
|
&ReceivedLength );
|
|
|
|
//
|
|
// If we got more than we need, make sure to only transfer
|
|
// the right amount.
|
|
//
|
|
|
|
if (ReceivedLength > Length) {
|
|
ReceivedLength = Length;
|
|
}
|
|
|
|
//
|
|
// Now loop to touch all of the pages, calling MM to insure
|
|
// that if we fault, we take in exactly the number of pages
|
|
// we need.
|
|
//
|
|
|
|
PagesToGo = ADDRESS_AND_SIZE_TO_SPAN_PAGES( CacheBuffer,
|
|
ReceivedLength );
|
|
|
|
CcMissCounter = &CcReadAheadIos;
|
|
|
|
while (PagesToGo) {
|
|
|
|
MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
|
|
FaultOccurred |= !MmCheckCachedPageState(CacheBuffer, FALSE);
|
|
|
|
CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
|
|
PagesToGo -= 1;
|
|
}
|
|
CcMissCounter = &CcThrowAway;
|
|
|
|
//
|
|
// Calculate how much data we have left to go.
|
|
//
|
|
|
|
Length -= ReceivedLength;
|
|
|
|
//
|
|
// Assume we did not get all the data we wanted, and set
|
|
// Offset to the end of the returned data.
|
|
//
|
|
|
|
Offset.QuadPart = Offset.QuadPart + (LONGLONG)ReceivedLength;
|
|
|
|
//
|
|
// It was only a page, so we can just leave this loop
|
|
// After freeing the address.
|
|
//
|
|
|
|
CcFreeVirtualAddress( Vacb );
|
|
Vacb = NULL;
|
|
}
|
|
}
|
|
i += 1;
|
|
} while (i <= 1);
|
|
|
|
//
|
|
// Release the file
|
|
//
|
|
|
|
(*Callbacks->ReleaseFromReadAhead)( Context );
|
|
ResourceHeld = FALSE;
|
|
}
|
|
|
|
try_exit: NOTHING;
|
|
}
|
|
finally {
|
|
|
|
MmResetPageFaultReadAhead(Thread, SavedState);
|
|
CcMissCounter = &CcThrowAway;
|
|
|
|
//
|
|
// If we got an error faulting a single page in, release the Vacb
|
|
// here. It is important to free any mapping before dropping the
|
|
// resource to prevent purge problems.
|
|
//
|
|
|
|
if (Vacb != NULL) {
|
|
CcFreeVirtualAddress( Vacb );
|
|
}
|
|
|
|
//
|
|
// Release the file
|
|
//
|
|
|
|
if (ResourceHeld) {
|
|
(*Callbacks->ReleaseFromReadAhead)( Context );
|
|
}
|
|
|
|
//
|
|
// To show we are done, we must make sure the PrivateCacheMap is
|
|
// still there.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
|
|
PrivateCacheMap = FileObject->PrivateCacheMap;
|
|
|
|
//
|
|
// Show readahead is going inactive.
|
|
//
|
|
|
|
if (PrivateCacheMap != NULL) {
|
|
|
|
ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
|
|
CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);
|
|
|
|
//
|
|
// If he said sequential only and we smashed into Eof, then
|
|
// let's reset the highwater mark in case he wants to read the
|
|
// file sequentially again.
|
|
//
|
|
|
|
if (HitEof && FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
|
|
PrivateCacheMap->ReadAheadOffset[1].LowPart =
|
|
PrivateCacheMap->ReadAheadOffset[1].HighPart = 0;
|
|
}
|
|
|
|
//
|
|
// If no faults occurred, turn read ahead off.
|
|
//
|
|
|
|
if (ReadAheadPerformed && !FaultOccurred) {
|
|
CC_CLEAR_PRIVATE_CACHE_MAP (PrivateCacheMap, PRIVATE_CACHE_MAP_READ_AHEAD_ENABLED);
|
|
}
|
|
|
|
ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
|
|
}
|
|
|
|
//
|
|
// Free SharedCacheMap list
|
|
//
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
|
|
ObDereferenceObject( FileObject );
|
|
|
|
//
|
|
// Serialize again to decrement the open count.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
|
|
CcDecrementOpenCount( SharedCacheMap, 'adRP' );
|
|
|
|
if ((SharedCacheMap->OpenCount == 0) &&
|
|
!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
|
|
(SharedCacheMap->DirtyPages == 0)) {
|
|
|
|
//
|
|
// Move to the dirty list.
|
|
//
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
|
|
//
|
|
// Make sure the Lazy Writer will wake up, because we
|
|
// want him to delete this SharedCacheMap.
|
|
//
|
|
|
|
LazyWriter.OtherWork = TRUE;
|
|
if (!LazyWriter.ScanActive) {
|
|
CcScheduleLazyWriteScan( FALSE );
|
|
}
|
|
}
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcPerformReadAhead -> VOID\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
PBITMAP_RANGE
|
|
CcFindBitmapRangeToDirty (
|
|
IN PMBCB Mbcb,
|
|
IN LONGLONG Page,
|
|
IN PULONG *FreePageForSetting
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine looks for the bitmap range containing the specified page.
|
|
If it is found it is returned so the caller can set some dirty bits.
|
|
If it is not found, then an attempt is made to come up with a free range
|
|
and set it up to describe the desired range. To come up with a free range,
|
|
first we attempt to recycle the lowest range that does not currently contain
|
|
any dirty pages. If there is no such range, then we allocate one.
|
|
|
|
Arguments:
|
|
|
|
Mbcb - Supplies the Mbcb in which to find the range.
|
|
|
|
Page - Supplies the page number for the first page to be set dirty.
|
|
|
|
FreePageForSetting - Supplies a free bitmap page of zeros from the zone; the
|
|
caller's pointer is cleared on return if this page is used.
|
|
|
|
Return Value:
|
|
|
|
The desired bitmap range, or NULL if one could not be allocated.
|
|
|
|
Environment:
|
|
|
|
The BcbSpinLock must be held on entry.
|
|
|
|
--*/
|
|
|
|
{
|
|
PBITMAP_RANGE BitmapRange, FreeRange;
|
|
PLIST_ENTRY InsertPoint;
|
|
LONGLONG BasePage;
|
|
|
|
//
|
|
// Initialize FreeRange and InsertPoint for the case we have
|
|
// to initialize a range.
|
|
//
|
|
|
|
FreeRange = NULL;
|
|
InsertPoint = &Mbcb->BitmapRanges;
|
|
|
|
//
|
|
// Point to the first bitmap range.
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)InsertPoint->Flink;
|
|
|
|
//
|
|
// Calculate the desired BasePage from the caller's page.
|
|
//
|
|
|
|
BasePage = (Page & ~(LONGLONG)((MBCB_BITMAP_BLOCK_SIZE * 8) - 1));
|
|
|
|
//
|
|
// Loop through the list until we find the range or we have a free range
|
|
// and correct insertion point.
|
|
//
|
|
|
|
do {
|
|
|
|
//
|
|
// If we get an exact match, then we must have hit a fully-initialized
|
|
// range which we can return.
|
|
//
|
|
|
|
if (BasePage == BitmapRange->BasePage) {
|
|
return BitmapRange;
|
|
|
|
//
|
|
// Otherwise, see if the range is free and we have not captured a
|
|
// free range yet.
|
|
//
|
|
|
|
} else if ((BitmapRange->DirtyPages == 0) && (FreeRange == NULL)) {
|
|
FreeRange = BitmapRange;
|
|
|
|
//
|
|
// If we did not capture a free range, see if we need to update our
|
|
// insertion point.
|
|
//
|
|
|
|
} else if (BasePage > BitmapRange->BasePage) {
|
|
InsertPoint = &BitmapRange->Links;
|
|
}
|
|
|
|
//
|
|
// Advance to the next range (or possibly back to the listhead).
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
|
|
|
//
|
|
// Loop until we hit the end, or we know we are done updating both InsertPoint
|
|
// and FreeRange.
|
|
//
|
|
|
|
} while ((BitmapRange != (PBITMAP_RANGE)&Mbcb->BitmapRanges) &&
|
|
((BasePage >= BitmapRange->BasePage) ||
|
|
(FreeRange == NULL)));
|
|
|
|
//
|
|
// If we found a FreeRange we can use, then remove it from the list.
|
|
//
|
|
|
|
if (FreeRange != NULL) {
|
|
RemoveEntryList( &FreeRange->Links );
|
|
|
|
//
|
|
// Otherwise we have to allocate the small bitmap range structure. We usually
|
|
// try to avoid calling the pool package while owning a spin lock, but note the
|
|
// following things which must be true if we hit this point:
|
|
//
|
|
// The file is larger than 3 bitmap ranges (normally 384MB on Intel).
|
|
// Three ranges plus all previously allocated ranges are simultaneously dirty.
|
|
//
|
|
// The second point is fairly unlikely, especially for a sequential writer. It
|
|
// can occur for a random writer in a large file, but eventually we will allocate
|
|
// enough ranges to always describe how many ranges he can keep dirty at once!
|
|
//
|
|
|
|
} else {
|
|
FreeRange = ExAllocatePoolWithTag( NonPagedPool, sizeof(BITMAP_RANGE), 'rBcC' );
|
|
if (FreeRange == NULL) {
|
|
return NULL;
|
|
}
|
|
RtlZeroMemory( FreeRange, sizeof(BITMAP_RANGE) );
|
|
}
|
|
|
|
//
|
|
// Insert and initialize.
|
|
//
|
|
|
|
InsertHeadList( InsertPoint, &FreeRange->Links );
|
|
FreeRange->BasePage = BasePage;
|
|
FreeRange->FirstDirtyPage = MAXULONG;
|
|
FreeRange->LastDirtyPage = 0;
|
|
|
|
//
|
|
// If the range does not have a bitmap yet, then consume the one we were passed
|
|
// in.
|
|
//
|
|
|
|
if (FreeRange->Bitmap == NULL) {
|
|
ASSERT(*FreePageForSetting != NULL);
|
|
FreeRange->Bitmap = *FreePageForSetting;
|
|
*FreePageForSetting = NULL;
|
|
}
|
|
|
|
return FreeRange;
|
|
}
|
|
|
|
|
|
PBITMAP_RANGE
|
|
CcFindBitmapRangeToClean (
|
|
IN PMBCB Mbcb,
|
|
IN LONGLONG Page
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine starts from the specified page, and looks for a range with dirty
|
|
pages. The caller must guarantee that some range exists with dirty pages. If
|
|
the end of the ranges is hit before finding any dirty ranges, then this routine
|
|
loops back to the start of the range list.
|
|
|
|
Arguments:
|
|
|
|
Mbcb - Supplies the Mbcb in which to find the range.
|
|
|
|
Page - Supplies the page number for the first page to scan from.
|
|
|
|
Return Value:
|
|
|
|
The desired bitmap range with dirty pages.
|
|
|
|
Environment:
|
|
|
|
The BcbSpinLock must be held on entry.
|
|
|
|
--*/
|
|
|
|
{
|
|
PBITMAP_RANGE BitmapRange;
|
|
|
|
//
|
|
// Point to the first bitmap range.
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)Mbcb->BitmapRanges.Flink;
|
|
|
|
//
|
|
// Loop through the list until we find the range to return.
|
|
//
|
|
|
|
do {
|
|
|
|
//
|
|
// If we hit the listhead, then wrap to find the first dirty range.
|
|
//
|
|
|
|
if (BitmapRange == (PBITMAP_RANGE)&Mbcb->BitmapRanges) {
|
|
|
|
//
|
|
// If Page is already 0, we are in an infinite loop.
|
|
//
|
|
|
|
ASSERT(Page != 0);
|
|
|
|
//
|
|
// Clear Page and fall through to advance to first range.
|
|
//
|
|
|
|
Page = 0;
|
|
|
|
|
|
//
|
|
// Otherwise, if we are in range, return the first range
|
|
// with dirty pages.
|
|
//
|
|
|
|
} else if ((Page <= (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) &&
|
|
(BitmapRange->DirtyPages != 0)) {
|
|
return BitmapRange;
|
|
}
|
|
|
|
//
|
|
// Advance to the next range (or possibly back to the listhead).
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
|
|
|
} while (TRUE);
|
|
}
|
|
|
|
|
|
VOID
|
|
CcSetDirtyInMask (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to set a range of pages dirty in a user data
|
|
file, by just setting the corresponding bits in the mask bcb.
|
|
|
|
IMPORTANT NOTE:
|
|
|
|
If this routine fails to set any bits due to an allocation failure,
|
|
it just returns quietly without informing the caller. (Note that this
|
|
routine is never called for no modified write sections.) The reason
|
|
for this behavior is that this routine is sometimes called as part of
|
|
error recovery (CcFreeActiveVacb, CcMdlWriteComplete, etc.) when it is
|
|
essential to just keep on moving. Note that if an allocation failure does
|
|
occur, this only means that MM will have to flush the modified page in
|
|
time, since the Lazy Writer will not do it.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - SharedCacheMap where the pages are to be set dirty.
|
|
|
|
FileOffset - FileOffset of first page to set dirty
|
|
|
|
Length - Used in conjunction with FileOffset to determine how many pages
|
|
to set dirty.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
PMBCB Mbcb;
|
|
PBITMAP_RANGE BitmapRange;
|
|
LONGLONG FirstPage;
|
|
LONGLONG LastPage;
|
|
PULONG MaskPtr;
|
|
ULONG Mask = 0;
|
|
PULONG Bitmap = NULL;
|
|
|
|
//
|
|
// We assume no caller can cross a bitmap range boundary (currently not even
|
|
// a view boundary!), so we do not want to loop through bitmap ranges.
|
|
//
|
|
|
|
ASSERT((FileOffset->QuadPart / MBCB_BITMAP_RANGE) ==
|
|
((FileOffset->QuadPart + Length - 1) / MBCB_BITMAP_RANGE));
|
|
|
|
//
|
|
// Initialize our locals.
|
|
//
|
|
|
|
FirstPage = FileOffset->QuadPart >> PAGE_SHIFT;
|
|
LastPage = ((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT);
|
|
|
|
//
|
|
// PREfix correctly notes that Mbcb grande promotion test and the one
|
|
// that decides to preallocate the bitmap buffer ever disagree, we will
|
|
// be able to have a NULL Bitmap and die. This will not happen since we
|
|
// guarantee that section size >= filesize. Assert this case, and we will
|
|
// also assert that Bitmap is never NULL when needed - this should convince
|
|
// PREfix we're OK.
|
|
//
|
|
|
|
ASSERT( (SharedCacheMap->SectionSize.QuadPart / PAGE_SIZE) > LastPage );
|
|
|
|
//
|
|
// If we have to convert to an Mbcb grande, we will loop back here to
|
|
// preallocate another buffer.
|
|
//
|
|
|
|
do {
|
|
|
|
//
|
|
// For large streams, we need to preallocate a block we use for
|
|
// we use for bitmaps. We allocate one, then loop back in the rare
|
|
// case where we will need another. We free it at the bottom if we
|
|
// don't need one.
|
|
//
|
|
|
|
if (SharedCacheMap->SectionSize.QuadPart > (MBCB_BITMAP_INITIAL_SIZE * 8 * PAGE_SIZE)) {
|
|
|
|
//
|
|
// If we could not preallocate, break out into common cleanup code and
|
|
// return quietly.
|
|
//
|
|
|
|
if (!CcPrefillVacbLevelZone( 1, &LockHandle.OldIrql, FALSE )) {
|
|
return;
|
|
}
|
|
|
|
Bitmap = (PULONG)CcAllocateVacbLevel( FALSE );
|
|
CcReleaseVacbLock( LockHandle.OldIrql );
|
|
}
|
|
|
|
//
|
|
// Acquire the Mbcb spinlock.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
//
|
|
// If there is no Mbcb, we will have to allocate one.
|
|
//
|
|
|
|
Mbcb = SharedCacheMap->Mbcb;
|
|
if (Mbcb == NULL) {
|
|
|
|
//
|
|
// Since we use the Bcb zone, we must assume that Bcbs are big enough.
|
|
//
|
|
|
|
ASSERT(QuadAlign(sizeof(MBCB)) <= QuadAlign(sizeof(BCB)));
|
|
|
|
//
|
|
// Allocate the Mbcb from the Bcb zone.
|
|
//
|
|
|
|
Mbcb = (PMBCB)CcAllocateInitializeBcb( NULL, NULL, NULL, NULL );
|
|
|
|
//
|
|
// If we could not allocate an Mbcb, break out to clean up and return
|
|
//
|
|
|
|
if (Mbcb == NULL) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Set in the node type, and initialize the listhead of ranges.
|
|
//
|
|
|
|
Mbcb->NodeTypeCode = CACHE_NTC_MBCB;
|
|
InitializeListHead( &Mbcb->BitmapRanges );
|
|
|
|
//
|
|
// Insert and initialize the first range.
|
|
//
|
|
|
|
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange1.Links );
|
|
Mbcb->BitmapRange1.FirstDirtyPage = MAXULONG;
|
|
|
|
//
|
|
// Use the rest of the Mbcb as the initial bitmap.
|
|
//
|
|
|
|
Mbcb->BitmapRange1.Bitmap = (PULONG)&Mbcb->BitmapRange2;
|
|
|
|
//
|
|
// Now set to use our new Mbcb.
|
|
//
|
|
|
|
SharedCacheMap->Mbcb = Mbcb;
|
|
}
|
|
|
|
//
|
|
// Now see if we need to switch to the Mbcb grande format.
|
|
//
|
|
|
|
if ((LastPage >= (MBCB_BITMAP_INITIAL_SIZE * 8)) &&
|
|
(Mbcb->NodeTypeCode != CACHE_NTC_MBCB_GRANDE)) {
|
|
|
|
ASSERT( Bitmap != NULL );
|
|
|
|
//
|
|
// If there are any dirty pages, copy the initial bitmap over, and zero
|
|
// out the original end of the Mbcb for reuse.
|
|
//
|
|
|
|
if (Mbcb->BitmapRange1.DirtyPages != 0) {
|
|
RtlCopyMemory( Bitmap, Mbcb->BitmapRange1.Bitmap, MBCB_BITMAP_INITIAL_SIZE );
|
|
RtlZeroMemory( Mbcb->BitmapRange1.Bitmap, MBCB_BITMAP_INITIAL_SIZE );
|
|
}
|
|
|
|
//
|
|
// Store the new bitmap pointer and show we have consumed this one.
|
|
//
|
|
|
|
Mbcb->BitmapRange1.Bitmap = Bitmap;
|
|
Bitmap = NULL;
|
|
|
|
//
|
|
// Insert and initialize the first range.
|
|
//
|
|
|
|
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange2.Links );
|
|
Mbcb->BitmapRange2.BasePage = MAXLONGLONG;
|
|
Mbcb->BitmapRange2.FirstDirtyPage = MAXULONG;
|
|
InsertTailList( &Mbcb->BitmapRanges, &Mbcb->BitmapRange3.Links );
|
|
Mbcb->BitmapRange3.BasePage = MAXLONGLONG;
|
|
Mbcb->BitmapRange3.FirstDirtyPage = MAXULONG;
|
|
Mbcb->NodeTypeCode = CACHE_NTC_MBCB_GRANDE;
|
|
|
|
//
|
|
// This is a one-time event - converting to the large Mbcb. Continue back
|
|
// to preallocate another buffer for CcFindBitmapRangeToDirty.
|
|
//
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Now find the Bitmap range we are setting bits in.
|
|
//
|
|
|
|
BitmapRange = CcFindBitmapRangeToDirty( Mbcb, FirstPage, &Bitmap );
|
|
|
|
//
|
|
// If we could not allocate this dinky structure, break out quietly.
|
|
//
|
|
|
|
if (BitmapRange == NULL) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Now update the first and last dirty page indices and the bitmap.
|
|
//
|
|
|
|
if (FirstPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) {
|
|
BitmapRange->FirstDirtyPage = (ULONG)(FirstPage - BitmapRange->BasePage);
|
|
}
|
|
|
|
if (LastPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
|
BitmapRange->LastDirtyPage = (ULONG)(LastPage - BitmapRange->BasePage);
|
|
}
|
|
|
|
//
|
|
// We have to acquire the shared cache map list, because we
|
|
// may be changing lists.
|
|
//
|
|
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
|
|
//
|
|
// If this is the first dirty page for this cache map, there is some work
|
|
// to do.
|
|
//
|
|
|
|
if (SharedCacheMap->DirtyPages == 0) {
|
|
|
|
//
|
|
// If the lazy write scan is not active, then start it.
|
|
//
|
|
|
|
if (!LazyWriter.ScanActive) {
|
|
CcScheduleLazyWriteScan( FALSE );
|
|
}
|
|
|
|
//
|
|
// Move to the dirty list.
|
|
//
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
|
|
Mbcb->ResumeWritePage = FirstPage;
|
|
}
|
|
|
|
MaskPtr = &BitmapRange->Bitmap[(ULONG)(FirstPage - BitmapRange->BasePage) / 32];
|
|
Mask = 1 << ((ULONG)FirstPage % 32);
|
|
|
|
//
|
|
// Loop to set all of the bits and adjust the DirtyPage totals.
|
|
//
|
|
|
|
for ( ; FirstPage <= LastPage; FirstPage++) {
|
|
|
|
if ((*MaskPtr & Mask) == 0) {
|
|
CcChargeMaskDirtyPages( SharedCacheMap, Mbcb, BitmapRange, 1 );
|
|
*MaskPtr |= Mask;
|
|
}
|
|
|
|
Mask <<= 1;
|
|
|
|
if (Mask == 0) {
|
|
|
|
MaskPtr += 1;
|
|
Mask = 1;
|
|
}
|
|
}
|
|
|
|
//
|
|
// See if we need to advance our goal for ValidDataLength.
|
|
//
|
|
|
|
LastPage = FileOffset->QuadPart + Length;
|
|
|
|
if (LastPage > SharedCacheMap->ValidDataGoal.QuadPart) {
|
|
SharedCacheMap->ValidDataGoal.QuadPart = (LONGLONG)LastPage;
|
|
}
|
|
|
|
CcReleaseMasterLockFromDpcLevel();
|
|
|
|
//
|
|
// Continue until we have actually set the bits (there is a continue
|
|
// which just wants to loop back and allocate another buffer).
|
|
//
|
|
|
|
} while (Mask == 0);
|
|
|
|
//
|
|
// Now if we preallocated a bitmap buffer, free it on the way out.
|
|
//
|
|
|
|
if (Bitmap != NULL) {
|
|
CcAcquireVacbLockAtDpcLevel();
|
|
CcDeallocateVacbLevel( (PVACB *)Bitmap, FALSE );
|
|
CcReleaseVacbLockFromDpcLevel();
|
|
}
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
|
|
VOID
|
|
CcSetDirtyPinnedData (
|
|
IN PVOID BcbVoid,
|
|
IN PLARGE_INTEGER Lsn OPTIONAL
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to set a Bcb (returned by CcPinFileData)
|
|
dirty, and a candidate for the Lazy Writer. All Bcbs should be set
|
|
dirty by calling this routine, even if they are to be flushed
|
|
another way.
|
|
|
|
Arguments:
|
|
|
|
Bcb - Supplies a pointer to a pinned (by CcPinFileData) Bcb, to
|
|
be set dirty.
|
|
|
|
Lsn - Lsn to be remembered with page.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
PBCB Bcbs[2];
|
|
PBCB *BcbPtrPtr;
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
|
|
DebugTrace(+1, me, "CcSetDirtyPinnedData: Bcb = %08lx\n", BcbVoid );
|
|
|
|
//
|
|
// Assume this is a normal Bcb, and set up for loop below.
|
|
//
|
|
|
|
Bcbs[0] = (PBCB)BcbVoid;
|
|
Bcbs[1] = NULL;
|
|
BcbPtrPtr = &Bcbs[0];
|
|
|
|
//
|
|
// If it is an overlap Bcb, then point into the Bcb vector
|
|
// for the loop.
|
|
//
|
|
|
|
if (Bcbs[0]->NodeTypeCode == CACHE_NTC_OBCB) {
|
|
BcbPtrPtr = &((POBCB)Bcbs[0])->Bcbs[0];
|
|
}
|
|
|
|
//
|
|
// Loop to set all Bcbs dirty
|
|
//
|
|
|
|
while (*BcbPtrPtr != NULL) {
|
|
|
|
Bcbs[0] = *(BcbPtrPtr++);
|
|
|
|
//
|
|
// Should be no ReadOnly Bcbs
|
|
//
|
|
|
|
ASSERT(((ULONG_PTR)Bcbs[0] & 1) != 1);
|
|
|
|
SharedCacheMap = Bcbs[0]->SharedCacheMap;
|
|
|
|
//
|
|
// We have to acquire the shared cache map list, because we
|
|
// may be changing lists.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
if (!Bcbs[0]->Dirty) {
|
|
|
|
ULONG Pages = Bcbs[0]->ByteLength >> PAGE_SHIFT;
|
|
|
|
//
|
|
// Set dirty to keep the Bcb from going away until
|
|
// it is set Undirty, and assign the next modification time stamp.
|
|
//
|
|
|
|
Bcbs[0]->Dirty = TRUE;
|
|
|
|
//
|
|
// Initialize the OldestLsn field.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(Lsn)) {
|
|
Bcbs[0]->OldestLsn = *Lsn;
|
|
Bcbs[0]->NewestLsn = *Lsn;
|
|
}
|
|
|
|
//
|
|
// Move it to the dirty list if these are the first dirty pages,
|
|
// and this is not disabled for write behind.
|
|
//
|
|
// Increase the count of dirty bytes in the shared cache map.
|
|
//
|
|
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
if ((SharedCacheMap->DirtyPages == 0) &&
|
|
!FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
|
|
|
//
|
|
// If the lazy write scan is not active, then start it.
|
|
//
|
|
|
|
if (!LazyWriter.ScanActive) {
|
|
CcScheduleLazyWriteScan( FALSE );
|
|
}
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
}
|
|
|
|
CcChargePinDirtyPages( SharedCacheMap, Pages );
|
|
CcReleaseMasterLockFromDpcLevel();
|
|
}
|
|
|
|
//
|
|
// If this Lsn happens to be older/newer than the ones we have stored, then
|
|
// change it.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(Lsn)) {
|
|
|
|
if ((Bcbs[0]->OldestLsn.QuadPart == 0) || (Lsn->QuadPart < Bcbs[0]->OldestLsn.QuadPart)) {
|
|
Bcbs[0]->OldestLsn = *Lsn;
|
|
}
|
|
|
|
if (Lsn->QuadPart > Bcbs[0]->NewestLsn.QuadPart) {
|
|
Bcbs[0]->NewestLsn = *Lsn;
|
|
}
|
|
}
|
|
|
|
//
|
|
// See if we need to advance our goal for ValidDataLength.
|
|
//
|
|
|
|
if ( Bcbs[0]->BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) {
|
|
|
|
SharedCacheMap->ValidDataGoal = Bcbs[0]->BeyondLastByte;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcSetDirtyPinnedData -> VOID\n", 0 );
|
|
}
|
|
|
|
|
|
NTSTATUS
|
|
CcSetValidData (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER ValidDataLength
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is used to call the File System to update ValidDataLength
|
|
for a file.
|
|
|
|
Arguments:
|
|
|
|
FileObject - A pointer to a referenced file object describing which file
|
|
the read should be performed from.
|
|
|
|
ValidDataLength - Pointer to new ValidDataLength.
|
|
|
|
Return Value:
|
|
|
|
Status of operation.
|
|
|
|
--*/
|
|
|
|
{
|
|
PIO_STACK_LOCATION IrpSp;
|
|
PDEVICE_OBJECT DeviceObject;
|
|
NTSTATUS Status;
|
|
FILE_END_OF_FILE_INFORMATION Buffer;
|
|
IO_STATUS_BLOCK IoStatus;
|
|
KEVENT Event;
|
|
PIRP Irp;
|
|
|
|
DebugTrace(+1, me, "CcSetValidData:\n", 0 );
|
|
DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
|
|
DebugTrace2(0, me, " ValidDataLength = %08lx, %08lx\n",
|
|
ValidDataLength->LowPart, ValidDataLength->HighPart );
|
|
|
|
//
|
|
// Copy ValidDataLength to our buffer.
|
|
//
|
|
|
|
Buffer.EndOfFile = *ValidDataLength;
|
|
|
|
//
|
|
// Initialize the event.
|
|
//
|
|
|
|
KeInitializeEvent( &Event, NotificationEvent, FALSE );
|
|
|
|
//
|
|
// Begin by getting a pointer to the device object that the file resides
|
|
// on.
|
|
//
|
|
|
|
DeviceObject = IoGetRelatedDeviceObject( FileObject );
|
|
|
|
//
|
|
// Allocate an I/O Request Packet (IRP) for this in-page operation.
|
|
//
|
|
|
|
Irp = IoAllocateIrp( DeviceObject->StackSize, FALSE );
|
|
if (Irp == NULL) {
|
|
|
|
DebugTrace(-1, me, "CcSetValidData-> STATUS_INSUFFICIENT_RESOURCES\n", 0 );
|
|
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
//
|
|
// Get a pointer to the first stack location in the packet. This location
|
|
// will be used to pass the function codes and parameters to the first
|
|
// driver.
|
|
//
|
|
|
|
IrpSp = IoGetNextIrpStackLocation( Irp );
|
|
|
|
//
|
|
// Fill in the IRP according to this request, setting the flags to
|
|
// just cause IO to set the event and deallocate the Irp.
|
|
//
|
|
|
|
Irp->Flags = IRP_PAGING_IO | IRP_SYNCHRONOUS_PAGING_IO;
|
|
Irp->RequestorMode = KernelMode;
|
|
Irp->UserIosb = &IoStatus;
|
|
Irp->UserEvent = &Event;
|
|
Irp->Tail.Overlay.OriginalFileObject = FileObject;
|
|
Irp->Tail.Overlay.Thread = PsGetCurrentThread();
|
|
Irp->AssociatedIrp.SystemBuffer = &Buffer;
|
|
|
|
//
|
|
// Fill in the normal read parameters.
|
|
//
|
|
|
|
IrpSp->MajorFunction = IRP_MJ_SET_INFORMATION;
|
|
IrpSp->FileObject = FileObject;
|
|
IrpSp->DeviceObject = DeviceObject;
|
|
IrpSp->Parameters.SetFile.Length = sizeof(FILE_END_OF_FILE_INFORMATION);
|
|
IrpSp->Parameters.SetFile.FileInformationClass = FileEndOfFileInformation;
|
|
IrpSp->Parameters.SetFile.FileObject = NULL;
|
|
IrpSp->Parameters.SetFile.AdvanceOnly = TRUE;
|
|
|
|
//
|
|
// Queue the packet to the appropriate driver based on whether or not there
|
|
// is a VPB associated with the device. This routine should not raise.
|
|
//
|
|
|
|
Status = IoCallDriver( DeviceObject, Irp );
|
|
|
|
//
|
|
// If pending is returned (which is a successful status),
|
|
// we must wait for the request to complete.
|
|
//
|
|
|
|
if (Status == STATUS_PENDING) {
|
|
KeWaitForSingleObject( &Event,
|
|
Executive,
|
|
KernelMode,
|
|
FALSE,
|
|
(PLARGE_INTEGER)NULL);
|
|
}
|
|
|
|
//
|
|
// If we got an error back in Status, then the Iosb
|
|
// was not written, so we will just copy the status
|
|
// there, then test the final status after that.
|
|
//
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
IoStatus.Status = Status;
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcSetValidData-> %08lx\n", IoStatus.Status );
|
|
|
|
return IoStatus.Status;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
BOOLEAN
|
|
CcAcquireByteRangeForWrite (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER TargetOffset OPTIONAL,
|
|
IN ULONG TargetLength,
|
|
OUT PLARGE_INTEGER FileOffset,
|
|
OUT PULONG Length,
|
|
OUT PBCB *FirstBcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by the Lazy Writer to try to find a contiguous
|
|
range of bytes from the specified SharedCacheMap that are dirty and
|
|
should be flushed. After flushing, these bytes should be released
|
|
by calling CcReleaseByteRangeFromWrite.
|
|
|
|
Dirty ranges are returned in strictly increasing order.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - for the file for which the dirty byte range is sought
|
|
|
|
TargetOffset - If specified, then only the specified range is
|
|
to be flushed.
|
|
|
|
TargetLength - If target offset specified, this completes the range.
|
|
In any case, this field is zero for the Lazy Writer,
|
|
and nonzero for explicit flush calls.
|
|
|
|
FileOffset - Returns the offset for the beginning of the dirty byte
|
|
range to flush
|
|
|
|
Length - Returns the length of bytes in the range.
|
|
|
|
FirstBcb - Returns the first Bcb in the list for the range, to be used
|
|
when calling CcReleaseByteRangeFromWrite, or NULL if dirty
|
|
pages were found in the mask Bcb.
|
|
|
|
Return Value:
|
|
|
|
FALSE - if no dirty byte range could be found to match the necessary
|
|
criteria.
|
|
|
|
TRUE - if a dirty byte range is being returned.
|
|
|
|
--*/
|
|
|
|
{
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
PMBCB Mbcb;
|
|
PBCB Bcb;
|
|
LARGE_INTEGER LsnToFlushTo = {0, 0};
|
|
|
|
LOGICAL BcbLookasideCheck = FALSE;
|
|
|
|
PBITMAP_RANGE BitmapRange;
|
|
PULONG EndPtr;
|
|
PULONG MaskPtr;
|
|
ULONG Mask;
|
|
LONGLONG FirstDirtyPage;
|
|
ULONG OriginalFirstDirtyPage;
|
|
LONGLONG LastDirtyPage = MAXLONGLONG;
|
|
|
|
DebugTrace(+1, me, "CcAcquireByteRangeForWrite:\n", 0);
|
|
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap);
|
|
|
|
//
|
|
// Initially clear outputs.
|
|
//
|
|
|
|
FileOffset->QuadPart = 0;
|
|
*Length = 0;
|
|
|
|
//
|
|
// We must acquire the SharedCacheMap->BcbSpinLock.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
//
|
|
// See if there is a simple Mask Bcb, and if there is anything dirty in
|
|
// it. If so we will simply handle that case here by processing the bitmap.
|
|
//
|
|
|
|
Mbcb = SharedCacheMap->Mbcb;
|
|
|
|
if ((Mbcb != NULL) &&
|
|
(Mbcb->DirtyPages != 0) &&
|
|
((Mbcb->PagesToWrite != 0) || (TargetLength != 0))) {
|
|
|
|
//
|
|
// If a target range was specified (outside call to CcFlush for a range),
|
|
// then calculate FirstPage and EndPtr based on these inputs.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(TargetOffset)) {
|
|
|
|
FirstDirtyPage = TargetOffset->QuadPart >> PAGE_SHIFT;
|
|
LastDirtyPage = (TargetOffset->QuadPart + TargetLength - 1) >> PAGE_SHIFT;
|
|
|
|
//
|
|
// Find the bitmap range containing the first dirty page.
|
|
//
|
|
|
|
BitmapRange = CcFindBitmapRangeToClean( Mbcb, FirstDirtyPage );
|
|
|
|
//
|
|
// If the target range is not dirty, get out. We may have even
|
|
// gotten back a nonoverlapping bitmap range.
|
|
//
|
|
|
|
if ((LastDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) ||
|
|
(FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage))) {
|
|
|
|
goto Scan_Bcbs;
|
|
}
|
|
|
|
if (LastDirtyPage < (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
|
EndPtr = &BitmapRange->Bitmap[(ULONG)(LastDirtyPage - BitmapRange->BasePage) / 32];
|
|
} else {
|
|
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
|
}
|
|
|
|
|
|
//
|
|
// Otherwise, for the Lazy Writer pick up where we left off.
|
|
//
|
|
|
|
} else {
|
|
|
|
//
|
|
// If a length was specified, then it is an explicit flush, and
|
|
// we want to start with the first dirty page, else the Lazy Writer
|
|
// starts from the ResumeWritePage.
|
|
//
|
|
|
|
FirstDirtyPage = 0;
|
|
if (TargetLength == 0) {
|
|
FirstDirtyPage = Mbcb->ResumeWritePage;
|
|
}
|
|
|
|
//
|
|
// Now find the next (cyclic) dirty page from this point.
|
|
//
|
|
|
|
BitmapRange = CcFindBitmapRangeToClean( Mbcb, FirstDirtyPage );
|
|
|
|
//
|
|
// If the page we thought we were looking for is beyond the last dirty page
|
|
// of this range, then CcFindBitmapRangeToClean must have wrapped back to
|
|
// the start of the file, and we should resume on the first dirty page of
|
|
// this range.
|
|
//
|
|
|
|
if (FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage)) {
|
|
FirstDirtyPage = BitmapRange->BasePage + BitmapRange->FirstDirtyPage;
|
|
}
|
|
|
|
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
|
}
|
|
|
|
//
|
|
// Now we can skip over any clean pages.
|
|
//
|
|
|
|
if (FirstDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) {
|
|
FirstDirtyPage = BitmapRange->BasePage + BitmapRange->FirstDirtyPage;
|
|
}
|
|
|
|
//
|
|
// Form a few other inputs for our dirty page scan.
|
|
//
|
|
|
|
MaskPtr = &BitmapRange->Bitmap[(ULONG)(FirstDirtyPage - BitmapRange->BasePage) / 32];
|
|
Mask = (ULONG)(-1 << (FirstDirtyPage % 32));
|
|
OriginalFirstDirtyPage = (ULONG)(FirstDirtyPage - BitmapRange->BasePage);
|
|
|
|
//
|
|
// Because of the possibility of getting stuck on a "hot spot" which gets
|
|
// modified over and over, we want to be very careful to resume exactly
|
|
// at the recorded resume point. If there is nothing there, then we
|
|
// fall into the loop below to scan for nozero long words in the bitmap,
|
|
// starting at the next longword.
|
|
//
|
|
|
|
if ((*MaskPtr & Mask) == 0) {
|
|
|
|
//
|
|
// Before entering loop, set all mask bits and insure we increment from
|
|
// an even Ulong boundary.
|
|
//
|
|
|
|
Mask = MAXULONG;
|
|
FirstDirtyPage &= ~31;
|
|
|
|
//
|
|
// To scan the bitmap faster, we scan for entire long words which are
|
|
// nonzero.
|
|
//
|
|
|
|
do {
|
|
|
|
MaskPtr += 1;
|
|
FirstDirtyPage += 32;
|
|
|
|
//
|
|
// If we go beyond the end, then we must wrap back to the first
|
|
// dirty page. We will just go back to the start of the first
|
|
// longword.
|
|
//
|
|
|
|
if (MaskPtr > EndPtr) {
|
|
|
|
//
|
|
// We can backup the last dirty page hint to where we
|
|
// started scanning, if we are the lazy writer.
|
|
//
|
|
|
|
if (TargetLength == 0) {
|
|
ASSERT(OriginalFirstDirtyPage >= BitmapRange->FirstDirtyPage);
|
|
BitmapRange->LastDirtyPage = OriginalFirstDirtyPage - 1;
|
|
}
|
|
|
|
//
|
|
// We hit the end of our scan. Let's assume we are supposed
|
|
// to move on to the next range with dirty pages.
|
|
//
|
|
|
|
do {
|
|
|
|
//
|
|
// Go to the next range.
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
|
|
|
//
|
|
// Did we hit the listhead?
|
|
//
|
|
|
|
if (BitmapRange == (PBITMAP_RANGE)&Mbcb->BitmapRanges) {
|
|
|
|
//
|
|
// If this is an explicit flush, then it is time to
|
|
// get out.
|
|
//
|
|
|
|
if (TargetLength != 0) {
|
|
goto Scan_Bcbs;
|
|
}
|
|
|
|
//
|
|
// Otherwise, we must wrap back to the first range in the
|
|
// Lazy Writer Scan.
|
|
//
|
|
|
|
BitmapRange = (PBITMAP_RANGE)BitmapRange->Links.Flink;
|
|
}
|
|
|
|
} while (BitmapRange->DirtyPages == 0);
|
|
|
|
//
|
|
// Now we have a new range with dirty pages, but if this is
|
|
// an explicit flush of a specified range, we may be done.
|
|
//
|
|
|
|
if ((LastDirtyPage < (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)) ||
|
|
(FirstDirtyPage > (BitmapRange->BasePage + BitmapRange->LastDirtyPage))) {
|
|
|
|
goto Scan_Bcbs;
|
|
}
|
|
|
|
//
|
|
// Otherwise, we need to set up our context to resume scanning in this
|
|
// range.
|
|
//
|
|
|
|
MaskPtr = &BitmapRange->Bitmap[BitmapRange->FirstDirtyPage / 32];
|
|
EndPtr = &BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32];
|
|
FirstDirtyPage = BitmapRange->BasePage + (BitmapRange->FirstDirtyPage & ~31);
|
|
OriginalFirstDirtyPage = BitmapRange->FirstDirtyPage;
|
|
}
|
|
} while (*MaskPtr == 0);
|
|
}
|
|
|
|
//
|
|
// Calculate the first set bit in the mask that we hit on.
|
|
//
|
|
|
|
Mask = ~Mask + 1;
|
|
|
|
//
|
|
// Now loop to find the first set bit.
|
|
//
|
|
|
|
while ((*MaskPtr & Mask) == 0) {
|
|
|
|
Mask <<= 1;
|
|
FirstDirtyPage += 1;
|
|
}
|
|
|
|
//
|
|
// If a TargetOffset was specified, then make sure we do not start
|
|
// beyond the specified range or a dirty Bcb in the range.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(TargetOffset)) {
|
|
|
|
if (FirstDirtyPage >= ((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
|
|
|
|
goto Scan_Bcbs;
|
|
}
|
|
|
|
//
|
|
// If Bcbs are present on this file, we must go scan to see if they
|
|
// describe a range that must be written first. If this is not the
|
|
// case, we'll hop back and continue building the range from the mask Bcb.
|
|
//
|
|
// Note that this case will be very rare. Bcbs are introduced into user
|
|
// files in limited situations (CcZero) and the reverse is never allowed
|
|
// to happen.
|
|
//
|
|
|
|
if (!IsListEmpty(&SharedCacheMap->BcbList)) {
|
|
|
|
BcbLookasideCheck = TRUE;
|
|
goto Scan_Bcbs;
|
|
}
|
|
}
|
|
|
|
Accept_Page:
|
|
|
|
//
|
|
// Now loop to count the set bits at that point, clearing them as we
|
|
// go because we plan to write the corresponding pages. Stop as soon
|
|
// as we find a clean page, or we reach our maximum write size. Of
|
|
// course we want to ignore long word boundaries and keep trying to
|
|
// extend the write. We do not check for wrapping around the end of
|
|
// the bitmap here, because we guarantee some zero bits at the end
|
|
// in CcSetDirtyInMask.
|
|
//
|
|
|
|
while (((*MaskPtr & Mask) != 0) && (*Length < (MAX_WRITE_BEHIND / PAGE_SIZE)) &&
|
|
(!ARGUMENT_PRESENT(TargetOffset) || ((FirstDirtyPage + *Length) <
|
|
(ULONG)((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)))) {
|
|
|
|
ASSERT(MaskPtr <= (&BitmapRange->Bitmap[BitmapRange->LastDirtyPage / 32]));
|
|
|
|
*MaskPtr -= Mask;
|
|
*Length += 1;
|
|
Mask <<= 1;
|
|
|
|
if (Mask == 0) {
|
|
|
|
MaskPtr += 1;
|
|
Mask = 1;
|
|
|
|
if (MaskPtr > EndPtr) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now reduce the count of pages we were supposed to write this time,
|
|
// possibly clearing this count.
|
|
//
|
|
|
|
if (*Length < Mbcb->PagesToWrite) {
|
|
|
|
Mbcb->PagesToWrite -= *Length;
|
|
|
|
} else {
|
|
|
|
Mbcb->PagesToWrite = 0;
|
|
}
|
|
|
|
//
|
|
// Reduce the dirty page counts by the number of pages we just cleared.
|
|
//
|
|
|
|
ASSERT(Mbcb->DirtyPages >= *Length);
|
|
Mbcb->DirtyPages -= *Length;
|
|
BitmapRange->DirtyPages -= *Length;
|
|
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
CcDeductDirtyPages( SharedCacheMap, *Length );
|
|
|
|
//
|
|
// Normally we need to reduce CcPagesYetToWrite appropriately.
|
|
//
|
|
|
|
if (CcPagesYetToWrite > *Length) {
|
|
CcPagesYetToWrite -= *Length;
|
|
} else {
|
|
CcPagesYetToWrite = 0;
|
|
}
|
|
|
|
//
|
|
// If we took out the last dirty page, then move the SharedCacheMap
|
|
// back to the clean list.
|
|
//
|
|
|
|
if (SharedCacheMap->DirtyPages == 0) {
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcCleanSharedCacheMapList,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
}
|
|
CcReleaseMasterLockFromDpcLevel();
|
|
|
|
//
|
|
// If the number of dirty pages for the Mbcb went to zero, we can reset
|
|
// our hint fields now.
|
|
//
|
|
|
|
if (BitmapRange->DirtyPages == 0) {
|
|
|
|
BitmapRange->FirstDirtyPage = MAXULONG;
|
|
BitmapRange->LastDirtyPage = 0;
|
|
|
|
//
|
|
// Assume this is a large file and that the resume point should
|
|
// be at the beginning of the next range. In all cases if the resume
|
|
// point is set too high, the next resume will just wrap back to 0 anyway.
|
|
//
|
|
|
|
Mbcb->ResumeWritePage = BitmapRange->BasePage + (MBCB_BITMAP_BLOCK_SIZE * 8);
|
|
|
|
//
|
|
// Otherwise we have to update the hint fields.
|
|
//
|
|
|
|
} else {
|
|
|
|
//
|
|
// Advance the first dirty page hint if we can.
|
|
//
|
|
|
|
if (BitmapRange->FirstDirtyPage == OriginalFirstDirtyPage) {
|
|
|
|
BitmapRange->FirstDirtyPage = (ULONG)(FirstDirtyPage - BitmapRange->BasePage) + *Length;
|
|
}
|
|
|
|
//
|
|
// Set to resume the next scan at the next bit for
|
|
// the Lazy Writer.
|
|
//
|
|
|
|
if (TargetLength == 0) {
|
|
|
|
Mbcb->ResumeWritePage = FirstDirtyPage + *Length;
|
|
}
|
|
}
|
|
|
|
//
|
|
// We can save a callback by letting our caller know when
|
|
// we have no more pages to write.
|
|
//
|
|
|
|
if (IsListEmpty(&SharedCacheMap->BcbList)) {
|
|
SharedCacheMap->PagesToWrite = Mbcb->PagesToWrite;
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Now form all of our outputs. We calculated *Length as a page count,
|
|
// but our caller wants it in bytes.
|
|
//
|
|
|
|
*Length <<= PAGE_SHIFT;
|
|
FileOffset->QuadPart = (LONGLONG)FirstDirtyPage << PAGE_SHIFT;
|
|
*FirstBcb = NULL;
|
|
|
|
DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace( 0, me, " <Length = %08lx\n", *Length );
|
|
DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> TRUE\n", 0 );
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
//
|
|
// We get here if there is no Mbcb or no dirty pages in it. Note that we
|
|
// wouldn't even be here if there were no dirty pages in this SharedCacheMap.
|
|
//
|
|
|
|
//
|
|
// Now point to last Bcb in List, and loop until we hit one of the
|
|
// breaks below or the beginning of the list.
|
|
//
|
|
|
|
Scan_Bcbs:
|
|
|
|
//
|
|
// Use while TRUE to handle case where the current target range wraps
|
|
// (escape is at the bottom).
|
|
//
|
|
|
|
while (TRUE) {
|
|
|
|
Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
|
|
|
|
//
|
|
// If we are to resume from a nonzero FileOffset, call CcFindBcb
|
|
// to get a quicker start. This is only useful on files that make
|
|
// use of significant pinned access, of course.
|
|
//
|
|
|
|
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
|
|
|
PLARGE_INTEGER StartingOffset;
|
|
|
|
if (ARGUMENT_PRESENT(TargetOffset)) {
|
|
StartingOffset = TargetOffset;
|
|
} else {
|
|
StartingOffset = (PLARGE_INTEGER)&SharedCacheMap->BeyondLastFlush;
|
|
}
|
|
|
|
if (StartingOffset->QuadPart != 0) {
|
|
|
|
LARGE_INTEGER StartingOffsetBias;
|
|
|
|
StartingOffsetBias.QuadPart = StartingOffset->QuadPart + PAGE_SIZE;
|
|
|
|
//
|
|
// Position ourselves. If we did not find a Bcb for the page, then
|
|
// a lower FileOffset was returned, so we want to move forward one.
|
|
//
|
|
|
|
if (!CcFindBcb( SharedCacheMap,
|
|
StartingOffset,
|
|
&StartingOffsetBias,
|
|
&Bcb )) {
|
|
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
|
}
|
|
}
|
|
}
|
|
|
|
while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {
|
|
|
|
//
|
|
// Skip over this item if it is a listhead.
|
|
//
|
|
|
|
if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {
|
|
|
|
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// If we are doing a specified range, then get out if we hit a
|
|
// higher Bcb.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(TargetOffset) &&
|
|
((TargetOffset->QuadPart + TargetLength) <= Bcb->FileOffset.QuadPart)) {
|
|
|
|
break;
|
|
}
|
|
|
|
//
|
|
// If we have not started a run, then see if this Bcb is a candidate
|
|
// to start one.
|
|
//
|
|
|
|
if (*Length == 0) {
|
|
|
|
//
|
|
// Else see if the Bcb is dirty, and is in our specified range, if
|
|
// there is one.
|
|
//
|
|
|
|
if (!Bcb->Dirty ||
|
|
(ARGUMENT_PRESENT(TargetOffset) && (TargetOffset->QuadPart >= Bcb->BeyondLastByte.QuadPart)) ||
|
|
(!ARGUMENT_PRESENT(TargetOffset) && (Bcb->FileOffset.QuadPart < SharedCacheMap->BeyondLastFlush))) {
|
|
|
|
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
|
continue;
|
|
|
|
}
|
|
|
|
//
|
|
// If we have a candidate dirty page from the mask Bcb, see
|
|
// if it describes a prior range. We must decide to return
|
|
// the first dirty range.
|
|
//
|
|
|
|
if (BcbLookasideCheck && FirstDirtyPage <= (ULONG)(Bcb->FileOffset.QuadPart >> PAGE_SHIFT)) {
|
|
goto Accept_Page;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Else, if we have started a run, then if this guy cannot be
|
|
// appended to the run, then break. Note that we ignore the
|
|
// Bcb's modification time stamp here to simplify the test.
|
|
//
|
|
// If the Bcb is currently pinned, then there is no sense in causing
|
|
// contention, so we will skip over this guy as well.
|
|
//
|
|
// Finally, if the new Bcb is in the next Vacb level, we will skip it
|
|
// to avoid problems with Bcb listheads going away in the middle of
|
|
// CcReleaseByteRangeFromWrite.
|
|
//
|
|
|
|
else {
|
|
if (!Bcb->Dirty || ( Bcb->FileOffset.QuadPart != ( FileOffset->QuadPart + (LONGLONG)*Length)) ||
|
|
(*Length + Bcb->ByteLength > MAX_WRITE_BEHIND) ||
|
|
(Bcb->PinCount != 0) ||
|
|
((Bcb->FileOffset.QuadPart & (VACB_SIZE_OF_FIRST_LEVEL - 1)) == 0)) {
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Increment PinCount to prevent Bcb from going away once the
|
|
// SpinLock is released, or we set it clean for the case where
|
|
// modified write is allowed.
|
|
//
|
|
|
|
Bcb->PinCount += 1;
|
|
|
|
//
|
|
// Release the SpinLock before waiting on the resource.
|
|
//
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
|
|
!FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
|
|
|
//
|
|
// Now acquire the Bcb exclusive, so that we know that nobody
|
|
// has it pinned and thus no one can be modifying the described
|
|
// buffer. To acquire the first Bcb in a run, we can afford
|
|
// to wait, because we are not holding any resources. However
|
|
// if we already have a Bcb, then we better not wait, because
|
|
// someone could have this Bcb pinned, and then wait for the
|
|
// Bcb we already have exclusive.
|
|
//
|
|
// For streams for which we have not disabled modified page
|
|
// writing, we do not need to acquire this resource, and the
|
|
// foreground processing will not be acquiring the Bcb either.
|
|
//
|
|
|
|
if (!ExAcquireResourceExclusiveLite( &Bcb->Resource,
|
|
(BOOLEAN)(*Length == 0) )) {
|
|
|
|
DebugTrace( 0, me, "Could not acquire 2nd Bcb\n", 0 );
|
|
|
|
//
|
|
// Release the Bcb count we took out above. We say
|
|
// ReadOnly = TRUE since we do not own the resource,
|
|
// and SetClean = FALSE because we just want to decement
|
|
// the count.
|
|
//
|
|
|
|
CcUnpinFileData( Bcb, TRUE, UNPIN );
|
|
|
|
//
|
|
// When we leave the loop, we have to have the spin lock
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
break;
|
|
}
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
//
|
|
// If someone has the file open WriteThrough, then the Bcb may no
|
|
// longer be dirty. If so, call CcUnpinFileData to decrement the
|
|
// PinCount we incremented and free the resource.
|
|
//
|
|
|
|
if (!Bcb->Dirty) {
|
|
|
|
//
|
|
// Release the spinlock so that we can call CcUnpinFileData
|
|
//
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
CcUnpinFileData( Bcb, FALSE, UNPIN );
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
//
|
|
// Now if we already have some data we can just break to return
|
|
// it, otherwise we have to restart the scan, since our Bcb
|
|
// may have gone away.
|
|
//
|
|
|
|
if (*Length != 0) {
|
|
break;
|
|
}
|
|
else {
|
|
|
|
Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
|
|
continue;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If we are not in the disable modified write mode (normal user data)
|
|
// then we must set the buffer clean before doing the write, since we
|
|
// are unsynchronized with anyone producing dirty data. That way if we,
|
|
// for example, are writing data out while it is actively being changed,
|
|
// at least the changer will mark the buffer dirty afterwards and cause
|
|
// us to write it again later.
|
|
//
|
|
|
|
} else {
|
|
|
|
CcUnpinFileData( Bcb, TRUE, SET_CLEAN );
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
}
|
|
|
|
DebugTrace( 0, me, "Adding Bcb = %08lx to run\n", Bcb );
|
|
|
|
//
|
|
// No matter what, once we've reached this point we are returning
|
|
// a range from the Bcbs.
|
|
//
|
|
|
|
BcbLookasideCheck = FALSE;
|
|
|
|
//
|
|
// Update all of our return values. Note that FirstBcb refers to the
|
|
// FirstBcb in terms of how the Bcb list is ordered. Since the Bcb list
|
|
// is ordered by descending file offsets, FirstBcb will actually return
|
|
// the Bcb with the highest FileOffset.
|
|
//
|
|
|
|
if (*Length == 0) {
|
|
*FileOffset = Bcb->FileOffset;
|
|
}
|
|
*FirstBcb = Bcb;
|
|
*Length += Bcb->ByteLength;
|
|
|
|
//
|
|
// If there is a log file flush callback for this stream, then we must
|
|
// remember the largest Lsn we are about to flush.
|
|
//
|
|
|
|
if ((SharedCacheMap->FlushToLsnRoutine != NULL) &&
|
|
(Bcb->NewestLsn.QuadPart > LsnToFlushTo.QuadPart)) {
|
|
|
|
LsnToFlushTo = Bcb->NewestLsn;
|
|
}
|
|
|
|
Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
|
|
}
|
|
|
|
//
|
|
// If we have a candidate dirty page from the mask Bcb, accept it
|
|
// since no Bcb has been found.
|
|
//
|
|
|
|
if (BcbLookasideCheck) {
|
|
|
|
ASSERT( *Length == 0 );
|
|
goto Accept_Page;
|
|
}
|
|
|
|
//
|
|
// If we found something, update our last flush range and reduce
|
|
// PagesToWrite.
|
|
//
|
|
|
|
if (*Length != 0) {
|
|
|
|
//
|
|
// If this is the Lazy Writer, then update BeyondLastFlush and
|
|
// the PagesToWrite target.
|
|
//
|
|
|
|
if (!ARGUMENT_PRESENT(TargetOffset)) {
|
|
|
|
SharedCacheMap->BeyondLastFlush = FileOffset->QuadPart + *Length;
|
|
|
|
if (SharedCacheMap->PagesToWrite > (*Length >> PAGE_SHIFT)) {
|
|
SharedCacheMap->PagesToWrite -= (*Length >> PAGE_SHIFT);
|
|
} else {
|
|
SharedCacheMap->PagesToWrite = 0;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
//
|
|
// Else, if we scanned the entire file, get out - nothing to write now.
|
|
//
|
|
|
|
} else if ((SharedCacheMap->BeyondLastFlush == 0) || ARGUMENT_PRESENT(TargetOffset)) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Otherwise, we may have not found anything because there is nothing
|
|
// beyond the last flush. In that case it is time to wrap back to 0
|
|
// and keep scanning.
|
|
//
|
|
|
|
SharedCacheMap->BeyondLastFlush = 0;
|
|
}
|
|
|
|
//
|
|
// Now release the spinlock file while we go off and do the I/O
|
|
//
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// If we need to flush to some Lsn, this is the time to do it now
|
|
// that we have found the largest Lsn and freed the spin lock.
|
|
//
|
|
|
|
if (LsnToFlushTo.QuadPart != 0) {
|
|
|
|
try {
|
|
|
|
(*SharedCacheMap->FlushToLsnRoutine) ( SharedCacheMap->LogHandle,
|
|
LsnToFlushTo );
|
|
} except( CcExceptionFilter( GetExceptionCode() )) {
|
|
|
|
//
|
|
// If there was an error, it will be raised. We cannot
|
|
// write anything until we successfully flush the log
|
|
// file, so we will release everything here and just
|
|
// return with 0 bytes.
|
|
//
|
|
|
|
LARGE_INTEGER LastOffset;
|
|
PBCB NextBcb;
|
|
|
|
//
|
|
// Now loop to free up all of the Bcbs. Set the time
|
|
// stamps to 0, so that we are guaranteed to try to
|
|
// flush them again on the next sweep.
|
|
//
|
|
|
|
do {
|
|
NextBcb = CONTAINING_RECORD( (*FirstBcb)->BcbLinks.Flink, BCB, BcbLinks );
|
|
|
|
//
|
|
// Skip over any listheads.
|
|
//
|
|
|
|
if ((*FirstBcb)->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
LastOffset = (*FirstBcb)->FileOffset;
|
|
|
|
CcUnpinFileData( *FirstBcb,
|
|
BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND),
|
|
UNPIN );
|
|
}
|
|
|
|
*FirstBcb = NextBcb;
|
|
} while (FileOffset->QuadPart != LastOffset.QuadPart);
|
|
|
|
//
|
|
// Show we did not acquire anything.
|
|
//
|
|
|
|
*Length = 0;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If we got anything, return TRUE.
|
|
//
|
|
|
|
DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace( 0, me, " <Length = %08lx\n", *Length );
|
|
DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> %02lx\n", *Length != 0 );
|
|
|
|
return ((BOOLEAN)(*Length != 0));
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
VOID
|
|
CcReleaseByteRangeFromWrite (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN PBCB FirstBcb,
|
|
IN BOOLEAN VerifyRequired
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by the Lazy Writer to free a range of bytes and
|
|
clear all dirty bits, for a byte range returned by CcAcquireByteRangeForWrite.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - As supplied to CcAcquireByteRangeForWrite
|
|
|
|
FileOffset - As returned from CcAcquireByteRangeForWrite
|
|
|
|
Length - As returned from CcAcquirebyteRangeForWrite
|
|
|
|
FirstBcb - As returned from CcAcquireByteRangeForWrite
|
|
|
|
VerifyRequired - supplied as TRUE if a verify required error was received.
|
|
In this case we must mark/leave the data dirty so that
|
|
we will try to write it again.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
LARGE_INTEGER LastOffset;
|
|
PBCB NextBcb;
|
|
|
|
DebugTrace(+1, me, "CcReleaseByteRangeFromWrite:\n", 0);
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
|
|
//
|
|
// If it is a mask Mbcb we are getting, then we only have to check
|
|
// for VerifyRequired.
|
|
//
|
|
|
|
if (FirstBcb == NULL) {
|
|
|
|
ASSERT(Length != 0);
|
|
|
|
if (VerifyRequired) {
|
|
CcSetDirtyInMask( SharedCacheMap, FileOffset, Length );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
|
|
|
|
return;
|
|
}
|
|
|
|
//
|
|
// PREfix correctly notes that if the caller gives us a listhead to start with,
|
|
// we will not have filled in LastOffset by the time we do our first loop test.
|
|
// For PREfix's benefit (and ours), assert we really are starting with a Bcb.
|
|
//
|
|
|
|
ASSERT( FirstBcb->NodeTypeCode == CACHE_NTC_BCB );
|
|
|
|
//
|
|
// Now loop to free up all of the Bcbs. If modified writing is disabled
|
|
// for each Bcb, then we are to set it clean here, since we are synchronized
|
|
// with callers who set the data dirty. Otherwise we only have the Bcb pinned
|
|
// so it will not go away, and we only unpin it here.
|
|
//
|
|
|
|
do {
|
|
NextBcb = CONTAINING_RECORD( FirstBcb->BcbLinks.Flink, BCB, BcbLinks );
|
|
|
|
//
|
|
// Skip over any listheads.
|
|
//
|
|
|
|
if (FirstBcb->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
LastOffset = FirstBcb->FileOffset;
|
|
|
|
//
|
|
// If this is file system metadata (we disabled modified writing),
|
|
// then this is the time to mark the buffer clean, so long as we
|
|
// did not get verify required.
|
|
//
|
|
|
|
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
|
|
|
CcUnpinFileData( FirstBcb,
|
|
BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND),
|
|
SET_CLEAN );
|
|
}
|
|
|
|
//
|
|
// If we got verify required, we have to mark the buffer dirty again
|
|
// so we will try again later. Note we have to make this call again
|
|
// to make sure the right thing happens with time stamps.
|
|
//
|
|
|
|
if (VerifyRequired) {
|
|
CcSetDirtyPinnedData( FirstBcb, NULL );
|
|
}
|
|
|
|
//
|
|
// Finally remove a pin count left over from CcAcquireByteRangeForWrite.
|
|
//
|
|
|
|
CcUnpinFileData( FirstBcb, TRUE, UNPIN );
|
|
}
|
|
|
|
FirstBcb = NextBcb;
|
|
} while (FileOffset->QuadPart != LastOffset.QuadPart);
|
|
|
|
DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
VOID
|
|
FASTCALL
|
|
CcWriteBehind (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PIO_STATUS_BLOCK IoStatus
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called with Wait = FALSE to see if write behind
|
|
is required, or with Wait = TRUE to perform write behind as required.
|
|
|
|
The code is very similar to the the code that the Lazy Writer performs
|
|
for each SharedCacheMap. The main difference is in the call to
|
|
CcAcquireByteRangeForWrite. Write Behind does not care about time
|
|
stamps (passing ULONG to accept all time stamps), but it will never
|
|
dump the first (highest byte offset) buffer in the list if the last
|
|
byte of that buffer is not yet written. The Lazy Writer does exactly
|
|
the opposite, in the sense that it is totally time-driven, and will
|
|
even dump a partially modified buffer if it sits around long enough.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - Pointer to SharedCacheMap to be written
|
|
|
|
Return Value:
|
|
|
|
FALSE - if write behind is required, but the caller supplied
|
|
Wait = FALSE
|
|
|
|
TRUE - if write behind is complete or not required
|
|
|
|
--*/
|
|
|
|
{
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
ULONG ActivePage;
|
|
ULONG PageIsDirty;
|
|
PMBCB Mbcb;
|
|
NTSTATUS Status;
|
|
PVACB ActiveVacb = NULL;
|
|
|
|
DebugTrace(+1, me, "CcWriteBehind\n", 0 );
|
|
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
|
|
|
//
|
|
// First we have to acquire the file for LazyWrite, to avoid
|
|
// deadlocking with writers to the file. We do this via the
|
|
// CallBack procedure specified to CcInitializeCacheMap.
|
|
//
|
|
|
|
if (!(*SharedCacheMap->Callbacks->AcquireForLazyWrite)
|
|
( SharedCacheMap->LazyWriteContext, TRUE )) {
|
|
|
|
//
|
|
// The filesystem is hinting that it doesn't think that it can
|
|
// service the write without significant delay so we will defer
|
|
// and come back later. Simply drop the queued flag ... note that
|
|
// we do not modify CcPagesYetToWrite, in the hope that we can make
|
|
// up the difference in some other cache map on this pass.
|
|
//
|
|
|
|
CcAcquireMasterLock( &LockHandle.OldIrql );
|
|
ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
|
|
IoStatus->Status = STATUS_FILE_LOCK_CONFLICT;
|
|
return;
|
|
}
|
|
|
|
//
|
|
// See if there is a previous active page to clean up, but only
|
|
// do so now if it is the last dirty page or no users have the
|
|
// file open. We will free it below after dropping the spinlock.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
|
|
if ((SharedCacheMap->DirtyPages <= 1) || (SharedCacheMap->OpenCount == 0)) {
|
|
GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
|
|
}
|
|
|
|
//
|
|
// Increment open count so that our caller's views stay available
|
|
// for CcGetVacbMiss. We could be tying up all of the views, and
|
|
// still need to write file sizes.
|
|
//
|
|
|
|
CcIncrementOpenCount( SharedCacheMap, 'brWS' );
|
|
|
|
//
|
|
// If there is a mask bcb, then we need to establish a target for
|
|
// it to flush.
|
|
//
|
|
|
|
if ((Mbcb = SharedCacheMap->Mbcb) != 0) {
|
|
|
|
//
|
|
// Set a target of pages to write, assuming that any Active
|
|
// Vacb will increase the number.
|
|
//
|
|
|
|
Mbcb->PagesToWrite = Mbcb->DirtyPages + ((ActiveVacb != NULL) ? 1 : 0);
|
|
|
|
if (Mbcb->PagesToWrite > CcPagesYetToWrite) {
|
|
|
|
Mbcb->PagesToWrite = CcPagesYetToWrite;
|
|
}
|
|
}
|
|
|
|
CcReleaseMasterLockFromDpcLevel();
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Now free the active Vacb, if we found one.
|
|
//
|
|
|
|
if (ActiveVacb != NULL) {
|
|
|
|
CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
|
|
}
|
|
|
|
//
|
|
// Now perform the lazy writing for this file via a special call
|
|
// to CcFlushCache. He recognizes us by the &CcNoDelay input to
|
|
// FileOffset, which signifies a Lazy Write, but is subsequently
|
|
// ignored.
|
|
//
|
|
|
|
CcFlushCache( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
&CcNoDelay,
|
|
1,
|
|
IoStatus );
|
|
|
|
//
|
|
// No need for the Lazy Write resource now.
|
|
//
|
|
|
|
(*SharedCacheMap->Callbacks->ReleaseFromLazyWrite)
|
|
( SharedCacheMap->LazyWriteContext );
|
|
|
|
//
|
|
// Check if we need to put up a popup.
|
|
//
|
|
|
|
if (!NT_SUCCESS(IoStatus->Status) && !RetryError(IoStatus->Status)) {
|
|
|
|
//
|
|
// We lost writebehind data. Bemoan our fate into the system event
|
|
// log and throw a popup with a meaningful name to the desktop.
|
|
//
|
|
|
|
POBJECT_NAME_INFORMATION FileNameInfo = NULL;
|
|
NTSTATUS Status;
|
|
|
|
//
|
|
// Increment the count of how many of these we've had. This counter
|
|
// is useful in attempting to discriminate some corruption cases under
|
|
// test.
|
|
//
|
|
|
|
CcLostDelayedWrites += 1;
|
|
|
|
Status = IoQueryFileDosDeviceName( SharedCacheMap->FileObject, &FileNameInfo );
|
|
|
|
if ( Status == STATUS_SUCCESS ) {
|
|
IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA, &FileNameInfo->Name, NULL );
|
|
|
|
} else {
|
|
if ( SharedCacheMap->FileObject->FileName.Length &&
|
|
SharedCacheMap->FileObject->FileName.MaximumLength &&
|
|
SharedCacheMap->FileObject->FileName.Buffer ) {
|
|
|
|
IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA, &SharedCacheMap->FileObject->FileName, NULL );
|
|
}
|
|
}
|
|
|
|
CcLogError( SharedCacheMap->FileObject,
|
|
( Status == STATUS_SUCCESS ?
|
|
&FileNameInfo->Name :
|
|
&SharedCacheMap->FileObject->FileName ),
|
|
IO_LOST_DELAYED_WRITE,
|
|
IoStatus->Status,
|
|
IRP_MJ_WRITE );
|
|
|
|
if (FileNameInfo) {
|
|
ExFreePool(FileNameInfo);
|
|
}
|
|
|
|
//
|
|
// See if there is any deferred writes we can post.
|
|
//
|
|
|
|
} else if (!IsListEmpty(&CcDeferredWrites)) {
|
|
CcPostDeferredWrites();
|
|
}
|
|
|
|
//
|
|
// Now acquire BcbSpinLock again to check for ValidData updates.
|
|
//
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
//
|
|
// If the the current ValidDataGoal is greater (or equal) than ValidDataLength,
|
|
// then we must see if we have advanced beyond the current ValidDataLength.
|
|
//
|
|
// If we have NEVER written anything out from this shared cache map, then
|
|
// there is no need to check anything associtated with valid data length
|
|
// here. We will come by here again when, and if, anybody actually
|
|
// modifies the file and we lazy write some data.
|
|
//
|
|
|
|
Status = STATUS_SUCCESS;
|
|
if (FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED) &&
|
|
(SharedCacheMap->ValidDataGoal.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart) &&
|
|
(SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG) &&
|
|
(SharedCacheMap->FileSize.QuadPart != 0)) {
|
|
|
|
LARGE_INTEGER NewValidDataLength;
|
|
|
|
NewValidDataLength = CcGetFlushedValidData( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
TRUE );
|
|
|
|
//
|
|
// If New ValidDataLength has been written, then we have to
|
|
// call the file system back to update it. We must temporarily
|
|
// drop our global list while we do this, which is safe to do since
|
|
// we have not cleared WRITE_QUEUED.
|
|
//
|
|
// Note we keep calling any time we wrote the last page of the file,
|
|
// to solve the "famous" AFS Server problem. The file system will
|
|
// truncate our valid data call to whatever is currently valid. But
|
|
// then if he writes a little more, we do not want to stop calling
|
|
// back.
|
|
//
|
|
|
|
if ( NewValidDataLength.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart ) {
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Call file system to set new valid data. We have no
|
|
// one to tell if this doesn't work.
|
|
//
|
|
|
|
Status = CcSetValidData( SharedCacheMap->FileObject,
|
|
&NewValidDataLength );
|
|
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
if (NT_SUCCESS(Status)) {
|
|
SharedCacheMap->ValidDataLength = NewValidDataLength;
|
|
#ifdef TOMM
|
|
} else if ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)) {
|
|
DbgPrint("Unexpected status from CcSetValidData: %08lx, FileObject: %08lx\n",
|
|
Status,
|
|
SharedCacheMap->FileObject);
|
|
DbgBreakPoint();
|
|
#endif TOMM
|
|
}
|
|
}
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
|
|
//
|
|
// Show we are done.
|
|
//
|
|
|
|
CcAcquireMasterLock( &LockHandle.OldIrql );
|
|
CcDecrementOpenCount( SharedCacheMap, 'brWF' );
|
|
|
|
//
|
|
// Make an approximate guess about whether we will call CcDeleteSharedCacheMap or not
|
|
// to truncate the file.
|
|
//
|
|
// Also do not delete the SharedCacheMap if we got an error on the ValidDataLength
|
|
// callback. If we get a resource allocation failure or a retryable error (due to
|
|
// log file full?), we have no one to tell, so we must just loop back and try again.
|
|
// Of course all I/O errors are just too bad.
|
|
//
|
|
|
|
if ((SharedCacheMap->OpenCount == 0)
|
|
|
|
&&
|
|
|
|
(NT_SUCCESS(Status) || ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)))) {
|
|
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
FsRtlAcquireFileExclusive( SharedCacheMap->FileObject );
|
|
CcAcquireMasterLock( &LockHandle.OldIrql );
|
|
|
|
//
|
|
// Now really see if we are to delete this SharedCacheMap. By having released
|
|
// first we avoid a deadlock with the file system when the FileObject is
|
|
// dereferenced. Note that CcDeleteSharedCacheMap requires that the
|
|
// CcMasterSpinLock already be acquired, and it releases it.
|
|
//
|
|
// Note that we must retest since we dropped and reacquired the master
|
|
// lock.
|
|
//
|
|
|
|
if ((SharedCacheMap->OpenCount == 0)
|
|
|
|
&&
|
|
|
|
((SharedCacheMap->DirtyPages == 0) || ((SharedCacheMap->FileSize.QuadPart == 0) &&
|
|
!FlagOn(SharedCacheMap->Flags, PIN_ACCESS)))) {
|
|
|
|
//
|
|
// Make sure to drop the requeue flag in case the write hit the timeout at
|
|
// the same time it finished everything up.
|
|
//
|
|
|
|
CcDeleteSharedCacheMap( SharedCacheMap, LockHandle.OldIrql, TRUE );
|
|
IoStatus->Information = 0;
|
|
SharedCacheMap = NULL;
|
|
|
|
} else {
|
|
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
FsRtlReleaseFile( SharedCacheMap->FileObject );
|
|
CcAcquireMasterLock( &LockHandle.OldIrql );
|
|
}
|
|
}
|
|
|
|
//
|
|
// In the normal case, we just clear the flag on the way out if
|
|
// we will not requeue the workitem.
|
|
//
|
|
|
|
if (SharedCacheMap != NULL) {
|
|
|
|
if (IoStatus->Information != CC_REQUEUE) {
|
|
ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
|
|
}
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcWriteBehind->VOID\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
LARGE_INTEGER
|
|
CcGetFlushedValidData (
|
|
IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
|
|
IN BOOLEAN CcInternalCaller
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called by a file system to find out how far the Cache Manager
|
|
has flushed in the stream. More accurately, this routine returns either the FileOffset
|
|
of the lowest dirty page currently in the file.
|
|
|
|
NOTE that even though the routine takes SectionObjectPointer, the caller must insure
|
|
that the stream is cached and stays cached for the duration of this routine, much like
|
|
for the copy routines, etc.
|
|
|
|
Arguments:
|
|
|
|
SectionObjectPointer - A pointer to the Section Object Pointers
|
|
structure in the nonpaged Fcb.
|
|
|
|
CcInternalCaller - must be TRUE if the caller is coming from Cc, FALSE otherwise.
|
|
TRUE imples the need for self-synchronization.
|
|
|
|
Return Value:
|
|
|
|
The derived number for flushed ValidData, or MAXLONGLONG in the quad part if
|
|
the Section is not cached. (Naturally the caller can guarantee that this case
|
|
does not occur, and internal callers do.)
|
|
|
|
--*/
|
|
|
|
{
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
LARGE_INTEGER NewValidDataLength;
|
|
|
|
//
|
|
// External callers may be unsynchronized with this shared cache map
|
|
// perhaps going away underneath this call. NTFS and his
|
|
// pair of streams for compression-on-the-wire is a good example of
|
|
// someone who may be synchronized in one stream but needs to peek at
|
|
// the other.
|
|
//
|
|
|
|
if (!CcInternalCaller) {
|
|
|
|
CcAcquireMasterLock( &LockHandle.OldIrql );
|
|
|
|
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
|
|
|
if (SharedCacheMap == NULL) {
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
NewValidDataLength.QuadPart = MAXLONGLONG;
|
|
return NewValidDataLength;
|
|
}
|
|
|
|
CcIncrementOpenCount( SharedCacheMap, 'dfGS' );
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
KeAcquireInStackQueuedSpinLock( &SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
} else {
|
|
|
|
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
|
}
|
|
|
|
ASSERT( SharedCacheMap != NULL );
|
|
|
|
//
|
|
// If the file is entirely clean, then we wish to return
|
|
// the new ValidDataLength as equal to ValidDataGoal.
|
|
//
|
|
|
|
NewValidDataLength = SharedCacheMap->ValidDataGoal;
|
|
|
|
//
|
|
// If there may be dirty pages we will look at the last Bcb in the
|
|
// descending-order Bcb list, and see if it describes data beyond
|
|
// ValidDataGoal.
|
|
//
|
|
// It is important to note that since we use DirtyPages as a faux
|
|
// reference count over some short windows (+1, -1) the simple
|
|
// fact it is nonzero does *not* mean the file is dirty.
|
|
//
|
|
// (This test is logically too conservative. For example, the last Bcb
|
|
// may not even be dirty (in which case we should look at its
|
|
// predecessor), or we may have earlier written valid data to this
|
|
// byte range (which also means if we knew this we could look at
|
|
// the predessor). This simply means that the Lazy Writer may not
|
|
// successfully get ValidDataLength updated in a file being randomly
|
|
// accessed until the level of file access dies down, or at the latest
|
|
// until the file is closed. However, security will never be
|
|
// compromised.)
|
|
//
|
|
|
|
if (SharedCacheMap->DirtyPages) {
|
|
|
|
PBITMAP_RANGE BitmapRange;
|
|
PBCB LastBcb;
|
|
PMBCB Mbcb = SharedCacheMap->Mbcb;
|
|
|
|
if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0)) {
|
|
|
|
BitmapRange = CcFindBitmapRangeToClean( Mbcb, 0 );
|
|
|
|
ASSERT(BitmapRange->FirstDirtyPage != MAXULONG);
|
|
|
|
NewValidDataLength.QuadPart = (BitmapRange->BasePage + BitmapRange->FirstDirtyPage)
|
|
<< PAGE_SHIFT;
|
|
}
|
|
|
|
LastBcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink,
|
|
BCB,
|
|
BcbLinks );
|
|
|
|
while (&LastBcb->BcbLinks != &SharedCacheMap->BcbList) {
|
|
|
|
if ((LastBcb->NodeTypeCode == CACHE_NTC_BCB) && LastBcb->Dirty) {
|
|
break;
|
|
}
|
|
|
|
LastBcb = CONTAINING_RECORD( LastBcb->BcbLinks.Flink,
|
|
BCB,
|
|
BcbLinks );
|
|
}
|
|
|
|
//
|
|
// Check the Base of the last entry.
|
|
//
|
|
|
|
if ((&LastBcb->BcbLinks != &SharedCacheMap->BcbList) &&
|
|
(LastBcb->FileOffset.QuadPart < NewValidDataLength.QuadPart )) {
|
|
|
|
NewValidDataLength = LastBcb->FileOffset;
|
|
}
|
|
}
|
|
|
|
if (!CcInternalCaller) {
|
|
|
|
//
|
|
// Remove our reference.
|
|
//
|
|
|
|
CcAcquireMasterLockAtDpcLevel();
|
|
CcDecrementOpenCount( SharedCacheMap, 'dfGF' );
|
|
|
|
if ((SharedCacheMap->OpenCount == 0) &&
|
|
!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
|
|
(SharedCacheMap->DirtyPages == 0)) {
|
|
|
|
//
|
|
// Move to the dirty list.
|
|
//
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
|
|
//
|
|
// Make sure the Lazy Writer will wake up, because we
|
|
// want him to delete this SharedCacheMap.
|
|
//
|
|
|
|
LazyWriter.OtherWork = TRUE;
|
|
if (!LazyWriter.ScanActive) {
|
|
CcScheduleLazyWriteScan( FALSE );
|
|
}
|
|
}
|
|
|
|
KeReleaseInStackQueuedSpinLockFromDpcLevel( &LockHandle );
|
|
CcReleaseMasterLock( LockHandle.OldIrql );
|
|
}
|
|
|
|
return NewValidDataLength;
|
|
}
|
|
|
|
|
|
VOID
|
|
CcFlushCache (
|
|
IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
|
|
IN PLARGE_INTEGER FileOffset OPTIONAL,
|
|
IN ULONG Length,
|
|
OUT PIO_STATUS_BLOCK IoStatus OPTIONAL
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to flush dirty data from the cache to the
|
|
cached file on disk. Any byte range within the file may be flushed,
|
|
or the entire file may be flushed by omitting the FileOffset parameter.
|
|
|
|
This routine does not take a Wait parameter; the caller should assume
|
|
that it will always block.
|
|
|
|
Arguments:
|
|
|
|
SectionObjectPointer - A pointer to the Section Object Pointers
|
|
structure in the nonpaged Fcb.
|
|
|
|
FileOffset - If this parameter is supplied (not NULL), then only the
|
|
byte range specified by FileOffset and Length are flushed.
|
|
If &CcNoDelay is specified, then this signifies the call
|
|
from the Lazy Writer, and the lazy write scan should resume
|
|
as normal from the last spot where it left off in the file.
|
|
|
|
Length - Defines the length of the byte range to flush, starting at
|
|
FileOffset. This parameter is ignored if FileOffset is
|
|
specified as NULL.
|
|
|
|
IoStatus - The I/O status resulting from the flush operation.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
LARGE_INTEGER NextFileOffset, TargetOffset;
|
|
ULONG NextLength;
|
|
PBCB FirstBcb;
|
|
KIRQL OldIrql;
|
|
PSHARED_CACHE_MAP SharedCacheMap;
|
|
IO_STATUS_BLOCK TrashStatus;
|
|
PVOID TempVa;
|
|
ULONG RemainingLength, TempLength;
|
|
NTSTATUS PopupStatus;
|
|
LOGICAL HotSpot;
|
|
ULONG BytesWritten = 0;
|
|
LOGICAL PopupRequired = FALSE;
|
|
LOGICAL VerifyRequired = FALSE;
|
|
LOGICAL IsLazyWriter = FALSE;
|
|
LOGICAL FreeActiveVacb = FALSE;
|
|
PVACB ActiveVacb = NULL;
|
|
NTSTATUS Status = STATUS_SUCCESS;
|
|
LARGE_INTEGER EndTick, CurrentTick;
|
|
|
|
DebugTrace(+1, me, "CcFlushCache:\n", 0 );
|
|
DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
|
|
ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
|
|
: 0,
|
|
ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
|
|
: 0 );
|
|
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
|
|
|
//
|
|
// If IoStatus was passed as a Null pointer, set up to throw the status away.
|
|
//
|
|
|
|
if (!ARGUMENT_PRESENT(IoStatus)) {
|
|
IoStatus = &TrashStatus;
|
|
}
|
|
IoStatus->Status = STATUS_SUCCESS;
|
|
IoStatus->Information = 0;
|
|
|
|
//
|
|
// See if this is the Lazy Writer. Since he wants to use this common
|
|
// routine, which is also a public routine callable by file systems,
|
|
// the Lazy Writer shows his call by specifying CcNoDelay as the file offset!
|
|
//
|
|
// Also, in case we do not write anything because we see only HotSpot(s),
|
|
// initialize the Status to indicate a retryable error, so CcWorkerThread
|
|
// knows we did not make any progress. Of course any actual flush will
|
|
// overwrite this code.
|
|
//
|
|
|
|
if (FileOffset == &CcNoDelay) {
|
|
IoStatus->Status = STATUS_VERIFY_REQUIRED;
|
|
IsLazyWriter = TRUE;
|
|
FileOffset = NULL;
|
|
}
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
|
|
SharedCacheMap = SectionObjectPointer->SharedCacheMap;
|
|
|
|
//
|
|
// Awareness is indicated by the lowbit of the FileOffset pointer.
|
|
// Non-awareness of a private write stream results in a no-op.
|
|
//
|
|
|
|
if ((SharedCacheMap != NULL) && FlagOn( SharedCacheMap->Flags, PRIVATE_WRITE )) {
|
|
|
|
if (((ULONG_PTR)FileOffset & 1) == 0) {
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
return;
|
|
|
|
}
|
|
|
|
FileOffset = (PLARGE_INTEGER)((ULONG_PTR)FileOffset ^ 1);
|
|
|
|
}
|
|
|
|
//
|
|
// If there is nothing to do, return here.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(FileOffset) && (Length == 0)) {
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
|
|
return;
|
|
}
|
|
|
|
//
|
|
// See if the file is cached.
|
|
//
|
|
|
|
if (SharedCacheMap != NULL) {
|
|
|
|
//
|
|
// Increment the open count to keep it from going away.
|
|
//
|
|
|
|
CcIncrementOpenCount( SharedCacheMap, 'fcCS' );
|
|
|
|
if ((SharedCacheMap->NeedToZero != NULL) || (SharedCacheMap->ActiveVacb != NULL)) {
|
|
|
|
ULONG FirstPage = 0;
|
|
ULONG LastPage = MAXULONG;
|
|
|
|
if (ARGUMENT_PRESENT(FileOffset)) {
|
|
|
|
FirstPage = (ULONG)(FileOffset->QuadPart >> PAGE_SHIFT);
|
|
LastPage = (ULONG)((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT);
|
|
}
|
|
|
|
//
|
|
// Make sure we do not flush the active page without zeroing any
|
|
// uninitialized data. Also, it is very important to free the active
|
|
// page if it is the one to be flushed, so that we get the dirty
|
|
// bit out to the Pfn.
|
|
//
|
|
|
|
if (((((LONGLONG)LastPage + 1) << PAGE_SHIFT) > SharedCacheMap->ValidDataGoal.QuadPart) ||
|
|
|
|
((SharedCacheMap->NeedToZero != NULL) &&
|
|
(FirstPage <= SharedCacheMap->NeedToZeroPage) &&
|
|
(LastPage >= SharedCacheMap->NeedToZeroPage)) ||
|
|
|
|
((SharedCacheMap->ActiveVacb != NULL) &&
|
|
(FirstPage <= SharedCacheMap->ActivePage) &&
|
|
(LastPage >= SharedCacheMap->ActivePage))) {
|
|
|
|
GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
|
|
FreeActiveVacb = TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
|
|
if (FreeActiveVacb) {
|
|
CcFreeActiveVacb( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
|
|
}
|
|
|
|
//
|
|
// If there is a user-mapped file, then we perform the "service" of
|
|
// flushing even data not written via the file system. Note that this
|
|
// is pretty important for folks provoking the flush/purge of a coherency
|
|
// operation.
|
|
//
|
|
// It is critical this happen before we examine our own hints. In the course
|
|
// of this flush it is possible valid data length will be advanced by the
|
|
// underlying filesystem, with CcZero'ing behind - which will cause us to
|
|
// make some dirty zeroes in the cache. Syscache bug! Note how coherency
|
|
// flushing works ...
|
|
//
|
|
|
|
if ((SharedCacheMap == NULL)
|
|
|
|
||
|
|
|
|
FlagOn(((PFSRTL_COMMON_FCB_HEADER)(SharedCacheMap->FileObject->FsContext))->Flags,
|
|
FSRTL_FLAG_USER_MAPPED_FILE) && !IsLazyWriter) {
|
|
|
|
//
|
|
// Call MM to flush the section through our view.
|
|
//
|
|
|
|
DebugTrace( 0, mm, "MmFlushSection:\n", 0 );
|
|
DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
|
|
ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
|
|
: 0,
|
|
ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
|
|
: 0 );
|
|
DebugTrace( 0, mm, " RegionSize = %08lx\n", Length );
|
|
|
|
Status = MmFlushSection( SectionObjectPointer,
|
|
FileOffset,
|
|
Length,
|
|
IoStatus,
|
|
TRUE );
|
|
|
|
if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) {
|
|
|
|
PopupRequired = TRUE;
|
|
PopupStatus = IoStatus->Status;
|
|
}
|
|
|
|
DebugTrace2(0, mm, " <IoStatus = %08lx, %08lx\n",
|
|
IoStatus->Status, IoStatus->Information );
|
|
}
|
|
|
|
//
|
|
// Scan for dirty pages if there is a shared cache map.
|
|
//
|
|
|
|
if (SharedCacheMap != NULL) {
|
|
|
|
//
|
|
// If FileOffset was not specified then set to flush entire region
|
|
// and set valid data length to the goal so that we will not get
|
|
// any more call backs.
|
|
//
|
|
|
|
if (!IsLazyWriter && !ARGUMENT_PRESENT(FileOffset)) {
|
|
|
|
SharedCacheMap->ValidDataLength = SharedCacheMap->ValidDataGoal;
|
|
}
|
|
|
|
//
|
|
// If this is an explicit flush, initialize our offset to scan for.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(FileOffset)) {
|
|
TargetOffset = *FileOffset;
|
|
}
|
|
|
|
//
|
|
// Assume we want to pass the explicit flush flag in Length.
|
|
// But overwrite it if a length really was specified. On
|
|
// subsequent loops, NextLength will have some nonzero value.
|
|
//
|
|
|
|
NextLength = 1;
|
|
if (Length != 0) {
|
|
NextLength = Length;
|
|
}
|
|
|
|
//
|
|
// Now calculate the tick that will signal the expiration of a
|
|
// lazy writer tick interval.
|
|
//
|
|
|
|
if (IsLazyWriter) {
|
|
|
|
KeQueryTickCount( &EndTick );
|
|
EndTick.QuadPart += CcIdleDelayTick;
|
|
}
|
|
|
|
//
|
|
// Loop as long as we find buffers to flush for this
|
|
// SharedCacheMap, and we are not trying to delete the guy.
|
|
//
|
|
|
|
while (((SharedCacheMap->PagesToWrite != 0) || !IsLazyWriter)
|
|
|
|
&&
|
|
((SharedCacheMap->FileSize.QuadPart != 0) ||
|
|
FlagOn(SharedCacheMap->Flags, PIN_ACCESS))
|
|
|
|
&&
|
|
|
|
!VerifyRequired
|
|
|
|
&&
|
|
|
|
CcAcquireByteRangeForWrite ( SharedCacheMap,
|
|
IsLazyWriter ? NULL : (ARGUMENT_PRESENT(FileOffset) ?
|
|
&TargetOffset : NULL),
|
|
IsLazyWriter ? 0: NextLength,
|
|
&NextFileOffset,
|
|
&NextLength,
|
|
&FirstBcb )) {
|
|
|
|
//
|
|
// Assume this range is not a hot spot.
|
|
//
|
|
|
|
HotSpot = FALSE;
|
|
|
|
//
|
|
// We defer calling Mm to set address range modified until here, to take
|
|
// overhead out of the main line path, and to reduce the number of TBIS
|
|
// on a multiprocessor.
|
|
//
|
|
|
|
RemainingLength = NextLength;
|
|
|
|
do {
|
|
|
|
//
|
|
// See if the next file offset is mapped. (If not, the dirty bit
|
|
// was propagated on the unmap.)
|
|
//
|
|
|
|
if ((TempVa = CcGetVirtualAddressIfMapped( SharedCacheMap,
|
|
NextFileOffset.QuadPart + NextLength - RemainingLength,
|
|
&ActiveVacb,
|
|
&TempLength)) != NULL) {
|
|
|
|
//
|
|
// Reduce TempLength to RemainingLength if necessary, and
|
|
// call MM.
|
|
//
|
|
|
|
if (TempLength > RemainingLength) {
|
|
TempLength = RemainingLength;
|
|
}
|
|
|
|
//
|
|
// Clear the Dirty bit (if set) in the PTE and set the
|
|
// Pfn modified. Assume if the Pte was dirty, that this may
|
|
// be a hot spot. Do not do hot spots for metadata, and unless
|
|
// they are within ValidDataLength as reported to the file system
|
|
// via CcSetValidData.
|
|
//
|
|
|
|
HotSpot = (BOOLEAN)(((MmSetAddressRangeModified(TempVa, TempLength) || HotSpot) &&
|
|
((NextFileOffset.QuadPart + NextLength) <
|
|
(SharedCacheMap->ValidDataLength.QuadPart)) &&
|
|
((SharedCacheMap->LazyWritePassCount & 0xF) != 0) &&
|
|
IsLazyWriter) &&
|
|
!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED));
|
|
|
|
CcFreeVirtualAddress( ActiveVacb );
|
|
|
|
} else {
|
|
|
|
//
|
|
// Reduce TempLength to RemainingLength if necessary.
|
|
//
|
|
|
|
if (TempLength > RemainingLength) {
|
|
TempLength = RemainingLength;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Reduce RemainingLength by what we processed.
|
|
//
|
|
|
|
RemainingLength -= TempLength;
|
|
|
|
//
|
|
// Loop until done.
|
|
//
|
|
|
|
} while (RemainingLength != 0);
|
|
|
|
CcLazyWriteHotSpots += HotSpot;
|
|
|
|
//
|
|
// Now flush if we do not think it is a hot spot.
|
|
//
|
|
|
|
if (!HotSpot) {
|
|
|
|
MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
&NextFileOffset,
|
|
NextLength,
|
|
IoStatus,
|
|
!IsLazyWriter );
|
|
|
|
if (NT_SUCCESS(IoStatus->Status)) {
|
|
|
|
if (!FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED)) {
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
|
|
CcReleaseMasterLock( OldIrql );
|
|
}
|
|
|
|
//
|
|
// Increment performance counters
|
|
//
|
|
|
|
if (IsLazyWriter) {
|
|
|
|
CcLazyWriteIos += 1;
|
|
CcLazyWritePages += (NextLength + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
}
|
|
|
|
} else {
|
|
|
|
LARGE_INTEGER Offset = NextFileOffset;
|
|
ULONG RetryLength = NextLength;
|
|
|
|
DebugTrace2( 0, 0, "I/O Error on Cache Flush: %08lx, %08lx\n",
|
|
IoStatus->Status, IoStatus->Information );
|
|
|
|
if (RetryError(IoStatus->Status)) {
|
|
|
|
VerifyRequired = TRUE;
|
|
|
|
//
|
|
// Loop to write each page individually, starting with one
|
|
// more try on the page that got the error, in case that page
|
|
// or any page beyond it can be successfully written
|
|
// individually. Note that Offset and RetryLength are
|
|
// guaranteed to be in integral pages, but the Information
|
|
// field from the failed request is not.
|
|
//
|
|
// We ignore errors now, and give it one last shot, before
|
|
// setting the pages clean (see below).
|
|
//
|
|
|
|
} else {
|
|
|
|
do {
|
|
|
|
DebugTrace2( 0, 0, "Trying page at offset %08lx, %08lx\n",
|
|
Offset.LowPart, Offset.HighPart );
|
|
|
|
MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
&Offset,
|
|
PAGE_SIZE,
|
|
IoStatus,
|
|
!IsLazyWriter );
|
|
|
|
DebugTrace2( 0, 0, "I/O status = %08lx, %08lx\n",
|
|
IoStatus->Status, IoStatus->Information );
|
|
|
|
if (NT_SUCCESS(IoStatus->Status)) {
|
|
CcAcquireMasterLock( &OldIrql );
|
|
SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
|
|
CcReleaseMasterLock( OldIrql );
|
|
}
|
|
|
|
if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) {
|
|
|
|
PopupRequired = TRUE;
|
|
PopupStatus = IoStatus->Status;
|
|
}
|
|
|
|
VerifyRequired = VerifyRequired || RetryError(IoStatus->Status);
|
|
|
|
Offset.QuadPart = Offset.QuadPart + (LONGLONG)PAGE_SIZE;
|
|
RetryLength -= PAGE_SIZE;
|
|
|
|
} while(RetryLength > 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now release the Bcb resources and set them clean. Note we do not check
|
|
// here for errors, and just returned in the I/O status. Errors on writes
|
|
// are rare to begin with. Nonetheless, our strategy is to rely on
|
|
// one or more of the following (depending on the file system) to prevent
|
|
// errors from getting to us.
|
|
//
|
|
// - Retries and/or other forms of error recovery in the disk driver
|
|
// - Mirroring driver
|
|
// - Hot fixing in the noncached path of the file system
|
|
//
|
|
// In the unexpected case that a write error does get through, we
|
|
// *currently* just set the Bcbs clean anyway, rather than let
|
|
// Bcbs and pages accumulate which cannot be written. Note we did
|
|
// a popup above to at least notify the guy.
|
|
//
|
|
// Set the pages dirty again if we either saw a HotSpot or got
|
|
// verify required.
|
|
//
|
|
|
|
CcReleaseByteRangeFromWrite ( SharedCacheMap,
|
|
&NextFileOffset,
|
|
NextLength,
|
|
FirstBcb,
|
|
(BOOLEAN)(HotSpot || VerifyRequired) );
|
|
|
|
//
|
|
// See if there is any deferred writes we should post.
|
|
//
|
|
|
|
BytesWritten += NextLength;
|
|
if ((BytesWritten >= 0x40000) && !IsListEmpty(&CcDeferredWrites)) {
|
|
CcPostDeferredWrites();
|
|
BytesWritten = 0;
|
|
}
|
|
|
|
//
|
|
// If we're the lazy writer and have spent more than the active tick
|
|
// length in this loop, break out for a requeue so we share the
|
|
// file resources.
|
|
//
|
|
|
|
if (IsLazyWriter) {
|
|
|
|
KeQueryTickCount( &CurrentTick );
|
|
|
|
if (CurrentTick.QuadPart > EndTick.QuadPart) {
|
|
IoStatus->Information = CC_REQUEUE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now for explicit flushes, we should advance our range.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(FileOffset)) {
|
|
|
|
NextFileOffset.QuadPart += NextLength;
|
|
|
|
//
|
|
// Done yet?
|
|
//
|
|
|
|
if ((FileOffset->QuadPart + Length) <= NextFileOffset.QuadPart) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Calculate new target range
|
|
//
|
|
|
|
NextLength = (ULONG)((FileOffset->QuadPart + Length) - NextFileOffset.QuadPart);
|
|
TargetOffset = NextFileOffset;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// See if there are any deferred writes we should post if
|
|
// we escaped the loop without checking after a series of
|
|
// flushes.
|
|
//
|
|
|
|
if (BytesWritten != 0 && !IsListEmpty(&CcDeferredWrites)) {
|
|
|
|
CcPostDeferredWrites();
|
|
}
|
|
|
|
//
|
|
// Now we can get rid of the open count, and clean up as required.
|
|
//
|
|
|
|
if (SharedCacheMap != NULL) {
|
|
|
|
//
|
|
// Serialize again to decrement the open count.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
|
|
CcDecrementOpenCount( SharedCacheMap, 'fcCF' );
|
|
|
|
if ((SharedCacheMap->OpenCount == 0) &&
|
|
!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
|
|
(SharedCacheMap->DirtyPages == 0)) {
|
|
|
|
//
|
|
// Move to the dirty list.
|
|
//
|
|
|
|
RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
|
|
InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
|
|
&SharedCacheMap->SharedCacheMapLinks );
|
|
|
|
//
|
|
// Make sure the Lazy Writer will wake up, because we
|
|
// want him to delete this SharedCacheMap.
|
|
//
|
|
|
|
LazyWriter.OtherWork = TRUE;
|
|
if (!LazyWriter.ScanActive) {
|
|
CcScheduleLazyWriteScan( FALSE );
|
|
}
|
|
}
|
|
|
|
CcReleaseMasterLock( OldIrql );
|
|
}
|
|
|
|
//
|
|
// Make sure and return the first error to our caller. In the
|
|
// case of the Lazy Writer, a popup will be issued.
|
|
//
|
|
|
|
if (PopupRequired) {
|
|
IoStatus->Status = PopupStatus;
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
PVOID
|
|
CcRemapBcb (
|
|
IN PVOID Bcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called by a file system to map a Bcb an additional
|
|
time in order to preserve it through several calls that perform additional
|
|
maps and unpins.
|
|
|
|
|
|
Arguments:
|
|
|
|
Bcb - Supplies a pointer to a previously returned Bcb.
|
|
|
|
Return Value:
|
|
|
|
Bcb with read-only indicator.
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL OldIrql;
|
|
PVACB Vacb;
|
|
|
|
//
|
|
// Remove read-only bit
|
|
//
|
|
|
|
Bcb = (PVOID) ((ULONG_PTR)Bcb & ~1);
|
|
|
|
if (((PBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) {
|
|
|
|
//
|
|
// If this is an overlapped BCB, use the first Vacb in the
|
|
// array
|
|
//
|
|
|
|
Vacb = ((POBCB)Bcb)->Bcbs[0]->Vacb;
|
|
|
|
} else if (((PBCB)Bcb)->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
//
|
|
// If this is a BCB, extract the Vcb from it
|
|
//
|
|
|
|
Vacb = ((PBCB)Bcb)->Vacb;
|
|
|
|
} else {
|
|
|
|
//
|
|
// Otherwise, there is no signature to match. Assume
|
|
// it is a Vacb.
|
|
//
|
|
|
|
Vacb = (PVACB) Bcb;
|
|
}
|
|
|
|
ASSERT((Vacb >= CcVacbs) && (Vacb < CcBeyondVacbs));
|
|
|
|
//
|
|
// Safely bump the active count
|
|
//
|
|
|
|
CcAcquireVacbLock( &OldIrql );
|
|
|
|
Vacb->Overlay.ActiveCount += 1;
|
|
|
|
CcReleaseVacbLock( OldIrql );
|
|
|
|
return (PVOID) ((ULONG_PTR)Vacb | 1);
|
|
}
|
|
|
|
|
|
VOID
|
|
CcRepinBcb (
|
|
IN PVOID Bcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called by a file system to pin a Bcb an additional
|
|
time in order to reserve it for Write Through or error recovery.
|
|
Typically the file system would do this the first time that it sets a
|
|
pinned buffer dirty while processing a WriteThrough request, or any
|
|
time that it determines that a buffer will be required for WriteThrough.
|
|
|
|
The call to this routine must be followed by a call to CcUnpinRepinnedBcb.
|
|
CcUnpinRepinnedBcb should normally be called during request completion
|
|
after all other resources have been released. CcUnpinRepinnedBcb
|
|
synchronously writes the buffer (for WriteThrough requests) and performs
|
|
the matching unpin for this call.
|
|
|
|
Arguments:
|
|
|
|
Bcb - Supplies a pointer to a previously pinned Bcb
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
KLOCK_QUEUE_HANDLE LockHandle;
|
|
|
|
KeAcquireInStackQueuedSpinLock( &((PBCB)Bcb)->SharedCacheMap->BcbSpinLock, &LockHandle );
|
|
|
|
((PBCB)Bcb)->PinCount += 1;
|
|
|
|
KeReleaseInStackQueuedSpinLock( &LockHandle );
|
|
}
|
|
|
|
|
|
VOID
|
|
CcUnpinRepinnedBcb (
|
|
IN PVOID Bcb,
|
|
IN BOOLEAN WriteThrough,
|
|
OUT PIO_STATUS_BLOCK IoStatus
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to Write a previously pinned buffer
|
|
through to the file. It must have been preceded by a call to
|
|
CcRepinBcb. As this routine must acquire the Bcb
|
|
resource exclusive, the caller must be extremely careful to avoid
|
|
deadlocks. Ideally the caller owns no resources at all when it
|
|
calls this routine, or else the caller should guarantee that it
|
|
has nothing else pinned in this same file. (The latter rule is
|
|
the one used to avoid deadlocks in calls from CcCopyWrite and
|
|
CcMdlWrite.)
|
|
|
|
Arguments:
|
|
|
|
Bcb - Pointer to a Bcb which was previously specified in a call
|
|
to CcRepinBcb.
|
|
|
|
WriteThrough - TRUE if the Bcb should be written through.
|
|
|
|
IoStatus - Returns the I/O status for the operation.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PSHARED_CACHE_MAP SharedCacheMap = ((PBCB)Bcb)->SharedCacheMap;
|
|
|
|
DebugTrace(+1, me, "CcUnpinRepinnedBcb\n", 0 );
|
|
DebugTrace( 0, me, " Bcb = %08lx\n", Bcb );
|
|
DebugTrace( 0, me, " WriteThrough = %02lx\n", WriteThrough );
|
|
|
|
//
|
|
// Set status to success for non write through case.
|
|
//
|
|
|
|
IoStatus->Status = STATUS_SUCCESS;
|
|
|
|
if (WriteThrough) {
|
|
|
|
//
|
|
// Acquire Bcb exclusive to eliminate possible modifiers of the buffer,
|
|
// since we are about to write its buffer.
|
|
//
|
|
|
|
if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
|
|
ExAcquireResourceExclusiveLite( &((PBCB)Bcb)->Resource, TRUE );
|
|
}
|
|
|
|
//
|
|
// Now, there is a chance that the LazyWriter has already written
|
|
// it, since the resource was free. We will only write it if it
|
|
// is still dirty.
|
|
//
|
|
|
|
if (((PBCB)Bcb)->Dirty) {
|
|
|
|
//
|
|
// First we make sure that the dirty bit in the PFN database is set.
|
|
//
|
|
|
|
ASSERT( ((PBCB)Bcb)->BaseAddress != NULL );
|
|
MmSetAddressRangeModified( ((PBCB)Bcb)->BaseAddress,
|
|
((PBCB)Bcb)->ByteLength );
|
|
|
|
//
|
|
// Now release the Bcb resource and set it clean. Note we do not check
|
|
// here for errors, and just return the I/O status. Errors on writes
|
|
// are rare to begin with. Nonetheless, our strategy is to rely on
|
|
// one or more of the following (depending on the file system) to prevent
|
|
// errors from getting to us.
|
|
//
|
|
// - Retries and/or other forms of error recovery in the disk driver
|
|
// - Mirroring driver
|
|
// - Hot fixing in the noncached path of the file system
|
|
//
|
|
// In the unexpected case that a write error does get through, we
|
|
// report it to our caller, but go ahead and set the Bcb clean. There
|
|
// seems to be no point in letting Bcbs (and pages in physical memory)
|
|
// accumulate which can never go away because we get an unrecoverable I/O
|
|
// error.
|
|
//
|
|
|
|
//
|
|
// We specify TRUE here for ReadOnly so that we will keep the
|
|
// resource during the flush.
|
|
//
|
|
|
|
CcUnpinFileData( (PBCB)Bcb, TRUE, SET_CLEAN );
|
|
|
|
//
|
|
// Write it out.
|
|
//
|
|
|
|
MmFlushSection( ((PBCB)Bcb)->SharedCacheMap->FileObject->SectionObjectPointer,
|
|
&((PBCB)Bcb)->FileOffset,
|
|
((PBCB)Bcb)->ByteLength,
|
|
IoStatus,
|
|
TRUE );
|
|
|
|
//
|
|
// If we got verify required, we have to mark the buffer dirty again
|
|
// so we will try again later.
|
|
//
|
|
|
|
if (RetryError(IoStatus->Status)) {
|
|
CcSetDirtyPinnedData( (PBCB)Bcb, NULL );
|
|
}
|
|
|
|
//
|
|
// Now remove the final pin count now that we have set it clean.
|
|
//
|
|
|
|
CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
|
|
|
|
//
|
|
// See if there is any deferred writes we can post.
|
|
//
|
|
|
|
if (!IsListEmpty(&CcDeferredWrites)) {
|
|
CcPostDeferredWrites();
|
|
}
|
|
}
|
|
else {
|
|
|
|
//
|
|
// Lazy Writer got there first, just free the resource and unpin.
|
|
//
|
|
|
|
CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
|
|
|
|
}
|
|
|
|
DebugTrace2(0, me, " <IoStatus = %08lx, %08lx\n", IoStatus->Status,
|
|
IoStatus->Information );
|
|
}
|
|
|
|
//
|
|
// Non-WriteThrough case
|
|
//
|
|
|
|
else {
|
|
|
|
CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
|
|
|
|
//
|
|
// Set status to success for non write through case.
|
|
//
|
|
|
|
IoStatus->Status = STATUS_SUCCESS;
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcUnpinRepinnedBcb -> VOID\n", 0 );
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
BOOLEAN
|
|
CcFindBcb (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN OUT PLARGE_INTEGER BeyondLastByte,
|
|
OUT PBCB *Bcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called to find a Bcb describing the specified byte range
|
|
of a file. It returns TRUE if it could at least find a Bcb which describes
|
|
the beginning of the specified byte range, or else FALSE if the first
|
|
part of the byte range is not present. In the latter case, the requested
|
|
byte range (TrialLength) is truncated if there is currently a Bcb which
|
|
describes bytes beyond the beginning of the byte range.
|
|
|
|
The caller may see if the entire byte range is being returned by examining
|
|
the Bcb, and the caller (or caller's caller) may then make subsequent
|
|
calls if the data is not all returned.
|
|
|
|
The BcbSpinLock must be currently acquired.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - Supplies a pointer to the SharedCacheMap for the file
|
|
in which the byte range is desired.
|
|
|
|
FileOffset - Supplies the file offset for the beginning of the desired
|
|
byte range.
|
|
|
|
BeyondLastByte - Supplies the file offset of the ending of the desired
|
|
byte range + 1. Note that this offset will be truncated
|
|
on return if the Bcb was not found, but bytes beyond the
|
|
beginning of the Bcb are contained in another Bcb.
|
|
|
|
Bcb - returns a Bcb describing the beginning of the byte range if also
|
|
returning TRUE, or else the point in the Bcb list to insert after.
|
|
|
|
Return Value:
|
|
|
|
FALSE - if no Bcb describes the beginning of the desired byte range
|
|
|
|
TRUE - if a Bcb is being returned describing at least an initial
|
|
part of the byte range.
|
|
|
|
--*/
|
|
|
|
{
|
|
PLIST_ENTRY BcbList;
|
|
PBCB Bcbt;
|
|
BOOLEAN Found = FALSE;
|
|
|
|
DebugTrace(+1, me, "CcFindBcb:\n", 0 );
|
|
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace2(0, me, " TrialLength = %08lx, %08lx\n", TrialLength->LowPart,
|
|
TrialLength->HighPart );
|
|
|
|
//
|
|
// We want to terminate scans by testing the NodeTypeCode field from the
|
|
// BcbLinks, so we want to see the SharedCacheMap signature from the same
|
|
// offset.
|
|
//
|
|
|
|
ASSERT(FIELD_OFFSET(SHARED_CACHE_MAP, BcbList) == FIELD_OFFSET(BCB, BcbLinks));
|
|
|
|
//
|
|
// Similarly, when we hit one of the BcbListHeads in the array, small negative
|
|
// offsets are all structure pointers, so we are counting on the Bcb signature
|
|
// to have some non-Ulong address bits set.
|
|
//
|
|
|
|
ASSERT((CACHE_NTC_BCB & 3) != 0);
|
|
|
|
//
|
|
// Get address of Bcb listhead that is *after* the Bcb we are looking for,
|
|
// for backwards scan. It is important that we fail in the forward
|
|
// direction so that we are looking in the right segment of the Bcb list.
|
|
//
|
|
|
|
BcbList = GetBcbListHead( SharedCacheMap, FileOffset->QuadPart + SIZE_PER_BCB_LIST, TRUE );
|
|
|
|
//
|
|
// Search for an entry that overlaps the specified range, or until we hit
|
|
// a listhead.
|
|
//
|
|
|
|
Bcbt = CONTAINING_RECORD(BcbList->Flink, BCB, BcbLinks);
|
|
|
|
//
|
|
// First see if we really have to do Large arithmetic or not, and
|
|
// then use either a 32-bit loop or a 64-bit loop to search for
|
|
// the Bcb.
|
|
//
|
|
|
|
if (FileOffset->HighPart == 0 &&
|
|
Bcbt->NodeTypeCode == CACHE_NTC_BCB &&
|
|
Bcbt->BeyondLastByte.HighPart == 0) {
|
|
|
|
//
|
|
// 32-bit - loop until we get back to a listhead.
|
|
//
|
|
|
|
while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
//
|
|
// Since the Bcb list is in descending order, we first check
|
|
// if we are completely beyond the current entry, and if so
|
|
// get out.
|
|
//
|
|
|
|
if (FileOffset->LowPart >= Bcbt->BeyondLastByte.LowPart) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Next check if the first byte we are looking for is
|
|
// contained in the current Bcb. If so, we either have
|
|
// a partial hit and must truncate to the exact amount
|
|
// we have found, or we may have a complete hit. In
|
|
// either case we break with Found == TRUE.
|
|
//
|
|
|
|
if (FileOffset->LowPart >= Bcbt->FileOffset.LowPart) {
|
|
Found = TRUE;
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Now we know we must loop back and keep looking, but we
|
|
// still must check for the case where the tail end of the
|
|
// bytes we are looking for are described by the current
|
|
// Bcb. If so we must truncate what we are looking for,
|
|
// because this routine is only supposed to return bytes
|
|
// from the start of the desired range.
|
|
//
|
|
|
|
if (BeyondLastByte->LowPart >= Bcbt->FileOffset.LowPart) {
|
|
BeyondLastByte->LowPart = Bcbt->FileOffset.LowPart;
|
|
}
|
|
|
|
//
|
|
// Advance to next entry in list (which is possibly back to
|
|
// the listhead) and loop back.
|
|
//
|
|
|
|
Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
|
|
BCB,
|
|
BcbLinks );
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
//
|
|
// 64-bit - Loop until we get back to a listhead.
|
|
//
|
|
|
|
while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
//
|
|
// Since the Bcb list is in descending order, we first check
|
|
// if we are completely beyond the current entry, and if so
|
|
// get out.
|
|
//
|
|
|
|
if (FileOffset->QuadPart >= Bcbt->BeyondLastByte.QuadPart) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Next check if the first byte we are looking for is
|
|
// contained in the current Bcb. If so, we either have
|
|
// a partial hit and must truncate to the exact amount
|
|
// we have found, or we may have a complete hit. In
|
|
// either case we break with Found == TRUE.
|
|
//
|
|
|
|
if (FileOffset->QuadPart >= Bcbt->FileOffset.QuadPart) {
|
|
Found = TRUE;
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Now we know we must loop back and keep looking, but we
|
|
// still must check for the case where the tail end of the
|
|
// bytes we are looking for are described by the current
|
|
// Bcb. If so we must truncate what we are looking for,
|
|
// because this routine is only supposed to return bytes
|
|
// from the start of the desired range.
|
|
//
|
|
|
|
if (BeyondLastByte->QuadPart >= Bcbt->FileOffset.QuadPart) {
|
|
BeyondLastByte->QuadPart = Bcbt->FileOffset.QuadPart;
|
|
}
|
|
|
|
//
|
|
// Advance to next entry in list (which is possibly back to
|
|
// the listhead) and loop back.
|
|
//
|
|
|
|
Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
|
|
BCB,
|
|
BcbLinks );
|
|
|
|
}
|
|
}
|
|
|
|
*Bcb = Bcbt;
|
|
|
|
DebugTrace2(0, me, " <TrialLength = %08lx, %08lx\n", TrialLength->LowPart,
|
|
TrialLength->HighPart );
|
|
DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
|
|
DebugTrace(-1, me, "CcFindBcb -> %02lx\n", Found );
|
|
|
|
return Found;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
PBCB
|
|
CcAllocateInitializeBcb (
|
|
IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
|
|
IN OUT PBCB AfterBcb,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN PLARGE_INTEGER TrialLength
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine allocates and initializes a Bcb to describe the specified
|
|
byte range, and inserts it into the Bcb List of the specified Shared
|
|
Cache Map. The Bcb List spin lock must currently be acquired.
|
|
|
|
BcbSpinLock must be acquired on entry.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - Supplies the SharedCacheMap for the new Bcb.
|
|
|
|
AfterBcb - Supplies where in the descending-order BcbList the new Bcb
|
|
should be inserted: either the ListHead (masquerading as
|
|
a Bcb) or a Bcb.
|
|
|
|
FileOffset - Supplies File Offset for the desired data.
|
|
|
|
TrialLength - Supplies length of desired data.
|
|
|
|
Return Value:
|
|
|
|
Address of the allocated and initialized Bcb
|
|
|
|
--*/
|
|
|
|
{
|
|
PBCB Bcb;
|
|
ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7;
|
|
|
|
if ((Bcb = ExAllocatePoolWithTag( NonPagedPool, sizeof(BCB), 'cBcC')) == NULL) {
|
|
|
|
return NULL;
|
|
}
|
|
|
|
//
|
|
// Initialize the newly allocated Bcb. First zero it, then fill in
|
|
// nonzero fields.
|
|
//
|
|
|
|
RtlZeroMemory( Bcb, RoundedBcbSize );
|
|
|
|
//
|
|
// For Mbcb's, SharedCacheMap is NULL, and the rest of this initialization
|
|
// is not desired.
|
|
//
|
|
|
|
if (SharedCacheMap != NULL) {
|
|
|
|
Bcb->NodeTypeCode = CACHE_NTC_BCB;
|
|
Bcb->FileOffset = *FileOffset;
|
|
Bcb->ByteLength = TrialLength->LowPart;
|
|
Bcb->BeyondLastByte.QuadPart = FileOffset->QuadPart + TrialLength->QuadPart;
|
|
Bcb->PinCount += 1;
|
|
ExInitializeResourceLite( &Bcb->Resource );
|
|
Bcb->SharedCacheMap = SharedCacheMap;
|
|
|
|
//
|
|
// Since CcCalculateVacbLockCount has to be able to walk
|
|
// the BcbList with only the VacbSpinLock, we take that one
|
|
// out to change the list and set the count.
|
|
//
|
|
|
|
CcAcquireVacbLockAtDpcLevel();
|
|
InsertTailList( &AfterBcb->BcbLinks, &Bcb->BcbLinks );
|
|
|
|
ASSERT( (SharedCacheMap->SectionSize.QuadPart < VACB_SIZE_OF_FIRST_LEVEL) ||
|
|
(CcFindBcb(SharedCacheMap, FileOffset, &Bcb->BeyondLastByte, &AfterBcb) &&
|
|
(Bcb == AfterBcb)) );
|
|
|
|
//
|
|
// Now for large metadata streams we lock the Vacb level.
|
|
//
|
|
|
|
CcLockVacbLevel( SharedCacheMap, FileOffset->QuadPart );
|
|
CcReleaseVacbLockFromDpcLevel();
|
|
|
|
//
|
|
// If this resource was no write behind, let Ex know that the
|
|
// resource will never be acquired exclusive. Also disable
|
|
// boost (I know this is useless, but KenR said I had to do it).
|
|
//
|
|
|
|
if (SharedCacheMap &&
|
|
FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
|
|
#if DBG
|
|
SetFlag(Bcb->Resource.Flag, ResourceNeverExclusive);
|
|
#endif
|
|
ExDisableResourceBoost( &Bcb->Resource );
|
|
}
|
|
}
|
|
|
|
return Bcb;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal support routine
|
|
//
|
|
|
|
VOID
|
|
FASTCALL
|
|
CcDeallocateBcb (
|
|
IN PBCB Bcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine deallocates a Bcb to the BcbZone. It must
|
|
already be removed from the BcbList.
|
|
|
|
Arguments:
|
|
|
|
Bcb - the Bcb to deallocate
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
//
|
|
// Deallocate Resource structures
|
|
//
|
|
|
|
if (Bcb->NodeTypeCode == CACHE_NTC_BCB) {
|
|
|
|
ExDeleteResourceLite( &Bcb->Resource );
|
|
}
|
|
|
|
ExFreePool(Bcb);
|
|
return;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
BOOLEAN
|
|
CcMapAndRead(
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN ULONG ZeroFlags,
|
|
IN BOOLEAN Wait,
|
|
IN PVOID BaseAddress
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to insure that the specified data is mapped,
|
|
read into memory and locked. If TRUE is returned, then the
|
|
correct I/O status for the transfer is also returned, along with
|
|
a system-space address for the data.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - Supplies the address of the SharedCacheMap for the
|
|
data.
|
|
|
|
FileOffset - Supplies the file offset of the desired data.
|
|
|
|
Length - Supplies the total amount of data desired.
|
|
|
|
ZeroFlags - Defines which pages may be zeroed if not resident.
|
|
|
|
Wait - Supplies FALSE if the caller is not willing to block for the
|
|
data, or TRUE if the caller is willing to block.
|
|
|
|
BaseAddress - Supplies the system base address at which the data may
|
|
be accessed.
|
|
|
|
Return Value:
|
|
|
|
FALSE - if the caller supplied Wait = FALSE and the data could not
|
|
be returned without blocking.
|
|
|
|
TRUE - if the data is being returned.
|
|
|
|
Note: this routine may raise an exception due to a map or read failure,
|
|
however, this can only happen if Wait was specified as TRUE, since
|
|
mapping and reading will not be performed if the caller cannot wait.
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG ZeroCase;
|
|
ULONG SavedState;
|
|
BOOLEAN Result = FALSE;
|
|
PETHREAD Thread = PsGetCurrentThread();
|
|
|
|
UNREFERENCED_PARAMETER (SharedCacheMap);
|
|
UNREFERENCED_PARAMETER (FileOffset);
|
|
|
|
MmSavePageFaultReadAhead( Thread, &SavedState );
|
|
|
|
//
|
|
// try around everything for cleanup.
|
|
//
|
|
|
|
try {
|
|
|
|
ULONG PagesToGo;
|
|
|
|
//
|
|
// Now loop to touch all of the pages, calling MM to insure
|
|
// that if we fault, we take in exactly the number of pages
|
|
// we need.
|
|
//
|
|
|
|
PagesToGo = ADDRESS_AND_SIZE_TO_SPAN_PAGES( BaseAddress, Length );
|
|
|
|
//
|
|
// Loop to touch or zero the pages.
|
|
//
|
|
|
|
ZeroCase = ZERO_FIRST_PAGE;
|
|
|
|
while (PagesToGo) {
|
|
|
|
//
|
|
// If we cannot zero this page, or Mm failed to return
|
|
// a zeroed page, then just fault it in.
|
|
//
|
|
|
|
MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
|
|
|
|
if (!FlagOn(ZeroFlags, ZeroCase) ||
|
|
!MmCheckCachedPageState(BaseAddress, TRUE)) {
|
|
|
|
//
|
|
// If we get here, it is almost certainly due to the fact
|
|
// that we can not take a zero page. MmCheckCachedPageState
|
|
// will so rarely return FALSE, that we will not worry
|
|
// about it. We will only check if the page is there if
|
|
// Wait is FALSE, so that we can do the right thing.
|
|
//
|
|
|
|
if (!MmCheckCachedPageState(BaseAddress, FALSE) && !Wait) {
|
|
try_return( Result = FALSE );
|
|
}
|
|
}
|
|
|
|
BaseAddress = (PCHAR)BaseAddress + PAGE_SIZE;
|
|
PagesToGo -= 1;
|
|
|
|
if (PagesToGo == 1) {
|
|
ZeroCase = ZERO_LAST_PAGE;
|
|
} else {
|
|
ZeroCase = ZERO_MIDDLE_PAGES;
|
|
}
|
|
}
|
|
|
|
try_return( Result = TRUE );
|
|
|
|
try_exit: NOTHING;
|
|
}
|
|
|
|
//
|
|
// Cleanup on the way out.
|
|
//
|
|
|
|
finally {
|
|
|
|
MmResetPageFaultReadAhead(Thread, SavedState);
|
|
}
|
|
|
|
return Result;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
VOID
|
|
CcFreeActiveVacb (
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PVACB ActiveVacb OPTIONAL,
|
|
IN ULONG ActivePage,
|
|
IN ULONG PageIsDirty
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to zero the end of a locked page or
|
|
free the ActiveVacb for a Shared Cache Map, if there is one.
|
|
Note that some callers are not synchronized with foreground
|
|
activity, and may therefore not have an ActiveVacb. Examples
|
|
of unsynchronized callers are CcZeroEndOfLastPage (which is
|
|
called by MM) and any flushing done by CcWriteBehind.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - SharedCacheMap to examine for page to be zeroed.
|
|
|
|
ActiveVacb - Vacb to free
|
|
|
|
ActivePage - Page that was used
|
|
|
|
PageIsDirty - ACTIVE_PAGE_IS_DIRTY if the active page is dirty
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
LARGE_INTEGER ActiveOffset;
|
|
PVOID ActiveAddress;
|
|
ULONG BytesLeftInPage;
|
|
KIRQL OldIrql;
|
|
|
|
//
|
|
// If the page was locked, then unlock it.
|
|
//
|
|
|
|
if (SharedCacheMap->NeedToZero != NULL) {
|
|
|
|
PVACB NeedToZeroVacb;
|
|
|
|
//
|
|
// Zero the rest of the page under spinlock control,
|
|
// and then clear the address field. This field makes
|
|
// zero->nonzero transitions only when the file is exclusive,
|
|
// but it can make nonzero->zero transitions any time the
|
|
// spinlock is not held.
|
|
//
|
|
|
|
ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
|
|
|
|
//
|
|
// The address could already be gone.
|
|
//
|
|
|
|
ActiveAddress = SharedCacheMap->NeedToZero;
|
|
if (ActiveAddress != NULL) {
|
|
|
|
BytesLeftInPage = PAGE_SIZE - ((((ULONG)((ULONG_PTR)ActiveAddress) - 1) & (PAGE_SIZE - 1)) + 1);
|
|
|
|
RtlZeroBytes( ActiveAddress, BytesLeftInPage );
|
|
NeedToZeroVacb = SharedCacheMap->NeedToZeroVacb;
|
|
ASSERT( NeedToZeroVacb != NULL );
|
|
SharedCacheMap->NeedToZero = NULL;
|
|
|
|
}
|
|
ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
|
|
|
|
//
|
|
// Now call MM to unlock the address. Note we will never store the
|
|
// address at the start of the page, but we can sometimes store
|
|
// the start of the next page when we have exactly filled the page.
|
|
//
|
|
|
|
if (ActiveAddress != NULL) {
|
|
MmUnlockCachedPage( (PVOID)((PCHAR)ActiveAddress - 1) );
|
|
CcFreeVirtualAddress( NeedToZeroVacb );
|
|
}
|
|
}
|
|
|
|
//
|
|
// See if caller actually has an ActiveVacb
|
|
//
|
|
|
|
if (ActiveVacb != NULL) {
|
|
|
|
//
|
|
// See if the page is dirty
|
|
//
|
|
|
|
if (PageIsDirty) {
|
|
|
|
ActiveOffset.QuadPart = (LONGLONG)ActivePage << PAGE_SHIFT;
|
|
ActiveAddress = (PVOID)((PCHAR)ActiveVacb->BaseAddress +
|
|
(ActiveOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)));
|
|
|
|
//
|
|
// Tell the Lazy Writer to write the page.
|
|
//
|
|
|
|
CcSetDirtyInMask( SharedCacheMap, &ActiveOffset, PAGE_SIZE );
|
|
|
|
//
|
|
// Now we need to clear the flag and decrement some counts if there is
|
|
// no other active Vacb which snuck in.
|
|
//
|
|
|
|
CcAcquireMasterLock( &OldIrql );
|
|
ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
|
if ((SharedCacheMap->ActiveVacb == NULL) &&
|
|
FlagOn(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY)) {
|
|
|
|
ClearFlag(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY);
|
|
CcDeductDirtyPages( SharedCacheMap, 1);
|
|
}
|
|
ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
|
CcReleaseMasterLock( OldIrql );
|
|
}
|
|
|
|
//
|
|
// Now free the Vacb.
|
|
//
|
|
|
|
CcFreeVirtualAddress( ActiveVacb );
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Internal Support Routine
|
|
//
|
|
|
|
VOID
|
|
CcMapAndCopy(
|
|
IN PSHARED_CACHE_MAP SharedCacheMap,
|
|
IN PVOID UserBuffer,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN ULONG ZeroFlags,
|
|
IN PFILE_OBJECT FileObject
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine may be called to copy the specified user data to the
|
|
cache via a special Mm routine which copies the data to uninitialized
|
|
pages and returns.
|
|
|
|
Arguments:
|
|
|
|
SharedCacheMap - Supplies the address of the SharedCacheMap for the
|
|
data.
|
|
|
|
UserBuffer - unsafe buffer supplying the user's data to be written
|
|
|
|
FileOffset - Supplies the file offset to be modified
|
|
|
|
Length - Supplies the total amount of data
|
|
|
|
ZeroFlags - Defines which pages may be zeroed if not resident.
|
|
|
|
FileObject - Supplies the file object being written to
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG ReceivedLength;
|
|
ULONG ZeroCase;
|
|
PVOID CacheBuffer;
|
|
PVOID SavedMappedBuffer;
|
|
ULONG SavedMappedLength;
|
|
ULONG ActivePage;
|
|
KIRQL OldIrql;
|
|
LARGE_INTEGER PFileOffset;
|
|
IO_STATUS_BLOCK IoStatus;
|
|
NTSTATUS Status;
|
|
ULONG SavedState;
|
|
LOGICAL MorePages;
|
|
BOOLEAN WriteThrough = BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH );
|
|
ULONG SavedTotalLength = Length;
|
|
LARGE_INTEGER LocalOffset;
|
|
ULONG PageOffset = FileOffset->LowPart & (PAGE_SIZE - 1);
|
|
PVACB Vacb = NULL;
|
|
PETHREAD Thread = PsGetCurrentThread();
|
|
|
|
//
|
|
// Initialize SavePage to TRUE to skip the finally clause on zero-length
|
|
// writes.
|
|
//
|
|
|
|
BOOLEAN SavePage = TRUE;
|
|
|
|
//
|
|
// PREfix needs to see this explicitly, as opposed to a structure copy.
|
|
//
|
|
|
|
LocalOffset.QuadPart = FileOffset->QuadPart;
|
|
|
|
DebugTrace(+1, me, "CcMapAndCopy:\n", 0 );
|
|
DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
|
|
DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
|
|
FileOffset->HighPart );
|
|
DebugTrace( 0, me, " Length = %08lx\n", Length );
|
|
|
|
MmSavePageFaultReadAhead( Thread, &SavedState );
|
|
|
|
//
|
|
// BUGBUG: re-enable this path when we can also generate a ccsetvaliddata call
|
|
// in all cases to fix corruption issue see 615074)
|
|
//
|
|
|
|
#if 0
|
|
|
|
//
|
|
// See if we need to force write through. If the file object is of remote origin,
|
|
// it has been exempted from throttling. As a result, it is possible that too
|
|
// many pages will get dirty. In order to prevent this, we force write through
|
|
// on these file objects if we would have throttled them in the first place.
|
|
//
|
|
|
|
if (!WriteThrough && IoIsFileOriginRemote(FileObject)
|
|
|
|
&&
|
|
|
|
!CcCanIWrite( FileObject,
|
|
Length,
|
|
FALSE,
|
|
MAXUCHAR - 2 )) {
|
|
|
|
WriteThrough = TRUE;
|
|
}
|
|
|
|
#endif
|
|
|
|
//
|
|
// try around everything for cleanup.
|
|
//
|
|
|
|
try {
|
|
|
|
while (Length != 0) {
|
|
|
|
CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
|
|
LocalOffset,
|
|
&Vacb,
|
|
&ReceivedLength );
|
|
|
|
//
|
|
// PREfix wants to know this cannot be NULL, otherwise it
|
|
// will complain.
|
|
//
|
|
|
|
ASSERT( CacheBuffer != NULL );
|
|
|
|
//
|
|
// If we got more than we need, make sure to only use
|
|
// the right amount.
|
|
//
|
|
|
|
if (ReceivedLength > Length) {
|
|
ReceivedLength = Length;
|
|
}
|
|
SavedMappedBuffer = CacheBuffer;
|
|
SavedMappedLength = ReceivedLength;
|
|
Length -= ReceivedLength;
|
|
|
|
//
|
|
// Now loop to touch all of the pages, calling MM to insure
|
|
// that if we fault, we take in exactly the number of pages
|
|
// we need.
|
|
//
|
|
|
|
CacheBuffer = (PVOID)((PCHAR)CacheBuffer - PageOffset);
|
|
ReceivedLength += PageOffset;
|
|
|
|
//
|
|
// Loop to touch or zero the pages.
|
|
//
|
|
|
|
ZeroCase = ZERO_FIRST_PAGE;
|
|
|
|
//
|
|
// Set up offset to page for use below.
|
|
//
|
|
|
|
PFileOffset = LocalOffset;
|
|
PFileOffset.LowPart -= PageOffset;
|
|
|
|
while (TRUE) {
|
|
|
|
//
|
|
// Calculate whether we wish to save an active page
|
|
// or not.
|
|
//
|
|
|
|
SavePage = (BOOLEAN) ((Length == 0) &&
|
|
(ReceivedLength < PAGE_SIZE) &&
|
|
(SavedTotalLength <= (PAGE_SIZE / 2)) &&
|
|
!WriteThrough);
|
|
|
|
MorePages = (ReceivedLength > PAGE_SIZE);
|
|
|
|
//
|
|
// Copy the data from the user buffer into the cache.
|
|
//
|
|
|
|
try {
|
|
|
|
//
|
|
// It is possible that there is a locked page
|
|
// hanging around, and so we need to nuke it here.
|
|
//
|
|
|
|
if (SharedCacheMap->NeedToZero != NULL) {
|
|
CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 );
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
if (FlagOn(ZeroFlags, ZeroCase)) {
|
|
|
|
Status = MmCopyToCachedPage( CacheBuffer,
|
|
UserBuffer,
|
|
PageOffset,
|
|
MorePages ?
|
|
(PAGE_SIZE - PageOffset) :
|
|
(ReceivedLength - PageOffset),
|
|
SavePage );
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
|
|
ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
|
|
STATUS_INVALID_USER_BUFFER ));
|
|
}
|
|
|
|
//
|
|
// Otherwise, we have to actually copy the data ourselves.
|
|
//
|
|
|
|
} else {
|
|
|
|
MmSetPageFaultReadAhead( Thread,
|
|
(MorePages && FlagOn(ZeroFlags, ZERO_LAST_PAGE)) ? 1 : 0);
|
|
|
|
RtlCopyBytes( (PVOID)((PCHAR)CacheBuffer + PageOffset),
|
|
UserBuffer,
|
|
MorePages ?
|
|
(PAGE_SIZE - PageOffset) :
|
|
(ReceivedLength - PageOffset) );
|
|
|
|
MmResetPageFaultReadAhead( Thread, SavedState );
|
|
|
|
}
|
|
|
|
} except( CcCopyReadExceptionFilter( GetExceptionInformation(),
|
|
&Status ) ) {
|
|
|
|
//
|
|
// If we got an access violation, then the user buffer went
|
|
// away. Otherwise we must have gotten an I/O error trying
|
|
// to bring the data in.
|
|
//
|
|
|
|
if (Status == STATUS_ACCESS_VIOLATION) {
|
|
ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
|
|
}
|
|
else {
|
|
ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
|
|
STATUS_UNEXPECTED_IO_ERROR ));
|
|
}
|
|
}
|
|
|
|
//
|
|
// Now get out quickly if it is a small write and we want
|
|
// to save the page.
|
|
//
|
|
|
|
if (SavePage) {
|
|
|
|
ActivePage = (ULONG)( Vacb->Overlay.FileOffset.QuadPart >> PAGE_SHIFT ) +
|
|
(ULONG)(((PCHAR)CacheBuffer - (PCHAR)Vacb->BaseAddress) >>
|
|
PAGE_SHIFT);
|
|
|
|
PFileOffset.LowPart += ReceivedLength;
|
|
|
|
//
|
|
// If the cache page was locked, then remember the address
|
|
// to zero from.
|
|
//
|
|
|
|
if (Status == STATUS_CACHE_PAGE_LOCKED) {
|
|
|
|
//
|
|
// We need to guarantee this Vacb for zeroing and calling
|
|
// MmUnlockCachedPage, so we increment the active count here
|
|
// and remember it for CcFreeActiveVacb.
|
|
//
|
|
|
|
CcAcquireVacbLock( &OldIrql );
|
|
Vacb->Overlay.ActiveCount += 1;
|
|
|
|
ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
|
|
|
ASSERT(SharedCacheMap->NeedToZero == NULL);
|
|
|
|
SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer +
|
|
(PFileOffset.LowPart & (PAGE_SIZE - 1)));
|
|
SharedCacheMap->NeedToZeroPage = ActivePage;
|
|
SharedCacheMap->NeedToZeroVacb = Vacb;
|
|
|
|
ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
|
|
CcReleaseVacbLock( OldIrql );
|
|
|
|
}
|
|
|
|
SetActiveVacb( SharedCacheMap,
|
|
OldIrql,
|
|
Vacb,
|
|
ActivePage,
|
|
ACTIVE_PAGE_IS_DIRTY );
|
|
|
|
try_return( NOTHING );
|
|
}
|
|
|
|
//
|
|
// If it looks like we may save a page and exit on the next loop,
|
|
// then we must make sure to mark the current page dirty. Note
|
|
// that Cc[Fast]CopyWrite will finish the last part of any page
|
|
// before allowing us to free the Active Vacb above, therefore
|
|
// this case only occurs for a small random write.
|
|
//
|
|
|
|
if ((SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough) {
|
|
|
|
CcSetDirtyInMask( SharedCacheMap, &PFileOffset, ReceivedLength );
|
|
}
|
|
|
|
UserBuffer = (PVOID)((PCHAR)UserBuffer + (PAGE_SIZE - PageOffset));
|
|
PageOffset = 0;
|
|
|
|
//
|
|
// If there is more than a page to go (including what we just
|
|
// copied), then adjust our buffer pointer and counts, and
|
|
// determine if we are to the last page yet.
|
|
//
|
|
|
|
if (MorePages) {
|
|
|
|
CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
|
|
ReceivedLength -= PAGE_SIZE;
|
|
|
|
//
|
|
// Update our offset to the page. Note that 32-bit
|
|
// add is ok since we cannot cross a Vacb boundary
|
|
// and we reinitialize this offset before entering
|
|
// this loop again.
|
|
//
|
|
|
|
PFileOffset.LowPart += PAGE_SIZE;
|
|
|
|
if (ReceivedLength > PAGE_SIZE) {
|
|
ZeroCase = ZERO_MIDDLE_PAGES;
|
|
} else {
|
|
ZeroCase = ZERO_LAST_PAGE;
|
|
}
|
|
|
|
} else {
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If there is still more to write (ie. we are going to step
|
|
// onto the next vacb) AND we just dirtied more than 64K, then
|
|
// do a vicarious MmFlushSection here. This prevents us from
|
|
// creating unlimited dirty pages while holding the file
|
|
// resource exclusive. We also do not need to set the pages
|
|
// dirty in the mask in this case.
|
|
//
|
|
|
|
if (Length > CcMaxDirtyWrite) {
|
|
|
|
MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
|
|
MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
&LocalOffset,
|
|
SavedMappedLength,
|
|
&IoStatus,
|
|
TRUE );
|
|
|
|
if (!NT_SUCCESS(IoStatus.Status)) {
|
|
ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
|
|
STATUS_UNEXPECTED_IO_ERROR ));
|
|
}
|
|
|
|
//
|
|
// For write through files, call Mm to propagate the dirty bits
|
|
// here while we have the view mapped, so we know the flush will
|
|
// work below. Again - do not set dirty in the mask.
|
|
//
|
|
|
|
} else if (WriteThrough) {
|
|
|
|
MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
|
|
|
|
//
|
|
// For the normal case, just set the pages dirty for the Lazy Writer
|
|
// now.
|
|
//
|
|
|
|
} else {
|
|
|
|
CcSetDirtyInMask( SharedCacheMap, &LocalOffset, SavedMappedLength );
|
|
}
|
|
|
|
CcFreeVirtualAddress( Vacb );
|
|
Vacb = NULL;
|
|
|
|
//
|
|
// If we have to loop back to get at least a page, it will be ok to
|
|
// zero the first page. If we are not getting at least a page, we
|
|
// must make sure we clear the ZeroFlags if we cannot zero the last
|
|
// page.
|
|
//
|
|
|
|
if (Length >= PAGE_SIZE) {
|
|
ZeroFlags |= ZERO_FIRST_PAGE;
|
|
} else if ((ZeroFlags & ZERO_LAST_PAGE) == 0) {
|
|
ZeroFlags = 0;
|
|
}
|
|
|
|
//
|
|
// Note that if ReceivedLength (and therefore SavedMappedLength)
|
|
// was truncated to the transfer size then the new LocalOffset
|
|
// computed below is not correct. This is not an issue since
|
|
// in that case (Length == 0) and we would never get here.
|
|
//
|
|
|
|
LocalOffset.QuadPart = LocalOffset.QuadPart + (LONGLONG)SavedMappedLength;
|
|
}
|
|
try_exit: NOTHING;
|
|
}
|
|
|
|
//
|
|
// Cleanup on the way out.
|
|
//
|
|
|
|
finally {
|
|
|
|
MmResetPageFaultReadAhead( Thread, SavedState );
|
|
|
|
//
|
|
// We have no work to do if we have squirreled away the Vacb.
|
|
//
|
|
|
|
if (!SavePage || AbnormalTermination()) {
|
|
|
|
//
|
|
// Make sure we do not leave anything mapped or dirty in the PTE
|
|
// on the way out.
|
|
//
|
|
|
|
if (Vacb != NULL) {
|
|
|
|
CcFreeVirtualAddress( Vacb );
|
|
}
|
|
|
|
//
|
|
// Either flush the whole range because of write through, or
|
|
// mark it dirty for the lazy writer.
|
|
//
|
|
|
|
if (WriteThrough) {
|
|
|
|
MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
|
|
FileOffset,
|
|
SavedTotalLength,
|
|
&IoStatus,
|
|
TRUE );
|
|
|
|
if (!NT_SUCCESS(IoStatus.Status)) {
|
|
ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
|
|
STATUS_UNEXPECTED_IO_ERROR ));
|
|
}
|
|
|
|
//
|
|
// Advance ValidDataGoal
|
|
//
|
|
|
|
LocalOffset.QuadPart = FileOffset->QuadPart + (LONGLONG)SavedTotalLength;
|
|
if (LocalOffset.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) {
|
|
SharedCacheMap->ValidDataGoal = LocalOffset;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
DebugTrace(-1, me, "CcMapAndCopy -> %02lx\n", Result );
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
BOOLEAN
|
|
CcLogError(
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PUNICODE_STRING FileName,
|
|
IN NTSTATUS Error,
|
|
IN NTSTATUS DeviceError,
|
|
IN UCHAR IrpMajorCode
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine writes an eventlog entry to the eventlog.
|
|
|
|
Arguments:
|
|
|
|
FileObject - The fileobject in whose context the error occured.
|
|
|
|
FileName - The filename to use in logging the error (usually the DOS-side name)
|
|
|
|
Error - The error to log in the eventlog record
|
|
|
|
DeviceError - The actual error that occured in the device - will be logged
|
|
as user data
|
|
|
|
Return Value:
|
|
|
|
True if successful, false if internal memory allocation failed
|
|
|
|
--*/
|
|
|
|
{
|
|
UCHAR ErrorPacketLength;
|
|
UCHAR BasePacketLength;
|
|
ULONG StringLength;
|
|
PIO_ERROR_LOG_PACKET ErrorLogEntry = NULL;
|
|
BOOLEAN Result = FALSE;
|
|
PWCHAR String;
|
|
|
|
PAGED_CODE();
|
|
|
|
//
|
|
// Get our error packet, holding the string and status code. Note we log against the
|
|
// true filesystem if this is available.
|
|
//
|
|
// The sizing of the packet is a bit slimy since the dumpdata is already grown by a
|
|
// ULONG onto the end of the packet. Since NTSTATUS is ULONG, well, we just work in
|
|
// place.
|
|
//
|
|
|
|
BasePacketLength = sizeof(IO_ERROR_LOG_PACKET);
|
|
if ((BasePacketLength + FileName->Length + sizeof(WCHAR)) <= ERROR_LOG_MAXIMUM_SIZE) {
|
|
ErrorPacketLength = (UCHAR)(BasePacketLength + FileName->Length + sizeof(WCHAR));
|
|
} else {
|
|
ErrorPacketLength = ERROR_LOG_MAXIMUM_SIZE;
|
|
}
|
|
|
|
ErrorLogEntry = (PIO_ERROR_LOG_PACKET) IoAllocateErrorLogEntry( (FileObject->Vpb ?
|
|
FileObject->Vpb->DeviceObject :
|
|
FileObject->DeviceObject),
|
|
ErrorPacketLength );
|
|
if (ErrorLogEntry) {
|
|
|
|
//
|
|
// Fill in the nonzero members of the packet.
|
|
//
|
|
|
|
ErrorLogEntry->MajorFunctionCode = IrpMajorCode;
|
|
ErrorLogEntry->ErrorCode = Error;
|
|
ErrorLogEntry->FinalStatus = DeviceError;
|
|
|
|
ErrorLogEntry->DumpDataSize = sizeof(NTSTATUS);
|
|
RtlCopyMemory( &ErrorLogEntry->DumpData, &DeviceError, sizeof(NTSTATUS) );
|
|
|
|
//
|
|
// The filename string is appended to the end of the error log entry. We may
|
|
// have to smash the middle to fit it in the limited space.
|
|
//
|
|
|
|
StringLength = ErrorPacketLength - BasePacketLength - sizeof(WCHAR);
|
|
|
|
ASSERT(!(StringLength % sizeof(WCHAR)));
|
|
|
|
String = (PWCHAR) ((PUCHAR)ErrorLogEntry + BasePacketLength);
|
|
ErrorLogEntry->NumberOfStrings = 1;
|
|
ErrorLogEntry->StringOffset = BasePacketLength;
|
|
|
|
//
|
|
// If the name does not fit in the packet, divide the name equally to the
|
|
// prefix and suffix, with an ellipsis " .. " (4 wide characters) to indicate
|
|
// the loss.
|
|
//
|
|
|
|
if (StringLength < FileName->Length) {
|
|
|
|
//
|
|
// Remember, prefix + " .. " + suffix is the length. Calculate by figuring
|
|
// the prefix and then get the suffix by whacking the ellipsis and prefix off
|
|
// the total.
|
|
//
|
|
|
|
ULONG NamePrefixSegmentLength = ((StringLength/sizeof(WCHAR))/2 - 2)*sizeof(WCHAR);
|
|
ULONG NameSuffixSegmentLength = StringLength - 4*sizeof(WCHAR) - NamePrefixSegmentLength;
|
|
|
|
ASSERT(!(NamePrefixSegmentLength % sizeof(WCHAR)));
|
|
ASSERT(!(NameSuffixSegmentLength % sizeof(WCHAR)));
|
|
|
|
RtlCopyMemory( String,
|
|
FileName->Buffer,
|
|
NamePrefixSegmentLength );
|
|
String = (PWCHAR)((PCHAR)String + NamePrefixSegmentLength);
|
|
|
|
RtlCopyMemory( String,
|
|
L" .. ",
|
|
4*sizeof(WCHAR) );
|
|
String += 4;
|
|
|
|
RtlCopyMemory( String,
|
|
(PUCHAR)FileName->Buffer +
|
|
FileName->Length - NameSuffixSegmentLength,
|
|
NameSuffixSegmentLength );
|
|
String = (PWCHAR)((PCHAR)String + NameSuffixSegmentLength);
|
|
|
|
} else {
|
|
|
|
RtlCopyMemory( String,
|
|
FileName->Buffer,
|
|
FileName->Length );
|
|
String += FileName->Length/sizeof(WCHAR);
|
|
}
|
|
|
|
//
|
|
// Null terminate the string and send the packet.
|
|
//
|
|
|
|
*String = L'\0';
|
|
|
|
IoWriteErrorLogEntry( ErrorLogEntry );
|
|
Result = TRUE;
|
|
}
|
|
|
|
return Result;
|
|
}
|
|
|
|
|
|
LOGICAL
|
|
CcHasInactiveViews (
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by Memory Management only to query if the system
|
|
cache has any inactive views. If so, Memory Management may issue a
|
|
subsequent call to CcUnmapInactiveViews to discard these views in an
|
|
attempt to reclaim the prototype PTE pool (and other resources tied to
|
|
the section).
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
TRUE if Cc has any views it can discard, FALSE if not.
|
|
|
|
Environment:
|
|
|
|
Arbitrary thread context, generally APC_LEVEL or DISPATCH_LEVEL. Various
|
|
mutexes and/or spinlocks may be held by the caller.
|
|
|
|
--*/
|
|
|
|
{
|
|
return FALSE; // BUGBUG - add code to flesh out.
|
|
}
|
|
|
|
|
|
LOGICAL
|
|
CcUnmapInactiveViews (
|
|
IN ULONG NumberOfViewsToUnmap
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by Memory Management to request that the cache
|
|
manager unmap a number of inactive views. This call is generally made
|
|
because the system is low on pool (paged or nonpaged).
|
|
|
|
Discarding these views is done in an attempt to reclaim the prototype
|
|
PTE pool (and other resources tied to the section).
|
|
|
|
Arguments:
|
|
|
|
NumberOfViewsToUnmap - Supplies the desired number of views to unmap.
|
|
|
|
Return Value:
|
|
|
|
TRUE if Cc discarded *ANY* views, FALSE if not.
|
|
|
|
Environment:
|
|
|
|
Dereference segment thread context at PASSIVE_LEVEL.
|
|
|
|
--*/
|
|
|
|
{
|
|
UNREFERENCED_PARAMETER (NumberOfViewsToUnmap);
|
|
|
|
return FALSE; // BUGBUG - add code to flesh out.
|
|
}
|
|
|
|
#ifdef CCDBG
VOID
CcDump (
    IN PVOID Ptr
    )

/*++

Routine Description:

    Debug-only placeholder, compiled only when CCDBG is defined.  It copies
    its argument into a local variable and returns without doing anything
    else.

Arguments:

    Ptr - Pointer value to copy into the local; it is never dereferenced.

Return Value:

    None.

--*/

{
    PVOID Junk;

    Junk = Ptr;
}
#endif
|
|
|
|
|