mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3434 lines
98 KiB
3434 lines
98 KiB
/*++
|
|
|
|
Copyright (c) 1991 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
LogSup.c
|
|
|
|
Abstract:
|
|
|
|
This module implements the Ntfs interfaces to the Log File Service (LFS).
|
|
|
|
Author:
|
|
|
|
Tom Miller [TomM] 24-Jul-1991
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#include "NtfsProc.h"
|
|
|
|
//
|
|
// The local debug trace level
|
|
//
|
|
|
|
#define Dbg (DEBUG_TRACE_LOGSUP)
|
|
|
|
//
|
|
// Define a tag for general pool allocations from this module
|
|
//
|
|
|
|
#undef MODULE_POOL_TAG
|
|
#define MODULE_POOL_TAG ('LFtN')
|
|
|
|
#ifdef NTFSDBG
|
|
|
|
#define ASSERT_RESTART_TABLE(T) { \
|
|
PULONG _p = (PULONG)(((PCHAR)(T)) + sizeof(RESTART_TABLE)); \
|
|
ULONG _Count = ((T)->EntrySize/4) * (T)->NumberEntries; \
|
|
ULONG _i; \
|
|
for (_i = 0; _i < _Count; _i += 1) { \
|
|
if (_p[_i] == 0xDAADF00D) { \
|
|
DbgPrint("DaadFood for table %08lx, At %08lx\n", (T), &_p[_i]); \
|
|
ASSERTMSG("ASSERT_RESTART_TABLE ", FALSE); \
|
|
} \
|
|
} \
|
|
}
|
|
|
|
#else
|
|
|
|
#define ASSERT_RESTART_TABLE(T) {NOTHING;}
|
|
|
|
#endif
|
|
|
|
//
|
|
// Local procedure prototypes
|
|
//
|
|
|
|
typedef LCN UNALIGNED *PLCN_UNALIGNED;
|
|
|
|
VOID
|
|
DirtyPageRoutine (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN PLSN OldestLsn,
|
|
IN PLSN NewestLsn,
|
|
IN PVOID Context1,
|
|
IN PVOID Context2
|
|
);
|
|
|
|
VOID
|
|
LookupLcns (
|
|
IN PIRP_CONTEXT IrpContext,
|
|
IN PSCB Scb,
|
|
IN VCN Vcn,
|
|
IN ULONG ClusterCount,
|
|
IN BOOLEAN MustBeAllocated,
|
|
OUT PLCN_UNALIGNED FirstLcn
|
|
);
|
|
|
|
#ifdef ALLOC_PRAGMA
|
|
#pragma alloc_text(PAGE, LookupLcns)
|
|
#pragma alloc_text(PAGE, NtfsCheckpointCurrentTransaction)
|
|
#pragma alloc_text(PAGE, NtfsCheckpointForLogFileFull)
|
|
#pragma alloc_text(PAGE, NtfsCheckpointVolume)
|
|
#pragma alloc_text(PAGE, NtfsCommitCurrentTransaction)
|
|
#pragma alloc_text(PAGE, NtfsFreeRestartTable)
|
|
#pragma alloc_text(PAGE, NtfsGetFirstRestartTable)
|
|
#pragma alloc_text(PAGE, NtfsGetNextRestartTable)
|
|
#pragma alloc_text(PAGE, NtfsInitializeLogging)
|
|
#pragma alloc_text(PAGE, NtfsInitializeRestartTable)
|
|
#pragma alloc_text(PAGE, NtfsStartLogFile)
|
|
#pragma alloc_text(PAGE, NtfsStopLogFile)
|
|
#pragma alloc_text(PAGE, NtfsWriteLog)
|
|
#endif
|
|
|
|
|
|
LSN
|
|
NtfsWriteLog (
|
|
IN PIRP_CONTEXT IrpContext,
|
|
IN PSCB Scb,
|
|
IN PBCB Bcb OPTIONAL,
|
|
IN NTFS_LOG_OPERATION RedoOperation,
|
|
IN PVOID RedoBuffer OPTIONAL,
|
|
IN ULONG RedoLength,
|
|
IN NTFS_LOG_OPERATION UndoOperation,
|
|
IN PVOID UndoBuffer OPTIONAL,
|
|
IN ULONG UndoLength,
|
|
IN LONGLONG StreamOffset,
|
|
IN ULONG RecordOffset,
|
|
IN ULONG AttributeOffset,
|
|
IN ULONG StructureSize
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine implements an Ntfs-specific interface to LFS for the
|
|
purpose of logging updates to file record segments and resident
|
|
attributes.
|
|
|
|
The caller creates one of the predefined log record formats as
|
|
determined by the given LogOperation, and calls this routine with
|
|
this log record and pointers to the respective file and attribute
|
|
records. The list of log operations along with the respective structure
|
|
expected for the Log Buffer is present in ntfslog.h.
|
|
|
|
Arguments:
|
|
|
|
Scb - Pointer to the Scb for the respective file or Mft. The caller must
|
|
have at least shared access to this Scb.
|
|
|
|
Bcb - If specified, this Bcb will be set dirty specifying the Lsn of
|
|
the log record written.
|
|
|
|
RedoOperation - One of the log operation codes defined in ntfslog.h.
|
|
|
|
RedoBuffer - A pointer to the structure expected for the given Redo operation,
|
|
as summarized in ntfslog.h. This pointer should only be
|
|
omitted if and only if the table in ntfslog.h does not show
|
|
a log record for this log operation.
|
|
|
|
RedoLength - Length of the Redo buffer in bytes.
|
|
|
|
UndoOperation - One of the log operation codes defined in ntfslog.h.
|
|
|
|
Must be CompensationLogRecord if logging the Undo of
|
|
a previous operation, such as during transaction abort.
|
|
In this case, of course, the Redo information is from
|
|
the Undo information of the record being undone. See
|
|
next parameter.
|
|
|
|
UndoBuffer - A pointer to the structure expected for the given Undo operation,
|
|
as summarized in ntfslog.h. This pointer should only be
|
|
omitted if and only if the table in ntfslog.h does not show
|
|
a log record for this log operation. If this pointer is
|
|
identical to RedoBuffer, then UndoLength is ignored and
|
|
only a single copy of the RedoBuffer is made, but described
|
|
by both the Redo and Undo portions of the log record.
|
|
|
|
For a compensation log record (UndoOperation ==
|
|
CompensationLogRecord), this argument must point to the
|
|
UndoNextLsn of the log record being compensated.
|
|
|
|
UndoLength - Length of the Undo buffer in bytes. Ignored if RedoBuffer ==
|
|
UndoBuffer.
|
|
|
|
For a compensation log record, this argument must be the length
|
|
of the original redo record. (Used during restart).
|
|
|
|
StreamOffset - Offset within the stream for the start of the structure being
|
|
modified (Mft or Index), or simply the stream offset for the start
|
|
of the update.
|
|
|
|
RecordOffset - Byte offset from StreamOffset above to update reference
|
|
|
|
AttributeOffset - Offset within a value to which an update applies, if relevant.
|
|
|
|
StructureSize - Size of the entire structure being logged.
|
|
|
|
Return Value:
|
|
|
|
The Lsn of the log record written. For most callers, this status may be ignored,
|
|
because the Lsn is also correctly recorded in the transaction context.
|
|
|
|
If an error occurs this procedure will raise.
|
|
|
|
--*/
|
|
|
|
{
|
|
LFS_WRITE_ENTRY WriteEntries[3];
|
|
|
|
struct {
|
|
|
|
NTFS_LOG_RECORD_HEADER LogRecordHeader;
|
|
LCN Runs[PAGE_SIZE/512 - 1];
|
|
|
|
} LocalHeader;
|
|
|
|
PNTFS_LOG_RECORD_HEADER MyHeader;
|
|
PVCB Vcb;
|
|
|
|
LSN UndoNextLsn;
|
|
LSN ReturnLsn;
|
|
PLSN DirtyLsn = NULL;
|
|
|
|
ULONG WriteIndex = 0;
|
|
ULONG UndoIndex = 0;
|
|
ULONG RedoIndex = 0;
|
|
LONG UndoBytes = 0;
|
|
LONG UndoAdjustmentForLfs = 0;
|
|
LONG UndoRecords = 0;
|
|
|
|
PTRANSACTION_ENTRY TransactionEntry;
|
|
POPEN_ATTRIBUTE_ENTRY OpenAttributeEntry = NULL;
|
|
ULONG OpenAttributeIndex = 0;
|
|
BOOLEAN AttributeTableAcquired = FALSE;
|
|
BOOLEAN TransactionTableAcquired = FALSE;
|
|
|
|
ULONG LogClusterCount = ClustersFromBytes( Scb->Vcb, StructureSize );
|
|
VCN LogVcn = LlClustersFromBytesTruncate( Scb->Vcb, StreamOffset );
|
|
|
|
PAGED_CODE();
|
|
|
|
Vcb = Scb->Vcb;
|
|
|
|
//
|
|
// If the log handle is gone, then we noop this call.
|
|
//
|
|
|
|
if (!FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
|
|
|
|
return Li0; //**** LfsZeroLsn;
|
|
}
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsWriteLog:\n") );
|
|
DebugTrace( 0, Dbg, ("Scb = %08lx\n", Scb) );
|
|
DebugTrace( 0, Dbg, ("Bcb = %08lx\n", Bcb) );
|
|
DebugTrace( 0, Dbg, ("RedoOperation = %08lx\n", RedoOperation) );
|
|
DebugTrace( 0, Dbg, ("RedoBuffer = %08lx\n", RedoBuffer) );
|
|
DebugTrace( 0, Dbg, ("RedoLength = %08lx\n", RedoLength) );
|
|
DebugTrace( 0, Dbg, ("UndoOperation = %08lx\n", UndoOperation) );
|
|
DebugTrace( 0, Dbg, ("UndoBuffer = %08lx\n", UndoBuffer) );
|
|
DebugTrace( 0, Dbg, ("UndoLength = %08lx\n", UndoLength) );
|
|
DebugTrace( 0, Dbg, ("StreamOffset = %016I64x\n", StreamOffset) );
|
|
DebugTrace( 0, Dbg, ("RecordOffset = %08lx\n", RecordOffset) );
|
|
DebugTrace( 0, Dbg, ("AttributeOffset = %08lx\n", AttributeOffset) );
|
|
DebugTrace( 0, Dbg, ("StructureSize = %08lx\n", StructureSize) );
|
|
|
|
//
|
|
// Check Redo and Undo lengths
|
|
//
|
|
|
|
ASSERT(((RedoOperation == UpdateNonresidentValue) && (RedoLength <= PAGE_SIZE))
|
|
|
|
||
|
|
|
|
!ARGUMENT_PRESENT(Scb)
|
|
|
|
||
|
|
|
|
!ARGUMENT_PRESENT(Bcb)
|
|
|
|
||
|
|
|
|
((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
|
|
(RedoLength <= Scb->ScbType.Index.BytesPerIndexBuffer))
|
|
|
|
||
|
|
|
|
(RedoLength <= Scb->Vcb->BytesPerFileRecordSegment));
|
|
|
|
ASSERT(((UndoOperation == UpdateNonresidentValue) && (UndoLength <= PAGE_SIZE))
|
|
|
|
||
|
|
|
|
!ARGUMENT_PRESENT(Scb)
|
|
|
|
||
|
|
|
|
!ARGUMENT_PRESENT(Bcb)
|
|
|
|
||
|
|
|
|
((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
|
|
(UndoLength <= Scb->ScbType.Index.BytesPerIndexBuffer))
|
|
|
|
||
|
|
|
|
(UndoLength <= Scb->Vcb->BytesPerFileRecordSegment)
|
|
|
|
||
|
|
|
|
(UndoOperation == CompensationLogRecord));
|
|
|
|
//
|
|
// Initialize local pointers.
|
|
//
|
|
|
|
MyHeader = (PNTFS_LOG_RECORD_HEADER)&LocalHeader;
|
|
|
|
try {
|
|
|
|
//
|
|
// If the structure size is nonzero, then create an open attribute table
|
|
// entry.
|
|
//
|
|
|
|
if (StructureSize != 0) {
|
|
|
|
//
|
|
// Allocate an entry in the open attribute table and initialize it,
|
|
// if it does not already exist. If we subsequently fail, we do
|
|
// not have to clean this up. It will go away on the next checkpoint.
|
|
//
|
|
|
|
if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {
|
|
|
|
OPEN_ATTRIBUTE_ENTRY LocalOpenEntry;
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
|
|
AttributeTableAcquired = TRUE;
|
|
|
|
//
|
|
// Only proceed if the OpenAttributeTableIndex is still 0.
|
|
// We may reach this point for the MftScb. It may not be
|
|
// acquired when logging changes to file records. We will
|
|
// use the OpenAttributeTable for final synchronization
|
|
// for the Mft open attribute table entry.
|
|
//
|
|
|
|
if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {
|
|
|
|
//
|
|
// Our structures require tables to stay within 64KB, since
|
|
// we use USHORT offsets. Things are getting out of hand
|
|
// at this point anyway. Raise log file full to reset the
|
|
// table sizes if we get to this point.
|
|
//
|
|
|
|
if (SizeOfRestartTable(&Vcb->OpenAttributeTable) > 0xF000) {
|
|
NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
|
|
}
|
|
|
|
Scb->NonpagedScb->OpenAttributeTableIndex =
|
|
OpenAttributeIndex = NtfsAllocateRestartTableIndex( &Vcb->OpenAttributeTable );
|
|
OpenAttributeEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
|
|
OpenAttributeIndex );
|
|
OpenAttributeEntry->Overlay.Scb = Scb;
|
|
OpenAttributeEntry->FileReference = Scb->Fcb->FileReference;
|
|
// OpenAttributeEntry->LsnOfOpenRecord = ???
|
|
OpenAttributeEntry->AttributeTypeCode = Scb->AttributeTypeCode;
|
|
OpenAttributeEntry->AttributeName = Scb->AttributeName;
|
|
OpenAttributeEntry->AttributeNamePresent = FALSE;
|
|
if (Scb->AttributeTypeCode == $INDEX_ALLOCATION) {
|
|
|
|
OpenAttributeEntry->BytesPerIndexBuffer = Scb->ScbType.Index.BytesPerIndexBuffer;
|
|
|
|
} else {
|
|
OpenAttributeEntry->BytesPerIndexBuffer = 0;
|
|
}
|
|
|
|
RtlMoveMemory( &LocalOpenEntry, OpenAttributeEntry, sizeof(OPEN_ATTRIBUTE_ENTRY) );
|
|
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
AttributeTableAcquired = FALSE;
|
|
|
|
//
|
|
// Now log the new open attribute table entry before goin on,
|
|
// to insure that the application of the caller's log record
|
|
// will have the information he needs on the attribute. We will
|
|
// use the Undo buffer to convey the attribute name. We will
|
|
// not infinitely recurse, because now this Scb already has an
|
|
// open attribute table index.
|
|
//
|
|
|
|
NtfsWriteLog( IrpContext,
|
|
Scb,
|
|
NULL,
|
|
OpenNonresidentAttribute,
|
|
&LocalOpenEntry,
|
|
sizeof(OPEN_ATTRIBUTE_ENTRY),
|
|
Noop,
|
|
Scb->AttributeName.Length != 0 ?
|
|
Scb->AttributeName.Buffer : NULL,
|
|
Scb->AttributeName.Length,
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
|
|
} else {
|
|
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
AttributeTableAcquired = FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Allocate a transaction ID and initialize it, if it does not already exist.
|
|
// If we subsequently fail, we clean it up when the current request is
|
|
// completed.
|
|
//
|
|
|
|
if (IrpContext->TransactionId == 0) {
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
|
|
TransactionTableAcquired = TRUE;
|
|
|
|
//
|
|
// Our structures require tables to stay within 64KB, since
|
|
// we use USHORT offsets. Things are getting out of hand
|
|
// at this point anyway. Raise log file full to reset the
|
|
// table sizes if we get to this point.
|
|
//
|
|
|
|
if (SizeOfRestartTable(&Vcb->TransactionTable) > 0xF000) {
|
|
NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
|
|
}
|
|
|
|
IrpContext->TransactionId =
|
|
NtfsAllocateRestartTableIndex( &Vcb->TransactionTable );
|
|
|
|
ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );
|
|
|
|
TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
|
|
&Vcb->TransactionTable,
|
|
IrpContext->TransactionId );
|
|
TransactionEntry->TransactionState = TransactionActive;
|
|
TransactionEntry->FirstLsn =
|
|
TransactionEntry->PreviousLsn =
|
|
TransactionEntry->UndoNextLsn = Li0; //**** LfsZeroLsn;
|
|
|
|
//
|
|
// Remember that we will need a commit record even if we abort
|
|
// the transaction.
|
|
//
|
|
|
|
TransactionEntry->UndoBytes = QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));
|
|
TransactionEntry->UndoRecords = 1;
|
|
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
TransactionTableAcquired = FALSE;
|
|
|
|
//
|
|
// Remember the space for the commit record in our Lfs adjustment.
|
|
//
|
|
|
|
UndoAdjustmentForLfs += QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));
|
|
|
|
//
|
|
// If there is an undo operation for this log record, we reserve
|
|
// the space for another Lfs log record.
|
|
//
|
|
|
|
if (UndoOperation != Noop) {
|
|
UndoAdjustmentForLfs += Vcb->LogHeaderReservation;
|
|
}
|
|
}
|
|
|
|
//
|
|
// At least for now, assume update is contained in one physical page.
|
|
//
|
|
|
|
//ASSERT( (StructureSize == 0) || (StructureSize <= PAGE_SIZE) );
|
|
|
|
//
|
|
// If there isn't enough room for this structure on the stack, we
|
|
// need to allocate an auxilary buffer.
|
|
//
|
|
|
|
if (LogClusterCount > (PAGE_SIZE / 512)) {
|
|
|
|
MyHeader = (PNTFS_LOG_RECORD_HEADER)
|
|
NtfsAllocatePool(PagedPool, sizeof( NTFS_LOG_RECORD_HEADER )
|
|
+ (LogClusterCount - 1) * sizeof( LCN ));
|
|
|
|
}
|
|
|
|
//
|
|
// Now fill in the WriteEntries array and the log record header.
|
|
//
|
|
|
|
WriteEntries[0].Buffer = (PVOID)MyHeader;
|
|
WriteEntries[0].ByteLength = sizeof(NTFS_LOG_RECORD_HEADER);
|
|
WriteIndex += 1;
|
|
|
|
//
|
|
// Lookup the Runs for this log record
|
|
//
|
|
|
|
MyHeader->LcnsToFollow = (USHORT)LogClusterCount;
|
|
|
|
if (LogClusterCount != 0) {
|
|
LookupLcns( IrpContext,
|
|
Scb,
|
|
LogVcn,
|
|
LogClusterCount,
|
|
TRUE,
|
|
&MyHeader->LcnsForPage[0] );
|
|
|
|
WriteEntries[0].ByteLength += (LogClusterCount - 1) * sizeof(LCN);
|
|
}
|
|
|
|
//
|
|
// If there is a Redo buffer, fill in its write entry.
|
|
//
|
|
|
|
if (RedoLength != 0) {
|
|
|
|
WriteEntries[1].Buffer = RedoBuffer;
|
|
WriteEntries[1].ByteLength = RedoLength;
|
|
UndoIndex = RedoIndex = WriteIndex;
|
|
WriteIndex += 1;
|
|
}
|
|
|
|
//
|
|
// If there is an undo buffer, and it is at a different address than
|
|
// the redo buffer, then fill in its write entry.
|
|
//
|
|
|
|
if ((RedoBuffer != UndoBuffer) && (UndoLength != 0) &&
|
|
(UndoOperation != CompensationLogRecord)) {
|
|
|
|
WriteEntries[WriteIndex].Buffer = UndoBuffer;
|
|
WriteEntries[WriteIndex].ByteLength = UndoLength;
|
|
UndoIndex = WriteIndex;
|
|
WriteIndex += 1;
|
|
}
|
|
|
|
//
|
|
// Now fill in the rest of the header. Assume Redo and Undo buffer is
|
|
// the same, then fix them up if they are not.
|
|
//
|
|
|
|
MyHeader->RedoOperation = (USHORT)RedoOperation;
|
|
MyHeader->UndoOperation = (USHORT)UndoOperation;
|
|
MyHeader->RedoOffset = (USHORT)WriteEntries[0].ByteLength;
|
|
MyHeader->RedoLength = (USHORT)RedoLength;
|
|
MyHeader->UndoOffset = MyHeader->RedoOffset;
|
|
if (RedoBuffer != UndoBuffer) {
|
|
MyHeader->UndoOffset += (USHORT)QuadAlign(MyHeader->RedoLength);
|
|
}
|
|
MyHeader->UndoLength = (USHORT)UndoLength;
|
|
MyHeader->TargetAttribute = (USHORT)Scb->NonpagedScb->OpenAttributeTableIndex;
|
|
MyHeader->RecordOffset = (USHORT)RecordOffset;
|
|
MyHeader->AttributeOffset = (USHORT)AttributeOffset;
|
|
MyHeader->Reserved = 0;
|
|
|
|
MyHeader->TargetVcn = LogVcn;
|
|
MyHeader->ClusterBlockOffset = (USHORT) LogBlocksFromBytesTruncate( ClusterOffset( Vcb, StreamOffset ));
|
|
|
|
//
|
|
// Finally, get our current transaction entry and call Lfs. We acquire
|
|
// the transaction table exclusive both to synchronize the Lsn updates
|
|
// on return from Lfs, and also to mark the Bcb dirty before any more
|
|
// log records are written.
|
|
//
|
|
// If we do not do serialize the LfsWrite and CcSetDirtyPinnedData, here is
|
|
// what can happen:
|
|
//
|
|
// We log an update for a page and get an Lsn back
|
|
//
|
|
// Another thread writes a start of checkpoint record
|
|
// This thread then collects all of the dirty pages at that time
|
|
// Sometime it writes the dirty page table
|
|
//
|
|
// The former thread which had been preempted, now sets the Bcb dirty
|
|
//
|
|
// If we crash at this time, the page we updated is not in the dirty page
|
|
// table of the checkpoint, and it its update record is also not seen since
|
|
// it was written before the start of the checkpoint!
|
|
//
|
|
// Note however, since the page being updated is pinned and cannot be written,
|
|
// updating the Lsn in the page may simply be considered part of the update.
|
|
// Whoever is doing this update (to the Mft or an Index buffer), must have the
|
|
// Mft or Index acquired exclusive anyway.
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
|
|
TransactionTableAcquired = TRUE;
|
|
|
|
TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
|
|
&Vcb->TransactionTable,
|
|
IrpContext->TransactionId );
|
|
|
|
//
|
|
// Set up the UndoNextLsn. If this is a normal log record, then use
|
|
// the UndoNextLsn stored in the transaction entry; otherwise, use
|
|
// the one passed in as the Undo buffer.
|
|
//
|
|
|
|
if (UndoOperation != CompensationLogRecord) {
|
|
|
|
UndoNextLsn = TransactionEntry->UndoNextLsn;
|
|
|
|
//
|
|
// If there is undo information, calculate the number to pass to Lfs
|
|
// for undo bytes to reserve.
|
|
//
|
|
|
|
if (UndoOperation != Noop) {
|
|
|
|
UndoBytes += QuadAlign(WriteEntries[0].ByteLength);
|
|
|
|
if (UndoIndex != 0) {
|
|
|
|
UndoBytes += QuadAlign(WriteEntries[UndoIndex].ByteLength);
|
|
}
|
|
|
|
UndoRecords += 1;
|
|
}
|
|
|
|
} else {
|
|
|
|
UndoNextLsn = *(PLSN)UndoBuffer;
|
|
|
|
//
|
|
// We can reduce our Undo requirements, by the Redo data being
|
|
// logged. This is either an abort record for a previous action
|
|
// or a commit record. If it is a commit record we accounted
|
|
// for it above on the first NtfsWriteLog, and NtfsCommitTransaction
|
|
// will adjust for the rest.
|
|
//
|
|
|
|
if (!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS )) {
|
|
|
|
UndoBytes -= QuadAlign(WriteEntries[0].ByteLength);
|
|
|
|
if (RedoIndex != 0) {
|
|
|
|
UndoBytes -= QuadAlign(WriteEntries[RedoIndex].ByteLength);
|
|
}
|
|
|
|
UndoRecords -= 1;
|
|
}
|
|
}
|
|
|
|
#ifdef NTFSDBG
|
|
//
|
|
// Perform log-file-full fail checking. We do not perform this check if
|
|
// we are writing an undo record (since we are guaranteed space to undo
|
|
// things).
|
|
//
|
|
|
|
if (UndoOperation != CompensationLogRecord &&
|
|
(IrpContext->MajorFunction != IRP_MJ_FILE_SYSTEM_CONTROL ||
|
|
IrpContext->MinorFunction != IRP_MN_MOUNT_VOLUME)) {
|
|
//
|
|
// There should never be any log records during clean checkpoints
|
|
//
|
|
|
|
ASSERT( !FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_CHECKPOINT_ACTIVE ));
|
|
|
|
LogFileFullFailCheck( IrpContext );
|
|
}
|
|
#endif
|
|
|
|
//
|
|
// Call Lfs to write the record.
|
|
//
|
|
|
|
LfsWrite( Vcb->LogHandle,
|
|
WriteIndex,
|
|
&WriteEntries[0],
|
|
LfsClientRecord,
|
|
&IrpContext->TransactionId,
|
|
UndoNextLsn,
|
|
TransactionEntry->PreviousLsn,
|
|
UndoBytes + UndoAdjustmentForLfs,
|
|
&ReturnLsn );
|
|
|
|
//
|
|
// Now that we are successful, update the transaction entry appropriately.
|
|
//
|
|
|
|
TransactionEntry->UndoBytes += UndoBytes;
|
|
TransactionEntry->UndoRecords += UndoRecords;
|
|
TransactionEntry->PreviousLsn = ReturnLsn;
|
|
|
|
//
|
|
// The UndoNextLsn for the transaction depends on whether we are
|
|
// doing a compensation log record or not.
|
|
//
|
|
|
|
if (UndoOperation != CompensationLogRecord) {
|
|
TransactionEntry->UndoNextLsn = ReturnLsn;
|
|
} else {
|
|
TransactionEntry->UndoNextLsn = UndoNextLsn;
|
|
}
|
|
|
|
//
|
|
// If this is the first Lsn, then we have to update that as
|
|
// well.
|
|
//
|
|
|
|
if (TransactionEntry->FirstLsn.QuadPart == 0) {
|
|
|
|
TransactionEntry->FirstLsn = ReturnLsn;
|
|
}
|
|
|
|
//
|
|
// Set to use this Lsn when marking dirty below
|
|
//
|
|
|
|
DirtyLsn = &ReturnLsn;
|
|
|
|
//
|
|
// Set the flag in the Irp Context which indicates we wrote
|
|
// a log record to disk.
|
|
//
|
|
|
|
SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );
|
|
|
|
} finally {
|
|
|
|
DebugUnwind( NtfsWriteLog );
|
|
|
|
//
|
|
// Now set the Bcb dirty if specified. We want to set it no matter
|
|
// what happens, because our caller has modified the buffer and is
|
|
// counting on us to call the Cache Manager.
|
|
//
|
|
|
|
if (ARGUMENT_PRESENT(Bcb)) {
|
|
|
|
TIMER_STATUS TimerStatus;
|
|
|
|
CcSetDirtyPinnedData( Bcb, DirtyLsn );
|
|
|
|
//
|
|
// Synchronize with the checkpoint timer and other instances of this routine.
|
|
//
|
|
// Perform an interlocked exchange to indicate that a timer is being set.
|
|
//
|
|
// If the previous value indicates that no timer was set, then we
|
|
// enable the volume checkpoint timer. This will guarantee that a checkpoint
|
|
// will occur to flush out the dirty Bcb data.
|
|
//
|
|
// If the timer was set previously, then it is guaranteed that a checkpoint
|
|
// will occur without this routine having to reenable the timer.
|
|
//
|
|
// If the timer and checkpoint occurred between the dirtying of the Bcb and
|
|
// the setting of the timer status, then we will be queueing a single extra
|
|
// checkpoint on a clean volume. This is not considered harmful.
|
|
//
|
|
|
|
//
|
|
// Atomically set the timer status to indicate a timer is being set and
|
|
// retrieve the previous value.
|
|
//
|
|
|
|
TimerStatus = InterlockedExchange( &NtfsData.TimerStatus, TIMER_SET );
|
|
|
|
//
|
|
// If the timer is not currently set then we must start the checkpoint timer
|
|
// to make sure the above dirtying is flushed out.
|
|
//
|
|
|
|
if (TimerStatus == TIMER_NOT_SET) {
|
|
|
|
LONGLONG FiveSecondsFromNow = -5*1000*1000*10;
|
|
|
|
KeSetTimer( &NtfsData.VolumeCheckpointTimer,
|
|
*(PLARGE_INTEGER)&FiveSecondsFromNow,
|
|
&NtfsData.VolumeCheckpointDpc );
|
|
}
|
|
}
|
|
|
|
if (TransactionTableAcquired) {
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
}
|
|
|
|
if (AttributeTableAcquired) {
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
}
|
|
|
|
if (MyHeader != (PNTFS_LOG_RECORD_HEADER)&LocalHeader) {
|
|
|
|
NtfsFreePool( MyHeader );
|
|
}
|
|
}
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsWriteLog -> %016I64x\n", ReturnLsn ) );
|
|
|
|
return ReturnLsn;
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsCheckpointVolume (
|
|
IN PIRP_CONTEXT IrpContext,
|
|
IN PVCB Vcb,
|
|
IN BOOLEAN OwnsCheckpoint,
|
|
IN BOOLEAN CleanVolume,
|
|
IN BOOLEAN FlushVolume,
|
|
IN LSN LastKnownLsn
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called periodically to perform a checkpoint on the volume
|
|
with respect to the log file. The checkpoint dumps a bunch of log file
|
|
state information to the log file, and finally writes a summary of the
|
|
dumped information in its Restart Area.
|
|
|
|
This checkpoint dumps the following:
|
|
|
|
Open Attribute Table
|
|
(all of the attribute names for the Attribute Table)
|
|
Dirty Pages Table
|
|
Transaction Table
|
|
|
|
Arguments:
|
|
|
|
Vcb - Pointer to the Vcb on which the checkpoint is to occur.
|
|
|
|
OwnsCheckpoint - TRUE if the caller has already taken steps to insure
|
|
that he may proceed with the checkpointing. In this case we
|
|
don't do any checks for other checkpoints and don't clear the
|
|
checkpoint flag or notify any waiting checkpoint threads.
|
|
|
|
CleanVolume - TRUE if the caller wishes to clean the volume before doing
|
|
the checkpoint, or FALSE for a normal periodic checkpoint.
|
|
|
|
FlushVolume - Applies only if CleanVolume is TRUE. This indicates if we should
|
|
should flush the volume or only Lsn streams.
|
|
|
|
LastKnownLsn - Applies only if CleanVolume is TRUE. Only perform the
|
|
clean checkpoint if this value is the same as the last restart area
|
|
in the Vcb. This will prevent us from doing unecesary clean
|
|
checkpoints.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
RESTART_AREA RestartArea;
|
|
RESTART_POINTERS DirtyPages;
|
|
RESTART_POINTERS Pointers;
|
|
LSN BaseLsn;
|
|
PATTRIBUTE_NAME_ENTRY NamesBuffer = NULL;
|
|
PTRANSACTION_ENTRY TransactionEntry;
|
|
BOOLEAN DirtyPageTableInitialized = FALSE;
|
|
BOOLEAN OpenAttributeTableAcquired = FALSE;
|
|
BOOLEAN TransactionTableAcquired = FALSE;
|
|
LSN OldestDirtyPageLsn = Li0;
|
|
BOOLEAN AcquireFiles = FALSE;
|
|
BOOLEAN BitmapAcquired = FALSE;
|
|
BOOLEAN PostDefrag = FALSE;
|
|
KPRIORITY PreviousPriority;
|
|
BOOLEAN RestorePreviousPriority = FALSE;
|
|
|
|
PAGED_CODE();
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsCheckpointVolume:\n") );
|
|
DebugTrace( 0, Dbg, ("Vcb = %08lx\n", Vcb) );
|
|
|
|
if (!OwnsCheckpoint) {
|
|
|
|
//
|
|
// Acquire the checkpoint event.
|
|
//
|
|
|
|
NtfsAcquireCheckpoint( IrpContext, Vcb );
|
|
|
|
//
|
|
// We will want to post a defrag if defragging is permitted and enabled
|
|
// and we have begun the defrag operation or have excess mapping.
|
|
// If the defrag hasn't been triggered then check the Mft free
|
|
// space. We can skip defragging if a defrag operation is
|
|
// currently active.
|
|
//
|
|
|
|
if (!CleanVolume
|
|
&& FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED )
|
|
&& FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_ENABLED )
|
|
&& !FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE )) {
|
|
|
|
if (FlagOn( Vcb->MftDefragState,
|
|
VCB_MFT_DEFRAG_TRIGGERED | VCB_MFT_DEFRAG_EXCESS_MAP )) {
|
|
|
|
PostDefrag = TRUE;
|
|
|
|
} else {
|
|
|
|
NtfsCheckForDefrag( Vcb );
|
|
|
|
if (FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_TRIGGERED )) {
|
|
|
|
PostDefrag = TRUE;
|
|
|
|
} else {
|
|
|
|
ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ENABLED );
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// If a checkpoint is already active, we either have to get out,
|
|
// or wait for it.
|
|
//
|
|
|
|
while (FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_IN_PROGRESS )) {
|
|
|
|
//
|
|
// Release the checkpoint event because we cannot checkpoint now.
|
|
//
|
|
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
|
|
if (CleanVolume) {
|
|
|
|
NtfsWaitOnCheckpointNotify( IrpContext, Vcb );
|
|
|
|
NtfsAcquireCheckpoint( IrpContext, Vcb );
|
|
|
|
} else {
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// We now have the checkpoint event. Check if there is still
|
|
// a need to perform the checkpoint.
|
|
//
|
|
|
|
if (CleanVolume &&
|
|
LastKnownLsn.QuadPart != Vcb->LastRestartArea.QuadPart) {
|
|
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
return;
|
|
}
|
|
|
|
SetFlag( Vcb->CheckpointFlags, VCB_CHECKPOINT_IN_PROGRESS );
|
|
NtfsResetCheckpointNotify( IrpContext, Vcb );
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
|
|
//
|
|
// If this is a clean volume checkpoint then boost the priority of
|
|
// this thread.
|
|
//
|
|
|
|
if (CleanVolume) {
|
|
|
|
PreviousPriority = KeSetPriorityThread( &PsGetCurrentThread()->Tcb,
|
|
LOW_REALTIME_PRIORITY );
|
|
|
|
if (PreviousPriority != LOW_REALTIME_PRIORITY) {
|
|
|
|
RestorePreviousPriority = TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
RtlZeroMemory( &RestartArea, sizeof(RESTART_AREA) );
|
|
RtlZeroMemory( &DirtyPages, sizeof(RESTART_POINTERS) );
|
|
|
|
//
|
|
// Insure cleanup on the way out
|
|
//
|
|
|
|
try {
|
|
|
|
POPEN_ATTRIBUTE_ENTRY AttributeEntry;
|
|
ULONG NameBytes = 0;
|
|
|
|
//
|
|
// Now remember the current "last Lsn" value as the start of
|
|
// our checkpoint. We acquire the transaction table to capture
|
|
// this value to synchronize with threads who are writing log
|
|
// records and setting pages dirty as atomic actions.
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
|
|
BaseLsn =
|
|
RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
|
|
|
|
ASSERT( (RestartArea.StartOfCheckpoint.QuadPart != 0) ||
|
|
FlagOn(Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN) );
|
|
|
|
//
|
|
// If the last checkpoint was completely clean, and no one has
|
|
// written to the log since then, we can just return.
|
|
//
|
|
|
|
if (FlagOn( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN )
|
|
|
|
&&
|
|
|
|
(RestartArea.StartOfCheckpoint.QuadPart == Vcb->EndOfLastCheckpoint.QuadPart)
|
|
|
|
&&
|
|
|
|
!CleanVolume) {
|
|
|
|
//
|
|
// Let's take this opportunity to shrink the Open Attribute and Transaction
|
|
// table back if they have gotten large.
|
|
//
|
|
|
|
//
|
|
// First the Open Attribute Table
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
|
|
OpenAttributeTableAcquired = TRUE;
|
|
|
|
if (IsRestartTableEmpty(&Vcb->OpenAttributeTable)
|
|
|
|
&&
|
|
|
|
(Vcb->OpenAttributeTable.Table->NumberEntries >
|
|
HIGHWATER_ATTRIBUTE_COUNT)) {
|
|
|
|
//
|
|
// Initialize first in case we get an allocation failure.
|
|
//
|
|
|
|
InitializeNewTable( sizeof(OPEN_ATTRIBUTE_ENTRY),
|
|
INITIAL_NUMBER_ATTRIBUTES,
|
|
&Pointers );
|
|
|
|
NtfsFreePool( Vcb->OpenAttributeTable.Table );
|
|
Vcb->OpenAttributeTable.Table = Pointers.Table;
|
|
}
|
|
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
OpenAttributeTableAcquired = FALSE;
|
|
|
|
//
|
|
// Now check the transaction table (freeing in the finally clause).
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
|
|
TransactionTableAcquired = TRUE;
|
|
|
|
if (IsRestartTableEmpty(&Vcb->TransactionTable)
|
|
|
|
&&
|
|
|
|
(Vcb->TransactionTable.Table->NumberEntries >
|
|
HIGHWATER_TRANSACTION_COUNT)) {
|
|
|
|
//
|
|
// Initialize first in case we get an allocation failure.
|
|
//
|
|
|
|
InitializeNewTable( sizeof(TRANSACTION_ENTRY),
|
|
INITIAL_NUMBER_TRANSACTIONS,
|
|
&Pointers );
|
|
|
|
NtfsFreePool( Vcb->TransactionTable.Table );
|
|
Vcb->TransactionTable.Table = Pointers.Table;
|
|
}
|
|
|
|
try_return( NOTHING );
|
|
}
|
|
|
|
//
|
|
// Flush any dangling dirty pages from before the last restart.
|
|
// Note that it is arbitrary what Lsn we flush to here, and, in fact,
|
|
// it is not absolutely required that we flush anywhere at all - we
|
|
// could actually rely on the Lazy Writer. All we are trying to do
|
|
// is reduce the amount of work that we will have to do at Restart,
|
|
// by not forcing ourselves to have to go too far back in the log.
|
|
// Presumably this can only happen for some reason the system is
|
|
// starting to produce dirty pages faster than the lazy writer is
|
|
// writing them.
|
|
//
|
|
// (We may wish to play with taking this call out...)
|
|
//
|
|
// This may be an appropriate place to worry about this, but, then
|
|
// again, the Lazy Writer is using (currently) five threads. It may
|
|
// not be appropriate to hold up this one thread doing the checkpoint
|
|
// if the Lazy Writer is getting behind. How many dirty pages we
|
|
// can even have is limited by the size of memory, so if the log file
|
|
// is large enough, this may not be an issue. It seems kind of nice
|
|
// to just let the Lazy Writer keep writing dirty pages as he does
|
|
// now.
|
|
//
|
|
// if (!FlagOn(Vcb->VcbState, VCB_STATE_LAST_CHECKPOINT_CLEAN)) {
|
|
// CcFlushPagesToLsn( Vcb->LogHandle, &Vcb->LastRestartArea );
|
|
// }
|
|
//
|
|
|
|
//
|
|
// Now we must clean the volume here if that is what the caller wants.
|
|
//
|
|
|
|
if (CleanVolume) {
|
|
|
|
NtfsCleanCheckpoints += 1;
|
|
|
|
//
|
|
// Lock down the volume if this is a clean checkpoint.
|
|
//
|
|
|
|
NtfsAcquireAllFiles( IrpContext, Vcb, FlushVolume, FALSE );
|
|
|
|
#ifdef NTFSDBG
|
|
ASSERT( !FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_CHECKPOINT_ACTIVE ));
|
|
DebugDoit( SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_CHECKPOINT_ACTIVE ));
|
|
#endif // NTFSDBG
|
|
|
|
|
|
AcquireFiles = TRUE;
|
|
|
|
//
|
|
// Now we will acquire the Open Attribute Table exclusive to delete
|
|
// all of the entries, since we want to write a clean checkpoint.
|
|
// This is OK, since we have the global resource and nothing else
|
|
// can be going on. (Similarly we are writing an empty transaction
|
|
// table, while in fact we will be the only transaction, but there
|
|
// is no need to capture our guy, nor explicitly empty this table.)
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
|
|
OpenAttributeTableAcquired = TRUE;
|
|
|
|
//
|
|
// First reclaim the page we have reserved in the undo total, to
|
|
// guarantee that we can flush the log file.
|
|
//
|
|
|
|
LfsResetUndoTotal( Vcb->LogHandle, 1, -(LONG)(2 * PAGE_SIZE) );
|
|
|
|
if (FlushVolume) {
|
|
|
|
(VOID)NtfsFlushVolume( IrpContext, Vcb, TRUE, FALSE, FALSE, FALSE );
|
|
|
|
} else {
|
|
|
|
NtfsFlushLsnStreams( Vcb );
|
|
}
|
|
|
|
SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
|
|
|
|
//
|
|
// Loop through to deallocate all of the open attribute entries. Any
|
|
// that point to an Scb need to get the index in the Scb zeroed. If
|
|
// they do not point to an Scb, we have to see if there is a name to
|
|
// free.
|
|
//
|
|
|
|
AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
|
|
|
|
while (AttributeEntry != NULL) {
|
|
|
|
ULONG Index;
|
|
|
|
if (AttributeEntry->Overlay.Scb != NULL) {
|
|
|
|
AttributeEntry->Overlay.Scb->NonpagedScb->OpenAttributeTableIndex = 0;
|
|
|
|
} else {
|
|
|
|
//
|
|
// Delete its name, if it has one. Check that we aren't
|
|
// using the hardcode $I30 name.
|
|
//
|
|
|
|
if ((AttributeEntry->AttributeName.Buffer != NULL) &&
|
|
(AttributeEntry->AttributeName.Buffer != NtfsFileNameIndexName)) {
|
|
|
|
NtfsFreePool( AttributeEntry->AttributeName.Buffer );
|
|
}
|
|
}
|
|
|
|
//
|
|
// Get the index for the entry.
|
|
//
|
|
|
|
Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
|
|
NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable,
|
|
Index );
|
|
|
|
AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
}
|
|
|
|
//
|
|
// Initialize first in case we get an allocation failure.
|
|
//
|
|
|
|
ASSERT(IsRestartTableEmpty(&Vcb->OpenAttributeTable));
|
|
|
|
InitializeNewTable( sizeof(OPEN_ATTRIBUTE_ENTRY),
|
|
INITIAL_NUMBER_ATTRIBUTES,
|
|
&Pointers );
|
|
|
|
NtfsFreePool( Vcb->OpenAttributeTable.Table );
|
|
Vcb->OpenAttributeTable.Table = Pointers.Table;
|
|
|
|
//
|
|
// Initialize first in case we get an allocation failure.
|
|
// Make sure we commit the current transaction.
|
|
//
|
|
|
|
NtfsCommitCurrentTransaction( IrpContext );
|
|
|
|
ASSERT(IsRestartTableEmpty(&Vcb->TransactionTable));
|
|
|
|
InitializeNewTable( sizeof(TRANSACTION_ENTRY),
|
|
INITIAL_NUMBER_TRANSACTIONS,
|
|
&Pointers );
|
|
|
|
NtfsFreePool( Vcb->TransactionTable.Table );
|
|
Vcb->TransactionTable.Table = Pointers.Table;
|
|
|
|
//
|
|
// Make sure we do not process any log file before the restart
|
|
// area, because we did not dump the open attribute table.
|
|
//
|
|
|
|
RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
|
|
|
|
//
|
|
// Save some work if this is a clean checkpoint
|
|
//
|
|
|
|
} else {
|
|
|
|
PDIRTY_PAGE_ENTRY DirtyPage;
|
|
POPEN_ATTRIBUTE_ENTRY OpenEntry;
|
|
ULONG JustMe = 0;
|
|
|
|
//
|
|
// Now we construct the dirty page table by calling the Cache Manager.
|
|
// For each dirty page on files tagged with our log handle, he will
|
|
// call us back at our DirtyPageRoutine. We will allocate the initial
|
|
// Dirty Page Table, but we will let the call back routine grow it as
|
|
// necessary.
|
|
//
|
|
|
|
NtfsInitializeRestartTable( sizeof(DIRTY_PAGE_ENTRY) +
|
|
(Vcb->ClustersPerPage - 1) * sizeof(LCN),
|
|
32,
|
|
&DirtyPages );
|
|
|
|
NtfsAcquireExclusiveRestartTable( &DirtyPages, TRUE );
|
|
|
|
DirtyPageTableInitialized = TRUE;
|
|
|
|
//
|
|
// Now we will acquire the Open Attribute Table shared to freeze changes.
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
|
|
OpenAttributeTableAcquired = TRUE;
|
|
|
|
//
|
|
// Loop to see how much we will have to allocate for attribute names.
|
|
//
|
|
|
|
AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
|
|
|
|
while (AttributeEntry != NULL) {
|
|
|
|
//
|
|
// This checks for one type of aliasing.
|
|
//
|
|
|
|
// ASSERT( (AttributeEntry->Overlay.Scb == NULL) ||
|
|
// (AttributeEntry->Overlay.Scb->OpenAttributeTableIndex ==
|
|
// GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
|
|
// AttributeEntry )));
|
|
|
|
//
|
|
// Clear the DirtyPageSeen flag prior to collecting the dirty pages,
|
|
// to help us figure out which Open Attribute Entries we still need.
|
|
//
|
|
|
|
AttributeEntry->DirtyPagesSeen = FALSE;
|
|
|
|
if (AttributeEntry->AttributeName.Length != 0) {
|
|
|
|
//
|
|
// Add to our name total, the size of an Attribute Entry,
|
|
// which includes the size of the terminating UNICODE_NULL.
|
|
//
|
|
|
|
NameBytes += AttributeEntry->AttributeName.Length +
|
|
sizeof(ATTRIBUTE_NAME_ENTRY);
|
|
}
|
|
|
|
AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
}
|
|
|
|
//
|
|
// Now call the Cache Manager to give us all of our dirty pages
|
|
// via the DirtyPageRoutine callback, and remember what the oldest
|
|
// Lsn is for a dirty page.
|
|
//
|
|
|
|
OldestDirtyPageLsn = CcGetDirtyPages( Vcb->LogHandle,
|
|
&DirtyPageRoutine,
|
|
(PVOID)IrpContext,
|
|
(PVOID)&DirtyPages );
|
|
|
|
if (OldestDirtyPageLsn.QuadPart != 0 &&
|
|
OldestDirtyPageLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
|
|
|
|
OldestDirtyPageLsn = Vcb->LastBaseLsn;
|
|
}
|
|
|
|
//
|
|
// Now loop through the dirty page table to extract all of the Vcn/Lcn
|
|
// Mapping that we have, and insert it into the appropriate Scb.
|
|
//
|
|
|
|
DirtyPage = NtfsGetFirstRestartTable( &DirtyPages );
|
|
|
|
//
|
|
// The dirty page routine is called while holding spin locks,
|
|
// so it cannot take page faults. Thus we must scan the dirty
|
|
// page table we just built and fill in the Lcns here.
|
|
//
|
|
|
|
while (DirtyPage != NULL) {
|
|
|
|
PSCB Scb;
|
|
|
|
OpenEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
|
|
DirtyPage->TargetAttribute );
|
|
|
|
ASSERT(IsRestartTableEntryAllocated(OpenEntry));
|
|
|
|
ASSERT( DirtyPage->OldestLsn.QuadPart >= Vcb->LastBaseLsn.QuadPart );
|
|
|
|
Scb = OpenEntry->Overlay.Scb;
|
|
|
|
//
|
|
// If we have Lcn's then look them up.
|
|
//
|
|
|
|
if (DirtyPage->LcnsToFollow != 0) {
|
|
|
|
LookupLcns( IrpContext,
|
|
Scb,
|
|
DirtyPage->Vcn,
|
|
DirtyPage->LcnsToFollow,
|
|
FALSE,
|
|
&DirtyPage->LcnsForPage[0] );
|
|
|
|
//
|
|
// Other free this dirty page entry.
|
|
//
|
|
|
|
} else {
|
|
|
|
NtfsFreeRestartTableIndex( &DirtyPages,
|
|
GetIndexFromRestartEntry( &DirtyPages,
|
|
DirtyPage ));
|
|
}
|
|
|
|
//
|
|
// Point to next entry in table, or NULL.
|
|
//
|
|
|
|
DirtyPage = NtfsGetNextRestartTable( &DirtyPages,
|
|
DirtyPage );
|
|
}
|
|
|
|
//
|
|
// If there were any names, then allocate space for them and copy
|
|
// them out.
|
|
//
|
|
|
|
if (NameBytes != 0) {
|
|
|
|
PATTRIBUTE_NAME_ENTRY Name;
|
|
|
|
//
|
|
// Allocate the buffer, with space for two terminating 0's on
|
|
// the end.
|
|
//
|
|
|
|
NameBytes += 4;
|
|
Name =
|
|
NamesBuffer = NtfsAllocatePool( NonPagedPool, NameBytes );
|
|
|
|
//
|
|
// Now loop to copy the names.
|
|
//
|
|
|
|
AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
|
|
|
|
while (AttributeEntry != NULL) {
|
|
|
|
//
|
|
// Free the Open Attribute Entry if there were no
|
|
// dirty pages and the Scb is gone. This is the only
|
|
// place they are deleted. (Yes, I know we allocated
|
|
// space for its name, but I didn't want to make three
|
|
// passes through the open attribute table. Permeter
|
|
// is running as we speak, and showing 407 open files
|
|
// on NT/IDW5.)
|
|
//
|
|
|
|
if (!AttributeEntry->DirtyPagesSeen
|
|
|
|
&&
|
|
|
|
(AttributeEntry->Overlay.Scb == NULL)) {
|
|
|
|
ULONG Index;
|
|
|
|
//
|
|
// Get the index for the entry.
|
|
//
|
|
|
|
Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
|
|
//
|
|
// Delete its name and free it up.
|
|
//
|
|
|
|
if ((AttributeEntry->AttributeName.Buffer != NULL) &&
|
|
(AttributeEntry->AttributeName.Buffer != NtfsFileNameIndexName)) {
|
|
|
|
NtfsFreePool( AttributeEntry->AttributeName.Buffer );
|
|
}
|
|
|
|
NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable,
|
|
Index );
|
|
|
|
//
|
|
// Otherwise, if we are not deleting it, we have to
|
|
// copy its name into the buffer we allocated.
|
|
//
|
|
|
|
} else if (AttributeEntry->AttributeName.Length != 0) {
|
|
|
|
//
|
|
// Prefix each name in the buffer with the attribute index
|
|
// and name length.
|
|
//
|
|
|
|
Name->Index = (USHORT)GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
Name->NameLength = AttributeEntry->AttributeName.Length;
|
|
RtlMoveMemory( &Name->Name[0],
|
|
AttributeEntry->AttributeName.Buffer,
|
|
AttributeEntry->AttributeName.Length );
|
|
|
|
Name->Name[Name->NameLength/2] = 0;
|
|
|
|
Name = (PATTRIBUTE_NAME_ENTRY)((PCHAR)Name +
|
|
sizeof(ATTRIBUTE_NAME_ENTRY) +
|
|
Name->NameLength);
|
|
|
|
ASSERT( (PCHAR)Name <= ((PCHAR)NamesBuffer + NameBytes - 4) );
|
|
}
|
|
|
|
AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
|
|
AttributeEntry );
|
|
}
|
|
|
|
//
|
|
// Terminate the Names Buffer.
|
|
//
|
|
|
|
Name->Index = 0;
|
|
Name->NameLength = 0;
|
|
}
|
|
|
|
//
|
|
// Now write all of the non-empty tables to the log.
|
|
//
|
|
|
|
//
|
|
// Write the Open Attribute Table
|
|
//
|
|
|
|
if (!IsRestartTableEmpty(&Vcb->OpenAttributeTable)) {
|
|
RestartArea.OpenAttributeTableLsn =
|
|
NtfsWriteLog( IrpContext,
|
|
Vcb->MftScb,
|
|
NULL,
|
|
OpenAttributeTableDump,
|
|
Vcb->OpenAttributeTable.Table,
|
|
SizeOfRestartTable(&Vcb->OpenAttributeTable),
|
|
Noop,
|
|
NULL,
|
|
0,
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
|
|
RestartArea.OpenAttributeTableLength =
|
|
SizeOfRestartTable(&Vcb->OpenAttributeTable);
|
|
JustMe = 1;
|
|
}
|
|
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
OpenAttributeTableAcquired = FALSE;
|
|
|
|
//
|
|
// Write the Open Attribute Names
|
|
//
|
|
|
|
if (NameBytes != 0) {
|
|
RestartArea.AttributeNamesLsn =
|
|
NtfsWriteLog( IrpContext,
|
|
Vcb->MftScb,
|
|
NULL,
|
|
AttributeNamesDump,
|
|
NamesBuffer,
|
|
NameBytes,
|
|
Noop,
|
|
NULL,
|
|
0,
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
|
|
RestartArea.AttributeNamesLength = NameBytes;
|
|
JustMe = 1;
|
|
}
|
|
|
|
//
|
|
// Write the Dirty Page Table
|
|
//
|
|
|
|
if (!IsRestartTableEmpty(&DirtyPages)) {
|
|
RestartArea.DirtyPageTableLsn =
|
|
NtfsWriteLog( IrpContext,
|
|
Vcb->MftScb,
|
|
NULL,
|
|
DirtyPageTableDump,
|
|
DirtyPages.Table,
|
|
SizeOfRestartTable(&DirtyPages),
|
|
Noop,
|
|
NULL,
|
|
0,
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
|
|
RestartArea.DirtyPageTableLength = SizeOfRestartTable(&DirtyPages);
|
|
JustMe = 1;
|
|
}
|
|
|
|
//
|
|
// Write the Transaction Table if there is more than just us. We
|
|
// are a transaction if we wrote any log records above.
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
|
|
TransactionTableAcquired = TRUE;
|
|
|
|
//
|
|
// Assumee will want to do at least one more checkpoint.
|
|
//
|
|
|
|
ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
|
|
|
|
if ((ULONG)Vcb->TransactionTable.Table->NumberAllocated > JustMe) {
|
|
RestartArea.TransactionTableLsn =
|
|
NtfsWriteLog( IrpContext,
|
|
Vcb->MftScb,
|
|
NULL,
|
|
TransactionTableDump,
|
|
Vcb->TransactionTable.Table,
|
|
SizeOfRestartTable(&Vcb->TransactionTable),
|
|
Noop,
|
|
NULL,
|
|
0,
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
|
|
RestartArea.TransactionTableLength =
|
|
SizeOfRestartTable(&Vcb->TransactionTable);
|
|
|
|
//
|
|
// Loop to see if the oldest Lsn comes from the transaction table.
|
|
//
|
|
|
|
TransactionEntry = NtfsGetFirstRestartTable( &Vcb->TransactionTable );
|
|
|
|
while (TransactionEntry != NULL) {
|
|
if ((TransactionEntry->FirstLsn.QuadPart != 0)
|
|
|
|
&&
|
|
|
|
(TransactionEntry->FirstLsn.QuadPart < BaseLsn.QuadPart)) {
|
|
|
|
BaseLsn = TransactionEntry->FirstLsn;
|
|
}
|
|
|
|
TransactionEntry = NtfsGetNextRestartTable( &Vcb->TransactionTable,
|
|
TransactionEntry );
|
|
}
|
|
|
|
//
|
|
// If the transaction table is otherwise empty, then this is a good
|
|
// time to reset our totals with Lfs, in case our counts get off a bit.
|
|
//
|
|
|
|
} else {
|
|
|
|
//
|
|
// If we are a transaction, then we have to add in our counts.
|
|
//
|
|
|
|
if (IrpContext->TransactionId != 0) {
|
|
|
|
TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
|
|
&Vcb->TransactionTable, IrpContext->TransactionId );
|
|
|
|
LfsResetUndoTotal( Vcb->LogHandle,
|
|
TransactionEntry->UndoRecords + 2,
|
|
TransactionEntry->UndoBytes +
|
|
QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
|
|
|
|
//
|
|
// Otherwise, we reset to our "idle" requirements.
|
|
//
|
|
|
|
} else {
|
|
LfsResetUndoTotal( Vcb->LogHandle,
|
|
2,
|
|
QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
|
|
}
|
|
|
|
//
|
|
// If the DirtyPage table is entry then mark this as a clean checkpoint.
|
|
//
|
|
|
|
if (IsRestartTableEmpty( &DirtyPages )) {
|
|
|
|
SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
|
|
}
|
|
}
|
|
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
TransactionTableAcquired = FALSE;
|
|
}
|
|
|
|
//
|
|
// So far BaseLsn holds the minimum of the start Lsn for the checkpoint,
|
|
// or any of the FirstLsn fields for active transactions. Now we see
|
|
// if the oldest Lsn we need in the log should actually come from the
|
|
// oldest page in the dirty page table.
|
|
//
|
|
|
|
if ((OldestDirtyPageLsn.QuadPart != 0)
|
|
|
|
&&
|
|
|
|
(OldestDirtyPageLsn.QuadPart < BaseLsn.QuadPart)) {
|
|
|
|
BaseLsn = OldestDirtyPageLsn;
|
|
}
|
|
|
|
//
|
|
// Finally, write our Restart Area to describe all of the above, and
|
|
// give Lfs our new BaseLsn.
|
|
//
|
|
|
|
Vcb->LastBaseLsn = Vcb->LastRestartArea = BaseLsn;
|
|
LfsWriteRestartArea( Vcb->LogHandle,
|
|
sizeof(RESTART_AREA),
|
|
&RestartArea,
|
|
&Vcb->LastRestartArea );
|
|
|
|
//
|
|
// If this is a clean checkpoint then initialize our reserved area.
|
|
//
|
|
|
|
if (CleanVolume) {
|
|
|
|
LfsResetUndoTotal( Vcb->LogHandle, 2, QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
|
|
}
|
|
|
|
//
|
|
// Now remember where the log file is at now, so we know when to
|
|
// go idle above.
|
|
//
|
|
|
|
Vcb->EndOfLastCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
|
|
|
|
//
|
|
// Now we want to check if we can release any of the clusters in the
|
|
// deallocated cluster arrays. We know we can look at the
|
|
// fields in the PriorDeallocatedClusters structure because they
|
|
// are never modified in the running system.
|
|
//
|
|
// We compare the Lsn in the Prior structure to see if it is older
|
|
// than the new BaseLsn value. If so we will acquire the volume
|
|
// bitmap in order to swap the structures.
|
|
//
|
|
|
|
if ((Vcb->ActiveDeallocatedClusters != NULL) &&
|
|
|
|
((Vcb->PriorDeallocatedClusters->ClusterCount != 0) ||
|
|
(Vcb->ActiveDeallocatedClusters->ClusterCount != 0))) {
|
|
|
|
if (BaseLsn.QuadPart > Vcb->PriorDeallocatedClusters->Lsn.QuadPart) {
|
|
|
|
NtfsAcquireExclusiveScb( IrpContext, Vcb->BitmapScb );
|
|
BitmapAcquired = TRUE;
|
|
|
|
//
|
|
// If the Prior Mcb is not empty then empty it.
|
|
//
|
|
|
|
if (Vcb->PriorDeallocatedClusters->ClusterCount != 0) {
|
|
|
|
//
|
|
// Decrement the count of deallocated clusters by the amount stored here.
|
|
//
|
|
|
|
Vcb->DeallocatedClusters =
|
|
Vcb->DeallocatedClusters - Vcb->PriorDeallocatedClusters->ClusterCount;
|
|
|
|
//
|
|
// Remove all of the mappings in the Mcb.
|
|
//
|
|
|
|
FsRtlTruncateLargeMcb( &Vcb->PriorDeallocatedClusters->Mcb, (LONGLONG)0 );
|
|
|
|
//
|
|
// Remember that there are no deallocated structures left in
|
|
// the Mcb.
|
|
//
|
|
|
|
Vcb->PriorDeallocatedClusters->ClusterCount = 0;
|
|
}
|
|
|
|
//
|
|
// If this is a clean checkpoint then free the active deallocated
|
|
// clusters. Otherwise do at least one more checkpoint.
|
|
//
|
|
|
|
if (Vcb->ActiveDeallocatedClusters->ClusterCount != 0) {
|
|
|
|
if (CleanVolume) {
|
|
|
|
//
|
|
// Decrement the count of deallocated clusters by the amount stored here.
|
|
//
|
|
|
|
Vcb->DeallocatedClusters =
|
|
Vcb->DeallocatedClusters - Vcb->ActiveDeallocatedClusters->ClusterCount;
|
|
|
|
//
|
|
// Remove all of the mappings in the Mcb.
|
|
//
|
|
|
|
FsRtlTruncateLargeMcb( &Vcb->ActiveDeallocatedClusters->Mcb, (LONGLONG)0 );
|
|
|
|
//
|
|
// Remember that there are no deallocated structures left in
|
|
// the Mcb.
|
|
//
|
|
|
|
Vcb->ActiveDeallocatedClusters->ClusterCount = 0;
|
|
|
|
//
|
|
// We know the count has gone to zero.
|
|
//
|
|
|
|
ASSERT( Vcb->DeallocatedClusters == 0 );
|
|
|
|
} else {
|
|
|
|
PDEALLOCATED_CLUSTERS Temp;
|
|
|
|
Temp = Vcb->PriorDeallocatedClusters;
|
|
|
|
Vcb->PriorDeallocatedClusters = Vcb->ActiveDeallocatedClusters;
|
|
Vcb->ActiveDeallocatedClusters = Temp;
|
|
|
|
//
|
|
// Remember the last Lsn for the prior Mcb.
|
|
//
|
|
|
|
Vcb->PriorDeallocatedClusters->Lsn = LfsQueryLastLsn( Vcb->LogHandle );
|
|
|
|
//
|
|
// Always do at least one more checkpoint.
|
|
//
|
|
|
|
ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
|
|
}
|
|
}
|
|
|
|
} else {
|
|
|
|
ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
|
|
}
|
|
}
|
|
|
|
try_exit: NOTHING;
|
|
} finally {
|
|
|
|
DebugUnwind( NtfsCheckpointVolume );
|
|
|
|
if (BitmapAcquired) {
|
|
|
|
NtfsReleaseScb( IrpContext, Vcb->BitmapScb );
|
|
}
|
|
|
|
//
|
|
// If the Dirty Page Table got initialized, free it up.
|
|
//
|
|
|
|
if (DirtyPageTableInitialized) {
|
|
NtfsFreeRestartTable( &DirtyPages );
|
|
}
|
|
|
|
//
|
|
// Release any resources
|
|
//
|
|
|
|
if (OpenAttributeTableAcquired) {
|
|
NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
|
|
}
|
|
|
|
if (TransactionTableAcquired) {
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
}
|
|
|
|
//
|
|
// Release any names buffer.
|
|
//
|
|
|
|
if (NamesBuffer != NULL) {
|
|
NtfsFreePool( NamesBuffer );
|
|
}
|
|
|
|
//
|
|
// If this checkpoint created a transaction, free the index now.
|
|
//
|
|
|
|
if (IrpContext->TransactionId != 0) {
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
|
|
TRUE );
|
|
|
|
NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
|
|
IrpContext->TransactionId );
|
|
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
|
|
IrpContext->TransactionId = 0;
|
|
}
|
|
|
|
if (AcquireFiles) {
|
|
|
|
#ifdef NTFSDBG
|
|
ASSERT( FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_CHECKPOINT_ACTIVE ));
|
|
DebugDoit( ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_CHECKPOINT_ACTIVE ));
|
|
#endif // NTFSDBG
|
|
|
|
NtfsReleaseAllFiles( IrpContext, Vcb, FALSE );
|
|
}
|
|
|
|
//
|
|
// If we didn't own the checkpoint operation then indicate
|
|
// that someone else is free to checkpoint.
|
|
//
|
|
|
|
if (!OwnsCheckpoint) {
|
|
|
|
NtfsAcquireCheckpoint( IrpContext, Vcb );
|
|
ClearFlag( Vcb->CheckpointFlags,
|
|
VCB_CHECKPOINT_IN_PROGRESS | VCB_DUMMY_CHECKPOINT_POSTED);
|
|
|
|
NtfsSetCheckpointNotify( IrpContext, Vcb );
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
}
|
|
|
|
if (RestorePreviousPriority) {
|
|
|
|
KeSetPriorityThread( &PsGetCurrentThread()->Tcb,
|
|
PreviousPriority );
|
|
}
|
|
}
|
|
|
|
//
|
|
// If we need to post a defrag request then do so now.
|
|
//
|
|
|
|
if (PostDefrag) {
|
|
|
|
PDEFRAG_MFT DefragMft;
|
|
|
|
//
|
|
// Use a try-except to ignore allocation errors.
|
|
//
|
|
|
|
try {
|
|
|
|
NtfsAcquireCheckpoint( IrpContext, Vcb );
|
|
|
|
if (!FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE )) {
|
|
|
|
SetFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
|
|
DefragMft = NtfsAllocatePool( NonPagedPool, sizeof( DEFRAG_MFT ));
|
|
|
|
DefragMft->Vcb = Vcb;
|
|
DefragMft->DeallocateWorkItem = TRUE;
|
|
|
|
//
|
|
// Send it off.....
|
|
//
|
|
|
|
ExInitializeWorkItem( &DefragMft->WorkQueueItem,
|
|
(PWORKER_THREAD_ROUTINE)NtfsDefragMft,
|
|
(PVOID)DefragMft );
|
|
|
|
ExQueueWorkItem( &DefragMft->WorkQueueItem, CriticalWorkQueue );
|
|
|
|
} else {
|
|
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
}
|
|
|
|
} except( FsRtlIsNtstatusExpected( GetExceptionCode() )
|
|
? EXCEPTION_EXECUTE_HANDLER
|
|
: EXCEPTION_CONTINUE_SEARCH ) {
|
|
|
|
NtfsAcquireCheckpoint( IrpContext, Vcb );
|
|
ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
|
|
NtfsReleaseCheckpoint( IrpContext, Vcb );
|
|
}
|
|
}
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsCheckpointVolume -> VOID\n") );
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsCheckpointForLogFileFull (
|
|
IN PIRP_CONTEXT IrpContext
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called to perform the clean checkpoint generated after
|
|
a log file full. This routine will call the clean checkpoint routine
|
|
and then release all of the resources acquired.
|
|
|
|
Arguments:
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PAGED_CODE();
|
|
|
|
IrpContext->ExceptionStatus = 0;
|
|
|
|
//
|
|
// Call the checkpoint routine to do the actual work.
|
|
//
|
|
|
|
NtfsCheckpointVolume( IrpContext,
|
|
IrpContext->Vcb,
|
|
FALSE,
|
|
TRUE,
|
|
FALSE,
|
|
IrpContext->LastRestartArea );
|
|
|
|
ASSERT( IrpContext->TransactionId == 0 );
|
|
|
|
while (!IsListEmpty(&IrpContext->ExclusiveFcbList)) {
|
|
|
|
NtfsReleaseFcb( IrpContext,
|
|
(PFCB)CONTAINING_RECORD(IrpContext->ExclusiveFcbList.Flink,
|
|
FCB,
|
|
ExclusiveFcbLinks ));
|
|
}
|
|
|
|
//
|
|
// Go through and free any Scb's in the queue of shared Scb's for transactions.
|
|
//
|
|
|
|
if (IrpContext->SharedScb != NULL) {
|
|
|
|
NtfsReleaseSharedResources( IrpContext );
|
|
}
|
|
|
|
IrpContext->LastRestartArea = Li0;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
VOID
|
|
NtfsCommitCurrentTransaction (
|
|
IN PIRP_CONTEXT IrpContext
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine commits the current transaction by writing a final record
|
|
to the log and deallocating the transaction Id.
|
|
|
|
Arguments:
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PTRANSACTION_ENTRY TransactionEntry;
|
|
PVCB Vcb = IrpContext->Vcb;
|
|
|
|
PAGED_CODE();
|
|
|
|
//
|
|
// If this request created a transaction, complete it now.
|
|
//
|
|
|
|
if (IrpContext->TransactionId != 0) {
|
|
|
|
LSN CommitLsn;
|
|
|
|
//
|
|
// It is possible to get a LOG_FILE_FULL before writing
|
|
// out the first log record of a transaction. In that
|
|
// case there is a transaction Id but we haven't reserved
|
|
// space in the log file. It is wrong to write the
|
|
// commit record in this case because we can get an
|
|
// unexpected LOG_FILE_FULL. We can also test the UndoRecords
|
|
// count in the transaction entry but don't want to acquire
|
|
// the restart table to make this check.
|
|
//
|
|
|
|
if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG )) {
|
|
|
|
//
|
|
// Write the log record to "forget" this transaction,
|
|
// because it should not be aborted. Until if/when we
|
|
// do real TP, commit and forget are atomic.
|
|
//
|
|
|
|
CommitLsn =
|
|
NtfsWriteLog( IrpContext,
|
|
Vcb->MftScb,
|
|
NULL,
|
|
ForgetTransaction,
|
|
NULL,
|
|
0,
|
|
CompensationLogRecord,
|
|
(PVOID)&Li0,
|
|
sizeof(LSN),
|
|
(LONGLONG)0,
|
|
0,
|
|
0,
|
|
0 );
|
|
}
|
|
|
|
//
|
|
// We can now free the transaction table index, because we are
|
|
// done with it now.
|
|
//
|
|
|
|
NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
|
|
TRUE );
|
|
|
|
TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
|
|
&Vcb->TransactionTable,
|
|
IrpContext->TransactionId );
|
|
|
|
//
|
|
// Call Lfs to free our undo space.
|
|
//
|
|
|
|
if ((TransactionEntry->UndoRecords != 0) &&
|
|
(!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS ))) {
|
|
|
|
LfsResetUndoTotal( Vcb->LogHandle,
|
|
TransactionEntry->UndoRecords,
|
|
-TransactionEntry->UndoBytes );
|
|
}
|
|
|
|
NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
|
|
IrpContext->TransactionId );
|
|
|
|
IrpContext->TransactionId = 0;
|
|
|
|
NtfsReleaseRestartTable( &Vcb->TransactionTable );
|
|
|
|
//
|
|
// One way we win by being recoverable, is that we do not really
|
|
// have to do write-through - flushing the updates to the log
|
|
// is enough. We don't make this call if we are in the abort
|
|
// transaction path. Otherwise we could get a log file full
|
|
// while aborting.
|
|
//
|
|
|
|
if (FlagOn( IrpContext->TopLevelIrpContext->Flags, IRP_CONTEXT_FLAG_WRITE_THROUGH ) &&
|
|
(IrpContext == IrpContext->TopLevelIrpContext) &&
|
|
(IrpContext->TopLevelIrpContext->ExceptionStatus == STATUS_SUCCESS)) {
|
|
|
|
NtfsUpdateScbSnapshots( IrpContext );
|
|
LfsFlushToLsn( Vcb->LogHandle, CommitLsn );
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsCheckpointCurrentTransaction (
|
|
IN PIRP_CONTEXT IrpContext
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine checkpoints the current transaction by commiting it
|
|
to the log and deallocating the transaction Id. The current request
|
|
cann keep running, but changes to date are committed and will not be
|
|
backed out.
|
|
|
|
Arguments:
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PAGED_CODE();
|
|
|
|
NtfsCommitCurrentTransaction( IrpContext );
|
|
|
|
NtfsUpdateScbSnapshots( IrpContext );
|
|
|
|
//
|
|
// Cleanup any recently deallocated record information for this transaction.
|
|
//
|
|
|
|
NtfsDeallocateRecordsComplete( IrpContext );
|
|
IrpContext->DeallocatedClusters = 0;
|
|
IrpContext->FreeClusterChange = 0;
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsInitializeLogging (
|
|
)
|
|
|
|
/*
|
|
|
|
Routine Description:
|
|
|
|
This routine is to be called once during startup of Ntfs (not once
|
|
per volume), to initialize the logging support.
|
|
|
|
Parameters:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
PAGED_CODE();
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsInitializeLogging:\n") );
|
|
LfsInitializeLogFileService();
|
|
DebugTrace( -1, Dbg, ("NtfsInitializeLogging -> VOID\n") );
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsStartLogFile (
|
|
IN PSCB LogFileScb,
|
|
IN PVCB Vcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine opens the log file for a volume by calling Lfs. The returned
|
|
LogHandle is stored in the Vcb. If the log file has not been initialized,
|
|
Lfs detects this and initializes it automatically.
|
|
|
|
Arguments:
|
|
|
|
LogFileScb - The Scb for the log file
|
|
|
|
Vcb - Pointer to the Vcb for this volume
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
UNICODE_STRING UnicodeName;
|
|
LFS_INFO LfsInfo;
|
|
|
|
PAGED_CODE();
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsStartLogFile:\n") );
|
|
|
|
RtlInitUnicodeString( &UnicodeName, L"NTFS" );
|
|
|
|
LfsInfo = LfsPackLog;
|
|
|
|
//
|
|
// Slam the allocation size into file size and valid data in case there
|
|
// is some error.
|
|
//
|
|
|
|
LogFileScb->Header.FileSize = LogFileScb->Header.AllocationSize;
|
|
LogFileScb->Header.ValidDataLength = LogFileScb->Header.AllocationSize;
|
|
|
|
Vcb->LogHeaderReservation = LfsOpenLogFile( LogFileScb->FileObject,
|
|
UnicodeName,
|
|
1,
|
|
0,
|
|
LogFileScb->Header.AllocationSize.QuadPart,
|
|
&LfsInfo,
|
|
&Vcb->LogHandle );
|
|
|
|
SetFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
|
|
DebugTrace( -1, Dbg, ("NtfsStartLogFile -> VOID\n") );
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsStopLogFile (
|
|
IN PVCB Vcb
|
|
)
|
|
|
|
/*
|
|
|
|
Routine Description:
|
|
|
|
This routine should be called during volume dismount to close the volume's
|
|
log file with the log file service.
|
|
|
|
Arguments:
|
|
|
|
Vcb - Pointer to the Vcb for the volume
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
LFS_LOG_HANDLE LogHandle = Vcb->LogHandle;
|
|
|
|
PAGED_CODE();
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsStopLogFile:\n") );
|
|
|
|
if (FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
|
|
|
|
ASSERT( LogHandle != NULL );
|
|
LfsFlushToLsn( LogHandle, LfsQueryLastLsn(LogHandle) );
|
|
|
|
ClearFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
|
|
LfsCloseLogFile( LogHandle );
|
|
}
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsStopLogFile -> VOID\n") );
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsInitializeRestartTable (
|
|
IN ULONG EntrySize,
|
|
IN ULONG NumberEntries,
|
|
OUT PRESTART_POINTERS TablePointer
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called to allocate and initialize a new Restart Table,
|
|
and return a pointer to it.
|
|
|
|
Arguments:
|
|
|
|
EntrySize - Size of the table entries, in bytes.
|
|
|
|
NumberEntries - Number of entries to allocate for the table.
|
|
|
|
TablePointer - Returns a pointer to the table.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
PAGED_CODE();
|
|
|
|
try {
|
|
|
|
RtlZeroMemory( TablePointer, sizeof(RESTART_POINTERS) );
|
|
|
|
//
|
|
// Call common routine to allocate the actual table.
|
|
//
|
|
|
|
InitializeNewTable( EntrySize, NumberEntries, TablePointer );
|
|
|
|
//
|
|
// Initialiaze the resource and spin lock.
|
|
//
|
|
|
|
KeInitializeSpinLock( &TablePointer->SpinLock );
|
|
ExInitializeResource( &TablePointer->Resource );
|
|
TablePointer->ResourceInitialized = TRUE;
|
|
|
|
} finally {
|
|
|
|
DebugUnwind( NtfsInitializeRestartTable );
|
|
|
|
//
|
|
// On error, clean up any partial work that was done.
|
|
//
|
|
|
|
if (AbnormalTermination()) {
|
|
|
|
NtfsFreeRestartTable( TablePointer );
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsFreeRestartTable (
|
|
IN PRESTART_POINTERS TablePointer
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine frees a previously allocated Restart Table.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the Restart Table to delete.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PAGED_CODE();
|
|
|
|
if (TablePointer->Table != NULL) {
|
|
NtfsFreePool( TablePointer->Table );
|
|
TablePointer->Table = NULL;
|
|
}
|
|
|
|
if (TablePointer->ResourceInitialized) {
|
|
ExDeleteResource( &TablePointer->Resource );
|
|
TablePointer->ResourceInitialized = FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsExtendRestartTable (
|
|
IN PRESTART_POINTERS TablePointer,
|
|
IN ULONG NumberNewEntries,
|
|
IN ULONG FreeGoal
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine extends a previously allocated Restart Table, by
|
|
creating and initializing a new one, and copying over the the
|
|
table entries from the old one. The old table is then deallocated.
|
|
On return, the table pointer points to the new Restart Table.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Address of the pointer to the previously created table.
|
|
|
|
NumberNewEntries - The number of addtional entries to be allocated
|
|
in the new table.
|
|
|
|
FreeGoal - A hint as to what point the caller would like to truncate
|
|
the table back to, when sufficient entries are deleted.
|
|
If truncation is not desired, then MAXULONG may be specified.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PRESTART_TABLE NewTable, OldTable;
|
|
ULONG OldSize;
|
|
|
|
OldSize = SizeOfRestartTable(TablePointer);
|
|
|
|
//
|
|
// Get pointer to old table.
|
|
//
|
|
|
|
OldTable = TablePointer->Table;
|
|
ASSERT_RESTART_TABLE(OldTable);
|
|
|
|
//
|
|
// Start by initializing a table for the new size.
|
|
//
|
|
|
|
InitializeNewTable( OldTable->EntrySize,
|
|
OldTable->NumberEntries + NumberNewEntries,
|
|
TablePointer );
|
|
|
|
//
|
|
// Copy body of old table in place to new table.
|
|
//
|
|
|
|
NewTable = TablePointer->Table;
|
|
RtlMoveMemory( (NewTable + 1),
|
|
(OldTable + 1),
|
|
OldTable->EntrySize * OldTable->NumberEntries );
|
|
|
|
//
|
|
// Fix up new table's header, and fix up free list.
|
|
//
|
|
|
|
NewTable->FreeGoal = MAXULONG;
|
|
if (FreeGoal != MAXULONG) {
|
|
NewTable->FreeGoal = sizeof(RESTART_TABLE) + FreeGoal * NewTable->EntrySize;
|
|
}
|
|
|
|
if (OldTable->FirstFree != 0) {
|
|
|
|
NewTable->FirstFree = OldTable->FirstFree;
|
|
*(PULONG)GetRestartEntryFromIndex( TablePointer, OldTable->LastFree ) =
|
|
OldSize;;
|
|
} else {
|
|
|
|
NewTable->FirstFree = OldSize;
|
|
}
|
|
|
|
//
|
|
// Copy number allocated
|
|
//
|
|
|
|
NewTable->NumberAllocated = OldTable->NumberAllocated;
|
|
|
|
//
|
|
// Free the old table and return the new one.
|
|
//
|
|
|
|
NtfsFreePool( OldTable );
|
|
|
|
ASSERT_RESTART_TABLE(NewTable);
|
|
}
|
|
|
|
|
|
ULONG
|
|
NtfsAllocateRestartTableIndex (
|
|
IN PRESTART_POINTERS TablePointer
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine allocates an index from within a previously initialized
|
|
Restart Table. If the table is empty, it is extended.
|
|
|
|
Note that the table must already be acquired either shared or exclusive,
|
|
and if it must be extended, then the table is released and will be
|
|
acquired exclusive on return.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the Restart Table in which an index is to
|
|
be allocated.
|
|
|
|
Return Value:
|
|
|
|
The allocated index.
|
|
|
|
--*/
|
|
|
|
{
|
|
PRESTART_TABLE Table;
|
|
ULONG EntryIndex;
|
|
KIRQL OldIrql;
|
|
PULONG Entry;
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableIndex:\n") );
|
|
DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
|
|
|
|
Table = TablePointer->Table;
|
|
ASSERT_RESTART_TABLE(Table);
|
|
|
|
//
|
|
// Acquire the spin lock to synchronize the allocation.
|
|
//
|
|
|
|
KeAcquireSpinLock( &TablePointer->SpinLock, &OldIrql );
|
|
|
|
//
|
|
// If the table is empty, then we have to extend it.
|
|
//
|
|
|
|
if (Table->FirstFree == 0) {
|
|
|
|
//
|
|
// First release the spin lock and the table resource, and get
|
|
// the resource exclusive.
|
|
//
|
|
|
|
KeReleaseSpinLock( &TablePointer->SpinLock, OldIrql );
|
|
NtfsReleaseRestartTable( TablePointer );
|
|
NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
|
|
|
|
//
|
|
// Now extend the table. Note that if this routine raises, we have
|
|
// nothing to release.
|
|
//
|
|
|
|
NtfsExtendRestartTable( TablePointer, 16, MAXULONG );
|
|
|
|
//
|
|
// And re-get our pointer to the restart table
|
|
//
|
|
|
|
Table = TablePointer->Table;
|
|
|
|
//
|
|
// Now get the spin lock again and proceed.
|
|
//
|
|
|
|
KeAcquireSpinLock( &TablePointer->SpinLock, &OldIrql );
|
|
}
|
|
|
|
//
|
|
// Get First Free to return it.
|
|
//
|
|
|
|
EntryIndex = Table->FirstFree;
|
|
|
|
ASSERT( EntryIndex != 0 );
|
|
|
|
//
|
|
// Dequeue this entry and zero it.
|
|
//
|
|
|
|
Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, EntryIndex );
|
|
|
|
Table->FirstFree = *Entry;
|
|
ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
|
|
|
|
RtlZeroMemory( Entry, Table->EntrySize );
|
|
|
|
//
|
|
// Show that it's allocated.
|
|
//
|
|
|
|
*Entry = RESTART_ENTRY_ALLOCATED;
|
|
|
|
//
|
|
// If list is going empty, then we fix the LastFree as well.
|
|
//
|
|
|
|
if (Table->FirstFree == 0) {
|
|
|
|
Table->LastFree = 0;
|
|
}
|
|
|
|
Table->NumberAllocated += 1;
|
|
|
|
//
|
|
// Now just release the spin lock before returning.
|
|
//
|
|
|
|
KeReleaseSpinLock( &TablePointer->SpinLock, OldIrql );
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableIndex -> %08lx\n", EntryIndex) );
|
|
|
|
return EntryIndex;
|
|
}
|
|
|
|
|
|
PVOID
|
|
NtfsAllocateRestartTableFromIndex (
|
|
IN PRESTART_POINTERS TablePointer,
|
|
IN ULONG Index
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine allocates a specific index from within a previously
|
|
initialized Restart Table. If the index does not exist within the
|
|
existing table, the table is extended.
|
|
|
|
Note that the table must already be acquired either shared or exclusive,
|
|
and if it must be extended, then the table is released and will be
|
|
acquired exclusive on return.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the Restart Table in which an index is to
|
|
be allocated.
|
|
|
|
Index - The index to be allocated.
|
|
|
|
Return Value:
|
|
|
|
The table entry allocated.
|
|
|
|
--*/
|
|
|
|
{
|
|
PULONG Entry;
|
|
PULONG LastEntry;
|
|
|
|
PRESTART_TABLE Table;
|
|
KIRQL OldIrql;
|
|
|
|
ULONG ThisIndex;
|
|
ULONG LastIndex;
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableFromIndex\n") );
|
|
DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
|
|
DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
|
|
|
|
Table = TablePointer->Table;
|
|
ASSERT_RESTART_TABLE(Table);
|
|
|
|
//
|
|
// Acquire the spin lock to synchronize the allocation.
|
|
//
|
|
|
|
KeAcquireSpinLock( &TablePointer->SpinLock, &OldIrql );
|
|
|
|
//
|
|
// If the entry is not in the table, we will have to extend the table.
|
|
//
|
|
|
|
if (!IsRestartIndexWithinTable( TablePointer, Index )) {
|
|
|
|
ULONG TableSize;
|
|
ULONG BytesToIndex;
|
|
ULONG AddEntries;
|
|
|
|
//
|
|
// We extend the size by computing the number of entries
|
|
// between the existing size and the desired index and
|
|
// adding 1 to that.
|
|
//
|
|
|
|
TableSize = SizeOfRestartTable( TablePointer );;
|
|
BytesToIndex = Index - TableSize;
|
|
|
|
AddEntries = BytesToIndex / Table->EntrySize + 1;
|
|
|
|
//
|
|
// There should always be an integral number of entries being added.
|
|
//
|
|
|
|
ASSERT( BytesToIndex % Table->EntrySize == 0 );
|
|
|
|
//
|
|
// First release the spin lock and the table resource, and get
|
|
// the resource exclusive.
|
|
//
|
|
|
|
KeReleaseSpinLock( &TablePointer->SpinLock, OldIrql );
|
|
NtfsReleaseRestartTable( TablePointer );
|
|
NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
|
|
|
|
//
|
|
// Now extend the table. Note that if this routine raises, we have
|
|
// nothing to release.
|
|
//
|
|
|
|
NtfsExtendRestartTable( TablePointer,
|
|
AddEntries,
|
|
TableSize );
|
|
|
|
Table = TablePointer->Table;
|
|
ASSERT_RESTART_TABLE(Table);
|
|
|
|
//
|
|
// Now get the spin lock again and proceed.
|
|
//
|
|
|
|
KeAcquireSpinLock( &TablePointer->SpinLock, &OldIrql );
|
|
}
|
|
|
|
//
|
|
// Now see if the entry is already allocated, and just return if it is.
|
|
//
|
|
|
|
Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, Index );
|
|
|
|
if (!IsRestartTableEntryAllocated(Entry)) {
|
|
|
|
//
|
|
// We now have to walk through the table, looking for the entry
|
|
// we're interested in and the previous entry. Start by looking at the
|
|
// first entry.
|
|
//
|
|
|
|
ThisIndex = Table->FirstFree;
|
|
|
|
//
|
|
// Get the Entry from the list.
|
|
//
|
|
|
|
Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
|
|
|
|
//
|
|
// If this is a match, then we pull it out of the list and are done.
|
|
//
|
|
|
|
if (ThisIndex == Index) {
|
|
|
|
//
|
|
// Dequeue this entry.
|
|
//
|
|
|
|
Table->FirstFree = *Entry;
|
|
ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
|
|
|
|
//
|
|
// Otherwise we need to walk through the list looking for the
|
|
// predecessor of our entry.
|
|
//
|
|
|
|
} else {
|
|
|
|
while (TRUE) {
|
|
|
|
//
|
|
// Remember the entry just found.
|
|
//
|
|
|
|
LastIndex = ThisIndex;
|
|
LastEntry = Entry;
|
|
|
|
//
|
|
// We should never run out of entries.
|
|
//
|
|
|
|
ASSERT( *LastEntry != 0 );
|
|
|
|
//
|
|
// Lookup up the next entry in the list.
|
|
//
|
|
|
|
ThisIndex = *LastEntry;
|
|
Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
|
|
|
|
//
|
|
// If this is our match we are done.
|
|
//
|
|
|
|
if (ThisIndex == Index) {
|
|
|
|
//
|
|
// Dequeue this entry.
|
|
//
|
|
|
|
*LastEntry = *Entry;
|
|
|
|
//
|
|
// If this was the last entry, we update that in the
|
|
// table as well.
|
|
//
|
|
|
|
if (Table->LastFree == ThisIndex) {
|
|
|
|
Table->LastFree = LastIndex;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// If the list is now empty, we fix the LastFree as well.
|
|
//
|
|
|
|
if (Table->FirstFree == 0) {
|
|
|
|
Table->LastFree = 0;
|
|
}
|
|
|
|
//
|
|
// Zero this entry. Then show that this is allocated and increment the
|
|
// allocated count.
|
|
//
|
|
|
|
RtlZeroMemory( Entry, Table->EntrySize );
|
|
*Entry = RESTART_ENTRY_ALLOCATED;
|
|
|
|
Table->NumberAllocated += 1;
|
|
}
|
|
|
|
|
|
//
|
|
// Now just release the spin lock before returning.
|
|
//
|
|
|
|
KeReleaseSpinLock( &TablePointer->SpinLock, OldIrql );
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableFromIndex -> %08lx\n", Entry) );
|
|
|
|
return (PVOID)Entry;
|
|
}
|
|
|
|
|
|
VOID
|
|
NtfsFreeRestartTableIndex (
|
|
IN PRESTART_POINTERS TablePointer,
|
|
IN ULONG Index
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine frees a previously allocated index in a Restart Table.
|
|
If the index is before FreeGoal for the table, it is simply deallocated to
|
|
the front of the list for immediate reuse. If the index is beyond
|
|
FreeGoal, then it is deallocated to the end of the list, to facilitate
|
|
truncation of the list in the event that all of the entries beyond
|
|
FreeGoal are freed. However, this routine does not automatically
|
|
truncate the list, as this would cause too much overhead. The list
|
|
is checked during periodic checkpoint processing.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the Restart Table to which the index is to be
|
|
deallocated.
|
|
|
|
Index - The index being deallocated.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
PRESTART_TABLE Table;
|
|
PULONG Entry, OldLastEntry;
|
|
KIRQL OldIrql;
|
|
|
|
DebugTrace( +1, Dbg, ("NtfsFreeRestartTableIndex:\n") );
|
|
DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
|
|
DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
|
|
|
|
//
|
|
// Get pointers to table and the entry we are freeing.
|
|
//
|
|
|
|
Table = TablePointer->Table;
|
|
ASSERT_RESTART_TABLE(Table);
|
|
|
|
ASSERT( Table->FirstFree == 0
|
|
|| (Table->FirstFree >= 0x18)
|
|
&& ((Table->FirstFree - 0x18) % Table->EntrySize) == 0 );
|
|
|
|
ASSERT( (Index >= 0x18)
|
|
&& ((Index - 0x18) % Table->EntrySize) == 0 );
|
|
|
|
Entry = GetRestartEntryFromIndex( TablePointer, Index );
|
|
|
|
//
|
|
// Acquire the spin lock to synchronize the allocation.
|
|
//
|
|
|
|
KeAcquireSpinLock( &TablePointer->SpinLock, &OldIrql );
|
|
|
|
//
|
|
// If the index is before FreeGoal, then do a normal deallocation at
|
|
// the front of the list.
|
|
//
|
|
|
|
if (Index < Table->FreeGoal) {
|
|
|
|
*Entry = Table->FirstFree;
|
|
Table->FirstFree = Index;
|
|
if (Table->LastFree == 0) {
|
|
Table->LastFree = Index;
|
|
}
|
|
|
|
//
|
|
// Otherwise we will deallocate this guy to the end of the list.
|
|
//
|
|
|
|
} else {
|
|
|
|
if (Table->LastFree != 0) {
|
|
OldLastEntry = GetRestartEntryFromIndex( TablePointer,
|
|
Table->LastFree );
|
|
*OldLastEntry = Index;
|
|
} else {
|
|
Table->FirstFree = Index;
|
|
}
|
|
Table->LastFree = Index;
|
|
*Entry = 0;
|
|
}
|
|
|
|
Table->NumberAllocated -= 1;
|
|
|
|
//
|
|
// Now just release the spin lock before returning.
|
|
//
|
|
|
|
KeReleaseSpinLock( &TablePointer->SpinLock, OldIrql );
|
|
|
|
DebugTrace( -1, Dbg, ("NtfsFreeRestartTableIndex -> VOID\n") );
|
|
}
|
|
|
|
|
|
PVOID
|
|
NtfsGetFirstRestartTable (
|
|
IN PRESTART_POINTERS TablePointer
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine returns the first allocated entry from a Restart Table.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the respective Restart Table Pointers structure.
|
|
|
|
Return Value:
|
|
|
|
Pointer to the first entry, or NULL if none are allocated.
|
|
|
|
--*/
|
|
|
|
{
|
|
PCHAR Entry;
|
|
|
|
PAGED_CODE();
|
|
|
|
//
|
|
// If we know the table is empty, we can return immediately.
|
|
//
|
|
|
|
if (IsRestartTableEmpty( TablePointer )) {
|
|
|
|
return NULL;
|
|
}
|
|
|
|
//
|
|
// Otherwise point to the first table entry.
|
|
//
|
|
|
|
Entry = (PCHAR)(TablePointer->Table + 1);
|
|
|
|
//
|
|
// Loop until we hit the first one allocated, or the end of the list.
|
|
//
|
|
|
|
while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
|
|
SizeOfRestartTable(TablePointer)) {
|
|
|
|
if (IsRestartTableEntryAllocated(Entry)) {
|
|
return (PVOID)Entry;
|
|
}
|
|
|
|
Entry += TablePointer->Table->EntrySize;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
PVOID
|
|
NtfsGetNextRestartTable (
|
|
IN PRESTART_POINTERS TablePointer,
|
|
IN PVOID Current
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine returns the next allocated entry from a Restart Table.
|
|
|
|
Arguments:
|
|
|
|
TablePointer - Pointer to the respective Restart Table Pointers structure.
|
|
|
|
Current - Current entry pointer.
|
|
|
|
Return Value:
|
|
|
|
Pointer to the next entry, or NULL if none are allocated.
|
|
|
|
--*/
|
|
|
|
|
|
{
|
|
PCHAR Entry = (PCHAR)Current;
|
|
|
|
PAGED_CODE();
|
|
|
|
//
|
|
// Point to the next entry.
|
|
//
|
|
|
|
Entry += TablePointer->Table->EntrySize;
|
|
|
|
//
|
|
// Loop until we hit the first one allocated, or the end of the list.
|
|
//
|
|
|
|
while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
|
|
SizeOfRestartTable(TablePointer)) {
|
|
|
|
if (IsRestartTableEntryAllocated(Entry)) {
|
|
return (PVOID)Entry;
|
|
}
|
|
|
|
Entry += TablePointer->Table->EntrySize;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
//
|
|
// Internal support routine
|
|
//
|
|
|
|
VOID
|
|
DirtyPageRoutine (
|
|
IN PFILE_OBJECT FileObject,
|
|
IN PLARGE_INTEGER FileOffset,
|
|
IN ULONG Length,
|
|
IN PLSN OldestLsn,
|
|
IN PLSN NewestLsn,
|
|
IN PVOID Context1,
|
|
IN PVOID Context2
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is used as the call back routine for retrieving dirty pages
|
|
from the Cache Manager. It adds them to the Dirty Table list whose
|
|
pointer is pointed to by the Context parameter.
|
|
|
|
Arguments:
|
|
|
|
FileObject - Pointer to the file object which has the dirty page
|
|
|
|
FileOffset - File offset for start of dirty page
|
|
|
|
Length - Length recorded for the dirty page
|
|
|
|
OldestLsn - Oldest Lsn of an update not written through stored for that page
|
|
|
|
Context1 - IrpContext
|
|
|
|
Context2 - Pointer to the pointer to the Restart Table
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
PVCB Vcb;
|
|
VCN Vcn;
|
|
ULONG ClusterCount;
|
|
PDIRTY_PAGE_ENTRY PageEntry;
|
|
POPEN_ATTRIBUTE_ENTRY AttributeEntry;
|
|
ULONG PageIndex;
|
|
PIRP_CONTEXT IrpContext = (PIRP_CONTEXT)Context1;
|
|
PRESTART_POINTERS DirtyPageTable = (PRESTART_POINTERS)Context2;
|
|
PSCB_NONPAGED NonpagedScb;
|
|
|
|
UNREFERENCED_PARAMETER( NewestLsn );
|
|
|
|
DebugTrace( +1, Dbg, ("DirtyPageRoutine:\n") );
|
|
DebugTrace( 0, Dbg, ("FileObject = %08lx\n", FileObject) );
|
|
DebugTrace( 0, Dbg, ("FileOffset = %016I64x\n", *FileOffset) );
|
|
DebugTrace( 0, Dbg, ("Length = %08lx\n", Length) );
|
|
DebugTrace( 0, Dbg, ("OldestLsn = %016I64x\n", *OldestLsn) );
|
|
DebugTrace( 0, Dbg, ("Context2 = %08lx\n", Context2) );
|
|
|
|
//
|
|
// Get the Vcb out of the file object.
|
|
//
|
|
|
|
NonpagedScb = CONTAINING_RECORD( FileObject->SectionObjectPointer,
|
|
SCB_NONPAGED,
|
|
SegmentObject );
|
|
|
|
Vcb = NonpagedScb->Vcb;
|
|
|
|
//
|
|
// We noop this call if the open attribute entry for this Scb is 0. We assume
|
|
// there was a clean volume checkpoint which cleared this field.
|
|
//
|
|
|
|
if (NonpagedScb->OpenAttributeTableIndex == 0 ) {
|
|
|
|
DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
|
|
return;
|
|
}
|
|
|
|
//
|
|
// First allocate an entry in the dirty page table.
|
|
//
|
|
|
|
PageIndex = NtfsAllocateRestartTableIndex( DirtyPageTable );
|
|
|
|
//
|
|
// Get a pointer to the entry we just allocated.
|
|
//
|
|
|
|
PageEntry = GetRestartEntryFromIndex( DirtyPageTable, PageIndex );
|
|
|
|
//
|
|
// Calculate the range of Vcns which are dirty.
|
|
//
|
|
|
|
Vcn = Int64ShraMod32(FileOffset->QuadPart, Vcb->ClusterShift);
|
|
ClusterCount = ClustersFromBytes( Vcb, Length );
|
|
|
|
//
|
|
// Now fill in the Dirty Page Entry, except for the Lcns, because
|
|
// we are not allowed to take page faults now.
|
|
//
|
|
|
|
PageEntry->TargetAttribute = NonpagedScb->OpenAttributeTableIndex;
|
|
PageEntry->LengthOfTransfer = Length;
|
|
PageEntry->LcnsToFollow = ClusterCount;
|
|
PageEntry->Reserved = 0;
|
|
PageEntry->Vcn = Vcn;
|
|
|
|
//
|
|
// We don't use an Lsn which is prior to our current base Lsn
|
|
// or the known flushed lsn for a fuzzy checkpoint.
|
|
//
|
|
|
|
if (OldestLsn->QuadPart < Vcb->LastBaseLsn.QuadPart) {
|
|
|
|
PageEntry->OldestLsn = Vcb->LastBaseLsn;
|
|
|
|
|
|
} else {
|
|
|
|
PageEntry->OldestLsn = *OldestLsn;
|
|
}
|
|
|
|
//
|
|
// Mark the Open Attribute Table Entry for this file.
|
|
//
|
|
|
|
AttributeEntry = (POPEN_ATTRIBUTE_ENTRY)GetRestartEntryFromIndex(
|
|
&Vcb->OpenAttributeTable, PageEntry->TargetAttribute );
|
|
|
|
AttributeEntry->DirtyPagesSeen = TRUE;
|
|
|
|
DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
|
|
}
|
|
|
|
|
|
//
|
|
// Internal support routine
|
|
//
|
|
|
|
VOID
|
|
LookupLcns (
|
|
IN PIRP_CONTEXT IrpContext,
|
|
IN PSCB Scb,
|
|
IN VCN Vcn,
|
|
IN ULONG ClusterCount,
|
|
IN BOOLEAN MustBeAllocated,
|
|
OUT PLCN_UNALIGNED FirstLcn
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine looks up the Lcns for a range of Vcns, and stores them in
|
|
an output array. One Lcn is stored for each Vcn in the range, even
|
|
if the Lcns are contiguous.
|
|
|
|
Arguments:
|
|
|
|
Scb - Scb for stream on which lookup should occur.
|
|
|
|
Vcn - Start of range of Vcns to look up.
|
|
|
|
ClusterCount - Number of Vcns to look up.
|
|
|
|
MustBeAllocated - FALSE - if need not be allocated, and should check Mcb only
|
|
TRUE - if it must be allocated as far as caller knows (i.e.,
|
|
NtfsLookupAllocation also has checks)
|
|
|
|
FirstLcn - Pointer to storage for first Lcn. The caller must guarantee
|
|
that there is enough space to store ClusterCount Lcns.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
BOOLEAN Allocated;
|
|
LONGLONG Clusters;
|
|
LCN Lcn;
|
|
ULONG i;
|
|
|
|
PAGED_CODE();
|
|
|
|
DebugTrace( +1, Dbg, ("LookupLcns:\n") );
|
|
DebugTrace( 0, Dbg, ("Scb = %08l\n", Scb) );
|
|
DebugTrace( 0, Dbg, ("Vcn = %016I64x\n", Vcn) );
|
|
DebugTrace( 0, Dbg, ("ClusterCount = %08l\n", ClusterCount) );
|
|
DebugTrace( 0, Dbg, ("FirstLcn = %08lx\n", FirstLcn) );
|
|
|
|
//
|
|
// Loop until we have looked up all of the clusters
|
|
//
|
|
|
|
while (ClusterCount != 0) {
|
|
|
|
if (MustBeAllocated) {
|
|
|
|
//
|
|
// Lookup the next run.
|
|
//
|
|
|
|
Allocated = NtfsLookupAllocation( IrpContext,
|
|
Scb,
|
|
Vcn,
|
|
&Lcn,
|
|
&Clusters,
|
|
NULL,
|
|
NULL );
|
|
|
|
ASSERT( Allocated && (Lcn != 0) );
|
|
|
|
} else {
|
|
|
|
Allocated = NtfsLookupNtfsMcbEntry( &Scb->Mcb, Vcn, &Lcn, &Clusters, NULL, NULL, NULL, NULL );
|
|
|
|
//
|
|
// If we are off the end of the Mcb, then set up to just return
|
|
// Li0 for as many Lcns as are being looked up.
|
|
//
|
|
|
|
if (!Allocated ||
|
|
(Lcn == UNUSED_LCN)) {
|
|
Lcn = 0;
|
|
Clusters = ClusterCount;
|
|
Allocated = FALSE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If we got as many clusters as we were looking for, then just
|
|
// take the number we were looking for.
|
|
//
|
|
|
|
if (Clusters > ClusterCount) {
|
|
|
|
Clusters = ClusterCount;
|
|
}
|
|
|
|
//
|
|
// Fill in the Lcns in the header.
|
|
//
|
|
|
|
for (i = 0; i < (ULONG)Clusters; i++) {
|
|
|
|
*(FirstLcn++) = Lcn;
|
|
|
|
if (Allocated) {
|
|
Lcn = Lcn + 1;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Adjust loop variables for the number Lcns we just received.
|
|
//
|
|
|
|
Vcn = Vcn + Clusters;
|
|
ClusterCount -= (ULONG)Clusters;
|
|
}
|
|
DebugTrace( -1, Dbg, ("LookupLcns -> VOID\n") );
|
|
}
|
|
|
|
|
|
VOID
|
|
InitializeNewTable (
|
|
IN ULONG EntrySize,
|
|
IN ULONG NumberEntries,
|
|
OUT PRESTART_POINTERS TablePointer
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called to allocate and initialize a new table when the
|
|
associated Restart Table is being allocated or extended.
|
|
|
|
Arguments:
|
|
|
|
EntrySize - Size of the table entries, in bytes.
|
|
|
|
NumberEntries - Number of entries to allocate for the table.
|
|
|
|
TablePointer - Returns a pointer to the table.
|
|
|
|
Return Value:
|
|
|
|
None
|
|
|
|
--*/
|
|
|
|
{
|
|
PRESTART_TABLE Table;
|
|
PULONG Entry;
|
|
ULONG Size;
|
|
ULONG Offset;
|
|
|
|
//
|
|
// Calculate size of table to allocate.
|
|
//
|
|
|
|
Size = EntrySize * NumberEntries + sizeof(RESTART_TABLE);
|
|
|
|
//
|
|
// Allocate and zero out the table.
|
|
//
|
|
|
|
Table =
|
|
TablePointer->Table = NtfsAllocatePool( NonPagedPool, Size );
|
|
|
|
RtlZeroMemory( Table, Size );
|
|
|
|
//
|
|
// Initialize the table header.
|
|
//
|
|
|
|
Table->EntrySize = (USHORT)EntrySize;
|
|
Table->NumberEntries = (USHORT)NumberEntries;
|
|
Table->FreeGoal = MAXULONG;
|
|
Table->FirstFree = sizeof(RESTART_TABLE);
|
|
Table->LastFree = Table->FirstFree + (NumberEntries - 1) * EntrySize;
|
|
|
|
//
|
|
// Initialize the free list.
|
|
//
|
|
|
|
for (Entry = (PULONG)(Table + 1), Offset = sizeof(RESTART_TABLE) + EntrySize;
|
|
Entry < (PULONG)((PCHAR)Table + Table->LastFree);
|
|
Entry = (PULONG)((PCHAR)Entry + EntrySize), Offset += EntrySize) {
|
|
|
|
*Entry = Offset;
|
|
}
|
|
|
|
ASSERT_RESTART_TABLE(Table);
|
|
}
|
|
|