Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

5553 lines
169 KiB

  1. /*++
  2. Copyright (c) 1991 Microsoft Corporation
  3. Module Name:
  4. LogSup.c
  5. Abstract:
  6. This module implements the Ntfs interfaces to the Log File Service (LFS).
  7. Author:
  8. Tom Miller [TomM] 24-Jul-1991
  9. Revision History:
  10. --*/
  11. #include "NtfsProc.h"
  12. //
  13. // The local debug trace level
  14. //
  15. #define Dbg (DEBUG_TRACE_LOGSUP)
  16. //
  17. // Define a tag for general pool allocations from this module
  18. //
  19. #undef MODULE_POOL_TAG
  20. #define MODULE_POOL_TAG ('LFtN')
  21. #ifdef NTFSDBG
      //
      // ASSERT_RESTART_TABLE(T) - consistency check compiled in only when
      // NTFSDBG is defined.
      //
      // The bytes that follow the RESTART_TABLE header at T are walked as an
      // array of ((T)->EntrySize / 4) * (T)->NumberEntries ULONGs. For every
      // element equal to 0xDAADF00D the macro prints the table address and the
      // element address with DbgPrint and then fires ASSERTMSG.
      //
      // When NTFSDBG is not defined the macro expands to {NOTHING;}.
      //
      // NOTE(review): 0xDAADF00D is presumably a fill pattern that must never
      // show up inside a live table - confirm where it is written.
      //
  22. #define ASSERT_RESTART_TABLE(T) { \
  23. PULONG _p = (PULONG)(((PCHAR)(T)) + sizeof(RESTART_TABLE)); \
  24. ULONG _Count = ((T)->EntrySize/4) * (T)->NumberEntries; \
  25. ULONG _i; \
  26. for (_i = 0; _i < _Count; _i += 1) { \
  27. if (_p[_i] == 0xDAADF00D) { \
  28. DbgPrint("DaadFood for table %08lx, At %08lx\n", (T), &_p[_i]); \
  29. ASSERTMSG("ASSERT_RESTART_TABLE ", FALSE); \
  30. } \
  31. } \
  32. }
  33. #else
  34. #define ASSERT_RESTART_TABLE(T) {NOTHING;}
  35. #endif
  36. //
  37. // Local structure for use in DirtyPageRoutine
  38. //
  39. typedef struct {
      //
      // The routine that fills and reads this structure is not visible in this
      // part of the file, so the per-field notes below are inferred from the
      // field names and types only - TODO confirm against DirtyPageRoutine.
      //
      // Presumably the restart table that collects the dirty page entries.
  40. PRESTART_POINTERS DirtyPageTable;
      // Presumably the index of the dirty page table entry currently in use.
  41. ULONG DirtyPageIndex;
      // Presumably the file object that owns the page with the oldest Lsn.
  42. PFILE_OBJECT OldestFileObject;
      // Presumably the oldest Lsn seen so far.
  43. LSN OldestLsn;
      // Presumably set when the collected entries no longer fit.
  44. BOOLEAN Overflow;
  45. } DIRTY_PAGE_CONTEXT, *PDIRTY_PAGE_CONTEXT;
  46. //
  47. // Local procedure prototypes
  48. //
  // Pointer to an LCN declared with the UNALIGNED qualifier. It is the type of
  // the FirstLcn output parameter of LookupLcns below.
  49. typedef LCN UNALIGNED *PLCN_UNALIGNED;
  // Forward declaration only; the definition is not in this part of the file.
  // The signature takes a file object, a file offset and length, pointers to
  // an oldest and a newest Lsn, and two opaque context pointers.
  // NOTE(review): presumably a per-dirty-page callback whose context is the
  // DIRTY_PAGE_CONTEXT declared above - confirm against the definition.
  50. VOID
  51. DirtyPageRoutine (
  52. IN PFILE_OBJECT FileObject,
  53. IN PLARGE_INTEGER FileOffset,
  54. IN ULONG Length,
  55. IN PLSN OldestLsn,
  56. IN PLSN NewestLsn,
  57. IN PVOID Context1,
  58. IN PVOID Context2
  59. );
  // Forward declaration only; the definition is not in this part of the file.
  // NtfsWriteLog calls this with the Scb, the starting Vcn and the cluster
  // count of the structure being logged, MustBeAllocated == TRUE, and the
  // address of the first LCN slot in the log record header as FirstLcn.
  // NtfsWriteLog treats a FALSE return as "this range is not allocated" and
  // does not write the log record in that case.
  60. BOOLEAN
  61. LookupLcns (
  62. IN PIRP_CONTEXT IrpContext,
  63. IN PSCB Scb,
  64. IN VCN Vcn,
  65. IN ULONG ClusterCount,
  66. IN BOOLEAN MustBeAllocated,
  67. OUT PLCN_UNALIGNED FirstLcn
  68. );
  // Forward declaration only; the definition is not in this part of the file.
  // Takes the request context and a Vcb and returns a ULONG.
  // NOTE(review): presumably a byte count for attribute names - TODO confirm.
  69. ULONG
  70. NtfsCalculateNamedBytes (
  71. IN PIRP_CONTEXT IrpContext,
  72. IN PVCB Vcb
  73. );
  // Forward declaration only; the definition is not in this part of the file.
  // Takes the request context and the exception pointers and returns a LONG.
  // NOTE(review): presumably an exception filter for an except clause -
  // confirm the disposition values it returns.
  74. LONG
  75. NtfsCatchOutOfMemoryExceptionFilter (
  76. IN PIRP_CONTEXT IrpContext,
  77. IN PEXCEPTION_POINTERS ExceptionPointer
  78. );
  // Forward declaration only; the definition is not in this part of the file.
  // Takes the request context, the exception pointers and an NTSTATUS
  // exception code, and returns a LONG.
  // NOTE(review): presumably an exception filter used by the checkpoint
  // code - confirm against the definition.
  79. LONG
  80. NtfsCheckpointExceptionFilter (
  81. IN PIRP_CONTEXT IrpContext,
  82. IN PEXCEPTION_POINTERS ExceptionPointer,
  83. IN NTSTATUS ExceptionCode
  84. );
  85. #ifdef ALLOC_PRAGMA
      //
      // When ALLOC_PRAGMA is defined, each routine named below is assigned to
      // the PAGE code section with #pragma alloc_text. NtfsWriteLog is on this
      // list and correspondingly starts with PAGED_CODE().
      //
  86. #pragma alloc_text(PAGE, LookupLcns)
  87. #pragma alloc_text(PAGE, NtfsCheckpointCurrentTransaction)
  88. #pragma alloc_text(PAGE, NtfsCheckpointForLogFileFull)
  89. #pragma alloc_text(PAGE, NtfsCheckpointVolume)
  90. #pragma alloc_text(PAGE, NtfsCleanCheckpoint)
  91. #pragma alloc_text(PAGE, NtfsCleanupFailedTransaction)
  92. #pragma alloc_text(PAGE, NtfsCommitCurrentTransaction)
  93. #pragma alloc_text(PAGE, NtfsFreeRecentlyDeallocated)
  94. #pragma alloc_text(PAGE, NtfsFreeRestartTable)
  95. #pragma alloc_text(PAGE, NtfsGetFirstRestartTable)
  96. #pragma alloc_text(PAGE, NtfsGetNextRestartTable)
  97. #pragma alloc_text(PAGE, NtfsInitializeLogging)
  98. #pragma alloc_text(PAGE, NtfsInitializeRestartTable)
  99. #pragma alloc_text(PAGE, NtfsStartLogFile)
  100. #pragma alloc_text(PAGE, NtfsStopLogFile)
  101. #pragma alloc_text(PAGE, NtfsUpdateOatVersion)
  102. #pragma alloc_text(PAGE, NtfsWriteLog)
  103. #endif
  104. LSN
  105. NtfsWriteLog (
  106. IN PIRP_CONTEXT IrpContext,
  107. IN PSCB Scb,
  108. IN PBCB Bcb OPTIONAL,
  109. IN NTFS_LOG_OPERATION RedoOperation,
  110. IN PVOID RedoBuffer OPTIONAL,
  111. IN ULONG RedoLength,
  112. IN NTFS_LOG_OPERATION UndoOperation,
  113. IN PVOID UndoBuffer OPTIONAL,
  114. IN ULONG UndoLength,
  115. IN LONGLONG StreamOffset,
  116. IN ULONG RecordOffset,
  117. IN ULONG AttributeOffset,
  118. IN ULONG StructureSize
  119. )
  120. /*++
  121. Routine Description:
  122. This routine implements an Ntfs-specific interface to LFS for the
  123. purpose of logging updates to file record segments and resident
  124. attributes.
  125. The caller creates one of the predefined log record formats as
  126. determined by the given LogOperation, and calls this routine with
  127. this log record and pointers to the respective file and attribute
  128. records. The list of log operations along with the respective structure
  129. expected for the Log Buffer is present in ntfslog.h.
  130. Arguments:
       IrpContext - Context of the current request. Its TransactionId is
       allocated here if it is still zero, and its Flags are updated
       (IRP_CONTEXT_FLAG_WROTE_LOG, IRP_CONTEXT_FLAG_ONLY_SYNCH_CHECKPOINT).
  131. Scb - Pointer to the Scb for the respective file or Mft. The caller must
  132. have at least shared access to this Scb.
  133. Bcb - If specified, this Bcb will be set dirty specifying the Lsn of
  134. the log record written.
  135. RedoOperation - One of the log operation codes defined in ntfslog.h.
  136. RedoBuffer - A pointer to the structure expected for the given Redo operation,
  137. as summarized in ntfslog.h. This pointer should be
  138. omitted if and only if the table in ntfslog.h does not show
  139. a log record for this log operation.
  140. RedoLength - Length of the Redo buffer in bytes.
  141. UndoOperation - One of the log operation codes defined in ntfslog.h.
  142. Must be CompensationLogRecord if logging the Undo of
  143. a previous operation, such as during transaction abort.
  144. In this case, of course, the Redo information is from
  145. the Undo information of the record being undone. See
  146. next parameter.
  147. UndoBuffer - A pointer to the structure expected for the given Undo operation,
  148. as summarized in ntfslog.h. This pointer should be
  149. omitted if and only if the table in ntfslog.h does not show
  150. a log record for this log operation. If this pointer is
  151. identical to RedoBuffer, then UndoLength is ignored and
  152. only a single copy of the RedoBuffer is made, but described
  153. by both the Redo and Undo portions of the log record.
  154. For a compensation log record (UndoOperation ==
  155. CompensationLogRecord), this argument must point to the
  156. UndoNextLsn of the log record being compensated.
  157. UndoLength - Length of the Undo buffer in bytes. Ignored if RedoBuffer ==
  158. UndoBuffer.
  159. For a compensation log record, this argument must be the length
  160. of the original redo record. (Used during restart).
  161. StreamOffset - Offset within the stream for the start of the structure being
  162. modified (Mft or Index), or simply the stream offset for the start
  163. of the update.
  164. RecordOffset - Byte offset from StreamOffset above to update reference
  165. AttributeOffset - Offset within a value to which an update applies, if relevant.
  166. StructureSize - Size of the entire structure being logged.
  167. Return Value:
  168. The Lsn of the log record written. For most callers, this status may be ignored,
  169. because the Lsn is also correctly recorded in the transaction context.
  170. If an error occurs this procedure will raise.
       If the Vcb has no valid log handle, or the volume is mounted read-only,
       nothing is written and Li0 is returned.
       If LookupLcns reports that the range is not allocated, no record is
       written and the value of LfsQueryLastLsn is returned instead.
  171. --*/
  172. {
  173. LFS_WRITE_ENTRY WriteEntries[3];
       //
       // Stack copy of the log record header followed by PAGE_SIZE/512 - 1
       // extra LCN slots. Requests that need more slots than this switch
       // MyHeader to a pool allocation further down.
       //
  174. struct {
  175. NTFS_LOG_RECORD_HEADER LogRecordHeader;
  176. LCN Runs[PAGE_SIZE/512 - 1];
  177. } LocalHeader;
  178. PNTFS_LOG_RECORD_HEADER MyHeader;
  179. PVCB Vcb;
  180. LSN UndoNextLsn;
  181. LSN ReturnLsn;
  182. PLSN DirtyLsn = NULL;
  183. ULONG WriteIndex = 0;
  184. ULONG UndoIndex = 0;
  185. ULONG RedoIndex = 0;
  186. LONG UndoBytes = 0;
  187. LONG UndoAdjustmentForLfs = 0;
  188. LONG UndoRecords = 0;
  189. PTRANSACTION_ENTRY TransactionEntry;
  190. ULONG OpenAttributeIndex = 0;
  191. ULONG OnDiskAttributeIndex = 0;
  192. POPEN_ATTRIBUTE_DATA AttributeData = NULL;
  193. BOOLEAN AttributeTableAcquired = FALSE;
  194. BOOLEAN TransactionTableAcquired = FALSE;
  195. ULONG LogClusterCount = ClustersFromBytes( Scb->Vcb, StructureSize );
  196. VCN LogVcn = LlClustersFromBytesTruncate( Scb->Vcb, StreamOffset );
       //
       // TRUE from the point this call bumps Vcb->LastTransactionLsnCount
       // until the matching decrement has been done; the finally clause uses
       // it to drop the count on any early exit.
       //
  197. BOOLEAN DecrementLastTransactionLsnCount = FALSE;
  198. PAGED_CODE();
  199. Vcb = Scb->Vcb;
  200. //
  201. // If the log handle is gone, then we noop this call.
  202. //
  203. if (!FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
  204. return Li0; //**** LfsZeroLsn;
  205. }
  206. if (FlagOn( Vcb->VcbState, VCB_STATE_MOUNT_READ_ONLY )) {
  207. //
  208. // We'd like to have a chat with whoever sent the log write.
  209. //
       // The ASSERT below repeats the negation of the enclosing condition, so
       // it always fires when this branch is reached in a build with asserts.
       //
  210. ASSERT(!FlagOn( Vcb->VcbState, VCB_STATE_MOUNT_READ_ONLY ));
  211. return Li0;
  212. }
  213. DebugTrace( +1, Dbg, ("NtfsWriteLog:\n") );
  214. DebugTrace( 0, Dbg, ("Scb = %08lx\n", Scb) );
  215. DebugTrace( 0, Dbg, ("Bcb = %08lx\n", Bcb) );
  216. DebugTrace( 0, Dbg, ("RedoOperation = %08lx\n", RedoOperation) );
  217. DebugTrace( 0, Dbg, ("RedoBuffer = %08lx\n", RedoBuffer) );
  218. DebugTrace( 0, Dbg, ("RedoLength = %08lx\n", RedoLength) );
  219. DebugTrace( 0, Dbg, ("UndoOperation = %08lx\n", UndoOperation) );
  220. DebugTrace( 0, Dbg, ("UndoBuffer = %08lx\n", UndoBuffer) );
  221. DebugTrace( 0, Dbg, ("UndoLength = %08lx\n", UndoLength) );
  222. DebugTrace( 0, Dbg, ("StreamOffset = %016I64x\n", StreamOffset) );
  223. DebugTrace( 0, Dbg, ("RecordOffset = %08lx\n", RecordOffset) );
  224. DebugTrace( 0, Dbg, ("AttributeOffset = %08lx\n", AttributeOffset) );
  225. DebugTrace( 0, Dbg, ("StructureSize = %08lx\n", StructureSize) );
  226. //
  227. // Check Redo and Undo lengths
  228. //
  229. ASSERT( ((RedoOperation == UpdateNonresidentValue) && (RedoLength <= PAGE_SIZE)) ||
  230. !ARGUMENT_PRESENT( Scb ) ||
  231. !ARGUMENT_PRESENT( Bcb ) ||
  232. ((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
  233. (RedoLength <= Scb->ScbType.Index.BytesPerIndexBuffer)) ||
  234. (RedoLength <= Scb->Vcb->BytesPerFileRecordSegment) );
  235. ASSERT( ((UndoOperation == UpdateNonresidentValue) && (UndoLength <= PAGE_SIZE)) ||
  236. !ARGUMENT_PRESENT( Scb ) ||
  237. !ARGUMENT_PRESENT( Bcb ) ||
  238. ((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
  239. (UndoLength <= Scb->ScbType.Index.BytesPerIndexBuffer)) ||
  240. (UndoLength <= Scb->Vcb->BytesPerFileRecordSegment) ||
  241. (UndoOperation == CompensationLogRecord) );
  242. //
  243. // Initialize local pointers.
  244. //
  245. MyHeader = (PNTFS_LOG_RECORD_HEADER)&LocalHeader;
  246. try {
  247. //
  248. // If the structure size is nonzero, then create an open attribute table
  249. // entry.
  250. //
  251. if (StructureSize != 0) {
  252. //
  253. // Allocate an entry in the open attribute table and initialize it,
  254. // if it does not already exist. If we subsequently fail, we do
  255. // not have to clean this up. It will go away on the next checkpoint.
  256. //
  257. if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {
  258. OPEN_ATTRIBUTE_ENTRY_V0 LocalOpenEntry;
  259. POPEN_ATTRIBUTE_ENTRY OpenAttributeEntry;
  260. POPEN_ATTRIBUTE_ENTRY_V0 OnDiskAttributeEntry;
  261. ULONG EntrySize;
  262. ASSERT( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ) >= sizeof( OPEN_ATTRIBUTE_ENTRY ));
  263. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  264. AttributeTableAcquired = TRUE;
  265. //
  266. // Check for a drain pending
  267. //
  268. if (Vcb->OpenAttributeTable.DrainPending) {
  269. SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_ONLY_SYNCH_CHECKPOINT );
  270. #ifdef PERF_STATS
  271. IrpContext->LogFullReason = LF_TRANSACTION_DRAIN;
  272. #endif
  273. NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
  274. }
  275. //
  276. // Only proceed if the OpenAttributeTableIndex is still 0.
  277. // We may reach this point for the MftScb. It may not be
  278. // acquired when logging changes to file records. We will
  279. // use the OpenAttributeTable for final synchronization
  280. // for the Mft open attribute table entry.
  281. //
  282. if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {
  283. //
  284. // Our structures require tables to stay within 64KB, since
  285. // we use USHORT offsets. Things are getting out of hand
  286. // at this point anyway. Raise log file full to reset the
  287. // table sizes if we get to this point.
  288. //
  289. if (AllocatedSizeOfRestartTable( Vcb->OnDiskOat ) > MAX_RESTART_TABLE_SIZE) {
  290. #ifdef PERF_STATS
  291. IrpContext->LogFullReason = LF_OPEN_ATTRIBUTES;
  292. #endif
  293. NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
  294. }
  295. //
  296. // Allocate the indexes and then the Attribute data structure. The
  297. // try-finally will handle any failures.
  298. //
  299. OpenAttributeIndex = NtfsAllocateRestartTableIndex( &Vcb->OpenAttributeTable, TRUE );
  300. AttributeData = NtfsAllocatePool( PagedPool, sizeof( OPEN_ATTRIBUTE_DATA ) );
  301. OpenAttributeEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
  302. OpenAttributeIndex );
  303. //
  304. // Initialize the entry and auxiliary data.
  305. //
  306. if (Scb->AttributeTypeCode == $INDEX_ALLOCATION) {
  307. OpenAttributeEntry->BytesPerIndexBuffer = Scb->ScbType.Index.BytesPerIndexBuffer;
  308. } else {
  309. OpenAttributeEntry->BytesPerIndexBuffer = 0;
  310. }
  311. //
  312. // It's good enough to use the last lsn for the lsnofopenrecord
  313. // since we're serialized on create attributes within a file
  314. //
  315. OpenAttributeEntry->AttributeTypeCode = Scb->AttributeTypeCode;
  316. OpenAttributeEntry->FileReference = Scb->Fcb->FileReference;
  317. OpenAttributeEntry->LsnOfOpenRecord = LfsQueryLastLsn( Vcb->LogHandle );
  318. AttributeData->Overlay.Scb = Scb;
  319. AttributeData->AttributeName = Scb->AttributeName;
  320. AttributeData->AttributeNamePresent = FALSE;
  321. //
  322. // Use the open attribute entry as the default table entry.
  323. //
  324. Scb->NonpagedScb->OnDiskOatIndex = OpenAttributeIndex;
  325. //
  326. // If the on-disk structure is needed then get it now.
  327. //
  328. if (Vcb->RestartVersion == 0) {
  329. NtfsAcquireExclusiveRestartTable( Vcb->OnDiskOat, TRUE );
  330. try {
  331. OnDiskAttributeIndex = NtfsAllocateRestartTableIndex( Vcb->OnDiskOat, TRUE );
  332. OnDiskAttributeEntry = GetRestartEntryFromIndex( Vcb->OnDiskOat,
  333. OnDiskAttributeIndex );
  334. OnDiskAttributeEntry->OatIndex = OpenAttributeIndex;
  335. OnDiskAttributeEntry->FileReference = Scb->Fcb->FileReference;
        //
        // NOTE(review): this zero store is overwritten three statements
        // below, before anything reads the field.
        //
  336. OnDiskAttributeEntry->LsnOfOpenRecord.QuadPart = 0;
  337. OnDiskAttributeEntry->AttributeTypeCode = Scb->AttributeTypeCode;
  338. OnDiskAttributeEntry->BytesPerIndexBuffer = OpenAttributeEntry->BytesPerIndexBuffer;
  339. OnDiskAttributeEntry->LsnOfOpenRecord.QuadPart = OpenAttributeEntry->LsnOfOpenRecord.QuadPart;
  340. //
  341. // Use this new index.
  342. //
  343. Scb->NonpagedScb->OnDiskOatIndex = OnDiskAttributeIndex;
  344. } finally {
  345. NtfsReleaseRestartTable( Vcb->OnDiskOat );
  346. }
  347. //
  348. // We need to log this so store a copy in our local.
  349. //
  350. } else {
  351. OnDiskAttributeIndex = OpenAttributeIndex;
  352. }
  353. //
  354. // Now store the table indexes.
  355. //
  356. AttributeData->OnDiskAttributeIndex = OnDiskAttributeIndex;
  357. Scb->NonpagedScb->OpenAttributeTableIndex = OpenAttributeIndex;
  358. //
  359. // Now connect the attribute data to the table entry and the Vcb.
  360. //
  361. OpenAttributeEntry->OatData = AttributeData;
  362. InsertTailList( &Vcb->OpenAttributeData, &AttributeData->Links );
        //
        // Snapshot the on-disk entry into a local while the table is still
        // held; the copy is what gets logged after the table is released.
        //
  363. RtlCopyMemory( &LocalOpenEntry,
  364. GetRestartEntryFromIndex( Vcb->OnDiskOat, OnDiskAttributeIndex ),
  365. EntrySize = Vcb->OnDiskOat->Table->EntrySize );
  366. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  367. AttributeTableAcquired = FALSE;
        //
        // The entry is now fully linked. Zeroing the local index tells the
        // finally clause that there is nothing to roll back.
        //
  368. OpenAttributeIndex = 0;
  369. //
  370. // Now log the new open attribute table entry before going on,
  371. // to insure that the application of the caller's log record
  372. // will have the information he needs on the attribute. We will
  373. // use the Undo buffer to convey the attribute name. We will
  374. // not infinitely recurse, because now this Scb already has an
  375. // open attribute table index.
  376. //
  377. NtfsWriteLog( IrpContext,
  378. Scb,
  379. NULL,
  380. OpenNonresidentAttribute,
  381. &LocalOpenEntry,
  382. EntrySize,
  383. Noop,
  384. Scb->AttributeName.Length != 0 ?
  385. Scb->AttributeName.Buffer : NULL,
  386. Scb->AttributeName.Length,
  387. (LONGLONG)0,
  388. 0,
  389. 0,
  390. 0 );
  391. } else {
  392. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  393. AttributeTableAcquired = FALSE;
  394. }
  395. }
  396. }
  397. //
  398. // Allocate a transaction ID and initialize it, if it does not already exist.
  399. // If we subsequently fail, we clean it up when the current request is
  400. // completed.
  401. //
  402. if (IrpContext->TransactionId == 0) {
  403. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  404. TransactionTableAcquired = TRUE;
  405. //
  406. // Our structures require tables to stay within 64KB, since
  407. // we use USHORT offsets. Things are getting out of hand
  408. // at this point anyway. Raise log file full to reset the
  409. // table sizes if we get to this point.
  410. //
  411. // Also raise if we're synchronizing to wait for all transactions to
  412. // finish
  413. //
  414. if ((SizeOfRestartTable( &Vcb->TransactionTable ) > MAX_RESTART_TABLE_SIZE) ||
  415. Vcb->TransactionTable.DrainPending) {
  416. SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_ONLY_SYNCH_CHECKPOINT );
  417. #ifdef PERF_STATS
  418. IrpContext->LogFullReason = LF_TRANSACTION_DRAIN;
  419. #endif
  420. NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
  421. }
  422. IrpContext->TransactionId =
  423. NtfsAllocateRestartTableIndex( &Vcb->TransactionTable, TRUE );
  424. //
  425. // Obtain the lsn now so that checkpoint code can calculate the BaseLsn correctly
  426. // before we update the FirstLsn of this transaction after calling LfsWrite below.
  427. // This closes the window where we started the transaction with an invalid Lsn till
  428. // we actually write out the transaction and update the Lsn.
  429. //
  430. if (Vcb->LastTransactionLsnCount == 0) {
  431. //
  432. // Since nobody should be updating LastTransactionLsn, just write out the last lsn
  433. //
  434. Vcb->LastTransactionLsn = LfsQueryLastLsn( Vcb->LogHandle );
  435. } else {
  436. //
  437. // Since LastTransactionLsnCount is non-zero, LastTransactionLsn should also be non-zero.
  438. // We should also be moving forward if someone is already ahead of us.
  439. //
  440. ASSERT( (Vcb->LastTransactionLsnCount != 0) &&
  441. (Vcb->LastTransactionLsn.QuadPart != 0) &&
  442. (Vcb->LastTransactionLsn.QuadPart <= LfsQueryLastLsn( Vcb->LogHandle ).QuadPart) );
  443. }
  444. //
  445. // Bump the reference count by one and decrement it after we update the FirstLsn below
  446. //
  447. Vcb->LastTransactionLsnCount += 1;
  448. DecrementLastTransactionLsnCount = TRUE;
  449. ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );
  450. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  451. &Vcb->TransactionTable,
  452. IrpContext->TransactionId );
  453. TransactionEntry->TransactionState = TransactionActive;
  454. TransactionEntry->FirstLsn =
  455. TransactionEntry->PreviousLsn =
  456. TransactionEntry->UndoNextLsn = Li0; //**** LfsZeroLsn;
  457. //
  458. // Remember that we will need a commit record even if we abort
  459. // the transaction.
  460. //
  461. TransactionEntry->UndoBytes = QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));
  462. TransactionEntry->UndoRecords = 1;
  463. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  464. TransactionTableAcquired = FALSE;
  465. //
  466. // Remember the space for the commit record in our Lfs adjustment.
  467. //
  468. UndoAdjustmentForLfs += QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));
  469. //
  470. // If there is an undo operation for this log record, we reserve
  471. // the space for another Lfs log record.
  472. //
  473. if (UndoOperation != Noop) {
  474. UndoAdjustmentForLfs += Vcb->LogHeaderReservation;
  475. }
  476. }
  477. //
  478. // At least for now, assume update is contained in one physical page.
  479. //
  480. //ASSERT( (StructureSize == 0) || (StructureSize <= PAGE_SIZE) );
  481. //
  482. // If there isn't enough room for this structure on the stack, we
  483. // need to allocate an auxiliary buffer.
  484. //
       // The pool buffer is sized as the header plus LogClusterCount - 1
       // additional LCNs and is freed in the finally clause.
       //
  485. if (LogClusterCount > (PAGE_SIZE / 512)) {
  486. MyHeader = (PNTFS_LOG_RECORD_HEADER)
  487. NtfsAllocatePool(PagedPool, sizeof( NTFS_LOG_RECORD_HEADER )
  488. + (LogClusterCount - 1) * sizeof( LCN ));
  489. }
  490. //
  491. // Now fill in the WriteEntries array and the log record header.
  492. //
  493. WriteEntries[0].Buffer = (PVOID)MyHeader;
  494. WriteEntries[0].ByteLength = sizeof(NTFS_LOG_RECORD_HEADER);
  495. WriteIndex += 1;
  496. //
  497. // Lookup the Runs for this log record
  498. //
  499. MyHeader->LcnsToFollow = (USHORT)LogClusterCount;
  500. if (LogClusterCount != 0) {
  501. if (!LookupLcns( IrpContext,
  502. Scb,
  503. LogVcn,
  504. LogClusterCount,
  505. TRUE,
  506. &MyHeader->LcnsForPage[0] )) {
  507. //
  508. // It is possible that the allocation for this range is not allocated.
  509. // This may happen in cases where a stream which describes itself is
  510. // being hotfixed (perhaps MoveFile in a later release). In the
  511. // hotfix case we will not write this log record. Hotfix will mark
  512. // the volume dirty so we know that the system will verify the volume
  513. // at some point.
  514. //
  515. ASSERT( NtfsGetTopLevelHotFixScb() != NULL );
  516. //
  517. // Cleanup the transaction entry if allocated here.
  518. //
  519. if (!FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG ) &&
  520. (IrpContext->TransactionId != 0)) {
  521. NtfsCleanupFailedTransaction( IrpContext );
  522. }
  523. ReturnLsn = LfsQueryLastLsn( Vcb->LogHandle );
        //
        // NOTE(review): DirtyLsn is assigned here, but its only reader
        // (CcSetDirtyPinnedData further down in the try body) is skipped
        // by the leave that follows.
        //
  524. DirtyLsn = &ReturnLsn;
  525. leave;
  526. }
  527. WriteEntries[0].ByteLength += (LogClusterCount - 1) * sizeof(LCN);
  528. }
  529. //
  530. // If there is a Redo buffer, fill in its write entry.
  531. //
  532. if (RedoLength != 0) {
  533. WriteEntries[1].Buffer = RedoBuffer;
  534. WriteEntries[1].ByteLength = RedoLength;
  535. UndoIndex = RedoIndex = WriteIndex;
  536. WriteIndex += 1;
  537. }
  538. //
  539. // If there is an undo buffer, and it is at a different address than
  540. // the redo buffer, then fill in its write entry.
  541. //
  542. if ((RedoBuffer != UndoBuffer) && (UndoLength != 0) &&
  543. (UndoOperation != CompensationLogRecord)) {
  544. WriteEntries[WriteIndex].Buffer = UndoBuffer;
  545. WriteEntries[WriteIndex].ByteLength = UndoLength;
  546. UndoIndex = WriteIndex;
  547. WriteIndex += 1;
  548. }
  549. //
  550. // Now fill in the rest of the header. Assume Redo and Undo buffer is
  551. // the same, then fix them up if they are not.
  552. //
  553. MyHeader->RedoOperation = (USHORT)RedoOperation;
  554. MyHeader->UndoOperation = (USHORT)UndoOperation;
  555. MyHeader->RedoOffset = (USHORT)WriteEntries[0].ByteLength;
  556. MyHeader->RedoLength = (USHORT)RedoLength;
  557. MyHeader->UndoOffset = MyHeader->RedoOffset;
  558. if (RedoBuffer != UndoBuffer) {
  559. MyHeader->UndoOffset += (USHORT)QuadAlign(MyHeader->RedoLength);
  560. }
  561. MyHeader->UndoLength = (USHORT)UndoLength;
  562. MyHeader->TargetAttribute = (USHORT)Scb->NonpagedScb->OnDiskOatIndex;
  563. MyHeader->RecordOffset = (USHORT)RecordOffset;
  564. MyHeader->AttributeOffset = (USHORT)AttributeOffset;
  565. MyHeader->Reserved = 0;
  566. MyHeader->TargetVcn = LogVcn;
  567. MyHeader->ClusterBlockOffset = (USHORT) LogBlocksFromBytesTruncate( ClusterOffset( Vcb, StreamOffset ));
  568. //
  569. // Finally, get our current transaction entry and call Lfs. We acquire
  570. // the transaction table exclusive both to synchronize the Lsn updates
  571. // on return from Lfs, and also to mark the Bcb dirty before any more
  572. // log records are written.
  573. //
  574. // If we do not serialize the LfsWrite and CcSetDirtyPinnedData, here is
  575. // what can happen:
  576. //
  577. // We log an update for a page and get an Lsn back
  578. //
  579. // Another thread writes a start of checkpoint record
  580. // This thread then collects all of the dirty pages at that time
  581. // Sometime it writes the dirty page table
  582. //
  583. // The former thread which had been preempted, now sets the Bcb dirty
  584. //
  585. // If we crash at this time, the page we updated is not in the dirty page
  586. // table of the checkpoint, and its update record is also not seen since
  587. // it was written before the start of the checkpoint!
  588. //
  589. // Note however, since the page being updated is pinned and cannot be written,
  590. // updating the Lsn in the page may simply be considered part of the update.
  591. // Whoever is doing this update (to the Mft or an Index buffer), must have the
  592. // Mft or Index acquired exclusive anyway.
  593. //
       // NOTE(review): the paragraph above says "exclusive", but the call
       // below is NtfsAcquireSharedStarveExRestartTable, and later comments
       // in this routine describe the table as held shared at this point.
       //
  594. NtfsAcquireSharedStarveExRestartTable( &Vcb->TransactionTable, TRUE );
  595. TransactionTableAcquired = TRUE;
  596. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  597. &Vcb->TransactionTable,
  598. IrpContext->TransactionId );
  599. //
  600. // Set up the UndoNextLsn. If this is a normal log record, then use
  601. // the UndoNextLsn stored in the transaction entry; otherwise, use
  602. // the one passed in as the Undo buffer.
  603. //
  604. if (UndoOperation != CompensationLogRecord) {
  605. UndoNextLsn = TransactionEntry->UndoNextLsn;
  606. //
  607. // If there is undo information, calculate the number to pass to Lfs
  608. // for undo bytes to reserve.
  609. //
  610. if (UndoOperation != Noop) {
  611. UndoBytes += QuadAlign(WriteEntries[0].ByteLength);
  612. if (UndoIndex != 0) {
  613. UndoBytes += QuadAlign(WriteEntries[UndoIndex].ByteLength);
  614. }
  615. UndoRecords += 1;
  616. }
  617. } else {
  618. UndoNextLsn = *(PLSN)UndoBuffer;
  619. //
  620. // We can reduce our Undo requirements, by the Redo data being
  621. // logged. This is either an abort record for a previous action
  622. // or a commit record. If it is a commit record we accounted
  623. // for it above on the first NtfsWriteLog, and NtfsCommitTransaction
  624. // will adjust for the rest.
  625. //
  626. if (!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS )) {
  627. UndoBytes -= QuadAlign(WriteEntries[0].ByteLength);
  628. if (RedoIndex != 0) {
  629. UndoBytes -= QuadAlign(WriteEntries[RedoIndex].ByteLength);
  630. }
  631. UndoRecords -= 1;
  632. }
  633. }
  634. #ifdef NTFS_LOG_FULL_TEST
  635. //
  636. // Perform log-file-full fail checking. We do not perform this check if
  637. // we are writing an undo record (since we are guaranteed space to undo
  638. // things).
  639. //
  640. if (UndoOperation != CompensationLogRecord &&
  641. (IrpContext->MajorFunction != IRP_MJ_FILE_SYSTEM_CONTROL ||
  642. IrpContext->MinorFunction != IRP_MN_MOUNT_VOLUME)) {
  643. LogFileFullFailCheck( IrpContext );
  644. if (NtfsFailFrequency != 0 &&
  645. (++NtfsPeriodicFail % NtfsFailFrequency) == 0) {
  646. ExRaiseStatus( STATUS_LOG_FILE_FULL );
  647. }
  648. }
  649. #endif
  650. //
  651. // Call Lfs to write the record.
  652. //
  653. LfsWrite( Vcb->LogHandle,
  654. WriteIndex,
  655. &WriteEntries[0],
  656. LfsClientRecord,
  657. &IrpContext->TransactionId,
  658. UndoNextLsn,
  659. TransactionEntry->PreviousLsn,
  660. UndoBytes + UndoAdjustmentForLfs,
  661. 0,
  662. &ReturnLsn );
  663. //
  664. // Now that we are successful, update the transaction entry appropriately.
  665. //
  666. TransactionEntry->UndoBytes += UndoBytes;
  667. TransactionEntry->UndoRecords += UndoRecords;
  668. TransactionEntry->PreviousLsn = ReturnLsn;
  669. //
  670. // The UndoNextLsn for the transaction depends on whether we are
  671. // doing a compensation log record or not.
  672. //
  673. if (UndoOperation != CompensationLogRecord) {
  674. TransactionEntry->UndoNextLsn = ReturnLsn;
  675. } else {
  676. TransactionEntry->UndoNextLsn = UndoNextLsn;
  677. }
  678. //
  679. // If this is the first Lsn, then we have to update that as
  680. // well.
  681. //
  682. if (TransactionEntry->FirstLsn.QuadPart == 0) {
  683. TransactionEntry->FirstLsn = ReturnLsn;
  684. //
  685. // Only decrement the LastTransactionLsnCount if we incremented it earlier as
  686. // it is possible for the FirstLsn to be zero during restart or some other code path.
  687. //
  688. ASSERT( DecrementLastTransactionLsnCount ||
  689. FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS ) );
  690. if (DecrementLastTransactionLsnCount) {
  691. //
  692. // We cannot safely assert because we are acquiring the TransactionTable shared.
  693. // Another NtfsWriteLog could be changing these values.
  694. // That's why we have to use the InterlockedDecrement down below on LastTransactionLsnCount.
  695. //
  696. // ASSERT( (Vcb->LastTransactionLsnCount != 0) &&
  697. // (Vcb->LastTransactionLsn.QuadPart != 0) &&
  698. // (Vcb->LastTransactionLsn.QuadPart <= ReturnLsn.QuadPart) );
  699. InterlockedDecrement(&Vcb->LastTransactionLsnCount);
  700. DecrementLastTransactionLsnCount = FALSE;
  701. }
  702. }
  703. //
  704. // Set to use this Lsn when marking dirty below
  705. //
  706. DirtyLsn = &ReturnLsn;
  707. //
  708. // Set the flag in the Irp Context which indicates we wrote
  709. // a log record to disk.
  710. //
  711. SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );
  712. //
  713. // Now set the Bcb dirty if specified. We want to set it no matter
  714. // what happens, because our caller has modified the buffer and is
  715. // counting on us to call the Cache Manager.
  716. //
       // NOTE(review): as written this statement is inside the try body, so
       // it is not reached on the leave path above or when an exception is
       // raised - confirm that matches the "no matter what happens" intent.
       //
  717. if (ARGUMENT_PRESENT( Bcb )) {
  718. TIMER_STATUS TimerStatus;
  719. CcSetDirtyPinnedData( Bcb, DirtyLsn );
  720. //
  721. // Synchronize with the checkpoint timer and other instances of this routine.
  722. //
  723. // Perform an interlocked exchange to indicate that a timer is being set.
  724. //
  725. // If the previous value indicates that no timer was set, then we
  726. // enable the volume checkpoint timer. This will guarantee that a checkpoint
  727. // will occur to flush out the dirty Bcb data.
  728. //
  729. // If the timer was set previously, then it is guaranteed that a checkpoint
  730. // will occur without this routine having to reenable the timer.
  731. //
  732. // If the timer and checkpoint occurred between the dirtying of the Bcb and
  733. // the setting of the timer status, then we will be queueing a single extra
  734. // checkpoint on a clean volume. This is not considered harmful.
  735. //
  736. //
  737. // Atomically set the timer status to indicate a timer is being set and
  738. // retrieve the previous value.
  739. //
  740. TimerStatus = InterlockedExchange( (PLONG)&NtfsData.TimerStatus, TIMER_SET );
  741. //
  742. // If the timer is not currently set then we must start the checkpoint timer
  743. // to make sure the above dirtying is flushed out.
  744. //
  745. if (TimerStatus == TIMER_NOT_SET) {
        //
        // -5*1000*1000*10 is five seconds expressed in 100ns units.
        // NOTE(review): the negative sign presumably makes KeSetTimer
        // treat the due time as relative to now - confirm against the
        // KeSetTimer documentation.
        //
  746. LONGLONG FiveSecondsFromNow = -5*1000*1000*10;
  747. KeSetTimer( &NtfsData.VolumeCheckpointTimer,
  748. *(PLARGE_INTEGER)&FiveSecondsFromNow,
  749. &NtfsData.VolumeCheckpointDpc );
  750. }
  751. }
  752. } finally {
  753. DebugUnwind( NtfsWriteLog );
       //
       // DecrementLastTransactionLsnCount is still TRUE here only if this
       // call bumped Vcb->LastTransactionLsnCount and left the try body
       // before the matching decrement (raise or leave). Make sure the
       // transaction table is held, then drop the count.
       //
  754. if (DecrementLastTransactionLsnCount && !TransactionTableAcquired) {
  755. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
  756. TRUE );
  757. TransactionTableAcquired = TRUE;
  758. }
  759. if (TransactionTableAcquired) {
  760. if (DecrementLastTransactionLsnCount) {
  761. //
  762. // The TransactionTable could be acquired shared/exclusive at this point.
  763. // That's why we need to use InterlockedDecrement.
  764. //
  765. InterlockedDecrement(&Vcb->LastTransactionLsnCount);
  766. }
  767. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  768. }
  769. //
  770. // Lets cleanup any failed attempt to allocate an attribute entry.
  771. // We only need to check the OpenAttributeIndex if the operation
  772. // was successful.
  773. //
       // OpenAttributeIndex is reset to 0 in the try body once the new entry
       // has been fully linked, so a nonzero value here means the allocation
       // path was abandoned part-way and its pieces must be released.
       //
  774. if (OpenAttributeIndex != 0) {
  775. NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable, OpenAttributeIndex );
  776. if (AttributeData != NULL) {
  777. NtfsFreePool( AttributeData );
  778. }
  779. if (OnDiskAttributeIndex != 0) {
  780. NtfsFreeRestartTableIndex( Vcb->OnDiskOat, OnDiskAttributeIndex );
  781. }
  782. }
  783. if (AttributeTableAcquired) {
  784. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  785. }
       //
       // Free the header only if it was switched from the stack copy to a
       // pool allocation.
       //
  786. if (MyHeader != (PNTFS_LOG_RECORD_HEADER)&LocalHeader) {
  787. NtfsFreePool( MyHeader );
  788. }
  789. }
  790. DebugTrace( -1, Dbg, ("NtfsWriteLog -> %016I64x\n", ReturnLsn ) );
  791. return ReturnLsn;
  792. }
  793. VOID
  794. NtfsCheckpointVolume (
  795. IN PIRP_CONTEXT IrpContext,
  796. IN PVCB Vcb,
  797. IN BOOLEAN OwnsCheckpoint,
  798. IN BOOLEAN CleanVolume,
  799. IN BOOLEAN FlushVolume,
  800. IN ULONG LfsFlags,
  801. IN LSN LastKnownLsn
  802. )
  803. /*++
  804. Routine Description:
  805. This routine is called periodically to perform a checkpoint on the volume
  806. with respect to the log file. The checkpoint dumps a bunch of log file
  807. state information to the log file, and finally writes a summary of the
  808. dumped information in its Restart Area.
  809. This checkpoint dumps the following:
  810. Open Attribute Table
  811. (all of the attribute names for the Attribute Table)
  812. Dirty Pages Table
  813. Transaction Table
  814. Arguments:
  815. Vcb - Pointer to the Vcb on which the checkpoint is to occur.
  816. OwnsCheckpoint - TRUE if the caller has already taken steps to insure
  817. that he may proceed with the checkpointing. In this case we
  818. don't do any checks for other checkpoints and don't clear the
  819. checkpoint flag or notify any waiting checkpoint threads.
  820. CleanVolume - TRUE if the caller wishes to clean the volume before doing
  821. the checkpoint, or FALSE for a normal periodic checkpoint.
  822. FlushVolume - Applies only if CleanVolume is TRUE. This indicates if we
  823. should flush the volume or only Lsn streams. Only the shutdown thread
  824. can do a clean and flush checkpoint and avoid deadlocks between
  825. pagingio and main resources.
  826. LfsFlags - flags to pass to lfs when writing the restart areas
  827. LastKnownLsn - Applies only if CleanVolume is TRUE. Only perform the
  828. clean checkpoint if this value is the same as the last restart area
  829. in the Vcb. This will prevent us from doing unnecessary clean
  830. checkpoints.
  831. Return Value:
  832. None
  833. --*/
  834. {
  835. RESTART_AREA RestartArea;
  836. RESTART_POINTERS DirtyPages;
  837. RESTART_POINTERS Pointers;
  838. PRESTART_POINTERS NewTable = NULL;
  839. LSN BaseLsn;
  840. PATTRIBUTE_NAME_ENTRY NamesBuffer = NULL;
  841. PTRANSACTION_ENTRY TransactionEntry;
  842. LSN OldestDirtyPageLsn = Li0;
  843. KPRIORITY PreviousPriority;
  844. PSCB UsnJournal = NULL;
  845. LOGICAL LfsCleanShutdown = 0;
  846. USN LowestOpenUsn;
  847. volatile LARGE_INTEGER StartTime;
  848. #ifdef PERF_STATS
  849. BOOLEAN Tracking = CleanVolume;
  850. #endif
  851. BOOLEAN DirtyPageTableInitialized = FALSE;
  852. BOOLEAN OpenAttributeTableAcquired = FALSE;
  853. BOOLEAN TransactionTableAcquired = FALSE;
  854. BOOLEAN AcquireFiles = FALSE;
  855. BOOLEAN PostDefrag = FALSE;
  856. BOOLEAN RestorePreviousPriority = FALSE;
  857. BOOLEAN AcquiredVcb = FALSE;
  858. LOGICAL CheckpointInProgress = FALSE;
  859. PAGED_CODE();
  860. DebugTrace( +1, Dbg, ("NtfsCheckpointVolume:\n") );
  861. DebugTrace( 0, Dbg, ("Vcb = %08lx\n", Vcb) );
  862. //
  863. // No checkpointing on readonly volumes.
  864. //
  865. if (NtfsIsVolumeReadOnly( Vcb )) {
  866. return;
  867. }
  868. if (!OwnsCheckpoint) {
  869. //
  870. // Acquire the checkpoint event.
  871. //
  872. NtfsAcquireCheckpoint( IrpContext, Vcb );
  873. //
  874. // We will want to post a defrag if defragging is permitted and enabled
  875. // and we have begun the defrag operation or have excess mapping.
  876. // If the defrag hasn't been triggered then check the Mft free
  877. // space. We can skip defragging if a defrag operation is
  878. // currently active.
  879. //
  880. if (!CleanVolume &&
  881. (FlagOn( Vcb->MftDefragState,
  882. VCB_MFT_DEFRAG_PERMITTED | VCB_MFT_DEFRAG_ENABLED | VCB_MFT_DEFRAG_ACTIVE ) ==
  883. (VCB_MFT_DEFRAG_PERMITTED | VCB_MFT_DEFRAG_ENABLED))) {
  884. if (FlagOn( Vcb->MftDefragState,
  885. VCB_MFT_DEFRAG_TRIGGERED | VCB_MFT_DEFRAG_EXCESS_MAP )) {
  886. PostDefrag = TRUE;
  887. } else {
  888. NtfsCheckForDefrag( Vcb );
  889. if (FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_TRIGGERED )) {
  890. PostDefrag = TRUE;
  891. } else {
  892. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ENABLED );
  893. }
  894. }
  895. }
  896. //
  897. // If a checkpoint is already active, we either have to get out,
  898. // or wait for it.
  899. //
  900. while (FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS )) {
  901. CheckpointInProgress = FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_IN_PROGRESS );
  902. //
  903. // Release the checkpoint event because we cannot checkpoint now.
  904. //
  905. NtfsReleaseCheckpoint( IrpContext, Vcb );
  906. if (CleanVolume) {
  907. NtfsWaitOnCheckpointNotify( IrpContext, Vcb );
  908. NtfsAcquireCheckpoint( IrpContext, Vcb );
  909. //
  910. // If there previously was a checkpoint in progress and the last one was
  911. // clean then we don't need to do this clean checkpoint
  912. //
  913. if (CheckpointInProgress && FlagOn( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN )) {
  914. NtfsReleaseCheckpoint( IrpContext, Vcb );
  915. return;
  916. }
  917. } else {
  918. return;
  919. }
  920. }
  921. //
  922. // If the log file is gone then simply exit.
  923. //
  924. if (!FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
  925. NtfsReleaseCheckpoint( IrpContext, Vcb );
  926. return;
  927. }
  928. //
  929. // We now have the checkpoint event. Check if there is still
  930. // a need to perform the checkpoint.
  931. //
  932. if (CleanVolume &&
  933. (LastKnownLsn.QuadPart != Vcb->LastRestartArea.QuadPart)) {
  934. NtfsReleaseCheckpoint( IrpContext, Vcb );
  935. return;
  936. }
  937. SetFlag( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS );
  938. NtfsResetCheckpointNotify( IrpContext, Vcb );
  939. NtfsReleaseCheckpoint( IrpContext, Vcb );
  940. //
  941. // If this is a clean volume checkpoint then boost the priority of
  942. // this thread.
  943. //
  944. if (CleanVolume) {
  945. PreviousPriority = KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  946. LOW_REALTIME_PRIORITY );
  947. if (PreviousPriority != LOW_REALTIME_PRIORITY) {
  948. RestorePreviousPriority = TRUE;
  949. }
  950. }
  951. }
  952. RtlZeroMemory( &RestartArea, sizeof( RESTART_AREA ) );
  953. RtlZeroMemory( &DirtyPages, sizeof( RESTART_POINTERS ) );
  954. //
  955. // Remember if our caller wants to tell Lfs that this is a
  956. // clean shutdown. We will use the combination of the OwnsCheckpoint and
  957. // CleanVolume flags. This will cover system shutdown and volume
  958. // snapshot cases. Both of these want the volume not to need any restart.
  959. //
  960. if (OwnsCheckpoint && CleanVolume) {
  961. LfsCleanShutdown = TRUE;
  962. }
  963. //
  964. // Record the start time
  965. //
  966. KeQueryTickCount( &StartTime );
  967. #ifdef PERF_STATS
  968. if (Tracking) {
  969. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].StartTime = StartTime.QuadPart;
  970. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].RestartArea = LastKnownLsn;
  971. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].Reason = IrpContext->LogFullReason;
  972. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].LogFileFulls = Vcb->UnhandledLogFileFullCount;
  973. // ASSERT( IrpContext->LogFullReason != 0 );
  974. }
  975. #endif
  976. //
  977. // Insure cleanup on the way out
  978. //
  979. try {
  980. POPEN_ATTRIBUTE_ENTRY AttributeEntry;
  981. ULONG NameBytes = 0;
  982. //
  983. // Capture the lowest usn - we have checkpoint synchronization which keeps
  984. // the journal from going away. This value may change but it will only monotonically
  985. // increase
  986. //
  987. if (Vcb->UsnJournal != NULL) {
  988. NtfsAcquireResourceShared( IrpContext, Vcb->UsnJournal, TRUE );
  989. NtfsLockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  990. //
  991. // Now we will correctly synchronize, test the list again and capture
  992. // the LowestUsn.
  993. //
  994. if (!IsListEmpty(&Vcb->ModifiedOpenFiles)) {
  995. LowestOpenUsn = ((PFCB_USN_RECORD)CONTAINING_RECORD( Vcb->ModifiedOpenFiles.Flink,
  996. FCB_USN_RECORD,
  997. ModifiedOpenFilesLinks ))->Fcb->Usn;
  998. //
  999. // If the list is empty, then use FileSize
  1000. //
  1001. } else {
  1002. LowestOpenUsn = Vcb->UsnJournal->Header.FileSize.QuadPart;
  1003. }
  1004. NtfsUnlockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  1005. NtfsReleaseResource( IrpContext, Vcb->UsnJournal );
  1006. }
  1007. //
  1008. // Now remember the current "last Lsn" value as the start of
  1009. // our checkpoint. We acquire the transaction table to capture
  1010. // this value to synchronize with threads who are writing log
  1011. // records and setting pages dirty as atomic actions.
  1012. //
  1013. ASSERT( IrpContext->TransactionId == 0 );
  1014. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1015. //
  1016. // If LfsFlags == LFS_WRITE_FLAG_WRITE_AT_FRONT then produce
  1017. // the dummy log record that resets the log. This allows us to
  1018. // keep the log in use only at the front so chkdsk can shrink it
  1019. //
  1020. if (FlagOn( LfsFlags, LFS_WRITE_FLAG_WRITE_AT_FRONT )) {
  1021. LSN Lsn;
  1022. LFS_WRITE_ENTRY WriteEntry;
  1023. UCHAR Buffer[ sizeof( NTFS_LOG_RECORD_HEADER ) + 2 * sizeof( LSN )];
  1024. TRANSACTION_ID TransactionId;
  1025. RtlZeroMemory( Buffer, sizeof( Buffer ));
  1026. WriteEntry.Buffer = Buffer;
  1027. WriteEntry.ByteLength = sizeof( Buffer );
  1028. TransactionId = NtfsAllocateRestartTableIndex( &Vcb->TransactionTable, TRUE );
  1029. Lsn.QuadPart = 0;
  1030. LfsGetActiveLsnRange( Vcb->LogHandle,
  1031. Add2Ptr( Buffer, sizeof( NTFS_LOG_RECORD_HEADER )),
  1032. Add2Ptr( Buffer, sizeof( NTFS_LOG_RECORD_HEADER ) + sizeof( LSN )) );
  1033. LfsWrite( Vcb->LogHandle,
  1034. 1,
  1035. &WriteEntry,
  1036. LfsClientRecord,
  1037. &TransactionId,
  1038. Lsn,
  1039. Lsn,
  1040. 0,
  1041. LfsFlags,
  1042. &Lsn );
  1043. NtfsFreeRestartTableIndex( &Vcb->TransactionTable, TransactionId );
  1044. //
  1045. // Commit the transaction so that we can release resources
  1046. //
  1047. NtfsCommitCurrentTransaction( IrpContext );
  1048. }
  1049. BaseLsn =
  1050. RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  1051. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1052. //
  1053. // Flush any dangling dirty pages from before the last restart.
  1054. // Note that it is arbitrary what Lsn we flush to here, and, in fact,
  1055. // it is not absolutely required that we flush anywhere at all - we
  1056. // could actually rely on the Lazy Writer. All we are trying to do
  1057. // is reduce the amount of work that we will have to do at Restart,
  1058. // by not forcing ourselves to have to go too far back in the log.
  1059. // Presumably this can only happen if for some reason the system is
  1060. // starting to produce dirty pages faster than the lazy writer is
  1061. // writing them.
  1062. //
  1063. // (We may wish to play with taking this call out...)
  1064. //
  1065. // This may be an appropriate place to worry about this, but, then
  1066. // again, the Lazy Writer is using (currently) five threads. It may
  1067. // not be appropriate to hold up this one thread doing the checkpoint
  1068. // if the Lazy Writer is getting behind. How many dirty pages we
  1069. // can even have is limited by the size of memory, so if the log file
  1070. // is large enough, this may not be an issue. It seems kind of nice
  1071. // to just let the Lazy Writer keep writing dirty pages as he does
  1072. // now.
  1073. //
  1074. // if (!FlagOn(Vcb->VcbState, VCB_STATE_LAST_CHECKPOINT_CLEAN)) {
  1075. // CcFlushPagesToLsn( Vcb->LogHandle, &Vcb->LastRestartArea );
  1076. // }
  1077. //
  1078. //
  1079. // Now we must clean the volume here if that is what the caller wants.
  1080. //
  1081. if (CleanVolume) {
  1082. #ifdef PERF_STATS
  1083. if (Tracking) {
  1084. SetFlag( IrpContext->TopLevelIrpContext->State, IRP_CONTEXT_STATE_TRACK_IOS );
  1085. }
  1086. #endif
  1087. #ifdef BENL_DBG
  1088. KdPrint(( "NTFS: clean checkpoint %x started %I64x\n", Vcb, StartTime ));
  1089. #endif
  1090. //
  1091. // Update stats
  1092. //
  1093. NtfsCleanCheckpoints += 1;
  1094. //
  1095. // We don't want to clear the pseudo clean bit because we don't want
  1096. // another pseudo clean checkpoint after a clean checkpoint.
  1097. //
  1098. //
  1099. // ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_PSEUDO_CLEAN );
  1100. //
  1101. //
  1102. // Lock down the volume if this is a clean checkpoint.
  1103. //
  1104. if (FlushVolume) {
  1105. NtfsAcquireAllFiles( IrpContext, Vcb, FlushVolume, FALSE, FALSE );
  1106. AcquireFiles = TRUE;
  1107. #ifdef NTFSDBG
  1108. ASSERT( !FlagOn( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1109. SetFlag( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE );
  1110. #endif // NTFSDBG
  1111. } else {
  1112. BOOLEAN WaitOnTransactions = FALSE;
  1113. NtfsAcquireSharedVcb( IrpContext, Vcb, TRUE );
  1114. AcquiredVcb = TRUE;
  1115. //
  1116. // Set the flag indicating we're waiting for all transactions to finish and
  1117. // then wait if necessary.
  1118. //
  1119. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1120. Vcb->TransactionTable.DrainPending = TRUE;
  1121. ASSERT( IrpContext->TransactionId == 0 );
  1122. if (Vcb->TransactionTable.Table->NumberAllocated > 0) {
  1123. KeClearEvent( &Vcb->TransactionsDoneEvent );
  1124. WaitOnTransactions = TRUE;
  1125. }
  1126. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1127. if (WaitOnTransactions) {
  1128. KeWaitForSingleObject( &Vcb->TransactionsDoneEvent, Executive, KernelMode, FALSE, NULL );
  1129. }
  1130. //
  1131. // Set the flag to disallow new open attributes as well
  1132. //
  1133. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1134. Vcb->OpenAttributeTable.DrainPending = TRUE;
  1135. #ifdef PERF_STATS
  1136. if (Tracking) {
  1137. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].NumAttributes =
  1138. Vcb->OpenAttributeTable.Table->NumberAllocated;
  1139. }
  1140. #endif
  1141. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1142. }
  1143. //
  1144. // It isn't safe to checkpoint a dismounted volume, and
  1145. // it doesn't make much sense, either.
  1146. //
  1147. if (!FlagOn( Vcb->VcbState, VCB_STATE_VOLUME_MOUNTED )) {
  1148. leave;
  1149. }
  1150. //
  1151. // Now we will acquire the Open Attribute Table exclusive to delete
  1152. // all of the entries, since we want to write a clean checkpoint.
  1153. // This is OK, since we have the global resource and nothing else
  1154. // can be going on. (Similarly we are writing an empty transaction
  1155. // table, while in fact we will be the only transaction, but there
  1156. // is no need to capture our guy, nor explicitly empty this table.)
  1157. //
  1158. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1159. OpenAttributeTableAcquired = TRUE;
  1160. //
  1161. // First reclaim the page we have reserved in the undo total, to
  1162. // guarantee that we can flush the log file.
  1163. //
  1164. LfsResetUndoTotal( Vcb->LogHandle, 1, -(LONG)(2 * PAGE_SIZE) );
  1165. if (FlushVolume) {
  1166. (VOID)NtfsFlushVolume( IrpContext, Vcb, TRUE, FALSE, FALSE, FALSE );
  1167. //
  1168. // Loop through to deallocate all of the open attribute entries. Any
  1169. // that point to an Scb need to get the index in the Scb zeroed. If
  1170. // they do not point to an Scb, we have to see if there is a name to
  1171. // free.
  1172. //
  1173. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  1174. while (AttributeEntry != NULL) {
  1175. NtfsFreeAttributeEntry( Vcb, AttributeEntry );
  1176. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  1177. AttributeEntry );
  1178. }
  1179. } else {
  1180. //
  1181. // If we're only flushing out the open attributes rather than the
  1182. // whole volume we own the vcb shared at this point. We've set the
  1183. // drain pending flag to prevent new transactions from being opened. Now
  1184. // start a cycle of finding an entry in the table / flushing it and removing
  1185. // it from the table - We must drop the table before acquiring any file since it's
  1186. // an end resource. This will also free the attribute entries
  1187. //
  1188. NtfsFlushLsnStreams( IrpContext, Vcb, TRUE, FALSE );
  1189. }
  1190. SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
  1191. //
  1192. // In a rare reuse path there may still be entries in the open attribute data
  1193. // list. This can happen when we reuse a slot in the open attribute table
  1194. // during restart.
  1195. //
  1196. NtfsFreeAllOpenAttributeData( Vcb );
  1197. //
  1198. // Initialize first in case we get an allocation failure.
  1199. //
  1200. ASSERT( IsRestartTableEmpty( &Vcb->OpenAttributeTable ));
  1201. ASSERT( IsListEmpty( &Vcb->OpenAttributeData ));
  1202. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY ),
  1203. INITIAL_NUMBER_ATTRIBUTES,
  1204. &Pointers );
  1205. NtfsFreePool( Vcb->OpenAttributeTable.Table );
  1206. Vcb->OpenAttributeTable.Table = Pointers.Table;
  1207. //
  1208. // Since we are doing a clean checkpoint we may be able to discard the
  1209. // second open attribute table. We have three cases to consider.
  1210. //
  1211. // 1 - We want to use Version 0 on-disk but currently aren't.
  1212. // 2 - We are currently using Version 0 but can free some space.
  1213. // 3 - We are currently using Version 0 but don't want to.
  1214. //
  1215. if (NtfsDefaultRestartVersion != Vcb->RestartVersion) {
  1216. NtfsUpdateOatVersion( Vcb, NtfsDefaultRestartVersion );
  1217. } else if (NtfsDefaultRestartVersion == 0) {
  1218. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  1219. INITIAL_NUMBER_ATTRIBUTES,
  1220. &Pointers );
  1221. NtfsFreePool( Vcb->OnDiskOat->Table );
  1222. Vcb->OnDiskOat->Table = Pointers.Table;
  1223. }
  1224. //
  1225. // Initialize first in case we get an allocation failure.
  1226. // Make sure we commit the current transaction.
  1227. //
  1228. NtfsCommitCurrentTransaction( IrpContext );
  1229. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1230. ASSERT( IsRestartTableEmpty( &Vcb->TransactionTable ));
  1231. InitializeNewTable( sizeof( TRANSACTION_ENTRY ),
  1232. INITIAL_NUMBER_TRANSACTIONS,
  1233. &Pointers );
  1234. NtfsFreePool( Vcb->TransactionTable.Table );
  1235. Vcb->TransactionTable.Table = Pointers.Table;
  1236. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1237. //
  1238. // Make sure we do not process any log file before the restart
  1239. // area, because we did not dump the open attribute table.
  1240. //
  1241. RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  1242. //
  1243. // More work to do if this is not a clean checkpoint.
  1244. //
  1245. } else {
  1246. DIRTY_PAGE_CONTEXT DirtyPageContext;
  1247. PDIRTY_PAGE_ENTRY DirtyPage;
  1248. POPEN_ATTRIBUTE_ENTRY OpenEntry;
  1249. ULONG JustMe = 0;
  1250. ULONG TempCount;
  1251. BOOLEAN SkipCheckpoint;
  1252. //
  1253. // Now we construct the dirty page table by calling the Cache Manager.
  1254. // For each dirty page on files tagged with our log handle, he will
  1255. // call us back at our DirtyPageRoutine. We will allocate the initial
  1256. // Dirty Page Table, but we will let the call back routine grow it as
  1257. // necessary.
  1258. //
  1259. NtfsInitializeRestartTable( (((Vcb->RestartVersion == 0) ?
  1260. sizeof( DIRTY_PAGE_ENTRY_V0 ) :
  1261. sizeof( DIRTY_PAGE_ENTRY )) +
  1262. ((Vcb->ClustersPerPage - 1) * sizeof(LCN))),
  1263. Vcb->DirtyPageTableSizeHint,
  1264. &DirtyPages );
  1265. NtfsAcquireExclusiveRestartTable( &DirtyPages, TRUE );
  1266. DirtyPageTableInitialized = TRUE;
  1267. //
  1268. // Now we will acquire the Open Attribute Table exclusive to freeze changes.
  1269. //
  1270. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1271. OpenAttributeTableAcquired = TRUE;
  1272. NameBytes = NtfsCalculateNamedBytes( IrpContext, Vcb );
  1273. //
  1274. // Now call the Cache Manager to give us all of our dirty pages
  1275. // via the DirtyPageRoutine callback, and remember what the oldest
  1276. // Lsn is for a dirty page.
  1277. //
  1278. RtlZeroMemory( &DirtyPageContext, sizeof( DirtyPageContext ) );
  1279. DirtyPageContext.DirtyPageTable = &DirtyPages;
  1280. DirtyPageContext.OldestLsn.QuadPart = MAXLONGLONG;
  1281. CcGetDirtyPages( Vcb->LogHandle,
  1282. &DirtyPageRoutine,
  1283. (PVOID)IrpContext,
  1284. (PVOID)&DirtyPageContext );
  1285. OldestDirtyPageLsn = DirtyPageContext.OldestLsn;
  1286. //
  1287. // If we overflowed we can't contain the dirty pages in the dirty page
  1288. // table and need to do a clean checkpoint instead
  1289. //
  1290. if (DirtyPageContext.Overflow) {
  1291. //
  1292. // We need the vcb shared for the flush which must be acquired
  1293. // before the OAT
  1294. //
  1295. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1296. OpenAttributeTableAcquired = FALSE;
  1297. NtfsAcquireSharedVcb( IrpContext, Vcb, TRUE );
  1298. AcquiredVcb = TRUE;
  1299. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1300. OpenAttributeTableAcquired = TRUE;
  1301. //
  1302. // Do a partial flush and see if the table no longer overflows afterwards
  1303. //
  1304. NtfsFlushLsnStreams( IrpContext, Vcb, FALSE, TRUE );
  1305. //
  1306. // Now call the Cache Manager to give us all of our dirty pages
  1307. // via the DirtyPageRoutine callback, and remember what the oldest
  1308. // Lsn is for a dirty page.
  1309. //
  1310. RtlZeroMemory( &DirtyPageContext, sizeof( DirtyPageContext ) );
  1311. DirtyPageContext.DirtyPageTable = &DirtyPages;
  1312. DirtyPageContext.OldestLsn.QuadPart = MAXLONGLONG;
  1313. //
  1314. // Loop through to deallocate all of the prev dirty page entries
  1315. //
  1316. DirtyPage = NtfsGetFirstRestartTable( &DirtyPages );
  1317. while (DirtyPage != NULL) {
  1318. NtfsFreeRestartTableIndex( &DirtyPages,
  1319. GetIndexFromRestartEntry( &DirtyPages,
  1320. DirtyPage ));
  1321. DirtyPage = NtfsGetNextRestartTable( &DirtyPages, DirtyPage );
  1322. }
  1323. NameBytes = NtfsCalculateNamedBytes( IrpContext, Vcb );
  1324. CcGetDirtyPages( Vcb->LogHandle,
  1325. &DirtyPageRoutine,
  1326. (PVOID)IrpContext,
  1327. (PVOID)&DirtyPageContext );
  1328. OldestDirtyPageLsn = DirtyPageContext.OldestLsn;
  1329. //
  1330. // If we still overflowed - give up and run a full clean checkpoint
  1331. //
  1332. if (DirtyPageContext.Overflow) {
  1333. #ifdef PERF_STATS
  1334. IrpContext->LogFullReason = LF_DIRTY_PAGES;
  1335. #endif
  1336. NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
  1337. }
  1338. }
  1339. TempCount = DirtyPages.Table->NumberAllocated;
  1340. Vcb->DirtyPageTableSizeHint = (TempCount & ~(INITIAL_DIRTY_TABLE_HINT - 1)) + INITIAL_DIRTY_TABLE_HINT;
  1341. //
  1342. // Skip the fuzzy checkpoint if it's not going to make restart any faster
  1343. // i.e. the oldest lsn is still the same as the last time we did it
  1344. //
  1345. if (OldestDirtyPageLsn.QuadPart == Vcb->OldestDirtyLsn.QuadPart) {
  1346. //
  1347. // Release any transaction tables
  1348. //
  1349. if (OpenAttributeTableAcquired) {
  1350. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1351. OpenAttributeTableAcquired = FALSE;
  1352. }
  1353. if (TransactionTableAcquired) {
  1354. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1355. TransactionTableAcquired = FALSE;
  1356. }
  1357. //
  1358. // Flush the fileobject associated with the page if there is one
  1359. //
  1360. if (DirtyPageContext.OldestFileObject != NULL) {
  1361. PSCB Scb = (PSCB)DirtyPageContext.OldestFileObject->FsContext;
  1362. BOOLEAN AcquiredPaging;
  1363. IO_STATUS_BLOCK Iosb;
  1364. LARGE_INTEGER Offset;
  1365. ULONG Length;
  1366. DirtyPage = GetRestartEntryFromIndex( DirtyPageContext.DirtyPageTable, DirtyPageContext.DirtyPageIndex );
  1367. //
  1368. // At this point the vcn in the dirty page entry is actually a raw offset
  1369. //
  1370. if (Vcb->RestartVersion == 0) {
  1371. Offset.QuadPart = ((PDIRTY_PAGE_ENTRY_V0)DirtyPage)->Vcn;
  1372. Length = ((PDIRTY_PAGE_ENTRY_V0)DirtyPage)->LengthOfTransfer;
  1373. ASSERT( ((PDIRTY_PAGE_ENTRY_V0)DirtyPage)->OldestLsn.QuadPart == DirtyPageContext.OldestLsn.QuadPart );
  1374. } else {
  1375. Offset.QuadPart = DirtyPage->Vcn;
  1376. Length = DirtyPage->LengthOfTransfer;
  1377. ASSERT( DirtyPage->OldestLsn.QuadPart == DirtyPageContext.OldestLsn.QuadPart );
  1378. }
  1379. //
  1380. // Account for UsnJournal biasing if necc.
  1381. // note at this point the vcn is actually still a byte offset
  1382. //
  1383. if (Scb == Vcb->UsnJournal) {
  1384. Offset.QuadPart += Vcb->UsnCacheBias;
  1385. }
  1386. //
  1387. // Acquire same synchronization as a normal lazy write before flushing
  1388. //
  1389. AcquiredPaging = NtfsAcquireScbForLazyWrite( Scb, TRUE );
  1390. CcFlushCache( &Scb->NonpagedScb->SegmentObject, &Offset, Length, &Iosb );
  1391. if (AcquiredPaging) {
  1392. NtfsReleaseScbFromLazyWrite( Scb );
  1393. }
  1394. ObDereferenceObject( DirtyPageContext.OldestFileObject );
  1395. }
  1396. leave;
  1397. }
  1398. //
  1399. // Deref the oldest file if there is any returned from DirtyPageRoutine
  1400. //
  1401. if (DirtyPageContext.OldestFileObject) {
  1402. ObDereferenceObject( DirtyPageContext.OldestFileObject );
  1403. }
  1404. ASSERT( (OldestDirtyPageLsn.QuadPart > Vcb->OldestDirtyLsn.QuadPart) || (TempCount == 0) );
  1405. if (OldestDirtyPageLsn.QuadPart != MAXLONGLONG) {
  1406. Vcb->OldestDirtyLsn = OldestDirtyPageLsn;
  1407. }
  1408. if ((OldestDirtyPageLsn.QuadPart != 0) &&
  1409. OldestDirtyPageLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1410. OldestDirtyPageLsn = Vcb->LastBaseLsn;
  1411. }
  1412. //
  1413. // Now loop through the dirty page table to extract all of the Vcn/Lcn
  1414. // Mapping that we have, and insert it into the appropriate Scb.
  1415. //
  1416. DirtyPage = NtfsGetFirstRestartTable( &DirtyPages );
  1417. //
  1418. // The dirty page routine is called while holding spin locks,
  1419. // so it cannot take page faults. Thus we must scan the dirty
  1420. // page table we just built and fill in the Lcns here.
  1421. //
  1422. while (DirtyPage != NULL) {
  1423. PSCB Scb;
  1424. //
  1425. // If we have Lcn's then look them up.
  1426. //
  1427. if (DirtyPage->LengthOfTransfer != 0) {
  1428. VCN Vcn;
  1429. PLCN LcnArray;
  1430. //
  1431. // Get the in-memory AttributeEntry from the dirty page entry.
  1432. // Then update the dirty page entry with the on-disk TargetAttribute.
  1433. // Also mark the pages dirty now.
  1434. //
  1435. OpenEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
  1436. DirtyPage->TargetAttribute );
  1437. OpenEntry->DirtyPagesSeen = TRUE;
  1438. DirtyPage->TargetAttribute = OpenEntry->OatData->OnDiskAttributeIndex;
  1439. ASSERT( IsRestartTableEntryAllocated( OpenEntry ));
  1440. Scb = OpenEntry->OatData->Overlay.Scb;
  1441. //
  1442. // Account for UsnJournal biasing if necc.
  1443. // note at this point the vcn is actually still a byte offset
  1444. //
  1445. if (Scb == Vcb->UsnJournal) {
  1446. if (Vcb->RestartVersion == 0 ) {
  1447. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn = ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn + Vcb->UsnCacheBias;
  1448. } else {
  1449. DirtyPage->Vcn = DirtyPage->Vcn + Vcb->UsnCacheBias;
  1450. }
  1451. }
  1452. //
  1453. // Fix up the count of Lcns.
  1454. //
  1455. DirtyPage->LcnsToFollow = ClustersFromBytes( Vcb, DirtyPage->LengthOfTransfer );
  1456. //
  1457. // Now fix up the page entry to account for the differences in the
  1458. // restart version structures and also make sure we don't have
  1459. // an Lsn which precedes our current base Lsn.
  1460. //
  1461. if (Vcb->RestartVersion == 0) {
  1462. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Reserved = 0;
  1463. if (((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->OldestLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1464. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->OldestLsn.QuadPart = Vcb->LastBaseLsn.QuadPart;
  1465. }
  1466. Vcn = ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn;
  1467. Vcn = Int64ShraMod32( Vcn, Vcb->ClusterShift );
  1468. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn = Vcn;
  1469. LcnArray = &((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->LcnsForPage[0];
  1470. } else {
  1471. if (DirtyPage->OldestLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1472. DirtyPage->OldestLsn.QuadPart = Vcb->LastBaseLsn.QuadPart;
  1473. }
  1474. DirtyPage->Vcn = Vcn = Int64ShraMod32( DirtyPage->Vcn, Vcb->ClusterShift );
  1475. LcnArray = &DirtyPage->LcnsForPage[0];
  1476. }
  1477. LookupLcns( IrpContext,
  1478. Scb,
  1479. Vcn,
  1480. DirtyPage->LcnsToFollow,
  1481. FALSE,
  1482. LcnArray );
  1483. //
  1484. // Otherwise free this dirty page entry.
  1485. //
  1486. } else {
  1487. NtfsFreeRestartTableIndex( &DirtyPages,
  1488. GetIndexFromRestartEntry( &DirtyPages,
  1489. DirtyPage ));
  1490. }
  1491. //
  1492. // Point to next entry in table, or NULL.
  1493. //
  1494. DirtyPage = NtfsGetNextRestartTable( &DirtyPages, DirtyPage );
  1495. }
  1496. //
  1497. // If the following are all true, we can return as we don't want to
  1498. // keep writing empty fuzzy checkpoints on idling volumes:
  1499. //
  1500. // 1) Last fuzzy checkpoint was clean (no dirty pages or no open transaction)
  1501. // 2) No one has written to the log since last restart record
  1502. // 3) Currently, there isn't any dirty page
  1503. // 4) Currently, there isn't any transaction in the transaction table
  1504. //
  1505. if (FlagOn( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_PSEUDO_CLEAN ) &&
  1506. (RestartArea.StartOfCheckpoint.QuadPart == Vcb->EndOfLastCheckpoint.QuadPart) &&
  1507. IsRestartTableEmpty( &DirtyPages )) {
  1508. NtfsAcquireSharedStarveExRestartTable( &Vcb->TransactionTable, TRUE );
  1509. SkipCheckpoint = IsRestartTableEmpty( &Vcb->TransactionTable );
  1510. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1511. } else {
  1512. SkipCheckpoint = FALSE;
  1513. }
  1514. if (SkipCheckpoint) {
  1515. //
  1516. // Let's take this opportunity to shrink the Open Attribute and Transaction
  1517. // table back if they have gotten large.
  1518. //
  1519. //
  1520. // First the Open Attribute Table
  1521. //
  1522. if (!OpenAttributeTableAcquired) {
  1523. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1524. OpenAttributeTableAcquired = TRUE;
  1525. } else {
  1526. ASSERT( ExIsResourceAcquiredExclusive( &Vcb->OpenAttributeTable.Resource ) );
  1527. }
  1528. if (IsRestartTableEmpty( &Vcb->OpenAttributeTable ) &&
  1529. (Vcb->OpenAttributeTable.Table->NumberEntries > HIGHWATER_ATTRIBUTE_COUNT)) {
  1530. //
  1531. // Initialize first in case we get an allocation failure.
  1532. //
  1533. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY ),
  1534. INITIAL_NUMBER_ATTRIBUTES,
  1535. &Pointers );
  1536. NtfsFreePool( Vcb->OpenAttributeTable.Table );
  1537. Vcb->OpenAttributeTable.Table = Pointers.Table;
  1538. //
  1539. // Also reinitialize the OnDisk table if different.
  1540. //
  1541. if (Vcb->OnDiskOat != &Vcb->OpenAttributeTable) {
  1542. //
  1543. // Initialize first in case we get an allocation failure.
  1544. //
  1545. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  1546. INITIAL_NUMBER_ATTRIBUTES,
  1547. &Pointers );
  1548. NtfsFreePool( Vcb->OnDiskOat->Table );
  1549. Vcb->OnDiskOat->Table = Pointers.Table;
  1550. }
  1551. }
  1552. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1553. OpenAttributeTableAcquired = FALSE;
  1554. //
  1555. // Now check the transaction table (freeing in the finally clause).
  1556. //
  1557. if (!TransactionTableAcquired) {
  1558. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1559. TransactionTableAcquired = TRUE;
  1560. } else {
  1561. ASSERT( ExIsResourceAcquiredExclusive( &Vcb->TransactionTable.Resource ) );
  1562. }
  1563. if (IsRestartTableEmpty( &Vcb->TransactionTable )) {
  1564. LfsResetUndoTotal( Vcb->LogHandle, 2, QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1565. if (Vcb->TransactionTable.Table->NumberEntries > HIGHWATER_TRANSACTION_COUNT) {
  1566. //
  1567. // Initialize first in case we get an allocation failure.
  1568. //
  1569. InitializeNewTable( sizeof(TRANSACTION_ENTRY),
  1570. INITIAL_NUMBER_TRANSACTIONS,
  1571. &Pointers );
  1572. NtfsFreePool( Vcb->TransactionTable.Table );
  1573. Vcb->TransactionTable.Table = Pointers.Table;
  1574. }
  1575. }
  1576. leave;
  1577. } else {
  1578. //
  1579. // Take this opportunity to clear this flag first since we now know
  1580. // this is not a pseudo clean checkpoint.
  1581. //
  1582. ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_PSEUDO_CLEAN );
  1583. }
  1584. //
  1585. // If there were any names, then allocate space for them and copy
  1586. // them out.
  1587. //
  1588. if (NameBytes != 0) {
  1589. PATTRIBUTE_NAME_ENTRY Name;
  1590. //
  1591. // Allocate the buffer, with space for two terminating 0's on
  1592. // the end.
  1593. //
  1594. NameBytes += 4;
  1595. Name =
  1596. NamesBuffer = NtfsAllocatePool( NonPagedPool, NameBytes );
  1597. //
  1598. // Now loop to copy the names.
  1599. //
  1600. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  1601. while (AttributeEntry != NULL) {
  1602. //
  1603. // Free the Open Attribute Entry if there were no
  1604. // dirty pages and the Scb is gone. This is the only
  1605. // place they are deleted. (Yes, I know we allocated
  1606. // space for its name, but I didn't want to make three
  1607. // passes through the open attribute table. Permeter
  1608. // is running as we speak, and showing 407 open files
  1609. // on NT/IDW5.)
  1610. //
  1611. if (!AttributeEntry->DirtyPagesSeen
  1612. &&
  1613. (AttributeEntry->OatData->Overlay.Scb == NULL)) {
  1614. ULONG Index;
  1615. //
  1616. // Get the index for the entry.
  1617. //
  1618. Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  1619. AttributeEntry );
  1620. //
  1621. // Delete its name and free it up.
  1622. //
  1623. NtfsFreeScbAttributeName( AttributeEntry->OatData->AttributeName.Buffer );
  1624. if (Vcb->RestartVersion == 0) {
  1625. NtfsFreeRestartTableIndex( Vcb->OnDiskOat,
  1626. AttributeEntry->OatData->OnDiskAttributeIndex );
  1627. }
  1628. NtfsFreeOpenAttributeData( AttributeEntry->OatData );
  1629. NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable, Index );
  1630. //
  1631. // Otherwise, if we are not deleting it, we have to
  1632. // copy its name into the buffer we allocated.
  1633. //
  1634. } else if (AttributeEntry->OatData->AttributeName.Length != 0) {
  1635. //
  1636. // Prefix each name in the buffer with the attribute index
  1637. // and name length. Be sure to use the index that will
  1638. // be on-disk.
  1639. //
  1640. Name->Index = (USHORT) AttributeEntry->OatData->OnDiskAttributeIndex;
  1641. Name->NameLength = AttributeEntry->OatData->AttributeName.Length;
  1642. RtlCopyMemory( &Name->Name[0],
  1643. AttributeEntry->OatData->AttributeName.Buffer,
  1644. AttributeEntry->OatData->AttributeName.Length );
  1645. Name->Name[Name->NameLength / sizeof( WCHAR )] = 0;
  1646. Name = (PATTRIBUTE_NAME_ENTRY)((PCHAR)Name +
  1647. sizeof(ATTRIBUTE_NAME_ENTRY) +
  1648. Name->NameLength);
  1649. ASSERT( (PCHAR)Name <= ((PCHAR)NamesBuffer + NameBytes - 4) );
  1650. }
  1651. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  1652. AttributeEntry );
  1653. }
  1654. //
  1655. // Terminate the Names Buffer.
  1656. //
  1657. Name->Index = 0;
  1658. Name->NameLength = 0;
  1659. }
  1660. //
  1661. // Now write all of the non-empty tables to the log.
  1662. //
  1663. //
  1664. // Write the Open Attribute Table
  1665. //
  1666. // Make sure the tables are in sync.
  1667. //
  1668. ASSERT( (IsRestartTableEmpty( Vcb->OnDiskOat ) && IsRestartTableEmpty( &Vcb->OpenAttributeTable )) ||
  1669. (!IsRestartTableEmpty( Vcb->OnDiskOat ) && !IsRestartTableEmpty( &Vcb->OpenAttributeTable )));
  1670. if (!IsRestartTableEmpty( Vcb->OnDiskOat )) {
  1671. RestartArea.OpenAttributeTableLsn =
  1672. NtfsWriteLog( IrpContext,
  1673. Vcb->MftScb,
  1674. NULL,
  1675. OpenAttributeTableDump,
  1676. Vcb->OnDiskOat->Table,
  1677. SizeOfRestartTable( Vcb->OnDiskOat ),
  1678. Noop,
  1679. NULL,
  1680. 0,
  1681. (LONGLONG)0,
  1682. 0,
  1683. 0,
  1684. 0 );
  1685. RestartArea.OpenAttributeTableLength = SizeOfRestartTable( Vcb->OnDiskOat );
  1686. JustMe = 1;
  1687. }
  1688. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1689. OpenAttributeTableAcquired = FALSE;
  1690. //
  1691. // Write the Open Attribute Names
  1692. //
  1693. if (NameBytes != 0) {
  1694. RestartArea.AttributeNamesLsn =
  1695. NtfsWriteLog( IrpContext,
  1696. Vcb->MftScb,
  1697. NULL,
  1698. AttributeNamesDump,
  1699. NamesBuffer,
  1700. NameBytes,
  1701. Noop,
  1702. NULL,
  1703. 0,
  1704. (LONGLONG)0,
  1705. 0,
  1706. 0,
  1707. 0 );
  1708. RestartArea.AttributeNamesLength = NameBytes;
  1709. JustMe = 1;
  1710. }
  1711. //
  1712. // Write the Dirty Page Table
  1713. //
  1714. if (!IsRestartTableEmpty( &DirtyPages )) {
  1715. RestartArea.DirtyPageTableLsn =
  1716. NtfsWriteLog( IrpContext,
  1717. Vcb->MftScb,
  1718. NULL,
  1719. DirtyPageTableDump,
  1720. DirtyPages.Table,
  1721. SizeOfRestartTable(&DirtyPages),
  1722. Noop,
  1723. NULL,
  1724. 0,
  1725. (LONGLONG)0,
  1726. 0,
  1727. 0,
  1728. 0 );
  1729. RestartArea.DirtyPageTableLength = SizeOfRestartTable(&DirtyPages);
  1730. JustMe = 1;
  1731. }
  1732. //
  1733. // Write the Transaction Table if there is more than just us. We
  1734. // are a transaction if we wrote any log records above.
  1735. //
  1736. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1737. TransactionTableAcquired = TRUE;
  1738. //
  1739. // Assume we will want to do at least one more checkpoint.
  1740. //
  1741. ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
  1742. if ((ULONG)Vcb->TransactionTable.Table->NumberAllocated > JustMe) {
  1743. RestartArea.TransactionTableLsn =
  1744. NtfsWriteLog( IrpContext,
  1745. Vcb->MftScb,
  1746. NULL,
  1747. TransactionTableDump,
  1748. Vcb->TransactionTable.Table,
  1749. SizeOfRestartTable(&Vcb->TransactionTable),
  1750. Noop,
  1751. NULL,
  1752. 0,
  1753. (LONGLONG)0,
  1754. 0,
  1755. 0,
  1756. 0 );
  1757. RestartArea.TransactionTableLength =
  1758. SizeOfRestartTable(&Vcb->TransactionTable);
  1759. //
  1760. // Loop to see if the oldest Lsn comes from the transaction table.
  1761. //
  1762. TransactionEntry = NtfsGetFirstRestartTable( &Vcb->TransactionTable );
  1763. while (TransactionEntry != NULL) {
  1764. if ((TransactionEntry->FirstLsn.QuadPart != 0) &&
  1765. (TransactionEntry->FirstLsn.QuadPart < BaseLsn.QuadPart)) {
  1766. BaseLsn = TransactionEntry->FirstLsn;
  1767. }
  1768. TransactionEntry = NtfsGetNextRestartTable( &Vcb->TransactionTable,
  1769. TransactionEntry );
  1770. }
  1771. //
  1772. // If LastTransactionLsnCount is non-zero, we should check to see if it's smaller than BaseLsn.
  1773. // This is due to the window between creating a transaction to the point where we update
  1774. // the FirstLsn in NtfsWriteLog.
  1775. //
  1776. if (Vcb->LastTransactionLsnCount != 0) {
  1777. if (Vcb->LastTransactionLsn.QuadPart < BaseLsn.QuadPart) {
  1778. BaseLsn = Vcb->LastTransactionLsn;
  1779. }
  1780. }
  1781. //
  1782. // If the transaction table is otherwise empty, then this is a good
  1783. // time to reset our totals with Lfs, in case our counts get off a bit.
  1784. //
  1785. } else {
  1786. //
  1787. // If we are a transaction, then we have to add in our counts.
  1788. //
  1789. if (IrpContext->TransactionId != 0) {
  1790. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  1791. &Vcb->TransactionTable, IrpContext->TransactionId );
  1792. LfsResetUndoTotal( Vcb->LogHandle,
  1793. TransactionEntry->UndoRecords + 2,
  1794. TransactionEntry->UndoBytes +
  1795. QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1796. //
  1797. // Otherwise, we reset to our "idle" requirements.
  1798. //
  1799. } else {
  1800. LfsResetUndoTotal( Vcb->LogHandle,
  1801. 2,
  1802. QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1803. }
  1804. //
  1805. // If the DirtyPage table is empty then mark this as a clean checkpoint.
  1806. //
  1807. if (IsRestartTableEmpty( &DirtyPages )) {
  1808. //
  1809. // Remember the fact that this fuzzy checkpoint is pseudo clean
  1810. // in the sense that the dirty page and transaction tables are empty.
  1811. // It's ok if the other two tables are not empty as they are not important
  1812. // in this case and will be cleaned up later on.
  1813. //
  1814. SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_PSEUDO_CLEAN );
  1815. }
  1816. }
  1817. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1818. TransactionTableAcquired = FALSE;
  1819. }
  1820. //
  1821. // So far BaseLsn holds the minimum of the start Lsn for the checkpoint,
  1822. // or any of the FirstLsn fields for active transactions. Now we see
  1823. // if the oldest Lsn we need in the log should actually come from the
  1824. // oldest page in the dirty page table.
  1825. //
  1826. if ((OldestDirtyPageLsn.QuadPart != 0) &&
  1827. (OldestDirtyPageLsn.QuadPart < BaseLsn.QuadPart)) {
  1828. BaseLsn = OldestDirtyPageLsn;
  1829. }
  1830. //
  1831. // Now fill in the LowestOpenUsn in the RestartArea. This is an unsafe
  1832. // test, but if we think we see an empty list, that is ok. In case no
  1833. // files are open yet, make sure we do not backtrack from the number we got
  1834. // at restart.
  1835. //
  1836. RestartArea.MajorVersion = Vcb->RestartVersion;
  1837. RestartArea.CurrentLsnAtMount = Vcb->CurrentLsnAtMount;
  1838. RestartArea.BytesPerCluster = Vcb->BytesPerCluster;
  1839. RestartArea.Reserved = 0;
  1840. RestartArea.UsnJournalReference = Vcb->UsnJournalReference;
  1841. RestartArea.UsnCacheBias = Vcb->UsnCacheBias;
  1842. UsnJournal = Vcb->UsnJournal;
  1843. if (UsnJournal != NULL) {
  1844. //
  1845. // Continue to advance the Usn in the Vcb on checkpoints, so that
  1846. // if the list goes empty we do not get a restart that has to go
  1847. // back to where we were at boot time. We use the value we captured at
  1848. // the beginning - we own end resources (the transaction tables)
  1849. // here so we can't reacquire the usn journal
  1850. //
  1851. RestartArea.LowestOpenUsn = Vcb->LowestOpenUsn = LowestOpenUsn;
  1852. }
  1853. //
  1854. // BaseLsn must be monotonically increasing or we'll throw away recently
  1855. // deallocated clusters erroneously before they can be reused
  1856. //
  1857. ASSERT( Vcb->LastBaseLsn.QuadPart <= BaseLsn.QuadPart );
  1858. Vcb->LastBaseLsn = Vcb->LastRestartArea = BaseLsn;
  1859. //
  1860. // Finally, write our Restart Area to describe all of the above, and
  1861. // give Lfs our new BaseLsn.
  1862. //
  1863. LfsWriteRestartArea( Vcb->LogHandle,
  1864. sizeof( RESTART_AREA ),
  1865. &RestartArea,
  1866. LfsCleanShutdown,
  1867. &Vcb->LastRestartArea );
  1868. //
  1869. // Extra work at the end of a clean checkpoint
  1870. //
  1871. if (CleanVolume) {
  1872. //
  1873. // Mark the fact that we've done a clean checkpoint at this time.
  1874. //
  1875. Vcb->CleanCheckpointMark = Vcb->LogFileFullCount;
  1876. Vcb->UnhandledLogFileFullCount = 0;
  1877. Vcb->LastRestartAreaAtNonTopLevelLogFull.QuadPart = 0;
  1878. //
  1879. // Initialize our reserved area.
  1880. // Also set the LastBaseLsn to the restart area itself. This will
  1881. // prevent us from generating future dirty page table entries
  1882. // which go back prior to the restart area.
  1883. //
  1884. Vcb->LastBaseLsn = Vcb->LastRestartArea;
  1885. LfsResetUndoTotal( Vcb->LogHandle, 2, QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1886. Vcb->DirtyPageTableSizeHint = INITIAL_DIRTY_TABLE_HINT;
  1887. }
  1888. //
  1889. // Now remember where the log file is at now, so we know when to
  1890. // go idle above.
  1891. //
  1892. Vcb->EndOfLastCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  1893. } finally {
  1894. DebugUnwind( NtfsCheckpointVolume );
  1895. //
  1896. // If the Dirty Page Table got initialized, free it up.
  1897. //
  1898. if (DirtyPageTableInitialized) {
  1899. NtfsFreeRestartTable( &DirtyPages );
  1900. }
  1901. //
  1902. // Release any resources
  1903. //
  1904. if (OpenAttributeTableAcquired) {
  1905. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1906. }
  1907. if (TransactionTableAcquired) {
  1908. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1909. }
  1910. //
  1911. // Release any names buffer.
  1912. //
  1913. if (NamesBuffer != NULL) {
  1914. NtfsFreePool( NamesBuffer );
  1915. }
  1916. //
  1917. // Free any partial table we allocated.
  1918. //
  1919. if (NewTable != NULL) {
  1920. NtfsFreePool( NewTable );
  1921. }
  1922. //
  1923. // If this checkpoint created a transaction, free the index now.
  1924. //
  1925. if (IrpContext->TransactionId != 0) {
  1926. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
  1927. TRUE );
  1928. NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
  1929. IrpContext->TransactionId );
  1930. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1931. IrpContext->TransactionId = 0;
  1932. }
  1933. if (AcquireFiles) {
  1934. #ifdef NTFSDBG
  1935. ASSERT( FlagOn( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1936. DebugDoit( ClearFlag( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1937. #endif // NTFSDBG
  1938. NtfsReleaseAllFiles( IrpContext, Vcb, FALSE );
  1939. }
  1940. if (AcquiredVcb) {
  1941. if (CleanVolume) {
  1942. //
  1943. // If we acquire the vcb we also set the drain pending for the transaction table
  1944. // and open attribute table. Turn that off now
  1945. //
  1946. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1947. Vcb->TransactionTable.DrainPending = FALSE;
  1948. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1949. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1950. Vcb->OpenAttributeTable.DrainPending = FALSE;
  1951. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1952. }
  1953. NtfsReleaseVcb( IrpContext, Vcb );
  1954. }
  1955. #ifdef PERF_STATS
  1956. if (Tracking) {
  1957. KeQueryTickCount( (PLARGE_INTEGER)&Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].ElapsedTime );
  1958. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].ElapsedTime -=
  1959. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].StartTime;
  1960. Vcb->ChkPointEntry[ Vcb->CurrentCheckpoint % NUM_CHECKPOINT_ENTRIES ].NumIos = IrpContext->Ios;
  1961. Vcb->CurrentCheckpoint += 1;
  1962. }
  1963. #endif
  1964. //
  1965. // Capture the current base lsn before potentially giving up chkpt synchronization
  1966. //
  1967. BaseLsn = Vcb->LastBaseLsn;
  1968. //
  1969. // If we didn't own the checkpoint operation then indicate
  1970. // that someone else is free to checkpoint. Hold the checkpoint
  1971. // flags if we plan to trim the usn journal. The checkpoint
  1972. // flags serialize the journal with the delete journal operation.
  1973. //
  1974. ASSERT( !OwnsCheckpoint || CleanVolume );
  1975. if (!OwnsCheckpoint) {
  1976. if ((UsnJournal == NULL) || CleanVolume || AbnormalTermination()) {
  1977. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1978. ClearFlag( Vcb->CheckpointFlags,
  1979. VCB_CHECKPOINT_SYNC_FLAGS | VCB_DUMMY_CHECKPOINT_POSTED );
  1980. NtfsSetCheckpointNotify( IrpContext, Vcb );
  1981. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1982. }
  1983. }
  1984. if (RestorePreviousPriority) {
  1985. KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  1986. PreviousPriority );
  1987. }
  1988. }
  1989. //
  1990. // We shouldn't have the OAT acquired anymore and the base lsn we're using to
  1991. // trim the deallocated cluster list must not be > than the last base lsn in the
  1992. // vcb
  1993. //
  1994. ASSERT( !ExIsResourceAcquiredSharedLite( &Vcb->OpenAttributeTable.Resource ) &&
  1995. (BaseLsn.QuadPart <= Vcb->LastBaseLsn.QuadPart) );
  1996. NtfsFreeRecentlyDeallocated( IrpContext, Vcb, &BaseLsn, CleanVolume );
  1997. //
  1998. // If there is a Usn Journal, call to perform possible trimming on a periodic checkpoint.
  1999. //
  2000. if (!CleanVolume && (UsnJournal != NULL)) {
  2001. NtfsTrimUsnJournal( IrpContext, Vcb );
  2002. }
  2003. //
  2004. // If we need to post a defrag request then do so now.
  2005. //
  2006. if (PostDefrag) {
  2007. PDEFRAG_MFT DefragMft;
  2008. //
  2009. // Use a try-except to ignore allocation errors.
  2010. //
  2011. try {
  2012. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2013. if (!FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE )) {
  2014. SetFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
  2015. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2016. DefragMft = NtfsAllocatePool( NonPagedPool, sizeof( DEFRAG_MFT ));
  2017. DefragMft->Vcb = Vcb;
  2018. DefragMft->DeallocateWorkItem = TRUE;
  2019. //
  2020. // Send it off.....
  2021. //
  2022. ExInitializeWorkItem( &DefragMft->WorkQueueItem,
  2023. (PWORKER_THREAD_ROUTINE)NtfsDefragMft,
  2024. (PVOID)DefragMft );
  2025. ExQueueWorkItem( &DefragMft->WorkQueueItem, CriticalWorkQueue );
  2026. } else {
  2027. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2028. }
  2029. } except( FsRtlIsNtstatusExpected( GetExceptionCode() )
  2030. ? EXCEPTION_EXECUTE_HANDLER
  2031. : EXCEPTION_CONTINUE_SEARCH ) {
  2032. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2033. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
  2034. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2035. }
  2036. }
  2037. DebugTrace( -1, Dbg, ("NtfsCheckpointVolume -> VOID\n") );
  2038. }
  2039. VOID
  2040. NtfsCheckpointForLogFileFull (
  2041. IN PIRP_CONTEXT IrpContext
  2042. )
  2043. /*++
  2044. Routine Description:
  2045. This routine is called to perform the clean checkpoint generated after
  2046. a log file full. This routine will call the clean checkpoint routine
  2047. and then release all of the resources acquired.
  2048. Arguments:
  2049. Return Value:
  2050. None.
  2051. --*/
  2052. {
  2053. LSN LastKnownLsn;
  2054. PAGED_CODE();
  2055. ASSERT( FlagOn( IrpContext->TopLevelIrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL ));
  2056. IrpContext->ExceptionStatus = 0;
  2057. //
  2058. // Call the checkpoint routine to do the actual work. Skip this in the case where there is no
  2059. // longer a Vcb in the IrpContext. This can happen if doing some long running operation at
  2060. // mount time (i.e. Usn scan). In that case the long running operation should periodically
  2061. // checkpoint. Then Ntfs will do a clean checkpoint after restart and the remaining work
  2062. // to do in the long-running operation will decrease. At some point it will decrease enough
  2063. // to finish the mount.
  2064. //
  2065. // All of the other work is required since this IrpContext will be used to retry the mount.
  2066. //
  2067. if (IrpContext->Vcb != NULL) {
  2068. //
  2069. // If we're only trying to synchronize with a clean checkpoint use Li0 for
  2070. // the lastknownLsn which will guarantee after NtfsCheckpointvolume gets
  2071. // checkpoint synchronization it won't do anymore work. Otherwise use
  2072. // the last restart area we recorded in NtfsProcessException at the raise point
  2073. //
  2074. if (!FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_ONLY_SYNCH_CHECKPOINT )) {
  2075. LastKnownLsn = IrpContext->LastRestartArea;
  2076. } else {
  2077. LastKnownLsn = Li0;
  2078. }
  2079. //
  2080. // This can raise. However, in the case of dismounts, we do want this to
  2081. // plough on and succeed the dismount. For example, cluster service marks
  2082. // the volume offline first and sends the dismount afterward, but still expects it to succeed.
  2083. //
  2084. try {
  2085. NtfsCheckpointVolume( IrpContext,
  2086. IrpContext->Vcb,
  2087. FALSE,
  2088. TRUE,
  2089. FALSE,
  2090. 0,
  2091. LastKnownLsn );
  2092. } except (NtfsCheckpointExceptionFilter( IrpContext,
  2093. GetExceptionInformation(),
  2094. GetExceptionCode() )) {
  2095. //
  2096. // This is a LOG_FILE_FULL raise coming via dismount. Ignore errors
  2097. // because we want the dismount to succeed.
  2098. //
  2099. NtfsMinimumExceptionProcessing( IrpContext );
  2100. if (IrpContext->TransactionId != 0) {
  2101. NtfsCleanupFailedTransaction( IrpContext );
  2102. }
  2103. }
  2104. ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_ONLY_SYNCH_CHECKPOINT );
  2105. }
  2106. ASSERT( IrpContext->TransactionId == 0 );
  2107. ASSERT( !ExIsResourceAcquiredSharedLite( &IrpContext->Vcb->OpenAttributeTable.Resource ) );
  2108. //
  2109. // Cleanup the IrpContext but don't delete it.
  2110. //
  2111. SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
  2112. NtfsCleanupIrpContext( IrpContext, TRUE );
  2113. //
  2114. // Make sure we restore the RestartArea.
  2115. //
  2116. IrpContext->LastRestartArea = Li0;
  2117. return;
  2118. }
  2119. NTSTATUS
  2120. NtfsCheckpointForVolumeSnapshot (
  2121. IN PIRP_CONTEXT IrpContext
  2122. )
  2123. /*++
  2124. Routine Description:
  2125. This routine is called to perform a volume flush and a
  2126. clean checkpoint before a snapshot of the volume is taken.
  2127. Since we need to keep the volume quiescent, we make it a
  2128. point to leave the file resources acquired on exit.
  2129. Arguments:
  2130. IrpContext.
  2131. Return Value:
  2132. Status.
  2133. --*/
  2134. {
  2135. LOGICAL AcquiredCheckpoint;
  2136. LOGICAL AcquiredFiles = FALSE;
  2137. LOGICAL AcquiredVcb = FALSE;
  2138. PVCB Vcb;
  2139. NTSTATUS Status = STATUS_SUCCESS;
  2140. LOGICAL DefragPermitted;
  2141. KPRIORITY PreviousPriority;
  2142. BOOLEAN RestorePreviousPriority = FALSE;
  2143. PAGED_CODE();
  2144. //
  2145. // Clear the Mft defrag flag to stop any actions behind our backs.
  2146. //
  2147. Vcb = IrpContext->Vcb;
  2148. //
  2149. // If this is a readonly volume, then there's nothing we need to do.
  2150. //
  2151. if (NtfsIsVolumeReadOnly( Vcb )) {
  2152. ASSERT( Status == STATUS_SUCCESS );
  2153. DebugTrace( -1, Dbg, ("NtfsCheckpointForVolumeSnapshot -> %08lx\n", Status) );
  2154. return Status;
  2155. }
  2156. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2157. DefragPermitted = FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  2158. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  2159. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2160. AcquiredCheckpoint = FALSE;
  2161. try {
  2162. //
  2163. // Then lock out all other checkpoint operations.
  2164. //
  2165. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2166. while (FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS )) {
  2167. //
  2168. // Release the checkpoint event because we cannot checkpoint now.
  2169. //
  2170. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2171. NtfsWaitOnCheckpointNotify( IrpContext, Vcb );
  2172. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2173. }
  2174. SetFlag( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS );
  2175. NtfsResetCheckpointNotify( IrpContext, Vcb );
  2176. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2177. AcquiredCheckpoint = TRUE;
  2178. NtfsAcquireExclusiveVcb( IrpContext, Vcb, TRUE );
  2179. AcquiredVcb = TRUE;
  2180. //
  2181. // Check that the volume is still mounted.
  2182. //
  2183. if (!FlagOn( Vcb->VcbState, VCB_STATE_VOLUME_MOUNTED )) {
  2184. Status = STATUS_VOLUME_DISMOUNTED;
  2185. leave;
  2186. }
  2187. //
  2188. // Start by flushing the volume, because we can't call FlushVolume later
  2189. // while holding only the Main resources without their corresponding
  2190. // pagingio resources. Flushing the userdata doesn't really need to be
  2191. // atomic with the rest of the operation; we just have to make sure that
  2192. // the volume is consistent and restartable without log recovery.
  2193. //
  2194. NtfsFlushVolume( IrpContext,
  2195. Vcb,
  2196. TRUE,
  2197. FALSE,
  2198. TRUE,
  2199. FALSE );
  2200. //
  2201. // Give ourselves some juice. We'll need it.
  2202. //
  2203. PreviousPriority = KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  2204. LOW_REALTIME_PRIORITY );
  2205. if (PreviousPriority != LOW_REALTIME_PRIORITY) {
  2206. RestorePreviousPriority = TRUE;
  2207. }
  2208. //
  2209. // Lock, stock, clean checkpoint, volume flush and
  2210. // two smoking barrels. No chance of acquiring PagingIo
  2211. // here; pretty much only shutdown has that luxury.
  2212. //
  2213. NtfsAcquireAllFiles( IrpContext, Vcb, TRUE, FALSE, FALSE );
  2214. AcquiredFiles = TRUE;
  2215. //
  2216. // Generate usn CLOSE records. We don't bother to get the FcbMutex because
  2217. // we already have the Fcb main resource exclusively.
  2218. //
  2219. if (Vcb->UsnJournal != NULL) {
  2220. PLIST_ENTRY Links;
  2221. PFCB_USN_RECORD UsnRecord;
  2222. while (TRUE) {
  2223. NtfsLockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  2224. Links = Vcb->ModifiedOpenFiles.Flink;
  2225. if (Links == &Vcb->ModifiedOpenFiles) {
  2226. NtfsUnlockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  2227. break;
  2228. }
  2229. UsnRecord = (PFCB_USN_RECORD)CONTAINING_RECORD( Links,
  2230. FCB_USN_RECORD,
  2231. ModifiedOpenFilesLinks );
  2232. NtfsUnlockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  2233. //
  2234. // Post the CLOSE record. Checkpointing takes this UsnRecord
  2235. // off the ModifiedOpenFiles list.
  2236. //
  2237. NtfsPostUsnChange( IrpContext, UsnRecord->Fcb, USN_REASON_CLOSE );
  2238. NtfsWriteUsnJournalChanges( IrpContext );
  2239. NtfsCheckpointCurrentTransaction( IrpContext );
  2240. }
  2241. }
  2242. SetFlag( Vcb->VcbState, VCB_STATE_VOL_PURGE_IN_PROGRESS );
  2243. #ifdef PERF_STATS
  2244. IrpContext->LogFullReason = LF_SNAPSHOT;
  2245. #endif
  2246. NtfsCheckpointVolume( IrpContext, Vcb, TRUE, TRUE, FALSE, 0, Vcb->LastRestartArea );
  2247. NtfsCommitCurrentTransaction( IrpContext );
  2248. ClearFlag( Vcb->VcbState, VCB_STATE_VOL_PURGE_IN_PROGRESS );
  2249. } finally {
  2250. //
  2251. // Restore DEFRAG_PERMITTED flag if we need to.
  2252. //
  2253. if (DefragPermitted) {
  2254. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2255. SetFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  2256. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2257. }
  2258. //
  2259. // Release the checkpoint, if we got it, but we aren't releasing
  2260. // all the files unless there was an error.
  2261. //
  2262. if (AcquiredCheckpoint) {
  2263. NtfsAcquireCheckpoint( IrpContext, Vcb );
  2264. ClearFlag( Vcb->CheckpointFlags,
  2265. VCB_CHECKPOINT_SYNC_FLAGS | VCB_DUMMY_CHECKPOINT_POSTED);
  2266. NtfsSetCheckpointNotify( IrpContext, Vcb );
  2267. NtfsReleaseCheckpoint( IrpContext, Vcb );
  2268. }
  2269. //
  2270. // Release the file resources only if we hit an error.
  2271. // We normally do this in the completion routine for the IOCTL.
  2272. //
  2273. if (!NT_SUCCESS( Status ) || AbnormalTermination()) {
  2274. if (AcquiredFiles) {
  2275. NtfsReleaseAllFiles( IrpContext, Vcb, FALSE );
  2276. }
  2277. if (AcquiredVcb) {
  2278. NtfsReleaseVcb( IrpContext, Vcb );
  2279. }
  2280. }
  2281. if (RestorePreviousPriority) {
  2282. KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  2283. PreviousPriority );
  2284. }
  2285. }
  2286. DebugTrace( -1, Dbg, ("NtfsCheckpointForVolsnap -exit\n") );
  2287. return Status;
  2288. }
  2289. VOID
  2290. NtfsCleanCheckpoint (
  2291. IN PVCB Vcb
  2292. )
  2293. /*++
  2294. Routine Description:
  2295. This routine is called to perform a single clean checkpoint at the top level
  2296. and return. It is used when the lazy writer gets a log file full in order
  2297. to perform the clean checkpoint within the thread doing the lazy write.
  2298. Arguments:
  2299. Return Value:
  2300. None.
  2301. --*/
  2302. {
  2303. IRP_CONTEXT LocalIrpContext;
  2304. PIRP_CONTEXT IrpContext = &LocalIrpContext;
  2305. PAGED_CODE();
  2306. try {
  2307. //
  2308. // Allocate an Irp Context for the request.
  2309. //
  2310. NtfsInitializeIrpContext( NULL, TRUE, &IrpContext );
  2311. IrpContext->Vcb = Vcb;
  2312. if (Vcb->LastRestartAreaAtNonTopLevelLogFull.QuadPart != 0) {
  2313. IrpContext->LastRestartArea = Vcb->LastRestartAreaAtNonTopLevelLogFull;
  2314. } else {
  2315. IrpContext->LastRestartArea = Vcb->LastRestartArea;
  2316. }
  2317. //
  2318. // There is no point in posting any dummy requests.
  2319. //
  2320. NtfsAcquireCheckpoint( IrpContext, IrpContext->Vcb );
  2321. SetFlag( IrpContext->Vcb->CheckpointFlags, VCB_DUMMY_CHECKPOINT_POSTED );
  2322. NtfsReleaseCheckpoint( IrpContext, IrpContext->Vcb );
  2323. //
  2324. // Send this off to the FspDispatch routine. It will handle all of the
  2325. // top level logic as well as deleting the IrpContext.
  2326. //
  2327. NtfsFspDispatch( IrpContext );
  2328. } except( EXCEPTION_EXECUTE_HANDLER ) {
  2329. NOTHING;
  2330. }
  2331. return;
  2332. }
  2333. VOID
  2334. NtfsCommitCurrentTransaction (
  2335. IN PIRP_CONTEXT IrpContext
  2336. )
  2337. /*++
  2338. Routine Description:
  2339. This routine commits the current transaction by writing a final record
  2340. to the log and deallocating the transaction Id.
  2341. Arguments:
  2342. Return Value:
  2343. None.
  2344. --*/
  2345. {
  2346. PTRANSACTION_ENTRY TransactionEntry;
  2347. PVCB Vcb = IrpContext->Vcb;
  2348. PFCB UsnFcb;
  2349. PUSN_FCB ThisUsn, LastUsn;
  2350. PAGED_CODE();
  2351. #if (DBG || defined( NTFS_FREE_ASSERTS ))
  2352. try {
  2353. #endif
  2354. //
  2355. // Walk through the queue of usn records. We want to remove any effect of this operation.
  2356. //
  2357. ThisUsn = &IrpContext->Usn;
  2358. do {
  2359. //
  2360. // If we log the close for a file, then it is time to reset the
  2361. // Usn reasons for the file. Nothing to do here unless we
  2362. // wrote new reasons.
  2363. //
  2364. if (ThisUsn->CurrentUsnFcb != NULL ) {
  2365. PSCB UsnJournal = Vcb->UsnJournal;
  2366. PFCB_USN_RECORD FcbUsnRecord;
  2367. UsnFcb = ThisUsn->CurrentUsnFcb;
  2368. NtfsLockFcb( IrpContext, UsnFcb );
  2369. if (UsnJournal != NULL) {
  2370. NtfsLockFcb( IrpContext, UsnJournal->Fcb );
  2371. }
  2372. FcbUsnRecord = UsnFcb->FcbUsnRecord;
  2373. //
  2374. // After locking the fcb test for the presence of the fcb record again
  2375. // DeleteUsnJournal may have already removed it
  2376. //
  2377. if (FcbUsnRecord) {
  2378. UsnFcb->Usn = FcbUsnRecord->UsnRecord.Usn;
  2379. //
  2380. // Now add or move the Fcb in the ModifiedOpenFiles list.
  2381. //
  2382. if (FlagOn( FcbUsnRecord->UsnRecord.Reason, USN_REASON_CLOSE )) {
  2383. //
  2384. // Clean up the UsnRecord in the Fcb.
  2385. //
  2386. FcbUsnRecord->UsnRecord.Reason = 0;
  2387. FcbUsnRecord->UsnRecord.SourceInfo = 0;
  2388. if (UsnJournal != NULL) {
  2389. if( FcbUsnRecord->ModifiedOpenFilesLinks.Flink != NULL ) {
  2390. RemoveEntryList( &FcbUsnRecord->ModifiedOpenFilesLinks );
  2391. FcbUsnRecord->ModifiedOpenFilesLinks.Flink = NULL;
  2392. if (FcbUsnRecord->TimeOutLinks.Flink != NULL) {
  2393. RemoveEntryList( &FcbUsnRecord->TimeOutLinks );
  2394. FcbUsnRecord->TimeOutLinks.Flink = NULL;
  2395. }
  2396. }
  2397. }
  2398. } else {
  2399. if (UsnJournal != NULL) {
  2400. if (FcbUsnRecord->ModifiedOpenFilesLinks.Flink != NULL) {
  2401. RemoveEntryList( &FcbUsnRecord->ModifiedOpenFilesLinks );
  2402. if (FcbUsnRecord->TimeOutLinks.Flink != NULL) {
  2403. RemoveEntryList( &FcbUsnRecord->TimeOutLinks );
  2404. FcbUsnRecord->TimeOutLinks.Flink = NULL;
  2405. }
  2406. }
  2407. InsertTailList( &Vcb->ModifiedOpenFiles, &FcbUsnRecord->ModifiedOpenFilesLinks );
  2408. if (UsnFcb->CleanupCount == 0) {
  2409. InsertTailList( Vcb->CurrentTimeOutFiles, &FcbUsnRecord->TimeOutLinks );
  2410. }
  2411. }
  2412. }
  2413. }
  2414. //
  2415. // Cleanup the UsnFcb in the IrpContext. It's possible that
  2416. // we might want to reuse the UsnFcb later in this request.
  2417. //
  2418. if (ThisUsn != &IrpContext->Usn) {
  2419. LastUsn->NextUsnFcb = ThisUsn->NextUsnFcb;
  2420. NtfsFreePool( ThisUsn );
  2421. ThisUsn = LastUsn;
  2422. } else {
  2423. RtlZeroMemory( &ThisUsn->CurrentUsnFcb,
  2424. sizeof( USN_FCB ) - FIELD_OFFSET( USN_FCB, CurrentUsnFcb ));
  2425. }
  2426. if (UsnJournal != NULL) {
  2427. NtfsUnlockFcb( IrpContext, UsnJournal->Fcb );
  2428. }
  2429. NtfsUnlockFcb( IrpContext, UsnFcb );
  2430. }
  2431. if (ThisUsn->NextUsnFcb == NULL) { break; }
  2432. //
  2433. // Move to the next entry.
  2434. //
  2435. LastUsn = ThisUsn;
  2436. ThisUsn = ThisUsn->NextUsnFcb;
  2437. } while (TRUE);
  2438. //
  2439. // If this request created a transaction, complete it now.
  2440. //
  2441. if (IrpContext->TransactionId != 0) {
  2442. LSN CommitLsn;
  2443. //
  2444. // It is possible to get a LOG_FILE_FULL before writing
  2445. // out the first log record of a transaction. In that
  2446. // case there is a transaction Id but we haven't reserved
  2447. // space in the log file. It is wrong to write the
  2448. // commit record in this case because we can get an
  2449. // unexpected LOG_FILE_FULL. We can also test the UndoRecords
  2450. // count in the transaction entry but don't want to acquire
  2451. // the restart table to make this check.
  2452. //
  2453. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG )) {
  2454. //
  2455. // Write the log record to "forget" this transaction,
  2456. // because it should not be aborted. Until if/when we
  2457. // do real TP, commit and forget are atomic.
  2458. //
  2459. CommitLsn =
  2460. NtfsWriteLog( IrpContext,
  2461. Vcb->MftScb,
  2462. NULL,
  2463. ForgetTransaction,
  2464. NULL,
  2465. 0,
  2466. CompensationLogRecord,
  2467. (PVOID)&Li0,
  2468. sizeof( IrpContext->ExceptionStatus ), // final exception status
  2469. (LONGLONG)IrpContext->ExceptionStatus,
  2470. 0,
  2471. 0,
  2472. 0 );
  2473. }
  2474. //
  2475. // We can now free the transaction table index, because we are
  2476. // done with it now.
  2477. //
  2478. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
  2479. TRUE );
  2480. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  2481. &Vcb->TransactionTable,
  2482. IrpContext->TransactionId );
  2483. //
  2484. // Call Lfs to free our undo space.
  2485. //
  2486. if ((TransactionEntry->UndoRecords != 0) &&
  2487. (!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS ))) {
  2488. LfsResetUndoTotal( Vcb->LogHandle,
  2489. TransactionEntry->UndoRecords,
  2490. -TransactionEntry->UndoBytes );
  2491. }
  2492. NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
  2493. IrpContext->TransactionId );
  2494. //
  2495. // Mark that there is no transaction for the irp and signal
  2496. // any waiters if there are no transactions left
  2497. //
  2498. if (Vcb->TransactionTable.Table->NumberAllocated == 0) {
  2499. KeSetEvent( &Vcb->TransactionsDoneEvent, 0, FALSE );
  2500. }
  2501. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  2502. IrpContext->TransactionId = 0;
  2503. //
  2504. // One way we win by being recoverable, is that we do not really
  2505. // have to do write-through - flushing the updates to the log
  2506. // is enough. We don't make this call if we are in the abort
  2507. // transaction path. Otherwise we could get a log file full
  2508. // while aborting.
  2509. //
  2510. if (FlagOn( IrpContext->TopLevelIrpContext->State, IRP_CONTEXT_STATE_WRITE_THROUGH ) &&
  2511. (IrpContext == IrpContext->TopLevelIrpContext) &&
  2512. (IrpContext->TopLevelIrpContext->ExceptionStatus == STATUS_SUCCESS)) {
  2513. NtfsUpdateScbSnapshots( IrpContext );
  2514. LfsFlushToLsn( Vcb->LogHandle, CommitLsn );
  2515. }
  2516. }
  2517. //
  2518. // Signal any waiters for the new length.
  2519. //
  2520. if (IrpContext->CheckNewLength != NULL) {
  2521. NtfsProcessNewLengthQueue( IrpContext, FALSE );
  2522. }
  2523. #if (DBG || defined( NTFS_FREE_ASSERTS ))
  2524. } except( ASSERT( GetExceptionCode() != STATUS_LOG_FILE_FULL ), EXCEPTION_CONTINUE_SEARCH ) {
  2525. }
  2526. #endif
  2527. }
  2528. VOID
  2529. NtfsCheckpointCurrentTransaction (
  2530. IN PIRP_CONTEXT IrpContext
  2531. )
  2532. /*++
  2533. Routine Description:
  2534. This routine checkpoints the current transaction by commiting it
  2535. to the log and deallocating the transaction Id. The current request
  2536. cann keep running, but changes to date are committed and will not be
  2537. backed out.
  2538. Arguments:
  2539. Return Value:
  2540. None.
  2541. --*/
  2542. {
  2543. PVCB Vcb = IrpContext->Vcb;
  2544. PAGED_CODE();
  2545. //
  2546. // If there are new UsnReasons in the IrpContext, then we should write the journal
  2547. // now. Note that it is ok for a checkpoint to get logfile full, but in general commit
  2548. // should not.
  2549. //
  2550. if ((IrpContext->Usn.NewReasons | IrpContext->Usn.RemovedSourceInfo) != 0) {
  2551. NtfsWriteUsnJournalChanges( IrpContext );
  2552. }
  2553. NtfsCommitCurrentTransaction( IrpContext );
  2554. //
  2555. // Cleanup any recently deallocated record information for this transaction.
  2556. //
  2557. NtfsDeallocateRecordsComplete( IrpContext );
  2558. IrpContext->DeallocatedClusters = 0;
  2559. IrpContext->FreeClusterChange = 0;
  2560. //
  2561. // The following resources may have been flagged for immediate release on commit.
  2562. //
  2563. if (Vcb->AcquireFilesCount == 0) {
  2564. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_USN_JRNL )) {
  2565. NtfsReleaseScb( IrpContext, Vcb->UsnJournal );
  2566. }
  2567. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_MFT )) {
  2568. NtfsReleaseScb( IrpContext, Vcb->MftScb );
  2569. }
  2570. }
  2571. ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_USN_JRNL |
  2572. IRP_CONTEXT_FLAG_RELEASE_MFT );
  2573. NtfsUpdateScbSnapshots( IrpContext );
  2574. }
  2575. VOID
  2576. NtfsInitializeLogging (
  2577. )
  2578. /*
  2579. Routine Description:
  2580. This routine is to be called once during startup of Ntfs (not once
  2581. per volume), to initialize the logging support.
  2582. Parameters:
  2583. None
  2584. Return Value:
  2585. None
  2586. --*/
  2587. {
  2588. PAGED_CODE();
  2589. DebugTrace( +1, Dbg, ("NtfsInitializeLogging:\n") );
  2590. LfsInitializeLogFileService();
  2591. DebugTrace( -1, Dbg, ("NtfsInitializeLogging -> VOID\n") );
  2592. }
  2593. VOID
  2594. NtfsStartLogFile (
  2595. IN PSCB LogFileScb,
  2596. IN PVCB Vcb
  2597. )
  2598. /*++
  2599. Routine Description:
  2600. This routine opens the log file for a volume by calling Lfs. The returned
  2601. LogHandle is stored in the Vcb. If the log file has not been initialized,
  2602. Lfs detects this and initializes it automatically.
  2603. Arguments:
  2604. LogFileScb - The Scb for the log file
  2605. Vcb - Pointer to the Vcb for this volume
  2606. Return Value:
  2607. None.
  2608. --*/
  2609. {
  2610. UNICODE_STRING UnicodeName;
  2611. LFS_INFO LfsInfo;
  2612. PAGED_CODE();
  2613. DebugTrace( +1, Dbg, ("NtfsStartLogFile:\n") );
  2614. RtlInitUnicodeString( &UnicodeName, L"NTFS" );
  2615. //
  2616. // LfsInfo structure acts as a information conduit between
  2617. // LFS and the NTFS client.
  2618. //
  2619. if (Vcb->MajorVersion >= 3) {
  2620. LfsInfo.LfsClientInfo = LfsFixedPageSize;
  2621. } else {
  2622. LfsInfo.LfsClientInfo = LfsPackLog;
  2623. }
  2624. LfsInfo.ReadOnly = (LOGICAL)NtfsIsVolumeReadOnly( Vcb );
  2625. LfsInfo.InRestart = (LOGICAL)FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS );
  2626. LfsInfo.BadRestart = (LOGICAL)FlagOn( Vcb->VcbState, VCB_STATE_BAD_RESTART );
  2627. //
  2628. // Slam the allocation size into file size and valid data in case there
  2629. // is some error.
  2630. //
  2631. LogFileScb->Header.FileSize = LogFileScb->Header.AllocationSize;
  2632. LogFileScb->Header.ValidDataLength = LogFileScb->Header.AllocationSize;
  2633. //
  2634. // Now call into LFS and Open/Restart the log file. This could raise
  2635. // for various reasons, one of which is an attempt to do restart
  2636. // on a write protected volume. Vcb wont have the VALID_LOG_HANDLE flag then.
  2637. //
  2638. Vcb->LogHeaderReservation = LfsOpenLogFile( LogFileScb->FileObject,
  2639. UnicodeName,
  2640. 1,
  2641. 0,
  2642. LogFileScb->Header.AllocationSize.QuadPart,
  2643. &LfsInfo,
  2644. &Vcb->LogHandle,
  2645. &Vcb->LfsWriteData );
  2646. SetFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
  2647. DebugTrace( -1, Dbg, ("NtfsStartLogFile -> VOID\n") );
  2648. }
  2649. VOID
  2650. NtfsStopLogFile (
  2651. IN PVCB Vcb
  2652. )
  2653. /*
  2654. Routine Description:
  2655. This routine should be called during volume dismount to close the volume's
  2656. log file with the log file service.
  2657. Arguments:
  2658. Vcb - Pointer to the Vcb for the volume
  2659. Return Value:
  2660. None
  2661. --*/
  2662. {
  2663. LFS_LOG_HANDLE LogHandle = Vcb->LogHandle;
  2664. PAGED_CODE();
  2665. DebugTrace( +1, Dbg, ("NtfsStopLogFile:\n") );
  2666. if (FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
  2667. ASSERT( LogHandle != NULL );
  2668. //
  2669. // We don't do any logfile flushing if the volume
  2670. // is mounted read only or if the device is already gone.
  2671. //
  2672. if (!NtfsIsVolumeReadOnly( Vcb )) {
  2673. //
  2674. // Proceed even if this call fails. There is nothing
  2675. // more we can do at this point.
  2676. //
  2677. try {
  2678. LfsFlushToLsn( LogHandle, LiMax );
  2679. } except( (FsRtlIsNtstatusExpected( GetExceptionCode() )) ?
  2680. EXCEPTION_EXECUTE_HANDLER :
  2681. EXCEPTION_CONTINUE_SEARCH ) {
  2682. NOTHING;
  2683. }
  2684. }
  2685. ClearFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
  2686. //
  2687. // Allow LFS to close its books. We do this even for readonly
  2688. // mounts, although we filter writes at the LFS level for those.
  2689. //
  2690. LfsCloseLogFile( LogHandle );
  2691. }
  2692. DebugTrace( -1, Dbg, ("NtfsStopLogFile -> VOID\n") );
  2693. }
  2694. VOID
  2695. NtfsInitializeRestartTable (
  2696. IN ULONG EntrySize,
  2697. IN ULONG NumberEntries,
  2698. OUT PRESTART_POINTERS TablePointer
  2699. )
  2700. /*++
  2701. Routine Description:
  2702. This routine is called to allocate and initialize a new Restart Table,
  2703. and return a pointer to it.
  2704. Arguments:
  2705. EntrySize - Size of the table entries, in bytes.
  2706. NumberEntries - Number of entries to allocate for the table.
  2707. TablePointer - Returns a pointer to the table.
  2708. Return Value:
  2709. None
  2710. --*/
  2711. {
  2712. PAGED_CODE();
  2713. try {
  2714. NtfsInitializeRestartPointers( TablePointer );
  2715. //
  2716. // Call common routine to allocate the actual table.
  2717. //
  2718. InitializeNewTable( EntrySize, NumberEntries, TablePointer );
  2719. } finally {
  2720. DebugUnwind( NtfsInitializeRestartTable );
  2721. //
  2722. // On error, clean up any partial work that was done.
  2723. //
  2724. if (AbnormalTermination()) {
  2725. NtfsFreeRestartTable( TablePointer );
  2726. }
  2727. }
  2728. }
  2729. VOID
  2730. NtfsFreeRestartTable (
  2731. IN PRESTART_POINTERS TablePointer
  2732. )
  2733. /*++
  2734. Routine Description:
  2735. This routine frees a previously allocated Restart Table.
  2736. Arguments:
  2737. TablePointer - Pointer to the Restart Table to delete.
  2738. Return Value:
  2739. None.
  2740. --*/
  2741. {
  2742. PAGED_CODE();
  2743. if (TablePointer->Table != NULL) {
  2744. NtfsFreePool( TablePointer->Table );
  2745. TablePointer->Table = NULL;
  2746. }
  2747. if (TablePointer->ResourceInitialized) {
  2748. ExDeleteResourceLite( &TablePointer->Resource );
  2749. TablePointer->ResourceInitialized = FALSE;
  2750. }
  2751. }
  2752. VOID
  2753. NtfsExtendRestartTable (
  2754. IN PRESTART_POINTERS TablePointer,
  2755. IN ULONG NumberNewEntries,
  2756. IN ULONG FreeGoal
  2757. )
  2758. /*++
  2759. Routine Description:
  2760. This routine extends a previously allocated Restart Table, by
  2761. creating and initializing a new one, and copying over the the
  2762. table entries from the old one. The old table is then deallocated.
  2763. On return, the table pointer points to the new Restart Table.
  2764. Arguments:
  2765. TablePointer - Address of the pointer to the previously created table.
  2766. NumberNewEntries - The number of addtional entries to be allocated
  2767. in the new table.
  2768. FreeGoal - A hint as to what point the caller would like to truncate
  2769. the table back to, when sufficient entries are deleted.
  2770. If truncation is not desired, then MAXULONG may be specified.
  2771. Return Value:
  2772. None.
  2773. --*/
  2774. {
  2775. PRESTART_TABLE NewTable, OldTable;
  2776. ULONG OldSize;
  2777. OldSize = SizeOfRestartTable( TablePointer );
  2778. //
  2779. // Get pointer to old table.
  2780. //
  2781. OldTable = TablePointer->Table;
  2782. ASSERT_RESTART_TABLE( OldTable );
  2783. //
  2784. // Start by initializing a table for the new size.
  2785. //
  2786. InitializeNewTable( OldTable->EntrySize,
  2787. OldTable->NumberEntries + NumberNewEntries,
  2788. TablePointer );
  2789. //
  2790. // Copy body of old table in place to new table.
  2791. //
  2792. NewTable = TablePointer->Table;
  2793. RtlMoveMemory( (NewTable + 1),
  2794. (OldTable + 1),
  2795. OldTable->EntrySize * OldTable->NumberEntries );
  2796. //
  2797. // Fix up new table's header, and fix up free list.
  2798. //
  2799. NewTable->FreeGoal = MAXULONG;
  2800. if (FreeGoal != MAXULONG) {
  2801. NewTable->FreeGoal = sizeof(RESTART_TABLE) + FreeGoal * NewTable->EntrySize;
  2802. }
  2803. if (OldTable->FirstFree != 0) {
  2804. NewTable->FirstFree = OldTable->FirstFree;
  2805. *(PULONG)GetRestartEntryFromIndex( TablePointer, OldTable->LastFree ) =
  2806. OldSize;;
  2807. } else {
  2808. NewTable->FirstFree = OldSize;
  2809. }
  2810. //
  2811. // Copy number allocated
  2812. //
  2813. NewTable->NumberAllocated = OldTable->NumberAllocated;
  2814. ASSERT( NewTable->NumberAllocated >= 0 );
  2815. ASSERT( NewTable->FirstFree != RESTART_ENTRY_ALLOCATED );
  2816. //
  2817. // Free the old table and return the new one.
  2818. //
  2819. NtfsFreePool( OldTable );
  2820. ASSERT_RESTART_TABLE( NewTable );
  2821. }
  2822. ULONG
  2823. NtfsAllocateRestartTableIndex (
  2824. IN PRESTART_POINTERS TablePointer,
  2825. IN ULONG Exclusive
  2826. )
  2827. /*++
  2828. Routine Description:
  2829. This routine allocates an index from within a previously initialized
  2830. Restart Table. If the table is empty, it is extended.
  2831. Note that the table must already be acquired either shared or exclusive,
  2832. and if it must be extended, then the table is released and will be
  2833. acquired exclusive on return.
  2834. Arguments:
  2835. TablePointer - Pointer to the Restart Table in which an index is to
  2836. be allocated.
  2837. Exclusive - Indicates if we have the table exclusive (or if we know that
  2838. synchronization is not a problem).
  2839. Return Value:
  2840. The allocated index.
  2841. --*/
  2842. {
  2843. PRESTART_TABLE Table;
  2844. ULONG EntryIndex;
  2845. KLOCK_QUEUE_HANDLE LockHandle;
  2846. PULONG Entry;
  2847. DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableIndex:\n") );
  2848. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  2849. Table = TablePointer->Table;
  2850. ASSERT_RESTART_TABLE(Table);
  2851. //
  2852. // Acquire the spin lock to synchronize the allocation.
  2853. //
  2854. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2855. //
  2856. // If the table is empty, then we have to extend it.
  2857. //
  2858. if (Table->FirstFree == 0) {
  2859. //
  2860. // First release the spin lock and the table resource, and get
  2861. // the resource exclusive.
  2862. //
  2863. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2864. if (!Exclusive) {
  2865. NtfsReleaseRestartTable( TablePointer );
  2866. NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
  2867. }
  2868. //
  2869. // Now extend the table. Note that if this routine raises, we have
  2870. // nothing to release.
  2871. //
  2872. NtfsExtendRestartTable( TablePointer, 16, MAXULONG );
  2873. //
  2874. // And re-get our pointer to the restart table
  2875. //
  2876. Table = TablePointer->Table;
  2877. //
  2878. // Now get the spin lock again and proceed.
  2879. //
  2880. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2881. }
  2882. //
  2883. // Get First Free to return it.
  2884. //
  2885. EntryIndex = Table->FirstFree;
  2886. ASSERT( EntryIndex != 0 );
  2887. //
  2888. // Dequeue this entry and zero it.
  2889. //
  2890. Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, EntryIndex );
  2891. Table->FirstFree = *Entry;
  2892. ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
  2893. RtlZeroMemory( Entry, Table->EntrySize );
  2894. //
  2895. // Show that it's allocated.
  2896. //
  2897. *Entry = RESTART_ENTRY_ALLOCATED;
  2898. //
  2899. // If list is going empty, then we fix the LastFree as well.
  2900. //
  2901. if (Table->FirstFree == 0) {
  2902. Table->LastFree = 0;
  2903. }
  2904. Table->NumberAllocated += 1;
  2905. //
  2906. // Now just release the spin lock before returning.
  2907. //
  2908. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2909. DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableIndex -> %08lx\n", EntryIndex) );
  2910. return EntryIndex;
  2911. }
  2912. PVOID
  2913. NtfsAllocateRestartTableFromIndex (
  2914. IN PRESTART_POINTERS TablePointer,
  2915. IN ULONG Index
  2916. )
  2917. /*++
  2918. Routine Description:
  2919. This routine allocates a specific index from within a previously
  2920. initialized Restart Table. If the index does not exist within the
  2921. existing table, the table is extended.
  2922. Note that the table must already be acquired either shared or exclusive,
  2923. and if it must be extended, then the table is released and will be
  2924. acquired exclusive on return.
  2925. Arguments:
  2926. TablePointer - Pointer to the Restart Table in which an index is to
  2927. be allocated.
  2928. Index - The index to be allocated.
  2929. Return Value:
  2930. The table entry allocated.
  2931. --*/
  2932. {
  2933. PULONG Entry;
  2934. PULONG LastEntry;
  2935. PRESTART_TABLE Table;
  2936. KLOCK_QUEUE_HANDLE LockHandle;
  2937. ULONG ThisIndex;
  2938. ULONG LastIndex;
  2939. DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableFromIndex\n") );
  2940. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  2941. DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
  2942. Table = TablePointer->Table;
  2943. ASSERT_RESTART_TABLE(Table);
  2944. //
  2945. // Acquire the spin lock to synchronize the allocation.
  2946. //
  2947. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2948. //
  2949. // If the entry is not in the table, we will have to extend the table.
  2950. //
  2951. if (!IsRestartIndexWithinTable( TablePointer, Index )) {
  2952. ULONG TableSize;
  2953. ULONG BytesToIndex;
  2954. ULONG AddEntries;
  2955. //
  2956. // We extend the size by computing the number of entries
  2957. // between the existing size and the desired index and
  2958. // adding 1 to that.
  2959. //
  2960. TableSize = SizeOfRestartTable( TablePointer );;
  2961. BytesToIndex = Index - TableSize;
  2962. AddEntries = BytesToIndex / Table->EntrySize + 1;
  2963. //
  2964. // There should always be an integral number of entries being added.
  2965. //
  2966. ASSERT( BytesToIndex % Table->EntrySize == 0 );
  2967. //
  2968. // First release the spin lock and the table resource, and get
  2969. // the resource exclusive.
  2970. //
  2971. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2972. NtfsReleaseRestartTable( TablePointer );
  2973. NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
  2974. //
  2975. // Now extend the table. Note that if this routine raises, we have
  2976. // nothing to release.
  2977. //
  2978. NtfsExtendRestartTable( TablePointer,
  2979. AddEntries,
  2980. TableSize );
  2981. Table = TablePointer->Table;
  2982. ASSERT_RESTART_TABLE(Table);
  2983. //
  2984. // Now get the spin lock again and proceed.
  2985. //
  2986. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2987. }
  2988. //
  2989. // Now see if the entry is already allocated, and just return if it is.
  2990. //
  2991. Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, Index );
  2992. if (!IsRestartTableEntryAllocated( Entry )) {
  2993. //
  2994. // We now have to walk through the table, looking for the entry
  2995. // we're interested in and the previous entry. Start by looking at the
  2996. // first entry.
  2997. //
  2998. ThisIndex = Table->FirstFree;
  2999. //
  3000. // Get the Entry from the list.
  3001. //
  3002. Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
  3003. //
  3004. // If this is a match, then we pull it out of the list and are done.
  3005. //
  3006. if (ThisIndex == Index) {
  3007. //
  3008. // Dequeue this entry.
  3009. //
  3010. Table->FirstFree = *Entry;
  3011. ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
  3012. //
  3013. // Otherwise we need to walk through the list looking for the
  3014. // predecessor of our entry.
  3015. //
  3016. } else {
  3017. while (TRUE) {
  3018. //
  3019. // Remember the entry just found.
  3020. //
  3021. LastIndex = ThisIndex;
  3022. LastEntry = Entry;
  3023. //
  3024. // We should never run out of entries.
  3025. //
  3026. ASSERT( *LastEntry != 0 );
  3027. //
  3028. // Lookup up the next entry in the list.
  3029. //
  3030. ThisIndex = *LastEntry;
  3031. Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
  3032. //
  3033. // If this is our match we are done.
  3034. //
  3035. if (ThisIndex == Index) {
  3036. //
  3037. // Dequeue this entry.
  3038. //
  3039. *LastEntry = *Entry;
  3040. //
  3041. // If this was the last entry, we update that in the
  3042. // table as well.
  3043. //
  3044. if (Table->LastFree == ThisIndex) {
  3045. Table->LastFree = LastIndex;
  3046. }
  3047. break;
  3048. }
  3049. }
  3050. }
  3051. //
  3052. // If the list is now empty, we fix the LastFree as well.
  3053. //
  3054. if (Table->FirstFree == 0) {
  3055. Table->LastFree = 0;
  3056. }
  3057. //
  3058. // Zero this entry. Then show that this is allocated and increment the
  3059. // allocated count.
  3060. //
  3061. RtlZeroMemory( Entry, Table->EntrySize );
  3062. *Entry = RESTART_ENTRY_ALLOCATED;
  3063. Table->NumberAllocated += 1;
  3064. }
  3065. //
  3066. // Now just release the spin lock before returning.
  3067. //
  3068. KeReleaseInStackQueuedSpinLock( &LockHandle );
  3069. DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableFromIndex -> %08lx\n", Entry) );
  3070. return (PVOID)Entry;
  3071. }
  3072. VOID
  3073. NtfsFreeRestartTableIndex (
  3074. IN PRESTART_POINTERS TablePointer,
  3075. IN ULONG Index
  3076. )
  3077. /*++
  3078. Routine Description:
  3079. This routine frees a previously allocated index in a Restart Table.
  3080. If the index is before FreeGoal for the table, it is simply deallocated to
  3081. the front of the list for immediate reuse. If the index is beyond
  3082. FreeGoal, then it is deallocated to the end of the list, to facilitate
  3083. truncation of the list in the event that all of the entries beyond
  3084. FreeGoal are freed. However, this routine does not automatically
  3085. truncate the list, as this would cause too much overhead. The list
  3086. is checked during periodic checkpoint processing.
  3087. Arguments:
  3088. TablePointer - Pointer to the Restart Table to which the index is to be
  3089. deallocated.
  3090. Index - The index being deallocated.
  3091. Return Value:
  3092. None.
  3093. --*/
  3094. {
  3095. PRESTART_TABLE Table;
  3096. PULONG Entry, OldLastEntry;
  3097. KLOCK_QUEUE_HANDLE LockHandle;
  3098. DebugTrace( +1, Dbg, ("NtfsFreeRestartTableIndex:\n") );
  3099. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  3100. DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
  3101. //
  3102. // Get pointers to table and the entry we are freeing.
  3103. //
  3104. Table = TablePointer->Table;
  3105. ASSERT_RESTART_TABLE(Table);
  3106. ASSERT( (Table->FirstFree == 0) ||
  3107. ((Table->FirstFree >= 0x18) &&
  3108. (((Table->FirstFree - 0x18) % Table->EntrySize) == 0)) );
  3109. ASSERT( (Index >= 0x18) &&
  3110. ((Index - 0x18) % Table->EntrySize) == 0 );
  3111. Entry = GetRestartEntryFromIndex( TablePointer, Index );
  3112. //
  3113. // Acquire the spinlock to synchronize the allocation.
  3114. //
  3115. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  3116. //
  3117. // If the index is before FreeGoal, then do a normal deallocation at
  3118. // the front of the list.
  3119. //
  3120. if (Index < Table->FreeGoal) {
  3121. *Entry = Table->FirstFree;
  3122. ASSERT( Index != RESTART_ENTRY_ALLOCATED );
  3123. Table->FirstFree = Index;
  3124. if (Table->LastFree == 0) {
  3125. Table->LastFree = Index;
  3126. }
  3127. //
  3128. // Otherwise we will deallocate this guy to the end of the list.
  3129. //
  3130. } else {
  3131. if (Table->LastFree != 0) {
  3132. OldLastEntry = GetRestartEntryFromIndex( TablePointer,
  3133. Table->LastFree );
  3134. *OldLastEntry = Index;
  3135. } else {
  3136. ASSERT( Index != RESTART_ENTRY_ALLOCATED );
  3137. Table->FirstFree = Index;
  3138. }
  3139. Table->LastFree = Index;
  3140. *Entry = 0;
  3141. }
  3142. ASSERT( Table->NumberAllocated != 0 );
  3143. Table->NumberAllocated -= 1;
  3144. //
  3145. // Now just release the spin lock before returning.
  3146. //
  3147. KeReleaseInStackQueuedSpinLock( &LockHandle );
  3148. DebugTrace( -1, Dbg, ("NtfsFreeRestartTableIndex -> VOID\n") );
  3149. }
  3150. PVOID
  3151. NtfsGetFirstRestartTable (
  3152. IN PRESTART_POINTERS TablePointer
  3153. )
  3154. /*++
  3155. Routine Description:
  3156. This routine returns the first allocated entry from a Restart Table.
  3157. Arguments:
  3158. TablePointer - Pointer to the respective Restart Table Pointers structure.
  3159. Return Value:
  3160. Pointer to the first entry, or NULL if none are allocated.
  3161. --*/
  3162. {
  3163. PCHAR Entry;
  3164. PAGED_CODE();
  3165. //
  3166. // If we know the table is empty, we can return immediately.
  3167. //
  3168. if (IsRestartTableEmpty( TablePointer )) {
  3169. return NULL;
  3170. }
  3171. //
  3172. // Otherwise point to the first table entry.
  3173. //
  3174. Entry = (PCHAR)(TablePointer->Table + 1);
  3175. //
  3176. // Loop until we hit the first one allocated, or the end of the list.
  3177. //
  3178. while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
  3179. SizeOfRestartTable(TablePointer)) {
  3180. if (IsRestartTableEntryAllocated(Entry)) {
  3181. return (PVOID)Entry;
  3182. }
  3183. Entry += TablePointer->Table->EntrySize;
  3184. }
  3185. return NULL;
  3186. }
  3187. PVOID
  3188. NtfsGetNextRestartTable (
  3189. IN PRESTART_POINTERS TablePointer,
  3190. IN PVOID Current
  3191. )
  3192. /*++
  3193. Routine Description:
  3194. This routine returns the next allocated entry from a Restart Table.
  3195. Arguments:
  3196. TablePointer - Pointer to the respective Restart Table Pointers structure.
  3197. Current - Current entry pointer.
  3198. Return Value:
  3199. Pointer to the next entry, or NULL if none are allocated.
  3200. --*/
  3201. {
  3202. PCHAR Entry = (PCHAR)Current;
  3203. PAGED_CODE();
  3204. //
  3205. // Point to the next entry.
  3206. //
  3207. Entry += TablePointer->Table->EntrySize;
  3208. //
  3209. // Loop until we hit the first one allocated, or the end of the list.
  3210. //
  3211. while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
  3212. SizeOfRestartTable(TablePointer)) {
  3213. if (IsRestartTableEntryAllocated(Entry)) {
  3214. return (PVOID)Entry;
  3215. }
  3216. Entry += TablePointer->Table->EntrySize;
  3217. }
  3218. return NULL;
  3219. }
  3220. VOID
  3221. NtfsUpdateOatVersion (
  3222. IN PVCB Vcb,
  3223. IN ULONG NewRestartVersion
  3224. )
  3225. /*++
  3226. Routine Description:
  3227. This routine is called when we are switching the restart version for a volume. This can happen
  3228. either after a clean checkpoint or at mount when we encounter a restart area with a non-default
  3229. version number.
  3230. Arguments:
  3231. Vcb - Pointer to the Vcb for the volume.
  3232. NewRestartVersion - Restart version to start using for this volume.
  3233. Return Value:
  3234. None
  3235. --*/
  3236. {
  3237. PRESTART_POINTERS NewTable = NULL;
  3238. PAGED_CODE();
  3239. DebugTrace( +1, Dbg, ("NtfsUpdateOatVersion\n") );
  3240. ASSERT( (Vcb->RestartVersion != NewRestartVersion) || (Vcb->OnDiskOat == NULL) );
  3241. //
  3242. // Use a try finally to facilitate cleanup.
  3243. //
  3244. try {
  3245. if (NewRestartVersion == 0) {
  3246. //
  3247. // If we are moving to version 0 then allocate a new table and
  3248. // initialize it with the initial number of entries.
  3249. //
  3250. NewTable = NtfsAllocatePool( NonPagedPool, sizeof( RESTART_POINTERS ));
  3251. NtfsInitializeRestartTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  3252. INITIAL_NUMBER_ATTRIBUTES,
  3253. NewTable );
  3254. Vcb->RestartVersion = 0;
  3255. Vcb->OatEntrySize = SIZEOF_OPEN_ATTRIBUTE_ENTRY_V0;
  3256. Vcb->OnDiskOat = NewTable;
  3257. NewTable = NULL;
  3258. } else {
  3259. if (Vcb->OnDiskOat != NULL) {
  3260. NtfsFreeRestartTable( Vcb->OnDiskOat );
  3261. NtfsFreePool( Vcb->OnDiskOat );
  3262. }
  3263. Vcb->OnDiskOat = &Vcb->OpenAttributeTable;
  3264. Vcb->RestartVersion = 1;
  3265. Vcb->OatEntrySize = sizeof( OPEN_ATTRIBUTE_ENTRY );
  3266. }
  3267. } finally {
  3268. DebugUnwind( NtfsUpdateOatVersion );
  3269. if (NewTable != NULL) {
  3270. NtfsFreePool( NewTable );
  3271. }
  3272. }
  3273. DebugTrace( -1, Dbg, ("NtfsUpdateOatVersion -> VOID\n") );
  3274. return;
  3275. }
  3276. //
  3277. // Internal support routine
  3278. //
  3279. VOID
  3280. DirtyPageRoutine (
  3281. IN PFILE_OBJECT FileObject,
  3282. IN PLARGE_INTEGER FileOffset,
  3283. IN ULONG Length,
  3284. IN PLSN OldestLsn,
  3285. IN PLSN NewestLsn,
  3286. IN PVOID Context1,
  3287. IN PVOID Context2
  3288. )
  3289. /*++
  3290. Routine Description:
  3291. This routine is used as the call back routine for retrieving dirty pages
  3292. from the Cache Manager. It adds them to the Dirty Table list whose
  3293. pointer is pointed to by the Context parameter.
  3294. Arguments:
  3295. FileObject - Pointer to the file object which has the dirty page
  3296. FileOffset - File offset for start of dirty page
  3297. Length - Length recorded for the dirty page
  3298. OldestLsn - Oldest Lsn of an update not written through stored for that page
  3299. (Can be zero if set dirty in paths that don't use lsns)
  3300. Context1 - IrpContext
  3301. Context2 - Pointer to the pointer to the Restart Table
  3302. Return Value:
  3303. None
  3304. --*/
  3305. {
  3306. PDIRTY_PAGE_ENTRY PageEntry;
  3307. PDIRTY_PAGE_CONTEXT DirtyPageContext = (PDIRTY_PAGE_CONTEXT)Context2;
  3308. PRESTART_POINTERS DirtyPageTable = DirtyPageContext->DirtyPageTable;
  3309. PSCB_NONPAGED NonpagedScb;
  3310. ULONG PageIndex;
  3311. DebugTrace( +1, Dbg, ("DirtyPageRoutine:\n") );
  3312. DebugTrace( 0, Dbg, ("FileObject = %08lx\n", FileObject) );
  3313. DebugTrace( 0, Dbg, ("FileOffset = %016I64x\n", *FileOffset) );
  3314. DebugTrace( 0, Dbg, ("Length = %08lx\n", Length) );
  3315. DebugTrace( 0, Dbg, ("OldestLsn = %016I64x\n", *OldestLsn) );
  3316. DebugTrace( 0, Dbg, ("Context2 = %08lx\n", Context2) );
  3317. //
  3318. // Get the Vcb out of the file object.
  3319. //
  3320. NonpagedScb = CONTAINING_RECORD( FileObject->SectionObjectPointer,
  3321. SCB_NONPAGED,
  3322. SegmentObject );
  3323. //
  3324. // We noop this call if the open attribute entry for this Scb is 0. We assume
  3325. // there was a clean volume checkpoint which cleared this field.
  3326. //
  3327. if (NonpagedScb->OpenAttributeTableIndex == 0 ) {
  3328. DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
  3329. return;
  3330. }
  3331. //
  3332. // Check for an overrun in the table and stop processing in that case
  3333. // The restart table format can't accomodate tables greater than 64k in size
  3334. // due to the ushort used for the attribute index
  3335. //
  3336. if (AllocatedSizeOfRestartTable( DirtyPageTable ) > MAX_RESTART_TABLE_SIZE ){
  3337. DirtyPageContext->Overflow = TRUE;
  3338. } else {
  3339. //
  3340. // Get a pointer to the entry we just allocated.
  3341. //
  3342. PageIndex = NtfsAllocateRestartTableIndex( DirtyPageTable, TRUE );
  3343. PageEntry = GetRestartEntryFromIndex( DirtyPageTable, PageIndex );
  3344. //
  3345. // Now fill in the Dirty Page Entry, except for the Lcns, because
  3346. // we are not allowed to take page faults now.
  3347. // Use the index for the in-memory table now. We will update
  3348. // this to the on-disk index back in CheckpointVolume.
  3349. //
  3350. PageEntry->TargetAttribute = NonpagedScb->OpenAttributeTableIndex;
  3351. ASSERT( NonpagedScb->OnDiskOatIndex != 0 );
  3352. PageEntry->LengthOfTransfer = Length;
  3353. //
  3354. // Put the Vcn (FileOffset) and OldestLsn into the page at this point. Note
  3355. // we don't want to put an Lsn into the table which is older than the current
  3356. // BaseLsn. Store it here for now and we will fix it up when we process the
  3357. // DiryPage table back in the checkpoint code.
  3358. //
  3359. if (NonpagedScb->Vcb->RestartVersion == 0) {
  3360. ((PDIRTY_PAGE_ENTRY_V0) PageEntry)->Vcn = FileOffset->QuadPart;
  3361. ((PDIRTY_PAGE_ENTRY_V0) PageEntry)->OldestLsn = *OldestLsn;
  3362. } else {
  3363. PageEntry->Vcn = FileOffset->QuadPart;
  3364. PageEntry->OldestLsn = *OldestLsn;
  3365. }
  3366. //
  3367. // Update the oldest lsn info if this is the new oldest lsn
  3368. //
  3369. if ((OldestLsn->QuadPart != 0) &&
  3370. (OldestLsn->QuadPart < DirtyPageContext->OldestLsn.QuadPart)) {
  3371. if (DirtyPageContext->OldestFileObject != NULL) {
  3372. ObDereferenceObject( DirtyPageContext->OldestFileObject );
  3373. }
  3374. DirtyPageContext->DirtyPageIndex = PageIndex;
  3375. DirtyPageContext->OldestFileObject = FileObject;
  3376. DirtyPageContext->OldestLsn.QuadPart = OldestLsn->QuadPart;
  3377. ObReferenceObject( FileObject );
  3378. }
  3379. }
  3380. DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
  3381. return;
  3382. UNREFERENCED_PARAMETER( Context1 );
  3383. UNREFERENCED_PARAMETER( NewestLsn );
  3384. }
  3385. //
  3386. // Internal support routine
  3387. //
  3388. BOOLEAN
  3389. LookupLcns (
  3390. IN PIRP_CONTEXT IrpContext,
  3391. IN PSCB Scb,
  3392. IN VCN Vcn,
  3393. IN ULONG ClusterCount,
  3394. IN BOOLEAN MustBeAllocated,
  3395. OUT PLCN_UNALIGNED FirstLcn
  3396. )
  3397. /*++
  3398. Routine Description:
  3399. This routine looks up the Lcns for a range of Vcns, and stores them in
  3400. an output array. One Lcn is stored for each Vcn in the range, even
  3401. if the Lcns are contiguous.
  3402. Arguments:
  3403. Scb - Scb for stream on which lookup should occur.
  3404. Vcn - Start of range of Vcns to look up.
  3405. ClusterCount - Number of Vcns to look up.
  3406. MustBeAllocated - FALSE - if need not be allocated, and should check Mcb only
  3407. TRUE - if it must be allocated as far as caller knows (i.e.,
  3408. NtfsLookupAllocation also has checks)
  3409. FirstLcn - Pointer to storage for first Lcn. The caller must guarantee
  3410. that there is enough space to store ClusterCount Lcns.
  3411. Return Value:
  3412. BOOLEAN - TRUE if we found the clusters, FALSE otherwise.
  3413. --*/
  3414. {
  3415. BOOLEAN Allocated;
  3416. LONGLONG Clusters;
  3417. LCN Lcn;
  3418. ULONG i;
  3419. PAGED_CODE();
  3420. DebugTrace( +1, Dbg, ("LookupLcns:\n") );
  3421. DebugTrace( 0, Dbg, ("Scb = %08l\n", Scb) );
  3422. DebugTrace( 0, Dbg, ("Vcn = %016I64x\n", Vcn) );
  3423. DebugTrace( 0, Dbg, ("ClusterCount = %08l\n", ClusterCount) );
  3424. DebugTrace( 0, Dbg, ("FirstLcn = %08lx\n", FirstLcn) );
  3425. //
  3426. // Loop until we have looked up all of the clusters
  3427. //
  3428. while (ClusterCount != 0) {
  3429. if (MustBeAllocated) {
  3430. //
  3431. // Lookup the next run.
  3432. //
  3433. Allocated = NtfsLookupAllocation( IrpContext,
  3434. Scb,
  3435. Vcn,
  3436. &Lcn,
  3437. &Clusters,
  3438. NULL,
  3439. NULL );
  3440. ASSERT( Lcn != 0 );
  3441. //
  3442. // Raise if this case not met. Otherwise we could walk off the end
  3443. // of the LCN array.
  3444. //
  3445. if (!Allocated) {
  3446. return FALSE;
  3447. } else if (Lcn == 0) {
  3448. NtfsRaiseStatus( IrpContext, STATUS_FILE_CORRUPT_ERROR, NULL, Scb->Fcb );
  3449. }
  3450. } else {
  3451. Allocated = NtfsLookupNtfsMcbEntry( &Scb->Mcb, Vcn, &Lcn, &Clusters, NULL, NULL, NULL, NULL );
  3452. //
  3453. // If we are off the end of the Mcb, then set up to just return
  3454. // Li0 for as many Lcns as are being looked up.
  3455. //
  3456. if (!Allocated ||
  3457. (Lcn == UNUSED_LCN)) {
  3458. Lcn = 0;
  3459. Clusters = ClusterCount;
  3460. Allocated = FALSE;
  3461. }
  3462. }
  3463. //
  3464. // If we got as many clusters as we were looking for, then just
  3465. // take the number we were looking for.
  3466. //
  3467. if (Clusters > ClusterCount) {
  3468. Clusters = ClusterCount;
  3469. }
  3470. //
  3471. // Fill in the Lcns in the header.
  3472. //
  3473. for (i = 0; i < (ULONG)Clusters; i++) {
  3474. *(FirstLcn++) = Lcn;
  3475. if (Allocated) {
  3476. Lcn = Lcn + 1;
  3477. }
  3478. }
  3479. //
  3480. // Adjust loop variables for the number Lcns we just received.
  3481. //
  3482. Vcn = Vcn + Clusters;
  3483. ClusterCount -= (ULONG)Clusters;
  3484. }
  3485. DebugTrace( -1, Dbg, ("LookupLcns -> VOID\n") );
  3486. return TRUE;
  3487. }
  3488. VOID
  3489. InitializeNewTable (
  3490. IN ULONG EntrySize,
  3491. IN ULONG NumberEntries,
  3492. OUT PRESTART_POINTERS TablePointer
  3493. )
  3494. /*++
  3495. Routine Description:
  3496. This routine is called to allocate and initialize a new table when the
  3497. associated Restart Table is being allocated or extended.
  3498. Arguments:
  3499. EntrySize - Size of the table entries, in bytes.
  3500. NumberEntries - Number of entries to allocate for the table.
  3501. TablePointer - Returns a pointer to the table.
  3502. Return Value:
  3503. None
  3504. --*/
  3505. {
  3506. PRESTART_TABLE Table;
  3507. PULONG Entry;
  3508. ULONG Size;
  3509. ULONG Offset;
  3510. ASSERT( EntrySize != 0 );
  3511. //
  3512. // Calculate size of table to allocate.
  3513. //
  3514. Size = EntrySize * NumberEntries + sizeof(RESTART_TABLE);
  3515. //
  3516. // Allocate and zero out the table.
  3517. //
  3518. Table =
  3519. TablePointer->Table = NtfsAllocatePool( NonPagedPool, Size );
  3520. RtlZeroMemory( Table, Size );
  3521. //
  3522. // Initialize the table header.
  3523. //
  3524. Table->EntrySize = (USHORT)EntrySize;
  3525. Table->NumberEntries = (USHORT)NumberEntries;
  3526. Table->FreeGoal = MAXULONG;
  3527. Table->FirstFree = sizeof( RESTART_TABLE );
  3528. Table->LastFree = Table->FirstFree + (NumberEntries - 1) * EntrySize;
  3529. //
  3530. // Initialize the free list.
  3531. //
  3532. for (Entry = (PULONG)(Table + 1), Offset = sizeof(RESTART_TABLE) + EntrySize;
  3533. Entry < (PULONG)((PCHAR)Table + Table->LastFree);
  3534. Entry = (PULONG)((PCHAR)Entry + EntrySize), Offset += EntrySize) {
  3535. *Entry = Offset;
  3536. }
  3537. ASSERT_RESTART_TABLE(Table);
  3538. }
  3539. VOID
  3540. NtfsFreeRecentlyDeallocated (
  3541. IN PIRP_CONTEXT IrpContext,
  3542. IN PVCB Vcb,
  3543. IN PLSN BaseLsn,
  3544. IN ULONG CleanVolume
  3545. )
  3546. /*++
  3547. Routine Description:
  3548. Free up recently deallocated clusters for reuse
  3549. Arguments:
  3550. IrpContext -
  3551. Vcb - volume to clean up
  3552. BaseLsn - the lsn we're up to now in the logfile, used to determine what can be freed
  3553. and the new threshold for future frees
  3554. CleanVolume - if true the volume is being clean checkpointed and all the clusters can be freed
  3555. Return Value:
  3556. None
  3557. --*/
  3558. {
  3559. PDEALLOCATED_CLUSTERS Clusters;
  3560. BOOLEAN RemovedClusters = FALSE;
  3561. PAGED_CODE();
  3562. //
  3563. // Quick exit if the list is empty
  3564. //
  3565. if (IsListEmpty( &Vcb->DeallocatedClusterListHead ) || (Vcb->BitmapScb == NULL)) {
  3566. return;
  3567. }
  3568. NtfsAcquireExclusiveScb( IrpContext, Vcb->BitmapScb );
  3569. Clusters = (PDEALLOCATED_CLUSTERS)Vcb->DeallocatedClusterListHead.Blink;
  3570. //
  3571. // Now we want to check if we can release any of the clusters in the
  3572. // deallocated cluster arrays. We know we can look at the
  3573. // fields in the PriorDeallocatedClusters structure because they
  3574. // are never modified in the running system.
  3575. //
  3576. // We will continue from the oldest in the list list until
  3577. //
  3578. // 1) there are no more dealloc lists
  3579. // 2) there are no clusters in the dealloc list (it must be the only one at this point)
  3580. // 3) the lsn == 0 and we're dirty which means we're at the front
  3581. // 4) the lsn is newer in deallocated cluster list
  3582. //
  3583. try {
  3584. while ((!IsListEmpty( &Vcb->DeallocatedClusterListHead )) &&
  3585. (((Clusters->Lsn.QuadPart != 0) && (BaseLsn->QuadPart > Clusters->Lsn.QuadPart)) ||
  3586. CleanVolume)) {
  3587. RemovedClusters = TRUE;
  3588. //
  3589. // For all deallocated during clean checkpoints and non-most recent
  3590. // ones during fuzzy ones:
  3591. // Remove all of the mappings in the Mcb. Protect this with
  3592. // a try-except.
  3593. //
  3594. try {
  3595. try {
  3596. ULONG i;
  3597. ULONGLONG StartingVcn;
  3598. ULONGLONG StartingLcn;
  3599. ULONGLONG ClusterCount;
  3600. if (Clusters->ClusterCount > 0) {
  3601. for (i = 0; FsRtlGetNextLargeMcbEntry( &Clusters->Mcb, i, &StartingVcn, &StartingLcn, &ClusterCount ); i += 1) {
  3602. if (StartingVcn == StartingLcn) {
  3603. if (NtfsAddCachedRun( IrpContext,
  3604. Vcb,
  3605. StartingLcn,
  3606. ClusterCount,
  3607. RunStateFree ) <= 0) break;
  3608. }
  3609. }
  3610. }
  3611. } finally {
  3612. PDEALLOCATED_CLUSTERS NextClusters = (PDEALLOCATED_CLUSTERS)Clusters->Link.Blink;
  3613. //
  3614. // We are committed to freeing the clusters out of the PriorDeallocatedClusters
  3615. // in any case.
  3616. //
  3617. Vcb->DeallocatedClusters -= Clusters->ClusterCount;
  3618. //
  3619. // Move this cluster list out of the vcb
  3620. //
  3621. RemoveEntryList( &Clusters->Link );
  3622. //
  3623. // delete dynamic clusters lists / reset static ones
  3624. //
  3625. if ((Clusters != &Vcb->DeallocatedClusters1) && (Clusters != &Vcb->DeallocatedClusters2 )) {
  3626. FsRtlUninitializeLargeMcb( &Clusters->Mcb );
  3627. NtfsFreePool( Clusters );
  3628. } else {
  3629. Clusters->Link.Flink = NULL;
  3630. Clusters->ClusterCount = 0;
  3631. FsRtlResetLargeMcb( &Clusters->Mcb, TRUE );
  3632. }
  3633. ASSERT( Vcb->DeallocatedClusters >= 0 );
  3634. Clusters = NextClusters;
  3635. }
  3636. } except( NtfsCatchOutOfMemoryExceptionFilter( IrpContext, GetExceptionInformation() )) {
  3637. //
  3638. // Keep going even if out of memory
  3639. //
  3640. NtfsMinimumExceptionProcessing( IrpContext );
  3641. NOTHING;
  3642. }
  3643. }
  3644. //
  3645. // If we removed any clusters on a fuzzy checkpoint lets make a new active one so
  3646. // the current active one can be cleaned up eventually
  3647. // On a clean checkpoint if we removed all the nodes add a blank one back
  3648. //
  3649. if (!CleanVolume) {
  3650. ASSERT( !IsListEmpty( &Vcb->DeallocatedClusterListHead ) );
  3651. if (RemovedClusters && (Clusters->ClusterCount > 0)) {
  3652. Clusters = NtfsGetDeallocatedClusters( IrpContext, Vcb );
  3653. }
  3654. } else if (IsListEmpty( &Vcb->DeallocatedClusterListHead )) {
  3655. ASSERT( Vcb->DeallocatedClusters1.Link.Flink == NULL );
  3656. Vcb->DeallocatedClusters1.Lsn.QuadPart = 0;
  3657. InsertHeadList( &Vcb->DeallocatedClusterListHead, &Vcb->DeallocatedClusters1.Link );
  3658. }
  3659. } finally {
  3660. NtfsReleaseScb( IrpContext, Vcb->BitmapScb );
  3661. }
  3662. }
  3663. VOID
  3664. NtfsCleanupFailedTransaction (
  3665. IN PIRP_CONTEXT IrpContext
  3666. )
  3667. /*++
  3668. Routine Description:
  3669. This routine is called to cleanup the IrpContext and free structures
  3670. in the event a transaction fails to commit or abort.
  3671. Arguments:
  3672. Return Value:
  3673. None
  3674. --*/
  3675. {
  3676. PUSN_FCB ThisUsn;
  3677. PUSN_FCB LastUsn;
  3678. PAGED_CODE();
  3679. //
  3680. // Clear the flags indicating a transaction is underway.
  3681. //
  3682. ClearFlag( IrpContext->Flags,
  3683. IRP_CONTEXT_FLAG_WROTE_LOG | IRP_CONTEXT_FLAG_RAISED_STATUS | IRP_CONTEXT_FLAG_MODIFIED_BITMAP );
  3684. //
  3685. // Make sure the recently deallocated queue is empty.
  3686. //
  3687. try {
  3688. if (!IsListEmpty( &IrpContext->RecentlyDeallocatedQueue )) {
  3689. NtfsDeallocateRecordsComplete( IrpContext );
  3690. }
  3691. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3692. EXCEPTION_EXECUTE_HANDLER :
  3693. EXCEPTION_CONTINUE_SEARCH) {
  3694. NOTHING;
  3695. }
  3696. //
  3697. // Show that we haven't deallocated any clusters.
  3698. //
  3699. IrpContext->DeallocatedClusters = 0;
  3700. IrpContext->FreeClusterChange = 0;
  3701. //
  3702. // Don't rollback any size changes.
  3703. //
  3704. try {
  3705. NtfsUpdateScbSnapshots( IrpContext );
  3706. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3707. EXCEPTION_EXECUTE_HANDLER :
  3708. EXCEPTION_CONTINUE_SEARCH) {
  3709. NOTHING;
  3710. }
  3711. //
  3712. // Make sure the last restart area is zeroed.
  3713. //
  3714. IrpContext->LastRestartArea.QuadPart = 0;
  3715. //
  3716. // Pull the Usn Fcb fields.
  3717. //
  3718. ThisUsn = &IrpContext->Usn;
  3719. try {
  3720. do {
  3721. if (ThisUsn->CurrentUsnFcb != NULL) {
  3722. PFCB UsnFcb = ThisUsn->CurrentUsnFcb;
  3723. NtfsLockFcb( IrpContext, UsnFcb );
  3724. //
  3725. // If any rename flags are part of the new reasons then
  3726. // make sure to look the name up again.
  3727. //
  3728. if (FlagOn( ThisUsn->NewReasons,
  3729. USN_REASON_RENAME_NEW_NAME | USN_REASON_RENAME_OLD_NAME )) {
  3730. ClearFlag( UsnFcb->FcbState, FCB_STATE_VALID_USN_NAME );
  3731. }
  3732. //
  3733. // Now restore the reason and source info fields.
  3734. //
  3735. ClearFlag( UsnFcb->FcbUsnRecord->UsnRecord.Reason,
  3736. ThisUsn->NewReasons );
  3737. if (UsnFcb->FcbUsnRecord->UsnRecord.Reason == 0) {
  3738. UsnFcb->FcbUsnRecord->UsnRecord.SourceInfo = 0;
  3739. } else {
  3740. SetFlag( UsnFcb->FcbUsnRecord->UsnRecord.SourceInfo,
  3741. ThisUsn->RemovedSourceInfo );
  3742. }
  3743. NtfsUnlockFcb( IrpContext, UsnFcb );
  3744. //
  3745. // Zero out the structure.
  3746. //
  3747. ThisUsn->CurrentUsnFcb = NULL;
  3748. ThisUsn->NewReasons = 0;
  3749. ThisUsn->RemovedSourceInfo = 0;
  3750. ThisUsn->UsnFcbFlags = 0;
  3751. //
  3752. // If not the first pass through the loop then update
  3753. // the last usn structure with what we point to here.
  3754. //
  3755. if (ThisUsn != &IrpContext->Usn) {
  3756. LastUsn->NextUsnFcb = ThisUsn->NextUsnFcb;
  3757. NtfsFreePool( ThisUsn );
  3758. ThisUsn = LastUsn;
  3759. }
  3760. }
  3761. if (ThisUsn->NextUsnFcb == NULL) { break; }
  3762. LastUsn = ThisUsn;
  3763. ThisUsn = ThisUsn->NextUsnFcb;
  3764. } while (TRUE);
  3765. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3766. EXCEPTION_EXECUTE_HANDLER :
  3767. EXCEPTION_CONTINUE_SEARCH) {
  3768. NOTHING;
  3769. }
  3770. //
  3771. // Don't wake any waiters for this failed operation.
  3772. //
  3773. try {
  3774. if (IrpContext->CheckNewLength != NULL) {
  3775. NtfsProcessNewLengthQueue( IrpContext, TRUE );
  3776. }
  3777. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3778. EXCEPTION_EXECUTE_HANDLER :
  3779. EXCEPTION_CONTINUE_SEARCH) {
  3780. NOTHING;
  3781. }
  3782. //
  3783. // Remove this from the transaction table if present.
  3784. //
  3785. if (IrpContext->TransactionId != 0) {
  3786. NtfsAcquireExclusiveRestartTable( &IrpContext->Vcb->TransactionTable,
  3787. TRUE );
  3788. NtfsFreeRestartTableIndex( &IrpContext->Vcb->TransactionTable,
  3789. IrpContext->TransactionId );
  3790. //
  3791. // Notify any waiters if there are no more transactions
  3792. //
  3793. if (IrpContext->Vcb->TransactionTable.Table->NumberAllocated == 0) {
  3794. KeSetEvent( &IrpContext->Vcb->TransactionsDoneEvent, 0, FALSE );
  3795. }
  3796. NtfsReleaseRestartTable( &IrpContext->Vcb->TransactionTable );
  3797. IrpContext->TransactionId = 0;
  3798. }
  3799. IrpContext->ExceptionStatus = STATUS_SUCCESS;
  3800. return;
  3801. }
  3802. //
  3803. // Local support routine
  3804. //
  3805. LONG
  3806. NtfsCatchOutOfMemoryExceptionFilter (
  3807. IN PIRP_CONTEXT IrpContext,
  3808. IN PEXCEPTION_POINTERS ExceptionPointer
  3809. )
  3810. /*++
  3811. Routine Description:
  3812. Exception filter for out of memory errors. This will swallow 0xC0000009A's and let
  3813. all other exceptions filter on
  3814. Arguments:
  3815. IrpContext - IrpContext
  3816. ExceptionPointer - Pointer to the exception context.
  3817. Return Value:
  3818. Exception status - EXCEPTION_CONTINUE_SEARCH if we want to raise to another handler,
  3819. EXCEPTION_EXECUTE_HANDLER if we plan to proceed on.
  3820. --*/
  3821. {
  3822. UNREFERENCED_PARAMETER( IrpContext );
  3823. if (ExceptionPointer->ExceptionRecord->ExceptionCode != STATUS_INSUFFICIENT_RESOURCES) {
  3824. return EXCEPTION_CONTINUE_SEARCH;
  3825. }
  3826. return EXCEPTION_EXECUTE_HANDLER;
  3827. }
  3828. //
  3829. // Local support routine
  3830. //
  3831. LONG
  3832. NtfsCheckpointExceptionFilter (
  3833. IN PIRP_CONTEXT IrpContext,
  3834. IN PEXCEPTION_POINTERS ExceptionPointer,
  3835. IN NTSTATUS ExceptionCode
  3836. )
  3837. {
  3838. //
  3839. // Swallow all expected errors if this is a dismount doing a log file full.
  3840. //
  3841. if ((FlagOn( IrpContext->State, IRP_CONTEXT_STATE_DISMOUNT_LOG_FLUSH )) &&
  3842. (FsRtlIsNtstatusExpected( ExceptionCode ))) {
  3843. return EXCEPTION_EXECUTE_HANDLER;
  3844. } else {
  3845. return EXCEPTION_CONTINUE_SEARCH;
  3846. }
  3847. UNREFERENCED_PARAMETER( ExceptionPointer );
  3848. }
  3849. VOID
  3850. NtfsFreeAttributeEntry (
  3851. IN PVCB Vcb,
  3852. IN POPEN_ATTRIBUTE_ENTRY AttributeEntry
  3853. )
  3854. /*++
  3855. Routine Description:
  3856. Free an attribute entry and all the connected entries in the other tables
  3857. + any memory associated with it
  3858. Arguments:
  3859. IrpContext - IrpContext
  3860. AttributeEntry - Entry to free
  3861. Return Value:
  3862. NONE
  3863. --*/
  3864. {
  3865. ULONG Index;
  3866. if (AttributeEntry->OatData->AttributeNamePresent) {
  3867. //
  3868. // Delete its name, if it has one. Check that we aren't
  3869. // using the hardcode $I30 name.
  3870. //
  3871. NtfsFreeScbAttributeName( AttributeEntry->OatData->AttributeName.Buffer );
  3872. } else if (AttributeEntry->OatData->Overlay.Scb != NULL) {
  3873. AttributeEntry->OatData->Overlay.Scb->NonpagedScb->OpenAttributeTableIndex =
  3874. AttributeEntry->OatData->Overlay.Scb->NonpagedScb->OnDiskOatIndex = 0;
  3875. }
  3876. //
  3877. // Get the index for the entry.
  3878. //
  3879. Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  3880. AttributeEntry );
  3881. if (Vcb->RestartVersion == 0) {
  3882. NtfsAcquireExclusiveRestartTable( Vcb->OnDiskOat, TRUE );
  3883. NtfsFreeRestartTableIndex( Vcb->OnDiskOat, AttributeEntry->OatData->OnDiskAttributeIndex );
  3884. NtfsReleaseRestartTable( Vcb->OnDiskOat );
  3885. }
  3886. NtfsFreeOpenAttributeData( AttributeEntry->OatData );
  3887. NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable, Index );
  3888. }
  3889. ULONG
  3890. NtfsCalculateNamedBytes (
  3891. IN PIRP_CONTEXT IrpContext,
  3892. IN PVCB Vcb
  3893. )
  3894. /*++
  3895. Routine Description:
  3896. Calculated number of named bytes necc. to hold all the open attributes
  3897. Arguments:
  3898. IrpContext - IrpContext
  3899. Vcb -
  3900. Return Value:
  3901. Number of bytes needed to write all the names of the open attributes
  3902. --*/
  3903. {
  3904. POPEN_ATTRIBUTE_ENTRY AttributeEntry;
  3905. ULONG NameBytes = 0;
  3906. //
  3907. // Loop to see how much we will have to allocate for attribute names.
  3908. //
  3909. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  3910. while (AttributeEntry != NULL) {
  3911. //
  3912. // This checks for one type of aliasing.
  3913. //
  3914. // ASSERT( (AttributeEntry->Overlay.Scb == NULL) ||
  3915. // (AttributeEntry->Overlay.Scb->OpenAttributeTableIndex ==
  3916. // GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  3917. // AttributeEntry )));
  3918. //
  3919. // Clear the DirtyPageSeen flag prior to collecting the dirty pages,
  3920. // to help us figure out which Open Attribute Entries we still need.
  3921. //
  3922. AttributeEntry->DirtyPagesSeen = FALSE;
  3923. if (AttributeEntry->OatData->AttributeName.Length != 0) {
  3924. //
  3925. // Add to our name total, the size of an Attribute Entry,
  3926. // which includes the size of the terminating UNICODE_NULL.
  3927. //
  3928. NameBytes += AttributeEntry->OatData->AttributeName.Length +
  3929. sizeof(ATTRIBUTE_NAME_ENTRY);
  3930. }
  3931. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  3932. AttributeEntry );
  3933. }
  3934. return NameBytes;
  3935. UNREFERENCED_PARAMETER( IrpContext );
  3936. }