Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4886 lines
141 KiB

  1. /*++
  2. Copyright (c) 1991 Microsoft Corporation
  3. Module Name:
  4. LogSup.c
  5. Abstract:
  6. This module implements the Ntfs interfaces to the Log File Service (LFS).
  7. Author:
  8. Tom Miller [TomM] 24-Jul-1991
  9. Revision History:
  10. --*/
  11. #include "NtfsProc.h"
  12. //
  13. // The local debug trace level
  14. //
  15. #define Dbg (DEBUG_TRACE_LOGSUP)
  16. //
  17. // Define a tag for general pool allocations from this module
  18. //
  19. #undef MODULE_POOL_TAG
  20. #define MODULE_POOL_TAG ('LFtN')
  21. #ifdef NTFSDBG
  22. #define ASSERT_RESTART_TABLE(T) { \
  23. PULONG _p = (PULONG)(((PCHAR)(T)) + sizeof(RESTART_TABLE)); \
  24. ULONG _Count = ((T)->EntrySize/4) * (T)->NumberEntries; \
  25. ULONG _i; \
  26. for (_i = 0; _i < _Count; _i += 1) { \
  27. if (_p[_i] == 0xDAADF00D) { \
  28. DbgPrint("DaadFood for table %08lx, At %08lx\n", (T), &_p[_i]); \
  29. ASSERTMSG("ASSERT_RESTART_TABLE ", FALSE); \
  30. } \
  31. } \
  32. }
  33. #else
  34. #define ASSERT_RESTART_TABLE(T) {NOTHING;}
  35. #endif
//
//  Local procedure prototypes
//

//
//  Pointer to an LCN declared with the UNALIGNED qualifier.  LookupLcns
//  returns its results through a pointer of this type.
//

typedef LCN UNALIGNED *PLCN_UNALIGNED;

//
//  Takes a file object, a byte range within it (FileOffset, Length), an
//  oldest/newest Lsn pair and two opaque context pointers.  The definition
//  and the callers are not part of the code visible here.
//  NOTE(review): the signature looks like a dirty-page enumeration callback
//  -- confirm against the definition.
//

VOID
DirtyPageRoutine (
    IN PFILE_OBJECT FileObject,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN PLSN OldestLsn,
    IN PLSN NewestLsn,
    IN PVOID Context1,
    IN PVOID Context2
    );

//
//  NtfsWriteLog calls this routine with MustBeAllocated == TRUE to fill the
//  LcnsForPage array of a log record header for ClusterCount clusters
//  starting at Vcn in the given Scb, and treats a FALSE return as "this
//  range is not allocated".  The definition is not part of the code visible
//  here.
//

BOOLEAN
LookupLcns (
    IN PIRP_CONTEXT IrpContext,
    IN PSCB Scb,
    IN VCN Vcn,
    IN ULONG ClusterCount,
    IN BOOLEAN MustBeAllocated,
    OUT PLCN_UNALIGNED FirstLcn
    );

//
//  Exception filter prototypes.  Both take the IrpContext and the exception
//  pointers and return a LONG; the second additionally takes an NTSTATUS
//  exception code.  Their definitions are not part of the code visible here.
//

LONG
NtfsCatchOutOfMemoryExceptionFilter (
    IN PIRP_CONTEXT IrpContext,
    IN PEXCEPTION_POINTERS ExceptionPointer
    );

LONG
NtfsCheckpointExceptionFilter (
    IN PIRP_CONTEXT IrpContext,
    IN PEXCEPTION_POINTERS ExceptionPointer,
    IN NTSTATUS ExceptionCode
    );
//
//  When ALLOC_PRAGMA is defined, assign each routine listed below to the
//  PAGE section.  Keep this list alphabetical after LookupLcns, and add any
//  new routine that starts with PAGED_CODE() (as NtfsWriteLog and
//  NtfsCheckpointVolume do).
//

#ifdef ALLOC_PRAGMA
#pragma alloc_text(PAGE, LookupLcns)
#pragma alloc_text(PAGE, NtfsCheckpointCurrentTransaction)
#pragma alloc_text(PAGE, NtfsCheckpointForLogFileFull)
#pragma alloc_text(PAGE, NtfsCheckpointVolume)
#pragma alloc_text(PAGE, NtfsCleanCheckpoint)
#pragma alloc_text(PAGE, NtfsCleanupFailedTransaction)
#pragma alloc_text(PAGE, NtfsCommitCurrentTransaction)
#pragma alloc_text(PAGE, NtfsFreeRecentlyDeallocated)
#pragma alloc_text(PAGE, NtfsFreeRestartTable)
#pragma alloc_text(PAGE, NtfsGetFirstRestartTable)
#pragma alloc_text(PAGE, NtfsGetNextRestartTable)
#pragma alloc_text(PAGE, NtfsInitializeLogging)
#pragma alloc_text(PAGE, NtfsInitializeRestartTable)
#pragma alloc_text(PAGE, NtfsStartLogFile)
#pragma alloc_text(PAGE, NtfsStopLogFile)
#pragma alloc_text(PAGE, NtfsUpdateOatVersion)
#pragma alloc_text(PAGE, NtfsWriteLog)
#endif
LSN
NtfsWriteLog (
    IN PIRP_CONTEXT IrpContext,
    IN PSCB Scb,
    IN PBCB Bcb OPTIONAL,
    IN NTFS_LOG_OPERATION RedoOperation,
    IN PVOID RedoBuffer OPTIONAL,
    IN ULONG RedoLength,
    IN NTFS_LOG_OPERATION UndoOperation,
    IN PVOID UndoBuffer OPTIONAL,
    IN ULONG UndoLength,
    IN LONGLONG StreamOffset,
    IN ULONG RecordOffset,
    IN ULONG AttributeOffset,
    IN ULONG StructureSize
    )

/*++

Routine Description:

    This routine implements an Ntfs-specific interface to LFS for the
    purpose of logging updates to file record segments and resident
    attributes.

    The caller creates one of the predefined log record formats as
    determined by the given LogOperation, and calls this routine with
    this log record and pointers to the respective file and attribute
    records.  The list of log operations along with the respective structure
    expected for the Log Buffer is present in ntfslog.h.

    In outline the routine:

        - makes sure the Scb has an open attribute table entry (logging an
          OpenNonresidentAttribute record through a recursive call if it had
          to create one),
        - makes sure the IrpContext has a transaction table entry,
        - builds the log record header, including the Lcns backing the
          structure being updated,
        - writes the record through LfsWrite while holding the transaction
          table, and updates the transaction entry with the returned Lsn,
        - in its finally clause, sets the Bcb dirty (if one was given) and
          arms the volume checkpoint timer if it was not already set.

Arguments:

    IrpContext - Context of the current request.  If its TransactionId is
        still zero a transaction table entry is allocated for it here, and
        IRP_CONTEXT_FLAG_WROTE_LOG is set in its Flags once a log record
        has been written.

    Scb - Pointer to the Scb for the respective file or Mft.  The caller must
        have at least shared access to this Scb.

    Bcb - If specified, this Bcb will be set dirty specifying the Lsn of
        the log record written.  The Bcb is set dirty in the finally clause,
        so this happens even if the routine raises; in that case no Lsn
        is supplied.

    RedoOperation - One of the log operation codes defined in ntfslog.h.

    RedoBuffer - A pointer to the structure expected for the given Redo operation,
        as summarized in ntfslog.h.  This pointer should be omitted if and
        only if the table in ntfslog.h does not show a log record for this
        log operation.

    RedoLength - Length of the Redo buffer in bytes.

    UndoOperation - One of the log operation codes defined in ntfslog.h.

        Must be CompensationLogRecord if logging the Undo of
        a previous operation, such as during transaction abort.
        In this case, of course, the Redo information is from
        the Undo information of the record being undone.  See
        next parameter.

    UndoBuffer - A pointer to the structure expected for the given Undo operation,
        as summarized in ntfslog.h.  This pointer should be omitted if and
        only if the table in ntfslog.h does not show a log record for this
        log operation.  If this pointer is identical to RedoBuffer, then
        UndoLength is ignored and only a single copy of the RedoBuffer is
        made, but described by both the Redo and Undo portions of the log
        record.

        For a compensation log record (UndoOperation ==
        CompensationLogRecord), this argument must point to the
        UndoNextLsn of the log record being compensated.

    UndoLength - Length of the Undo buffer in bytes.  Ignored if RedoBuffer ==
        UndoBuffer.

        For a compensation log record, this argument must be the length
        of the original redo record.  (Used during restart).

    StreamOffset - Offset within the stream for the start of the structure being
        modified (Mft or Index), or simply the stream offset for the start
        of the update.

    RecordOffset - Byte offset from StreamOffset above to update reference

    AttributeOffset - Offset within a value to which an update applies, if relevant.

    StructureSize - Size of the entire structure being logged.  If zero, no
        open attribute table entry is created and no Lcns are looked up.

Return Value:

    The Lsn of the log record written.  For most callers, this value may be ignored,
    because the Lsn is also correctly recorded in the transaction context.

    Li0 is returned, and nothing is logged, if the Vcb has no valid log handle
    or the volume is mounted read-only.

    If LookupLcns reports that the range is not allocated, no record is written
    and the value of LfsQueryLastLsn is returned instead.

    If an error occurs this procedure will raise.

--*/

{
    LFS_WRITE_ENTRY WriteEntries[3];

    //
    //  On-stack log record header with room for extra Lcns.  The sizing
    //  arithmetic below (LogClusterCount - 1 additional Lcns) implies the
    //  header itself already holds the first Lcn, so this local can describe
    //  up to PAGE_SIZE/512 clusters; anything larger goes to pool.
    //

    struct {

        NTFS_LOG_RECORD_HEADER LogRecordHeader;
        LCN Runs[PAGE_SIZE/512 - 1];

    } LocalHeader;

    PNTFS_LOG_RECORD_HEADER MyHeader;
    PVCB Vcb;

    LSN UndoNextLsn;
    LSN ReturnLsn;

    //
    //  Lsn handed to CcSetDirtyPinnedData in the finally clause.  Stays NULL
    //  unless we reach one of the points below that set it.
    //

    PLSN DirtyLsn = NULL;

    //
    //  WriteIndex counts the WriteEntries in use.  UndoIndex and RedoIndex are
    //  the entries holding the undo and redo data; zero means "none", since
    //  entry zero is always the header.
    //

    ULONG WriteIndex = 0;
    ULONG UndoIndex = 0;
    ULONG RedoIndex = 0;

    //
    //  Changes to apply to the transaction entry's undo accounting if the
    //  write succeeds.  UndoAdjustmentForLfs is passed to Lfs in addition to
    //  UndoBytes but is not added to the transaction entry here.
    //

    LONG UndoBytes = 0;
    LONG UndoAdjustmentForLfs = 0;
    LONG UndoRecords = 0;

    PTRANSACTION_ENTRY TransactionEntry;

    //
    //  State for the open attribute entry we may create.  OpenAttributeIndex
    //  is nonzero only while a new entry is partially built; see the finally
    //  clause.
    //

    ULONG OpenAttributeIndex = 0;
    ULONG OnDiskAttributeIndex = 0;
    POPEN_ATTRIBUTE_DATA AttributeData = NULL;
    BOOLEAN AttributeTableAcquired = FALSE;
    BOOLEAN TransactionTableAcquired = FALSE;

    ULONG LogClusterCount = ClustersFromBytes( Scb->Vcb, StructureSize );
    VCN LogVcn = LlClustersFromBytesTruncate( Scb->Vcb, StreamOffset );

    PAGED_CODE();

    Vcb = Scb->Vcb;

    //
    //  If the log handle is gone, then we noop this call.
    //

    if (!FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {

        return Li0; //**** LfsZeroLsn;
    }

    if (FlagOn( Vcb->VcbState, VCB_STATE_MOUNT_READ_ONLY )) {

        //
        //  We'd like to have a chat with whoever sent the log write.
        //  The assert below always fires when we get here; that is its
        //  purpose.  On builds without asserts we quietly noop the call.
        //

        ASSERT(!FlagOn( Vcb->VcbState, VCB_STATE_MOUNT_READ_ONLY ));
        return Li0;
    }

    DebugTrace( +1, Dbg, ("NtfsWriteLog:\n") );
    DebugTrace( 0, Dbg, ("Scb = %08lx\n", Scb) );
    DebugTrace( 0, Dbg, ("Bcb = %08lx\n", Bcb) );
    DebugTrace( 0, Dbg, ("RedoOperation = %08lx\n", RedoOperation) );
    DebugTrace( 0, Dbg, ("RedoBuffer = %08lx\n", RedoBuffer) );
    DebugTrace( 0, Dbg, ("RedoLength = %08lx\n", RedoLength) );
    DebugTrace( 0, Dbg, ("UndoOperation = %08lx\n", UndoOperation) );
    DebugTrace( 0, Dbg, ("UndoBuffer = %08lx\n", UndoBuffer) );
    DebugTrace( 0, Dbg, ("UndoLength = %08lx\n", UndoLength) );
    DebugTrace( 0, Dbg, ("StreamOffset = %016I64x\n", StreamOffset) );
    DebugTrace( 0, Dbg, ("RecordOffset = %08lx\n", RecordOffset) );
    DebugTrace( 0, Dbg, ("AttributeOffset = %08lx\n", AttributeOffset) );
    DebugTrace( 0, Dbg, ("StructureSize = %08lx\n", StructureSize) );

    //
    //  Check Redo and Undo lengths.  A length is acceptable if it fits in a
    //  page for UpdateNonresidentValue, in an index buffer for an index
    //  allocation stream, or in a file record segment otherwise.  The check
    //  is skipped when no Bcb is given, and the undo check is also skipped
    //  for compensation log records.
    //

    ASSERT(((RedoOperation == UpdateNonresidentValue) && (RedoLength <= PAGE_SIZE))
           ||
           !ARGUMENT_PRESENT(Scb)
           ||
           !ARGUMENT_PRESENT(Bcb)
           ||
           ((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
            (RedoLength <= Scb->ScbType.Index.BytesPerIndexBuffer))
           ||
           (RedoLength <= Scb->Vcb->BytesPerFileRecordSegment));

    ASSERT(((UndoOperation == UpdateNonresidentValue) && (UndoLength <= PAGE_SIZE))
           ||
           !ARGUMENT_PRESENT(Scb)
           ||
           !ARGUMENT_PRESENT(Bcb)
           ||
           ((Scb->AttributeTypeCode == $INDEX_ALLOCATION) &&
            (UndoLength <= Scb->ScbType.Index.BytesPerIndexBuffer))
           ||
           (UndoLength <= Scb->Vcb->BytesPerFileRecordSegment)
           ||
           (UndoOperation == CompensationLogRecord));

    //
    //  Initialize local pointers.  MyHeader is only redirected to pool below
    //  if the on-stack header is too small; the finally clause relies on this
    //  to decide whether there is anything to free.
    //

    MyHeader = (PNTFS_LOG_RECORD_HEADER)&LocalHeader;

    try {

        //
        //  If the structure size is nonzero, then create an open attribute table
        //  entry.
        //

        if (StructureSize != 0) {

            //
            //  Allocate an entry in the open attribute table and initialize it,
            //  if it does not already exist.  If we subsequently fail, we do
            //  not have to clean this up.  It will go away on the next checkpoint.
            //

            if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {

                OPEN_ATTRIBUTE_ENTRY_V0 LocalOpenEntry;
                POPEN_ATTRIBUTE_ENTRY OpenAttributeEntry;
                POPEN_ATTRIBUTE_ENTRY_V0 OnDiskAttributeEntry;
                ULONG EntrySize;

                //
                //  LocalOpenEntry receives a copy of whichever form of the
                //  entry is on disk, so it must be large enough for either.
                //

                ASSERT( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ) >= sizeof( OPEN_ATTRIBUTE_ENTRY ));

                NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
                AttributeTableAcquired = TRUE;

                //
                //  Only proceed if the OpenAttributeTableIndex is still 0.
                //  We may reach this point for the MftScb.  It may not be
                //  acquired when logging changes to file records.  We will
                //  use the OpenAttributeTable for final synchronization
                //  for the Mft open attribute table entry.
                //

                if (Scb->NonpagedScb->OpenAttributeTableIndex == 0) {

                    //
                    //  Our structures require tables to stay within 64KB, since
                    //  we use USHORT offsets.  Things are getting out of hand
                    //  at this point anyway.  Raise log file full to reset the
                    //  table sizes if we get to this point.
                    //

                    if (SizeOfRestartTable( Vcb->OnDiskOat ) > 0xF000) {

                        NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
                    }

                    //
                    //  Allocate the indexes and then the Attribute data structure.  The
                    //  try-finally will handle any failures.
                    //

                    OpenAttributeIndex = NtfsAllocateRestartTableIndex( &Vcb->OpenAttributeTable, TRUE );
                    AttributeData = NtfsAllocatePool( PagedPool, sizeof( OPEN_ATTRIBUTE_DATA ) );
                    OpenAttributeEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
                                                                   OpenAttributeIndex );

                    //
                    //  Initialize the entry and auxiliary data.
                    //

                    if (Scb->AttributeTypeCode == $INDEX_ALLOCATION) {

                        OpenAttributeEntry->BytesPerIndexBuffer = Scb->ScbType.Index.BytesPerIndexBuffer;

                    } else {

                        OpenAttributeEntry->BytesPerIndexBuffer = 0;
                    }

                    //
                    //  It's good enough to use the last Lsn for the LsnOfOpenRecord
                    //  since we're serialized on create attributes within a file
                    //

                    OpenAttributeEntry->AttributeTypeCode = Scb->AttributeTypeCode;
                    OpenAttributeEntry->FileReference = Scb->Fcb->FileReference;
                    OpenAttributeEntry->LsnOfOpenRecord = LfsQueryLastLsn( Vcb->LogHandle );

                    AttributeData->Overlay.Scb = Scb;
                    AttributeData->AttributeName = Scb->AttributeName;
                    AttributeData->AttributeNamePresent = FALSE;

                    //
                    //  Use the open attribute entry as the default table entry.
                    //

                    Scb->NonpagedScb->OnDiskOatIndex = OpenAttributeIndex;

                    //
                    //  If the on-disk structure is needed then get it now.
                    //  For restart version 0 the on-disk table is a separate
                    //  table of V0 entries, so we allocate a second index
                    //  there and mirror the fields into it.
                    //

                    if (Vcb->RestartVersion == 0) {

                        OnDiskAttributeIndex = NtfsAllocateRestartTableIndex( Vcb->OnDiskOat, TRUE );
                        OnDiskAttributeEntry = GetRestartEntryFromIndex( Vcb->OnDiskOat,
                                                                         OnDiskAttributeIndex );

                        //
                        //  NOTE(review): the zero stored to LsnOfOpenRecord
                        //  here is overwritten a few lines below.
                        //

                        OnDiskAttributeEntry->OatIndex = OpenAttributeIndex;
                        OnDiskAttributeEntry->FileReference = Scb->Fcb->FileReference;
                        OnDiskAttributeEntry->LsnOfOpenRecord.QuadPart = 0;
                        OnDiskAttributeEntry->AttributeTypeCode = Scb->AttributeTypeCode;
                        OnDiskAttributeEntry->BytesPerIndexBuffer = OpenAttributeEntry->BytesPerIndexBuffer;
                        OnDiskAttributeEntry->LsnOfOpenRecord.QuadPart = OpenAttributeEntry->LsnOfOpenRecord.QuadPart;

                        //
                        //  Use this new index.
                        //

                        Scb->NonpagedScb->OnDiskOatIndex = OnDiskAttributeIndex;

                    //
                    //  Otherwise the in-memory table is the on-disk table and
                    //  the two indexes are the same.
                    //

                    } else {

                        OnDiskAttributeIndex = OpenAttributeIndex;
                    }

                    //
                    //  Now store the table indexes.
                    //

                    AttributeData->OnDiskAttributeIndex = OnDiskAttributeIndex;
                    Scb->NonpagedScb->OpenAttributeTableIndex = OpenAttributeIndex;

                    //
                    //  Now connect the attribute data to the table entry and the Vcb.
                    //

                    OpenAttributeEntry->OatData = AttributeData;
                    InsertTailList( &Vcb->OpenAttributeData, &AttributeData->Links );

                    //
                    //  We need to log the on-disk entry, so capture a copy in
                    //  our local while we still hold the table.  EntrySize is
                    //  set as a side effect of this call and is used as the
                    //  redo length below.
                    //

                    RtlCopyMemory( &LocalOpenEntry,
                                   GetRestartEntryFromIndex( Vcb->OnDiskOat, OnDiskAttributeIndex ),
                                   EntrySize = Vcb->OnDiskOat->Table->EntrySize );

                    NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
                    AttributeTableAcquired = FALSE;

                    //
                    //  The entry is now fully linked in.  Clear the local
                    //  index so the finally clause will not tear it down if
                    //  something later in this routine raises.
                    //

                    OpenAttributeIndex = 0;

                    //
                    //  Now log the new open attribute table entry before going on,
                    //  to ensure that the application of the caller's log record
                    //  will have the information he needs on the attribute.  We will
                    //  use the Undo buffer to convey the attribute name.  We will
                    //  not infinitely recurse, because now this Scb already has an
                    //  open attribute table index (and we pass a StructureSize
                    //  of zero).
                    //

                    NtfsWriteLog( IrpContext,
                                  Scb,
                                  NULL,
                                  OpenNonresidentAttribute,
                                  &LocalOpenEntry,
                                  EntrySize,
                                  Noop,
                                  Scb->AttributeName.Length != 0 ?
                                  Scb->AttributeName.Buffer : NULL,
                                  Scb->AttributeName.Length,
                                  (LONGLONG)0,
                                  0,
                                  0,
                                  0 );

                } else {

                    //
                    //  Someone else created the entry while we were waiting
                    //  for the table.
                    //

                    NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
                    AttributeTableAcquired = FALSE;
                }
            }
        }

        //
        //  Allocate a transaction ID and initialize it, if it does not already exist.
        //  If we subsequently fail, we clean it up when the current request is
        //  completed.
        //

        if (IrpContext->TransactionId == 0) {

            NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
            TransactionTableAcquired = TRUE;

            //
            //  Our structures require tables to stay within 64KB, since
            //  we use USHORT offsets.  Things are getting out of hand
            //  at this point anyway.  Raise log file full to reset the
            //  table sizes if we get to this point.
            //

            if (SizeOfRestartTable(&Vcb->TransactionTable) > 0xF000) {

                NtfsRaiseStatus( IrpContext, STATUS_LOG_FILE_FULL, NULL, NULL );
            }

            IrpContext->TransactionId =
              NtfsAllocateRestartTableIndex( &Vcb->TransactionTable, TRUE );

            //
            //  This is a brand new transaction, so nothing has been written
            //  for it yet.  The hotfix path below uses this flag to decide
            //  whether the transaction entry can simply be discarded.
            //

            ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );

            TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
                                &Vcb->TransactionTable,
                                IrpContext->TransactionId );

            TransactionEntry->TransactionState = TransactionActive;
            TransactionEntry->FirstLsn =
            TransactionEntry->PreviousLsn =
            TransactionEntry->UndoNextLsn = Li0; //**** LfsZeroLsn;

            //
            //  Remember that we will need a commit record even if we abort
            //  the transaction.
            //

            TransactionEntry->UndoBytes = QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));
            TransactionEntry->UndoRecords = 1;

            NtfsReleaseRestartTable( &Vcb->TransactionTable );
            TransactionTableAcquired = FALSE;

            //
            //  Remember the space for the commit record in our Lfs adjustment.
            //

            UndoAdjustmentForLfs += QuadAlign( sizeof( NTFS_LOG_RECORD_HEADER ));

            //
            //  If there is an undo operation for this log record, we reserve
            //  the space for another Lfs log record.
            //

            if (UndoOperation != Noop) {

                UndoAdjustmentForLfs += Vcb->LogHeaderReservation;
            }
        }

        //
        //  At least for now, assume update is contained in one physical page.
        //

        //ASSERT( (StructureSize == 0) || (StructureSize <= PAGE_SIZE) );

        //
        //  If there isn't enough room for this structure on the stack, we
        //  need to allocate an auxiliary buffer.  It is freed in the finally
        //  clause.
        //

        if (LogClusterCount > (PAGE_SIZE / 512)) {

            MyHeader = (PNTFS_LOG_RECORD_HEADER)
                       NtfsAllocatePool(PagedPool, sizeof( NTFS_LOG_RECORD_HEADER )
                                        + (LogClusterCount - 1) * sizeof( LCN ));
        }

        //
        //  Now fill in the WriteEntries array and the log record header.
        //  Entry zero is always the header.
        //

        WriteEntries[0].Buffer = (PVOID)MyHeader;
        WriteEntries[0].ByteLength = sizeof(NTFS_LOG_RECORD_HEADER);
        WriteIndex += 1;

        //
        //  Lookup the Runs for this log record
        //

        MyHeader->LcnsToFollow = (USHORT)LogClusterCount;

        if (LogClusterCount != 0) {

            if (!LookupLcns( IrpContext,
                             Scb,
                             LogVcn,
                             LogClusterCount,
                             TRUE,
                             &MyHeader->LcnsForPage[0] )) {

                //
                //  It is possible that the allocation for this range is not allocated.
                //  This may happen in cases where a stream which describes itself is
                //  being hotfixed (perhaps MoveFile in a later release).  In the
                //  hotfix case we will not write this log record.  Hotfix will mark
                //  the volume dirty so we know that the system will verify the volume
                //  at some point.
                //

                ASSERT( NtfsGetTopLevelHotFixScb() != NULL );

                //
                //  Cleanup the transaction entry if allocated here.
                //

                if (!FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG ) &&
                    (IrpContext->TransactionId != 0)) {

                    NtfsCleanupFailedTransaction( IrpContext );
                }

                //
                //  No record is written.  Return the last Lsn in the log and
                //  use it when setting the Bcb dirty in the finally clause.
                //

                ReturnLsn = LfsQueryLastLsn( Vcb->LogHandle );
                DirtyLsn = &ReturnLsn;
                leave;
            }

            //
            //  The header already accounts for one Lcn; add the rest.
            //

            WriteEntries[0].ByteLength += (LogClusterCount - 1) * sizeof(LCN);
        }

        //
        //  If there is a Redo buffer, fill in its write entry.  Until we learn
        //  otherwise below, the undo data is assumed to be the same entry.
        //

        if (RedoLength != 0) {

            WriteEntries[1].Buffer = RedoBuffer;
            WriteEntries[1].ByteLength = RedoLength;
            UndoIndex = RedoIndex = WriteIndex;
            WriteIndex += 1;
        }

        //
        //  If there is an undo buffer, and it is at a different address than
        //  the redo buffer, then fill in its write entry.  For a compensation
        //  log record the Undo buffer carries an Lsn rather than data, so it
        //  is never written.
        //

        if ((RedoBuffer != UndoBuffer) && (UndoLength != 0) &&
            (UndoOperation != CompensationLogRecord)) {

            WriteEntries[WriteIndex].Buffer = UndoBuffer;
            WriteEntries[WriteIndex].ByteLength = UndoLength;
            UndoIndex = WriteIndex;
            WriteIndex += 1;
        }

        //
        //  Now fill in the rest of the header.  Assume Redo and Undo buffer is
        //  the same, then fix them up if they are not.
        //

        MyHeader->RedoOperation = (USHORT)RedoOperation;
        MyHeader->UndoOperation = (USHORT)UndoOperation;
        MyHeader->RedoOffset = (USHORT)WriteEntries[0].ByteLength;
        MyHeader->RedoLength = (USHORT)RedoLength;
        MyHeader->UndoOffset = MyHeader->RedoOffset;

        if (RedoBuffer != UndoBuffer) {

            MyHeader->UndoOffset += (USHORT)QuadAlign(MyHeader->RedoLength);
        }

        MyHeader->UndoLength = (USHORT)UndoLength;

        MyHeader->TargetAttribute = (USHORT)Scb->NonpagedScb->OnDiskOatIndex;
        MyHeader->RecordOffset = (USHORT)RecordOffset;
        MyHeader->AttributeOffset = (USHORT)AttributeOffset;
        MyHeader->Reserved = 0;

        MyHeader->TargetVcn = LogVcn;
        MyHeader->ClusterBlockOffset = (USHORT) LogBlocksFromBytesTruncate( ClusterOffset( Vcb, StreamOffset ));

        //
        //  Finally, get our current transaction entry and call Lfs.  We acquire
        //  the transaction table exclusive both to synchronize the Lsn updates
        //  on return from Lfs, and also to mark the Bcb dirty before any more
        //  log records are written.
        //
        //  If we do not serialize the LfsWrite and CcSetDirtyPinnedData, here is
        //  what can happen:
        //
        //      We log an update for a page and get an Lsn back
        //
        //          Another thread writes a start of checkpoint record
        //          This thread then collects all of the dirty pages at that time
        //          Sometime it writes the dirty page table
        //
        //      The former thread which had been preempted, now sets the Bcb dirty
        //
        //  If we crash at this time, the page we updated is not in the dirty page
        //  table of the checkpoint, and its update record is also not seen since
        //  it was written before the start of the checkpoint!
        //
        //  Note however, since the page being updated is pinned and cannot be written,
        //  updating the Lsn in the page may simply be considered part of the update.
        //  Whoever is doing this update (to the Mft or an Index buffer), must have the
        //  Mft or Index acquired exclusive anyway.
        //
        //  The table stays acquired until the finally clause, after the Bcb has
        //  been set dirty there.
        //

        NtfsAcquireSharedStartExRestartTable( &Vcb->TransactionTable, TRUE );
        TransactionTableAcquired = TRUE;

        TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
                            &Vcb->TransactionTable,
                            IrpContext->TransactionId );

        //
        //  Set up the UndoNextLsn.  If this is a normal log record, then use
        //  the UndoNextLsn stored in the transaction entry; otherwise, use
        //  the one passed in as the Undo buffer.
        //

        if (UndoOperation != CompensationLogRecord) {

            UndoNextLsn = TransactionEntry->UndoNextLsn;

            //
            //  If there is undo information, calculate the number to pass to Lfs
            //  for undo bytes to reserve.
            //

            if (UndoOperation != Noop) {

                UndoBytes += QuadAlign(WriteEntries[0].ByteLength);

                if (UndoIndex != 0) {

                    UndoBytes += QuadAlign(WriteEntries[UndoIndex].ByteLength);
                }

                UndoRecords += 1;
            }

        } else {

            UndoNextLsn = *(PLSN)UndoBuffer;

            //
            //  We can reduce our Undo requirements, by the Redo data being
            //  logged.  This is either an abort record for a previous action
            //  or a commit record.  If it is a commit record we accounted
            //  for it above on the first NtfsWriteLog, and NtfsCommitTransaction
            //  will adjust for the rest.
            //

            if (!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS )) {

                UndoBytes -= QuadAlign(WriteEntries[0].ByteLength);

                if (RedoIndex != 0) {

                    UndoBytes -= QuadAlign(WriteEntries[RedoIndex].ByteLength);
                }

                UndoRecords -= 1;
            }
        }

#ifdef NTFS_LOG_FULL_TEST

        //
        //  Perform log-file-full fail checking.  We do not perform this check if
        //  we are writing an undo record (since we are guaranteed space to undo
        //  things).  Mount requests are exempt as well.
        //

        if (UndoOperation != CompensationLogRecord &&
            (IrpContext->MajorFunction != IRP_MJ_FILE_SYSTEM_CONTROL ||
             IrpContext->MinorFunction != IRP_MN_MOUNT_VOLUME)) {

            LogFileFullFailCheck( IrpContext );

            if (NtfsFailFrequency != 0 &&
                (++NtfsPeriodicFail % NtfsFailFrequency) == 0) {

                ExRaiseStatus( STATUS_LOG_FILE_FULL );
            }
        }
#endif

        //
        //  Call Lfs to write the record.
        //

        LfsWrite( Vcb->LogHandle,
                  WriteIndex,
                  &WriteEntries[0],
                  LfsClientRecord,
                  &IrpContext->TransactionId,
                  UndoNextLsn,
                  TransactionEntry->PreviousLsn,
                  UndoBytes + UndoAdjustmentForLfs,
                  0,
                  &ReturnLsn );

        //
        //  Now that we are successful, update the transaction entry appropriately.
        //

        TransactionEntry->UndoBytes += UndoBytes;
        TransactionEntry->UndoRecords += UndoRecords;
        TransactionEntry->PreviousLsn = ReturnLsn;

        //
        //  The UndoNextLsn for the transaction depends on whether we are
        //  doing a compensation log record or not.
        //

        if (UndoOperation != CompensationLogRecord) {

            TransactionEntry->UndoNextLsn = ReturnLsn;

        } else {

            TransactionEntry->UndoNextLsn = UndoNextLsn;
        }

        //
        //  If this is the first Lsn, then we have to update that as
        //  well.
        //

        if (TransactionEntry->FirstLsn.QuadPart == 0) {

            TransactionEntry->FirstLsn = ReturnLsn;
        }

        //
        //  Set to use this Lsn when marking dirty below
        //

        DirtyLsn = &ReturnLsn;

        //
        //  Set the flag in the Irp Context which indicates we wrote
        //  a log record to disk.
        //

        SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG );

    } finally {

        DebugUnwind( NtfsWriteLog );

        //
        //  Now set the Bcb dirty if specified.  We want to set it no matter
        //  what happens, because our caller has modified the buffer and is
        //  counting on us to call the Cache Manager.  DirtyLsn is still NULL
        //  here if we raised before a record was written.
        //

        if (ARGUMENT_PRESENT( Bcb )) {

            TIMER_STATUS TimerStatus;

            CcSetDirtyPinnedData( Bcb, DirtyLsn );

            //
            //  Synchronize with the checkpoint timer and other instances of this routine.
            //
            //  Perform an interlocked exchange to indicate that a timer is being set.
            //
            //  If the previous value indicates that no timer was set, then we
            //  enable the volume checkpoint timer.  This will guarantee that a checkpoint
            //  will occur to flush out the dirty Bcb data.
            //
            //  If the timer was set previously, then it is guaranteed that a checkpoint
            //  will occur without this routine having to reenable the timer.
            //
            //  If the timer and checkpoint occurred between the dirtying of the Bcb and
            //  the setting of the timer status, then we will be queueing a single extra
            //  checkpoint on a clean volume.  This is not considered harmful.
            //

            //
            //  Atomically set the timer status to indicate a timer is being set and
            //  retrieve the previous value.
            //

            TimerStatus = InterlockedExchange( (PLONG)&NtfsData.TimerStatus, TIMER_SET );

            //
            //  If the timer is not currently set then we must start the checkpoint timer
            //  to make sure the above dirtying is flushed out.
            //

            if (TimerStatus == TIMER_NOT_SET) {

                //
                //  KeSetTimer takes a negative due time as relative, in
                //  100-nanosecond units: 5 * 10,000,000 is five seconds.
                //

                LONGLONG FiveSecondsFromNow = -5*1000*1000*10;

                KeSetTimer( &NtfsData.VolumeCheckpointTimer,
                            *(PLARGE_INTEGER)&FiveSecondsFromNow,
                            &NtfsData.VolumeCheckpointDpc );
            }
        }

        if (TransactionTableAcquired) {

            NtfsReleaseRestartTable( &Vcb->TransactionTable );
        }

        //
        //  Let's clean up any failed attempt to allocate an attribute entry.
        //  OpenAttributeIndex is reset to zero once a new entry has been
        //  completely set up, so a nonzero value here means we raised part
        //  way through creating it and must give the pieces back.
        //

        if (OpenAttributeIndex != 0) {

            NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable, OpenAttributeIndex );

            if (AttributeData != NULL) {

                NtfsFreePool( AttributeData );
            }

            if (OnDiskAttributeIndex != 0) {

                NtfsFreeRestartTableIndex( Vcb->OnDiskOat, OnDiskAttributeIndex );
            }
        }

        if (AttributeTableAcquired) {

            NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
        }

        //
        //  Free the header if it came from pool rather than the stack.
        //

        if (MyHeader != (PNTFS_LOG_RECORD_HEADER)&LocalHeader) {

            NtfsFreePool( MyHeader );
        }
    }

    DebugTrace( -1, Dbg, ("NtfsWriteLog -> %016I64x\n", ReturnLsn ) );

    return ReturnLsn;
}
  705. VOID
  706. NtfsCheckpointVolume (
  707. IN PIRP_CONTEXT IrpContext,
  708. IN PVCB Vcb,
  709. IN BOOLEAN OwnsCheckpoint,
  710. IN BOOLEAN CleanVolume,
  711. IN BOOLEAN FlushVolume,
  712. IN ULONG LfsFlags,
  713. IN LSN LastKnownLsn
  714. )
  715. /*++
  716. Routine Description:
  717. This routine is called periodically to perform a checkpoint on the volume
  718. with respect to the log file. The checkpoint dumps a bunch of log file
  719. state information to the log file, and finally writes a summary of the
  720. dumped information in its Restart Area.
  721. This checkpoint dumps the following:
  722. Open Attribute Table
  723. (all of the attribute names for the Attribute Table)
  724. Dirty Pages Table
  725. Transaction Table
  726. Arguments:
  727. Vcb - Pointer to the Vcb on which the checkpoint is to occur.
  728. OwnsCheckpoint - TRUE if the caller has already taken steps to insure
  729. that he may proceed with the checkpointing. In this case we
  730. don't do any checks for other checkpoints and don't clear the
  731. checkpoint flag or notify any waiting checkpoint threads.
  732. CleanVolume - TRUE if the caller wishes to clean the volume before doing
  733. the checkpoint, or FALSE for a normal periodic checkpoint.
  734. FlushVolume - Applies only if CleanVolume is TRUE. This indicates if we should
  735. should flush the volume or only Lsn streams. Only the shutdown thread
  736. can do a clean and flush checkpoint and avoid deadlocks between
  737. pagingio and main resources.
  738. LfsFlags - flags to pass to lfs when writing the restart areas
  739. LastKnownLsn - Applies only if CleanVolume is TRUE. Only perform the
  740. clean checkpoint if this value is the same as the last restart area
  741. in the Vcb. This will prevent us from doing unecesary clean
  742. checkpoints.
  743. Return Value:
  744. None
  745. --*/
  746. {
  747. RESTART_AREA RestartArea;
  748. RESTART_POINTERS DirtyPages;
  749. RESTART_POINTERS Pointers;
  750. PRESTART_POINTERS NewTable = NULL;
  751. LSN BaseLsn;
  752. PATTRIBUTE_NAME_ENTRY NamesBuffer = NULL;
  753. PTRANSACTION_ENTRY TransactionEntry;
  754. BOOLEAN DirtyPageTableInitialized = FALSE;
  755. BOOLEAN OpenAttributeTableAcquired = FALSE;
  756. BOOLEAN TransactionTableAcquired = FALSE;
  757. LSN OldestDirtyPageLsn = Li0;
  758. BOOLEAN AcquireFiles = FALSE;
  759. BOOLEAN PostDefrag = FALSE;
  760. KPRIORITY PreviousPriority;
  761. BOOLEAN RestorePreviousPriority = FALSE;
  762. PSCB UsnJournal = NULL;
  763. LOGICAL LfsCleanShutdown = 0;
  764. PAGED_CODE();
  765. DebugTrace( +1, Dbg, ("NtfsCheckpointVolume:\n") );
  766. DebugTrace( 0, Dbg, ("Vcb = %08lx\n", Vcb) );
  767. //
  768. // No checkpointing on readonly volumes.
  769. //
  770. if (NtfsIsVolumeReadOnly( Vcb )) {
  771. return;
  772. }
  773. if (!OwnsCheckpoint) {
  774. //
  775. // Acquire the checkpoint event.
  776. //
  777. NtfsAcquireCheckpoint( IrpContext, Vcb );
  778. //
  779. // We will want to post a defrag if defragging is permitted and enabled
  780. // and we have begun the defrag operation or have excess mapping.
  781. // If the defrag hasn't been triggered then check the Mft free
  782. // space. We can skip defragging if a defrag operation is
  783. // currently active.
  784. //
  785. if (!CleanVolume &&
  786. (FlagOn( Vcb->MftDefragState,
  787. VCB_MFT_DEFRAG_PERMITTED | VCB_MFT_DEFRAG_ENABLED | VCB_MFT_DEFRAG_ACTIVE ) ==
  788. (VCB_MFT_DEFRAG_PERMITTED | VCB_MFT_DEFRAG_ENABLED))) {
  789. if (FlagOn( Vcb->MftDefragState,
  790. VCB_MFT_DEFRAG_TRIGGERED | VCB_MFT_DEFRAG_EXCESS_MAP )) {
  791. PostDefrag = TRUE;
  792. } else {
  793. NtfsCheckForDefrag( Vcb );
  794. if (FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_TRIGGERED )) {
  795. PostDefrag = TRUE;
  796. } else {
  797. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ENABLED );
  798. }
  799. }
  800. }
  801. //
  802. // If a checkpoint is already active, we either have to get out,
  803. // or wait for it.
  804. //
  805. while (FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS )) {
  806. //
  807. // Release the checkpoint event because we cannot checkpoint now.
  808. //
  809. NtfsReleaseCheckpoint( IrpContext, Vcb );
  810. if (CleanVolume) {
  811. NtfsWaitOnCheckpointNotify( IrpContext, Vcb );
  812. NtfsAcquireCheckpoint( IrpContext, Vcb );
  813. } else {
  814. return;
  815. }
  816. }
  817. //
  818. // If the log file is gone then simply exit.
  819. //
  820. if (!FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
  821. NtfsReleaseCheckpoint( IrpContext, Vcb );
  822. return;
  823. }
  824. //
  825. // We now have the checkpoint event. Check if there is still
  826. // a need to perform the checkpoint.
  827. //
  828. if (CleanVolume &&
  829. (LastKnownLsn.QuadPart != Vcb->LastRestartArea.QuadPart)) {
  830. NtfsReleaseCheckpoint( IrpContext, Vcb );
  831. return;
  832. }
  833. SetFlag( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS );
  834. NtfsResetCheckpointNotify( IrpContext, Vcb );
  835. NtfsReleaseCheckpoint( IrpContext, Vcb );
  836. //
  837. // If this is a clean volume checkpoint then boost the priority of
  838. // this thread.
  839. //
  840. if (CleanVolume) {
  841. PreviousPriority = KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  842. LOW_REALTIME_PRIORITY );
  843. if (PreviousPriority != LOW_REALTIME_PRIORITY) {
  844. RestorePreviousPriority = TRUE;
  845. }
  846. }
  847. }
  848. RtlZeroMemory( &RestartArea, sizeof(RESTART_AREA) );
  849. RtlZeroMemory( &DirtyPages, sizeof(RESTART_POINTERS) );
  850. //
  851. // Remember if our caller wants to tell Lfs that this is a
  852. // clean shutdown. We will use the combination of the OwnsCheckpoint and
  853. // CleanVolume flags. This will cover system shutdown and volume
  854. // snapshot cases. Both of these want the volume not to need any restart.
  855. //
  856. if (OwnsCheckpoint && CleanVolume) {
  857. LfsCleanShutdown = TRUE;
  858. }
  859. //
  860. // Insure cleanup on the way out
  861. //
  862. try {
  863. POPEN_ATTRIBUTE_ENTRY AttributeEntry;
  864. ULONG NameBytes = 0;
  865. //
  866. // Now remember the current "last Lsn" value as the start of
  867. // our checkpoint. We acquire the transaction table to capture
  868. // this value to synchronize with threads who are writing log
  869. // records and setting pages dirty as atomic actions.
  870. //
  871. ASSERT( IrpContext->TransactionId == 0 );
  872. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  873. //
  874. // If LfsFlags == LFS_WRITE_FLAG_WRITE_AT_FRONT then produce
  875. // the dummy log record that resets the log. This allows us to
  876. // keep the log in use only at the front so chkdsk can shrink it
  877. //
  878. if (FlagOn( LfsFlags, LFS_WRITE_FLAG_WRITE_AT_FRONT )) {
  879. LSN Lsn;
  880. LFS_WRITE_ENTRY WriteEntry;
  881. UCHAR Buffer[ sizeof( NTFS_LOG_RECORD_HEADER ) + 2 * sizeof( LSN )];
  882. TRANSACTION_ID TransactionId;
  883. RtlZeroMemory( &Buffer, sizeof( Buffer ) );
  884. WriteEntry.Buffer = Buffer;
  885. WriteEntry.ByteLength = sizeof( Buffer );
  886. TransactionId = NtfsAllocateRestartTableIndex( &Vcb->TransactionTable, TRUE );
  887. Lsn.QuadPart = 0;
  888. LfsGetActiveLsnRange( Vcb->LogHandle,
  889. Add2Ptr( Buffer, sizeof( NTFS_LOG_RECORD_HEADER )),
  890. Add2Ptr( Buffer, sizeof( NTFS_LOG_RECORD_HEADER ) + sizeof( LSN )) );
  891. LfsWrite( Vcb->LogHandle,
  892. 1,
  893. &WriteEntry,
  894. LfsClientRecord,
  895. &TransactionId,
  896. Lsn,
  897. Lsn,
  898. 0,
  899. LfsFlags,
  900. &Lsn );
  901. NtfsFreeRestartTableIndex( &Vcb->TransactionTable, TransactionId );
  902. }
  903. BaseLsn =
  904. RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  905. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  906. // ASSERT( (RestartArea.StartOfCheckpoint.QuadPart != 0) ||
  907. // FlagOn(Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN) );
  908. //
  909. // If the last checkpoint was completely clean, and no one has
  910. // written to the log since then, we can just return.
  911. //
  912. if (FlagOn( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN )
  913. &&
  914. (RestartArea.StartOfCheckpoint.QuadPart == Vcb->EndOfLastCheckpoint.QuadPart)
  915. &&
  916. !CleanVolume) {
  917. //
  918. // Let's take this opportunity to shrink the Open Attribute and Transaction
  919. // table back if they have gotten large.
  920. //
  921. //
  922. // First the Open Attribute Table
  923. //
  924. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  925. OpenAttributeTableAcquired = TRUE;
  926. if (IsRestartTableEmpty(&Vcb->OpenAttributeTable)
  927. &&
  928. (Vcb->OpenAttributeTable.Table->NumberEntries >
  929. HIGHWATER_ATTRIBUTE_COUNT)) {
  930. //
  931. // Initialize first in case we get an allocation failure.
  932. //
  933. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY ),
  934. INITIAL_NUMBER_ATTRIBUTES,
  935. &Pointers );
  936. NtfsFreePool( Vcb->OpenAttributeTable.Table );
  937. Vcb->OpenAttributeTable.Table = Pointers.Table;
  938. //
  939. // Also reinitialize the OnDisk table if different.
  940. //
  941. if (Vcb->OnDiskOat != &Vcb->OpenAttributeTable) {
  942. //
  943. // Initialize first in case we get an allocation failure.
  944. //
  945. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  946. INITIAL_NUMBER_ATTRIBUTES,
  947. &Pointers );
  948. NtfsFreePool( Vcb->OnDiskOat->Table );
  949. Vcb->OnDiskOat->Table = Pointers.Table;
  950. }
  951. }
  952. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  953. OpenAttributeTableAcquired = FALSE;
  954. //
  955. // Now check the transaction table (freeing in the finally clause).
  956. //
  957. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  958. TransactionTableAcquired = TRUE;
  959. if (IsRestartTableEmpty(&Vcb->TransactionTable)
  960. &&
  961. (Vcb->TransactionTable.Table->NumberEntries >
  962. HIGHWATER_TRANSACTION_COUNT)) {
  963. //
  964. // Initialize first in case we get an allocation failure.
  965. //
  966. InitializeNewTable( sizeof(TRANSACTION_ENTRY),
  967. INITIAL_NUMBER_TRANSACTIONS,
  968. &Pointers );
  969. NtfsFreePool( Vcb->TransactionTable.Table );
  970. Vcb->TransactionTable.Table = Pointers.Table;
  971. }
  972. try_return( NOTHING );
  973. }
  974. //
  975. // Flush any dangling dirty pages from before the last restart.
  976. // Note that it is arbitrary what Lsn we flush to here, and, in fact,
  977. // it is not absolutely required that we flush anywhere at all - we
  978. // could actually rely on the Lazy Writer. All we are trying to do
  979. // is reduce the amount of work that we will have to do at Restart,
  980. // by not forcing ourselves to have to go too far back in the log.
  981. // Presumably this can only happen for some reason the system is
  982. // starting to produce dirty pages faster than the lazy writer is
  983. // writing them.
  984. //
  985. // (We may wish to play with taking this call out...)
  986. //
  987. // This may be an appropriate place to worry about this, but, then
  988. // again, the Lazy Writer is using (currently) five threads. It may
  989. // not be appropriate to hold up this one thread doing the checkpoint
  990. // if the Lazy Writer is getting behind. How many dirty pages we
  991. // can even have is limited by the size of memory, so if the log file
  992. // is large enough, this may not be an issue. It seems kind of nice
  993. // to just let the Lazy Writer keep writing dirty pages as he does
  994. // now.
  995. //
  996. // if (!FlagOn(Vcb->VcbState, VCB_STATE_LAST_CHECKPOINT_CLEAN)) {
  997. // CcFlushPagesToLsn( Vcb->LogHandle, &Vcb->LastRestartArea );
  998. // }
  999. //
  1000. //
  1001. // Now we must clean the volume here if that is what the caller wants.
  1002. //
  1003. if (CleanVolume) {
  1004. NtfsCleanCheckpoints += 1;
  1005. //
  1006. // Lock down the volume if this is a clean checkpoint.
  1007. //
  1008. NtfsAcquireAllFiles( IrpContext, Vcb, FlushVolume, FALSE, FALSE );
  1009. #ifdef NTFSDBG
  1010. ASSERT( !FlagOn( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1011. DebugDoit( SetFlag( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1012. #endif // NTFSDBG
  1013. AcquireFiles = TRUE;
  1014. //
  1015. // Mark the fact that we are doing a clean checkpoint at this time.
  1016. //
  1017. Vcb->CleanCheckpointMark = Vcb->LogFileFullCount;
  1018. //
  1019. // It isn't safe to checkpoint a dismounted volume, and
  1020. // it doesn't make much sense, either.
  1021. //
  1022. if (!FlagOn( Vcb->VcbState, VCB_STATE_VOLUME_MOUNTED )) {
  1023. try_return( NOTHING );
  1024. }
  1025. //
  1026. // Now we will acquire the Open Attribute Table exclusive to delete
  1027. // all of the entries, since we want to write a clean checkpoint.
  1028. // This is OK, since we have the global resource and nothing else
  1029. // can be going on. (Similarly we are writing an empty transaction
  1030. // table, while in fact we will be the only transaction, but there
  1031. // is no need to capture our guy, nor explicitly empty this table.)
  1032. //
  1033. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1034. OpenAttributeTableAcquired = TRUE;
  1035. //
  1036. // First reclaim the page we have reserved in the undo total, to
  1037. // guarantee that we can flush the log file.
  1038. //
  1039. LfsResetUndoTotal( Vcb->LogHandle, 1, -(LONG)(2 * PAGE_SIZE) );
  1040. if (FlushVolume) {
  1041. (VOID)NtfsFlushVolume( IrpContext, Vcb, TRUE, FALSE, FALSE, FALSE );
  1042. } else {
  1043. NtfsFlushLsnStreams( Vcb );
  1044. }
  1045. SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
  1046. //
  1047. // Loop through to deallocate all of the open attribute entries. Any
  1048. // that point to an Scb need to get the index in the Scb zeroed. If
  1049. // they do not point to an Scb, we have to see if there is a name to
  1050. // free.
  1051. //
  1052. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  1053. while (AttributeEntry != NULL) {
  1054. ULONG Index;
  1055. if (AttributeEntry->OatData->Overlay.Scb != NULL) {
  1056. AttributeEntry->OatData->Overlay.Scb->NonpagedScb->OpenAttributeTableIndex =
  1057. AttributeEntry->OatData->Overlay.Scb->NonpagedScb->OnDiskOatIndex = 0;
  1058. } else {
  1059. //
  1060. // Delete its name, if it has one. Check that we aren't
  1061. // using the hardcode $I30 name.
  1062. //
  1063. NtfsFreeScbAttributeName( AttributeEntry->OatData->AttributeName.Buffer );
  1064. }
  1065. //
  1066. // Get the index for the entry.
  1067. //
  1068. Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  1069. AttributeEntry );
  1070. if (Vcb->RestartVersion == 0) {
  1071. NtfsFreeRestartTableIndex( Vcb->OnDiskOat, AttributeEntry->OatData->OnDiskAttributeIndex );
  1072. }
  1073. NtfsFreeOpenAttributeData( AttributeEntry->OatData );
  1074. NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable, Index );
  1075. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  1076. AttributeEntry );
  1077. }
  1078. //
  1079. // In a rare reuse path there may still be entries in the open attribute data
  1080. // list. This can happen when we reuse a slot in the open attribute table
  1081. // during restart.
  1082. //
  1083. NtfsFreeAllOpenAttributeData( Vcb );
  1084. //
  1085. // Initialize first in case we get an allocation failure.
  1086. //
  1087. ASSERT( IsRestartTableEmpty( &Vcb->OpenAttributeTable ));
  1088. ASSERT( IsListEmpty( &Vcb->OpenAttributeData ));
  1089. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY ),
  1090. INITIAL_NUMBER_ATTRIBUTES,
  1091. &Pointers );
  1092. NtfsFreePool( Vcb->OpenAttributeTable.Table );
  1093. Vcb->OpenAttributeTable.Table = Pointers.Table;
  1094. //
  1095. // Since we are doing a clean checkpoint we may be able to discard the
  1096. // second open attribute table. We have three cases to consider.
  1097. //
  1098. // 1 - We want to use Version 0 on-disk but currently aren't.
  1099. // 2 - We are currently using Version 0 but can free some space.
  1100. // 3 - We are currently using Version 0 but don't want to.
  1101. //
  1102. if (NtfsDefaultRestartVersion != Vcb->RestartVersion) {
  1103. NtfsUpdateOatVersion( Vcb, NtfsDefaultRestartVersion );
  1104. } else if (NtfsDefaultRestartVersion == 0) {
  1105. InitializeNewTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  1106. INITIAL_NUMBER_ATTRIBUTES,
  1107. &Pointers );
  1108. NtfsFreePool( Vcb->OnDiskOat->Table );
  1109. Vcb->OnDiskOat->Table = Pointers.Table;
  1110. }
  1111. //
  1112. // Initialize first in case we get an allocation failure.
  1113. // Make sure we commit the current transaction.
  1114. //
  1115. NtfsCommitCurrentTransaction( IrpContext );
  1116. ASSERT(IsRestartTableEmpty(&Vcb->TransactionTable));
  1117. InitializeNewTable( sizeof(TRANSACTION_ENTRY),
  1118. INITIAL_NUMBER_TRANSACTIONS,
  1119. &Pointers );
  1120. NtfsFreePool( Vcb->TransactionTable.Table );
  1121. Vcb->TransactionTable.Table = Pointers.Table;
  1122. //
  1123. // Make sure we do not process any log file before the restart
  1124. // area, because we did not dump the open attribute table.
  1125. //
  1126. RestartArea.StartOfCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  1127. //
  1128. // More work to do if this is not a clean checkpoint.
  1129. //
  1130. } else {
  1131. PDIRTY_PAGE_ENTRY DirtyPage;
  1132. POPEN_ATTRIBUTE_ENTRY OpenEntry;
  1133. ULONG JustMe = 0;
  1134. ULONG TempCount;
  1135. //
  1136. // Now we construct the dirty page table by calling the Cache Manager.
  1137. // For each dirty page on files tagged with our log handle, he will
  1138. // call us back at our DirtyPageRoutine. We will allocate the initial
  1139. // Dirty Page Table, but we will let the call back routine grow it as
  1140. // necessary.
  1141. //
  1142. NtfsInitializeRestartTable( (((Vcb->RestartVersion == 0) ?
  1143. sizeof( DIRTY_PAGE_ENTRY_V0 ) :
  1144. sizeof( DIRTY_PAGE_ENTRY )) +
  1145. ((Vcb->ClustersPerPage - 1) * sizeof(LCN))),
  1146. Vcb->DirtyPageTableSizeHint,
  1147. &DirtyPages );
  1148. NtfsAcquireExclusiveRestartTable( &DirtyPages, TRUE );
  1149. DirtyPageTableInitialized = TRUE;
  1150. //
  1151. // Now we will acquire the Open Attribute Table exclusive to freeze changes.
  1152. //
  1153. NtfsAcquireExclusiveRestartTable( &Vcb->OpenAttributeTable, TRUE );
  1154. OpenAttributeTableAcquired = TRUE;
  1155. //
  1156. // Loop to see how much we will have to allocate for attribute names.
  1157. //
  1158. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  1159. while (AttributeEntry != NULL) {
  1160. //
  1161. // This checks for one type of aliasing.
  1162. //
  1163. // ASSERT( (AttributeEntry->Overlay.Scb == NULL) ||
  1164. // (AttributeEntry->Overlay.Scb->OpenAttributeTableIndex ==
  1165. // GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  1166. // AttributeEntry )));
  1167. //
  1168. // Clear the DirtyPageSeen flag prior to collecting the dirty pages,
  1169. // to help us figure out which Open Attribute Entries we still need.
  1170. //
  1171. AttributeEntry->DirtyPagesSeen = FALSE;
  1172. if (AttributeEntry->OatData->AttributeName.Length != 0) {
  1173. //
  1174. // Add to our name total, the size of an Attribute Entry,
  1175. // which includes the size of the terminating UNICODE_NULL.
  1176. //
  1177. NameBytes += AttributeEntry->OatData->AttributeName.Length +
  1178. sizeof(ATTRIBUTE_NAME_ENTRY);
  1179. }
  1180. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  1181. AttributeEntry );
  1182. }
  1183. //
  1184. // Now call the Cache Manager to give us all of our dirty pages
  1185. // via the DirtyPageRoutine callback, and remember what the oldest
  1186. // Lsn is for a dirty page.
  1187. //
  1188. OldestDirtyPageLsn = CcGetDirtyPages( Vcb->LogHandle,
  1189. &DirtyPageRoutine,
  1190. (PVOID)IrpContext,
  1191. (PVOID)&DirtyPages );
  1192. TempCount = DirtyPages.Table->NumberAllocated;
  1193. Vcb->DirtyPageTableSizeHint = (TempCount & ~(INITIAL_DIRTY_TABLE_HINT - 1)) + INITIAL_DIRTY_TABLE_HINT;
  1194. if (OldestDirtyPageLsn.QuadPart != 0 &&
  1195. OldestDirtyPageLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1196. OldestDirtyPageLsn = Vcb->LastBaseLsn;
  1197. }
  1198. //
  1199. // Now loop through the dirty page table to extract all of the Vcn/Lcn
  1200. // Mapping that we have, and insert it into the appropriate Scb.
  1201. //
  1202. DirtyPage = NtfsGetFirstRestartTable( &DirtyPages );
  1203. //
  1204. // The dirty page routine is called while holding spin locks,
  1205. // so it cannot take page faults. Thus we must scan the dirty
  1206. // page table we just built and fill in the Lcns here.
  1207. //
  1208. while (DirtyPage != NULL) {
  1209. PSCB Scb;
  1210. //
  1211. // If we have Lcn's then look them up.
  1212. //
  1213. if (DirtyPage->LengthOfTransfer != 0) {
  1214. VCN Vcn;
  1215. PLCN LcnArray;
  1216. //
  1217. // Get the in-memory AttributeEntry from the dirty page entry.
  1218. // Then update the dirty page entry with the on-disk TargetAttribute.
  1219. // Also mark the pages dirty now.
  1220. //
  1221. OpenEntry = GetRestartEntryFromIndex( &Vcb->OpenAttributeTable,
  1222. DirtyPage->TargetAttribute );
  1223. OpenEntry->DirtyPagesSeen = TRUE;
  1224. DirtyPage->TargetAttribute = OpenEntry->OatData->OnDiskAttributeIndex;
  1225. ASSERT( IsRestartTableEntryAllocated( OpenEntry ));
  1226. Scb = OpenEntry->OatData->Overlay.Scb;
  1227. //
  1228. // Account for UsnJournal biasing if necc.
  1229. // note at this point the vcn is actually still a byte offset
  1230. //
  1231. if (Scb == Vcb->UsnJournal) {
  1232. if (Vcb->RestartVersion == 0 ) {
  1233. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn = ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn + Vcb->UsnCacheBias;
  1234. } else {
  1235. DirtyPage->Vcn = DirtyPage->Vcn + Vcb->UsnCacheBias;
  1236. }
  1237. }
  1238. //
  1239. // Fix up the count of Lcns.
  1240. //
  1241. DirtyPage->LcnsToFollow = ClustersFromBytes( Vcb, DirtyPage->LengthOfTransfer );
  1242. //
  1243. // Now fix up the page entry to account for the differences in the
  1244. // restart version structures and also make sure we don't have
  1245. // an Lsn which precedes our current base Lsn.
  1246. //
  1247. if (Vcb->RestartVersion == 0) {
  1248. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Reserved = 0;
  1249. if (((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->OldestLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1250. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->OldestLsn.QuadPart = Vcb->LastBaseLsn.QuadPart;
  1251. }
  1252. Vcn = ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn;
  1253. Vcn = Int64ShraMod32( Vcn, Vcb->ClusterShift );
  1254. ((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->Vcn = Vcn;
  1255. LcnArray = &((PDIRTY_PAGE_ENTRY_V0) DirtyPage)->LcnsForPage[0];
  1256. } else {
  1257. if (DirtyPage->OldestLsn.QuadPart < Vcb->LastBaseLsn.QuadPart) {
  1258. DirtyPage->OldestLsn.QuadPart = Vcb->LastBaseLsn.QuadPart;
  1259. }
  1260. DirtyPage->Vcn = Vcn = Int64ShraMod32( DirtyPage->Vcn, Vcb->ClusterShift );
  1261. LcnArray = &DirtyPage->LcnsForPage[0];
  1262. }
  1263. LookupLcns( IrpContext,
  1264. Scb,
  1265. Vcn,
  1266. DirtyPage->LcnsToFollow,
  1267. FALSE,
  1268. LcnArray );
  1269. //
  1270. // Otherwise free this dirty page entry.
  1271. //
  1272. } else {
  1273. NtfsFreeRestartTableIndex( &DirtyPages,
  1274. GetIndexFromRestartEntry( &DirtyPages,
  1275. DirtyPage ));
  1276. }
  1277. //
  1278. // Point to next entry in table, or NULL.
  1279. //
  1280. DirtyPage = NtfsGetNextRestartTable( &DirtyPages, DirtyPage );
  1281. }
  1282. //
  1283. // If there were any names, then allocate space for them and copy
  1284. // them out.
  1285. //
  1286. if (NameBytes != 0) {
  1287. PATTRIBUTE_NAME_ENTRY Name;
  1288. //
  1289. // Allocate the buffer, with space for two terminating 0's on
  1290. // the end.
  1291. //
  1292. NameBytes += 4;
  1293. Name =
  1294. NamesBuffer = NtfsAllocatePool( NonPagedPool, NameBytes );
  1295. //
  1296. // Now loop to copy the names.
  1297. //
  1298. AttributeEntry = NtfsGetFirstRestartTable( &Vcb->OpenAttributeTable );
  1299. while (AttributeEntry != NULL) {
  1300. //
  1301. // Free the Open Attribute Entry if there were no
  1302. // dirty pages and the Scb is gone. This is the only
  1303. // place they are deleted. (Yes, I know we allocated
  1304. // space for its name, but I didn't want to make three
  1305. // passes through the open attribute table. Permeter
  1306. // is running as we speak, and showing 407 open files
  1307. // on NT/IDW5.)
  1308. //
  1309. if (!AttributeEntry->DirtyPagesSeen
  1310. &&
  1311. (AttributeEntry->OatData->Overlay.Scb == NULL)) {
  1312. ULONG Index;
  1313. //
  1314. // Get the index for the entry.
  1315. //
  1316. Index = GetIndexFromRestartEntry( &Vcb->OpenAttributeTable,
  1317. AttributeEntry );
  1318. //
  1319. // Delete its name and free it up.
  1320. //
  1321. NtfsFreeScbAttributeName( AttributeEntry->OatData->AttributeName.Buffer );
  1322. if (Vcb->RestartVersion == 0) {
  1323. NtfsFreeRestartTableIndex( Vcb->OnDiskOat,
  1324. AttributeEntry->OatData->OnDiskAttributeIndex );
  1325. }
  1326. NtfsFreeOpenAttributeData( AttributeEntry->OatData );
  1327. NtfsFreeRestartTableIndex( &Vcb->OpenAttributeTable,
  1328. Index );
  1329. //
  1330. // Otherwise, if we are not deleting it, we have to
  1331. // copy its name into the buffer we allocated.
  1332. //
  1333. } else if (AttributeEntry->OatData->AttributeName.Length != 0) {
  1334. //
  1335. // Prefix each name in the buffer with the attribute index
  1336. // and name length. Be sure to use the index that will
  1337. // be on-disk.
  1338. //
  1339. Name->Index = (USHORT) AttributeEntry->OatData->OnDiskAttributeIndex;
  1340. Name->NameLength = AttributeEntry->OatData->AttributeName.Length;
  1341. RtlCopyMemory( &Name->Name[0],
  1342. AttributeEntry->OatData->AttributeName.Buffer,
  1343. AttributeEntry->OatData->AttributeName.Length );
  1344. Name->Name[Name->NameLength / sizeof( WCHAR )] = 0;
  1345. Name = (PATTRIBUTE_NAME_ENTRY)((PCHAR)Name +
  1346. sizeof(ATTRIBUTE_NAME_ENTRY) +
  1347. Name->NameLength);
  1348. ASSERT( (PCHAR)Name <= ((PCHAR)NamesBuffer + NameBytes - 4) );
  1349. }
  1350. AttributeEntry = NtfsGetNextRestartTable( &Vcb->OpenAttributeTable,
  1351. AttributeEntry );
  1352. }
  1353. //
  1354. // Terminate the Names Buffer.
  1355. //
  1356. Name->Index = 0;
  1357. Name->NameLength = 0;
  1358. }
  1359. //
  1360. // Now write all of the non-empty tables to the log.
  1361. //
  1362. //
  1363. // Write the Open Attribute Table
  1364. //
  1365. // Make sure the tables are in sync.
  1366. //
  1367. ASSERT( (IsRestartTableEmpty( Vcb->OnDiskOat ) && IsRestartTableEmpty( &Vcb->OpenAttributeTable )) ||
  1368. (!IsRestartTableEmpty( Vcb->OnDiskOat ) && !IsRestartTableEmpty( &Vcb->OpenAttributeTable )));
  1369. if (!IsRestartTableEmpty( Vcb->OnDiskOat )) {
  1370. RestartArea.OpenAttributeTableLsn =
  1371. NtfsWriteLog( IrpContext,
  1372. Vcb->MftScb,
  1373. NULL,
  1374. OpenAttributeTableDump,
  1375. Vcb->OnDiskOat->Table,
  1376. SizeOfRestartTable( Vcb->OnDiskOat ),
  1377. Noop,
  1378. NULL,
  1379. 0,
  1380. (LONGLONG)0,
  1381. 0,
  1382. 0,
  1383. 0 );
  1384. RestartArea.OpenAttributeTableLength =
  1385. SizeOfRestartTable( Vcb->OnDiskOat );
  1386. JustMe = 1;
  1387. }
  1388. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1389. OpenAttributeTableAcquired = FALSE;
  1390. //
  1391. // Write the Open Attribute Names
  1392. //
  1393. if (NameBytes != 0) {
  1394. RestartArea.AttributeNamesLsn =
  1395. NtfsWriteLog( IrpContext,
  1396. Vcb->MftScb,
  1397. NULL,
  1398. AttributeNamesDump,
  1399. NamesBuffer,
  1400. NameBytes,
  1401. Noop,
  1402. NULL,
  1403. 0,
  1404. (LONGLONG)0,
  1405. 0,
  1406. 0,
  1407. 0 );
  1408. RestartArea.AttributeNamesLength = NameBytes;
  1409. JustMe = 1;
  1410. }
  1411. //
  1412. // Write the Dirty Page Table
  1413. //
  1414. if (!IsRestartTableEmpty(&DirtyPages)) {
  1415. RestartArea.DirtyPageTableLsn =
  1416. NtfsWriteLog( IrpContext,
  1417. Vcb->MftScb,
  1418. NULL,
  1419. DirtyPageTableDump,
  1420. DirtyPages.Table,
  1421. SizeOfRestartTable(&DirtyPages),
  1422. Noop,
  1423. NULL,
  1424. 0,
  1425. (LONGLONG)0,
  1426. 0,
  1427. 0,
  1428. 0 );
  1429. RestartArea.DirtyPageTableLength = SizeOfRestartTable(&DirtyPages);
  1430. JustMe = 1;
  1431. }
  1432. //
  1433. // Write the Transaction Table if there is more than just us. We
  1434. // are a transaction if we wrote any log records above.
  1435. //
  1436. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable, TRUE );
  1437. TransactionTableAcquired = TRUE;
  1438. //
  1439. // Assume we will want to do at least one more checkpoint.
  1440. //
  1441. ClearFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
  1442. if ((ULONG)Vcb->TransactionTable.Table->NumberAllocated > JustMe) {
  1443. RestartArea.TransactionTableLsn =
  1444. NtfsWriteLog( IrpContext,
  1445. Vcb->MftScb,
  1446. NULL,
  1447. TransactionTableDump,
  1448. Vcb->TransactionTable.Table,
  1449. SizeOfRestartTable(&Vcb->TransactionTable),
  1450. Noop,
  1451. NULL,
  1452. 0,
  1453. (LONGLONG)0,
  1454. 0,
  1455. 0,
  1456. 0 );
  1457. RestartArea.TransactionTableLength =
  1458. SizeOfRestartTable(&Vcb->TransactionTable);
  1459. //
  1460. // Loop to see if the oldest Lsn comes from the transaction table.
  1461. //
  1462. TransactionEntry = NtfsGetFirstRestartTable( &Vcb->TransactionTable );
  1463. while (TransactionEntry != NULL) {
  1464. if ((TransactionEntry->FirstLsn.QuadPart != 0)
  1465. &&
  1466. (TransactionEntry->FirstLsn.QuadPart < BaseLsn.QuadPart)) {
  1467. BaseLsn = TransactionEntry->FirstLsn;
  1468. }
  1469. TransactionEntry = NtfsGetNextRestartTable( &Vcb->TransactionTable,
  1470. TransactionEntry );
  1471. }
  1472. //
  1473. // If the transaction table is otherwise empty, then this is a good
  1474. // time to reset our totals with Lfs, in case our counts get off a bit.
  1475. //
  1476. } else {
  1477. //
  1478. // If we are a transaction, then we have to add in our counts.
  1479. //
  1480. if (IrpContext->TransactionId != 0) {
  1481. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  1482. &Vcb->TransactionTable, IrpContext->TransactionId );
  1483. LfsResetUndoTotal( Vcb->LogHandle,
  1484. TransactionEntry->UndoRecords + 2,
  1485. TransactionEntry->UndoBytes +
  1486. QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1487. //
  1488. // Otherwise, we reset to our "idle" requirements.
  1489. //
  1490. } else {
  1491. LfsResetUndoTotal( Vcb->LogHandle,
  1492. 2,
  1493. QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1494. }
  1495. //
  1496. // If the DirtyPage table is empty then mark this as a clean checkpoint.
  1497. //
  1498. if (IsRestartTableEmpty( &DirtyPages )) {
  1499. SetFlag( Vcb->CheckpointFlags, VCB_LAST_CHECKPOINT_CLEAN );
  1500. CleanVolume = TRUE;
  1501. }
  1502. }
  1503. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1504. TransactionTableAcquired = FALSE;
  1505. }
  1506. //
  1507. // So far BaseLsn holds the minimum of the start Lsn for the checkpoint,
  1508. // or any of the FirstLsn fields for active transactions. Now we see
  1509. // if the oldest Lsn we need in the log should actually come from the
  1510. // oldest page in the dirty page table.
  1511. //
  1512. if ((OldestDirtyPageLsn.QuadPart != 0)
  1513. &&
  1514. (OldestDirtyPageLsn.QuadPart < BaseLsn.QuadPart)) {
  1515. BaseLsn = OldestDirtyPageLsn;
  1516. }
  1517. //
  1518. // Now fill in the LowestOpenUsn in the RestartArea. This is an unsafe
  1519. // test, but if we think we see an empty list, that is ok. In case no
  1520. // files are open yet, make sure we do not backtrack from the number we got
  1521. // at restart.
  1522. //
  1523. RestartArea.MajorVersion = Vcb->RestartVersion;
  1524. RestartArea.CurrentLsnAtMount = Vcb->CurrentLsnAtMount;
  1525. RestartArea.BytesPerCluster = Vcb->BytesPerCluster;
  1526. RestartArea.Reserved = 0;
  1527. RestartArea.UsnJournalReference = Vcb->UsnJournalReference;
  1528. RestartArea.UsnCacheBias = Vcb->UsnCacheBias;
  1529. UsnJournal = Vcb->UsnJournal;
  1530. if (UsnJournal != NULL) {
  1531. NtfsAcquireResourceShared( IrpContext, UsnJournal, TRUE );
  1532. NtfsLockFcb( IrpContext, UsnJournal->Fcb );
  1533. RestartArea.LowestOpenUsn = Vcb->LowestOpenUsn;
  1534. //
  1535. // Now we will correctly synchronize, test the list again and capture
  1536. // the LowestUsn.
  1537. //
  1538. if (!IsListEmpty(&Vcb->ModifiedOpenFiles)) {
  1539. RestartArea.LowestOpenUsn =
  1540. ((PFCB_USN_RECORD)CONTAINING_RECORD( Vcb->ModifiedOpenFiles.Flink,
  1541. FCB_USN_RECORD,
  1542. ModifiedOpenFilesLinks ))->Fcb->Usn;
  1543. //
  1544. // If the list is empty, then use FileSize
  1545. //
  1546. } else {
  1547. RestartArea.LowestOpenUsn = UsnJournal->Header.FileSize.QuadPart;
  1548. }
  1549. //
  1550. // Continue to advance the Usn in the Vcb on checkpoints, so that
  1551. // if the list goes empty we do not get a restart that has to go
  1552. // back to where we were at boot time.
  1553. //
  1554. #ifdef TOMM
  1555. ASSERT(RestartArea.LowestOpenUsn >= Vcb->LowestOpenUsn);
  1556. #endif
  1557. Vcb->LowestOpenUsn = RestartArea.LowestOpenUsn;
  1558. NtfsUnlockFcb( IrpContext, UsnJournal->Fcb );
  1559. NtfsReleaseResource( IrpContext, UsnJournal );
  1560. //
  1561. // Finally, write our Restart Area to describe all of the above, and
  1562. // give Lfs our new BaseLsn.
  1563. //
  1564. Vcb->LastBaseLsn = Vcb->LastRestartArea = BaseLsn;
  1565. LfsWriteRestartArea( Vcb->LogHandle,
  1566. sizeof( RESTART_AREA ),
  1567. &RestartArea,
  1568. LfsCleanShutdown,
  1569. &Vcb->LastRestartArea );
  1570. } else {
  1571. //
  1572. // Finally, write our Restart Area to describe all of the above, and
  1573. // give Lfs our new BaseLsn.
  1574. //
  1575. Vcb->LastBaseLsn = Vcb->LastRestartArea = BaseLsn;
  1576. LfsWriteRestartArea( Vcb->LogHandle,
  1577. sizeof( RESTART_AREA ),
  1578. &RestartArea,
  1579. LfsCleanShutdown,
  1580. &Vcb->LastRestartArea );
  1581. }
  1582. //
  1583. // If this is a clean checkpoint then initialize our reserved area.
  1584. // Also set the LastBaseLsn to the restart area itself. This will
  1585. // prevent us from generating future dirty page table entries
  1586. // which go back prior to the restart area.
  1587. //
  1588. if (CleanVolume) {
  1589. Vcb->LastBaseLsn = Vcb->LastRestartArea;
  1590. LfsResetUndoTotal( Vcb->LogHandle, 2, QuadAlign(sizeof(RESTART_AREA)) + (2 * PAGE_SIZE) );
  1591. Vcb->DirtyPageTableSizeHint = INITIAL_DIRTY_TABLE_HINT;
  1592. }
  1593. //
  1594. // Now remember where the log file is at now, so we know when to
  1595. // go idle above.
  1596. //
  1597. Vcb->EndOfLastCheckpoint = LfsQueryLastLsn( Vcb->LogHandle );
  1598. try_exit: NOTHING;
  1599. } finally {
  1600. DebugUnwind( NtfsCheckpointVolume );
  1601. //
  1602. // If the Dirty Page Table got initialized, free it up.
  1603. //
  1604. if (DirtyPageTableInitialized) {
  1605. NtfsFreeRestartTable( &DirtyPages );
  1606. }
  1607. //
  1608. // Release any resources
  1609. //
  1610. if (OpenAttributeTableAcquired) {
  1611. NtfsReleaseRestartTable( &Vcb->OpenAttributeTable );
  1612. }
  1613. if (TransactionTableAcquired) {
  1614. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1615. }
  1616. //
  1617. // Release any names buffer.
  1618. //
  1619. if (NamesBuffer != NULL) {
  1620. NtfsFreePool( NamesBuffer );
  1621. }
  1622. //
  1623. // Free any partial table we allocated.
  1624. //
  1625. if (NewTable != NULL) {
  1626. NtfsFreePool( NewTable );
  1627. }
  1628. //
  1629. // If this checkpoint created a transaction, free the index now.
  1630. //
  1631. if (IrpContext->TransactionId != 0) {
  1632. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
  1633. TRUE );
  1634. NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
  1635. IrpContext->TransactionId );
  1636. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  1637. IrpContext->TransactionId = 0;
  1638. }
  1639. if (AcquireFiles) {
  1640. #ifdef NTFSDBG
  1641. ASSERT( FlagOn( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1642. DebugDoit( ClearFlag( IrpContext->State, IRP_CONTEXT_STATE_CHECKPOINT_ACTIVE ));
  1643. #endif // NTFSDBG
  1644. NtfsReleaseAllFiles( IrpContext, Vcb, FALSE );
  1645. }
  1646. //
  1647. // If we didn't own the checkpoint operation then indicate
  1648. // that someone else is free to checkpoint. Hold the checkpoint
  1649. // flags if we plan to trim the usn journal. The checkpoint
  1650. // flags serialize the journal with the delete journal operation.
  1651. //
  1652. ASSERT( !OwnsCheckpoint || CleanVolume );
  1653. if (!OwnsCheckpoint) {
  1654. if ((UsnJournal == NULL) || CleanVolume || AbnormalTermination()) {
  1655. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1656. ClearFlag( Vcb->CheckpointFlags,
  1657. VCB_CHECKPOINT_SYNC_FLAGS | VCB_DUMMY_CHECKPOINT_POSTED);
  1658. NtfsSetCheckpointNotify( IrpContext, Vcb );
  1659. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1660. }
  1661. }
  1662. if (RestorePreviousPriority) {
  1663. KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  1664. PreviousPriority );
  1665. }
  1666. }
  1667. NtfsFreeRecentlyDeallocated( IrpContext, Vcb, &BaseLsn, CleanVolume );
  1668. //
  1669. // If there is a Usn Journal, call to perform possible trimming on a periodic checkpoint.
  1670. //
  1671. if (!CleanVolume && (UsnJournal != NULL)) {
  1672. NtfsTrimUsnJournal( IrpContext, Vcb );
  1673. }
  1674. //
  1675. // If we need to post a defrag request then do so now.
  1676. //
  1677. if (PostDefrag) {
  1678. PDEFRAG_MFT DefragMft;
  1679. //
  1680. // Use a try-except to ignore allocation errors.
  1681. //
  1682. try {
  1683. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1684. if (!FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE )) {
  1685. SetFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
  1686. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1687. DefragMft = NtfsAllocatePool( NonPagedPool, sizeof( DEFRAG_MFT ));
  1688. DefragMft->Vcb = Vcb;
  1689. DefragMft->DeallocateWorkItem = TRUE;
  1690. //
  1691. // Send it off.....
  1692. //
  1693. ExInitializeWorkItem( &DefragMft->WorkQueueItem,
  1694. (PWORKER_THREAD_ROUTINE)NtfsDefragMft,
  1695. (PVOID)DefragMft );
  1696. ExQueueWorkItem( &DefragMft->WorkQueueItem, CriticalWorkQueue );
  1697. } else {
  1698. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1699. }
  1700. } except( FsRtlIsNtstatusExpected( GetExceptionCode() )
  1701. ? EXCEPTION_EXECUTE_HANDLER
  1702. : EXCEPTION_CONTINUE_SEARCH ) {
  1703. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1704. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_ACTIVE );
  1705. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1706. }
  1707. }
  1708. DebugTrace( -1, Dbg, ("NtfsCheckpointVolume -> VOID\n") );
  1709. }
  1710. VOID
  1711. NtfsCheckpointForLogFileFull (
  1712. IN PIRP_CONTEXT IrpContext
  1713. )
  1714. /*++
  1715. Routine Description:
  1716. This routine is called to perform the clean checkpoint generated after
  1717. a log file full. This routine will call the clean checkpoint routine
  1718. and then release all of the resources acquired.
  1719. Arguments:
  1720. Return Value:
  1721. None.
  1722. --*/
  1723. {
  1724. PAGED_CODE();
  1725. ASSERT( FlagOn( IrpContext->TopLevelIrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL ));
  1726. IrpContext->ExceptionStatus = 0;
  1727. //
  1728. // Call the checkpoint routine to do the actual work. Skip this in the case where there is no
  1729. // longer a Vcb in the IrpContext. This can happen if doing some long running operation at
  1730. // mount time (i.e. Usn scan). In that case the long running operation should periodically
  1731. // checkpoint. Then Ntfs will do a clean checkpoint after restart and the remaining work
  1732. // to do in the long-running operation will decrease. At some point it will decrease enough
  1733. // to finish the mount.
  1734. //
  1735. // All of the other work is required since this IrpContext will be used to retry the mount.
  1736. //
  1737. if (IrpContext->Vcb != NULL) {
  1738. //
  1739. // This can raise. However, in the case of dismounts, we do want this to
  1740. // plough on and succeed the dismount. For example, cluster service marks
  1741. // the volume offline first and sends the dismount afterward, but still expects it to succeed.
  1742. //
  1743. try {
  1744. NtfsCheckpointVolume( IrpContext,
  1745. IrpContext->Vcb,
  1746. FALSE,
  1747. TRUE,
  1748. FALSE,
  1749. 0,
  1750. IrpContext->LastRestartArea );
  1751. } except (NtfsCheckpointExceptionFilter( IrpContext,
  1752. GetExceptionInformation(),
  1753. GetExceptionCode() )) {
  1754. //
  1755. // This is a LOG_FILE_FULL raise coming via dismount. Ignore errors
  1756. // because we want the dismount to succeed.
  1757. //
  1758. NtfsMinimumExceptionProcessing( IrpContext );
  1759. if (IrpContext->TransactionId != 0) {
  1760. NtfsCleanupFailedTransaction( IrpContext );
  1761. }
  1762. }
  1763. }
  1764. ASSERT( IrpContext->TransactionId == 0 );
  1765. //
  1766. // Cleanup the IrpContext but don't delete it.
  1767. //
  1768. SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
  1769. NtfsCleanupIrpContext( IrpContext, TRUE );
  1770. //
  1771. // Make sure we restore the RestartArea.
  1772. //
  1773. IrpContext->LastRestartArea = Li0;
  1774. return;
  1775. }
  1776. NTSTATUS
  1777. NtfsCheckpointForVolumeSnapshot (
  1778. IN PIRP_CONTEXT IrpContext
  1779. )
  1780. /*++
  1781. Routine Description:
  1782. This routine is called to perform a volume flush and a
  1783. clean checkpoint before a snapshot of the volume is taken.
  1784. Since we need to keep the volume quiescent, we make it a
  1785. point to leave the file resources acquired on exit.
  1786. Arguments:
  1787. IrpContext.
  1788. Return Value:
  1789. Status.
  1790. --*/
  1791. {
  1792. LOGICAL AcquiredCheckpoint;
  1793. LOGICAL AcquiredFiles = FALSE;
  1794. LOGICAL AcquiredVcb = FALSE;
  1795. PVCB Vcb;
  1796. NTSTATUS Status = STATUS_SUCCESS;
  1797. LOGICAL DefragPermitted;
  1798. KPRIORITY PreviousPriority;
  1799. BOOLEAN RestorePreviousPriority = FALSE;
  1800. PAGED_CODE();
  1801. //
  1802. // Clear the Mft defrag flag to stop any actions behind our backs.
  1803. //
  1804. Vcb = IrpContext->Vcb;
  1805. //
  1806. // If this is a readonly volume, then there's nothing we need to do.
  1807. //
  1808. if (NtfsIsVolumeReadOnly( Vcb )) {
  1809. ASSERT( Status == STATUS_SUCCESS );
  1810. DebugTrace( -1, Dbg, ("NtfsCheckpointForVolumeSnapshot -> %08lx\n", Status) );
  1811. return Status;
  1812. }
  1813. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1814. DefragPermitted = FlagOn( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  1815. ClearFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  1816. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1817. AcquiredCheckpoint = FALSE;
  1818. try {
  1819. //
  1820. // Then lock out all other checkpoint operations.
  1821. //
  1822. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1823. while (FlagOn( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS )) {
  1824. //
  1825. // Release the checkpoint event because we cannot checkpoint now.
  1826. //
  1827. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1828. NtfsWaitOnCheckpointNotify( IrpContext, Vcb );
  1829. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1830. }
  1831. SetFlag( Vcb->CheckpointFlags, VCB_CHECKPOINT_SYNC_FLAGS );
  1832. NtfsResetCheckpointNotify( IrpContext, Vcb );
  1833. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1834. AcquiredCheckpoint = TRUE;
  1835. NtfsAcquireExclusiveVcb( IrpContext, Vcb, TRUE );
  1836. AcquiredVcb = TRUE;
  1837. //
  1838. // Check that the volume is still mounted.
  1839. //
  1840. if (!FlagOn( Vcb->VcbState, VCB_STATE_VOLUME_MOUNTED )) {
  1841. Status = STATUS_VOLUME_DISMOUNTED;
  1842. leave;
  1843. }
  1844. //
  1845. // Start by flushing the volume, because we can't call FlushVolume later
  1846. // while holding only the Main resources without their corresponding
  1847. // pagingio resources. Flushing the userdata doesn't really need to be
  1848. // atomic with the rest of the operation; we just have to make sure that
  1849. // the volume is consistent and restartable without log recovery.
  1850. //
  1851. NtfsFlushVolume( IrpContext,
  1852. Vcb,
  1853. TRUE,
  1854. FALSE,
  1855. TRUE,
  1856. FALSE );
  1857. //
  1858. // Give ourselves some juice. We'll need it.
  1859. //
  1860. PreviousPriority = KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  1861. LOW_REALTIME_PRIORITY );
  1862. if (PreviousPriority != LOW_REALTIME_PRIORITY) {
  1863. RestorePreviousPriority = TRUE;
  1864. }
  1865. //
  1866. // Lock, stock, clean checkpoint, volume flush and
  1867. // two smoking barrels. No chance of acquiring PagingIo
  1868. // here; pretty much only shutdown has that luxury.
  1869. //
  1870. NtfsAcquireAllFiles( IrpContext, Vcb, TRUE, FALSE, FALSE );
  1871. AcquiredFiles = TRUE;
  1872. //
  1873. // Generate usn CLOSE records. We don't bother to get the FcbMutex because
  1874. // we already have the Fcb main resource exclusively.
  1875. //
  1876. if (Vcb->UsnJournal != NULL) {
  1877. PLIST_ENTRY Links;
  1878. PFCB_USN_RECORD UsnRecord;
  1879. while (TRUE) {
  1880. NtfsLockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  1881. Links = Vcb->ModifiedOpenFiles.Flink;
  1882. if (Links == &Vcb->ModifiedOpenFiles) {
  1883. NtfsUnlockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  1884. break;
  1885. }
  1886. UsnRecord = (PFCB_USN_RECORD)CONTAINING_RECORD( Links,
  1887. FCB_USN_RECORD,
  1888. ModifiedOpenFilesLinks );
  1889. NtfsUnlockFcb( IrpContext, Vcb->UsnJournal->Fcb );
  1890. //
  1891. // Post the CLOSE record. Checkpointing takes this UsnRecord
  1892. // off the ModifiedOpenFiles list.
  1893. //
  1894. NtfsPostUsnChange( IrpContext, UsnRecord->Fcb, USN_REASON_CLOSE );
  1895. NtfsWriteUsnJournalChanges( IrpContext );
  1896. NtfsCheckpointCurrentTransaction( IrpContext );
  1897. }
  1898. }
  1899. SetFlag( Vcb->VcbState, VCB_STATE_VOL_PURGE_IN_PROGRESS );
  1900. NtfsCheckpointVolume( IrpContext, Vcb, TRUE, TRUE, FALSE, 0, Vcb->LastRestartArea );
  1901. NtfsCommitCurrentTransaction( IrpContext );
  1902. ClearFlag( Vcb->VcbState, VCB_STATE_VOL_PURGE_IN_PROGRESS );
  1903. } finally {
  1904. //
  1905. // Restore DEFRAG_PERMITTED flag if we need to.
  1906. //
  1907. if (DefragPermitted) {
  1908. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1909. SetFlag( Vcb->MftDefragState, VCB_MFT_DEFRAG_PERMITTED );
  1910. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1911. }
  1912. //
  1913. // Release the checkpoint, if we got it, but we aren't releasing
  1914. // all the files unless there was an error.
  1915. //
  1916. if (AcquiredCheckpoint) {
  1917. NtfsAcquireCheckpoint( IrpContext, Vcb );
  1918. ClearFlag( Vcb->CheckpointFlags,
  1919. VCB_CHECKPOINT_SYNC_FLAGS | VCB_DUMMY_CHECKPOINT_POSTED);
  1920. NtfsSetCheckpointNotify( IrpContext, Vcb );
  1921. NtfsReleaseCheckpoint( IrpContext, Vcb );
  1922. }
  1923. //
  1924. // Release the file resources only if we hit an error.
  1925. // We normally do this in the completion routine for the IOCTL.
  1926. //
  1927. if (!NT_SUCCESS( Status ) || AbnormalTermination()) {
  1928. if (AcquiredFiles) {
  1929. NtfsReleaseAllFiles( IrpContext, Vcb, FALSE );
  1930. }
  1931. if (AcquiredVcb) {
  1932. NtfsReleaseVcb( IrpContext, Vcb );
  1933. }
  1934. }
  1935. if (RestorePreviousPriority) {
  1936. KeSetPriorityThread( (PKTHREAD)PsGetCurrentThread(),
  1937. PreviousPriority );
  1938. }
  1939. }
  1940. DebugTrace( -1, Dbg, ("NtfsCheckpointForVolsnap -exit\n") );
  1941. return Status;
  1942. }
  1943. VOID
  1944. NtfsCleanCheckpoint (
  1945. IN PVCB Vcb
  1946. )
  1947. /*++
  1948. Routine Description:
  1949. This routine is called to perform a single clean checkpoint at the top level
  1950. and return. It is used when the lazy writer gets a log file full in order
  1951. to perform the clean checkpoint within the thread doing the lazy write.
  1952. Arguments:
  1953. Return Value:
  1954. None.
  1955. --*/
  1956. {
  1957. IRP_CONTEXT LocalIrpContext;
  1958. PIRP_CONTEXT IrpContext = &LocalIrpContext;
  1959. PAGED_CODE();
  1960. try {
  1961. //
  1962. // Allocate an Irp Context for the request.
  1963. //
  1964. NtfsInitializeIrpContext( NULL, TRUE, &IrpContext );
  1965. IrpContext->Vcb = Vcb;
  1966. IrpContext->LastRestartArea = Vcb->LastRestartArea;
  1967. //
  1968. // There is no point in posting any dummy requests.
  1969. //
  1970. NtfsAcquireCheckpoint( IrpContext, IrpContext->Vcb );
  1971. SetFlag( IrpContext->Vcb->CheckpointFlags, VCB_DUMMY_CHECKPOINT_POSTED );
  1972. NtfsReleaseCheckpoint( IrpContext, IrpContext->Vcb );
  1973. //
  1974. // Send this off to the FspDispatch routine. It will handle all of the
  1975. // top level logic as well as deleting the IrpContext.
  1976. //
  1977. NtfsFspDispatch( IrpContext );
  1978. } except( EXCEPTION_EXECUTE_HANDLER ) {
  1979. NOTHING;
  1980. }
  1981. return;
  1982. }
  1983. VOID
  1984. NtfsCommitCurrentTransaction (
  1985. IN PIRP_CONTEXT IrpContext
  1986. )
  1987. /*++
  1988. Routine Description:
  1989. This routine commits the current transaction by writing a final record
  1990. to the log and deallocating the transaction Id.
  1991. Arguments:
  1992. Return Value:
  1993. None.
  1994. --*/
  1995. {
  1996. PTRANSACTION_ENTRY TransactionEntry;
  1997. PVCB Vcb = IrpContext->Vcb;
  1998. PFCB UsnFcb;
  1999. PUSN_FCB ThisUsn, LastUsn;
  2000. PAGED_CODE();
  2001. #if (DBG || defined( NTFS_FREE_ASSERTS ))
  2002. try {
  2003. #endif
  2004. //
  2005. // Walk through the queue of usn records. We want to remove any effect of this operation.
  2006. //
  2007. ThisUsn = &IrpContext->Usn;
  2008. do {
  2009. //
  2010. // If we log the close for a file, then it is time to reset the
  2011. // Usn reasons for the file. Nothing to do here unless we
  2012. // wrote new reasons.
  2013. //
  2014. if (ThisUsn->CurrentUsnFcb != NULL ) {
  2015. PSCB UsnJournal = Vcb->UsnJournal;
  2016. PFCB_USN_RECORD FcbUsnRecord;
  2017. UsnFcb = ThisUsn->CurrentUsnFcb;
  2018. NtfsLockFcb( IrpContext, UsnFcb );
  2019. if (UsnJournal != NULL) {
  2020. NtfsLockFcb( IrpContext, UsnJournal->Fcb );
  2021. }
  2022. FcbUsnRecord = UsnFcb->FcbUsnRecord;
  2023. //
  2024. // After locking the fcb test for the presence of the fcb record again
  2025. // DeleteUsnJournal may have already removed it
  2026. //
  2027. if (FcbUsnRecord) {
  2028. UsnFcb->Usn = FcbUsnRecord->UsnRecord.Usn;
  2029. //
  2030. // Now add or move the Fcb in the ModifiedOpenFiles list.
  2031. //
  2032. if (FlagOn( FcbUsnRecord->UsnRecord.Reason, USN_REASON_CLOSE )) {
  2033. //
  2034. // Clean up the UsnRecord in the Fcb.
  2035. //
  2036. FcbUsnRecord->UsnRecord.Reason = 0;
  2037. FcbUsnRecord->UsnRecord.SourceInfo = 0;
  2038. if (UsnJournal != NULL) {
  2039. if( FcbUsnRecord->ModifiedOpenFilesLinks.Flink != NULL ) {
  2040. RemoveEntryList( &FcbUsnRecord->ModifiedOpenFilesLinks );
  2041. FcbUsnRecord->ModifiedOpenFilesLinks.Flink = NULL;
  2042. if (FcbUsnRecord->TimeOutLinks.Flink != NULL) {
  2043. RemoveEntryList( &FcbUsnRecord->TimeOutLinks );
  2044. FcbUsnRecord->TimeOutLinks.Flink = NULL;
  2045. }
  2046. }
  2047. }
  2048. } else {
  2049. if (UsnJournal != NULL) {
  2050. if (FcbUsnRecord->ModifiedOpenFilesLinks.Flink != NULL) {
  2051. RemoveEntryList( &FcbUsnRecord->ModifiedOpenFilesLinks );
  2052. if (FcbUsnRecord->TimeOutLinks.Flink != NULL) {
  2053. RemoveEntryList( &FcbUsnRecord->TimeOutLinks );
  2054. FcbUsnRecord->TimeOutLinks.Flink = NULL;
  2055. }
  2056. }
  2057. InsertTailList( &Vcb->ModifiedOpenFiles, &FcbUsnRecord->ModifiedOpenFilesLinks );
  2058. if (UsnFcb->CleanupCount == 0) {
  2059. InsertTailList( Vcb->CurrentTimeOutFiles, &FcbUsnRecord->TimeOutLinks );
  2060. }
  2061. }
  2062. }
  2063. }
  2064. //
  2065. // Cleanup the UsnFcb in the IrpContext. It's possible that
  2066. // we might want to reuse the UsnFcb later in this request.
  2067. //
  2068. if (ThisUsn != &IrpContext->Usn) {
  2069. LastUsn->NextUsnFcb = ThisUsn->NextUsnFcb;
  2070. NtfsFreePool( ThisUsn );
  2071. ThisUsn = LastUsn;
  2072. } else {
  2073. RtlZeroMemory( &ThisUsn->CurrentUsnFcb,
  2074. sizeof( USN_FCB ) - FIELD_OFFSET( USN_FCB, CurrentUsnFcb ));
  2075. }
  2076. if (UsnJournal != NULL) {
  2077. NtfsUnlockFcb( IrpContext, UsnJournal->Fcb );
  2078. }
  2079. NtfsUnlockFcb( IrpContext, UsnFcb );
  2080. }
  2081. if (ThisUsn->NextUsnFcb == NULL) { break; }
  2082. //
  2083. // Move to the next entry.
  2084. //
  2085. LastUsn = ThisUsn;
  2086. ThisUsn = ThisUsn->NextUsnFcb;
  2087. } while (TRUE);
  2088. //
  2089. // If this request created a transaction, complete it now.
  2090. //
  2091. if (IrpContext->TransactionId != 0) {
  2092. LSN CommitLsn;
  2093. //
  2094. // It is possible to get a LOG_FILE_FULL before writing
  2095. // out the first log record of a transaction. In that
  2096. // case there is a transaction Id but we haven't reserved
  2097. // space in the log file. It is wrong to write the
  2098. // commit record in this case because we can get an
  2099. // unexpected LOG_FILE_FULL. We can also test the UndoRecords
  2100. // count in the transaction entry but don't want to acquire
  2101. // the restart table to make this check.
  2102. //
  2103. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_WROTE_LOG )) {
  2104. //
  2105. // Write the log record to "forget" this transaction,
  2106. // because it should not be aborted. Until if/when we
  2107. // do real TP, commit and forget are atomic.
  2108. //
  2109. CommitLsn =
  2110. NtfsWriteLog( IrpContext,
  2111. Vcb->MftScb,
  2112. NULL,
  2113. ForgetTransaction,
  2114. NULL,
  2115. 0,
  2116. CompensationLogRecord,
  2117. (PVOID)&Li0,
  2118. sizeof(LSN),
  2119. (LONGLONG)0,
  2120. 0,
  2121. 0,
  2122. 0 );
  2123. }
  2124. //
  2125. // We can now free the transaction table index, because we are
  2126. // done with it now.
  2127. //
  2128. NtfsAcquireExclusiveRestartTable( &Vcb->TransactionTable,
  2129. TRUE );
  2130. TransactionEntry = (PTRANSACTION_ENTRY)GetRestartEntryFromIndex(
  2131. &Vcb->TransactionTable,
  2132. IrpContext->TransactionId );
  2133. //
  2134. // Call Lfs to free our undo space.
  2135. //
  2136. if ((TransactionEntry->UndoRecords != 0) &&
  2137. (!FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS ))) {
  2138. LfsResetUndoTotal( Vcb->LogHandle,
  2139. TransactionEntry->UndoRecords,
  2140. -TransactionEntry->UndoBytes );
  2141. }
  2142. NtfsFreeRestartTableIndex( &Vcb->TransactionTable,
  2143. IrpContext->TransactionId );
  2144. IrpContext->TransactionId = 0;
  2145. NtfsReleaseRestartTable( &Vcb->TransactionTable );
  2146. //
  2147. // One way we win by being recoverable, is that we do not really
  2148. // have to do write-through - flushing the updates to the log
  2149. // is enough. We don't make this call if we are in the abort
  2150. // transaction path. Otherwise we could get a log file full
  2151. // while aborting.
  2152. //
  2153. if (FlagOn( IrpContext->TopLevelIrpContext->State, IRP_CONTEXT_STATE_WRITE_THROUGH ) &&
  2154. (IrpContext == IrpContext->TopLevelIrpContext) &&
  2155. (IrpContext->TopLevelIrpContext->ExceptionStatus == STATUS_SUCCESS)) {
  2156. NtfsUpdateScbSnapshots( IrpContext );
  2157. LfsFlushToLsn( Vcb->LogHandle, CommitLsn );
  2158. }
  2159. }
  2160. //
  2161. // Signal any waiters for the new length.
  2162. //
  2163. if (IrpContext->CheckNewLength != NULL) {
  2164. NtfsProcessNewLengthQueue( IrpContext, FALSE );
  2165. }
  2166. #if (DBG || defined( NTFS_FREE_ASSERTS ))
  2167. } except( ASSERT( GetExceptionCode() != STATUS_LOG_FILE_FULL ), EXCEPTION_CONTINUE_SEARCH ) {
  2168. }
  2169. #endif
  2170. }
  2171. VOID
  2172. NtfsCheckpointCurrentTransaction (
  2173. IN PIRP_CONTEXT IrpContext
  2174. )
  2175. /*++
  2176. Routine Description:
  2177. This routine checkpoints the current transaction by commiting it
  2178. to the log and deallocating the transaction Id. The current request
  2179. cann keep running, but changes to date are committed and will not be
  2180. backed out.
  2181. Arguments:
  2182. Return Value:
  2183. None.
  2184. --*/
  2185. {
  2186. PVCB Vcb = IrpContext->Vcb;
  2187. PAGED_CODE();
  2188. //
  2189. // If there are new UsnReasons in the IrpContext, then we shoudld write the journal
  2190. // now. Note that it is ok for a checkpoint to get logfile full, but in general commit
  2191. // should not.
  2192. //
  2193. if ((IrpContext->Usn.NewReasons | IrpContext->Usn.RemovedSourceInfo) != 0) {
  2194. NtfsWriteUsnJournalChanges( IrpContext );
  2195. }
  2196. NtfsCommitCurrentTransaction( IrpContext );
  2197. //
  2198. // Cleanup any recently deallocated record information for this transaction.
  2199. //
  2200. NtfsDeallocateRecordsComplete( IrpContext );
  2201. IrpContext->DeallocatedClusters = 0;
  2202. IrpContext->FreeClusterChange = 0;
  2203. //
  2204. // The following resources may have been flagged for immediate release on commit.
  2205. //
  2206. if (Vcb->AcquireFilesCount == 0) {
  2207. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_USN_JRNL )) {
  2208. NtfsReleaseScb( IrpContext, Vcb->UsnJournal );
  2209. }
  2210. if (FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_MFT )) {
  2211. NtfsReleaseScb( IrpContext, Vcb->MftScb );
  2212. }
  2213. }
  2214. ClearFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_RELEASE_USN_JRNL |
  2215. IRP_CONTEXT_FLAG_RELEASE_MFT );
  2216. NtfsUpdateScbSnapshots( IrpContext );
  2217. }
  2218. VOID
  2219. NtfsInitializeLogging (
  2220. )
  2221. /*
  2222. Routine Description:
  2223. This routine is to be called once during startup of Ntfs (not once
  2224. per volume), to initialize the logging support.
  2225. Parameters:
  2226. None
  2227. Return Value:
  2228. None
  2229. --*/
  2230. {
  2231. PAGED_CODE();
  2232. DebugTrace( +1, Dbg, ("NtfsInitializeLogging:\n") );
  2233. LfsInitializeLogFileService();
  2234. DebugTrace( -1, Dbg, ("NtfsInitializeLogging -> VOID\n") );
  2235. }
  2236. VOID
  2237. NtfsStartLogFile (
  2238. IN PSCB LogFileScb,
  2239. IN PVCB Vcb
  2240. )
  2241. /*++
  2242. Routine Description:
  2243. This routine opens the log file for a volume by calling Lfs. The returned
  2244. LogHandle is stored in the Vcb. If the log file has not been initialized,
  2245. Lfs detects this and initializes it automatically.
  2246. Arguments:
  2247. LogFileScb - The Scb for the log file
  2248. Vcb - Pointer to the Vcb for this volume
  2249. Return Value:
  2250. None.
  2251. --*/
  2252. {
  2253. UNICODE_STRING UnicodeName;
  2254. LFS_INFO LfsInfo;
  2255. PAGED_CODE();
  2256. DebugTrace( +1, Dbg, ("NtfsStartLogFile:\n") );
  2257. RtlInitUnicodeString( &UnicodeName, L"NTFS" );
  2258. //
  2259. // LfsInfo structure acts as a information conduit between
  2260. // LFS and the NTFS client.
  2261. //
  2262. if (Vcb->MajorVersion >= 3) {
  2263. LfsInfo.LfsClientInfo = LfsFixedPageSize;
  2264. } else {
  2265. LfsInfo.LfsClientInfo = LfsPackLog;
  2266. }
  2267. LfsInfo.ReadOnly = (LOGICAL)NtfsIsVolumeReadOnly( Vcb );
  2268. LfsInfo.InRestart = (LOGICAL)FlagOn( Vcb->VcbState, VCB_STATE_RESTART_IN_PROGRESS );
  2269. LfsInfo.BadRestart = (LOGICAL)FlagOn( Vcb->VcbState, VCB_STATE_BAD_RESTART );
  2270. //
  2271. // Slam the allocation size into file size and valid data in case there
  2272. // is some error.
  2273. //
  2274. LogFileScb->Header.FileSize = LogFileScb->Header.AllocationSize;
  2275. LogFileScb->Header.ValidDataLength = LogFileScb->Header.AllocationSize;
  2276. //
  2277. // Now call into LFS and Open/Restart the log file. This could raise
  2278. // for various reasons, one of which is an attempt to do restart
  2279. // on a write protected volume. Vcb wont have the VALID_LOG_HANDLE flag then.
  2280. //
  2281. Vcb->LogHeaderReservation = LfsOpenLogFile( LogFileScb->FileObject,
  2282. UnicodeName,
  2283. 1,
  2284. 0,
  2285. LogFileScb->Header.AllocationSize.QuadPart,
  2286. &LfsInfo,
  2287. &Vcb->LogHandle,
  2288. &Vcb->LfsWriteData );
  2289. SetFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
  2290. DebugTrace( -1, Dbg, ("NtfsStartLogFile -> VOID\n") );
  2291. }
  2292. VOID
  2293. NtfsStopLogFile (
  2294. IN PVCB Vcb
  2295. )
  2296. /*
  2297. Routine Description:
  2298. This routine should be called during volume dismount to close the volume's
  2299. log file with the log file service.
  2300. Arguments:
  2301. Vcb - Pointer to the Vcb for the volume
  2302. Return Value:
  2303. None
  2304. --*/
  2305. {
  2306. LFS_LOG_HANDLE LogHandle = Vcb->LogHandle;
  2307. PAGED_CODE();
  2308. DebugTrace( +1, Dbg, ("NtfsStopLogFile:\n") );
  2309. if (FlagOn( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE )) {
  2310. ASSERT( LogHandle != NULL );
  2311. //
  2312. // We don't do any logfile flushing if the volume
  2313. // is mounted read only or if the device is already gone.
  2314. //
  2315. if (!NtfsIsVolumeReadOnly( Vcb )) {
  2316. //
  2317. // Proceed even if this call fails. There is nothing
  2318. // more we can do at this point.
  2319. //
  2320. try {
  2321. LfsFlushToLsn( LogHandle, LiMax );
  2322. } except( (FsRtlIsNtstatusExpected( GetExceptionCode() )) ?
  2323. EXCEPTION_EXECUTE_HANDLER :
  2324. EXCEPTION_CONTINUE_SEARCH ) {
  2325. NOTHING;
  2326. }
  2327. }
  2328. ClearFlag( Vcb->VcbState, VCB_STATE_VALID_LOG_HANDLE );
  2329. //
  2330. // Allow LFS to close its books. We do this even for readonly
  2331. // mounts, although we filter writes at the LFS level for those.
  2332. //
  2333. LfsCloseLogFile( LogHandle );
  2334. }
  2335. DebugTrace( -1, Dbg, ("NtfsStopLogFile -> VOID\n") );
  2336. }
  2337. VOID
  2338. NtfsInitializeRestartTable (
  2339. IN ULONG EntrySize,
  2340. IN ULONG NumberEntries,
  2341. OUT PRESTART_POINTERS TablePointer
  2342. )
  2343. /*++
  2344. Routine Description:
  2345. This routine is called to allocate and initialize a new Restart Table,
  2346. and return a pointer to it.
  2347. Arguments:
  2348. EntrySize - Size of the table entries, in bytes.
  2349. NumberEntries - Number of entries to allocate for the table.
  2350. TablePointer - Returns a pointer to the table.
  2351. Return Value:
  2352. None
  2353. --*/
  2354. {
  2355. PAGED_CODE();
  2356. try {
  2357. NtfsInitializeRestartPointers( TablePointer );
  2358. //
  2359. // Call common routine to allocate the actual table.
  2360. //
  2361. InitializeNewTable( EntrySize, NumberEntries, TablePointer );
  2362. } finally {
  2363. DebugUnwind( NtfsInitializeRestartTable );
  2364. //
  2365. // On error, clean up any partial work that was done.
  2366. //
  2367. if (AbnormalTermination()) {
  2368. NtfsFreeRestartTable( TablePointer );
  2369. }
  2370. }
  2371. }
  2372. VOID
  2373. NtfsFreeRestartTable (
  2374. IN PRESTART_POINTERS TablePointer
  2375. )
  2376. /*++
  2377. Routine Description:
  2378. This routine frees a previously allocated Restart Table.
  2379. Arguments:
  2380. TablePointer - Pointer to the Restart Table to delete.
  2381. Return Value:
  2382. None.
  2383. --*/
  2384. {
  2385. PAGED_CODE();
  2386. if (TablePointer->Table != NULL) {
  2387. NtfsFreePool( TablePointer->Table );
  2388. TablePointer->Table = NULL;
  2389. }
  2390. if (TablePointer->ResourceInitialized) {
  2391. ExDeleteResourceLite( &TablePointer->Resource );
  2392. TablePointer->ResourceInitialized = FALSE;
  2393. }
  2394. }
  2395. VOID
  2396. NtfsExtendRestartTable (
  2397. IN PRESTART_POINTERS TablePointer,
  2398. IN ULONG NumberNewEntries,
  2399. IN ULONG FreeGoal
  2400. )
  2401. /*++
  2402. Routine Description:
  2403. This routine extends a previously allocated Restart Table, by
  2404. creating and initializing a new one, and copying over the the
  2405. table entries from the old one. The old table is then deallocated.
  2406. On return, the table pointer points to the new Restart Table.
  2407. Arguments:
  2408. TablePointer - Address of the pointer to the previously created table.
  2409. NumberNewEntries - The number of addtional entries to be allocated
  2410. in the new table.
  2411. FreeGoal - A hint as to what point the caller would like to truncate
  2412. the table back to, when sufficient entries are deleted.
  2413. If truncation is not desired, then MAXULONG may be specified.
  2414. Return Value:
  2415. None.
  2416. --*/
  2417. {
  2418. PRESTART_TABLE NewTable, OldTable;
  2419. ULONG OldSize;
  2420. OldSize = SizeOfRestartTable(TablePointer);
  2421. //
  2422. // Get pointer to old table.
  2423. //
  2424. OldTable = TablePointer->Table;
  2425. ASSERT_RESTART_TABLE(OldTable);
  2426. //
  2427. // Start by initializing a table for the new size.
  2428. //
  2429. InitializeNewTable( OldTable->EntrySize,
  2430. OldTable->NumberEntries + NumberNewEntries,
  2431. TablePointer );
  2432. //
  2433. // Copy body of old table in place to new table.
  2434. //
  2435. NewTable = TablePointer->Table;
  2436. RtlMoveMemory( (NewTable + 1),
  2437. (OldTable + 1),
  2438. OldTable->EntrySize * OldTable->NumberEntries );
  2439. //
  2440. // Fix up new table's header, and fix up free list.
  2441. //
  2442. NewTable->FreeGoal = MAXULONG;
  2443. if (FreeGoal != MAXULONG) {
  2444. NewTable->FreeGoal = sizeof(RESTART_TABLE) + FreeGoal * NewTable->EntrySize;
  2445. }
  2446. if (OldTable->FirstFree != 0) {
  2447. NewTable->FirstFree = OldTable->FirstFree;
  2448. *(PULONG)GetRestartEntryFromIndex( TablePointer, OldTable->LastFree ) =
  2449. OldSize;;
  2450. } else {
  2451. NewTable->FirstFree = OldSize;
  2452. }
  2453. //
  2454. // Copy number allocated
  2455. //
  2456. NewTable->NumberAllocated = OldTable->NumberAllocated;
  2457. //
  2458. // Free the old table and return the new one.
  2459. //
  2460. NtfsFreePool( OldTable );
  2461. ASSERT_RESTART_TABLE(NewTable);
  2462. }
  2463. ULONG
  2464. NtfsAllocateRestartTableIndex (
  2465. IN PRESTART_POINTERS TablePointer,
  2466. IN ULONG Exclusive
  2467. )
  2468. /*++
  2469. Routine Description:
  2470. This routine allocates an index from within a previously initialized
  2471. Restart Table. If the table is empty, it is extended.
  2472. Note that the table must already be acquired either shared or exclusive,
  2473. and if it must be extended, then the table is released and will be
  2474. acquired exclusive on return.
  2475. Arguments:
  2476. TablePointer - Pointer to the Restart Table in which an index is to
  2477. be allocated.
  2478. Exclusive - Indicates if we have the table exclusive (or if we know that
  2479. synchronization is not a problem).
  2480. Return Value:
  2481. The allocated index.
  2482. --*/
  2483. {
  2484. PRESTART_TABLE Table;
  2485. ULONG EntryIndex;
  2486. KLOCK_QUEUE_HANDLE LockHandle;
  2487. PULONG Entry;
  2488. DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableIndex:\n") );
  2489. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  2490. Table = TablePointer->Table;
  2491. ASSERT_RESTART_TABLE(Table);
  2492. //
  2493. // Acquire the spin lock to synchronize the allocation.
  2494. //
  2495. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2496. //
  2497. // If the table is empty, then we have to extend it.
  2498. //
  2499. if (Table->FirstFree == 0) {
  2500. //
  2501. // First release the spin lock and the table resource, and get
  2502. // the resource exclusive.
  2503. //
  2504. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2505. if (!Exclusive) {
  2506. NtfsReleaseRestartTable( TablePointer );
  2507. NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
  2508. }
  2509. //
  2510. // Now extend the table. Note that if this routine raises, we have
  2511. // nothing to release.
  2512. //
  2513. NtfsExtendRestartTable( TablePointer, 16, MAXULONG );
  2514. //
  2515. // And re-get our pointer to the restart table
  2516. //
  2517. Table = TablePointer->Table;
  2518. //
  2519. // Now get the spin lock again and proceed.
  2520. //
  2521. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2522. }
  2523. //
  2524. // Get First Free to return it.
  2525. //
  2526. EntryIndex = Table->FirstFree;
  2527. ASSERT( EntryIndex != 0 );
  2528. //
  2529. // Dequeue this entry and zero it.
  2530. //
  2531. Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, EntryIndex );
  2532. Table->FirstFree = *Entry;
  2533. ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
  2534. RtlZeroMemory( Entry, Table->EntrySize );
  2535. //
  2536. // Show that it's allocated.
  2537. //
  2538. *Entry = RESTART_ENTRY_ALLOCATED;
  2539. //
  2540. // If list is going empty, then we fix the LastFree as well.
  2541. //
  2542. if (Table->FirstFree == 0) {
  2543. Table->LastFree = 0;
  2544. }
  2545. Table->NumberAllocated += 1;
  2546. //
  2547. // Now just release the spin lock before returning.
  2548. //
  2549. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2550. DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableIndex -> %08lx\n", EntryIndex) );
  2551. return EntryIndex;
  2552. }
  2553. PVOID
  2554. NtfsAllocateRestartTableFromIndex (
  2555. IN PRESTART_POINTERS TablePointer,
  2556. IN ULONG Index
  2557. )
  2558. /*++
  2559. Routine Description:
  2560. This routine allocates a specific index from within a previously
  2561. initialized Restart Table. If the index does not exist within the
  2562. existing table, the table is extended.
  2563. Note that the table must already be acquired either shared or exclusive,
  2564. and if it must be extended, then the table is released and will be
  2565. acquired exclusive on return.
  2566. Arguments:
  2567. TablePointer - Pointer to the Restart Table in which an index is to
  2568. be allocated.
  2569. Index - The index to be allocated.
  2570. Return Value:
  2571. The table entry allocated.
  2572. --*/
  2573. {
  2574. PULONG Entry;
  2575. PULONG LastEntry;
  2576. PRESTART_TABLE Table;
  2577. KLOCK_QUEUE_HANDLE LockHandle;
  2578. ULONG ThisIndex;
  2579. ULONG LastIndex;
  2580. DebugTrace( +1, Dbg, ("NtfsAllocateRestartTableFromIndex\n") );
  2581. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  2582. DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
  2583. Table = TablePointer->Table;
  2584. ASSERT_RESTART_TABLE(Table);
  2585. //
  2586. // Acquire the spin lock to synchronize the allocation.
  2587. //
  2588. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2589. //
  2590. // If the entry is not in the table, we will have to extend the table.
  2591. //
  2592. if (!IsRestartIndexWithinTable( TablePointer, Index )) {
  2593. ULONG TableSize;
  2594. ULONG BytesToIndex;
  2595. ULONG AddEntries;
  2596. //
  2597. // We extend the size by computing the number of entries
  2598. // between the existing size and the desired index and
  2599. // adding 1 to that.
  2600. //
  2601. TableSize = SizeOfRestartTable( TablePointer );;
  2602. BytesToIndex = Index - TableSize;
  2603. AddEntries = BytesToIndex / Table->EntrySize + 1;
  2604. //
  2605. // There should always be an integral number of entries being added.
  2606. //
  2607. ASSERT( BytesToIndex % Table->EntrySize == 0 );
  2608. //
  2609. // First release the spin lock and the table resource, and get
  2610. // the resource exclusive.
  2611. //
  2612. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2613. NtfsReleaseRestartTable( TablePointer );
  2614. NtfsAcquireExclusiveRestartTable( TablePointer, TRUE );
  2615. //
  2616. // Now extend the table. Note that if this routine raises, we have
  2617. // nothing to release.
  2618. //
  2619. NtfsExtendRestartTable( TablePointer,
  2620. AddEntries,
  2621. TableSize );
  2622. Table = TablePointer->Table;
  2623. ASSERT_RESTART_TABLE(Table);
  2624. //
  2625. // Now get the spin lock again and proceed.
  2626. //
  2627. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2628. }
  2629. //
  2630. // Now see if the entry is already allocated, and just return if it is.
  2631. //
  2632. Entry = (PULONG)GetRestartEntryFromIndex( TablePointer, Index );
  2633. if (!IsRestartTableEntryAllocated(Entry)) {
  2634. //
  2635. // We now have to walk through the table, looking for the entry
  2636. // we're interested in and the previous entry. Start by looking at the
  2637. // first entry.
  2638. //
  2639. ThisIndex = Table->FirstFree;
  2640. //
  2641. // Get the Entry from the list.
  2642. //
  2643. Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
  2644. //
  2645. // If this is a match, then we pull it out of the list and are done.
  2646. //
  2647. if (ThisIndex == Index) {
  2648. //
  2649. // Dequeue this entry.
  2650. //
  2651. Table->FirstFree = *Entry;
  2652. ASSERT( Table->FirstFree != RESTART_ENTRY_ALLOCATED );
  2653. //
  2654. // Otherwise we need to walk through the list looking for the
  2655. // predecessor of our entry.
  2656. //
  2657. } else {
  2658. while (TRUE) {
  2659. //
  2660. // Remember the entry just found.
  2661. //
  2662. LastIndex = ThisIndex;
  2663. LastEntry = Entry;
  2664. //
  2665. // We should never run out of entries.
  2666. //
  2667. ASSERT( *LastEntry != 0 );
  2668. //
  2669. // Lookup up the next entry in the list.
  2670. //
  2671. ThisIndex = *LastEntry;
  2672. Entry = (PULONG) GetRestartEntryFromIndex( TablePointer, ThisIndex );
  2673. //
  2674. // If this is our match we are done.
  2675. //
  2676. if (ThisIndex == Index) {
  2677. //
  2678. // Dequeue this entry.
  2679. //
  2680. *LastEntry = *Entry;
  2681. //
  2682. // If this was the last entry, we update that in the
  2683. // table as well.
  2684. //
  2685. if (Table->LastFree == ThisIndex) {
  2686. Table->LastFree = LastIndex;
  2687. }
  2688. break;
  2689. }
  2690. }
  2691. }
  2692. //
  2693. // If the list is now empty, we fix the LastFree as well.
  2694. //
  2695. if (Table->FirstFree == 0) {
  2696. Table->LastFree = 0;
  2697. }
  2698. //
  2699. // Zero this entry. Then show that this is allocated and increment the
  2700. // allocated count.
  2701. //
  2702. RtlZeroMemory( Entry, Table->EntrySize );
  2703. *Entry = RESTART_ENTRY_ALLOCATED;
  2704. Table->NumberAllocated += 1;
  2705. }
  2706. //
  2707. // Now just release the spin lock before returning.
  2708. //
  2709. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2710. DebugTrace( -1, Dbg, ("NtfsAllocateRestartTableFromIndex -> %08lx\n", Entry) );
  2711. return (PVOID)Entry;
  2712. }
  2713. VOID
  2714. NtfsFreeRestartTableIndex (
  2715. IN PRESTART_POINTERS TablePointer,
  2716. IN ULONG Index
  2717. )
  2718. /*++
  2719. Routine Description:
  2720. This routine frees a previously allocated index in a Restart Table.
  2721. If the index is before FreeGoal for the table, it is simply deallocated to
  2722. the front of the list for immediate reuse. If the index is beyond
  2723. FreeGoal, then it is deallocated to the end of the list, to facilitate
  2724. truncation of the list in the event that all of the entries beyond
  2725. FreeGoal are freed. However, this routine does not automatically
  2726. truncate the list, as this would cause too much overhead. The list
  2727. is checked during periodic checkpoint processing.
  2728. Arguments:
  2729. TablePointer - Pointer to the Restart Table to which the index is to be
  2730. deallocated.
  2731. Index - The index being deallocated.
  2732. Return Value:
  2733. None.
  2734. --*/
  2735. {
  2736. PRESTART_TABLE Table;
  2737. PULONG Entry, OldLastEntry;
  2738. KLOCK_QUEUE_HANDLE LockHandle;
  2739. DebugTrace( +1, Dbg, ("NtfsFreeRestartTableIndex:\n") );
  2740. DebugTrace( 0, Dbg, ("TablePointer = %08lx\n", TablePointer) );
  2741. DebugTrace( 0, Dbg, ("Index = %08lx\n", Index) );
  2742. //
  2743. // Get pointers to table and the entry we are freeing.
  2744. //
  2745. Table = TablePointer->Table;
  2746. ASSERT_RESTART_TABLE(Table);
  2747. ASSERT( Table->FirstFree == 0
  2748. || (Table->FirstFree >= 0x18)
  2749. && ((Table->FirstFree - 0x18) % Table->EntrySize) == 0 );
  2750. ASSERT( (Index >= 0x18)
  2751. && ((Index - 0x18) % Table->EntrySize) == 0 );
  2752. Entry = GetRestartEntryFromIndex( TablePointer, Index );
  2753. //
  2754. // Acquire the spin lock to synchronize the allocation.
  2755. //
  2756. KeAcquireInStackQueuedSpinLock( &TablePointer->SpinLock, &LockHandle );
  2757. //
  2758. // If the index is before FreeGoal, then do a normal deallocation at
  2759. // the front of the list.
  2760. //
  2761. if (Index < Table->FreeGoal) {
  2762. *Entry = Table->FirstFree;
  2763. Table->FirstFree = Index;
  2764. if (Table->LastFree == 0) {
  2765. Table->LastFree = Index;
  2766. }
  2767. //
  2768. // Otherwise we will deallocate this guy to the end of the list.
  2769. //
  2770. } else {
  2771. if (Table->LastFree != 0) {
  2772. OldLastEntry = GetRestartEntryFromIndex( TablePointer,
  2773. Table->LastFree );
  2774. *OldLastEntry = Index;
  2775. } else {
  2776. Table->FirstFree = Index;
  2777. }
  2778. Table->LastFree = Index;
  2779. *Entry = 0;
  2780. }
  2781. Table->NumberAllocated -= 1;
  2782. //
  2783. // Now just release the spin lock before returning.
  2784. //
  2785. KeReleaseInStackQueuedSpinLock( &LockHandle );
  2786. DebugTrace( -1, Dbg, ("NtfsFreeRestartTableIndex -> VOID\n") );
  2787. }
  2788. PVOID
  2789. NtfsGetFirstRestartTable (
  2790. IN PRESTART_POINTERS TablePointer
  2791. )
  2792. /*++
  2793. Routine Description:
  2794. This routine returns the first allocated entry from a Restart Table.
  2795. Arguments:
  2796. TablePointer - Pointer to the respective Restart Table Pointers structure.
  2797. Return Value:
  2798. Pointer to the first entry, or NULL if none are allocated.
  2799. --*/
  2800. {
  2801. PCHAR Entry;
  2802. PAGED_CODE();
  2803. //
  2804. // If we know the table is empty, we can return immediately.
  2805. //
  2806. if (IsRestartTableEmpty( TablePointer )) {
  2807. return NULL;
  2808. }
  2809. //
  2810. // Otherwise point to the first table entry.
  2811. //
  2812. Entry = (PCHAR)(TablePointer->Table + 1);
  2813. //
  2814. // Loop until we hit the first one allocated, or the end of the list.
  2815. //
  2816. while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
  2817. SizeOfRestartTable(TablePointer)) {
  2818. if (IsRestartTableEntryAllocated(Entry)) {
  2819. return (PVOID)Entry;
  2820. }
  2821. Entry += TablePointer->Table->EntrySize;
  2822. }
  2823. return NULL;
  2824. }
  2825. PVOID
  2826. NtfsGetNextRestartTable (
  2827. IN PRESTART_POINTERS TablePointer,
  2828. IN PVOID Current
  2829. )
  2830. /*++
  2831. Routine Description:
  2832. This routine returns the next allocated entry from a Restart Table.
  2833. Arguments:
  2834. TablePointer - Pointer to the respective Restart Table Pointers structure.
  2835. Current - Current entry pointer.
  2836. Return Value:
  2837. Pointer to the next entry, or NULL if none are allocated.
  2838. --*/
  2839. {
  2840. PCHAR Entry = (PCHAR)Current;
  2841. PAGED_CODE();
  2842. //
  2843. // Point to the next entry.
  2844. //
  2845. Entry += TablePointer->Table->EntrySize;
  2846. //
  2847. // Loop until we hit the first one allocated, or the end of the list.
  2848. //
  2849. while ((ULONG)(Entry - (PCHAR)TablePointer->Table) <
  2850. SizeOfRestartTable(TablePointer)) {
  2851. if (IsRestartTableEntryAllocated(Entry)) {
  2852. return (PVOID)Entry;
  2853. }
  2854. Entry += TablePointer->Table->EntrySize;
  2855. }
  2856. return NULL;
  2857. }
  2858. VOID
  2859. NtfsUpdateOatVersion (
  2860. IN PVCB Vcb,
  2861. IN ULONG NewRestartVersion
  2862. )
  2863. /*++
  2864. Routine Description:
  2865. This routine is called when we are switching the restart version for a volume. This can happen
  2866. either after a clean checkpoint or at mount when we encounter a restart area with a non-default
  2867. version number.
  2868. Arguments:
  2869. Vcb - Pointer to the Vcb for the volume.
  2870. NewRestartVersion - Restart version to start using for this volume.
  2871. Return Value:
  2872. None
  2873. --*/
  2874. {
  2875. PRESTART_POINTERS NewTable = NULL;
  2876. PAGED_CODE();
  2877. DebugTrace( +1, Dbg, ("NtfsUpdateOatVersion\n") );
  2878. ASSERT( (Vcb->RestartVersion != NewRestartVersion) || (Vcb->OnDiskOat == NULL) );
  2879. //
  2880. // Use a try finally to facilitate cleanup.
  2881. //
  2882. try {
  2883. if (NewRestartVersion == 0) {
  2884. //
  2885. // If we are moving to version 0 then allocate a new table and
  2886. // initialize it with the initial number of entries.
  2887. //
  2888. NewTable = NtfsAllocatePool( NonPagedPool, sizeof( RESTART_POINTERS ));
  2889. NtfsInitializeRestartTable( sizeof( OPEN_ATTRIBUTE_ENTRY_V0 ),
  2890. INITIAL_NUMBER_ATTRIBUTES,
  2891. NewTable );
  2892. Vcb->RestartVersion = 0;
  2893. Vcb->OatEntrySize = SIZEOF_OPEN_ATTRIBUTE_ENTRY_V0;
  2894. Vcb->OnDiskOat = NewTable;
  2895. NewTable = NULL;
  2896. } else {
  2897. if (Vcb->OnDiskOat != NULL) {
  2898. NtfsFreeRestartTable( Vcb->OnDiskOat );
  2899. NtfsFreePool( Vcb->OnDiskOat );
  2900. }
  2901. Vcb->OnDiskOat = &Vcb->OpenAttributeTable;
  2902. Vcb->RestartVersion = 1;
  2903. Vcb->OatEntrySize = sizeof( OPEN_ATTRIBUTE_ENTRY );
  2904. }
  2905. } finally {
  2906. DebugUnwind( NtfsUpdateOatVersion );
  2907. if (NewTable != NULL) {
  2908. NtfsFreePool( NewTable );
  2909. }
  2910. }
  2911. DebugTrace( -1, Dbg, ("NtfsUpdateOatVersion -> VOID\n") );
  2912. return;
  2913. }
  2914. //
  2915. // Internal support routine
  2916. //
  2917. VOID
  2918. DirtyPageRoutine (
  2919. IN PFILE_OBJECT FileObject,
  2920. IN PLARGE_INTEGER FileOffset,
  2921. IN ULONG Length,
  2922. IN PLSN OldestLsn,
  2923. IN PLSN NewestLsn,
  2924. IN PVOID Context1,
  2925. IN PVOID Context2
  2926. )
  2927. /*++
  2928. Routine Description:
  2929. This routine is used as the call back routine for retrieving dirty pages
  2930. from the Cache Manager. It adds them to the Dirty Table list whose
  2931. pointer is pointed to by the Context parameter.
  2932. Arguments:
  2933. FileObject - Pointer to the file object which has the dirty page
  2934. FileOffset - File offset for start of dirty page
  2935. Length - Length recorded for the dirty page
  2936. OldestLsn - Oldest Lsn of an update not written through stored for that page
  2937. Context1 - IrpContext
  2938. Context2 - Pointer to the pointer to the Restart Table
  2939. Return Value:
  2940. None
  2941. --*/
  2942. {
  2943. PDIRTY_PAGE_ENTRY PageEntry;
  2944. PRESTART_POINTERS DirtyPageTable = (PRESTART_POINTERS)Context2;
  2945. PSCB_NONPAGED NonpagedScb;
  2946. ULONG PageIndex;
  2947. DebugTrace( +1, Dbg, ("DirtyPageRoutine:\n") );
  2948. DebugTrace( 0, Dbg, ("FileObject = %08lx\n", FileObject) );
  2949. DebugTrace( 0, Dbg, ("FileOffset = %016I64x\n", *FileOffset) );
  2950. DebugTrace( 0, Dbg, ("Length = %08lx\n", Length) );
  2951. DebugTrace( 0, Dbg, ("OldestLsn = %016I64x\n", *OldestLsn) );
  2952. DebugTrace( 0, Dbg, ("Context2 = %08lx\n", Context2) );
  2953. //
  2954. // Get the Vcb out of the file object.
  2955. //
  2956. NonpagedScb = CONTAINING_RECORD( FileObject->SectionObjectPointer,
  2957. SCB_NONPAGED,
  2958. SegmentObject );
  2959. //
  2960. // We noop this call if the open attribute entry for this Scb is 0. We assume
  2961. // there was a clean volume checkpoint which cleared this field.
  2962. //
  2963. if (NonpagedScb->OpenAttributeTableIndex == 0 ) {
  2964. DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
  2965. return;
  2966. }
  2967. //
  2968. // Get a pointer to the entry we just allocated.
  2969. //
  2970. PageIndex = NtfsAllocateRestartTableIndex( DirtyPageTable, TRUE );
  2971. PageEntry = GetRestartEntryFromIndex( DirtyPageTable, PageIndex );
  2972. //
  2973. // Now fill in the Dirty Page Entry, except for the Lcns, because
  2974. // we are not allowed to take page faults now.
  2975. // Use the index for the in-memory table now. We will update
  2976. // this to the on-disk index back in CheckpointVolume.
  2977. //
  2978. PageEntry->TargetAttribute = NonpagedScb->OpenAttributeTableIndex;
  2979. ASSERT( NonpagedScb->OnDiskOatIndex != 0 );
  2980. PageEntry->LengthOfTransfer = Length;
  2981. //
  2982. // Put the Vcn (FileOffset) and OldestLsn into the page at this point. Note
  2983. // we don't want to put an Lsn into the table which is older than the current
  2984. // BaseLsn. Store it here for now and we will fix it up when we process the
  2985. // DiryPage table back in the checkpoint code.
  2986. //
  2987. if (NonpagedScb->Vcb->RestartVersion == 0) {
  2988. ((PDIRTY_PAGE_ENTRY_V0) PageEntry)->Vcn = FileOffset->QuadPart;
  2989. ((PDIRTY_PAGE_ENTRY_V0) PageEntry)->OldestLsn = *OldestLsn;
  2990. } else {
  2991. PageEntry->Vcn = FileOffset->QuadPart;
  2992. PageEntry->OldestLsn = *OldestLsn;
  2993. }
  2994. DebugTrace( -1, Dbg, ("DirtyPageRoutine -> VOID\n") );
  2995. return;
  2996. UNREFERENCED_PARAMETER( Context1 );
  2997. UNREFERENCED_PARAMETER( NewestLsn );
  2998. }
  2999. //
  3000. // Internal support routine
  3001. //
  3002. BOOLEAN
  3003. LookupLcns (
  3004. IN PIRP_CONTEXT IrpContext,
  3005. IN PSCB Scb,
  3006. IN VCN Vcn,
  3007. IN ULONG ClusterCount,
  3008. IN BOOLEAN MustBeAllocated,
  3009. OUT PLCN_UNALIGNED FirstLcn
  3010. )
  3011. /*++
  3012. Routine Description:
  3013. This routine looks up the Lcns for a range of Vcns, and stores them in
  3014. an output array. One Lcn is stored for each Vcn in the range, even
  3015. if the Lcns are contiguous.
  3016. Arguments:
  3017. Scb - Scb for stream on which lookup should occur.
  3018. Vcn - Start of range of Vcns to look up.
  3019. ClusterCount - Number of Vcns to look up.
  3020. MustBeAllocated - FALSE - if need not be allocated, and should check Mcb only
  3021. TRUE - if it must be allocated as far as caller knows (i.e.,
  3022. NtfsLookupAllocation also has checks)
  3023. FirstLcn - Pointer to storage for first Lcn. The caller must guarantee
  3024. that there is enough space to store ClusterCount Lcns.
  3025. Return Value:
  3026. BOOLEAN - TRUE if we found the clusters, FALSE otherwise.
  3027. --*/
  3028. {
  3029. BOOLEAN Allocated;
  3030. LONGLONG Clusters;
  3031. LCN Lcn;
  3032. ULONG i;
  3033. PAGED_CODE();
  3034. DebugTrace( +1, Dbg, ("LookupLcns:\n") );
  3035. DebugTrace( 0, Dbg, ("Scb = %08l\n", Scb) );
  3036. DebugTrace( 0, Dbg, ("Vcn = %016I64x\n", Vcn) );
  3037. DebugTrace( 0, Dbg, ("ClusterCount = %08l\n", ClusterCount) );
  3038. DebugTrace( 0, Dbg, ("FirstLcn = %08lx\n", FirstLcn) );
  3039. //
  3040. // Loop until we have looked up all of the clusters
  3041. //
  3042. while (ClusterCount != 0) {
  3043. if (MustBeAllocated) {
  3044. //
  3045. // Lookup the next run.
  3046. //
  3047. Allocated = NtfsLookupAllocation( IrpContext,
  3048. Scb,
  3049. Vcn,
  3050. &Lcn,
  3051. &Clusters,
  3052. NULL,
  3053. NULL );
  3054. ASSERT( Lcn != 0 );
  3055. //
  3056. // Raise if this case not met. Otherwise we could walk off the end
  3057. // of the LCN array.
  3058. //
  3059. if (!Allocated) {
  3060. return FALSE;
  3061. } else if (Lcn == 0) {
  3062. NtfsRaiseStatus( IrpContext, STATUS_FILE_CORRUPT_ERROR, NULL, Scb->Fcb );
  3063. }
  3064. } else {
  3065. Allocated = NtfsLookupNtfsMcbEntry( &Scb->Mcb, Vcn, &Lcn, &Clusters, NULL, NULL, NULL, NULL );
  3066. //
  3067. // If we are off the end of the Mcb, then set up to just return
  3068. // Li0 for as many Lcns as are being looked up.
  3069. //
  3070. if (!Allocated ||
  3071. (Lcn == UNUSED_LCN)) {
  3072. Lcn = 0;
  3073. Clusters = ClusterCount;
  3074. Allocated = FALSE;
  3075. }
  3076. }
  3077. //
  3078. // If we got as many clusters as we were looking for, then just
  3079. // take the number we were looking for.
  3080. //
  3081. if (Clusters > ClusterCount) {
  3082. Clusters = ClusterCount;
  3083. }
  3084. //
  3085. // Fill in the Lcns in the header.
  3086. //
  3087. for (i = 0; i < (ULONG)Clusters; i++) {
  3088. *(FirstLcn++) = Lcn;
  3089. if (Allocated) {
  3090. Lcn = Lcn + 1;
  3091. }
  3092. }
  3093. //
  3094. // Adjust loop variables for the number Lcns we just received.
  3095. //
  3096. Vcn = Vcn + Clusters;
  3097. ClusterCount -= (ULONG)Clusters;
  3098. }
  3099. DebugTrace( -1, Dbg, ("LookupLcns -> VOID\n") );
  3100. return TRUE;
  3101. }
  3102. VOID
  3103. InitializeNewTable (
  3104. IN ULONG EntrySize,
  3105. IN ULONG NumberEntries,
  3106. OUT PRESTART_POINTERS TablePointer
  3107. )
  3108. /*++
  3109. Routine Description:
  3110. This routine is called to allocate and initialize a new table when the
  3111. associated Restart Table is being allocated or extended.
  3112. Arguments:
  3113. EntrySize - Size of the table entries, in bytes.
  3114. NumberEntries - Number of entries to allocate for the table.
  3115. TablePointer - Returns a pointer to the table.
  3116. Return Value:
  3117. None
  3118. --*/
  3119. {
  3120. PRESTART_TABLE Table;
  3121. PULONG Entry;
  3122. ULONG Size;
  3123. ULONG Offset;
  3124. ASSERT( EntrySize != 0 );
  3125. //
  3126. // Calculate size of table to allocate.
  3127. //
  3128. Size = EntrySize * NumberEntries + sizeof(RESTART_TABLE);
  3129. //
  3130. // Allocate and zero out the table.
  3131. //
  3132. Table =
  3133. TablePointer->Table = NtfsAllocatePool( NonPagedPool, Size );
  3134. RtlZeroMemory( Table, Size );
  3135. //
  3136. // Initialize the table header.
  3137. //
  3138. Table->EntrySize = (USHORT)EntrySize;
  3139. Table->NumberEntries = (USHORT)NumberEntries;
  3140. Table->FreeGoal = MAXULONG;
  3141. Table->FirstFree = sizeof(RESTART_TABLE);
  3142. Table->LastFree = Table->FirstFree + (NumberEntries - 1) * EntrySize;
  3143. //
  3144. // Initialize the free list.
  3145. //
  3146. for (Entry = (PULONG)(Table + 1), Offset = sizeof(RESTART_TABLE) + EntrySize;
  3147. Entry < (PULONG)((PCHAR)Table + Table->LastFree);
  3148. Entry = (PULONG)((PCHAR)Entry + EntrySize), Offset += EntrySize) {
  3149. *Entry = Offset;
  3150. }
  3151. ASSERT_RESTART_TABLE(Table);
  3152. }
  3153. VOID
  3154. NtfsFreeRecentlyDeallocated (
  3155. IN PIRP_CONTEXT IrpContext,
  3156. IN PVCB Vcb,
  3157. IN PLSN BaseLsn,
  3158. IN ULONG CleanVolume
  3159. )
  3160. /*++
  3161. Routine Description:
  3162. Free up recently deallocated clusters for reuse
  3163. Arguments:
  3164. IrpContext -
  3165. Vcb - volume to clean up
  3166. BaseLsn - the lsn we're up to now in the logfile, used to determine what can be freed
  3167. and the new threshold for future frees
  3168. CleanVolume - if true the volume is being clean checkpointed and all the clusters can be freed
  3169. Return Value:
  3170. None
  3171. --*/
  3172. {
  3173. PDEALLOCATED_CLUSTERS Clusters;
  3174. BOOLEAN RemovedClusters = FALSE;
  3175. PAGED_CODE();
  3176. //
  3177. // Quick exit if the list is empty
  3178. //
  3179. if (IsListEmpty( &Vcb->DeallocatedClusterListHead ) || (Vcb->BitmapScb == NULL)) {
  3180. return;
  3181. }
  3182. NtfsAcquireExclusiveScb( IrpContext, Vcb->BitmapScb );
  3183. Clusters = (PDEALLOCATED_CLUSTERS)Vcb->DeallocatedClusterListHead.Blink;
  3184. //
  3185. // Now we want to check if we can release any of the clusters in the
  3186. // deallocated cluster arrays. We know we can look at the
  3187. // fields in the PriorDeallocatedClusters structure because they
  3188. // are never modified in the running system.
  3189. //
  3190. // We will continue from the oldest in the list list until
  3191. //
  3192. // 1) there are no more dealloc lists
  3193. // 2) there are no clusters in the dealloc list (it must be the only one at this point)
  3194. // 3) the lsn == 0 and we're dirty which means we're at the front
  3195. // 4) the lsn is newer in deallocated cluster list
  3196. //
  3197. try {
  3198. while ((!IsListEmpty( &Vcb->DeallocatedClusterListHead )) &&
  3199. (Clusters->ClusterCount > 0) &&
  3200. (((Clusters->Lsn.QuadPart != 0) && (BaseLsn->QuadPart > Clusters->Lsn.QuadPart)) ||
  3201. CleanVolume)) {
  3202. RemovedClusters = TRUE;
  3203. //
  3204. // For all deallocated during clean checkpoints and non-most recent
  3205. // ones during fuzzt ones:
  3206. // Remove all of the mappings in the Mcb. Protect this with
  3207. // a try-except.
  3208. //
  3209. try {
  3210. try {
  3211. ULONG i;
  3212. ULONGLONG StartingVcn;
  3213. ULONGLONG StartingLcn;
  3214. ULONGLONG ClusterCount;
  3215. for (i = 0; FsRtlGetNextLargeMcbEntry( &Clusters->Mcb, i, &StartingVcn, &StartingLcn, &ClusterCount ); i += 1) {
  3216. if (StartingVcn == StartingLcn) {
  3217. if (NtfsAddCachedRun( IrpContext,
  3218. Vcb,
  3219. StartingLcn,
  3220. ClusterCount,
  3221. RunStateFree ) <= 0) break;
  3222. }
  3223. }
  3224. } finally {
  3225. PDEALLOCATED_CLUSTERS NextClusters = (PDEALLOCATED_CLUSTERS)Clusters->Link.Blink;
  3226. //
  3227. // We are committed to freeing the clusters out of the PriorDeallocatedClusters
  3228. // in any case.
  3229. //
  3230. Vcb->DeallocatedClusters -= Clusters->ClusterCount;
  3231. //
  3232. // Move this cluster list out of the vcb
  3233. //
  3234. RemoveEntryList( &Clusters->Link );
  3235. #ifdef BENL_DBG
  3236. // KdPrint(( "NTFS: freeing dealloc clusters: 0x%x LSN: 0x%x\n", Clusters, Clusters->Lsn ));
  3237. #endif
  3238. //
  3239. // delete dynamic clusters lists / reset static ones
  3240. //
  3241. if ((Clusters != &Vcb->DeallocatedClusters1) && (Clusters != &Vcb->DeallocatedClusters2 )) {
  3242. FsRtlUninitializeLargeMcb( &Clusters->Mcb );
  3243. NtfsFreePool( Clusters );
  3244. } else {
  3245. Clusters->Link.Flink = NULL;
  3246. Clusters->ClusterCount = 0;
  3247. FsRtlResetLargeMcb( &Clusters->Mcb, TRUE );
  3248. }
  3249. ASSERT( Vcb->DeallocatedClusters >= 0 );
  3250. Clusters = NextClusters;
  3251. }
  3252. } except( NtfsCatchOutOfMemoryExceptionFilter( IrpContext, GetExceptionInformation() )) {
  3253. //
  3254. // Keep going even if out of memory
  3255. //
  3256. NtfsMinimumExceptionProcessing( IrpContext );
  3257. NOTHING;
  3258. }
  3259. }
  3260. //
  3261. // If we removed any clusters on a fuzzy checkpoint lets make a new active one so
  3262. // the current active one can be cleaned up eventually
  3263. // On a clean checkpoint if we removed all the nodes add a blank one back
  3264. //
  3265. if (!CleanVolume) {
  3266. ASSERT( !IsListEmpty( &Vcb->DeallocatedClusterListHead ) );
  3267. if (RemovedClusters && (Clusters->ClusterCount > 0)) {
  3268. Clusters = NtfsGetDeallocatedClusters( IrpContext, Vcb );
  3269. }
  3270. } else if (IsListEmpty( &Vcb->DeallocatedClusterListHead)) {
  3271. #ifdef BENL_DBG
  3272. // KdPrint(( "NTFS: adding extra dealloc clusters after clean chkpt: 0x%x\n", &Vcb->DeallocatedClusters1 ));
  3273. #endif
  3274. ASSERT( Vcb->DeallocatedClusters1.Link.Flink == NULL );
  3275. Vcb->DeallocatedClusters1.Lsn.QuadPart = 0;
  3276. InsertHeadList( &Vcb->DeallocatedClusterListHead, &Vcb->DeallocatedClusters1.Link );
  3277. }
  3278. } finally {
  3279. NtfsReleaseScb( IrpContext, Vcb->BitmapScb );
  3280. }
  3281. }
  3282. VOID
  3283. NtfsCleanupFailedTransaction (
  3284. IN PIRP_CONTEXT IrpContext
  3285. )
  3286. /*++
  3287. Routine Description:
  3288. This routine is called to cleanup the IrpContext and free structures
  3289. in the event a transaction fails to commit or abort.
  3290. Arguments:
  3291. Return Value:
  3292. None
  3293. --*/
  3294. {
  3295. PUSN_FCB ThisUsn;
  3296. PUSN_FCB LastUsn;
  3297. PAGED_CODE();
  3298. //
  3299. // Clear the flags indicating a transaction is underway.
  3300. //
  3301. ClearFlag( IrpContext->Flags,
  3302. IRP_CONTEXT_FLAG_WROTE_LOG | IRP_CONTEXT_FLAG_RAISED_STATUS | IRP_CONTEXT_FLAG_MODIFIED_BITMAP );
  3303. //
  3304. // Make sure the recently deallocated queue is empty.
  3305. //
  3306. try {
  3307. if (!IsListEmpty( &IrpContext->RecentlyDeallocatedQueue )) {
  3308. NtfsDeallocateRecordsComplete( IrpContext );
  3309. }
  3310. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3311. EXCEPTION_EXECUTE_HANDLER :
  3312. EXCEPTION_CONTINUE_SEARCH) {
  3313. NOTHING;
  3314. }
  3315. //
  3316. // Show that we haven't deallocated any clusters.
  3317. //
  3318. IrpContext->DeallocatedClusters = 0;
  3319. IrpContext->FreeClusterChange = 0;
  3320. //
  3321. // Don't rollback any size changes.
  3322. //
  3323. try {
  3324. NtfsUpdateScbSnapshots( IrpContext );
  3325. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3326. EXCEPTION_EXECUTE_HANDLER :
  3327. EXCEPTION_CONTINUE_SEARCH) {
  3328. NOTHING;
  3329. }
  3330. //
  3331. // Make sure the last restart area is zeroed.
  3332. //
  3333. IrpContext->LastRestartArea.QuadPart = 0;
  3334. //
  3335. // Pull the Usn Fcb fields.
  3336. //
  3337. ThisUsn = &IrpContext->Usn;
  3338. try {
  3339. do {
  3340. if (ThisUsn->CurrentUsnFcb != NULL) {
  3341. PFCB UsnFcb = ThisUsn->CurrentUsnFcb;
  3342. NtfsLockFcb( IrpContext, UsnFcb );
  3343. //
  3344. // If any rename flags are part of the new reasons then
  3345. // make sure to look the name up again.
  3346. //
  3347. if (FlagOn( ThisUsn->NewReasons,
  3348. USN_REASON_RENAME_NEW_NAME | USN_REASON_RENAME_OLD_NAME )) {
  3349. ClearFlag( UsnFcb->FcbState, FCB_STATE_VALID_USN_NAME );
  3350. }
  3351. //
  3352. // Now restore the reason and source info fields.
  3353. //
  3354. ClearFlag( UsnFcb->FcbUsnRecord->UsnRecord.Reason,
  3355. ThisUsn->NewReasons );
  3356. if (UsnFcb->FcbUsnRecord->UsnRecord.Reason == 0) {
  3357. UsnFcb->FcbUsnRecord->UsnRecord.SourceInfo = 0;
  3358. } else {
  3359. SetFlag( UsnFcb->FcbUsnRecord->UsnRecord.SourceInfo,
  3360. ThisUsn->RemovedSourceInfo );
  3361. }
  3362. NtfsUnlockFcb( IrpContext, UsnFcb );
  3363. //
  3364. // Zero out the structure.
  3365. //
  3366. ThisUsn->CurrentUsnFcb = NULL;
  3367. ThisUsn->NewReasons = 0;
  3368. ThisUsn->RemovedSourceInfo = 0;
  3369. ThisUsn->UsnFcbFlags = 0;
  3370. if (ThisUsn != &IrpContext->Usn) {
  3371. LastUsn->NextUsnFcb = ThisUsn->NextUsnFcb;
  3372. NtfsFreePool( ThisUsn );
  3373. ThisUsn = LastUsn;
  3374. }
  3375. }
  3376. if (ThisUsn->NextUsnFcb == NULL) { break; }
  3377. LastUsn = ThisUsn;
  3378. ThisUsn = ThisUsn->NextUsnFcb;
  3379. } while (TRUE);
  3380. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3381. EXCEPTION_EXECUTE_HANDLER :
  3382. EXCEPTION_CONTINUE_SEARCH) {
  3383. NOTHING;
  3384. }
  3385. //
  3386. // Don't wake any waiters for this failed operation.
  3387. //
  3388. try {
  3389. if (IrpContext->CheckNewLength != NULL) {
  3390. NtfsProcessNewLengthQueue( IrpContext, TRUE );
  3391. }
  3392. } except (FsRtlIsNtstatusExpected( GetExceptionCode() ) ?
  3393. EXCEPTION_EXECUTE_HANDLER :
  3394. EXCEPTION_CONTINUE_SEARCH) {
  3395. NOTHING;
  3396. }
  3397. //
  3398. // Remove this from the transaction table if present.
  3399. //
  3400. if (IrpContext->TransactionId != 0) {
  3401. NtfsAcquireExclusiveRestartTable( &IrpContext->Vcb->TransactionTable,
  3402. TRUE );
  3403. NtfsFreeRestartTableIndex( &IrpContext->Vcb->TransactionTable,
  3404. IrpContext->TransactionId );
  3405. NtfsReleaseRestartTable( &IrpContext->Vcb->TransactionTable );
  3406. IrpContext->TransactionId = 0;
  3407. }
  3408. IrpContext->ExceptionStatus = STATUS_SUCCESS;
  3409. return;
  3410. }
  3411. //
  3412. // Local support routine
  3413. //
  3414. LONG
  3415. NtfsCatchOutOfMemoryExceptionFilter (
  3416. IN PIRP_CONTEXT IrpContext,
  3417. IN PEXCEPTION_POINTERS ExceptionPointer
  3418. )
  3419. /*++
  3420. Routine Description:
  3421. Exception filter for out of memory errors. This will swallow 0xC0000009A's and let
  3422. all other exceptions filter on
  3423. Arguments:
  3424. IrpContext - IrpContext
  3425. ExceptionPointer - Pointer to the exception context.
  3426. Return Value:
  3427. Exception status - EXCEPTION_CONTINUE_SEARCH if we want to raise to another handler,
  3428. EXCEPTION_EXECUTE_HANDLER if we plan to proceed on.
  3429. --*/
  3430. {
  3431. UNREFERENCED_PARAMETER( IrpContext );
  3432. if (ExceptionPointer->ExceptionRecord->ExceptionCode != STATUS_INSUFFICIENT_RESOURCES) {
  3433. return EXCEPTION_CONTINUE_SEARCH;
  3434. }
  3435. return EXCEPTION_EXECUTE_HANDLER;
  3436. }
  3437. //
  3438. // Local support routine
  3439. //
  3440. LONG
  3441. NtfsCheckpointExceptionFilter (
  3442. IN PIRP_CONTEXT IrpContext,
  3443. IN PEXCEPTION_POINTERS ExceptionPointer,
  3444. IN NTSTATUS ExceptionCode
  3445. )
  3446. {
  3447. //
  3448. // Swallow all expected errors if this is a dismount doing a log file full.
  3449. //
  3450. if ((FlagOn( IrpContext->State, IRP_CONTEXT_STATE_DISMOUNT_LOG_FLUSH )) &&
  3451. (FsRtlIsNtstatusExpected( ExceptionCode ))) {
  3452. return EXCEPTION_EXECUTE_HANDLER;
  3453. } else {
  3454. return EXCEPTION_CONTINUE_SEARCH;
  3455. }
  3456. UNREFERENCED_PARAMETER( ExceptionPointer );
  3457. }