Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

11424 lines
382 KiB

  1. /*++
  2. Copyright (c) 1997-1999 Microsoft Corporation
  3. Module Name:
  4. journal.c
  5. Abstract:
  6. This module contains routines to process the NTFS Volume Journal for the
  7. File Replication service. It uses a single thread with an I/O completion
  8. port to post reads to all volume journals we need to monitor.
  9. As USN buffers are filled they are queued to a JournalProcessQueue for
  10. further processing. The Journal Read Thread gets a free buffer from
  11. the free list and posts another read to the volume journal.
  12. A thread pool processes the USN buffers from the JournalProcessQueue.
  13. Author:
  14. David A. Orbits (davidor) 6-Apr-1997
  15. Environment:
  16. User Mode Service
  17. Revision History:
  18. // JOURNAL RECORD FORMAT
  19. //
  20. // The initial Major.Minor version of the Usn record will be 1.0.
  21. // In general, the MinorVersion may be changed if fields are added
  22. // to this structure in such a way that the previous version of the
  23. software can still correctly interpret the fields it knows about. The
  24. // MajorVersion should only be changed if the previous version of
  25. // any software using this structure would incorrectly handle new
  26. // records due to structure changes.
  27. //
  28. // see \nt\public\sdk\inc\ntioapi.h for the USN_RECORD declaration.
  29. //
  30. #define USN_REASON_DATA_OVERWRITE (0x00000001)
  31. #define USN_REASON_DATA_EXTEND (0x00000002)
  32. #define USN_REASON_DATA_TRUNCATION (0x00000004)
  33. #define USN_REASON_NAMED_DATA_OVERWRITE (0x00000010)
  34. #define USN_REASON_NAMED_DATA_EXTEND (0x00000020)
  35. #define USN_REASON_NAMED_DATA_TRUNCATION (0x00000040)
  36. #define USN_REASON_FILE_CREATE (0x00000100)
  37. #define USN_REASON_FILE_DELETE (0x00000200)
  38. #define USN_REASON_EA_CHANGE (0x00000400)
  39. #define USN_REASON_SECURITY_CHANGE (0x00000800)
  40. #define USN_REASON_RENAME_OLD_NAME (0x00001000) // rename
  41. #define USN_REASON_RENAME_NEW_NAME (0x00002000)
  42. #define USN_REASON_INDEXABLE_CHANGE (0x00004000)
  43. #define USN_REASON_BASIC_INFO_CHANGE (0x00008000)
  44. #define USN_REASON_HARD_LINK_CHANGE (0x00010000)
  45. #define USN_REASON_COMPRESSION_CHANGE (0x00020000)
  46. #define USN_REASON_ENCRYPTION_CHANGE (0x00040000)
  47. #define USN_REASON_OBJECT_ID_CHANGE (0x00080000)
  48. #define USN_REASON_REPARSE_POINT_CHANGE (0x00100000)
  49. #define USN_REASON_STREAM_CHANGE (0x00200000) // named stream create, delete or rename.
  50. #define USN_REASON_CLOSE (0x80000000)
  51. --*/
  52. #define UNICODE 1
  53. #define _UNICODE 1
  54. #include <ntreppch.h>
  55. #pragma hdrstop
  56. #undef DEBSUB
  57. #define DEBSUB "journal:"
  58. #include <frs.h>
  59. #include <genhash.h>
  60. #include <tablefcn.h>
  61. #include <eventlog.h>
  62. #include <perrepsr.h>
  63. #pragma warning( disable:4102) // unreferenced label
  64. //
  65. // The default for Journal Max Size now comes from the registry.
  66. #define JRNL_DEFAULT_ALLOC_DELTA (1*1024*1024)
  67. #define JRNL_USN_SAVE_POINT_INTERVAL (16*1024)
  68. #define JRNL_CLEAN_WRITE_FILTER_INTERVAL (60*1000) /* once a minute */
  69. #define NumberOfJounalBuffers 3
  70. #define FRS_CANCEL_JOURNAL_READ 0xFFFFFFFF
  71. #define FRS_PAUSE_JOURNAL_READ 0xFFFFFFF0
  72. //
  73. // Every 'VSN_SAVE_INTERVAL' VSNs that are handed out, save the state in the
  74. // config record. On restart we take the largest value and add
  75. // 2*(VSN_SAVE_INTERVAL+1) to it so if a crash occurred we ensure that it
  76. // never goes backwards.
  77. //
  78. // A Vsn value of 0 means there is no Vsn. This convention is required
  79. // by FrsPendingInVVector().
  80. //
  81. // (VSN_SAVE_INTERVAL+1) MUST BE a Power of 2.
  82. #define VSN_SAVE_INTERVAL 0xFF
  83. #define VSN_RESTART_INCREMENT (2*(VSN_SAVE_INTERVAL+1))
  84. //
  // Deactivate the Volume Monitor Entry: pull it off _Queue, set IoActive
  // False, store the error status _WStatus, queue the entry to the
  // VolumeMonitorStopQueue and then call ReleaseVmeRef() on it.
  // This code assumes you have already ACQUIRED THE LOCK ON the
  // VolumeMonitorQueue.
  //
  // The body is wrapped in do { } while (0) so the macro expands to exactly
  // one statement and behaves correctly inside an unbraced if/else.  Invoke
  // it with a trailing semicolon.  Macro arguments are parenthesized, but
  // _pVme is still evaluated several times, so pass an expression without
  // side effects.
  //
  #define VmeDeactivate(_Queue, _pVme, _WStatus)                                \
      do {                                                                      \
          FrsRtlRemoveEntryQueueLock((_Queue), &(_pVme)->ListEntry);            \
          (_pVme)->IoActive = FALSE;                                            \
          (_pVme)->WStatus = (_WStatus);                                        \
          /*(_pVme)->ActiveReplicas -= 1; */                                    \
          DPRINT2(4, "++ vmedeactivate -- onto stop queue %ws (%08x)\n",        \
                  (_pVme)->FSVolInfo.VolumeLabel, (_pVme));                     \
          FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &(_pVme)->ListEntry);  \
          ReleaseVmeRef(_pVme);                                                 \
      } while (0)
  99. //
  100. // The Journal free buffer queue holds the free buffers for journal reads.
  101. //
  102. FRS_QUEUE JournalFreeQueue;
  103. //
  104. // The Journal process queue holds the list of journal buffers with
  105. // data to process.
  106. //
  107. FRS_QUEUE JournalProcessQueue;
  108. //
  109. // The Journal I/O completion port. We keep a read outstanding on each
  110. // NTFS volume monitored.
  111. //
  112. HANDLE JournalCompletionPort;
  113. //
  114. // The handle to the Journal read thread.
  115. //
  116. HANDLE JournalReadThreadHandle = NULL;
  117. //
  118. // Set this flag to stop any further issuing of journal reads.
  119. //
  120. volatile BOOL KillJournalThreads = FALSE;
  121. //
  122. // This is the volume monitor queue. The Journal read thread waits until
  123. // this queue goes non-empty before it waits on the completion port. This
  124. // way it knows the completion port exists without having to poll.
  125. //
  126. FRS_QUEUE VolumeMonitorQueue;
  127. //
  128. // When I/O is Stopped on a given journal the Journal read thread places
  129. // the volume monitor entry on the Stop queue.
  130. //
  131. FRS_QUEUE VolumeMonitorStopQueue;
  132. //
  133. // This is the control queue for all the volume monitor entry change order
  134. // queues.
  135. //
  136. FRS_QUEUE FrsVolumeLayerCOList;
  137. FRS_QUEUE FrsVolumeLayerCOQueue;
  138. //
  139. // This is the expected version number from the USN journal.
  140. //
  141. USHORT ConfigUsnMajorVersion = 2;
  142. //
  143. // This is the count of outstanding journal read requests.
  144. //
  145. ULONG JournalActiveIoRequests = 0;
  146. //
  147. // Change order delay in aging cache. (milliseconds)
  148. //
  149. ULONG ChangeOrderAgingDelay;
  150. //
  151. // This lock is held by JrnlSetReplicaState() when moving a replica
  152. // between lists.
  153. //
  154. CRITICAL_SECTION JrnlReplicaStateLock;
  155. //
  156. // Lock to protect the child lists in the Filter Table. (must be pwr of 2)
  157. // Instead of paying the overhead of having one per node we just use an array
  158. // to help reduce contention. We use the ReplicaNumber masked by the lock
  159. // table size as the index.
  160. //
  161. // Acquire the lock on the ReplicaSet Filter table Child List before
  162. // inserting or removing a child from the list.
  163. //
  164. CRITICAL_SECTION JrnlFilterTableChildLock[NUMBER_FILTER_TABLE_CHILD_LOCKS];
  165. //
  166. // The list of all Replica Structs active, stopped and faulted.
  167. //
  168. extern FRS_QUEUE ReplicaListHead;
  169. extern FRS_QUEUE ReplicaStoppedListHead;
  170. extern FRS_QUEUE ReplicaFaultListHead;
  171. //
  172. // This is used to init our new value for FrsVsn.
  173. //
  174. extern ULONGLONG MaxPartnerClockSkew;
  175. //
  176. // Global sequence number. Inited here with first Vme VSN.
  177. //
  178. extern CRITICAL_SECTION GlobSeqNumLock;
  179. extern ULONGLONG GlobSeqNum;
  180. //
  181. // The table below describes what list the Replica struct should be on for
  182. // a given state as well as the state name.
  183. //
  184. REPLICA_SERVICE_STATE ReplicaServiceState[] = {
  185. {NULL, "ALLOCATED"},
  186. {&ReplicaListHead, "INITIALIZING"},
  187. {&ReplicaListHead, "STARTING"},
  188. {&ReplicaListHead, "ACTIVE"},
  189. {&ReplicaListHead, "PAUSE1"},
  190. {&ReplicaListHead, "PAUSING (2)"},
  191. {&ReplicaListHead, "PAUSED"},
  192. {&ReplicaListHead, "STOPPING"},
  193. {&ReplicaStoppedListHead, "STOPPED"},
  194. {&ReplicaFaultListHead, "ERROR"},
  195. {&ReplicaFaultListHead, "JRNL_WRAP_ERROR"},
  196. {NULL, "REPLICA_DELETED"},
  197. {&ReplicaFaultListHead, "MISMATCHED_VOLUME_SERIAL_NO"},
  198. {&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_OBJECT_ID"},
  199. {&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_FILE_ID"},
  200. {&ReplicaFaultListHead, "MISMATCHED_JOURNAL_ID"}
  201. };
  202. //
  203. // The following struct is used to encapsulate the context of a change
  204. // order request so it can be passed as a context parameter in an
  205. // enumerated call.
  206. //
  207. typedef struct _CHANGE_ORDER_PARAMETERS_ {
  208. PREPLICA OriginalReplica; // Original Replica Set
  209. PREPLICA NewReplica; // The New Replica set in the case of a rename.
  210. ULONGLONG NewParentFid; // The new parent FID in case of a rename.
  211. ULONG NewLocationCmd; // MovDir, MovRs, ...
  212. PUSN_RECORD UsnRecord; // Usn Record that triggered the change order
  213. // creation (i.e. the operation on the root of the subtree).
  214. PFILTER_TABLE_ENTRY OrigParentFilterEntry; // Original parent filter entry of root filter entry
  215. PFILTER_TABLE_ENTRY NewParentFilterEntry; // Current/New parent filter entry of root filter entry
  216. } CHANGE_ORDER_PARAMETERS, *PCHANGE_ORDER_PARAMETERS;
  //
  // Eight 4-bit operation fields (Op1 .. Op8).  One of these describes the
  // sequence of operation nibbles held in a single entry of the change order
  // location dispatch table.
  //
  typedef struct _OP_FIELDS_ {
      unsigned int Op1 : 4;
      unsigned int Op2 : 4;
      unsigned int Op3 : 4;
      unsigned int Op4 : 4;
      unsigned int Op5 : 4;
      unsigned int Op6 : 4;
      unsigned int Op7 : 4;
      unsigned int Op8 : 4;
  } OP_FIELDS, *POP_FIELDS;
  227. typedef struct _CO_LOCATION_CONTROL_CMD_ {
  228. union {
  229. OP_FIELDS OpFields;
  230. ULONG UlongOpFields;
  231. } u1;
  232. } CO_LOCATION_CONTROL_CMD;
  233. #define OpInval 0 // Invalid op (only check for Op1, else done).
  234. #define OpEvap 1 // Evaporate the change order
  235. #define OpNRs 2 // update New Replica Set and New Directory.
  236. #define OpNDir 3 // Update New Directory
  237. #define OpNSt 4 // Update New State stored in next nibble.
  238. #define NSCre CO_LOCATION_CREATE // Create a File or Dir (New FID Generated)
  239. #define NSDel CO_LOCATION_DELETE // Delete a file or Dir (FID retired)
  240. #define NSMovIn CO_LOCATION_MOVEIN // Rename into a R.S.
  241. #define NSMovIn2 CO_LOCATION_MOVEIN2 // Rename into a R.S. from a prev MOVEOUT
  242. #define NSMovOut CO_LOCATION_MOVEOUT // Rename out of any R.S.
  243. #define NSMovRs CO_LOCATION_MOVERS // Rename from one R.S. to another R.S.
  244. #define NSMovDir CO_LOCATION_MOVEDIR // Rename from one dir to another (Same R.S.)
  245. #define NSMax CO_LOCATION_NUM_CMD // No prior Location cmd. Prior change
  246. // Order had a content cmd.
  247. #define NSNoLocationCmd CO_LOCATION_NO_CMD
  248. PCHAR CoLocationNames[]= {"Create" , "Delete", "Movein" , "Movein2",
  249. "Moveout", "Movers", "MoveDir", "NoCmd"};
  250. //
  251. // The following dispatch table specifies what operations are performed when
  252. // a second change arrives for a given FID and a prior change order is still
  253. // pending. The states correspond to the change order location command that
  254. // is to be executed by the update process. Each entry in the dispatch table
  255. // is a ULONG composed of up to 8 operation nibbles which are executed in a loop.
  256. // The operations could evaporate the change order (e.g. a create followed by
  257. // a delete. The create was pending and the delete came in so just blow off
  258. // the change order. The operation could update the parent directory or the
  259. // replica set the directory lives in, or the location command (and thus the
  260. // state) that is to be performed. The MovIn2 state is not a unique input,
  261. // rather it is a special state that lets us remember there was a prior MovOut
  262. // done so if the MovIn2 is followed by a Del or a MovOut we know there is still
  263. // work to be done in the database so we can't evaporate the change order.
  264. // See note (a) below.
  265. //
  //
  // Layout, per the row and column labels inside the initializer: each row is
  // the location command of the change order already pending on the FID (the
  // final <NONE> row has no row label among the NS* location commands, which
  // is why there are NSMax+1 rows), and each column is the location command
  // of the second operation on the same FID.  A cell lists its operation
  // nibbles in order; an OpNSt nibble is followed by the new state (an NS*
  // value) in the next nibble.  A cell of {0} starts with OpInval.
  //
  266. CO_LOCATION_CONTROL_CMD ChangeOrderLocationStateTable[NSMax+1][NSMax] = {
  267. // Followed by Second Op On Same Fid
  268. //
  269. // Cre Del MovIn MovIn2 MovOut MovRs MovDir
  270. // First
  271. // Op On
  272. // Fid
  273. //Cre
  274. {{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
  275. //Del
  276. {{0}, {0}, {0}, {0}, {0}, {0}, {0}},
  277. //MovIn
  278. {{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
  279. //MovIn2(a)
  280. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
  281. //MovOut
  282. {{0}, {0}, {OpNRs,OpNSt,NSMovIn2},
  283. {0}, {0}, {0}, {0}},
  284. //MovRs
  285. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
  286. //MovDir
  287. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir}},
  288. //<NONE>
  289. {{OpNRs, OpNSt,NSCre},
  290. {OpNSt,NSDel}, {OpNRs,OpNSt,NSMovIn},
  291. {0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir,OpNSt,NSMovDir}}
  292. };
  293. // (a) The MovIn2 state is artificially introduced to deal with the sequence
  294. // of MovOut followed by a MovIn. There are two problems here. One is that
  295. // many changes could have happened to the file or dir while it was outside
  296. // the R.S. since we were not monitoring it. Consequently the update process
  297. // must do a complete evaluation of the file/dir properties so we don't
  298. // fail to replicate some change. The second problem is that in the normal
  299. // case a MovIn followed by either a delete or a MovOut results in evaporating
  300. // the change order. However if a MovOut has occurred in the past followed
  301. // by a MovIn we cannot assume that the file or Dir was never in the R.S.
  302. // to begin with. Consider the sequence of MovOut, MovIn, Del. Without the
  303. // MovIn2 state the MovIn followed by Del would result in evaporating the
  304. // change order so the file or dir would be still left in the database.
  305. // By transitioning to the MovIn2 state we go to the Del state when we see
  306. // the Delete so we can remove the entry from the database. Similarly once
  307. // in the MovIn2 state if we see a MovOut then we go to the MovOut state
  308. // rather than evaporating the change order since we still have to update
  309. // the database with the MovOut.
  310. //
  311. // Note: think about a similar problem where the file filter string changes
  312. // and a file is touched so a create CO is generated. If the file is
  313. // then deleted the CO is evaporated. This means that a del CO will
  314. // not be propagated so the file is deleted everywhere. Do we need
  315. // a Cre2 CO analogous to the MovIn2 state?
  316. typedef
  317. ULONG
  318. (NTAPI *PJRNL_FILTER_ENUM_ROUTINE) (
  319. PGENERIC_HASH_TABLE Table,
  320. PVOID Buffer,
  321. PVOID Context
  322. );
  323. LONG
  324. JrnlGetFileCoLocationCmd(
  325. PVOLUME_MONITOR_ENTRY pVme,
  326. IN PUSN_RECORD UsnRecord,
  327. OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
  328. OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
  329. );
  330. ULONG
  331. JrnlEnterFileChangeOrder(
  332. IN PUSN_RECORD UsnRecord,
  333. IN ULONG LocationCmd,
  334. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  335. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  336. );
  337. PCHANGE_ORDER_ENTRY
  338. JrnlCreateCo(
  339. IN PREPLICA Replica,
  340. IN PULONGLONG Fid,
  341. IN PULONGLONG ParentFid,
  342. IN PUSN_RECORD UsnRecord,
  343. IN BOOL IsDirectory,
  344. IN PWCHAR FileName,
  345. IN USHORT Length
  346. );
  347. BOOL
  348. JrnlMergeCoTest(
  349. IN PVOLUME_MONITOR_ENTRY pVme,
  350. IN PUNICODE_STRING UFileName,
  351. IN PULONGLONG ParentFid,
  352. IN ULONG StreamLastMergeSeqNum
  353. );
  354. VOID
  355. JrnlUpdateNst(
  356. IN PVOLUME_MONITOR_ENTRY pVme,
  357. IN PUNICODE_STRING UFileName,
  358. IN PULONGLONG ParentFid,
  359. IN ULONG StreamSequenceNumber
  360. );
  361. VOID
  362. JrnlFilterUpdate(
  363. IN PREPLICA CurrentReplica,
  364. IN PUSN_RECORD UsnRecord,
  365. IN ULONG LocationCmd,
  366. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  367. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  368. );
  369. ULONG
  370. JrnlProcessSubTree(
  371. IN PFILTER_TABLE_ENTRY RootFilterEntry,
  372. IN PCHANGE_ORDER_PARAMETERS Cop
  373. );
  374. ULONG
  375. JrnlProcessSubTreeEntry(
  376. PGENERIC_HASH_TABLE Table,
  377. PVOID Buffer,
  378. PVOID Context
  379. );
  380. ULONG
  381. JrnlUpdateChangeOrder(
  382. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  383. IN PREPLICA NewReplica,
  384. IN ULONGLONG NewParentFid,
  385. IN ULONG NewLocationCmd,
  386. IN PUSN_RECORD UsnRecord
  387. );
  388. ULONG
  389. JrnlAddFilterEntryFromUsn(
  390. IN PREPLICA Replica,
  391. IN PUSN_RECORD UsnRecord,
  392. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  393. );
  394. ULONG
  395. JrnlAddFilterEntry(
  396. IN PREPLICA Replica,
  397. IN PFILTER_TABLE_ENTRY FilterEntry,
  398. OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
  399. IN BOOL Replace
  400. );
  401. ULONG
  402. JrnlDeleteDirFilterEntry(
  403. IN PGENERIC_HASH_TABLE FilterTable,
  404. IN PULONGLONG DFileID,
  405. IN PFILTER_TABLE_ENTRY ArgFilterEntry
  406. );
  407. ULONG
  408. JrnlGetPathAndLevel(
  409. IN PGENERIC_HASH_TABLE FilterTable,
  410. IN PLONGLONG StartDirFileID,
  411. OUT PULONG Level
  412. );
  413. ULONG
  414. JrnlCommand(
  415. PCOMMAND_PACKET CmdPkt
  416. );
  417. ULONG
  418. JrnlPrepareService1(
  419. PREPLICA Replica
  420. );
  421. ULONG
  422. JrnlPrepareService2(
  423. IN PTHREAD_CTX ThreadCtx,
  424. IN PREPLICA Replica
  425. );
  426. ULONG
  427. JrnlInitOneReplicaSet(
  428. PCOMMAND_PACKET CmdPkt
  429. );
  430. ULONG
  431. JrnlCleanOutReplicaSet(
  432. PREPLICA Replica
  433. );
  434. JET_ERR
  435. JrnlInsertParentEntry(
  436. IN PTHREAD_CTX ThreadCtx,
  437. IN PTABLE_CTX TableCtx,
  438. IN PVOID Record,
  439. IN PVOID Context
  440. );
  441. ULONG_PTR
  442. JrnlFilterLinkChild (
  443. PGENERIC_HASH_TABLE Table,
  444. PVOID Buffer,
  445. PVOID Context
  446. );
  447. ULONG_PTR
  448. JrnlFilterLinkChildNoError (
  449. PGENERIC_HASH_TABLE Table,
  450. PVOID Buffer,
  451. PVOID Context
  452. );
  453. ULONG
  454. JrnlFilterUnlinkChild (
  455. PGENERIC_HASH_TABLE Table,
  456. PVOID Buffer,
  457. PVOID Context
  458. );
  459. ULONG_PTR
  460. JrnlFilterGetRoot (
  461. PGENERIC_HASH_TABLE Table,
  462. PVOID Buffer,
  463. PVOID Context
  464. );
  465. ULONG
  466. JrnlSubTreePrint (
  467. PGENERIC_HASH_TABLE Table,
  468. PVOID Buffer,
  469. PVOID Context
  470. );
  471. #if 0
  472. ULONG
  473. JrnlCheckStartFailures(
  474. PFRS_QUEUE Queue
  475. );
  476. #endif
  477. ULONG
  478. JrnlOpen(
  479. IN PREPLICA Replica,
  480. OUT PVOLUME_MONITOR_ENTRY *pVme,
  481. PCONFIG_TABLE_RECORD ConfigRecord
  482. );
  483. ULONG
  484. JrnlSubmitReadThreadRequest(
  485. IN PVOLUME_MONITOR_ENTRY pVme,
  486. IN ULONG Request,
  487. IN ULONG NewState
  488. );
  489. ULONG
  490. JrnlShutdownSingleReplica(
  491. IN PREPLICA Replica,
  492. IN BOOL HaveLock
  493. );
  494. ULONG
  495. JrnlCloseVme(
  496. IN PVOLUME_MONITOR_ENTRY pVme
  497. );
  498. ULONG
  499. JrnlCloseAll(
  500. VOID
  501. );
  502. ULONG
  503. JrnlClose(
  504. IN HANDLE VolumeHandle
  505. );
  506. DWORD
  507. WINAPI
  508. JournalReadThread(
  509. IN LPVOID Context
  510. );
  511. ULONG
  512. JrnlGetEndOfJournal(
  513. IN PVOLUME_MONITOR_ENTRY pVme,
  514. OUT USN *EndOfJournal
  515. );
  516. NTSTATUS
  517. FrsIssueJournalAsyncRead(
  518. IN PJBUFFER Jbuff,
  519. IN PVOLUME_MONITOR_ENTRY pVme
  520. );
  521. ULONG
  522. JrnlEnumerateFilterTreeBU(
  523. PGENERIC_HASH_TABLE Table,
  524. PFILTER_TABLE_ENTRY FilterEntry,
  525. PJRNL_FILTER_ENUM_ROUTINE Function,
  526. PVOID Context
  527. );
  528. ULONG
  529. JrnlEnumerateFilterTreeTD(
  530. PGENERIC_HASH_TABLE Table,
  531. PFILTER_TABLE_ENTRY FilterEntry,
  532. PJRNL_FILTER_ENUM_ROUTINE Function,
  533. PVOID Context
  534. );
  535. VOID
  536. JrnlHashEntryFree(
  537. PGENERIC_HASH_TABLE Table,
  538. PVOID Buffer
  539. );
  540. BOOL
  541. JrnlCompareFid(
  542. PVOID Buf1,
  543. PVOID Buf2,
  544. ULONG Length
  545. );
  546. ULONG
  547. JrnlHashCalcFid (
  548. PVOID Buf,
  549. ULONG Length
  550. );
  551. ULONG
  552. NoHashBuiltin (
  553. PVOID Buf,
  554. ULONG Length
  555. );
  556. BOOL
  557. JrnlCompareGuid(
  558. PVOID Buf1,
  559. PVOID Buf2,
  560. ULONG Length
  561. );
  562. ULONG
  563. JrnlHashCalcGuid (
  564. PVOID Buf,
  565. ULONG Length
  566. );
  567. ULONG
  568. JrnlHashCalcUsn (
  569. PVOID Buf,
  570. ULONG Length
  571. );
  572. VOID
  573. CalcHashFidAndName(
  574. IN PUNICODE_STRING Name,
  575. IN PULONGLONG Fid,
  576. OUT PULONGLONG HashValue
  577. );
  578. ULONG
  579. JrnlCleanWriteFilter(
  580. PCOMMAND_PACKET CmdPkt
  581. );
  582. ULONG
  583. JrnlCleanWriteFilterWorker (
  584. PQHASH_TABLE Table,
  585. PQHASH_ENTRY BeforeNode,
  586. PQHASH_ENTRY TargetNode,
  587. PVOID Context
  588. );
  589. VOID
  590. JrnlSubmitCleanWriteFilter(
  591. IN PVOLUME_MONITOR_ENTRY pVme,
  592. IN ULONG TimeOut
  593. );
  594. #define FRS_JOURNAL_FILTER_PRINT(_Sev_, _Table_, _Buffer_) \
  595. JrnlFilterPrint(_Sev_, _Table_, _Buffer_)
  596. #define FRS_JOURNAL_FILTER_PRINT_FUNCTION JrnlFilterPrintJacket
  597. VOID
  598. JrnlFilterPrint(
  599. ULONG PrintSev,
  600. PGENERIC_HASH_TABLE Table,
  601. PVOID Buffer
  602. );
  603. VOID
  604. JrnlFilterPrintJacket(
  605. PGENERIC_HASH_TABLE Table,
  606. PVOID Buffer
  607. );
  608. #define FRS_JOURNAL_CHANGE_ORDER_PRINT(_Table_, _Buffer_) \
  609. JrnlChangeOrderPrint( _Table_, _Buffer_)
  610. #define FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION JrnlChangeOrderPrint
  611. VOID
  612. JrnlChangeOrderPrint(
  613. PGENERIC_HASH_TABLE Table,
  614. PVOID Buffer
  615. );
  616. ULONG
  617. ChgOrdAcceptInitialize(
  618. VOID
  619. );
  620. VOID
  621. ChgOrdAcceptShutdown(
  622. VOID
  623. );
  624. DWORD
  625. FrsDeleteById(
  626. IN PWCHAR VolumeName,
  627. IN PWCHAR Name,
  628. IN PVOLUME_MONITOR_ENTRY pVme,
  629. IN PVOID Id,
  630. IN DWORD IdLen
  631. );
  632. DWORD
  633. JournalMonitorInit(
  634. VOID
  635. )
  636. /*++
  637. Routine Description:
  638. This routine initializes the NTFS Journal monitor routines and starts
  639. the JournalReadThread.
  640. Arguments:
  641. None.
  642. Thread Return Value:
  643. Win32 status
  644. --*/
  645. {
  646. #undef DEBSUB
  647. #define DEBSUB "JournalMonitorInit:"
  648. ULONG WStatus;
  649. ULONG ThreadId;
  650. JET_ERR jerr;
  651. ULONG i;
  652. if (JournalActiveIoRequests != 0) {
  653. DPRINT1(0, ":S: ERROR - Can't initialize journal with active I/O (%d) in progress.\n",
  654. JournalActiveIoRequests);
  655. return ERROR_REQUEST_ABORTED;
  656. }
  657. //
  658. // No completion port yet.
  659. //
  660. FRS_CLOSE(JournalCompletionPort);
  661. JournalCompletionPort = NULL;
  662. //
  663. // Read change order aging cache delay.
  664. //
  665. CfgRegReadDWord(FKC_CO_AGING_DELAY, NULL, 0, &ChangeOrderAgingDelay);
  666. ChangeOrderAgingDelay *= 1000;
  667. //
  668. // Init the list of volumes we monitor.
  669. //
  670. FrsInitializeQueue(&VolumeMonitorQueue, &VolumeMonitorQueue);
  671. FrsInitializeQueue(&VolumeMonitorStopQueue, &VolumeMonitorStopQueue);
  672. //
  673. // Free list for journal buffers.
  674. //
  675. FrsInitializeQueue(&JournalFreeQueue, &JournalFreeQueue);
  676. //
  677. // Locks for the Filter Table Child Lists.
  678. //
  679. for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
  680. INITIALIZE_CRITICAL_SECTION(&JrnlFilterTableChildLock[i]);
  681. }
  682. FrsInitializeQueue(&FrsVolumeLayerCOList, &FrsVolumeLayerCOList);
  683. FrsInitializeQueue(&FrsVolumeLayerCOQueue, &FrsVolumeLayerCOList);
  684. //
  685. // Wait for the DB to start up. During shutdown, this event is
  686. // set. Any extraneous commands issued by the journal are
  687. // subsequently ignored by the database.
  688. //
  689. WaitForSingleObject(DataBaseEvent, INFINITE);
  690. if (FrsIsShuttingDown) {
  691. return ERROR_PROCESS_ABORTED;
  692. }
  693. //
  694. // Create a journal read thread. It will wait until an entry is placed
  695. // on the VolumeMonitorQueue.
  696. //
  697. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  698. JournalReadThreadHandle = CreateThread(NULL,
  699. 0,
  700. JournalReadThread,
  701. (LPVOID) NULL,
  702. 0,
  703. &ThreadId);
  704. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  705. WStatus = GetLastError();
  706. DPRINT_WS(0, "Error from CreateThread", WStatus);
  707. return WStatus;
  708. }
  709. DbgCaptureThreadInfo2(L"JrnlRead", JournalReadThread, ThreadId);
  710. }
  711. return ERROR_SUCCESS;
  712. }
  713. VOID
  714. JournalMonitorShutdown(
  715. VOID
  716. )
  717. /*++
  718. Routine Description:
  719. This routine releases handles and frees storage for the NTFS Journal
  720. subsystem.
  721. Arguments:
  722. None.
  723. Thread Return Value:
  724. Win32 status
  725. --*/
  726. {
  727. #undef DEBSUB
  728. #define DEBSUB "JournalMonitorShutdown:"
  729. ULONG WStatus;
  730. JET_ERR jerr;
  731. ULONG i;
  732. DPRINT1(3, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  733. //
  734. // Stop the Change Order Accept thread.
  735. //
  736. ChgOrdAcceptShutdown();
  737. //
  738. // Locks for the Filter Table Child Lists.
  739. //
  740. for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
  741. DeleteCriticalSection(&JrnlFilterTableChildLock[i]);
  742. }
  743. }
  744. ULONG
  745. JrnlInitOneReplicaSet(
  746. PCOMMAND_PACKET CmdPkt
  747. )
  748. /*++
  749. Routine Description:
  750. This routine does all the journal and database initialization for a
  751. single replica set. It is used to startup a replica set that failed
  752. to start at service startup or to start a newly created replica set.
  753. Note the Journal and database subsystems must be initialized first.
  754. The Replica arg must have an initialized config record.
  755. Warning - There are no table level locks on the Filter table so only
  756. one replica set can be initialized at a time on a single volume.
  757. Actually this might work since the row locks and child link locks should
  758. be sufficient but it hasn't been tested.
  759. The second part of the initialization is done by the database server so
  760. the journal thread is free to finish processing any pending journal
  761. buffers for this volume since we have to pause it before we can update
  762. the filter table.
  763. Arguments:
  764. CmdPkt - ptr to a cmd packet with a ptr to a replica struct with a
  765. pre-initialized config record.
  766. Thread Return Value:
  767. Frs Error Status
  768. --*/
  769. {
  770. #undef DEBSUB
  771. #define DEBSUB "JrnlInitOneReplicaSet:"
  772. ULONG FStatus;
  773. ULONG WStatus;
  774. PCONFIG_TABLE_RECORD ConfigRecord;
  775. PREPLICA_THREAD_CTX RtCtx;
  776. PREPLICA Replica;
  777. //
  778. // Check that the journal subsystem is up.
  779. //
  780. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  781. return FrsErrorNotInitialized;
  782. }
  783. Replica = CmdPkt->Parameters.JournalRequest.Replica;
  784. //
  785. // Phase 1 of journal monitor init. This opens the USN journal on the volume
  786. // containing the replica set. It allocates the:
  787. // - volume filter hash table,
  788. // - parent file ID table,
  789. // - USN record file name dependency hash table,
  790. // - USN Write Filter Table,
  791. // - Active Child dependency hash table,
  792. // - volume change order list,
  793. // - volume Change Order Aging table hash table and the
  794. // - Active Inbound Change Order hash table.
  795. //
  796. // If the journal is already open then it returns the pVme for the volume
  797. // in the Replica struct.
  798. //
  799. DPRINT3(4, ":S: Phase 1 for replica %ws, id: %d, (%08x)\n",
  800. Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
  801. //
  802. // Assume its going to work out ok and go do it.
  803. //
  804. Replica->FStatus = FrsErrorSuccess;
  805. WStatus = JrnlPrepareService1(Replica);
  806. if (!WIN_SUCCESS(WStatus) || (Replica->pVme == NULL)) {
  807. DPRINT1_WS(4, "++ Phase 1 for replica %ws Failed;",
  808. Replica->ReplicaName->Name, WStatus);
  809. //
  810. // add cleanup code, delete vme ...
  811. //
  812. if (FRS_SUCCESS(Replica->FStatus)) {
  813. //
  814. // Return generic error if no specific error code was provided.
  815. //
  816. Replica->FStatus = FrsErrorReplicaPhase1Failed;
  817. }
  818. return Replica->FStatus;
  819. }
  820. ConfigRecord = (PCONFIG_TABLE_RECORD) (Replica->ConfigTable.pDataRecord);
  821. //
  822. // ** WARN ** at this point there is only one Replica Thread
  823. // context associated with the replica.
  824. //
  825. RtCtx = CONTAINING_RECORD(GetListHead(&Replica->ReplicaCtxListHead.ListHead),
  826. REPLICA_THREAD_CTX,
  827. ReplicaCtxList);
  828. DPRINT3(4, "++ Submit replica tree load cmd for replica %ws, id: %d, (%08x)\n",
  829. Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
  830. DPRINT3(4, "++ ConfigRecord: %08x, RtCtx: %08x, path: %ws\n",
  831. ConfigRecord, RtCtx, ConfigRecord->FSRootPath);
  832. //
  833. // Propagate the command packet on to the DBService to init the
  834. // replica tables and complete the rest of the initialization.
  835. //
  836. DbsPrepareCmdPkt(CmdPkt, // CmdPkt,
  837. Replica, // Replica,
  838. CMD_LOAD_ONE_REPLICA_FILE_TREE, // CmdRequest,
  839. NULL, // TableCtx,
  840. RtCtx, // CallContext,
  841. 0, // TableType,
  842. 0, // AccessRequest,
  843. 0, // IndexType,
  844. NULL, // KeyValue,
  845. 0, // KeyValueLength,
  846. TRUE); // Submit
  847. //
  848. // Phase 1 is done.
  849. //
  850. return FrsErrorSuccess;
  851. }
  852. ULONG_PTR
  853. JrnlFilterDeleteEntry (
  854. PGENERIC_HASH_TABLE Table,
  855. PVOID Buffer,
  856. PVOID Context
  857. )
  858. /*++
  859. Routine Description:
  860. This function is called thru GhtCleanTableByFilter() to delete all the
  861. Filter table entries for a given Replica Set specified by the
  862. Context parameter.
  863. Arguments:
  864. Table - the hash table being enumerated (to lookup parent entry).
  865. Buffer - a ptr to a FILTER_TABLE_ENTRY
  866. Context - A pointer to the Replica struct for the replica data added to the
  867. table.
  868. Return Value:
  869. True if the entry matches the Replica Context and is to be deleted.
  870. --*/
  871. {
  872. #undef DEBSUB
  873. #define DEBSUB "JrnlFilterDeleteEntry:"
  874. PREPLICA Replica = (PREPLICA) Context;
  875. PFILTER_TABLE_ENTRY FilterEntry = Buffer;
  876. return (FilterEntry->Replica == Replica);
  877. }
  878. ULONG
  879. JrnlCleanOutReplicaSet(
  880. PREPLICA Replica
  881. )
  882. /*++
  883. Routine Description:
  884. This routine cleans out the filter table and parent file ID table entries
  885. associated with the given replica set.
  886. *NOTE* We assume the caller has paused the journal and there is no
  887. activity on either the volume FilterTable or the ParentFidTable.
  888. Warning - There are no table level locks on the Filter table so only
  889. one replica set can be cleaned up t a time on a single volume.
  890. Arguments:
  891. Replica - ptr to replica struct.
  892. Thread Return Value:
  893. Frs Error Status
  894. --*/
  895. {
  896. #undef DEBSUB
  897. #define DEBSUB "JrnlCleanOutReplicaSet:"
  898. PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
  899. ULONG Cnt;
  900. //
  901. // Check that the journal subsystem is up.
  902. //
  903. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  904. return FrsErrorNotInitialized;
  905. }
  906. //
  907. // Scan the table and delete all the filter entries for this replica set.
  908. //
  909. Cnt = GhtCleanTableByFilter(pVme->FilterTable, JrnlFilterDeleteEntry, Replica);
  910. DPRINT1(4, "Total of %d Filter Table entries deleted.\n", Cnt);
  911. //
  912. // Ditto for the parent file ID table.
  913. //
  914. QHashDeleteByFlags(pVme->ParentFidTable, Replica->ReplicaNumber);
  915. //
  916. // Note: we could also do this for the name space table by moving the
  917. // sequence number into the quadword and putting the replica number
  918. // in flags
  919. return FrsErrorSuccess;
  920. }
  921. DWORD
  922. WINAPI
  923. Monitor(
  924. PFRS_THREAD ThisFrsThreadCtx
  925. )
  926. /*++
  927. Routine Description:
  928. This is the main journal work thread. It processes command packets
  929. and journal buffer packets off its processing queue.
  930. It filters each entry in the USN journal against a filter table for
  931. the volume to determine if the file in question is part of a replica
  932. set. It then builds a change order entry to feed the data base and
  933. the output logs.
  934. Note: Perf: If multiple volumes are being monitored, we could create
  935. additional monitor threads and divide the volumes up among the
  936. threads. The processing of USN records for a given volume is
  937. single threaded though because they must be processed in order.
  938. Arguments:
  939. ThisFrsThreadCtx - A pointer to the FRS_THREAD ctx for this thread.
  940. Thread Return Value:
  941. ERROR_SUCCESS - Thread terminated normally.
  942. Other errors from CreatFile, ReadDirectoryChangesW, CreateEvent, ...
  943. are returned as the thread exit status.
  944. --*/
  945. {
  946. #undef DEBSUB
  947. #define DEBSUB "monitor:"
  948. USN CurrentUsn;
  949. USN NextUsn;
  950. USN JournalConsumed;
  951. ULONGLONG CaptureParentFileID;
  952. PWCHAR Pwc;
  953. DWORD Level;
  954. ULONG RelativePathLength;
  955. ULONG FileAttributes;
  956. LONG DataLength;
  957. PUSN_RECORD UsnRecord;
  958. PUSN_RECORD OldRenUsnRec;
  959. PULONGLONG UsnBuffer;
  960. BOOL SaveFlag;
  961. PLIST_ENTRY Entry;
  962. PJBUFFER Jbuff;
  963. NTSTATUS Status;
  964. ULONG WStatus = ERROR_SUCCESS;
  965. ULONG GStatus;
  966. ULONG FStatus;
  967. PVOLUME_MONITOR_ENTRY pVme;
  968. PFRS_NODE_HEADER Header;
  969. PCONFIG_TABLE_RECORD ConfigRecord;
  970. PCOMMAND_PACKET CmdPkt;
  971. PREPLICA Replica;
  972. BOOL Excluded;
  973. UNICODE_STRING TempUStr;
  974. BOOL IsDirectory;
  975. ULONG UsnReason;
  976. ULONG Flags;
  977. LONG LocationCmd;
  978. PFILTER_TABLE_ENTRY PrevParentFilterEntry;
  979. PFILTER_TABLE_ENTRY CurrParentFilterEntry;
  980. PCXTION Cxtion;
  981. WCHAR FileName[MAX_PATH + 1];
  982. PrevParentFilterEntry = NULL;
  983. CurrParentFilterEntry = NULL;
  984. /******************************************************************************
  985. *******************************************************************************
  986. ** **
  987. ** **
  988. ** M A I N U S N J O U R N A L P R O C E S S L O O P **
  989. ** **
  990. ** **
  991. *******************************************************************************
  992. ******************************************************************************/
  993. DPRINT(5, ":S: Journal is starting.\n");
  994. //
  995. // Try-Finally
  996. //
  997. try {
  998. //
  999. // Capture exception.
  1000. //
  1001. try {
  1002. while (TRUE) {
  1003. //
  1004. // Wait on the JournalProcessQueue for a journal buffer.
  1005. //
  1006. Entry = FrsRtlRemoveHeadQueueTimeout(&JournalProcessQueue, 10*1000);
  1007. if (Entry == NULL) {
  1008. WStatus = GetLastError();
  1009. if (WStatus == WAIT_TIMEOUT) {
  1010. //
  1011. // Go look for more work.
  1012. //
  1013. continue;
  1014. }
  1015. if (WStatus == ERROR_INVALID_HANDLE) {
  1016. DPRINT(4, ":S: JournalProcessQueue is shutdown.\n");
  1017. //
  1018. // The queue has been run down. Close all the journal handles
  1019. // saving the USN to start the next read from. Then close
  1020. // Jet Session and exit.
  1021. //
  1022. WStatus = ERROR_SUCCESS;
  1023. JrnlCloseAll();
  1024. break;
  1025. }
  1026. //
  1027. // Unexpected error from FrsRtlRemoveHeadQueueTimeout
  1028. //
  1029. DPRINT_WS(0, "Error from FrsRtlRemoveHeadQueueTimeout", WStatus);
  1030. JrnlCloseAll();
  1031. break;
  1032. }
  1033. Header = (PFRS_NODE_HEADER) CONTAINING_RECORD(Entry, COMMAND_PACKET, ListEntry);
  1034. if (Header->Type == COMMAND_PACKET_TYPE) {
  1035. //
  1036. // Process the command packet.
  1037. //
  1038. WStatus = JrnlCommand((PCOMMAND_PACKET)Header);
  1039. continue;
  1040. }
  1041. if (Header->Type != JBUFFER_TYPE) {
  1042. //
  1043. // Garbage packet.
  1044. //
  1045. DPRINT2(0, "ERROR - Invalid packet type: %d, size: %d\n",
  1046. Header->Type, Header->Size);
  1047. FRS_ASSERT(!"Jrnl monitor: Invalid packet type");
  1048. }
  1049. ///////////////////////////////////////////////////////////////////
  1050. // //
  1051. // P R O C E S S J O U R N A L D A T A B U F F E R //
  1052. // //
  1053. ///////////////////////////////////////////////////////////////////
  1054. //
  1055. // Increment the Usn Reads Counter
  1056. //
  1057. PM_INC_CTR_SERVICE(PMTotalInst, UsnReads, 1);
  1058. Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
  1059. //DPRINT2(5, "jb: fu %08x (len: %d)\n",
  1060. // Jbuff, Jbuff->DataLength);
  1061. pVme = Jbuff->pVme;
  1062. WStatus = Jbuff->WStatus;
  1063. UsnBuffer = Jbuff->DataBuffer;
  1064. DataLength = Jbuff->DataLength;
  1065. DPRINT1(4, ":U: ***** USN Data for Volume %ws *****\n", pVme->FSVolInfo.VolumeLabel);
  1066. //
  1067. // Pull out the Next USN
  1068. //
  1069. NextUsn = 0;
  1070. if (DataLength != 0) {
  1071. UsnRecord = (PUSN_RECORD)((PCHAR)UsnBuffer + sizeof(USN));
  1072. DataLength -= sizeof(USN);
  1073. NextUsn = *(USN *)UsnBuffer;
  1074. DPRINT1(4, "Next Usn will be: %08lx %08lx\n", PRINTQUAD(NextUsn));
  1075. }
  1076. //
  1077. // Check if I/O is stopped on this journal and throw the buffer away.
  1078. // Could be a pause request.
  1079. //
  1080. if (!pVme->IoActive) {
  1081. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1082. DPRINT1(4, "++ I/O not active on this journal. Freeing buffer. State is: %s\n",
  1083. RSS_NAME(pVme->JournalState));
  1084. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1085. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1086. continue;
  1087. }
  1088. //
  1089. // Check for lost journal data. This is unlikely to happen here since
  1090. // this error will surface when we submit the journal read request.
  1091. // There is other error recovery code that is invoked when we try to start
  1092. // a replica set and the journal restart point is not found.
  1093. //
  1094. if (WStatus == ERROR_NOT_FOUND) {
  1095. DPRINT1(4, ":U: Usn %08lx %08lx has been deleted. Data lost, resync required\n",
  1096. PRINTQUAD(Jbuff->JrnlReadPoint));
  1097. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1098. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1099. //
  1100. // Post an error log entry.
  1101. //
  1102. EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
  1103. }
  1104. //
  1105. // Some other error.
  1106. //
  1107. if (!WIN_SUCCESS(WStatus)) {
  1108. DPRINT_WS(0, "ERROR - Read Usn Journal failed", WStatus);
  1109. //
  1110. // Put the VME on the stop queue and mark all Replica Sets
  1111. // using this VME as stopped.
  1112. //
  1113. // Add code to walk the replica list to stop replication on a journal error.
  1114. // Is closing the journal the right way to fail?
  1115. //
  1116. JrnlClose(Jbuff->FileHandle);
  1117. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1118. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1119. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1120. continue;
  1121. }
  1122. //
  1123. // Check for data left after USN.
  1124. //
  1125. if (DataLength > 0) {
  1126. //
  1127. // Check version number for mismatch.
  1128. //
  1129. if (UsnRecord->MajorVersion != ConfigUsnMajorVersion) {
  1130. DPRINT2(0, ":U: ERROR - Major version mismatch for USN Journal. Found: %d, Expected: %d\n",
  1131. UsnRecord->MajorVersion, ConfigUsnMajorVersion);
  1132. WStatus = ERROR_REVISION_MISMATCH;
  1133. //
  1134. // Put the VME on the stop queue and mark all Replica Sets
  1135. // using this VME as stopped.
  1136. //
  1137. // Note: Add code to walk the replica list & stop VME on config mismatch.
  1138. // is closing the journal the right way to fail?
  1139. //
  1140. JrnlClose(Jbuff->FileHandle);
  1141. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1142. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1143. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1144. continue;
  1145. }
  1146. }
  1147. //
  1148. // The USN save point for each replica can also depend on the amount of
  1149. // journal data consumed. If there is lots of activity on the journal
  1150. // but little or no activity on a given replica set hosted by the volume
  1151. // then we must keep advancing the USN save point for the replica.
  1152. // Otherwise, if we were to crash we could find ourselves with a USN
  1153. // save point at recovery for data no longer in the journal that we
  1154. // don't want anyway. In addition, if it was still in the journal we
  1155. // would have to plow through it a second time just to find nothing of
  1156. // interest. Once JRNL_USN_SAVE_POINT_INTERVAL bytes of journal data
  1157. // are consumed then trigger a USN save on all active replica sets on
  1158. // this volume. A journal replay could make this go negative so
  1159. // minimize with 0.
  1160. //
  1161. SaveFlag = FALSE;
  1162. LOCK_VME(pVme); // Get the lock to avoid QW Tearing with
  1163. // LastUsnSavePoint update in NEW_VSN() code.
  1164. JournalConsumed = NextUsn - pVme->LastUsnSavePoint;
  1165. if (JournalConsumed < 0) {JournalConsumed = (USN)0;}
  1166. if (JournalConsumed >= (USN) JRNL_USN_SAVE_POINT_INTERVAL) {
  1167. SaveFlag = TRUE;
  1168. DPRINT3(5, "++ USN Save Triggered: NextUsn: %08x %08x "
  1169. "LastSave: %08x %08x "
  1170. "Consumed: %08x %08x\n",
  1171. PRINTQUAD(NextUsn),
  1172. PRINTQUAD(pVme->LastUsnSavePoint),
  1173. PRINTQUAD(JournalConsumed));
  1174. pVme->LastUsnSavePoint = NextUsn;
  1175. }
  1176. UNLOCK_VME(pVme);
  1177. if (SaveFlag) {
  1178. DbsRequestSaveMark(pVme, FALSE);
  1179. }
  1180. ///////////////////////////////////////////////////////////////////
  1181. // //
  1182. // P R O C E S S U S N R E C O R D S //
  1183. // //
  1184. ///////////////////////////////////////////////////////////////////
  1185. //
  1186. // Walk through the buffer and process the results. Note that a single
  1187. // file can appear multiple times. E.G. a copy operation to a file may
  1188. // create the target update the create time and set the attributes.
  1189. // Each one of these is reported as a separate event.
  1190. //
  1191. RESET_JOURNAL_PROGRESS(pVme);
  1192. while (DataLength > 0) {
  1193. Replica = NULL;
  1194. if ((LONG)UsnRecord->RecordLength > DataLength) {
  1195. DPRINT2(0, ":U: ERROR: Bogus DataLength: %d, Record Length Was: %d\n",
  1196. DataLength, UsnRecord->RecordLength );
  1197. break;
  1198. }
  1199. //
  1200. // Track USN of current record being processed and the maximum
  1201. // point of progress reached in the journal.
  1202. //
  1203. CurrentUsn = UsnRecord->Usn;
  1204. pVme->CurrentUsnRecord = CurrentUsn;
  1205. CAPTURE_MAX_JOURNAL_PROGRESS(pVme, CurrentUsn);
  1206. //
  1207. // Check if I/O is stopped on this journal and skip the rest of the
  1208. // buffer. Could be a pause request. Capture current journal
  1209. // progress for an unpause.
  1210. //
  1211. if (!pVme->IoActive) {
  1212. CAPTURE_JOURNAL_PROGRESS(pVme, CurrentUsn);
  1213. DPRINT1(4, ":U: I/O not active on this journal. Freeing buffer. State is: %s\n",
  1214. RSS_NAME(pVme->JournalState));
  1215. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1216. break;
  1217. }
  1218. //
  1219. // Increment the UsnRecordsExamined counter
  1220. //
  1221. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecExamined, 1);
  1222. if (CurrentUsn == QUADZERO) {
  1223. DUMP_USN_RECORD(3, UsnRecord);
  1224. DPRINT(3, "++ Zero USN; skipping\n");
  1225. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1226. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1227. goto NEXT_USN_RECORD;
  1228. }
  1229. UsnReason = UsnRecord->Reason;
  1230. FileAttributes = UsnRecord->FileAttributes;
  1231. //
  1232. // If this is close record with a file name of the form
  1233. // "NTFRS_DELETED_FILE_xxxxxx" then delete the file. These are
  1234. // produced when an install override is performed by renaming an
  1235. // open target file to the above name in order to complete an install.
  1236. //
  1237. if ((UsnRecord->FileNameLength/sizeof(WCHAR) > wcslen(INSTALL_OVERRIDE_PREFIX)) &&
  1238. (wcsncmp(UsnRecord->FileName,
  1239. INSTALL_OVERRIDE_PREFIX,
  1240. wcslen(INSTALL_OVERRIDE_PREFIX)) == 0)) {
  1241. if (BooleanFlagOn(UsnReason, USN_REASON_CLOSE)) {
  1242. DUMP_USN_RECORD(3, UsnRecord);
  1243. if (!BooleanFlagOn(UsnReason, USN_REASON_FILE_DELETE) &&
  1244. ((UsnReason & ~USN_REASON_CLOSE) != 0)) {
  1245. //
  1246. // Delete the file.
  1247. //
  1248. RtlMoveMemory (FileName, UsnRecord->FileName, UsnRecord->FileNameLength);
  1249. FileName[UsnRecord->FileNameLength/sizeof(WCHAR)] = UNICODE_NULL;
  1250. WStatus = FrsDeleteById(pVme->DriveLetter,
  1251. FileName,
  1252. pVme,
  1253. &UsnRecord->FileReferenceNumber,
  1254. FILE_ID_LENGTH);
  1255. DPRINT1_WS(2, "++ WARN - cannot delete %ws;", FileName, WStatus);
  1256. }
  1257. DPRINT(3, "++ INSTALL OVERRIDE CLEANUP; skipping\n");
  1258. }
  1259. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1260. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1261. goto NEXT_USN_RECORD;
  1262. }
  1263. //
  1264. // Ignore temporary, encrypted files. We do replicate offline
  1265. // files (FILE_ATTRIBUTE_OFFLINE set) because some members
  1266. // may be running HSM and some may not. All members have to
  1267. // have the same data.
  1268. //
  1269. if (FileAttributes & (FILE_ATTRIBUTE_ENCRYPTED)) {
  1270. DUMP_USN_RECORD(3, UsnRecord);
  1271. DPRINT(3, "++ Encrypted; skipping\n");
  1272. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1273. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1274. goto NEXT_USN_RECORD;
  1275. }
  1276. //
  1277. // Skip USN records with the SOURCE_DATA_MANAGEMENT flag set.
  1278. // E.G. HSM and SIS would set this flag to prevent triggering
  1279. // replication when the data has not changed.
  1280. //
  1281. if (UsnRecord->SourceInfo & USN_SOURCE_DATA_MANAGEMENT) {
  1282. DUMP_USN_RECORD(3, UsnRecord);
  1283. DPRINT(3, "++ DATA_MANAGEMENT source; skipping\n");
  1284. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1285. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1286. goto NEXT_USN_RECORD;
  1287. }
  1288. //
  1289. // If this is an USN_REASON_RENAME_OLD_NAME record that does not have
  1290. // USN_REASON_RENAME_NEW_NAME set then capture the old name so
  1291. // we have it when processing the new name.
  1292. //
  1293. if ((UsnReason & USN_REASON_RENAME_OLD_NAME) &&
  1294. ((UsnReason & USN_REASON_RENAME_NEW_NAME) == 0) ) {
  1295. //
  1296. // Always pick up the old name when we see one. There are times
  1297. // when we will pick up an old name but then filter out the USN
  1298. // record. e.g. not in replica set, a staging file, etc.
  1299. // If we always load the old name then the next Close record
  1300. // with Rename New set will have the correct old name to insert into
  1301. // the name space table. Since multiple rename records can occur
  1302. // in sequence before we see the first close we need to track
  1303. // multiple RENAME_OLD_NAME records.
  1304. //
  1305. GStatus = QHashLookup(pVme->RenOldNameTable,
  1306. &UsnRecord->FileReferenceNumber,
  1307. NULL,
  1308. (PULONG_PTR) &OldRenUsnRec);
  1309. if (GStatus == GHT_STATUS_SUCCESS ) {
  1310. //
  1311. // Existing entry found for this file. Update it.
  1312. //
  1313. if (OldRenUsnRec->RecordLength < UsnRecord->RecordLength) {
  1314. OldRenUsnRec = FrsFree(OldRenUsnRec);
  1315. OldRenUsnRec = FrsAlloc(UsnRecord->RecordLength);
  1316. }
  1317. if (OldRenUsnRec != NULL) {
  1318. RtlMoveMemory (OldRenUsnRec, UsnRecord, UsnRecord->RecordLength);
  1319. DPRINT(3, "++ Rename old. Save name\n");
  1320. GStatus = QHashUpdate(pVme->RenOldNameTable,
  1321. &UsnRecord->FileReferenceNumber,
  1322. NULL,
  1323. (ULONG_PTR) OldRenUsnRec);
  1324. if (GStatus != GHT_STATUS_SUCCESS ) {
  1325. DPRINT1(0, "++ QHashUpdate error: %d\n", GStatus);
  1326. }
  1327. } else {
  1328. DPRINT(0, "++ Rename old. Save name failed -- no memory\n");
  1329. }
  1330. } else {
  1331. //
  1332. // No entry for this file. Create a new one and save USN record.
  1333. //
  1334. OldRenUsnRec = FrsAlloc(UsnRecord->RecordLength);
  1335. if (OldRenUsnRec != NULL) {
  1336. RtlMoveMemory (OldRenUsnRec, UsnRecord, UsnRecord->RecordLength);
  1337. DPRINT(3, "++ Rename old. Save name\n");
  1338. GStatus = QHashInsert(pVme->RenOldNameTable,
  1339. &UsnRecord->FileReferenceNumber,
  1340. NULL,
  1341. (ULONG_PTR) OldRenUsnRec,
  1342. FALSE);
  1343. if (GStatus != GHT_STATUS_SUCCESS ) {
  1344. OldRenUsnRec = FrsFree(OldRenUsnRec);
  1345. DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
  1346. }
  1347. } else {
  1348. DPRINT(0, "++ Rename old. Save name failed -- no memory\n");
  1349. }
  1350. }
  1351. DUMP_USN_RECORD(3, UsnRecord);
  1352. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1353. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1354. goto NEXT_USN_RECORD;
  1355. }
  1356. //
  1357. // FRS uses the NTFS journal filtering feature in which an app can
  1358. // tell NTFS what kinds of journal records it does not want to see.
  1359. // In particular FRS asks NTFS to filter out all journal records
  1360. // except for journal "Close" and "Create" records. NTFS
  1361. // writes a close record to the journal after the last handle to
  1362. // the file is closed. In addition, if the system crashes, at
  1363. // startup NTFS recovery-processing inserts close records for all
  1364. // open and modified files.
  1365. // The Create records need to be examined for directory creates
  1366. // because the close record may not appear for a while. Meanwhile
  1367. // multiple children close records can be processed which would
  1368. // be skipped unless the parent dir create was added to the Filter
  1369. // table. Bug 432549 was a case of this.
  1370. //
  1371. if (!BooleanFlagOn(UsnReason, USN_REASON_CLOSE)) {
  1372. if (BooleanFlagOn(UsnReason, USN_REASON_FILE_CREATE) &&
  1373. BooleanFlagOn(FileAttributes, FILE_ATTRIBUTE_DIRECTORY)) {
  1374. DUMP_USN_RECORD(3, UsnRecord);
  1375. DPRINT(3, "++ Dir Create; Cannot skip\n");
  1376. } else {
  1377. DUMP_USN_RECORD(3, UsnRecord);
  1378. DPRINT(3, "++ Not a close and not dir create; skipping\n");
  1379. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1380. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1381. goto NEXT_USN_RECORD;
  1382. }
  1383. }
  1384. //
  1385. // Skip files that have USN_REASON_REPARSE_POINT_CHANGE set.
  1386. // Since symbolic links are unsupported we do not replicate them.
  1387. // HSM and SIS also use reparse points but we only replicate changes
  1388. // to the file and these services change the NTFS File Record to set
  1389. // the reparse point attribute only when they migrate the file data
  1390. // somewhere else. By that time the file had already been created
  1391. // and was replicated when it was created. See NTIOAPI.H for more
  1392. // info about the REPARSE_DATA_BUFFER and the IO_REPARSE_TAG field.
  1393. //
  1394. #if 0
  1395. // This below is faulty because the SIS COPY FILE utility will both set and create
  1396. // files with a reparse point. We will have to rely on the data management test
  1397. // above to filter out the conversion of a file to and from a SIS link.
  1398. if (UsnReason & USN_REASON_REPARSE_POINT_CHANGE) {
  1399. DUMP_USN_RECORD(3, UsnRecord);
  1400. DPRINT(3, "++ Reparse point change; skipping\n");
  1401. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1402. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1403. goto NEXT_USN_RECORD;
  1404. }
  1405. #endif
  1406. //
  1407. // If this file record has the reparse attribute set then read
  1408. // the Reparse Tag from the file to see if this is either SIS or HSM.
  1409. //
  1410. if (FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
  1411. //
  1412. // Can't filter out Deletes though
  1413. //
  1414. if (!BooleanFlagOn(UsnReason, USN_REASON_FILE_DELETE)) {
  1415. WStatus = FrsCheckReparse(L"--",
  1416. (PULONG)&UsnRecord->FileReferenceNumber,
  1417. FILE_ID_LENGTH,
  1418. pVme->VolumeHandle);
  1419. if (!WIN_SUCCESS(WStatus)) {
  1420. DUMP_USN_RECORD(3, UsnRecord);
  1421. DPRINT_WS(3, "++ FrsGetReparseTag failed, skipping,", WStatus);
  1422. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1423. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1424. goto NEXT_USN_RECORD;
  1425. }
  1426. }
  1427. }
  1428. ///////////////////////////////////////////////////////////////////
  1429. // //
  1430. // F I L T E R P R O C E S S I N G //
  1431. // //
  1432. ///////////////////////////////////////////////////////////////////
  1433. //
  1434. // Note: If replication is paused for the replica tree we still
  1435. // process the journal entries so we don't lose data.
  1436. // When replication is later unpaused the update process picks
  1437. // up the change orders from the Replica Set Change order table.
  1438. //
  1439. // If replication was not started for a given replica tree then
  1440. // the directory fids won't be in the table. When replication
  1441. // is stopped for a replica tree its directory fids are purged
  1442. // from the table
  1443. //
  1444. // In the case of file or Dir renames the parent FID in the
  1445. // USN record is the FID of the destination of the rename.
  1446. // If the file/dir was in a replica set prior to the rename its
  1447. // parent file ID will be in the Parent File ID table for the
  1448. // volume.
  1449. //
  1450. // Determine if the file is in a replica set and if a location
  1451. // change is involved. Lookup the previous and current parent FID
  1452. // in the Journal Filter table and return references to their
  1453. // respective filter entries. From this point forward the flow
  1454. // must go thru SKIP_USN_RECORD so the ref counts on PrevParentFilterEntry
  1455. // and CurrParentFilterEntry are decremented appropriately.
  1456. //
  1457. LocationCmd = JrnlGetFileCoLocationCmd(pVme,
  1458. UsnRecord,
  1459. &PrevParentFilterEntry,
  1460. &CurrParentFilterEntry);
  1461. if (LocationCmd == FILE_NOT_IN_REPLICA_SET) {
  1462. goto SKIP_USN_RECORD;
  1463. }
  1464. //
  1465. // Nothing to do; skip the usn record
  1466. //
  1467. if (LocationCmd == CO_LOCATION_NO_CMD &&
  1468. ((UsnRecord->Reason & CO_CONTENT_MASK) == 0)) {
  1469. DUMP_USN_RECORD(5, UsnRecord);
  1470. DPRINT(5, "++ CO_LOCATION_NO_CMD and no content; skipping\n");
  1471. goto SKIP_USN_RECORD;
  1472. }
  1473. //
  1474. // Filter out creates of files with FILE_ATTRIBUTE_TEMPORARY set.
  1475. //
  1476. if (!(FileAttributes & FILE_ATTRIBUTE_DIRECTORY) &&
  1477. (FileAttributes & FILE_ATTRIBUTE_TEMPORARY) &&
  1478. CO_NEW_FILE(LocationCmd)) {
  1479. DUMP_USN_RECORD(5, UsnRecord);
  1480. DPRINT(5, "++ Temporary attribute set on file; skipping\n");
  1481. goto SKIP_USN_RECORD;
  1482. }
  1483. //
  1484. // Determine the Replica and get the Parent File ID.
  1485. //
  1486. if (CurrParentFilterEntry != NULL) {
  1487. CaptureParentFileID = CurrParentFilterEntry->DFileID;
  1488. Replica = CurrParentFilterEntry->Replica;
  1489. } else {
  1490. CaptureParentFileID = PrevParentFilterEntry->DFileID;
  1491. Replica = PrevParentFilterEntry->Replica;
  1492. }
  1493. FRS_ASSERT(Replica != NULL);
  1494. //
  1495. // Under certain conditions a USN record could refer to a file
  1496. // in the FRS PreInstall directory. In particular this can happen
  1497. // during restart when we have lost our journal write filter.
  1498. // No operation on a pre-install file should cause replication.
  1499. // Make special check here for parent FID match.
  1500. //
  1501. if (UsnRecord->ParentFileReferenceNumber == Replica->PreInstallFid) {
  1502. DUMP_USN_RECORD(5, UsnRecord);
  1503. DPRINT(5, "++ USN Record on PreInstall file; skipping\n");
  1504. goto SKIP_USN_RECORD;
  1505. }
  1506. DUMP_USN_RECORD2(3, UsnRecord, Replica->ReplicaNumber, LocationCmd);
  1507. DPRINT2(4, "++ IN REPLICA %d, %ws \n",
  1508. Replica->ReplicaNumber, Replica->ReplicaName->Name);
  1509. //
  1510. // Check for stale USN record. This occurs when a replica tree
  1511. // is reloaded from disk. In this case you can have stale USN records
  1512. // in the journal that predate the current state of the file when it
  1513. // was loaded. To handle this we capture the current USN when the
  1514. // replica tree load starts (Ub), and again when the load finishes
  1515. // (Ue). We save Ub and Ue with the replica config info. The USN
  1516. // of a record (Ur) affecting this replica tree is then compared
  1517. // with these bounds as follows: (Uf is current USN on the file).
  1518. // if Ur < Ub then skip record since the load has the current state.
  1519. // if Ur > Ue then process record since load has old state.
  1520. // if Ur > Uf then process record since load has old state.
  1521. // otherwise skip the record.
  1522. // Only in the last case is it necessary to open the file and read
  1523. // the USN (when Ub <= Ur <= Ue).
  1524. //
  1525. // Note: add code to filter stale USN records after a replica tree load.
  1526. // This is not a problem if the replica tree starts out empty.
  1527. //
  1528. // If the record USN is less than or equal to LastUsnRecordProcessed for
  1529. // this Replica then we must be doing a replay so ignore it.
  1530. // This works because a given file can only be in one Replica
  1531. // set at a time.
  1532. // NOTE: what about MOVERS?
  1533. //
  1534. // NOTE: Hardlinks across replica sets would violate this.
  1535. //
  1536. if (CurrentUsn <= Replica->LastUsnRecordProcessed) {
  1537. DPRINT(5, "++ USN <= LastUsnRecordProcessed. Record skipped.\n");
  1538. goto SKIP_USN_RECORD;
  1539. }
  1540. //
  1541. // If this replica set is paused or has encountered an error
  1542. // then skip the record. When it is restarted we will replay
  1543. // the journal for it.
  1544. //
  1545. if (Replica->ServiceState != REPLICA_STATE_ACTIVE) {
  1546. DPRINT1(5, "++ Replica->ServiceState not active (%s). Record skipped.\n",
  1547. RSS_NAME(Replica->ServiceState));
  1548. goto SKIP_USN_RECORD;
  1549. }
  1550. //
  1551. // Get the ptr to the config record for this replica.
  1552. //
  1553. ConfigRecord = Replica->ConfigTable.pDataRecord;
  1554. //
  1555. // The following call builds the path of the file as we currently
  1556. // know it. If the operation is a MOVEOUT this is the previous path.
  1557. // Since the USN data is historical the file/dir may not be at this
  1558. // location any longer.
  1559. //
  1560. FStatus = JrnlGetPathAndLevel(pVme->FilterTable,
  1561. &CaptureParentFileID,
  1562. &Level);
  1563. if (!FRS_SUCCESS(FStatus)) {
  1564. goto SKIP_USN_RECORD;
  1565. }
  1566. //
  1567. // Consistency checking.
  1568. //
  1569. if (UsnRecord->FileNameLength > (sizeof(FileName) - sizeof(WCHAR))) {
  1570. DPRINT1(0, ":U: ERROR - USN Record Inconsistency - File path length too long (%d bytes)\n",
  1571. UsnRecord->FileNameLength);
  1572. DPRINT3(0, ":U: ERROR - Start of data buf %08x, current ptr %08x, diff %d\n",
  1573. Jbuff->DataBuffer, UsnRecord,
  1574. (PCHAR) UsnRecord - (PCHAR) Jbuff->DataBuffer);
  1575. DPRINT1(0, ":U: ERROR - DataLength: %d\n", Jbuff->DataLength);
  1576. DPRINT(0, ":U: ERROR - Aborting rest of buffer.\n");
  1577. //
  1578. // Drop Refs and force buffer loop to exit.
  1579. //
  1580. FRS_ASSERT(!"Jrnl monitor: USN Record Inconsistency");
  1581. UsnRecord->RecordLength = (ULONG) DataLength;
  1582. goto SKIP_USN_RECORD;
  1583. }
  1584. RtlMoveMemory (FileName, UsnRecord->FileName, UsnRecord->FileNameLength);
  1585. FileName[UsnRecord->FileNameLength/sizeof(WCHAR)] = UNICODE_NULL;
  1586. DPRINT4(4, "++ NameLen %d Relative Level %d Name: %ws\\...\\%ws\n",
  1587. UsnRecord->FileNameLength, Level, Replica->Root, FileName);
  1588. //
  1589. // Determine if this USN entry is a directory or a file.
  1590. //
  1591. IsDirectory = (FileAttributes & FILE_ATTRIBUTE_DIRECTORY);
  1592. //
  1593. // First handle the case for directories.
  1594. //
  1595. if (IsDirectory) {
  1596. DPRINT(4, "++ FILE IS DIRECTORY -------\n");
  1597. //
  1598. // Level is the relative nesting level of the file in the
  1599. // replica tree. The immediate children of the root are Level 0.
  1600. // Ignore files at a depth greater than this.
  1601. // A value of one for ReplDirLevelLimit means allow files in
  1602. // the replica root dir only.
  1603. //
  1604. // Note: Add code to handle rename of a dir from excluded to included.
  1605. // This results in a MOVEDIR Change Order. Not for V1.
  1606. // Ditto for the following - Could be a movedir or movers.
  1607. //
  1608. // Note that a rename of a dir
  1609. // to the bottom level means we delete the subtree because there
  1610. // will be no dirs at the bottom level in the filter table.
  1611. //
  1612. Excluded = (Level >= (ConfigRecord->ReplDirLevelLimit-1));
  1613. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1614. DPRINT(4,"++ directory exceeds depth limit. Excluded\n");
  1615. goto SKIP_USN_RECORD;
  1616. }
  1617. //
  1618. // See if the name is on the exclusion filter list.
  1619. //
  1620. if (!IsListEmpty(&Replica->DirNameFilterHead)) {
  1621. FrsSetUnicodeStringFromRawString(&TempUStr,
  1622. UsnRecord->FileNameLength,
  1623. UsnRecord->FileName,
  1624. UsnRecord->FileNameLength);
  1625. LOCK_REPLICA(Replica);
  1626. Excluded = FrsCheckNameFilter(&TempUStr, &Replica->DirNameFilterHead);
  1627. //
  1628. // Not excluded if it's on the included list.
  1629. //
  1630. if (Excluded &&
  1631. FrsCheckNameFilter(&TempUStr, &Replica->DirNameInclFilterHead)) {
  1632. Excluded = FALSE;
  1633. }
  1634. UNLOCK_REPLICA(Replica);
  1635. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1636. DPRINT(4,"++ directory name filter hit. Excluded\n");
  1637. goto SKIP_USN_RECORD;
  1638. }
  1639. }
  1640. //
  1641. // Generate the change orders as we update the filter table.
  1642. //
  1643. DPRINT2(4,"++ DIR location cmd on: %ws\\...\\%ws\n",
  1644. Replica->Root, FileName);
  1645. JrnlFilterUpdate(Replica,
  1646. UsnRecord,
  1647. LocationCmd,
  1648. PrevParentFilterEntry,
  1649. CurrParentFilterEntry);
  1650. } else {
  1651. //
  1652. // Handle the files here.
  1653. //
  1654. // Evaluate the excluded state if this is a file.
  1655. // Files are allowed at the bottom level.
  1656. //
  1657. Excluded = (Level >= ConfigRecord->ReplDirLevelLimit);
  1658. //
  1659. // NOTE: Treat Movedir or movers that is > depth limit as moveout.
  1660. //
  1661. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1662. DPRINT(4,"++ Filter depth exceeded. File excluded\n");
  1663. goto SKIP_USN_RECORD;
  1664. }
  1665. // Note: Add code to handle rename of file from excluded to included.
  1666. //
  1667. // Excluded file check:
  1668. //
  1669. // 1. If this is a create or MOVEIN of a file with an
  1670. // excluded name then just ignore the USN record.
  1671. //
  1672. // 2. If this is a rename of an excluded file to a visible
  1673. // file then generate a MOVEIN change order for the file.
  1674. //
  1675. // 3. If the file is not in our tables then it must not
  1676. // be visible so ignore it. Note that changing the
  1677. // exclusion list by removing an element will not by itself
  1678. // make those files visible. A rename operation is still
  1679. // needed to get the file into our tables.
  1680. //
  1681. // 4. A rename of a visible file to an excluded file does
  1682. // not make the file excluded since it is still in our tables
  1683. // and present in all replicas. Only a delete or a rename
  1684. // of the file to a point outside the replica set will remove
  1685. // the file from our tables and all other replicas.
  1686. //
  1687. // 5. The addition of an element to the exclusion list only
  1688. // affects future creates. It has no effect on previous
  1689. // file creates that generated an entry in our tables.
  1690. //
  1691. //
  1692. // See if the name is on the exclusion filter list.
  1693. //
  1694. if (!IsListEmpty(&Replica->FileNameFilterHead)) {
  1695. FrsSetUnicodeStringFromRawString(&TempUStr,
  1696. UsnRecord->FileNameLength,
  1697. UsnRecord->FileName,
  1698. UsnRecord->FileNameLength);
  1699. LOCK_REPLICA(Replica);
  1700. Excluded = FrsCheckNameFilter(&TempUStr, &Replica->FileNameFilterHead);
  1701. //
  1702. // Not excluded if it's on the included list.
  1703. //
  1704. if (Excluded &&
  1705. FrsCheckNameFilter(&TempUStr, &Replica->FileNameInclFilterHead)) {
  1706. Excluded = FALSE;
  1707. }
  1708. UNLOCK_REPLICA(Replica);
  1709. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1710. DPRINT(4,"++ File name filter hit. Excluded\n");
  1711. goto SKIP_USN_RECORD;
  1712. }
  1713. }
  1714. //
  1715. // Looks like this file is real. See if we have a change order
  1716. // pending for it. If so update it, if not, alloc a new one.
  1717. //
  1718. WStatus = JrnlEnterFileChangeOrder(UsnRecord,
  1719. LocationCmd,
  1720. PrevParentFilterEntry,
  1721. CurrParentFilterEntry);
  1722. if (!WIN_SUCCESS(WStatus)) {
  1723. DPRINT(0, "++ ERROR - Change order create or update failed\n");
  1724. }
  1725. }
  1726. //
  1727. // Increment the UsnRecords Accepted counter
  1728. //
  1729. PM_INC_CTR_REPSET(Replica, UsnRecAccepted, 1);
  1730. goto ACCEPT_USN_RECORD;
  1731. SKIP_USN_RECORD:
  1732. //
  1733. // Increment the UsnRecordsRejected counter
  1734. //
  1735. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1736. ACCEPT_USN_RECORD:
  1737. //
  1738. // Release the references on the prev and current parent filter
  1739. // entries that were acquired by JrnlGetFileCoLocationCmd().
  1740. //
  1741. if (PrevParentFilterEntry != NULL) {
  1742. GhtDereferenceEntryByAddress(pVme->FilterTable,
  1743. PrevParentFilterEntry,
  1744. TRUE);
  1745. PrevParentFilterEntry = NULL;
  1746. }
  1747. if (CurrParentFilterEntry != NULL) {
  1748. GhtDereferenceEntryByAddress(pVme->FilterTable,
  1749. CurrParentFilterEntry,
  1750. TRUE);
  1751. CurrParentFilterEntry = NULL;
  1752. }
  1753. //
  1754. // This has to be done after processing the record so if a
  1755. // save mark were to happen at the same time we wouldn't
  1756. // erroneously filter out the record above when the CurrentUsn
  1757. // is compared with Replica->LastUsnProcessed.
  1758. //
  1759. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1760. //
  1761. // If we are out of Replay mode for this replica and the
  1762. // replica is active then advance our Journal progress
  1763. // point, Replica->LastUsnRecordProcessed.
  1764. //
  1765. if ((Replica != NULL) &&
  1766. (Replica->ServiceState == REPLICA_STATE_ACTIVE) &&
  1767. !REPLICA_REPLAY_MODE(Replica, pVme)) {
  1768. AcquireQuadLock(&pVme->QuadWriteLock);
  1769. Replica->LastUsnRecordProcessed = CurrentUsn;
  1770. ReleaseQuadLock(&pVme->QuadWriteLock);
  1771. }
  1772. NEXT_USN_RECORD:
  1773. //
  1774. // Advance to next USN Record.
  1775. //
  1776. DataLength -= UsnRecord->RecordLength;
  1777. UsnRecord = (PUSN_RECORD)((PCHAR)UsnRecord + UsnRecord->RecordLength);
  1778. } // end while(DataLength > 0)
  1779. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1780. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1781. } // end while(TRUE)
  1782. //
  1783. // Get exception status.
  1784. //
  1785. } except (EXCEPTION_EXECUTE_HANDLER) {
  1786. GET_EXCEPTION_CODE(WStatus);
  1787. }
  1788. } finally {
  1789. if (WIN_SUCCESS(WStatus)) {
  1790. if (AbnormalTermination()) {
  1791. WStatus = ERROR_OPERATION_ABORTED;
  1792. }
  1793. }
  1794. DPRINT_WS(0, "Journal Monitor thread finally.", WStatus);
  1795. //
  1796. // Trigger FRS shutdown if we terminated abnormally.
  1797. //
  1798. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
  1799. DPRINT(0, "Journal Monitor thread terminated abnormally, forcing service shutdown.\n");
  1800. FrsIsShuttingDown = TRUE;
  1801. SetEvent(ShutDownEvent);
  1802. } else {
  1803. WStatus = ERROR_SUCCESS;
  1804. }
  1805. //
  1806. // Cleanup all the storage.
  1807. //
  1808. DPRINT1(3, ":S: T E R M I N A T I N G -- %s\n", DEBSUB);
  1809. JournalMonitorShutdown();
  1810. if (HANDLE_IS_VALID(JournalReadThreadHandle)) {
  1811. WStatus = WaitForSingleObject(JournalReadThreadHandle, 10000);
  1812. CHECK_WAIT_ERRORS2(3, WStatus, 1);
  1813. if (WIN_SUCCESS(WStatus)) {
  1814. DPRINT(4, ":S: Journal Read thread terminated.\n");
  1815. }
  1816. } else {
  1817. DPRINT(4, ":S: Journal Read thread terminate - NULL Handle\n");
  1818. }
  1819. DPRINT(0, ":S: Journal is exiting.\n");
  1820. DPRINT1(4, ":S: ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx) - %08x\n", ThisFrsThreadCtx);
  1821. ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx);
  1822. }
  1823. return WStatus;
  1824. }
  1825. LONG
  1826. JrnlGetFileCoLocationCmd(
  1827. PVOLUME_MONITOR_ENTRY pVme,
  1828. IN PUSN_RECORD UsnRecord,
  1829. OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
  1830. OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
  1831. )
  1832. /*++
  1833. Routine Description:
  1834. Given the Reason mask and the current parent file ID in the USN record
  1835. and the previous parent File ID determine the location command for the
  1836. change order. The volume filter table is used to check the presence of
  1837. the parent directories in a replica set and to check if the file has
  1838. moved between two replica sets.
  1839. There are 5 cases shown in the table below. A lookup is done for each File
  1840. ID in the Filter table and these results are tested to generate the change
  1841. order location command value. (M: lookup miss, H: lookup hit). See
  1842. comments elsewhere for outcome defs.
  1843. Prev Curr Prev &
  1844. Parent Parent New
  1845. FID FID Parent R.S.
  1846. Case Lookup Lookup Match Outcome
  1847. 0 M M - FILE_NOT_IN_REPLICA_SET
  1848. 1 M H - MOVEIN
  1849. 2 H M - MOVEOUT (a)
  1850. 3 H H No (a), MOVERS, NAMECHANGE
  1851. 4 H H Yes MOVEDIR, NAMECHANGE
  1852. (a) The parent FID could be in the replica set while the File/Dir FID isn't
  1853. if a subtree enum by the update process hasn't reached the File/Dir FID yet
  1854. (MOVEIN on parent followed by MOVOUT on child) or,
  1855. The child was excluded and now its name is changing to allow inclusion.
  1856. In this case the rename includes a name change so the file is no
  1857. longer excluded.
  1858. During subtree operations filter table lookups must be blocked or races
  1859. causing invalid states will occur.
  1860. 1. MOVEIN - Rename of a directory into a replica set. The lookup failed on
  1861. the previous parent FID but the current parent FID is in the table. We
  1862. add an entry for this DIR to the filter table. The update process must
  1863. enumerate the subtree on disk and evaluate each file for inclusion into
  1864. the tree, updating the Filter table as it goes. We may see file
  1865. operations several levels down from the rename point and have no entry in
  1866. the Filter Table so we pitch those records. The sub-tree enumeration
  1867. process must handle this as it incorporates each file into the IDTable.
  1868. 2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
  1869. volume. This is a delete of an entire subtree in the Replica set. We
  1870. enumerate the subtree bottom-up, sending dir level change orders to the
  1871. update process as we delete the filter table entries.
  1872. 3. Name change only. The current Parent FID in the USN record matches the
  1873. Parent FID in the Filter entry for the file or directory. Update the name
  1874. in the filter entry.
  1875. 4. MOVEDIR - previous Parent FID is different from the current parent FID.
  1876. Both are in the Filter table with the same replica set. This is a rename
  1877. to a dir in the SAME replica set. Update the parent FID in the filter
  1878. enty and Filename too.
  1879. 5. MOVERS - The previous Parent FID is different from the current parent File
  1880. ID. Both are in the Filter Table but they have DIFFERENT replica set IDs.
  1881. Update the parent FID, the replica ptr, and name in the filter entry. This
  1882. is a move of an entire subtree from one replica set to another. We
  1883. enumerate the subtree top-down, sending dir level change orders to the
  1884. update process as we update the replica set information in the filter table
  1885. entries.
  1886. Arguments:
  1887. pVme - ptr to the Volume monitor entry for the parent file ID and
  1888. Volume Filter tables.
  1889. UsnRecord - ptr to the UsnRecord.
  1890. PrevParentFilterEntry = return value for the previous parent filter entry
  1891. or null. This is the parent under which
  1892. the file or dir used to reside.
  1893. CurrParentFilterEntry = return value for the current parent filter entry
  1894. or null. This is the parent under which the file
  1895. or dir currently resides.
  1896. NOTE: The caller must decrement the ref counts on the previous and new parent
  1897. filter entries if either is returned non null.
  1898. The table below summarizes the filter entry return values for previous
  1899. and current filter entry. A NULL ptr is returned in the 'No' cases.
  1900. It is the callers job to decrement the reference count on the filter
  1901. entry when a non=null value is returned.
  1902. Result returned in
  1903. PrevParentFilterEntry CurrParentFilterEntry
  1904. File Not in Replica Set No No
  1905. File content Change No Yes
  1906. create No Yes
  1907. delete No Yes
  1908. Movein No Yes
  1909. MoveOut Yes No
  1910. MoveDir Yes Yes
  1911. MoveRS Yes Yes
  1912. Return Value:
  1913. The change order location comand or FILE_NOT_IN_REPLICA_SET.
  1914. --*/
  1915. {
  1916. #undef DEBSUB
  1917. #define DEBSUB "JrnlGetFileCoLocationCmd:"
  1918. ULONG Reason;
  1919. PGENERIC_HASH_TABLE FilterTable;
  1920. PULONGLONG CurrParentFileID;
  1921. ULONGLONG PrevParentFileID;
  1922. PULONGLONG FileID;
  1923. ULONG_PTR Flags;
  1924. ULONG GStatus;
  1925. BOOL PrevParentExists;
  1926. *PrevParentFilterEntry = NULL;
  1927. *CurrParentFilterEntry = NULL;
  1928. //
  1929. // The code below checks for USN records with USN_SOURCE_REPLICATION_MANAGEMENT
  1930. // SourceInfo flag set. Currently we check for this bit for consistency
  1931. // with the state in our write filter table. A warning is generated
  1932. // when we get a mismatch. Eventually we need to remove the write filter
  1933. // hash table and just rely just on the above flag.
  1934. // It also tells us to skip our own records during recovery.
  1935. //
  1936. // First check if it's in the USN filter hash table. If so this is one of
  1937. // our own install writes (FrsCloseWithUsnDampening did the close)
  1938. // so skip the journal record and delete the table entry.
  1939. //
  1940. GStatus = QHashLookup(pVme->FrsWriteFilter,
  1941. &UsnRecord->Usn,
  1942. &PrevParentFileID, // unused result
  1943. &Flags); // unused result
  1944. if (GStatus == GHT_STATUS_SUCCESS) {
  1945. DUMP_USN_RECORD(4, UsnRecord);
  1946. DPRINT1(4, "++ USN Write filter cache hit on usn %08x %08x -- skip record\n",
  1947. PRINTQUAD(UsnRecord->Usn));
  1948. //
  1949. // Some code is closing the handle with usn dampening but did
  1950. // not mark the handle as being managed by ntfrs.
  1951. //
  1952. if (!BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1953. DPRINT2(4, "++ WARN Source not set; usn dampen: SourceInfo is %08x for %08x %08x\n",
  1954. UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
  1955. }
  1956. return FILE_NOT_IN_REPLICA_SET;
  1957. }
  1958. //
  1959. // Maybe recovery usn record but spit out a warning anyway. In
  1960. // general, usn records with USN_SOURCE_REPLICATION_MANAGEMENT set should have been
  1961. // closed with usn dampening and filtered out above.
  1962. //
  1963. if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1964. DPRINT2(4, "++ WARN Source set; no usn dampen: SourceInfo is %08x for %08x %08x\n",
  1965. UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
  1966. }
  1967. //
  1968. // Ignore the usn records generated by the service
  1969. //
  1970. // Note: get rid of writefilter and use SourceInfo always!
  1971. //
  1972. Reason = UsnRecord->Reason;
  1973. if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1974. if (Reason & USN_REASON_FILE_DELETE) {
  1975. DPRINT1(4, "++ Process service generated usn record for %08x %08x\n",
  1976. PRINTQUAD(UsnRecord->FileReferenceNumber));
  1977. } else {
  1978. DUMP_USN_RECORD(4, UsnRecord);
  1979. DPRINT1(4, "++ Ignore service generated usn record for %08x %08x\n",
  1980. PRINTQUAD(UsnRecord->FileReferenceNumber));
  1981. return FILE_NOT_IN_REPLICA_SET;
  1982. }
  1983. }
  1984. #ifdef RECOVERY_CONFLICT
  1985. //
  1986. // If a recovery conflict table exists check for a match and skip the USN
  1987. // record. This filters out any USN records caused by our own activities
  1988. // at the time of the crash.
  1989. //
  1990. if (pVme->RecoveryConflictTable != NULL) {
  1991. //
  1992. // Once we pass the journal recovery end point delete the table.
  1993. // It can not have any entries with a larger USN than the end point.
  1994. // ("how can we be sure that all replica sets on this volume have"
  1995. "actually started and so have actually finished using the"
  1996. "conflict table?")
  1997. //
  1998. if (UsnRecord->Usn > pVme->JrnlRecoveryEnd) {
  1999. pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
  2000. } else {
  2001. GStatus = QHashLookup(pVme->RecoveryConflictTable,
  2002. &UsnRecord->FileReferenceNumber,
  2003. &PrevParentFileID, // unused result
  2004. &Flags); // unused result
  2005. if (GStatus == GHT_STATUS_SUCCESS) {
  2006. DUMP_USN_RECORD(1, UsnRecord);
  2007. DPRINT1(1, "++ Recovery conflict table hit on FID %08x %08x -- skip record\n",
  2008. PRINTQUAD(UsnRecord->FileReferenceNumber));
  2009. return FILE_NOT_IN_REPLICA_SET;
  2010. }
  2011. }
  2012. }
  2013. #endif // RECOVERY_CONFLICT
  2014. FilterTable = pVme->FilterTable;
  2015. //
  2016. // Get the previous parent file ID for this file/Dir.
  2017. //
  2018. FileID = &UsnRecord->FileReferenceNumber;
  2019. CurrParentFileID = &UsnRecord->ParentFileReferenceNumber;
  2020. GStatus = QHashLookup(pVme->ParentFidTable, FileID, &PrevParentFileID, &Flags);
  2021. PrevParentExists = (GStatus == GHT_STATUS_SUCCESS);
  2022. //
  2023. // Check to see if we still need to special case any operations on the root
  2024. // dir of a replica set.
  2025. //
  2026. if (PrevParentExists) {
  2027. DPRINT2(5, "++ Fid: %08x %08x PrevParentFid: %08x %08x\n",
  2028. PRINTQUAD(UsnRecord->FileReferenceNumber),
  2029. PRINTQUAD(PrevParentFileID));
  2030. //
  2031. // IF the previous parent FID is not in the Filter table now and this
  2032. // is not a rename operation (which might result in a MOVEIN) then this
  2033. // file is not in a replica set. This case occurs after a MOVEOUT of a
  2034. // parent dir followed by some access to a child.
  2035. //
  2036. GStatus = GhtLookup(FilterTable, &PrevParentFileID, TRUE, PrevParentFilterEntry);
  2037. if ((GStatus != GHT_STATUS_SUCCESS) &&
  2038. ((Reason & USN_REASON_RENAME_NEW_NAME) == 0)) {
  2039. DUMP_USN_RECORD(4, UsnRecord);
  2040. DPRINT(4, "++ NOT IN RS - Entry in Parent File ID table but not FilterTable & not rename.\n");
  2041. return FILE_NOT_IN_REPLICA_SET;
  2042. }
  2043. } else {
  2044. //
  2045. // There is no entry in the parent file ID table for this file or dir.
  2046. // If there is no entry in the filter table for the file's current
  2047. // parent then the file is not in any replica set.
  2048. //
  2049. GStatus = GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
  2050. if (GStatus != GHT_STATUS_SUCCESS) {
  2051. DUMP_USN_RECORD(4, UsnRecord);
  2052. DPRINT(4, "++ NOT IN RS - Entry not in Parent File ID table or FilterTable.\n");
  2053. return FILE_NOT_IN_REPLICA_SET;
  2054. }
  2055. }
  2056. //
  2057. // A delete has to have an entry in the parent File ID table or it is not
  2058. // in a replica set.
  2059. //
  2060. if (Reason & USN_REASON_FILE_DELETE) {
  2061. //
  2062. // If the Previous parent filter entry is valid then the file/dir
  2063. // was in a replica set so treat it as a delete.
  2064. //
  2065. if (*PrevParentFilterEntry != NULL) {
  2066. *CurrParentFilterEntry = *PrevParentFilterEntry;
  2067. *PrevParentFilterEntry = NULL;
  2068. return CO_LOCATION_DELETE;
  2069. }
  2070. //
  2071. // It wasn't in the parent fid table so either the rename flag is also
  2072. // set or the current parent filter entry is non-null which would be
  2073. // the case for a delete on an excluded file. Either way skip it.
  2074. //
  2075. DUMP_USN_RECORD(4, UsnRecord);
  2076. DPRINT(4, "++ NOT IN RS - delete on excluded file?\n");
  2077. return FILE_NOT_IN_REPLICA_SET;
  2078. }
  2079. //
  2080. // A create has to have an entry for its parent in the Volume Filter Table
  2081. // or it is not in a replica set. It must have no prior entry in the Parent
  2082. // file ID table. (FILE IDs are unique).
  2083. //
  2084. if (Reason & USN_REASON_FILE_CREATE) {
  2085. //
  2086. // If the USN from the journal record is less than or equal to the USN
  2087. // from the file when the replica tree load was done then the created
  2088. // file was already picked up by the load. Otherwise it is an error
  2089. // because we should not have had an entry in the parent ID table yet.
  2090. // At this point we do not have the current USN on the file so we will
  2091. // assume that if a previous parent exists the load got there first and
  2092. // this journal record is stale (so skip the record).
  2093. //
  2094. // In the case where we have paused the journal to startup another
  2095. // replica set we may have to move the next USN to read from the journal
  2096. // back to let this new RS catch-up. In that case we will be seeing
  2097. // records for a second time. If we are in replay mode and the USN
  2098. // for this record is less than the LastUsnRecordProcessed for the target replica
  2099. // set then we ignore the record.
  2100. //
  2101. // Note: add above file usn check.
  2102. //
  2103. if (PrevParentExists) {
  2104. DUMP_USN_RECORD(4, UsnRecord);
  2105. DPRINT(4, "++ NOT IN RS \n");
  2106. return FILE_NOT_IN_REPLICA_SET;
  2107. }
  2108. return CO_LOCATION_CREATE;
  2109. }
  2110. //
  2111. // If not a rename then no location change, but this file is in a Replica Set.
  2112. //
  2113. if ((Reason & USN_REASON_RENAME_NEW_NAME) == 0) {
  2114. //
  2115. // Check for a content update to a file that is not in our tables.
  2116. // It could be an excluded file which gets filtered out later.
  2117. // Or an excluded file that is no longer excluded because the
  2118. // the exclusion list changed.
  2119. // Treat it as a create so we check the exclusion list again
  2120. // and set the USN record create flag for others that may look at it.
  2121. //
  2122. if (*CurrParentFilterEntry != NULL) {
  2123. //UsnRecord->Reason |= USN_REASON_FILE_CREATE;
  2124. //return CO_LOCATION_CREATE;
  2125. //
  2126. // Treat it as a MOVEIN since if it is a directory we need to
  2127. // enumerate the children.
  2128. //
  2129. return CO_LOCATION_MOVEIN;
  2130. }
  2131. //
  2132. // It's not a rename, CurrParentFilterEntry is NULL so to be here
  2133. // PrevParentFilterEntry must be non-null which means that this is
  2134. // a content update to a file we already know about.
  2135. //
  2136. FRS_ASSERT(*PrevParentFilterEntry != NULL);
  2137. *CurrParentFilterEntry = *PrevParentFilterEntry;
  2138. *PrevParentFilterEntry = NULL;
  2139. return CO_LOCATION_NO_CMD;
  2140. }
  2141. //
  2142. // Handle file rename cases. If parent FileIDs match then no location change.
  2143. //
  2144. if ((*PrevParentFilterEntry != NULL) &&
  2145. (PrevParentFileID == *CurrParentFileID)) {
  2146. *CurrParentFilterEntry = *PrevParentFilterEntry;
  2147. *PrevParentFilterEntry = NULL;
  2148. return CO_LOCATION_NO_CMD;
  2149. }
  2150. //
  2151. // Old and new parent file IDs are different. So the file/dir moved across
  2152. // directories. Could be MOVEIN, MOVEOUT, MOVEDIR, MOVERS.
  2153. //
  2154. if (*CurrParentFilterEntry == NULL) {
  2155. GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
  2156. }
  2157. if (*PrevParentFilterEntry != NULL) {
  2158. if (*CurrParentFilterEntry != NULL) {
  2159. //
  2160. // Old and new parents in table.
  2161. //
  2162. if ((*PrevParentFilterEntry)->Replica ==
  2163. (*CurrParentFilterEntry)->Replica) {
  2164. //
  2165. // Old and New Replica Sets are the same ==> MOVEDIR
  2166. //
  2167. return CO_LOCATION_MOVEDIR;
  2168. } else {
  2169. //
  2170. // Old and New Replica Sets are different ==> MOVERS
  2171. //
  2172. return CO_LOCATION_MOVERS;
  2173. }
  2174. } else {
  2175. //
  2176. // Old parent in table, new parent not in table ==> MOVEOUT
  2177. //
  2178. return CO_LOCATION_MOVEOUT;
  2179. }
  2180. } else {
  2181. if (*CurrParentFilterEntry != NULL) {
  2182. //
  2183. // Old parent not in table, new parent is in table ==> MOVEIN
  2184. //
  2185. return CO_LOCATION_MOVEIN;
  2186. } else {
  2187. //
  2188. // To get here the operation must be a rename on a file/dir
  2189. // that was in the parent file ID table but the previous parent
  2190. // File ID is no longer in the Filter table (MOVEOUT). In addition
  2191. // the current parent File ID is not in the filter table. So this
  2192. // is a rename operation on a file that was in a replica set in the
  2193. // past but is not currently in any replica set. The update process
  2194. // will eventually clean out the stale entries in the parent file
  2195. // ID table.
  2196. //
  2197. DUMP_USN_RECORD(4, UsnRecord);
  2198. DPRINT(4, "++ NOT IN RS - Rename on a file with a MOVEOUT parent.\n");
  2199. return FILE_NOT_IN_REPLICA_SET;
  2200. }
  2201. }
  2202. DUMP_USN_RECORD(4, UsnRecord);
  2203. DPRINT(4, "++ NOT IN RS\n");
  2204. return FILE_NOT_IN_REPLICA_SET;
  2205. }
  2206. ULONG
  2207. JrnlEnterFileChangeOrder(
  2208. IN PUSN_RECORD UsnRecord,
  2209. IN ULONG LocationCmd,
  2210. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  2211. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  2212. )
  2213. /*++
  2214. Routine Description:
  2215. Enter a new change order or update an existing change order.
  2216. This routine is for FILES ONLY. Directories are handled in
  2217. JrnlFilterUpdate().
  2218. This routine acquires and releases the locks on both the source and target
  2219. replica set change order lists (in the case of a MOVERS).
  2220. Assumes The caller has taken references on the old and new parent filter entry.
  2221. Arguments:
  2222. UsnRecord - ptr to the UsnRecord.
  2223. LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
  2224. OldParentFilterEntry - The filter entry for the file's previous parent.
  2225. NewParentFilterEntry - The filter entry for the file's current parent.
  2226. Return Value:
  2227. Win32 status.
  2228. --*/
  2229. {
  2230. #undef DEBSUB
  2231. #define DEBSUB "JrnlEnterFileChangeOrder:"
  2232. ULONG GStatus;
  2233. ULONG WStatus = ERROR_GEN_FAILURE;
  2234. PULONGLONG FileID;
  2235. ULONGLONG OriginalParentFileID;
  2236. PCHANGE_ORDER_ENTRY ChangeOrder;
  2237. PGENERIC_HASH_TABLE ChangeOrderTable;
  2238. PREPLICA CurrentReplica;
  2239. PREPLICA OriginalReplica;
  2240. PFILTER_TABLE_ENTRY OriginalParentFilterEntry;
  2241. BOOL PendingCo;
  2242. ULONG StreamSequenceNumber;
  2243. BOOL MergeOk;
  2244. PCXTION Cxtion;
  2245. UNICODE_STRING UnicodeStr, UnicodeStr2;
  2246. PVOLUME_MONITOR_ENTRY pVme;
  2247. PUSN_RECORD OldRenUsnRec;
  2248. //
  2249. // Determine the original parent and replica set if the file has moved around.
  2250. // This determines what change order table we need to examine for a pending
  2251. // change order.
  2252. // Note: Now that we have one change order table per volume, is this still needed?
  2253. //
  2254. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  2255. OriginalParentFilterEntry = OldParentFilterEntry;
  2256. } else {
  2257. OriginalParentFilterEntry = NewParentFilterEntry;
  2258. if (NewParentFilterEntry->DFileID != UsnRecord->ParentFileReferenceNumber) {
  2259. DPRINT(4, "++ Warn - Current parent FID NOT EQUAL to UsnRecord.parentFiD -- Stale USN Rec???\n");
  2260. DPRINT2(4, "++ %08x %08x -- %08x %08x\n",
  2261. PRINTQUAD(NewParentFilterEntry->DFileID),
  2262. PRINTQUAD(UsnRecord->ParentFileReferenceNumber));
  2263. return ERROR_INVALID_PARAMETER;
  2264. }
  2265. }
  2266. OriginalReplica = OriginalParentFilterEntry->Replica;
  2267. OriginalParentFileID = OriginalParentFilterEntry->DFileID;
  2268. pVme = OriginalReplica->pVme;
  2269. ChangeOrderTable = pVme->ChangeOrderTable;
  2270. CurrentReplica = (NewParentFilterEntry != NULL) ?
  2271. NewParentFilterEntry->Replica :
  2272. OldParentFilterEntry->Replica;
  2273. FrsRtlAcquireListLock(&pVme->ChangeOrderList);
  2274. //
  2275. // Make a new stream sequence number. Protected by above list lock.
  2276. //
  2277. StreamSequenceNumber = ++pVme->StreamSequenceNumber;
  2278. //
  2279. // See if there is a pending change order for this file/dir. The call to
  2280. // JrnlUpdateChangeOrder() drops our reference on the change order.
  2281. //
  2282. FileID = &UsnRecord->FileReferenceNumber;
  2283. GStatus = GhtLookupNewest(ChangeOrderTable, FileID, TRUE, &ChangeOrder);
  2284. PendingCo = (GStatus == GHT_STATUS_SUCCESS);
  2285. if (PendingCo) {
  2286. //
  2287. // There is a pending change order. Do a couple consistency checks.
  2288. //
  2289. // This USN record should not be for a file create because that
  2290. // would generate a new File ID which should NOT be in the table.
  2291. //
  2292. // NOT QUITE TRUE -- JrnlGetFileCoLocationCmd() will turn on the
  2293. // USN create flag if it sees a file is in the replica set but not
  2294. // in the parent file ID table. This happens when a file that was on
  2295. // the exclusion list is updated after the exclusion list is changed
  2296. // to allow the file to be included. Because of this situation we can
  2297. // also see the create flag set when the following occurs:
  2298. // 1. A series of file changes result in two COs being produced
  2299. // because the first CO is pulled off the process queue.
  2300. // 2. Subsequent file changes are accumulated in the 2nd CO.
  2301. // 3. Meanwhile the user deletes the file so the first CO aborts when
  2302. // it can't generate the staging file. As part of this abort the
  2303. // IDTable entry for the "new" file is deleted and the ParentFidTable
  2304. // entry is removed.
  2305. // 4. Now another USN record for the file (not the delete yet) arrives
  2306. // to merge with the 2nd CO under construction. Since we don't yet
  2307. // know a delete is coming the code in JrnlGetFileCoLocationCmd()
  2308. // sets the USN create flag as described above.
  2309. // 5. Now we end up here and hit the assert. So to avoid this we check
  2310. // the Pending CO and only assert if is already a create.
  2311. //
  2312. // Yea, yea I could just bag the assert but the above scenario is instructive.
  2313. //
  2314. if ((LocationCmd == CO_LOCATION_CREATE) &&
  2315. (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_CREATE)){
  2316. DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber, LocationCmd);
  2317. DPRINT(0, "++ ERROR -- USN_REASON_FILE_CREATE with create change order in the table:\n");
  2318. FRS_PRINT_TYPE(0, ChangeOrder);
  2319. FRS_ASSERT(!"JrnlEnterFileCO: USN_REASON_FILE_CREATE with create change order in table");
  2320. goto RETURN;
  2321. }
  2322. //
  2323. // If the pending change order is a delete and the USN record
  2324. // specifies the same FID this is an error because
  2325. // delete will have retired the FID.
  2326. //
  2327. if (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE){
  2328. DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber,
  2329. CO_LOCATION_DELETE);
  2330. DPRINT(0, "++ ERROR - new USN record follows delete with same FID");
  2331. FRS_PRINT_TYPE(0, ChangeOrder);
  2332. FRS_ASSERT(!"JrnlEnterFileCO: new USN record follows delete with same FID");
  2333. goto RETURN;
  2334. }
  2335. //
  2336. // USN MERGE RESTRICTIONS:
  2337. //
  2338. // Check if this USN record can be merged with the pending change order.
  2339. // If this USN record is a delete or a rename then it removes a name
  2340. // from the name space. If there exists a more recent change order
  2341. // that references this name then we can not merge the USN record.
  2342. // Instead we must create a new CO.
  2343. //
  2344. // Consider this sequence:
  2345. // Attrib -r Dir <== creates CO-1
  2346. // Del Dir\Foo <== creates CO-2
  2347. // Del Dir <== Merge with CO-1 causes name conflict.
  2348. //
  2349. // The "Del Dir" CO can't be merged with CO-1 because CO-2 is still
  2350. // using Dir to delete file Foo. If the merge were to take place the
  2351. // delete would fail since Dir is not empty. File Dir\Foo would be
  2352. // deleted but Dir would be left around.
  2353. //
  2354. // Similarly a rename creates a new name in the name space but if there
  2355. // is a more recent CO that references the name then the rename can't
  2356. // be merged.
  2357. //
  2358. // Consider the following sequence: (Bar already exists)
  2359. // Echo TestString > Foo <== creates CO-1
  2360. // Ren Bar Bar2 <== creates CO-2
  2361. // Ren Foo Bar <== Merge with CO-1 causes name conflict.
  2362. //
  2363. // Foo and Bar are different COs on different Fids but they have
  2364. // name space dependencies that prevent merging the Foo rename with
  2365. // CO-1 that does the file update. If we did merge these two COs then
  2366. // the resulting remote CO that is sent out would collide with the
  2367. // pre-existing Bar, thus deleting it. When CO-2 arrived the original
  2368. // Bar would be gone so there would be no Bar2.
  2369. //
  2370. MergeOk = TRUE;
  2371. if (MergeOk &&
  2372. CurrentReplica &&
  2373. (Cxtion = GTabLookup(CurrentReplica->Cxtions,
  2374. &CurrentReplica->JrnlCxtionGuid,
  2375. NULL)) &&
  2376. !GUIDS_EQUAL(&ChangeOrder->JoinGuid, &Cxtion->JoinGuid)) {
  2377. MergeOk = FALSE;
  2378. CHANGE_ORDER_TRACE(3, ChangeOrder, "Invalid join guid Merge NOT OK ");
  2379. }
  2380. //
  2381. // When we see USN_REASON_REPARSE_POINT_CHANGE it could indicate
  2382. // addition or removal of the reparse point as well as just a
  2383. // modification. One problem we can hit is when you remove the reparse
  2384. // point and then immediately delete the file. If these operations get
  2385. // merged then we will only see the delete on the other end.
  2386. // Unfortunately, when we try to delete the file on the other member we
  2387. // may fail. For example, DFS always returns an error when you access a
  2388. // file with a DFS reparse point on it.
  2389. //
  2390. // What we really want to do is prevent merging a removal of a reparse
  2391. // point with a later operation on the file. Since there is no way to
  2392. // differentiate the kinds of reparse point changes we just prevent
  2393. // merging any of them with later non reparse point changes.
  2394. //
  2395. // It is okay to merge a non reparse point change with a later reparse
  2396. // point change.
  2397. //
  2398. if(BooleanFlagOn(ChangeOrder->Cmd.ContentCmd, USN_REASON_REPARSE_POINT_CHANGE) &&
  2399. !BooleanFlagOn(UsnRecord->Reason, USN_REASON_REPARSE_POINT_CHANGE)) {
  2400. MergeOk = FALSE;
  2401. CHANGE_ORDER_TRACE(3, ChangeOrder, "Not a reparse point change Merge NOT OK ");
  2402. }
  2403. if(MergeOk && (BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME |
  2404. USN_REASON_FILE_DELETE))) {
  2405. //
  2406. // If this is not a serialized operation (MOVEDIR or MOVERS)
  2407. // then first test for conflict on the current name/parent FID of the
  2408. // file. Then if that's ok test for a conflict on the previous name.
  2409. //
  2410. if (CO_MOVE_RS_OR_DIR(LocationCmd)) {
  2411. MergeOk = FALSE;
  2412. CHANGE_ORDER_TRACE(3, ChangeOrder, "MOVERS/DIR Merge NOT OK ");
  2413. }
  2414. if (MergeOk) {
  2415. FrsSetUnicodeStringFromRawString(&UnicodeStr,
  2416. UsnRecord->FileNameLength,
  2417. UsnRecord->FileName,
  2418. UsnRecord->FileNameLength);
  2419. MergeOk = JrnlMergeCoTest(pVme,
  2420. &UnicodeStr,
  2421. &UsnRecord->ParentFileReferenceNumber,
  2422. ChangeOrder->StreamLastMergeSeqNum);
  2423. if (MergeOk) {
  2424. CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge OK ");
  2425. } else {
  2426. CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge NOT OK ");
  2427. }
  2428. }
  2429. //
  2430. // If the Merge is still on and this is a rename then check for
  2431. // a conflict in the use of the previous name that will go away.
  2432. //
  2433. if (MergeOk &&
  2434. BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME)) {
  2435. MergeOk = JrnlMergeCoTest(pVme,
  2436. &ChangeOrder->UFileName,
  2437. &OriginalParentFilterEntry->DFileID,
  2438. ChangeOrder->StreamLastMergeSeqNum);
  2439. if (MergeOk) {
  2440. CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge OK ");
  2441. } else {
  2442. CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge NOT OK ");
  2443. }
  2444. }
  2445. }
  2446. if (MergeOk) {
  2447. //
  2448. // Update the seq number of last USN record to contribute to CO.
  2449. //
  2450. ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
  2451. }
  2452. PendingCo = MergeOk;
  2453. //
  2454. // Creating new change order; drop reference on current change order
  2455. //
  2456. if (!PendingCo) {
  2457. GStatus = GhtDereferenceEntryByAddress(ChangeOrderTable,
  2458. ChangeOrder,
  2459. TRUE);
  2460. if (GStatus != GHT_STATUS_SUCCESS) {
  2461. DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
  2462. FRS_PRINT_TYPE(0, ChangeOrder);
  2463. FRS_ASSERT(!"JrnlEnterFileCO: ref count non positive");
  2464. goto RETURN;
  2465. }
  2466. }
  2467. }
  2468. if (!PendingCo) {
  2469. //
  2470. // Construct new change order.
  2471. //
  2472. ChangeOrder = JrnlCreateCo(OriginalReplica,
  2473. &UsnRecord->FileReferenceNumber,
  2474. &OriginalParentFilterEntry->DFileID,
  2475. UsnRecord,
  2476. BooleanFlagOn(UsnRecord->FileAttributes,
  2477. FILE_ATTRIBUTE_DIRECTORY),
  2478. UsnRecord->FileName,
  2479. UsnRecord->FileNameLength);
  2480. ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
  2481. //
  2482. // Set this up now so it appears in the log file. It is overwritten
  2483. // later with the real CO Guid when the CO is issued.
  2484. //
  2485. ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSequenceNumber;
  2486. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Create", UsnRecord->Reason);
  2487. } else {
  2488. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
  2489. }
  2490. //
  2491. // Update the Name Space Table with the current stream sequence number.
  2492. // Do this for both the file name and the parent dir name. In the case
  2493. // of rename do it for the original and current file name and parent names.
  2494. // So there are four cases. The table below shows where the filename
  2495. // and the File ID come from for each case.
  2496. //
  2497. // File Name Parent Filename
  2498. //
  2499. // Curr File UsnRecord->FileName UsnRecord->ParentFID
  2500. // Curr Parent NewParentFilterEntry->UFileName NewParentFilterEntry->DParentFileID
  2501. // Orig File RenOldNameTable->FileName RenOldNameTable->ParentFID
  2502. // Orig Parent OrigParentFilterEntry->UFileName OrigParentFilterEntry->DParentFileID
  2503. //
  2504. // Note:
  2505. // - The Curr info is only needed if CO is not a MOVEOUT.
  2506. // - The Orig info on the filename is only relevant if CO is a rename.
  2507. // - The Orig info on the parent dir is only relevant if CO is
  2508. // MoveOut, MoveDir or MoveRs.
  2509. //
  2510. if (LocationCmd != CO_LOCATION_MOVEOUT) {
  2511. //
  2512. // Update Curr File (Where the USN record says file went)
  2513. //
  2514. FrsSetUnicodeStringFromRawString(&UnicodeStr,
  2515. UsnRecord->FileNameLength,
  2516. UsnRecord->FileName,
  2517. UsnRecord->FileNameLength);
  2518. JrnlUpdateNst(pVme,
  2519. &UnicodeStr,
  2520. &UsnRecord->ParentFileReferenceNumber,
  2521. StreamSequenceNumber);
  2522. //
  2523. // Update Curr parent (the parent dir where file went)
  2524. //
  2525. JrnlUpdateNst(pVme,
  2526. &NewParentFilterEntry->UFileName,
  2527. &NewParentFilterEntry->DParentFileID,
  2528. StreamSequenceNumber);
  2529. }
  2530. if (BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME)) {
  2531. //
  2532. // Update Orig File location for rename COs.
  2533. // We use the info saved in the most recent Rename Old USN record for this file
  2534. // on the volume. Then free the saved old name.
  2535. //
  2536. OldRenUsnRec = NULL;
  2537. GStatus = QHashLookup(pVme->RenOldNameTable,
  2538. &UsnRecord->FileReferenceNumber,
  2539. NULL,
  2540. (PULONG_PTR) &OldRenUsnRec);
  2541. if (OldRenUsnRec != NULL) {
  2542. FrsSetUnicodeStringFromRawString(&UnicodeStr2,
  2543. OldRenUsnRec->FileNameLength,
  2544. OldRenUsnRec->FileName,
  2545. OldRenUsnRec->FileNameLength);
  2546. JrnlUpdateNst(pVme,
  2547. &UnicodeStr2,
  2548. &OldRenUsnRec->ParentFileReferenceNumber,
  2549. StreamSequenceNumber);
  2550. OldRenUsnRec = FrsFree(OldRenUsnRec);
  2551. GStatus = QHashDelete(pVme->RenOldNameTable,
  2552. &UsnRecord->FileReferenceNumber);
  2553. if (GStatus != GHT_STATUS_SUCCESS ) {
  2554. DPRINT1(0, "++ QHashDelete error: %d\n", GStatus);
  2555. }
  2556. } else {
  2557. DPRINT1(0, "RENAME_OLD_NAME record not found for Fid: %08x %08x\n",
  2558. PRINTQUAD(UsnRecord->FileReferenceNumber));
  2559. }
  2560. }
  2561. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  2562. //
  2563. // Update Orig Parent (The original parent dir where the file came from)
  2564. //
  2565. JrnlUpdateNst(pVme,
  2566. &OriginalParentFilterEntry->UFileName,
  2567. &OriginalParentFilterEntry->DParentFileID,
  2568. StreamSequenceNumber);
  2569. }
  2570. //
  2571. // Update the change order. This drops our ref on the change order.
  2572. //
  2573. WStatus = JrnlUpdateChangeOrder(ChangeOrder,
  2574. CurrentReplica,
  2575. UsnRecord->ParentFileReferenceNumber,
  2576. LocationCmd,
  2577. UsnRecord);
  2578. if (!WIN_SUCCESS(WStatus)) {
  2579. DPRINT(0, "++ Error - failed to insert or update change order\n");
  2580. DPRINT_WS(0, "JrnlUpdateChangeOrder", WStatus);
  2581. } else {
  2582. DPRINT1(4, "++ ChangeOrder %s success\n", (PendingCo ? "update" : "create"));
  2583. }
  2584. RETURN:
  2585. //
  2586. // Drop the locks on the change order process lists.
  2587. //
  2588. FrsRtlReleaseListLock(&pVme->ChangeOrderList);
  2589. return WStatus;
  2590. }
  2591. PCHANGE_ORDER_ENTRY
  2592. JrnlCreateCo(
  2593. IN PREPLICA Replica,
  2594. IN PULONGLONG Fid,
  2595. IN PULONGLONG ParentFid,
  2596. IN PUSN_RECORD UsnRecord,
  2597. IN BOOL IsDirectory,
  2598. IN PWCHAR FileName,
  2599. IN USHORT Length
  2600. )
  2601. /*++
  2602. Routine Description:
  2603. This functions allocates a change order entry and inits some of the fields.
  2604. Depending on the change order some of these fields are overwritten later.
  2605. Arguments:
  2606. Replica - ptr to replica set for this change order.
  2607. Fid - The file reference number for the local file.
  2608. ParentFid - The parent file reference number for this file.
  2609. UsnRecord - The NTFS USN record describing the change. When walking a
  2610. through a sub-tree this will be the USN record of the sub-tree root.
  2611. IsDirectory - TRUE if this CO is for a directory.
  2612. FileName - Filename for this file. For a sub tree op it comes from the
  2613. filter entry.
  2614. Length - the file name length in bytes.
  2615. Return Value:
  2616. ptr to change order entry.
  2617. --*/
  2618. {
  2619. #undef DEBSUB
  2620. #define DEBSUB "JrnlCreateCo:"
  2621. PCHANGE_ORDER_ENTRY ChangeOrder;
  2622. //
  2623. // Construct new change order.
  2624. // Set the initial reference count to 1.
  2625. //
  2626. ChangeOrder = FrsAllocType(CHANGE_ORDER_ENTRY_TYPE);
  2627. ChangeOrder->HashEntryHeader.ReferenceCount = 1;
  2628. //
  2629. // The command flag CO_FLAG_LOCATION_CMD should be clear.
  2630. // Mark this change order as a file or a directory.
  2631. // Note: If this CO is being generated off of a directory filter table
  2632. // entry (e.g. Moveout) then the ChangeOrder->Cmd.FileAttributes will
  2633. // be zero. ChgOrdReadIdRecord() detects this and inserts the file
  2634. // attributes from the IDTable record.
  2635. //
  2636. SET_CO_LOCATION_CMD(ChangeOrder->Cmd,
  2637. DirOrFile,
  2638. (IsDirectory ? CO_LOCATION_DIR : CO_LOCATION_FILE));
  2639. SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, CO_LOCATION_NO_CMD);
  2640. //
  2641. // Capture the file name.
  2642. //
  2643. FRS_ASSERT(Length <= MAX_PATH*2);
  2644. CopyMemory(ChangeOrder->Cmd.FileName, FileName, Length);
  2645. ChangeOrder->Cmd.FileName[Length/2] = UNICODE_NULL;
  2646. ChangeOrder->UFileName.Length = Length;
  2647. ChangeOrder->Cmd.FileNameLength = Length;
  2648. //
  2649. // Set New and orig Replica fields to the replica.
  2650. //
  2651. ChangeOrder->OriginalReplica = Replica;
  2652. ChangeOrder->NewReplica = Replica;
  2653. ChangeOrder->Cmd.OriginalReplicaNum = ReplicaAddrToId(Replica);
  2654. ChangeOrder->Cmd.NewReplicaNum = ReplicaAddrToId(Replica);
  2655. //
  2656. // Set New and orig parent FID fields to the parent FID.
  2657. //
  2658. ChangeOrder->OriginalParentFid = *ParentFid;
  2659. ChangeOrder->NewParentFid = *ParentFid;
  2660. ChangeOrder->ParentFileReferenceNumber = *ParentFid;
  2661. ChangeOrder->FileReferenceNumber = *Fid;
  2662. //
  2663. // Init with data from the USN Record.
  2664. //
  2665. ChangeOrder->EntryCreateTime = CO_TIME_NOW(Replica->pVme);
  2666. ChangeOrder->Cmd.EventTime = UsnRecord->TimeStamp;
  2667. ChangeOrder->Cmd.JrnlFirstUsn = UsnRecord->Usn;
  2668. return ChangeOrder;
  2669. }
  2670. BOOL
  2671. JrnlMergeCoTest(
  2672. IN PVOLUME_MONITOR_ENTRY pVme,
  2673. IN PUNICODE_STRING UFileName,
  2674. IN PULONGLONG ParentFid,
  2675. IN ULONG StreamLastMergeSeqNum
  2676. )
  2677. /*++
  2678. Routine Description:
  2679. Check if a new Usn record can be merged with this change order.
  2680. If there is any reference to the file name in the Usn record stream
  2681. after the point where the last merge occurred then we return FALSE
  2682. indicating the merge is disallowed. The ptr to the QHashEntry is returned
  2683. (if it is found) so LastUseSequenceNumber can be updated.
  2684. Arguments:
  2685. pVme - ptr to the volume monitor entry (w/ name space table) for test.
  2686. UFileName - Unicode Filename for this file.
  2687. ParentFid - The parent file reference number for this file.
  2688. StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
  2689. Return Value:
  2690. True if Merge is ok else false.
  2691. --*/
  2692. {
  2693. #undef DEBSUB
  2694. #define DEBSUB "JrnlMergeCoTest:"
  2695. ULONGLONG QuadHashValue;
  2696. ULONG StreamLastUseSeqNum;
  2697. PQHASH_ENTRY NstEntry;
  2698. CalcHashFidAndName(UFileName, ParentFid, &QuadHashValue);
  2699. NstEntry = QHashLookupLock(pVme->NameSpaceTable, &QuadHashValue);
  2700. if (NstEntry != NULL) {
  2701. StreamLastUseSeqNum = (ULONG)NstEntry->Flags;
  2702. if (StreamLastUseSeqNum > StreamLastMergeSeqNum) {
  2703. //
  2704. // There is a ref to this name in the Usn stream after
  2705. // point where the last record was merged with this CO.
  2706. // Can't merge this Usn Record.
  2707. //
  2708. return FALSE;
  2709. }
  2710. }
  2711. return TRUE;
  2712. }
  2713. ULONG
  2714. JrnlPurgeOldRenameWorker (
  2715. PQHASH_TABLE Table,
  2716. PQHASH_ENTRY BeforeNode,
  2717. PQHASH_ENTRY TargetNode,
  2718. PVOID Context
  2719. )
  2720. /*++
  2721. Routine Description:
  2722. This function is called thru QHashEnumerateTable() to clean out stale entries.
  2723. Arguments:
  2724. Table - the hash table being enumerated
  2725. BeforeNode -- ptr to the QhashEntry before the node of interest.
  2726. TargetNode -- ptr to the QhashEntry of interest.
  2727. Context - ptr to the USN to compare against.
  2728. Return Value:
  2729. FRS Status
  2730. --*/
  2731. {
  2732. #undef DEBSUB
  2733. #define DEBSUB "JrnlPurgeOldRenameWorker:"
  2734. USN PurgeUsn = *(USN *)Context;
  2735. PUSN_RECORD OldRenUsnRec;
  2736. OldRenUsnRec = (PUSN_RECORD) (TargetNode->Flags);
  2737. if (OldRenUsnRec == NULL) {
  2738. //
  2739. // All valid entries should point to a USN record but if not then
  2740. // just delete the qhash entry.
  2741. //
  2742. return FrsErrorDeleteRequested;
  2743. }
  2744. if (OldRenUsnRec->Usn < PurgeUsn) {
  2745. //
  2746. // This record is past the point of interest so clean it out.
  2747. //
  2748. OldRenUsnRec = FrsFree(OldRenUsnRec);
  2749. TargetNode->Flags = (ULONG_PTR) NULL;
  2750. //
  2751. // Tell QHashEnumerateTable() to delete the node and continue the enum.
  2752. //
  2753. return FrsErrorDeleteRequested;
  2754. }
  2755. return FrsErrorSuccess;
  2756. }
  2757. ULONG
  2758. JrnlPurgeNstWorker (
  2759. PQHASH_TABLE Table,
  2760. PQHASH_ENTRY BeforeNode,
  2761. PQHASH_ENTRY TargetNode,
  2762. PVOID Context
  2763. )
  2764. /*++
  2765. Routine Description:
  2766. This function is called thru QHashEnumerateTable() to clean out stale entries.
  2767. Arguments:
  2768. Table - the hash table being enumerated
  2769. BeforeNode -- ptr to the QhashEntry before the node of interest.
  2770. TargetNode -- ptr to the QhashEntry of interest.
  2771. Context - ptr to the Stream Sequence Number to compare against.
  2772. Return Value:
  2773. FRS Status
  2774. --*/
  2775. {
  2776. #undef DEBSUB
  2777. #define DEBSUB "JrnlPurgeNstWorker:"
  2778. ULONG StreamSeqNum = *(ULONG *)Context;
  2779. if ( (ULONG)(TargetNode->Flags) < StreamSeqNum) {
  2780. DPRINT5(4, "JrnlPurgeNstWorker - BeforeNode: %08x, Link: %08x,"
  2781. " Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
  2782. BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
  2783. PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
  2784. //
  2785. // Tell QHashEnumerateTable() to delete the node and continue the enum.
  2786. //
  2787. return FrsErrorDeleteRequested;
  2788. }
  2789. return FrsErrorSuccess;
  2790. }
  2791. VOID
  2792. JrnlUpdateNst(
  2793. IN PVOLUME_MONITOR_ENTRY pVme,
  2794. IN PUNICODE_STRING UFileName,
  2795. IN PULONGLONG ParentFid,
  2796. IN ULONG StreamSequenceNumber
  2797. )
  2798. /*++
  2799. Routine Description:
  2800. Update the LastUseSequenceNumber in the Name Space Table.
  2801. If the entry is not present, create it.
  2802. Arguments:
  2803. pVme - ptr to the volume monitor entry (w/ name space table) for test.
  2804. UFileName - Unicode Filename for this file.
  2805. ParentFid - The parent file reference number for this file.
  2806. StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
  2807. Return Value:
  2808. None.
  2809. --*/
  2810. {
  2811. #undef DEBSUB
  2812. #define DEBSUB "JrnlUpdateNst:"
  2813. ULONGLONG Qhv;
  2814. USN PurgeUsn;
  2815. PQHASH_ENTRY NstEntry;
  2816. ULONG LastFetched, LastCleaned;
  2817. CalcHashFidAndName(UFileName, ParentFid, &Qhv);
  2818. NstEntry = QHashLookupLock(pVme->NameSpaceTable, &Qhv);
  2819. if (NstEntry != NULL) {
  2820. NstEntry->Flags = StreamSequenceNumber;
  2821. } else {
  2822. //
  2823. // Name not found. Create a new entry.
  2824. //
  2825. QHashInsertLock(pVme->NameSpaceTable, &Qhv, &Qhv, StreamSequenceNumber);
  2826. }
  2827. //
  2828. // Every so often sweep the Name Space Table and clean out stale entries.
  2829. // By doing this as part of the Journal monitor thread we can avoid
  2830. // using locks on the NameSpaceTable since this is the only thread that
  2831. // touches it.
  2832. //
  2833. if ((StreamSequenceNumber & 127) == 0) {
  2834. LastFetched = pVme->StreamSequenceNumberFetched;
  2835. LastCleaned = pVme->StreamSequenceNumberClean;
  2836. if ((LastFetched > LastCleaned) &&
  2837. ((LastFetched - LastCleaned) > 100)) {
  2838. //
  2839. // Sweep the table and purge any entries with a Stream Sequence
  2840. // Number less than LastFetched since that CO is no longer in the
  2841. // process queue.
  2842. //
  2843. QHashEnumerateTable(pVme->NameSpaceTable,
  2844. JrnlPurgeNstWorker,
  2845. &LastFetched);
  2846. pVme->StreamSequenceNumberClean = LastFetched;
  2847. //
  2848. // Clean up stray entries in the Old Rename name table too.
  2849. //
  2850. PurgeUsn = pVme->LastUsnSavePoint;
  2851. QHashEnumerateTable(pVme->RenOldNameTable,
  2852. JrnlPurgeOldRenameWorker,
  2853. &PurgeUsn);
  2854. }
  2855. }
  2856. }
  2857. VOID
  2858. JrnlFilterUpdate(
  2859. IN PREPLICA CurrentReplica,
  2860. IN PUSN_RECORD UsnRecord,
  2861. IN ULONG LocationCmd,
  2862. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  2863. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  2864. )
  2865. /*++
  2866. Routine Description:
  2867. Process a directory operation. Generate the change order(s) and update the
  2868. Filter table. This may involve multiple operations over a subtree.
  2869. It assumes it is being called with a USN directory change record and
  2870. that references have been taken on OldParentFilterEntry and
  2871. NewParentFilterEntry.
  2872. Arguments:
  2873. CurrentReplica - ptr to the Replica struct containing the directory now.
  2874. UsnRecord - ptr to the UsnRecord.
  2875. LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
  2876. OldParentFilterEntry - The filter entry for the directory's previous parent.
  2877. NewParentFilterEntry - The filter entry for the directory's current parent.
  2878. Return Value:
  2879. None.
  2880. --*/
  2881. {
  2882. #undef DEBSUB
  2883. #define DEBSUB "JrnlFilterUpdate:"
  2884. PGENERIC_HASH_TABLE FilterTable = CurrentReplica->pVme->FilterTable;
  2885. PFILTER_TABLE_ENTRY FilterEntry;
  2886. ULONG GStatus, WStatus;
  2887. ULONG Flags;
  2888. PULONGLONG FileID;
  2889. PREPLICA OriginalReplica;
  2890. CHANGE_ORDER_PARAMETERS Cop;
  2891. //
  2892. // Determine the file location command to use in the change order.
  2893. // First get the old parent file ID incase this was a rename.
  2894. //
  2895. FileID = &UsnRecord->FileReferenceNumber;
  2896. //
  2897. // If there is no old parent filter entry (Create, Delete, MOVEIN or NO_CMD)
  2898. // then the original replica is NULL.
  2899. //
  2900. OriginalReplica = (OldParentFilterEntry == NULL) ?
  2901. NULL : OldParentFilterEntry->Replica;
  2902. //
  2903. // Look for an entry in the Filter Table for this DIR and create a new
  2904. // one if needed.
  2905. //
  2906. GStatus = GhtLookup(FilterTable, FileID, TRUE, &FilterEntry);
  2907. if (GStatus == GHT_STATUS_SUCCESS) {
  2908. //
  2909. // For a create the entry could already be in the table. This could
  2910. // happen when a Replica Load inserts the directory and then we see the
  2911. // Journal Entry for the create later. If only the Create bit is set
  2912. // in the reason mask there is nothing for us to do.
  2913. //
  2914. if (UsnRecord->Reason == (USN_REASON_FILE_CREATE | USN_REASON_CLOSE)) {
  2915. DPRINT(4,"++ USN_REASON_FILE_CREATE: for dir with entry in table. skipping\n");
  2916. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  2917. return;
  2918. }
  2919. } else {
  2920. //
  2921. // Create a filter entry for this directory if it's a create or movein.
  2922. // A MoveIn is the same as a create dir since we need to create a filter
  2923. // table entry and only a single dir is involved. It is possible that
  2924. // the update process has already found the dir and added the filter
  2925. // entry. If so we generate the change order anyway since there may
  2926. // be other reason flags to consider. There is no original replica
  2927. // for a create or a rename.
  2928. //
  2929. if (CO_NEW_FILE(LocationCmd)) {
  2930. //
  2931. // The following returns with a reference on FilterEntry.
  2932. //
  2933. WStatus = JrnlAddFilterEntryFromUsn(CurrentReplica,
  2934. UsnRecord,
  2935. &FilterEntry);
  2936. if (!WIN_SUCCESS(WStatus)) {
  2937. DUMP_USN_RECORD2(3, UsnRecord, CurrentReplica->ReplicaNumber, LocationCmd);
  2938. DPRINT(1, "++ ERROR - JrnlAddFilterEntryFromUsn failed\n");
  2939. return;
  2940. }
  2941. } else {
  2942. //
  2943. // Note: touching a dir that was previously EXCLUDED fails to add filter entry
  2944. //
  2945. DUMP_USN_RECORD2(3, UsnRecord, CurrentReplica->ReplicaNumber, LocationCmd);
  2946. DPRINT(1, "++ Warning: Dir not found in Filter Table and not a CO_NEW_FILE, skipping\n");
  2947. return;
  2948. }
  2949. }
  2950. //
  2951. // Process the directory through the volume filter and generate the
  2952. // appropriate change orders.
  2953. //
  2954. //
  2955. // Setup the change order parameters.
  2956. //
  2957. // Original and current/new Replica Sets
  2958. // new parent FID.
  2959. // Usn Record triggering change order creation. (i.e. the op on root of
  2960. // the subtree).
  2961. // The location change command.
  2962. // Original and current/new parent filter entries of root filter entry
  2963. //
  2964. Cop.OriginalReplica = OriginalReplica;
  2965. Cop.NewReplica = CurrentReplica;
  2966. Cop.NewParentFid = UsnRecord->ParentFileReferenceNumber;
  2967. Cop.UsnRecord = UsnRecord;
  2968. Cop.NewLocationCmd = LocationCmd;
  2969. Cop.OrigParentFilterEntry = OldParentFilterEntry;
  2970. Cop.NewParentFilterEntry = NewParentFilterEntry;
  2971. //
  2972. // Process the subtree starting at the root filter entry of change.
  2973. //
  2974. WStatus = JrnlProcessSubTree(FilterEntry, &Cop);
  2975. //
  2976. // Drop the ref on the filter entry if it wasn't deleted.
  2977. //
  2978. if ((FilterEntry != NULL) &&
  2979. !((LocationCmd == CO_LOCATION_DELETE) ||
  2980. (LocationCmd == CO_LOCATION_MOVEOUT))) {
  2981. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  2982. }
  2983. return;
  2984. }
  2985. ULONG
  2986. JrnlProcessSubTree(
  2987. IN PFILTER_TABLE_ENTRY RootFilterEntry,
  2988. IN PCHANGE_ORDER_PARAMETERS Cop
  2989. )
  2990. /*++
  2991. Routine Description:
  2992. This function is called to build a change order parameter block and
  2993. enumerate through a filter subtree. It acquires the necessary locks
  2994. for the duration of the operation.
  2995. Arguments:
  2996. RootFilterEntry - The root of the filter subtree being operated on.
  2997. NULL if it doesn't yet exist (e.g. MOVEIN or CREATE).
  2998. Cop - Struct with the change order param data to pass down the subtree.
  2999. Return Value:
  3000. win32 status
  3001. --*/
  3002. {
  3003. #undef DEBSUB
  3004. #define DEBSUB "JrnlProcessSubTree:"
  3005. ULONG WStatus;
  3006. PGENERIC_HASH_TABLE FilterTable;
  3007. PVOLUME_MONITOR_ENTRY pVme;
  3008. PREPLICA NewReplica = Cop->NewReplica;
  3009. ULONG NewLocationCmd = Cop->NewLocationCmd;
  3010. PREPLICA OriginalReplica = Cop->OriginalReplica;
  3011. if (NewLocationCmd == CO_LOCATION_MOVEOUT) {
  3012. pVme = OriginalReplica->pVme;
  3013. } else {
  3014. pVme = NewReplica->pVme;
  3015. }
  3016. FilterTable = pVme->FilterTable;
  3017. //
  3018. // Get the change order process list lock for the volume.
  3019. //
  3020. FrsRtlAcquireListLock(&pVme->ChangeOrderList);
  3021. //
  3022. // dispatch on new location command.
  3023. // Get locks and enumerate subtree top down or bottom up.
  3024. //
  3025. switch (NewLocationCmd) {
  3026. case CO_LOCATION_NO_CMD:
  3027. //
  3028. // Even though there is no location change. There could still be a
  3029. // dir related content change. So process like a create that the
  3030. // update process got to first.
  3031. //
  3032. case CO_LOCATION_CREATE:
  3033. case CO_LOCATION_MOVEIN:
  3034. case CO_LOCATION_MOVEIN2:
  3035. //
  3036. // Create a change order for it. Not really a subtree operation.
  3037. // A MoveIn is the same as a create dir since we need to create a filter
  3038. // table entry and only a single dir is involved. It is possible that
  3039. // the update process has already found the dir and added the filter
  3040. // entry. If so we generate the change order anyway since there may
  3041. // be other reason flags to consider. There is no original replica
  3042. // for a create or a MOVEIN. The caller sets original replica to
  3043. // new replica and has created the filter entry.
  3044. //
  3045. // Bump the ref count to keep the count in sync with the path through
  3046. // JrnlEnumerateFilterTreexx().
  3047. //
  3048. INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
  3049. WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
  3050. DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
  3051. break;
  3052. case CO_LOCATION_DELETE:
  3053. case CO_LOCATION_MOVEDIR:
  3054. //
  3055. // Create change order for the directory delete and delete filter entry.
  3056. // Not really a subtree operation since the dir can have no children
  3057. // when it's deleted.
  3058. // If the operation is MOVEDIR then JrnlProcessSubTreeEntry() will
  3059. // change the parent dir in the filter entry and put it on the child
  3060. // list of the new parent.
  3061. //
  3062. // Bump the ref count to keep the count in sync with the path through
  3063. // JrnlEnumerateFilterTreexx().
  3064. //
  3065. INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
  3066. JrnlAcquireChildLock(NewReplica);
  3067. WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
  3068. DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
  3069. JrnlReleaseChildLock(NewReplica);
  3070. break;
  3071. case CO_LOCATION_MOVEOUT:
  3072. //
  3073. // An entire subtree is renamed out of the replica tree.
  3074. //
  3075. // Get the lock on the filter entry child list for this replica.
  3076. // Walk the subtree bottom up, creating the change orders for the
  3077. // MOVEOUT and deleting the filter entries at the same time.
  3078. // Drop the child list lock.
  3079. //
  3080. JrnlAcquireChildLock(OriginalReplica);
  3081. WStatus = JrnlEnumerateFilterTreeBU(FilterTable,
  3082. RootFilterEntry,
  3083. JrnlProcessSubTreeEntry,
  3084. Cop);
  3085. JrnlReleaseChildLock(OriginalReplica);
  3086. DPRINT_WS(0, "++ Error - failed to add change order for dir MOVEOUT:", WStatus);
  3087. break;
  3088. case CO_LOCATION_MOVERS:
  3089. //
  3090. // Get the lock on the filter entry child list for both this replica
  3091. // and the new replica set.
  3092. // Walk the subtree Top-Down, creating the change orders for the MOVERS.
  3093. // Drop the child list locks.
  3094. //
  3095. JrnlAcquireChildLockPair(OriginalReplica, NewReplica);
  3096. WStatus = JrnlEnumerateFilterTreeTD(FilterTable,
  3097. RootFilterEntry,
  3098. JrnlProcessSubTreeEntry,
  3099. Cop);
  3100. JrnlReleaseChildLockPair(OriginalReplica, NewReplica);
  3101. DPRINT_WS(0, "++ Error - failed to add change order for dir MOVERS:", WStatus);
  3102. break;
  3103. default:
  3104. DPRINT(0, "++ ERROR - Invalid NewLocationCmd arg\n");
  3105. FRS_ASSERT(!"JrnlProcessSubTree: Invalid NewLocationCmd");
  3106. } // end switch
  3107. //
  3108. // Release the volume change order lock.
  3109. //
  3110. FrsRtlReleaseListLock(&pVme->ChangeOrderList);
  3111. return WStatus;
  3112. }
  3113. ULONG
  3114. JrnlProcessSubTreeEntry(
  3115. PGENERIC_HASH_TABLE FilterTable,
  3116. PVOID Buffer,
  3117. PVOID Context
  3118. )
  3119. /*++
  3120. Routine Description:
  3121. This function is called thru JrnlEnumerateFilterTreexx() to process a
  3122. Filter entry and submit a change order for same.
  3123. After the change order is generated the filter table entry is updated
  3124. as needed to reflect a new parent or a new replica set or a name change.
  3125. All required locks are acquired by the caller of the enumerate function.
  3126. This includes one or two filter entry child locks and the change order
  3127. list lock.
  3128. The caller has taken out a reference on the FilterEntry (Buffer). We
  3129. retire that reference here.
  3130. Arguments:
  3131. FilterTable - the hash table being enumerated (to lookup parent entry).
  3132. Buffer - a ptr to a FILTER_TABLE_ENTRY
  3133. Context - A pointer to the change order parameter struct.
  3134. Return Value:
  3135. ERROR_SUCCESS to keep the enumeration going.
  3136. Any other status stops the enumeration and returns this value to the
  3137. caller of the enumerate function.
  3138. --*/
  3139. {
  3140. #undef DEBSUB
  3141. #define DEBSUB "JrnlProcessSubTreeEntry:"
  3142. UNICODE_STRING UFileName;
  3143. ULONG WStatus, WStatus1;
  3144. ULONG GStatus;
  3145. BOOL Root;
  3146. PCHANGE_ORDER_ENTRY ChangeOrder;
  3147. PUSN_RECORD UsnRecord;
  3148. ULONG StreamSeqNum;
  3149. ULONG LocationCmd;
  3150. PVOLUME_MONITOR_ENTRY pVme;
  3151. PFILTER_TABLE_ENTRY OrigParentFilterEntry;
  3152. PFILTER_TABLE_ENTRY NewParentFilterEntry;
  3153. PFILTER_TABLE_ENTRY FE, FEList[8];
  3154. ULONG FEx;
  3155. PWCHAR FileName;
  3156. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  3157. PCHANGE_ORDER_PARAMETERS Cop = (PCHANGE_ORDER_PARAMETERS) Context;
  3158. USHORT Length;
  3159. //
  3160. // The USN record that triggered the SubTree operation
  3161. //
  3162. UsnRecord = Cop->UsnRecord;
  3163. LocationCmd = Cop->NewLocationCmd;
  3164. OrigParentFilterEntry = Cop->OrigParentFilterEntry;
  3165. NewParentFilterEntry = Cop->NewParentFilterEntry;
  3166. pVme = FilterEntry->Replica->pVme;
  3167. //
  3168. // If the FID in the UsnRecord matches the FID in the Filter Entry then
  3169. // this operation is on the root of the subtree and is different than if
  3170. // it was on a child.
  3171. //
  3172. Root = (UsnRecord->FileReferenceNumber == FilterEntry->DFileID);
  3173. #if 0
  3174. // For now no merging of the DIR change orders. If this proves to be a perf
  3175. // problem then need to add the code check for name conflicts.
  3176. //
  3177. // Check for a pending change order for this Dir entry. If the lookup
  3178. // succeeds the ref count is decremented by JrnlUpdateChangeOrder because
  3179. // it may end up evaporating the change order.
  3180. //
  3181. GStatus = GhtLookup(pVme->ChangeOrderTable,
  3182. &FilterEntry->DFileID,
  3183. TRUE,
  3184. &ChangeOrder);
  3185. if (GStatus == GHT_STATUS_SUCCESS) {
  3186. //
  3187. // A pending change order exists, Update it.
  3188. //
  3189. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
  3190. } else {
  3191. #endif
  3192. //
  3193. // No pending change order exists for this Dir. Create one.
  3194. //
  3195. // Since multiple change orders are derived from a single Journal Usn
  3196. // how do we decide to update our stable copy of the Journal USN?
  3197. // The stable copy means the current one we are working on and may not
  3198. // have finished.
  3199. if (Root) {
  3200. //
  3201. // If the root of the sub-tree then name comes from USN Record.
  3202. //
  3203. FileName = UsnRecord->FileName;
  3204. Length = UsnRecord->FileNameLength;
  3205. } else {
  3206. //
  3207. // If not root of sub-tree then name comes from filter entry and
  3208. // JrnlFirstUsn is set to zero.
  3209. //
  3210. FileName = FilterEntry->DFileName;
  3211. Length = (USHORT)(2*wcslen(FilterEntry->DFileName));
  3212. }
  3213. //
  3214. // Create the change order.
  3215. //
  3216. ChangeOrder = JrnlCreateCo(FilterEntry->Replica,
  3217. &FilterEntry->DFileID,
  3218. &FilterEntry->DParentFileID,
  3219. UsnRecord,
  3220. TRUE, // DIR CO
  3221. FileName,
  3222. Length);
  3223. //
  3224. // Make a new stream sequence number and save it in the CO.
  3225. // Stick it in the CO Guid so it appears in the log file.
  3226. // It gets overwritten later with real CO Guid when the CO issues.
  3227. //
  3228. StreamSeqNum = ++pVme->StreamSequenceNumber;
  3229. ChangeOrder->StreamLastMergeSeqNum = StreamSeqNum;
  3230. ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSeqNum;
  3231. ChangeOrder->OriginalParentFid = FilterEntry->DParentFileID;
  3232. if (Root) {
  3233. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Root Create",
  3234. UsnRecord->Reason);
  3235. } else {
  3236. ChangeOrder->Cmd.JrnlFirstUsn = (USN) 0;
  3237. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Subdir Create",
  3238. UsnRecord->Reason);
  3239. }
  3240. #if 0
  3241. }
  3242. #endif
  3243. //
  3244. // Update the Name Space Table with the current stream sequence number.
  3245. // Since this is a dir subtree entries are made for all parents implicitly
  3246. // until we get to the root. The root needs to have its parent dir added
  3247. // to the name space table. The table below shows what entries are made
  3248. // depending on the file operation and whether or not this call is for
  3249. // the root entry of the subtree operation.
  3250. //
  3251. // Opn Make Entry using Make Entry using
  3252. // orig name/parent Current name/parent
  3253. // info info (1)
  3254. //
  3255. // Movein No Yes
  3256. // Moveout Yes No
  3257. // Movedir Yes Yes
  3258. // Movers Yes Yes
  3259. //
  3260. // SimpleRen Yes Yes
  3261. // Create No Yes
  3262. // Delete No Yes
  3263. // Update No Yes
  3264. //
  3265. // The last four entries affect single dirs only while the first four
  3266. // can apply to subtrees.
  3267. // (1) If working in a single dir or the root of a sub-tree the current
  3268. // name/parent info comes from the USN record.
  3269. //
  3270. FEx = 0;
  3271. if (Root) {
  3272. if (LocationCmd != CO_LOCATION_MOVEOUT) {
  3273. //
  3274. // Update Curr File (Where the USN record says file went)
  3275. // Update New parent (the parent dir where file went)
  3276. //
  3277. FrsSetUnicodeStringFromRawString(&UFileName,
  3278. UsnRecord->FileNameLength,
  3279. UsnRecord->FileName,
  3280. UsnRecord->FileNameLength);
  3281. JrnlUpdateNst(pVme,
  3282. &UFileName,
  3283. &UsnRecord->ParentFileReferenceNumber,
  3284. StreamSeqNum);
  3285. FRS_ASSERT(NewParentFilterEntry != NULL);
  3286. FEList[FEx++] = NewParentFilterEntry;
  3287. }
  3288. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  3289. //
  3290. // Update with old name/parent of root dir.
  3291. // (Where the Original parent Filter entry says it was.)
  3292. // Update orig parent of root dir (the parent dir where file came from)
  3293. //
  3294. FEList[FEx++] = FilterEntry;
  3295. FRS_ASSERT(OrigParentFilterEntry != NULL);
  3296. FEList[FEx++] = OrigParentFilterEntry;
  3297. }
  3298. } else {
  3299. //
  3300. // Not the root so update using current name/parent of FilterEntry.
  3301. //
  3302. FEList[FEx++] = FilterEntry;
  3303. }
  3304. //
  3305. // Apply the name space table updates.
  3306. //
  3307. while (FEx != 0) {
  3308. FE = FEList[--FEx];
  3309. JrnlUpdateNst(pVme, &FE->UFileName, &FE->DParentFileID, StreamSeqNum);
  3310. }
  3311. //
  3312. // Update or install the change order.
  3313. //
  3314. WStatus = JrnlUpdateChangeOrder(ChangeOrder,
  3315. Cop->NewReplica,
  3316. Cop->NewParentFid,
  3317. Cop->NewLocationCmd,
  3318. (Root ? UsnRecord : NULL));
  3319. //
  3320. // Update the filter entry if necessary.
  3321. //
  3322. //
  3323. // See if the filename part is different and, if so, copy it.
  3324. // Only applies to the Root entry of the subtree.
  3325. // Limit it to MAX_PATH characters.
  3326. //
  3327. if (Root) {
  3328. if (UsnRecord->FileNameLength > 2*MAX_PATH) {
  3329. UsnRecord->FileNameLength = 2*MAX_PATH;
  3330. }
  3331. FrsAllocUnicodeString(&FilterEntry->UFileName,
  3332. FilterEntry->DFileName,
  3333. UsnRecord->FileName,
  3334. UsnRecord->FileNameLength);
  3335. }
  3336. switch (Cop->NewLocationCmd) {
  3337. case CO_LOCATION_CREATE:
  3338. case CO_LOCATION_MOVEIN:
  3339. case CO_LOCATION_MOVEIN2:
  3340. case CO_LOCATION_NO_CMD:
  3341. //
  3342. // On creates and movein the caller has created the filter table
  3343. // entry already (to pass it to this fcn).
  3344. //
  3345. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3346. break;
  3347. case CO_LOCATION_DELETE:
  3348. case CO_LOCATION_MOVEOUT:
  3349. //
  3350. // Now delete the entry from the Filter Table. If this is the root
  3351. // then first drop the ref count by one to compensate for the first
  3352. // lookup in JrnlFilterUpdate() where all this started.
  3353. // The second ref was taken through the Enumerate list function.
  3354. //
  3355. if (Root) {
  3356. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3357. }
  3358. WStatus = JrnlDeleteDirFilterEntry(FilterTable, NULL, FilterEntry);
  3359. if (!WIN_SUCCESS(WStatus)) {
  3360. DPRINT(0, "++ ERROR - Dir entry delete failed.\n");
  3361. }
  3362. break;
  3363. case CO_LOCATION_MOVERS:
  3364. //
  3365. // Replica set changed. Update the filter entry.
  3366. //
  3367. FilterEntry->Replica = Cop->NewReplica;
  3368. FilterEntry->DReplicaNumber = Cop->NewReplica->ReplicaNumber;
  3369. /* FALL THRU INTENDED */
  3370. case CO_LOCATION_MOVEDIR:
  3371. //
  3372. // Directory changed. Applies to root on both MOVEDIR and MOVERS.
  3373. // Update the parent file ID in the filter entry and
  3374. // Put the filter entry on the childlist of the new parent.
  3375. //
  3376. if (Root) {
  3377. FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
  3378. if (FilterEntry->ChildEntry.Flink == NULL) {
  3379. DPRINT(0, "++ ERROR - Dir entry not on child list\n");
  3380. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3381. FRS_ASSERT(!"Dir entry not on child list");
  3382. }
  3383. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  3384. FilterEntry->ChildEntry.Flink = NULL;
  3385. WStatus1 = (ULONG)JrnlFilterLinkChild(FilterTable,
  3386. FilterEntry,
  3387. FilterEntry->Replica);
  3388. if (!WIN_SUCCESS(WStatus1)) {
  3389. DPRINT(0, "++ ERROR - JrnlFilterLinkChild Failed\n");
  3390. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3391. FRS_ASSERT(!"JrnlFilterLinkChild Failed");
  3392. }
  3393. }
  3394. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3395. break;
  3396. default:
  3397. DPRINT1(0, "++ Error - switch arg out of range: %d\n", Cop->NewLocationCmd);
  3398. FRS_ASSERT(!"NewLocationCmd invalid");
  3399. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3400. }
  3401. //
  3402. // Return the change order status.
  3403. //
  3404. return WStatus;
  3405. }
  3406. ULONG
  3407. JrnlUpdateChangeOrder(
  3408. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  3409. IN PREPLICA NewReplica,
  3410. IN ULONGLONG NewParentFid,
  3411. IN ULONG NewLocationCmd,
  3412. IN PUSN_RECORD UsnRecord
  3413. )
  3414. /*++
  3415. Routine Description:
  3416. This function updates an existing directory change order that is still
  3417. pending in the Replica's change order process list or inserts a new change
  3418. order that has been prepared as described below.
  3419. There are two components to a change order, content and file location.
  3420. A given USN record could have changes to both parts.
  3421. The content component is updated by merging the reason flags from the
  3422. UsnRecord and capturing relevant parameters such as the attributes and
  3423. FileName.
  3424. The location update component is more complicated and uses a state table,
  3425. ChangeOrderLocationStateTable[], to manage the update. The state table
  3426. determines when we update the parent directory or the replica set in the
  3427. change order. This occurs when a directory is renamed. The states in
  3428. the table also correspond to the change order location command to be used.
  3429. The change order may move from one replica set to another. This routine
  3430. assumes that the caller has acquired the change order process list locks
  3431. for both the source and dest replicas. This is the only case where we can
  3432. pull it off the list because there could be a dependent entry that follows
  3433. it in the change order list and an error could result if the update
  3434. process saw the dependent entry first. (Probably only an issue for
  3435. directory creates).
  3436. The Source Change order process list lock is needed for all Location Commands.
  3437. The Destination Change order process list lock is needed for:
  3438. CO_LOCATION_MOVEIN, CO_LOCATION_MOVERS commands.
  3439. The change order may be evaporated in certain cases. If not this routine
  3440. decrements the reference count on the change order before it returns.
  3441. This routine can be called with a new change order but the caller must
  3442. pre-init the change order correctly:
  3443. 1. Bump the initial ref count by 1 (since that is what lookup does).
  3444. 2. The command flag CO_FLAG_ONLIST should be clear so we don't try
  3445. to pull it off a list.
  3446. 3. The length field in the unicode string UFileName must be 0 to
  3447. capture the file name.
  3448. 4. Set New and orig Replica fields to the original replica.
  3449. 5. Set New and orig parent FID fields to the original parent FID.
  3450. 6. The command flag CO_FLAG_LOCATION_CMD should be clear.
  3451. 7. The FileReferenceNumber must be set to the file ID of the file/dir.
  3452. The File Id is the index into the change order table.
  3453. This routine also updates the parent file ID table so the parent File ID
  3454. tracks on renames and the entry is deleted if the change order is
  3455. evaporated or the new location command specifies delete.
  3456. Arguments:
  3457. ChangeOrder - The existing change order to be updated.
  3458. NewReplica - The destination replica the directory is renamed into.
  3459. NewparentFid - The destination parent the directory is renamed into.
  3460. NewLocationCmd - The new location command applied to the directory.
  3461. UsnRecord - The NTFS USN record describing the change. When walking a
  3462. through a sub-tree this will be NULL for all directories
  3463. except for the root.
  3464. Return Value:
  3465. Win32 status.
  3466. --*/
  3467. {
  3468. #undef DEBSUB
  3469. #define DEBSUB "JrnlUpdateChangeOrder:"
  3470. PREPLICA Replica;
  3471. ULONG Control;
  3472. ULONG Op;
  3473. ULONG PreviousState;
  3474. ULONG Reason = 0;
  3475. BOOL EvapFlag = FALSE;
  3476. ULONG GStatus;
  3477. ULONG NewState;
  3478. PVOLUME_MONITOR_ENTRY pVme;
  3479. BOOL SubTreeRoot;
  3480. ULONG WStatus;
  3481. BOOL CoUpdate;
  3482. PCHANGE_ORDER_ENTRY NewParentCo;
  3483. ULONG LocationCmd;
  3484. //
  3485. // Only update parent file IDs on the sub tree root. This is the dir
  3486. // that the USN Record was generated for in the dir rename.
  3487. // For any subordinate dirs the caller must supply NULL.
  3488. // If a changeorder comes in already on the process list then it must
  3489. // be an update.
  3490. //
  3491. SubTreeRoot = (UsnRecord != NULL);
  3492. CoUpdate = CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST);
  3493. //
  3494. // If a USN record is supplied then check for any content flags set in the
  3495. // USN reason mask. If so then set the content flag in the change order.
  3496. // When walking a subtree the USN Record is non-null only for the root since
  3497. // the content changes don't apply to the children.
  3498. //
  3499. if (SubTreeRoot) {
  3500. Reason = UsnRecord->Reason;
  3501. if (Reason & CO_CONTENT_MASK) {
  3502. SET_CO_FLAG(ChangeOrder, CO_FLAG_CONTENT_CMD);
  3503. //
  3504. // Update the content portion of the change order. Merge in the
  3505. // reason mask from the Usn Record.
  3506. //
  3507. ChangeOrder->Cmd.ContentCmd |= Reason;
  3508. }
  3509. //
  3510. // Capture the name in the case of rename, create and delete.
  3511. // Limit it to MAX_PATH characters.
  3512. //
  3513. // if ((Reason & CO_LOCATION_MASK) || (ChangeOrder->UFileName.Length == 0)) {
  3514. if ((Reason & USN_REASON_RENAME_NEW_NAME) ||
  3515. (ChangeOrder->UFileName.Length == 0)) {
  3516. if (UsnRecord->FileNameLength > 2*MAX_PATH) {
  3517. UsnRecord->FileNameLength = 2*MAX_PATH;
  3518. }
  3519. FrsAllocUnicodeString(&ChangeOrder->UFileName,
  3520. ChangeOrder->Cmd.FileName,
  3521. UsnRecord->FileName,
  3522. UsnRecord->FileNameLength);
  3523. ChangeOrder->Cmd.FileNameLength = UsnRecord->FileNameLength;
  3524. }
  3525. //
  3526. // Capture most recent file attributes.
  3527. // In the case where we are updating a pending CO,
  3528. // we would miss a series of ops on the same file such as
  3529. // set the hidden bit, close, delete the system bit, close, ...
  3530. //
  3531. ChangeOrder->Cmd.FileAttributes = UsnRecord->FileAttributes;
  3532. //
  3533. // Update to the latest USN contributing to this change order.
  3534. //
  3535. ChangeOrder->Cmd.JrnlUsn = UsnRecord->Usn;
  3536. }
  3537. //
  3538. // Check if there is a new location command. If not go insert the change order.
  3539. //
  3540. if (NewLocationCmd == CO_LOCATION_NO_CMD) {
  3541. goto INSERT_CHANGE_ORDER;
  3542. }
  3543. //
  3544. // Update the parent file ID table based on the new location command.
  3545. //
  3546. if (CO_NEW_FILE(NewLocationCmd)) {
  3547. //
  3548. // Add a new entry for the new file in the R.S.
  3549. //
  3550. ChangeOrder->ParentFileReferenceNumber = NewParentFid;
  3551. GStatus = QHashInsert(NewReplica->pVme->ParentFidTable,
  3552. &ChangeOrder->FileReferenceNumber,
  3553. &NewParentFid,
  3554. NewReplica->ReplicaNumber,
  3555. FALSE);
  3556. if (GStatus != GHT_STATUS_SUCCESS ) {
  3557. DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
  3558. }
  3559. } else
  3560. if ((NewLocationCmd == CO_LOCATION_DELETE) ||
  3561. (NewLocationCmd == CO_LOCATION_MOVEOUT)) {
  3562. //
  3563. // File is gone. Remove the entry.
  3564. //
  3565. GStatus = QHashDelete(NewReplica->pVme->ParentFidTable,
  3566. &ChangeOrder->FileReferenceNumber);
  3567. if (GStatus != GHT_STATUS_SUCCESS ) {
  3568. DPRINT1(0, "++ QHashDelete error: %d\n", GStatus);
  3569. }
  3570. } else
  3571. if (CO_MOVE_RS_OR_DIR(NewLocationCmd)) {
  3572. //
  3573. // File changed parents. Update the entry for subtree root only.
  3574. //
  3575. if (SubTreeRoot) {
  3576. ChangeOrder->ParentFileReferenceNumber = NewParentFid;
  3577. GStatus = QHashUpdate(NewReplica->pVme->ParentFidTable,
  3578. &ChangeOrder->FileReferenceNumber,
  3579. &NewParentFid,
  3580. 0);
  3581. if (GStatus != GHT_STATUS_SUCCESS ) {
  3582. DPRINT1(0, "++ QHashUpdate error: %d\n", GStatus);
  3583. }
  3584. }
  3585. } else {
  3586. DPRINT1(0, "++ ERROR - Invalid new location command: %d\n", NewLocationCmd);
  3587. }
  3588. //
  3589. // Update the location component of the change order. Fetch the Control
  3590. // DWORD from the table based on the pending command and the new command
  3591. // then perform the specified operation sequence. If the pending change
  3592. // order was for a content change then there is no prior location command.
  3593. // Check for this.
  3594. //
  3595. // Caller has acquired change order process lock for both current and
  3596. // new Replica Sets as appropriate.
  3597. //
  3598. if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD)) {
  3599. PreviousState = GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command);
  3600. } else {
  3601. PreviousState = NSNoLocationCmd;
  3602. SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCATION_CMD);
  3603. }
  3604. Control = ChangeOrderLocationStateTable[PreviousState][NewLocationCmd].u1.UlongOpFields;
  3605. DPRINT5(5,"++ Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
  3606. CoLocationNames[PreviousState], PreviousState,
  3607. CoLocationNames[NewLocationCmd], NewLocationCmd,
  3608. Control);
  3609. if (Control == 0) {
  3610. DPRINT2(0, "++ ERROR - Invalid transition. Pending: %d New: %d\n",
  3611. PreviousState, NewLocationCmd);
  3612. FRS_ASSERT(!"Invalid CO Location cmd transition-1");
  3613. goto ERROR_RETURN;
  3614. }
  3615. while (Control != 0) {
  3616. Op = Control & 0x0000000F;
  3617. Control = Control >> 4;
  3618. switch (Op) {
  3619. //
  3620. // Done.
  3621. //
  3622. case OpInval:
  3623. DPRINT5(0,"++ Error - Invalid state transition - Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
  3624. CoLocationNames[PreviousState], PreviousState,
  3625. CoLocationNames[NewLocationCmd], NewLocationCmd,
  3626. Control);
  3627. FRS_ASSERT(!"Invalid CO Location cmd transition-2");
  3628. Control = 0;
  3629. break;
  3630. //
  3631. // Evaporate the pending change order. It should be on the process
  3632. // list associated with the NewReplica. THis should never happen
  3633. // if the previous state is NSNoLocationCmd.
  3634. //
  3635. case OpEvap:
  3636. //
  3637. // Increment the CO Evaporated Counter
  3638. //
  3639. PM_INC_CTR_REPSET(NewReplica, COEvaporated, 1);
  3640. DPRINT(5, "++ OpEvap\n");
  3641. pVme = ChangeOrder->NewReplica->pVme;
  3642. FRS_ASSERT(PreviousState != NSNoLocationCmd);
  3643. FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
  3644. FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
  3645. &ChangeOrder->ProcessList);
  3646. DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3647. DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica, ChangeOrder, ERROR_SUCCESS);
  3648. CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpEvap");
  3649. DEC_LOCAL_CO_QUEUE_COUNT(ChangeOrder->NewReplica);
  3650. //
  3651. // Delete the entry from the Change Order Table. It should be in
  3652. // the Change order table assoicated with NewReplica. The ref
  3653. // count should be 2 since the caller did a lookup.
  3654. //
  3655. FRS_ASSERT(ChangeOrder->HashEntryHeader.ReferenceCount == 2);
  3656. GStatus = GhtDeleteEntryByAddress(pVme->ChangeOrderTable,
  3657. ChangeOrder,
  3658. TRUE);
  3659. if (GStatus != GHT_STATUS_SUCCESS) {
  3660. DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
  3661. FRS_PRINT_TYPE(0, ChangeOrder);
  3662. FRS_ASSERT(!"JrnlUpdateCO: CO Table GhtDeleteEntryByAddress failed");
  3663. goto ERROR_RETURN;
  3664. }
  3665. EvapFlag = TRUE;
  3666. break;
  3667. //
  3668. // Update the New Replica Set
  3669. //
  3670. case OpNRs:
  3671. DPRINT(5, "++ OpNRs\n");
  3672. //
  3673. // Update the parent dir on the subtree root and the replica ID
  3674. // on all change orders.
  3675. //
  3676. ChangeOrder->NewReplica = NewReplica;
  3677. /* FALL THRU INTENDED */
  3678. //
  3679. // Update the New Parent Directory on the subtree root only.
  3680. //
  3681. case OpNDir:
  3682. if (Op == OpNDir) {DPRINT(5, "++ OpNDir\n");}
  3683. if (SubTreeRoot) {
  3684. ChangeOrder->NewParentFid = NewParentFid;
  3685. if (CoUpdate) {
  3686. //
  3687. // See if there is a pending change order on the new parent.
  3688. // If there is and it is a create that happens after this
  3689. // change order then move this updated CO to the end of the
  3690. // list so the Parent Create is done first. We do this by
  3691. // removing it from the list and letting the insert code put
  3692. // it back on at the end with a new VSN.
  3693. //
  3694. pVme = ChangeOrder->NewReplica->pVme;
  3695. GStatus = GhtLookup(pVme->ChangeOrderTable,
  3696. &NewParentFid,
  3697. TRUE,
  3698. &NewParentCo);
  3699. if ((GStatus == GHT_STATUS_SUCCESS) &&
  3700. (NewParentCo->Cmd.FrsVsn > ChangeOrder->Cmd.FrsVsn)){
  3701. FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
  3702. FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
  3703. &ChangeOrder->ProcessList);
  3704. DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3705. DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica,
  3706. ChangeOrder,
  3707. ERROR_SUCCESS);
  3708. CLEAR_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
  3709. CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpNDir");
  3710. DEC_LOCAL_CO_QUEUE_COUNT(ChangeOrder->NewReplica);
  3711. GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
  3712. NewParentCo,
  3713. TRUE);
  3714. }
  3715. }
  3716. }
  3717. break;
  3718. //
  3719. // Update the State / Command.
  3720. //
  3721. case OpNSt:
  3722. NewState = Control & 0x0000000F;
  3723. DPRINT2(5, "++ OpNst: %s (%d)\n", CoLocationNames[NewState], NewState);
  3724. SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, NewState);
  3725. Control = Control >> 4;
  3726. break;
  3727. //
  3728. // The table is messed up.
  3729. //
  3730. default:
  3731. DPRINT1(0, "++ Error - Invalid dispatch operation: %d\n", Op);
  3732. FRS_ASSERT(!"Invalid CO dispatch operation");
  3733. goto ERROR_RETURN;
  3734. }
  3735. }
  3736. INSERT_CHANGE_ORDER:
  3737. //
  3738. // If the change order hasn't been deleted then decrement the ref count
  3739. // to balance the Caller's lookup. If the change order is not on a process
  3740. // list because it is new or it switched replica sets then put it on the
  3741. // target list.
  3742. //
  3743. WStatus = ERROR_SUCCESS;
  3744. if (!EvapFlag) {
  3745. Replica = ChangeOrder->NewReplica;
  3746. pVme = Replica->pVme;
  3747. if (!CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST)) {
  3748. //
  3749. // No reason to age deletes
  3750. //
  3751. if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD) &&
  3752. (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE)) {
  3753. ChangeOrder->TimeToRun = CO_TIME_NOW(pVme);
  3754. } else {
  3755. ChangeOrder->TimeToRun = CO_TIME_TO_RUN(pVme);
  3756. }
  3757. //
  3758. // Generate a new Volume Sequnce Number for the change order since
  3759. // it gets sent to the end of the new R.S. process list.
  3760. // The change order VSNs must be kept monotonically increasing
  3761. // within a replica set for change order dampening to work.
  3762. //
  3763. NEW_VSN(pVme, &ChangeOrder->Cmd.FrsVsn);
  3764. SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCALCO);
  3765. //
  3766. // Entry already in Aging table if its a CO update. If this is a
  3767. // duplicate entry for the same FID (because the merge was
  3768. // disallowed then put this entry at the end of the duplicate list.
  3769. //
  3770. if (!CoUpdate) {
  3771. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Q Insert",
  3772. ChangeOrder->Cmd.ContentCmd);
  3773. GStatus = GhtInsert(pVme->ChangeOrderTable, ChangeOrder, TRUE, TRUE);
  3774. if (GStatus != GHT_STATUS_SUCCESS) {
  3775. DPRINT1(0, "++ ERROR - GhtInsert Failed: %d\n", GStatus);
  3776. FRS_ASSERT(!"Local Co Q Insert Failed");
  3777. goto ERROR_RETURN;
  3778. }
  3779. SET_COE_FLAG(ChangeOrder, COE_FLAG_IN_AGING_CACHE);
  3780. } else {
  3781. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Aging Update",
  3782. ChangeOrder->Cmd.ContentCmd);
  3783. }
  3784. INCREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3785. //
  3786. // For remote COs the cxtion count is incremented when the remote CO
  3787. // goes onto the CO process queue. We don't do this for local COs
  3788. // because the code to shutdown the Jrnl Cxtion may never see the
  3789. // CO count go to zero if we did this. We just set the CO
  3790. // CxtionGuid and the CO JoinGuid here so unjoin / rejoins can be
  3791. // detected.
  3792. //
  3793. INIT_LOCALCO_CXTION_GUID(Replica, ChangeOrder);
  3794. WStatus = FrsRtlInsertTailQueueLock(&pVme->ChangeOrderList,
  3795. &ChangeOrder->ProcessList);
  3796. if (WIN_SUCCESS(WStatus)) {
  3797. SET_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
  3798. INC_LOCAL_CO_QUEUE_COUNT(Replica);
  3799. } else {
  3800. DPRINT_WS(0, "++ ERROR - ChangeOrder insert failed:", WStatus);
  3801. }
  3802. }
  3803. GStatus = GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
  3804. ChangeOrder,
  3805. TRUE);
  3806. if (GStatus != GHT_STATUS_SUCCESS) {
  3807. DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
  3808. FRS_PRINT_TYPE(0, ChangeOrder);
  3809. FRS_ASSERT(!"CO ref count non positive");
  3810. goto ERROR_RETURN;
  3811. }
  3812. }
  3813. return WStatus;
  3814. ERROR_RETURN:
  3815. return ERROR_GEN_FAILURE;
  3816. }
  3817. ULONG
  3818. JrnlDoesChangeOrderHaveChildrenWorker(
  3819. IN PQHASH_TABLE ParentFidTable,
  3820. IN PQHASH_ENTRY BeforeNode,
  3821. IN PQHASH_ENTRY TargetNode,
  3822. IN PVALID_CHILD_CHECK_DATA pValidChildCheckData
  3823. )
  3824. /*++
  3825. Routine Description:
  3826. This function is called thru QHashEnumerateTable().
  3827. Search for a match between the ParentFid and the entry's
  3828. ParentFid (QHASH_ENTRY.QData).
  3829. Arguments:
  3830. Table -- the hash table being enumerated
  3831. BeforeNode -- ptr to the QhashEntry before the node of interest.
  3832. TargetNode -- ptr to the QhashEntry of interest.
  3833. pValidChildCheckData -- ptr to the parent fid
  3834. Return Value:
  3835. FrsErrorResourceInUse - Child of ParentFid was found
  3836. FrsErrorSuccess - No children were found for ParentFid
  3837. --*/
  3838. {
  3839. #undef DEBSUB
  3840. #define DEBSUB "JrnlDoesChangeOrderHaveChildrenWorker:"
  3841. JET_ERR jerr;
  3842. PTHREAD_CTX ThreadCtx = pValidChildCheckData->ThreadCtx;
  3843. PTABLE_CTX TmpIDTableCtx = pValidChildCheckData->TmpIDTableCtx;
  3844. PIDTABLE_RECORD IDTableRec;
  3845. if ((TargetNode->QData == pValidChildCheckData->FileReferenceNumber)){
  3846. if (ThreadCtx == NULL || TmpIDTableCtx == NULL) {
  3847. return FrsErrorResourceInUse;
  3848. }
  3849. jerr = DbsReadRecord(ThreadCtx, &TargetNode->QKey, FileIDIndexx, TmpIDTableCtx);
  3850. //
  3851. // No IDTable entry. OK to delete the child.
  3852. //
  3853. if (jerr == JET_errRecordNotFound) {
  3854. return FrsErrorSuccess;
  3855. }
  3856. if (!JET_SUCCESS(jerr)) {
  3857. DPRINT_JS(0,"++ ERROR - DbsReadRecord failed;", jerr);
  3858. return FrsErrorResourceInUse;
  3859. }
  3860. IDTableRec = (PIDTABLE_RECORD) (TmpIDTableCtx->pDataRecord);
  3861. //
  3862. // This child of the parent is not marked to be deleted which means it is
  3863. // not going away. Hence return that this parent has children. The parent
  3864. // delete will be aborted.
  3865. //
  3866. if (!IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETE_DEFERRED)) {
  3867. return FrsErrorResourceInUse;
  3868. }
  3869. }
  3870. return FrsErrorSuccess;
  3871. }
  3872. BOOL
  3873. JrnlDoesChangeOrderHaveChildren(
  3874. IN PTHREAD_CTX ThreadCtx,
  3875. IN PTABLE_CTX TmpIDTableCtx,
  3876. IN PCHANGE_ORDER_ENTRY ChangeOrder
  3877. )
  3878. /*++
  3879. Routine Description:
  3880. The ChangeOrderAccept thread is issueing a retry of a directory
  3881. delete. The question is, "Does this directory have replicating
  3882. children?" If so, the change order should be retried at a later
  3883. time.
  3884. If not, the change order is sent on to an install thread that
  3885. will empty the directory of any files or subdirectories and
  3886. then delete the directory. The files and subdirectories are
  3887. assumed to have been filtered and are non-replicating. You can
  3888. see why we want to insure there are no replicating files or
  3889. subdirectories in this directory prior to emptying the directory.
  3890. The journal's directory filter table and the journal's parent fid
  3891. table are searched for children of the directory specified by
  3892. ChangeOrder.
  3893. Arguments:
  3894. ChangeOrder - For a retry of a directory delete
  3895. Return Value:
  3896. TRUE - Directory has replicating children in the journal tables
  3897. FALSE - Directory does not have replicating children in the journal tables
  3898. --*/
  3899. {
  3900. #undef DEBSUB
  3901. #define DEBSUB "JrnlDoesChangeOrderHaveChildren:"
  3902. DWORD FStatus;
  3903. PREPLICA Replica;
  3904. PVOLUME_MONITOR_ENTRY pVme;
  3905. PQHASH_TABLE ParentFidTable;
  3906. VALID_CHILD_CHECK_DATA ValidChildCheckData;
  3907. Replica = ChangeOrder->NewReplica;
  3908. //
  3909. // Retry the change order if information about its children is lacking.
  3910. //
  3911. if (!Replica) {
  3912. DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
  3913. return TRUE;
  3914. }
  3915. pVme = Replica->pVme;
  3916. if (!pVme) {
  3917. DPRINT(4, "++ WARN: No pVme in Replica\n");
  3918. return TRUE;
  3919. }
  3920. ParentFidTable = pVme->ParentFidTable;
  3921. if (!ParentFidTable) {
  3922. DPRINT(4, "++ WARN: No ParentFidTable in pVme\n");
  3923. return TRUE;
  3924. }
  3925. //
  3926. // Look for subdirectories and files.
  3927. //
  3928. ValidChildCheckData.ThreadCtx = ThreadCtx;
  3929. ValidChildCheckData.TmpIDTableCtx = TmpIDTableCtx;
  3930. ValidChildCheckData.FileReferenceNumber = ChangeOrder->FileReferenceNumber;
  3931. FStatus = QHashEnumerateTable(ParentFidTable,
  3932. JrnlDoesChangeOrderHaveChildrenWorker,
  3933. &ValidChildCheckData);
  3934. if (FStatus == FrsErrorResourceInUse) {
  3935. DPRINT(4, "++ Child found; change order has files\n");
  3936. return TRUE;
  3937. }
  3938. DPRINT(4, "++ Child not found; change order has no subdirs or files\n");
  3939. return FALSE;
  3940. }
  3941. ULONG
  3942. JrnlAddFilterEntryFromUsn(
  3943. IN PREPLICA Replica,
  3944. IN PUSN_RECORD UsnRecord,
  3945. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  3946. )
  3947. /*++
  3948. Routine Description:
  3949. Create a new filter table entry from data in the USN record and the
  3950. Replica struct. Insert it into the Volume Filter Table.
  3951. The caller must decrement the refcount on the filter entry.
  3952. Arguments:
  3953. Replica - ptr to the Replica struct containing the directory now.
  3954. UsnRecord - ptr to the UsnRecord.
  3955. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  3956. want a reference to the entry so we drop it here.
  3957. Return Value:
  3958. Win32 status.
  3959. --*/
  3960. {
  3961. #undef DEBSUB
  3962. #define DEBSUB "JrnlAddFilterEntryFromUsn:"
  3963. PFILTER_TABLE_ENTRY FilterEntry;
  3964. ULONG Len;
  3965. ULONG WStatus;
  3966. //
  3967. // Create a new filter entry.
  3968. // The size of the file name field is Len + sizeof(WCHAR) because
  3969. // the file name field is defined as a wchar array of length 1.
  3970. //
  3971. Len = UsnRecord->FileNameLength;
  3972. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
  3973. FilterEntry->DFileID = UsnRecord->FileReferenceNumber;
  3974. FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
  3975. FrsCopyUnicodeStringFromRawString(&FilterEntry->UFileName,
  3976. Len + sizeof(WCHAR),
  3977. UsnRecord->FileName,
  3978. Len);
  3979. WStatus = JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, TRUE);
  3980. if (!WIN_SUCCESS(WStatus)) {
  3981. DUMP_USN_RECORD2(0, UsnRecord, Replica->ReplicaNumber, CO_LOCATION_NUM_CMD);
  3982. }
  3983. return WStatus;
  3984. }
  3985. ULONG
  3986. JrnlAddFilterEntryFromCo(
  3987. IN PREPLICA Replica,
  3988. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  3989. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  3990. )
  3991. /*++
  3992. Routine Description:
  3993. Create a new filter table entry from data in the change order entry and the
  3994. Replica struct. Insert it into the Volume Filter Table. This is called
  3995. when we receive remote change orders that create a directory.
  3996. If this is a recovery change order than the filter entry is replaced if
  3997. there is a conflict.
  3998. The caller must decrement the refcount on the filter entry.
  3999. Arguments:
  4000. Replica - ptr to the Replica struct containing the directory now.
  4001. ChangeOrder -- ptr to the change order entry.
  4002. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  4003. want a reference to the entry so we drop it here.
  4004. Return Value:
  4005. Win32 status.
  4006. --*/
  4007. {
  4008. #undef DEBSUB
  4009. #define DEBSUB "JrnlAddFilterEntryFromCo:"
  4010. PFILTER_TABLE_ENTRY FilterEntry;
  4011. ULONG Len;
  4012. ULONG WStatus;
  4013. //
  4014. // Create a new filter entry.
  4015. // NOTE that the actual size of the filename buffer is Len +
  4016. // sizeof(WCHAR) because the definition of FILTER_TABLE_ENTRY
  4017. // includes a single wchar array for filename. Hence, the
  4018. // assignment of UNICODE_NULL to Buffer[Len/2] doesn't scribble
  4019. // past the end of the array.
  4020. //
  4021. Len = ChangeOrder->Cmd.FileNameLength;
  4022. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
  4023. FilterEntry->DFileID = ChangeOrder->FileReferenceNumber;
  4024. FilterEntry->DParentFileID = ChangeOrder->ParentFileReferenceNumber;
  4025. FilterEntry->UFileName.Length = (USHORT)Len;
  4026. CopyMemory(FilterEntry->UFileName.Buffer, ChangeOrder->Cmd.FileName, Len);
  4027. FilterEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
  4028. //
  4029. // Its possible to receive a change order more than once; and the
  4030. // first change order may have been taken through retry. If the
  4031. // change order was for a directory create, this would leave
  4032. // an idtable entry set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
  4033. // *and* the directories entries in the filter table. So, always
  4034. // relace an existing entry.
  4035. //
  4036. return JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, TRUE);
  4037. }
  4038. ULONG
  4039. JrnlAddFilterEntry(
  4040. IN PREPLICA Replica,
  4041. IN PFILTER_TABLE_ENTRY FilterEntry,
  4042. OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
  4043. IN BOOL Replace
  4044. )
  4045. /*++
  4046. Routine Description:
  4047. Insert the filter entry into the Volume Filter Table.
  4048. This routine acquires the child list lock for the replica when doing the
  4049. child list insert.
  4050. The caller must decrement the refcount on the filter entry.
  4051. On an insert error the entry is freed and NULL is returned.
  4052. Arguments:
  4053. Replica - ptr to the Replica struct containing the directory now.
  4054. FilterEntry -- ptr to filter entry to insert.
  4055. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  4056. want a reference to the entry so we drop it here.
  4057. On an insert error the entry is freed and NULL is returned.
  4058. Replace - If true then replace current entry with this one if conflict.
  4059. Return Value:
  4060. Win32 status.
  4061. --*/
  4062. {
  4063. #undef DEBSUB
  4064. #define DEBSUB "JrnlAddFilterEntry:"
  4065. PGENERIC_HASH_TABLE FilterTable = Replica->pVme->FilterTable;
  4066. ULONG GStatus, WStatus=ERROR_GEN_FAILURE;
  4067. ULONG RetryCount = 0;
  4068. PFILTER_TABLE_ENTRY OldEntry;
  4069. ULONG Len;
  4070. //
  4071. // Start ref count out at one (insert bumps it again to 2) if we
  4072. // return the address of the entry.
  4073. //
  4074. FilterEntry->HashEntryHeader.ReferenceCount = 1;
  4075. FilterEntry->Replica = Replica;
  4076. FilterEntry->DReplicaNumber = Replica->ReplicaNumber;
  4077. RETRY:
  4078. //
  4079. // Insert the entry into the VME Filter Table.
  4080. //
  4081. GStatus = GhtInsert(FilterTable, FilterEntry, TRUE, FALSE);
  4082. if (GStatus != GHT_STATUS_SUCCESS) {
  4083. if (Replace) {
  4084. goto REPLACE;
  4085. }
  4086. DPRINT1(0, "++ ERROR - GhtInsert Failed: %d, Entry conflict. Tried to insert:\n", GStatus);
  4087. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4088. FilterEntry = FrsFreeType(FilterEntry);
  4089. //
  4090. // Don't know how to translate GStatus to WStatus. The return value is ignored
  4091. // anyways.
  4092. //
  4093. WStatus = ERROR_GEN_FAILURE;
  4094. goto ERROR_RETURN;
  4095. }
  4096. //
  4097. // Link the filter entry onto the parent's child list and drop the reference
  4098. // if the caller doesn't want the ptr back.
  4099. //
  4100. JrnlAcquireChildLock(Replica);
  4101. WStatus = (ULONG)JrnlFilterLinkChild(FilterTable, FilterEntry, Replica);
  4102. JrnlReleaseChildLock(Replica);
  4103. if (!WIN_SUCCESS(WStatus)) {
  4104. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  4105. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4106. //
  4107. // Need some code here to add this filter entry to an orphan list
  4108. // in the off chance that the parent will later come into existence
  4109. // and now needs to hook up to the child. The creation of each new
  4110. // entry would then have to scan the orphan list if it was non-empty.
  4111. // Note that because of ordering constraints I don't think this
  4112. // can actually happen except in the case of a remote co dir create
  4113. // while a local co moveout is in process. But in this case when
  4114. // the child dir is found during the enum it will end up getting
  4115. // deleted.
  4116. // If we relax the ordering constraints on dir creates (since they
  4117. // all start out being created in the pre-install area anyway) then
  4118. // this code will definitely be needed.
  4119. //
  4120. // Note: May need dir filter entry orphan list. see note above.
  4121. }
  4122. RETURN:
  4123. if (RetFilterEntry != NULL) {
  4124. *RetFilterEntry = FilterEntry;
  4125. } else {
  4126. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4127. }
  4128. return WStatus;
  4129. REPLACE:
  4130. //
  4131. // Replace the data in the old entry with the data in the new entry.
  4132. //
  4133. GStatus = GhtLookup(FilterTable, &FilterEntry->DFileID, TRUE, &OldEntry);
  4134. if (GStatus != GHT_STATUS_SUCCESS) {
  4135. FRS_ASSERT(RetryCount++ > 10);
  4136. goto RETRY;
  4137. }
  4138. FRS_ASSERT(OldEntry->DFileID == FilterEntry->DFileID);
  4139. //
  4140. // Undoing a MOVERS for a dir is going to be a pain.
  4141. // Need to check if it can really happen. Could we just abort this CO?
  4142. //
  4143. FRS_ASSERT(OldEntry->Replica == FilterEntry->Replica);
  4144. FRS_ASSERT(OldEntry->DReplicaNumber == FilterEntry->DReplicaNumber);
  4145. if (OldEntry->DParentFileID != FilterEntry->DParentFileID) {
  4146. //
  4147. // If parent FID is different then change child linkage.
  4148. //
  4149. JrnlAcquireChildLock(Replica);
  4150. WStatus = JrnlFilterUnlinkChild (FilterTable, OldEntry, OldEntry->Replica);
  4151. if (!WIN_SUCCESS(WStatus)) {
  4152. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  4153. goto REPLACE_ERROR;
  4154. }
  4155. //
  4156. // Update the filter entry with the new parent and reinsert into filter.
  4157. //
  4158. OldEntry->DParentFileID = FilterEntry->DParentFileID;
  4159. WStatus = (ULONG) JrnlFilterLinkChild(FilterTable,
  4160. OldEntry,
  4161. OldEntry->Replica);
  4162. if (!WIN_SUCCESS(WStatus)) {
  4163. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  4164. goto REPLACE_ERROR;
  4165. }
  4166. JrnlReleaseChildLock(Replica);
  4167. }
  4168. if (FilterEntry->UFileName.Length <= (OldEntry->UFileName.MaximumLength -
  4169. sizeof(WCHAR))) {
  4170. Len = FilterEntry->UFileName.Length;
  4171. } else {
  4172. //
  4173. // Note: need a swap entry with row locked and ref count 2 to realloc node.
  4174. //
  4175. // Or just alloc a new buffer and set UFileName to point to it with
  4176. // a test on the free side to check if not using the in-node buffer.
  4177. // But do we really need the name?
  4178. // It is used to build the full name path but is it really needed?
  4179. // For now just copy the first n characters.
  4180. //
  4181. Len = OldEntry->UFileName.MaximumLength - sizeof(WCHAR);
  4182. }
  4183. CopyMemory(OldEntry->UFileName.Buffer, FilterEntry->UFileName.Buffer, Len);
  4184. OldEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
  4185. OldEntry->UFileName.Length = (USHORT) Len;
  4186. FRS_JOURNAL_FILTER_PRINT(5, FilterTable, OldEntry);
  4187. FrsFreeType(FilterEntry);
  4188. FilterEntry = OldEntry;
  4189. WStatus = ERROR_SUCCESS;
  4190. goto RETURN;
  4191. REPLACE_ERROR:
  4192. JrnlReleaseChildLock(Replica);
  4193. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, OldEntry);
  4194. GhtDereferenceEntryByAddress(FilterTable, OldEntry, TRUE);
  4195. ERROR_RETURN:
  4196. GHT_DUMP_TABLE(5, FilterTable);
  4197. if (RetFilterEntry != NULL) {*RetFilterEntry = NULL;}
  4198. return ERROR_GEN_FAILURE;
  4199. }
  4200. ULONG
  4201. JrnlDeleteDirFilterEntry(
  4202. IN PGENERIC_HASH_TABLE FilterTable,
  4203. IN PULONGLONG DFileID,
  4204. IN PFILTER_TABLE_ENTRY ArgFilterEntry
  4205. )
  4206. /*++
  4207. Routine Description:
  4208. Delete the filter entry from the Volume Filter Table.
  4209. The caller acquires the child list lock for the replica when doing the
  4210. child list removal.
  4211. The caller must decrement the refcount on the filter entry.
  4212. Arguments:
  4213. FilterTable - ptr to the filter table struct containing the directory now.
  4214. DFileID - ptr to FID of dir to delete.
  4215. ArgFilterEntry - if non-null then delete this entry and skip lookup.
  4216. Return Value:
  4217. Win32 status.
  4218. --*/
  4219. {
  4220. #undef DEBSUB
  4221. #define DEBSUB "JrnlDeleteDirFilterEntry:"
  4222. ULONG GStatus, WStatus;
  4223. PFILTER_TABLE_ENTRY FilterEntry;
  4224. //
  4225. // Find the entry.
  4226. //
  4227. if (ArgFilterEntry == NULL) {
  4228. GStatus = GhtLookup(FilterTable, DFileID, TRUE, &FilterEntry);
  4229. if (GStatus != GHT_STATUS_SUCCESS) {
  4230. DPRINT1(0, "++ WARNING: Filter entry not found in table for FID= %08x %08x\n",
  4231. PRINTQUAD(*DFileID));
  4232. return ERROR_NOT_FOUND;
  4233. }
  4234. } else {
  4235. FilterEntry = ArgFilterEntry;
  4236. }
  4237. DPRINT1(4, "++ Deleting filter entry, FID= %08x %08x\n", PRINTQUAD(FilterEntry->DFileID));
  4238. //
  4239. // Unlink the filter entry from the parent's child list.
  4240. //
  4241. // Return an error if there are children. This can happen
  4242. // when we take a directory-create through retry. Its children
  4243. // were added when the process queue was unblocked. This
  4244. // function is then called when retrying the change order
  4245. // with the idtable set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
  4246. //
  4247. if (!IsListEmpty(&FilterEntry->ChildHead)) {
  4248. DPRINT(0, "++ WARN - Dir Delete but child list not empty\n");
  4249. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4250. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4251. return ERROR_GEN_FAILURE;
  4252. }
  4253. if (FilterEntry->ChildEntry.Flink == NULL) {
  4254. //
  4255. // This may happen if we have just completed a MOVEOUT of a dir
  4256. // subtree and a dir create remote CO is ahead of us in the process
  4257. // queue. When the dir create tried to add the filter table entry
  4258. // it won't find the parent so this entry won't be on any parent list.
  4259. // See comment in JrnlAddFilterEntry() about creation of an orphan
  4260. // list in the future.
  4261. //
  4262. DPRINT(0, "++ WARN - Dir entry not on child list\n");
  4263. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4264. } else {
  4265. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  4266. FilterEntry->ChildEntry.Flink = NULL;
  4267. }
  4268. //
  4269. // Delete the entry from the filter table.
  4270. //
  4271. GStatus = GhtDeleteEntryByAddress(FilterTable, FilterEntry, TRUE);
  4272. if (GStatus != GHT_STATUS_SUCCESS) {
  4273. DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
  4274. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4275. FRS_ASSERT(!"JrnlDeleteDirFilterEntry failed.");
  4276. return ERROR_GEN_FAILURE;
  4277. }
  4278. return ERROR_SUCCESS;
  4279. }
  4280. ULONG
  4281. JrnlGetPathAndLevel(
  4282. IN PGENERIC_HASH_TABLE FilterTable,
  4283. IN PLONGLONG StartDirFileID,
  4284. OUT PULONG Level
  4285. )
  4286. /*++
  4287. Routine Description:
  4288. Walk the filter table from DirFileID to the root building the directory
  4289. path and counting the levels.
  4290. Arguments:
  4291. FilterTable -- Ptr to the Generic hash table containing a dir filter
  4292. StartDirFileID -- The file id of the directory to start the walk from.
  4293. Level -- The returned nesting level of the dir. (0 means the replcia tree root)
  4294. Return Value:
  4295. FrsError status.
  4296. --*/
  4297. {
  4298. #undef DEBSUB
  4299. #define DEBSUB "JrnlGetPathAndLevel:"
  4300. ULONGLONG DirFileID = *StartDirFileID;
  4301. PFILTER_TABLE_ENTRY FilterEntry;
  4302. ULONG FStatus = FrsErrorSuccess;
  4303. ULONG GStatus;
  4304. *Level = 0;
  4305. GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
  4306. if (GStatus == GHT_STATUS_NOT_FOUND) {
  4307. return FrsErrorNotFound;
  4308. }
  4309. while (GStatus == GHT_STATUS_SUCCESS) {
  4310. //
  4311. // Stop when we hit the replica tree root.
  4312. //
  4313. if (FilterEntry->DParentFileID == ZERO_FID) {
  4314. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4315. break;
  4316. }
  4317. *Level += 1;
  4318. if (*Level > 100000) {
  4319. //
  4320. // Hung. Corrupt Filter table.
  4321. //
  4322. DPRINT(0, "++ ERROR: Hung in Journal entry filter lookup. Entry skipped\n");
  4323. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4324. GHT_DUMP_TABLE(0, FilterTable);
  4325. FRS_ASSERT(!"Hung in Journal entry filter lookup");
  4326. return FrsErrorInternalError;
  4327. }
  4328. //
  4329. // Get parent FID & Drop the reference to the filter table entry.
  4330. // Lookup parent's filter entry.
  4331. //
  4332. DirFileID = FilterEntry->DParentFileID;
  4333. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4334. GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
  4335. if (GStatus != GHT_STATUS_SUCCESS) {
  4336. //
  4337. // Corrupt Filter table or it could be an op on an orphaned
  4338. // dir that will later get deleted.
  4339. //
  4340. DPRINT(0, "++ ERROR: Parent filter entry not found in Journal filter Table.\n");
  4341. //GHT_DUMP_TABLE(0, FilterTable);
  4342. return FrsErrorInternalError;
  4343. }
  4344. }
  4345. return FStatus;
  4346. }
  4347. BOOL
  4348. JrnlIsChangeOrderInReplica(
  4349. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  4350. IN PLONGLONG DirFileID
  4351. )
  4352. /*++
  4353. Routine Description:
  4354. Look up the File ID for the given directory in the given journal filter
  4355. table and if found compare the replica set pointer from the filter entry
  4356. to the replica set pointer in the change order. Return TRUE if match.
  4357. Arguments:
  4358. ChangeOrder -- The change order entry assoicated with the file of interest.
  4359. DirFileID -- The file id of the directory in which the file currently
  4360. resides. This may be different than the parent FID in the
  4361. change order.
  4362. Return Value:
  4363. TRUE if Pointer to Replica Struct or NULL if not found.
  4364. --*/
  4365. {
  4366. #undef DEBSUB
  4367. #define DEBSUB "JrnlIsChangeOrderInReplica:"
  4368. PFILTER_TABLE_ENTRY FilterEntry;
  4369. PGENERIC_HASH_TABLE FilterTable;
  4370. ULONG GStatus;
  4371. PREPLICA Replica, FilterReplica = NULL;
  4372. Replica = ChangeOrder->NewReplica;
  4373. if (Replica == NULL) {
  4374. DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
  4375. return FALSE;
  4376. }
  4377. if (Replica->pVme == NULL) {
  4378. DPRINT(4, "++ WARN: No pVme in Replica\n");
  4379. return FALSE;
  4380. }
  4381. FilterTable = Replica->pVme->FilterTable;
  4382. if (FilterTable == NULL) {
  4383. DPRINT(4, "++ WARN: No FilterTable in pVme\n");
  4384. return FALSE;
  4385. }
  4386. GStatus = GhtLookup(FilterTable, DirFileID, TRUE, &FilterEntry);
  4387. if (GStatus == GHT_STATUS_SUCCESS) {
  4388. //
  4389. // Get Replica ptr & Drop the reference to the filter table entry.
  4390. //
  4391. FilterReplica = FilterEntry->Replica;
  4392. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4393. }
  4394. return (Replica == FilterReplica);
  4395. }
  4396. ULONG
  4397. JrnlCommand(
  4398. PCOMMAND_PACKET CmdPkt
  4399. )
  4400. /*++
  4401. Routine Description:
  4402. Process a command packet sent to the Journal sub-system. External
  4403. components interact with the Journal by building a command packet and
  4404. submitting it to the Journal Process Queue. The typical way journal
  4405. processing is started is by issuing the following series of command
  4406. packets using FrsSubmitCommand.
  4407. <Start the journal monitor thread>
  4408. CMD_INIT_SUBSYSTEM: Init and start the journal for all replicas
  4409. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set A
  4410. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set B
  4411. o
  4412. o
  4413. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set Z
  4414. CMD_STOP_SUBSYSTEM: Stop journal processing for all replica sets
  4415. and terminate the journal sub-system.
  4416. Arguments:
  4417. CmdPkt: Command packet to process.
  4418. Return Value:
  4419. Win32 status
  4420. --*/
  4421. {
  4422. #undef DEBSUB
  4423. #define DEBSUB "JrnlCommand:"
  4424. LIST_ENTRY DeadList;
  4425. PLIST_ENTRY Entry;
  4426. ULONG WStatus = ERROR_SUCCESS;
  4427. ULONG FStatus;
  4428. PVOLUME_MONITOR_ENTRY pVme;
  4429. FILETIME SystemTime;
  4430. PCONFIG_TABLE_RECORD ConfigRecord;
  4431. DPRINT1(5, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  4432. switch (CmdPkt->Command) {
  4433. case CMD_COMMAND_ERROR:
  4434. DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
  4435. break;
  4436. case CMD_INIT_SUBSYSTEM:
  4437. //
  4438. // Initialize the journal
  4439. //
  4440. WStatus = JournalMonitorInit();
  4441. DEBUG_FLUSH();
  4442. if (!WIN_SUCCESS(WStatus)) {
  4443. if (!FrsIsShuttingDown) {
  4444. DPRINT_WS(0, "ERROR - Journal cannot start;", WStatus);
  4445. }
  4446. break;
  4447. }
  4448. //
  4449. // Init the change order accept thread.
  4450. //
  4451. if (ChgOrdAcceptInitialize() != FrsErrorSuccess) {
  4452. DPRINT(0, "ERROR - Journal cannot start; can't start change order thread.\n");
  4453. WStatus = ERROR_GEN_FAILURE;
  4454. break;
  4455. }
  4456. DPRINT(0, "Journal has started.\n");
  4457. DEBUG_FLUSH();
  4458. SetEvent(JournalEvent);
  4459. //
  4460. // Free up memory by reducing our working set size
  4461. //
  4462. SetProcessWorkingSetSize(ProcessHandle, (SIZE_T)-1, (SIZE_T)-1);
  4463. break;
  4464. //
  4465. // Close all the journal VMEs, rundown the Process Queue and free
  4466. // all the queue entries. On return the main process loop with
  4467. // see the queue is rundown and will terminate the thread.
  4468. //
  4469. case CMD_STOP_SUBSYSTEM:
  4470. DPRINT(4, "Stopping Journal Subsystem\n");
  4471. JrnlCloseAll();
  4472. FrsRtlRunDownQueue(&JournalProcessQueue, &DeadList);
  4473. FrsFreeTypeList(&DeadList);
  4474. break;
  4475. case CMD_PAUSE_SUBSYSTEM:
  4476. case CMD_QUERY_INFO_SUBSYSTEM:
  4477. case CMD_SET_CONFIG_SUBSYSTEM:
  4478. case CMD_QUERY_CONFIG_SUBSYSTEM:
  4479. case CMD_CANCEL_COMMAND_SUBSYSTEM:
  4480. case CMD_READ_SUBSYSTEM:
  4481. case CMD_WRITE_SUBSYSTEM:
  4482. goto UNSUPPORTED_COMMAND;
  4483. case CMD_START_SERVICE:
  4484. case CMD_STOP_SERVICE:
  4485. case CMD_PAUSE_SERVICE:
  4486. case CMD_QUERY_INFO_SERVICE:
  4487. case CMD_SET_CONFIG_SERVICE:
  4488. case CMD_QUERY_CONFIG_SERVICE:
  4489. case CMD_CANCEL_COMMAND_SERVICE:
  4490. case CMD_READ_SERVICE:
  4491. case CMD_WRITE_SERVICE:
  4492. break;
  4493. //
  4494. // This command is an acknowledgement from the journal read thread that
  4495. // journal read activity on this volume (pVme parameter) has paused.
  4496. // Set the state to JRNL_STATE_PAUSED and signal the event in the
  4497. // VME so any waiters can proceed. Also mark all replica sets on this
  4498. // volume as paused.
  4499. //
  4500. case CMD_JOURNAL_PAUSED:
  4501. pVme = CmdPkt->Parameters.JournalRequest.pVme;
  4502. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  4503. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_PAUSED);
  4504. //
  4505. // Save time of last replica pause. LastPause
  4506. //
  4507. GetSystemTimeAsFileTime(&SystemTime);
  4508. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  4509. //
  4510. // Iterator pE is of type REPLICA.
  4511. //
  4512. ConfigRecord = (PCONFIG_TABLE_RECORD) (pE->ConfigTable.pDataRecord);
  4513. COPY_TIME(&ConfigRecord->LastPause, &SystemTime);
  4514. );
  4515. SetEvent(pVme->Event);
  4516. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  4517. break;
  4518. //
  4519. // This command initializes the journal and database for a single replica
  4520. // set. It is intended to be used when creating or starting a replica
  4521. // set after the initial system startup has occurred.
  4522. // Note we don't complete the command here since we propagate it on
  4523. // to the DB server. In the case of failure the command is completed
  4524. // here and status is returned in the cmd pkt ErrorStatus field.
  4525. // The Replica->FStatus field may have more status about the failure.
  4526. //
  4527. case CMD_JOURNAL_INIT_ONE_RS:
  4528. FStatus = JrnlInitOneReplicaSet(CmdPkt);
  4529. if (FRS_SUCCESS(FStatus)) {
  4530. return ERROR_SUCCESS;
  4531. }
  4532. WStatus = ERROR_GEN_FAILURE;
  4533. break;
  4534. //
  4535. // Delete a journal directory filter table entry. We do it in the journal
  4536. // thread so we don't have to lock the table.
  4537. //
  4538. case CMD_JOURNAL_DELETE_DIR_FILTER_ENTRY:
  4539. WStatus = JrnlDeleteDirFilterEntry(
  4540. JrReplica(CmdPkt)->pVme->FilterTable,
  4541. &JrDFileID(CmdPkt),
  4542. NULL);
  4543. break;
  4544. //
  4545. // Cleanout unneeded entries in the Journal Write Filter.
  4546. //
  4547. case CMD_JOURNAL_CLEAN_WRITE_FILTER:
  4548. WStatus = JrnlCleanWriteFilter(CmdPkt);
  4549. break;
  4550. default:
  4551. goto UNSUPPORTED_COMMAND;
  4552. } // end switch
  4553. //
  4554. // Retire the command packet.
  4555. //
  4556. FrsCompleteCommand(CmdPkt, WStatus);
  4557. return WStatus;
  4558. UNSUPPORTED_COMMAND:
  4559. DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
  4560. return ERROR_INVALID_PARAMETER;
  4561. }
  4562. JET_ERR
  4563. JrnlInsertFilterEntry(
  4564. IN PTHREAD_CTX ThreadCtx,
  4565. IN PTABLE_CTX TableCtx,
  4566. IN PVOID Record,
  4567. IN PVOID Context
  4568. )
  4569. /*++
  4570. Routine Description:
  4571. This is a worker function passed to FrsEnumerateTable(). Each time
  4572. it is called It inserts a DIRTable record into the Volume filter table.
  4573. Arguments:
  4574. ThreadCtx - Needed to access Jet. (Not used).
  4575. TableCtx - A ptr to a DIRTable context struct.
  4576. Record - A ptr to a DIRTable record.
  4577. Context - A ptr to the Replica set we are loading data for.
  4578. Return Value:
  4579. A Jet error status. Success means call us with the next record.
  4580. Failure means don't call again and pass our status back to the
  4581. caller of FrsEnumerateTable().
  4582. --*/
  4583. {
  4584. #undef DEBSUB
  4585. #define DEBSUB "JrnlInsertFilterEntry:"
  4586. PDIRTABLE_RECORD DIRTableRec = (PDIRTABLE_RECORD) Record;
  4587. PREPLICA Replica = (PREPLICA) Context;
  4588. ULONG NameLen, GStatus;
  4589. PFILTER_TABLE_ENTRY FilterEntry;
  4590. //
  4591. // Abort enum if shutting down.
  4592. //
  4593. if (FrsIsShuttingDown) {
  4594. return JET_errTermInProgress;
  4595. }
  4596. //
  4597. // Build a filter table record big enough to hold the filename
  4598. // and insert into the volume filter table. Note that the
  4599. // file name field is large enough to hold the terminating
  4600. // UNICODE_NULL because the file name field is defined as
  4601. // a wchar array of length 1 in FILTER_TABLE_ENTRY.
  4602. //
  4603. NameLen = wcslen(DIRTableRec->DFileName) * sizeof(WCHAR);
  4604. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, NameLen);
  4605. //
  4606. // Copy the data from the DIRTable record to the filter entry
  4607. // and add a pointer to the Replica struct.
  4608. //
  4609. CopyMemory(FilterEntry->DFileName, DIRTableRec->DFileName, NameLen + 2);
  4610. FilterEntry->DFileID = DIRTableRec->DFileID;
  4611. FilterEntry->DParentFileID = DIRTableRec->DParentFileID;
  4612. FilterEntry->DReplicaNumber = DIRTableRec->DReplicaNumber;
  4613. FilterEntry->Replica = Replica;
  4614. FilterEntry->UFileName.Length = (USHORT)NameLen;
  4615. FilterEntry->UFileName.Buffer[NameLen/2] = UNICODE_NULL;
  4616. GStatus = GhtInsert(Replica->pVme->FilterTable, FilterEntry, TRUE, FALSE);
  4617. if (GStatus != GHT_STATUS_SUCCESS) {
  4618. DPRINT1(0, "ERROR - GhtInsert Failed: %d\n", GStatus);
  4619. DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
  4620. FrsFreeType(FilterEntry);
  4621. return JET_errKeyDuplicate;
  4622. }
  4623. return JET_errSuccess;
  4624. }
  4625. ULONG
  4626. JrnlCleanWriteFilter(
  4627. PCOMMAND_PACKET CmdPkt
  4628. )
  4629. /*++
  4630. Routine Description:
  4631. Walk thru all active replica sets on this volume. Find the minimum
  4632. value for FSVolLastUsn. This is the Joint journal commit point for all
  4633. replica sets on the volume. No replica set will start a journal
  4634. read before this point.
  4635. Then enumerate all entries of the Volume Write Filter table and free
  4636. the entries whose USN is less than the Joint Journal commit point.
  4637. Arguments:
  4638. CmdPkt: Command packet to process.
  4639. Return Value:
  4640. Win32 status
  4641. --*/
  4642. {
  4643. #undef DEBSUB
  4644. #define DEBSUB "JrnlCleanWriteFilter:"
  4645. USN JointJournalCommitUsn = MAXLONGLONG;
  4646. LONGLONG FSVolLastUSN;
  4647. PVOLUME_MONITOR_ENTRY pVme;
  4648. PCONFIG_TABLE_RECORD ConfigRecord;
  4649. ULONG TimeOut = 5*JRNL_CLEAN_WRITE_FILTER_INTERVAL;
  4650. BOOL FoundpVme = FALSE;
  4651. //
  4652. // Ignore if pVme is no longer active; don't retry
  4653. //
  4654. pVme = JrpVme(CmdPkt);
  4655. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  4656. if (pVme == pE) {
  4657. FoundpVme = TRUE;
  4658. break;
  4659. }
  4660. );
  4661. if (!FoundpVme) {
  4662. return ERROR_SUCCESS;
  4663. }
  4664. //
  4665. // If this journal is currently running then make a cleaning pass.
  4666. //
  4667. if (pVme->IoActive) {
  4668. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  4669. // Iterator pE is of type PREPLICA.
  4670. //
  4671. // Get QuadWriteLock lock to avoid quadword tearing when FSVolLastUSN is read.
  4672. //
  4673. ConfigRecord = (PCONFIG_TABLE_RECORD)pE->ConfigTable.pDataRecord;
  4674. AcquireQuadLock(&pVme->QuadWriteLock);
  4675. FSVolLastUSN = ConfigRecord->FSVolLastUSN;
  4676. ReleaseQuadLock(&pVme->QuadWriteLock);
  4677. if (FSVolLastUSN < JointJournalCommitUsn) {
  4678. JointJournalCommitUsn = FSVolLastUSN;
  4679. }
  4680. );
  4681. DPRINT1(5, "WRITE FILTER TABLE CLEAN AT JointJournalCommitUsn = %08x %08x\n",
  4682. PRINTQUAD(JointJournalCommitUsn));
  4683. QHashEnumerateTable(pVme->FrsWriteFilter,
  4684. JrnlCleanWriteFilterWorker,
  4685. &JointJournalCommitUsn);
  4686. TimeOut = JRNL_CLEAN_WRITE_FILTER_INTERVAL;
  4687. }
  4688. //
  4689. // Resubmit the clean filter request.
  4690. //
  4691. JrnlSubmitCleanWriteFilter(pVme, TimeOut);
  4692. return ERROR_SUCCESS;
  4693. }
  4694. ULONG
  4695. JrnlCleanWriteFilterWorker (
  4696. PQHASH_TABLE Table,
  4697. PQHASH_ENTRY BeforeNode,
  4698. PQHASH_ENTRY TargetNode,
  4699. PVOID Context
  4700. )
  4701. /*++
  4702. Routine Description:
  4703. This function is called thru QHashEnumerateTable() to process
  4704. an entry.
  4705. Arguments:
  4706. Table - the hash table being enumerated
  4707. BeforeNode -- ptr to the QhashEntry before the node of interest.
  4708. TargetNode -- ptr to the QhashEntry of interest.
  4709. Context - ptr to the USN to compare against.
  4710. Return Value:
  4711. Win32 status
  4712. --*/
  4713. {
  4714. #undef DEBSUB
  4715. #define DEBSUB "JrnlCleanWriteFilterWorker:"
  4716. USN JointJournalCommitUsn = *(USN *)Context;
  4717. if ( (USN)(TargetNode->QKey) < JointJournalCommitUsn) {
  4718. DPRINT5(4, "DelWrtFilterEntry - BeforeNode: %08x, Link: %08x,"
  4719. " Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
  4720. BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
  4721. PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
  4722. //
  4723. // Tell QHashEnumerateTable() to delete the node and continue the enum.
  4724. //
  4725. return FrsErrorDeleteRequested;
  4726. }
  4727. return FrsErrorSuccess;
  4728. }
  4729. VOID
  4730. JrnlSubmitCleanWriteFilter(
  4731. IN PVOLUME_MONITOR_ENTRY pVme,
  4732. IN ULONG TimeOut
  4733. )
  4734. /*++
  4735. Routine Description:
  4736. Queue a work request to clean the write filter in TimeOut Seconds.
  4737. Arguments:
  4738. pVme -- The Vme of the write filter to clean.
  4739. TimeOut -- The max time to wait before giving up and doing Unjoin.
  4740. Return Value:
  4741. None.
  4742. --*/
  4743. {
  4744. #undef DEBSUB
  4745. #define DEBSUB "JrnlSubmitCleanWriteFilter:"
  4746. PCOMMAND_PACKET Cmd;
  4747. Cmd = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_CLEAN_WRITE_FILTER);
  4748. JrReplica(Cmd) = NULL;
  4749. JrpVme(Cmd) = pVme;
  4750. DPRINT1(5, "Submit CMD_JOURNAL_CLEAN_WRITE_FILTER %08x\n", Cmd);
  4751. FrsDelQueueSubmit(Cmd, TimeOut);
  4752. }
  4753. BOOL
  4754. JrnlSetReplicaState(
  4755. IN PREPLICA Replica,
  4756. IN ULONG NewState
  4757. )
  4758. /*++
  4759. Routine Description:
  4760. Change the state of the Replica set and move it to the associated list.
  4761. Note: If a replica set is in the error state it must first move back
  4762. to the initializing state before it can leave the error state.
  4763. Arguments:
  4764. Replica - The replica set whose state is changing.
  4765. NewState - The new state.
  4766. Return Value:
  4767. TRUE if state change allowed.
  4768. --*/
  4769. {
  4770. #undef DEBSUB
  4771. #define DEBSUB "JrnlSetReplicaState:"
  4772. ULONG OldState;
  4773. PVOLUME_MONITOR_ENTRY pVme;
  4774. WCHAR DsPollingIntervalStr[7]; // Max interval is NTFRSAPI_MAX_INTERVAL.
  4775. extern ULONG DsPollingInterval;
  4776. //
  4777. // Lock the replica lists
  4778. //
  4779. EnterCriticalSection(&JrnlReplicaStateLock);
  4780. OldState = Replica->ServiceState;
  4781. if (OldState > JRNL_STATE_MAX) {
  4782. DPRINT2(0, ":S: ERROR - Invalid previous Replica->ServiceState (%d) for Replica %ws\n",
  4783. OldState, Replica->ReplicaName->Name);
  4784. FRS_ASSERT(!"Invalid previous Replica->ServiceState");
  4785. goto CLEANUP;
  4786. }
  4787. if (NewState > JRNL_STATE_MAX) {
  4788. DPRINT2(0, ":S: ERROR - Invalid new Replica->ServiceState (%d) for Replica %ws\n",
  4789. NewState, Replica->ReplicaName->Name);
  4790. FRS_ASSERT(!"Invalid new Replica->ServiceState");
  4791. goto CLEANUP;
  4792. }
  4793. //
  4794. // If this replica set is in the ERROR State then the only allowed next
  4795. // state is INITIALIZING.
  4796. //
  4797. if ((REPLICA_IN_ERROR_STATE(OldState) || REPLICA_STATE_NEEDS_RESTORE(OldState)) &&
  4798. (NewState != REPLICA_STATE_INITIALIZING) &&
  4799. !REPLICA_STATE_NEEDS_RESTORE(NewState)) {
  4800. DPRINT4(4, ":S: ERROR: Replica (%d) %ws state change from %s to %s disallowed\n",
  4801. Replica->ReplicaNumber,
  4802. (Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
  4803. RSS_NAME(OldState),
  4804. RSS_NAME(NewState));
  4805. LeaveCriticalSection(&JrnlReplicaStateLock);
  4806. return FALSE;
  4807. }
  4808. DPRINT4(4, ":S: Replica (%d) %ws state change from %s to %s\n",
  4809. Replica->ReplicaNumber,
  4810. (Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
  4811. RSS_NAME(OldState),
  4812. RSS_NAME(NewState));
  4813. //
  4814. // if no state change, we're done.
  4815. //
  4816. if (OldState == NewState) {
  4817. goto CLEANUP;
  4818. }
  4819. //
  4820. // If we went from Active to Paused and are not in Journal Replay mode
  4821. // then advance the Replica->LastUsnRecordProcessed to
  4822. // pVme->CurrentUsnRecordDone.
  4823. //
  4824. pVme = Replica->pVme;
  4825. if (pVme != NULL) {
  4826. if ((OldState == REPLICA_STATE_ACTIVE) &&
  4827. (NewState == REPLICA_STATE_PAUSED) &&
  4828. !REPLICA_REPLAY_MODE(Replica, pVme)) {
  4829. DPRINT2(4, ":U: Replica->LastUsnRecordProcessed was: %08x %08x now: %08x %08x\n",
  4830. PRINTQUAD(Replica->LastUsnRecordProcessed),
  4831. PRINTQUAD(pVme->CurrentUsnRecordDone));
  4832. FRS_ASSERT(pVme->CurrentUsnRecordDone >= Replica->LastUsnRecordProcessed);
  4833. AcquireQuadLock(&pVme->QuadWriteLock);
  4834. Replica->LastUsnRecordProcessed = pVme->CurrentUsnRecordDone;
  4835. ReleaseQuadLock(&pVme->QuadWriteLock);
  4836. }
  4837. }
  4838. //
  4839. // update the new state.
  4840. //
  4841. Replica->ServiceState = NewState;
  4842. //
  4843. // if no list change, we're done.
  4844. //
  4845. if (RSS_LIST(OldState) == RSS_LIST(NewState)) {
  4846. goto CLEANUP;
  4847. }
  4848. //
  4849. // Remove from current list and add to new list.
  4850. //
  4851. if (RSS_LIST(OldState) != NULL) {
  4852. FrsRtlRemoveEntryQueue(RSS_LIST(OldState), &Replica->ReplicaList);
  4853. }
  4854. if (RSS_LIST(NewState) != NULL) {
  4855. FrsRtlInsertTailQueue(RSS_LIST(NewState), &Replica->ReplicaList);
  4856. }
  4857. CLEANUP:
  4858. if (REPLICA_IN_ERROR_STATE(NewState) &&
  4859. !REPLICA_FSTATUS_ROOT_HAS_MOVED(Replica->FStatus)) {
  4860. //
  4861. // Post an error log entry if the replica is in
  4862. // error state but not because the root has moved.
  4863. // If the root has moved then the error log has
  4864. // already been written when the move was detected
  4865. // and this generic eventlog here might confuse the user.
  4866. //
  4867. PWCHAR WStatusUStr, FStatusUStr;
  4868. //
  4869. // Post the failure in the event log.
  4870. //
  4871. if (Replica->Root != NULL) {
  4872. WStatusUStr = L"";
  4873. FStatusUStr = FrsAtoW(ErrLabelFrs(Replica->FStatus));
  4874. EPRINT8(EVENT_FRS_REPLICA_SET_CREATE_FAIL,
  4875. Replica->SetName->Name,
  4876. ComputerDnsName,
  4877. Replica->MemberName->Name,
  4878. Replica->Root,
  4879. Replica->Stage,
  4880. JetPath,
  4881. WStatusUStr,
  4882. FStatusUStr);
  4883. FrsFree(FStatusUStr);
  4884. }
  4885. //
  4886. // Post the generic recovery steps message.
  4887. //
  4888. EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
  4889. } else if (NewState == REPLICA_STATE_JRNL_WRAP_ERROR) {
  4890. //
  4891. // Get the DsPollingInteval in minutes.
  4892. //
  4893. _itow(DsPollingInterval / (60 * 1000), DsPollingIntervalStr, 10);
  4894. if(DebugInfo.EnableJrnlWrapAutoRestore) {
  4895. EPRINT4(EVENT_FRS_REPLICA_IN_JRNL_WRAP_ERROR, Replica->SetName->Name, Replica->Root,
  4896. Replica->Volume, DsPollingIntervalStr);
  4897. } else {
  4898. EPRINT4(EVENT_FRS_REPLICA_IN_JRNL_WRAP_NO_AUTO_RESTORE, Replica->SetName->Name, Replica->Root,
  4899. Replica->Volume, DsPollingIntervalStr);
  4900. }
  4901. }
  4902. LeaveCriticalSection(&JrnlReplicaStateLock);
  4903. return TRUE;
  4904. }
  4905. ULONG
  4906. JrnlPrepareService1(
  4907. PREPLICA Replica
  4908. )
  4909. /*++
  4910. Routine Description:
  4911. Open the NTFS volume journal and initialize a Volume Monitor Entry for it
  4912. if this is the first replica set to use the volume. The REPLICA struct
  4913. is initialized with a pointer to the volume monitor entry and the file
  4914. path to the root of the replica tree for use in file name generation.
  4915. Init the VME Volume Sequence Number from the Replica config record,
  4916. taking the maximum value seen so far. This value is needed before we
  4917. can do any ReplicaTreeLoad operations on a new replica so we can set
  4918. the correct value in the IDTable and DIRTable entries.
  4919. After any new replica sets are loaded JrnlPrepareService2() is
  4920. called to init the Volume Filter Table with the directory entries for
  4921. every replica set on the volume.
  4922. Arguments:
  4923. Replica - The replica set we are initializing.
  4924. Return Value:
  4925. A Win32 error status.
  4926. Replica->FStatus has the FRS Error status return.
  4927. --*/
  4928. {
  4929. #undef DEBSUB
  4930. #define DEBSUB "JrnlPrepareService1:"
  4931. ULONGLONG CurrentTime;
  4932. PCONFIG_TABLE_RECORD ConfigRecord;
  4933. ULONG WStatus;
  4934. PVOLUME_MONITOR_ENTRY pVme;
  4935. CHAR TimeStr[TIME_STRING_LENGTH];
  4936. if (Replica == NULL) {
  4937. return ERROR_INVALID_PARAMETER;
  4938. }
  4939. DPRINT1(5, ":S: JrnlPrepareService1 for %ws\n", Replica->ReplicaName->Name);
  4940. ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
  4941. //
  4942. // Open the journal. Return the Volume Monitor Entry and save it in
  4943. // the Replica struct.
  4944. //
  4945. WStatus = JrnlOpen(Replica, &pVme, ConfigRecord);
  4946. if (!WIN_SUCCESS(WStatus) || (pVme == NULL)) {
  4947. //
  4948. // Replica->FStatus has the FRS Error status return.
  4949. //
  4950. DPRINT_WS(0, "Error from JrnlOpen", WStatus);
  4951. return WStatus;
  4952. }
  4953. //
  4954. // Set the journal recovery range end point for this replica set.
  4955. //
  4956. Replica->JrnlRecoveryEnd = pVme->JrnlRecoveryEnd;
  4957. //
  4958. // Start the Volume sequence number from the highest value any replica set
  4959. // has used up to now. The FrsVsn is saved in a replica config record
  4960. // every time VSN_SAVE_INTERVAL VSN's have been handed out. If we crashed
  4961. // we could be low by at most VSN_SAVE_INTERVAL VSN's assuming the update
  4962. // request completed. At startup we add VSN_RESTART_INCREMENT to the
  4963. // FrsVsn to ensure we don't use the same VSN twice. Then update the
  4964. // config record so if we start handing out VSNs and crash we don't reuse
  4965. // them. Can't do update here since this Replica struct is not on the
  4966. // VolReplicaList yet.
  4967. //
  4968. // The above solution does not work in the case where the database is
  4969. // lost or restored from backup. In this case other members of the replcia
  4970. // set could have VSNs for files that we originated which are larger than
  4971. // the current VSN value we might now be using. This causes two problems:
  4972. // 1. It fouls up dampening checks when we send out local COs with
  4973. // VSNs that are too small in comparison to what we have sent out in
  4974. // the past resulting in dropped COs, and
  4975. // 2. When we VVJoin with our inbound partners and start receiving change
  4976. // orders that were originated from us in the past, they could arrive
  4977. // with VSNs that are larger than what we are now using. When these
  4978. // "VVJoin Change Orders" to thru VV retire our MasterVV entry in the
  4979. // VVretire version vector is advanced to this larger value. This
  4980. // will cause subsequent locally generated COs to be marked out of order
  4981. // since their VSN is now smaller than the value in the MasterVV entry.
  4982. // This will prevent downsream dampening problems but it could allow
  4983. // a local dir create / child file create to be reordered downstream
  4984. // (since both are marked out of order) and cause the child create to
  4985. // fail if the parent create hasn't occured yet.
  4986. //
  4987. // To deal with the above nonsense we will now use a GMT time value as
  4988. // our initial VSN. We will not join with a partner whose time is
  4989. // off by +/- MaxPartnerClockSkew. So if we start the VSN at
  4990. // GMT + 2*MaxPartnerClockSkew then even if the last CO we originated, before
  4991. // we lost the database, occurred at GMT+MaxPartnerClockSkew and now at
  4992. // restart our current time has moved back to GMT-MaxPartnerClockSkew then
  4993. // we will still join with our partner and our new starting VSN is:
  4994. // (GMT-MaxPartnerClockSkew) + 2*MaxPartnerClockSkew = GMT+MaxPartnerClockSkew
  4995. //
  4996. // This is as large as the last VSN we could have generated if the time
  4997. // between the last CO generated (the crash) and the time at recovery
  4998. // was zero.
  4999. //
  5000. GetSystemTimeAsFileTime((PFILETIME)&CurrentTime);
  5001. LOCK_VME(pVme);
  5002. if (CurrentTime < ConfigRecord->FrsVsn) {
  5003. //
  5004. // Note: This may not be an error situation since on every restart
  5005. // of the service we advance time by 2*MaxPartnerClockSkew to
  5006. // ensure monotonicity (see above) so any time we shutdown the
  5007. // service before we have run at least this amount of time it will
  5008. // appear that time has moved backwards.
  5009. //
  5010. DPRINT(1, ":S: WARNING: Setting FrsVsn - Current system Time has moved backwards from value in config record.\n");
  5011. FileTimeToString((PFILETIME) &CurrentTime, TimeStr);
  5012. DPRINT2(1, ":S: WARNING: CurrentTime is (%08x %08x) %s\n",
  5013. PRINTQUAD(CurrentTime), TimeStr);
  5014. FileTimeToString((PFILETIME) &ConfigRecord->FrsVsn, TimeStr);
  5015. DPRINT2(1, ":S: WARNING: ConfigRecord->FrsVsn is (%08x %08x) %s\n",
  5016. PRINTQUAD(ConfigRecord->FrsVsn), TimeStr);
  5017. CurrentTime = ConfigRecord->FrsVsn;
  5018. }
  5019. if ((CurrentTime + 2*MaxPartnerClockSkew) > pVme->FrsVsn) {
  5020. pVme->FrsVsn = CurrentTime + 2*MaxPartnerClockSkew;
  5021. DPRINT(3, ":S: Setting new pVme->FrsVsn to Current time + 2*MaxPartnerClockSkew\n");
  5022. }
  5023. FileTimeToString((PFILETIME) &pVme->FrsVsn, TimeStr);
  5024. DPRINT2(3, ":S: pVme->FrsVsn is (%08x %08x) %s\n", PRINTQUAD(pVme->FrsVsn), TimeStr);
  5025. if (GlobSeqNum == QUADZERO) {
  5026. //
  5027. // Init the global sequence number with the above computed VSN to keep
  5028. // it monotonically increasing.
  5029. //
  5030. EnterCriticalSection(&GlobSeqNumLock);
  5031. GlobSeqNum = pVme->FrsVsn;
  5032. LeaveCriticalSection(&GlobSeqNumLock);
  5033. }
  5034. UNLOCK_VME(pVme);
  5035. Replica->pVme = pVme;
  5036. return WStatus;
  5037. }
  5038. ULONG
  5039. JrnlPrepareService2(
  5040. IN PTHREAD_CTX ThreadCtx,
  5041. IN PREPLICA Replica
  5042. )
  5043. /*++
  5044. Routine Description:
  5045. Load the volume filter hash table with the DIRTable entries for
  5046. this replica set. Create the change order hash table for this replica
  5047. set and add the REPLICA struct to the replica list for this volume.
  5048. Enumerate through the IDTable and load the parent Fid Hash Table.
  5049. Note: This function is called from the DB Service thread since we have
  5050. to be able to pause the journal before the dir table enum can be done.
  5051. Arguments:
  5052. ThreadCtx -- ptr to the thread context (could be from journal or DB thread)
  5053. Replica - The replica set we are initializing.
  5054. Return Value:
  5055. A Win32 error status.
  5056. --*/
  5057. {
  5058. #undef DEBSUB
  5059. #define DEBSUB "JrnlPrepareService2:"
  5060. JET_ERR jerr, jerr1;
  5061. JET_TABLEID DIRTid;
  5062. CHAR DIRTableName[JET_cbNameMost];
  5063. PTABLE_CTX DIRTableCtx;
  5064. JET_TABLEID IDTid;
  5065. CHAR IDTableName[JET_cbNameMost];
  5066. PTABLE_CTX IDTableCtx;
  5067. PREPLICA_THREAD_CTX RtCtx;
  5068. PCONFIG_TABLE_RECORD ConfigRecord;
  5069. ULONG ReplicaNumber;
  5070. ULONG WStatus;
  5071. PVOLUME_MONITOR_ENTRY pVme;
  5072. JET_TABLEID FrsOpenTableSaveTid; // for FrsOpenTableMacro DEBUG
  5073. PFILTER_TABLE_ENTRY FilterEntry;
  5074. if (Replica == NULL) {
  5075. return ERROR_INVALID_PARAMETER;
  5076. }
  5077. DPRINT1(5, ":S: JrnlPrepareService2 for %ws\n", Replica->ReplicaName->Name);
  5078. ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
  5079. pVme = Replica->pVme;
  5080. //
  5081. // Allocate the replica thread context so we can get the directory
  5082. // filter table. Link it to the Replic context list head.
  5083. //
  5084. RtCtx = FrsAllocType(REPLICA_THREAD_TYPE);
  5085. FrsRtlInsertTailList(&Replica->ReplicaCtxListHead, &RtCtx->ReplicaCtxList);
  5086. ReplicaNumber = Replica->ReplicaNumber;
  5087. DIRTableCtx = &RtCtx->DIRTable;
  5088. //
  5089. // Open the DIR table.
  5090. //
  5091. jerr = DBS_OPEN_TABLE(ThreadCtx, DIRTableCtx, ReplicaNumber, DIRTableName, &DIRTid);
  5092. CLEANUP1_JS(0, "++ DBS_OPEN_TABLE (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
  5093. //
  5094. // Walk through the DirTable and load the data into the Volume Filter Table
  5095. // by calling JrnlInsertFilterEntry() for this Replica.
  5096. // The Replica points to the VME and the VME points to the
  5097. // volume filter table.
  5098. //
  5099. jerr = FrsEnumerateTable(ThreadCtx,
  5100. DIRTableCtx,
  5101. DFileGuidIndexx,
  5102. JrnlInsertFilterEntry,
  5103. Replica);
  5104. if ((jerr != JET_errNoCurrentRecord)) {
  5105. CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
  5106. }
  5107. //
  5108. // Now that all the entries are in place, walk through the hash table and
  5109. // construct the child lists for this ReplicaSet. This is done as a
  5110. // second pass since we can't be certain of the order in which the
  5111. // entries come from the database. First get the Child List Lock for the
  5112. // Replica Set.
  5113. //
  5114. JrnlAcquireChildLock(Replica);
  5115. WStatus = (ULONG)GhtEnumerateTable(pVme->FilterTable,
  5116. JrnlFilterLinkChildNoError,
  5117. Replica);
  5118. if (!WIN_SUCCESS(WStatus)) {
  5119. JrnlReleaseChildLock(Replica);
  5120. DPRINT_WS(0, "Error from JrnlLinkChildren", WStatus);
  5121. GHT_DUMP_TABLE(4, pVme->FilterTable);
  5122. goto RETURN;
  5123. }
  5124. //
  5125. // Go find the root entry for this Replica Set in the Filter Table.
  5126. //
  5127. FilterEntry = (PFILTER_TABLE_ENTRY) GhtEnumerateTable(pVme->FilterTable,
  5128. JrnlFilterGetRoot,
  5129. Replica);
  5130. if (FilterEntry == NULL) {
  5131. JrnlReleaseChildLock(Replica);
  5132. DPRINT1(0, ":S: Error from JrnlFilterGetRoot. No Root for %d\n",
  5133. Replica->ReplicaNumber);
  5134. GHT_DUMP_TABLE(5, pVme->FilterTable);
  5135. goto RETURN_INV_DATA;
  5136. }
  5137. //
  5138. // Replay the inbound log table and update the volume filter table with
  5139. // any directory changes.
  5140. //
  5141. // Note: Add code to replay the inbound log and update the filter table.
  5142. // It may be better to handle this at startup when we are recovering the
  5143. // staging areas. But, the filter table may not exist yet.
  5144. #if DBG
  5145. if (DoDebug(5, DEBSUB)) {
  5146. DPRINT(5," >>>>>>>>>>>>>>> Top Down dump of Filter Tree <<<<<<<<<<<<<<<<\n");
  5147. JrnlEnumerateFilterTreeTD(pVme->FilterTable,
  5148. FilterEntry,
  5149. JrnlSubTreePrint,
  5150. Replica);
  5151. }
  5152. #endif DBG
  5153. JrnlReleaseChildLock(Replica);
  5154. //
  5155. // Build the Parent directory table.
  5156. //
  5157. IDTableCtx = &RtCtx->IDTable;
  5158. //
  5159. // Open the ID table.
  5160. //
  5161. jerr = DBS_OPEN_TABLE(ThreadCtx, IDTableCtx, ReplicaNumber, IDTableName, &IDTid);
  5162. CLEANUP1_JS(0, "++ Building parent FID table (%s):", IDTableName, jerr, RETURN_INV_DATA);
  5163. //
  5164. // Walk through the IDTable and load the data into the Volume Parent Dir
  5165. // Table by calling JrnlInsertParentEntry() for this Replica.
  5166. // The Replica points to the VME and the VME points to the
  5167. // parent dir table.
  5168. //
  5169. jerr = FrsEnumerateTable(ThreadCtx,
  5170. IDTableCtx,
  5171. GuidIndexx,
  5172. JrnlInsertParentEntry,
  5173. Replica);
  5174. if ((jerr != JET_errNoCurrentRecord)) {
  5175. CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", IDTableName, jerr, RETURN_INV_DATA);
  5176. }
  5177. //
  5178. // Replay the inbound log table and update the volume Parent Dir table
  5179. // for any file creates, deletes or renames.
  5180. //
  5181. // Note: Add code to replay the inbound log and update the Parent Dir table.
  5182. // It may be better to handle this at startup when we are recovering the
  5183. // staging areas. But, the filter table may not exist yet.
  5184. //
  5185. // Add the replica struct to the list of replica sets served by this
  5186. // volume journal.
  5187. //
  5188. if (AcquireVmeRef(pVme) == 0) {
  5189. WStatus = ERROR_OPERATION_ABORTED;
  5190. goto RETURN;
  5191. }
  5192. /////////////////////////////////////////////////
  5193. //
  5194. // Start the first read on the volume. Check first if it is PAUSED and
  5195. // set state to starting. If this is the first replica set on the volume
  5196. // the state will be INITIALIZING and we leave that alone so additional
  5197. // journal buffers get allocated.
  5198. //
  5199. // pVme = Replica->pVme;
  5200. if (pVme->JournalState != JRNL_STATE_INITIALIZING) {
  5201. if (pVme->JournalState == JRNL_STATE_PAUSED) {
  5202. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STARTING);
  5203. } else {
  5204. DPRINT2(0, "++ ERROR - Journal for %ws is in an unexpected state: %s\n",
  5205. Replica->ReplicaName->Name, RSS_NAME(pVme->JournalState));
  5206. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ERROR);
  5207. WStatus = ERROR_OPERATION_ABORTED;
  5208. goto RETURN;
  5209. }
  5210. }
  5211. //
  5212. // Initialize the LastUsnRecordProcessed for this replica set to the value
  5213. // saved in the config record or the value from the Inlog record with the
  5214. // largest USN so we don't reprocess them. If we end up reading (replaying)
  5215. // the journal at an earlier point to let another replica set catch up we
  5216. // need to ignore those old records. If LastShutdown or FSVolLastUSN is 0
  5217. // then this is the very first time we have started replication on this
  5218. // replica set so set the FSVolLastUSN and LastUsnRecordProcessed to the
  5219. // current journal read point, pVme->JrnlReadPoint.
  5220. //
  5221. if ((ConfigRecord->LastShutdown == 0) ||
  5222. (ConfigRecord->FSVolLastUSN == 0)) {
  5223. if (!(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING)) {
  5224. DPRINT2(0, ":S: BETA ERROR - Service state is %d; not _CREATING for %ws\n",
  5225. ConfigRecord->ServiceState, Replica->ReplicaName->Name);
  5226. }
  5227. ConfigRecord->FSVolLastUSN = pVme->JrnlReadPoint;
  5228. Replica->LastUsnRecordProcessed = pVme->JrnlReadPoint;
  5229. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
  5230. } else {
  5231. //
  5232. // Start where we left off and minimize with any other replicas.
  5233. //
  5234. Replica->LastUsnRecordProcessed = ConfigRecord->FSVolLastUSN;
  5235. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
  5236. //
  5237. // Advance to largest USN of Inlog record.
  5238. //
  5239. if (Replica->JrnlRecoveryStart > Replica->LastUsnRecordProcessed) {
  5240. Replica->LastUsnRecordProcessed = Replica->JrnlRecoveryStart;
  5241. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x (JrnlRecoveryStart > LastUsnRecordProcessed)\n",
  5242. PRINTQUAD(Replica->LastUsnRecordProcessed));
  5243. }
  5244. //
  5245. // start at the earliest USN of any replica set on the volume.
  5246. // If the journal is active it is currently using JrnlReadPoint to
  5247. // track its current read point. Since we may be starting a replica
  5248. // set on an active volume ReplayUsn is used to save the starting
  5249. // point. After the volume is paused and then unpaused ReplayUsn
  5250. // is copied to JrnlReadPoint where the journal will start reading.
  5251. //
  5252. if (pVme->ReplayUsnValid) {
  5253. DPRINT1(4, ":S: ReplayUsn was: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  5254. pVme->ReplayUsn = min(Replica->LastUsnRecordProcessed, pVme->ReplayUsn);
  5255. } else {
  5256. DPRINT(4, ":S: No ReplayUsn was active.\n");
  5257. pVme->ReplayUsn = Replica->LastUsnRecordProcessed;
  5258. pVme->ReplayUsnValid = TRUE;
  5259. }
  5260. DPRINT1(4, ":S: ReplayUsn is: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  5261. }
  5262. //
  5263. // Init the inlog commit point so if we shutdown the saved value is correct.
  5264. //
  5265. Replica->InlogCommitUsn = Replica->LastUsnRecordProcessed;
  5266. DPRINT1(4, ":S: Replica->InlogCommitUsn: %08x %08x\n",
  5267. PRINTQUAD(Replica->InlogCommitUsn));
  5268. //
  5269. // Track the oldest USN save point and the most recent USN progress point
  5270. // for any replica set on the volume.
  5271. //
  5272. if ((pVme->LastUsnSavePoint == (USN)0) ||
  5273. (pVme->LastUsnSavePoint > Replica->LastUsnRecordProcessed)) {
  5274. pVme->LastUsnSavePoint = Replica->LastUsnRecordProcessed;
  5275. }
  5276. if (pVme->MonitorMaxProgressUsn < Replica->LastUsnRecordProcessed) {
  5277. pVme->MonitorMaxProgressUsn = Replica->LastUsnRecordProcessed;
  5278. }
  5279. //
  5280. // This replica's FrsVsn may be out of date by a large margin
  5281. // if it has been awhile since the set was last started successfully.
  5282. // This results in an assert in DbsReplicaSaveMark(). So, as
  5283. // long as the FrsVsns look sane, assign the volume's current
  5284. // Vsn to the replica set.
  5285. //
  5286. FRS_ASSERT(pVme->FrsVsn >= ConfigRecord->FrsVsn);
  5287. ConfigRecord->FrsVsn = pVme->FrsVsn;
  5288. /////////////////////////////////////////////////
  5289. InitializeListHead(&Replica->RecoveryRefreshList);
  5290. InterlockedIncrement(&Replica->ReferenceCount);
  5291. pVme->ActiveReplicas += 1;
  5292. FrsRtlInsertTailList(&pVme->ReplicaListHead, &Replica->VolReplicaList);
  5293. WStatus = ERROR_SUCCESS;
  5294. RETURN:
  5295. //
  5296. // Close the replica tables and release the RtCtx struct.
  5297. //
  5298. DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
  5299. return WStatus;
  5300. RETURN_INV_DATA:
  5301. DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
  5302. return (jerr == JET_errTermInProgress) ? ERROR_OPERATION_ABORTED : ERROR_INVALID_DATA;
  5303. }
  5304. JET_ERR
  5305. JrnlInsertParentEntry(
  5306. IN PTHREAD_CTX ThreadCtx,
  5307. IN PTABLE_CTX TableCtx,
  5308. IN PVOID Record,
  5309. IN PVOID Context
  5310. )
  5311. /*++
  5312. Routine Description:
  5313. This is a worker function passed to FrsEnumerateTable(). Each time
  5314. it is called with an IDTable record it save the parent info in the
  5315. Parent Directory Table for the volume.
  5316. Arguments:
  5317. ThreadCtx - Needed to access Jet.
  5318. TableCtx - A ptr to an IDTable context struct.
  5319. Record - A ptr to a IDTable record.
  5320. Context - A ptr to a Replica struct.
  5321. Thread Return Value:
  5322. A Jet error status. Success means call us with the next record.
  5323. Failure means don't call again and pass our status back to the
  5324. caller of FrsEnumerateTable().
  5325. --*/
  5326. {
  5327. #undef DEBSUB
  5328. #define DEBSUB "JrnlInsertParentEntry:"
  5329. ULONGLONG SystemTime;
  5330. ULONGLONG ExpireTime;
  5331. JET_ERR jerr;
  5332. ULONG GStatus;
  5333. PIDTABLE_RECORD IDTableRec = (PIDTABLE_RECORD) Record ;
  5334. PQHASH_TABLE HashTable = ((PREPLICA) Context)->pVme->ParentFidTable;
  5335. //
  5336. // Abort enum if shutting down.
  5337. //
  5338. if (FrsIsShuttingDown) {
  5339. return JET_errTermInProgress;
  5340. }
  5341. //
  5342. // Check for expired tombstones.
  5343. //
  5344. if (IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED)) {
  5345. GetSystemTimeAsFileTime((PFILETIME)&SystemTime);
  5346. COPY_TIME(&ExpireTime, &IDTableRec->TombStoneGC);
  5347. if ((ExpireTime < SystemTime) && (ExpireTime != QUADZERO)) {
  5348. //
  5349. // IDTable record has expired. Delete it.
  5350. // If there is a problem, complain but keep going.
  5351. //
  5352. jerr = DbsDeleteTableRecord(TableCtx);
  5353. DPRINT_JS(0, "ERROR - DbsDeleteTableRecord :", jerr);
  5354. return JET_errSuccess;
  5355. }
  5356. }
  5357. //
  5358. // Include the entry if replication is enabled and not marked for deletion
  5359. // and not a new file being created when we last shutdown.
  5360. //
  5361. if (IDTableRec->ReplEnabled &&
  5362. !IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED) &&
  5363. !IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_NEW_FILE_IN_PROGRESS)) {
  5364. if (IDTableRec->FileID == ZERO_FID) {
  5365. //
  5366. // We shouldn't see any records with a zero FID but some prior
  5367. // bugs could cause this to happen. Dump em out but don't try
  5368. // to insert into table since it will assert.
  5369. //
  5370. DPRINT(0, "++ WARNING -- IDTable record with zero FID found.\n");
  5371. DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
  5372. } else {
  5373. GStatus = QHashInsert(HashTable,
  5374. &IDTableRec->FileID,
  5375. &IDTableRec->ParentFileID,
  5376. ((PREPLICA) Context)->ReplicaNumber,
  5377. FALSE);
  5378. if (GStatus != GHT_STATUS_SUCCESS ) {
  5379. DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
  5380. }
  5381. }
  5382. }
  5383. //
  5384. // Return success so we can keep going thru the ID table.
  5385. //
  5386. return JET_errSuccess;
  5387. }
  5388. ULONG_PTR
  5389. JrnlFilterLinkChild (
  5390. PGENERIC_HASH_TABLE Table,
  5391. PVOID Buffer,
  5392. PVOID Context
  5393. )
  5394. /*++
  5395. Routine Description:
  5396. This function is called thru GhtEnumerateTable() to connect this
  5397. filter table entry to the parent list for the replica set passed in
  5398. Context. The GhtEnumerateTable function does not acquire any row locks
  5399. so this function is free to call GhtLookup or GhtInsert without deadlock
  5400. conflicts. It is assumed that the caller knows that it is safe to
  5401. enumerate the table. The caller is also responsible for getting the
  5402. child list lock for the replica set before calling GhtEnumerateTable().
  5403. The child list lock is associated with the replica set so when you have
  5404. the lock the child list entries for all filter entries in this replica
  5405. set are protected. When we enumerate down a subtree we only need to get
  5406. one lock.
  5407. WARNING - There is no table level lock on the Filter Table. The Filter
  5408. table is per volume so multiple replica sets could be using the same
  5409. table. The locking is at the row level where the row is indexed by
  5410. the hash function. This means that this function can only be used
  5411. when the Journal is paused. To start/add a replica set after the
  5412. system is running you must pause the journal, update the filter table
  5413. and then unpause the journal.
  5414. Arguments:
  5415. Table - the hash table being enumerated (to lookup parent entry).
  5416. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5417. Context - A pointer to the Replica struct for the replica data added to the
  5418. table.
  5419. Return Value:
  5420. A Win32 error status. A failure status return aborts enumeration.
  5421. --*/
  5422. {
  5423. #undef DEBSUB
  5424. #define DEBSUB "JrnlFilterLinkChild:"
  5425. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5426. PREPLICA Replica = (PREPLICA) Context;
  5427. PFILTER_TABLE_ENTRY ParentFilterEntry;
  5428. ULONG GStatus;
  5429. //
  5430. // Skip entry if it is not associated with the replica set of interest.
  5431. //
  5432. if (FilterEntry->Replica != Replica) {
  5433. return ERROR_SUCCESS;
  5434. }
  5435. //
  5436. // If this is the root of the replica tree there is no parent to link it to.
  5437. //
  5438. if (FilterEntry->DParentFileID == ZERO_FID) {
  5439. return ERROR_SUCCESS;
  5440. }
  5441. //
  5442. // If this entry has already been linked then return an error status to
  5443. // abort the enumeration since the entry can't be on more than one list.
  5444. //
  5445. if (FilterEntry->ChildEntry.Flink != NULL) {
  5446. return ERROR_GEN_FAILURE;
  5447. }
  5448. //
  5449. // Find the parent to link this child to.
  5450. //
  5451. GStatus = GhtLookup(Table,
  5452. &FilterEntry->DParentFileID,
  5453. TRUE,
  5454. &ParentFilterEntry);
  5455. if (GStatus != GHT_STATUS_SUCCESS) {
  5456. DPRINT1(0, "++ Error: Parent entry not found for - %08x\n", FilterEntry);
  5457. FRS_JOURNAL_FILTER_PRINT(0, Table, FilterEntry);
  5458. return ERROR_GEN_FAILURE;
  5459. }
  5460. //
  5461. // Put the Dir on the list and drop the ref count we got from Lookup.
  5462. //
  5463. InsertHeadList(&ParentFilterEntry->ChildHead, &FilterEntry->ChildEntry);
  5464. GhtDereferenceEntryByAddress(Table, ParentFilterEntry, TRUE);
  5465. return ERROR_SUCCESS;
  5466. }
  5467. ULONG_PTR
  5468. JrnlFilterLinkChildNoError(
  5469. PGENERIC_HASH_TABLE Table,
  5470. PVOID Buffer,
  5471. PVOID Context
  5472. )
  5473. /*++
  5474. Routine Description:
  5475. See JrnlFilterLinkChild().
  5476. A dirtable entry may appear to be orphaned if it is stuck in the
  5477. preinstall area and its parent has been deleted. Ignore errors
  5478. for now.
  5479. This can also happen if a remote co create is executed for a dir at the
  5480. same time the subtree containing this dir is being moved out of the
  5481. replica tree. The journal code will remove the filter entries immediately
  5482. so we skip future file changes in the subtree. So the parent is gone when
  5483. the filter entry for the dir create is added. In the course of processing
  5484. the moveout on the parent this dir entry is cleaned up.
  5485. Arguments:
  5486. Table - the hash table being enumerated (to lookup parent entry).
  5487. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5488. Context - A pointer to the Replica struct for the replica data added to the
  5489. table.
  5490. Return Value:
  5491. ERROR_SUCCESS
  5492. --*/
  5493. {
  5494. #undef DEBSUB
  5495. #define DEBSUB "JrnlFilterLinkChildNoError:"
  5496. ULONG WStatus;
  5497. WStatus = (ULONG)JrnlFilterLinkChild(Table, Buffer, Context);
  5498. DPRINT_WS(0, "++ WARN - orphaned dir; probably stuck in preinstall with deleted parent", WStatus);
  5499. return ERROR_SUCCESS;
  5500. }
  5501. ULONG
  5502. JrnlFilterUnlinkChild (
  5503. PGENERIC_HASH_TABLE Table,
  5504. PVOID Buffer,
  5505. PVOID Context
  5506. )
  5507. /*++
  5508. Routine Description:
  5509. This function is unlinks a filter entry from the child list.
  5510. The caller must get the child list lock for the replica set.
  5511. The child list lock is associated with the replica set so when you have
  5512. the lock the child list entries for all filter entries in this replica
  5513. set are protected. When we enumerate down a subtree we only need to get
  5514. one lock.
  5515. Arguments:
  5516. Table - the hash table being enumerated (to lookup parent entry).
  5517. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5518. Context - A pointer to the Replica struct for the replica data added to the
  5519. table.
  5520. Return Value:
  5521. A Win32 error status. A failure status return aborts enumeration.
  5522. --*/
  5523. {
  5524. #undef DEBSUB
  5525. #define DEBSUB "JrnlFilterUnlinkChild:"
  5526. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5527. PREPLICA Replica = (PREPLICA) Context;
  5528. PFILTER_TABLE_ENTRY ParentFilterEntry;
  5529. ULONG GStatus;
  5530. //
  5531. // Skip entry if it is not associated with the replica set of interest.
  5532. // Return error_success so this function can be called by GhtEnumerateTable().
  5533. //
  5534. if (FilterEntry->Replica != Replica) {
  5535. return ERROR_SUCCESS;
  5536. }
  5537. //
  5538. // If this entry is not on the list then return an error status to
  5539. // abort the enumeration.
  5540. //
  5541. if (FilterEntry->ChildEntry.Flink == NULL) {
  5542. return ERROR_GEN_FAILURE;
  5543. }
  5544. //
  5545. // Pull the entry off the list.
  5546. //
  5547. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  5548. FilterEntry->ChildEntry.Flink = NULL;
  5549. FilterEntry->ChildEntry.Blink = NULL;
  5550. return ERROR_SUCCESS;
  5551. }
  5552. ULONG_PTR
  5553. JrnlFilterGetRoot (
  5554. PGENERIC_HASH_TABLE Table,
  5555. PVOID Buffer,
  5556. PVOID Context
  5557. )
  5558. /*++
  5559. Routine Description:
  5560. This function is called thru GhtEnumerateTable() to find the root
  5561. of the replica set specified by the Context parameter.
  5562. Arguments:
  5563. Table - the hash table being enumerated (to lookup parent entry).
  5564. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5565. Context - A pointer to the Replica struct for the replica data added to the
  5566. table.
  5567. Return Value:
  5568. The root filter entry for the Replica Set, else NULL to keep looking.
  5569. --*/
  5570. {
  5571. #undef DEBSUB
  5572. #define DEBSUB "JrnlFilterGetRoot:"
  5573. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5574. PREPLICA Replica = (PREPLICA) Context;
  5575. //
  5576. // Skip entry if it is not associated with the replica set of interest.
  5577. //
  5578. if (FilterEntry->Replica != Replica) {
  5579. return (ULONG_PTR)NULL;
  5580. }
  5581. //
  5582. // If this is the root of the replica tree we're done.
  5583. //
  5584. if (FilterEntry->DParentFileID == ZERO_FID) {
  5585. return (ULONG_PTR)FilterEntry;
  5586. }
  5587. return (ULONG_PTR)NULL;
  5588. }
  5589. ULONG
  5590. JrnlSubTreePrint (
  5591. PGENERIC_HASH_TABLE Table,
  5592. PVOID Buffer,
  5593. PVOID Context
  5594. )
  5595. /*++
  5596. Routine Description:
  5597. This function is called thru GhtEnumerateTable() to dump a Filter entry.
  5598. The enum caller takes a ref on the entry. we drop it here.
  5599. Arguments:
  5600. Table - the hash table being enumerated (to lookup parent entry).
  5601. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5602. Context - A pointer to the Replica struct for the replica data added to the
  5603. table.
  5604. Return Value:
  5605. Win32 status
  5606. --*/
  5607. {
  5608. #undef DEBSUB
  5609. #define DEBSUB "JrnlSubTreePrint:"
  5610. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5611. PREPLICA Replica = (PREPLICA) Context;
  5612. //
  5613. // Abort enum if shutting down.
  5614. //
  5615. if (FrsIsShuttingDown) {
  5616. return ERROR_OPERATION_ABORTED;
  5617. }
  5618. //
  5619. // print the entry if it is associated with the replica set of interest.
  5620. //
  5621. if (FilterEntry->Replica == Replica) {
  5622. FRS_JOURNAL_FILTER_PRINT(4, Table, FilterEntry);
  5623. }
  5624. DECREMENT_FILTER_REF_COUNT(FilterEntry);
  5625. return ERROR_SUCCESS;
  5626. }
  5627. BOOL
  5628. ActiveChildrenKeyMatch(
  5629. PVOID Buf,
  5630. PVOID QKey
  5631. )
  5632. /*++
  5633. Routine Description:
  5634. Check for an exact key match.
  5635. Arguments:
  5636. Buf -- ptr to a Guid1.
  5637. QKey -- ptr to Guid2.
  5638. Return Value:
  5639. TRUE if exact match.
  5640. --*/
  5641. {
  5642. #undef DEBSUB
  5643. #define DEBSUB "ActiveChildrenKeyMatch:"
  5644. PULONG pUL1, pUL2;
  5645. pUL1 = (PULONG) Buf;
  5646. pUL2 = (PULONG) QKey;
  5647. if (!ValueIsMultOf4(pUL1)) {
  5648. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL1, *pUL1);
  5649. FRS_ASSERT(ValueIsMultOf4(pUL1));
  5650. return 0xFFFFFFFF;
  5651. }
  5652. if (!ValueIsMultOf4(pUL2)) {
  5653. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL2, *pUL2);
  5654. FRS_ASSERT(ValueIsMultOf4(pUL2));
  5655. return 0xFFFFFFFF;
  5656. }
  5657. return GUIDS_EQUAL(pUL1, pUL2);
  5658. }
  5659. ULONG
  5660. ActiveChildrenHashCalc(
  5661. PVOID Buf,
  5662. PULONGLONG QKey
  5663. )
  5664. /*++
  5665. Routine Description:
  5666. Calculate a hash value for the file guid used in the ActiveChildren Table.
  5667. Arguments:
  5668. Buf -- ptr to a Guid.
  5669. QKey -- Returned 8 byte hash key for the QKey field of QHASH_ENTRY.
  5670. Return Value:
  5671. 32 bit hash value.
  5672. --*/
  5673. {
  5674. #undef DEBSUB
  5675. #define DEBSUB "ActiveChildrenHashCalc:"
  5676. PULONG pUL = (PULONG) Buf;
  5677. PUSHORT pUS = (PUSHORT) Buf;
  5678. if (!ValueIsMultOf4(pUL)) {
  5679. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL, *pUL);
  5680. FRS_ASSERT(ValueIsMultOf4(pUL));
  5681. return 0xFFFFFFFF;
  5682. }
  5683. //
  5684. // Calc QKey, 4 byte hash is ok.
  5685. //
  5686. *QKey = (ULONGLONG) (pUL[0] ^ pUL[1] ^ pUL[2] ^ pUL[3]);
  5687. //
  5688. // Calc hash based on the time. Include node part for remote COs.
  5689. //
  5690. return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2] ^ pUS[6] ^ pUS[7]);
  5691. }
  5692. ULONG
  5693. JrnlOpen(
  5694. IN PREPLICA Replica,
  5695. OUT PVOLUME_MONITOR_ENTRY *pVmeArg,
  5696. PCONFIG_TABLE_RECORD ConfigRecord
  5697. )
  5698. /*++
  5699. Routine Description:
  5700. This routine opens the journal specified by the Replica->Volume parameter.
  5701. It creates and fills in a Volume monitor entry that can
  5702. be used to read the NTFS Journal. It checks if objects and object IDs
  5703. are supported on the volume and fails if they aren't. It checks for an
  5704. object ID on the root directory of the volume and puts one there if necessary.
  5705. It keeps a list of volumes (VolumeMonitorQueue) that currently have journal
  5706. files open. If it finds this request in the list it bumps the ref count
  5707. and returns. pVme is set to NULL with status success indicating I/O
  5708. on the journal is proceeding.
  5709. If this volume is not in the list then it is added. The volume Object ID
  5710. is used to identify the volume in the Volume Monitor list. A read
  5711. is not posted to the journal at this time. This allows journal opens for
  5712. other replica sets to be done so we start out at the lowest USN of all
  5713. replica sets hosted by a given volume. In addition we need to know about
  5714. all current replica sets when we start filtering journal entries.
  5715. The volume monitor entry related to the given replica set is
  5716. returned in pVme. If we fail to open the journal pVmeArg is NULL
  5717. and status indicates the failure.
  5718. If the journal doesn't exist it is created. The max size is set to
  5719. JRNL_DEFAULT_MAX_SIZE MB with an allocation size of
  5720. JRNL_DEFAULT_ALLOC_DELTA MB.
  5721. The following checks are made to make sure that the volume and journal
  5722. info is not changed while the service was not running.
  5723. VOLUME ROOT OBJECTID MISMATCH CHECK:
  5724. In case of a mismatch the information in the Db is updated with the
  5725. correct value for the volume guid.
  5726. JOURNAL ID MISMATCH CHECK:
  5727. In case of a mismatch the replica set is marked to be deleted.
  5728. Arguments:
  5729. Replica: Replica being opened
  5730. pVmeArg: A pointer to return the Volume Monitor Entry in.
  5731. ConfigRecord: The ConfigTable record for this replica set.
  5732. Return Value:
  5733. Win32 status
  5734. --*/
  5735. {
  5736. #undef DEBSUB
  5737. #define DEBSUB "JrnlOpen:"
  5738. USN_JOURNAL_DATA UsnJournalData;
  5739. CREATE_USN_JOURNAL_DATA CreateUsnJournalData = {
  5740. 0, // MaximumSize from registry
  5741. JRNL_DEFAULT_ALLOC_DELTA // AllocationDelta
  5742. };
  5743. IO_STATUS_BLOCK Iosb;
  5744. ULONG JournalSize;
  5745. NTSTATUS Status;
  5746. DWORD WStatus;
  5747. ULONG BytesReturned;
  5748. PVOLUME_MONITOR_ENTRY pVme;
  5749. HANDLE RootHandle;
  5750. HANDLE VolumeHandle = INVALID_HANDLE_VALUE;
  5751. ULONG VolumeInfoLength;
  5752. PFILE_FS_VOLUME_INFORMATION VolumeInfo;
  5753. FILE_OBJECTID_BUFFER ObjectIdBuffer;
  5754. PLIST_ENTRY Entry;
  5755. WCHAR VolumeRootDir[MAX_PATH + 1];
  5756. CHAR GuidStr[GUID_CHAR_LEN];
  5757. CHAR TimeString[TIME_STRING_LENGTH];
  5758. CHAR HashTableName[40];
  5759. PCOMMAND_PACKET CmdPkt = NULL;
  5760. HANDLE DummyHandle = INVALID_HANDLE_VALUE;
  5761. ULARGE_INTEGER FreeBytesAvailableToCaller;
  5762. ULARGE_INTEGER TotalNumberOfBytes;
  5763. *pVmeArg = NULL;
  5764. //
  5765. // Does the volume exist and is it NTFS?
  5766. //
  5767. WStatus = FrsVerifyVolume(Replica->Volume,
  5768. Replica->SetName->Name,
  5769. FILE_PERSISTENT_ACLS | FILE_SUPPORTS_OBJECT_IDS);
  5770. if (!WIN_SUCCESS(WStatus)) {
  5771. DPRINT2_WS(3, ":S: JrnlOpen - Root path Volume (%ws) for %ws does not exist or is not NTFS;",
  5772. Replica->Volume, Replica->SetName->Name, WStatus);
  5773. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5774. return WStatus;
  5775. }
  5776. //
  5777. // "\\.\" is used as an escape prefix to prevent the name translator
  5778. // from appending a trailing "\" on a drive letter. Need to do a volume open.
  5779. // \\.\E: gets mapped to E: (really an NT internal device name)
  5780. // \\.\E:\ gets mapped to E:\
  5781. // E: gets mapped to E:\
  5782. // E:\ gets mapped to E:\
  5783. //
  5784. //
  5785. // Get a volume handle.
  5786. //
  5787. _wcsupr( Replica->Volume );
  5788. VolumeHandle = CreateFile(Replica->Volume,
  5789. GENERIC_READ | GENERIC_WRITE,
  5790. FILE_SHARE_READ | FILE_SHARE_WRITE,
  5791. NULL,
  5792. OPEN_EXISTING,
  5793. FILE_ATTRIBUTE_NORMAL,
  5794. NULL );
  5795. if (!HANDLE_IS_VALID(VolumeHandle)) {
  5796. WStatus = GetLastError();
  5797. DPRINT1_WS(0, "++ ERROR - JrnlOpen: Unable to open %ws volume :",
  5798. Replica->Volume, WStatus);
  5799. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5800. return WStatus;
  5801. } else {
  5802. WStatus = GetLastError();
  5803. DPRINT1_WS(4, "++ JrnlOpen: Open on volume %ws :", Replica->Volume, WStatus);
  5804. }
  5805. //
  5806. // Get the volume information.
  5807. //
  5808. pVme = FrsAllocType(VOLUME_MONITOR_ENTRY_TYPE);
  5809. pVme->FrsVsn = QUADZERO;
  5810. pVme->ReplayUsnValid = FALSE;
  5811. VolumeInfoLength = sizeof(FILE_FS_VOLUME_INFORMATION) +
  5812. MAXIMUM_VOLUME_LABEL_LENGTH;
  5813. VolumeInfo = &pVme->FSVolInfo;
  5814. Status = NtQueryVolumeInformationFile(VolumeHandle,
  5815. &Iosb,
  5816. VolumeInfo,
  5817. VolumeInfoLength,
  5818. FileFsVolumeInformation);
  5819. if ( NT_SUCCESS(Status) ) {
  5820. VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
  5821. FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
  5822. DPRINT5(4,":S: %-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
  5823. VolumeInfo->VolumeLabel,
  5824. VolumeInfo->VolumeLabelLength,
  5825. (VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
  5826. VolumeInfo->VolumeSerialNumber,
  5827. TimeString);
  5828. if (!VolumeInfo->SupportsObjects) {
  5829. //
  5830. // No object support on the volume.
  5831. //
  5832. EPRINT4(EVENT_FRS_VOLUME_NOT_SUPPORTED,
  5833. Replica->SetName->Name, ComputerName, Replica->Root, Replica->Volume);
  5834. DPRINT(0, ":S: ERROR - Object IDs are not supported on the volume.\n");
  5835. pVme = FrsFreeType(pVme);
  5836. FRS_CLOSE(VolumeHandle);
  5837. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  5838. return FrsSetLastNTError(STATUS_NOT_IMPLEMENTED);
  5839. }
  5840. //
  5841. // Scan the VolumeMonitorStopQueue to see if we already tried
  5842. // this one and failed.
  5843. //
  5844. ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  5845. if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
  5846. //
  5847. // Journaling was stopped on this volume by request. E.g.,
  5848. // when a replica set is stopped and restarted in order
  5849. // to pick up a new file or dir filter list.
  5850. //
  5851. // Allow the restart.
  5852. //
  5853. if (WIN_SUCCESS(pE->WStatus)) {
  5854. //
  5855. // No more references; free the memory
  5856. //
  5857. //
  5858. // Currently, replica sets continue to refererence
  5859. // their Vme even after VmeDeactivate(). So don't
  5860. // free Vmes regardless of their reference count
  5861. //
  5862. // if (pE->ReferenceCount == 0) {
  5863. // FrsRtlRemoveEntryQueueLock(&VolumeMonitorStopQueue,
  5864. // &pE->ListEntry);
  5865. // FrsFreeType(pE);
  5866. // }
  5867. continue;
  5868. }
  5869. //
  5870. // We already tried this one and failed. Free the entry,
  5871. // close the handle and return with same status as last time.
  5872. //
  5873. WStatus = pE->WStatus;
  5874. ReleaseListLock(&VolumeMonitorStopQueue);
  5875. DPRINT3(4,":S: VME is on stop queue. %-16ws, VSN: %08X, VolCreTim: %s\n",
  5876. VolumeInfo->VolumeLabel, VolumeInfo->VolumeSerialNumber,
  5877. TimeString);
  5878. FrsFreeType(pVme);
  5879. FRS_CLOSE(VolumeHandle);
  5880. return WStatus;
  5881. }
  5882. );
  5883. } else {
  5884. DPRINT_NT(0, ":S: ERROR - Volume root QueryVolumeInformationFile failed.", Status);
  5885. pVme = FrsFreeType(pVme);
  5886. FRS_CLOSE(VolumeHandle);
  5887. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5888. return FrsSetLastNTError(Status);
  5889. }
  5890. //
  5891. // Get the volume root dir object ID.
  5892. // Always open the replica root by masking off the FILE_OPEN_REPARSE_POINT flag
  5893. // because we want to open the destination dir not the junction if the root
  5894. // happens to be a mount point.
  5895. //
  5896. wsprintf( VolumeRootDir, TEXT("%ws\\"), Replica->Volume);
  5897. WStatus = FrsOpenSourceFileW(&RootHandle,
  5898. VolumeRootDir,
  5899. WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5900. if (WIN_ACCESS_DENIED(WStatus)) {
  5901. //
  5902. // For some mysterious reason the root dir on some volumes ends up
  5903. // with the read-only attribute set. It is currently not understood
  5904. // how this happens (as of 6/2000) but PSS has seen it on a number
  5905. // of cases, generally when DCPromo fails because FRS can't init
  5906. // the sys vol. We are going to just clear it here and try again.
  5907. // Unfortunately the ATTRIB cmd does not work on the root dir.
  5908. //
  5909. FILE_BASIC_INFORMATION BasicInfo;
  5910. HANDLE hFile;
  5911. WStatus = FrsOpenSourceFileW(&hFile,
  5912. VolumeRootDir,
  5913. READ_ATTRIB_ACCESS | FILE_WRITE_ATTRIBUTES,
  5914. OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5915. DPRINT1_WS(0, "++ JrnlOpen: Open on root dir %ws :", VolumeRootDir, WStatus);
  5916. if (HANDLE_IS_VALID(hFile)) {
  5917. Status = NtQueryInformationFile( hFile,
  5918. &Iosb,
  5919. &BasicInfo,
  5920. sizeof( BasicInfo ),
  5921. FileBasicInformation );
  5922. if (NT_SUCCESS( Status )) {
  5923. DPRINT2(0,"Attributes for %s are currently: %0x\n",
  5924. VolumeRootDir, BasicInfo.FileAttributes );
  5925. if (BooleanFlagOn(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY)) {
  5926. ClearFlag(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY);
  5927. Status = NtSetInformationFile( hFile,
  5928. &Iosb,
  5929. &BasicInfo,
  5930. sizeof( BasicInfo ),
  5931. FileBasicInformation );
  5932. if (NT_SUCCESS( Status )) {
  5933. DPRINT(0, "Read-Only attribute cleared succesfully\n" );
  5934. //
  5935. // ******** Add event log message saying what we did.
  5936. //
  5937. } else {
  5938. DPRINT_NT(0, "Couldn't set attributes, error status :", Status );
  5939. }
  5940. }
  5941. CloseHandle( hFile );
  5942. //
  5943. // Now retry the open.
  5944. //
  5945. WStatus = FrsOpenSourceFileW(&RootHandle,
  5946. VolumeRootDir,
  5947. WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5948. } else {
  5949. DPRINT_NT(0, "Couldn't get attributes, error status :", Status );
  5950. WStatus = FrsSetLastNTError(Status);
  5951. CloseHandle( hFile );
  5952. }
  5953. }
  5954. }
  5955. if (!WIN_SUCCESS(WStatus)) {
  5956. DPRINT1_WS(0, ":S: ERROR - Failed to open the volume root dir: %ws ;",
  5957. VolumeRootDir, WStatus);
  5958. pVme = FrsFreeType(pVme);
  5959. FRS_CLOSE(VolumeHandle);
  5960. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5961. return WStatus;
  5962. }
  5963. //
  5964. // zero the buffer in case the data that comes back is short.
  5965. //
  5966. ZeroMemory(&ObjectIdBuffer, sizeof(FILE_OBJECTID_BUFFER));
  5967. //
  5968. // Get the Object ID from the volume root.
  5969. //
  5970. Status = NtFsControlFile(
  5971. RootHandle, // file handle
  5972. NULL, // event
  5973. NULL, // apc routine
  5974. NULL, // apc context
  5975. &Iosb, // iosb
  5976. FSCTL_GET_OBJECT_ID, // FsControlCode
  5977. &RootHandle, // input buffer
  5978. sizeof(HANDLE), // input buffer length
  5979. &ObjectIdBuffer, // OutputBuffer for data from the FS
  5980. sizeof(FILE_OBJECTID_BUFFER)); // OutputBuffer Length
  5981. if (NT_SUCCESS(Status)) {
  5982. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  5983. DPRINT1(4, ":S: Oid for volume root is %s\n", GuidStr );
  5984. } else
  5985. if (Status == STATUS_NOT_IMPLEMENTED) {
  5986. DPRINT1_NT(0, ":S: ERROR - FSCTL_GET_OBJECT_ID failed on file %ws. Object IDs are not enabled on the volume.\n",
  5987. VolumeRootDir, Status);
  5988. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  5989. }
  5990. //
  5991. // If there is no object ID on the root directory put one there.
  5992. // Date : 02/07/2000
  5993. // STATUS_OBJECT_NAME_NOT_FOUND was the old return value
  5994. // and STATUS_OBJECTID_NOT_FOUND is the new return value.
  5995. // Check for both so it works on systems running older and
  5996. // newer ntfs.sys
  5997. //
  5998. if (Status == STATUS_OBJECT_NAME_NOT_FOUND ||
  5999. Status == STATUS_OBJECTID_NOT_FOUND ) {
  6000. FrsUuidCreate((GUID *)ObjectIdBuffer.ObjectId);
  6001. Status = NtFsControlFile(
  6002. RootHandle, // file handle
  6003. NULL, // event
  6004. NULL, // apc routine
  6005. NULL, // apc context
  6006. &Iosb, // iosb
  6007. FSCTL_SET_OBJECT_ID, // FsControlCode
  6008. &ObjectIdBuffer, // input buffer
  6009. sizeof(FILE_OBJECTID_BUFFER),// input buffer length
  6010. NULL, // OutputBuffer for data from the FS
  6011. 0); // OutputBuffer Length
  6012. if (NT_SUCCESS(Status)) {
  6013. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  6014. DPRINT1(4, ":S: Oid set on volume root is %s\n", GuidStr );
  6015. } else {
  6016. DPRINT1(0, ":S: ERROR - FSCTL_SET_OBJECT_ID failed on volume root %ws.\n",
  6017. VolumeRootDir);
  6018. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  6019. if (Status == STATUS_NOT_IMPLEMENTED) {
  6020. DPRINT(0, ":S: ERROR - Object IDs are not enabled on the volume.\n");
  6021. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  6022. } else
  6023. if (Status == STATUS_ACCESS_DENIED) {
  6024. DPRINT(0, ":S: ERROR - Access Denied.\n");
  6025. } else {
  6026. DPRINT_NT(0, "ERROR - NtFsControlFile(FSCTL_SET_OBJECT_ID) failed.", Status);
  6027. }
  6028. }
  6029. }
  6030. FRS_CLOSE(RootHandle);
  6031. //
  6032. // If object IDs don't work on the volume then bail.
  6033. //
  6034. if (!NT_SUCCESS(Status)) {
  6035. pVme = FrsFreeType(pVme);
  6036. FRS_CLOSE(VolumeHandle);
  6037. return FrsSetLastNTError(Status);
  6038. }
  6039. //
  6040. // VOLUME ROOT OBJECTID MISMATCH CHECK:
  6041. //
  6042. // Keep the Volume root guid up-to-date in the Db. If it has changed then update it in the config record.
  6043. //
  6044. if (!GUIDS_EQUAL(&(ObjectIdBuffer.ObjectId), &(ConfigRecord->FSVolGuid))) {
  6045. DPRINT1(4,"WARN - Volume root guid mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  6046. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  6047. DPRINT1(4,"WARN - Volume root guid (FS) (%s)\n",GuidStr);
  6048. GuidToStr((GUID *)&(ConfigRecord->FSVolGuid), GuidStr);
  6049. DPRINT1(4,"WARN - Volume root guid (DB) (%s)\n",GuidStr);
  6050. DPRINT1(0,"WARN - Volume root guid updated for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  6051. COPY_GUID(&(ConfigRecord->FSVolGuid), &(ObjectIdBuffer.ObjectId));
  6052. Replica->NeedsUpdate = TRUE;
  6053. }
  6054. //
  6055. // Scan the VolumeMonitorQueue to see if we are already doing this one.
  6056. //
  6057. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6058. ForEachListEntryLock(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  6059. //
  6060. // Consider changing this test to use the guid on the vol root dir.
  6061. //
  6062. if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
  6063. //
  6064. // Already monitoring this volume. Free entry and close handle.
  6065. //
  6066. FrsFreeType(pVme);
  6067. pVme = pE;
  6068. FRS_CLOSE(VolumeHandle);
  6069. //
  6070. // Release the lock and Return the Volume Monitor entry pointer.
  6071. //
  6072. //pVme->ActiveReplicas += 1;
  6073. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6074. DPRINT1(4, ":S: Volume %ws already monitored.\n", pVme->FSVolInfo.VolumeLabel);
  6075. //
  6076. // JOURNAL ID MISMATCH CHECK:
  6077. //
  6078. // If LastShutdown is 0 then this is the very first time we have started
  6079. // replication on this replica set so set the current CndUsnJournalID in
  6080. // the config record. Even if Lastshutdown is not 0 CnfUsnJournalID could
  6081. // be 0 because it was not getting correctly updated in Win2K.
  6082. //
  6083. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  6084. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  6085. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  6086. //
  6087. // Update the JournalID in the Db and set NeedsUpdate so that the
  6088. // config record gets written to the Db at the next update call.
  6089. //
  6090. ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
  6091. Replica->NeedsUpdate = TRUE;
  6092. } else
  6093. //
  6094. // Check if the JournalID from pVme matches with the CnfUsnJournalID from the
  6095. // config record for this replica set. If it does not then it means that
  6096. // this replica set has been moved. Returning error here will trigger
  6097. // a deletion of the replica set. The set will be recreated at the next
  6098. // poll cycle and it will either be primary or non-auth depending on the
  6099. // case.
  6100. //
  6101. if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
  6102. //
  6103. // Usn Journal has a new instance code. ==> A delete / create occurred.
  6104. // Treat it as a journal wrap error.
  6105. //
  6106. DPRINT1(0,"ERROR - JournalID mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  6107. DPRINT2(0,"ERROR - JournalID %x(FS) != %x(DB)\n",
  6108. pVme->UsnJournalData.UsnJournalID, ConfigRecord->CnfUsnJournalID);
  6109. DPRINT1(0,"ERROR - Replica Set (%ws) is marked to be deleted\n",Replica->ReplicaName->Name);
  6110. Replica->FStatus = FrsErrorMismatchedJournalId;
  6111. JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
  6112. return ERROR_REVISION_MISMATCH;
  6113. }
  6114. *pVmeArg = pVme;
  6115. Replica->FStatus = FrsErrorSuccess;
  6116. return ERROR_SUCCESS;
  6117. }
  6118. );
  6119. //
  6120. // Create the Usn Journal if it does not exist.
  6121. //
  6122. CfgRegReadDWord(FKC_NTFS_JRNL_SIZE, NULL, 0, &JournalSize);
  6123. CreateUsnJournalData.MaximumSize = (ULONGLONG)JournalSize * (ULONGLONG)(1024 * 1024);
  6124. DPRINT2(4, ":S: Creating NTFS USN Journal on %ws with size %d MB\n",
  6125. Replica->Volume, JournalSize );
  6126. Status = NtFsControlFile( VolumeHandle,
  6127. NULL,
  6128. NULL,
  6129. NULL,
  6130. &Iosb,
  6131. FSCTL_CREATE_USN_JOURNAL,
  6132. &CreateUsnJournalData,
  6133. sizeof(CreateUsnJournalData),
  6134. NULL,
  6135. 0 );
  6136. //
  6137. // Query the journal for the Journal ID, the USN info, etc.
  6138. //
  6139. if (!DeviceIoControl(VolumeHandle,
  6140. FSCTL_QUERY_USN_JOURNAL,
  6141. NULL,
  6142. 0,
  6143. &pVme->UsnJournalData,
  6144. sizeof(USN_JOURNAL_DATA),
  6145. &BytesReturned,
  6146. NULL)) {
  6147. WStatus = GetLastError();
  6148. DPRINT1_WS(4, ":S: JrnlOpen: FSCTL_QUERY_USN_JOURNAL on volume %ws :",
  6149. Replica->Volume, WStatus);
  6150. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6151. pVme = FrsFreeType(pVme);
  6152. FRS_CLOSE(VolumeHandle);
  6153. if (GetDiskFreeSpaceEx(Replica->Root,&FreeBytesAvailableToCaller,&TotalNumberOfBytes,NULL)) {
  6154. //
  6155. // Print the event log message if the available free space is
  6156. // less than 1%. The current problem to initialize
  6157. // the journal could be due to low disk space.
  6158. //
  6159. if ((FreeBytesAvailableToCaller.QuadPart*100) < TotalNumberOfBytes.QuadPart) {
  6160. if ((Replica->Volume != NULL) && (wcslen(Replica->Volume) >= wcslen(L"\\\\.\\D:"))) {
  6161. //
  6162. // If we are able to get the volume in the form
  6163. // \\.\D: then use the volume in the event log so
  6164. // that we don't print more than one event log
  6165. // message per volume. If we can't get the
  6166. // volume then we print the path.
  6167. //
  6168. EPRINT1(EVENT_FRS_OUT_OF_DISK_SPACE, &Replica->Volume[4]);
  6169. } else {
  6170. EPRINT1(EVENT_FRS_OUT_OF_DISK_SPACE, Replica->Root);
  6171. }
  6172. }
  6173. DPRINT3(4, ":S: Disk space check: %ws FreeBytesAvailableToCaller = %08x %08x,TotalNumberOfBytes = %08x %08x\n",
  6174. Replica->Root,
  6175. PRINTQUAD(FreeBytesAvailableToCaller.QuadPart),
  6176. PRINTQUAD(TotalNumberOfBytes.QuadPart));
  6177. }
  6178. Replica->FStatus = FrsErrorJournalInitFailed;
  6179. return WStatus;
  6180. }
  6181. if (BytesReturned != sizeof(USN_JOURNAL_DATA)) {
  6182. WStatus = GetLastError();
  6183. DPRINT2(4, "JrnlOpen: FSCTL_QUERY_USN_JOURNAL bytes returnd: %d, Expected: %d\n",
  6184. BytesReturned, sizeof(USN_JOURNAL_DATA));
  6185. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6186. pVme = FrsFreeType(pVme);
  6187. FRS_CLOSE(VolumeHandle);
  6188. Replica->FStatus = FrsErrorJournalInitFailed;
  6189. return WStatus;
  6190. }
  6191. //
  6192. // Display the USN Journal Data.
  6193. //
  6194. DPRINT1(4, ":S: UsnJournalID %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.UsnJournalID ));
  6195. DPRINT1(4, ":S: FirstUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.FirstUsn ));
  6196. DPRINT1(4, ":S: NextUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.NextUsn ));
  6197. DPRINT1(4, ":S: LowestValidUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.LowestValidUsn ));
  6198. DPRINT1(4, ":S: MaxUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaxUsn ));
  6199. DPRINT1(4, ":S: MaximumSize %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaximumSize ));
  6200. DPRINT1(4, ":S: AllocationDelta %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.AllocationDelta));
  6201. //
  6202. // If the NextUsn is 0 then create a dummy file to increment the usn
  6203. // so that we don't end up picking up a valid change at usn 0.
  6204. //
  6205. if (pVme->UsnJournalData.NextUsn == QUADZERO) {
  6206. FrsCreateFileRelativeById(&DummyHandle,
  6207. Replica->PreInstallHandle,
  6208. NULL,
  6209. 0,
  6210. FILE_ATTRIBUTE_TEMPORARY,
  6211. L"NTFRS_TEMP_FILE.TMP",
  6212. (USHORT)(wcslen(L"NTFRS_TEMP_FILE.TMP") * sizeof(WCHAR)),
  6213. NULL,
  6214. FILE_OPEN_IF,
  6215. RESTORE_ACCESS | DELETE);
  6216. if (HANDLE_IS_VALID(DummyHandle)) {
  6217. FrsDeleteByHandle(L"NTFRS_TEMP_FILE.TMP", DummyHandle);
  6218. }
  6219. FRS_CLOSE(DummyHandle);
  6220. }
  6221. //
  6222. //
  6223. // JOURNAL ID MISMATCH CHECK:
  6224. //
  6225. // If LastShutdown is 0 then this is the very first time we have started
  6226. // replication on this replica set so set the current pVme->JrnlReadPoint to
  6227. // the end of the Journal. Also save the Journal ID so we can detect if
  6228. // someone does a delete/create cycle on the journal.
  6229. // There are cases when the replica set gets created
  6230. // and then shutdown without ever initializing.
  6231. //
  6232. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  6233. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  6234. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  6235. ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
  6236. Replica->NeedsUpdate = TRUE;
  6237. } else
  6238. if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
  6239. //
  6240. // Usn Journal has a new instance code. ==> A delete / create occurred.
  6241. // Treat it as a journal wrap error.
  6242. //
  6243. Replica->FStatus = FrsErrorMismatchedJournalId;
  6244. JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
  6245. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6246. pVme = FrsFreeType(pVme);
  6247. FRS_CLOSE(VolumeHandle);
  6248. return ERROR_REVISION_MISMATCH;
  6249. }
  6250. //
  6251. // Re-open the volume to allow for asynchronous IO. We don't
  6252. // open with the "OVERLAPPED" flag initially because then the
  6253. // above "create journal" doesn't finish in time for us to post
  6254. // a "read journal" request. We get a "INVALID_DEVICE_STATE"
  6255. // status.
  6256. //
  6257. FRS_CLOSE(VolumeHandle);
  6258. VolumeHandle = CreateFile(Replica->Volume,
  6259. GENERIC_READ | GENERIC_WRITE,
  6260. FILE_SHARE_READ | FILE_SHARE_WRITE,
  6261. NULL,
  6262. OPEN_EXISTING,
  6263. FILE_FLAG_OVERLAPPED,
  6264. NULL );
  6265. WStatus = GetLastError();
  6266. if (!HANDLE_IS_VALID(VolumeHandle)) {
  6267. DPRINT1_WS(0, "Can't open file %ws;", Replica->Volume, WStatus);
  6268. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6269. pVme = FrsFreeType(pVme);
  6270. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  6271. return WStatus;
  6272. } else {
  6273. DPRINT1(4, ":S: JrnlOpen: Open on volume %ws\n", Replica->Volume);
  6274. }
  6275. //
  6276. // This is a new volume journal add it to the list.
  6277. //
  6278. pVme->VolumeHandle = VolumeHandle;
  6279. pVme->DriveLetter[0] = Replica->Volume[wcslen(Replica->Volume) - 2];
  6280. pVme->DriveLetter[1] = Replica->Volume[wcslen(Replica->Volume) - 1];
  6281. pVme->DriveLetter[2] = UNICODE_NULL;
  6282. //
  6283. // Associate the volume handle with the completion port.
  6284. //
  6285. JournalCompletionPort = CreateIoCompletionPort(
  6286. VolumeHandle,
  6287. JournalCompletionPort,
  6288. (ULONG_PTR) pVme, // key associated with this handle
  6289. 0);
  6290. if (NT_SUCCESS(Status) && (JournalCompletionPort != NULL)) {
  6291. //
  6292. // Set the ref count and put the new entry on the queue.
  6293. // This will get the JournalReadThread to start looking at the
  6294. // completion port. Save the volume handle.
  6295. //
  6296. pVme->VolumeHandle = VolumeHandle;
  6297. pVme->ActiveReplicas = 0;
  6298. //
  6299. // Start Ref count at 2. One for being on the VolumeMonitorQueue and
  6300. // one for the initial allocation. The latter is released at VME shutdown.
  6301. //
  6302. pVme->ReferenceCount = 2;
  6303. pVme->JournalState = JRNL_STATE_INITIALIZING;
  6304. FrsRtlInsertTailQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
  6305. DPRINT2(4, ":S: Create Usn Journal success on %ws, Total vols: %d\n",
  6306. pVme->FSVolInfo.VolumeLabel, VolumeMonitorQueue.Count);
  6307. } else {
  6308. //
  6309. // Journal creation or CreateIoCompletionPort failed. Clean up.
  6310. //
  6311. WStatus = GetLastError();
  6312. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6313. DPRINT_NT(0, ":S: ERROR - Create Usn Journal failed.", Status );
  6314. if (JournalCompletionPort == NULL) {
  6315. DPRINT_WS(0, ":S: ERROR - Failed to create IoCompletion port.", WStatus);
  6316. Status = STATUS_UNSUCCESSFUL;
  6317. }
  6318. pVme = FrsFreeType(pVme);
  6319. FRS_CLOSE(VolumeHandle);
  6320. Replica->FStatus = FrsErrorJournalInitFailed;
  6321. return FrsSetLastNTError(Status);
  6322. }
  6323. //
  6324. // Find end of journal for use in recovery and new replica set creates.
  6325. //
  6326. WStatus = JrnlGetEndOfJournal(pVme, &pVme->JrnlRecoveryEnd);
  6327. if (!WIN_SUCCESS(WStatus)) {
  6328. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6329. pVme = FrsFreeType(pVme);
  6330. FRS_CLOSE(VolumeHandle);
  6331. Replica->FStatus = FrsErrorJournalInitFailed;
  6332. return WStatus;
  6333. }
  6334. DPRINT1(3, ":S: Current End of journal at : %08x %08x\n", PRINTQUAD(pVme->JrnlRecoveryEnd));
  6335. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  6336. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  6337. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  6338. pVme->JrnlReadPoint = pVme->JrnlRecoveryEnd;
  6339. DPRINT1(4, ":S: Initial journal read starting at: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  6340. }
  6341. //
  6342. // Allocate a volume filter hash table.
  6343. //
  6344. _snprintf(HashTableName, sizeof(HashTableName), "FT_%ws", VolumeInfo->VolumeLabel);
  6345. pVme->FilterTable = GhtCreateTable(
  6346. HashTableName, // Table name
  6347. VOLUME_FILTER_HASH_TABLE_ROWS, // NumberRows
  6348. OFFSET(FILTER_TABLE_ENTRY, DFileID), // KeyOffset is dir fid
  6349. sizeof(LONGLONG), // KeyLength
  6350. JrnlHashEntryFree,
  6351. JrnlCompareFid,
  6352. JrnlHashCalcFid,
  6353. FRS_JOURNAL_FILTER_PRINT_FUNCTION);
  6354. //
  6355. // Allocate a parent File ID hash table for the volume.
  6356. //
  6357. // The volume parent file ID table is a specialized Qhash table intended to
  6358. // economize on memory. There is an entry in this table for every file
  6359. // in a replica set on the volume. There is one of these tables for each
  6360. // volume. Its goal in life is to give us the Old Parent Fid for a file
  6361. // after a rename. The USN journal only provides the new Parent FID.
  6362. // Once we have the old parent FID for a file or dir we can then do a lookup
  6363. // in the Volume Filter Table to determine the file's previous replica set
  6364. // so we can determine if a file or dir has moved across replica sets or
  6365. // out of a replica set entirely.
  6366. //
  6367. //
  6368. pVme->ParentFidTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6369. PARENT_FILEID_TABLE_SIZE);
  6370. SET_QHASH_TABLE_HASH_CALC(pVme->ParentFidTable, JrnlHashCalcFid);
  6371. //
  6372. // Allocate an Active Child hash table for the volume.
  6373. //
  6374. pVme->ActiveChildren = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6375. ACTIVE_CHILDREN_TABLE_SIZE);
  6376. SET_QHASH_TABLE_FLAG(pVme->ActiveChildren, QHASH_FLAG_LARGE_KEY);
  6377. SET_QHASH_TABLE_HASH_CALC2(pVme->ActiveChildren, ActiveChildrenHashCalc);
  6378. SET_QHASH_TABLE_KEY_MATCH(pVme->ActiveChildren, ActiveChildrenKeyMatch);
  6379. SET_QHASH_TABLE_FREE(pVme->ActiveChildren, FrsFree);
  6380. //
  6381. // Allocate a USN Write Filter Table for the volume and post the first
  6382. // clean request.
  6383. //
  6384. pVme->FrsWriteFilter = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6385. FRS_WRITE_FILTER_SIZE);
  6386. SET_QHASH_TABLE_HASH_CALC(pVme->FrsWriteFilter, JrnlHashCalcUsn);
  6387. JrnlSubmitCleanWriteFilter(pVme, JRNL_CLEAN_WRITE_FILTER_INTERVAL);
  6388. #ifdef RECOVERY_CONFLICT
  6389. //
  6390. // Allocate a Recovery Conflict hash table for the volume.
  6391. //
  6392. pVme->RecoveryConflictTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6393. RECOVERY_CONFLICT_TABLE_SIZE);
  6394. SET_QHASH_TABLE_HASH_CALC(pVme->RecoveryConflictTable, JrnlHashCalcFid);
  6395. #endif // RECOVERY_CONFLICT
  6396. //
  6397. // Allocate a hash table to record file name dependencies between file
  6398. // operations on this volume in the NTFS journal USN record stream.
  6399. // This is called the Name Space Table and it is used to control when
  6400. // a USN record can be merged into a prior change order affecting the same
  6401. // file. Some examples of when a USN record merge can not be done are
  6402. // given elsewhere, search for USN MERGE RESTRICTIONS.
  6403. //
  6404. pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
  6405. pVme->NameSpaceTable = FrsAllocTypeSize(QHASH_TABLE_TYPE, NAME_SPACE_TABLE_SIZE);
  6406. SET_QHASH_TABLE_HASH_CALC(pVme->NameSpaceTable, NoHashBuiltin);
  6407. //
  6408. // Allocate a hash table to record file old names on a rename operation.
  6409. // The index is the File ID, the data field has a ptr to a USN record.
  6410. //
  6411. pVme->RenOldNameTable = FrsFreeType(pVme->RenOldNameTable);
  6412. pVme->RenOldNameTable = FrsAllocTypeSize(QHASH_TABLE_TYPE, RENAME_OLD_TABLE_SIZE);
  6413. SET_QHASH_TABLE_HASH_CALC(pVme->RenOldNameTable, JrnlHashCalcFid);
  6414. SET_QHASH_TABLE_FREE(pVme->RenOldNameTable, FrsFree);
  6415. //
  6416. // Allocate a Change Order Aging table for this volume.
  6417. //
  6418. sprintf(HashTableName, "CO_%ws", VolumeInfo->VolumeLabel);
  6419. pVme->ChangeOrderTable = GhtCreateTable(
  6420. HashTableName, // Table name
  6421. REPLICA_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
  6422. REPLICA_CHANGE_ORDER_ENTRY_KEY, // KeyOffset
  6423. REPLICA_CHANGE_ORDER_ENTRY_KEY_LENGTH, // KeyLength
  6424. JrnlHashEntryFree,
  6425. JrnlCompareFid,
  6426. JrnlHashCalcFid,
  6427. FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
  6428. //
  6429. // Allocate an Active Inbound Change Order hash table for this volume.
  6430. //
  6431. sprintf(HashTableName, "AIBCO_%ws", VolumeInfo->VolumeLabel);
  6432. pVme->ActiveInboundChangeOrderTable = GhtCreateTable(
  6433. HashTableName, // Table name
  6434. ACTIVE_INBOUND_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
  6435. REPLICA_CHANGE_ORDER_FILEGUID_KEY, // KeyOffset
  6436. REPLICA_CHANGE_ORDER_FILEGUID_KEY_LENGTH, // KeyLength
  6437. JrnlHashEntryFree,
  6438. JrnlCompareGuid,
  6439. JrnlHashCalcGuid,
  6440. FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
  6441. //
  6442. // Add the volume change order list to the global change order list.
  6443. //
  6444. FrsInitializeQueue(&pVme->ChangeOrderList, &FrsVolumeLayerCOList);
  6445. pVme->InitTime = GetTickCount();
  6446. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6447. //
  6448. // Return the Volume Monitor entry pointer.
  6449. //
  6450. *pVmeArg = pVme;
  6451. return ERROR_SUCCESS;
  6452. }
  #if 0
  ULONG
  JrnlCheckStartFailures(
      PFRS_QUEUE Queue
      )
  /*++

  Routine Description:

      Walk a queue of Volume Monitor Entries and report, for each entry, why
      the first journal read on that volume did not get started.

      NOTE: This routine is compiled out (#if 0).

  Arguments:

      Queue - A queue with Volume Monitor Entries on it.

  Return Value:

      ERROR_SUCCESS if the queue is empty (all journal reads started).
      ERROR_GEN_FAILURE if at least one entry was found on the queue.

  --*/
  {
  #undef DEBSUB
  #define DEBSUB "JrnlCheckStartFailures:"

      PLIST_ENTRY           Entry;
      PVOLUME_MONITOR_ENTRY pVme;
      ULONG                 WStatus, RetStatus;

      FrsRtlAcquireQueueLock(Queue);

      Entry = GetListHead(&Queue->ListHead);
      if (Entry == &Queue->ListHead) {
          DPRINT(4, ":S: JrnlCheckStartFailures - Queue empty.\n");
      }

      RetStatus = ERROR_SUCCESS;

      for ( ; Entry != &Queue->ListHead; Entry = GetListNext(Entry)) {

          pVme = CONTAINING_RECORD(Entry, VOLUME_MONITOR_ENTRY, ListEntry);
          WStatus = pVme->WStatus;

          //
          // Any entry found on this queue counts as a failure.
          //
          RetStatus = ERROR_GEN_FAILURE;

          if (WIN_SUCCESS(WStatus) || (WStatus == ERROR_IO_PENDING)) {
              //
              // The saved status says the read was issued, yet the entry
              // is on this queue.
              //
              DPRINT_WS(0, "Error from JrnlCheckStartFailures", WStatus);
              DPRINT1(0, ":S: ERROR - Replication should have been started for replica sets on volume %ws\n",
                      pVme->FSVolInfo.VolumeLabel);

          } else if (WStatus == ERROR_NOT_FOUND) {
              //
              // The I/O was not started because the starting USN is not in
              // the Journal.  We may have missed some locally originated
              // changes to the replica.  This is very bad because we now
              // have to walk the replica tree and the IDTable to see what
              // has changed.
              //
              // Walk the replica sets using this VME and compare their
              // starting USNs with the oldest USN record available on
              // the volume.  If it's there then we can at least start
              // those replica sets.  Whats left has to be handled the
              // long way.
              //
              // add code to sync up the tree
              //
              DPRINT1(0, ":S: Usn %08lx %08lx has been deleted.\n",
                      PRINTQUAD(pVme->JrnlReadPoint));
              DPRINT(0, ":S: Data lost, resync required on Replica ...\n");
              JrnlClose(pVme->VolumeHandle);

          } else {
              //
              // The I/O was not started for some other reason.
              //
              DPRINT_WS(0, "Error from JrnlCheckStartFailures", WStatus);
              DPRINT1(0, ":S: ERROR - Replication not started for any replica sets on volume %ws\n",
                      pVme->FSVolInfo.VolumeLabel);
          }
      }

      FrsRtlReleaseQueueLock(Queue);

      return RetStatus;
  }
  #endif
  6522. ULONG
  6523. JrnlPauseVolume(
  6524. IN PVOLUME_MONITOR_ENTRY pVme,
  6525. IN DWORD MilliSeconds
  6526. )
  6527. /*++
  6528. Routine Description:
  6529. Pause journal read activity on the specified volume. This routine
  6530. queues a completion packet to the journal read thread telling it
  6531. to pause I/O the volume. We then then wait on the event handle in
  6532. the Vme struct.
  6533. Once the read thread stops I/O on the volume it queues a CMD_JOURNAL_PAUSED
  6534. packet to the journal process queue. When this command is processed we
  6535. know that any prior journal buffers that have been queued for this
  6536. volume are now processed so we can signal the event to let the waiter
  6537. proceed.
  6538. Arguments:
  6539. pVme: The volume to pause.
  6540. MilliSeconds - Timeout
  6541. Return Value:
  6542. Win32 status
  6543. --*/
  6544. {
  6545. #undef DEBSUB
  6546. #define DEBSUB "JrnlPauseVolume:"
  6547. ULONG WStatus;
  6548. ULONG RetryCount = 10;
  6549. DPRINT2(5, "***** Pause on Volume %ws - Journal State: %s *****\n",
  6550. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  6551. RETRY:
  6552. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6553. //
  6554. // Check if paused already.
  6555. //
  6556. if ((pVme->JournalState == JRNL_STATE_PAUSED) ||
  6557. (pVme->JournalState == JRNL_STATE_INITIALIZING)) {
  6558. WStatus = ERROR_SUCCESS;
  6559. goto RETURN;
  6560. }
  6561. //
  6562. // Check if pause is in progress.
  6563. //
  6564. if ((pVme->JournalState == JRNL_STATE_PAUSE1) ||
  6565. (pVme->JournalState == JRNL_STATE_PAUSE2)) {
  6566. goto WAIT;
  6567. }
  6568. //
  6569. // If I/O is not active on this volume then request is invalid.
  6570. //
  6571. if (pVme->JournalState != JRNL_STATE_ACTIVE) {
  6572. WStatus = ERROR_INVALID_FUNCTION;
  6573. goto RETURN;
  6574. }
  6575. //
  6576. // Submit the pause request to the journal read thread.
  6577. //
  6578. WStatus = JrnlSubmitReadThreadRequest(pVme,
  6579. FRS_PAUSE_JOURNAL_READ,
  6580. JRNL_STATE_PAUSE1);
  6581. if (WStatus == ERROR_BUSY) {
  6582. //
  6583. // Overlapped struct is in use. Retry a few times then bail.
  6584. //
  6585. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6586. if (--RetryCount == 0) {
  6587. return ERROR_BUSY;
  6588. }
  6589. Sleep(250);
  6590. goto RETRY;
  6591. }
  6592. WAIT:
  6593. //
  6594. // Drop the lock and wait on the event.
  6595. //
  6596. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6597. WStatus = WaitForSingleObject(pVme->Event, MilliSeconds);
  6598. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_RETURN);
  6599. //
  6600. // Check the result state.
  6601. //
  6602. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6603. WStatus = (pVme->JournalState == JRNL_STATE_PAUSED) ?
  6604. ERROR_SUCCESS : WAIT_FAILED;
  6605. RETURN:
  6606. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6607. return WStatus;
  6608. }
  6609. ULONG
  6610. JrnlUnPauseVolume(
  6611. IN PVOLUME_MONITOR_ENTRY pVme,
  6612. IN PJBUFFER Jbuff,
  6613. IN BOOL HaveLock
  6614. )
  6615. /*++
  6616. Routine Description:
  6617. Un-Pause journal read activity on the specified volume.
  6618. This routine starts up journal read activity on a volume that has
  6619. been previously paused. It kicks off an async read on the volume
  6620. which will complete on the completion port.
  6621. This routine is called both to initially start activity on a Journal and
  6622. to start the next read on a journal.
  6623. If you are initiating the first journal read or restarting the journal
  6624. after a pause you need to set the journal state to JRNL_STATE_STARTING
  6625. before calling this routine. e.g.
  6626. pVme->JournalState = JRNL_STATE_STARTING;
  6627. On the very first call to start the journal the JournalState should
  6628. be JRNL_STATE_INITIALIZING. This causes an initial set of journal
  6629. data buffers to be allocated. Otherwise we get a buffer from the
  6630. JournalFreeQueue.
  6631. Arguments:
  6632. pVme: The volume to pause.
  6633. Jbuff: An optional caller supplied Journal buffer. If NULL we get
  6634. one off the free list here.
  6635. HaveLock: TRUE means the caller has acquired the volume monitor lock.
  6636. FALSE means we acquire it and release it here.
  6637. Return Value:
  6638. Win32 status
  6639. --*/
  6640. {
  6641. #undef DEBSUB
  6642. #define DEBSUB "JrnlUnPauseVolume:"
  6643. PLIST_ENTRY Entry;
  6644. ULONG WStatus;
  6645. NTSTATUS Status;
  6646. BOOL AllocJbuff = (Jbuff == NULL);
  6647. ULONG SaveJournalState = JRNL_STATE_ERROR;
  6648. ULONG i;
  6649. LONG RetryCount;
  6650. DPRINT2(5, "***** UnPause on Volume %ws - Journal State: %s *****\n",
  6651. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  6652. //
  6653. // Get the buffer first so we don't block waiting for a free buffer
  6654. // holding the VolumeMonitorQueue lock.
  6655. //
  6656. if (AllocJbuff) {
  6657. if (pVme->JournalState == JRNL_STATE_INITIALIZING) {
  6658. //
  6659. // Allocate a journal buffer from memory if this is a fresh start.
  6660. //
  6661. Jbuff = FrsAllocType(JBUFFER_TYPE);
  6662. //DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
  6663. } else {
  6664. //
  6665. // Get a journal buffer from the free list.
  6666. // We wait here until a buffer is available.
  6667. //
  6668. if (HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6669. Entry = FrsRtlRemoveHeadQueue(&JournalFreeQueue);
  6670. if (HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6671. if (Entry == NULL) {
  6672. //
  6673. // Check for abort and cancel all I/O.
  6674. //
  6675. DPRINT(0, "ERROR-JournalFreeQueue Abort.\n");
  6676. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6677. return ERROR_REQUEST_ABORTED;
  6678. }
  6679. Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
  6680. //DPRINT1(5, "jb: ff %08x\n", Jbuff);
  6681. }
  6682. }
  6683. if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6684. //
  6685. // Check if paused already or stopped. If so, ignore the request.
  6686. //
  6687. if ((pVme->JournalState != JRNL_STATE_STARTING) &&
  6688. (pVme->JournalState != JRNL_STATE_INITIALIZING) &&
  6689. (pVme->JournalState != JRNL_STATE_ACTIVE)) {
  6690. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6691. WStatus = ERROR_SUCCESS;
  6692. goto ERROR_RETURN;
  6693. }
  6694. //
  6695. // If there is already an I/O active don't start another. This can happen
  6696. // when the IOCancel() from a previous Pause request fails to abort the
  6697. // current journal read immediately. Now the unpause request starts a
  6698. // second I/O on the volume. In theory this should be benign since the
  6699. // cancel from the first pause will abort the first read request and the
  6700. // 2nd should complete normally.
  6701. //
  6702. // For now just mark the journal as Active again so when the currently
  6703. // outstanding request completes (or aborts) another read request is issued.
  6704. //
  6705. if (pVme->ActiveIoRequests != 0) {
  6706. DPRINT1(3, "UnPause on volume with non-zero ActiveIoRequest Count: %d\n",
  6707. pVme->ActiveIoRequests);
  6708. if (pVme->ReplayUsnValid) {
  6709. DPRINT(3, "Replay USN is valid. Waiting for ActiveIoRequest to go to zero\n");
  6710. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6711. //
  6712. // Unfortunately if this call is from the journal read thread
  6713. // v.s. another thread unpausing the volume the journal read
  6714. // thread won't be able to decrement the ActiveIoRequests.
  6715. //
  6716. Sleep(5000);
  6717. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6718. if (pVme->ActiveIoRequests != 0) {
  6719. DPRINT1(3, "ActiveIoRequest still non-zero: %d. Skip replay\n",
  6720. pVme->ActiveIoRequests);
  6721. pVme->ReplayUsnValid = FALSE;
  6722. }
  6723. }
  6724. //
  6725. // The requests have not yet finished. For now just mark the
  6726. // journal as Active again so when the currently outstanding
  6727. // request completes (or aborts) another read request is issued.
  6728. //
  6729. if (pVme->ActiveIoRequests != 0) {
  6730. pVme->IoActive = TRUE;
  6731. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
  6732. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6733. WStatus = ERROR_SUCCESS;
  6734. goto ERROR_RETURN;
  6735. }
  6736. //
  6737. // FALL THRU means startup a read on the journal.
  6738. //
  6739. }
  6740. //
  6741. // If we are just starting up or restarting from a pause and the
  6742. // Replay USN is valid then start reading from there.
  6743. //
  6744. if ((pVme->JournalState != JRNL_STATE_ACTIVE) && pVme->ReplayUsnValid) {
  6745. DPRINT1(4, "JrnlReadPoint was: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  6746. pVme->JrnlReadPoint = pVme->ReplayUsn;
  6747. pVme->ReplayUsnValid = FALSE;
  6748. DPRINT1(4, "Loading JrnlReadPoint from ReplayUsn: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  6749. }
  6750. pVme->IoActive = TRUE;
  6751. pVme->StopIo = FALSE; // VME Overlap struct available.
  6752. SaveJournalState = pVme->JournalState;
  6753. if (pVme->JournalState != JRNL_STATE_ACTIVE) {
  6754. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
  6755. }
  6756. pVme->ActiveIoRequests += 1;
  6757. FRS_ASSERT(pVme->ActiveIoRequests == 1);
  6758. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6759. //
  6760. // Post a read on this journal handle to get things started.
  6761. // Note ownership of the buffer goes to another thread via the
  6762. // I/O Completion port so we can't change or look at it
  6763. // (without a lock) unless the read failed. Even if the read
  6764. // completes synchronously the I/O still completes via the port.
  6765. // The same is true of the related VME struct.
  6766. //
  6767. // An NTSTATUS return of STATUS_JOURNAL_ENTRY_DELETED means the requested
  6768. // USN record is no longer in the Journal (i.e. the journal has
  6769. // wrapped). The corresponding win32 error is ERROR_JOURNAL_ENTRY_DELETED.
  6770. //
  6771. RetryCount = 100;
  6772. RETRY_READ:
  6773. Status = FrsIssueJournalAsyncRead(Jbuff, pVme);
  6774. if (!NT_SUCCESS(Status)) {
  6775. if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6776. if (Status == STATUS_JOURNAL_ENTRY_DELETED) {
  6777. DPRINT(0, " +-+-+-+-+-+- JOURNAL WRAPPED +-+-+-+-+-+-+-+-+-+-\n");
  6778. //
  6779. // The journal wrapped.
  6780. //
  6781. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6782. } else
  6783. if ((Status == STATUS_JOURNAL_DELETE_IN_PROGRESS) ||
  6784. (Status == STATUS_JOURNAL_NOT_ACTIVE)) {
  6785. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6786. DPRINT(0, "Journal is or is being deleted. FRS requires the NTFS Journal.\n");
  6787. DisplayNTStatus(Status);
  6788. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6789. } else
  6790. if (Status == STATUS_DATA_ERROR) {
  6791. //
  6792. // Internal NTFS detected errors: e.g.
  6793. // - Usn record size is not quad-aligned
  6794. // - Usn record size extends beyond the end of the Usn page
  6795. // - Usn record size isn't large enough to contain the Usn record
  6796. // - Usn record size extends beyond end of usn journal
  6797. //
  6798. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6799. DPRINT(0, "Journal internal inconsistency detected by NTFS.\n");
  6800. DisplayNTStatus(Status);
  6801. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6802. } else {
  6803. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6804. DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead : ", Status);
  6805. DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead Iosb.Status: ", Jbuff->Iosb.Status);
  6806. if ((Status == STATUS_INVALID_PARAMETER) && (RetryCount-- > 0)) {
  6807. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6808. Sleep(500);
  6809. goto RETRY_READ;
  6810. }
  6811. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6812. // FRS_ASSERT(FALSE);
  6813. }
  6814. //
  6815. // Restore old journal state.
  6816. //
  6817. pVme->JournalState = SaveJournalState;
  6818. pVme->ActiveIoRequests -= 1;
  6819. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  6820. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6821. WStatus = FrsSetLastNTError(Status);
  6822. DPRINT_WS(0, "Error from FrsIssueJournalAsyncRead", WStatus);
  6823. //
  6824. // Error starting the read. Free Jbuff and return the error.
  6825. //
  6826. goto ERROR_RETURN;
  6827. }
  6828. //
  6829. // IO has started. If this was a fresh start add a few more buffers
  6830. // on the free list so there are enough to work with.
  6831. //
  6832. if (SaveJournalState == JRNL_STATE_INITIALIZING) {
  6833. for (i=0; i<(NumberOfJounalBuffers-1); i++) {
  6834. Jbuff = FrsAllocType(JBUFFER_TYPE);
  6835. //DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
  6836. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  6837. }
  6838. }
  6839. return ERROR_SUCCESS;
  6840. ERROR_RETURN:
  6841. //
  6842. // If we allocated a journal buffer here then give it back.
  6843. //
  6844. if (AllocJbuff && (Jbuff != NULL)) {
  6845. if (SaveJournalState == JRNL_STATE_INITIALIZING) {
  6846. //DPRINT1(5, "jb: fm %08x (free mem)\n", Jbuff);
  6847. Jbuff = FrsFreeType(Jbuff);
  6848. } else {
  6849. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  6850. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  6851. }
  6852. }
  6853. return WStatus;
  6854. }
  6855. ULONG
  6856. JrnlSubmitReadThreadRequest(
  6857. IN PVOLUME_MONITOR_ENTRY pVme,
  6858. IN ULONG Request,
  6859. IN ULONG NewState
  6860. )
  6861. /*++
  6862. Routine Description:
  6863. This routine posts a completion status packet on the journal I/O
  6864. completion port. This is used to either stop journal I/O or just
  6865. pause it while making changes to the filter table. When the journal
  6866. read thread gets the request it will cancel journal I/O on the volume
  6867. handle (which can only be done from that thread). If the post is
  6868. successful then the JournalState is updated with NewState.
  6869. We Assume the caller has acquired the VolumeMonitorQueue lock.
  6870. Arguments:
  6871. pVme - the volume monitor entry with the state for this volume's journal.
  6872. Request - The request type. Either FRS_CANCEL_JOURNAL_READ or
  6873. FRS_PAUSE_JOURNAL_READ.
  6874. NewState - The new state for the journal if the submit succeeds.
  6875. Return Value:
  6876. A WIN32 status.
  6877. --*/
  6878. {
  6879. #undef DEBSUB
  6880. #define DEBSUB "JrnlSubmitReadThreadRequest:"
  6881. ULONG WStatus;
  6882. PCHAR ReqStr;
  6883. if (Request == FRS_CANCEL_JOURNAL_READ) {
  6884. ReqStr = "cancel journal read";
  6885. } else
  6886. if (Request == FRS_PAUSE_JOURNAL_READ) {
  6887. ReqStr = "pause journal read";
  6888. } else {
  6889. DPRINT1(0, "ERROR - Invalid journal request: %08x\n", Request);
  6890. return ERROR_INVALID_PARAMETER;
  6891. }
  6892. if (pVme->StopIo) {
  6893. return ERROR_BUSY;
  6894. }
  6895. if (JournalCompletionPort == NULL) {
  6896. return ERROR_INVALID_HANDLE;
  6897. }
  6898. DPRINT2(5, "Queueing %s IO req on Volume %ws.\n",
  6899. ReqStr, pVme->FSVolInfo.VolumeLabel);
  6900. //
  6901. // Clear the pVme event if the request is to start a stop or pause sequence.
  6902. // Mark the overlapped struct busy,
  6903. // Submit the pause request to the journal read thread.
  6904. //
  6905. if ((NewState == JRNL_STATE_STOPPING) ||
  6906. (NewState == JRNL_STATE_PAUSE1)) {
  6907. ResetEvent(pVme->Event);
  6908. }
  6909. pVme->StopIo = TRUE;
  6910. if (!PostQueuedCompletionStatus(
  6911. JournalCompletionPort,
  6912. Request,
  6913. (ULONG_PTR) pVme,
  6914. &pVme->CancelOverlap)) {
  6915. WStatus = GetLastError();
  6916. DPRINT2_WS(0, "ERROR - Failed on PostQueuedCompletionStatus of %s on %ws :",
  6917. ReqStr, pVme->FSVolInfo.VolumeLabel, WStatus);
  6918. return WStatus;
  6919. }
  6920. //
  6921. // pkt submited. Update state.
  6922. //
  6923. pVme->JournalState = NewState;
  6924. DPRINT1(5, "Packet submitted. Jrnl state is %s\n", RSS_NAME(NewState));
  6925. return ERROR_SUCCESS;
  6926. }
  6927. ULONG
  6928. JrnlShutdownSingleReplica(
  6929. IN PREPLICA Replica,
  6930. IN BOOL HaveLock
  6931. )
  6932. /*++
  6933. Routine Description:
  6934. Detach this replica from its journal. Decrement the ActiveReplicas count
  6935. on the VME. If zero post a completion packet to the JournalCompletionPort
  6936. so the pending journal read request can be canceled by the read thread.
  6937. If no journal thread is active we do it all here.
  6938. If the volume monitor queue is left empty, we close the completion port.
  6939. The caller must have acquired the pVme->ReplicaListHead lock.
  6940. Arguments:
  6941. Replica -- Replica set to detach.
  6942. HaveLock -- TRUE if the caller has acquired the VolumeMonitorQueue
  6943. lock else we get it here.
  6944. Return Value:
  6945. Win32 status.
  6946. --*/
  6947. {
  6948. #undef DEBSUB
  6949. #define DEBSUB "JrnlShutdownSingleReplica:"
  6950. ULONG GStatus;
  6951. LIST_ENTRY DeadList;
  6952. PFRS_QUEUE FrsTempList;
  6953. ULONG WStatus = ERROR_SUCCESS;
  6954. PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
  6955. DPRINT1(4, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6956. if (!HaveLock) {
  6957. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6958. FrsRtlAcquireQueueLock(&pVme->ReplicaListHead);
  6959. }
  6960. if (pVme->ActiveReplicas == 0) {
  6961. DPRINT1(0, ":S: ActiveReplicas count already zero on %ws\n",
  6962. pVme->FSVolInfo.VolumeLabel);
  6963. WStatus = ERROR_INVALID_HANDLE;
  6964. goto RETURN;
  6965. }
  6966. //
  6967. // It is possible that this replica struct never made it onto the list
  6968. // if it went into the error state during init or startup.
  6969. //
  6970. if (Replica->VolReplicaList.Flink == NULL) {
  6971. DPRINT2(0, ":S: WARN: Replica struct not on pVme ReplicaListHead for on %ws. Current replica State: %s\n",
  6972. pVme->FSVolInfo.VolumeLabel, RSS_NAME(Replica->ServiceState));
  6973. WStatus = ERROR_INVALID_HANDLE;
  6974. goto RETURN;
  6975. }
  6976. //
  6977. // Remove replica from the VME list.
  6978. //
  6979. FrsRtlRemoveEntryListLock(&pVme->ReplicaListHead, &Replica->VolReplicaList);
  6980. pVme->ActiveReplicas -= 1;
  6981. ReleaseVmeRef(pVme);
  6982. DPRINT3(4, "Removed %ws from VME %ws. %d Replicas remain.\n",
  6983. Replica->ReplicaName->Name, pVme->FSVolInfo.VolumeLabel,
  6984. pVme->ActiveReplicas);
  6985. //
  6986. // IF this is the last active Replica on the volume then stop
  6987. // I/O on the journal.
  6988. //
  6989. if (!IsListEmpty(&pVme->ReplicaListHead.ListHead)) {
  6990. WStatus = ERROR_SUCCESS;
  6991. goto RETURN;
  6992. }
  6993. if (pVme->ActiveReplicas != 0) {
  6994. DPRINT2(0, ":S: ERROR - pVme->ReplicaListHead is empty but ActiveReplicas count is non-zero (%d) on %ws\n",
  6995. pVme->ActiveReplicas, pVme->FSVolInfo.VolumeLabel);
  6996. DPRINT(0, ":S: ERROR - Stopping the journal anyway\n");
  6997. pVme->ActiveReplicas = 0;
  6998. }
  6999. //
  7000. // This is the last Replica set on the volume. Stop the journal.
  7001. //
  7002. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  7003. //
  7004. // There is no Journal thread. Put the VME on the
  7005. // stop queue and Close the handle here.
  7006. //
  7007. FrsRtlRemoveEntryQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
  7008. pVme->IoActive = FALSE;
  7009. pVme->WStatus = ERROR_SUCCESS;
  7010. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
  7011. DPRINT1(0, ":S: FrsRtlInsertTailQueue -- onto stop queue %08x\n", pVme);
  7012. FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &pVme->ListEntry);
  7013. FRS_CLOSE(pVme->VolumeHandle);
  7014. ReleaseVmeRef(pVme);
  7015. if ((VolumeMonitorQueue.Count == 0) &&
  7016. (JournalCompletionPort != NULL)) {
  7017. //
  7018. // Close the completion port.
  7019. //
  7020. // FRS_CLOSE(JournalCompletionPort);
  7021. }
  7022. } else {
  7023. //
  7024. // if I/O not already stopping, queue a completion packet
  7025. // to the journal read thread to cancel the I/O.
  7026. // The journal read thread will then put the VME on the
  7027. // VolumeMonitorStopQueue. If we did it here the VME would
  7028. // go to the Stop queue and the ActiveReplicas count would
  7029. // be decremented before I/O has actually stopped on the journal.
  7030. //
  7031. WStatus = JrnlSubmitReadThreadRequest(pVme,
  7032. FRS_CANCEL_JOURNAL_READ,
  7033. JRNL_STATE_STOPPING);
  7034. if (!WIN_SUCCESS(WStatus)) {
  7035. DPRINT2(0, ":S: ERROR: JrnlSubmitReadThreadRequest to stop Journal Failed on %ws. Current Journal State: %s\n",
  7036. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  7037. DPRINT_WS(0, "ERROR: Status is", WStatus);
  7038. }
  7039. }
  7040. if (DoDebug(5, DEBSUB)) {
  7041. // "TEST CODE VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV"
  7042. DPRINT(5, "\n");
  7043. DPRINT1(5, "==== start of volume change order hash table dump for %ws ===========\n",
  7044. pVme->FSVolInfo.VolumeLabel);
  7045. DPRINT(5, "\n");
  7046. GHT_DUMP_TABLE(5, pVme->ChangeOrderTable);
  7047. DPRINT(5, "\n");
  7048. DPRINT(5, "========= End of Change order hash table dump ================\n");
  7049. DPRINT(5, "\n");
  7050. DPRINT(5, "\n");
  7051. DPRINT1(5, "==== start of USN write filter table dump for %ws ===========\n",
  7052. pVme->FSVolInfo.VolumeLabel);
  7053. DPRINT(5, "\n");
  7054. QHashEnumerateTable(pVme->FrsWriteFilter, QHashDump, NULL);
  7055. DPRINT(5, "\n");
  7056. DPRINT(5, "==== End of USN write filter table dump ===========\n");
  7057. DPRINT(5, "\n");
  7058. DPRINT(5, "\n");
  7059. DPRINT1(5, "==== start of recovery conflict table dump for %ws ===========\n",
  7060. pVme->FSVolInfo.VolumeLabel);
  7061. DPRINT(5, "\n");
  7062. #ifdef RECOVERY_CONFLICT
  7063. QHashEnumerateTable(pVme->RecoveryConflictTable, QHashDump, NULL);
  7064. DPRINT(5, "\n");
  7065. DPRINT(5, "==== End of recovery conflict table dump ===========\n");
  7066. DPRINT(5, "\n");
  7067. #endif // RECOVERY_CONFLICT
  7068. }
  7069. // "TEST CODE ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
  7070. GHT_DUMP_TABLE(3, pVme->ActiveInboundChangeOrderTable);
  7071. //
  7072. // Drop the initial allocation ref so the count can drop to zero
  7073. // when the last reference is released.
  7074. //
  7075. ReleaseVmeRef(pVme);
  7076. RETURN:
  7077. if (!HaveLock) {
  7078. FrsRtlReleaseQueueLock(&pVme->ReplicaListHead);
  7079. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7080. }
  7081. return WStatus;
  7082. }
  7083. VOID
  7084. JrnlCleanupVme(
  7085. IN PVOLUME_MONITOR_ENTRY pVme
  7086. )
  7087. /*++
  7088. Routine Description:
  7089. Free the VME storage when the ref count goes to zero. Called by the
  7090. ReleaseVmeRef() macro. Don't free the Vme proper because other threads
  7091. may still try to take out a ref on the Vme and they will test the ref count
  7092. for zero and fail.
  7093. Arguments:
  7094. pVme -- Volume Monitor Entry to close.
  7095. Return Value:
  7096. Win32 status.
  7097. --*/
  7098. {
  7099. #undef DEBSUB
  7100. #define DEBSUB "JrnlCleanupVme:"
  7101. USN PurgeUsn;
  7102. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  7103. if (pVme->ActiveReplicas != 0) {
  7104. DPRINT1(0, "ERROR - ActiveReplicas not yet zero on %ws\n",
  7105. pVme->FSVolInfo.VolumeLabel);
  7106. FRS_ASSERT(!"ActiveReplicas not yet zero on volume");
  7107. return;
  7108. }
  7109. #if 0
  7110. // Note: Don't delete the CO process queue here since CO Accept may still be cleaning up
  7111. // same with aging cache (ChangeOrderTable) and ActiveInboundChangeOrderTable
  7112. FrsRtlDeleteQueue(&pVme->ChangeOrderList);
  7113. GhtDestroyTable(pVme->ChangeOrderTable);
  7114. pVme->ChangeOrderTable = NULL;
  7115. //
  7116. // Cleanup the Active inbound CO Table.
  7117. //
  7118. GhtDestroyTable(pVme->ActiveInboundChangeOrderTable);
  7119. pVme->ActiveInboundChangeOrderTable = NULL;
  7120. #endif
  7121. //
  7122. // Release the Filter Table.
  7123. //
  7124. GhtDestroyTable(pVme->FilterTable);
  7125. pVme->FilterTable = NULL;
  7126. //
  7127. // Release the parent file ID table, the active children table,
  7128. // and the Volume Write Filter.
  7129. //
  7130. pVme->ParentFidTable = FrsFreeType(pVme->ParentFidTable);
  7131. pVme->FrsWriteFilter = FrsFreeType(pVme->FrsWriteFilter);
  7132. pVme->ActiveChildren = FrsFreeType(pVme->ActiveChildren);
  7133. #ifdef RECOVERY_CONFLICT
  7134. pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
  7135. #endif // RECOVERY_CONFLICT
  7136. DPRINT(4, "\n");
  7137. DPRINT1(4, "==== start of NameSpaceTable table dump for %ws ===========\n",
  7138. pVme->FSVolInfo.VolumeLabel);
  7139. DPRINT(4, "\n");
  7140. QHashEnumerateTable(pVme->NameSpaceTable, QHashDump, NULL);
  7141. DPRINT(4, "\n");
  7142. DPRINT(4, "==== End of NameSpaceTable table dump ===========\n");
  7143. DPRINT(4, "\n");
  7144. pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
  7145. //
  7146. // Remove all the entries from the RENAME_OLD_NAME table and free the table.
  7147. //
  7148. PurgeUsn = MAXLONGLONG;
  7149. QHashEnumerateTable(pVme->RenOldNameTable,
  7150. JrnlPurgeOldRenameWorker,
  7151. &PurgeUsn);
  7152. pVme->RenOldNameTable = FrsFreeType(pVme->RenOldNameTable);
  7153. // Note: stick the vme on a storage cleanup list
  7154. }
  7155. ULONG
  7156. JrnlCloseVme(
  7157. IN PVOLUME_MONITOR_ENTRY pVme
  7158. )
  7159. /*++
  7160. Routine Description:
  7161. Close this Volume Monitor Entry by doing a shutdown on all replicas.
  7162. We assume the caller has taken the monitor queue lock.
  7163. Arguments:
  7164. pVme -- Volume Monitor Entry to close.
  7165. Return Value:
  7166. Win32 status.
  7167. --*/
  7168. {
  7169. #undef DEBSUB
  7170. #define DEBSUB "JrnlCloseVme:"
  7171. ULONG WStatus = ERROR_SUCCESS;
  7172. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  7173. if (pVme->ActiveReplicas == 0) {
  7174. DPRINT1(1, "ActiveReplicas count already zero on %ws\n",
  7175. pVme->FSVolInfo.VolumeLabel);
  7176. return ERROR_INVALID_HANDLE;
  7177. }
  7178. //
  7179. // Remove all active replicas from the VME list.
  7180. //
  7181. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  7182. //
  7183. // The iterator pE is type PREPLICA.
  7184. // Caller must have taken the monitor queue lock to avoid lock order prob.
  7185. //
  7186. WStatus = JrnlShutdownSingleReplica(pE, TRUE);
  7187. DPRINT_WS(0, "Error from JrnlShutdownSingleReplica", WStatus);
  7188. );
  7189. if (pVme->ActiveReplicas != 0) {
  7190. DPRINT2(0, "ActiveReplicas count should be zero on %ws. It is %d\n",
  7191. pVme->FSVolInfo.VolumeLabel, pVme->ActiveReplicas);
  7192. WStatus = ERROR_GEN_FAILURE;
  7193. } else {
  7194. WStatus = ERROR_SUCCESS;
  7195. }
  7196. return WStatus;
  7197. }
  7198. ULONG
  7199. JrnlCloseAll(
  7200. VOID
  7201. )
  7202. /*++
  7203. Routine Description:
  7204. Close all entries on the VolumeMonitorQueue.
  7205. Arguments:
  7206. None.
  7207. Return Value:
  7208. None.
  7209. --*/
  7210. {
  7211. #undef DEBSUB
  7212. #define DEBSUB "JrnlCloseAll:"
  7213. ULONG WStatus;
  7214. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  7215. if (IsListEmpty(&VolumeMonitorQueue.ListHead)) {
  7216. DPRINT(4, "JrnlCloseAll - VolumeMonitorQueue empty.\n");
  7217. }
  7218. //
  7219. // When all the volumes are stopped journal thread should exit instead
  7220. // of looking for work.
  7221. //
  7222. KillJournalThreads = TRUE;
  7223. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  7224. WStatus = JrnlCloseVme(pE);
  7225. if (pE->JournalState == JRNL_STATE_STOPPED) {
  7226. continue;
  7227. }
  7228. //
  7229. // Drop the lock and wait for the event.
  7230. //
  7231. if (pE->JournalState == JRNL_STATE_STOPPING) {
  7232. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7233. WStatus = WaitForSingleObject(pE->Event, 2000);
  7234. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
  7235. //
  7236. // Check the result state.
  7237. //
  7238. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7239. if (pE->JournalState == JRNL_STATE_STOPPED) {
  7240. continue;
  7241. }
  7242. }
  7243. DPRINT2(1, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
  7244. pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
  7245. //
  7246. // Force it onto the stopped queue and set the state to ERROR.
  7247. //
  7248. if (pE->IoActive) {
  7249. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
  7250. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  7251. }
  7252. );
  7253. return ERROR_SUCCESS;
  7254. }
  7255. ULONG
  7256. JrnlClose(
  7257. IN HANDLE VolumeHandle
  7258. )
  7259. /*++
  7260. Routine Description:
  7261. This routine walks the VolumeMonitorQueue looking for the entry with the
  7262. given VolumeHandle. It then decrements the reference count and if zero
  7263. we post a completion packet to the JournalCompletionPort so the pending
  7264. journal read request can be canceled.
  7265. Arguments:
  7266. VolumeHandle -- The handle of the volume to close.
  7267. Return Value:
  7268. None.
  7269. --*/
  7270. {
  7271. #undef DEBSUB
  7272. #define DEBSUB "JrnlClose:"
  7273. ULONG WStatus;
  7274. BOOL Found;
  7275. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  7276. Found = FALSE;
  7277. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  7278. if (pE->VolumeHandle == VolumeHandle) {
  7279. //
  7280. // Handle matches. Close the Volume Monitor Entry.
  7281. //
  7282. Found = TRUE;
  7283. WStatus = JrnlCloseVme(pE);
  7284. if (pE->JournalState == JRNL_STATE_STOPPED) {
  7285. break;
  7286. }
  7287. //
  7288. // Drop the lock and wait for the event.
  7289. //
  7290. if (pE->JournalState == JRNL_STATE_STOPPING) {
  7291. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7292. WStatus = WaitForSingleObject(pE->Event, 2000);
  7293. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
  7294. //
  7295. // Check the result state.
  7296. //
  7297. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7298. if (pE->JournalState == JRNL_STATE_STOPPED) {
  7299. break;
  7300. }
  7301. }
  7302. DPRINT2(0, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
  7303. pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
  7304. //
  7305. // Force it onto the stopped queue and set the state to ERROR.
  7306. //
  7307. if (pE->IoActive) {
  7308. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
  7309. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  7310. }
  7311. break;
  7312. }
  7313. );
  7314. if (!Found) {
  7315. DPRINT1(0, "ERROR - JrnlClose - Handle %08x not found in VolumeMonitorQueue\n",
  7316. VolumeHandle);
  7317. }
  7318. return ERROR_SUCCESS;
  7319. }
  7320. VOID
  7321. JrnlNewVsn(
  7322. IN PCHAR Debsub,
  7323. IN ULONG uLineNo,
  7324. IN PVOLUME_MONITOR_ENTRY pVme,
  7325. IN OUT PULONGLONG NewVsn
  7326. )
  7327. /*++
  7328. Routine Description:
  7329. Assign a new VSN for this volume. Save a recovery point after
  7330. VSN_SAVE_INTERVAL VSNs have been handed out.
  7331. Arguments:
  7332. Debsub -- name of Function calling us for trace.
  7333. uLineNo -- Linenumber of caller for trace.
  7334. pVme -- Volume Monitor Entry with the Vsn state.
  7335. NewVsn -- Ptr to return Vsn
  7336. Return Value:
  7337. Win32 status.
  7338. --*/
  7339. {
  7340. #undef DEBSUB
  7341. #define DEBSUB "JrnlNewVsn:"
  7342. ULONGLONG TempVsn;
  7343. BOOL SaveFlag = FALSE;
  7344. LOCK_VME(pVme);
  7345. TempVsn = ++pVme->FrsVsn;
  7346. *NewVsn = TempVsn;
  7347. if ((TempVsn & (ULONGLONG) VSN_SAVE_INTERVAL) == QUADZERO) {
  7348. SaveFlag = TRUE;
  7349. DebPrint(4,
  7350. (PUCHAR) "++ VSN Save Triggered: NextVsn: %08x %08x"
  7351. " LastUsnSaved: %08x %08x CurrUsnDone: %08x %08x\n",
  7352. Debsub,
  7353. uLineNo,
  7354. PRINTQUAD(TempVsn),
  7355. PRINTQUAD(pVme->LastUsnSavePoint),
  7356. PRINTQUAD(pVme->CurrentUsnRecordDone));
  7357. if (pVme->LastUsnSavePoint < pVme->CurrentUsnRecordDone) {
  7358. pVme->LastUsnSavePoint = pVme->CurrentUsnRecordDone;
  7359. }
  7360. }
  7361. UNLOCK_VME(pVme);
  7362. if (SaveFlag) {
  7363. DbsRequestSaveMark(pVme, FALSE);
  7364. }
  7365. // Note: perf: check for change to use ExInterlockedAddLargeStatistic
  7366. // so we can pitch the LOCK_VME. Note the lock is also used to
  7367. // avoid quadword tearing on LastUsnSavePoint with USN save point
  7368. // test in the journal loop. Need to fix that too
  7369. }
  7370. NTSTATUS
  7371. FrsIssueJournalAsyncRead(
  7372. IN PJBUFFER Jbuff,
  7373. IN PVOLUME_MONITOR_ENTRY pVme
  7374. )
  7375. /*++
  7376. Routine Description:
  7377. This routine posts an async read to the journal specified by the handle
  7378. in the Vme using the buffer in Jbuff.
  7379. Note once the async I/O is submitted (and returns STATUS_PENDING)
  7380. the jbuffer and the VME go to another thread via the I/O Completion port
  7381. so neither we nor the caller can change or look at it unless
  7382. the read failed or completed synchronously (unless you have a lock).
  7383. This is because we could block right after the call, the I/O could complete
  7384. and the JournalReadThread could pick up and process the buffer before the
  7385. calling thread ever runs again.
  7386. Arguments:
  7387. Jbuff - The Journal Buffer to use for the read request.
  7388. pVme - The volume monitor entry for the Async Read,
  7389. Return Value:
  7390. NTSTATUS status
  7391. The win32 error status is ERROR_NOT_FOUND when the USN is not found in
  7392. the journal.
  7393. --*/
  7394. {
  7395. #undef DEBSUB
  7396. #define DEBSUB "FrsIssueJournalAsyncRead:"
  7397. NTSTATUS Status;
  7398. ULONG WStatus;
  7399. READ_USN_JOURNAL_DATA ReadUsnJournalData;
  7400. // Current journal poll delay in NTFS is 2 seconds (doesn't apply for async reads)
  7401. #define DELAY_TIME ((LONGLONG)(-20000000))
  7402. #define FRS_USN_REASON_FILTER (USN_REASON_CLOSE | \
  7403. USN_REASON_FILE_CREATE | \
  7404. USN_REASON_RENAME_OLD_NAME)
  7405. //
  7406. // Setup the journal read parameters. BytesToWaitFor set to sizeof(USN)+1
  7407. // causes the read journal call to return after the first entry is placed
  7408. // in the buffer. JrnlReadPoint is the point in the journal to start the read.
  7409. // ReturnOnlyOnClose = TRUE means the returned journal entries only
  7410. // include close records (bit <31> of Reason field is set to one).
  7411. // Otherwise you get a record when any reason bit is set, e.g. create,
  7412. // first write, ...
  7413. //
  7414. ReadUsnJournalData.StartUsn = pVme->JrnlReadPoint; // USN JrnlReadPoint
  7415. ReadUsnJournalData.ReasonMask = FRS_USN_REASON_FILTER; // ULONG ReasonMask
  7416. ReadUsnJournalData.ReturnOnlyOnClose = FALSE; // ULONG ReturnOnlyOnClose
  7417. ReadUsnJournalData.Timeout = DELAY_TIME; // ULONGLONG Timeout
  7418. ReadUsnJournalData.BytesToWaitFor = sizeof(USN)+1; // ULONGLONG BytesToWaitFor
  7419. ReadUsnJournalData.UsnJournalID = pVme->UsnJournalData.UsnJournalID; // Journal ID.
  7420. //
  7421. // This read completes when either the buffer is full or the BytesToWaitFor
  7422. // parameter in the ReadUsnJournalData parameter block is exceeded.
  7423. // The DelayTime in the ReadUsnJournalData parameter block controls how
  7424. // often the NTFS code wakes up and checks the buffer. It is NOT a timeout
  7425. // on this call. Setting BytesToWaitFor to sizeof(USN) + 1
  7426. // means that as soon as any data shows up in the journal the call completes.
  7427. // Using this call with async IO lets us monitor a large number of volumes
  7428. // with a few threads.
  7429. //
  7430. // You can't really have multiple read requests outstanding on a single
  7431. // journal since you don't know where the next read will start until the
  7432. // previous read completes. Even though only one I/O can be outstanding
  7433. // per volume journal it is still possible to have multiple Jbuffs queued
  7434. // for USN processing because the rate of generating new journal entries
  7435. // may exceed the rate at which the data can be processed.
  7436. //
  7437. //
  7438. // Init the buffer Descriptor.
  7439. //
  7440. Jbuff->pVme = pVme;
  7441. Jbuff->Iosb.Information = 0;
  7442. Jbuff->Iosb.Status = 0;
  7443. Jbuff->Overlap.hEvent = NULL;
  7444. Jbuff->JrnlReadPoint = pVme->JrnlReadPoint;
  7445. Jbuff->WStatus = ERROR_IO_PENDING;
  7446. Jbuff->FileHandle = pVme->VolumeHandle;
  7447. //
  7448. // To catch I/O completions with no data.
  7449. //
  7450. ZeroMemory(Jbuff->DataBuffer, sizeof(USN) + sizeof(USN_RECORD));
  7451. InterlockedIncrement(&JournalActiveIoRequests);
  7452. Status = NtFsControlFile(
  7453. Jbuff->FileHandle, // IN HANDLE FileHandle,
  7454. NULL, // IN HANDLE Event OPTIONAL,
  7455. NULL, // IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
  7456. &Jbuff->Overlap, // IN PVOID ApcContext OPTIONAL,
  7457. &Jbuff->Iosb, // OUT PIO_STATUS_BLOCK IoStatusBlock,
  7458. FSCTL_READ_USN_JOURNAL, // IN ULONG FsControlCode,
  7459. &ReadUsnJournalData, // IN PVOID InputBuffer OPTIONAL,
  7460. sizeof(ReadUsnJournalData), // IN ULONG InputBufferLength,
  7461. Jbuff->DataBuffer, // OUT PVOID OutputBuffer OPTIONAL,
  7462. Jbuff->BufferSize ); // IN ULONG OutputBufferLength
  7463. WStatus = FrsSetLastNTError(Status);
  7464. DPRINT2_WS(4, "ReadUsnJournalData - NTStatus %08lx, USN = %08x %08x",
  7465. Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
  7466. if (!NT_SUCCESS(Status)) {
  7467. //
  7468. // I/O not started so it doesn't complete through the port.
  7469. //
  7470. InterlockedDecrement(&JournalActiveIoRequests);
  7471. DPRINT2_WS(0, "ReadUsnJournalData Failed - NTStatus %08lx, USN = %08x %08x",
  7472. Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
  7473. }
  7474. return Status;
  7475. }
  7476. BOOL
  7477. JrnlGetQueuedCompletionStatus(
  7478. HANDLE CompletionPort,
  7479. LPDWORD lpNumberOfBytesTransferred,
  7480. PULONG_PTR lpCompletionKey,
  7481. LPOVERLAPPED *lpOverlapped
  7482. )
  7483. /*++
  7484. Routine Description:
  7485. ** NOTE ** Imported version of Win32 function so we can access NTStatus
  7486. return value to seperate out the 32 odd NT to Win32 mappings for
  7487. the ERROR_INVALID_PARAMETER Win32 error code.
  7488. This function waits for pending I/O operations associated with the
  7489. specified completion port to complete. Server applications may have
  7490. several threads issuing this call on the same completion port. As
  7491. I/O operations complete, they are queued to this port. If threads
  7492. are actively waiting in this call, queued requests complete their
  7493. call.
  7494. This API returns a boolean value.
  7495. A value of TRUE means that a pending I/O completed successfully.
  7496. The the number of bytes transfered during the I/O, the completion
  7497. key that indicates which file the I/O occured on, and the overlapped
  7498. structure address used in the original I/O are all returned.
  7499. A value of FALSE indicates one ow two things:
  7500. If *lpOverlapped is NULL, no I/O operation was dequeued. This
  7501. typically means that an error occured while processing the
  7502. parameters to this call, or that the CompletionPort handle has been
  7503. closed or is otherwise invalid. GetLastError() may be used to
  7504. further isolate this.
  7505. If *lpOverlapped is non-NULL, an I/O completion packet was dequeud,
  7506. but the I/O operation resulted in an error. GetLastError() can be
  7507. used to further isolate the I/O error. The the number of bytes
  7508. transfered during the I/O, the completion key that indicates which
  7509. file the I/O occured on, and the overlapped structure address used
  7510. in the original I/O are all returned.
  7511. Arguments:
  7512. CompletionPort - Supplies a handle to a completion port to wait on.
  7513. lpNumberOfBytesTransferred - Returns the number of bytes transfered during the
  7514. I/O operation whose completion is being reported.
  7515. lpCompletionKey - Returns a completion key value specified during
  7516. CreateIoCompletionPort. This is a per-file key that can be used
  7517. to tall the caller the file that an I/O operation completed on.
  7518. lpOverlapped - Returns the address of the overlapped structure that
  7519. was specified when the I/O was issued. The following APIs may
  7520. complete using completion ports. This ONLY occurs if the file
  7521. handle is associated with with a completion port AND an
  7522. overlapped structure was passed to the API.
  7523. LockFileEx
  7524. WriteFile
  7525. ReadFile
  7526. DeviceIoControl
  7527. WaitCommEvent
  7528. ConnectNamedPipe
  7529. TransactNamedPipe
  7530. Return Value:
  7531. TRUE - An I/O operation completed successfully.
  7532. lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
  7533. are all valid.
  7534. FALSE - If lpOverlapped is NULL, the operation failed and no I/O
  7535. completion data is retured. GetLastError() can be used to
  7536. further isolate the cause of the error (bad parameters, invalid
  7537. completion port handle). Otherwise, a pending I/O operation
  7538. completed, but it completed with an error. GetLastError() can
  7539. be used to further isolate the I/O error.
  7540. lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
  7541. are all valid.
  7542. --*/
  7543. {
  7544. #undef DEBSUB
  7545. #define DEBSUB "JrnlGetQueuedCompletionStatus:"
  7546. IO_STATUS_BLOCK IoSb;
  7547. NTSTATUS Status;
  7548. LPOVERLAPPED LocalOverlapped;
  7549. BOOL rv;
  7550. Status = NtRemoveIoCompletion(CompletionPort,
  7551. (PVOID *)lpCompletionKey,
  7552. (PVOID *)&LocalOverlapped,
  7553. &IoSb,
  7554. NULL); // Infinite Timeout.
  7555. if ( !NT_SUCCESS(Status) || Status == STATUS_TIMEOUT ) {
  7556. *lpOverlapped = NULL;
  7557. if ( Status == STATUS_TIMEOUT ) {
  7558. SetLastError(WAIT_TIMEOUT);
  7559. } else {
  7560. FrsSetLastNTError(Status);
  7561. }
  7562. rv = FALSE;
  7563. DPRINT_NT(1, "NtRemoveIoCompletion : ", Status);
  7564. } else {
  7565. *lpOverlapped = LocalOverlapped;
  7566. *lpNumberOfBytesTransferred = (DWORD)IoSb.Information;
  7567. if ( !NT_SUCCESS(IoSb.Status) ){
  7568. FrsSetLastNTError( IoSb.Status );
  7569. DPRINT_NT(1, "NtRemoveIoCompletion : ", IoSb.Status);
  7570. rv = FALSE;
  7571. } else {
  7572. rv = TRUE;
  7573. }
  7574. }
  7575. return rv;
  7576. }
  7577. DWORD
  7578. WINAPI
  7579. JournalReadThread(
  7580. IN LPVOID Context
  7581. )
  7582. /*++
  7583. Routine Description:
  7584. This routine processes the I/O completions on the JournalCompletionPort.
  7585. It also handles cancel requests posted to the port when the volume
  7586. reference count goes to zero. The basic flow is wait on the port,
  7587. check for errors, check for cancel requests and do a cancel, check for
  7588. read success returns. When data comes back. get the next USN to use,
  7589. queue the buffer to the JournalProcessQueue, get a new buffer off
  7590. the free list and post a new read to the journal handle.
  7591. For canceled requests or requests that complete with an error
  7592. put the Volume Monitor Entry on the VolumeMonitorStopQueue along with
  7593. the error status in the entry.
  7594. This one thread processes all the read requests for all the NTFS volumes
  7595. we monitor. Once the first read is posted by an external routine we
  7596. pick it up from here.
  7597. TODO: When we run out of free journal buffers, create more (up to a limit).
  7598. Then put code in the processing loop to trim back the freelist.
  7599. Arguments:
  7600. Context not used. The Journal Global state is implied.
  7601. Thread Return Value:
  7602. NTSTATUS status
  7603. --*/
  7604. {
  7605. #undef DEBSUB
  7606. #define DEBSUB "JournalReadThread:"
  7607. LPOVERLAPPED JbuffOverlap;
  7608. DWORD IoSize;
  7609. PVOLUME_MONITOR_ENTRY pVme;
  7610. PJBUFFER Jbuff;
  7611. ULONG WStatus, WStatus2;
  7612. NTSTATUS Status;
  7613. BOOL StoppedOne;
  7614. BOOL ErrorFlag;
  7615. PLIST_ENTRY Entry;
  7616. USN NextJrnlReadPoint;
  7617. PCOMMAND_PACKET CmdPkt;
  7618. BY_HANDLE_FILE_INFORMATION FileInfo;
  7619. CHAR TimeString[TIME_STRING_LENGTH];
  7620. IO_STATUS_BLOCK Iosb;
  7621. ULONGLONG VolumeInfoData[(sizeof(FILE_FS_VOLUME_INFORMATION) +
  7622. MAXIMUM_VOLUME_LABEL_LENGTH + 7)/8];
  7623. PFILE_FS_VOLUME_INFORMATION VolumeInfo =
  7624. (PFILE_FS_VOLUME_INFORMATION)VolumeInfoData;
  7625. //
  7626. // Try-Finally
  7627. //
  7628. try {
  7629. //
  7630. // Capture exception.
  7631. //
  7632. try {
  7633. WAIT_FOR_WORK:
  7634. //
  7635. // Look for a Volume Monitor Entry to be placed on the work queue.
  7636. // The agent that put the entry on the queue also started the first
  7637. // read to the journal so we can start looking for I/O completions.
  7638. //
  7639. while (TRUE) {
  7640. WStatus = FrsRtlWaitForQueueFull(&VolumeMonitorQueue, 10000);
  7641. DPRINT1_WS(5, "Wait on VolumeMonitorQueue: Count: %d",
  7642. VolumeMonitorQueue.Count, WStatus);
  7643. if (WIN_SUCCESS(WStatus)) {
  7644. break;
  7645. }
  7646. switch (WStatus) {
  7647. case WAIT_TIMEOUT:
  7648. if (KillJournalThreads) {
  7649. //
  7650. // Terminate the thread.
  7651. //
  7652. JournalReadThreadHandle = NULL;
  7653. ExitThread(WStatus);
  7654. }
  7655. break;
  7656. case ERROR_INVALID_HANDLE:
  7657. //
  7658. // The VolumeMonitorQueue was rundown. Exit.
  7659. //
  7660. JournalReadThreadHandle = NULL;
  7661. ExitThread(WStatus);
  7662. break;
  7663. default:
  7664. DPRINT_WS(0, "Unexpected status from FrsRtlWaitForQueueFull", WStatus);
  7665. JournalReadThreadHandle = NULL;
  7666. ExitThread(WStatus);
  7667. }
  7668. }
  7669. //
  7670. // Loop as long as we have volumes to monitor or have I/O outstanding on the port.
  7671. //
  7672. while ((VolumeMonitorQueue.Count != 0) ||
  7673. (JournalActiveIoRequests != 0) ) {
  7674. pVme = NULL;
  7675. JbuffOverlap = NULL;
  7676. WStatus = ERROR_SUCCESS;
  7677. IoSize = 0;
  7678. DPRINT(5, "Waiting on JournalCompletionPort \n");
  7679. ErrorFlag = !JrnlGetQueuedCompletionStatus(JournalCompletionPort,
  7680. &IoSize,
  7681. (PULONG_PTR) &pVme,
  7682. &JbuffOverlap);
  7683. //INFINITE);
  7684. //
  7685. // Check for an error return and see if the completion port has
  7686. // disappeared.
  7687. //
  7688. if (ErrorFlag) {
  7689. WStatus = GetLastError();
  7690. DPRINT_WS(3, "Error from GetQueuedCompletionStatus", WStatus);
  7691. DPRINT5(3, "CompPort: %08x, IoSize: %08x, pVme: %08x, OvLap: %08x, VolHandle: %08x\n",
  7692. JournalCompletionPort, IoSize, pVme, JbuffOverlap, pVme->VolumeHandle);
  7693. if (WStatus == ERROR_INVALID_HANDLE) {
  7694. JournalCompletionPort = NULL;
  7695. JournalReadThreadHandle = NULL;
  7696. ExitThread(WStatus);
  7697. }
  7698. if (WStatus == ERROR_INVALID_PARAMETER) {
  7699. DPRINT(0, "ERROR- Invalid Param from GetQueuedCompletionStatus\n");
  7700. if (!GetFileInformationByHandle(JournalCompletionPort, &FileInfo)) {
  7701. WStatus2 = GetLastError();
  7702. DPRINT_WS(0, "Error from GetFileInformationByHandle", WStatus2);
  7703. } else {
  7704. CHAR FlagBuf[120];
  7705. DPRINT(0, "Info on JournalCompletionPort\n");
  7706. FrsFlagsToStr(FileInfo.dwFileAttributes, FileAttrFlagNameTable,
  7707. sizeof(FlagBuf), FlagBuf);
  7708. DPRINT2(0, "FileAttributes %08x Flags [%s]\n",
  7709. FileInfo.dwFileAttributes, FlagBuf);
  7710. FileTimeToString(&FileInfo.ftCreationTime, TimeString);
  7711. DPRINT1(0, "CreationTime %s\n", TimeString);
  7712. FileTimeToString(&FileInfo.ftLastAccessTime, TimeString);
  7713. DPRINT1(0, "LastAccessTime %08x\n", TimeString);
  7714. FileTimeToString(&FileInfo.ftLastWriteTime, TimeString);
  7715. DPRINT1(0, "LastWriteTime %08x\n", TimeString);
  7716. DPRINT1(0, "VolumeSerialNumber %08x\n", FileInfo.dwVolumeSerialNumber);
  7717. DPRINT1(0, "FileSizeHigh %08x\n", FileInfo.nFileSizeHigh);
  7718. DPRINT1(0, "FileSizeLow %08x\n", FileInfo.nFileSizeLow);
  7719. DPRINT1(0, "NumberOfLinks %08x\n", FileInfo.nNumberOfLinks);
  7720. DPRINT1(0, "FileIndexHigh %08x\n", FileInfo.nFileIndexHigh);
  7721. DPRINT1(0, "FileIndexLow %08x\n", FileInfo.nFileIndexLow);
  7722. }
  7723. //
  7724. // See if the volume handle still works.
  7725. //
  7726. DPRINT(0, "Dumping Volume information\n");
  7727. Status = NtQueryVolumeInformationFile(pVme->VolumeHandle,
  7728. &Iosb,
  7729. VolumeInfo,
  7730. sizeof(VolumeInfoData),
  7731. FileFsVolumeInformation);
  7732. if ( NT_SUCCESS(Status) ) {
  7733. VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
  7734. FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
  7735. DPRINT5(4,"%-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
  7736. VolumeInfo->VolumeLabel,
  7737. VolumeInfo->VolumeLabelLength,
  7738. (VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
  7739. VolumeInfo->VolumeSerialNumber,
  7740. TimeString);
  7741. } else {
  7742. DPRINT_NT(0, "ERROR - Volume root QueryVolumeInformationFile failed.", Status);
  7743. }
  7744. //
  7745. // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  7746. // begin workaround for journal bug.
  7747. //
  7748. //
  7749. InterlockedDecrement(&JournalActiveIoRequests);
  7750. if (JbuffOverlap == NULL) {
  7751. //
  7752. // No packet dequeued. Unexpected error Cancel all I/O requests.
  7753. //
  7754. DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
  7755. pVme = NULL;
  7756. WStatus = E_UNEXPECTED;
  7757. goto STOP_JOURNAL_IO;
  7758. }
  7759. //
  7760. // Get the base of the Jbuff struct containing this overlap struct.
  7761. //
  7762. Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
  7763. //DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
  7764. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7765. pVme->ActiveIoRequests -= 1;
  7766. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  7767. //
  7768. // If I/O on this journal has been stopped or the I/O operation
  7769. // was aborted then free the Jbuff. There should be at most one
  7770. // I/O per volume that comes in with the aborted status.
  7771. //
  7772. // Note: We can still have other Jbufs queued for processing by the
  7773. // USN Journal processing thread for this VME.
  7774. //
  7775. if ((!pVme->IoActive) ||
  7776. (WStatus == ERROR_OPERATION_ABORTED) ) {
  7777. DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
  7778. DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
  7779. pVme->FSVolInfo.VolumeLabel, IoSize);
  7780. //
  7781. // How do we know when all outstanding Jbuffs have
  7782. // been retired for this VME? need an interlocked ref count?
  7783. // Why does this matter?
  7784. //
  7785. //DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
  7786. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7787. Jbuff = NULL;
  7788. //
  7789. // Even if the operation was aborted. If I/O has not stopped
  7790. // (e.g. a quick pause-unpause sequence) then start another read.
  7791. //
  7792. if (!pVme->IoActive) {
  7793. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7794. continue;
  7795. }
  7796. }
  7797. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7798. DPRINT(0, "Journal request retry\n");
  7799. DPRINT1(0, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  7800. if (Jbuff != NULL ) {
  7801. DPRINT1(0, "jb: tf %08x (BUG INVAL PARAM)\n", Jbuff);
  7802. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7803. Jbuff = NULL;
  7804. }
  7805. //
  7806. // Wait and then retry the journal read again.
  7807. //
  7808. Sleep(500);
  7809. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7810. goto START_NEXT_READ;
  7811. //
  7812. // End workaround for journal bug.
  7813. // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  7814. //
  7815. //FRS_ASSERT(WStatus != ERROR_INVALID_PARAMETER);
  7816. }
  7817. //
  7818. // Error may be ERROR_OPERATION_ABORTED but shouldn't be success.
  7819. // This gets sorted out below.
  7820. //
  7821. FRS_ASSERT(WStatus != ERROR_SUCCESS);
  7822. }
  7823. //
  7824. // Check if no packet was dequeued from the port.
  7825. //
  7826. if (JbuffOverlap == NULL) {
  7827. //
  7828. // No packet dequeued. Unexpected error Cancel all I/O requests.
  7829. //
  7830. DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
  7831. pVme = NULL;
  7832. WStatus = E_UNEXPECTED;
  7833. goto STOP_JOURNAL_IO;
  7834. }
  7835. //
  7836. // A packet was dequeued from the port. First check if this
  7837. // is a request to stop or pause I/O on this journal.
  7838. // There is no Jbuff with this request and the overlap struct
  7839. // is part of the VME.
  7840. //
  7841. if (IoSize == FRS_CANCEL_JOURNAL_READ) {
  7842. pVme->StopIo = FALSE; // VME Overlap struct available.
  7843. DPRINT1(4, "Cancel Journal Read for %ws\n", pVme->FSVolInfo.VolumeLabel);
  7844. //
  7845. // cancel any outstanding I/O on this volume handle and
  7846. // deactivate the VME.
  7847. // Note: Any I/O on this volume handle that has already
  7848. // been completed and queued to the completion port
  7849. // is not affected by the cancel. Use !pVme->IoActive to
  7850. // throw those requests away.
  7851. //
  7852. WStatus = ERROR_SUCCESS;
  7853. goto STOP_JOURNAL_IO;
  7854. } else
  7855. if (IoSize == FRS_PAUSE_JOURNAL_READ) {
  7856. DPRINT2(4, "Pause Journal Read for %ws. Jrnl State: %s\n",
  7857. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  7858. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7859. //
  7860. // This is a pause journal request. Stop I/O on the journal
  7861. // but don't deactivate the VME.
  7862. //
  7863. pVme->StopIo = FALSE; // VME Overlap struct available.
  7864. if (pVme->JournalState == JRNL_STATE_PAUSE1) {
  7865. //
  7866. // Cancel I/O on the journal read handle and put a second
  7867. // pause request on the port so we know it was done.
  7868. //
  7869. pVme->IoActive = FALSE;
  7870. if (!CancelIo(pVme->VolumeHandle)) {
  7871. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  7872. }
  7873. pVme->WStatus = ERROR_SUCCESS;
  7874. WStatus = JrnlSubmitReadThreadRequest(pVme,
  7875. FRS_PAUSE_JOURNAL_READ,
  7876. JRNL_STATE_PAUSE2);
  7877. DPRINT_WS(0, "Error from JrnlSubmitReadThreadRequest", WStatus);
  7878. } else
  7879. if (pVme->JournalState == JRNL_STATE_PAUSE2) {
  7880. //
  7881. // This is the second pause request so there will be no more
  7882. // journal data buffers on this volume. (NOT TRUE, sometimes
  7883. // the abort takes awhile but since IoActive is clear the
  7884. // buffer will be ignored.)
  7885. // Send a paused complete command to the journal process queue.
  7886. // When it gets to the head of the queue, all prior queued
  7887. // journal buffers will have been processed so the filter table
  7888. // can now be updated.
  7889. //
  7890. CmdPkt = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_PAUSED);
  7891. CmdPkt->Parameters.JournalRequest.Replica = NULL;
  7892. CmdPkt->Parameters.JournalRequest.pVme = pVme;
  7893. FrsSubmitCommand(CmdPkt, FALSE);
  7894. } else {
  7895. //
  7896. // If we are stopping while in the middle of a Pause request
  7897. // the stop takes precedence.
  7898. //
  7899. if ((pVme->JournalState != JRNL_STATE_STOPPING) &&
  7900. (pVme->JournalState != JRNL_STATE_STOPPED)) {
  7901. DPRINT2(0, "ERROR: Invalid Journal State: %s on pause request on volume %ws,\n",
  7902. RSS_NAME(pVme->JournalState), pVme->FSVolInfo.VolumeLabel);
  7903. }
  7904. }
  7905. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7906. continue;
  7907. }
  7908. //
  7909. // Not a cancel or pause packet. It must be a journal read response.
  7910. //
  7911. InterlockedDecrement(&JournalActiveIoRequests);
  7912. //
  7913. // Get the base of the Jbuff struct containing this overlap struct.
  7914. //
  7915. Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
  7916. //DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
  7917. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7918. pVme->ActiveIoRequests -= 1;
  7919. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  7920. //
  7921. // If I/O on this journal has been stopped or the I/O operation
  7922. // was aborted then free the Jbuff. There should be at most one
  7923. // I/O per volume that comes in with the aborted status.
  7924. //
  7925. // Note: We can still have other Jbufs queued for processing by the
  7926. // USN Journal processing thread for this VME.
  7927. //
  7928. if ((!pVme->IoActive) ||
  7929. (IoSize < sizeof(USN)) ||
  7930. (WStatus == ERROR_OPERATION_ABORTED) ) {
  7931. DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
  7932. DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
  7933. pVme->FSVolInfo.VolumeLabel, IoSize);
  7934. //
  7935. // How do we know when all outstanding Jbuffs have
  7936. // been retired for this VME? need an interlocked ref count?
  7937. // Why does it matter?
  7938. //
  7939. //DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
  7940. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7941. //
  7942. // Even if the operation was aborted. If I/O has not stopped
  7943. // (e.g. a quick pause-unpause sequence) then start another read.
  7944. //
  7945. if (pVme->IoActive) {
  7946. goto START_NEXT_READ;
  7947. }
  7948. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7949. continue;
  7950. }
  7951. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7952. /**************************************************************
  7953. * *
  7954. * We have a successfull I/O completion packet. *
  7955. * Return the status and data length then put down *
  7956. * another read at the Next uSN on the journal. *
  7957. * *
  7958. **************************************************************/
  7959. Jbuff->WStatus = WStatus;
  7960. Jbuff->DataLength = IoSize;
  7961. //
  7962. // Update next USN in VME and send the journal buffer out for processing.
  7963. //
  7964. NextJrnlReadPoint = *(USN *)(Jbuff->DataBuffer);
  7965. if (NextJrnlReadPoint < pVme->JrnlReadPoint) {
  7966. DPRINT2(0, "USN error: Next < Previous, Next %08x %08x, Prev: %08x %08x\n",
  7967. PRINTQUAD(NextJrnlReadPoint), PRINTQUAD(pVme->JrnlReadPoint));
  7968. WStatus = ERROR_INVALID_DATA;
  7969. goto STOP_JOURNAL_IO;
  7970. }
  7971. pVme->JrnlReadPoint = NextJrnlReadPoint;
  7972. DPRINT1(5, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  7973. //DPRINT2(5, "jb: tu %08x (len: %d)\n", Jbuff, Jbuff->DataLength);
  7974. FrsRtlInsertTailQueue(&JournalProcessQueue, &Jbuff->ListEntry);
  7975. //
  7976. // If the read request failed for some reason (e.g. ERROR_NOT_FOUND)
  7977. // let USN processing figure it out and start I/O back up as appropriate.
  7978. //
  7979. if (!WIN_SUCCESS(WStatus)) {
  7980. pVme->IoActive = FALSE;
  7981. continue;
  7982. }
  7983. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7984. START_NEXT_READ:
  7985. //
  7986. // Get a free buffer and start another read on the journal.
  7987. //
  7988. WStatus = JrnlUnPauseVolume(pVme, NULL, TRUE);
  7989. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7990. //
  7991. // Check for abort and cancel all I/O.
  7992. //
  7993. if (WStatus == ERROR_REQUEST_ABORTED) {
  7994. pVme = NULL;
  7995. DPRINT(0, "JournalFreeQueue Abort. Stopping all journal I/O\n");
  7996. goto STOP_JOURNAL_IO;
  7997. }
  7998. //
  7999. // If the response is success or busy then we can expect to see a
  8000. // buffer come through the port.
  8001. //
  8002. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_BUSY)) {
  8003. goto STOP_JOURNAL_IO;
  8004. }
  8005. continue;
  8006. STOP_JOURNAL_IO:
  8007. //
  8008. // Test if stopping I/O on just one volume.
  8009. //
  8010. if (pVme != NULL) {
  8011. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  8012. //
  8013. // We should send a cmd packet to the journal process queue since
  8014. // that is the point where all pending journal buffers are completed.
  8015. //
  8016. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
  8017. if (!CancelIo(pVme->VolumeHandle)) {
  8018. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  8019. }
  8020. VmeDeactivate(&VolumeMonitorQueue, pVme, WStatus);
  8021. SetEvent(pVme->Event);
  8022. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  8023. continue;
  8024. }
  8025. //
  8026. // Stop all I/O on all volume journals.
  8027. //
  8028. StoppedOne = FALSE;
  8029. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  8030. //
  8031. // The loop iterator pE is of type VOLUME_MONITOR_ENTRY.
  8032. //
  8033. if (pE->JournalState != JRNL_STATE_STOPPED) {
  8034. StoppedOne = TRUE;
  8035. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_STOPPED);
  8036. if (!CancelIo(pE->VolumeHandle)) {
  8037. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  8038. }
  8039. }
  8040. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  8041. SetEvent(pE->Event);
  8042. );
  8043. if (!StoppedOne && (JbuffOverlap == NULL)) {
  8044. //
  8045. // We didn't stop anything and nothing came thru the port.
  8046. // Must be hung.
  8047. //
  8048. DPRINT(0, "ERROR - Readjournalthread hung. Killing thread\n");
  8049. JournalReadThreadHandle = NULL;
  8050. ExitThread(WStatus);
  8051. }
  8052. } // end of while()
  8053. if (KillJournalThreads) {
  8054. //
  8055. // Terminate the thread.
  8056. //
  8057. DPRINT(4, "Readjournalthread Terminating.\n");
  8058. JournalReadThreadHandle = NULL;
  8059. ExitThread(ERROR_SUCCESS);
  8060. }
  8061. goto WAIT_FOR_WORK;
  8062. //
  8063. // Get exception status.
  8064. //
  8065. } except (EXCEPTION_EXECUTE_HANDLER) {
  8066. GET_EXCEPTION_CODE(WStatus);
  8067. }
  8068. } finally {
  8069. if (WIN_SUCCESS(WStatus)) {
  8070. if (AbnormalTermination()) {
  8071. WStatus = ERROR_OPERATION_ABORTED;
  8072. }
  8073. }
  8074. DPRINT_WS(0, "Read Journal Thread finally.", WStatus);
  8075. //
  8076. // Trigger FRS shutdown if we terminated abnormally.
  8077. //
  8078. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
  8079. JournalReadThreadHandle = NULL;
  8080. DPRINT(0, "Readjournalthread terminated abnormally, forcing service shutdown.\n");
  8081. FrsIsShuttingDown = TRUE;
  8082. SetEvent(ShutDownEvent);
  8083. }
  8084. }
  8085. return WStatus;
  8086. }
  8087. ULONG
  8088. JrnlGetEndOfJournal(
  8089. IN PVOLUME_MONITOR_ENTRY pVme,
  8090. OUT USN *EndOfJournal
  8091. )
  8092. /*++
  8093. Routine Description:
  8094. Get the address of the end of the USN Journal. This is used for starting
  8095. a new replica set at the end of the journal. The replica tree starts out
  8096. empty so there is no need to read through several megabytes of
  8097. USN records. It is also used to find the end of the journal before
  8098. recovery starts.
  8099. Arguments:
  8100. pVme - The Volume Monitor struct to initialize. It provides the volume
  8101. handle.
  8102. EndOfJournal - Returned USN of the end of the Journal or 0.
  8103. Return Value:
  8104. Win32 status.
  8105. --*/
  8106. {
  8107. #undef DEBSUB
  8108. #define DEBSUB "JrnlGetEndOfJournal:"
  8109. USN_JOURNAL_DATA UsnJrnlData;
  8110. DWORD WStatus;
  8111. ULONG BytesReturned = 0;
  8112. *EndOfJournal = QUADZERO;
  8113. //
  8114. // The following call returns:
  8115. //
  8116. // UsnJournalID Current Instance of Journal
  8117. // FirstUsn First position that can be read from journal
  8118. // NextUsn Next position that will be written to the journal
  8119. // LowestValidUsn First record that was written into the journal for
  8120. // this journal instance. It is possible that enumerating
  8121. // the files on disk will return a USN lower than this
  8122. // value. This indicates that the journal has been
  8123. // restamped since the last USN was written for this file.
  8124. // It means that the file may have been changed and
  8125. // journal data was lost.
  8126. // MaxUsn The largest change USN the journal will support.
  8127. // MaximumSize
  8128. // AllocationDelta
  8129. //
  8130. if (!DeviceIoControl(pVme->VolumeHandle,
  8131. FSCTL_QUERY_USN_JOURNAL,
  8132. NULL, 0,
  8133. &UsnJrnlData, sizeof(UsnJrnlData),
  8134. &BytesReturned, NULL)) {
  8135. WStatus = GetLastError();
  8136. DPRINT_WS(0, "Error from FSCTL_QUERY_USN_JOURNAL", WStatus);
  8137. if (WStatus == ERROR_NOT_READY) {
  8138. //
  8139. // Volume is being dismounted.
  8140. //
  8141. } else
  8142. if (WStatus == ERROR_BAD_COMMAND) {
  8143. //
  8144. // NT status was INVALID_DEVICE_STATE.
  8145. //
  8146. } else
  8147. if (WStatus == ERROR_INVALID_PARAMETER) {
  8148. //
  8149. // Bad Handle.
  8150. //
  8151. } else
  8152. if (WStatus == ERROR_JOURNAL_DELETE_IN_PROGRESS) {
  8153. //
  8154. // Journal being deleted.
  8155. //
  8156. } else
  8157. if (WStatus == ERROR_JOURNAL_NOT_ACTIVE) {
  8158. //
  8159. // Journal ???.
  8160. //
  8161. }
  8162. return WStatus;
  8163. }
  8164. if (BytesReturned != sizeof(UsnJrnlData)) {
  8165. //
  8166. // Unexpected result return.
  8167. //
  8168. return ERROR_JOURNAL_NOT_ACTIVE;
  8169. }
  8170. DPRINT1(4, ":S: EOJ from jrnl query %08x %08x\n", PRINTQUAD(UsnJrnlData.NextUsn));
  8171. //
  8172. // Return the next read point for the journal.
  8173. //
  8174. *EndOfJournal = UsnJrnlData.NextUsn;
  8175. return ERROR_SUCCESS;
  8176. }
  8177. ULONG
  8178. JrnlEnumerateFilterTreeBU(
  8179. PGENERIC_HASH_TABLE Table,
  8180. PFILTER_TABLE_ENTRY FilterEntry,
  8181. PJRNL_FILTER_ENUM_ROUTINE Function,
  8182. PVOID Context
  8183. )
  8184. /*++
  8185. Routine Description:
  8186. This routine walks through the entries in the Volume filter table connected
  8187. by the child list starting with the FilterEntry provided. The traversal
  8188. is bottom up. At each node the function provided is called with the
  8189. entry address and the context pointer.
  8190. It is assumed that the caller has acquired the Filter Table Child list
  8191. lock for the Replica set being traversed.
  8192. Before calling the function with an entry we increment the ref count.
  8193. The Called function must DECREMENT the ref count (or delete the entry).
  8194. Arguments:
  8195. Table - The context of the Hash Table to enumerate.
  8196. FilterEntry - The Filter Entry node to start at.
  8197. Function - The function to call for each entry in the subtree. It is of
  8198. of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
  8199. enumeration else true.
  8200. Context - A context ptr to pass through to the Function.
  8201. Return Value:
  8202. The status code from the argument function.
  8203. --*/
  8204. {
  8205. #undef DEBSUB
  8206. #define DEBSUB "JrnlEnumerateFilterTreeBU:"
  8207. PLIST_ENTRY ListHead;
  8208. ULONG WStatus;
  8209. //
  8210. // Check for no entries in tree.
  8211. //
  8212. if (FilterEntry == NULL) {
  8213. return ERROR_SUCCESS;
  8214. }
  8215. INCREMENT_FILTER_REF_COUNT(FilterEntry);
  8216. ListHead = &FilterEntry->ChildHead;
  8217. ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
  8218. //
  8219. // pE is of type PFILTER_TABLE_ENTRY.
  8220. //
  8221. if (!IsListEmpty(&pE->ChildHead)) {
  8222. //
  8223. // Recurse on the child's list head.
  8224. //
  8225. WStatus = JrnlEnumerateFilterTreeBU(Table, pE, Function, Context);
  8226. } else {
  8227. //
  8228. // Apply the function to the node.
  8229. // The function could remove the node from the list but the list macro
  8230. // has captured the Flink so the traversal can continue.
  8231. //
  8232. INCREMENT_FILTER_REF_COUNT(pE);
  8233. WStatus = (Function)(Table, pE, Context);
  8234. }
  8235. if (!WIN_SUCCESS(WStatus)) {
  8236. goto RETURN;
  8237. }
  8238. );
  8239. WStatus = (Function)(Table, FilterEntry, Context);
  8240. RETURN:
  8241. return WStatus;
  8242. }
  8243. ULONG
  8244. JrnlEnumerateFilterTreeTD(
  8245. PGENERIC_HASH_TABLE Table,
  8246. PFILTER_TABLE_ENTRY FilterEntry,
  8247. PJRNL_FILTER_ENUM_ROUTINE Function,
  8248. PVOID Context
  8249. )
  8250. /*++
  8251. Routine Description:
  8252. This routine walks through the entries in the Volume filter table connected
  8253. by the child list starting with the FilterEntry provided. The traversal
  8254. is Top Down. At each node the function provided is called with the
  8255. entry address and the context pointer.
  8256. It is assumed that the caller has acquired the Filter Table Child list
  8257. lock for the Replica set being traversed.
  8258. Before calling the function with an entry we increment the ref count.
  8259. The Called function must DECREMENT the ref count (or delete the entry).
  8260. Arguments:
  8261. Table - The context of the Hash Table to enumerate.
  8262. FilterEntry - The Filter Entry node to start at.
  8263. Function - The function to call for each entry in the subtree. It is of
  8264. of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
  8265. enumeration else true.
  8266. Context - A context ptr to pass through to the Function.
  8267. Return Value:
  8268. The status code from the argument function.
  8269. --*/
  8270. {
  8271. #undef DEBSUB
  8272. #define DEBSUB "JrnlEnumerateFilterTreeTD:"
  8273. PLIST_ENTRY ListHead;
  8274. ULONG WStatus;
  8275. //
  8276. // Check for no entries in tree.
  8277. //
  8278. if (FilterEntry == NULL) {
  8279. return ERROR_SUCCESS;
  8280. }
  8281. //
  8282. // Apply the function to the root node.
  8283. // The function could remove the node from the table but not from the list
  8284. // since our caller has the child list replica lock. Bump the ref count
  8285. // to keep the memory from being freed.
  8286. //
  8287. INCREMENT_FILTER_REF_COUNT(FilterEntry);
  8288. WStatus = (Function)(Table, FilterEntry, Context);
  8289. if (!WIN_SUCCESS(WStatus)) {
  8290. goto RETURN;
  8291. }
  8292. //
  8293. // Warning: If the function above deletes the node the following ref
  8294. // is invalid. This should not be a problem because deletes should only
  8295. // be done bottom up.
  8296. //
  8297. ListHead = &FilterEntry->ChildHead;
  8298. ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
  8299. //
  8300. // pE is of type PFILTER_TABLE_ENTRY.
  8301. //
  8302. //
  8303. // Apply the function to each child node.
  8304. // The function could remove the node from the list but the list macro
  8305. // has captured the Flink so the traversal can continue.
  8306. //
  8307. if (!IsListEmpty(&pE->ChildHead)) {
  8308. //
  8309. // Recurse on the child's list head.
  8310. //
  8311. WStatus = JrnlEnumerateFilterTreeTD(Table, pE, Function, Context);
  8312. } else {
  8313. INCREMENT_FILTER_REF_COUNT(pE);
  8314. WStatus = (Function)(Table, pE, Context);
  8315. }
  8316. if (!WIN_SUCCESS(WStatus)) {
  8317. goto RETURN;
  8318. }
  8319. );
  8320. WStatus = ERROR_SUCCESS;
  8321. //
  8322. // Done with this Root node so decrement the ref count which could
  8323. // cause it to be deleted.
  8324. //
  8325. RETURN:
  8326. return WStatus;
  8327. }
  8328. VOID
  8329. JrnlHashEntryFree(
  8330. PGENERIC_HASH_TABLE Table,
  8331. PVOID Buffer
  8332. )
  8333. /*++
  8334. Routine Description:
  8335. Free the memory pointed to by Buffer.
  8336. Arguments:
  8337. Table -- ptr to a hash table struct (has heap handle).
  8338. Buffer -- ptr to buffer to free.
  8339. Return Value:
  8340. None.
  8341. --*/
  8342. {
  8343. #undef DEBSUB
  8344. #define DEBSUB "JrnlHashEntryFree:"
  8345. FrsFreeType(Buffer);
  8346. }
  8347. BOOL
  8348. JrnlCompareFid(
  8349. PVOID Buf1,
  8350. PVOID Buf2,
  8351. ULONG Length
  8352. )
  8353. /*++
  8354. Routine Description:
  8355. Compare two keys for equality.
  8356. Arguments:
  8357. Buf1 -- ptr to key value 1.
  8358. Buf1 -- ptr to key value 2.
  8359. Length -- should be 8 bytes.
  8360. Return Value:
  8361. TRUE if they match.
  8362. --*/
  8363. {
  8364. #undef DEBSUB
  8365. #define DEBSUB "JrnlCompareFid:"
  8366. if (!ValueIsMultOf4(Buf1)) {
  8367. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8368. Buf1, Length, *(PULONG)Buf1);
  8369. FRS_ASSERT(ValueIsMultOf4(Buf1));
  8370. return 0xFFFFFFFF;
  8371. }
  8372. if (!ValueIsMultOf4(Buf2)) {
  8373. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8374. Buf2, Length, *(PULONG)Buf2);
  8375. FRS_ASSERT(ValueIsMultOf4(Buf2));
  8376. return 0xFFFFFFFF;
  8377. }
  8378. if (Length != sizeof(ULONGLONG)) {
  8379. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8380. FRS_ASSERT(Length == sizeof(LONGLONG));
  8381. return 0xFFFFFFFF;
  8382. }
  8383. return RtlEqualMemory(Buf1, Buf2, sizeof(ULONGLONG));
  8384. }
  8385. ULONG
  8386. JrnlHashCalcFid (
  8387. PVOID Buf,
  8388. ULONG Length
  8389. )
  8390. /*++
  8391. Routine Description:
  8392. Calculate a hash value on an NTFS file ID for the journal filter table.
  8393. Arguments:
  8394. Buf -- ptr to a file ID.
  8395. Length -- should be 8 bytes.
  8396. Return Value:
  8397. 32 bit hash value.
  8398. --*/
  8399. {
  8400. #undef DEBSUB
  8401. #define DEBSUB "JrnlHashCalcFid:"
  8402. PULONG pUL = (PULONG) Buf;
  8403. if (!ValueIsMultOf4(pUL)) {
  8404. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8405. pUL, Length, *pUL);
  8406. FRS_ASSERT(ValueIsMultOf4(pUL));
  8407. return 0xFFFFFFFF;
  8408. }
  8409. if (Length != sizeof(LONGLONG)) {
  8410. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8411. FRS_ASSERT(Length == sizeof(LONGLONG));
  8412. return 0xFFFFFFFF;
  8413. }
  8414. return HASH_FID(pUL, 0x80000000);
  8415. }
  8416. ULONG
  8417. NoHashBuiltin (
  8418. PVOID Buf,
  8419. ULONG Length
  8420. )
  8421. /*++
  8422. Routine Description:
  8423. No-op function for hash tables that use an external function to
  8424. do hash calculations. It returns the low 4 bytes of the quadword.
  8425. Arguments:
  8426. Buf -- ptr to a file ID.
  8427. Length -- should be 8 bytes.
  8428. Return Value:
  8429. 32 bit hash value.
  8430. --*/
  8431. {
  8432. #undef DEBSUB
  8433. #define DEBSUB "NoHashBuiltin:"
  8434. PULONG pUL = (PULONG) Buf;
  8435. if (!ValueIsMultOf4(pUL)) {
  8436. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8437. pUL, Length, *pUL);
  8438. FRS_ASSERT(ValueIsMultOf4(pUL));
  8439. return 0xFFFFFFFF;
  8440. }
  8441. if (Length != sizeof(LONGLONG)) {
  8442. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8443. FRS_ASSERT(Length == sizeof(LONGLONG));
  8444. return 0xFFFFFFFF;
  8445. }
  8446. return (*pUL & (ULONG) 0x7FFFFFFF);
  8447. }
  8448. BOOL
  8449. JrnlCompareGuid(
  8450. PVOID Buf1,
  8451. PVOID Buf2,
  8452. ULONG Length
  8453. )
  8454. /*++
  8455. Routine Description:
  8456. Compare two keys for equality.
  8457. Arguments:
  8458. Buf1 -- ptr to key value 1.
  8459. Buf1 -- ptr to key value 2.
  8460. Length -- should be 16 bytes.
  8461. Return Value:
  8462. TRUE if they match.
  8463. --*/
  8464. {
  8465. #undef DEBSUB
  8466. #define DEBSUB "JrnlCompareGuid:"
  8467. if (!ValueIsMultOf4(Buf1)) {
  8468. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8469. Buf1, Length, *(PULONG)Buf1);
  8470. FRS_ASSERT(ValueIsMultOf4(Buf1));
  8471. return 0xFFFFFFFF;
  8472. }
  8473. if (!ValueIsMultOf4(Buf2)) {
  8474. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8475. Buf2, Length, *(PULONG)Buf2);
  8476. FRS_ASSERT(ValueIsMultOf4(Buf2));
  8477. return 0xFFFFFFFF;
  8478. }
  8479. if (Length != sizeof(GUID)) {
  8480. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8481. FRS_ASSERT(Length == sizeof(GUID));
  8482. return 0xFFFFFFFF;
  8483. }
  8484. return RtlEqualMemory(Buf1, Buf2, sizeof(GUID));
  8485. }
  8486. ULONG
  8487. JrnlHashCalcGuid (
  8488. PVOID Buf,
  8489. ULONG Length
  8490. )
  8491. /*++
  8492. Routine Description:
  8493. Calculate a hash value for a Guid.
  8494. From \nt\private\rpc\runtime\mtrt\uuidsup.hxx
  8495. This is the "true" OSF DCE format for Uuids. We use this
  8496. when generating Uuids. The NodeId is faked on systems w/o
  8497. a netcard.
  8498. typedef struct _RPC_UUID_GENERATE
  8499. {
  8500. unsigned long TimeLow; // 100 ns units
  8501. unsigned short TimeMid;
  8502. unsigned short TimeHiAndVersion;
  8503. unsigned char ClockSeqHiAndReserved;
  8504. unsigned char ClockSeqLow;
  8505. unsigned char NodeId[6]; // constant
  8506. } RPC_UUID_GENERATE;
  8507. TimeLow wraps every 6.55ms and is mostly zero.
  8508. Not quite true since GUIDs are allocated
  8509. in time based blocks and then successive GUIDS are created by
  8510. bumping the TimeLow by one until the block is consumed.
  8511. Arguments:
  8512. Buf -- ptr to a Guid.
  8513. Length -- should be 16 bytes.
  8514. Return Value:
  8515. 32 bit hash value.
  8516. --*/
  8517. {
  8518. #undef DEBSUB
  8519. #define DEBSUB "JrnlHashCalcGuid:"
  8520. PULONG pUL = (PULONG) Buf;
  8521. PUSHORT pUS = (PUSHORT) Buf;
  8522. if (!ValueIsMultOf4(pUL)) {
  8523. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8524. pUL, Length, *pUL);
  8525. FRS_ASSERT(ValueIsMultOf4(pUL));
  8526. return 0xFFFFFFFF;
  8527. }
  8528. if (Length != sizeof(GUID)) {
  8529. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8530. FRS_ASSERT(Length == sizeof(GUID));
  8531. return 0xFFFFFFFF;
  8532. }
  8533. //
  8534. // Calc hash based on the time since the rest of it is eseentially constant.
  8535. //
  8536. return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2]);
  8537. }
  8538. ULONG
  8539. JrnlHashCalcUsn (
  8540. PVOID Buf,
  8541. ULONG Length
  8542. )
  8543. /*++
  8544. Routine Description:
  8545. Calculate a hash value on an NTFS USN Journal Index.
  8546. Arguments:
  8547. Buf -- ptr to a file ID.
  8548. Length -- should be 8 bytes.
  8549. Return Value:
  8550. 32 bit hash value.
  8551. --*/
  8552. {
  8553. #undef DEBSUB
  8554. #define DEBSUB "JrnlHashCalcUsn:"
  8555. ULONG Value, HighPart, LowPart;
  8556. if (!ValueIsMultOf4(Buf)) {
  8557. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8558. Buf, Length, *(PULONG)Buf);
  8559. FRS_ASSERT(ValueIsMultOf4(Buf));
  8560. return 0xFFFFFFFF;
  8561. }
  8562. if (Length != sizeof(LONGLONG)) {
  8563. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8564. FRS_ASSERT(Length == sizeof(LONGLONG));
  8565. return 0xFFFFFFFF;
  8566. }
  8567. LowPart = *(PULONG) Buf;
  8568. HighPart = *(PULONG)( (PCHAR) Buf + 4 );
  8569. //
  8570. // USNs are quadword offsets so shift the low part an extra 3 bits.
  8571. //
  8572. Value = (HighPart >> 16) + HighPart + (LowPart >> 19) + (LowPart >> 3);
  8573. return Value;
  8574. }
  8575. VOID
  8576. CalcHashFidAndName(
  8577. IN PUNICODE_STRING Name,
  8578. IN PULONGLONG Fid,
  8579. OUT PULONGLONG HashValue
  8580. )
  8581. /*++
  8582. Routine Description:
  8583. This routine forms a 32 bit hash of the name and File ID args.
  8584. It returns this in the low 32 bits of HashValue. The upper 32 bits are zero.
  8585. Note: If there is room at the end of the Unicode String buffer for the Name,
  8586. code below will add a NULL for printing.
  8587. Arguments:
  8588. Name - The filename to hash.
  8589. Fid - The FID to hash.
  8590. HashValue - The resulting quadword hash value.
  8591. Return Value:
  8592. Not used
  8593. --*/
  8594. {
  8595. #undef DEBSUB
  8596. #define DEBSUB "CalcHashFidAndName:"
  8597. PUSHORT p;
  8598. ULONG NameHash = 0;
  8599. ULONG Shift = 0;
  8600. ULONG FidHash;
  8601. ULONG NChars, MaxNChars;
  8602. PULONG pUL;
  8603. FRS_ASSERT( Name != NULL );
  8604. FRS_ASSERT( Fid != NULL );
  8605. FRS_ASSERT( ValueIsMultOf2(Name->Buffer) );
  8606. FRS_ASSERT( ValueIsMultOf2(Name->Length) );
  8607. FRS_ASSERT( Name->Length != 0 );
  8608. FRS_ASSERT( ValueIsMultOf8(Fid) );
  8609. NChars = Name->Length / sizeof(WCHAR);
  8610. //
  8611. // Combine each unicode character into the hash value, shifting 4 bits
  8612. // each time. Start at the end of the name so file names with different
  8613. // type codes will hash to different table offsets.
  8614. //
  8615. for( p = Name->Buffer + NChars - 1;
  8616. p >= Name->Buffer;
  8617. p-- ) {
  8618. NameHash = NameHash ^ (((ULONG)towupper(*p)) << Shift);
  8619. Shift = (Shift < 16) ? Shift + 4 : 0;
  8620. }
  8621. pUL = (ULONG *) Fid;
  8622. FidHash = (ULONG) HASH_FID(pUL, 0x80000000);
  8623. if (FidHash == 0) {
  8624. DPRINT(4, "Warning - FidHash is zero.\n");
  8625. }
  8626. *HashValue = (ULONGLONG) (NameHash + FidHash);
  8627. if (*HashValue == 0) {
  8628. DPRINT(0, "Error - HashValue is zero.\n");
  8629. }
  8630. //
  8631. // Make sure the FileName has a unicode null at the end before we print it. This is
  8632. //
  8633. MaxNChars = Name->MaximumLength / sizeof(WCHAR);
  8634. if (Name->Buffer[NChars-1] != UNICODE_NULL) {
  8635. if (NChars >= MaxNChars) {
  8636. //
  8637. // No NULL at the end of the name and no room to add one.
  8638. //
  8639. DPRINT4(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: cannot print\n",
  8640. (NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash);
  8641. return;
  8642. }
  8643. Name->Buffer[NChars] = UNICODE_NULL;
  8644. }
  8645. DPRINT5(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: %ws\n",
  8646. (NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash, Name->Buffer);
  8647. }
  8648. VOID
  8649. JrnlFilterPrintJacket(
  8650. PGENERIC_HASH_TABLE Table,
  8651. PVOID Buffer
  8652. )
  8653. {
  8654. JrnlFilterPrint(5, Table, Buffer);
  8655. }
  8656. VOID
  8657. JrnlFilterPrint(
  8658. ULONG PrintSev,
  8659. PGENERIC_HASH_TABLE Table,
  8660. PVOID Buffer
  8661. )
  8662. /*++
  8663. Routine Description:
  8664. print out a hash table entry.
  8665. Arguments:
  8666. Table -- ptr to a hash table struct.
  8667. Buffer -- ptr to entry.
  8668. Return Value:
  8669. none.
  8670. --*/
  8671. {
  8672. #undef DEBSUB
  8673. #define DEBSUB "JrnlFilterPrint:"
  8674. PFILTER_TABLE_ENTRY Entry = (PFILTER_TABLE_ENTRY)Buffer;
  8675. DPRINT3(PrintSev, "Addr: %08x, HashValue: %08x RC: %d\n",
  8676. Entry,
  8677. Entry->HashEntryHeader.HashValue,
  8678. Entry->HashEntryHeader.ReferenceCount);
  8679. DPRINT2(PrintSev, "List Entry - %08x, %08x\n",
  8680. Entry->HashEntryHeader.ListEntry.Flink,
  8681. Entry->HashEntryHeader.ListEntry.Blink);
  8682. DPRINT2(PrintSev, "FileId: %08x %08x, ParentFileId: %08x %08x\n",
  8683. PRINTQUAD(Entry->DFileID), PRINTQUAD(Entry->DParentFileID));
  8684. DPRINT2(PrintSev, "Replica Number: %d, FileName: %ws\n",
  8685. Entry->DReplicaNumber, Entry->UFileName.Buffer);
  8686. DPRINT3(PrintSev, "Sequence Number: %d, Transition Type: %d, FrsVsn: %08x %08x\n",
  8687. READ_FILTER_SEQ_NUMBER(Entry),
  8688. READ_FILTER_TRANS_TYPE(Entry),
  8689. PRINTQUAD(Entry->FrsVsn));
  8690. DPRINT4(PrintSev, "Childhead Entry - %08x, %08x Child Link Entry - %08x, %08x\n",
  8691. Entry->ChildHead.Flink, Entry->ChildHead.Blink,
  8692. Entry->ChildEntry.Flink, Entry->ChildEntry.Blink);
  8693. }
  8694. #undef PrintSev
  8695. VOID
  8696. JrnlChangeOrderPrint(
  8697. PGENERIC_HASH_TABLE Table,
  8698. PVOID Buffer
  8699. )
  8700. /*++
  8701. Routine Description:
  8702. print out a hash table entry.
  8703. Arguments:
  8704. Table -- ptr to a hash table struct. (unused)
  8705. Buffer -- ptr to entry.
  8706. Return Value:
  8707. none.
  8708. --*/
  8709. {
  8710. #undef DEBSUB
  8711. #define DEBSUB "JrnlChangeOrderPrint:"
  8712. FRS_PRINT_TYPE(0, (PCHANGE_ORDER_ENTRY)Buffer);
  8713. }
  8714. VOID
  8715. DumpUsnRecord(
  8716. IN ULONG Severity,
  8717. IN PUSN_RECORD UsnRecord,
  8718. IN ULONG ReplicaNumber,
  8719. IN ULONG LocationCmd,
  8720. IN PCHAR Debsub,
  8721. IN ULONG uLineNo
  8722. )
  8723. /*++
  8724. Routine Description:
  8725. This routine prints out the contents of a NTFS USN Journal Record.
  8726. Arguments:
  8727. Severity -- Severity level for print. (See debug.c, debug.h)
  8728. UsnRecord - The address of the UsnRecord.
  8729. ReplicaNumber - ID number of the replica set
  8730. LocationCmd - Decoded location command for this USN record.
  8731. Debsub -- Name of calling subroutine.
  8732. uLineno -- Line number of caller
  8733. MACRO: DUMP_USN_RECORD, DUMP_USN_RECORD2
  8734. Return Value:
  8735. none.
  8736. --*/
  8737. {
  8738. #undef DEBSUB
  8739. #define DEBSUB "DumpUsnRecord:"
  8740. ULONG Len;
  8741. CHAR TimeString[TIME_STRING_LENGTH];
  8742. CHAR Tstr1[200];
  8743. WCHAR FName[MAX_PATH+1];
  8744. CHAR FlagBuf[120];
  8745. //
  8746. // Don't print this
  8747. //
  8748. if (!DoDebug(Severity, Debsub)) {
  8749. return;
  8750. }
  8751. //
  8752. // Get hh:mm:ss.
  8753. //
  8754. FileTimeToStringClockTime((PFILETIME) &UsnRecord->TimeStamp, TimeString);
  8755. //
  8756. // Put file name in a buffer so we can put a null at the end of it.
  8757. //
  8758. Len = min((ULONG)UsnRecord->FileNameLength, MAX_PATH);
  8759. CopyMemory(FName, UsnRecord->FileName, Len);
  8760. FName[Len/2] = UNICODE_NULL;
  8761. //
  8762. // Build the trace record.
  8763. //
  8764. _snprintf(Tstr1, sizeof(Tstr1),
  8765. ":U: %08x %d Fid %08x %08x PFid %08x %08x At %08x Sr %04x %s %7s %ws",
  8766. (ULONG)UsnRecord->Usn,
  8767. ReplicaNumber,
  8768. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8769. PRINTQUAD(UsnRecord->ParentFileReferenceNumber),
  8770. UsnRecord->FileAttributes,
  8771. UsnRecord->SourceInfo,
  8772. TimeString,
  8773. CoLocationNames[LocationCmd],
  8774. FName
  8775. );
  8776. Tstr1[sizeof(Tstr1)-1] = '\0';
  8777. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8778. //
  8779. // Output reason string on sep line.
  8780. //
  8781. FrsFlagsToStr(UsnRecord->Reason, UsnReasonNameTable, sizeof(FlagBuf), FlagBuf);
  8782. _snprintf(Tstr1, sizeof(Tstr1),
  8783. ":U: Fid %08x %08x Reason %08x Flags [%s]",
  8784. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8785. UsnRecord->Reason,
  8786. FlagBuf
  8787. );
  8788. Tstr1[sizeof(Tstr1)-1] = '\0';
  8789. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8790. //
  8791. // Output file attributes string on sep line.
  8792. //
  8793. FrsFlagsToStr(UsnRecord->FileAttributes, FileAttrFlagNameTable, sizeof(FlagBuf), FlagBuf);
  8794. _snprintf(Tstr1, sizeof(Tstr1),
  8795. ":U: Fid %08x %08x Attrs %08x Flags [%s]",
  8796. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8797. UsnRecord->FileAttributes,
  8798. FlagBuf
  8799. );
  8800. Tstr1[sizeof(Tstr1)-1] = '\0';
  8801. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8802. }
  8803. VOID
  8804. JrnlDumpVmeFilterTable(
  8805. VOID
  8806. )
  8807. /*++
  8808. Routine Description:
  8809. Dump the VME filter table
  8810. Arguments:
  8811. None.
  8812. Return Value:
  8813. None.
  8814. --*/
  8815. {
  8816. #undef DEBSUB
  8817. #define DEBSUB "JrnlDumpVmeFilterTable:"
  8818. ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  8819. DPRINT(4, "\n");
  8820. DPRINT1(4, "==== start of VME Filter table dump for %ws ===========\n", pE->FSVolInfo.VolumeLabel);
  8821. DPRINT(4, "\n");
  8822. if (pE->FilterTable != NULL) {
  8823. // GHT_DUMP_TABLE(5, pE->FilterTable);
  8824. NOTHING;
  8825. } else {
  8826. DPRINT(4, "Filter table freed\n");
  8827. }
  8828. DPRINT(4, "\n");
  8829. DPRINT(4, "============== end of Vme Filter table dump ============\n");
  8830. DPRINT(4, "\n");
  8831. );
  8832. }
  8833. /*++
  8834. The two tables below describe all the possible outcomes of a directory
  8835. rename operation. The case numbers in parens are further described below.
  8836. As directory changes appear in the USN data stream the filter table for
  8837. the volume is updated immediately, even in the case of subtree renames.
  8838. This allows us to accurately filter subsequent USN records and associate
  8839. them with the correct replica set.
  8840. (R.S. means Replica Set)
  8841. Parent
  8842. FileID FileID
  8843. Filter Entry Filter Entry Interpretation : Action
  8844. ------------ ------------ -------------- ------
  8845. Absent Absent Wasn't in R.S., Still Isn't: Skip
  8846. (1) Absent Present Wasn't in R.S., Now Is : Create entry (MOVEIN)
  8847. (2) Present Absent Was in R.S. , Now Isn't : MOVEOUT
  8848. Present Present Was in R.S. , Still Is : Eval Further
  8849. The last case above requires further evaluation to determine if the
  8850. directory has moved from one directory to another or from one replica
  8851. set to another.
  8852. FileID Compare R.S. compare
  8853. between Filter Between File
  8854. Entry & USn Rec and Parent Interpretation : Action
  8855. -------------- ----------- -------------- ------
  8856. (3) Same Parent Same R.S. File stayed in same Dir.: Check Name
  8857. Same Parent Diff. R.S. Error, shouldn't happen :
  8858. (4) Diff. Parent Same R.S. Ren to diff dir in R.S. : Update Parent Fid (MOVEDIR)
  8859. (5) Diff. Parent Diff. R.S. Rename to diff R.s. : MOVERS
  8860. For directory renames there are 5 cases to consider:
  8861. 1. MOVEIN - Rename of a directory into a replica set. The filter table lookup
  8862. failed on the FID but the parent FID is in the table. We add an entry for
  8863. this DIR to the filter table. The update process must enumerate the
  8864. subtree on disk and evaluate each file for inclusion into the tree,
  8865. updating the Filter table as it goes. We may see file operations several
  8866. levels down from the rename point and have no entry in the Filter Table so
  8867. we pitch those records. The sub-tree enumeration process must handle this
  8868. as it incorporates each file into the IDTable.
  8869. 2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
  8870. volume. This is a delete of an entire subtree in the Replica set. We
  8871. enumerate the subtree bottom-up, sending dir level change orders to the
  8872. update process as we delete the filter table entries.
  8873. 3. Name change only. The Parent FID in the USN record matches the
  8874. Parent FID in the Filter entry for the directory.
  8875. Update the name in the filter entry.
  8876. 4. MOVEDIR - Parent FID in USN record is different from the parent FID in the
  8877. Filter entry so this is a rename to a dir in the SAME replica set.
  8878. Update the parent FID in the filter entry and Filename too.
  8879. 5. MOVERS - The Parent FID in the USN record is associated with a directory
  8880. in a DIFFERENT replica set on the volume. Update the parent FID, the
  8881. replica ptr, and name in the filter entry. This is a move of an entire
  8882. subtree from one replica set to another. We enumerate the subtree
  8883. top-down, sending dir level change orders to the update process as we
  8884. update the replica set information in the filter table entries.
  8885. --*/
  8886. /*
  8887. Note: doc: - update this description
  8888. Removing a sub-tree from a replica set
  8889. This is a multi-stage process that occurs when a directory is renamed out of
  8890. the replica set. This is managed by the update process.
  8891. 1. The Journal Process has marked the filter entry for the renamed directory
  8892. as DELETED. This ensures that operations on any files below this directory
  8893. are filtered out by the Journal process. A change order describing the subtree
  8894. delete is queued to the Replica Change Order process queue.
  8895. 2. When the update process encounters the subtree delete change order it walks
  8896. thru the subtree (using either the directory entries in the Filter Hash Table or
  8897. the Replica IDTable) breadth-first from the leaves of the subtree to the subtree
  8898. root. For each file or directory it tombstones the entry in the
  8899. IDTable and builds a delete change order to send to its outbound partners. In
  8900. addition it deletes the entries from the volume filter table and the DIRTable as
  8901. it progresses. If a crash or shutdown request occurs during this operation
  8902. the process continues with the remaining entries when it resumes.
  8903. 3. The operation completes when the root of the sub-tree is processed.
  8904. Adding a sub-tree (X) to a replica set
  8905. This occurs when directory X is renamed into a replica set. It is managed by
  8906. the Update Process.
  8907. 1. The Journal Process creates a Filter entry for the sub-tree root (X) and
  8908. queues a change order to the update process. At this point the Journal process
  8909. has no knowledge of what is beneath this directory. If it sees an operation on
  8910. a direct child of X it builds a change order and queues it to the update
  8911. process. In addition if it sees a directory create/delete or rename operation
  8912. on a direct child of X it increments sequence number in the Filter Table Entry
  8913. for X and creates a new Filter Table entry as appropriate.
  8914. 2. The update process takes the "sub-tree add" change order and processes the
  8915. sub-tree starting at X, enumerating the subtree down to the leaves in a breadth
  8916. first order. For each entry in the subtree it creates an IDTable entry for the
  8917. file or directory. If a directory it also creates a DIRTable entry and adds an
  8918. entry to the Filter Table. As each Filter Table entry is made the Journal
  8919. subsystem will begin sending change orders to the update process for any new
  8920. file operations under the directory. For each directory, the filter table entry
  8921. is made first, if it doesn't already exist. then the update process enumerates
  8922. the directory contents. If new direct children are created while the
  8923. enumeration is in process change orders are queued to the update process. If
  8924. the USN on the change order is less than or equal to the USN saved when the file
  8925. was first processed then the change order is discarded. Otherwise the change
  8926. occurred after the point when the file was processed.
  8927. It is possible for the update process to receive update or delete
  8928. change orders for files that are not yet present in the IDTable because the
  8929. enumeration hasn't reached them yet. For files or dirs created "behind" the
  8930. enumeration process point, change orders are queued that will pick them up.
  8931. The first problem is solved by having the update process stop processing
  8932. further change orders on this replica set until the enumeration is complete.
  8933. */
  8934. #if 0
  8935. /*
  8936. Recovery mode processing for the NTFS journal.
  8937. Objective: When FRS or the system crashes we have lost the write filter
  8938. the journal code uses to filter out FRS related writes to files.
  8939. We need to reliably identify those USN records that were caused by FRS
  8940. so we don't propagate out a file that was being installed at the time
  8941. of the crash. Such a file will undoubtedly be corrupt and will get sent
  8942. to every member of the replica set.
  8943. In the case of system crashes, NTFS inserts close records into the journal
  8944. for any files that were open at the time of the crash. NTFS marks those
  8945. USN records with a flag that indicates they were written at startup. In
  8946. addition a user app can force a close record to be written to the journal
  8947. through an FSCTL call. If this happens and no further modification is made
  8948. to the file then no close record will be written by NTFS when the last handle
  8949. on the file is closed or at startup.
  8950. In the case of FRS service crashes or externally generated process Kills
  8951. FRS will fail to perform a clean shutdown. As each change order is processed
  8952. it is marked as work in process. When the change order either retires or
  8953. goes into a retry state the work in process flag is cleared. From this
  8954. information we can determine those files that may have had FRS generated
  8955. writes in process when the service died.
  8956. The flow is as follows:
  8957. At replica startup scan the inbound log and build a hash table (PendingCOTable)
  8958. of all entries with the following information kept with each entry:
  8959. File FID
  8960. File GUID
  8961. Local/Remote CO flag
  8962. CO Inprocess flag
  8963. Usn index of most recent USN record that contributed to the local CO.
  8964. There could be multiple COs pending for the same file. OR the state of
  8965. the Inprocess flags and save the state of the most recent CO's local/rmt flag.
  8966. The PendingCoTable continues to exist after startup so we can evaluate
  8967. dependencies between newly arrived COs and COs in a retry state in the inlog.
  8968. In addition:
  8969. The Largest NTFS USN for any local inbound CO is saved in RecoveryUsnStart.
  8970. The current end of the USN journal is saved in RecoveryUsnEnd.
  8971. Both are saved in the Replica struct.
  8972. ULONGLONG FileReferenceNumber;
  8973. ULONGLONG ParentFileReferenceNumber;
  8974. USN Usn;
  8975. LARGE_INTEGER TimeStamp;
  8976. */
  8977. Start USN read at Replica->RecoveryUsnStart.
  8978. if (UsnRecord->Usn < Replica->RecoveryUsnEnd) {
  8979. if (IsNtfsRecoveryClose(UsnRecord)) {
  8980. //
  8981. // assume that all the file data may not have been written out
  8982. // so the file may be corrupt.
  8983. //
  8984. PendingCo = InPendingCoTable(Replica->PendingCoTable,
  8985. &UsnRecord->FileReferenceNumber);
  8986. if ((PendingCo == NULL) || (PendingCo->LocalCo)) {
  8987. //
  8988. // The file was being written locally at the time of the crash.
  8989. // It is probably corrupt.
  8990. // Create a file refresh change order and send it to one of our
  8991. // inbound partners to get their version of the file.
  8992. // Note: This request is queued so the first inbound partner to
  8993. // join will get it.
  8994. // Note: Since we are reading after RecoveryUsnStart the USN
  8995. // should not be less than what we see in the inlog.
  8996. //
  8997. FRS_ASSERT(UsnRecord->Usn >= PendingCo->Usn);
  8998. RequestRefreshCo(Replica, &UsnRecord->FileReferenceNumber);
  8999. goto GET_NEXT_USN_RECORD;
  9000. } else {
  9001. //
  9002. // There is a pending remote CO for this file. It will install
  9003. // a new copy of the file.
  9004. //
  9005. // Note: if there are multiple remote COs in the process queue
  9006. // the last one may not be the one that is finally accepted.
  9007. // But we need to be sure that none of the local COs that are pending
  9008. // are allowed to propagate.
  9009. //
  9010. // If this CO was in process at the time of the crash and the
  9011. // CO was already propagated to the outlog, the staging file may
  9012. // be corrupted. Delete the CO from the outlog and queue a
  9013. // refresh request to the inbound partner.
  9014. //
  9015. // Note: We could still have a corrupted file. If it was locally
  9016. // changed and we processed the CO, updating the IDTable and
  9017. // inserting the CO in the outlog but a crash still resulted
  9018. // in not all dirty data pages being flushed.
  9019. // WHEN WE GEN THE LOCAL STAGE FILE CAN WE FORCE A FLUSH?
  9020. }
  9021. if (IsFileFrsStagingFile(UsnRecord)) {
  9022. //
  9023. // This is an FRS staging file. It may be corrupt.
  9024. // Delete it and regenerate it by setting a new start state in
  9025. // the related CO. (CO Guid is derived from the name of the file).
  9026. // There may not be a CO for this file if the inlog record has
  9027. // been deleted. There may still be a CO in the outlog though so
  9028. // just delete the staging file, forcing it to be regenerated on
  9029. // demand from the local file.
  9030. //
  9031. // If the local file is suspect then we need to refresh it from
  9032. // an inbound partner so delete the CO in the outlog and let the
  9033. // refresh CO PROPAGATE as needed.
  9034. //
  9035. // Note that the IDTable entry may already have been updated because
  9036. // this CO retired. That would cause the refresh CO to fail to
  9037. // be accepted. Put some state in the refresh CO so when it comes
  9038. // back if that state matches the state in the IDTable entry then
  9039. // we know to accept the refresh CO regardless of other reconcile
  9040. // info. If however another local or remote CO has updated the
  9041. // file in the interim then the refresh CO is stale and should be
  9042. // discarded.
  9043. //
  9044. SetPendingCoState(SeqNum, PendingCo->LocalCo ? IBCO_STAGING_REQUESTED :
  9045. IBCO_FETCH_REQUESTED);
  9046. }
  9047. goto GET_NEXT_USN_RECORD;
  9048. } else {
  9049. //
  9050. // Read IDTable entry for this file and get the FileUsn.
  9051. // This is the USN associated with the most recent operation on the
  9052. // file that we have handled.
  9053. //
  9054. if (UsnRecord->Usn <= IDTableRec->FileUsn) {
  9055. //
  9056. // This USN record is for an operation that occurred
  9057. // prior to the last action processed related to the file.
  9058. //
  9059. goto GET_NEXT_USN_RECORD;
  9060. } else {
  9061. //
  9062. // This USN record could not have come from FRS because if it did and there was no entry for
  9063. // a change order on the file in the Inbound Log then the LastFileUsn check above would have caught it.
  9064. // This is true because the inbound log record is only deleted after the file is updated and the LastFileUsn
  9065. // is saved in the Jet record for the file.
  9066. // Even if there is a change order pending in the Inbound log, FRS could not have started processing it
  9067. // because the USN Record is not marked as written by NTFS at recovery which would be the case
  9068. // if FRS had been in the middle of an update when the system crashed. Therefore,
  9069. //
  9070. //this is not an FRS generated USN record so process the USN record normally.
  9071. }
  9072. }
  9073. }
  9074. /*
  9075. This solution solves the problem of FRS getting part way thru a file update
  9076. when the system crashes. It must not process the USN record because then it
  9077. would propagate a corrupted file out to all the other members. It also has
  9078. the nice property of refreshing a file from another partner that a user was
  9079. writing at the time of the crash. The User has lost their changes but at
  9080. least the file is back in an uncorrupted state.
  9081. */
  9082. #endif