Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

11062 lines
357 KiB

  1. /*++
  2. Copyright (c) 1997-1999 Microsoft Corporation
  3. Module Name:
  4. journal.c
  5. Abstract:
  6. This module contains routines to process the NTFS Volume Journal for the
  7. File Replication service. It uses a single thread with an I/O completion
  8. port to post reads to all volume journals we need to monitor.
  9. As USN buffers are filled they are queued to the JournalProcessQueue for
  10. further processing. The Journal Read Thread gets a free buffer from
  11. the free list and posts another read to the volume journal.
  12. A thread pool processes the USN buffers from the JournalProcessQueue.
  13. Author:
  14. David A. Orbits (davidor) 6-Apr-1997
  15. Environment:
  16. User Mode Service
  17. Revision History:
  18. // JOURNAL RECORD FORMAT
  19. //
  20. // The initial Major.Minor version of the Usn record will be 1.0.
  21. // In general, the MinorVersion may be changed if fields are added
  22. // to this structure in such a way that the previous version of the
  23. // software can still correctly interpret the fields it knows about. The
  24. // MajorVersion should only be changed if the previous version of
  25. // any software using this structure would incorrectly handle new
  26. // records due to structure changes.
  27. //
  28. // see \nt\public\sdk\inc\ntioapi.h for the USN_RECORD declaration.
  29. //
  30. #define USN_REASON_DATA_OVERWRITE (0x00000001)
  31. #define USN_REASON_DATA_EXTEND (0x00000002)
  32. #define USN_REASON_DATA_TRUNCATION (0x00000004)
  33. #define USN_REASON_NAMED_DATA_OVERWRITE (0x00000010)
  34. #define USN_REASON_NAMED_DATA_EXTEND (0x00000020)
  35. #define USN_REASON_NAMED_DATA_TRUNCATION (0x00000040)
  36. #define USN_REASON_FILE_CREATE (0x00000100)
  37. #define USN_REASON_FILE_DELETE (0x00000200)
  38. #define USN_REASON_EA_CHANGE (0x00000400)
  39. #define USN_REASON_SECURITY_CHANGE (0x00000800)
  40. #define USN_REASON_RENAME_OLD_NAME (0x00001000) // rename
  41. #define USN_REASON_RENAME_NEW_NAME (0x00002000)
  42. #define USN_REASON_INDEXABLE_CHANGE (0x00004000)
  43. #define USN_REASON_BASIC_INFO_CHANGE (0x00008000)
  44. #define USN_REASON_HARD_LINK_CHANGE (0x00010000)
  45. #define USN_REASON_COMPRESSION_CHANGE (0x00020000)
  46. #define USN_REASON_ENCRYPTION_CHANGE (0x00040000)
  47. #define USN_REASON_OBJECT_ID_CHANGE (0x00080000)
  48. #define USN_REASON_REPARSE_POINT_CHANGE (0x00100000)
  49. #define USN_REASON_STREAM_CHANGE (0x00200000) // named stream create, delete or rename.
  50. #define USN_REASON_CLOSE (0x80000000)
  51. --*/
  52. #define UNICODE 1
  53. #define _UNICODE 1
  54. #include <ntreppch.h>
  55. #pragma hdrstop
  56. #undef DEBSUB
  57. #define DEBSUB "journal:"
  58. #include <frs.h>
  59. #include <genhash.h>
  60. #include <tablefcn.h>
  61. #include <eventlog.h>
  62. #include <perrepsr.h>
  63. #pragma warning( disable:4102) // unreferenced label
  64. //
  65. // The default for Journal Max Size now comes from the registry.
  66. #define JRNL_DEFAULT_ALLOC_DELTA (1*1024*1024)
  67. #define JRNL_USN_SAVE_POINT_INTERVAL (16*1024)
  68. #define JRNL_CLEAN_WRITE_FILTER_INTERVAL (60*1000) /* once a minute */
  69. #define NumberOfJounalBuffers 3
  70. #define FRS_CANCEL_JOURNAL_READ 0xFFFFFFFF
  71. #define FRS_PAUSE_JOURNAL_READ 0xFFFFFFF0
  72. //
  73. // Every 'VSN_SAVE_INTERVAL' VSNs that are handed out, save the state in the
  74. // config record. On restart we take the largest value and add
  75. // 2*(VSN_SAVE_INTERVAL+1) to it so if a crash occurred we ensure that it
  76. // never goes backwards.
  77. //
  78. // A Vsn value of 0 means there is no Vsn. This convention is required
  79. // by FrsPendingInVVector().
  80. //
  81. // MUST BE Power of 2.
  82. #define VSN_SAVE_INTERVAL 0xFF
  83. #define VSN_RESTART_INCREMENT (2*(VSN_SAVE_INTERVAL+1))
  84. //
  85. // Deactivate the Volume Monitor Entry by setting IoActive False, pulling
  86. // it off the _Queue and queueing it to the VolumeMonitorStopQueue.
  87. // Also store an error status. This code assumes you have already ACQUIRED
  88. // THE LOCK ON the VolumeMonitorQueue.
  89. //
  90. #define VmeDeactivate(_Queue, _pVme, _WStatus) \
  91. FrsRtlRemoveEntryQueueLock(_Queue, &_pVme->ListEntry); \
  92. _pVme->IoActive = FALSE; \
  93. _pVme->WStatus = _WStatus; \
  94. /*_pVme->ActiveReplicas -= 1; */ \
  95. DPRINT2(4, "++ vmedeactivate -- onto stop queue %ws (%08x)\n", \
  96. _pVme->FSVolInfo.VolumeLabel, _pVme); \
  97. FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &_pVme->ListEntry); \
  98. ReleaseVmeRef(_pVme);
  99. //
  100. // The Journal free buffer queue holds the free buffers for journal reads.
  101. //
  102. FRS_QUEUE JournalFreeQueue;
  103. //
  104. // The Journal process queue holds the list of journal buffers with
  105. // data to process.
  106. //
  107. FRS_QUEUE JournalProcessQueue;
  108. //
  109. // The Journal I/O completion port. We keep a read outstanding on each
  110. // NTFS volume monitored.
  111. //
  112. HANDLE JournalCompletionPort;
  113. //
  114. // The handle to the Journal read thread.
  115. //
  116. HANDLE JournalReadThreadHandle = NULL;
  117. //
  118. // Set this flag to stop any further issuing of journal reads.
  119. //
  120. volatile BOOL KillJournalThreads = FALSE;
  121. //
  122. // This is the volume monitor queue. The Journal read thread waits until
  123. // this queue goes non-empty before it waits on the completion port. This
  124. // way it knows the completion port exists without having to poll.
  125. //
  126. FRS_QUEUE VolumeMonitorQueue;
  127. //
  128. // When I/O is Stoped on a given journal the Journal read thread places
  129. // the volume monitor entry on the Stop queue.
  130. //
  131. FRS_QUEUE VolumeMonitorStopQueue;
  132. //
  133. // This is the control queue for all the volume monitor entry change order
  134. // queues.
  135. //
  136. FRS_QUEUE FrsVolumeLayerCOList;
  137. FRS_QUEUE FrsVolumeLayerCOQueue;
  138. //
  139. // This is the expected version number from the USN journal.
  140. //
  141. USHORT ConfigUsnMajorVersion = 2;
  142. //
  143. // This is the count of outstanding journal read requests.
  144. //
  145. ULONG JournalActiveIoRequests = 0;
  146. //
  147. // Change order delay in aging cache. (milliseconds)
  148. //
  149. ULONG ChangeOrderAgingDelay;
  150. //
  151. // This lock is held by JrnlSetReplicaState() when moving a replica
  152. // between lists.
  153. //
  154. CRITICAL_SECTION JrnlReplicaStateLock;
  155. //
  156. // Lock to protect the child lists in the Filter Table. (must be pwr of 2)
  157. // Instead of paying the overhead of having one per node we just use an array
  158. // to help reduce contention. We use the ReplicaNumber masked by the lock
  159. // table size as the index.
  160. //
  161. // Acquire the lock on the ReplicaSet Filter table Child List before
  162. // inserting or removing a child from the list.
  163. //
  164. CRITICAL_SECTION JrnlFilterTableChildLock[NUMBER_FILTER_TABLE_CHILD_LOCKS];
  165. //
  166. // The list of all Replica Structs active, stopped and faulted.
  167. //
  168. extern FRS_QUEUE ReplicaListHead;
  169. extern FRS_QUEUE ReplicaStoppedListHead;
  170. extern FRS_QUEUE ReplicaFaultListHead;
  171. //
  172. // This is used to init our new value for FrsVsn.
  173. //
  174. extern ULONGLONG MaxPartnerClockSkew;
  175. //
  176. // Global sequence number. Inited here with first Vme VSN.
  177. //
  178. extern CRITICAL_SECTION GlobSeqNumLock;
  179. extern ULONGLONG GlobSeqNum;
  180. //
  181. // The table below describes what list the Replica struct should be on for
  182. // a given state as well as the state name.
  183. //
  184. REPLICA_SERVICE_STATE ReplicaServiceState[] = {
  185. {NULL, "ALLOCATED"},
  186. {&ReplicaListHead, "INITIALIZING"},
  187. {&ReplicaListHead, "STARTING"},
  188. {&ReplicaListHead, "ACTIVE"},
  189. {&ReplicaListHead, "PAUSE1"},
  190. {&ReplicaListHead, "PAUSING (2)"},
  191. {&ReplicaListHead, "PAUSED"},
  192. {&ReplicaListHead, "STOPPING"},
  193. {&ReplicaStoppedListHead, "STOPPED"},
  194. {&ReplicaFaultListHead, "ERROR"},
  195. {&ReplicaFaultListHead, "JRNL_WRAP_ERROR"},
  196. {NULL, "REPLICA_DELETED"},
  197. {&ReplicaFaultListHead, "MISMATCHED_VOLUME_SERIAL_NO"},
  198. {&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_OBJECT_ID"},
  199. {&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_FILE_ID"},
  200. {&ReplicaFaultListHead, "MISMATCHED_JOURNAL_ID"}
  201. };
  202. //
  203. // The following struct is used to encapsulate the context of a change
  204. // order request so it can be passed as a context parameter in an
  205. // enumerated call.
  206. //
  207. typedef struct _CHANGE_ORDER_PARAMETERS_ {
  208. PREPLICA OriginalReplica; // Original Replica Set
  209. PREPLICA NewReplica; // The New Replica set in the case of a rename.
  210. ULONGLONG NewParentFid; // The new parent FID in case of a rename.
  211. ULONG NewLocationCmd; // MovDir, MovRs, ...
  212. PUSN_RECORD UsnRecord; // Usn Record that triggered the change order
  213. // creation (i.e. the operation on the root of the subtree).
  214. PFILTER_TABLE_ENTRY OrigParentFilterEntry; // Original parent filter entry of root filter entry
  215. PFILTER_TABLE_ENTRY NewParentFilterEntry; // Current/New parent filter entry of root filter entry
  216. } CHANGE_ORDER_PARAMETERS, *PCHANGE_ORDER_PARAMETERS;
  217. typedef struct _OP_FIELDS_ {
  218. unsigned Op1 : 4;
  219. unsigned Op2 : 4;
  220. unsigned Op3 : 4;
  221. unsigned Op4 : 4;
  222. unsigned Op5 : 4;
  223. unsigned Op6 : 4;
  224. unsigned Op7 : 4;
  225. unsigned Op8 : 4;
  226. } OP_FIELDS, *POP_FIELDS;
  227. typedef struct _CO_LOCATION_CONTROL_CMD_ {
  228. union {
  229. OP_FIELDS OpFields;
  230. ULONG UlongOpFields;
  231. } u1;
  232. } CO_LOCATION_CONTROL_CMD;
  233. #define OpInval 0 // Invalid op (only check for Op1, else done).
  234. #define OpEvap 1 // Evaporate the change order
  235. #define OpNRs 2 // update New Replica Set and New Directory.
  236. #define OpNDir 3 // Update New Directory
  237. #define OpNSt 4 // Update New State stored in next nibble.
  238. #define NSCre CO_LOCATION_CREATE // Create a File or Dir (New FID Generated)
  239. #define NSDel CO_LOCATION_DELETE // Delete a file or Dir (FID retired)
  240. #define NSMovIn CO_LOCATION_MOVEIN // Rename into a R.S.
  241. #define NSMovIn2 CO_LOCATION_MOVEIN2 // Rename into a R.S. from a prev MOVEOUT
  242. #define NSMovOut CO_LOCATION_MOVEOUT // Rename out of any R.S.
  243. #define NSMovRs CO_LOCATION_MOVERS // Rename from one R.S. to another R.S.
  244. #define NSMovDir CO_LOCATION_MOVEDIR // Rename from one dir to another (Same R.S.)
  245. #define NSMax CO_LOCATION_NUM_CMD // No prior Location cmd. Prior change
  246. // Order had a content cmd.
  247. #define NSNoLocationCmd CO_LOCATION_NO_CMD
  248. PCHAR CoLocationNames[]= {"Create" , "Delete", "Movein" , "Movein2",
  249. "Moveout", "Movers", "MoveDir", "NoCmd"};
  250. //
  251. // The following dispatch table specifies what operations are performed when
  252. // a second change arrives for a given FID and a prior change order is still
  253. // pending. The states correspond to the change order location command that
  254. // is to be executed by the update process. Each entry in the dispatch table
  255. // is a ULONG composed of up to 8 operation nibbles which are executed in a loop.
  256. // The operations could evaporate the change order (e.g. a create followed by
  257. // a delete: the create was pending and the delete came in, so just blow off
  258. // the change order). The operation could update the parent directory or the
  259. // replica set the directory lives in, or the location command (and thus the
  260. // state) that is to be performed. The MovIn2 state is not a unique input,
  261. // rather it is a special state that lets us remember there was a prior MovOut
  262. // done so if the MovIn2 is followed by a Del or a MovOut we know there is still
  263. // work to be done in the database so we can't evaporate the change order.
  264. // See note (a) below.
  265. //
  266. CO_LOCATION_CONTROL_CMD ChangeOrderLocationStateTable[NSMax+1][NSMax] = {
  267. // Followed by Second Op On Same Fid
  268. //
  269. // Cre Del MovIn MovIn2 MovOut MovRs MovDir
  270. // First
  271. // Op On
  272. // Fid
  273. //Cre
  274. {{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
  275. //Del
  276. {{0}, {0}, {0}, {0}, {0}, {0}, {0}},
  277. //MovIn
  278. {{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
  279. //MovIn2(a)
  280. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
  281. //MovOut
  282. {{0}, {0}, {OpNRs,OpNSt,NSMovIn2},
  283. {0}, {0}, {0}, {0}},
  284. //MovRs
  285. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
  286. //MovDir
  287. {{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir}},
  288. //<NONE>
  289. {{OpNRs, OpNSt,NSCre},
  290. {OpNSt,NSDel}, {OpNRs,OpNSt,NSMovIn},
  291. {0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir,OpNSt,NSMovDir}}
  292. };
  293. // (a) The MovIn2 state is artificially introduced to deal with the sequence
  294. // of MovOut followed by a MovIn. There are two problems here. One is that
  295. // many changes could have happened to the file or dir while it was outside
  296. // the R.S. since we were not monitoring it. Consequently the update process
  297. // must do a complete evaluation of the file/dir properties so we don't
  298. // fail to replicate some change. The second problem is that in the normal
  299. // case a MovIn followed by either a delete or a MovOut results in evaporating
  300. // the change order. However if a MovOut has occurred in the past followed
  301. // by a MovIn we cannot assume that the file or Dir was never in the R.S.
  302. // to begin with. Consider the sequence of MovOut, MovIn, Del. Without the
  303. // MovIn2 state the MovIn followed by Del would result in evaporating the
  304. // change order so the file or dir would be still left in the database.
  305. // By transitioning to the MovIn2 state we go to the Del state when we see
  306. // the Delete so we can remove the entry from the database. Similarly once
  307. // in the MovIn2 state if we see a MovOut then we go to the MovOut state
  308. // rather than evaporating the change order since we still have to update
  309. // the database with the MovOut.
  310. //
  311. // Note: think about a similar problem where the file filter string changes
  312. // and a file is touched so a create CO is generated. If the file is
  313. // then deleted the CO is evaporated. This means that a del CO will
  314. // not be propagated so the file is deleted everywhere. Do we need
  315. // a Cre2 CO analogous to the MovIn2 state?
  316. typedef
  317. ULONG
  318. (NTAPI *PJRNL_FILTER_ENUM_ROUTINE) (
  319. PGENERIC_HASH_TABLE Table,
  320. PVOID Buffer,
  321. PVOID Context
  322. );
  323. LONG
  324. JrnlGetFileCoLocationCmd(
  325. PVOLUME_MONITOR_ENTRY pVme,
  326. IN PUSN_RECORD UsnRecord,
  327. OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
  328. OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
  329. );
  330. ULONG
  331. JrnlEnterFileChangeOrder(
  332. IN PUSN_RECORD UsnRecord,
  333. IN ULONG LocationCmd,
  334. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  335. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  336. );
  337. PCHANGE_ORDER_ENTRY
  338. JrnlCreateCo(
  339. IN PREPLICA Replica,
  340. IN PULONGLONG Fid,
  341. IN PULONGLONG ParentFid,
  342. IN PUSN_RECORD UsnRecord,
  343. IN BOOL IsDirectory,
  344. IN PWCHAR FileName,
  345. IN USHORT Length
  346. );
  347. BOOL
  348. JrnlMergeCoTest(
  349. IN PVOLUME_MONITOR_ENTRY pVme,
  350. IN PUNICODE_STRING UFileName,
  351. IN PULONGLONG ParentFid,
  352. IN ULONG StreamLastMergeSeqNum
  353. );
  354. VOID
  355. JrnlUpdateNst(
  356. IN PVOLUME_MONITOR_ENTRY pVme,
  357. IN PUNICODE_STRING UFileName,
  358. IN PULONGLONG ParentFid,
  359. IN ULONG StreamSequenceNumber
  360. );
  361. VOID
  362. JrnlFilterUpdate(
  363. IN PREPLICA CurrentReplica,
  364. IN PUSN_RECORD UsnRecord,
  365. IN ULONG LocationCmd,
  366. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  367. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  368. );
  369. ULONG
  370. JrnlProcessSubTree(
  371. IN PFILTER_TABLE_ENTRY RootFilterEntry,
  372. IN PCHANGE_ORDER_PARAMETERS Cop
  373. );
  374. ULONG
  375. JrnlProcessSubTreeEntry(
  376. PGENERIC_HASH_TABLE Table,
  377. PVOID Buffer,
  378. PVOID Context
  379. );
  380. ULONG
  381. JrnlUpdateChangeOrder(
  382. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  383. IN PREPLICA NewReplica,
  384. IN ULONGLONG NewParentFid,
  385. IN ULONG NewLocationCmd,
  386. IN PUSN_RECORD UsnRecord
  387. );
  388. ULONG
  389. JrnlAddFilterEntryFromUsn(
  390. IN PREPLICA Replica,
  391. IN PUSN_RECORD UsnRecord,
  392. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  393. );
  394. ULONG
  395. JrnlAddFilterEntry(
  396. IN PREPLICA Replica,
  397. IN PFILTER_TABLE_ENTRY FilterEntry,
  398. OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
  399. IN BOOL Replace
  400. );
  401. ULONG
  402. JrnlDeleteDirFilterEntry(
  403. IN PGENERIC_HASH_TABLE FilterTable,
  404. IN PULONGLONG DFileID,
  405. IN PFILTER_TABLE_ENTRY ArgFilterEntry
  406. );
  407. ULONG
  408. JrnlGetPathAndLevel(
  409. IN PGENERIC_HASH_TABLE FilterTable,
  410. IN PLONGLONG StartDirFileID,
  411. OUT PULONG Level
  412. );
  413. ULONG
  414. JrnlCommand(
  415. PCOMMAND_PACKET CmdPkt
  416. );
  417. ULONG
  418. JrnlPrepareService1(
  419. PREPLICA Replica
  420. );
  421. ULONG
  422. JrnlPrepareService2(
  423. IN PTHREAD_CTX ThreadCtx,
  424. IN PREPLICA Replica
  425. );
  426. ULONG
  427. JrnlInitOneReplicaSet(
  428. PCOMMAND_PACKET CmdPkt
  429. );
  430. ULONG
  431. JrnlCleanOutReplicaSet(
  432. PREPLICA Replica
  433. );
  434. JET_ERR
  435. JrnlInsertParentEntry(
  436. IN PTHREAD_CTX ThreadCtx,
  437. IN PTABLE_CTX TableCtx,
  438. IN PVOID Record,
  439. IN PVOID Context
  440. );
  441. ULONG_PTR
  442. JrnlFilterLinkChild (
  443. PGENERIC_HASH_TABLE Table,
  444. PVOID Buffer,
  445. PVOID Context
  446. );
  447. ULONG_PTR
  448. JrnlFilterLinkChildNoError (
  449. PGENERIC_HASH_TABLE Table,
  450. PVOID Buffer,
  451. PVOID Context
  452. );
  453. ULONG
  454. JrnlFilterUnlinkChild (
  455. PGENERIC_HASH_TABLE Table,
  456. PVOID Buffer,
  457. PVOID Context
  458. );
  459. ULONG_PTR
  460. JrnlFilterGetRoot (
  461. PGENERIC_HASH_TABLE Table,
  462. PVOID Buffer,
  463. PVOID Context
  464. );
  465. ULONG
  466. JrnlSubTreePrint (
  467. PGENERIC_HASH_TABLE Table,
  468. PVOID Buffer,
  469. PVOID Context
  470. );
  471. #if 0
  472. ULONG
  473. JrnlCheckStartFailures(
  474. PFRS_QUEUE Queue
  475. );
  476. #endif
  477. ULONG
  478. JrnlOpen(
  479. IN PREPLICA Replica,
  480. OUT PVOLUME_MONITOR_ENTRY *pVme,
  481. PCONFIG_TABLE_RECORD ConfigRecord
  482. );
  483. ULONG
  484. JrnlSubmitReadThreadRequest(
  485. IN PVOLUME_MONITOR_ENTRY pVme,
  486. IN ULONG Request,
  487. IN ULONG NewState
  488. );
  489. ULONG
  490. JrnlShutdownSingleReplica(
  491. IN PREPLICA Replica,
  492. IN BOOL HaveLock
  493. );
  494. ULONG
  495. JrnlCloseVme(
  496. IN PVOLUME_MONITOR_ENTRY pVme
  497. );
  498. ULONG
  499. JrnlCloseAll(
  500. VOID
  501. );
  502. ULONG
  503. JrnlClose(
  504. IN HANDLE VolumeHandle
  505. );
  506. DWORD
  507. WINAPI
  508. JournalReadThread(
  509. IN LPVOID Context
  510. );
  511. ULONG
  512. JrnlGetEndOfJournal(
  513. IN PVOLUME_MONITOR_ENTRY pVme,
  514. OUT USN *EndOfJournal
  515. );
  516. NTSTATUS
  517. FrsIssueJournalAsyncRead(
  518. IN PJBUFFER Jbuff,
  519. IN PVOLUME_MONITOR_ENTRY pVme
  520. );
  521. ULONG
  522. JrnlEnumerateFilterTreeBU(
  523. PGENERIC_HASH_TABLE Table,
  524. PFILTER_TABLE_ENTRY FilterEntry,
  525. PJRNL_FILTER_ENUM_ROUTINE Function,
  526. PVOID Context
  527. );
  528. ULONG
  529. JrnlEnumerateFilterTreeTD(
  530. PGENERIC_HASH_TABLE Table,
  531. PFILTER_TABLE_ENTRY FilterEntry,
  532. PJRNL_FILTER_ENUM_ROUTINE Function,
  533. PVOID Context
  534. );
  535. VOID
  536. JrnlHashEntryFree(
  537. PGENERIC_HASH_TABLE Table,
  538. PVOID Buffer
  539. );
  540. BOOL
  541. JrnlCompareFid(
  542. PVOID Buf1,
  543. PVOID Buf2,
  544. ULONG Length
  545. );
  546. ULONG
  547. JrnlHashCalcFid (
  548. PVOID Buf,
  549. ULONG Length
  550. );
  551. ULONG
  552. NoHashBuiltin (
  553. PVOID Buf,
  554. ULONG Length
  555. );
  556. BOOL
  557. JrnlCompareGuid(
  558. PVOID Buf1,
  559. PVOID Buf2,
  560. ULONG Length
  561. );
  562. ULONG
  563. JrnlHashCalcGuid (
  564. PVOID Buf,
  565. ULONG Length
  566. );
  567. ULONG
  568. JrnlHashCalcUsn (
  569. PVOID Buf,
  570. ULONG Length
  571. );
  572. VOID
  573. CalcHashFidAndName(
  574. IN PUNICODE_STRING Name,
  575. IN PULONGLONG Fid,
  576. OUT PULONGLONG HashValue
  577. );
  578. ULONG
  579. JrnlCleanWriteFilter(
  580. PCOMMAND_PACKET CmdPkt
  581. );
  582. ULONG
  583. JrnlCleanWriteFilterWorker (
  584. PQHASH_TABLE Table,
  585. PQHASH_ENTRY BeforeNode,
  586. PQHASH_ENTRY TargetNode,
  587. PVOID Context
  588. );
  589. VOID
  590. JrnlSubmitCleanWriteFilter(
  591. IN PVOLUME_MONITOR_ENTRY pVme,
  592. IN ULONG TimeOut
  593. );
  594. #define FRS_JOURNAL_FILTER_PRINT(_Sev_, _Table_, _Buffer_) \
  595. JrnlFilterPrint(_Sev_, _Table_, _Buffer_)
  596. #define FRS_JOURNAL_FILTER_PRINT_FUNCTION JrnlFilterPrintJacket
  597. VOID
  598. JrnlFilterPrint(
  599. ULONG PrintSev,
  600. PGENERIC_HASH_TABLE Table,
  601. PVOID Buffer
  602. );
  603. VOID
  604. JrnlFilterPrintJacket(
  605. PGENERIC_HASH_TABLE Table,
  606. PVOID Buffer
  607. );
  608. #define FRS_JOURNAL_CHANGE_ORDER_PRINT(_Table_, _Buffer_) \
  609. JrnlChangeOrderPrint( _Table_, _Buffer_)
  610. #define FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION JrnlChangeOrderPrint
  611. VOID
  612. JrnlChangeOrderPrint(
  613. PGENERIC_HASH_TABLE Table,
  614. PVOID Buffer
  615. );
  616. ULONG
  617. ChgOrdAcceptInitialize(
  618. VOID
  619. );
  620. VOID
  621. ChgOrdAcceptShutdown(
  622. VOID
  623. );
  624. DWORD
  625. JournalMonitorInit(
  626. VOID
  627. )
  628. /*++
  629. Routine Description:
  630. This routine initializes the NTFS Journal monitor routines and starts
  631. the JournalReadThread.
  632. Arguments:
  633. None.
  634. Thread Return Value:
  635. Win32 status
  636. --*/
  637. {
  638. #undef DEBSUB
  639. #define DEBSUB "JournalMonitorInit:"
  640. ULONG WStatus;
  641. ULONG ThreadId;
  642. JET_ERR jerr;
  643. ULONG i;
  644. if (JournalActiveIoRequests != 0) {
  645. DPRINT1(0, ":S: ERROR - Can't initialize journal with active I/O (%d) in progress.\n",
  646. JournalActiveIoRequests);
  647. return ERROR_REQUEST_ABORTED;
  648. }
  649. //
  650. // No completion port yet.
  651. //
  652. FRS_CLOSE(JournalCompletionPort);
  653. JournalCompletionPort = NULL;
  654. //
  655. // Read change order aging cache delay.
  656. //
  657. CfgRegReadDWord(FKC_CO_AGING_DELAY, NULL, 0, &ChangeOrderAgingDelay);
  658. ChangeOrderAgingDelay *= 1000;
  659. //
  660. // Init the list of volumes we monitor.
  661. //
  662. FrsInitializeQueue(&VolumeMonitorQueue, &VolumeMonitorQueue);
  663. FrsInitializeQueue(&VolumeMonitorStopQueue, &VolumeMonitorStopQueue);
  664. //
  665. // Free list for journal buffers.
  666. //
  667. FrsInitializeQueue(&JournalFreeQueue, &JournalFreeQueue);
  668. //
  669. // Locks for the Filter Table Child Lists.
  670. //
  671. for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
  672. InitializeCriticalSection(&JrnlFilterTableChildLock[i]);
  673. }
  674. FrsInitializeQueue(&FrsVolumeLayerCOList, &FrsVolumeLayerCOList);
  675. FrsInitializeQueue(&FrsVolumeLayerCOQueue, &FrsVolumeLayerCOList);
  676. //
  677. // Wait for the DB to start up. During shutdown, this event is
  678. // set. Any extraneous commands issued by the journal are
  679. // subsequently ignored by the database.
  680. //
  681. WaitForSingleObject(DataBaseEvent, INFINITE);
  682. if (FrsIsShuttingDown) {
  683. return ERROR_PROCESS_ABORTED;
  684. }
  685. //
  686. // Create a journal read thread. It will wait until an entry is placed
  687. // on the VolumeMonitorQueue.
  688. //
  689. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  690. JournalReadThreadHandle = CreateThread(NULL,
  691. 0,
  692. JournalReadThread,
  693. (LPVOID) NULL,
  694. 0,
  695. &ThreadId);
  696. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  697. WStatus = GetLastError();
  698. DPRINT_WS(0, "Error from CreateThread", WStatus);
  699. return WStatus;
  700. }
  701. DbgCaptureThreadInfo2(L"JrnlRead", JournalReadThread, ThreadId);
  702. }
  703. return ERROR_SUCCESS;
  704. }
  705. VOID
  706. JournalMonitorShutdown(
  707. VOID
  708. )
  709. /*++
  710. Routine Description:
  711. This routine releases handles and frees storage for the NTFS Journal
  712. subsystem.
  713. Arguments:
  714. None.
  715. Thread Return Value:
  716. Win32 status
  717. --*/
  718. {
  719. #undef DEBSUB
  720. #define DEBSUB "JournalMonitorShutdown:"
  721. ULONG WStatus;
  722. JET_ERR jerr;
  723. ULONG i;
  724. DPRINT1(3, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  725. //
  726. // Stop the Change Order Accept thread.
  727. //
  728. ChgOrdAcceptShutdown();
  729. //
  730. // Locks for the Filter Table Child Lists.
  731. //
  732. for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
  733. DeleteCriticalSection(&JrnlFilterTableChildLock[i]);
  734. }
  735. }
  736. ULONG
  737. JrnlInitOneReplicaSet(
  738. PCOMMAND_PACKET CmdPkt
  739. )
  740. /*++
  741. Routine Description:
  742. This routine does all the journal and database initialization for a
  743. single replica set. It is used to startup a replica set that failed
  744. to start at service startup or to start a newly created replica set.
  745. Note the Journal and database subsystems must be initialized first.
  746. The Replica arg must have an initialized config record.
  747. Warning - There are no table level locks on the Filter table so only
  748. one replica set can be initialized at a time on a single volume.
  749. Actually this might work since the row locks and child link locks should
  750. be sufficient but it hasn't been tested.
  751. The second part of the initialization is done by the database server so
  752. the journal thread is free to finish processing any pending journal
  753. buffers for this volume since we have to pause it before we can update
  754. the filter table.
  755. Arguments:
  756. CmdPkt - ptr to a cmd packet with a ptr to a replica struct with a
  757. pre-initialized config record.
  758. Thread Return Value:
  759. Frs Error Status
  760. --*/
  761. {
  762. #undef DEBSUB
  763. #define DEBSUB "JrnlInitOneReplicaSet:"
  764. ULONG FStatus;
  765. ULONG WStatus;
  766. PCONFIG_TABLE_RECORD ConfigRecord;
  767. PREPLICA_THREAD_CTX RtCtx;
  768. PREPLICA Replica;
  769. //
  770. // Check that the journal subsystem is up.
  771. //
  772. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  773. return FrsErrorNotInitialized;
  774. }
  775. Replica = CmdPkt->Parameters.JournalRequest.Replica;
  776. //
  777. // Phase 1 of journal monitor init. This opens the USN journal on the volume
  778. // containing the replica set. It allocates the:
  779. // - volume filter hash table,
  780. // - parent file ID table,
  781. // - USN record file name dependency hash table,
  782. // - USN Write Filter Table,
  783. // - Active Child dependency hash table,
  784. // - volume change order list,
  785. // - volume Change Order Aging table hash table and the
  786. // - Active Inbound Change Order hash table.
  787. //
  788. // If the journal is already open then it returns the pVme for the volume
  789. // in the Replica struct.
  790. //
  791. DPRINT3(4, ":S: Phase 1 for replica %ws, id: %d, (%08x)\n",
  792. Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
  793. //
  794. // Assume its going to work out ok and go do it.
  795. //
  796. Replica->FStatus = FrsErrorSuccess;
  797. WStatus = JrnlPrepareService1(Replica);
  798. if (!WIN_SUCCESS(WStatus) || (Replica->pVme == NULL)) {
  799. DPRINT1_WS(4, "++ Phase 1 for replica %ws Failed;",
  800. Replica->ReplicaName->Name, WStatus);
  801. //
  802. // add cleanup code, delete vme ...
  803. //
  804. if (FRS_SUCCESS(Replica->FStatus)) {
  805. //
  806. // Return generic error if no specific error code was provided.
  807. //
  808. Replica->FStatus = FrsErrorReplicaPhase1Failed;
  809. }
  810. return Replica->FStatus;
  811. }
  812. ConfigRecord = (PCONFIG_TABLE_RECORD) (Replica->ConfigTable.pDataRecord);
  813. //
  814. // ** WARN ** at this point there is only one Replica Thread
  815. // context associated with the replica.
  816. //
  817. RtCtx = CONTAINING_RECORD(GetListHead(&Replica->ReplicaCtxListHead.ListHead),
  818. REPLICA_THREAD_CTX,
  819. ReplicaCtxList);
  820. DPRINT3(4, "++ Submit replica tree load cmd for replica %ws, id: %d, (%08x)\n",
  821. Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
  822. DPRINT3(4, "++ ConfigRecord: %08x, RtCtx: %08x, path: %ws\n",
  823. ConfigRecord, RtCtx, ConfigRecord->FSRootPath);
  824. //
  825. // Propagate the command packet on to the DBService to init the
  826. // replica tables and complete the rest of the initialization.
  827. //
  828. DbsPrepareCmdPkt(CmdPkt, // CmdPkt,
  829. Replica, // Replica,
  830. CMD_LOAD_ONE_REPLICA_FILE_TREE, // CmdRequest,
  831. NULL, // TableCtx,
  832. RtCtx, // CallContext,
  833. 0, // TableType,
  834. 0, // AccessRequest,
  835. 0, // IndexType,
  836. NULL, // KeyValue,
  837. 0, // KeyValueLength,
  838. TRUE); // Submit
  839. //
  840. // Phase 1 is done.
  841. //
  842. return FrsErrorSuccess;
  843. }
  844. ULONG_PTR
  845. JrnlFilterDeleteEntry (
  846. PGENERIC_HASH_TABLE Table,
  847. PVOID Buffer,
  848. PVOID Context
  849. )
  850. /*++
  851. Routine Description:
  852. This function is called thru GhtCleanTableByFilter() to delete all the
  853. Filter table entries for a given Replica Set specified by the
  854. Context parameter.
  855. Arguments:
  856. Table - the hash table being enumerated (to lookup parent entry).
  857. Buffer - a ptr to a FILTER_TABLE_ENTRY
  858. Context - A pointer to the Replica struct for the replica data added to the
  859. table.
  860. Return Value:
  861. True if the entry matches the Replica Context and is to be deleted.
  862. --*/
  863. {
  864. #undef DEBSUB
  865. #define DEBSUB "JrnlFilterDeleteEntry:"
  866. PREPLICA Replica = (PREPLICA) Context;
  867. PFILTER_TABLE_ENTRY FilterEntry = Buffer;
  868. return (FilterEntry->Replica == Replica);
  869. }
  870. ULONG
  871. JrnlCleanOutReplicaSet(
  872. PREPLICA Replica
  873. )
  874. /*++
  875. Routine Description:
  876. This routine cleans out the filter table and parent file ID table entries
  877. associated with the given replica set.
  878. *NOTE* We assume the caller has paused the journal and there is no
  879. activity on either the volume FilterTable or the ParentFidTable.
  880. Warning - There are no table level locks on the Filter table so only
  881. one replica set can be cleaned up t a time on a single volume.
  882. Arguments:
  883. Replica - ptr to replica struct.
  884. Thread Return Value:
  885. Frs Error Status
  886. --*/
  887. {
  888. #undef DEBSUB
  889. #define DEBSUB "JrnlCleanOutReplicaSet:"
  890. PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
  891. ULONG Cnt;
  892. //
  893. // Check that the journal subsystem is up.
  894. //
  895. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  896. return FrsErrorNotInitialized;
  897. }
  898. //
  899. // Scan the table and delete all the filter entries for this replica set.
  900. //
  901. Cnt = GhtCleanTableByFilter(pVme->FilterTable, JrnlFilterDeleteEntry, Replica);
  902. DPRINT1(4, "Total of %d Filter Table entries deleted.\n", Cnt);
  903. //
  904. // Ditto for the parent file ID table.
  905. //
  906. QHashDeleteByFlags(pVme->ParentFidTable, Replica->ReplicaNumber);
  907. //
  908. // Note: we could also do this for the name space table by moving the
  909. // sequence number into the quadword and putting the replica number
  910. // in flags
  911. return FrsErrorSuccess;
  912. }
  913. DWORD
  914. WINAPI
  915. Monitor(
  916. PFRS_THREAD ThisFrsThreadCtx
  917. )
  918. /*++
  919. Routine Description:
  920. This is the main journal work thread. It processes command packets
  921. and journal buffer packets off its processing queue.
  922. It filters each entry in the USN journal against a filter table for
  923. the volume to determine if the file in question is part of a replica
  924. set. It then builds a change order entry to feed the data base and
  925. the output logs.
  926. Note: Perf: If multiple volumes are being monitored, we could create
  927. additional monitor threads and divide the volumes up among the
  928. threads. The processing of USN records for a given volume is
  929. single threaded though because they must be processed in order.
  930. Arguments:
  931. ThisFrsThreadCtx - A pointer to the FRS_THREAD ctx for this thread.
  932. Thread Return Value:
  933. ERROR_SUCCESS - Thread terminated normally.
  934. Other errors from CreatFile, ReadDirectoryChangesW, CreateEvent, ...
  935. are returned as the thread exit status.
  936. --*/
  937. {
  938. #undef DEBSUB
  939. #define DEBSUB "monitor:"
  940. USN CurrentUsn;
  941. USN NextUsn;
  942. USN JournalConsumed;
  943. ULONGLONG CaptureParentFileID;
  944. PWCHAR Pwc;
  945. DWORD Level;
  946. ULONG RelativePathLength;
  947. ULONG FileAttributes;
  948. LONG DataLength;
  949. PUSN_RECORD UsnRecord;
  950. PULONGLONG UsnBuffer;
  951. BOOL SaveFlag;
  952. PLIST_ENTRY Entry;
  953. PJBUFFER Jbuff;
  954. NTSTATUS Status;
  955. ULONG WStatus = ERROR_SUCCESS;
  956. ULONG GStatus;
  957. ULONG FStatus;
  958. PVOLUME_MONITOR_ENTRY pVme;
  959. PFRS_NODE_HEADER Header;
  960. PCONFIG_TABLE_RECORD ConfigRecord;
  961. PCOMMAND_PACKET CmdPkt;
  962. PREPLICA Replica;
  963. BOOL Excluded;
  964. UNICODE_STRING TempUStr;
  965. BOOL IsDirectory;
  966. ULONG UsnReason;
  967. ULONG Flags;
  968. LONG LocationCmd;
  969. PFILTER_TABLE_ENTRY PrevParentFilterEntry;
  970. PFILTER_TABLE_ENTRY CurrParentFilterEntry;
  971. PCXTION Cxtion;
  972. WCHAR FileName[MAX_PATH + 1];
  973. PrevParentFilterEntry = NULL;
  974. CurrParentFilterEntry = NULL;
  975. /******************************************************************************
  976. *******************************************************************************
  977. ** **
  978. ** **
  979. ** M A I N U S N J O U R N A L P R O C E S S L O O P **
  980. ** **
  981. ** **
  982. *******************************************************************************
  983. ******************************************************************************/
  984. DPRINT(5, ":S: Journal is starting.\n");
  985. //
  986. // Try-Finally
  987. //
  988. try {
  989. //
  990. // Capture exception.
  991. //
  992. try {
  993. while (TRUE) {
  994. //
  995. // Wait on the JournalProcessQueue for a journal buffer.
  996. //
  997. Entry = FrsRtlRemoveHeadQueueTimeout(&JournalProcessQueue, 10*1000);
  998. if (Entry == NULL) {
  999. WStatus = GetLastError();
  1000. if (WStatus == WAIT_TIMEOUT) {
  1001. //
  1002. // Go look for more work.
  1003. //
  1004. continue;
  1005. }
  1006. if (WStatus == ERROR_INVALID_HANDLE) {
  1007. DPRINT(4, ":S: JournalProcessQueue is shutdown.\n");
  1008. //
  1009. // The queue has been run down. Close all the journal handles
  1010. // saving the USN to start the next read from. Then close
  1011. // Jet Session and exit.
  1012. //
  1013. WStatus = ERROR_SUCCESS;
  1014. JrnlCloseAll();
  1015. break;
  1016. }
  1017. //
  1018. // Unexpected error from FrsRtlRemoveHeadQueueTimeout
  1019. //
  1020. DPRINT_WS(0, "Error from FrsRtlRemoveHeadQueueTimeout", WStatus);
  1021. JrnlCloseAll();
  1022. break;
  1023. }
  1024. Header = (PFRS_NODE_HEADER) CONTAINING_RECORD(Entry, COMMAND_PACKET, ListEntry);
  1025. if (Header->Type == COMMAND_PACKET_TYPE) {
  1026. //
  1027. // Process the command packet.
  1028. //
  1029. WStatus = JrnlCommand((PCOMMAND_PACKET)Header);
  1030. continue;
  1031. }
  1032. if (Header->Type != JBUFFER_TYPE) {
  1033. //
  1034. // Garbage packet.
  1035. //
  1036. DPRINT2(0, "ERROR - Invalid packet type: %d, size: %d\n",
  1037. Header->Type, Header->Size);
  1038. FRS_ASSERT(!"Jrnl monitor: Invalid packet type");
  1039. }
  1040. ///////////////////////////////////////////////////////////////////
  1041. // //
  1042. // P R O C E S S J O U R N A L D A T A B U F F E R //
  1043. // //
  1044. ///////////////////////////////////////////////////////////////////
  1045. //
  1046. // Increment the Usn Reads Counter
  1047. //
  1048. PM_INC_CTR_SERVICE(PMTotalInst, UsnReads, 1);
  1049. Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
  1050. //DPRINT2(5, "jb: fu %08x (len: %d)\n",
  1051. // Jbuff, Jbuff->DataLength);
  1052. pVme = Jbuff->pVme;
  1053. WStatus = Jbuff->WStatus;
  1054. UsnBuffer = Jbuff->DataBuffer;
  1055. DataLength = Jbuff->DataLength;
  1056. DPRINT1(4, ":U: ***** USN Data for Volume %ws *****\n", pVme->FSVolInfo.VolumeLabel);
  1057. //
  1058. // Pull out the Next USN
  1059. //
  1060. NextUsn = 0;
  1061. if (DataLength != 0) {
  1062. UsnRecord = (PUSN_RECORD)((PCHAR)UsnBuffer + sizeof(USN));
  1063. DataLength -= sizeof(USN);
  1064. NextUsn = *(USN *)UsnBuffer;
  1065. DPRINT1(4, "Next Usn will be: %08lx %08lx\n", PRINTQUAD(NextUsn));
  1066. }
  1067. //
  1068. // Check if I/O is stopped on this journal and throw the buffer away.
  1069. // Could be a pause request.
  1070. //
  1071. if (!pVme->IoActive) {
  1072. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1073. DPRINT1(4, "++ I/O not active on this journal. Freeing buffer. State is: %s\n",
  1074. RSS_NAME(pVme->JournalState));
  1075. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1076. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1077. continue;
  1078. }
  1079. //
  1080. // Check for lost journal data. This is unlikely to happen here since
  1081. // this error will surface when we submit the journal read request.
  1082. // There is other error recovery code that is invoked when we try to start
  1083. // a replica set and the journal restart point is not found.
  1084. //
  1085. if (WStatus == ERROR_NOT_FOUND) {
  1086. DPRINT1(4, ":U: Usn %08lx %08lx has been deleted. Data lost, resync required\n",
  1087. PRINTQUAD(Jbuff->JrnlReadPoint));
  1088. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1089. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1090. //
  1091. // Post an error log entry.
  1092. //
  1093. EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
  1094. }
  1095. //
  1096. // Some other error.
  1097. //
  1098. if (!WIN_SUCCESS(WStatus)) {
  1099. DPRINT_WS(0, "ERROR - Read Usn Journal failed", WStatus);
  1100. //
  1101. // Put the VME on the stop queue and mark all Replica Sets
  1102. // using this VME as stopped.
  1103. //
  1104. // Add code to walk the replica list to stop replication on a journal error.
  1105. // Is closing the journal the right way to fail?
  1106. //
  1107. JrnlClose(Jbuff->FileHandle);
  1108. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1109. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1110. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1111. continue;
  1112. }
  1113. //
  1114. // Check for data left after USN.
  1115. //
  1116. if (DataLength > 0) {
  1117. //
  1118. // Check version number for mismatch.
  1119. //
  1120. if (UsnRecord->MajorVersion != ConfigUsnMajorVersion) {
  1121. DPRINT2(0, ":U: ERROR - Major version mismatch for USN Journal. Found: %d, Expected: %d\n",
  1122. UsnRecord->MajorVersion, ConfigUsnMajorVersion);
  1123. WStatus = ERROR_REVISION_MISMATCH;
  1124. //
  1125. // Put the VME on the stop queue and mark all Replica Sets
  1126. // using this VME as stopped.
  1127. //
  1128. // Note: Add code to walk the replica list & stop VME on config mismatch.
  1129. // is closing the journal the right way to fail?
  1130. //
  1131. JrnlClose(Jbuff->FileHandle);
  1132. CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
  1133. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1134. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1135. continue;
  1136. }
  1137. }
  1138. //
  1139. // The USN save point for each replica can also depend on the amount of
  1140. // journal data consumed. If there is lots of activity on the journal
  1141. // but little or no activity on a given replica set hosted by the volume
  1142. // then we must keep advancing the USN save point for the replica.
  1143. // Otherwise, if we were to crash we could find ourselves with a USN
  1144. // save point at recovery for data no longer in the journal that we
  1145. // don't want anyway. In addition, if it was still in the journal we
  1146. // would have to plow through it a second time just to find nothing of
  1147. // interest. Once JRNL_USN_SAVE_POINT_INTERVAL bytes of journal data
  1148. // are consumed then trigger a USN save on all active replica sets on
  1149. // this volume. A journal replay could make this go negative so
  1150. // minimize with 0.
  1151. //
  1152. SaveFlag = FALSE;
  1153. LOCK_VME(pVme); // Get the lock to avoid QW Tearing with
  1154. // LastUsnSavePoint update in NEW_VSN() code.
  1155. JournalConsumed = NextUsn - pVme->LastUsnSavePoint;
  1156. if (JournalConsumed < 0) {JournalConsumed = (USN)0;}
  1157. if (JournalConsumed >= (USN) JRNL_USN_SAVE_POINT_INTERVAL) {
  1158. SaveFlag = TRUE;
  1159. DPRINT3(5, "++ USN Save Triggered: NextUsn: %08x %08x "
  1160. "LastSave: %08x %08x "
  1161. "Consumed: %08x %08x\n",
  1162. PRINTQUAD(NextUsn),
  1163. PRINTQUAD(pVme->LastUsnSavePoint),
  1164. PRINTQUAD(JournalConsumed));
  1165. pVme->LastUsnSavePoint = NextUsn;
  1166. }
  1167. UNLOCK_VME(pVme);
  1168. if (SaveFlag) {
  1169. DbsRequestSaveMark(pVme, FALSE);
  1170. }
  1171. ///////////////////////////////////////////////////////////////////
  1172. // //
  1173. // P R O C E S S U S N R E C O R D S //
  1174. // //
  1175. ///////////////////////////////////////////////////////////////////
  1176. //
  1177. // Walk through the buffer and process the results. Note that a single
  1178. // file can appear multiple times. E.G. a copy operation to a file may
  1179. // create the target update the create time and set the attributes.
  1180. // Each one of these is reported as a separate event.
  1181. //
  1182. RESET_JOURNAL_PROGRESS(pVme);
  1183. while (DataLength > 0) {
  1184. Replica = NULL;
  1185. if ((LONG)UsnRecord->RecordLength > DataLength) {
  1186. DPRINT2(0, ":U: ERROR: Bogus DataLength: %d, Record Length Was: %d\n",
  1187. DataLength, UsnRecord->RecordLength );
  1188. break;
  1189. }
  1190. //
  1191. // Track USN of current record being processed and the maximum
  1192. // point of progress reached in the journal.
  1193. //
  1194. CurrentUsn = UsnRecord->Usn;
  1195. pVme->CurrentUsnRecord = CurrentUsn;
  1196. CAPTURE_MAX_JOURNAL_PROGRESS(pVme, CurrentUsn);
  1197. //
  1198. // Check if I/O is stopped on this journal and skip the rest of the
  1199. // buffer. Could be a pause request. Capture current journal
  1200. // progress for an unpause.
  1201. //
  1202. if (!pVme->IoActive) {
  1203. CAPTURE_JOURNAL_PROGRESS(pVme, CurrentUsn);
  1204. DPRINT1(4, ":U: I/O not active on this journal. Freeing buffer. State is: %s\n",
  1205. RSS_NAME(pVme->JournalState));
  1206. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1207. break;
  1208. }
  1209. //
  1210. // Increment the UsnRecordsExamined counter
  1211. //
  1212. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecExamined, 1);
  1213. UsnReason = UsnRecord->Reason;
  1214. FileAttributes = UsnRecord->FileAttributes;
  1215. //
  1216. // Ignore temporary, encrypted files. We do replicate offline
  1217. // files (FILE_ATTRIBUTE_OFFLINE set) because some members
  1218. // may be running HSM and some may not. All members have to
  1219. // have the same data.
  1220. //
  1221. if (FileAttributes & (FILE_ATTRIBUTE_ENCRYPTED)) {
  1222. DUMP_USN_RECORD(3, UsnRecord);
  1223. DPRINT(3, "++ Encrypted; skipping\n");
  1224. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1225. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1226. goto NEXT_USN_RECORD;
  1227. }
  1228. //
  1229. // Skip USN records with the SOURCE_DATA_MANAGEMENT flag set.
  1230. // E.G. HSM and SIS would set this flag to prevent triggering
  1231. // replication when the data has not changed.
  1232. //
  1233. if (UsnRecord->SourceInfo & USN_SOURCE_DATA_MANAGEMENT) {
  1234. DUMP_USN_RECORD(3, UsnRecord);
  1235. DPRINT(3, "++ DATA_MANAGEMENT source; skipping\n");
  1236. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1237. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1238. goto NEXT_USN_RECORD;
  1239. }
  1240. //
  1241. // FRS uses the NTFS journal filtering feature in which an app can
  1242. // tell NTFS what kinds of journal records it does not want to see.
  1243. // In particular FRS asks NTFS to filter out all journal records
  1244. // except for journal "Close" and "Create" records. NTFS
  1245. // writes a close record to the journal after the last handle to
  1246. // the file is closed. In addition, if the system crashes, at
  1247. // startup NTFS recovery-processing inserts close records for all
  1248. // open and modified files.
  1249. // The Create records need to be examined for directory creates
  1250. // because the close record may not appear for a while. Meanwhile
  1251. // multiple children close records can be processed which would
  1252. // be skipped unless the parent dir create was added to the Filter
  1253. // table. Bug 432549 was a case of this.
  1254. //
  1255. if (!BooleanFlagOn(UsnReason, USN_REASON_CLOSE)) {
  1256. if (BooleanFlagOn(UsnReason, USN_REASON_FILE_CREATE) &&
  1257. BooleanFlagOn(FileAttributes, FILE_ATTRIBUTE_DIRECTORY)) {
  1258. DUMP_USN_RECORD(3, UsnRecord);
  1259. DPRINT(3, "++ Dir Create; Cannot skip\n");
  1260. } else {
  1261. DUMP_USN_RECORD(3, UsnRecord);
  1262. DPRINT(3, "++ Not a close and not dir create; skipping\n");
  1263. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1264. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1265. goto NEXT_USN_RECORD;
  1266. }
  1267. }
  1268. //
  1269. // Skip files that have USN_REASON_REPARSE_POINT_CHANGE set.
  1270. // Since symbolic links are unsupported we do not replicate them.
  1271. // HSM and SIS also use reparse points but we only replicate changes
  1272. // to the file and these services change the NTFS File Record to set
  1273. // the reparse point attribute only when they migrate the file data
  1274. // somewhere else. By that time the file had already been created
  1275. // and was replicated when it was created. See NTIOAPI.H for more
  1276. // info about the REPARSE_DATA_BUFFER and the IO_REPARSE_TAG field.
  1277. //
  1278. #if 0
  1279. // This below is faulty because the SIS COPY FILE utility will both set and create
  1280. // files with a reparse point. We will have to rely on the data management test
  1281. // above to filter out the conversion of a file to and from a SIS link.
  1282. if (UsnReason & USN_REASON_REPARSE_POINT_CHANGE) {
  1283. DUMP_USN_RECORD(3, UsnRecord);
  1284. DPRINT(3, "++ Reparse point change; skipping\n");
  1285. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1286. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1287. goto NEXT_USN_RECORD;
  1288. }
  1289. #endif
  1290. //
  1291. // If this file record has the reparse attribute set then read
  1292. // the Reparse Tag from the file to see if this is either SIS or HSM.
  1293. //
  1294. if (FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
  1295. //
  1296. // Can't filter out Deletes though
  1297. //
  1298. if (!BooleanFlagOn(UsnReason, USN_REASON_FILE_DELETE)) {
  1299. WStatus = FrsCheckReparse(L"--",
  1300. (PULONG)&UsnRecord->FileReferenceNumber,
  1301. FILE_ID_LENGTH,
  1302. pVme->VolumeHandle);
  1303. if (!WIN_SUCCESS(WStatus)) {
  1304. DUMP_USN_RECORD(3, UsnRecord);
  1305. DPRINT_WS(3, "++ FrsGetReparseTag failed, skipping,", WStatus);
  1306. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1307. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1308. goto NEXT_USN_RECORD;
  1309. }
  1310. }
  1311. }
  1312. ///////////////////////////////////////////////////////////////////
  1313. // //
  1314. // F I L T E R P R O C E S S I N G //
  1315. // //
  1316. ///////////////////////////////////////////////////////////////////
  1317. //
  1318. // Note: If replication is paused for the replica tree we still
  1319. // process the journal entries so we don't lose data.
  1320. // When replication is later unpaused the update process picks
  1321. // up the change orders from the Replica Set Change order table.
  1322. //
  1323. // If replication was not started for a given replica tree then
  1324. // the directory fids won't be in the table. When replication
  1325. // is stopped for a replica tree its directory fids are purged
  1326. // from the table
  1327. //
  1328. // In the case of file or Dir renames the parent FID in the
  1329. // USN record is the FID of the destination of the rename.
  1330. // If the file/dir was in a replica set prior to the rename its
  1331. // parent file ID will be in the Parent File ID table for the
  1332. // volume.
  1333. //
  1334. // Determine if the file is in a replica set and if a location
  1335. // change is involved. Lookup the previous and current parent FID
  1336. // in the Journal Filter table and return references to their
  1337. // respective filter entries. From this point forward the flow
  1338. // must go thru SKIP_USN_RECORD so the ref counts on PrevParentFilterEntry
  1339. // and CurrParentFilterEntry are decremented appropriately.
  1340. //
  1341. LocationCmd = JrnlGetFileCoLocationCmd(pVme,
  1342. UsnRecord,
  1343. &PrevParentFilterEntry,
  1344. &CurrParentFilterEntry);
  1345. if (LocationCmd == FILE_NOT_IN_REPLICA_SET) {
  1346. goto SKIP_USN_RECORD;
  1347. }
  1348. //
  1349. // Nothing to do; skip the usn record
  1350. //
  1351. if (LocationCmd == CO_LOCATION_NO_CMD &&
  1352. ((UsnRecord->Reason & CO_CONTENT_MASK) == 0)) {
  1353. DUMP_USN_RECORD(5, UsnRecord);
  1354. DPRINT(5, "++ CO_LOCATION_NO_CMD and no content; skipping\n");
  1355. goto SKIP_USN_RECORD;
  1356. }
  1357. //
  1358. // Filter out creates of files with FILE_ATTRIBUTE_TEMPORARY set.
  1359. //
  1360. if (!(FileAttributes & FILE_ATTRIBUTE_DIRECTORY) &&
  1361. (FileAttributes & FILE_ATTRIBUTE_TEMPORARY) &&
  1362. CO_NEW_FILE(LocationCmd)) {
  1363. DUMP_USN_RECORD(5, UsnRecord);
  1364. DPRINT(5, "++ Temporary attribute set on file; skipping\n");
  1365. goto SKIP_USN_RECORD;
  1366. }
  1367. //
  1368. // Determine the Replica and get the Parent File ID.
  1369. //
  1370. if (CurrParentFilterEntry != NULL) {
  1371. CaptureParentFileID = CurrParentFilterEntry->DFileID;
  1372. Replica = CurrParentFilterEntry->Replica;
  1373. } else {
  1374. CaptureParentFileID = PrevParentFilterEntry->DFileID;
  1375. Replica = PrevParentFilterEntry->Replica;
  1376. }
  1377. FRS_ASSERT(Replica != NULL);
  1378. //
  1379. // Under certain conditions a USN record could refer to a file
  1380. // in the FRS PreInstall directory. In particular this can happen
  1381. // during restart when we have lost our journal write filter.
  1382. // No operation on a pre-install file should cause replication.
  1383. // Make special check here for parent FID match.
  1384. //
  1385. if (UsnRecord->ParentFileReferenceNumber == Replica->PreInstallFid) {
  1386. DUMP_USN_RECORD(5, UsnRecord);
  1387. DPRINT(5, "++ USN Record on PreInstall file; skipping\n");
  1388. goto SKIP_USN_RECORD;
  1389. }
  1390. DUMP_USN_RECORD2(3, UsnRecord, Replica->ReplicaNumber, LocationCmd);
  1391. DPRINT2(4, "++ IN REPLICA %d, %ws \n",
  1392. Replica->ReplicaNumber, Replica->ReplicaName->Name);
  1393. //
  1394. // Check for stale USN record. This occurs when a replica tree
  1395. // is reloaded from disk. In this case you can have stale USN records
  1396. // in the journal that predate the current state of the file when it
  1397. // was loaded. To handle this we capture the current USN when the
  1398. // replica tree load starts (Ub), and again when the load finishes
  1399. // (Ue). We save Ub and Ue with the replica config info. The USN
  1400. // of a record (Ur) affecting this replica tree is then compared
  1401. // with these bounds as follows: (Uf is current USN on the file).
  1402. // if Ur < Ub then skip record since the load has the current state.
  1403. // if Ur > Ue then process record since load has old state.
  1404. // if Ur > Uf then process record since load has old state.
  1405. // otherwise skip the record.
  1406. // Only in the last case is it necessary to open the file and read
  1407. // the USN (when Ub <= Ur <= Ue).
  1408. //
  1409. // Note: add code to filter stale USN records after a replica tree load.
  1410. // This is not a problem if the replica tree starts out empty.
  1411. //
  1412. // If the record USN is less than or equal to LastUsnRecordProcessed for
  1413. // this Replica then we must be doing a replay so ignore it.
  1414. // This works because a given file can only be in one Replica
  1415. // set at a time.
  1416. // NOTE: what about MOVERS?
  1417. //
  1418. // NOTE: Hardlinks across replica sets would violate this.
  1419. //
  1420. if (CurrentUsn <= Replica->LastUsnRecordProcessed) {
  1421. DPRINT(5, "++ USN <= LastUsnRecordProcessed. Record skipped.\n");
  1422. goto SKIP_USN_RECORD;
  1423. }
  1424. //
  1425. // If this replica set is paused or has encountered an error
  1426. // then skip the record. When it is restarted we will replay
  1427. // the journal for it.
  1428. //
  1429. if (Replica->ServiceState != REPLICA_STATE_ACTIVE) {
  1430. DPRINT1(5, "++ Replica->ServiceState not active (%s). Record skipped.\n",
  1431. RSS_NAME(Replica->ServiceState));
  1432. goto SKIP_USN_RECORD;
  1433. }
  1434. //
  1435. // Get the ptr to the config record for this replica.
  1436. //
  1437. ConfigRecord = Replica->ConfigTable.pDataRecord;
  1438. //
  1439. // The following call builds the path of the file as we currently
  1440. // know it. If the operation is a MOVEOUT this is the previous path.
  1441. // Since the USN data is historical the file/dir may not be at this
  1442. // location any longer.
  1443. //
  1444. FStatus = JrnlGetPathAndLevel(pVme->FilterTable,
  1445. &CaptureParentFileID,
  1446. &Level);
  1447. if (!FRS_SUCCESS(FStatus)) {
  1448. goto SKIP_USN_RECORD;
  1449. }
  1450. //
  1451. // Consistency checking.
  1452. //
  1453. if (UsnRecord->FileNameLength > (sizeof(FileName) - sizeof(WCHAR))) {
  1454. DPRINT1(0, ":U: ERROR - USN Record Inconsistency - File path length too long (%d bytes)\n",
  1455. UsnRecord->FileNameLength);
  1456. DPRINT3(0, ":U: ERROR - Start of data buf %08x, current ptr %08x, diff %d\n",
  1457. Jbuff->DataBuffer, UsnRecord,
  1458. (PCHAR) UsnRecord - (PCHAR) Jbuff->DataBuffer);
  1459. DPRINT1(0, ":U: ERROR - DataLength: %d\n", Jbuff->DataLength);
  1460. DPRINT(0, ":U: ERROR - Aborting rest of buffer.\n");
  1461. //
  1462. // Drop Refs and force buffer loop to exit.
  1463. //
  1464. FRS_ASSERT(!"Jrnl monitor: USN Record Inconsistency");
  1465. UsnRecord->RecordLength = (ULONG) DataLength;
  1466. goto SKIP_USN_RECORD;
  1467. }
  1468. RtlMoveMemory (FileName, UsnRecord->FileName, UsnRecord->FileNameLength);
  1469. FileName[UsnRecord->FileNameLength/sizeof(WCHAR)] = UNICODE_NULL;
  1470. DPRINT4(4, "++ NameLen %d Relative Level %d Name: %ws\\...\\%ws\n",
  1471. UsnRecord->FileNameLength, Level, Replica->Root, FileName);
  1472. //
  1473. // Determine if this USN entry is a directory or a file.
  1474. //
  1475. IsDirectory = (FileAttributes & FILE_ATTRIBUTE_DIRECTORY);
  1476. //
  1477. // First handle the case for directories.
  1478. //
  1479. if (IsDirectory) {
  1480. DPRINT(4, "++ FILE IS DIRECTORY -------\n");
  1481. //
  1482. // Level is the relative nesting level of the file in the
  1483. // replica tree. The immediate children of the root are Level 0.
  1484. // Ignore files at a depth greater than this.
  1485. // A value of one for ReplDirLevelLimit means allow files in
  1486. // the replica root dir only.
  1487. //
  1488. // Note: Add code to handle rename of a dir from excluded to included.
  1489. // This results in a MOVEDIR Change Order. Not for V1.
  1490. // Ditto for the following - Could be a movedir or movers.
  1491. //
  1492. // Note that a rename of a dir
  1493. // to the bottom level means we delete the subtree because there
  1494. // will be no dirs at the bottom level in the filter table.
  1495. //
  1496. Excluded = (Level >= (ConfigRecord->ReplDirLevelLimit-1));
  1497. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1498. DPRINT(4,"++ directory exceeds depth limit. Excluded\n");
  1499. goto SKIP_USN_RECORD;
  1500. }
  1501. //
  1502. // See if the name is on the exclusion filter list.
  1503. //
  1504. if (!IsListEmpty(&Replica->DirNameFilterHead)) {
  1505. FrsSetUnicodeStringFromRawString(&TempUStr,
  1506. UsnRecord->FileNameLength,
  1507. UsnRecord->FileName,
  1508. UsnRecord->FileNameLength);
  1509. LOCK_REPLICA(Replica);
  1510. Excluded = FrsCheckNameFilter(&TempUStr, &Replica->DirNameFilterHead);
  1511. //
  1512. // Not excluded if it's on the included list.
  1513. //
  1514. if (Excluded &&
  1515. FrsCheckNameFilter(&TempUStr, &Replica->DirNameInclFilterHead)) {
  1516. Excluded = FALSE;
  1517. }
  1518. UNLOCK_REPLICA(Replica);
  1519. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1520. DPRINT(4,"++ directory name filter hit. Excluded\n");
  1521. goto SKIP_USN_RECORD;
  1522. }
  1523. }
  1524. //
  1525. // Generate the change orders as we update the filter table.
  1526. //
  1527. DPRINT2(4,"++ DIR location cmd on: %ws\\...\\%ws\n",
  1528. Replica->Root, FileName);
  1529. JrnlFilterUpdate(Replica,
  1530. UsnRecord,
  1531. LocationCmd,
  1532. PrevParentFilterEntry,
  1533. CurrParentFilterEntry);
  1534. } else {
  1535. //
  1536. // Handle the files here.
  1537. //
  1538. // Evaluate the excluded state if this is a file.
  1539. // Files are allowed at the bottom level.
  1540. //
  1541. Excluded = (Level >= ConfigRecord->ReplDirLevelLimit);
  1542. //
  1543. // NOTE: Treat Movedir or movers that is > depth limit as moveout.
  1544. //
  1545. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1546. DPRINT(4,"++ Filter depth exceeded. File excluded\n");
  1547. goto SKIP_USN_RECORD;
  1548. }
  1549. // Note: Add code to handle rename of file from excluded to included.
  1550. //
  1551. // Excluded file check:
  1552. //
  1553. // 1. If this is a create or MOVEIN of a file with an
  1554. // excluded name then just ignore the USN record.
  1555. //
  1556. // 2. If this is a rename of an excluded file to a visible
  1557. // file then generate a MOVEIN change order for the file.
  1558. //
  1559. // 3. If the file is not in our tables then it must not
  1560. // be visible so ignore it. Note that changing the
  1561. // exclusion list by removing an element will not by itself
  1562. // make those files visible. A rename operation is still
  1563. // needed to get the file into our tables.
  1564. //
  1565. // 4. A rename of a visible file to an excluded file does
  1566. // not make the file excluded since it is still in our tables
  1567. // and present in all replicas. Only a delete or a rename
  1568. // of the file to a point outside the replica set will remove
  1569. // the file from our tables and all other replicas.
  1570. //
  1571. // 5. The addition of an element to the exclusion list only
  1572. // affects future creates. It has no affect on previous
  1573. // file creates that generated an entry in our tables.
  1574. //
  1575. //
  1576. // See if the name is on the exclusion filter list.
  1577. //
  1578. if (!IsListEmpty(&Replica->FileNameFilterHead)) {
  1579. FrsSetUnicodeStringFromRawString(&TempUStr,
  1580. UsnRecord->FileNameLength,
  1581. UsnRecord->FileName,
  1582. UsnRecord->FileNameLength);
  1583. LOCK_REPLICA(Replica);
  1584. Excluded = FrsCheckNameFilter(&TempUStr, &Replica->FileNameFilterHead);
  1585. //
  1586. // Not excluded if it's on the included list.
  1587. //
  1588. if (Excluded &&
  1589. FrsCheckNameFilter(&TempUStr, &Replica->FileNameInclFilterHead)) {
  1590. Excluded = FALSE;
  1591. }
  1592. UNLOCK_REPLICA(Replica);
  1593. if (Excluded && CO_NEW_FILE(LocationCmd)) {
  1594. DPRINT(4,"++ File name filter hit. Excluded\n");
  1595. goto SKIP_USN_RECORD;
  1596. }
  1597. }
  1598. //
  1599. // Looks like this file is real. See if we have a change order
  1600. // pending for it. If so update it, if not, alloc a new one.
  1601. //
  1602. WStatus = JrnlEnterFileChangeOrder(UsnRecord,
  1603. LocationCmd,
  1604. PrevParentFilterEntry,
  1605. CurrParentFilterEntry);
  1606. if (!WIN_SUCCESS(WStatus)) {
  1607. DPRINT(0, "++ ERROR - Change order create or update failed\n");
  1608. }
  1609. }
  1610. //
  1611. // Increment the UsnRecords Accepted counter
  1612. //
  1613. PM_INC_CTR_REPSET(Replica, UsnRecAccepted, 1);
  1614. goto ACCEPT_USN_RECORD;
  1615. SKIP_USN_RECORD:
  1616. //
  1617. // Increment the UsnRecordsRejected counter
  1618. //
  1619. PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
  1620. ACCEPT_USN_RECORD:
  1621. //
  1622. // Release the references on the prev and current parent filter
  1623. // entries that were acquired by JrnlGetFileCoLocationCmd().
  1624. //
  1625. if (PrevParentFilterEntry != NULL) {
  1626. GhtDereferenceEntryByAddress(pVme->FilterTable,
  1627. PrevParentFilterEntry,
  1628. TRUE);
  1629. PrevParentFilterEntry = NULL;
  1630. }
  1631. if (CurrParentFilterEntry != NULL) {
  1632. GhtDereferenceEntryByAddress(pVme->FilterTable,
  1633. CurrParentFilterEntry,
  1634. TRUE);
  1635. CurrParentFilterEntry = NULL;
  1636. }
  1637. //
  1638. // This has to be done after processing the record so if a
  1639. // save mark were to happen at the same time we wouldn't
  1640. // erroneously filter out the record above when the CurrentUsn
  1641. // is compared with Replica->LastUsnProcessed.
  1642. //
  1643. UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
  1644. //
  1645. // If we are out of Replay mode for this replica and the
  1646. // replica is active then advance our Journal progress
  1647. // point, Replica->LastUsnRecordProcessed.
  1648. //
  1649. if ((Replica != NULL) &&
  1650. (Replica->ServiceState == REPLICA_STATE_ACTIVE) &&
  1651. !REPLICA_REPLAY_MODE(Replica, pVme)) {
  1652. AcquireQuadLock(&pVme->QuadWriteLock);
  1653. Replica->LastUsnRecordProcessed = CurrentUsn;
  1654. ReleaseQuadLock(&pVme->QuadWriteLock);
  1655. }
  1656. NEXT_USN_RECORD:
  1657. //
  1658. // Advance to next USN Record.
  1659. //
  1660. DataLength -= UsnRecord->RecordLength;
  1661. UsnRecord = (PUSN_RECORD)((PCHAR)UsnRecord + UsnRecord->RecordLength);
  1662. } // end while(DataLength > 0)
  1663. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  1664. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  1665. } // end while(TRUE)
  1666. //
  1667. // Get exception status.
  1668. //
  1669. } except (EXCEPTION_EXECUTE_HANDLER) {
  1670. GET_EXCEPTION_CODE(WStatus);
  1671. }
  1672. } finally {
  1673. if (WIN_SUCCESS(WStatus)) {
  1674. if (AbnormalTermination()) {
  1675. WStatus = ERROR_OPERATION_ABORTED;
  1676. }
  1677. }
  1678. DPRINT_WS(0, "Journal Monitor thread finally.", WStatus);
  1679. //
  1680. // Trigger FRS shutdown if we terminated abnormally.
  1681. //
  1682. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
  1683. DPRINT(0, "Journal Monitor thread terminated abnormally, forcing service shutdown.\n");
  1684. FrsIsShuttingDown = TRUE;
  1685. SetEvent(ShutDownEvent);
  1686. } else {
  1687. WStatus = ERROR_SUCCESS;
  1688. }
  1689. //
  1690. // Cleanup all the storage.
  1691. //
  1692. DPRINT1(3, ":S: T E R M I N A T I N G -- %s\n", DEBSUB);
  1693. JournalMonitorShutdown();
  1694. if (HANDLE_IS_VALID(JournalReadThreadHandle)) {
  1695. WStatus = WaitForSingleObject(JournalReadThreadHandle, 10000);
  1696. CHECK_WAIT_ERRORS2(3, WStatus, 1);
  1697. if (WIN_SUCCESS(WStatus)) {
  1698. DPRINT(4, ":S: Journal Read thread terminated.\n");
  1699. }
  1700. } else {
  1701. DPRINT(4, ":S: Journal Read thread terminate - NULL Handle\n");
  1702. }
  1703. DPRINT(0, ":S: Journal is exiting.\n");
  1704. DPRINT1(4, ":S: ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx) - %08x\n", ThisFrsThreadCtx);
  1705. ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx);
  1706. }
  1707. return WStatus;
  1708. }
  1709. LONG
  1710. JrnlGetFileCoLocationCmd(
  1711. PVOLUME_MONITOR_ENTRY pVme,
  1712. IN PUSN_RECORD UsnRecord,
  1713. OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
  1714. OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
  1715. )
  1716. /*++
  1717. Routine Description:
  1718. Given the Reason mask and the current parent file ID in the USN record
  1719. and the previous parent File ID determine the location command for the
  1720. change order. The volume filter table is used to check the presence of
  1721. the parent directories in a replica set and to check if the file has
  1722. moved between two replica sets.
  1723. There are 5 cases shown in the table below. A lookup is done for each File
  1724. ID in the Filter table and these results are tested to generate the change
  1725. order location command value. (M: lookup miss, H: lookup hit). See
  1726. comments elsewhere for outcome defs.
  1727. Prev Curr Prev &
  1728. Parent Parent New
  1729. FID FID Parent R.S.
  1730. Case Lookup Lookup Match Outcome
  1731. 0 M M - FILE_NOT_IN_REPLICA_SET
  1732. 1 M H - MOVEIN
  1733. 2 H M - MOVEOUT (a)
  1734. 3 H H No (a), MOVERS, NAMECHANGE
  1735. 4 H H Yes MOVEDIR, NAMECHANGE
  1736. (a) The parent FID could be in the replica set while the File/Dir FID isn't
  1737. if a subtree enum by the update process hasn't reached the File/Dir FID yet
  1738. (MOVEIN on parent followed by MOVOUT on child) or,
  1739. The child was excluded and now its name is changing to allow inclusion.
  1740. In this case the rename includes a name change so the file is no
  1741. longer excluded.
  1742. During subtree operations filter table lookups must be blocked or races
  1743. causing invalid states will occur.
  1744. 1. MOVEIN - Rename of a directory into a replica set. The lookup failed on
  1745. the previous parent FID but the current parent FID is in the table. We
  1746. add an entry for this DIR to the filter table. The update process must
  1747. enumerate the subtree on disk and evaluate each file for inclusion into
  1748. the tree, updating the Filter table as it goes. We may see file
  1749. operations several levels down from the rename point and have no entry in
  1750. the Filter Table so we pitch those records. The sub-tree enumeration
  1751. process must handle this as it incorporates each file into the IDTable.
  1752. 2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
  1753. volume. This is a delete of an entire subtree in the Replica set. We
  1754. enumerate the subtree bottom-up, sending dir level change orders to the
  1755. update process as we delete the filter table entries.
  1756. 3. Name change only. The current Parent FID in the USN record matches the
  1757. Parent FID in the Filter entry for the file or directory. Update the name
  1758. in the filter entry.
  1759. 4. MOVEDIR - previous Parent FID is different from the current parent FID.
  1760. Both are in the Filter table with the same replica set. This is a rename
  1761. to a dir in the SAME replica set. Update the parent FID in the filter
  1762. enty and Filename too.
  1763. 5. MOVERS - The previous Parent FID is different from the current parent File
  1764. ID. Both are in the Filter Table but they have DIFFERENT replica set IDs.
  1765. Update the parent FID, the replica ptr, and name in the filter entry. This
  1766. is a move of an entire subtree from one replica set to another. We
  1767. enumerate the subtree top-down, sending dir level change orders to the
  1768. update process as we update the replica set information in the filter table
  1769. entries.
  1770. Arguments:
  1771. pVme - ptr to the Volume monitor entry for the parent file ID and
  1772. Volume Filter tables.
  1773. UsnRecord - ptr to the UsnRecord.
  1774. PrevParentFilterEntry = return value for the previous parent filter entry
  1775. or null. This is the parent under which
  1776. the file or dir used to reside.
  1777. CurrParentFilterEntry = return value for the current parent filter entry
  1778. or null. This is the parent under which the file
  1779. or dir currently resides.
  1780. NOTE: The caller must decrement the ref counts on the previous and new parent
  1781. filter entries if either is returned non null.
  1782. The table below summarizes the filter entry return values for previous
  1783. and current filter entry. A NULL ptr is returned in the 'No' cases.
  1784. It is the callers job to decrement the reference count on the filter
  1785. entry when a non=null value is returned.
  1786. Result returned in
  1787. PrevParentFilterEntry CurrParentFilterEntry
  1788. File Not in Replica Set No No
  1789. File content Change No Yes
  1790. create No Yes
  1791. delete No Yes
  1792. Movein No Yes
  1793. MoveOut Yes No
  1794. MoveDir Yes Yes
  1795. MoveRS Yes Yes
  1796. Return Value:
  1797. The change order location comand or FILE_NOT_IN_REPLICA_SET.
  1798. --*/
  1799. {
  1800. #undef DEBSUB
  1801. #define DEBSUB "JrnlGetFileCoLocationCmd:"
  1802. ULONG Reason;
  1803. PGENERIC_HASH_TABLE FilterTable;
  1804. PULONGLONG CurrParentFileID;
  1805. ULONGLONG PrevParentFileID;
  1806. PULONGLONG FileID;
  1807. ULONG_PTR Flags;
  1808. ULONG GStatus;
  1809. BOOL PrevParentExists;
  1810. *PrevParentFilterEntry = NULL;
  1811. *CurrParentFilterEntry = NULL;
  1812. //
  1813. // The code below checks for USN records with USN_SOURCE_REPLICATION_MANAGEMENT
  1814. // SourceInfo flag set. Currently we check for this bit for consistency
  1815. // with the state in our write filter table. A warning is generated
  1816. // when we get a mismatch. Eventually we need to remove the write filter
  1817. // hash table and just rely just on the above flag.
  1818. // It also tells us to skip our own records during recovery.
  1819. //
  1820. // First check if it's in the USN filter hash table. If so this is one of
  1821. // our own install writes (FrsCloseWithUsnDampening did the close)
  1822. // so skip the journal record and delete the table entry.
  1823. //
  1824. GStatus = QHashLookup(pVme->FrsWriteFilter,
  1825. &UsnRecord->Usn,
  1826. &PrevParentFileID, // unused result
  1827. &Flags); // unused result
  1828. if (GStatus == GHT_STATUS_SUCCESS) {
  1829. DUMP_USN_RECORD(4, UsnRecord);
  1830. DPRINT1(4, "++ USN Write filter cache hit on usn %08x %08x -- skip record\n",
  1831. PRINTQUAD(UsnRecord->Usn));
  1832. //
  1833. // Some code is closing the handle with usn dampening but did
  1834. // not mark the handle as being managed by ntfrs.
  1835. //
  1836. if (!BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1837. DPRINT2(4, "++ WARN Source not set; usn dampen: SourceInfo is %08x for %08x %08x\n",
  1838. UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
  1839. }
  1840. return FILE_NOT_IN_REPLICA_SET;
  1841. }
  1842. //
  1843. // Maybe recovery usn record but spit out a warning anyway. In
  1844. // general, usn records with USN_SOURCE_REPLICATION_MANAGEMENT set should have been
  1845. // closed with usn dampening and filtered out above.
  1846. //
  1847. if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1848. DPRINT2(4, "++ WARN Source set; no usn dampen: SourceInfo is %08x for %08x %08x\n",
  1849. UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
  1850. }
  1851. //
  1852. // Ignore the usn records generated by the service
  1853. //
  1854. // Note: get rid of writefilter and use SourceInfo always!
  1855. //
  1856. Reason = UsnRecord->Reason;
  1857. if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
  1858. if (Reason & USN_REASON_FILE_DELETE) {
  1859. DPRINT1(4, "++ Process service generated usn record for %08x %08x\n",
  1860. PRINTQUAD(UsnRecord->FileReferenceNumber));
  1861. } else {
  1862. DUMP_USN_RECORD(4, UsnRecord);
  1863. DPRINT1(4, "++ Ignore service generated usn record for %08x %08x\n",
  1864. PRINTQUAD(UsnRecord->FileReferenceNumber));
  1865. return FILE_NOT_IN_REPLICA_SET;
  1866. }
  1867. }
  1868. #ifdef RECOVERY_CONFLICT
  1869. //
  1870. // If a recovery conflict table exists check for a match and skip the USN
  1871. // record. This filters out any USN records caused by our own activities
  1872. // at the time of the crash.
  1873. //
  1874. if (pVme->RecoveryConflictTable != NULL) {
  1875. //
  1876. // Once we pass the journal recovery end point delete the table.
  1877. // It can not have any entries with a larger USN than the end point.
  1878. // ("how can we be sure that all replica sets on this volume have"
  1879. "actually started and so have actually finished using the"
  1880. "conflict table?")
  1881. //
  1882. if (UsnRecord->Usn > pVme->JrnlRecoveryEnd) {
  1883. pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
  1884. } else {
  1885. GStatus = QHashLookup(pVme->RecoveryConflictTable,
  1886. &UsnRecord->FileReferenceNumber,
  1887. &PrevParentFileID, // unused result
  1888. &Flags); // unused result
  1889. if (GStatus == GHT_STATUS_SUCCESS) {
  1890. DUMP_USN_RECORD(1, UsnRecord);
  1891. DPRINT1(1, "++ Recovery conflict table hit on FID %08x %08x -- skip record\n",
  1892. PRINTQUAD(UsnRecord->FileReferenceNumber));
  1893. return FILE_NOT_IN_REPLICA_SET;
  1894. }
  1895. }
  1896. }
  1897. #endif // RECOVERY_CONFLICT
  1898. FilterTable = pVme->FilterTable;
  1899. //
  1900. // Get the previous parent file ID for this file/Dir.
  1901. //
  1902. FileID = &UsnRecord->FileReferenceNumber;
  1903. CurrParentFileID = &UsnRecord->ParentFileReferenceNumber;
  1904. GStatus = QHashLookup(pVme->ParentFidTable, FileID, &PrevParentFileID, &Flags);
  1905. PrevParentExists = (GStatus == GHT_STATUS_SUCCESS);
  1906. //
  1907. // Check to see if we still need to special case any operations on the root
  1908. // dir of a replica set.
  1909. //
  1910. if (PrevParentExists) {
  1911. DPRINT2(5, "++ Fid: %08x %08x PrevParentFid: %08x %08x\n",
  1912. PRINTQUAD(UsnRecord->FileReferenceNumber),
  1913. PRINTQUAD(PrevParentFileID));
  1914. //
  1915. // IF the previous parent FID is not in the Filter table now and this
  1916. // is not a rename operation (which might result in a MOVEIN) then this
  1917. // file is not in a replica set. This case occurs after a MOVEOUT of a
  1918. // parent dir followed by some access to a child.
  1919. //
  1920. GStatus = GhtLookup(FilterTable, &PrevParentFileID, TRUE, PrevParentFilterEntry);
  1921. if ((GStatus != GHT_STATUS_SUCCESS) &&
  1922. ((Reason & USN_REASON_RENAME_NEW_NAME) == 0)) {
  1923. DUMP_USN_RECORD(4, UsnRecord);
  1924. DPRINT(4, "++ NOT IN RS - Entry in Parent File ID table but not FilterTable & not rename.\n");
  1925. return FILE_NOT_IN_REPLICA_SET;
  1926. }
  1927. } else {
  1928. //
  1929. // There is no entry in the parent file ID table for this file or dir.
  1930. // If there is no entry in the filter table for the file's current
  1931. // parent then the file is not in any replica set.
  1932. //
  1933. GStatus = GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
  1934. if (GStatus != GHT_STATUS_SUCCESS) {
  1935. DUMP_USN_RECORD(4, UsnRecord);
  1936. DPRINT(4, "++ NOT IN RS - Entry not in Parent File ID table or FilterTable.\n");
  1937. return FILE_NOT_IN_REPLICA_SET;
  1938. }
  1939. }
  1940. //
  1941. // A delete has to have an entry in the parent File ID table or it is not
  1942. // in a replica set.
  1943. //
  1944. if (Reason & USN_REASON_FILE_DELETE) {
  1945. //
  1946. // If the Previous parent filter entry is valid then the file/dir
  1947. // was in a replica set so treat it as a delete.
  1948. //
  1949. if (*PrevParentFilterEntry != NULL) {
  1950. *CurrParentFilterEntry = *PrevParentFilterEntry;
  1951. *PrevParentFilterEntry = NULL;
  1952. return CO_LOCATION_DELETE;
  1953. }
  1954. //
  1955. // It wasn't in the parent fid table so either the rename flag is also
  1956. // set or the current parent filter entry is non-null which would be
  1957. // the case for a delete on an excluded file. Either way skip it.
  1958. //
  1959. DUMP_USN_RECORD(4, UsnRecord);
  1960. DPRINT(4, "++ NOT IN RS - delete on excluded file?\n");
  1961. return FILE_NOT_IN_REPLICA_SET;
  1962. }
  1963. //
  1964. // A create has to have an entry for its parent in the Volume Filter Table
  1965. // or it is not in a replica set. It must have no prior entry in the Parent
  1966. // file ID table. (FILE IDs are unique).
  1967. //
  1968. if (Reason & USN_REASON_FILE_CREATE) {
  1969. //
  1970. // If the USN from the journal record is less than or equal to the USN
  1971. // from the file when the replica tree load was done then the created
  1972. // file was already picked up by the load. Otherwise it is an error
  1973. // because we should not have had an entry in the parent ID table yet.
  1974. // At this point we do not have the current USN on the file so we will
  1975. // assume that if a previous parent exists the load got there first and
  1976. // this journal record is stale (so skip the record).
  1977. //
  1978. // In the case where we have paused the journal to startup another
  1979. // replica set we may have to move the next USN to read from the journal
  1980. // back to let this new RS catch-up. In that case we will be seeing
  1981. // records for a second time. If we are in replay mode and the USN
  1982. // for this record is less than the LastUsnRecordProcessed for the target replica
  1983. // set then we ignore the record.
  1984. //
  1985. // Note: add above file usn check.
  1986. //
  1987. if (PrevParentExists) {
  1988. DUMP_USN_RECORD(4, UsnRecord);
  1989. DPRINT(4, "++ NOT IN RS \n");
  1990. return FILE_NOT_IN_REPLICA_SET;
  1991. }
  1992. return CO_LOCATION_CREATE;
  1993. }
  1994. //
  1995. // If not a rename then no location change, but this file is in a Replica Set.
  1996. //
  1997. if ((Reason & USN_REASON_RENAME_NEW_NAME) == 0) {
  1998. //
  1999. // Check for a content update to a file that is not in our tables.
  2000. // It could be an excluded file which gets filtered out later.
  2001. // Or an excluded file that is no longer excluded because the
  2002. // the exclusion list changed.
  2003. // Treat it as a create so we check the exclusion list again
  2004. // and set the USN record create flag for others that may look at it.
  2005. //
  2006. if (*CurrParentFilterEntry != NULL) {
  2007. //UsnRecord->Reason |= USN_REASON_FILE_CREATE;
  2008. //return CO_LOCATION_CREATE;
  2009. //
  2010. // Treat it as a MOVEIN since if it is a directory we need to
  2011. // enumerate the children.
  2012. //
  2013. return CO_LOCATION_MOVEIN;
  2014. }
  2015. //
  2016. // It's not a rename, CurrParentFilterEntry is NULL so to be here
  2017. // PrevParentFilterEntry must be non-null which means that this is
  2018. // a content update to a file we already know about.
  2019. //
  2020. FRS_ASSERT(*PrevParentFilterEntry != NULL);
  2021. *CurrParentFilterEntry = *PrevParentFilterEntry;
  2022. *PrevParentFilterEntry = NULL;
  2023. return CO_LOCATION_NO_CMD;
  2024. }
  2025. //
  2026. // Handle file rename cases. If parent FileIDs match then no location change.
  2027. //
  2028. if ((*PrevParentFilterEntry != NULL) &&
  2029. (PrevParentFileID == *CurrParentFileID)) {
  2030. *CurrParentFilterEntry = *PrevParentFilterEntry;
  2031. *PrevParentFilterEntry = NULL;
  2032. return CO_LOCATION_NO_CMD;
  2033. }
  2034. //
  2035. // Old and new parent file IDs are different. So the file/dir moved across
  2036. // directories. Could be MOVEIN, MOVEOUT, MOVEDIR, MOVERS.
  2037. //
  2038. if (*CurrParentFilterEntry == NULL) {
  2039. GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
  2040. }
  2041. if (*PrevParentFilterEntry != NULL) {
  2042. if (*CurrParentFilterEntry != NULL) {
  2043. //
  2044. // Old and new parents in table.
  2045. //
  2046. if ((*PrevParentFilterEntry)->Replica ==
  2047. (*CurrParentFilterEntry)->Replica) {
  2048. //
  2049. // Old and New Replica Sets are the same ==> MOVEDIR
  2050. //
  2051. return CO_LOCATION_MOVEDIR;
  2052. } else {
  2053. //
  2054. // Old and New Replica Sets are different ==> MOVERS
  2055. //
  2056. return CO_LOCATION_MOVERS;
  2057. }
  2058. } else {
  2059. //
  2060. // Old parent in table, new parent not in table ==> MOVEOUT
  2061. //
  2062. return CO_LOCATION_MOVEOUT;
  2063. }
  2064. } else {
  2065. if (*CurrParentFilterEntry != NULL) {
  2066. //
  2067. // Old parent not in table, new parent is in table ==> MOVEIN
  2068. //
  2069. return CO_LOCATION_MOVEIN;
  2070. } else {
  2071. //
  2072. // To get here the operation must be a rename on a file/dir
  2073. // that was in the parent file ID table but the previous parent
  2074. // File ID is no longer in the Filter table (MOVEOUT). In addition
  2075. // the current parent File ID is not in the filter table. So this
  2076. // is a rename operation on a file that was in a replica set in the
  2077. // past but is not currently in any replica set. The update process
  2078. // will eventually clean out the stale entries in the parent file
  2079. // ID table.
  2080. //
  2081. DUMP_USN_RECORD(4, UsnRecord);
  2082. DPRINT(4, "++ NOT IN RS - Rename on a file with a MOVEOUT parent.\n");
  2083. return FILE_NOT_IN_REPLICA_SET;
  2084. }
  2085. }
  2086. DUMP_USN_RECORD(4, UsnRecord);
  2087. DPRINT(4, "++ NOT IN RS\n");
  2088. return FILE_NOT_IN_REPLICA_SET;
  2089. }
  2090. ULONG
  2091. JrnlEnterFileChangeOrder(
  2092. IN PUSN_RECORD UsnRecord,
  2093. IN ULONG LocationCmd,
  2094. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  2095. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  2096. )
  2097. /*++
  2098. Routine Description:
  2099. Enter a new change order or update an exisitng change order.
  2100. This routine is for FILES ONLY. Directories are handled in
  2101. JrnlFilterUpdate().
  2102. This routine acquires and releases the locks on both the source and target
  2103. replica set change order lists (in the case of a MOVERS).
  2104. Assumes The caller has taken references on the old and new parent filter entry.
  2105. Arguments:
  2106. UsnRecord - ptr to the UsnRecord.
  2107. LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
  2108. OldParentFilterEntry - The filter entry for the file's previous parent.
  2109. NewParentFilterEntry - The filter entry for the file's current parent.
  2110. Return Value:
  2111. Win32 status.
  2112. --*/
  2113. {
  2114. #undef DEBSUB
  2115. #define DEBSUB "JrnlEnterFileChangeOrder:"
  2116. ULONG GStatus;
  2117. ULONG WStatus;
  2118. PULONGLONG FileID;
  2119. ULONGLONG OriginalParentFileID;
  2120. PCHANGE_ORDER_ENTRY ChangeOrder;
  2121. PGENERIC_HASH_TABLE ChangeOrderTable;
  2122. PREPLICA CurrentReplica;
  2123. PREPLICA OriginalReplica;
  2124. PFILTER_TABLE_ENTRY OriginalParentFilterEntry;
  2125. BOOL PendingCo;
  2126. ULONG StreamSequenceNumber;
  2127. BOOL MergeOk;
  2128. PCXTION Cxtion;
  2129. UNICODE_STRING UnicodeStr;
  2130. PVOLUME_MONITOR_ENTRY pVme;
  2131. //
  2132. // Determine the original parent and replica set if the file has moved around.
  2133. // This determines what change order table we need to examine for a pending
  2134. // change order.
  2135. // Note: Now that we have one change order table per volume, is this still needed?
  2136. //
  2137. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  2138. OriginalParentFilterEntry = OldParentFilterEntry;
  2139. } else {
  2140. OriginalParentFilterEntry = NewParentFilterEntry;
  2141. if (NewParentFilterEntry->DFileID != UsnRecord->ParentFileReferenceNumber) {
  2142. DPRINT(4, "++ Warn - Current parent FID NOT EQUAL to UsnRecord.parentFiD -- Stale USN Rec???\n");
  2143. DPRINT2(4, "++ %08x %08x -- %08x %08x\n",
  2144. PRINTQUAD(NewParentFilterEntry->DFileID),
  2145. PRINTQUAD(UsnRecord->ParentFileReferenceNumber));
  2146. return ERROR_INVALID_PARAMETER;
  2147. }
  2148. }
  2149. OriginalReplica = OriginalParentFilterEntry->Replica;
  2150. OriginalParentFileID = OriginalParentFilterEntry->DFileID;
  2151. pVme = OriginalReplica->pVme;
  2152. ChangeOrderTable = pVme->ChangeOrderTable;
  2153. CurrentReplica = (NewParentFilterEntry != NULL) ?
  2154. NewParentFilterEntry->Replica :
  2155. OldParentFilterEntry->Replica;
  2156. FrsRtlAcquireListLock(&pVme->ChangeOrderList);
  2157. //
  2158. // Make a new stream sequence number. Protected by above list lock.
  2159. //
  2160. StreamSequenceNumber = ++pVme->StreamSequenceNumber;
  2161. //
  2162. // See if there is a pending change order for this file/dir. The call to
  2163. // JrnlUpdateChangeOrder() drops our reference on the change order.
  2164. //
  2165. FileID = &UsnRecord->FileReferenceNumber;
  2166. GStatus = GhtLookupNewest(ChangeOrderTable, FileID, TRUE, &ChangeOrder);
  2167. PendingCo = (GStatus == GHT_STATUS_SUCCESS);
  2168. if (PendingCo) {
  2169. //
  2170. // There is a pending change order. Do a couple consistency checks.
  2171. //
  2172. // This USN record should not be for a file create because that
  2173. // would generate a new File ID which should NOT be in the table.
  2174. //
  2175. // NOT QUITE TRUE -- JrnlGetFileCoLocationCmd() will turn on the
  2176. // USN create flag if it sees a file is in the replica set but not
  2177. // in the parent file ID table. This happens when a file that was on
  2178. // the exclusion list is updated after the exclusion list is changed
  2179. // to allow the file to be included. Because of this situation we can
  2180. // also see the create flag set when the following occurs:
  2181. // 1. A series of file changes result in two COs being produced
  2182. // because the first CO is pulled off the process queue.
  2183. // 2. Subsequent file changes are accumulated in the 2nd CO.
  2184. // 3. Meanwhile the user deletes the file so the first CO aborts when
  2185. // it can't generate the staging file. As part of this abort the
  2186. // IDTable entry for the "new" file is deleted and the ParentFidTable
  2187. // entry is removed.
  2188. // 4. Now another USN record for the file (not the delete yet) arrives
  2189. // to merge with the 2nd CO under construction. Since we don't yet
  2190. // know a delete is coming the code in JrnlGetFileCoLocationCmd()
  2191. // sets the USN create flag as described above.
  2192. // 5. Now we end up here and hit the assert. So to avoid this we check
  2193. // the Pending CO and only assert if is already a create.
  2194. //
  2195. // Yea, yea I could just bag the assert but the above scenario is instructive.
  2196. //
  2197. if ((LocationCmd == CO_LOCATION_CREATE) &&
  2198. (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_CREATE)){
  2199. DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber, LocationCmd);
  2200. DPRINT(0, "++ ERROR -- USN_REASON_FILE_CREATE with create change order in the table:\n");
  2201. FRS_PRINT_TYPE(0, ChangeOrder);
  2202. FRS_ASSERT(!"JrnlEnterFileCO: USN_REASON_FILE_CREATE with create change order in table");
  2203. goto RETURN;
  2204. }
  2205. //
  2206. // If the pending change order is a delete and the USN record
  2207. // specifies the same same FID this is an error because
  2208. // delete will have retired the FID.
  2209. //
  2210. if (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE){
  2211. DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber,
  2212. CO_LOCATION_DELETE);
  2213. DPRINT(0, "++ ERROR - new USN record follows delete with same FID");
  2214. FRS_PRINT_TYPE(0, ChangeOrder);
  2215. FRS_ASSERT(!"JrnlEnterFileCO: new USN record follows delete with same FID");
  2216. goto RETURN;
  2217. }
  2218. //
  2219. // USN MERGE RESTRICTIONS:
  2220. //
  2221. // Check if this USN record can be merged with the pending change order.
  2222. // If this USN record is a delete or a rename then it removes a name
  2223. // from the name space. If there exists a more recent change order
  2224. // that references this name then we can not merge the USN record.
  2225. // Instead we must create a new CO.
  2226. //
  2227. // Consider this sequence:
  2228. // Attrib -r Dir <== creates CO-1
  2229. // Del Dir\Foo <== creates CO-2
  2230. // Del Dir <== Merge with CO-1 causes name conflict.
  2231. //
  2232. // The "Del Dir" CO can't be merged with CO-1 because CO-2 is still
  2233. // using Dir to delete file Foo. If the merge were to take place the
  2234. // delete would fail since Dir is not empty. File Dir\Foo would be
  2235. // deleted but Dir would be left around.
  2236. //
  2237. // Similarly a rename creates a new name in the name space but if there
  2238. // is a more recent CO that references the name then the rename can't
  2239. // be merged.
  2240. //
  2241. // Consider the following sequence: (Bar already exists)
  2242. // Echo TestString > Foo <== creates CO-1
  2243. // Ren Bar Bar2 <== creates CO-2
  2244. // Ren Foo Bar <== Merge with CO-1 causes name conflict.
  2245. //
  2246. // Foo and Bar are different COs on different Fids but they have
  2247. // name space dependencies that prevent merging the Foo rename with
  2248. // CO-1 that does the file update. If we did merge these two COs then
  2249. // the resulting remote CO that is sent out would collide with the
  2250. // pre-existing Bar, thus deleting it. When CO-2 arrived the original
  2251. // Bar would be gone so there would be no Bar2.
  2252. //
  2253. MergeOk = TRUE;
  2254. if (MergeOk &&
  2255. CurrentReplica &&
  2256. (Cxtion = GTabLookup(CurrentReplica->Cxtions,
  2257. &CurrentReplica->JrnlCxtionGuid,
  2258. NULL)) &&
  2259. !GUIDS_EQUAL(&ChangeOrder->JoinGuid, &Cxtion->JoinGuid)) {
  2260. MergeOk = FALSE;
  2261. CHANGE_ORDER_TRACE(3, ChangeOrder, "Invalid join guid Merge NOT OK ");
  2262. }
  2263. if (BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME |
  2264. USN_REASON_FILE_DELETE)) {
  2265. //
  2266. // If this is not a serialized operation (MOVEDIR or MOVERS)
  2267. // then first test for conflict on the current name/parent FID of the
  2268. // file. Then if that's ok test for a conflict on the previous name.
  2269. //
  2270. if (CO_MOVE_RS_OR_DIR(LocationCmd)) {
  2271. MergeOk = FALSE;
  2272. CHANGE_ORDER_TRACE(3, ChangeOrder, "MOVERS/DIR Merge NOT OK ");
  2273. }
  2274. if (MergeOk) {
  2275. FrsSetUnicodeStringFromRawString(&UnicodeStr,
  2276. UsnRecord->FileNameLength,
  2277. UsnRecord->FileName,
  2278. UsnRecord->FileNameLength);
  2279. MergeOk = JrnlMergeCoTest(pVme,
  2280. &UnicodeStr,
  2281. &UsnRecord->ParentFileReferenceNumber,
  2282. ChangeOrder->StreamLastMergeSeqNum);
  2283. if (MergeOk) {
  2284. CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge OK ");
  2285. } else {
  2286. CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge NOT OK ");
  2287. }
  2288. }
  2289. //
  2290. // If the Merge is still on and this is a rename then check for
  2291. // a conflict in the use of the previous name that will go away.
  2292. //
  2293. if (MergeOk &&
  2294. BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME)) {
  2295. MergeOk = JrnlMergeCoTest(pVme,
  2296. &ChangeOrder->UFileName,
  2297. &OriginalParentFilterEntry->DFileID,
  2298. ChangeOrder->StreamLastMergeSeqNum);
  2299. if (MergeOk) {
  2300. CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge OK ");
  2301. } else {
  2302. CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge NOT OK ");
  2303. }
  2304. }
  2305. }
  2306. if (MergeOk) {
  2307. //
  2308. // Update the seq number of last USN record to contribute to CO.
  2309. //
  2310. ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
  2311. }
  2312. PendingCo = MergeOk;
  2313. //
  2314. // Creating new change order; drop reference on current change order
  2315. //
  2316. if (!PendingCo) {
  2317. GStatus = GhtDereferenceEntryByAddress(ChangeOrderTable,
  2318. ChangeOrder,
  2319. TRUE);
  2320. if (GStatus != GHT_STATUS_SUCCESS) {
  2321. DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
  2322. FRS_PRINT_TYPE(0, ChangeOrder);
  2323. FRS_ASSERT(!"JrnlEnterFileCO: ref count non positive");
  2324. goto RETURN;
  2325. }
  2326. }
  2327. }
  2328. if (!PendingCo) {
  2329. //
  2330. // Construct new change order.
  2331. //
  2332. ChangeOrder = JrnlCreateCo(OriginalReplica,
  2333. &UsnRecord->FileReferenceNumber,
  2334. &OriginalParentFilterEntry->DFileID,
  2335. UsnRecord,
  2336. BooleanFlagOn(UsnRecord->FileAttributes,
  2337. FILE_ATTRIBUTE_DIRECTORY),
  2338. UsnRecord->FileName,
  2339. UsnRecord->FileNameLength);
  2340. ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
  2341. //
  2342. // Set this up now so it appears in the log file. It is overwritten
  2343. // later with the real CO Guid when the CO is issued.
  2344. //
  2345. ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSequenceNumber;
  2346. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Create", UsnRecord->Reason);
  2347. } else {
  2348. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
  2349. }
  2350. //
  2351. // Update the Name Space Table with the current stream sequence number.
  2352. // Do this for both the file name and the parent dir name. In the case
  2353. // of rename do it for the original and current file name and parent names.
  2354. //
  2355. // Note: The Orig info is only relevant if CO is MoveOut, MoveDir or MoveRs.
  2356. // Note: The Curr info is only relevant if CO is NOT a MoveOut.
  2357. //
  2358. // FName ParentFid
  2359. // Orig File ChangeOrder->UFileName PrevPFE->DFileID
  2360. // Orig Parent PrevPFE->UFileName PrevPFE->DParentFileID
  2361. // Curr File UsnRecord->FileName CurrPFE->DFileID
  2362. // Curr Parent CurrPFE->UFileName CurrPFE->DParentFileID
  2363. //
  2364. if (LocationCmd != CO_LOCATION_MOVEOUT) {
  2365. //
  2366. // Update Curr File (Where the USN record says file went)
  2367. //
  2368. FrsSetUnicodeStringFromRawString(&UnicodeStr,
  2369. UsnRecord->FileNameLength,
  2370. UsnRecord->FileName,
  2371. UsnRecord->FileNameLength);
  2372. JrnlUpdateNst(pVme,
  2373. &UnicodeStr,
  2374. &UsnRecord->ParentFileReferenceNumber,
  2375. StreamSequenceNumber);
  2376. //
  2377. // Update Curr parent (the parent dir where file went)
  2378. //
  2379. JrnlUpdateNst(pVme,
  2380. &NewParentFilterEntry->UFileName,
  2381. &NewParentFilterEntry->DParentFileID,
  2382. StreamSequenceNumber);
  2383. }
  2384. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  2385. //
  2386. // Update Orig File (The pending CO tells where the file came from)
  2387. //
  2388. JrnlUpdateNst(pVme,
  2389. &ChangeOrder->UFileName,
  2390. &OriginalParentFilterEntry->DFileID,
  2391. StreamSequenceNumber);
  2392. //
  2393. // Update Orig Parent (The original parent dir where the file came from)
  2394. //
  2395. JrnlUpdateNst(pVme,
  2396. &OriginalParentFilterEntry->UFileName,
  2397. &OriginalParentFilterEntry->DParentFileID,
  2398. StreamSequenceNumber);
  2399. }
  2400. //
  2401. // Update the change order. This drops our ref on the change order.
  2402. //
  2403. WStatus = JrnlUpdateChangeOrder(ChangeOrder,
  2404. CurrentReplica,
  2405. UsnRecord->ParentFileReferenceNumber,
  2406. LocationCmd,
  2407. UsnRecord);
  2408. if (!WIN_SUCCESS(WStatus)) {
  2409. DPRINT(0, "++ Error - failed to insert or update change order\n");
  2410. DPRINT_WS(0, "JrnlUpdateChangeOrder", WStatus);
  2411. } else {
  2412. DPRINT1(4, "++ ChangeOrder %s success\n", (PendingCo ? "update" : "create"));
  2413. }
  2414. RETURN:
  2415. //
  2416. // Drop the locks on the change order process lists.
  2417. //
  2418. FrsRtlReleaseListLock(&pVme->ChangeOrderList);
  2419. return WStatus;
  2420. }
  2421. PCHANGE_ORDER_ENTRY
  2422. JrnlCreateCo(
  2423. IN PREPLICA Replica,
  2424. IN PULONGLONG Fid,
  2425. IN PULONGLONG ParentFid,
  2426. IN PUSN_RECORD UsnRecord,
  2427. IN BOOL IsDirectory,
  2428. IN PWCHAR FileName,
  2429. IN USHORT Length
  2430. )
  2431. /*++
  2432. Routine Description:
  2433. This functions allocates a change order entry and inits some of the fields.
  2434. Depending on the change order some of these fields are overwritten later.
  2435. Arguments:
  2436. Replica - ptr to replica set for this change order.
  2437. Fid - The file reference number for the local file.
  2438. ParentFid - The parent file reference number for this file.
  2439. UsnRecord - The NTFS USN record describing the change. When walking a
  2440. through a sub-tree this will be the USN record of the sub-tree root.
  2441. IsDirectory - TRUE if this CO is for a directory.
  2442. FileName - Filename for this file. For a sub tree op it comes from the
  2443. filter entry.
  2444. Length - the file name length in bytes.
  2445. Return Value:
  2446. ptr to change order entry.
  2447. --*/
  2448. {
  2449. #undef DEBSUB
  2450. #define DEBSUB "JrnlCreateCo:"
  2451. PCHANGE_ORDER_ENTRY ChangeOrder;
  2452. //
  2453. // Construct new change order.
  2454. // Set the initial reference count to 1.
  2455. //
  2456. ChangeOrder = FrsAllocType(CHANGE_ORDER_ENTRY_TYPE);
  2457. ChangeOrder->HashEntryHeader.ReferenceCount = 1;
  2458. //
  2459. // The command flag CO_FLAG_LOCATION_CMD should be clear.
  2460. // Mark this change order as a file or a directory.
  2461. // Note: If this CO is being generated off of a directory filter table
  2462. // entry (e.g. Moveout) then the ChangeOrder->Cmd.FileAttributes will
  2463. // be zero. ChgOrdReadIdRecord() detects this and inserts the file
  2464. // attributes from the IDTable record.
  2465. //
  2466. SET_CO_LOCATION_CMD(ChangeOrder->Cmd,
  2467. DirOrFile,
  2468. (IsDirectory ? CO_LOCATION_DIR : CO_LOCATION_FILE));
  2469. SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, CO_LOCATION_NO_CMD);
  2470. //
  2471. // Capture the file name.
  2472. //
  2473. FRS_ASSERT(Length <= MAX_PATH*2);
  2474. CopyMemory(ChangeOrder->Cmd.FileName, FileName, Length);
  2475. ChangeOrder->Cmd.FileName[Length/2] = UNICODE_NULL;
  2476. ChangeOrder->UFileName.Length = Length;
  2477. ChangeOrder->Cmd.FileNameLength = Length;
  2478. //
  2479. // Set New and orig Replica fields to the replica.
  2480. //
  2481. ChangeOrder->OriginalReplica = Replica;
  2482. ChangeOrder->NewReplica = Replica;
  2483. ChangeOrder->Cmd.OriginalReplicaNum = ReplicaAddrToId(Replica);
  2484. ChangeOrder->Cmd.NewReplicaNum = ReplicaAddrToId(Replica);
  2485. //
  2486. // Set New and orig parent FID fields to the parent FID.
  2487. //
  2488. ChangeOrder->OriginalParentFid = *ParentFid;
  2489. ChangeOrder->NewParentFid = *ParentFid;
  2490. ChangeOrder->ParentFileReferenceNumber = *ParentFid;
  2491. ChangeOrder->FileReferenceNumber = *Fid;
  2492. //
  2493. // Init with data from the USN Record.
  2494. //
  2495. ChangeOrder->EntryCreateTime = CO_TIME_NOW(Replica->pVme);
  2496. ChangeOrder->Cmd.EventTime = UsnRecord->TimeStamp;
  2497. ChangeOrder->Cmd.JrnlFirstUsn = UsnRecord->Usn;
  2498. return ChangeOrder;
  2499. }
  2500. BOOL
  2501. JrnlMergeCoTest(
  2502. IN PVOLUME_MONITOR_ENTRY pVme,
  2503. IN PUNICODE_STRING UFileName,
  2504. IN PULONGLONG ParentFid,
  2505. IN ULONG StreamLastMergeSeqNum
  2506. )
  2507. /*++
  2508. Routine Description:
  2509. Check if a new Usn record can be merged with this change order.
  2510. If there is any reference to the file name in the Usn record stream
  2511. after the point where the last merge occurred then we return FALSE
  2512. indicating the merge is disallowed. The ptr to the QHashEntry is returned
  2513. (if it is found) so LastUseSequenceNumber can be updated.
  2514. Arguments:
  2515. pVme - ptr to the volume monitor entry (w/ name space table) for test.
  2516. UFileName - Unicode Filename for this file.
  2517. ParentFid - The parent file reference number for this file.
  2518. StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
  2519. Return Value:
  2520. True if Merge is ok else false.
  2521. --*/
  2522. {
  2523. #undef DEBSUB
  2524. #define DEBSUB "JrnlMergeCoTest:"
  2525. ULONGLONG QuadHashValue;
  2526. ULONG StreamLastUseSeqNum;
  2527. PQHASH_ENTRY NstEntry;
  2528. CalcHashFidAndName(UFileName, ParentFid, &QuadHashValue);
  2529. NstEntry = QHashLookupLock(pVme->NameSpaceTable, &QuadHashValue);
  2530. if (NstEntry != NULL) {
  2531. StreamLastUseSeqNum = (ULONG)NstEntry->Flags;
  2532. if (StreamLastUseSeqNum > StreamLastMergeSeqNum) {
  2533. //
  2534. // There is a ref to this name in the Usn stream after
  2535. // point where the last record was merged with this CO.
  2536. // Can't merge this Usn Record.
  2537. //
  2538. return FALSE;
  2539. }
  2540. }
  2541. return TRUE;
  2542. }
  2543. ULONG
  2544. JrnlPurgeNstWorker (
  2545. PQHASH_TABLE Table,
  2546. PQHASH_ENTRY BeforeNode,
  2547. PQHASH_ENTRY TargetNode,
  2548. PVOID Context
  2549. )
  2550. /*++
  2551. Routine Description:
  2552. This function is called thru QHashEnumerateTable() to clean out stale entries.
  2553. Arguments:
  2554. Table - the hash table being enumerated
  2555. BeforeNode -- ptr to the QhashEntry before the node of interest.
  2556. TargetNode -- ptr to the QhashEntry of interest.
  2557. Context - ptr to the Stream Sequence Number to compare against.
  2558. Return Value:
  2559. Win32 status
  2560. --*/
  2561. {
  2562. #undef DEBSUB
  2563. #define DEBSUB "JrnlPurgeNstWorker:"
  2564. ULONG StreamSeqNum = *(ULONG *)Context;
  2565. if ( (ULONG)(TargetNode->Flags) < StreamSeqNum) {
  2566. DPRINT5(4, "JrnlPurgeNstWorker - BeforeNode: %08x, Link: %08x,"
  2567. " Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
  2568. BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
  2569. PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
  2570. //
  2571. // Tell QHashEnumerateTable() to delete the node and continue the enum.
  2572. //
  2573. return FrsErrorDeleteRequested;
  2574. }
  2575. return FrsErrorSuccess;
  2576. }
  2577. VOID
  2578. JrnlUpdateNst(
  2579. IN PVOLUME_MONITOR_ENTRY pVme,
  2580. IN PUNICODE_STRING UFileName,
  2581. IN PULONGLONG ParentFid,
  2582. IN ULONG StreamSequenceNumber
  2583. )
  2584. /*++
  2585. Routine Description:
  2586. Update the LastUseSequenceNumber in the Name Space Table.
  2587. If the entry is not present, create it.
  2588. Arguments:
  2589. pVme - ptr to the volume monitor entry (w/ name space table) for test.
  2590. UFileName - Unicode Filename for this file.
  2591. ParentFid - The parent file reference number for this file.
  2592. StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
  2593. Return Value:
  2594. None.
  2595. --*/
  2596. {
  2597. #undef DEBSUB
  2598. #define DEBSUB "JrnlUpdateNst:"
  2599. ULONGLONG Qhv;
  2600. PQHASH_ENTRY NstEntry;
  2601. ULONG LastFetched, LastCleaned;
  2602. CalcHashFidAndName(UFileName, ParentFid, &Qhv);
  2603. NstEntry = QHashLookupLock(pVme->NameSpaceTable, &Qhv);
  2604. if (NstEntry != NULL) {
  2605. NstEntry->Flags = StreamSequenceNumber;
  2606. } else {
  2607. //
  2608. // Name not found. Create a new entry.
  2609. //
  2610. QHashInsertLock(pVme->NameSpaceTable, &Qhv, &Qhv, StreamSequenceNumber);
  2611. }
  2612. //
  2613. // Every so often sweep the Name Space Table and clean out stale entries.
  2614. // By doing this as part of the Journal monitor thread we can avoid
  2615. // using locks on the NameSpaceTable since this is the only thread that
  2616. // touches it.
  2617. //
  2618. if ((StreamSequenceNumber & 127) == 0) {
  2619. LastFetched = pVme->StreamSequenceNumberFetched;
  2620. LastCleaned = pVme->StreamSequenceNumberClean;
  2621. if ((LastFetched > LastCleaned) &&
  2622. ((LastFetched - LastCleaned) > 100)) {
  2623. //
  2624. // Sweep the table and purge any entries with a Stream Sequence
  2625. // Number less than LastFetched since that CO is no longer in the
  2626. // process queue.
  2627. //
  2628. QHashEnumerateTable(pVme->NameSpaceTable,
  2629. JrnlPurgeNstWorker,
  2630. &LastFetched);
  2631. pVme->StreamSequenceNumberClean = LastFetched;
  2632. }
  2633. }
  2634. }
  2635. VOID
  2636. JrnlFilterUpdate(
  2637. IN PREPLICA CurrentReplica,
  2638. IN PUSN_RECORD UsnRecord,
  2639. IN ULONG LocationCmd,
  2640. IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
  2641. IN PFILTER_TABLE_ENTRY NewParentFilterEntry
  2642. )
  2643. /*++
  2644. Routine Description:
  2645. Process a directory operation. Generate the change order(s) and update the
  2646. Filter table. This may involve multiple operations over a subtree.
  2647. It assumes it is being called with a USN directory change record and
  2648. that references have been taken on OldParentFilterEntry and
  2649. NewParentFilterEntry.
  2650. Arguments:
  2651. CurrentReplica - ptr to the Replica struct containing the directory now.
  2652. UsnRecord - ptr to the UsnRecord.
  2653. LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
  2654. OldParentFilterEntry - The filter entry for the directory's previous parent.
  2655. NewParentFilterEntry - The filter entry for the directory's current parent.
  2656. Return Value:
  2657. None.
  2658. --*/
  2659. {
  2660. #undef DEBSUB
  2661. #define DEBSUB "JrnlFilterUpdate:"
  2662. PGENERIC_HASH_TABLE FilterTable = CurrentReplica->pVme->FilterTable;
  2663. PFILTER_TABLE_ENTRY FilterEntry;
  2664. ULONG GStatus, WStatus;
  2665. ULONG Flags;
  2666. PULONGLONG FileID;
  2667. PREPLICA OriginalReplica;
  2668. CHANGE_ORDER_PARAMETERS Cop;
  2669. //
  2670. // Determine the file location command to use in the change order.
  2671. // First get the old parent file ID incase this was a rename.
  2672. //
  2673. FileID = &UsnRecord->FileReferenceNumber;
  2674. //
  2675. // If there is no old parent filter entry (Create, Delete, MOVEIN or NO_CMD)
  2676. // then the original replica is NULL.
  2677. //
  2678. OriginalReplica = (OldParentFilterEntry == NULL) ?
  2679. NULL : OldParentFilterEntry->Replica;
  2680. //
  2681. // Look for an entry in the Filter Table for this DIR and create a new
  2682. // one if needed.
  2683. //
  2684. GStatus = GhtLookup(FilterTable, FileID, TRUE, &FilterEntry);
  2685. if (GStatus == GHT_STATUS_SUCCESS) {
  2686. //
  2687. // For a create the entry could already be in the table. This could
  2688. // happen when a Replica Load inserts the directory and then we see the
  2689. // Journal Entry for the create later. If only the Create bit is set
  2690. // in the reason mask there is nothing for us to do.
  2691. //
  2692. if (UsnRecord->Reason == (USN_REASON_FILE_CREATE | USN_REASON_CLOSE)) {
  2693. DPRINT(4,"++ USN_REASON_FILE_CREATE: for dir with entry in table. skipping\n");
  2694. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  2695. return;
  2696. }
  2697. } else {
  2698. //
  2699. // Create a filter entry for this directory if it's a create or movein.
  2700. // A MoveIn is the same as a create dir since we need to create a filter
  2701. // table entry and only a single dir is involved. It is possible that
  2702. // the update process has already found the dir and added the filter
  2703. // entry. If so we generate the change order anyway since there may
  2704. // be other reason flags to consider. There is no original replica
  2705. // for a create or a rename.
  2706. //
  2707. if (CO_NEW_FILE(LocationCmd)) {
  2708. //
  2709. // The following returns with a reference on FilterEntry.
  2710. //
  2711. WStatus = JrnlAddFilterEntryFromUsn(CurrentReplica,
  2712. UsnRecord,
  2713. &FilterEntry);
  2714. if (!WIN_SUCCESS(WStatus)) {
  2715. DPRINT(4, "++ JrnlAddFilterEntryFromUsn failed\n");
  2716. }
  2717. } else {
  2718. //
  2719. // Note: touching a dir that was previously EXCLUDED fails to add filter entry
  2720. //
  2721. DUMP_USN_RECORD2(3, UsnRecord, CurrentReplica->ReplicaNumber, LocationCmd);
  2722. DPRINT(1, "++ Warning: Dir not found in Filter Table and not a CO_NEW_FILE, skipping\n");
  2723. return;
  2724. }
  2725. }
  2726. //
  2727. // Process the directory through the volume filter and generate the
  2728. // appropriate change orders.
  2729. //
  2730. //
  2731. // Setup the change order parameters.
  2732. //
  2733. // Original and current/new Replica Sets
  2734. // new parent FID.
  2735. // Usn Record triggering change order creation. (i.e. the op on root of
  2736. // the subtree).
  2737. // The location change command.
  2738. // Original and current/new parent filter entries of root filter entry
  2739. //
  2740. Cop.OriginalReplica = OriginalReplica;
  2741. Cop.NewReplica = CurrentReplica;
  2742. Cop.NewParentFid = UsnRecord->ParentFileReferenceNumber;
  2743. Cop.UsnRecord = UsnRecord;
  2744. Cop.NewLocationCmd = LocationCmd;
  2745. Cop.OrigParentFilterEntry = OldParentFilterEntry;
  2746. Cop.NewParentFilterEntry = NewParentFilterEntry;
  2747. //
  2748. // Process the subtree starting at the root filter entry of change.
  2749. //
  2750. WStatus = JrnlProcessSubTree(FilterEntry, &Cop);
  2751. //
  2752. // Drop the ref on the filter entry if it wasn't deleted.
  2753. //
  2754. if ((FilterEntry != NULL) &&
  2755. !((LocationCmd == CO_LOCATION_DELETE) ||
  2756. (LocationCmd == CO_LOCATION_MOVEOUT))) {
  2757. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  2758. }
  2759. return;
  2760. }
  2761. ULONG
  2762. JrnlProcessSubTree(
  2763. IN PFILTER_TABLE_ENTRY RootFilterEntry,
  2764. IN PCHANGE_ORDER_PARAMETERS Cop
  2765. )
  2766. /*++
  2767. Routine Description:
  2768. This function is called to build a change order parameter block and
  2769. enumerate through a filter subtree. It acquires the necessary locks
  2770. for the duration of the operation.
  2771. Arguments:
  2772. RootFilterEntry - The root of the filter subtree being operated on.
  2773. NULL if it doesn't yet exist (e.g. MOVEIN or CREATE).
  2774. Cop - Struct with the change order param data to pass down the subtree.
  2775. Return Value:
  2776. win32 status
  2777. --*/
  2778. {
  2779. #undef DEBSUB
  2780. #define DEBSUB "JrnlProcessSubTree:"
  2781. ULONG WStatus;
  2782. PGENERIC_HASH_TABLE FilterTable;
  2783. PVOLUME_MONITOR_ENTRY pVme;
  2784. PREPLICA NewReplica = Cop->NewReplica;
  2785. ULONG NewLocationCmd = Cop->NewLocationCmd;
  2786. PREPLICA OriginalReplica = Cop->OriginalReplica;
  2787. if (NewLocationCmd == CO_LOCATION_MOVEOUT) {
  2788. pVme = OriginalReplica->pVme;
  2789. } else {
  2790. pVme = NewReplica->pVme;
  2791. }
  2792. FilterTable = pVme->FilterTable;
  2793. //
  2794. // Get the change order process list lock for the volume.
  2795. //
  2796. FrsRtlAcquireListLock(&pVme->ChangeOrderList);
  2797. //
  2798. // dispatch on new location command.
  2799. // Get locks and enumerate subtree top down or bottom up.
  2800. //
  2801. switch (NewLocationCmd) {
  2802. case CO_LOCATION_NO_CMD:
  2803. //
  2804. // Even though there is no location change. There could still be a
  2805. // dir related content change. So process like a create that the
  2806. // update process got to first.
  2807. //
  2808. case CO_LOCATION_CREATE:
  2809. case CO_LOCATION_MOVEIN:
  2810. case CO_LOCATION_MOVEIN2:
  2811. //
  2812. // Create a change order for it. Not really a subtree operation.
  2813. // A MoveIn is the same as a create dir since we need to create a filter
  2814. // table entry and only a single dir is involved. It is possible that
  2815. // the update process has already found the dir and added the filter
  2816. // entry. If so we generate the change order anyway since there may
  2817. // be other reason flags to consider. There is no original replica
  2818. // for a create or a MOVEIN. The caller sets original replica to
  2819. // new replica and has created the filter entry.
  2820. //
  2821. // Bump the ref count to keep the count in sync with the path through
  2822. // JrnlEnumerateFilterTreexx().
  2823. //
  2824. INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
  2825. WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
  2826. DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
  2827. break;
  2828. case CO_LOCATION_DELETE:
  2829. case CO_LOCATION_MOVEDIR:
  2830. //
  2831. // Create change order for the directory delete and delete filter entry.
  2832. // Not really a subtree operation since the dir can have no children
  2833. // when it's deleted.
  2834. // If the operation is MOVEDIR then JrnlProcessSubTreeEntry() will
  2835. // change the parent dir in the filter entry and put it on the child
  2836. // list of the new parent.
  2837. //
  2838. // Bump the ref count to keep the count in sync with the path through
  2839. // JrnlEnumerateFilterTreexx().
  2840. //
  2841. INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
  2842. JrnlAcquireChildLock(NewReplica);
  2843. WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
  2844. DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
  2845. JrnlReleaseChildLock(NewReplica);
  2846. break;
  2847. case CO_LOCATION_MOVEOUT:
  2848. //
  2849. // An entire subtree is renamed out of the replica tree.
  2850. //
  2851. // Get the lock on the filter entry child list for this replica.
  2852. // Walk the subtree bottom up, creating the change orders for the
  2853. // MOVEOUT and deleting the filter entries at the same time.
  2854. // Drop the child list lock.
  2855. //
  2856. JrnlAcquireChildLock(OriginalReplica);
  2857. WStatus = JrnlEnumerateFilterTreeBU(FilterTable,
  2858. RootFilterEntry,
  2859. JrnlProcessSubTreeEntry,
  2860. Cop);
  2861. JrnlReleaseChildLock(OriginalReplica);
  2862. DPRINT_WS(0, "++ Error - failed to add change order for dir MOVEOUT:", WStatus);
  2863. break;
  2864. case CO_LOCATION_MOVERS:
  2865. //
  2866. // Get the lock on the filter entry child list for both this replica
  2867. // and the new replica set.
  2868. // Walk the subtree Top-Down, creating the change orders for the MOVERS.
  2869. // Drop the child list locks.
  2870. //
  2871. JrnlAcquireChildLockPair(OriginalReplica, NewReplica);
  2872. WStatus = JrnlEnumerateFilterTreeTD(FilterTable,
  2873. RootFilterEntry,
  2874. JrnlProcessSubTreeEntry,
  2875. Cop);
  2876. JrnlReleaseChildLockPair(OriginalReplica, NewReplica);
  2877. DPRINT_WS(0, "++ Error - failed to add change order for dir MOVERS:", WStatus);
  2878. break;
  2879. default:
  2880. DPRINT(0, "++ ERROR - Invalid NewLocationCmd arg\n");
  2881. FRS_ASSERT(!"JrnlProcessSubTree: Invalid NewLocationCmd");
  2882. } // end switch
  2883. //
  2884. // Release the volume change order lock.
  2885. //
  2886. FrsRtlReleaseListLock(&pVme->ChangeOrderList);
  2887. return WStatus;
  2888. }
  2889. ULONG
  2890. JrnlProcessSubTreeEntry(
  2891. PGENERIC_HASH_TABLE FilterTable,
  2892. PVOID Buffer,
  2893. PVOID Context
  2894. )
  2895. /*++
  2896. Routine Description:
  2897. This function is called thru JrnlEnumerateFilterTreexx() to process a
  2898. Filter entry and submit a change order for same.
  2899. After the change order is generated the filter table entry is updated
  2900. as needed to reflect a new parent or a new replica set or a name change.
  2901. All required locks are acquired by the caller of the enumerate function.
  2902. This includes one or two filter entry child locks and the change order
  2903. list lock.
  2904. The caller has taken out a reference on the FilterEntry (Buffer). We
  2905. retire that reference here.
  2906. Arguments:
  2907. FilterTable - the hash table being enumerated (to lookup parent entry).
  2908. Buffer - a ptr to a FILTER_TABLE_ENTRY
  2909. Context - A pointer to the change order parameter struct.
  2910. Return Value:
  2911. ERROR_SUCCESS to keep the enumeration going.
  2912. Any other status stops the enumeration and returns this value to the
  2913. caller of the enumerate function.
  2914. --*/
  2915. {
  2916. #undef DEBSUB
  2917. #define DEBSUB "JrnlProcessSubTreeEntry:"
  2918. UNICODE_STRING UFileName;
  2919. ULONG WStatus, WStatus1;
  2920. ULONG GStatus;
  2921. BOOL Root;
  2922. PCHANGE_ORDER_ENTRY ChangeOrder;
  2923. PUSN_RECORD UsnRecord;
  2924. ULONG StreamSeqNum;
  2925. ULONG LocationCmd;
  2926. PVOLUME_MONITOR_ENTRY pVme;
  2927. PFILTER_TABLE_ENTRY OrigParentFilterEntry;
  2928. PFILTER_TABLE_ENTRY NewParentFilterEntry;
  2929. PFILTER_TABLE_ENTRY FE, FEList[8];
  2930. ULONG FEx;
  2931. PWCHAR FileName;
  2932. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  2933. PCHANGE_ORDER_PARAMETERS Cop = (PCHANGE_ORDER_PARAMETERS) Context;
  2934. USHORT Length;
  2935. //
  2936. // The USN record that triggered the SubTree operation
  2937. //
  2938. UsnRecord = Cop->UsnRecord;
  2939. LocationCmd = Cop->NewLocationCmd;
  2940. OrigParentFilterEntry = Cop->OrigParentFilterEntry;
  2941. NewParentFilterEntry = Cop->NewParentFilterEntry;
  2942. pVme = FilterEntry->Replica->pVme;
  2943. //
  2944. // If the FID in the UsnRecord matches the FID in the Filter Entry then
  2945. // this operation is on the root of the subtree and is different than if
  2946. // it was on a child.
  2947. //
  2948. Root = (UsnRecord->FileReferenceNumber == FilterEntry->DFileID);
  2949. #if 0
  2950. // For now no merging of the DIR change orders. If this proves to be a perf
  2951. // problem then need to add the code check for name conflicts.
  2952. //
  2953. // Check for a pending change order for this Dir entry. If the lookup
  2954. // succeeds the ref count is decremented by JrnlUpdateChangeOrder because
  2955. // it may end up evaporating the change order.
  2956. //
  2957. GStatus = GhtLookup(pVme->ChangeOrderTable,
  2958. &FilterEntry->DFileID,
  2959. TRUE,
  2960. &ChangeOrder);
  2961. if (GStatus == GHT_STATUS_SUCCESS) {
  2962. //
  2963. // A pending change order exists, Update it.
  2964. //
  2965. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
  2966. } else {
  2967. #endif
  2968. //
  2969. // No pending change order exists for this Dir. Create one.
  2970. //
  2971. // Since multiple change orders are derived from a single Journal Usn
  2972. // how do we decide to update our stable copy of the Journal USN?
  2973. // The stable copy means the current one we are working on and may not
  2974. // have finished.
  2975. if (Root) {
  2976. //
  2977. // If the root of the sub-tree then name comes from USN Record.
  2978. //
  2979. FileName = UsnRecord->FileName;
  2980. Length = UsnRecord->FileNameLength;
  2981. } else {
  2982. //
  2983. // If not root of sub-tree then name comes from filter entry and
  2984. // JrnlFirstUsn is set to zero.
  2985. //
  2986. FileName = FilterEntry->DFileName;
  2987. Length = (USHORT)(2*wcslen(FilterEntry->DFileName));
  2988. }
  2989. //
  2990. // Create the change order.
  2991. //
  2992. ChangeOrder = JrnlCreateCo(FilterEntry->Replica,
  2993. &FilterEntry->DFileID,
  2994. &FilterEntry->DParentFileID,
  2995. UsnRecord,
  2996. TRUE, // DIR CO
  2997. FileName,
  2998. Length);
  2999. //
  3000. // Make a new stream sequence number and save it in the CO.
  3001. // Stick it in the CO Guid so it appears in the log file.
  3002. // It gets overwritten later with real CO Guid when the CO issues.
  3003. //
  3004. StreamSeqNum = ++pVme->StreamSequenceNumber;
  3005. ChangeOrder->StreamLastMergeSeqNum = StreamSeqNum;
  3006. ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSeqNum;
  3007. ChangeOrder->OriginalParentFid = FilterEntry->DParentFileID;
  3008. if (Root) {
  3009. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Root Create",
  3010. UsnRecord->Reason);
  3011. } else {
  3012. ChangeOrder->Cmd.JrnlFirstUsn = (USN) 0;
  3013. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Subdir Create",
  3014. UsnRecord->Reason);
  3015. }
  3016. #if 0
  3017. }
  3018. #endif
  3019. //
  3020. // Update the Name Space Table with the current stream sequence number.
  3021. // Since this is a dir subtree entries are made for all parents implicitly
  3022. // until we get to the root. The root needs to have its parent dir added
  3023. // to the name space table. The table below shows what entries are made
  3024. // depending on the file operation and whether or not this call is for
  3025. // the root entry of the subtree operation.
  3026. //
  3027. // Opn Make Entry using Make Entry using
  3028. // orig name/parent Current name/parent
  3029. // info info (1)
  3030. //
  3031. // Movein No Yes
  3032. // Moveout Yes No
  3033. // Movedir Yes Yes
  3034. // Movers Yes Yes
  3035. //
  3036. // SimpleRen Yes Yes
  3037. // Create No Yes
  3038. // Delete No Yes
  3039. // Update No Yes
  3040. //
  3041. // The last four entries affect single dirs only while the first four
  3042. // can apply to subtrees.
  3043. // (1) If working in a single dir or the root of a sub-tree the current
  3044. // name/parent info comes from the USN record.
  3045. //
  3046. FEx = 0;
  3047. if (Root) {
  3048. if (LocationCmd != CO_LOCATION_MOVEOUT) {
  3049. //
  3050. // Update Curr File (Where the USN record says file went)
  3051. // Update New parent (the parent dir where file went)
  3052. //
  3053. FrsSetUnicodeStringFromRawString(&UFileName,
  3054. UsnRecord->FileNameLength,
  3055. UsnRecord->FileName,
  3056. UsnRecord->FileNameLength);
  3057. JrnlUpdateNst(pVme,
  3058. &UFileName,
  3059. &UsnRecord->ParentFileReferenceNumber,
  3060. StreamSeqNum);
  3061. FRS_ASSERT(NewParentFilterEntry != NULL);
  3062. FEList[FEx++] = NewParentFilterEntry;
  3063. }
  3064. if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
  3065. //
  3066. // Update with old name/parent of root dir.
  3067. // (Where the Original parent Filter entry says it was.)
  3068. // Update orig parent of root dir (the parent dir where file came from)
  3069. //
  3070. FEList[FEx++] = FilterEntry;
  3071. FRS_ASSERT(OrigParentFilterEntry != NULL);
  3072. FEList[FEx++] = OrigParentFilterEntry;
  3073. }
  3074. } else {
  3075. //
  3076. // Not the root so update using current name/parent of FilterEntry.
  3077. //
  3078. FEList[FEx++] = FilterEntry;
  3079. }
  3080. //
  3081. // Apply the name space table updates.
  3082. //
  3083. while (FEx != 0) {
  3084. FE = FEList[--FEx];
  3085. JrnlUpdateNst(pVme, &FE->UFileName, &FE->DParentFileID, StreamSeqNum);
  3086. }
  3087. //
  3088. // Update or install the change order.
  3089. //
  3090. WStatus = JrnlUpdateChangeOrder(ChangeOrder,
  3091. Cop->NewReplica,
  3092. Cop->NewParentFid,
  3093. Cop->NewLocationCmd,
  3094. (Root ? UsnRecord : NULL));
  3095. //
  3096. // Update the filter entry if necessary.
  3097. //
  3098. //
  3099. // See if the filename part is different and, if so, copy it.
  3100. // Only applies to the Root entry of the subtree.
  3101. // Limit it to MAX_PATH characters.
  3102. //
  3103. if (Root) {
  3104. if (UsnRecord->FileNameLength > 2*MAX_PATH) {
  3105. UsnRecord->FileNameLength = 2*MAX_PATH;
  3106. }
  3107. FrsAllocUnicodeString(&FilterEntry->UFileName,
  3108. FilterEntry->DFileName,
  3109. UsnRecord->FileName,
  3110. UsnRecord->FileNameLength);
  3111. }
  3112. switch (Cop->NewLocationCmd) {
  3113. case CO_LOCATION_CREATE:
  3114. case CO_LOCATION_MOVEIN:
  3115. case CO_LOCATION_MOVEIN2:
  3116. case CO_LOCATION_NO_CMD:
  3117. //
  3118. // On creates and movein the caller has created the filter table
  3119. // entry already (to pass it to this fcn).
  3120. //
  3121. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3122. break;
  3123. case CO_LOCATION_DELETE:
  3124. case CO_LOCATION_MOVEOUT:
  3125. //
  3126. // Now delete the entry from the Filter Table. If this is the root
  3127. // then first drop the ref count by one to compensate for the first
  3128. // lookup in JrnlFilterUpdate() where all this started.
  3129. // The second ref was taken through the Enumerate list function.
  3130. //
  3131. if (Root) {
  3132. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3133. }
  3134. WStatus = JrnlDeleteDirFilterEntry(FilterTable, NULL, FilterEntry);
  3135. if (!WIN_SUCCESS(WStatus)) {
  3136. DPRINT(0, "++ ERROR - Dir entry delete failed.\n");
  3137. }
  3138. break;
  3139. case CO_LOCATION_MOVERS:
  3140. //
  3141. // Replica set changed. Update the filter entry.
  3142. //
  3143. FilterEntry->Replica = Cop->NewReplica;
  3144. FilterEntry->DReplicaNumber = Cop->NewReplica->ReplicaNumber;
  3145. /* FALL THRU INTENDED */
  3146. case CO_LOCATION_MOVEDIR:
  3147. //
  3148. // Directory changed. Applies to root on both MOVEDIR and MOVERS.
  3149. // Update the parent file ID in the filter entry and
  3150. // Put the filter entry on the childlist of the new parent.
  3151. //
  3152. if (Root) {
  3153. FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
  3154. if (FilterEntry->ChildEntry.Flink == NULL) {
  3155. DPRINT(0, "++ ERROR - Dir entry not on child list\n");
  3156. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3157. FRS_ASSERT(!"Dir entry not on child list");
  3158. }
  3159. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  3160. FilterEntry->ChildEntry.Flink = NULL;
  3161. WStatus1 = (ULONG)JrnlFilterLinkChild(FilterTable,
  3162. FilterEntry,
  3163. FilterEntry->Replica);
  3164. if (!WIN_SUCCESS(WStatus1)) {
  3165. DPRINT(0, "++ ERROR - JrnlFilterLinkChild Failed\n");
  3166. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3167. FRS_ASSERT(!"JrnlFilterLinkChild Failed");
  3168. }
  3169. }
  3170. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3171. break;
  3172. default:
  3173. DPRINT1(0, "++ Error - switch arg out of range: %d\n", Cop->NewLocationCmd);
  3174. FRS_ASSERT(!"NewLocationCmd invalid");
  3175. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3176. }
  3177. //
  3178. // Return the change order status.
  3179. //
  3180. return WStatus;
  3181. }
  3182. ULONG
  3183. JrnlUpdateChangeOrder(
  3184. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  3185. IN PREPLICA NewReplica,
  3186. IN ULONGLONG NewParentFid,
  3187. IN ULONG NewLocationCmd,
  3188. IN PUSN_RECORD UsnRecord
  3189. )
  3190. /*++
  3191. Routine Description:
  3192. This function updates an existing directory change order that is still
  3193. pending in the Replica's change order process list or inserts a new change
  3194. order that has been prepared as described below.
  3195. There are two components to a change order, content and file location.
  3196. A given USN record could have changes to both parts.
  3197. The content component is updated by merging the reason flags from the
  3198. UsnRecord and capturing relevant parameters such as the attributes and
  3199. FileName.
  3200. The location update component is more complicated and uses a state table,
  3201. ChangeOrderLocationStateTable[], to manage the update. The state table
  3202. determines when we update the parent directory or the replica set in the
  3203. change order. This occurs when a directory is renamed. The states in
  3204. the table also correspond to the change order location command to be used.
  3205. The change order may move from one replica set to another. This routine
  3206. assumes that the caller has acquired the change order process list locks
  3207. for both the source and dest replicas. This is the only case where we can
  3208. pull it off the list because there could be a dependent entry that follows
  3209. it in the change order list and an error could result if the update
  3210. process saw the dependent entry first. (Probably only an issue for
  3211. directory creates).
  3212. The Source Change order process list lock is needed for all Location Commands.
  3213. The Destination Change order process list lock is needed for:
  3214. CO_LOCATION_MOVEIN, CO_LOCATION_MOVERS commands.
  3215. The change order may be evaporated in certain cases. If not this routine
  3216. decrements the reference count on the change order before it returns.
  3217. This routine can be called with a new change order but the caller must
  3218. pre-init the change order correctly:
  3219. 1. Bump the initial ref count by 1 (since that is what lookup does).
  3220. 2. The command flag CO_FLAG_ONLIST should be clear so we don't try
  3221. to pull it off a list.
  3222. 3. The length field in the unicode string UFileName must be 0 to
  3223. capture the file name.
  3224. 4. Set New and orig Replica fields to the original replica.
  3225. 5. Set New and orig parent FID fields to the original parent FID.
  3226. 6. The command flag CO_FLAG_LOCATION_CMD should be clear.
  3227. 7. The FileReferenceNumber must be set to the file ID of the file/dir.
  3228. The File Id is the index into the change order table.
  3229. This routine also updates the parent file ID table so the parent File ID
  3230. tracks on renames and the entry is deleted if the change order is
  3231. evaporated or the new location command specifies delete.
  3232. Arguments:
  3233. ChangeOrder - The existing change order to be updated.
  3234. NewReplica - The destination replica the directory is renamed into.
  3235. NewparentFid - The destination parent the directory is renamed into.
  3236. NewLocationCmd - The new location command applied to the directory.
  3237. UsnRecord - The NTFS USN record describing the change. When walking a
  3238. through a sub-tree this will be NULL for all directories
  3239. except for the root.
  3240. Return Value:
  3241. Win32 status.
  3242. --*/
  3243. {
  3244. #undef DEBSUB
  3245. #define DEBSUB "JrnlUpdateChangeOrder:"
  3246. PREPLICA Replica;
  3247. ULONG Control;
  3248. ULONG Op;
  3249. ULONG PreviousState;
  3250. ULONG Reason = 0;
  3251. BOOL EvapFlag = FALSE;
  3252. ULONG GStatus;
  3253. ULONG NewState;
  3254. PVOLUME_MONITOR_ENTRY pVme;
  3255. BOOL SubTreeRoot;
  3256. ULONG WStatus;
  3257. BOOL CoUpdate;
  3258. PCHANGE_ORDER_ENTRY NewParentCo;
  3259. ULONG LocationCmd;
  3260. //
  3261. // Only update parent file IDs on the sub tree root. This is the dir
  3262. // that the USN Record was generated for in the dir rename.
  3263. // For any subordinate dirs the caller must supply NULL.
  3264. // If a changeorder comes in already on the process list then it must
  3265. // be an update.
  3266. //
  3267. SubTreeRoot = (UsnRecord != NULL);
  3268. CoUpdate = CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST);
  3269. //
  3270. // If a USN record is supplied then check for any content flags set in the
  3271. // USN reason mask. If so then set the content flag in the change order.
  3272. // When walking a subtree the USN Record is non-null only for the root since
  3273. // the content changes don't apply to the children.
  3274. //
  3275. if (SubTreeRoot) {
  3276. Reason = UsnRecord->Reason;
  3277. if (Reason & CO_CONTENT_MASK) {
  3278. SET_CO_FLAG(ChangeOrder, CO_FLAG_CONTENT_CMD);
  3279. //
  3280. // Update the content portion of the change order. Merge in the
  3281. // reason mask from the Usn Record.
  3282. //
  3283. ChangeOrder->Cmd.ContentCmd |= Reason;
  3284. }
  3285. //
  3286. // Capture the name in the case of rename, create and delete.
  3287. // Limit it to MAX_PATH characters.
  3288. //
  3289. // if ((Reason & CO_LOCATION_MASK) || (ChangeOrder->UFileName.Length == 0)) {
  3290. if ((Reason & USN_REASON_RENAME_NEW_NAME) ||
  3291. (ChangeOrder->UFileName.Length == 0)) {
  3292. if (UsnRecord->FileNameLength > 2*MAX_PATH) {
  3293. UsnRecord->FileNameLength = 2*MAX_PATH;
  3294. }
  3295. FrsAllocUnicodeString(&ChangeOrder->UFileName,
  3296. ChangeOrder->Cmd.FileName,
  3297. UsnRecord->FileName,
  3298. UsnRecord->FileNameLength);
  3299. ChangeOrder->Cmd.FileNameLength = UsnRecord->FileNameLength;
  3300. }
  3301. //
  3302. // Capture most recent file attributes.
  3303. // In the case where we are updating a pending CO,
  3304. // we would miss a series of ops on the same file such as
  3305. // set the hidden bit, close, delete the system bit, close, ...
  3306. //
  3307. ChangeOrder->Cmd.FileAttributes = UsnRecord->FileAttributes;
  3308. //
  3309. // Update to the latest USN contributing to this change order.
  3310. //
  3311. ChangeOrder->Cmd.JrnlUsn = UsnRecord->Usn;
  3312. }
  3313. //
  3314. // Check if there is a new location command. If not go insert the change order.
  3315. //
  3316. if (NewLocationCmd == CO_LOCATION_NO_CMD) {
  3317. goto INSERT_CHANGE_ORDER;
  3318. }
  3319. //
  3320. // Update the parent file ID table based on the new location command.
  3321. //
  3322. if (CO_NEW_FILE(NewLocationCmd)) {
  3323. //
  3324. // Add a new entry for the new file in the R.S.
  3325. //
  3326. ChangeOrder->ParentFileReferenceNumber = NewParentFid;
  3327. GStatus = QHashInsert(NewReplica->pVme->ParentFidTable,
  3328. &ChangeOrder->FileReferenceNumber,
  3329. &NewParentFid,
  3330. NewReplica->ReplicaNumber,
  3331. FALSE);
  3332. if (GStatus != GHT_STATUS_SUCCESS ) {
  3333. DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
  3334. }
  3335. } else
  3336. if ((NewLocationCmd == CO_LOCATION_DELETE) ||
  3337. (NewLocationCmd == CO_LOCATION_MOVEOUT)) {
  3338. //
  3339. // File is gone. Remove the entry.
  3340. //
  3341. GStatus = QHashDelete(NewReplica->pVme->ParentFidTable,
  3342. &ChangeOrder->FileReferenceNumber);
  3343. if (GStatus != GHT_STATUS_SUCCESS ) {
  3344. DPRINT1(0, "++ QHashDelete error: %d\n", GStatus);
  3345. }
  3346. } else
  3347. if (CO_MOVE_RS_OR_DIR(NewLocationCmd)) {
  3348. //
  3349. // File changed parents. Update the entry for subtree root only.
  3350. //
  3351. if (SubTreeRoot) {
  3352. ChangeOrder->ParentFileReferenceNumber = NewParentFid;
  3353. GStatus = QHashUpdate(NewReplica->pVme->ParentFidTable,
  3354. &ChangeOrder->FileReferenceNumber,
  3355. &NewParentFid,
  3356. 0);
  3357. if (GStatus != GHT_STATUS_SUCCESS ) {
  3358. DPRINT1(0, "++ QHashUpdate error: %d\n", GStatus);
  3359. }
  3360. }
  3361. } else {
  3362. DPRINT1(0, "++ ERROR - Invalid new location command: %d\n", NewLocationCmd);
  3363. }
  3364. //
  3365. // Update the location component of the change order. Fetch the Control
  3366. // DWORD from the table based on the pending command and the new command
  3367. // then perform the specified operation sequence. If the pending change
  3368. // order was for a content change then there is no prior location command.
  3369. // Check for this.
  3370. //
  3371. // Caller has acquired change order process lock for both current and
  3372. // new Replica Sets as appropriate.
  3373. //
  3374. if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD)) {
  3375. PreviousState = GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command);
  3376. } else {
  3377. PreviousState = NSNoLocationCmd;
  3378. SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCATION_CMD);
  3379. }
  3380. Control = ChangeOrderLocationStateTable[PreviousState][NewLocationCmd].u1.UlongOpFields;
  3381. DPRINT5(5,"++ Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
  3382. CoLocationNames[PreviousState], PreviousState,
  3383. CoLocationNames[NewLocationCmd], NewLocationCmd,
  3384. Control);
  3385. if (Control == 0) {
  3386. DPRINT2(0, "++ ERROR - Invalid transition. Pending: %d New: %d\n",
  3387. PreviousState, NewLocationCmd);
  3388. FRS_ASSERT(!"Invalid CO Location cmd transition-1");
  3389. goto ERROR_RETURN;
  3390. }
  3391. while (Control != 0) {
  3392. Op = Control & 0x0000000F;
  3393. Control = Control >> 4;
  3394. switch (Op) {
  3395. //
  3396. // Done.
  3397. //
  3398. case OpInval:
  3399. DPRINT5(0,"++ Error - Invalid state transition - Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
  3400. CoLocationNames[PreviousState], PreviousState,
  3401. CoLocationNames[NewLocationCmd], NewLocationCmd,
  3402. Control);
  3403. FRS_ASSERT(!"Invalid CO Location cmd transition-2");
  3404. Control = 0;
  3405. break;
  3406. //
  3407. // Evaporate the pending change order. It should be on the process
  3408. // list associated with the NewReplica. THis should never happen
  3409. // if the previous state is NSNoLocationCmd.
  3410. //
  3411. case OpEvap:
  3412. //
  3413. // Increment the CO Evaporated Counter
  3414. //
  3415. PM_INC_CTR_REPSET(NewReplica, COEvaporated, 1);
  3416. DPRINT(5, "++ OpEvap\n");
  3417. pVme = ChangeOrder->NewReplica->pVme;
  3418. FRS_ASSERT(PreviousState != NSNoLocationCmd);
  3419. FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
  3420. FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
  3421. &ChangeOrder->ProcessList);
  3422. DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3423. DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica, ChangeOrder, ERROR_SUCCESS);
  3424. CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpEvap");
  3425. //
  3426. // Delete the entry from the Change Order Table. It should be in
  3427. // the Change order table assoicated with NewReplica. The ref
  3428. // count should be 2 since the caller did a lookup.
  3429. //
  3430. FRS_ASSERT(ChangeOrder->HashEntryHeader.ReferenceCount == 2);
  3431. GStatus = GhtDeleteEntryByAddress(pVme->ChangeOrderTable,
  3432. ChangeOrder,
  3433. TRUE);
  3434. if (GStatus != GHT_STATUS_SUCCESS) {
  3435. DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
  3436. FRS_PRINT_TYPE(0, ChangeOrder);
  3437. FRS_ASSERT(!"JrnlUpdateCO: CO Table GhtDeleteEntryByAddress failed");
  3438. goto ERROR_RETURN;
  3439. }
  3440. EvapFlag = TRUE;
  3441. break;
  3442. //
  3443. // Update the New Replica Set
  3444. //
  3445. case OpNRs:
  3446. DPRINT(5, "++ OpNRs\n");
  3447. //
  3448. // Update the parent dir on the subtree root and the replica ID
  3449. // on all change orders.
  3450. //
  3451. ChangeOrder->NewReplica = NewReplica;
  3452. /* FALL THRU INTENDED */
  3453. //
  3454. // Update the New Parent Directory on the subtree root only.
  3455. //
  3456. case OpNDir:
  3457. if (Op == OpNDir) {DPRINT(5, "++ OpNDir\n");}
  3458. if (SubTreeRoot) {
  3459. ChangeOrder->NewParentFid = NewParentFid;
  3460. if (CoUpdate) {
  3461. //
  3462. // See if there is a pending change order on the new parent.
  3463. // If there is and it is a create that happens after this
  3464. // change order then move this updated CO to the end of the
  3465. // list so the Parent Create is done first. We do this by
  3466. // removing it from the list and letting the insert code put
  3467. // it back on at the end with a new VSN.
  3468. //
  3469. pVme = ChangeOrder->NewReplica->pVme;
  3470. GStatus = GhtLookup(pVme->ChangeOrderTable,
  3471. &NewParentFid,
  3472. TRUE,
  3473. &NewParentCo);
  3474. if ((GStatus == GHT_STATUS_SUCCESS) &&
  3475. (NewParentCo->Cmd.FrsVsn > ChangeOrder->Cmd.FrsVsn)){
  3476. FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
  3477. FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
  3478. &ChangeOrder->ProcessList);
  3479. DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3480. DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica,
  3481. ChangeOrder,
  3482. ERROR_SUCCESS);
  3483. CLEAR_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
  3484. CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpNDir");
  3485. DEC_LOCAL_CO_QUEUE_COUNT(ChangeOrder->NewReplica);
  3486. GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
  3487. NewParentCo,
  3488. TRUE);
  3489. }
  3490. }
  3491. }
  3492. break;
  3493. //
  3494. // Update the State / Command.
  3495. //
  3496. case OpNSt:
  3497. NewState = Control & 0x0000000F;
  3498. DPRINT2(5, "++ OpNst: %s (%d)\n", CoLocationNames[NewState], NewState);
  3499. SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, NewState);
  3500. Control = Control >> 4;
  3501. break;
  3502. //
  3503. // The table is messed up.
  3504. //
  3505. default:
  3506. DPRINT1(0, "++ Error - Invalid dispatch operation: %d\n", Op);
  3507. FRS_ASSERT(!"Invalid CO dispatch operation");
  3508. goto ERROR_RETURN;
  3509. }
  3510. }
  3511. INSERT_CHANGE_ORDER:
  3512. //
  3513. // If the change order hasn't been deleted then decrement the ref count
  3514. // to balance the Caller's lookup. If the change order is not on a process
  3515. // list because it is new or it switched replica sets then put it on the
  3516. // target list.
  3517. //
  3518. WStatus = ERROR_SUCCESS;
  3519. if (!EvapFlag) {
  3520. Replica = ChangeOrder->NewReplica;
  3521. pVme = Replica->pVme;
  3522. if (!CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST)) {
  3523. //
  3524. // No reason to age deletes
  3525. //
  3526. if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD) &&
  3527. (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE)) {
  3528. ChangeOrder->TimeToRun = CO_TIME_NOW(pVme);
  3529. } else {
  3530. ChangeOrder->TimeToRun = CO_TIME_TO_RUN(pVme);
  3531. }
  3532. //
  3533. // Generate a new Volume Sequnce Number for the change order since
  3534. // it gets sent to the end of the new R.S. process list.
  3535. // The change order VSNs must be kept monotonically increasing
  3536. // within a replica set for change order dampening to work.
  3537. //
  3538. NEW_VSN(pVme, &ChangeOrder->Cmd.FrsVsn);
  3539. SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCALCO);
  3540. //
  3541. // Entry already in Aging table if its a CO update. If this is a
  3542. // duplicate entry for the same FID (because the merge was
  3543. // disallowed then put this entry at the end of the duplicate list.
  3544. //
  3545. if (!CoUpdate) {
  3546. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Q Insert",
  3547. ChangeOrder->Cmd.ContentCmd);
  3548. GStatus = GhtInsert(pVme->ChangeOrderTable, ChangeOrder, TRUE, TRUE);
  3549. if (GStatus != GHT_STATUS_SUCCESS) {
  3550. DPRINT1(0, "++ ERROR - GhtInsert Failed: %d\n", GStatus);
  3551. FRS_ASSERT(!"Local Co Q Insert Failed");
  3552. goto ERROR_RETURN;
  3553. }
  3554. SET_COE_FLAG(ChangeOrder, COE_FLAG_IN_AGING_CACHE);
  3555. } else {
  3556. CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Aging Update",
  3557. ChangeOrder->Cmd.ContentCmd);
  3558. }
  3559. INCREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
  3560. //
  3561. // For remote COs the cxtion count is incremented when the remote CO
  3562. // goes onto the CO process queue. We don't do this for local COs
  3563. // because the code to shutdown the Jrnl Cxtion may never see the
  3564. // CO count go to zero if we did this. We just set the CO
  3565. // CxtionGuid and the CO JoinGuid here so unjoin / rejoins can be
  3566. // detected.
  3567. //
  3568. INIT_LOCALCO_CXTION_GUID(Replica, ChangeOrder);
  3569. WStatus = FrsRtlInsertTailQueueLock(&pVme->ChangeOrderList,
  3570. &ChangeOrder->ProcessList);
  3571. if (WIN_SUCCESS(WStatus)) {
  3572. SET_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
  3573. INC_LOCAL_CO_QUEUE_COUNT(Replica);
  3574. } else {
  3575. DPRINT_WS(0, "++ ERROR - ChangeOrder insert failed:", WStatus);
  3576. }
  3577. }
  3578. GStatus = GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
  3579. ChangeOrder,
  3580. TRUE);
  3581. if (GStatus != GHT_STATUS_SUCCESS) {
  3582. DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
  3583. FRS_PRINT_TYPE(0, ChangeOrder);
  3584. FRS_ASSERT(!"CO ref count non positive");
  3585. goto ERROR_RETURN;
  3586. }
  3587. }
  3588. return WStatus;
  3589. ERROR_RETURN:
  3590. return ERROR_GEN_FAILURE;
  3591. }
  3592. ULONG
  3593. JrnlDoesChangeOrderHaveChildrenWorker(
  3594. IN PQHASH_TABLE ParentFidTable,
  3595. IN PQHASH_ENTRY BeforeNode,
  3596. IN PQHASH_ENTRY TargetNode,
  3597. IN PVALID_CHILD_CHECK_DATA pValidChildCheckData
  3598. )
  3599. /*++
  3600. Routine Description:
  3601. This function is called thru QHashEnumerateTable().
  3602. Search for a match between the ParentFid and the entry's
  3603. ParentFid (QHASH_ENTRY.QData).
  3604. Arguments:
  3605. Table -- the hash table being enumerated
  3606. BeforeNode -- ptr to the QhashEntry before the node of interest.
  3607. TargetNode -- ptr to the QhashEntry of interest.
  3608. pValidChildCheckData -- ptr to the parent fid
  3609. Return Value:
  3610. FrsErrorResourceInUse - Child of ParentFid was found
  3611. FrsErrorSuccess - No children were found for ParentFid
  3612. --*/
  3613. {
  3614. #undef DEBSUB
  3615. #define DEBSUB "JrnlDoesChangeOrderHaveChildrenWorker:"
  3616. JET_ERR jerr;
  3617. PTHREAD_CTX ThreadCtx = pValidChildCheckData->ThreadCtx;
  3618. PTABLE_CTX TmpIDTableCtx = pValidChildCheckData->TmpIDTableCtx;
  3619. PIDTABLE_RECORD IDTableRec;
  3620. if ((TargetNode->QData == pValidChildCheckData->FileReferenceNumber)){
  3621. if (ThreadCtx == NULL || TmpIDTableCtx == NULL) {
  3622. return FrsErrorResourceInUse;
  3623. }
  3624. jerr = DbsReadRecord(ThreadCtx, &TargetNode->QKey, FileIDIndexx, TmpIDTableCtx);
  3625. //
  3626. // No IDTable entry. OK to delete the child.
  3627. //
  3628. if (jerr == JET_errRecordNotFound) {
  3629. return FrsErrorSuccess;
  3630. }
  3631. if (!JET_SUCCESS(jerr)) {
  3632. DPRINT_JS(0,"++ ERROR - DbsReadRecord failed;", jerr);
  3633. return FrsErrorResourceInUse;
  3634. }
  3635. IDTableRec = (PIDTABLE_RECORD) (TmpIDTableCtx->pDataRecord);
  3636. //
  3637. // This child of the parent is not marked to be deleted which means it is
  3638. // not going away. Hence return that this parent has children. The parent
  3639. // delete will be aborted.
  3640. //
  3641. if (!IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETE_DEFERRED)) {
  3642. return FrsErrorResourceInUse;
  3643. }
  3644. }
  3645. return FrsErrorSuccess;
  3646. }
  3647. BOOL
  3648. JrnlDoesChangeOrderHaveChildren(
  3649. IN PTHREAD_CTX ThreadCtx,
  3650. IN PTABLE_CTX TmpIDTableCtx,
  3651. IN PCHANGE_ORDER_ENTRY ChangeOrder
  3652. )
  3653. /*++
  3654. Routine Description:
  3655. The ChangeOrderAccept thread is issueing a retry of a directory
  3656. delete. The question is, "Does this directory have replicating
  3657. children?" If so, the change order should be retried at a later
  3658. time.
  3659. If not, the change order is sent on to an install thread that
  3660. will empty the directory of any files or subdirectories and
  3661. then delete the directory. The files and subdirectories are
  3662. assumed to have been filtered and are non-replicating. You can
  3663. see why we want to insure there are no replicating files or
  3664. subdirectories in this directory prior to emptying the directory.
  3665. The journal's directory filter table and the journal's parent fid
  3666. table are searched for children of the directory specified by
  3667. ChangeOrder.
  3668. Arguments:
  3669. ChangeOrder - For a retry of a directory delete
  3670. Return Value:
  3671. TRUE - Directory has replicating children in the journal tables
  3672. FALSE - Directory does not have replicating children in the journal tables
  3673. --*/
  3674. {
  3675. #undef DEBSUB
  3676. #define DEBSUB "JrnlDoesChangeOrderHaveChildren:"
  3677. DWORD FStatus;
  3678. PREPLICA Replica;
  3679. PVOLUME_MONITOR_ENTRY pVme;
  3680. PQHASH_TABLE ParentFidTable;
  3681. VALID_CHILD_CHECK_DATA ValidChildCheckData;
  3682. Replica = ChangeOrder->NewReplica;
  3683. //
  3684. // Retry the change order if information about its children is lacking.
  3685. //
  3686. if (!Replica) {
  3687. DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
  3688. return TRUE;
  3689. }
  3690. pVme = Replica->pVme;
  3691. if (!pVme) {
  3692. DPRINT(4, "++ WARN: No pVme in Replica\n");
  3693. return TRUE;
  3694. }
  3695. ParentFidTable = pVme->ParentFidTable;
  3696. if (!ParentFidTable) {
  3697. DPRINT(4, "++ WARN: No ParentFidTable in pVme\n");
  3698. return TRUE;
  3699. }
  3700. //
  3701. // Look for subdirectories and files.
  3702. //
  3703. ValidChildCheckData.ThreadCtx = ThreadCtx;
  3704. ValidChildCheckData.TmpIDTableCtx = TmpIDTableCtx;
  3705. ValidChildCheckData.FileReferenceNumber = ChangeOrder->FileReferenceNumber;
  3706. FStatus = QHashEnumerateTable(ParentFidTable,
  3707. JrnlDoesChangeOrderHaveChildrenWorker,
  3708. &ValidChildCheckData);
  3709. if (FStatus == FrsErrorResourceInUse) {
  3710. DPRINT(4, "++ Child found; change order has files\n");
  3711. return TRUE;
  3712. }
  3713. DPRINT(4, "++ Child not found; change order has no subdirs or files\n");
  3714. return FALSE;
  3715. }
  3716. ULONG
  3717. JrnlAddFilterEntryFromUsn(
  3718. IN PREPLICA Replica,
  3719. IN PUSN_RECORD UsnRecord,
  3720. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  3721. )
  3722. /*++
  3723. Routine Description:
  3724. Create a new filter table entry from data in the USN record and the
  3725. Replica struct. Insert it into the Volume Filter Table.
  3726. The caller must decrement the refcount on the filter entry.
  3727. Arguments:
  3728. Replica - ptr to the Replica struct containing the directory now.
  3729. UsnRecord - ptr to the UsnRecord.
  3730. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  3731. want a reference to the entry so we drop it here.
  3732. Return Value:
  3733. Win32 status.
  3734. --*/
  3735. {
  3736. #undef DEBSUB
  3737. #define DEBSUB "JrnlAddFilterEntryFromUsn:"
  3738. PFILTER_TABLE_ENTRY FilterEntry;
  3739. ULONG Len;
  3740. ULONG WStatus;
  3741. //
  3742. // Create a new filter entry.
  3743. // The size of the file name field is Len + sizeof(WCHAR) because
  3744. // the file name field is defined as a wchar array of length 1.
  3745. //
  3746. Len = UsnRecord->FileNameLength;
  3747. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
  3748. FilterEntry->DFileID = UsnRecord->FileReferenceNumber;
  3749. FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
  3750. FrsCopyUnicodeStringFromRawString(&FilterEntry->UFileName,
  3751. Len + sizeof(WCHAR),
  3752. UsnRecord->FileName,
  3753. Len);
  3754. WStatus = JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, FALSE);
  3755. if (!WIN_SUCCESS(WStatus)) {
  3756. DUMP_USN_RECORD2(0, UsnRecord, Replica->ReplicaNumber, CO_LOCATION_NUM_CMD);
  3757. }
  3758. return WStatus;
  3759. }
  3760. ULONG
  3761. JrnlAddFilterEntryFromCo(
  3762. IN PREPLICA Replica,
  3763. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  3764. OUT PFILTER_TABLE_ENTRY *RetFilterEntry
  3765. )
  3766. /*++
  3767. Routine Description:
  3768. Create a new filter table entry from data in the change order entry and the
  3769. Replica struct. Insert it into the Volume Filter Table. This is called
  3770. when we receive remote change orders that create a directory.
  3771. If this is a recovery change order than the filter entry is replaced if
  3772. there is a conflict.
  3773. The caller must decrement the refcount on the filter entry.
  3774. Arguments:
  3775. Replica - ptr to the Replica struct containing the directory now.
  3776. ChangeOrder -- ptr to the change order entry.
  3777. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  3778. want a reference to the entry so we drop it here.
  3779. Return Value:
  3780. Win32 status.
  3781. --*/
  3782. {
  3783. #undef DEBSUB
  3784. #define DEBSUB "JrnlAddFilterEntryFromCo:"
  3785. PFILTER_TABLE_ENTRY FilterEntry;
  3786. ULONG Len;
  3787. ULONG WStatus;
  3788. //
  3789. // Create a new filter entry.
  3790. // NOTE that the actual size of the filename buffer is Len +
  3791. // sizeof(WCHAR) because the definition of FILTER_TABLE_ENTRY
  3792. // includes a single wchar array for filename. Hence, the
  3793. // assignment of UNICODE_NULL to Buffer[Len/2] doesn't scribble
  3794. // past the end of the array.
  3795. //
  3796. Len = ChangeOrder->Cmd.FileNameLength;
  3797. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
  3798. FilterEntry->DFileID = ChangeOrder->FileReferenceNumber;
  3799. FilterEntry->DParentFileID = ChangeOrder->ParentFileReferenceNumber;
  3800. FilterEntry->UFileName.Length = (USHORT)Len;
  3801. CopyMemory(FilterEntry->UFileName.Buffer, ChangeOrder->Cmd.FileName, Len);
  3802. FilterEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
  3803. //
  3804. // Its possible to receive a change order more than once; and the
  3805. // first change order may have been taken through retry. If the
  3806. // change order was for a directory create, this would leave
  3807. // an idtable entry set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
  3808. // *and* the directories entries in the filter table. So, always
  3809. // relace an existing entry.
  3810. //
  3811. return JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, TRUE);
  3812. }
  3813. ULONG
  3814. JrnlAddFilterEntry(
  3815. IN PREPLICA Replica,
  3816. IN PFILTER_TABLE_ENTRY FilterEntry,
  3817. OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
  3818. IN BOOL Replace
  3819. )
  3820. /*++
  3821. Routine Description:
  3822. Insert the filter entry into the Volume Filter Table.
  3823. This routine acquires the child list lock for the replica when doing the
  3824. child list insert.
  3825. The caller must decrement the refcount on the filter entry.
  3826. On an insert error the entry is freed and NULL is returned.
  3827. Arguments:
  3828. Replica - ptr to the Replica struct containing the directory now.
  3829. FilterEntry -- ptr to filter entry to insert.
  3830. RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
  3831. want a reference to the entry so we drop it here.
  3832. On an insert error the entry is freed and NULL is returned.
  3833. Replace - If true then replace current entry with this one if conflict.
  3834. Return Value:
  3835. Win32 status.
  3836. --*/
  3837. {
  3838. #undef DEBSUB
  3839. #define DEBSUB "JrnlAddFilterEntry:"
  3840. PGENERIC_HASH_TABLE FilterTable = Replica->pVme->FilterTable;
  3841. ULONG GStatus, WStatus;
  3842. ULONG RetryCount = 0;
  3843. PFILTER_TABLE_ENTRY OldEntry;
  3844. ULONG Len;
  3845. //
  3846. // Start ref count out at one (insert bumps it again to 2) if we
  3847. // return the address of the entry.
  3848. //
  3849. FilterEntry->HashEntryHeader.ReferenceCount = 1;
  3850. FilterEntry->Replica = Replica;
  3851. FilterEntry->DReplicaNumber = Replica->ReplicaNumber;
  3852. RETRY:
  3853. //
  3854. // Insert the entry into the VME Filter Table.
  3855. //
  3856. GStatus = GhtInsert(FilterTable, FilterEntry, TRUE, FALSE);
  3857. if (GStatus != GHT_STATUS_SUCCESS) {
  3858. if (Replace) {
  3859. goto REPLACE;
  3860. }
  3861. DPRINT1(0, "++ ERROR - GhtInsert Failed: %d, Entry conflict. Tried to insert:\n", GStatus);
  3862. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3863. FilterEntry = FrsFreeType(FilterEntry);
  3864. goto ERROR_RETURN;
  3865. }
  3866. //
  3867. // Link the filter entry onto the parent's child list and drop the reference
  3868. // if the caller doesn't want the ptr back.
  3869. //
  3870. JrnlAcquireChildLock(Replica);
  3871. WStatus = (ULONG)JrnlFilterLinkChild(FilterTable, FilterEntry, Replica);
  3872. JrnlReleaseChildLock(Replica);
  3873. if (!WIN_SUCCESS(WStatus)) {
  3874. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  3875. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  3876. //
  3877. // Need some code here to add this filter entry to an orphan list
  3878. // in the off chance that the parent will later come into existence
  3879. // and now needs to hook up to the child. The creation of each new
  3880. // entry would then have to scan the orphan list if it was non-empty.
  3881. // Note that because of ordering constraints I don't think this
  3882. // can actually happen except in the case of a remote co dir create
  3883. // while a local co moveout is in process. But in this case when
  3884. // the child dir is found during the enum it will end up getting
  3885. // deleted.
  3886. // If we relax the ordering constraints on dir creates (since they
  3887. // all start out being created in the pre-install area anyway) then
  3888. // this code will definitely be needed.
  3889. //
  3890. // Note: May need dir filter entry orphan list. see note above.
  3891. }
  3892. RETURN:
  3893. if (RetFilterEntry != NULL) {
  3894. *RetFilterEntry = FilterEntry;
  3895. } else {
  3896. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  3897. }
  3898. return WStatus;
  3899. REPLACE:
  3900. //
  3901. // Replace the data in the old entry with the data in the new entry.
  3902. //
  3903. GStatus = GhtLookup(FilterTable, &FilterEntry->DFileID, TRUE, &OldEntry);
  3904. if (GStatus != GHT_STATUS_SUCCESS) {
  3905. FRS_ASSERT(RetryCount++ > 10);
  3906. goto RETRY;
  3907. }
  3908. FRS_ASSERT(OldEntry->DFileID == FilterEntry->DFileID);
  3909. //
  3910. // Undoing a MOVERS for a dir is going to be a pain.
  3911. // Need to check if it can really happen. Could we just abort this CO?
  3912. //
  3913. FRS_ASSERT(OldEntry->Replica == FilterEntry->Replica);
  3914. FRS_ASSERT(OldEntry->DReplicaNumber == FilterEntry->DReplicaNumber);
  3915. if (OldEntry->DParentFileID != FilterEntry->DParentFileID) {
  3916. //
  3917. // If parent FID is different then change child linkage.
  3918. //
  3919. JrnlAcquireChildLock(Replica);
  3920. WStatus = JrnlFilterUnlinkChild (FilterTable, OldEntry, OldEntry->Replica);
  3921. if (!WIN_SUCCESS(WStatus)) {
  3922. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  3923. goto REPLACE_ERROR;
  3924. }
  3925. //
  3926. // Update the filter entry with the new parent and reinsert into filter.
  3927. //
  3928. OldEntry->DParentFileID = FilterEntry->DParentFileID;
  3929. WStatus = (ULONG) JrnlFilterLinkChild(FilterTable,
  3930. OldEntry,
  3931. OldEntry->Replica);
  3932. if (!WIN_SUCCESS(WStatus)) {
  3933. DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
  3934. goto REPLACE_ERROR;
  3935. }
  3936. JrnlReleaseChildLock(Replica);
  3937. }
  3938. if (FilterEntry->UFileName.Length <= (OldEntry->UFileName.MaximumLength -
  3939. sizeof(WCHAR))) {
  3940. Len = FilterEntry->UFileName.Length;
  3941. } else {
  3942. //
  3943. // Note: need a swap entry with row locked and ref count 2 to realloc node.
  3944. //
  3945. // Or just alloc a new buffer and set UFileName to point to it with
  3946. // a test on the free side to check if not using the in-node buffer.
  3947. // But do we really need the name?
  3948. // It is used to build the full name path but is it really needed?
  3949. // For now just copy the first n characters.
  3950. //
  3951. Len = OldEntry->UFileName.MaximumLength - sizeof(WCHAR);
  3952. }
  3953. CopyMemory(OldEntry->UFileName.Buffer, FilterEntry->UFileName.Buffer, Len);
  3954. OldEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
  3955. OldEntry->UFileName.Length = (USHORT) Len;
  3956. FRS_JOURNAL_FILTER_PRINT(5, FilterTable, OldEntry);
  3957. FrsFreeType(FilterEntry);
  3958. FilterEntry = OldEntry;
  3959. goto RETURN;
  3960. REPLACE_ERROR:
  3961. JrnlReleaseChildLock(Replica);
  3962. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, OldEntry);
  3963. GhtDereferenceEntryByAddress(FilterTable, OldEntry, TRUE);
  3964. ERROR_RETURN:
  3965. GHT_DUMP_TABLE(5, FilterTable);
  3966. if (RetFilterEntry != NULL) {*RetFilterEntry = NULL;}
  3967. return ERROR_GEN_FAILURE;
  3968. }
  3969. ULONG
  3970. JrnlDeleteDirFilterEntry(
  3971. IN PGENERIC_HASH_TABLE FilterTable,
  3972. IN PULONGLONG DFileID,
  3973. IN PFILTER_TABLE_ENTRY ArgFilterEntry
  3974. )
  3975. /*++
  3976. Routine Description:
  3977. Delete the filter entry from the Volume Filter Table.
  3978. The caller acquires the child list lock for the replica when doing the
  3979. child list removal.
  3980. The caller must decrement the refcount on the filter entry.
  3981. Arguments:
  3982. FilterTable - ptr to the filter table struct containing the directory now.
  3983. DFileID - ptr to FID of dir to delete.
  3984. ArgFilterEntry - if non-null then delete this entry and skip lookup.
  3985. Return Value:
  3986. Win32 status.
  3987. --*/
  3988. {
  3989. #undef DEBSUB
  3990. #define DEBSUB "JrnlDeleteDirFilterEntry:"
  3991. ULONG GStatus, WStatus;
  3992. PFILTER_TABLE_ENTRY FilterEntry;
  3993. //
  3994. // Find the entry.
  3995. //
  3996. if (ArgFilterEntry == NULL) {
  3997. GStatus = GhtLookup(FilterTable, DFileID, TRUE, &FilterEntry);
  3998. if (GStatus != GHT_STATUS_SUCCESS) {
  3999. DPRINT1(0, "++ WARNING: Filter entry not found in table for FID= %08x %08x\n",
  4000. PRINTQUAD(*DFileID));
  4001. return ERROR_NOT_FOUND;
  4002. }
  4003. } else {
  4004. FilterEntry = ArgFilterEntry;
  4005. }
  4006. DPRINT1(4, "++ Deleting filter entry, FID= %08x %08x\n", PRINTQUAD(FilterEntry->DFileID));
  4007. //
  4008. // Unlink the filter entry from the parent's child list.
  4009. //
  4010. // Return an error if there are children. This can happen
  4011. // when we take a directory-create through retry. Its children
  4012. // were added when the process queue was unblocked. This
  4013. // function is then called when retrying the change order
  4014. // with the idtable set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
  4015. //
  4016. if (!IsListEmpty(&FilterEntry->ChildHead)) {
  4017. DPRINT(0, "++ WARN - Dir Delete but child list not empty\n");
  4018. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4019. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4020. return ERROR_GEN_FAILURE;
  4021. }
  4022. if (FilterEntry->ChildEntry.Flink == NULL) {
  4023. //
  4024. // This may happen if we have just completed a MOVEOUT of a dir
  4025. // subtree and a dir create remote CO is ahead of us in the process
  4026. // queue. When the dir create tried to add the filter table entry
  4027. // it won't find the parent so this entry won't be on any parent list.
  4028. // See comment in JrnlAddFilterEntry() about creation of an orphan
  4029. // list in the future.
  4030. //
  4031. DPRINT(0, "++ WARN - Dir entry not on child list\n");
  4032. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4033. } else {
  4034. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  4035. FilterEntry->ChildEntry.Flink = NULL;
  4036. }
  4037. //
  4038. // Delete the entry from the filter table.
  4039. //
  4040. GStatus = GhtDeleteEntryByAddress(FilterTable, FilterEntry, TRUE);
  4041. if (GStatus != GHT_STATUS_SUCCESS) {
  4042. DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
  4043. FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
  4044. FRS_ASSERT(!"JrnlDeleteDirFilterEntry failed.");
  4045. return ERROR_GEN_FAILURE;
  4046. }
  4047. return ERROR_SUCCESS;
  4048. }
  4049. ULONG
  4050. JrnlGetPathAndLevel(
  4051. IN PGENERIC_HASH_TABLE FilterTable,
  4052. IN PLONGLONG StartDirFileID,
  4053. OUT PULONG Level
  4054. )
  4055. /*++
  4056. Routine Description:
  4057. Walk the filter table from DirFileID to the root building the directory
  4058. path and counting the levels.
  4059. Arguments:
  4060. FilterTable -- Ptr to the Generic hash table containing a dir filter
  4061. StartDirFileID -- The file id of the directory to start the walk from.
  4062. Level -- The returned nesting level of the dir. (0 means the replcia tree root)
  4063. Return Value:
  4064. FrsError status.
  4065. --*/
  4066. {
  4067. #undef DEBSUB
  4068. #define DEBSUB "JrnlGetPathAndLevel:"
  4069. ULONGLONG DirFileID = *StartDirFileID;
  4070. PFILTER_TABLE_ENTRY FilterEntry;
  4071. ULONG FStatus = FrsErrorSuccess;
  4072. ULONG GStatus;
  4073. *Level = 0;
  4074. GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
  4075. if (GStatus == GHT_STATUS_NOT_FOUND) {
  4076. return FrsErrorNotFound;
  4077. }
  4078. while (GStatus == GHT_STATUS_SUCCESS) {
  4079. //
  4080. // Stop when we hit the replica tree root.
  4081. //
  4082. if (FilterEntry->DParentFileID == ZERO_FID) {
  4083. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4084. break;
  4085. }
  4086. *Level += 1;
  4087. if (*Level > 100000) {
  4088. //
  4089. // Hung. Corrupt Filter table.
  4090. //
  4091. DPRINT(0, "++ ERROR: Hung in Journal entry filter lookup. Entry skipped\n");
  4092. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4093. GHT_DUMP_TABLE(0, FilterTable);
  4094. FRS_ASSERT(!"Hung in Journal entry filter lookup");
  4095. return FrsErrorInternalError;
  4096. }
  4097. //
  4098. // Get parent FID & Drop the reference to the filter table entry.
  4099. // Lookup parent's filter entry.
  4100. //
  4101. DirFileID = FilterEntry->DParentFileID;
  4102. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4103. GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
  4104. if (GStatus != GHT_STATUS_SUCCESS) {
  4105. //
  4106. // Corrupt Filter table or it could be an op on an orphaned
  4107. // dir that will later get deleted.
  4108. //
  4109. DPRINT(0, "++ ERROR: Parent filter entry not found in Journal filter Table.\n");
  4110. //GHT_DUMP_TABLE(0, FilterTable);
  4111. return FrsErrorInternalError;
  4112. }
  4113. }
  4114. return FStatus;
  4115. }
  4116. BOOL
  4117. JrnlIsChangeOrderInReplica(
  4118. IN PCHANGE_ORDER_ENTRY ChangeOrder,
  4119. IN PLONGLONG DirFileID
  4120. )
  4121. /*++
  4122. Routine Description:
  4123. Look up the File ID for the given directory in the given journal filter
  4124. table and if found compare the replica set pointer from the filter entry
  4125. to the replica set pointer in the change order. Return TRUE if match.
  4126. Arguments:
  4127. ChangeOrder -- The change order entry assoicated with the file of interest.
  4128. DirFileID -- The file id of the directory in which the file currently
  4129. resides. This may be different than the parent FID in the
  4130. change order.
  4131. Return Value:
  4132. TRUE if Pointer to Replica Struct or NULL if not found.
  4133. --*/
  4134. {
  4135. #undef DEBSUB
  4136. #define DEBSUB "JrnlIsChangeOrderInReplica:"
  4137. PFILTER_TABLE_ENTRY FilterEntry;
  4138. PGENERIC_HASH_TABLE FilterTable;
  4139. ULONG GStatus;
  4140. PREPLICA Replica, FilterReplica = NULL;
  4141. Replica = ChangeOrder->NewReplica;
  4142. if (Replica == NULL) {
  4143. DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
  4144. return FALSE;
  4145. }
  4146. if (Replica->pVme == NULL) {
  4147. DPRINT(4, "++ WARN: No pVme in Replica\n");
  4148. return FALSE;
  4149. }
  4150. FilterTable = Replica->pVme->FilterTable;
  4151. if (FilterTable == NULL) {
  4152. DPRINT(4, "++ WARN: No FilterTable in pVme\n");
  4153. return FALSE;
  4154. }
  4155. GStatus = GhtLookup(FilterTable, DirFileID, TRUE, &FilterEntry);
  4156. if (GStatus == GHT_STATUS_SUCCESS) {
  4157. //
  4158. // Get Replica ptr & Drop the reference to the filter table entry.
  4159. //
  4160. FilterReplica = FilterEntry->Replica;
  4161. GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
  4162. }
  4163. return (Replica == FilterReplica);
  4164. }
  4165. ULONG
  4166. JrnlCommand(
  4167. PCOMMAND_PACKET CmdPkt
  4168. )
  4169. /*++
  4170. Routine Description:
  4171. Process a command packet sent to the Journal sub-system. External
  4172. components interact with the Journal by building a command packet and
  4173. submitting it to the Journal Process Queue. The typical way journal
  4174. processing is started is by issuing the following series of command
  4175. packets using FrsSubmitCommand.
  4176. <Start the journal monitor thread>
  4177. CMD_INIT_SUBSYSTEM: Init and start the journal for all replicas
  4178. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set A
  4179. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set B
  4180. o
  4181. o
  4182. CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set Z
  4183. CMD_STOP_SUBSYSTEM: Stop journal processing for all replica sets
  4184. and terminate the journal sub-system.
  4185. Arguments:
  4186. CmdPkt: Command packet to process.
  4187. Return Value:
  4188. Win32 status
  4189. --*/
  4190. {
  4191. #undef DEBSUB
  4192. #define DEBSUB "JrnlCommand:"
  4193. LIST_ENTRY DeadList;
  4194. PLIST_ENTRY Entry;
  4195. ULONG WStatus;
  4196. ULONG FStatus;
  4197. PVOLUME_MONITOR_ENTRY pVme;
  4198. FILETIME SystemTime;
  4199. PCONFIG_TABLE_RECORD ConfigRecord;
  4200. DPRINT1(5, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  4201. switch (CmdPkt->Command) {
  4202. case CMD_COMMAND_ERROR:
  4203. DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
  4204. break;
  4205. case CMD_INIT_SUBSYSTEM:
  4206. //
  4207. // Initialize the journal
  4208. //
  4209. WStatus = JournalMonitorInit();
  4210. DEBUG_FLUSH();
  4211. if (!WIN_SUCCESS(WStatus)) {
  4212. if (!FrsIsShuttingDown) {
  4213. DPRINT_WS(0, "ERROR - Journal cannot start;", WStatus);
  4214. }
  4215. break;
  4216. }
  4217. //
  4218. // Init the change order accept thread.
  4219. //
  4220. if (ChgOrdAcceptInitialize() != FrsErrorSuccess) {
  4221. DPRINT(0, "ERROR - Journal cannot start; can't start change order thread.\n");
  4222. WStatus = ERROR_GEN_FAILURE;
  4223. break;
  4224. }
  4225. DPRINT(0, "Journal has started.\n");
  4226. DEBUG_FLUSH();
  4227. SetEvent(JournalEvent);
  4228. //
  4229. // Free up memory by reducing our working set size
  4230. //
  4231. SetProcessWorkingSetSize(ProcessHandle, (SIZE_T)-1, (SIZE_T)-1);
  4232. break;
  4233. //
  4234. // Close all the journal VMEs, rundown the Process Queue and free
  4235. // all the queue entries. On return the main process loop with
  4236. // see the queue is rundown and will terminate the thread.
  4237. //
  4238. case CMD_STOP_SUBSYSTEM:
  4239. DPRINT(4, "Stopping Journal Subsystem\n");
  4240. JrnlCloseAll();
  4241. FrsRtlRunDownQueue(&JournalProcessQueue, &DeadList);
  4242. FrsFreeTypeList(&DeadList);
  4243. break;
  4244. case CMD_PAUSE_SUBSYSTEM:
  4245. case CMD_QUERY_INFO_SUBSYSTEM:
  4246. case CMD_SET_CONFIG_SUBSYSTEM:
  4247. case CMD_QUERY_CONFIG_SUBSYSTEM:
  4248. case CMD_CANCEL_COMMAND_SUBSYSTEM:
  4249. case CMD_READ_SUBSYSTEM:
  4250. case CMD_WRITE_SUBSYSTEM:
  4251. goto UNSUPPORTED_COMMAND;
  4252. case CMD_START_SERVICE:
  4253. case CMD_STOP_SERVICE:
  4254. case CMD_PAUSE_SERVICE:
  4255. case CMD_QUERY_INFO_SERVICE:
  4256. case CMD_SET_CONFIG_SERVICE:
  4257. case CMD_QUERY_CONFIG_SERVICE:
  4258. case CMD_CANCEL_COMMAND_SERVICE:
  4259. case CMD_READ_SERVICE:
  4260. case CMD_WRITE_SERVICE:
  4261. break;
  4262. //
  4263. // This command is an acknowledgement from the journal read thread that
  4264. // journal read activity on this volume (pVme parameter) has paused.
  4265. // Set the state to JRNL_STATE_PAUSED and signal the event in the
  4266. // VME so any waiters can proceed. Also mark all replica sets on this
  4267. // volume as paused.
  4268. //
  4269. case CMD_JOURNAL_PAUSED:
  4270. pVme = CmdPkt->Parameters.JournalRequest.pVme;
  4271. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  4272. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_PAUSED);
  4273. //
  4274. // Save time of last replica pause. LastPause
  4275. //
  4276. GetSystemTimeAsFileTime(&SystemTime);
  4277. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  4278. //
  4279. // Iterator pE is of type REPLICA.
  4280. //
  4281. ConfigRecord = (PCONFIG_TABLE_RECORD) (pE->ConfigTable.pDataRecord);
  4282. COPY_TIME(&ConfigRecord->LastPause, &SystemTime);
  4283. );
  4284. SetEvent(pVme->Event);
  4285. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  4286. break;
  4287. //
  4288. // This command initializes the journal and database for a single replica
  4289. // set. It is intended to be used when creating or starting a replica
  4290. // set after the initial system startup has occurred.
  4291. // Note we don't complete the command here since we propagate it on
  4292. // to the DB server. In the case of failure the command is completed
  4293. // here and status is returned in the cmd pkt ErrorStatus field.
  4294. // The Replica->FStatus field may have more status about the failure.
  4295. //
  4296. case CMD_JOURNAL_INIT_ONE_RS:
  4297. FStatus = JrnlInitOneReplicaSet(CmdPkt);
  4298. if (FRS_SUCCESS(FStatus)) {
  4299. return ERROR_SUCCESS;
  4300. }
  4301. WStatus = ERROR_GEN_FAILURE;
  4302. break;
  4303. //
  4304. // Delete a journal directory filter table entry. We do it in the journal
  4305. // thread so we don't have to lock the table.
  4306. //
  4307. case CMD_JOURNAL_DELETE_DIR_FILTER_ENTRY:
  4308. WStatus = JrnlDeleteDirFilterEntry(
  4309. JrReplica(CmdPkt)->pVme->FilterTable,
  4310. &JrDFileID(CmdPkt),
  4311. NULL);
  4312. break;
  4313. //
  4314. // Cleanout unneeded entries in the Journal Write Filter.
  4315. //
  4316. case CMD_JOURNAL_CLEAN_WRITE_FILTER:
  4317. WStatus = JrnlCleanWriteFilter(CmdPkt);
  4318. break;
  4319. default:
  4320. goto UNSUPPORTED_COMMAND;
  4321. } // end switch
  4322. //
  4323. // Retire the command packet.
  4324. //
  4325. FrsCompleteCommand(CmdPkt, WStatus);
  4326. return WStatus;
  4327. UNSUPPORTED_COMMAND:
  4328. DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
  4329. return ERROR_INVALID_PARAMETER;
  4330. }
  4331. JET_ERR
  4332. JrnlInsertFilterEntry(
  4333. IN PTHREAD_CTX ThreadCtx,
  4334. IN PTABLE_CTX TableCtx,
  4335. IN PVOID Record,
  4336. IN PVOID Context
  4337. )
  4338. /*++
  4339. Routine Description:
  4340. This is a worker function passed to FrsEnumerateTable(). Each time
  4341. it is called It inserts a DIRTable record into the Volume filter table.
  4342. Arguments:
  4343. ThreadCtx - Needed to access Jet. (Not used).
  4344. TableCtx - A ptr to a DIRTable context struct.
  4345. Record - A ptr to a DIRTable record.
  4346. Context - A ptr to the Replica set we are loading data for.
  4347. Return Value:
  4348. A Jet error status. Success means call us with the next record.
  4349. Failure means don't call again and pass our status back to the
  4350. caller of FrsEnumerateTable().
  4351. --*/
  4352. {
  4353. #undef DEBSUB
  4354. #define DEBSUB "JrnlInsertFilterEntry:"
  4355. PDIRTABLE_RECORD DIRTableRec = (PDIRTABLE_RECORD) Record;
  4356. PREPLICA Replica = (PREPLICA) Context;
  4357. ULONG NameLen, GStatus;
  4358. PFILTER_TABLE_ENTRY FilterEntry;
  4359. //
  4360. // Build a filter table record big enough to hold the filename
  4361. // and insert into the volume filter table. Note that the
  4362. // file name field is large enough to hold the terminating
  4363. // UNICODE_NULL because the file name field is defined as
  4364. // a wchar array of length 1 in FILTER_TABLE_ENTRY.
  4365. //
  4366. NameLen = wcslen(DIRTableRec->DFileName) * sizeof(WCHAR);
  4367. FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, NameLen);
  4368. //
  4369. // Copy the data from the DIRTable record to the filter entry
  4370. // and add a pointer to the Replica struct.
  4371. //
  4372. CopyMemory(FilterEntry->DFileName, DIRTableRec->DFileName, NameLen + 2);
  4373. FilterEntry->DFileID = DIRTableRec->DFileID;
  4374. FilterEntry->DParentFileID = DIRTableRec->DParentFileID;
  4375. FilterEntry->DReplicaNumber = DIRTableRec->DReplicaNumber;
  4376. FilterEntry->Replica = Replica;
  4377. FilterEntry->UFileName.Length = (USHORT)NameLen;
  4378. FilterEntry->UFileName.Buffer[NameLen/2] = UNICODE_NULL;
  4379. GStatus = GhtInsert(Replica->pVme->FilterTable, FilterEntry, TRUE, FALSE);
  4380. if (GStatus != GHT_STATUS_SUCCESS) {
  4381. DPRINT1(0, "ERROR - GhtInsert Failed: %d\n", GStatus);
  4382. DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
  4383. FrsFreeType(FilterEntry);
  4384. return JET_errKeyDuplicate;
  4385. }
  4386. return JET_errSuccess;
  4387. }
  4388. ULONG
  4389. JrnlCleanWriteFilter(
  4390. PCOMMAND_PACKET CmdPkt
  4391. )
  4392. /*++
  4393. Routine Description:
  4394. Walk thru all active replica sets on this volume. Find the minimum
  4395. value for FSVolLastUsn. This is the Joint journal commit point for all
  4396. replica sets on the volume. No replica set will start a journal
  4397. read before this point.
  4398. Then enumerate all entries of the Volume Write Filter table and free
  4399. the entries whose USN is less than the Joint Journal commit point.
  4400. Arguments:
  4401. CmdPkt: Command packet to process.
  4402. Return Value:
  4403. Win32 status
  4404. --*/
  4405. {
  4406. #undef DEBSUB
  4407. #define DEBSUB "JrnlCleanWriteFilter:"
  4408. USN JointJournalCommitUsn = MAXLONGLONG;
  4409. LONGLONG FSVolLastUSN;
  4410. PVOLUME_MONITOR_ENTRY pVme;
  4411. PCONFIG_TABLE_RECORD ConfigRecord;
  4412. ULONG TimeOut = 5*JRNL_CLEAN_WRITE_FILTER_INTERVAL;
  4413. BOOL FoundpVme = FALSE;
  4414. //
  4415. // Ignore if pVme is no longer active; don't retry
  4416. //
  4417. pVme = JrpVme(CmdPkt);
  4418. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  4419. if (pVme == pE) {
  4420. FoundpVme = TRUE;
  4421. break;
  4422. }
  4423. );
  4424. if (!FoundpVme) {
  4425. return ERROR_SUCCESS;
  4426. }
  4427. //
  4428. // If this journal is currently running then make a cleaning pass.
  4429. //
  4430. if (pVme->IoActive) {
  4431. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  4432. // Iterator pE is of type PREPLICA.
  4433. //
  4434. // Get QuadWriteLock lock to avoid quadword tearing when FSVolLastUSN is read.
  4435. //
  4436. ConfigRecord = (PCONFIG_TABLE_RECORD)pE->ConfigTable.pDataRecord;
  4437. AcquireQuadLock(&pVme->QuadWriteLock);
  4438. FSVolLastUSN = ConfigRecord->FSVolLastUSN;
  4439. ReleaseQuadLock(&pVme->QuadWriteLock);
  4440. if (FSVolLastUSN < JointJournalCommitUsn) {
  4441. JointJournalCommitUsn = FSVolLastUSN;
  4442. }
  4443. );
  4444. DPRINT1(5, "WRITE FILTER TABLE CLEAN AT JointJournalCommitUsn = %08x %08x\n",
  4445. PRINTQUAD(JointJournalCommitUsn));
  4446. QHashEnumerateTable(pVme->FrsWriteFilter,
  4447. JrnlCleanWriteFilterWorker,
  4448. &JointJournalCommitUsn);
  4449. TimeOut = JRNL_CLEAN_WRITE_FILTER_INTERVAL;
  4450. }
  4451. //
  4452. // Resubmit the clean filter request.
  4453. //
  4454. JrnlSubmitCleanWriteFilter(pVme, TimeOut);
  4455. return ERROR_SUCCESS;
  4456. }
  4457. ULONG
  4458. JrnlCleanWriteFilterWorker (
  4459. PQHASH_TABLE Table,
  4460. PQHASH_ENTRY BeforeNode,
  4461. PQHASH_ENTRY TargetNode,
  4462. PVOID Context
  4463. )
  4464. /*++
  4465. Routine Description:
  4466. This function is called thru QHashEnumerateTable() to process
  4467. an entry.
  4468. Arguments:
  4469. Table - the hash table being enumerated
  4470. BeforeNode -- ptr to the QhashEntry before the node of interest.
  4471. TargetNode -- ptr to the QhashEntry of interest.
  4472. Context - ptr to the USN to compare against.
  4473. Return Value:
  4474. Win32 status
  4475. --*/
  4476. {
  4477. #undef DEBSUB
  4478. #define DEBSUB "JrnlCleanWriteFilterWorker:"
  4479. USN JointJournalCommitUsn = *(USN *)Context;
  4480. if ( (USN)(TargetNode->QKey) < JointJournalCommitUsn) {
  4481. DPRINT5(4, "DelWrtFilterEntry - BeforeNode: %08x, Link: %08x,"
  4482. " Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
  4483. BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
  4484. PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
  4485. //
  4486. // Tell QHashEnumerateTable() to delete the node and continue the enum.
  4487. //
  4488. return FrsErrorDeleteRequested;
  4489. }
  4490. return FrsErrorSuccess;
  4491. }
  4492. VOID
  4493. JrnlSubmitCleanWriteFilter(
  4494. IN PVOLUME_MONITOR_ENTRY pVme,
  4495. IN ULONG TimeOut
  4496. )
  4497. /*++
  4498. Routine Description:
  4499. Queue a work request to clean the write filter in TimeOut Seconds.
  4500. Arguments:
  4501. pVme -- The Vme of the write filter to clean.
  4502. TimeOut -- The max time to wait before giving up and doing Unjoin.
  4503. Return Value:
  4504. None.
  4505. --*/
  4506. {
  4507. #undef DEBSUB
  4508. #define DEBSUB "JrnlSubmitCleanWriteFilter:"
  4509. PCOMMAND_PACKET Cmd;
  4510. Cmd = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_CLEAN_WRITE_FILTER);
  4511. JrReplica(Cmd) = NULL;
  4512. JrpVme(Cmd) = pVme;
  4513. DPRINT1(5, "Submit CMD_JOURNAL_CLEAN_WRITE_FILTER %08x\n", Cmd);
  4514. FrsDelQueueSubmit(Cmd, TimeOut);
  4515. }
  4516. BOOL
  4517. JrnlSetReplicaState(
  4518. IN PREPLICA Replica,
  4519. IN ULONG NewState
  4520. )
  4521. /*++
  4522. Routine Description:
  4523. Change the state of the Replica set and move it to the associated list.
  4524. Note: If a replica set is in the error state it must first move back
  4525. to the initializing state before it can leave the error state.
  4526. Arguments:
  4527. Replica - The replica set whose state is changing.
  4528. NewState - The new state.
  4529. Return Value:
  4530. TRUE if state change allowed.
  4531. --*/
  4532. {
  4533. #undef DEBSUB
  4534. #define DEBSUB "JrnlSetReplicaState:"
  4535. ULONG OldState;
  4536. PVOLUME_MONITOR_ENTRY pVme;
  4537. WCHAR DsPollingIntervalStr[7]; // Max interval is NTFRSAPI_MAX_INTERVAL.
  4538. extern ULONG DsPollingInterval;
  4539. //
  4540. // Lock the replica lists
  4541. //
  4542. EnterCriticalSection(&JrnlReplicaStateLock);
  4543. OldState = Replica->ServiceState;
  4544. if (OldState > JRNL_STATE_MAX) {
  4545. DPRINT2(0, ":S: ERROR - Invalid previous Replica->ServiceState (%d) for Replica %ws\n",
  4546. OldState, Replica->ReplicaName->Name);
  4547. FRS_ASSERT(!"Invalid previous Replica->ServiceState");
  4548. goto CLEANUP;
  4549. }
  4550. if (NewState > JRNL_STATE_MAX) {
  4551. DPRINT2(0, ":S: ERROR - Invalid new Replica->ServiceState (%d) for Replica %ws\n",
  4552. NewState, Replica->ReplicaName->Name);
  4553. FRS_ASSERT(!"Invalid new Replica->ServiceState");
  4554. goto CLEANUP;
  4555. }
  4556. //
  4557. // If this replica set is in the ERROR State then the only allowed next
  4558. // state is INITIALIZING.
  4559. //
  4560. if ((REPLICA_IN_ERROR_STATE(OldState) || REPLICA_STATE_NEEDS_RESTORE(OldState)) &&
  4561. (NewState != REPLICA_STATE_INITIALIZING) &&
  4562. !REPLICA_STATE_NEEDS_RESTORE(NewState)) {
  4563. DPRINT4(4, ":S: ERROR: Replica (%d) %ws state change from %s to %s disallowed\n",
  4564. Replica->ReplicaNumber,
  4565. (Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
  4566. RSS_NAME(OldState),
  4567. RSS_NAME(NewState));
  4568. LeaveCriticalSection(&JrnlReplicaStateLock);
  4569. return FALSE;
  4570. }
  4571. DPRINT4(4, ":S: Replica (%d) %ws state change from %s to %s\n",
  4572. Replica->ReplicaNumber,
  4573. (Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
  4574. RSS_NAME(OldState),
  4575. RSS_NAME(NewState));
  4576. //
  4577. // if no state change, we're done.
  4578. //
  4579. if (OldState == NewState) {
  4580. goto CLEANUP;
  4581. }
  4582. //
  4583. // If we went from Active to Paused and are not in Journal Replay mode
  4584. // then advance the Replica->LastUsnRecordProcessed to
  4585. // pVme->CurrentUsnRecordDone.
  4586. //
  4587. pVme = Replica->pVme;
  4588. if (pVme != NULL) {
  4589. if ((OldState == REPLICA_STATE_ACTIVE) &&
  4590. (NewState == REPLICA_STATE_PAUSED) &&
  4591. !REPLICA_REPLAY_MODE(Replica, pVme)) {
  4592. DPRINT2(4, ":U: Replica->LastUsnRecordProcessed was: %08x %08x now: %08x %08x\n",
  4593. PRINTQUAD(Replica->LastUsnRecordProcessed),
  4594. PRINTQUAD(pVme->CurrentUsnRecordDone));
  4595. FRS_ASSERT(pVme->CurrentUsnRecordDone >= Replica->LastUsnRecordProcessed);
  4596. AcquireQuadLock(&pVme->QuadWriteLock);
  4597. Replica->LastUsnRecordProcessed = pVme->CurrentUsnRecordDone;
  4598. ReleaseQuadLock(&pVme->QuadWriteLock);
  4599. }
  4600. }
  4601. //
  4602. // update the new state.
  4603. //
  4604. Replica->ServiceState = NewState;
  4605. //
  4606. // if no list change, we're done.
  4607. //
  4608. if (RSS_LIST(OldState) == RSS_LIST(NewState)) {
  4609. goto CLEANUP;
  4610. }
  4611. //
  4612. // Remove from current list and add to new list.
  4613. //
  4614. if (RSS_LIST(OldState) != NULL) {
  4615. FrsRtlRemoveEntryQueue(RSS_LIST(OldState), &Replica->ReplicaList);
  4616. }
  4617. if (RSS_LIST(NewState) != NULL) {
  4618. FrsRtlInsertTailQueue(RSS_LIST(NewState), &Replica->ReplicaList);
  4619. }
  4620. CLEANUP:
  4621. if (REPLICA_IN_ERROR_STATE(NewState) &&
  4622. !REPLICA_FSTATUS_ROOT_HAS_MOVED(Replica->FStatus)) {
  4623. //
  4624. // Post an error log entry if the replica is in
  4625. // error state but not because the root has moved.
  4626. // If the root has moved then the error log has
  4627. // already been written when the move was detected
  4628. // and this generic eventlog here might confuse the user.
  4629. //
  4630. PWCHAR WStatusUStr, FStatusUStr;
  4631. //
  4632. // Post the failure in the event log.
  4633. //
  4634. if (Replica->Root != NULL) {
  4635. WStatusUStr = L"";
  4636. FStatusUStr = FrsAtoW(ErrLabelFrs(Replica->FStatus));
  4637. EPRINT8(EVENT_FRS_REPLICA_SET_CREATE_FAIL,
  4638. Replica->SetName->Name,
  4639. ComputerDnsName,
  4640. Replica->MemberName->Name,
  4641. Replica->Root,
  4642. Replica->Stage,
  4643. JetPath,
  4644. WStatusUStr,
  4645. FStatusUStr);
  4646. FrsFree(FStatusUStr);
  4647. }
  4648. //
  4649. // Post the generic recovery steps message.
  4650. //
  4651. EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
  4652. } else if (NewState == REPLICA_STATE_JRNL_WRAP_ERROR) {
  4653. //
  4654. // Get the DsPollingInteval in minutes.
  4655. //
  4656. _itow(DsPollingInterval / (60 * 1000), DsPollingIntervalStr, 10);
  4657. EPRINT4(EVENT_FRS_REPLICA_IN_JRNL_WRAP_ERROR, Replica->SetName->Name, Replica->Root,
  4658. Replica->Volume, DsPollingIntervalStr);
  4659. }
  4660. LeaveCriticalSection(&JrnlReplicaStateLock);
  4661. return TRUE;
  4662. }
  4663. ULONG
  4664. JrnlPrepareService1(
  4665. PREPLICA Replica
  4666. )
  4667. /*++
  4668. Routine Description:
  4669. Open the NTFS volume journal and initialize a Volume Monitor Entry for it
  4670. if this is the first replica set to use the volume. The REPLICA struct
  4671. is initialized with a pointer to the volume monitor entry and the file
  4672. path to the root of the replica tree for use in file name generation.
  4673. Init the VME Volume Sequence Number from the Replica config record,
  4674. taking the maximum value seen so far. This value is needed before we
  4675. can do any ReplicaTreeLoad operations on a new replica so we can set
  4676. the correct value in the IDTable and DIRTable entries.
  4677. After any new replica sets are loaded JrnlPrepareService2() is
  4678. called to init the Volume Filter Table with the directory entries for
  4679. every replica set on the volume.
  4680. Arguments:
  4681. Replica - The replica set we are initializing.
  4682. Return Value:
  4683. A Win32 error status.
  4684. Replica->FStatus has the FRS Error status return.
  4685. --*/
  4686. {
  4687. #undef DEBSUB
  4688. #define DEBSUB "JrnlPrepareService1:"
  4689. ULONGLONG CurrentTime;
  4690. PCONFIG_TABLE_RECORD ConfigRecord;
  4691. ULONG WStatus;
  4692. PVOLUME_MONITOR_ENTRY pVme;
  4693. CHAR TimeStr[TIME_STRING_LENGTH];
  4694. if (Replica == NULL) {
  4695. return ERROR_INVALID_PARAMETER;
  4696. }
  4697. DPRINT1(5, ":S: JrnlPrepareService1 for %ws\n", Replica->ReplicaName->Name);
  4698. ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
  4699. //
  4700. // Open the journal. Return the Volume Monitor Entry and save it in
  4701. // the Replica struct.
  4702. //
  4703. WStatus = JrnlOpen(Replica, &pVme, ConfigRecord);
  4704. if (!WIN_SUCCESS(WStatus) || (pVme == NULL)) {
  4705. //
  4706. // Replica->FStatus has the FRS Error status return.
  4707. //
  4708. DPRINT_WS(0, "Error from JrnlOpen", WStatus);
  4709. return WStatus;
  4710. }
  4711. //
  4712. // Set the journal recovery range end point for this replica set.
  4713. //
  4714. Replica->JrnlRecoveryEnd = pVme->JrnlRecoveryEnd;
  4715. //
  4716. // Start the Volume sequence number from the highest value any replica set
  4717. // has used up to now. The FrsVsn is saved in a replica config record
  4718. // every time VSN_SAVE_INTERVAL VSN's have been handed out. If we crashed
  4719. // we could be low by at most VSN_SAVE_INTERVAL VSN's assuming the update
  4720. // request completed. At startup we add VSN_RESTART_INCREMENT to the
  4721. // FrsVsn to ensure we don't use the same VSN twice. Then update the
  4722. // config record so if we start handing out VSNs and crash we don't reuse
  4723. // them. Can't do update here since this Replica struct is not on the
  4724. // VolReplicaList yet.
  4725. //
  4726. // The above solution does not work in the case where the database is
  4727. // lost or restored from backup. In this case other members of the replcia
  4728. // set could have VSNs for files that we originated which are larger than
  4729. // the current VSN value we might now be using. This causes two problems:
  4730. // 1. It fouls up dampening checks when we send out local COs with
  4731. // VSNs that are too small in comparison to what we have sent out in
  4732. // the past resulting in dropped COs, and
  4733. // 2. When we VVJoin with our inbound partners and start receiving change
  4734. // orders that were originated from us in the past, they could arrive
  4735. // with VSNs that are larger than what we are now using. When these
  4736. // "VVJoin Change Orders" to thru VV retire our MasterVV entry in the
  4737. // VVretire version vector is advanced to this larger value. This
  4738. // will cause subsequent locally generated COs to be marked out of order
  4739. // since their VSN is now smaller than the value in the MasterVV entry.
  4740. // This will prevent downsream dampening problems but it could allow
  4741. // a local dir create / child file create to be reordered downstream
  4742. // (since both are marked out of order) and cause the child create to
  4743. // fail if the parent create hasn't occured yet.
  4744. //
  4745. // To deal with the above nonsense we will now use a GMT time value as
  4746. // our initial VSN. We will not join with a partner whose time is
  4747. // off by +/- MaxPartnerClockSkew. So if we start the VSN at
  4748. // GMT + 2*MaxPartnerClockSkew then even if the last CO we originated, before
  4749. // we lost the database, occurred at GMT+MaxPartnerClockSkew and now at
  4750. // restart our current time has moved back to GMT-MaxPartnerClockSkew then
  4751. // we will still join with our partner and our new starting VSN is:
  4752. // (GMT-MaxPartnerClockSkew) + 2*MaxPartnerClockSkew = GMT+MaxPartnerClockSkew
  4753. //
  4754. // This is as large as the last VSN we could have generated if the time
  4755. // between the last CO generated (the crash) and the time at recovery
  4756. // was zero.
  4757. //
  4758. GetSystemTimeAsFileTime((PFILETIME)&CurrentTime);
  4759. LOCK_VME(pVme);
  4760. if (CurrentTime < ConfigRecord->FrsVsn) {
  4761. //
  4762. // Note: This may not be an error situation since on every restart
  4763. // of the service we advance time by 2*MaxPartnerClockSkew to
  4764. // ensure monotonicity (see above) so any time we shutdown the
  4765. // service before we have run at least this amount of time it will
  4766. // appear that time has moved backwards.
  4767. //
  4768. DPRINT(1, ":S: WARNING: Setting FrsVsn - Current system Time has moved backwards from value in config record.\n");
  4769. FileTimeToString((PFILETIME) &CurrentTime, TimeStr);
  4770. DPRINT2(1, ":S: WARNING: CurrentTime is (%08x %08x) %s\n",
  4771. PRINTQUAD(CurrentTime), TimeStr);
  4772. FileTimeToString((PFILETIME) &ConfigRecord->FrsVsn, TimeStr);
  4773. DPRINT2(1, ":S: WARNING: ConfigRecord->FrsVsn is (%08x %08x) %s\n",
  4774. PRINTQUAD(ConfigRecord->FrsVsn), TimeStr);
  4775. CurrentTime = ConfigRecord->FrsVsn;
  4776. }
  4777. if ((CurrentTime + 2*MaxPartnerClockSkew) > pVme->FrsVsn) {
  4778. pVme->FrsVsn = CurrentTime + 2*MaxPartnerClockSkew;
  4779. DPRINT(3, ":S: Setting new pVme->FrsVsn to Current time + 2*MaxPartnerClockSkew\n");
  4780. }
  4781. FileTimeToString((PFILETIME) &pVme->FrsVsn, TimeStr);
  4782. DPRINT2(3, ":S: pVme->FrsVsn is (%08x %08x) %s\n", PRINTQUAD(pVme->FrsVsn), TimeStr);
  4783. if (GlobSeqNum == QUADZERO) {
  4784. //
  4785. // Init the global sequence number with the above computed VSN to keep
  4786. // it monotonically increasing.
  4787. //
  4788. EnterCriticalSection(&GlobSeqNumLock);
  4789. GlobSeqNum = pVme->FrsVsn;
  4790. LeaveCriticalSection(&GlobSeqNumLock);
  4791. }
  4792. UNLOCK_VME(pVme);
  4793. Replica->pVme = pVme;
  4794. return WStatus;
  4795. }
  4796. ULONG
  4797. JrnlPrepareService2(
  4798. IN PTHREAD_CTX ThreadCtx,
  4799. IN PREPLICA Replica
  4800. )
  4801. /*++
  4802. Routine Description:
  4803. Load the volume filter hash table with the DIRTable entries for
  4804. this replica set. Create the change order hash table for this replica
  4805. set and add the REPLICA struct to the replica list for this volume.
  4806. Enumerate through the IDTable and load the parent Fid Hash Table.
  4807. Note: This function is called from the DB Service thread since we have
  4808. to be able to pause the journal before the dir table enum can be done.
  4809. Arguments:
  4810. ThreadCtx -- ptr to the thread context (could be from journal or DB thread)
  4811. Replica - The replica set we are initializing.
  4812. Return Value:
  4813. A Win32 error status.
  4814. --*/
  4815. {
  4816. #undef DEBSUB
  4817. #define DEBSUB "JrnlPrepareService2:"
  4818. JET_ERR jerr, jerr1;
  4819. JET_TABLEID DIRTid;
  4820. CHAR DIRTableName[JET_cbNameMost];
  4821. PTABLE_CTX DIRTableCtx;
  4822. JET_TABLEID IDTid;
  4823. CHAR IDTableName[JET_cbNameMost];
  4824. PTABLE_CTX IDTableCtx;
  4825. PREPLICA_THREAD_CTX RtCtx;
  4826. PCONFIG_TABLE_RECORD ConfigRecord;
  4827. ULONG ReplicaNumber;
  4828. ULONG WStatus;
  4829. PVOLUME_MONITOR_ENTRY pVme;
  4830. JET_TABLEID FrsOpenTableSaveTid; // for FrsOpenTableMacro DEBUG
  4831. PFILTER_TABLE_ENTRY FilterEntry;
  4832. if (Replica == NULL) {
  4833. return ERROR_INVALID_PARAMETER;
  4834. }
  4835. DPRINT1(5, ":S: JrnlPrepareService2 for %ws\n", Replica->ReplicaName->Name);
  4836. ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
  4837. pVme = Replica->pVme;
  4838. //
  4839. // Allocate the replica thread context so we can get the directory
  4840. // filter table. Link it to the Replic context list head.
  4841. //
  4842. RtCtx = FrsAllocType(REPLICA_THREAD_TYPE);
  4843. FrsRtlInsertTailList(&Replica->ReplicaCtxListHead, &RtCtx->ReplicaCtxList);
  4844. ReplicaNumber = Replica->ReplicaNumber;
  4845. DIRTableCtx = &RtCtx->DIRTable;
  4846. //
  4847. // Open the DIR table.
  4848. //
  4849. jerr = DBS_OPEN_TABLE(ThreadCtx, DIRTableCtx, ReplicaNumber, DIRTableName, &DIRTid);
  4850. CLEANUP1_JS(0, "++ DBS_OPEN_TABLE (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
  4851. //
  4852. // Walk through the DirTable and load the data into the Volume Filter Table
  4853. // by calling JrnlInsertFilterEntry() for this Replica.
  4854. // The Replica points to the VME and the VME points to the
  4855. // volume filter table.
  4856. //
  4857. jerr = FrsEnumerateTable(ThreadCtx,
  4858. DIRTableCtx,
  4859. DFileGuidIndexx,
  4860. JrnlInsertFilterEntry,
  4861. Replica);
  4862. if ((jerr != JET_errNoCurrentRecord)) {
  4863. CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
  4864. }
  4865. //
  4866. // Now that all the entries are in place, walk through the hash table and
  4867. // construct the child lists for this ReplicaSet. This is done as a
  4868. // second pass since we can't be certain of the order in which the
  4869. // entries come from the database. First get the Child List Lock for the
  4870. // Replica Set.
  4871. //
  4872. JrnlAcquireChildLock(Replica);
  4873. WStatus = (ULONG)GhtEnumerateTable(pVme->FilterTable,
  4874. JrnlFilterLinkChildNoError,
  4875. Replica);
  4876. if (!WIN_SUCCESS(WStatus)) {
  4877. JrnlReleaseChildLock(Replica);
  4878. DPRINT_WS(0, "Error from JrnlLinkChildren", WStatus);
  4879. GHT_DUMP_TABLE(4, pVme->FilterTable);
  4880. goto RETURN;
  4881. }
  4882. // JrnlReleaseChildLock(Replica);
  4883. // GHT_DUMP_TABLE(5, pVme->FilterTable);
  4884. // JrnlAcquireChildLock(Replica);
  4885. //
  4886. // Go find the root entry for this Replica Set in the Filter Table.
  4887. //
  4888. FilterEntry = (PFILTER_TABLE_ENTRY) GhtEnumerateTable(pVme->FilterTable,
  4889. JrnlFilterGetRoot,
  4890. Replica);
  4891. if (FilterEntry == NULL) {
  4892. DPRINT1(0, ":S: Error from JrnlFilterGetRoot. No Root for %d\n",
  4893. Replica->ReplicaNumber);
  4894. GHT_DUMP_TABLE(5, pVme->FilterTable);
  4895. goto RETURN_INV_DATA;
  4896. }
  4897. //
  4898. // Replay the inbound log table and update the volume filter table with
  4899. // any directory changes.
  4900. //
  4901. // Note: Add code to replay the inbound log and update the filter table.
  4902. // It may be better to handle this at startup when we are recovering the
  4903. // staging areas. But, the filter table may not exist yet.
  4904. #if DBG
  4905. if (DoDebug(5, DEBSUB)) {
  4906. DPRINT(5," >>>>>>>>>>>>>>> Top Down dump of Filter Tree <<<<<<<<<<<<<<<<\n");
  4907. JrnlEnumerateFilterTreeTD(pVme->FilterTable,
  4908. FilterEntry,
  4909. JrnlSubTreePrint,
  4910. Replica);
  4911. }
  4912. #endif DBG
  4913. JrnlReleaseChildLock(Replica);
  4914. //
  4915. // Build the Parent directory table.
  4916. //
  4917. IDTableCtx = &RtCtx->IDTable;
  4918. //
  4919. // Open the ID table.
  4920. //
  4921. jerr = DBS_OPEN_TABLE(ThreadCtx, IDTableCtx, ReplicaNumber, IDTableName, &IDTid);
  4922. CLEANUP1_JS(0, "++ Building parent FID table (%s):", IDTableName, jerr, RETURN_INV_DATA);
  4923. //
  4924. // Walk through the IDTable and load the data into the Volume Parent Dir
  4925. // Table by calling JrnlInsertParentEntry() for this Replica.
  4926. // The Replica points to the VME and the VME points to the
  4927. // parent dir table.
  4928. //
  4929. jerr = FrsEnumerateTable(ThreadCtx,
  4930. IDTableCtx,
  4931. GuidIndexx,
  4932. JrnlInsertParentEntry,
  4933. Replica);
  4934. if ((jerr != JET_errNoCurrentRecord)) {
  4935. CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", IDTableName, jerr, RETURN_INV_DATA);
  4936. }
  4937. //
  4938. // Replay the inbound log table and update the volume Parent Dir table
  4939. // for any file creates, deletes or renames.
  4940. //
  4941. // Note: Add code to replay the inbound log and update the Parent Dir table.
  4942. // It may be better to handle this at startup when we are recovering the
  4943. // staging areas. But, the filter table may not exist yet.
  4944. //
  4945. // Add the replica struct to the list of replica sets served by this
  4946. // volume journal.
  4947. //
  4948. if (AcquireVmeRef(pVme) == 0) {
  4949. WStatus = ERROR_OPERATION_ABORTED;
  4950. goto RETURN;
  4951. }
  4952. /////////////////////////////////////////////////
  4953. //
  4954. // Start the first read on the volume. Check first if it is PAUSED and
  4955. // set state to starting. If this is the first replica set on the volume
  4956. // the state will be INITIALIZING and we leave that alone so additional
  4957. // journal buffers get allocated.
  4958. //
  4959. // pVme = Replica->pVme;
  4960. if (pVme->JournalState != JRNL_STATE_INITIALIZING) {
  4961. if (pVme->JournalState == JRNL_STATE_PAUSED) {
  4962. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STARTING);
  4963. } else {
  4964. DPRINT2(0, "++ ERROR - Journal for %ws is in an unexpected state: %s\n",
  4965. Replica->ReplicaName->Name, RSS_NAME(pVme->JournalState));
  4966. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ERROR);
  4967. WStatus = ERROR_OPERATION_ABORTED;
  4968. goto RETURN;
  4969. }
  4970. }
  4971. //
  4972. // Initialize the LastUsnRecordProcessed for this replica set to the value
  4973. // saved in the config record or the value from the Inlog record with the
  4974. // largest USN so we don't reprocess them. If we end up reading (replaying)
  4975. // the journal at an earlier point to let another replica set catch up we
  4976. // need to ignore those old records. If LastShutdown or FSVolLastUSN is 0
  4977. // then this is the very first time we have started replication on this
  4978. // replica set so set the FSVolLastUSN and LastUsnRecordProcessed to the
  4979. // current journal read point, pVme->JrnlReadPoint.
  4980. //
  4981. if ((ConfigRecord->LastShutdown == 0) ||
  4982. (ConfigRecord->FSVolLastUSN == 0)) {
  4983. if (!(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING)) {
  4984. DPRINT2(0, ":S: BETA ERROR - Service state is %d; not _CREATING for %ws\n",
  4985. ConfigRecord->ServiceState, Replica->ReplicaName->Name);
  4986. }
  4987. ConfigRecord->FSVolLastUSN = pVme->JrnlReadPoint;
  4988. Replica->LastUsnRecordProcessed = pVme->JrnlReadPoint;
  4989. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
  4990. } else {
  4991. //
  4992. // Start where we left off and minimize with any other replicas.
  4993. //
  4994. Replica->LastUsnRecordProcessed = ConfigRecord->FSVolLastUSN;
  4995. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
  4996. //
  4997. // Advance to largest USN of Inlog record.
  4998. //
  4999. if (Replica->JrnlRecoveryStart > Replica->LastUsnRecordProcessed) {
  5000. Replica->LastUsnRecordProcessed = Replica->JrnlRecoveryStart;
  5001. DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x (JrnlRecoveryStart > LastUsnRecordProcessed)\n",
  5002. PRINTQUAD(Replica->LastUsnRecordProcessed));
  5003. }
  5004. //
  5005. // start at the earliest USN of any replica set on the volume.
  5006. // If the journal is active it is currently using JrnlReadPoint to
  5007. // track its current read point. Since we may be starting a replica
  5008. // set on an active volume ReplayUsn is used to save the starting
  5009. // point. After the volume is paused and then unpaused ReplayUsn
  5010. // is copied to JrnlReadPoint where the journal will start reading.
  5011. //
  5012. if (pVme->ReplayUsnValid) {
  5013. DPRINT1(4, ":S: ReplayUsn was: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  5014. pVme->ReplayUsn = min(Replica->LastUsnRecordProcessed, pVme->ReplayUsn);
  5015. } else {
  5016. DPRINT(4, ":S: No ReplayUsn was active.\n");
  5017. pVme->ReplayUsn = Replica->LastUsnRecordProcessed;
  5018. pVme->ReplayUsnValid = TRUE;
  5019. }
  5020. DPRINT1(4, ":S: ReplayUsn is: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  5021. }
  5022. //
  5023. // Init the inlog commit point so if we shutdown the saved value is correct.
  5024. //
  5025. Replica->InlogCommitUsn = Replica->LastUsnRecordProcessed;
  5026. DPRINT1(4, ":S: Replica->InlogCommitUsn: %08x %08x\n",
  5027. PRINTQUAD(Replica->InlogCommitUsn));
  5028. //
  5029. // Track the oldest USN save point and the most recent USN progress point
  5030. // for any replica set on the volume.
  5031. //
  5032. if ((pVme->LastUsnSavePoint == (USN)0) ||
  5033. (pVme->LastUsnSavePoint > Replica->LastUsnRecordProcessed)) {
  5034. pVme->LastUsnSavePoint = Replica->LastUsnRecordProcessed;
  5035. }
  5036. if (pVme->MonitorMaxProgressUsn < Replica->LastUsnRecordProcessed) {
  5037. pVme->MonitorMaxProgressUsn = Replica->LastUsnRecordProcessed;
  5038. }
  5039. //
  5040. // This replica's FrsVsn may be out of date by a large margin
  5041. // if it has been awhile since the set was last started successfully.
  5042. // This results in an assert in DbsReplicaSaveMark(). So, as
  5043. // long as the FrsVsns look sane, assign the volume's current
  5044. // Vsn to the replica set.
  5045. //
  5046. FRS_ASSERT(pVme->FrsVsn >= ConfigRecord->FrsVsn);
  5047. ConfigRecord->FrsVsn = pVme->FrsVsn;
  5048. /////////////////////////////////////////////////
  5049. InitializeListHead(&Replica->RecoveryRefreshList);
  5050. InterlockedIncrement(&Replica->ReferenceCount);
  5051. pVme->ActiveReplicas += 1;
  5052. FrsRtlInsertTailList(&pVme->ReplicaListHead, &Replica->VolReplicaList);
  5053. WStatus = ERROR_SUCCESS;
  5054. RETURN:
  5055. //
  5056. // Close the replica tables and release the RtCtx struct.
  5057. //
  5058. DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
  5059. return WStatus;
  5060. RETURN_INV_DATA:
  5061. DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
  5062. return ERROR_INVALID_DATA;
  5063. }
  5064. JET_ERR
  5065. JrnlInsertParentEntry(
  5066. IN PTHREAD_CTX ThreadCtx,
  5067. IN PTABLE_CTX TableCtx,
  5068. IN PVOID Record,
  5069. IN PVOID Context
  5070. )
  5071. /*++
  5072. Routine Description:
  5073. This is a worker function passed to FrsEnumerateTable(). Each time
  5074. it is called with an IDTable record it save the parent info in the
  5075. Parent Directory Table for the volume.
  5076. Arguments:
  5077. ThreadCtx - Needed to access Jet.
  5078. TableCtx - A ptr to an IDTable context struct.
  5079. Record - A ptr to a IDTable record.
  5080. Context - A ptr to a Replica struct.
  5081. Thread Return Value:
  5082. A Jet error status. Success means call us with the next record.
  5083. Failure means don't call again and pass our status back to the
  5084. caller of FrsEnumerateTable().
  5085. --*/
  5086. {
  5087. #undef DEBSUB
  5088. #define DEBSUB "JrnlInsertParentEntry:"
  5089. ULONGLONG SystemTime;
  5090. ULONGLONG ExpireTime;
  5091. JET_ERR jerr;
  5092. ULONG GStatus;
  5093. PIDTABLE_RECORD IDTableRec = (PIDTABLE_RECORD) Record ;
  5094. PQHASH_TABLE HashTable = ((PREPLICA) Context)->pVme->ParentFidTable;
  5095. //
  5096. // Check for expired tombstones.
  5097. //
  5098. if (IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED)) {
  5099. GetSystemTimeAsFileTime((PFILETIME)&SystemTime);
  5100. COPY_TIME(&ExpireTime, &IDTableRec->TombStoneGC);
  5101. if ((ExpireTime < SystemTime) && (ExpireTime != QUADZERO)) {
  5102. //
  5103. // IDTable record has expired. Delete it.
  5104. // If there is a problem, complain but keep going.
  5105. //
  5106. jerr = DbsDeleteTableRecord(TableCtx);
  5107. DPRINT_JS(0, "ERROR - DbsDeleteTableRecord :", jerr);
  5108. return JET_errSuccess;
  5109. }
  5110. }
  5111. //
  5112. // Include the entry if replication is enabled and not marked for deletion
  5113. // and not a new file being created when we last shutdown.
  5114. //
  5115. if (IDTableRec->ReplEnabled &&
  5116. !IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED) &&
  5117. !IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_NEW_FILE_IN_PROGRESS)) {
  5118. if (IDTableRec->FileID == ZERO_FID) {
  5119. //
  5120. // We shouldn't see any records with a zero FID but some prior
  5121. // bugs could cause this to happen. Dump em out but don't try
  5122. // to insert into table since it will assert.
  5123. //
  5124. DPRINT(0, "++ WARNING -- IDTable record with zero FID found.\n");
  5125. DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
  5126. } else {
  5127. GStatus = QHashInsert(HashTable,
  5128. &IDTableRec->FileID,
  5129. &IDTableRec->ParentFileID,
  5130. ((PREPLICA) Context)->ReplicaNumber,
  5131. FALSE);
  5132. if (GStatus != GHT_STATUS_SUCCESS ) {
  5133. DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
  5134. }
  5135. }
  5136. }
  5137. //
  5138. // Return success so we can keep going thru the ID table.
  5139. //
  5140. return JET_errSuccess;
  5141. }
  5142. ULONG_PTR
  5143. JrnlFilterLinkChild (
  5144. PGENERIC_HASH_TABLE Table,
  5145. PVOID Buffer,
  5146. PVOID Context
  5147. )
  5148. /*++
  5149. Routine Description:
  5150. This function is called thru GhtEnumerateTable() to connect this
  5151. filter table entry to the parent list for the replica set passed in
  5152. Context. The GhtEnumerateTable function does not acquire any row locks
  5153. so this function is free to call GhtLookup or GhtInsert without deadlock
  5154. conflicts. It is assumed that the caller knows that it is safe to
  5155. enumerate the table. The caller is also responsible for getting the
  5156. child list lock for the replica set before calling GhtEnumerateTable().
  5157. The child list lock is associated with the replica set so when you have
  5158. the lock the child list entries for all filter entries in this replica
  5159. set are protected. When we enumerate down a subtree we only need to get
  5160. one lock.
  5161. WARNING - There is no table level lock on the Filter Table. The Filter
  5162. table is per volume so multiple replica sets could be using the same
  5163. table. The locking is at the row level where the row is indexed by
  5164. the hash function. This means that this function can only be used
  5165. when the Journal is paused. To start/add a replica set after the
  5166. system is running you must pause the journal, update the filter table
  5167. and then unpause the journal.
  5168. Arguments:
  5169. Table - the hash table being enumerated (to lookup parent entry).
  5170. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5171. Context - A pointer to the Replica struct for the replica data added to the
  5172. table.
  5173. Return Value:
  5174. A Win32 error status. A failure status return aborts enumeration.
  5175. --*/
  5176. {
  5177. #undef DEBSUB
  5178. #define DEBSUB "JrnlFilterLinkChild:"
  5179. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5180. PREPLICA Replica = (PREPLICA) Context;
  5181. PFILTER_TABLE_ENTRY ParentFilterEntry;
  5182. ULONG GStatus;
  5183. //
  5184. // Skip entry if it is not associated with the replica set of interest.
  5185. //
  5186. if (FilterEntry->Replica != Replica) {
  5187. return ERROR_SUCCESS;
  5188. }
  5189. //
  5190. // If this is the root of the replica tree there is no parent to link it to.
  5191. //
  5192. if (FilterEntry->DParentFileID == ZERO_FID) {
  5193. return ERROR_SUCCESS;
  5194. }
  5195. //
  5196. // If this entry has already been linked then return an error status to
  5197. // abort the enumeration since the entry can't be on more than one list.
  5198. //
  5199. if (FilterEntry->ChildEntry.Flink != NULL) {
  5200. return ERROR_GEN_FAILURE;
  5201. }
  5202. //
  5203. // Find the parent to link this child to.
  5204. //
  5205. GStatus = GhtLookup(Table,
  5206. &FilterEntry->DParentFileID,
  5207. TRUE,
  5208. &ParentFilterEntry);
  5209. if (GStatus != GHT_STATUS_SUCCESS) {
  5210. DPRINT1(0, "++ Error: Parent entry not found for - %08x\n", FilterEntry);
  5211. FRS_JOURNAL_FILTER_PRINT(0, Table, FilterEntry);
  5212. return ERROR_GEN_FAILURE;
  5213. }
  5214. //
  5215. // Put the Dir on the list and drop the ref count we got from Lookup.
  5216. //
  5217. InsertHeadList(&ParentFilterEntry->ChildHead, &FilterEntry->ChildEntry);
  5218. GhtDereferenceEntryByAddress(Table, ParentFilterEntry, TRUE);
  5219. return ERROR_SUCCESS;
  5220. }
  5221. ULONG_PTR
  5222. JrnlFilterLinkChildNoError(
  5223. PGENERIC_HASH_TABLE Table,
  5224. PVOID Buffer,
  5225. PVOID Context
  5226. )
  5227. /*++
  5228. Routine Description:
  5229. See JrnlFilterLinkChild().
  5230. A dirtable entry may appear to be orphaned if it is stuck in the
  5231. preinstall area and its parent has been deleted. Ignore errors
  5232. for now.
  5233. This can also happen if a remote co create is executed for a dir at the
  5234. same time the subtree containing this dir is being moved out of the
  5235. replica tree. The journal code will remove the filter entries immediately
  5236. so we skip future file changes in the subtree. So the parent is gone when
  5237. the filter entry for the dir create is added. In the course of processing
  5238. the moveout on the parent this dir entry is cleaned up.
  5239. Arguments:
  5240. Table - the hash table being enumerated (to lookup parent entry).
  5241. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5242. Context - A pointer to the Replica struct for the replica data added to the
  5243. table.
  5244. Return Value:
  5245. ERROR_SUCCESS
  5246. --*/
  5247. {
  5248. #undef DEBSUB
  5249. #define DEBSUB "JrnlFilterLinkChildNoError:"
  5250. ULONG WStatus;
  5251. WStatus = (ULONG)JrnlFilterLinkChild(Table, Buffer, Context);
  5252. DPRINT_WS(0, "++ WARN - orphaned dir; probably stuck in preinstall with deleted parent", WStatus);
  5253. return ERROR_SUCCESS;
  5254. }
  5255. ULONG
  5256. JrnlFilterUnlinkChild (
  5257. PGENERIC_HASH_TABLE Table,
  5258. PVOID Buffer,
  5259. PVOID Context
  5260. )
  5261. /*++
  5262. Routine Description:
  5263. This function is unlinks a filter entry from the child list.
  5264. The caller must get the child list lock for the replica set.
  5265. The child list lock is associated with the replica set so when you have
  5266. the lock the child list entries for all filter entries in this replica
  5267. set are protected. When we enumerate down a subtree we only need to get
  5268. one lock.
  5269. Arguments:
  5270. Table - the hash table being enumerated (to lookup parent entry).
  5271. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5272. Context - A pointer to the Replica struct for the replica data added to the
  5273. table.
  5274. Return Value:
  5275. A Win32 error status. A failure status return aborts enumeration.
  5276. --*/
  5277. {
  5278. #undef DEBSUB
  5279. #define DEBSUB "JrnlFilterUnlinkChild:"
  5280. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5281. PREPLICA Replica = (PREPLICA) Context;
  5282. PFILTER_TABLE_ENTRY ParentFilterEntry;
  5283. ULONG GStatus;
  5284. //
  5285. // Skip entry if it is not associated with the replica set of interest.
  5286. // Return error_success so this function can be called by GhtEnumerateTable().
  5287. //
  5288. if (FilterEntry->Replica != Replica) {
  5289. return ERROR_SUCCESS;
  5290. }
  5291. //
  5292. // If this entry is not on the list then return an error status to
  5293. // abort the enumeration.
  5294. //
  5295. if (FilterEntry->ChildEntry.Flink == NULL) {
  5296. return ERROR_GEN_FAILURE;
  5297. }
  5298. //
  5299. // Pull the entry off the list.
  5300. //
  5301. FrsRemoveEntryList(&FilterEntry->ChildEntry);
  5302. FilterEntry->ChildEntry.Flink = NULL;
  5303. FilterEntry->ChildEntry.Blink = NULL;
  5304. return ERROR_SUCCESS;
  5305. }
  5306. ULONG_PTR
  5307. JrnlFilterGetRoot (
  5308. PGENERIC_HASH_TABLE Table,
  5309. PVOID Buffer,
  5310. PVOID Context
  5311. )
  5312. /*++
  5313. Routine Description:
  5314. This function is called thru GhtEnumerateTable() to find the root
  5315. of the replica set specified by the Context parameter.
  5316. Arguments:
  5317. Table - the hash table being enumerated (to lookup parent entry).
  5318. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5319. Context - A pointer to the Replica struct for the replica data added to the
  5320. table.
  5321. Return Value:
  5322. The root filter entry for the Replica Set, else NULL to keep looking.
  5323. --*/
  5324. {
  5325. #undef DEBSUB
  5326. #define DEBSUB "JrnlFilterGetRoot:"
  5327. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5328. PREPLICA Replica = (PREPLICA) Context;
  5329. //
  5330. // Skip entry if it is not associated with the replica set of interest.
  5331. //
  5332. if (FilterEntry->Replica != Replica) {
  5333. return (ULONG_PTR)NULL;
  5334. }
  5335. //
  5336. // If this is the root of the replica tree we're done.
  5337. //
  5338. if (FilterEntry->DParentFileID == ZERO_FID) {
  5339. return (ULONG_PTR)FilterEntry;
  5340. }
  5341. return (ULONG_PTR)NULL;
  5342. }
  5343. ULONG
  5344. JrnlSubTreePrint (
  5345. PGENERIC_HASH_TABLE Table,
  5346. PVOID Buffer,
  5347. PVOID Context
  5348. )
  5349. /*++
  5350. Routine Description:
  5351. This function is called thru GhtEnumerateTable() to dump a Filter entry.
  5352. The enum caller takes a ref on the entry. we drop it here.
  5353. Arguments:
  5354. Table - the hash table being enumerated (to lookup parent entry).
  5355. Buffer - a ptr to a FILTER_TABLE_ENTRY
  5356. Context - A pointer to the Replica struct for the replica data added to the
  5357. table.
  5358. Return Value:
  5359. Win32 status
  5360. --*/
  5361. {
  5362. #undef DEBSUB
  5363. #define DEBSUB "JrnlSubTreePrint:"
  5364. PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
  5365. PREPLICA Replica = (PREPLICA) Context;
  5366. //
  5367. // print the entry if it is associated with the replica set of interest.
  5368. //
  5369. if (FilterEntry->Replica == Replica) {
  5370. FRS_JOURNAL_FILTER_PRINT(4, Table, FilterEntry);
  5371. }
  5372. DECREMENT_FILTER_REF_COUNT(FilterEntry);
  5373. return ERROR_SUCCESS;
  5374. }
  5375. BOOL
  5376. ActiveChildrenKeyMatch(
  5377. PVOID Buf,
  5378. PVOID QKey
  5379. )
  5380. /*++
  5381. Routine Description:
  5382. Check for an exact key match.
  5383. Arguments:
  5384. Buf -- ptr to a Guid1.
  5385. QKey -- ptr to Guid2.
  5386. Return Value:
  5387. TRUE if exact match.
  5388. --*/
  5389. {
  5390. #undef DEBSUB
  5391. #define DEBSUB "ActiveChildrenKeyMatch:"
  5392. PULONG pUL1, pUL2;
  5393. pUL1 = (PULONG) Buf;
  5394. pUL2 = (PULONG) QKey;
  5395. if (!ValueIsMultOf4(pUL1)) {
  5396. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL1, *pUL1);
  5397. FRS_ASSERT(ValueIsMultOf4(pUL1));
  5398. return 0xFFFFFFFF;
  5399. }
  5400. if (!ValueIsMultOf4(pUL2)) {
  5401. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL2, *pUL2);
  5402. FRS_ASSERT(ValueIsMultOf4(pUL2));
  5403. return 0xFFFFFFFF;
  5404. }
  5405. return GUIDS_EQUAL(pUL1, pUL2);
  5406. }
  5407. ULONG
  5408. ActiveChildrenHashCalc(
  5409. PVOID Buf,
  5410. PULONGLONG QKey
  5411. )
  5412. /*++
  5413. Routine Description:
  5414. Calculate a hash value for the file guid used in the ActiveChildren Table.
  5415. Arguments:
  5416. Buf -- ptr to a Guid.
  5417. QKey -- Returned 8 byte hash key for the QKey field of QHASH_ENTRY.
  5418. Return Value:
  5419. 32 bit hash value.
  5420. --*/
  5421. {
  5422. #undef DEBSUB
  5423. #define DEBSUB "ActiveChildrenHashCalc:"
  5424. PULONG pUL = (PULONG) Buf;
  5425. PUSHORT pUS = (PUSHORT) Buf;
  5426. if (!ValueIsMultOf4(pUL)) {
  5427. DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL, *pUL);
  5428. FRS_ASSERT(ValueIsMultOf4(pUL));
  5429. return 0xFFFFFFFF;
  5430. }
  5431. //
  5432. // Calc QKey, 4 byte hash is ok.
  5433. //
  5434. *QKey = (ULONGLONG) (pUL[0] ^ pUL[1] ^ pUL[2] ^ pUL[3]);
  5435. //
  5436. // Calc hash based on the time. Include node part for remote COs.
  5437. //
  5438. return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2] ^ pUS[6] ^ pUS[7]);
  5439. }
  5440. ULONG
  5441. JrnlOpen(
  5442. IN PREPLICA Replica,
  5443. OUT PVOLUME_MONITOR_ENTRY *pVmeArg,
  5444. PCONFIG_TABLE_RECORD ConfigRecord
  5445. )
  5446. /*++
  5447. Routine Description:
  5448. This routine opens the journal specified by the Replica->Volume parameter.
  5449. It creates and fills in a Volume monitor entry that can
  5450. be used to read the NTFS Journal. It checks if objects and object IDs
  5451. are supported on the volume and fails if they aren't. It checks for an
  5452. object ID on the root directory of the volume and puts one there if necessary.
  5453. It keeps a list of volumes (VolumeMonitorQueue) that currently have journal
  5454. files open. If it finds this request in the list it bumps the ref count
  5455. and returns. pVme is set to NULL with status success indicating I/O
  5456. on the journal is proceeding.
  5457. If this volume is not in the list then it is added. The volume Object ID
  5458. is used to identify the volume in the Volume Monitor list. A read
  5459. is not posted to the journal at this time. This allows journal opens for
  5460. other replica sets to be done so we start out at the lowest USN of all
  5461. replica sets hosted by a given volume. In addition we need to know about
  5462. all current replica sets when we start filtering journal entries.
  5463. The volume monitor entry related to to the given replica set is
  5464. returned in pVme. If we fail to open the journal pVmeArg is NULL
  5465. and status indicates the failure.
  5466. If the journal doesn't exist it is created. The max size is set to
  5467. JRNL_DEFAULT_MAX_SIZE MB with an allocation size of
  5468. JRNL_DEFAULT_ALLOC_DELTA MB.
  5469. The following checks are made to make sure that the volume and journal
  5470. info is not changed while the service was not running.
  5471. VOLUME ROOT OBJECTID MISMATCH CHECK:
  5472. In case of a mismatch the information in the Db is updated with the
  5473. correct value for the volume guid.
  5474. JOURNAL ID MISMATCH CHECK:
  5475. In case of a mismatch the replica set is marked to be deleted.
  5476. Arguments:
  5477. Replica: Replica being opened
  5478. pVmeArg: A pointer to return the Volume Monitor Entry in.
  5479. ConfigRecord: The ConfigTqable record for this replica set.
  5480. Return Value:
  5481. Win32 status
  5482. --*/
  5483. {
  5484. #undef DEBSUB
  5485. #define DEBSUB "JrnlOpen:"
  5486. USN_JOURNAL_DATA UsnJournalData;
  5487. CREATE_USN_JOURNAL_DATA CreateUsnJournalData = {
  5488. 0, // MaximumSize from registry
  5489. JRNL_DEFAULT_ALLOC_DELTA // AllocationDelta
  5490. };
  5491. IO_STATUS_BLOCK Iosb;
  5492. ULONG JournalSize;
  5493. NTSTATUS Status;
  5494. DWORD WStatus;
  5495. ULONG BytesReturned;
  5496. PVOLUME_MONITOR_ENTRY pVme;
  5497. HANDLE RootHandle;
  5498. HANDLE VolumeHandle = INVALID_HANDLE_VALUE;
  5499. ULONG VolumeInfoLength;
  5500. PFILE_FS_VOLUME_INFORMATION VolumeInfo;
  5501. FILE_OBJECTID_BUFFER ObjectIdBuffer;
  5502. PLIST_ENTRY Entry;
  5503. WCHAR VolumeRootDir[MAX_PATH + 1];
  5504. CHAR GuidStr[GUID_CHAR_LEN];
  5505. CHAR TimeString[32];
  5506. CHAR HashTableName[40];
  5507. PCOMMAND_PACKET CmdPkt = NULL;
  5508. HANDLE DummyHandle = INVALID_HANDLE_VALUE;
  5509. *pVmeArg = NULL;
  5510. //
  5511. // Does the volume exist and is it NTFS?
  5512. //
  5513. WStatus = FrsVerifyVolume(Replica->Volume,
  5514. Replica->SetName->Name,
  5515. FILE_PERSISTENT_ACLS | FILE_SUPPORTS_OBJECT_IDS);
  5516. if (!WIN_SUCCESS(WStatus)) {
  5517. DPRINT2_WS(3, ":S: JrnlOpen - Root path Volume (%ws) for %ws does not exist or is not NTFS;",
  5518. Replica->Volume, Replica->SetName->Name, WStatus);
  5519. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5520. return WStatus;
  5521. }
  5522. //
  5523. // "\\.\" is used as an escape prefix to prevent the name translator
  5524. // from appending a trailing "\" on a drive letter. Need to do a volume open.
  5525. // \\.\E: gets mapped to E: (really an NT internal device name)
  5526. // \\.\E:\ gets mapped to E:\
  5527. // E: gets mapped to E:\
  5528. // E:\ gets mapped to E:\
  5529. //
  5530. //
  5531. // Get a volume handle.
  5532. //
  5533. _wcsupr( Replica->Volume );
  5534. VolumeHandle = CreateFile(Replica->Volume,
  5535. GENERIC_READ | GENERIC_WRITE,
  5536. FILE_SHARE_READ | FILE_SHARE_WRITE,
  5537. NULL,
  5538. OPEN_EXISTING,
  5539. FILE_ATTRIBUTE_NORMAL,
  5540. NULL );
  5541. if (!HANDLE_IS_VALID(VolumeHandle)) {
  5542. WStatus = GetLastError();
  5543. DPRINT1_WS(0, "++ ERROR - JrnlOpen: Unable to open %ws volume :",
  5544. Replica->Volume, WStatus);
  5545. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5546. return WStatus;
  5547. } else {
  5548. WStatus = GetLastError();
  5549. DPRINT1_WS(4, "++ JrnlOpen: Open on volume %ws :", Replica->Volume, WStatus);
  5550. }
  5551. //
  5552. // Get the volume information.
  5553. //
  5554. pVme = FrsAllocType(VOLUME_MONITOR_ENTRY_TYPE);
  5555. pVme->FrsVsn = QUADZERO;
  5556. pVme->ReplayUsnValid = FALSE;
  5557. VolumeInfoLength = sizeof(FILE_FS_VOLUME_INFORMATION) +
  5558. MAXIMUM_VOLUME_LABEL_LENGTH;
  5559. VolumeInfo = &pVme->FSVolInfo;
  5560. Status = NtQueryVolumeInformationFile(VolumeHandle,
  5561. &Iosb,
  5562. VolumeInfo,
  5563. VolumeInfoLength,
  5564. FileFsVolumeInformation);
  5565. if ( NT_SUCCESS(Status) ) {
  5566. VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
  5567. FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
  5568. DPRINT5(4,":S: %-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
  5569. VolumeInfo->VolumeLabel,
  5570. VolumeInfo->VolumeLabelLength,
  5571. (VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
  5572. VolumeInfo->VolumeSerialNumber,
  5573. TimeString);
  5574. if (!VolumeInfo->SupportsObjects) {
  5575. //
  5576. // No object support on the volume.
  5577. //
  5578. EPRINT4(EVENT_FRS_VOLUME_NOT_SUPPORTED,
  5579. Replica->SetName->Name, ComputerName, Replica->Root, Replica->Volume);
  5580. DPRINT(0, ":S: ERROR - Object IDs are not supported on the volume.\n");
  5581. pVme = FrsFreeType(pVme);
  5582. FRS_CLOSE(VolumeHandle);
  5583. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  5584. return FrsSetLastNTError(STATUS_NOT_IMPLEMENTED);
  5585. }
  5586. //
  5587. // Scan the VolumeMonitorStopQueue to see if we already tried
  5588. // this one and failed.
  5589. //
  5590. ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  5591. if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
  5592. //
  5593. // Journaling was stopped on this volume by request. E.g.,
  5594. // when a replica set is stopped and restarted in order
  5595. // to pick up a new file or dir filter list.
  5596. //
  5597. // Allow the restart.
  5598. //
  5599. if (WIN_SUCCESS(pE->WStatus)) {
  5600. //
  5601. // No more references; free the memory
  5602. //
  5603. //
  5604. // Currently, replica sets continue to reference
  5605. // their Vme even after VmeDeactivate(). So don't
  5606. // free Vmes regardless of their reference count
  5607. //
  5608. // if (pE->ReferenceCount == 0) {
  5609. // FrsRtlRemoveEntryQueueLock(&VolumeMonitorStopQueue,
  5610. // &pE->ListEntry);
  5611. // FrsFreeType(pE);
  5612. // }
  5613. continue;
  5614. }
  5615. //
  5616. // We already tried this one and failed. Free the entry,
  5617. // close the handle and return with same status as last time.
  5618. //
  5619. WStatus = pE->WStatus;
  5620. ReleaseListLock(&VolumeMonitorStopQueue);
  5621. DPRINT3(4,":S: VME is on stop queue. %-16ws, VSN: %08X, VolCreTim: %s\n",
  5622. VolumeInfo->VolumeLabel, VolumeInfo->VolumeSerialNumber,
  5623. TimeString);
  5624. FrsFreeType(pVme);
  5625. FRS_CLOSE(VolumeHandle);
  5626. return WStatus;
  5627. }
  5628. );
  5629. } else {
  5630. DPRINT_NT(0, ":S: ERROR - Volume root QueryVolumeInformationFile failed.", Status);
  5631. pVme = FrsFreeType(pVme);
  5632. FRS_CLOSE(VolumeHandle);
  5633. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5634. return FrsSetLastNTError(Status);
  5635. }
  5636. //
  5637. // Get the volume root dir object ID.
  5638. // Always open the replica root by masking off the FILE_OPEN_REPARSE_POINT flag
  5639. // because we want to open the destination dir not the junction if the root
  5640. // happens to be a mount point.
  5641. //
  5642. wsprintf( VolumeRootDir, TEXT("%ws\\"), Replica->Volume);
  5643. WStatus = FrsOpenSourceFileW(&RootHandle,
  5644. VolumeRootDir,
  5645. WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5646. if (WIN_ACCESS_DENIED(WStatus)) {
  5647. //
  5648. // For some mysterious reason the root dir on some volumes ends up
  5649. // with the read-only attribute set. It is currently not understood
  5650. // how this happens (as of 6/2000) but PSS has seen it on a number
  5651. // of cases, generally when DCPromo fails because FRS can't init
  5652. // the sys vol. We are going to just clear it here and try again.
  5653. // Unfortunately the ATTRIB cmd does not work on the root dir.
  5654. //
  5655. FILE_BASIC_INFORMATION BasicInfo;
  5656. HANDLE hFile;
  5657. WStatus = FrsOpenSourceFileW(&hFile,
  5658. VolumeRootDir,
  5659. READ_ATTRIB_ACCESS | FILE_WRITE_ATTRIBUTES,
  5660. OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5661. DPRINT1_WS(0, "++ JrnlOpen: Open on root dir %ws :", VolumeRootDir, WStatus);
  5662. if (HANDLE_IS_VALID(hFile)) {
  5663. Status = NtQueryInformationFile( hFile,
  5664. &Iosb,
  5665. &BasicInfo,
  5666. sizeof( BasicInfo ),
  5667. FileBasicInformation );
  5668. if (NT_SUCCESS( Status )) {
  5669. DPRINT2(0,"Attributes for %s are currently: %0x\n",
  5670. VolumeRootDir, BasicInfo.FileAttributes );
  5671. if (BooleanFlagOn(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY)) {
  5672. ClearFlag(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY);
  5673. Status = NtSetInformationFile( hFile,
  5674. &Iosb,
  5675. &BasicInfo,
  5676. sizeof( BasicInfo ),
  5677. FileBasicInformation );
  5678. if (NT_SUCCESS( Status )) {
  5679. DPRINT(0, "Read-Only attribute cleared succesfully\n" );
  5680. //
  5681. // ******** Add event log message saying what we did.
  5682. //
  5683. } else {
  5684. DPRINT_NT(0, "Couldn't set attributes, error status :", Status );
  5685. }
  5686. }
  5687. CloseHandle( hFile );
  5688. //
  5689. // Now retry the open.
  5690. //
  5691. WStatus = FrsOpenSourceFileW(&RootHandle,
  5692. VolumeRootDir,
  5693. WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
  5694. } else {
  5695. DPRINT_NT(0, "Couldn't get attributes, error status :", Status );
  5696. WStatus = FrsSetLastNTError(Status);
  5697. CloseHandle( hFile );
  5698. }
  5699. }
  5700. }
  5701. if (!WIN_SUCCESS(WStatus)) {
  5702. DPRINT1_WS(0, ":S: ERROR - Failed to open the volume root dir: %ws ;",
  5703. VolumeRootDir, WStatus);
  5704. pVme = FrsFreeType(pVme);
  5705. FRS_CLOSE(VolumeHandle);
  5706. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5707. return WStatus;
  5708. }
  5709. //
  5710. // zero the buffer in case the data that comes back is short.
  5711. //
  5712. ZeroMemory(&ObjectIdBuffer, sizeof(FILE_OBJECTID_BUFFER));
  5713. //
  5714. // Get the Object ID from the volume root.
  5715. //
  5716. Status = NtFsControlFile(
  5717. RootHandle, // file handle
  5718. NULL, // event
  5719. NULL, // apc routine
  5720. NULL, // apc context
  5721. &Iosb, // iosb
  5722. FSCTL_GET_OBJECT_ID, // FsControlCode
  5723. &RootHandle, // input buffer
  5724. sizeof(HANDLE), // input buffer length
  5725. &ObjectIdBuffer, // OutputBuffer for data from the FS
  5726. sizeof(FILE_OBJECTID_BUFFER)); // OutputBuffer Length
  5727. if (NT_SUCCESS(Status)) {
  5728. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  5729. DPRINT1(4, ":S: Oid for volume root is %s\n", GuidStr );
  5730. } else
  5731. if (Status == STATUS_NOT_IMPLEMENTED) {
  5732. DPRINT1_NT(0, ":S: ERROR - FSCTL_GET_OBJECT_ID failed on file %ws. Object IDs are not enabled on the volume.\n",
  5733. VolumeRootDir, Status);
  5734. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  5735. }
  5736. //
  5737. // If there is no object ID on the root directory put one there.
  5738. // Date : 02/07/2000
  5739. // STATUS_OBJECT_NAME_NOT_FOUND was the old return value
  5740. // and STATUS_OBJECTID_NOT_FOUND is the new return value.
  5741. // Check for both so it works on systems running older and
  5742. // newer ntfs.sys
  5743. //
  5744. if (Status == STATUS_OBJECT_NAME_NOT_FOUND ||
  5745. Status == STATUS_OBJECTID_NOT_FOUND ) {
  5746. FrsUuidCreate((GUID *)ObjectIdBuffer.ObjectId);
  5747. Status = NtFsControlFile(
  5748. RootHandle, // file handle
  5749. NULL, // event
  5750. NULL, // apc routine
  5751. NULL, // apc context
  5752. &Iosb, // iosb
  5753. FSCTL_SET_OBJECT_ID, // FsControlCode
  5754. &ObjectIdBuffer, // input buffer
  5755. sizeof(FILE_OBJECTID_BUFFER),// input buffer length
  5756. NULL, // OutputBuffer for data from the FS
  5757. 0); // OutputBuffer Length
  5758. if (NT_SUCCESS(Status)) {
  5759. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  5760. DPRINT1(4, ":S: Oid set on volume root is %s\n", GuidStr );
  5761. } else {
  5762. DPRINT1(0, ":S: ERROR - FSCTL_SET_OBJECT_ID failed on volume root %ws.\n",
  5763. VolumeRootDir);
  5764. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5765. if (Status == STATUS_NOT_IMPLEMENTED) {
  5766. DPRINT(0, ":S: ERROR - Object IDs are not enabled on the volume.\n");
  5767. Replica->FStatus = FrsErrorUnsupportedFileSystem;
  5768. } else
  5769. if (Status == STATUS_ACCESS_DENIED) {
  5770. DPRINT(0, ":S: ERROR - Access Denied.\n");
  5771. } else {
  5772. DPRINT_NT(0, "ERROR - NtFsControlFile(FSCTL_SET_OBJECT_ID) failed.", Status);
  5773. }
  5774. }
  5775. }
  5776. FRS_CLOSE(RootHandle);
  5777. //
  5778. // If object IDs don't work on the volume then bail.
  5779. //
  5780. if (!NT_SUCCESS(Status)) {
  5781. pVme = FrsFreeType(pVme);
  5782. FRS_CLOSE(VolumeHandle);
  5783. return FrsSetLastNTError(Status);
  5784. }
  5785. //
  5786. // VOLUME ROOT OBJECTID MISMATCH CHECK:
  5787. //
  5788. // Keep the Volume root guid up-to-date in the Db. If it has changed then update it in the config record.
  5789. //
  5790. if (!GUIDS_EQUAL(&(ObjectIdBuffer.ObjectId), &(ConfigRecord->FSVolGuid))) {
  5791. DPRINT1(4,"WARN - Volume root guid mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  5792. GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
  5793. DPRINT1(4,"WARN - Volume root guid (FS) (%s)\n",GuidStr);
  5794. GuidToStr((GUID *)&(ConfigRecord->FSVolGuid), GuidStr);
  5795. DPRINT1(4,"WARN - Volume root guid (DB) (%s)\n",GuidStr);
  5796. DPRINT1(0,"WARN - Volume root guid updated for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  5797. COPY_GUID(&(ConfigRecord->FSVolGuid), &(ObjectIdBuffer.ObjectId));
  5798. Replica->NeedsUpdate = TRUE;
  5799. }
  5800. //
  5801. // Scan the VolumeMonitorQueue to see if we are already doing this one.
  5802. //
  5803. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  5804. ForEachListEntryLock(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  5805. //
  5806. // Consider changing this test to use the guid on the vol root dir.
  5807. //
  5808. if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
  5809. //
  5810. // Already monitoring this volume. Free entry and close handle.
  5811. //
  5812. FrsFreeType(pVme);
  5813. pVme = pE;
  5814. FRS_CLOSE(VolumeHandle);
  5815. //
  5816. // Release the lock and Return the Volume Monitor entry pointer.
  5817. //
  5818. //pVme->ActiveReplicas += 1;
  5819. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  5820. DPRINT1(4, ":S: Volume %ws already monitored.\n", pVme->FSVolInfo.VolumeLabel);
  5821. //
  5822. // JOURNAL ID MISMATCH CHECK:
  5823. //
  5824. // If LastShutdown is 0 then this is the very first time we have started
  5825. // replication on this replica set so set the current CnfUsnJournalID in
  5826. // the config record. Even if LastShutdown is not 0 CnfUsnJournalID could
  5827. // be 0 because it was not getting correctly updated in Win2K.
  5828. //
  5829. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  5830. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  5831. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  5832. //
  5833. // Update the JournalID in the Db and set NeedsUpdate so that the
  5834. // config record gets written to the Db at the next update call.
  5835. //
  5836. ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
  5837. Replica->NeedsUpdate = TRUE;
  5838. } else
  5839. //
  5840. // Check if the JournalID from pVme matches with the CnfUsnJournalID from the
  5841. // config record for this replica set. If it does not then it means that
  5842. // this replica set has been moved. Returning error here will trigger
  5843. // a deletion of the replica set. The set will be recreated at the next
  5844. // poll cycle and it will either be primary or non-auth depending on the
  5845. // case.
  5846. //
  5847. if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
  5848. //
  5849. // Usn Journal has a new instance code. ==> A delete / create occurred.
  5850. // Treat it as a journal wrap error.
  5851. //
  5852. DPRINT1(0,"ERROR - JournalID mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
  5853. DPRINT2(0,"ERROR - JournalID %x(FS) != %x(DB)\n",
  5854. pVme->UsnJournalData.UsnJournalID, ConfigRecord->CnfUsnJournalID);
  5855. DPRINT1(0,"ERROR - Replica Set (%ws) is marked to be deleted\n",Replica->ReplicaName->Name);
  5856. Replica->FStatus = FrsErrorMismatchedJournalId;
  5857. JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
  5858. return ERROR_REVISION_MISMATCH;
  5859. }
  5860. *pVmeArg = pVme;
  5861. Replica->FStatus = FrsErrorSuccess;
  5862. return ERROR_SUCCESS;
  5863. }
  5864. );
  5865. //
  5866. // Create the Usn Journal if it does not exist.
  5867. //
  5868. CfgRegReadDWord(FKC_NTFS_JRNL_SIZE, NULL, 0, &JournalSize);
  5869. CreateUsnJournalData.MaximumSize = (ULONGLONG)JournalSize * (ULONGLONG)(1024 * 1024);
  5870. DPRINT2(4, ":S: Creating NTFS USN Journal on %ws with size %d MB\n",
  5871. Replica->Volume, JournalSize );
  5872. Status = NtFsControlFile( VolumeHandle,
  5873. NULL,
  5874. NULL,
  5875. NULL,
  5876. &Iosb,
  5877. FSCTL_CREATE_USN_JOURNAL,
  5878. &CreateUsnJournalData,
  5879. sizeof(CreateUsnJournalData),
  5880. NULL,
  5881. 0 );
  5882. //
  5883. // Query the journal for the Journal ID, the USN info, etc.
  5884. //
  5885. if (!DeviceIoControl(VolumeHandle,
  5886. FSCTL_QUERY_USN_JOURNAL,
  5887. NULL,
  5888. 0,
  5889. &pVme->UsnJournalData,
  5890. sizeof(USN_JOURNAL_DATA),
  5891. &BytesReturned,
  5892. NULL)) {
  5893. WStatus = GetLastError();
  5894. DPRINT1_WS(4, ":S: JrnlOpen: FSCTL_QUERY_USN_JOURNAL on volume %ws :",
  5895. Replica->Volume, WStatus);
  5896. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  5897. pVme = FrsFreeType(pVme);
  5898. FRS_CLOSE(VolumeHandle);
  5899. Replica->FStatus = FrsErrorJournalInitFailed;
  5900. return WStatus;
  5901. }
  5902. if (BytesReturned != sizeof(USN_JOURNAL_DATA)) {
  5903. WStatus = GetLastError();
  5904. DPRINT2(4, "JrnlOpen: FSCTL_QUERY_USN_JOURNAL bytes returnd: %d, Expected: %d\n",
  5905. BytesReturned, sizeof(USN_JOURNAL_DATA));
  5906. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  5907. pVme = FrsFreeType(pVme);
  5908. FRS_CLOSE(VolumeHandle);
  5909. Replica->FStatus = FrsErrorJournalInitFailed;
  5910. return WStatus;
  5911. }
  5912. //
  5913. // Display the USN Journal Data.
  5914. //
  5915. DPRINT1(4, ":S: UsnJournalID %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.UsnJournalID ));
  5916. DPRINT1(4, ":S: FirstUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.FirstUsn ));
  5917. DPRINT1(4, ":S: NextUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.NextUsn ));
  5918. DPRINT1(4, ":S: LowestValidUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.LowestValidUsn ));
  5919. DPRINT1(4, ":S: MaxUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaxUsn ));
  5920. DPRINT1(4, ":S: MaximumSize %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaximumSize ));
  5921. DPRINT1(4, ":S: AllocationDelta %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.AllocationDelta));
  5922. //
  5923. // If the NextUsn is 0 then create a dummy file to increment the usn
  5924. // so that we don't end up picking up a valid change at usn 0.
  5925. //
  5926. if (pVme->UsnJournalData.NextUsn == QUADZERO) {
  5927. FrsCreateFileRelativeById(&DummyHandle,
  5928. Replica->PreInstallHandle,
  5929. NULL,
  5930. 0,
  5931. FILE_ATTRIBUTE_TEMPORARY,
  5932. L"NTFRS_TEMP_FILE.TMP",
  5933. (USHORT)(wcslen(L"NTFRS_TEMP_FILE.TMP") * sizeof(WCHAR)),
  5934. NULL,
  5935. FILE_OPEN_IF,
  5936. RESTORE_ACCESS | DELETE);
  5937. if (HANDLE_IS_VALID(DummyHandle)) {
  5938. FrsDeleteByHandle(L"NTFRS_TEMP_FILE.TMP", DummyHandle);
  5939. }
  5940. FRS_CLOSE(DummyHandle);
  5941. }
  5942. //
  5943. //
  5944. // JOURNAL ID MISMATCH CHECK:
  5945. //
  5946. // If LastShutdown is 0 then this is the very first time we have started
  5947. // replication on this replica set so set the current pVme->JrnlReadPoint to
  5948. // the end of the Journal. Also save the Journal ID so we can detect if
  5949. // someone does a delete/create cycle on the journal.
  5950. // There are cases when the replica set gets created
  5951. // and then shutdown without ever initializing.
  5952. //
  5953. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  5954. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  5955. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  5956. ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
  5957. Replica->NeedsUpdate = TRUE;
  5958. } else
  5959. if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
  5960. //
  5961. // Usn Journal has a new instance code. ==> A delete / create occurred.
  5962. // Treat it as a journal wrap error.
  5963. //
  5964. Replica->FStatus = FrsErrorMismatchedJournalId;
  5965. JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
  5966. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  5967. pVme = FrsFreeType(pVme);
  5968. FRS_CLOSE(VolumeHandle);
  5969. return ERROR_REVISION_MISMATCH;
  5970. }
  5971. //
  5972. // Re-open the volume to allow for asynchronous IO. We don't
  5973. // open with the "OVERLAPPED" flag initially because then the
  5974. // above "create journal" doesn't finish in time for us to post
  5975. // a "read journal" request. We get a "INVALID_DEVICE_STATE"
  5976. // status.
  5977. //
  5978. FRS_CLOSE(VolumeHandle);
  5979. VolumeHandle = CreateFile(Replica->Volume,
  5980. GENERIC_READ | GENERIC_WRITE,
  5981. FILE_SHARE_READ | FILE_SHARE_WRITE,
  5982. NULL,
  5983. OPEN_EXISTING,
  5984. FILE_FLAG_OVERLAPPED,
  5985. NULL );
  5986. WStatus = GetLastError();
  5987. if (!HANDLE_IS_VALID(VolumeHandle)) {
  5988. DPRINT1_WS(0, "Can't open file %ws;", Replica->Volume, WStatus);
  5989. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  5990. pVme = FrsFreeType(pVme);
  5991. Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
  5992. return WStatus;
  5993. } else {
  5994. DPRINT1(4, ":S: JrnlOpen: Open on volume %ws\n", Replica->Volume);
  5995. }
  5996. //
  5997. // This is a new volume journal add it to the list.
  5998. //
  5999. pVme->VolumeHandle = VolumeHandle;
  6000. pVme->DriveLetter[0] = Replica->Volume[wcslen(Replica->Volume) - 2];
  6001. pVme->DriveLetter[1] = Replica->Volume[wcslen(Replica->Volume) - 1];
  6002. pVme->DriveLetter[2] = UNICODE_NULL;
  6003. //
  6004. // Associate the volume handle with the completion port.
  6005. //
  6006. JournalCompletionPort = CreateIoCompletionPort(
  6007. VolumeHandle,
  6008. JournalCompletionPort,
  6009. (ULONG_PTR) pVme, // key associated with this handle
  6010. 0);
  6011. if (NT_SUCCESS(Status) && (JournalCompletionPort != NULL)) {
  6012. //
  6013. // Set the ref count and put the new entry on the queue.
  6014. // This will get the JournalReadThread to start looking at the
  6015. // completion port. Save the volume handle.
  6016. //
  6017. pVme->VolumeHandle = VolumeHandle;
  6018. pVme->ActiveReplicas = 0;
  6019. //
  6020. // Start Ref count at 2. One for being on the VolumeMonitorQueue and
  6021. // one for the initial allocation. The latter is released at VME shutdown.
  6022. //
  6023. pVme->ReferenceCount = 2;
  6024. pVme->JournalState = JRNL_STATE_INITIALIZING;
  6025. FrsRtlInsertTailQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
  6026. DPRINT2(4, ":S: Create Usn Journal success on %ws, Total vols: %d\n",
  6027. pVme->FSVolInfo.VolumeLabel, VolumeMonitorQueue.Count);
  6028. } else {
  6029. //
  6030. // Journal creation or CreateIoCompletionPort failed. Clean up.
  6031. //
  6032. WStatus = GetLastError();
  6033. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6034. DPRINT_NT(0, ":S: ERROR - Create Usn Journal failed.", Status );
  6035. if (JournalCompletionPort == NULL) {
  6036. DPRINT_WS(0, ":S: ERROR - Failed to create IoCompletion port.", WStatus);
  6037. Status = STATUS_UNSUCCESSFUL;
  6038. }
  6039. pVme = FrsFreeType(pVme);
  6040. FRS_CLOSE(VolumeHandle);
  6041. Replica->FStatus = FrsErrorJournalInitFailed;
  6042. return FrsSetLastNTError(Status);
  6043. }
  6044. //
  6045. // Find end of journal for use in recovery and new replica set creates.
  6046. //
  6047. WStatus = JrnlGetEndOfJournal(pVme, &pVme->JrnlRecoveryEnd);
  6048. if (!WIN_SUCCESS(WStatus)) {
  6049. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6050. pVme = FrsFreeType(pVme);
  6051. FRS_CLOSE(VolumeHandle);
  6052. Replica->FStatus = FrsErrorJournalInitFailed;
  6053. return WStatus;
  6054. }
  6055. DPRINT1(3, ":S: Current End of journal at : %08x %08x\n", PRINTQUAD(pVme->JrnlRecoveryEnd));
  6056. if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
  6057. (ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
  6058. (ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
  6059. pVme->JrnlReadPoint = pVme->JrnlRecoveryEnd;
  6060. DPRINT1(4, ":S: Initial journal read starting at: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  6061. }
  6062. //
  6063. // Allocate a volume filter hash table.
  6064. //
  6065. sprintf(HashTableName, "FT_%ws", VolumeInfo->VolumeLabel);
  6066. pVme->FilterTable = GhtCreateTable(
  6067. HashTableName, // Table name
  6068. VOLUME_FILTER_HASH_TABLE_ROWS, // NumberRows
  6069. OFFSET(FILTER_TABLE_ENTRY, DFileID), // KeyOffset is dir fid
  6070. sizeof(LONGLONG), // KeyLength
  6071. JrnlHashEntryFree,
  6072. JrnlCompareFid,
  6073. JrnlHashCalcFid,
  6074. FRS_JOURNAL_FILTER_PRINT_FUNCTION);
  6075. //
  6076. // Allocate a parent File ID hash table for the volume.
  6077. //
  6078. // The volume parent file ID table is a specialized Qhash table intended to
  6079. // economize on memory. There is an entry in this table for every file
  6080. // in a replica set on the volume. There is one of these tables for each
  6081. // volume. Its goal in life is to give us the Old Parent Fid for a file
  6082. // after a rename. The USN journal only provides the new Parent FID.
  6083. // Once we have the old parent FID for a file or dir we can then do a lookup
  6084. // in the Volume Filter Table to determine the file's previous replica set
  6085. // so we can determine if a file or dir has moved across replica sets or
  6086. // out of a replica set entirely.
  6087. //
  6088. //
  6089. pVme->ParentFidTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6090. PARENT_FILEID_TABLE_SIZE);
  6091. SET_QHASH_TABLE_HASH_CALC(pVme->ParentFidTable, JrnlHashCalcFid);
  6092. //
  6093. // Allocate an Active Child hash table for the volume.
  6094. //
  6095. pVme->ActiveChildren = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6096. ACTIVE_CHILDREN_TABLE_SIZE);
  6097. SET_QHASH_TABLE_FLAG(pVme->ActiveChildren, QHASH_FLAG_LARGE_KEY);
  6098. SET_QHASH_TABLE_HASH_CALC2(pVme->ActiveChildren, ActiveChildrenHashCalc);
  6099. SET_QHASH_TABLE_KEY_MATCH(pVme->ActiveChildren, ActiveChildrenKeyMatch);
  6100. SET_QHASH_TABLE_FREE(pVme->ActiveChildren, FrsFree);
  6101. //
  6102. // Allocate a USN Write Filter Table for the volume and post the first
  6103. // clean request.
  6104. //
  6105. pVme->FrsWriteFilter = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6106. FRS_WRITE_FILTER_SIZE);
  6107. SET_QHASH_TABLE_HASH_CALC(pVme->FrsWriteFilter, JrnlHashCalcUsn);
  6108. JrnlSubmitCleanWriteFilter(pVme, JRNL_CLEAN_WRITE_FILTER_INTERVAL);
  6109. #ifdef RECOVERY_CONFLICT
  6110. //
  6111. // Allocate a Recovery Conflict hash table for the volume.
  6112. //
  6113. pVme->RecoveryConflictTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
  6114. RECOVERY_CONFLICT_TABLE_SIZE);
  6115. SET_QHASH_TABLE_HASH_CALC(pVme->RecoveryConflictTable, JrnlHashCalcFid);
  6116. #endif // RECOVERY_CONFLICT
  6117. //
  6118. // Allocate a hash table to record file name dependencies between file
  6119. // operations on this volume in the NTFS journal USN record stream.
  6120. // This is called the Name Space Table and it is used to control when
  6121. // a USN record can be merged into a prior change order affecting the same
  6122. // file. Some examples of when a USN record merge can not be done are
  6123. // given elsewhere, search for USN MERGE RESTRICTIONS.
  6124. //
  6125. pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
  6126. pVme->NameSpaceTable = FrsAllocTypeSize(QHASH_TABLE_TYPE, NAME_SPACE_TABLE_SIZE);
  6127. SET_QHASH_TABLE_HASH_CALC(pVme->NameSpaceTable, NoHashBuiltin);
  6128. //
  6129. // Allocate a Change Order Aging table for this volume.
  6130. //
  6131. sprintf(HashTableName, "CO_%ws", VolumeInfo->VolumeLabel);
  6132. pVme->ChangeOrderTable = GhtCreateTable(
  6133. HashTableName, // Table name
  6134. REPLICA_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
  6135. REPLICA_CHANGE_ORDER_ENTRY_KEY, // KeyOffset
  6136. REPLICA_CHANGE_ORDER_ENTRY_KEY_LENGTH, // KeyLength
  6137. JrnlHashEntryFree,
  6138. JrnlCompareFid,
  6139. JrnlHashCalcFid,
  6140. FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
  6141. //
  6142. // Allocate an Active Inbound Change Order hash table for this volume.
  6143. //
  6144. sprintf(HashTableName, "AIBCO_%ws", VolumeInfo->VolumeLabel);
  6145. pVme->ActiveInboundChangeOrderTable = GhtCreateTable(
  6146. HashTableName, // Table name
  6147. ACTIVE_INBOUND_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
  6148. REPLICA_CHANGE_ORDER_FILEGUID_KEY, // KeyOffset
  6149. REPLICA_CHANGE_ORDER_FILEGUID_KEY_LENGTH, // KeyLength
  6150. JrnlHashEntryFree,
  6151. JrnlCompareGuid,
  6152. JrnlHashCalcGuid,
  6153. FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
  6154. //
  6155. // Add the volume change order list to the global change order list.
  6156. //
  6157. FrsInitializeQueue(&pVme->ChangeOrderList, &FrsVolumeLayerCOList);
  6158. pVme->InitTime = GetTickCount();
  6159. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6160. //
  6161. // Return the Volume Monitor entry pointer.
  6162. //
  6163. *pVmeArg = pVme;
  6164. return ERROR_SUCCESS;
  6165. }
  #if 0

  ULONG
  JrnlCheckStartFailures(
      PFRS_QUEUE Queue
      )
  /*++

  Routine Description:

      Walk a queue of Volume Monitor Entries under the queue lock and log,
      for each entry, why its first journal read is considered not started.

  Arguments:

      Queue - A queue with Volume Monitor Entries on it.

  Return Value:

      ERROR_SUCCESS if the queue holds no entries.
      ERROR_GEN_FAILURE if the queue holds at least one entry (the status is
      set for every entry visited, whatever that entry's WStatus is).

  --*/
  {
  #undef DEBSUB
  #define DEBSUB "JrnlCheckStartFailures:"

      PLIST_ENTRY           Link;
      PVOLUME_MONITOR_ENTRY Vme;
      ULONG                 VmeStatus;
      ULONG                 Result = ERROR_SUCCESS;

      FrsRtlAcquireQueueLock(Queue);

      Link = GetListHead(&Queue->ListHead);
      if (Link == &Queue->ListHead) {
          DPRINT(4, ":S: JrnlCheckStartFailures - Queue empty.\n");
      }

      for ( ; Link != &Queue->ListHead; Link = GetListNext(Link)) {

          Vme = CONTAINING_RECORD(Link, VOLUME_MONITOR_ENTRY, ListEntry);
          VmeStatus = Vme->WStatus;

          //
          // Any entry found on this queue makes the overall result a failure.
          //
          Result = ERROR_GEN_FAILURE;

          if (WIN_SUCCESS(VmeStatus) || (VmeStatus == ERROR_IO_PENDING)) {
              //
              // The recorded status says the read was issued, yet the entry
              // is still on this queue.
              //
              DPRINT_WS(0, "Error from JrnlCheckStartFailures", VmeStatus);
              DPRINT1(0, ":S: ERROR - Replication should have been started for replica sets on volume %ws\n",
                      Vme->FSVolInfo.VolumeLabel);

          } else if (VmeStatus == ERROR_NOT_FOUND) {
              //
              // The I/O never started because the starting USN is no longer
              // in the journal, so locally originated changes may have been
              // missed. Recovering means walking the replica tree and the
              // IDTable; replica sets whose starting USN is still present
              // on the volume could be started directly.
              //
              // Code to sync up the tree still needs to be added here.
              //
              DPRINT1(0, ":S: Usn %08lx %08lx has been deleted.\n",
                      PRINTQUAD(Vme->JrnlReadPoint));
              DPRINT(0, ":S: Data lost, resync required on Replica ...\n");
              JrnlClose(Vme->VolumeHandle);

          } else {
              //
              // The I/O never started for some other reason.
              //
              DPRINT_WS(0, "Error from JrnlCheckStartFailures", VmeStatus);
              DPRINT1(0, ":S: ERROR - Replication not started for any replica sets on volume %ws\n",
                      Vme->FSVolInfo.VolumeLabel);
          }
      }

      FrsRtlReleaseQueueLock(Queue);

      return Result;
  }

  #endif
  6235. ULONG
  6236. JrnlPauseVolume(
  6237. IN PVOLUME_MONITOR_ENTRY pVme,
  6238. IN DWORD MilliSeconds
  6239. )
  6240. /*++
  6241. Routine Description:
  6242. Pause journal read activity on the specified volume. This routine
  6243. queues a completion packet to the journal read thread telling it
  6244. to pause I/O the volume. We then then wait on the event handle in
  6245. the Vme struct.
  6246. Once the read thread stops I/O on the volume it queues a CMD_JOURNAL_PAUSED
  6247. packet to the journal process queue. When this command is processed we
  6248. know that any prior journal buffers that have been queued for this
  6249. volume are now processed so we can signal the event to let the waiter
  6250. proceed.
  6251. Arguments:
  6252. pVme: The volume to pause.
  6253. MilliSeconds - Timeout
  6254. Return Value:
  6255. Win32 status
  6256. --*/
  6257. {
  6258. #undef DEBSUB
  6259. #define DEBSUB "JrnlPauseVolume:"
  6260. ULONG WStatus;
  6261. ULONG RetryCount = 10;
  6262. DPRINT2(5, "***** Pause on Volume %ws - Journal State: %s *****\n",
  6263. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  6264. RETRY:
  6265. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6266. //
  6267. // Check if paused already.
  6268. //
  6269. if ((pVme->JournalState == JRNL_STATE_PAUSED) ||
  6270. (pVme->JournalState == JRNL_STATE_INITIALIZING)) {
  6271. WStatus = ERROR_SUCCESS;
  6272. goto RETURN;
  6273. }
  6274. //
  6275. // Check if pause is in progress.
  6276. //
  6277. if ((pVme->JournalState == JRNL_STATE_PAUSE1) ||
  6278. (pVme->JournalState == JRNL_STATE_PAUSE2)) {
  6279. goto WAIT;
  6280. }
  6281. //
  6282. // If I/O is not active on this volume then request is invalid.
  6283. //
  6284. if (pVme->JournalState != JRNL_STATE_ACTIVE) {
  6285. WStatus = ERROR_INVALID_FUNCTION;
  6286. goto RETURN;
  6287. }
  6288. //
  6289. // Submit the pause request to the journal read thread.
  6290. //
  6291. WStatus = JrnlSubmitReadThreadRequest(pVme,
  6292. FRS_PAUSE_JOURNAL_READ,
  6293. JRNL_STATE_PAUSE1);
  6294. if (WStatus == ERROR_BUSY) {
  6295. //
  6296. // Overlapped struct is in use. Retry a few times then bail.
  6297. //
  6298. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6299. if (--RetryCount == 0) {
  6300. return ERROR_BUSY;
  6301. }
  6302. Sleep(250);
  6303. goto RETRY;
  6304. }
  6305. WAIT:
  6306. //
  6307. // Drop the lock and wait on the event.
  6308. //
  6309. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6310. WStatus = WaitForSingleObject(pVme->Event, MilliSeconds);
  6311. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_RETURN);
  6312. //
  6313. // Check the result state.
  6314. //
  6315. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6316. WStatus = (pVme->JournalState == JRNL_STATE_PAUSED) ?
  6317. ERROR_SUCCESS : WAIT_FAILED;
  6318. RETURN:
  6319. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6320. return WStatus;
  6321. }
  6322. ULONG
  6323. JrnlUnPauseVolume(
  6324. IN PVOLUME_MONITOR_ENTRY pVme,
  6325. IN PJBUFFER Jbuff,
  6326. IN BOOL HaveLock
  6327. )
  6328. /*++
  6329. Routine Description:
  6330. Un-Pause journal read activity on the specified volume.
  6331. This routine starts up journal read activity on a volume that has
  6332. been previously paused. It kicks off an async read on the volume
  6333. which will complete on the completion port.
  6334. This routine is called both to initially start activity on a Journal and
  6335. to start the next read on a journal.
  6336. If you are initiating the first journal read or restarting the journal
  6337. after a pause you need to set the journal state to JRNL_STATE_STARTING
  6338. before calling this routine. e.g.
  6339. pVme->JournalState = JRNL_STATE_STARTING;
  6340. On the very first call to start the journal the JournalState should
  6341. be JRNL_STATE_INITIALIZING. This causes an initial set of journal
  6342. data buffers to be allocated. Otherwise we get a buffer from the
  6343. JournalFreeQueue.
  6344. Arguments:
  6345. pVme: The volume to pause.
  6346. Jbuff: An optional caller supplied Journal buffer. If NULL we get
  6347. one off the free list here.
  6348. HaveLock: TRUE means the caller has acquired the volume monitor lock.
  6349. FALSE means we acquire it and release it here.
  6350. Return Value:
  6351. Win32 status
  6352. --*/
  6353. {
  6354. #undef DEBSUB
  6355. #define DEBSUB "JrnlUnPauseVolume:"
  6356. PLIST_ENTRY Entry;
  6357. ULONG WStatus;
  6358. NTSTATUS Status;
  6359. BOOL AllocJbuff = (Jbuff == NULL);
  6360. ULONG SaveJournalState;
  6361. ULONG i;
  6362. LONG RetryCount;
  6363. DPRINT2(5, "***** UnPause on Volume %ws - Journal State: %s *****\n",
  6364. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  6365. //
  6366. // Get the buffer first so we don't block waiting for a free buffer
  6367. // holding the VolumeMonitorQueue lock.
  6368. //
  6369. if (AllocJbuff) {
  6370. if (pVme->JournalState == JRNL_STATE_INITIALIZING) {
  6371. //
  6372. // Allocate a journal buffer from memory if this is a fresh start.
  6373. //
  6374. Jbuff = FrsAllocType(JBUFFER_TYPE);
  6375. //DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
  6376. } else {
  6377. //
  6378. // Get a journal buffer from the free list.
  6379. // We wait here until a buffer is available.
  6380. //
  6381. if (HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6382. Entry = FrsRtlRemoveHeadQueue(&JournalFreeQueue);
  6383. if (HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6384. if (Entry == NULL) {
  6385. //
  6386. // Check for abort and cancel all I/O.
  6387. //
  6388. DPRINT(0, "ERROR-JournalFreeQueue Abort.\n");
  6389. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6390. return ERROR_REQUEST_ABORTED;
  6391. }
  6392. Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
  6393. //DPRINT1(5, "jb: ff %08x\n", Jbuff);
  6394. }
  6395. }
  6396. if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6397. //
  6398. // Check if paused already or stopped. If so, ignore the request.
  6399. //
  6400. if ((pVme->JournalState != JRNL_STATE_STARTING) &&
  6401. (pVme->JournalState != JRNL_STATE_INITIALIZING) &&
  6402. (pVme->JournalState != JRNL_STATE_ACTIVE)) {
  6403. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6404. WStatus = ERROR_SUCCESS;
  6405. goto ERROR_RETURN;
  6406. }
  6407. //
  6408. // If there is already an I/O active don't start another. This can happen
  6409. // when the IOCancel() from a previous Pause request fails to abort the
  6410. // current journal read immediately. Now the unpause request starts a
  6411. // second I/O on the volume. In theory this should be benign since the
  6412. // cancel from the first pause will abort the first read request and the
  6413. // 2nd should complete normally.
  6414. //
  6415. // For now just mark the journal as Active again so when the currently
  6416. // outstanding request completes (or aborts) another read request is issued.
  6417. //
  6418. if (pVme->ActiveIoRequests != 0) {
  6419. DPRINT1(3, "UnPause on volume with non-zero ActiveIoRequest Count: %d\n",
  6420. pVme->ActiveIoRequests);
  6421. if (pVme->ReplayUsnValid) {
  6422. DPRINT(3, "Replay USN is valid. Waiting for ActiveIoRequest to go to zero\n");
  6423. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6424. //
  6425. // Unfortunately if this call is from the journal read thread
  6426. // v.s. another thread unpausing the volume the journal read
  6427. // thread won't be able to decrement the ActiveIoRequests.
  6428. //
  6429. Sleep(5000);
  6430. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6431. if (pVme->ActiveIoRequests != 0) {
  6432. DPRINT1(3, "ActiveIoRequest still non-zero: %d. Skip replay\n",
  6433. pVme->ActiveIoRequests);
  6434. pVme->ReplayUsnValid = FALSE;
  6435. }
  6436. }
  6437. //
  6438. // The requests have not yet finished. For now just mark the
  6439. // journal as Active again so when the currently outstanding
  6440. // request completes (or aborts) another read request is issued.
  6441. //
  6442. if (pVme->ActiveIoRequests != 0) {
  6443. pVme->IoActive = TRUE;
  6444. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
  6445. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6446. WStatus = ERROR_SUCCESS;
  6447. goto ERROR_RETURN;
  6448. }
  6449. //
  6450. // FALL THRU means startup a read on the journal.
  6451. //
  6452. }
  6453. //
  6454. // If we are just starting up or restarting from a pause and the
  6455. // Replay USN is valid then start reading from there.
  6456. //
  6457. if ((pVme->JournalState != JRNL_STATE_ACTIVE) && pVme->ReplayUsnValid) {
  6458. DPRINT1(4, "JrnlReadPoint was: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  6459. pVme->JrnlReadPoint = pVme->ReplayUsn;
  6460. pVme->ReplayUsnValid = FALSE;
  6461. DPRINT1(4, "Loading JrnlReadPoint from ReplayUsn: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
  6462. }
  6463. pVme->IoActive = TRUE;
  6464. pVme->StopIo = FALSE; // VME Overlap struct available.
  6465. SaveJournalState = pVme->JournalState;
  6466. if (pVme->JournalState != JRNL_STATE_ACTIVE) {
  6467. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
  6468. }
  6469. pVme->ActiveIoRequests += 1;
  6470. FRS_ASSERT(pVme->ActiveIoRequests == 1);
  6471. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6472. //
  6473. // Post a read on this journal handle to get things started.
  6474. // Note ownership of the buffer goes to another thread via the
  6475. // I/O Completion port so we can't change or look at it
  6476. // (without a lock) unless the read failed. Even if the read
  6477. // completes synchronously the I/O still completes via the port.
  6478. // The same is true of the related VME struct.
  6479. //
  6480. // An NTSTATUS return of STATUS_JOURNAL_ENTRY_DELETED means the requested
  6481. // USN record is no longer in the Journal (i.e. the journal has
  6482. // wrapped). The corresponding win32 error is ERROR_JOURNAL_ENTRY_DELETED.
  6483. //
  6484. RetryCount = 100;
  6485. RETRY_READ:
  6486. Status = FrsIssueJournalAsyncRead(Jbuff, pVme);
  6487. if (!NT_SUCCESS(Status)) {
  6488. if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
  6489. if (Status == STATUS_JOURNAL_ENTRY_DELETED) {
  6490. DPRINT(0, " +-+-+-+-+-+- JOURNAL WRAPPED +-+-+-+-+-+-+-+-+-+-\n");
  6491. //
  6492. // The journal wrapped.
  6493. //
  6494. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6495. } else
  6496. if ((Status == STATUS_JOURNAL_DELETE_IN_PROGRESS) ||
  6497. (Status == STATUS_JOURNAL_NOT_ACTIVE)) {
  6498. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6499. DPRINT(0, "Journal is or is being deleted. FRS requires the NTFS Journal.\n");
  6500. DisplayNTStatus(Status);
  6501. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6502. } else
  6503. if (Status == STATUS_DATA_ERROR) {
  6504. //
  6505. // Internal NTFS detected errors: e.g.
  6506. // - Usn record size is not quad-aligned
  6507. // - Usn record size extends beyond the end of the Usn page
  6508. // - Usn record size isn't large enough to contain the Usn record
  6509. // - Usn record size extends beyond end of usn journal
  6510. //
  6511. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6512. DPRINT(0, "Journal internal inconsistency detected by NTFS.\n");
  6513. DisplayNTStatus(Status);
  6514. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6515. } else {
  6516. DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
  6517. DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead : ", Status);
  6518. DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead Iosb.Status: ", Jbuff->Iosb.Status);
  6519. if ((Status == STATUS_INVALID_PARAMETER) && (RetryCount-- > 0)) {
  6520. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6521. Sleep(500);
  6522. goto RETRY_READ;
  6523. }
  6524. SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
  6525. // FRS_ASSERT(FALSE);
  6526. }
  6527. //
  6528. // Restore old journal state.
  6529. //
  6530. pVme->JournalState = SaveJournalState;
  6531. pVme->ActiveIoRequests -= 1;
  6532. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  6533. if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
  6534. WStatus = FrsSetLastNTError(Status);
  6535. DPRINT_WS(0, "Error from FrsIssueJournalAsyncRead", WStatus);
  6536. //
  6537. // Error starting the read. Free Jbuff and return the error.
  6538. //
  6539. goto ERROR_RETURN;
  6540. }
  6541. //
  6542. // IO has started. If this was a fresh start add a few more buffers
  6543. // on the free list so there are enough to work with.
  6544. //
  6545. if (SaveJournalState == JRNL_STATE_INITIALIZING) {
  6546. for (i=0; i<(NumberOfJounalBuffers-1); i++) {
  6547. Jbuff = FrsAllocType(JBUFFER_TYPE);
  6548. //DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
  6549. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  6550. }
  6551. }
  6552. return ERROR_SUCCESS;
  6553. ERROR_RETURN:
  6554. //
  6555. // If we allocated a journal buffer here then give it back.
  6556. //
  6557. if (AllocJbuff && (Jbuff != NULL)) {
  6558. if (SaveJournalState == JRNL_STATE_INITIALIZING) {
  6559. //DPRINT1(5, "jb: fm %08x (free mem)\n", Jbuff);
  6560. Jbuff = FrsFreeType(Jbuff);
  6561. } else {
  6562. //DPRINT1(5, "jb: tf %08x\n", Jbuff);
  6563. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  6564. }
  6565. }
  6566. return WStatus;
  6567. }
  6568. ULONG
  6569. JrnlSubmitReadThreadRequest(
  6570. IN PVOLUME_MONITOR_ENTRY pVme,
  6571. IN ULONG Request,
  6572. IN ULONG NewState
  6573. )
  6574. /*++
  6575. Routine Description:
  6576. This routine posts a completion status packet on the journal I/O
  6577. completion port. This is used to either stop journal I/O or just
  6578. pause it while making changes to the filter table. When the journal
  6579. read thread gets the request it will cancel journal I/O on the volume
  6580. handle (which can only be done from that thread). If the post is
  6581. successful then the JournalState is updated with NewState.
  6582. We Assume the caller has acquired the VolumeMonitorQueue lock.
  6583. Arguments:
  6584. pVme - the volume monitor entry with the state for this volume's journal.
  6585. Request - The request type. Either FRS_CANCEL_JOURNAL_READ or
  6586. FRS_PAUSE_JOURNAL_READ.
  6587. NewState - The new state for the journal if the submit succeeds.
  6588. Return Value:
  6589. A WIN32 status.
  6590. --*/
  6591. {
  6592. #undef DEBSUB
  6593. #define DEBSUB "JrnlSubmitReadThreadRequest:"
  6594. ULONG WStatus;
  6595. PCHAR ReqStr;
  6596. if (Request == FRS_CANCEL_JOURNAL_READ) {
  6597. ReqStr = "cancel journal read";
  6598. } else
  6599. if (Request == FRS_PAUSE_JOURNAL_READ) {
  6600. ReqStr = "pause journal read";
  6601. } else {
  6602. DPRINT1(0, "ERROR - Invalid journal request: %08x\n", Request);
  6603. return ERROR_INVALID_PARAMETER;
  6604. }
  6605. if (pVme->StopIo) {
  6606. return ERROR_BUSY;
  6607. }
  6608. if (JournalCompletionPort == NULL) {
  6609. return ERROR_INVALID_HANDLE;
  6610. }
  6611. DPRINT2(5, "Queueing %s IO req on Volume %ws.\n",
  6612. ReqStr, pVme->FSVolInfo.VolumeLabel);
  6613. //
  6614. // Clear the pVme event if the request is to start a stop or pause sequence.
  6615. // Mark the overlapped struct busy,
  6616. // Submit the pause request to the journal read thread.
  6617. //
  6618. if ((NewState == JRNL_STATE_STOPPING) ||
  6619. (NewState == JRNL_STATE_PAUSE1)) {
  6620. ResetEvent(pVme->Event);
  6621. }
  6622. pVme->StopIo = TRUE;
  6623. if (!PostQueuedCompletionStatus(
  6624. JournalCompletionPort,
  6625. Request,
  6626. (ULONG_PTR) pVme,
  6627. &pVme->CancelOverlap)) {
  6628. WStatus = GetLastError();
  6629. DPRINT2_WS(0, "ERROR - Failed on PostQueuedCompletionStatus of %s on %ws :",
  6630. ReqStr, pVme->FSVolInfo.VolumeLabel, WStatus);
  6631. return WStatus;
  6632. }
  6633. //
  6634. // pkt submited. Update state.
  6635. //
  6636. pVme->JournalState = NewState;
  6637. DPRINT1(5, "Packet submitted. Jrnl state is %s\n", RSS_NAME(NewState));
  6638. return ERROR_SUCCESS;
  6639. }
  6640. ULONG
  6641. JrnlShutdownSingleReplica(
  6642. IN PREPLICA Replica,
  6643. IN BOOL HaveLock
  6644. )
  6645. /*++
  6646. Routine Description:
  6647. Detach this replica from its journal. Decrement the ActiveReplicas count
  6648. on the VME. If zero post a completion packet to the JournalCompletionPort
  6649. so the pending journal read request can be canceled by the read thread.
  6650. If no journal thread is active we do it all here.
  6651. If the volume monitor queue is left empty, we close the completion port.
  6652. The caller must have acquired the pVme->ReplicaListHead lock.
  6653. Arguments:
  6654. Replica -- Replica set to detach.
  6655. HaveLock -- TRUE if the caller has acquired the VolumeMonitorQueue
  6656. lock else we get it here.
  6657. Return Value:
  6658. Win32 status.
  6659. --*/
  6660. {
  6661. #undef DEBSUB
  6662. #define DEBSUB "JrnlShutdownSingleReplica:"
  6663. ULONG GStatus;
  6664. LIST_ENTRY DeadList;
  6665. PFRS_QUEUE FrsTempList;
  6666. ULONG WStatus = ERROR_SUCCESS;
  6667. PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
  6668. DPRINT1(4, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6669. if (!HaveLock) {
  6670. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6671. FrsRtlAcquireQueueLock(&pVme->ReplicaListHead);
  6672. }
  6673. if (pVme->ActiveReplicas == 0) {
  6674. DPRINT1(0, ":S: ActiveReplicas count already zero on %ws\n",
  6675. pVme->FSVolInfo.VolumeLabel);
  6676. WStatus = ERROR_INVALID_HANDLE;
  6677. goto RETURN;
  6678. }
  6679. //
  6680. // It is possible that this replica struct never made it onto the list
  6681. // if it went into the error state during init or startup.
  6682. //
  6683. if (Replica->VolReplicaList.Flink == NULL) {
  6684. DPRINT2(0, ":S: WARN: Replica struct not on pVme ReplicaListHead for on %ws. Current replica State: %s\n",
  6685. pVme->FSVolInfo.VolumeLabel, RSS_NAME(Replica->ServiceState));
  6686. WStatus = ERROR_INVALID_HANDLE;
  6687. goto RETURN;
  6688. }
  6689. //
  6690. // Remove replica from the VME list.
  6691. //
  6692. FrsRtlRemoveEntryListLock(&pVme->ReplicaListHead, &Replica->VolReplicaList);
  6693. pVme->ActiveReplicas -= 1;
  6694. ReleaseVmeRef(pVme);
  6695. DPRINT3(4, "Removed %ws from VME %ws. %d Replicas remain.\n",
  6696. Replica->ReplicaName->Name, pVme->FSVolInfo.VolumeLabel,
  6697. pVme->ActiveReplicas);
  6698. //
  6699. // IF this is the last active Replica on the volume then stop
  6700. // I/O on the journal.
  6701. //
  6702. if (!IsListEmpty(&pVme->ReplicaListHead.ListHead)) {
  6703. WStatus = ERROR_SUCCESS;
  6704. goto RETURN;
  6705. }
  6706. if (pVme->ActiveReplicas != 0) {
  6707. DPRINT2(0, ":S: ERROR - pVme->ReplicaListHead is empty but ActiveReplicas count is non-zero (%d) on %ws\n",
  6708. pVme->ActiveReplicas, pVme->FSVolInfo.VolumeLabel);
  6709. DPRINT(0, ":S: ERROR - Stopping the journal anyway\n");
  6710. pVme->ActiveReplicas = 0;
  6711. }
  6712. //
  6713. // This is the last Replica set on the volume. Stop the journal.
  6714. //
  6715. if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
  6716. //
  6717. // There is no Journal thread. Put the VME on the
  6718. // stop queue and Close the handle here.
  6719. //
  6720. FrsRtlRemoveEntryQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
  6721. pVme->IoActive = FALSE;
  6722. pVme->WStatus = ERROR_SUCCESS;
  6723. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
  6724. DPRINT1(0, ":S: FrsRtlInsertTailQueue -- onto stop queue %08x\n", pVme);
  6725. FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &pVme->ListEntry);
  6726. FRS_CLOSE(pVme->VolumeHandle);
  6727. ReleaseVmeRef(pVme);
  6728. if ((VolumeMonitorQueue.Count == 0) &&
  6729. (JournalCompletionPort != NULL)) {
  6730. //
  6731. // Close the completion port.
  6732. //
  6733. // FRS_CLOSE(JournalCompletionPort);
  6734. }
  6735. } else {
  6736. //
  6737. // if I/O not already stopping, queue a completion packet
  6738. // to the journal read thread to cancel the I/O.
  6739. // The journal read thread will then put the VME on the
  6740. // VolumeMonitorStopQueue. If we did it here the VME would
  6741. // go to the Stop queue and the ActiveReplicas count would
  6742. // be decremented before I/O has actually stopped on the journal.
  6743. //
  6744. WStatus = JrnlSubmitReadThreadRequest(pVme,
  6745. FRS_CANCEL_JOURNAL_READ,
  6746. JRNL_STATE_STOPPING);
  6747. if (!WIN_SUCCESS(WStatus)) {
  6748. DPRINT2(0, ":S: ERROR: JrnlSubmitReadThreadRequest to stop Journal Failed on %ws. Current Journal State: %s\n",
  6749. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  6750. DPRINT_WS(0, "ERROR: Status is", WStatus);
  6751. }
  6752. }
  6753. if (DoDebug(5, DEBSUB)) {
  6754. // "TEST CODE VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV"
  6755. DPRINT(5, "\n");
  6756. DPRINT1(5, "==== start of volume change order hash table dump for %ws ===========\n",
  6757. pVme->FSVolInfo.VolumeLabel);
  6758. DPRINT(5, "\n");
  6759. GHT_DUMP_TABLE(5, pVme->ChangeOrderTable);
  6760. DPRINT(5, "\n");
  6761. DPRINT(5, "========= End of Change order hash table dump ================\n");
  6762. DPRINT(5, "\n");
  6763. DPRINT(5, "\n");
  6764. DPRINT1(5, "==== start of USN write filter table dump for %ws ===========\n",
  6765. pVme->FSVolInfo.VolumeLabel);
  6766. DPRINT(5, "\n");
  6767. QHashEnumerateTable(pVme->FrsWriteFilter, QHashDump, NULL);
  6768. DPRINT(5, "\n");
  6769. DPRINT(5, "==== End of USN write filter table dump ===========\n");
  6770. DPRINT(5, "\n");
  6771. DPRINT(5, "\n");
  6772. DPRINT1(5, "==== start of recovery conflict table dump for %ws ===========\n",
  6773. pVme->FSVolInfo.VolumeLabel);
  6774. DPRINT(5, "\n");
  6775. #ifdef RECOVERY_CONFLICT
  6776. QHashEnumerateTable(pVme->RecoveryConflictTable, QHashDump, NULL);
  6777. DPRINT(5, "\n");
  6778. DPRINT(5, "==== End of recovery conflict table dump ===========\n");
  6779. DPRINT(5, "\n");
  6780. #endif // RECOVERY_CONFLICT
  6781. }
  6782. // "TEST CODE ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
  6783. GHT_DUMP_TABLE(3, pVme->ActiveInboundChangeOrderTable);
  6784. //
  6785. // Drop the initial allocation ref so the count can drop to zero
  6786. // when the last reference is released.
  6787. //
  6788. ReleaseVmeRef(pVme);
  6789. RETURN:
  6790. if (!HaveLock) {
  6791. FrsRtlReleaseQueueLock(&pVme->ReplicaListHead);
  6792. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6793. }
  6794. return WStatus;
  6795. }
  6796. VOID
  6797. JrnlCleanupVme(
  6798. IN PVOLUME_MONITOR_ENTRY pVme
  6799. )
  6800. /*++
  6801. Routine Description:
  6802. Free the VME storage when the ref count goes to zero. Called by the
  6803. ReleaseVmeRef() macro. Don't free the Vme proper because other threads
  6804. may still try to take out a ref on the Vme and they will test the ref count
  6805. for zero and fail.
  6806. Arguments:
  6807. pVme -- Volume Monitor Entry to close.
  6808. Return Value:
  6809. Win32 status.
  6810. --*/
  6811. {
  6812. #undef DEBSUB
  6813. #define DEBSUB "JrnlCleanupVme:"
  6814. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6815. if (pVme->ActiveReplicas != 0) {
  6816. DPRINT1(0, "ERROR - ActiveReplicas not yet zero on %ws\n",
  6817. pVme->FSVolInfo.VolumeLabel);
  6818. FRS_ASSERT(!"ActiveReplicas not yet zero on volume");
  6819. return;
  6820. }
  6821. #if 0
  6822. // Note: Don't delete the CO process queue here since CO Accept may still be cleaning up
  6823. // same with aging cache (ChangeOrderTable) and ActiveInboundChangeOrderTable
  6824. FrsRtlDeleteQueue(&pVme->ChangeOrderList);
  6825. GhtDestroyTable(pVme->ChangeOrderTable);
  6826. pVme->ChangeOrderTable = NULL;
  6827. //
  6828. // Cleanup the Active inbound CO Table.
  6829. //
  6830. GhtDestroyTable(pVme->ActiveInboundChangeOrderTable);
  6831. pVme->ActiveInboundChangeOrderTable = NULL;
  6832. #endif
  6833. //
  6834. // Release the Filter Table.
  6835. //
  6836. GhtDestroyTable(pVme->FilterTable);
  6837. pVme->FilterTable = NULL;
  6838. //
  6839. // Release the parent file ID table, the active children table,
  6840. // and the Volume Write Filter.
  6841. //
  6842. pVme->ParentFidTable = FrsFreeType(pVme->ParentFidTable);
  6843. pVme->FrsWriteFilter = FrsFreeType(pVme->FrsWriteFilter);
  6844. pVme->ActiveChildren = FrsFreeType(pVme->ActiveChildren);
  6845. #ifdef RECOVERY_CONFLICT
  6846. pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
  6847. #endif // RECOVERY_CONFLICT
  6848. DPRINT(4, "\n");
  6849. DPRINT1(4, "==== start of NameSpaceTable table dump for %ws ===========\n",
  6850. pVme->FSVolInfo.VolumeLabel);
  6851. DPRINT(4, "\n");
  6852. QHashEnumerateTable(pVme->NameSpaceTable, QHashDump, NULL);
  6853. DPRINT(4, "\n");
  6854. DPRINT(4, "==== End of NameSpaceTable table dump ===========\n");
  6855. DPRINT(4, "\n");
  6856. pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
  6857. // Note: stick the vme on a storage cleanup list
  6858. }
  6859. ULONG
  6860. JrnlCloseVme(
  6861. IN PVOLUME_MONITOR_ENTRY pVme
  6862. )
  6863. /*++
  6864. Routine Description:
  6865. Close this Volume Monitor Entry by doing a shutdown on all replicas.
  6866. We assume the caller has taken the monitor queue lock.
  6867. Arguments:
  6868. pVme -- Volume Monitor Entry to close.
  6869. Return Value:
  6870. Win32 status.
  6871. --*/
  6872. {
  6873. #undef DEBSUB
  6874. #define DEBSUB "JrnlCloseVme:"
  6875. ULONG WStatus = ERROR_SUCCESS;
  6876. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6877. if (pVme->ActiveReplicas == 0) {
  6878. DPRINT1(1, "ActiveReplicas count already zero on %ws\n",
  6879. pVme->FSVolInfo.VolumeLabel);
  6880. return ERROR_INVALID_HANDLE;
  6881. }
  6882. //
  6883. // Remove all active replicas from the VME list.
  6884. //
  6885. ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
  6886. //
  6887. // The iterator pE is type PREPLICA.
  6888. // Caller must have taken the monitor queue lock to avoid lock order prob.
  6889. //
  6890. WStatus = JrnlShutdownSingleReplica(pE, TRUE);
  6891. DPRINT_WS(0, "Error from JrnlShutdownSingleReplica", WStatus);
  6892. );
  6893. if (pVme->ActiveReplicas != 0) {
  6894. DPRINT2(0, "ActiveReplicas count should be zero on %ws. It is %d\n",
  6895. pVme->FSVolInfo.VolumeLabel, pVme->ActiveReplicas);
  6896. WStatus = ERROR_GEN_FAILURE;
  6897. } else {
  6898. WStatus = ERROR_SUCCESS;
  6899. }
  6900. return WStatus;
  6901. }
  6902. ULONG
  6903. JrnlCloseAll(
  6904. VOID
  6905. )
  6906. /*++
  6907. Routine Description:
  6908. Close all entries on the VolumeMonitorQueue.
  6909. Arguments:
  6910. None.
  6911. Return Value:
  6912. None.
  6913. --*/
  6914. {
  6915. #undef DEBSUB
  6916. #define DEBSUB "JrnlCloseAll:"
  6917. ULONG WStatus;
  6918. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6919. if (IsListEmpty(&VolumeMonitorQueue.ListHead)) {
  6920. DPRINT(4, "JrnlCloseAll - VolumeMonitorQueue empty.\n");
  6921. }
  6922. //
  6923. // When all the volumes are stopped journal thread should exit instead
  6924. // of looking for work.
  6925. //
  6926. KillJournalThreads = TRUE;
  6927. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  6928. WStatus = JrnlCloseVme(pE);
  6929. if (pE->JournalState == JRNL_STATE_STOPPED) {
  6930. continue;
  6931. }
  6932. //
  6933. // Drop the lock and wait for the event.
  6934. //
  6935. if (pE->JournalState == JRNL_STATE_STOPPING) {
  6936. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6937. WStatus = WaitForSingleObject(pE->Event, 2000);
  6938. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
  6939. //
  6940. // Check the result state.
  6941. //
  6942. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  6943. if (pE->JournalState == JRNL_STATE_STOPPED) {
  6944. continue;
  6945. }
  6946. }
  6947. DPRINT2(1, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
  6948. pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
  6949. //
  6950. // Force it onto the stopped queue and set the state to ERROR.
  6951. //
  6952. if (pE->IoActive) {
  6953. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
  6954. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  6955. }
  6956. );
  6957. return ERROR_SUCCESS;
  6958. }
  6959. ULONG
  6960. JrnlClose(
  6961. IN HANDLE VolumeHandle
  6962. )
  6963. /*++
  6964. Routine Description:
  6965. This routine walks the VolumeMonitorQueue looking for the entry with the
  6966. given VolumeHandle. It then decrements the reference count and if zero
  6967. we post a completion packet to the JournalCompletionPort so the pending
  6968. journal read request can be canceled.
  6969. Arguments:
  6970. VolumeHandle -- The handle of the volume to close.
  6971. Return Value:
  6972. None.
  6973. --*/
  6974. {
  6975. #undef DEBSUB
  6976. #define DEBSUB "JrnlClose:"
  6977. ULONG WStatus;
  6978. BOOL Found;
  6979. DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
  6980. Found = FALSE;
  6981. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  6982. if (pE->VolumeHandle == VolumeHandle) {
  6983. //
  6984. // Handle matches. Close the Volume Monitor Entry.
  6985. //
  6986. Found = TRUE;
  6987. WStatus = JrnlCloseVme(pE);
  6988. if (pE->JournalState == JRNL_STATE_STOPPED) {
  6989. break;
  6990. }
  6991. //
  6992. // Drop the lock and wait for the event.
  6993. //
  6994. if (pE->JournalState == JRNL_STATE_STOPPING) {
  6995. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  6996. WStatus = WaitForSingleObject(pE->Event, 2000);
  6997. CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
  6998. //
  6999. // Check the result state.
  7000. //
  7001. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7002. if (pE->JournalState == JRNL_STATE_STOPPED) {
  7003. break;
  7004. }
  7005. }
  7006. DPRINT2(0, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
  7007. pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
  7008. //
  7009. // Force it onto the stopped queue and set the state to ERROR.
  7010. //
  7011. if (pE->IoActive) {
  7012. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
  7013. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  7014. }
  7015. break;
  7016. }
  7017. );
  7018. if (!Found) {
  7019. DPRINT1(0, "ERROR - JrnlClose - Handle %08x not found in VolumeMonitorQueue\n",
  7020. VolumeHandle);
  7021. }
  7022. return ERROR_SUCCESS;
  7023. }
  7024. VOID
  7025. JrnlNewVsn(
  7026. IN PCHAR Debsub,
  7027. IN ULONG uLineNo,
  7028. IN PVOLUME_MONITOR_ENTRY pVme,
  7029. IN OUT PULONGLONG NewVsn
  7030. )
  7031. /*++
  7032. Routine Description:
  7033. Assign a new VSN for this volume. Save a recovery point after
  7034. VSN_SAVE_INTERVAL VSNs have been handed out.
  7035. Arguments:
  7036. Debsub -- name of Function calling us for trace.
  7037. uLineNo -- Linenumber of caller for trace.
  7038. pVme -- Volume Monitor Entry with the Vsn state.
  7039. NewVsn -- Ptr to return Vsn
  7040. Return Value:
  7041. Win32 status.
  7042. --*/
  7043. {
  7044. #undef DEBSUB
  7045. #define DEBSUB "JrnlNewVsn:"
  7046. ULONGLONG TempVsn;
  7047. BOOL SaveFlag = FALSE;
  7048. LOCK_VME(pVme);
  7049. TempVsn = ++pVme->FrsVsn;
  7050. *NewVsn = TempVsn;
  7051. if ((TempVsn & (ULONGLONG) VSN_SAVE_INTERVAL) == QUADZERO) {
  7052. SaveFlag = TRUE;
  7053. DebPrint(4,
  7054. (PUCHAR) "++ VSN Save Triggered: NextVsn: %08x %08x"
  7055. " LastUsnSaved: %08x %08x CurrUsnDone: %08x %08x\n",
  7056. Debsub,
  7057. uLineNo,
  7058. PRINTQUAD(TempVsn),
  7059. PRINTQUAD(pVme->LastUsnSavePoint),
  7060. PRINTQUAD(pVme->CurrentUsnRecordDone));
  7061. if (pVme->LastUsnSavePoint < pVme->CurrentUsnRecordDone) {
  7062. pVme->LastUsnSavePoint = pVme->CurrentUsnRecordDone;
  7063. }
  7064. }
  7065. UNLOCK_VME(pVme);
  7066. if (SaveFlag) {
  7067. DbsRequestSaveMark(pVme, FALSE);
  7068. }
  7069. // Note: perf: check for change to use ExInterlockedAddLargeStatistic
  7070. // so we can pitch the LOCK_VME. Note the lock is also used to
  7071. // avoid quadword tearing on LastUsnSavePoint with USN save point
  7072. // test in the journal loop. Need to fix that too
  7073. }
  7074. NTSTATUS
  7075. FrsIssueJournalAsyncRead(
  7076. IN PJBUFFER Jbuff,
  7077. IN PVOLUME_MONITOR_ENTRY pVme
  7078. )
  7079. /*++
  7080. Routine Description:
  7081. This routine posts an async read to the journal specified by the handle
  7082. in the Vme using the buffer in Jbuff.
  7083. Note once the async I/O is submitted (and returns STATUS_PENDING)
  7084. the jbuffer and the VME go to another thread via the I/O Completion port
  7085. so neither we nor the caller can change or look at it unless
  7086. the read failed or completed synchronously (unless you have a lock).
  7087. This is because we could block right after the call, the I/O could complete
  7088. and the JournalReadThread could pick up and process the buffer before the
  7089. calling thread ever runs again.
  7090. Arguments:
  7091. Jbuff - The Journal Buffer to use for the read request.
  7092. pVme - The volume monitor entry for the Async Read,
  7093. Return Value:
  7094. NTSTATUS status
  7095. The win32 error status is ERROR_NOT_FOUND when the USN is not found in
  7096. the journal.
  7097. --*/
  7098. {
  7099. #undef DEBSUB
  7100. #define DEBSUB "FrsIssueJournalAsyncRead:"
  7101. NTSTATUS Status;
  7102. ULONG WStatus;
  7103. READ_USN_JOURNAL_DATA ReadUsnJournalData;
  7104. // Current journal poll delay in NTFS is 2 seconds (doesn't apply for async reads)
  7105. #define DELAY_TIME ((LONGLONG)(-20000000))
  7106. #define FRS_USN_REASON_FILTER (USN_REASON_CLOSE | USN_REASON_FILE_CREATE)
  7107. //
  7108. // Setup the journal read parameters. BytesToWaitFor set to sizeof(USN)+1
  7109. // causes the read journal call to return after the first entry is placed
  7110. // in the buffer. JrnlReadPoint is the point in the journal to start the read.
  7111. // ReturnOnlyOnClose = TRUE means the returned journal entries only
  7112. // include close records (bit <31> of Reason field is set to one).
  7113. // Otherwise you get a record when any reason bit is set, e.g. create,
  7114. // first write, ...
  7115. //
  7116. ReadUsnJournalData.StartUsn = pVme->JrnlReadPoint; // USN JrnlReadPoint
  7117. ReadUsnJournalData.ReasonMask = FRS_USN_REASON_FILTER; // ULONG ReasonMask
  7118. ReadUsnJournalData.ReturnOnlyOnClose = FALSE; // ULONG ReturnOnlyOnClose
  7119. ReadUsnJournalData.Timeout = DELAY_TIME; // ULONGLONG Timeout
  7120. ReadUsnJournalData.BytesToWaitFor = sizeof(USN)+1; // ULONGLONG BytesToWaitFor
  7121. ReadUsnJournalData.UsnJournalID = pVme->UsnJournalData.UsnJournalID; // Journal ID.
  7122. //
  7123. // This read completes when either the buffer is full or the BytesToWaitFor
  7124. // parameter in the ReadUsnJournalData parameter block is exceeded.
  7125. // The DelayTime in the ReadUsnJournalData parameter block controls how
  7126. // often the NTFS code wakes up and checks the buffer. It is NOT a timeout
  7127. // on this call. Setting BytesToWaitFor to sizeof(USN) + 1
  7128. // means that as soon as any data shows up in the journal the call completes.
  7129. // Using this call with async IO lets us monitor a large number of volumes
  7130. // with a few threads.
  7131. //
  7132. // You can't really have multiple read requests outstanding on a single
  7133. // journal since you don't know where the next read will start until the
  7134. // previous read completes. Even though only one I/O can be outstanding
  7135. // per volume journal it is still possible to have multiple Jbuffs queued
  7136. // for USN processing because the rate of generating new journal entries
  7137. // may exceed the rate at which the data can be processed.
  7138. //
  7139. //
  7140. // Init the buffer Descriptor.
  7141. //
  7142. Jbuff->pVme = pVme;
  7143. Jbuff->Iosb.Information = 0;
  7144. Jbuff->Iosb.Status = 0;
  7145. Jbuff->Overlap.hEvent = NULL;
  7146. Jbuff->JrnlReadPoint = pVme->JrnlReadPoint;
  7147. Jbuff->WStatus = ERROR_IO_PENDING;
  7148. Jbuff->FileHandle = pVme->VolumeHandle;
  7149. //
  7150. // To catch I/O completions with no data.
  7151. //
  7152. ZeroMemory(Jbuff->DataBuffer, sizeof(USN) + sizeof(USN_RECORD));
  7153. InterlockedIncrement(&JournalActiveIoRequests);
  7154. Status = NtFsControlFile(
  7155. Jbuff->FileHandle, // IN HANDLE FileHandle,
  7156. NULL, // IN HANDLE Event OPTIONAL,
  7157. NULL, // IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
  7158. &Jbuff->Overlap, // IN PVOID ApcContext OPTIONAL,
  7159. &Jbuff->Iosb, // OUT PIO_STATUS_BLOCK IoStatusBlock,
  7160. FSCTL_READ_USN_JOURNAL, // IN ULONG FsControlCode,
  7161. &ReadUsnJournalData, // IN PVOID InputBuffer OPTIONAL,
  7162. sizeof(ReadUsnJournalData), // IN ULONG InputBufferLength,
  7163. Jbuff->DataBuffer, // OUT PVOID OutputBuffer OPTIONAL,
  7164. Jbuff->BufferSize ); // IN ULONG OutputBufferLength
  7165. WStatus = FrsSetLastNTError(Status);
  7166. DPRINT2_WS(4, "ReadUsnJournalData - NTStatus %08lx, USN = %08x %08x",
  7167. Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
  7168. if (!NT_SUCCESS(Status)) {
  7169. //
  7170. // I/O not started so it doesn't complete through the port.
  7171. //
  7172. InterlockedDecrement(&JournalActiveIoRequests);
  7173. DPRINT2_WS(0, "ReadUsnJournalData Failed - NTStatus %08lx, USN = %08x %08x",
  7174. Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
  7175. }
  7176. return Status;
  7177. }
  7178. BOOL
  7179. JrnlGetQueuedCompletionStatus(
  7180. HANDLE CompletionPort,
  7181. LPDWORD lpNumberOfBytesTransferred,
  7182. PULONG_PTR lpCompletionKey,
  7183. LPOVERLAPPED *lpOverlapped
  7184. )
  7185. /*++
  7186. Routine Description:
  7187. ** NOTE ** Imported version of Win32 function so we can access NTStatus
  7188. return value to seperate out the 32 odd NT to Win32 mappings for
  7189. the ERROR_INVALID_PARAMETER Win32 error code.
  7190. This function waits for pending I/O operations associated with the
  7191. specified completion port to complete. Server applications may have
  7192. several threads issuing this call on the same completion port. As
  7193. I/O operations complete, they are queued to this port. If threads
  7194. are actively waiting in this call, queued requests complete their
  7195. call.
  7196. This API returns a boolean value.
  7197. A value of TRUE means that a pending I/O completed successfully.
  7198. The the number of bytes transfered during the I/O, the completion
  7199. key that indicates which file the I/O occured on, and the overlapped
  7200. structure address used in the original I/O are all returned.
  7201. A value of FALSE indicates one ow two things:
  7202. If *lpOverlapped is NULL, no I/O operation was dequeued. This
  7203. typically means that an error occured while processing the
  7204. parameters to this call, or that the CompletionPort handle has been
  7205. closed or is otherwise invalid. GetLastError() may be used to
  7206. further isolate this.
  7207. If *lpOverlapped is non-NULL, an I/O completion packet was dequeud,
  7208. but the I/O operation resulted in an error. GetLastError() can be
  7209. used to further isolate the I/O error. The the number of bytes
  7210. transfered during the I/O, the completion key that indicates which
  7211. file the I/O occured on, and the overlapped structure address used
  7212. in the original I/O are all returned.
  7213. Arguments:
  7214. CompletionPort - Supplies a handle to a completion port to wait on.
  7215. lpNumberOfBytesTransferred - Returns the number of bytes transfered during the
  7216. I/O operation whose completion is being reported.
  7217. lpCompletionKey - Returns a completion key value specified during
  7218. CreateIoCompletionPort. This is a per-file key that can be used
  7219. to tall the caller the file that an I/O operation completed on.
  7220. lpOverlapped - Returns the address of the overlapped structure that
  7221. was specified when the I/O was issued. The following APIs may
  7222. complete using completion ports. This ONLY occurs if the file
  7223. handle is associated with with a completion port AND an
  7224. overlapped structure was passed to the API.
  7225. LockFileEx
  7226. WriteFile
  7227. ReadFile
  7228. DeviceIoControl
  7229. WaitCommEvent
  7230. ConnectNamedPipe
  7231. TransactNamedPipe
  7232. Return Value:
  7233. TRUE - An I/O operation completed successfully.
  7234. lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
  7235. are all valid.
  7236. FALSE - If lpOverlapped is NULL, the operation failed and no I/O
  7237. completion data is retured. GetLastError() can be used to
  7238. further isolate the cause of the error (bad parameters, invalid
  7239. completion port handle). Otherwise, a pending I/O operation
  7240. completed, but it completed with an error. GetLastError() can
  7241. be used to further isolate the I/O error.
  7242. lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
  7243. are all valid.
  7244. --*/
  7245. {
  7246. #undef DEBSUB
  7247. #define DEBSUB "JrnlGetQueuedCompletionStatus:"
  7248. IO_STATUS_BLOCK IoSb;
  7249. NTSTATUS Status;
  7250. LPOVERLAPPED LocalOverlapped;
  7251. BOOL rv;
  7252. Status = NtRemoveIoCompletion(CompletionPort,
  7253. (PVOID *)lpCompletionKey,
  7254. (PVOID *)&LocalOverlapped,
  7255. &IoSb,
  7256. NULL); // Infinite Timeout.
  7257. if ( !NT_SUCCESS(Status) || Status == STATUS_TIMEOUT ) {
  7258. *lpOverlapped = NULL;
  7259. if ( Status == STATUS_TIMEOUT ) {
  7260. SetLastError(WAIT_TIMEOUT);
  7261. } else {
  7262. FrsSetLastNTError(Status);
  7263. }
  7264. rv = FALSE;
  7265. DPRINT_NT(1, "NtRemoveIoCompletion : ", Status);
  7266. } else {
  7267. *lpOverlapped = LocalOverlapped;
  7268. *lpNumberOfBytesTransferred = (DWORD)IoSb.Information;
  7269. if ( !NT_SUCCESS(IoSb.Status) ){
  7270. FrsSetLastNTError( IoSb.Status );
  7271. DPRINT_NT(1, "NtRemoveIoCompletion : ", IoSb.Status);
  7272. rv = FALSE;
  7273. } else {
  7274. rv = TRUE;
  7275. }
  7276. }
  7277. return rv;
  7278. }
  7279. DWORD
  7280. WINAPI
  7281. JournalReadThread(
  7282. IN LPVOID Context
  7283. )
  7284. /*++
  7285. Routine Description:
  7286. This routine processes the I/O completions on the JournalCompletionPort.
  7287. It also handles cancel requests posted to the port when the volume
  7288. reference count goes to zero. The basic flow is wait on the port,
  7289. check for errors, check for cancel requests and do a cancel, check for
  7290. read success returns. When data comes back. get the next USN to use,
  7291. queue the buffer to the JournalProcessQueue, get a new buffer off
  7292. the free list and post a new read to the journal handle.
  7293. For canceled requests or requests that complete with an error
  7294. put the Volume Monitor Entry on the VolumeMonitorStopQueue along with
  7295. the error status in the entry.
  7296. This one thread processes all the read requests for all the NTFS volumes
  7297. we monitor. Once the first read is posted by an external routine we
  7298. pick it up from here.
  7299. TODO: When we run out of free journal buffers, create more (up to a limit).
  7300. Then put code in the processing loop to trim back the freelist.
  7301. Arguments:
  7302. Context not used. The Journal Global state is implied.
  7303. Thread Return Value:
  7304. NTSTATUS status
  7305. --*/
  7306. {
  7307. #undef DEBSUB
  7308. #define DEBSUB "JournalReadThread:"
  7309. LPOVERLAPPED JbuffOverlap;
  7310. DWORD IoSize;
  7311. PVOLUME_MONITOR_ENTRY pVme;
  7312. PJBUFFER Jbuff;
  7313. ULONG WStatus, WStatus2;
  7314. NTSTATUS Status;
  7315. BOOL StoppedOne;
  7316. BOOL ErrorFlag;
  7317. PLIST_ENTRY Entry;
  7318. USN NextJrnlReadPoint;
  7319. PCOMMAND_PACKET CmdPkt;
  7320. BY_HANDLE_FILE_INFORMATION FileInfo;
  7321. CHAR TimeString[32];
  7322. IO_STATUS_BLOCK Iosb;
  7323. ULONGLONG VolumeInfoData[(sizeof(FILE_FS_VOLUME_INFORMATION) +
  7324. MAXIMUM_VOLUME_LABEL_LENGTH + 7)/8];
  7325. PFILE_FS_VOLUME_INFORMATION VolumeInfo =
  7326. (PFILE_FS_VOLUME_INFORMATION)VolumeInfoData;
  7327. //
  7328. // Try-Finally
  7329. //
  7330. try {
  7331. //
  7332. // Capture exception.
  7333. //
  7334. try {
  7335. WAIT_FOR_WORK:
  7336. //
  7337. // Look for a Volume Monitor Entry to be placed on the work queue.
  7338. // The agent that put the entry on the queue also started the first
  7339. // read to the journal so we can start looking for I/O completions.
  7340. //
  7341. while (TRUE) {
  7342. WStatus = FrsRtlWaitForQueueFull(&VolumeMonitorQueue, 10000);
  7343. DPRINT1_WS(5, "Wait on VolumeMonitorQueue: Count: %d",
  7344. VolumeMonitorQueue.Count, WStatus);
  7345. if (WIN_SUCCESS(WStatus)) {
  7346. break;
  7347. }
  7348. switch (WStatus) {
  7349. case WAIT_TIMEOUT:
  7350. if (KillJournalThreads) {
  7351. //
  7352. // Terminate the thread.
  7353. //
  7354. JournalReadThreadHandle = NULL;
  7355. ExitThread(WStatus);
  7356. }
  7357. break;
  7358. case ERROR_INVALID_HANDLE:
  7359. //
  7360. // The VolumeMonitorQueue was rundown. Exit.
  7361. //
  7362. JournalReadThreadHandle = NULL;
  7363. ExitThread(WStatus);
  7364. break;
  7365. default:
  7366. DPRINT_WS(0, "Unexpected status from FrsRtlWaitForQueueFull", WStatus);
  7367. JournalReadThreadHandle = NULL;
  7368. ExitThread(WStatus);
  7369. }
  7370. }
  7371. //
  7372. // Loop as long as we have volumes to monitor or have I/O outstanding on the port.
  7373. //
  7374. while ((VolumeMonitorQueue.Count != 0) ||
  7375. (JournalActiveIoRequests != 0) ) {
  7376. pVme = NULL;
  7377. JbuffOverlap = NULL;
  7378. WStatus = ERROR_SUCCESS;
  7379. IoSize = 0;
  7380. DPRINT(5, "Waiting on JournalCompletionPort \n");
  7381. ErrorFlag = !JrnlGetQueuedCompletionStatus(JournalCompletionPort,
  7382. &IoSize,
  7383. (PULONG_PTR) &pVme,
  7384. &JbuffOverlap);
  7385. //INFINITE);
  7386. //
  7387. // Check for an error return and see if the completion port has
  7388. // disappeared.
  7389. //
  7390. if (ErrorFlag) {
  7391. WStatus = GetLastError();
  7392. DPRINT_WS(3, "Error from GetQueuedCompletionStatus", WStatus);
  7393. DPRINT5(3, "CompPort: %08x, IoSize: %08x, pVme: %08x, OvLap: %08x, VolHandle: %08x\n",
  7394. JournalCompletionPort, IoSize, pVme, JbuffOverlap, pVme->VolumeHandle);
  7395. if (WStatus == ERROR_INVALID_HANDLE) {
  7396. JournalCompletionPort = NULL;
  7397. JournalReadThreadHandle = NULL;
  7398. ExitThread(WStatus);
  7399. }
  7400. if (WStatus == ERROR_INVALID_PARAMETER) {
  7401. DPRINT(0, "ERROR- Invalid Param from GetQueuedCompletionStatus\n");
  7402. if (!GetFileInformationByHandle(JournalCompletionPort, &FileInfo)) {
  7403. WStatus2 = GetLastError();
  7404. DPRINT_WS(0, "Error from GetFileInformationByHandle", WStatus2);
  7405. } else {
  7406. CHAR FlagBuf[120];
  7407. DPRINT(0, "Info on JournalCompletionPort\n");
  7408. FrsFlagsToStr(FileInfo.dwFileAttributes, FileAttrFlagNameTable,
  7409. sizeof(FlagBuf), FlagBuf);
  7410. DPRINT2(0, "FileAttributes %08x Flags [%s]\n",
  7411. FileInfo.dwFileAttributes, FlagBuf);
  7412. FileTimeToString(&FileInfo.ftCreationTime, TimeString);
  7413. DPRINT1(0, "CreationTime %s\n", TimeString);
  7414. FileTimeToString(&FileInfo.ftLastAccessTime, TimeString);
  7415. DPRINT1(0, "LastAccessTime %08x\n", TimeString);
  7416. FileTimeToString(&FileInfo.ftLastWriteTime, TimeString);
  7417. DPRINT1(0, "LastWriteTime %08x\n", TimeString);
  7418. DPRINT1(0, "VolumeSerialNumber %08x\n", FileInfo.dwVolumeSerialNumber);
  7419. DPRINT1(0, "FileSizeHigh %08x\n", FileInfo.nFileSizeHigh);
  7420. DPRINT1(0, "FileSizeLow %08x\n", FileInfo.nFileSizeLow);
  7421. DPRINT1(0, "NumberOfLinks %08x\n", FileInfo.nNumberOfLinks);
  7422. DPRINT1(0, "FileIndexHigh %08x\n", FileInfo.nFileIndexHigh);
  7423. DPRINT1(0, "FileIndexLow %08x\n", FileInfo.nFileIndexLow);
  7424. }
  7425. //
  7426. // See if the volume handle still works.
  7427. //
  7428. DPRINT(0, "Dumping Volume information\n");
  7429. Status = NtQueryVolumeInformationFile(pVme->VolumeHandle,
  7430. &Iosb,
  7431. VolumeInfo,
  7432. sizeof(VolumeInfoData),
  7433. FileFsVolumeInformation);
  7434. if ( NT_SUCCESS(Status) ) {
  7435. VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
  7436. FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
  7437. DPRINT5(4,"%-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
  7438. VolumeInfo->VolumeLabel,
  7439. VolumeInfo->VolumeLabelLength,
  7440. (VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
  7441. VolumeInfo->VolumeSerialNumber,
  7442. TimeString);
  7443. } else {
  7444. DPRINT_NT(0, "ERROR - Volume root QueryVolumeInformationFile failed.", Status);
  7445. }
  7446. //
  7447. // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  7448. // begin workaround for journal bug.
  7449. //
  7450. //
  7451. InterlockedDecrement(&JournalActiveIoRequests);
  7452. if (JbuffOverlap == NULL) {
  7453. //
  7454. // No packet dequeued. Unexpected error Cancel all I/O requests.
  7455. //
  7456. DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
  7457. pVme = NULL;
  7458. WStatus = E_UNEXPECTED;
  7459. goto STOP_JOURNAL_IO;
  7460. }
  7461. //
  7462. // Get the base of the Jbuff struct containing this overlap struct.
  7463. //
  7464. Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
  7465. //DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
  7466. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7467. pVme->ActiveIoRequests -= 1;
  7468. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  7469. //
  7470. // If I/O on this journal has been stopped or the I/O operation
  7471. // was aborted then free the Jbuff. There should be at most one
  7472. // I/O per volume that comes in with the aborted status.
  7473. //
  7474. // Note: We can still have other Jbufs queued for processing by the
  7475. // USN Journal processing thread for this VME.
  7476. //
  7477. if ((!pVme->IoActive) ||
  7478. (WStatus == ERROR_OPERATION_ABORTED) ) {
  7479. DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
  7480. DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
  7481. pVme->FSVolInfo.VolumeLabel, IoSize);
  7482. //
  7483. // How do we know when all outstanding Jbuffs have
  7484. // been retired for this VME? need an interlocked ref count?
  7485. // Why does this matter?
  7486. //
  7487. //DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
  7488. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7489. Jbuff = NULL;
  7490. //
  7491. // Even if the operation was aborted. If I/O has not stopped
  7492. // (e.g. a quick pause-unpause sequence) then start another read.
  7493. //
  7494. if (!pVme->IoActive) {
  7495. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7496. continue;
  7497. }
  7498. }
  7499. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7500. DPRINT(0, "Journal request retry\n");
  7501. DPRINT1(0, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  7502. if (Jbuff != NULL ) {
  7503. DPRINT1(0, "jb: tf %08x (BUG INVAL PARAM)\n", Jbuff);
  7504. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7505. Jbuff = NULL;
  7506. }
  7507. //
  7508. // Wait and then retry the journal read again.
  7509. //
  7510. Sleep(500);
  7511. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7512. goto START_NEXT_READ;
  7513. //
  7514. // End workaround for journal bug.
  7515. // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  7516. //
  7517. //FRS_ASSERT(WStatus != ERROR_INVALID_PARAMETER);
  7518. }
  7519. //
  7520. // Error may be ERROR_OPERATION_ABORTED but shouldn't be success.
  7521. // This gets sorted out below.
  7522. //
  7523. FRS_ASSERT(WStatus != ERROR_SUCCESS);
  7524. }
  7525. //
  7526. // Check if no packet was dequeued from the port.
  7527. //
  7528. if (JbuffOverlap == NULL) {
  7529. //
  7530. // No packet dequeued. Unexpected error Cancel all I/O requests.
  7531. //
  7532. DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
  7533. pVme = NULL;
  7534. WStatus = E_UNEXPECTED;
  7535. goto STOP_JOURNAL_IO;
  7536. }
  7537. //
  7538. // A packet was dequeued from the port. First check if this
  7539. // is a request to stop or pause I/O on this journal.
  7540. // There is no Jbuff with this request and the overlap struct
  7541. // is part of the VME.
  7542. //
  7543. if (IoSize == FRS_CANCEL_JOURNAL_READ) {
  7544. pVme->StopIo = FALSE; // VME Overlap struct available.
  7545. DPRINT1(4, "Cancel Journal Read for %ws\n", pVme->FSVolInfo.VolumeLabel);
  7546. //
  7547. // cancel any outstanding I/O on this volume handle and
  7548. // deactivate the VME.
  7549. // Note: Any I/O on this volume handle that has already
  7550. // been completed and queued to the completion port
  7551. // is not affected by the cancel. Use !pVme->IoActive to
  7552. // throw those requests away.
  7553. //
  7554. WStatus = ERROR_SUCCESS;
  7555. goto STOP_JOURNAL_IO;
  7556. } else
  7557. if (IoSize == FRS_PAUSE_JOURNAL_READ) {
  7558. DPRINT2(4, "Pause Journal Read for %ws. Jrnl State: %s\n",
  7559. pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
  7560. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7561. //
  7562. // This is a pause journal request. Stop I/O on the journal
  7563. // but don't deactivate the VME.
  7564. //
  7565. pVme->StopIo = FALSE; // VME Overlap struct available.
  7566. if (pVme->JournalState == JRNL_STATE_PAUSE1) {
  7567. //
  7568. // Cancel I/O on the journal read handle and put a second
  7569. // pause request on the port so we know it was done.
  7570. //
  7571. pVme->IoActive = FALSE;
  7572. if (!CancelIo(pVme->VolumeHandle)) {
  7573. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  7574. }
  7575. pVme->WStatus = ERROR_SUCCESS;
  7576. WStatus = JrnlSubmitReadThreadRequest(pVme,
  7577. FRS_PAUSE_JOURNAL_READ,
  7578. JRNL_STATE_PAUSE2);
  7579. DPRINT_WS(0, "Error from JrnlSubmitReadThreadRequest", WStatus);
  7580. } else
  7581. if (pVme->JournalState == JRNL_STATE_PAUSE2) {
  7582. //
  7583. // This is the second pause request so there will be no more
  7584. // journal data buffers on this volume. (NOT TRUE, sometimes
  7585. // the abort takes awhile but since IoActive is clear the
  7586. // buffer will be ignored.)
  7587. // Send a paused complete command to the journal process queue.
  7588. // When it gets to the head of the queue, all prior queued
  7589. // journal buffers will have been processed so the filter table
  7590. // can now be updated.
  7591. //
  7592. CmdPkt = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_PAUSED);
  7593. CmdPkt->Parameters.JournalRequest.Replica = NULL;
  7594. CmdPkt->Parameters.JournalRequest.pVme = pVme;
  7595. FrsSubmitCommand(CmdPkt, FALSE);
  7596. } else {
  7597. //
  7598. // If we are stopping while in the middle of a Pause request
  7599. // the stop takes precedence.
  7600. //
  7601. if ((pVme->JournalState != JRNL_STATE_STOPPING) &&
  7602. (pVme->JournalState != JRNL_STATE_STOPPED)) {
  7603. DPRINT2(0, "ERROR: Invalid Journal State: %s on pause request on volume %ws,\n",
  7604. RSS_NAME(pVme->JournalState), pVme->FSVolInfo.VolumeLabel);
  7605. }
  7606. }
  7607. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7608. continue;
  7609. }
  7610. //
  7611. // Not a cancel or pause packet. It must be a journal read response.
  7612. //
  7613. InterlockedDecrement(&JournalActiveIoRequests);
  7614. //
  7615. // Get the base of the Jbuff struct containing this overlap struct.
  7616. //
  7617. Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
  7618. //DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
  7619. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7620. pVme->ActiveIoRequests -= 1;
  7621. FRS_ASSERT(pVme->ActiveIoRequests == 0);
  7622. //
  7623. // If I/O on this journal has been stopped or the I/O operation
  7624. // was aborted then free the Jbuff. There should be at most one
  7625. // I/O per volume that comes in with the aborted status.
  7626. //
  7627. // Note: We can still have other Jbufs queued for processing by the
  7628. // USN Journal processing thread for this VME.
  7629. //
  7630. if ((!pVme->IoActive) ||
  7631. (IoSize < sizeof(USN)) ||
  7632. (WStatus == ERROR_OPERATION_ABORTED) ) {
  7633. DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
  7634. DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
  7635. pVme->FSVolInfo.VolumeLabel, IoSize);
  7636. //
  7637. // How do we know when all outstanding Jbuffs have
  7638. // been retired for this VME? need an interlocked ref count?
  7639. // Why does it matter?
  7640. //
  7641. //DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
  7642. FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
  7643. //
  7644. // Even if the operation was aborted. If I/O has not stopped
  7645. // (e.g. a quick pause-unpause sequence) then start another read.
  7646. //
  7647. if (pVme->IoActive) {
  7648. goto START_NEXT_READ;
  7649. }
  7650. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7651. continue;
  7652. }
  7653. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7654. /**************************************************************
  7655. * *
  7656. * We have a successfull I/O completion packet. *
  7657. * Return the status and data length then put down *
  7658. * another read at the Next uSN on the journal. *
  7659. * *
  7660. **************************************************************/
  7661. Jbuff->WStatus = WStatus;
  7662. Jbuff->DataLength = IoSize;
  7663. //
  7664. // Update next USN in VME and send the journal buffer out for processing.
  7665. //
  7666. NextJrnlReadPoint = *(USN *)(Jbuff->DataBuffer);
  7667. if (NextJrnlReadPoint < pVme->JrnlReadPoint) {
  7668. DPRINT2(0, "USN error: Next < Previous, Next %08x %08x, Prev: %08x %08x\n",
  7669. PRINTQUAD(NextJrnlReadPoint), PRINTQUAD(pVme->JrnlReadPoint));
  7670. WStatus = ERROR_INVALID_DATA;
  7671. goto STOP_JOURNAL_IO;
  7672. }
  7673. pVme->JrnlReadPoint = NextJrnlReadPoint;
  7674. DPRINT1(5, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
  7675. //DPRINT2(5, "jb: tu %08x (len: %d)\n", Jbuff, Jbuff->DataLength);
  7676. FrsRtlInsertTailQueue(&JournalProcessQueue, &Jbuff->ListEntry);
  7677. //
  7678. // If the read request failed for some reason (e.g. ERROR_NOT_FOUND)
  7679. // let USN processing figure it out and start I/O back up as appropriate.
  7680. //
  7681. if (!WIN_SUCCESS(WStatus)) {
  7682. pVme->IoActive = FALSE;
  7683. continue;
  7684. }
  7685. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7686. START_NEXT_READ:
  7687. //
  7688. // Get a free buffer and start another read on the journal.
  7689. //
  7690. WStatus = JrnlUnPauseVolume(pVme, NULL, TRUE);
  7691. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7692. //
  7693. // Check for abort and cancel all I/O.
  7694. //
  7695. if (WStatus == ERROR_REQUEST_ABORTED) {
  7696. pVme = NULL;
  7697. DPRINT(0, "JournalFreeQueue Abort. Stopping all journal I/O\n");
  7698. goto STOP_JOURNAL_IO;
  7699. }
  7700. //
  7701. // If the response is success or busy then we can expect to see a
  7702. // buffer come through the port.
  7703. //
  7704. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_BUSY)) {
  7705. goto STOP_JOURNAL_IO;
  7706. }
  7707. continue;
  7708. STOP_JOURNAL_IO:
  7709. //
  7710. // Test if stopping I/O on just one volume.
  7711. //
  7712. if (pVme != NULL) {
  7713. FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
  7714. //
  7715. // We should send a cmd packet to the journal process queue since
  7716. // that is the point where all pending journal buffers are completed.
  7717. //
  7718. SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
  7719. if (!CancelIo(pVme->VolumeHandle)) {
  7720. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  7721. }
  7722. VmeDeactivate(&VolumeMonitorQueue, pVme, WStatus);
  7723. SetEvent(pVme->Event);
  7724. FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
  7725. continue;
  7726. }
  7727. //
  7728. // Stop all I/O on all volume journals.
  7729. //
  7730. StoppedOne = FALSE;
  7731. ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  7732. //
  7733. // The loop iterator pE is of type VOLUME_MONITOR_ENTRY.
  7734. //
  7735. if (pE->JournalState != JRNL_STATE_STOPPED) {
  7736. StoppedOne = TRUE;
  7737. SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_STOPPED);
  7738. if (!CancelIo(pE->VolumeHandle)) {
  7739. DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
  7740. }
  7741. }
  7742. VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
  7743. SetEvent(pE->Event);
  7744. );
  7745. if (!StoppedOne && (JbuffOverlap == NULL)) {
  7746. //
  7747. // We didn't stop anything and nothing came thru the port.
  7748. // Must be hung.
  7749. //
  7750. DPRINT(0, "ERROR - Readjournalthread hung. Killing thread\n");
  7751. JournalReadThreadHandle = NULL;
  7752. ExitThread(WStatus);
  7753. }
  7754. } // end of while()
  7755. if (KillJournalThreads) {
  7756. //
  7757. // Terminate the thread.
  7758. //
  7759. DPRINT(4, "Readjournalthread Terminating.\n");
  7760. JournalReadThreadHandle = NULL;
  7761. ExitThread(ERROR_SUCCESS);
  7762. }
  7763. goto WAIT_FOR_WORK;
  7764. //
  7765. // Get exception status.
  7766. //
  7767. } except (EXCEPTION_EXECUTE_HANDLER) {
  7768. GET_EXCEPTION_CODE(WStatus);
  7769. }
  7770. } finally {
  7771. if (WIN_SUCCESS(WStatus)) {
  7772. if (AbnormalTermination()) {
  7773. WStatus = ERROR_OPERATION_ABORTED;
  7774. }
  7775. }
  7776. DPRINT_WS(0, "Read Journal Thread finally.", WStatus);
  7777. //
  7778. // Trigger FRS shutdown if we terminated abnormally.
  7779. //
  7780. if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
  7781. JournalReadThreadHandle = NULL;
  7782. DPRINT(0, "Readjournalthread terminated abnormally, forcing service shutdown.\n");
  7783. FrsIsShuttingDown = TRUE;
  7784. SetEvent(ShutDownEvent);
  7785. }
  7786. }
  7787. return WStatus;
  7788. }
  7789. ULONG
  7790. JrnlGetEndOfJournal(
  7791. IN PVOLUME_MONITOR_ENTRY pVme,
  7792. OUT USN *EndOfJournal
  7793. )
  7794. /*++
  7795. Routine Description:
  7796. Get the address of the end of the USN Journal. This is used for starting
  7797. a new replica set at the end of the journal. The replica tree starts out
  7798. empty so there is no need to read through several megabytes of
  7799. USN records. It is also used to find the end of the journal before
  7800. recovery starts.
  7801. Arguments:
  7802. pVme - The Volume Monitor struct to initialize. It provides the volume
  7803. handle.
  7804. EndOfJournal - Returned USN of the end of the Journal or 0.
  7805. Return Value:
  7806. Win32 status.
  7807. --*/
  7808. {
  7809. #undef DEBSUB
  7810. #define DEBSUB "JrnlGetEndOfJournal:"
  7811. USN_JOURNAL_DATA UsnJrnlData;
  7812. DWORD WStatus;
  7813. ULONG BytesReturned = 0;
  7814. *EndOfJournal = QUADZERO;
  7815. //
  7816. // The following call returns:
  7817. //
  7818. // UsnJournalID Current Instance of Journal
  7819. // FirstUsn First position that can be read from journal
  7820. // NextUsn Next position that will be written to the journal
  7821. // LowestValidUsn First record that was written into the journal for
  7822. // this journal instance. It is possible that enumerating
  7823. // the files on disk will return a USN lower than this
  7824. // value. This indicates that the journal has been
  7825. // restamped since the last USN was written for this file.
  7826. // It means that the file may have been changed and
  7827. // journal data was lost.
  7828. // MaxUsn The largest change USN the journal will support.
  7829. // MaximumSize
  7830. // AllocationDelta
  7831. //
  7832. if (!DeviceIoControl(pVme->VolumeHandle,
  7833. FSCTL_QUERY_USN_JOURNAL,
  7834. NULL, 0,
  7835. &UsnJrnlData, sizeof(UsnJrnlData),
  7836. &BytesReturned, NULL)) {
  7837. WStatus = GetLastError();
  7838. DPRINT_WS(0, "Error from FSCTL_QUERY_USN_JOURNAL", WStatus);
  7839. if (WStatus == ERROR_NOT_READY) {
  7840. //
  7841. // Volume is being dismounted.
  7842. //
  7843. } else
  7844. if (WStatus == ERROR_BAD_COMMAND) {
  7845. //
  7846. // NT status was INVALID_DEVICE_STATE.
  7847. //
  7848. } else
  7849. if (WStatus == ERROR_INVALID_PARAMETER) {
  7850. //
  7851. // Bad Handle.
  7852. //
  7853. } else
  7854. if (WStatus == ERROR_JOURNAL_DELETE_IN_PROGRESS) {
  7855. //
  7856. // Journal being deleted.
  7857. //
  7858. } else
  7859. if (WStatus == ERROR_JOURNAL_NOT_ACTIVE) {
  7860. //
  7861. // Journal ???.
  7862. //
  7863. }
  7864. return WStatus;
  7865. }
  7866. if (BytesReturned != sizeof(UsnJrnlData)) {
  7867. //
  7868. // Unexpected result return.
  7869. //
  7870. return ERROR_JOURNAL_NOT_ACTIVE;
  7871. }
  7872. DPRINT1(4, ":S: EOJ from jrnl query %08x %08x\n", PRINTQUAD(UsnJrnlData.NextUsn));
  7873. //
  7874. // Return the next read point for the journal.
  7875. //
  7876. *EndOfJournal = UsnJrnlData.NextUsn;
  7877. return ERROR_SUCCESS;
  7878. }
  7879. ULONG
  7880. JrnlEnumerateFilterTreeBU(
  7881. PGENERIC_HASH_TABLE Table,
  7882. PFILTER_TABLE_ENTRY FilterEntry,
  7883. PJRNL_FILTER_ENUM_ROUTINE Function,
  7884. PVOID Context
  7885. )
  7886. /*++
  7887. Routine Description:
  7888. This routine walks through the entries in the Volume filter table connected
  7889. by the child list starting with the FilterEntry provided. The traversal
  7890. is bottom up. At each node the function provided is called with the
  7891. entry address and the context pointer.
  7892. It is assumed that the caller has acquired the Filter Table Child list
  7893. lock for the Replica set being traversed.
  7894. Before calling the function with an entry we increment the ref count.
  7895. The Called function must DECREMENT the ref count (or delete the entry).
  7896. Arguments:
  7897. Table - The context of the Hash Table to enumerate.
  7898. FilterEntry - The Filter Entry node to start at.
  7899. Function - The function to call for each entry in the subtree. It is of
  7900. of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
  7901. enumeration else true.
  7902. Context - A context ptr to pass through to the Function.
  7903. Return Value:
  7904. The status code from the argument function.
  7905. --*/
  7906. {
  7907. #undef DEBSUB
  7908. #define DEBSUB "JrnlEnumerateFilterTreeBU:"
  7909. PLIST_ENTRY ListHead;
  7910. ULONG WStatus;
  7911. //
  7912. // Check for no entries in tree.
  7913. //
  7914. if (FilterEntry == NULL) {
  7915. return ERROR_SUCCESS;
  7916. }
  7917. INCREMENT_FILTER_REF_COUNT(FilterEntry);
  7918. ListHead = &FilterEntry->ChildHead;
  7919. ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
  7920. //
  7921. // pE is of type PFILTER_TABLE_ENTRY.
  7922. //
  7923. if (!IsListEmpty(&pE->ChildHead)) {
  7924. //
  7925. // Recurse on the child's list head.
  7926. //
  7927. WStatus = JrnlEnumerateFilterTreeBU(Table, pE, Function, Context);
  7928. } else {
  7929. //
  7930. // Apply the function to the node.
  7931. // The function could remove the node from the list but the list macro
  7932. // has captured the Flink so the traversal can continue.
  7933. //
  7934. INCREMENT_FILTER_REF_COUNT(pE);
  7935. WStatus = (Function)(Table, pE, Context);
  7936. }
  7937. if (!WIN_SUCCESS(WStatus)) {
  7938. goto RETURN;
  7939. }
  7940. );
  7941. WStatus = (Function)(Table, FilterEntry, Context);
  7942. RETURN:
  7943. return WStatus;
  7944. }
  7945. ULONG
  7946. JrnlEnumerateFilterTreeTD(
  7947. PGENERIC_HASH_TABLE Table,
  7948. PFILTER_TABLE_ENTRY FilterEntry,
  7949. PJRNL_FILTER_ENUM_ROUTINE Function,
  7950. PVOID Context
  7951. )
  7952. /*++
  7953. Routine Description:
  7954. This routine walks through the entries in the Volume filter table connected
  7955. by the child list starting with the FilterEntry provided. The traversal
  7956. is Top Down. At each node the function provided is called with the
  7957. entry address and the context pointer.
  7958. It is assumed that the caller has acquired the Filter Table Child list
  7959. lock for the Replica set being traversed.
  7960. Before calling the function with an entry we increment the ref count.
  7961. The Called function must DECREMENT the ref count (or delete the entry).
  7962. Arguments:
  7963. Table - The context of the Hash Table to enumerate.
  7964. FilterEntry - The Filter Entry node to start at.
  7965. Function - The function to call for each entry in the subtree. It is of
  7966. of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
  7967. enumeration else true.
  7968. Context - A context ptr to pass through to the Function.
  7969. Return Value:
  7970. The status code from the argument function.
  7971. --*/
  7972. {
  7973. #undef DEBSUB
  7974. #define DEBSUB "JrnlEnumerateFilterTreeTD:"
  7975. PLIST_ENTRY ListHead;
  7976. ULONG WStatus;
  7977. //
  7978. // Check for no entries in tree.
  7979. //
  7980. if (FilterEntry == NULL) {
  7981. return ERROR_SUCCESS;
  7982. }
  7983. //
  7984. // Apply the function to the root node.
  7985. // The function could remove the node from the table but not from the list
  7986. // since our caller has the child list replica lock. Bump the ref count
  7987. // to keep the memory from being freed.
  7988. //
  7989. INCREMENT_FILTER_REF_COUNT(FilterEntry);
  7990. WStatus = (Function)(Table, FilterEntry, Context);
  7991. if (!WIN_SUCCESS(WStatus)) {
  7992. goto RETURN;
  7993. }
  7994. //
  7995. // Warning: If the function above deletes the node the following ref
  7996. // is invalid. This should not be a problem because deletes should only
  7997. // be done bottom up.
  7998. //
  7999. ListHead = &FilterEntry->ChildHead;
  8000. ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
  8001. //
  8002. // pE is of type PFILTER_TABLE_ENTRY.
  8003. //
  8004. //
  8005. // Apply the function to each child node.
  8006. // The function could remove the node from the list but the list macro
  8007. // has captured the Flink so the traversal can continue.
  8008. //
  8009. if (!IsListEmpty(&pE->ChildHead)) {
  8010. //
  8011. // Recurse on the child's list head.
  8012. //
  8013. WStatus = JrnlEnumerateFilterTreeTD(Table, pE, Function, Context);
  8014. } else {
  8015. INCREMENT_FILTER_REF_COUNT(pE);
  8016. WStatus = (Function)(Table, pE, Context);
  8017. }
  8018. if (!WIN_SUCCESS(WStatus)) {
  8019. goto RETURN;
  8020. }
  8021. );
  8022. WStatus = ERROR_SUCCESS;
  8023. //
  8024. // Done with this Root node so decrement the ref count which could
  8025. // cause it to be deleted.
  8026. //
  8027. RETURN:
  8028. return WStatus;
  8029. }
  8030. VOID
  8031. JrnlHashEntryFree(
  8032. PGENERIC_HASH_TABLE Table,
  8033. PVOID Buffer
  8034. )
  8035. /*++
  8036. Routine Description:
  8037. Free the memory pointed to by Buffer.
  8038. Arguments:
  8039. Table -- ptr to a hash table struct (has heap handle).
  8040. Buffer -- ptr to buffer to free.
  8041. Return Value:
  8042. None.
  8043. --*/
  8044. {
  8045. #undef DEBSUB
  8046. #define DEBSUB "JrnlHashEntryFree:"
  8047. FrsFreeType(Buffer);
  8048. }
  8049. BOOL
  8050. JrnlCompareFid(
  8051. PVOID Buf1,
  8052. PVOID Buf2,
  8053. ULONG Length
  8054. )
  8055. /*++
  8056. Routine Description:
  8057. Compare two keys for equality.
  8058. Arguments:
  8059. Buf1 -- ptr to key value 1.
  8060. Buf1 -- ptr to key value 2.
  8061. Length -- should be 8 bytes.
  8062. Return Value:
  8063. TRUE if they match.
  8064. --*/
  8065. {
  8066. #undef DEBSUB
  8067. #define DEBSUB "JrnlCompareFid:"
  8068. if (!ValueIsMultOf4(Buf1)) {
  8069. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8070. Buf1, Length, *(PULONG)Buf1);
  8071. FRS_ASSERT(ValueIsMultOf4(Buf1));
  8072. return 0xFFFFFFFF;
  8073. }
  8074. if (!ValueIsMultOf4(Buf2)) {
  8075. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8076. Buf2, Length, *(PULONG)Buf2);
  8077. FRS_ASSERT(ValueIsMultOf4(Buf2));
  8078. return 0xFFFFFFFF;
  8079. }
  8080. if (Length != sizeof(ULONGLONG)) {
  8081. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8082. FRS_ASSERT(Length == sizeof(LONGLONG));
  8083. return 0xFFFFFFFF;
  8084. }
  8085. return RtlEqualMemory(Buf1, Buf2, sizeof(ULONGLONG));
  8086. }
  8087. ULONG
  8088. JrnlHashCalcFid (
  8089. PVOID Buf,
  8090. ULONG Length
  8091. )
  8092. /*++
  8093. Routine Description:
  8094. Calculate a hash value on an NTFS file ID for the journal filter table.
  8095. Arguments:
  8096. Buf -- ptr to a file ID.
  8097. Length -- should be 8 bytes.
  8098. Return Value:
  8099. 32 bit hash value.
  8100. --*/
  8101. {
  8102. #undef DEBSUB
  8103. #define DEBSUB "JrnlHashCalcFid:"
  8104. PULONG pUL = (PULONG) Buf;
  8105. if (!ValueIsMultOf4(pUL)) {
  8106. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8107. pUL, Length, *pUL);
  8108. FRS_ASSERT(ValueIsMultOf4(pUL));
  8109. return 0xFFFFFFFF;
  8110. }
  8111. if (Length != sizeof(LONGLONG)) {
  8112. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8113. FRS_ASSERT(Length == sizeof(LONGLONG));
  8114. return 0xFFFFFFFF;
  8115. }
  8116. return HASH_FID(pUL, 0x80000000);
  8117. }
  8118. ULONG
  8119. NoHashBuiltin (
  8120. PVOID Buf,
  8121. ULONG Length
  8122. )
  8123. /*++
  8124. Routine Description:
  8125. No-op function for hash tables that use an external function to
  8126. do hash calculations. It returns the low 4 bytes of the quadword.
  8127. Arguments:
  8128. Buf -- ptr to a file ID.
  8129. Length -- should be 8 bytes.
  8130. Return Value:
  8131. 32 bit hash value.
  8132. --*/
  8133. {
  8134. #undef DEBSUB
  8135. #define DEBSUB "NoHashBuiltin:"
  8136. PULONG pUL = (PULONG) Buf;
  8137. if (!ValueIsMultOf4(pUL)) {
  8138. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8139. pUL, Length, *pUL);
  8140. FRS_ASSERT(ValueIsMultOf4(pUL));
  8141. return 0xFFFFFFFF;
  8142. }
  8143. if (Length != sizeof(LONGLONG)) {
  8144. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8145. FRS_ASSERT(Length == sizeof(LONGLONG));
  8146. return 0xFFFFFFFF;
  8147. }
  8148. return (*pUL & (ULONG) 0x7FFFFFFF);
  8149. }
  8150. BOOL
  8151. JrnlCompareGuid(
  8152. PVOID Buf1,
  8153. PVOID Buf2,
  8154. ULONG Length
  8155. )
  8156. /*++
  8157. Routine Description:
  8158. Compare two keys for equality.
  8159. Arguments:
  8160. Buf1 -- ptr to key value 1.
  8161. Buf1 -- ptr to key value 2.
  8162. Length -- should be 16 bytes.
  8163. Return Value:
  8164. TRUE if they match.
  8165. --*/
  8166. {
  8167. #undef DEBSUB
  8168. #define DEBSUB "JrnlCompareGuid:"
  8169. if (!ValueIsMultOf4(Buf1)) {
  8170. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8171. Buf1, Length, *(PULONG)Buf1);
  8172. FRS_ASSERT(ValueIsMultOf4(Buf1));
  8173. return 0xFFFFFFFF;
  8174. }
  8175. if (!ValueIsMultOf4(Buf2)) {
  8176. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8177. Buf2, Length, *(PULONG)Buf2);
  8178. FRS_ASSERT(ValueIsMultOf4(Buf2));
  8179. return 0xFFFFFFFF;
  8180. }
  8181. if (Length != sizeof(GUID)) {
  8182. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8183. FRS_ASSERT(Length == sizeof(GUID));
  8184. return 0xFFFFFFFF;
  8185. }
  8186. return RtlEqualMemory(Buf1, Buf2, sizeof(GUID));
  8187. }
  8188. ULONG
  8189. JrnlHashCalcGuid (
  8190. PVOID Buf,
  8191. ULONG Length
  8192. )
  8193. /*++
  8194. Routine Description:
  8195. Calculate a hash value for a Guid.
  8196. From \nt\private\rpc\runtime\mtrt\uuidsup.hxx
  8197. This is the "true" OSF DCE format for Uuids. We use this
  8198. when generating Uuids. The NodeId is faked on systems w/o
  8199. a netcard.
  8200. typedef struct _RPC_UUID_GENERATE
  8201. {
  8202. unsigned long TimeLow; // 100 ns units
  8203. unsigned short TimeMid;
  8204. unsigned short TimeHiAndVersion;
  8205. unsigned char ClockSeqHiAndReserved;
  8206. unsigned char ClockSeqLow;
  8207. unsigned char NodeId[6]; // constant
  8208. } RPC_UUID_GENERATE;
  8209. TimeLow wraps every 6.55ms and is mostly zero.
  8210. Not quite true since GUIDs are allocated
  8211. in time based blocks and then successive GUIDS are created by
  8212. bumping the TimeLow by one until the block is consumed.
  8213. Arguments:
  8214. Buf -- ptr to a Guid.
  8215. Length -- should be 16 bytes.
  8216. Return Value:
  8217. 32 bit hash value.
  8218. --*/
  8219. {
  8220. #undef DEBSUB
  8221. #define DEBSUB "JrnlHashCalcGuid:"
  8222. PULONG pUL = (PULONG) Buf;
  8223. PUSHORT pUS = (PUSHORT) Buf;
  8224. if (!ValueIsMultOf4(pUL)) {
  8225. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8226. pUL, Length, *pUL);
  8227. FRS_ASSERT(ValueIsMultOf4(pUL));
  8228. return 0xFFFFFFFF;
  8229. }
  8230. if (Length != sizeof(GUID)) {
  8231. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8232. FRS_ASSERT(Length == sizeof(GUID));
  8233. return 0xFFFFFFFF;
  8234. }
  8235. //
  8236. // Calc hash based on the time since the rest of it is eseentially constant.
  8237. //
  8238. return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2]);
  8239. }
  8240. ULONG
  8241. JrnlHashCalcUsn (
  8242. PVOID Buf,
  8243. ULONG Length
  8244. )
  8245. /*++
  8246. Routine Description:
  8247. Calculate a hash value on an NTFS USN Journal Index.
  8248. Arguments:
  8249. Buf -- ptr to a file ID.
  8250. Length -- should be 8 bytes.
  8251. Return Value:
  8252. 32 bit hash value.
  8253. --*/
  8254. {
  8255. #undef DEBSUB
  8256. #define DEBSUB "JrnlHashCalcUsn:"
  8257. ULONG Value, HighPart, LowPart;
  8258. if (!ValueIsMultOf4(Buf)) {
  8259. DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
  8260. Buf, Length, *(PULONG)Buf);
  8261. FRS_ASSERT(ValueIsMultOf4(Buf));
  8262. return 0xFFFFFFFF;
  8263. }
  8264. if (Length != sizeof(LONGLONG)) {
  8265. DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
  8266. FRS_ASSERT(Length == sizeof(LONGLONG));
  8267. return 0xFFFFFFFF;
  8268. }
  8269. LowPart = *(PULONG) Buf;
  8270. HighPart = *(PULONG)( (PCHAR) Buf + 4 );
  8271. //
  8272. // USNs are quadword offsets so shift the low part an extra 3 bits.
  8273. //
  8274. Value = (HighPart >> 16) + HighPart + (LowPart >> 19) + (LowPart >> 3);
  8275. return Value;
  8276. }
  8277. VOID
  8278. CalcHashFidAndName(
  8279. IN PUNICODE_STRING Name,
  8280. IN PULONGLONG Fid,
  8281. OUT PULONGLONG HashValue
  8282. )
  8283. /*++
  8284. Routine Description:
  8285. This routine forms a 32 bit hash of the name and File ID args.
  8286. It returns this in the low 32 bits of HashValue. The upper 32 bits are zero.
  8287. Note: If there is room at the end of the Unicode String buffer for the Name,
  8288. code below will add a NULL for printing.
  8289. Arguments:
  8290. Name - The filename to hash.
  8291. Fid - The FID to hash.
  8292. HashValue - The resulting quadword hash value.
  8293. Return Value:
  8294. Not used
  8295. --*/
  8296. {
  8297. #undef DEBSUB
  8298. #define DEBSUB "CalcHashFidAndName:"
  8299. PUSHORT p;
  8300. ULONG NameHash = 0;
  8301. ULONG Shift = 0;
  8302. ULONG FidHash;
  8303. ULONG NChars, MaxNChars;
  8304. PULONG pUL;
  8305. FRS_ASSERT( Name != NULL );
  8306. FRS_ASSERT( Fid != NULL );
  8307. FRS_ASSERT( ValueIsMultOf2(Name->Buffer) );
  8308. FRS_ASSERT( ValueIsMultOf2(Name->Length) );
  8309. FRS_ASSERT( Name->Length != 0 );
  8310. FRS_ASSERT( ValueIsMultOf8(Fid) );
  8311. NChars = Name->Length / sizeof(WCHAR);
  8312. //
  8313. // Combine each unicode character into the hash value, shifting 4 bits
  8314. // each time. Start at the end of the name so file names with different
  8315. // type codes will hash to different table offsets.
  8316. //
  8317. for( p = Name->Buffer + NChars - 1;
  8318. p >= Name->Buffer;
  8319. p-- ) {
  8320. NameHash = NameHash ^ (((ULONG)*p) << Shift);
  8321. Shift = (Shift < 16) ? Shift + 4 : 0;
  8322. }
  8323. pUL = (ULONG *) Fid;
  8324. FidHash = (ULONG) HASH_FID(pUL, 0x80000000);
  8325. if (FidHash == 0) {
  8326. DPRINT(4, "Warning - FidHash is zero.\n");
  8327. }
  8328. *HashValue = (ULONGLONG) (NameHash + FidHash);
  8329. if (*HashValue == 0) {
  8330. DPRINT(0, "Error - HashValue is zero.\n");
  8331. }
  8332. //
  8333. // Make sure the FileName has a unicode null at the end before we print it. This is
  8334. //
  8335. MaxNChars = Name->MaximumLength / sizeof(WCHAR);
  8336. if (Name->Buffer[NChars-1] != UNICODE_NULL) {
  8337. if (NChars >= MaxNChars) {
  8338. //
  8339. // No NULL at the end of the name and no room to add one.
  8340. //
  8341. DPRINT4(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: cannot print\n",
  8342. (NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash);
  8343. return;
  8344. }
  8345. Name->Buffer[NChars] = UNICODE_NULL;
  8346. }
  8347. DPRINT5(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: %ws\n",
  8348. (NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash, Name->Buffer);
  8349. }
  8350. VOID
  8351. JrnlFilterPrintJacket(
  8352. PGENERIC_HASH_TABLE Table,
  8353. PVOID Buffer
  8354. )
  8355. {
  8356. JrnlFilterPrint(5, Table, Buffer);
  8357. }
  8358. VOID
  8359. JrnlFilterPrint(
  8360. ULONG PrintSev,
  8361. PGENERIC_HASH_TABLE Table,
  8362. PVOID Buffer
  8363. )
  8364. /*++
  8365. Routine Description:
  8366. print out a hash table entry.
  8367. Arguments:
  8368. Table -- ptr to a hash table struct.
  8369. Buffer -- ptr to entry.
  8370. Return Value:
  8371. none.
  8372. --*/
  8373. {
  8374. #undef DEBSUB
  8375. #define DEBSUB "JrnlFilterPrint:"
  8376. PFILTER_TABLE_ENTRY Entry = (PFILTER_TABLE_ENTRY)Buffer;
  8377. DPRINT3(PrintSev, "Addr: %08x, HashValue: %08x RC: %d\n",
  8378. Entry,
  8379. Entry->HashEntryHeader.HashValue,
  8380. Entry->HashEntryHeader.ReferenceCount);
  8381. DPRINT2(PrintSev, "List Entry - %08x, %08x\n",
  8382. Entry->HashEntryHeader.ListEntry.Flink,
  8383. Entry->HashEntryHeader.ListEntry.Blink);
  8384. DPRINT2(PrintSev, "FileId: %08x %08x, ParentFileId: %08x %08x\n",
  8385. PRINTQUAD(Entry->DFileID), PRINTQUAD(Entry->DParentFileID));
  8386. DPRINT2(PrintSev, "Replica Number: %d, FileName: %ws\n",
  8387. Entry->DReplicaNumber, Entry->UFileName.Buffer);
  8388. DPRINT3(PrintSev, "Sequence Number: %d, Transition Type: %d, FrsVsn: %08x %08x\n",
  8389. READ_FILTER_SEQ_NUMBER(Entry),
  8390. READ_FILTER_TRANS_TYPE(Entry),
  8391. PRINTQUAD(Entry->FrsVsn));
  8392. DPRINT4(PrintSev, "Childhead Entry - %08x, %08x Child Link Entry - %08x, %08x\n",
  8393. Entry->ChildHead.Flink, Entry->ChildHead.Blink,
  8394. Entry->ChildEntry.Flink, Entry->ChildEntry.Blink);
  8395. }
  8396. #undef PrintSev
  8397. VOID
  8398. JrnlChangeOrderPrint(
  8399. PGENERIC_HASH_TABLE Table,
  8400. PVOID Buffer
  8401. )
  8402. /*++
  8403. Routine Description:
  8404. print out a hash table entry.
  8405. Arguments:
  8406. Table -- ptr to a hash table struct. (unused)
  8407. Buffer -- ptr to entry.
  8408. Return Value:
  8409. none.
  8410. --*/
  8411. {
  8412. #undef DEBSUB
  8413. #define DEBSUB "JrnlChangeOrderPrint:"
  8414. FRS_PRINT_TYPE(0, (PCHANGE_ORDER_ENTRY)Buffer);
  8415. }
  8416. VOID
  8417. DumpUsnRecord(
  8418. IN ULONG Severity,
  8419. IN PUSN_RECORD UsnRecord,
  8420. IN ULONG ReplicaNumber,
  8421. IN ULONG LocationCmd,
  8422. IN PCHAR Debsub,
  8423. IN ULONG uLineNo
  8424. )
  8425. /*++
  8426. Routine Description:
  8427. This routine prints out the contents of a NTFS USN Journal Record.
  8428. Arguments:
  8429. Severity -- Severity level for print. (See debug.c, debug.h)
  8430. UsnRecord - The address of the UsnRecord.
  8431. ReplicaNumber - ID number of the replica set
  8432. LocationCmd - Decoded location command for this USN record.
  8433. Debsub -- Name of calling subroutine.
  8434. uLineno -- Line number of caller
  8435. MACRO: DUMP_USN_RECORD, DUMP_USN_RECORD2
  8436. Return Value:
  8437. none.
  8438. --*/
  8439. {
  8440. #undef DEBSUB
  8441. #define DEBSUB "DumpUsnRecord:"
  8442. ULONG Len;
  8443. CHAR TimeString[32];
  8444. CHAR Tstr1[200];
  8445. WCHAR FName[MAX_PATH+1];
  8446. CHAR FlagBuf[120];
  8447. //
  8448. // Don't print this
  8449. //
  8450. if (!DoDebug(Severity, Debsub)) {
  8451. return;
  8452. }
  8453. //
  8454. // Get hh:mm:ss.
  8455. //
  8456. FileTimeToStringClockTime((PFILETIME) &UsnRecord->TimeStamp, TimeString);
  8457. //
  8458. // Put file name in a buffer so we can put a null at the end of it.
  8459. //
  8460. Len = min((ULONG)UsnRecord->FileNameLength, MAX_PATH);
  8461. CopyMemory(FName, UsnRecord->FileName, Len);
  8462. FName[Len/2] = UNICODE_NULL;
  8463. //
  8464. // Build the trace record.
  8465. //
  8466. _snprintf(Tstr1, sizeof(Tstr1),
  8467. ":U: %08x %d Fid %08x %08x PFid %08x %08x At %08x Sr %04x %s %7s %ws",
  8468. (ULONG)UsnRecord->Usn,
  8469. ReplicaNumber,
  8470. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8471. PRINTQUAD(UsnRecord->ParentFileReferenceNumber),
  8472. UsnRecord->FileAttributes,
  8473. UsnRecord->SourceInfo,
  8474. TimeString,
  8475. CoLocationNames[LocationCmd],
  8476. FName
  8477. );
  8478. Tstr1[sizeof(Tstr1)-1] = '\0';
  8479. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8480. //
  8481. // Output reason string on sep line.
  8482. //
  8483. FrsFlagsToStr(UsnRecord->Reason, UsnReasonNameTable, sizeof(FlagBuf), FlagBuf);
  8484. _snprintf(Tstr1, sizeof(Tstr1),
  8485. ":U: Fid %08x %08x Reason %08x Flags [%s]",
  8486. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8487. UsnRecord->Reason,
  8488. FlagBuf
  8489. );
  8490. Tstr1[sizeof(Tstr1)-1] = '\0';
  8491. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8492. //
  8493. // Output file attributes string on sep line.
  8494. //
  8495. FrsFlagsToStr(UsnRecord->FileAttributes, FileAttrFlagNameTable, sizeof(FlagBuf), FlagBuf);
  8496. _snprintf(Tstr1, sizeof(Tstr1),
  8497. ":U: Fid %08x %08x Attrs %08x Flags [%s]",
  8498. PRINTQUAD(UsnRecord->FileReferenceNumber),
  8499. UsnRecord->FileAttributes,
  8500. FlagBuf
  8501. );
  8502. Tstr1[sizeof(Tstr1)-1] = '\0';
  8503. DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
  8504. }
  8505. VOID
  8506. JrnlDumpVmeFilterTable(
  8507. VOID
  8508. )
  8509. /*++
  8510. Routine Description:
  8511. Dump the VME filter table
  8512. Arguments:
  8513. None.
  8514. Return Value:
  8515. None.
  8516. --*/
  8517. {
  8518. #undef DEBSUB
  8519. #define DEBSUB "JrnlDumpVmeFilterTable:"
  8520. ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
  8521. DPRINT(4, "\n");
  8522. DPRINT1(4, "==== start of VME Filter table dump for %ws ===========\n", pE->FSVolInfo.VolumeLabel);
  8523. DPRINT(4, "\n");
  8524. if (pE->FilterTable != NULL) {
  8525. // GHT_DUMP_TABLE(5, pE->FilterTable);
  8526. NOTHING;
  8527. } else {
  8528. DPRINT(4, "Filter table freed\n");
  8529. }
  8530. DPRINT(4, "\n");
  8531. DPRINT(4, "============== end of Vme Filter table dump ============\n");
  8532. DPRINT(4, "\n");
  8533. );
  8534. }
  8535. /*++
  8536. The two tables below describe all the possible outcomes of a directory
  8537. rename operation. The case numbers in parens are further described below.
  8538. As directory changes appear in the USN data stream the filter table for
  8539. the volume is updated immediately, even in the case of subtree renames.
  8540. This allows us to accurately filter subsequent USN records and associate
  8541. them with the correct replica set.
  8542. (R.S. means Replica Set)
  8543. Parent
  8544. FileID FileID
  8545. Filter Entry Filter Entry Interpretation : Action
  8546. ------------ ------------ -------------- ------
  8547. Absent Absent Wasn't in R.S., Still Isn't: Skip
  8548. (1) Absent Present Wasn't in R.S., Now Is : Create entry (MOVEIN)
  8549. (2) Present Absent Was in R.S. , Now Isn't : MOVEOUT
  8550. Present Present Was in R.S. , Still Is : Eval Further
  8551. The last case above requires further evaluation to determine if the
  8552. directory has moved from one directory to another or from one replica
  8553. set to another.
  8554. FileID Compare R.S. compare
  8555. between Filter Between File
  8556. Entry & USn Rec and Parent Interpretation : Action
  8557. -------------- ----------- -------------- ------
  8558. (3) Same Parent Same R.S. File stayed in same Dir.: Check Name
  8559. Same Parent Diff. R.S. Error, shouldn't happen :
  8560. (4) Diff. Parent Same R.S. Ren to diff dir in R.S. : Update Parent Fid (MOVEDIR)
  8561. (5) Diff. Parent Diff. R.S. Rename to diff R.s. : MOVERS
  8562. For directory renames there are 5 cases to consider:
  8563. 1. MOVEIN - Rename of a directory into a replica set. The filter table lookup
  8564. failed on the FID but the parent FID is in the table. We add an entry for
  8565. this DIR to the filter table. The update process must enumerate the
  8566. subtree on disk and evaluate each file for inclusion into the tree,
  8567. updating the Filter table as it goes. We may see file operations several
  8568. levels down from the rename point and have no entry in the Filter Table so
  8569. we pitch those records. The sub-tree enumeration process must handle this
  8570. as it incorporates each file into the IDTable.
  8571. 2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
  8572. volume. This is a delete of an entire subtree in the Replica set. We
  8573. enumerate the subtree bottom-up, sending dir level change orders to the
  8574. update process as we delete the filter table entries.
  8575. 3. Name change only. The Parent FID in the USN record matches the
  8576. Parent FID in the Filter entry for the directory.
  8577. Update the name in the filter entry.
  8578. 4. MOVEDIR - Parent FID in USN record is different from the parent FID in the
  8579. Filter entry so this is a rename to a dir in the SAME replica set.
  8580. Update the parent FID in the filter entry and Filename too.
  8581. 5. MOVERS - The Parent FID in the USN record is associated with a directory
  8582. in a DIFFERENT replica set on the volume. Update the parent FID, the
  8583. replica ptr, and name in the filter entry. This is a move of an entire
  8584. subtree from one replica set to another. We enumerate the subtree
  8585. top-down, sending dir level change orders to the update process as we
  8586. update the replica set information in the filter table entries.
  8587. --*/
  8588. /*
  8589. Note: doc: - update this description
  8590. Removing a sub-tree from a replica set
  8591. This is a multi-stage process that occurs when a directory is renamed out of
  8592. the replica set. This is managed by the update process.
  8593. 1. The Journal Process has marked the filter entry for the renamed directory
  8594. as DELETED. This ensures that operations on any files below this directory
  8595. are filtered out by the Journal process. A change order describing the subtree
  8596. delete is queued to the Replica Change Order process queue.
  8597. 2. When the update process encounters the subtree delete change order it walks
  8598. thru the subtree (using either the directory entries in the Filter Hash Table or
  8599. the Replica IDTable) breadth-first from the leaves of the subtree to the
  8600. subtree root. For each file or directory it tombstones the entry in the
  8601. IDTable and builds a delete change order to send to its outbound partners. In
  8602. addition it deletes the entries from the volume filter table and the DIRTable as
  8603. it progresses. If a crash or shutdown request occurs during this operation
  8604. the process continues with the remaining entries when it resumes.
  8605. 3. The operation completes when the root of the sub-tree is processed.
  8606. Adding a sub-tree (X) to a replica set
  8607. This occurs when directory X is renamed into a replica set. It is managed by
  8608. the Update Process.
  8609. 1. The Journal Process creates a Filter entry for the sub-tree root (X) and
  8610. queues a change order to the update process. At this point the Journal process
  8611. has no knowledge of what is beneath this directory. If it sees an operation on
  8612. a direct child of X it builds a change order and queues it to the update
  8613. process. In addition if it sees a directory create/delete or rename operation
  8614. on a direct child of X it increments sequence number in the Filter Table Entry
  8615. for X and creates a new Filter Table entry as appropriate.
  8616. 2. The update process takes the "sub-tree add" change order and processes the
  8617. sub-tree starting at X, enumerating the subtree down to the leaves in a breadth
  8618. first order. For each entry in the subtree it creates an IDTable entry for the
  8619. file or directory. If a directory it also creates a DIRTable entry and adds an
  8620. entry to the Filter Table. As each Filter Table entry is made the Journal
  8621. subsystem will begin sending change orders to the update process for any new
  8622. file operations under the directory. For each directory, the filter table entry
  8623. is made first, if it doesn't already exist; then the update process enumerates
  8624. the directory contents. If new direct children are created while the
  8625. enumeration is in process change orders are queued to the update process. If
  8626. the USN on the change order is less than or equal to the USN saved when the file
  8627. was first processed then the change order is discarded. Otherwise the change
  8628. occurred after the point when the file was processed.
  8629. It is possible for the update process to receive update or delete
  8630. change orders for files that are not yet present in the IDTable because the
  8631. enumeration hasn't reached them yet. For files or dirs created "behind" the
  8632. enumeration process point, change orders are queued that will pick them up.
  8633. The first problem is solved by having the update process stop processing
  8634. further change orders on this replica set until the enumeration is complete.
  8635. */
  8636. #if 0
  8637. /*
  8638. Recovery mode processing for the NTFS journal.
  8639. Objective: When FRS or the system crashes we have lost the write filter
  8640. the journal code uses to filter out FRS related writes to files.
  8641. We need to reliably identify those USN records that were caused by FRS
  8642. so we don't propagate out a file that was being installed at the time
  8643. of the crash. Such a file will undoubtedly be corrupt and will get sent
  8644. to every member of the replica set.
  8645. In the case of system crashes, NTFS inserts close records into the journal
  8646. for any files that were open at the time of the crash. NTFS marks those
  8647. USN records with a flag that indicates they were written at startup. In
  8648. addition a user app can force a close record to be written to the journal
  8649. through an FSCTL call. If this happens and no further modification is made
  8650. to the file then no close record will be written by NTFS when the last handle
  8651. on the file is closed or at startup.
  8652. In the case of FRS service crashes or externally generated process Kills
  8653. FRS will fail to perform a clean shutdown. As each change order is processed
  8654. it is marked as work in process. When the change order either retires or
  8655. goes into a retry state the work in process flag is cleared. From this
  8656. information we can determine those files that may have had FRS generated
  8657. writes in process when the service died.
  8658. The flow is as follows:
  8659. At replica startup scan the inbound log and build a hash table (PendingCOTable)
  8660. of all entries with the following information kept with each entry:
  8661. File FID
  8662. File GUID
  8663. Local/Remote CO flag
  8664. CO Inprocess flag
  8665. Usn index of most recent USN record that contributed to the local CO.
  8666. There could be multiple COs pending for the same file. OR the state of
  8667. the Inprocess flags and save the state of the most recent CO's local/rmt flag.
  8668. The PendingCoTable continues to exist after startup so we can evaluate
  8669. dependencies between newly arrived COs and COs in a retry state in the inlog.
  8670. In addition:
  8671. The Largest NTFS USN for any local inbound CO is saved in RecoveryUsnStart.
  8672. The current end of the USN journal is saved in RecoveryUsnEnd.
  8673. Both are saved in the Replica struct.
  8674. ULONGLONG FileReferenceNumber;
  8675. ULONGLONG ParentFileReferenceNumber;
  8676. USN Usn;
  8677. LARGE_INTEGER TimeStamp;
  8678. */
  8679. Start USN read at Replica->RecoveryUsnStart.
  8680. if (UsnRecord->Usn < Replica->RecoveryUsnEnd) {
  8681. if (IsNtfsRecoveryClose(UsnRecord)) {
  8682. //
  8683. // assume that all the file data may not have been written out
  8684. // so the file may be corrupt.
  8685. //
  8686. PendingCo = InPendingCoTable(Replica->PendingCoTable,
  8687. &UsnRecord->FileReferenceNumber);
  8688. if ((PendingCo == NULL) || (PendingCo->LocalCo)) {
  8689. //
  8690. // The file was being written locally at the time of the crash.
  8691. // It is probably corrupt.
  8692. // Create a file refresh change order and send it to one of our
  8693. // inbound partners to get their version of the file.
  8694. // Note: This request is queued so the first inbound partner to
  8695. // join will get it.
  8696. // Note: Since we are reading after RecoveryUsnStart the USN
  8697. // should not be less than what we see in the inlog.
  8698. //
  8699. FRS_ASSERT((PendingCo == NULL) || (UsnRecord->Usn >= PendingCo->Usn));
  8700. RequestRefreshCo(Replica, &UsnRecord->FileReferenceNumber);
  8701. goto GET_NEXT_USN_RECORD;
  8702. } else {
  8703. //
  8704. // There is a pending remote CO for this file. It will install
  8705. // a new copy of the file.
  8706. //
  8707. // Note: if there are multiple remote COs in the process queue
  8708. // the last one may not be the one that is finally accepted.
  8709. // But we need to be sure that none of the local COs that are pending
  8710. // are allowed to propagate.
  8711. //
  8712. // If this CO was in process at the time of the crash and the
  8713. // CO was already propagated to the outlog, the staging file may
  8714. // be corrupted. Delete the CO from the outlog and queue a
  8715. // refresh request to the inbound partner.
  8716. //
  8717. // Note: We could still have a corrupted file. If it was locally
  8718. // changed and we processed the CO, updating the IDTable and
  8719. // inserting the CO in the outlog but a crash still resulted
  8720. // in not all dirty data pages being flushed.
  8721. // WHEN WE GEN THE LOCAL STAGE FILE CAN WE FORCE A FLUSH?
  8722. }
  8723. if (IsFileFrsStagingFile(UsnRecord)) {
  8724. //
  8725. // This is an FRS staging file. It may be corrupt.
  8726. // Delete it and regenerate it by setting a new start state in
  8727. // the related CO. (CO Guid is derived from the name of the file).
  8728. // There may not be a CO for this file if the inlog record has
  8729. // been deleted. There may still be a CO in the outlog though so
  8730. // just delete the staging file, forcing it to be regenerated on
  8731. // demand from the local file.
  8732. //
  8733. // If the local file is suspect then we need to refresh it from
  8734. // an inbound partner so delete the CO in the outlog and let the
  8735. // refresh CO PROPAGATE as needed.
  8736. //
  8737. // Note that the IDTable entry may already have been updated because
  8738. // this CO retired. That would cause the refresh CO to fail to
  8739. // be accepted. Put some state in the refresh CO so when it comes
  8740. // back if that state matches the state in the IDTable entry then
  8741. // we know to accept the refresh CO regardless of other reconcile
  8742. // info. If however another local or remote CO has updated the
  8743. // file in the interim then the refresh CO is stale and should be
  8744. // discarded.
  8745. //
  8746. SetPendingCoState(SeqNum, PendingCo->LocalCo ? IBCO_STAGING_REQUESTED :
  8747. IBCO_FETCH_REQUESTED);
  8748. }
  8749. goto GET_NEXT_USN_RECORD;
  8750. } else {
  8751. //
  8752. // Read IDTable entry for this file and get the FileUsn.
  8753. // This is the USN associated with the most recent operation on the
  8754. // file that we have handled.
  8755. //
  8756. if (UsnRecord->Usn <= IDTableRec->FileUsn) {
  8757. //
  8758. // This USN record is for an operation that occurred
  8759. // prior to the last action processed related to the file.
  8760. //
  8761. goto GET_NEXT_USN_RECORD;
  8762. } else {
  8763. //
  8764. // This USN record could not have come from FRS because if it did and there was no entry for
  8765. // a change order on the file in the Inbound Log then the LastFileUsn check above would have caught it.
  8766. // This is true because the inbound log record is only deleted after the file is updated and the LastFileUsn
  8767. // is saved in the Jet record for the file.
  8768. // Even if there is a change order pending in the Inbound log, FRS could not have started processing it
  8769. // because the USN Record is not marked as written by NTFS at recovery which would be the case
  8770. // if FRS had been in the middle of an update when the system crashed. Therefore,
  8771. //
  8772. //this is not an FRS generated USN record so process the USN record normally.
  8773. }
  8774. }
  8775. }
  8776. /*
  8777. This solution solves the problem of FRS getting part way thru a file update
  8778. when the system crashes. It must not process the USN record because then it
  8779. would propagate a corrupted file out to all the other members. It also has
  8780. the nice property of refreshing a file from another partner that a user was
  8781. writing at the time of the crash. The User has lost their changes but at
  8782. least the file is back in an uncorrupted state.
  8783. */
  8784. #endif