/*++
|
|
|
|
Copyright (c) 1997-1999 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
journal.c
|
|
|
|
Abstract:
|
|
|
|
This module contains routines to process the NTFS Volume Journal for the
|
|
File Replication service. It uses a single thread with an I/O completion
|
|
port to post reads to all volume journals we need to monitor.
|
|
|
|
As USN buffers are filled they are queued to a JournalProcessQueue for
|
|
further processing. The Journal Read Thread gets a free buffer from
|
|
the free list and posts another read to the volume journal.
|
|
|
|
A thread pool processes the USN buffers from the JournalProcessQueue.
|
|
|
|
Author:
|
|
|
|
David A. Orbits (davidor) 6-Apr-1997
|
|
|
|
Environment:
|
|
|
|
User Mode Service
|
|
|
|
Revision History:
|
|
|
|
// JOURNAL RECORD FORMAT
|
|
//
|
|
// The initial Major.Minor version of the Usn record will be 1.0.
|
|
// In general, the MinorVersion may be changed if fields are added
|
|
// to this structure in such a way that the previous version of the
|
|
// software can still correctly handle the fields it knows about. The
|
|
// MajorVersion should only be changed if the previous version of
|
|
// any software using this structure would incorrectly handle new
|
|
// records due to structure changes.
|
|
//
|
|
// see \nt\public\sdk\inc\ntioapi.h for the USN_RECORD declaration.
|
|
//
|
|
|
|
#define USN_REASON_DATA_OVERWRITE (0x00000001)
|
|
#define USN_REASON_DATA_EXTEND (0x00000002)
|
|
#define USN_REASON_DATA_TRUNCATION (0x00000004)
|
|
|
|
#define USN_REASON_NAMED_DATA_OVERWRITE (0x00000010)
|
|
#define USN_REASON_NAMED_DATA_EXTEND (0x00000020)
|
|
#define USN_REASON_NAMED_DATA_TRUNCATION (0x00000040)
|
|
|
|
#define USN_REASON_FILE_CREATE (0x00000100)
|
|
#define USN_REASON_FILE_DELETE (0x00000200)
|
|
#define USN_REASON_EA_CHANGE (0x00000400)
|
|
#define USN_REASON_SECURITY_CHANGE (0x00000800)
|
|
|
|
#define USN_REASON_RENAME_OLD_NAME (0x00001000) // rename
|
|
#define USN_REASON_RENAME_NEW_NAME (0x00002000)
|
|
#define USN_REASON_INDEXABLE_CHANGE (0x00004000)
|
|
#define USN_REASON_BASIC_INFO_CHANGE (0x00008000)
|
|
|
|
#define USN_REASON_HARD_LINK_CHANGE (0x00010000)
|
|
#define USN_REASON_COMPRESSION_CHANGE (0x00020000)
|
|
#define USN_REASON_ENCRYPTION_CHANGE (0x00040000)
|
|
#define USN_REASON_OBJECT_ID_CHANGE (0x00080000)
|
|
|
|
#define USN_REASON_REPARSE_POINT_CHANGE (0x00100000)
|
|
#define USN_REASON_STREAM_CHANGE (0x00200000) // named stream create, delete or rename.
|
|
|
|
#define USN_REASON_CLOSE (0x80000000)
|
|
|
|
--*/
|
|
|
|
|
|
#define UNICODE 1
|
|
#define _UNICODE 1
|
|
|
|
|
|
|
|
#include <ntreppch.h>
|
|
#pragma hdrstop
|
|
|
|
#undef DEBSUB
|
|
#define DEBSUB "journal:"
|
|
#include <frs.h>
|
|
#include <genhash.h>
|
|
#include <tablefcn.h>
|
|
#include <eventlog.h>
|
|
#include <perrepsr.h>
|
|
|
|
#pragma warning( disable:4102) // unreferenced label
|
|
|
|
//
// Journal tuning constants.  Note that the default for Journal Max Size is
// not defined here; it now comes from the registry.
//
#define JRNL_DEFAULT_ALLOC_DELTA            (1*1024*1024)
#define JRNL_USN_SAVE_POINT_INTERVAL        (16*1024)

#define JRNL_CLEAN_WRITE_FILTER_INTERVAL    (60*1000)   /* once a minute */

#define NumberOfJounalBuffers               3

//
// Cancel / pause codes for journal reads.
//
#define FRS_CANCEL_JOURNAL_READ             0xFFFFFFFF
#define FRS_PAUSE_JOURNAL_READ              0xFFFFFFF0
|
|
|
|
|
|
//
// The VSN state is saved in the config record each time 'VSN_SAVE_INTERVAL'
// VSNs have been handed out.  On restart the largest saved value is taken
// and 2*(VSN_SAVE_INTERVAL+1) is added to it, so that even if a crash
// occurred the VSN never goes backwards.
//
// A Vsn value of 0 means there is no Vsn.  This convention is required
// by FrsPendingInVVector().
//
// VSN_SAVE_INTERVAL+1 MUST BE a power of 2 (0xFF itself is not one).
// NOTE(review): presumably the value is used as a bit mask -- confirm
// against the code that consumes it.
//
#define VSN_SAVE_INTERVAL       0xFF
#define VSN_RESTART_INCREMENT   (2*(VSN_SAVE_INTERVAL+1))
|
|
|
|
|
|
//
// Deactivate the Volume Monitor Entry: pull it off _Queue, mark IoActive
// FALSE, record the error status in the entry, queue it to the
// VolumeMonitorStopQueue and release the Vme reference.
//
// This code assumes you have already ACQUIRED THE LOCK ON the
// VolumeMonitorQueue.
//
// The body is wrapped in do { } while (0) so the macro expands to exactly
// one statement and is safe as the body of an unbraced if/else.  The
// invocation must be followed by a semicolon.  _pVme is expanded more than
// once, so do not pass an expression with side effects.
//
#define VmeDeactivate(_Queue, _pVme, _WStatus)                                \
    do {                                                                      \
        FrsRtlRemoveEntryQueueLock((_Queue), &(_pVme)->ListEntry);            \
        (_pVme)->IoActive = FALSE;                                            \
        (_pVme)->WStatus = (_WStatus);                                        \
        /*_pVme->ActiveReplicas -= 1; */                                      \
        DPRINT2(4, "++ vmedeactivate -- onto stop queue %ws (%08x)\n",        \
                (_pVme)->FSVolInfo.VolumeLabel, (_pVme));                     \
        FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &(_pVme)->ListEntry);  \
        ReleaseVmeRef(_pVme);                                                 \
    } while (0)
|
|
|
|
|
|
//
|
|
// The Journal free buffer queue holds the free buffers for journal reads.
|
|
//
|
|
FRS_QUEUE JournalFreeQueue;
|
|
|
|
//
|
|
// The Journal process queue holds the list of journal buffers with
|
|
// data to process.
|
|
//
|
|
FRS_QUEUE JournalProcessQueue;
|
|
|
|
//
|
|
// The Journal I/O completion port. We keep a read outstanding on each
|
|
// NTFS volume monitored.
|
|
//
|
|
HANDLE JournalCompletionPort;
|
|
|
|
//
|
|
// The handle to the Journal read thread.
|
|
//
|
|
HANDLE JournalReadThreadHandle = NULL;
|
|
|
|
//
|
|
// Set this flag to stop any further issuing of journal reads.
|
|
//
|
|
volatile BOOL KillJournalThreads = FALSE;
|
|
|
|
//
|
|
// This is the volume monitor queue. The Journal read thread waits until
|
|
// this queue goes non-empty before it waits on the completion port. This
|
|
// way it knows the completion port exists without having to poll.
|
|
//
|
|
FRS_QUEUE VolumeMonitorQueue;
|
|
|
|
//
|
|
// When I/O is Stoped on a given journal the Journal read thread places
|
|
// the volume monitor entry on the Stop queue.
|
|
//
|
|
FRS_QUEUE VolumeMonitorStopQueue;
|
|
|
|
//
|
|
// This is the control queue for all the volume monitor entry change order
|
|
// queues.
|
|
//
|
|
FRS_QUEUE FrsVolumeLayerCOList;
|
|
FRS_QUEUE FrsVolumeLayerCOQueue;
|
|
|
|
//
|
|
// This is the expected version number from the USN journal.
|
|
//
|
|
USHORT ConfigUsnMajorVersion = 2;
|
|
|
|
//
|
|
// This is the count of outstanding journal read requests.
|
|
//
|
|
ULONG JournalActiveIoRequests = 0;
|
|
|
|
//
|
|
// Change order delay in aging cache. (milliseconds)
|
|
//
|
|
ULONG ChangeOrderAgingDelay;
|
|
|
|
|
|
//
|
|
// This lock is held by JrnlSetReplicaState() when moving a replica
|
|
// between lists.
|
|
//
|
|
CRITICAL_SECTION JrnlReplicaStateLock;
|
|
|
|
//
|
|
// Lock to protect the child lists in the Filter Table. (must be pwr of 2)
|
|
// Instead of paying the overhead of having one per node we just use an array
|
|
// to help reduce contention. We use the ReplicaNumber masked by the lock
|
|
// table size as the index.
|
|
//
|
|
// Acquire the lock on the ReplicaSet Filter table Child List before
|
|
// inserting or removing a child from the list.
|
|
//
|
|
CRITICAL_SECTION JrnlFilterTableChildLock[NUMBER_FILTER_TABLE_CHILD_LOCKS];
|
|
|
|
//
|
|
// The list of all Replica Structs active, stopped and faulted.
|
|
//
|
|
extern FRS_QUEUE ReplicaListHead;
|
|
extern FRS_QUEUE ReplicaStoppedListHead;
|
|
extern FRS_QUEUE ReplicaFaultListHead;
|
|
|
|
//
|
|
// This is used to init our new value for FrsVsn.
|
|
//
|
|
extern ULONGLONG MaxPartnerClockSkew;
|
|
|
|
//
|
|
// Global sequence number. Inited here with first Vme VSN.
|
|
//
|
|
extern CRITICAL_SECTION GlobSeqNumLock;
|
|
extern ULONGLONG GlobSeqNum;
|
|
|
|
//
|
|
// The table below describes what list the Replica struct should be on for
|
|
// a given state as well as the state name.
|
|
//
|
|
REPLICA_SERVICE_STATE ReplicaServiceState[] = {
|
|
{NULL, "ALLOCATED"},
|
|
{&ReplicaListHead, "INITIALIZING"},
|
|
{&ReplicaListHead, "STARTING"},
|
|
{&ReplicaListHead, "ACTIVE"},
|
|
{&ReplicaListHead, "PAUSE1"},
|
|
{&ReplicaListHead, "PAUSING (2)"},
|
|
{&ReplicaListHead, "PAUSED"},
|
|
{&ReplicaListHead, "STOPPING"},
|
|
{&ReplicaStoppedListHead, "STOPPED"},
|
|
{&ReplicaFaultListHead, "ERROR"},
|
|
{&ReplicaFaultListHead, "JRNL_WRAP_ERROR"},
|
|
{NULL, "REPLICA_DELETED"},
|
|
{&ReplicaFaultListHead, "MISMATCHED_VOLUME_SERIAL_NO"},
|
|
{&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_OBJECT_ID"},
|
|
{&ReplicaFaultListHead, "MISMATCHED_REPLICA_ROOT_FILE_ID"},
|
|
{&ReplicaFaultListHead, "MISMATCHED_JOURNAL_ID"}
|
|
};
|
|
|
|
|
|
//
|
|
// The following struct is used to encapsulate the context of a change
|
|
// order request so it can be passed as a context parameter in an
|
|
// enumerated call.
|
|
//
|
|
typedef struct _CHANGE_ORDER_PARAMETERS_ {
|
|
|
|
PREPLICA OriginalReplica; // Original Replica Set
|
|
PREPLICA NewReplica; // The New Replica set in the case of a rename.
|
|
|
|
ULONGLONG NewParentFid; // The new parent FID in case of a rename.
|
|
ULONG NewLocationCmd; // MovDir, MovRs, ...
|
|
|
|
PUSN_RECORD UsnRecord; // Usn Record that triggered the change order
|
|
// creation (i.e. the operation on the root of the subtree).
|
|
|
|
PFILTER_TABLE_ENTRY OrigParentFilterEntry; // Original parent filter entry of root filter entry
|
|
PFILTER_TABLE_ENTRY NewParentFilterEntry; // Current/New parent filter entry of root filter entry
|
|
|
|
} CHANGE_ORDER_PARAMETERS, *PCHANGE_ORDER_PARAMETERS;
|
|
|
|
|
|
//
// Eight 4-bit operation fields, Op1 through Op8 (32 bits of bit-field in
// total).
//
typedef struct _OP_FIELDS_ {
    unsigned int Op1 : 4;
    unsigned int Op2 : 4;
    unsigned int Op3 : 4;
    unsigned int Op4 : 4;
    unsigned int Op5 : 4;
    unsigned int Op6 : 4;
    unsigned int Op7 : 4;
    unsigned int Op8 : 4;
} OP_FIELDS, *POP_FIELDS;
|
|
|
|
|
|
typedef struct _CO_LOCATION_CONTROL_CMD_ {
|
|
union {
|
|
OP_FIELDS OpFields;
|
|
ULONG UlongOpFields;
|
|
} u1;
|
|
} CO_LOCATION_CONTROL_CMD;
|
|
|
|
#define OpInval 0 // Invalid op (only check for Op1, else done).
|
|
#define OpEvap 1 // Evaporate the change order
|
|
#define OpNRs 2 // update New Replica Set and New Directory.
|
|
#define OpNDir 3 // Update New Directory
|
|
#define OpNSt 4 // Update New State stored in next nibble.
|
|
|
|
#define NSCre CO_LOCATION_CREATE // Create a File or Dir (New FID Generated)
|
|
#define NSDel CO_LOCATION_DELETE // Delete a file or Dir (FID retired)
|
|
#define NSMovIn CO_LOCATION_MOVEIN // Rename into a R.S.
|
|
#define NSMovIn2 CO_LOCATION_MOVEIN2 // Rename into a R.S. from a prev MOVEOUT
|
|
#define NSMovOut CO_LOCATION_MOVEOUT // Rename out of any R.S.
|
|
#define NSMovRs CO_LOCATION_MOVERS // Rename from one R.S. to another R.S.
|
|
#define NSMovDir CO_LOCATION_MOVEDIR // Rename from one dir to another (Same R.S.)
|
|
#define NSMax CO_LOCATION_NUM_CMD // No prior Location cmd. Prior change
|
|
// Order had a content cmd.
|
|
#define NSNoLocationCmd CO_LOCATION_NO_CMD
|
|
|
|
PCHAR CoLocationNames[]= {"Create" , "Delete", "Movein" , "Movein2",
|
|
"Moveout", "Movers", "MoveDir", "NoCmd"};
|
|
|
|
//
|
|
// The following dispatch table specifies what operations are performed when
|
|
// a second change arrives for a given FID and a prior change order is still
|
|
// pending. The states correspond to the change order location command that
|
|
// is to be executed by the update process. Each entry in the dispatch table
|
|
// is a ULONG composed of up to 8 operation nibbles which are executed in a loop.
|
|
// The operations could evaporate the change order (e.g. a create followed by
|
|
// a delete. The create was pending and the delete came in so just blow off
|
|
// the change order. The operation could update the parent directory or the
|
|
// replica set the directory lives in, or the location command (and thus the
|
|
// state) that is to be performed. The MovIn2 state is not a unique input,
|
|
// rather it is a special state that lets us remember there was a prior MovOut
|
|
// done so if the MovIn2 is followed by a Del or a MovOut we know there is still
|
|
// work to be done in the database so we can't evaporate the change order.
|
|
// See note (a) below.
|
|
//
|
|
|
|
|
|
CO_LOCATION_CONTROL_CMD ChangeOrderLocationStateTable[NSMax+1][NSMax] = {
|
|
|
|
// Followed by Second Op On Same Fid
|
|
//
|
|
// Cre Del MovIn MovIn2 MovOut MovRs MovDir
|
|
|
|
// First
|
|
// Op On
|
|
// Fid
|
|
|
|
//Cre
|
|
{{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
|
|
|
|
//Del
|
|
{{0}, {0}, {0}, {0}, {0}, {0}, {0}},
|
|
|
|
//MovIn
|
|
{{0}, {OpEvap}, {0}, {0}, {OpEvap }, {OpNRs}, {OpNDir}},
|
|
|
|
//MovIn2(a)
|
|
{{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
|
|
|
|
//MovOut
|
|
{{0}, {0}, {OpNRs,OpNSt,NSMovIn2},
|
|
{0}, {0}, {0}, {0}},
|
|
|
|
//MovRs
|
|
{{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs}, {OpNDir}},
|
|
|
|
//MovDir
|
|
{{0}, {OpNSt,NSDel}, {0}, {0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir}},
|
|
//<NONE>
|
|
{{OpNRs, OpNSt,NSCre},
|
|
{OpNSt,NSDel}, {OpNRs,OpNSt,NSMovIn},
|
|
{0}, {OpNSt,NSMovOut}, {OpNRs,OpNSt,NSMovRs}, {OpNDir,OpNSt,NSMovDir}}
|
|
|
|
};
|
|
|
|
// (a) The MovIn2 state is artificially introduced to deal with the sequence
|
|
// of MovOut followed by a MovIn. There are two problems here. One is that
|
|
// many changes could have happened to the file or dir while it was outside
|
|
// the R.S. since we were not monitoring it. Consequently the update process
|
|
// must do a complete evaluation of the file/dir properties so we don't
|
|
// fail to replicate some change. The second problem is that in the normal
|
|
// case a MovIn followed by either a delete or a MovOut results in evaporating
|
|
// the change order. However if a MovOut has occurred in the past followed
|
|
// by a MovIn we cannot assume that the file or Dir was never in the R.S.
|
|
// to begin with. Consider the sequence of MovOut, MovIn, Del. Without the
|
|
// MovIn2 state the MovIn followed by Del would result in evaporating the
|
|
// change order so the file or dir would be still left in the database.
|
|
// By transitioning to the MovIn2 state we go to the Del state when we see
|
|
// the Delete so we can remove the entry from the database. Similarly once
|
|
// in the MovIn2 state if we see a MovOut then we go to the MovOut state
|
|
// rather than evaporating the change order since we still have to update
|
|
// the database with the MovOut.
|
|
//
|
|
// Note: think about a similar problem where the file filter string changes
|
|
// and a file is touched so a create CO is generated. If the file is
|
|
// then deleted the CO is evaporated. This means that a del CO will
|
|
// not be propagated so the file is deleted everywhere. Do we need
|
|
// a Cre2 CO analogous to the MovIn2 state?
|
|
|
|
typedef
|
|
ULONG
|
|
(NTAPI *PJRNL_FILTER_ENUM_ROUTINE) (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
LONG
|
|
JrnlGetFileCoLocationCmd(
|
|
PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUSN_RECORD UsnRecord,
|
|
OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
|
|
OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
|
|
);
|
|
|
|
ULONG
|
|
JrnlEnterFileChangeOrder(
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN ULONG LocationCmd,
|
|
IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
|
|
IN PFILTER_TABLE_ENTRY NewParentFilterEntry
|
|
);
|
|
|
|
PCHANGE_ORDER_ENTRY
|
|
JrnlCreateCo(
|
|
IN PREPLICA Replica,
|
|
IN PULONGLONG Fid,
|
|
IN PULONGLONG ParentFid,
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN BOOL IsDirectory,
|
|
IN PWCHAR FileName,
|
|
IN USHORT Length
|
|
);
|
|
|
|
BOOL
|
|
JrnlMergeCoTest(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUNICODE_STRING UFileName,
|
|
IN PULONGLONG ParentFid,
|
|
IN ULONG StreamLastMergeSeqNum
|
|
);
|
|
|
|
VOID
|
|
JrnlUpdateNst(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUNICODE_STRING UFileName,
|
|
IN PULONGLONG ParentFid,
|
|
IN ULONG StreamSequenceNumber
|
|
);
|
|
|
|
VOID
|
|
JrnlFilterUpdate(
|
|
IN PREPLICA CurrentReplica,
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN ULONG LocationCmd,
|
|
IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
|
|
IN PFILTER_TABLE_ENTRY NewParentFilterEntry
|
|
);
|
|
|
|
ULONG
|
|
JrnlProcessSubTree(
|
|
IN PFILTER_TABLE_ENTRY RootFilterEntry,
|
|
IN PCHANGE_ORDER_PARAMETERS Cop
|
|
);
|
|
|
|
ULONG
|
|
JrnlProcessSubTreeEntry(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG
|
|
JrnlUpdateChangeOrder(
|
|
IN PCHANGE_ORDER_ENTRY ChangeOrder,
|
|
IN PREPLICA NewReplica,
|
|
IN ULONGLONG NewParentFid,
|
|
IN ULONG NewLocationCmd,
|
|
IN PUSN_RECORD UsnRecord
|
|
);
|
|
|
|
ULONG
|
|
JrnlAddFilterEntryFromUsn(
|
|
IN PREPLICA Replica,
|
|
IN PUSN_RECORD UsnRecord,
|
|
OUT PFILTER_TABLE_ENTRY *RetFilterEntry
|
|
);
|
|
|
|
ULONG
|
|
JrnlAddFilterEntry(
|
|
IN PREPLICA Replica,
|
|
IN PFILTER_TABLE_ENTRY FilterEntry,
|
|
OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
|
|
IN BOOL Replace
|
|
);
|
|
|
|
ULONG
|
|
JrnlDeleteDirFilterEntry(
|
|
IN PGENERIC_HASH_TABLE FilterTable,
|
|
IN PULONGLONG DFileID,
|
|
IN PFILTER_TABLE_ENTRY ArgFilterEntry
|
|
);
|
|
|
|
ULONG
|
|
JrnlGetPathAndLevel(
|
|
IN PGENERIC_HASH_TABLE FilterTable,
|
|
IN PLONGLONG StartDirFileID,
|
|
OUT PULONG Level
|
|
);
|
|
|
|
ULONG
|
|
JrnlCommand(
|
|
PCOMMAND_PACKET CmdPkt
|
|
);
|
|
|
|
ULONG
|
|
JrnlPrepareService1(
|
|
PREPLICA Replica
|
|
);
|
|
|
|
ULONG
|
|
JrnlPrepareService2(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PREPLICA Replica
|
|
);
|
|
|
|
ULONG
|
|
JrnlInitOneReplicaSet(
|
|
PCOMMAND_PACKET CmdPkt
|
|
);
|
|
|
|
ULONG
|
|
JrnlCleanOutReplicaSet(
|
|
PREPLICA Replica
|
|
);
|
|
|
|
JET_ERR
|
|
JrnlInsertParentEntry(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PTABLE_CTX TableCtx,
|
|
IN PVOID Record,
|
|
IN PVOID Context
|
|
);
|
|
|
|
ULONG_PTR
|
|
JrnlFilterLinkChild (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG_PTR
|
|
JrnlFilterLinkChildNoError (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG
|
|
JrnlFilterUnlinkChild (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG_PTR
|
|
JrnlFilterGetRoot (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG
|
|
JrnlSubTreePrint (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
);
|
|
#if 0
|
|
ULONG
|
|
JrnlCheckStartFailures(
|
|
PFRS_QUEUE Queue
|
|
);
|
|
#endif
|
|
|
|
ULONG
|
|
JrnlOpen(
|
|
IN PREPLICA Replica,
|
|
OUT PVOLUME_MONITOR_ENTRY *pVme,
|
|
PCONFIG_TABLE_RECORD ConfigRecord
|
|
);
|
|
|
|
ULONG
|
|
JrnlSubmitReadThreadRequest(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN ULONG Request,
|
|
IN ULONG NewState
|
|
);
|
|
|
|
ULONG
|
|
JrnlShutdownSingleReplica(
|
|
IN PREPLICA Replica,
|
|
IN BOOL HaveLock
|
|
);
|
|
|
|
ULONG
|
|
JrnlCloseVme(
|
|
IN PVOLUME_MONITOR_ENTRY pVme
|
|
);
|
|
|
|
ULONG
|
|
JrnlCloseAll(
|
|
VOID
|
|
);
|
|
|
|
ULONG
|
|
JrnlClose(
|
|
IN HANDLE VolumeHandle
|
|
);
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
JournalReadThread(
|
|
IN LPVOID Context
|
|
);
|
|
|
|
ULONG
|
|
JrnlGetEndOfJournal(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
OUT USN *EndOfJournal
|
|
);
|
|
|
|
NTSTATUS
|
|
FrsIssueJournalAsyncRead(
|
|
IN PJBUFFER Jbuff,
|
|
IN PVOLUME_MONITOR_ENTRY pVme
|
|
);
|
|
|
|
ULONG
|
|
JrnlEnumerateFilterTreeBU(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PFILTER_TABLE_ENTRY FilterEntry,
|
|
PJRNL_FILTER_ENUM_ROUTINE Function,
|
|
PVOID Context
|
|
);
|
|
|
|
ULONG
|
|
JrnlEnumerateFilterTreeTD(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PFILTER_TABLE_ENTRY FilterEntry,
|
|
PJRNL_FILTER_ENUM_ROUTINE Function,
|
|
PVOID Context
|
|
);
|
|
|
|
VOID
|
|
JrnlHashEntryFree(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
);
|
|
|
|
BOOL
|
|
JrnlCompareFid(
|
|
PVOID Buf1,
|
|
PVOID Buf2,
|
|
ULONG Length
|
|
);
|
|
|
|
ULONG
|
|
JrnlHashCalcFid (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
);
|
|
|
|
ULONG
|
|
NoHashBuiltin (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
);
|
|
|
|
BOOL
|
|
JrnlCompareGuid(
|
|
PVOID Buf1,
|
|
PVOID Buf2,
|
|
ULONG Length
|
|
);
|
|
|
|
ULONG
|
|
JrnlHashCalcGuid (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
);
|
|
|
|
ULONG
|
|
JrnlHashCalcUsn (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
);
|
|
|
|
VOID
|
|
CalcHashFidAndName(
|
|
IN PUNICODE_STRING Name,
|
|
IN PULONGLONG Fid,
|
|
OUT PULONGLONG HashValue
|
|
);
|
|
|
|
ULONG
|
|
JrnlCleanWriteFilter(
|
|
PCOMMAND_PACKET CmdPkt
|
|
);
|
|
|
|
ULONG
|
|
JrnlCleanWriteFilterWorker (
|
|
PQHASH_TABLE Table,
|
|
PQHASH_ENTRY BeforeNode,
|
|
PQHASH_ENTRY TargetNode,
|
|
PVOID Context
|
|
);
|
|
|
|
VOID
|
|
JrnlSubmitCleanWriteFilter(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN ULONG TimeOut
|
|
);
|
|
|
|
#define FRS_JOURNAL_FILTER_PRINT(_Sev_, _Table_, _Buffer_) \
|
|
JrnlFilterPrint(_Sev_, _Table_, _Buffer_)
|
|
#define FRS_JOURNAL_FILTER_PRINT_FUNCTION JrnlFilterPrintJacket
|
|
VOID
|
|
JrnlFilterPrint(
|
|
ULONG PrintSev,
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
);
|
|
|
|
VOID
|
|
JrnlFilterPrintJacket(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
);
|
|
|
|
#define FRS_JOURNAL_CHANGE_ORDER_PRINT(_Table_, _Buffer_) \
|
|
JrnlChangeOrderPrint( _Table_, _Buffer_)
|
|
#define FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION JrnlChangeOrderPrint
|
|
VOID
|
|
JrnlChangeOrderPrint(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
);
|
|
|
|
ULONG
|
|
ChgOrdAcceptInitialize(
|
|
VOID
|
|
);
|
|
|
|
VOID
|
|
ChgOrdAcceptShutdown(
|
|
VOID
|
|
);
|
|
|
|
DWORD
|
|
FrsDeleteById(
|
|
IN PWCHAR VolumeName,
|
|
IN PWCHAR Name,
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PVOID Id,
|
|
IN DWORD IdLen
|
|
);
|
|
|
|
|
|
DWORD
|
|
JournalMonitorInit(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine initializes the NTFS Journal monitor routines and starts
|
|
the JournalReadThread.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Thread Return Value:
|
|
|
|
Win32 status
|
|
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JournalMonitorInit:"
|
|
|
|
ULONG WStatus;
|
|
ULONG ThreadId;
|
|
JET_ERR jerr;
|
|
ULONG i;
|
|
|
|
if (JournalActiveIoRequests != 0) {
|
|
DPRINT1(0, ":S: ERROR - Can't initialize journal with active I/O (%d) in progress.\n",
|
|
JournalActiveIoRequests);
|
|
return ERROR_REQUEST_ABORTED;
|
|
}
|
|
|
|
//
|
|
// No completion port yet.
|
|
//
|
|
FRS_CLOSE(JournalCompletionPort);
|
|
JournalCompletionPort = NULL;
|
|
|
|
//
|
|
// Read change order aging cache delay.
|
|
//
|
|
CfgRegReadDWord(FKC_CO_AGING_DELAY, NULL, 0, &ChangeOrderAgingDelay);
|
|
ChangeOrderAgingDelay *= 1000;
|
|
|
|
//
|
|
// Init the list of volumes we monitor.
|
|
//
|
|
FrsInitializeQueue(&VolumeMonitorQueue, &VolumeMonitorQueue);
|
|
FrsInitializeQueue(&VolumeMonitorStopQueue, &VolumeMonitorStopQueue);
|
|
|
|
//
|
|
// Free list for journal buffers.
|
|
//
|
|
FrsInitializeQueue(&JournalFreeQueue, &JournalFreeQueue);
|
|
|
|
//
|
|
// Locks for the Filter Table Child Lists.
|
|
//
|
|
for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
|
|
INITIALIZE_CRITICAL_SECTION(&JrnlFilterTableChildLock[i]);
|
|
}
|
|
FrsInitializeQueue(&FrsVolumeLayerCOList, &FrsVolumeLayerCOList);
|
|
FrsInitializeQueue(&FrsVolumeLayerCOQueue, &FrsVolumeLayerCOList);
|
|
|
|
//
|
|
// Wait for the DB to start up. During shutdown, this event is
|
|
// set. Any extraneous commands issued by the journal are
|
|
// subsequently ignored by the database.
|
|
//
|
|
WaitForSingleObject(DataBaseEvent, INFINITE);
|
|
if (FrsIsShuttingDown) {
|
|
return ERROR_PROCESS_ABORTED;
|
|
}
|
|
|
|
//
|
|
// Create a journal read thread. It will wait until an entry is placed
|
|
// on the VolumeMonitorQueue.
|
|
//
|
|
|
|
if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
JournalReadThreadHandle = CreateThread(NULL,
|
|
0,
|
|
JournalReadThread,
|
|
(LPVOID) NULL,
|
|
0,
|
|
&ThreadId);
|
|
|
|
if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
WStatus = GetLastError();
|
|
DPRINT_WS(0, "Error from CreateThread", WStatus);
|
|
return WStatus;
|
|
}
|
|
|
|
DbgCaptureThreadInfo2(L"JrnlRead", JournalReadThread, ThreadId);
|
|
}
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
VOID
|
|
JournalMonitorShutdown(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine releases handles and frees storage for the NTFS Journal
|
|
subsystem.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Thread Return Value:
|
|
|
|
Win32 status
|
|
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JournalMonitorShutdown:"
|
|
|
|
ULONG WStatus;
|
|
JET_ERR jerr;
|
|
ULONG i;
|
|
|
|
DPRINT1(3, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
//
|
|
// Stop the Change Order Accept thread.
|
|
//
|
|
ChgOrdAcceptShutdown();
|
|
|
|
//
|
|
// Locks for the Filter Table Child Lists.
|
|
//
|
|
for (i=0; i<NUMBER_FILTER_TABLE_CHILD_LOCKS; i++) {
|
|
DeleteCriticalSection(&JrnlFilterTableChildLock[i]);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlInitOneReplicaSet(
|
|
PCOMMAND_PACKET CmdPkt
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine does all the journal and database initialization for a
|
|
single replica set. It is used to startup a replica set that failed
|
|
to start at service startup or to start a newly created replica set.
|
|
|
|
Note the Journal and database subsystems must be initialized first.
|
|
|
|
The Replica arg must have an initialized config record.
|
|
|
|
Warning - There are no table level locks on the Filter table so only
|
|
one replica set can be initialized at a time on a single volume.
|
|
Actually this might work since the row locks and child link locks should
|
|
be sufficient but it hasn't been tested.
|
|
|
|
The second part of the initialization is done by the database server so
|
|
the journal thread is free to finish processing any pending journal
|
|
buffers for this volume since we have to pause it before we can update
|
|
the filter table.
|
|
|
|
Arguments:
|
|
|
|
CmdPkt - ptr to a cmd packet with a ptr to a replica struct with a
|
|
pre-initialized config record.
|
|
|
|
Thread Return Value:
|
|
|
|
Frs Error Status
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlInitOneReplicaSet:"
|
|
|
|
ULONG FStatus;
|
|
ULONG WStatus;
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
PREPLICA_THREAD_CTX RtCtx;
|
|
PREPLICA Replica;
|
|
|
|
//
|
|
// Check that the journal subsystem is up.
|
|
//
|
|
if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
return FrsErrorNotInitialized;
|
|
}
|
|
|
|
Replica = CmdPkt->Parameters.JournalRequest.Replica;
|
|
|
|
//
|
|
// Phase 1 of journal monitor init. This opens the USN journal on the volume
|
|
// containing the replica set. It allocates the:
|
|
// - volume filter hash table,
|
|
// - parent file ID table,
|
|
// - USN record file name dependency hash table,
|
|
// - USN Write Filter Table,
|
|
// - Active Child dependency hash table,
|
|
// - volume change order list,
|
|
// - volume Change Order Aging table hash table and the
|
|
// - Active Inbound Change Order hash table.
|
|
//
|
|
// If the journal is already open then it returns the pVme for the volume
|
|
// in the Replica struct.
|
|
//
|
|
DPRINT3(4, ":S: Phase 1 for replica %ws, id: %d, (%08x)\n",
|
|
Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
|
|
|
|
//
|
|
// Assume its going to work out ok and go do it.
|
|
//
|
|
Replica->FStatus = FrsErrorSuccess;
|
|
WStatus = JrnlPrepareService1(Replica);
|
|
|
|
if (!WIN_SUCCESS(WStatus) || (Replica->pVme == NULL)) {
|
|
DPRINT1_WS(4, "++ Phase 1 for replica %ws Failed;",
|
|
Replica->ReplicaName->Name, WStatus);
|
|
|
|
//
|
|
// add cleanup code, delete vme ...
|
|
//
|
|
if (FRS_SUCCESS(Replica->FStatus)) {
|
|
//
|
|
// Return generic error if no specific error code was provided.
|
|
//
|
|
Replica->FStatus = FrsErrorReplicaPhase1Failed;
|
|
}
|
|
return Replica->FStatus;
|
|
}
|
|
|
|
|
|
ConfigRecord = (PCONFIG_TABLE_RECORD) (Replica->ConfigTable.pDataRecord);
|
|
|
|
//
|
|
// ** WARN ** at this point there is only one Replica Thread
|
|
// context associated with the replica.
|
|
//
|
|
RtCtx = CONTAINING_RECORD(GetListHead(&Replica->ReplicaCtxListHead.ListHead),
|
|
REPLICA_THREAD_CTX,
|
|
ReplicaCtxList);
|
|
|
|
DPRINT3(4, "++ Submit replica tree load cmd for replica %ws, id: %d, (%08x)\n",
|
|
Replica->ReplicaName->Name, Replica->ReplicaNumber, Replica);
|
|
|
|
DPRINT3(4, "++ ConfigRecord: %08x, RtCtx: %08x, path: %ws\n",
|
|
ConfigRecord, RtCtx, ConfigRecord->FSRootPath);
|
|
|
|
//
|
|
// Propagate the command packet on to the DBService to init the
|
|
// replica tables and complete the rest of the initialization.
|
|
//
|
|
DbsPrepareCmdPkt(CmdPkt, // CmdPkt,
|
|
Replica, // Replica,
|
|
CMD_LOAD_ONE_REPLICA_FILE_TREE, // CmdRequest,
|
|
NULL, // TableCtx,
|
|
RtCtx, // CallContext,
|
|
0, // TableType,
|
|
0, // AccessRequest,
|
|
0, // IndexType,
|
|
NULL, // KeyValue,
|
|
0, // KeyValueLength,
|
|
TRUE); // Submit
|
|
|
|
//
|
|
// Phase 1 is done.
|
|
//
|
|
|
|
return FrsErrorSuccess;
|
|
|
|
}
|
|
|
|
|
|
|
|
ULONG_PTR
|
|
JrnlFilterDeleteEntry (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru GhtCleanTableByFilter() to delete all the
|
|
Filter table entries for a given Replica Set specified by the
|
|
Context parameter.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
True if the entry matches the Replica Context and is to be deleted.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterDeleteEntry:"
|
|
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
PFILTER_TABLE_ENTRY FilterEntry = Buffer;
|
|
|
|
return (FilterEntry->Replica == Replica);
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlCleanOutReplicaSet(
|
|
PREPLICA Replica
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine cleans out the filter table and parent file ID table entries
|
|
associated with the given replica set.
|
|
|
|
*NOTE* We assume the caller has paused the journal and there is no
|
|
activity on either the volume FilterTable or the ParentFidTable.
|
|
|
|
Warning - There are no table level locks on the Filter table so only
|
|
one replica set can be cleaned up t a time on a single volume.
|
|
|
|
Arguments:
|
|
|
|
Replica - ptr to replica struct.
|
|
|
|
Thread Return Value:
|
|
|
|
Frs Error Status
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCleanOutReplicaSet:"
|
|
|
|
PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
|
|
ULONG Cnt;
|
|
|
|
//
|
|
// Check that the journal subsystem is up.
|
|
//
|
|
if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
return FrsErrorNotInitialized;
|
|
}
|
|
|
|
//
|
|
// Scan the table and delete all the filter entries for this replica set.
|
|
//
|
|
Cnt = GhtCleanTableByFilter(pVme->FilterTable, JrnlFilterDeleteEntry, Replica);
|
|
DPRINT1(4, "Total of %d Filter Table entries deleted.\n", Cnt);
|
|
|
|
//
|
|
// Ditto for the parent file ID table.
|
|
//
|
|
QHashDeleteByFlags(pVme->ParentFidTable, Replica->ReplicaNumber);
|
|
|
|
//
|
|
// Note: we could also do this for the name space table by moving the
|
|
// sequence number into the quadword and putting the replica number
|
|
// in flags
|
|
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
Monitor(
|
|
PFRS_THREAD ThisFrsThreadCtx
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This is the main journal work thread. It processes command packets
|
|
and journal buffer packets off its processing queue.
|
|
|
|
It filters each entry in the USN journal against a filter table for
|
|
the volume to determine if the file in question is part of a replica
|
|
set. It then builds a change order entry to feed the data base and
|
|
the output logs.
|
|
|
|
Note: Perf: If multiple volumes are being monitored, we could create
|
|
additional monitor threads and divide the volumes up among the
|
|
threads. The processing of USN records for a given volume is
|
|
single threaded though because they must be processed in order.
|
|
|
|
Arguments:
|
|
|
|
ThisFrsThreadCtx - A pointer to the FRS_THREAD ctx for this thread.
|
|
|
|
Thread Return Value:
|
|
|
|
ERROR_SUCCESS - Thread terminated normally.
|
|
Other errors from CreatFile, ReadDirectoryChangesW, CreateEvent, ...
|
|
are returned as the thread exit status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "monitor:"
|
|
|
|
|
|
USN CurrentUsn;
|
|
USN NextUsn;
|
|
USN JournalConsumed;
|
|
ULONGLONG CaptureParentFileID;
|
|
|
|
PWCHAR Pwc;
|
|
DWORD Level;
|
|
ULONG RelativePathLength;
|
|
|
|
ULONG FileAttributes;
|
|
|
|
LONG DataLength;
|
|
PUSN_RECORD UsnRecord;
|
|
PUSN_RECORD OldRenUsnRec;
|
|
PULONGLONG UsnBuffer;
|
|
BOOL SaveFlag;
|
|
|
|
PLIST_ENTRY Entry;
|
|
PJBUFFER Jbuff;
|
|
|
|
NTSTATUS Status;
|
|
ULONG WStatus = ERROR_SUCCESS;
|
|
ULONG GStatus;
|
|
ULONG FStatus;
|
|
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PFRS_NODE_HEADER Header;
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
PCOMMAND_PACKET CmdPkt;
|
|
PREPLICA Replica;
|
|
BOOL Excluded;
|
|
UNICODE_STRING TempUStr;
|
|
|
|
BOOL IsDirectory;
|
|
ULONG UsnReason;
|
|
ULONG Flags;
|
|
LONG LocationCmd;
|
|
PFILTER_TABLE_ENTRY PrevParentFilterEntry;
|
|
PFILTER_TABLE_ENTRY CurrParentFilterEntry;
|
|
PCXTION Cxtion;
|
|
WCHAR FileName[MAX_PATH + 1];
|
|
PrevParentFilterEntry = NULL;
|
|
CurrParentFilterEntry = NULL;
|
|
|
|
|
|
/******************************************************************************
|
|
*******************************************************************************
|
|
** **
|
|
** **
|
|
** M A I N U S N J O U R N A L P R O C E S S L O O P **
|
|
** **
|
|
** **
|
|
*******************************************************************************
|
|
******************************************************************************/
|
|
|
|
|
|
DPRINT(5, ":S: Journal is starting.\n");
|
|
|
|
//
|
|
// Try-Finally
|
|
//
|
|
try {
|
|
|
|
//
|
|
// Capture exception.
|
|
//
|
|
try {
|
|
|
|
while (TRUE) {
|
|
//
|
|
// Wait on the JournalProcessQueue for a journal buffer.
|
|
//
|
|
Entry = FrsRtlRemoveHeadQueueTimeout(&JournalProcessQueue, 10*1000);
|
|
if (Entry == NULL) {
|
|
WStatus = GetLastError();
|
|
if (WStatus == WAIT_TIMEOUT) {
|
|
//
|
|
// Go look for more work.
|
|
//
|
|
continue;
|
|
}
|
|
|
|
if (WStatus == ERROR_INVALID_HANDLE) {
|
|
DPRINT(4, ":S: JournalProcessQueue is shutdown.\n");
|
|
//
|
|
// The queue has been run down. Close all the journal handles
|
|
// saving the USN to start the next read from. Then close
|
|
// Jet Session and exit.
|
|
//
|
|
WStatus = ERROR_SUCCESS;
|
|
JrnlCloseAll();
|
|
break;
|
|
}
|
|
//
|
|
// Unexpected error from FrsRtlRemoveHeadQueueTimeout
|
|
//
|
|
DPRINT_WS(0, "Error from FrsRtlRemoveHeadQueueTimeout", WStatus);
|
|
JrnlCloseAll();
|
|
break;
|
|
}
|
|
|
|
Header = (PFRS_NODE_HEADER) CONTAINING_RECORD(Entry, COMMAND_PACKET, ListEntry);
|
|
if (Header->Type == COMMAND_PACKET_TYPE) {
|
|
//
|
|
// Process the command packet.
|
|
//
|
|
WStatus = JrnlCommand((PCOMMAND_PACKET)Header);
|
|
continue;
|
|
}
|
|
|
|
|
|
if (Header->Type != JBUFFER_TYPE) {
|
|
//
|
|
// Garbage packet.
|
|
//
|
|
DPRINT2(0, "ERROR - Invalid packet type: %d, size: %d\n",
|
|
Header->Type, Header->Size);
|
|
FRS_ASSERT(!"Jrnl monitor: Invalid packet type");
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////
|
|
// //
|
|
// P R O C E S S J O U R N A L D A T A B U F F E R //
|
|
// //
|
|
///////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// Increment the Usn Reads Counter
|
|
//
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnReads, 1);
|
|
|
|
Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
|
|
//DPRINT2(5, "jb: fu %08x (len: %d)\n",
|
|
// Jbuff, Jbuff->DataLength);
|
|
|
|
pVme = Jbuff->pVme;
|
|
WStatus = Jbuff->WStatus;
|
|
UsnBuffer = Jbuff->DataBuffer;
|
|
DataLength = Jbuff->DataLength;
|
|
|
|
DPRINT1(4, ":U: ***** USN Data for Volume %ws *****\n", pVme->FSVolInfo.VolumeLabel);
|
|
|
|
//
|
|
// Pull out the Next USN
|
|
//
|
|
NextUsn = 0;
|
|
if (DataLength != 0) {
|
|
UsnRecord = (PUSN_RECORD)((PCHAR)UsnBuffer + sizeof(USN));
|
|
DataLength -= sizeof(USN);
|
|
|
|
NextUsn = *(USN *)UsnBuffer;
|
|
DPRINT1(4, "Next Usn will be: %08lx %08lx\n", PRINTQUAD(NextUsn));
|
|
}
|
|
|
|
//
|
|
// Check if I/O is stopped on this journal and throw the buffer away.
|
|
// Could be a pause request.
|
|
//
|
|
if (!pVme->IoActive) {
|
|
CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
|
|
DPRINT1(4, "++ I/O not active on this journal. Freeing buffer. State is: %s\n",
|
|
RSS_NAME(pVme->JournalState));
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
continue;
|
|
}
|
|
|
|
|
|
//
|
|
// Check for lost journal data. This is unlikely to happen here since
|
|
// this error will surface when we submit the journal read request.
|
|
// There is other error recovery code that is invoked when we try to start
|
|
// a replica set and the journal restart point is not found.
|
|
//
|
|
if (WStatus == ERROR_NOT_FOUND) {
|
|
DPRINT1(4, ":U: Usn %08lx %08lx has been deleted. Data lost, resync required\n",
|
|
PRINTQUAD(Jbuff->JrnlReadPoint));
|
|
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
|
|
//
|
|
// Post an error log entry.
|
|
//
|
|
EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
|
|
}
|
|
|
|
//
|
|
// Some other error.
|
|
//
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT_WS(0, "ERROR - Read Usn Journal failed", WStatus);
|
|
//
|
|
// Put the VME on the stop queue and mark all Replica Sets
|
|
// using this VME as stopped.
|
|
//
|
|
// Add code to walk the replica list to stop replication on a journal error.
|
|
// Is closing the journal the right way to fail?
|
|
//
|
|
JrnlClose(Jbuff->FileHandle);
|
|
|
|
CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
|
|
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
continue;
|
|
}
|
|
|
|
|
|
//
|
|
// Check for data left after USN.
|
|
//
|
|
if (DataLength > 0) {
|
|
//
|
|
// Check version number for mismatch.
|
|
//
|
|
if (UsnRecord->MajorVersion != ConfigUsnMajorVersion) {
|
|
DPRINT2(0, ":U: ERROR - Major version mismatch for USN Journal. Found: %d, Expected: %d\n",
|
|
UsnRecord->MajorVersion, ConfigUsnMajorVersion);
|
|
WStatus = ERROR_REVISION_MISMATCH;
|
|
//
|
|
// Put the VME on the stop queue and mark all Replica Sets
|
|
// using this VME as stopped.
|
|
//
|
|
// Note: Add code to walk the replica list & stop VME on config mismatch.
|
|
// is closing the journal the right way to fail?
|
|
//
|
|
JrnlClose(Jbuff->FileHandle);
|
|
CAPTURE_JOURNAL_PROGRESS(pVme, Jbuff->JrnlReadPoint);
|
|
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
//
|
|
// The USN save point for each replica can also depend on the amount of
|
|
// journal data consumed. If there is lots of activity on the journal
|
|
// but little or no activity on a given replica set hosted by the volume
|
|
// then we must keep advancing the USN save point for the replica.
|
|
// Otherwise, if we were to crash we could find ourselves with a USN
|
|
// save point at recovery for data no longer in the journal that we
|
|
// don't want anyway. In addition, if it was still in the journal we
|
|
// would have to plow through it a second time just to find nothing of
|
|
// interest. Once JRNL_USN_SAVE_POINT_INTERVAL bytes of journal data
|
|
// are consumed then trigger a USN save on all active replica sets on
|
|
// this volume. A journal replay could make this go negative so
|
|
// minimize with 0.
|
|
//
|
|
SaveFlag = FALSE;
|
|
LOCK_VME(pVme); // Get the lock to avoid QW Tearing with
|
|
// LastUsnSavePoint update in NEW_VSN() code.
|
|
JournalConsumed = NextUsn - pVme->LastUsnSavePoint;
|
|
if (JournalConsumed < 0) {JournalConsumed = (USN)0;}
|
|
if (JournalConsumed >= (USN) JRNL_USN_SAVE_POINT_INTERVAL) {
|
|
SaveFlag = TRUE;
|
|
DPRINT3(5, "++ USN Save Triggered: NextUsn: %08x %08x "
|
|
"LastSave: %08x %08x "
|
|
"Consumed: %08x %08x\n",
|
|
PRINTQUAD(NextUsn),
|
|
PRINTQUAD(pVme->LastUsnSavePoint),
|
|
PRINTQUAD(JournalConsumed));
|
|
pVme->LastUsnSavePoint = NextUsn;
|
|
}
|
|
UNLOCK_VME(pVme);
|
|
if (SaveFlag) {
|
|
DbsRequestSaveMark(pVme, FALSE);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////
|
|
// //
|
|
// P R O C E S S U S N R E C O R D S //
|
|
// //
|
|
///////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// Walk through the buffer and process the results. Note that a single
|
|
// file can appear multiple times. E.G. a copy operation to a file may
|
|
// create the target update the create time and set the attributes.
|
|
// Each one of these is reported as a separate event.
|
|
//
|
|
|
|
RESET_JOURNAL_PROGRESS(pVme);
|
|
|
|
while (DataLength > 0) {
|
|
|
|
Replica = NULL;
|
|
|
|
if ((LONG)UsnRecord->RecordLength > DataLength) {
|
|
DPRINT2(0, ":U: ERROR: Bogus DataLength: %d, Record Length Was: %d\n",
|
|
DataLength, UsnRecord->RecordLength );
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Track USN of current record being processed and the maximum
|
|
// point of progress reached in the journal.
|
|
//
|
|
CurrentUsn = UsnRecord->Usn;
|
|
|
|
pVme->CurrentUsnRecord = CurrentUsn;
|
|
CAPTURE_MAX_JOURNAL_PROGRESS(pVme, CurrentUsn);
|
|
|
|
//
|
|
// Check if I/O is stopped on this journal and skip the rest of the
|
|
// buffer. Could be a pause request. Capture current journal
|
|
// progress for an unpause.
|
|
//
|
|
if (!pVme->IoActive) {
|
|
CAPTURE_JOURNAL_PROGRESS(pVme, CurrentUsn);
|
|
DPRINT1(4, ":U: I/O not active on this journal. Freeing buffer. State is: %s\n",
|
|
RSS_NAME(pVme->JournalState));
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Increment the UsnRecordsExamined counter
|
|
//
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecExamined, 1);
|
|
|
|
|
|
if (CurrentUsn == QUADZERO) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ Zero USN; skipping\n");
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
|
|
UsnReason = UsnRecord->Reason;
|
|
FileAttributes = UsnRecord->FileAttributes;
|
|
|
|
|
|
//
|
|
// If this is close record with a file name of the form
|
|
// "NTFRS_DELETED_FILE_xxxxxx" then delete the file. These are
|
|
// produced when an install override is performed by renaming an
|
|
// open target file to the above name in order to complete an install.
|
|
//
|
|
if ((UsnRecord->FileNameLength/sizeof(WCHAR) > wcslen(INSTALL_OVERRIDE_PREFIX)) &&
|
|
(wcsncmp(UsnRecord->FileName,
|
|
INSTALL_OVERRIDE_PREFIX,
|
|
wcslen(INSTALL_OVERRIDE_PREFIX)) == 0)) {
|
|
|
|
if (BooleanFlagOn(UsnReason, USN_REASON_CLOSE)) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
|
|
if (!BooleanFlagOn(UsnReason, USN_REASON_FILE_DELETE) &&
|
|
((UsnReason & ~USN_REASON_CLOSE) != 0)) {
|
|
//
|
|
// Delete the file.
|
|
//
|
|
RtlMoveMemory (FileName, UsnRecord->FileName, UsnRecord->FileNameLength);
|
|
FileName[UsnRecord->FileNameLength/sizeof(WCHAR)] = UNICODE_NULL;
|
|
|
|
WStatus = FrsDeleteById(pVme->DriveLetter,
|
|
FileName,
|
|
pVme,
|
|
&UsnRecord->FileReferenceNumber,
|
|
FILE_ID_LENGTH);
|
|
DPRINT1_WS(2, "++ WARN - cannot delete %ws;", FileName, WStatus);
|
|
}
|
|
|
|
DPRINT(3, "++ INSTALL OVERRIDE CLEANUP; skipping\n");
|
|
}
|
|
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
|
|
|
|
//
|
|
// Ignore temporary, encrypted files. We do replicate offline
|
|
// files (FILE_ATTRIBUTE_OFFLINE set) because some members
|
|
// may be running HSM and some may not. All members have to
|
|
// have the same data.
|
|
//
|
|
if (FileAttributes & (FILE_ATTRIBUTE_ENCRYPTED)) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ Encrypted; skipping\n");
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// Skip USN records with the SOURCE_DATA_MANAGEMENT flag set.
|
|
// E.G. HSM and SIS would set this flag to prevent triggering
|
|
// replication when the data has not changed.
|
|
//
|
|
if (UsnRecord->SourceInfo & USN_SOURCE_DATA_MANAGEMENT) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ DATA_MANAGEMENT source; skipping\n");
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// If this is an USN_REASON_RENAME_OLD_NAME record that does not have
|
|
// USN_REASON_RENAME_NEW_NAME set then capture the old name so
|
|
// we have it when processing the new name.
|
|
//
|
|
if ((UsnReason & USN_REASON_RENAME_OLD_NAME) &&
|
|
((UsnReason & USN_REASON_RENAME_NEW_NAME) == 0) ) {
|
|
|
|
//
|
|
// Always pick up the old name when we see one. There are times
|
|
// when we will pick up an old name but then filter out the USN
|
|
// record. e.g. not in replica set, a staging file, etc.
|
|
// If we always load the old name then the next Close record
|
|
// with Rename New set will have the correct old name to insert into
|
|
// the name space table. Since multiple rename records can occur
|
|
// in sequence before we see the first close we need to track
|
|
// multiple RENAME_OLD_NAME records.
|
|
//
|
|
GStatus = QHashLookup(pVme->RenOldNameTable,
|
|
&UsnRecord->FileReferenceNumber,
|
|
NULL,
|
|
(PULONG_PTR) &OldRenUsnRec);
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS ) {
|
|
//
|
|
// Existing entry found for this file. Update it.
|
|
//
|
|
if (OldRenUsnRec->RecordLength < UsnRecord->RecordLength) {
|
|
OldRenUsnRec = FrsFree(OldRenUsnRec);
|
|
OldRenUsnRec = FrsAlloc(UsnRecord->RecordLength);
|
|
}
|
|
|
|
if (OldRenUsnRec != NULL) {
|
|
RtlMoveMemory (OldRenUsnRec, UsnRecord, UsnRecord->RecordLength);
|
|
DPRINT(3, "++ Rename old. Save name\n");
|
|
|
|
GStatus = QHashUpdate(pVme->RenOldNameTable,
|
|
&UsnRecord->FileReferenceNumber,
|
|
NULL,
|
|
(ULONG_PTR) OldRenUsnRec);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashUpdate error: %d\n", GStatus);
|
|
}
|
|
|
|
} else {
|
|
DPRINT(0, "++ Rename old. Save name failed -- no memory\n");
|
|
}
|
|
|
|
} else {
|
|
//
|
|
// No entry for this file. Create a new one and save USN record.
|
|
//
|
|
OldRenUsnRec = FrsAlloc(UsnRecord->RecordLength);
|
|
|
|
if (OldRenUsnRec != NULL) {
|
|
RtlMoveMemory (OldRenUsnRec, UsnRecord, UsnRecord->RecordLength);
|
|
DPRINT(3, "++ Rename old. Save name\n");
|
|
|
|
GStatus = QHashInsert(pVme->RenOldNameTable,
|
|
&UsnRecord->FileReferenceNumber,
|
|
NULL,
|
|
(ULONG_PTR) OldRenUsnRec,
|
|
FALSE);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
OldRenUsnRec = FrsFree(OldRenUsnRec);
|
|
DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
|
|
}
|
|
} else {
|
|
DPRINT(0, "++ Rename old. Save name failed -- no memory\n");
|
|
}
|
|
}
|
|
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
|
|
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// FRS uses the NTFS journal filtering feature in which an app can
|
|
// tell NTFS what kinds of journal records it does not want to see.
|
|
// In particular FRS asks NTFS to filter out all journal records
|
|
// except for journal "Close" and "Create" records. NTFS
|
|
// writes a close record to the journal after the last handle to
|
|
// the file is closed. In addition, if the system crashes, at
|
|
// startup NTFS recovery-processing inserts close records for all
|
|
// open and modified files.
|
|
// The Create records need to be examined for directory creates
|
|
// because the close record may not appear for a while. Meanwhile
|
|
// multiple children close records can be processed which would
|
|
// be skipped unless the parent dir create was added to the Filter
|
|
// table. Bug 432549 was a case of this.
|
|
//
|
|
if (!BooleanFlagOn(UsnReason, USN_REASON_CLOSE)) {
|
|
|
|
if (BooleanFlagOn(UsnReason, USN_REASON_FILE_CREATE) &&
|
|
BooleanFlagOn(FileAttributes, FILE_ATTRIBUTE_DIRECTORY)) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ Dir Create; Cannot skip\n");
|
|
} else {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ Not a close and not dir create; skipping\n");
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Skip files that have USN_REASON_REPARSE_POINT_CHANGE set.
|
|
// Since symbolic links are unsupported we do not replicate them.
|
|
// HSM and SIS also use reparse points but we only replicate changes
|
|
// to the file and these services change the NTFS File Record to set
|
|
// the reparse point attribute only when they migrate the file data
|
|
// somewhere else. By that time the file had already been created
|
|
// and was replicated when it was created. See NTIOAPI.H for more
|
|
// info about the REPARSE_DATA_BUFFER and the IO_REPARSE_TAG field.
|
|
//
|
|
#if 0
|
|
// This below is faulty because the SIS COPY FILE utility will both set and create
|
|
// files with a reparse point. We will have to rely on the data management test
|
|
// above to filter out the conversion of a file to and from a SIS link.
|
|
if (UsnReason & USN_REASON_REPARSE_POINT_CHANGE) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT(3, "++ Reparse point change; skipping\n");
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
#endif
|
|
|
|
//
|
|
// If this file record has the reparse attribute set then read
|
|
// the Reparse Tag from the file to see if this is either SIS or HSM.
|
|
//
|
|
if (FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
|
|
//
|
|
// Can't filter out Deletes though
|
|
//
|
|
if (!BooleanFlagOn(UsnReason, USN_REASON_FILE_DELETE)) {
|
|
WStatus = FrsCheckReparse(L"--",
|
|
(PULONG)&UsnRecord->FileReferenceNumber,
|
|
FILE_ID_LENGTH,
|
|
pVme->VolumeHandle);
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DUMP_USN_RECORD(3, UsnRecord);
|
|
DPRINT_WS(3, "++ FrsGetReparseTag failed, skipping,", WStatus);
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
goto NEXT_USN_RECORD;
|
|
}
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////
|
|
// //
|
|
// F I L T E R P R O C E S S I N G //
|
|
// //
|
|
///////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// Note: If replication is paused for the replica tree we still
|
|
// process the journal entries so we don't lose data.
|
|
// When replication is later unpaused the update process picks
|
|
// up the change orders from the Replica Set Change order table.
|
|
//
|
|
// If replication was not started for a given replica tree then
|
|
// the directory fids won't be in the table. When replication
|
|
// is stopped for a replica tree its directory fids are purged
|
|
// from the table
|
|
//
|
|
// In the case of file or Dir renames the parent FID in the
|
|
// USN record is the FID of the destination of the rename.
|
|
// If the file/dir was in a replica set prior to the rename its
|
|
// parent file ID will be in the Parent File ID table for the
|
|
// volume.
|
|
//
|
|
// Determine if the file is in a replica set and if a location
|
|
// change is involved. Lookup the previous and current parent FID
|
|
// in the Journal Filter table and return references to their
|
|
// respective filter entries. From this point forward the flow
|
|
// must go thru SKIP_USN_RECORD so the ref counts on PrevParentFilterEntry
|
|
// and CurrParentFilterEntry are decremented appropriately.
|
|
//
|
|
LocationCmd = JrnlGetFileCoLocationCmd(pVme,
|
|
UsnRecord,
|
|
&PrevParentFilterEntry,
|
|
&CurrParentFilterEntry);
|
|
|
|
if (LocationCmd == FILE_NOT_IN_REPLICA_SET) {
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// Nothing to do; skip the usn record
|
|
//
|
|
if (LocationCmd == CO_LOCATION_NO_CMD &&
|
|
((UsnRecord->Reason & CO_CONTENT_MASK) == 0)) {
|
|
DUMP_USN_RECORD(5, UsnRecord);
|
|
DPRINT(5, "++ CO_LOCATION_NO_CMD and no content; skipping\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// Filter out creates of files with FILE_ATTRIBUTE_TEMPORARY set.
|
|
//
|
|
if (!(FileAttributes & FILE_ATTRIBUTE_DIRECTORY) &&
|
|
(FileAttributes & FILE_ATTRIBUTE_TEMPORARY) &&
|
|
CO_NEW_FILE(LocationCmd)) {
|
|
DUMP_USN_RECORD(5, UsnRecord);
|
|
DPRINT(5, "++ Temporary attribute set on file; skipping\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
//
|
|
// Determine the Replica and get the Parent File ID.
|
|
//
|
|
if (CurrParentFilterEntry != NULL) {
|
|
CaptureParentFileID = CurrParentFilterEntry->DFileID;
|
|
Replica = CurrParentFilterEntry->Replica;
|
|
} else {
|
|
CaptureParentFileID = PrevParentFilterEntry->DFileID;
|
|
Replica = PrevParentFilterEntry->Replica;
|
|
}
|
|
|
|
FRS_ASSERT(Replica != NULL);
|
|
|
|
//
|
|
// Under certain conditions a USN record could refer to a file
|
|
// in the FRS PreInstall directory. In particular this can happen
|
|
// during restart when we have lost our journal write filter.
|
|
// No operation on a pre-install file should cause replication.
|
|
// Make special check here for parent FID match.
|
|
//
|
|
if (UsnRecord->ParentFileReferenceNumber == Replica->PreInstallFid) {
|
|
DUMP_USN_RECORD(5, UsnRecord);
|
|
DPRINT(5, "++ USN Record on PreInstall file; skipping\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
|
|
DUMP_USN_RECORD2(3, UsnRecord, Replica->ReplicaNumber, LocationCmd);
|
|
DPRINT2(4, "++ IN REPLICA %d, %ws \n",
|
|
Replica->ReplicaNumber, Replica->ReplicaName->Name);
|
|
|
|
//
|
|
// Check for stale USN record. This occurs when a replica tree
|
|
// is reloaded from disk. In this case you can have stale USN records
|
|
// in the journal that predate the current state of the file when it
|
|
// was loaded. To handle this we capture the current USN when the
|
|
// replica tree load starts (Ub), and again when the load finishes
|
|
// (Ue). We save Ub and Ue with the replica config info. The USN
|
|
// of a record (Ur) affecting this replica tree is then compared
|
|
// with these bounds as follows: (Uf is current USN on the file).
|
|
// if Ur < Ub then skip record since the load has the current state.
|
|
// if Ur > Ue then process record since load has old state.
|
|
// if Ur > Uf then process record since load has old state.
|
|
// otherwise skip the record.
|
|
// Only in the last case is it necessary to open the file and read
|
|
// the USN (when Ub <= Ur <= Ue).
|
|
//
|
|
// Note: add code to filter stale USN records after a replica tree load.
|
|
// This is not a problem if the replica tree starts out empty.
|
|
|
|
|
|
//
|
|
// If the record USN is less than or equal to LastUsnRecordProcessed for
|
|
// this Replica then we must be doing a replay so ignore it.
|
|
// This works because a given file can only be in one Replica
|
|
// set at a time.
|
|
|
|
// NOTE: what about MOVERS?
|
|
//
|
|
// NOTE: Hardlinks across replica sets would violate this.
|
|
//
|
|
if (CurrentUsn <= Replica->LastUsnRecordProcessed) {
|
|
DPRINT(5, "++ USN <= LastUsnRecordProcessed. Record skipped.\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
//
|
|
// If this replica set is paused or has encountered an error
|
|
// then skip the record. When it is restarted we will replay
|
|
// the journal for it.
|
|
//
|
|
if (Replica->ServiceState != REPLICA_STATE_ACTIVE) {
|
|
DPRINT1(5, "++ Replica->ServiceState not active (%s). Record skipped.\n",
|
|
RSS_NAME(Replica->ServiceState));
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// Get the ptr to the config record for this replica.
|
|
//
|
|
ConfigRecord = Replica->ConfigTable.pDataRecord;
|
|
|
|
|
|
//
|
|
// The following call builds the path of the file as we currently
|
|
// know it. If the operation is a MOVEOUT this is the previous path.
|
|
// Since the USN data is historical the file/dir may not be at this
|
|
// location any longer.
|
|
//
|
|
FStatus = JrnlGetPathAndLevel(pVme->FilterTable,
|
|
&CaptureParentFileID,
|
|
&Level);
|
|
if (!FRS_SUCCESS(FStatus)) {
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// Consistency checking.
|
|
//
|
|
if (UsnRecord->FileNameLength > (sizeof(FileName) - sizeof(WCHAR))) {
|
|
DPRINT1(0, ":U: ERROR - USN Record Inconsistency - File path length too long (%d bytes)\n",
|
|
UsnRecord->FileNameLength);
|
|
DPRINT3(0, ":U: ERROR - Start of data buf %08x, current ptr %08x, diff %d\n",
|
|
Jbuff->DataBuffer, UsnRecord,
|
|
(PCHAR) UsnRecord - (PCHAR) Jbuff->DataBuffer);
|
|
DPRINT1(0, ":U: ERROR - DataLength: %d\n", Jbuff->DataLength);
|
|
DPRINT(0, ":U: ERROR - Aborting rest of buffer.\n");
|
|
|
|
//
|
|
// Drop Refs and force buffer loop to exit.
|
|
//
|
|
FRS_ASSERT(!"Jrnl monitor: USN Record Inconsistency");
|
|
UsnRecord->RecordLength = (ULONG) DataLength;
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
RtlMoveMemory (FileName, UsnRecord->FileName, UsnRecord->FileNameLength);
|
|
FileName[UsnRecord->FileNameLength/sizeof(WCHAR)] = UNICODE_NULL;
|
|
DPRINT4(4, "++ NameLen %d Relative Level %d Name: %ws\\...\\%ws\n",
|
|
UsnRecord->FileNameLength, Level, Replica->Root, FileName);
|
|
|
|
|
|
//
|
|
// Determine if this USN entry is a directory or a file.
|
|
//
|
|
IsDirectory = (FileAttributes & FILE_ATTRIBUTE_DIRECTORY);
|
|
|
|
|
|
//
|
|
// First handle the case for directories.
|
|
//
|
|
if (IsDirectory) {
|
|
DPRINT(4, "++ FILE IS DIRECTORY -------\n");
|
|
|
|
//
|
|
// Level is the relative nesting level of the file in the
|
|
// replica tree. The immediate children of the root are Level 0.
|
|
// Ignore files at a depth greater than this.
|
|
// A value of one for ReplDirLevelLimit means allow files in
|
|
// the replica root dir only.
|
|
//
|
|
// Note: Add code to handle rename of a dir from excluded to included.
|
|
// This results in a MOVEDIR Change Order. Not for V1.
|
|
// Ditto for the following - Could be a movedir or movers.
|
|
//
|
|
// Note that a rename of a dir
|
|
// to the bottom level means we delete the subtree because there
|
|
// will be no dirs at the bottom level in the filter table.
|
|
//
|
|
Excluded = (Level >= (ConfigRecord->ReplDirLevelLimit-1));
|
|
|
|
if (Excluded && CO_NEW_FILE(LocationCmd)) {
|
|
DPRINT(4,"++ directory exceeds depth limit. Excluded\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
//
|
|
// See if the name is on the exclusion filter list.
|
|
//
|
|
if (!IsListEmpty(&Replica->DirNameFilterHead)) {
|
|
|
|
FrsSetUnicodeStringFromRawString(&TempUStr,
|
|
UsnRecord->FileNameLength,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
|
|
LOCK_REPLICA(Replica);
|
|
Excluded = FrsCheckNameFilter(&TempUStr, &Replica->DirNameFilterHead);
|
|
//
|
|
// Not excluded if it's on the included list.
|
|
//
|
|
if (Excluded &&
|
|
FrsCheckNameFilter(&TempUStr, &Replica->DirNameInclFilterHead)) {
|
|
Excluded = FALSE;
|
|
}
|
|
UNLOCK_REPLICA(Replica);
|
|
|
|
if (Excluded && CO_NEW_FILE(LocationCmd)) {
|
|
DPRINT(4,"++ directory name filter hit. Excluded\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Generate the change orders as we update the filter table.
|
|
//
|
|
DPRINT2(4,"++ DIR location cmd on: %ws\\...\\%ws\n",
|
|
Replica->Root, FileName);
|
|
|
|
JrnlFilterUpdate(Replica,
|
|
UsnRecord,
|
|
LocationCmd,
|
|
PrevParentFilterEntry,
|
|
CurrParentFilterEntry);
|
|
|
|
} else {
|
|
|
|
|
|
//
|
|
// Handle the files here.
|
|
//
|
|
// Evaluate the excluded state if this is a file.
|
|
// Files are allowed at the bottom level.
|
|
//
|
|
Excluded = (Level >= ConfigRecord->ReplDirLevelLimit);
|
|
|
|
//
|
|
// NOTE: Treat Movedir or movers that is > depth limit as moveout.
|
|
//
|
|
if (Excluded && CO_NEW_FILE(LocationCmd)) {
|
|
DPRINT(4,"++ Filter depth exceeded. File excluded\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
|
|
|
|
|
|
// Note: Add code to handle rename of file from excluded to included.
|
|
//
|
|
// Excluded file check:
|
|
//
|
|
// 1. If this is a create or MOVEIN of a file with an
|
|
// excluded name then just ignore the USN record.
|
|
//
|
|
// 2. If this is a rename of an excluded file to a visible
|
|
// file then generate a MOVEIN change order for the file.
|
|
//
|
|
// 3. If the file is not in our tables then it must not
|
|
// be visible so ignore it. Note that changing the
|
|
// exclusion list by removing an element will not by itself
|
|
// make those files visible. A rename operation is still
|
|
// needed to get the file into our tables.
|
|
//
|
|
// 4. A rename of a visible file to an excluded file does
|
|
// not make the file excluded since it is still in our tables
|
|
// and present in all replicas. Only a delete or a rename
|
|
// of the file to a point outside the replica set will remove
|
|
// the file from our tables and all other replicas.
|
|
//
|
|
// 5. The addition of an element to the exclusion list only
|
|
// affects future creates. It has no affect on previous
|
|
// file creates that generated an entry in our tables.
|
|
//
|
|
|
|
//
|
|
// See if the name is on the exclusion filter list.
|
|
//
|
|
if (!IsListEmpty(&Replica->FileNameFilterHead)) {
|
|
|
|
FrsSetUnicodeStringFromRawString(&TempUStr,
|
|
UsnRecord->FileNameLength,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
|
|
LOCK_REPLICA(Replica);
|
|
Excluded = FrsCheckNameFilter(&TempUStr, &Replica->FileNameFilterHead);
|
|
//
|
|
// Not excluded if it's on the included list.
|
|
//
|
|
if (Excluded &&
|
|
FrsCheckNameFilter(&TempUStr, &Replica->FileNameInclFilterHead)) {
|
|
Excluded = FALSE;
|
|
}
|
|
UNLOCK_REPLICA(Replica);
|
|
|
|
if (Excluded && CO_NEW_FILE(LocationCmd)) {
|
|
DPRINT(4,"++ File name filter hit. Excluded\n");
|
|
goto SKIP_USN_RECORD;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Looks like this file is real. See if we have a change order
|
|
// pending for it. If so update it, if not, alloc a new one.
|
|
//
|
|
WStatus = JrnlEnterFileChangeOrder(UsnRecord,
|
|
LocationCmd,
|
|
PrevParentFilterEntry,
|
|
CurrParentFilterEntry);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ ERROR - Change order create or update failed\n");
|
|
}
|
|
}
|
|
|
|
//
|
|
// Increment the UsnRecords Accepted counter
|
|
//
|
|
PM_INC_CTR_REPSET(Replica, UsnRecAccepted, 1);
|
|
goto ACCEPT_USN_RECORD;
|
|
|
|
SKIP_USN_RECORD:
|
|
//
|
|
// Increment the UsnRecordsRejected counter
|
|
//
|
|
PM_INC_CTR_SERVICE(PMTotalInst, UsnRecRejected, 1);
|
|
|
|
ACCEPT_USN_RECORD:
|
|
//
|
|
// Release the references on the prev and current parent filter
|
|
// entries that were acquired by JrnlGetFileCoLocationCmd().
|
|
//
|
|
if (PrevParentFilterEntry != NULL) {
|
|
GhtDereferenceEntryByAddress(pVme->FilterTable,
|
|
PrevParentFilterEntry,
|
|
TRUE);
|
|
PrevParentFilterEntry = NULL;
|
|
}
|
|
|
|
if (CurrParentFilterEntry != NULL) {
|
|
GhtDereferenceEntryByAddress(pVme->FilterTable,
|
|
CurrParentFilterEntry,
|
|
TRUE);
|
|
CurrParentFilterEntry = NULL;
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// This has to be done after processing the record so if a
|
|
// save mark were to happen at the same time we wouldn't
|
|
// erroneously filter out the record above when the CurrentUsn
|
|
// is compared with Replica->LastUsnProcessed.
|
|
//
|
|
UpdateCurrentUsnRecordDone(pVme, CurrentUsn);
|
|
|
|
//
|
|
// If we are out of Replay mode for this replica and the
|
|
// replica is active then advance our Journal progress
|
|
// point, Replica->LastUsnRecordProcessed.
|
|
//
|
|
if ((Replica != NULL) &&
|
|
(Replica->ServiceState == REPLICA_STATE_ACTIVE) &&
|
|
!REPLICA_REPLAY_MODE(Replica, pVme)) {
|
|
|
|
AcquireQuadLock(&pVme->QuadWriteLock);
|
|
Replica->LastUsnRecordProcessed = CurrentUsn;
|
|
ReleaseQuadLock(&pVme->QuadWriteLock);
|
|
}
|
|
|
|
NEXT_USN_RECORD:
|
|
|
|
//
|
|
// Advance to next USN Record.
|
|
//
|
|
DataLength -= UsnRecord->RecordLength;
|
|
UsnRecord = (PUSN_RECORD)((PCHAR)UsnRecord + UsnRecord->RecordLength);
|
|
|
|
} // end while(DataLength > 0)
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
|
|
} // end while(TRUE)
|
|
|
|
|
|
//
|
|
// Get exception status.
|
|
//
|
|
} except (EXCEPTION_EXECUTE_HANDLER) {
|
|
GET_EXCEPTION_CODE(WStatus);
|
|
}
|
|
|
|
|
|
} finally {
|
|
|
|
if (WIN_SUCCESS(WStatus)) {
|
|
if (AbnormalTermination()) {
|
|
WStatus = ERROR_OPERATION_ABORTED;
|
|
}
|
|
}
|
|
|
|
DPRINT_WS(0, "Journal Monitor thread finally.", WStatus);
|
|
|
|
//
|
|
// Trigger FRS shutdown if we terminated abnormally.
|
|
//
|
|
if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
|
|
DPRINT(0, "Journal Monitor thread terminated abnormally, forcing service shutdown.\n");
|
|
FrsIsShuttingDown = TRUE;
|
|
SetEvent(ShutDownEvent);
|
|
} else {
|
|
WStatus = ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// Cleanup all the storage.
|
|
//
|
|
DPRINT1(3, ":S: T E R M I N A T I N G -- %s\n", DEBSUB);
|
|
|
|
JournalMonitorShutdown();
|
|
|
|
if (HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
WStatus = WaitForSingleObject(JournalReadThreadHandle, 10000);
|
|
CHECK_WAIT_ERRORS2(3, WStatus, 1);
|
|
|
|
if (WIN_SUCCESS(WStatus)) {
|
|
DPRINT(4, ":S: Journal Read thread terminated.\n");
|
|
}
|
|
|
|
} else {
|
|
DPRINT(4, ":S: Journal Read thread terminate - NULL Handle\n");
|
|
}
|
|
|
|
DPRINT(0, ":S: Journal is exiting.\n");
|
|
DPRINT1(4, ":S: ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx) - %08x\n", ThisFrsThreadCtx);
|
|
ThSupSubmitThreadExitCleanup(ThisFrsThreadCtx);
|
|
}
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
LONG
|
|
JrnlGetFileCoLocationCmd(
|
|
PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUSN_RECORD UsnRecord,
|
|
OUT PFILTER_TABLE_ENTRY *PrevParentFilterEntry,
|
|
OUT PFILTER_TABLE_ENTRY *CurrParentFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Given the Reason mask and the current parent file ID in the USN record
|
|
and the previous parent File ID determine the location command for the
|
|
change order. The volume filter table is used to check the presence of
|
|
the parent directories in a replica set and to check if the file has
|
|
moved between two replica sets.
|
|
|
|
There are 5 cases shown in the table below. A lookup is done for each File
|
|
ID in the Filter table and these results are tested to generate the change
|
|
order location command value. (M: lookup miss, H: lookup hit). See
|
|
comments elsewhere for outcome defs.
|
|
|
|
|
|
Prev Curr Prev &
|
|
Parent Parent New
|
|
FID FID Parent R.S.
|
|
Case Lookup Lookup Match Outcome
|
|
|
|
0 M M - FILE_NOT_IN_REPLICA_SET
|
|
|
|
1 M H - MOVEIN
|
|
|
|
2 H M - MOVEOUT (a)
|
|
|
|
3 H H No (a), MOVERS, NAMECHANGE
|
|
|
|
4 H H Yes MOVEDIR, NAMECHANGE
|
|
|
|
|
|
(a) The parent FID could be in the replica set while the File/Dir FID isn't
|
|
if a subtree enum by the update process hasn't reached the File/Dir FID yet
|
|
(MOVEIN on parent followed by MOVOUT on child) or,
|
|
|
|
The child was excluded and now its name is changing to allow inclusion.
|
|
In this case the rename includes a name change so the file is no
|
|
longer excluded.
|
|
|
|
During subtree operations filter table lookups must be blocked or races
|
|
causing invalid states will occur.
|
|
|
|
|
|
1. MOVEIN - Rename of a directory into a replica set. The lookup failed on
|
|
the previous parent FID but the current parent FID is in the table. We
|
|
add an entry for this DIR to the filter table. The update process must
|
|
enumerate the subtree on disk and evaluate each file for inclusion into
|
|
the tree, updating the Filter table as it goes. We may see file
|
|
operations several levels down from the rename point and have no entry in
|
|
the Filter Table so we pitch those records. The sub-tree enumeration
|
|
process must handle this as it incorporates each file into the IDTable.
|
|
|
|
2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
|
|
volume. This is a delete of an entire subtree in the Replica set. We
|
|
enumerate the subtree bottom-up, sending dir level change orders to the
|
|
update process as we delete the filter table entries.
|
|
|
|
3. Name change only. The current Parent FID in the USN record matches the
|
|
Parent FID in the Filter entry for the file or directory. Update the name
|
|
in the filter entry.
|
|
|
|
4. MOVEDIR - previous Parent FID is different from the current parent FID.
|
|
Both are in the Filter table with the same replica set. This is a rename
|
|
to a dir in the SAME replica set. Update the parent FID in the filter
|
|
enty and Filename too.
|
|
|
|
5. MOVERS - The previous Parent FID is different from the current parent File
|
|
ID. Both are in the Filter Table but they have DIFFERENT replica set IDs.
|
|
Update the parent FID, the replica ptr, and name in the filter entry. This
|
|
is a move of an entire subtree from one replica set to another. We
|
|
enumerate the subtree top-down, sending dir level change orders to the
|
|
update process as we update the replica set information in the filter table
|
|
entries.
|
|
|
|
|
|
Arguments:
|
|
|
|
pVme - ptr to the Volume monitor entry for the parent file ID and
|
|
Volume Filter tables.
|
|
|
|
UsnRecord - ptr to the UsnRecord.
|
|
|
|
PrevParentFilterEntry = return value for the previous parent filter entry
|
|
or null. This is the parent under which
|
|
the file or dir used to reside.
|
|
|
|
CurrParentFilterEntry = return value for the current parent filter entry
|
|
or null. This is the parent under which the file
|
|
or dir currently resides.
|
|
|
|
NOTE: The caller must decrement the ref counts on the previous and new parent
|
|
filter entries if either is returned non null.
|
|
|
|
The table below summarizes the filter entry return values for previous
|
|
and current filter entry. A NULL ptr is returned in the 'No' cases.
|
|
It is the callers job to decrement the reference count on the filter
|
|
entry when a non=null value is returned.
|
|
|
|
Result returned in
|
|
|
|
PrevParentFilterEntry CurrParentFilterEntry
|
|
File Not in Replica Set No No
|
|
File content Change No Yes
|
|
create No Yes
|
|
delete No Yes
|
|
Movein No Yes
|
|
MoveOut Yes No
|
|
MoveDir Yes Yes
|
|
MoveRS Yes Yes
|
|
|
|
|
|
Return Value:
|
|
|
|
The change order location comand or FILE_NOT_IN_REPLICA_SET.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlGetFileCoLocationCmd:"
|
|
|
|
ULONG Reason;
|
|
PGENERIC_HASH_TABLE FilterTable;
|
|
|
|
PULONGLONG CurrParentFileID;
|
|
ULONGLONG PrevParentFileID;
|
|
PULONGLONG FileID;
|
|
|
|
ULONG_PTR Flags;
|
|
ULONG GStatus;
|
|
BOOL PrevParentExists;
|
|
|
|
*PrevParentFilterEntry = NULL;
|
|
*CurrParentFilterEntry = NULL;
|
|
|
|
//
|
|
// The code below checks for USN records with USN_SOURCE_REPLICATION_MANAGEMENT
|
|
// SourceInfo flag set. Currently we check for this bit for consistency
|
|
// with the state in our write filter table. A warning is generated
|
|
// when we get a mismatch. Eventually we need to remove the write filter
|
|
// hash table and just rely just on the above flag.
|
|
// It also tells us to skip our own records during recovery.
|
|
//
|
|
// First check if it's in the USN filter hash table. If so this is one of
|
|
// our own install writes (FrsCloseWithUsnDampening did the close)
|
|
// so skip the journal record and delete the table entry.
|
|
//
|
|
GStatus = QHashLookup(pVme->FrsWriteFilter,
|
|
&UsnRecord->Usn,
|
|
&PrevParentFileID, // unused result
|
|
&Flags); // unused result
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS) {
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT1(4, "++ USN Write filter cache hit on usn %08x %08x -- skip record\n",
|
|
PRINTQUAD(UsnRecord->Usn));
|
|
|
|
//
|
|
// Some code is closing the handle with usn dampening but did
|
|
// not mark the handle as being managed by ntfrs.
|
|
//
|
|
if (!BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
|
|
DPRINT2(4, "++ WARN Source not set; usn dampen: SourceInfo is %08x for %08x %08x\n",
|
|
UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
}
|
|
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
|
|
//
|
|
// Maybe recovery usn record but spit out a warning anyway. In
|
|
// general, usn records with USN_SOURCE_REPLICATION_MANAGEMENT set should have been
|
|
// closed with usn dampening and filtered out above.
|
|
//
|
|
if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
|
|
DPRINT2(4, "++ WARN Source set; no usn dampen: SourceInfo is %08x for %08x %08x\n",
|
|
UsnRecord->SourceInfo, PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
}
|
|
|
|
//
|
|
// Ignore the usn records generated by the service
|
|
//
|
|
// Note: get rid of writefilter and use SourceInfo always!
|
|
//
|
|
Reason = UsnRecord->Reason;
|
|
if (BooleanFlagOn(UsnRecord->SourceInfo, USN_SOURCE_REPLICATION_MANAGEMENT)) {
|
|
if (Reason & USN_REASON_FILE_DELETE) {
|
|
DPRINT1(4, "++ Process service generated usn record for %08x %08x\n",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
} else {
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT1(4, "++ Ignore service generated usn record for %08x %08x\n",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
}
|
|
|
|
#ifdef RECOVERY_CONFLICT
|
|
//
|
|
// If a recovery conflict table exists check for a match and skip the USN
|
|
// record. This filters out any USN records caused by our own activities
|
|
// at the time of the crash.
|
|
//
|
|
if (pVme->RecoveryConflictTable != NULL) {
|
|
//
|
|
// Once we pass the journal recovery end point delete the table.
|
|
// It can not have any entries with a larger USN than the end point.
|
|
// ("how can we be sure that all replica sets on this volume have"
|
|
"actually started and so have actually finished using the"
|
|
"conflict table?")
|
|
//
|
|
if (UsnRecord->Usn > pVme->JrnlRecoveryEnd) {
|
|
pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
|
|
} else {
|
|
GStatus = QHashLookup(pVme->RecoveryConflictTable,
|
|
&UsnRecord->FileReferenceNumber,
|
|
&PrevParentFileID, // unused result
|
|
&Flags); // unused result
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS) {
|
|
DUMP_USN_RECORD(1, UsnRecord);
|
|
DPRINT1(1, "++ Recovery conflict table hit on FID %08x %08x -- skip record\n",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
}
|
|
}
|
|
#endif // RECOVERY_CONFLICT
|
|
|
|
FilterTable = pVme->FilterTable;
|
|
|
|
//
|
|
// Get the previous parent file ID for this file/Dir.
|
|
//
|
|
FileID = &UsnRecord->FileReferenceNumber;
|
|
CurrParentFileID = &UsnRecord->ParentFileReferenceNumber;
|
|
|
|
GStatus = QHashLookup(pVme->ParentFidTable, FileID, &PrevParentFileID, &Flags);
|
|
PrevParentExists = (GStatus == GHT_STATUS_SUCCESS);
|
|
|
|
//
|
|
// Check to see if we still need to special case any operations on the root
|
|
// dir of a replica set.
|
|
//
|
|
if (PrevParentExists) {
|
|
DPRINT2(5, "++ Fid: %08x %08x PrevParentFid: %08x %08x\n",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber),
|
|
PRINTQUAD(PrevParentFileID));
|
|
|
|
//
|
|
// IF the previous parent FID is not in the Filter table now and this
|
|
// is not a rename operation (which might result in a MOVEIN) then this
|
|
// file is not in a replica set. This case occurs after a MOVEOUT of a
|
|
// parent dir followed by some access to a child.
|
|
//
|
|
GStatus = GhtLookup(FilterTable, &PrevParentFileID, TRUE, PrevParentFilterEntry);
|
|
if ((GStatus != GHT_STATUS_SUCCESS) &&
|
|
((Reason & USN_REASON_RENAME_NEW_NAME) == 0)) {
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS - Entry in Parent File ID table but not FilterTable & not rename.\n");
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
} else {
|
|
//
|
|
// There is no entry in the parent file ID table for this file or dir.
|
|
// If there is no entry in the filter table for the file's current
|
|
// parent then the file is not in any replica set.
|
|
//
|
|
GStatus = GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS - Entry not in Parent File ID table or FilterTable.\n");
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
}
|
|
|
|
//
|
|
// A delete has to have an entry in the parent File ID table or it is not
|
|
// in a replica set.
|
|
//
|
|
if (Reason & USN_REASON_FILE_DELETE) {
|
|
//
|
|
// If the Previous parent filter entry is valid then the file/dir
|
|
// was in a replica set so treat it as a delete.
|
|
//
|
|
if (*PrevParentFilterEntry != NULL) {
|
|
*CurrParentFilterEntry = *PrevParentFilterEntry;
|
|
*PrevParentFilterEntry = NULL;
|
|
return CO_LOCATION_DELETE;
|
|
}
|
|
//
|
|
// It wasn't in the parent fid table so either the rename flag is also
|
|
// set or the current parent filter entry is non-null which would be
|
|
// the case for a delete on an excluded file. Either way skip it.
|
|
//
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS - delete on excluded file?\n");
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
|
|
//
|
|
// A create has to have an entry for its parent in the Volume Filter Table
|
|
// or it is not in a replica set. It must have no prior entry in the Parent
|
|
// file ID table. (FILE IDs are unique).
|
|
//
|
|
if (Reason & USN_REASON_FILE_CREATE) {
|
|
//
|
|
// If the USN from the journal record is less than or equal to the USN
|
|
// from the file when the replica tree load was done then the created
|
|
// file was already picked up by the load. Otherwise it is an error
|
|
// because we should not have had an entry in the parent ID table yet.
|
|
// At this point we do not have the current USN on the file so we will
|
|
// assume that if a previous parent exists the load got there first and
|
|
// this journal record is stale (so skip the record).
|
|
//
|
|
// In the case where we have paused the journal to startup another
|
|
// replica set we may have to move the next USN to read from the journal
|
|
// back to let this new RS catch-up. In that case we will be seeing
|
|
// records for a second time. If we are in replay mode and the USN
|
|
// for this record is less than the LastUsnRecordProcessed for the target replica
|
|
// set then we ignore the record.
|
|
//
|
|
// Note: add above file usn check.
|
|
//
|
|
if (PrevParentExists) {
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS \n");
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
return CO_LOCATION_CREATE;
|
|
}
|
|
//
|
|
// If not a rename then no location change, but this file is in a Replica Set.
|
|
//
|
|
if ((Reason & USN_REASON_RENAME_NEW_NAME) == 0) {
|
|
|
|
//
|
|
// Check for a content update to a file that is not in our tables.
|
|
// It could be an excluded file which gets filtered out later.
|
|
// Or an excluded file that is no longer excluded because the
|
|
// the exclusion list changed.
|
|
// Treat it as a create so we check the exclusion list again
|
|
// and set the USN record create flag for others that may look at it.
|
|
//
|
|
if (*CurrParentFilterEntry != NULL) {
|
|
//UsnRecord->Reason |= USN_REASON_FILE_CREATE;
|
|
//return CO_LOCATION_CREATE;
|
|
//
|
|
// Treat it as a MOVEIN since if it is a directory we need to
|
|
// enumerate the children.
|
|
//
|
|
return CO_LOCATION_MOVEIN;
|
|
}
|
|
|
|
//
|
|
// It's not a rename, CurrParentFilterEntry is NULL so to be here
|
|
// PrevParentFilterEntry must be non-null which means that this is
|
|
// a content update to a file we already know about.
|
|
//
|
|
FRS_ASSERT(*PrevParentFilterEntry != NULL);
|
|
*CurrParentFilterEntry = *PrevParentFilterEntry;
|
|
*PrevParentFilterEntry = NULL;
|
|
return CO_LOCATION_NO_CMD;
|
|
}
|
|
|
|
//
|
|
// Handle file rename cases. If parent FileIDs match then no location change.
|
|
//
|
|
if ((*PrevParentFilterEntry != NULL) &&
|
|
(PrevParentFileID == *CurrParentFileID)) {
|
|
*CurrParentFilterEntry = *PrevParentFilterEntry;
|
|
*PrevParentFilterEntry = NULL;
|
|
return CO_LOCATION_NO_CMD;
|
|
}
|
|
|
|
//
|
|
// Old and new parent file IDs are different. So the file/dir moved across
|
|
// directories. Could be MOVEIN, MOVEOUT, MOVEDIR, MOVERS.
|
|
//
|
|
if (*CurrParentFilterEntry == NULL) {
|
|
GhtLookup(FilterTable, CurrParentFileID, TRUE, CurrParentFilterEntry);
|
|
}
|
|
|
|
|
|
if (*PrevParentFilterEntry != NULL) {
|
|
if (*CurrParentFilterEntry != NULL) {
|
|
|
|
//
|
|
// Old and new parents in table.
|
|
//
|
|
if ((*PrevParentFilterEntry)->Replica ==
|
|
(*CurrParentFilterEntry)->Replica) {
|
|
//
|
|
// Old and New Replica Sets are the same ==> MOVEDIR
|
|
//
|
|
return CO_LOCATION_MOVEDIR;
|
|
} else {
|
|
//
|
|
// Old and New Replica Sets are different ==> MOVERS
|
|
//
|
|
return CO_LOCATION_MOVERS;
|
|
}
|
|
|
|
} else {
|
|
//
|
|
// Old parent in table, new parent not in table ==> MOVEOUT
|
|
//
|
|
return CO_LOCATION_MOVEOUT;
|
|
}
|
|
|
|
} else {
|
|
|
|
if (*CurrParentFilterEntry != NULL) {
|
|
//
|
|
// Old parent not in table, new parent is in table ==> MOVEIN
|
|
//
|
|
return CO_LOCATION_MOVEIN;
|
|
} else {
|
|
//
|
|
// To get here the operation must be a rename on a file/dir
|
|
// that was in the parent file ID table but the previous parent
|
|
// File ID is no longer in the Filter table (MOVEOUT). In addition
|
|
// the current parent File ID is not in the filter table. So this
|
|
// is a rename operation on a file that was in a replica set in the
|
|
// past but is not currently in any replica set. The update process
|
|
// will eventually clean out the stale entries in the parent file
|
|
// ID table.
|
|
//
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS - Rename on a file with a MOVEOUT parent.\n");
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
}
|
|
|
|
DUMP_USN_RECORD(4, UsnRecord);
|
|
DPRINT(4, "++ NOT IN RS\n");
|
|
|
|
return FILE_NOT_IN_REPLICA_SET;
|
|
}
|
|
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlEnterFileChangeOrder(
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN ULONG LocationCmd,
|
|
IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
|
|
IN PFILTER_TABLE_ENTRY NewParentFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Enter a new change order or update an exisitng change order.
|
|
This routine is for FILES ONLY. Directories are handled in
|
|
JrnlFilterUpdate().
|
|
|
|
This routine acquires and releases the locks on both the source and target
|
|
replica set change order lists (in the case of a MOVERS).
|
|
|
|
Assumes The caller has taken references on the old and new parent filter entry.
|
|
|
|
Arguments:
|
|
|
|
UsnRecord - ptr to the UsnRecord.
|
|
LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
|
|
OldParentFilterEntry - The filter entry for the file's previous parent.
|
|
NewParentFilterEntry - The filter entry for the file's current parent.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlEnterFileChangeOrder:"
|
|
|
|
ULONG GStatus;
|
|
ULONG WStatus = ERROR_GEN_FAILURE;
|
|
PULONGLONG FileID;
|
|
ULONGLONG OriginalParentFileID;
|
|
PCHANGE_ORDER_ENTRY ChangeOrder;
|
|
PGENERIC_HASH_TABLE ChangeOrderTable;
|
|
PREPLICA CurrentReplica;
|
|
PREPLICA OriginalReplica;
|
|
PFILTER_TABLE_ENTRY OriginalParentFilterEntry;
|
|
BOOL PendingCo;
|
|
ULONG StreamSequenceNumber;
|
|
BOOL MergeOk;
|
|
PCXTION Cxtion;
|
|
UNICODE_STRING UnicodeStr, UnicodeStr2;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PUSN_RECORD OldRenUsnRec;
|
|
|
|
|
|
//
|
|
// Determine the original parent and replica set if the file has moved around.
|
|
// This determines what change order table we need to examine for a pending
|
|
// change order.
|
|
// Note: Now that we have one change order table per volume, is this still needed?
|
|
//
|
|
if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
|
|
OriginalParentFilterEntry = OldParentFilterEntry;
|
|
} else {
|
|
OriginalParentFilterEntry = NewParentFilterEntry;
|
|
if (NewParentFilterEntry->DFileID != UsnRecord->ParentFileReferenceNumber) {
|
|
DPRINT(4, "++ Warn - Current parent FID NOT EQUAL to UsnRecord.parentFiD -- Stale USN Rec???\n");
|
|
DPRINT2(4, "++ %08x %08x -- %08x %08x\n",
|
|
PRINTQUAD(NewParentFilterEntry->DFileID),
|
|
PRINTQUAD(UsnRecord->ParentFileReferenceNumber));
|
|
return ERROR_INVALID_PARAMETER;
|
|
}
|
|
}
|
|
|
|
OriginalReplica = OriginalParentFilterEntry->Replica;
|
|
OriginalParentFileID = OriginalParentFilterEntry->DFileID;
|
|
|
|
pVme = OriginalReplica->pVme;
|
|
ChangeOrderTable = pVme->ChangeOrderTable;
|
|
|
|
CurrentReplica = (NewParentFilterEntry != NULL) ?
|
|
NewParentFilterEntry->Replica :
|
|
OldParentFilterEntry->Replica;
|
|
|
|
FrsRtlAcquireListLock(&pVme->ChangeOrderList);
|
|
|
|
//
|
|
// Make a new stream sequence number. Protected by above list lock.
|
|
//
|
|
StreamSequenceNumber = ++pVme->StreamSequenceNumber;
|
|
|
|
//
|
|
// See if there is a pending change order for this file/dir. The call to
|
|
// JrnlUpdateChangeOrder() drops our reference on the change order.
|
|
//
|
|
FileID = &UsnRecord->FileReferenceNumber;
|
|
GStatus = GhtLookupNewest(ChangeOrderTable, FileID, TRUE, &ChangeOrder);
|
|
|
|
PendingCo = (GStatus == GHT_STATUS_SUCCESS);
|
|
|
|
|
|
if (PendingCo) {
|
|
//
|
|
// There is a pending change order. Do a couple consistency checks.
|
|
//
|
|
// This USN record should not be for a file create because that
|
|
// would generate a new File ID which should NOT be in the table.
|
|
//
|
|
// NOT QUITE TRUE -- JrnlGetFileCoLocationCmd() will turn on the
|
|
// USN create flag if it sees a file is in the replica set but not
|
|
// in the parent file ID table. This happens when a file that was on
|
|
// the exclusion list is updated after the exclusion list is changed
|
|
// to allow the file to be included. Because of this situation we can
|
|
// also see the create flag set when the following occurs:
|
|
// 1. A series of file changes result in two COs being produced
|
|
// because the first CO is pulled off the process queue.
|
|
// 2. Subsequent file changes are accumulated in the 2nd CO.
|
|
// 3. Meanwhile the user deletes the file so the first CO aborts when
|
|
// it can't generate the staging file. As part of this abort the
|
|
// IDTable entry for the "new" file is deleted and the ParentFidTable
|
|
// entry is removed.
|
|
// 4. Now another USN record for the file (not the delete yet) arrives
|
|
// to merge with the 2nd CO under construction. Since we don't yet
|
|
// know a delete is coming the code in JrnlGetFileCoLocationCmd()
|
|
// sets the USN create flag as described above.
|
|
// 5. Now we end up here and hit the assert. So to avoid this we check
|
|
// the Pending CO and only assert if is already a create.
|
|
//
|
|
// Yea, yea I could just bag the assert but the above scenario is instructive.
|
|
//
|
|
if ((LocationCmd == CO_LOCATION_CREATE) &&
|
|
(GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_CREATE)){
|
|
DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber, LocationCmd);
|
|
DPRINT(0, "++ ERROR -- USN_REASON_FILE_CREATE with create change order in the table:\n");
|
|
FRS_PRINT_TYPE(0, ChangeOrder);
|
|
FRS_ASSERT(!"JrnlEnterFileCO: USN_REASON_FILE_CREATE with create change order in table");
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// If the pending change order is a delete and the USN record
|
|
// specifies the same same FID this is an error because
|
|
// delete will have retired the FID.
|
|
//
|
|
if (GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE){
|
|
DUMP_USN_RECORD2(0, UsnRecord, OriginalReplica->ReplicaNumber,
|
|
CO_LOCATION_DELETE);
|
|
DPRINT(0, "++ ERROR - new USN record follows delete with same FID");
|
|
FRS_PRINT_TYPE(0, ChangeOrder);
|
|
FRS_ASSERT(!"JrnlEnterFileCO: new USN record follows delete with same FID");
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// USN MERGE RESTRICTIONS:
|
|
//
|
|
// Check if this USN record can be merged with the pending change order.
|
|
// If this USN record is a delete or a rename then it removes a name
|
|
// from the name space. If there exists a more recent change order
|
|
// that references this name then we can not merge the USN record.
|
|
// Instead we must create a new CO.
|
|
//
|
|
// Consider this sequence:
|
|
// Attrib -r Dir <== creates CO-1
|
|
// Del Dir\Foo <== creates CO-2
|
|
// Del Dir <== Merge with CO-1 causes name conflict.
|
|
//
|
|
// The "Del Dir" CO can't be merged with CO-1 because CO-2 is still
|
|
// using Dir to delete file Foo. If the merge were to take place the
|
|
// delete would fail since Dir is not empty. File Dir\Foo would be
|
|
// deleted but Dir would be left around.
|
|
//
|
|
// Similarly a rename creates a new name in the name space but if there
|
|
// is a more recent CO that references the name then the rename can't
|
|
// be merged.
|
|
//
|
|
// Consider the following sequence: (Bar already exists)
|
|
// Echo TestString > Foo <== creates CO-1
|
|
// Ren Bar Bar2 <== creates CO-2
|
|
// Ren Foo Bar <== Merge with CO-1 causes name conflict.
|
|
//
|
|
// Foo and Bar are different COs on different Fids but they have
|
|
// name space dependencies that prevent merging the Foo rename with
|
|
// CO-1 that does the file update. If we did merge these two COs then
|
|
// the resulting remote CO that is sent out would collide with the
|
|
// pre-existing Bar, thus deleting it. When CO-2 arrived the original
|
|
// Bar would be gone so there would be no Bar2.
|
|
//
|
|
|
|
MergeOk = TRUE;
|
|
|
|
if (MergeOk &&
|
|
CurrentReplica &&
|
|
(Cxtion = GTabLookup(CurrentReplica->Cxtions,
|
|
&CurrentReplica->JrnlCxtionGuid,
|
|
NULL)) &&
|
|
!GUIDS_EQUAL(&ChangeOrder->JoinGuid, &Cxtion->JoinGuid)) {
|
|
MergeOk = FALSE;
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Invalid join guid Merge NOT OK ");
|
|
}
|
|
|
|
//
|
|
// When we see USN_REASON_REPARSE_POINT_CHANGE it could indicate
|
|
// addition or removal of the reparse point as well as just a
|
|
// modification. One problem we can hit is when you remove the reparse
|
|
// point and then immediately delete the file. If these operations get
|
|
// merged then we will only see the delete on the other end.
|
|
// Unfortunately, when we try to delete the file on the other member we
|
|
// may fail. For example, DFS always returns an error when you access a
|
|
// file with a DFS reparse point on it.
|
|
//
|
|
// What we really want to do is prevent merging a removal of a reparse
|
|
// point with a later operation on the file. Since there is no way to
|
|
// differentiate the kinds of reparse point changes we just prevent
|
|
// merging any of them with later non reparse point changes.
|
|
//
|
|
// It is okay to merge a non reparse point change with a later reparse
|
|
// point change.
|
|
//
|
|
|
|
if(BooleanFlagOn(ChangeOrder->Cmd.ContentCmd, USN_REASON_REPARSE_POINT_CHANGE) &&
|
|
!BooleanFlagOn(UsnRecord->Reason, USN_REASON_REPARSE_POINT_CHANGE)) {
|
|
MergeOk = FALSE;
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Not a reparse point change Merge NOT OK ");
|
|
}
|
|
|
|
if(MergeOk && (BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME |
|
|
USN_REASON_FILE_DELETE))) {
|
|
|
|
//
|
|
// If this is not a serialized operation (MOVEDIR or MOVERS)
|
|
// then first test for conflict on the current name/parent FID of the
|
|
// file. Then if that's ok test for a conflict on the previous name.
|
|
//
|
|
if (CO_MOVE_RS_OR_DIR(LocationCmd)) {
|
|
MergeOk = FALSE;
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "MOVERS/DIR Merge NOT OK ");
|
|
}
|
|
|
|
if (MergeOk) {
|
|
FrsSetUnicodeStringFromRawString(&UnicodeStr,
|
|
UsnRecord->FileNameLength,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
MergeOk = JrnlMergeCoTest(pVme,
|
|
&UnicodeStr,
|
|
&UsnRecord->ParentFileReferenceNumber,
|
|
ChangeOrder->StreamLastMergeSeqNum);
|
|
if (MergeOk) {
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge OK ");
|
|
} else {
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Curr parent Merge NOT OK ");
|
|
}
|
|
}
|
|
|
|
//
|
|
// If the Merge is still on and this is a rename then check for
|
|
// a conflict in the use of the previous name that will go away.
|
|
//
|
|
if (MergeOk &&
|
|
BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME)) {
|
|
MergeOk = JrnlMergeCoTest(pVme,
|
|
&ChangeOrder->UFileName,
|
|
&OriginalParentFilterEntry->DFileID,
|
|
ChangeOrder->StreamLastMergeSeqNum);
|
|
if (MergeOk) {
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge OK ");
|
|
} else {
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Orig parent Merge NOT OK ");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (MergeOk) {
|
|
//
|
|
// Update the seq number of last USN record to contribute to CO.
|
|
//
|
|
ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
|
|
}
|
|
|
|
PendingCo = MergeOk;
|
|
|
|
//
|
|
// Creating new change order; drop reference on current change order
|
|
//
|
|
if (!PendingCo) {
|
|
GStatus = GhtDereferenceEntryByAddress(ChangeOrderTable,
|
|
ChangeOrder,
|
|
TRUE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
|
|
FRS_PRINT_TYPE(0, ChangeOrder);
|
|
FRS_ASSERT(!"JrnlEnterFileCO: ref count non positive");
|
|
goto RETURN;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
if (!PendingCo) {
|
|
//
|
|
// Construct new change order.
|
|
//
|
|
ChangeOrder = JrnlCreateCo(OriginalReplica,
|
|
&UsnRecord->FileReferenceNumber,
|
|
&OriginalParentFilterEntry->DFileID,
|
|
UsnRecord,
|
|
BooleanFlagOn(UsnRecord->FileAttributes,
|
|
FILE_ATTRIBUTE_DIRECTORY),
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
|
|
ChangeOrder->StreamLastMergeSeqNum = StreamSequenceNumber;
|
|
//
|
|
// Set this up now so it appears in the log file. It is overwritten
|
|
// later with the real CO Guid when the CO is issued.
|
|
//
|
|
ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSequenceNumber;
|
|
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Create", UsnRecord->Reason);
|
|
} else {
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
|
|
}
|
|
|
|
//
|
|
// Update the Name Space Table with the current stream sequence number.
|
|
// Do this for both the file name and the parent dir name. In the case
|
|
// of rename do it for the original and current file name and parent names.
|
|
// So there are four cases. The table below shows where the filename
|
|
// and the File ID come from for each case.
|
|
//
|
|
// File Name Parent Filename
|
|
//
|
|
// Curr File UsnRecord->FileName UsnRecord->ParentFID
|
|
// Curr Parent NewParentFilterEntry->UFileName NewParentFilterEntry->DParentFileID
|
|
// Orig File RenOldNameTable->FileName RenOldNameTable->ParentFID
|
|
// Orig Parent OrigParentFilterEntry->UFileName OrigParentFilterEntry->DParentFileID
|
|
//
|
|
// Note:
|
|
// - The Curr info is only needed if CO is not a MOVEOUT.
|
|
// - The Orig info on the filename is only relevant if CO is a rename.
|
|
// - The Orig info on the parent dir is only relevant if CO is
|
|
// MoveOut, MoveDir or MoveRs.
|
|
//
|
|
|
|
if (LocationCmd != CO_LOCATION_MOVEOUT) {
|
|
//
|
|
// Update Curr File (Where the USN record says file went)
|
|
//
|
|
FrsSetUnicodeStringFromRawString(&UnicodeStr,
|
|
UsnRecord->FileNameLength,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
JrnlUpdateNst(pVme,
|
|
&UnicodeStr,
|
|
&UsnRecord->ParentFileReferenceNumber,
|
|
StreamSequenceNumber);
|
|
//
|
|
// Update Curr parent (the parent dir where file went)
|
|
//
|
|
JrnlUpdateNst(pVme,
|
|
&NewParentFilterEntry->UFileName,
|
|
&NewParentFilterEntry->DParentFileID,
|
|
StreamSequenceNumber);
|
|
}
|
|
|
|
if (BooleanFlagOn(UsnRecord->Reason, USN_REASON_RENAME_NEW_NAME)) {
|
|
|
|
//
|
|
// Update Orig File location for rename COs.
|
|
// We use the info saved in the most recent Rename Old USN record for this file
|
|
// on the volume. Then free the saved old name.
|
|
//
|
|
OldRenUsnRec = NULL;
|
|
GStatus = QHashLookup(pVme->RenOldNameTable,
|
|
&UsnRecord->FileReferenceNumber,
|
|
NULL,
|
|
(PULONG_PTR) &OldRenUsnRec);
|
|
|
|
if (OldRenUsnRec != NULL) {
|
|
|
|
FrsSetUnicodeStringFromRawString(&UnicodeStr2,
|
|
OldRenUsnRec->FileNameLength,
|
|
OldRenUsnRec->FileName,
|
|
OldRenUsnRec->FileNameLength);
|
|
|
|
JrnlUpdateNst(pVme,
|
|
&UnicodeStr2,
|
|
&OldRenUsnRec->ParentFileReferenceNumber,
|
|
StreamSequenceNumber);
|
|
|
|
|
|
OldRenUsnRec = FrsFree(OldRenUsnRec);
|
|
|
|
GStatus = QHashDelete(pVme->RenOldNameTable,
|
|
&UsnRecord->FileReferenceNumber);
|
|
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashDelete error: %d\n", GStatus);
|
|
}
|
|
|
|
} else {
|
|
DPRINT1(0, "RENAME_OLD_NAME record not found for Fid: %08x %08x\n",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber));
|
|
}
|
|
}
|
|
|
|
if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
|
|
|
|
//
|
|
// Update Orig Parent (The original parent dir where the file came from)
|
|
//
|
|
JrnlUpdateNst(pVme,
|
|
&OriginalParentFilterEntry->UFileName,
|
|
&OriginalParentFilterEntry->DParentFileID,
|
|
StreamSequenceNumber);
|
|
}
|
|
|
|
|
|
//
|
|
// Update the change order. This drops our ref on the change order.
|
|
//
|
|
WStatus = JrnlUpdateChangeOrder(ChangeOrder,
|
|
CurrentReplica,
|
|
UsnRecord->ParentFileReferenceNumber,
|
|
LocationCmd,
|
|
UsnRecord);
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ Error - failed to insert or update change order\n");
|
|
DPRINT_WS(0, "JrnlUpdateChangeOrder", WStatus);
|
|
} else {
|
|
DPRINT1(4, "++ ChangeOrder %s success\n", (PendingCo ? "update" : "create"));
|
|
}
|
|
|
|
|
|
RETURN:
|
|
|
|
//
|
|
// Drop the locks on the change order process lists.
|
|
//
|
|
FrsRtlReleaseListLock(&pVme->ChangeOrderList);
|
|
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
|
|
PCHANGE_ORDER_ENTRY
|
|
JrnlCreateCo(
|
|
IN PREPLICA Replica,
|
|
IN PULONGLONG Fid,
|
|
IN PULONGLONG ParentFid,
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN BOOL IsDirectory,
|
|
IN PWCHAR FileName,
|
|
IN USHORT Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This functions allocates a change order entry and inits some of the fields.
|
|
|
|
Depending on the change order some of these fields are overwritten later.
|
|
|
|
Arguments:
|
|
|
|
Replica - ptr to replica set for this change order.
|
|
Fid - The file reference number for the local file.
|
|
ParentFid - The parent file reference number for this file.
|
|
UsnRecord - The NTFS USN record describing the change. When walking a
|
|
through a sub-tree this will be the USN record of the sub-tree root.
|
|
IsDirectory - TRUE if this CO is for a directory.
|
|
FileName - Filename for this file. For a sub tree op it comes from the
|
|
filter entry.
|
|
Length - the file name length in bytes.
|
|
|
|
Return Value:
|
|
|
|
ptr to change order entry.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCreateCo:"
|
|
|
|
PCHANGE_ORDER_ENTRY ChangeOrder;
|
|
|
|
//
|
|
// Construct new change order.
|
|
// Set the initial reference count to 1.
|
|
//
|
|
ChangeOrder = FrsAllocType(CHANGE_ORDER_ENTRY_TYPE);
|
|
ChangeOrder->HashEntryHeader.ReferenceCount = 1;
|
|
|
|
//
|
|
// The command flag CO_FLAG_LOCATION_CMD should be clear.
|
|
// Mark this change order as a file or a directory.
|
|
// Note: If this CO is being generated off of a directory filter table
|
|
// entry (e.g. Moveout) then the ChangeOrder->Cmd.FileAttributes will
|
|
// be zero. ChgOrdReadIdRecord() detects this and inserts the file
|
|
// attributes from the IDTable record.
|
|
//
|
|
SET_CO_LOCATION_CMD(ChangeOrder->Cmd,
|
|
DirOrFile,
|
|
(IsDirectory ? CO_LOCATION_DIR : CO_LOCATION_FILE));
|
|
|
|
SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, CO_LOCATION_NO_CMD);
|
|
|
|
//
|
|
// Capture the file name.
|
|
//
|
|
FRS_ASSERT(Length <= MAX_PATH*2);
|
|
CopyMemory(ChangeOrder->Cmd.FileName, FileName, Length);
|
|
ChangeOrder->Cmd.FileName[Length/2] = UNICODE_NULL;
|
|
ChangeOrder->UFileName.Length = Length;
|
|
ChangeOrder->Cmd.FileNameLength = Length;
|
|
|
|
//
|
|
// Set New and orig Replica fields to the replica.
|
|
//
|
|
ChangeOrder->OriginalReplica = Replica;
|
|
ChangeOrder->NewReplica = Replica;
|
|
ChangeOrder->Cmd.OriginalReplicaNum = ReplicaAddrToId(Replica);
|
|
ChangeOrder->Cmd.NewReplicaNum = ReplicaAddrToId(Replica);
|
|
|
|
//
|
|
// Set New and orig parent FID fields to the parent FID.
|
|
//
|
|
ChangeOrder->OriginalParentFid = *ParentFid;
|
|
ChangeOrder->NewParentFid = *ParentFid;
|
|
ChangeOrder->ParentFileReferenceNumber = *ParentFid;
|
|
ChangeOrder->FileReferenceNumber = *Fid;
|
|
|
|
//
|
|
// Init with data from the USN Record.
|
|
//
|
|
ChangeOrder->EntryCreateTime = CO_TIME_NOW(Replica->pVme);
|
|
ChangeOrder->Cmd.EventTime = UsnRecord->TimeStamp;
|
|
ChangeOrder->Cmd.JrnlFirstUsn = UsnRecord->Usn;
|
|
|
|
return ChangeOrder;
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlMergeCoTest(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUNICODE_STRING UFileName,
|
|
IN PULONGLONG ParentFid,
|
|
IN ULONG StreamLastMergeSeqNum
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check if a new Usn record can be merged with this change order.
|
|
If there is any reference to the file name in the Usn record stream
|
|
after the point where the last merge occurred then we return FALSE
|
|
indicating the merge is disallowed. The ptr to the QHashEntry is returned
|
|
(if it is found) so LastUseSequenceNumber can be updated.
|
|
|
|
Arguments:
|
|
|
|
|
|
pVme - ptr to the volume monitor entry (w/ name space table) for test.
|
|
UFileName - Unicode Filename for this file.
|
|
ParentFid - The parent file reference number for this file.
|
|
StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
|
|
|
|
Return Value:
|
|
|
|
True if Merge is ok else false.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlMergeCoTest:"
|
|
|
|
ULONGLONG QuadHashValue;
|
|
ULONG StreamLastUseSeqNum;
|
|
PQHASH_ENTRY NstEntry;
|
|
|
|
|
|
CalcHashFidAndName(UFileName, ParentFid, &QuadHashValue);
|
|
|
|
NstEntry = QHashLookupLock(pVme->NameSpaceTable, &QuadHashValue);
|
|
|
|
if (NstEntry != NULL) {
|
|
|
|
StreamLastUseSeqNum = (ULONG)NstEntry->Flags;
|
|
|
|
if (StreamLastUseSeqNum > StreamLastMergeSeqNum) {
|
|
//
|
|
// There is a ref to this name in the Usn stream after
|
|
// point where the last record was merged with this CO.
|
|
// Can't merge this Usn Record.
|
|
//
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlPurgeOldRenameWorker (
|
|
PQHASH_TABLE Table,
|
|
PQHASH_ENTRY BeforeNode,
|
|
PQHASH_ENTRY TargetNode,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru QHashEnumerateTable() to clean out stale entries.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated
|
|
BeforeNode -- ptr to the QhashEntry before the node of interest.
|
|
TargetNode -- ptr to the QhashEntry of interest.
|
|
Context - ptr to the USN to compare against.
|
|
|
|
Return Value:
|
|
|
|
FRS Status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlPurgeOldRenameWorker:"
|
|
|
|
USN PurgeUsn = *(USN *)Context;
|
|
|
|
PUSN_RECORD OldRenUsnRec;
|
|
|
|
OldRenUsnRec = (PUSN_RECORD) (TargetNode->Flags);
|
|
|
|
if (OldRenUsnRec == NULL) {
|
|
//
|
|
// All valid entries should point to a USN record but if not then
|
|
// just delete the qhash entry.
|
|
//
|
|
return FrsErrorDeleteRequested;
|
|
}
|
|
|
|
if (OldRenUsnRec->Usn < PurgeUsn) {
|
|
//
|
|
// This record is past the point of interest so clean it out.
|
|
//
|
|
OldRenUsnRec = FrsFree(OldRenUsnRec);
|
|
TargetNode->Flags = (ULONG_PTR) NULL;
|
|
//
|
|
// Tell QHashEnumerateTable() to delete the node and continue the enum.
|
|
//
|
|
return FrsErrorDeleteRequested;
|
|
}
|
|
|
|
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlPurgeNstWorker (
|
|
PQHASH_TABLE Table,
|
|
PQHASH_ENTRY BeforeNode,
|
|
PQHASH_ENTRY TargetNode,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru QHashEnumerateTable() to clean out stale entries.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated
|
|
BeforeNode -- ptr to the QhashEntry before the node of interest.
|
|
TargetNode -- ptr to the QhashEntry of interest.
|
|
Context - ptr to the Stream Sequence Number to compare against.
|
|
|
|
Return Value:
|
|
|
|
FRS Status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlPurgeNstWorker:"
|
|
|
|
ULONG StreamSeqNum = *(ULONG *)Context;
|
|
|
|
|
|
if ( (ULONG)(TargetNode->Flags) < StreamSeqNum) {
|
|
|
|
DPRINT5(4, "JrnlPurgeNstWorker - BeforeNode: %08x, Link: %08x,"
|
|
" Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
|
|
BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
|
|
PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
|
|
|
|
//
|
|
// Tell QHashEnumerateTable() to delete the node and continue the enum.
|
|
//
|
|
return FrsErrorDeleteRequested;
|
|
}
|
|
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
|
|
|
|
VOID
|
|
JrnlUpdateNst(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PUNICODE_STRING UFileName,
|
|
IN PULONGLONG ParentFid,
|
|
IN ULONG StreamSequenceNumber
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Update the LastUseSequenceNumber in the Name Space Table.
|
|
If the entry is not present, create it.
|
|
|
|
Arguments:
|
|
|
|
pVme - ptr to the volume monitor entry (w/ name space table) for test.
|
|
UFileName - Unicode Filename for this file.
|
|
ParentFid - The parent file reference number for this file.
|
|
StreamLastMergeSeqNum - The Seq Num of last Usn Record merged into CO.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlUpdateNst:"
|
|
|
|
ULONGLONG Qhv;
|
|
USN PurgeUsn;
|
|
PQHASH_ENTRY NstEntry;
|
|
ULONG LastFetched, LastCleaned;
|
|
|
|
CalcHashFidAndName(UFileName, ParentFid, &Qhv);
|
|
|
|
NstEntry = QHashLookupLock(pVme->NameSpaceTable, &Qhv);
|
|
|
|
if (NstEntry != NULL) {
|
|
NstEntry->Flags = StreamSequenceNumber;
|
|
} else {
|
|
//
|
|
// Name not found. Create a new entry.
|
|
//
|
|
QHashInsertLock(pVme->NameSpaceTable, &Qhv, &Qhv, StreamSequenceNumber);
|
|
}
|
|
|
|
|
|
//
|
|
// Every so often sweep the Name Space Table and clean out stale entries.
|
|
// By doing this as part of the Journal monitor thread we can avoid
|
|
// using locks on the NameSpaceTable since this is the only thread that
|
|
// touches it.
|
|
//
|
|
if ((StreamSequenceNumber & 127) == 0) {
|
|
LastFetched = pVme->StreamSequenceNumberFetched;
|
|
LastCleaned = pVme->StreamSequenceNumberClean;
|
|
|
|
if ((LastFetched > LastCleaned) &&
|
|
((LastFetched - LastCleaned) > 100)) {
|
|
//
|
|
// Sweep the table and purge any entries with a Stream Sequence
|
|
// Number less than LastFetched since that CO is no longer in the
|
|
// process queue.
|
|
//
|
|
QHashEnumerateTable(pVme->NameSpaceTable,
|
|
JrnlPurgeNstWorker,
|
|
&LastFetched);
|
|
pVme->StreamSequenceNumberClean = LastFetched;
|
|
|
|
//
|
|
// Clean up stray entries in the Old Rename name table too.
|
|
//
|
|
PurgeUsn = pVme->LastUsnSavePoint;
|
|
QHashEnumerateTable(pVme->RenOldNameTable,
|
|
JrnlPurgeOldRenameWorker,
|
|
&PurgeUsn);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
VOID
|
|
JrnlFilterUpdate(
|
|
IN PREPLICA CurrentReplica,
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN ULONG LocationCmd,
|
|
IN PFILTER_TABLE_ENTRY OldParentFilterEntry,
|
|
IN PFILTER_TABLE_ENTRY NewParentFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Process a directory operation. Generate the change order(s) and update the
|
|
Filter table. This may involve multiple operations over a subtree.
|
|
|
|
It assumes it is being called with a USN directory change record and
|
|
that references have been taken on OldParentFilterEntry and
|
|
NewParentFilterEntry.
|
|
|
|
Arguments:
|
|
|
|
CurrentReplica - ptr to the Replica struct containing the directory now.
|
|
UsnRecord - ptr to the UsnRecord.
|
|
LocationCmd - The change order location command. (MOVEIN, MOVEOUT, ...)
|
|
OldParentFilterEntry - The filter entry for the directory's previous parent.
|
|
NewParentFilterEntry - The filter entry for the directory's current parent.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterUpdate:"
|
|
|
|
PGENERIC_HASH_TABLE FilterTable = CurrentReplica->pVme->FilterTable;
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
ULONG GStatus, WStatus;
|
|
ULONG Flags;
|
|
PULONGLONG FileID;
|
|
PREPLICA OriginalReplica;
|
|
CHANGE_ORDER_PARAMETERS Cop;
|
|
|
|
//
|
|
// Determine the file location command to use in the change order.
|
|
// First get the old parent file ID incase this was a rename.
|
|
//
|
|
FileID = &UsnRecord->FileReferenceNumber;
|
|
|
|
//
|
|
// If there is no old parent filter entry (Create, Delete, MOVEIN or NO_CMD)
|
|
// then the original replica is NULL.
|
|
//
|
|
OriginalReplica = (OldParentFilterEntry == NULL) ?
|
|
NULL : OldParentFilterEntry->Replica;
|
|
|
|
//
|
|
// Look for an entry in the Filter Table for this DIR and create a new
|
|
// one if needed.
|
|
//
|
|
GStatus = GhtLookup(FilterTable, FileID, TRUE, &FilterEntry);
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS) {
|
|
//
|
|
// For a create the entry could already be in the table. This could
|
|
// happen when a Replica Load inserts the directory and then we see the
|
|
// Journal Entry for the create later. If only the Create bit is set
|
|
// in the reason mask there is nothing for us to do.
|
|
//
|
|
if (UsnRecord->Reason == (USN_REASON_FILE_CREATE | USN_REASON_CLOSE)) {
|
|
DPRINT(4,"++ USN_REASON_FILE_CREATE: for dir with entry in table. skipping\n");
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
return;
|
|
}
|
|
} else {
|
|
|
|
//
|
|
// Create a filter entry for this directory if it's a create or movein.
|
|
// A MoveIn is the same as a create dir since we need to create a filter
|
|
// table entry and only a single dir is involved. It is possible that
|
|
// the update process has already found the dir and added the filter
|
|
// entry. If so we generate the change order anyway since there may
|
|
// be other reason flags to consider. There is no original replica
|
|
// for a create or a rename.
|
|
//
|
|
if (CO_NEW_FILE(LocationCmd)) {
|
|
//
|
|
// The following returns with a reference on FilterEntry.
|
|
//
|
|
WStatus = JrnlAddFilterEntryFromUsn(CurrentReplica,
|
|
UsnRecord,
|
|
&FilterEntry);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DUMP_USN_RECORD2(3, UsnRecord, CurrentReplica->ReplicaNumber, LocationCmd);
|
|
DPRINT(1, "++ ERROR - JrnlAddFilterEntryFromUsn failed\n");
|
|
return;
|
|
}
|
|
} else {
|
|
//
|
|
// Note: touching a dir that was previously EXCLUDED fails to add filter entry
|
|
//
|
|
DUMP_USN_RECORD2(3, UsnRecord, CurrentReplica->ReplicaNumber, LocationCmd);
|
|
DPRINT(1, "++ Warning: Dir not found in Filter Table and not a CO_NEW_FILE, skipping\n");
|
|
return;
|
|
|
|
}
|
|
}
|
|
|
|
//
|
|
// Process the directory through the volume filter and generate the
|
|
// appropriate change orders.
|
|
//
|
|
|
|
|
|
//
|
|
// Setup the change order parameters.
|
|
//
|
|
// Original and current/new Replica Sets
|
|
// new parent FID.
|
|
// Usn Record triggering change order creation. (i.e. the op on root of
|
|
// the subtree).
|
|
// The location change command.
|
|
// Original and current/new parent filter entries of root filter entry
|
|
//
|
|
Cop.OriginalReplica = OriginalReplica;
|
|
Cop.NewReplica = CurrentReplica;
|
|
Cop.NewParentFid = UsnRecord->ParentFileReferenceNumber;
|
|
Cop.UsnRecord = UsnRecord;
|
|
Cop.NewLocationCmd = LocationCmd;
|
|
Cop.OrigParentFilterEntry = OldParentFilterEntry;
|
|
Cop.NewParentFilterEntry = NewParentFilterEntry;
|
|
|
|
//
|
|
// Process the subtree starting at the root filter entry of change.
|
|
//
|
|
WStatus = JrnlProcessSubTree(FilterEntry, &Cop);
|
|
|
|
//
|
|
// Drop the ref on the filter entry if it wasn't deleted.
|
|
//
|
|
if ((FilterEntry != NULL) &&
|
|
!((LocationCmd == CO_LOCATION_DELETE) ||
|
|
(LocationCmd == CO_LOCATION_MOVEOUT))) {
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlProcessSubTree(
|
|
IN PFILTER_TABLE_ENTRY RootFilterEntry,
|
|
IN PCHANGE_ORDER_PARAMETERS Cop
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called to build a change order parameter block and
|
|
enumerate through a filter subtree. It acquires the necessary locks
|
|
for the duration of the operation.
|
|
|
|
Arguments:
|
|
|
|
RootFilterEntry - The root of the filter subtree being operated on.
|
|
NULL if it doesn't yet exist (e.g. MOVEIN or CREATE).
|
|
Cop - Struct with the change order param data to pass down the subtree.
|
|
|
|
Return Value:
|
|
|
|
win32 status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlProcessSubTree:"
|
|
|
|
ULONG WStatus;
|
|
PGENERIC_HASH_TABLE FilterTable;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PREPLICA NewReplica = Cop->NewReplica;
|
|
ULONG NewLocationCmd = Cop->NewLocationCmd;
|
|
PREPLICA OriginalReplica = Cop->OriginalReplica;
|
|
|
|
|
|
if (NewLocationCmd == CO_LOCATION_MOVEOUT) {
|
|
pVme = OriginalReplica->pVme;
|
|
} else {
|
|
pVme = NewReplica->pVme;
|
|
}
|
|
|
|
FilterTable = pVme->FilterTable;
|
|
|
|
//
|
|
// Get the change order process list lock for the volume.
|
|
//
|
|
FrsRtlAcquireListLock(&pVme->ChangeOrderList);
|
|
|
|
//
|
|
// dispatch on new location command.
|
|
// Get locks and enumerate subtree top down or bottom up.
|
|
//
|
|
switch (NewLocationCmd) {
|
|
|
|
case CO_LOCATION_NO_CMD:
|
|
//
|
|
// Even though there is no location change. There could still be a
|
|
// dir related content change. So process like a create that the
|
|
// update process got to first.
|
|
//
|
|
case CO_LOCATION_CREATE:
|
|
case CO_LOCATION_MOVEIN:
|
|
case CO_LOCATION_MOVEIN2:
|
|
//
|
|
// Create a change order for it. Not really a subtree operation.
|
|
// A MoveIn is the same as a create dir since we need to create a filter
|
|
// table entry and only a single dir is involved. It is possible that
|
|
// the update process has already found the dir and added the filter
|
|
// entry. If so we generate the change order anyway since there may
|
|
// be other reason flags to consider. There is no original replica
|
|
// for a create or a MOVEIN. The caller sets original replica to
|
|
// new replica and has created the filter entry.
|
|
//
|
|
// Bump the ref count to keep the count in sync with the path through
|
|
// JrnlEnumerateFilterTreexx().
|
|
//
|
|
INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
|
|
|
|
WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
|
|
|
|
DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
|
|
break;
|
|
|
|
|
|
case CO_LOCATION_DELETE:
|
|
case CO_LOCATION_MOVEDIR:
|
|
//
|
|
// Create change order for the directory delete and delete filter entry.
|
|
// Not really a subtree operation since the dir can have no children
|
|
// when it's deleted.
|
|
// If the operation is MOVEDIR then JrnlProcessSubTreeEntry() will
|
|
// change the parent dir in the filter entry and put it on the child
|
|
// list of the new parent.
|
|
//
|
|
// Bump the ref count to keep the count in sync with the path through
|
|
// JrnlEnumerateFilterTreexx().
|
|
//
|
|
INCREMENT_FILTER_REF_COUNT(RootFilterEntry);
|
|
|
|
JrnlAcquireChildLock(NewReplica);
|
|
|
|
WStatus = JrnlProcessSubTreeEntry(FilterTable, RootFilterEntry, Cop);
|
|
|
|
DPRINT_WS(0, "++ Error - failed to add change order for dir create:", WStatus);
|
|
|
|
JrnlReleaseChildLock(NewReplica);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CO_LOCATION_MOVEOUT:
|
|
//
|
|
// An entire subtree is renamed out of the replica tree.
|
|
//
|
|
// Get the lock on the filter entry child list for this replica.
|
|
// Walk the subtree bottom up, creating the change orders for the
|
|
// MOVEOUT and deleting the filter entries at the same time.
|
|
// Drop the child list lock.
|
|
//
|
|
|
|
JrnlAcquireChildLock(OriginalReplica);
|
|
WStatus = JrnlEnumerateFilterTreeBU(FilterTable,
|
|
RootFilterEntry,
|
|
JrnlProcessSubTreeEntry,
|
|
Cop);
|
|
JrnlReleaseChildLock(OriginalReplica);
|
|
DPRINT_WS(0, "++ Error - failed to add change order for dir MOVEOUT:", WStatus);
|
|
|
|
break;
|
|
|
|
|
|
case CO_LOCATION_MOVERS:
|
|
//
|
|
// Get the lock on the filter entry child list for both this replica
|
|
// and the new replica set.
|
|
// Walk the subtree Top-Down, creating the change orders for the MOVERS.
|
|
// Drop the child list locks.
|
|
//
|
|
|
|
JrnlAcquireChildLockPair(OriginalReplica, NewReplica);
|
|
|
|
WStatus = JrnlEnumerateFilterTreeTD(FilterTable,
|
|
RootFilterEntry,
|
|
JrnlProcessSubTreeEntry,
|
|
Cop);
|
|
|
|
JrnlReleaseChildLockPair(OriginalReplica, NewReplica);
|
|
DPRINT_WS(0, "++ Error - failed to add change order for dir MOVERS:", WStatus);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
DPRINT(0, "++ ERROR - Invalid NewLocationCmd arg\n");
|
|
FRS_ASSERT(!"JrnlProcessSubTree: Invalid NewLocationCmd");
|
|
|
|
} // end switch
|
|
|
|
//
|
|
// Release the volume change order lock.
|
|
//
|
|
FrsRtlReleaseListLock(&pVme->ChangeOrderList);
|
|
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlProcessSubTreeEntry(
|
|
PGENERIC_HASH_TABLE FilterTable,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru JrnlEnumerateFilterTreexx() to process a
|
|
Filter entry and submit a change order for same.
|
|
|
|
After the change order is generated the filter table entry is updated
|
|
as needed to reflect a new parent or a new replica set or a name change.
|
|
|
|
All required locks are acquired by the caller of the enumerate function.
|
|
This includes one or two filter entry child locks and the change order
|
|
list lock.
|
|
|
|
The caller has taken out a reference on the FilterEntry (Buffer). We
|
|
retire that reference here.
|
|
|
|
Arguments:
|
|
|
|
FilterTable - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the change order parameter struct.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS to keep the enumeration going.
|
|
Any other status stops the enumeration and returns this value to the
|
|
caller of the enumerate function.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlProcessSubTreeEntry:"
|
|
|
|
|
|
UNICODE_STRING UFileName;
|
|
|
|
ULONG WStatus, WStatus1;
|
|
ULONG GStatus;
|
|
BOOL Root;
|
|
PCHANGE_ORDER_ENTRY ChangeOrder;
|
|
PUSN_RECORD UsnRecord;
|
|
ULONG StreamSeqNum;
|
|
ULONG LocationCmd;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
|
|
PFILTER_TABLE_ENTRY OrigParentFilterEntry;
|
|
PFILTER_TABLE_ENTRY NewParentFilterEntry;
|
|
PFILTER_TABLE_ENTRY FE, FEList[8];
|
|
ULONG FEx;
|
|
|
|
PWCHAR FileName;
|
|
PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
|
|
PCHANGE_ORDER_PARAMETERS Cop = (PCHANGE_ORDER_PARAMETERS) Context;
|
|
|
|
USHORT Length;
|
|
|
|
|
|
//
|
|
// The USN record that triggered the SubTree operation
|
|
//
|
|
UsnRecord = Cop->UsnRecord;
|
|
LocationCmd = Cop->NewLocationCmd;
|
|
OrigParentFilterEntry = Cop->OrigParentFilterEntry;
|
|
NewParentFilterEntry = Cop->NewParentFilterEntry;
|
|
|
|
pVme = FilterEntry->Replica->pVme;
|
|
|
|
//
|
|
// If the FID in the UsnRecord matches the FID in the Filter Entry then
|
|
// this operation is on the root of the subtree and is different than if
|
|
// it was on a child.
|
|
//
|
|
Root = (UsnRecord->FileReferenceNumber == FilterEntry->DFileID);
|
|
|
|
#if 0
|
|
// For now no merging of the DIR change orders. If this proves to be a perf
|
|
// problem then need to add the code check for name conflicts.
|
|
//
|
|
// Check for a pending change order for this Dir entry. If the lookup
|
|
// succeeds the ref count is decremented by JrnlUpdateChangeOrder because
|
|
// it may end up evaporating the change order.
|
|
//
|
|
GStatus = GhtLookup(pVme->ChangeOrderTable,
|
|
&FilterEntry->DFileID,
|
|
TRUE,
|
|
&ChangeOrder);
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS) {
|
|
//
|
|
// A pending change order exists, Update it.
|
|
//
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Update", UsnRecord->Reason);
|
|
} else {
|
|
#endif
|
|
|
|
//
|
|
// No pending change order exists for this Dir. Create one.
|
|
//
|
|
// Since multiple change orders are derived from a single Journal Usn
|
|
// how do we decide to update our stable copy of the Journal USN?
|
|
// The stable copy means the current one we are working on and may not
|
|
// have finished.
|
|
|
|
if (Root) {
|
|
//
|
|
// If the root of the sub-tree then name comes from USN Record.
|
|
//
|
|
FileName = UsnRecord->FileName;
|
|
Length = UsnRecord->FileNameLength;
|
|
} else {
|
|
//
|
|
// If not root of sub-tree then name comes from filter entry and
|
|
// JrnlFirstUsn is set to zero.
|
|
//
|
|
FileName = FilterEntry->DFileName;
|
|
Length = (USHORT)(2*wcslen(FilterEntry->DFileName));
|
|
}
|
|
|
|
//
|
|
// Create the change order.
|
|
//
|
|
ChangeOrder = JrnlCreateCo(FilterEntry->Replica,
|
|
&FilterEntry->DFileID,
|
|
&FilterEntry->DParentFileID,
|
|
UsnRecord,
|
|
TRUE, // DIR CO
|
|
FileName,
|
|
Length);
|
|
//
|
|
// Make a new stream sequence number and save it in the CO.
|
|
// Stick it in the CO Guid so it appears in the log file.
|
|
// It gets overwritten later with real CO Guid when the CO issues.
|
|
//
|
|
StreamSeqNum = ++pVme->StreamSequenceNumber;
|
|
ChangeOrder->StreamLastMergeSeqNum = StreamSeqNum;
|
|
ChangeOrder->Cmd.ChangeOrderGuid.Data1 = StreamSeqNum;
|
|
|
|
ChangeOrder->OriginalParentFid = FilterEntry->DParentFileID;
|
|
|
|
|
|
if (Root) {
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Root Create",
|
|
UsnRecord->Reason);
|
|
} else {
|
|
ChangeOrder->Cmd.JrnlFirstUsn = (USN) 0;
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Subdir Create",
|
|
UsnRecord->Reason);
|
|
}
|
|
|
|
#if 0
|
|
}
|
|
#endif
|
|
|
|
|
|
//
|
|
// Update the Name Space Table with the current stream sequence number.
|
|
// Since this is a dir subtree entries are made for all parents implicitly
|
|
// until we get to the root. The root needs to have its parent dir added
|
|
// to the name space table. The table below shows what entries are made
|
|
// depending on the file operation and whether or not this call is for
|
|
// the root entry of the subtree operation.
|
|
//
|
|
// Opn Make Entry using Make Entry using
|
|
// orig name/parent Current name/parent
|
|
// info info (1)
|
|
//
|
|
// Movein No Yes
|
|
// Moveout Yes No
|
|
// Movedir Yes Yes
|
|
// Movers Yes Yes
|
|
//
|
|
// SimpleRen Yes Yes
|
|
// Create No Yes
|
|
// Delete No Yes
|
|
// Update No Yes
|
|
//
|
|
// The last four entries affect single dirs only while the first four
|
|
// can apply to subtrees.
|
|
// (1) If working in a single dir or the root of a sub-tree the current
|
|
// name/parent info comes from the USN record.
|
|
//
|
|
FEx = 0;
|
|
|
|
if (Root) {
|
|
if (LocationCmd != CO_LOCATION_MOVEOUT) {
|
|
//
|
|
// Update Curr File (Where the USN record says file went)
|
|
// Update New parent (the parent dir where file went)
|
|
//
|
|
FrsSetUnicodeStringFromRawString(&UFileName,
|
|
UsnRecord->FileNameLength,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
JrnlUpdateNst(pVme,
|
|
&UFileName,
|
|
&UsnRecord->ParentFileReferenceNumber,
|
|
StreamSeqNum);
|
|
|
|
FRS_ASSERT(NewParentFilterEntry != NULL);
|
|
FEList[FEx++] = NewParentFilterEntry;
|
|
}
|
|
|
|
if (CO_MOVE_OUT_RS_OR_DIR(LocationCmd)) {
|
|
|
|
//
|
|
// Update with old name/parent of root dir.
|
|
// (Where the Original parent Filter entry says it was.)
|
|
// Update orig parent of root dir (the parent dir where file came from)
|
|
//
|
|
FEList[FEx++] = FilterEntry;
|
|
|
|
FRS_ASSERT(OrigParentFilterEntry != NULL);
|
|
FEList[FEx++] = OrigParentFilterEntry;
|
|
}
|
|
} else {
|
|
//
|
|
// Not the root so update using current name/parent of FilterEntry.
|
|
//
|
|
FEList[FEx++] = FilterEntry;
|
|
}
|
|
|
|
//
|
|
// Apply the name space table updates.
|
|
//
|
|
while (FEx != 0) {
|
|
FE = FEList[--FEx];
|
|
JrnlUpdateNst(pVme, &FE->UFileName, &FE->DParentFileID, StreamSeqNum);
|
|
}
|
|
|
|
//
|
|
// Update or install the change order.
|
|
//
|
|
WStatus = JrnlUpdateChangeOrder(ChangeOrder,
|
|
Cop->NewReplica,
|
|
Cop->NewParentFid,
|
|
Cop->NewLocationCmd,
|
|
(Root ? UsnRecord : NULL));
|
|
|
|
//
|
|
// Update the filter entry if necessary.
|
|
//
|
|
|
|
//
|
|
// See if the filename part is different and, if so, copy it.
|
|
// Only applies to the Root entry of the subtree.
|
|
// Limit it to MAX_PATH characters.
|
|
//
|
|
if (Root) {
|
|
if (UsnRecord->FileNameLength > 2*MAX_PATH) {
|
|
UsnRecord->FileNameLength = 2*MAX_PATH;
|
|
}
|
|
FrsAllocUnicodeString(&FilterEntry->UFileName,
|
|
FilterEntry->DFileName,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
}
|
|
|
|
switch (Cop->NewLocationCmd) {
|
|
|
|
case CO_LOCATION_CREATE:
|
|
case CO_LOCATION_MOVEIN:
|
|
case CO_LOCATION_MOVEIN2:
|
|
case CO_LOCATION_NO_CMD:
|
|
//
|
|
// On creates and movein the caller has created the filter table
|
|
// entry already (to pass it to this fcn).
|
|
//
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
break;
|
|
|
|
case CO_LOCATION_DELETE:
|
|
case CO_LOCATION_MOVEOUT:
|
|
//
|
|
// Now delete the entry from the Filter Table. If this is the root
|
|
// then first drop the ref count by one to compensate for the first
|
|
// lookup in JrnlFilterUpdate() where all this started.
|
|
// The second ref was taken through the Enumerate list function.
|
|
//
|
|
if (Root) {
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
}
|
|
|
|
WStatus = JrnlDeleteDirFilterEntry(FilterTable, NULL, FilterEntry);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ ERROR - Dir entry delete failed.\n");
|
|
}
|
|
break;
|
|
|
|
|
|
case CO_LOCATION_MOVERS:
|
|
//
|
|
// Replica set changed. Update the filter entry.
|
|
//
|
|
FilterEntry->Replica = Cop->NewReplica;
|
|
FilterEntry->DReplicaNumber = Cop->NewReplica->ReplicaNumber;
|
|
|
|
/* FALL THRU INTENDED */
|
|
|
|
case CO_LOCATION_MOVEDIR:
|
|
//
|
|
// Directory changed. Applies to root on both MOVEDIR and MOVERS.
|
|
// Update the parent file ID in the filter entry and
|
|
// Put the filter entry on the childlist of the new parent.
|
|
//
|
|
if (Root) {
|
|
FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
|
|
|
|
if (FilterEntry->ChildEntry.Flink == NULL) {
|
|
DPRINT(0, "++ ERROR - Dir entry not on child list\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
FRS_ASSERT(!"Dir entry not on child list");
|
|
}
|
|
|
|
FrsRemoveEntryList(&FilterEntry->ChildEntry);
|
|
FilterEntry->ChildEntry.Flink = NULL;
|
|
|
|
WStatus1 = (ULONG)JrnlFilterLinkChild(FilterTable,
|
|
FilterEntry,
|
|
FilterEntry->Replica);
|
|
if (!WIN_SUCCESS(WStatus1)) {
|
|
DPRINT(0, "++ ERROR - JrnlFilterLinkChild Failed\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
FRS_ASSERT(!"JrnlFilterLinkChild Failed");
|
|
}
|
|
}
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
break;
|
|
|
|
|
|
default:
|
|
DPRINT1(0, "++ Error - switch arg out of range: %d\n", Cop->NewLocationCmd);
|
|
FRS_ASSERT(!"NewLocationCmd invalid");
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
}
|
|
|
|
//
|
|
// Return the change order status.
|
|
//
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlUpdateChangeOrder(
|
|
IN PCHANGE_ORDER_ENTRY ChangeOrder,
|
|
IN PREPLICA NewReplica,
|
|
IN ULONGLONG NewParentFid,
|
|
IN ULONG NewLocationCmd,
|
|
IN PUSN_RECORD UsnRecord
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function updates an existing directory change order that is still
|
|
pending in the Replica's change order process list or inserts a new change
|
|
order that has been prepared as described below.
|
|
|
|
There are two components to a change order, content and file location.
|
|
A given USN record could have changes to both parts.
|
|
|
|
The content component is updated by merging the reason flags from the
|
|
UsnRecord and capturing relevant parameters such as the attributes and
|
|
FileName.
|
|
|
|
The location update component is more complicated and uses a state table,
|
|
ChangeOrderLocationStateTable[], to manage the update. The state table
|
|
determines when we update the parent directory or the replica set in the
|
|
change order. This occurs when a directory is renamed. The states in
|
|
the table also correspond to the change order location command to be used.
|
|
|
|
The change order may move from one replica set to another. This routine
|
|
assumes that the caller has acquired the change order process list locks
|
|
for both the source and dest replicas. This is the only case where we can
|
|
pull it off the list because there could be a dependent entry that follows
|
|
it in the change order list and an error could result if the update
|
|
process saw the dependent entry first. (Probably only an issue for
|
|
directory creates).
|
|
|
|
The Source Change order process list lock is needed for all Location Commands.
|
|
The Destination Change order process list lock is needed for:
|
|
CO_LOCATION_MOVEIN, CO_LOCATION_MOVERS commands.
|
|
|
|
|
|
The change order may be evaporated in certain cases. If not this routine
|
|
decrements the reference count on the change order before it returns.
|
|
|
|
This routine can be called with a new change order but the caller must
|
|
pre-init the change order correctly:
|
|
1. Bump the initial ref count by 1 (since that is what lookup does).
|
|
2. The command flag CO_FLAG_ONLIST should be clear so we don't try
|
|
to pull it off a list.
|
|
3. The length field in the unicode string UFileName must be 0 to
|
|
capture the file name.
|
|
4. Set New and orig Replica fields to the original replica.
|
|
5. Set New and orig parent FID fields to the original parent FID.
|
|
6. The command flag CO_FLAG_LOCATION_CMD should be clear.
|
|
7. The FileReferenceNumber must be set to the file ID of the file/dir.
|
|
The File Id is the index into the change order table.
|
|
|
|
This routine also updates the parent file ID table so the parent File ID
|
|
tracks on renames and the entry is deleted if the change order is
|
|
evaporated or the new location command specifies delete.
|
|
|
|
Arguments:
|
|
|
|
ChangeOrder - The existing change order to be updated.
|
|
NewReplica - The destination replica the directory is renamed into.
|
|
NewparentFid - The destination parent the directory is renamed into.
|
|
NewLocationCmd - The new location command applied to the directory.
|
|
UsnRecord - The NTFS USN record describing the change. When walking a
|
|
through a sub-tree this will be NULL for all directories
|
|
except for the root.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlUpdateChangeOrder:"
|
|
|
|
PREPLICA Replica;
|
|
ULONG Control;
|
|
ULONG Op;
|
|
ULONG PreviousState;
|
|
ULONG Reason = 0;
|
|
BOOL EvapFlag = FALSE;
|
|
ULONG GStatus;
|
|
ULONG NewState;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
BOOL SubTreeRoot;
|
|
ULONG WStatus;
|
|
BOOL CoUpdate;
|
|
PCHANGE_ORDER_ENTRY NewParentCo;
|
|
ULONG LocationCmd;
|
|
|
|
//
|
|
// Only update parent file IDs on the sub tree root. This is the dir
|
|
// that the USN Record was generated for in the dir rename.
|
|
// For any subordinate dirs the caller must supply NULL.
|
|
// If a changeorder comes in already on the process list then it must
|
|
// be an update.
|
|
//
|
|
SubTreeRoot = (UsnRecord != NULL);
|
|
CoUpdate = CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST);
|
|
|
|
//
|
|
// If a USN record is supplied then check for any content flags set in the
|
|
// USN reason mask. If so then set the content flag in the change order.
|
|
// When walking a subtree the USN Record is non-null only for the root since
|
|
// the content changes don't apply to the children.
|
|
//
|
|
if (SubTreeRoot) {
|
|
Reason = UsnRecord->Reason;
|
|
if (Reason & CO_CONTENT_MASK) {
|
|
SET_CO_FLAG(ChangeOrder, CO_FLAG_CONTENT_CMD);
|
|
|
|
//
|
|
// Update the content portion of the change order. Merge in the
|
|
// reason mask from the Usn Record.
|
|
//
|
|
ChangeOrder->Cmd.ContentCmd |= Reason;
|
|
}
|
|
//
|
|
// Capture the name in the case of rename, create and delete.
|
|
// Limit it to MAX_PATH characters.
|
|
//
|
|
// if ((Reason & CO_LOCATION_MASK) || (ChangeOrder->UFileName.Length == 0)) {
|
|
if ((Reason & USN_REASON_RENAME_NEW_NAME) ||
|
|
(ChangeOrder->UFileName.Length == 0)) {
|
|
if (UsnRecord->FileNameLength > 2*MAX_PATH) {
|
|
UsnRecord->FileNameLength = 2*MAX_PATH;
|
|
}
|
|
FrsAllocUnicodeString(&ChangeOrder->UFileName,
|
|
ChangeOrder->Cmd.FileName,
|
|
UsnRecord->FileName,
|
|
UsnRecord->FileNameLength);
|
|
ChangeOrder->Cmd.FileNameLength = UsnRecord->FileNameLength;
|
|
}
|
|
|
|
//
|
|
// Capture most recent file attributes.
|
|
// In the case where we are updating a pending CO,
|
|
// we would miss a series of ops on the same file such as
|
|
// set the hidden bit, close, delete the system bit, close, ...
|
|
//
|
|
ChangeOrder->Cmd.FileAttributes = UsnRecord->FileAttributes;
|
|
//
|
|
// Update to the latest USN contributing to this change order.
|
|
//
|
|
ChangeOrder->Cmd.JrnlUsn = UsnRecord->Usn;
|
|
}
|
|
|
|
//
|
|
// Check if there is a new location command. If not go insert the change order.
|
|
//
|
|
if (NewLocationCmd == CO_LOCATION_NO_CMD) {
|
|
goto INSERT_CHANGE_ORDER;
|
|
}
|
|
|
|
//
|
|
// Update the parent file ID table based on the new location command.
|
|
//
|
|
if (CO_NEW_FILE(NewLocationCmd)) {
|
|
//
|
|
// Add a new entry for the new file in the R.S.
|
|
//
|
|
ChangeOrder->ParentFileReferenceNumber = NewParentFid;
|
|
GStatus = QHashInsert(NewReplica->pVme->ParentFidTable,
|
|
&ChangeOrder->FileReferenceNumber,
|
|
&NewParentFid,
|
|
NewReplica->ReplicaNumber,
|
|
FALSE);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
|
|
}
|
|
} else
|
|
if ((NewLocationCmd == CO_LOCATION_DELETE) ||
|
|
(NewLocationCmd == CO_LOCATION_MOVEOUT)) {
|
|
//
|
|
// File is gone. Remove the entry.
|
|
//
|
|
GStatus = QHashDelete(NewReplica->pVme->ParentFidTable,
|
|
&ChangeOrder->FileReferenceNumber);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashDelete error: %d\n", GStatus);
|
|
}
|
|
} else
|
|
if (CO_MOVE_RS_OR_DIR(NewLocationCmd)) {
|
|
//
|
|
// File changed parents. Update the entry for subtree root only.
|
|
//
|
|
if (SubTreeRoot) {
|
|
ChangeOrder->ParentFileReferenceNumber = NewParentFid;
|
|
GStatus = QHashUpdate(NewReplica->pVme->ParentFidTable,
|
|
&ChangeOrder->FileReferenceNumber,
|
|
&NewParentFid,
|
|
0);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashUpdate error: %d\n", GStatus);
|
|
}
|
|
}
|
|
} else {
|
|
DPRINT1(0, "++ ERROR - Invalid new location command: %d\n", NewLocationCmd);
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Update the location component of the change order. Fetch the Control
|
|
// DWORD from the table based on the pending command and the new command
|
|
// then perform the specified operation sequence. If the pending change
|
|
// order was for a content change then there is no prior location command.
|
|
// Check for this.
|
|
//
|
|
// Caller has acquired change order process lock for both current and
|
|
// new Replica Sets as appropriate.
|
|
//
|
|
|
|
if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD)) {
|
|
PreviousState = GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command);
|
|
} else {
|
|
PreviousState = NSNoLocationCmd;
|
|
SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCATION_CMD);
|
|
}
|
|
|
|
Control = ChangeOrderLocationStateTable[PreviousState][NewLocationCmd].u1.UlongOpFields;
|
|
|
|
DPRINT5(5,"++ Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
|
|
CoLocationNames[PreviousState], PreviousState,
|
|
CoLocationNames[NewLocationCmd], NewLocationCmd,
|
|
Control);
|
|
|
|
if (Control == 0) {
|
|
DPRINT2(0, "++ ERROR - Invalid transition. Pending: %d New: %d\n",
|
|
PreviousState, NewLocationCmd);
|
|
FRS_ASSERT(!"Invalid CO Location cmd transition-1");
|
|
goto ERROR_RETURN;
|
|
}
|
|
|
|
while (Control != 0) {
|
|
Op = Control & 0x0000000F;
|
|
Control = Control >> 4;
|
|
|
|
switch (Op) {
|
|
|
|
|
|
//
|
|
// Done.
|
|
//
|
|
case OpInval:
|
|
DPRINT5(0,"++ Error - Invalid state transition - Old state: %s (%d), Input cmd: %s (%d), Ctl Wd: %08x\n",
|
|
CoLocationNames[PreviousState], PreviousState,
|
|
CoLocationNames[NewLocationCmd], NewLocationCmd,
|
|
Control);
|
|
FRS_ASSERT(!"Invalid CO Location cmd transition-2");
|
|
Control = 0;
|
|
break;
|
|
|
|
|
|
//
|
|
// Evaporate the pending change order. It should be on the process
|
|
// list associated with the NewReplica. THis should never happen
|
|
// if the previous state is NSNoLocationCmd.
|
|
//
|
|
case OpEvap:
|
|
|
|
//
|
|
// Increment the CO Evaporated Counter
|
|
//
|
|
PM_INC_CTR_REPSET(NewReplica, COEvaporated, 1);
|
|
|
|
DPRINT(5, "++ OpEvap\n");
|
|
pVme = ChangeOrder->NewReplica->pVme;
|
|
|
|
FRS_ASSERT(PreviousState != NSNoLocationCmd);
|
|
FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
|
|
|
|
FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
|
|
&ChangeOrder->ProcessList);
|
|
DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
|
|
DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica, ChangeOrder, ERROR_SUCCESS);
|
|
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpEvap");
|
|
|
|
DEC_LOCAL_CO_QUEUE_COUNT(ChangeOrder->NewReplica);
|
|
//
|
|
// Delete the entry from the Change Order Table. It should be in
|
|
// the Change order table assoicated with NewReplica. The ref
|
|
// count should be 2 since the caller did a lookup.
|
|
//
|
|
FRS_ASSERT(ChangeOrder->HashEntryHeader.ReferenceCount == 2);
|
|
|
|
GStatus = GhtDeleteEntryByAddress(pVme->ChangeOrderTable,
|
|
ChangeOrder,
|
|
TRUE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
|
|
FRS_PRINT_TYPE(0, ChangeOrder);
|
|
FRS_ASSERT(!"JrnlUpdateCO: CO Table GhtDeleteEntryByAddress failed");
|
|
goto ERROR_RETURN;
|
|
}
|
|
EvapFlag = TRUE;
|
|
break;
|
|
|
|
|
|
|
|
//
|
|
// Update the New Replica Set
|
|
//
|
|
case OpNRs:
|
|
|
|
DPRINT(5, "++ OpNRs\n");
|
|
//
|
|
// Update the parent dir on the subtree root and the replica ID
|
|
// on all change orders.
|
|
//
|
|
ChangeOrder->NewReplica = NewReplica;
|
|
|
|
/* FALL THRU INTENDED */
|
|
|
|
//
|
|
// Update the New Parent Directory on the subtree root only.
|
|
//
|
|
case OpNDir:
|
|
|
|
if (Op == OpNDir) {DPRINT(5, "++ OpNDir\n");}
|
|
|
|
if (SubTreeRoot) {
|
|
ChangeOrder->NewParentFid = NewParentFid;
|
|
|
|
if (CoUpdate) {
|
|
//
|
|
// See if there is a pending change order on the new parent.
|
|
// If there is and it is a create that happens after this
|
|
// change order then move this updated CO to the end of the
|
|
// list so the Parent Create is done first. We do this by
|
|
// removing it from the list and letting the insert code put
|
|
// it back on at the end with a new VSN.
|
|
//
|
|
pVme = ChangeOrder->NewReplica->pVme;
|
|
GStatus = GhtLookup(pVme->ChangeOrderTable,
|
|
&NewParentFid,
|
|
TRUE,
|
|
&NewParentCo);
|
|
|
|
if ((GStatus == GHT_STATUS_SUCCESS) &&
|
|
(NewParentCo->Cmd.FrsVsn > ChangeOrder->Cmd.FrsVsn)){
|
|
|
|
FRS_ASSERT(!IsListEmpty(&ChangeOrder->ProcessList));
|
|
FrsRtlRemoveEntryQueueLock(&pVme->ChangeOrderList,
|
|
&ChangeOrder->ProcessList);
|
|
DECREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
|
|
DROP_CO_CXTION_COUNT(ChangeOrder->NewReplica,
|
|
ChangeOrder,
|
|
ERROR_SUCCESS);
|
|
CLEAR_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
|
|
CHANGE_ORDER_TRACE(3, ChangeOrder, "Local Co OpNDir");
|
|
DEC_LOCAL_CO_QUEUE_COUNT(ChangeOrder->NewReplica);
|
|
GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
|
|
NewParentCo,
|
|
TRUE);
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
//
|
|
// Update the State / Command.
|
|
//
|
|
case OpNSt:
|
|
|
|
NewState = Control & 0x0000000F;
|
|
DPRINT2(5, "++ OpNst: %s (%d)\n", CoLocationNames[NewState], NewState);
|
|
SET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command, NewState);
|
|
Control = Control >> 4;
|
|
|
|
break;
|
|
|
|
|
|
//
|
|
// The table is messed up.
|
|
//
|
|
default:
|
|
DPRINT1(0, "++ Error - Invalid dispatch operation: %d\n", Op);
|
|
FRS_ASSERT(!"Invalid CO dispatch operation");
|
|
goto ERROR_RETURN;
|
|
}
|
|
}
|
|
|
|
INSERT_CHANGE_ORDER:
|
|
//
|
|
// If the change order hasn't been deleted then decrement the ref count
|
|
// to balance the Caller's lookup. If the change order is not on a process
|
|
// list because it is new or it switched replica sets then put it on the
|
|
// target list.
|
|
//
|
|
WStatus = ERROR_SUCCESS;
|
|
if (!EvapFlag) {
|
|
|
|
Replica = ChangeOrder->NewReplica;
|
|
pVme = Replica->pVme;
|
|
|
|
if (!CO_FLAG_ON(ChangeOrder, CO_FLAG_ONLIST)) {
|
|
|
|
//
|
|
// No reason to age deletes
|
|
//
|
|
if (CO_FLAG_ON(ChangeOrder, CO_FLAG_LOCATION_CMD) &&
|
|
(GET_CO_LOCATION_CMD(ChangeOrder->Cmd, Command) == CO_LOCATION_DELETE)) {
|
|
ChangeOrder->TimeToRun = CO_TIME_NOW(pVme);
|
|
} else {
|
|
ChangeOrder->TimeToRun = CO_TIME_TO_RUN(pVme);
|
|
}
|
|
|
|
//
|
|
// Generate a new Volume Sequnce Number for the change order since
|
|
// it gets sent to the end of the new R.S. process list.
|
|
// The change order VSNs must be kept monotonically increasing
|
|
// within a replica set for change order dampening to work.
|
|
//
|
|
NEW_VSN(pVme, &ChangeOrder->Cmd.FrsVsn);
|
|
SET_CO_FLAG(ChangeOrder, CO_FLAG_LOCALCO);
|
|
|
|
//
|
|
// Entry already in Aging table if its a CO update. If this is a
|
|
// duplicate entry for the same FID (because the merge was
|
|
// disallowed then put this entry at the end of the duplicate list.
|
|
//
|
|
if (!CoUpdate) {
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Q Insert",
|
|
ChangeOrder->Cmd.ContentCmd);
|
|
GStatus = GhtInsert(pVme->ChangeOrderTable, ChangeOrder, TRUE, TRUE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT1(0, "++ ERROR - GhtInsert Failed: %d\n", GStatus);
|
|
FRS_ASSERT(!"Local Co Q Insert Failed");
|
|
goto ERROR_RETURN;
|
|
}
|
|
SET_COE_FLAG(ChangeOrder, COE_FLAG_IN_AGING_CACHE);
|
|
} else {
|
|
CHANGE_ORDER_TRACEX(3, ChangeOrder, "Local Co Aging Update",
|
|
ChangeOrder->Cmd.ContentCmd);
|
|
}
|
|
|
|
INCREMENT_CHANGE_ORDER_REF_COUNT(ChangeOrder);
|
|
|
|
//
|
|
// For remote COs the cxtion count is incremented when the remote CO
|
|
// goes onto the CO process queue. We don't do this for local COs
|
|
// because the code to shutdown the Jrnl Cxtion may never see the
|
|
// CO count go to zero if we did this. We just set the CO
|
|
// CxtionGuid and the CO JoinGuid here so unjoin / rejoins can be
|
|
// detected.
|
|
//
|
|
INIT_LOCALCO_CXTION_GUID(Replica, ChangeOrder);
|
|
|
|
WStatus = FrsRtlInsertTailQueueLock(&pVme->ChangeOrderList,
|
|
&ChangeOrder->ProcessList);
|
|
if (WIN_SUCCESS(WStatus)) {
|
|
SET_CO_FLAG(ChangeOrder, CO_FLAG_ONLIST);
|
|
INC_LOCAL_CO_QUEUE_COUNT(Replica);
|
|
} else {
|
|
DPRINT_WS(0, "++ ERROR - ChangeOrder insert failed:", WStatus);
|
|
}
|
|
|
|
}
|
|
|
|
GStatus = GhtDereferenceEntryByAddress(pVme->ChangeOrderTable,
|
|
ChangeOrder,
|
|
TRUE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT(0, "++ ERROR: GhtDereferenceEntryByAddress ref count non positive.\n");
|
|
FRS_PRINT_TYPE(0, ChangeOrder);
|
|
FRS_ASSERT(!"CO ref count non positive");
|
|
goto ERROR_RETURN;
|
|
}
|
|
}
|
|
|
|
return WStatus;
|
|
|
|
|
|
ERROR_RETURN:
|
|
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlDoesChangeOrderHaveChildrenWorker(
|
|
IN PQHASH_TABLE ParentFidTable,
|
|
IN PQHASH_ENTRY BeforeNode,
|
|
IN PQHASH_ENTRY TargetNode,
|
|
IN PVALID_CHILD_CHECK_DATA pValidChildCheckData
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru QHashEnumerateTable().
|
|
|
|
Search for a match between the ParentFid and the entry's
|
|
ParentFid (QHASH_ENTRY.QData).
|
|
|
|
Arguments:
|
|
|
|
Table -- the hash table being enumerated
|
|
BeforeNode -- ptr to the QhashEntry before the node of interest.
|
|
TargetNode -- ptr to the QhashEntry of interest.
|
|
pValidChildCheckData -- ptr to the parent fid
|
|
|
|
Return Value:
|
|
|
|
FrsErrorResourceInUse - Child of ParentFid was found
|
|
FrsErrorSuccess - No children were found for ParentFid
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlDoesChangeOrderHaveChildrenWorker:"
|
|
|
|
JET_ERR jerr;
|
|
PTHREAD_CTX ThreadCtx = pValidChildCheckData->ThreadCtx;
|
|
PTABLE_CTX TmpIDTableCtx = pValidChildCheckData->TmpIDTableCtx;
|
|
PIDTABLE_RECORD IDTableRec;
|
|
|
|
if ((TargetNode->QData == pValidChildCheckData->FileReferenceNumber)){
|
|
|
|
if (ThreadCtx == NULL || TmpIDTableCtx == NULL) {
|
|
return FrsErrorResourceInUse;
|
|
}
|
|
|
|
jerr = DbsReadRecord(ThreadCtx, &TargetNode->QKey, FileIDIndexx, TmpIDTableCtx);
|
|
|
|
//
|
|
// No IDTable entry. OK to delete the child.
|
|
//
|
|
if (jerr == JET_errRecordNotFound) {
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
if (!JET_SUCCESS(jerr)) {
|
|
DPRINT_JS(0,"++ ERROR - DbsReadRecord failed;", jerr);
|
|
return FrsErrorResourceInUse;
|
|
}
|
|
|
|
IDTableRec = (PIDTABLE_RECORD) (TmpIDTableCtx->pDataRecord);
|
|
|
|
//
|
|
// This child of the parent is not marked to be deleted which means it is
|
|
// not going away. Hence return that this parent has children. The parent
|
|
// delete will be aborted.
|
|
//
|
|
if (!IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETE_DEFERRED)) {
|
|
return FrsErrorResourceInUse;
|
|
}
|
|
|
|
}
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlDoesChangeOrderHaveChildren(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PTABLE_CTX TmpIDTableCtx,
|
|
IN PCHANGE_ORDER_ENTRY ChangeOrder
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
The ChangeOrderAccept thread is issueing a retry of a directory
|
|
delete. The question is, "Does this directory have replicating
|
|
children?" If so, the change order should be retried at a later
|
|
time.
|
|
|
|
If not, the change order is sent on to an install thread that
|
|
will empty the directory of any files or subdirectories and
|
|
then delete the directory. The files and subdirectories are
|
|
assumed to have been filtered and are non-replicating. You can
|
|
see why we want to insure there are no replicating files or
|
|
subdirectories in this directory prior to emptying the directory.
|
|
|
|
The journal's directory filter table and the journal's parent fid
|
|
table are searched for children of the directory specified by
|
|
ChangeOrder.
|
|
|
|
Arguments:
|
|
|
|
ChangeOrder - For a retry of a directory delete
|
|
|
|
Return Value:
|
|
|
|
TRUE - Directory has replicating children in the journal tables
|
|
FALSE - Directory does not have replicating children in the journal tables
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlDoesChangeOrderHaveChildren:"
|
|
DWORD FStatus;
|
|
PREPLICA Replica;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PQHASH_TABLE ParentFidTable;
|
|
VALID_CHILD_CHECK_DATA ValidChildCheckData;
|
|
|
|
Replica = ChangeOrder->NewReplica;
|
|
|
|
//
|
|
// Retry the change order if information about its children is lacking.
|
|
//
|
|
if (!Replica) {
|
|
DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
|
|
return TRUE;
|
|
}
|
|
pVme = Replica->pVme;
|
|
if (!pVme) {
|
|
DPRINT(4, "++ WARN: No pVme in Replica\n");
|
|
return TRUE;
|
|
}
|
|
ParentFidTable = pVme->ParentFidTable;
|
|
if (!ParentFidTable) {
|
|
DPRINT(4, "++ WARN: No ParentFidTable in pVme\n");
|
|
return TRUE;
|
|
}
|
|
|
|
//
|
|
// Look for subdirectories and files.
|
|
//
|
|
ValidChildCheckData.ThreadCtx = ThreadCtx;
|
|
ValidChildCheckData.TmpIDTableCtx = TmpIDTableCtx;
|
|
ValidChildCheckData.FileReferenceNumber = ChangeOrder->FileReferenceNumber;
|
|
|
|
FStatus = QHashEnumerateTable(ParentFidTable,
|
|
JrnlDoesChangeOrderHaveChildrenWorker,
|
|
&ValidChildCheckData);
|
|
if (FStatus == FrsErrorResourceInUse) {
|
|
DPRINT(4, "++ Child found; change order has files\n");
|
|
return TRUE;
|
|
}
|
|
DPRINT(4, "++ Child not found; change order has no subdirs or files\n");
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlAddFilterEntryFromUsn(
|
|
IN PREPLICA Replica,
|
|
IN PUSN_RECORD UsnRecord,
|
|
OUT PFILTER_TABLE_ENTRY *RetFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Create a new filter table entry from data in the USN record and the
|
|
Replica struct. Insert it into the Volume Filter Table.
|
|
|
|
The caller must decrement the refcount on the filter entry.
|
|
|
|
Arguments:
|
|
|
|
Replica - ptr to the Replica struct containing the directory now.
|
|
UsnRecord - ptr to the UsnRecord.
|
|
RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
|
|
want a reference to the entry so we drop it here.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlAddFilterEntryFromUsn:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
ULONG Len;
|
|
ULONG WStatus;
|
|
|
|
//
|
|
// Create a new filter entry.
|
|
// The size of the file name field is Len + sizeof(WCHAR) because
|
|
// the file name field is defined as a wchar array of length 1.
|
|
//
|
|
Len = UsnRecord->FileNameLength;
|
|
FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
|
|
|
|
FilterEntry->DFileID = UsnRecord->FileReferenceNumber;
|
|
FilterEntry->DParentFileID = UsnRecord->ParentFileReferenceNumber;
|
|
|
|
FrsCopyUnicodeStringFromRawString(&FilterEntry->UFileName,
|
|
Len + sizeof(WCHAR),
|
|
UsnRecord->FileName,
|
|
Len);
|
|
|
|
WStatus = JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, TRUE);
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DUMP_USN_RECORD2(0, UsnRecord, Replica->ReplicaNumber, CO_LOCATION_NUM_CMD);
|
|
}
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlAddFilterEntryFromCo(
|
|
IN PREPLICA Replica,
|
|
IN PCHANGE_ORDER_ENTRY ChangeOrder,
|
|
OUT PFILTER_TABLE_ENTRY *RetFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Create a new filter table entry from data in the change order entry and the
|
|
Replica struct. Insert it into the Volume Filter Table. This is called
|
|
when we receive remote change orders that create a directory.
|
|
|
|
If this is a recovery change order than the filter entry is replaced if
|
|
there is a conflict.
|
|
|
|
The caller must decrement the refcount on the filter entry.
|
|
|
|
Arguments:
|
|
|
|
Replica - ptr to the Replica struct containing the directory now.
|
|
ChangeOrder -- ptr to the change order entry.
|
|
RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
|
|
want a reference to the entry so we drop it here.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlAddFilterEntryFromCo:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
ULONG Len;
|
|
ULONG WStatus;
|
|
|
|
//
|
|
// Create a new filter entry.
|
|
// NOTE that the actual size of the filename buffer is Len +
|
|
// sizeof(WCHAR) because the definition of FILTER_TABLE_ENTRY
|
|
// includes a single wchar array for filename. Hence, the
|
|
// assignment of UNICODE_NULL to Buffer[Len/2] doesn't scribble
|
|
// past the end of the array.
|
|
//
|
|
Len = ChangeOrder->Cmd.FileNameLength;
|
|
FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, Len);
|
|
|
|
FilterEntry->DFileID = ChangeOrder->FileReferenceNumber;
|
|
FilterEntry->DParentFileID = ChangeOrder->ParentFileReferenceNumber;
|
|
|
|
FilterEntry->UFileName.Length = (USHORT)Len;
|
|
CopyMemory(FilterEntry->UFileName.Buffer, ChangeOrder->Cmd.FileName, Len);
|
|
FilterEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
|
|
|
|
//
|
|
// Its possible to receive a change order more than once; and the
|
|
// first change order may have been taken through retry. If the
|
|
// change order was for a directory create, this would leave
|
|
// an idtable entry set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
|
|
// *and* the directories entries in the filter table. So, always
|
|
// relace an existing entry.
|
|
//
|
|
return JrnlAddFilterEntry(Replica, FilterEntry, RetFilterEntry, TRUE);
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlAddFilterEntry(
|
|
IN PREPLICA Replica,
|
|
IN PFILTER_TABLE_ENTRY FilterEntry,
|
|
OUT PFILTER_TABLE_ENTRY *RetFilterEntry,
|
|
IN BOOL Replace
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Insert the filter entry into the Volume Filter Table.
|
|
This routine acquires the child list lock for the replica when doing the
|
|
child list insert.
|
|
|
|
The caller must decrement the refcount on the filter entry.
|
|
|
|
On an insert error the entry is freed and NULL is returned.
|
|
|
|
Arguments:
|
|
|
|
Replica - ptr to the Replica struct containing the directory now.
|
|
FilterEntry -- ptr to filter entry to insert.
|
|
RetFilterEntry - ptr to returned filter table ptr. NULL if caller doesn't
|
|
want a reference to the entry so we drop it here.
|
|
On an insert error the entry is freed and NULL is returned.
|
|
Replace - If true then replace current entry with this one if conflict.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlAddFilterEntry:"
|
|
|
|
PGENERIC_HASH_TABLE FilterTable = Replica->pVme->FilterTable;
|
|
ULONG GStatus, WStatus=ERROR_GEN_FAILURE;
|
|
ULONG RetryCount = 0;
|
|
PFILTER_TABLE_ENTRY OldEntry;
|
|
ULONG Len;
|
|
|
|
//
|
|
// Start ref count out at one (insert bumps it again to 2) if we
|
|
// return the address of the entry.
|
|
//
|
|
FilterEntry->HashEntryHeader.ReferenceCount = 1;
|
|
FilterEntry->Replica = Replica;
|
|
FilterEntry->DReplicaNumber = Replica->ReplicaNumber;
|
|
|
|
RETRY:
|
|
//
|
|
// Insert the entry into the VME Filter Table.
|
|
//
|
|
GStatus = GhtInsert(FilterTable, FilterEntry, TRUE, FALSE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
if (Replace) {
|
|
goto REPLACE;
|
|
}
|
|
DPRINT1(0, "++ ERROR - GhtInsert Failed: %d, Entry conflict. Tried to insert:\n", GStatus);
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
FilterEntry = FrsFreeType(FilterEntry);
|
|
//
|
|
// Don't know how to translate GStatus to WStatus. The return value is ignored
|
|
// anyways.
|
|
//
|
|
WStatus = ERROR_GEN_FAILURE;
|
|
goto ERROR_RETURN;
|
|
}
|
|
|
|
//
|
|
// Link the filter entry onto the parent's child list and drop the reference
|
|
// if the caller doesn't want the ptr back.
|
|
//
|
|
JrnlAcquireChildLock(Replica);
|
|
WStatus = (ULONG)JrnlFilterLinkChild(FilterTable, FilterEntry, Replica);
|
|
JrnlReleaseChildLock(Replica);
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
//
|
|
// Need some code here to add this filter entry to an orphan list
|
|
// in the off chance that the parent will later come into existence
|
|
// and now needs to hook up to the child. The creation of each new
|
|
// entry would then have to scan the orphan list if it was non-empty.
|
|
// Note that because of ordering constraints I don't think this
|
|
// can actually happen except in the case of a remote co dir create
|
|
// while a local co moveout is in process. But in this case when
|
|
// the child dir is found during the enum it will end up getting
|
|
// deleted.
|
|
// If we relax the ordering constraints on dir creates (since they
|
|
// all start out being created in the pre-install area anyway) then
|
|
// this code will definitely be needed.
|
|
//
|
|
// Note: May need dir filter entry orphan list. see note above.
|
|
}
|
|
|
|
|
|
RETURN:
|
|
|
|
if (RetFilterEntry != NULL) {
|
|
*RetFilterEntry = FilterEntry;
|
|
} else {
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
}
|
|
|
|
return WStatus;
|
|
|
|
|
|
REPLACE:
|
|
//
|
|
// Replace the data in the old entry with the data in the new entry.
|
|
//
|
|
GStatus = GhtLookup(FilterTable, &FilterEntry->DFileID, TRUE, &OldEntry);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
FRS_ASSERT(RetryCount++ > 10);
|
|
goto RETRY;
|
|
}
|
|
|
|
FRS_ASSERT(OldEntry->DFileID == FilterEntry->DFileID);
|
|
//
|
|
// Undoing a MOVERS for a dir is going to be a pain.
|
|
// Need to check if it can really happen. Could we just abort this CO?
|
|
//
|
|
FRS_ASSERT(OldEntry->Replica == FilterEntry->Replica);
|
|
FRS_ASSERT(OldEntry->DReplicaNumber == FilterEntry->DReplicaNumber);
|
|
|
|
|
|
if (OldEntry->DParentFileID != FilterEntry->DParentFileID) {
|
|
//
|
|
// If parent FID is different then change child linkage.
|
|
//
|
|
JrnlAcquireChildLock(Replica);
|
|
|
|
WStatus = JrnlFilterUnlinkChild (FilterTable, OldEntry, OldEntry->Replica);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
|
|
goto REPLACE_ERROR;
|
|
}
|
|
|
|
//
|
|
// Update the filter entry with the new parent and reinsert into filter.
|
|
//
|
|
OldEntry->DParentFileID = FilterEntry->DParentFileID;
|
|
|
|
WStatus = (ULONG) JrnlFilterLinkChild(FilterTable,
|
|
OldEntry,
|
|
OldEntry->Replica);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT(0, "++ ERROR - Failed to put filter entry on Child List\n");
|
|
goto REPLACE_ERROR;
|
|
}
|
|
JrnlReleaseChildLock(Replica);
|
|
|
|
}
|
|
|
|
if (FilterEntry->UFileName.Length <= (OldEntry->UFileName.MaximumLength -
|
|
sizeof(WCHAR))) {
|
|
Len = FilterEntry->UFileName.Length;
|
|
} else {
|
|
//
|
|
// Note: need a swap entry with row locked and ref count 2 to realloc node.
|
|
//
|
|
// Or just alloc a new buffer and set UFileName to point to it with
|
|
// a test on the free side to check if not using the in-node buffer.
|
|
// But do we really need the name?
|
|
// It is used to build the full name path but is it really needed?
|
|
// For now just copy the first n characters.
|
|
//
|
|
Len = OldEntry->UFileName.MaximumLength - sizeof(WCHAR);
|
|
}
|
|
|
|
CopyMemory(OldEntry->UFileName.Buffer, FilterEntry->UFileName.Buffer, Len);
|
|
OldEntry->UFileName.Buffer[Len/2] = UNICODE_NULL;
|
|
OldEntry->UFileName.Length = (USHORT) Len;
|
|
|
|
FRS_JOURNAL_FILTER_PRINT(5, FilterTable, OldEntry);
|
|
FrsFreeType(FilterEntry);
|
|
FilterEntry = OldEntry;
|
|
WStatus = ERROR_SUCCESS;
|
|
goto RETURN;
|
|
|
|
|
|
REPLACE_ERROR:
|
|
JrnlReleaseChildLock(Replica);
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, OldEntry);
|
|
GhtDereferenceEntryByAddress(FilterTable, OldEntry, TRUE);
|
|
|
|
|
|
ERROR_RETURN:
|
|
|
|
GHT_DUMP_TABLE(5, FilterTable);
|
|
|
|
if (RetFilterEntry != NULL) {*RetFilterEntry = NULL;}
|
|
return ERROR_GEN_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlDeleteDirFilterEntry(
|
|
IN PGENERIC_HASH_TABLE FilterTable,
|
|
IN PULONGLONG DFileID,
|
|
IN PFILTER_TABLE_ENTRY ArgFilterEntry
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Delete the filter entry from the Volume Filter Table.
|
|
|
|
The caller acquires the child list lock for the replica when doing the
|
|
child list removal.
|
|
|
|
The caller must decrement the refcount on the filter entry.
|
|
|
|
Arguments:
|
|
|
|
FilterTable - ptr to the filter table struct containing the directory now.
|
|
DFileID - ptr to FID of dir to delete.
|
|
ArgFilterEntry - if non-null then delete this entry and skip lookup.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlDeleteDirFilterEntry:"
|
|
|
|
ULONG GStatus, WStatus;
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
|
|
|
|
//
|
|
// Find the entry.
|
|
//
|
|
if (ArgFilterEntry == NULL) {
|
|
GStatus = GhtLookup(FilterTable, DFileID, TRUE, &FilterEntry);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT1(0, "++ WARNING: Filter entry not found in table for FID= %08x %08x\n",
|
|
PRINTQUAD(*DFileID));
|
|
return ERROR_NOT_FOUND;
|
|
}
|
|
} else {
|
|
FilterEntry = ArgFilterEntry;
|
|
}
|
|
|
|
DPRINT1(4, "++ Deleting filter entry, FID= %08x %08x\n", PRINTQUAD(FilterEntry->DFileID));
|
|
|
|
//
|
|
// Unlink the filter entry from the parent's child list.
|
|
//
|
|
// Return an error if there are children. This can happen
|
|
// when we take a directory-create through retry. Its children
|
|
// were added when the process queue was unblocked. This
|
|
// function is then called when retrying the change order
|
|
// with the idtable set to IDREC_FLAGS_NEW_FILE_IN_PROGRESS
|
|
//
|
|
if (!IsListEmpty(&FilterEntry->ChildHead)) {
|
|
DPRINT(0, "++ WARN - Dir Delete but child list not empty\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
if (FilterEntry->ChildEntry.Flink == NULL) {
|
|
//
|
|
// This may happen if we have just completed a MOVEOUT of a dir
|
|
// subtree and a dir create remote CO is ahead of us in the process
|
|
// queue. When the dir create tried to add the filter table entry
|
|
// it won't find the parent so this entry won't be on any parent list.
|
|
// See comment in JrnlAddFilterEntry() about creation of an orphan
|
|
// list in the future.
|
|
//
|
|
DPRINT(0, "++ WARN - Dir entry not on child list\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
} else {
|
|
FrsRemoveEntryList(&FilterEntry->ChildEntry);
|
|
FilterEntry->ChildEntry.Flink = NULL;
|
|
}
|
|
|
|
//
|
|
// Delete the entry from the filter table.
|
|
//
|
|
GStatus = GhtDeleteEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT(0, "++ ERROR - GhtDeleteEntryByAddress failed.\n");
|
|
FRS_JOURNAL_FILTER_PRINT(0, FilterTable, FilterEntry);
|
|
FRS_ASSERT(!"JrnlDeleteDirFilterEntry failed.");
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlGetPathAndLevel(
|
|
IN PGENERIC_HASH_TABLE FilterTable,
|
|
IN PLONGLONG StartDirFileID,
|
|
OUT PULONG Level
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Walk the filter table from DirFileID to the root building the directory
|
|
path and counting the levels.
|
|
|
|
Arguments:
|
|
|
|
FilterTable -- Ptr to the Generic hash table containing a dir filter
|
|
StartDirFileID -- The file id of the directory to start the walk from.
|
|
Level -- The returned nesting level of the dir. (0 means the replcia tree root)
|
|
|
|
Return Value:
|
|
|
|
FrsError status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlGetPathAndLevel:"
|
|
|
|
ULONGLONG DirFileID = *StartDirFileID;
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
ULONG FStatus = FrsErrorSuccess;
|
|
ULONG GStatus;
|
|
|
|
*Level = 0;
|
|
|
|
GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
|
|
|
|
if (GStatus == GHT_STATUS_NOT_FOUND) {
|
|
return FrsErrorNotFound;
|
|
}
|
|
|
|
while (GStatus == GHT_STATUS_SUCCESS) {
|
|
//
|
|
// Stop when we hit the replica tree root.
|
|
//
|
|
if (FilterEntry->DParentFileID == ZERO_FID) {
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
break;
|
|
}
|
|
|
|
*Level += 1;
|
|
if (*Level > 100000) {
|
|
//
|
|
// Hung. Corrupt Filter table.
|
|
//
|
|
DPRINT(0, "++ ERROR: Hung in Journal entry filter lookup. Entry skipped\n");
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
GHT_DUMP_TABLE(0, FilterTable);
|
|
FRS_ASSERT(!"Hung in Journal entry filter lookup");
|
|
return FrsErrorInternalError;
|
|
}
|
|
|
|
//
|
|
// Get parent FID & Drop the reference to the filter table entry.
|
|
// Lookup parent's filter entry.
|
|
//
|
|
DirFileID = FilterEntry->DParentFileID;
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
|
|
|
|
GStatus = GhtLookup(FilterTable, &DirFileID, TRUE, &FilterEntry);
|
|
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
//
|
|
// Corrupt Filter table or it could be an op on an orphaned
|
|
// dir that will later get deleted.
|
|
//
|
|
DPRINT(0, "++ ERROR: Parent filter entry not found in Journal filter Table.\n");
|
|
//GHT_DUMP_TABLE(0, FilterTable);
|
|
return FrsErrorInternalError;
|
|
}
|
|
}
|
|
|
|
return FStatus;
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlIsChangeOrderInReplica(
|
|
IN PCHANGE_ORDER_ENTRY ChangeOrder,
|
|
IN PLONGLONG DirFileID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Look up the File ID for the given directory in the given journal filter
|
|
table and if found compare the replica set pointer from the filter entry
|
|
to the replica set pointer in the change order. Return TRUE if match.
|
|
|
|
Arguments:
|
|
|
|
ChangeOrder -- The change order entry assoicated with the file of interest.
|
|
|
|
DirFileID -- The file id of the directory in which the file currently
|
|
resides. This may be different than the parent FID in the
|
|
change order.
|
|
|
|
|
|
Return Value:
|
|
|
|
TRUE if Pointer to Replica Struct or NULL if not found.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlIsChangeOrderInReplica:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
PGENERIC_HASH_TABLE FilterTable;
|
|
ULONG GStatus;
|
|
PREPLICA Replica, FilterReplica = NULL;
|
|
|
|
|
|
|
|
Replica = ChangeOrder->NewReplica;
|
|
|
|
if (Replica == NULL) {
|
|
DPRINT(4, "++ WARN: No Replica in ChangeOrder\n");
|
|
return FALSE;
|
|
}
|
|
|
|
if (Replica->pVme == NULL) {
|
|
DPRINT(4, "++ WARN: No pVme in Replica\n");
|
|
return FALSE;
|
|
}
|
|
|
|
FilterTable = Replica->pVme->FilterTable;
|
|
if (FilterTable == NULL) {
|
|
DPRINT(4, "++ WARN: No FilterTable in pVme\n");
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
GStatus = GhtLookup(FilterTable, DirFileID, TRUE, &FilterEntry);
|
|
|
|
if (GStatus == GHT_STATUS_SUCCESS) {
|
|
|
|
//
|
|
// Get Replica ptr & Drop the reference to the filter table entry.
|
|
//
|
|
FilterReplica = FilterEntry->Replica;
|
|
GhtDereferenceEntryByAddress(FilterTable, FilterEntry, TRUE);
|
|
}
|
|
|
|
return (Replica == FilterReplica);
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlCommand(
|
|
PCOMMAND_PACKET CmdPkt
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Process a command packet sent to the Journal sub-system. External
|
|
components interact with the Journal by building a command packet and
|
|
submitting it to the Journal Process Queue. The typical way journal
|
|
processing is started is by issuing the following series of command
|
|
packets using FrsSubmitCommand.
|
|
|
|
<Start the journal monitor thread>
|
|
|
|
CMD_INIT_SUBSYSTEM: Init and start the journal for all replicas
|
|
|
|
CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set A
|
|
CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set B
|
|
o
|
|
o
|
|
CMD_JOURNAL_INIT_ONE_RS: Init service for Replica Set Z
|
|
|
|
CMD_STOP_SUBSYSTEM: Stop journal processing for all replica sets
|
|
and terminate the journal sub-system.
|
|
|
|
|
|
Arguments:
|
|
|
|
CmdPkt: Command packet to process.
|
|
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCommand:"
|
|
|
|
LIST_ENTRY DeadList;
|
|
PLIST_ENTRY Entry;
|
|
ULONG WStatus = ERROR_SUCCESS;
|
|
ULONG FStatus;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
FILETIME SystemTime;
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
|
|
|
|
|
|
DPRINT1(5, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
switch (CmdPkt->Command) {
|
|
|
|
|
|
case CMD_COMMAND_ERROR:
|
|
DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
|
|
break;
|
|
|
|
case CMD_INIT_SUBSYSTEM:
|
|
|
|
//
|
|
// Initialize the journal
|
|
//
|
|
WStatus = JournalMonitorInit();
|
|
DEBUG_FLUSH();
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
if (!FrsIsShuttingDown) {
|
|
DPRINT_WS(0, "ERROR - Journal cannot start;", WStatus);
|
|
}
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Init the change order accept thread.
|
|
//
|
|
if (ChgOrdAcceptInitialize() != FrsErrorSuccess) {
|
|
DPRINT(0, "ERROR - Journal cannot start; can't start change order thread.\n");
|
|
WStatus = ERROR_GEN_FAILURE;
|
|
break;
|
|
}
|
|
|
|
DPRINT(0, "Journal has started.\n");
|
|
DEBUG_FLUSH();
|
|
SetEvent(JournalEvent);
|
|
|
|
//
|
|
// Free up memory by reducing our working set size
|
|
//
|
|
SetProcessWorkingSetSize(ProcessHandle, (SIZE_T)-1, (SIZE_T)-1);
|
|
break;
|
|
|
|
//
|
|
// Close all the journal VMEs, rundown the Process Queue and free
|
|
// all the queue entries. On return the main process loop with
|
|
// see the queue is rundown and will terminate the thread.
|
|
//
|
|
case CMD_STOP_SUBSYSTEM:
|
|
|
|
DPRINT(4, "Stopping Journal Subsystem\n");
|
|
JrnlCloseAll();
|
|
FrsRtlRunDownQueue(&JournalProcessQueue, &DeadList);
|
|
FrsFreeTypeList(&DeadList);
|
|
break;
|
|
|
|
case CMD_PAUSE_SUBSYSTEM:
|
|
case CMD_QUERY_INFO_SUBSYSTEM:
|
|
case CMD_SET_CONFIG_SUBSYSTEM:
|
|
case CMD_QUERY_CONFIG_SUBSYSTEM:
|
|
case CMD_CANCEL_COMMAND_SUBSYSTEM:
|
|
case CMD_READ_SUBSYSTEM:
|
|
case CMD_WRITE_SUBSYSTEM:
|
|
goto UNSUPPORTED_COMMAND;
|
|
|
|
|
|
case CMD_START_SERVICE:
|
|
case CMD_STOP_SERVICE:
|
|
case CMD_PAUSE_SERVICE:
|
|
case CMD_QUERY_INFO_SERVICE:
|
|
case CMD_SET_CONFIG_SERVICE:
|
|
case CMD_QUERY_CONFIG_SERVICE:
|
|
case CMD_CANCEL_COMMAND_SERVICE:
|
|
case CMD_READ_SERVICE:
|
|
case CMD_WRITE_SERVICE:
|
|
|
|
break;
|
|
|
|
//
|
|
// This command is an acknowledgement from the journal read thread that
|
|
// journal read activity on this volume (pVme parameter) has paused.
|
|
// Set the state to JRNL_STATE_PAUSED and signal the event in the
|
|
// VME so any waiters can proceed. Also mark all replica sets on this
|
|
// volume as paused.
|
|
//
|
|
case CMD_JOURNAL_PAUSED:
|
|
|
|
pVme = CmdPkt->Parameters.JournalRequest.pVme;
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_PAUSED);
|
|
|
|
//
|
|
// Save time of last replica pause. LastPause
|
|
//
|
|
GetSystemTimeAsFileTime(&SystemTime);
|
|
ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
|
|
//
|
|
// Iterator pE is of type REPLICA.
|
|
//
|
|
ConfigRecord = (PCONFIG_TABLE_RECORD) (pE->ConfigTable.pDataRecord);
|
|
COPY_TIME(&ConfigRecord->LastPause, &SystemTime);
|
|
);
|
|
|
|
SetEvent(pVme->Event);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
break;
|
|
|
|
//
|
|
// This command initializes the journal and database for a single replica
|
|
// set. It is intended to be used when creating or starting a replica
|
|
// set after the initial system startup has occurred.
|
|
// Note we don't complete the command here since we propagate it on
|
|
// to the DB server. In the case of failure the command is completed
|
|
// here and status is returned in the cmd pkt ErrorStatus field.
|
|
// The Replica->FStatus field may have more status about the failure.
|
|
//
|
|
case CMD_JOURNAL_INIT_ONE_RS:
|
|
|
|
FStatus = JrnlInitOneReplicaSet(CmdPkt);
|
|
if (FRS_SUCCESS(FStatus)) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
WStatus = ERROR_GEN_FAILURE;
|
|
break;
|
|
|
|
//
|
|
// Delete a journal directory filter table entry. We do it in the journal
|
|
// thread so we don't have to lock the table.
|
|
//
|
|
case CMD_JOURNAL_DELETE_DIR_FILTER_ENTRY:
|
|
|
|
WStatus = JrnlDeleteDirFilterEntry(
|
|
JrReplica(CmdPkt)->pVme->FilterTable,
|
|
&JrDFileID(CmdPkt),
|
|
NULL);
|
|
|
|
break;
|
|
|
|
//
|
|
// Cleanout unneeded entries in the Journal Write Filter.
|
|
//
|
|
case CMD_JOURNAL_CLEAN_WRITE_FILTER:
|
|
|
|
WStatus = JrnlCleanWriteFilter(CmdPkt);
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
goto UNSUPPORTED_COMMAND;
|
|
|
|
} // end switch
|
|
|
|
//
|
|
// Retire the command packet.
|
|
//
|
|
FrsCompleteCommand(CmdPkt, WStatus);
|
|
|
|
return WStatus;
|
|
|
|
|
|
|
|
UNSUPPORTED_COMMAND:
|
|
DPRINT1(0, "ERROR - Invalid journal minor command: %d\n", CmdPkt->Command);
|
|
return ERROR_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
JET_ERR
|
|
JrnlInsertFilterEntry(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PTABLE_CTX TableCtx,
|
|
IN PVOID Record,
|
|
IN PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This is a worker function passed to FrsEnumerateTable(). Each time
|
|
it is called It inserts a DIRTable record into the Volume filter table.
|
|
|
|
Arguments:
|
|
|
|
ThreadCtx - Needed to access Jet. (Not used).
|
|
TableCtx - A ptr to a DIRTable context struct.
|
|
Record - A ptr to a DIRTable record.
|
|
Context - A ptr to the Replica set we are loading data for.
|
|
|
|
Return Value:
|
|
|
|
A Jet error status. Success means call us with the next record.
|
|
Failure means don't call again and pass our status back to the
|
|
caller of FrsEnumerateTable().
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlInsertFilterEntry:"
|
|
|
|
|
|
PDIRTABLE_RECORD DIRTableRec = (PDIRTABLE_RECORD) Record;
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
|
|
ULONG NameLen, GStatus;
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
|
|
|
|
//
|
|
// Abort enum if shutting down.
|
|
//
|
|
if (FrsIsShuttingDown) {
|
|
return JET_errTermInProgress;
|
|
}
|
|
|
|
//
|
|
// Build a filter table record big enough to hold the filename
|
|
// and insert into the volume filter table. Note that the
|
|
// file name field is large enough to hold the terminating
|
|
// UNICODE_NULL because the file name field is defined as
|
|
// a wchar array of length 1 in FILTER_TABLE_ENTRY.
|
|
//
|
|
NameLen = wcslen(DIRTableRec->DFileName) * sizeof(WCHAR);
|
|
FilterEntry = FrsAllocTypeSize(FILTER_TABLE_ENTRY_TYPE, NameLen);
|
|
|
|
//
|
|
// Copy the data from the DIRTable record to the filter entry
|
|
// and add a pointer to the Replica struct.
|
|
//
|
|
CopyMemory(FilterEntry->DFileName, DIRTableRec->DFileName, NameLen + 2);
|
|
FilterEntry->DFileID = DIRTableRec->DFileID;
|
|
FilterEntry->DParentFileID = DIRTableRec->DParentFileID;
|
|
FilterEntry->DReplicaNumber = DIRTableRec->DReplicaNumber;
|
|
FilterEntry->Replica = Replica;
|
|
FilterEntry->UFileName.Length = (USHORT)NameLen;
|
|
FilterEntry->UFileName.Buffer[NameLen/2] = UNICODE_NULL;
|
|
|
|
GStatus = GhtInsert(Replica->pVme->FilterTable, FilterEntry, TRUE, FALSE);
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT1(0, "ERROR - GhtInsert Failed: %d\n", GStatus);
|
|
DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
|
|
FrsFreeType(FilterEntry);
|
|
return JET_errKeyDuplicate;
|
|
}
|
|
|
|
return JET_errSuccess;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlCleanWriteFilter(
|
|
PCOMMAND_PACKET CmdPkt
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Walk thru all active replica sets on this volume. Find the minimum
|
|
value for FSVolLastUsn. This is the Joint journal commit point for all
|
|
replica sets on the volume. No replica set will start a journal
|
|
read before this point.
|
|
|
|
Then enumerate all entries of the Volume Write Filter table and free
|
|
the entries whose USN is less than the Joint Journal commit point.
|
|
|
|
Arguments:
|
|
|
|
CmdPkt: Command packet to process.
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCleanWriteFilter:"
|
|
|
|
USN JointJournalCommitUsn = MAXLONGLONG;
|
|
LONGLONG FSVolLastUSN;
|
|
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
ULONG TimeOut = 5*JRNL_CLEAN_WRITE_FILTER_INTERVAL;
|
|
BOOL FoundpVme = FALSE;
|
|
|
|
//
|
|
// Ignore if pVme is no longer active; don't retry
|
|
//
|
|
pVme = JrpVme(CmdPkt);
|
|
ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
if (pVme == pE) {
|
|
FoundpVme = TRUE;
|
|
break;
|
|
}
|
|
);
|
|
if (!FoundpVme) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// If this journal is currently running then make a cleaning pass.
|
|
//
|
|
if (pVme->IoActive) {
|
|
|
|
ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
|
|
// Iterator pE is of type PREPLICA.
|
|
//
|
|
// Get QuadWriteLock lock to avoid quadword tearing when FSVolLastUSN is read.
|
|
//
|
|
ConfigRecord = (PCONFIG_TABLE_RECORD)pE->ConfigTable.pDataRecord;
|
|
|
|
AcquireQuadLock(&pVme->QuadWriteLock);
|
|
FSVolLastUSN = ConfigRecord->FSVolLastUSN;
|
|
ReleaseQuadLock(&pVme->QuadWriteLock);
|
|
|
|
if (FSVolLastUSN < JointJournalCommitUsn) {
|
|
JointJournalCommitUsn = FSVolLastUSN;
|
|
}
|
|
);
|
|
|
|
|
|
DPRINT1(5, "WRITE FILTER TABLE CLEAN AT JointJournalCommitUsn = %08x %08x\n",
|
|
PRINTQUAD(JointJournalCommitUsn));
|
|
|
|
QHashEnumerateTable(pVme->FrsWriteFilter,
|
|
JrnlCleanWriteFilterWorker,
|
|
&JointJournalCommitUsn);
|
|
|
|
TimeOut = JRNL_CLEAN_WRITE_FILTER_INTERVAL;
|
|
}
|
|
//
|
|
// Resubmit the clean filter request.
|
|
//
|
|
JrnlSubmitCleanWriteFilter(pVme, TimeOut);
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlCleanWriteFilterWorker (
|
|
PQHASH_TABLE Table,
|
|
PQHASH_ENTRY BeforeNode,
|
|
PQHASH_ENTRY TargetNode,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru QHashEnumerateTable() to process
|
|
an entry.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated
|
|
BeforeNode -- ptr to the QhashEntry before the node of interest.
|
|
TargetNode -- ptr to the QhashEntry of interest.
|
|
Context - ptr to the USN to compare against.
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCleanWriteFilterWorker:"
|
|
|
|
USN JointJournalCommitUsn = *(USN *)Context;
|
|
|
|
|
|
if ( (USN)(TargetNode->QKey) < JointJournalCommitUsn) {
|
|
|
|
DPRINT5(4, "DelWrtFilterEntry - BeforeNode: %08x, Link: %08x,"
|
|
" Flags: %08x, Tag: %08x %08x, Data: %08x %08x\n",
|
|
BeforeNode, TargetNode->NextEntry, TargetNode->Flags,
|
|
PRINTQUAD(TargetNode->QKey), PRINTQUAD(TargetNode->QData));
|
|
|
|
//
|
|
// Tell QHashEnumerateTable() to delete the node and continue the enum.
|
|
//
|
|
return FrsErrorDeleteRequested;
|
|
}
|
|
|
|
return FrsErrorSuccess;
|
|
}
|
|
|
|
|
|
|
|
|
|
VOID
|
|
JrnlSubmitCleanWriteFilter(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN ULONG TimeOut
|
|
)
|
|
/*++
|
|
Routine Description:
|
|
|
|
Queue a work request to clean the write filter in TimeOut Seconds.
|
|
|
|
Arguments:
|
|
|
|
pVme -- The Vme of the write filter to clean.
|
|
TimeOut -- The max time to wait before giving up and doing Unjoin.
|
|
|
|
Return Value:
|
|
None.
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlSubmitCleanWriteFilter:"
|
|
|
|
PCOMMAND_PACKET Cmd;
|
|
|
|
Cmd = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_CLEAN_WRITE_FILTER);
|
|
|
|
JrReplica(Cmd) = NULL;
|
|
JrpVme(Cmd) = pVme;
|
|
|
|
DPRINT1(5, "Submit CMD_JOURNAL_CLEAN_WRITE_FILTER %08x\n", Cmd);
|
|
|
|
FrsDelQueueSubmit(Cmd, TimeOut);
|
|
}
|
|
|
|
|
|
|
|
|
|
BOOL
|
|
JrnlSetReplicaState(
|
|
IN PREPLICA Replica,
|
|
IN ULONG NewState
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Change the state of the Replica set and move it to the associated list.
|
|
Note: If a replica set is in the error state it must first move back
|
|
to the initializing state before it can leave the error state.
|
|
|
|
Arguments:
|
|
|
|
Replica - The replica set whose state is changing.
|
|
|
|
NewState - The new state.
|
|
|
|
Return Value:
|
|
|
|
TRUE if state change allowed.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlSetReplicaState:"
|
|
ULONG OldState;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
WCHAR DsPollingIntervalStr[7]; // Max interval is NTFRSAPI_MAX_INTERVAL.
|
|
extern ULONG DsPollingInterval;
|
|
|
|
|
|
//
|
|
// Lock the replica lists
|
|
//
|
|
EnterCriticalSection(&JrnlReplicaStateLock);
|
|
|
|
OldState = Replica->ServiceState;
|
|
|
|
if (OldState > JRNL_STATE_MAX) {
|
|
DPRINT2(0, ":S: ERROR - Invalid previous Replica->ServiceState (%d) for Replica %ws\n",
|
|
OldState, Replica->ReplicaName->Name);
|
|
FRS_ASSERT(!"Invalid previous Replica->ServiceState");
|
|
goto CLEANUP;
|
|
}
|
|
|
|
if (NewState > JRNL_STATE_MAX) {
|
|
DPRINT2(0, ":S: ERROR - Invalid new Replica->ServiceState (%d) for Replica %ws\n",
|
|
NewState, Replica->ReplicaName->Name);
|
|
FRS_ASSERT(!"Invalid new Replica->ServiceState");
|
|
goto CLEANUP;
|
|
}
|
|
|
|
//
|
|
// If this replica set is in the ERROR State then the only allowed next
|
|
// state is INITIALIZING.
|
|
//
|
|
if ((REPLICA_IN_ERROR_STATE(OldState) || REPLICA_STATE_NEEDS_RESTORE(OldState)) &&
|
|
(NewState != REPLICA_STATE_INITIALIZING) &&
|
|
|
|
!REPLICA_STATE_NEEDS_RESTORE(NewState)) {
|
|
|
|
DPRINT4(4, ":S: ERROR: Replica (%d) %ws state change from %s to %s disallowed\n",
|
|
Replica->ReplicaNumber,
|
|
(Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
|
|
RSS_NAME(OldState),
|
|
RSS_NAME(NewState));
|
|
LeaveCriticalSection(&JrnlReplicaStateLock);
|
|
return FALSE;
|
|
}
|
|
|
|
DPRINT4(4, ":S: Replica (%d) %ws state change from %s to %s\n",
|
|
Replica->ReplicaNumber,
|
|
(Replica->ReplicaName != NULL) ? Replica->ReplicaName->Name : L"<null>",
|
|
RSS_NAME(OldState),
|
|
RSS_NAME(NewState));
|
|
|
|
//
|
|
// if no state change, we're done.
|
|
//
|
|
if (OldState == NewState) {
|
|
goto CLEANUP;
|
|
}
|
|
|
|
//
|
|
// If we went from Active to Paused and are not in Journal Replay mode
|
|
// then advance the Replica->LastUsnRecordProcessed to
|
|
// pVme->CurrentUsnRecordDone.
|
|
//
|
|
pVme = Replica->pVme;
|
|
if (pVme != NULL) {
|
|
if ((OldState == REPLICA_STATE_ACTIVE) &&
|
|
(NewState == REPLICA_STATE_PAUSED) &&
|
|
!REPLICA_REPLAY_MODE(Replica, pVme)) {
|
|
|
|
DPRINT2(4, ":U: Replica->LastUsnRecordProcessed was: %08x %08x now: %08x %08x\n",
|
|
PRINTQUAD(Replica->LastUsnRecordProcessed),
|
|
PRINTQUAD(pVme->CurrentUsnRecordDone));
|
|
|
|
FRS_ASSERT(pVme->CurrentUsnRecordDone >= Replica->LastUsnRecordProcessed);
|
|
|
|
AcquireQuadLock(&pVme->QuadWriteLock);
|
|
Replica->LastUsnRecordProcessed = pVme->CurrentUsnRecordDone;
|
|
ReleaseQuadLock(&pVme->QuadWriteLock);
|
|
}
|
|
}
|
|
|
|
//
|
|
// update the new state.
|
|
//
|
|
Replica->ServiceState = NewState;
|
|
|
|
//
|
|
// if no list change, we're done.
|
|
//
|
|
if (RSS_LIST(OldState) == RSS_LIST(NewState)) {
|
|
goto CLEANUP;
|
|
}
|
|
|
|
//
|
|
// Remove from current list and add to new list.
|
|
//
|
|
if (RSS_LIST(OldState) != NULL) {
|
|
FrsRtlRemoveEntryQueue(RSS_LIST(OldState), &Replica->ReplicaList);
|
|
}
|
|
if (RSS_LIST(NewState) != NULL) {
|
|
FrsRtlInsertTailQueue(RSS_LIST(NewState), &Replica->ReplicaList);
|
|
}
|
|
|
|
CLEANUP:
|
|
|
|
if (REPLICA_IN_ERROR_STATE(NewState) &&
|
|
!REPLICA_FSTATUS_ROOT_HAS_MOVED(Replica->FStatus)) {
|
|
//
|
|
// Post an error log entry if the replica is in
|
|
// error state but not because the root has moved.
|
|
// If the root has moved then the error log has
|
|
// already been written when the move was detected
|
|
// and this generic eventlog here might confuse the user.
|
|
//
|
|
PWCHAR WStatusUStr, FStatusUStr;
|
|
|
|
|
|
//
|
|
// Post the failure in the event log.
|
|
//
|
|
if (Replica->Root != NULL) {
|
|
WStatusUStr = L"";
|
|
FStatusUStr = FrsAtoW(ErrLabelFrs(Replica->FStatus));
|
|
|
|
EPRINT8(EVENT_FRS_REPLICA_SET_CREATE_FAIL,
|
|
Replica->SetName->Name,
|
|
ComputerDnsName,
|
|
Replica->MemberName->Name,
|
|
Replica->Root,
|
|
Replica->Stage,
|
|
JetPath,
|
|
WStatusUStr,
|
|
FStatusUStr);
|
|
|
|
FrsFree(FStatusUStr);
|
|
}
|
|
|
|
//
|
|
// Post the generic recovery steps message.
|
|
//
|
|
EPRINT1(EVENT_FRS_IN_ERROR_STATE, JetPath);
|
|
} else if (NewState == REPLICA_STATE_JRNL_WRAP_ERROR) {
|
|
|
|
//
|
|
// Get the DsPollingInteval in minutes.
|
|
//
|
|
_itow(DsPollingInterval / (60 * 1000), DsPollingIntervalStr, 10);
|
|
|
|
if(DebugInfo.EnableJrnlWrapAutoRestore) {
|
|
EPRINT4(EVENT_FRS_REPLICA_IN_JRNL_WRAP_ERROR, Replica->SetName->Name, Replica->Root,
|
|
Replica->Volume, DsPollingIntervalStr);
|
|
} else {
|
|
EPRINT4(EVENT_FRS_REPLICA_IN_JRNL_WRAP_NO_AUTO_RESTORE, Replica->SetName->Name, Replica->Root,
|
|
Replica->Volume, DsPollingIntervalStr);
|
|
}
|
|
}
|
|
|
|
LeaveCriticalSection(&JrnlReplicaStateLock);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlPrepareService1(
|
|
PREPLICA Replica
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Open the NTFS volume journal and initialize a Volume Monitor Entry for it
|
|
if this is the first replica set to use the volume. The REPLICA struct
|
|
is initialized with a pointer to the volume monitor entry and the file
|
|
path to the root of the replica tree for use in file name generation.
|
|
Init the VME Volume Sequence Number from the Replica config record,
|
|
taking the maximum value seen so far. This value is needed before we
|
|
can do any ReplicaTreeLoad operations on a new replica so we can set
|
|
the correct value in the IDTable and DIRTable entries.
|
|
|
|
After any new replica sets are loaded JrnlPrepareService2() is
|
|
called to init the Volume Filter Table with the directory entries for
|
|
every replica set on the volume.
|
|
|
|
|
|
Arguments:
|
|
|
|
Replica - The replica set we are initializing.
|
|
|
|
Return Value:
|
|
|
|
A Win32 error status.
|
|
Replica->FStatus has the FRS Error status return.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlPrepareService1:"
|
|
|
|
ULONGLONG CurrentTime;
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
ULONG WStatus;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
CHAR TimeStr[TIME_STRING_LENGTH];
|
|
|
|
|
|
if (Replica == NULL) {
|
|
return ERROR_INVALID_PARAMETER;
|
|
}
|
|
|
|
DPRINT1(5, ":S: JrnlPrepareService1 for %ws\n", Replica->ReplicaName->Name);
|
|
|
|
|
|
ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
|
|
|
|
//
|
|
// Open the journal. Return the Volume Monitor Entry and save it in
|
|
// the Replica struct.
|
|
//
|
|
|
|
WStatus = JrnlOpen(Replica, &pVme, ConfigRecord);
|
|
|
|
if (!WIN_SUCCESS(WStatus) || (pVme == NULL)) {
|
|
//
|
|
// Replica->FStatus has the FRS Error status return.
|
|
//
|
|
DPRINT_WS(0, "Error from JrnlOpen", WStatus);
|
|
return WStatus;
|
|
}
|
|
|
|
//
|
|
// Set the journal recovery range end point for this replica set.
|
|
//
|
|
Replica->JrnlRecoveryEnd = pVme->JrnlRecoveryEnd;
|
|
|
|
//
|
|
// Start the Volume sequence number from the highest value any replica set
|
|
// has used up to now. The FrsVsn is saved in a replica config record
|
|
// every time VSN_SAVE_INTERVAL VSN's have been handed out. If we crashed
|
|
// we could be low by at most VSN_SAVE_INTERVAL VSN's assuming the update
|
|
// request completed. At startup we add VSN_RESTART_INCREMENT to the
|
|
// FrsVsn to ensure we don't use the same VSN twice. Then update the
|
|
// config record so if we start handing out VSNs and crash we don't reuse
|
|
// them. Can't do update here since this Replica struct is not on the
|
|
// VolReplicaList yet.
|
|
//
|
|
// The above solution does not work in the case where the database is
|
|
// lost or restored from backup. In this case other members of the replcia
|
|
// set could have VSNs for files that we originated which are larger than
|
|
// the current VSN value we might now be using. This causes two problems:
|
|
// 1. It fouls up dampening checks when we send out local COs with
|
|
// VSNs that are too small in comparison to what we have sent out in
|
|
// the past resulting in dropped COs, and
|
|
// 2. When we VVJoin with our inbound partners and start receiving change
|
|
// orders that were originated from us in the past, they could arrive
|
|
// with VSNs that are larger than what we are now using. When these
|
|
// "VVJoin Change Orders" to thru VV retire our MasterVV entry in the
|
|
// VVretire version vector is advanced to this larger value. This
|
|
// will cause subsequent locally generated COs to be marked out of order
|
|
// since their VSN is now smaller than the value in the MasterVV entry.
|
|
// This will prevent downsream dampening problems but it could allow
|
|
// a local dir create / child file create to be reordered downstream
|
|
// (since both are marked out of order) and cause the child create to
|
|
// fail if the parent create hasn't occured yet.
|
|
//
|
|
// To deal with the above nonsense we will now use a GMT time value as
|
|
// our initial VSN. We will not join with a partner whose time is
|
|
// off by +/- MaxPartnerClockSkew. So if we start the VSN at
|
|
// GMT + 2*MaxPartnerClockSkew then even if the last CO we originated, before
|
|
// we lost the database, occurred at GMT+MaxPartnerClockSkew and now at
|
|
// restart our current time has moved back to GMT-MaxPartnerClockSkew then
|
|
// we will still join with our partner and our new starting VSN is:
|
|
// (GMT-MaxPartnerClockSkew) + 2*MaxPartnerClockSkew = GMT+MaxPartnerClockSkew
|
|
//
|
|
// This is as large as the last VSN we could have generated if the time
|
|
// between the last CO generated (the crash) and the time at recovery
|
|
// was zero.
|
|
//
|
|
|
|
GetSystemTimeAsFileTime((PFILETIME)&CurrentTime);
|
|
|
|
LOCK_VME(pVme);
|
|
if (CurrentTime < ConfigRecord->FrsVsn) {
|
|
//
|
|
// Note: This may not be an error situation since on every restart
|
|
// of the service we advance time by 2*MaxPartnerClockSkew to
|
|
// ensure monotonicity (see above) so any time we shutdown the
|
|
// service before we have run at least this amount of time it will
|
|
// appear that time has moved backwards.
|
|
//
|
|
DPRINT(1, ":S: WARNING: Setting FrsVsn - Current system Time has moved backwards from value in config record.\n");
|
|
|
|
FileTimeToString((PFILETIME) &CurrentTime, TimeStr);
|
|
DPRINT2(1, ":S: WARNING: CurrentTime is (%08x %08x) %s\n",
|
|
PRINTQUAD(CurrentTime), TimeStr);
|
|
|
|
FileTimeToString((PFILETIME) &ConfigRecord->FrsVsn, TimeStr);
|
|
DPRINT2(1, ":S: WARNING: ConfigRecord->FrsVsn is (%08x %08x) %s\n",
|
|
PRINTQUAD(ConfigRecord->FrsVsn), TimeStr);
|
|
|
|
CurrentTime = ConfigRecord->FrsVsn;
|
|
}
|
|
|
|
if ((CurrentTime + 2*MaxPartnerClockSkew) > pVme->FrsVsn) {
|
|
pVme->FrsVsn = CurrentTime + 2*MaxPartnerClockSkew;
|
|
|
|
DPRINT(3, ":S: Setting new pVme->FrsVsn to Current time + 2*MaxPartnerClockSkew\n");
|
|
}
|
|
|
|
FileTimeToString((PFILETIME) &pVme->FrsVsn, TimeStr);
|
|
DPRINT2(3, ":S: pVme->FrsVsn is (%08x %08x) %s\n", PRINTQUAD(pVme->FrsVsn), TimeStr);
|
|
|
|
if (GlobSeqNum == QUADZERO) {
|
|
//
|
|
// Init the global sequence number with the above computed VSN to keep
|
|
// it monotonically increasing.
|
|
//
|
|
EnterCriticalSection(&GlobSeqNumLock);
|
|
GlobSeqNum = pVme->FrsVsn;
|
|
LeaveCriticalSection(&GlobSeqNumLock);
|
|
}
|
|
|
|
UNLOCK_VME(pVme);
|
|
|
|
|
|
Replica->pVme = pVme;
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlPrepareService2(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PREPLICA Replica
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Load the volume filter hash table with the DIRTable entries for
|
|
this replica set. Create the change order hash table for this replica
|
|
set and add the REPLICA struct to the replica list for this volume.
|
|
|
|
Enumerate through the IDTable and load the parent Fid Hash Table.
|
|
|
|
Note: This function is called from the DB Service thread since we have
|
|
to be able to pause the journal before the dir table enum can be done.
|
|
|
|
|
|
Arguments:
|
|
|
|
ThreadCtx -- ptr to the thread context (could be from journal or DB thread)
|
|
Replica - The replica set we are initializing.
|
|
|
|
Return Value:
|
|
|
|
A Win32 error status.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlPrepareService2:"
|
|
|
|
JET_ERR jerr, jerr1;
|
|
|
|
JET_TABLEID DIRTid;
|
|
CHAR DIRTableName[JET_cbNameMost];
|
|
PTABLE_CTX DIRTableCtx;
|
|
|
|
JET_TABLEID IDTid;
|
|
CHAR IDTableName[JET_cbNameMost];
|
|
PTABLE_CTX IDTableCtx;
|
|
|
|
PREPLICA_THREAD_CTX RtCtx;
|
|
|
|
PCONFIG_TABLE_RECORD ConfigRecord;
|
|
ULONG ReplicaNumber;
|
|
ULONG WStatus;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
JET_TABLEID FrsOpenTableSaveTid; // for FrsOpenTableMacro DEBUG
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry;
|
|
|
|
|
|
if (Replica == NULL) {
|
|
return ERROR_INVALID_PARAMETER;
|
|
}
|
|
|
|
DPRINT1(5, ":S: JrnlPrepareService2 for %ws\n", Replica->ReplicaName->Name);
|
|
|
|
|
|
ConfigRecord = (PCONFIG_TABLE_RECORD)Replica->ConfigTable.pDataRecord;
|
|
pVme = Replica->pVme;
|
|
|
|
//
|
|
// Allocate the replica thread context so we can get the directory
|
|
// filter table. Link it to the Replic context list head.
|
|
//
|
|
RtCtx = FrsAllocType(REPLICA_THREAD_TYPE);
|
|
FrsRtlInsertTailList(&Replica->ReplicaCtxListHead, &RtCtx->ReplicaCtxList);
|
|
|
|
ReplicaNumber = Replica->ReplicaNumber;
|
|
DIRTableCtx = &RtCtx->DIRTable;
|
|
//
|
|
// Open the DIR table.
|
|
//
|
|
jerr = DBS_OPEN_TABLE(ThreadCtx, DIRTableCtx, ReplicaNumber, DIRTableName, &DIRTid);
|
|
CLEANUP1_JS(0, "++ DBS_OPEN_TABLE (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
|
|
|
|
//
|
|
// Walk through the DirTable and load the data into the Volume Filter Table
|
|
// by calling JrnlInsertFilterEntry() for this Replica.
|
|
// The Replica points to the VME and the VME points to the
|
|
// volume filter table.
|
|
//
|
|
jerr = FrsEnumerateTable(ThreadCtx,
|
|
DIRTableCtx,
|
|
DFileGuidIndexx,
|
|
JrnlInsertFilterEntry,
|
|
Replica);
|
|
if ((jerr != JET_errNoCurrentRecord)) {
|
|
CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", DIRTableName, jerr, RETURN_INV_DATA);
|
|
}
|
|
|
|
//
|
|
// Now that all the entries are in place, walk through the hash table and
|
|
// construct the child lists for this ReplicaSet. This is done as a
|
|
// second pass since we can't be certain of the order in which the
|
|
// entries come from the database. First get the Child List Lock for the
|
|
// Replica Set.
|
|
//
|
|
|
|
JrnlAcquireChildLock(Replica);
|
|
WStatus = (ULONG)GhtEnumerateTable(pVme->FilterTable,
|
|
JrnlFilterLinkChildNoError,
|
|
Replica);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
JrnlReleaseChildLock(Replica);
|
|
DPRINT_WS(0, "Error from JrnlLinkChildren", WStatus);
|
|
GHT_DUMP_TABLE(4, pVme->FilterTable);
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// Go find the root entry for this Replica Set in the Filter Table.
|
|
//
|
|
FilterEntry = (PFILTER_TABLE_ENTRY) GhtEnumerateTable(pVme->FilterTable,
|
|
JrnlFilterGetRoot,
|
|
Replica);
|
|
if (FilterEntry == NULL) {
|
|
JrnlReleaseChildLock(Replica);
|
|
DPRINT1(0, ":S: Error from JrnlFilterGetRoot. No Root for %d\n",
|
|
Replica->ReplicaNumber);
|
|
GHT_DUMP_TABLE(5, pVme->FilterTable);
|
|
goto RETURN_INV_DATA;
|
|
}
|
|
|
|
//
|
|
// Replay the inbound log table and update the volume filter table with
|
|
// any directory changes.
|
|
//
|
|
// Note: Add code to replay the inbound log and update the filter table.
|
|
// It may be better to handle this at startup when we are recovering the
|
|
// staging areas. But, the filter table may not exist yet.
|
|
|
|
|
|
#if DBG
|
|
if (DoDebug(5, DEBSUB)) {
|
|
DPRINT(5," >>>>>>>>>>>>>>> Top Down dump of Filter Tree <<<<<<<<<<<<<<<<\n");
|
|
JrnlEnumerateFilterTreeTD(pVme->FilterTable,
|
|
FilterEntry,
|
|
JrnlSubTreePrint,
|
|
Replica);
|
|
}
|
|
#endif DBG
|
|
|
|
JrnlReleaseChildLock(Replica);
|
|
|
|
//
|
|
// Build the Parent directory table.
|
|
//
|
|
|
|
IDTableCtx = &RtCtx->IDTable;
|
|
//
|
|
// Open the ID table.
|
|
//
|
|
jerr = DBS_OPEN_TABLE(ThreadCtx, IDTableCtx, ReplicaNumber, IDTableName, &IDTid);
|
|
CLEANUP1_JS(0, "++ Building parent FID table (%s):", IDTableName, jerr, RETURN_INV_DATA);
|
|
|
|
//
|
|
// Walk through the IDTable and load the data into the Volume Parent Dir
|
|
// Table by calling JrnlInsertParentEntry() for this Replica.
|
|
// The Replica points to the VME and the VME points to the
|
|
// parent dir table.
|
|
//
|
|
jerr = FrsEnumerateTable(ThreadCtx,
|
|
IDTableCtx,
|
|
GuidIndexx,
|
|
JrnlInsertParentEntry,
|
|
Replica);
|
|
if ((jerr != JET_errNoCurrentRecord)) {
|
|
CLEANUP1_JS(0, "++ FrsEnumerateTable (%s) error:", IDTableName, jerr, RETURN_INV_DATA);
|
|
}
|
|
//
|
|
// Replay the inbound log table and update the volume Parent Dir table
|
|
// for any file creates, deletes or renames.
|
|
//
|
|
// Note: Add code to replay the inbound log and update the Parent Dir table.
|
|
// It may be better to handle this at startup when we are recovering the
|
|
// staging areas. But, the filter table may not exist yet.
|
|
//
|
|
// Add the replica struct to the list of replica sets served by this
|
|
// volume journal.
|
|
//
|
|
if (AcquireVmeRef(pVme) == 0) {
|
|
WStatus = ERROR_OPERATION_ABORTED;
|
|
goto RETURN;
|
|
}
|
|
|
|
/////////////////////////////////////////////////
|
|
|
|
//
|
|
// Start the first read on the volume. Check first if it is PAUSED and
|
|
// set state to starting. If this is the first replica set on the volume
|
|
// the state will be INITIALIZING and we leave that alone so additional
|
|
// journal buffers get allocated.
|
|
//
|
|
// pVme = Replica->pVme;
|
|
if (pVme->JournalState != JRNL_STATE_INITIALIZING) {
|
|
if (pVme->JournalState == JRNL_STATE_PAUSED) {
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STARTING);
|
|
} else {
|
|
DPRINT2(0, "++ ERROR - Journal for %ws is in an unexpected state: %s\n",
|
|
Replica->ReplicaName->Name, RSS_NAME(pVme->JournalState));
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ERROR);
|
|
WStatus = ERROR_OPERATION_ABORTED;
|
|
goto RETURN;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Initialize the LastUsnRecordProcessed for this replica set to the value
|
|
// saved in the config record or the value from the Inlog record with the
|
|
// largest USN so we don't reprocess them. If we end up reading (replaying)
|
|
// the journal at an earlier point to let another replica set catch up we
|
|
// need to ignore those old records. If LastShutdown or FSVolLastUSN is 0
|
|
// then this is the very first time we have started replication on this
|
|
// replica set so set the FSVolLastUSN and LastUsnRecordProcessed to the
|
|
// current journal read point, pVme->JrnlReadPoint.
|
|
//
|
|
if ((ConfigRecord->LastShutdown == 0) ||
|
|
(ConfigRecord->FSVolLastUSN == 0)) {
|
|
|
|
if (!(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING)) {
|
|
DPRINT2(0, ":S: BETA ERROR - Service state is %d; not _CREATING for %ws\n",
|
|
ConfigRecord->ServiceState, Replica->ReplicaName->Name);
|
|
}
|
|
ConfigRecord->FSVolLastUSN = pVme->JrnlReadPoint;
|
|
Replica->LastUsnRecordProcessed = pVme->JrnlReadPoint;
|
|
DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
|
|
} else {
|
|
|
|
//
|
|
// Start where we left off and minimize with any other replicas.
|
|
//
|
|
Replica->LastUsnRecordProcessed = ConfigRecord->FSVolLastUSN;
|
|
DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x\n", PRINTQUAD(Replica->LastUsnRecordProcessed));
|
|
|
|
//
|
|
// Advance to largest USN of Inlog record.
|
|
//
|
|
if (Replica->JrnlRecoveryStart > Replica->LastUsnRecordProcessed) {
|
|
Replica->LastUsnRecordProcessed = Replica->JrnlRecoveryStart;
|
|
DPRINT1(4, ":S: Replica->LastUsnRecordProcessed is: %08x %08x (JrnlRecoveryStart > LastUsnRecordProcessed)\n",
|
|
PRINTQUAD(Replica->LastUsnRecordProcessed));
|
|
}
|
|
|
|
//
|
|
// start at the earliest USN of any replica set on the volume.
|
|
// If the journal is active it is currently using JrnlReadPoint to
|
|
// track its current read point. Since we may be starting a replica
|
|
// set on an active volume ReplayUsn is used to save the starting
|
|
// point. After the volume is paused and then unpaused ReplayUsn
|
|
// is copied to JrnlReadPoint where the journal will start reading.
|
|
//
|
|
if (pVme->ReplayUsnValid) {
|
|
DPRINT1(4, ":S: ReplayUsn was: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
|
|
pVme->ReplayUsn = min(Replica->LastUsnRecordProcessed, pVme->ReplayUsn);
|
|
} else {
|
|
DPRINT(4, ":S: No ReplayUsn was active.\n");
|
|
pVme->ReplayUsn = Replica->LastUsnRecordProcessed;
|
|
pVme->ReplayUsnValid = TRUE;
|
|
}
|
|
DPRINT1(4, ":S: ReplayUsn is: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
|
|
|
|
}
|
|
|
|
//
|
|
// Init the inlog commit point so if we shutdown the saved value is correct.
|
|
//
|
|
Replica->InlogCommitUsn = Replica->LastUsnRecordProcessed;
|
|
DPRINT1(4, ":S: Replica->InlogCommitUsn: %08x %08x\n",
|
|
PRINTQUAD(Replica->InlogCommitUsn));
|
|
|
|
//
|
|
// Track the oldest USN save point and the most recent USN progress point
|
|
// for any replica set on the volume.
|
|
//
|
|
if ((pVme->LastUsnSavePoint == (USN)0) ||
|
|
(pVme->LastUsnSavePoint > Replica->LastUsnRecordProcessed)) {
|
|
pVme->LastUsnSavePoint = Replica->LastUsnRecordProcessed;
|
|
}
|
|
|
|
if (pVme->MonitorMaxProgressUsn < Replica->LastUsnRecordProcessed) {
|
|
pVme->MonitorMaxProgressUsn = Replica->LastUsnRecordProcessed;
|
|
}
|
|
|
|
|
|
//
|
|
// This replica's FrsVsn may be out of date by a large margin
|
|
// if it has been awhile since the set was last started successfully.
|
|
// This results in an assert in DbsReplicaSaveMark(). So, as
|
|
// long as the FrsVsns look sane, assign the volume's current
|
|
// Vsn to the replica set.
|
|
//
|
|
FRS_ASSERT(pVme->FrsVsn >= ConfigRecord->FrsVsn);
|
|
ConfigRecord->FrsVsn = pVme->FrsVsn;
|
|
|
|
/////////////////////////////////////////////////
|
|
|
|
InitializeListHead(&Replica->RecoveryRefreshList);
|
|
InterlockedIncrement(&Replica->ReferenceCount);
|
|
pVme->ActiveReplicas += 1;
|
|
FrsRtlInsertTailList(&pVme->ReplicaListHead, &Replica->VolReplicaList);
|
|
|
|
WStatus = ERROR_SUCCESS;
|
|
|
|
RETURN:
|
|
//
|
|
// Close the replica tables and release the RtCtx struct.
|
|
//
|
|
DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
|
|
|
|
return WStatus;
|
|
|
|
RETURN_INV_DATA:
|
|
|
|
DbsFreeRtCtx(ThreadCtx, Replica, RtCtx, TRUE);
|
|
return (jerr == JET_errTermInProgress) ? ERROR_OPERATION_ABORTED : ERROR_INVALID_DATA;
|
|
}
|
|
|
|
|
|
|
|
JET_ERR
|
|
JrnlInsertParentEntry(
|
|
IN PTHREAD_CTX ThreadCtx,
|
|
IN PTABLE_CTX TableCtx,
|
|
IN PVOID Record,
|
|
IN PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This is a worker function passed to FrsEnumerateTable(). Each time
|
|
it is called with an IDTable record it save the parent info in the
|
|
Parent Directory Table for the volume.
|
|
|
|
Arguments:
|
|
|
|
ThreadCtx - Needed to access Jet.
|
|
TableCtx - A ptr to an IDTable context struct.
|
|
Record - A ptr to a IDTable record.
|
|
Context - A ptr to a Replica struct.
|
|
|
|
Thread Return Value:
|
|
|
|
A Jet error status. Success means call us with the next record.
|
|
Failure means don't call again and pass our status back to the
|
|
caller of FrsEnumerateTable().
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlInsertParentEntry:"
|
|
|
|
ULONGLONG SystemTime;
|
|
ULONGLONG ExpireTime;
|
|
|
|
JET_ERR jerr;
|
|
ULONG GStatus;
|
|
|
|
PIDTABLE_RECORD IDTableRec = (PIDTABLE_RECORD) Record ;
|
|
|
|
PQHASH_TABLE HashTable = ((PREPLICA) Context)->pVme->ParentFidTable;
|
|
|
|
//
|
|
// Abort enum if shutting down.
|
|
//
|
|
if (FrsIsShuttingDown) {
|
|
return JET_errTermInProgress;
|
|
}
|
|
|
|
//
|
|
// Check for expired tombstones.
|
|
//
|
|
if (IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED)) {
|
|
|
|
GetSystemTimeAsFileTime((PFILETIME)&SystemTime);
|
|
COPY_TIME(&ExpireTime, &IDTableRec->TombStoneGC);
|
|
|
|
if ((ExpireTime < SystemTime) && (ExpireTime != QUADZERO)) {
|
|
|
|
//
|
|
// IDTable record has expired. Delete it.
|
|
// If there is a problem, complain but keep going.
|
|
//
|
|
jerr = DbsDeleteTableRecord(TableCtx);
|
|
DPRINT_JS(0, "ERROR - DbsDeleteTableRecord :", jerr);
|
|
return JET_errSuccess;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Include the entry if replication is enabled and not marked for deletion
|
|
// and not a new file being created when we last shutdown.
|
|
//
|
|
if (IDTableRec->ReplEnabled &&
|
|
!IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_DELETED) &&
|
|
!IsIdRecFlagSet(IDTableRec, IDREC_FLAGS_NEW_FILE_IN_PROGRESS)) {
|
|
|
|
if (IDTableRec->FileID == ZERO_FID) {
|
|
//
|
|
// We shouldn't see any records with a zero FID but some prior
|
|
// bugs could cause this to happen. Dump em out but don't try
|
|
// to insert into table since it will assert.
|
|
//
|
|
DPRINT(0, "++ WARNING -- IDTable record with zero FID found.\n");
|
|
DBS_DISPLAY_RECORD_SEV(0, TableCtx, TRUE);
|
|
|
|
} else {
|
|
|
|
GStatus = QHashInsert(HashTable,
|
|
&IDTableRec->FileID,
|
|
&IDTableRec->ParentFileID,
|
|
((PREPLICA) Context)->ReplicaNumber,
|
|
FALSE);
|
|
if (GStatus != GHT_STATUS_SUCCESS ) {
|
|
DPRINT1(0, "++ QHashInsert error: %d\n", GStatus);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Return success so we can keep going thru the ID table.
|
|
//
|
|
return JET_errSuccess;
|
|
}
|
|
|
|
|
|
|
|
ULONG_PTR
|
|
JrnlFilterLinkChild (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru GhtEnumerateTable() to connect this
|
|
filter table entry to the parent list for the replica set passed in
|
|
Context. The GhtEnumerateTable function does not acquire any row locks
|
|
so this function is free to call GhtLookup or GhtInsert without deadlock
|
|
conflicts. It is assumed that the caller knows that it is safe to
|
|
enumerate the table. The caller is also responsible for getting the
|
|
child list lock for the replica set before calling GhtEnumerateTable().
|
|
|
|
The child list lock is associated with the replica set so when you have
|
|
the lock the child list entries for all filter entries in this replica
|
|
set are protected. When we enumerate down a subtree we only need to get
|
|
one lock.
|
|
|
|
WARNING - There is no table level lock on the Filter Table. The Filter
|
|
table is per volume so multiple replica sets could be using the same
|
|
table. The locking is at the row level where the row is indexed by
|
|
the hash function. This means that this function can only be used
|
|
when the Journal is paused. To start/add a replica set after the
|
|
system is running you must pause the journal, update the filter table
|
|
and then unpause the journal.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
A Win32 error status. A failure status return aborts enumeration.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterLinkChild:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
|
|
PFILTER_TABLE_ENTRY ParentFilterEntry;
|
|
ULONG GStatus;
|
|
|
|
//
|
|
// Skip entry if it is not associated with the replica set of interest.
|
|
//
|
|
if (FilterEntry->Replica != Replica) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// If this is the root of the replica tree there is no parent to link it to.
|
|
//
|
|
if (FilterEntry->DParentFileID == ZERO_FID) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// If this entry has already been linked then return an error status to
|
|
// abort the enumeration since the entry can't be on more than one list.
|
|
//
|
|
if (FilterEntry->ChildEntry.Flink != NULL) {
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
//
|
|
// Find the parent to link this child to.
|
|
//
|
|
GStatus = GhtLookup(Table,
|
|
&FilterEntry->DParentFileID,
|
|
TRUE,
|
|
&ParentFilterEntry);
|
|
|
|
if (GStatus != GHT_STATUS_SUCCESS) {
|
|
DPRINT1(0, "++ Error: Parent entry not found for - %08x\n", FilterEntry);
|
|
FRS_JOURNAL_FILTER_PRINT(0, Table, FilterEntry);
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
//
|
|
// Put the Dir on the list and drop the ref count we got from Lookup.
|
|
//
|
|
InsertHeadList(&ParentFilterEntry->ChildHead, &FilterEntry->ChildEntry);
|
|
|
|
GhtDereferenceEntryByAddress(Table, ParentFilterEntry, TRUE);
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
ULONG_PTR
|
|
JrnlFilterLinkChildNoError(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
See JrnlFilterLinkChild().
|
|
|
|
A dirtable entry may appear to be orphaned if it is stuck in the
|
|
preinstall area and its parent has been deleted. Ignore errors
|
|
for now.
|
|
|
|
This can also happen if a remote co create is executed for a dir at the
|
|
same time the subtree containing this dir is being moved out of the
|
|
replica tree. The journal code will remove the filter entries immediately
|
|
so we skip future file changes in the subtree. So the parent is gone when
|
|
the filter entry for the dir create is added. In the course of processing
|
|
the moveout on the parent this dir entry is cleaned up.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterLinkChildNoError:"
|
|
ULONG WStatus;
|
|
|
|
WStatus = (ULONG)JrnlFilterLinkChild(Table, Buffer, Context);
|
|
|
|
DPRINT_WS(0, "++ WARN - orphaned dir; probably stuck in preinstall with deleted parent", WStatus);
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlFilterUnlinkChild (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is unlinks a filter entry from the child list.
|
|
|
|
The caller must get the child list lock for the replica set.
|
|
The child list lock is associated with the replica set so when you have
|
|
the lock the child list entries for all filter entries in this replica
|
|
set are protected. When we enumerate down a subtree we only need to get
|
|
one lock.
|
|
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
A Win32 error status. A failure status return aborts enumeration.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterUnlinkChild:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
|
|
PFILTER_TABLE_ENTRY ParentFilterEntry;
|
|
ULONG GStatus;
|
|
|
|
//
|
|
// Skip entry if it is not associated with the replica set of interest.
|
|
// Return error_success so this function can be called by GhtEnumerateTable().
|
|
//
|
|
if (FilterEntry->Replica != Replica) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// If this entry is not on the list then return an error status to
|
|
// abort the enumeration.
|
|
//
|
|
if (FilterEntry->ChildEntry.Flink == NULL) {
|
|
return ERROR_GEN_FAILURE;
|
|
}
|
|
|
|
//
|
|
// Pull the entry off the list.
|
|
//
|
|
FrsRemoveEntryList(&FilterEntry->ChildEntry);
|
|
|
|
FilterEntry->ChildEntry.Flink = NULL;
|
|
FilterEntry->ChildEntry.Blink = NULL;
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
ULONG_PTR
|
|
JrnlFilterGetRoot (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru GhtEnumerateTable() to find the root
|
|
of the replica set specified by the Context parameter.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
The root filter entry for the Replica Set, else NULL to keep looking.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterGetRoot:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
|
|
//
|
|
// Skip entry if it is not associated with the replica set of interest.
|
|
//
|
|
if (FilterEntry->Replica != Replica) {
|
|
return (ULONG_PTR)NULL;
|
|
}
|
|
|
|
//
|
|
// If this is the root of the replica tree we're done.
|
|
//
|
|
if (FilterEntry->DParentFileID == ZERO_FID) {
|
|
return (ULONG_PTR)FilterEntry;
|
|
}
|
|
|
|
return (ULONG_PTR)NULL;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlSubTreePrint (
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function is called thru GhtEnumerateTable() to dump a Filter entry.
|
|
|
|
The enum caller takes a ref on the entry. we drop it here.
|
|
|
|
Arguments:
|
|
|
|
Table - the hash table being enumerated (to lookup parent entry).
|
|
Buffer - a ptr to a FILTER_TABLE_ENTRY
|
|
Context - A pointer to the Replica struct for the replica data added to the
|
|
table.
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlSubTreePrint:"
|
|
|
|
PFILTER_TABLE_ENTRY FilterEntry = (PFILTER_TABLE_ENTRY) Buffer;
|
|
PREPLICA Replica = (PREPLICA) Context;
|
|
|
|
|
|
//
|
|
// Abort enum if shutting down.
|
|
//
|
|
if (FrsIsShuttingDown) {
|
|
return ERROR_OPERATION_ABORTED;
|
|
}
|
|
|
|
//
|
|
// print the entry if it is associated with the replica set of interest.
|
|
//
|
|
if (FilterEntry->Replica == Replica) {
|
|
FRS_JOURNAL_FILTER_PRINT(4, Table, FilterEntry);
|
|
}
|
|
|
|
DECREMENT_FILTER_REF_COUNT(FilterEntry);
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
BOOL
|
|
ActiveChildrenKeyMatch(
|
|
PVOID Buf,
|
|
PVOID QKey
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
Check for an exact key match.
|
|
|
|
Arguments:
|
|
Buf -- ptr to a Guid1.
|
|
QKey -- ptr to Guid2.
|
|
|
|
Return Value:
|
|
TRUE if exact match.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "ActiveChildrenKeyMatch:"
|
|
|
|
PULONG pUL1, pUL2;
|
|
|
|
pUL1 = (PULONG) Buf;
|
|
pUL2 = (PULONG) QKey;
|
|
|
|
if (!ValueIsMultOf4(pUL1)) {
|
|
DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL1, *pUL1);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL1));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
if (!ValueIsMultOf4(pUL2)) {
|
|
DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL2, *pUL2);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL2));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
return GUIDS_EQUAL(pUL1, pUL2);
|
|
}
|
|
|
|
|
|
ULONG
|
|
ActiveChildrenHashCalc(
|
|
PVOID Buf,
|
|
PULONGLONG QKey
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
Calculate a hash value for the file guid used in the ActiveChildren Table.
|
|
|
|
Arguments:
|
|
Buf -- ptr to a Guid.
|
|
QKey -- Returned 8 byte hash key for the QKey field of QHASH_ENTRY.
|
|
|
|
Return Value:
|
|
32 bit hash value.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "ActiveChildrenHashCalc:"
|
|
|
|
PULONG pUL = (PULONG) Buf;
|
|
PUSHORT pUS = (PUSHORT) Buf;
|
|
|
|
if (!ValueIsMultOf4(pUL)) {
|
|
DPRINT2(0, "ERROR - Unaligned key value - addr: %08x, Data: %08x\n", pUL, *pUL);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
//
|
|
// Calc QKey, 4 byte hash is ok.
|
|
//
|
|
*QKey = (ULONGLONG) (pUL[0] ^ pUL[1] ^ pUL[2] ^ pUL[3]);
|
|
|
|
//
|
|
// Calc hash based on the time. Include node part for remote COs.
|
|
//
|
|
return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2] ^ pUS[6] ^ pUS[7]);
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlOpen(
|
|
IN PREPLICA Replica,
|
|
OUT PVOLUME_MONITOR_ENTRY *pVmeArg,
|
|
PCONFIG_TABLE_RECORD ConfigRecord
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine opens the journal specified by the Replica->Volume parameter.
|
|
It creates and fills in a Volume monitor entry that can
|
|
be used to read the NTFS Journal. It checks if objects and object IDs
|
|
are supported on the volume and fails if they aren't. It checks for an
|
|
object ID on the root directory of the volume and puts one there if necessary.
|
|
|
|
It keeps a list of volumes (VolumeMonitorQueue) that currently have journal
|
|
files open. If it finds this request in the list it bumps the ref count
|
|
and returns. pVme is set to NULL with status success indicating I/O
|
|
on the journal is proceeding.
|
|
|
|
If this volume is not in the list then it is added. The volume Object ID
|
|
is used to identify the volume in the Volume Monitor list. A read
|
|
is not posted to the journal at this time. This allows journal opens for
|
|
other replica sets to be done so we start out at the lowest USN of all
|
|
replica sets hosted by a given volume. In addition we need to know about
|
|
all current replica sets when we start filtering journal entries.
|
|
|
|
The volume monitor entry related to to the given replica set is
|
|
returned in pVme. If we fail to open the journal pVmeArg is NULL
|
|
and status indicates the failure.
|
|
|
|
If the journal doesn't exist it is created. The max size is set to
|
|
JRNL_DEFAULT_MAX_SIZE MB with an allocation size of
|
|
JRNL_DEFAULT_ALLOC_DELTA MB.
|
|
|
|
The following checks are made to make sure that the volume and journal
|
|
info is not changed while the service was not running.
|
|
|
|
VOLUME ROOT OBJECTID MISMATCH CHECK:
|
|
In case of a mismatch the information in the Db is updated with the
|
|
correct value for the volume guid.
|
|
|
|
JOURNAL ID MISMATCH CHECK:
|
|
In case of a mismatch the replica set is marked to be deleted.
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
Replica: Replica being opened
|
|
|
|
pVmeArg: A pointer to return the Volume Monitor Entry in.
|
|
|
|
ConfigRecord: The ConfigTqable record for this replica set.
|
|
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlOpen:"
|
|
|
|
USN_JOURNAL_DATA UsnJournalData;
|
|
CREATE_USN_JOURNAL_DATA CreateUsnJournalData = {
|
|
0, // MaximumSize from registry
|
|
JRNL_DEFAULT_ALLOC_DELTA // AllocationDelta
|
|
};
|
|
IO_STATUS_BLOCK Iosb;
|
|
ULONG JournalSize;
|
|
NTSTATUS Status;
|
|
DWORD WStatus;
|
|
ULONG BytesReturned;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
HANDLE RootHandle;
|
|
HANDLE VolumeHandle = INVALID_HANDLE_VALUE;
|
|
ULONG VolumeInfoLength;
|
|
PFILE_FS_VOLUME_INFORMATION VolumeInfo;
|
|
FILE_OBJECTID_BUFFER ObjectIdBuffer;
|
|
PLIST_ENTRY Entry;
|
|
WCHAR VolumeRootDir[MAX_PATH + 1];
|
|
CHAR GuidStr[GUID_CHAR_LEN];
|
|
CHAR TimeString[TIME_STRING_LENGTH];
|
|
CHAR HashTableName[40];
|
|
PCOMMAND_PACKET CmdPkt = NULL;
|
|
HANDLE DummyHandle = INVALID_HANDLE_VALUE;
|
|
ULARGE_INTEGER FreeBytesAvailableToCaller;
|
|
ULARGE_INTEGER TotalNumberOfBytes;
|
|
|
|
*pVmeArg = NULL;
|
|
|
|
//
|
|
// Does the volume exist and is it NTFS?
|
|
//
|
|
WStatus = FrsVerifyVolume(Replica->Volume,
|
|
Replica->SetName->Name,
|
|
FILE_PERSISTENT_ACLS | FILE_SUPPORTS_OBJECT_IDS);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT2_WS(3, ":S: JrnlOpen - Root path Volume (%ws) for %ws does not exist or is not NTFS;",
|
|
Replica->Volume, Replica->SetName->Name, WStatus);
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
return WStatus;
|
|
}
|
|
|
|
//
|
|
// "\\.\" is used as an escape prefix to prevent the name translator
|
|
// from appending a trailing "\" on a drive letter. Need to do a volume open.
|
|
// \\.\E: gets mapped to E: (really an NT internal device name)
|
|
// \\.\E:\ gets mapped to E:\
|
|
// E: gets mapped to E:\
|
|
// E:\ gets mapped to E:\
|
|
//
|
|
|
|
//
|
|
// Get a volume handle.
|
|
//
|
|
_wcsupr( Replica->Volume );
|
|
VolumeHandle = CreateFile(Replica->Volume,
|
|
GENERIC_READ | GENERIC_WRITE,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
NULL,
|
|
OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
NULL );
|
|
|
|
if (!HANDLE_IS_VALID(VolumeHandle)) {
|
|
WStatus = GetLastError();
|
|
DPRINT1_WS(0, "++ ERROR - JrnlOpen: Unable to open %ws volume :",
|
|
Replica->Volume, WStatus);
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
return WStatus;
|
|
} else {
|
|
WStatus = GetLastError();
|
|
DPRINT1_WS(4, "++ JrnlOpen: Open on volume %ws :", Replica->Volume, WStatus);
|
|
}
|
|
|
|
//
|
|
// Get the volume information.
|
|
//
|
|
pVme = FrsAllocType(VOLUME_MONITOR_ENTRY_TYPE);
|
|
pVme->FrsVsn = QUADZERO;
|
|
pVme->ReplayUsnValid = FALSE;
|
|
|
|
|
|
VolumeInfoLength = sizeof(FILE_FS_VOLUME_INFORMATION) +
|
|
MAXIMUM_VOLUME_LABEL_LENGTH;
|
|
|
|
VolumeInfo = &pVme->FSVolInfo;
|
|
|
|
Status = NtQueryVolumeInformationFile(VolumeHandle,
|
|
&Iosb,
|
|
VolumeInfo,
|
|
VolumeInfoLength,
|
|
FileFsVolumeInformation);
|
|
if ( NT_SUCCESS(Status) ) {
|
|
|
|
VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
|
|
FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
|
|
|
|
DPRINT5(4,":S: %-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
|
|
VolumeInfo->VolumeLabel,
|
|
VolumeInfo->VolumeLabelLength,
|
|
(VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
|
|
VolumeInfo->VolumeSerialNumber,
|
|
TimeString);
|
|
|
|
if (!VolumeInfo->SupportsObjects) {
|
|
//
|
|
// No object support on the volume.
|
|
//
|
|
EPRINT4(EVENT_FRS_VOLUME_NOT_SUPPORTED,
|
|
Replica->SetName->Name, ComputerName, Replica->Root, Replica->Volume);
|
|
DPRINT(0, ":S: ERROR - Object IDs are not supported on the volume.\n");
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorUnsupportedFileSystem;
|
|
return FrsSetLastNTError(STATUS_NOT_IMPLEMENTED);
|
|
}
|
|
|
|
//
|
|
// Scan the VolumeMonitorStopQueue to see if we already tried
|
|
// this one and failed.
|
|
//
|
|
|
|
ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
|
|
if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
|
|
//
|
|
// Journaling was stopped on this volume by request. E.g.,
|
|
// when a replica set is stopped and restarted in order
|
|
// to pick up a new file or dir filter list.
|
|
//
|
|
// Allow the restart.
|
|
//
|
|
if (WIN_SUCCESS(pE->WStatus)) {
|
|
//
|
|
// No more references; free the memory
|
|
//
|
|
//
|
|
// Currently, replica sets continue to refererence
|
|
// their Vme even after VmeDeactivate(). So don't
|
|
// free Vmes regardless of their reference count
|
|
//
|
|
// if (pE->ReferenceCount == 0) {
|
|
// FrsRtlRemoveEntryQueueLock(&VolumeMonitorStopQueue,
|
|
// &pE->ListEntry);
|
|
// FrsFreeType(pE);
|
|
// }
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// We already tried this one and failed. Free the entry,
|
|
// close the handle and return with same status as last time.
|
|
//
|
|
WStatus = pE->WStatus;
|
|
|
|
ReleaseListLock(&VolumeMonitorStopQueue);
|
|
|
|
DPRINT3(4,":S: VME is on stop queue. %-16ws, VSN: %08X, VolCreTim: %s\n",
|
|
VolumeInfo->VolumeLabel, VolumeInfo->VolumeSerialNumber,
|
|
TimeString);
|
|
FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
return WStatus;
|
|
}
|
|
);
|
|
|
|
} else {
|
|
DPRINT_NT(0, ":S: ERROR - Volume root QueryVolumeInformationFile failed.", Status);
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
return FrsSetLastNTError(Status);
|
|
}
|
|
|
|
//
|
|
// Get the volume root dir object ID.
|
|
// Always open the replica root by masking off the FILE_OPEN_REPARSE_POINT flag
|
|
// because we want to open the destination dir not the junction if the root
|
|
// happens to be a mount point.
|
|
//
|
|
wsprintf( VolumeRootDir, TEXT("%ws\\"), Replica->Volume);
|
|
WStatus = FrsOpenSourceFileW(&RootHandle,
|
|
VolumeRootDir,
|
|
WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
|
|
|
|
if (WIN_ACCESS_DENIED(WStatus)) {
|
|
//
|
|
// For some mysterious reason the root dir on some volumes ends up
|
|
// with the read-only attribute set. It is currently not understood
|
|
// how this happens (as of 6/2000) but PSS has seen it on a number
|
|
// of cases, generally when DCPromo fails because FRS can't init
|
|
// the sys vol. We are going to just clear it here and try again.
|
|
// Unfortunately the ATTRIB cmd does not work on the root dir.
|
|
//
|
|
FILE_BASIC_INFORMATION BasicInfo;
|
|
HANDLE hFile;
|
|
|
|
WStatus = FrsOpenSourceFileW(&hFile,
|
|
VolumeRootDir,
|
|
READ_ATTRIB_ACCESS | FILE_WRITE_ATTRIBUTES,
|
|
OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
|
|
DPRINT1_WS(0, "++ JrnlOpen: Open on root dir %ws :", VolumeRootDir, WStatus);
|
|
|
|
if (HANDLE_IS_VALID(hFile)) {
|
|
|
|
Status = NtQueryInformationFile( hFile,
|
|
&Iosb,
|
|
&BasicInfo,
|
|
sizeof( BasicInfo ),
|
|
FileBasicInformation );
|
|
if (NT_SUCCESS( Status )) {
|
|
|
|
DPRINT2(0,"Attributes for %s are currently: %0x\n",
|
|
VolumeRootDir, BasicInfo.FileAttributes );
|
|
|
|
if (BooleanFlagOn(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY)) {
|
|
ClearFlag(BasicInfo.FileAttributes , FILE_ATTRIBUTE_READONLY);
|
|
|
|
Status = NtSetInformationFile( hFile,
|
|
&Iosb,
|
|
&BasicInfo,
|
|
sizeof( BasicInfo ),
|
|
FileBasicInformation );
|
|
if (NT_SUCCESS( Status )) {
|
|
DPRINT(0, "Read-Only attribute cleared succesfully\n" );
|
|
//
|
|
// ******** Add event log message saying what we did.
|
|
//
|
|
|
|
} else {
|
|
DPRINT_NT(0, "Couldn't set attributes, error status :", Status );
|
|
}
|
|
}
|
|
|
|
CloseHandle( hFile );
|
|
|
|
//
|
|
// Now retry the open.
|
|
//
|
|
WStatus = FrsOpenSourceFileW(&RootHandle,
|
|
VolumeRootDir,
|
|
WRITE_ACCESS, OPEN_OPTIONS & ~FILE_OPEN_REPARSE_POINT);
|
|
} else {
|
|
DPRINT_NT(0, "Couldn't get attributes, error status :", Status );
|
|
WStatus = FrsSetLastNTError(Status);
|
|
CloseHandle( hFile );
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT1_WS(0, ":S: ERROR - Failed to open the volume root dir: %ws ;",
|
|
VolumeRootDir, WStatus);
|
|
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
return WStatus;
|
|
}
|
|
|
|
//
|
|
// zero the buffer in case the data that comes back is short.
|
|
//
|
|
ZeroMemory(&ObjectIdBuffer, sizeof(FILE_OBJECTID_BUFFER));
|
|
|
|
//
|
|
// Get the Object ID from the volume root.
|
|
//
|
|
Status = NtFsControlFile(
|
|
RootHandle, // file handle
|
|
NULL, // event
|
|
NULL, // apc routine
|
|
NULL, // apc context
|
|
&Iosb, // iosb
|
|
FSCTL_GET_OBJECT_ID, // FsControlCode
|
|
&RootHandle, // input buffer
|
|
sizeof(HANDLE), // input buffer length
|
|
&ObjectIdBuffer, // OutputBuffer for data from the FS
|
|
sizeof(FILE_OBJECTID_BUFFER)); // OutputBuffer Length
|
|
|
|
if (NT_SUCCESS(Status)) {
|
|
GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
|
|
DPRINT1(4, ":S: Oid for volume root is %s\n", GuidStr );
|
|
} else
|
|
if (Status == STATUS_NOT_IMPLEMENTED) {
|
|
DPRINT1_NT(0, ":S: ERROR - FSCTL_GET_OBJECT_ID failed on file %ws. Object IDs are not enabled on the volume.\n",
|
|
VolumeRootDir, Status);
|
|
Replica->FStatus = FrsErrorUnsupportedFileSystem;
|
|
}
|
|
|
|
//
|
|
// If there is no object ID on the root directory put one there.
|
|
// Date : 02/07/2000
|
|
// STATUS_OBJECT_NAME_NOT_FOUND was the old return value
|
|
// and STATUS_OBJECTID_NOT_FOUND is the new return value.
|
|
// Check for both so it works on systems running older and
|
|
// newer ntfs.sys
|
|
//
|
|
if (Status == STATUS_OBJECT_NAME_NOT_FOUND ||
|
|
Status == STATUS_OBJECTID_NOT_FOUND ) {
|
|
|
|
FrsUuidCreate((GUID *)ObjectIdBuffer.ObjectId);
|
|
|
|
Status = NtFsControlFile(
|
|
RootHandle, // file handle
|
|
NULL, // event
|
|
NULL, // apc routine
|
|
NULL, // apc context
|
|
&Iosb, // iosb
|
|
FSCTL_SET_OBJECT_ID, // FsControlCode
|
|
&ObjectIdBuffer, // input buffer
|
|
sizeof(FILE_OBJECTID_BUFFER),// input buffer length
|
|
NULL, // OutputBuffer for data from the FS
|
|
0); // OutputBuffer Length
|
|
|
|
if (NT_SUCCESS(Status)) {
|
|
GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
|
|
DPRINT1(4, ":S: Oid set on volume root is %s\n", GuidStr );
|
|
} else {
|
|
DPRINT1(0, ":S: ERROR - FSCTL_SET_OBJECT_ID failed on volume root %ws.\n",
|
|
VolumeRootDir);
|
|
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
if (Status == STATUS_NOT_IMPLEMENTED) {
|
|
DPRINT(0, ":S: ERROR - Object IDs are not enabled on the volume.\n");
|
|
Replica->FStatus = FrsErrorUnsupportedFileSystem;
|
|
} else
|
|
if (Status == STATUS_ACCESS_DENIED) {
|
|
DPRINT(0, ":S: ERROR - Access Denied.\n");
|
|
} else {
|
|
DPRINT_NT(0, "ERROR - NtFsControlFile(FSCTL_SET_OBJECT_ID) failed.", Status);
|
|
}
|
|
}
|
|
}
|
|
|
|
FRS_CLOSE(RootHandle);
|
|
|
|
//
|
|
// If object IDs don't work on the volume then bail.
|
|
//
|
|
if (!NT_SUCCESS(Status)) {
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
return FrsSetLastNTError(Status);
|
|
}
|
|
|
|
|
|
//
|
|
// VOLUME ROOT OBJECTID MISMATCH CHECK:
|
|
//
|
|
// Keep the Volume root guid up-to-date in the Db. If it has changed then update it in the config record.
|
|
//
|
|
if (!GUIDS_EQUAL(&(ObjectIdBuffer.ObjectId), &(ConfigRecord->FSVolGuid))) {
|
|
|
|
DPRINT1(4,"WARN - Volume root guid mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
|
|
|
|
GuidToStr((GUID *)ObjectIdBuffer.ObjectId, GuidStr);
|
|
DPRINT1(4,"WARN - Volume root guid (FS) (%s)\n",GuidStr);
|
|
|
|
GuidToStr((GUID *)&(ConfigRecord->FSVolGuid), GuidStr);
|
|
DPRINT1(4,"WARN - Volume root guid (DB) (%s)\n",GuidStr);
|
|
|
|
DPRINT1(0,"WARN - Volume root guid updated for Replica Set (%ws)\n",Replica->ReplicaName->Name);
|
|
|
|
COPY_GUID(&(ConfigRecord->FSVolGuid), &(ObjectIdBuffer.ObjectId));
|
|
Replica->NeedsUpdate = TRUE;
|
|
}
|
|
|
|
//
|
|
// Scan the VolumeMonitorQueue to see if we are already doing this one.
|
|
//
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
ForEachListEntryLock(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
//
|
|
// Consider changing this test to use the guid on the vol root dir.
|
|
//
|
|
if (pE->FSVolInfo.VolumeSerialNumber == VolumeInfo->VolumeSerialNumber) {
|
|
|
|
//
|
|
// Already monitoring this volume. Free entry and close handle.
|
|
//
|
|
FrsFreeType(pVme);
|
|
pVme = pE;
|
|
FRS_CLOSE(VolumeHandle);
|
|
|
|
//
|
|
// Release the lock and Return the Volume Monitor entry pointer.
|
|
//
|
|
//pVme->ActiveReplicas += 1;
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
DPRINT1(4, ":S: Volume %ws already monitored.\n", pVme->FSVolInfo.VolumeLabel);
|
|
//
|
|
// JOURNAL ID MISMATCH CHECK:
|
|
//
|
|
// If LastShutdown is 0 then this is the very first time we have started
|
|
// replication on this replica set so set the current CnfUsnJournalID in
|
|
// the config record. Even if Lastshutdown is not 0 CnfUsnJournalID could
|
|
// be 0 because it was not getting correctly updated in Win2K.
|
|
//
|
|
if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
|
|
(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
|
|
(ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
|
|
|
|
//
|
|
// Update the JournalID in the Db and set NeedsUpdate so that the
|
|
// config record gets written to the Db at the next update call.
|
|
//
|
|
ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
|
|
Replica->NeedsUpdate = TRUE;
|
|
|
|
} else
|
|
//
|
|
// Check if the JournalID from pVme matches with the CnfUsnJournalID from the
|
|
// config record for this replica set. If it does not then it means that
|
|
// this replica set has been moved. Returning error here will trigger
|
|
// a deletion of the replica set. The set will be recreated at the next
|
|
// poll cycle and it will either be primary or non-auth depending on the
|
|
// case.
|
|
//
|
|
|
|
if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
|
|
//
|
|
// Usn Journal has a new instance code. ==> A delete / create occurred.
|
|
// Treat it as a journal wrap error.
|
|
//
|
|
|
|
DPRINT1(0,"ERROR - JournalID mismatch for Replica Set (%ws)\n",Replica->ReplicaName->Name);
|
|
DPRINT2(0,"ERROR - JournalID %x(FS) != %x(DB)\n",
|
|
pVme->UsnJournalData.UsnJournalID, ConfigRecord->CnfUsnJournalID);
|
|
DPRINT1(0,"ERROR - Replica Set (%ws) is marked to be deleted\n",Replica->ReplicaName->Name);
|
|
|
|
Replica->FStatus = FrsErrorMismatchedJournalId;
|
|
JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
|
|
return ERROR_REVISION_MISMATCH;
|
|
}
|
|
*pVmeArg = pVme;
|
|
Replica->FStatus = FrsErrorSuccess;
|
|
return ERROR_SUCCESS;
|
|
}
|
|
);
|
|
|
|
|
|
//
|
|
// Create the Usn Journal if it does not exist.
|
|
//
|
|
CfgRegReadDWord(FKC_NTFS_JRNL_SIZE, NULL, 0, &JournalSize);
|
|
CreateUsnJournalData.MaximumSize = (ULONGLONG)JournalSize * (ULONGLONG)(1024 * 1024);
|
|
|
|
DPRINT2(4, ":S: Creating NTFS USN Journal on %ws with size %d MB\n",
|
|
Replica->Volume, JournalSize );
|
|
|
|
Status = NtFsControlFile( VolumeHandle,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
&Iosb,
|
|
FSCTL_CREATE_USN_JOURNAL,
|
|
&CreateUsnJournalData,
|
|
sizeof(CreateUsnJournalData),
|
|
NULL,
|
|
0 );
|
|
|
|
//
|
|
// Query the journal for the Journal ID, the USN info, etc.
|
|
//
|
|
|
|
if (!DeviceIoControl(VolumeHandle,
|
|
FSCTL_QUERY_USN_JOURNAL,
|
|
NULL,
|
|
0,
|
|
&pVme->UsnJournalData,
|
|
sizeof(USN_JOURNAL_DATA),
|
|
&BytesReturned,
|
|
NULL)) {
|
|
|
|
WStatus = GetLastError();
|
|
|
|
DPRINT1_WS(4, ":S: JrnlOpen: FSCTL_QUERY_USN_JOURNAL on volume %ws :",
|
|
Replica->Volume, WStatus);
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
|
|
if (GetDiskFreeSpaceEx(Replica->Root,&FreeBytesAvailableToCaller,&TotalNumberOfBytes,NULL)) {
|
|
//
|
|
// Print the event log message if the available free space is
|
|
// less than 1%. The current problem to initialize
|
|
// the journal could be due to low disk space.
|
|
//
|
|
if ((FreeBytesAvailableToCaller.QuadPart*100) < TotalNumberOfBytes.QuadPart) {
|
|
if ((Replica->Volume != NULL) && (wcslen(Replica->Volume) >= wcslen(L"\\\\.\\D:"))) {
|
|
//
|
|
// If we are able to get the volume in the form
|
|
// \\.\D: then use the volume in the event log so
|
|
// that we don't print more than one event log
|
|
// message per volume. If we can't get the
|
|
// volume then we print the path.
|
|
//
|
|
EPRINT1(EVENT_FRS_OUT_OF_DISK_SPACE, &Replica->Volume[4]);
|
|
} else {
|
|
EPRINT1(EVENT_FRS_OUT_OF_DISK_SPACE, Replica->Root);
|
|
}
|
|
}
|
|
DPRINT3(4, ":S: Disk space check: %ws FreeBytesAvailableToCaller = %08x %08x,TotalNumberOfBytes = %08x %08x\n",
|
|
Replica->Root,
|
|
PRINTQUAD(FreeBytesAvailableToCaller.QuadPart),
|
|
PRINTQUAD(TotalNumberOfBytes.QuadPart));
|
|
}
|
|
|
|
Replica->FStatus = FrsErrorJournalInitFailed;
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
if (BytesReturned != sizeof(USN_JOURNAL_DATA)) {
|
|
|
|
WStatus = GetLastError();
|
|
|
|
DPRINT2(4, "JrnlOpen: FSCTL_QUERY_USN_JOURNAL bytes returnd: %d, Expected: %d\n",
|
|
BytesReturned, sizeof(USN_JOURNAL_DATA));
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorJournalInitFailed;
|
|
return WStatus;
|
|
}
|
|
|
|
//
|
|
// Display the USN Journal Data.
|
|
//
|
|
DPRINT1(4, ":S: UsnJournalID %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.UsnJournalID ));
|
|
DPRINT1(4, ":S: FirstUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.FirstUsn ));
|
|
DPRINT1(4, ":S: NextUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.NextUsn ));
|
|
DPRINT1(4, ":S: LowestValidUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.LowestValidUsn ));
|
|
DPRINT1(4, ":S: MaxUsn %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaxUsn ));
|
|
DPRINT1(4, ":S: MaximumSize %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.MaximumSize ));
|
|
DPRINT1(4, ":S: AllocationDelta %08x %08x\n", PRINTQUAD(pVme->UsnJournalData.AllocationDelta));
|
|
|
|
//
|
|
// If the NextUsn is 0 then create a dummy file to increment the usn
|
|
// so that we don't end up picking up a valid change at usn 0.
|
|
//
|
|
if (pVme->UsnJournalData.NextUsn == QUADZERO) {
|
|
|
|
FrsCreateFileRelativeById(&DummyHandle,
|
|
Replica->PreInstallHandle,
|
|
NULL,
|
|
0,
|
|
FILE_ATTRIBUTE_TEMPORARY,
|
|
L"NTFRS_TEMP_FILE.TMP",
|
|
(USHORT)(wcslen(L"NTFRS_TEMP_FILE.TMP") * sizeof(WCHAR)),
|
|
NULL,
|
|
FILE_OPEN_IF,
|
|
RESTORE_ACCESS | DELETE);
|
|
|
|
if (HANDLE_IS_VALID(DummyHandle)) {
|
|
FrsDeleteByHandle(L"NTFRS_TEMP_FILE.TMP", DummyHandle);
|
|
}
|
|
|
|
FRS_CLOSE(DummyHandle);
|
|
|
|
}
|
|
|
|
//
|
|
//
|
|
// JOURNAL ID MISMATCH CHECK:
|
|
//
|
|
// If LastShutdown is 0 then this is the very first time we have started
|
|
// replication on this replica set so set the current pVme->JrnlReadPoint to
|
|
// the end of the Journal. Also save the Journal ID so we can detect if
|
|
// someone does a delete/create cycle on the journal.
|
|
// There are cases when the replica set gets created
|
|
// and then shutdown without ever initializing.
|
|
//
|
|
if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
|
|
(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
|
|
(ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
|
|
|
|
ConfigRecord->CnfUsnJournalID = pVme->UsnJournalData.UsnJournalID;
|
|
Replica->NeedsUpdate = TRUE;
|
|
} else
|
|
if (ConfigRecord->CnfUsnJournalID != pVme->UsnJournalData.UsnJournalID) {
|
|
//
|
|
// Usn Journal has a new instance code. ==> A delete / create occurred.
|
|
// Treat it as a journal wrap error.
|
|
//
|
|
Replica->FStatus = FrsErrorMismatchedJournalId;
|
|
JrnlSetReplicaState(Replica, REPLICA_STATE_MISMATCHED_JOURNAL_ID);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
return ERROR_REVISION_MISMATCH;
|
|
}
|
|
|
|
//
|
|
// Re-open the volume to allow for asynchronous IO. We don't
|
|
// open with the "OVERLAPPED" flag initially because then the
|
|
// above "create journal" doesn't finish in time for us to post
|
|
// a "read journal" request. We get a "INVALID_DEVICE_STATE"
|
|
// status.
|
|
//
|
|
FRS_CLOSE(VolumeHandle);
|
|
VolumeHandle = CreateFile(Replica->Volume,
|
|
GENERIC_READ | GENERIC_WRITE,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
NULL,
|
|
OPEN_EXISTING,
|
|
FILE_FLAG_OVERLAPPED,
|
|
NULL );
|
|
|
|
WStatus = GetLastError();
|
|
|
|
if (!HANDLE_IS_VALID(VolumeHandle)) {
|
|
DPRINT1_WS(0, "Can't open file %ws;", Replica->Volume, WStatus);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
pVme = FrsFreeType(pVme);
|
|
Replica->FStatus = FrsErrorVolumeRootDirOpenFail;
|
|
return WStatus;
|
|
} else {
|
|
DPRINT1(4, ":S: JrnlOpen: Open on volume %ws\n", Replica->Volume);
|
|
}
|
|
|
|
//
|
|
// This is a new volume journal add it to the list.
|
|
//
|
|
pVme->VolumeHandle = VolumeHandle;
|
|
pVme->DriveLetter[0] = Replica->Volume[wcslen(Replica->Volume) - 2];
|
|
pVme->DriveLetter[1] = Replica->Volume[wcslen(Replica->Volume) - 1];
|
|
pVme->DriveLetter[2] = UNICODE_NULL;
|
|
|
|
//
|
|
// Associate the volume handle with the completion port.
|
|
//
|
|
JournalCompletionPort = CreateIoCompletionPort(
|
|
VolumeHandle,
|
|
JournalCompletionPort,
|
|
(ULONG_PTR) pVme, // key associated with this handle
|
|
0);
|
|
|
|
if (NT_SUCCESS(Status) && (JournalCompletionPort != NULL)) {
|
|
|
|
//
|
|
// Set the ref count and put the new entry on the queue.
|
|
// This will get the JournalReadThread to start looking at the
|
|
// completion port. Save the volume handle.
|
|
//
|
|
pVme->VolumeHandle = VolumeHandle;
|
|
pVme->ActiveReplicas = 0;
|
|
//
|
|
// Start Ref count at 2. One for being on the VolumeMonitorQueue and
|
|
// one for the initial allocation. The latter is released at VME shutdown.
|
|
//
|
|
pVme->ReferenceCount = 2;
|
|
pVme->JournalState = JRNL_STATE_INITIALIZING;
|
|
FrsRtlInsertTailQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
|
|
|
|
DPRINT2(4, ":S: Create Usn Journal success on %ws, Total vols: %d\n",
|
|
pVme->FSVolInfo.VolumeLabel, VolumeMonitorQueue.Count);
|
|
} else {
|
|
|
|
//
|
|
// Journal creation or CreateIoCompletionPort failed. Clean up.
|
|
//
|
|
WStatus = GetLastError();
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
DPRINT_NT(0, ":S: ERROR - Create Usn Journal failed.", Status );
|
|
|
|
if (JournalCompletionPort == NULL) {
|
|
DPRINT_WS(0, ":S: ERROR - Failed to create IoCompletion port.", WStatus);
|
|
Status = STATUS_UNSUCCESSFUL;
|
|
}
|
|
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorJournalInitFailed;
|
|
return FrsSetLastNTError(Status);
|
|
}
|
|
|
|
//
|
|
// Find end of journal for use in recovery and new replica set creates.
|
|
//
|
|
WStatus = JrnlGetEndOfJournal(pVme, &pVme->JrnlRecoveryEnd);
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
pVme = FrsFreeType(pVme);
|
|
FRS_CLOSE(VolumeHandle);
|
|
Replica->FStatus = FrsErrorJournalInitFailed;
|
|
return WStatus;
|
|
}
|
|
|
|
DPRINT1(3, ":S: Current End of journal at : %08x %08x\n", PRINTQUAD(pVme->JrnlRecoveryEnd));
|
|
|
|
if ((ConfigRecord->LastShutdown == (ULONGLONG)0) ||
|
|
(ConfigRecord->ServiceState == CNF_SERVICE_STATE_CREATING) ||
|
|
(ConfigRecord->CnfUsnJournalID == (ULONGLONG)0)) {
|
|
|
|
pVme->JrnlReadPoint = pVme->JrnlRecoveryEnd;
|
|
DPRINT1(4, ":S: Initial journal read starting at: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
|
|
}
|
|
|
|
//
|
|
// Allocate a volume filter hash table.
|
|
//
|
|
_snprintf(HashTableName, sizeof(HashTableName), "FT_%ws", VolumeInfo->VolumeLabel);
|
|
|
|
pVme->FilterTable = GhtCreateTable(
|
|
HashTableName, // Table name
|
|
VOLUME_FILTER_HASH_TABLE_ROWS, // NumberRows
|
|
OFFSET(FILTER_TABLE_ENTRY, DFileID), // KeyOffset is dir fid
|
|
sizeof(LONGLONG), // KeyLength
|
|
JrnlHashEntryFree,
|
|
JrnlCompareFid,
|
|
JrnlHashCalcFid,
|
|
FRS_JOURNAL_FILTER_PRINT_FUNCTION);
|
|
|
|
//
|
|
// Allocate a parent File ID hash table for the volume.
|
|
//
|
|
// The volume parent file ID table is a specialized Qhash table intended to
|
|
// economize on memory. There is an entry in this table for every file
|
|
// in a replica set on the volume. There is one of these tables for each
|
|
// volume. Its goal in life is to give us the Old Parent Fid for a file
|
|
// after a rename. The USN journal only provides the new Parent FID.
|
|
// Once we have the old parent FID for a file or dir we can then do a lookup
|
|
// in the Volume Filter Table to determine the file's previous replica set
|
|
// so we can determine if a file or dir has moved across replica sets or
|
|
// out of a replica set entirely.
|
|
//
|
|
//
|
|
pVme->ParentFidTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
|
|
PARENT_FILEID_TABLE_SIZE);
|
|
SET_QHASH_TABLE_HASH_CALC(pVme->ParentFidTable, JrnlHashCalcFid);
|
|
|
|
//
|
|
// Allocate an Active Child hash table for the volume.
|
|
//
|
|
pVme->ActiveChildren = FrsAllocTypeSize(QHASH_TABLE_TYPE,
|
|
ACTIVE_CHILDREN_TABLE_SIZE);
|
|
|
|
SET_QHASH_TABLE_FLAG(pVme->ActiveChildren, QHASH_FLAG_LARGE_KEY);
|
|
SET_QHASH_TABLE_HASH_CALC2(pVme->ActiveChildren, ActiveChildrenHashCalc);
|
|
SET_QHASH_TABLE_KEY_MATCH(pVme->ActiveChildren, ActiveChildrenKeyMatch);
|
|
SET_QHASH_TABLE_FREE(pVme->ActiveChildren, FrsFree);
|
|
//
|
|
// Allocate a USN Write Filter Table for the volume and post the first
|
|
// clean request.
|
|
//
|
|
pVme->FrsWriteFilter = FrsAllocTypeSize(QHASH_TABLE_TYPE,
|
|
FRS_WRITE_FILTER_SIZE);
|
|
SET_QHASH_TABLE_HASH_CALC(pVme->FrsWriteFilter, JrnlHashCalcUsn);
|
|
JrnlSubmitCleanWriteFilter(pVme, JRNL_CLEAN_WRITE_FILTER_INTERVAL);
|
|
|
|
|
|
#ifdef RECOVERY_CONFLICT
|
|
//
|
|
// Allocate a Recovery Conflict hash table for the volume.
|
|
//
|
|
pVme->RecoveryConflictTable = FrsAllocTypeSize(QHASH_TABLE_TYPE,
|
|
RECOVERY_CONFLICT_TABLE_SIZE);
|
|
SET_QHASH_TABLE_HASH_CALC(pVme->RecoveryConflictTable, JrnlHashCalcFid);
|
|
#endif // RECOVERY_CONFLICT
|
|
|
|
//
|
|
// Allocate a hash table to record file name dependencies between file
|
|
// operations on this volume in the NTFS journal USN record stream.
|
|
// This is called the Name Space Table and it is used to control when
|
|
// a USN record can be merged into a prior change order affecting the same
|
|
// file. Some examples of when a USN record merge can not be done are
|
|
// given elsewhere, search for USN MERGE RESTRICTIONS.
|
|
//
|
|
pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
|
|
pVme->NameSpaceTable = FrsAllocTypeSize(QHASH_TABLE_TYPE, NAME_SPACE_TABLE_SIZE);
|
|
SET_QHASH_TABLE_HASH_CALC(pVme->NameSpaceTable, NoHashBuiltin);
|
|
|
|
//
|
|
// Allocate a hash table to record file old names on a rename operation.
|
|
// THe index is the File ID, the data field has a ptr to a USN record.
|
|
//
|
|
pVme->RenOldNameTable = FrsFreeType(pVme->RenOldNameTable);
|
|
pVme->RenOldNameTable = FrsAllocTypeSize(QHASH_TABLE_TYPE, RENAME_OLD_TABLE_SIZE);
|
|
SET_QHASH_TABLE_HASH_CALC(pVme->RenOldNameTable, JrnlHashCalcFid);
|
|
SET_QHASH_TABLE_FREE(pVme->RenOldNameTable, FrsFree);
|
|
|
|
//
|
|
// Allocate a Change Order Aging table for this volume.
|
|
//
|
|
sprintf(HashTableName, "CO_%ws", VolumeInfo->VolumeLabel);
|
|
|
|
pVme->ChangeOrderTable = GhtCreateTable(
|
|
HashTableName, // Table name
|
|
REPLICA_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
|
|
REPLICA_CHANGE_ORDER_ENTRY_KEY, // KeyOffset
|
|
REPLICA_CHANGE_ORDER_ENTRY_KEY_LENGTH, // KeyLength
|
|
JrnlHashEntryFree,
|
|
JrnlCompareFid,
|
|
JrnlHashCalcFid,
|
|
FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
|
|
|
|
//
|
|
// Allocate an Active Inbound Change Order hash table for this volume.
|
|
//
|
|
sprintf(HashTableName, "AIBCO_%ws", VolumeInfo->VolumeLabel);
|
|
|
|
pVme->ActiveInboundChangeOrderTable = GhtCreateTable(
|
|
HashTableName, // Table name
|
|
ACTIVE_INBOUND_CHANGE_ORDER_HASH_TABLE_ROWS, // NumberRows
|
|
REPLICA_CHANGE_ORDER_FILEGUID_KEY, // KeyOffset
|
|
REPLICA_CHANGE_ORDER_FILEGUID_KEY_LENGTH, // KeyLength
|
|
JrnlHashEntryFree,
|
|
JrnlCompareGuid,
|
|
JrnlHashCalcGuid,
|
|
FRS_JOURNAL_CHANGE_ORDER_PRINT_FUNCTION);
|
|
|
|
//
|
|
// Add the volume change order list to the global change order list.
|
|
//
|
|
FrsInitializeQueue(&pVme->ChangeOrderList, &FrsVolumeLayerCOList);
|
|
pVme->InitTime = GetTickCount();
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
//
|
|
// Return the Volume Monitor entry pointer.
|
|
//
|
|
*pVmeArg = pVme;
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
#if 0
|
|
ULONG
|
|
JrnlCheckStartFailures(
|
|
PFRS_QUEUE Queue
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check for any failures where we couldn't get the first journal read started.
|
|
|
|
Arguments:
|
|
|
|
A queue with Volume Monitor Entries on it.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if all journal reads started. (the list is empty).
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCheckStartFailures:"
|
|
|
|
PLIST_ENTRY Entry;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
ULONG WStatus, RetStatus;
|
|
|
|
|
|
FrsRtlAcquireQueueLock(Queue);
|
|
|
|
Entry = GetListHead(&Queue->ListHead);
|
|
|
|
if (Entry == &Queue->ListHead) {
|
|
DPRINT(4, ":S: JrnlCheckStartFailures - Queue empty.\n");
|
|
}
|
|
|
|
RetStatus = ERROR_SUCCESS;
|
|
|
|
while (Entry != &Queue->ListHead) {
|
|
|
|
pVme = CONTAINING_RECORD(Entry, VOLUME_MONITOR_ENTRY, ListEntry);
|
|
|
|
WStatus = pVme->WStatus;
|
|
RetStatus = ERROR_GEN_FAILURE;
|
|
|
|
if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_IO_PENDING)) {
|
|
//
|
|
// The I/O was not started. Check error return.
|
|
//
|
|
|
|
if (WStatus == ERROR_NOT_FOUND) {
|
|
//
|
|
// Starting USN is not in the Journal. We may have missed
|
|
// some locally originated changes to the replica. This
|
|
// is very bad because we now have to walk the replica
|
|
// tree and the IDTable to see what has changed.
|
|
//
|
|
// Walk the replica sets using this VME and compare their
|
|
// starting USNs with the oldest USN record available on
|
|
// the volume. If it's there then we can at least start
|
|
// those replica sets. Whats left has to be handled the
|
|
// long way.
|
|
//
|
|
//
|
|
// add code to sync up the tree
|
|
//
|
|
DPRINT1(0, ":S: Usn %08lx %08lx has been deleted.\n",
|
|
PRINTQUAD(pVme->JrnlReadPoint));
|
|
DPRINT(0, ":S: Data lost, resync required on Replica ...\n");
|
|
JrnlClose(pVme->VolumeHandle);
|
|
} else {
|
|
DPRINT_WS(0, "Error from JrnlCheckStartFailures", WStatus);
|
|
DPRINT1(0, ":S: ERROR - Replication not started for any replica sets on volume %ws\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
}
|
|
} else {
|
|
DPRINT_WS(0, "Error from JrnlCheckStartFailures", WStatus);
|
|
DPRINT1(0, ":S: ERROR - Replication should have been started for replica sets on volume %ws\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
}
|
|
|
|
Entry = GetListNext(Entry);
|
|
}
|
|
|
|
FrsRtlReleaseQueueLock(Queue);
|
|
return RetStatus;
|
|
}
|
|
#endif
|
|
|
|
|
|
ULONG
|
|
JrnlPauseVolume(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN DWORD MilliSeconds
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Pause journal read activity on the specified volume. This routine
|
|
queues a completion packet to the journal read thread telling it
|
|
to pause I/O the volume. We then then wait on the event handle in
|
|
the Vme struct.
|
|
|
|
Once the read thread stops I/O on the volume it queues a CMD_JOURNAL_PAUSED
|
|
packet to the journal process queue. When this command is processed we
|
|
know that any prior journal buffers that have been queued for this
|
|
volume are now processed so we can signal the event to let the waiter
|
|
proceed.
|
|
|
|
Arguments:
|
|
|
|
pVme: The volume to pause.
|
|
|
|
MilliSeconds - Timeout
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlPauseVolume:"
|
|
|
|
ULONG WStatus;
|
|
ULONG RetryCount = 10;
|
|
|
|
DPRINT2(5, "***** Pause on Volume %ws - Journal State: %s *****\n",
|
|
pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
|
|
|
|
RETRY:
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
//
|
|
// Check if paused already.
|
|
//
|
|
if ((pVme->JournalState == JRNL_STATE_PAUSED) ||
|
|
(pVme->JournalState == JRNL_STATE_INITIALIZING)) {
|
|
WStatus = ERROR_SUCCESS;
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// Check if pause is in progress.
|
|
//
|
|
if ((pVme->JournalState == JRNL_STATE_PAUSE1) ||
|
|
(pVme->JournalState == JRNL_STATE_PAUSE2)) {
|
|
goto WAIT;
|
|
}
|
|
|
|
//
|
|
// If I/O is not active on this volume then request is invalid.
|
|
//
|
|
if (pVme->JournalState != JRNL_STATE_ACTIVE) {
|
|
WStatus = ERROR_INVALID_FUNCTION;
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// Submit the pause request to the journal read thread.
|
|
//
|
|
WStatus = JrnlSubmitReadThreadRequest(pVme,
|
|
FRS_PAUSE_JOURNAL_READ,
|
|
JRNL_STATE_PAUSE1);
|
|
if (WStatus == ERROR_BUSY) {
|
|
//
|
|
// Overlapped struct is in use. Retry a few times then bail.
|
|
//
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
if (--RetryCount == 0) {
|
|
return ERROR_BUSY;
|
|
}
|
|
Sleep(250);
|
|
goto RETRY;
|
|
}
|
|
|
|
WAIT:
|
|
//
|
|
// Drop the lock and wait on the event.
|
|
//
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
WStatus = WaitForSingleObject(pVme->Event, MilliSeconds);
|
|
CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_RETURN);
|
|
|
|
//
|
|
// Check the result state.
|
|
//
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
WStatus = (pVme->JournalState == JRNL_STATE_PAUSED) ?
|
|
ERROR_SUCCESS : WAIT_FAILED;
|
|
|
|
RETURN:
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlUnPauseVolume(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN PJBUFFER Jbuff,
|
|
IN BOOL HaveLock
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Un-Pause journal read activity on the specified volume.
|
|
This routine starts up journal read activity on a volume that has
|
|
been previously paused. It kicks off an async read on the volume
|
|
which will complete on the completion port.
|
|
|
|
This routine is called both to initially start activity on a Journal and
|
|
to start the next read on a journal.
|
|
|
|
If you are initiating the first journal read or restarting the journal
|
|
after a pause you need to set the journal state to JRNL_STATE_STARTING
|
|
before calling this routine. e.g.
|
|
|
|
pVme->JournalState = JRNL_STATE_STARTING;
|
|
|
|
On the very first call to start the journal the JournalState should
|
|
be JRNL_STATE_INITIALIZING. This causes an initial set of journal
|
|
data buffers to be allocated. Otherwise we get a buffer from the
|
|
JournalFreeQueue.
|
|
|
|
Arguments:
|
|
|
|
pVme: The volume to un-pause.
|
|
|
|
Jbuff: An optional caller supplied Journal buffer. If NULL we get
|
|
one off the free list here.
|
|
|
|
HaveLock: TRUE means the caller has acquired the volume monitor lock.
|
|
FALSE means we acquire it and release it here.
|
|
|
|
Return Value:
|
|
|
|
Win32 status
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlUnPauseVolume:"
|
|
|
|
PLIST_ENTRY Entry;
|
|
ULONG WStatus;
|
|
NTSTATUS Status;
|
|
BOOL AllocJbuff = (Jbuff == NULL);
|
|
ULONG SaveJournalState = JRNL_STATE_ERROR;
|
|
ULONG i;
|
|
LONG RetryCount;
|
|
|
|
DPRINT2(5, "***** UnPause on Volume %ws - Journal State: %s *****\n",
|
|
pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
|
|
|
|
|
|
//
|
|
// Get the buffer first so we don't block waiting for a free buffer
|
|
// holding the VolumeMonitorQueue lock.
|
|
//
|
|
|
|
if (AllocJbuff) {
|
|
|
|
if (pVme->JournalState == JRNL_STATE_INITIALIZING) {
|
|
//
|
|
// Allocate a journal buffer from memory if this is a fresh start.
|
|
//
|
|
Jbuff = FrsAllocType(JBUFFER_TYPE);
|
|
//DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
|
|
} else {
|
|
//
|
|
// Get a journal buffer from the free list.
|
|
// We wait here until a buffer is available.
|
|
//
|
|
if (HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
Entry = FrsRtlRemoveHeadQueue(&JournalFreeQueue);
|
|
if (HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
|
|
|
|
if (Entry == NULL) {
|
|
//
|
|
// Check for abort and cancel all I/O.
|
|
//
|
|
DPRINT(0, "ERROR-JournalFreeQueue Abort.\n");
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
return ERROR_REQUEST_ABORTED;
|
|
}
|
|
|
|
Jbuff = CONTAINING_RECORD(Entry, JBUFFER, ListEntry);
|
|
//DPRINT1(5, "jb: ff %08x\n", Jbuff);
|
|
}
|
|
}
|
|
|
|
if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
|
|
|
|
//
|
|
// Check if paused already or stopped. If so, ignore the request.
|
|
//
|
|
if ((pVme->JournalState != JRNL_STATE_STARTING) &&
|
|
(pVme->JournalState != JRNL_STATE_INITIALIZING) &&
|
|
(pVme->JournalState != JRNL_STATE_ACTIVE)) {
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
WStatus = ERROR_SUCCESS;
|
|
goto ERROR_RETURN;
|
|
}
|
|
|
|
//
|
|
// If there is already an I/O active don't start another. This can happen
|
|
// when the IOCancel() from a previous Pause request fails to abort the
|
|
// current journal read immediately. Now the unpause request starts a
|
|
// second I/O on the volume. In theory this should be benign since the
|
|
// cancel from the first pause will abort the first read request and the
|
|
// 2nd should complete normally.
|
|
//
|
|
// For now just mark the journal as Active again so when the currently
|
|
// outstanding request completes (or aborts) another read request is issued.
|
|
//
|
|
if (pVme->ActiveIoRequests != 0) {
|
|
DPRINT1(3, "UnPause on volume with non-zero ActiveIoRequest Count: %d\n",
|
|
pVme->ActiveIoRequests);
|
|
if (pVme->ReplayUsnValid) {
|
|
DPRINT(3, "Replay USN is valid. Waiting for ActiveIoRequest to go to zero\n");
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
//
|
|
// Unfortunately if this call is from the journal read thread
|
|
// v.s. another thread unpausing the volume the journal read
|
|
// thread won't be able to decrement the ActiveIoRequests.
|
|
//
|
|
Sleep(5000);
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
if (pVme->ActiveIoRequests != 0) {
|
|
DPRINT1(3, "ActiveIoRequest still non-zero: %d. Skip replay\n",
|
|
pVme->ActiveIoRequests);
|
|
pVme->ReplayUsnValid = FALSE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// The requests have not yet finished. For now just mark the
|
|
// journal as Active again so when the currently outstanding
|
|
// request completes (or aborts) another read request is issued.
|
|
//
|
|
if (pVme->ActiveIoRequests != 0) {
|
|
pVme->IoActive = TRUE;
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
|
|
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
|
|
WStatus = ERROR_SUCCESS;
|
|
goto ERROR_RETURN;
|
|
}
|
|
|
|
//
|
|
// FALL THRU means startup a read on the journal.
|
|
//
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// If we are just starting up or restarting from a pause and the
|
|
// Replay USN is valid then start reading from there.
|
|
//
|
|
if ((pVme->JournalState != JRNL_STATE_ACTIVE) && pVme->ReplayUsnValid) {
|
|
DPRINT1(4, "JrnlReadPoint was: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
|
|
pVme->JrnlReadPoint = pVme->ReplayUsn;
|
|
pVme->ReplayUsnValid = FALSE;
|
|
DPRINT1(4, "Loading JrnlReadPoint from ReplayUsn: %08x %08x\n", PRINTQUAD(pVme->ReplayUsn));
|
|
}
|
|
|
|
pVme->IoActive = TRUE;
|
|
pVme->StopIo = FALSE; // VME Overlap struct available.
|
|
|
|
SaveJournalState = pVme->JournalState;
|
|
if (pVme->JournalState != JRNL_STATE_ACTIVE) {
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_ACTIVE);
|
|
}
|
|
|
|
pVme->ActiveIoRequests += 1;
|
|
FRS_ASSERT(pVme->ActiveIoRequests == 1);
|
|
|
|
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
|
|
//
|
|
// Post a read on this journal handle to get things started.
|
|
// Note ownership of the buffer goes to another thread via the
|
|
// I/O Completion port so we can't change or look at it
|
|
// (without a lock) unless the read failed. Even if the read
|
|
// completes synchronously the I/O still completes via the port.
|
|
// The same is true of the related VME struct.
|
|
//
|
|
// An NTSTATUS return of STATUS_JOURNAL_ENTRY_DELETED means the requested
|
|
// USN record is no longer in the Journal (i.e. the journal has
|
|
// wrapped). The corresponding win32 error is ERROR_JOURNAL_ENTRY_DELETED.
|
|
//
|
|
|
|
RetryCount = 100;
|
|
|
|
RETRY_READ:
|
|
Status = FrsIssueJournalAsyncRead(Jbuff, pVme);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
|
|
if (!HaveLock) { FrsRtlAcquireQueueLock(&VolumeMonitorQueue); }
|
|
if (Status == STATUS_JOURNAL_ENTRY_DELETED) {
|
|
DPRINT(0, " +-+-+-+-+-+- JOURNAL WRAPPED +-+-+-+-+-+-+-+-+-+-\n");
|
|
|
|
//
|
|
// The journal wrapped.
|
|
//
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
|
|
|
|
} else
|
|
if ((Status == STATUS_JOURNAL_DELETE_IN_PROGRESS) ||
|
|
(Status == STATUS_JOURNAL_NOT_ACTIVE)) {
|
|
|
|
DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
|
|
DPRINT(0, "Journal is or is being deleted. FRS requires the NTFS Journal.\n");
|
|
DisplayNTStatus(Status);
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
|
|
|
|
} else
|
|
if (Status == STATUS_DATA_ERROR) {
|
|
//
|
|
// Internal NTFS detected errors: e.g.
|
|
// - Usn record size is not quad-aligned
|
|
// - Usn record size extends beyond the end of the Usn page
|
|
// - Usn record size isn't large enough to contain the Usn record
|
|
// - Usn record size extends beyond end of usn journal
|
|
//
|
|
DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
|
|
DPRINT(0, "Journal internal inconsistency detected by NTFS.\n");
|
|
DisplayNTStatus(Status);
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
|
|
|
|
} else {
|
|
DPRINT(0, " +-+-+-+-+-+- ERROR RETURN FROM FrsIssueJournalAsyncRead +-+-+-+-+-+-+-+-+-+-\n");
|
|
DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead : ", Status);
|
|
DPRINT_NT(0, "ERROR - FrsIssueJournalAsyncRead Iosb.Status: ", Jbuff->Iosb.Status);
|
|
|
|
if ((Status == STATUS_INVALID_PARAMETER) && (RetryCount-- > 0)) {
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
Sleep(500);
|
|
goto RETRY_READ;
|
|
}
|
|
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, REPLICA_STATE_JRNL_WRAP_ERROR);
|
|
// FRS_ASSERT(FALSE);
|
|
}
|
|
//
|
|
// Restore old journal state.
|
|
//
|
|
pVme->JournalState = SaveJournalState;
|
|
pVme->ActiveIoRequests -= 1;
|
|
FRS_ASSERT(pVme->ActiveIoRequests == 0);
|
|
|
|
if (!HaveLock) { FrsRtlReleaseQueueLock(&VolumeMonitorQueue); }
|
|
|
|
WStatus = FrsSetLastNTError(Status);
|
|
DPRINT_WS(0, "Error from FrsIssueJournalAsyncRead", WStatus);
|
|
//
|
|
// Error starting the read. Free Jbuff and return the error.
|
|
//
|
|
|
|
goto ERROR_RETURN;
|
|
}
|
|
|
|
//
|
|
// IO has started. If this was a fresh start add a few more buffers
|
|
// on the free list so there are enough to work with.
|
|
//
|
|
if (SaveJournalState == JRNL_STATE_INITIALIZING) {
|
|
for (i=0; i<(NumberOfJounalBuffers-1); i++) {
|
|
Jbuff = FrsAllocType(JBUFFER_TYPE);
|
|
//DPRINT1(5, "jb: Am %08x (alloc mem)\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
}
|
|
}
|
|
|
|
return ERROR_SUCCESS;
|
|
|
|
ERROR_RETURN:
|
|
|
|
//
|
|
// If we allocated a journal buffer here then give it back.
|
|
//
|
|
if (AllocJbuff && (Jbuff != NULL)) {
|
|
if (SaveJournalState == JRNL_STATE_INITIALIZING) {
|
|
//DPRINT1(5, "jb: fm %08x (free mem)\n", Jbuff);
|
|
Jbuff = FrsFreeType(Jbuff);
|
|
} else {
|
|
//DPRINT1(5, "jb: tf %08x\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
}
|
|
}
|
|
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlSubmitReadThreadRequest(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN ULONG Request,
|
|
IN ULONG NewState
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine posts a completion status packet on the journal I/O
|
|
completion port. This is used to either stop journal I/O or just
|
|
pause it while making changes to the filter table. When the journal
|
|
read thread gets the request it will cancel journal I/O on the volume
|
|
handle (which can only be done from that thread). If the post is
|
|
successful then the JournalState is updated with NewState.
|
|
|
|
We Assume the caller has acquired the VolumeMonitorQueue lock.
|
|
|
|
Arguments:
|
|
|
|
pVme - the volume monitor entry with the state for this volume's journal.
|
|
|
|
Request - The request type. Either FRS_CANCEL_JOURNAL_READ or
|
|
FRS_PAUSE_JOURNAL_READ.
|
|
|
|
NewState - The new state for the journal if the submit succeeds.
|
|
|
|
Return Value:
|
|
|
|
A WIN32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlSubmitReadThreadRequest:"
|
|
|
|
ULONG WStatus;
|
|
PCHAR ReqStr;
|
|
|
|
|
|
if (Request == FRS_CANCEL_JOURNAL_READ) {
|
|
ReqStr = "cancel journal read";
|
|
|
|
} else
|
|
if (Request == FRS_PAUSE_JOURNAL_READ) {
|
|
ReqStr = "pause journal read";
|
|
|
|
} else {
|
|
DPRINT1(0, "ERROR - Invalid journal request: %08x\n", Request);
|
|
return ERROR_INVALID_PARAMETER;
|
|
}
|
|
|
|
if (pVme->StopIo) {
|
|
return ERROR_BUSY;
|
|
}
|
|
|
|
if (JournalCompletionPort == NULL) {
|
|
return ERROR_INVALID_HANDLE;
|
|
}
|
|
|
|
DPRINT2(5, "Queueing %s IO req on Volume %ws.\n",
|
|
ReqStr, pVme->FSVolInfo.VolumeLabel);
|
|
|
|
//
|
|
// Clear the pVme event if the request is to start a stop or pause sequence.
|
|
// Mark the overlapped struct busy,
|
|
// Submit the pause request to the journal read thread.
|
|
//
|
|
if ((NewState == JRNL_STATE_STOPPING) ||
|
|
(NewState == JRNL_STATE_PAUSE1)) {
|
|
ResetEvent(pVme->Event);
|
|
}
|
|
|
|
pVme->StopIo = TRUE;
|
|
|
|
if (!PostQueuedCompletionStatus(
|
|
JournalCompletionPort,
|
|
Request,
|
|
(ULONG_PTR) pVme,
|
|
&pVme->CancelOverlap)) {
|
|
|
|
WStatus = GetLastError();
|
|
DPRINT2_WS(0, "ERROR - Failed on PostQueuedCompletionStatus of %s on %ws :",
|
|
ReqStr, pVme->FSVolInfo.VolumeLabel, WStatus);
|
|
return WStatus;
|
|
}
|
|
|
|
//
|
|
// pkt submited. Update state.
|
|
//
|
|
pVme->JournalState = NewState;
|
|
|
|
DPRINT1(5, "Packet submitted. Jrnl state is %s\n", RSS_NAME(NewState));
|
|
|
|
return ERROR_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
JrnlShutdownSingleReplica(
|
|
IN PREPLICA Replica,
|
|
IN BOOL HaveLock
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Detach this replica from its journal. Decrement the ActiveReplicas count
|
|
on the VME. If zero post a completion packet to the JournalCompletionPort
|
|
so the pending journal read request can be canceled by the read thread.
|
|
If no journal thread is active we do it all here.
|
|
|
|
If the volume monitor queue is left empty, we close the completion port.
|
|
|
|
The caller must have acquired the pVme->ReplicaListHead lock.
|
|
|
|
Arguments:
|
|
|
|
Replica -- Replica set to detach.
|
|
|
|
HaveLock -- TRUE if the caller has acquired the VolumeMonitorQueue
|
|
lock else we get it here.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlShutdownSingleReplica:"
|
|
|
|
ULONG GStatus;
|
|
LIST_ENTRY DeadList;
|
|
PFRS_QUEUE FrsTempList;
|
|
ULONG WStatus = ERROR_SUCCESS;
|
|
PVOLUME_MONITOR_ENTRY pVme = Replica->pVme;
|
|
|
|
|
|
DPRINT1(4, ":S: <<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
if (!HaveLock) {
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
FrsRtlAcquireQueueLock(&pVme->ReplicaListHead);
|
|
}
|
|
|
|
if (pVme->ActiveReplicas == 0) {
|
|
DPRINT1(0, ":S: ActiveReplicas count already zero on %ws\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
WStatus = ERROR_INVALID_HANDLE;
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// It is possible that this replica struct never made it onto the list
|
|
// if it went into the error state during init or startup.
|
|
//
|
|
if (Replica->VolReplicaList.Flink == NULL) {
|
|
DPRINT2(0, ":S: WARN: Replica struct not on pVme ReplicaListHead for on %ws. Current replica State: %s\n",
|
|
pVme->FSVolInfo.VolumeLabel, RSS_NAME(Replica->ServiceState));
|
|
WStatus = ERROR_INVALID_HANDLE;
|
|
goto RETURN;
|
|
}
|
|
|
|
//
|
|
// Remove replica from the VME list.
|
|
//
|
|
FrsRtlRemoveEntryListLock(&pVme->ReplicaListHead, &Replica->VolReplicaList);
|
|
pVme->ActiveReplicas -= 1;
|
|
ReleaseVmeRef(pVme);
|
|
|
|
DPRINT3(4, "Removed %ws from VME %ws. %d Replicas remain.\n",
|
|
Replica->ReplicaName->Name, pVme->FSVolInfo.VolumeLabel,
|
|
pVme->ActiveReplicas);
|
|
|
|
//
|
|
// IF this is the last active Replica on the volume then stop
|
|
// I/O on the journal.
|
|
//
|
|
if (!IsListEmpty(&pVme->ReplicaListHead.ListHead)) {
|
|
WStatus = ERROR_SUCCESS;
|
|
goto RETURN;
|
|
}
|
|
|
|
if (pVme->ActiveReplicas != 0) {
|
|
DPRINT2(0, ":S: ERROR - pVme->ReplicaListHead is empty but ActiveReplicas count is non-zero (%d) on %ws\n",
|
|
pVme->ActiveReplicas, pVme->FSVolInfo.VolumeLabel);
|
|
DPRINT(0, ":S: ERROR - Stopping the journal anyway\n");
|
|
pVme->ActiveReplicas = 0;
|
|
}
|
|
|
|
//
|
|
// This is the last Replica set on the volume. Stop the journal.
|
|
//
|
|
if (!HANDLE_IS_VALID(JournalReadThreadHandle)) {
|
|
|
|
//
|
|
// There is no Journal thread. Put the VME on the
|
|
// stop queue and Close the handle here.
|
|
//
|
|
FrsRtlRemoveEntryQueueLock(&VolumeMonitorQueue, &pVme->ListEntry);
|
|
pVme->IoActive = FALSE;
|
|
pVme->WStatus = ERROR_SUCCESS;
|
|
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
|
|
|
|
DPRINT1(0, ":S: FrsRtlInsertTailQueue -- onto stop queue %08x\n", pVme);
|
|
FrsRtlInsertTailQueue(&VolumeMonitorStopQueue, &pVme->ListEntry);
|
|
|
|
FRS_CLOSE(pVme->VolumeHandle);
|
|
ReleaseVmeRef(pVme);
|
|
|
|
if ((VolumeMonitorQueue.Count == 0) &&
|
|
(JournalCompletionPort != NULL)) {
|
|
//
|
|
// Close the completion port.
|
|
//
|
|
// FRS_CLOSE(JournalCompletionPort);
|
|
}
|
|
|
|
} else {
|
|
//
|
|
// if I/O not already stopping, queue a completion packet
|
|
// to the journal read thread to cancel the I/O.
|
|
// The journal read thread will then put the VME on the
|
|
// VolumeMonitorStopQueue. If we did it here the VME would
|
|
// go to the Stop queue and the ActiveReplicas count would
|
|
// be decremented before I/O has actually stopped on the journal.
|
|
//
|
|
WStatus = JrnlSubmitReadThreadRequest(pVme,
|
|
FRS_CANCEL_JOURNAL_READ,
|
|
JRNL_STATE_STOPPING);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
DPRINT2(0, ":S: ERROR: JrnlSubmitReadThreadRequest to stop Journal Failed on %ws. Current Journal State: %s\n",
|
|
pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
|
|
DPRINT_WS(0, "ERROR: Status is", WStatus);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (DoDebug(5, DEBSUB)) {
|
|
// "TEST CODE VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV"
|
|
DPRINT(5, "\n");
|
|
DPRINT1(5, "==== start of volume change order hash table dump for %ws ===========\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
DPRINT(5, "\n");
|
|
GHT_DUMP_TABLE(5, pVme->ChangeOrderTable);
|
|
DPRINT(5, "\n");
|
|
DPRINT(5, "========= End of Change order hash table dump ================\n");
|
|
DPRINT(5, "\n");
|
|
|
|
|
|
|
|
DPRINT(5, "\n");
|
|
DPRINT1(5, "==== start of USN write filter table dump for %ws ===========\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
DPRINT(5, "\n");
|
|
|
|
QHashEnumerateTable(pVme->FrsWriteFilter, QHashDump, NULL);
|
|
DPRINT(5, "\n");
|
|
DPRINT(5, "==== End of USN write filter table dump ===========\n");
|
|
DPRINT(5, "\n");
|
|
|
|
|
|
DPRINT(5, "\n");
|
|
DPRINT1(5, "==== start of recovery conflict table dump for %ws ===========\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
DPRINT(5, "\n");
|
|
|
|
|
|
#ifdef RECOVERY_CONFLICT
|
|
QHashEnumerateTable(pVme->RecoveryConflictTable, QHashDump, NULL);
|
|
DPRINT(5, "\n");
|
|
DPRINT(5, "==== End of recovery conflict table dump ===========\n");
|
|
DPRINT(5, "\n");
|
|
#endif // RECOVERY_CONFLICT
|
|
}
|
|
|
|
// "TEST CODE ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
|
|
|
|
GHT_DUMP_TABLE(3, pVme->ActiveInboundChangeOrderTable);
|
|
|
|
//
|
|
// Drop the initial allocation ref so the count can drop to zero
|
|
// when the last reference is released.
|
|
//
|
|
ReleaseVmeRef(pVme);
|
|
|
|
RETURN:
|
|
if (!HaveLock) {
|
|
FrsRtlReleaseQueueLock(&pVme->ReplicaListHead);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
}
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
|
|
VOID
|
|
JrnlCleanupVme(
|
|
IN PVOLUME_MONITOR_ENTRY pVme
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Free the VME storage when the ref count goes to zero. Called by the
|
|
ReleaseVmeRef() macro. Don't free the Vme proper because other threads
|
|
may still try to take out a ref on the Vme and they will test the ref count
|
|
for zero and fail.
|
|
|
|
Arguments:
|
|
|
|
pVme -- Volume Monitor Entry to close.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCleanupVme:"
|
|
|
|
|
|
USN PurgeUsn;
|
|
|
|
DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
if (pVme->ActiveReplicas != 0) {
|
|
DPRINT1(0, "ERROR - ActiveReplicas not yet zero on %ws\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
FRS_ASSERT(!"ActiveReplicas not yet zero on volume");
|
|
return;
|
|
}
|
|
|
|
#if 0
|
|
// Note: Don't delete the CO process queue here since CO Accept may still be cleaning up
|
|
// same with aging cache (ChangeOrderTable) and ActiveInboundChangeOrderTable
|
|
FrsRtlDeleteQueue(&pVme->ChangeOrderList);
|
|
|
|
GhtDestroyTable(pVme->ChangeOrderTable);
|
|
pVme->ChangeOrderTable = NULL;
|
|
|
|
//
|
|
// Cleanup the Active inbound CO Table.
|
|
//
|
|
GhtDestroyTable(pVme->ActiveInboundChangeOrderTable);
|
|
pVme->ActiveInboundChangeOrderTable = NULL;
|
|
#endif
|
|
|
|
//
|
|
// Release the Filter Table.
|
|
//
|
|
GhtDestroyTable(pVme->FilterTable);
|
|
pVme->FilterTable = NULL;
|
|
//
|
|
// Release the parent file ID table, the active children table,
|
|
// and the Volume Write Filter.
|
|
//
|
|
pVme->ParentFidTable = FrsFreeType(pVme->ParentFidTable);
|
|
pVme->FrsWriteFilter = FrsFreeType(pVme->FrsWriteFilter);
|
|
pVme->ActiveChildren = FrsFreeType(pVme->ActiveChildren);
|
|
|
|
#ifdef RECOVERY_CONFLICT
|
|
pVme->RecoveryConflictTable = FrsFreeType(pVme->RecoveryConflictTable);
|
|
#endif // RECOVERY_CONFLICT
|
|
|
|
|
|
DPRINT(4, "\n");
|
|
DPRINT1(4, "==== start of NameSpaceTable table dump for %ws ===========\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
DPRINT(4, "\n");
|
|
|
|
QHashEnumerateTable(pVme->NameSpaceTable, QHashDump, NULL);
|
|
DPRINT(4, "\n");
|
|
DPRINT(4, "==== End of NameSpaceTable table dump ===========\n");
|
|
DPRINT(4, "\n");
|
|
|
|
pVme->NameSpaceTable = FrsFreeType(pVme->NameSpaceTable);
|
|
|
|
//
|
|
// Remove all the entries from the RENAME_OLD_NAME table and free the table.
|
|
//
|
|
PurgeUsn = MAXLONGLONG;
|
|
QHashEnumerateTable(pVme->RenOldNameTable,
|
|
JrnlPurgeOldRenameWorker,
|
|
&PurgeUsn);
|
|
|
|
pVme->RenOldNameTable = FrsFreeType(pVme->RenOldNameTable);
|
|
|
|
// Note: stick the vme on a storage cleanup list
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlCloseVme(
|
|
IN PVOLUME_MONITOR_ENTRY pVme
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Close this Volume Monitor Entry by doing a shutdown on all replicas.
|
|
|
|
We assume the caller has taken the monitor queue lock.
|
|
|
|
Arguments:
|
|
|
|
pVme -- Volume Monitor Entry to close.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCloseVme:"
|
|
|
|
ULONG WStatus = ERROR_SUCCESS;
|
|
|
|
|
|
DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
if (pVme->ActiveReplicas == 0) {
|
|
DPRINT1(1, "ActiveReplicas count already zero on %ws\n",
|
|
pVme->FSVolInfo.VolumeLabel);
|
|
return ERROR_INVALID_HANDLE;
|
|
}
|
|
|
|
//
|
|
// Remove all active replicas from the VME list.
|
|
//
|
|
ForEachListEntry( &pVme->ReplicaListHead, REPLICA, VolReplicaList,
|
|
//
|
|
// The iterator pE is type PREPLICA.
|
|
// Caller must have taken the monitor queue lock to avoid lock order prob.
|
|
//
|
|
WStatus = JrnlShutdownSingleReplica(pE, TRUE);
|
|
DPRINT_WS(0, "Error from JrnlShutdownSingleReplica", WStatus);
|
|
);
|
|
|
|
if (pVme->ActiveReplicas != 0) {
|
|
DPRINT2(0, "ActiveReplicas count should be zero on %ws. It is %d\n",
|
|
pVme->FSVolInfo.VolumeLabel, pVme->ActiveReplicas);
|
|
WStatus = ERROR_GEN_FAILURE;
|
|
} else {
|
|
WStatus = ERROR_SUCCESS;
|
|
}
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
ULONG
|
|
JrnlCloseAll(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Close all entries on the VolumeMonitorQueue.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCloseAll:"
|
|
|
|
ULONG WStatus;
|
|
|
|
DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
if (IsListEmpty(&VolumeMonitorQueue.ListHead)) {
|
|
DPRINT(4, "JrnlCloseAll - VolumeMonitorQueue empty.\n");
|
|
}
|
|
|
|
//
|
|
// When all the volumes are stopped journal thread should exit instead
|
|
// of looking for work.
|
|
//
|
|
KillJournalThreads = TRUE;
|
|
ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
|
|
WStatus = JrnlCloseVme(pE);
|
|
|
|
if (pE->JournalState == JRNL_STATE_STOPPED) {
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Drop the lock and wait for the event.
|
|
//
|
|
if (pE->JournalState == JRNL_STATE_STOPPING) {
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
WStatus = WaitForSingleObject(pE->Event, 2000);
|
|
CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
|
|
|
|
//
|
|
// Check the result state.
|
|
//
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
if (pE->JournalState == JRNL_STATE_STOPPED) {
|
|
continue;
|
|
}
|
|
}
|
|
DPRINT2(1, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
|
|
pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
|
|
//
|
|
// Force it onto the stopped queue and set the state to ERROR.
|
|
//
|
|
if (pE->IoActive) {
|
|
SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
|
|
|
|
VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
|
|
}
|
|
);
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlClose(
|
|
IN HANDLE VolumeHandle
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine walks the VolumeMonitorQueue looking for the entry with the
|
|
given VolumeHandle. It then decrements the reference count and if zero
|
|
we post a completion packet to the JournalCompletionPort so the pending
|
|
journal read request can be canceled.
|
|
|
|
Arguments:
|
|
|
|
VolumeHandle -- The handle of the volume to close.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlClose:"
|
|
|
|
ULONG WStatus;
|
|
BOOL Found;
|
|
|
|
DPRINT1(4, "<<<<<<<...E N T E R I N G -- %s...>>>>>>>>\n", DEBSUB);
|
|
|
|
Found = FALSE;
|
|
|
|
ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
|
|
if (pE->VolumeHandle == VolumeHandle) {
|
|
|
|
//
|
|
// Handle matches. Close the Volume Monitor Entry.
|
|
//
|
|
Found = TRUE;
|
|
WStatus = JrnlCloseVme(pE);
|
|
if (pE->JournalState == JRNL_STATE_STOPPED) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Drop the lock and wait for the event.
|
|
//
|
|
if (pE->JournalState == JRNL_STATE_STOPPING) {
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
WStatus = WaitForSingleObject(pE->Event, 2000);
|
|
CHECK_WAIT_ERRORS(3, WStatus, 1, ACTION_CONTINUE);
|
|
|
|
//
|
|
// Check the result state.
|
|
//
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
if (pE->JournalState == JRNL_STATE_STOPPED) {
|
|
break;
|
|
}
|
|
}
|
|
DPRINT2(0, "ERROR: Request to stop Journal Failed on %ws. Current Journal State: %s\n",
|
|
pE->FSVolInfo.VolumeLabel, RSS_NAME(pE->JournalState));
|
|
//
|
|
// Force it onto the stopped queue and set the state to ERROR.
|
|
//
|
|
if (pE->IoActive) {
|
|
SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_ERROR);
|
|
VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
|
|
}
|
|
|
|
break;
|
|
}
|
|
);
|
|
|
|
if (!Found) {
|
|
DPRINT1(0, "ERROR - JrnlClose - Handle %08x not found in VolumeMonitorQueue\n",
|
|
VolumeHandle);
|
|
}
|
|
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
|
|
|
|
|
|
VOID
|
|
JrnlNewVsn(
|
|
IN PCHAR Debsub,
|
|
IN ULONG uLineNo,
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
IN OUT PULONGLONG NewVsn
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Assign a new VSN for this volume. Save a recovery point after
|
|
VSN_SAVE_INTERVAL VSNs have been handed out.
|
|
|
|
Arguments:
|
|
|
|
Debsub -- name of Function calling us for trace.
|
|
uLineNo -- Linenumber of caller for trace.
|
|
pVme -- Volume Monitor Entry with the Vsn state.
|
|
NewVsn -- Ptr to return Vsn
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlNewVsn:"
|
|
|
|
|
|
ULONGLONG TempVsn;
|
|
BOOL SaveFlag = FALSE;
|
|
|
|
|
|
LOCK_VME(pVme);
|
|
|
|
TempVsn = ++pVme->FrsVsn;
|
|
*NewVsn = TempVsn;
|
|
|
|
if ((TempVsn & (ULONGLONG) VSN_SAVE_INTERVAL) == QUADZERO) {
|
|
SaveFlag = TRUE;
|
|
|
|
|
|
DebPrint(4,
|
|
(PUCHAR) "++ VSN Save Triggered: NextVsn: %08x %08x"
|
|
" LastUsnSaved: %08x %08x CurrUsnDone: %08x %08x\n",
|
|
Debsub,
|
|
uLineNo,
|
|
PRINTQUAD(TempVsn),
|
|
PRINTQUAD(pVme->LastUsnSavePoint),
|
|
PRINTQUAD(pVme->CurrentUsnRecordDone));
|
|
|
|
if (pVme->LastUsnSavePoint < pVme->CurrentUsnRecordDone) {
|
|
pVme->LastUsnSavePoint = pVme->CurrentUsnRecordDone;
|
|
}
|
|
}
|
|
|
|
UNLOCK_VME(pVme);
|
|
|
|
if (SaveFlag) {
|
|
DbsRequestSaveMark(pVme, FALSE);
|
|
}
|
|
|
|
// Note: perf: check for change to use ExInterlockedAddLargeStatistic
|
|
// so we can pitch the LOCK_VME. Note the lock is also used to
|
|
// avoid quadword tearing on LastUsnSavePoint with USN save point
|
|
// test in the journal loop. Need to fix that too
|
|
|
|
}
|
|
|
|
|
|
|
|
NTSTATUS
|
|
FrsIssueJournalAsyncRead(
|
|
IN PJBUFFER Jbuff,
|
|
IN PVOLUME_MONITOR_ENTRY pVme
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine posts an async read to the journal specified by the handle
|
|
in the Vme using the buffer in Jbuff.
|
|
|
|
Note once the async I/O is submitted (and returns STATUS_PENDING)
|
|
the jbuffer and the VME go to another thread via the I/O Completion port
|
|
so neither we nor the caller can change or look at it unless
|
|
the read failed or completed synchronously (unless you have a lock).
|
|
This is because we could block right after the call, the I/O could complete
|
|
and the JournalReadThread could pick up and process the buffer before the
|
|
calling thread ever runs again.
|
|
|
|
Arguments:
|
|
|
|
Jbuff - The Journal Buffer to use for the read request.
|
|
|
|
pVme - The volume monitor entry for the Async Read,
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS status
|
|
|
|
The win32 error status is ERROR_NOT_FOUND when the USN is not found in
|
|
the journal.
|
|
|
|
|
|
--*/
|
|
{
|
|
|
|
#undef DEBSUB
|
|
#define DEBSUB "FrsIssueJournalAsyncRead:"
|
|
|
|
NTSTATUS Status;
|
|
ULONG WStatus;
|
|
|
|
READ_USN_JOURNAL_DATA ReadUsnJournalData;
|
|
|
|
|
|
// Current journal poll delay in NTFS is 2 seconds (doesn't apply for async reads)
|
|
#define DELAY_TIME ((LONGLONG)(-20000000))
|
|
#define FRS_USN_REASON_FILTER (USN_REASON_CLOSE | \
|
|
USN_REASON_FILE_CREATE | \
|
|
USN_REASON_RENAME_OLD_NAME)
|
|
|
|
|
|
//
|
|
// Setup the journal read parameters. BytesToWaitFor set to sizeof(USN)+1
|
|
// causes the read journal call to return after the first entry is placed
|
|
// in the buffer. JrnlReadPoint is the point in the journal to start the read.
|
|
// ReturnOnlyOnClose = TRUE means the returned journal entries only
|
|
// include close records (bit <31> of Reason field is set to one).
|
|
// Otherwise you get a record when any reason bit is set, e.g. create,
|
|
// first write, ...
|
|
//
|
|
|
|
ReadUsnJournalData.StartUsn = pVme->JrnlReadPoint; // USN JrnlReadPoint
|
|
ReadUsnJournalData.ReasonMask = FRS_USN_REASON_FILTER; // ULONG ReasonMask
|
|
ReadUsnJournalData.ReturnOnlyOnClose = FALSE; // ULONG ReturnOnlyOnClose
|
|
ReadUsnJournalData.Timeout = DELAY_TIME; // ULONGLONG Timeout
|
|
ReadUsnJournalData.BytesToWaitFor = sizeof(USN)+1; // ULONGLONG BytesToWaitFor
|
|
ReadUsnJournalData.UsnJournalID = pVme->UsnJournalData.UsnJournalID; // Journal ID.
|
|
|
|
//
|
|
// This read completes when either the buffer is full or the BytesToWaitFor
|
|
// parameter in the ReadUsnJournalData parameter block is exceeded.
|
|
// The DelayTime in the ReadUsnJournalData parameter block controls how
|
|
// often the NTFS code wakes up and checks the buffer. It is NOT a timeout
|
|
// on this call. Setting BytesToWaitFor to sizeof(USN) + 1
|
|
// means that as soon as any data shows up in the journal the call completes.
|
|
// Using this call with async IO lets us monitor a large number of volumes
|
|
// with a few threads.
|
|
//
|
|
// You can't really have multiple read requests outstanding on a single
|
|
// journal since you don't know where the next read will start until the
|
|
// previous read completes. Even though only one I/O can be outstanding
|
|
// per volume journal it is still possible to have multiple Jbuffs queued
|
|
// for USN processing because the rate of generating new journal entries
|
|
// may exceed the rate at which the data can be processed.
|
|
//
|
|
|
|
//
|
|
// Init the buffer Descriptor.
|
|
//
|
|
Jbuff->pVme = pVme;
|
|
Jbuff->Iosb.Information = 0;
|
|
Jbuff->Iosb.Status = 0;
|
|
Jbuff->Overlap.hEvent = NULL;
|
|
Jbuff->JrnlReadPoint = pVme->JrnlReadPoint;
|
|
Jbuff->WStatus = ERROR_IO_PENDING;
|
|
Jbuff->FileHandle = pVme->VolumeHandle;
|
|
//
|
|
// To catch I/O completions with no data.
|
|
//
|
|
ZeroMemory(Jbuff->DataBuffer, sizeof(USN) + sizeof(USN_RECORD));
|
|
|
|
InterlockedIncrement(&JournalActiveIoRequests);
|
|
|
|
Status = NtFsControlFile(
|
|
Jbuff->FileHandle, // IN HANDLE FileHandle,
|
|
NULL, // IN HANDLE Event OPTIONAL,
|
|
NULL, // IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
|
|
&Jbuff->Overlap, // IN PVOID ApcContext OPTIONAL,
|
|
&Jbuff->Iosb, // OUT PIO_STATUS_BLOCK IoStatusBlock,
|
|
FSCTL_READ_USN_JOURNAL, // IN ULONG FsControlCode,
|
|
&ReadUsnJournalData, // IN PVOID InputBuffer OPTIONAL,
|
|
sizeof(ReadUsnJournalData), // IN ULONG InputBufferLength,
|
|
Jbuff->DataBuffer, // OUT PVOID OutputBuffer OPTIONAL,
|
|
Jbuff->BufferSize ); // IN ULONG OutputBufferLength
|
|
|
|
WStatus = FrsSetLastNTError(Status);
|
|
DPRINT2_WS(4, "ReadUsnJournalData - NTStatus %08lx, USN = %08x %08x",
|
|
Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
|
|
//
|
|
// I/O not started so it doesn't complete through the port.
|
|
//
|
|
InterlockedDecrement(&JournalActiveIoRequests);
|
|
DPRINT2_WS(0, "ReadUsnJournalData Failed - NTStatus %08lx, USN = %08x %08x",
|
|
Status, PRINTQUAD(ReadUsnJournalData.StartUsn), WStatus);
|
|
}
|
|
|
|
return Status;
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlGetQueuedCompletionStatus(
|
|
HANDLE CompletionPort,
|
|
LPDWORD lpNumberOfBytesTransferred,
|
|
PULONG_PTR lpCompletionKey,
|
|
LPOVERLAPPED *lpOverlapped
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
** NOTE ** Imported version of Win32 function so we can access NTStatus
|
|
return value to seperate out the 32 odd NT to Win32 mappings for
|
|
the ERROR_INVALID_PARAMETER Win32 error code.
|
|
|
|
This function waits for pending I/O operations associated with the
|
|
specified completion port to complete. Server applications may have
|
|
several threads issuing this call on the same completion port. As
|
|
I/O operations complete, they are queued to this port. If threads
|
|
are actively waiting in this call, queued requests complete their
|
|
call.
|
|
|
|
This API returns a boolean value.
|
|
|
|
A value of TRUE means that a pending I/O completed successfully.
|
|
The the number of bytes transfered during the I/O, the completion
|
|
key that indicates which file the I/O occured on, and the overlapped
|
|
structure address used in the original I/O are all returned.
|
|
|
|
A value of FALSE indicates one ow two things:
|
|
|
|
If *lpOverlapped is NULL, no I/O operation was dequeued. This
|
|
typically means that an error occured while processing the
|
|
parameters to this call, or that the CompletionPort handle has been
|
|
closed or is otherwise invalid. GetLastError() may be used to
|
|
further isolate this.
|
|
|
|
If *lpOverlapped is non-NULL, an I/O completion packet was dequeud,
|
|
but the I/O operation resulted in an error. GetLastError() can be
|
|
used to further isolate the I/O error. The the number of bytes
|
|
transfered during the I/O, the completion key that indicates which
|
|
file the I/O occured on, and the overlapped structure address used
|
|
in the original I/O are all returned.
|
|
|
|
Arguments:
|
|
|
|
CompletionPort - Supplies a handle to a completion port to wait on.
|
|
|
|
lpNumberOfBytesTransferred - Returns the number of bytes transfered during the
|
|
I/O operation whose completion is being reported.
|
|
|
|
lpCompletionKey - Returns a completion key value specified during
|
|
CreateIoCompletionPort. This is a per-file key that can be used
|
|
to tall the caller the file that an I/O operation completed on.
|
|
|
|
lpOverlapped - Returns the address of the overlapped structure that
|
|
was specified when the I/O was issued. The following APIs may
|
|
complete using completion ports. This ONLY occurs if the file
|
|
handle is associated with with a completion port AND an
|
|
overlapped structure was passed to the API.
|
|
|
|
LockFileEx
|
|
WriteFile
|
|
ReadFile
|
|
DeviceIoControl
|
|
WaitCommEvent
|
|
ConnectNamedPipe
|
|
TransactNamedPipe
|
|
|
|
Return Value:
|
|
|
|
TRUE - An I/O operation completed successfully.
|
|
lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
|
|
are all valid.
|
|
|
|
FALSE - If lpOverlapped is NULL, the operation failed and no I/O
|
|
completion data is retured. GetLastError() can be used to
|
|
further isolate the cause of the error (bad parameters, invalid
|
|
completion port handle). Otherwise, a pending I/O operation
|
|
completed, but it completed with an error. GetLastError() can
|
|
be used to further isolate the I/O error.
|
|
lpNumberOfBytesTransferred, lpCompletionKey, and lpOverlapped
|
|
are all valid.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlGetQueuedCompletionStatus:"
|
|
|
|
IO_STATUS_BLOCK IoSb;
|
|
NTSTATUS Status;
|
|
LPOVERLAPPED LocalOverlapped;
|
|
BOOL rv;
|
|
|
|
Status = NtRemoveIoCompletion(CompletionPort,
|
|
(PVOID *)lpCompletionKey,
|
|
(PVOID *)&LocalOverlapped,
|
|
&IoSb,
|
|
NULL); // Infinite Timeout.
|
|
|
|
if ( !NT_SUCCESS(Status) || Status == STATUS_TIMEOUT ) {
|
|
*lpOverlapped = NULL;
|
|
|
|
if ( Status == STATUS_TIMEOUT ) {
|
|
SetLastError(WAIT_TIMEOUT);
|
|
} else {
|
|
FrsSetLastNTError(Status);
|
|
}
|
|
|
|
rv = FALSE;
|
|
DPRINT_NT(1, "NtRemoveIoCompletion : ", Status);
|
|
|
|
} else {
|
|
|
|
*lpOverlapped = LocalOverlapped;
|
|
|
|
*lpNumberOfBytesTransferred = (DWORD)IoSb.Information;
|
|
|
|
if ( !NT_SUCCESS(IoSb.Status) ){
|
|
FrsSetLastNTError( IoSb.Status );
|
|
DPRINT_NT(1, "NtRemoveIoCompletion : ", IoSb.Status);
|
|
rv = FALSE;
|
|
} else {
|
|
rv = TRUE;
|
|
}
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
JournalReadThread(
|
|
IN LPVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine processes the I/O completions on the JournalCompletionPort.
|
|
It also handles cancel requests posted to the port when the volume
|
|
reference count goes to zero. The basic flow is wait on the port,
|
|
check for errors, check for cancel requests and do a cancel, check for
|
|
read success returns. When data comes back. get the next USN to use,
|
|
queue the buffer to the JournalProcessQueue, get a new buffer off
|
|
the free list and post a new read to the journal handle.
|
|
|
|
For canceled requests or requests that complete with an error
|
|
put the Volume Monitor Entry on the VolumeMonitorStopQueue along with
|
|
the error status in the entry.
|
|
|
|
This one thread processes all the read requests for all the NTFS volumes
|
|
we monitor. Once the first read is posted by an external routine we
|
|
pick it up from here.
|
|
|
|
TODO: When we run out of free journal buffers, create more (up to a limit).
|
|
Then put code in the processing loop to trim back the freelist.
|
|
|
|
|
|
Arguments:
|
|
|
|
Context not used. The Journal Global state is implied.
|
|
|
|
Thread Return Value:
|
|
|
|
NTSTATUS status
|
|
|
|
|
|
--*/
|
|
{
|
|
|
|
#undef DEBSUB
|
|
#define DEBSUB "JournalReadThread:"
|
|
|
|
LPOVERLAPPED JbuffOverlap;
|
|
DWORD IoSize;
|
|
PVOLUME_MONITOR_ENTRY pVme;
|
|
PJBUFFER Jbuff;
|
|
ULONG WStatus, WStatus2;
|
|
NTSTATUS Status;
|
|
BOOL StoppedOne;
|
|
BOOL ErrorFlag;
|
|
PLIST_ENTRY Entry;
|
|
USN NextJrnlReadPoint;
|
|
PCOMMAND_PACKET CmdPkt;
|
|
BY_HANDLE_FILE_INFORMATION FileInfo;
|
|
CHAR TimeString[TIME_STRING_LENGTH];
|
|
|
|
IO_STATUS_BLOCK Iosb;
|
|
|
|
ULONGLONG VolumeInfoData[(sizeof(FILE_FS_VOLUME_INFORMATION) +
|
|
MAXIMUM_VOLUME_LABEL_LENGTH + 7)/8];
|
|
PFILE_FS_VOLUME_INFORMATION VolumeInfo =
|
|
(PFILE_FS_VOLUME_INFORMATION)VolumeInfoData;
|
|
|
|
|
|
//
|
|
// Try-Finally
|
|
//
|
|
try {
|
|
|
|
//
|
|
// Capture exception.
|
|
//
|
|
try {
|
|
|
|
WAIT_FOR_WORK:
|
|
//
|
|
// Look for a Volume Monitor Entry to be placed on the work queue.
|
|
// The agent that put the entry on the queue also started the first
|
|
// read to the journal so we can start looking for I/O completions.
|
|
//
|
|
while (TRUE) {
|
|
|
|
WStatus = FrsRtlWaitForQueueFull(&VolumeMonitorQueue, 10000);
|
|
|
|
DPRINT1_WS(5, "Wait on VolumeMonitorQueue: Count: %d",
|
|
VolumeMonitorQueue.Count, WStatus);
|
|
|
|
if (WIN_SUCCESS(WStatus)) {
|
|
break;
|
|
}
|
|
|
|
switch (WStatus) {
|
|
|
|
case WAIT_TIMEOUT:
|
|
if (KillJournalThreads) {
|
|
//
|
|
// Terminate the thread.
|
|
//
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(WStatus);
|
|
}
|
|
break;
|
|
|
|
case ERROR_INVALID_HANDLE:
|
|
//
|
|
// The VolumeMonitorQueue was rundown. Exit.
|
|
//
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(WStatus);
|
|
break;
|
|
|
|
default:
|
|
|
|
DPRINT_WS(0, "Unexpected status from FrsRtlWaitForQueueFull", WStatus);
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(WStatus);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Loop as long as we have volumes to monitor or have I/O outstanding on the port.
|
|
//
|
|
while ((VolumeMonitorQueue.Count != 0) ||
|
|
(JournalActiveIoRequests != 0) ) {
|
|
|
|
pVme = NULL;
|
|
JbuffOverlap = NULL;
|
|
WStatus = ERROR_SUCCESS;
|
|
IoSize = 0;
|
|
|
|
DPRINT(5, "Waiting on JournalCompletionPort \n");
|
|
ErrorFlag = !JrnlGetQueuedCompletionStatus(JournalCompletionPort,
|
|
&IoSize,
|
|
(PULONG_PTR) &pVme,
|
|
&JbuffOverlap);
|
|
//INFINITE);
|
|
//
|
|
// Check for an error return and see if the completion port has
|
|
// disappeared.
|
|
//
|
|
if (ErrorFlag) {
|
|
WStatus = GetLastError();
|
|
DPRINT_WS(3, "Error from GetQueuedCompletionStatus", WStatus);
|
|
DPRINT5(3, "CompPort: %08x, IoSize: %08x, pVme: %08x, OvLap: %08x, VolHandle: %08x\n",
|
|
JournalCompletionPort, IoSize, pVme, JbuffOverlap, pVme->VolumeHandle);
|
|
|
|
if (WStatus == ERROR_INVALID_HANDLE) {
|
|
JournalCompletionPort = NULL;
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(WStatus);
|
|
}
|
|
|
|
if (WStatus == ERROR_INVALID_PARAMETER) {
|
|
DPRINT(0, "ERROR- Invalid Param from GetQueuedCompletionStatus\n");
|
|
if (!GetFileInformationByHandle(JournalCompletionPort, &FileInfo)) {
|
|
WStatus2 = GetLastError();
|
|
DPRINT_WS(0, "Error from GetFileInformationByHandle", WStatus2);
|
|
} else {
|
|
CHAR FlagBuf[120];
|
|
DPRINT(0, "Info on JournalCompletionPort\n");
|
|
|
|
FrsFlagsToStr(FileInfo.dwFileAttributes, FileAttrFlagNameTable,
|
|
sizeof(FlagBuf), FlagBuf);
|
|
|
|
DPRINT2(0, "FileAttributes %08x Flags [%s]\n",
|
|
FileInfo.dwFileAttributes, FlagBuf);
|
|
|
|
FileTimeToString(&FileInfo.ftCreationTime, TimeString);
|
|
DPRINT1(0, "CreationTime %s\n", TimeString);
|
|
|
|
FileTimeToString(&FileInfo.ftLastAccessTime, TimeString);
|
|
DPRINT1(0, "LastAccessTime %08x\n", TimeString);
|
|
|
|
FileTimeToString(&FileInfo.ftLastWriteTime, TimeString);
|
|
DPRINT1(0, "LastWriteTime %08x\n", TimeString);
|
|
|
|
DPRINT1(0, "VolumeSerialNumber %08x\n", FileInfo.dwVolumeSerialNumber);
|
|
DPRINT1(0, "FileSizeHigh %08x\n", FileInfo.nFileSizeHigh);
|
|
DPRINT1(0, "FileSizeLow %08x\n", FileInfo.nFileSizeLow);
|
|
DPRINT1(0, "NumberOfLinks %08x\n", FileInfo.nNumberOfLinks);
|
|
DPRINT1(0, "FileIndexHigh %08x\n", FileInfo.nFileIndexHigh);
|
|
DPRINT1(0, "FileIndexLow %08x\n", FileInfo.nFileIndexLow);
|
|
}
|
|
|
|
//
|
|
// See if the volume handle still works.
|
|
//
|
|
DPRINT(0, "Dumping Volume information\n");
|
|
Status = NtQueryVolumeInformationFile(pVme->VolumeHandle,
|
|
&Iosb,
|
|
VolumeInfo,
|
|
sizeof(VolumeInfoData),
|
|
FileFsVolumeInformation);
|
|
|
|
if ( NT_SUCCESS(Status) ) {
|
|
|
|
VolumeInfo->VolumeLabel[VolumeInfo->VolumeLabelLength/2] = UNICODE_NULL;
|
|
FileTimeToString((PFILETIME) &VolumeInfo->VolumeCreationTime, TimeString);
|
|
|
|
DPRINT5(4,"%-16ws (%d), %s, VSN: %08X, VolCreTim: %s\n",
|
|
VolumeInfo->VolumeLabel,
|
|
VolumeInfo->VolumeLabelLength,
|
|
(VolumeInfo->SupportsObjects ? "(obj)" : "(no-obj)"),
|
|
VolumeInfo->VolumeSerialNumber,
|
|
TimeString);
|
|
} else {
|
|
DPRINT_NT(0, "ERROR - Volume root QueryVolumeInformationFile failed.", Status);
|
|
}
|
|
|
|
//
|
|
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
// begin workaround for journal bug.
|
|
//
|
|
//
|
|
InterlockedDecrement(&JournalActiveIoRequests);
|
|
|
|
if (JbuffOverlap == NULL) {
|
|
|
|
//
|
|
// No packet dequeued. Unexpected error Cancel all I/O requests.
|
|
//
|
|
DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
|
|
pVme = NULL;
|
|
WStatus = E_UNEXPECTED;
|
|
goto STOP_JOURNAL_IO;
|
|
}
|
|
|
|
//
|
|
// Get the base of the Jbuff struct containing this overlap struct.
|
|
//
|
|
Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
|
|
//DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
pVme->ActiveIoRequests -= 1;
|
|
FRS_ASSERT(pVme->ActiveIoRequests == 0);
|
|
|
|
//
|
|
// If I/O on this journal has been stopped or the I/O operation
|
|
// was aborted then free the Jbuff. There should be at most one
|
|
// I/O per volume that comes in with the aborted status.
|
|
//
|
|
// Note: We can still have other Jbufs queued for processing by the
|
|
// USN Journal processing thread for this VME.
|
|
//
|
|
if ((!pVme->IoActive) ||
|
|
(WStatus == ERROR_OPERATION_ABORTED) ) {
|
|
|
|
DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
|
|
DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
|
|
pVme->FSVolInfo.VolumeLabel, IoSize);
|
|
//
|
|
// How do we know when all outstanding Jbuffs have
|
|
// been retired for this VME? need an interlocked ref count?
|
|
// Why does this matter?
|
|
//
|
|
//DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
Jbuff = NULL;
|
|
//
|
|
// Even if the operation was aborted. If I/O has not stopped
|
|
// (e.g. a quick pause-unpause sequence) then start another read.
|
|
//
|
|
if (!pVme->IoActive) {
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
DPRINT(0, "Journal request retry\n");
|
|
DPRINT1(0, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
|
|
|
|
if (Jbuff != NULL ) {
|
|
DPRINT1(0, "jb: tf %08x (BUG INVAL PARAM)\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
Jbuff = NULL;
|
|
}
|
|
|
|
//
|
|
// Wait and then retry the journal read again.
|
|
//
|
|
Sleep(500);
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
goto START_NEXT_READ;
|
|
//
|
|
// End workaround for journal bug.
|
|
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
//
|
|
//FRS_ASSERT(WStatus != ERROR_INVALID_PARAMETER);
|
|
}
|
|
|
|
//
|
|
// Error may be ERROR_OPERATION_ABORTED but shouldn't be success.
|
|
// This gets sorted out below.
|
|
//
|
|
FRS_ASSERT(WStatus != ERROR_SUCCESS);
|
|
}
|
|
|
|
//
|
|
// Check if no packet was dequeued from the port.
|
|
//
|
|
if (JbuffOverlap == NULL) {
|
|
|
|
//
|
|
// No packet dequeued. Unexpected error Cancel all I/O requests.
|
|
//
|
|
DPRINT(0, "Unexpected error from GetQueuedCompletionStatus. Stopping all journal I/O\n");
|
|
pVme = NULL;
|
|
WStatus = E_UNEXPECTED;
|
|
goto STOP_JOURNAL_IO;
|
|
}
|
|
|
|
//
|
|
// A packet was dequeued from the port. First check if this
|
|
// is a request to stop or pause I/O on this journal.
|
|
// There is no Jbuff with this request and the overlap struct
|
|
// is part of the VME.
|
|
//
|
|
if (IoSize == FRS_CANCEL_JOURNAL_READ) {
|
|
pVme->StopIo = FALSE; // VME Overlap struct available.
|
|
|
|
DPRINT1(4, "Cancel Journal Read for %ws\n", pVme->FSVolInfo.VolumeLabel);
|
|
//
|
|
// cancel any outstanding I/O on this volume handle and
|
|
// deactivate the VME.
|
|
// Note: Any I/O on this volume handle that has already
|
|
// been completed and queued to the completion port
|
|
// is not affected by the cancel. Use !pVme->IoActive to
|
|
// throw those requests away.
|
|
//
|
|
|
|
WStatus = ERROR_SUCCESS;
|
|
goto STOP_JOURNAL_IO;
|
|
} else
|
|
|
|
if (IoSize == FRS_PAUSE_JOURNAL_READ) {
|
|
|
|
DPRINT2(4, "Pause Journal Read for %ws. Jrnl State: %s\n",
|
|
pVme->FSVolInfo.VolumeLabel, RSS_NAME(pVme->JournalState));
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
//
|
|
// This is a pause journal request. Stop I/O on the journal
|
|
// but don't deactivate the VME.
|
|
//
|
|
pVme->StopIo = FALSE; // VME Overlap struct available.
|
|
if (pVme->JournalState == JRNL_STATE_PAUSE1) {
|
|
//
|
|
// Cancel I/O on the journal read handle and put a second
|
|
// pause request on the port so we know it was done.
|
|
//
|
|
pVme->IoActive = FALSE;
|
|
if (!CancelIo(pVme->VolumeHandle)) {
|
|
DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
|
|
}
|
|
|
|
pVme->WStatus = ERROR_SUCCESS;
|
|
WStatus = JrnlSubmitReadThreadRequest(pVme,
|
|
FRS_PAUSE_JOURNAL_READ,
|
|
JRNL_STATE_PAUSE2);
|
|
DPRINT_WS(0, "Error from JrnlSubmitReadThreadRequest", WStatus);
|
|
|
|
} else
|
|
if (pVme->JournalState == JRNL_STATE_PAUSE2) {
|
|
|
|
//
|
|
// This is the second pause request so there will be no more
|
|
// journal data buffers on this volume. (NOT TRUE, sometimes
|
|
// the abort takes awhile but since IoActive is clear the
|
|
// buffer will be ignored.)
|
|
// Send a paused complete command to the journal process queue.
|
|
// When it gets to the head of the queue, all prior queued
|
|
// journal buffers will have been processed so the filter table
|
|
// can now be updated.
|
|
//
|
|
CmdPkt = FrsAllocCommand(&JournalProcessQueue, CMD_JOURNAL_PAUSED);
|
|
CmdPkt->Parameters.JournalRequest.Replica = NULL;
|
|
CmdPkt->Parameters.JournalRequest.pVme = pVme;
|
|
FrsSubmitCommand(CmdPkt, FALSE);
|
|
|
|
} else {
|
|
//
|
|
// If we are stopping while in the middle of a Pause request
|
|
// the stop takes precedence.
|
|
//
|
|
if ((pVme->JournalState != JRNL_STATE_STOPPING) &&
|
|
(pVme->JournalState != JRNL_STATE_STOPPED)) {
|
|
DPRINT2(0, "ERROR: Invalid Journal State: %s on pause request on volume %ws,\n",
|
|
RSS_NAME(pVme->JournalState), pVme->FSVolInfo.VolumeLabel);
|
|
}
|
|
}
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
//
|
|
// Not a cancel or pause packet. It must be a journal read response.
|
|
//
|
|
InterlockedDecrement(&JournalActiveIoRequests);
|
|
|
|
//
|
|
// Get the base of the Jbuff struct containing this overlap struct.
|
|
//
|
|
Jbuff = CONTAINING_RECORD(JbuffOverlap, JBUFFER, Overlap);
|
|
//DPRINT2(5, "jb: fc %08x (len: %d)\n", Jbuff, IoSize);
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
pVme->ActiveIoRequests -= 1;
|
|
FRS_ASSERT(pVme->ActiveIoRequests == 0);
|
|
|
|
//
|
|
// If I/O on this journal has been stopped or the I/O operation
|
|
// was aborted then free the Jbuff. There should be at most one
|
|
// I/O per volume that comes in with the aborted status.
|
|
//
|
|
// Note: We can still have other Jbufs queued for processing by the
|
|
// USN Journal processing thread for this VME.
|
|
//
|
|
if ((!pVme->IoActive) ||
|
|
(IoSize < sizeof(USN)) ||
|
|
(WStatus == ERROR_OPERATION_ABORTED) ) {
|
|
|
|
DPRINT1(5, "I/O aborted, putting jbuffer %08x on JournalFreeQueue.\n", Jbuff);
|
|
DPRINT2(5, "Canceled Io on volume %ws, IoSize= %d\n",
|
|
pVme->FSVolInfo.VolumeLabel, IoSize);
|
|
//
|
|
// How do we know when all outstanding Jbuffs have
|
|
// been retired for this VME? need an interlocked ref count?
|
|
// Why does it matter?
|
|
//
|
|
//DPRINT1(5, "jb: tf %08x (abort)\n", Jbuff);
|
|
FrsRtlInsertTailQueue(&JournalFreeQueue, &Jbuff->ListEntry);
|
|
//
|
|
// Even if the operation was aborted. If I/O has not stopped
|
|
// (e.g. a quick pause-unpause sequence) then start another read.
|
|
//
|
|
if (pVme->IoActive) {
|
|
goto START_NEXT_READ;
|
|
}
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
continue;
|
|
}
|
|
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
|
|
/**************************************************************
|
|
* *
|
|
* We have a successfull I/O completion packet. *
|
|
* Return the status and data length then put down *
|
|
* another read at the Next uSN on the journal. *
|
|
* *
|
|
**************************************************************/
|
|
|
|
Jbuff->WStatus = WStatus;
|
|
Jbuff->DataLength = IoSize;
|
|
|
|
//
|
|
// Update next USN in VME and send the journal buffer out for processing.
|
|
//
|
|
NextJrnlReadPoint = *(USN *)(Jbuff->DataBuffer);
|
|
if (NextJrnlReadPoint < pVme->JrnlReadPoint) {
|
|
DPRINT2(0, "USN error: Next < Previous, Next %08x %08x, Prev: %08x %08x\n",
|
|
PRINTQUAD(NextJrnlReadPoint), PRINTQUAD(pVme->JrnlReadPoint));
|
|
WStatus = ERROR_INVALID_DATA;
|
|
goto STOP_JOURNAL_IO;
|
|
}
|
|
|
|
pVme->JrnlReadPoint = NextJrnlReadPoint;
|
|
|
|
DPRINT1(5, "Next Usn is: %08x %08x\n", PRINTQUAD(pVme->JrnlReadPoint));
|
|
|
|
//DPRINT2(5, "jb: tu %08x (len: %d)\n", Jbuff, Jbuff->DataLength);
|
|
|
|
FrsRtlInsertTailQueue(&JournalProcessQueue, &Jbuff->ListEntry);
|
|
|
|
//
|
|
// If the read request failed for some reason (e.g. ERROR_NOT_FOUND)
|
|
// let USN processing figure it out and start I/O back up as appropriate.
|
|
//
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
pVme->IoActive = FALSE;
|
|
continue;
|
|
}
|
|
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
|
|
|
|
START_NEXT_READ:
|
|
//
|
|
// Get a free buffer and start another read on the journal.
|
|
//
|
|
WStatus = JrnlUnPauseVolume(pVme, NULL, TRUE);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
|
|
//
|
|
// Check for abort and cancel all I/O.
|
|
//
|
|
if (WStatus == ERROR_REQUEST_ABORTED) {
|
|
pVme = NULL;
|
|
DPRINT(0, "JournalFreeQueue Abort. Stopping all journal I/O\n");
|
|
goto STOP_JOURNAL_IO;
|
|
}
|
|
//
|
|
// If the response is success or busy then we can expect to see a
|
|
// buffer come through the port.
|
|
//
|
|
if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_BUSY)) {
|
|
goto STOP_JOURNAL_IO;
|
|
}
|
|
|
|
continue;
|
|
|
|
|
|
|
|
STOP_JOURNAL_IO:
|
|
|
|
//
|
|
// Test if stopping I/O on just one volume.
|
|
//
|
|
if (pVme != NULL) {
|
|
FrsRtlAcquireQueueLock(&VolumeMonitorQueue);
|
|
//
|
|
// We should send a cmd packet to the journal process queue since
|
|
// that is the point where all pending journal buffers are completed.
|
|
//
|
|
SET_JOURNAL_AND_REPLICA_STATE(pVme, JRNL_STATE_STOPPED);
|
|
if (!CancelIo(pVme->VolumeHandle)) {
|
|
DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
|
|
}
|
|
VmeDeactivate(&VolumeMonitorQueue, pVme, WStatus);
|
|
SetEvent(pVme->Event);
|
|
FrsRtlReleaseQueueLock(&VolumeMonitorQueue);
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Stop all I/O on all volume journals.
|
|
//
|
|
StoppedOne = FALSE;
|
|
|
|
ForEachListEntry(&VolumeMonitorQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
//
|
|
// The loop iterator pE is of type VOLUME_MONITOR_ENTRY.
|
|
//
|
|
if (pE->JournalState != JRNL_STATE_STOPPED) {
|
|
StoppedOne = TRUE;
|
|
SET_JOURNAL_AND_REPLICA_STATE(pE, JRNL_STATE_STOPPED);
|
|
if (!CancelIo(pE->VolumeHandle)) {
|
|
DPRINT_WS(0, "ERROR - Cancel Io;", GetLastError());
|
|
}
|
|
}
|
|
|
|
VmeDeactivate(&VolumeMonitorQueue, pE, WStatus);
|
|
SetEvent(pE->Event);
|
|
);
|
|
|
|
if (!StoppedOne && (JbuffOverlap == NULL)) {
|
|
//
|
|
// We didn't stop anything and nothing came thru the port.
|
|
// Must be hung.
|
|
//
|
|
DPRINT(0, "ERROR - Readjournalthread hung. Killing thread\n");
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(WStatus);
|
|
}
|
|
|
|
|
|
} // end of while()
|
|
|
|
|
|
|
|
if (KillJournalThreads) {
|
|
//
|
|
// Terminate the thread.
|
|
//
|
|
DPRINT(4, "Readjournalthread Terminating.\n");
|
|
JournalReadThreadHandle = NULL;
|
|
ExitThread(ERROR_SUCCESS);
|
|
}
|
|
|
|
goto WAIT_FOR_WORK;
|
|
|
|
|
|
//
|
|
// Get exception status.
|
|
//
|
|
} except (EXCEPTION_EXECUTE_HANDLER) {
|
|
GET_EXCEPTION_CODE(WStatus);
|
|
}
|
|
|
|
} finally {
|
|
|
|
if (WIN_SUCCESS(WStatus)) {
|
|
if (AbnormalTermination()) {
|
|
WStatus = ERROR_OPERATION_ABORTED;
|
|
}
|
|
}
|
|
|
|
DPRINT_WS(0, "Read Journal Thread finally.", WStatus);
|
|
|
|
//
|
|
// Trigger FRS shutdown if we terminated abnormally.
|
|
//
|
|
if (!WIN_SUCCESS(WStatus) && (WStatus != ERROR_PROCESS_ABORTED)) {
|
|
JournalReadThreadHandle = NULL;
|
|
DPRINT(0, "Readjournalthread terminated abnormally, forcing service shutdown.\n");
|
|
FrsIsShuttingDown = TRUE;
|
|
SetEvent(ShutDownEvent);
|
|
}
|
|
}
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlGetEndOfJournal(
|
|
IN PVOLUME_MONITOR_ENTRY pVme,
|
|
OUT USN *EndOfJournal
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Get the address of the end of the USN Journal. This is used for starting
|
|
a new replica set at the end of the journal. The replica tree starts out
|
|
empty so there is no need to read through several megabytes of
|
|
USN records. It is also used to find the end of the journal before
|
|
recovery starts.
|
|
|
|
Arguments:
|
|
|
|
pVme - The Volume Monitor struct to initialize. It provides the volume
|
|
handle.
|
|
|
|
EndOfJournal - Returned USN of the end of the Journal or 0.
|
|
|
|
Return Value:
|
|
|
|
Win32 status.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlGetEndOfJournal:"
|
|
|
|
USN_JOURNAL_DATA UsnJrnlData;
|
|
|
|
DWORD WStatus;
|
|
ULONG BytesReturned = 0;
|
|
|
|
*EndOfJournal = QUADZERO;
|
|
|
|
//
|
|
// The following call returns:
|
|
//
|
|
// UsnJournalID Current Instance of Journal
|
|
// FirstUsn First position that can be read from journal
|
|
// NextUsn Next position that will be written to the journal
|
|
// LowestValidUsn First record that was written into the journal for
|
|
// this journal instance. It is possible that enumerating
|
|
// the files on disk will return a USN lower than this
|
|
// value. This indicates that the journal has been
|
|
// restamped since the last USN was written for this file.
|
|
// It means that the file may have been changed and
|
|
// journal data was lost.
|
|
// MaxUsn The largest change USN the journal will support.
|
|
// MaximumSize
|
|
// AllocationDelta
|
|
//
|
|
|
|
if (!DeviceIoControl(pVme->VolumeHandle,
|
|
FSCTL_QUERY_USN_JOURNAL,
|
|
NULL, 0,
|
|
&UsnJrnlData, sizeof(UsnJrnlData),
|
|
&BytesReturned, NULL)) {
|
|
|
|
WStatus = GetLastError();
|
|
DPRINT_WS(0, "Error from FSCTL_QUERY_USN_JOURNAL", WStatus);
|
|
|
|
if (WStatus == ERROR_NOT_READY) {
|
|
//
|
|
// Volume is being dismounted.
|
|
//
|
|
|
|
} else
|
|
if (WStatus == ERROR_BAD_COMMAND) {
|
|
//
|
|
// NT status was INVALID_DEVICE_STATE.
|
|
//
|
|
|
|
} else
|
|
if (WStatus == ERROR_INVALID_PARAMETER) {
|
|
//
|
|
// Bad Handle.
|
|
//
|
|
|
|
} else
|
|
if (WStatus == ERROR_JOURNAL_DELETE_IN_PROGRESS) {
|
|
//
|
|
// Journal being deleted.
|
|
//
|
|
|
|
} else
|
|
if (WStatus == ERROR_JOURNAL_NOT_ACTIVE) {
|
|
//
|
|
// Journal ???.
|
|
//
|
|
}
|
|
|
|
return WStatus;
|
|
|
|
}
|
|
|
|
if (BytesReturned != sizeof(UsnJrnlData)) {
|
|
//
|
|
// Unexpected result return.
|
|
//
|
|
return ERROR_JOURNAL_NOT_ACTIVE;
|
|
}
|
|
|
|
|
|
DPRINT1(4, ":S: EOJ from jrnl query %08x %08x\n", PRINTQUAD(UsnJrnlData.NextUsn));
|
|
|
|
//
|
|
// Return the next read point for the journal.
|
|
//
|
|
*EndOfJournal = UsnJrnlData.NextUsn;
|
|
|
|
return ERROR_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlEnumerateFilterTreeBU(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PFILTER_TABLE_ENTRY FilterEntry,
|
|
PJRNL_FILTER_ENUM_ROUTINE Function,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine walks through the entries in the Volume filter table connected
|
|
by the child list starting with the FilterEntry provided. The traversal
|
|
is bottom up. At each node the function provided is called with the
|
|
entry address and the context pointer.
|
|
|
|
It is assumed that the caller has acquired the Filter Table Child list
|
|
lock for the Replica set being traversed.
|
|
|
|
Before calling the function with an entry we increment the ref count.
|
|
The Called function must DECREMENT the ref count (or delete the entry).
|
|
|
|
Arguments:
|
|
|
|
Table - The context of the Hash Table to enumerate.
|
|
FilterEntry - The Filter Entry node to start at.
|
|
Function - The function to call for each entry in the subtree. It is of
|
|
of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
|
|
enumeration else true.
|
|
Context - A context ptr to pass through to the Function.
|
|
|
|
Return Value:
|
|
|
|
The status code from the argument function.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlEnumerateFilterTreeBU:"
|
|
|
|
PLIST_ENTRY ListHead;
|
|
ULONG WStatus;
|
|
|
|
//
|
|
// Check for no entries in tree.
|
|
//
|
|
if (FilterEntry == NULL) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
INCREMENT_FILTER_REF_COUNT(FilterEntry);
|
|
ListHead = &FilterEntry->ChildHead;
|
|
|
|
ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
|
|
//
|
|
// pE is of type PFILTER_TABLE_ENTRY.
|
|
//
|
|
if (!IsListEmpty(&pE->ChildHead)) {
|
|
//
|
|
// Recurse on the child's list head.
|
|
//
|
|
WStatus = JrnlEnumerateFilterTreeBU(Table, pE, Function, Context);
|
|
} else {
|
|
|
|
//
|
|
// Apply the function to the node.
|
|
// The function could remove the node from the list but the list macro
|
|
// has captured the Flink so the traversal can continue.
|
|
//
|
|
INCREMENT_FILTER_REF_COUNT(pE);
|
|
WStatus = (Function)(Table, pE, Context);
|
|
}
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
goto RETURN;
|
|
}
|
|
);
|
|
|
|
WStatus = (Function)(Table, FilterEntry, Context);
|
|
|
|
RETURN:
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlEnumerateFilterTreeTD(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PFILTER_TABLE_ENTRY FilterEntry,
|
|
PJRNL_FILTER_ENUM_ROUTINE Function,
|
|
PVOID Context
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine walks through the entries in the Volume filter table connected
|
|
by the child list starting with the FilterEntry provided. The traversal
|
|
is Top Down. At each node the function provided is called with the
|
|
entry address and the context pointer.
|
|
|
|
It is assumed that the caller has acquired the Filter Table Child list
|
|
lock for the Replica set being traversed.
|
|
|
|
Before calling the function with an entry we increment the ref count.
|
|
The Called function must DECREMENT the ref count (or delete the entry).
|
|
|
|
Arguments:
|
|
|
|
Table - The context of the Hash Table to enumerate.
|
|
FilterEntry - The Filter Entry node to start at.
|
|
Function - The function to call for each entry in the subtree. It is of
|
|
of type PJRNL_FILTER_ENUM_ROUTINE. Return FALSE to abort the
|
|
enumeration else true.
|
|
Context - A context ptr to pass through to the Function.
|
|
|
|
Return Value:
|
|
|
|
The status code from the argument function.
|
|
|
|
--*/
|
|
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlEnumerateFilterTreeTD:"
|
|
|
|
PLIST_ENTRY ListHead;
|
|
ULONG WStatus;
|
|
//
|
|
// Check for no entries in tree.
|
|
//
|
|
if (FilterEntry == NULL) {
|
|
return ERROR_SUCCESS;
|
|
}
|
|
//
|
|
// Apply the function to the root node.
|
|
// The function could remove the node from the table but not from the list
|
|
// since our caller has the child list replica lock. Bump the ref count
|
|
// to keep the memory from being freed.
|
|
//
|
|
INCREMENT_FILTER_REF_COUNT(FilterEntry);
|
|
|
|
WStatus = (Function)(Table, FilterEntry, Context);
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
goto RETURN;
|
|
}
|
|
//
|
|
// Warning: If the function above deletes the node the following ref
|
|
// is invalid. This should not be a problem because deletes should only
|
|
// be done bottom up.
|
|
//
|
|
ListHead = &FilterEntry->ChildHead;
|
|
|
|
ForEachSimpleListEntry(ListHead, FILTER_TABLE_ENTRY, ChildEntry,
|
|
//
|
|
// pE is of type PFILTER_TABLE_ENTRY.
|
|
//
|
|
//
|
|
// Apply the function to each child node.
|
|
// The function could remove the node from the list but the list macro
|
|
// has captured the Flink so the traversal can continue.
|
|
//
|
|
if (!IsListEmpty(&pE->ChildHead)) {
|
|
//
|
|
// Recurse on the child's list head.
|
|
//
|
|
WStatus = JrnlEnumerateFilterTreeTD(Table, pE, Function, Context);
|
|
} else {
|
|
INCREMENT_FILTER_REF_COUNT(pE);
|
|
WStatus = (Function)(Table, pE, Context);
|
|
}
|
|
|
|
if (!WIN_SUCCESS(WStatus)) {
|
|
goto RETURN;
|
|
}
|
|
|
|
);
|
|
|
|
WStatus = ERROR_SUCCESS;
|
|
|
|
//
|
|
// Done with this Root node so decrement the ref count which could
|
|
// cause it to be deleted.
|
|
//
|
|
RETURN:
|
|
|
|
return WStatus;
|
|
}
|
|
|
|
|
|
|
|
VOID
|
|
JrnlHashEntryFree(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Free the memory pointed to by Buffer.
|
|
|
|
Arguments:
|
|
|
|
Table -- ptr to a hash table struct (has heap handle).
|
|
Buffer -- ptr to buffer to free.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlHashEntryFree:"
|
|
|
|
FrsFreeType(Buffer);
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlCompareFid(
|
|
PVOID Buf1,
|
|
PVOID Buf2,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Compare two keys for equality.
|
|
|
|
Arguments:
|
|
|
|
Buf1 -- ptr to key value 1.
|
|
Buf1 -- ptr to key value 2.
|
|
Length -- should be 8 bytes.
|
|
|
|
Return Value:
|
|
|
|
TRUE if they match.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCompareFid:"
|
|
|
|
if (!ValueIsMultOf4(Buf1)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
Buf1, Length, *(PULONG)Buf1);
|
|
FRS_ASSERT(ValueIsMultOf4(Buf1));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (!ValueIsMultOf4(Buf2)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
Buf2, Length, *(PULONG)Buf2);
|
|
FRS_ASSERT(ValueIsMultOf4(Buf2));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(ULONGLONG)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(LONGLONG));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
return RtlEqualMemory(Buf1, Buf2, sizeof(ULONGLONG));
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlHashCalcFid (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Calculate a hash value on an NTFS file ID for the journal filter table.
|
|
|
|
Arguments:
|
|
|
|
Buf -- ptr to a file ID.
|
|
Length -- should be 8 bytes.
|
|
|
|
Return Value:
|
|
|
|
32 bit hash value.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlHashCalcFid:"
|
|
|
|
PULONG pUL = (PULONG) Buf;
|
|
|
|
if (!ValueIsMultOf4(pUL)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
pUL, Length, *pUL);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(LONGLONG)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(LONGLONG));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
return HASH_FID(pUL, 0x80000000);
|
|
}
|
|
|
|
|
|
ULONG
|
|
NoHashBuiltin (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
No-op function for hash tables that use an external function to
|
|
do hash calculations. It returns the low 4 bytes of the quadword.
|
|
|
|
Arguments:
|
|
|
|
Buf -- ptr to a file ID.
|
|
Length -- should be 8 bytes.
|
|
|
|
Return Value:
|
|
|
|
32 bit hash value.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "NoHashBuiltin:"
|
|
|
|
|
|
PULONG pUL = (PULONG) Buf;
|
|
|
|
if (!ValueIsMultOf4(pUL)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
pUL, Length, *pUL);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(LONGLONG)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(LONGLONG));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
return (*pUL & (ULONG) 0x7FFFFFFF);
|
|
}
|
|
|
|
|
|
BOOL
|
|
JrnlCompareGuid(
|
|
PVOID Buf1,
|
|
PVOID Buf2,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Compare two keys for equality.
|
|
|
|
Arguments:
|
|
|
|
Buf1 -- ptr to key value 1.
|
|
Buf1 -- ptr to key value 2.
|
|
Length -- should be 16 bytes.
|
|
|
|
Return Value:
|
|
|
|
TRUE if they match.
|
|
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlCompareGuid:"
|
|
|
|
if (!ValueIsMultOf4(Buf1)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
Buf1, Length, *(PULONG)Buf1);
|
|
FRS_ASSERT(ValueIsMultOf4(Buf1));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (!ValueIsMultOf4(Buf2)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
Buf2, Length, *(PULONG)Buf2);
|
|
FRS_ASSERT(ValueIsMultOf4(Buf2));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(GUID)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(GUID));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
return RtlEqualMemory(Buf1, Buf2, sizeof(GUID));
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlHashCalcGuid (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Calculate a hash value for a Guid.
|
|
|
|
From \nt\private\rpc\runtime\mtrt\uuidsup.hxx
|
|
|
|
This is the "true" OSF DCE format for Uuids. We use this
|
|
when generating Uuids. The NodeId is faked on systems w/o
|
|
a netcard.
|
|
|
|
typedef struct _RPC_UUID_GENERATE
|
|
{
|
|
unsigned long TimeLow; // 100 ns units
|
|
unsigned short TimeMid;
|
|
unsigned short TimeHiAndVersion;
|
|
unsigned char ClockSeqHiAndReserved;
|
|
unsigned char ClockSeqLow;
|
|
unsigned char NodeId[6]; // constant
|
|
} RPC_UUID_GENERATE;
|
|
|
|
TimeLow wraps every 6.55ms and is mostly zero.
|
|
Not quite true since GUIDs are allocated
|
|
in time based blocks and then successive GUIDS are created by
|
|
bumping the TimeLow by one until the block is consumed.
|
|
|
|
Arguments:
|
|
|
|
Buf -- ptr to a Guid.
|
|
Length -- should be 16 bytes.
|
|
|
|
Return Value:
|
|
|
|
32 bit hash value.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlHashCalcGuid:"
|
|
|
|
PULONG pUL = (PULONG) Buf;
|
|
PUSHORT pUS = (PUSHORT) Buf;
|
|
|
|
if (!ValueIsMultOf4(pUL)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
pUL, Length, *pUL);
|
|
FRS_ASSERT(ValueIsMultOf4(pUL));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(GUID)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(GUID));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
//
|
|
// Calc hash based on the time since the rest of it is eseentially constant.
|
|
//
|
|
return (ULONG) (pUS[0] ^ pUS[1] ^ pUS[2]);
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
JrnlHashCalcUsn (
|
|
PVOID Buf,
|
|
ULONG Length
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Calculate a hash value on an NTFS USN Journal Index.
|
|
|
|
Arguments:
|
|
|
|
Buf -- ptr to a file ID.
|
|
Length -- should be 8 bytes.
|
|
|
|
Return Value:
|
|
|
|
32 bit hash value.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlHashCalcUsn:"
|
|
|
|
ULONG Value, HighPart, LowPart;
|
|
|
|
if (!ValueIsMultOf4(Buf)) {
|
|
DPRINT3(0, "ERROR - Unaligned key value - addr: %08x, len: %d, Data: %08x\n",
|
|
Buf, Length, *(PULONG)Buf);
|
|
FRS_ASSERT(ValueIsMultOf4(Buf));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
if (Length != sizeof(LONGLONG)) {
|
|
DPRINT1(0, "ERROR - Invalid Length: %d\n", Length);
|
|
FRS_ASSERT(Length == sizeof(LONGLONG));
|
|
return 0xFFFFFFFF;
|
|
}
|
|
|
|
LowPart = *(PULONG) Buf;
|
|
HighPart = *(PULONG)( (PCHAR) Buf + 4 );
|
|
|
|
//
|
|
// USNs are quadword offsets so shift the low part an extra 3 bits.
|
|
//
|
|
Value = (HighPart >> 16) + HighPart + (LowPart >> 19) + (LowPart >> 3);
|
|
|
|
return Value;
|
|
|
|
}
|
|
|
|
|
|
VOID
|
|
CalcHashFidAndName(
|
|
IN PUNICODE_STRING Name,
|
|
IN PULONGLONG Fid,
|
|
OUT PULONGLONG HashValue
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine forms a 32 bit hash of the name and File ID args.
|
|
It returns this in the low 32 bits of HashValue. The upper 32 bits are zero.
|
|
|
|
Note: If there is room at the end of the Unicode String buffer for the Name,
|
|
code below will add a NULL for printing.
|
|
|
|
Arguments:
|
|
|
|
Name - The filename to hash.
|
|
Fid - The FID to hash.
|
|
HashValue - The resulting quadword hash value.
|
|
|
|
Return Value:
|
|
|
|
Not used
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "CalcHashFidAndName:"
|
|
|
|
PUSHORT p;
|
|
ULONG NameHash = 0;
|
|
ULONG Shift = 0;
|
|
ULONG FidHash;
|
|
ULONG NChars, MaxNChars;
|
|
PULONG pUL;
|
|
|
|
FRS_ASSERT( Name != NULL );
|
|
FRS_ASSERT( Fid != NULL );
|
|
FRS_ASSERT( ValueIsMultOf2(Name->Buffer) );
|
|
FRS_ASSERT( ValueIsMultOf2(Name->Length) );
|
|
FRS_ASSERT( Name->Length != 0 );
|
|
FRS_ASSERT( ValueIsMultOf8(Fid) );
|
|
|
|
|
|
NChars = Name->Length / sizeof(WCHAR);
|
|
|
|
//
|
|
// Combine each unicode character into the hash value, shifting 4 bits
|
|
// each time. Start at the end of the name so file names with different
|
|
// type codes will hash to different table offsets.
|
|
//
|
|
for( p = Name->Buffer + NChars - 1;
|
|
p >= Name->Buffer;
|
|
p-- ) {
|
|
|
|
NameHash = NameHash ^ (((ULONG)towupper(*p)) << Shift);
|
|
Shift = (Shift < 16) ? Shift + 4 : 0;
|
|
|
|
}
|
|
|
|
pUL = (ULONG *) Fid;
|
|
FidHash = (ULONG) HASH_FID(pUL, 0x80000000);
|
|
if (FidHash == 0) {
|
|
DPRINT(4, "Warning - FidHash is zero.\n");
|
|
}
|
|
|
|
*HashValue = (ULONGLONG) (NameHash + FidHash);
|
|
|
|
if (*HashValue == 0) {
|
|
DPRINT(0, "Error - HashValue is zero.\n");
|
|
}
|
|
|
|
//
|
|
// Make sure the FileName has a unicode null at the end before we print it. This is
|
|
//
|
|
MaxNChars = Name->MaximumLength / sizeof(WCHAR);
|
|
|
|
if (Name->Buffer[NChars-1] != UNICODE_NULL) {
|
|
if (NChars >= MaxNChars) {
|
|
//
|
|
// No NULL at the end of the name and no room to add one.
|
|
//
|
|
DPRINT4(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: cannot print\n",
|
|
(NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash);
|
|
return;
|
|
}
|
|
Name->Buffer[NChars] = UNICODE_NULL;
|
|
}
|
|
|
|
DPRINT5(4, "++ HV: %08x, Hfid: %08x, Fid: %08x %08x, Hnam: %08x, Name: %ws\n",
|
|
(NameHash+FidHash), FidHash, PRINTQUAD(*Fid), NameHash, Name->Buffer);
|
|
|
|
}
|
|
|
|
|
|
VOID
|
|
JrnlFilterPrintJacket(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
)
|
|
{
|
|
JrnlFilterPrint(5, Table, Buffer);
|
|
}
|
|
|
|
|
|
VOID
|
|
JrnlFilterPrint(
|
|
ULONG PrintSev,
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
print out a hash table entry.
|
|
|
|
Arguments:
|
|
|
|
Table -- ptr to a hash table struct.
|
|
Buffer -- ptr to entry.
|
|
|
|
Return Value:
|
|
|
|
none.
|
|
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlFilterPrint:"
|
|
|
|
PFILTER_TABLE_ENTRY Entry = (PFILTER_TABLE_ENTRY)Buffer;
|
|
|
|
DPRINT3(PrintSev, "Addr: %08x, HashValue: %08x RC: %d\n",
|
|
Entry,
|
|
Entry->HashEntryHeader.HashValue,
|
|
Entry->HashEntryHeader.ReferenceCount);
|
|
|
|
DPRINT2(PrintSev, "List Entry - %08x, %08x\n",
|
|
Entry->HashEntryHeader.ListEntry.Flink,
|
|
Entry->HashEntryHeader.ListEntry.Blink);
|
|
|
|
|
|
DPRINT2(PrintSev, "FileId: %08x %08x, ParentFileId: %08x %08x\n",
|
|
PRINTQUAD(Entry->DFileID), PRINTQUAD(Entry->DParentFileID));
|
|
|
|
DPRINT2(PrintSev, "Replica Number: %d, FileName: %ws\n",
|
|
Entry->DReplicaNumber, Entry->UFileName.Buffer);
|
|
|
|
DPRINT3(PrintSev, "Sequence Number: %d, Transition Type: %d, FrsVsn: %08x %08x\n",
|
|
READ_FILTER_SEQ_NUMBER(Entry),
|
|
READ_FILTER_TRANS_TYPE(Entry),
|
|
PRINTQUAD(Entry->FrsVsn));
|
|
|
|
DPRINT4(PrintSev, "Childhead Entry - %08x, %08x Child Link Entry - %08x, %08x\n",
|
|
Entry->ChildHead.Flink, Entry->ChildHead.Blink,
|
|
Entry->ChildEntry.Flink, Entry->ChildEntry.Blink);
|
|
|
|
}
|
|
|
|
#undef PrintSev
|
|
|
|
|
|
|
|
VOID
|
|
JrnlChangeOrderPrint(
|
|
PGENERIC_HASH_TABLE Table,
|
|
PVOID Buffer
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
print out a hash table entry.
|
|
|
|
Arguments:
|
|
|
|
Table -- ptr to a hash table struct. (unused)
|
|
Buffer -- ptr to entry.
|
|
|
|
Return Value:
|
|
|
|
none.
|
|
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlChangeOrderPrint:"
|
|
|
|
FRS_PRINT_TYPE(0, (PCHANGE_ORDER_ENTRY)Buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VOID
|
|
DumpUsnRecord(
|
|
IN ULONG Severity,
|
|
IN PUSN_RECORD UsnRecord,
|
|
IN ULONG ReplicaNumber,
|
|
IN ULONG LocationCmd,
|
|
IN PCHAR Debsub,
|
|
IN ULONG uLineNo
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine prints out the contents of a NTFS USN Journal Record.
|
|
|
|
Arguments:
|
|
|
|
Severity -- Severity level for print. (See debug.c, debug.h)
|
|
UsnRecord - The address of the UsnRecord.
|
|
ReplicaNumber - ID number of the replica set
|
|
LocationCmd - Decoded location command for this USN record.
|
|
Debsub -- Name of calling subroutine.
|
|
uLineno -- Line number of caller
|
|
|
|
MACRO: DUMP_USN_RECORD, DUMP_USN_RECORD2
|
|
|
|
Return Value:
|
|
|
|
none.
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "DumpUsnRecord:"
|
|
|
|
ULONG Len;
|
|
CHAR TimeString[TIME_STRING_LENGTH];
|
|
CHAR Tstr1[200];
|
|
WCHAR FName[MAX_PATH+1];
|
|
CHAR FlagBuf[120];
|
|
|
|
//
|
|
// Don't print this
|
|
//
|
|
if (!DoDebug(Severity, Debsub)) {
|
|
return;
|
|
}
|
|
//
|
|
// Get hh:mm:ss.
|
|
//
|
|
FileTimeToStringClockTime((PFILETIME) &UsnRecord->TimeStamp, TimeString);
|
|
|
|
//
|
|
// Put file name in a buffer so we can put a null at the end of it.
|
|
//
|
|
Len = min((ULONG)UsnRecord->FileNameLength, MAX_PATH);
|
|
CopyMemory(FName, UsnRecord->FileName, Len);
|
|
FName[Len/2] = UNICODE_NULL;
|
|
|
|
//
|
|
// Build the trace record.
|
|
//
|
|
_snprintf(Tstr1, sizeof(Tstr1),
|
|
":U: %08x %d Fid %08x %08x PFid %08x %08x At %08x Sr %04x %s %7s %ws",
|
|
(ULONG)UsnRecord->Usn,
|
|
ReplicaNumber,
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber),
|
|
PRINTQUAD(UsnRecord->ParentFileReferenceNumber),
|
|
UsnRecord->FileAttributes,
|
|
UsnRecord->SourceInfo,
|
|
TimeString,
|
|
CoLocationNames[LocationCmd],
|
|
FName
|
|
);
|
|
Tstr1[sizeof(Tstr1)-1] = '\0';
|
|
|
|
DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
|
|
|
|
//
|
|
// Output reason string on sep line.
|
|
//
|
|
FrsFlagsToStr(UsnRecord->Reason, UsnReasonNameTable, sizeof(FlagBuf), FlagBuf);
|
|
|
|
_snprintf(Tstr1, sizeof(Tstr1),
|
|
":U: Fid %08x %08x Reason %08x Flags [%s]",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber),
|
|
UsnRecord->Reason,
|
|
FlagBuf
|
|
);
|
|
Tstr1[sizeof(Tstr1)-1] = '\0';
|
|
|
|
DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
|
|
|
|
//
|
|
// Output file attributes string on sep line.
|
|
//
|
|
FrsFlagsToStr(UsnRecord->FileAttributes, FileAttrFlagNameTable, sizeof(FlagBuf), FlagBuf);
|
|
|
|
_snprintf(Tstr1, sizeof(Tstr1),
|
|
":U: Fid %08x %08x Attrs %08x Flags [%s]",
|
|
PRINTQUAD(UsnRecord->FileReferenceNumber),
|
|
UsnRecord->FileAttributes,
|
|
FlagBuf
|
|
);
|
|
Tstr1[sizeof(Tstr1)-1] = '\0';
|
|
|
|
DebPrint(Severity, "%s\n", Debsub, uLineNo, Tstr1);
|
|
}
|
|
|
|
VOID
|
|
JrnlDumpVmeFilterTable(
|
|
VOID
|
|
)
|
|
/*++
|
|
Routine Description:
|
|
Dump the VME filter table
|
|
|
|
Arguments:
|
|
None.
|
|
|
|
Return Value:
|
|
None.
|
|
--*/
|
|
{
|
|
#undef DEBSUB
|
|
#define DEBSUB "JrnlDumpVmeFilterTable:"
|
|
|
|
ForEachListEntry( &VolumeMonitorStopQueue, VOLUME_MONITOR_ENTRY, ListEntry,
|
|
|
|
|
|
DPRINT(4, "\n");
|
|
DPRINT1(4, "==== start of VME Filter table dump for %ws ===========\n", pE->FSVolInfo.VolumeLabel);
|
|
DPRINT(4, "\n");
|
|
if (pE->FilterTable != NULL) {
|
|
// GHT_DUMP_TABLE(5, pE->FilterTable);
|
|
NOTHING;
|
|
} else {
|
|
DPRINT(4, "Filter table freed\n");
|
|
}
|
|
DPRINT(4, "\n");
|
|
DPRINT(4, "============== end of Vme Filter table dump ============\n");
|
|
DPRINT(4, "\n");
|
|
|
|
|
|
);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*++
|
|
|
|
The two tables below describe all the possible outcomes of a directory
|
|
rename operation. The case numbers in parens are further described below.
|
|
As directory changes appear in the USN data stream the filter table for
|
|
the volume is updated immediately, even in the case of subtree renames.
|
|
This allows us to accurately filter subsequent USN records and associate
|
|
them with the correct replica set.
|
|
(R.S. means Replica Set)
|
|
|
|
Parent
|
|
FileID FileID
|
|
Filter Entry Filter Entry Interpretation : Action
|
|
------------ ------------ -------------- ------
|
|
Absent Absent Wasn't in R.S., Still Isn't: Skip
|
|
(1) Absent Present Wasn't in R.S., Now Is : Create entry (MOVEIN)
|
|
(2) Present Absent Was in R.S. , Now Isn't : MOVEOUT
|
|
Present Present Was in R.S. , Still Is : Eval Further
|
|
|
|
The last case above requires further evaluation to determine if the
|
|
directory has moved from one directory to another or from one replica
|
|
set to another.
|
|
|
|
FileID Compare R.S. compare
|
|
between Filter Between File
|
|
Entry & USn Rec and Parent Interpretation : Action
|
|
-------------- ----------- -------------- ------
|
|
(3) Same Parent Same R.S. File stayed in same Dir.: Check Name
|
|
Same Parent Diff. R.S. Error, shouldn't happen :
|
|
(4) Diff. Parent Same R.S. Ren to diff dir in R.S. : Update Parent Fid (MOVEDIR)
|
|
(5) Diff. Parent Diff. R.S. Rename to diff R.s. : MOVERS
|
|
|
|
For directory renames there are 5 cases to consider:
|
|
|
|
1. MOVEIN - Rename of a directory into a replica set. The filter table lookup
|
|
failed on the FID but the parent FID is in the table. We add an entry for
|
|
this DIR to the filter table. The update process must enumerate the
|
|
subtree on disk and evaluate each file for inclusion into the tree,
|
|
updating the Filter table as it goes. We may see file operations several
|
|
levels down from the rename point and have no entry in the Filter Table so
|
|
we pitch those records. The sub-tree enumeration process must handle this
|
|
as it incorporates each file into the IDTable.
|
|
|
|
|
|
2. MOVEOUT - Parent FID change to a dir OUTSIDE of any replica set on the
|
|
volume. This is a delete of an entire subtree in the Replica set. We
|
|
enumerate the subtree bottom-up, sending dir level change orders to the
|
|
update process as we delete the filter table entries.
|
|
|
|
|
|
3. Name change only. The Parent FID in the USN record matches the
|
|
Parent FID in the Filter entry for the directory.
|
|
Update the name in the filter entry.
|
|
|
|
|
|
4. MOVEDIR - Parent FID in USN record is different from the parent FID in the
|
|
Filter entry so this is a rename to a dir in the SAME replica set.
|
|
Update the parent FID in the filter entry and Filename too.
|
|
|
|
5. MOVERS - The Parent FID in the USN record is associated with a directory
|
|
in a DIFFERENT replica set on the volume. Update the parent FID, the
|
|
replica ptr, and name in the filter entry. This is a move of an entire
|
|
subtree from one replica set to another. We enumerate the subtree
|
|
top-down, sending dir level change orders to the update process as we
|
|
update the replica set information in the filter table entries.
|
|
|
|
|
|
--*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
Note: doc: - update this description
|
|
|
|
Removing a sub-tree from a replica set
|
|
|
|
This is a multi-stage process that occurs when a directory is renamed out of
|
|
the replica set. This is managed by the update process.
|
|
|
|
1. The Journal Process has marked the filter entry for the renamed directory
|
|
as DELETED. This ensures that operations on any files below this directory
|
|
are filtered out by the Journal process. A change order describing the subtree
|
|
delete is queued to the Replica Change Order process queue.
|
|
|
|
2. When the update process encounters the subtree delete change order it walks
|
|
thru the subtree (using either the directory entries in the Filter Hash Table or
|
|
the Replica IDTable) breadth-first from the leaves of the subtree
|
|
to the subtree root. For each file or directory it tombstones the entry in the
|
|
IDTable and builds a delete change order to send to its outbound partners. In
|
|
addition it deletes the entries from the volume filter table and the DIRTable as
|
|
it progresses. If a crash or shutdown request occurs during this operation
|
|
the process continues with the remaining entries when it resumes.
|
|
|
|
3. The operation completes when the root of the sub-tree is processed.
|
|
|
|
|
|
|
|
Adding a sub-tree (X) to a replica set
|
|
|
|
This occurs when directory X is renamed into a replica set. It is managed by
|
|
the Update Process.
|
|
|
|
1. The Journal Process creates a Filter entry for the sub-tree root (X) and
|
|
queues a change order to the update process. At this point the Journal process
|
|
has no knowledge of what is beneath this directory. If it sees an operation on
|
|
a direct child of X it builds a change order and queues it to the update
|
|
process. In addition if it sees a directory create/delete or rename operation
|
|
on a direct child of X it increments sequence number in the Filter Table Entry
|
|
for X and creates a new Filter Table entry as appropriate.
|
|
|
|
2. The update process takes the "sub-tree add" change order and processes the
|
|
sub-tree starting at X, enumerating the subtree down to the leaves in a breadth
|
|
first order. For each entry in the subtree it creates an IDTable entry for the
|
|
file or directory. If a directory it also creates a DIRTable entry and adds an
|
|
entry to the Filter Table. As each Filter Table entry is made the Journal
|
|
subsystem will begin sending change orders to the update process for any new
|
|
file operations under the directory. For each directory, the filter table entry
|
|
is made first, if it doesn't already exist. then the update process enumerates
|
|
the directory contents. If new direct children are created while the
|
|
enumeration is in process change orders are queued to the update process. If
|
|
the USN on the change order is less than or equal to the USN saved when the file
|
|
was first processed then the change order is discarded. Otherwise the change
|
|
occurred after the point when the file was processed.
|
|
|
|
It is possible for the update process to receive update or delete
|
|
change orders for files that are not yet present in the IDTable because the
|
|
enumeration hasn't reached them yet. For files or dirs created "behind" the
|
|
enumeration process point, change orders are queued that will pick them up.
|
|
The first problem is solved by having the update process stop processing
|
|
further change orders on this replica set until the enumeration is complete.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
Recovery mode processing for the NTFS journal.
|
|
|
|
Objective: When FRS or the system crashes we have lost the write filter
|
|
the journal code uses to filter out FRS related writes to files.
|
|
We need to reliably identify those USN records that were caused by FRS
|
|
so we don't propagate out a file that was being installed at the time
|
|
of the crash. Such a file will undoubtedly be corrupt and will get sent
|
|
to every member of the replica set.
|
|
|
|
In the case of system crashes, NTFS inserts close records into the journal
|
|
for any files that were open at the time of the crash. NTFS marks those
|
|
USN records with a flag that indicates they were written at startup. In
|
|
addition a user app can force a close record to be written to the journal
|
|
through an FSCTL call. If this happens and no further modification is made
|
|
to the file then no close record will be written by NTFS when the last handle
|
|
on the file is closed or at startup.
|
|
|
|
In the case of FRS service crashes or externally generated process Kills
|
|
FRS will fail to perform a clean shutdown. As each change order is processed
|
|
it is marked as work in process. When the change order either retires or
|
|
goes into a retry state the work in process flag is cleared. From this
|
|
information we can determine those files that may have had FRS generated
|
|
writes in process when the service died.
|
|
|
|
The flow is as follows:
|
|
|
|
At replica startup scan the inbound log and build a hash table (PendingCOTable)
|
|
of all entries with the following information kept with each entry:
|
|
|
|
File FID
|
|
File GUID
|
|
Local/Remote CO flag
|
|
CO Inprocess flag
|
|
Usn index of most recent USN record that contributed to the local CO.
|
|
|
|
There could be multiple COs pending for the same file. OR the state of
|
|
the Inprocess flags and save the state of the most recent CO's local/rmt flag.
|
|
The PendingCoTable continues to exist after startup so we can evaluate
|
|
dependencies between newly arrived COs and COs in a retry state in the inlog.
|
|
|
|
In addition:
|
|
The Largest NTFS USN for any local inbound CO is saved in RecoveryUsnStart.
|
|
The current end of the USN journal is saved in RecoveryUsnEnd.
|
|
Both are saved in the Replica struct.
|
|
|
|
|
|
ULONGLONG FileReferenceNumber;
|
|
ULONGLONG ParentFileReferenceNumber;
|
|
USN Usn;
|
|
LARGE_INTEGER TimeStamp;
|
|
|
|
*/
|
|
Start USN read at Replica->RecoveryUsnStart.
|
|
|
|
if (UsnRecord->Usn < Replica->RecoveryUsnEnd) {
|
|
|
|
|
|
if (IsNtfsRecoveryClose(UsnRecord)) {
|
|
//
|
|
// assume that all the file data may not have been written out
|
|
// so the file may be corrupt.
|
|
//
|
|
PendingCo = InPendingCoTable(Replica->PendingCoTable,
|
|
&UsnRecord->FileReferenceNumber);
|
|
if ((PendingCo == NULL) || (PendingCo->LocalCo)) {
|
|
//
|
|
// The file was being written locally at the time of the crash.
|
|
// It is probably corrupt.
|
|
// Create a file refresh change order and send it to one of our
|
|
// inbound partners to get their version of the file.
|
|
// Note: This request is queued so the first inbound partner to
|
|
// join will get it.
|
|
// Note: Since we are reading after RecoveryUsnStart the USN
|
|
// should not be less than what we see in the inlog.
|
|
//
|
|
FRS_ASSERT(UsnRecord->Usn >= PendingCo->Usn);
|
|
RequestRefreshCo(Replica, &UsnRecord->FileReferenceNumber);
|
|
|
|
goto GET_NEXT_USN_RECORD;
|
|
|
|
} else {
|
|
//
|
|
// There is a pending remote CO for this file. It will install
|
|
// a new copy of the file.
|
|
//
|
|
// Note: if there are multiple remote COs in the process queue
|
|
// the last one may not be the one that is finally accepted.
|
|
// But we need to be sure that none of the local COs that are pending
|
|
// are allowed to propagate.
|
|
//
|
|
// If this CO was in process at the time of the crash and the
|
|
// CO was already propagated to the outlog, the staging file may
|
|
// be corrupted. Delete the CO from the outlog and queue a
|
|
// refresh request to the inbound partner.
|
|
//
|
|
// Note: We could still have a corrupted file. If it was locally
|
|
// changed and we processed the CO, updating the IDTable and
|
|
// inserting the CO in the outlog but a crash still resulted
|
|
// in not all dirty data pages being flushed.
|
|
// WHEN WE GEN THE LOCAL STAGE FILE CAN WE FORCE A FLUSH?
|
|
|
|
}
|
|
|
|
if (IsFileFrsStagingFile(UsnRecord)) {
|
|
//
|
|
// This is an FRS staging file. It may be corrupt.
|
|
// Delete it and regenerate it by setting a new start state in
|
|
// the related CO. (CO Guid is derived from the name of the file).
|
|
// There may not be a CO for this file if the inlog record has
|
|
// been deleted. There may still be a CO in the outlog though so
|
|
// just delete the staging file, forcing it to be regenerated on
|
|
// demand from the local file.
|
|
//
|
|
// If the local file is suspect then we need to refresh it from
|
|
// an inbound partner so delete the CO in the outlog and let the
|
|
// refresh CO PROPAGATE as needed.
|
|
//
|
|
// Note that the IDTable entry may already have been updated because
|
|
// this CO retired. That would cause the refresh CO to fail to
|
|
// be accepted. Put some state in the refresh CO so when it comes
|
|
// back if that state matches the state in the IDTable entry then
|
|
// we know to accept the refresh CO regardless of other reconcile
|
|
// info. If however another local or remote CO has updated the
|
|
// file in the interim then the refresh CO is stale and should be
|
|
// discarded.
|
|
//
|
|
SetPendingCoState(SeqNum, PendingCo->LocalCo ? IBCO_STAGING_REQUESTED :
|
|
IBCO_FETCH_REQUESTED);
|
|
|
|
}
|
|
goto GET_NEXT_USN_RECORD;
|
|
|
|
} else {
|
|
//
|
|
// Read IDTable entry for this file and get the FileUsn.
|
|
// This is the USN associated with the most recent operation on the
|
|
// file that we have handled.
|
|
//
|
|
|
|
if (UsnRecord->Usn <= IDTableRec->FileUsn) {
|
|
//
|
|
// This USN record is for an operation that occurred
|
|
// prior to the last action processed related to the file.
|
|
//
|
|
goto GET_NEXT_USN_RECORD;
|
|
|
|
} else {
|
|
//
|
|
// This USN record could not have come from FRS because if it did and there was no entry for
|
|
// a change order on the file in the Inbound Log then the LastFileUsn check above would have caught it.
|
|
// This is true because the inbound log record is only deleted after the file is updated and the LastFileUsn
|
|
// is saved in the Jet record for the file.
|
|
// Even if there is a change order pending in the Inbound log, FRS could not have started processing it
|
|
// because the USN Record is not marked as written by NTFS at recovery which would be the case
|
|
// if FRS had been in the middle of an update when the system crashed. Therefore,
|
|
//
|
|
//this is not an FRS generated USN record so process the USN record normally.
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
This solution solves the problem of FRS getting part way thru a file update
|
|
when the system crashes. It must not process the USN record because then it
|
|
would propagate a corrupted file out to all the other members. It also has
|
|
the nice property of refreshing a file from another partner that a user was
|
|
writing at the time of the crash. The User has lost their changes but at
|
|
least the file is back in an uncorrupted state.
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|