Module Name:
SIS Groveler file groveling functions
Cedric Krumbein, 1998
User Mode
Revision History:
#include "all.hxx"
// CLEAR_FILE() resets the per-file bookkeeping of a FileData object: the
// database entry fields, parent ID, timing fields, the status/offset part
// of the read OVERLAPPED, and the file name. It does not touch the file
// handle, the oplock OVERLAPPED, or any event handle.
// Each continuation backslash must be the last character on its line.
#define CLEAR_FILE(FILE) ( \
    (FILE).entry.fileID           = 0, \
    (FILE).entry.fileSize         = 0, \
    (FILE).entry.signature        = 0, \
    (FILE).entry.attributes       = 0, \
    (FILE).entry.csIndex          = nullCSIndex, \
    (FILE).entry.createTime       = 0, \
    (FILE).entry.writeTime        = 0, \
    (FILE).parentID               = 0, \
    (FILE).retryTime              = 0, \
    (FILE).startTime              = 0, \
    (FILE).stopTime               = 0, \
    (FILE).readSynch.Internal     = 0, \
    (FILE).readSynch.InternalHigh = 0, \
    (FILE).readSynch.Offset       = 0, \
    (FILE).readSynch.OffsetHigh   = 0, \
    (FILE).fileName[0]            = _T('\0') )

// CLEAR_OVERLAPPED() zeroes the status and offset fields of an OVERLAPPED
// structure. The hEvent member is deliberately left alone so that the
// structure can be reused with the same event.
#define CLEAR_OVERLAPPED(OVERLAPPED) ( \
    (OVERLAPPED).Internal     = 0, \
    (OVERLAPPED).InternalHigh = 0, \
    (OVERLAPPED).Offset       = 0, \
    (OVERLAPPED).OffsetHigh   = 0 )
// CS index helpers.
//
// nullCSIndex is the all-zero CS index; the macros below compare CS
// indices bytewise with memcmp().
static const CSID nullCSIndex = {
    0, 0, 0,
    _T('\0'), _T('\0'), _T('\0'), _T('\0'),
    _T('\0'), _T('\0'), _T('\0'), _T('\0')
};

// HasCSIndex(id) is true when id differs from nullCSIndex,
// i.e. when a CS index has been set.
#define HasCSIndex(ID) \
    (memcmp(&(ID), &nullCSIndex, sizeof(ID)) != 0)

// SameCSIndex(a, b) is true when the two CS indices are bytewise equal.
#define SameCSIndex(LEFT, RIGHT) \
    (memcmp(&(LEFT), &(RIGHT), sizeof(CSID)) == 0)
// Exception codes thrown while a target file is being processed.
enum TargetException {
    TARGET_INVALID,     // thrown e.g. when the target is missing or disallowed
    TARGET_ERROR        // thrown e.g. when the target cannot be opened or oplocked
};
/*****************************************************************************/ /************************** Miscellaneous functions **************************/ /*****************************************************************************/
// NewHandler() is installed by _set_new_handler() to throw an
// exception when the system can't allocate any more memory.
//
// size: the number of bytes that could not be allocated (unused).
// Always throws MEMORY_ERROR; the return statement is never reached and
// exists only to satisfy the handler's INT return type.
static INT __cdecl NewHandler(size_t size)
{
    throw MEMORY_ERROR;
    return 0;   // Dummy return
}
// FileIDCompare() is the ordering callback handed to qsort() and
// bsearch() for arrays of 64-bit file IDs.
//
// id1, id2: pointers to the two DWORDLONG file IDs to compare.
// Returns -1, 0 or +1 when *id1 is less than, equal to, or greater
// than *id2 respectively (unsigned comparison).
static INT __cdecl FileIDCompare(
    const VOID *id1,
    const VOID *id2)
{
    const DWORDLONG lhs = *(const DWORDLONG *)id1;
    const DWORDLONG rhs = *(const DWORDLONG *)id2;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return +1;
    return 0;
}
// qsStringCompare() is the qsort() callback for an array of character
// string pointers.
//
// str1, str2: pointers to the array elements (each a TCHAR *).
// Returns the case-insensitive ordering of the two strings as reported
// by _tcsicmp().
static INT __cdecl qsStringCompare(
    const VOID *str1,
    const VOID *str2)
{
    TCHAR *const first  = *(TCHAR **)str1;
    TCHAR *const second = *(TCHAR **)str2;

    return _tcsicmp(first, second);
}
// bsStringCompare() is used by bsearch() to look up a matching character
// string. str1 points to the path name being searched for and str2 points
// to an entry of the excluded paths list.
//
// An excluded path matches itself and anything located beneath it: with
// an excluded path of \a\b, both \a\b\c\d.foo and \a\b\foo match, while
// \a\b.foo and \a\bx do not.
//
// Returns 0 on a match; otherwise the case-insensitive prefix ordering
// from _tcsnicmp(), or 1 when the prefix matches but the searched path
// merely continues the last component of the excluded path.
static INT __cdecl bsStringCompare(
    const VOID *str1,
    const VOID *str2)
{
    TCHAR *candidate = *(TCHAR **)str1;
    TCHAR *excluded  = *(TCHAR **)str2;

    // Compare only as many characters as the excluded path contains.
    const size_t excludedLen = _tcslen(excluded);
    const INT prefixOrder = _tcsnicmp(candidate, excluded, excludedLen);

    if (prefixOrder != 0)
        return prefixOrder;

    // The prefix matches. A longer candidate only counts as a match when
    // the character following the prefix is a path separator, so that
    // \a\bx is not confused with \a\b.
    if (_tcslen(candidate) > excludedLen && candidate[excludedLen] != _T('\\'))
        return 1;

    return 0;
}
/*****************************************************************************/ /********************** Groveler class private methods ***********************/ /*****************************************************************************/
// IsAllowedID() reports whether a directory or file ID may be groveled.
//
// fileID: the (non-zero) ID to check.
// Returns FALSE when the ID is found in the disallowedIDs array by
// binary search, and TRUE otherwise (including when the list is empty).
BOOL Groveler::IsAllowedID(DWORDLONG fileID) const
{
    ASSERT(fileID != 0);

    BOOL allowed = TRUE;

    if (numDisallowedIDs != 0) {
        ASSERT(disallowedIDs != NULL);

        const VOID *hit = bsearch(
            &fileID,
            disallowedIDs,
            numDisallowedIDs,
            sizeof(DWORDLONG),
            FileIDCompare);

        allowed = (hit == NULL);
    } else {
        ASSERT(disallowedIDs == NULL);
    }

    DPRINTF((L"IsAllowedID: %04I64x.%012I64x, (%s)\n",
        ((fileID >> 48) & 0xffff),
        (fileID & 0xffffffffffff),
        allowed ? L"yes" : L"no"));

    return allowed;
}
// IsAllowedName() reports whether a directory or file name may be groveled.
//
// fileName: the (non-NULL) path name to check.
// Returns FALSE when the name matches an entry of the disallowedNames
// list (see bsStringCompare() for the matching rule). When the name is
// not disallowed, GrovelAllPaths is off and RISPath is set, the name must
// additionally lie on the RIS path, except for the volume root "\".
BOOL Groveler::IsAllowedName(TCHAR *fileName) const
{
    ASSERT(fileName != NULL);

    BOOL allowed;

    if (numDisallowedNames != 0) {
        ASSERT(disallowedNames != NULL);

        allowed = (bsearch(
            &fileName,
            disallowedNames,
            numDisallowedNames,
            sizeof(TCHAR *),
            bsStringCompare) == NULL);
    } else {
        ASSERT(disallowedNames == NULL);
        allowed = TRUE;
    }

    // The name wasn't on the disallowed list. Unless the GrovelAllPaths
    // option is set, it must also be within the RIS valid path.
    const BOOL restrictToRISPath =
        allowed && !GrovelAllPaths && (RISPath != NULL);

    // Do not do this check if this is the root directory of the volume.
    if (restrictToRISPath && wcscmp(fileName,L"\\") != 0) {
        allowed = (bsStringCompare(&fileName,&RISPath) == 0);
    }

    DPRINTF((L"IsAllowedName: \"%s\", (%s)\n",
        fileName,
        allowed ? L"yes" : L"no"));

    return allowed;
}
// WaitForEvent() blocks the calling thread until the given event is
// signaled, then resets the event.
//
// event: the (non-NULL) event handle to wait on.
VOID Groveler::WaitForEvent(HANDLE event)
{
    ASSERT(event != NULL);

    // Wait without a timeout; anything but a normal wake-up is an error.
    const DWORD waitResult = WaitForSingleObject(event, INFINITE);
    ASSERT_ERROR(waitResult == WAIT_OBJECT_0);

    const BOOL resetOk = ResetEvent(event);
    ASSERT_ERROR(resetOk);
}
// OpenFileByID() opens the file identified by file->entry.fileID,
// relative to volumeHandle.
//
// file:        receives the open handle in file->handle; its fileID must
//              be non-zero and its handle must be NULL on entry.
// writeEnable: when TRUE the file is also opened for (and shared with)
//              write access.
//
// Returns TRUE on success. On failure returns FALSE and sets the thread's
// last error to the Win32 equivalent of the NT status.
//
// On success the handle is marked with USN_SOURCE_DATA_MANAGEMENT so that
// the USN entry for a later merge can be detected and skipped; failure to
// mark the handle is logged but does not fail the open. In DBG builds the
// file's name is additionally fetched into file->fileName.
BOOL Groveler::OpenFileByID(
    FileData *file,
    BOOL      writeEnable)
{
    UNICODE_STRING    fileIDString;
    OBJECT_ATTRIBUTES objectAttributes;
    IO_STATUS_BLOCK   ioStatusBlock;
    NTSTATUS          ntStatus;

    ASSERT(volumeHandle       != NULL);
    ASSERT(file               != NULL);
    ASSERT(file->entry.fileID != 0);
    ASSERT(file->handle       == NULL);

    // The "name" handed to NtCreateFile is the raw 8-byte file ID.
    fileIDString.Length        = sizeof(DWORDLONG);
    fileIDString.MaximumLength = sizeof(DWORDLONG);
    fileIDString.Buffer        = (WCHAR *)&file->entry.fileID;

    objectAttributes.Length                   = sizeof(OBJECT_ATTRIBUTES);
    objectAttributes.RootDirectory            = volumeHandle;
    objectAttributes.ObjectName               = &fileIDString;
    objectAttributes.Attributes               = OBJ_CASE_INSENSITIVE;
    objectAttributes.SecurityDescriptor       = NULL;
    objectAttributes.SecurityQualityOfService = NULL;

    ntStatus = NtCreateFile(
        &file->handle,
        GENERIC_READ |
        (writeEnable ? GENERIC_WRITE : 0),
        &objectAttributes,
        &ioStatusBlock,
        NULL,
        0,
        FILE_SHARE_READ   |
        FILE_SHARE_DELETE |
        (writeEnable ? FILE_SHARE_WRITE : 0),
        FILE_OPEN,
        FILE_OPEN_BY_FILE_ID    |
        FILE_OPEN_REPARSE_POINT |
        FILE_NO_INTERMEDIATE_BUFFERING,
        NULL,
        0);

    if (ntStatus == STATUS_SUCCESS) {
        DWORD bytesReturned;
        MARK_HANDLE_INFO markHandleInfo =
            {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};

        // Mark the handle so the usn entry for the merge operation (if
        // completed) can be detected and skipped.
        BOOL rc = DeviceIoControl(
            file->handle,
            FSCTL_MARK_HANDLE,
            &markHandleInfo,
            sizeof markHandleInfo,
            NULL,
            0,
            &bytesReturned,
            NULL);

        if (!rc) {
            DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
                driveLetterName, GetLastError()));
        }

#if DBG
        {
            // Get the file name (debug aid only). The result of the copy
            // is kept in a block-local so it does not leak into
            // non-DBG builds.
            HRESULT r;

            ASSERT(file->fileName[0] == _T('\0'));

            struct TFileName2 {
                ULONG nameLen;
                TCHAR name[MAX_PATH+1];
            } tFileName[1];

            ntStatus = NtQueryInformationFile(
                file->handle,
                &ioStatusBlock,
                tFileName,
                sizeof tFileName,
                FileNameInformation);

            if (ntStatus == STATUS_SUCCESS) {
                r = StringCbCopyN(file->fileName, sizeof(file->fileName), tFileName->name, tFileName->nameLen);
                ASSERT(r == S_OK);
            } else {
                r = StringCbCopy(file->fileName, sizeof(file->fileName), L"<unresolved name>");
                ASSERT(r == S_OK);
            }
        }
#endif

        return TRUE;
    }

    ASSERT(file->handle == NULL);
    SetLastError(RtlNtStatusToDosError(ntStatus));
    return FALSE;
}
// OpenFileByName() opens the file with the given fileName.
//
// file:        receives the open handle in file->handle, which must be
//              NULL on entry.
// writeEnable: when TRUE the file is also opened for (and shared with)
//              write access.
// fileName:    DOS path of the file; when NULL, file->fileName is used.
//              The name must not be empty.
//
// Returns TRUE on success. On failure returns FALSE and sets the thread's
// last error to the Win32 equivalent of the NT status. A path conversion
// failure is reported as STATUS_NO_MEMORY.
//
// On success the handle is marked with USN_SOURCE_DATA_MANAGEMENT so that
// the USN entry for a later merge can be detected and skipped; failure to
// mark the handle is logged but does not fail the open.
BOOL Groveler::OpenFileByName(
    FileData *file,
    BOOL      writeEnable,
    TCHAR    *fileName)
{
    UNICODE_STRING    dosPathName,
                      ntPathName;
    OBJECT_ATTRIBUTES objectAttributes;
    IO_STATUS_BLOCK   ioStatusBlock;
    NTSTATUS          ntStatus;

    ASSERT(file         != NULL);
    ASSERT(file->handle == NULL);

    if (fileName == NULL)
        fileName = file->fileName;
    ASSERT(fileName[0] != _T('\0'));

#ifdef _UNICODE
    dosPathName.Buffer = fileName;
#else
    if (!RtlCreateUnicodeStringFromAsciiz(&dosPathName, fileName)) {
        ntStatus = STATUS_NO_MEMORY;
        goto Error;
    }
#endif

    if (RtlDosPathNameToNtPathName_U(dosPathName.Buffer, &ntPathName, NULL, NULL)) {

        objectAttributes.Length                   = sizeof(OBJECT_ATTRIBUTES);
        objectAttributes.RootDirectory            = NULL;
        objectAttributes.ObjectName               = &ntPathName;
        objectAttributes.Attributes               = OBJ_CASE_INSENSITIVE;
        objectAttributes.SecurityDescriptor       = NULL;
        objectAttributes.SecurityQualityOfService = NULL;

        ntStatus = NtCreateFile(
            &file->handle,
            GENERIC_READ |
            (writeEnable ? GENERIC_WRITE : 0),
            &objectAttributes,
            &ioStatusBlock,
            NULL,
            0,
            FILE_SHARE_READ   |
            FILE_SHARE_DELETE |
            (writeEnable ? FILE_SHARE_WRITE : 0),
            FILE_OPEN,
            FILE_OPEN_REPARSE_POINT |
            FILE_NO_INTERMEDIATE_BUFFERING,
            NULL,
            0);

        // The NT path buffer was allocated by the conversion routine and
        // is no longer needed once the open has been attempted; release
        // it so that each call does not leak the path string.
        RtlFreeUnicodeString(&ntPathName);

    } else {
        ntStatus = STATUS_NO_MEMORY;
    }

#ifndef _UNICODE
    RtlFreeUnicodeString(&dosPathName);
#endif

    if (ntStatus == STATUS_SUCCESS) {
        DWORD bytesReturned;
        MARK_HANDLE_INFO markHandleInfo =
            {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};

        // Mark the handle so the usn entry for the merge operation (if
        // completed) can be detected and skipped.
        BOOL rc = DeviceIoControl(
            file->handle,
            FSCTL_MARK_HANDLE,
            &markHandleInfo,
            sizeof markHandleInfo,
            NULL,
            0,
            &bytesReturned,
            NULL);

        if (!rc) {
            DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
                driveLetterName, GetLastError()));
        }

        return TRUE;
    }

    // The label is only referenced from the non-Unicode conversion path.
#ifndef _UNICODE
Error:
#endif

    ASSERT(file->handle == NULL);
    SetLastError(RtlNtStatusToDosError(ntStatus));
    return FALSE;
}
// IsFileMapped() checks if the file is mapped by another user.
//
// file: the file to check; its handle must be open.
//
// Issues FSCTL_SIS_LINK_FILES with the SIS_LINK_FILES_OP_VERIFY_NO_MAP
// operation on grovHandle. Returns FALSE when the request succeeds (the
// file is verified as not mapped) and TRUE when it fails.
// NOTE(review): every failure of the request is treated as "mapped",
// which is the conservative answer — confirm against the SIS filter's
// documented error codes.
BOOL Groveler::IsFileMapped(FileData *file)
{
    _SIS_LINK_FILES sisLinkFiles;
    DWORD           transferCount;
    BOOL            success;

    ASSERT(grovHandle   != NULL);
    ASSERT(file         != NULL);
    ASSERT(file->handle != NULL);

    sisLinkFiles.operation          = SIS_LINK_FILES_OP_VERIFY_NO_MAP;
    sisLinkFiles.u.VerifyNoMap.file = file->handle;

    success = DeviceIoControl(
        grovHandle,
        FSCTL_SIS_LINK_FILES,
        (VOID *)&sisLinkFiles,
        sizeof(_SIS_LINK_FILES),
        NULL,
        0,
        &transferCount,
        NULL);

    if (success)
        return FALSE;

    return TRUE;
}
// SetOplock() requests a batch oplock on the open file.
//
// file: the open file; its oplock OVERLAPPED must be clear and its
//       oplock event must exist and be reset.
//
// Returns TRUE only when the request is left pending (ERROR_IO_PENDING).
// If the request completes immediately, the oplock event is reset, the
// OVERLAPPED is cleared, the last error is set to 0 and FALSE is
// returned. Any other failure clears the OVERLAPPED and returns FALSE
// with the last error from DeviceIoControl() left in place.
BOOL Groveler::SetOplock(FileData *file)
{
    ASSERT(file                      != NULL);
    ASSERT(file->handle              != NULL);
    ASSERT(file->oplock.Internal     == 0);
    ASSERT(file->oplock.InternalHigh == 0);
    ASSERT(file->oplock.Offset       == 0);
    ASSERT(file->oplock.OffsetHigh   == 0);
    ASSERT(file->oplock.hEvent       != NULL);
    ASSERT(IsReset(file->oplock.hEvent));

    BOOL completedNow = DeviceIoControl(
        file->handle,
        FSCTL_REQUEST_BATCH_OPLOCK,
        NULL,
        0,
        NULL,
        0,
        NULL,
        &file->oplock);

    if (!completedNow) {
        // A pending request is the only outcome that counts as success.
        if (GetLastError() == ERROR_IO_PENDING)
            return TRUE;

        ASSERT(IsReset(file->oplock.hEvent));
        CLEAR_OVERLAPPED(file->oplock);
        return FALSE;
    }

    // The request completed at once, so no oplock is left outstanding.
    ASSERT(IsSet(file->oplock.hEvent));
    BOOL resetOk = ResetEvent(file->oplock.hEvent);
    ASSERT_ERROR(resetOk);
    CLEAR_OVERLAPPED(file->oplock);
    SetLastError(0);
    return FALSE;
}
// CloseFile() closes the file if it is still open. When the oplock
// OVERLAPPED shows a request in progress, it then waits for the oplock
// event (signaled by the close itself or by an outside access), resets
// it, and clears the OVERLAPPED.
//
// file: the file to close; its oplock event must exist. A file whose
//       handle is already NULL is left untouched.
VOID Groveler::CloseFile(FileData *file)
{
    ASSERT(file                != NULL);
    ASSERT(file->oplock.hEvent != NULL);

    // Already closed: just verify that no oplock state is left over.
    if (file->handle == NULL) {
        ASSERT(file->oplock.Internal     == 0);
        ASSERT(file->oplock.InternalHigh == 0);
        ASSERT(file->oplock.Offset       == 0);
        ASSERT(file->oplock.OffsetHigh   == 0);
        ASSERT(IsReset(file->oplock.hEvent));
        return;
    }

    BOOL closed = CloseHandle(file->handle);
    ASSERT_ERROR(closed);
    file->handle = NULL;

    const BOOL oplockOutstanding =
        file->oplock.Internal     != 0 ||
        file->oplock.InternalHigh != 0 ||
        file->oplock.Offset       != 0 ||
        file->oplock.OffsetHigh   != 0;

    if (oplockOutstanding) {
        WaitForEvent(file->oplock.hEvent);
        CLEAR_OVERLAPPED(file->oplock);
    }
}
// CreateDatabase() creates the database. Initialize it such that if
// extract_log is called before scan_volume, it will return Grovel_overrun
// without attempting any USN extraction. Also, the first time scan_volume
// is called (with or without start_over), it will know to initialize
// lastUSN and do a full volume scan.
//
// Steps: look up the root directory ID, read the USN journal information
// (creating the journal if it is not active), recreate the database file,
// push the root ID on the database stack, and write the LAST_USN_NAME and
// USN_ID_NAME list entries.
//
// Returns TRUE on success. On any failure, lastUSN and usnID are set to
// UNINITIALIZED_USN and FALSE is returned.
BOOL Groveler::CreateDatabase(void)
{
    USN_JOURNAL_DATA usnJournalData;

    TFileName tempName;
    TCHAR listValue[17];        // 16 hex digits plus the terminator

    SGNativeListEntry listEntry;

    LONG num;

    tempName.assign(driveName);
    tempName.append(_T("\\"));

    rootID = GetFileID(NULL,tempName.name);
    if (rootID == 0) {
        DPRINTF((_T("%s: CreateDatabase: can't get root directory ID\n"),
            driveLetterName));
        goto Error;
    }

    if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
        DWORD lastError = GetLastError();

        if (lastError == ERROR_JOURNAL_NOT_ACTIVE) {
            DPRINTF((_T("%s: CreateDatabase: journal not active\n"), driveLetterName));

            // Create the journal, then query it again. The result of
            // get_usn_log_info() is compared against Grovel_ok here as
            // well, matching the check above.
            // NOTE(review): set_usn_log_size()'s return convention is not
            // visible here; its comparison is left as it was.
            if ((set_usn_log_size(65536) != ERROR_SUCCESS) ||
                get_usn_log_info(&usnJournalData) != Grovel_ok) {

                DPRINTF((_T("%s: CreateDatabase: can't initialize USN journal\n"),
                    driveLetterName));
                goto Error;
            }
        } else {
            DPRINTF((_T("%s: CreateDatabase: can't initialize last USN\n"),
                driveLetterName));
            goto Error;
        }
    }

    lastUSN = usnJournalData.NextUsn;
    usnID   = usnJournalData.UsnJournalID;

    sgDatabase->Close();
    if (!sgDatabase->Create(databaseName)) {
        DPRINTF((_T("%s: CreateDatabase: can't create database \"%s\"\n"),
            driveLetterName, databaseName));
        goto Error;
    }

    num = sgDatabase->StackPut(rootID, FALSE);
    if (num < 0)
        goto Error;
    ASSERT(num == 1);

    // Write UNINITIALIZED_USN into the database now, to be replaced when
    // scan_volume is complete. This will be a flag to indicate if the
    // database contents are valid.
    (void)StringCbPrintf(listValue, sizeof(listValue), _T("%016I64x"), UNINITIALIZED_USN);
    listEntry.name  = LAST_USN_NAME;
    listEntry.value = listValue;
    num = sgDatabase->ListWrite(&listEntry);
    if (num < 0)
        goto Error;
    ASSERT(num == 1);

    (void)StringCbPrintf(listValue, sizeof(listValue), _T("%016I64x"), usnID);
    listEntry.name  = USN_ID_NAME;
    listEntry.value = listValue;
    num = sgDatabase->ListWrite(&listEntry);
    if (num < 0)
        goto Error;
    ASSERT(num == 1);

    return TRUE;

Error:

    // Common failure exit: mark both USN values as uninitialized.
    lastUSN = usnID = UNINITIALIZED_USN;
    return FALSE;
}
#define MAX_ACTIONS 5
// DoTransaction() performs the specified operations
// on the database within a single transaction.
//
// numActions: number of entries in actionList.
// actionList: the actions to apply, in order; each entry's type selects
//             which member of its union u is used.
//
// Throws DATABASE_ERROR when the transaction cannot be started, when any
// database call returns a negative count (the transaction is aborted
// first), or when the commit fails.
//
// NOTE(review): in this view the switch below has no case labels, the
// loop counter i has no visible declaration, and the #if DBG / #endif
// directives share a line with code. These look like lines lost from the
// file and must be restored before this function can compile and run.
VOID Groveler::DoTransaction( DWORD numActions, DatabaseActionList *actionList) { DatabaseActionList *action;
LONG num;
ASSERT(sgDatabase != NULL); ASSERT(actionList != NULL);
// Everything below runs inside one database transaction.
if (sgDatabase->BeginTransaction() < 0) throw DATABASE_ERROR;
for (i = 0; i < numActions; i++) { action = &actionList[i]; switch(action->type) {
// Table put: store u.tableEntry; exactly one row is expected on success.
ASSERT(action->u.tableEntry != NULL); num = sgDatabase->TablePut(action->u.tableEntry); ASSERT(num < 0 || num == 1); break;
// Table delete: remove the table entries for u.fileID.
ASSERT(action->u.fileID != 0); num = sgDatabase->TableDeleteByFileID(action->u.fileID); break;
// Queue put: store u.queueEntry and count it in numFilesEnqueued.
ASSERT(action->u.queueEntry != NULL); num = sgDatabase->QueuePut(action->u.queueEntry); ASSERT(num < 0 || num == 1); if (num == 1) numFilesEnqueued++; break;
// Queue delete: remove entry u.queueIndex and count it in numFilesDequeued.
ASSERT(action->u.queueIndex != 0); num = sgDatabase->QueueDelete(action->u.queueIndex); ASSERT(num <= 1); if (num == 1) numFilesDequeued++; #if DBG
else DPRINTF((_T("DoTransaction: QUEUE_DELETE unsuccessful (%d)"), num)); #endif
// Any other action type is a programming error.
ASSERT_PRINTF(FALSE, (_T("type=%lu\n"), action->type)); }
// A negative count from any action aborts the whole transaction.
if (num < 0) { sgDatabase->AbortTransaction(); throw DATABASE_ERROR; } }
// All actions succeeded; a failed commit is rolled back and reported.
if (!sgDatabase->CommitTransaction()) { sgDatabase->AbortTransaction(); throw DATABASE_ERROR; } }
// EnqueueCSIndex() deletes all entries with the specified CS index from the
// table and enqueues them to be re-groveled, all within a single transaction.
//
// csIndex: the (non-NULL, non-null-valued) CS index whose table entries
//          are to be re-groveled.
//
// For every table entry carrying the CS index, a queue entry is added
// for its file ID unless one already exists; new entries become ready
// grovelInterval after the current time. The table entries are then
// deleted by CS index.
//
// Throws DATABASE_ERROR (after aborting the transaction) on any database
// failure.
VOID Groveler::EnqueueCSIndex(CSID *csIndex)
{
    SGNativeTableEntry tableEntry;

    SGNativeQueueEntry oldQueueEntry,
                       newQueueEntry;

    DWORD count;

    LONG num;

    ASSERT(csIndex != NULL);
    ASSERT(HasCSIndex(*csIndex));

    // Template for the queue entries to be added.
    newQueueEntry.parentID  = 0;
    newQueueEntry.reason    = 0;
    newQueueEntry.readyTime = GetTime() + grovelInterval;
    newQueueEntry.retryTime = 0;
    newQueueEntry.fileName  = NULL;

    oldQueueEntry.fileName  = NULL;

    count = 0;

    if (sgDatabase->BeginTransaction() < 0)
        throw DATABASE_ERROR;

    tableEntry.csIndex = *csIndex;
    num = sgDatabase->TableGetFirstByCSIndex(&tableEntry);

    while (num > 0) {
        ASSERT(num == 1);
        count++;

        // Is this file already queued?
        oldQueueEntry.fileID = tableEntry.fileID;
        num = sgDatabase->QueueGetFirstByFileID(&oldQueueEntry);
        if (num < 0)
            break;

        // Both "not queued" (0) and "queued" (1) are legitimate results;
        // asserting num == 1 here would contradict the branch below.
        ASSERT(num == 0 || num == 1);

        if (num == 0) {
            newQueueEntry.fileID = tableEntry.fileID;
            num = sgDatabase->QueuePut(&newQueueEntry);
            if (num < 0)
                break;
            ASSERT(num == 1);
            numFilesEnqueued++;
        }

        num = sgDatabase->TableGetNext(&tableEntry);
    }

    if (num < 0) {
        sgDatabase->AbortTransaction();
        throw DATABASE_ERROR;
    }

    num = sgDatabase->TableDeleteByCSIndex(csIndex);
    if (num < 0) {
        sgDatabase->AbortTransaction();
        throw DATABASE_ERROR;
    }

    // Every entry visited above should have been deleted.
    ASSERT(count == (DWORD)num);

    if (!sgDatabase->CommitTransaction()) {
        sgDatabase->AbortTransaction();
        throw DATABASE_ERROR;
    }
}
// Indices into the event array waited on by SigCheckPoint(). The order is
// the order in which the events are stored in the array; any earlier
// definitions are dropped first so the values cannot clash.
#undef  TARGET_OPLOCK_BREAK
#undef  TARGET_READ_DONE
#undef  GROVEL_START
#undef  NUM_EVENTS
#define TARGET_OPLOCK_BREAK 0
#define TARGET_READ_DONE    1
#define GROVEL_START        2
#define NUM_EVENTS          3

// SigCheckPoint suspends the thread until the target file completes its read
// operation. If the time allotment expires before the operation completes,
// the grovelStop event is set to signal grovel() to awaken, and this method
// won't return until grovel() sets the grovelStart event. If the file's
// oplock breaks before this method returns, the file will be closed.
//
// target:     the file being read; its handle and both events must exist.
// targetRead: TRUE when a read on the target is outstanding.
//
// Throws TERMINATE when terminate is set once the read has finished, and
// TARGET_ERROR when the target's oplock broke.
VOID Groveler::SigCheckPoint(
    FileData *target,
    BOOL      targetRead)
{
    HANDLE events[NUM_EVENTS];

    DWORD elapsedTime,
          timeOut,
          eventNum,
          eventTime;

    BOOL targetOplockBroke     = FALSE,
         waitingForGrovelStart = FALSE,
         success;

    ASSERT(target                   != NULL);
    ASSERT(target->handle           != NULL);
    ASSERT(target->oplock   .hEvent != NULL);
    ASSERT(target->readSynch.hEvent != NULL);
    ASSERT(grovelStartEvent         != NULL);
    ASSERT(grovelStopEvent          != NULL);

    events[TARGET_OPLOCK_BREAK] = target->oplock   .hEvent;
    events[TARGET_READ_DONE]    = target->readSynch.hEvent;
    events[GROVEL_START]        = grovelStartEvent;

    while (TRUE) {

        // Pick the wait timeout: block indefinitely while paused or while
        // the allotment is unlimited and a read is outstanding; poll (0)
        // when nothing is outstanding; otherwise wait for the remainder
        // of the allotment, pausing once it has run out.
        if (waitingForGrovelStart)
            timeOut = INFINITE;
        else if (timeAllotted == INFINITE)
            timeOut = targetRead ? INFINITE : 0;
        else {
            elapsedTime = GetTickCount() - startAllottedTime;
            if (timeAllotted > elapsedTime)
                timeOut = targetRead ? timeAllotted - elapsedTime : 0;
            else {
                waitingForGrovelStart = TRUE;
                timeOut = INFINITE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
            }
        }

        eventNum  = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
        eventTime = GetTickCount();

        switch (eventNum) {

            case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:

                // The target's oplock broke. Close the file right away
                // unless a read is still outstanding on it.
                ASSERT(!targetOplockBroke);
                targetOplockBroke = TRUE;
                success = ResetEvent(target->oplock.hEvent);
                ASSERT_ERROR(success);
                if (!targetRead) {
                    CLEAR_OVERLAPPED(target->oplock);
                    CloseFile(target);
                }
                DPRINTF((_T("%s: target file %s oplock broke during hash\n"),
                    driveLetterName, target->fileName));
                break;

            case WAIT_OBJECT_0 + TARGET_READ_DONE:

                // The read finished. If the oplock broke while it was in
                // flight, the deferred close happens now.
                ASSERT(targetRead);
                targetRead = FALSE;
                success = ResetEvent(target->readSynch.hEvent);
                ASSERT_ERROR(success);
                target->stopTime = eventTime;
                if (targetOplockBroke) {
                    CLEAR_OVERLAPPED(target->oplock);
                    CloseFile(target);
                }
                break;

            case WAIT_OBJECT_0 + GROVEL_START:

                // grovel() has granted a new time allotment.
                ASSERT(waitingForGrovelStart);
                waitingForGrovelStart = FALSE;
                success = ResetEvent(grovelStartEvent);
                ASSERT_ERROR(success);
                break;

            case WAIT_TIMEOUT:

                // Either the poll found nothing outstanding (done), or
                // the allotment expired while the read is still running.
                ASSERT(!waitingForGrovelStart);
                if (!targetRead) {
                    if (terminate)
                        throw TERMINATE;
                    if (targetOplockBroke)
                        throw TARGET_ERROR;
                    return;
                }
                waitingForGrovelStart = TRUE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
                break;

            default:

                ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
        }
    }
}
// Indices into the event array waited on by CmpCheckPoint(). The order is
// the order in which the events are stored in the array; any earlier
// definitions are dropped first so the values cannot clash.
#undef  TARGET_OPLOCK_BREAK
#undef  MATCH_OPLOCK_BREAK
#undef  TARGET_READ_DONE
#undef  MATCH_READ_DONE
#undef  GROVEL_START
#undef  NUM_EVENTS
#define TARGET_OPLOCK_BREAK 0
#define MATCH_OPLOCK_BREAK  1
#define TARGET_READ_DONE    2
#define MATCH_READ_DONE     3
#define GROVEL_START        4
#define NUM_EVENTS          5

// CmpCheckPoint suspends the thread until the target file, the
// match file, or both complete their read operations. If the time
// allotment expires before the operations complete, the grovelStop
// event is set to signal grovel() to awaken, and this method won't
// return until grovel() sets the grovelStart event. If either file's
// oplock breaks before this method returns, the file will be closed.
//
// target, match: the two files being compared; handles and events must
//                exist for both.
// targetRead:    TRUE when a read on the target is outstanding.
// matchRead:     TRUE when a read on the match is outstanding.
//
// Once no read is outstanding: throws TERMINATE when terminate is set,
// TARGET_ERROR when the target's oplock broke, MATCH_ERROR when the
// match's oplock broke (checked in that order).
VOID Groveler::CmpCheckPoint(
    FileData *target,
    FileData *match,
    BOOL      targetRead,
    BOOL      matchRead)
{
    HANDLE events[NUM_EVENTS];

    DWORD elapsedTime,
          timeOut,
          eventNum,
          eventTime;

    BOOL targetOplockBroke     = FALSE,
         matchOplockBroke      = FALSE,
         waitingForGrovelStart = FALSE,
         success;

    ASSERT(target                   != NULL);
    ASSERT(match                    != NULL);
    ASSERT(target->handle           != NULL);
    ASSERT(match ->handle           != NULL);
    ASSERT(target->oplock   .hEvent != NULL);
    ASSERT(match ->oplock   .hEvent != NULL);
    ASSERT(target->readSynch.hEvent != NULL);
    ASSERT(match ->readSynch.hEvent != NULL);
    ASSERT(grovelStartEvent         != NULL);
    ASSERT(grovelStopEvent          != NULL);

    events[TARGET_OPLOCK_BREAK] = target->oplock   .hEvent;
    events[MATCH_OPLOCK_BREAK]  = match ->oplock   .hEvent;
    events[TARGET_READ_DONE]    = target->readSynch.hEvent;
    events[MATCH_READ_DONE]     = match ->readSynch.hEvent;
    events[GROVEL_START]        = grovelStartEvent;

    while (TRUE) {

        // Pick the wait timeout: block indefinitely while paused or while
        // the allotment is unlimited and a read is outstanding; poll (0)
        // when nothing is outstanding; otherwise wait for the remainder
        // of the allotment, pausing once it has run out.
        if (waitingForGrovelStart)
            timeOut = INFINITE;
        else if (timeAllotted == INFINITE)
            timeOut = targetRead || matchRead ? INFINITE : 0;
        else {
            elapsedTime = GetTickCount() - startAllottedTime;
            if (timeAllotted > elapsedTime)
                timeOut = targetRead || matchRead
                        ? timeAllotted - elapsedTime : 0;
            else {
                waitingForGrovelStart = TRUE;
                timeOut = INFINITE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
            }
        }

        eventNum  = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
        eventTime = GetTickCount();

        switch (eventNum) {

            case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:

                // Close the target now unless its read is still in flight.
                ASSERT(!targetOplockBroke);
                targetOplockBroke = TRUE;
                success = ResetEvent(target->oplock.hEvent);
                ASSERT_ERROR(success);
                if (!targetRead) {
                    CLEAR_OVERLAPPED(target->oplock);
                    CloseFile(target);
                }
                DPRINTF((_T("%s: target file %s oplock broke during compare\n"),
                    driveLetterName, target->fileName));
                break;

            case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:

                // Close the match now unless its read is still in flight.
                ASSERT(!matchOplockBroke);
                matchOplockBroke = TRUE;
                success = ResetEvent(match->oplock.hEvent);
                ASSERT_ERROR(success);
                if (!matchRead) {
                    CLEAR_OVERLAPPED(match->oplock);
                    CloseFile(match);
                }
                DPRINTF((_T("%s: match file %s oplock broke during compare\n"),
                    driveLetterName, match->fileName));
                break;

            case WAIT_OBJECT_0 + TARGET_READ_DONE:

                // Target read finished; perform a deferred close if its
                // oplock broke meanwhile.
                ASSERT(targetRead);
                targetRead = FALSE;
                success = ResetEvent(target->readSynch.hEvent);
                ASSERT_ERROR(success);
                target->stopTime = eventTime;
                if (targetOplockBroke) {
                    CLEAR_OVERLAPPED(target->oplock);
                    CloseFile(target);
                }
                break;

            case WAIT_OBJECT_0 + MATCH_READ_DONE:

                // Match read finished; perform a deferred close if its
                // oplock broke meanwhile.
                ASSERT(matchRead);
                matchRead = FALSE;
                success = ResetEvent(match->readSynch.hEvent);
                ASSERT_ERROR(success);
                match->stopTime = eventTime;
                if (matchOplockBroke) {
                    CLEAR_OVERLAPPED(match->oplock);
                    CloseFile(match);
                }
                break;

            case WAIT_OBJECT_0 + GROVEL_START:

                // grovel() has granted a new time allotment.
                ASSERT(waitingForGrovelStart);
                waitingForGrovelStart = FALSE;
                success = ResetEvent(grovelStartEvent);
                ASSERT_ERROR(success);
                break;

            case WAIT_TIMEOUT:

                // Either the poll found nothing outstanding (done), or
                // the allotment expired while a read is still running.
                ASSERT(!waitingForGrovelStart);
                if (!targetRead && !matchRead) {
                    if (terminate)
                        throw TERMINATE;
                    if (targetOplockBroke)
                        throw TARGET_ERROR;
                    if (matchOplockBroke)
                        throw MATCH_ERROR;
                    return;
                }
                waitingForGrovelStart = TRUE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
                break;

            default:

                ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
        }
    }
}
// Indices into the event array waited on by MergeCheckPoint(). The order
// is the order in which the events are stored in the array; any earlier
// definitions are dropped first so the values cannot clash.
#undef  TARGET_OPLOCK_BREAK
#undef  MATCH_OPLOCK_BREAK
#undef  MERGE_DONE
#undef  GROVEL_START
#undef  NUM_EVENTS
#define TARGET_OPLOCK_BREAK 0
#define MATCH_OPLOCK_BREAK  1
#define MERGE_DONE          2
#define GROVEL_START        3
#define NUM_EVENTS          4

// MergeCheckPoint suspends the thread until the merge operation is completed.
// If the time allotment expires before the merge is completed, the
// grovelStop event is set to signal grovel() to awaken, and this method
// won't return until grovel() sets the grovelStart event. If either file's
// oplock breaks before the merge is completed, the abortMerge event is set.
//
// target, match:   the files being merged; handles and oplock events must
//                  exist for both.
// mergeSynch:      OVERLAPPED of the merge request issued on grovHandle.
// abortMergeEvent: event set here to request that the merge be aborted;
//                  reset again before returning.
// merge:           TRUE when the merge request is outstanding.
//
// Returns TRUE when the merge completed successfully; otherwise FALSE
// with the failure code in the thread's last error. Both files are closed
// when the merge completes. Throws TERMINATE when terminate is set once
// the merge is no longer outstanding.
BOOL Groveler::MergeCheckPoint(
    FileData   *target,
    FileData   *match,
    OVERLAPPED *mergeSynch,
    HANDLE      abortMergeEvent,
    BOOL        merge)
{
    HANDLE events[NUM_EVENTS];

    DWORD elapsedTime,
          timeOut,
          eventNum,
          eventTime,
          lastError = STATUS_TIMEOUT;

    BOOL targetOplockBroke     = FALSE,
         matchOplockBroke      = FALSE,
         waitingForGrovelStart = FALSE,
         mergeSuccess          = FALSE,
         success;

    ASSERT(target                != NULL);
    ASSERT(target->handle        != NULL);
    ASSERT(target->oplock.hEvent != NULL);

    ASSERT(match                != NULL);
    ASSERT(match->handle        != NULL);
    ASSERT(match->oplock.hEvent != NULL);

    ASSERT(mergeSynch         != NULL);
    ASSERT(mergeSynch->hEvent != NULL);

    ASSERT(abortMergeEvent  != NULL);
    ASSERT(grovelStartEvent != NULL);
    ASSERT(grovelStopEvent  != NULL);

    ASSERT(grovHandle != NULL);

    events[TARGET_OPLOCK_BREAK] = target->oplock.hEvent;
    events[MATCH_OPLOCK_BREAK]  = match ->oplock.hEvent;
    events[MERGE_DONE]          = mergeSynch->   hEvent;
    events[GROVEL_START]        = grovelStartEvent;

    while (TRUE) {

        // Pick the wait timeout: block indefinitely while paused or while
        // the allotment is unlimited and the merge is outstanding; poll
        // (0) when nothing is outstanding; otherwise wait for the
        // remainder of the allotment, pausing once it has run out.
        if (waitingForGrovelStart)
            timeOut = INFINITE;
        else if (timeAllotted == INFINITE)
            timeOut = merge ? INFINITE : 0;
        else {
            elapsedTime = GetTickCount() - startAllottedTime;
            if (timeAllotted > elapsedTime)
                timeOut = merge ? timeAllotted - elapsedTime : 0;
            else {
                waitingForGrovelStart = TRUE;
                timeOut = INFINITE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
            }
        }

        eventNum  = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
        eventTime = GetTickCount();

        switch (eventNum) {

            case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:

                // Target oplock broke: ask for the merge to be aborted if
                // it is still running.
                ASSERT(!targetOplockBroke);
                targetOplockBroke = TRUE;
                success = ResetEvent(target->oplock.hEvent);
                ASSERT_ERROR(success);
                CLEAR_OVERLAPPED(target->oplock);
                if (merge) {
                    success = SetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                }
                DPRINTF((_T("%s: target file %s oplock broke during merge\n"),
                    driveLetterName, target->fileName));
                break;

            case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:

                // Match oplock broke: ask for the merge to be aborted if
                // it is still running.
                ASSERT(!matchOplockBroke);
                matchOplockBroke = TRUE;
                success = ResetEvent(match->oplock.hEvent);
                ASSERT_ERROR(success);
                CLEAR_OVERLAPPED(match->oplock);
                if (merge) {
                    success = SetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                }
                DPRINTF((_T("%s: match file %s oplock broke during merge\n"),
                    driveLetterName, match->fileName));
                break;

            case WAIT_OBJECT_0 + MERGE_DONE:

                // The merge request completed; collect its result.
                ASSERT(merge);
                merge = FALSE;
                success = ResetEvent(mergeSynch->hEvent);
                ASSERT_ERROR(success);
                target->stopTime = eventTime;

                // NOTE(review): lastError doubles as the bytes-transferred
                // output of GetOverlappedResult() and is then tested as a
                // status code — presumably the SIS filter reports its
                // status there; confirm.
                mergeSuccess = GetOverlappedResult(
                    grovHandle,
                    mergeSynch,
                    &lastError,
                    FALSE);
                if (!mergeSuccess)
                    lastError = GetLastError();
                else if (lastError != ERROR_SUCCESS)
                    mergeSuccess = FALSE;
                else {
                    // Record the CS index of the merged files.
                    GetCSIndex(target->handle, &target->entry.csIndex);
                    if (!HasCSIndex(match->entry.csIndex))
                        GetCSIndex(match->handle, &match->entry.csIndex);
                }
                CloseFile(target);
                CloseFile(match);
                break;

            case WAIT_OBJECT_0 + GROVEL_START:

                // grovel() has granted a new time allotment.
                ASSERT(waitingForGrovelStart);
                waitingForGrovelStart = FALSE;
                success = ResetEvent(grovelStartEvent);
                ASSERT_ERROR(success);
                break;

            case WAIT_TIMEOUT:

                // Either the poll found the merge finished (done), or the
                // allotment expired while the merge is still running.
                ASSERT(!waitingForGrovelStart);
                if (!merge) {
                    success = ResetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                    if (terminate)
                        throw TERMINATE;
                    if (!mergeSuccess)
                        SetLastError(lastError);
                    return mergeSuccess;
                }
                waitingForGrovelStart = TRUE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
                break;

            default:

                ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
        }
    }
}
// The following seven methods (GetTarget(), CalculateSignature(),
// GetMatchList(), GetCSFile(), GetMatch(), Compare(), and Merge())
// implement the phases of the groveling process.
// Helper structures shared by the groveling phase methods.

// Identity and timestamps of one file on a match list.
struct MatchListEntry {
    DWORDLONG fileID;
    DWORDLONG createTime;
    DWORDLONG writeTime;
};

// A CS index together with a name.
// NOTE(review): name[1] is presumably the start of a longer, separately
// sized string — confirm against the allocation site.
struct CSIndexEntry {
    CSID  csIndex;
    TCHAR name[1];
};
// GetTarget() is the first phase of groveling a file. It dequeues
// a file to be groveled (the "target" file), opens it, checks that
// it meets all criteria, then passes it on to the next phases.
BOOL Groveler::GetTarget( FileData *target, DWORD *queueIndex) { SGNativeTableEntry tableEntry;
SGNativeQueueEntry queueEntry, otherQueueEntry;
TFileName targetName, parentName;
DWORD lastError;
DWORDLONG currentTime, readyTime;
#if DBG
DWORD earliestTime; #endif
LONG num;
BOOL byName, success;
TPRINTF((_T("GETTarget: entered\n")));
ASSERT(target != NULL); ASSERT(target->handle == NULL); ASSERT(target->entry.fileID == 0); ASSERT(target->fileName[0] == _T('\0')); ASSERT(!HasCSIndex(target->entry.csIndex));
ASSERT(queueIndex != NULL); ASSERT(sgDatabase != NULL);
// Dequeue a file to be groveled. If the queue is empty or if no
// entry's ready time has been reached, return Grovel_ok to grovel().
queueEntry.fileName = target->fileName; num = sgDatabase->QueueGetFirst(&queueEntry); if (num < 0) throw DATABASE_ERROR; if (num == 0) { DPRINTF((_T("%s: queue is empty\n"), driveLetterName)); return FALSE; } ASSERT(num == 1);
currentTime = GetTime(); if (queueEntry.readyTime > currentTime) { #if DBG
earliestTime = (DWORD)((queueEntry.readyTime - currentTime) / 10000); DPRINTF((_T("%s: earliest queue entry ready to be groveled in %lu.%03lu sec\n"), driveLetterName, earliestTime / 1000, earliestTime % 1000)); #endif
return FALSE; }
*queueIndex = queueEntry.order; target->entry.fileID = queueEntry.fileID; target->parentID = queueEntry.parentID; target->retryTime = queueEntry.retryTime;
// Open the file by ID or name, and check by name
// that the file and its parent directory are allowed.
byName = target->entry.fileID == 0; if (byName) {
ASSERT(target->parentID != 0); ASSERT(target->fileName[0] != _T('\0'));
DPRINTF((_T("--> 0x%08lx 0x%016I64x:\"%s\"\n"), queueEntry.reason, target->parentID, target->fileName)); #endif
if (!GetFileName(volumeHandle, target->parentID, &parentName)) { DPRINTF((_T("%s: can't get name for directory 0x%016I64x\n"), driveLetterName, target->parentID)); throw TARGET_INVALID; }
targetName.assign(parentName.name); targetName.append(_T("\\")); targetName.append(target->fileName);
if (!IsAllowedName(targetName.name)) { DPRINTF((_T("%s: target file \"%s\" is disallowed\n"), driveLetterName, targetName.name)); throw TARGET_INVALID; }
targetName.assign(driveName); targetName.append(parentName.name); targetName.append(_T("\\")); targetName.append(target->fileName);
if (!OpenFileByName(target, FALSE, targetName.name)) { lastError = GetLastError(); if (lastError == ERROR_FILE_NOT_FOUND || lastError == ERROR_PATH_NOT_FOUND) { DPRINTF((_T("%s: target file \"%s\" doesn\'t exist\n"), driveLetterName, targetName.name)); throw TARGET_INVALID; } DPRINTF((_T("%s: can't open target file \"%s\": %lu\n"), driveLetterName, targetName.name, lastError)); throw TARGET_ERROR; }
// Set an oplock on the target file.
if (!SetOplock(target)) { DPRINTF((_T("%s: can't set oplock on target file \"%s\": %lu\n"), driveLetterName, targetName.name, GetLastError())); throw TARGET_ERROR; }
} else {
ASSERT(target->parentID == 0); ASSERT(target->fileName[0] == _T('\0'));
target->parentID = 0;
DPRINTF((_T("--> 0x%08lx 0x%016I64x 0x%016I64x\n"), queueEntry.reason, target->entry.fileID, target->parentID)); #endif
TPRINTF((_T("GETTarget: Opening %s:0x%016I64x by ID\n"), driveName,target->entry.fileID));
if (!OpenFileByID(target, FALSE)) { lastError = GetLastError(); if (lastError == ERROR_FILE_NOT_FOUND || lastError == ERROR_PATH_NOT_FOUND || lastError == ERROR_INVALID_PARAMETER) {
DPRINTF((_T("%s: target file 0x%016I64x doesn\'t exist: %lu\n"), driveLetterName, target->entry.fileID, lastError));
DPRINTF((_T("%s: can't open target file 0x%016I64x: %lu\n"), driveLetterName, target->entry.fileID, lastError));
// Set an oplock on the target file.
TPRINTF((_T("GETTarget: Successfully opened %s:0x%016I64x by ID\n"), driveName,target->entry.fileID));
if (!SetOplock(target)) { DPRINTF((_T("%s: can't set oplock on target file %s: %lu\n"), driveLetterName, target->fileName, GetLastError())); throw TARGET_ERROR; }
if (!GetFileName(target->handle, &targetName)) { DPRINTF((_T("%s: can't get name for target file %s\n"), driveLetterName, target->fileName)); throw TARGET_ERROR; }
if (!IsAllowedName(targetName.name)) { DPRINTF((_T("%s: target file \"%s\" is disallowed\n"), driveLetterName, targetName.name)); throw TARGET_INVALID; } }
// Get the information on the target file.
if (!GetFileInformationByHandle(target->handle, &fileInfo)) { #if DBG
if (byName) { DPRINTF((_T("%s: can't get information on target file \"%s\": %lu\n"), driveLetterName, targetName.name, GetLastError())); } else { DPRINTF((_T("%s: can't get information on target file %s: %lu\n"), driveLetterName, target->fileName, GetLastError())); } #endif
word.HighPart = fileInfo.nFileIndexHigh; word.LowPart = fileInfo.nFileIndexLow; if (byName) target->entry.fileID = word.QuadPart; else { ASSERT(target->entry.fileID == word.QuadPart); }
target->parentID = 0; // We don't need the parent ID any more.
// If the target file was opened by name, check
// if it currently has an entry in the queue by ID.
if (byName) { otherQueueEntry.fileID = target->entry.fileID; otherQueueEntry.fileName = NULL; num = sgDatabase->QueueGetFirstByFileID(&otherQueueEntry); if (num < 0) throw DATABASE_ERROR;
if (num > 0) { ASSERT(num == 1); DPRINTF((_T("%s: target file \"%s\" is already in queue as 0x%016I64x\n"), driveLetterName, targetName.name, target->entry.fileID)); target->entry.fileID = 0; // Prevent the table entry from being deleted.
// Fill in the target file's remaining information values.
word.HighPart = fileInfo.nFileSizeHigh; word.LowPart = fileInfo.nFileSizeLow; target->entry.fileSize = word.QuadPart;
target->entry.attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;
word.HighPart = fileInfo.ftCreationTime.dwHighDateTime; word.LowPart = fileInfo.ftCreationTime.dwLowDateTime; target->entry.createTime = word.QuadPart;
word.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime; word.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime; target->entry.writeTime = word.QuadPart;
// If the target file is a reparse point, check if it
// is a SIS reparse point. If it is, get the CS index.
if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) target->entry.csIndex = nullCSIndex; else if (!GetCSIndex(target->handle, &target->entry.csIndex)) { DPRINTF((_T("%s: target file %s is a non-SIS reparse point\n"), driveLetterName, target->fileName)); throw TARGET_INVALID; }
// Check if the target file is too small or has any disallowed attributes.
if ((fileInfo.dwFileAttributes & disallowedAttributes) != 0 || fileInfo.nNumberOfLinks != 1 || target->entry.fileSize < minFileSize) { DPRINTF((_T("%s: target file \"%s\" is disallowed\n"), driveLetterName, target->fileName)); throw TARGET_INVALID; }
// If a table entry exists for the target file, check if it is
// consistent with the information we have on the file. If it is, and
// the file was opened by name, or if the queue entry was the result
// of a SIS merge, close the file and go on to grovel the next target.
tableEntry.fileID = target->entry.fileID; num = sgDatabase->TableGetFirstByFileID(&tableEntry); if (num < 0) throw DATABASE_ERROR;
if (num > 0) { ASSERT(num == 1); ASSERT(tableEntry.fileID == target->entry.fileID);
if (target->entry.fileSize == tableEntry.fileSize && target->entry.attributes == tableEntry.attributes && SameCSIndex(target->entry.csIndex, tableEntry.csIndex) && target->entry.createTime == tableEntry.createTime && target->entry.writeTime == tableEntry.writeTime) {
if (byName) { DPRINTF((_T("%s: target file \"%s\" has already been groveled\n"), driveLetterName, targetName.name)); target->entry.fileID = 0; // Prevent the table entry from being deleted.
if (queueEntry.reason == USN_REASON_BASIC_INFO_CHANGE) { DPRINTF((_T("%s: queue entry for file %s is the result of a SIS merge\n"), driveLetterName, target->fileName)); target->entry.fileID = 0; // Prevent the table entry from being deleted.
throw TARGET_INVALID; } } }
// Check if the time since the target file was last modified is too short.
// If it is, close the file and go on to grovel the next target file.
readyTime = (target->entry.createTime > target->entry.writeTime ? target->entry.createTime : target->entry.writeTime) + minFileAge; currentTime = GetTime(); if (currentTime < readyTime) throw TARGET_ERROR;
// Check if the target file is mapped by another user.
if (IsFileMapped(target)) { DPRINTF((_T("%s: target file %s is already mapped\n"), driveLetterName, target->fileName)); throw TARGET_ERROR; }
TPRINTF((_T("GETTarget: returning\n")));
return TRUE; }
// CalculateSignature() computes the target file's signature and stores it
// in target->entry.signature.  A zero-length file keeps a signature of 0.
// Otherwise it samples pages of SIG_PAGE_SIZE bytes, roughly 1/3 and 2/3 of
// the way through the file (a single page when both picks coincide), and
// folds each page into the signature with Checksum().
//
// The reads are overlapped and alternate between target->buffer[0] and
// target->buffer[1]: each pass first collects the read issued by the
// previous pass, then issues the next read, then checksums the page that
// was just collected.
//
// Throws TARGET_ERROR if a read fails or returns an unexpected byte count.
VOID Groveler::CalculateSignature(FileData *target)
{
    DWORD tailPageBytes;        // Number of valid bytes in the file's last page
    DWORD wantBytes;            // Valid bytes in the page being requested
    DWORD wantBytesPrev;        // Same, for the read issued one pass earlier
    DWORD askBytes;             // wantBytes rounded up to a sector multiple
    DWORD askBytesPrev = 0;     // Same, for the read issued one pass earlier
    DWORD gotBytes;
    DWORD bufIndex;             // Buffer receiving the read issued this pass
    DWORD otherBuf;             // Buffer filled by the previous pass's read
    DWORD readError;

    DWORDLONG numPages;
    DWORDLONG curPage;
    DWORDLONG donePage;         // Page requested one pass earlier
    DWORDLONG tailPage;
    DWORDLONG firstPick;
    DWORDLONG secondPick;

    BOOL readFinished = FALSE;  // TRUE if ReadFile() completed synchronously
    BOOL success;

    INT pass;
    INT readsPlanned;

    ASSERT(target != NULL);
    ASSERT(target->entry.fileID != 0);
    ASSERT(target->handle != NULL);

    target->entry.signature = 0;

    if (0 == target->entry.fileSize)
        return;

    numPages      = (target->entry.fileSize - 1) / SIG_PAGE_SIZE + 1;
    tailPageBytes = (DWORD)((target->entry.fileSize - 1) % SIG_PAGE_SIZE) + 1;
    tailPage      = numPages - 1;

    ASSERT(numPages > 0);

    // Pick the two sample pages symmetrically about the middle of the file.
    firstPick    = (numPages + 2) / 3 - 1;
    secondPick   = tailPage - firstPick;
    readsPlanned = secondPick > firstPick ? 2 : 1;

    bufIndex = 0;
    curPage  = firstPick;

    // One more pass than there are reads: the extra pass only collects
    // and checksums the final read.
    for (pass = 0; pass <= readsPlanned; ++pass) {

        otherBuf = 1 - bufIndex;

        // Every pass but the first: collect the previous read's result.
        if (pass != 0) {
            SigCheckPoint(target, !readFinished);

            success = GetOverlappedResult(
                target->handle,
                &target->readSynch,
                &gotBytes,
                FALSE);
            if (!success) {
                DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
                    driveLetterName, target->fileName, GetLastError()));
                throw TARGET_ERROR;
            }

            // Accept either the sector-rounded count or the exact count.
            if (gotBytes != askBytesPrev && gotBytes != wantBytesPrev) {
                DPRINTF((_T("%s: sig read only %lu of %lu bytes from target file %s\n"),
                    driveLetterName, gotBytes, askBytesPrev, target->fileName));
                throw TARGET_ERROR;
            }

            if (gotBytes >= sigReportThreshold) {
                hashReadCount++;
                hashReadTime += target->stopTime - target->startTime;
            }
        }

        // Every pass but the last: start reading the next sample page.
        if (pass < readsPlanned) {
            offset.QuadPart              = curPage * SIG_PAGE_SIZE;
            target->readSynch.Offset     = offset.LowPart;
            target->readSynch.OffsetHigh = offset.HighPart;

            if (curPage == tailPage)
                wantBytes = tailPageBytes;
            else
                wantBytes = SIG_PAGE_SIZE;

            // Round the request up to a whole number of sectors.
            askBytes  = wantBytes + sectorSize - 1;
            askBytes -= askBytes % sectorSize;

            target->startTime = GetTickCount();
            readFinished      = ReadFile(target->handle, target->buffer[bufIndex],
                                    askBytes, NULL, &target->readSynch);
            target->stopTime  = GetTickCount();
            readError         = GetLastError();

            if (readFinished) {
                success = ResetEvent(target->readSynch.hEvent);
                ASSERT_ERROR(success);
            } else if (readError != ERROR_IO_PENDING) {
                DPRINTF((_T("%s: error reading target file %s: %lu\n"),
                    driveLetterName, target->fileName, readError));
                throw TARGET_ERROR;
            }
        }

        // Every pass but the first: checksum the page just collected.
        if (pass != 0) {
            target->entry.signature = Checksum(
                (VOID *)target->buffer[otherBuf],
                wantBytesPrev,
                donePage * SIG_PAGE_SIZE,
                target->entry.signature);
        }

        // Rotate state for the next pass.
        donePage      = curPage;
        wantBytesPrev = wantBytes;
        askBytesPrev  = askBytes;
        bufIndex      = otherBuf;
        curPage       = secondPick;
    }
}
// GetMatchList() looks for file entries in the database ("match" files)
// with the same size, signature, and attributes as the target file, and
// sorts the hits into two containers:
//   matchList    - receives one MatchListEntry (file ID, create time,
//                  write time) for each hit that has no CS index set.
//   csIndexTable - receives one CSIndexEntry (CS index plus the name
//                  returned by GetCSName()) for each distinct CS index
//                  among the remaining hits, keyed by the CS index.
// Both containers must be empty on entry (asserted).  The entries are
// heap-allocated here; GetMatch() and GetCSFile() delete them as they
// pop them.
// Throws DATABASE_ERROR if a table lookup returns a negative count.
VOID Groveler::GetMatchList( FileData *target, FIFO *matchList, Table *csIndexTable) { SGNativeTableEntry tableEntry;
MatchListEntry *matchListEntry;
CSIndexEntry *csIndexEntry;
LONG num;
BOOL success;
ASSERT(target != NULL); ASSERT(target->entry.fileID != 0); ASSERT(target->entry.fileSize > 0); ASSERT(target->handle != NULL);
ASSERT(matchList != NULL); ASSERT(matchList->Number() == 0);
ASSERT(csIndexTable != NULL); ASSERT(csIndexTable->Number() == 0);
ASSERT(sgDatabase != NULL);
// Seed the lookup key with the target file's size, signature, and attributes.
tableEntry.fileSize = target->entry.fileSize; tableEntry.signature = target->entry.signature; tableEntry.attributes = target->entry.attributes;
// NOTE(review): no matching #if precedes this #endif in the visible
// source -- verify that the conditional block is intact.
DPRINTF((_T("--> {%I64u, 0x%016I64x, 0x%lx}\n"), tableEntry.fileSize, tableEntry.signature, tableEntry.attributes)); #endif
// Walk every table entry matching the key.  A positive count means an
// entry was returned; a negative count is reported after the loop.
num = sgDatabase->TableGetFirstByAttr(&tableEntry);
while (num > 0) {
ASSERT(num == 1); ASSERT(tableEntry.fileID != 0); ASSERT(tableEntry.fileSize == target->entry.fileSize); ASSERT(tableEntry.signature == target->entry.signature); ASSERT(tableEntry.attributes == target->entry.attributes);
// No CS index: queue the entry on the match list.
if (!HasCSIndex(tableEntry.csIndex)) {
matchListEntry = new MatchListEntry; ASSERT(matchListEntry != NULL);
matchListEntry->fileID = tableEntry.fileID; matchListEntry->createTime = tableEntry.createTime; matchListEntry->writeTime = tableEntry.writeTime; matchList->Put((VOID *)matchListEntry);
DPRINTF((_T(" 0x%016I64x\n"), tableEntry.fileID)); #endif
// CS index set: add it to the CS index table unless it is already there.
} else {
csIndexEntry = (CSIndexEntry *)csIndexTable->Get ((const VOID *)&tableEntry.csIndex, sizeof(CSID));
if (csIndexEntry == NULL) {
TCHAR *csName = GetCSName(&tableEntry.csIndex); if (csName != NULL) {
// Calculate how big the name buffer is and how big the
// overall structure is. Note that the CSIndexEntry has
// space in it for one character, which we account for.
// NOTE(review): wcslen() on a TCHAR * assumes a UNICODE build;
// _tcslen() would be character-set neutral -- confirm.
int nameBufLen = ((wcslen(csName) + 1) * sizeof(TCHAR)); //account for NULL
// NOTE(review): the entry is allocated as a BYTE array, but GetCSFile()
// releases it with scalar delete on a CSIndexEntry * -- confirm the
// new[]/delete pairing.
int bufLen = ((sizeof(CSIndexEntry)-sizeof(TCHAR)) + nameBufLen); csIndexEntry = (CSIndexEntry *)(new BYTE[bufLen]); ASSERT(csIndexEntry != NULL);
csIndexEntry->csIndex = tableEntry.csIndex;
(void)StringCbCopy( csIndexEntry->name, nameBufLen, csName);
FreeCSName(csName); csName = NULL; }
// NOTE(review): as the braces read here, Put() is also reached with
// csIndexEntry still NULL when GetCSName() fails -- verify against
// the complete source.
success = csIndexTable->Put((VOID *)csIndexEntry, sizeof(CSID)); ASSERT_ERROR(success); }
// NOTE(review): "match" is not declared in this function; this trace
// line presumably only builds under a debug switch that is normally
// off -- confirm.
DPRINTF((_T(" 0x%016I64x %s\n"), match->entry.fileID, csIndexEntry->name)); #endif
num = sgDatabase->TableGetNext(&tableEntry); }
if (num < 0) throw DATABASE_ERROR; }
// GetCSFile() pops the first entry from the CS index table and opens the
// CS file it names.
//   target       - the open target file; its size, signature, and
//                  attributes are copied into match.
//   match        - must be cleared on entry (asserted); receives the
//                  entry's CS index and name and is passed to
//                  OpenFileByName().
//   csIndexTable - the table filled in by GetMatchList(); each entry
//                  popped here is deleted.
// Returns TRUE if a CS file was opened and its size equals the target
// file's size; returns FALSE, with match's CS index and name cleared,
// when the table has no more entries.
// Throws MATCH_INVALID if the CS file does not exist, MATCH_ERROR if it
// cannot be opened or queried for any other reason, and MATCH_STALE if
// its size differs from the expected size.
BOOL Groveler::GetCSFile( FileData *target, FileData *match, Table *csIndexTable) { CSIndexEntry *csIndexEntry;
TFileName csFileName;
DWORD lastError;
LONG num;
ASSERT(target != NULL); ASSERT(target->entry.fileID != 0); ASSERT(target->entry.fileSize > 0); ASSERT(target->handle != NULL);
ASSERT(match != NULL); ASSERT(match->entry.fileID == 0); ASSERT(match->entry.fileSize == 0); ASSERT(match->entry.signature == 0); ASSERT(match->entry.attributes == 0); ASSERT(!HasCSIndex(match->entry.csIndex)); ASSERT(match->entry.createTime == 0); ASSERT(match->entry.writeTime == 0); ASSERT(match->handle == NULL); ASSERT(match->parentID == 0); ASSERT(match->retryTime == 0); ASSERT(match->fileName[0] == _T('\0'));
ASSERT(csIndexTable != NULL); ASSERT(sgDatabase != NULL);
// Pop the first entry from the CS index table. If the entry's CS
// index is the same as the target file's, skip to the next entry.
do { csIndexEntry = (CSIndexEntry *)csIndexTable->GetFirst(); if (csIndexEntry == NULL) { match->entry.csIndex = nullCSIndex; match->fileName[0] = _T('\0'); return FALSE; }
// NOTE(review): _tcscpy() does not bound the copy by the size of
// match->fileName -- confirm the CS name always fits.
match->entry.csIndex = csIndexEntry->csIndex; _tcscpy(match->fileName, csIndexEntry->name);
// NOTE(review): GetMatchList() allocates these entries with
// new BYTE[...]; scalar delete on a CSIndexEntry * does not pair with
// that allocation -- confirm and switch to delete[] on a BYTE *.
delete csIndexEntry; csIndexEntry = NULL; } while (SameCSIndex(target->entry.csIndex, match->entry.csIndex));
// The match file is expected to carry the same size, signature, and
// attributes as the target file.
match->entry.fileSize = target->entry.fileSize; match->entry.signature = target->entry.signature; match->entry.attributes = target->entry.attributes;
// Build the CS file's path: <driveName><CS_DIR_PATH>\<CS name>.sis
csFileName.assign(driveName); csFileName.append(CS_DIR_PATH); csFileName.append(_T("\\")); csFileName.append(match->fileName); csFileName.append(_T(".sis"));
// Open the CS file. If the file doesn't exist, remove all entries
// from the table that point to this file. If the file can't be
// opened for any other reason, mark that the target file may
// need to be groveled again, then go on to the next match file.
// (This function only throws MATCH_INVALID or MATCH_ERROR; the table
// and retry handling described above is not performed here.)
// NOTE(review): no matching #if precedes this #endif in the visible
// source -- verify that the conditional block is intact.
DPRINTF((_T("--> %s\n"), match->fileName)); #endif
if (!OpenFileByName(match, FALSE, csFileName.name)) { lastError = GetLastError(); if (lastError == ERROR_FILE_NOT_FOUND || lastError == ERROR_PATH_NOT_FOUND) { DPRINTF((_T("%s: CS file %s doesn't exist\n"), driveLetterName, match->fileName)); throw MATCH_INVALID; } DPRINTF((_T("%s: can't open CS file %s: %lu\n"), driveLetterName, match->fileName, lastError)); throw MATCH_ERROR; }
// Get the information on the CS file. If this fails,
// close the file, mark that the target file may need to
// be groveled again, then go on to the next match file.
// NOTE(review): fileInfo and fileSize have no declaration in the
// visible part of this function -- verify the locals are declared.
if (!GetFileInformationByHandle(match->handle, &fileInfo)) { DPRINTF((_T("%s: can't get information on CS file %s: %lu\n"), driveLetterName, match->fileName, GetLastError())); throw MATCH_ERROR; }
// If the CS file's information doesn't match its expected values, close the
// CS file, delete the match file entry from the table, and go on to the
// next match file. Otherwise, go on to compare the target and CS files.
// (Only the file size is checked here.)
fileSize.HighPart = fileInfo.nFileSizeHigh; fileSize.LowPart = fileInfo.nFileSizeLow;
if (match->entry.fileSize != fileSize.QuadPart) { DPRINTF((_T("%s: CS file %s doesn't have expected information\n"), driveLetterName, match->fileName)); throw MATCH_STALE; }
return TRUE; }
// GetMatch() pops the first usable entry from the match list, opens the
// file by ID, and verifies that it still agrees with its table entry.
//   target    - the open target file; its size, signature, and attributes
//               are copied into match as the expected values.
//   match     - must be cleared on entry (asserted); receives the entry's
//               file ID, create time, and write time and is passed to
//               OpenFileByID().
//   matchList - the list filled in by GetMatchList(); each entry popped
//               here is deleted.
// Returns TRUE if a match file was opened and passed every check; returns
// FALSE, with match's file ID and times reset to 0, when the list has no
// more entries.
// Throws DATABASE_ERROR if the queue lookup fails; MATCH_INVALID if the
// file no longer exists or is a non-SIS reparse point; MATCH_STALE if its
// size, attributes, or times differ from the table entry or it is already
// a SIS reparse point; MATCH_ERROR for any other open, oplock, query, or
// mapping failure.
BOOL Groveler::GetMatch( FileData *target, FileData *match, FIFO *matchList) { SGNativeQueueEntry queueEntry;
MatchListEntry *matchListEntry;
DWORD attributes, lastError;
ULARGE_INTEGER fileID, fileSize, createTime, writeTime;
LONG num;
ASSERT(target != NULL); ASSERT(target->entry.fileID != 0); ASSERT(target->entry.fileSize > 0); ASSERT(target->handle != NULL);
ASSERT(match != NULL); ASSERT(match->entry.fileID == 0); ASSERT(match->entry.fileSize == 0); ASSERT(match->entry.signature == 0); ASSERT(match->entry.attributes == 0); ASSERT(!HasCSIndex(match->entry.csIndex)); ASSERT(match->entry.createTime == 0); ASSERT(match->entry.writeTime == 0); ASSERT(match->handle == NULL); ASSERT(match->parentID == 0); ASSERT(match->retryTime == 0); ASSERT(match->fileName[0] == _T('\0'));
ASSERT(matchList != NULL); ASSERT(sgDatabase != NULL);
// Pop the first entry from the match list. If the entry's file ID is
// the same as the target file's, or if the entry is on the queue after
// having been enqueued by extract_log(), skip to the next entry.
while (TRUE) { matchListEntry = (MatchListEntry *)matchList->Get(); if (matchListEntry == NULL) { match->entry.fileID = 0; match->entry.createTime = 0; match->entry.writeTime = 0; return FALSE; }
match->entry.fileID = matchListEntry->fileID; match->entry.createTime = matchListEntry->createTime; match->entry.writeTime = matchListEntry->writeTime;
// The list entry's values now live in match; release the entry.
delete matchListEntry; matchListEntry = NULL;
ASSERT(match->entry.fileID != 0);
// Never compare the target file against itself.
if (target->entry.fileID == match->entry.fileID) continue;
// A queue entry with a nonzero reason is treated as coming from the
// USN log; such a match file is skipped.
queueEntry.fileID = match->entry.fileID; queueEntry.fileName = NULL; num = sgDatabase->QueueGetFirstByFileID(&queueEntry); if (num < 0) throw DATABASE_ERROR; if (num > 0) { ASSERT(num == 1); if (queueEntry.reason != 0) { DPRINTF((_T("%s: match file 0x%016I64x is in the queue from USN\n"), driveLetterName, match->entry.fileID)); continue; } }
break; }
// The match file is expected to carry the same size, signature, and
// attributes as the target file.
match->entry.fileSize = target->entry.fileSize; match->entry.signature = target->entry.signature; match->entry.attributes = target->entry.attributes;
// Open the match file. If it doesn't exist, remove its entry from the table.
// If the file can't be opened for any other reason, mark that the target
// file may need to be groveled again, then go on to the next match file.
// (This function only throws MATCH_INVALID or MATCH_ERROR; the table
// and retry handling described above is not performed here.)
// NOTE(review): no matching #if precedes this #endif in the visible
// source -- verify that the conditional block is intact.
DPRINTF((_T("--> 0x%016I64x\n"), match->entry.fileID)); #endif
if (!OpenFileByID(match, FALSE)) { lastError = GetLastError(); if (lastError == ERROR_FILE_NOT_FOUND || lastError == ERROR_PATH_NOT_FOUND || lastError == ERROR_INVALID_PARAMETER) { DPRINTF((_T("%s: match file 0x%016I64x doesn\'t exist: %lu\n"), driveLetterName, match->entry.fileID, lastError)); throw MATCH_INVALID; }
DPRINTF((_T("%s: can't open match file 0x%016I64x: %lu\n"), driveLetterName, match->entry.fileID, lastError)); throw MATCH_ERROR; }
// Set an oplock on the match file.
if (!SetOplock(match)) { DPRINTF((_T("%s: can't set oplock on match file %s: %lu\n"), driveLetterName, match->fileName, GetLastError())); throw MATCH_ERROR; }
// Get the information on the match file. If this fails,
// close the file, mark that the target file may need to
// be groveled again, then go on to the next match file.
// NOTE(review): fileInfo has no declaration in the visible part of this
// function -- verify the local is declared.
if (!GetFileInformationByHandle(match->handle, &fileInfo)) { DPRINTF((_T("%s: can't get information on match file %s: %lu\n"), driveLetterName, match->fileName, GetLastError())); throw MATCH_ERROR; }
// Reassemble the 64-bit values from the high/low halves in fileInfo.
fileID.HighPart = fileInfo.nFileIndexHigh; fileID.LowPart = fileInfo.nFileIndexLow; ASSERT(match->entry.fileID == fileID.QuadPart);
fileSize.HighPart = fileInfo.nFileSizeHigh; fileSize.LowPart = fileInfo.nFileSizeLow;
// Only the encrypted bit of the attributes takes part in the comparison.
attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;
createTime.HighPart = fileInfo.ftCreationTime.dwHighDateTime; createTime.LowPart = fileInfo.ftCreationTime.dwLowDateTime;
writeTime.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime; writeTime.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime;
// If the match file's information isn't consistent with its table entry, close
// the file, enqueue it to be re-groveled, and go on to the next match file.
// (Here that is signalled by throwing MATCH_STALE.)
if (match->entry.fileSize != fileSize .QuadPart || match->entry.attributes != attributes || match->entry.createTime != createTime.QuadPart || match->entry.writeTime != writeTime .QuadPart) { DPRINTF((_T("%s: match file %s doesn't match its information\n"), driveLetterName, match->fileName)); throw MATCH_STALE; }
// Entries on the match list had no CS index, so a reparse point here is
// never acceptable: a SIS one is stale, any other kind is invalid.
if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) { if (GetCSIndex(match->handle, &match->entry.csIndex)) { DPRINTF((_T("%s: match file %s is a SIS reparse point\n"), driveLetterName, match->fileName)); throw MATCH_STALE; }
DPRINTF((_T("%s: match file %s is a non-SIS reparse point\n"), driveLetterName, match->fileName)); throw MATCH_INVALID; }
// Check if the match file is mapped by another user.
if (IsFileMapped(match)) { DPRINTF((_T("%s: match file %s is already mapped\n"), driveLetterName, match->fileName)); throw MATCH_ERROR; }
return TRUE; }
// Compare() compares the contents of the target file and the match (or CS)
// file.  It reads both files one page of CMP_PAGE_SIZE bytes at a time and
// compares each pair of pages with memcmp(), stopping at the first pair
// that differs.
//
//   target - the open target file.
//   match  - the open match file: either a regular file (file ID set, no
//            CS index) or a CS file (file ID 0, name and CS index set).
//            Its size, signature, and attributes must equal the target's
//            (asserted).
//
// The reads are overlapped and alternate between buffer[0] and buffer[1]
// of each file: every pass first collects the reads issued by the previous
// pass, then issues the next pair of reads, then compares the pages just
// collected.  The loop therefore makes one more pass than there are pages.
//
// Returns TRUE if every page matched, FALSE otherwise.
// Throws TARGET_ERROR if a read of the target file fails or returns an
// unexpected byte count, and MATCH_ERROR for the same conditions on the
// match file.
BOOL Groveler::Compare(
    FileData *target,
    FileData *match)
{
    DWORD lastPageSize,
          bytesToRead        = 0,
          prevBytesToRead    = 0,
          bytesToRequest     = 0,
          prevBytesToRequest = 0,
          bytesRead,
          toggle,
          targetTime,
          matchTime,
          lastError;

    DWORDLONG numPages,
              pageNum;

    BOOL targetReadDone = FALSE,
         matchReadDone  = FALSE,
         filesMatch,
         success;

    ASSERT(target                != NULL);
    ASSERT(target->handle        != NULL);
    ASSERT(target->entry.fileID  != 0);

    ASSERT(match         != NULL);
    ASSERT(match->handle != NULL);
    ASSERT(
        (match->entry.fileID != 0
            && !HasCSIndex(match->entry.csIndex))
     || (match->entry.fileID == 0
            && match->fileName[0] != _T('\0')
            && HasCSIndex(match->entry.csIndex)));

    ASSERT(target->entry.fileSize   == match->entry.fileSize);
    ASSERT(target->entry.signature  == match->entry.signature);
    ASSERT(target->entry.attributes == match->entry.attributes);

    numPages     =         (target->entry.fileSize - 1) / CMP_PAGE_SIZE  + 1;
    lastPageSize = (DWORD)((target->entry.fileSize - 1) % CMP_PAGE_SIZE) + 1;

    toggle     = 0;
    filesMatch = TRUE;

    for (pageNum = 0; pageNum <= numPages; pageNum++) {

        // Unless this is the first pass through the loop,
        // wait for the previous read of both files to complete.

        if (pageNum > 0) {
            CmpCheckPoint(target, match, !targetReadDone, !matchReadDone);

            success = GetOverlappedResult(
                target->handle,
                &target->readSynch,
                &bytesRead,
                FALSE);
            if (!success) {
                DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
                    driveLetterName, target->fileName, GetLastError()));
                throw TARGET_ERROR;
            }

            // Accept either the sector-rounded count or the exact count.
            if (bytesRead != prevBytesToRequest &&
                bytesRead != prevBytesToRead) {
                DPRINTF((_T("%s: cmp read only %lu of %lu bytes from target file %s\n"),
                    driveLetterName, bytesRead, prevBytesToRequest, target->fileName));
                throw TARGET_ERROR;
            }

            success = GetOverlappedResult(
                match->handle,
                &match->readSynch,
                &bytesRead,
                FALSE);
            if (!success) {
#if DBG
                if (match->entry.fileID != 0) {
                    DPRINTF((_T("%s: error getting match file %s read results: %lu\n"),
                        driveLetterName, match->fileName, GetLastError()));
                } else {
                    DPRINTF((_T("%s: error getting CS file %s read results: %lu\n"),
                        driveLetterName, match->fileName, GetLastError()));
                }
#endif
                throw MATCH_ERROR;
            }

            if (bytesRead != prevBytesToRequest &&
                bytesRead != prevBytesToRead) {
#if DBG
                if (match->entry.fileID != 0) {
                    DPRINTF((_T("%s: read only %lu of %lu bytes from match file %s\n"),
                        driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
                } else {
                    DPRINTF((_T("%s: read only %lu of %lu bytes from CS file %s\n"),
                        driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
                }
#endif
                throw MATCH_ERROR;
            }

            // Account for the pair of reads just collected.  When the
            // target read completed synchronously the two reads ran back
            // to back, so their times add; otherwise they overlapped and
            // the elapsed time is measured from the start of the target
            // read to whichever read stopped later.

            if (bytesRead >= cmpReportThreshold) {
                compareReadCount += 2;
                if (targetReadDone) {   // Non-overlapped
                    targetTime        = target->stopTime - target->startTime;
                    matchTime         = match ->stopTime - match ->startTime;
                    compareReadTime  += targetTime + matchTime;
                } else {                // Overlapped
                    targetTime        = target->stopTime - target->startTime;
                    matchTime         = match ->stopTime - target->startTime;
                    compareReadTime  += targetTime > matchTime ? targetTime : matchTime;
                }
            }

            // The previous pass found a difference; no need to read on.
            if (!filesMatch)
                break;
        }

        // Unless all pages of the target file have already been read,
        // begin reading the next page of the file.

        if (pageNum < numPages) {
            offset.QuadPart = pageNum * CMP_PAGE_SIZE;
            target->readSynch.Offset     =
            match ->readSynch.Offset     = offset.LowPart;
            target->readSynch.OffsetHigh =
            match ->readSynch.OffsetHigh = offset.HighPart;

            // Round the request up to a whole number of sectors.
            bytesToRead     = pageNum < numPages-1 ? CMP_PAGE_SIZE : lastPageSize;
            bytesToRequest  = bytesToRead + sectorSize - 1;
            bytesToRequest -= bytesToRequest % sectorSize;

            target->startTime = GetTickCount();
            targetReadDone    = ReadFile(target->handle, target->buffer[toggle],
                                    bytesToRequest, NULL, &target->readSynch);
            target->stopTime  = GetTickCount();
            lastError         = GetLastError();

            if (targetReadDone) {
                success = ResetEvent(target->readSynch.hEvent);
                ASSERT_ERROR(success);
            } else if (lastError != ERROR_IO_PENDING) {
                DPRINTF((_T("%s: error reading target file %s: %lu\n"),
                    driveLetterName, target->fileName, lastError));
                throw TARGET_ERROR;
            }

            match->startTime = GetTickCount();
            matchReadDone    = ReadFile(match->handle, match->buffer[toggle],
                                   bytesToRequest, NULL, &match->readSynch);
            match->stopTime  = GetTickCount();
            lastError        = GetLastError();

            if (matchReadDone) {
                success = ResetEvent(match->readSynch.hEvent);
                ASSERT_ERROR(success);
            } else if (lastError != ERROR_IO_PENDING) {
#if DBG
                if (match->entry.fileID != 0) {
                    DPRINTF((_T("%s: error reading match file %s: %lu\n"),
                        driveLetterName, match->fileName, lastError));
                } else {
                    DPRINTF((_T("%s: error reading CS file %s: %lu\n"),
                        driveLetterName, match->fileName, lastError));
                }
#endif
                throw MATCH_ERROR;
            }
        }

        // Unless this is the first pass through the loop,
        // compare the target and match file pages just read.

        if (pageNum > 0)
            filesMatch = memcmp(target->buffer[1-toggle],
                                match ->buffer[1-toggle],
                                prevBytesToRead) == 0;

        prevBytesToRead    = bytesToRead;
        prevBytesToRequest = bytesToRequest;
        toggle             = 1-toggle;
    }

    if (!filesMatch) {
#if DBG
        // The file size is a 64-bit value, so it needs the I64 size prefix.
        if (match->entry.fileID != 0) {
            DPRINTF((_T("%s:1 files %s and %s failed compare (sz: 0x%I64x)\n"),
                driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
        } else {
            DPRINTF((_T("%s:2 files %s and %s failed compare (sz: 0x%I64x)\n"),
                driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
        }
#endif
        return FALSE;
    }

    return TRUE;
}
// Merge() calls the SIS filter (FSCTL_SIS_LINK_FILES on grovHandle) to
// merge the target file with the match file or CS file.
//
//   target          - the open target file.
//   match           - the open match file: a regular file (file ID set,
//                     no CS index) is merged with SIS_LINK_FILES_OP_MERGE;
//                     a CS file (file ID 0, CS index set) is merged with
//                     SIS_LINK_FILES_OP_MERGE_CS.
//   mergeSynch      - cleared OVERLAPPED structure with a reset event,
//                     used for the device I/O call.
//   abortMergeEvent - reset event handed to MergeCheckPoint(); it is not
//                     yet passed to the filter itself (see below).
//
// Returns TRUE only if, after the merge, the target file has a CS index
// and it equals the match file's CS index.  Returns FALSE if the I/O
// control call fails, if MergeCheckPoint() reports failure, or if the CS
// indices do not agree.  Both files are closed on the synchronous
// completion path and on the failure path.
BOOL Groveler::Merge(
    FileData   *target,
    FileData   *match,
    OVERLAPPED *mergeSynch,
    HANDLE      abortMergeEvent)
{
    _SIS_LINK_FILES sisLinkFiles;

#if DBG
    TCHAR *csName;
#endif

    DWORD lastError;

    BOOL mergeDone,
         merged,
         success;

    ASSERT(target               != NULL);
    ASSERT(target->handle       != NULL);
    ASSERT(target->entry.fileID != 0);

    ASSERT(match         != NULL);
    ASSERT(match->handle != NULL);
    ASSERT(
        (match->entry.fileID != 0
            && !HasCSIndex(match->entry.csIndex))
     || (match->entry.fileID == 0
            && match->fileName[0] != _T('\0')
            && HasCSIndex(match->entry.csIndex)));

    ASSERT(mergeSynch               != NULL);
    ASSERT(mergeSynch->Internal     == 0);
    ASSERT(mergeSynch->InternalHigh == 0);
    ASSERT(mergeSynch->Offset       == 0);
    ASSERT(mergeSynch->OffsetHigh   == 0);
    ASSERT(mergeSynch->hEvent       != NULL);
    ASSERT(IsReset(mergeSynch->hEvent));

    ASSERT(abortMergeEvent != NULL);
    ASSERT(IsReset(abortMergeEvent));

    ASSERT(target->entry.fileSize   == match->entry.fileSize);
    ASSERT(target->entry.signature  == match->entry.signature);
    ASSERT(target->entry.attributes == match->entry.attributes);

    ASSERT(grovHandle != NULL);

    // Set up to merge the files.

    if (match->entry.fileID != 0) {
        sisLinkFiles.operation          = SIS_LINK_FILES_OP_MERGE;
        sisLinkFiles.u.Merge.file1      = target->handle;
        sisLinkFiles.u.Merge.file2      = match ->handle;
        sisLinkFiles.u.Merge.abortEvent = NULL; // Should be abortMergeEvent
    } else {
        sisLinkFiles.operation                = SIS_LINK_FILES_OP_MERGE_CS;
        sisLinkFiles.u.MergeWithCS.file1      = target->handle;
        sisLinkFiles.u.MergeWithCS.abortEvent = NULL; // Should be abortMergeEvent
        sisLinkFiles.u.MergeWithCS.CSid       = match->entry.csIndex;
    }

    // Call the SIS filter to merge the files.

    target->startTime = GetTickCount();
    mergeDone = DeviceIoControl(
        grovHandle,
        FSCTL_SIS_LINK_FILES,
        (VOID *)&sisLinkFiles,
        sizeof(_SIS_LINK_FILES),
        NULL,
        0,
        NULL,
        mergeSynch);
    target->stopTime = GetTickCount();

    // If the merge completed successfully before the call returned, reset
    // the merge done event, get the new CS indices, and close the files.

    if (mergeDone) {
        success = ResetEvent(mergeSynch->hEvent);
        ASSERT_ERROR(success);
        mergeTime += target->stopTime - target->startTime;
        GetCSIndex(target->handle, &target->entry.csIndex);
        // A CS file already carries its index; only a regular match
        // file needs it read back.
        if (!HasCSIndex(match->entry.csIndex))
            GetCSIndex(match->handle, &match->entry.csIndex);
        CloseFile(target);
        CloseFile(match);
    }

    // If the merge failed, close the files and return an error status.

    else {
        lastError = GetLastError();
        if (lastError != ERROR_IO_PENDING) {
            CloseFile(target);
            CloseFile(match);

#if DBG
            if (match->entry.fileID != 0) {
                DPRINTF((_T("%s:3 files %s and %s failed merge: %lu\n"),
                    driveLetterName, target->fileName, match->fileName, lastError));
            } else {
                DPRINTF((_T("%s:4 files %s and %s failed merge: %lu\n"),
                    driveLetterName, target->fileName, match->fileName, lastError));
            }
#endif

            return FALSE;
        }

        // If the merge is in progress, wait for it to complete.
        // (MergeCheckPoint() will get the new CS indices and close the files.)

        else {
            merged = MergeCheckPoint(target, match, mergeSynch,
                abortMergeEvent, !mergeDone);

            if (!merged) {
#if DBG
                lastError = GetLastError();
                if (match->entry.fileID != 0) {
                    DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
                        driveLetterName, target->fileName, match->fileName, lastError));
                } else {
                    DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
                        driveLetterName, target->fileName, match->fileName, lastError));
                }
#endif

                return FALSE;
            }
        }
    }

    // If the merge succeeded, analyze and report the results.

    // NOTE(review): on the synchronous completion path the same interval
    // was already added to mergeTime above; unless CloseFile() resets the
    // start/stop times this counts it twice -- confirm.
    mergeTime += target->stopTime - target->startTime;
    merged = HasCSIndex (target->entry.csIndex)
          && SameCSIndex(target->entry.csIndex, match->entry.csIndex);

#if DBG
    csName = GetCSName(&target->entry.csIndex);

    if (merged) {
        if (match->entry.fileID != 0) {
            DPRINTF((_T("%s: files %s and %s merged: CS index is %s\n"),
                driveLetterName, target->fileName, match->fileName,
                csName != NULL ? csName : _T("...")));
        } else {
            DPRINTF((_T("%s: files %s and %s merged\n"),
                driveLetterName, target->fileName, match->fileName));
        }
    } else {
        if (match->entry.fileID != 0) {
            DPRINTF((_T("%s:5 files %s and %s merged, but CS indices don't match\n"),
                driveLetterName, target->fileName, match->fileName));
        } else {
            DPRINTF((_T("%s:6 files %s and %s merged, but CS indices don't match\n"),
                driveLetterName, target->fileName, match->fileName));
        }
    }

    if (csName != NULL) {
        FreeCSName(csName);
        csName = NULL;
    }
#endif

    // The result must be returned in every build, so this statement
    // stays outside the debug-only region above.
    return merged;
}
// Worker() performs the groveling processing.
VOID Groveler::Worker() { FileData target, match;
SGNativeQueueEntry queueEntry;
FIFO *matchList = NULL;
Table *csIndexTable = NULL;
OVERLAPPED mergeSynch = { 0, 0, 0, 0, NULL };
HANDLE abortMergeEvent = NULL;
TCHAR *csName;
DatabaseActionList actionList[MAX_ACTIONS];
BYTE *buffer1 = NULL, *buffer2 = NULL, *buffer3 = NULL, *buffer4 = NULL;
DWORD queueIndex, bufferSize, numCompares, numMatches, numActions;
#if DBG
DWORD enqueueTime; #endif
LONG num;
BOOL needToRetry, hashed, gotMatch, filesMatch, merged, success;
CLEAR_FILE(target); CLEAR_OVERLAPPED(target.oplock); target.handle = NULL;
CLEAR_FILE(match); CLEAR_OVERLAPPED(match.oplock); match.handle = NULL;
// Create the events.
try {
if ((target.oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL || (match .oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL || (target.readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL || (match .readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL || (mergeSynch .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL || (abortMergeEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) { DPRINTF((_T("%s: unable to create events: %lu\n"), driveLetterName, GetLastError())); throw INITIALIZE_ERROR; }
// Allocate and align the file buffers.
bufferSize = SIG_PAGE_SIZE > CMP_PAGE_SIZE ? SIG_PAGE_SIZE : CMP_PAGE_SIZE + sectorSize; buffer1 = new BYTE[bufferSize]; ASSERT(buffer1 != NULL); buffer2 = new BYTE[bufferSize]; ASSERT(buffer2 != NULL); buffer3 = new BYTE[bufferSize]; ASSERT(buffer3 != NULL); buffer4 = new BYTE[bufferSize]; ASSERT(buffer4 != NULL);
ASSERT(inUseFileID1 == NULL); ASSERT(inUseFileID2 == NULL);
inUseFileID1 = &target.entry.fileID; inUseFileID2 = &match .entry.fileID;
target.buffer[0] = buffer1 + sectorSize - (PtrToUlong(buffer1) % sectorSize); target.buffer[1] = buffer2 + sectorSize - (PtrToUlong(buffer2) % sectorSize); match .buffer[0] = buffer3 + sectorSize - (PtrToUlong(buffer3) % sectorSize); match .buffer[1] = buffer4 + sectorSize - (PtrToUlong(buffer4) % sectorSize);
// Signal to grovel() that this thread is alive,
// then wait for it to signal to start.
grovelStatus = Grovel_ok; ASSERT(IsReset(grovelStopEvent)); success = SetEvent(grovelStopEvent); ASSERT_ERROR(success);
WaitForEvent(grovelStartEvent); if (terminate) throw TERMINATE;
#ifdef _CRTDBG
_CrtMemState s[2], sdiff; int stateIndex = 0;
_CrtMemCheckpoint(&s[stateIndex]); stateIndex = 1; #endif
// The main loop.
while (TRUE) { try {
#ifdef _CRTDBG
if (_CrtMemDifference(&sdiff, &s[stateIndex^1], &s[stateIndex])) _CrtMemDumpStatistics(&sdiff); stateIndex ^= 1; #endif
hashed = FALSE; numCompares = 0; numMatches = 0; merged = FALSE; needToRetry = FALSE;
// Get a target file. abortGroveling is set when scan_volume is attempting to
// sync up with this thread. We stop here, a safe place to let scan_volume
// replace the database.
if (abortGroveling || !GetTarget(&target, &queueIndex)) { CLEAR_FILE(target);
grovelStatus = Grovel_ok; ASSERT(IsReset(grovelStopEvent)); success = SetEvent(grovelStopEvent); ASSERT_ERROR(success);
WaitForEvent(grovelStartEvent); if (terminate) throw TERMINATE;
continue; }
// Calculate the target file's signature.
hashed = TRUE;
// Get a list of match files.
ASSERT(matchList == NULL); ASSERT(csIndexTable == NULL);
matchList = new FIFO(); ASSERT(matchList != NULL);
csIndexTable = new Table(); ASSERT(csIndexTable != NULL);
GetMatchList(&target, matchList, csIndexTable);
// Compare the target file to each match file until a matching file is found
// or all comparisons fail. Try the SIS files first, then the regular files.
while (TRUE) { try {
gotMatch = FALSE;
if (!gotMatch && csIndexTable != NULL) { gotMatch = GetCSFile(&target, &match, csIndexTable); if (!gotMatch) { delete csIndexTable; csIndexTable = NULL; } }
if (!gotMatch && matchList != NULL) { gotMatch = GetMatch(&target, &match, matchList); if (!gotMatch) { delete matchList; matchList = NULL; } }
// After comparing the target file to every file on both
// lists, close the target file and update the database,
// then go on to process the next target file.
if (!gotMatch) { CloseFile(&target);
numActions = 3; actionList[0].type = TABLE_DELETE_BY_FILE_ID; actionList[0].u.fileID = target.entry.fileID; actionList[1].type = TABLE_PUT; actionList[1].u.tableEntry = &target.entry; actionList[2].type = QUEUE_DELETE; actionList[2].u.queueIndex = queueIndex;
if (needToRetry) { queueEntry.fileID = target.entry.fileID; queueEntry.parentID = target.parentID; queueEntry.reason = 0; queueEntry.fileName = NULL;
queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
if (queueEntry.retryTime < grovelInterval) queueEntry.retryTime = grovelInterval; queueEntry.readyTime = GetTime() + queueEntry.retryTime;
numActions = 4; actionList[3].type = QUEUE_PUT; actionList[3].u.queueEntry = &queueEntry; }
#if DBG
if (!HasCSIndex(target.entry.csIndex)) { TRACE_PRINTF(TC_groveler, 4, (_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"), driveLetterName, target.fileName, target.entry.fileSize, target.entry.signature)); } else { csName = GetCSName(&target.entry.csIndex); TRACE_PRINTF(TC_groveler, 4, (_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"), driveLetterName, target.fileName, target.entry.fileSize, target.entry.signature, csName != NULL ? csName : _T("..."))); if (csName != NULL) { FreeCSName(csName); csName = NULL; } }
if (needToRetry) { enqueueTime = (DWORD)(queueEntry.retryTime / 10000); DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"), target.fileName, enqueueTime / 1000, enqueueTime % 1000)); }
DoTransaction(numActions, actionList); break; }
// Compare the target file with this match file.
ASSERT(!inCompare); inCompare = TRUE; filesMatch = Compare(&target, &match); inCompare = FALSE;
if (!filesMatch) { CloseFile(&match); CLEAR_FILE(match); continue; }
// If the target and match files are identical, go on to merge them.
merged = Merge(&target, &match, &mergeSynch, abortMergeEvent);
// Update the database as follows:
// - Update the target file's table entry.
// - If the merge succeeded and the match file was a regular file,
// update the match file's table entry.
// - If the merge failed, re-enqueue the target file to be groveled again.
numActions = 3; actionList[0].type = TABLE_DELETE_BY_FILE_ID; actionList[0].u.fileID = target.entry.fileID; actionList[1].type = TABLE_PUT; actionList[1].u.tableEntry = &target.entry; actionList[2].type = QUEUE_DELETE; actionList[2].u.queueIndex = queueIndex;
if (merged) { if (match.entry.fileID != 0) { actionList[numActions ].type = TABLE_DELETE_BY_FILE_ID; actionList[numActions++].u.fileID = match.entry.fileID; actionList[numActions ].type = TABLE_PUT; actionList[numActions++].u.tableEntry = &match.entry; } } else { queueEntry.fileID = target.entry.fileID; queueEntry.parentID = target.parentID; queueEntry.reason = 0; queueEntry.fileName = NULL;
queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
if (queueEntry.retryTime < grovelInterval) queueEntry.retryTime = grovelInterval; queueEntry.readyTime = GetTime() + queueEntry.retryTime;
actionList[numActions ].type = QUEUE_PUT; actionList[numActions++].u.queueEntry = &queueEntry; }
#if DBG
if (!HasCSIndex(target.entry.csIndex)) { TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"), driveLetterName, target.fileName, target.entry.fileSize, target.entry.signature)); } else { csName = GetCSName(&target.entry.csIndex); TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"), driveLetterName, target.fileName, target.entry.fileSize, target.entry.signature, csName != NULL ? csName : _T("..."))); if (csName != NULL) { FreeCSName(csName); csName = NULL; } }
if (!merged) { enqueueTime = (DWORD)(queueEntry.retryTime / 10000); DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"), target.fileName, enqueueTime / 1000, enqueueTime % 1000)); }
DoTransaction(numActions, actionList); break; }
// Match exceptions
catch (MatchException matchException) {
inCompare = FALSE;
switch (matchException) {
// MATCH_INVALID: the match file doesn't exist or is disallowed. Close the file
// and remove its entry from the table, then go on to try the next match file.
if (match.entry.fileID != 0) { ASSERT(!HasCSIndex(match.entry.csIndex)); num = sgDatabase->TableDeleteByFileID(match.entry.fileID); if (num < 0) throw DATABASE_ERROR; ASSERT(num == 1); } else { ASSERT(HasCSIndex(match.entry.csIndex)); num = sgDatabase->TableDeleteByCSIndex(&match.entry.csIndex); if (num < 0) throw DATABASE_ERROR; ASSERT(num > 0); }
CLEAR_FILE(match); break;
// MATCH_ERROR: an error occurred while opening or reading the match
// file. Close the file and mark that the target file may need to be
// groveled again, then go on to try the next match file.
CloseFile(&match); CLEAR_FILE(match); needToRetry = TRUE; break;
// MATCH_STALE: the match file table entry is invalid for some reason.
// Close the file, remove its entry from the table, enqueue
// it to be re-groveled, then go on to the next match file.
if (match.entry.fileID != 0) {
queueEntry.fileID = match.entry.fileID; queueEntry.parentID = match.parentID; queueEntry.reason = 0; queueEntry.readyTime = GetTime() + grovelInterval; queueEntry.retryTime = 0; queueEntry.fileName = NULL;
numActions = 2; actionList[0].type = TABLE_DELETE_BY_FILE_ID; actionList[0].u.fileID = match.entry.fileID; actionList[1].type = QUEUE_PUT; actionList[1].u.queueEntry = &queueEntry; #if DBG
enqueueTime = (DWORD)(grovelInterval / 10000); DPRINTF((_T(" Enqueuing match file %s to be groveled in %lu.%03lu sec\n"), match.fileName, enqueueTime / 1000, enqueueTime % 1000)); #endif
DoTransaction(numActions, actionList);
} else {
ASSERT(HasCSIndex(match.entry.csIndex)); EnqueueCSIndex(&match.entry.csIndex);
CLEAR_FILE(match); break;
ASSERT_PRINTF(FALSE, (_T("matchException=%lu\n"), matchException)); } } } }
// Target exceptions
catch (TargetException targetException) {
inCompare = FALSE;
DPRINTF((_T("WORKER: Handling TargetException %d, status=%d\n"), targetException,GetLastError()));
switch (targetException) {
// TARGET_INVALID: the target file is invalid for some reason: it doesn't
// exist, it has disallowed properties, it is in the queue by both file
// name and file ID, or it was in the queue by file name and has already
// been groveled. Close the files, remove the target file's entry from
// the table, then go on to grovel the next target file.
CloseFile(&target); CloseFile(&match);
if (matchList != NULL) { delete matchList; matchList = NULL; }
if (csIndexTable != NULL) { delete csIndexTable; csIndexTable = NULL; }
numActions = 1; actionList[0].type = QUEUE_DELETE; actionList[0].u.queueIndex = queueIndex;
if (target.entry.fileID != 0) { numActions = 2; actionList[1].type = TABLE_DELETE_BY_FILE_ID; actionList[1].u.fileID = target.entry.fileID; }
DoTransaction(numActions, actionList); break;
// An error occurred while opening or reading the target file. Close
// the files and re-enqueue the target file to be groveled again.
ASSERT(target.entry.fileID != 0 || target.fileName[0] != _T('\0'));
CloseFile(&target); CloseFile(&match);
queueEntry.fileID = target.entry.fileID; queueEntry.parentID = target.parentID; queueEntry.reason = 0; queueEntry.fileName = target.entry.fileID == 0 ? target.fileName : NULL;
queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
if (queueEntry.retryTime < grovelInterval) queueEntry.retryTime = grovelInterval; queueEntry.readyTime = GetTime() + queueEntry.retryTime;
actionList[0].type = QUEUE_DELETE; actionList[0].u.queueIndex = queueIndex; actionList[1].type = QUEUE_PUT; actionList[1].u.queueEntry = &queueEntry;
#if DBG
enqueueTime = (DWORD)(queueEntry.retryTime / 10000); if (target.entry.fileID != 0) { DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"), target.fileName, enqueueTime / 1000, enqueueTime % 1000)); } else { DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"), target.fileName, enqueueTime / 1000, enqueueTime % 1000)); } #endif
DoTransaction(2, actionList); break;
ASSERT_PRINTF(FALSE, (_T("targetException=%lu\n"), targetException)); } }
// Do some clean-up.
ASSERT(target.handle == NULL); ASSERT(match .handle == NULL);
if (matchList != NULL) { delete matchList; matchList = NULL; }
if (csIndexTable != NULL) { delete csIndexTable; csIndexTable = NULL; }
// Update the activity counters for this target file,
// then go on to process the next file.
if (hashed) { hashCount++; hashBytes += target.entry.fileSize; }
compareCount += numCompares; compareBytes += numCompares * target.entry.fileSize;
matchCount += numMatches; matchBytes += numMatches * target.entry.fileSize;
if (merged) { mergeCount++; mergeBytes += target.entry.fileSize; }
CLEAR_FILE(target); CLEAR_FILE(match);
CLEAR_OVERLAPPED(mergeSynch); } }
// Terminal exceptions
catch (TerminalException terminalException) { switch (terminalException) {
// DATABASE_ERROR: an error occurred in the database. Return an error status.
case DATABASE_ERROR: break;
// MEMORY_ERROR: unable to allocate memory. Return an error status.
case MEMORY_ERROR: DPRINTF((_T("%s: Unable to allocate memory\n"), driveLetterName)); break;
// TERMINATE: grovel() signaled for this thread to terminate.
case TERMINATE: break;
default: ASSERT_PRINTF(FALSE, (_T("terminalException=%lu\n"), terminalException)); } }
// Close the files and clean up.
CloseFile(&target); CloseFile(&target);
CLEAR_FILE(target); CLEAR_FILE(match);
if (matchList != NULL) { delete matchList; matchList = NULL; }
if (csIndexTable != NULL) { delete csIndexTable; csIndexTable = NULL; }
if (target.oplock.hEvent != NULL) { success = CloseHandle(target.oplock.hEvent); ASSERT_ERROR(success); target.oplock.hEvent = NULL; } if (match.oplock.hEvent != NULL) { success = CloseHandle(match.oplock.hEvent); ASSERT_ERROR(success); match.oplock.hEvent = NULL; } if (target.readSynch.hEvent != NULL) { success = CloseHandle(target.readSynch.hEvent); ASSERT_ERROR(success); target.readSynch.hEvent = NULL; } if (match.readSynch.hEvent != NULL) { success = CloseHandle(match.readSynch.hEvent); ASSERT_ERROR(success); match.readSynch.hEvent = NULL; } if (mergeSynch.hEvent != NULL) { success = CloseHandle(mergeSynch.hEvent); ASSERT_ERROR(success); mergeSynch.hEvent = NULL; } if (abortMergeEvent != NULL) { success = CloseHandle(abortMergeEvent); ASSERT_ERROR(success); abortMergeEvent = NULL; }
if (buffer1 != NULL) { delete [] buffer1; buffer1 = NULL; } if (buffer2 != NULL) { delete [] buffer2; buffer2 = NULL; } if (buffer3 != NULL) { delete [] buffer3; buffer3 = NULL; } if (buffer4 != NULL) { delete [] buffer4; buffer4 = NULL; }
inUseFileID1 = NULL; inUseFileID2 = NULL;
// Signal grovel() that this thread is terminating by
// setting the grovelStop event with an error status.
grovelThread = NULL;
grovelStatus = Grovel_error; ASSERT(IsReset(grovelStopEvent)); success = SetEvent(grovelStopEvent); ASSERT_ERROR(success); }
/*****************************************************************************/ /******************* Groveler class static private methods *******************/ /*****************************************************************************/
// WorkerThread() runs in its own thread (it is the start routine handed to
// CreateThread() by open()). The thread parameter is the Groveler instance,
// whose Worker() method performs the groveling processing.
DWORD Groveler::WorkerThread(VOID *groveler)
{
    ASSERT(groveler != NULL);

    ((Groveler *)groveler)->Worker();

    return 0; // Dummy return value
}
/*****************************************************************************/ /*********************** Groveler class public methods ***********************/ /*****************************************************************************/
// set_log_drive() hands the drive name to SGDatabase::set_log_drive()
// and returns whatever that call reports.
BOOL Groveler::set_log_drive(const _TCHAR *drive_name)
{
    const BOOL result = SGDatabase::set_log_drive(drive_name);

    return result;
}
// is_sis_installed() tests whether the SIS filter is installed on a
// volume by calling SIS copyfile with a deliberately empty request (both
// file name lengths are zero) and classifying the resulting error code.
//
//   drive_name  Name of the volume to probe.
//
// Returns TRUE if the request fails with ERROR_INVALID_PARAMETER, and
// FALSE if the volume can't be opened or any other error is reported.
BOOL Groveler::is_sis_installed(const _TCHAR *drive_name)
{
    HANDLE volHandle;
    SI_COPYFILE copyFile;
    DWORD transferCount, lastError;
    BOOL success;

    // NOTE(review): the statement that opens the volume was absent from
    // this block (volHandle was tested without ever being assigned). The
    // access, share and flag values below are an assumption - confirm.
    volHandle = CreateFile(
        drive_name,
        GENERIC_READ,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        NULL,
        OPEN_EXISTING,
        FILE_FLAG_BACKUP_SEMANTICS,
        NULL);
    if (volHandle == INVALID_HANDLE_VALUE)
        return FALSE;

    copyFile.SourceFileNameLength      = 0;
    copyFile.DestinationFileNameLength = 0;
    copyFile.Flags                     = COPYFILE_SIS_REPLACE;

    success = DeviceIoControl(
        volHandle,
        FSCTL_SIS_COPYFILE,
        (VOID *)&copyFile,
        sizeof(SI_COPYFILE),
        NULL,
        0,
        &transferCount,
        NULL);

    // Capture the error code before any other API call can overwrite it.
    // The request is never expected to succeed.
    lastError = GetLastError();
    ASSERT(!success);

    success = CloseHandle(volHandle);
    ASSERT_ERROR(success);

    switch (lastError) {

        case ERROR_INVALID_PARAMETER:
            return TRUE; //sis is installed on this volume

        // NOTE(review): presumably the code reported when no filter
        // handles the request; handled here so that the ordinary
        // "not installed" result does not trip the assertion below.
        case ERROR_INVALID_FUNCTION:
            return FALSE;

        default:
            ASSERT_PRINTF(FALSE, (_T("lastError=%lu\n"), lastError));
    }

    return FALSE; // Dummy return value
}
// The groveler constructor puts the object into the "closed" state: every
// handle, pointer and counter that open() fills in is cleared here. These
// are the same values the destructor and open() assert on.
Groveler::Groveler()
{
    volumeHandle = NULL;
    grovHandle   = NULL;

    sgDatabase      = NULL;
    driveName       = NULL;
    driveLetterName = NULL;
    databaseName    = NULL;

    numDisallowedIDs   = 0;
    numDisallowedNames = 0;
    disallowedIDs      = NULL;
    disallowedNames    = NULL;

    grovelStartEvent = NULL;
    grovelStopEvent  = NULL;
    grovelThread     = NULL;

    inUseFileID1 = NULL;
    inUseFileID2 = NULL;

    abortGroveling = FALSE;
    inCompare      = FALSE;
    inScan         = FALSE;

    // No worker thread exists yet, so the object starts out "terminated";
    // open() clears this flag just before it creates the thread.
    terminate = TRUE;
}
// The groveler destructor releases everything the object still owns and
// then verifies that it is back in the "closed" state.
Groveler::~Groveler()
{
// If the volume is open, call close() to close it. Without this call an
// object destroyed while still open would keep its handles, database and
// worker thread alive in builds where ASSERT compiles to nothing.

    if (volumeHandle != NULL)
        close();

    ASSERT(volumeHandle == NULL);
    ASSERT(grovHandle   == NULL);

    ASSERT(sgDatabase      == NULL);
    ASSERT(driveName       == NULL);
    ASSERT(driveLetterName == NULL);
    ASSERT(databaseName    == NULL);

    ASSERT(numDisallowedIDs   == 0);
    ASSERT(numDisallowedNames == 0);
    ASSERT(disallowedIDs      == NULL);
    ASSERT(disallowedNames    == NULL);

    ASSERT(grovelStartEvent == NULL);
    ASSERT(grovelStopEvent  == NULL);
    ASSERT(grovelThread     == NULL);

    ASSERT(inUseFileID1 == NULL);
    ASSERT(inUseFileID2 == NULL);

    ASSERT(terminate);
    ASSERT(!inCompare);
    ASSERT(!inScan);
}
// open() opens the specified volume and prepares it for groveling: it
// records the drive names, opens the volume and the GrovelerFile, opens
// the volume's database, builds the disallowed-directory lists, and
// starts the worker thread.
//
// Parameters:
//   drive_name, drive_letterName
//       Copied into driveName / driveLetterName with trailing '\'
//       (and, for the letter name, ':') removed.
//   is_log_drive
//       Passed through to SGDatabase::Open().
//   read_report_discard_threshold
//       Scales SIG_PAGE_SIZE and CMP_PAGE_SIZE to give sigReportThreshold
//       and cmpReportThreshold.
//   min_file_size
//       Raised to MIN_FILE_SIZE if smaller.
//   min_file_age
//       In milliseconds; stored in minFileAge in 10^-7 second units.
//   allow_*_files
//       When FALSE, the matching FILE_ATTRIBUTE_* bit is added to
//       disallowedAttributes.
//   previousGrovelAllPathsState
//       Compared against GrovelAllPaths; a difference forces a rescan.
//   num_excluded_paths, excluded_paths
//       Entries beginning with '\' are added to the disallowed lists;
//       other entries are ignored.
//   base_regrovel_interval, max_regrovel_interval
//       Not referenced in this function.
//
// Returns Grovel_ok when the existing database state is reused,
// Grovel_new when the groveling state was reset, or Grovel_error if
// the volume, GrovelerFile, events or worker thread could not be set up
// (close() has already been called in that case).
GrovelStatus Groveler::open(
    IN const TCHAR  *drive_name,
    IN const TCHAR  *drive_letterName,
    IN BOOL          is_log_drive,
    IN DOUBLE        read_report_discard_threshold,
    IN DWORD         min_file_size,
    IN DWORD         min_file_age,
    IN BOOL          allow_compressed_files,
    IN BOOL          allow_encrypted_files,
    IN BOOL          allow_hidden_files,
    IN BOOL          allow_offline_files,
    IN BOOL          allow_temporary_files,
    IN int           previousGrovelAllPathsState,
    IN DWORD         num_excluded_paths,
    IN const TCHAR **excluded_paths,
    IN DWORD         base_regrovel_interval,
    IN DWORD         max_regrovel_interval)
{
    DWORD threadID;
    TCHAR fileStr[MAX_PATH+1];
    TCHAR listValue[MAX_PATH+1], *strPtr;
    USN_JOURNAL_DATA usnJournalData;
    SGNativeListEntry listEntry;
    // NOTE(review): fileID was used below without a visible declaration;
    // declared here as a local - confirm it is not meant to be a member.
    DWORDLONG fileID;
    DWORD sectorsPerCluster, numberOfFreeClusters, totalNumberOfClusters, bufferSize, strLen, i;
    GrovelStatus openStatus;
    LONG num;
    BOOL success;

    ASSERT(volumeHandle == NULL);
    ASSERT(grovHandle   == NULL);

    ASSERT(sgDatabase   == NULL);
    ASSERT(databaseName == NULL);

    ASSERT(numDisallowedIDs   == 0);
    ASSERT(numDisallowedNames == 0);
    ASSERT(disallowedIDs      == NULL);
    ASSERT(disallowedNames    == NULL);

    ASSERT(grovelStartEvent == NULL);
    ASSERT(grovelStopEvent  == NULL);
    ASSERT(grovelThread     == NULL);

    ASSERT(inUseFileID1 == NULL);
    ASSERT(inUseFileID2 == NULL);

    ASSERT(terminate);
    ASSERT(!inCompare);
    ASSERT(!inScan);

#if 0
    while (!IsDebuggerPresent())
        Sleep(2000);
    DebugBreak();
#endif

// Make sure that the filter has run phase 2 initialization if this is
// a SIS enabled volume.
// NOTE(review): no code implementing the step above is present here.

// Get drive name without trailing slash

    int nBufSize = wcslen(drive_name) + 1; //in chars
    driveName = new TCHAR[nBufSize];
    (void)StringCchCopy(driveName, nBufSize, drive_name);
    TrimTrailingChar(driveName,L'\\');

// Get drive Letter name without trailing "\" or ":"

    nBufSize = wcslen(drive_letterName) + 1; //in chars
    driveLetterName = new TCHAR[nBufSize];
    (void)StringCchCopy(driveLetterName, nBufSize, drive_letterName);
    TrimTrailingChar(driveLetterName,L'\\');
    TrimTrailingChar(driveLetterName,L':');

#ifdef _CRTDBG
// Send all reports to STDERR
    _CrtSetReportMode( _CRT_WARN,   _CRTDBG_MODE_FILE );
    _CrtSetReportFile( _CRT_WARN,   _CRTDBG_FILE_STDERR );
    _CrtSetReportMode( _CRT_ERROR,  _CRTDBG_MODE_FILE );
    _CrtSetReportFile( _CRT_ERROR,  _CRTDBG_FILE_STDERR );
    _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
    _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
#endif

// Open the volume and the GrovelerFile. The SIS fsctl
// functions require that we pass in a handle to GrovelerFile as a means
// of proving our "privilege". An access violation is returned if we don't.

    // NOTE(review): the statement that opens the volume was absent from
    // this block, so the failure test below could never fire and
    // volumeHandle stayed NULL. The access, share and flag values used
    // here are an assumption - confirm against the callers that issue
    // I/O on volumeHandle.
    volumeHandle = CreateFile(
        driveName,
        GENERIC_READ,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        NULL,
        OPEN_EXISTING,
        0,
        NULL);
    if (volumeHandle == INVALID_HANDLE_VALUE) {
        volumeHandle = NULL;
        DPRINTF((_T("%s: Can't open volume \"%s\" %lu\n"),
            driveLetterName, driveName, GetLastError()));
        close();
        return Grovel_error;
    }

    (void)StringCbCopy(fileStr,sizeof(fileStr),driveName);
    (void)StringCbCat(fileStr,sizeof(fileStr),CS_DIR_PATH);
    (void)StringCbCat(fileStr,sizeof(fileStr),_T("\\"));
    (void)StringCbCat(fileStr,sizeof(fileStr),GROVELER_FILE_NAME);

    grovHandle = CreateFile(
        fileStr,
        GENERIC_READ | GENERIC_WRITE,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        NULL,
        OPEN_EXISTING,
        FILE_FLAG_OVERLAPPED,
        NULL);
    if (grovHandle == INVALID_HANDLE_VALUE) {
        grovHandle = NULL;
        DPRINTF((_T("%s: can't open groveler file \"%s\": %lu\n"),
            driveLetterName, fileStr, GetLastError()));
        close();
        return Grovel_error;
    }

    // Query the volume geometry; both page sizes must be whole multiples
    // of the sector size.
    (void)StringCbCopy(fileStr,sizeof(fileStr),driveName);
    (void)StringCbCat(fileStr,sizeof(fileStr),_T("\\"));

    success = GetDiskFreeSpace(fileStr, &sectorsPerCluster, &sectorSize,
        &numberOfFreeClusters, &totalNumberOfClusters);
    ASSERT(success);

    ASSERT(SIG_PAGE_SIZE % sectorSize == 0);
    ASSERT(CMP_PAGE_SIZE % sectorSize == 0);

    sigReportThreshold =
        (DWORD)((DOUBLE)SIG_PAGE_SIZE * read_report_discard_threshold);
    cmpReportThreshold =
        (DWORD)((DOUBLE)CMP_PAGE_SIZE * read_report_discard_threshold);

// Open this volume's database. If this fails, create a
// new database. If that fails, return an error status.

    ASSERT(databaseName == NULL);
    strLen = _tcslen(driveName) + _tcslen(CS_DIR_PATH) + _tcslen(DATABASE_FILE_NAME) + 1; // +1 for '\'

    databaseName = new TCHAR[strLen+1];
    ASSERT(databaseName != NULL);

    (void)StringCchPrintf(databaseName, (strLen+1), _T("%s%s\\%s"), driveName, CS_DIR_PATH, DATABASE_FILE_NAME);

    sgDatabase = new SGDatabase();
    if (sgDatabase == NULL) {
        DPRINTF((_T("%s: can't create database object\n"),
            driveLetterName));
        close();
        return Grovel_error;
    }

    openStatus = Grovel_ok;

    // The existing database is reused only if every check below passes:
    // the USN journal is readable, the database opens, it holds a valid
    // last-USN value, its stored USN ID matches the journal's, it has no
    // uncommitted operations, and the GrovelAllPaths state is unchanged.
    if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
        DPRINTF((_T("%s: can't initialize usnID\n"),
            driveLetterName));
    } else {
        usnID = usnJournalData.UsnJournalID;

        if (!sgDatabase->Open(driveLetterName,databaseName, is_log_drive)) {
            DPRINTF((_T("%s: can't open database \"%s\"\n"),
                driveLetterName, databaseName));
        } else {
            listValue[0]    = _T('\0');
            listEntry.name  = LAST_USN_NAME;
            listEntry.value = listValue;
            if (sgDatabase->ListRead(&listEntry) <= 0
             || _stscanf(listValue, _T("%I64x"), &lastUSN) != 1
             || lastUSN == UNINITIALIZED_USN) {
                DPRINTF((_T("%s: can't get last USN value\n"), driveLetterName));
            } else {
                DWORDLONG storedUsnID;

                listValue[0]    = _T('\0');
                listEntry.name  = USN_ID_NAME;
                listEntry.value = listValue;
                if (sgDatabase->ListRead(&listEntry) <= 0
                 || _stscanf(listValue, _T("%I64x"), &storedUsnID) != 1
                 || storedUsnID != usnID) {
                    DPRINTF((_T("%s: can't get USN ID value from database\n"), driveLetterName));
                } else {
                    //
                    // See if any uncommitted operations
                    //
                    num = sgDatabase->StackCount();
                    if (0 == num) {
                        // See if the RIS state changed. If not, we can
                        // continue. If so, reset grovel state so we will
                        // rescan the volume.
                        if (GrovelAllPaths == previousGrovelAllPathsState) {
                            goto OpenedDatabase;
                        } else {
                            DPRINTF((L"GrovelAllPaths state changed, rescanning the volume\n"));
                        }
                    }
                }
            }
        }
    }

    // NOTE(review): the comment above says a new database is created when
    // the existing one can't be used, but no such code is present in this
    // block - confirm against the complete source.

// Set abortGroveling to block the worker thread, and set lastUSN to block extract_log
// until scan_volume starts.

    abortGroveling = TRUE;
    lastUSN = usnID = UNINITIALIZED_USN;
    openStatus = Grovel_new;

    // Target of the goto above. The label itself was absent from this
    // block; this is the only position that skips the state reset while
    // still building the lists and starting the worker thread.
OpenedDatabase:

// Create the disallowed directories list.

    if (num_excluded_paths == 0) {
        disallowedIDs   = NULL;
        disallowedNames = NULL;
    } else {
        disallowedIDs   = new DWORDLONG[num_excluded_paths];
        disallowedNames = new TCHAR *  [num_excluded_paths];
        ASSERT(disallowedIDs   != NULL);
        ASSERT(disallowedNames != NULL);

        for (i = 0; i < num_excluded_paths; i++) {
            ASSERT(excluded_paths[i] != NULL);

            if (excluded_paths[i][0] == _T('\\')) {
                // Drop trailing backslashes, but keep a lone "\".
                strLen = _tcslen(excluded_paths[i]);
                while (strLen > 1 && excluded_paths[i][strLen-1] == _T('\\'))
                    strLen--;

                strPtr = new TCHAR[strLen+1];
                ASSERT(strPtr != NULL);
                disallowedNames[numDisallowedNames++] = strPtr;

                _tcsncpy(strPtr, excluded_paths[i], strLen);
                strPtr[strLen] = _T('\0');

                // A file ID of 0 is not recorded, so the path stays in
                // the list by name only.
                fileID = GetFileID(drive_name,strPtr);
                if (fileID != 0) {
                    disallowedIDs[numDisallowedIDs++] = fileID;
                }
                TPRINTF((L"%s: Exclude path=\"%s\", ID=%04I64x.%012I64x\n",
                    driveLetterName, strPtr,
                    ((fileID >> 48) & 0xffff),
                    (fileID & 0xffffffffffff)));
            }
        }

        // Both arrays come from new[], so they must be released with
        // delete[]. The lists are sorted with qsort() using the file's
        // comparison callbacks.
        if (numDisallowedNames == 0) {
            delete[] disallowedNames;
            disallowedNames = NULL;
        } else if (numDisallowedNames > 1)
            qsort(
                disallowedNames,
                numDisallowedNames,
                sizeof(TCHAR *),
                qsStringCompare);

        if (numDisallowedIDs == 0) {
            delete[] disallowedIDs;
            disallowedIDs = NULL;
        } else if (numDisallowedIDs > 1)
            qsort(
                disallowedIDs,
                numDisallowedIDs,
                sizeof(DWORDLONG),
                FileIDCompare);
    }

// Set the remaining class values.
// minFileAge is expressed in 10^-7 seconds, min_file_age in milliseconds.

    minFileSize = min_file_size > MIN_FILE_SIZE ? min_file_size : MIN_FILE_SIZE;
    minFileAge  = min_file_age * 10000;
    grovelInterval = minFileAge > MIN_GROVEL_INTERVAL ? minFileAge : MIN_GROVEL_INTERVAL;

    disallowedAttributes = FILE_ATTRIBUTE_DIRECTORY
        | (allow_compressed_files ? 0 : FILE_ATTRIBUTE_COMPRESSED)
        | (allow_encrypted_files  ? 0 : FILE_ATTRIBUTE_ENCRYPTED)
        | (allow_hidden_files     ? 0 : FILE_ATTRIBUTE_HIDDEN)
        | (allow_offline_files    ? 0 : FILE_ATTRIBUTE_OFFLINE)
        | (allow_temporary_files  ? 0 : FILE_ATTRIBUTE_TEMPORARY);

// Create the events used to handshake with the worker thread.

    if ((grovelStartEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
     || (grovelStopEvent  = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) {
        DPRINTF((_T("%s: unable to create events: %lu\n"),
            driveLetterName, GetLastError()));
        close();
        return Grovel_error;
    }

// Create the worker thread, then wait for it to set
// the grovelStop event to announce its existence.

    terminate = FALSE;

    grovelThread = CreateThread(
        NULL,
        0,
        WorkerThread,
        (VOID *)this,
        0,
        &threadID);
    if (grovelThread == NULL) {
        DPRINTF((_T("%s: can't create the worker thread: %lu\n"),
            driveLetterName, GetLastError()));
        close();
        return Grovel_error;
    }

    // The wait described above was absent from this block; without it
    // grovelStatus is read before the worker thread has written it.
    WaitForEvent(grovelStopEvent);

    if (grovelStatus == Grovel_error) {
        grovelThread = NULL;
        close();
        return Grovel_error;
    }
    ASSERT(grovelStatus == Grovel_ok);

    return openStatus;
}
// close() shuts the volume down: it stops the worker thread, closes the
// handshake events and the volume / GrovelerFile handles, deletes the
// database object, and frees every string and list that open() allocated.
// Each resource is checked before it is released, so close() can be called
// on a partially opened object. Always returns Grovel_ok.
GrovelStatus Groveler::close()
{
    DWORD i;
    BOOL success;

// If active, signal the worker thread to stop,
// then wait for it to acknowledge.

    terminate = TRUE;

    if (grovelThread != NULL) {
        ASSERT(grovelStartEvent != NULL);
        ASSERT(grovelStopEvent  != NULL);

        // Keep releasing the worker until it reports Grovel_error, which
        // is the status it sets when its thread is terminating.
        timeAllotted = INFINITE;
        do {
            ASSERT(IsReset(grovelStartEvent));
            success = SetEvent(grovelStartEvent);
            ASSERT_ERROR(success);
            WaitForEvent(grovelStopEvent);
        } while (grovelStatus != Grovel_error);

        // NOTE(review): the thread handle returned by CreateThread() is
        // not passed to CloseHandle() anywhere in this block - confirm it
        // is released elsewhere.
        grovelThread = NULL;
    }

    inCompare = FALSE;
    inScan    = FALSE;
    usnID     = UNINITIALIZED_USN;

    ASSERT(inUseFileID1 == NULL);
    ASSERT(inUseFileID2 == NULL);

// Close the events.

    if (grovelStartEvent != NULL) {
        success = CloseHandle(grovelStartEvent);
        ASSERT_ERROR(success);
        grovelStartEvent = NULL;
    }

    if (grovelStopEvent != NULL) {
        success = CloseHandle(grovelStopEvent);
        ASSERT_ERROR(success);
        grovelStopEvent = NULL;
    }

// If the volume or GrovelerFile are open, close them.

    if (volumeHandle != NULL) {
        success = CloseHandle(volumeHandle);
        ASSERT_ERROR(success);
        volumeHandle = NULL;
    }

    if (grovHandle != NULL) {
        success = CloseHandle(grovHandle);
        ASSERT_ERROR(success);
        grovHandle = NULL;
    }

// Close this volume's database.

    if (sgDatabase != NULL) {
        delete sgDatabase;
        sgDatabase = NULL;
    }

    if (databaseName != NULL) {
        delete[] databaseName;
        databaseName = NULL;
    }

// Deallocate the disallowed directory lists. The lists and the name
// strings are all allocated with new[] in open(), so each one must be
// released with delete[].

    if (numDisallowedNames == 0) {
        ASSERT(disallowedNames == NULL);
    } else {
        for (i = 0; i < numDisallowedNames; i++)
            delete[] disallowedNames[i];
        delete[] disallowedNames;
        disallowedNames = NULL;
        numDisallowedNames = 0;
    }

    if (numDisallowedIDs == 0) {
        ASSERT(disallowedIDs == NULL);
    } else {
        delete[] disallowedIDs;
        disallowedIDs = NULL;
        numDisallowedIDs = 0;
    }

    if (driveName != NULL) {
        delete[] driveName;
        driveName = NULL;
    }

    if (driveLetterName != NULL) {
        delete[] driveLetterName;
        driveLetterName = NULL;
    }

    return Grovel_ok;
}
// grovel() is the front-end method for controlling the groveling
// process on each NTFS volume. The groveling process itself is
// implemented in the Worker() method. grovel() starts the groveling
// process by setting the grovelStart event. Worker() signals back to
// grovel() that it is finished or has used up its time allocation by
// setting the grovelStop event, which causes grovel() to return.
//
//   time_allotted  Time budget for this call; stored in timeAllotted for
//                  the worker to read.
//   (all OUT parameters)
//                  Optional: each one is written only when the pointer
//                  is non-NULL, and receives the activity counter
//                  accumulated during this call.
//
// Returns grovelStatus as left by the worker thread.
GrovelStatus Groveler::grovel(
    IN  DWORD      time_allotted,

    OUT DWORD     *hash_read_ops,
    OUT DWORD     *hash_read_time,
    OUT DWORD     *count_of_files_hashed,
    OUT DWORDLONG *bytes_of_files_hashed,

    OUT DWORD     *compare_read_ops,
    OUT DWORD     *compare_read_time,
    OUT DWORD     *count_of_files_compared,
    OUT DWORDLONG *bytes_of_files_compared,

    OUT DWORD     *count_of_files_matching,
    OUT DWORDLONG *bytes_of_files_matching,

    OUT DWORD     *merge_time,
    OUT DWORD     *count_of_files_merged,
    OUT DWORDLONG *bytes_of_files_merged,

    OUT DWORD     *count_of_files_enqueued,
    OUT DWORD     *count_of_files_dequeued)
{
    DWORD timeConsumed;
    BOOL success;

    ASSERT(volumeHandle != NULL);

    // Start every activity counter from zero for this call.
    hashCount     = 0;
    hashReadCount = 0;
    hashReadTime  = 0;
    hashBytes     = 0;

    compareCount     = 0;
    compareReadCount = 0;
    compareReadTime  = 0;
    compareBytes     = 0;

    matchCount = 0;
    matchBytes = 0;

    mergeCount = 0;
    mergeTime  = 0;
    mergeBytes = 0;

    numFilesEnqueued = 0;
    numFilesDequeued = 0;

    // NOTE(review): the directive that opens this conditional was absent
    // from the block (only the #else / #endif remained). The macro name
    // below is an assumption - confirm. When it is not defined, the
    // caller's time_allotted is used.
#ifdef DEBUG_UNTHROTTLED
    timeAllotted = INFINITE;
#else
    timeAllotted = time_allotted;
#endif

    startAllottedTime = GetTickCount();

    // Release the worker thread, then block until it signals back.
    ASSERT(IsReset(grovelStartEvent));
    success = SetEvent(grovelStartEvent);
    ASSERT_ERROR(success);
    WaitForEvent(grovelStopEvent);
    timeConsumed = GetTickCount() - startAllottedTime;

// Return the performance statistics.

    if (count_of_files_hashed != NULL)
        *count_of_files_hashed = hashCount;
    if (hash_read_ops != NULL)
        *hash_read_ops = hashReadCount;
    if (hash_read_time != NULL)
        *hash_read_time = hashReadTime;
    if (bytes_of_files_hashed != NULL)
        *bytes_of_files_hashed = hashBytes;

    if (count_of_files_compared != NULL)
        *count_of_files_compared = compareCount;
    if (compare_read_ops != NULL)
        *compare_read_ops = compareReadCount;
    if (compare_read_time != NULL)
        *compare_read_time = compareReadTime;
    if (bytes_of_files_compared != NULL)
        *bytes_of_files_compared = compareBytes;

    if (count_of_files_matching != NULL)
        *count_of_files_matching = matchCount;
    if (bytes_of_files_matching != NULL)
        *bytes_of_files_matching = matchBytes;

    if (count_of_files_merged != NULL)
        *count_of_files_merged = mergeCount;
    if (merge_time != NULL)
        *merge_time = mergeTime;
    if (bytes_of_files_merged != NULL)
        *bytes_of_files_merged = mergeBytes;

    if (count_of_files_enqueued != NULL)
        *count_of_files_enqueued = numFilesEnqueued;
    if (count_of_files_dequeued != NULL)
        *count_of_files_dequeued = numFilesDequeued;

    TRACE_PRINTF(TC_groveler, 2,
        (_T("%s Count Reads Bytes Time (sec)\n"),
        driveLetterName));
    TRACE_PRINTF(TC_groveler, 2,
        (_T(" Hashings: %7lu %7lu %7I64u %4lu.%03lu Time: %5lu.%03lu sec\n"),
        hashCount, hashReadCount, hashBytes,
        hashReadTime / 1000, hashReadTime % 1000,
        timeConsumed / 1000, timeConsumed % 1000));
    TRACE_PRINTF(TC_groveler, 2,
        (_T(" Compares: %7lu %7lu %7I64u %4lu.%03lu Enqueues: %lu\n"),
        compareCount, compareReadCount, compareBytes,
        compareReadTime / 1000, compareReadTime % 1000,
        numFilesEnqueued));
    TRACE_PRINTF(TC_groveler, 2,
        (_T(" Matches: %7lu %7I64u Dequeues: %lu\n"),
        matchCount, matchBytes, numFilesDequeued));
    TRACE_PRINTF(TC_groveler, 2,
        (_T(" Merges: %7lu %7I64u %4lu.%03lu\n"),
        mergeCount, mergeBytes,
        mergeTime / 1000, mergeTime % 1000));

    return grovelStatus;
}
// count_of_files_in_queue() reports how many files are waiting in this
// volume's queue to be groveled. A negative count from the database is
// reported to the caller as 0.
DWORD Groveler::count_of_files_in_queue() const
{
    ASSERT(volumeHandle != NULL);
    ASSERT(sgDatabase   != NULL);

    LONG queued = sgDatabase->QueueCount();

    if (queued >= 0) {
        TPRINTF((_T("%s: count_of_files_in_queue=%ld\n"),
            driveLetterName, queued));
        return (DWORD)queued;
    }

    return 0;
}
// count_of_files_to_compare() reports 1 while the inCompare flag is set
// (two files are ready to be compared or are being compared) and 0
// at any other time.
DWORD Groveler::count_of_files_to_compare() const
{
    ASSERT(volumeHandle != NULL);
    ASSERT(sgDatabase   != NULL);

    DWORD pending = 0;
    if (inCompare)
        pending = 1;

    TPRINTF((_T("%s: count_of_files_to_compare=%lu\n"),
        driveLetterName, pending));

    return pending;
}
// time_to_first_file_ready() reports how long, in milliseconds, until the
// first queue entry becomes ready to be groveled:
//   - INFINITE if the queue is empty,
//   - 0 if the entry is already due or the database reports an error,
//   - otherwise the remaining time (readyTime and GetTime() differences
//     are divided by 10000 to give milliseconds).
DWORD Groveler::time_to_first_file_ready() const
{
    SGNativeQueueEntry firstEntry;
    DWORD waitMs = INFINITE;

    ASSERT(volumeHandle != NULL);
    ASSERT(sgDatabase   != NULL);

    firstEntry.fileName = NULL;
    LONG found = sgDatabase->QueueGetFirst(&firstEntry);
    if (found < 0)
        return 0;

    if (found != 0) {
        ASSERT(found == 1);

        DWORDLONG now = GetTime();
        if (firstEntry.readyTime > now)
            waitMs = (DWORD)((firstEntry.readyTime - now) / 10000);
        else
            waitMs = 0;
    }

    TPRINTF((_T("%s: time_to_first_file_ready=%lu.%03lu\n"),
        driveLetterName, waitMs / 1000, waitMs % 1000));

    return waitMs;
}