Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

4143 lines
132 KiB

  1. /*++
  2. Copyright (c) 1998-1999 Microsoft Corporation
  3. Module Name:
  4. groveler.cpp
  5. Abstract:
  6. SIS Groveler file groveling functions
  7. Authors:
  8. Cedric Krumbein, 1998
  9. Environment:
  10. User Mode
  11. Revision History:
  12. --*/
  13. #include "all.hxx"
  // CLEAR_OVERLAPPED zeroes the status and offset words of an OVERLAPPED
  // structure (Internal, InternalHigh, Offset, OffsetHigh). The hEvent
  // member is not touched.
  #define CLEAR_OVERLAPPED(OVL) (       \
      (OVL).Internal     = 0,           \
      (OVL).InternalHigh = 0,           \
      (OVL).Offset       = 0,           \
      (OVL).OffsetHigh   = 0 )

  // CLEAR_FILE resets the per-file bookkeeping of a file record: its database
  // entry fields, parent ID, timing fields, the readSynch status/offset words,
  // and the file name (which becomes the empty string). Members that are not
  // listed below keep their current values.
  #define CLEAR_FILE(FD) (                      \
      (FD).entry.fileID     = 0,                \
      (FD).entry.fileSize   = 0,                \
      (FD).entry.signature  = 0,                \
      (FD).entry.attributes = 0,                \
      (FD).entry.csIndex    = nullCSIndex,      \
      (FD).entry.createTime = 0,                \
      (FD).entry.writeTime  = 0,                \
      (FD).parentID         = 0,                \
      (FD).retryTime        = 0,                \
      (FD).startTime        = 0,                \
      (FD).stopTime         = 0,                \
      CLEAR_OVERLAPPED((FD).readSynch),         \
      (FD).fileName[0]      = _T('\0') )
  36. // Is CS index set?
  37. static const CSID nullCSIndex = {
  38. 0, 0, 0,
  39. _T('\0'), _T('\0'), _T('\0'), _T('\0'),
  40. _T('\0'), _T('\0'), _T('\0'), _T('\0')
  41. };
  42. #define HasCSIndex(CSID) \
  43. (memcmp(&(CSID), &nullCSIndex, sizeof(CSID)) != 0)
  44. #define SameCSIndex(CSID1, CSID2) \
  45. (memcmp(&(CSID1), &(CSID2), sizeof(CSID)) == 0)
  // Exception codes. These enumerators are thrown by value.

  // TerminalException: in this file MEMORY_ERROR is thrown by NewHandler(),
  // DATABASE_ERROR by the database transaction helpers, and TERMINATE by the
  // checkpoint routines when the terminate flag is set.
  enum TerminalException {
      INITIALIZE_ERROR = 0,
      DATABASE_ERROR   = 1,
      MEMORY_ERROR     = 2,
      TERMINATE        = 3
  };

  // TargetException: TARGET_ERROR is thrown by the checkpoint routines when
  // the target file's oplock broke.
  enum TargetException {
      TARGET_INVALID = 0,
      TARGET_ERROR   = 1
  };

  // MatchException: MATCH_ERROR is thrown by CmpCheckPoint() when the match
  // file's oplock broke.
  enum MatchException {
      MATCH_INVALID = 0,
      MATCH_ERROR   = 1,
      MATCH_STALE   = 2
  };
  62. /*****************************************************************************/
  63. /************************** Miscellaneous functions **************************/
  64. /*****************************************************************************/
  65. // NewHandler() is installed by _set_new_handler() to throw an
  66. // exception when the system can't allocate any more memory.
  67. static INT __cdecl NewHandler(size_t size)
  68. {
  69. throw MEMORY_ERROR;
  70. return 0; // Dummy return
  71. }
  72. /*****************************************************************************/
  73. // FileIDCompare() is used by qsort() and bsearch()
  74. // to sort or look up a matching file ID.
  75. static INT __cdecl FileIDCompare(
  76. const VOID *id1,
  77. const VOID *id2)
  78. {
  79. DWORDLONG fileID1 = *(DWORDLONG *)id1,
  80. fileID2 = *(DWORDLONG *)id2;
  81. return fileID1 < fileID2 ? -1
  82. : fileID1 > fileID2 ? +1
  83. : 0;
  84. }
  85. /*****************************************************************************/
  86. // qsStringCompare() is used by qsort() to sort an array of character strings.
  87. static INT __cdecl qsStringCompare(
  88. const VOID *str1,
  89. const VOID *str2)
  90. {
  91. return _tcsicmp(*(TCHAR **)str1, *(TCHAR **)str2);
  92. }
  93. /*****************************************************************************/
  94. // bsStringCompare() is used by bsearch() look up a matching character string.
  95. // It is assumed that str1 is the path name string we are searching for and
  96. // str2 is the excluded path name string in the excluded paths list. Note
  97. // that if the excluded path is \a\b, then we return a match on anything that
  98. // is in this directory or subdirectory, as well as an exact match.
  99. // E.g.: \a\b\c\d.foo & \a\b\foo will match, but \a\b.foo will not.
  100. static INT __cdecl bsStringCompare(
  101. const VOID *str1,
  102. const VOID *str2)
  103. {
  104. TCHAR *s1 = *(TCHAR **) str1;
  105. TCHAR *s2 = *(TCHAR **) str2;
  106. // str2 is the excluded name. Make sure we catch subdirectories under it,
  107. // but make sure we don't confuse \a\bx with \a\b
  108. size_t l = _tcslen(s2);
  109. INT r = _tcsnicmp(s1, s2, l);
  110. if (0 == r)
  111. if (_tcslen(s1) > l && _T('\\') != s1[l])
  112. r = 1;
  113. return r;
  114. }
  115. /*****************************************************************************/
  116. /********************** Groveler class private methods ***********************/
  117. /*****************************************************************************/
  118. // IsAllowedID() returns FALSE if the directory or file ID
  119. // is on the list of disallowed IDs, and TRUE otherwise.
  120. BOOL Groveler::IsAllowedID(DWORDLONG fileID) const
  121. {
  122. BOOL result;
  123. ASSERT(fileID != 0);
  124. if (numDisallowedIDs == 0) {
  125. ASSERT(disallowedIDs == NULL);
  126. result = TRUE;
  127. } else {
  128. ASSERT(disallowedIDs != NULL);
  129. result = bsearch( &fileID,
  130. disallowedIDs,
  131. numDisallowedIDs,
  132. sizeof(DWORDLONG),
  133. FileIDCompare) == NULL;
  134. }
  135. DPRINTF((L"IsAllowedID: %04I64x.%012I64x, (%s)\n",
  136. ((fileID >> 48) & 0xffff),
  137. (fileID & 0xffffffffffff),
  138. result ? L"yes" : L"no"));
  139. return result;
  140. }
  141. /*****************************************************************************/
  142. // IsAllowedName() returns FALSE if the directory or file name
  143. // is on the list of disallowed names, and TRUE otherwise.
  144. BOOL Groveler::IsAllowedName(TCHAR *fileName) const
  145. {
  146. BOOL result;
  147. ASSERT(fileName != NULL);
  148. if (numDisallowedNames == 0) {
  149. ASSERT(disallowedNames == NULL);
  150. result = TRUE;
  151. } else {
  152. ASSERT(disallowedNames != NULL);
  153. result = (bsearch( &fileName,
  154. disallowedNames,
  155. numDisallowedNames,
  156. sizeof(TCHAR *),
  157. bsStringCompare) == NULL);
  158. }
  159. //
  160. // The name wasn't on the disallowed list, see if the GrovelAllPaths option
  161. // is set. If not then it must be in the RIS valid path, check for it.
  162. //
  163. if (result && !GrovelAllPaths && (RISPath != NULL)) {
  164. //
  165. // Do not do this cheeck if this is the root directory of the volume
  166. //
  167. if (wcscmp(fileName,L"\\") != 0) {
  168. result = (bsStringCompare(&fileName,&RISPath) == 0);
  169. }
  170. }
  171. DPRINTF((L"IsAllowedName: \"%s\", (%s)\n", fileName, result ? L"yes" : L"no"));
  172. return result;
  173. }
  174. /*****************************************************************************/
  175. // WaitForEvent suspends the thread until the specified event is set.
  176. VOID Groveler::WaitForEvent(HANDLE event)
  177. {
  178. DWORD eventNum;
  179. BOOL success;
  180. ASSERT(event != NULL);
  181. eventNum = WaitForSingleObject(event, INFINITE);
  182. ASSERT_ERROR(eventNum == WAIT_OBJECT_0);
  183. success = ResetEvent(event);
  184. ASSERT_ERROR(success);
  185. }
  186. /*****************************************************************************/
  187. // OpenFileByID() opens the file with the given volumeHandle and fileID.
  188. BOOL Groveler::OpenFileByID(
  189. FileData *file,
  190. BOOL writeEnable)
  191. {
  192. UNICODE_STRING fileIDString;
  193. OBJECT_ATTRIBUTES objectAttributes;
  194. IO_STATUS_BLOCK ioStatusBlock;
  195. NTSTATUS ntStatus;
  196. ASSERT(volumeHandle != NULL);
  197. ASSERT(file != NULL);
  198. ASSERT(file->entry.fileID != 0);
  199. ASSERT(file->handle == NULL);
  200. fileIDString.Length = sizeof(DWORDLONG);
  201. fileIDString.MaximumLength = sizeof(DWORDLONG);
  202. fileIDString.Buffer = (WCHAR *)&file->entry.fileID;
  203. objectAttributes.Length = sizeof(OBJECT_ATTRIBUTES);
  204. objectAttributes.RootDirectory = volumeHandle;
  205. objectAttributes.ObjectName = &fileIDString;
  206. objectAttributes.Attributes = OBJ_CASE_INSENSITIVE;
  207. objectAttributes.SecurityDescriptor = NULL;
  208. objectAttributes.SecurityQualityOfService = NULL;
  209. ntStatus = NtCreateFile(
  210. &file->handle,
  211. GENERIC_READ |
  212. (writeEnable ? GENERIC_WRITE : 0),
  213. &objectAttributes,
  214. &ioStatusBlock,
  215. NULL,
  216. 0,
  217. FILE_SHARE_READ |
  218. FILE_SHARE_DELETE |
  219. (writeEnable ? FILE_SHARE_WRITE : 0),
  220. FILE_OPEN,
  221. FILE_OPEN_BY_FILE_ID |
  222. FILE_OPEN_REPARSE_POINT |
  223. FILE_NO_INTERMEDIATE_BUFFERING,
  224. NULL,
  225. 0);
  226. if (ntStatus == STATUS_SUCCESS) {
  227. DWORD bytesReturned;
  228. MARK_HANDLE_INFO markHandleInfo =
  229. {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};
  230. // Mark the handle so the usn entry for the merge operation (if completed)
  231. // can be detected and skipped.
  232. BOOL rc = DeviceIoControl(
  233. file->handle,
  234. FSCTL_MARK_HANDLE,
  235. &markHandleInfo,
  236. sizeof markHandleInfo,
  237. NULL,
  238. 0,
  239. &bytesReturned,
  240. NULL);
  241. if (!rc) {
  242. DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
  243. driveLetterName, GetLastError()));
  244. }
  245. #if DBG
  246. // Get the file name
  247. {
  248. HRESULT r;
  249. ASSERT(file->fileName[0] == _T('\0'));
  250. struct TFileName2 {
  251. ULONG nameLen;
  252. TCHAR name[MAX_PATH+1];
  253. } tFileName[1];
  254. ntStatus = NtQueryInformationFile(
  255. file->handle,
  256. &ioStatusBlock,
  257. tFileName,
  258. sizeof tFileName,
  259. FileNameInformation);
  260. if (ntStatus == STATUS_SUCCESS) {
  261. r = StringCbCopyN(file->fileName,
  262. sizeof(file->fileName),
  263. tFileName->name,
  264. tFileName->nameLen);
  265. ASSERT(r == S_OK);
  266. } else {
  267. r = StringCbCopy(file->fileName,
  268. sizeof(file->fileName),
  269. L"<unresolved name>");
  270. ASSERT(r == S_OK);
  271. }
  272. }
  273. #endif
  274. return TRUE;
  275. }
  276. ASSERT(file->handle == NULL);
  277. SetLastError(RtlNtStatusToDosError(ntStatus));
  278. return FALSE;
  279. }
  280. /*****************************************************************************/
  281. // OpenFileByName() opens the file with the given fileName.
  282. BOOL Groveler::OpenFileByName(
  283. FileData *file,
  284. BOOL writeEnable,
  285. TCHAR *fileName)
  286. {
  287. UNICODE_STRING dosPathName,
  288. ntPathName;
  289. OBJECT_ATTRIBUTES objectAttributes;
  290. IO_STATUS_BLOCK ioStatusBlock;
  291. NTSTATUS ntStatus;
  292. ASSERT(file != NULL);
  293. ASSERT(file->handle == NULL);
  294. if (fileName == NULL)
  295. fileName = file->fileName;
  296. ASSERT(fileName[0] != _T('\0'));
  297. #ifdef _UNICODE
  298. dosPathName.Buffer = fileName;
  299. #else
  300. if (!RtlCreateUnicodeStringFromAsciiz(&dosPathName, fileName)) {
  301. ntStatus = STATUS_NO_MEMORY;
  302. goto Error;
  303. }
  304. #endif
  305. if (RtlDosPathNameToNtPathName_U(dosPathName.Buffer, &ntPathName, NULL, NULL)) {
  306. objectAttributes.Length = sizeof(OBJECT_ATTRIBUTES);
  307. objectAttributes.RootDirectory = NULL;
  308. objectAttributes.ObjectName = &ntPathName;
  309. objectAttributes.Attributes = OBJ_CASE_INSENSITIVE;
  310. objectAttributes.SecurityDescriptor = NULL;
  311. objectAttributes.SecurityQualityOfService = NULL;
  312. ntStatus = NtCreateFile(
  313. &file->handle,
  314. GENERIC_READ |
  315. (writeEnable ? GENERIC_WRITE : 0),
  316. &objectAttributes,
  317. &ioStatusBlock,
  318. NULL,
  319. 0,
  320. FILE_SHARE_READ |
  321. FILE_SHARE_DELETE |
  322. (writeEnable ? FILE_SHARE_WRITE : 0),
  323. FILE_OPEN,
  324. FILE_OPEN_REPARSE_POINT |
  325. FILE_NO_INTERMEDIATE_BUFFERING,
  326. NULL,
  327. 0);
  328. RtlFreeUnicodeString(&ntPathName);
  329. } else {
  330. ntStatus = STATUS_NO_MEMORY;
  331. }
  332. #ifndef _UNICODE
  333. RtlFreeUnicodeString(&dosPathName);
  334. #endif
  335. if (ntStatus == STATUS_SUCCESS) {
  336. DWORD bytesReturned;
  337. MARK_HANDLE_INFO markHandleInfo =
  338. {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};
  339. // Mark the handle so the usn entry for the merge operation (if completed)
  340. // can be detected and skipped.
  341. BOOL rc = DeviceIoControl(
  342. file->handle,
  343. FSCTL_MARK_HANDLE,
  344. &markHandleInfo,
  345. sizeof markHandleInfo,
  346. NULL,
  347. 0,
  348. &bytesReturned,
  349. NULL);
  350. if (!rc) {
  351. DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
  352. driveLetterName, GetLastError()));
  353. }
  354. return TRUE;
  355. }
  356. ASSERT(file->handle == NULL);
  357. SetLastError(RtlNtStatusToDosError(ntStatus));
  358. return FALSE;
  359. }
  360. /*****************************************************************************/
  361. // IsFileMapped() checks if the file is mapped by another user.
  362. BOOL Groveler::IsFileMapped(FileData *file)
  363. {
  364. _SIS_LINK_FILES sisLinkFiles;
  365. DWORD transferCount;
  366. BOOL success;
  367. ASSERT(grovHandle != NULL);
  368. ASSERT(file->handle != NULL);
  369. sisLinkFiles.operation = SIS_LINK_FILES_OP_VERIFY_NO_MAP;
  370. sisLinkFiles.u.VerifyNoMap.file = file->handle;
  371. success = DeviceIoControl(
  372. grovHandle,
  373. FSCTL_SIS_LINK_FILES,
  374. (VOID *)&sisLinkFiles,
  375. sizeof(_SIS_LINK_FILES),
  376. NULL,
  377. 0,
  378. &transferCount,
  379. NULL);
  380. if (success)
  381. return FALSE;
  382. ASSERT(GetLastError() == ERROR_SHARING_VIOLATION);
  383. return TRUE;
  384. }
  385. /*****************************************************************************/
  386. // SetOplock() sets an oplock on the open file.
  387. BOOL Groveler::SetOplock(FileData *file)
  388. {
  389. BOOL success;
  390. ASSERT(file != NULL);
  391. ASSERT(file->handle != NULL);
  392. ASSERT(file->oplock.Internal == 0);
  393. ASSERT(file->oplock.InternalHigh == 0);
  394. ASSERT(file->oplock.Offset == 0);
  395. ASSERT(file->oplock.OffsetHigh == 0);
  396. ASSERT(file->oplock.hEvent != NULL);
  397. ASSERT(IsReset(file->oplock.hEvent));
  398. success = DeviceIoControl(
  399. file->handle,
  400. FSCTL_REQUEST_BATCH_OPLOCK,
  401. NULL,
  402. 0,
  403. NULL,
  404. 0,
  405. NULL,
  406. &file->oplock);
  407. if (success) {
  408. ASSERT(IsSet(file->oplock.hEvent));
  409. success = ResetEvent(file->oplock.hEvent);
  410. ASSERT_ERROR(success);
  411. CLEAR_OVERLAPPED(file->oplock);
  412. SetLastError(0);
  413. return FALSE;
  414. }
  415. if (GetLastError() != ERROR_IO_PENDING) {
  416. ASSERT(IsReset(file->oplock.hEvent));
  417. CLEAR_OVERLAPPED(file->oplock);
  418. return FALSE;
  419. }
  420. return TRUE;
  421. }
  422. /*****************************************************************************/
  423. // CloseFile() closes the file if it is still open. If an oplock was
  424. // set on the file, it then waits for and resets the oplock break
  425. // event triggered by the closing of the file or by an outside access.
  426. VOID Groveler::CloseFile(FileData *file)
  427. {
  428. BOOL success;
  429. ASSERT(file != NULL);
  430. ASSERT(file->oplock.hEvent != NULL);
  431. if (file->handle == NULL) {
  432. ASSERT(file->oplock.Internal == 0);
  433. ASSERT(file->oplock.InternalHigh == 0);
  434. ASSERT(file->oplock.Offset == 0);
  435. ASSERT(file->oplock.OffsetHigh == 0);
  436. ASSERT(IsReset(file->oplock.hEvent));
  437. } else {
  438. success = CloseHandle(file->handle);
  439. ASSERT_ERROR(success);
  440. file->handle = NULL;
  441. if (file->oplock.Internal != 0
  442. || file->oplock.InternalHigh != 0
  443. || file->oplock.Offset != 0
  444. || file->oplock.OffsetHigh != 0) {
  445. WaitForEvent(file->oplock.hEvent);
  446. CLEAR_OVERLAPPED(file->oplock);
  447. }
  448. }
  449. }
  450. /*****************************************************************************/
  451. // CreateDatabase() creates the database. Initialize it such that if
  452. // extract_log is called before scan_volume, it will return Grovel_overrun
  453. // without attempting any USN extraction. Also, the first time scan_volume
  454. // is called (with or without start_over), it will know to initialize
  455. // lastUSN and do a full volume scan.
  456. BOOL Groveler::CreateDatabase(void)
  457. {
  458. USN_JOURNAL_DATA usnJournalData;
  459. TFileName tempName;
  460. TCHAR listValue[17];
  461. DWORDLONG rootID;
  462. SGNativeListEntry listEntry;
  463. LONG num;
  464. tempName.assign(driveName);
  465. tempName.append(_T("\\"));
  466. rootID = GetFileID(NULL,tempName.name);
  467. if (rootID == 0) {
  468. DPRINTF((_T("%s: CreateDatabase: can't get root directory ID\n"),
  469. driveLetterName));
  470. goto Error;
  471. }
  472. if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
  473. DWORD lastError = GetLastError();
  474. if (lastError == ERROR_JOURNAL_NOT_ACTIVE) {
  475. DPRINTF((_T("%s: CreateDatabase: journal not active\n"), driveLetterName));
  476. if ((set_usn_log_size(65536) != ERROR_SUCCESS) ||
  477. get_usn_log_info(&usnJournalData) != ERROR_SUCCESS) {
  478. DPRINTF((_T("%s: CreateDatabase: can't initialize USN journal\n"),
  479. driveLetterName));
  480. goto Error;
  481. }
  482. } else {
  483. DPRINTF((_T("%s: CreateDatabase: can't initialize last USN\n"),
  484. driveLetterName));
  485. goto Error;
  486. }
  487. }
  488. lastUSN = usnJournalData.NextUsn;
  489. usnID = usnJournalData.UsnJournalID;
  490. sgDatabase->Close();
  491. if (!sgDatabase->Create(databaseName)) {
  492. DPRINTF((_T("%s: CreateDatabase: can't create database \"%s\"\n"),
  493. driveLetterName, databaseName));
  494. goto Error;
  495. }
  496. num = sgDatabase->StackPut(rootID, FALSE);
  497. if (num < 0)
  498. goto Error;
  499. ASSERT(num == 1);
  500. // Write UNINITIALIZED_USN into the database now, to be replaced when scan_volume
  501. // is complete. This will be a flag to indicate if the database contents are valid.
  502. (void)StringCbPrintf(listValue, sizeof(listValue), _T("%016I64x"), UNINITIALIZED_USN);
  503. listEntry.name = LAST_USN_NAME;
  504. listEntry.value = listValue;
  505. num = sgDatabase->ListWrite(&listEntry);
  506. if (num < 0)
  507. goto Error;
  508. ASSERT(num == 1);
  509. (void)StringCbPrintf(listValue, sizeof(listValue), _T("%016I64x"), usnID);
  510. listEntry.name = USN_ID_NAME;
  511. listEntry.value = listValue;
  512. num = sgDatabase->ListWrite(&listEntry);
  513. if (num < 0)
  514. goto Error;
  515. ASSERT(num == 1);
  516. return TRUE;
  517. Error:
  518. lastUSN = usnID = UNINITIALIZED_USN;
  519. return FALSE;
  520. }
  521. /*****************************************************************************/
  522. #define MAX_ACTIONS 5
  523. // DoTransaction() performs the specified operations
  524. // on the database within a single transaction.
  525. VOID Groveler::DoTransaction(
  526. DWORD numActions,
  527. DatabaseActionList *actionList)
  528. {
  529. DatabaseActionList *action;
  530. DWORD i;
  531. LONG num;
  532. ASSERT(sgDatabase != NULL);
  533. ASSERT(actionList != NULL);
  534. if (sgDatabase->BeginTransaction() < 0)
  535. throw DATABASE_ERROR;
  536. for (i = 0; i < numActions; i++) {
  537. action = &actionList[i];
  538. switch(action->type) {
  539. case TABLE_PUT:
  540. ASSERT(action->u.tableEntry != NULL);
  541. num = sgDatabase->TablePut(action->u.tableEntry);
  542. ASSERT(num < 0 || num == 1);
  543. break;
  544. case TABLE_DELETE_BY_FILE_ID:
  545. ASSERT(action->u.fileID != 0);
  546. num = sgDatabase->TableDeleteByFileID(action->u.fileID);
  547. break;
  548. case QUEUE_PUT:
  549. ASSERT(action->u.queueEntry != NULL);
  550. num = sgDatabase->QueuePut(action->u.queueEntry);
  551. ASSERT(num < 0 || num == 1);
  552. if (num == 1)
  553. numFilesEnqueued++;
  554. break;
  555. case QUEUE_DELETE:
  556. ASSERT(action->u.queueIndex != 0);
  557. num = sgDatabase->QueueDelete(action->u.queueIndex);
  558. ASSERT(num <= 1);
  559. if (num == 1)
  560. numFilesDequeued++;
  561. #if DBG
  562. else
  563. DPRINTF((_T("DoTransaction: QUEUE_DELETE unsuccessful (%d)"), num));
  564. #endif
  565. break;
  566. default:
  567. ASSERT_PRINTF(FALSE, (_T("type=%lu\n"), action->type));
  568. }
  569. if (num < 0) {
  570. sgDatabase->AbortTransaction();
  571. throw DATABASE_ERROR;
  572. }
  573. }
  574. if (!sgDatabase->CommitTransaction()) {
  575. sgDatabase->AbortTransaction();
  576. throw DATABASE_ERROR;
  577. }
  578. }
  579. /*****************************************************************************/
  580. // EnqueueCSIndex() deletes all entries with the specified CS index from the
  581. // table and enqueues them to be re-groveled, all within a single transaction.
  582. VOID Groveler::EnqueueCSIndex(CSID *csIndex)
  583. {
  584. SGNativeTableEntry tableEntry;
  585. SGNativeQueueEntry oldQueueEntry,
  586. newQueueEntry;
  587. DWORD count;
  588. LONG num;
  589. ASSERT(csIndex != NULL);
  590. ASSERT(HasCSIndex(*csIndex));
  591. newQueueEntry.parentID = 0;
  592. newQueueEntry.reason = 0;
  593. newQueueEntry.readyTime = GetTime() + grovelInterval;
  594. newQueueEntry.retryTime = 0;
  595. newQueueEntry.fileName = NULL;
  596. oldQueueEntry.fileName = NULL;
  597. count = 0;
  598. if (sgDatabase->BeginTransaction() < 0)
  599. throw DATABASE_ERROR;
  600. tableEntry.csIndex = *csIndex;
  601. num = sgDatabase->TableGetFirstByCSIndex(&tableEntry);
  602. while (num > 0) {
  603. ASSERT(num == 1);
  604. count++;
  605. oldQueueEntry.fileID = tableEntry.fileID;
  606. num = sgDatabase->QueueGetFirstByFileID(&oldQueueEntry);
  607. if (num < 0)
  608. break;
  609. ASSERT(num == 1);
  610. if (num == 0) {
  611. newQueueEntry.fileID = tableEntry.fileID;
  612. num = sgDatabase->QueuePut(&newQueueEntry);
  613. if (num < 0)
  614. break;
  615. ASSERT(num == 1);
  616. numFilesEnqueued++;
  617. }
  618. num = sgDatabase->TableGetNext(&tableEntry);
  619. }
  620. if (num < 0) {
  621. sgDatabase->AbortTransaction();
  622. throw DATABASE_ERROR;
  623. }
  624. num = sgDatabase->TableDeleteByCSIndex(csIndex);
  625. if (num < 0) {
  626. sgDatabase->AbortTransaction();
  627. throw DATABASE_ERROR;
  628. }
  629. ASSERT(count == (DWORD)num);
  630. if (!sgDatabase->CommitTransaction()) {
  631. sgDatabase->AbortTransaction();
  632. throw DATABASE_ERROR;
  633. }
  634. }
  635. /*****************************************************************************/
  636. #define TARGET_OPLOCK_BREAK 0
  637. #define TARGET_READ_DONE 1
  638. #define GROVEL_START 2
  639. #define NUM_EVENTS 3
  640. // SigCheckPoint suspends the thread until the target file completes its read
  641. // operation. If the time allotment expires before the operation completes,
  642. // the grovelStart event is set to signal grovel() to awaken, and this method
  643. // won't return until grovel() sets the grovelStart event. If the file's
  644. // oplock breaks before this method returns, the file will be closed.
  645. VOID Groveler::SigCheckPoint(
  646. FileData *target,
  647. BOOL targetRead)
  648. {
  649. HANDLE events[NUM_EVENTS];
  650. DWORD elapsedTime,
  651. timeOut,
  652. eventNum,
  653. eventTime;
  654. BOOL targetOplockBroke = FALSE,
  655. waitingForGrovelStart = FALSE,
  656. success;
  657. ASSERT(target != NULL);
  658. ASSERT(target->handle != NULL);
  659. ASSERT(target->oplock .hEvent != NULL);
  660. ASSERT(target->readSynch.hEvent != NULL);
  661. ASSERT(grovelStartEvent != NULL);
  662. ASSERT(grovelStopEvent != NULL);
  663. events[TARGET_OPLOCK_BREAK] = target->oplock .hEvent;
  664. events[TARGET_READ_DONE] = target->readSynch.hEvent;
  665. events[GROVEL_START] = grovelStartEvent;
  666. while (TRUE) {
  667. if (waitingForGrovelStart)
  668. timeOut = INFINITE;
  669. else if (timeAllotted == INFINITE)
  670. timeOut = targetRead ? INFINITE : 0;
  671. else {
  672. elapsedTime = GetTickCount() - startAllottedTime;
  673. if (timeAllotted > elapsedTime)
  674. timeOut = targetRead ? timeAllotted - elapsedTime : 0;
  675. else {
  676. waitingForGrovelStart = TRUE;
  677. timeOut = INFINITE;
  678. grovelStatus = Grovel_pending;
  679. ASSERT(IsReset(grovelStopEvent));
  680. success = SetEvent(grovelStopEvent);
  681. ASSERT_ERROR(success);
  682. }
  683. }
  684. eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
  685. eventTime = GetTickCount();
  686. switch (eventNum) {
  687. case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
  688. ASSERT(!targetOplockBroke);
  689. targetOplockBroke = TRUE;
  690. success = ResetEvent(target->oplock.hEvent);
  691. ASSERT_ERROR(success);
  692. if (!targetRead) {
  693. CLEAR_OVERLAPPED(target->oplock);
  694. CloseFile(target);
  695. }
  696. DPRINTF((_T("%s: target file %s oplock broke during hash\n"),
  697. driveLetterName, target->fileName));
  698. break;
  699. case WAIT_OBJECT_0 + TARGET_READ_DONE:
  700. ASSERT(targetRead);
  701. targetRead = FALSE;
  702. success = ResetEvent(target->readSynch.hEvent);
  703. ASSERT_ERROR(success);
  704. target->stopTime = eventTime;
  705. if (targetOplockBroke) {
  706. CLEAR_OVERLAPPED(target->oplock);
  707. CloseFile(target);
  708. }
  709. break;
  710. case WAIT_OBJECT_0 + GROVEL_START:
  711. ASSERT(waitingForGrovelStart);
  712. waitingForGrovelStart = FALSE;
  713. success = ResetEvent(grovelStartEvent);
  714. ASSERT_ERROR(success);
  715. break;
  716. case WAIT_TIMEOUT:
  717. ASSERT(!waitingForGrovelStart);
  718. if (!targetRead) {
  719. if (terminate)
  720. throw TERMINATE;
  721. if (targetOplockBroke)
  722. throw TARGET_ERROR;
  723. return;
  724. }
  725. waitingForGrovelStart = TRUE;
  726. grovelStatus = Grovel_pending;
  727. ASSERT(IsReset(grovelStopEvent));
  728. success = SetEvent(grovelStopEvent);
  729. ASSERT_ERROR(success);
  730. break;
  731. default:
  732. ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
  733. }
  734. }
  735. }
  736. #undef TARGET_OPLOCK_BREAK
  737. #undef TARGET_READ_DONE
  738. #undef GROVEL_START
  739. #undef NUM_EVENTS
  740. /*****************************************************************************/
  741. #define TARGET_OPLOCK_BREAK 0
  742. #define MATCH_OPLOCK_BREAK 1
  743. #define TARGET_READ_DONE 2
  744. #define MATCH_READ_DONE 3
  745. #define GROVEL_START 4
  746. #define NUM_EVENTS 5
  747. // CmpCheckPoint suspends the thread until the target file, the
  748. // match file, or both complete their read operations. If the time
  749. // allotment expires before the operations complete, the grovelStart
  750. // event is set to signal grovel() to awaken, and this method won't
  751. // return until grovel() sets the grovelStart event. If either file's
  752. // oplock breaks before this method returns, the file will be closed.
  753. VOID Groveler::CmpCheckPoint(
  754. FileData *target,
  755. FileData *match,
  756. BOOL targetRead,
  757. BOOL matchRead)
  758. {
  759. HANDLE events[NUM_EVENTS];
  760. DWORD elapsedTime,
  761. timeOut,
  762. eventNum,
  763. eventTime;
  764. BOOL targetOplockBroke = FALSE,
  765. matchOplockBroke = FALSE,
  766. waitingForGrovelStart = FALSE,
  767. success;
  768. ASSERT(target != NULL);
  769. ASSERT(match != NULL);
  770. ASSERT(target->handle != NULL);
  771. ASSERT(match ->handle != NULL);
  772. ASSERT(target->oplock .hEvent != NULL);
  773. ASSERT(match ->oplock .hEvent != NULL);
  774. ASSERT(target->readSynch.hEvent != NULL);
  775. ASSERT(match ->readSynch.hEvent != NULL);
  776. ASSERT(grovelStartEvent != NULL);
  777. ASSERT(grovelStopEvent != NULL);
  778. events[TARGET_OPLOCK_BREAK] = target->oplock .hEvent;
  779. events[MATCH_OPLOCK_BREAK] = match ->oplock .hEvent;
  780. events[TARGET_READ_DONE] = target->readSynch.hEvent;
  781. events[MATCH_READ_DONE] = match ->readSynch.hEvent;
  782. events[GROVEL_START] = grovelStartEvent;
  783. while (TRUE) {
  784. if (waitingForGrovelStart)
  785. timeOut = INFINITE;
  786. else if (timeAllotted == INFINITE)
  787. timeOut = targetRead || matchRead ? INFINITE : 0;
  788. else {
  789. elapsedTime = GetTickCount() - startAllottedTime;
  790. if (timeAllotted > elapsedTime)
  791. timeOut = targetRead || matchRead
  792. ? timeAllotted - elapsedTime : 0;
  793. else {
  794. waitingForGrovelStart = TRUE;
  795. timeOut = INFINITE;
  796. grovelStatus = Grovel_pending;
  797. ASSERT(IsReset(grovelStopEvent));
  798. success = SetEvent(grovelStopEvent);
  799. ASSERT_ERROR(success);
  800. }
  801. }
  802. eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
  803. eventTime = GetTickCount();
  804. switch (eventNum) {
  805. case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
  806. ASSERT(!targetOplockBroke);
  807. targetOplockBroke = TRUE;
  808. success = ResetEvent(target->oplock.hEvent);
  809. ASSERT_ERROR(success);
  810. if (!targetRead) {
  811. CLEAR_OVERLAPPED(target->oplock);
  812. CloseFile(target);
  813. }
  814. DPRINTF((_T("%s: target file %s oplock broke during compare\n"),
  815. driveLetterName, target->fileName));
  816. break;
  817. case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:
  818. ASSERT(!matchOplockBroke);
  819. matchOplockBroke = TRUE;
  820. success = ResetEvent(match->oplock.hEvent);
  821. ASSERT_ERROR(success);
  822. if (!matchRead) {
  823. CLEAR_OVERLAPPED(match->oplock);
  824. CloseFile(match);
  825. }
  826. DPRINTF((_T("%s: match file %s oplock broke during compare\n"),
  827. driveLetterName, match->fileName));
  828. break;
  829. case WAIT_OBJECT_0 + TARGET_READ_DONE:
  830. ASSERT(targetRead);
  831. targetRead = FALSE;
  832. success = ResetEvent(target->readSynch.hEvent);
  833. ASSERT_ERROR(success);
  834. target->stopTime = eventTime;
  835. if (targetOplockBroke) {
  836. CLEAR_OVERLAPPED(target->oplock);
  837. CloseFile(target);
  838. }
  839. break;
  840. case WAIT_OBJECT_0 + MATCH_READ_DONE:
  841. ASSERT(matchRead);
  842. matchRead = FALSE;
  843. success = ResetEvent(match->readSynch.hEvent);
  844. ASSERT_ERROR(success);
  845. match->stopTime = eventTime;
  846. if (matchOplockBroke) {
  847. CLEAR_OVERLAPPED(match->oplock);
  848. CloseFile(match);
  849. }
  850. break;
  851. case WAIT_OBJECT_0 + GROVEL_START:
  852. ASSERT(waitingForGrovelStart);
  853. waitingForGrovelStart = FALSE;
  854. success = ResetEvent(grovelStartEvent);
  855. ASSERT_ERROR(success);
  856. break;
  857. case WAIT_TIMEOUT:
  858. ASSERT(!waitingForGrovelStart);
  859. if (!targetRead && !matchRead) {
  860. if (terminate)
  861. throw TERMINATE;
  862. if (targetOplockBroke)
  863. throw TARGET_ERROR;
  864. if (matchOplockBroke)
  865. throw MATCH_ERROR;
  866. return;
  867. }
  868. waitingForGrovelStart = TRUE;
  869. grovelStatus = Grovel_pending;
  870. ASSERT(IsReset(grovelStopEvent));
  871. success = SetEvent(grovelStopEvent);
  872. ASSERT_ERROR(success);
  873. break;
  874. default:
  875. ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
  876. }
  877. }
  878. }
  879. #undef TARGET_OPLOCK_BREAK
  880. #undef MATCH_OPLOCK_BREAK
  881. #undef TARGET_READ_DONE
  882. #undef MATCH_READ_DONE
  883. #undef GROVEL_START
  884. #undef NUM_EVENTS
  885. /*****************************************************************************/
  886. #define TARGET_OPLOCK_BREAK 0
  887. #define MATCH_OPLOCK_BREAK 1
  888. #define MERGE_DONE 2
  889. #define GROVEL_START 3
  890. #define NUM_EVENTS 4
  891. // MergeCheckPoint suspends the thread until the merge operation is completed.
  892. // If the time allotment expires before the merge is completed, the
  893. // grovelStart event is set to signal grovel() to awaken, and this method
  894. // won't return until grovel() sets the grovelStart event. If either file's
  895. // oplock breaks before the merge is completed, the abortMerge event is set.
  896. BOOL Groveler::MergeCheckPoint(
  897. FileData *target,
  898. FileData *match,
  899. OVERLAPPED *mergeSynch,
  900. HANDLE abortMergeEvent,
  901. BOOL merge)
  902. {
  903. HANDLE events[NUM_EVENTS];
  904. DWORD elapsedTime,
  905. timeOut,
  906. eventNum,
  907. eventTime,
  908. lastError = STATUS_TIMEOUT;
  909. BOOL targetOplockBroke = FALSE,
  910. matchOplockBroke = FALSE,
  911. waitingForGrovelStart = FALSE,
  912. mergeSuccess = FALSE,
  913. success;
  914. ASSERT(target != NULL);
  915. ASSERT(target->handle != NULL);
  916. ASSERT(target->oplock.hEvent != NULL);
  917. ASSERT(match != NULL);
  918. ASSERT(match->handle != NULL);
  919. ASSERT(match->oplock.hEvent != NULL);
  920. ASSERT(mergeSynch != NULL);
  921. ASSERT(mergeSynch->hEvent != NULL);
  922. ASSERT(abortMergeEvent != NULL);
  923. ASSERT(grovelStartEvent != NULL);
  924. ASSERT(grovelStopEvent != NULL);
  925. ASSERT(grovHandle != NULL);
  926. events[TARGET_OPLOCK_BREAK] = target->oplock.hEvent;
  927. events[MATCH_OPLOCK_BREAK] = match ->oplock.hEvent;
  928. events[MERGE_DONE] = mergeSynch-> hEvent;
  929. events[GROVEL_START] = grovelStartEvent;
  930. while (TRUE) {
  931. if (waitingForGrovelStart)
  932. timeOut = INFINITE;
  933. else if (timeAllotted == INFINITE)
  934. timeOut = merge ? INFINITE : 0;
  935. else {
  936. elapsedTime = GetTickCount() - startAllottedTime;
  937. if (timeAllotted > elapsedTime)
  938. timeOut = merge ? timeAllotted - elapsedTime : 0;
  939. else {
  940. waitingForGrovelStart = TRUE;
  941. timeOut = INFINITE;
  942. grovelStatus = Grovel_pending;
  943. ASSERT(IsReset(grovelStopEvent));
  944. success = SetEvent(grovelStopEvent);
  945. ASSERT_ERROR(success);
  946. }
  947. }
  948. eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
  949. eventTime = GetTickCount();
  950. switch (eventNum) {
  951. case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
  952. ASSERT(!targetOplockBroke);
  953. targetOplockBroke = TRUE;
  954. success = ResetEvent(target->oplock.hEvent);
  955. ASSERT_ERROR(success);
  956. CLEAR_OVERLAPPED(target->oplock);
  957. if (merge) {
  958. success = SetEvent(abortMergeEvent);
  959. ASSERT_ERROR(success);
  960. }
  961. DPRINTF((_T("%s: target file %s oplock broke during merge\n"),
  962. driveLetterName, target->fileName));
  963. break;
  964. case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:
  965. ASSERT(!matchOplockBroke);
  966. matchOplockBroke = TRUE;
  967. success = ResetEvent(match->oplock.hEvent);
  968. ASSERT_ERROR(success);
  969. CLEAR_OVERLAPPED(match->oplock);
  970. if (merge) {
  971. success = SetEvent(abortMergeEvent);
  972. ASSERT_ERROR(success);
  973. }
  974. DPRINTF((_T("%s: match file %s oplock broke during merge\n"),
  975. driveLetterName, match->fileName));
  976. break;
  977. case WAIT_OBJECT_0 + MERGE_DONE:
  978. ASSERT(merge);
  979. merge = FALSE;
  980. success = ResetEvent(mergeSynch->hEvent);
  981. ASSERT_ERROR(success);
  982. target->stopTime = eventTime;
  983. mergeSuccess = GetOverlappedResult(
  984. grovHandle,
  985. mergeSynch,
  986. &lastError,
  987. FALSE);
  988. if (!mergeSuccess)
  989. lastError = GetLastError();
  990. else if (lastError != ERROR_SUCCESS)
  991. mergeSuccess = FALSE;
  992. else {
  993. GetCSIndex(target->handle, &target->entry.csIndex);
  994. if (!HasCSIndex(match->entry.csIndex))
  995. GetCSIndex(match->handle, &match->entry.csIndex);
  996. }
  997. CloseFile(target);
  998. CloseFile(match);
  999. break;
  1000. case WAIT_OBJECT_0 + GROVEL_START:
  1001. ASSERT(waitingForGrovelStart);
  1002. waitingForGrovelStart = FALSE;
  1003. success = ResetEvent(grovelStartEvent);
  1004. ASSERT_ERROR(success);
  1005. break;
  1006. case WAIT_TIMEOUT:
  1007. ASSERT(!waitingForGrovelStart);
  1008. if (!merge) {
  1009. success = ResetEvent(abortMergeEvent);
  1010. ASSERT_ERROR(success);
  1011. if (terminate)
  1012. throw TERMINATE;
  1013. if (!mergeSuccess)
  1014. SetLastError(lastError);
  1015. return mergeSuccess;
  1016. }
  1017. waitingForGrovelStart = TRUE;
  1018. grovelStatus = Grovel_pending;
  1019. ASSERT(IsReset(grovelStopEvent));
  1020. success = SetEvent(grovelStopEvent);
  1021. ASSERT_ERROR(success);
  1022. break;
  1023. default:
  1024. ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
  1025. }
  1026. }
  1027. }
  1028. #undef TARGET_OPLOCK_BREAK
  1029. #undef MATCH_OPLOCK_BREAK
  1030. #undef GROVEL_START
  1031. #undef MERGE_DONE
  1032. #undef NUM_EVENTS
  1033. /*****************************************************************************/
  1034. // The following seven methods (GetTarget(), CalculateSignature(),
  1035. // GetMatchList(), GetCSFile(), GetMatch(), Compare(), and Merge())
  1036. // implement the phases of the groveling process.
  1037. // Structures used by the methods.
  1038. struct MatchListEntry {
  1039. DWORDLONG fileID,
  1040. createTime,
  1041. writeTime;
  1042. };
  1043. struct CSIndexEntry {
  1044. CSID csIndex;
  1045. TCHAR name[1];
  1046. };
  1047. /*****************************************************************************/
  1048. // GetTarget() is the first phase of groveling a file. It dequeues
  1049. // a file to be groveled (the "target" file), opens it, checks that
  1050. // it meets all criteria, then passes it on to the next phases.
  1051. BOOL Groveler::GetTarget(
  1052. FileData *target,
  1053. DWORD *queueIndex)
  1054. {
  1055. SGNativeTableEntry tableEntry;
  1056. SGNativeQueueEntry queueEntry,
  1057. otherQueueEntry;
  1058. TFileName targetName,
  1059. parentName;
  1060. BY_HANDLE_FILE_INFORMATION fileInfo;
  1061. DWORD lastError;
  1062. DWORDLONG currentTime,
  1063. readyTime;
  1064. #if DBG
  1065. DWORD earliestTime;
  1066. #endif
  1067. ULARGE_INTEGER word;
  1068. LONG num;
  1069. BOOL byName,
  1070. success;
  1071. TPRINTF((_T("GETTarget: entered\n")));
  1072. ASSERT(target != NULL);
  1073. ASSERT(target->handle == NULL);
  1074. ASSERT(target->entry.fileID == 0);
  1075. ASSERT(target->fileName[0] == _T('\0'));
  1076. ASSERT(!HasCSIndex(target->entry.csIndex));
  1077. ASSERT(queueIndex != NULL);
  1078. ASSERT(sgDatabase != NULL);
  1079. // Dequeue a file to be groveled. If the queue is empty or if no
  1080. // entry's ready time has been reached, return Grovel_ok to grovel().
  1081. queueEntry.fileName = target->fileName;
  1082. num = sgDatabase->QueueGetFirst(&queueEntry);
  1083. if (num < 0)
  1084. throw DATABASE_ERROR;
  1085. if (num == 0) {
  1086. DPRINTF((_T("%s: queue is empty\n"), driveLetterName));
  1087. return FALSE;
  1088. }
  1089. ASSERT(num == 1);
  1090. currentTime = GetTime();
  1091. if (queueEntry.readyTime > currentTime) {
  1092. #if DBG
  1093. earliestTime = (DWORD)((queueEntry.readyTime - currentTime) / 10000);
  1094. DPRINTF((_T("%s: earliest queue entry ready to be groveled in %lu.%03lu sec\n"),
  1095. driveLetterName, earliestTime / 1000, earliestTime % 1000));
  1096. #endif
  1097. return FALSE;
  1098. }
  1099. *queueIndex = queueEntry.order;
  1100. target->entry.fileID = queueEntry.fileID;
  1101. target->parentID = queueEntry.parentID;
  1102. target->retryTime = queueEntry.retryTime;
  1103. // Open the file by ID or name, and check by name
  1104. // that the file and its parent directory are allowed.
  1105. byName = target->entry.fileID == 0;
  1106. if (byName) {
  1107. ASSERT(target->parentID != 0);
  1108. ASSERT(target->fileName[0] != _T('\0'));
  1109. #ifdef DEBUG_USN_REASON
  1110. DPRINTF((_T("--> 0x%08lx 0x%016I64x:\"%s\"\n"),
  1111. queueEntry.reason, target->parentID, target->fileName));
  1112. #endif
  1113. if (!GetFileName(volumeHandle, target->parentID, &parentName)) {
  1114. DPRINTF((_T("%s: can't get name for directory 0x%016I64x\n"),
  1115. driveLetterName, target->parentID));
  1116. throw TARGET_INVALID;
  1117. }
  1118. targetName.assign(parentName.name);
  1119. targetName.append(_T("\\"));
  1120. targetName.append(target->fileName);
  1121. if (!IsAllowedName(targetName.name)) {
  1122. DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
  1123. driveLetterName, targetName.name));
  1124. throw TARGET_INVALID;
  1125. }
  1126. targetName.assign(driveName);
  1127. targetName.append(parentName.name);
  1128. targetName.append(_T("\\"));
  1129. targetName.append(target->fileName);
  1130. if (!OpenFileByName(target, FALSE, targetName.name)) {
  1131. lastError = GetLastError();
  1132. if (lastError == ERROR_FILE_NOT_FOUND
  1133. || lastError == ERROR_PATH_NOT_FOUND) {
  1134. DPRINTF((_T("%s: target file \"%s\" doesn\'t exist\n"),
  1135. driveLetterName, targetName.name));
  1136. throw TARGET_INVALID;
  1137. }
  1138. DPRINTF((_T("%s: can't open target file \"%s\": %lu\n"),
  1139. driveLetterName, targetName.name, lastError));
  1140. throw TARGET_ERROR;
  1141. }
  1142. // Set an oplock on the target file.
  1143. if (!SetOplock(target)) {
  1144. DPRINTF((_T("%s: can't set oplock on target file \"%s\": %lu\n"),
  1145. driveLetterName, targetName.name, GetLastError()));
  1146. throw TARGET_ERROR;
  1147. }
  1148. } else {
  1149. ASSERT(target->parentID == 0);
  1150. ASSERT(target->fileName[0] == _T('\0'));
  1151. target->parentID = 0;
  1152. #ifdef DEBUG_USN_REASON
  1153. DPRINTF((_T("--> 0x%08lx 0x%016I64x 0x%016I64x\n"),
  1154. queueEntry.reason, target->entry.fileID, target->parentID));
  1155. #endif
  1156. TPRINTF((_T("GETTarget: Opening %s:0x%016I64x by ID\n"),
  1157. driveName,target->entry.fileID));
  1158. if (!OpenFileByID(target, FALSE)) {
  1159. lastError = GetLastError();
  1160. if (lastError == ERROR_FILE_NOT_FOUND
  1161. || lastError == ERROR_PATH_NOT_FOUND
  1162. || lastError == ERROR_INVALID_PARAMETER) {
  1163. DPRINTF((_T("%s: target file 0x%016I64x doesn\'t exist: %lu\n"),
  1164. driveLetterName, target->entry.fileID, lastError));
  1165. throw TARGET_INVALID;
  1166. }
  1167. DPRINTF((_T("%s: can't open target file 0x%016I64x: %lu\n"),
  1168. driveLetterName, target->entry.fileID, lastError));
  1169. throw TARGET_ERROR;
  1170. }
  1171. // Set an oplock on the target file.
  1172. TPRINTF((_T("GETTarget: Successfully opened %s:0x%016I64x by ID\n"),
  1173. driveName,target->entry.fileID));
  1174. if (!SetOplock(target)) {
  1175. DPRINTF((_T("%s: can't set oplock on target file %s: %lu\n"),
  1176. driveLetterName, target->fileName, GetLastError()));
  1177. throw TARGET_ERROR;
  1178. }
  1179. if (!GetFileName(target->handle, &targetName)) {
  1180. DPRINTF((_T("%s: can't get name for target file %s\n"),
  1181. driveLetterName, target->fileName));
  1182. throw TARGET_ERROR;
  1183. }
  1184. if (!IsAllowedName(targetName.name)) {
  1185. DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
  1186. driveLetterName, targetName.name));
  1187. throw TARGET_INVALID;
  1188. }
  1189. }
  1190. // Get the information on the target file.
  1191. if (!GetFileInformationByHandle(target->handle, &fileInfo)) {
  1192. #if DBG
  1193. if (byName) {
  1194. DPRINTF((_T("%s: can't get information on target file \"%s\": %lu\n"),
  1195. driveLetterName, targetName.name, GetLastError()));
  1196. } else {
  1197. DPRINTF((_T("%s: can't get information on target file %s: %lu\n"),
  1198. driveLetterName, target->fileName, GetLastError()));
  1199. }
  1200. #endif
  1201. throw TARGET_ERROR;
  1202. }
  1203. word.HighPart = fileInfo.nFileIndexHigh;
  1204. word.LowPart = fileInfo.nFileIndexLow;
  1205. if (byName)
  1206. target->entry.fileID = word.QuadPart;
  1207. else {
  1208. ASSERT(target->entry.fileID == word.QuadPart);
  1209. }
  1210. target->parentID = 0; // We don't need the parent ID any more.
  1211. // If the target file was opened by name, check
  1212. // if it currently has an entry in the queue by ID.
  1213. if (byName) {
  1214. otherQueueEntry.fileID = target->entry.fileID;
  1215. otherQueueEntry.fileName = NULL;
  1216. num = sgDatabase->QueueGetFirstByFileID(&otherQueueEntry);
  1217. if (num < 0)
  1218. throw DATABASE_ERROR;
  1219. if (num > 0) {
  1220. ASSERT(num == 1);
  1221. DPRINTF((_T("%s: target file \"%s\" is already in queue as 0x%016I64x\n"),
  1222. driveLetterName, targetName.name, target->entry.fileID));
  1223. target->entry.fileID = 0; // Prevent the table entry from being deleted.
  1224. throw TARGET_INVALID;
  1225. }
  1226. }
  1227. // Fill in the target file's remaining information values.
  1228. word.HighPart = fileInfo.nFileSizeHigh;
  1229. word.LowPart = fileInfo.nFileSizeLow;
  1230. target->entry.fileSize = word.QuadPart;
  1231. target->entry.attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;
  1232. word.HighPart = fileInfo.ftCreationTime.dwHighDateTime;
  1233. word.LowPart = fileInfo.ftCreationTime.dwLowDateTime;
  1234. target->entry.createTime = word.QuadPart;
  1235. word.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime;
  1236. word.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime;
  1237. target->entry.writeTime = word.QuadPart;
  1238. // If the target file is a reparse point, check if it
  1239. // is a SIS reparse point. If it is, get the CS index.
  1240. if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0)
  1241. target->entry.csIndex = nullCSIndex;
  1242. else if (!GetCSIndex(target->handle, &target->entry.csIndex)) {
  1243. DPRINTF((_T("%s: target file %s is a non-SIS reparse point\n"),
  1244. driveLetterName, target->fileName));
  1245. throw TARGET_INVALID;
  1246. }
  1247. // Check if the target file is too small or has any disallowed attributes.
  1248. if ((fileInfo.dwFileAttributes & disallowedAttributes) != 0
  1249. || fileInfo.nNumberOfLinks != 1
  1250. || target->entry.fileSize < minFileSize) {
  1251. DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
  1252. driveLetterName, target->fileName));
  1253. throw TARGET_INVALID;
  1254. }
  1255. // If a table entry exists for the target file, check if it is
  1256. // consistent with the information we have on the file. If it is, and
  1257. // the file was opened by name, or if the queue entry was the result
  1258. // of a SIS merge, close the file and go on to grovel the next target.
  1259. tableEntry.fileID = target->entry.fileID;
  1260. num = sgDatabase->TableGetFirstByFileID(&tableEntry);
  1261. if (num < 0)
  1262. throw DATABASE_ERROR;
  1263. if (num > 0) {
  1264. ASSERT(num == 1);
  1265. ASSERT(tableEntry.fileID == target->entry.fileID);
  1266. if (target->entry.fileSize == tableEntry.fileSize
  1267. && target->entry.attributes == tableEntry.attributes
  1268. && SameCSIndex(target->entry.csIndex, tableEntry.csIndex)
  1269. && target->entry.createTime == tableEntry.createTime
  1270. && target->entry.writeTime == tableEntry.writeTime) {
  1271. if (byName) {
  1272. DPRINTF((_T("%s: target file \"%s\" has already been groveled\n"),
  1273. driveLetterName, targetName.name));
  1274. target->entry.fileID = 0; // Prevent the table entry from being deleted.
  1275. throw TARGET_INVALID;
  1276. }
  1277. if (queueEntry.reason == USN_REASON_BASIC_INFO_CHANGE) {
  1278. DPRINTF((_T("%s: queue entry for file %s is the result of a SIS merge\n"),
  1279. driveLetterName, target->fileName));
  1280. target->entry.fileID = 0; // Prevent the table entry from being deleted.
  1281. throw TARGET_INVALID;
  1282. }
  1283. }
  1284. }
  1285. // Check if the time since the target file was last modified is too short.
  1286. // If it is, close the file and go on to grovel the next target file.
  1287. readyTime = (target->entry.createTime > target->entry.writeTime
  1288. ? target->entry.createTime : target->entry.writeTime) + minFileAge;
  1289. currentTime = GetTime();
  1290. if (currentTime < readyTime)
  1291. throw TARGET_ERROR;
  1292. // Check if the target file is mapped by another user.
  1293. if (IsFileMapped(target)) {
  1294. DPRINTF((_T("%s: target file %s is already mapped\n"),
  1295. driveLetterName, target->fileName));
  1296. throw TARGET_ERROR;
  1297. }
  1298. TPRINTF((_T("GETTarget: returning\n")));
  1299. return TRUE;
  1300. }
  1301. /*****************************************************************************/
  1302. // CalculateSignature() calculates the target file's signature. It reads two
  1303. // pages, 1/3 and 2/3 through the file, and calculates the signature on each
  1304. // page.
  1305. VOID Groveler::CalculateSignature(FileData *target)
  1306. {
  1307. DWORD lastPageSize,
  1308. bytesToRead,
  1309. prevBytesToRead,
  1310. bytesToRequest,
  1311. prevBytesToRequest = 0,
  1312. bytesRead,
  1313. toggle,
  1314. lastError;
  1315. DWORDLONG numPages,
  1316. pageNum,
  1317. prevPageNum,
  1318. lastPageNum,
  1319. firstPageToRead,
  1320. lastPageToRead;
  1321. ULARGE_INTEGER offset;
  1322. BOOL targetReadDone = FALSE,
  1323. success;
  1324. INT i,
  1325. nPagesToRead;
  1326. ASSERT(target != NULL);
  1327. ASSERT(target->entry.fileID != 0);
  1328. ASSERT(target->handle != NULL);
  1329. target->entry.signature = 0;
  1330. if (0 == target->entry.fileSize)
  1331. return;
  1332. numPages = (target->entry.fileSize - 1) / SIG_PAGE_SIZE + 1;
  1333. lastPageSize = (DWORD)((target->entry.fileSize - 1) % SIG_PAGE_SIZE) + 1;
  1334. lastPageNum = numPages - 1;
  1335. ASSERT(numPages > 0);
  1336. firstPageToRead = (numPages + 2) / 3 - 1;
  1337. lastPageToRead = lastPageNum - firstPageToRead;
  1338. if (lastPageToRead > firstPageToRead)
  1339. nPagesToRead = 2;
  1340. else
  1341. nPagesToRead = 1;
  1342. toggle = 0;
  1343. pageNum = firstPageToRead;
  1344. // We'll read at most two pages, but make at most three passes through the loop
  1345. // since we're doing asynchronous reads.
  1346. for (i = 0; i <= nPagesToRead; ++i) {
  1347. // Unless this is the first pass through the loop,
  1348. // wait for the previous read of the target file to complete.
  1349. if (i > 0) {
  1350. SigCheckPoint(target, !targetReadDone);
  1351. success = GetOverlappedResult(
  1352. target->handle,
  1353. &target->readSynch,
  1354. &bytesRead,
  1355. FALSE);
  1356. if (!success) {
  1357. DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
  1358. driveLetterName, target->fileName, GetLastError()));
  1359. throw TARGET_ERROR;
  1360. }
  1361. if (bytesRead != prevBytesToRequest &&
  1362. bytesRead != prevBytesToRead) {
  1363. DPRINTF((_T("%s: sig read only %lu of %lu bytes from target file %s\n"),
  1364. driveLetterName, bytesRead, prevBytesToRequest, target->fileName));
  1365. throw TARGET_ERROR;
  1366. }
  1367. if (bytesRead >= sigReportThreshold) {
  1368. hashReadCount++;
  1369. hashReadTime += target->stopTime - target->startTime;
  1370. }
  1371. }
  1372. // Unless we've read all of the pages, begin reading the next page.
  1373. if (i < nPagesToRead) {
  1374. offset.QuadPart = pageNum * SIG_PAGE_SIZE;
  1375. target->readSynch.Offset = offset.LowPart;
  1376. target->readSynch.OffsetHigh = offset.HighPart;
  1377. bytesToRead = pageNum == lastPageNum ? lastPageSize : SIG_PAGE_SIZE;
  1378. bytesToRequest = bytesToRead + sectorSize - 1;
  1379. bytesToRequest -= bytesToRequest % sectorSize;
  1380. target->startTime = GetTickCount();
  1381. targetReadDone = ReadFile(target->handle, target->buffer[toggle],
  1382. bytesToRequest, NULL, &target->readSynch);
  1383. target->stopTime = GetTickCount();
  1384. lastError = GetLastError();
  1385. if (targetReadDone) {
  1386. success = ResetEvent(target->readSynch.hEvent);
  1387. ASSERT_ERROR(success);
  1388. } else if (lastError != ERROR_IO_PENDING) {
  1389. DPRINTF((_T("%s: error reading target file %s: %lu\n"),
  1390. driveLetterName, target->fileName, lastError));
  1391. throw TARGET_ERROR;
  1392. }
  1393. }
  1394. // Unless this is the first pass through the loop,
  1395. // calculate the signature of the target file page just read.
  1396. if (i > 0)
  1397. target->entry.signature = Checksum((VOID *)target->buffer[1-toggle],
  1398. prevBytesToRead, prevPageNum * SIG_PAGE_SIZE, target->entry.signature);
  1399. prevPageNum = pageNum;
  1400. prevBytesToRead = bytesToRead;
  1401. prevBytesToRequest = bytesToRequest;
  1402. toggle = 1-toggle;
  1403. pageNum = lastPageToRead;
  1404. }
  1405. }
  1406. /*****************************************************************************/
  1407. // GetMatchList() looks for file entries in the database ("match" files)
  1408. // with the same size, signature, and attributes as the target file.
  1409. VOID Groveler::GetMatchList(
  1410. FileData *target,
  1411. FIFO *matchList,
  1412. Table *csIndexTable)
  1413. {
  1414. SGNativeTableEntry tableEntry;
  1415. MatchListEntry *matchListEntry;
  1416. CSIndexEntry *csIndexEntry;
  1417. LONG num;
  1418. BOOL success;
  1419. ASSERT(target != NULL);
  1420. ASSERT(target->entry.fileID != 0);
  1421. ASSERT(target->entry.fileSize > 0);
  1422. ASSERT(target->handle != NULL);
  1423. ASSERT(matchList != NULL);
  1424. ASSERT(matchList->Number() == 0);
  1425. ASSERT(csIndexTable != NULL);
  1426. ASSERT(csIndexTable->Number() == 0);
  1427. ASSERT(sgDatabase != NULL);
  1428. tableEntry.fileSize = target->entry.fileSize;
  1429. tableEntry.signature = target->entry.signature;
  1430. tableEntry.attributes = target->entry.attributes;
  1431. #ifdef DEBUG_GET_BY_ATTR
  1432. DPRINTF((_T("--> {%I64u, 0x%016I64x, 0x%lx}\n"),
  1433. tableEntry.fileSize, tableEntry.signature, tableEntry.attributes));
  1434. #endif
  1435. num = sgDatabase->TableGetFirstByAttr(&tableEntry);
  1436. while (num > 0) {
  1437. ASSERT(num == 1);
  1438. ASSERT(tableEntry.fileID != 0);
  1439. ASSERT(tableEntry.fileSize == target->entry.fileSize);
  1440. ASSERT(tableEntry.signature == target->entry.signature);
  1441. ASSERT(tableEntry.attributes == target->entry.attributes);
  1442. if (!HasCSIndex(tableEntry.csIndex)) {
  1443. matchListEntry = new MatchListEntry;
  1444. ASSERT(matchListEntry != NULL);
  1445. matchListEntry->fileID = tableEntry.fileID;
  1446. matchListEntry->createTime = tableEntry.createTime;
  1447. matchListEntry->writeTime = tableEntry.writeTime;
  1448. matchList->Put((VOID *)matchListEntry);
  1449. #ifdef DEBUG_GET_BY_ATTR
  1450. DPRINTF((_T(" 0x%016I64x\n"), tableEntry.fileID));
  1451. #endif
  1452. } else {
  1453. csIndexEntry = (CSIndexEntry *)csIndexTable->Get
  1454. ((const VOID *)&tableEntry.csIndex, sizeof(CSID));
  1455. if (csIndexEntry == NULL) {
  1456. TCHAR *csName = GetCSName(&tableEntry.csIndex);
  1457. if (csName != NULL) {
  1458. //
  1459. // Calculate how big the name buffer is and how big the
  1460. // overall structure is. Note that the CSIndexEntry has
  1461. // space in it for one character, which we account for.
  1462. //
  1463. int nameBufLen = ((wcslen(csName) + 1) * sizeof(TCHAR)); //account for NULL
  1464. int bufLen = ((sizeof(CSIndexEntry)-sizeof(TCHAR)) + nameBufLen);
  1465. csIndexEntry = (CSIndexEntry *)(new BYTE[bufLen]);
  1466. ASSERT(csIndexEntry != NULL);
  1467. csIndexEntry->csIndex = tableEntry.csIndex;
  1468. (void)StringCbCopy( csIndexEntry->name,
  1469. nameBufLen,
  1470. csName);
  1471. FreeCSName(csName);
  1472. csName = NULL;
  1473. }
  1474. success = csIndexTable->Put((VOID *)csIndexEntry, sizeof(CSID));
  1475. ASSERT_ERROR(success);
  1476. }
  1477. #ifdef DEBUG_GET_BY_ATTR
  1478. DPRINTF((_T(" 0x%016I64x %s\n"),
  1479. match->entry.fileID, csIndexEntry->name));
  1480. #endif
  1481. }
  1482. num = sgDatabase->TableGetNext(&tableEntry);
  1483. }
  1484. if (num < 0)
  1485. throw DATABASE_ERROR;
  1486. }
  1487. /*****************************************************************************/
  1488. // GetCSFile() pops the first entry from the CS index table and opens it.
  1489. BOOL Groveler::GetCSFile(
  1490. FileData *target,
  1491. FileData *match,
  1492. Table *csIndexTable)
  1493. {
  1494. CSIndexEntry *csIndexEntry;
  1495. TFileName csFileName;
  1496. DWORD lastError;
  1497. BY_HANDLE_FILE_INFORMATION fileInfo;
  1498. ULARGE_INTEGER fileSize;
  1499. LONG num;
  1500. ASSERT(target != NULL);
  1501. ASSERT(target->entry.fileID != 0);
  1502. ASSERT(target->entry.fileSize > 0);
  1503. ASSERT(target->handle != NULL);
  1504. ASSERT(match != NULL);
  1505. ASSERT(match->entry.fileID == 0);
  1506. ASSERT(match->entry.fileSize == 0);
  1507. ASSERT(match->entry.signature == 0);
  1508. ASSERT(match->entry.attributes == 0);
  1509. ASSERT(!HasCSIndex(match->entry.csIndex));
  1510. ASSERT(match->entry.createTime == 0);
  1511. ASSERT(match->entry.writeTime == 0);
  1512. ASSERT(match->handle == NULL);
  1513. ASSERT(match->parentID == 0);
  1514. ASSERT(match->retryTime == 0);
  1515. ASSERT(match->fileName[0] == _T('\0'));
  1516. ASSERT(csIndexTable != NULL);
  1517. ASSERT(sgDatabase != NULL);
  1518. // Pop the first entry from the CS index table. If the entry's CS
  1519. // index is the same as the target file's, skip to the next entry.
  1520. do {
  1521. csIndexEntry = (CSIndexEntry *)csIndexTable->GetFirst();
  1522. if (csIndexEntry == NULL) {
  1523. match->entry.csIndex = nullCSIndex;
  1524. match->fileName[0] = _T('\0');
  1525. return FALSE;
  1526. }
  1527. ASSERT(HasCSIndex(csIndexEntry->csIndex));
  1528. match->entry.csIndex = csIndexEntry->csIndex;
  1529. _tcscpy(match->fileName, csIndexEntry->name);
  1530. delete csIndexEntry;
  1531. csIndexEntry = NULL;
  1532. } while (SameCSIndex(target->entry.csIndex, match->entry.csIndex));
  1533. match->entry.fileSize = target->entry.fileSize;
  1534. match->entry.signature = target->entry.signature;
  1535. match->entry.attributes = target->entry.attributes;
  1536. csFileName.assign(driveName);
  1537. csFileName.append(CS_DIR_PATH);
  1538. csFileName.append(_T("\\"));
  1539. csFileName.append(match->fileName);
  1540. csFileName.append(_T(".sis"));
  1541. // Open the CS file. If the file doesn't exist, remove all entries
  1542. // from the table that point to this file. If the file can't be
  1543. // opened for any other reason, mark that the target file may
  1544. // need to be groveled again, then go on to the next match file.
  1545. #ifdef DEBUG_GET_BY_ATTR
  1546. DPRINTF((_T("--> %s\n"), match->fileName));
  1547. #endif
  1548. if (!OpenFileByName(match, FALSE, csFileName.name)) {
  1549. lastError = GetLastError();
  1550. if (lastError == ERROR_FILE_NOT_FOUND
  1551. || lastError == ERROR_PATH_NOT_FOUND) {
  1552. DPRINTF((_T("%s: CS file %s doesn't exist\n"),
  1553. driveLetterName, match->fileName));
  1554. throw MATCH_INVALID;
  1555. }
  1556. DPRINTF((_T("%s: can't open CS file %s: %lu\n"),
  1557. driveLetterName, match->fileName, lastError));
  1558. throw MATCH_ERROR;
  1559. }
  1560. // Get the information on the CS file. If this fails,
  1561. // close the file, mark that the target file may need to
  1562. // be groveled again, then go on to the next match file.
  1563. if (!GetFileInformationByHandle(match->handle, &fileInfo)) {
  1564. DPRINTF((_T("%s: can't get information on CS file %s: %lu\n"),
  1565. driveLetterName, match->fileName, GetLastError()));
  1566. throw MATCH_ERROR;
  1567. }
  1568. // If the CS file's information doesn't match its expected values, close the
  1569. // CS file, delete the match file entry from the table, and go on to the
  1570. // next match file. Otherwise, go on to compare the target and CS files.
  1571. fileSize.HighPart = fileInfo.nFileSizeHigh;
  1572. fileSize.LowPart = fileInfo.nFileSizeLow;
  1573. if (match->entry.fileSize != fileSize.QuadPart) {
  1574. DPRINTF((_T("%s: CS file %s doesn't have expected information\n"),
  1575. driveLetterName, match->fileName));
  1576. throw MATCH_STALE;
  1577. }
  1578. return TRUE;
  1579. }
  1580. /*****************************************************************************/
  1581. // GetMatch() pops the first entry from the match list and opens it.
  1582. BOOL Groveler::GetMatch(
  1583. FileData *target,
  1584. FileData *match,
  1585. FIFO *matchList)
  1586. {
  1587. SGNativeQueueEntry queueEntry;
  1588. MatchListEntry *matchListEntry;
  1589. DWORD attributes,
  1590. lastError;
  1591. BY_HANDLE_FILE_INFORMATION fileInfo;
  1592. ULARGE_INTEGER fileID,
  1593. fileSize,
  1594. createTime,
  1595. writeTime;
  1596. LONG num;
  1597. ASSERT(target != NULL);
  1598. ASSERT(target->entry.fileID != 0);
  1599. ASSERT(target->entry.fileSize > 0);
  1600. ASSERT(target->handle != NULL);
  1601. ASSERT(match != NULL);
  1602. ASSERT(match->entry.fileID == 0);
  1603. ASSERT(match->entry.fileSize == 0);
  1604. ASSERT(match->entry.signature == 0);
  1605. ASSERT(match->entry.attributes == 0);
  1606. ASSERT(!HasCSIndex(match->entry.csIndex));
  1607. ASSERT(match->entry.createTime == 0);
  1608. ASSERT(match->entry.writeTime == 0);
  1609. ASSERT(match->handle == NULL);
  1610. ASSERT(match->parentID == 0);
  1611. ASSERT(match->retryTime == 0);
  1612. ASSERT(match->fileName[0] == _T('\0'));
  1613. ASSERT(matchList != NULL);
  1614. ASSERT(sgDatabase != NULL);
  1615. // Pop the first entry from the match list. If the entry's file ID is
  1616. // the same as the target file's, or if the entry is on the queue after
  1617. // having been enqueued by extract_log(), skip to the next entry.
  1618. while (TRUE) {
  1619. matchListEntry = (MatchListEntry *)matchList->Get();
  1620. if (matchListEntry == NULL) {
  1621. match->entry.fileID = 0;
  1622. match->entry.createTime = 0;
  1623. match->entry.writeTime = 0;
  1624. return FALSE;
  1625. }
  1626. match->entry.fileID = matchListEntry->fileID;
  1627. match->entry.createTime = matchListEntry->createTime;
  1628. match->entry.writeTime = matchListEntry->writeTime;
  1629. delete matchListEntry;
  1630. matchListEntry = NULL;
  1631. ASSERT(match->entry.fileID != 0);
  1632. if (target->entry.fileID == match->entry.fileID)
  1633. continue;
  1634. queueEntry.fileID = match->entry.fileID;
  1635. queueEntry.fileName = NULL;
  1636. num = sgDatabase->QueueGetFirstByFileID(&queueEntry);
  1637. if (num < 0)
  1638. throw DATABASE_ERROR;
  1639. if (num > 0) {
  1640. ASSERT(num == 1);
  1641. if (queueEntry.reason != 0) {
  1642. DPRINTF((_T("%s: match file 0x%016I64x is in the queue from USN\n"),
  1643. driveLetterName, match->entry.fileID));
  1644. continue;
  1645. }
  1646. }
  1647. break;
  1648. }
  1649. match->entry.fileSize = target->entry.fileSize;
  1650. match->entry.signature = target->entry.signature;
  1651. match->entry.attributes = target->entry.attributes;
  1652. // Open the match file. If it doesn't exist, remove its entry from the table.
  1653. // If the file can't be opened for any other reason, mark that the target
  1654. // file may need to be groveled again, then go on to the next match file.
  1655. #ifdef DEBUG_GET_BY_ATTR
  1656. DPRINTF((_T("--> 0x%016I64x\n"), match->entry.fileID));
  1657. #endif
  1658. if (!OpenFileByID(match, FALSE)) {
  1659. lastError = GetLastError();
  1660. if (lastError == ERROR_FILE_NOT_FOUND
  1661. || lastError == ERROR_PATH_NOT_FOUND
  1662. || lastError == ERROR_INVALID_PARAMETER) {
  1663. DPRINTF((_T("%s: match file 0x%016I64x doesn\'t exist: %lu\n"),
  1664. driveLetterName, match->entry.fileID, lastError));
  1665. throw MATCH_INVALID;
  1666. }
  1667. DPRINTF((_T("%s: can't open match file 0x%016I64x: %lu\n"),
  1668. driveLetterName, match->entry.fileID, lastError));
  1669. throw MATCH_ERROR;
  1670. }
  1671. // Set an oplock on the match file.
  1672. if (!SetOplock(match)) {
  1673. DPRINTF((_T("%s: can't set oplock on match file %s: %lu\n"),
  1674. driveLetterName, match->fileName, GetLastError()));
  1675. throw MATCH_ERROR;
  1676. }
  1677. // Get the information on the match file. If this fails,
  1678. // close the file, mark that the target file may need to
  1679. // be groveled again, then go on to the next match file.
  1680. if (!GetFileInformationByHandle(match->handle, &fileInfo)) {
  1681. DPRINTF((_T("%s: can't get information on match file %s: %lu\n"),
  1682. driveLetterName, match->fileName, GetLastError()));
  1683. throw MATCH_ERROR;
  1684. }
  1685. fileID.HighPart = fileInfo.nFileIndexHigh;
  1686. fileID.LowPart = fileInfo.nFileIndexLow;
  1687. ASSERT(match->entry.fileID == fileID.QuadPart);
  1688. fileSize.HighPart = fileInfo.nFileSizeHigh;
  1689. fileSize.LowPart = fileInfo.nFileSizeLow;
  1690. attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;
  1691. createTime.HighPart = fileInfo.ftCreationTime.dwHighDateTime;
  1692. createTime.LowPart = fileInfo.ftCreationTime.dwLowDateTime;
  1693. writeTime.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime;
  1694. writeTime.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime;
  1695. // If the match file's information isn't consistent with its table entry, close
  1696. // the file, enqueue it to be re-groveled, and go on to the next match file.
  1697. if (match->entry.fileSize != fileSize .QuadPart
  1698. || match->entry.attributes != attributes
  1699. || match->entry.createTime != createTime.QuadPart
  1700. || match->entry.writeTime != writeTime .QuadPart) {
  1701. DPRINTF((_T("%s: match file %s doesn't match its information\n"),
  1702. driveLetterName, match->fileName));
  1703. throw MATCH_STALE;
  1704. }
  1705. if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) {
  1706. if (GetCSIndex(match->handle, &match->entry.csIndex)) {
  1707. DPRINTF((_T("%s: match file %s is a SIS reparse point\n"),
  1708. driveLetterName, match->fileName));
  1709. throw MATCH_STALE;
  1710. }
  1711. DPRINTF((_T("%s: match file %s is a non-SIS reparse point\n"),
  1712. driveLetterName, match->fileName));
  1713. throw MATCH_INVALID;
  1714. }
  1715. // Check if the match file is mapped by another user.
  1716. if (IsFileMapped(match)) {
  1717. DPRINTF((_T("%s: match file %s is already mapped\n"),
  1718. driveLetterName, match->fileName));
  1719. throw MATCH_ERROR;
  1720. }
  1721. return TRUE;
  1722. }
  1723. /*****************************************************************************/
  1724. // Compare() compares the target and match files. It reads each file
  1725. // one page (64 kB) at a time and compares each pair of pages.
  1726. BOOL Groveler::Compare(
  1727. FileData *target,
  1728. FileData *match)
  1729. {
  1730. DWORD lastPageSize,
  1731. bytesToRead = 0,
  1732. prevBytesToRead,
  1733. bytesToRequest = 0,
  1734. prevBytesToRequest = 0,
  1735. bytesRead,
  1736. toggle,
  1737. targetTime,
  1738. matchTime,
  1739. lastError;
  1740. DWORDLONG numPages,
  1741. pageNum,
  1742. prevPageNum;
  1743. ULARGE_INTEGER offset;
  1744. BOOL targetReadDone = FALSE,
  1745. matchReadDone = FALSE,
  1746. filesMatch,
  1747. success;
  1748. ASSERT(target != NULL);
  1749. ASSERT(target->handle != NULL);
  1750. ASSERT(target->entry.fileID != 0);
  1751. ASSERT(match != NULL);
  1752. ASSERT(match->handle != NULL);
  1753. ASSERT( match->entry.fileID != 0
  1754. && !HasCSIndex(match->entry.csIndex)
  1755. || match->entry.fileID == 0
  1756. && match->fileName[0] != _T('\0')
  1757. && HasCSIndex(match->entry.csIndex));
  1758. ASSERT(target->entry.fileSize == match->entry.fileSize);
  1759. ASSERT(target->entry.signature == match->entry.signature);
  1760. ASSERT(target->entry.attributes == match->entry.attributes);
  1761. numPages = (target->entry.fileSize - 1) / CMP_PAGE_SIZE + 1;
  1762. lastPageSize = (DWORD)((target->entry.fileSize - 1) % CMP_PAGE_SIZE) + 1;
  1763. toggle = 0;
  1764. filesMatch = TRUE;
  1765. for (pageNum = 0; pageNum <= numPages; pageNum++) {
  1766. // Unless this is the first pass through the loop,
  1767. // wait for the previous read of both files to complete.
  1768. if (pageNum > 0) {
  1769. CmpCheckPoint(target, match, !targetReadDone, !matchReadDone);
  1770. success = GetOverlappedResult(
  1771. target->handle,
  1772. &target->readSynch,
  1773. &bytesRead,
  1774. FALSE);
  1775. if (!success) {
  1776. DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
  1777. driveLetterName, target->fileName, GetLastError()));
  1778. throw TARGET_ERROR;
  1779. }
  1780. if (bytesRead != prevBytesToRequest &&
  1781. bytesRead != prevBytesToRead) {
  1782. DPRINTF((_T("%s: cmp read only %lu of %lu bytes from target file %s\n"),
  1783. driveLetterName, bytesRead, prevBytesToRequest, target->fileName));
  1784. throw TARGET_ERROR;
  1785. }
  1786. success = GetOverlappedResult(
  1787. match->handle,
  1788. &match->readSynch,
  1789. &bytesRead,
  1790. FALSE);
  1791. if (!success) {
  1792. #if DBG
  1793. if (match->entry.fileID != 0) {
  1794. DPRINTF((_T("%s: error getting match file %s read results: %lu\n"),
  1795. driveLetterName, match->fileName, GetLastError()));
  1796. } else {
  1797. DPRINTF((_T("%s: error getting CS file %s read results: %lu\n"),
  1798. driveLetterName, match->fileName, GetLastError()));
  1799. }
  1800. #endif
  1801. throw MATCH_ERROR;
  1802. }
  1803. if (bytesRead != prevBytesToRequest &&
  1804. bytesRead != prevBytesToRead) {
  1805. #if DBG
  1806. if (match->entry.fileID != 0) {
  1807. DPRINTF((_T("%s: read only %lu of %lu bytes from match file %s\n"),
  1808. driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
  1809. } else {
  1810. DPRINTF((_T("%s: read only %lu of %lu bytes from CS file %s\n"),
  1811. driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
  1812. }
  1813. #endif
  1814. throw MATCH_ERROR;
  1815. }
  1816. if (bytesRead >= cmpReportThreshold) {
  1817. compareReadCount += 2;
  1818. if (targetReadDone) { // Non-overlapped
  1819. targetTime = target->stopTime - target->startTime;
  1820. matchTime = match ->stopTime - match ->startTime;
  1821. compareReadTime += targetTime + matchTime;
  1822. } else { // Overlapped
  1823. targetTime = target->stopTime - target->startTime;
  1824. matchTime = match ->stopTime - target->startTime;
  1825. compareReadTime += targetTime > matchTime ? targetTime : matchTime;
  1826. }
  1827. }
  1828. if (!filesMatch)
  1829. break;
  1830. }
  1831. // Unless all pages of the target file have already been read,
  1832. // begin reading the next page of the file.
  1833. if (pageNum < numPages) {
  1834. offset.QuadPart = pageNum * CMP_PAGE_SIZE;
  1835. target->readSynch.Offset =
  1836. match ->readSynch.Offset = offset.LowPart;
  1837. target->readSynch.OffsetHigh =
  1838. match ->readSynch.OffsetHigh = offset.HighPart;
  1839. bytesToRead = pageNum < numPages-1 ? CMP_PAGE_SIZE : lastPageSize;
  1840. bytesToRequest = bytesToRead + sectorSize - 1;
  1841. bytesToRequest -= bytesToRequest % sectorSize;
  1842. target->startTime = GetTickCount();
  1843. targetReadDone = ReadFile(target->handle, target->buffer[toggle],
  1844. bytesToRequest, NULL, &target->readSynch);
  1845. target->stopTime = GetTickCount();
  1846. lastError = GetLastError();
  1847. if (targetReadDone) {
  1848. success = ResetEvent(target->readSynch.hEvent);
  1849. ASSERT_ERROR(success);
  1850. } else if (lastError != ERROR_IO_PENDING) {
  1851. DPRINTF((_T("%s: error reading target file %s: %lu\n"),
  1852. driveLetterName, target->fileName, lastError));
  1853. throw TARGET_ERROR;
  1854. }
  1855. match->startTime = GetTickCount();
  1856. matchReadDone = ReadFile(match->handle, match->buffer[toggle],
  1857. bytesToRequest, NULL, &match->readSynch);
  1858. match->stopTime = GetTickCount();
  1859. lastError = GetLastError();
  1860. if (matchReadDone) {
  1861. success = ResetEvent(match->readSynch.hEvent);
  1862. ASSERT_ERROR(success);
  1863. } else if (lastError != ERROR_IO_PENDING) {
  1864. #if DBG
  1865. if (match->entry.fileID != 0) {
  1866. DPRINTF((_T("%s: error reading match file %s: %lu\n"),
  1867. driveLetterName, match->fileName, lastError));
  1868. } else {
  1869. DPRINTF((_T("%s: error reading CS file %s: %lu\n"),
  1870. driveLetterName, match->fileName, lastError));
  1871. }
  1872. #endif
  1873. throw MATCH_ERROR;
  1874. }
  1875. }
  1876. // Unless this is the first pass through the loop,
  1877. // compare the target and match file pages just read.
  1878. if (pageNum > 0)
  1879. filesMatch = memcmp(target->buffer[1-toggle],
  1880. match ->buffer[1-toggle], prevBytesToRead) == 0;
  1881. prevPageNum = pageNum;
  1882. prevBytesToRead = bytesToRead;
  1883. prevBytesToRequest = bytesToRequest;
  1884. toggle = 1-toggle;
  1885. }
  1886. if (!filesMatch) {
  1887. #if DBG
  1888. if (match->entry.fileID != 0) {
  1889. DPRINTF((_T("%s:1 files %s and %s failed compare (sz: 0x%x)\n"),
  1890. driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
  1891. } else {
  1892. DPRINTF((_T("%s:2 files %s and %s failed compare (sz: 0x%x)\n"),
  1893. driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
  1894. }
  1895. #endif
  1896. return FALSE;
  1897. }
  1898. return TRUE;
  1899. }
  1900. /*****************************************************************************/
  1901. // Merge() calls the SIS filter to merge the target and match files.
  1902. BOOL Groveler::Merge(
  1903. FileData *target,
  1904. FileData *match,
  1905. OVERLAPPED *mergeSynch,
  1906. HANDLE abortMergeEvent)
  1907. {
  1908. _SIS_LINK_FILES sisLinkFiles;
  1909. #if DBG
  1910. TCHAR *csName;
  1911. #endif
  1912. DWORD transferCount,
  1913. lastError;
  1914. BOOL mergeDone,
  1915. merged,
  1916. success;
  1917. ASSERT(target != NULL);
  1918. ASSERT(target->handle != NULL);
  1919. ASSERT(target->entry.fileID != 0);
  1920. ASSERT(match != NULL);
  1921. ASSERT(match->handle != NULL);
  1922. ASSERT( match->entry.fileID != 0
  1923. && !HasCSIndex(match->entry.csIndex)
  1924. || match->entry.fileID == 0
  1925. && match->fileName[0] != _T('\0')
  1926. && HasCSIndex(match->entry.csIndex));
  1927. ASSERT(mergeSynch != NULL);
  1928. ASSERT(mergeSynch->Internal == 0);
  1929. ASSERT(mergeSynch->InternalHigh == 0);
  1930. ASSERT(mergeSynch->Offset == 0);
  1931. ASSERT(mergeSynch->OffsetHigh == 0);
  1932. ASSERT(mergeSynch->hEvent != NULL);
  1933. ASSERT(IsReset(mergeSynch->hEvent));
  1934. ASSERT(abortMergeEvent != NULL);
  1935. ASSERT(IsReset(abortMergeEvent));
  1936. ASSERT(target->entry.fileSize == match->entry.fileSize);
  1937. ASSERT(target->entry.signature == match->entry.signature);
  1938. ASSERT(target->entry.attributes == match->entry.attributes);
  1939. ASSERT(grovHandle != NULL);
  1940. // Set up to merge the files.
  1941. if (match->entry.fileID != 0) {
  1942. sisLinkFiles.operation = SIS_LINK_FILES_OP_MERGE;
  1943. sisLinkFiles.u.Merge.file1 = target->handle;
  1944. sisLinkFiles.u.Merge.file2 = match ->handle;
  1945. sisLinkFiles.u.Merge.abortEvent = NULL; // Should be abortMergeEvent
  1946. } else {
  1947. sisLinkFiles.operation = SIS_LINK_FILES_OP_MERGE_CS;
  1948. sisLinkFiles.u.MergeWithCS.file1 = target->handle;
  1949. sisLinkFiles.u.MergeWithCS.abortEvent = NULL; // Should be abortMergeEvent
  1950. sisLinkFiles.u.MergeWithCS.CSid = match->entry.csIndex;
  1951. }
  1952. // Call the SIS filter to merge the files.
  1953. target->startTime = GetTickCount();
  1954. mergeDone = DeviceIoControl(
  1955. grovHandle,
  1956. FSCTL_SIS_LINK_FILES,
  1957. (VOID *)&sisLinkFiles,
  1958. sizeof(_SIS_LINK_FILES),
  1959. NULL,
  1960. 0,
  1961. NULL,
  1962. mergeSynch);
  1963. target->stopTime = GetTickCount();
  1964. // If the merge completed successfully before the call returned, reset
  1965. // the merge done event, get the new CS indices, and close the files.
  1966. if (mergeDone) {
  1967. success = ResetEvent(mergeSynch->hEvent);
  1968. ASSERT_ERROR(success);
  1969. mergeTime += target->stopTime - target->startTime;
  1970. GetCSIndex(target->handle, &target->entry.csIndex);
  1971. if (!HasCSIndex(match->entry.csIndex))
  1972. GetCSIndex(match->handle, &match->entry.csIndex);
  1973. CloseFile(target);
  1974. CloseFile(match);
  1975. }
  1976. // If the merge failed, close the files and return an error status.
  1977. else {
  1978. lastError = GetLastError();
  1979. if (lastError != ERROR_IO_PENDING) {
  1980. CloseFile(target);
  1981. CloseFile(match);
  1982. #if DBG
  1983. if (match->entry.fileID != 0) {
  1984. DPRINTF((_T("%s:3 files %s and %s failed merge: %lu\n"),
  1985. driveLetterName, target->fileName, match->fileName, lastError));
  1986. } else {
  1987. DPRINTF((_T("%s:4 files %s and %s failed merge: %lu\n"),
  1988. driveLetterName, target->fileName, match->fileName, lastError));
  1989. }
  1990. #endif
  1991. return FALSE;
  1992. }
  1993. // If the merge is in progress, wait for it to complete.
  1994. // (MergeCheckPoint() will get the new CS indices and close the files.
  1995. else {
  1996. merged = MergeCheckPoint(target, match, mergeSynch,
  1997. abortMergeEvent, !mergeDone);
  1998. if (!merged) {
  1999. #if DBG
  2000. lastError = GetLastError();
  2001. if (match->entry.fileID != 0) {
  2002. DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
  2003. driveLetterName, target->fileName, match->fileName, lastError));
  2004. } else {
  2005. DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
  2006. driveLetterName, target->fileName, match->fileName, lastError));
  2007. }
  2008. #endif
  2009. return FALSE;
  2010. }
  2011. }
  2012. }
  2013. // If the merge succeeded, analyze and report the results.
  2014. mergeTime += target->stopTime - target->startTime;
  2015. merged = HasCSIndex (target->entry.csIndex)
  2016. && SameCSIndex(target->entry.csIndex, match->entry.csIndex);
  2017. #if DBG
  2018. csName = GetCSName(&target->entry.csIndex);
  2019. if (merged) {
  2020. if (match->entry.fileID != 0) {
  2021. DPRINTF((_T("%s: files %s and %s merged: CS index is %s\n"),
  2022. driveLetterName, target->fileName, match->fileName,
  2023. csName != NULL ? csName : _T("...")));
  2024. } else {
  2025. DPRINTF((_T("%s: files %s and %s merged\n"),
  2026. driveLetterName, target->fileName, match->fileName));
  2027. }
  2028. } else {
  2029. if (match->entry.fileID != 0) {
  2030. DPRINTF((_T("%s:5 files %s and %s merged, but CS indices don't match\n"),
  2031. driveLetterName, target->fileName, match->fileName));
  2032. } else {
  2033. DPRINTF((_T("%s:6 files %s and %s merged, but CS indices don't match\n"),
  2034. driveLetterName, target->fileName, match->fileName));
  2035. }
  2036. }
  2037. if (csName != NULL) {
  2038. FreeCSName(csName);
  2039. csName = NULL;
  2040. }
  2041. #endif
  2042. return merged;
  2043. }
  2044. /*****************************************************************************/
  2045. // Worker() performs the groveling processing.
  2046. VOID Groveler::Worker()
  2047. {
  2048. FileData target,
  2049. match;
  2050. SGNativeQueueEntry queueEntry;
  2051. FIFO *matchList = NULL;
  2052. Table *csIndexTable = NULL;
  2053. OVERLAPPED mergeSynch = { 0, 0, 0, 0, NULL };
  2054. HANDLE abortMergeEvent = NULL;
  2055. TCHAR *csName;
  2056. DatabaseActionList actionList[MAX_ACTIONS];
  2057. BYTE *buffer1 = NULL,
  2058. *buffer2 = NULL,
  2059. *buffer3 = NULL,
  2060. *buffer4 = NULL;
  2061. DWORD queueIndex,
  2062. bufferSize,
  2063. numCompares,
  2064. numMatches,
  2065. numActions;
  2066. #if DBG
  2067. DWORD enqueueTime;
  2068. #endif
  2069. LONG num;
  2070. BOOL needToRetry,
  2071. hashed,
  2072. gotMatch,
  2073. filesMatch,
  2074. merged,
  2075. success;
  2076. CLEAR_FILE(target);
  2077. CLEAR_OVERLAPPED(target.oplock);
  2078. target.handle = NULL;
  2079. CLEAR_FILE(match);
  2080. CLEAR_OVERLAPPED(match.oplock);
  2081. match.handle = NULL;
  2082. _set_new_handler(NewHandler);
  2083. // Create the events.
  2084. try {
  2085. if ((target.oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2086. || (match .oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2087. || (target.readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2088. || (match .readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2089. || (mergeSynch .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2090. || (abortMergeEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) {
  2091. DPRINTF((_T("%s: unable to create events: %lu\n"),
  2092. driveLetterName, GetLastError()));
  2093. throw INITIALIZE_ERROR;
  2094. }
  2095. // Allocate and align the file buffers.
  2096. bufferSize = SIG_PAGE_SIZE > CMP_PAGE_SIZE ? SIG_PAGE_SIZE : CMP_PAGE_SIZE
  2097. + sectorSize;
  2098. buffer1 = new BYTE[bufferSize];
  2099. ASSERT(buffer1 != NULL);
  2100. buffer2 = new BYTE[bufferSize];
  2101. ASSERT(buffer2 != NULL);
  2102. buffer3 = new BYTE[bufferSize];
  2103. ASSERT(buffer3 != NULL);
  2104. buffer4 = new BYTE[bufferSize];
  2105. ASSERT(buffer4 != NULL);
  2106. ASSERT(inUseFileID1 == NULL);
  2107. ASSERT(inUseFileID2 == NULL);
  2108. inUseFileID1 = &target.entry.fileID;
  2109. inUseFileID2 = &match .entry.fileID;
  2110. target.buffer[0] = buffer1 + sectorSize - (PtrToUlong(buffer1) % sectorSize);
  2111. target.buffer[1] = buffer2 + sectorSize - (PtrToUlong(buffer2) % sectorSize);
  2112. match .buffer[0] = buffer3 + sectorSize - (PtrToUlong(buffer3) % sectorSize);
  2113. match .buffer[1] = buffer4 + sectorSize - (PtrToUlong(buffer4) % sectorSize);
  2114. // Signal to grovel() that this thread is alive,
  2115. // then wait for it to signal to start.
  2116. grovelStatus = Grovel_ok;
  2117. ASSERT(IsReset(grovelStopEvent));
  2118. success = SetEvent(grovelStopEvent);
  2119. ASSERT_ERROR(success);
  2120. WaitForEvent(grovelStartEvent);
  2121. if (terminate)
  2122. throw TERMINATE;
  2123. #ifdef _CRTDBG
  2124. _CrtMemState s[2], sdiff;
  2125. int stateIndex = 0;
  2126. _CrtMemCheckpoint(&s[stateIndex]);
  2127. stateIndex = 1;
  2128. #endif
  2129. // The main loop.
  2130. while (TRUE) {
  2131. try {
  2132. #ifdef _CRTDBG
  2133. _CrtMemCheckpoint(&s[stateIndex]);
  2134. if (_CrtMemDifference(&sdiff, &s[stateIndex^1], &s[stateIndex]))
  2135. _CrtMemDumpStatistics(&sdiff);
  2136. stateIndex ^= 1;
  2137. #endif
  2138. hashed = FALSE;
  2139. numCompares = 0;
  2140. numMatches = 0;
  2141. merged = FALSE;
  2142. needToRetry = FALSE;
  2143. // Get a target file. abortGroveling is set when scan_volume is attempting to
  2144. // sync up with this thread. We stop here, a safe place to let scan_volume
  2145. // replace the database.
  2146. if (abortGroveling || !GetTarget(&target, &queueIndex)) {
  2147. CLEAR_FILE(target);
  2148. grovelStatus = Grovel_ok;
  2149. ASSERT(IsReset(grovelStopEvent));
  2150. success = SetEvent(grovelStopEvent);
  2151. ASSERT_ERROR(success);
  2152. WaitForEvent(grovelStartEvent);
  2153. if (terminate)
  2154. throw TERMINATE;
  2155. continue;
  2156. }
  2157. // Calculate the target file's signature.
  2158. hashed = TRUE;
  2159. CalculateSignature(&target);
  2160. // Get a list of match files.
  2161. ASSERT(matchList == NULL);
  2162. ASSERT(csIndexTable == NULL);
  2163. matchList = new FIFO();
  2164. ASSERT(matchList != NULL);
  2165. csIndexTable = new Table();
  2166. ASSERT(csIndexTable != NULL);
  2167. GetMatchList(&target, matchList, csIndexTable);
  2168. // Compare the target file to each match file until a matching file is found
  2169. // or all comparisons fail. Try the SIS files first, then the regular files.
  2170. while (TRUE) {
  2171. try {
  2172. gotMatch = FALSE;
  2173. if (!gotMatch && csIndexTable != NULL) {
  2174. gotMatch = GetCSFile(&target, &match, csIndexTable);
  2175. if (!gotMatch) {
  2176. delete csIndexTable;
  2177. csIndexTable = NULL;
  2178. }
  2179. }
  2180. if (!gotMatch && matchList != NULL) {
  2181. gotMatch = GetMatch(&target, &match, matchList);
  2182. if (!gotMatch) {
  2183. delete matchList;
  2184. matchList = NULL;
  2185. }
  2186. }
  2187. // After comparing the target file to every file on both
  2188. // lists, close the target file and update the database,
  2189. // then go on to process the next target file.
  2190. if (!gotMatch) {
  2191. CloseFile(&target);
  2192. numActions = 3;
  2193. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2194. actionList[0].u.fileID = target.entry.fileID;
  2195. actionList[1].type = TABLE_PUT;
  2196. actionList[1].u.tableEntry = &target.entry;
  2197. actionList[2].type = QUEUE_DELETE;
  2198. actionList[2].u.queueIndex = queueIndex;
  2199. if (needToRetry) {
  2200. queueEntry.fileID = target.entry.fileID;
  2201. queueEntry.parentID = target.parentID;
  2202. queueEntry.reason = 0;
  2203. queueEntry.fileName = NULL;
  2204. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2205. if (queueEntry.retryTime < grovelInterval)
  2206. queueEntry.retryTime = grovelInterval;
  2207. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2208. numActions = 4;
  2209. actionList[3].type = QUEUE_PUT;
  2210. actionList[3].u.queueEntry = &queueEntry;
  2211. }
  2212. #if DBG
  2213. if (!HasCSIndex(target.entry.csIndex)) {
  2214. TRACE_PRINTF(TC_groveler, 4,
  2215. (_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"),
  2216. driveLetterName, target.fileName, target.entry.fileSize,
  2217. target.entry.signature));
  2218. } else {
  2219. csName = GetCSName(&target.entry.csIndex);
  2220. TRACE_PRINTF(TC_groveler, 4,
  2221. (_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"),
  2222. driveLetterName, target.fileName, target.entry.fileSize,
  2223. target.entry.signature, csName != NULL ? csName : _T("...")));
  2224. if (csName != NULL) {
  2225. FreeCSName(csName);
  2226. csName = NULL;
  2227. }
  2228. }
  2229. if (needToRetry) {
  2230. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2231. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2232. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2233. }
  2234. #endif
  2235. DoTransaction(numActions, actionList);
  2236. break;
  2237. }
  2238. // Compare the target file with this match file.
  2239. numCompares++;
  2240. ASSERT(!inCompare);
  2241. inCompare = TRUE;
  2242. filesMatch = Compare(&target, &match);
  2243. inCompare = FALSE;
  2244. if (!filesMatch) {
  2245. CloseFile(&match);
  2246. CLEAR_FILE(match);
  2247. continue;
  2248. }
  2249. // If the target and match files are identical, go on to merge them.
  2250. numMatches++;
  2251. merged = Merge(&target, &match, &mergeSynch, abortMergeEvent);
  2252. // Update the database as follows:
  2253. //
  2254. // - Update the target file's table entry.
  2255. //
  2256. // - If the merge succeeded and the match file was a regular file,
  2257. // update the match file's table entry.
  2258. //
  2259. // - If the merge failed, re-enqueue the target file to be groveled again.
  2260. numActions = 3;
  2261. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2262. actionList[0].u.fileID = target.entry.fileID;
  2263. actionList[1].type = TABLE_PUT;
  2264. actionList[1].u.tableEntry = &target.entry;
  2265. actionList[2].type = QUEUE_DELETE;
  2266. actionList[2].u.queueIndex = queueIndex;
  2267. if (merged) {
  2268. if (match.entry.fileID != 0) {
  2269. actionList[numActions ].type = TABLE_DELETE_BY_FILE_ID;
  2270. actionList[numActions++].u.fileID = match.entry.fileID;
  2271. actionList[numActions ].type = TABLE_PUT;
  2272. actionList[numActions++].u.tableEntry = &match.entry;
  2273. }
  2274. } else {
  2275. queueEntry.fileID = target.entry.fileID;
  2276. queueEntry.parentID = target.parentID;
  2277. queueEntry.reason = 0;
  2278. queueEntry.fileName = NULL;
  2279. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2280. if (queueEntry.retryTime < grovelInterval)
  2281. queueEntry.retryTime = grovelInterval;
  2282. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2283. actionList[numActions ].type = QUEUE_PUT;
  2284. actionList[numActions++].u.queueEntry = &queueEntry;
  2285. }
  2286. #if DBG
  2287. if (!HasCSIndex(target.entry.csIndex)) {
  2288. TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"),
  2289. driveLetterName, target.fileName, target.entry.fileSize,
  2290. target.entry.signature));
  2291. } else {
  2292. csName = GetCSName(&target.entry.csIndex);
  2293. TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"),
  2294. driveLetterName, target.fileName, target.entry.fileSize,
  2295. target.entry.signature, csName != NULL ? csName : _T("...")));
  2296. if (csName != NULL) {
  2297. FreeCSName(csName);
  2298. csName = NULL;
  2299. }
  2300. }
  2301. if (!merged) {
  2302. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2303. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2304. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2305. }
  2306. #endif
  2307. DoTransaction(numActions, actionList);
  2308. break;
  2309. }
  2310. // Match exceptions
  2311. catch (MatchException matchException) {
  2312. inCompare = FALSE;
  2313. switch (matchException) {
  2314. // MATCH_INVALID: the match file doesn't exist or is disallowed. Close the file
  2315. // and remove its entry from the table, then go on to try the next match file.
  2316. case MATCH_INVALID:
  2317. CloseFile(&match);
  2318. if (match.entry.fileID != 0) {
  2319. ASSERT(!HasCSIndex(match.entry.csIndex));
  2320. num = sgDatabase->TableDeleteByFileID(match.entry.fileID);
  2321. if (num < 0)
  2322. throw DATABASE_ERROR;
  2323. ASSERT(num == 1);
  2324. } else {
  2325. ASSERT(HasCSIndex(match.entry.csIndex));
  2326. num = sgDatabase->TableDeleteByCSIndex(&match.entry.csIndex);
  2327. if (num < 0)
  2328. throw DATABASE_ERROR;
  2329. ASSERT(num > 0);
  2330. }
  2331. CLEAR_FILE(match);
  2332. break;
  2333. // MATCH_ERROR: an error occured while opening or reading the match
  2334. // file. Close the file and mark that the target file may need to be
  2335. // groveled again, then go on to try the next match file.
  2336. case MATCH_ERROR:
  2337. CloseFile(&match);
  2338. CLEAR_FILE(match);
  2339. needToRetry = TRUE;
  2340. break;
  2341. // MATCH_STALE: the match file table entry is invalid for some reason.
  2342. // Close the file, remove its entry from the table, enqueue
  2343. // it to be re-groveled, then go on to the next match file.
  2344. case MATCH_STALE:
  2345. CloseFile(&match);
  2346. if (match.entry.fileID != 0) {
  2347. queueEntry.fileID = match.entry.fileID;
  2348. queueEntry.parentID = match.parentID;
  2349. queueEntry.reason = 0;
  2350. queueEntry.readyTime = GetTime() + grovelInterval;
  2351. queueEntry.retryTime = 0;
  2352. queueEntry.fileName = NULL;
  2353. numActions = 2;
  2354. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2355. actionList[0].u.fileID = match.entry.fileID;
  2356. actionList[1].type = QUEUE_PUT;
  2357. actionList[1].u.queueEntry = &queueEntry;
  2358. #if DBG
  2359. enqueueTime = (DWORD)(grovelInterval / 10000);
  2360. DPRINTF((_T(" Enqueuing match file %s to be groveled in %lu.%03lu sec\n"),
  2361. match.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2362. #endif
  2363. DoTransaction(numActions, actionList);
  2364. } else {
  2365. ASSERT(HasCSIndex(match.entry.csIndex));
  2366. EnqueueCSIndex(&match.entry.csIndex);
  2367. }
  2368. CLEAR_FILE(match);
  2369. break;
  2370. default:
  2371. ASSERT_PRINTF(FALSE, (_T("matchException=%lu\n"),
  2372. matchException));
  2373. }
  2374. }
  2375. }
  2376. }
  2377. // Target exceptions
  2378. catch (TargetException targetException) {
  2379. inCompare = FALSE;
  2380. DPRINTF((_T("WORKER: Handling TargetException %d, status=%d\n"),
  2381. targetException,GetLastError()));
  2382. switch (targetException) {
  2383. // TARGET_INVALID: the target file is invalid for some reason: it doesn't
  2384. // exist, it is disallowed properties, it is in the queue by both file
  2385. // name and file ID, or it was in the queue by file name and has already
  2386. // been groveled. Close the files, remove the target file's entry from
  2387. // the table, then go on to grovel the next target file.
  2388. case TARGET_INVALID:
  2389. CloseFile(&target);
  2390. CloseFile(&match);
  2391. if (matchList != NULL) {
  2392. delete matchList;
  2393. matchList = NULL;
  2394. }
  2395. if (csIndexTable != NULL) {
  2396. delete csIndexTable;
  2397. csIndexTable = NULL;
  2398. }
  2399. numActions = 1;
  2400. actionList[0].type = QUEUE_DELETE;
  2401. actionList[0].u.queueIndex = queueIndex;
  2402. if (target.entry.fileID != 0) {
  2403. numActions = 2;
  2404. actionList[1].type = TABLE_DELETE_BY_FILE_ID;
  2405. actionList[1].u.fileID = target.entry.fileID;
  2406. }
  2407. DoTransaction(numActions, actionList);
  2408. break;
  2409. // An error occured while opening or reading the target file. Close
  2410. // the files and re-enqueue the target file to be groveled again.
  2411. case TARGET_ERROR:
  2412. ASSERT(target.entry.fileID != 0
  2413. || target.fileName[0] != _T('\0'));
  2414. CloseFile(&target);
  2415. CloseFile(&match);
  2416. queueEntry.fileID = target.entry.fileID;
  2417. queueEntry.parentID = target.parentID;
  2418. queueEntry.reason = 0;
  2419. queueEntry.fileName = target.entry.fileID == 0
  2420. ? target.fileName : NULL;
  2421. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2422. if (queueEntry.retryTime < grovelInterval)
  2423. queueEntry.retryTime = grovelInterval;
  2424. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2425. actionList[0].type = QUEUE_DELETE;
  2426. actionList[0].u.queueIndex = queueIndex;
  2427. actionList[1].type = QUEUE_PUT;
  2428. actionList[1].u.queueEntry = &queueEntry;
  2429. #if DBG
  2430. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2431. if (target.entry.fileID != 0) {
  2432. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2433. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2434. } else {
  2435. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2436. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2437. }
  2438. #endif
  2439. DoTransaction(2, actionList);
  2440. break;
  2441. default:
  2442. ASSERT_PRINTF(FALSE, (_T("targetException=%lu\n"),
  2443. targetException));
  2444. }
  2445. }
  2446. // Do some clean-up.
  2447. ASSERT(target.handle == NULL);
  2448. ASSERT(match .handle == NULL);
  2449. if (matchList != NULL) {
  2450. delete matchList;
  2451. matchList = NULL;
  2452. }
  2453. if (csIndexTable != NULL) {
  2454. delete csIndexTable;
  2455. csIndexTable = NULL;
  2456. }
  2457. // Update the activity counters for this target file,
  2458. // then go on to process the next file.
  2459. if (hashed) {
  2460. hashCount++;
  2461. hashBytes += target.entry.fileSize;
  2462. }
  2463. compareCount += numCompares;
  2464. compareBytes += numCompares * target.entry.fileSize;
  2465. matchCount += numMatches;
  2466. matchBytes += numMatches * target.entry.fileSize;
  2467. if (merged) {
  2468. mergeCount++;
  2469. mergeBytes += target.entry.fileSize;
  2470. }
  2471. CLEAR_FILE(target);
  2472. CLEAR_FILE(match);
  2473. CLEAR_OVERLAPPED(mergeSynch);
  2474. }
  2475. }
  2476. // Terminal exceptions
  2477. catch (TerminalException terminalException) {
  2478. switch (terminalException) {
  2479. case INITIALIZE_ERROR:
  2480. break;
  2481. // DATABASE_ERROR: an error occured in the database. Return an error status.
  2482. case DATABASE_ERROR:
  2483. break;
  2484. // MEMORY_ERROR: unable to allocate memory. Return an error status.
  2485. case MEMORY_ERROR:
  2486. DPRINTF((_T("%s: Unable to allocate memory\n"),
  2487. driveLetterName));
  2488. break;
  2489. // TERMINATE: grovel() signaled for this thread to terminate.
  2490. case TERMINATE:
  2491. break;
  2492. default:
  2493. ASSERT_PRINTF(FALSE, (_T("terminalException=%lu\n"),
  2494. terminalException));
  2495. }
  2496. }
  2497. // Close the files and clean up.
  2498. CloseFile(&target);
  2499. CloseFile(&target);
  2500. CLEAR_FILE(target);
  2501. CLEAR_FILE(match);
  2502. if (matchList != NULL) {
  2503. delete matchList;
  2504. matchList = NULL;
  2505. }
  2506. if (csIndexTable != NULL) {
  2507. delete csIndexTable;
  2508. csIndexTable = NULL;
  2509. }
  2510. if (target.oplock.hEvent != NULL) {
  2511. success = CloseHandle(target.oplock.hEvent);
  2512. ASSERT_ERROR(success);
  2513. target.oplock.hEvent = NULL;
  2514. }
  2515. if (match.oplock.hEvent != NULL) {
  2516. success = CloseHandle(match.oplock.hEvent);
  2517. ASSERT_ERROR(success);
  2518. match.oplock.hEvent = NULL;
  2519. }
  2520. if (target.readSynch.hEvent != NULL) {
  2521. success = CloseHandle(target.readSynch.hEvent);
  2522. ASSERT_ERROR(success);
  2523. target.readSynch.hEvent = NULL;
  2524. }
  2525. if (match.readSynch.hEvent != NULL) {
  2526. success = CloseHandle(match.readSynch.hEvent);
  2527. ASSERT_ERROR(success);
  2528. match.readSynch.hEvent = NULL;
  2529. }
  2530. if (mergeSynch.hEvent != NULL) {
  2531. success = CloseHandle(mergeSynch.hEvent);
  2532. ASSERT_ERROR(success);
  2533. mergeSynch.hEvent = NULL;
  2534. }
  2535. if (abortMergeEvent != NULL) {
  2536. success = CloseHandle(abortMergeEvent);
  2537. ASSERT_ERROR(success);
  2538. abortMergeEvent = NULL;
  2539. }
  2540. if (buffer1 != NULL) {
  2541. delete [] buffer1;
  2542. buffer1 = NULL;
  2543. }
  2544. if (buffer2 != NULL) {
  2545. delete [] buffer2;
  2546. buffer2 = NULL;
  2547. }
  2548. if (buffer3 != NULL) {
  2549. delete [] buffer3;
  2550. buffer3 = NULL;
  2551. }
  2552. if (buffer4 != NULL) {
  2553. delete [] buffer4;
  2554. buffer4 = NULL;
  2555. }
  2556. inUseFileID1 = NULL;
  2557. inUseFileID2 = NULL;
  2558. // Signal grovel() that this thread is terminating by
  2559. // setting the grovelStop event with an error status.
  2560. grovelThread = NULL;
  2561. grovelStatus = Grovel_error;
  2562. ASSERT(IsReset(grovelStopEvent));
  2563. success = SetEvent(grovelStopEvent);
  2564. ASSERT_ERROR(success);
  2565. }
  2566. /*****************************************************************************/
  2567. /******************* Groveler class static private methods *******************/
  2568. /*****************************************************************************/
  2569. // WorkerThread() runs in its own thread.
  2570. // It calls Worker() to perform the groveling processing.
  2571. DWORD Groveler::WorkerThread(VOID *groveler)
  2572. {
  2573. ((Groveler *)groveler)->Worker();
  2574. return 0; // Dummy return value
  2575. }
  2576. /*****************************************************************************/
  2577. /*********************** Groveler class public methods ***********************/
  2578. /*****************************************************************************/
  2579. BOOL Groveler::set_log_drive(const _TCHAR *drive_name)
  2580. {
  2581. return SGDatabase::set_log_drive(drive_name);
  2582. }
  2583. // is_sis_installed tests whether the SIS filter is
  2584. // installed on a volume by calling SIS copyfile.
  2585. BOOL Groveler::is_sis_installed(const _TCHAR *drive_name)
  2586. {
  2587. HANDLE volHandle;
  2588. SI_COPYFILE copyFile;
  2589. DWORD transferCount,
  2590. lastError;
  2591. BOOL success;
  2592. volHandle = CreateFile(
  2593. drive_name,
  2594. GENERIC_READ,
  2595. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2596. NULL,
  2597. OPEN_EXISTING,
  2598. FILE_FLAG_BACKUP_SEMANTICS,
  2599. NULL);
  2600. if (volHandle == INVALID_HANDLE_VALUE)
  2601. return FALSE;
  2602. copyFile.SourceFileNameLength = 0;
  2603. copyFile.DestinationFileNameLength = 0;
  2604. copyFile.Flags = COPYFILE_SIS_REPLACE;
  2605. success = DeviceIoControl(
  2606. volHandle,
  2607. FSCTL_SIS_COPYFILE,
  2608. (VOID *)&copyFile,
  2609. sizeof(SI_COPYFILE),
  2610. NULL,
  2611. 0,
  2612. &transferCount,
  2613. NULL);
  2614. lastError = GetLastError();
  2615. ASSERT(!success);
  2616. success = CloseHandle(volHandle);
  2617. ASSERT_ERROR(success);
  2618. switch (lastError) {
  2619. case ERROR_INVALID_FUNCTION:
  2620. return FALSE;
  2621. case ERROR_INVALID_PARAMETER:
  2622. return TRUE; //sis is installed on this volume
  2623. default:
  2624. ASSERT_PRINTF(FALSE, (_T("lastError=%lu\n"), lastError));
  2625. }
  2626. return FALSE; // Dummy return value
  2627. }
  2628. /*****************************************************************************/
  2629. // The groveler constructor creates and initializes all class variables.
  2630. Groveler::Groveler()
  2631. {
  2632. volumeHandle = NULL;
  2633. grovHandle = NULL;
  2634. sgDatabase = NULL;
  2635. driveName = NULL;
  2636. driveLetterName = NULL;
  2637. databaseName = NULL;
  2638. numDisallowedIDs = 0;
  2639. numDisallowedNames = 0;
  2640. disallowedIDs = NULL;
  2641. disallowedNames = NULL;
  2642. grovelStartEvent = NULL;
  2643. grovelStopEvent = NULL;
  2644. grovelThread = NULL;
  2645. inUseFileID1 = NULL;
  2646. inUseFileID2 = NULL;
  2647. abortGroveling = FALSE;
  2648. inCompare = FALSE;
  2649. inScan = FALSE;
  2650. terminate = TRUE;
  2651. usnID = lastUSN = UNINITIALIZED_USN;
  2652. }
  2653. /*****************************************************************************/
  2654. // The groveler destructor destroys all class variables.
  2655. Groveler::~Groveler()
  2656. {
  2657. // If the volume is open, call close() to close it.
  2658. close();
  2659. ASSERT(volumeHandle == NULL);
  2660. ASSERT(grovHandle == NULL);
  2661. ASSERT(sgDatabase == NULL);
  2662. ASSERT(driveName == NULL);
  2663. ASSERT(driveLetterName == NULL);
  2664. ASSERT(databaseName == NULL);
  2665. ASSERT(numDisallowedIDs == 0);
  2666. ASSERT(numDisallowedNames == 0);
  2667. ASSERT(disallowedIDs == NULL);
  2668. ASSERT(disallowedNames == NULL);
  2669. ASSERT(grovelStartEvent == NULL);
  2670. ASSERT(grovelStopEvent == NULL);
  2671. ASSERT(grovelThread == NULL);
  2672. ASSERT(inUseFileID1 == NULL);
  2673. ASSERT(inUseFileID2 == NULL);
  2674. ASSERT(terminate);
  2675. ASSERT(!inCompare);
  2676. ASSERT(!inScan);
  2677. ASSERT(usnID == UNINITIALIZED_USN);
  2678. }
  2679. /*****************************************************************************/
  2680. // Open() opens the specified volume.
  2681. GrovelStatus Groveler::open(
  2682. IN const TCHAR *drive_name,
  2683. IN const TCHAR *drive_letterName,
  2684. IN BOOL is_log_drive,
  2685. IN DOUBLE read_report_discard_threshold,
  2686. IN DWORD min_file_size,
  2687. IN DWORD min_file_age,
  2688. IN BOOL allow_compressed_files,
  2689. IN BOOL allow_encrypted_files,
  2690. IN BOOL allow_hidden_files,
  2691. IN BOOL allow_offline_files,
  2692. IN BOOL allow_temporary_files,
  2693. IN int previousGrovelAllPathsState,
  2694. IN DWORD num_excluded_paths,
  2695. IN const TCHAR **excluded_paths,
  2696. IN DWORD base_regrovel_interval,
  2697. IN DWORD max_regrovel_interval)
  2698. {
  2699. DWORD threadID;
  2700. TCHAR fileStr[MAX_PATH+1];
  2701. TCHAR listValue[MAX_PATH+1],
  2702. *strPtr;
  2703. USN_JOURNAL_DATA usnJournalData;
  2704. SGNativeListEntry listEntry;
  2705. DWORDLONG fileID;
  2706. DWORD sectorsPerCluster,
  2707. numberOfFreeClusters,
  2708. totalNumberOfClusters,
  2709. bufferSize,
  2710. strLen,
  2711. i;
  2712. GrovelStatus openStatus;
  2713. LONG num;
  2714. BOOL success;
  2715. ASSERT(volumeHandle == NULL);
  2716. ASSERT(grovHandle == NULL);
  2717. ASSERT(sgDatabase == NULL);
  2718. ASSERT(databaseName == NULL);
  2719. ASSERT(numDisallowedIDs == 0);
  2720. ASSERT(numDisallowedNames == 0);
  2721. ASSERT(disallowedIDs == NULL);
  2722. ASSERT(disallowedNames == NULL);
  2723. ASSERT(grovelStartEvent == NULL);
  2724. ASSERT(grovelStopEvent == NULL);
  2725. ASSERT(grovelThread == NULL);
  2726. ASSERT(inUseFileID1 == NULL);
  2727. ASSERT(inUseFileID2 == NULL);
  2728. ASSERT(terminate);
  2729. ASSERT(!inCompare);
  2730. ASSERT(!inScan);
  2731. ASSERT(usnID == UNINITIALIZED_USN);
  2732. #if 0
  2733. while (!IsDebuggerPresent())
  2734. Sleep(2000);
  2735. DebugBreak();
  2736. #endif
  2737. //
  2738. // Make sure that the filter has run phase 2 initialization if this is
  2739. // a SIS enabled volume.
  2740. //
  2741. is_sis_installed(drive_name);
  2742. //
  2743. // Get drive name without trailing slash
  2744. //
  2745. int nBufSize = wcslen(drive_name) + 1; //in chars
  2746. driveName = new TCHAR[nBufSize];
  2747. (void)StringCchCopy(driveName, nBufSize, drive_name);
  2748. TrimTrailingChar(driveName,L'\\');
  2749. //
  2750. // Get drive Letter name without trailing "\" or ":"
  2751. //
  2752. nBufSize = wcslen(drive_letterName) + 1; //in chars
  2753. driveLetterName = new TCHAR[nBufSize];
  2754. (void)StringCchCopy(driveLetterName, nBufSize, drive_letterName);
  2755. TrimTrailingChar(driveLetterName,L'\\');
  2756. TrimTrailingChar(driveLetterName,L':');
  2757. #ifdef _CRTDBG
  2758. // Send all reports to STDOUT
  2759. _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE );
  2760. _CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDERR );
  2761. _CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE );
  2762. _CrtSetReportFile( _CRT_ERROR, _CRTDBG_FILE_STDERR );
  2763. _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
  2764. _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
  2765. #endif
  2766. // Open the volume and the GrovelerFile. The SIS fsctl
  2767. // functions require that we pass in a handle to GrovelerFile as a means
  2768. // of proving our "privilege". An access violation is returned if we don't.
  2769. volumeHandle = CreateFile(
  2770. driveName,
  2771. GENERIC_READ | GENERIC_WRITE,
  2772. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2773. NULL,
  2774. OPEN_EXISTING,
  2775. FILE_FLAG_OVERLAPPED|FILE_FLAG_BACKUP_SEMANTICS,
  2776. NULL);
  2777. if (volumeHandle == INVALID_HANDLE_VALUE) {
  2778. volumeHandle = NULL;
  2779. DPRINTF((_T("%s: Can't open volume \"%s\" %lu\n"),
  2780. driveLetterName, driveName, GetLastError()));
  2781. close();
  2782. return Grovel_error;
  2783. }
  2784. (void)StringCbCopy(fileStr,sizeof(fileStr),driveName);
  2785. (void)StringCbCat(fileStr,sizeof(fileStr),CS_DIR_PATH);
  2786. (void)StringCbCat(fileStr,sizeof(fileStr),_T("\\"));
  2787. (void)StringCbCat(fileStr,sizeof(fileStr),GROVELER_FILE_NAME);
  2788. grovHandle = CreateFile(
  2789. fileStr,
  2790. GENERIC_READ | GENERIC_WRITE,
  2791. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2792. NULL,
  2793. OPEN_EXISTING,
  2794. FILE_FLAG_OVERLAPPED,
  2795. NULL);
  2796. if (grovHandle == INVALID_HANDLE_VALUE) {
  2797. grovHandle = NULL;
  2798. DPRINTF((_T("%s: can't open groveler file \"%s\": %lu\n"),
  2799. driveLetterName, fileStr, GetLastError()));
  2800. close();
  2801. return Grovel_error;
  2802. }
  2803. (void)StringCbCopy(fileStr,sizeof(fileStr),driveName);
  2804. (void)StringCbCat(fileStr,sizeof(fileStr),_T("\\"));
  2805. success = GetDiskFreeSpace(fileStr, &sectorsPerCluster, &sectorSize,
  2806. &numberOfFreeClusters, &totalNumberOfClusters);
  2807. ASSERT(success);
  2808. ASSERT(SIG_PAGE_SIZE % sectorSize == 0);
  2809. ASSERT(CMP_PAGE_SIZE % sectorSize == 0);
  2810. sigReportThreshold =
  2811. (DWORD)((DOUBLE)SIG_PAGE_SIZE * read_report_discard_threshold);
  2812. cmpReportThreshold =
  2813. (DWORD)((DOUBLE)CMP_PAGE_SIZE * read_report_discard_threshold);
  2814. //
  2815. // Open this volume's database. If this fails, create a
  2816. // new database. If that fails, return an error status.
  2817. //
  2818. ASSERT(databaseName == NULL);
  2819. strLen = _tcslen(driveName) + _tcslen(CS_DIR_PATH) + _tcslen(DATABASE_FILE_NAME) + 1; // +1 for '\'
  2820. databaseName = new TCHAR[strLen+1];
  2821. ASSERT(databaseName != NULL);
  2822. (void)StringCchPrintf(databaseName, (strLen+1), _T("%s%s\\%s"), driveName, CS_DIR_PATH, DATABASE_FILE_NAME);
  2823. sgDatabase = new SGDatabase();
  2824. if (sgDatabase == NULL) {
  2825. DPRINTF((_T("%s: can't create database object\n"),
  2826. driveLetterName));
  2827. close();
  2828. return Grovel_error;
  2829. }
  2830. openStatus = Grovel_ok;
  2831. if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
  2832. DPRINTF((_T("%s: can't initialize usnID\n"),
  2833. driveLetterName));
  2834. } else {
  2835. usnID = usnJournalData.UsnJournalID;
  2836. if (!sgDatabase->Open(driveLetterName,databaseName, is_log_drive)) {
  2837. DPRINTF((_T("%s: can't open database \"%s\"\n"),
  2838. driveLetterName, databaseName));
  2839. } else {
  2840. listValue[0] = _T('\0');
  2841. listEntry.name = LAST_USN_NAME;
  2842. listEntry.value = listValue;
  2843. if (sgDatabase->ListRead(&listEntry) <= 0
  2844. || _stscanf(listValue, _T("%I64x"), &lastUSN) != 1
  2845. || lastUSN == UNINITIALIZED_USN) {
  2846. DPRINTF((_T("%s: can't get last USN value\n"), driveLetterName));
  2847. } else {
  2848. DWORDLONG storedUsnID;
  2849. listValue[0] = _T('\0');
  2850. listEntry.name = USN_ID_NAME;
  2851. listEntry.value = listValue;
  2852. if (sgDatabase->ListRead(&listEntry) <= 0
  2853. || _stscanf(listValue, _T("%I64x"), &storedUsnID) != 1
  2854. || storedUsnID != usnID) {
  2855. DPRINTF((_T("%s: can't get USN ID value from database\n"), driveLetterName));
  2856. } else {
  2857. //
  2858. // See if any uncommited operations
  2859. //
  2860. num = sgDatabase->StackCount();
  2861. if (0 == num) {
  2862. //
  2863. // See if the RIS state changed. If not, we can
  2864. // continue. If so, reset grovel state so we will
  2865. // rescan the volume.
  2866. //
  2867. if (GrovelAllPaths == previousGrovelAllPathsState) {
  2868. goto OpenedDatabase;
  2869. } else {
  2870. DPRINTF((L"GrovelAllPaths state changed, rescanning the volume\n"));
  2871. }
  2872. }
  2873. }
  2874. }
  2875. }
  2876. }
  2877. // Set abortGroveling to block the worker thread, and set lastUSN to block extract_log
  2878. // until scan_volume starts.
  2879. abortGroveling = TRUE;
  2880. lastUSN = usnID = UNINITIALIZED_USN;
  2881. openStatus = Grovel_new;
  2882. OpenedDatabase:
  2883. // Create the disallowed directories list.
  2884. if (num_excluded_paths == 0) {
  2885. disallowedIDs = NULL;
  2886. disallowedNames = NULL;
  2887. } else {
  2888. disallowedIDs = new DWORDLONG[num_excluded_paths];
  2889. disallowedNames = new TCHAR * [num_excluded_paths];
  2890. ASSERT(disallowedIDs != NULL);
  2891. ASSERT(disallowedNames != NULL);
  2892. for (i = 0; i < num_excluded_paths; i++) {
  2893. ASSERT(excluded_paths[i] != NULL);
  2894. if (excluded_paths[i][0] == _T('\\')) {
  2895. strLen = _tcslen(excluded_paths[i]);
  2896. while (strLen > 1 && excluded_paths[i][strLen-1] == _T('\\'))
  2897. strLen--;
  2898. strPtr = new TCHAR[strLen+1];
  2899. ASSERT(strPtr != NULL);
  2900. disallowedNames[numDisallowedNames++] = strPtr;
  2901. _tcsncpy(strPtr, excluded_paths[i], strLen);
  2902. strPtr[strLen] = _T('\0');
  2903. fileID = GetFileID(drive_name,strPtr);
  2904. if (fileID != 0) {
  2905. disallowedIDs[numDisallowedIDs++] = fileID;
  2906. }
  2907. TPRINTF((L"%s: Exclude path=\"%s\", ID=%04I64x.%012I64x\n",
  2908. driveLetterName,
  2909. strPtr,
  2910. ((fileID >> 48) & 0xffff),
  2911. (fileID & 0xffffffffffff)));
  2912. }
  2913. }
  2914. if (numDisallowedNames == 0) {
  2915. delete disallowedNames;
  2916. disallowedNames = NULL;
  2917. } else if (numDisallowedNames > 1)
  2918. qsort(
  2919. disallowedNames,
  2920. numDisallowedNames,
  2921. sizeof(TCHAR *),
  2922. qsStringCompare);
  2923. if (numDisallowedIDs == 0) {
  2924. delete disallowedIDs;
  2925. disallowedIDs = NULL;
  2926. } else if (numDisallowedIDs > 1)
  2927. qsort(
  2928. disallowedIDs,
  2929. numDisallowedIDs,
  2930. sizeof(DWORDLONG),
  2931. FileIDCompare);
  2932. }
  2933. //
  2934. // Set the remaining class values.
  2935. //
  2936. // minFileAge is expressed in 10^-7 seconds, min_file_age in milliseconds.
  2937. //
  2938. minFileSize = min_file_size > MIN_FILE_SIZE ? min_file_size : MIN_FILE_SIZE;
  2939. minFileAge = min_file_age * 10000;
  2940. grovelInterval = minFileAge > MIN_GROVEL_INTERVAL ? minFileAge : MIN_GROVEL_INTERVAL;
  2941. disallowedAttributes = FILE_ATTRIBUTE_DIRECTORY
  2942. | (allow_compressed_files ? 0 : FILE_ATTRIBUTE_COMPRESSED)
  2943. | (allow_encrypted_files ? 0 : FILE_ATTRIBUTE_ENCRYPTED)
  2944. | (allow_hidden_files ? 0 : FILE_ATTRIBUTE_HIDDEN)
  2945. | (allow_offline_files ? 0 : FILE_ATTRIBUTE_OFFLINE)
  2946. | (allow_temporary_files ? 0 : FILE_ATTRIBUTE_TEMPORARY);
  2947. //
  2948. // Create the events used to handshake with the worker thread.
  2949. //
  2950. if ((grovelStartEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2951. || (grovelStopEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) {
  2952. DPRINTF((_T("%s: unable to create events: %lu\n"),
  2953. driveLetterName, GetLastError()));
  2954. close();
  2955. return Grovel_error;
  2956. }
  2957. //
  2958. // Create the worker thread, then wait for it to set
  2959. // the grovelStop event to announce its existence.
  2960. //
  2961. terminate = FALSE;
  2962. grovelThread = CreateThread(
  2963. NULL,
  2964. 0,
  2965. WorkerThread,
  2966. (VOID *)this,
  2967. 0,
  2968. &threadID);
  2969. if (grovelThread == NULL) {
  2970. DPRINTF((_T("%s: can't create the worker thread: %lu\n"),
  2971. driveLetterName, GetLastError()));
  2972. close();
  2973. return Grovel_error;
  2974. }
  2975. WaitForEvent(grovelStopEvent);
  2976. if (grovelStatus == Grovel_error) {
  2977. grovelThread = NULL;
  2978. close();
  2979. return Grovel_error;
  2980. }
  2981. ASSERT(grovelStatus == Grovel_ok);
  2982. return openStatus;
  2983. }
  2984. /*****************************************************************************/
  2985. GrovelStatus Groveler::close()
  2986. {
  2987. DWORD i;
  2988. LONG num;
  2989. BOOL success;
  2990. // If active, signal the worker thread to stop,
  2991. // then wait for it to acknowledge.
  2992. terminate = TRUE;
  2993. if (grovelThread != NULL) {
  2994. ASSERT(grovelStartEvent != NULL);
  2995. ASSERT(grovelStopEvent != NULL);
  2996. timeAllotted = INFINITE;
  2997. do {
  2998. ASSERT(IsReset(grovelStartEvent));
  2999. success = SetEvent(grovelStartEvent);
  3000. ASSERT_ERROR(success);
  3001. WaitForEvent(grovelStopEvent);
  3002. } while (grovelStatus != Grovel_error);
  3003. grovelThread = NULL;
  3004. }
  3005. inCompare = FALSE;
  3006. inScan = FALSE;
  3007. usnID = UNINITIALIZED_USN;
  3008. ASSERT(inUseFileID1 == NULL);
  3009. ASSERT(inUseFileID2 == NULL);
  3010. // Close the events.
  3011. if (grovelStartEvent != NULL) {
  3012. success = CloseHandle(grovelStartEvent);
  3013. ASSERT_ERROR(success);
  3014. grovelStartEvent = NULL;
  3015. }
  3016. if (grovelStopEvent != NULL) {
  3017. success = CloseHandle(grovelStopEvent);
  3018. ASSERT_ERROR(success);
  3019. grovelStopEvent = NULL;
  3020. }
  3021. // If the volume or GrovelerFile are open, close them.
  3022. if (volumeHandle != NULL) {
  3023. success = CloseHandle(volumeHandle);
  3024. ASSERT_ERROR(success);
  3025. volumeHandle = NULL;
  3026. }
  3027. if (grovHandle != NULL) {
  3028. success = CloseHandle(grovHandle);
  3029. ASSERT_ERROR(success);
  3030. grovHandle = NULL;
  3031. }
  3032. // Close this volume's database.
  3033. if (sgDatabase != NULL) {
  3034. delete sgDatabase;
  3035. sgDatabase = NULL;
  3036. }
  3037. if (databaseName != NULL) {
  3038. delete[] databaseName;
  3039. databaseName = NULL;
  3040. }
  3041. // Deallocate the disallowed directory lists.
  3042. if (numDisallowedNames == 0) {
  3043. ASSERT(disallowedNames == NULL);
  3044. } else {
  3045. for (i = 0; i < numDisallowedNames; i++)
  3046. delete (disallowedNames[i]);
  3047. delete disallowedNames;
  3048. disallowedNames = NULL;
  3049. numDisallowedNames = 0;
  3050. }
  3051. if (numDisallowedIDs == 0) {
  3052. ASSERT(disallowedIDs == NULL);
  3053. } else {
  3054. delete disallowedIDs;
  3055. disallowedIDs = NULL;
  3056. numDisallowedIDs = 0;
  3057. }
  3058. if (driveName != NULL) {
  3059. delete[] driveName;
  3060. driveName = NULL;
  3061. }
  3062. if (driveLetterName != NULL) {
  3063. delete[] driveLetterName;
  3064. driveLetterName = NULL;
  3065. }
  3066. return Grovel_ok;
  3067. }
  3068. /*****************************************************************************/
  3069. // grovel() is the front-end method for controlling the groveling
  3070. // process on each NTFS volume. The groveling process itself is
  3071. // implemented in the Worker() method. grovel() starts the groveling
  3072. // process by setting the grovelStart event. Worker() signals back to
  3073. // grovel() that it is finished or has used up its time allocation by
  3074. // setting the grovelStop event, which causes grovel() to return.
  3075. GrovelStatus Groveler::grovel(
  3076. IN DWORD time_allotted,
  3077. OUT DWORD *hash_read_ops,
  3078. OUT DWORD *hash_read_time,
  3079. OUT DWORD *count_of_files_hashed,
  3080. OUT DWORDLONG *bytes_of_files_hashed,
  3081. OUT DWORD *compare_read_ops,
  3082. OUT DWORD *compare_read_time,
  3083. OUT DWORD *count_of_files_compared,
  3084. OUT DWORDLONG *bytes_of_files_compared,
  3085. OUT DWORD *count_of_files_matching,
  3086. OUT DWORDLONG *bytes_of_files_matching,
  3087. OUT DWORD *merge_time,
  3088. OUT DWORD *count_of_files_merged,
  3089. OUT DWORDLONG *bytes_of_files_merged,
  3090. OUT DWORD *count_of_files_enqueued,
  3091. OUT DWORD *count_of_files_dequeued)
  3092. {
  3093. DWORD timeConsumed;
  3094. BOOL success;
  3095. ASSERT(volumeHandle != NULL);
  3096. hashCount = 0;
  3097. hashReadCount = 0;
  3098. hashReadTime = 0;
  3099. hashBytes = 0;
  3100. compareCount = 0;
  3101. compareReadCount = 0;
  3102. compareReadTime = 0;
  3103. compareBytes = 0;
  3104. matchCount = 0;
  3105. matchBytes = 0;
  3106. mergeCount = 0;
  3107. mergeTime = 0;
  3108. mergeBytes = 0;
  3109. numFilesEnqueued = 0;
  3110. numFilesDequeued = 0;
  3111. #ifdef DEBUG_UNTHROTTLED
  3112. timeAllotted = INFINITE;
  3113. #else
  3114. timeAllotted = time_allotted;
  3115. #endif
  3116. startAllottedTime = GetTickCount();
  3117. ASSERT(IsReset(grovelStartEvent));
  3118. success = SetEvent(grovelStartEvent);
  3119. ASSERT_ERROR(success);
  3120. WaitForEvent(grovelStopEvent);
  3121. timeConsumed = GetTickCount() - startAllottedTime;
  3122. // Return the performance statistics.
  3123. if (count_of_files_hashed != NULL)
  3124. *count_of_files_hashed = hashCount;
  3125. if (hash_read_ops != NULL)
  3126. *hash_read_ops = hashReadCount;
  3127. if (hash_read_time != NULL)
  3128. *hash_read_time = hashReadTime;
  3129. if (bytes_of_files_hashed != NULL)
  3130. *bytes_of_files_hashed = hashBytes;
  3131. if (count_of_files_compared != NULL)
  3132. *count_of_files_compared = compareCount;
  3133. if (compare_read_ops != NULL)
  3134. *compare_read_ops = compareReadCount;
  3135. if (compare_read_time != NULL)
  3136. *compare_read_time = compareReadTime;
  3137. if (bytes_of_files_compared != NULL)
  3138. *bytes_of_files_compared = compareBytes;
  3139. if (count_of_files_matching != NULL)
  3140. *count_of_files_matching = matchCount;
  3141. if (bytes_of_files_matching != NULL)
  3142. *bytes_of_files_matching = matchBytes;
  3143. if (count_of_files_merged != NULL)
  3144. *count_of_files_merged = mergeCount;
  3145. if (merge_time != NULL)
  3146. *merge_time = mergeTime;
  3147. if (bytes_of_files_merged != NULL)
  3148. *bytes_of_files_merged = mergeBytes;
  3149. if (count_of_files_enqueued != NULL)
  3150. *count_of_files_enqueued = numFilesEnqueued;
  3151. if (count_of_files_dequeued != NULL)
  3152. *count_of_files_dequeued = numFilesDequeued;
  3153. TRACE_PRINTF(TC_groveler, 2,
  3154. (_T("%s Count Reads Bytes Time (sec)\n"),
  3155. driveLetterName));
  3156. TRACE_PRINTF(TC_groveler, 2,
  3157. (_T(" Hashings: %7lu %7lu %7I64u %4lu.%03lu Time: %5lu.%03lu sec\n"),
  3158. hashCount, hashReadCount, hashBytes,
  3159. hashReadTime / 1000, hashReadTime % 1000,
  3160. timeConsumed / 1000, timeConsumed % 1000));
  3161. TRACE_PRINTF(TC_groveler, 2,
  3162. (_T(" Compares: %7lu %7lu %7I64u %4lu.%03lu Enqueues: %lu\n"),
  3163. compareCount, compareReadCount, compareBytes,
  3164. compareReadTime / 1000, compareReadTime % 1000, numFilesEnqueued));
  3165. TRACE_PRINTF(TC_groveler, 2,
  3166. (_T(" Matches: %7lu %7I64u Dequeues: %lu\n"),
  3167. matchCount, matchBytes, numFilesDequeued));
  3168. TRACE_PRINTF(TC_groveler, 2,
  3169. (_T(" Merges: %7lu %7I64u %4lu.%03lu\n"),
  3170. mergeCount, mergeBytes, mergeTime / 1000, mergeTime % 1000));
  3171. return grovelStatus;
  3172. }
  3173. /*****************************************************************************/
  3174. // count_of_files_in_queue() returns a count of the number
  3175. // of files in this volume's queue waiting to be groveled.
  3176. DWORD Groveler::count_of_files_in_queue() const
  3177. {
  3178. LONG numEntries;
  3179. ASSERT(volumeHandle != NULL);
  3180. ASSERT(sgDatabase != NULL);
  3181. numEntries = sgDatabase->QueueCount();
  3182. if (numEntries < 0)
  3183. return 0;
  3184. TPRINTF((_T("%s: count_of_files_in_queue=%ld\n"),
  3185. driveLetterName, numEntries));
  3186. return (DWORD)numEntries;
  3187. }
  3188. /*****************************************************************************/
  3189. // count_of_files_to_compare() returns 1 if two files are ready to be
  3190. // compared or are in the process of being compared, and 0 otherwise.
  3191. DWORD Groveler::count_of_files_to_compare() const
  3192. {
  3193. DWORD numCompareFiles;
  3194. ASSERT(volumeHandle != NULL);
  3195. ASSERT(sgDatabase != NULL);
  3196. numCompareFiles = inCompare ? 1 : 0;
  3197. TPRINTF((_T("%s: count_of_files_to_compare=%lu\n"),
  3198. driveLetterName, numCompareFiles));
  3199. return numCompareFiles;
  3200. }
  3201. /*****************************************************************************/
  3202. // time_to_first_file_ready() returns the time in milliseconds until
  3203. // the first entry in the queue is ready to be groveled. If the queue
  3204. // is empty, it returns INFINITE. If an error occurs, it returns 0.
  3205. DWORD Groveler::time_to_first_file_ready() const
  3206. {
  3207. SGNativeQueueEntry queueEntry;
  3208. DWORDLONG currentTime;
  3209. DWORD earliestTime;
  3210. LONG num;
  3211. ASSERT(volumeHandle != NULL);
  3212. ASSERT(sgDatabase != NULL);
  3213. queueEntry.fileName = NULL;
  3214. num = sgDatabase->QueueGetFirst(&queueEntry);
  3215. if (num < 0)
  3216. return 0;
  3217. if (num == 0)
  3218. earliestTime = INFINITE;
  3219. else {
  3220. ASSERT(num == 1);
  3221. currentTime = GetTime();
  3222. earliestTime = queueEntry.readyTime > currentTime
  3223. ? (DWORD)((queueEntry.readyTime - currentTime) / 10000)
  3224. : 0;
  3225. }
  3226. TPRINTF((_T("%s: time_to_first_file_ready=%lu.%03lu\n"),
  3227. driveLetterName, earliestTime / 1000, earliestTime % 1000));
  3228. return earliestTime;
  3229. }