Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4060 lines
125 KiB

  1. /*++
  2. Copyright (c) 1998-1999 Microsoft Corporation
  3. Module Name:
  4. groveler.cpp
  5. Abstract:
  6. SIS Groveler file groveling functions
  7. Authors:
  8. Cedric Krumbein, 1998
  9. Environment:
  10. User Mode
  11. Revision History:
  12. --*/
  13. #include "all.hxx"
  // CLEAR_FILE(FD) zeroes the bookkeeping fields of a file record: the
  // database entry (its csIndex is set to nullCSIndex), the parent ID, the
  // retry/start/stop times and the four numeric fields of readSynch, and it
  // truncates fileName to an empty string. Fields not listed here (for
  // example the file handle and the event handles) are left as they are.
  // The expansion is a single comma expression, so it is usable wherever an
  // expression statement is.
  #define CLEAR_FILE(FD) (                            \
      (FD).entry.fileID           = 0,                \
      (FD).entry.fileSize         = 0,                \
      (FD).entry.signature        = 0,                \
      (FD).entry.attributes       = 0,                \
      (FD).entry.csIndex          = nullCSIndex,      \
      (FD).entry.createTime       = 0,                \
      (FD).entry.writeTime        = 0,                \
      (FD).parentID               = 0,                \
      (FD).retryTime              = 0,                \
      (FD).startTime              = 0,                \
      (FD).stopTime               = 0,                \
      (FD).readSynch.Internal     = 0,                \
      (FD).readSynch.InternalHigh = 0,                \
      (FD).readSynch.Offset       = 0,                \
      (FD).readSynch.OffsetHigh   = 0,                \
      (FD).fileName[0]            = _T('\0') )
  // CLEAR_OVERLAPPED(OVL) zeroes the Internal, InternalHigh, Offset and
  // OffsetHigh fields of an overlapped-I/O record. The hEvent field is
  // deliberately not touched, so the record keeps its event handle.
  #define CLEAR_OVERLAPPED(OVL) (     \
      (OVL).Internal     = 0,         \
      (OVL).InternalHigh = 0,         \
      (OVL).Offset       = 0,         \
      (OVL).OffsetHigh   = 0 )
  36. // Is CS index set?
  37. static const CSID nullCSIndex = {
  38. 0, 0, 0,
  39. _T('\0'), _T('\0'), _T('\0'), _T('\0'),
  40. _T('\0'), _T('\0'), _T('\0'), _T('\0')
  41. };
  // HasCSIndex(ID) is non-zero when ID differs bytewise from nullCSIndex,
  // i.e. when a CS index has been set. The comparison length is the size
  // of the argument itself.
  #define HasCSIndex(ID) \
      (memcmp(&(ID), &nullCSIndex, sizeof(ID)) != 0)

  // SameCSIndex(LHS, RHS) is non-zero when the two CS indices are bytewise
  // identical over sizeof(CSID) bytes.
  #define SameCSIndex(LHS, RHS) \
      (memcmp(&(LHS), &(RHS), sizeof(CSID)) == 0)
  46. // Exceptions
  // Values thrown as TerminalException. Within this file MEMORY_ERROR is
  // thrown by NewHandler(), DATABASE_ERROR by the database transaction
  // helpers, and TERMINATE by the checkpoint routines when the terminate
  // flag is set.
  enum TerminalException {
      INITIALIZE_ERROR = 0,
      DATABASE_ERROR   = 1,
      MEMORY_ERROR     = 2,
      TERMINATE        = 3
  };
  // Values thrown to report a problem with the target file. The checkpoint
  // routines throw TARGET_ERROR when the target's oplock broke.
  enum TargetException {
      TARGET_INVALID = 0,
      TARGET_ERROR   = 1
  };
  // Values thrown to report a problem with the match file. CmpCheckPoint()
  // throws MATCH_ERROR when the match file's oplock broke.
  enum MatchException {
      MATCH_INVALID = 0,
      MATCH_ERROR   = 1,
      MATCH_STALE   = 2
  };
  62. /*****************************************************************************/
  63. /************************** Miscellaneous functions **************************/
  64. /*****************************************************************************/
  65. // NewHandler() is installed by _set_new_handler() to throw an
  66. // exception when the system can't allocate any more memory.
  67. static INT __cdecl NewHandler(size_t size)
  68. {
  69. throw MEMORY_ERROR;
  70. return 0; // Dummy return
  71. }
  72. /*****************************************************************************/
  73. // FileIDCompare() is used by qsort() and bsearch()
  74. // to sort or look up a matching file ID.
  75. static INT __cdecl FileIDCompare(
  76. const VOID *id1,
  77. const VOID *id2)
  78. {
  79. DWORDLONG fileID1 = *(DWORDLONG *)id1,
  80. fileID2 = *(DWORDLONG *)id2;
  81. return fileID1 < fileID2 ? -1
  82. : fileID1 > fileID2 ? +1
  83. : 0;
  84. }
  85. /*****************************************************************************/
  86. // qsStringCompare() is used by qsort() to sort an array of character strings.
  87. static INT __cdecl qsStringCompare(
  88. const VOID *str1,
  89. const VOID *str2)
  90. {
  91. return _tcsicmp(*(TCHAR **)str1, *(TCHAR **)str2);
  92. }
  93. /*****************************************************************************/
  94. // bsStringCompare() is used by bsearch() look up a matching character string.
  95. // It is assumed that str1 is the path name string we are searching for and
  96. // str2 is the excluded path name string in the excluded paths list. Note
  97. // that if the excluded path is \a\b, then we return a match on anything that
  98. // is in this directory or subdirectory, as well as an exact match.
  99. // E.g.: \a\b\c\d.foo & \a\b\foo will match, but \a\b.foo will not.
  100. static INT __cdecl bsStringCompare(
  101. const VOID *str1,
  102. const VOID *str2)
  103. {
  104. TCHAR *s1 = *(TCHAR **) str1;
  105. TCHAR *s2 = *(TCHAR **) str2;
  106. // str2 is the excluded name. Make sure we catch subdirectories under it,
  107. // but make sure we don't confuse \a\bx with \a\b
  108. size_t l = _tcslen(s2);
  109. INT r = _tcsnicmp(s1, s2, l);
  110. if (0 == r)
  111. if (_tcslen(s1) > l && _T('\\') != s1[l])
  112. r = 1;
  113. return r;
  114. }
  115. /*****************************************************************************/
  116. /********************** Groveler class private methods ***********************/
  117. /*****************************************************************************/
  118. // IsAllowedID() returns FALSE if the directory or file ID
  119. // is on the list of disallowed IDs, and TRUE otherwise.
  120. BOOL Groveler::IsAllowedID(DWORDLONG fileID) const
  121. {
  122. ASSERT(fileID != 0);
  123. if (numDisallowedIDs == 0) {
  124. ASSERT(disallowedIDs == NULL);
  125. return TRUE;
  126. }
  127. ASSERT(disallowedIDs != NULL);
  128. return bsearch(
  129. &fileID,
  130. disallowedIDs,
  131. numDisallowedIDs,
  132. sizeof(DWORDLONG),
  133. FileIDCompare) == NULL;
  134. }
  135. /*****************************************************************************/
  136. // IsAllowedName() returns FALSE if the directory or file name
  137. // is on the list of disallowed names, and TRUE otherwise.
  138. BOOL Groveler::IsAllowedName(TCHAR *fileName) const
  139. {
  140. ASSERT(fileName != NULL);
  141. if (numDisallowedNames == 0) {
  142. ASSERT(disallowedNames == NULL);
  143. return TRUE;
  144. }
  145. ASSERT(disallowedNames != NULL);
  146. return bsearch(
  147. &fileName,
  148. disallowedNames,
  149. numDisallowedNames,
  150. sizeof(TCHAR *),
  151. bsStringCompare) == NULL;
  152. }
  153. /*****************************************************************************/
  154. // WaitForEvent suspends the thread until the specified event is set.
  155. VOID Groveler::WaitForEvent(HANDLE event)
  156. {
  157. DWORD eventNum;
  158. BOOL success;
  159. ASSERT(event != NULL);
  160. eventNum = WaitForSingleObject(event, INFINITE);
  161. ASSERT_ERROR(eventNum == WAIT_OBJECT_0);
  162. success = ResetEvent(event);
  163. ASSERT_ERROR(success);
  164. }
  165. /*****************************************************************************/
  166. // OpenFileByID() opens the file with the given volumeHandle and fileID.
  167. BOOL Groveler::OpenFileByID(
  168. FileData *file,
  169. BOOL writeEnable)
  170. {
  171. UNICODE_STRING fileIDString;
  172. OBJECT_ATTRIBUTES objectAttributes;
  173. IO_STATUS_BLOCK ioStatusBlock;
  174. NTSTATUS ntStatus;
  175. ASSERT(volumeHandle != NULL);
  176. ASSERT(file != NULL);
  177. ASSERT(file->entry.fileID != 0);
  178. ASSERT(file->handle == NULL);
  179. fileIDString.Length = sizeof(DWORDLONG);
  180. fileIDString.MaximumLength = sizeof(DWORDLONG);
  181. fileIDString.Buffer = (WCHAR *)&file->entry.fileID;
  182. objectAttributes.Length = sizeof(OBJECT_ATTRIBUTES);
  183. objectAttributes.RootDirectory = volumeHandle;
  184. objectAttributes.ObjectName = &fileIDString;
  185. objectAttributes.Attributes = OBJ_CASE_INSENSITIVE;
  186. objectAttributes.SecurityDescriptor = NULL;
  187. objectAttributes.SecurityQualityOfService = NULL;
  188. ntStatus = NtCreateFile(
  189. &file->handle,
  190. GENERIC_READ |
  191. (writeEnable ? GENERIC_WRITE : 0),
  192. &objectAttributes,
  193. &ioStatusBlock,
  194. NULL,
  195. 0,
  196. FILE_SHARE_READ |
  197. FILE_SHARE_DELETE |
  198. (writeEnable ? FILE_SHARE_WRITE : 0),
  199. FILE_OPEN,
  200. FILE_OPEN_BY_FILE_ID |
  201. FILE_OPEN_REPARSE_POINT |
  202. FILE_NO_INTERMEDIATE_BUFFERING,
  203. NULL,
  204. 0);
  205. if (ntStatus == STATUS_SUCCESS) {
  206. DWORD bytesReturned;
  207. MARK_HANDLE_INFO markHandleInfo =
  208. {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};
  209. // Mark the handle so the usn entry for the merge operation (if completed)
  210. // can be detected and skipped.
  211. BOOL rc = DeviceIoControl(
  212. file->handle,
  213. FSCTL_MARK_HANDLE,
  214. &markHandleInfo,
  215. sizeof markHandleInfo,
  216. NULL,
  217. 0,
  218. &bytesReturned,
  219. NULL);
  220. if (!rc) {
  221. DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
  222. driveLetterName, GetLastError()));
  223. }
  224. #if DBG
  225. // Get the file name
  226. ASSERT(file->fileName[0] == _T('\0'));
  227. struct TFileName2 {
  228. ULONG nameLen;
  229. TCHAR name[MAX_PATH+1];
  230. } tFileName[1];
  231. ntStatus = NtQueryInformationFile(
  232. file->handle,
  233. &ioStatusBlock,
  234. tFileName,
  235. sizeof tFileName,
  236. FileNameInformation);
  237. if (ntStatus == STATUS_SUCCESS) {
  238. int c = min(MAX_PATH, tFileName->nameLen / sizeof(TCHAR));
  239. memcpy(file->fileName, tFileName->name, c * sizeof(TCHAR));
  240. file->fileName[c] = _T('\0');
  241. } else {
  242. memcpy(file->fileName, _T("<unresolved name>"), 18 * sizeof(TCHAR));
  243. }
  244. #endif
  245. return TRUE;
  246. }
  247. ASSERT(file->handle == NULL);
  248. SetLastError(RtlNtStatusToDosError(ntStatus));
  249. return FALSE;
  250. }
  251. /*****************************************************************************/
  252. // OpenFileByName() opens the file with the given fileName.
  253. BOOL Groveler::OpenFileByName(
  254. FileData *file,
  255. BOOL writeEnable,
  256. TCHAR *fileName)
  257. {
  258. UNICODE_STRING dosPathName,
  259. ntPathName;
  260. OBJECT_ATTRIBUTES objectAttributes;
  261. IO_STATUS_BLOCK ioStatusBlock;
  262. NTSTATUS ntStatus;
  263. ASSERT(file != NULL);
  264. ASSERT(file->handle == NULL);
  265. if (fileName == NULL)
  266. fileName = file->fileName;
  267. ASSERT(fileName[0] != _T('\0'));
  268. #ifdef _UNICODE
  269. dosPathName.Buffer = fileName;
  270. #else
  271. if (!RtlCreateUnicodeStringFromAsciiz(&dosPathName, fileName)) {
  272. ntStatus = STATUS_NO_MEMORY;
  273. goto Error;
  274. }
  275. #endif
  276. if (RtlDosPathNameToNtPathName_U(dosPathName.Buffer, &ntPathName, NULL, NULL)) {
  277. objectAttributes.Length = sizeof(OBJECT_ATTRIBUTES);
  278. objectAttributes.RootDirectory = NULL;
  279. objectAttributes.ObjectName = &ntPathName;
  280. objectAttributes.Attributes = OBJ_CASE_INSENSITIVE;
  281. objectAttributes.SecurityDescriptor = NULL;
  282. objectAttributes.SecurityQualityOfService = NULL;
  283. ntStatus = NtCreateFile(
  284. &file->handle,
  285. GENERIC_READ |
  286. (writeEnable ? GENERIC_WRITE : 0),
  287. &objectAttributes,
  288. &ioStatusBlock,
  289. NULL,
  290. 0,
  291. FILE_SHARE_READ |
  292. FILE_SHARE_DELETE |
  293. (writeEnable ? FILE_SHARE_WRITE : 0),
  294. FILE_OPEN,
  295. FILE_OPEN_REPARSE_POINT |
  296. FILE_NO_INTERMEDIATE_BUFFERING,
  297. NULL,
  298. 0);
  299. RtlFreeUnicodeString(&ntPathName);
  300. } else {
  301. ntStatus = STATUS_NO_MEMORY;
  302. }
  303. #ifndef _UNICODE
  304. RtlFreeUnicodeString(&dosPathName);
  305. #endif
  306. if (ntStatus == STATUS_SUCCESS) {
  307. DWORD bytesReturned;
  308. MARK_HANDLE_INFO markHandleInfo =
  309. {USN_SOURCE_DATA_MANAGEMENT, volumeHandle, 0};
  310. // Mark the handle so the usn entry for the merge operation (if completed)
  311. // can be detected and skipped.
  312. BOOL rc = DeviceIoControl(
  313. file->handle,
  314. FSCTL_MARK_HANDLE,
  315. &markHandleInfo,
  316. sizeof markHandleInfo,
  317. NULL,
  318. 0,
  319. &bytesReturned,
  320. NULL);
  321. if (!rc) {
  322. DPRINTF((_T("%s: FSCTL_MARK_HANDLE failed, %lu\n"),
  323. driveLetterName, GetLastError()));
  324. }
  325. return TRUE;
  326. }
  327. ASSERT(file->handle == NULL);
  328. SetLastError(RtlNtStatusToDosError(ntStatus));
  329. return FALSE;
  330. }
  331. /*****************************************************************************/
  332. // IsFileMapped() checks if the file is mapped by another user.
  333. BOOL Groveler::IsFileMapped(FileData *file)
  334. {
  335. _SIS_LINK_FILES sisLinkFiles;
  336. DWORD transferCount;
  337. BOOL success;
  338. ASSERT(grovHandle != NULL);
  339. ASSERT(file->handle != NULL);
  340. sisLinkFiles.operation = SIS_LINK_FILES_OP_VERIFY_NO_MAP;
  341. sisLinkFiles.u.VerifyNoMap.file = file->handle;
  342. success = DeviceIoControl(
  343. grovHandle,
  344. FSCTL_SIS_LINK_FILES,
  345. (VOID *)&sisLinkFiles,
  346. sizeof(_SIS_LINK_FILES),
  347. NULL,
  348. 0,
  349. &transferCount,
  350. NULL);
  351. if (success)
  352. return FALSE;
  353. ASSERT(GetLastError() == ERROR_SHARING_VIOLATION);
  354. return TRUE;
  355. }
  356. /*****************************************************************************/
  357. // SetOplock() sets an oplock on the open file.
  358. BOOL Groveler::SetOplock(FileData *file)
  359. {
  360. BOOL success;
  361. ASSERT(file != NULL);
  362. ASSERT(file->handle != NULL);
  363. ASSERT(file->oplock.Internal == 0);
  364. ASSERT(file->oplock.InternalHigh == 0);
  365. ASSERT(file->oplock.Offset == 0);
  366. ASSERT(file->oplock.OffsetHigh == 0);
  367. ASSERT(file->oplock.hEvent != NULL);
  368. ASSERT(IsReset(file->oplock.hEvent));
  369. success = DeviceIoControl(
  370. file->handle,
  371. FSCTL_REQUEST_BATCH_OPLOCK,
  372. NULL,
  373. 0,
  374. NULL,
  375. 0,
  376. NULL,
  377. &file->oplock);
  378. if (success) {
  379. ASSERT(IsSet(file->oplock.hEvent));
  380. success = ResetEvent(file->oplock.hEvent);
  381. ASSERT_ERROR(success);
  382. CLEAR_OVERLAPPED(file->oplock);
  383. SetLastError(0);
  384. return FALSE;
  385. }
  386. if (GetLastError() != ERROR_IO_PENDING) {
  387. ASSERT(IsReset(file->oplock.hEvent));
  388. CLEAR_OVERLAPPED(file->oplock);
  389. return FALSE;
  390. }
  391. return TRUE;
  392. }
  393. /*****************************************************************************/
  394. // CloseFile() closes the file if it is still open. If an oplock was
  395. // set on the file, it then waits for and resets the oplock break
  396. // event triggered by the closing of the file or by an outside access.
  397. VOID Groveler::CloseFile(FileData *file)
  398. {
  399. BOOL success;
  400. ASSERT(file != NULL);
  401. ASSERT(file->oplock.hEvent != NULL);
  402. if (file->handle == NULL) {
  403. ASSERT(file->oplock.Internal == 0);
  404. ASSERT(file->oplock.InternalHigh == 0);
  405. ASSERT(file->oplock.Offset == 0);
  406. ASSERT(file->oplock.OffsetHigh == 0);
  407. ASSERT(IsReset(file->oplock.hEvent));
  408. } else {
  409. success = CloseHandle(file->handle);
  410. ASSERT_ERROR(success);
  411. file->handle = NULL;
  412. if (file->oplock.Internal != 0
  413. || file->oplock.InternalHigh != 0
  414. || file->oplock.Offset != 0
  415. || file->oplock.OffsetHigh != 0) {
  416. WaitForEvent(file->oplock.hEvent);
  417. CLEAR_OVERLAPPED(file->oplock);
  418. }
  419. }
  420. }
  421. /*****************************************************************************/
  422. // CreateDatabase() creates the database. Initialize it such that if
  423. // extract_log is called before scan_volume, it will return Grovel_overrun
  424. // without attempting any USN extraction. Also, the first time scan_volume
  425. // is called (with or without start_over), it will know to initialize
  426. // lastUSN and do a full volume scan.
  427. BOOL Groveler::CreateDatabase(void)
  428. {
  429. USN_JOURNAL_DATA usnJournalData;
  430. TFileName tempName;
  431. TCHAR listValue[17];
  432. DWORDLONG rootID;
  433. SGNativeListEntry listEntry;
  434. LONG num;
  435. tempName.assign(driveName);
  436. tempName.append(_T("\\"));
  437. rootID = GetFileID(tempName.name);
  438. if (rootID == 0) {
  439. DPRINTF((_T("%s: CreateDatabase: can't get root directory ID\n"),
  440. driveLetterName));
  441. goto Error;
  442. }
  443. if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
  444. DWORD lastError = GetLastError();
  445. if (lastError == ERROR_JOURNAL_NOT_ACTIVE) {
  446. DPRINTF((_T("%s: CreateDatabase: journal not active\n"), driveLetterName));
  447. if (set_usn_log_size(65536) != Grovel_ok ||
  448. get_usn_log_info(&usnJournalData) != Grovel_ok) {
  449. DPRINTF((_T("%s: CreateDatabase: can't initialize USN journal\n"),
  450. driveLetterName));
  451. goto Error;
  452. }
  453. } else {
  454. DPRINTF((_T("%s: CreateDatabase: can't initialize last USN\n"),
  455. driveLetterName));
  456. goto Error;
  457. }
  458. }
  459. lastUSN = usnJournalData.NextUsn;
  460. usnID = usnJournalData.UsnJournalID;
  461. sgDatabase->Close();
  462. if (!sgDatabase->Create(databaseName)) {
  463. DPRINTF((_T("%s: CreateDatabase: can't create database \"%s\": %lu\n"),
  464. driveLetterName, databaseName));
  465. goto Error;
  466. }
  467. num = sgDatabase->StackPut(rootID, FALSE);
  468. if (num < 0)
  469. goto Error;
  470. ASSERT(num == 1);
  471. // Write UNINITIALIZED_USN into the database now, to be replaced when scan_volume
  472. // is complete. This will be a flag to indicate if the database contents are valid.
  473. _stprintf(listValue, _T("%016I64x"), UNINITIALIZED_USN);
  474. listEntry.name = LAST_USN_NAME;
  475. listEntry.value = listValue;
  476. num = sgDatabase->ListWrite(&listEntry);
  477. if (num < 0)
  478. goto Error;
  479. ASSERT(num == 1);
  480. _stprintf(listValue, _T("%016I64x"), usnID);
  481. listEntry.name = USN_ID_NAME;
  482. listEntry.value = listValue;
  483. num = sgDatabase->ListWrite(&listEntry);
  484. if (num < 0)
  485. goto Error;
  486. ASSERT(num == 1);
  487. return TRUE;
  488. Error:
  489. lastUSN = usnID = UNINITIALIZED_USN;
  490. return FALSE;
  491. }
  492. /*****************************************************************************/
  493. #define MAX_ACTIONS 5
  494. // DoTransaction() performs the specified operations
  495. // on the database within a single transaction.
  496. VOID Groveler::DoTransaction(
  497. DWORD numActions,
  498. DatabaseActionList *actionList)
  499. {
  500. DatabaseActionList *action;
  501. DWORD i;
  502. LONG num;
  503. ASSERT(sgDatabase != NULL);
  504. ASSERT(actionList != NULL);
  505. if (sgDatabase->BeginTransaction() < 0)
  506. throw DATABASE_ERROR;
  507. for (i = 0; i < numActions; i++) {
  508. action = &actionList[i];
  509. switch(action->type) {
  510. case TABLE_PUT:
  511. ASSERT(action->u.tableEntry != NULL);
  512. num = sgDatabase->TablePut(action->u.tableEntry);
  513. ASSERT(num < 0 || num == 1);
  514. break;
  515. case TABLE_DELETE_BY_FILE_ID:
  516. ASSERT(action->u.fileID != 0);
  517. num = sgDatabase->TableDeleteByFileID(action->u.fileID);
  518. break;
  519. case QUEUE_PUT:
  520. ASSERT(action->u.queueEntry != NULL);
  521. num = sgDatabase->QueuePut(action->u.queueEntry);
  522. ASSERT(num < 0 || num == 1);
  523. if (num == 1)
  524. numFilesEnqueued++;
  525. break;
  526. case QUEUE_DELETE:
  527. ASSERT(action->u.queueIndex != 0);
  528. num = sgDatabase->QueueDelete(action->u.queueIndex);
  529. ASSERT(num <= 1);
  530. if (num == 1)
  531. numFilesDequeued++;
  532. #if DBG
  533. else
  534. DPRINTF((_T("DoTransaction: QUEUE_DELETE unsuccessful (%d)"), num));
  535. #endif
  536. break;
  537. default:
  538. ASSERT_PRINTF(FALSE, (_T("type=%lu\n"), action->type));
  539. }
  540. if (num < 0) {
  541. sgDatabase->AbortTransaction();
  542. throw DATABASE_ERROR;
  543. }
  544. }
  545. if (!sgDatabase->CommitTransaction()) {
  546. sgDatabase->AbortTransaction();
  547. throw DATABASE_ERROR;
  548. }
  549. }
  550. /*****************************************************************************/
  551. // EnqueueCSIndex() deletes all entries with the specified CS index from the
  552. // table and enqueues them to be re-groveled, all within a single transaction.
  553. VOID Groveler::EnqueueCSIndex(CSID *csIndex)
  554. {
  555. SGNativeTableEntry tableEntry;
  556. SGNativeQueueEntry oldQueueEntry,
  557. newQueueEntry;
  558. DWORD count;
  559. LONG num;
  560. ASSERT(csIndex != NULL);
  561. ASSERT(HasCSIndex(*csIndex));
  562. newQueueEntry.parentID = 0;
  563. newQueueEntry.reason = 0;
  564. newQueueEntry.readyTime = GetTime() + grovelInterval;
  565. newQueueEntry.retryTime = 0;
  566. newQueueEntry.fileName = NULL;
  567. oldQueueEntry.fileName = NULL;
  568. count = 0;
  569. if (sgDatabase->BeginTransaction() < 0)
  570. throw DATABASE_ERROR;
  571. tableEntry.csIndex = *csIndex;
  572. num = sgDatabase->TableGetFirstByCSIndex(&tableEntry);
  573. while (num > 0) {
  574. ASSERT(num == 1);
  575. count++;
  576. oldQueueEntry.fileID = tableEntry.fileID;
  577. num = sgDatabase->QueueGetFirstByFileID(&oldQueueEntry);
  578. if (num < 0)
  579. break;
  580. ASSERT(num == 1);
  581. if (num == 0) {
  582. newQueueEntry.fileID = tableEntry.fileID;
  583. num = sgDatabase->QueuePut(&newQueueEntry);
  584. if (num < 0)
  585. break;
  586. ASSERT(num == 1);
  587. numFilesEnqueued++;
  588. }
  589. num = sgDatabase->TableGetNext(&tableEntry);
  590. }
  591. if (num < 0) {
  592. sgDatabase->AbortTransaction();
  593. throw DATABASE_ERROR;
  594. }
  595. num = sgDatabase->TableDeleteByCSIndex(csIndex);
  596. if (num < 0) {
  597. sgDatabase->AbortTransaction();
  598. throw DATABASE_ERROR;
  599. }
  600. ASSERT(count == (DWORD)num);
  601. if (!sgDatabase->CommitTransaction()) {
  602. sgDatabase->AbortTransaction();
  603. throw DATABASE_ERROR;
  604. }
  605. }
  606. /*****************************************************************************/
  607. #define TARGET_OPLOCK_BREAK 0
  608. #define TARGET_READ_DONE 1
  609. #define GROVEL_START 2
  610. #define NUM_EVENTS 3
  611. // SigCheckPoint suspends the thread until the target file completes its read
  612. // operation. If the time allotment expires before the operation completes,
  613. // the grovelStart event is set to signal grovel() to awaken, and this method
  614. // won't return until grovel() sets the grovelStart event. If the file's
  615. // oplock breaks before this method returns, the file will be closed.
  616. VOID Groveler::SigCheckPoint(
  617. FileData *target,
  618. BOOL targetRead)
  619. {
  620. HANDLE events[NUM_EVENTS];
  621. DWORD elapsedTime,
  622. timeOut,
  623. eventNum,
  624. eventTime;
  625. BOOL targetOplockBroke = FALSE,
  626. waitingForGrovelStart = FALSE,
  627. success;
  628. ASSERT(target != NULL);
  629. ASSERT(target->handle != NULL);
  630. ASSERT(target->oplock .hEvent != NULL);
  631. ASSERT(target->readSynch.hEvent != NULL);
  632. ASSERT(grovelStartEvent != NULL);
  633. ASSERT(grovelStopEvent != NULL);
  634. events[TARGET_OPLOCK_BREAK] = target->oplock .hEvent;
  635. events[TARGET_READ_DONE] = target->readSynch.hEvent;
  636. events[GROVEL_START] = grovelStartEvent;
  637. while (TRUE) {
  638. if (waitingForGrovelStart)
  639. timeOut = INFINITE;
  640. else if (timeAllotted == INFINITE)
  641. timeOut = targetRead ? INFINITE : 0;
  642. else {
  643. elapsedTime = GetTickCount() - startAllottedTime;
  644. if (timeAllotted > elapsedTime)
  645. timeOut = targetRead ? timeAllotted - elapsedTime : 0;
  646. else {
  647. waitingForGrovelStart = TRUE;
  648. timeOut = INFINITE;
  649. grovelStatus = Grovel_pending;
  650. ASSERT(IsReset(grovelStopEvent));
  651. success = SetEvent(grovelStopEvent);
  652. ASSERT_ERROR(success);
  653. }
  654. }
  655. eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
  656. eventTime = GetTickCount();
  657. switch (eventNum) {
  658. case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
  659. ASSERT(!targetOplockBroke);
  660. targetOplockBroke = TRUE;
  661. success = ResetEvent(target->oplock.hEvent);
  662. ASSERT_ERROR(success);
  663. if (!targetRead) {
  664. CLEAR_OVERLAPPED(target->oplock);
  665. CloseFile(target);
  666. }
  667. DPRINTF((_T("%s: target file %s oplock broke during hash\n"),
  668. driveLetterName, target->fileName));
  669. break;
  670. case WAIT_OBJECT_0 + TARGET_READ_DONE:
  671. ASSERT(targetRead);
  672. targetRead = FALSE;
  673. success = ResetEvent(target->readSynch.hEvent);
  674. ASSERT_ERROR(success);
  675. target->stopTime = eventTime;
  676. if (targetOplockBroke) {
  677. CLEAR_OVERLAPPED(target->oplock);
  678. CloseFile(target);
  679. }
  680. break;
  681. case WAIT_OBJECT_0 + GROVEL_START:
  682. ASSERT(waitingForGrovelStart);
  683. waitingForGrovelStart = FALSE;
  684. success = ResetEvent(grovelStartEvent);
  685. ASSERT_ERROR(success);
  686. break;
  687. case WAIT_TIMEOUT:
  688. ASSERT(!waitingForGrovelStart);
  689. if (!targetRead) {
  690. if (terminate)
  691. throw TERMINATE;
  692. if (targetOplockBroke)
  693. throw TARGET_ERROR;
  694. return;
  695. }
  696. waitingForGrovelStart = TRUE;
  697. grovelStatus = Grovel_pending;
  698. ASSERT(IsReset(grovelStopEvent));
  699. success = SetEvent(grovelStopEvent);
  700. ASSERT_ERROR(success);
  701. break;
  702. default:
  703. ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
  704. }
  705. }
  706. }
  707. #undef TARGET_OPLOCK_BREAK
  708. #undef TARGET_READ_DONE
  709. #undef GROVEL_START
  710. #undef NUM_EVENTS
  711. /*****************************************************************************/
  712. #define TARGET_OPLOCK_BREAK 0
  713. #define MATCH_OPLOCK_BREAK 1
  714. #define TARGET_READ_DONE 2
  715. #define MATCH_READ_DONE 3
  716. #define GROVEL_START 4
  717. #define NUM_EVENTS 5
  718. // CmpCheckPoint suspends the thread until the target file, the
  719. // match file, or both complete their read operations. If the time
  720. // allotment expires before the operations complete, the grovelStart
  721. // event is set to signal grovel() to awaken, and this method won't
  722. // return until grovel() sets the grovelStart event. If either file's
  723. // oplock breaks before this method returns, the file will be closed.
  724. VOID Groveler::CmpCheckPoint(
  725. FileData *target,
  726. FileData *match,
  727. BOOL targetRead,
  728. BOOL matchRead)
  729. {
  730. HANDLE events[NUM_EVENTS];
  731. DWORD elapsedTime,
  732. timeOut,
  733. eventNum,
  734. eventTime;
  735. BOOL targetOplockBroke = FALSE,
  736. matchOplockBroke = FALSE,
  737. waitingForGrovelStart = FALSE,
  738. success;
  739. ASSERT(target != NULL);
  740. ASSERT(match != NULL);
  741. ASSERT(target->handle != NULL);
  742. ASSERT(match ->handle != NULL);
  743. ASSERT(target->oplock .hEvent != NULL);
  744. ASSERT(match ->oplock .hEvent != NULL);
  745. ASSERT(target->readSynch.hEvent != NULL);
  746. ASSERT(match ->readSynch.hEvent != NULL);
  747. ASSERT(grovelStartEvent != NULL);
  748. ASSERT(grovelStopEvent != NULL);
  749. events[TARGET_OPLOCK_BREAK] = target->oplock .hEvent;
  750. events[MATCH_OPLOCK_BREAK] = match ->oplock .hEvent;
  751. events[TARGET_READ_DONE] = target->readSynch.hEvent;
  752. events[MATCH_READ_DONE] = match ->readSynch.hEvent;
  753. events[GROVEL_START] = grovelStartEvent;
  754. while (TRUE) {
  755. if (waitingForGrovelStart)
  756. timeOut = INFINITE;
  757. else if (timeAllotted == INFINITE)
  758. timeOut = targetRead || matchRead ? INFINITE : 0;
  759. else {
  760. elapsedTime = GetTickCount() - startAllottedTime;
  761. if (timeAllotted > elapsedTime)
  762. timeOut = targetRead || matchRead
  763. ? timeAllotted - elapsedTime : 0;
  764. else {
  765. waitingForGrovelStart = TRUE;
  766. timeOut = INFINITE;
  767. grovelStatus = Grovel_pending;
  768. ASSERT(IsReset(grovelStopEvent));
  769. success = SetEvent(grovelStopEvent);
  770. ASSERT_ERROR(success);
  771. }
  772. }
  773. eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
  774. eventTime = GetTickCount();
  775. switch (eventNum) {
  776. case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
  777. ASSERT(!targetOplockBroke);
  778. targetOplockBroke = TRUE;
  779. success = ResetEvent(target->oplock.hEvent);
  780. ASSERT_ERROR(success);
  781. if (!targetRead) {
  782. CLEAR_OVERLAPPED(target->oplock);
  783. CloseFile(target);
  784. }
  785. DPRINTF((_T("%s: target file %s oplock broke during compare\n"),
  786. driveLetterName, target->fileName));
  787. break;
  788. case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:
  789. ASSERT(!matchOplockBroke);
  790. matchOplockBroke = TRUE;
  791. success = ResetEvent(match->oplock.hEvent);
  792. ASSERT_ERROR(success);
  793. if (!matchRead) {
  794. CLEAR_OVERLAPPED(match->oplock);
  795. CloseFile(match);
  796. }
  797. DPRINTF((_T("%s: match file %s oplock broke during compare\n"),
  798. driveLetterName, match->fileName));
  799. break;
  800. case WAIT_OBJECT_0 + TARGET_READ_DONE:
  801. ASSERT(targetRead);
  802. targetRead = FALSE;
  803. success = ResetEvent(target->readSynch.hEvent);
  804. ASSERT_ERROR(success);
  805. target->stopTime = eventTime;
  806. if (targetOplockBroke) {
  807. CLEAR_OVERLAPPED(target->oplock);
  808. CloseFile(target);
  809. }
  810. break;
  811. case WAIT_OBJECT_0 + MATCH_READ_DONE:
  812. ASSERT(matchRead);
  813. matchRead = FALSE;
  814. success = ResetEvent(match->readSynch.hEvent);
  815. ASSERT_ERROR(success);
  816. match->stopTime = eventTime;
  817. if (matchOplockBroke) {
  818. CLEAR_OVERLAPPED(match->oplock);
  819. CloseFile(match);
  820. }
  821. break;
  822. case WAIT_OBJECT_0 + GROVEL_START:
  823. ASSERT(waitingForGrovelStart);
  824. waitingForGrovelStart = FALSE;
  825. success = ResetEvent(grovelStartEvent);
  826. ASSERT_ERROR(success);
  827. break;
  828. case WAIT_TIMEOUT:
  829. ASSERT(!waitingForGrovelStart);
  830. if (!targetRead && !matchRead) {
  831. if (terminate)
  832. throw TERMINATE;
  833. if (targetOplockBroke)
  834. throw TARGET_ERROR;
  835. if (matchOplockBroke)
  836. throw MATCH_ERROR;
  837. return;
  838. }
  839. waitingForGrovelStart = TRUE;
  840. grovelStatus = Grovel_pending;
  841. ASSERT(IsReset(grovelStopEvent));
  842. success = SetEvent(grovelStopEvent);
  843. ASSERT_ERROR(success);
  844. break;
  845. default:
  846. ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
  847. }
  848. }
  849. }
  850. #undef TARGET_OPLOCK_BREAK
  851. #undef MATCH_OPLOCK_BREAK
  852. #undef TARGET_READ_DONE
  853. #undef MATCH_READ_DONE
  854. #undef GROVEL_START
  855. #undef NUM_EVENTS
  856. /*****************************************************************************/
// Indices into the event array that MergeCheckPoint() waits on.
#define TARGET_OPLOCK_BREAK 0
#define MATCH_OPLOCK_BREAK  1
#define MERGE_DONE          2
#define GROVEL_START        3
#define NUM_EVENTS          4

// MergeCheckPoint suspends the thread until the merge operation is completed.
// If the time allotment expires before the merge is completed, the
// grovelStop event is set to signal grovel() to awaken, and this method
// won't return until grovel() sets the grovelStart event. If either file's
// oplock breaks before the merge is completed, the abortMerge event is set.
//
// Parameters:
//   target, match   - the two files being merged; both must be open and have
//                     an oplock event.
//   mergeSynch      - OVERLAPPED whose event signals completion of the merge.
//   abortMergeEvent - event set here when an oplock breaks while the merge
//                     is still outstanding; reset before returning.
//   merge           - TRUE while the merge is still outstanding. When FALSE
//                     the wait uses a zero time-out, so the method only
//                     drains already-signalled events before returning.
//
// Returns TRUE if the merge completed successfully. Otherwise returns FALSE
// with the last-error value set (STATUS_TIMEOUT if no merge completion was
// ever observed). Both files are closed once the merge completion is seen.
//
// Throws TERMINATE if the terminate flag is set when the method is about
// to return.

BOOL Groveler::MergeCheckPoint(
    FileData   *target,
    FileData   *match,
    OVERLAPPED *mergeSynch,
    HANDLE      abortMergeEvent,
    BOOL        merge)
{
    HANDLE events[NUM_EVENTS];

    DWORD elapsedTime,
          timeOut,
          eventNum,
          eventTime,
          lastError = STATUS_TIMEOUT;

    BOOL targetOplockBroke     = FALSE,
         matchOplockBroke      = FALSE,
         waitingForGrovelStart = FALSE,
         mergeSuccess          = FALSE,
         success;

    ASSERT(target != NULL);
    ASSERT(target->handle != NULL);
    ASSERT(target->oplock.hEvent != NULL);

    ASSERT(match != NULL);
    ASSERT(match->handle != NULL);
    ASSERT(match->oplock.hEvent != NULL);

    ASSERT(mergeSynch != NULL);
    ASSERT(mergeSynch->hEvent != NULL);

    ASSERT(abortMergeEvent != NULL);
    ASSERT(grovelStartEvent != NULL);
    ASSERT(grovelStopEvent != NULL);

    ASSERT(grovHandle != NULL);

    events[TARGET_OPLOCK_BREAK] = target->oplock.hEvent;
    events[MATCH_OPLOCK_BREAK] = match ->oplock.hEvent;
    events[MERGE_DONE] = mergeSynch-> hEvent;
    events[GROVEL_START] = grovelStartEvent;

    while (TRUE) {

        // Choose the time-out for this wait:
        //  - parked waiting for grovel() to restart us: wait forever;
        //  - merge still outstanding: wait for whatever remains of the
        //    time allotment (forever if the allotment is unlimited);
        //  - merge already finished: poll (time-out of zero).
        // If the allotment has already run out, signal grovelStop and park
        // until grovelStart is signalled.

        if (waitingForGrovelStart)
            timeOut = INFINITE;
        else if (timeAllotted == INFINITE)
            timeOut = merge ? INFINITE : 0;
        else {
            elapsedTime = GetTickCount() - startAllottedTime;
            if (timeAllotted > elapsedTime)
                timeOut = merge ? timeAllotted - elapsedTime : 0;
            else {
                waitingForGrovelStart = TRUE;
                timeOut = INFINITE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
            }
        }

        eventNum = WaitForMultipleObjects(NUM_EVENTS, events, FALSE, timeOut);
        eventTime = GetTickCount();

        switch (eventNum) {

            // The target file's oplock broke: note it, clear the oplock's
            // OVERLAPPED state, and request an abort if the merge is still
            // outstanding.

            case WAIT_OBJECT_0 + TARGET_OPLOCK_BREAK:
                ASSERT(!targetOplockBroke);
                targetOplockBroke = TRUE;
                success = ResetEvent(target->oplock.hEvent);
                ASSERT_ERROR(success);
                CLEAR_OVERLAPPED(target->oplock);
                if (merge) {
                    success = SetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                }
                DPRINTF((_T("%s: target file %s oplock broke during merge\n"),
                    driveLetterName, target->fileName));
                break;

            // Same handling for the match file's oplock.

            case WAIT_OBJECT_0 + MATCH_OPLOCK_BREAK:
                ASSERT(!matchOplockBroke);
                matchOplockBroke = TRUE;
                success = ResetEvent(match->oplock.hEvent);
                ASSERT_ERROR(success);
                CLEAR_OVERLAPPED(match->oplock);
                if (merge) {
                    success = SetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                }
                DPRINTF((_T("%s: match file %s oplock broke during merge\n"),
                    driveLetterName, match->fileName));
                break;

            // The merge request completed. Record the stop time, fetch the
            // result, and close both files. The loop keeps running (now
            // polling, since merge is FALSE) until a wait times out.
            //
            // NOTE(review): the "bytes transferred" output of
            // GetOverlappedResult() is received in lastError, and a nonzero
            // value is treated as a failed merge -- presumably the merge
            // request reports its status through that value; confirm
            // against the request issued by the caller.

            case WAIT_OBJECT_0 + MERGE_DONE:
                ASSERT(merge);
                merge = FALSE;
                success = ResetEvent(mergeSynch->hEvent);
                ASSERT_ERROR(success);
                target->stopTime = eventTime;
                mergeSuccess = GetOverlappedResult(
                    grovHandle,
                    mergeSynch,
                    &lastError,
                    FALSE);
                if (!mergeSuccess)
                    lastError = GetLastError();
                else if (lastError != ERROR_SUCCESS)
                    mergeSuccess = FALSE;
                else {
                    // On success, pick up the CS index of the target, and
                    // of the match too if it does not have one yet.
                    GetCSIndex(target->handle, &target->entry.csIndex);
                    if (!HasCSIndex(match->entry.csIndex))
                        GetCSIndex(match->handle, &match->entry.csIndex);
                }
                CloseFile(target);
                CloseFile(match);
                break;

            // grovel() has restarted us after a time-allotment expiry.

            case WAIT_OBJECT_0 + GROVEL_START:
                ASSERT(waitingForGrovelStart);
                waitingForGrovelStart = FALSE;
                success = ResetEvent(grovelStartEvent);
                ASSERT_ERROR(success);
                break;

            // The wait timed out. If the merge is no longer outstanding
            // this is the exit path: reset the abort event and return the
            // merge result. Otherwise the time allotment has expired with
            // the merge still running: signal grovelStop and park.

            case WAIT_TIMEOUT:
                ASSERT(!waitingForGrovelStart);
                if (!merge) {
                    success = ResetEvent(abortMergeEvent);
                    ASSERT_ERROR(success);
                    if (terminate)
                        throw TERMINATE;
                    if (!mergeSuccess)
                        SetLastError(lastError);
                    return mergeSuccess;
                }
                waitingForGrovelStart = TRUE;
                grovelStatus = Grovel_pending;
                ASSERT(IsReset(grovelStopEvent));
                success = SetEvent(grovelStopEvent);
                ASSERT_ERROR(success);
                break;

            // Any other wait result is unexpected; no recovery is attempted
            // here and the loop simply waits again.

            default:
                ASSERT_PRINTF(FALSE, (_T("eventNum=%lu\n"), eventNum));
        }
    }
}

#undef TARGET_OPLOCK_BREAK
#undef MATCH_OPLOCK_BREAK
#undef GROVEL_START
#undef MERGE_DONE
#undef NUM_EVENTS
  1004. /*****************************************************************************/
  1005. // The following seven methods (GetTarget(), CalculateSignature(),
  1006. // GetMatchList(), GetCSFile(), GetMatch(), Compare(), and Merge())
  1007. // implement the phases of the groveling process.
  1008. // Structures used by the methods.
  1009. struct MatchListEntry {
  1010. DWORDLONG fileID,
  1011. createTime,
  1012. writeTime;
  1013. };
// One common-store (CS) file that holds candidate matches, keyed by its CS
// index. GetMatchList() allocates each entry as a BYTE array large enough to
// hold the whole CS name, so "name" is really a variable-length,
// NUL-terminated string; the declared [1] only reserves room for the
// terminator.
struct CSIndexEntry {
    CSID csIndex;     // CS index; also the key used in the CS index table
    TCHAR name[1];    // CS file name (variable length, see above)
};
  1018. /*****************************************************************************/
// GetTarget() is the first phase of groveling a file. It dequeues
// a file to be groveled (the "target" file), opens it, checks that
// it meets all criteria, then passes it on to the next phases.
//
// Parameters:
//   target     - cleared FileData that receives the opened file's handle
//                and its table-entry values (ID, size, attributes, times,
//                CS index).
//   queueIndex - receives the "order" value of the dequeued queue entry.
//
// Returns TRUE when a target file has been opened and has passed every
// check. Returns FALSE when the queue is empty or its first entry is not
// yet ready to be groveled.
//
// Throws:
//   DATABASE_ERROR - a database call failed.
//   TARGET_INVALID - the file is gone, disallowed, a non-SIS reparse point,
//                    already queued by ID, or already groveled.
//   TARGET_ERROR   - the file could not be opened, oplocked or examined,
//                    was modified too recently, or is mapped by another user.

BOOL Groveler::GetTarget(
    FileData *target,
    DWORD    *queueIndex)
{
    SGNativeTableEntry tableEntry;

    SGNativeQueueEntry queueEntry,
                       otherQueueEntry;

    TFileName targetName,
              parentName;

    BY_HANDLE_FILE_INFORMATION fileInfo;

    DWORD lastError;

    DWORDLONG currentTime,
              readyTime;

#if DBG
    DWORD earliestTime;
#endif

    ULARGE_INTEGER word;

    LONG num;

    BOOL byName,
         success;

    TPRINTF((_T("GETTarget: entered\n")));

    ASSERT(target != NULL);
    ASSERT(target->handle == NULL);
    ASSERT(target->entry.fileID == 0);
    ASSERT(target->fileName[0] == _T('\0'));
    ASSERT(!HasCSIndex(target->entry.csIndex));

    ASSERT(queueIndex != NULL);
    ASSERT(sgDatabase != NULL);

    // Dequeue a file to be groveled. If the queue is empty or if the
    // first entry's ready time has not been reached, return FALSE.
    // The queue entry's name, if any, is written straight into
    // target->fileName.

    queueEntry.fileName = target->fileName;
    num = sgDatabase->QueueGetFirst(&queueEntry);
    if (num < 0)
        throw DATABASE_ERROR;
    if (num == 0) {
        DPRINTF((_T("%s: queue is empty\n"), driveLetterName));
        return FALSE;
    }
    ASSERT(num == 1);

    currentTime = GetTime();
    if (queueEntry.readyTime > currentTime) {
#if DBG
        // The division by 10000 yields the milliseconds printed below.
        earliestTime = (DWORD)((queueEntry.readyTime - currentTime) / 10000);
        DPRINTF((_T("%s: earliest queue entry ready to be groveled in %lu.%03lu sec\n"),
            driveLetterName, earliestTime / 1000, earliestTime % 1000));
#endif
        return FALSE;
    }

    *queueIndex = queueEntry.order;
    target->entry.fileID = queueEntry.fileID;
    target->parentID = queueEntry.parentID;
    target->retryTime = queueEntry.retryTime;

    // Open the file by ID or name, and check by name
    // that the file and its parent directory are allowed.
    // A queue entry with a zero file ID identifies the file by
    // parent directory ID plus file name instead.

    byName = target->entry.fileID == 0;
    if (byName) {

        ASSERT(target->parentID != 0);
        ASSERT(target->fileName[0] != _T('\0'));

#ifdef DEBUG_USN_REASON
        DPRINTF((_T("--> 0x%08lx 0x%016I64x:\"%s\"\n"),
            queueEntry.reason, target->parentID, target->fileName));
#endif

        if (!GetFileName(volumeHandle, target->parentID, &parentName)) {
            DPRINTF((_T("%s: can't get name for directory 0x%016I64x\n"),
                driveLetterName, target->parentID));
            throw TARGET_INVALID;
        }

        // The name is checked without the drive prefix ...

        targetName.assign(parentName.name);
        targetName.append(_T("\\"));
        targetName.append(target->fileName);

        if (!IsAllowedName(targetName.name)) {
            DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
                driveLetterName, targetName.name));
            throw TARGET_INVALID;
        }

        // ... but the file is opened with the drive prefix.

        targetName.assign(driveName);
        targetName.append(parentName.name);
        targetName.append(_T("\\"));
        targetName.append(target->fileName);

        if (!OpenFileByName(target, FALSE, targetName.name)) {
            lastError = GetLastError();
            if (lastError == ERROR_FILE_NOT_FOUND
             || lastError == ERROR_PATH_NOT_FOUND) {
                DPRINTF((_T("%s: target file \"%s\" doesn\'t exist\n"),
                    driveLetterName, targetName.name));
                throw TARGET_INVALID;
            }
            DPRINTF((_T("%s: can't open target file \"%s\": %lu\n"),
                driveLetterName, targetName.name, lastError));
            throw TARGET_ERROR;
        }

        // Set an oplock on the target file.

        if (!SetOplock(target)) {
            DPRINTF((_T("%s: can't set oplock on target file \"%s\": %lu\n"),
                driveLetterName, targetName.name, GetLastError()));
            throw TARGET_ERROR;
        }

    } else {

        ASSERT(target->parentID == 0);
        ASSERT(target->fileName[0] == _T('\0'));

        target->parentID = 0;

#ifdef DEBUG_USN_REASON
        DPRINTF((_T("--> 0x%08lx 0x%016I64x 0x%016I64x\n"),
            queueEntry.reason, target->entry.fileID, target->parentID));
#endif

        TPRINTF((_T("GETTarget: Opening %s:0x%016I64x by ID\n"),
            driveName,target->entry.fileID));

        if (!OpenFileByID(target, FALSE)) {
            lastError = GetLastError();
            if (lastError == ERROR_FILE_NOT_FOUND
             || lastError == ERROR_PATH_NOT_FOUND
             || lastError == ERROR_INVALID_PARAMETER) {
                DPRINTF((_T("%s: target file 0x%016I64x doesn\'t exist: %lu\n"),
                    driveLetterName, target->entry.fileID, lastError));
                throw TARGET_INVALID;
            }
            DPRINTF((_T("%s: can't open target file 0x%016I64x: %lu\n"),
                driveLetterName, target->entry.fileID, lastError));
            throw TARGET_ERROR;
        }

        // Set an oplock on the target file.

        TPRINTF((_T("GETTarget: Successfully opened %s:0x%016I64x by ID\n"),
            driveName,target->entry.fileID));

        if (!SetOplock(target)) {
            DPRINTF((_T("%s: can't set oplock on target file %s: %lu\n"),
                driveLetterName, target->fileName, GetLastError()));
            throw TARGET_ERROR;
        }

        // A file opened by ID has its name looked up from the handle,
        // so that the same allowed-name check can be applied.

        if (!GetFileName(target->handle, &targetName)) {
            DPRINTF((_T("%s: can't get name for target file %s\n"),
                driveLetterName, target->fileName));
            throw TARGET_ERROR;
        }

        if (!IsAllowedName(targetName.name)) {
            DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
                driveLetterName, targetName.name));
            throw TARGET_INVALID;
        }
    }

    // Get the information on the target file.

    if (!GetFileInformationByHandle(target->handle, &fileInfo)) {
#if DBG
        if (byName) {
            DPRINTF((_T("%s: can't get information on target file \"%s\": %lu\n"),
                driveLetterName, targetName.name, GetLastError()));
        } else {
            DPRINTF((_T("%s: can't get information on target file %s: %lu\n"),
                driveLetterName, target->fileName, GetLastError()));
        }
#endif
        throw TARGET_ERROR;
    }

    // The file index reported for the handle is the file ID. A file opened
    // by name learns its ID here; a file opened by ID must agree with it.

    word.HighPart = fileInfo.nFileIndexHigh;
    word.LowPart = fileInfo.nFileIndexLow;
    if (byName)
        target->entry.fileID = word.QuadPart;
    else {
        ASSERT(target->entry.fileID == word.QuadPart);
    }

    target->parentID = 0; // We don't need the parent ID any more.

    // If the target file was opened by name, check
    // if it currently has an entry in the queue by ID.

    if (byName) {
        otherQueueEntry.fileID = target->entry.fileID;
        otherQueueEntry.fileName = NULL;
        num = sgDatabase->QueueGetFirstByFileID(&otherQueueEntry);
        if (num < 0)
            throw DATABASE_ERROR;
        if (num > 0) {
            ASSERT(num == 1);
            DPRINTF((_T("%s: target file \"%s\" is already in queue as 0x%016I64x\n"),
                driveLetterName, targetName.name, target->entry.fileID));
            target->entry.fileID = 0; // Prevent the table entry from being deleted.
            throw TARGET_INVALID;
        }
    }

    // Fill in the target file's remaining information values.
    // Of the file attributes, only the encrypted bit is kept.

    word.HighPart = fileInfo.nFileSizeHigh;
    word.LowPart = fileInfo.nFileSizeLow;
    target->entry.fileSize = word.QuadPart;

    target->entry.attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;

    word.HighPart = fileInfo.ftCreationTime.dwHighDateTime;
    word.LowPart = fileInfo.ftCreationTime.dwLowDateTime;
    target->entry.createTime = word.QuadPart;

    word.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime;
    word.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime;
    target->entry.writeTime = word.QuadPart;

    // If the target file is a reparse point, check if it
    // is a SIS reparse point. If it is, get the CS index.

    if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0)
        target->entry.csIndex = nullCSIndex;
    else if (!GetCSIndex(target->handle, &target->entry.csIndex)) {
        DPRINTF((_T("%s: target file %s is a non-SIS reparse point\n"),
            driveLetterName, target->fileName));
        throw TARGET_INVALID;
    }

    // Check if the target file is too small, has more than one link,
    // or has any disallowed attributes.

    if ((fileInfo.dwFileAttributes & disallowedAttributes) != 0
     || fileInfo.nNumberOfLinks != 1
     || target->entry.fileSize < minFileSize) {
        DPRINTF((_T("%s: target file \"%s\" is disallowed\n"),
            driveLetterName, target->fileName));
        throw TARGET_INVALID;
    }

    // If a table entry exists for the target file, check if it is
    // consistent with the information we have on the file. If it is, and
    // the file was opened by name, or if the queue entry was the result
    // of a SIS merge, reject the target with TARGET_INVALID so that the
    // caller can go on to grovel the next target.

    tableEntry.fileID = target->entry.fileID;
    num = sgDatabase->TableGetFirstByFileID(&tableEntry);
    if (num < 0)
        throw DATABASE_ERROR;

    if (num > 0) {
        ASSERT(num == 1);
        ASSERT(tableEntry.fileID == target->entry.fileID);

        if (target->entry.fileSize == tableEntry.fileSize
         && target->entry.attributes == tableEntry.attributes
         && SameCSIndex(target->entry.csIndex, tableEntry.csIndex)
         && target->entry.createTime == tableEntry.createTime
         && target->entry.writeTime == tableEntry.writeTime) {

            if (byName) {
                DPRINTF((_T("%s: target file \"%s\" has already been groveled\n"),
                    driveLetterName, targetName.name));
                target->entry.fileID = 0; // Prevent the table entry from being deleted.
                throw TARGET_INVALID;
            }

            if (queueEntry.reason == USN_REASON_BASIC_INFO_CHANGE) {
                DPRINTF((_T("%s: queue entry for file %s is the result of a SIS merge\n"),
                    driveLetterName, target->fileName));
                target->entry.fileID = 0; // Prevent the table entry from being deleted.
                throw TARGET_INVALID;
            }
        }
    }

    // Check if the time since the target file was last modified is too short
    // (the later of its creation and last-write times plus minFileAge has
    // not been reached yet). If it is, reject the target with TARGET_ERROR.

    readyTime = (target->entry.createTime > target->entry.writeTime
               ? target->entry.createTime : target->entry.writeTime) + minFileAge;
    currentTime = GetTime();
    if (currentTime < readyTime)
        throw TARGET_ERROR;

    // Check if the target file is mapped by another user.

    if (IsFileMapped(target)) {
        DPRINTF((_T("%s: target file %s is already mapped\n"),
            driveLetterName, target->fileName));
        throw TARGET_ERROR;
    }

    TPRINTF((_T("GETTarget: returning\n")));

    return TRUE;
}
  1272. /*****************************************************************************/
// CalculateSignature() calculates the target file's signature. It reads two
// pages, 1/3 and 2/3 through the file, and calculates the signature on each
// page. (When both positions fall on the same page, only one page is read.)
//
// The result is stored in target->entry.signature; a zero-length file gets
// a signature of zero. The reads are overlapped and alternate between
// target->buffer[0] and target->buffer[1], so that the checksum of one page
// is computed after the read of the next page has been started.
//
// Throws TARGET_ERROR if a read fails or returns an unexpected byte count.

VOID Groveler::CalculateSignature(FileData *target)
{
    DWORD lastPageSize,
          bytesToRead,
          prevBytesToRead,
          bytesToRequest,
          prevBytesToRequest,
          bytesRead,
          toggle,
          lastError;

    DWORDLONG numPages,
              pageNum,
              prevPageNum,
              lastPageNum,
              firstPageToRead,
              lastPageToRead;

    ULARGE_INTEGER offset;

    BOOL targetReadDone,
         success;

    INT i,
        nPagesToRead;

    ASSERT(target != NULL);
    ASSERT(target->entry.fileID != 0);
    ASSERT(target->handle != NULL);

    target->entry.signature = 0;

    if (0 == target->entry.fileSize)
        return;

    // Number of signature pages in the file, and the number of bytes
    // actually occupied in the last (possibly partial) page.

    numPages = (target->entry.fileSize - 1) / SIG_PAGE_SIZE + 1;
    lastPageSize = (DWORD)((target->entry.fileSize - 1) % SIG_PAGE_SIZE) + 1;
    lastPageNum = numPages - 1;

    ASSERT(numPages > 0);

    // The two pages are placed symmetrically: the first is about a third
    // of the way in, the second is the same distance from the end.

    firstPageToRead = (numPages + 2) / 3 - 1;
    lastPageToRead = lastPageNum - firstPageToRead;

    if (lastPageToRead > firstPageToRead)
        nPagesToRead = 2;
    else
        nPagesToRead = 1;

    toggle = 0;
    pageNum = firstPageToRead;

    // We'll read at most two pages, but make at most three passes through the loop
    // since we're doing asynchronous reads.

    for (i = 0; i <= nPagesToRead; ++i) {

        // Unless this is the first pass through the loop,
        // wait for the previous read of the target file to complete.
        // NOTE(review): the second argument of SigCheckPoint() is TRUE when
        // the previous ReadFile() did not complete immediately -- presumably
        // it tells SigCheckPoint() that a read is still pending; confirm.

        if (i > 0) {
            SigCheckPoint(target, !targetReadDone);

            success = GetOverlappedResult(
                target->handle,
                &target->readSynch,
                &bytesRead,
                FALSE);
            if (!success) {
                DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
                    driveLetterName, target->fileName, GetLastError()));
                throw TARGET_ERROR;
            }

            // Accept either the sector-rounded request size or the exact
            // number of bytes the page holds.

            if (bytesRead != prevBytesToRequest &&
                bytesRead != prevBytesToRead) {
                DPRINTF((_T("%s: sig read only %lu of %lu bytes from target file %s\n"),
                    driveLetterName, bytesRead, prevBytesToRequest, target->fileName));
                throw TARGET_ERROR;
            }

            // Only reads of at least sigReportThreshold bytes are counted
            // in the hash-read statistics.

            if (bytesRead >= sigReportThreshold) {
                hashReadCount++;
                hashReadTime += target->stopTime - target->startTime;
            }
        }

        // Unless we've read all of the pages, begin reading the next page.

        if (i < nPagesToRead) {
            offset.QuadPart = pageNum * SIG_PAGE_SIZE;
            target->readSynch.Offset = offset.LowPart;
            target->readSynch.OffsetHigh = offset.HighPart;

            // The request size is rounded up to a multiple of sectorSize.

            bytesToRead = pageNum == lastPageNum ? lastPageSize : SIG_PAGE_SIZE;
            bytesToRequest = bytesToRead + sectorSize - 1;
            bytesToRequest -= bytesToRequest % sectorSize;

            target->startTime = GetTickCount();
            targetReadDone = ReadFile(target->handle, target->buffer[toggle],
                bytesToRequest, NULL, &target->readSynch);
            target->stopTime = GetTickCount();
            lastError = GetLastError();

            // A read that completed immediately has its event reset here;
            // ERROR_IO_PENDING means the read is still in flight; anything
            // else is a failure.

            if (targetReadDone) {
                success = ResetEvent(target->readSynch.hEvent);
                ASSERT_ERROR(success);
            } else if (lastError != ERROR_IO_PENDING) {
                DPRINTF((_T("%s: error reading target file %s: %lu\n"),
                    driveLetterName, target->fileName, lastError));
                throw TARGET_ERROR;
            }
        }

        // Unless this is the first pass through the loop,
        // calculate the signature of the target file page just read.
        // It sits in the buffer not used by the read started above, and the
        // running signature is fed back in as the last argument.

        if (i > 0)
            target->entry.signature = Checksum((VOID *)target->buffer[1-toggle],
                prevBytesToRead, prevPageNum * SIG_PAGE_SIZE, target->entry.signature);

        // Remember what was just requested, swap buffers, and move on to
        // the second page.

        prevPageNum = pageNum;
        prevBytesToRead = bytesToRead;
        prevBytesToRequest = bytesToRequest;
        toggle = 1-toggle;

        pageNum = lastPageToRead;
    }
}
  1377. /*****************************************************************************/
  1378. // GetMatchList() looks for file entries in the database ("match" files)
  1379. // with the same size, signature, and attributes as the target file.
  1380. VOID Groveler::GetMatchList(
  1381. FileData *target,
  1382. FIFO *matchList,
  1383. Table *csIndexTable)
  1384. {
  1385. SGNativeTableEntry tableEntry;
  1386. MatchListEntry *matchListEntry;
  1387. CSIndexEntry *csIndexEntry;
  1388. TCHAR *csName;
  1389. DWORD nameLen;
  1390. LONG num;
  1391. BOOL success;
  1392. ASSERT(target != NULL);
  1393. ASSERT(target->entry.fileID != 0);
  1394. ASSERT(target->entry.fileSize > 0);
  1395. ASSERT(target->handle != NULL);
  1396. ASSERT(matchList != NULL);
  1397. ASSERT(matchList->Number() == 0);
  1398. ASSERT(csIndexTable != NULL);
  1399. ASSERT(csIndexTable->Number() == 0);
  1400. ASSERT(sgDatabase != NULL);
  1401. tableEntry.fileSize = target->entry.fileSize;
  1402. tableEntry.signature = target->entry.signature;
  1403. tableEntry.attributes = target->entry.attributes;
  1404. #ifdef DEBUG_GET_BY_ATTR
  1405. DPRINTF((_T("--> {%I64u, 0x%016I64x, 0x%lx}\n"),
  1406. tableEntry.fileSize, tableEntry.signature, tableEntry.attributes));
  1407. #endif
  1408. num = sgDatabase->TableGetFirstByAttr(&tableEntry);
  1409. while (num > 0) {
  1410. ASSERT(num == 1);
  1411. ASSERT(tableEntry.fileID != 0);
  1412. ASSERT(tableEntry.fileSize == target->entry.fileSize);
  1413. ASSERT(tableEntry.signature == target->entry.signature);
  1414. ASSERT(tableEntry.attributes == target->entry.attributes);
  1415. if (!HasCSIndex(tableEntry.csIndex)) {
  1416. matchListEntry = new MatchListEntry;
  1417. ASSERT(matchListEntry != NULL);
  1418. matchListEntry->fileID = tableEntry.fileID;
  1419. matchListEntry->createTime = tableEntry.createTime;
  1420. matchListEntry->writeTime = tableEntry.writeTime;
  1421. matchList->Put((VOID *)matchListEntry);
  1422. #ifdef DEBUG_GET_BY_ATTR
  1423. DPRINTF((_T(" 0x%016I64x\n"), tableEntry.fileID));
  1424. #endif
  1425. } else {
  1426. csIndexEntry = (CSIndexEntry *)csIndexTable->Get
  1427. ((const VOID *)&tableEntry.csIndex, sizeof(CSID));
  1428. if (csIndexEntry == NULL) {
  1429. csName = GetCSName(&tableEntry.csIndex);
  1430. ASSERT(csName != NULL);
  1431. nameLen = _tcslen(csName);
  1432. csIndexEntry = (CSIndexEntry *)
  1433. (new BYTE[sizeof(CSIndexEntry) + nameLen * sizeof(TCHAR)]);
  1434. ASSERT(csIndexEntry != NULL);
  1435. csIndexEntry->csIndex = tableEntry.csIndex;
  1436. _tcscpy(csIndexEntry->name, csName);
  1437. FreeCSName(csName);
  1438. csName = NULL;
  1439. success = csIndexTable->Put
  1440. ((VOID *)csIndexEntry, sizeof(CSID));
  1441. ASSERT_ERROR(success);
  1442. }
  1443. #ifdef DEBUG_GET_BY_ATTR
  1444. DPRINTF((_T(" 0x%016I64x %s\n"),
  1445. match->entry.fileID, csIndexEntry->name));
  1446. #endif
  1447. }
  1448. num = sgDatabase->TableGetNext(&tableEntry);
  1449. }
  1450. if (num < 0)
  1451. throw DATABASE_ERROR;
  1452. }
  1453. /*****************************************************************************/
  1454. // GetCSFile() pops the first entry from the CS index table and opens it.
  1455. BOOL Groveler::GetCSFile(
  1456. FileData *target,
  1457. FileData *match,
  1458. Table *csIndexTable)
  1459. {
  1460. CSIndexEntry *csIndexEntry;
  1461. TFileName csFileName;
  1462. DWORD lastError;
  1463. BY_HANDLE_FILE_INFORMATION fileInfo;
  1464. ULARGE_INTEGER fileSize;
  1465. LONG num;
  1466. ASSERT(target != NULL);
  1467. ASSERT(target->entry.fileID != 0);
  1468. ASSERT(target->entry.fileSize > 0);
  1469. ASSERT(target->handle != NULL);
  1470. ASSERT(match != NULL);
  1471. ASSERT(match->entry.fileID == 0);
  1472. ASSERT(match->entry.fileSize == 0);
  1473. ASSERT(match->entry.signature == 0);
  1474. ASSERT(match->entry.attributes == 0);
  1475. ASSERT(!HasCSIndex(match->entry.csIndex));
  1476. ASSERT(match->entry.createTime == 0);
  1477. ASSERT(match->entry.writeTime == 0);
  1478. ASSERT(match->handle == NULL);
  1479. ASSERT(match->parentID == 0);
  1480. ASSERT(match->retryTime == 0);
  1481. ASSERT(match->fileName[0] == _T('\0'));
  1482. ASSERT(csIndexTable != NULL);
  1483. ASSERT(sgDatabase != NULL);
  1484. // Pop the first entry from the CS index table. If the entry's CS
  1485. // index is the same as the target file's, skip to the next entry.
  1486. do {
  1487. csIndexEntry = (CSIndexEntry *)csIndexTable->GetFirst();
  1488. if (csIndexEntry == NULL) {
  1489. match->entry.csIndex = nullCSIndex;
  1490. match->fileName[0] = _T('\0');
  1491. return FALSE;
  1492. }
  1493. ASSERT(HasCSIndex(csIndexEntry->csIndex));
  1494. match->entry.csIndex = csIndexEntry->csIndex;
  1495. _tcscpy(match->fileName, csIndexEntry->name);
  1496. delete csIndexEntry;
  1497. csIndexEntry = NULL;
  1498. } while (SameCSIndex(target->entry.csIndex, match->entry.csIndex));
  1499. match->entry.fileSize = target->entry.fileSize;
  1500. match->entry.signature = target->entry.signature;
  1501. match->entry.attributes = target->entry.attributes;
  1502. csFileName.assign(driveName);
  1503. csFileName.append(CS_DIR_PATH);
  1504. csFileName.append(_T("\\"));
  1505. csFileName.append(match->fileName);
  1506. csFileName.append(_T(".sis"));
  1507. // Open the CS file. If the file doesn't exist, remove all entries
  1508. // from the table that point to this file. If the file can't be
  1509. // opened for any other reason, mark that the target file may
  1510. // need to be groveled again, then go on to the next match file.
  1511. #ifdef DEBUG_GET_BY_ATTR
  1512. DPRINTF((_T("--> %s\n"), match->fileName));
  1513. #endif
  1514. if (!OpenFileByName(match, FALSE, csFileName.name)) {
  1515. lastError = GetLastError();
  1516. if (lastError == ERROR_FILE_NOT_FOUND
  1517. || lastError == ERROR_PATH_NOT_FOUND) {
  1518. DPRINTF((_T("%s: CS file %s doesn't exist\n"),
  1519. driveLetterName, match->fileName));
  1520. throw MATCH_INVALID;
  1521. }
  1522. DPRINTF((_T("%s: can't open CS file %s: %lu\n"),
  1523. driveLetterName, match->fileName, lastError));
  1524. throw MATCH_ERROR;
  1525. }
  1526. // Get the information on the CS file. If this fails,
  1527. // close the file, mark that the target file may need to
  1528. // be groveled again, then go on to the next match file.
  1529. if (!GetFileInformationByHandle(match->handle, &fileInfo)) {
  1530. DPRINTF((_T("%s: can't get information on CS file %s: %lu\n"),
  1531. driveLetterName, match->fileName, GetLastError()));
  1532. throw MATCH_ERROR;
  1533. }
  1534. // If the CS file's information doesn't match its expected values, close the
  1535. // CS file, delete the match file entry from the table, and go on to the
  1536. // next match file. Otherwise, go on to compare the target and CS files.
  1537. fileSize.HighPart = fileInfo.nFileSizeHigh;
  1538. fileSize.LowPart = fileInfo.nFileSizeLow;
  1539. if (match->entry.fileSize != fileSize.QuadPart) {
  1540. DPRINTF((_T("%s: CS file %s doesn't have expected information\n"),
  1541. driveLetterName, match->fileName));
  1542. throw MATCH_STALE;
  1543. }
  1544. return TRUE;
  1545. }
  1546. /*****************************************************************************/
// GetMatch() pops the first entry from the match list and opens it.
//
// Parameters:
//   target    - the open target file; supplies the size, signature and
//               attributes that the match file is expected to share.
//   match     - cleared FileData that receives the match file's ID, times
//               and open handle.
//   matchList - FIFO of MatchListEntry records built by GetMatchList().
//               Every entry popped here is freed here.
//
// Returns TRUE when a match file has been opened, oplocked and found
// consistent with its table entry. Returns FALSE, with match's file ID
// and times cleared, when the list is exhausted.
//
// Throws:
//   DATABASE_ERROR - a queue lookup failed.
//   MATCH_INVALID  - the match file does not exist or is a non-SIS
//                    reparse point.
//   MATCH_ERROR    - the match file could not be opened, oplocked or
//                    examined, or is mapped by another user.
//   MATCH_STALE    - the match file no longer agrees with its table entry,
//                    or has become a SIS reparse point.

BOOL Groveler::GetMatch(
    FileData *target,
    FileData *match,
    FIFO     *matchList)
{
    SGNativeQueueEntry queueEntry;

    MatchListEntry *matchListEntry;

    DWORD attributes,
          lastError;

    BY_HANDLE_FILE_INFORMATION fileInfo;

    ULARGE_INTEGER fileID,
                   fileSize,
                   createTime,
                   writeTime;

    LONG num;

    ASSERT(target != NULL);
    ASSERT(target->entry.fileID != 0);
    ASSERT(target->entry.fileSize > 0);
    ASSERT(target->handle != NULL);

    ASSERT(match != NULL);
    ASSERT(match->entry.fileID == 0);
    ASSERT(match->entry.fileSize == 0);
    ASSERT(match->entry.signature == 0);
    ASSERT(match->entry.attributes == 0);
    ASSERT(!HasCSIndex(match->entry.csIndex));
    ASSERT(match->entry.createTime == 0);
    ASSERT(match->entry.writeTime == 0);
    ASSERT(match->handle == NULL);
    ASSERT(match->parentID == 0);
    ASSERT(match->retryTime == 0);
    ASSERT(match->fileName[0] == _T('\0'));

    ASSERT(matchList != NULL);
    ASSERT(sgDatabase != NULL);

    // Pop the first entry from the match list. If the entry's file ID is
    // the same as the target file's, or if the entry is on the queue after
    // having been enqueued by extract_log(), skip to the next entry.

    while (TRUE) {
        matchListEntry = (MatchListEntry *)matchList->Get();
        if (matchListEntry == NULL) {
            match->entry.fileID = 0;
            match->entry.createTime = 0;
            match->entry.writeTime = 0;
            return FALSE;
        }

        match->entry.fileID = matchListEntry->fileID;
        match->entry.createTime = matchListEntry->createTime;
        match->entry.writeTime = matchListEntry->writeTime;

        delete matchListEntry;
        matchListEntry = NULL;

        ASSERT(match->entry.fileID != 0);

        if (target->entry.fileID == match->entry.fileID)
            continue;

        queueEntry.fileID = match->entry.fileID;
        queueEntry.fileName = NULL;
        num = sgDatabase->QueueGetFirstByFileID(&queueEntry);
        if (num < 0)
            throw DATABASE_ERROR;

        // A queued entry with a nonzero reason is skipped; one with a
        // zero reason is still accepted as a match candidate.

        if (num > 0) {
            ASSERT(num == 1);
            if (queueEntry.reason != 0) {
                DPRINTF((_T("%s: match file 0x%016I64x is in the queue from USN\n"),
                    driveLetterName, match->entry.fileID));
                continue;
            }
        }

        break;
    }

    // By construction of the match list, the candidate's table entry has
    // the same size, signature and attributes as the target.

    match->entry.fileSize = target->entry.fileSize;
    match->entry.signature = target->entry.signature;
    match->entry.attributes = target->entry.attributes;

    // Open the match file. A missing file is reported as MATCH_INVALID;
    // any other failure to open it is reported as MATCH_ERROR.

#ifdef DEBUG_GET_BY_ATTR
    DPRINTF((_T("--> 0x%016I64x\n"), match->entry.fileID));
#endif

    if (!OpenFileByID(match, FALSE)) {
        lastError = GetLastError();
        if (lastError == ERROR_FILE_NOT_FOUND
         || lastError == ERROR_PATH_NOT_FOUND
         || lastError == ERROR_INVALID_PARAMETER) {
            DPRINTF((_T("%s: match file 0x%016I64x doesn\'t exist: %lu\n"),
                driveLetterName, match->entry.fileID, lastError));
            throw MATCH_INVALID;
        }
        DPRINTF((_T("%s: can't open match file 0x%016I64x: %lu\n"),
            driveLetterName, match->entry.fileID, lastError));
        throw MATCH_ERROR;
    }

    // Set an oplock on the match file.

    if (!SetOplock(match)) {
        DPRINTF((_T("%s: can't set oplock on match file %s: %lu\n"),
            driveLetterName, match->fileName, GetLastError()));
        throw MATCH_ERROR;
    }

    // Get the information on the match file. Failure is reported as
    // MATCH_ERROR.

    if (!GetFileInformationByHandle(match->handle, &fileInfo)) {
        DPRINTF((_T("%s: can't get information on match file %s: %lu\n"),
            driveLetterName, match->fileName, GetLastError()));
        throw MATCH_ERROR;
    }

    fileID.HighPart = fileInfo.nFileIndexHigh;
    fileID.LowPart = fileInfo.nFileIndexLow;
    ASSERT(match->entry.fileID == fileID.QuadPart);

    fileSize.HighPart = fileInfo.nFileSizeHigh;
    fileSize.LowPart = fileInfo.nFileSizeLow;

    // Of the file attributes, only the encrypted bit is compared.

    attributes = fileInfo.dwFileAttributes & FILE_ATTRIBUTE_ENCRYPTED;

    createTime.HighPart = fileInfo.ftCreationTime.dwHighDateTime;
    createTime.LowPart = fileInfo.ftCreationTime.dwLowDateTime;

    writeTime.HighPart = fileInfo.ftLastWriteTime.dwHighDateTime;
    writeTime.LowPart = fileInfo.ftLastWriteTime.dwLowDateTime;

    // If the match file's information isn't consistent with its table entry,
    // report MATCH_STALE.

    if (match->entry.fileSize != fileSize .QuadPart
     || match->entry.attributes != attributes
     || match->entry.createTime != createTime.QuadPart
     || match->entry.writeTime != writeTime .QuadPart) {
        DPRINTF((_T("%s: match file %s doesn't match its information\n"),
            driveLetterName, match->fileName));
        throw MATCH_STALE;
    }

    // Entries on the match list had no CS index in the table, so a reparse
    // point here means the file has changed: a SIS reparse point is stale,
    // any other reparse point is invalid.

    if ((fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) {
        if (GetCSIndex(match->handle, &match->entry.csIndex)) {
            DPRINTF((_T("%s: match file %s is a SIS reparse point\n"),
                driveLetterName, match->fileName));
            throw MATCH_STALE;
        }
        DPRINTF((_T("%s: match file %s is a non-SIS reparse point\n"),
            driveLetterName, match->fileName));
        throw MATCH_INVALID;
    }

    // Check if the match file is mapped by another user.

    if (IsFileMapped(match)) {
        DPRINTF((_T("%s: match file %s is already mapped\n"),
            driveLetterName, match->fileName));
        throw MATCH_ERROR;
    }

    return TRUE;
}
  1689. /*****************************************************************************/
  1690. // Compare() compares the target and match files. It reads each file
  1691. // one page (64 kB) at a time and compares each pair of pages.
  1692. BOOL Groveler::Compare(
  1693. FileData *target,
  1694. FileData *match)
  1695. {
  1696. DWORD lastPageSize,
  1697. bytesToRead = 0,
  1698. prevBytesToRead,
  1699. bytesToRequest = 0,
  1700. prevBytesToRequest,
  1701. bytesRead,
  1702. toggle,
  1703. targetTime,
  1704. matchTime,
  1705. lastError;
  1706. DWORDLONG numPages,
  1707. pageNum,
  1708. prevPageNum;
  1709. ULARGE_INTEGER offset;
  1710. BOOL targetReadDone,
  1711. matchReadDone,
  1712. filesMatch,
  1713. success;
  1714. ASSERT(target != NULL);
  1715. ASSERT(target->handle != NULL);
  1716. ASSERT(target->entry.fileID != 0);
  1717. ASSERT(match != NULL);
  1718. ASSERT(match->handle != NULL);
  1719. ASSERT( match->entry.fileID != 0
  1720. && !HasCSIndex(match->entry.csIndex)
  1721. || match->entry.fileID == 0
  1722. && match->fileName[0] != _T('\0')
  1723. && HasCSIndex(match->entry.csIndex));
  1724. ASSERT(target->entry.fileSize == match->entry.fileSize);
  1725. ASSERT(target->entry.signature == match->entry.signature);
  1726. ASSERT(target->entry.attributes == match->entry.attributes);
  1727. numPages = (target->entry.fileSize - 1) / CMP_PAGE_SIZE + 1;
  1728. lastPageSize = (DWORD)((target->entry.fileSize - 1) % CMP_PAGE_SIZE) + 1;
  1729. toggle = 0;
  1730. filesMatch = TRUE;
  1731. for (pageNum = 0; pageNum <= numPages; pageNum++) {
  1732. // Unless this is the first pass through the loop,
  1733. // wait for the previous read of both files to complete.
  1734. if (pageNum > 0) {
  1735. CmpCheckPoint(target, match, !targetReadDone, !matchReadDone);
  1736. success = GetOverlappedResult(
  1737. target->handle,
  1738. &target->readSynch,
  1739. &bytesRead,
  1740. FALSE);
  1741. if (!success) {
  1742. DPRINTF((_T("%s: error getting target file %s read results: %lu\n"),
  1743. driveLetterName, target->fileName, GetLastError()));
  1744. throw TARGET_ERROR;
  1745. }
  1746. if (bytesRead != prevBytesToRequest &&
  1747. bytesRead != prevBytesToRead) {
  1748. DPRINTF((_T("%s: cmp read only %lu of %lu bytes from target file %s\n"),
  1749. driveLetterName, bytesRead, prevBytesToRequest, target->fileName));
  1750. throw TARGET_ERROR;
  1751. }
  1752. success = GetOverlappedResult(
  1753. match->handle,
  1754. &match->readSynch,
  1755. &bytesRead,
  1756. FALSE);
  1757. if (!success) {
  1758. #if DBG
  1759. if (match->entry.fileID != 0) {
  1760. DPRINTF((_T("%s: error getting match file %s read results: %lu\n"),
  1761. driveLetterName, match->fileName, GetLastError()));
  1762. } else {
  1763. DPRINTF((_T("%s: error getting CS file %s read results: %lu\n"),
  1764. driveLetterName, match->fileName, GetLastError()));
  1765. }
  1766. #endif
  1767. throw MATCH_ERROR;
  1768. }
  1769. if (bytesRead != prevBytesToRequest &&
  1770. bytesRead != prevBytesToRead) {
  1771. #if DBG
  1772. if (match->entry.fileID != 0) {
  1773. DPRINTF((_T("%s: read only %lu of %lu bytes from match file %s\n"),
  1774. driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
  1775. } else {
  1776. DPRINTF((_T("%s: read only %lu of %lu bytes from CS file %s\n"),
  1777. driveLetterName, bytesRead, prevBytesToRequest, match->fileName));
  1778. }
  1779. #endif
  1780. throw MATCH_ERROR;
  1781. }
  1782. if (bytesRead >= cmpReportThreshold) {
  1783. compareReadCount += 2;
  1784. if (targetReadDone) { // Non-overlapped
  1785. targetTime = target->stopTime - target->startTime;
  1786. matchTime = match ->stopTime - match ->startTime;
  1787. compareReadTime += targetTime + matchTime;
  1788. } else { // Overlapped
  1789. targetTime = target->stopTime - target->startTime;
  1790. matchTime = match ->stopTime - target->startTime;
  1791. compareReadTime += targetTime > matchTime ? targetTime : matchTime;
  1792. }
  1793. }
  1794. if (!filesMatch)
  1795. break;
  1796. }
  1797. // Unless all pages of the target file have already been read,
  1798. // begin reading the next page of the file.
  1799. if (pageNum < numPages) {
  1800. offset.QuadPart = pageNum * CMP_PAGE_SIZE;
  1801. target->readSynch.Offset =
  1802. match ->readSynch.Offset = offset.LowPart;
  1803. target->readSynch.OffsetHigh =
  1804. match ->readSynch.OffsetHigh = offset.HighPart;
  1805. bytesToRead = pageNum < numPages-1 ? CMP_PAGE_SIZE : lastPageSize;
  1806. bytesToRequest = bytesToRead + sectorSize - 1;
  1807. bytesToRequest -= bytesToRequest % sectorSize;
  1808. target->startTime = GetTickCount();
  1809. targetReadDone = ReadFile(target->handle, target->buffer[toggle],
  1810. bytesToRequest, NULL, &target->readSynch);
  1811. target->stopTime = GetTickCount();
  1812. lastError = GetLastError();
  1813. if (targetReadDone) {
  1814. success = ResetEvent(target->readSynch.hEvent);
  1815. ASSERT_ERROR(success);
  1816. } else if (lastError != ERROR_IO_PENDING) {
  1817. DPRINTF((_T("%s: error reading target file %s: %lu\n"),
  1818. driveLetterName, target->fileName, lastError));
  1819. throw TARGET_ERROR;
  1820. }
  1821. match->startTime = GetTickCount();
  1822. matchReadDone = ReadFile(match->handle, match->buffer[toggle],
  1823. bytesToRequest, NULL, &match->readSynch);
  1824. match->stopTime = GetTickCount();
  1825. lastError = GetLastError();
  1826. if (matchReadDone) {
  1827. success = ResetEvent(match->readSynch.hEvent);
  1828. ASSERT_ERROR(success);
  1829. } else if (lastError != ERROR_IO_PENDING) {
  1830. #if DBG
  1831. if (match->entry.fileID != 0) {
  1832. DPRINTF((_T("%s: error reading match file %s: %lu\n"),
  1833. driveLetterName, match->fileName, lastError));
  1834. } else {
  1835. DPRINTF((_T("%s: error reading CS file %s: %lu\n"),
  1836. driveLetterName, match->fileName, lastError));
  1837. }
  1838. #endif
  1839. throw MATCH_ERROR;
  1840. }
  1841. }
  1842. // Unless this is the first pass through the loop,
  1843. // compare the target and match file pages just read.
  1844. if (pageNum > 0)
  1845. filesMatch = memcmp(target->buffer[1-toggle],
  1846. match ->buffer[1-toggle], prevBytesToRead) == 0;
  1847. prevPageNum = pageNum;
  1848. prevBytesToRead = bytesToRead;
  1849. prevBytesToRequest = bytesToRequest;
  1850. toggle = 1-toggle;
  1851. }
  1852. if (!filesMatch) {
  1853. #if DBG
  1854. if (match->entry.fileID != 0) {
  1855. DPRINTF((_T("%s:1 files %s and %s failed compare (sz: 0x%x)\n"),
  1856. driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
  1857. } else {
  1858. DPRINTF((_T("%s:2 files %s and %s failed compare (sz: 0x%x)\n"),
  1859. driveLetterName, target->fileName, match->fileName, target->entry.fileSize));
  1860. }
  1861. #endif
  1862. return FALSE;
  1863. }
  1864. return TRUE;
  1865. }
  1866. /*****************************************************************************/
  1867. // Merge() calls the SIS filter to merge the target and match files.
  1868. BOOL Groveler::Merge(
  1869. FileData *target,
  1870. FileData *match,
  1871. OVERLAPPED *mergeSynch,
  1872. HANDLE abortMergeEvent)
  1873. {
  1874. _SIS_LINK_FILES sisLinkFiles;
  1875. #if DBG
  1876. TCHAR *csName;
  1877. #endif
  1878. DWORD transferCount,
  1879. lastError;
  1880. BOOL mergeDone,
  1881. merged,
  1882. success;
  1883. ASSERT(target != NULL);
  1884. ASSERT(target->handle != NULL);
  1885. ASSERT(target->entry.fileID != 0);
  1886. ASSERT(match != NULL);
  1887. ASSERT(match->handle != NULL);
  1888. ASSERT( match->entry.fileID != 0
  1889. && !HasCSIndex(match->entry.csIndex)
  1890. || match->entry.fileID == 0
  1891. && match->fileName[0] != _T('\0')
  1892. && HasCSIndex(match->entry.csIndex));
  1893. ASSERT(mergeSynch != NULL);
  1894. ASSERT(mergeSynch->Internal == 0);
  1895. ASSERT(mergeSynch->InternalHigh == 0);
  1896. ASSERT(mergeSynch->Offset == 0);
  1897. ASSERT(mergeSynch->OffsetHigh == 0);
  1898. ASSERT(mergeSynch->hEvent != NULL);
  1899. ASSERT(IsReset(mergeSynch->hEvent));
  1900. ASSERT(abortMergeEvent != NULL);
  1901. ASSERT(IsReset(abortMergeEvent));
  1902. ASSERT(target->entry.fileSize == match->entry.fileSize);
  1903. ASSERT(target->entry.signature == match->entry.signature);
  1904. ASSERT(target->entry.attributes == match->entry.attributes);
  1905. ASSERT(grovHandle != NULL);
  1906. // Set up to merge the files.
  1907. if (match->entry.fileID != 0) {
  1908. sisLinkFiles.operation = SIS_LINK_FILES_OP_MERGE;
  1909. sisLinkFiles.u.Merge.file1 = target->handle;
  1910. sisLinkFiles.u.Merge.file2 = match ->handle;
  1911. sisLinkFiles.u.Merge.abortEvent = NULL; // Should be abortMergeEvent
  1912. } else {
  1913. sisLinkFiles.operation = SIS_LINK_FILES_OP_MERGE_CS;
  1914. sisLinkFiles.u.MergeWithCS.file1 = target->handle;
  1915. sisLinkFiles.u.MergeWithCS.abortEvent = NULL; // Should be abortMergeEvent
  1916. sisLinkFiles.u.MergeWithCS.CSid = match->entry.csIndex;
  1917. }
  1918. // Call the SIS filter to merge the files.
  1919. target->startTime = GetTickCount();
  1920. mergeDone = DeviceIoControl(
  1921. grovHandle,
  1922. FSCTL_SIS_LINK_FILES,
  1923. (VOID *)&sisLinkFiles,
  1924. sizeof(_SIS_LINK_FILES),
  1925. NULL,
  1926. 0,
  1927. NULL,
  1928. mergeSynch);
  1929. target->stopTime = GetTickCount();
  1930. // If the merge completed successfully before the call returned, reset
  1931. // the merge done event, get the new CS indices, and close the files.
  1932. if (mergeDone) {
  1933. success = ResetEvent(mergeSynch->hEvent);
  1934. ASSERT_ERROR(success);
  1935. mergeTime += target->stopTime - target->startTime;
  1936. GetCSIndex(target->handle, &target->entry.csIndex);
  1937. if (!HasCSIndex(match->entry.csIndex))
  1938. GetCSIndex(match->handle, &match->entry.csIndex);
  1939. CloseFile(target);
  1940. CloseFile(match);
  1941. }
  1942. // If the merge failed, close the files and return an error status.
  1943. else {
  1944. lastError = GetLastError();
  1945. if (lastError != ERROR_IO_PENDING) {
  1946. CloseFile(target);
  1947. CloseFile(match);
  1948. #if DBG
  1949. if (match->entry.fileID != 0) {
  1950. DPRINTF((_T("%s:3 files %s and %s failed merge: %lu\n"),
  1951. driveLetterName, target->fileName, match->fileName, lastError));
  1952. } else {
  1953. DPRINTF((_T("%s:4 files %s and %s failed merge: %lu\n"),
  1954. driveLetterName, target->fileName, match->fileName, lastError));
  1955. }
  1956. #endif
  1957. return FALSE;
  1958. }
  1959. // If the merge is in progress, wait for it to complete.
  1960. // (MergeCheckPoint() will get the new CS indices and close the files.
  1961. else {
  1962. merged = MergeCheckPoint(target, match, mergeSynch,
  1963. abortMergeEvent, !mergeDone);
  1964. if (!merged) {
  1965. #if DBG
  1966. lastError = GetLastError();
  1967. if (match->entry.fileID != 0) {
  1968. DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
  1969. driveLetterName, target->fileName, match->fileName, lastError));
  1970. } else {
  1971. DPRINTF((_T("%s: error getting merge results of files %s and %s: %lu\n"),
  1972. driveLetterName, target->fileName, match->fileName, lastError));
  1973. }
  1974. #endif
  1975. return FALSE;
  1976. }
  1977. }
  1978. }
  1979. // If the merge succeeded, analyze and report the results.
  1980. mergeTime += target->stopTime - target->startTime;
  1981. merged = HasCSIndex (target->entry.csIndex)
  1982. && SameCSIndex(target->entry.csIndex, match->entry.csIndex);
  1983. #if DBG
  1984. csName = GetCSName(&target->entry.csIndex);
  1985. if (merged) {
  1986. if (match->entry.fileID != 0) {
  1987. DPRINTF((_T("%s: files %s and %s merged: CS index is %s\n"),
  1988. driveLetterName, target->fileName, match->fileName,
  1989. csName != NULL ? csName : _T("...")));
  1990. } else {
  1991. DPRINTF((_T("%s: files %s and %s merged\n"),
  1992. driveLetterName, target->fileName, match->fileName));
  1993. }
  1994. } else {
  1995. if (match->entry.fileID != 0) {
  1996. DPRINTF((_T("%s:5 files %s and %s merged, but CS indices don't match\n"),
  1997. driveLetterName, target->fileName, match->fileName));
  1998. } else {
  1999. DPRINTF((_T("%s:6 files %s and %s merged, but CS indices don't match\n"),
  2000. driveLetterName, target->fileName, match->fileName));
  2001. }
  2002. }
  2003. if (csName != NULL) {
  2004. FreeCSName(csName);
  2005. csName = NULL;
  2006. }
  2007. #endif
  2008. return merged;
  2009. }
  2010. /*****************************************************************************/
  2011. // Worker() performs the groveling processing.
  2012. VOID Groveler::Worker()
  2013. {
  2014. FileData target,
  2015. match;
  2016. SGNativeQueueEntry queueEntry;
  2017. FIFO *matchList = NULL;
  2018. Table *csIndexTable = NULL;
  2019. OVERLAPPED mergeSynch = { 0, 0, 0, 0, NULL };
  2020. HANDLE abortMergeEvent = NULL;
  2021. TCHAR *csName;
  2022. DatabaseActionList actionList[MAX_ACTIONS];
  2023. BYTE *buffer1 = NULL,
  2024. *buffer2 = NULL,
  2025. *buffer3 = NULL,
  2026. *buffer4 = NULL;
  2027. DWORD queueIndex,
  2028. bufferSize,
  2029. numCompares,
  2030. numMatches,
  2031. numActions;
  2032. #if DBG
  2033. DWORD enqueueTime;
  2034. #endif
  2035. LONG num;
  2036. BOOL needToRetry,
  2037. hashed,
  2038. gotMatch,
  2039. filesMatch,
  2040. merged,
  2041. success;
  2042. CLEAR_FILE(target);
  2043. CLEAR_OVERLAPPED(target.oplock);
  2044. target.handle = NULL;
  2045. CLEAR_FILE(match);
  2046. CLEAR_OVERLAPPED(match.oplock);
  2047. match.handle = NULL;
  2048. _set_new_handler(NewHandler);
  2049. // Create the events.
  2050. try {
  2051. if ((target.oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2052. || (match .oplock .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2053. || (target.readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2054. || (match .readSynch.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2055. || (mergeSynch .hEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2056. || (abortMergeEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) {
  2057. DPRINTF((_T("%s: unable to create events: %lu\n"),
  2058. driveLetterName, GetLastError()));
  2059. throw INITIALIZE_ERROR;
  2060. }
  2061. // Allocate and align the file buffers.
  2062. bufferSize = SIG_PAGE_SIZE > CMP_PAGE_SIZE ? SIG_PAGE_SIZE : CMP_PAGE_SIZE
  2063. + sectorSize;
  2064. buffer1 = new BYTE[bufferSize];
  2065. ASSERT(buffer1 != NULL);
  2066. buffer2 = new BYTE[bufferSize];
  2067. ASSERT(buffer2 != NULL);
  2068. buffer3 = new BYTE[bufferSize];
  2069. ASSERT(buffer3 != NULL);
  2070. buffer4 = new BYTE[bufferSize];
  2071. ASSERT(buffer4 != NULL);
  2072. ASSERT(inUseFileID1 == NULL);
  2073. ASSERT(inUseFileID2 == NULL);
  2074. inUseFileID1 = &target.entry.fileID;
  2075. inUseFileID2 = &match .entry.fileID;
  2076. target.buffer[0] = buffer1 + sectorSize - (PtrToUlong(buffer1) % sectorSize);
  2077. target.buffer[1] = buffer2 + sectorSize - (PtrToUlong(buffer2) % sectorSize);
  2078. match .buffer[0] = buffer3 + sectorSize - (PtrToUlong(buffer3) % sectorSize);
  2079. match .buffer[1] = buffer4 + sectorSize - (PtrToUlong(buffer4) % sectorSize);
  2080. // Signal to grovel() that this thread is alive,
  2081. // then wait for it to signal to start.
  2082. grovelStatus = Grovel_ok;
  2083. ASSERT(IsReset(grovelStopEvent));
  2084. success = SetEvent(grovelStopEvent);
  2085. ASSERT_ERROR(success);
  2086. WaitForEvent(grovelStartEvent);
  2087. if (terminate)
  2088. throw TERMINATE;
  2089. #ifdef _CRTDBG
  2090. _CrtMemState s[2], sdiff;
  2091. int stateIndex = 0;
  2092. _CrtMemCheckpoint(&s[stateIndex]);
  2093. stateIndex = 1;
  2094. #endif
  2095. // The main loop.
  2096. while (TRUE) {
  2097. try {
  2098. #ifdef _CRTDBG
  2099. _CrtMemCheckpoint(&s[stateIndex]);
  2100. if (_CrtMemDifference(&sdiff, &s[stateIndex^1], &s[stateIndex]))
  2101. _CrtMemDumpStatistics(&sdiff);
  2102. stateIndex ^= 1;
  2103. #endif
  2104. hashed = FALSE;
  2105. numCompares = 0;
  2106. numMatches = 0;
  2107. merged = FALSE;
  2108. needToRetry = FALSE;
  2109. // Get a target file. abortGroveling is set when scan_volume is attempting to
  2110. // sync up with this thread. We stop here, a safe place to let scan_volume
  2111. // replace the database.
  2112. if (abortGroveling || !GetTarget(&target, &queueIndex)) {
  2113. CLEAR_FILE(target);
  2114. grovelStatus = Grovel_ok;
  2115. ASSERT(IsReset(grovelStopEvent));
  2116. success = SetEvent(grovelStopEvent);
  2117. ASSERT_ERROR(success);
  2118. WaitForEvent(grovelStartEvent);
  2119. if (terminate)
  2120. throw TERMINATE;
  2121. continue;
  2122. }
  2123. // Calculate the target file's signature.
  2124. hashed = TRUE;
  2125. CalculateSignature(&target);
  2126. // Get a list of match files.
  2127. ASSERT(matchList == NULL);
  2128. ASSERT(csIndexTable == NULL);
  2129. matchList = new FIFO();
  2130. ASSERT(matchList != NULL);
  2131. csIndexTable = new Table();
  2132. ASSERT(csIndexTable != NULL);
  2133. GetMatchList(&target, matchList, csIndexTable);
  2134. // Compare the target file to each match file until a matching file is found
  2135. // or all comparisons fail. Try the SIS files first, then the regular files.
  2136. while (TRUE) {
  2137. try {
  2138. gotMatch = FALSE;
  2139. if (!gotMatch && csIndexTable != NULL) {
  2140. gotMatch = GetCSFile(&target, &match, csIndexTable);
  2141. if (!gotMatch) {
  2142. delete csIndexTable;
  2143. csIndexTable = NULL;
  2144. }
  2145. }
  2146. if (!gotMatch && matchList != NULL) {
  2147. gotMatch = GetMatch(&target, &match, matchList);
  2148. if (!gotMatch) {
  2149. delete matchList;
  2150. matchList = NULL;
  2151. }
  2152. }
  2153. // After comparing the target file to every file on both
  2154. // lists, close the target file and update the database,
  2155. // then go on to process the next target file.
  2156. if (!gotMatch) {
  2157. CloseFile(&target);
  2158. numActions = 3;
  2159. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2160. actionList[0].u.fileID = target.entry.fileID;
  2161. actionList[1].type = TABLE_PUT;
  2162. actionList[1].u.tableEntry = &target.entry;
  2163. actionList[2].type = QUEUE_DELETE;
  2164. actionList[2].u.queueIndex = queueIndex;
  2165. if (needToRetry) {
  2166. queueEntry.fileID = target.entry.fileID;
  2167. queueEntry.parentID = target.parentID;
  2168. queueEntry.reason = 0;
  2169. queueEntry.fileName = NULL;
  2170. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2171. if (queueEntry.retryTime < grovelInterval)
  2172. queueEntry.retryTime = grovelInterval;
  2173. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2174. numActions = 4;
  2175. actionList[3].type = QUEUE_PUT;
  2176. actionList[3].u.queueEntry = &queueEntry;
  2177. }
  2178. #if DBG
  2179. if (!HasCSIndex(target.entry.csIndex)) {
  2180. TRACE_PRINTF(TC_groveler, 4,
  2181. (_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"),
  2182. driveLetterName, target.fileName, target.entry.fileSize,
  2183. target.entry.signature));
  2184. } else {
  2185. csName = GetCSName(&target.entry.csIndex);
  2186. TRACE_PRINTF(TC_groveler, 4,
  2187. (_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"),
  2188. driveLetterName, target.fileName, target.entry.fileSize,
  2189. target.entry.signature, csName != NULL ? csName : _T("...")));
  2190. if (csName != NULL) {
  2191. FreeCSName(csName);
  2192. csName = NULL;
  2193. }
  2194. }
  2195. if (needToRetry) {
  2196. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2197. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2198. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2199. }
  2200. #endif
  2201. DoTransaction(numActions, actionList);
  2202. break;
  2203. }
  2204. // Compare the target file with this match file.
  2205. numCompares++;
  2206. ASSERT(!inCompare);
  2207. inCompare = TRUE;
  2208. filesMatch = Compare(&target, &match);
  2209. inCompare = FALSE;
  2210. if (!filesMatch) {
  2211. CloseFile(&match);
  2212. CLEAR_FILE(match);
  2213. continue;
  2214. }
  2215. // If the target and match files are identical, go on to merge them.
  2216. numMatches++;
  2217. merged = Merge(&target, &match, &mergeSynch, abortMergeEvent);
  2218. // Update the database as follows:
  2219. //
  2220. // - Update the target file's table entry.
  2221. //
  2222. // - If the merge succeeded and the match file was a regular file,
  2223. // update the match file's table entry.
  2224. //
  2225. // - If the merge failed, re-enqueue the target file to be groveled again.
  2226. numActions = 3;
  2227. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2228. actionList[0].u.fileID = target.entry.fileID;
  2229. actionList[1].type = TABLE_PUT;
  2230. actionList[1].u.tableEntry = &target.entry;
  2231. actionList[2].type = QUEUE_DELETE;
  2232. actionList[2].u.queueIndex = queueIndex;
  2233. if (merged) {
  2234. if (match.entry.fileID != 0) {
  2235. actionList[numActions ].type = TABLE_DELETE_BY_FILE_ID;
  2236. actionList[numActions++].u.fileID = match.entry.fileID;
  2237. actionList[numActions ].type = TABLE_PUT;
  2238. actionList[numActions++].u.tableEntry = &match.entry;
  2239. }
  2240. } else {
  2241. queueEntry.fileID = target.entry.fileID;
  2242. queueEntry.parentID = target.parentID;
  2243. queueEntry.reason = 0;
  2244. queueEntry.fileName = NULL;
  2245. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2246. if (queueEntry.retryTime < grovelInterval)
  2247. queueEntry.retryTime = grovelInterval;
  2248. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2249. actionList[numActions ].type = QUEUE_PUT;
  2250. actionList[numActions++].u.queueEntry = &queueEntry;
  2251. }
  2252. #if DBG
  2253. if (!HasCSIndex(target.entry.csIndex)) {
  2254. TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x} to table\n"),
  2255. driveLetterName, target.fileName, target.entry.fileSize,
  2256. target.entry.signature));
  2257. } else {
  2258. csName = GetCSName(&target.entry.csIndex);
  2259. TPRINTF((_T("%s: adding file {%s, %I64u, 0x%016I64x, %s} to table\n"),
  2260. driveLetterName, target.fileName, target.entry.fileSize,
  2261. target.entry.signature, csName != NULL ? csName : _T("...")));
  2262. if (csName != NULL) {
  2263. FreeCSName(csName);
  2264. csName = NULL;
  2265. }
  2266. }
  2267. if (!merged) {
  2268. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2269. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2270. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2271. }
  2272. #endif
  2273. DoTransaction(numActions, actionList);
  2274. break;
  2275. }
  2276. // Match exceptions
  2277. catch (MatchException matchException) {
  2278. inCompare = FALSE;
  2279. switch (matchException) {
  2280. // MATCH_INVALID: the match file doesn't exist or is disallowed. Close the file
  2281. // and remove its entry from the table, then go on to try the next match file.
  2282. case MATCH_INVALID:
  2283. CloseFile(&match);
  2284. if (match.entry.fileID != 0) {
  2285. ASSERT(!HasCSIndex(match.entry.csIndex));
  2286. num = sgDatabase->TableDeleteByFileID(match.entry.fileID);
  2287. if (num < 0)
  2288. throw DATABASE_ERROR;
  2289. ASSERT(num == 1);
  2290. } else {
  2291. ASSERT(HasCSIndex(match.entry.csIndex));
  2292. num = sgDatabase->TableDeleteByCSIndex(&match.entry.csIndex);
  2293. if (num < 0)
  2294. throw DATABASE_ERROR;
  2295. ASSERT(num > 0);
  2296. }
  2297. CLEAR_FILE(match);
  2298. break;
  2299. // MATCH_ERROR: an error occured while opening or reading the match
  2300. // file. Close the file and mark that the target file may need to be
  2301. // groveled again, then go on to try the next match file.
  2302. case MATCH_ERROR:
  2303. CloseFile(&match);
  2304. CLEAR_FILE(match);
  2305. needToRetry = TRUE;
  2306. break;
  2307. // MATCH_STALE: the match file table entry is invalid for some reason.
  2308. // Close the file, remove its entry from the table, enqueue
  2309. // it to be re-groveled, then go on to the next match file.
  2310. case MATCH_STALE:
  2311. CloseFile(&match);
  2312. if (match.entry.fileID != 0) {
  2313. queueEntry.fileID = match.entry.fileID;
  2314. queueEntry.parentID = match.parentID;
  2315. queueEntry.reason = 0;
  2316. queueEntry.readyTime = GetTime() + grovelInterval;
  2317. queueEntry.retryTime = 0;
  2318. queueEntry.fileName = NULL;
  2319. numActions = 2;
  2320. actionList[0].type = TABLE_DELETE_BY_FILE_ID;
  2321. actionList[0].u.fileID = match.entry.fileID;
  2322. actionList[1].type = QUEUE_PUT;
  2323. actionList[1].u.queueEntry = &queueEntry;
  2324. #if DBG
  2325. enqueueTime = (DWORD)(grovelInterval / 10000);
  2326. DPRINTF((_T(" Enqueuing match file %s to be groveled in %lu.%03lu sec\n"),
  2327. match.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2328. #endif
  2329. DoTransaction(numActions, actionList);
  2330. } else {
  2331. ASSERT(HasCSIndex(match.entry.csIndex));
  2332. EnqueueCSIndex(&match.entry.csIndex);
  2333. }
  2334. CLEAR_FILE(match);
  2335. break;
  2336. default:
  2337. ASSERT_PRINTF(FALSE, (_T("matchException=%lu\n"),
  2338. matchException));
  2339. }
  2340. }
  2341. }
  2342. }
  2343. // Target exceptions
  2344. catch (TargetException targetException) {
  2345. inCompare = FALSE;
  2346. DPRINTF((_T("WORKER: Handling TargetException %d, status=%d\n"),
  2347. targetException,GetLastError()));
  2348. switch (targetException) {
  2349. // TARGET_INVALID: the target file is invalid for some reason: it doesn't
  2350. // exist, it is disallowed properties, it is in the queue by both file
  2351. // name and file ID, or it was in the queue by file name and has already
  2352. // been groveled. Close the files, remove the target file's entry from
  2353. // the table, then go on to grovel the next target file.
  2354. case TARGET_INVALID:
  2355. CloseFile(&target);
  2356. CloseFile(&match);
  2357. if (matchList != NULL) {
  2358. delete matchList;
  2359. matchList = NULL;
  2360. }
  2361. if (csIndexTable != NULL) {
  2362. delete csIndexTable;
  2363. csIndexTable = NULL;
  2364. }
  2365. numActions = 1;
  2366. actionList[0].type = QUEUE_DELETE;
  2367. actionList[0].u.queueIndex = queueIndex;
  2368. if (target.entry.fileID != 0) {
  2369. numActions = 2;
  2370. actionList[1].type = TABLE_DELETE_BY_FILE_ID;
  2371. actionList[1].u.fileID = target.entry.fileID;
  2372. }
  2373. DoTransaction(numActions, actionList);
  2374. break;
  2375. // An error occured while opening or reading the target file. Close
  2376. // the files and re-enqueue the target file to be groveled again.
  2377. case TARGET_ERROR:
  2378. ASSERT(target.entry.fileID != 0
  2379. || target.fileName[0] != _T('\0'));
  2380. CloseFile(&target);
  2381. CloseFile(&match);
  2382. queueEntry.fileID = target.entry.fileID;
  2383. queueEntry.parentID = target.parentID;
  2384. queueEntry.reason = 0;
  2385. queueEntry.fileName = target.entry.fileID == 0
  2386. ? target.fileName : NULL;
  2387. queueEntry.retryTime = target.retryTime * 2; // Exponential back-off
  2388. if (queueEntry.retryTime < grovelInterval)
  2389. queueEntry.retryTime = grovelInterval;
  2390. queueEntry.readyTime = GetTime() + queueEntry.retryTime;
  2391. actionList[0].type = QUEUE_DELETE;
  2392. actionList[0].u.queueIndex = queueIndex;
  2393. actionList[1].type = QUEUE_PUT;
  2394. actionList[1].u.queueEntry = &queueEntry;
  2395. #if DBG
  2396. enqueueTime = (DWORD)(queueEntry.retryTime / 10000);
  2397. if (target.entry.fileID != 0) {
  2398. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2399. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2400. } else {
  2401. DPRINTF((_T(" Re-enqueuing target file %s to be groveled in %lu.%03lu sec\n"),
  2402. target.fileName, enqueueTime / 1000, enqueueTime % 1000));
  2403. }
  2404. #endif
  2405. DoTransaction(2, actionList);
  2406. break;
  2407. default:
  2408. ASSERT_PRINTF(FALSE, (_T("targetException=%lu\n"),
  2409. targetException));
  2410. }
  2411. }
  2412. // Do some clean-up.
  2413. ASSERT(target.handle == NULL);
  2414. ASSERT(match .handle == NULL);
  2415. if (matchList != NULL) {
  2416. delete matchList;
  2417. matchList = NULL;
  2418. }
  2419. if (csIndexTable != NULL) {
  2420. delete csIndexTable;
  2421. csIndexTable = NULL;
  2422. }
  2423. // Update the activity counters for this target file,
  2424. // then go on to process the next file.
  2425. if (hashed) {
  2426. hashCount++;
  2427. hashBytes += target.entry.fileSize;
  2428. }
  2429. compareCount += numCompares;
  2430. compareBytes += numCompares * target.entry.fileSize;
  2431. matchCount += numMatches;
  2432. matchBytes += numMatches * target.entry.fileSize;
  2433. if (merged) {
  2434. mergeCount++;
  2435. mergeBytes += target.entry.fileSize;
  2436. }
  2437. CLEAR_FILE(target);
  2438. CLEAR_FILE(match);
  2439. CLEAR_OVERLAPPED(mergeSynch);
  2440. }
  2441. }
  2442. // Terminal exceptions
  2443. catch (TerminalException terminalException) {
  2444. switch (terminalException) {
  2445. case INITIALIZE_ERROR:
  2446. break;
  2447. // DATABASE_ERROR: an error occured in the database. Return an error status.
  2448. case DATABASE_ERROR:
  2449. break;
  2450. // MEMORY_ERROR: unable to allocate memory. Return an error status.
  2451. case MEMORY_ERROR:
  2452. DPRINTF((_T("%s: Unable to allocate memory\n"),
  2453. driveLetterName));
  2454. break;
  2455. // TERMINATE: grovel() signaled for this thread to terminate.
  2456. case TERMINATE:
  2457. break;
  2458. default:
  2459. ASSERT_PRINTF(FALSE, (_T("terminalException=%lu\n"),
  2460. terminalException));
  2461. }
  2462. }
  2463. // Close the files and clean up.
  2464. CloseFile(&target);
  2465. CloseFile(&target);
  2466. CLEAR_FILE(target);
  2467. CLEAR_FILE(match);
  2468. if (matchList != NULL) {
  2469. delete matchList;
  2470. matchList = NULL;
  2471. }
  2472. if (csIndexTable != NULL) {
  2473. delete csIndexTable;
  2474. csIndexTable = NULL;
  2475. }
  2476. if (target.oplock.hEvent != NULL) {
  2477. success = CloseHandle(target.oplock.hEvent);
  2478. ASSERT_ERROR(success);
  2479. target.oplock.hEvent = NULL;
  2480. }
  2481. if (match.oplock.hEvent != NULL) {
  2482. success = CloseHandle(match.oplock.hEvent);
  2483. ASSERT_ERROR(success);
  2484. match.oplock.hEvent = NULL;
  2485. }
  2486. if (target.readSynch.hEvent != NULL) {
  2487. success = CloseHandle(target.readSynch.hEvent);
  2488. ASSERT_ERROR(success);
  2489. target.readSynch.hEvent = NULL;
  2490. }
  2491. if (match.readSynch.hEvent != NULL) {
  2492. success = CloseHandle(match.readSynch.hEvent);
  2493. ASSERT_ERROR(success);
  2494. match.readSynch.hEvent = NULL;
  2495. }
  2496. if (mergeSynch.hEvent != NULL) {
  2497. success = CloseHandle(mergeSynch.hEvent);
  2498. ASSERT_ERROR(success);
  2499. mergeSynch.hEvent = NULL;
  2500. }
  2501. if (abortMergeEvent != NULL) {
  2502. success = CloseHandle(abortMergeEvent);
  2503. ASSERT_ERROR(success);
  2504. abortMergeEvent = NULL;
  2505. }
  2506. if (buffer1 != NULL) {
  2507. delete buffer1;
  2508. buffer1 = NULL;
  2509. }
  2510. if (buffer2 != NULL) {
  2511. delete buffer2;
  2512. buffer2 = NULL;
  2513. }
  2514. if (buffer3 != NULL) {
  2515. delete buffer3;
  2516. buffer3 = NULL;
  2517. }
  2518. if (buffer4 != NULL) {
  2519. delete buffer4;
  2520. buffer4 = NULL;
  2521. }
  2522. inUseFileID1 = NULL;
  2523. inUseFileID2 = NULL;
  2524. // Signal grovel() that this thread is terminating by
  2525. // setting the grovelStop event with an error status.
  2526. grovelThread = NULL;
  2527. grovelStatus = Grovel_error;
  2528. ASSERT(IsReset(grovelStopEvent));
  2529. success = SetEvent(grovelStopEvent);
  2530. ASSERT_ERROR(success);
  2531. }
  2532. /*****************************************************************************/
  2533. /******************* Groveler class static private methods *******************/
  2534. /*****************************************************************************/
  2535. // WorkerThread() runs in its own thread.
  2536. // It calls Worker() to perform the groveling processing.
  2537. DWORD Groveler::WorkerThread(VOID *groveler)
  2538. {
  2539. ((Groveler *)groveler)->Worker();
  2540. return 0; // Dummy return value
  2541. }
  2542. /*****************************************************************************/
  2543. /*********************** Groveler class public methods ***********************/
  2544. /*****************************************************************************/
  2545. BOOL Groveler::set_log_drive(const _TCHAR *drive_name)
  2546. {
  2547. return SGDatabase::set_log_drive(drive_name);
  2548. }
  2549. // is_sis_installed tests whether the SIS filter is
  2550. // installed on a volume by calling SIS copyfile.
  2551. BOOL Groveler::is_sis_installed(const _TCHAR *drive_name)
  2552. {
  2553. HANDLE volHandle;
  2554. SI_COPYFILE copyFile;
  2555. DWORD transferCount,
  2556. lastError;
  2557. BOOL success;
  2558. volHandle = CreateFile(
  2559. drive_name,
  2560. GENERIC_READ,
  2561. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2562. NULL,
  2563. OPEN_EXISTING,
  2564. FILE_FLAG_BACKUP_SEMANTICS,
  2565. NULL);
  2566. if (volHandle == INVALID_HANDLE_VALUE)
  2567. return FALSE;
  2568. copyFile.SourceFileNameLength = 0;
  2569. copyFile.DestinationFileNameLength = 0;
  2570. copyFile.Flags = COPYFILE_SIS_REPLACE;
  2571. success = DeviceIoControl(
  2572. volHandle,
  2573. FSCTL_SIS_COPYFILE,
  2574. (VOID *)&copyFile,
  2575. sizeof(SI_COPYFILE),
  2576. NULL,
  2577. 0,
  2578. &transferCount,
  2579. NULL);
  2580. lastError = GetLastError();
  2581. ASSERT(!success);
  2582. success = CloseHandle(volHandle);
  2583. ASSERT_ERROR(success);
  2584. switch (lastError) {
  2585. case ERROR_INVALID_FUNCTION:
  2586. return FALSE;
  2587. case ERROR_INVALID_PARAMETER:
  2588. return TRUE; //sis is installed on this volume
  2589. default:
  2590. ASSERT_PRINTF(FALSE, (_T("lastError=%lu\n"), lastError));
  2591. }
  2592. return FALSE; // Dummy return value
  2593. }
  2594. /*****************************************************************************/
  2595. // The groveler constructor creates and initializes all class variables.
  2596. Groveler::Groveler()
  2597. {
  2598. volumeHandle = NULL;
  2599. grovHandle = NULL;
  2600. sgDatabase = NULL;
  2601. driveName = NULL;
  2602. driveLetterName = NULL;
  2603. databaseName = NULL;
  2604. numDisallowedIDs = 0;
  2605. numDisallowedNames = 0;
  2606. disallowedIDs = NULL;
  2607. disallowedNames = NULL;
  2608. grovelStartEvent = NULL;
  2609. grovelStopEvent = NULL;
  2610. grovelThread = NULL;
  2611. inUseFileID1 = NULL;
  2612. inUseFileID2 = NULL;
  2613. abortGroveling = FALSE;
  2614. inCompare = FALSE;
  2615. inScan = FALSE;
  2616. terminate = TRUE;
  2617. usnID = lastUSN = UNINITIALIZED_USN;
  2618. }
  2619. /*****************************************************************************/
  2620. // The groveler destructor destroys all class variables.
  2621. Groveler::~Groveler()
  2622. {
  2623. // If the volume is open, call close() to close it.
  2624. close();
  2625. ASSERT(volumeHandle == NULL);
  2626. ASSERT(grovHandle == NULL);
  2627. ASSERT(sgDatabase == NULL);
  2628. ASSERT(driveName == NULL);
  2629. ASSERT(driveLetterName == NULL);
  2630. ASSERT(databaseName == NULL);
  2631. ASSERT(numDisallowedIDs == 0);
  2632. ASSERT(numDisallowedNames == 0);
  2633. ASSERT(disallowedIDs == NULL);
  2634. ASSERT(disallowedNames == NULL);
  2635. ASSERT(grovelStartEvent == NULL);
  2636. ASSERT(grovelStopEvent == NULL);
  2637. ASSERT(grovelThread == NULL);
  2638. ASSERT(inUseFileID1 == NULL);
  2639. ASSERT(inUseFileID2 == NULL);
  2640. ASSERT(terminate);
  2641. ASSERT(!inCompare);
  2642. ASSERT(!inScan);
  2643. ASSERT(usnID == UNINITIALIZED_USN);
  2644. }
  2645. /*****************************************************************************/
  2646. // Open() opens the specified volume.
  2647. GrovelStatus Groveler::open(
  2648. IN const TCHAR *drive_name,
  2649. IN const TCHAR *drive_letterName,
  2650. IN BOOL is_log_drive,
  2651. IN DOUBLE read_report_discard_threshold,
  2652. IN DWORD min_file_size,
  2653. IN DWORD min_file_age,
  2654. IN BOOL allow_compressed_files,
  2655. IN BOOL allow_encrypted_files,
  2656. IN BOOL allow_hidden_files,
  2657. IN BOOL allow_offline_files,
  2658. IN BOOL allow_temporary_files,
  2659. IN DWORD num_excluded_paths,
  2660. IN const TCHAR **excluded_paths,
  2661. IN DWORD base_regrovel_interval,
  2662. IN DWORD max_regrovel_interval)
  2663. {
  2664. DWORD threadID;
  2665. TCHAR fileStr[MAX_PATH];
  2666. TCHAR listValue[MAX_PATH+1],
  2667. *strPtr;
  2668. USN_JOURNAL_DATA usnJournalData;
  2669. SGNativeListEntry listEntry;
  2670. DWORDLONG fileID;
  2671. DWORD sectorsPerCluster,
  2672. numberOfFreeClusters,
  2673. totalNumberOfClusters,
  2674. bufferSize,
  2675. strLen,
  2676. i;
  2677. GrovelStatus openStatus;
  2678. LONG num;
  2679. #if DBG
  2680. BOOL wroteHeader = FALSE;
  2681. #endif
  2682. BOOL success;
  2683. ASSERT(volumeHandle == NULL);
  2684. ASSERT(grovHandle == NULL);
  2685. ASSERT(sgDatabase == NULL);
  2686. ASSERT(databaseName == NULL);
  2687. ASSERT(numDisallowedIDs == 0);
  2688. ASSERT(numDisallowedNames == 0);
  2689. ASSERT(disallowedIDs == NULL);
  2690. ASSERT(disallowedNames == NULL);
  2691. ASSERT(grovelStartEvent == NULL);
  2692. ASSERT(grovelStopEvent == NULL);
  2693. ASSERT(grovelThread == NULL);
  2694. ASSERT(inUseFileID1 == NULL);
  2695. ASSERT(inUseFileID2 == NULL);
  2696. ASSERT(terminate);
  2697. ASSERT(!inCompare);
  2698. ASSERT(!inScan);
  2699. ASSERT(usnID == UNINITIALIZED_USN);
  2700. #if 0
  2701. while (!IsDebuggerPresent())
  2702. Sleep(2000);
  2703. DebugBreak();
  2704. #endif
  2705. //
  2706. // Make sure that the filter has run phase 2 initialization if this is
  2707. // a SIS enabled volume.
  2708. //
  2709. is_sis_installed(drive_name);
  2710. driveName = new TCHAR[_tcslen(drive_name) + 1];
  2711. _tcscpy(driveName, drive_name);
  2712. driveName[_tcslen(driveName)-1] = _T('\0');; //remove trailing '\'
  2713. driveLetterName = new TCHAR[_tcslen(drive_letterName) + 1];
  2714. _tcscpy(driveLetterName, drive_letterName);
  2715. strLen = _tcslen(driveLetterName);
  2716. if (strLen > 2) {
  2717. driveLetterName[strLen-2] = _T('\0'); //remove trailing ':\'
  2718. }
  2719. #ifdef _CRTDBG
  2720. // Send all reports to STDOUT
  2721. _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE );
  2722. _CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDERR );
  2723. _CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE );
  2724. _CrtSetReportFile( _CRT_ERROR, _CRTDBG_FILE_STDERR );
  2725. _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE );
  2726. _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDERR );
  2727. #endif
  2728. // Open the volume and the GrovelerFile. The SIS fsctl
  2729. // functions require that we pass in a handle to GrovelerFile as a means
  2730. // of proving our "privilege". An access violation is returned if we don't.
  2731. volumeHandle = CreateFile(
  2732. driveName,
  2733. GENERIC_READ | GENERIC_WRITE,
  2734. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2735. NULL,
  2736. OPEN_EXISTING,
  2737. FILE_FLAG_OVERLAPPED|FILE_FLAG_BACKUP_SEMANTICS,
  2738. NULL);
  2739. if (volumeHandle == INVALID_HANDLE_VALUE) {
  2740. volumeHandle = NULL;
  2741. DPRINTF((_T("%s: Can't open volume \"%s\" %lu\n"),
  2742. driveLetterName, driveName, GetLastError()));
  2743. close();
  2744. return Grovel_error;
  2745. }
  2746. _tcscpy(fileStr,driveName);
  2747. _tcscat(fileStr,CS_DIR_PATH);
  2748. _tcscat(fileStr,_T("\\"));
  2749. _tcscat(fileStr,GROVELER_FILE_NAME);
  2750. grovHandle = CreateFile(
  2751. fileStr,
  2752. GENERIC_READ | GENERIC_WRITE,
  2753. FILE_SHARE_READ | FILE_SHARE_WRITE,
  2754. NULL,
  2755. OPEN_EXISTING,
  2756. FILE_FLAG_OVERLAPPED,
  2757. NULL);
  2758. if (grovHandle == INVALID_HANDLE_VALUE) {
  2759. grovHandle = NULL;
  2760. DPRINTF((_T("%s: can't open groveler file \"%s\": %lu\n"),
  2761. driveLetterName, fileStr, GetLastError()));
  2762. close();
  2763. return Grovel_error;
  2764. }
  2765. _tcscpy(fileStr,driveName);
  2766. _tcscat(fileStr,_T("\\"));
  2767. success = GetDiskFreeSpace(fileStr, &sectorsPerCluster, &sectorSize,
  2768. &numberOfFreeClusters, &totalNumberOfClusters);
  2769. ASSERT(success);
  2770. ASSERT(SIG_PAGE_SIZE % sectorSize == 0);
  2771. ASSERT(CMP_PAGE_SIZE % sectorSize == 0);
  2772. sigReportThreshold =
  2773. (DWORD)((DOUBLE)SIG_PAGE_SIZE * read_report_discard_threshold);
  2774. cmpReportThreshold =
  2775. (DWORD)((DOUBLE)CMP_PAGE_SIZE * read_report_discard_threshold);
  2776. //
  2777. // Open this volume's database. If this fails, create a
  2778. // new database. If that fails, return an error status.
  2779. //
  2780. ASSERT(databaseName == NULL);
  2781. strLen = _tcslen(driveName) + _tcslen(CS_DIR_PATH) + _tcslen(DATABASE_FILE_NAME) + 1; // +1 for '\'
  2782. databaseName = new TCHAR[strLen+1];
  2783. ASSERT(databaseName != NULL);
  2784. _stprintf(databaseName, _T("%s%s\\%s"), driveName, CS_DIR_PATH, DATABASE_FILE_NAME);
  2785. sgDatabase = new SGDatabase();
  2786. if (sgDatabase == NULL) {
  2787. DPRINTF((_T("%s: can't create database object\n"),
  2788. driveLetterName));
  2789. close();
  2790. return Grovel_error;
  2791. }
  2792. openStatus = Grovel_ok;
  2793. if (get_usn_log_info(&usnJournalData) != Grovel_ok) {
  2794. DPRINTF((_T("%s: can't initialize usnID\n"),
  2795. driveLetterName));
  2796. } else {
  2797. usnID = usnJournalData.UsnJournalID;
  2798. if (!sgDatabase->Open(databaseName, is_log_drive)) {
  2799. DPRINTF((_T("%s: can't open database \"%s\"\n"),
  2800. driveLetterName, databaseName));
  2801. } else {
  2802. listValue[0] = _T('\0');
  2803. listEntry.name = LAST_USN_NAME;
  2804. listEntry.value = listValue;
  2805. if (sgDatabase->ListRead(&listEntry) <= 0
  2806. || _stscanf(listValue, _T("%I64x"), &lastUSN) != 1
  2807. || lastUSN == UNINITIALIZED_USN) {
  2808. DPRINTF((_T("%s: can't get last USN value\n"), driveLetterName));
  2809. } else {
  2810. DWORDLONG storedUsnID;
  2811. listValue[0] = _T('\0');
  2812. listEntry.name = USN_ID_NAME;
  2813. listEntry.value = listValue;
  2814. if (sgDatabase->ListRead(&listEntry) <= 0
  2815. || _stscanf(listValue, _T("%I64x"), &storedUsnID) != 1
  2816. || storedUsnID != usnID) {
  2817. DPRINTF((_T("%s: can't get USN ID value from database\n"), driveLetterName));
  2818. } else {
  2819. num = sgDatabase->StackCount();
  2820. if (0 == num)
  2821. goto OpenedDatabase;
  2822. }
  2823. }
  2824. }
  2825. }
  2826. // Set abortGroveling to block the worker thread, and set lastUSN to block extract_log
  2827. // until scan_volume starts.
  2828. abortGroveling = TRUE;
  2829. lastUSN = usnID = UNINITIALIZED_USN;
  2830. openStatus = Grovel_new;
  2831. OpenedDatabase:
  2832. // Create the disallowed directories list.
  2833. if (num_excluded_paths == 0) {
  2834. disallowedIDs = NULL;
  2835. disallowedNames = NULL;
  2836. } else {
  2837. disallowedIDs = new DWORDLONG[num_excluded_paths];
  2838. disallowedNames = new TCHAR * [num_excluded_paths];
  2839. ASSERT(disallowedIDs != NULL);
  2840. ASSERT(disallowedNames != NULL);
  2841. for (i = 0; i < num_excluded_paths; i++) {
  2842. ASSERT(excluded_paths[i] != NULL);
  2843. if (excluded_paths[i][0] == _T('\\')) {
  2844. strLen = _tcslen(excluded_paths[i]);
  2845. while (strLen > 1 && excluded_paths[i][strLen-1] == _T('\\'))
  2846. strLen--;
  2847. strPtr = new TCHAR[strLen+1];
  2848. ASSERT(strPtr != NULL);
  2849. disallowedNames[numDisallowedNames++] = strPtr;
  2850. _tcsncpy(strPtr, excluded_paths[i], strLen);
  2851. strPtr[strLen] = _T('\0');
  2852. fileID = GetFileID(strPtr);
  2853. if (fileID != 0)
  2854. disallowedIDs[numDisallowedIDs++] = fileID;
  2855. #if DBG
  2856. else {
  2857. if (!wroteHeader) {
  2858. DPRINTF((_T("%s: can't open excluded paths\n"),
  2859. driveLetterName));
  2860. wroteHeader = TRUE;
  2861. }
  2862. DPRINTF((_T("\t%s\n"), strPtr));
  2863. }
  2864. #endif
  2865. }
  2866. }
  2867. if (numDisallowedNames == 0) {
  2868. delete disallowedNames;
  2869. disallowedNames = NULL;
  2870. } else if (numDisallowedNames > 1)
  2871. qsort(
  2872. disallowedNames,
  2873. numDisallowedNames,
  2874. sizeof(TCHAR *),
  2875. qsStringCompare);
  2876. if (numDisallowedIDs == 0) {
  2877. delete disallowedIDs;
  2878. disallowedIDs = NULL;
  2879. } else if (numDisallowedIDs > 1)
  2880. qsort(
  2881. disallowedIDs,
  2882. numDisallowedIDs,
  2883. sizeof(DWORDLONG),
  2884. FileIDCompare);
  2885. }
  2886. //
  2887. // Set the remaining class values.
  2888. //
  2889. // minFileAge is expressed in 10^-7 seconds, min_file_age in milliseconds.
  2890. //
  2891. minFileSize = min_file_size > MIN_FILE_SIZE ? min_file_size : MIN_FILE_SIZE;
  2892. minFileAge = min_file_age * 10000;
  2893. grovelInterval = minFileAge > MIN_GROVEL_INTERVAL ? minFileAge : MIN_GROVEL_INTERVAL;
  2894. disallowedAttributes = FILE_ATTRIBUTE_DIRECTORY
  2895. | (allow_compressed_files ? 0 : FILE_ATTRIBUTE_COMPRESSED)
  2896. | (allow_encrypted_files ? 0 : FILE_ATTRIBUTE_ENCRYPTED)
  2897. | (allow_hidden_files ? 0 : FILE_ATTRIBUTE_HIDDEN)
  2898. | (allow_offline_files ? 0 : FILE_ATTRIBUTE_OFFLINE)
  2899. | (allow_temporary_files ? 0 : FILE_ATTRIBUTE_TEMPORARY);
  2900. //
  2901. // Create the events used to handshake with the worker thread.
  2902. //
  2903. if ((grovelStartEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL
  2904. || (grovelStopEvent = CreateEvent(NULL, TRUE, FALSE, NULL)) == NULL) {
  2905. DPRINTF((_T("%s: unable to create events: %lu\n"),
  2906. driveLetterName, GetLastError()));
  2907. close();
  2908. return Grovel_error;
  2909. }
  2910. //
  2911. // Create the worker thread, then wait for it to set
  2912. // the grovelStop event to announce its existence.
  2913. //
  2914. terminate = FALSE;
  2915. grovelThread = CreateThread(
  2916. NULL,
  2917. 0,
  2918. WorkerThread,
  2919. (VOID *)this,
  2920. 0,
  2921. &threadID);
  2922. if (grovelThread == NULL) {
  2923. DPRINTF((_T("%s: can't create the worker thread: %lu\n"),
  2924. driveLetterName, GetLastError()));
  2925. close();
  2926. return Grovel_error;
  2927. }
  2928. WaitForEvent(grovelStopEvent);
  2929. if (grovelStatus == Grovel_error) {
  2930. grovelThread = NULL;
  2931. close();
  2932. return Grovel_error;
  2933. }
  2934. ASSERT(grovelStatus == Grovel_ok);
  2935. return openStatus;
  2936. }
  2937. /*****************************************************************************/
  2938. GrovelStatus Groveler::close()
  2939. {
  2940. DWORD i;
  2941. LONG num;
  2942. BOOL success;
  2943. // If active, signal the worker thread to stop,
  2944. // then wait for it to acknowledge.
  2945. terminate = TRUE;
  2946. if (grovelThread != NULL) {
  2947. ASSERT(grovelStartEvent != NULL);
  2948. ASSERT(grovelStopEvent != NULL);
  2949. timeAllotted = INFINITE;
  2950. do {
  2951. ASSERT(IsReset(grovelStartEvent));
  2952. success = SetEvent(grovelStartEvent);
  2953. ASSERT_ERROR(success);
  2954. WaitForEvent(grovelStopEvent);
  2955. } while (grovelStatus != Grovel_error);
  2956. grovelThread = NULL;
  2957. }
  2958. inCompare = FALSE;
  2959. inScan = FALSE;
  2960. usnID = UNINITIALIZED_USN;
  2961. ASSERT(inUseFileID1 == NULL);
  2962. ASSERT(inUseFileID2 == NULL);
  2963. // Close the events.
  2964. if (grovelStartEvent != NULL) {
  2965. success = CloseHandle(grovelStartEvent);
  2966. ASSERT_ERROR(success);
  2967. grovelStartEvent = NULL;
  2968. }
  2969. if (grovelStopEvent != NULL) {
  2970. success = CloseHandle(grovelStopEvent);
  2971. ASSERT_ERROR(success);
  2972. grovelStopEvent = NULL;
  2973. }
  2974. // If the volume or GrovelerFile are open, close them.
  2975. if (volumeHandle != NULL) {
  2976. success = CloseHandle(volumeHandle);
  2977. ASSERT_ERROR(success);
  2978. volumeHandle = NULL;
  2979. }
  2980. if (grovHandle != NULL) {
  2981. success = CloseHandle(grovHandle);
  2982. ASSERT_ERROR(success);
  2983. grovHandle = NULL;
  2984. }
  2985. // Close this volume's database.
  2986. if (sgDatabase != NULL) {
  2987. delete sgDatabase;
  2988. sgDatabase = NULL;
  2989. }
  2990. if (databaseName != NULL) {
  2991. delete[] databaseName;
  2992. databaseName = NULL;
  2993. }
  2994. // Deallocate the disallowed directory lists.
  2995. if (numDisallowedNames == 0) {
  2996. ASSERT(disallowedNames == NULL);
  2997. } else {
  2998. for (i = 0; i < numDisallowedNames; i++)
  2999. delete (disallowedNames[i]);
  3000. delete disallowedNames;
  3001. disallowedNames = NULL;
  3002. numDisallowedNames = 0;
  3003. }
  3004. if (numDisallowedIDs == 0) {
  3005. ASSERT(disallowedIDs == NULL);
  3006. } else {
  3007. delete disallowedIDs;
  3008. disallowedIDs = NULL;
  3009. numDisallowedIDs = 0;
  3010. }
  3011. if (driveName != NULL) {
  3012. delete[] driveName;
  3013. driveName = NULL;
  3014. }
  3015. if (driveLetterName != NULL) {
  3016. delete[] driveLetterName;
  3017. driveLetterName = NULL;
  3018. }
  3019. return Grovel_ok;
  3020. }
  3021. /*****************************************************************************/
  3022. // grovel() is the front-end method for controlling the groveling
  3023. // process on each NTFS volume. The groveling process itself is
  3024. // implemented in the Worker() method. grovel() starts the groveling
  3025. // process by setting the grovelStart event. Worker() signals back to
  3026. // grovel() that it is finished or has used up its time allocation by
  3027. // setting the grovelStop event, which causes grovel() to return.
  3028. GrovelStatus Groveler::grovel(
  3029. IN DWORD time_allotted,
  3030. OUT DWORD *hash_read_ops,
  3031. OUT DWORD *hash_read_time,
  3032. OUT DWORD *count_of_files_hashed,
  3033. OUT DWORDLONG *bytes_of_files_hashed,
  3034. OUT DWORD *compare_read_ops,
  3035. OUT DWORD *compare_read_time,
  3036. OUT DWORD *count_of_files_compared,
  3037. OUT DWORDLONG *bytes_of_files_compared,
  3038. OUT DWORD *count_of_files_matching,
  3039. OUT DWORDLONG *bytes_of_files_matching,
  3040. OUT DWORD *merge_time,
  3041. OUT DWORD *count_of_files_merged,
  3042. OUT DWORDLONG *bytes_of_files_merged,
  3043. OUT DWORD *count_of_files_enqueued,
  3044. OUT DWORD *count_of_files_dequeued)
  3045. {
  3046. DWORD timeConsumed;
  3047. BOOL success;
  3048. ASSERT(volumeHandle != NULL);
  3049. hashCount = 0;
  3050. hashReadCount = 0;
  3051. hashReadTime = 0;
  3052. hashBytes = 0;
  3053. compareCount = 0;
  3054. compareReadCount = 0;
  3055. compareReadTime = 0;
  3056. compareBytes = 0;
  3057. matchCount = 0;
  3058. matchBytes = 0;
  3059. mergeCount = 0;
  3060. mergeTime = 0;
  3061. mergeBytes = 0;
  3062. numFilesEnqueued = 0;
  3063. numFilesDequeued = 0;
  3064. #ifdef DEBUG_UNTHROTTLED
  3065. timeAllotted = INFINITE;
  3066. #else
  3067. timeAllotted = time_allotted;
  3068. #endif
  3069. startAllottedTime = GetTickCount();
  3070. ASSERT(IsReset(grovelStartEvent));
  3071. success = SetEvent(grovelStartEvent);
  3072. ASSERT_ERROR(success);
  3073. WaitForEvent(grovelStopEvent);
  3074. timeConsumed = GetTickCount() - startAllottedTime;
  3075. // Return the performance statistics.
  3076. if (count_of_files_hashed != NULL)
  3077. *count_of_files_hashed = hashCount;
  3078. if (hash_read_ops != NULL)
  3079. *hash_read_ops = hashReadCount;
  3080. if (hash_read_time != NULL)
  3081. *hash_read_time = hashReadTime;
  3082. if (bytes_of_files_hashed != NULL)
  3083. *bytes_of_files_hashed = hashBytes;
  3084. if (count_of_files_compared != NULL)
  3085. *count_of_files_compared = compareCount;
  3086. if (compare_read_ops != NULL)
  3087. *compare_read_ops = compareReadCount;
  3088. if (compare_read_time != NULL)
  3089. *compare_read_time = compareReadTime;
  3090. if (bytes_of_files_compared != NULL)
  3091. *bytes_of_files_compared = compareBytes;
  3092. if (count_of_files_matching != NULL)
  3093. *count_of_files_matching = matchCount;
  3094. if (bytes_of_files_matching != NULL)
  3095. *bytes_of_files_matching = matchBytes;
  3096. if (count_of_files_merged != NULL)
  3097. *count_of_files_merged = mergeCount;
  3098. if (merge_time != NULL)
  3099. *merge_time = mergeTime;
  3100. if (bytes_of_files_merged != NULL)
  3101. *bytes_of_files_merged = mergeBytes;
  3102. if (count_of_files_enqueued != NULL)
  3103. *count_of_files_enqueued = numFilesEnqueued;
  3104. if (count_of_files_dequeued != NULL)
  3105. *count_of_files_dequeued = numFilesDequeued;
  3106. TRACE_PRINTF(TC_groveler, 2,
  3107. (_T("%s Count Reads Bytes Time (sec)\n"),
  3108. driveLetterName));
  3109. TRACE_PRINTF(TC_groveler, 2,
  3110. (_T(" Hashings: %7lu %7lu %7I64u %4lu.%03lu Time: %5lu.%03lu sec\n"),
  3111. hashCount, hashReadCount, hashBytes,
  3112. hashReadTime / 1000, hashReadTime % 1000,
  3113. timeConsumed / 1000, timeConsumed % 1000));
  3114. TRACE_PRINTF(TC_groveler, 2,
  3115. (_T(" Compares: %7lu %7lu %7I64u %4lu.%03lu Enqueues: %lu\n"),
  3116. compareCount, compareReadCount, compareBytes,
  3117. compareReadTime / 1000, compareReadTime % 1000, numFilesEnqueued));
  3118. TRACE_PRINTF(TC_groveler, 2,
  3119. (_T(" Matches: %7lu %7I64u Dequeues: %lu\n"),
  3120. matchCount, matchBytes, numFilesDequeued));
  3121. TRACE_PRINTF(TC_groveler, 2,
  3122. (_T(" Merges: %7lu %7I64u %4lu.%03lu\n"),
  3123. mergeCount, mergeBytes, mergeTime / 1000, mergeTime % 1000));
  3124. return grovelStatus;
  3125. }
  3126. /*****************************************************************************/
  3127. // count_of_files_in_queue() returns a count of the number
  3128. // of files in this volume's queue waiting to be groveled.
  3129. DWORD Groveler::count_of_files_in_queue() const
  3130. {
  3131. LONG numEntries;
  3132. ASSERT(volumeHandle != NULL);
  3133. ASSERT(sgDatabase != NULL);
  3134. numEntries = sgDatabase->QueueCount();
  3135. if (numEntries < 0)
  3136. return 0;
  3137. TPRINTF((_T("%s: count_of_files_in_queue=%ld\n"),
  3138. driveLetterName, numEntries));
  3139. return (DWORD)numEntries;
  3140. }
  3141. /*****************************************************************************/
  3142. // count_of_files_to_compare() returns 1 if two files are ready to be
  3143. // compared or are in the process of being compared, and 0 otherwise.
  3144. DWORD Groveler::count_of_files_to_compare() const
  3145. {
  3146. DWORD numCompareFiles;
  3147. ASSERT(volumeHandle != NULL);
  3148. ASSERT(sgDatabase != NULL);
  3149. numCompareFiles = inCompare ? 1 : 0;
  3150. TPRINTF((_T("%s: count_of_files_to_compare=%lu\n"),
  3151. driveLetterName, numCompareFiles));
  3152. return numCompareFiles;
  3153. }
  3154. /*****************************************************************************/
  3155. // time_to_first_file_ready() returns the time in milliseconds until
  3156. // the first entry in the queue is ready to be groveled. If the queue
  3157. // is empty, it returns INFINITE. If an error occurs, it returns 0.
  3158. DWORD Groveler::time_to_first_file_ready() const
  3159. {
  3160. SGNativeQueueEntry queueEntry;
  3161. DWORDLONG currentTime;
  3162. DWORD earliestTime;
  3163. LONG num;
  3164. ASSERT(volumeHandle != NULL);
  3165. ASSERT(sgDatabase != NULL);
  3166. queueEntry.fileName = NULL;
  3167. num = sgDatabase->QueueGetFirst(&queueEntry);
  3168. if (num < 0)
  3169. return 0;
  3170. if (num == 0)
  3171. earliestTime = INFINITE;
  3172. else {
  3173. ASSERT(num == 1);
  3174. currentTime = GetTime();
  3175. earliestTime = queueEntry.readyTime > currentTime
  3176. ? (DWORD)((queueEntry.readyTime - currentTime) / 10000)
  3177. : 0;
  3178. }
  3179. TPRINTF((_T("%s: time_to_first_file_ready=%lu.%03lu\n"),
  3180. driveLetterName, earliestTime / 1000, earliestTime % 1000));
  3181. return earliestTime;
  3182. }