/*++

Copyright (c) 1998  Microsoft Corporation

Module Name:

    extract.cpp

Abstract:

    SIS Groveler USN journal reading functions

Authors:

    Cedric Krumbein, 1998

Environment:

    User Mode

Revision History:

--*/

#include "all.hxx"

// NT Update Sequence Number (USN) journal definitions

// USN_ADD_REASONS is the mask of USN reason bits that extract_log2() treats
// as "the file changed": a file whose accumulated reason bits intersect this
// mask is (re)added to the groveler work queue. The reasons that are
// commented out inside the definition are deliberately excluded from the mask.

#define USN_ADD_REASONS ( 0U              \
    | USN_REASON_DATA_OVERWRITE           \
    | USN_REASON_DATA_EXTEND              \
    | USN_REASON_DATA_TRUNCATION          \
    | USN_REASON_NAMED_DATA_OVERWRITE     \
    | USN_REASON_NAMED_DATA_EXTEND        \
    | USN_REASON_NAMED_DATA_TRUNCATION    \
    | USN_REASON_FILE_CREATE              \
    | USN_REASON_FILE_DELETE              \
/*  | USN_REASON_PROPERTY_CHANGE       */ \
/*  | USN_REASON_SECURITY_CHANGE       */ \
/*  | USN_REASON_RENAME_OLD_NAME       */ \
/*  | USN_REASON_RENAME_NEW_NAME       */ \
    | USN_REASON_INDEXABLE_CHANGE         \
    | USN_REASON_BASIC_INFO_CHANGE        \
/*  | USN_REASON_HARD_LINK_CHANGE      */ \
    | USN_REASON_COMPRESSION_CHANGE       \
    | USN_REASON_ENCRYPTION_CHANGE        \
    | USN_REASON_OBJECT_ID_CHANGE         \
/*  | USN_REASON_REPARSE_POINT_CHANGE  */ \
    | USN_REASON_CLOSE                    \
)

/*****************************************************************************/

// set_usn_log_size() asks the file system to set the maximum size of this
// volume's USN journal to usn_log_size, using an allocation delta of
// USN_PAGE_SIZE.
//
// Returns Grovel_ok if the FSCTL_CREATE_USN_JOURNAL request succeeds, or
// Grovel_error (after logging the Win32 error code) if it fails.

GrovelStatus Groveler::set_usn_log_size(
    IN DWORDLONG usn_log_size)
{
    ASSERT(volumeHandle != NULL);

// Describe the requested journal geometry.

    CREATE_USN_JOURNAL_DATA journalParams;

    journalParams.AllocationDelta = USN_PAGE_SIZE;
    journalParams.MaximumSize     = usn_log_size;

// Issue the request. No output buffer is supplied; bytesReturned only
// exists because the call is made without an OVERLAPPED structure.

    DWORD bytesReturned;

    const BOOL ioSucceeded = DeviceIoControl(
        volumeHandle,
        FSCTL_CREATE_USN_JOURNAL,
        &journalParams,
        sizeof(journalParams),
        NULL,
        0,
        &bytesReturned,
        NULL);

    if (ioSucceeded) {
        TPRINTF((_T("%s: set USN journal size to %I64u\n"),
            driveName, usn_log_size));
        return Grovel_ok;
    }

    DPRINTF((_T("%s: error setting USN journal size: %lu\n"),
        driveName, GetLastError()));
    return Grovel_error;
}

/*****************************************************************************/

// get_usn_log_info() queries the current settings of this volume's USN
// journal (FSCTL_QUERY_USN_JOURNAL) and stores them in *usnJournalData.
//
// Returns Grovel_ok on success. Returns Grovel_error if the request fails,
// or if it succeeds but does not return exactly sizeof(USN_JOURNAL_DATA)
// bytes; in the latter case the error code logged is 0.

GrovelStatus Groveler::get_usn_log_info(
    OUT USN_JOURNAL_DATA *usnJournalData)
{
    ASSERT(volumeHandle != NULL);

    DWORD bytesReturned;
    DWORD errorCode = 0;

// Query the USN journal settings.

    BOOL queryOk = DeviceIoControl(
        volumeHandle,
        FSCTL_QUERY_USN_JOURNAL,
        NULL,
        0,
        usnJournalData,
        sizeof(USN_JOURNAL_DATA),
        &bytesReturned,
        NULL);

    if (!queryOk) {
        errorCode = GetLastError();
    } else if (bytesReturned != sizeof(USN_JOURNAL_DATA)) {

// A short or oversized reply is treated as a failure with error code 0.

        queryOk = FALSE;
    }

    if (queryOk) {
        TPRINTF((_T("%s: USN journal: ID=0x%I64x size=0x%I64x\n"),
            driveName, usnJournalData->UsnJournalID,
            usnJournalData->MaximumSize));
        return Grovel_ok;
    }

    DPRINTF((_T("%s: error querying USN journal settings: %lu\n"),
        driveName, errorCode));
    return Grovel_error;
}

/*****************************************************************************/

// extract_log() reads this volume's USN journal.

// If the lastUSN parameter equals zero or doesn't exist, the USN journal
// is read from the beginning. Otherwise, the lastUSN parameter indicates
// the most recent USN entry read during the last call of extract_log().
// If the lastUSN entry is still available in the USN journal, read the
// journal beginning at the entry following the lastUSN entry. If the
// lastUSN entry is no longer available, it indicates that the USN
// journal has wrapped: read all entries from the journal.

// Exception type thrown and caught inside extract_log2() when a request to
// read the USN journal fails or the returned data fails a sanity check.
enum USNException {
    USN_ERROR
};

// Exception type thrown and caught inside extract_log2() when an sgDatabase
// operation (lookup, put, delete, list write, begin/commit) reports failure.
enum DatabaseException {
    DATABASE_ERROR
};

// extract_log2() is the implementation behind extract_log(). It works in
// two phases:
//
// 1. Read the volume's USN journal, starting at the member lastUSN, one
//    page at a time. Each record is validated; file records are merged
//    into an in-memory file table (one entry per file ID) and, while a
//    volume scan is underway (inScan), directory IDs are collected in a
//    directory table. The member lastUSN is advanced as records are read.
//
// 2. Drain the tables into the database, grouping the updates into
//    transactions of at most MAX_ACTIONS_PER_TRANSACTION actions, and,
//    when no volume scan is underway, record lastUSN in the database.
//
// Parameters (each may be NULL, in which case it is not written):
//
//    num_entries_extracted - number of USN records processed
//    num_bytes_extracted   - number of journal bytes processed
//    num_bytes_skipped     - number of journal bytes that were lost
//    num_files_enqueued    - number of files added to the work queue
//    num_files_dequeued    - number of files removed from the work queue
//
// Returns:
//
//    Grovel_ok      - the journal was read and no bytes were skipped
//    Grovel_overrun - lastUSN is uninitialized, or journal bytes were
//                     skipped (including the journal-entry-deleted case)
//    Grovel_error   - a journal read/validation error or a database error
//                     occurred; the OUT parameters are not written. After
//                     a journal error lastUSN is reset to UNINITIALIZED_USN.

GrovelStatus Groveler::extract_log2(
    OUT DWORD     *num_entries_extracted,
    OUT DWORDLONG *num_bytes_extracted,
    OUT DWORDLONG *num_bytes_skipped,
    OUT DWORD     *num_files_enqueued,
    OUT DWORD     *num_files_dequeued)
{
    struct FileEntry {
        DWORDLONG fileID,
                  parentID,
                  timeStamp;
        DWORD     attributes,
                  reason;
    } *fileEntry = NULL;

    struct DirEntry {
        DWORDLONG dirID;
    } *dirEntry = NULL;

    Table *fileTable = NULL,
          *dirTable  = NULL;

    BYTE usnBuffer[USN_PAGE_SIZE + sizeof(DWORDLONG)];

    READ_USN_JOURNAL_DATA readUSN;

    USN_RECORD *usnRecord;

    SGNativeTableEntry tableEntry;

    SGNativeQueueEntry queueEntry;

    SGNativeStackEntry stackEntry;

    SGNativeListEntry listEntry;

    TCHAR listValue[17];

    DWORDLONG numBytesExtracted = 0,
              numBytesSkipped   = 0,
              startUSN,
              firstUSN          = 0,
              nextUSN,
              thisUSN;

    DWORD numEntriesExtracted = 0,
          numTableDeletions   = 0,
          numQueueDeletions   = 0,
          numQueueAdditions   = 0,
          numActions          = 0,
          offset,
          bytesRead,
          lastError           = 0;

    LONG num;

    BOOL firstEntry = TRUE,
         deleteEntry,
         addEntry,
         success;

    GrovelStatus    status;

    ASSERT(volumeHandle != NULL);
    ASSERT(sgDatabase != NULL);

// If we don't know the previous USN, we can't extract.

    if (lastUSN == UNINITIALIZED_USN) {
        status = Grovel_overrun;
        goto Abort;
    }
    ASSERT(usnID != UNINITIALIZED_USN);

    fileTable = new Table;
    ASSERT(fileTable != NULL);

    if (inScan) {
        dirTable = new Table;
        ASSERT(dirTable != NULL);
    }

// Set up to read the volume's USN journal.

    startUSN = lastUSN == UNINITIALIZED_USN ? 0 : lastUSN;

    readUSN.ReturnOnlyOnClose =  1;
    readUSN.Timeout           =  0;
    readUSN.BytesToWaitFor    =  0;
    readUSN.ReasonMask        = ~0U;
    readUSN.UsnJournalID      =  usnID;

// Read the USN journal one page at a time.

    try {
        while (TRUE) {
            readUSN.StartUsn = startUSN;

            if (!DeviceIoControl(
                volumeHandle,
                FSCTL_READ_USN_JOURNAL,
                &readUSN,
                sizeof(READ_USN_JOURNAL_DATA),
                usnBuffer,
                USN_PAGE_SIZE + sizeof(DWORDLONG),
                &bytesRead,
                NULL)) {

                lastError = GetLastError();


// NTRAID#65198-2000/03/10-nealch  Handle USN id change (treat as overwrite w/ unknown no. of bytes skipped)

// If the journal overflowed, report by how much.

                if (lastError == ERROR_KEY_DELETED || lastError == ERROR_JOURNAL_ENTRY_DELETED) {
                    USN_JOURNAL_DATA usnJournalData;

                    if (get_usn_log_info(&usnJournalData) != Grovel_ok) {

// This return bypasses every other cleanup path, so release the tables here.

                        if (fileTable != NULL) {
                            delete fileTable;
                            fileTable = NULL;
                        }

                        if (dirTable != NULL) {
                            delete dirTable;
                            dirTable = NULL;
                        }

                        return Grovel_error;
                    }

                    // The USN journal will not wrap in our lifetimes so we don't really need
                    // to handle USN Journal wrapping.
                    ASSERT((DWORDLONG) usnJournalData.FirstUsn > lastUSN);

                    numBytesSkipped = (DWORDLONG) usnJournalData.FirstUsn - lastUSN;

// The tables are released at the Overrun label.

                    goto Overrun;
                }

                throw USN_ERROR;
            }

            lastError = 0;

// The buffer begins with the USN at which the next read should start.

            if (bytesRead < sizeof(DWORDLONG))
                throw USN_ERROR;

            nextUSN = *(DWORDLONG *)usnBuffer;
            if (nextUSN < startUSN)
                throw USN_ERROR;

// No progress means the end of the journal has been reached; in that
// case the buffer must contain nothing but the next-USN value.

            if (nextUSN == startUSN) {
                if (bytesRead != sizeof(DWORDLONG))
                    throw USN_ERROR;
                break;
            }

            bytesRead         -= sizeof(DWORDLONG);
            offset             = 0;
            numBytesExtracted += bytesRead;

// Process each USN journal entry.

            while (bytesRead > 0) {
                if (bytesRead < sizeof(USN_RECORD))
                    throw USN_ERROR;

                usnRecord = (USN_RECORD *)&usnBuffer[offset + sizeof(DWORDLONG)];

// The record must be large enough to hold its own file name
// and must not extend past the data that was actually read.

                if (usnRecord->RecordLength <
                        offsetof(USN_RECORD, FileName) + usnRecord->FileNameLength
                 || usnRecord->RecordLength > bytesRead)
                    throw USN_ERROR;

                thisUSN = (DWORDLONG)usnRecord->Usn;
                if (thisUSN < startUSN + offset)
                    throw USN_ERROR;

// If this is the first entry, check if it is the expected
// USN. If it isn't, the USN journal has wrapped.

                if (firstEntry) {
                    if (startUSN == 0)
                        numBytesSkipped = thisUSN;
                    else if (thisUSN <= startUSN + usnRecord->RecordLength)
                        numBytesSkipped = 0;
                    else
                        numBytesSkipped = thisUSN - startUSN - usnRecord->RecordLength;
                }

// Skip the first entry if the starting address is greater than zero.
// After skipping the first entry, examine each USN entry as follows:
//
// - If the entry is a directory, and a volume scan is underway,
//   add the directory's ID to the directory table.
//
// - If the entry is a file, add it to the file table. Include
//   its ID and its parent directory's ID, its most recent time
//   stamp and attributes, and its accumulated reason bits.

                if (firstEntry && startUSN > 0)
                    numBytesExtracted -= usnRecord->RecordLength;

                else {
                    if (usnRecord->      FileReferenceNumber == 0
                     || usnRecord->ParentFileReferenceNumber == 0)
                        throw USN_ERROR;

// The entry is a directory.

                    if ((usnRecord->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {

                        if (dirTable != NULL) {
                            dirEntry = (DirEntry *)dirTable->Get(
                                (const VOID *)&usnRecord->FileReferenceNumber,
                                sizeof(DWORDLONG));

                            if (dirEntry != NULL) {
                                ASSERT(dirEntry->dirID == usnRecord->FileReferenceNumber);
                            } else {
                                dirEntry = new DirEntry;
                                ASSERT(dirEntry != NULL);
                                dirEntry->dirID = usnRecord->FileReferenceNumber;
                                success = dirTable->Put((VOID *)dirEntry, sizeof(DWORDLONG));
                                ASSERT(success);
                            }
                        }
                    }

// The entry is a file.  If USN_SOURCE_DATA_MANAGEMENT is set, assume this entry was created by
// the groveler during a merge operation.

                    else if ((usnRecord->SourceInfo & USN_SOURCE_DATA_MANAGEMENT) == 0) {

                        fileEntry = (FileEntry *)fileTable->Get(
                            (const VOID *)&usnRecord->FileReferenceNumber,
                            sizeof(DWORDLONG));

                        if (fileEntry != NULL) {
                            ASSERT(fileEntry->fileID == usnRecord->FileReferenceNumber);
                        } else {
                            fileEntry = new FileEntry;
                            ASSERT(fileEntry != NULL);
                            fileEntry->fileID   = usnRecord->FileReferenceNumber;
                            fileEntry->reason   = 0;
                            success = fileTable->Put((VOID *)fileEntry, sizeof(DWORDLONG));
                            ASSERT(success);
                        }

                        fileEntry->parentID   = usnRecord->ParentFileReferenceNumber;
                        fileEntry->timeStamp  = (DWORDLONG)usnRecord->TimeStamp.QuadPart;
                        fileEntry->attributes =            usnRecord->FileAttributes;

// A delete supersedes every reason accumulated so far; any other
// reason is merged into the accumulated set.

                        if ((usnRecord->Reason & USN_REASON_FILE_DELETE) != 0)
                            fileEntry->reason  = USN_REASON_FILE_DELETE;
                        else
                            fileEntry->reason |= usnRecord->Reason;
                    } else {
                        TPRINTF((_T("%s: USN_SOURCE_DATA_MANAGEMENT set on file 0x%016I64x\n"),
                            driveName, usnRecord->FileReferenceNumber));
                    }

                    if (numEntriesExtracted++ == 0)
                        firstUSN = thisUSN;
                }

                lastUSN    = thisUSN;
                offset    += usnRecord->RecordLength;
                bytesRead -= usnRecord->RecordLength;
                firstEntry = FALSE;
            }

            startUSN = nextUSN;
        }
    }

// If an error occurred while reading the USN journal, return an error status.

    catch (USNException usnException) {
        ASSERT(usnException == USN_ERROR);

        if (fileTable != NULL) {
            delete fileTable;
            fileTable = NULL;
        }

        if (dirTable != NULL) {
            delete dirTable;
            dirTable = NULL;
        }

        lastUSN = UNINITIALIZED_USN;

        DPRINTF((_T("%s: error reading USN journal: %lu\n"),
            driveName, lastError));
        return Grovel_error;
    }

// At this point fileEntry and dirEntry may still refer to entries that are
// stored in the tables. Forget them, so that from here on a non-NULL pointer
// always denotes an entry that GetFirst() has handed to this function.

    fileEntry = NULL;
    dirEntry  = NULL;

// We've finished reading the USN journal, so update the database. Process
// each entry in the file table, and group the updates into transactions.

    try {
        while ((fileEntry = (FileEntry *)fileTable->GetFirst()) != NULL) {
            ASSERT(fileEntry->fileID != 0);

// If the file is currently open in the grovel process, skip this entry.

            if ((inUseFileID1 != NULL && fileEntry->fileID == *inUseFileID1)
             || (inUseFileID2 != NULL && fileEntry->fileID == *inUseFileID2)) {

                DPRINTF((_T("%s: extract_log/grovel collision on file 0x%016I64x\n"),
                    driveName, fileEntry->fileID));

            } else {

// Delete the file from the queue and the table...
//
// - if the file's most recent reason bits in the USN journal
//   indicate it was deleted,
//
// - if the file or the file's most recent parent directory is disallowed,
//
// - or if the file has disallowed attributes.
//
// Otherwise, update or add the file to the queue...
//
// - if the file's reason bits indicate it was changed,
//
// - or if the file isn't present in the table.

                if (fileEntry->reason == USN_REASON_FILE_DELETE
                 || !IsAllowedID(fileEntry->fileID)
                 || !IsAllowedID(fileEntry->parentID)
                 || (fileEntry->attributes & disallowedAttributes) != 0) {
                    deleteEntry = TRUE;
                    addEntry    = FALSE;
                } else {
                    deleteEntry = FALSE;
                    if ((fileEntry->reason & USN_ADD_REASONS) != 0)
                        addEntry = TRUE;
                    else {
                        tableEntry.fileID = fileEntry->fileID;
                        num = sgDatabase->TableGetFirstByFileID(&tableEntry);
                        if (num < 0)
                            throw DATABASE_ERROR;
                        ASSERT(num == 0 || num == 1);
                        addEntry = num == 0;
                    }
                }

                if (deleteEntry || addEntry) {

// numActions == 0 means no transaction is open. Opening one counts
// as the first action, so a non-zero count marks an open transaction.

                    if (numActions == 0) {
                        if (sgDatabase->BeginTransaction() < 0)
                            throw DATABASE_ERROR;
                        numActions = 1;
                    }

                    queueEntry.reason = 0;

                    num = sgDatabase->TableDeleteByFileID(fileEntry->fileID);
                    if (num < 0)
                        throw DATABASE_ERROR;
                    if (num > 0) {
                        ASSERT(num == 1);
                        numTableDeletions++;
                        numActions++;
                    }

// Look up any existing queue entry for this file. Its reason bits, if the
// lookup fills them in, are merged into the entry that is added below.

                    queueEntry.fileID   = fileEntry->fileID;
                    queueEntry.fileName = NULL;
                    num = sgDatabase->QueueGetFirstByFileID(&queueEntry);
                    if (num < 0)
                        throw DATABASE_ERROR;

                    if (num > 0) {
                        ASSERT(num == 1);
                        num = sgDatabase->QueueDeleteByFileID(fileEntry->fileID);
                        if (num < 0)
                            throw DATABASE_ERROR;
                        ASSERT(num == 1);
                        numQueueDeletions++;
                        numActions++;
                    }

                    if (addEntry) {
                        queueEntry.fileID    = fileEntry->fileID;
                        queueEntry.parentID  = 0;
                        queueEntry.reason   |= fileEntry->reason;
                        queueEntry.readyTime = fileEntry->timeStamp + minFileAge;
                        queueEntry.retryTime = 0;
                        queueEntry.fileName  = NULL;

                        num = sgDatabase->QueuePut(&queueEntry);
                        if (num < 0)
                            throw DATABASE_ERROR;
                        ASSERT(num == 1);

#ifdef DEBUG_USN_REASON
                        if (numQueueAdditions == 0) {
                            DPRINTF((_T("--> __REASON__ _____FILE_ID______\n")));
                        }
                        DPRINTF((_T("    0x%08lx 0x%016I64x\n"),
                            fileEntry->reason, fileEntry->fileID));
#endif

                        numQueueAdditions++;
                        numActions++;
                    }

                    if (numActions >= MAX_ACTIONS_PER_TRANSACTION) {
                        if (!sgDatabase->CommitTransaction())
                            throw DATABASE_ERROR;
                        TPRINTF((_T("%s: committing %lu actions to \"%s\"\n"),
                            driveName, numActions, databaseName));
                        numActions = 0;
                    }
                }
            }

            delete fileEntry;
            fileEntry = NULL;
        }

        delete fileTable;
        fileTable = NULL;

// Process each entry in the directory table. If the directory hasn't already
// been scanned or isn't on the list to be scanned, add it to the list.

        if (dirTable != NULL) {
            ASSERT(inScan);

            while ((dirEntry = (DirEntry *)dirTable->GetFirst()) != NULL) {
                ASSERT(dirEntry->dirID != 0);

                stackEntry.fileID = dirEntry->dirID;
                num = sgDatabase->StackGetFirstByFileID(&stackEntry);
                if (num < 0)
                    throw DATABASE_ERROR;

                if (num == 0) {
                    if (numActions == 0) {
                        if (sgDatabase->BeginTransaction() < 0)
                            throw DATABASE_ERROR;
                        numActions = 1;
                    }

                    num = sgDatabase->StackPut(dirEntry->dirID, FALSE);
                    if (num < 0)
                        throw DATABASE_ERROR;
                    ASSERT(num == 1);
                    numActions++;

                    if (numActions >= MAX_ACTIONS_PER_TRANSACTION) {
                        if (!sgDatabase->CommitTransaction())
                            throw DATABASE_ERROR;
                        TPRINTF((_T("%s: committing %lu actions to \"%s\"\n"),
                            driveName, numActions, databaseName));
                        numActions = 0;
                    }
                }

                delete dirEntry;
                dirEntry = NULL;
            }

            delete dirTable;
            dirTable = NULL;
        }

// Update the last USN number in the database, then commit the changes.  If we're
// doing a volume scan, don't update the lastUSN until the scan is complete.

        if (!inScan) {

// listValue holds exactly 16 hexadecimal digits plus the terminator.

            _stprintf(listValue, _T("%016I64x"), lastUSN);
            listEntry.name  = LAST_USN_NAME;
            listEntry.value = listValue;
            num = sgDatabase->ListWrite(&listEntry);
            if (num <= 0)
                throw DATABASE_ERROR;
        }

        if (numActions > 0) {
            if (!sgDatabase->CommitTransaction())
                throw DATABASE_ERROR;
            TPRINTF((_T("%s: committing %lu actions to \"%s\"\n"),
                driveName, numActions, databaseName));
            numActions = 0;
        }
    }

// If a database error occurred, return an error status.

    catch (DatabaseException databaseException) {

        ASSERT(databaseException == DATABASE_ERROR);

        if (numActions > 0) {
            sgDatabase->AbortTransaction();
            numActions = 0;
        }

// The entry that was being processed when the error was raised was obtained
// from GetFirst(); the loops above delete every entry GetFirst() returns,
// so it has to be freed here as well or it would be leaked.

        if (fileEntry != NULL) {
            delete fileEntry;
            fileEntry = NULL;
        }

        if (dirEntry != NULL) {
            delete dirEntry;
            dirEntry = NULL;
        }

        if (fileTable != NULL) {
            delete fileTable;
            fileTable = NULL;
        }

        if (dirTable != NULL) {
            delete dirTable;
            dirTable = NULL;
        }

        return Grovel_error;
    }

    Overrun:

// On the normal path both tables have already been released. When the
// journal-overflow path jumps here they are still allocated, so free them.

    if (fileTable != NULL) {
        delete fileTable;
        fileTable = NULL;
    }

    if (dirTable != NULL) {
        delete dirTable;
        dirTable = NULL;
    }

    status = numBytesSkipped == 0 ? Grovel_ok : Grovel_overrun;

    Abort:

// Return the performance statistics.

    if (num_entries_extracted != NULL)
        *num_entries_extracted = numEntriesExtracted;
    if (num_bytes_extracted   != NULL)
        *num_bytes_extracted   = numBytesExtracted;
    if (num_bytes_skipped     != NULL)
        *num_bytes_skipped     = numBytesSkipped;
    if (num_files_enqueued    != NULL)
        *num_files_enqueued    = numQueueAdditions;
    if (num_files_dequeued    != NULL)
        *num_files_dequeued    = numQueueDeletions;

#if DBG
    if (numEntriesExtracted > 0 && firstUSN < lastUSN) {
        TRACE_PRINTF(TC_extract, 2,
            (_T("%s: USN 0x%I64x-%I64x\n"), driveName, firstUSN, lastUSN));
    } else {
        TRACE_PRINTF(TC_extract, 2,
            (_T("%s: USN 0x%I64x\n"),       driveName, lastUSN));
    }
    TRACE_PRINTF(TC_extract, 2,
        (_T("   NumEntriesExtracted=%lu NumBytesExtracted=%I64u NumBytesSkipped=%I64u\n"),
        numEntriesExtracted, numBytesExtracted, numBytesSkipped));
    TRACE_PRINTF(TC_extract, 2,
        (_T("   NumTableDeletions=%lu NumQueueDeletions=%lu NumQueueAdditions=%lu\n"),
        numTableDeletions, numQueueDeletions, numQueueAdditions));
#endif

    return status;

}

// extract_log() forwards its arguments unchanged to extract_log2() and
// returns extract_log2()'s status. When _CRTDBG is defined, the CRT debug
// heap is checkpointed before and after the call, and the difference is
// dumped if the two checkpoints differ.

GrovelStatus Groveler::extract_log(
    OUT DWORD     *num_entries_extracted,
    OUT DWORDLONG *num_bytes_extracted,
    OUT DWORDLONG *num_bytes_skipped,
    OUT DWORD     *num_files_enqueued,
    OUT DWORD     *num_files_dequeued)
{
#ifdef _CRTDBG
    _CrtMemState heapBefore, heapAfter, heapDelta;

    _CrtMemCheckpoint(&heapBefore);
#endif

    const GrovelStatus result = extract_log2(
        num_entries_extracted,
        num_bytes_extracted,
        num_bytes_skipped,
        num_files_enqueued,
        num_files_dequeued);

#ifdef _CRTDBG
    _CrtMemCheckpoint(&heapAfter);
    if (_CrtMemDifference(&heapDelta, &heapBefore, &heapAfter))
        _CrtMemDumpStatistics(&heapDelta);
#endif

    return result;
}