/* * CSVParse.C * * CSV Parsing functions * * Copyright 1997 Microsoft Corporation. All Rights Reserved. */ #include #include #include #include #include "dbgutil.h" #include #define CR_CHAR 0x0d #define LF_CHAR 0x0a #define CCH_READ_BUFFER 256 #define NUM_ITEM_SLOTS 32 /*************************************************************************** Name : ReadCSVChar Purpose : Reads a single char from a file Parameters: hFile = file handle pcbBuffer = pointer to size of buffer lppBuffer = pointer to pointer to buffer lppRead = pointer to pointer to next location to use Returns : -1 = Out of memory 0 = End of file 1 = Char successfully read Comment : Dynamically grows *lppBuffer as necessary ***************************************************************************/ int ReadCSVChar(HANDLE hFile, int *pcbBuffer, PUCHAR *lppBuffer, PUCHAR *lppRead) { int cbOffset; ULONG cbReadFile; PUCHAR lpBuffer; cbOffset = (int) (*lppRead - *lppBuffer); if (cbOffset >= *pcbBuffer) { // Buffer is too small. Reallocate! *pcbBuffer += CCH_READ_BUFFER; if (! (lpBuffer = LocalReAlloc(*lppBuffer, *pcbBuffer, LMEM_MOVEABLE | LMEM_ZEROINIT))) { DebugTrace("LocalReAlloc(%u) -> %u\n", *pcbBuffer, GetLastError()); return(-1); } *lppBuffer = lpBuffer; *lppRead = *lppBuffer + cbOffset; } // 1 character at a time if (ReadFile(hFile, *lppRead, 1, &cbReadFile, NULL) && cbReadFile) return(1); return(0); } /*************************************************************************** Name : ReadCSVItem Purpose : Reads an item from a CSV file Parameters: hFile = file handle pcbBuffer = pointer to size of buffer lppBuffer = pointer to pointer to buffer szSep = current separator string Returns : -1 = Out of memory 0 = Item read in, and none left 1 = Item read in, more items left Comment : CSV special characters are '"', szSep, CR and LF. Rules for quotes: 1. If an item starts with a '"', then the item is quoted and must end with a '"'. 2. Any '"' characters found in a non-quoted string will not be treated specially. Technically, there should not be quotes in a non-quoted string, but we have to do something if we find one. 3. A quoted item ends with: a) quote szSep b) quote newline or, c) quote 4. Two quotes together in a quoted string are translated into a single quote. ***************************************************************************/ int ReadCSVItem(HANDLE hFile, int *pcbBuffer, PUCHAR *lppBuffer, LPTSTR szSep) { BOOL fQuoted, fDone, fFoundSepCh; int cbReadFile; PUCHAR lpRead, szSepT; // This function is always called with one character already read lpRead = *lppBuffer; if (*lpRead == '"') { fQuoted = TRUE; cbReadFile = ReadCSVChar(hFile, pcbBuffer, lppBuffer, &lpRead); } else { fQuoted = FALSE; cbReadFile = 1; } szSepT = szSep; fDone = FALSE; do { if (cbReadFile <= 0) { // End of file means end of item if (cbReadFile == 0) *lpRead = '\0'; break; } fFoundSepCh = FALSE; switch (*lpRead) { case CR_CHAR: case LF_CHAR: if (!fQuoted) { // End of line and item *lpRead = '\0'; cbReadFile = 0; fDone = TRUE; } break; case '"': if (fQuoted) { // See if the next character is a quote, CR, or LF lpRead++; cbReadFile = ReadCSVChar(hFile, pcbBuffer, lppBuffer, &lpRead); if ((cbReadFile <= 0) || (*lpRead == '"') || (*lpRead == CR_CHAR) || (*lpRead == LF_CHAR)) { if ((cbReadFile <= 0) || (*lpRead != '"')) { if (cbReadFile >= 0) { // End of file, or CR or LF *(lpRead - 1) = '\0'; cbReadFile = 0; } // else out of memory fDone = TRUE; } else { // Embedded quote - get rid of one lpRead--; } break; } // We have read another character, and it is not a quote, CR, or LF // Two possibilities: // 1) Separator // 2) Something else - this is an error condition szSepT = szSep; while ((cbReadFile > 0) && (*szSepT != '\0') && (*lpRead == *szSepT)) { szSepT++; if (*szSepT != '\0') { lpRead++; cbReadFile = ReadCSVChar(hFile, pcbBuffer, lppBuffer, &lpRead); } } if ((cbReadFile <= 0) || (*szSepT == '\0')) { if (cbReadFile >= 0) { // If cbReadFile is zero, we hit the end of file // before finding the complete separator. In this // case, we simply take every character we have // read, including the second quote, and use that // as the item. // // Otherwise, we found the complete separator. if (cbReadFile > 0) lpRead -= lstrlen(szSep); *lpRead = '\0'; } fDone = TRUE; } else { // We found a second quote, but it was not followed by a // separator. In this case, we keep reading as if we are // in an unquoted string. fQuoted = FALSE; } } break; default: if (!fQuoted) { if (*lpRead == *szSepT) { szSepT++; if (*szSepT == '\0') { // End of separator, thus end of item lpRead -= (lstrlen(szSep) - 1); *lpRead = '\0'; fDone = TRUE; } else fFoundSepCh = TRUE; } } break; } if (!fDone) { if (!fFoundSepCh) szSepT = szSep; lpRead++; cbReadFile = ReadCSVChar(hFile, pcbBuffer, lppBuffer, &lpRead); } } while (!fDone); return(cbReadFile); } /*************************************************************************** Name : InsertItem Purpose : Takes an item read from the file and inserts it into the array Parameters: iItem = array index to insert pcItemSlots = number of currently allocated elements cGrow = number of items to grow the array by, if necessary prgItemSlots = pointer to the actual array lpBuffer = string to insert Returns : TRUE = item successfully inserted FALSE = out of memory ***************************************************************************/ BOOL InsertItem(int iItem, int *pcItemSlots, int cGrow, PUCHAR **prgItemSlots, PUCHAR lpBuffer) { PUCHAR *rgItemSlotsNew, lpItem; // Make sure there's room, first if (iItem >= *pcItemSlots) { // Array is too small. Reallocate! *pcItemSlots += cGrow; rgItemSlotsNew = LocalReAlloc(*prgItemSlots, *pcItemSlots * sizeof(PUCHAR), LMEM_MOVEABLE | LMEM_ZEROINIT); if (!rgItemSlotsNew) { DebugTrace("LocalReAlloc(%u) -> %u\n", *pcItemSlots * sizeof(PUCHAR), GetLastError()); return(FALSE); } *prgItemSlots = rgItemSlotsNew; } lpItem = LocalAlloc(LPTR, lstrlen(lpBuffer) + 1); if (!lpItem) { DebugTrace("LocalAlloc(%u) -> %u\n", lstrlen(lpBuffer) + 1, GetLastError()); return(FALSE); } StrCpyN(lpItem, lpBuffer, lstrlen(lpBuffer) + 1); (*prgItemSlots)[iItem] = lpItem; return(TRUE); } /*************************************************************************** Name : ReadCSVLine x Purpose : Reads a line from a CSV file with fixups for special characters Parameters: hFile = file handle szSep = list separator of the current regional settings lpcItems -> Returned number of items lprgItems -> Returned array of item strings. Caller is responsible for LocalFree'ing each string pointer and this array pointer. Returns : HRESULT Comment : Calls the above helper functions to do most of the work. ***************************************************************************/ HRESULT ReadCSVLine(HANDLE hFile, LPTSTR szSep, ULONG * lpcItems, PUCHAR ** lpprgItems) { HRESULT hResult = hrSuccess; register ULONG i; PUCHAR lpBuffer = NULL, lpRead, lpItem; ULONG cbBuffer = 0; int cbReadFile = -1; UCHAR chLastChar; ULONG iItem = 0; ULONG cItemSlots = 0; PUCHAR * rgItemSlots = NULL; LPTSTR szSepT; // Start out with 1024 character buffer. Realloc as necesary. cbBuffer = CCH_READ_BUFFER; if (! (lpRead = lpBuffer = LocalAlloc(LPTR, cbBuffer))) { DebugTrace("LocalAlloc(%u) -> %u\n", cbBuffer, GetLastError()); goto exit; } // Start out with 32 item slots. Realloc as necesary. cItemSlots = NUM_ITEM_SLOTS; if (! (rgItemSlots = LocalAlloc(LPTR, cItemSlots * sizeof(PUCHAR)))) { DebugTrace("LocalAlloc(%u) -> %u\n", cItemSlots * sizeof(PUCHAR), GetLastError()); goto exit; } // Skip past leading CR/LF characters do cbReadFile = ReadCSVChar(hFile, &cbBuffer, &lpBuffer, &lpRead); while((cbReadFile > 0) && ((*lpBuffer == CR_CHAR) || (*lpBuffer == LF_CHAR))); if (cbReadFile == 0) { // Nothing to return DebugTrace("ReadFile -> EOF\n"); hResult = ResultFromScode(MAPI_E_NOT_FOUND); } // Read items until end of line or EOF while (cbReadFile > 0) { cbReadFile = ReadCSVItem(hFile, &cbBuffer, &lpBuffer, szSep); if (cbReadFile >= 0) { // Dup the item into the next array slot. if (!InsertItem(iItem, &cItemSlots, cbReadFile ? NUM_ITEM_SLOTS : 1, &rgItemSlots, lpBuffer)) cbReadFile = -1; else iItem++; if (cbReadFile > 0) { // More data to be read lpRead = lpBuffer; cbReadFile = ReadCSVChar(hFile, &cbBuffer, &lpBuffer, &lpRead); } } } exit: if (cbReadFile < 0) hResult = ResultFromScode(MAPI_E_NOT_ENOUGH_MEMORY); if (lpBuffer) { LocalFree(lpBuffer); } if (hResult) { // Clean up if (rgItemSlots) { for (i = 0; i < iItem; i++) { if (rgItemSlots[i]) { LocalFree(rgItemSlots[i]); } } LocalFree(rgItemSlots); } *lpcItems = 0; *lpprgItems = NULL; } else { *lpcItems = iItem; // One based *lpprgItems = rgItemSlots; } return(hResult); }