// // fsteam.cpp // Implements a file stream // for reading text files line by line. // the standard C streams, only support // unicode as binary streams which are a pain to work // with). // // This class reads/writes both ANSI and UNICODE files // and converts to/from UNICODE internally // // Does not do any CR/LF translations either on input // or output. // // Copyright(C) Microsoft Corporation 2000 // Author: Nadim Abdo (nadima) // #include "stdafx.h" #define TRC_GROUP TRC_GROUP_UI #define TRC_FILE "fstream.cpp" #include #include "fstream.h" #ifndef UNICODE // // Adding ansi support is just a matter of converting // from UNICODE file to ANSI internal if the file // has a UNICODE BOM // #error THIS MODULE ASSUMES BEING COMPILED UNICODE, ADD ANSI IF NEEDED #endif CTscFileStream::CTscFileStream() { DC_BEGIN_FN("~CFileStream"); _hFile = INVALID_HANDLE_VALUE; _pBuffer = NULL; _fOpenForRead = FALSE; _fOpenForWrite = FALSE; _fReadToEOF = FALSE; _fFileIsUnicode = FALSE; _fAtStartOfFile = TRUE; _pAnsiLineBuf = NULL; _cbAnsiBufSize = 0; DC_END_FN(); } CTscFileStream::~CTscFileStream() { DC_BEGIN_FN("~CFileStream"); Close(); if(_hFile != INVALID_HANDLE_VALUE) { CloseHandle(_hFile); _hFile = INVALID_HANDLE_VALUE; } if(_pBuffer) { LocalFree(_pBuffer); _pBuffer = NULL; } if(_pAnsiLineBuf) { LocalFree(_pAnsiLineBuf); _pAnsiLineBuf = NULL; } DC_END_FN(); } INT CTscFileStream::OpenForRead(LPTSTR szFileName) { DC_BEGIN_FN("OpenForRead"); INT err; err = Close(); if(err != ERR_SUCCESS) { return err; } //Alloc read buffers if(!_pBuffer) { _pBuffer = (PBYTE)LocalAlloc(LPTR, READ_BUF_SIZE); if(!_pBuffer) { return ERR_OUT_OF_MEM; } } if(!_pAnsiLineBuf) { _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = LINEBUF_SIZE; } memset(_pBuffer, 0, READ_BUF_SIZE); memset(_pAnsiLineBuf, 0, LINEBUF_SIZE); _hFile = CreateFile( szFileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, //Creates if !exist FILE_ATTRIBUTE_NORMAL, NULL); if(INVALID_HANDLE_VALUE == _hFile) { TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; } #ifdef OS_WINCE DWORD dwRes; dwRes = SetFilePointer( _hFile, 0, NULL, FILE_BEGIN); if (dwRes == (DWORD)0xffffffff) { DWORD dwErr = GetLastError(); TRC_ERR((TB, _T("CreateFile failed to reset: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; } #endif _curBytePtr = 0; _curBufSize = 0; _tcsncpy(_szFileName, szFileName, MAX_PATH-1); //Yes this is ok, the size is MAX_PATH+1 ;-) _szFileName[MAX_PATH] = 0; _fOpenForRead = TRUE; _fFileIsUnicode = FALSE; _fAtStartOfFile = TRUE; DC_END_FN(); return ERR_SUCCESS; } // // Opens the stream for writing // always nukes the existing file contents // INT CTscFileStream::OpenForWrite(LPTSTR szFileName, BOOL fWriteUnicode) { DC_BEGIN_FN("OpenForWrite"); INT err; DWORD dwAttributes = 0; err = Close(); if(err != ERR_SUCCESS) { return err; } if(_pAnsiLineBuf) { LocalFree(_pAnsiLineBuf); _pAnsiLineBuf = NULL; } _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = LINEBUF_SIZE; // // Preserve any existing attributes // dwAttributes = GetFileAttributes(szFileName); if (-1 == dwAttributes) { TRC_ERR((TB,_T("GetFileAttributes for %s failed 0x%x"), szFileName, GetLastError())); dwAttributes = FILE_ATTRIBUTE_NORMAL; } _hFile = CreateFile( szFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, //Creates and reset dwAttributes, NULL); if(INVALID_HANDLE_VALUE == _hFile) { TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; } _tcsncpy(_szFileName, szFileName, MAX_PATH-1); //Yes this is ok, the size is MAX_PATH+1 ;-) _szFileName[MAX_PATH] = 0; _fOpenForWrite = TRUE; _fFileIsUnicode = fWriteUnicode; _fAtStartOfFile = TRUE; DC_END_FN(); return ERR_SUCCESS; } INT CTscFileStream::Close() { DC_BEGIN_FN("Close"); if(_hFile != INVALID_HANDLE_VALUE) { CloseHandle(_hFile); _hFile = INVALID_HANDLE_VALUE; } _fOpenForRead = _fOpenForWrite = FALSE; _fReadToEOF = FALSE; _tcscpy(_szFileName, _T("")); //Don't free the read buffers //they'll be cached for subsequent use DC_END_FN(); return ERR_SUCCESS; } // // Read a line from the file and return it as UNICODE // // Read up to the next newline, or till cbLineSize/sizeof(WCHAR) or // untill the EOF. Whichever comes first. // // INT CTscFileStream::ReadNextLine(LPWSTR szLine, INT cbLineSize) { BOOL bRet = FALSE; INT cbBytesCopied = 0; INT cbOutputSize = 0; BOOL fDone = FALSE; PBYTE pOutBuf = NULL; //where to write the result BOOL fFirstIter = TRUE; DC_BEGIN_FN("ReadNextLine"); TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE, (TB,_T("No file handle"))); TRC_ASSERT(_pBuffer, (TB,_T("NO buffer"))); if(_fOpenForRead && !_fReadToEOF && cbLineSize && szLine) { // //Read up to a line's worth (terminated by \n) //but stop short if szLine is too small // // //Check if we've got enough buffered bytes to read from //if not go ahead and read another buffer's worth // while(!fDone) { if(_curBytePtr >= _curBufSize) { //Read next buffer full DWORD cbRead = 0; bRet = ReadFile(_hFile, _pBuffer, READ_BUF_SIZE, &cbRead, NULL); if(!bRet && GetLastError() == ERROR_HANDLE_EOF) { //cancel error bRet = TRUE; _fReadToEOF = TRUE; } if(bRet) { if(cbRead) { _curBufSize = cbRead; _curBytePtr = 0; } else { _fReadToEOF = TRUE; if(cbBytesCopied) { //reached EOF but we've returned at least //some data return ERR_SUCCESS; } else { //EOF can't read any data return ERR_EOF; } } } else { TRC_NRM((TB,_T("ReadFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } } TRC_ASSERT(_curBytePtr < READ_BUF_SIZE, (TB,_T("_curBytePtr %d exceeds buf size"), _curBytePtr)); // // If we're at the start of the file, // if(_fAtStartOfFile) { //CAREFULL this could update the current byte ptr CheckFirstBufMarkedUnicode(); _fAtStartOfFile = FALSE; } if(fFirstIter) { if(_fFileIsUnicode) { //file is unicode output directly into user buffer pOutBuf = (PBYTE)szLine; //leave a space for a trailing WCHAR null cbOutputSize = cbLineSize - sizeof(WCHAR); } else { //read half as many chars as there are bytes in the output //buf because conversion doubles. //leave a space for a trailing WCHAR null cbOutputSize = cbLineSize/sizeof(WCHAR) - 2; //Alloc ANSI buffer for this line //if cached buffer is too small if(cbOutputSize + 2 > _cbAnsiBufSize) { if ( _pAnsiLineBuf) { LocalFree( _pAnsiLineBuf); _pAnsiLineBuf = NULL; } _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, cbOutputSize + 2); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = cbOutputSize + 2; } //file is ANSI output into temporary buffer for conversion pOutBuf = _pAnsiLineBuf; } fFirstIter = FALSE; } PBYTE pStartByte = (PBYTE)_pBuffer + _curBytePtr; PBYTE pReadByte = pStartByte; PBYTE pNewLine = NULL; //Find newline. Don't bother scanning further than we can //write in the input buffer int maxreaddist = min(_curBufSize-_curBytePtr, cbOutputSize-cbBytesCopied); PBYTE pEndByte = (PBYTE)pStartByte + maxreaddist; for(;pReadByte= sizeof(WCHAR)) { LPWSTR pwsz = (LPWSTR)_pBuffer; if(UNICODE_BOM == *pwsz) { TRC_NRM((TB,_T("File is UNICODE"))); _fFileIsUnicode = TRUE; _curBytePtr += sizeof(WCHAR); } else { TRC_NRM((TB,_T("File is ANSI"))); _fFileIsUnicode = FALSE; } } else { //File to small (less than 2 bytes) //can't be unicode _fFileIsUnicode = FALSE; } DC_END_FN(); } // // Write string szLine to the file // converting to ANSI if the file is not a unicode file // also writeout the UNICODE BOM at the start of the // the file // INT CTscFileStream::Write(LPWSTR szLine) { DC_BEGIN_FN("WriteNext"); BOOL bRet = FALSE; DWORD cbWrite = 0; PBYTE pDataOut = NULL; DWORD dwWritten; if(_fOpenForWrite && szLine) { TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE, (TB,_T("No file handle"))); if(_fFileIsUnicode) { if(_fAtStartOfFile) { //Write the BOM WCHAR wcBOM = UNICODE_BOM; bRet = WriteFile( _hFile, &wcBOM, sizeof(wcBOM), &dwWritten, NULL); if(!bRet || dwWritten != sizeof(wcBOM)) { TRC_NRM((TB,_T("WriteFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } _fAtStartOfFile = FALSE; } //Write UNICODE data out directly pDataOut = (PBYTE)szLine; cbWrite = wcslen(szLine) * sizeof(WCHAR); } else { //Convert UNICODE data to ANSI //before writing it out TRC_ASSERT(_pAnsiLineBuf && _cbAnsiBufSize, (TB,_T("ANSI conversion buffer should be allocated"))); INT ret = WideCharToMultiByte( CP_ACP, WC_COMPOSITECHECK | WC_DEFAULTCHAR, szLine, -1, (LPSTR)_pAnsiLineBuf, _cbAnsiBufSize, NULL, // system default character. NULL); // no notification of conversion failure. if(ret) { pDataOut = _pAnsiLineBuf; cbWrite = ret - 1; //don't write out the NULL } else { TRC_ERR((TB,_T("MultiByteToWideChar failed: %x"), GetLastError())); DWORD dwErr = GetLastError(); if(ERROR_INSUFFICIENT_BUFFER == dwErr) { return ERR_BUFTOOSMALL; } else { return ERR_UNKNOWN; } } } bRet = WriteFile( _hFile, pDataOut, cbWrite, &dwWritten, NULL); if(bRet && dwWritten == cbWrite) { return ERR_SUCCESS; } else { TRC_NRM((TB,_T("WriteFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } } else { if(!_fOpenForWrite) { return ERR_NOTOPENFORWRITE; } else { return ERR_UNKNOWN; } } DC_END_FN(); } // // Remap a \r\n pair from the end of the line // to a \n // void CTscFileStream::EatCRLF(LPWSTR szLine, INT nChars) { if(szLine && nChars >= 2) { if(szLine[nChars-1] == _T('\n') && szLine[nChars-2] == _T('\r')) { szLine[nChars-2] = _T('\n'); //this adds a double NULL to the end of the string szLine[nChars-1] = 0; } } }