|
|
//
// fsteam.cpp
// Implements a file stream
// for reading text files line by line.
// the standard C streams, only support
// unicode as binary streams which are a pain to work
// with).
//
// This class reads/writes both ANSI and UNICODE files
// and converts to/from UNICODE internally
//
// Does not do any CR/LF translations either on input
// or output.
//
// Copyright(C) Microsoft Corporation 2000
// Author: Nadim Abdo (nadima)
//
#include "stdafx.h"
#define TRC_GROUP TRC_GROUP_UI
#define TRC_FILE "fstream.cpp"
#include <atrcapi.h>
#include "fstream.h"
#ifndef UNICODE
//
// Adding ansi support is just a matter of converting
// from UNICODE file to ANSI internal if the file
// has a UNICODE BOM
//
#error THIS MODULE ASSUMES BEING COMPILED UNICODE, ADD ANSI IF NEEDED
#endif
CTscFileStream::CTscFileStream() { DC_BEGIN_FN("~CFileStream"); _hFile = INVALID_HANDLE_VALUE; _pBuffer = NULL; _fOpenForRead = FALSE; _fOpenForWrite = FALSE; _fReadToEOF = FALSE; _fFileIsUnicode = FALSE; _fAtStartOfFile = TRUE; _pAnsiLineBuf = NULL; _cbAnsiBufSize = 0; DC_END_FN(); }
CTscFileStream::~CTscFileStream() { DC_BEGIN_FN("~CFileStream"); Close();
if(_hFile != INVALID_HANDLE_VALUE) { CloseHandle(_hFile); _hFile = INVALID_HANDLE_VALUE; } if(_pBuffer) { LocalFree(_pBuffer); _pBuffer = NULL; } if(_pAnsiLineBuf) { LocalFree(_pAnsiLineBuf); _pAnsiLineBuf = NULL; } DC_END_FN(); }
INT CTscFileStream::OpenForRead(LPTSTR szFileName) { DC_BEGIN_FN("OpenForRead"); INT err;
err = Close(); if(err != ERR_SUCCESS) { return err; }
//Alloc read buffers
if(!_pBuffer) { _pBuffer = (PBYTE)LocalAlloc(LPTR, READ_BUF_SIZE); if(!_pBuffer) { return ERR_OUT_OF_MEM; } } if(!_pAnsiLineBuf) { _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = LINEBUF_SIZE; } memset(_pBuffer, 0, READ_BUF_SIZE); memset(_pAnsiLineBuf, 0, LINEBUF_SIZE);
_hFile = CreateFile( szFileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, //Creates if !exist
FILE_ATTRIBUTE_NORMAL, NULL);
if(INVALID_HANDLE_VALUE == _hFile) { TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; }
#ifdef OS_WINCE
DWORD dwRes; dwRes = SetFilePointer( _hFile, 0, NULL, FILE_BEGIN); if (dwRes == (DWORD)0xffffffff) { DWORD dwErr = GetLastError(); TRC_ERR((TB, _T("CreateFile failed to reset: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; }
#endif
_curBytePtr = 0; _curBufSize = 0; _tcsncpy(_szFileName, szFileName, MAX_PATH-1); //Yes this is ok, the size is MAX_PATH+1 ;-)
_szFileName[MAX_PATH] = 0; _fOpenForRead = TRUE; _fFileIsUnicode = FALSE; _fAtStartOfFile = TRUE;
DC_END_FN(); return ERR_SUCCESS; }
//
// Opens the stream for writing
// always nukes the existing file contents
//
INT CTscFileStream::OpenForWrite(LPTSTR szFileName, BOOL fWriteUnicode) { DC_BEGIN_FN("OpenForWrite");
INT err; DWORD dwAttributes = 0; err = Close(); if(err != ERR_SUCCESS) { return err; }
if(_pAnsiLineBuf) { LocalFree(_pAnsiLineBuf); _pAnsiLineBuf = NULL; } _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = LINEBUF_SIZE;
//
// Preserve any existing attributes
//
dwAttributes = GetFileAttributes(szFileName); if (-1 == dwAttributes) { TRC_ERR((TB,_T("GetFileAttributes for %s failed 0x%x"), szFileName, GetLastError())); dwAttributes = FILE_ATTRIBUTE_NORMAL; }
_hFile = CreateFile( szFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, //Creates and reset
dwAttributes, NULL);
if(INVALID_HANDLE_VALUE == _hFile) { TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"), szFileName, GetLastError())); return ERR_CREATEFILE; }
_tcsncpy(_szFileName, szFileName, MAX_PATH-1); //Yes this is ok, the size is MAX_PATH+1 ;-)
_szFileName[MAX_PATH] = 0; _fOpenForWrite = TRUE; _fFileIsUnicode = fWriteUnicode; _fAtStartOfFile = TRUE;
DC_END_FN(); return ERR_SUCCESS; }
INT CTscFileStream::Close() { DC_BEGIN_FN("Close"); if(_hFile != INVALID_HANDLE_VALUE) { CloseHandle(_hFile); _hFile = INVALID_HANDLE_VALUE; } _fOpenForRead = _fOpenForWrite = FALSE; _fReadToEOF = FALSE; _tcscpy(_szFileName, _T("")); //Don't free the read buffers
//they'll be cached for subsequent use
DC_END_FN(); return ERR_SUCCESS; }
//
// Read a line from the file and return it as UNICODE
//
// Read up to the next newline, or till cbLineSize/sizeof(WCHAR) or
// untill the EOF. Whichever comes first.
//
//
INT CTscFileStream::ReadNextLine(LPWSTR szLine, INT cbLineSize) { BOOL bRet = FALSE; INT cbBytesCopied = 0; INT cbOutputSize = 0; BOOL fDone = FALSE; PBYTE pOutBuf = NULL; //where to write the result
BOOL fFirstIter = TRUE; DC_BEGIN_FN("ReadNextLine");
TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE, (TB,_T("No file handle"))); TRC_ASSERT(_pBuffer, (TB,_T("NO buffer")));
if(_fOpenForRead && !_fReadToEOF && cbLineSize && szLine) { //
//Read up to a line's worth (terminated by \n)
//but stop short if szLine is too small
//
//
//Check if we've got enough buffered bytes to read from
//if not go ahead and read another buffer's worth
//
while(!fDone) { if(_curBytePtr >= _curBufSize) { //Read next buffer full
DWORD cbRead = 0; bRet = ReadFile(_hFile, _pBuffer, READ_BUF_SIZE, &cbRead, NULL); if(!bRet && GetLastError() == ERROR_HANDLE_EOF) { //cancel error
bRet = TRUE; _fReadToEOF = TRUE; } if(bRet) { if(cbRead) { _curBufSize = cbRead; _curBytePtr = 0; } else { _fReadToEOF = TRUE; if(cbBytesCopied) { //reached EOF but we've returned at least
//some data
return ERR_SUCCESS; } else { //EOF can't read any data
return ERR_EOF; } } } else { TRC_NRM((TB,_T("ReadFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } } TRC_ASSERT(_curBytePtr < READ_BUF_SIZE, (TB,_T("_curBytePtr %d exceeds buf size"), _curBytePtr)); //
// If we're at the start of the file,
//
if(_fAtStartOfFile) { //CAREFULL this could update the current byte ptr
CheckFirstBufMarkedUnicode(); _fAtStartOfFile = FALSE; }
if(fFirstIter) { if(_fFileIsUnicode) { //file is unicode output directly into user buffer
pOutBuf = (PBYTE)szLine; //leave a space for a trailing WCHAR null
cbOutputSize = cbLineSize - sizeof(WCHAR); } else { //read half as many chars as there are bytes in the output
//buf because conversion doubles.
//leave a space for a trailing WCHAR null
cbOutputSize = cbLineSize/sizeof(WCHAR) - 2; //Alloc ANSI buffer for this line
//if cached buffer is too small
if(cbOutputSize + 2 > _cbAnsiBufSize) { if ( _pAnsiLineBuf) { LocalFree( _pAnsiLineBuf); _pAnsiLineBuf = NULL; } _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, cbOutputSize + 2); if(!_pAnsiLineBuf) { return ERR_OUT_OF_MEM; } _cbAnsiBufSize = cbOutputSize + 2; } //file is ANSI output into temporary buffer for conversion
pOutBuf = _pAnsiLineBuf; } fFirstIter = FALSE; }
PBYTE pStartByte = (PBYTE)_pBuffer + _curBytePtr; PBYTE pReadByte = pStartByte; PBYTE pNewLine = NULL; //Find newline. Don't bother scanning further than we can
//write in the input buffer
int maxreaddist = min(_curBufSize-_curBytePtr, cbOutputSize-cbBytesCopied); PBYTE pEndByte = (PBYTE)pStartByte + maxreaddist; for(;pReadByte<pEndByte;pReadByte++) { if(*pReadByte == '\n') { if(_fFileIsUnicode) { //
// Check if the previous byte was a zero
// if so we've hit the '0x0 0xa' byte pair
// for a unicode '\n'
//
if(pReadByte != pStartByte && *(pReadByte - 1) == 0) { pNewLine = pReadByte; break; } } else { pNewLine = pReadByte; break; } } } if(pNewLine) { int cbBytesToCopy = (pNewLine - pStartByte) + (_fFileIsUnicode ? sizeof(WCHAR) : sizeof(CHAR)); if(cbBytesToCopy <= (cbOutputSize-cbBytesCopied)) { memcpy( pOutBuf + cbBytesCopied, pStartByte, cbBytesToCopy); _curBytePtr += cbBytesToCopy; cbBytesCopied += cbBytesToCopy; fDone = TRUE; } } else { //Didn't find a newline
memcpy( pOutBuf + cbBytesCopied, pStartByte, maxreaddist); //we're done if we filled up the output
_curBytePtr += maxreaddist; cbBytesCopied += maxreaddist; if(cbBytesCopied == cbOutputSize) { fDone = TRUE; } } } // iterate over file buffer chunks
//Ensure trailing null
pOutBuf[cbBytesCopied] = 0; if(_fFileIsUnicode) { pOutBuf[cbBytesCopied+1] = 0; }
//Done reading line
if(_fFileIsUnicode) { EatCRLF( (LPWSTR)szLine, cbBytesCopied/sizeof(WCHAR)); return ERR_SUCCESS; } else { //The file is ANSI. Conv to UNICODE,
//first copy the contents out of the output
//Now convert to UNICODE
int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)_pAnsiLineBuf, -1, szLine, cbLineSize/sizeof(WCHAR)); if(ret) { EatCRLF( (LPWSTR)szLine, ret - 1); return ERR_SUCCESS; } else { TRC_ERR((TB,_T("MultiByteToWideChar failed: %x"), GetLastError())); DWORD dwErr = GetLastError(); if(ERROR_INSUFFICIENT_BUFFER == dwErr) { return ERR_BUFTOOSMALL; } else { return ERR_UNKNOWN; } } } } else { //error path
if(_fReadToEOF) { return ERR_EOF; } if(!_fOpenForRead) { return ERR_NOTOPENFORREAD; } else if (!_pBuffer) { return ERR_OUT_OF_MEM; } else { return ERR_UNKNOWN; } }
DC_END_FN(); }
// check for the UNICODE BOM and eat it
void CTscFileStream::CheckFirstBufMarkedUnicode() { DC_BEGIN_FN("CheckFirstBufMarkedUnicode"); TRC_ASSERT(_pBuffer, (TB,_T("NO buffer"))); if(_curBufSize >= sizeof(WCHAR)) { LPWSTR pwsz = (LPWSTR)_pBuffer; if(UNICODE_BOM == *pwsz) { TRC_NRM((TB,_T("File is UNICODE"))); _fFileIsUnicode = TRUE; _curBytePtr += sizeof(WCHAR); } else { TRC_NRM((TB,_T("File is ANSI"))); _fFileIsUnicode = FALSE; } } else { //File to small (less than 2 bytes)
//can't be unicode
_fFileIsUnicode = FALSE; } DC_END_FN(); }
//
// Write string szLine to the file
// converting to ANSI if the file is not a unicode file
// also writeout the UNICODE BOM at the start of the
// the file
//
INT CTscFileStream::Write(LPWSTR szLine) { DC_BEGIN_FN("WriteNext"); BOOL bRet = FALSE; DWORD cbWrite = 0; PBYTE pDataOut = NULL; DWORD dwWritten;
if(_fOpenForWrite && szLine) { TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE, (TB,_T("No file handle"))); if(_fFileIsUnicode) { if(_fAtStartOfFile) { //Write the BOM
WCHAR wcBOM = UNICODE_BOM; bRet = WriteFile( _hFile, &wcBOM, sizeof(wcBOM), &dwWritten, NULL); if(!bRet || dwWritten != sizeof(wcBOM)) { TRC_NRM((TB,_T("WriteFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } _fAtStartOfFile = FALSE; } //Write UNICODE data out directly
pDataOut = (PBYTE)szLine; cbWrite = wcslen(szLine) * sizeof(WCHAR); } else { //Convert UNICODE data to ANSI
//before writing it out
TRC_ASSERT(_pAnsiLineBuf && _cbAnsiBufSize, (TB,_T("ANSI conversion buffer should be allocated")));
INT ret = WideCharToMultiByte( CP_ACP, WC_COMPOSITECHECK | WC_DEFAULTCHAR, szLine, -1, (LPSTR)_pAnsiLineBuf, _cbAnsiBufSize, NULL, // system default character.
NULL); // no notification of conversion failure.
if(ret) { pDataOut = _pAnsiLineBuf; cbWrite = ret - 1; //don't write out the NULL
} else { TRC_ERR((TB,_T("MultiByteToWideChar failed: %x"), GetLastError())); DWORD dwErr = GetLastError(); if(ERROR_INSUFFICIENT_BUFFER == dwErr) { return ERR_BUFTOOSMALL; } else { return ERR_UNKNOWN; } } }
bRet = WriteFile( _hFile, pDataOut, cbWrite, &dwWritten, NULL); if(bRet && dwWritten == cbWrite) { return ERR_SUCCESS; } else { TRC_NRM((TB,_T("WriteFile returned fail:%x"), GetLastError())); return ERR_FILEOP; } } else { if(!_fOpenForWrite) { return ERR_NOTOPENFORWRITE; } else { return ERR_UNKNOWN; } }
DC_END_FN(); }
//
// Remap a \r\n pair from the end of the line
// to a \n
//
void CTscFileStream::EatCRLF(LPWSTR szLine, INT nChars) { if(szLine && nChars >= 2) { if(szLine[nChars-1] == _T('\n') && szLine[nChars-2] == _T('\r')) { szLine[nChars-2] = _T('\n'); //this adds a double NULL to the end of the string
szLine[nChars-1] = 0; } } }
|