|
|
/////////////////////////////////////////////////////////////////////////////////
//
// fusion\xmlparser\BufferedStream.cxx
//
/////////////////////////////////////////////////////////////////////////////////
#include "stdinc.h"
#include "core.hxx"
#pragma hdrstop
#include <memory.h>
//#include <shlwapip.h>
#include <ole2.h>
#include <xmlparser.h>
#include "bufferedstream.hxx"
#include "xmlstream.hxx"
#include "encodingstream.hxx"
#include "xmlhelper.hxx"
// Size, in WCHARs, of the initial buffer allocation. prepareForInput() also
// uses it as the minimum free space to keep available and as the fallback
// growth step when doubling the buffer fails.
const long BLOCK_SIZE = 4096;
// No point remembering a line buffer longer than this because the client
// probably can't deal with that anyway.
const long MAX_LINE_BUFFER = 512;
// Constructs an empty buffered stream.
//
// pXMLStream - object whose ErrorCallback() is invoked by nextChar() just
//              before the buffer is refilled.
//
// No character buffer is allocated here; prepareForInput() allocates it the
// first time input is needed.
BufferedStream::BufferedStream(XMLStream *pXMLStream)
{
    _pchBuffer = NULL;
    _lSize = 0;
    // _lMidPoint is otherwise only assigned when prepareForInput() allocates
    // a buffer, but Freeze() reads it unconditionally. Give it a defined
    // value so that an early Freeze() does not read an indeterminate member.
    _lMidPoint = 0;
    _pXMLStream = pXMLStream;
    init();
}
/////////////////////////////////////////////////////////////////////////////
void BufferedStream::init() { _lCurrent = _lUsed = _lMark = 0; _lLine = 1; // lines start at 1.
_lMarkedline = 1; _lLinepos = 0; _lMarkedlinepos = 0; _chLast = 0; _lStartAt = 0; _fEof = false; _lLockedPos = -1; _lLastWhiteSpace = -1; _lLockCount = 0; _fNotified = false; _fFrozen = false; _pPendingEncoding = NULL; } /////////////////////////////////////////////////////////////////////////////
// Releases everything the stream owns: the character buffer, its hold on the
// input stream, and any encoding switch that was requested but never applied.
BufferedStream::~BufferedStream()
{
    // Character buffer allocated by prepareForInput().
    delete [] _pchBuffer;

    // Let go of the input stream.
    _pStmInput = NULL;

    // An encoding queued by switchEncoding() while frozen may still be
    // waiting; free it.
    delete _pPendingEncoding;
    _pPendingEncoding = NULL;
}
/////////////////////////////////////////////////////////////////////////////
// Returns the stream to its freshly-constructed state: frees the character
// buffer and any pending encoding, drops the input stream and re-initialises
// all position / lock / freeze state.
//
// Returns S_OK always.
HRESULT BufferedStream::Reset()
{
    // Free the pending encoding BEFORE calling init(): init() overwrites
    // _pPendingEncoding with NULL, so deleting it afterwards would be a
    // no-op and the object would leak.
    delete _pPendingEncoding;
    _pPendingEncoding = NULL;

    // init() also resets _lLockedPos, _lLockCount and _fFrozen.
    init();

    delete [] _pchBuffer;
    _pchBuffer = NULL;
    _lSize = 0;

    _pStmInput = NULL;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Attaches a new input stream.
//
// pStm - stream to read decoded characters from. When non-NULL the read
//        state is re-initialised via init() before the stream is stored.
//        When NULL the current stream is simply dropped and the read state
//        is left as it was.
//
// Returns S_OK always.
HRESULT BufferedStream::Load(
    /* [unique][in] */ EncodingStream __RPC_FAR *pStm)
{
    if (pStm == NULL)
    {
        // Detach only; no state reset in this case.
        _pStmInput = NULL;
        return S_OK;
    }

    init();
    _pStmInput = pStm;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Pushes a block of raw bytes into the input stream, creating the stream on
// demand.
//
// in         - bytes to append.
// length     - number of bytes at 'in'.
// lastBuffer - forwarded unchanged to EncodingStream::AppendData.
//
// Returns E_OUTOFMEMORY if the encoding stream cannot be created, whatever
// checkhr2 propagates from EncodingStream::AppendData, otherwise S_OK.
HRESULT BufferedStream::AppendData(
    const BYTE* in,
    ULONG length,
    BOOL lastBuffer)
{
    HRESULT hr;

    // A previous read reached end of file: start over with clean state.
    if (_fEof)
    {
        init();
    }

    if (!_pStmInput)
    {
        EncodingStream* pNewStream =
            (EncodingStream*)EncodingStream::newEncodingStream(NULL);
        if (NULL == pNewStream)
        {
            return E_OUTOFMEMORY;
        }
        _pStmInput = pNewStream;
        // Smart pointer is holding a ref, so drop the creation reference.
        pNewStream->Release();
    }

    checkhr2(_pStmInput->AppendData(in, length, lastBuffer));

    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Reads the next character, refilling the buffer when it is exhausted, and
// keeps the line / whitespace bookkeeping up to date.
//
// ch   - receives the character on success (not written at end of file).
// fEOF - set to true when the end of input has been reached; it is NOT
//        written on the normal path, so callers must initialise it.
//
// Returns S_OK on success or at end of file, XML_E_BADCHARDATA for U+FFFE /
// U+FFFF, or whatever checkhr2 propagates from the XMLStream callback or
// from fillBuffer().
HRESULT BufferedStream::nextChar(
    /* [out] */ WCHAR* ch,
    /* [out] */ bool* fEOF)
{
    HRESULT hr;

    if (_lCurrent >= _lUsed)
    {
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }

        if (! _fNotified && _lUsed > 0)
        {
            // Notify "data available" BEFORE blowing away the old data so
            // the parser can save it if need be.
            _fNotified = true;

            // NOTE: this code approximates what prepareForInput does
            // in order to accurately predict when the buffer is about to
            // be re-allocated.
            long shift = _fFrozen ? 0 : getNewStart(); // is data about to shift?
            long used = _lUsed - shift; // this is how much is really used after shift
            if (_lSize - used < BLOCK_SIZE + 1) // +1 for null termination.
            {
                // We will reallocate, so use the special return code.
                hr = E_DATA_REALLOCATE;
            }
            else
            {
                hr = E_DATA_AVAILABLE;
            }
            checkhr2( _pXMLStream->ErrorCallback(hr) );
        }

        checkhr2( fillBuffer() );
        if (_fEof)
        {
            *fEOF = true;
            return S_OK;
        }
        _fNotified = false;
    }

    WCHAR result = _pchBuffer[_lCurrent++];

    switch (result)
    {
    case 0xa:
    case 0xd:
        // Count CR always; count LF unless it immediately follows a CR
        // (CR LF is a single line break).
        if (result == 0xd || _chLast != 0xd)
            _lLine++;
        _lLinepos = _lCurrent;
        _lLastWhiteSpace = _lCurrent;
        break;
    case 0x20:
    case 0x9:
        _lLastWhiteSpace = _lCurrent;
        break;
    case 0xfffe:
    case 0xffff:
        //case 0xfeff:
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
        return XML_E_BADCHARDATA;
    }

    // Remember EVERY character, not just line breaks. Previously _chLast
    // was only updated on CR/LF, so an LF was swallowed whenever the most
    // recent line break - however far back - happened to be a CR, which
    // under-counted lines in input with mixed line endings.
    _chLast = result;

    *ch = result;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Fast-forwards through buffered characters until a delimiter is found,
// updating the line / whitespace bookkeeping on the way. It never refills
// the buffer.
//
// ch          - on entry, the character already read by the caller (only
//               inspected to seed the whitespace check); on S_OK, receives
//               the delimiter that stopped the scan.
// fWhitespace - set to false if any non-whitespace character was seen
//               (including the incoming *ch); never set to true here.
//
// Returns S_OK when a delimiter was found, E_PENDING when the buffered data
// ran out first, XML_E_BADCHARDATA for U+FFFE / U+FFFF.
HRESULT BufferedStream::scanPCData(
    /* [in,out] */ WCHAR* ch,
    /* [out] */ bool* fWhitespace)
{
    WCHAR result;
    bool foundNonWhiteSpace = false;

    if (! isWhiteSpace(*ch))
        foundNonWhiteSpace = true;

    // Then skip the data until we find one of the delimiters below.
    while (_lCurrent < _lUsed)
    {
        result = _pchBuffer[_lCurrent++];

        switch (result)
        {
        case ']':  // xiaoyu : the specified chars can be changed for our own purpose
        case '>':
        case '<':
        case '&':
        case '\'': // so this can be used to scan attribute values also.
        case '"':  // so this can be used to scan attribute values also.
            _chLast = result;
            *ch = result;
            if (foundNonWhiteSpace)
                *fWhitespace = false;
            return S_OK;

        case 0xa:
        case 0xd:
            // Count CR always; count LF unless it immediately follows a CR.
            if (result == 0xd || _chLast != 0xd)
                _lLine++;
            _lLinepos = _lCurrent;
            _lLastWhiteSpace = _lCurrent;
            break;

        case 0x20:
        case 0x9:
            _lLastWhiteSpace = _lCurrent;
            break;

        case 0xfffe:
        case 0xffff:
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
            return XML_E_BADCHARDATA;

        default:
            foundNonWhiteSpace = true;
            break;
        }

        // Track every consumed character, not only line breaks. Previously
        // _chLast was updated on CR/LF only, so an LF was not counted when
        // the most recent line break, however far back, was a CR.
        _chLast = result;
    }

    // And just return E_PENDING if we run out of buffer.
    if (foundNonWhiteSpace)
        *fWhitespace = false;
    return E_PENDING;
}
/////////////////////////////////////////////////////////////////////////////
long BufferedStream::getLine() { return _lMarkedline; } /////////////////////////////////////////////////////////////////////////////
// Returns the 1-based column of the marked token within its line.
//
// _lMarkedlinepos is the buffer offset at which the marked line begins and
// _lMark is the buffer offset of the marked token, so the column is the
// difference between the two, plus one so that the first character of a
// line is column 1 (consistent with the 1-based line numbers). If the line
// start lies beyond the mark, 0 is returned instead.
long BufferedStream::getLinePos()
{
    const long lMarkPlusOne = _lMark + 1;

    if (_lMarkedlinepos > lMarkPlusOne)
    {
        return 0;
    }
    return lMarkPlusOne - _lMarkedlinepos;
}
/////////////////////////////////////////////////////////////////////////////
// Returns the position of the mark relative to the start of the input:
// _lStartAt accumulates how far the buffer contents have been shifted down
// by prepareForInput(), and _lMark is the mark's offset within the buffer.
long BufferedStream::getInputPos()
{
    const long lAbsoluteMark = _lStartAt + _lMark;
    return lAbsoluteMark;
}
/////////////////////////////////////////////////////////////////////////////
// Returns a pointer into the internal buffer at the start of the marked
// line.
//
// len      - receives the number of characters up to (not including) the
//            next '\n', '\r' or null terminator.
// startpos - receives the 1-based position of the marked token relative to
//            the returned pointer (same value as getLinePos()).
//
// Returns NULL, with *len and *startpos both set to 0, if no buffer has
// been allocated yet. The returned pointer is only valid until the buffer
// is next shifted or reallocated.
WCHAR* BufferedStream::getLineBuf(ULONG* len, ULONG* startpos)
{
    *len = 0;
    // Give the second out-parameter a defined value on every path; it used
    // to be left untouched when there was no buffer.
    *startpos = 0;

    if (_pchBuffer == NULL)
        return NULL;

    WCHAR* result = &_pchBuffer[_lMarkedlinepos];

    // Internal _pchBuffer is guaranteed to be null terminated, so this scan
    // always stops.
    ULONG i = 0;
    WCHAR ch = result[i];
    while (ch != 0 && ch != L'\n' && ch != L'\r')
    {
        i++;
        ch = result[i];
    }
    *len = i;

    // Also return the line position relative to start of returned buffer.
    // Reuse getLinePos() rather than duplicating its formula.
    *startpos = static_cast<ULONG>(getLinePos());
    return result;
}
/////////////////////////////////////////////////////////////////////////////
// Requests that the input be decoded with a different character set from
// the current read position onwards.
//
// charset - name of the new encoding.
// len     - length of 'charset', passed through to Encoding::newEncoding.
//
// If the buffer is frozen the switch is only queued; UnFreeze() applies it.
//
// Returns E_FAIL if there is no input stream, E_OUTOFMEMORY if the encoding
// object cannot be created, S_OK if the switch was queued, otherwise the
// result of doSwitchEncoding().
HRESULT BufferedStream::switchEncoding(const WCHAR * charset, ULONG len)
{
    if (!_pStmInput)
    {
        return E_FAIL;
    }

    // Discard any earlier request that was queued while frozen and never
    // applied; overwriting the pointer without this would leak it.
    delete _pPendingEncoding;

    _pPendingEncoding = Encoding::newEncoding(charset, len);
    if (_pPendingEncoding == NULL)
    {
        return E_OUTOFMEMORY;
    }

    if (_fFrozen)
    {
        // Deferred until UnFreeze().
        return S_OK;
    }

    return doSwitchEncoding();
}
/////////////////////////////////////////////////////////////////////////////
// Applies the queued encoding switch to the input stream at the current
// read position.
//
// Returns S_OK on success, XML_E_INVALIDENCODING if the stream reports
// E_INVALIDARG, XML_E_INVALIDSWITCH for any other failure.
//
// NOTE(review): the pending encoding pointer is handed to
// switchEncodingAt(); this code assumes the stream takes ownership of it -
// confirm against EncodingStream.
HRESULT BufferedStream::doSwitchEncoding()
{
    Encoding* encoding = _pPendingEncoding;
    _pPendingEncoding = NULL;

    // The stream is told the absolute input position (_lStartAt is how far
    // the buffer has been shifted, _lCurrent is the offset within it).
    HRESULT hr = _pStmInput->switchEncodingAt(encoding, _lStartAt + _lCurrent);
    if (hr == S_FALSE)
    {
        // Need to re-read to force re-decode into the new encoding. In other
        // words we have to forget that we read past this position already so
        // that the next call to nextChar will call fillBuffer again.
        //
        // _lUsed is an index into _pchBuffer, so it must be set to the
        // buffer-relative _lCurrent. The old code assigned the absolute
        // position (_lStartAt + _lCurrent), which is only the same value
        // while _lStartAt is 0; after any shift it left _lUsed beyond
        // _lCurrent (no refill) and possibly beyond the buffer.
        _lUsed = _lCurrent;
        hr = S_OK;
    }
    else if (FAILED(hr))
    {
        hr = (hr == E_INVALIDARG) ? XML_E_INVALIDENCODING : XML_E_INVALIDSWITCH;
    }
    return hr;
}
/////////////////////////////////////////////////////////////////////////////
// Hands back the text accumulated since the last time Mark() was called,
// up to but not including the last character read, as one contiguous run
// inside the internal buffer. (Excluding the last character lets a parser
// keep a lookahead character that is not part of the token.)
//
// p   - receives a pointer to the first character of the token.
// len - receives the token length as computed by getTokenLength().
//
// Returns E_FAIL if no buffer has been allocated yet, otherwise S_OK.
HRESULT BufferedStream::getToken(const WCHAR**p, long* len)
{
    if (NULL == _pchBuffer)
    {
        return E_FAIL;
    }

    // The token always starts at the mark.
    const WCHAR* pTokenStart = &_pchBuffer[_lMark];
    *p = pTokenStart;
    *len = getTokenLength();
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Takes a lock on the current mark. Locks nest, and the outermost one wins:
// only the first Lock() snapshots the mark position and its line
// information; inner calls just bump the nesting count. UnLock() restores
// the snapshot when the outermost lock is released.
void BufferedStream::Lock()
{
    ++_lLockCount;
    if (_lLockCount != 1)
    {
        // Already locked by an outer caller; keep its snapshot.
        return;
    }

    _lLockedPos     = _lMark;
    _lLockedLine    = _lMarkedline;
    _lLockedLinePos = _lMarkedlinepos;
}
/////////////////////////////////////////////////////////////////////////////
// Releases one level of locking. When the outermost lock is released the
// mark and its line information are put back to what Lock() captured, and
// _lLockedPos returns to -1.
void BufferedStream::UnLock()
{
    --_lLockCount;
    if (_lLockCount != 0)
    {
        // Still inside an outer lock.
        return;
    }

    _lMark          = _lLockedPos;
    _lMarkedline    = _lLockedLine;
    _lMarkedlinepos = _lLockedLinePos;
    _lLockedPos     = -1;
}
/////////////////////////////////////////////////////////////////////////////
// Marks the buffer as frozen so that prepareForInput() stops shifting its
// contents down.
//
// Returns whatever checkhr2 propagates from prepareForInput(), otherwise
// S_OK.
HRESULT BufferedStream::Freeze()
{
    HRESULT hr;

    // Since we freeze the buffer a lot now (any time we're inside a tag) we
    // need to shift the bytes down in the buffer more frequently in order
    // to guarantee we have space in the buffer when we need it. Otherwise
    // the buffer would tend to just keep growing and growing. So we shift
    // the buffer when we go past the midpoint.
    const bool fPastMidPoint = (_lCurrent > _lMidPoint);
    if (fPastMidPoint)
    {
        checkhr2( prepareForInput() );
    }

    _fFrozen = true;
    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Clears the frozen flag and, if switchEncoding() queued an encoding change
// while the buffer was frozen, applies it now.
//
// Returns the result of doSwitchEncoding() when a switch was pending,
// otherwise S_OK.
HRESULT BufferedStream::UnFreeze()
{
    _fFrozen = false;

    if (!_pPendingEncoding)
    {
        return S_OK;
    }
    return doSwitchEncoding();
}
/////////////////////////////////////////////////////////////////////////////
// Makes room in the buffer and reads more characters from the input stream
// into it.
//
// Returns whatever checkhr2 propagates from prepareForInput(), E_PENDING if
// no input stream has been attached yet, the stream's failure code if the
// read fails, otherwise S_OK. A successful read of zero bytes is treated as
// end of file.
HRESULT BufferedStream::fillBuffer()
{
    HRESULT hr;

    checkhr2( prepareForInput() );

    if (!_pStmInput)
    {
        // SetInput or AppendData hasn't been called yet.
        return E_PENDING;
    }

    // Free slots after the used region; reserve 1 for NULL termination.
    const long cchFree = _lSize - _lUsed - 1;

    // Get more bytes.
    ULONG cbRead = 0;
    HRESULT hrRead = _pStmInput->Read(&_pchBuffer[_lUsed], cchFree*sizeof(WCHAR), &cbRead);

    // Account for whatever arrived and re-terminate before looking at the
    // result code. The stream must return unicode characters.
    _lUsed += cbRead/sizeof(WCHAR);
    _pchBuffer[_lUsed] = 0; // NULL terminate the _pchBuffer.

    if (FAILED(hrRead))
    {
        return hrRead;
    }

    if (cbRead == 0)
    {
        _fEof = true;
        // Increment _lCurrent, so that getToken returns the last character
        // in the file.
        _lCurrent++;
        _lCurrent2++;
    }

    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
// Makes sure the buffer has room for at least BLOCK_SIZE more characters
// (plus the null terminator).
//
// Step 1 (skipped while frozen): move the still-needed part of the buffer,
// starting at the offset chosen by getNewStart(), down to the beginning and
// rebase every stored buffer offset by the same amount.
// Step 2: if there is still not enough free space, grow the buffer.
//
// Returns E_OUTOFMEMORY only if the buffer could not be grown AND there is
// no free space at all; otherwise S_OK (possibly with less than BLOCK_SIZE
// free).
HRESULT BufferedStream::prepareForInput()
{
    long newstart = 0;

    // BUGBUG - if this code is changed BufferedStream::nextChar has to
    // be updated also so that they stay in sync, otherwise we might
    // re-allocate the buffer without generating an E_DATA_REALLOCATE
    // notification - which would be very bad (causes GPF's in the parser).
    if (! _fFrozen) // can't shift bits if the buffer is frozen.
    {
        newstart = getNewStart();

        if (newstart > 0)
        {
            WCHAR* src = &_pchBuffer[newstart];
            _lUsed -= newstart;
            _lStartAt += newstart;
            ::memmove(_pchBuffer,src,_lUsed*sizeof(WCHAR));
            // memmove did not carry the terminator along; restore the
            // "buffer is always null terminated" invariant right away
            // (callers such as Freeze() do not go on to refill the buffer).
            _pchBuffer[_lUsed] = 0;

            _lCurrent -= newstart;
            _lCurrent2 -= newstart;
            _lLastWhiteSpace -= newstart;
            _lLinepos = (_lLinepos > newstart) ? _lLinepos-newstart : 0;
            // Clamp on _lMarkedlinepos itself. The previous code tested
            // _lLinepos here (already rebased by the line above), which
            // could zero a valid marked-line offset or drive it negative.
            _lMarkedlinepos = (_lMarkedlinepos > newstart) ? _lMarkedlinepos-newstart : 0;
            _lMark -= newstart;
            _lLockedLinePos = (_lLockedLinePos > newstart) ? _lLockedLinePos-newstart : 0;
            _lLockedPos -= newstart;
        }
    }

    // Make sure we have a reasonable amount of space left in the _pchBuffer.
    long space = _lSize - _lUsed;
    if (space > 0) space--; // reserve 1 for NULL termination

    if (_pchBuffer == NULL || space < BLOCK_SIZE)
    {
        // Double the size of the buffer.
        long newsize = (_lSize == 0) ? BLOCK_SIZE : (_lSize*2);

        WCHAR* newbuf = NEW (WCHAR[newsize]);
        if (newbuf == NULL)
        {
            // Try more conservative allocation.
            newsize = _lSize + BLOCK_SIZE;
            newbuf = NEW (WCHAR[newsize]);
        }
        if (newbuf == NULL && space == 0)
            return E_OUTOFMEMORY;

        // If allocation failed but some space remains, carry on with the
        // existing buffer.
        if (newbuf != NULL)
        {
            if (_pchBuffer != NULL)
            {
                // Copy old bytes to new _pchBuffer.
                ::memcpy(newbuf,_pchBuffer,_lUsed*sizeof(WCHAR));
                delete [] _pchBuffer;
            }
            newbuf[_lUsed] = 0; // make sure it's null terminated.
            _pchBuffer = newbuf;
            _lSize = newsize;
            _lMidPoint = newsize / 2;
        }
    }

    return S_OK;
}
/////////////////////////////////////////////////////////////////////////////
long BufferedStream::getNewStart() { long newstart = 0;
// Unless the buffer is frozen, in which case we just reallocate and
// do no shifting of data.
if (_lLockedPos > 0) { // and try and preserve the beginning of the marked line if we can
if (_lLockedLinePos < _lLockedPos && _lLockedPos - _lLockedLinePos < MAX_LINE_BUFFER) { newstart = _lLockedLinePos; } } else if (_lMark > 0) { // and try and preserve the beginning of the marked line if we can
newstart = _lMark; if (_lMarkedlinepos < _lMark && _lMark - _lMarkedlinepos < MAX_LINE_BUFFER) // watch out for long lines
{ newstart = _lMarkedlinepos; } } return newstart; }
|