// windows-xp/Source/XPSP1/NT/base/win32/fusion/xmlparser/bufferedstream.cxx


								/////////////////////////////////////////////////////////////////////////////////

								//

								// fusion\xmlparser\BufferedStream.cxx

								//

								/////////////////////////////////////////////////////////////////////////////////


								#include "stdinc.h"

								#include "core.hxx"

								#pragma hdrstop


								#include <memory.h>

								//#include <shlwapip.h>

								#include <ole2.h>

								#include <xmlparser.h>


								#include "bufferedstream.hxx"

								#include "xmlstream.hxx"

								#include "encodingstream.hxx"

								#include "xmlhelper.hxx"


								// Sizing unit, in WCHARs, for the character buffer: it is the initial

								// allocation size, the conservative fallback growth step, and the

								// minimum free space required before more input is read.

								const long BLOCK_SIZE = 4096;

								// Upper bound, in WCHARs, on how far back the start of the current line

								// is preserved when buffered data is shifted down.  There is no point

								// remembering a line buffer longer than this because the client

								// probably can't deal with that anyway.

								const long MAX_LINE_BUFFER = 512;


								// Constructs an empty buffered stream.  pXMLStream is stored and later
								// used by nextChar() to deliver data-available / reallocate
								// notifications.  No character buffer is allocated here; that happens
								// in prepareForInput().
								BufferedStream::BufferedStream(XMLStream *pXMLStream)
								{
								    _pchBuffer = NULL;
								    _lSize = 0;
								    // Freeze() compares _lCurrent against _lMidPoint, which is otherwise
								    // only assigned once a buffer has been allocated.  Give it a defined
								    // value so that comparison never reads an indeterminate member.
								    _lMidPoint = 0;
								    _pXMLStream = pXMLStream;
								    init();
								}

								/////////////////////////////////////////////////////////////////////////////

								void BufferedStream::init()

								{

								    _lCurrent = _lUsed = _lMark = 0;

								    _lLine			= 1; // lines start at 1.

								    _lMarkedline	= 1;

								    _lLinepos		= 0;

								    _lMarkedlinepos = 0;

								    _chLast		= 0;

								    _lStartAt		= 0;

								    _fEof			= false;

								    _lLockedPos	= -1;

								    _lLastWhiteSpace = -1;

								    _lLockCount	= 0;

								    _fNotified		= false;

								    _fFrozen		= false;

									_pPendingEncoding = NULL;

								}

								/////////////////////////////////////////////////////////////////////////////

								// Releases everything the stream owns: the character buffer, the input
								// stream and any encoding switch that was still pending.
								BufferedStream::~BufferedStream()
								{
								    // Character buffer allocated by prepareForInput().
								    delete [] _pchBuffer;

								    // Let go of the input stream (AppendData notes that this member is a
								    // smart pointer holding a reference).
								    _pStmInput = NULL;

								    // An encoding requested while frozen but never applied.
								    delete _pPendingEncoding;
								    _pPendingEncoding = NULL;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Returns the stream to its freshly-constructed state: frees the
								// character buffer, releases the input stream, discards any pending
								// encoding switch and clears all bookkeeping.  Always returns S_OK.
								HRESULT BufferedStream::Reset()
								{
								    // The pending encoding must be freed BEFORE init() runs: init()
								    // overwrites _pPendingEncoding with NULL, so deleting it afterwards
								    // would delete NULL and leak the object.
								    delete _pPendingEncoding;
								    _pPendingEncoding = NULL;

								    // Resets cursors, line tracking, lock count/position and the
								    // frozen flag.
								    init();

								    // Drop the buffer and everything that describes its size.
								    delete[] _pchBuffer;
								    _pchBuffer = NULL;
								    _lSize = 0;
								    _lMidPoint = 0;

								    _pStmInput = NULL;
								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Attaches a new input stream.  A non-NULL pStm resets all bookkeeping
								// before it is installed; a NULL pStm simply detaches the current input
								// and leaves the bookkeeping as it is.  Always returns S_OK.
								HRESULT
								BufferedStream::Load(
								        /* [unique][in] */ EncodingStream __RPC_FAR *pStm)
								{
								    if (pStm != NULL)
								    {
								        // init() overwrites _pPendingEncoding with NULL without freeing
								        // it, so release any encoding switch still pending from the
								        // previous input first to avoid leaking it.
								        delete _pPendingEncoding;
								        _pPendingEncoding = NULL;

								        init();
								    }

								    _pStmInput = pStm;
								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Pushes a block of raw bytes into the input stream, creating the
								// EncodingStream on first use.  If the previous input already reached
								// end-of-file, the bookkeeping is reset so parsing starts over.
								//
								// Returns E_OUTOFMEMORY if the stream cannot be created, otherwise
								// whatever checkhr2 propagates from EncodingStream::AppendData, or S_OK.
								HRESULT
								BufferedStream::AppendData( const BYTE* in, ULONG length, BOOL lastBuffer)
								{
								    HRESULT hr;

								    if (_fEof)
								    {
								        // init() overwrites _pPendingEncoding with NULL without freeing
								        // it, so release any still-pending encoding switch first.
								        delete _pPendingEncoding;
								        _pPendingEncoding = NULL;

								        init();
								    }

								    if (!_pStmInput)
								    {
								        EncodingStream* stream = (EncodingStream*)EncodingStream::newEncodingStream(NULL);
								        if (stream == NULL)
								            return E_OUTOFMEMORY;
								        _pStmInput = stream;
								        stream->Release(); // Smart pointer is holding a ref
								    }

								    checkhr2(_pStmInput->AppendData(in, length, lastBuffer));

								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Reads the next character from the buffer, refilling it from the input
								// stream when it is exhausted.
								//
								//   ch   - receives the character on success (untouched at end of input).
								//   fEOF - set to true when the end of input has been reached; it is
								//          never set to false here.
								//
								// Returns S_OK on success or end of input, XML_E_BADCHARDATA for the
								// characters 0xfffe / 0xffff, or any non-S_OK result propagated by
								// checkhr2 from the XMLStream callback or from fillBuffer().
								HRESULT
								BufferedStream::nextChar(
								        /* [out] */  WCHAR* ch,
								        /* [out] */ bool* fEOF)
								{
								    HRESULT hr;

								    if (_lCurrent >= _lUsed)
								    {
								        if (_fEof)
								        {
								            *fEOF = true;
								            return S_OK;
								        }
								        if (! _fNotified && _lUsed > 0)
								        {
								            // Notify that data is available BEFORE blowing away the old
								            // data, so the parser can save it if need be.
								            _fNotified = true;

								            // NOTE: this code approximates what prepareForInput does
								            // in order to accurately predict when the buffer is about to
								            // be re-allocated.  Keep the two in sync.
								            long shift = _fFrozen ? 0 : getNewStart(); // is data about to shift?
								            long used = _lUsed - shift; // this is how much is really used after shift
								            if (_lSize - used < BLOCK_SIZE + 1) // +1 for null termination.
								            {
								                // we will reallocate !!  So return a special
								                // return code
								                hr = E_DATA_REALLOCATE;
								            }
								            else
								            {
								                hr = E_DATA_AVAILABLE;
								            }
								            checkhr2( _pXMLStream->ErrorCallback(hr) );
								        }

								        checkhr2( fillBuffer() );
								        if (_fEof)
								        {
								            *fEOF = true;
								            return S_OK;
								        }
								        _fNotified = false;
								    }

								    WCHAR result = _pchBuffer[_lCurrent++];

								    switch (result)
								    {
								    case 0xa:
								    case 0xd:
								        // CR always starts a new line; LF does so unless it immediately
								        // follows a CR (CR LF counts as a single line break).
								        if (result == 0xd || _chLast != 0xd)
								            _lLine++;
								        _lLinepos = _lCurrent;
								        _lLastWhiteSpace = _lCurrent;
								        break;
								    case 0x20:
								    case 0x9:
								        _lLastWhiteSpace = _lCurrent;
								        break;
								    case 0xfffe:
								    case 0xffff:
								    //case 0xfeff:
								        ::FusionpDbgPrintEx(
								            FUSION_DBG_LEVEL_ERROR,
								            "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");

								        return XML_E_BADCHARDATA;
								    }

								    // Remember EVERY character, not just line breaks.  Otherwise a CR seen
								    // many characters ago still suppresses the line count for a later,
								    // unrelated LF (e.g. "a\rb\n" would be counted as one line break).
								    _chLast = result;

								    *ch = result;
								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Fast scan over characters that are ALREADY in the buffer, stopping at
								// the first of ']', '>', '<', '&', '\'' or '"'.  No refill is attempted.
								//
								//   ch          - on entry, the character most recently read by the
								//                 caller; on S_OK, receives the delimiter that was found.
								//   fWhitespace - cleared to false if any non-whitespace character was
								//                 seen (including the entry value of *ch).  It is never
								//                 set to true here, so the caller must initialise it.
								//
								// Returns S_OK when a delimiter is found, E_PENDING when the buffered
								// data runs out first, or XML_E_BADCHARDATA for 0xfffe / 0xffff.
								HRESULT BufferedStream::scanPCData(
								    /* [out] */ WCHAR* ch,
								    /* [out] */ bool* fWhitespace)
								{
								    WCHAR result;
								    bool foundNonWhiteSpace = false;

								    if (! isWhiteSpace(*ch))
								        foundNonWhiteSpace = true;

								    // Then skip the data until we find one of the delimiters.
								    while (_lCurrent < _lUsed)
								    {
								        result = _pchBuffer[_lCurrent++];

								        switch (result)
								        {
								        case ']':  // xiaoyu : the specified chars can be changed for our own purpose
								        case '>':
								        case '<':
								        case '&':
								        case '\'':  // so this can be used to scan attribute values also.
								        case '"':   // so this can be used to scan attribute values also.
								            _chLast = result;
								            *ch = result;
								            if (foundNonWhiteSpace)
								                *fWhitespace = false;
								            return S_OK;

								        case 0xa:
								        case 0xd:
								            // CR always starts a new line; LF does so unless it
								            // immediately follows a CR.
								            if (result == 0xd || _chLast != 0xd)
								                _lLine++;
								            _lLinepos = _lCurrent;
								            _lLastWhiteSpace = _lCurrent;
								            break;

								        case 0x20:
								        case 0x9:
								            _lLastWhiteSpace = _lCurrent;
								            break;

								        case 0xfffe:
								        case 0xffff:
								            ::FusionpDbgPrintEx(
								                FUSION_DBG_LEVEL_ERROR,
								                "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");

								            return XML_E_BADCHARDATA;

								        default:
								            foundNonWhiteSpace = true;
								            break;
								        }

								        // Track every consumed character so that only a LF directly
								        // after a CR is folded into the same line break; previously a
								        // stale CR could suppress the count for a much later LF.
								        _chLast = result;
								    }

								    // And just return E_PENDING if we run out of buffer.
								    if (foundNonWhiteSpace)
								        *fWhitespace = false;
								    return E_PENDING;
								}

								/////////////////////////////////////////////////////////////////////////////

								long BufferedStream::getLine()

								{

								    return _lMarkedline;

								}

								/////////////////////////////////////////////////////////////////////////////

								// Column of the marked token within its line, 1-based so that the
								// first character of a line is column 1 (consistent with the 1-based
								// line numbers).
								//
								// _lMarkedlinepos is the buffer offset at which the marked line begins
								// and _lMark is the buffer offset of the marked token, so the column is
								// their difference plus one.  If the line start lies beyond that
								// position, 0 is returned instead.
								long BufferedStream::getLinePos()
								{
								    const long oneBasedMark = _lMark + 1;

								    if (_lMarkedlinepos > oneBasedMark)
								    {
								        return 0;
								    }
								    return oneBasedMark - _lMarkedlinepos;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Position of the mark counted from the start of the input: the mark's
								// offset in the buffer plus _lStartAt, the amount by which the buffer
								// contents have been shifted down so far.
								long BufferedStream::getInputPos()
								{
								    const long markInInput = _lStartAt + _lMark;
								    return markInInput;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Returns a pointer into the internal buffer at the start of the marked
								// line.
								//
								//   len      - receives the number of characters up to (not including)
								//              the first '\n', '\r' or terminating null.
								//   startpos - receives the 1-based position of the marked token
								//              relative to the returned pointer (same value that
								//              getLinePos() computes).
								//
								// Returns NULL, with *len and *startpos both 0, when no buffer has been
								// allocated yet.  The returned pointer refers to internal storage.
								WCHAR* BufferedStream::getLineBuf(ULONG* len, ULONG* startpos)
								{
								    // Give both outputs defined values up front; previously *startpos
								    // was left untouched on the NULL-buffer path.
								    *len = 0;
								    *startpos = 0;
								    if (_pchBuffer == NULL)
								        return NULL;

								    WCHAR* result = &_pchBuffer[_lMarkedlinepos];

								    // internal _pchBuffer is guaranteed to be null terminated.
								    ULONG i = 0;
								    while (result[i] != 0 && result[i] != L'\n' && result[i] != L'\r')
								    {
								        i++;
								    }
								    *len = i;

								    // also return the line position relative to start of
								    // returned buffer.
								    *startpos = (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
								    return result;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Requests that the input be decoded with a different character set.
								//
								//   charset / len - name of the encoding and its length, passed straight
								//                   to Encoding::newEncoding.
								//
								// If the buffer is frozen the switch is only recorded and is applied
								// later by UnFreeze(); otherwise it is applied immediately.
								//
								// Returns E_FAIL when there is no input stream, E_OUTOFMEMORY when the
								// Encoding cannot be created, otherwise S_OK or the result of
								// doSwitchEncoding().
								HRESULT BufferedStream::switchEncoding(const WCHAR * charset, ULONG len)
								{
								    if (!_pStmInput)
								    {
								        return E_FAIL;
								    }

								    // A previous request may still be pending (made while frozen and not
								    // yet applied).  Free it before replacing it, otherwise it is leaked.
								    delete _pPendingEncoding;
								    _pPendingEncoding = Encoding::newEncoding(charset, len);
								    if (_pPendingEncoding == NULL)
								    {
								        return E_OUTOFMEMORY;
								    }

								    if (_fFrozen)
								    {
								        // Deferred: UnFreeze() applies the pending encoding.
								        return S_OK;
								    }
								    return doSwitchEncoding();
								}

								/////////////////////////////////////////////////////////////////////////////

								// Applies the pending encoding switch at the current read position.
								// The pending pointer is cleared before the encoding is handed to the
								// input stream.
								//
								// Returns S_OK on success (including the "must re-decode" S_FALSE case),
								// XML_E_INVALIDENCODING if the stream reports E_INVALIDARG, and
								// XML_E_INVALIDSWITCH for any other failure.
								HRESULT BufferedStream::doSwitchEncoding()
								{
								    Encoding* encoding = _pPendingEncoding;
								    _pPendingEncoding = NULL;

								    // The stream is addressed by absolute input position, hence the
								    // _lStartAt offset here.
								    HRESULT hr = _pStmInput->switchEncodingAt(encoding, _lStartAt + _lCurrent);
								    if (hr == S_FALSE)
								    {
								        // need to re-read to force re-decode into new encoding.
								        // In other words we have to forget that we read past this
								        // position already so that the next call to nextChar
								        // will call FillBuffer again.
								        //
								        // _lUsed is an index into _pchBuffer, so it must be truncated
								        // to the buffer-relative _lCurrent.  The previous value
								        // (_lStartAt + _lCurrent) was an absolute position: once the
								        // buffer had been shifted (_lStartAt > 0) it pushed _lUsed past
								        // the real data instead of discarding the look-ahead.
								        _lUsed = _lCurrent;
								        hr = S_OK;
								    }
								    else if (FAILED(hr))
								    {
								        hr = (hr == E_INVALIDARG) ? XML_E_INVALIDENCODING : XML_E_INVALIDSWITCH;
								    }
								    return hr;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Hands back the text accumulated since the mark as a contiguous run
								// inside the internal buffer.  The run starts at the mark; its length
								// comes from getTokenLength().  Per the original description the last
								// character read is not part of the token, which lets a parser keep a
								// lookahead character outside it.
								//
								//   p   - receives a pointer into the internal buffer.
								//   len - receives the token length.
								//
								// Returns E_FAIL if no buffer has been allocated yet, otherwise S_OK.
								HRESULT
								BufferedStream::getToken(const WCHAR**p, long* len)
								{
								    if (_pchBuffer == NULL)
								    {
								        return E_FAIL;
								    }

								    // NOTE: fixing up the buffer after whitespace compression (when
								    // _lCurrent and _lCurrent2 have drifted apart) is not implemented;
								    // the token is returned exactly as stored.

								    const WCHAR* tokenStart = &_pchBuffer[_lMark];
								    *p = tokenStart;
								    *len = getTokenLength();
								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Takes a lock that remembers the current mark, its line number and
								// its line start.  Locks nest and the outermost one wins: only the
								// first Lock() records the position, inner calls just bump the count.
								void
								BufferedStream::Lock()
								{
								    _lLockCount++;
								    if (_lLockCount != 1)
								    {
								        // Nested lock - the outer lock's saved position stays in force.
								        return;
								    }

								    _lLockedPos = _lMark;
								    _lLockedLine = _lMarkedline;
								    _lLockedLinePos = _lMarkedlinepos;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Releases one level of locking.  When the outermost lock is released
								// the mark, marked line and marked line start are restored to the
								// values saved by Lock(), and the locked position is reset to -1.
								void
								BufferedStream::UnLock()
								{
								    _lLockCount--;
								    if (_lLockCount != 0)
								    {
								        // Still inside an outer lock - nothing to restore yet.
								        return;
								    }

								    _lMark = _lLockedPos;
								    _lMarkedline = _lLockedLine;
								    _lMarkedlinepos = _lLockedLinePos;
								    _lLockedPos = -1;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Freezes the buffer so that its contents are no longer shifted down
								// when more input is read.
								//
								// Because the buffer is frozen often (any time the parser is inside a
								// tag), data is shifted down here, before freezing, whenever the read
								// position has passed the buffer's midpoint.  Without this the buffer
								// would tend to keep growing.
								//
								// Returns S_OK, or whatever checkhr2 propagates from prepareForInput().
								HRESULT
								BufferedStream::Freeze()
								{
								    HRESULT hr;

								    const bool pastMidPoint = (_lCurrent > _lMidPoint);
								    if (pastMidPoint)
								    {
								        checkhr2( prepareForInput() );
								    }

								    _fFrozen = true;
								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Lifts the freeze and, if an encoding switch was requested while the
								// buffer was frozen, applies it now.
								//
								// Returns S_OK when nothing is pending, otherwise the result of
								// doSwitchEncoding().
								HRESULT
								BufferedStream::UnFreeze()
								{
								    _fFrozen = false;

								    if (_pPendingEncoding == NULL)
								    {
								        return S_OK;
								    }
								    return doSwitchEncoding();
								}

								/////////////////////////////////////////////////////////////////////////////

								// Makes room in the buffer and reads more characters from the input
								// stream into it, keeping the buffer null terminated.
								//
								// Returns E_PENDING when no input stream has been supplied yet, the
								// failure code from the stream's Read(), whatever checkhr2 propagates
								// from prepareForInput(), or S_OK.  A successful read of zero bytes
								// marks end of input (_fEof).
								HRESULT
								BufferedStream::fillBuffer()
								{
								    HRESULT hr;

								    checkhr2( prepareForInput() );

								    if (!_pStmInput)
								    {
								        // SetInput or AppendData hasn't been called yet.
								        return E_PENDING;
								    }

								    // Free slots, keeping one back for the NULL terminator.
								    long cchFree = _lSize - _lUsed - 1;

								    // get more bytes.
								    ULONG cbRead = 0;
								    HRESULT hrRead = _pStmInput->Read(&_pchBuffer[_lUsed], cchFree*sizeof(WCHAR), &cbRead);

								    // The stream must return unicode characters, so convert the byte
								    // count to a character count; then re-terminate the buffer.  This is
								    // done before looking at the result code.
								    _lUsed += cbRead/sizeof(WCHAR);
								    _pchBuffer[_lUsed] = 0;

								    if (FAILED(hrRead))
								    {
								        return hrRead;
								    }

								    if (cbRead == 0)
								    {
								        _fEof = true;
								        // increment _lCurrent, so that getToken returns
								        // last character in file.
								        _lCurrent++;
								        _lCurrent2++;
								    }

								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Gets the buffer ready to receive more input:
								//
								//   1. Unless frozen, shifts the live region of the buffer down to
								//      offset 0 (by the amount getNewStart() reports) and rebases every
								//      buffer-relative position by the same amount.
								//   2. If fewer than BLOCK_SIZE characters are then free (after
								//      reserving one for the null terminator), grows the buffer.
								//
								// Returns E_OUTOFMEMORY only when allocation fails AND there is no free
								// space at all; otherwise S_OK.
								HRESULT
								BufferedStream::prepareForInput()
								{
								    long newstart = 0;

								    // BUGBUG - if this code is changed BufferedStream::nextChar has to
								    // be updated also so that they stay in sync, otherwise we might
								    // re-allocate the buffer without generating an E_DATA_REALLOCATE
								    // notification - which would be very bad (causes GPF's in the parser).

								    if (! _fFrozen)  // can't shift bits if the buffer is frozen.
								    {
								        newstart = getNewStart();

								        if (newstart > 0)
								        {
								            WCHAR* src = &_pchBuffer[newstart];
								            _lUsed -= newstart;
								            _lStartAt += newstart;
								            // Regions overlap, so memmove rather than memcpy.
								            ::memmove(_pchBuffer,src,_lUsed*sizeof(WCHAR));

								            _lCurrent -= newstart;
								            _lCurrent2 -= newstart;
								            _lLastWhiteSpace -= newstart;
								            _lMark -= newstart;
								            _lLockedPos -= newstart;

								            // Line-start positions are clamped at 0: the start of the
								            // line may have been shifted out of the buffer.
								            _lLinepos = (_lLinepos > newstart) ? _lLinepos-newstart : 0;
								            // Each position is tested against ITS OWN old value.  This
								            // one used to test _lLinepos (already rebased just above),
								            // which could leave _lMarkedlinepos negative and make
								            // getLineBuf() index before the start of the buffer.
								            _lMarkedlinepos = (_lMarkedlinepos > newstart) ? _lMarkedlinepos-newstart : 0;
								            _lLockedLinePos = (_lLockedLinePos > newstart) ? _lLockedLinePos-newstart : 0;
								        }
								    }

								    // make sure we have a reasonable amount of space
								    // left in the _pchBuffer.
								    long space = _lSize - _lUsed;
								    if (space > 0) space--; // reserve 1 for NULL termination
								    if (_pchBuffer == NULL || space < BLOCK_SIZE)
								    {
								        // double the size of the buffer.
								        long newsize = (_lSize == 0) ? BLOCK_SIZE : (_lSize*2);

								        WCHAR* newbuf = NEW (WCHAR[newsize]);
								        if (newbuf == NULL)
								        {
								            // try more conservative allocation.
								            newsize = _lSize + BLOCK_SIZE;
								            newbuf = NEW (WCHAR[newsize]);
								        }
								        // Failing to grow is only fatal when nothing at all is free;
								        // otherwise carry on with the space that is left.
								        if (newbuf == NULL && space == 0)
								            return E_OUTOFMEMORY;

								        if (newbuf != NULL)
								        {
								            if (_pchBuffer != NULL)
								            {
								                // copy old bytes to new _pchBuffer.
								                ::memcpy(newbuf,_pchBuffer,_lUsed*sizeof(WCHAR));
								                delete [] _pchBuffer;
								            }
								            newbuf[_lUsed] = 0; // make sure it's null terminated.
								            _pchBuffer = newbuf;
								            _lSize = newsize;
								            _lMidPoint = newsize / 2;
								        }
								    }

								    return S_OK;
								}

								/////////////////////////////////////////////////////////////////////////////

								// Computes the buffer offset that would become offset 0 if the buffer
								// contents were shifted down now, i.e. how many leading characters can
								// be discarded.  Returns 0 when nothing can be discarded.
								//
								// Callers (prepareForInput and the prediction in nextChar) skip this
								// entirely while the buffer is frozen.
								long
								BufferedStream::getNewStart()
								{
								    long newstart = 0;

								    // _lLockedPos is -1 when no lock is held (see init / UnLock) and only
								    // ever becomes more negative while unlocked, so ">= 0" means "locked".
								    // The previous "> 0" test treated a lock taken at offset 0 as "not
								    // locked" and fell through to the mark-based branch, which could
								    // discard the locked data and leave _lLockedPos negative.
								    if (_lLockedPos >= 0)
								    {
								        // While locked, data is only discarded up to the beginning of
								        // the locked line, and only when that line start precedes the
								        // locked position by less than MAX_LINE_BUFFER.
								        if (_lLockedLinePos < _lLockedPos &&
								            _lLockedPos - _lLockedLinePos < MAX_LINE_BUFFER)
								        {
								            newstart = _lLockedLinePos;
								        }
								    }
								    else if (_lMark > 0)
								    {
								        // Everything before the mark can go ...
								        newstart = _lMark;
								        // ... but try and preserve the beginning of the marked line if
								        // we can.
								        if (_lMarkedlinepos < _lMark &&
								            _lMark - _lMarkedlinepos < MAX_LINE_BUFFER) // watch out for long lines
								        {
								            newstart = _lMarkedlinepos;
								        }
								    }
								    return newstart;
								}