windows-server-2003/base/win32/fusion/xmlparser/bufferedstream.hxx

/////////////////////////////////////////////////////////////////////////////////
//
// fusion\xmlparser\BufferedStream.hxx
//
/////////////////////////////////////////////////////////////////////////////////
#ifndef _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_
#define _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_
#pragma once

#include "encodingstream.hxx"

// Status codes defined alongside BufferedStream.
//
// E_DATA_AVAILABLE: returned from nextChar when a new buffer is read.  This
// gives the caller some idea of download progress without having to count
// characters.  Just call nextChar again to continue on as normal.
#define E_DATA_AVAILABLE  0xC00CE600L
// E_DATA_REALLOCATE: not described anywhere in this header.
// NOTE(review): presumably reported when the internal buffer has been
// reallocated or shifted, so pointers previously obtained into it may be
// stale -- confirm against the implementation file before relying on this.
#define E_DATA_REALLOCATE 0xC00CE601L

//------------------------------------------------------------------------
// This class adds buffering and auto-growing semantics to an IStream
// so that a variable length chunk of an IStream can be collected
// in memory for processing using Mark() and getToken() methods.
// It also supports collapsing of newlines into 0x20 if you use 
// nextChar2 instead of nextChar.
// It also guarantees a line buffer so that a pointer to the
// beginning of the line can be returned in error conditions
// (for the degenerate case where there are no new lines, it
// returns the last 100 characters).
//
// Alternatively, buffers can be appended instead of
// using an IStream.  In this case the BufferedStream returns
// E_PENDING until the last buffer is appended.  Use AppendData()
// instead of Load() in this mode.

class XMLStream;

class BufferedStream
{
public:
    // pXMLStream is the XMLStream this buffer belongs to (presumably kept in
    // _pXMLStream as a plain, non-owning back-pointer -- see that member).
    BufferedStream(XMLStream *pXMLStream);
    ~BufferedStream();

    // Method 1: pass in an input stream.  The stream must return unicode
    // characters.
    HRESULT Load(
        /* [unique][in] */ EncodingStream  *pStm);

    // Method 2: append raw buffers; set lastBuffer to TRUE when you are ready
    // to return E_ENDOFINPUT.  Length is number of chars in buffer.  To do
    // unicode you must provide a byte order mark (0xFFFE or 0xFEFF depending
    // on whether it is bigendian or little endian).
    HRESULT AppendData(const BYTE* buffer, ULONG length, BOOL lastBuffer);

    HRESULT Reset();

    // Get next char from buffer; if EOF, set *fEOF to true.
    HRESULT nextChar(
        /* [out] */ WCHAR* ch,
        /* [out] */ bool* fEOF);

    // Marks the last character read as the start of a buffer
    // that grows until Mark is called again.  You can mark backwards
    // from the last character read, anywhere up to the last marked position,
    // by passing a non-zero delta.
    //
    // _lCurrent always points to the char to read next, so the last character
    // read lives at _lCurrent - 1:
    //   Mark(0) marks the last character read,
    //   Mark(3) marks the position three characters before it.
    // If 'back' reaches to or past the start of the buffer, the mark is
    // clamped to position 0.
    inline void Mark(long back = 0)
    {
        _lMark = (_lCurrent > back) ? (_lCurrent - back - 1) : 0;
        if (_lLinepos != _lCurrent)
        {
            // only move the marked line position forward, if we haven't
            // marked the actual new line characters.  This ensures we
            // return useful information from getLineBuf.
            _lMarkedline = _lLine;
            _lMarkedlinepos = _lLinepos;
        }
    }

    // Returns a pointer to a contiguous block of text accumulated
    // from the last time Mark() was called up to but not including
    // the last character read. (This allows a parser to have a
    // lookahead character that is not included in the token).
    HRESULT getToken(const WCHAR**p, long* len);

    HRESULT switchEncoding(const WCHAR * charset, ULONG len);

    // Returns Marked position.
    long getLine();
    long getLinePos();
    WCHAR* getLineBuf(ULONG* len, ULONG* startpos);
    long getInputPos(); // absolute position.

    // Convenience function: number of characters from the mark up to, but not
    // including, the last character read (which sits at _lCurrent - 1).
    long getTokenLength()
    {
        return (_lCurrent - 1 - _lMark);
    }

    // Returns true when the whitespace marker recorded by setWhiteSpace()
    // equals the current read position.  The value of 'ch' is not examined.
    inline bool isWhiteSpace(WCHAR ch)
    {
        UNUSED(ch);
        return (_lLastWhiteSpace == _lCurrent);
    }

    // flag == true : record the current read position, so isWhiteSpace()
    //                reports true until _lCurrent changes.
    // flag == false: record _lCurrent - 1 instead, so isWhiteSpace() reports
    //                false at the current position.
    inline void setWhiteSpace(bool flag = true)
    {
        _lLastWhiteSpace = flag ? _lCurrent : _lCurrent-1;
    }

    void init();

    // Lock/UnLock is another level on top of Mark/Reset that
    // works as follows. If you Lock(), then the buffer keeps everything
    // until you UnLock at which time it resets the "Marked" position to
    // the Locked() position.  This is so that you can scan through
    // a series of tokens, but then return all of them in one chunk.
    void Lock();
    void UnLock();

    // Freezing the buffer makes the buffer always grow WITHOUT shifting
    // data around in the buffer.  This makes it valid to hold on to pointers
    // in the buffer until the buffer is unfrozen.
    HRESULT Freeze();
    HRESULT UnFreeze();
    // NTRAID#NTBUG9 - 571792 - jonwis - 2002/04/25 - Dead code removal
#ifdef FUSION_USE_OLD_XML_PARSER_SOURCE
    WCHAR*  getEncoding();
#endif
    // Special XML optimization.
    HRESULT scanPCData(
        /* [out] */ WCHAR* ch,
        /* [out] */ bool* fWhitespace);

private:
    // Copying is disabled.  This class declares a destructor and holds raw
    // pointers (_pchBuffer, _pPendingEncoding); a compiler-generated
    // member-wise copy would leave two objects sharing the same pointers.
    // Both members are declared private and intentionally left undefined
    // (the pre-C++11 way of making a class non-copyable).
    BufferedStream(const BufferedStream&);
    BufferedStream& operator=(const BufferedStream&);

    WCHAR nextChar();

    HRESULT fillBuffer();
    HRESULT prepareForInput();
    HRESULT doSwitchEncoding();
    long    getNewStart();

    REncodingStream _pStmInput; // input stream
    WCHAR*  _pchBuffer; // buffer containing chars from input stream.
    long    _lCurrent; // current read position in buffer (next char to read)
    long    _lCurrent2; // used when collapsing white space.
    long    _lSize; // total size of buffer.
    long    _lMark; // start of current token.
    long    _lUsed; // amount of buffer currently used.
    WCHAR   _chLast; // last character returned.
    long    _lLine; // current line number
    long    _lLinepos; // position of start of last line.
    long    _lMarkedline; // current line number of marked position.
    long    _lMarkedlinepos; // value of _lLinepos captured by Mark().
    long    _lStartAt; // The number of unicode characters before the current buffer
    bool    _fEof;
    bool    _fNotified;
    bool    _fFrozen;
    long    _lLockCount;
    long    _lLockedPos;
    long    _lLockedLine;
    long    _lLockedLinePos;
    long    _lLastWhiteSpace; // position recorded by setWhiteSpace().
    long    _lMidPoint;
    Encoding* _pPendingEncoding;
    XMLStream *_pXMLStream; // regular pointer pointing back to the XMLStream object
};

#endif // _FUSION_XMLPARSER__BUFFEREDSTREAM_H_INCLUDE_