|
|
/*
* * Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved. * EXEMPT: copyright change only, no build required * */ #include "stdinc.h"
#include "core.hxx"
#pragma hdrstop
#include "xmlhelper.hxx"
#include "xmlstream.hxx"
#include "bufferedstream.hxx"
#include "xmlparser.hxx"
const long BLOCK_SIZE = 512;
const long STACK_INCREMENT = 10;

// Macros used in this file.
//
// Most of these expand to code that refers to names of the calling XMLStream
// method (hr, _pInput, _chLookahead, _fEOF, _sSubState, the Start label) and
// several of them return from the enclosing function.
//
// checkeof, ADVANCE and ADVANCETO are wrapped in do { ... } while (0) so that
// each one expands to exactly one statement.  This keeps them correct when
// used as the body of an unbraced if/else, e.g. "if (cond) ADVANCE;".
// They must be followed by a ';' at the point of use.

// Return XML_E_INTERNALERROR from the enclosing function.
#define INTERNALERROR return XML_E_INTERNALERROR;

// Return 'b' from the enclosing function when end of input has been reached.
// The first argument is not used.
#define checkeof(a,b) do { if (_fEOF) return b; } while (0)

// Fetch the next character into _chLookahead (updating _fEOF); return the
// HRESULT from the enclosing function if it is anything other than S_OK.
#define ADVANCE do { hr = _pInput->nextChar(&_chLookahead, &_fEOF); if (hr != S_OK) return hr; } while (0)

// Call AdvanceTo(a); return the HRESULT from the enclosing function if it is
// anything other than S_OK.
#define ADVANCETO(a) do { hr = AdvanceTo(a); if (hr != S_OK) return hr; } while (0)

#define ISWHITESPACE(ch) _pInput->isWhiteSpace(ch)

// Record the sub-state to resume in and return S_OK to the caller.
#define STATE(state) { _sSubState = state; return S_OK; }

// Record the sub-state and re-dispatch immediately via the local Start label.
#define GOTOSTART(state) { _sSubState = state; goto Start; }

// True when hr is S_OK or lies in [XML_E_TOKEN_ERROR, XML_E_LASTERROR).
#define DELAYMARK(hr) (((hr) == S_OK) || (((hr) >= static_cast<HRESULT>(XML_E_TOKEN_ERROR)) && ((hr) < static_cast<HRESULT>(XML_E_LASTERROR))))

#define XML_E_FOUNDPEREF 0x8000e5ff
// Attribute types that receive special handling in the tokenizer.
// The values are derived from the XML_AT_XXXX types provided in SetType
// and are also calculated while parsing an ATTLIST, for parsing of
// default values.
typedef enum
{
    XMLTYPE_CDATA    = 0,   // the default.
    XMLTYPE_NAME     = 1,
    XMLTYPE_NAMES    = 2,
    XMLTYPE_NMTOKEN  = 3,
    XMLTYPE_NMTOKENS = 4
} XML_ATTRIBUTE_TYPE;
//==============================================================================
// xiaoyu : a simplified table : it only deals with comments and '<![' sections;
// it does not include DOCTYPE, NotationDecl, EntityDecl and ElementDecl.
// Parse an <!^xxxxxxxx Declaration.
//
// Each entry is preceded by its index in the table and the input position
// ('^') it applies to.
// NOTE(review): the entries look like { opcode, literal text, next state,
// alternate state or error code, argument } -- confirm against the StateEntry
// declaration and parseTable, which are not part of this section.
const StateEntry g_DeclarationTable[] = {
// 0 '<' ^ '!'
{ OP_CHAR, L"!", 1, (DWORD)XML_E_INTERNALERROR, },
// 1 '<!' ^ '-'
{ OP_PEEK, L"-", 2, 4, 0 },
// 2 '<!-'
{ OP_COMMENT, NULL, 3, },
// 3 done !!
{ OP_POP, NULL, 0, 0 },
// 4 '<!' ^ '['
//xiaoyu : we do not consider other <!XXX forms, which belong to the DTD subset
{ OP_PEEK, L"[", 5, (DWORD)XML_E_BADDECLNAME, 0 },
// 5 '<![...'
{ OP_CONDSECT, NULL, 3, } };
//==============================================================================
// Parse an <?xml or <?xml:namespace declaration.
//
// Each entry is preceded by its index in the table and, where the original
// author recorded it, the input position ('^') it applies to.
// NOTE(review): the entries look like { opcode, literal text, next state,
// alternate state or error code, argument } -- confirm against the StateEntry
// declaration and parseTable, which are not part of this section.
const StateEntry g_XMLDeclarationTable[] = {
// 0 must be xml declaration - and not xml namespace declaration
{ OP_TOKEN, NULL, 1, XML_XMLDECL, 0 },
// 1 '<?xml' ^ S version="1.0" ...
{ OP_OWS, NULL, 2 },
// 2 '<?xml' S ^ version="1.0" ...
{ OP_SNCHAR, NULL, 3, (DWORD)XML_E_XMLDECLSYNTAX },
// 3 '<?xml' S ^ version="1.0" ...
{ OP_NAME, NULL, 4, },
// 4 '<?xml' S version^="1.0" ...
{ OP_STRCMP, L"version", 5, 12, XML_VERSION },
// 5
{ OP_EQUALS, NULL, 6 },
// 6 '<?xml' S version = ^ "1.0" ...
{ OP_ATTRVAL, NULL, 32, 0},
// 7 '<?xml' S version '=' value ^
{ OP_TOKEN, NULL, 8, XML_PCDATA, -1 },
// 8 ^ are we done ?
// must be '?' or whitespace.
{ OP_CHARWS, L"?", 28, 9 },
// 9 ^ S? [encoding|standalone] '?>'
{ OP_OWS, NULL, 10 },
// 10
// may have '?' after skipping whitespace.
{ OP_CHAR, L"?", 28, 33 },
// 11 ^ [encoding|standalone] '?>'
{ OP_NAME, NULL, 12, },
// 12
{ OP_STRCMP, L"standalone", 23, 13, XML_STANDALONE },
// 13
{ OP_STRCMP, L"encoding", 14, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE, XML_ENCODING },
// 14
{ OP_EQUALS, NULL, 15 },
// 15
{ OP_ATTRVAL, NULL, 16, 0 },
// 16
{ OP_ENCODING, NULL, 17, 0, -1 },
// 17
{ OP_TOKEN, NULL, 18, XML_PCDATA, -1 },
// 18 ^ are we done ?
// must be '?' or whitespace.
{ OP_CHARWS, L"?", 28, 19 },
// 19 ^ S? standalone '?>'
{ OP_OWS, NULL, 20 },
// 20
// may have '?' after skipping whitespace.
{ OP_CHAR, L"?", 28, 34 },
// 21 ^ standalone '?>'
{ OP_NAME, NULL, 22, },
// 22
{ OP_STRCMP, L"standalone", 23, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE, XML_STANDALONE },
// 23
{ OP_EQUALS, NULL, 24 },
// 24
{ OP_ATTRVAL, NULL, 25, 0 },
// 25
{ OP_STRCMP, L"yes", 31, 30, -1 },
// 26 <?xml ....... ^ '?>' -- now expecting just the closing '?>' chars
{ OP_OWS, NULL, 27 },
// 27
{ OP_CHAR, L"?", 28, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
// 28
{ OP_CHAR, L">", 29, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
// 29 done !!
{ OP_POP, NULL, 0, XMLStream::XML_ENDXMLDECL },
//----------------------- check standalone values "yes" or "no"
// 30
{ OP_STRCMP, L"no", 31, (DWORD)XML_E_INVALID_STANDALONE, -1 },
// 31
{ OP_TOKEN, NULL, 26, XML_PCDATA, -1 },
//----------------------- check version = "1.0"
// 32
{ OP_STRCMP, L"1.0", 7, (DWORD)XML_E_INVALID_VERSION, -1 },
// 33
{ OP_SNCHAR, NULL, 11, (DWORD)XML_E_XMLDECLSYNTAX },
// 34
{ OP_SNCHAR, NULL, 21, (DWORD)XML_E_XMLDECLSYNTAX }, };
// Wide-character literal "CDATA".
// NOTE(review): no use of this constant appears in the part of the file
// reviewed here -- confirm that it is still referenced further down.
static const WCHAR* g_pstrCDATA = L"CDATA"; ////////////////////////////////////////////////////////////////////////
// Construct a tokenizer bound to the given parser.  No input is attached
// yet; the state function starts out as init(), which reports
// XML_E_ENDOFINPUT until an input stream has been supplied.
XMLStream::XMLStream(XMLParser * pXMLParser)
    : _pStack(1), _pStreams(1)
{
    _pXMLParser   = pXMLParser;
    _fnState      = &XMLStream::init;
    _cStreamDepth = 0;
    _fDTD         = false;
    _pInput       = NULL;

    // _init() does delete[] on _pchBuffer, so it has to be NULL beforehand.
    _pchBuffer    = NULL;
    _init();

    SetFlags(0);
}
////////////////////////////////////////////////////////////////////////
// Prepare the tokenizer to start parsing the current input.
// Returns XML_E_ENDOFINPUT when no input stream has been attached yet.
// Otherwise resets the per-parse state, makes parseContent the base state
// and pushes firstAdvance so that the first lookahead character is fetched
// before any content is parsed.
HRESULT XMLStream::init()
{
    HRESULT hr = S_OK;

    if (_pInput != NULL)
    {
        _init();
        _fnState = &XMLStream::parseContent;
        checkhr2(push(&XMLStream::firstAdvance, 0));
        return hr;
    }

    // put-stream has not been called yet.
    return XML_E_ENDOFINPUT;
}
////////////////////////////////////////////////////////////////////////
void XMLStream::_init() { _fEOF = false; _chLookahead = 0; _nToken = XML_PENDING; _chTerminator = 0; _lLengthDelta = 0; _lNslen = _lNssep = 0; _sSubState = 0; _lMarkDelta = 0; _fUsingBuffer = false; _lBufLen = 0; delete[] _pchBuffer; _pchBuffer = NULL; _lBufSize = 0; _fDelayMark = false; _fFoundWhitespace = false; _fFoundNonWhitespace = false; _fWasUsingBuffer = false; _chNextLookahead = 0;
_fParsingAttDef = false; _fFoundFirstElement = false; _fReturnAttributeValue = true; //_fHandlePE = true;
_pTable = NULL; } ////////////////////////////////////////////////////////////////////////
// Release the current input stream, the private text buffer and every input
// stream that is still saved on the stream stack.
XMLStream::~XMLStream()
{
    delete _pInput;
    _pInput = NULL;

    delete[] _pchBuffer;
    _pchBuffer = NULL;

    // Walk the stack of suspended streams, deleting the stream held by each
    // entry before moving on to the entry returned by pop().
    for (InputInfo* saved = _pStreams.peek(); saved != NULL; saved = _pStreams.pop())
    {
        delete saved->_pInput;
    }
}
////////////////////////////////////////////////////////////////////////
// Append raw bytes to the current input stream, creating the stream (and
// initializing the tokenizer) on first use.
//
//   buffer, length, last - forwarded unchanged to BufferedStream::AppendData.
//
// Returns E_OUTOFMEMORY if the stream cannot be allocated, the failure code
// from init() if the tokenizer cannot be initialized, and otherwise whatever
// BufferedStream::AppendData returns.
HRESULT XMLStream::AppendData(
    /* [in] */ const BYTE *buffer,
    /* [in] */ long length,
    /* [in] */ BOOL last)
{
    if (_pInput == NULL)
    {
        _pInput = NEW (BufferedStream(this));
        if (_pInput == NULL)
            return E_OUTOFMEMORY;

        // init() can fail (it pushes onto the state stack).  Do not carry on
        // with a half-initialized tokenizer: put the object back into its
        // "no input attached" state and report the failure.
        HRESULT hrInit = init();
        if (FAILED(hrInit))
        {
            delete _pInput;
            _pInput = NULL;
            _fnState = &XMLStream::init;
            return hrInit;
        }
    }

    return _pInput->AppendData(buffer, length, last);
}
////////////////////////////////////////////////////////////////////////
// Discard the current input stream and return the tokenizer to the state it
// has right after construction: per-parse fields cleared and init() as the
// state function.  Always returns S_OK.
//
// This deliberately does not go through init().  init() pushes firstAdvance
// onto the state stack and leaves it as the active state function; with the
// input stream deleted right afterwards, the next GetNextToken() would call
// nextChar() through a NULL _pInput, and every Reset() would leave one more
// stale entry on the state stack.  With init() as the state function,
// GetNextToken() reports XML_E_ENDOFINPUT until new input is attached, and
// initialization happens once a stream is available.
HRESULT XMLStream::Reset( void)
{
    _init();
    _fnState = &XMLStream::init;

    delete _pInput;
    _pInput = NULL;

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Start reading from a new input stream, suspending the current one (if any)
// on the stream stack together with its lookahead character and the state
// function to resume.
//
//   p           - encoding stream to load into the new BufferedStream; may be
//                 NULL, in which case nothing is loaded.
//   fExternalPE - unused.
//
// Returns S_OK, or E_OUTOFMEMORY if a stack entry or the new stream cannot
// be allocated.
HRESULT XMLStream::PushStream(
    /* [unique][in] */ EncodingStream *p,
    /* [in] */ bool fExternalPE)
{
    UNUSED(fExternalPE);

    if (_pStreams.used() == 0 && _pInput == NULL)
    {
        init();
    }

    _cStreamDepth++;

    // Flush a pending delayed mark against the stream it belongs to.
    if (_fDelayMark && _pInput != NULL)
    {
        mark(_lMarkDelta);
        _lMarkDelta = 0;
        _fDelayMark = false;
    }

    // Save current input stream.
    if (_pInput != NULL)
    {
        InputInfo* saved = _pStreams.push();
        if (saved == NULL)
        {
            return E_OUTOFMEMORY;
        }

        saved->_pInput = _pInput;
        saved->_chLookahead = _chLookahead;

        // When we are in the middle of skipping whitespace, remember the
        // state function recorded on top of the state stack instead.
        const bool fSkippingWS = (&XMLStream::skipWhiteSpace == _fnState) && (_pStack.used() > 0);
        if (fSkippingWS)
        {
            saved->_fnState = _pStack.peek()->_fnState;
        }
        else
        {
            saved->_fnState = _fnState;
        }

        // and prepend pe text with space as per xml spec.
        _chLookahead = L' ';
        _chNextLookahead = _chLookahead;
        _pInput = NULL;
    }

    _pInput = NEW (BufferedStream(this));
    if (_pInput == NULL)
    {
        return E_OUTOFMEMORY;
    }

    if (p != NULL)
    {
        _pInput->Load(p);
    }

    if (_chLookahead == L' ')
    {
        _pInput->setWhiteSpace(); // _pInput didn't see this space char.
    }

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Drop the current input stream and resume the most recently suspended one,
// restoring the lookahead character that was saved with it.
// Returns S_FALSE when there is no suspended stream, S_OK otherwise.
HRESULT XMLStream::PopStream()
{
    InputInfo* saved = _pStreams.peek();
    if (saved == NULL)
    {
        return S_FALSE;
    }

    // Found previous stream, so we can continue from its saved lookahead.
    _chLookahead = saved->_chLookahead;
    _fEOF = false;

    // The finished stream is no longer needed.
    delete _pInput;
    _pInput = saved->_pInput;
    if (_chLookahead == L' ')
    {
        _pInput->setWhiteSpace();
    }

    // BUGBUG: we need to clear this so that the parser does not
    // try and pop a download in the internalPE case (when handling
    // XML_E_ENDOFINPUT in run()), but this means that internal PEs never get
    // XMLNF_ENDENTITY notifications generated.
    // The DTDNodeFactory requires this behaviour currently (incorrectly).
    _pStreams.pop();
    _cStreamDepth--;

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Run the state machine until it produces a token or stops with a non-S_OK
// result.
//
//   t      - receives the token type, or XML_PENDING when no token is
//            returned (any result other than S_OK).
//   text   - receives a pointer to the token text.
//   length - receives the length of the token text.
//   nslen  - receives the namespace length recorded for the token.
//
// Returns E_UNEXPECTED while in DTD mode; E_PENDING (with empty outputs)
// when the state machine reports E_PENDING; otherwise the HRESULT of the
// last state function.  For errors the current token text is still
// returned, since it is useful to the caller.
HRESULT XMLStream::GetNextToken(
    /* [out] */ DWORD *t,
    /* [out] */ const WCHAR **text,
    /* [out] */ long *length,
    /* [out] */ long *nslen)
{
    if (_fDTD)
    {
        return E_UNEXPECTED;
    }

    // Apply a mark that was postponed by the previous call.
    if (_fDelayMark)
    {
        mark(_lMarkDelta);
        _lMarkDelta = 0;
        _fDelayMark = false;
    }

    // Keep stepping the state machine while it succeeds without producing
    // a token.
    HRESULT hr;
    do
    {
        hr = (this->*_fnState)();
    }
    while (hr == S_OK && _nToken == XML_PENDING);

    if (hr == E_PENDING)
    {
        *t = XML_PENDING;
        *length = *nslen = 0;
        *text = NULL;
        return hr;
    }

    if (hr == S_OK)
    {
        *t = _nToken;
    }
    else
    {
        *t = XML_PENDING;
    }

    // At this point hr == S_OK or it is some error, so return the text of
    // the current token.
    if (_fUsingBuffer)
    {
        // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
        *text = _pchBuffer;
        *length = _lBufLen;
        _fUsingBuffer = false;
        _fFoundWhitespace = false;
        _lBufLen = 0;
        _lLengthDelta = 0;
    }
    else
    {
        getToken(text, length);
        if (_lLengthDelta != 0)
        {
            // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK : in
            // ParsingAttributeValue, we have to read ahead of one char '"'
            *length += _lLengthDelta;
            _lLengthDelta = 0;
        }
    }

    if (DELAYMARK(hr))
    {
        // Mark next time around so that error information points to the
        // beginning of this token.
        _fDelayMark = true;
    }
    else
    {
        // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
        // otherwise mark this spot right away so we point to the exact
        // source of the error.
        mark(_lMarkDelta);
        _lMarkDelta = 0;
    }

    _nToken = XML_PENDING;
    *nslen = _lNslen;
    _lNslen = _lNssep = 0;

    return hr;
}
////////////////////////////////////////////////////////////////////////
// Line number reported by the stream returned from getCurrentStream(),
// or 0 when there is no such stream.
ULONG XMLStream::GetLine()
{
    BufferedStream* current = getCurrentStream();
    if (current == NULL)
    {
        return 0;
    }
    return current->getLine();
}
////////////////////////////////////////////////////////////////////////
// Position within the line reported by the stream returned from
// getCurrentStream(), or 0 when there is no such stream.
ULONG XMLStream::GetLinePosition( )
{
    BufferedStream* current = getCurrentStream();
    if (current == NULL)
    {
        return 0;
    }
    return current->getLinePos();
}
////////////////////////////////////////////////////////////////////////
// Input position reported by the stream returned from getCurrentStream(),
// or 0 when there is no such stream.
ULONG XMLStream::GetInputPosition( )
{
    BufferedStream* current = getCurrentStream();
    if (current == NULL)
    {
        return 0;
    }
    return current->getInputPos();
}
////////////////////////////////////////////////////////////////////////
// Fetch the current line buffer from the stream returned by
// getCurrentStream().
//
//   buf      - required; receives the line buffer pointer, or NULL when no
//              stream is available.
//   len      - required; passed on to getLineBuf.
//   startpos - passed on to getLineBuf as given.
//
// Every non-NULL output is cleared first.  Returns E_INVALIDARG when buf or
// len is NULL, S_OK otherwise.
HRESULT XMLStream::GetLineBuffer(
    /* [out] */ const WCHAR * *buf,
    ULONG* len,
    ULONG* startpos)
{
    if (buf != NULL)
    {
        *buf = NULL;
    }
    if (len != NULL)
    {
        *len = 0;
    }
    if (startpos != NULL)
    {
        *startpos = 0;
    }

    if (buf == NULL || len == NULL)
    {
        return E_INVALIDARG;
    }

    BufferedStream* current = getCurrentStream();
    if (current != NULL)
    {
        *buf = current->getLineBuf(len, startpos);
    }
    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Return the most recent stream that actually has something to return:
// the current stream if its line buffer is non-empty, otherwise the first
// suspended stream (walking the stream stack from the top down) whose line
// buffer is non-empty.  Returns NULL if there is no current stream or no
// stream qualifies.
BufferedStream* XMLStream::getCurrentStream()
{
    BufferedStream* candidate = _pInput;
    if (candidate == NULL)
    {
        return NULL;
    }

    int next = _pStreams.used() - 1;
    while (candidate != NULL)
    {
        ULONG len = 0, pos = 0;
        // Only the length matters here; the buffer pointer itself is ignored.
        (void) candidate->getLineBuf(&len, &pos);
        if (len > 0)
        {
            return candidate;
        }

        if (next < 0)
        {
            break;
        }
        candidate = _pStreams[next--]->_pInput;
    }
    return NULL;
}
////////////////////////////////////////////////////////////////////////
// Store the parser flags and cache the individual bits the tokenizer
// consults, so they do not have to be masked out on every use.
void XMLStream::SetFlags( unsigned short usFlags)
{
    _usFlags = usFlags;

    // And break out the flags for performance reasons.
    _fNoNamespaces    = (usFlags & XMLFLAG_NONAMESPACES) != 0;
    _fCaseInsensitive = (usFlags & XMLFLAG_CASEINSENSITIVE) != 0;
    _fShortEndTags    = (usFlags & XMLFLAG_SHORTENDTAGS) != 0;
}
////////////////////////////////////////////////////////////////////////
// Return the flags most recently stored by SetFlags.
unsigned short XMLStream::GetFlags()
{
    return _usFlags;
}
////////////////////////////////////////////////////////////////////////
//======================================================================
// Real Implementation
// Fetch the very first lookahead character, then pop back to the state
// that was active when this state was pushed.
HRESULT XMLStream::firstAdvance()
{
    HRESULT hr = S_OK;

    ADVANCE;
    checkhr2(pop(false));

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Top-level content dispatcher.  Looks at the lookahead character and pushes
// the state that handles what follows:
//   anything but '<' -> parsePCData
//   '<!'             -> g_DeclarationTable (input frozen first)
//   '<?'             -> parsePI
//   '</'             -> parseEndTag
//   '<' other        -> parseElement; the first time round XML_ENDPROLOG is
//                       returned as a token before the element is parsed.
// Returns XML_E_ENDOFINPUT at end of input and XML_E_UNCLOSEDDECL if the
// input ends right after '<'.
HRESULT XMLStream::parseContent()
{
    HRESULT hr = S_OK;

    if (_fEOF)
    {
        return XML_E_ENDOFINPUT;
    }

    if (_chLookahead != L'<')
    {
        checkhr2(push(&XMLStream::parsePCData));
        return parsePCData();
    }

    ADVANCE;
    checkeof(_chLookahead, XML_E_UNCLOSEDDECL);

    if (_chLookahead == L'!')
    {
        checkhr2(_pInput->Freeze()); // stop shifting data until '>'
        return pushTable( 0, g_DeclarationTable, (DWORD)XML_E_UNCLOSEDDECL);
    }

    if (_chLookahead == L'?')
    {
        checkhr2(push( &XMLStream::parsePI ));
        return parsePI();
    }

    if (_chLookahead == L'/')
    {
        checkhr2(push(&XMLStream::parseEndTag));
        return parseEndTag();
    }

    // Start tag: this state is saved and parseElement becomes the active one.
    checkhr2(push( &XMLStream::parseElement ));
    if (_fFoundFirstElement)
    {
        return parseElement();
    }

    // Return special end prolog token and then continue with parseElement.
    _fFoundFirstElement = true;
    _nToken = XML_ENDPROLOG;
    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Consume lookahead characters while they are whitespace and end of input
// has not been reached, then pop back to the previous state.
HRESULT XMLStream::skipWhiteSpace()
{
    HRESULT hr = S_OK;

    for (;;)
    {
        if (!ISWHITESPACE(_chLookahead) || _fEOF)
        {
            break;
        }
        ADVANCE;
    }

    checkhr2(pop(false));
    return hr;
}
////////////////////////////////////////////////////////////////////////
// Tokenizes a start tag.  Resumable: _sSubState selects where to continue.
//   0: freeze the input and parse the element name (parseName returns to
//      state 1).
//   1: report XML_ELEMENT.  If the lookahead is '/' or '>' continue in
//      state 2; otherwise require whitespace (XML_E_BADNAMECHAR if missing)
//      and push parseAttributes, which returns to state 2.
//   2: report XML_EMPTYTAGEND (after consuming '/') or XML_TAGEND, then
//      fall into state 3.
//   3: require and consume '>', pop back to the previous state and unfreeze
//      the input.
//   4: report XML_EMPTYTAGEND for a malformed tag.
//      NOTE(review): nothing in this function transitions to state 4.
// Returns XML_E_UNCLOSEDSTARTTAG when the input ends inside the tag.
HRESULT XMLStream::parseElement() { HRESULT hr = S_OK; switch (_sSubState) { case 0: checkhr2(_pInput->Freeze()); // stop shifting data until '>'
checkhr2(push( &XMLStream::parseName, 1)); checkhr2(parseName()); _sSubState = 1; // fall through
case 1: checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG); _nToken = XML_ELEMENT; // and then try and parse the attributes, and return
// to state 2 to finish up. With an optimization
// for the case where there are no attributes.
if (_chLookahead == L'/' || _chLookahead == L'>') { _sSubState = 2; } else { if (!ISWHITESPACE(_chLookahead)) { return XML_E_BADNAMECHAR; } _chEndChar = L'/'; // for empty tags. //xiaoyu : used to match ENDTAG
checkhr2(push(&XMLStream::parseAttributes,2)); } return S_OK; break;
case 2: // finish up with start tag.
mark(); // only return '>' or '/>' in _nToken text
if (_chLookahead == L'/') { // must be empty tag sequence '/>'.
ADVANCE; _nToken = XML_EMPTYTAGEND; } else if (_chLookahead == L'>') { _nToken = XML_TAGEND; } else if (ISWHITESPACE(_chLookahead)) { return XML_E_UNEXPECTED_WHITESPACE; } else return XML_E_EXPECTINGTAGEND;
_sSubState = 3; // fall through
case 3: checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG); if (_chLookahead != L'>') { if (ISWHITESPACE(_chLookahead)) return XML_E_UNEXPECTED_WHITESPACE; else return XML_E_EXPECTINGTAGEND; } ADVANCE; mark(); checkhr2(pop());// return to parseContent.
return _pInput->UnFreeze(); break;
case 4: // swallow up bad tag
// Allow the weird CDF madness <PRECACHE="YES"/>
// For total compatibility we fake out the parser by returning
// XML_EMPTYTAGEND, this way the rest of the tag becomes PCDATA.
// YUK -- but it works.
_nToken = XML_EMPTYTAGEND; mark(); checkhr2(pop());// return to parseContent.
return _pInput->UnFreeze(); break;
default: INTERNALERROR; } //return S_OK;
} ////////////////////////////////////////////////////////////////////////
// Tokenizes an end tag.  Resumable: _sSubState selects where to continue.
//   0: consume the '/' and mark; parse the tag name unless short end tags
//      are enabled and the lookahead is already '>'.
//   1: report XML_ENDTAG and push skipWhiteSpace, which returns to state 2.
//   2: require '>' (XML_E_BADNAMECHAR otherwise), consume it, mark and pop
//      back to the previous state.
// Returns XML_E_UNCLOSEDENDTAG when the input ends inside the tag.
HRESULT XMLStream::parseEndTag() { HRESULT hr = S_OK; switch (_sSubState) { case 0: ADVANCE; // soak up the '/'
mark(); // SHORT END TAG SUPPORT, IE4 Compatibility Mode only.
if (! _fShortEndTags || _chLookahead != L'>') { checkhr2(push( &XMLStream::parseName, 1)); checkhr2(parseName()); } _sSubState = 1; // fall through
case 1: // finish parsing end tag
checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG); _nToken = XML_ENDTAG; checkhr2(push(&XMLStream::skipWhiteSpace, 2)); return S_OK;
case 2: checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG); if (_chLookahead != L'>') { return XML_E_BADNAMECHAR; } ADVANCE; mark(); checkhr2(pop());// return to parseContent.
break;
default: INTERNALERROR; } return S_OK; } ////////////////////////////////////////////////////////////////////////
// Tokenizes a processing instruction, including the special case of the
// <?xml ...?> declaration.  Resumable: _sSubState selects where to continue.
//   0:     consume the '?', freeze the input and mark; if the target starts
//          with 'x'/'X' go to state 7, otherwise fall into state 1.
//   1:     parse the PI target name (parseName returns to state 2).
//   2:     require '?' or whitespace after the name (XML_E_BADNAMECHAR
//          otherwise) and report XML_PI; resume in state 3.
//   3:     if the lookahead is '?', consume it and require '>' (state 6,
//          else XML_E_EXPECTINGTAGEND); otherwise skip whitespace.
//   4:     mark the start of the PI data.
//   5:     scan up to and including the next '?'; characters rejected by
//          isCharData give XML_E_PIDECLSYNTAX.
//   6:     if the lookahead is '>', consume it, exclude the '?>' from the
//          token length, report XML_ENDPI, pop and unfreeze; otherwise it
//          was a lone '?', so resume in state 5.
//   7-9:   recognize the 'm', 'l' and the terminator of "xml".  Whitespace
//          after "xml" pushes g_XMLDeclarationTable (returning to state 10);
//          unless the stream is case-insensitive, anything other than
//          lower-case "xml" gives XML_E_BADXMLCASE.
//   10:    pop and unfreeze once the declaration table has finished.
//   11-12: the target only starts like "xml": carry on parsing it as an
//          ordinary name.
// Returns XML_E_UNCLOSEDPI when the input ends inside the PI.
HRESULT XMLStream::parsePI() { HRESULT hr = S_OK; switch (_sSubState) { case 0: //_fWasDTD = _fDTD; // as far as Advance is concerned, the contents
//_fHandlePE = false; // of a PI are not special.
ADVANCE; checkhr2(_pInput->Freeze()); // stop shifting data until '?>'
mark(); // don't include '?' in tag name.
if (_chLookahead == L'x' || _chLookahead == L'X') { // perhaps this is the magic <?xml version="1.0"?> declaration.
STATE(7); // jump to state 7.
} // fall through
_sSubState = 1; case 1: checkhr2(push( &XMLStream::parseName, 2)); checkhr2(parseName()); _sSubState = 2; // fall through
case 2: checkeof(_chLookahead, XML_E_UNCLOSEDPI); if (_chLookahead != L'?' && ! ISWHITESPACE(_chLookahead)) { return XML_E_BADNAMECHAR; } _nToken = XML_PI; STATE(3); // found startpi _nToken and return to _sSubState 3
break;
case 3: // finish with rest of PI
if (_chLookahead == L'?') { ADVANCE; if (_chLookahead == L'>') { STATE(6); } else { return XML_E_EXPECTINGTAGEND; } }
checkhr2(push(&XMLStream::skipWhiteSpace, 4)); checkhr2( skipWhiteSpace() ); _sSubState = 4; // fall through
case 4: // support for normalized whitespace
mark(); // strip whitespace from beginning of PI data, since this is
// just the separator between the PI target name and the PI data.
_sSubState = 5; // fallthrough
case 5: while (! _fEOF ) { if (_chLookahead == L'?') { ADVANCE; break; } if (! isCharData(_chLookahead)) return XML_E_PIDECLSYNTAX; ADVANCE; } _sSubState = 6; // go to next state
// fall through.
case 6: checkeof(_chLookahead, XML_E_UNCLOSEDPI); if (_chLookahead == L'>') { ADVANCE; _lLengthDelta = -2; // don't include '?>' in PI CDATA.
} else { // Hmmm. Must be a lone '?' so go back to state 5.
STATE(5); } _nToken = XML_ENDPI; //_fHandlePE = true;
checkhr2(pop()); return _pInput->UnFreeze(); break;
case 7: // recognize 'm' in '<?xml' declaration
ADVANCE; if (_chLookahead != L'm' && _chLookahead != L'M') { STATE(11); // not 'xml' so jump to state 11 to parse name
} _sSubState = 8; // fall through
case 8: // recognize L'l' in '<?xml' declaration
ADVANCE; if (_chLookahead != L'l' && _chLookahead != L'L') { STATE(11); // not 'xml' so jump to state 11 to parse name
} _sSubState = 9; // fall through
case 9: // now need whitespace or ':' or '?' to terminate name.
ADVANCE; if (ISWHITESPACE(_chLookahead)) { if (! _fCaseInsensitive) { const WCHAR* t = NULL; long len =0; // for prefix bug : xiaoyuw@08/28/00
getToken(&t,&len); //if (! StringEquals(L"xml",t,3,false)) // case sensitive
if (::FusionpCompareStrings(L"xml", 3, t, 3, false)!=0) // not equal
return XML_E_BADXMLCASE; } return pushTable(10, g_XMLDeclarationTable, (DWORD)XML_E_UNCLOSEDPI); } if (isNameChar(_chLookahead) || _chLookahead == ':') { STATE(11); // Hmmm. Must be something else then so continue parsing name
} else { return XML_E_XMLDECLSYNTAX; } break;
case 10: //_fHandlePE = true;
checkhr2(pop()); return _pInput->UnFreeze(); break;
case 11: if (_chLookahead == ':') ADVANCE; _sSubState = 12; // fall through
case 12: if (isNameChar(_chLookahead)) { checkhr2(push( &XMLStream::parseName, 2)); _sSubState = 1; // but skip IsStartNameChar test
checkhr2(parseName()); return S_OK; } else { STATE(2); } break;
default: INTERNALERROR; }
//return S_OK;
} ////////////////////////////////////////////////////////////////////////
// Tokenizes a comment.  Resumable: _sSubState selects where to continue.
//   0: consume the first '-' and require a second one
//      (XML_E_COMMENTSYNTAX otherwise).
//   1: consume the second '-' and mark the start of the comment text.
//   2: scan up to and including the next '-'; characters rejected by
//      isCharData give XML_E_BADCHARDATA.
//   3: if the next character is not '-', it was a floating '-': resume in
//      state 2.  Otherwise consume the second closing '-'.
//   4: require '>' (XML_E_COMMENTSYNTAX otherwise), consume it, exclude the
//      '-->' from the token length, report XML_COMMENT and pop.
// Returns XML_E_UNCLOSEDCOMMENT when the input ends inside the comment.
HRESULT XMLStream::parseComment() { // ok, so '<!-' has been parsed so far
HRESULT hr = S_OK; switch (_sSubState) { case 0: //_fWasDTD = _fDTD; // as far as the DTD is concerned, the contents
//_fHandlePE = false; // of a COMMENT are not special.
ADVANCE; // soak up first '-'
checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT); if (_chLookahead != L'-') { return XML_E_COMMENTSYNTAX; } _sSubState = 1; // fall through
case 1: ADVANCE; // soak up second '-'
mark(); // don't include '<!--' in comment text
_sSubState = 2; // fall through;
case 2: while (! _fEOF) { if (_chLookahead == L'-') { ADVANCE; // soak up first closing L'-'
break; } if (! isCharData(_chLookahead)) return XML_E_BADCHARDATA; ADVANCE; } checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT); _sSubState = 3; // advance to next state
// fall through.
case 3: if (_chLookahead != L'-') { // Hmmm, must have been a floating L'-' so go back to state 2
STATE(2); } ADVANCE; // soak up second closing L'-'
_sSubState = 4; // fall through
case 4: checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT); //if (_chLookahead != L'>' && ! _fIE4Quirks)
if (_chLookahead != L'>') { // cannot have floating L'--' unless we are in compatibility mode.
return XML_E_COMMENTSYNTAX; } ADVANCE; // soak up closing L'>'
_lLengthDelta = -3; // don't include L'-->' in the comment text.
_nToken = XML_COMMENT; checkhr2(pop()); //_fHandlePE = true;
break;
default: INTERNALERROR; } return S_OK; } ////////////////////////////////////////////////////////////////////////
// Tokenizes a name.  Resumable: _sSubState selects where to continue.
//   0: check that the lookahead may start a name and mark the start of the
//      token.  Fails with XML_E_UNEXPECTED_WHITESPACE if the lookahead is
//      whitespace, XML_E_BADSTARTNAMECHAR otherwise.
//   1: clear the namespace length/separator, consume name characters until
//      a non-name character or end of input, then pop back to the previous
//      state.  Callers that have already validated the first character
//      enter here directly.
HRESULT XMLStream::parseName()
{
    HRESULT hr = S_OK;

    switch (_sSubState)
    {
    case 0:
        if (! isStartNameChar(_chLookahead))
        {
            return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE
                                              : XML_E_BADSTARTNAMECHAR;
        }
        mark();
        _sSubState = 1;
        // fall through

    case 1:
        _lNslen = _lNssep = 0;
        for (;;)
        {
            if (! isNameChar(_chLookahead) || _fEOF)
            {
                break;
            }
            ADVANCE;
        }
        hr = pop(false); // return to the previous state
        break;

    default:
        INTERNALERROR;
    }

    return hr;
}
////////////////////////////////////////////////////////////////////////
// Tokenizes the attribute list of a start tag.  Resumable: _sSubState
// selects where to continue.
//   0: skip leading whitespace.
//   1: if the lookahead is _chEndChar or '>' there are no (more) attributes:
//      pop.  Otherwise parse the attribute name and require whitespace or
//      '=' after it (XML_E_BADNAMECHAR otherwise).
//   2: if whitespace follows the name go to state 7; otherwise report
//      XML_ATTRIBUTE and resume in state 3.
//   3: reject whitespace in front of the '='.
//   4: require and consume '=' (XML_E_MISSINGEQUALS otherwise), then skip
//      optional whitespace.
//   5: require an opening quote (XML_E_MISSINGQUOTE otherwise), remember it
//      in _chTerminator, consume it, mark and push parseAttrValue, which
//      returns to state 6.
//   6: after the value: pop on _chEndChar or '>', otherwise require
//      whitespace (XML_E_MISSINGWHITESPACE) and start over in state 0.
//   7: record the token length in _lLengthDelta and skip the whitespace
//      between the attribute name and '='.
//   8: subtract the new token length from _lLengthDelta and go back to
//      state 2.
//      NOTE(review): presumably this keeps the skipped whitespace out of the
//      reported attribute name length -- confirm against GetNextToken.
// Returns XML_E_UNCLOSEDSTARTTAG when the input ends inside the tag.
HRESULT XMLStream::parseAttributes() { HRESULT hr = S_OK; switch (_sSubState) { case 0: //_nAttrType = XMLTYPE_CDATA;
_fCheckAttribute = false; checkhr2(push(&XMLStream::skipWhiteSpace, 1)); checkhr2( skipWhiteSpace() ); _sSubState = 1; // fall through
case 1: if (_chLookahead == _chEndChar || _chLookahead == L'>' ) { checkhr2(pop()); // no attributes.
return S_OK; } checkhr2( push( &XMLStream::parseName, 2 ) ); checkhr2( parseName() );
if (!ISWHITESPACE(_chLookahead) && _chLookahead != L'=') { return XML_E_BADNAMECHAR; } _sSubState = 2; // fall through
case 2: if (ISWHITESPACE(_chLookahead)) { // Eq ::= S? '=' S?
STATE(7); }
checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG); _nToken = XML_ATTRIBUTE; _sSubState = 3; return S_OK; break;
case 3: if (ISWHITESPACE(_chLookahead)) return XML_E_UNEXPECTED_WHITESPACE; _fWhitespace = false; _sSubState = 4; // fall through
case 4: if (_chLookahead != L'=') { return XML_E_MISSINGEQUALS; } ADVANCE; if (ISWHITESPACE(_chLookahead)) { // allow whitespace between '=' and attribute value.
checkhr2(push(&XMLStream::skipWhiteSpace, 5)); checkhr2( skipWhiteSpace() ); } _sSubState = 5; // fall through
case 5: if (ISWHITESPACE(_chLookahead)) return XML_E_UNEXPECTED_WHITESPACE; if (_chLookahead != L'"' && _chLookahead != L'\'') { return XML_E_MISSINGQUOTE; } _chTerminator = _chLookahead; ADVANCE; mark(); return push(&XMLStream::parseAttrValue, 6); //_sSubState = 6;
// fall through;
case 6: checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG); if (_chLookahead == _chEndChar || _chLookahead == L'>') { checkhr2(pop()); return S_OK; } if (! ISWHITESPACE(_chLookahead) ) { return XML_E_MISSINGWHITESPACE; } STATE(0); // go back to state 0
break;
case 7: // allow whitespace between attribute and '='
_lLengthDelta = _pInput->getTokenLength(); checkhr2(push(&XMLStream::skipWhiteSpace, 8)); checkhr2( skipWhiteSpace() ); _sSubState = 8; // fall through
case 8: checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG); _lLengthDelta -= _pInput->getTokenLength(); STATE(2); break;
default: INTERNALERROR; } //return hr;
} ////////////////////////////////////////////////////////////////////////
// Tokenizes a quoted attribute value; the opening quote has already been
// consumed and stored in _chTerminator.  Resumable: _sSubState selects where
// to continue.
//   0: set _fParsingAttDef and fall into state 2.
//   2: scan until the terminator, a '<' or end of input.  An '&' pushes
//      parseEntityRef, which returns to state 2.  When scanPCData reports
//      E_PENDING, one character is advanced and control returns to the
//      caller; any other failure is returned as is.
//   3: on the terminator: consume it; when _fReturnAttributeValue is set and
//      there is text to report, report XML_PCDATA without the terminator;
//      when it is not set, restore it to true.  Clear _fParsingAttDef and
//      pop.  Any other character gives XML_E_BADCHARINSTRING.
// Returns XML_E_UNCLOSEDSTRING when the input ends inside the value.
HRESULT XMLStream::parseAttrValue() { HRESULT hr = S_OK;
switch (_sSubState) { case 0: _fParsingAttDef = true; // mark beginning of attribute data
_sSubState = 2; // fall through;
case 2: while ( _chLookahead != _chTerminator && _chLookahead != L'<' && ! _fEOF ) { if (_chLookahead == L'&') { // then parse entity ref and then return
// to state 2 to continue with PCDATA.
return push(&XMLStream::parseEntityRef,2); } hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace); if (FAILED(hr)) { if (hr == E_PENDING) { hr = S_OK; ADVANCE; } return hr; } } _sSubState = 3; // fall through
case 3: checkeof(_chLookahead, XML_E_UNCLOSEDSTRING); if (_chLookahead == _chTerminator) { ADVANCE; if (_fReturnAttributeValue) { // return what we have so far - if anything.
if ((_fUsingBuffer && _lBufLen > 0) || _pInput->getTokenLength() > 1) { _lLengthDelta = -1; // don't include string _chTerminator.
_nToken = XML_PCDATA; } } else { _fReturnAttributeValue = true; // reset to default value.
} _fParsingAttDef = false; checkhr2(pop()); return S_OK; } else { return XML_E_BADCHARINSTRING; } break;
default: INTERNALERROR; } //return hr;
} ////////////////////////////////////////////////////////////////////////
// Consume hexadecimal digits up to (not including) the next ';'.
// Returns XML_E_UNEXPECTEDEOF if the input ends first,
// XML_E_UNEXPECTED_WHITESPACE or XML_E_BADCHARINENTREF on a character that
// is not a hex digit, and S_OK once the lookahead is ';'.
HRESULT XMLStream::ScanHexDigits()
{
    HRESULT hr = S_OK;

    for (;;)
    {
        if (_fEOF)
        {
            return XML_E_UNEXPECTEDEOF;
        }
        if (_chLookahead == L';')
        {
            return hr;
        }
        if (! isHexDigit(_chLookahead))
        {
            return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
        }
        ADVANCE;
    }
}
////////////////////////////////////////////////////////////////////////
// Consume decimal digits up to (not including) the next ';'.
// Returns XML_E_UNEXPECTEDEOF if the input ends first,
// XML_E_UNEXPECTED_WHITESPACE or XML_E_BADCHARINENTREF on a character that
// is not a digit, and S_OK once the lookahead is ';'.
HRESULT XMLStream::ScanDecimalDigits()
{
    HRESULT hr = S_OK;

    for (;;)
    {
        if (_fEOF)
        {
            return XML_E_UNEXPECTEDEOF;
        }
        if (_chLookahead == L';')
        {
            return hr;
        }
        if (! isDigit(_chLookahead))
        {
            return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
        }
        ADVANCE;
    }
}
////////////////////////////////////////////////////////////////////////
// Tokenizes character data up to the next '<' or end of input.  Resumable:
// _sSubState selects where to continue.
//   0: start with _fWhitespace set and fall into state 1.
//   1: scan.  An '&' pushes parseEntityRef, which returns to state 1.  A '>'
//      directly preceded by "]]" in the current token gives
//      XML_E_INVALID_CDATACLOSINGTAG.  Everything else goes through
//      scanPCData; when that reports E_PENDING, one character is advanced
//      and control returns to the caller, and any other failure is returned
//      as is.
//   2: if the token is non-empty or the private buffer is in use, report
//      XML_WHITESPACE or XML_PCDATA depending on _fWhitespace; then pop.
HRESULT XMLStream::parsePCData() { HRESULT hr = S_OK;
switch (_sSubState) { case 0: _fWhitespace = true; _sSubState = 1; // fall through;
case 1: // This state is used when we are not normalizing white space. This
// is a separate state for performance reasons.
// Normalizing whitespace is about 11% slower.
while (_chLookahead != L'<' && ! _fEOF ) { if (_chLookahead == L'&') { // then parse entity ref and then return
// to state 1 to continue with PCDATA.
return push(&XMLStream::parseEntityRef,1); }
if (_chLookahead == L'>') { WCHAR* pText = NULL; long len = 0; _pInput->getToken((const WCHAR**)&pText, &len); //if (len >= 2 && StrCmpN(L"]]", pText + len - 2, 2) == 0)
if ((len >= 2) && (::FusionpCompareStrings(L"]]", 2, pText + len - 2, 2, false)==0)) return XML_E_INVALID_CDATACLOSINGTAG; } // This slows us down too much.
// else if (! isCharData(_chLookahead))
// {
// return XML_E_BADCHARDATA;
// }
hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace); if (FAILED(hr)) { if (hr == E_PENDING) { hr = S_OK; ADVANCE; } return hr; } checkhr2(hr); } _sSubState = 2; // fall through
case 2: if (_pInput->getTokenLength() > 0 || _fUsingBuffer) { _nToken = _fWhitespace ? XML_WHITESPACE : XML_PCDATA; } checkhr2(pop()); break;
default: INTERNALERROR; } return S_OK; } ////////////////////////////////////////////////////////////////////////
// Tokenizes an entity reference found inside PCDATA or an attribute value:
//     '&#' [0-9]+ ';'   |   '&#x' [0-9a-fA-F]+ ';'   |   '&' Name ';'
// Only numeric character references and builtin named entities are
// supported; any other named reference is rejected with
// XML_E_MISSINGSEMICOLON (xiaoyu : Fusion XML Parser does not support
// external refs).
//
// Resumable: _sSubState selects where to continue.
//   0:  record where the entity starts inside the current token and whether
//       text is pending in front of it.
//   1:  consume the '&'.
//   2:  '#' -> consume it and resume in state 3; otherwise validate the
//       first name character and run parseName (returning to state 6).
//   3:  'x' -> hex reference (state 5), otherwise decimal (state 4).
//   4:  decimal reference: scan the digits and convert them.
//   5:  hex reference: scan the digits and convert them.
//   6:  named reference: look the name up among the builtin entities.
//   8:  mark and pop back to the state that pushed this one.
//   9:  the reference was not terminated by ';': XML_E_MISSINGSEMICOLON.
//   10: return the text in front of the reference as XML_PCDATA, if any.
//   11: push the entity's character into the buffer and return its token.
//   12: consume the ';' and resume in state 8.
// (State 7 belonged to general entity reference support and no longer
// exists.)
HRESULT XMLStream::parseEntityRef()
{
    HRESULT hr = S_OK;
    long entityLen = 0, lLen = 1;
    const WCHAR* t = NULL;
    long len = 0;

Start:
    switch (_sSubState)
    {
    case 0: // ^ ( '&#' [0-9]+ ) | ('&#X' [0-9a-fA-F]+) | ('&' Name) ';'
        _nPreToken = XML_PENDING;
        _lEntityPos = _pInput->getTokenLength(); // record entity position.
        _fPCDataPending = (_lEntityPos > 0);

        if (PreEntityText())
        {
            // remember the pending text before parsing the entity.
            _nPreToken = _nToken;
            _nToken = XML_PENDING;
        }
        _sSubState = 1;
        // fall through

    case 1:
        ADVANCE; // soak up the '&'
        _sSubState = 2;
        // fall through

    case 2:
        checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
        if (_chLookahead == L'#')
        {
            ADVANCE;
            _sSubState = 3; // resumed on the next call (see the break below)
        }
        else
        {
            // Floating '&' is not supported: the next character must be
            // able to start a name.
            if (! isStartNameChar(_chLookahead))
            {
                if (ISWHITESPACE(_chLookahead))
                    return XML_E_UNEXPECTED_WHITESPACE;
                else
                    return XML_E_BADSTARTNAMECHAR;
            }
            checkhr2(push(&XMLStream::parseName, 6));
            _sSubState = 1; // avoid doing a mark() so we can return PCDATA if necessary.
            return parseName();
        }
        break;

    // ------------- Numeric entity references --------------------
    case 3:
        checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
        if (_chLookahead == L'x')
        {
            // hex character reference.
            ADVANCE;
            STATE(5); // go to state 5
        }
        _sSubState = 4;
        // fall through

    case 4: // '&#' ^ [0-9]+ ';'
        checkhr2(ScanDecimalDigits());
        if (_chLookahead != L';')
        {
            STATE(9);
        }

        entityLen = _pInput->getTokenLength() - _lEntityPos;
        getToken(&t, &len);
        checkhr2(DecimalToUnicode(t + _lEntityPos + 2, entityLen - 2, _wcEntityValue));
        lLen = 2;
        _nToken = XML_NUMENTITYREF;
        // have to use GOTOSTART() because state 10 uses entityLen and lLen
        GOTOSTART(10);
        break;

    case 5: // '&#X' ^ [0-9a-fA-F]+
        checkhr2(ScanHexDigits());
        if (_chLookahead != L';')
        {
            STATE(9);
        }

        entityLen = _pInput->getTokenLength() - _lEntityPos;
        getToken(&t, &len);
        checkhr2(HexToUnicode(t + _lEntityPos + 3, entityLen - 3, _wcEntityValue));
        lLen = 3;
        _nToken = XML_HEXENTITYREF;
        // have to use GOTOSTART() because state 10 uses entityLen and lLen
        GOTOSTART(10);
        break;

    // ------------- Named Entity References --------------------
    case 6: // '&' Name ^ ';'
        checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
        if (_chLookahead != L';')
        {
            STATE(9);
        }

        // If parseName found a namespace then we need to calculate the
        // real nslen taking the pending PC data and '&' into account
        // and remember this in case we have to return the PCDATA.
        _nEntityNSLen = (_lNslen > 0) ? _lNslen - _lEntityPos - 1 : 0;
        _fUsingBuffer = false;

        entityLen = _pInput->getTokenLength() - _lEntityPos;
        getToken(&t, &len);

        if (0 != (_wcEntityValue = BuiltinEntity(t + _lEntityPos + 1, entityLen - 1)))
        {
            lLen = 1;
            _nToken = XML_BUILTINENTITYREF;
            // have to use GOTOSTART() because state 10 uses entityLen and lLen
            GOTOSTART(10);
        }
        else
        {
            // xiaoyu : Fusion XML Parser does not support external ref,
            // so, if it is not a builtIn ref, we would return error
            return XML_E_MISSINGSEMICOLON;
        }
        break;

    case 8:
        mark();
        checkhr2(pop());
        return S_OK;

    case 9:
        // States 4, 5 and 6 come here when the reference is not terminated
        // by ';'.  Soft ("floating ampersand") handling is not supported, so
        // this is always an error.  Without this case the default branch
        // would misreport the condition as XML_E_INTERNALERROR.
        return XML_E_MISSINGSEMICOLON;

    case 10:
        // Return the text before builtin or char entityref as XML_PCDATA
        if (_nPreToken)
        {
            _nPreToken = _nToken;
            _nToken = XML_PCDATA;
            _lLengthDelta = -entityLen;
            _lMarkDelta = entityLen - lLen; // don't include '&' in _nToken.
            STATE(11); // return token and resume in state 11.
        }
        else
        {
            _nPreToken = _nToken;
            mark(entityLen - lLen);
            GOTOSTART(11);
        }
        break;

    case 11:
        // push the builtin entity
        _fUsingBuffer = true;
        PushChar(_wcEntityValue);
        _nToken = _nPreToken;
        STATE(12); // return token and resume in state 12.
        break;

    case 12:
        ADVANCE; // soak up the ';'
        STATE(8); // resume in state 8.
        break;

    default:
        INTERNALERROR;
    }
    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Saves the current tokenizer state and makes parseTable the active state
// function, interpreting the supplied state table.
//   substate - sub-state that is restored when the pushed state is popped.
//   table    - state table that parseTable will walk.
//   le       - EOF error code; accepted for compatibility but not recorded.
// Returns early (via checkhr2) if push() fails.
HRESULT XMLStream::pushTable(short substate, const StateEntry* table, DWORD le)
{
    HRESULT hr = S_OK;

    checkhr2(push(&XMLStream::parseTable, substate));

    // The EOF error code is intentionally not tracked (_lEOFError is unused).
    UNUSED(le);
    _pTable = table;

    return hr;
}
////////////////////////////////////////////////////////////////////////
// Records the current state function, state table and stream depth on the
// state stack, together with the sub-state 's' to resume in, and then makes
// 'f' the active state function starting at sub-state 0.
// Returns E_OUTOFMEMORY when the stack cannot hand out a new entry.
HRESULT XMLStream::push(StateFunc f, short s)
{
    StateInfo* pFrame = _pStack.push();
    if (NULL == pFrame)
    {
        return E_OUTOFMEMORY;
    }

    // Remember where to come back to.
    pFrame->_cStreamDepth = _cStreamDepth;
    pFrame->_pTable       = _pTable;
    pFrame->_fnState      = _fnState;
    pFrame->_sSubState    = s;

    // Enter the new state function from its first sub-state.
    _fnState   = f;
    _sSubState = 0;

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Restores the state function, sub-state and state table saved by the most
// recent push() and removes that entry from the state stack.
//   boundary - when true (and we are parsing a DTD outside of an attribute
//              definition) the pop is rejected if the current stream depth
//              differs from the depth recorded at push time.
// Returns XML_E_PE_NESTING for such a rejected pop, otherwise S_OK.
HRESULT XMLStream::pop(bool boundary)
{
    StateInfo* pSI = _pStack.peek();

    // prefix bug fix : xiaoyuw@08/29/00
    ASSERT_NTC(pSI != NULL);

    if (boundary && _fDTD && !_fParsingAttDef)   // _fParsingNames ||
    {
        if (_cStreamDepth != pSI->_cStreamDepth)
        {
            // We are inside a parameter entity and would pop out to a state
            // that is NOT inside that entity. That is not well formed, e.g.
            // the parameter entity below pops us out of the ContentModel
            // state in which it was found:
            //      <!DOCTYPE foo [
            //          <!ENTITY % foo "a)">
            //          <!ELEMENT bar ( %foo; >
            //      ]>...
            return XML_E_PE_NESTING;
        }
    }

    // Reinstate the saved state (the EOF error code is not part of it).
    _pTable    = pSI->_pTable;
    _sSubState = pSI->_sSubState;
    _fnState   = pSI->_fnState;

    _pStack.pop();

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Replaces the current state function with 'f': the top stack entry is
// popped and immediately re-pushed for 'f', so that 'f' eventually returns
// to the same place (and with the same recorded stream depth) as the state
// it replaces. 'f' is then run once.
// Returns the failure code of pop()/push() if either fails, otherwise the
// result of invoking 'f'.
HRESULT XMLStream::switchTo(StateFunc f)
{
    HRESULT hr;

    // Make sure we keep the old stream depth.
    StateInfo* pSI = _pStack.peek();

    // prefix bug fix : xiaoyuw@08/29/00
    ASSERT_NTC(pSI != NULL);

    int currentDepth = _cStreamDepth;

    // Temporarily adopt the depth stored in the entry being replaced so that
    // the re-push below records that same depth.
    _cStreamDepth = pSI->_cStreamDepth;

    hr = pop(false);
    if (hr == S_OK)
    {
        hr = push(f, _sSubState); // keep return to _sSubState the same
    }

    // Always put the real depth back, including on the failure paths, so an
    // error here does not leave the stream depth pointing at the old entry.
    _cStreamDepth = currentDepth;

    if (hr != S_OK)
    {
        return hr;
    }

    return (this->*f)();
}
////////////////////////////////////////////////////////////////////////
// Parses what follows '<![': either the start of a CDATA section, which is
// handed over to parseCData, or a DTD conditional section (INCLUDE / IGNORE),
// which this simplified parser always rejects with XML_E_BADENDCONDSECT.
// The function is a re-entrant state machine keyed on _sSubState.
HRESULT XMLStream::parseCondSect()
{
    HRESULT hr = S_OK;

    switch (_sSubState)
    {
    case 0:
        ADVANCE; // soak up the '[' character
        //if (_fFoundPEREf) return S_OK;
        _sSubState = 1;
        // fall through

    case 1:
        // Decide whether this can be the magic '[CDATA[' sequence.
        if (_fEOF) return XML_E_UNCLOSEDMARKUPDECL;
        if (_chLookahead == L'C')
        {
            _pchCDataState = g_pstrCDATA;
            _sSubState = 5; // go and match the rest of the CDATA keyword
            return S_OK;
        }
        _sSubState = 2; // must be IGNORE, INCLUDE or %pe;
        // fall through

    case 2:
        // Must be a DTD markup declaration:
        //   '<![' ^ S? ('INCLUDE' | 'IGNORE' | %pe;) S? [...]]>
        // Skip the optional whitespace first.
        //if (_fInternalSubset)
        //    return XML_E_CONDSECTINSUBSET;
        if (_fEOF) return XML_E_EXPECTINGOPENBRACKET;
        checkhr2(push(&XMLStream::skipWhiteSpace, 3));
        return skipWhiteSpace(); // must return because of %pe;

    case 3:
        if (_fEOF) return XML_E_UNCLOSEDMARKUPDECL;
        checkhr2(push(&XMLStream::parseName, 4));
        return parseName();

    case 4:
        // Scanned 'INCLUDE' or 'IGNORE'. Neither is supported here (the
        // parseIgnoreSect / parseIncludeSect dispatch has been removed), so
        // the section is always reported as an error.
        {
            const WCHAR* pchName = NULL;
            long cchName = 0;
            getToken(&pchName, &cchName);
            return XML_E_BADENDCONDSECT;
        }

    case 5:
        // Match the remaining characters of the CDATA keyword.
        while (*_pchCDataState != 0 && _chLookahead == *_pchCDataState && !_fEOF)
        {
            ADVANCE;          // advance first, before incrementing _pchCDataState
            _pchCDataState++; // so that this state is re-entrant in the E_PENDING case.
            if (_fEOF) return XML_E_UNCLOSEDMARKUPDECL;
        }

        if (*_pchCDataState != 0)
        {
            // Keyword not fully matched: must be an INCLUDE or IGNORE
            // section, so continue in state 2.
            _sSubState = 2;
            return S_OK;
        }
        if (_chLookahead != L'[')
        {
            return XML_E_EXPECTINGOPENBRACKET;
        }
        if (_fDTD)
        {
            return XML_E_CDATAINVALID;
        }
        return switchTo(&XMLStream::parseCData);

    default:
        INTERNALERROR;
    }

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Scans the body of a CDATA section up to and including the terminating
// ']]>' and reports it as an XML_CDATA token (the ']]>' itself is excluded
// from the token text). Re-entrant state machine keyed on _sSubState.
HRESULT XMLStream::parseCData()
{
    HRESULT hr = S_OK;

    switch (_sSubState)
    {
    case 0:
        ADVANCE; // soak up the '[' character.
        mark();  // don't include 'CDATA[' in CDATA text
        _sSubState = 1;
        // fall through

    case 1:
        // Consume text until the next ']' (scanPCData stops when it sees one).
        while (!_fEOF && _chLookahead != L']')
        {
            hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
            if (FAILED(hr))
            {
                if (hr == E_PENDING)
                {
                    hr = S_OK;
                    ADVANCE;
                }
                return hr;
            }
        }
        if (_fEOF) return XML_E_UNCLOSEDCDATA;
        _sSubState = 2;
        // fall through

    case 2:
        ADVANCE; // soak up first ']' character.
        if (_fEOF) return XML_E_UNCLOSEDCDATA;
        if (_chLookahead != L']')
        {
            // It was a floating ']' character: go back to scanning text.
            _sSubState = 1;
            return S_OK;
        }
        _sSubState = 3;
        // fall through

    case 3:
        ADVANCE; // soak up second ']' character.
        if (_fEOF) return XML_E_UNCLOSEDCDATA;
        if (_chLookahead == L']')
        {
            // An extra ']' character, tricky !!
            // Stay in state 3 until a non-']' character shows up, so a CDATA
            // section may be terminated with ']]]]]]]]]]]]]]]]>' and
            // everything except the final ']]>' is treated as CDATA.
            _sSubState = 3;
            return S_OK;
        }
        else if (_chLookahead != L'>')
        {
            // It was a floating "]]" pair: go back to scanning text.
            _sSubState = 1;
            return S_OK;
        }
        _sSubState = 4;
        // fall through

    case 4:
        ADVANCE; // soak up the '>'
        _nToken = XML_CDATA;
        _lLengthDelta = -3; // don't include terminating ']]>' in text.
        checkhr2(pop());    // return to parseContent.
        return S_OK;

    default:
        INTERNALERROR;
    }

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Parses the production  Eq ::= S? '=' S?  and then pops back to the state
// that requested it. Returns XML_E_MISSINGEQUALS when the '=' is absent.
// Re-entrant state machine keyed on _sSubState.
HRESULT XMLStream::parseEquals()
{
    HRESULT hr = S_OK;

    switch (_sSubState)
    {
    case 0:
        // Optional whitespace between the attribute name and '='.
        if (ISWHITESPACE(_chLookahead))
        {
            checkhr2(push(&XMLStream::skipWhiteSpace, 1));
            checkhr2(skipWhiteSpace());
        }
        _sSubState = 1;
        // fall through

    case 1:
        if (_chLookahead != L'=')
        {
            return XML_E_MISSINGEQUALS;
        }
        ADVANCE;

        // Optional whitespace between '=' and the attribute value.
        if (ISWHITESPACE(_chLookahead))
        {
            checkhr2(push(&XMLStream::skipWhiteSpace, 2));
            checkhr2(skipWhiteSpace());
        }
        _sSubState = 2;
        // fall through

    case 2:
        // Done: return to the caller's state without a PE boundary check.
        checkhr2(pop(false));
        break;

    default:
        INTERNALERROR;
    }

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Table driven parser: interprets the StateEntry at _pTable[_sSubState] and
// keeps stepping through the table until a token has been produced
// (_nToken != XML_PENDING), another state function has taken over, or an
// error / non-S_OK result has to be returned.
//
// For every entry, _sGoto is the default next state. Depending on the
// operation, _sArg1 is either the alternative next state or a token value,
// and _lDelta is either a token value or a length adjustment. A next state
// that is >= XML_E_PARSEERRORBASE is returned to the caller as an error code.
HRESULT XMLStream::parseTable()
{
    HRESULT hr = S_OK;

    while (hr == S_OK && _nToken == XML_PENDING)
    {
        const StateEntry* pSE = &_pTable[_sSubState];

        DWORD newState = pSE->_sGoto;

        switch (pSE->_sOp)
        {
        case OP_WS:
            // Mandatory whitespace.
            //checkeof(_chLookahead, _lEOFError);
            if (! ISWHITESPACE(_chLookahead))
                return XML_E_MISSINGWHITESPACE;
            // fall through
        case OP_OWS:
            // Optional whitespace.
            //checkeof(_chLookahead, _lEOFError);
            checkhr2(push(&XMLStream::skipWhiteSpace, (short)newState));
            checkhr2(skipWhiteSpace());
            //if (_fFoundPEREf) return XML_E_FOUNDPEREF;
            break;

        case OP_CHARWS:
            // Either the expected character (which yields the token stored
            // in _lDelta) or whitespace (continue at _sArg1).
            //if (_fFoundPEREf) return S_OK;
            mark();
            //checkeof(_chLookahead, _lEOFError);
            if (_chLookahead == pSE->_pch[0])
            {
                ADVANCE;
                newState = pSE->_sGoto;
                _nToken = pSE->_lDelta;
            }
            else if (! ISWHITESPACE(_chLookahead))
            {
                return XML_E_WHITESPACEORQUESTIONMARK;
            }
            else
                newState = pSE->_sArg1;
            break;

        case OP_CHAR:
            // Same as OP_CHAR2, but marks the token start first.
            //if (_fFoundPEREf) return S_OK;
            mark();
            // fall through
        case OP_CHAR2:
            //if (_fFoundPEREf) return S_OK;
            //checkeof(_chLookahead, _lEOFError);
            if (_chLookahead == pSE->_pch[0])
            {
                ADVANCE;
                newState = pSE->_sGoto;
                _nToken = pSE->_lDelta;
                //if (_nToken == XML_GROUP)
                //    _nAttrType = XMLTYPE_NMTOKEN;
            }
            else
            {
                newState = pSE->_sArg1;
                if (newState >= XML_E_PARSEERRORBASE &&
                    ISWHITESPACE(_chLookahead))
                    return XML_E_UNEXPECTED_WHITESPACE;
            }
            break;

        case OP_PEEK:
            // Branch on the lookahead character without consuming it.
            //if (_fFoundPEREf) return S_OK;
            //checkeof(_chLookahead, _lEOFError);
            if (_chLookahead == pSE->_pch[0])
            {
                newState = pSE->_sGoto;
            }
            else
                newState = pSE->_sArg1;
            break;

        case OP_NAME:
            //if (_fFoundPEREf) return S_OK;
            //checkeof(_chLookahead, _lEOFError);
            checkhr2(push(&XMLStream::parseName, (short)newState));
            checkhr2(parseName());
            break;

        case OP_TOKEN:
            _nToken = pSE->_sArg1;
            _lLengthDelta = pSE->_lDelta;
            break;

        case OP_POP:
            _lLengthDelta = pSE->_lDelta;
            if (_lLengthDelta == 0) mark();
            // The _lDelta field contains a boolean flag to tell us whether this
            // pop needs to check for parameter entity boundary or not.
            checkhr2(pop(pSE->_lDelta == 0)); // we're done !
            _nToken = pSE->_sArg1;
            //_nAttrType = XMLTYPE_CDATA;
            return S_OK;

        case OP_STRCMP:
            {
                const WCHAR* t = NULL;
                long len = 0;
                getToken(&t, &len);

                // A negative _lDelta trims that many characters off the end
                // of the token before it is compared.
                long delta = (pSE->_lDelta < 0) ? pSE->_lDelta : 0;
                long cchToken = len + delta;

                // Length of the keyword stored in the table entry.
                long cchKeyword = 0;
                if (pSE->_pch != NULL)
                {
                    while (pSE->_pch[cchKeyword] != 0)
                        cchKeyword++;
                }

                // The token has to match the WHOLE keyword. Comparing only
                // the first cchToken characters of both strings would accept
                // any prefix of the keyword (e.g. "v" for "version"), read
                // past the end of the keyword for longer tokens, and pass a
                // negative length when the token is shorter than the trim.
                if (cchToken == cchKeyword &&
                    ::FusionpCompareStrings(pSE->_pch, cchKeyword, t, cchToken, _fCaseInsensitive) == 0)
                {
                    if (pSE->_lDelta > 0)
                    {
                        _nToken = pSE->_lDelta;
                        _lLengthDelta = 0;
                    }

                    newState = pSE->_sGoto;
                }
                else
                    newState = pSE->_sArg1;
            }
            break;

        case OP_COMMENT:
            return push(&XMLStream::parseComment, (short)newState);

        case OP_CONDSECT:
            //if (_fFoundPEREf) return S_OK;
            // parse <![CDATA[...]]> or <![IGNORE[...]]>
            return push(&XMLStream::parseCondSect, (short)newState);

        case OP_SNCHAR:
            // Branch on whether the lookahead can start a name.
            //checkeof(_chLookahead, _lEOFError);
            if (isStartNameChar(_chLookahead))
            {
                newState = pSE->_sGoto;
            }
            else
                newState = pSE->_sArg1;
            break;

        case OP_EQUALS:
            //if (_fFoundPEREf) return S_OK;
            //checkeof(_chLookahead, _lEOFError);
            checkhr2(push(&XMLStream::parseEquals, (short)newState));
            checkhr2(parseEquals());
            break;

        case OP_ENCODING:
            {
                const WCHAR* t = NULL; // prefix bug fix, xiaoyuw@08/29/00
                long len = 0;          // prefix bug fix, xiaoyuw@08/29/00
                checkhr2(_pInput->getToken(&t, &len));
                checkhr2(_pInput->switchEncoding(t, len + pSE->_lDelta));
            }
            break;

        case OP_ATTRVAL:
            // Opening quote of an attribute value; the same quote character
            // is remembered as the terminator.
            //if (_fFoundPEREf) return S_OK;
            if (_chLookahead != L'"' && _chLookahead != L'\'')
            {
                return XML_E_MISSINGQUOTE;
            }
            _chTerminator = _chLookahead;
            ADVANCE;
            mark();
            _fReturnAttributeValue = (pSE->_sArg1 == 1);
            //checkeof(_chLookahead, _lEOFError);
            return push(&XMLStream::parseAttrValue, (short)newState);
        } // end of switch

        // A nested state function is still active: let it run.
        if (_fnState != &XMLStream::parseTable)
            return S_OK;

        if (newState >= XML_E_PARSEERRORBASE)
            return (HRESULT)newState;
        else
            _sSubState = (short)newState;
    } // end of while

    if (_nToken == XMLStream::XML_ENDDECL)
    {
        return _pInput->UnFreeze();
    }
    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Grows the character buffer and appends 'ch' to it.
// A new buffer of (_lBufSize + 512) * 2 characters is always allocated, the
// _lBufLen characters collected so far are copied over and the old buffer
// is released. (Presumably only called once the buffer is full - confirm
// against the caller.)
// Returns E_OUTOFMEMORY if the new buffer cannot be allocated.
HRESULT XMLStream::_PushChar(WCHAR ch)
{
    const long grownSize = (_lBufSize + 512) * 2;

    WCHAR* pchGrown = NEW ( WCHAR[grownSize]);
    if (NULL == pchGrown)
    {
        return E_OUTOFMEMORY;
    }

    // Carry the existing contents over, if there are any.
    if (_pchBuffer != NULL)
    {
        ::memcpy(pchGrown, _pchBuffer, sizeof(WCHAR) * _lBufLen);
        delete[] _pchBuffer;
    }

    _pchBuffer = pchGrown;
    _lBufSize = grownSize;

    _pchBuffer[_lBufLen] = ch;
    _lBufLen++;

    return S_OK;
}
////////////////////////////////////////////////////////////////////////
// Combines an advance with a state switch in one atomic operation that
// handles the E_PENDING case properly.
//   substate - the sub-state to continue in after the advance.
// Returns the result of nextChar(). If firstAdvance cannot be pushed to
// retry an incomplete advance, the push() failure is returned instead.
HRESULT XMLStream::AdvanceTo(short substate)
{
    _sSubState = substate;

    HRESULT hr = _pInput->nextChar(&_chLookahead, &_fEOF);

    if ((hr == static_cast<HRESULT>(E_PENDING)) ||
        (hr == static_cast<HRESULT>(E_DATA_AVAILABLE)) ||
        (hr == static_cast<HRESULT>(E_DATA_REALLOCATE)) ||
        (hr == static_cast<HRESULT>(XML_E_FOUNDPEREF)))
    {
        // Then we must do an advance next time around before continuing
        // with previous state. Push will save the _sSubState and return
        // to it.
        //
        // Do not ignore a failing push: without the firstAdvance entry on
        // the stack the retry would be lost silently and parsing would
        // resume without the pending advance having been performed.
        HRESULT hrPush = push(&XMLStream::firstAdvance, substate);
        if (hrPush != S_OK)
        {
            return hrPush;
        }
    }

    return hr;
}
////////////////////////////////////////////////////////////////////////
// Helper that decides whether some PCDATA or WHITESPACE has to be returned
// before an entity reference.
// When text is pending, _nToken is set to XML_WHITESPACE or XML_PCDATA, the
// length and mark deltas are set up so that the entity reference itself is
// not part of that token, the pending/whitespace flags are reset and true
// is returned. Otherwise nothing changes and false is returned.
bool XMLStream::PreEntityText()
{
    if (!_fPCDataPending)
    {
        return false;
    }

    // Return what we have so far.
    if (_fWhitespace)
    {
        _nToken = XML_WHITESPACE;
    }
    else
    {
        _nToken = XML_PCDATA;
    }

    // Number of characters scanned since the start of the entity reference.
    const long cchEntity = _pInput->getTokenLength() - _lEntityPos;
    _lMarkDelta = cchEntity;
    _lLengthDelta = -cchEntity;

    _fWhitespace = true;
    _fPCDataPending = false;

    return true;
}
////////////////////////////////////////////////////////////////////////
// Forwards a status code to the owning parser's ErrorCallback, translating
// E_DATA_AVAILABLE to XML_DATAAVAILABLE and E_DATA_REALLOCATE to
// XML_DATAREALLOCATE first. Any other code is passed through unchanged.
HRESULT XMLStream::ErrorCallback(HRESULT hr)
{
    if (hr == static_cast<HRESULT>(E_DATA_AVAILABLE))
    {
        hr = static_cast<HRESULT>(XML_DATAAVAILABLE);
    }
    else if (hr == static_cast<HRESULT>(E_DATA_REALLOCATE))
    {
        hr = static_cast<HRESULT>(XML_DATAREALLOCATE);
    }

    return _pXMLParser->ErrorCallback(hr);
}
|