You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1294 lines
38 KiB
1294 lines
38 KiB
/////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// fusion\xmlparser\xmlparser.cxx
|
|
// just commented out "SysFreeString()" and "SysAllocString()"
|
|
//
|
|
/////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "stdinc.h"
|
|
#include "core.hxx"
|
|
#include "xmlhelper.hxx"
|
|
#pragma hdrstop
|
|
#include "xmlparser.hxx"
|
|
#include "xmlstream.hxx"
|
|
#include <objbase.h>
|
|
#include "fusioninitializecriticalsection.h"
|
|
|
|
// Number of slots added to the _paNodeInfo pointer array each time it grows;
// also used as the initial size argument of _pStack in the constructor.
const USHORT STACK_INCREMENT=10;

// Appends pNodeInfo to the _paNodeInfo array, growing the array first when
// it is full.  Must be expanded inside an XMLParser member function that has
// a local HRESULT named hr in scope (presumably required by checkhr2 --
// confirm against its definition), and may return from that function when
// GrowNodeInfo() fails.
//
// The argument is now actually stored; previously the macro ignored it and
// always stored _pCurrent, which only worked because every caller passes
// _pCurrent.
#define PUSHNODEINFO(pNodeInfo)\
    if (_cNodeInfoAllocated == _cNodeInfoCurrent)\
    {\
        checkhr2(GrowNodeInfo());\
    }\
    _paNodeInfo[_cNodeInfoCurrent++] = (pNodeInfo);
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Builds a parser with no tokenizer, no node-info array and every
// state flag cleared.  Members not listed here (for example _fRootLevel
// and _fFoundRoot) are assigned later by init().
XMLParser::XMLParser()
    : _pDownloads(1),
      _pStack(STACK_INCREMENT),
      _pTokenizer(NULL),
      _pCurrent(NULL),
      _lCurrentElement(0),
      _paNodeInfo(NULL),
      _cNodeInfoAllocated(0),
      _cNodeInfoCurrent(0),
      _pdc(NULL),
      _usFlags(0),
      _fCaseInsensitive(false),
      _bstrError(NULL),
      _fRunEntryCount(0),
      _fInsideRun(false),
      _cAttributes(0),
      _pRoot(NULL),
      _fLastError(S_OK),
      _fStopped(false),
      _fSuspended(false),
      _fStarted(false),
      _fWaiting(false),
      _dwSafetyOptions(0)
{
    // Nothing else to do; all work happens in the initializer list.
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Second-phase initialization hook.  Currently performs no work and
// always reports success.
HRESULT
XMLParser::HrInitialize()
{
    return NOERROR;
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Resets the parser, then frees the tag-name buffer cached in every slot of
// _pStack (slots keep their buffers across pops so they can be reused), and
// finally frees the node-info pointer array.
XMLParser::~XMLParser()
{
    Reset();

    // Walk every slot the stack has, from the last one down to slot 0.
    long slot = _pStack.size();
    while (--slot >= 0)
    {
        MY_XML_NODE_INFO* pEntry = _pStack[slot];
        if (pEntry->_pwcTagName != NULL)
        {
            delete [] pEntry->_pwcTagName;
            pEntry->_pwcTagName = NULL;
            pEntry->_ulBufLen = 0;
        }
        // Drop the node pointer as well so nothing stale is left behind.
        pEntry->pNode = NULL;
    }

    delete[] _paNodeInfo;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// This class implements both IXMLNodeSource and IXMLParser, and the
// _unknown<> base only knows about IXMLParser, so requests for
// IXMLNodeSource (and IID_Parser) are answered here; everything else is
// forwarded to the base implementation.
//
// Returns E_INVALIDARG when ppvObject is NULL, S_OK when one of the locally
// handled interfaces is returned, otherwise whatever the base returns.
HRESULT STDMETHODCALLTYPE
XMLParser::QueryInterface(REFIID riid, void ** ppvObject)
{
    if (ppvObject == NULL)
        return E_INVALIDARG;

    const bool fHandledHere = (riid == IID_IXMLNodeSource || riid == IID_Parser);
    if (!fHandledHere)
    {
        return _unknown<IXMLParser, &IID_IXMLParser>::QueryInterface(riid, ppvObject);
    }

    *ppvObject = static_cast<IXMLNodeSource*>(this);
    AddRef();
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Forwards to the _unknown<> base class AddRef and returns its result.
ULONG STDMETHODCALLTYPE
XMLParser::AddRef(void)
{
    typedef _unknown<IXMLParser, &IID_IXMLParser> UnknownBase;
    return UnknownBase::AddRef();
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Forwards to the _unknown<> base class Release and returns its result.
ULONG STDMETHODCALLTYPE
XMLParser::Release(void)
{
    typedef _unknown<IXMLParser, &IID_IXMLParser> UnknownBase;
    return UnknownBase::Release();
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Attaches an input source to the parser.
//
// pStm must be non-NULL (E_INVALIDARG otherwise) and must expose IStream;
// if the QueryInterface for IStream fails, that failure is returned.  A new
// tokenizer is pushed before the stream is queried, and the stream is then
// handed to PushStream(), whose result is returned.
HRESULT STDMETHODCALLTYPE
XMLParser::SetInput(IUnknown *pStm)
{
    if (pStm == NULL)
        return E_INVALIDARG;

    // No download context yet: put the parser state into its initial values.
    if (_pDownloads.used() == 0)
        init();

    HRESULT hr = S_OK;
    checkhr2(PushTokenizer());

    IStream * pStream = NULL;
    hr = pStm->QueryInterface(IID_IStream, (void**)&pStream);
    if (FAILED(hr))
        return hr;

    hr = PushStream(pStream, false);
    pStream->Release();     // balance the QueryInterface above
    return hr;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Push-model input is not implemented by this parser; all three arguments
// are ignored and E_NOTIMPL is always returned.
HRESULT STDMETHODCALLTYPE
XMLParser::PushData(/* [in] */ const char __RPC_FAR *pData,
                    /* [in] */ ULONG ulChars,
                    /* [in] */ BOOL fLastBuffer)
{
    return E_NOTIMPL;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Stores the node factory that Run() will call back into.  NULL is accepted
// here; Run() fails with E_FAIL while no factory is set.  Always returns S_OK.
HRESULT STDMETHODCALLTYPE
XMLParser::SetFactory(IXMLNodeFactory __RPC_FAR *pNodeFactory)
{
    _pFactory = pNodeFactory;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the current node factory through ppNodeFactory.  The returned
// pointer is AddRef'd when non-NULL; *ppNodeFactory is NULL when no factory
// is set.  Returns E_INVALIDARG for a NULL out-pointer, S_OK otherwise.
HRESULT STDMETHODCALLTYPE
XMLParser::GetFactory(IXMLNodeFactory** ppNodeFactory)
{
    if (ppNodeFactory == NULL)
        return E_INVALIDARG;

    *ppNodeFactory = NULL;
    if (_pFactory)
    {
        *ppNodeFactory = _pFactory;
        (*ppNodeFactory)->AddRef();
    }
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Main parse loop: pulls tokens from the current tokenizer and translates
// them into node-factory callbacks (CreateNode / BeginChildren / EndChildren /
// NotifyEvent / Error) until input is exhausted, an error occurs, the
// tokenizer reports E_PENDING, or the parser is suspended.
//
// lChars is only tested against zero; a zero value makes the call return
// E_PENDING without parsing.
//
// Early-out results:
//   E_FAIL           - no node factory has been set.
//   XML_E_STOPPED    - the parser has already been stopped.
//   _fLastError      - no tokenizer and a previous error is recorded.
//   XMLPARSER_IDLE   - no tokenizer and no previous error.
//   E_PENDING        - re-entrant call, or lChars == 0.
// Otherwise returns the result of the parse; any value other than S_OK and
// E_PENDING is recorded in _fLastError and reported to the factory's Error().
//
// Fix relative to the previous version: when resuming a pending EndChildren,
// the element is now popped when EndChildren succeeds (matching the
// XML_EMPTYTAGEND path below).  Previously it was popped only on failure and
// the failure code was then overwritten by pop()'s result.
HRESULT STDMETHODCALLTYPE
XMLParser::Run(long lChars)
{
    HRESULT hr = NOERROR;

    FN_TRACE_HR(hr);

    XML_NODE_INFO info;
    XML_NODE_INFO* aNodeInfo[1];

    USHORT numRecs;

    bool fIsAttribute = false;
    bool stop = false;

    if (_fSuspended)
        _fSuspended = FALSE; // caller must want to resume.

    if (_pFactory == NULL)
    {
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XMLParser::Run() failing because _pFactory is NULL\n");

        hr = E_FAIL;
        goto Exit;
    }

    if (_fStopped)
    {
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XMLParser::Run() failing because _fStopped is set\n");

        hr = XML_E_STOPPED;
        goto Exit;
    }

    if (_pTokenizer == NULL)
    {
        if (_fLastError != S_OK)
        {
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XMLParser::Run() failing because _pTokenizer == NULL and _fLastError != S_OK (== 0x%08lx)\n", _fLastError);

            hr = _fLastError;
            goto Exit;
        }
        else
        {
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XMLParser::Run() failing because _pTokenizer == NULL and _fLastError == S_OK\n");

            // must be _fStarted == false
            hr = XMLPARSER_IDLE;
            goto Exit;
        }
    }

    // Check for recursive entry and whether caller actually
    // wants anything parsed.
    if (_fInsideRun || lChars == 0)
    {
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XMLParser::Run() failing because _fInsideRun is set or lChars == 0\n");

        hr = E_PENDING;
        goto Exit;
    }
    {
        // Marks the parser as "inside Run" for the lifetime of this scope.
        BoolLock flock(&_fInsideRun);

        if (_fLastError != 0)
        {
            // one more chance to cleanup the parser stack.
            hr = _fLastError;
            goto cleanup_stack;
        }

        if (! _fStarted)
        {
            _fStarted = true;
            hr = _pFactory->NotifyEvent(this, XMLNF_STARTDOCUMENT);
            if (_fStopped){     // watch for onReadyStateChange handlers
                hr = S_OK;      // fussing with the parser state.
                goto Exit;
            }
        }

        _fWaiting = false;

        // Deliver callbacks that were deferred by an earlier Run() call.
        if (_fPendingBeginChildren)
        {
            _fPendingBeginChildren = false;
            hr = _pFactory->BeginChildren(this, (XML_NODE_INFO*)_pCurrent);
        }
        if (_fPendingEndChildren)
        {
            _fPendingEndChildren = false;
            hr = _pFactory->EndChildren(this, TRUE, (XML_NODE_INFO*)_pCurrent);
            // Pop the element only once EndChildren has succeeded, and keep
            // a failure code intact so it reaches the error handling below.
            if (hr == S_OK)
                hr = pop(); // no match needed
        }

        info.dwSize = sizeof(XML_NODE_INFO);
        info.dwType = XMLStream::XML_PENDING;
        info.dwSubType = 0;
        info.pwcText = NULL;
        info.ulLen = 0;
        info.ulNsPrefixLen = 0;
        info.pNode = NULL;
        info.pReserved = NULL;
        aNodeInfo[0] = &info;

more:
        _fRunEntryCount++; // count of callers inside this loop...

        while (hr == 0 && ! _fSuspended)
        {
            info.dwSubType = 0;

            // The XMLStream error codes have been aligned with the
            // XMLParser error code so no mapping is necessary.
            hr = _pTokenizer->GetNextToken(&info.dwType, (const WCHAR **)&info.pwcText, (long*)&info.ulLen, (long*)&info.ulNsPrefixLen);
            if (hr == E_PENDING)
            {
                _fWaiting = true;
                break;
            }

            // Remember the first token that is neither whitespace nor the
            // XML declaration; an XML declaration after that is an error.
            if (! _fFoundNonWS &&
                    info.dwType != XMLStream::XML_PENDING &&
                    info.dwType != XML_WHITESPACE &&
                    info.dwType != XML_XMLDECL)
            {
                _fFoundNonWS = true;
            }

            // Now the NodeType is the same as the XMLToken value.  We set
            // this up by aligning the two enums.
            switch (info.dwType)
            {
            case 0:
                break;
            // --------- Container Nodes -------------------
            case XML_XMLDECL:
                if (_fFoundNonWS)
                {
                    hr = XML_E_BADXMLDECL;
                    break;
                }
                goto containers;

            case XML_ATTRIBUTE:
                fIsAttribute = true;
                goto containers;

            case XML_VERSION:
                // Reported to the factory as an attribute whose subtype
                // carries the original token type.
                info.dwSubType = info.dwType;
                info.dwType = XML_ATTRIBUTE;
                _fGotVersion = true;
                fIsAttribute = true;
                goto containers;

            case XML_STANDALONE:
            case XML_ENCODING:
                if (! _fGotVersion && _pDownloads.used() == 1)
                {
                    hr = XML_E_EXPECTING_VERSION;
                    break;
                }
                if (info.dwType == XML_STANDALONE)
                {
                    if (_pDownloads.used() > 1)
                    {
                        hr = XML_E_UNEXPECTED_STANDALONE;
                        break;
                    }
                }
                info.dwSubType = info.dwType;
                info.dwType = XML_ATTRIBUTE;
                fIsAttribute = true;
                goto containers;
                // fall through
            case XML_ELEMENT:
containers:
                if (_fRootLevel)
                {
                    // Special rules apply for root level tags.
                    if (info.dwType == XML_ELEMENT)
                    {
                        // This is a root level element.
                        if (! _fFoundRoot)
                        {
                            _fFoundRoot = true;
                        }
                        else
                        {
                            ::FusionpDbgPrintEx(
                                FUSION_DBG_LEVEL_ERROR,
                                "SXS.DLL: XML Parser has found multiple roots in the document which is an error.\n");

                            hr = XML_E_MULTIPLEROOTS;
                            break;
                        }
                    }
                    else if (info.dwType != XML_PI &&
                             info.dwType != XML_XMLDECL &&
                             info.dwType != XML_DOCTYPE)
                    {
                        ::FusionpDbgPrintEx(
                            FUSION_DBG_LEVEL_ERROR,
                            "SXS.DLL: XML Parser has found an initial element which is not valid at the root level.\n");

                        hr = XML_E_INVALIDATROOTLEVEL;
                        break;
                    }
                }

                info.fTerminal = FALSE;

                if (fIsAttribute)
                {
                    breakhr( pushAttribute(info));
                    fIsAttribute = false;
                }
                else
                {
                    breakhr( push(info));
                }
                break;
            case XML_PCDATA:
            case XML_CDATA:
terminals:
                // Special rules apply for root level tags.
                if (_fRootLevel)
                {
                    ::FusionpDbgPrintEx(
                        FUSION_DBG_LEVEL_ERROR,
                        "SXS.DLL: XML Parser has found PCDATA at the root level which is not valid XML.\n");

                    hr = XML_E_INVALIDATROOTLEVEL;
                    break;
                }
                // fall through
            case XML_COMMENT:
            case XML_WHITESPACE:
tcreatenode:
                info.fTerminal = TRUE;
                if (_cAttributes != 0)
                {
                    // We are inside the attribute list, so we need to push this.
                    hr = pushAttributeValue(info);
                    break;
                }
                hr = _pFactory->CreateNode(this, _pNode, 1, aNodeInfo);
                info.pNode = NULL;
                break;

            case XML_ENTITYREF:
                if (_fRootLevel)
                {
                    hr = XML_E_INVALIDATROOTLEVEL;
                    break;
                }

                // We handle builtin entities and char entities in xmlstream
                // so these must be user defined entity, so treat it like a regular terminal node.
                goto terminals;
                break;

            case XMLStream::XML_BUILTINENTITYREF:
            case XMLStream::XML_HEXENTITYREF:
            case XMLStream::XML_NUMENTITYREF:
                // pass real entityref type as subtype so we can publish these
                // subtypes eventually.
                info.dwSubType = info.dwType; // XML_ENTITYREF;
                info.dwType = XML_PCDATA;

                if (_cAttributes == 0)
                {
                    goto tcreatenode;
                }

                // We are inside the attribute list, so we need to push this.
                info.fTerminal = TRUE;
                hr = pushAttributeValue(info);
                if (SUCCEEDED(hr))
                {
                    hr = CopyText(_pCurrent);
                }
                break;

            case XMLStream::XML_TAGEND:  // ">"
                numRecs = 1+_cAttributes;
                if (_cAttributes != 0)  // this is safe because _rawstack does NOT reclaim
                {                       // the popped stack entries.
                    popAttributes();
                }
                hr = _pFactory->CreateNode(this, _pNode, numRecs, (XML_NODE_INFO **)&_paNodeInfo[_lCurrentElement]);
                _pNode = _pCurrent->pNode;
                if (FAILED(hr))
                {
                    // BeginChildren is deferred to the next Run() call.
                    _fPendingBeginChildren = true;
                    break;
                }
                breakhr( _pFactory->BeginChildren(this, (XML_NODE_INFO*)_pCurrent));
                break;

            // The ENDXMLDECL is like EMPTYENDTAGs since we've been
            // buffering up their attributes, and we have still got to call CreateNode.
            case XMLStream::XML_ENDXMLDECL:
                _fGotVersion = false; // reset back to initial state.
                // fall through.
            case XMLStream::XML_EMPTYTAGEND:
                numRecs = 1+_cAttributes;
                if (_cAttributes != 0)
                {
                    popAttributes();
                }
                hr = _pFactory->CreateNode(this, _pNode, numRecs, (XML_NODE_INFO **)&_paNodeInfo[_lCurrentElement]);
                if (FAILED(hr))
                {
                    // EndChildren is deferred to the next Run() call.
                    _fPendingEndChildren = true;
                    break;
                }
                breakhr(_pFactory->EndChildren(this, TRUE, (XML_NODE_INFO*)_pCurrent));
                breakhr(pop()); // no match needed
                break;

            case XMLStream::XML_ENDTAG:  // "</"
                if (_pStack.used() == 0)
                {
                    ::FusionpDbgPrintEx(
                        FUSION_DBG_LEVEL_ERROR,
                        "SXS.DLL: XML Parser has found an unexpected end tag.\n");

                    hr = XML_E_UNEXPECTEDENDTAG;
                }
                else
                {
                    XML_NODE_INFO* pCurrent = (XML_NODE_INFO*)_pCurrent; // save current record
                    breakhr(pop(info.pwcText, info.ulLen)); // check tag/match
                    breakhr(_pFactory->EndChildren(this, FALSE, (XML_NODE_INFO*)pCurrent));
                }
                break;

            case XMLStream::XML_ENDPROLOG:
                // For top level document only, (not for DTD's or
                // entities), call EndProlog on the node factory.
                if (_fRootLevel && ! _pdc->_fEntity && ! _pdc->_fDTD)
                    breakhr( _pFactory->NotifyEvent(this, XMLNF_ENDPROLOG));
                break;

            default:
                hr = E_FAIL;
                break; // break from switch()
            }
        }
        _fRunEntryCount--;

        stop = false;
        if (hr == static_cast<HRESULT>(XML_E_ENDOFINPUT))
        {
            hr = S_OK;
            // Capture the flags of the finishing download before
            // PopDownload() replaces _pdc.
            bool inDTD = _pdc->_fDTD;
            bool inEntity = _pdc->_fEntity;
            bool inPEReference = _pdc->_fPEReference;

            if (inEntity && _pdc->_fDepth != _pStack.used())
            {
                ::FusionpDbgPrintEx(
                    FUSION_DBG_LEVEL_ERROR,
                    "SXS.DLL: XML Parser found unclosed tags at the end of the input stream.\n");

                // Entity itself was unbalanced.
                hr = ReportUnclosedTags(_pdc->_fDepth);
            }
            else if (PopDownload() == S_OK)
            {
                // then we must have just finished a DTD and we still have more to do
                // BUGBUG -- need to check that entity is well formed, i.e. no tags
                // left open.

                if (!inPEReference)
                {
                    if (inEntity)
                    {
                        hr = _pFactory->NotifyEvent(this, XMLNF_ENDENTITY);
                    }
                    else if (inDTD)
                    {
                        hr = _pFactory->NotifyEvent(this, XMLNF_ENDDTD);
                    }
                }
                if (FAILED(hr))
                {
                    goto cleanup_stack;
                }

                // In a synchronous DTD download, there is another parser
                // Run() call on the stack above us, so let's return
                // back to that Run method so we don't complete the parsing
                // out from under it.
                if (_fRunEntryCount > 0){
                    hr = S_OK;
                    goto Exit;
                }

                if (_fStopped){
                    hr = S_OK;
                    goto Exit;
                }
                goto more;
            }
            else
            {
                // That was the outermost download: the document is done.
                if (_pStack.used() > 0)
                {
                    hr = ReportUnclosedTags(0);
                }
                else if (! _fFoundRoot)
                {
                    ::FusionpDbgPrintEx(
                        FUSION_DBG_LEVEL_ERROR,
                        "SXS.DLL: XML Parser has found no root in the document.\n");

                    hr = XML_E_MISSINGROOT;
                }
                stop = true;
            }
        }

cleanup_stack:

        if (hr != S_OK && hr != E_PENDING)
        {
            stop = true;
            _fLastError = hr;

            // Pass all the XML_NODE_INFO structs to the Error function so the client
            // gets a chance to cleanup the PVOID pNode fields.
            HRESULT edr = _pFactory->Error(this, hr,
                (USHORT)(_paNodeInfo ? _lCurrentElement+1 : 0), (XML_NODE_INFO**)_paNodeInfo);
            // NOTE(review): this re-assigns the value stored just above, so
            // the factory's return code has no effect here -- confirm
            // whether edr was meant to replace hr.
            if (edr != 0)
                _fLastError = hr;
        }

        if (stop && ! _fStopped)
        {
            //TraceTag((tagParserError, "Parser stopping with hr %x", hr));
            _fLastError = hr;
            _fStopped = true;
            _fStarted = false;
            HRESULT edr;
            edr = _pFactory->NotifyEvent(this, XMLNF_ENDDOCUMENT);
            if (edr != 0)
            {
                hr = edr; // allow factory to change error code (except to S_OK)
                if (S_OK == _fLastError)
                {
                    // Make sure the node factory always finds out about errors.
                    edr = _pFactory->Error(this, hr, 0, NULL);
                    if (edr != 0)
                        hr = edr;
                }
                _fLastError = hr;
            }
        }
    }
Exit:
    return hr;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Removes every attribute / attribute-value entry that was pushed for the
// current tag, leaving the element entry on top of the stack.
// Always returns S_OK.
HRESULT
XMLParser::popAttributes()
{
    // popAttribute() decrements _cAttributes, so this terminates.
    for (; _cAttributes > 0; )
    {
        popAttribute(); // no match needed
    }
    Assert(_pStack.used() == _lCurrentElement+1);

    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Reports the parser state as one of the XMLPARSER_* values cast to
// HRESULT.  The conditions are tested in priority order: error, stopped,
// suspended, idle (not started), waiting, and finally busy.
HRESULT STDMETHODCALLTYPE
XMLParser::GetParserState(void)
{
    HRESULT state;

    if (_fLastError != 0)
        state = static_cast<HRESULT>(XMLPARSER_ERROR);
    else if (_fStopped)
        state = static_cast<HRESULT>(XMLPARSER_STOPPED);
    else if (_fSuspended)
        state = static_cast<HRESULT>(XMLPARSER_SUSPENDED);
    else if (! _fStarted)
        state = static_cast<HRESULT>(XMLPARSER_IDLE);
    else if (_fWaiting)
        state = static_cast<HRESULT>(XMLPARSER_WAITING);
    else
        state = static_cast<HRESULT>(XMLPARSER_BUSY);

    return state;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Stops the parser.  bstrErrorInfo is not used.  Setting the suspended flag
// as well makes the loop in Run() exit at its next iteration check.
// Always returns S_OK.
HRESULT STDMETHODCALLTYPE
XMLParser::Abort(/* [in] */ BSTR bstrErrorInfo)
{
    _fSuspended = true;     // force Run to terminate...
    _fStopped = true;

    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Requests that Run() leave its token loop; a later Run() call clears the
// flag and resumes.  Always returns S_OK.
HRESULT STDMETHODCALLTYPE
XMLParser::Suspend(void)
{
    _fSuspended = true;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the parser to its initial state via init() and then drops the
// root, factory, current node and error string.  Always returns S_OK.
HRESULT STDMETHODCALLTYPE
XMLParser::Reset(void)
{
    init();

    _pRoot      = NULL;
    _pFactory   = NULL;
    _pNode      = NULL;
    _bstrError  = NULL;     // pointer is only cleared here, not freed
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the tokenizer's current line, or 0 when there is no tokenizer.
ULONG STDMETHODCALLTYPE
XMLParser::GetLineNumber(void)
{
    if (!_pTokenizer)
        return 0;
    return _pTokenizer->GetLine();
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the tokenizer's position within the current line, or 0 when
// there is no tokenizer.
ULONG STDMETHODCALLTYPE
XMLParser::GetLinePosition(void)
{
    if (!_pTokenizer)
        return 0;
    return _pTokenizer->GetLinePosition();
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the tokenizer's input position, or 0 when there is no tokenizer.
ULONG STDMETHODCALLTYPE
XMLParser::GetAbsolutePosition(void)
{
    if (!_pTokenizer)
        return 0;
    return _pTokenizer->GetInputPosition();
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Retrieves the tokenizer's current line buffer.
//
//   ppwcBuf     - receives a pointer to the line text (NULL if no tokenizer).
//   pulLen      - receives the length of the line (0 if no tokenizer).
//   pulStartPos - receives the start position (0 if no tokenizer).
//
// Returns E_INVALIDARG if any out-pointer is NULL; otherwise the tokenizer's
// result, or S_OK with empty outputs when there is no tokenizer.
//
// Changes from the previous version: ppwcBuf is now validated like the other
// two out-parameters (it was dereferenced unchecked), and *pulStartPos is
// zeroed on the no-tokenizer path instead of being left untouched.
HRESULT STDMETHODCALLTYPE
XMLParser::GetLineBuffer(
    /* [out] */ const WCHAR __RPC_FAR *__RPC_FAR *ppwcBuf,
    /* [out] */ ULONG __RPC_FAR *pulLen,
    /* [out] */ ULONG __RPC_FAR *pulStartPos)
{
    if (ppwcBuf == NULL || pulLen == NULL || pulStartPos == NULL)
        return E_INVALIDARG;

    //STACK_ENTRY;

    if (_pTokenizer)
    {
        return _pTokenizer->GetLineBuffer(ppwcBuf, pulLen, pulStartPos);
    }

    *ppwcBuf = NULL;
    *pulLen = 0;
    *pulStartPos = 0;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Returns the error code recorded by the most recent failing Run(), or the
// value init() reset it to (0) when no error has been recorded.
HRESULT STDMETHODCALLTYPE
XMLParser::GetLastError(void)
{
    const HRESULT hrLast = _fLastError;
    return hrLast;
}
|
|
|
|
//------------ PRIVATE METHODS --------------------------------------------------
|
|
// Creates a new XMLStream tokenizer, makes it the current one, and opens a
// download context for it.  (An older variant that also took a URLStream is
// disabled in this build.)
//
// Returns E_OUTOFMEMORY if the tokenizer cannot be allocated, the failure
// from PushDownload() if the context cannot be created (the new tokenizer is
// deleted and _pTokenizer is left NULL in that case), S_OK otherwise.
HRESULT
XMLParser::PushTokenizer()
{
    _pTokenizer = NEW (XMLStream(this));
    if (_pTokenizer == NULL)
        return E_OUTOFMEMORY;

    _pTokenizer->SetFlags(_usFlags);

    const HRESULT hrPush = PushDownload(_pTokenizer);
    if (!FAILED(hrPush))
        return S_OK;

    // No download context took the tokenizer, so it is freed here.
    delete _pTokenizer;
    _pTokenizer = NULL;
    return hrPush;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Opens a new download context on _pDownloads and makes it current (_pdc).
// The context records the tokenizer plus the parser's current
// found-non-whitespace / found-root / root-level flags and stack depth;
// the found-non-whitespace and found-root flags are then cleared for the
// new input.
//
// NOTE: tokenizer can be null, in the case of a parameter entity download.
//
// Returns E_OUTOFMEMORY if the context cannot be pushed, S_OK otherwise.
HRESULT
XMLParser::PushDownload(XMLStream* tokenizer)
{
    _pdc = _pDownloads.push();
    if (_pdc == NULL)
        return E_OUTOFMEMORY;

    // A nested download is never at root level.
    if (_pDownloads.used() > 1)
        _fRootLevel = false;

    // Fresh context: tokenizer and cleared per-download flags.
    _pdc->_pTokenizer = tokenizer;
    _pdc->_fDTD = false;
    _pdc->_fEntity = false;
    _pdc->_fAsync = false;

    // Snapshot of the parser state at the time of the push.
    _pdc->_fFoundNonWS = _fFoundNonWS;
    _pdc->_fFoundRoot = _fFoundRoot;
    _pdc->_fRootLevel = _fRootLevel;
    _pdc->_fDepth = _pStack.used();

    _fFoundNonWS = false;
    _fFoundRoot = false;
    _fRootLevel = (_pStack.used() == 0 && _pDownloads.used() == 1);

    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Wraps pStm in an EncodingStream, records it in the current download
// context and hands it to the current tokenizer.  fpe is forwarded to the
// tokenizer unchanged.  Requires that a download context and tokenizer are
// already in place (SetInput calls PushTokenizer first).
//
// Returns E_OUTOFMEMORY if the EncodingStream cannot be created, otherwise
// the tokenizer's PushStream result; E_PENDING also sets the waiting flag.
HRESULT
XMLParser::PushStream(IStream* pStm, bool fpe)
{
    EncodingStream* stream = (EncodingStream*)EncodingStream::newEncodingStream(pStm); // refcount = 1
    if (stream == NULL)
        return E_OUTOFMEMORY;

    // The download context's member keeps the stream alive (per the
    // original note it is a smart pointer), so the creation reference is
    // released right away.
    _pdc->_pEncodingStream = stream;
    stream->Release();

    const HRESULT hrPush = _pTokenizer->PushStream(stream, fpe);
    if (hrPush == E_PENDING)
    {
        _fWaiting = true;
    }
    return hrPush;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Closes the current download context: resets and deletes its tokenizer,
// drops its encoding stream, restores the saved found-non-whitespace flag
// and pops the context.  The context returned by the pop becomes current.
//
// NOTE: tokenizer can be null, in the case of a parameter entity download.
//
// Returns S_OK when a context remains afterwards, S_FALSE when none does
// (in which case _pTokenizer is set to NULL).
HRESULT
XMLParser::PopDownload()
{
    if (_pdc != NULL)
    {
        if (_pdc->_pTokenizer)
        {
            _pdc->_pTokenizer->Reset();
            delete _pdc->_pTokenizer;
            _pdc->_pTokenizer = NULL;
        }
        _pdc->_pEncodingStream = NULL;

        // restore saved value of foundnonws.
        _fFoundNonWS = _pdc->_fFoundNonWS;
        _pdc = _pDownloads.pop();
    }

    HRESULT hr = S_OK;
    if (_pdc == NULL)
    {
        _pTokenizer = NULL;
        hr = S_FALSE;
    }
    else if (_pdc->_pTokenizer != NULL)
    {
        // Resume with the enclosing download's tokenizer.
        _pTokenizer = _pdc->_pTokenizer;
    }

    if (_pStack.used() == 0 && _pDownloads.used() == 1)
        _fRootLevel = true;

    return hr;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Enlarges the _paNodeInfo pointer array by STACK_INCREMENT slots and
// refills it from _pStack.
//
// Returns E_OUTOFMEMORY when the array cannot be allocated or when the new
// size would no longer fit in a USHORT; S_OK otherwise.
//
// Fix: the new size used to be computed directly in a USHORT, so once the
// array approached 65535 entries the addition wrapped to a small value and
// the copy loop below wrote past the end of the undersized allocation.
HRESULT
XMLParser::GrowNodeInfo()
{
    // Compute in a wider type so an overflow can be detected.
    const ULONG cWanted = static_cast<ULONG>(_cNodeInfoAllocated) + STACK_INCREMENT;
    if (cWanted > 0xFFFF)
        return E_OUTOFMEMORY;

    const USHORT newsize = static_cast<USHORT>(cWanted);
    MY_XML_NODE_INFO** pNewArray = NEW (PMY_XML_NODE_INFO[newsize]);
    if (pNewArray == NULL)
        return E_OUTOFMEMORY;

    // Now since STACK_INCREMENT is the same for _pStack then _pStack
    // has also re-allocated.  Therefore we need to re-initialize all
    // the pointers in this array - since they point into the _pStack's memory.
    for (int i = _pStack.used() - 1; i >= 0; i--)
    {
        pNewArray[i] = _pStack[i];
    }

    delete[] _paNodeInfo;
    _paNodeInfo = pNewArray;
    _cNodeInfoAllocated = newsize;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Replaces the private text buffer of pNodeInfo with a new one of exactly
// newlen WCHARs.  The old contents are discarded, not copied.  Callers in
// this file pass the needed length plus 50 to avoid frequent reallocations.
//
// Returns E_INVALIDARG for a non-positive newlen, E_OUTOFMEMORY if the
// allocation fails, S_OK otherwise.  On any failure the entry is left with
// a NULL buffer and a recorded capacity of 0.
//
// Fix: the recorded capacity is now cleared together with the buffer.
// Previously a failed allocation left the old capacity in place with a NULL
// buffer, so a later "is the buffer big enough" check could pass and the
// following memcpy would write through NULL.
HRESULT
XMLParser::GrowBuffer(PMY_XML_NODE_INFO pNodeInfo, long newlen)
{
    delete [] pNodeInfo->_pwcTagName;
    pNodeInfo->_pwcTagName = NULL;
    pNodeInfo->_ulBufLen = 0;

    if (newlen <= 0)
        return E_INVALIDARG;

    pNodeInfo->_pwcTagName = NEW (WCHAR[ newlen ]);
    if (pNodeInfo->_pwcTagName == NULL)
        return E_OUTOFMEMORY;

    pNodeInfo->_ulBufLen = newlen;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Pushes a container node (element, XML declaration, ...) onto the context
// stack and makes it the current element.
//
// The tag name is copied into the stack entry's private buffer so it is
// still available when the matching end tag arrives, by which time the
// stream buffer it originally pointed into may have been overwritten.
//
// Returns E_OUTOFMEMORY if the stack cannot grow; may also return early
// through the PUSHNODEINFO / checkhr2 macros when a helper fails.
HRESULT
XMLParser::push(XML_NODE_INFO& info)
{
    HRESULT hr;     // used by the PUSHNODEINFO / checkhr2 macros below

    _lCurrentElement = _pStack.used();
    _pCurrent = _pStack.push();
    if (_pCurrent == NULL)
        return E_OUTOFMEMORY;

    *((XML_NODE_INFO*)_pCurrent) = info;
    PUSHNODEINFO(_pCurrent);

    _fRootLevel = false;

    // The copy is done inline rather than through CopyText() for speed.
    const ULONG cchTag = info.ulLen;
    if (_pCurrent->_ulBufLen < cchTag+1)
    {
        checkhr2(GrowBuffer(_pCurrent, cchTag + 50));
    }
    Assert(info.ulLen >= 0);
    ::memcpy(_pCurrent->_pwcTagName, info.pwcText, cchTag*sizeof(WCHAR));
    _pCurrent->_pwcTagName[cchTag] = L'\0';

    // From here on the entry's text refers to the private copy.
    _pCurrent->pwcText = _pCurrent->_pwcTagName;

    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Pushes an attribute-name node for the current element after checking
// that no attribute with the same name has already been pushed for it.
//
// Returns XML_E_DUPLICATEATTRIBUTE when a duplicate name is found,
// E_OUTOFMEMORY if the stack cannot grow, S_OK otherwise; may also return
// early through the PUSHNODEINFO macro when GrowNodeInfo() fails.
//
// Fix: the case-insensitive comparison used ::wcslen() on the stored
// attribute name.  Attribute text normally still points into the stream
// buffer and is not NUL-terminated, so wcslen() could read past the name
// and report a wrong length; the stored ulLen is used instead.
//
// NOTE(review): the case-insensitive comparison is only reached when the
// names are already byte-for-byte equal, so names differing only in case
// are not reported as duplicates -- confirm whether that is intended.
HRESULT
XMLParser::pushAttribute(XML_NODE_INFO& info)
{
    HRESULT hr;     // used by the PUSHNODEINFO macro below
    if (_cAttributes != 0)
    {
        // Attributes are special in that they are supposed to be unique.
        // So here we actually check this.
        for (long i = _pStack.used()-1; i > _lCurrentElement; i--)
        {
            XML_NODE_INFO* ptr = _pStack[i];

            if (ptr->dwType != XML_ATTRIBUTE)
                continue; // ignore attribute values.

            if (ptr->ulLen != info.ulLen)
            {
                continue; // we're ok with this one
            }

            // Optimized for the normal case where there is no match
            if (::memcmp(ptr->pwcText, info.pwcText, info.ulLen*sizeof(WCHAR)) == 0)
            {
                if (! _fCaseInsensitive)
                {
                    ::FusionpDbgPrintEx(
                        FUSION_DBG_LEVEL_ERROR,
                        "SXS.DLL: XML Parser found a duplicate attribute\n");

                    return XML_E_DUPLICATEATTRIBUTE;
                }
                else if (::FusionpCompareStrings(ptr->pwcText, ptr->ulLen, info.pwcText, info.ulLen, true) == 0)
                {
                    ::FusionpDbgPrintEx(
                        FUSION_DBG_LEVEL_ERROR,
                        "SXS.DLL: XML Parser found a duplicate attribute (#2)\n");

                    // Duplicate attributes are allowed in IE4 mode!!
                    // But only the latest one shows up
                    // So we have to delete the previous duplication
                    return XML_E_DUPLICATEATTRIBUTE;
                }
            }
        }
    }

    _cAttributes++;

    _pCurrent = _pStack.push();
    if (_pCurrent == NULL)
        return E_OUTOFMEMORY;

    *((XML_NODE_INFO*)_pCurrent) = info;
    PUSHNODEINFO(_pCurrent);

    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Pushes an attribute-value node.  The text is not copied: the entry keeps
// pointing at the text held by the buffered stream.  The quote character
// reported by the tokenizer is stored in info.pReserved (note that this
// modifies the caller's info as well).
//
// Returns E_OUTOFMEMORY if the stack cannot grow, S_OK otherwise; may also
// return early through the PUSHNODEINFO macro when GrowNodeInfo() fails.
HRESULT
XMLParser::pushAttributeValue(XML_NODE_INFO& info)
{
    HRESULT hr;     // used by the PUSHNODEINFO macro below

    _pCurrent = _pStack.push();
    if (_pCurrent == NULL)
        return E_OUTOFMEMORY;

    info.pReserved = (PVOID)_pTokenizer->getAttrValueQuoteChar();
    *((XML_NODE_INFO*)_pCurrent) = info;
    PUSHNODEINFO(_pCurrent);

    // _cAttributes counts every node pushed for the tag, values included.
    _cAttributes++;
    return S_OK;
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Pops the current element after checking that the end tag matches it.
//
//   tag / len - name from the end tag; when len is 0 no comparison is made.
//
// Returns XML_E_UNEXPECTEDENDTAG when there is no open element, and
// XML_E_ENDTAGMISMATCH when the names differ (compared case-insensitively
// only when _fCaseInsensitive is set).  On a mismatch nothing is popped.
HRESULT
XMLParser::pop(const WCHAR* tag, ULONG len)
{
    if (_pCurrent == NULL || _pStack.used() == 0)
    {
        ::FusionpDbgPrintEx(
            FUSION_DBG_LEVEL_ERROR,
            "SXS.DLL: XML Parser found an unexpected end tag.\n");

        return XML_E_UNEXPECTEDENDTAG;
    }

    HRESULT hr = S_OK;

    if (len != 0)
    {
        if (_pCurrent->ulLen != len)
        {
            ::FusionpDbgPrintEx(
                FUSION_DBG_LEVEL_ERROR,
                "SXS.DLL: XML Parser found an end tag mismatch\n");

            hr = XML_E_ENDTAGMISMATCH;
        }
        // Same length: an exact byte comparison settles the common case.
        else if (::memcmp(_pCurrent->pwcText, tag, len*sizeof(WCHAR)) != 0)
        {
            if (! _fCaseInsensitive)
            {
                ::FusionpDbgPrintEx(
                    FUSION_DBG_LEVEL_ERROR,
                    "SXS.DLL: XML Parser found an end tag mismatch.\n");

                hr = XML_E_ENDTAGMISMATCH;
            }
            else if (::FusionpCompareStrings(_pCurrent->pwcText, len, tag, len, true) != 0)
            {
                hr = XML_E_ENDTAGMISMATCH;
            }
        }

        if (FAILED(hr))
            return hr;
    }

    // The entry's tag-name buffer is left allocated so the slot can reuse
    // it for a later push.
    _pCurrent = _pStack.pop();
    _cNodeInfoCurrent--;

    if (_pCurrent != 0)
    {
        _pNode = _pCurrent->pNode;
    }
    else
    {
        // Stack is empty again: back to the root node.
        _pNode = _pRoot;
        if (_pDownloads.used() == 1)
            _fRootLevel = true;
    }

    return hr;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Pops the current element without any tag-name check and makes the entry
// returned by the stack the current one.  When the stack is empty
// afterwards the current node goes back to the root, and the parser is at
// root level again if only one download is active.  Always returns S_OK.
HRESULT XMLParser::pop()
{
    // The entry's tag-name buffer is left allocated so the slot can reuse
    // it for a later push.
    _pCurrent = _pStack.pop();
    _cNodeInfoCurrent--;

    if (_pCurrent != 0)
    {
        _pNode = _pCurrent->pNode;
        return S_OK;
    }

    _pNode = _pRoot;
    if (_pDownloads.used() == 1)
        _fRootLevel = true;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
void XMLParser::popAttribute()
|
|
{
|
|
Assert(_pStack.used() > 0);
|
|
|
|
_pCurrent = _pStack.pop();
|
|
_cNodeInfoCurrent--;
|
|
|
|
Assert(_pCurrent != 0);
|
|
|
|
_cAttributes--;
|
|
|
|
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Makes pNodeInfo own its text: copies the text it currently points at into
// the entry's private buffer (growing the buffer when needed),
// NUL-terminates it and repoints pwcText at the copy.  Does nothing when
// the entry already points at its private buffer.
//
// Returns S_OK; may return early through checkhr2 when GrowBuffer() fails.
HRESULT
XMLParser::CopyText(PMY_XML_NODE_INFO pNodeInfo)
{
    HRESULT hr = S_OK;      // used by the checkhr2 macro below

    if (pNodeInfo->_pwcTagName == pNodeInfo->pwcText)
        return S_OK;        // already private

    const ULONG cch = pNodeInfo->ulLen;

    if (pNodeInfo->_ulBufLen < cch+1)
    {
        checkhr2(GrowBuffer(pNodeInfo, cch + 50));
    }
    if (cch > 0)
    {
        ::memcpy(pNodeInfo->_pwcTagName, pNodeInfo->pwcText, cch*sizeof(WCHAR));
    }
    pNodeInfo->_pwcTagName[cch] = L'\0';

    pNodeInfo->pwcText = pNodeInfo->_pwcTagName;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Copies the text of every pending attribute / attribute-value entry into
// its private buffer.
//
// For performance reasons attribute text is normally not copied when it is
// pushed; the entries point straight into the tokenizer's buffer, which the
// tokenizer tries to keep frozen while attributes are parsed.  Occasionally
// the BufferedStream has to reallocate anyway (attributes right at the end
// of the buffer), and then the text must be saved first.
//
// Returns S_OK when every entry was copied, otherwise the first failure
// reported by CopyText().  All entries are attempted even after a failure
// so that as few as possible are left pointing at the old buffer.
//
// Fix: failures from CopyText() used to be discarded and S_OK was always
// returned, although the caller checks this function's result.
HRESULT
XMLParser::CopyContext()
{
    HRESULT hrFirstFailure = S_OK;

    const long last = _pStack.used();
    for (long i = _cAttributes; i > 0 ; i--)
    {
        MY_XML_NODE_INFO* ptr = _pStack[last - i];
        const HRESULT hrCopy = CopyText(ptr);
        if (FAILED(hrCopy) && SUCCEEDED(hrFirstFailure))
        {
            hrFirstFailure = hrCopy;
        }
    }
    return hrFirstFailure;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Builds a comma-separated list of the tag names still open on the stack,
// starting at index start and stopping at the first attribute entry.
//
// The list is currently only built and then freed again; nothing consumes
// it.  Returns XML_E_UNCLOSEDTAG, or E_OUTOFMEMORY if the list buffer could
// not be allocated.
HRESULT XMLParser::ReportUnclosedTags(int start)
{
    HRESULT hr = XML_E_UNCLOSEDTAG;

    const int cOpen = _pStack.used();

    WCHAR* pwszList = NULL;
    unsigned long cchCapacity = 0;
    unsigned long cchUsed = 0;

    for (long iTag = start; iTag < cOpen; iTag++)
    {
        XML_NODE_INFO* pEntry = _pStack[iTag];
        if (pEntry->dwType == XML_ATTRIBUTE)
            break;

        // Room is needed for the name, a ", " separator and the terminator.
        if (cchUsed + pEntry->ulLen + 3 > cchCapacity)
        {
            long newsize = cchUsed + pEntry->ulLen + 500;
            WCHAR* newbuf = NEW (WCHAR[newsize]);
            if (newbuf == NULL)
            {
                hr = E_OUTOFMEMORY;
                break;
            }
            if (pwszList != NULL)
            {
                // cchUsed counts characters, not bytes.
                ::memcpy(newbuf, pwszList, cchUsed * sizeof(WCHAR));
                delete[] pwszList;
            }

            cchCapacity = newsize;
            pwszList = newbuf;
        }

        if (iTag > start)
        {
            pwszList[cchUsed++] = ',';
            pwszList[cchUsed++] = ' ';
        }
        ::memcpy(&pwszList[cchUsed], pEntry->pwcText, sizeof(WCHAR) * pEntry->ulLen);
        cchUsed += pEntry->ulLen;
        pwszList[cchUsed] = '\0';
    }

    delete [] pwszList;

    return hr;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Puts the parser into its initial state: clears the error and all state
// flags, empties the context stack, resets the node-info counters and
// closes every open download context.  Always returns S_OK.
HRESULT XMLParser::init()
{
    // Error / run-state flags.
    _fLastError = 0;
    _fStopped = false;
    _fSuspended = false;
    _pNode = _pRoot;
    _fStarted = false;
    _fWaiting = false;

    // Document-level parse state.
    _fFoundRoot = false;
    _fFoundNonWS = false;
    _pTokenizer = NULL;
    _fGotVersion = false;
    _fRootLevel = true;
    _cAttributes = 0;

    _fPendingBeginChildren = false;
    _fPendingEndChildren = false;

    // Unwind the context stack until it reports no current entry.
    while (_pCurrent != NULL)
    {
        _pCurrent = _pStack.pop();
    }

    _cNodeInfoCurrent = 0;
    _lCurrentElement = 0;

    // cleanup downloads
    while (_pdc != NULL)
    {
        PopDownload();
    }

    _pCurrent = NULL;
    return S_OK;
}
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// Callback for tokenizer notifications.  hr is expected to be either
// XMLStream::XML_DATAAVAILABLE or XMLStream::XML_DATAREALLOCATE (asserted).
//
// For XML_DATAREALLOCATE the pending attribute text is saved first, because
// the buffers it points into are about to be reallocated.  In both cases
// the node factory is then told that data is available.
//
// NOTE(review): checkhr2 presumably assigns to hr, in which case the value
// returned at the end is the factory's result rather than the incoming
// code -- confirm against the macro's definition.
HRESULT
XMLParser::ErrorCallback(HRESULT hr)
{
    Assert(hr == XMLStream::XML_DATAAVAILABLE ||
           hr == XMLStream::XML_DATAREALLOCATE);

    const bool fBuffersMoving = (hr == static_cast<HRESULT>(XMLStream::XML_DATAREALLOCATE));
    if (fBuffersMoving)
    {
        checkhr2(CopyContext());
    }

    checkhr2(_pFactory->NotifyEvent(this, XMLNF_DATAAVAILABLE));
    return hr;
}
|