Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1294 lines
38 KiB

  1. /////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // fusion\xmlparser\xmlparser.cxx
  4. // just comment out "SysFreeString" and SysAllocString()
  5. //
  6. /////////////////////////////////////////////////////////////////////////////////
  7. #include "stdinc.h"
  8. #include "core.hxx"
  9. #include "xmlhelper.hxx"
  10. #pragma hdrstop
  11. #include "xmlparser.hxx"
  12. #include "xmlstream.hxx"
  13. #include <objbase.h>
  14. #include "fusioninitializecriticalsection.h"
  15. const USHORT STACK_INCREMENT=10;
  16. #define PUSHNODEINFO(pNodeInfo)\
  17. if (_cNodeInfoAllocated == _cNodeInfoCurrent)\
  18. {\
  19. checkhr2(GrowNodeInfo());\
  20. }\
  21. _paNodeInfo[_cNodeInfoCurrent++] = _pCurrent;
  22. /////////////////////////////////////////////////////////////////////////////
  23. XMLParser::XMLParser()
  24. : _pDownloads(1),
  25. _pStack(STACK_INCREMENT),
  26. _pTokenizer(NULL),
  27. _pCurrent(NULL),
  28. _lCurrentElement(0),
  29. _paNodeInfo(NULL),
  30. _cNodeInfoAllocated(0),
  31. _cNodeInfoCurrent(0),
  32. _pdc(NULL),
  33. _usFlags(NULL),
  34. _fCaseInsensitive(false),
  35. _bstrError(NULL),
  36. _fRunEntryCount(0),
  37. _fInsideRun(false),
  38. _cAttributes(0),
  39. _pRoot(NULL),
  40. _fLastError(S_OK),
  41. _fStopped(false),
  42. _fSuspended(false),
  43. _fStarted(false),
  44. _fWaiting(false),
  45. _dwSafetyOptions(0)
  46. {
  47. }
  48. /////////////////////////////////////////////////////////////////////////////
  49. HRESULT
  50. XMLParser::HrInitialize()
  51. {
  52. HRESULT hr = NOERROR;
  53. return hr;
  54. }
  55. /////////////////////////////////////////////////////////////////////////////
  56. XMLParser::~XMLParser()
  57. {
  58. Reset();
  59. // Cleanup tagname buffers in context for good this time...
  60. for (long i = _pStack.size()-1; i>=0; i--)
  61. {
  62. MY_XML_NODE_INFO* pNodeInfo = _pStack[i];
  63. if (pNodeInfo->_pwcTagName != NULL)
  64. {
  65. delete [] pNodeInfo->_pwcTagName;
  66. pNodeInfo->_pwcTagName = NULL;
  67. pNodeInfo->_ulBufLen = 0;
  68. }
  69. // NULL out the node pointer in case it point's to a GC'd object :-)
  70. pNodeInfo->pNode = NULL;
  71. }
  72. delete[] _paNodeInfo;
  73. }
  74. /////////////////////////////////////////////////////////////////////////////
  75. HRESULT STDMETHODCALLTYPE
  76. XMLParser::QueryInterface(REFIID riid, void ** ppvObject)
  77. {
  78. // Since this one class implements both IXMLNodeSource and
  79. // IXMLParser, we must override QueryInterface since the
  80. // IUnknown template doesn't know about the IXMLNodeSource
  81. // interface.
  82. if (ppvObject == NULL)
  83. return E_INVALIDARG;
  84. HRESULT hr = S_OK;
  85. if (riid == IID_IXMLNodeSource || riid == IID_Parser)
  86. {
  87. *ppvObject = static_cast<IXMLNodeSource*>(this);
  88. AddRef();
  89. }
  90. else
  91. {
  92. hr = _unknown<IXMLParser, &IID_IXMLParser>::QueryInterface(riid, ppvObject);
  93. }
  94. return hr;
  95. }
  96. /////////////////////////////////////////////////////////////////////////////
  97. ULONG STDMETHODCALLTYPE
  98. XMLParser::AddRef(void)
  99. {
  100. return _unknown<IXMLParser, &IID_IXMLParser>::AddRef();
  101. }
  102. /////////////////////////////////////////////////////////////////////////////
  103. ULONG STDMETHODCALLTYPE
  104. XMLParser::Release(void)
  105. {
  106. return _unknown<IXMLParser, &IID_IXMLParser>::Release();
  107. }
  108. /////////////////////////////////////////////////////////////////////////////
  109. HRESULT STDMETHODCALLTYPE
  110. XMLParser::SetInput(IUnknown *pStm)
  111. {
  112. if (pStm == NULL)
  113. return E_INVALIDARG;
  114. if (_pDownloads.used() == 0)
  115. init();
  116. HRESULT hr = S_OK;
  117. checkhr2(PushTokenizer());
  118. // Get the url path
  119. // Continue even if we cannot get it
  120. IStream * pStream = NULL;
  121. hr = pStm->QueryInterface(IID_IStream, (void**)&pStream);
  122. if (SUCCEEDED(hr))
  123. {
  124. hr = PushStream(pStream, false);
  125. pStream->Release();
  126. }
  127. return hr;
  128. }
  129. /////////////////////////////////////////////////////////////////////////////
  130. HRESULT STDMETHODCALLTYPE
  131. XMLParser::PushData(
  132. /* [in] */ const char __RPC_FAR *pData,
  133. /* [in] */ ULONG ulChars,
  134. /* [in] */ BOOL fLastBuffer)
  135. {
  136. return E_NOTIMPL;
  137. }
  138. /////////////////////////////////////////////////////////////////////////////
  139. HRESULT STDMETHODCALLTYPE
  140. XMLParser::SetFactory(IXMLNodeFactory __RPC_FAR *pNodeFactory)
  141. {
  142. //STACK_ENTRY;
  143. _pFactory = pNodeFactory;
  144. return S_OK;
  145. }
  146. /////////////////////////////////////////////////////////////////////////////
  147. HRESULT STDMETHODCALLTYPE
  148. XMLParser::GetFactory(IXMLNodeFactory** ppNodeFactory)
  149. {
  150. if (ppNodeFactory == NULL) return E_INVALIDARG;
  151. if (_pFactory)
  152. {
  153. *ppNodeFactory = _pFactory;
  154. (*ppNodeFactory)->AddRef();
  155. }
  156. else
  157. {
  158. *ppNodeFactory = NULL;
  159. }
  160. return S_OK;
  161. }
  162. /////////////////////////////////////////////////////////////////////////////
  163. HRESULT STDMETHODCALLTYPE
  164. XMLParser::Run(long lChars)
  165. {
  166. HRESULT hr = NOERROR;
  167. FN_TRACE_HR(hr);
  168. XML_NODE_INFO info;
  169. XML_NODE_INFO* aNodeInfo[1];
  170. USHORT numRecs;
  171. bool fIsAttribute = false;
  172. bool stop = false;
  173. if (_fSuspended)
  174. _fSuspended = FALSE; // caller must want to resume.
  175. if (_pFactory == NULL)
  176. {
  177. ::FusionpDbgPrintEx(
  178. FUSION_DBG_LEVEL_ERROR,
  179. "SXS.DLL: XMLParser::Run() failing because _pFactory is NULL\n");
  180. hr = E_FAIL;
  181. goto Exit;
  182. }
  183. if (_fStopped)
  184. {
  185. ::FusionpDbgPrintEx(
  186. FUSION_DBG_LEVEL_ERROR,
  187. "SXS.DLL: XMLParser::Run() failing because _fStopped is set\n");
  188. hr = XML_E_STOPPED;
  189. goto Exit;
  190. }
  191. if (_pTokenizer == NULL)
  192. {
  193. if (_fLastError != S_OK)
  194. {
  195. ::FusionpDbgPrintEx(
  196. FUSION_DBG_LEVEL_ERROR,
  197. "SXS.DLL: XMLParser::Run() failing because _pTokenizer == NULL and _fLastError != S_OK (== 0x%08lx)\n", _fLastError);
  198. hr = _fLastError;
  199. goto Exit;
  200. }
  201. else
  202. {
  203. ::FusionpDbgPrintEx(
  204. FUSION_DBG_LEVEL_ERROR,
  205. "SXS.DLL: XMLParser::Run() failing because _pTokenizer == NULL and _fLastError == S_OK\n");
  206. // must be _fStarted == false
  207. hr = XMLPARSER_IDLE;
  208. goto Exit;
  209. }
  210. }
  211. // Check for recurrsive entry and whether caller actually
  212. // wants anything parsed.
  213. if (_fInsideRun || lChars == 0)
  214. {
  215. ::FusionpDbgPrintEx(
  216. FUSION_DBG_LEVEL_ERROR,
  217. "SXS.DLL: XMLParser::Run() failing because _fInsideRun is set or lChars == 0\n");
  218. hr = E_PENDING;
  219. goto Exit;
  220. }
  221. {
  222. BoolLock flock(&_fInsideRun);
  223. if (_fLastError != 0)
  224. {
  225. // one more chance to cleanup the parser stack.
  226. hr = _fLastError;
  227. goto cleanup_stack;
  228. }
  229. if (! _fStarted)
  230. {
  231. _fStarted = true;
  232. hr = _pFactory->NotifyEvent(this, XMLNF_STARTDOCUMENT);
  233. if (_fStopped){ // watch for onReadyStateChange handlers
  234. hr = S_OK; // fussing with the parser state.
  235. goto Exit;
  236. }
  237. }
  238. _fWaiting = false;
  239. if (_fPendingBeginChildren)
  240. {
  241. _fPendingBeginChildren = false;
  242. hr = _pFactory->BeginChildren(this, (XML_NODE_INFO*)_pCurrent);
  243. }
  244. if (_fPendingEndChildren)
  245. {
  246. _fPendingEndChildren = false;
  247. hr = _pFactory->EndChildren(this, TRUE, (XML_NODE_INFO*)_pCurrent);
  248. if (FAILED(hr))
  249. hr = pop(); // no match needed
  250. }
  251. info.dwSize = sizeof(XML_NODE_INFO);
  252. info.dwType = XMLStream::XML_PENDING;
  253. info.dwSubType = 0;
  254. info.pwcText = NULL;
  255. info.ulLen = 0;
  256. info.ulNsPrefixLen = 0;
  257. info.pNode = NULL;
  258. info.pReserved = NULL;
  259. aNodeInfo[0] = &info;
  260. more:
  261. _fRunEntryCount++; // count of callers inside this loop...
  262. while (hr == 0 && ! _fSuspended)
  263. {
  264. info.dwSubType = 0;
  265. // The XMLStream error codes have been aligned with the
  266. // XMLParser error code so no mapping is necessary.
  267. hr = _pTokenizer->GetNextToken(&info.dwType, (const WCHAR **)&info.pwcText, (long*)&info.ulLen, (long*)&info.ulNsPrefixLen);
  268. if (hr == E_PENDING)
  269. {
  270. _fWaiting = true;
  271. break;
  272. }
  273. if (! _fFoundNonWS &&
  274. info.dwType != XMLStream::XML_PENDING &&
  275. info.dwType != XML_WHITESPACE &&
  276. info.dwType != XML_XMLDECL)
  277. {
  278. _fFoundNonWS = true;
  279. }
  280. // Now the NodeType is the same as the XMLToken value. We set
  281. // this up by aligning the two enums.
  282. switch (info.dwType)
  283. {
  284. case 0:
  285. break;
  286. // --------- Container Nodes -------------------
  287. case XML_XMLDECL:
  288. if (_fFoundNonWS)
  289. {
  290. hr = XML_E_BADXMLDECL;
  291. break;
  292. }
  293. goto containers;
  294. case XML_ATTRIBUTE:
  295. fIsAttribute = true;
  296. goto containers;
  297. case XML_VERSION:
  298. info.dwSubType = info.dwType;
  299. info.dwType = XML_ATTRIBUTE;
  300. _fGotVersion = true;
  301. fIsAttribute = true;
  302. goto containers;
  303. case XML_STANDALONE:
  304. case XML_ENCODING:
  305. if (! _fGotVersion && _pDownloads.used() == 1)
  306. {
  307. hr = XML_E_EXPECTING_VERSION;
  308. break;
  309. }
  310. if (info.dwType == XML_STANDALONE)
  311. {
  312. if (_pDownloads.used() > 1)
  313. {
  314. hr = XML_E_UNEXPECTED_STANDALONE;
  315. break;
  316. }
  317. }
  318. info.dwSubType = info.dwType;
  319. info.dwType = XML_ATTRIBUTE;
  320. fIsAttribute = true;
  321. goto containers;
  322. // fall through
  323. case XML_ELEMENT:
  324. containers:
  325. if (_fRootLevel)
  326. {
  327. // Special rules apply for root level tags.
  328. if (info.dwType == XML_ELEMENT)
  329. {
  330. // This is a root level element.
  331. if (! _fFoundRoot)
  332. {
  333. _fFoundRoot = true;
  334. }
  335. else
  336. {
  337. ::FusionpDbgPrintEx(
  338. FUSION_DBG_LEVEL_ERROR,
  339. "SXS.DLL: XML Parser has found multiple roots in the document which is an error.\n");
  340. hr = XML_E_MULTIPLEROOTS;
  341. break;
  342. }
  343. }
  344. else if (info.dwType != XML_PI &&
  345. info.dwType != XML_XMLDECL &&
  346. info.dwType != XML_DOCTYPE)
  347. {
  348. ::FusionpDbgPrintEx(
  349. FUSION_DBG_LEVEL_ERROR,
  350. "SXS.DLL: XML Parser has found an initial element which is not valid at the root level.\n");
  351. hr = XML_E_INVALIDATROOTLEVEL;
  352. break;
  353. }
  354. }
  355. info.fTerminal = FALSE;
  356. if (fIsAttribute)
  357. {
  358. breakhr( pushAttribute(info));
  359. fIsAttribute = false;
  360. }
  361. else
  362. {
  363. breakhr( push(info));
  364. }
  365. break;
  366. case XML_PCDATA:
  367. case XML_CDATA:
  368. terminals:
  369. // Special rules apply for root level tags.
  370. if (_fRootLevel)
  371. {
  372. ::FusionpDbgPrintEx(
  373. FUSION_DBG_LEVEL_ERROR,
  374. "SXS.DLL: XML Parser has found PCDATA at the root level which is not valid XML.\n");
  375. hr = XML_E_INVALIDATROOTLEVEL;
  376. break;
  377. }
  378. // fall through
  379. case XML_COMMENT:
  380. case XML_WHITESPACE:
  381. tcreatenode:
  382. info.fTerminal = TRUE;
  383. if (_cAttributes != 0)
  384. {
  385. // We are inside the attribute list, so we need to push this.
  386. hr = pushAttributeValue(info);
  387. break;
  388. }
  389. hr = _pFactory->CreateNode(this, _pNode, 1, aNodeInfo);
  390. info.pNode = NULL;
  391. break;
  392. case XML_ENTITYREF:
  393. if (_fRootLevel)
  394. {
  395. hr = XML_E_INVALIDATROOTLEVEL;
  396. break;
  397. }
  398. // We handle builtin entities and char entities in xmlstream
  399. // so these must be user defined entity, so treat it like a regular terminal node.
  400. goto terminals;
  401. break;
  402. case XMLStream::XML_BUILTINENTITYREF:
  403. case XMLStream::XML_HEXENTITYREF:
  404. case XMLStream::XML_NUMENTITYREF:
  405. // pass real entityref type as subtype so we can publish these
  406. // subtypes eventually.
  407. info.dwSubType = info.dwType; // XML_ENTITYREF;
  408. info.dwType = XML_PCDATA;
  409. if (_cAttributes == 0)
  410. {
  411. goto tcreatenode;
  412. }
  413. // We are inside the attribute list, so we need to push this.
  414. info.fTerminal = TRUE;
  415. hr = pushAttributeValue(info);
  416. if (SUCCEEDED(hr))
  417. {
  418. hr = CopyText(_pCurrent);
  419. }
  420. break;
  421. case XMLStream::XML_TAGEND: // ">"
  422. numRecs = 1+_cAttributes;
  423. if (_cAttributes != 0) // this is safe because _rawstack does NOT reclaim
  424. { // the popped stack entries.
  425. popAttributes();
  426. }
  427. hr = _pFactory->CreateNode(this, _pNode, numRecs, (XML_NODE_INFO **)&_paNodeInfo[_lCurrentElement]);
  428. _pNode = _pCurrent->pNode;
  429. if (FAILED(hr))
  430. {
  431. _fPendingBeginChildren = true;
  432. break;
  433. }
  434. breakhr( _pFactory->BeginChildren(this, (XML_NODE_INFO*)_pCurrent));
  435. break;
  436. // The ENDXMLDECL is like EMPTYENDTAGs since we've been
  437. // buffering up their attributes, and we have still got to call CreateNode.
  438. case XMLStream::XML_ENDXMLDECL:
  439. _fGotVersion = false; // reset back to initial state.
  440. // fall through.
  441. case XMLStream::XML_EMPTYTAGEND:
  442. numRecs = 1+_cAttributes;
  443. if (_cAttributes != 0)
  444. {
  445. popAttributes();
  446. }
  447. hr = _pFactory->CreateNode(this, _pNode, numRecs, (XML_NODE_INFO **)&_paNodeInfo[_lCurrentElement]);
  448. if (FAILED(hr))
  449. {
  450. _fPendingEndChildren = true;
  451. break;
  452. }
  453. breakhr(_pFactory->EndChildren(this, TRUE, (XML_NODE_INFO*)_pCurrent));
  454. breakhr(pop()); // no match needed
  455. break;
  456. case XMLStream::XML_ENDTAG: // "</"
  457. if (_pStack.used() == 0)
  458. {
  459. ::FusionpDbgPrintEx(
  460. FUSION_DBG_LEVEL_ERROR,
  461. "SXS.DLL: XML Parser has found an unexpected end tag.\n");
  462. hr = XML_E_UNEXPECTEDENDTAG;
  463. }
  464. else
  465. {
  466. XML_NODE_INFO* pCurrent = (XML_NODE_INFO*)_pCurrent; // save current record
  467. breakhr(pop(info.pwcText, info.ulLen)); // check tag/match
  468. breakhr(_pFactory->EndChildren(this, FALSE, (XML_NODE_INFO*)pCurrent));
  469. }
  470. break;
  471. case XMLStream::XML_ENDPROLOG:
  472. // For top level document only, (not for DTD's or
  473. // entities), call EndProlog on the node factory.
  474. if (_fRootLevel && ! _pdc->_fEntity && ! _pdc->_fDTD)
  475. breakhr( _pFactory->NotifyEvent(this, XMLNF_ENDPROLOG));
  476. break;
  477. default:
  478. hr = E_FAIL;
  479. break; // break from switch()
  480. }
  481. }
  482. _fRunEntryCount--;
  483. stop = false;
  484. if (hr == static_cast<HRESULT>(XML_E_ENDOFINPUT))
  485. {
  486. hr = S_OK;
  487. bool inDTD = _pdc->_fDTD;
  488. bool inEntity = _pdc->_fEntity;
  489. bool inPEReference = _pdc->_fPEReference;
  490. if (inEntity && _pdc->_fDepth != _pStack.used())
  491. {
  492. ::FusionpDbgPrintEx(
  493. FUSION_DBG_LEVEL_ERROR,
  494. "SXS.DLL: XML Parser found unclosed tags at the end of the input stream.\n");
  495. // Entity itself was unbalanced.
  496. hr = ReportUnclosedTags(_pdc->_fDepth);
  497. }
  498. else if (PopDownload() == S_OK)
  499. {
  500. // then we must have just finished a DTD and we still have more to do
  501. // BUGBUG -- need to check that entity is well formed, i.e. no tags
  502. // left open.
  503. if (!inPEReference)
  504. {
  505. if (inEntity)
  506. {
  507. hr = _pFactory->NotifyEvent(this, XMLNF_ENDENTITY);
  508. }
  509. else if (inDTD)
  510. {
  511. hr = _pFactory->NotifyEvent(this, XMLNF_ENDDTD);
  512. }
  513. }
  514. if (FAILED(hr))
  515. {
  516. goto cleanup_stack;
  517. }
  518. // In a synchronous DTD download, there is another parser
  519. // parser Run() call on the stack above us, so let's return
  520. // back to that Run method so we don't complete the parsing
  521. // out from under it.
  522. if (_fRunEntryCount > 0){
  523. hr = S_OK;
  524. goto Exit;
  525. }
  526. if (_fStopped){
  527. hr = S_OK;
  528. goto Exit;
  529. }
  530. goto more;
  531. }
  532. else
  533. {
  534. if (_pStack.used() > 0)
  535. {
  536. hr = ReportUnclosedTags(0);
  537. }
  538. else if (! _fFoundRoot)
  539. {
  540. ::FusionpDbgPrintEx(
  541. FUSION_DBG_LEVEL_ERROR,
  542. "SXS.DLL: XML Parser has found no root in the document.\n");
  543. hr = XML_E_MISSINGROOT;
  544. }
  545. stop = true;
  546. }
  547. }
  548. cleanup_stack:
  549. if (hr != S_OK && hr != E_PENDING)
  550. {
  551. stop = true;
  552. _fLastError = hr;
  553. // Pass all the XML_NODE_INFO structs to the Error function so the client
  554. // gets a chance to cleanup the PVOID pNode fields.
  555. HRESULT edr = _pFactory->Error(this, hr,
  556. (USHORT)(_paNodeInfo ? _lCurrentElement+1 : 0), (XML_NODE_INFO**)_paNodeInfo);
  557. if (edr != 0)
  558. _fLastError = hr;
  559. }
  560. if (stop && ! _fStopped)
  561. {
  562. //TraceTag((tagParserError, "Parser stopping with hr %x", hr));
  563. _fLastError = hr;
  564. _fStopped = true;
  565. _fStarted = false;
  566. HRESULT edr;
  567. edr = _pFactory->NotifyEvent(this, XMLNF_ENDDOCUMENT);
  568. if (edr != 0)
  569. {
  570. hr = edr; // allow factory to change error code (except to S_OK)
  571. if (S_OK == _fLastError)
  572. {
  573. // Make sure the node factory always finds out about errors.
  574. edr = _pFactory->Error(this, hr, 0, NULL);
  575. if (edr != 0)
  576. hr = edr;
  577. }
  578. _fLastError = hr;
  579. }
  580. }
  581. }
  582. Exit:
  583. return hr;
  584. }
  585. /////////////////////////////////////////////////////////////////////////////
  586. HRESULT
  587. XMLParser::popAttributes()
  588. {
  589. // Now I pop all the attributes that were pushed for this tag.
  590. // I know we have at least one attribute.
  591. while (_cAttributes > 0)
  592. {
  593. popAttribute(); // no match needed
  594. }
  595. Assert(_pStack.used() == _lCurrentElement+1);
  596. return S_OK;
  597. }
  598. /////////////////////////////////////////////////////////////////////////////
  599. HRESULT STDMETHODCALLTYPE
  600. XMLParser::GetParserState(void)
  601. {
  602. if (_fLastError != 0)
  603. return static_cast<HRESULT>(XMLPARSER_ERROR);
  604. if (_fStopped)
  605. return static_cast<HRESULT>(XMLPARSER_STOPPED);
  606. if (_fSuspended)
  607. return static_cast<HRESULT>(XMLPARSER_SUSPENDED);
  608. if (! _fStarted)
  609. return static_cast<HRESULT>(XMLPARSER_IDLE);
  610. if (_fWaiting)
  611. return static_cast<HRESULT>(XMLPARSER_WAITING);
  612. return static_cast<HRESULT>(XMLPARSER_BUSY);
  613. }
  614. /////////////////////////////////////////////////////////////////////////////
  615. HRESULT STDMETHODCALLTYPE
  616. XMLParser::Abort(
  617. /* [in] */ BSTR bstrErrorInfo)
  618. {
  619. _fStopped = true;
  620. _fSuspended = true; // force Run to terminate...
  621. return S_OK;
  622. }
  623. /////////////////////////////////////////////////////////////////////////////
  624. HRESULT STDMETHODCALLTYPE
  625. XMLParser::Suspend( void)
  626. {
  627. _fSuspended = true; // force Run to suspend
  628. return S_OK;
  629. }
  630. /////////////////////////////////////////////////////////////////////////////
  631. HRESULT STDMETHODCALLTYPE
  632. XMLParser::Reset( void)
  633. {
  634. init();
  635. _pRoot = NULL;
  636. _pFactory = NULL;
  637. _pNode = NULL;
  638. _bstrError = NULL;
  639. return S_OK;
  640. }
  641. /////////////////////////////////////////////////////////////////////////////
  642. ULONG STDMETHODCALLTYPE
  643. XMLParser::GetLineNumber(void)
  644. {
  645. if (_pTokenizer) return _pTokenizer->GetLine();
  646. else return 0;
  647. }
  648. /////////////////////////////////////////////////////////////////////////////
  649. ULONG STDMETHODCALLTYPE
  650. XMLParser::GetLinePosition( void)
  651. {
  652. if (_pTokenizer) return _pTokenizer->GetLinePosition();
  653. else return 0;
  654. }
  655. /////////////////////////////////////////////////////////////////////////////
  656. ULONG STDMETHODCALLTYPE
  657. XMLParser::GetAbsolutePosition( void)
  658. {
  659. if (_pTokenizer) return _pTokenizer->GetInputPosition();
  660. else return 0;
  661. }
  662. /////////////////////////////////////////////////////////////////////////////
  663. HRESULT STDMETHODCALLTYPE
  664. XMLParser::GetLineBuffer(
  665. /* [out] */ const WCHAR __RPC_FAR *__RPC_FAR *ppwcBuf,
  666. /* [out] */ ULONG __RPC_FAR *pulLen,
  667. /* [out] */ ULONG __RPC_FAR *pulStartPos)
  668. {
  669. if (pulLen == NULL || pulStartPos == NULL) return E_INVALIDARG;
  670. //STACK_ENTRY;
  671. if (_pTokenizer)
  672. {
  673. return _pTokenizer->GetLineBuffer(ppwcBuf, pulLen, pulStartPos);
  674. }
  675. *ppwcBuf = NULL;
  676. *pulLen = 0;
  677. return S_OK;
  678. }
  679. /////////////////////////////////////////////////////////////////////////////
  680. HRESULT STDMETHODCALLTYPE
  681. XMLParser::GetLastError( void)
  682. {
  683. return _fLastError;
  684. }
  685. //------------ PRIVATE METHODS --------------------------------------------------
  686. HRESULT
  687. //XMLParser::PushTokenizer(
  688. // URLStream* stream)
  689. XMLParser::PushTokenizer()
  690. {
  691. _pTokenizer = NEW (XMLStream(this));
  692. if (_pTokenizer == NULL)
  693. return E_OUTOFMEMORY;
  694. _pTokenizer->SetFlags(_usFlags);
  695. // _fTokenizerChanged = true;
  696. //HRESULT hr= PushDownload(stream, _pTokenizer);
  697. HRESULT hr= PushDownload(_pTokenizer);
  698. if (FAILED(hr))
  699. {
  700. delete _pTokenizer;
  701. _pTokenizer = NULL;
  702. return hr;
  703. }
  704. return S_OK;
  705. }
  706. /////////////////////////////////////////////////////////////////////////////
  707. HRESULT
  708. //XMLParser::PushDownload(URLStream* stream, XMLStream* tokenizer)
  709. XMLParser::PushDownload(XMLStream* tokenizer)
  710. {
  711. // NOTE: tokenizer can be null, in the case of a parameter entity download.
  712. _pdc = _pDownloads.push();
  713. if (_pdc == NULL)
  714. {
  715. return E_OUTOFMEMORY;
  716. }
  717. if (_pDownloads.used() > 1)
  718. _fRootLevel = false;
  719. _pdc->_pTokenizer = tokenizer;
  720. _pdc->_fDTD = false;
  721. _pdc->_fEntity = false;
  722. _pdc->_fAsync = false;
  723. _pdc->_fFoundNonWS = _fFoundNonWS;
  724. _pdc->_fFoundRoot = _fFoundRoot;
  725. _pdc->_fRootLevel = _fRootLevel;
  726. _pdc->_fDepth = _pStack.used();
  727. _fFoundNonWS = false;
  728. _fFoundRoot = false;
  729. _fRootLevel = (_pStack.used() == 0 && _pDownloads.used() == 1);
  730. HRESULT hr = S_OK;
  731. return hr;
  732. }
  733. /////////////////////////////////////////////////////////////////////////////
  734. HRESULT
  735. XMLParser::PushStream(IStream* pStm, bool fpe)
  736. {
  737. EncodingStream* stream = (EncodingStream*)EncodingStream::newEncodingStream(pStm); // refcount = 1
  738. if (stream == NULL)
  739. return E_OUTOFMEMORY;
  740. /*
  741. if (_usFlags & XMLFLAG_RUNBUFFERONLY)
  742. stream->setReadStream(false);
  743. */
  744. _pdc->_pEncodingStream = stream;
  745. stream->Release(); // Smart pointer is holding a ref
  746. HRESULT hr = _pTokenizer->PushStream(stream, fpe);
  747. if (hr == E_PENDING)
  748. {
  749. _fWaiting = true;
  750. }
  751. return hr;
  752. }
  753. /////////////////////////////////////////////////////////////////////////////
  754. HRESULT
  755. XMLParser::PopDownload()
  756. {
  757. // NOTE: tokenizer can be null, in the case of a parameter entity download.
  758. HRESULT hr = S_OK;
  759. if (_pdc != NULL)
  760. {
  761. if (_pdc->_pTokenizer)
  762. {
  763. _pdc->_pTokenizer->Reset();
  764. delete _pdc->_pTokenizer;
  765. _pdc->_pTokenizer = NULL;
  766. }
  767. _pdc->_pEncodingStream = NULL;
  768. // restore saved value of foundnonws.
  769. _fFoundNonWS = _pdc->_fFoundNonWS;
  770. _pdc = _pDownloads.pop();
  771. }
  772. if (_pdc != NULL)
  773. {
  774. if (_pdc->_pTokenizer != NULL)
  775. {
  776. _pTokenizer = _pdc->_pTokenizer;
  777. }
  778. }
  779. else
  780. {
  781. _pTokenizer = NULL;
  782. hr = S_FALSE;
  783. }
  784. if (_pStack.used() == 0 && _pDownloads.used() == 1)
  785. _fRootLevel = true;
  786. return hr;
  787. }
  788. /////////////////////////////////////////////////////////////////////////////
  789. HRESULT
  790. XMLParser::GrowNodeInfo()
  791. {
  792. USHORT newsize = _cNodeInfoAllocated + STACK_INCREMENT;
  793. MY_XML_NODE_INFO** pNewArray = NEW (PMY_XML_NODE_INFO[newsize]);
  794. if (pNewArray == NULL)
  795. return E_OUTOFMEMORY;
  796. // Now since STACK_INCREMENT is the same for _pStack then _pStack
  797. // has also re-allocated. Therefore we need to re-initialize all
  798. // the pointers in this array - since they point into the _pStack's memory.
  799. for (int i = _pStack.used() - 1; i >= 0; i--)
  800. {
  801. pNewArray[i] = _pStack[i];
  802. }
  803. delete[] _paNodeInfo;
  804. _paNodeInfo = pNewArray;
  805. _cNodeInfoAllocated = newsize;
  806. return S_OK;
  807. }
  808. /////////////////////////////////////////////////////////////////////////////
  809. HRESULT
  810. XMLParser::GrowBuffer(PMY_XML_NODE_INFO pNodeInfo, long newlen)
  811. {
  812. delete [] pNodeInfo->_pwcTagName;
  813. pNodeInfo->_pwcTagName = NULL;
  814. // add 50 characters to avoid too many reallocations.
  815. pNodeInfo->_pwcTagName = NEW (WCHAR[ newlen ]);
  816. if (pNodeInfo->_pwcTagName == NULL)
  817. return E_OUTOFMEMORY;
  818. pNodeInfo->_ulBufLen = newlen;
  819. return S_OK;
  820. }
  821. /////////////////////////////////////////////////////////////////////////////
  822. HRESULT
  823. XMLParser::push(XML_NODE_INFO& info)
  824. {
  825. HRESULT hr;
  826. _lCurrentElement = _pStack.used();
  827. _pCurrent = _pStack.push();
  828. if (_pCurrent == NULL)
  829. return E_OUTOFMEMORY;
  830. *((XML_NODE_INFO*)_pCurrent) = info;
  831. PUSHNODEINFO(_pCurrent);
  832. _fRootLevel = false;
  833. // Save the tag name into the private buffer so it sticks around until the
  834. // close tag </foo> which could be anywhere down the road after the
  835. // BufferedStream been overwritten
  836. // THIS CODE IS OPTIMIZED FOR PERFORMANCE WHICH IS WHY IT IS NOT
  837. // CALLING THE CopyText METHOD.
  838. if (_pCurrent->_ulBufLen < info.ulLen+1)
  839. {
  840. checkhr2(GrowBuffer(_pCurrent, info.ulLen + 50));
  841. }
  842. Assert(info.ulLen >= 0);
  843. ::memcpy(_pCurrent->_pwcTagName, info.pwcText, info.ulLen*sizeof(WCHAR));
  844. _pCurrent->_pwcTagName[info.ulLen] = L'\0';
  845. // And make the XML_NODE_INFO point to private buffer.
  846. _pCurrent->pwcText = _pCurrent->_pwcTagName;
  847. return S_OK;
  848. }
  849. /////////////////////////////////////////////////////////////////////////////
  850. HRESULT
  851. XMLParser::pushAttribute(XML_NODE_INFO& info)
  852. {
  853. HRESULT hr;
  854. if (_cAttributes != 0)
  855. {
  856. // Attributes are special in that they are supposed to be unique.
  857. // So here we actually check this.
  858. for (long i = _pStack.used()-1; i > _lCurrentElement; i--)
  859. {
  860. XML_NODE_INFO* ptr = _pStack[i];
  861. if (ptr->dwType != XML_ATTRIBUTE)
  862. continue; // ignore attribute values.
  863. if (ptr->ulLen != info.ulLen)
  864. {
  865. continue; // we're ok with this one
  866. }
  867. // Optimized for the normal case where there is no match
  868. if (::memcmp(ptr->pwcText, info.pwcText, info.ulLen*sizeof(WCHAR)) == 0)
  869. {
  870. if (! _fCaseInsensitive)
  871. {
  872. ::FusionpDbgPrintEx(
  873. FUSION_DBG_LEVEL_ERROR,
  874. "SXS.DLL: XML Parser found a duplicate attribute\n");
  875. return XML_E_DUPLICATEATTRIBUTE;
  876. }
  877. //else if (StrCmpNI(ptr->pwcText, info.pwcText, info.ulLen) == 0)
  878. else if (::FusionpCompareStrings(ptr->pwcText, ::wcslen(ptr->pwcText), info.pwcText, info.ulLen, true) == 0)
  879. {
  880. ::FusionpDbgPrintEx(
  881. FUSION_DBG_LEVEL_ERROR,
  882. "SXS.DLL: XML Parser found a duplicate attribute (#2)\n");
  883. // Duplicate attributes are allowed in IE4 mode!!
  884. // But only the latest one shows up
  885. // So we have to delete the previous duplication
  886. return XML_E_DUPLICATEATTRIBUTE;
  887. }
  888. }
  889. }
  890. }
  891. _cAttributes++;
  892. _pCurrent = _pStack.push();
  893. if (_pCurrent == NULL)
  894. return E_OUTOFMEMORY;
  895. *((XML_NODE_INFO*)_pCurrent) = info;
  896. PUSHNODEINFO(_pCurrent);
  897. return S_OK;
  898. }
  899. /////////////////////////////////////////////////////////////////////////////
  900. HRESULT
  901. XMLParser::pushAttributeValue(XML_NODE_INFO& info)
  902. {
  903. HRESULT hr;
  904. // Attributes are saved in the BufferedStream so we can point to the
  905. // real text in the buffered stream instead of copying it !!
  906. _pCurrent = _pStack.push();
  907. if (_pCurrent == NULL)
  908. return E_OUTOFMEMORY;
  909. // store attribute value quote character in the pReserved field.
  910. info.pReserved = (PVOID)_pTokenizer->getAttrValueQuoteChar();
  911. *((XML_NODE_INFO*)_pCurrent) = info;
  912. PUSHNODEINFO(_pCurrent);
  913. // this is really the count of nodes on the stack, not just attributes.
  914. _cAttributes++;
  915. return S_OK;
  916. }
  917. /////////////////////////////////////////////////////////////////////////////
  918. HRESULT
  919. XMLParser::pop(const WCHAR* tag, ULONG len)
  920. {
  921. HRESULT hr = S_OK;
  922. if (_pCurrent == NULL || _pStack.used() == 0)
  923. {
  924. ::FusionpDbgPrintEx(
  925. FUSION_DBG_LEVEL_ERROR,
  926. "SXS.DLL: XML Parser found an unexpected end tag.\n");
  927. hr = XML_E_UNEXPECTEDENDTAG;
  928. goto Cleanup;
  929. }
  930. if (len != 0)
  931. {
  932. if (_pCurrent->ulLen != len)
  933. {
  934. ::FusionpDbgPrintEx(
  935. FUSION_DBG_LEVEL_ERROR,
  936. "SXS.DLL: XML Parser found an end tag mismatch\n");
  937. hr = XML_E_ENDTAGMISMATCH;
  938. }
  939. // Optimized for the normal case where there is no match
  940. else if (::memcmp(_pCurrent->pwcText, tag, len*sizeof(WCHAR)) != 0)
  941. {
  942. if (! _fCaseInsensitive)
  943. {
  944. ::FusionpDbgPrintEx(
  945. FUSION_DBG_LEVEL_ERROR,
  946. "SXS.DLL: XML Parser found an end tag mismatch.\n");
  947. hr = XML_E_ENDTAGMISMATCH;
  948. }
  949. else if (::FusionpCompareStrings(_pCurrent->pwcText, len, tag, len, true) != 0)
  950. {
  951. hr = XML_E_ENDTAGMISMATCH;
  952. }
  953. }
  954. if (FAILED(hr))
  955. {
  956. goto Cleanup;
  957. }
  958. }
  959. // We don't delete the fTagName because we're going to reuse this field
  960. // later to avoid lots of memory allocations.
  961. _pCurrent = _pStack.pop();
  962. _cNodeInfoCurrent--;
  963. if (_pCurrent == 0)
  964. {
  965. _pNode = _pRoot;
  966. if (_pDownloads.used() == 1)
  967. _fRootLevel = true;
  968. }
  969. else
  970. {
  971. _pNode = _pCurrent->pNode;
  972. }
  973. Cleanup:
  974. return hr;
  975. }
  976. /////////////////////////////////////////////////////////////////////////////
  977. HRESULT XMLParser::pop()
  978. {
  979. // We don't delete the fTagName because we're going to reuse this field
  980. // later to avoid lots of memory allocations.
  981. _pCurrent = _pStack.pop();
  982. _cNodeInfoCurrent--;
  983. if (_pCurrent == 0)
  984. {
  985. _pNode = _pRoot;
  986. if (_pDownloads.used() == 1)
  987. _fRootLevel = true;
  988. }
  989. else
  990. {
  991. _pNode = _pCurrent->pNode;
  992. }
  993. return S_OK;
  994. }
  995. /////////////////////////////////////////////////////////////////////////////
  996. void XMLParser::popAttribute()
  997. {
  998. Assert(_pStack.used() > 0);
  999. _pCurrent = _pStack.pop();
  1000. _cNodeInfoCurrent--;
  1001. Assert(_pCurrent != 0);
  1002. _cAttributes--;
  1003. }
  1004. /////////////////////////////////////////////////////////////////////////////
  1005. HRESULT
  1006. XMLParser::CopyText(PMY_XML_NODE_INFO pNodeInfo)
  1007. {
  1008. HRESULT hr = S_OK;
  1009. if (pNodeInfo->_pwcTagName != pNodeInfo->pwcText)
  1010. {
  1011. ULONG len = pNodeInfo->ulLen;
  1012. // Copy the current text into the buffer.
  1013. if (pNodeInfo->_ulBufLen < len+1)
  1014. {
  1015. checkhr2(GrowBuffer(pNodeInfo, len + 50));
  1016. }
  1017. if (len > 0)
  1018. {
  1019. ::memcpy(pNodeInfo->_pwcTagName, pNodeInfo->pwcText, len*sizeof(WCHAR));
  1020. }
  1021. pNodeInfo->_pwcTagName[len] = L'\0';
  1022. // And make the XML_NODE_INFO point to private buffer.
  1023. pNodeInfo->pwcText = pNodeInfo->_pwcTagName;
  1024. }
  1025. return S_OK;
  1026. }
  1027. /////////////////////////////////////////////////////////////////////////////
  1028. HRESULT
  1029. XMLParser::CopyContext()
  1030. {
  1031. // For performance reasons we try not to copy the data for attributes
  1032. // and their values when we push them on the stack. We can do this
  1033. // because the tokenizer tries to freeze the internal buffers while
  1034. // parsing attributes and thereby guarentee that the pointers stay
  1035. // good. But occasionally the BufferedStream has to reallocate when
  1036. // the attributes are right at the end of the buffer.
  1037. long last = _pStack.used();
  1038. for (long i = _cAttributes; i > 0 ; i--)
  1039. {
  1040. long index = last - i;
  1041. MY_XML_NODE_INFO* ptr = _pStack[index];
  1042. CopyText(ptr);
  1043. }
  1044. return S_OK;
  1045. }
  1046. /////////////////////////////////////////////////////////////////////////////
  1047. HRESULT XMLParser::ReportUnclosedTags(int start)
  1048. {
  1049. HRESULT hr = XML_E_UNCLOSEDTAG;
  1050. // Build a string containing the list of unclosed tags and format an error
  1051. // message containing this text.
  1052. int tags = _pStack.used();
  1053. WCHAR* buffer = NULL;
  1054. WCHAR* msgbuf = NULL;
  1055. unsigned long size = 0;
  1056. unsigned long used = 0;
  1057. for (long i = start; i < tags; i++)
  1058. {
  1059. XML_NODE_INFO* ptr = _pStack[i];
  1060. if (ptr->dwType == XML_ATTRIBUTE)
  1061. break;
  1062. if (used + ptr->ulLen + 3 > size) // +3 for '<','>' and '\0'
  1063. {
  1064. long newsize = used + ptr->ulLen + 500;
  1065. WCHAR* newbuf = NEW (WCHAR[newsize]);
  1066. if (newbuf == NULL)
  1067. {
  1068. goto nomem;
  1069. }
  1070. if (buffer != NULL)
  1071. {
  1072. //
  1073. // Used appears to be the number of -characters- that are
  1074. // being allocated, not the number of -bytes-. So, we need
  1075. // to multiply it by the size of a character.
  1076. //
  1077. ::memcpy(newbuf, buffer, used * sizeof(WCHAR));
  1078. delete[] buffer;
  1079. }
  1080. size = newsize;
  1081. buffer = newbuf;
  1082. }
  1083. if (i > start)
  1084. {
  1085. buffer[used++] = ',';
  1086. buffer[used++] = ' ';
  1087. }
  1088. ::memcpy(&buffer[used], ptr->pwcText, sizeof(WCHAR) * ptr->ulLen);
  1089. used += ptr->ulLen;
  1090. buffer[used] = '\0';
  1091. }
  1092. goto cleanup;
  1093. nomem:
  1094. hr = E_OUTOFMEMORY;
  1095. cleanup:
  1096. delete [] buffer;
  1097. delete [] msgbuf;
  1098. return hr;
  1099. }
  1100. /////////////////////////////////////////////////////////////////////////////
  1101. HRESULT XMLParser::init()
  1102. {
  1103. _fLastError = 0;
  1104. _fStopped = false;
  1105. _fSuspended = false;
  1106. _pNode = _pRoot;
  1107. _fStarted = false;
  1108. _fStopped = false;
  1109. _fWaiting = false;
  1110. _fFoundRoot = false;
  1111. _fFoundNonWS = false;
  1112. _pTokenizer = NULL;
  1113. _fGotVersion = false;
  1114. _fRootLevel = true;
  1115. _cAttributes = 0;
  1116. _fPendingBeginChildren = false;
  1117. _fPendingEndChildren = false;
  1118. while (_pCurrent != NULL)
  1119. {
  1120. _pCurrent = _pStack.pop();
  1121. }
  1122. _cNodeInfoCurrent = 0;
  1123. _lCurrentElement = 0;
  1124. // cleanup downloads
  1125. while (_pdc != NULL)
  1126. {
  1127. PopDownload();
  1128. }
  1129. _pCurrent = NULL;
  1130. return S_OK;
  1131. }
  1132. /////////////////////////////////////////////////////////////////////////////
  1133. HRESULT
  1134. XMLParser::ErrorCallback(HRESULT hr)
  1135. {
  1136. Assert(hr == XMLStream::XML_DATAAVAILABLE ||
  1137. hr == XMLStream::XML_DATAREALLOCATE);
  1138. if (hr == static_cast<HRESULT>(XMLStream::XML_DATAREALLOCATE))
  1139. {
  1140. // This is more serious. We have to actually save away the
  1141. // context because the buffers are about to be reallocated.
  1142. checkhr2(CopyContext());
  1143. }
  1144. checkhr2(_pFactory->NotifyEvent(this, XMLNF_DATAAVAILABLE));
  1145. return hr;
  1146. }