Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2069 lines
58 KiB

  1. /*
  2. *
  3. * Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
  4. * EXEMPT: copyright change only, no build required
  5. *
  6. */
  7. #include "stdinc.h"
  8. #include "core.hxx"
  9. #pragma hdrstop
  10. #include "xmlhelper.hxx"
  11. #include "xmlstream.hxx"
  12. #include "bufferedstream.hxx"
  13. #include "xmlparser.hxx"
  // Tuning constants. NOTE(review): neither is referenced in the visible part
  // of this file; the names suggest buffer-block and state-stack growth
  // granularity -- confirm against their users.
  14. const long BLOCK_SIZE = 512;
  15. const long STACK_INCREMENT = 10;
  16. // macros used in this file
  // CAUTION: these macros expand to bare statements (there is no
  // do { } while (0) wrapper) and most of them hide a 'return' from the
  // enclosing function. Those that mention 'hr' require a local variable
  // named 'hr' to be in scope at the point of use.
  //
  // INTERNALERROR - returns XML_E_INTERNALERROR from the enclosing function.
  17. #define INTERNALERROR return XML_E_INTERNALERROR;
  // checkeof(a,b) - returns 'b' from the enclosing function when _fEOF is set.
  // The first argument is not used by the expansion.
  18. #define checkeof(a,b) if (_fEOF) return b;
  // ADVANCE - asks the current input for the next character (updating
  // _chLookahead and _fEOF) and returns any result other than S_OK.
  // This expands to TWO statements: under an unbraced 'if' only the
  // nextChar() call is conditional, the 'hr' check always runs.
  19. #define ADVANCE hr = _pInput->nextChar(&_chLookahead, &_fEOF); if (hr != S_OK) return hr;
  // ADVANCETO(a) - calls AdvanceTo(a) and returns any result other than S_OK.
  // Two statements, same caveat as ADVANCE.
  20. #define ADVANCETO(a) hr = AdvanceTo(a); if (hr != S_OK) return hr;
  // ISWHITESPACE(ch) - forwards to the current input stream's isWhiteSpace().
  21. #define ISWHITESPACE(ch) _pInput->isWhiteSpace(ch)
  // STATE(state) - stores 'state' in _sSubState and returns S_OK; the state
  // functions below switch on _sSubState, so they resume at that case the
  // next time they are called.
  22. #define STATE(state) { _sSubState = state; return S_OK; }
  // GOTOSTART(state) - stores 'state' in _sSubState and jumps to a label
  // named 'Start', which the using function must define.
  23. #define GOTOSTART(state) { _sSubState = state; goto Start; }
  // DELAYMARK(hr) - true when hr is S_OK or lies in the half-open range
  // [XML_E_TOKEN_ERROR, XML_E_LASTERROR). GetNextToken() uses it to decide
  // whether to postpone mark() until its next call.
  24. #define DELAYMARK(hr) (hr == S_OK || (hr >= XML_E_TOKEN_ERROR && hr < XML_E_LASTERROR))
  // File-private result code. NOTE(review): not used in the visible part of
  // this file; the name suggests "found a parameter-entity reference".
  25. #define XML_E_FOUNDPEREF 0x8000e5ff
  // Attribute value types that get special treatment from the tokenizer.
  // They are derived from the XML_AT_XXXX types supplied through SetType and
  // are also computed while parsing an ATTLIST, so that default values can be
  // parsed accordingly.
  enum XML_ATTRIBUTE_TYPE
  {
      XMLTYPE_CDATA    = 0,   // the default.
      XMLTYPE_NAME     = 1,
      XMLTYPE_NAMES    = 2,
      XMLTYPE_NMTOKEN  = 3,
      XMLTYPE_NMTOKENS = 4
  };
  38. //==============================================================================
  39. // xiaoyu : a simplified table : only deal with comments, not include DOCTYPE, NotationDecl, EntityDecl and ElementDecl.
  40. // Parse an <!^xxxxxxxx Declaration.
  // State table handed to pushTable() by parseContent() once "<!" has been
  // seen. The number in the comment above each row is that row's index.
  // NOTE(review): StateEntry is declared elsewhere; judging from the rows and
  // their comments each entry appears to be
  //   { opcode, character/string argument, next row on success,
  //     next row on failure OR error code, extra argument }
  // -- confirm against the StateEntry declaration before editing rows.
  41. const StateEntry g_DeclarationTable[] =
  42. {
  43. // 0 '<' ^ '!'
  44. { OP_CHAR, L"!", 1, (DWORD)XML_E_INTERNALERROR, },
  45. // 1 '<!' ^ '-'
  46. { OP_PEEK, L"-", 2, 4, 0 },
  47. // 2 '<!-'
  48. { OP_COMMENT, NULL, 3, },
  49. // 3 done !!
  50. { OP_POP, NULL, 0, 0 },
  51. // 4 '<!' ^ '['
  52. { OP_PEEK, L"[", 5, (DWORD)XML_E_BADDECLNAME, 0 }, //xiaoyu : other "<!XXX" forms belong to the DTD subset and are rejected here
  53. // 5 '<![...'
  54. { OP_CONDSECT, NULL, 3, }
  55. };
  56. //==============================================================================
  57. // Parse an <?xml or <?xml:namespace declaration.
  // State table handed to pushTable() by parsePI() after "<?xml" followed by
  // whitespace has been recognised. The number in the comment above each row
  // is that row's index; rows 30-34 are helper rows placed after the final
  // OP_POP row and are only reached through explicit row references.
  // NOTE(review): StateEntry is declared elsewhere; judging from the rows each
  // entry appears to be
  //   { opcode, string argument, next row on success,
  //     next row on failure OR error code OR token, extra argument }
  // -- confirm against the StateEntry declaration before editing rows.
  // Attribute order accepted, as encoded by the row references below:
  // version, then optional encoding, then optional standalone.
  58. const StateEntry g_XMLDeclarationTable[] =
  59. {
  60. // 0 must be xml declaration - and not xml namespace declaration
  61. { OP_TOKEN, NULL, 1, XML_XMLDECL, 0 },
  62. // 1 '<?xml' ^ S version="1.0" ...
  63. { OP_OWS, NULL, 2 },
  64. // 2 '<?xml' S ^ version="1.0" ...
  65. { OP_SNCHAR, NULL, 3, (DWORD)XML_E_XMLDECLSYNTAX },
  66. // 3 '<?xml' S ^ version="1.0" ...
  67. { OP_NAME, NULL, 4, },
  68. // 4 '<?xml' S version^="1.0" ...
  69. { OP_STRCMP, L"version", 5, 12, XML_VERSION },
  70. // 5
  71. { OP_EQUALS, NULL, 6 },
  72. // 6 '<?xml' S version = ^ "1.0" ...
  73. { OP_ATTRVAL, NULL, 32, 0},
  74. // 7 '<?xml' S version '=' value ^
  75. { OP_TOKEN, NULL, 8, XML_PCDATA, -1 },
  76. // 8 ^ are we done ?
  77. { OP_CHARWS, L"?", 28, 9 }, // must be '?' or whitespace.
  78. // 9 ^ S? [encoding|standalone] '?>'
  79. { OP_OWS, NULL, 10 },
  80. // 10
  81. { OP_CHAR, L"?", 28, 33 }, // may have '?' after skipping whitespace.
  82. // 11 ^ [encoding|standalone] '?>'
  83. { OP_NAME, NULL, 12, },
  84. // 12
  85. { OP_STRCMP, L"standalone", 23, 13, XML_STANDALONE },
  86. // 13
  87. { OP_STRCMP, L"encoding", 14, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE, XML_ENCODING },
  88. // 14
  89. { OP_EQUALS, NULL, 15 },
  90. // 15
  91. { OP_ATTRVAL, NULL, 16, 0 },
  92. // 16
  93. { OP_ENCODING, NULL, 17, 0, -1 },
  94. // 17
  95. { OP_TOKEN, NULL, 18, XML_PCDATA, -1 },
  96. // 18 ^ are we done ?
  97. { OP_CHARWS, L"?", 28, 19 }, // must be '?' or whitespace.
  98. // 19 ^ S? standalone '?>'
  99. { OP_OWS, NULL, 20 },
  100. // 20
  101. { OP_CHAR, L"?", 28, 34 }, // may have '?' after skipping whitespace.
  102. // 21 ^ standalone '?>'
  103. { OP_NAME, NULL, 22, },
  104. // 22
  105. { OP_STRCMP, L"standalone", 23, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE,
  106. XML_STANDALONE },
  107. // 23
  108. { OP_EQUALS, NULL, 24 },
  109. // 24
  110. { OP_ATTRVAL, NULL, 25, 0 },
  111. // 25
  112. { OP_STRCMP, L"yes", 31, 30, -1 },
  113. // 26 <?xml ....... ^ '?>' -- now expecting just the closing '?>' chars
  114. { OP_OWS, NULL, 27 },
  115. // 27
  116. { OP_CHAR, L"?", 28, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
  117. // 28
  118. { OP_CHAR, L">", 29, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
  119. // 29 done !!
  120. { OP_POP, NULL, 0, XMLStream::XML_ENDXMLDECL },
  121. //----------------------- check standalone values "yes" or "no"
  122. // 30
  123. { OP_STRCMP, L"no", 31, (DWORD)XML_E_INVALID_STANDALONE, -1 },
  124. // 31
  125. { OP_TOKEN, NULL, 26, XML_PCDATA, -1 },
  126. //----------------------- check version = "1.0"
  127. // 32
  128. { OP_STRCMP, L"1.0", 7, (DWORD)XML_E_INVALID_VERSION, -1 },
  129. // 33 (reached from row 10 when no '?' follows the version value)
  130. { OP_SNCHAR, NULL, 11, (DWORD)XML_E_XMLDECLSYNTAX },
  131. // 34 (reached from row 20 when no '?' follows the encoding value)
  132. { OP_SNCHAR, NULL, 21, (DWORD)XML_E_XMLDECLSYNTAX },
  133. };
  // File-local wide-string literal "CDATA".
  // NOTE(review): not referenced in the visible part of this file; presumably
  // used when matching "<![CDATA[" further down -- confirm.
  134. static const WCHAR* g_pstrCDATA = L"CDATA";
  135. ////////////////////////////////////////////////////////////////////////
  136. XMLStream::XMLStream(XMLParser * pXMLParser)
  137. : _pStack(1), _pStreams(1)
  138. {
  139. // precondition: 'func' is never NULL
  140. _fnState = &XMLStream::init;
  141. _pInput = NULL;
  142. _pchBuffer = NULL;
  143. _fDTD = false;
  144. //_fInternalSubset = false;
  145. _cStreamDepth = 0;
  146. _pXMLParser = pXMLParser;
  147. _init();
  148. SetFlags(0);
  149. }
  150. ////////////////////////////////////////////////////////////////////////
  151. HRESULT
  152. XMLStream::init()
  153. {
  154. HRESULT hr = S_OK;
  155. if (_pInput == NULL)
  156. {
  157. //haven' called put-stream yet
  158. return XML_E_ENDOFINPUT;
  159. }
  160. _init();
  161. #ifdef FUSION_USE_OLD_XML_PARSER_SOURCE
  162. if (_fDTD)
  163. {
  164. _fnState = &XMLStream::parseDTDContent;
  165. }
  166. else
  167. #endif
  168. {
  169. _fnState = &XMLStream::parseContent;
  170. }
  171. checkhr2(push(&XMLStream::firstAdvance,0));
  172. return hr;
  173. }
  174. ////////////////////////////////////////////////////////////////////////
  175. void
  176. XMLStream::_init()
  177. {
  178. _fEOF = false;
  179. //_fEOPE = false;
  180. _chLookahead = 0;
  181. _nToken = XML_PENDING;
  182. _chTerminator = 0;
  183. _lLengthDelta = 0;
  184. _lNslen = _lNssep = 0;
  185. _sSubState = 0;
  186. _lMarkDelta = 0;
  187. //_nAttrType = XMLTYPE_CDATA;
  188. _fUsingBuffer = false;
  189. _lBufLen = 0;
  190. delete[] _pchBuffer;
  191. _pchBuffer = NULL;
  192. _lBufSize = 0;
  193. _fDelayMark = false;
  194. _fFoundWhitespace = false;
  195. _fFoundNonWhitespace = false;
  196. //_fFoundPEREf = false;
  197. _fWasUsingBuffer = false;
  198. _chNextLookahead = 0;
  199. //_lParseStringLevel = 0;
  200. //_cConditionalSection = 0;
  201. //_cIgnoreSectLevel = 0;
  202. //_fWasDTD = false;
  203. _fParsingAttDef = false;
  204. _fFoundFirstElement = false;
  205. _fReturnAttributeValue = true;
  206. //_fHandlePE = true;
  207. _pTable = NULL;
  208. //_lEOFError = 0;
  209. }
  210. ////////////////////////////////////////////////////////////////////////
  211. XMLStream::~XMLStream()
  212. {
  213. delete _pInput;
  214. delete[] _pchBuffer;
  215. InputInfo* pi = _pStreams.peek();
  216. while (pi != NULL)
  217. {
  218. // Previous stream is finished also, so
  219. // pop it and continue on.
  220. delete pi->_pInput;
  221. pi = _pStreams.pop();
  222. }
  223. }
  224. ////////////////////////////////////////////////////////////////////////
  225. HRESULT
  226. XMLStream::AppendData(
  227. /* [in] */ const BYTE *buffer,
  228. /* [in] */ long length,
  229. /* [in] */ BOOL last)
  230. {
  231. if (_pInput == NULL)
  232. {
  233. _pInput = NEW (BufferedStream(this));
  234. if (_pInput == NULL)
  235. return E_OUTOFMEMORY;
  236. init();
  237. }
  238. HRESULT hr = _pInput->AppendData(buffer, length, last);
  239. return hr;
  240. }
  241. ////////////////////////////////////////////////////////////////////////
  242. HRESULT
  243. XMLStream::Reset( void)
  244. {
  245. init();
  246. delete _pInput;
  247. _pInput = NULL;
  248. return S_OK;
  249. }
  250. ////////////////////////////////////////////////////////////////////////
  251. HRESULT
  252. XMLStream::PushStream(
  253. /* [unique][in] */ EncodingStream *p,
  254. /* [in] */ bool fExternalPE)
  255. {
  256. UNUSED(fExternalPE);
  257. if (_pStreams.used() == 0 && _pInput == NULL)
  258. init();
  259. _cStreamDepth++;
  260. if (_fDelayMark && _pInput != NULL)
  261. {
  262. mark(_lMarkDelta);
  263. _lMarkDelta = 0;
  264. _fDelayMark = false;
  265. }
  266. // Save current input stream.
  267. if (_pInput != NULL)
  268. {
  269. InputInfo* pi = _pStreams.push();
  270. if (pi == NULL)
  271. return E_OUTOFMEMORY;
  272. pi->_pInput = _pInput;
  273. pi->_chLookahead = _chLookahead;
  274. //pi->_fPE = true; // assume this is a parameter entity.
  275. //pi->_fExternalPE = fExternalPE;
  276. //pi->_fInternalSubset = _fInternalSubset;
  277. if (&XMLStream::skipWhiteSpace == _fnState && _pStack.used() > 0) {
  278. StateInfo* pSI = _pStack.peek();
  279. pi->_fnState = pSI->_fnState;
  280. }
  281. else
  282. pi->_fnState = _fnState;
  283. // and prepend pe text with space as per xml spec.
  284. _chLookahead = L' ';
  285. _chNextLookahead = _chLookahead;
  286. _pInput = NULL;
  287. }
  288. _pInput = NEW (BufferedStream(this));
  289. if (_pInput == NULL)
  290. return E_OUTOFMEMORY;
  291. if (p != NULL)
  292. _pInput->Load(p);
  293. if (_chLookahead == L' ')
  294. _pInput->setWhiteSpace(); // _pInput didn't see this space char.
  295. return S_OK;
  296. }
  297. ////////////////////////////////////////////////////////////////////////
  298. HRESULT
  299. XMLStream::PopStream()
  300. {
  301. // This method has to pop all streams until it finds a stream that
  302. // can deliver the next _chLookahead character.
  303. HRESULT hr = S_OK;
  304. InputInfo* pi = NULL;
  305. pi = _pStreams.peek();
  306. if (pi == NULL) return S_FALSE;
  307. _chLookahead = pi->_chLookahead;
  308. // Found previous stream, so we can continue.
  309. _fEOF = false;
  310. // Ok, so we actually got the next character, so
  311. // we can now safely throw away the previous
  312. // lookahead character and return the next
  313. // non-whitespace character from the previous stream.
  314. delete _pInput;
  315. _pInput = pi->_pInput;
  316. if (_chLookahead == L' ')
  317. _pInput->setWhiteSpace();
  318. // BUGBUG: we need to clear this so that the parser does not
  319. // try and pop a download in the internalPE case (when handling XML_E_ENDOFINPUT in run())
  320. // but this means that internal PEs never get XMLNF_ENDENTITY notifications generated.
  321. // The DTDNodeFactory requires this behaviour currently (incorrectly)
  322. _pStreams.pop();
  323. _cStreamDepth--;
  324. return hr;
  325. }
  326. ////////////////////////////////////////////////////////////////////////
  327. HRESULT
  328. XMLStream::GetNextToken(
  329. /* [out] */ DWORD *t,
  330. /* [out] */ const WCHAR **text,
  331. /* [out] */ long *length,
  332. /* [out] */ long *nslen)
  333. {
  334. HRESULT hr;
  335. if (_fDTD)
  336. return E_UNEXPECTED;
  337. if (_fDelayMark)
  338. {
  339. mark(_lMarkDelta);
  340. _lMarkDelta = 0;
  341. _fDelayMark = false;
  342. }
  343. hr = (this->*_fnState)();
  344. while (hr == S_OK && _nToken == XML_PENDING)
  345. hr = (this->*_fnState)();
  346. if (hr == S_OK)
  347. *t = _nToken;
  348. else if (hr == E_PENDING) {
  349. *t = XML_PENDING;
  350. *length = *nslen = 0;
  351. *text = NULL;
  352. goto CleanUp;
  353. }
  354. else
  355. *t = XML_PENDING;
  356. // At this point hr == S_OK or it is some error. So we
  357. // want to return the text of the current token, since this
  358. // is useful in both cases.
  359. if (! _fUsingBuffer)
  360. {
  361. getToken(text,length);
  362. if (_lLengthDelta != 0)
  363. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK : in ParsingAttributeValue, we have to read ahead of one char '"'
  364. *length += _lLengthDelta;
  365. _lLengthDelta = 0;
  366. }
  367. // This can only happen in the context of a DTD.
  368. // if (_fWasUsingBuffer)
  369. // {
  370. // _fUsingBuffer = _fWasUsingBuffer;
  371. // _fWasUsingBuffer = false;
  372. // }
  373. }
  374. else
  375. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
  376. *text = _pchBuffer;
  377. *length = _lBufLen;
  378. _fUsingBuffer = false;
  379. _fFoundWhitespace = false;
  380. _lBufLen = 0;
  381. _lLengthDelta = 0;
  382. }
  383. if (DELAYMARK(hr))
  384. {
  385. // Mark next time around so that error information points to the
  386. // beginning of this token.
  387. _fDelayMark = true;
  388. }
  389. else
  390. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
  391. // otherwise mark this spot right away so we point to the exact
  392. // source of the error.
  393. mark(_lMarkDelta);
  394. _lMarkDelta = 0;
  395. }
  396. _nToken = XML_PENDING;
  397. *nslen = _lNslen;
  398. _lNslen = _lNssep = 0;
  399. CleanUp:
  400. return hr;
  401. }
  402. ////////////////////////////////////////////////////////////////////////
  403. ULONG
  404. XMLStream::GetLine()
  405. {
  406. BufferedStream* input = getCurrentStream();
  407. if (input != NULL)
  408. return input->getLine();
  409. return 0;
  410. }
  411. ////////////////////////////////////////////////////////////////////////
  412. ULONG
  413. XMLStream::GetLinePosition( )
  414. {
  415. BufferedStream* input = getCurrentStream();
  416. if (input != NULL)
  417. return input->getLinePos();
  418. return 0;
  419. }
  420. ////////////////////////////////////////////////////////////////////////
  421. ULONG
  422. XMLStream::GetInputPosition( )
  423. {
  424. BufferedStream* input = getCurrentStream();
  425. if (input != NULL)
  426. return input->getInputPos();
  427. return 0;
  428. }
  429. ////////////////////////////////////////////////////////////////////////
  430. HRESULT
  431. XMLStream::GetLineBuffer(
  432. /* [out] */ const WCHAR * *buf, ULONG* len, ULONG* startpos)
  433. {
  434. if (buf == NULL || len == NULL)
  435. return E_INVALIDARG;
  436. *buf = NULL;
  437. BufferedStream* input = getCurrentStream();
  438. if (input)
  439. *buf = input->getLineBuf(len, startpos);
  440. return S_OK;
  441. }
  442. ////////////////////////////////////////////////////////////////////////
  443. BufferedStream*
  444. XMLStream::getCurrentStream()
  445. {
  446. // Return the most recent stream that
  447. // actually has somthing to return.
  448. BufferedStream* input = _pInput;
  449. if (!_pInput)
  450. {
  451. return NULL;
  452. }
  453. int i = _pStreams.used()-1;
  454. do
  455. {
  456. ULONG len = 0, pos = 0;
  457. // const WCHAR* buf = input->getLineBuf(&len, &pos); // generates C4189: 'buf' local variable is initialized but not referenced
  458. (void) input->getLineBuf(&len, &pos);
  459. if (len > 0)
  460. return input;
  461. if (i >= 0)
  462. input = _pStreams[i--]->_pInput;
  463. else
  464. break;
  465. }
  466. while (input != NULL);
  467. return NULL;
  468. }
  469. ////////////////////////////////////////////////////////////////////////
  470. void
  471. XMLStream::SetFlags( unsigned short usFlags)
  472. {
  473. _usFlags = usFlags;
  474. // And break out the flags for performance reasons.
  475. //_fFloatingAmp = (usFlags & XMLFLAG_FLOATINGAMP) != 0;
  476. _fShortEndTags = (usFlags & XMLFLAG_SHORTENDTAGS) != 0;
  477. _fCaseInsensitive = (usFlags & XMLFLAG_CASEINSENSITIVE) != 0;
  478. _fNoNamespaces = (usFlags & XMLFLAG_NONAMESPACES) != 0;
  479. //_fNoWhitespaceNodes = false; // this is now bogus. (usFlags & XMLFLAG_NOWHITESPACE) != 0;
  480. //_fIE4Quirks = (_usFlags & XMLFLAG_IE4QUIRKS) != 0;
  481. //_fNoDTDNodes = (_usFlags & XMLFLAG_NODTDNODES) != 0;
  482. }
  483. ////////////////////////////////////////////////////////////////////////
  484. unsigned short
  485. XMLStream::GetFlags()
  486. {
  487. return _usFlags;
  488. }
  489. ////////////////////////////////////////////////////////////////////////
  490. //======================================================================
  491. // Real Implementation
  492. HRESULT
  493. XMLStream::firstAdvance()
  494. {
  495. HRESULT hr;
  496. ADVANCE;
  497. checkhr2(pop(false));
  498. return S_OK;
  499. }
  500. ////////////////////////////////////////////////////////////////////////
  501. HRESULT
  502. XMLStream::parseContent()
  503. {
  504. HRESULT hr = S_OK;
  505. if (_fEOF)
  506. return XML_E_ENDOFINPUT;
  507. switch (_chLookahead){
  508. case L'<':
  509. ADVANCE;
  510. checkeof(_chLookahead, XML_E_UNCLOSEDDECL);
  511. switch (_chLookahead)
  512. {
  513. case L'!':
  514. checkhr2(_pInput->Freeze()); // stop shifting data until '>'
  515. return pushTable( 0, g_DeclarationTable, (DWORD)XML_E_UNCLOSEDDECL);
  516. case L'?':
  517. checkhr2(push( &XMLStream::parsePI ));
  518. return parsePI();
  519. case L'/':
  520. checkhr2(push(&XMLStream::parseEndTag));
  521. return parseEndTag();
  522. default:
  523. checkhr2(push( &XMLStream::parseElement )); // push ParseContent, and _fnState = parseElement
  524. if (_fFoundFirstElement)
  525. {
  526. return parseElement();
  527. }
  528. else
  529. {
  530. // Return special end prolog token and then continue with
  531. // with parseElement.
  532. _fFoundFirstElement = true;
  533. _nToken = XML_ENDPROLOG;
  534. }
  535. }
  536. break;
  537. default:
  538. checkhr2(push(&XMLStream::parsePCData));
  539. return parsePCData();
  540. break;
  541. }
  542. return S_OK;
  543. }
  544. ////////////////////////////////////////////////////////////////////////
  545. HRESULT
  546. XMLStream::skipWhiteSpace()
  547. {
  548. HRESULT hr = S_OK;
  549. while (ISWHITESPACE(_chLookahead) && ! _fEOF)
  550. {
  551. ADVANCE;
  552. }
  553. checkhr2(pop(false));
  554. return hr;
  555. }
  556. ////////////////////////////////////////////////////////////////////////
  // Tokenizes a start tag. Entered from parseContent() with the lookahead on
  // the first character after '<'. Resumable: _sSubState selects the case to
  // continue at on each call.
  //   0 - freeze the input, parse the element name
  //   1 - emit XML_ELEMENT; go straight to 2 for '/' or '>', otherwise
  //       require whitespace and hand over to parseAttributes (back at 2)
  //   2 - classify the tag end: '/' -> XML_EMPTYTAGEND, '>' -> XML_TAGEND
  //   3 - require and consume '>', pop back to the caller, unfreeze
  //   4 - bad-tag compatibility path (not entered from the visible code)
  // Returns S_OK, a result propagated by ADVANCE/checkhr2, or an XML_E_* error.
  557. HRESULT
  558. XMLStream::parseElement()
  559. {
  560. HRESULT hr = S_OK;
  561. switch (_sSubState)
  562. {
  563. case 0:
  564. checkhr2(_pInput->Freeze()); // stop shifting data until '>'
  // parseName is pushed with 1 so that this function continues at case 1.
  565. checkhr2(push( &XMLStream::parseName, 1));
  566. checkhr2(parseName());
  567. _sSubState = 1;
  568. // fall through
  569. case 1:
  570. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  571. _nToken = XML_ELEMENT;
  572. // and then try and parse the attributes, and return
  573. // to state 2 to finish up. With an optimization
  574. // for the case where there are no attributes.
  575. if (_chLookahead == L'/' || _chLookahead == L'>')
  576. {
  577. _sSubState = 2;
  578. }
  579. else {
  580. if (!ISWHITESPACE(_chLookahead))
  581. {
  582. return XML_E_BADNAMECHAR;
  583. }
  584. _chEndChar = L'/'; // for empty tags. //xiaoyu : used to match ENDTAG
  585. checkhr2(push(&XMLStream::parseAttributes,2));
  586. }
  587. return S_OK;
  588. break;
  589. case 2: // finish up with start tag.
  590. mark(); // only return '>' or '/>' in _nToken text
  591. if (_chLookahead == L'/')
  592. {
  593. // must be empty tag sequence '/>'.
  594. ADVANCE;
  595. _nToken = XML_EMPTYTAGEND;
  596. }
  597. else if (_chLookahead == L'>')
  598. {
  599. _nToken = XML_TAGEND;
  600. }
  601. else if (ISWHITESPACE(_chLookahead))
  602. {
  603. return XML_E_UNEXPECTED_WHITESPACE;
  604. }
  605. else
  606. return XML_E_EXPECTINGTAGEND;
  607. _sSubState = 3;
  608. // fall through
  609. case 3:
  // Both tag-end forms must now be sitting on the closing '>'.
  610. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  611. if (_chLookahead != L'>')
  612. {
  613. if (ISWHITESPACE(_chLookahead))
  614. return XML_E_UNEXPECTED_WHITESPACE;
  615. else
  616. return XML_E_EXPECTINGTAGEND;
  617. }
  618. ADVANCE;
  619. mark();
  620. checkhr2(pop());// return to parseContent.
  621. return _pInput->UnFreeze();
  622. break;
  623. case 4: // swallow up bad tag
  624. // Allow the weird CDF madness <PRECACHE="YES"/>
  625. // For total compatibility we fake out the parser by returning
  626. // XML_EMPTYTAGEND, this way the rest of the tag becomes PCDATA.
  627. // YUK -- but it works.
  628. _nToken = XML_EMPTYTAGEND;
  629. mark();
  630. checkhr2(pop());// return to parseContent.
  631. return _pInput->UnFreeze();
  632. break;
  633. default:
  634. INTERNALERROR;
  635. }
  636. //return S_OK;
  637. }
  638. ////////////////////////////////////////////////////////////////////////
  639. HRESULT
  640. XMLStream::parseEndTag()
  641. {
  642. HRESULT hr = S_OK;
  643. switch (_sSubState)
  644. {
  645. case 0:
  646. ADVANCE; // soak up the '/'
  647. mark();
  648. // SHORT END TAG SUPPORT, IE4 Compatibility Mode only.
  649. if (! _fShortEndTags || _chLookahead != L'>')
  650. {
  651. checkhr2(push( &XMLStream::parseName, 1));
  652. checkhr2(parseName());
  653. }
  654. _sSubState = 1;
  655. // fall through
  656. case 1: // finish parsing end tag
  657. checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG);
  658. _nToken = XML_ENDTAG;
  659. checkhr2(push(&XMLStream::skipWhiteSpace, 2));
  660. return S_OK;
  661. case 2:
  662. checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG);
  663. if (_chLookahead != L'>')
  664. {
  665. return XML_E_BADNAMECHAR;
  666. }
  667. ADVANCE;
  668. mark();
  669. checkhr2(pop());// return to parseContent.
  670. break;
  671. default:
  672. INTERNALERROR;
  673. }
  674. return S_OK;
  675. }
  676. ////////////////////////////////////////////////////////////////////////
  // Tokenizes a processing instruction or the "<?xml ...?>" declaration.
  // Entered from parseContent() with the lookahead on the '?' of "<?".
  // Resumable: _sSubState selects the case to continue at on each call.
  //   0     - consume '?', freeze, mark; a leading x/X diverts to 7
  //   1     - parse the PI target name
  //   2     - target must end in '?' or whitespace; emit XML_PI
  //   3     - "?>" right after the target goes to 6, else skip whitespace
  //   4     - mark the start of the PI data
  //   5     - scan PI data up to and including the next '?'
  //   6     - '>' ends the PI (XML_ENDPI, pop, unfreeze); otherwise the '?'
  //           was data and scanning continues at 5
  //   7,8,9 - match 'm', 'l' and the character after "xml"; whitespace there
  //           starts the XML declaration table, which resumes at 10
  //   10    - declaration finished: pop, unfreeze
  //   11,12 - target merely started like "xml": continue as an ordinary
  //           name and rejoin at 2
  // Returns S_OK, a result propagated by ADVANCE/checkhr2, or an XML_E_* error.
  677. HRESULT
  678. XMLStream::parsePI()
  679. {
  680. HRESULT hr = S_OK;
  681. switch (_sSubState)
  682. {
  683. case 0:
  684. //_fWasDTD = _fDTD; // as far as Advance is concerned, the contents
  685. //_fHandlePE = false; // of a PI are not special.
  686. ADVANCE;
  687. checkhr2(_pInput->Freeze()); // stop shifting data until '?>'
  688. mark(); // don't include '?' in tag name.
  689. if (_chLookahead == L'x' || _chLookahead == L'X')
  690. {
  691. // perhaps this is the magic <?xml version="1.0"?> declaration.
  692. STATE(7); // jump to state 7.
  693. }
  694. // fall through
  695. _sSubState = 1;
  696. case 1:
  697. checkhr2(push( &XMLStream::parseName, 2));
  698. checkhr2(parseName());
  699. _sSubState = 2;
  700. // fall through
  701. case 2:
  702. checkeof(_chLookahead, XML_E_UNCLOSEDPI);
  703. if (_chLookahead != L'?' && ! ISWHITESPACE(_chLookahead))
  704. {
  705. return XML_E_BADNAMECHAR;
  706. }
  707. _nToken = XML_PI;
  708. STATE(3); // found startpi _nToken and return to _sSubState 3
  709. break;
  710. case 3: // finish with rest of PI
  711. if (_chLookahead == L'?')
  712. {
  713. ADVANCE;
  714. if (_chLookahead == L'>')
  715. {
  716. STATE(6);
  717. }
  718. else
  719. {
  720. return XML_E_EXPECTINGTAGEND;
  721. }
  722. }
  723. checkhr2(push(&XMLStream::skipWhiteSpace, 4));
  724. checkhr2( skipWhiteSpace() );
  725. _sSubState = 4;
  726. // fall through
  727. case 4: // support for normalized whitespace
  728. mark(); // strip whitespace from beginning of PI data, since this is
  729. // just the separator between the PI target name and the PI data.
  730. _sSubState = 5;
  731. // fallthrough
  732. case 5:
  733. while (! _fEOF )
  734. {
  735. if (_chLookahead == L'?')
  736. {
  737. ADVANCE;
  738. break;
  739. }
  740. if (! isCharData(_chLookahead))
  741. return XML_E_PIDECLSYNTAX;
  742. ADVANCE;
  743. }
  744. _sSubState = 6; // go to next state
  745. // fall through.
  746. case 6:
  747. checkeof(_chLookahead, XML_E_UNCLOSEDPI);
  748. if (_chLookahead == L'>')
  749. {
  750. ADVANCE;
  751. _lLengthDelta = -2; // don't include '?>' in PI CDATA.
  752. }
  753. else
  754. {
  755. // Hmmm. Must be a lone '?' so go back to state 5.
  756. STATE(5);
  757. }
  758. _nToken = XML_ENDPI;
  759. //_fHandlePE = true;
  760. checkhr2(pop());
  761. return _pInput->UnFreeze();
  762. break;
  763. case 7: // recognize 'm' in '<?xml' declaration
  764. ADVANCE;
  765. if (_chLookahead != L'm' && _chLookahead != L'M')
  766. {
  767. STATE(11); // not 'xml' so jump to state 11 to parse name
  768. }
  769. _sSubState = 8;
  770. // fall through
  771. case 8: // recognize L'l' in '<?xml' declaration
  772. ADVANCE;
  773. if (_chLookahead != L'l' && _chLookahead != L'L')
  774. {
  775. STATE(11); // not 'xml' so jump to state 11 to parse name
  776. }
  777. _sSubState = 9;
  778. // fall through
  779. case 9: // now need whitespace or ':' or '?' to terminate name.
  780. ADVANCE;
  781. if (ISWHITESPACE(_chLookahead))
  782. {
  // Unless case-insensitive mode is on, the three characters matched above
  // must be exactly lower-case "xml".
  783. if (! _fCaseInsensitive)
  784. {
  785. const WCHAR* t = NULL;
  786. long len =0; // for prefix bug : xiaoyuw@08/28/00
  787. getToken(&t,&len);
  788. //if (! StringEquals(L"xml",t,3,false)) // case sensitive
  789. if (::FusionpCompareStrings(L"xml", 3, t, 3, false)!=0) // not equal
  790. return XML_E_BADXMLCASE;
  791. }
  792. return pushTable(10, g_XMLDeclarationTable, (DWORD)XML_E_UNCLOSEDPI);
  793. }
  794. if (isNameChar(_chLookahead) || _chLookahead == ':')
  795. {
  796. STATE(11); // Hmmm. Must be something else then so continue parsing name
  797. }
  798. else
  799. {
  800. return XML_E_XMLDECLSYNTAX;
  801. }
  802. break;
  803. case 10:
  804. //_fHandlePE = true;
  805. checkhr2(pop());
  806. return _pInput->UnFreeze();
  807. break;
  808. case 11:
  // NOTE: ADVANCE expands to two statements, so only the nextChar() call is
  // governed by this 'if'; the trailing 'hr != S_OK' check always runs. That
  // is harmless here because hr is still S_OK when this case is entered.
  809. if (_chLookahead == ':')
  810. ADVANCE;
  811. _sSubState = 12;
  812. // fall through
  813. case 12:
  814. if (isNameChar(_chLookahead))
  815. {
  816. checkhr2(push( &XMLStream::parseName, 2));
  817. _sSubState = 1; // but skip IsStartNameChar test
  818. checkhr2(parseName());
  819. return S_OK;
  820. }
  821. else
  822. {
  823. STATE(2);
  824. }
  825. break;
  826. default:
  827. INTERNALERROR;
  828. }
  829. //return S_OK;
  830. }
  831. ////////////////////////////////////////////////////////////////////////
  // Tokenizes a comment once "<!-" has been consumed. Resumable: _sSubState
  // selects the case to continue at on each call.
  //   0 - consume the first '-', require a second one
  //   1 - consume the second '-', mark the start of the comment text
  //   2 - scan comment text up to and including the next '-'
  //   3 - a second '-' must follow, otherwise it was a lone '-' (back to 2)
  //   4 - "--" must be followed by '>'; emit XML_COMMENT and pop
  // Returns S_OK, a result propagated by ADVANCE/checkhr2,
  // XML_E_UNCLOSEDCOMMENT at end of input, XML_E_COMMENTSYNTAX or
  // XML_E_BADCHARDATA for malformed input.
  832. HRESULT
  833. XMLStream::parseComment()
  834. {
  835. // ok, so '<!-' has been parsed so far
  836. HRESULT hr = S_OK;
  837. switch (_sSubState)
  838. {
  839. case 0:
  840. //_fWasDTD = _fDTD; // as far as the DTD is concerned, the contents
  841. //_fHandlePE = false; // of a COMMENT are not special.
  842. ADVANCE; // soak up first '-'
  843. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  844. if (_chLookahead != L'-')
  845. {
  846. return XML_E_COMMENTSYNTAX;
  847. }
  848. _sSubState = 1;
  849. // fall through
  850. case 1:
  851. ADVANCE; // soak up second '-'
  852. mark(); // don't include '<!--' in comment text
  853. _sSubState = 2;
  854. // fall through;
  855. case 2:
  856. while (! _fEOF)
  857. {
  858. if (_chLookahead == L'-')
  859. {
  860. ADVANCE; // soak up first closing L'-'
  861. break;
  862. }
  863. if (! isCharData(_chLookahead))
  864. return XML_E_BADCHARDATA;
  865. ADVANCE;
  866. }
  867. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  868. _sSubState = 3; // advance to next state
  869. // fall through.
  870. case 3:
  871. if (_chLookahead != L'-')
  872. {
  873. // Hmmm, must have been a floating L'-' so go back to state 2
  874. STATE(2);
  875. }
  876. ADVANCE; // soak up second closing L'-'
  877. _sSubState = 4;
  878. // fall through
  879. case 4:
  880. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  881. //if (_chLookahead != L'>' && ! _fIE4Quirks)
  882. if (_chLookahead != L'>')
  883. {
  884. // cannot have floating L'--' unless we are in compatibility mode.
  885. return XML_E_COMMENTSYNTAX;
  886. }
  887. ADVANCE; // soak up closing L'>'
  888. _lLengthDelta = -3; // don't include L'-->' in the comment text.
  889. _nToken = XML_COMMENT;
  890. checkhr2(pop());
  891. //_fHandlePE = true;
  892. break;
  893. default:
  894. INTERNALERROR;
  895. }
  896. return S_OK;
  897. }
  898. ////////////////////////////////////////////////////////////////////////
  899. HRESULT
  900. XMLStream::parseName()
  901. {
  902. HRESULT hr = S_OK;
  903. switch (_sSubState)
  904. {
  905. case 0:
  906. if (! isStartNameChar(_chLookahead))
  907. {
  908. if (ISWHITESPACE(_chLookahead))
  909. hr = XML_E_UNEXPECTED_WHITESPACE;
  910. else
  911. hr = XML_E_BADSTARTNAMECHAR;
  912. goto CleanUp;
  913. }
  914. mark();
  915. _sSubState = 1;
  916. // fall through
  917. case 1:
  918. _lNslen = _lNssep = 0;
  919. while (isNameChar(_chLookahead) && !_fEOF)
  920. {
  921. ADVANCE;
  922. }
  923. hr = pop(false); // return to the previous state
  924. break;
  925. default:
  926. INTERNALERROR;
  927. }
  928. CleanUp:
  929. return hr;
  930. }
  931. ////////////////////////////////////////////////////////////////////////
  // Tokenizes the attribute list of a start tag. Pushed by parseElement()
  // after it has set _chEndChar. Resumable: _sSubState selects the case to
  // continue at on each call.
  //   0 - skip whitespace in front of the next attribute
  //   1 - _chEndChar or '>' ends the list (pop); otherwise parse the name
  //   2 - whitespace after the name detours through 7/8; else emit
  //       XML_ATTRIBUTE
  //   3 - no whitespace allowed here; reset _fWhitespace
  //   4 - require '=', consume it, skip whitespace after it
  //   5 - require an opening quote, remember it in _chTerminator, hand the
  //       value over to parseAttrValue (which returns to 6)
  //   6 - after the value: end of list (pop) or mandatory whitespace -> 0
  //   7 - remember the token length, skip whitespace between name and '='
  //   8 - turn the skipped whitespace into a negative _lLengthDelta, which
  //       GetNextToken() adds to the reported token length; back to 2
  // Returns S_OK, a result propagated by ADVANCE/checkhr2/push, or an
  // XML_E_* error.
  932. HRESULT
  933. XMLStream::parseAttributes()
  934. {
  935. HRESULT hr = S_OK;
  936. switch (_sSubState)
  937. {
  938. case 0:
  939. //_nAttrType = XMLTYPE_CDATA;
  940. _fCheckAttribute = false;
  941. checkhr2(push(&XMLStream::skipWhiteSpace, 1));
  942. checkhr2( skipWhiteSpace() );
  943. _sSubState = 1;
  944. // fall through
  945. case 1:
  946. if (_chLookahead == _chEndChar || _chLookahead == L'>' )
  947. {
  948. checkhr2(pop()); // no attributes.
  949. return S_OK;
  950. }
  951. checkhr2( push( &XMLStream::parseName, 2 ) );
  952. checkhr2( parseName() );
  953. if (!ISWHITESPACE(_chLookahead) && _chLookahead != L'=')
  954. {
  955. return XML_E_BADNAMECHAR;
  956. }
  957. _sSubState = 2;
  958. // fall through
  959. case 2:
  960. if (ISWHITESPACE(_chLookahead))
  961. {
  962. // Eq ::= S? '=' S?
  963. STATE(7);
  964. }
  965. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  966. _nToken = XML_ATTRIBUTE;
  967. _sSubState = 3;
  968. return S_OK;
  969. break;
  970. case 3:
  971. if (ISWHITESPACE(_chLookahead))
  972. return XML_E_UNEXPECTED_WHITESPACE;
  973. _fWhitespace = false;
  974. _sSubState = 4;
  975. // fall through
  976. case 4:
  977. if (_chLookahead != L'=')
  978. {
  979. return XML_E_MISSINGEQUALS;
  980. }
  981. ADVANCE;
  982. if (ISWHITESPACE(_chLookahead))
  983. {
  984. // allow whitespace between '=' and attribute value.
  985. checkhr2(push(&XMLStream::skipWhiteSpace, 5));
  986. checkhr2( skipWhiteSpace() );
  987. }
  988. _sSubState = 5;
  989. // fall through
  990. case 5:
  991. if (ISWHITESPACE(_chLookahead))
  992. return XML_E_UNEXPECTED_WHITESPACE;
  993. if (_chLookahead != L'"' && _chLookahead != L'\'')
  994. {
  995. return XML_E_MISSINGQUOTE;
  996. }
  // The opening quote doubles as the terminator parseAttrValue looks for.
  997. _chTerminator = _chLookahead;
  998. ADVANCE;
  999. mark();
  1000. return push(&XMLStream::parseAttrValue, 6);
  1001. //_sSubState = 6;
  1002. // fall through;
  1003. case 6:
  1004. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  1005. if (_chLookahead == _chEndChar || _chLookahead == L'>')
  1006. {
  1007. checkhr2(pop());
  1008. return S_OK;
  1009. }
  1010. if (! ISWHITESPACE(_chLookahead) )
  1011. {
  1012. return XML_E_MISSINGWHITESPACE;
  1013. }
  1014. STATE(0); // go back to state 0
  1015. break;
  1016. case 7:
  1017. // allow whitespace between attribute and '='
  1018. _lLengthDelta = _pInput->getTokenLength();
  1019. checkhr2(push(&XMLStream::skipWhiteSpace, 8));
  1020. checkhr2( skipWhiteSpace() );
  1021. _sSubState = 8;
  1022. // fall through
  1023. case 8:
  1024. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  // (length before skipping) - (length after skipping): the adjustment that
  // removes the skipped whitespace from the attribute-name token.
  1025. _lLengthDelta -= _pInput->getTokenLength();
  1026. STATE(2);
  1027. break;
  1028. default:
  1029. INTERNALERROR;
  1030. }
  1031. //return hr;
  1032. }
  1033. ////////////////////////////////////////////////////////////////////////
  1034. HRESULT XMLStream::parseAttrValue()
  1035. {
  1036. HRESULT hr = S_OK;
  1037. switch (_sSubState)
  1038. {
  1039. case 0:
  1040. _fParsingAttDef = true;
  1041. // mark beginning of attribute data
  1042. _sSubState = 2;
  1043. // fall through;
  1044. case 2:
  1045. while ( _chLookahead != _chTerminator &&
  1046. _chLookahead != L'<' &&
  1047. ! _fEOF )
  1048. {
  1049. if (_chLookahead == L'&')
  1050. {
  1051. // then parse entity ref and then return
  1052. // to state 2 to continue with PCDATA.
  1053. return push(&XMLStream::parseEntityRef,2);
  1054. }
  1055. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1056. if (FAILED(hr))
  1057. {
  1058. if (hr == E_PENDING)
  1059. {
  1060. hr = S_OK;
  1061. ADVANCE;
  1062. }
  1063. return hr;
  1064. }
  1065. }
  1066. _sSubState = 3;
  1067. // fall through
  1068. case 3:
  1069. checkeof(_chLookahead, XML_E_UNCLOSEDSTRING);
  1070. if (_chLookahead == _chTerminator)
  1071. {
  1072. ADVANCE;
  1073. if (_fReturnAttributeValue)
  1074. {
  1075. // return what we have so far - if anything.
  1076. if ((_fUsingBuffer && _lBufLen > 0) ||
  1077. _pInput->getTokenLength() > 1)
  1078. {
  1079. _lLengthDelta = -1; // don't include string _chTerminator.
  1080. _nToken = XML_PCDATA;
  1081. }
  1082. }
  1083. else
  1084. {
  1085. _fReturnAttributeValue = true; // reset to default value.
  1086. }
  1087. _fParsingAttDef = false;
  1088. checkhr2(pop());
  1089. return S_OK;
  1090. }
  1091. else
  1092. {
  1093. return XML_E_BADCHARINSTRING;
  1094. }
  1095. break;
  1096. default:
  1097. INTERNALERROR;
  1098. }
  1099. //return hr;
  1100. }
  1101. ////////////////////////////////////////////////////////////////////////
  1102. HRESULT
  1103. XMLStream::ScanHexDigits()
  1104. {
  1105. HRESULT hr = S_OK;
  1106. while (! _fEOF && _chLookahead != L';')
  1107. {
  1108. if (! isHexDigit(_chLookahead))
  1109. {
  1110. return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
  1111. }
  1112. ADVANCE;
  1113. }
  1114. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1115. return hr;
  1116. }
  1117. ////////////////////////////////////////////////////////////////////////
  1118. HRESULT
  1119. XMLStream::ScanDecimalDigits()
  1120. {
  1121. HRESULT hr = S_OK;
  1122. while (! _fEOF && _chLookahead != L';')
  1123. {
  1124. if (! isDigit(_chLookahead))
  1125. {
  1126. return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
  1127. }
  1128. ADVANCE;
  1129. }
  1130. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1131. return hr;
  1132. }
  1133. ////////////////////////////////////////////////////////////////////////
  1134. HRESULT
  1135. XMLStream::parsePCData()
  1136. {
  1137. HRESULT hr = S_OK;
  1138. switch (_sSubState)
  1139. {
  1140. case 0:
  1141. _fWhitespace = true;
  1142. _sSubState = 1;
  1143. // fall through;
  1144. case 1:
  1145. // This state is used when we are not normalizing white space. This
  1146. // is a separate state for performance reasons.
  1147. // Normalizing whitespace is about 11% slower.
  1148. while (_chLookahead != L'<' && ! _fEOF )
  1149. {
  1150. if (_chLookahead == L'&')
  1151. {
  1152. // then parse entity ref and then return
  1153. // to state 1 to continue with PCDATA.
  1154. return push(&XMLStream::parseEntityRef,1);
  1155. }
  1156. if (_chLookahead == L'>')
  1157. {
  1158. WCHAR* pText = NULL;
  1159. long len = 0;
  1160. _pInput->getToken((const WCHAR**)&pText, &len);
  1161. //if (len >= 2 && StrCmpN(L"]]", pText + len - 2, 2) == 0)
  1162. if ((len >= 2) && (::FusionpCompareStrings(L"]]", 2, pText + len - 2, 2, false)==0))
  1163. return XML_E_INVALID_CDATACLOSINGTAG;
  1164. }
  1165. // This slows us down too much.
  1166. // else if (! isCharData(_chLookahead))
  1167. // {
  1168. // return XML_E_BADCHARDATA;
  1169. // }
  1170. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1171. if (FAILED(hr))
  1172. {
  1173. if (hr == E_PENDING)
  1174. {
  1175. hr = S_OK;
  1176. ADVANCE;
  1177. }
  1178. return hr;
  1179. }
  1180. checkhr2(hr);
  1181. }
  1182. _sSubState = 2;
  1183. // fall through
  1184. case 2:
  1185. if (_pInput->getTokenLength() > 0 || _fUsingBuffer)
  1186. {
  1187. _nToken = _fWhitespace ? XML_WHITESPACE : XML_PCDATA;
  1188. }
  1189. checkhr2(pop());
  1190. break;
  1191. default:
  1192. INTERNALERROR;
  1193. }
  1194. return S_OK;
  1195. }
  1196. ////////////////////////////////////////////////////////////////////////
  1197. HRESULT
  1198. XMLStream::parseEntityRef()
  1199. {
  1200. HRESULT hr = S_OK;
  1201. long entityLen = 0, lLen = 1;
  1202. const WCHAR* t = NULL;
  1203. long len = 0;
  1204. Start:
  1205. switch (_sSubState)
  1206. {
  1207. case 0: // ^ ( '&#' [0-9]+ ) | ('&#X' [0-9a-fA-F]+) | ('&' Name) ';'
  1208. _nPreToken = XML_PENDING;
  1209. _lEntityPos = _pInput->getTokenLength(); // record entity position.
  1210. _fPCDataPending = (_lEntityPos > 0);
  1211. if (PreEntityText())
  1212. {
  1213. // remember the pending text before parsing the entity.
  1214. _nPreToken = _nToken;
  1215. _nToken = XML_PENDING;
  1216. }
  1217. _sSubState = 1;
  1218. // fall through
  1219. case 1:
  1220. ADVANCE; // soak up the '&'
  1221. _sSubState = 2;
  1222. // fall through
  1223. case 2:
  1224. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1225. if (_chLookahead == L'#')
  1226. {
  1227. ADVANCE;
  1228. _sSubState = 3;
  1229. // fall through
  1230. }
  1231. else
  1232. {
  1233. // Loose entity parsing allows "...&6..."
  1234. if (! isStartNameChar(_chLookahead))
  1235. {
  1236. /*
  1237. if (_fFloatingAmp)
  1238. {
  1239. // then it isn't an entity reference, so go back to PCDATA
  1240. if (_fUsingBuffer)
  1241. {
  1242. // this in case we are normalizing white space.
  1243. PushChar(L'&');
  1244. }
  1245. _fWhitespace = false;
  1246. checkhr2(pop());
  1247. return S_OK;
  1248. }
  1249. else */
  1250. if (ISWHITESPACE(_chLookahead))
  1251. return XML_E_UNEXPECTED_WHITESPACE;
  1252. else
  1253. return XML_E_BADSTARTNAMECHAR;
  1254. }
  1255. checkhr2(push(&XMLStream::parseName, 6));
  1256. _sSubState = 1; // avoid doing a mark() so we can return PCDATA if necessary.
  1257. return parseName();
  1258. }
  1259. break;
  1260. // ------------- Numeric entity references --------------------
  1261. case 3:
  1262. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1263. if (_chLookahead == L'x')
  1264. {
  1265. // hex character reference.
  1266. ADVANCE;
  1267. STATE(5); // go to state 5
  1268. }
  1269. _sSubState = 4;
  1270. // fall through
  1271. case 4: // '&#' ^ [0-9]+ ';'
  1272. checkhr2(ScanDecimalDigits());
  1273. if (_chLookahead != L';')
  1274. {
  1275. STATE(9);
  1276. }
  1277. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1278. getToken(&t, &len);
  1279. checkhr2(DecimalToUnicode(t + _lEntityPos + 2, entityLen - 2, _wcEntityValue));
  1280. lLen = 2;
  1281. _nToken = XML_NUMENTITYREF;
  1282. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1283. break;
  1284. case 5: // '&#X' ^ [0-9a-fA-F]+
  1285. checkhr2(ScanHexDigits());
  1286. if (_chLookahead != L';')
  1287. {
  1288. STATE(9);
  1289. }
  1290. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1291. getToken(&t, &len);
  1292. checkhr2(HexToUnicode(t + _lEntityPos + 3, entityLen - 3, _wcEntityValue));
  1293. lLen = 3;
  1294. _nToken = XML_HEXENTITYREF;
  1295. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1296. break;
  1297. // ------------- Named Entity References --------------------
  1298. case 6: // '&' Name ^ ';'
  1299. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1300. if (_chLookahead != L';')
  1301. {
  1302. STATE(9);
  1303. }
  1304. // If parseName found a namespace then we need to calculate the
  1305. // real nslen taking the pending PC data and '&' into account
  1306. // and remember this in case we have to return the PCDATA.
  1307. _nEntityNSLen = (_lNslen > 0) ? _lNslen - _lEntityPos - 1 : 0;
  1308. _fUsingBuffer = false;
  1309. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1310. getToken(&t, &len);
  1311. if (0 != (_wcEntityValue = BuiltinEntity(t + _lEntityPos + 1, entityLen - 1))) //||
  1312. //(_fIE4Quirks && 0xFFFF != (_wcEntityValue = LookupBuiltinEntity(t + _lEntityPos + 1, entityLen - 1))))
  1313. {
  1314. lLen = 1;
  1315. _nToken = XML_BUILTINENTITYREF;
  1316. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1317. }
  1318. else //xiaoyu : Fusion XML Parser does not support external ref,
  1319. // so, if it is not a builtIn ref, we would return error
  1320. return XML_E_MISSINGSEMICOLON;
  1321. break;
  1322. //xiaoyu : Fusion XML Parser does not support external ref
  1323. /*
  1324. if (_nPreToken != XML_PENDING)
  1325. {
  1326. // Return previous token (XML_PCDATA or XML_WHITESPACE)
  1327. _lLengthDelta = -entityLen;
  1328. _lMarkDelta = entityLen - 1; // don't include '&' in _nToken.
  1329. _nToken = _nPreToken;
  1330. STATE(7);
  1331. }
  1332. mark(entityLen-1); // don't include '&' in _nToken.
  1333. _sSubState = 7;
  1334. // fall through
  1335. case 7:
  1336. ADVANCE; // soak up the ';'
  1337. _nToken = XML_ENTITYREF;
  1338. _lNslen = _nEntityNSLen;
  1339. _lLengthDelta = -1; // don't include the ';'
  1340. STATE(8); // return token and resume in state 8.
  1341. break;
  1342. */
  1343. case 8:
  1344. mark();
  1345. checkhr2(pop());
  1346. return S_OK;
  1347. /*
  1348. case 9:
  1349. // Soft entity handling - we just continue with PCDATA in
  1350. // this case.
  1351. if (_fFloatingAmp)
  1352. {
  1353. if (_fUsingBuffer)
  1354. {
  1355. // this in case we are normalizing white space. In this case
  1356. // we have to copy what we have so far to the normalized buffer.
  1357. long endpos = _pInput->getTokenLength();
  1358. const WCHAR* t; long len;
  1359. getToken(&t, &len);
  1360. for (long i = _lEntityPos; i < endpos; i++)
  1361. PushChar(t[i]);
  1362. }
  1363. _fWhitespace = false;
  1364. checkhr2(pop());
  1365. return S_OK;
  1366. }
  1367. else
  1368. return XML_E_MISSINGSEMICOLON;
  1369. break;
  1370. */
  1371. case 10:
  1372. // Return the text before builtin or char entityref as XML_PCDATA
  1373. if (_nPreToken)
  1374. {
  1375. _nPreToken = _nToken;
  1376. _nToken = XML_PCDATA;
  1377. _lLengthDelta = -entityLen;
  1378. _lMarkDelta = entityLen - lLen; // don't include '&' in _nToken.
  1379. STATE(11); // return token and resume in state 12.
  1380. }
  1381. else
  1382. {
  1383. _nPreToken = _nToken;
  1384. mark(entityLen - lLen);
  1385. GOTOSTART(11);
  1386. }
  1387. break;
  1388. case 11:
  1389. // push the builtin entity
  1390. _fUsingBuffer = true;
  1391. PushChar(_wcEntityValue);
  1392. _nToken = _nPreToken;
  1393. STATE(12); // return token and resume in state 12.
  1394. break;
  1395. case 12:
  1396. ADVANCE; // soak up the ';'
  1397. STATE(8); // resume in state 8.
  1398. break;
  1399. default:
  1400. INTERNALERROR;
  1401. }
  1402. return S_OK;
  1403. }
  1404. ////////////////////////////////////////////////////////////////////////
  1405. HRESULT
  1406. XMLStream::pushTable(short substate, const StateEntry* table, DWORD le)
  1407. {
  1408. HRESULT hr = S_OK;
  1409. checkhr2(push(&XMLStream::parseTable, substate));
  1410. _pTable = table;
  1411. UNUSED(le);
  1412. //_lEOFError = le;
  1413. return hr;
  1414. }
  1415. ////////////////////////////////////////////////////////////////////////
  1416. HRESULT
  1417. XMLStream::push(StateFunc f, short s)
  1418. {
  1419. StateInfo* pSI = _pStack.push();
  1420. if (pSI == NULL)
  1421. return E_OUTOFMEMORY;
  1422. pSI->_sSubState = s;
  1423. pSI->_fnState = _fnState;
  1424. pSI->_pTable = _pTable;
  1425. pSI->_cStreamDepth = _cStreamDepth;
  1426. _sSubState = 0;
  1427. _fnState = f;
  1428. return S_OK;
  1429. }
  1430. ////////////////////////////////////////////////////////////////////////
  1431. HRESULT
  1432. XMLStream::pop(bool boundary)
  1433. {
  1434. StateInfo* pSI = _pStack.peek();
  1435. // prefix bug fix : xiaoyuw@08/29/00
  1436. ASSERT_NTC(pSI != NULL);
  1437. if (_fDTD &&
  1438. ! (_fParsingAttDef) && boundary && _cStreamDepth != pSI->_cStreamDepth) // _fParsingNames ||
  1439. {
  1440. // If we are in a PE and we are popping out to a state that is NOT in a PE
  1441. // and this is a pop where we need to check this condition, then return an error.
  1442. // For example, the following is not well formed because the parameter entity
  1443. // pops us out of the ContentModel state in which the PE was found:
  1444. // <!DOCTYPE foo [
  1445. // <!ENTITY % foo "a)">
  1446. // <!ELEMENT bar ( %foo; >
  1447. // ]>...
  1448. return XML_E_PE_NESTING;
  1449. }
  1450. _fnState = pSI->_fnState;
  1451. _sSubState = pSI->_sSubState;
  1452. _pTable = pSI->_pTable;
  1453. //_lEOFError = pSI->_lEOFError;
  1454. _pStack.pop();
  1455. return S_OK;
  1456. }
  1457. ////////////////////////////////////////////////////////////////////////
  1458. HRESULT
  1459. XMLStream::switchTo(StateFunc f)
  1460. {
  1461. HRESULT hr;
  1462. // Make sure we keep the old stream depth.
  1463. StateInfo* pSI = _pStack.peek();
  1464. // prefix bug fix : xiaoyuw@08/29/00
  1465. ASSERT_NTC(pSI != NULL);
  1466. int currentDepth = _cStreamDepth;
  1467. _cStreamDepth = pSI->_cStreamDepth;
  1468. checkhr2(pop(false));
  1469. checkhr2(push(f,_sSubState)); // keep return to _sSubState the same
  1470. _cStreamDepth = currentDepth;
  1471. return (this->*f)();
  1472. }
  1473. ////////////////////////////////////////////////////////////////////////
  1474. HRESULT
  1475. XMLStream::parseCondSect()
  1476. {
  1477. HRESULT hr = S_OK;
  1478. switch (_sSubState)
  1479. {
  1480. case 0:
  1481. ADVANCE; // soak up the '[' character
  1482. //if (_fFoundPEREf) return S_OK;
  1483. _sSubState = 1;
  1484. // fall through
  1485. case 1: // now match magic '[CDATA[' sequence.
  1486. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1487. if (_chLookahead == L'C')
  1488. {
  1489. _pchCDataState = g_pstrCDATA;
  1490. STATE(5); // goto state 5
  1491. }
  1492. _sSubState = 2; // must be IGNORE, INCLUDE or %pe;
  1493. // fall through
  1494. case 2: // must be DTD markup declaration
  1495. // '<![' ^ S? ('INCLUDE' | 'IGNORE' | %pe;) S? [...]]> or
  1496. // skip optional whitespace
  1497. //if (_fInternalSubset)
  1498. // return XML_E_CONDSECTINSUBSET;
  1499. checkeof(_chLookahead, XML_E_EXPECTINGOPENBRACKET);
  1500. checkhr2(push(&XMLStream::skipWhiteSpace, 3));
  1501. return skipWhiteSpace(); // must return because of %pe;
  1502. case 3:
  1503. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1504. checkhr2(push(&XMLStream::parseName,4));
  1505. return parseName();
  1506. case 4: // scanned 'INCLUDE' or 'IGNORE'
  1507. {
  1508. const WCHAR* t = NULL;
  1509. long len = 0;
  1510. getToken(&t,&len);
  1511. //if (StringEquals(L"IGNORE",t,len,false))
  1512. //{
  1513. // return switchTo(&XMLStream::parseIgnoreSect);
  1514. //}
  1515. //else if (StringEquals(L"INCLUDE",t,len,false))
  1516. //{
  1517. // return switchTo(&XMLStream::parseIncludeSect);
  1518. //}
  1519. //else
  1520. return XML_E_BADENDCONDSECT;
  1521. }
  1522. break;
  1523. case 5: // parse CDATA name
  1524. while (*_pchCDataState != 0 && _chLookahead == *_pchCDataState && ! _fEOF)
  1525. {
  1526. ADVANCE; // advance first, before incrementing _pchCDataState
  1527. _pchCDataState++; // so that this state is re-entrant in the E_PENDING case.
  1528. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1529. }
  1530. if (*_pchCDataState != 0)
  1531. {
  1532. // must be INCLUDE or IGNORE section so go to state 2.
  1533. _sSubState = 2;
  1534. }
  1535. else if (_chLookahead != L'[')
  1536. {
  1537. return XML_E_EXPECTINGOPENBRACKET;
  1538. }
  1539. else if (_fDTD)
  1540. return XML_E_CDATAINVALID;
  1541. else
  1542. return switchTo(&XMLStream::parseCData);
  1543. return S_OK;
  1544. break;
  1545. default:
  1546. INTERNALERROR;
  1547. }
  1548. return S_OK;
  1549. }
  1550. ////////////////////////////////////////////////////////////////////////
  1551. HRESULT
  1552. XMLStream::parseCData()
  1553. {
  1554. HRESULT hr = S_OK;
  1555. switch (_sSubState)
  1556. {
  1557. case 0:
  1558. ADVANCE; // soak up the '[' character.
  1559. mark(); // don't include 'CDATA[' in CDATA text
  1560. _sSubState = 1;
  1561. // fall through
  1562. case 1:
  1563. while (_chLookahead != L']' && ! _fEOF)
  1564. {
  1565. // scanPCData will stop when it sees a ']' character.
  1566. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1567. if (FAILED(hr))
  1568. {
  1569. if (hr == E_PENDING)
  1570. {
  1571. hr = S_OK;
  1572. ADVANCE;
  1573. }
  1574. return hr;
  1575. }
  1576. }
  1577. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1578. _sSubState = 2;
  1579. // fall through
  1580. case 2:
  1581. ADVANCE; // soak up first L']' character.
  1582. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1583. if (_chLookahead != L']')
  1584. {
  1585. // must have been floating ']' character, so
  1586. // return to state 1.
  1587. STATE(1);
  1588. }
  1589. _sSubState = 3;
  1590. // fall through
  1591. case 3:
  1592. ADVANCE; // soak up second ']' character.
  1593. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1594. if (_chLookahead == L']')
  1595. {
  1596. // Ah, an extra ']' character, tricky !!
  1597. // In this case we stay in state 3 until we find a non ']' character
  1598. // so you can terminate a CDATA section with ']]]]]]]]]]]]]]]]>'
  1599. // and everying except the final ']]>' is treated as CDATA.
  1600. STATE(3);
  1601. }
  1602. else if (_chLookahead != L'>')
  1603. {
  1604. // must have been floating "]]" pair, so
  1605. // return to state 1.
  1606. STATE(1);
  1607. }
  1608. _sSubState = 4;
  1609. // fall through
  1610. case 4:
  1611. ADVANCE; // soak up the '>'
  1612. _nToken = XML_CDATA;
  1613. _lLengthDelta = -3; // don't include terminating ']]>' in text.
  1614. checkhr2(pop()); // return to parseContent.
  1615. return S_OK;
  1616. break;
  1617. default:
  1618. INTERNALERROR;
  1619. }
  1620. return S_OK;
  1621. }
  1622. ////////////////////////////////////////////////////////////////////////
  1623. HRESULT
  1624. XMLStream::parseEquals()
  1625. {
  1626. HRESULT hr = S_OK;
  1627. switch (_sSubState)
  1628. {
  1629. case 0: // Eq ::= S? '=' S?
  1630. if (ISWHITESPACE(_chLookahead))
  1631. {
  1632. // allow whitespace between attribute and '='
  1633. checkhr2(push(&XMLStream::skipWhiteSpace, 1));
  1634. checkhr2( skipWhiteSpace() );
  1635. }
  1636. _sSubState = 1;
  1637. // fall through
  1638. case 1:
  1639. if (_chLookahead != L'=')
  1640. {
  1641. return XML_E_MISSINGEQUALS;
  1642. }
  1643. ADVANCE;
  1644. if (ISWHITESPACE(_chLookahead))
  1645. {
  1646. // allow whitespace between '=' and attribute value.
  1647. checkhr2(push(&XMLStream::skipWhiteSpace, 2));
  1648. checkhr2( skipWhiteSpace() );
  1649. }
  1650. _sSubState = 2;
  1651. // fall through
  1652. case 2:
  1653. checkhr2(pop(false));
  1654. break;
  1655. default:
  1656. INTERNALERROR;
  1657. }
  1658. return S_OK;
  1659. }
  1660. ////////////////////////////////////////////////////////////////////////
  1661. HRESULT
  1662. XMLStream::parseTable()
  1663. {
  1664. HRESULT hr = S_OK;
  1665. while (hr == S_OK && _nToken == XML_PENDING)
  1666. {
  1667. const StateEntry* pSE = &_pTable[_sSubState];
  1668. DWORD newState = pSE->_sGoto;
  1669. switch (pSE->_sOp)
  1670. {
  1671. case OP_WS:
  1672. //checkeof(_chLookahead, _lEOFError);
  1673. if (! ISWHITESPACE(_chLookahead))
  1674. return XML_E_MISSINGWHITESPACE;
  1675. // fall through
  1676. case OP_OWS:
  1677. //checkeof(_chLookahead, _lEOFError);
  1678. checkhr2(push(&XMLStream::skipWhiteSpace, (short)newState));
  1679. checkhr2(skipWhiteSpace());
  1680. //if (_fFoundPEREf) return XML_E_FOUNDPEREF;
  1681. break;
  1682. case OP_CHARWS:
  1683. //if (_fFoundPEREf) return S_OK;
  1684. mark();
  1685. //checkeof(_chLookahead, _lEOFError);
  1686. if (_chLookahead == pSE->_pch[0])
  1687. {
  1688. ADVANCE;
  1689. newState = pSE->_sGoto;
  1690. _nToken = pSE->_lDelta;
  1691. }
  1692. else if (! ISWHITESPACE(_chLookahead))
  1693. {
  1694. return XML_E_WHITESPACEORQUESTIONMARK;
  1695. }
  1696. else
  1697. newState = pSE->_sArg1;
  1698. break;
  1699. case OP_CHAR:
  1700. //if (_fFoundPEREf) return S_OK;
  1701. mark();
  1702. case OP_CHAR2:
  1703. //if (_fFoundPEREf) return S_OK;
  1704. //checkeof(_chLookahead, _lEOFError);
  1705. if (_chLookahead == pSE->_pch[0])
  1706. {
  1707. ADVANCE;
  1708. newState = pSE->_sGoto;
  1709. _nToken = pSE->_lDelta;
  1710. //if (_nToken == XML_GROUP)
  1711. //_nAttrType = XMLTYPE_NMTOKEN;
  1712. }
  1713. else
  1714. {
  1715. newState = pSE->_sArg1;
  1716. if (newState >= XML_E_PARSEERRORBASE &&
  1717. ISWHITESPACE(_chLookahead))
  1718. return XML_E_UNEXPECTED_WHITESPACE;
  1719. }
  1720. break;
  1721. case OP_PEEK:
  1722. //if (_fFoundPEREf) return S_OK;
  1723. //checkeof(_chLookahead, _lEOFError);
  1724. if (_chLookahead == pSE->_pch[0])
  1725. {
  1726. newState = pSE->_sGoto;
  1727. }
  1728. else
  1729. newState = pSE->_sArg1;
  1730. break;
  1731. case OP_NAME:
  1732. //if (_fFoundPEREf) return S_OK;
  1733. //checkeof(_chLookahead, _lEOFError);
  1734. checkhr2(push(&XMLStream::parseName, (short)newState));
  1735. checkhr2(parseName());
  1736. break;
  1737. case OP_TOKEN:
  1738. _nToken = pSE->_sArg1;
  1739. _lLengthDelta = pSE->_lDelta;
  1740. break;
  1741. case OP_POP:
  1742. _lLengthDelta = pSE->_lDelta;
  1743. if (_lLengthDelta == 0) mark();
  1744. // The _lDelta field contains a boolean flag to tell us whether this
  1745. // pop needs to check for parameter entity boundary or not.
  1746. checkhr2(pop(pSE->_lDelta == 0)); // we're done !
  1747. _nToken = pSE->_sArg1;
  1748. //_nAttrType = XMLTYPE_CDATA;
  1749. return S_OK;
  1750. case OP_STRCMP:
  1751. {
  1752. const WCHAR* t = NULL;
  1753. long len = 0;
  1754. getToken(&t,&len);
  1755. long delta = (pSE->_lDelta < 0) ? pSE->_lDelta : 0;
  1756. //if (StringEquals(pSE->_pch,t,len+delta,_fCaseInsensitive))
  1757. if (::FusionpCompareStrings(pSE->_pch, len+delta, t, len+delta, _fCaseInsensitive)==0)
  1758. {
  1759. if (pSE->_lDelta > 0)
  1760. {
  1761. _nToken = pSE->_lDelta;
  1762. _lLengthDelta = 0;
  1763. }
  1764. newState = pSE->_sGoto;
  1765. }
  1766. else
  1767. newState = pSE->_sArg1;
  1768. }
  1769. break;
  1770. case OP_COMMENT:
  1771. return push(&XMLStream::parseComment, (short)newState);
  1772. break;
  1773. case OP_CONDSECT:
  1774. //if (_fFoundPEREf) return S_OK;
  1775. // parse <![CDATA[...]]> or <![IGNORE[...]]>
  1776. return push(&XMLStream::parseCondSect, (short)newState);
  1777. case OP_SNCHAR:
  1778. //checkeof(_chLookahead, _lEOFError);
  1779. if (isStartNameChar(_chLookahead))
  1780. {
  1781. newState = pSE->_sGoto;
  1782. }
  1783. else
  1784. newState = pSE->_sArg1;
  1785. break;
  1786. case OP_EQUALS:
  1787. //if (_fFoundPEREf) return S_OK;
  1788. //checkeof(_chLookahead, _lEOFError);
  1789. checkhr2(push(&XMLStream::parseEquals, (short)newState));
  1790. checkhr2(parseEquals());
  1791. break;
  1792. case OP_ENCODING:
  1793. {
  1794. const WCHAR* t = NULL; // prefix bug fix, xiaoyuw@08/29/00
  1795. long len = 0; // prefix bug fix, xiaoyuw@08/29/00
  1796. _pInput->getToken(&t,&len);
  1797. hr = _pInput->switchEncoding(t, len+pSE->_lDelta);
  1798. }
  1799. break;
  1800. case OP_ATTRVAL:
  1801. //if (_fFoundPEREf) return S_OK;
  1802. if (_chLookahead != L'"' && _chLookahead != L'\'')
  1803. {
  1804. return XML_E_MISSINGQUOTE;
  1805. }
  1806. _chTerminator = _chLookahead;
  1807. ADVANCE;
  1808. mark();
  1809. _fReturnAttributeValue = (pSE->_sArg1 == 1);
  1810. //checkeof(_chLookahead, _lEOFError);
  1811. return push(&XMLStream::parseAttrValue, (short)newState);
  1812. break;
  1813. } // end of switch
  1814. if (_fnState != &XMLStream::parseTable)
  1815. return S_OK;
  1816. if (newState >= XML_E_PARSEERRORBASE)
  1817. return (HRESULT)newState;
  1818. else
  1819. _sSubState = (short)newState;
  1820. } // end of while
  1821. if (_nToken == XMLStream::XML_ENDDECL)
  1822. {
  1823. return _pInput->UnFreeze();
  1824. }
  1825. return S_OK;
  1826. }
  1827. ////////////////////////////////////////////////////////////////////////
  1828. HRESULT
  1829. XMLStream::_PushChar(WCHAR ch)
  1830. {
  1831. // buffer needs to grow.
  1832. long newsize = (_lBufSize+512)*2 ;
  1833. WCHAR* newbuf = NEW ( WCHAR[newsize]);
  1834. if (newbuf == NULL)
  1835. return E_OUTOFMEMORY;
  1836. if (_pchBuffer != NULL){
  1837. ::memcpy(newbuf, _pchBuffer, sizeof(WCHAR)*_lBufLen);
  1838. delete[] _pchBuffer;
  1839. }
  1840. _lBufSize = newsize;
  1841. _pchBuffer = newbuf;
  1842. _pchBuffer[_lBufLen++] = ch;
  1843. return S_OK;
  1844. }
  1845. ////////////////////////////////////////////////////////////////////////
  1846. HRESULT
  1847. XMLStream::AdvanceTo(short substate)
  1848. {
  1849. // This method combines and advance with a state switch in one
  1850. // atomic operation that handles the E_PENDING case properly.
  1851. _sSubState = substate;
  1852. //HRESULT hr = (!_fDTD) ? _pInput->nextChar(&_chLookahead, &_fEOF) : DTDAdvance();
  1853. HRESULT hr = _pInput->nextChar(&_chLookahead, &_fEOF) ;
  1854. if (hr != S_OK && (hr == E_PENDING || hr == E_DATA_AVAILABLE || hr == E_DATA_REALLOCATE || hr == XML_E_FOUNDPEREF))
  1855. {
  1856. // Then we must do an advance next time around before continuing
  1857. // with previous state. Push will save the _sSubState and return
  1858. // to it.
  1859. push(&XMLStream::firstAdvance,substate);
  1860. }
  1861. return hr;
  1862. }
  1863. ////////////////////////////////////////////////////////////////////////
  1864. bool
  1865. XMLStream::PreEntityText()
  1866. {
  1867. // This is a helper function that calculates whether or not to
  1868. // return some PCDATA or WHITEPACE before an entity reference.
  1869. if (_fPCDataPending)
  1870. {
  1871. // return what we have so far.
  1872. //if (_fWhitespace && ! _fIE4Quirks) // in IE4 mode we do not have WHITESPACE nodes
  1873. // and entities are always resolved, so return
  1874. // the leading whitespace as PCDATA.
  1875. if (_fWhitespace )
  1876. _nToken = XML_WHITESPACE;
  1877. else
  1878. _nToken = XML_PCDATA;
  1879. long entityLen = _pInput->getTokenLength() - _lEntityPos;
  1880. _lLengthDelta = -entityLen;
  1881. _lMarkDelta = entityLen;
  1882. _fPCDataPending = false;
  1883. _fWhitespace = true;
  1884. return true;
  1885. }
  1886. return false;
  1887. }
  1888. ////////////////////////////////////////////////////////////////////////
  1889. HRESULT
  1890. XMLStream::ErrorCallback(HRESULT hr)
  1891. {
  1892. if (hr == E_DATA_AVAILABLE)
  1893. hr = XML_DATAAVAILABLE;
  1894. else if (hr == E_DATA_REALLOCATE)
  1895. hr = XML_DATAREALLOCATE;
  1896. return _pXMLParser->ErrorCallback(hr);
  1897. }