Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2062 lines
60 KiB

  1. /*
  2. *
  3. * Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
  4. * EXEMPT: copyright change only, no build required
  5. *
  6. */
  7. #include "stdinc.h"
  8. #include "core.hxx"
  9. #pragma hdrstop
  10. #include "xmlhelper.hxx"
  11. #include "xmlstream.hxx"
  12. #include "bufferedstream.hxx"
  13. #include "xmlparser.hxx"
  // Sizing constants (their users are not in the part of the file reviewed here).
  const long BLOCK_SIZE = 512;
  const long STACK_INCREMENT = 10;

  // Macros used in this file.
  //
  // All of them assume they are expanded inside an XMLStream member function
  // that has a local 'HRESULT hr' and access to _pInput, _chLookahead, _fEOF
  // and _sSubState.
  //
  // ADVANCE and ADVANCETO expand to two statements.  They are wrapped in braces
  // (the convention STATE and GOTOSTART already follow) so that
  //     if (cond) ADVANCE;
  // guards the whole expansion instead of only the first statement.

  // Return the internal-error code from the current function.
  #define INTERNALERROR return XML_E_INTERNALERROR;
  // Return 'b' if end of input has been reached ('a' is not used).
  #define checkeof(a,b) if (_fEOF) return b;
  // Fetch the next lookahead character; return from the function on any non-S_OK result.
  #define ADVANCE { hr = _pInput->nextChar(&_chLookahead, &_fEOF); if (hr != S_OK) return hr; }
  // Call AdvanceTo(a); return from the function on any non-S_OK result.
  #define ADVANCETO(a) { hr = AdvanceTo(a); if (hr != S_OK) return hr; }
  #define ISWHITESPACE(ch) _pInput->isWhiteSpace(ch)
  // Record the next sub-state and return S_OK to the caller.
  #define STATE(state) { _sSubState = state; return S_OK; }
  // Record the next sub-state and jump to the local label 'Start'.
  #define GOTOSTART(state) { _sSubState = state; goto Start; }
  // True for S_OK and for codes in [XML_E_TOKEN_ERROR, XML_E_LASTERROR).
  #define DELAYMARK(hr) ((hr == S_OK) || ((hr >= static_cast<HRESULT>(XML_E_TOKEN_ERROR)) && (hr < static_cast<HRESULT>(XML_E_LASTERROR))))
  #define XML_E_FOUNDPEREF 0x8000e5ff
  // Attribute types that receive special handling in the tokenizer.
  // The values are derived from the XML_AT_XXXX types supplied through SetType,
  // and are also computed while an ATTLIST is parsed so that default values
  // can be parsed accordingly.
  typedef enum
  {
      XMLTYPE_CDATA    = 0,   // the default.
      XMLTYPE_NAME     = 1,
      XMLTYPE_NAMES    = 2,
      XMLTYPE_NMTOKEN  = 3,
      XMLTYPE_NMTOKENS = 4
  } XML_ATTRIBUTE_TYPE;
  38. //==============================================================================
  39. // xiaoyu : a simplified table : only deal with comments, not include DOCTYPE, NotationDecl, EntityDecl and ElementDecl.
  40. // Parse an <!^xxxxxxxx Declaration.
  41. const StateEntry g_DeclarationTable[] =
  42. {
  43. // 0 '<' ^ '!'
  44. { OP_CHAR, L"!", 1, (DWORD)XML_E_INTERNALERROR, },
  45. // 1 '<!' ^ '-'
  46. { OP_PEEK, L"-", 2, 4, 0 },
  47. // 2 '<!-'
  48. { OP_COMMENT, NULL, 3, },
  49. // 3 done !!
  50. { OP_POP, NULL, 0, 0 },
  51. // 4 '<!' ^ '['
  52. { OP_PEEK, L"[", 5, (DWORD)XML_E_BADDECLNAME, 0 }, //xiaoyu : we do not consider others <!XXX, which is a DTD subset
  53. // 5 '<![...'
  54. { OP_CONDSECT, NULL, 3, }
  55. };
  56. //==============================================================================
  57. // Parse an <?xml or <?xml:namespace declaration.
  58. const StateEntry g_XMLDeclarationTable[] =
  59. {
  60. // 0 must be xml declaration - and not xml namespace declaration
  61. { OP_TOKEN, NULL, 1, XML_XMLDECL, 0 },
  62. // 1 '<?xml' ^ S version="1.0" ...
  63. { OP_OWS, NULL, 2 },
  64. // 2 '<?xml' S ^ version="1.0" ...
  65. { OP_SNCHAR, NULL, 3, (DWORD)XML_E_XMLDECLSYNTAX },
  66. // 3 '<?xml' S ^ version="1.0" ...
  67. { OP_NAME, NULL, 4, },
  68. // 4 '<?xml' S version^="1.0" ...
  69. { OP_STRCMP, L"version", 5, 12, XML_VERSION },
  70. // 5
  71. { OP_EQUALS, NULL, 6 },
  72. // 6 '<?xml' S version = ^ "1.0" ...
  73. { OP_ATTRVAL, NULL, 32, 0},
  74. // 7 '<?xml' S version '=' value ^
  75. { OP_TOKEN, NULL, 8, XML_PCDATA, -1 },
  76. // 8 ^ are we done ?
  77. { OP_CHARWS, L"?", 28, 9 }, // must be '?' or whitespace.
  78. // 9 ^ S? [encoding|standalone] '?>'
  79. { OP_OWS, NULL, 10 },
  80. // 10
  81. { OP_CHAR, L"?", 28, 33 }, // may have '?' after skipping whitespace.
  82. // 11 ^ [encoding|standalone] '?>'
  83. { OP_NAME, NULL, 12, },
  84. // 12
  85. { OP_STRCMP, L"standalone", 23, 13, XML_STANDALONE },
  86. // 13
  87. { OP_STRCMP, L"encoding", 14, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE, XML_ENCODING },
  88. // 14
  89. { OP_EQUALS, NULL, 15 },
  90. // 15
  91. { OP_ATTRVAL, NULL, 16, 0 },
  92. // 16
  93. { OP_ENCODING, NULL, 17, 0, -1 },
  94. // 17
  95. { OP_TOKEN, NULL, 18, XML_PCDATA, -1 },
  96. // 18 ^ are we done ?
  97. { OP_CHARWS, L"?", 28, 19 }, // must be '?' or whitespace.
  98. // 19 ^ S? standalone '?>'
  99. { OP_OWS, NULL, 20 },
  100. // 20
  101. { OP_CHAR, L"?", 28, 34 }, // may have '?' after skipping whitespace.
  102. // 21 ^ standalone '?>'
  103. { OP_NAME, NULL, 22, },
  104. // 22
  105. { OP_STRCMP, L"standalone", 23, (DWORD)XML_E_UNEXPECTED_ATTRIBUTE,
  106. XML_STANDALONE },
  107. // 23
  108. { OP_EQUALS, NULL, 24 },
  109. // 24
  110. { OP_ATTRVAL, NULL, 25, 0 },
  111. // 25
  112. { OP_STRCMP, L"yes", 31, 30, -1 },
  113. // 26 <?xml ....... ^ '?>' -- now expecting just the closing '?>' chars
  114. { OP_OWS, NULL, 27 },
  115. // 27
  116. { OP_CHAR, L"?", 28, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
  117. // 28
  118. { OP_CHAR, L">", 29, (DWORD)XML_E_XMLDECLSYNTAX, 0 },
  119. // 29 done !!
  120. { OP_POP, NULL, 0, XMLStream::XML_ENDXMLDECL },
  121. //----------------------- check standalone values "yes" or "no"
  122. // 30
  123. { OP_STRCMP, L"no", 31, (DWORD)XML_E_INVALID_STANDALONE, -1 },
  124. // 31
  125. { OP_TOKEN, NULL, 26, XML_PCDATA, -1 },
  126. //----------------------- check version = "1.0"
  127. // 32
  128. { OP_STRCMP, L"1.0", 7, (DWORD)XML_E_INVALID_VERSION, -1 },
  129. // 33
  130. { OP_SNCHAR, NULL, 11, (DWORD)XML_E_XMLDECLSYNTAX },
  131. // 34
  132. { OP_SNCHAR, NULL, 21, (DWORD)XML_E_XMLDECLSYNTAX },
  133. };
  134. static const WCHAR* g_pstrCDATA = L"CDATA";
  135. ////////////////////////////////////////////////////////////////////////
  136. XMLStream::XMLStream(XMLParser * pXMLParser)
  137. : _pStack(1), _pStreams(1)
  138. {
  139. // precondition: 'func' is never NULL
  140. _fnState = &XMLStream::init;
  141. _pInput = NULL;
  142. _pchBuffer = NULL;
  143. _fDTD = false;
  144. //_fInternalSubset = false;
  145. _cStreamDepth = 0;
  146. _pXMLParser = pXMLParser;
  147. _init();
  148. SetFlags(0);
  149. }
  150. ////////////////////////////////////////////////////////////////////////
  151. HRESULT
  152. XMLStream::init()
  153. {
  154. HRESULT hr = S_OK;
  155. if (_pInput == NULL)
  156. {
  157. //haven' called put-stream yet
  158. return XML_E_ENDOFINPUT;
  159. }
  160. _init();
  161. _fnState = &XMLStream::parseContent;
  162. checkhr2(push(&XMLStream::firstAdvance,0));
  163. return hr;
  164. }
  165. ////////////////////////////////////////////////////////////////////////
  166. void
  167. XMLStream::_init()
  168. {
  169. _fEOF = false;
  170. _chLookahead = 0;
  171. _nToken = XML_PENDING;
  172. _chTerminator = 0;
  173. _lLengthDelta = 0;
  174. _lNslen = _lNssep = 0;
  175. _sSubState = 0;
  176. _lMarkDelta = 0;
  177. _fUsingBuffer = false;
  178. _lBufLen = 0;
  179. delete[] _pchBuffer;
  180. _pchBuffer = NULL;
  181. _lBufSize = 0;
  182. _fDelayMark = false;
  183. _fFoundWhitespace = false;
  184. _fFoundNonWhitespace = false;
  185. _fWasUsingBuffer = false;
  186. _chNextLookahead = 0;
  187. _fParsingAttDef = false;
  188. _fFoundFirstElement = false;
  189. _fReturnAttributeValue = true;
  190. //_fHandlePE = true;
  191. _pTable = NULL;
  192. }
  193. ////////////////////////////////////////////////////////////////////////
  194. XMLStream::~XMLStream()
  195. {
  196. delete _pInput;
  197. delete[] _pchBuffer;
  198. _pInput = NULL;
  199. _pchBuffer = NULL;
  200. InputInfo* pi = _pStreams.peek();
  201. while (pi != NULL)
  202. {
  203. // Previous stream is finished also, so
  204. // pop it and continue on.
  205. delete pi->_pInput;
  206. pi = _pStreams.pop();
  207. }
  208. }
  209. ////////////////////////////////////////////////////////////////////////
  210. HRESULT
  211. XMLStream::AppendData(
  212. /* [in] */ const BYTE *buffer,
  213. /* [in] */ long length,
  214. /* [in] */ BOOL last)
  215. {
  216. if (_pInput == NULL)
  217. {
  218. _pInput = NEW (BufferedStream(this));
  219. if (_pInput == NULL)
  220. return E_OUTOFMEMORY;
  221. init();
  222. }
  223. HRESULT hr = _pInput->AppendData(buffer, length, last);
  224. return hr;
  225. }
  226. ////////////////////////////////////////////////////////////////////////
  227. HRESULT
  228. XMLStream::Reset( void)
  229. {
  230. init();
  231. delete _pInput;
  232. _pInput = NULL;
  233. return S_OK;
  234. }
  235. ////////////////////////////////////////////////////////////////////////
  236. HRESULT
  237. XMLStream::PushStream(
  238. /* [unique][in] */ EncodingStream *p,
  239. /* [in] */ bool fExternalPE)
  240. {
  241. UNUSED(fExternalPE);
  242. if (_pStreams.used() == 0 && _pInput == NULL)
  243. init();
  244. _cStreamDepth++;
  245. if (_fDelayMark && _pInput != NULL)
  246. {
  247. mark(_lMarkDelta);
  248. _lMarkDelta = 0;
  249. _fDelayMark = false;
  250. }
  251. // Save current input stream.
  252. if (_pInput != NULL)
  253. {
  254. InputInfo* pi = _pStreams.push();
  255. if (pi == NULL)
  256. return E_OUTOFMEMORY;
  257. pi->_pInput = _pInput;
  258. pi->_chLookahead = _chLookahead;
  259. //pi->_fPE = true; // assume this is a parameter entity.
  260. //pi->_fExternalPE = fExternalPE;
  261. //pi->_fInternalSubset = _fInternalSubset;
  262. if (&XMLStream::skipWhiteSpace == _fnState && _pStack.used() > 0) {
  263. StateInfo* pSI = _pStack.peek();
  264. pi->_fnState = pSI->_fnState;
  265. }
  266. else
  267. pi->_fnState = _fnState;
  268. // and prepend pe text with space as per xml spec.
  269. _chLookahead = L' ';
  270. _chNextLookahead = _chLookahead;
  271. _pInput = NULL;
  272. }
  273. _pInput = NEW (BufferedStream(this));
  274. if (_pInput == NULL)
  275. return E_OUTOFMEMORY;
  276. if (p != NULL)
  277. _pInput->Load(p);
  278. if (_chLookahead == L' ')
  279. _pInput->setWhiteSpace(); // _pInput didn't see this space char.
  280. return S_OK;
  281. }
  282. ////////////////////////////////////////////////////////////////////////
  283. HRESULT
  284. XMLStream::PopStream()
  285. {
  286. // This method has to pop all streams until it finds a stream that
  287. // can deliver the next _chLookahead character.
  288. HRESULT hr = S_OK;
  289. InputInfo* pi = NULL;
  290. pi = _pStreams.peek();
  291. if (pi == NULL) return S_FALSE;
  292. _chLookahead = pi->_chLookahead;
  293. // Found previous stream, so we can continue.
  294. _fEOF = false;
  295. // Ok, so we actually got the next character, so
  296. // we can now safely throw away the previous
  297. // lookahead character and return the next
  298. // non-whitespace character from the previous stream.
  299. delete _pInput;
  300. _pInput = pi->_pInput;
  301. if (_chLookahead == L' ')
  302. _pInput->setWhiteSpace();
  303. // BUGBUG: we need to clear this so that the parser does not
  304. // try and pop a download in the internalPE case (when handling XML_E_ENDOFINPUT in run())
  305. // but this means that internal PEs never get XMLNF_ENDENTITY notifications generated.
  306. // The DTDNodeFactory requires this behaviour currently (incorrectly)
  307. _pStreams.pop();
  308. _cStreamDepth--;
  309. return hr;
  310. }
  311. ////////////////////////////////////////////////////////////////////////
  312. HRESULT
  313. XMLStream::GetNextToken(
  314. /* [out] */ DWORD *t,
  315. /* [out] */ const WCHAR **text,
  316. /* [out] */ long *length,
  317. /* [out] */ long *nslen)
  318. {
  319. HRESULT hr;
  320. if (_fDTD)
  321. return E_UNEXPECTED;
  322. if (_fDelayMark)
  323. {
  324. mark(_lMarkDelta);
  325. _lMarkDelta = 0;
  326. _fDelayMark = false;
  327. }
  328. hr = (this->*_fnState)();
  329. while (hr == S_OK && _nToken == XML_PENDING)
  330. hr = (this->*_fnState)();
  331. if (hr == S_OK)
  332. *t = _nToken;
  333. else if (hr == E_PENDING) {
  334. *t = XML_PENDING;
  335. *length = *nslen = 0;
  336. *text = NULL;
  337. goto CleanUp;
  338. }
  339. else
  340. *t = XML_PENDING;
  341. // At this point hr == S_OK or it is some error. So we
  342. // want to return the text of the current token, since this
  343. // is useful in both cases.
  344. if (! _fUsingBuffer)
  345. {
  346. getToken(text,length);
  347. if (_lLengthDelta != 0)
  348. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK : in ParsingAttributeValue, we have to read ahead of one char '"'
  349. *length += _lLengthDelta;
  350. _lLengthDelta = 0;
  351. }
  352. // This can only happen in the context of a DTD.
  353. // if (_fWasUsingBuffer)
  354. // {
  355. // _fUsingBuffer = _fWasUsingBuffer;
  356. // _fWasUsingBuffer = false;
  357. // }
  358. }
  359. else
  360. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
  361. *text = _pchBuffer;
  362. *length = _lBufLen;
  363. _fUsingBuffer = false;
  364. _fFoundWhitespace = false;
  365. _lBufLen = 0;
  366. _lLengthDelta = 0;
  367. }
  368. if (DELAYMARK(hr))
  369. {
  370. // Mark next time around so that error information points to the
  371. // beginning of this token.
  372. _fDelayMark = true;
  373. }
  374. else
  375. { // xiaoyu : IF STOP WITHIN, HAVE A CAREFUL LOOK
  376. // otherwise mark this spot right away so we point to the exact
  377. // source of the error.
  378. mark(_lMarkDelta);
  379. _lMarkDelta = 0;
  380. }
  381. _nToken = XML_PENDING;
  382. *nslen = _lNslen;
  383. _lNslen = _lNssep = 0;
  384. CleanUp:
  385. return hr;
  386. }
  387. ////////////////////////////////////////////////////////////////////////
  388. ULONG
  389. XMLStream::GetLine()
  390. {
  391. BufferedStream* input = getCurrentStream();
  392. if (input != NULL)
  393. return input->getLine();
  394. return 0;
  395. }
  396. ////////////////////////////////////////////////////////////////////////
  397. ULONG
  398. XMLStream::GetLinePosition( )
  399. {
  400. BufferedStream* input = getCurrentStream();
  401. if (input != NULL)
  402. return input->getLinePos();
  403. return 0;
  404. }
  405. ////////////////////////////////////////////////////////////////////////
  406. ULONG
  407. XMLStream::GetInputPosition( )
  408. {
  409. BufferedStream* input = getCurrentStream();
  410. if (input != NULL)
  411. return input->getInputPos();
  412. return 0;
  413. }
  414. ////////////////////////////////////////////////////////////////////////
  415. HRESULT
  416. XMLStream::GetLineBuffer(
  417. /* [out] */ const WCHAR * *buf, ULONG* len, ULONG* startpos)
  418. {
  419. if (buf) *buf = NULL;
  420. if (len) *len = 0;
  421. if (startpos) *startpos = 0;
  422. if (buf == NULL || len == NULL)
  423. return E_INVALIDARG;
  424. *buf = NULL;
  425. BufferedStream* input = getCurrentStream();
  426. if (input)
  427. *buf = input->getLineBuf(len, startpos);
  428. return S_OK;
  429. }
  430. ////////////////////////////////////////////////////////////////////////
  431. BufferedStream*
  432. XMLStream::getCurrentStream()
  433. {
  434. // Return the most recent stream that
  435. // actually has somthing to return.
  436. BufferedStream* input = _pInput;
  437. if (!_pInput)
  438. {
  439. return NULL;
  440. }
  441. int i = _pStreams.used()-1;
  442. do
  443. {
  444. ULONG len = 0, pos = 0;
  445. // const WCHAR* buf = input->getLineBuf(&len, &pos); // generates C4189: 'buf' local variable is initialized but not referenced
  446. (void) input->getLineBuf(&len, &pos);
  447. if (len > 0)
  448. return input;
  449. if (i >= 0)
  450. input = _pStreams[i--]->_pInput;
  451. else
  452. break;
  453. }
  454. while (input != NULL);
  455. return NULL;
  456. }
  457. ////////////////////////////////////////////////////////////////////////
  458. void
  459. XMLStream::SetFlags( unsigned short usFlags)
  460. {
  461. _usFlags = usFlags;
  462. // And break out the flags for performance reasons.
  463. //_fFloatingAmp = (usFlags & XMLFLAG_FLOATINGAMP) != 0;
  464. _fShortEndTags = (usFlags & XMLFLAG_SHORTENDTAGS) != 0;
  465. _fCaseInsensitive = (usFlags & XMLFLAG_CASEINSENSITIVE) != 0;
  466. _fNoNamespaces = (usFlags & XMLFLAG_NONAMESPACES) != 0;
  467. //_fNoWhitespaceNodes = false; // this is now bogus. (usFlags & XMLFLAG_NOWHITESPACE) != 0;
  468. //_fIE4Quirks = (_usFlags & XMLFLAG_IE4QUIRKS) != 0;
  469. //_fNoDTDNodes = (_usFlags & XMLFLAG_NODTDNODES) != 0;
  470. }
  471. ////////////////////////////////////////////////////////////////////////
  472. unsigned short
  473. XMLStream::GetFlags()
  474. {
  475. return _usFlags;
  476. }
  477. ////////////////////////////////////////////////////////////////////////
  478. //======================================================================
  479. // Real Implementation
  480. HRESULT
  481. XMLStream::firstAdvance()
  482. {
  483. HRESULT hr;
  484. ADVANCE;
  485. checkhr2(pop(false));
  486. return S_OK;
  487. }
  488. ////////////////////////////////////////////////////////////////////////
  489. HRESULT
  490. XMLStream::parseContent()
  491. {
  492. HRESULT hr = S_OK;
  493. if (_fEOF)
  494. return XML_E_ENDOFINPUT;
  495. switch (_chLookahead){
  496. case L'<':
  497. ADVANCE;
  498. checkeof(_chLookahead, XML_E_UNCLOSEDDECL);
  499. switch (_chLookahead)
  500. {
  501. case L'!':
  502. checkhr2(_pInput->Freeze()); // stop shifting data until '>'
  503. return pushTable( 0, g_DeclarationTable, (DWORD)XML_E_UNCLOSEDDECL);
  504. case L'?':
  505. checkhr2(push( &XMLStream::parsePI ));
  506. return parsePI();
  507. case L'/':
  508. checkhr2(push(&XMLStream::parseEndTag));
  509. return parseEndTag();
  510. default:
  511. checkhr2(push( &XMLStream::parseElement )); // push ParseContent, and _fnState = parseElement
  512. if (_fFoundFirstElement)
  513. {
  514. return parseElement();
  515. }
  516. else
  517. {
  518. // Return special end prolog token and then continue with
  519. // with parseElement.
  520. _fFoundFirstElement = true;
  521. _nToken = XML_ENDPROLOG;
  522. }
  523. }
  524. break;
  525. default:
  526. checkhr2(push(&XMLStream::parsePCData));
  527. return parsePCData();
  528. break;
  529. }
  530. return S_OK;
  531. }
  532. ////////////////////////////////////////////////////////////////////////
  533. HRESULT
  534. XMLStream::skipWhiteSpace()
  535. {
  536. HRESULT hr = S_OK;
  537. while (ISWHITESPACE(_chLookahead) && ! _fEOF)
  538. {
  539. ADVANCE;
  540. }
  541. checkhr2(pop(false));
  542. return hr;
  543. }
  544. ////////////////////////////////////////////////////////////////////////
  545. HRESULT
  546. XMLStream::parseElement()
  547. {
  548. HRESULT hr = S_OK;
  549. switch (_sSubState)
  550. {
  551. case 0:
  552. checkhr2(_pInput->Freeze()); // stop shifting data until '>'
  553. checkhr2(push( &XMLStream::parseName, 1));
  554. checkhr2(parseName());
  555. _sSubState = 1;
  556. // fall through
  557. case 1:
  558. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  559. _nToken = XML_ELEMENT;
  560. // and then try and parse the attributes, and return
  561. // to state 2 to finish up. With an optimization
  562. // for the case where there are no attributes.
  563. if (_chLookahead == L'/' || _chLookahead == L'>')
  564. {
  565. _sSubState = 2;
  566. }
  567. else {
  568. if (!ISWHITESPACE(_chLookahead))
  569. {
  570. return XML_E_BADNAMECHAR;
  571. }
  572. _chEndChar = L'/'; // for empty tags. //xiaoyu : used to match ENDTAG
  573. checkhr2(push(&XMLStream::parseAttributes,2));
  574. }
  575. return S_OK;
  576. break;
  577. case 2: // finish up with start tag.
  578. mark(); // only return '>' or '/>' in _nToken text
  579. if (_chLookahead == L'/')
  580. {
  581. // must be empty tag sequence '/>'.
  582. ADVANCE;
  583. _nToken = XML_EMPTYTAGEND;
  584. }
  585. else if (_chLookahead == L'>')
  586. {
  587. _nToken = XML_TAGEND;
  588. }
  589. else if (ISWHITESPACE(_chLookahead))
  590. {
  591. return XML_E_UNEXPECTED_WHITESPACE;
  592. }
  593. else
  594. return XML_E_EXPECTINGTAGEND;
  595. _sSubState = 3;
  596. // fall through
  597. case 3:
  598. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  599. if (_chLookahead != L'>')
  600. {
  601. if (ISWHITESPACE(_chLookahead))
  602. return XML_E_UNEXPECTED_WHITESPACE;
  603. else
  604. return XML_E_EXPECTINGTAGEND;
  605. }
  606. ADVANCE;
  607. mark();
  608. checkhr2(pop());// return to parseContent.
  609. return _pInput->UnFreeze();
  610. break;
  611. case 4: // swollow up bad tag
  612. // Allow the weird CDF madness <PRECACHE="YES"/>
  613. // For total compatibility we fake out the parser by returning
  614. // XML_EMPTYTAGEND, this way the rest of the tag becomes PCDATA.
  615. // YUK -- but it works.
  616. _nToken = XML_EMPTYTAGEND;
  617. mark();
  618. checkhr2(pop());// return to parseContent.
  619. return _pInput->UnFreeze();
  620. break;
  621. default:
  622. INTERNALERROR;
  623. }
  624. //return S_OK;
  625. }
  626. ////////////////////////////////////////////////////////////////////////
  627. HRESULT
  628. XMLStream::parseEndTag()
  629. {
  630. HRESULT hr = S_OK;
  631. switch (_sSubState)
  632. {
  633. case 0:
  634. ADVANCE; // soak up the '/'
  635. mark();
  636. // SHORT END TAG SUPPORT, IE4 Compatibility Mode only.
  637. if (! _fShortEndTags || _chLookahead != L'>')
  638. {
  639. checkhr2(push( &XMLStream::parseName, 1));
  640. checkhr2(parseName());
  641. }
  642. _sSubState = 1;
  643. // fall through
  644. case 1: // finish parsing end tag
  645. checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG);
  646. _nToken = XML_ENDTAG;
  647. checkhr2(push(&XMLStream::skipWhiteSpace, 2));
  648. return S_OK;
  649. case 2:
  650. checkeof(_chLookahead, XML_E_UNCLOSEDENDTAG);
  651. if (_chLookahead != L'>')
  652. {
  653. return XML_E_BADNAMECHAR;
  654. }
  655. ADVANCE;
  656. mark();
  657. checkhr2(pop());// return to parseContent.
  658. break;
  659. default:
  660. INTERNALERROR;
  661. }
  662. return S_OK;
  663. }
  664. ////////////////////////////////////////////////////////////////////////
  665. HRESULT
  666. XMLStream::parsePI()
  667. {
  668. HRESULT hr = S_OK;
  669. switch (_sSubState)
  670. {
  671. case 0:
  672. //_fWasDTD = _fDTD; // as far as Advance is concerned, the contents
  673. //_fHandlePE = false; // of a PI are not special.
  674. ADVANCE;
  675. checkhr2(_pInput->Freeze()); // stop shifting data until '?>'
  676. mark(); // don't include '?' in tag name.
  677. if (_chLookahead == L'x' || _chLookahead == L'X')
  678. {
  679. // perhaps this is the magic <?xml version="1.0"?> declaration.
  680. STATE(7); // jump to state 7.
  681. }
  682. // fall through
  683. _sSubState = 1;
  684. case 1:
  685. checkhr2(push( &XMLStream::parseName, 2));
  686. checkhr2(parseName());
  687. _sSubState = 2;
  688. // fall through
  689. case 2:
  690. checkeof(_chLookahead, XML_E_UNCLOSEDPI);
  691. if (_chLookahead != L'?' && ! ISWHITESPACE(_chLookahead))
  692. {
  693. return XML_E_BADNAMECHAR;
  694. }
  695. _nToken = XML_PI;
  696. STATE(3); // found startpi _nToken and return to _sSubState 3
  697. break;
  698. case 3: // finish with rest of PI
  699. if (_chLookahead == L'?')
  700. {
  701. ADVANCE;
  702. if (_chLookahead == L'>')
  703. {
  704. STATE(6);
  705. }
  706. else
  707. {
  708. return XML_E_EXPECTINGTAGEND;
  709. }
  710. }
  711. checkhr2(push(&XMLStream::skipWhiteSpace, 4));
  712. checkhr2( skipWhiteSpace() );
  713. _sSubState = 4;
  714. // fall through
  715. case 4: // support for normalized whitespace
  716. mark(); // strip whitespace from beginning of PI data, since this is
  717. // just the separator between the PI target name and the PI data.
  718. _sSubState = 5;
  719. // fallthrough
  720. case 5:
  721. while (! _fEOF )
  722. {
  723. if (_chLookahead == L'?')
  724. {
  725. ADVANCE;
  726. break;
  727. }
  728. if (! isCharData(_chLookahead))
  729. return XML_E_PIDECLSYNTAX;
  730. ADVANCE;
  731. }
  732. _sSubState = 6; // go to next state
  733. // fall through.
  734. case 6:
  735. checkeof(_chLookahead, XML_E_UNCLOSEDPI);
  736. if (_chLookahead == L'>')
  737. {
  738. ADVANCE;
  739. _lLengthDelta = -2; // don't include '?>' in PI CDATA.
  740. }
  741. else
  742. {
  743. // Hmmm. Must be a lone '?' so go back to state 5.
  744. STATE(5);
  745. }
  746. _nToken = XML_ENDPI;
  747. //_fHandlePE = true;
  748. checkhr2(pop());
  749. return _pInput->UnFreeze();
  750. break;
  751. case 7: // recognize 'm' in '<?xml' declaration
  752. ADVANCE;
  753. if (_chLookahead != L'm' && _chLookahead != L'M')
  754. {
  755. STATE(11); // not 'xml' so jump to state 11 to parse name
  756. }
  757. _sSubState = 8;
  758. // fall through
  759. case 8: // recognize L'l' in '<?xml' declaration
  760. ADVANCE;
  761. if (_chLookahead != L'l' && _chLookahead != L'L')
  762. {
  763. STATE(11); // not 'xml' so jump to state 11 to parse name
  764. }
  765. _sSubState = 9;
  766. // fall through
  767. case 9: // now need whitespace or ':' or '?' to terminate name.
  768. ADVANCE;
  769. if (ISWHITESPACE(_chLookahead))
  770. {
  771. if (! _fCaseInsensitive)
  772. {
  773. const WCHAR* t = NULL;
  774. long len =0; // for prefix bug : xiaoyuw@08/28/00
  775. getToken(&t,&len);
  776. //if (! StringEquals(L"xml",t,3,false)) // case sensitive
  777. if (::FusionpCompareStrings(L"xml", 3, t, 3, false)!=0) // not equal
  778. return XML_E_BADXMLCASE;
  779. }
  780. return pushTable(10, g_XMLDeclarationTable, (DWORD)XML_E_UNCLOSEDPI);
  781. }
  782. if (isNameChar(_chLookahead) || _chLookahead == ':')
  783. {
  784. STATE(11); // Hmmm. Must be something else then so continue parsing name
  785. }
  786. else
  787. {
  788. return XML_E_XMLDECLSYNTAX;
  789. }
  790. break;
  791. case 10:
  792. //_fHandlePE = true;
  793. checkhr2(pop());
  794. return _pInput->UnFreeze();
  795. break;
  796. case 11:
  797. if (_chLookahead == ':')
  798. ADVANCE;
  799. _sSubState = 12;
  800. // fall through
  801. case 12:
  802. if (isNameChar(_chLookahead))
  803. {
  804. checkhr2(push( &XMLStream::parseName, 2));
  805. _sSubState = 1; // but skip IsStartNameChar test
  806. checkhr2(parseName());
  807. return S_OK;
  808. }
  809. else
  810. {
  811. STATE(2);
  812. }
  813. break;
  814. default:
  815. INTERNALERROR;
  816. }
  817. //return S_OK;
  818. }
  819. ////////////////////////////////////////////////////////////////////////
  820. HRESULT
  821. XMLStream::parseComment()
  822. {
  823. // ok, so '<!-' has been parsed so far
  824. HRESULT hr = S_OK;
  825. switch (_sSubState)
  826. {
  827. case 0:
  828. //_fWasDTD = _fDTD; // as far as the DTD is concerned, the contents
  829. //_fHandlePE = false; // of a COMMENT are not special.
  830. ADVANCE; // soak up first '-'
  831. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  832. if (_chLookahead != L'-')
  833. {
  834. return XML_E_COMMENTSYNTAX;
  835. }
  836. _sSubState = 1;
  837. // fall through
  838. case 1:
  839. ADVANCE; // soak up second '-'
  840. mark(); // don't include '<!--' in comment text
  841. _sSubState = 2;
  842. // fall through;
  843. case 2:
  844. while (! _fEOF)
  845. {
  846. if (_chLookahead == L'-')
  847. {
  848. ADVANCE; // soak up first closing L'-'
  849. break;
  850. }
  851. if (! isCharData(_chLookahead))
  852. return XML_E_BADCHARDATA;
  853. ADVANCE;
  854. }
  855. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  856. _sSubState = 3; // advance to next state
  857. // fall through.
  858. case 3:
  859. if (_chLookahead != L'-')
  860. {
  861. // Hmmm, must have been a floating L'-' so go back to state 2
  862. STATE(2);
  863. }
  864. ADVANCE; // soak up second closing L'-'
  865. _sSubState = 4;
  866. // fall through
  867. case 4:
  868. checkeof(_chLookahead, XML_E_UNCLOSEDCOMMENT);
  869. //if (_chLookahead != L'>' && ! _fIE4Quirks)
  870. if (_chLookahead != L'>')
  871. {
  872. // cannot have floating L'--' unless we are in compatibility mode.
  873. return XML_E_COMMENTSYNTAX;
  874. }
  875. ADVANCE; // soak up closing L'>'
  876. _lLengthDelta = -3; // don't include L'-->' in PI CDATA.
  877. _nToken = XML_COMMENT;
  878. checkhr2(pop());
  879. //_fHandlePE = true;
  880. break;
  881. default:
  882. INTERNALERROR;
  883. }
  884. return S_OK;
  885. }
  886. ////////////////////////////////////////////////////////////////////////
  887. HRESULT
  888. XMLStream::parseName()
  889. {
  890. HRESULT hr = S_OK;
  891. switch (_sSubState)
  892. {
  893. case 0:
  894. if (! isStartNameChar(_chLookahead))
  895. {
  896. if (ISWHITESPACE(_chLookahead))
  897. hr = XML_E_UNEXPECTED_WHITESPACE;
  898. else
  899. hr = XML_E_BADSTARTNAMECHAR;
  900. goto CleanUp;
  901. }
  902. mark();
  903. _sSubState = 1;
  904. // fall through
  905. case 1:
  906. _lNslen = _lNssep = 0;
  907. while (isNameChar(_chLookahead) && !_fEOF)
  908. {
  909. ADVANCE;
  910. }
  911. hr = pop(false); // return to the previous state
  912. break;
  913. default:
  914. INTERNALERROR;
  915. }
  916. CleanUp:
  917. return hr;
  918. }
  919. ////////////////////////////////////////////////////////////////////////
  920. HRESULT
  921. XMLStream::parseAttributes()
  922. {
  923. HRESULT hr = S_OK;
  924. switch (_sSubState)
  925. {
  926. case 0:
  927. //_nAttrType = XMLTYPE_CDATA;
  928. _fCheckAttribute = false;
  929. checkhr2(push(&XMLStream::skipWhiteSpace, 1));
  930. checkhr2( skipWhiteSpace() );
  931. _sSubState = 1;
  932. // fall through
  933. case 1:
  934. if (_chLookahead == _chEndChar || _chLookahead == L'>' )
  935. {
  936. checkhr2(pop()); // no attributes.
  937. return S_OK;
  938. }
  939. checkhr2( push( &XMLStream::parseName, 2 ) );
  940. checkhr2( parseName() );
  941. if (!ISWHITESPACE(_chLookahead) && _chLookahead != L'=')
  942. {
  943. return XML_E_BADNAMECHAR;
  944. }
  945. _sSubState = 2;
  946. // fall through
  947. case 2:
  948. if (ISWHITESPACE(_chLookahead))
  949. {
  950. // Eq ::= S? '=' S?
  951. STATE(7);
  952. }
  953. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  954. _nToken = XML_ATTRIBUTE;
  955. _sSubState = 3;
  956. return S_OK;
  957. break;
  958. case 3:
  959. if (ISWHITESPACE(_chLookahead))
  960. return XML_E_UNEXPECTED_WHITESPACE;
  961. _fWhitespace = false;
  962. _sSubState = 4;
  963. // fall through
  964. case 4:
  965. if (_chLookahead != L'=')
  966. {
  967. return XML_E_MISSINGEQUALS;
  968. }
  969. ADVANCE;
  970. if (ISWHITESPACE(_chLookahead))
  971. {
  972. // allow whitespace between '=' and attribute value.
  973. checkhr2(push(&XMLStream::skipWhiteSpace, 5));
  974. checkhr2( skipWhiteSpace() );
  975. }
  976. _sSubState = 5;
  977. // fall through
  978. case 5:
  979. if (ISWHITESPACE(_chLookahead))
  980. return XML_E_UNEXPECTED_WHITESPACE;
  981. if (_chLookahead != L'"' && _chLookahead != L'\'')
  982. {
  983. return XML_E_MISSINGQUOTE;
  984. }
  985. _chTerminator = _chLookahead;
  986. ADVANCE;
  987. mark();
  988. return push(&XMLStream::parseAttrValue, 6);
  989. //_sSubState = 6;
  990. // fall through;
  991. case 6:
  992. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  993. if (_chLookahead == _chEndChar || _chLookahead == L'>')
  994. {
  995. checkhr2(pop());
  996. return S_OK;
  997. }
  998. if (! ISWHITESPACE(_chLookahead) )
  999. {
  1000. return XML_E_MISSINGWHITESPACE;
  1001. }
  1002. STATE(0); // go back to state 0
  1003. break;
  1004. case 7:
  1005. // allow whitespace between attribute and '='
  1006. _lLengthDelta = _pInput->getTokenLength();
  1007. checkhr2(push(&XMLStream::skipWhiteSpace, 8));
  1008. checkhr2( skipWhiteSpace() );
  1009. _sSubState = 8;
  1010. // fall through
  1011. case 8:
  1012. checkeof(_chLookahead, XML_E_UNCLOSEDSTARTTAG);
  1013. _lLengthDelta -= _pInput->getTokenLength();
  1014. STATE(2);
  1015. break;
  1016. default:
  1017. INTERNALERROR;
  1018. }
  1019. //return hr;
  1020. }
  1021. ////////////////////////////////////////////////////////////////////////
  1022. HRESULT XMLStream::parseAttrValue()
  1023. {
  1024. HRESULT hr = S_OK;
  1025. switch (_sSubState)
  1026. {
  1027. case 0:
  1028. _fParsingAttDef = true;
  1029. // mark beginning of attribute data
  1030. _sSubState = 2;
  1031. // fall through;
  1032. case 2:
  1033. while ( _chLookahead != _chTerminator &&
  1034. _chLookahead != L'<' &&
  1035. ! _fEOF )
  1036. {
  1037. if (_chLookahead == L'&')
  1038. {
  1039. // then parse entity ref and then return
  1040. // to state 2 to continue with PCDATA.
  1041. return push(&XMLStream::parseEntityRef,2);
  1042. }
  1043. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1044. if (FAILED(hr))
  1045. {
  1046. if (hr == E_PENDING)
  1047. {
  1048. hr = S_OK;
  1049. ADVANCE;
  1050. }
  1051. return hr;
  1052. }
  1053. }
  1054. _sSubState = 3;
  1055. // fall through
  1056. case 3:
  1057. checkeof(_chLookahead, XML_E_UNCLOSEDSTRING);
  1058. if (_chLookahead == _chTerminator)
  1059. {
  1060. ADVANCE;
  1061. if (_fReturnAttributeValue)
  1062. {
  1063. // return what we have so far - if anything.
  1064. if ((_fUsingBuffer && _lBufLen > 0) ||
  1065. _pInput->getTokenLength() > 1)
  1066. {
  1067. _lLengthDelta = -1; // don't include string _chTerminator.
  1068. _nToken = XML_PCDATA;
  1069. }
  1070. }
  1071. else
  1072. {
  1073. _fReturnAttributeValue = true; // reset to default value.
  1074. }
  1075. _fParsingAttDef = false;
  1076. checkhr2(pop());
  1077. return S_OK;
  1078. }
  1079. else
  1080. {
  1081. return XML_E_BADCHARINSTRING;
  1082. }
  1083. break;
  1084. default:
  1085. INTERNALERROR;
  1086. }
  1087. //return hr;
  1088. }
  1089. ////////////////////////////////////////////////////////////////////////
  1090. HRESULT
  1091. XMLStream::ScanHexDigits()
  1092. {
  1093. HRESULT hr = S_OK;
  1094. while (! _fEOF && _chLookahead != L';')
  1095. {
  1096. if (! isHexDigit(_chLookahead))
  1097. {
  1098. return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
  1099. }
  1100. ADVANCE;
  1101. }
  1102. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1103. return hr;
  1104. }
  1105. ////////////////////////////////////////////////////////////////////////
  1106. HRESULT
  1107. XMLStream::ScanDecimalDigits()
  1108. {
  1109. HRESULT hr = S_OK;
  1110. while (! _fEOF && _chLookahead != L';')
  1111. {
  1112. if (! isDigit(_chLookahead))
  1113. {
  1114. return ISWHITESPACE(_chLookahead) ? XML_E_UNEXPECTED_WHITESPACE : XML_E_BADCHARINENTREF;
  1115. }
  1116. ADVANCE;
  1117. }
  1118. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1119. return hr;
  1120. }
  1121. ////////////////////////////////////////////////////////////////////////
  1122. HRESULT
  1123. XMLStream::parsePCData()
  1124. {
  1125. HRESULT hr = S_OK;
  1126. switch (_sSubState)
  1127. {
  1128. case 0:
  1129. _fWhitespace = true;
  1130. _sSubState = 1;
  1131. // fall through;
  1132. case 1:
  1133. // This state is used when we are not normalizing white space. This
  1134. // is a separate state for performance reasons.
  1135. // Normalizing whitespace is about 11% slower.
  1136. while (_chLookahead != L'<' && ! _fEOF )
  1137. {
  1138. if (_chLookahead == L'&')
  1139. {
  1140. // then parse entity ref and then return
  1141. // to state 1 to continue with PCDATA.
  1142. return push(&XMLStream::parseEntityRef,1);
  1143. }
  1144. if (_chLookahead == L'>')
  1145. {
  1146. WCHAR* pText = NULL;
  1147. long len = 0;
  1148. _pInput->getToken((const WCHAR**)&pText, &len);
  1149. //if (len >= 2 && StrCmpN(L"]]", pText + len - 2, 2) == 0)
  1150. if ((len >= 2) && (::FusionpCompareStrings(L"]]", 2, pText + len - 2, 2, false)==0))
  1151. return XML_E_INVALID_CDATACLOSINGTAG;
  1152. }
  1153. // This slows us down too much.
  1154. // else if (! isCharData(_chLookahead))
  1155. // {
  1156. // return XML_E_BADCHARDATA;
  1157. // }
  1158. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1159. if (FAILED(hr))
  1160. {
  1161. if (hr == E_PENDING)
  1162. {
  1163. hr = S_OK;
  1164. ADVANCE;
  1165. }
  1166. return hr;
  1167. }
  1168. checkhr2(hr);
  1169. }
  1170. _sSubState = 2;
  1171. // fall through
  1172. case 2:
  1173. if (_pInput->getTokenLength() > 0 || _fUsingBuffer)
  1174. {
  1175. _nToken = _fWhitespace ? XML_WHITESPACE : XML_PCDATA;
  1176. }
  1177. checkhr2(pop());
  1178. break;
  1179. default:
  1180. INTERNALERROR;
  1181. }
  1182. return S_OK;
  1183. }
  1184. ////////////////////////////////////////////////////////////////////////
  1185. HRESULT
  1186. XMLStream::parseEntityRef()
  1187. {
  1188. HRESULT hr = S_OK;
  1189. long entityLen = 0, lLen = 1;
  1190. const WCHAR* t = NULL;
  1191. long len = 0;
  1192. Start:
  1193. switch (_sSubState)
  1194. {
  1195. case 0: // ^ ( '&#' [0-9]+ ) | ('&#X' [0-9a-fA-F]+) | ('&' Name) ';'
  1196. _nPreToken = XML_PENDING;
  1197. _lEntityPos = _pInput->getTokenLength(); // record entity position.
  1198. _fPCDataPending = (_lEntityPos > 0);
  1199. if (PreEntityText())
  1200. {
  1201. // remember the pending text before parsing the entity.
  1202. _nPreToken = _nToken;
  1203. _nToken = XML_PENDING;
  1204. }
  1205. _sSubState = 1;
  1206. // fall through
  1207. case 1:
  1208. ADVANCE; // soak up the '&'
  1209. _sSubState = 2;
  1210. // fall through
  1211. case 2:
  1212. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1213. if (_chLookahead == L'#')
  1214. {
  1215. ADVANCE;
  1216. _sSubState = 3;
  1217. // fall through
  1218. }
  1219. else
  1220. {
  1221. // Loose entity parsing allows "...&6..."
  1222. if (! isStartNameChar(_chLookahead))
  1223. {
  1224. /*
  1225. if (_fFloatingAmp)
  1226. {
  1227. // then it isn't an entity reference, so go back to PCDATA
  1228. if (_fUsingBuffer)
  1229. {
  1230. // this in case we are normalizing white space.
  1231. PushChar(L'&');
  1232. }
  1233. _fWhitespace = false;
  1234. checkhr2(pop());
  1235. return S_OK;
  1236. }
  1237. else */
  1238. if (ISWHITESPACE(_chLookahead))
  1239. return XML_E_UNEXPECTED_WHITESPACE;
  1240. else
  1241. return XML_E_BADSTARTNAMECHAR;
  1242. }
  1243. checkhr2(push(&XMLStream::parseName, 6));
  1244. _sSubState = 1; // avoid doing a mark() so we can return PCDATA if necessary.
  1245. return parseName();
  1246. }
  1247. break;
  1248. // ------------- Numeric entity references --------------------
  1249. case 3:
  1250. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1251. if (_chLookahead == L'x')
  1252. {
  1253. // hex character reference.
  1254. ADVANCE;
  1255. STATE(5); // go to state 5
  1256. }
  1257. _sSubState = 4;
  1258. // fall through
  1259. case 4: // '&#' ^ [0-9]+ ';'
  1260. checkhr2(ScanDecimalDigits());
  1261. if (_chLookahead != L';')
  1262. {
  1263. STATE(9);
  1264. }
  1265. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1266. getToken(&t, &len);
  1267. checkhr2(DecimalToUnicode(t + _lEntityPos + 2, entityLen - 2, _wcEntityValue));
  1268. lLen = 2;
  1269. _nToken = XML_NUMENTITYREF;
  1270. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1271. break;
  1272. case 5: // '&#X' ^ [0-9a-fA-F]+
  1273. checkhr2(ScanHexDigits());
  1274. if (_chLookahead != L';')
  1275. {
  1276. STATE(9);
  1277. }
  1278. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1279. getToken(&t, &len);
  1280. checkhr2(HexToUnicode(t + _lEntityPos + 3, entityLen - 3, _wcEntityValue));
  1281. lLen = 3;
  1282. _nToken = XML_HEXENTITYREF;
  1283. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1284. break;
  1285. // ------------- Named Entity References --------------------
  1286. case 6: // '&' Name ^ ';'
  1287. checkeof(_chLookahead, XML_E_UNEXPECTEDEOF);
  1288. if (_chLookahead != L';')
  1289. {
  1290. STATE(9);
  1291. }
  1292. // If parseName found a namespace then we need to calculate the
  1293. // real nslen taking the pending PC data and '&' into account
  1294. // and remember this in case we have to return the PCDATA.
  1295. _nEntityNSLen = (_lNslen > 0) ? _lNslen - _lEntityPos - 1 : 0;
  1296. _fUsingBuffer = false;
  1297. entityLen = _pInput->getTokenLength() - _lEntityPos;
  1298. getToken(&t, &len);
  1299. if (0 != (_wcEntityValue = BuiltinEntity(t + _lEntityPos + 1, entityLen - 1))) //||
  1300. //(_fIE4Quirks && 0xFFFF != (_wcEntityValue = LookupBuiltinEntity(t + _lEntityPos + 1, entityLen - 1))))
  1301. {
  1302. lLen = 1;
  1303. _nToken = XML_BUILTINENTITYREF;
  1304. GOTOSTART(10); // have to use GOTOSTART() because we want to use the values of t and len
  1305. }
  1306. else //xiaoyu : Fusion XML Parser does not support external ref,
  1307. // so, if it is not a builtIn ref, we would return error
  1308. return XML_E_MISSINGSEMICOLON;
  1309. break;
  1310. //xiaoyu : Fusion XML Parser does not support external ref
  1311. /*
  1312. if (_nPreToken != XML_PENDING)
  1313. {
  1314. // Return previous token (XML_PCDATA or XML_WHITESPACE)
  1315. _lLengthDelta = -entityLen;
  1316. _lMarkDelta = entityLen - 1; // don't include '&' in _nToken.
  1317. _nToken = _nPreToken;
  1318. STATE(7);
  1319. }
  1320. mark(entityLen-1); // don't include '&' in _nToken.
  1321. _sSubState = 7;
  1322. // fall through
  1323. case 7:
  1324. ADVANCE; // soak up the ';'
  1325. _nToken = XML_ENTITYREF;
  1326. _lNslen = _nEntityNSLen;
  1327. _lLengthDelta = -1; // don't include the ';'
  1328. STATE(8); // return token and resume in state 8.
  1329. break;
  1330. */
  1331. case 8:
  1332. mark();
  1333. checkhr2(pop());
  1334. return S_OK;
  1335. /*
  1336. case 9:
  1337. // Soft entity handling - we just continue with PCDATA in
  1338. // this case.
  1339. if (_fFloatingAmp)
  1340. {
  1341. if (_fUsingBuffer)
  1342. {
  1343. // this in case we are normalizing white space. In this case
  1344. // we have to copy what we have so far to the normalized buffer.
  1345. long endpos = _pInput->getTokenLength();
  1346. const WCHAR* t; long len;
  1347. getToken(&t, &len);
  1348. for (long i = _lEntityPos; i < endpos; i++)
  1349. PushChar(t[i]);
  1350. }
  1351. _fWhitespace = false;
  1352. checkhr2(pop());
  1353. return S_OK;
  1354. }
  1355. else
  1356. return XML_E_MISSINGSEMICOLON;
  1357. break;
  1358. */
  1359. case 10:
  1360. // Return the text before builtin or char entityref as XML_PCDATA
  1361. if (_nPreToken)
  1362. {
  1363. _nPreToken = _nToken;
  1364. _nToken = XML_PCDATA;
  1365. _lLengthDelta = -entityLen;
  1366. _lMarkDelta = entityLen - lLen; // don't include '&' in _nToken.
  1367. STATE(11); // return token and resume in state 12.
  1368. }
  1369. else
  1370. {
  1371. _nPreToken = _nToken;
  1372. mark(entityLen - lLen);
  1373. GOTOSTART(11);
  1374. }
  1375. break;
  1376. case 11:
  1377. // push the builtin entity
  1378. _fUsingBuffer = true;
  1379. PushChar(_wcEntityValue);
  1380. _nToken = _nPreToken;
  1381. STATE(12); // return token and resume in state 12.
  1382. break;
  1383. case 12:
  1384. ADVANCE; // soak up the ';'
  1385. STATE(8); // resume in state 8.
  1386. break;
  1387. default:
  1388. INTERNALERROR;
  1389. }
  1390. return S_OK;
  1391. }
  1392. ////////////////////////////////////////////////////////////////////////
  1393. HRESULT
  1394. XMLStream::pushTable(short substate, const StateEntry* table, DWORD le)
  1395. {
  1396. HRESULT hr = S_OK;
  1397. checkhr2(push(&XMLStream::parseTable, substate));
  1398. _pTable = table;
  1399. UNUSED(le);
  1400. //_lEOFError = le;
  1401. return hr;
  1402. }
  1403. ////////////////////////////////////////////////////////////////////////
  1404. HRESULT
  1405. XMLStream::push(StateFunc f, short s)
  1406. {
  1407. StateInfo* pSI = _pStack.push();
  1408. if (pSI == NULL)
  1409. return E_OUTOFMEMORY;
  1410. pSI->_sSubState = s;
  1411. pSI->_fnState = _fnState;
  1412. pSI->_pTable = _pTable;
  1413. pSI->_cStreamDepth = _cStreamDepth;
  1414. _sSubState = 0;
  1415. _fnState = f;
  1416. return S_OK;
  1417. }
  1418. ////////////////////////////////////////////////////////////////////////
  1419. HRESULT
  1420. XMLStream::pop(bool boundary)
  1421. {
  1422. StateInfo* pSI = _pStack.peek();
  1423. // prefix bug fix : xiaoyuw@08/29/00
  1424. ASSERT_NTC(pSI != NULL);
  1425. if (_fDTD &&
  1426. ! (_fParsingAttDef) && boundary && _cStreamDepth != pSI->_cStreamDepth) // _fParsingNames ||
  1427. {
  1428. // If we are in a PE and we are popping out to a state that is NOT in a PE
  1429. // and this is a pop where we need to check this condition, then return an error.
  1430. // For example, the following is not well formed because the parameter entity
  1431. // pops us out of the ContentModel state in which the PE was found:
  1432. // <!DOCTYPE foo [
  1433. // <!ENTITY % foo "a)">
  1434. // <!ELEMENT bar ( %foo; >
  1435. // ]>...
  1436. return XML_E_PE_NESTING;
  1437. }
  1438. _fnState = pSI->_fnState;
  1439. _sSubState = pSI->_sSubState;
  1440. _pTable = pSI->_pTable;
  1441. //_lEOFError = pSI->_lEOFError;
  1442. _pStack.pop();
  1443. return S_OK;
  1444. }
  1445. ////////////////////////////////////////////////////////////////////////
  1446. HRESULT
  1447. XMLStream::switchTo(StateFunc f)
  1448. {
  1449. HRESULT hr;
  1450. // Make sure we keep the old stream depth.
  1451. StateInfo* pSI = _pStack.peek();
  1452. // prefix bug fix : xiaoyuw@08/29/00
  1453. ASSERT_NTC(pSI != NULL);
  1454. int currentDepth = _cStreamDepth;
  1455. _cStreamDepth = pSI->_cStreamDepth;
  1456. checkhr2(pop(false));
  1457. checkhr2(push(f,_sSubState)); // keep return to _sSubState the same
  1458. _cStreamDepth = currentDepth;
  1459. return (this->*f)();
  1460. }
  1461. ////////////////////////////////////////////////////////////////////////
  1462. HRESULT
  1463. XMLStream::parseCondSect()
  1464. {
  1465. HRESULT hr = S_OK;
  1466. switch (_sSubState)
  1467. {
  1468. case 0:
  1469. ADVANCE; // soak up the '[' character
  1470. //if (_fFoundPEREf) return S_OK;
  1471. _sSubState = 1;
  1472. // fall through
  1473. case 1: // now match magic '[CDATA[' sequence.
  1474. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1475. if (_chLookahead == L'C')
  1476. {
  1477. _pchCDataState = g_pstrCDATA;
  1478. STATE(5); // goto state 5
  1479. }
  1480. _sSubState = 2; // must be IGNORE, INCLUDE or %pe;
  1481. // fall through
  1482. case 2: // must be DTD markup declaration
  1483. // '<![' ^ S? ('INCLUDE' | 'IGNORE' | %pe;) S? [...]]> or
  1484. // skip optional whitespace
  1485. //if (_fInternalSubset)
  1486. // return XML_E_CONDSECTINSUBSET;
  1487. checkeof(_chLookahead, XML_E_EXPECTINGOPENBRACKET);
  1488. checkhr2(push(&XMLStream::skipWhiteSpace, 3));
  1489. return skipWhiteSpace(); // must return because of %pe;
  1490. case 3:
  1491. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1492. checkhr2(push(&XMLStream::parseName,4));
  1493. return parseName();
  1494. case 4: // scanned 'INCLUDE' or 'IGNORE'
  1495. {
  1496. const WCHAR* t = NULL;
  1497. long len = 0;
  1498. getToken(&t,&len);
  1499. //if (StringEquals(L"IGNORE",t,len,false))
  1500. //{
  1501. // return switchTo(&XMLStream::parseIgnoreSect);
  1502. //}
  1503. //else if (StringEquals(L"INCLUDE",t,len,false))
  1504. //{
  1505. // return switchTo(&XMLStream::parseIncludeSect);
  1506. //}
  1507. //else
  1508. return XML_E_BADENDCONDSECT;
  1509. }
  1510. break;
  1511. case 5: // parse CDATA name
  1512. while (*_pchCDataState != 0 && _chLookahead == *_pchCDataState && ! _fEOF)
  1513. {
  1514. ADVANCE; // advance first, before incrementing _pchCDataState
  1515. _pchCDataState++; // so that this state is re-entrant in the E_PENDING case.
  1516. checkeof(_chLookahead, XML_E_UNCLOSEDMARKUPDECL);
  1517. }
  1518. if (*_pchCDataState != 0)
  1519. {
  1520. // must be INCLUDE or IGNORE section so go to state 2.
  1521. _sSubState = 2;
  1522. }
  1523. else if (_chLookahead != L'[')
  1524. {
  1525. return XML_E_EXPECTINGOPENBRACKET;
  1526. }
  1527. else if (_fDTD)
  1528. return XML_E_CDATAINVALID;
  1529. else
  1530. return switchTo(&XMLStream::parseCData);
  1531. return S_OK;
  1532. break;
  1533. default:
  1534. INTERNALERROR;
  1535. }
  1536. return S_OK;
  1537. }
  1538. ////////////////////////////////////////////////////////////////////////
  1539. HRESULT
  1540. XMLStream::parseCData()
  1541. {
  1542. HRESULT hr = S_OK;
  1543. switch (_sSubState)
  1544. {
  1545. case 0:
  1546. ADVANCE; // soak up the '[' character.
  1547. mark(); // don't include 'CDATA[' in CDATA text
  1548. _sSubState = 1;
  1549. // fall through
  1550. case 1:
  1551. while (_chLookahead != L']' && ! _fEOF)
  1552. {
  1553. // scanPCData will stop when it sees a ']' character.
  1554. hr = _pInput->scanPCData(&_chLookahead, &_fWhitespace);
  1555. if (FAILED(hr))
  1556. {
  1557. if (hr == E_PENDING)
  1558. {
  1559. hr = S_OK;
  1560. ADVANCE;
  1561. }
  1562. return hr;
  1563. }
  1564. }
  1565. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1566. _sSubState = 2;
  1567. // fall through
  1568. case 2:
  1569. ADVANCE; // soak up first L']' character.
  1570. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1571. if (_chLookahead != L']')
  1572. {
  1573. // must have been floating ']' character, so
  1574. // return to state 1.
  1575. STATE(1);
  1576. }
  1577. _sSubState = 3;
  1578. // fall through
  1579. case 3:
  1580. ADVANCE; // soak up second ']' character.
  1581. checkeof(_chLookahead, XML_E_UNCLOSEDCDATA);
  1582. if (_chLookahead == L']')
  1583. {
  1584. // Ah, an extra ']' character, tricky !!
  1585. // In this case we stay in state 3 until we find a non ']' character
  1586. // so you can terminate a CDATA section with ']]]]]]]]]]]]]]]]>'
  1587. // and everying except the final ']]>' is treated as CDATA.
  1588. STATE(3);
  1589. }
  1590. else if (_chLookahead != L'>')
  1591. {
  1592. // must have been floating "]]" pair, so
  1593. // return to state 1.
  1594. STATE(1);
  1595. }
  1596. _sSubState = 4;
  1597. // fall through
  1598. case 4:
  1599. ADVANCE; // soak up the '>'
  1600. _nToken = XML_CDATA;
  1601. _lLengthDelta = -3; // don't include terminating ']]>' in text.
  1602. checkhr2(pop()); // return to parseContent.
  1603. return S_OK;
  1604. break;
  1605. default:
  1606. INTERNALERROR;
  1607. }
  1608. return S_OK;
  1609. }
  1610. ////////////////////////////////////////////////////////////////////////
  1611. HRESULT
  1612. XMLStream::parseEquals()
  1613. {
  1614. HRESULT hr = S_OK;
  1615. switch (_sSubState)
  1616. {
  1617. case 0: // Eq ::= S? '=' S?
  1618. if (ISWHITESPACE(_chLookahead))
  1619. {
  1620. // allow whitespace between attribute and '='
  1621. checkhr2(push(&XMLStream::skipWhiteSpace, 1));
  1622. checkhr2( skipWhiteSpace() );
  1623. }
  1624. _sSubState = 1;
  1625. // fall through
  1626. case 1:
  1627. if (_chLookahead != L'=')
  1628. {
  1629. return XML_E_MISSINGEQUALS;
  1630. }
  1631. ADVANCE;
  1632. if (ISWHITESPACE(_chLookahead))
  1633. {
  1634. // allow whitespace between '=' and attribute value.
  1635. checkhr2(push(&XMLStream::skipWhiteSpace, 2));
  1636. checkhr2( skipWhiteSpace() );
  1637. }
  1638. _sSubState = 2;
  1639. // fall through
  1640. case 2:
  1641. checkhr2(pop(false));
  1642. break;
  1643. default:
  1644. INTERNALERROR;
  1645. }
  1646. return S_OK;
  1647. }
  1648. ////////////////////////////////////////////////////////////////////////
  1649. HRESULT
  1650. XMLStream::parseTable()
  1651. {
  1652. HRESULT hr = S_OK;
  1653. while (hr == S_OK && _nToken == XML_PENDING)
  1654. {
  1655. const StateEntry* pSE = &_pTable[_sSubState];
  1656. DWORD newState = pSE->_sGoto;
  1657. switch (pSE->_sOp)
  1658. {
  1659. case OP_WS:
  1660. //checkeof(_chLookahead, _lEOFError);
  1661. if (! ISWHITESPACE(_chLookahead))
  1662. return XML_E_MISSINGWHITESPACE;
  1663. // fall through
  1664. case OP_OWS:
  1665. //checkeof(_chLookahead, _lEOFError);
  1666. checkhr2(push(&XMLStream::skipWhiteSpace, (short)newState));
  1667. checkhr2(skipWhiteSpace());
  1668. //if (_fFoundPEREf) return XML_E_FOUNDPEREF;
  1669. break;
  1670. case OP_CHARWS:
  1671. //if (_fFoundPEREf) return S_OK;
  1672. mark();
  1673. //checkeof(_chLookahead, _lEOFError);
  1674. if (_chLookahead == pSE->_pch[0])
  1675. {
  1676. ADVANCE;
  1677. newState = pSE->_sGoto;
  1678. _nToken = pSE->_lDelta;
  1679. }
  1680. else if (! ISWHITESPACE(_chLookahead))
  1681. {
  1682. return XML_E_WHITESPACEORQUESTIONMARK;
  1683. }
  1684. else
  1685. newState = pSE->_sArg1;
  1686. break;
  1687. case OP_CHAR:
  1688. //if (_fFoundPEREf) return S_OK;
  1689. mark();
  1690. case OP_CHAR2:
  1691. //if (_fFoundPEREf) return S_OK;
  1692. //checkeof(_chLookahead, _lEOFError);
  1693. if (_chLookahead == pSE->_pch[0])
  1694. {
  1695. ADVANCE;
  1696. newState = pSE->_sGoto;
  1697. _nToken = pSE->_lDelta;
  1698. //if (_nToken == XML_GROUP)
  1699. //_nAttrType = XMLTYPE_NMTOKEN;
  1700. }
  1701. else
  1702. {
  1703. newState = pSE->_sArg1;
  1704. if (newState >= XML_E_PARSEERRORBASE &&
  1705. ISWHITESPACE(_chLookahead))
  1706. return XML_E_UNEXPECTED_WHITESPACE;
  1707. }
  1708. break;
  1709. case OP_PEEK:
  1710. //if (_fFoundPEREf) return S_OK;
  1711. //checkeof(_chLookahead, _lEOFError);
  1712. if (_chLookahead == pSE->_pch[0])
  1713. {
  1714. newState = pSE->_sGoto;
  1715. }
  1716. else
  1717. newState = pSE->_sArg1;
  1718. break;
  1719. case OP_NAME:
  1720. //if (_fFoundPEREf) return S_OK;
  1721. //checkeof(_chLookahead, _lEOFError);
  1722. checkhr2(push(&XMLStream::parseName, (short)newState));
  1723. checkhr2(parseName());
  1724. break;
  1725. case OP_TOKEN:
  1726. _nToken = pSE->_sArg1;
  1727. _lLengthDelta = pSE->_lDelta;
  1728. break;
  1729. case OP_POP:
  1730. _lLengthDelta = pSE->_lDelta;
  1731. if (_lLengthDelta == 0) mark();
  1732. // The _lDelta field contains a boolean flag to tell us whether this
  1733. // pop needs to check for parameter entity boundary or not.
  1734. checkhr2(pop(pSE->_lDelta == 0)); // we're done !
  1735. _nToken = pSE->_sArg1;
  1736. //_nAttrType = XMLTYPE_CDATA;
  1737. return S_OK;
  1738. case OP_STRCMP:
  1739. {
  1740. const WCHAR* t = NULL;
  1741. long len = 0;
  1742. getToken(&t,&len);
  1743. long delta = (pSE->_lDelta < 0) ? pSE->_lDelta : 0;
  1744. //if (StringEquals(pSE->_pch,t,len+delta,_fCaseInsensitive))
  1745. if (::FusionpCompareStrings(pSE->_pch, len+delta, t, len+delta, _fCaseInsensitive)==0)
  1746. {
  1747. if (pSE->_lDelta > 0)
  1748. {
  1749. _nToken = pSE->_lDelta;
  1750. _lLengthDelta = 0;
  1751. }
  1752. newState = pSE->_sGoto;
  1753. }
  1754. else
  1755. newState = pSE->_sArg1;
  1756. }
  1757. break;
  1758. case OP_COMMENT:
  1759. return push(&XMLStream::parseComment, (short)newState);
  1760. break;
  1761. case OP_CONDSECT:
  1762. //if (_fFoundPEREf) return S_OK;
  1763. // parse <![CDATA[...]]> or <![IGNORE[...]]>
  1764. return push(&XMLStream::parseCondSect, (short)newState);
  1765. case OP_SNCHAR:
  1766. //checkeof(_chLookahead, _lEOFError);
  1767. if (isStartNameChar(_chLookahead))
  1768. {
  1769. newState = pSE->_sGoto;
  1770. }
  1771. else
  1772. newState = pSE->_sArg1;
  1773. break;
  1774. case OP_EQUALS:
  1775. //if (_fFoundPEREf) return S_OK;
  1776. //checkeof(_chLookahead, _lEOFError);
  1777. checkhr2(push(&XMLStream::parseEquals, (short)newState));
  1778. checkhr2(parseEquals());
  1779. break;
  1780. case OP_ENCODING:
  1781. {
  1782. const WCHAR* t = NULL; // prefix bug fix, xiaoyuw@08/29/00
  1783. long len = 0; // prefix bug fix, xiaoyuw@08/29/00
  1784. checkhr2(_pInput->getToken(&t,&len));
  1785. checkhr2(_pInput->switchEncoding(t, len+pSE->_lDelta));
  1786. }
  1787. break;
  1788. case OP_ATTRVAL:
  1789. //if (_fFoundPEREf) return S_OK;
  1790. if (_chLookahead != L'"' && _chLookahead != L'\'')
  1791. {
  1792. return XML_E_MISSINGQUOTE;
  1793. }
  1794. _chTerminator = _chLookahead;
  1795. ADVANCE;
  1796. mark();
  1797. _fReturnAttributeValue = (pSE->_sArg1 == 1);
  1798. //checkeof(_chLookahead, _lEOFError);
  1799. return push(&XMLStream::parseAttrValue, (short)newState);
  1800. break;
  1801. } // end of switch
  1802. if (_fnState != &XMLStream::parseTable)
  1803. return S_OK;
  1804. if (newState >= XML_E_PARSEERRORBASE)
  1805. return (HRESULT)newState;
  1806. else
  1807. _sSubState = (short)newState;
  1808. } // end of while
  1809. if (_nToken == XMLStream::XML_ENDDECL)
  1810. {
  1811. return _pInput->UnFreeze();
  1812. }
  1813. return S_OK;
  1814. }
  1815. ////////////////////////////////////////////////////////////////////////
  1816. HRESULT
  1817. XMLStream::_PushChar(WCHAR ch)
  1818. {
  1819. // buffer needs to grow.
  1820. long newsize = (_lBufSize+512)*2 ;
  1821. WCHAR* newbuf = NEW ( WCHAR[newsize]);
  1822. if (newbuf == NULL)
  1823. return E_OUTOFMEMORY;
  1824. if (_pchBuffer != NULL){
  1825. ::memcpy(newbuf, _pchBuffer, sizeof(WCHAR)*_lBufLen);
  1826. delete[] _pchBuffer;
  1827. }
  1828. _lBufSize = newsize;
  1829. _pchBuffer = newbuf;
  1830. _pchBuffer[_lBufLen++] = ch;
  1831. return S_OK;
  1832. }
  1833. ////////////////////////////////////////////////////////////////////////
  1834. HRESULT
  1835. XMLStream::AdvanceTo(short substate)
  1836. {
  1837. // This method combines and advance with a state switch in one
  1838. // atomic operation that handles the E_PENDING case properly.
  1839. _sSubState = substate;
  1840. //HRESULT hr = (!_fDTD) ? _pInput->nextChar(&_chLookahead, &_fEOF) : DTDAdvance();
  1841. HRESULT hr = _pInput->nextChar(&_chLookahead, &_fEOF);
  1842. if ((hr == static_cast<HRESULT>(E_PENDING)) ||
  1843. (hr == static_cast<HRESULT>(E_DATA_AVAILABLE)) ||
  1844. (hr == static_cast<HRESULT>(E_DATA_REALLOCATE)) ||
  1845. (hr == static_cast<HRESULT>(XML_E_FOUNDPEREF)))
  1846. {
  1847. // Then we must do an advance next time around before continuing
  1848. // with previous state. Push will save the _sSubState and return
  1849. // to it.
  1850. push(&XMLStream::firstAdvance,substate);
  1851. }
  1852. return hr;
  1853. }
  1854. ////////////////////////////////////////////////////////////////////////
  1855. bool
  1856. XMLStream::PreEntityText()
  1857. {
  1858. // This is a helper function that calculates whether or not to
  1859. // return some PCDATA or WHITEPACE before an entity reference.
  1860. if (_fPCDataPending)
  1861. {
  1862. // return what we have so far.
  1863. //if (_fWhitespace && ! _fIE4Quirks) // in IE4 mode we do not have WHITESPACE nodes
  1864. // and entities are always resolved, so return
  1865. // the leading whitespace as PCDATA.
  1866. if (_fWhitespace )
  1867. _nToken = XML_WHITESPACE;
  1868. else
  1869. _nToken = XML_PCDATA;
  1870. long entityLen = _pInput->getTokenLength() - _lEntityPos;
  1871. _lLengthDelta = -entityLen;
  1872. _lMarkDelta = entityLen;
  1873. _fPCDataPending = false;
  1874. _fWhitespace = true;
  1875. return true;
  1876. }
  1877. return false;
  1878. }
  1879. ////////////////////////////////////////////////////////////////////////
  1880. HRESULT
  1881. XMLStream::ErrorCallback(HRESULT hr)
  1882. {
  1883. if (hr == static_cast<HRESULT>(E_DATA_AVAILABLE))
  1884. hr = static_cast<HRESULT>(XML_DATAAVAILABLE);
  1885. else if (hr == static_cast<HRESULT>(E_DATA_REALLOCATE))
  1886. hr = static_cast<HRESULT>(XML_DATAREALLOCATE);
  1887. return _pXMLParser->ErrorCallback(hr);
  1888. }