Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

302 lines
9.7 KiB

  /*
  *
  * Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
  * EXEMPT: copyright change only, no build required
  *
  */
  7. #ifndef _XMLSTREAM_HXX
  8. #define _XMLSTREAM_HXX
  9. #pragma once
  10. #include "bufferedstream.hxx"
  11. #include "encodingstream.hxx"
  12. #include "_rawstack.hxx"
  13. class XMLParser;
  14. // the XMLStream class uses the error code and token types defined in the xmlparser
  15. #include <ole2.h>
  16. #include <xmlparser.h>
  //==============================================================================
  // This enum and the StateEntry struct are used for table-driven parsing of the
  // DTD-related constructs, so that the parser isn't bloated by that code. The
  // table-driven approach is about 15% slower than a hand-written parser.
  //
  // Each enumerator is the operation performed by one StateEntry row. The
  // enumerators rely on implicit numbering starting at 0; keep the order stable,
  // since state tables defined elsewhere may depend on it.
  typedef enum {
      OP_OWS,         // optional whitespace
      OP_WS,          // required whitespace
      OP_CHAR,        // char comparison, _pch[0] is char, _sArg1 is else goto state or error code
      OP_CHAR2,       // same as OP_CHAR - except it doesn't do _pInput->Mark.
      OP_PEEK,        // same as OP_CHAR - except it doesn't advance.
      OP_NAME,        // scan name
      OP_TOKEN,       // return token, _sArg1 = token
      OP_STRING,      // scan a string
      OP_EXTID,       // scan an external id.
      OP_STRCMP,      // string comparison.
      OP_POP,         // pop state
      OP_NWS,         // not whitespace conditional
      OP_SUBSET,      // skip an internal subset
      OP_PUBIDOPTION, // conditional for _fShortPubIdOption
      OP_NMTOKEN,     // NOTE(review): undocumented; presumably scans an Nmtoken - confirm
      OP_TABLE,       // push a new table. (pointer in _pch field).
      OP_STABLE,      // switch to new table. (pointer in _pch field).
      OP_COMMENT,     // NOTE(review): undocumented; presumably handles a comment - confirm
      OP_CONDSECT,    // NOTE(review): undocumented; presumably handles a conditional section - confirm
      OP_SNCHAR,      // conditional 'is start name char'
      OP_EQUALS,      // scan ' = '
      OP_ENCODING,    // switch encoding.
      OP_CHARWS,      // match char or must be white space.
      OP_ATTRVAL,     // parse attribute values. (_sArg1 = return PCDATA token or not)
      OP_PETEST,      // NOTE(review): undocumented in this header
      OP_ATTEXPAND,   // NOTE(review): undocumented in this header
      OP_NMSTRING,    // unqualified name within quote
      OP_FAKESYSTEM   // NOTE(review): undocumented in this header
  } OPCODE;
  51. typedef struct {
  52. OPCODE _sOp;
  53. const WCHAR* _pch;
  54. DWORD _sGoto;
  55. DWORD _sArg1;
  56. long _lDelta; // for when we do a Mark(), or Token if OP_CHAR
  57. } StateEntry;
  58. //================================================================================
  59. class XMLStream
  60. {
  61. public:
  62. XMLStream(XMLParser * pXMLParser);
  63. ~XMLStream();
  64. //------------------------------------------------------------------------
  65. // These are some more tokens that the XMLStream returns.
  66. // xiaoyu : only few are used in fusion manifest
  67. typedef enum
  68. {
  69. // ADDITIONAL TOKENS THAT THE PARSER PULLS UP
  70. XML_PENDING = 0, // still parsing.
  71. XML_NUMENTITYREF = XML_LASTSUBNODETYPE, // &23;
  72. XML_HEXENTITYREF, // &x0cf7;
  73. XML_BUILTINENTITYREF, //&gt;
  74. XML_TAGEND, // >
  75. XML_EMPTYTAGEND, // /> (text = tag name)
  76. XML_ENDTAG, // </ (text = tag name)
  77. XML_ENDPI, // text = pi body minus '?>'
  78. XML_ENDXMLDECL, // end of xml declaration
  79. XML_ENDDECL, // '>'
  80. XML_CLOSEPAREN,
  81. XML_ENDCONDSECT, // ']]>'
  82. XML_STARTDTDSUBSET,
  83. XML_ENDPROLOG,
  84. XML_DATAAVAILABLE,
  85. XML_DATAREALLOCATE,
  86. } XMLToken;
  87. HRESULT PushStream(
  88. /* [in] */ EncodingStream *pStm,
  89. /* [in] */ bool fExternalPE);
  90. HRESULT AppendData(
  91. /* [in] */ const BYTE *buffer,
  92. /* [in] */ long length,
  93. /* [in] */ BOOL lastBuffer);
  94. HRESULT Reset( void);
  95. HRESULT GetNextToken(
  96. /* [out] */ DWORD *token,
  97. /* [out] */ const WCHAR **text,
  98. /* [out] */ long *length,
  99. /* [out] */ long *nslen);
  100. ULONG GetLine();
  101. ULONG GetLinePosition();
  102. ULONG GetInputPosition();
  103. HRESULT GetLineBuffer(
  104. /* [out] */ const WCHAR * *buf,
  105. /* [out] */ ULONG* len,
  106. /* [out] */ ULONG* startpos);
  107. void SetFlags(
  108. /* [in] */ unsigned short usFlags);
  109. unsigned short GetFlags();
  110. HRESULT ErrorCallback(HRESULT hr);
  111. WCHAR getAttrValueQuoteChar() { return _chTerminator; }
  112. private:
  113. HRESULT init();
  114. void _init();
  115. HRESULT firstAdvance();
  116. HRESULT parseContent();
  117. HRESULT parseElement();
  118. HRESULT parseEndTag();
  119. HRESULT parsePI();
  120. HRESULT parseComment();
  121. HRESULT parseName();
  122. HRESULT parseAttributes();
  123. HRESULT parseAttrValue();
  124. HRESULT parsePCData();
  125. HRESULT parseEntityRef();
  126. HRESULT parseCondSect();
  127. HRESULT parseCData();
  128. HRESULT parseTable();
  129. HRESULT parseEquals();
  130. HRESULT skipWhiteSpace();
  131. inline void mark(long back = 0) { _pInput->Mark(back); }
  132. typedef HRESULT (XMLStream::* StateFunc)();
  133. // The state machine consists of functions where each
  134. // function can determine for itself its own substates
  135. // so that when it is reactivated by a pop() it can pick
  136. // up where it left off. The current substate is set
  137. // to zero on a push() and at pop() time it is restored
  138. // to whatever it was told to be in the push().
  139. HRESULT push(StateFunc f, short substate = 0);
  140. HRESULT pushTable(short substate = 0, const StateEntry* table = NULL, DWORD le = 0);
  141. HRESULT pop(bool boundary = true);
  142. HRESULT switchTo(StateFunc f); // pop & push
  143. // Advance and jump to state
  144. HRESULT AdvanceTo(short substate);
  145. HRESULT PopStream();
  146. HRESULT ScanHexDigits();
  147. HRESULT ScanDecimalDigits();
  148. bool PreEntityText();
  149. // Always use this function instead of calling _pInput->getToken
  150. inline void getToken(const WCHAR** ppText, long* pLen) { _pInput->getToken(ppText,pLen); }
  151. BufferedStream* getCurrentStream();
  152. StateFunc _fnState; // current function.
  153. short _sSubState; // current substate.
  154. short _sSavedState;
  155. struct StateInfo
  156. {
  157. StateFunc _fnState;
  158. short _sSubState;
  159. const StateEntry* _pTable;
  160. //DWORD _lEOFError;
  161. int _cStreamDepth;
  162. };
  163. _rawstack<StateInfo> _pStack;
  164. struct InputInfo
  165. {
  166. BufferedStream* _pInput;
  167. WCHAR _chLookahead;
  168. //bool _fPE;
  169. //bool _fExternalPE;
  170. //bool _fInternalSubset; // remember that we were in internal subset.
  171. StateFunc _fnState; // remember the state function when pushstream
  172. // it is used to check parameter entity replacement text
  173. // is properly nested with markup declarations.
  174. };
  175. _rawstack<InputInfo> _pStreams;
  176. // Cache the current value of _pStreams.used() which is used to making sure
  177. // a parameter entity doesn't pop out of the scope in which it was entered.
  178. int _cStreamDepth;
  179. BufferedStream* _pInput; // current input stream.
  180. WCHAR _chNextLookahead;
  181. bool _fWasUsingBuffer;
  182. long _lParseStringLevel;
  183. DWORD _nPreToken;
  184. DWORD _nToken;
  185. long _lLengthDelta; // amount to adjust token length by
  186. long _lMarkDelta; // amount to adjust mark position by
  187. bool _fDelayMark;
  188. bool _fFoundFirstElement; // special trick for EndProlog.
  189. WCHAR _chLookahead;
  190. bool _fWhitespace; // found whitespace while parsing PCDATA
  191. WCHAR _chTerminator;
  192. WCHAR _chEndChar; // for parseAttributes.
  193. bool _fEOF; // reached end of file.
  194. long _lNslen; // namespace length
  195. long _lNssep; // namespace separator length ':' or '::'.
  196. long _lEntityPos; // for parsing entity references.
  197. bool _fPCDataPending; // whether pcdata is pending during parseEntityRef.
  198. const WCHAR* _pchCDataState;
  199. int _cAttrCount;
  200. int _nEntityNSLen; // saved namespace info for entity references.
  201. // Switches.
  202. unsigned short _usFlags;
  203. // bool _fFloatingAmp; // used in ParseEntityRef()
  204. bool _fShortEndTags; // used in ParserEndTag()
  205. bool _fCaseInsensitive;
  206. bool _fNoNamespaces; // used in parseName()
  207. //bool _fNoWhitespaceNodes; // used in DTD data
  208. //bool _fIE4Quirks; // xiaoyu : what it means?
  209. bool _fNoDTDNodes; // only used in GetDTDNextToken(). may be deleted later
  210. //bool _fHandlePE; // This flag is used to turn on and off parameter entity handling in DTD
  211. // xiaoyu: used in ParsePI(), ParseDTD(), parseComment(),
  212. // parsePEDecl(), parseIgnoSet()
  213. // for table driven parsing.
  214. const StateEntry* _pTable;
  215. //DWORD _lEOFError; // used in parsePEDecl(), pushTable(), parseTable(),
  216. // buffer used during whitespace normalization
  217. WCHAR* _pchBuffer;
  218. long _lBufLen;
  219. long _lBufSize;
  220. bool _fFoundWhitespace;
  221. bool _fUsingBuffer;
  222. bool _fFoundNonWhitespace;
  223. bool _fCheckAttribute; // need to check the attribute name
  224. // xiaoyu : used for complicate attribute type, such as "xml:lang", "xmlns"
  225. bool _fDTD; // xiaoyu whether xml contains DTD
  226. //bool _fInternalSubset; // xiaoyu used in ParseDTD
  227. //int _cConditionalSection;
  228. //bool _fFoundPEREf;
  229. //bool _fWasDTD;
  230. // bool _fParsingNames;
  231. bool _fParsingAttDef; // used in ParseAttrValue()
  232. //int _cIgnoreSectLevel;
  233. //bool _fResolved; // used in ParseEntity();
  234. bool _fReturnAttributeValue;
  235. //int _cStreams; // used to identify if PushStream was called.
  236. // used in parseEntity();
  237. WCHAR _wcEntityValue; // used in parseEntityRef()
  238. XMLParser * _pXMLParser; // regular pointer pointing back to the parser
  239. inline HRESULT PushChar(WCHAR ch)
  240. {
  241. if (_lBufLen < _lBufSize)
  242. {
  243. _pchBuffer[_lBufLen++] = ch; return S_OK;
  244. }
  245. else return _PushChar(ch);
  246. }
  247. HRESULT _PushChar(WCHAR ch); // grow the buffer.
  248. };
  #endif // _XMLSTREAM_HXX