Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

281 lines
14 KiB

  1. // HtmParse.h : Declaration of CTriEditParse
  2. // Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
  3. #ifndef __HTMPARSE_H_
  4. #define __HTMPARSE_H_
  5. #include "resource.h" // main symbols
  6. #include "guids.h"
  7. #include "lexhtml.h"
  8. #include "token.h"
  9. #define tokClsIgnore tokclsError // alias of tokclsError: use this in a rule if you don't want the token class info to be used.
  10. #define cbBufPadding 0x800 // extra memory (0x800 = 2048) added to allocations so that subsequent reallocs are saved
  11. #define MIN_TOK 100 // initial size of the token stack that keeps track of nested blocks, e.g. <table>...<table>...</table>...</table>
  12. // init value for number of <TBODY> tags
  13. #define cTBodyInit 20 // initial capacity for nested TBODYs. we start with the assumption that we won't have more than this many nested TBODYs and reallocate if needed.
  14. #define cchspBlockMax 20 // size of spacing block index. the index can't be a number of more than 20 digits
  15. // state values for space preservation. NOTE: these are consecutive values (0..7), not independent bits -- e.g. inEOL (0x0003) == (inChar | inSpace) -- so they cannot be OR-combined like bit flags.
  16. #define initState 0x0000
  17. #define inChar 0x0001
  18. #define inSpace 0x0002
  19. #define inEOL 0x0003
  20. #define inTab 0x0004
  21. #define inTagOpen 0x0005
  22. #define inTagClose 0x0006
  23. #define inTagEq 0x0007
  24. // used by space preservation in comments. The characters '2', '3', '4' equal the digits of inSpace (0x0002), inEOL (0x0003) and inTab (0x0004) above -- presumably intentional; TODO confirm against the implementation.
  25. #define chCommentSp '2'
  26. #define chCommentEOL '3'
  27. #define chCommentTab '4'
  28. // Specializations for hrTokenizeAndParse. NOTE(review): this header does not show which argument carries these values -- confirm against the implementation.
  29. #define PARSE_SPECIAL_NONE 0x00000000 // zero: no special bit set
  30. #define PARSE_SPECIAL_HEAD_ONLY 0x00000001 // bit 0
  31. /////////////////////////////////////////////////////////////////////////////
  32. // CTriEditParse: ATL COM object (CComObjectRootEx<CComSingleThreadModel>, coclass CLSID_TriEditParse) that implements ITokenGen. This header only declares it: the tokenizer entry points, the parser/filter state, and the fnSave*/fnRestore* handlers whose signature matches PFNACTION (the callback type held by each m_FilterRule entry). All definitions live outside this header.
  33. class ATL_NO_VTABLE CTriEditParse :
  34. public CComObjectRootEx<CComSingleThreadModel>,
  35. public CComCoClass<CTriEditParse, &CLSID_TriEditParse>,
  36. public ITokenGen
  37. {
  38. public:
  39. CTriEditParse();
  40. ~CTriEditParse();
  41. DECLARE_REGISTRY_RESOURCEID(IDR_TRIEDITPARSE)
  42. BEGIN_COM_MAP(CTriEditParse)
  43. COM_INTERFACE_ENTRY_IID(IID_ITokenGen, ITokenGen) // the only interface entry in this COM map
  44. END_COM_MAP()
  45. // ITokenGen // copied from CColorHtml
  46. public:
  47. STDMETHOD(NextToken)(LPCWSTR pszText, UINT cbText, UINT* pcbCur, DWORD * pLXS, TXTB* pToken);
  48. STDMETHOD(hrTokenizeAndParse)(HGLOBAL hOld, HGLOBAL *phNew, IStream *pStmNew, DWORD dwFlags, FilterMode mode, int cbSizeIn, UINT* pcbSizeOut, IUnknown* pUnkTrident, HGLOBAL *phgTokArray, UINT *pcMaxToken, HGLOBAL *phgDocRestore, BSTR bstrBaseURL, DWORD dwReserved); // per the member comments below, the Trident IUnknown and the doc-restore handle are cached in m_pUnkTrident / m_hgDocRestore
  49. private:
  50. static long m_bInit; // static: one value shared by all instances; its definition is outside this header
  51. PSUBLANG m_rgSublang; // presumably filled by InitSublanguages() -- TODO confirm
  52. IUnknown *m_pUnkTrident; // we cache it in hrTokenizeAndParse()
  53. HGLOBAL m_hgDocRestore; // we cache it in hrTokenizeAndParse()
  54. LPWSTR m_bstrBaseURL; // NOTE(review): declared LPWSTR although named "bstr" and hrTokenizeAndParse() takes a BSTR bstrBaseURL -- confirm ownership/lifetime in the implementation
  55. // flag used to remember if we have a unicode file that has 0xff,0xfe (the UTF-16 little-endian byte-order mark) at the beginning
  56. BOOL m_fUnicodeFile;
  57. // following m_c's keep track of number of respective tags found
  58. // during parsing. e.g. m_cHtml will keep track of count of
  59. // <html> tags
  60. INT m_cHtml;
  61. INT m_cDTC;
  62. INT m_cObj;
  63. INT m_cSSSIn;
  64. INT m_cSSSOut;
  65. INT m_cNbsp;
  66. INT m_cHdr;
  67. INT m_cFtr;
  68. INT m_cObjIn;
  69. INT m_cComment;
  70. INT m_cAImgLink;
  71. UINT m_cMaxToken; // Max of token array (pTokArray)
  72. BOOL m_fEndTagFound; // end tag found
  73. INT m_iControl; // index in applet collection
  74. BOOL m_fSpecialSSS; // found special SSS <%@....%>
  75. // used to save space preservation info
  76. HGLOBAL m_hgspInfo;
  77. WORD *m_pspInfo;
  78. WORD *m_pspInfoOut;
  79. WORD *m_pspInfoOutStart;
  80. WORD *m_pspInfoCur;
  81. UINT m_ichStartSP; // save all prev spacing info at this ich
  82. INT m_ispInfoBase;
  83. INT m_ispInfoIn;
  84. INT m_ispInfoOut;
  85. INT m_iArrayspLast;
  86. INT m_ispInfoBlock;
  87. INT m_cchspInfoTotal;
  88. BOOL m_fDontDeccItem; // we don't have counters for items that we don't process, so we use this to preserve the total count
  89. // used by <TBODY> code.
  90. // Trident puts in extra <tbody></tbody> tags inside table
  91. // and filtering tries to remove them.
  92. HGLOBAL m_hgTBodyStack;
  93. UINT *m_pTBodyStack;
  94. INT m_iMaxTBody; // NOTE(review): m_iMaxTBody and m_iTBodyMax have near-identical names; their distinct roles are not visible in this header -- confirm in the implementation
  95. INT m_iTBodyMax;
  96. // used by Page Transition DTC code
  97. // page transition dtc is a special case in filtering because
  98. // we have to maintain its location inside the head section.
  99. BOOL m_fInHdrIn;
  100. INT m_cchPTDTCObj;
  101. INT m_ichPTDTC;
  102. INT m_cchPTDTC;
  103. INT m_indexBeginBody;
  104. INT m_indexEndBody;
  105. WCHAR *m_pPTDTC;
  106. HGLOBAL m_hgPTDTC;
  107. // used by the code that recreates our own pre-Body part of the document
  108. BOOL m_fHasTitleIn;
  109. INT m_indexTitleIn;
  110. INT m_ichTitleIn;
  111. INT m_cchTitleIn;
  112. INT m_ichBeginBodyTagIn;
  113. INT m_indexHttpEquivIn;
  114. INT m_ichBeginHeadTagIn;
  115. // used by APPLET pretty-printing code
  116. int m_cAppletIn;
  117. int m_cAppletOut;
  118. // used to keep track of multiple occurrences of BODY, HTML, TITLE & HEAD tags. NOTE(review): m_cHtmlTags here and m_cHtml above are both described as counting html tags -- confirm how they differ.
  119. int m_cBodyTags;
  120. int m_cHtmlTags;
  121. int m_cTitleTags;
  122. int m_cHeadTags;
  123. void SetTable(DWORD lxs);
  124. void InitSublanguages();
  125. void PreProcessToken(TOKSTRUCT *pTokArray, INT *pitokCur, LPWSTR pszText, UINT cbCur, TXTB token, DWORD lxs, INT tagID, FilterMode mode);
  126. void PostProcessToken(OLECHAR *pwOld, OLECHAR *pwNew, UINT *pcbNew, UINT cbCur, UINT cbCurSav, TXTB token, FilterMode mode, DWORD lxs, DWORD dwFlags);
  127. int ValidateTag(LPWSTR pszText);
  128. int GetTagID(LPWSTR pszText, TXTB token);
  129. HRESULT hrMarkSpacing(WCHAR *pwOld, UINT cbCur, INT *pchStartSP);
  130. void SetSPInfoState(WORD inState, WORD *pdwState, WORD *pdwStatePrev, BOOL *pfSave);
  131. BOOL FRestoreSpacing(LPWSTR pwNew, LPWSTR pwOld, UINT *pichNewCur, INT *pcchwspInfo, INT cchRange, INT ichtoktagStart, BOOL fLookback, INT index);
  132. HRESULT hrMarkOrdering(WCHAR *pwOld, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, UINT cbCur, INT *pichStartOR);
  133. BOOL FRestoreOrder(WCHAR *pwNew, WCHAR *pwOld, WORD *pspInfoOrder, UINT *pichNewCur, INT cwOrderInfo, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, INT iArrayDSPStart, INT iArrayDSPEnd, INT cchNewCopy, HGLOBAL *phgNew);
  134. void SaveSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, INT iArray, UINT *pichNewCur);
  135. void RestoreSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT iArray, UINT *pichNewCur); // NOTE(review): iArray is UINT here but INT in SaveSpacingSpecial -- confirm which is intended
  136. HRESULT ProcessToken(DWORD &lxs, TXTB &tok, LPWSTR pszText, UINT cbCur, TOKSTACK *pTokStack, INT *pitokTop, TOKSTRUCT *pTokArray, INT iArrayPos, INT tagID);
  137. void FilterHtml(LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, FilterMode mode, DWORD dwFlags);
  138. struct FilterTok // begin/end token description; embedded in FilterRule and passed by value to every PFNACTION callback
  139. {
  140. TOKEN tokBegin;
  141. TOKEN tokBegin2; // supporting token
  142. TOKEN tokClsBegin; // token class for the begin side; per its comment, tokClsIgnore (defined near the top of this header) is for rules that don't want the class info
  143. TOKEN tokEnd;
  144. TOKEN tokEnd2; // supporting token
  145. TOKEN tokClsEnd; // token class for the end side
  146. };
  147. typedef void (_stdcall* PFNACTION)(CTriEditParse *, LPWSTR, LPWSTR *, UINT *, HGLOBAL *, TOKSTRUCT *, UINT*, FilterTok, INT*, UINT*, UINT*, DWORD); // callback type; parameter types match the fn* handlers below (ptep, pwOld, ppwNew, pcchNew, phgNew, pTokArray, piArrayStart, ft, pcHtml, pichNewCur, pichBeginCopy, dwFlags). NOTE(review): the fn* declarations carry no explicit calling convention, so they match _stdcall only if that is the build default -- confirm
  148. struct FilterRule // pairs a FilterTok with a PFNACTION callback
  149. {
  150. FilterTok ft;
  151. PFNACTION pfn;
  152. };
  153. // Following are static member functions (no implicit this; the instance is passed explicitly as ptep). We could make them non-static members, but it wasn't felt necessary then. They all share the same parameter list.
  154. void static fnRestoreDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  155. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  156. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  157. void static fnSaveDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  158. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  159. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  160. void static fnRestoreSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  161. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  162. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  163. void static fnSaveSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  164. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  165. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  166. void static fnRestoreHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  167. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  168. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  169. void static fnSaveHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  170. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  171. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  172. void static fnSaveNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  173. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  174. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  175. DWORD dwFlags);
  176. void static fnRestoreNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  177. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  178. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  179. DWORD dwFlags);
  180. void static fnSaveHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  181. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  182. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  183. DWORD dwFlags);
  184. void static fnRestoreHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  185. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  186. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  187. DWORD dwFlags);
  188. void static fnSaveFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  189. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  190. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  191. DWORD dwFlags);
  192. void static fnRestoreFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  193. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  194. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
  195. DWORD dwFlags);
  196. void static fnRestoreSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  197. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  198. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  199. void static fnSaveSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  200. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  201. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  202. void static fnRestoreSpaceEnd(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  203. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  204. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  205. void static fnRestoreObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  206. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  207. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  208. void static fnSaveObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  209. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  210. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  211. void static fnRestoreTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  212. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  213. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  214. void static fnSaveTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  215. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  216. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  217. void static fnSaveApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  218. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  219. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  220. void static fnRestoreApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  221. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  222. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  223. void static fnSaveAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  224. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  225. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  226. void static fnRestoreAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  227. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  228. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  229. void static fnSaveComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  230. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  231. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  232. void static fnRestoreComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  233. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  234. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  235. void static fnSaveTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  236. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  237. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  238. void static fnRestoreTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
  239. TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
  240. INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
  241. #define cRuleMax 26 /* max number of filtering rules. if you add a new rule above, change this too. This is a preprocessor macro, so it is not scoped to the class. NOTE(review): 27 fn* handlers are declared above versus 26 here -- confirm whether every handler is registered as its own rule. */
  242. FilterRule m_FilterRule[cRuleMax];
  243. };
  244. #endif //__HTMPARSE_H_