// HtmParse.h : Declaration of the CHtmParse // Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved #ifndef __HTMPARSE_H_ #define __HTMPARSE_H_ #include "resource.h" // main symbols #include "guids.h" #include "lexhtml.h" #include "token.h" #define tokClsIgnore tokclsError // if you don't want to use the token class info in the rule, use this. #define cbBufPadding 0x800 // we allocate this much extra memory so that subsequent reallocs are saved #define MIN_TOK 100 // init size for token stack to keep track of nested blocks. e.g. ...
...
... // init value for number of tags #define cTBodyInit 20 // init size of nested TBODY's. we start with the assumption that we won't have more than these many nested TBODYs and reallocate if needed. #define cchspBlockMax 20 // size of spacing block index. we can't have more than 20 digit number // state flags for space preservation #define initState 0x0000 #define inChar 0x0001 #define inSpace 0x0002 #define inEOL 0x0003 #define inTab 0x0004 #define inTagOpen 0x0005 #define inTagClose 0x0006 #define inTagEq 0x0007 // used by space preservation in comments #define chCommentSp '2' #define chCommentEOL '3' #define chCommentTab '4' // Specializations for hrTokenizeAndParse #define PARSE_SPECIAL_NONE 0x00000000 #define PARSE_SPECIAL_HEAD_ONLY 0x00000001 ///////////////////////////////////////////////////////////////////////////// // CTriEditParse class ATL_NO_VTABLE CTriEditParse : public CComObjectRootEx, public CComCoClass, public ITokenGen { public: CTriEditParse(); ~CTriEditParse(); DECLARE_REGISTRY_RESOURCEID(IDR_TRIEDITPARSE) BEGIN_COM_MAP(CTriEditParse) COM_INTERFACE_ENTRY_IID(IID_ITokenGen, ITokenGen) END_COM_MAP() // ITokenGen // copied from CColorHtml public: STDMETHOD(NextToken)(LPCWSTR pszText, UINT cbText, UINT* pcbCur, DWORD * pLXS, TXTB* pToken); STDMETHOD(hrTokenizeAndParse)(HGLOBAL hOld, HGLOBAL *phNew, IStream *pStmNew, DWORD dwFlags, FilterMode mode, int cbSizeIn, UINT* pcbSizeOut, IUnknown* pUnkTrident, HGLOBAL *phgTokArray, UINT *pcMaxToken, HGLOBAL *phgDocRestore, BSTR bstrBaseURL, DWORD dwReserved); private: static long m_bInit; PSUBLANG m_rgSublang; IUnknown *m_pUnkTrident; // we cache it in hrTokenizeAndParse() HGLOBAL m_hgDocRestore; // we cache it in hrTokenizeAndParse() LPWSTR m_bstrBaseURL; // flag used to remember if we have a unicode file that has 0xff,0xfe at the begining BOOL m_fUnicodeFile; // following m_c's keep track of number of respective tags found // during parsing. e.g. m_cHtml will keep track of count of // tags INT m_cHtml; INT m_cDTC; INT m_cObj; INT m_cSSSIn; INT m_cSSSOut; INT m_cNbsp; INT m_cHdr; INT m_cFtr; INT m_cObjIn; INT m_cComment; INT m_cAImgLink; UINT m_cMaxToken; // Max of token array (pTokArray) BOOL m_fEndTagFound; // end tag found INT m_iControl; // index in applet collection BOOL m_fSpecialSSS; // found special SSS <%@....%> // used to save space preservation info HGLOBAL m_hgspInfo; WORD *m_pspInfo; WORD *m_pspInfoOut; WORD *m_pspInfoOutStart; WORD *m_pspInfoCur; UINT m_ichStartSP; // save all prev spacing info at this ich INT m_ispInfoBase; INT m_ispInfoIn; INT m_ispInfoOut; INT m_iArrayspLast; INT m_ispInfoBlock; INT m_cchspInfoTotal; BOOL m_fDontDeccItem; // we don't have counters for items that we don't process, so we use this to preserve the total count // used by code. // Trident puts in extra tags inside table // and filtering tries to remove them. HGLOBAL m_hgTBodyStack; UINT *m_pTBodyStack; INT m_iMaxTBody; INT m_iTBodyMax; // used by Page Transition DTC code // page transition dtc is a special case in filtering because // we have to maintain its location inside the head section. BOOL m_fInHdrIn; INT m_cchPTDTCObj; INT m_ichPTDTC; INT m_cchPTDTC; INT m_indexBeginBody; INT m_indexEndBody; WCHAR *m_pPTDTC; HGLOBAL m_hgPTDTC; // used by the code that recreates our own pre-Body part of the document BOOL m_fHasTitleIn; INT m_indexTitleIn; INT m_ichTitleIn; INT m_cchTitleIn; INT m_ichBeginBodyTagIn; INT m_indexHttpEquivIn; INT m_ichBeginHeadTagIn; // used by APPLET pretty-printing code int m_cAppletIn; int m_cAppletOut; // used to keep track of multiple occurances of BODY, HTML, TITLE & HEAD tags int m_cBodyTags; int m_cHtmlTags; int m_cTitleTags; int m_cHeadTags; void SetTable(DWORD lxs); void InitSublanguages(); void PreProcessToken(TOKSTRUCT *pTokArray, INT *pitokCur, LPWSTR pszText, UINT cbCur, TXTB token, DWORD lxs, INT tagID, FilterMode mode); void PostProcessToken(OLECHAR *pwOld, OLECHAR *pwNew, UINT *pcbNew, UINT cbCur, UINT cbCurSav, TXTB token, FilterMode mode, DWORD lxs, DWORD dwFlags); int ValidateTag(LPWSTR pszText); int GetTagID(LPWSTR pszText, TXTB token); HRESULT hrMarkSpacing(WCHAR *pwOld, UINT cbCur, INT *pchStartSP); void SetSPInfoState(WORD inState, WORD *pdwState, WORD *pdwStatePrev, BOOL *pfSave); BOOL FRestoreSpacing(LPWSTR pwNew, LPWSTR pwOld, UINT *pichNewCur, INT *pcchwspInfo, INT cchRange, INT ichtoktagStart, BOOL fLookback, INT index); HRESULT hrMarkOrdering(WCHAR *pwOld, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, UINT cbCur, INT *pichStartOR); BOOL FRestoreOrder(WCHAR *pwNew, WCHAR *pwOld, WORD *pspInfoOrder, UINT *pichNewCur, INT cwOrderInfo, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, INT iArrayDSPStart, INT iArrayDSPEnd, INT cchNewCopy, HGLOBAL *phgNew); void SaveSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, INT iArray, UINT *pichNewCur); void RestoreSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT iArray, UINT *pichNewCur); HRESULT ProcessToken(DWORD &lxs, TXTB &tok, LPWSTR pszText, UINT cbCur, TOKSTACK *pTokStack, INT *pitokTop, TOKSTRUCT *pTokArray, INT iArrayPos, INT tagID); void FilterHtml(LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, FilterMode mode, DWORD dwFlags); struct FilterTok { TOKEN tokBegin; TOKEN tokBegin2; // supporting token TOKEN tokClsBegin; TOKEN tokEnd; TOKEN tokEnd2; // supporting token TOKEN tokClsEnd; }; typedef void (_stdcall* PFNACTION)(CTriEditParse *, LPWSTR, LPWSTR *, UINT *, HGLOBAL *, TOKSTRUCT *, UINT*, FilterTok, INT*, UINT*, UINT*, DWORD); struct FilterRule { FilterTok ft; PFNACTION pfn; }; // Following are static functions. We could make them members, but it wasn't felt necessary then. void static fnRestoreDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreSpaceEnd(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnSaveTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); void static fnRestoreTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft, INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags); #define cRuleMax 26 /* max number of filtering rules. if you add a new rule above, change this too */ FilterRule m_FilterRule[cRuleMax]; }; #endif //__HTMPARSE_H_