You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
281 lines
14 KiB
281 lines
14 KiB
// HtmParse.h : Declaration of the CHtmParse
|
|
// Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
|
|
|
|
#ifndef __HTMPARSE_H_
|
|
#define __HTMPARSE_H_
|
|
|
|
#include "resource.h" // main symbols
|
|
#include "guids.h"
|
|
#include "lexhtml.h"
|
|
#include "token.h"
|
|
|
|
#define tokClsIgnore tokclsError // if you don't want to use the token class info in the rule, use this.
|
|
|
|
#define cbBufPadding 0x800 // we allocate this much extra memory so that subsequent reallocs are saved
|
|
#define MIN_TOK 100 // init size for token stack to keep track of nested blocks. e.g. <table>...<table>...</table>...</table>
|
|
|
|
// init value for number of <TBODY> tags
|
|
#define cTBodyInit 20 // init size of nested TBODY's. we start with the assumption that we won't have more than these many nested TBODYs and reallocate if needed.
|
|
|
|
#define cchspBlockMax 20 // size of spacing block index. we can't have more than 20 digit number
|
|
// state flags for space preservation
|
|
#define initState 0x0000
|
|
#define inChar 0x0001
|
|
#define inSpace 0x0002
|
|
#define inEOL 0x0003
|
|
#define inTab 0x0004
|
|
#define inTagOpen 0x0005
|
|
#define inTagClose 0x0006
|
|
#define inTagEq 0x0007
|
|
|
|
// used by space preservation in comments
|
|
#define chCommentSp '2'
|
|
#define chCommentEOL '3'
|
|
#define chCommentTab '4'
|
|
|
|
// Specializations for hrTokenizeAndParse
|
|
#define PARSE_SPECIAL_NONE 0x00000000
|
|
#define PARSE_SPECIAL_HEAD_ONLY 0x00000001
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
// CTriEditParse
|
|
class ATL_NO_VTABLE CTriEditParse :
|
|
public CComObjectRootEx<CComSingleThreadModel>,
|
|
public CComCoClass<CTriEditParse, &CLSID_TriEditParse>,
|
|
public ITokenGen
|
|
{
|
|
public:
|
|
CTriEditParse();
|
|
~CTriEditParse();
|
|
|
|
DECLARE_REGISTRY_RESOURCEID(IDR_TRIEDITPARSE)
|
|
|
|
BEGIN_COM_MAP(CTriEditParse)
|
|
COM_INTERFACE_ENTRY_IID(IID_ITokenGen, ITokenGen)
|
|
END_COM_MAP()
|
|
|
|
|
|
|
|
// ITokenGen // copied from CColorHtml
|
|
public:
|
|
STDMETHOD(NextToken)(LPCWSTR pszText, UINT cbText, UINT* pcbCur, DWORD * pLXS, TXTB* pToken);
|
|
STDMETHOD(hrTokenizeAndParse)(HGLOBAL hOld, HGLOBAL *phNew, IStream *pStmNew, DWORD dwFlags, FilterMode mode, int cbSizeIn, UINT* pcbSizeOut, IUnknown* pUnkTrident, HGLOBAL *phgTokArray, UINT *pcMaxToken, HGLOBAL *phgDocRestore, BSTR bstrBaseURL, DWORD dwReserved);
|
|
|
|
private:
|
|
static long m_bInit;
|
|
PSUBLANG m_rgSublang;
|
|
|
|
IUnknown *m_pUnkTrident; // we cache it in hrTokenizeAndParse()
|
|
HGLOBAL m_hgDocRestore; // we cache it in hrTokenizeAndParse()
|
|
LPWSTR m_bstrBaseURL;
|
|
|
|
// flag used to remember if we have a unicode file that has 0xff,0xfe at the begining
|
|
BOOL m_fUnicodeFile;
|
|
|
|
// following m_c's keep track of number of respective tags found
|
|
// during parsing. e.g. m_cHtml will keep track of count of
|
|
// <html> tags
|
|
INT m_cHtml;
|
|
INT m_cDTC;
|
|
INT m_cObj;
|
|
INT m_cSSSIn;
|
|
INT m_cSSSOut;
|
|
INT m_cNbsp;
|
|
INT m_cHdr;
|
|
INT m_cFtr;
|
|
INT m_cObjIn;
|
|
INT m_cComment;
|
|
INT m_cAImgLink;
|
|
|
|
UINT m_cMaxToken; // Max of token array (pTokArray)
|
|
BOOL m_fEndTagFound; // end tag found
|
|
INT m_iControl; // index in applet collection
|
|
BOOL m_fSpecialSSS; // found special SSS <%@....%>
|
|
|
|
// used to save space preservation info
|
|
HGLOBAL m_hgspInfo;
|
|
WORD *m_pspInfo;
|
|
WORD *m_pspInfoOut;
|
|
WORD *m_pspInfoOutStart;
|
|
WORD *m_pspInfoCur;
|
|
UINT m_ichStartSP; // save all prev spacing info at this ich
|
|
INT m_ispInfoBase;
|
|
INT m_ispInfoIn;
|
|
INT m_ispInfoOut;
|
|
INT m_iArrayspLast;
|
|
INT m_ispInfoBlock;
|
|
INT m_cchspInfoTotal;
|
|
BOOL m_fDontDeccItem; // we don't have counters for items that we don't process, so we use this to preserve the total count
|
|
|
|
// used by <TBODY> code.
|
|
// Trident puts in extra <tbody></tbody> tags inside table
|
|
// and filtering tries to remove them.
|
|
HGLOBAL m_hgTBodyStack;
|
|
UINT *m_pTBodyStack;
|
|
INT m_iMaxTBody;
|
|
INT m_iTBodyMax;
|
|
|
|
// used by Page Transition DTC code
|
|
// page transition dtc is a special case in filtering because
|
|
// we have to maintain its location inside the head section.
|
|
BOOL m_fInHdrIn;
|
|
INT m_cchPTDTCObj;
|
|
INT m_ichPTDTC;
|
|
INT m_cchPTDTC;
|
|
INT m_indexBeginBody;
|
|
INT m_indexEndBody;
|
|
WCHAR *m_pPTDTC;
|
|
HGLOBAL m_hgPTDTC;
|
|
|
|
// used by the code that recreates our own pre-Body part of the document
|
|
BOOL m_fHasTitleIn;
|
|
INT m_indexTitleIn;
|
|
INT m_ichTitleIn;
|
|
INT m_cchTitleIn;
|
|
INT m_ichBeginBodyTagIn;
|
|
INT m_indexHttpEquivIn;
|
|
INT m_ichBeginHeadTagIn;
|
|
|
|
// used by APPLET pretty-printing code
|
|
int m_cAppletIn;
|
|
int m_cAppletOut;
|
|
|
|
// used to keep track of multiple occurances of BODY, HTML, TITLE & HEAD tags
|
|
int m_cBodyTags;
|
|
int m_cHtmlTags;
|
|
int m_cTitleTags;
|
|
int m_cHeadTags;
|
|
|
|
void SetTable(DWORD lxs);
|
|
void InitSublanguages();
|
|
void PreProcessToken(TOKSTRUCT *pTokArray, INT *pitokCur, LPWSTR pszText, UINT cbCur, TXTB token, DWORD lxs, INT tagID, FilterMode mode);
|
|
void PostProcessToken(OLECHAR *pwOld, OLECHAR *pwNew, UINT *pcbNew, UINT cbCur, UINT cbCurSav, TXTB token, FilterMode mode, DWORD lxs, DWORD dwFlags);
|
|
int ValidateTag(LPWSTR pszText);
|
|
int GetTagID(LPWSTR pszText, TXTB token);
|
|
HRESULT hrMarkSpacing(WCHAR *pwOld, UINT cbCur, INT *pchStartSP);
|
|
void SetSPInfoState(WORD inState, WORD *pdwState, WORD *pdwStatePrev, BOOL *pfSave);
|
|
BOOL FRestoreSpacing(LPWSTR pwNew, LPWSTR pwOld, UINT *pichNewCur, INT *pcchwspInfo, INT cchRange, INT ichtoktagStart, BOOL fLookback, INT index);
|
|
HRESULT hrMarkOrdering(WCHAR *pwOld, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, UINT cbCur, INT *pichStartOR);
|
|
BOOL FRestoreOrder(WCHAR *pwNew, WCHAR *pwOld, WORD *pspInfoOrder, UINT *pichNewCur, INT cwOrderInfo, TOKSTRUCT *pTokArray, INT iArrayStart, INT iArrayEnd, INT iArrayDSPStart, INT iArrayDSPEnd, INT cchNewCopy, HGLOBAL *phgNew);
|
|
void SaveSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, INT iArray, UINT *pichNewCur);
|
|
void RestoreSpacingSpecial(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR *ppwNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, UINT iArray, UINT *pichNewCur);
|
|
|
|
HRESULT ProcessToken(DWORD &lxs, TXTB &tok, LPWSTR pszText, UINT cbCur, TOKSTACK *pTokStack, INT *pitokTop, TOKSTRUCT *pTokArray, INT iArrayPos, INT tagID);
|
|
void FilterHtml(LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew, TOKSTRUCT *pTokArray, FilterMode mode, DWORD dwFlags);
|
|
|
|
|
|
struct FilterTok
|
|
{
|
|
TOKEN tokBegin;
|
|
TOKEN tokBegin2; // supporting token
|
|
TOKEN tokClsBegin;
|
|
TOKEN tokEnd;
|
|
TOKEN tokEnd2; // supporting token
|
|
TOKEN tokClsEnd;
|
|
};
|
|
|
|
typedef void (_stdcall* PFNACTION)(CTriEditParse *, LPWSTR, LPWSTR *, UINT *, HGLOBAL *, TOKSTRUCT *, UINT*, FilterTok, INT*, UINT*, UINT*, DWORD);
|
|
struct FilterRule
|
|
{
|
|
FilterTok ft;
|
|
PFNACTION pfn;
|
|
};
|
|
|
|
// Following are static functions. We could make them members, but it wasn't felt necessary then.
|
|
void static fnRestoreDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveDTC(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
|
|
void static fnRestoreSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveSSS(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
|
|
void static fnRestoreHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveHtmlTag(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnRestoreNBSP(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnSaveHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnRestoreHdr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnSaveFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnRestoreFtr(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy,
|
|
DWORD dwFlags);
|
|
void static fnRestoreSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveSpace(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreSpaceEnd(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveObject(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveTbody(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreApplet(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreAImgLink(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreComment(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnSaveTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
void static fnRestoreTextArea(CTriEditParse *ptep, LPWSTR pwOld, LPWSTR* ppwNew, UINT *pcchNew, HGLOBAL *phgNew,
|
|
TOKSTRUCT *pTokArray, UINT *piArrayStart, FilterTok ft,
|
|
INT *pcHtml, UINT *pichNewCur, UINT *pichBeginCopy, DWORD dwFlags);
|
|
|
|
#define cRuleMax 26 /* max number of filtering rules. if you add a new rule above, change this too */
|
|
FilterRule m_FilterRule[cRuleMax];
|
|
|
|
};
|
|
|
|
|
|
#endif //__HTMPARSE_H_
|