You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
4.2 KiB
142 lines
4.2 KiB
// tokhtml.h - Tokens and lex state for HTML
|
|
// Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
|
|
//
|
|
// Include lex.h before including this file.
|
|
//
|
|
|
|
#ifndef __TOKHTML_H__
|
|
#define __TOKHTML_H__
|
|
|
|
#if 0
|
|
// Moved to the IDL
|
|
enum HtmlToken
|
|
{
|
|
tokUNKNOWN = tokclsError,
|
|
tokIDENTIFIER = tokclsIdentMin, // identifier/plain text
|
|
tokNEWLINE = tokclsUserMin,
|
|
//
|
|
// colored HTML items
|
|
//
|
|
tokElem, // element name
|
|
tokAttr, // attribute name
|
|
tokValue, // attribute value
|
|
tokComment, // comment
|
|
tokEntity, // entity reference: e.g. " "
|
|
tokTag, // tag delimiter
|
|
tokString, // string
|
|
tokSpace, // whitespace and unrecognized text in a tag
|
|
tokOp, // operator
|
|
tokSSS, // server-side script <%...%>
|
|
//
|
|
// parsed HTML and SGML items - tokens folded with items above
|
|
//
|
|
tokName, // NAMETOKEN
|
|
tokNum, // NUMTOKEN
|
|
tokParEnt, // parameter entity: e.g. "%name;"
|
|
tokResName, // reserved name
|
|
//
|
|
// operators - colors folded with tokOp above
|
|
//
|
|
tokOP_MIN,
|
|
tokOpDash = tokOP_MIN, // -
|
|
tokOP_SINGLE,
|
|
tokOpQuestion = tokOP_SINGLE, // ?
|
|
tokOpComma, // ,
|
|
tokOpPipe, // |
|
|
tokOpPlus, // +
|
|
tokOpEqual, // =
|
|
tokOpStar, // *
|
|
tokOpAmp, // &
|
|
tokOpCent, // %
|
|
tokOpLP, // (
|
|
tokOpRP, // )
|
|
tokOpLB, // [
|
|
tokOpRB, // ]
|
|
tokOP_MAX, // token op MAX
|
|
|
|
tokEOF
|
|
};
|
|
|
|
// the state of lexical analyser
//
// We're generally in one of two states:
//   1. scanning text
//   2. scanning tag info
//
// Within these states, the lexer can be in several substates.
//
// Text substates:
//
//   inText       HTML text content -- process markup
//                (NOTE(review): there is no inText enumerator below;
//                presumably this is the state with no text flag set -- confirm)
//   inPLAINTEXT  after a <PLAINTEXT> tag - remainder of file is not HTML
//   inCOMMENT    COMMENT content -- suppress all markup but </COMMENT>
//                color text as comment
//   inXMP        XMP content -- suppress all markup but </XMP>
//   inLISTING    LISTING content -- suppress all markup but </LISTING>
//   inSCRIPT     SCRIPT content -- color using script engine.
//
// Tag substates:
//
//   inTag        inside a tag < ... >
//   inBangTag    inside an SGML MDO tag <! ... >
//   inPITag      inside an SGML Processing Instruction tag <? ... >
//   inHTXTag     inside an ODBC HTML Extension template tag <% ... %>
//   inEndTag     inside an end tag </name>
//   inAttribute  expecting an attribute
//   inValue      expecting an attribute value (right of =)
//   inComment    inside a comment
//   inString     inside a " string, terminated by "
//   inStringA    inside a ' (Alternate) string, terminated by '
//
// Every tag, scanning, text-model and script-language value below is a
// distinct single bit, so the values can be OR-ed together into one state
// word.  The sublanguage values are not single flags: they are a small
// index stored in the bits covered by inVariant (bits 20-23).
//
enum HtmlLexState
{
    // tag types
    inTag        = 0x00000001, // < ... >
    inBangTag    = 0x00000002, // <! ... >
    inPITag      = 0x00000004, // <? ... >
    inHTXTag     = 0x00000008, // <% ... %>
    inEndTag     = 0x00000010, // </ ... >

    // tag scanning states
    inAttribute  = 0x00000020,
    inValue      = 0x00000040,

    inComment    = 0x00000080,
    inString     = 0x00000100,
    inStringA    = 0x00000200,

    // text content model states
    inPLAINTEXT  = 0x00001000,
    inCOMMENT    = 0x00002000,
    inXMP        = 0x00004000,
    inLISTING    = 0x00008000,
    inSCRIPT     = 0x00010000,

    // sublanguages
    inVariant    = 0x00F00000, // mask for sublang index
    inHTML2      = 0x00000000, // sublang index 0
    inIExplore2  = 0x00100000, // sublang index 1
    inIExplore3  = 0x00200000, // sublang index 2

    // script languages
    inJavaScript = 0x01000000,
    inVBScript   = 0x02000000,

};
|
|
|
|
// masks for subsets of the state

// any of the tag-type flags
#define INTAG (inTag|inBangTag|inPITag|inHTXTag|inEndTag)
// either string flavour (" or ')
#define INSTRING (inString|inStringA)
// every flag that applies while scanning tag info
#define TAGMASK (INTAG|inAttribute|inValue|inComment|INSTRING)
// every text content model flag
#define TEXTMASK (inPLAINTEXT|inCOMMENT|inXMP|inLISTING|inSCRIPT)
// tag and text flags together; the sublanguage and script-language bits
// are not part of this mask
#define STATEMASK (TAGMASK|TEXTMASK)
|
|
|
|
#endif
|
|
|
|
|
|
// convert state <-> sublang index
|
|
// Pull the sublanguage index out of a lex state word.
//
// lxs      lex state value
// returns  the bits of lxs selected by inVariant, shifted down by 20 so the
//          index starts at bit 0 (inVariant is 0x00F00000 in the enum kept
//          under #if 0 in this header, i.e. bits 20-23)
inline DWORD SubLangIndexFromLxs(DWORD lxs)
{
    const DWORD variantBits = lxs & inVariant;
    return variantBits >> 20UL;
}
|
|
// Turn a sublanguage index into the matching lex state bits.
//
// isl      sublanguage index
// returns  isl shifted up by 20 and masked with inVariant, so any index
//          bits that fall outside the mask are discarded
inline DWORD LxsFromSubLangIndex(DWORD isl)
{
    const DWORD shifted = isl << 20UL;
    return shifted & inVariant;
}
|
|
|
|
#endif // __TOKHTML_H__
|
|
|