Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

142 lines
4.2 KiB

  1. // tokhtml.h - Tokens and lex state for HTML
  2. // Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
  3. //
  4. // Include lex.h before including this file.
  5. //
  6. #ifndef __TOKHTML_H__
  7. #define __TOKHTML_H__
  8. #if 0
  9. // Moved to the IDL
  10. enum HtmlToken
  11. {
  12. tokUNKNOWN = tokclsError,
  13. tokIDENTIFIER = tokclsIdentMin, // identifier/plain text
  14. tokNEWLINE = tokclsUserMin,
  15. //
  16. // colored HTML items
  17. //
  18. tokElem, // element name
  19. tokAttr, // attribute name
  20. tokValue, // attribute value
  21. tokComment, // comment
  22. tokEntity, // entity reference: e.g. " "
  23. tokTag, // tag delimiter
  24. tokString, // string
  25. tokSpace, // whitespace and unrecognized text in a tag
  26. tokOp, // operator
  27. tokSSS, // server-side script <%...%>
  28. //
  29. // parsed HTML and SGML items - tokens folded with items above
  30. //
  31. tokName, // NAMETOKEN
  32. tokNum, // NUMTOKEN
  33. tokParEnt, // parameter entity: e.g. "%name;"
  34. tokResName, // reserved name
  35. //
  36. // operators - colors folded with tokOp above
  37. //
  38. tokOP_MIN,
  39. tokOpDash = tokOP_MIN, // -
  40. tokOP_SINGLE,
  41. tokOpQuestion = tokOP_SINGLE, // ?
  42. tokOpComma, // ,
  43. tokOpPipe, // |
  44. tokOpPlus, // +
  45. tokOpEqual, // =
  46. tokOpStar, // *
  47. tokOpAmp, // &
  48. tokOpCent, // %
  49. tokOpLP, // (
  50. tokOpRP, // )
  51. tokOpLB, // [
  52. tokOpRB, // ]
  53. tokOP_MAX, // token op MAX
  54. tokEOF
  55. };
  56. // The state of the lexical analyser
  57. //
  58. // We're generally in one of two states:
  59. // 1. scanning text
  60. // 2. scanning tag info
  61. //
  62. // Within these states, the lexer can be in several substates.
  63. //
  64. // Text substates:
  65. //
  66. // inText HTML text content -- process markup
  67. // inPLAINTEXT after a <PLAINTEXT> tag - remainder of file is not HTML
  68. // inCOMMENT COMMENT content -- suppress all markup but </COMMENT>
  69. // color text as comment
  70. // inXMP XMP content -- suppress all markup but </XMP>
  71. // inLISTING LISTING content -- suppress all markup but </LISTING>
  72. // inSCRIPT SCRIPT content -- color using script engine.
  73. //
  74. // Tag substates:
  75. //
  76. // inTag inside a tag < ... >
  77. // inBangTag inside an SGML MDO tag <! ... >
  78. // inPITag inside an SGML Processing Instruction tag <? ... >
  79. // inHTXTag inside an ODBC HTML Extension template tag <% ... %>
  80. // inEndTag inside an end tag </name>
  81. // inAttribute expecting an attribute
  82. // inValue expecting an attribute value (right of =)
  83. // inComment inside a comment
  84. // inString inside a " string, terminated by "
  85. // inStringA inside a ' (Alternate) string, terminated by '
  86. //
  87. enum HtmlLexState
  88. {
  89. // tag types
  90. inTag = 0x00000001, // < ... >
  91. inBangTag = 0x00000002, // <! ... >
  92. inPITag = 0x00000004, // <? ... >
  93. inHTXTag = 0x00000008, // <% ... %>
  94. inEndTag = 0x00000010, // </ ... >
  95. // tag scanning states
  96. inAttribute = 0x00000020,
  97. inValue = 0x00000040,
  98. inComment = 0x00000080,
  99. inString = 0x00000100,
  100. inStringA = 0x00000200,
  101. // text content model states
  102. inPLAINTEXT = 0x00001000,
  103. inCOMMENT = 0x00002000,
  104. inXMP = 0x00004000,
  105. inLISTING = 0x00008000,
  106. inSCRIPT = 0x00010000,
  107. // sublanguages
  108. inVariant = 0x00F00000, // mask for sublang index
  109. inHTML2 = 0x00000000,
  110. inIExplore2 = 0x00100000,
  111. inIExplore3 = 0x00200000,
  112. // script languages
  113. inJavaScript = 0x01000000,
  114. inVBScript = 0x02000000,
  115. };
  116. // masks for subsets of the state
  117. #define INTAG (inTag|inBangTag|inPITag|inHTXTag|inEndTag)
  118. #define INSTRING (inString|inStringA)
  119. #define TAGMASK (INTAG|inAttribute|inValue|inComment|INSTRING)
  120. #define TEXTMASK (inPLAINTEXT|inCOMMENT|inXMP|inLISTING|inSCRIPT)
  121. #define STATEMASK (TAGMASK|TEXTMASK)
  122. #endif
  123. // convert state <-> sublang index
  124. inline DWORD SubLangIndexFromLxs(DWORD lxs) { return (lxs & inVariant) >> 20UL; }
  125. inline DWORD LxsFromSubLangIndex(DWORD isl) { return (isl << 20UL) & inVariant; }
  126. #endif // __TOKHTML_H__