Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

269 lines
11 KiB

/*++
Copyright (c) 1995 Microsoft Corporation
Module Name:
    lexer.cxx
Abstract:
    This file exports the class CQryLexer and other declarations that
    recognize the tokens in the string representation of the search
    filter. The format of the search filter follows RFC 1960.
Author:
    Shankara Shastry [ShankSh] 08-Jul-1996
*/
  12. #ifndef _QRYLEXER_HXX
  13. #define _QRYLEXER_HXX
  14. //
  15. // chunk of memory allocated for lexeme each time memory is needed.
  16. //
  17. #define LEXEME_UNIT_LENGTH 256
  18. //
  19. // Allowable tokens in the search string
  20. //
  21. #define TOKEN_ERROR 0
  22. #define TOKEN_LPARAN 1
  23. #define TOKEN_RPARAN 2
  24. #define TOKEN_OR 3
  25. #define TOKEN_AND 4
  26. #define TOKEN_NOT 5
  27. #define TOKEN_APPROX_EQ 6
  28. #define TOKEN_EQ 7
  29. #define TOKEN_LE 8
  30. #define TOKEN_GE 9
  31. #define TOKEN_PRESENT 10
  32. #define TOKEN_ATTRTYPE 11
  33. #define TOKEN_ATTRVAL 12
  34. #define TOKEN_ENDINPUT 13
  35. #define TOKEN_START 0
  36. //
  37. // Final states;
  38. //
  39. #define ERROR_STATE 100
  40. #define STATE_LPARAN 101
  41. #define STATE_RPARAN 102
  42. #define STATE_OR 103
  43. #define STATE_AND 104
  44. #define STATE_NOT 105
  45. #define STATE_APPORX_EQ 106
  46. #define STATE_EQ 107
  47. #define STATE_LE 108
  48. #define STATE_GE 109
  49. #define STATE_PRESENT 110
  50. #define STATE_ATTRTYPE 111
  51. #define STATE_ATTRVAL 112
  52. #define STATE_END 113
  53. #define FINAL_STATES_BEGIN 100
  54. // Since the lexical specification forces the lexer to have some knowledge
  55. // of the grammar, there are two start states where recognizing an ATTRTYPE
  56. // or ATTRVAL is valid. DFA starts with ATTRTYPE_START_STATE and switches to
  57. // ATTRVAL_START_STATE when an AttrType is recognized and vice-versa
  58. #define ATTRTYPE_START_STATE 0
  59. #define ATTRVAL_START_STATE 1
  60. #define MAX_STATES 11 // No. of states in the DFA
  61. // No. of different groups of characters for which the DFA behaves differently
  62. // For eg., all alphabetical characters generate the same behaviour and can be
  63. // considered the same as for DFA is concerned. This is mainly to reduce the
  64. // size of the table.
  65. #define MAX_CHAR_CLASSES 18
  66. // which specifies all other characters not mentioned explicitly.
  67. #define OTHER_CHAR_CLASS 14
  68. //Various actions associated with a particular entry in the DFA table.
  69. #define ACTION_DEFAULT 0
  70. #define ACTION_IGNORE_ESCAPECHAR 1
  71. #define ACTION_PUSHBACK_CHAR 2
  72. #define ACTION_PUSHBACK_2CHAR 3
  73. /* The state transition table is a table Table[i,j] with i being the current
  74. state and j being the input sets and the value Table[i,j] being the structure
  75. containing the next state and the action id to be performed. State 0 and 1 are
  76. the starting states when recognizing AttrType and AttrVal respectively.
  77. '(' ')' '|' '&' '!' '~' '=' '<' '>' '*' '\' 'alpha' 'num' '.' 'other' '\0' 'space' ';'
  78. 0 {101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {100,0}, { 3, 0}, { 4, 0}, { 5, 0}, {100,0}, { 6, 1}, { 7, 0}, { 8, 0}, {100,0}, {100,0}, {113,0}, { 0 , 0}, {100,0}, \
  79. 1 {101,0}, {102,0}, {103,0}, {104,0}, {105,0}, { 2, 0}, { 3, 0}, { 4, 0}, { 5, 0}, { 9,0}, {100,0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, {113,0}, { 1, 0}, { 9, 0}, \
  80. 2 {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {106,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}, \
  81. 3 {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {010,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107, 2}, {107,2}, \
  82. 4 {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,2}, {108,2}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}, \
  83. 5 {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,2}, {109,2}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}, \
  84. 6 { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, {100,0}, { 9 , 0}, { 9, 0}, \
  85. 7 {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,0}, {100,0}, {111,2}, { 7, 0}, { 7, 0}, {111,2}, {111,2}, {111,2}, { 7, 0}, { 7, 0}, \
  86. 8 {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {100,0}, {111,2}, {111,2}, { 8, 0}, { 8, 0}, {111,2}, {111,2}, {111, 2}, {111,2}, \
  87. 9 {112,2}, {112,2}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9,0}, { 6, 1}, { 9, 0}, { 9 0}, { 9, 0}, { 9, 0}, {112,2}, { 9 , 0}, { 9, 0}, \
  88. 10 {100,0}, {110,2}, {100,0}, {100,0}, {100,0}, {100,0}, {108,0}, {100,0}, {100,0}, {100,0}, {100,0}, {107,3}, {100,0}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}, \
  89. */
  90. #define gStateTable {\
  91. {{101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {100,0}, { 3, 0}, { 4, 0}, { 5, 0}, {100,0}, { 6, 1}, { 7, 0}, { 8, 0}, {100,0}, {100,0}, {113,0}, { 0 , 0}, {100,0}}, \
  92. {{101,0}, {102,0}, {103,0}, {104,0}, {105,0}, { 2, 0}, { 3, 0}, { 4, 0}, { 5, 0}, { 9,0}, {100,0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, {113,0}, { 1 , 0}, { 9, 0}}, \
  93. {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {106,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}}, \
  94. {{107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {012,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107, 2}, {107,2}}, \
  95. {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,2}, {108,2}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}}, \
  96. {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,2}, {109,2}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}}, \
  97. {{ 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, {100,0}, { 9 , 0}, { 9, 0}}, \
  98. {{111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,0}, {100,0}, {111,2}, { 7, 0}, { 7, 0}, {111,2}, {111,2}, {111,2}, { 7, 0}, { 7, 0}}, \
  99. {{111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {100,0}, {111,2}, {111,2}, { 8, 0}, { 8, 0}, {111,2}, {111,2}, {111, 2}, {111,2}}, \
  100. {{112,2}, {112,2}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, { 9,0}, { 6, 1}, { 9, 0}, { 9, 0}, { 9, 0}, { 9, 0}, {112,2}, { 9 , 0}, { 9, 0}}, \
  101. {{100,0}, {110,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {107,3}, {100,0}, {100,0}, {100,0}, {100,0}, {100, 0}, {100,0}}}
  102. // This is the table comtaining the chsracter class to which a particular
  103. // character belongs. This is used to index the state transition table.
  104. // Basivally, for each of the characters possible, this points to one of the
  105. // columns in the state transition table defined above.
  106. // Most of them are 14 indicating that they are 'other'
  107. #define gCharClassTable { \
  108. 15, 14, 14, 14, 14, 14, 14, 14, 14, 16, 16, 16, 16, 16, 14, 14, \
  109. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  110. 16, 4, 14, 14, 14, 14, 3, 14, 0, 1, 9, 14, 14, 14, 13, 14, \
  111. 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 14, 17, 7, 6, 8, 14, \
  112. 14, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \
  113. 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 14, 10, 14, 14, 14, \
  114. 14, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \
  115. 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 14, 2, 14, 5, 14, \
  116. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  117. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  118. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  119. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  120. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  121. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  122. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  123. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  124. }
  125. LPWSTR
  126. RemoveWhiteSpaces(
  127. LPWSTR pszText
  128. );
  129. // structure representing an entry in the DFA;
  130. typedef struct DFA_STATE {
  131. DWORD dwNextState;
  132. DWORD dwActionId;
  133. }DFA_STATE;
  134. //CLexeme maintains the lexeme corresponding to the current token
  135. class CLexeme
  136. {
  137. public:
  138. CLexeme();
  139. HRESULT
  140. PushNextChar(
  141. WCHAR wcNextChar);
  142. HRESULT
  143. PushBackChar();
  144. ~CLexeme();
  145. void
  146. ResetLexeme() { _dwIndex = 0; }
  147. LPWSTR
  148. CLexeme::GetLexeme() { return (RemoveWhiteSpaces(_pszLexeme)); }
  149. private:
  150. LPWSTR _pszLexeme;
  151. DWORD _dwMaxLength;
  152. DWORD _dwIndex;
  153. };
  154. //CQryLexer maintains all the state information and returns the next token
  155. class CQryLexer
  156. {
  157. public:
  158. // Initialize the lexer with the string szBuffer.
  159. CQryLexer(LPWSTR szBuffer);
  160. ~CQryLexer();
  161. // Return the next token and its value.
  162. HRESULT
  163. CQryLexer::GetNextToken(LPWSTR *szToken, LPDWORD pdwToken);
  164. HRESULT
  165. CQryLexer::GetCurrentToken(
  166. LPWSTR *ppszToken,
  167. LPDWORD pdwToken
  168. );
  169. private:
  170. WCHAR
  171. CQryLexer::NextChar();
  172. void
  173. CQryLexer::PushbackChar();
  174. DWORD
  175. CQryLexer::GetCharClass(WCHAR wc) {
  176. if(wc < 256)
  177. return (_pCharClassTable[wc]);
  178. else
  179. // some unicode character; put in the other class.
  180. return (OTHER_CHAR_CLASS);
  181. }
  182. // Given the currentState reached and the character just scanned and the
  183. // action id, perform the action
  184. HRESULT
  185. CQryLexer::PerformAction(
  186. DWORD dwCurrState,
  187. WCHAR wcCurrChar,
  188. DWORD dwActionId
  189. );
  190. DWORD
  191. CQryLexer::GetTokenFromState(
  192. DWORD dwCurrState
  193. );
  194. // The common DFA state transition table for all the instances of the class
  195. static DFA_STATE _pStateTable[][MAX_CHAR_CLASSES];
  196. // The common table mapping the characters to the character classes.
  197. static DWORD _pCharClassTable[];
  198. LPWSTR _Buffer; // String being analysed
  199. LPWSTR _ptr; // pointer to the next character to be analysed.
  200. DFA_STATE _currState; // maintains the state information for the DFA
  201. DWORD _dwState; // maintains the state information for the DFA
  202. DWORD _dwEndofString; // To indicate end of pattern
  203. CLexeme _lexeme;
  204. DWORD _dwStateSave; // maintains the state information for the DFA
  205. BOOL _bInitialized;
  206. BOOL _bGetNext;
  207. };
  208. #endif