Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

325 lines
11 KiB

  1. /*++
  2. Copyright (c) 1995 Microsoft Corporation
  3. Module Name:
  4. lexer.cxx
  5. Abstract:
  6. This file exports the class the class CLexer and other declarations
  7. that recognize the tokens in the string repressentation of the search
  8. filter. The format of the search filter according to the specification of
  9. Minimal SQL Grammar which is a subset of ANSI SQL 92.
  10. Author:
  11. Shankara Shastry [ShankSh] 13-Dec-1996
  12. */
  13. /*
  14. #include <stdlib.h>
  15. #include <nt.h>
  16. #include <ntrtl.h>
  17. #include <nturtl.h>
  18. #include <windows.h>
  19. #include <wtypes.h>
  20. */
  21. #include "dswarn.h"
  22. #include "..\..\..\include\procs.hxx"
  23. //
  24. // chunk of memory allocated for lexeme each time memory is needed.
  25. //
  26. #define LEXEME_UNIT_LENGTH 256
  27. //
  28. // Allowable tokens in the search string
  29. //
  30. #define TOKEN_ERROR 0
  31. #define TOKEN_EQ 1
  32. #define TOKEN_STAR 2
  33. #define TOKEN_LPARAN 3
  34. #define TOKEN_RPARAN 4
  35. #define TOKEN_INTEGER_LITERAL 5
  36. #define TOKEN_REAL_LITERAL 6
  37. #define TOKEN_STRING_LITERAL 7
  38. #define TOKEN_USER_DEFINED_NAME 8
  39. #define TOKEN_COMMA 9
  40. #define TOKEN_LT 10
  41. #define TOKEN_GT 11
  42. #define TOKEN_LE 12
  43. #define TOKEN_GE 13
  44. #define TOKEN_NE 14
  45. #define TOKEN_SELECT 15
  46. #define TOKEN_ALL 16
  47. #define TOKEN_FROM 17
  48. #define TOKEN_WHERE 18
  49. #define TOKEN_BOOLEAN_LITERAL 19
  50. #define TOKEN_AND 20
  51. #define TOKEN_OR 21
  52. #define TOKEN_NOT 22
  53. #define TOKEN_ORDER 23
  54. #define TOKEN_BY 24
  55. #define TOKEN_ASC 25
  56. #define TOKEN_DESC 26
  57. #define TOKEN_END 27
  58. #define TOKEN_START 0
  59. #define MAX_KEYWORD_LEN 20
  60. #define gKWTable { \
  61. L"SELECT", \
  62. L"ALL", \
  63. L"FROM", \
  64. L"WHERE", \
  65. L"AND", \
  66. L"OR", \
  67. L"NOT", \
  68. L"TRUE", \
  69. L"FALSE", \
  70. L"ON", \
  71. L"OFF", \
  72. L"YES", \
  73. L"NO", \
  74. L"ORDER", \
  75. L"BY", \
  76. L"ASC", \
  77. L"DESC", \
  78. L"" }
  79. #define gKW2Token {\
  80. TOKEN_SELECT, \
  81. TOKEN_ALL, \
  82. TOKEN_FROM, \
  83. TOKEN_WHERE, \
  84. TOKEN_AND, \
  85. TOKEN_OR, \
  86. TOKEN_NOT, \
  87. TOKEN_BOOLEAN_LITERAL, \
  88. TOKEN_BOOLEAN_LITERAL, \
  89. TOKEN_BOOLEAN_LITERAL, \
  90. TOKEN_BOOLEAN_LITERAL, \
  91. TOKEN_BOOLEAN_LITERAL, \
  92. TOKEN_BOOLEAN_LITERAL, \
  93. TOKEN_ORDER, \
  94. TOKEN_BY, \
  95. TOKEN_ASC, \
  96. TOKEN_DESC, \
  97. TOKEN_ERROR }
  98. //
  99. // Final states;
  100. //
  101. #define STATE_ERROR 100
  102. #define STATE_EQ 101
  103. #define STATE_STAR 102
  104. #define STATE_LPARAN 103
  105. #define STATE_RPARAN 104
  106. #define STATE_INTEGER_LITERAL 105
  107. #define STATE_REAL_LITERAL 106
  108. #define STATE_STRING_LITERAL 107
  109. #define STATE_USER_DEFINED_NAME 108
  110. #define STATE_COMMA 109
  111. #define STATE_LT 110
  112. #define STATE_GT 111
  113. #define STATE_LE 112
  114. #define STATE_GE 113
  115. #define STATE_NE 114
  116. #define STATE_SELECT 115
  117. #define STATE_ALL 116
  118. #define STATE_FROM 117
  119. #define STATE_WHERE 118
  120. #define STATE_BOOLEAN_LITERAL 119
  121. #define STATE_AND 120
  122. #define STATE_OR 121
  123. #define STATE_NOT 122
  124. #define STATE_ORDER 123
  125. #define STATE_BY 124
  126. #define STATE_ASC 125
  127. #define STATE_DESC 126
  128. #define STATE_END 127
  129. #define START_STATE 0
  130. #define FINAL_STATES_BEGIN 100
  131. #define MAX_DFA_STATES 12 // No. of states in the DFA
  132. // No. of different groups of characters for which the DFA behaves differently
  133. // For eg., all alphabetical characters generate the same behaviour and can be
  134. // considered the same as for DFA is concerned. This is mainly to reduce the
  135. // size of the table.
  136. #define MAX_CHAR_CLASSES 17
  137. // which specifies all other characters not mentioned explicitly.
  138. #define OTHER_CHAR_CLASS 14
  139. //Various actions associated with a particular entry in the DFA table.
  140. #define ACTION_DEFAULT 0
  141. #define ACTION_IGNORE_ESCAPECHAR 1
  142. #define ACTION_PUSHBACK_CHAR 2
  143. /* The state transition table is a table Table[i,j] with i being the current
  144. state and j being the input sets and the value Table[i,j] being the structure
  145. containing the next state and the action id to be performed. State 0 and 1 are
  146. the starting states when recognizing AttrType and AttrVal respectively.*/
  147. /* '<' '>' '=' '*' '(' ')' '+' '-' alpha-{e,E} digit '.' ''' {E,e} 'space' 'other' '\0' ',' */
  148. #define gStateTable {\
  149. /* 0 */ {{ 1, 0}, { 2, 0}, {101,0}, {102,0}, {103,0}, {104,0}, { 4, 0}, { 4, 0}, { 3, 0}, { 5, 0}, { 6, 0}, {10, 1}, { 3, 0}, { 0 ,1}, {100,0}, {127,0}, {109,0}}, \
  150. /* 1 */ {{110,2}, {114,0}, {112,0}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}, {110,2}}, \
  151. /* 2 */ {{111,2}, {111,2}, {113,0}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}}, \
  152. /* 3 */ {{108,2}, {108,2}, {108,2}, { 3,0}, {108,2}, {108,2}, { 3,0}, { 3,0}, { 3, 0}, { 3, 0}, { 3,0}, { 3,0}, { 3, 0}, {108,2}, {108,2}, {108,2}, {108,2}}, \
  153. /* 4 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 5, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
  154. /* 5 */ {{105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, {105,2}, { 5, 0}, { 6, 0}, {105,2}, { 8, 0}, {105,2}, {105,2}, {105,2}, {105,2}}, \
  155. /* 6 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 7, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
  156. /* 7 */ {{106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, { 7, 0}, {106,2}, {106,2}, { 8, 0}, {106,2}, {106,2}, {106,2}, {106,2}}, \
  157. /* 8 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, { 9, 0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,2}, {100,0}}, \
  158. /* 9 */ {{106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, { 9, 0}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}, {106,2}}, \
  159. /* 10*/ {{ 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 10,0}, { 11,1}, { 10,0}, { 10,0}, { 10,0}, {100,2}, { 10,0}}, \
  160. /* 11*/ {{107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, { 10,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}}}
  161. // This is the table containing the character class to which a particular
  162. // character belongs. This is used to index the state transition table.
  163. // Basically, for each of the characters possible, this points to one of the
  164. // columns in the state transition table defined above.
  165. // Most of them are 14 indicating that they are 'other'
  166. #define gCharClassTable { \
  167. 15, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 14, 14, \
  168. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  169. 13, 14, 14, 14, 14, 14, 14, 11, 4, 5, 3, 6, 16, 7, 10, 14, \
  170. 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, 14, 0, 2, 1, 14, \
  171. 14, 8, 8, 8, 8, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \
  172. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 14, 14, 14, 14, \
  173. 14, 8, 8, 8, 8, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \
  174. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 14, 14, 14, 14, 14, \
  175. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  176. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  177. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  178. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  179. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  180. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  181. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  182. 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
  183. }
  184. // structure representing an entry in the DFA;
  185. typedef struct DFA_STATE {
  186. DWORD dwNextState;
  187. DWORD dwActionId;
  188. }DFA_STATE;
  189. //CLexeme maintains the lexeme corresponding to the current token
  190. class CLexeme
  191. {
  192. public:
  193. CLexeme();
  194. HRESULT
  195. PushNextChar(
  196. WCHAR wcNextChar);
  197. ~CLexeme();
  198. void
  199. ResetLexeme() { _dwIndex = 0; }
  200. LPWSTR
  201. CLexeme::GetLexeme() { return (_pszLexeme); }
  202. private:
  203. LPWSTR _pszLexeme;
  204. DWORD _dwMaxLength;
  205. DWORD _dwIndex;
  206. };
  207. //CLexer maintains all the state information and returns the next token
  208. class CLexer
  209. {
  210. public:
  211. // Initialize the lexer with the string szBuffer.
  212. CLexer(LPWSTR szBuffer);
  213. ~CLexer();
  214. // Return the next token and its value.
  215. HRESULT
  216. CLexer::GetNextToken(LPWSTR *szToken, LPDWORD pdwToken);
  217. HRESULT
  218. CLexer::GetCurrentToken(
  219. LPWSTR *ppszToken,
  220. LPDWORD pdwToken
  221. );
  222. private:
  223. WCHAR
  224. CLexer::NextChar();
  225. void
  226. CLexer::PushbackChar();
  227. DWORD
  228. CLexer::GetCharClass(WCHAR wc) {
  229. if(wc < 256)
  230. return (_pCharClassTable[wc]);
  231. else
  232. // some unicode character; put in the other class.
  233. return (OTHER_CHAR_CLASS);
  234. }
  235. // Given the currentState reached and the character just scanned and the
  236. // action id, perform the action
  237. HRESULT
  238. CLexer::PerformAction(
  239. DWORD dwCurrState,
  240. WCHAR wcCurrChar,
  241. DWORD dwActionId
  242. );
  243. DWORD
  244. CLexer::GetTokenFromState(
  245. DWORD dwCurrState
  246. );
  247. // The common DFA state transition table for all the instances of the class
  248. static DFA_STATE _pStateTable[][MAX_CHAR_CLASSES];
  249. // The common table mapping the characters to the character classes.
  250. static DWORD _pCharClassTable[];
  251. // The table to hold the keywords
  252. static WCHAR _pKeywordTable[][MAX_KEYWORD_LEN];
  253. static DWORD _pKW2Token[];
  254. LPWSTR _Buffer; // String being analysed
  255. LPWSTR _ptr; // pointer to the next character to be analysed.
  256. DFA_STATE _currState; // maintains the state information for the DFA
  257. DWORD _dwState; // maintains the state information for the DFA
  258. DWORD _dwEndofString; // To indicate end of pattern
  259. CLexeme _lexeme;
  260. DWORD _dwStateSave; // maintains the state information for the DFA
  261. BOOL _bInitialized;
  262. };