Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

451 lines
11 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. adllexer.cpp
  5. Abstract:
  6. Implementation of the lexer for the ADL language
  7. Author:
  8. t-eugenz - August 2000
  9. Environment:
  10. User mode only.
  11. Revision History:
  12. Created - August 2000
  13. --*/
  14. #include "adl.h"
  //
  // Sentinel codes for the characters that have special meaning to the lexer.
  // Each code lies outside the WCHAR range (all are above 0xFFFF), so a
  // sentinel can be told apart from an ordinary input character once both
  // have been widened to the same integer type.
  //

  #define CHAR_COMMA          0x10002
  #define CHAR_QUOTE          0x10003
  #define CHAR_SEMICOLON      0x10004
  #define CHAR_OPENPAREN      0x10005
  #define CHAR_CLOSEPAREN     0x10006
  #define CHAR_NULL           0x10007
  #define CHAR_NEWLINE        0x10008
  #define CHAR_RETURN         0x10009
  #define CHAR_TAB            0x1000A
  #define CHAR_SPACE          0x1000B
  #define CHAR_AT             0x1000C
  #define CHAR_SLASH          0x1000D
  #define CHAR_PERIOD         0x1000E

  //
  // States of the lexer DFA driven by AdlLexer::NextToken
  //

  #define STATE_WHITESPACE    0   // skipping blanks before a token
  #define STATE_BEGIN         1   // looking at the first character of a token
  #define STATE_IDENT         2   // inside an unquoted identifier
  #define STATE_QUOTE         3   // inside a quoted string
  #define STATE_DONE          4   // token complete, leave the DFA loop
  //
  // Translate one input character into the value the DFA switches on.
  //
  // CHAR is looked up in MAP and the resulting iterator is stored in ITER
  // (so ITER is overwritten on every use). When the lookup hits, the mapped
  // special symbol (>65535) is produced; when it misses (ITER == ITEREND),
  // the character value itself is produced.
  //
  // CHAR appears twice in the expansion, so pass an expression without
  // side effects.
  //

  #define RESOLVE_CHAR(CHAR, MAP, ITER, ITEREND) \
      ( ( ( (ITER) = (MAP).find( (CHAR) ) ) != (ITEREND) ) \
            ? (ITER)->second \
            : (CHAR) )
  45. AdlLexer::AdlLexer(IN const WCHAR *input,
  46. IN OUT AdlStatement *adlStat,
  47. IN const PADL_LANGUAGE_SPEC pLang)
  48. /*++
  49. Routine Description:
  50. Constructor for the AdlLexer. Initializes the mapping for finding special
  51. characters, and other initial state information
  52. Arguments:
  53. input - The input string
  54. adlStat - The AdlStatement instance, for token garbage collection
  55. pLang - The ADL language description
  56. Return Value:
  57. none
  58. --*/
  59. {
  60. _input = input;
  61. _pLang = pLang;
  62. _adlStat = adlStat;
  63. _position = 0;
  64. _tokCount = 0;
  65. //
  66. // Special character mapping
  67. //
  68. _mapCharCode[_pLang->CH_NULL] = CHAR_NULL;
  69. _mapCharCode[_pLang->CH_SPACE] = CHAR_SPACE;
  70. _mapCharCode[_pLang->CH_TAB] = CHAR_TAB;
  71. _mapCharCode[_pLang->CH_NEWLINE] = CHAR_NEWLINE;
  72. _mapCharCode[_pLang->CH_RETURN] = CHAR_RETURN;
  73. _mapCharCode[_pLang->CH_QUOTE] = CHAR_QUOTE;
  74. _mapCharCode[_pLang->CH_COMMA] = CHAR_COMMA;
  75. _mapCharCode[_pLang->CH_SEMICOLON] = CHAR_SEMICOLON;
  76. _mapCharCode[_pLang->CH_OPENPAREN] = CHAR_OPENPAREN;
  77. _mapCharCode[_pLang->CH_CLOSEPAREN] = CHAR_CLOSEPAREN;
  78. _mapCharCode[_pLang->CH_AT] = CHAR_AT;
  79. _mapCharCode[_pLang->CH_SLASH] = CHAR_SLASH;
  80. _mapCharCode[_pLang->CH_PERIOD] = CHAR_PERIOD;
  81. //
  82. // Only find end of map once
  83. //
  84. _iterEnd = _mapCharCode.end();
  85. //
  86. // Place all special tokens into a map, for O(log n) string searches
  87. //
  88. _mapStringToken[_pLang->SZ_TK_AND] = TK_AND;
  89. _mapStringToken[_pLang->SZ_TK_EXCEPT] = TK_EXCEPT;
  90. _mapStringToken[_pLang->SZ_TK_ON] = TK_ON;
  91. _mapStringToken[_pLang->SZ_TK_ALLOWED] = TK_ALLOWED;
  92. _mapStringToken[_pLang->SZ_TK_AS] = TK_AS;
  93. _mapStringToken[_pLang->SZ_TK_THIS_OBJECT] = TK_THIS_OBJECT;
  94. _mapStringToken[_pLang->SZ_TK_CONTAINERS] = TK_CONTAINERS;
  95. _mapStringToken[_pLang->SZ_TK_OBJECTS] = TK_OBJECTS;
  96. _mapStringToken[_pLang->SZ_TK_CONTAINERS_OBJECTS] = TK_CONTAINERS_OBJECTS;
  97. _mapStringToken[_pLang->SZ_TK_NO_PROPAGATE] = TK_NO_PROPAGATE;
  98. }
  99. DWORD AdlLexer::NextToken(OUT AdlToken **value)
  100. /*++
  101. Routine Description:
  102. This retrieves the next token from the input string. This is basically a
  103. DFA which begins in the WHITESPACE state, and runs until it reaches
  104. the DONE state, at which point it returns a token.
  105. Arguments:
  106. value - Pointer to a new token containing the string value
  107. is stored in *value
  108. Return Value:
  109. DWORD - The token type, as #define'd by YACC in tokens.h
  110. --*/
  111. {
  112. //
  113. // Initial DFA state
  114. //
  115. DWORD state = STATE_WHITESPACE;
  116. DWORD tokType = TK_ERROR;
  117. wstring curToken;
  118. DWORD dwInput;
  119. DWORD dwTokStart = 0;
  120. //
  121. // First token should be the grammar type
  122. //
  123. if( _tokCount == 0 )
  124. {
  125. _tokCount++;
  126. return _pLang->dwLanguageType;
  127. }
  128. dwInput = RESOLVE_CHAR(_input[_position], _mapCharCode, _iter, _iterEnd);
  129. while( state != STATE_DONE )
  130. {
  131. switch( state )
  132. {
  133. case STATE_WHITESPACE:
  134. switch( dwInput )
  135. {
  136. case CHAR_NULL:
  137. tokType = 0;
  138. state = STATE_DONE;
  139. break;
  140. case CHAR_NEWLINE:
  141. _position++;
  142. dwInput = RESOLVE_CHAR(_input[_position],
  143. _mapCharCode,
  144. _iter,
  145. _iterEnd);
  146. break;
  147. case CHAR_RETURN:
  148. _position++;
  149. dwInput = RESOLVE_CHAR(_input[_position],
  150. _mapCharCode,
  151. _iter,
  152. _iterEnd);
  153. break;
  154. case CHAR_SPACE:
  155. _position++;
  156. dwInput = RESOLVE_CHAR(_input[_position],
  157. _mapCharCode,
  158. _iter,
  159. _iterEnd);
  160. break;
  161. case CHAR_TAB:
  162. _position++;
  163. dwInput = RESOLVE_CHAR(_input[_position],
  164. _mapCharCode,
  165. _iter,
  166. _iterEnd);
  167. break;
  168. default:
  169. state = STATE_BEGIN;
  170. break;
  171. }
  172. break;
  173. case STATE_BEGIN:
  174. dwTokStart = _position;
  175. tokType = TK_ERROR;
  176. switch( dwInput )
  177. {
  178. case CHAR_NULL:
  179. state = STATE_DONE;
  180. break;
  181. case CHAR_COMMA:
  182. if( tokType == TK_ERROR )
  183. {
  184. tokType = TK_COMMA;
  185. }
  186. case CHAR_OPENPAREN:
  187. if( tokType == TK_ERROR )
  188. {
  189. tokType = TK_OPENPAREN;
  190. }
  191. case CHAR_CLOSEPAREN:
  192. if( tokType == TK_ERROR )
  193. {
  194. tokType = TK_CLOSEPAREN;
  195. }
  196. case CHAR_SEMICOLON:
  197. if( tokType == TK_ERROR )
  198. {
  199. tokType = TK_SEMICOLON;
  200. }
  201. case CHAR_AT:
  202. if( tokType == TK_ERROR )
  203. {
  204. tokType = TK_AT;
  205. }
  206. case CHAR_SLASH:
  207. if( tokType == TK_ERROR )
  208. {
  209. tokType = TK_SLASH;
  210. }
  211. case CHAR_PERIOD:
  212. if( tokType == TK_ERROR )
  213. {
  214. tokType = TK_PERIOD;
  215. }
  216. //
  217. // Same action for all special single-char tokens
  218. //
  219. curToken.append( &(_input[_position]), 1 );
  220. _position++;
  221. dwInput = RESOLVE_CHAR(_input[_position],
  222. _mapCharCode,
  223. _iter,
  224. _iterEnd);
  225. state = STATE_DONE;
  226. break;
  227. case CHAR_QUOTE:
  228. _position++;
  229. dwInput = RESOLVE_CHAR(_input[_position],
  230. _mapCharCode,
  231. _iter,
  232. _iterEnd);
  233. state = STATE_QUOTE;
  234. tokType = TK_IDENT;
  235. break;
  236. default:
  237. state = STATE_IDENT;
  238. tokType = TK_IDENT;
  239. break;
  240. }
  241. break;
  242. case STATE_IDENT:
  243. switch( dwInput )
  244. {
  245. case CHAR_NULL:
  246. case CHAR_COMMA:
  247. case CHAR_OPENPAREN:
  248. case CHAR_CLOSEPAREN:
  249. case CHAR_SEMICOLON:
  250. case CHAR_NEWLINE:
  251. case CHAR_RETURN:
  252. case CHAR_TAB:
  253. case CHAR_SPACE:
  254. case CHAR_AT:
  255. case CHAR_SLASH:
  256. case CHAR_PERIOD:
  257. case CHAR_QUOTE:
  258. state = STATE_DONE;
  259. break;
  260. default:
  261. curToken.append( &(_input[_position]), 1 );
  262. _position++;
  263. dwInput = RESOLVE_CHAR(_input[_position],
  264. _mapCharCode,
  265. _iter,
  266. _iterEnd);
  267. break;
  268. }
  269. break;
  270. case STATE_QUOTE:
  271. switch( dwInput )
  272. {
  273. case CHAR_NULL:
  274. case CHAR_TAB:
  275. case CHAR_NEWLINE:
  276. case CHAR_RETURN:
  277. throw AdlStatement::ERROR_UNTERMINATED_STRING;
  278. break;
  279. case CHAR_QUOTE:
  280. _position++;
  281. dwInput = RESOLVE_CHAR(_input[_position],
  282. _mapCharCode,
  283. _iter,
  284. _iterEnd);
  285. state = STATE_DONE;
  286. break;
  287. default:
  288. curToken.append( &(_input[_position]), 1 );
  289. _position++;
  290. dwInput = RESOLVE_CHAR(_input[_position],
  291. _mapCharCode,
  292. _iter,
  293. _iterEnd);
  294. break;
  295. }
  296. break;
  297. default:
  298. //
  299. // Should never get here, well-defined states
  300. //
  301. assert(FALSE);
  302. break;
  303. }
  304. }
  305. //
  306. // Done state was reached
  307. // Export the string and column/row info in YACC-form here
  308. //
  309. AdlToken *outVal;
  310. outVal = new AdlToken(curToken.c_str(), dwTokStart, _position - 1);
  311. _adlStat->AddToken(outVal);
  312. //
  313. // Check if the string is a special token, case-insensitive
  314. //
  315. if( _mapStringToken.find(outVal->GetValue()) != _mapStringToken.end() )
  316. {
  317. tokType = _mapStringToken[outVal->GetValue()];
  318. }
  319. *value = outVal;
  320. //
  321. // Set this token to be the error token. This way, if the string is
  322. // not accepted by the parser, we know at which token the parser failed
  323. // If another error occurs later, this value will be overwritten
  324. //
  325. _adlStat->SetErrorToken(outVal);
  326. _tokCount++;
  327. return tokType;
  328. }