Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

528 lines
20 KiB

  1. /************************************************************************/
  2. /* */
  3. /* RCPP - Resource Compiler Pre-Processor for NT system */
  4. /* */
  5. /* P0GETTOK.C - Tokenization routines */
  6. /* */
  7. /* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
  8. /* */
  9. /************************************************************************/
  10. #include "rc.h"
  11. /************************************************************************
  12. ** MAP_TOKEN : a token has two representations and additional information.
  13. ** (ex : const, has basic token of L_CONST,
  14. ** mapped token of [L_TYPE | L_MODIFIER]
  15. ** and info based on what the map token is)
  16. ** MAP_AND_FILL : has two representations, but none of the extra info.
  17. ** (ex : '<', has basic of L_LT, and map of L_RELOP)
  18. ** NOMAP_TOKEN : has 1 representation and additional info.
  19. ** (ex: a string, basic and 'map' type L_STRING and ptrs to the actual str)
  20. ** NOMAP_AND_FILL : has 1 representation and no additional info.
  21. ** (ex : 'while', has basic and 'map' of L_WHILE)
  22. ** the FILL versions fill the token with the basic token type.
  23. ************************************************************************/
  24. #define MAP_TOKEN(otok)\
  25. (Basic_token = (otok), TS_VALUE(Basic_token))
  26. #define MAP_AND_FILL(otok)\
  27. (yylval.yy_token = Basic_token = (otok), TS_VALUE(Basic_token))
  28. #define NOMAP_TOKEN(otok)\
  29. (Basic_token = (otok))
  30. #define NOMAP_AND_FILL(otok)\
  31. (yylval.yy_token = Basic_token = (otok))
  32. /************************************************************************/
  33. /* yylex - main tokenization routine */
  34. /************************************************************************/
  35. token_t
  36. yylex(
  37. void
  38. )
  39. {
  40. REG WCHAR last_mapped;
  41. WCHAR mapped_c;
  42. WCHAR buf[5];
  43. REG token_t lex_token;
  44. for(;;) {
  45. last_mapped = mapped_c = CHARMAP(GETCH());
  46. first_switch:
  47. switch(mapped_c) {
  48. case LX_EACH:
  49. case LX_ASCII:
  50. if (fAFXSymbols && PREVCH() == SYMUSESTART || PREVCH() == SYMDEFSTART
  51. || PREVCH() == SYMDELIMIT) {
  52. myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
  53. continue;
  54. }
  55. Msg_Temp = GET_MSG(2018);
  56. SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
  57. error(2018);
  58. continue;
  59. break;
  60. case LX_OBRACE:
  61. return(NOMAP_AND_FILL(L_LCURLY));
  62. break;
  63. case LX_CBRACE:
  64. return(NOMAP_AND_FILL(L_RCURLY));
  65. break;
  66. case LX_OBRACK:
  67. return(NOMAP_AND_FILL(L_LBRACK));
  68. break;
  69. case LX_CBRACK:
  70. return(NOMAP_AND_FILL(L_RBRACK));
  71. break;
  72. case LX_OPAREN:
  73. return(NOMAP_AND_FILL(L_LPAREN));
  74. break;
  75. case LX_CPAREN:
  76. return(NOMAP_AND_FILL(L_RPAREN));
  77. break;
  78. case LX_COMMA:
  79. return(NOMAP_AND_FILL(L_COMMA));
  80. break;
  81. case LX_QUEST:
  82. return(NOMAP_AND_FILL(L_QUEST));
  83. break;
  84. case LX_SEMI:
  85. return(NOMAP_AND_FILL(L_SEMI));
  86. break;
  87. case LX_TILDE:
  88. return(NOMAP_AND_FILL(L_TILDE));
  89. break;
  90. case LX_NUMBER:
  91. return(MAP_TOKEN(getnum(PREVCH())));
  92. break;
  93. case LX_MINUS:
  94. switch(last_mapped = CHARMAP(GETCH())) {
  95. case LX_EQ:
  96. return(MAP_AND_FILL(L_MINUSEQ));
  97. break;
  98. case LX_GT:
  99. return(MAP_AND_FILL(L_POINTSTO));
  100. break;
  101. case LX_MINUS:
  102. return(MAP_AND_FILL(L_DECR));
  103. break;
  104. default:
  105. lex_token = L_MINUS;
  106. break;
  107. }
  108. break;
  109. case LX_PLUS:
  110. switch(last_mapped = CHARMAP(GETCH())) {
  111. case LX_EQ:
  112. return(MAP_AND_FILL(L_PLUSEQ));
  113. break;
  114. case LX_PLUS:
  115. return(MAP_AND_FILL(L_INCR));
  116. break;
  117. default:
  118. lex_token = L_PLUS;
  119. break;
  120. }
  121. break;
  122. case LX_AND:
  123. switch(last_mapped = CHARMAP(GETCH())) {
  124. case LX_EQ:
  125. return(MAP_AND_FILL(L_ANDEQ));
  126. break;
  127. case LX_AND:
  128. return(MAP_AND_FILL(L_ANDAND));
  129. break;
  130. default:
  131. lex_token = L_AND;
  132. break;
  133. }
  134. break;
  135. case LX_OR:
  136. switch(last_mapped = CHARMAP(GETCH())) {
  137. case LX_EQ:
  138. return(MAP_AND_FILL(L_OREQ));
  139. break;
  140. case LX_OR:
  141. return(MAP_AND_FILL(L_OROR));
  142. break;
  143. default:
  144. lex_token = L_OR;
  145. break;
  146. }
  147. break;
  148. case LX_COLON:
  149. return(NOMAP_AND_FILL(L_COLON));
  150. break;
  151. case LX_HAT:
  152. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  153. return(MAP_AND_FILL(L_XOREQ));
  154. }
  155. lex_token = L_XOR;
  156. break;
  157. case LX_PERCENT:
  158. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  159. return(MAP_AND_FILL(L_MODEQ));
  160. }
  161. lex_token = L_MOD;
  162. break;
  163. case LX_EQ:
  164. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  165. return(MAP_AND_FILL(L_EQUALS));
  166. }
  167. lex_token = L_ASSIGN;
  168. break;
  169. case LX_BANG:
  170. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  171. return(MAP_AND_FILL(L_NOTEQ));
  172. }
  173. lex_token = L_EXCLAIM;
  174. break;
  175. case LX_SLASH:
  176. switch(last_mapped = CHARMAP(GETCH())) {
  177. case LX_STAR:
  178. dump_comment();
  179. continue;
  180. break;
  181. case LX_SLASH:
  182. DumpSlashComment();
  183. continue;
  184. break;
  185. case LX_EQ:
  186. return(MAP_AND_FILL(L_DIVEQ));
  187. break;
  188. default:
  189. lex_token = L_DIV;
  190. break;
  191. }
  192. break;
  193. case LX_STAR:
  194. switch(last_mapped = CHARMAP(GETCH())) {
  195. case LX_SLASH:
  196. if( ! Prep ) {
  197. strcpy (Msg_Text, GET_MSG(2138));
  198. error(2138); /* (nested comments) */
  199. } else {
  200. myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  201. }
  202. continue;
  203. case LX_EQ:
  204. return(MAP_AND_FILL(L_MULTEQ));
  205. break;
  206. default:
  207. lex_token = L_MULT;
  208. break;
  209. }
  210. break;
  211. case LX_LT:
  212. switch(last_mapped = CHARMAP(GETCH())) {
  213. case LX_LT:
  214. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  215. return(MAP_AND_FILL(L_LSHFTEQ));
  216. }
  217. mapped_c = LX_LSHIFT;
  218. lex_token = L_LSHIFT;
  219. break;
  220. case LX_EQ:
  221. return(MAP_AND_FILL(L_LTEQ));
  222. break;
  223. default:
  224. lex_token = L_LT;
  225. break;
  226. }
  227. break;
  228. case LX_LSHIFT:
  229. /*
  230. ** if the next char is not an =, then we unget and return,
  231. ** since the only way in here is if we broke on the char
  232. ** following '<<'. since we'll have already worked the handle_eos()
  233. ** code prior to getting here, we'll not see another eos,
  234. ** UNLESS i/o buffering is char by char. ???
  235. ** see also, LX_RSHIFT
  236. */
  237. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  238. return(MAP_AND_FILL(L_LSHFTEQ));
  239. }
  240. UNGETCH();
  241. return(MAP_AND_FILL(L_LSHIFT));
  242. break;
  243. case LX_GT:
  244. switch(last_mapped = CHARMAP(GETCH())) {
  245. case LX_EQ:
  246. return(MAP_AND_FILL(L_GTEQ));
  247. case LX_GT:
  248. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  249. return(MAP_AND_FILL(L_RSHFTEQ));
  250. }
  251. mapped_c = LX_RSHIFT;
  252. lex_token = L_RSHIFT;
  253. break;
  254. default:
  255. lex_token = L_GT;
  256. break;
  257. }
  258. break;
  259. case LX_RSHIFT:
  260. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  261. return(MAP_AND_FILL(L_RSHFTEQ));
  262. }
  263. UNGETCH();
  264. return(MAP_AND_FILL(L_RSHIFT));
  265. break;
  266. case LX_POUND:
  267. if( ! Prep ) {
  268. strcpy (Msg_Text, GET_MSG(2014));
  269. error(2014);/* # sign must be first non-whitespace */
  270. UNGETCH(); /* replace it */
  271. Linenumber--; /* do_newline counts a newline */
  272. do_newline(); /* may be a 'real' prepro line */
  273. } else {
  274. myfwrite(L"#", sizeof(WCHAR), 1, OUTPUTFILE);
  275. }
  276. continue;
  277. break;
  278. case LX_EOS:
  279. if(PREVCH() == L'\\') {
  280. if( ! Prep ) {
  281. if( ! checknl()) { /* ignore the new line */
  282. strcpy (Msg_Text, GET_MSG(2017));
  283. error(2017);/* illegal escape sequence */
  284. }
  285. } else {
  286. myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
  287. *buf = get_non_eof();
  288. myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
  289. }
  290. continue;
  291. }
  292. if(Macro_depth == 0) {
  293. if( ! io_eob()) { /* not the end of the buffer */
  294. continue;
  295. }
  296. if(fpop()) { /* have more files to read */
  297. continue;
  298. }
  299. return(MAP_AND_FILL(L_EOF)); /* all gone . . . */
  300. }
  301. handle_eos(); /* found end of macro */
  302. continue;
  303. break;
  304. case LX_DQUOTE:
  305. if( ! Prep ) {
  306. str_const();
  307. return(NOMAP_TOKEN(L_STRING));
  308. }
  309. prep_string(L'\"');
  310. continue;
  311. break;
  312. case LX_SQUOTE:
  313. if( ! Prep ) {
  314. return(MAP_TOKEN(char_const()));
  315. }
  316. prep_string(L'\'');
  317. continue;
  318. break;
  319. case LX_CR: /* ??? check for nl next */
  320. continue;
  321. break;
  322. case LX_NL:
  323. if(On_pound_line) {
  324. UNGETCH();
  325. return(NOMAP_TOKEN(L_NOTOKEN));
  326. }
  327. if(Prep) {
  328. // must manually write '\r' with '\n' when writing 16-bit strings
  329. myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  330. }
  331. do_newline();
  332. continue;
  333. break;
  334. case LX_WHITE: /* skip all white space */
  335. if( ! Prep ) { /* check only once */
  336. do {
  337. ;
  338. } while(LXC_IS_WHITE(GETCH()));
  339. }
  340. else {
  341. WCHAR c;
  342. c = PREVCH();
  343. do {
  344. myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
  345. } while(LXC_IS_WHITE(c = GETCH()));
  346. }
  347. UNGETCH();
  348. continue;
  349. break;
  350. case LX_ILL:
  351. if( ! Prep ) {
  352. Msg_Temp = GET_MSG(2018);
  353. SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
  354. error(2018);/* unknown character */
  355. } else {
  356. myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
  357. }
  358. continue;
  359. break;
  360. case LX_BACKSLASH:
  361. if( ! Prep ) {
  362. if( ! checknl()) { /* ignore the new line */
  363. strcpy (Msg_Text, GET_MSG(2017));
  364. error(2017);/* illegal escape sequence */
  365. }
  366. }
  367. else {
  368. myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
  369. *buf = get_non_eof();
  370. myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
  371. }
  372. continue;
  373. break;
  374. case LX_DOT:
  375. dot_switch:
  376. switch(last_mapped = CHARMAP(GETCH())) {
  377. case LX_BACKSLASH:
  378. if(checknl()) {
  379. goto dot_switch;
  380. }
  381. UNGETCH();
  382. break;
  383. case LX_EOS:
  384. if(handle_eos() == BACKSLASH_EOS) {
  385. break;
  386. }
  387. goto dot_switch;
  388. break;
  389. case LX_DOT:
  390. if( ! checkop(L'.') ) {
  391. strcpy (Msg_Text, GET_MSG(2142));
  392. error(2142);/* ellipsis requires three '.'s */
  393. }
  394. return(NOMAP_AND_FILL(L_ELLIPSIS));
  395. break;
  396. case LX_NUMBER:
  397. /*
  398. ** don't worry about getting correct hash value.
  399. ** The text equivalent of a real number is never
  400. ** hashed
  401. */
  402. Reuse_W[0] = L'.';
  403. Reuse_W[1] = PREVCH();
  404. return(MAP_TOKEN(get_real(&Reuse_W[2])));
  405. break;
  406. }
  407. UNGETCH();
  408. return(MAP_AND_FILL(L_PERIOD));
  409. break;
  410. case LX_NOEXPAND:
  411. SKIPCH(); /* just skip length */
  412. continue;
  413. case LX_ID:
  414. {
  415. pdefn_t pdef;
  416. if(Macro_depth > 0) {
  417. if( ! lex_getid(PREVCH())) {
  418. goto avoid_expand;
  419. }
  420. }
  421. else {
  422. getid(PREVCH());
  423. }
  424. if( ((pdef = get_defined()) != 0)
  425. &&
  426. ( ! DEFN_EXPANDING(pdef))
  427. &&
  428. ( can_expand(pdef))
  429. ) {
  430. continue;
  431. }
  432. avoid_expand:
  433. if( ! Prep ) {
  434. /* M00BUG get near copy of identifier???? */
  435. HLN_NAME(yylval.yy_ident) = Reuse_W;
  436. HLN_HASH(yylval.yy_ident) = Reuse_W_hash;
  437. HLN_LENGTH(yylval.yy_ident) = (UINT)Reuse_W_length;
  438. return(L_IDENT);
  439. } else {
  440. myfwrite(Reuse_W, (Reuse_W_length - 1) * sizeof(WCHAR), 1, OUTPUTFILE);
  441. return(NOMAP_TOKEN(L_NOTOKEN));
  442. }
  443. }
  444. continue;
  445. break;
  446. }
  447. /*
  448. ** all the multichar ( -> -- -= etc ) operands
  449. ** must come through here. we've gotten the next char,
  450. ** and not matched one of the possiblities, but we have to check
  451. ** for the end of the buffer character and act accordingly
  452. ** if it is the eob, then we handle it and go back for another try.
  453. ** otherwise, we unget the char we got, and return the base token.
  454. */
  455. if(last_mapped == LX_EOS) {
  456. if(handle_eos() != BACKSLASH_EOS) {
  457. goto first_switch;
  458. }
  459. }
  460. UNGETCH(); /* cause we got an extra one to check */
  461. return(MAP_AND_FILL(lex_token));
  462. }
  463. }
  464. /************************************************************************
  465. **
  466. ** lex_getid: reads an identifier for the main lexer. The
  467. ** identifier is read into Reuse_W. This function should not handle
  468. ** an end of string if it is rescanning a macro expansion, because
  469. ** this could switch the context with regards to whether the macro
  470. ** is expandable or not. Similarly, the noexpand marker must only be
  471. ** allowed if a macro is being rescanned, otherwise let this character
  472. ** be caught as an illegal character in text
  473. ************************************************************************/
  474. int
  475. lex_getid(
  476. WCHAR c
  477. )
  478. {
  479. REG WCHAR *p;
  480. int length = 0;
  481. p = Reuse_W;
  482. *p++ = c;
  483. c &= HASH_MASK;
  484. for(;;) {
  485. while(LXC_IS_IDENT(*p = GETCH())) { /* collect character */
  486. c += (*p & HASH_MASK); /* hash it */
  487. p++;
  488. }
  489. if(CHARMAP(*p) == LX_NOEXPAND ) {
  490. length = (int)GETCH();
  491. continue;
  492. }
  493. UNGETCH();
  494. break; /* out of for loop - only way out */
  495. }
  496. if(p >= LIMIT(Reuse_W)) { /* is this error # correct? */
  497. strcpy (Msg_Text, GET_MSG(1067));
  498. fatal(1067);
  499. }
  500. if(((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
  501. p = Reuse_W + LIMIT_ID_LENGTH;
  502. *p = L'\0';
  503. c = local_c_hash(Reuse_W);
  504. Msg_Temp = GET_MSG(4011);
  505. SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
  506. warning(4011); /* id truncated */
  507. } else {
  508. *p = L'\0'; /* terminates identifier for expandable check */
  509. }
  510. Reuse_W_hash = (hash_t)c;
  511. Reuse_W_length = (UINT)((p - Reuse_W) + 1);
  512. return(length != (p - Reuse_W));
  513. }