Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

557 lines
14 KiB

  1. /************************************************************************/
  2. /* */
  3. /* RCPP - Resource Compiler Pre-Processor for NT system */
  4. /* */
  5. /* P0GETTOK.C - Tokenization routines */
  6. /* */
  7. /* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
  8. /* */
  9. /************************************************************************/
  10. #include <stdio.h>
  11. #include "rcpptype.h"
  12. #include "rcppdecl.h"
  13. #include "rcppext.h"
  14. #include "grammar.h"
  15. #include "p0defs.h"
  16. #include "charmap.h"
  17. /************************************************************************
  18. ** MAP_TOKEN : a token has two representations and additional information.
  19. ** (ex : const, has basic token of L_CONST,
  20. ** mapped token of [L_TYPE | L_MODIFIER]
  21. ** and info based on what the map token is)
  22. ** MAP_AND_FILL : has two representations, but none of the extra info.
  23. ** (ex : '<', has basic of L_LT, and map of L_RELOP)
  24. ** NOMAP_TOKEN : has 1 representation and additional info.
  25. ** (ex: a string, basic and 'map' type L_STRING and ptrs to the actual str)
  26. ** NOMAP_AND_FILL : has 1 representation and no additional info.
  27. ** (ex : 'while', has basic and 'map' of L_WHILE)
  28. ** the FILL versions fill the token with the basic token type.
  29. ************************************************************************/
/* Records otok in Basic_token; the macro's value is TS_VALUE(Basic_token). */
  30. #define MAP_TOKEN(otok)\
  31. (Basic_token = (otok), TS_VALUE(Basic_token))
/* As MAP_TOKEN, and additionally stores otok in yylval.yy_token. */
  32. #define MAP_AND_FILL(otok)\
  33. (yylval.yy_token = Basic_token = (otok), TS_VALUE(Basic_token))
/* Records otok in Basic_token; the macro's value is otok itself. */
  34. #define NOMAP_TOKEN(otok)\
  35. (Basic_token = (otok))
/* As NOMAP_TOKEN, and additionally stores otok in yylval.yy_token. */
  36. #define NOMAP_AND_FILL(otok)\
  37. (yylval.yy_token = Basic_token = (otok))
  38. /************************************************************************/
  39. /* yylex - main tokenization routine */
  40. /************************************************************************/
/*
** Returns the next token. Each pass of the outer loop reads one character
** with GETCH(), classifies it with CHARMAP() and dispatches on the class.
**
**  - Single-character punctuators return at once.
**  - Operators that may be longer read one look-ahead character. If it
**    completes a longer operator that token is returned; otherwise
**    lex_token is set and control breaks out of the switch to the common
**    tail at the bottom of the loop, which deals with an end-of-string
**    look-ahead and then ungets the look-ahead and returns lex_token.
**  - When Prep is non-zero, several cases copy the text to OUTPUTFILE
**    instead of reporting an error or building a token.
**  - `continue` restarts the loop without returning a token.
*/
  41. token_t yylex(void)
  42. {
  43. REG UCHAR last_mapped; /* class of the most recently read look-ahead character */
  44. UCHAR mapped_c; /* class being dispatched on; rewritten to resume after '<<' / '>>' */
  45. REG token_t lex_token; /* base operator token returned from the common tail */
  46. for(;;) {
  47. last_mapped = mapped_c = CHARMAP(GETCH());
  48. first_switch: /* re-entered from the common tail after handle_eos() */
  49. switch(mapped_c) {
  50. case LX_EACH:
  51. case LX_ASCII:
  52. Msg_Temp = GET_MSG(2018);
  53. SET_MSG (Msg_Text, Msg_Temp, PREVCH());
  54. error(2018);
  55. continue;
  56. break;
  57. case LX_OBRACE:
  58. return(NOMAP_AND_FILL(L_LCURLY));
  59. break;
  60. case LX_CBRACE:
  61. return(NOMAP_AND_FILL(L_RCURLY));
  62. break;
  63. case LX_OBRACK:
  64. return(NOMAP_AND_FILL(L_LBRACK));
  65. break;
  66. case LX_CBRACK:
  67. return(NOMAP_AND_FILL(L_RBRACK));
  68. break;
  69. case LX_OPAREN:
  70. return(NOMAP_AND_FILL(L_LPAREN));
  71. break;
  72. case LX_CPAREN:
  73. return(NOMAP_AND_FILL(L_RPAREN));
  74. break;
  75. case LX_COMMA:
  76. return(NOMAP_AND_FILL(L_COMMA));
  77. break;
  78. case LX_QUEST:
  79. return(NOMAP_AND_FILL(L_QUEST));
  80. break;
  81. case LX_SEMI:
  82. return(NOMAP_AND_FILL(L_SEMI));
  83. break;
  84. case LX_TILDE:
  85. return(NOMAP_AND_FILL(L_TILDE));
  86. break;
  87. case LX_NUMBER:
  88. return(MAP_TOKEN(getnum(PREVCH())));
  89. break;
  90. case LX_MINUS:
  91. switch(last_mapped = CHARMAP(GETCH())) {
  92. case LX_EQ:
  93. return(MAP_AND_FILL(L_MINUSEQ));
  94. break;
  95. case LX_GT:
  96. return(MAP_AND_FILL(L_POINTSTO));
  97. break;
  98. case LX_MINUS:
  99. return(MAP_AND_FILL(L_DECR));
  100. break;
  101. default:
  102. lex_token = L_MINUS;
  103. break;
  104. }
  105. break;
  106. case LX_PLUS:
  107. switch(last_mapped = CHARMAP(GETCH())) {
  108. case LX_EQ:
  109. return(MAP_AND_FILL(L_PLUSEQ));
  110. break;
  111. case LX_PLUS:
  112. return(MAP_AND_FILL(L_INCR));
  113. break;
  114. default:
  115. lex_token = L_PLUS;
  116. break;
  117. }
  118. break;
  119. case LX_AND:
  120. switch(last_mapped = CHARMAP(GETCH())) {
  121. case LX_EQ:
  122. return(MAP_AND_FILL(L_ANDEQ));
  123. break;
  124. case LX_AND:
  125. return(MAP_AND_FILL(L_ANDAND));
  126. break;
  127. default:
  128. lex_token = L_AND;
  129. break;
  130. }
  131. break;
  132. case LX_OR:
  133. switch(last_mapped = CHARMAP(GETCH())) {
  134. case LX_EQ:
  135. return(MAP_AND_FILL(L_OREQ));
  136. break;
  137. case LX_OR:
  138. return(MAP_AND_FILL(L_OROR));
  139. break;
  140. default:
  141. lex_token = L_OR;
  142. break;
  143. }
  144. break;
  145. case LX_COLON:
  146. return(NOMAP_AND_FILL(L_COLON));
  147. break;
  148. case LX_HAT:
  149. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  150. return(MAP_AND_FILL(L_XOREQ));
  151. }
  152. lex_token = L_XOR;
  153. break;
  154. case LX_PERCENT:
  155. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  156. return(MAP_AND_FILL(L_MODEQ));
  157. }
  158. lex_token = L_MOD;
  159. break;
  160. case LX_EQ:
  161. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  162. return(MAP_AND_FILL(L_EQUALS));
  163. }
  164. lex_token = L_ASSIGN;
  165. break;
  166. case LX_BANG:
  167. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  168. return(MAP_AND_FILL(L_NOTEQ));
  169. }
  170. lex_token = L_EXCLAIM;
  171. break;
  172. case LX_SLASH:
  173. switch(last_mapped = CHARMAP(GETCH())) {
  174. case LX_STAR:
  175. dump_comment();
  176. continue;
  177. break;
  178. case LX_SLASH:
  179. DumpSlashComment();
  180. continue;
  181. break;
  182. case LX_EQ:
  183. return(MAP_AND_FILL(L_DIVEQ));
  184. break;
  185. default:
  186. lex_token = L_DIV;
  187. break;
  188. }
  189. break;
  190. case LX_STAR:
  191. switch(last_mapped = CHARMAP(GETCH())) {
  192. case LX_SLASH:
  193. if( ! Prep ) {
  194. Msg_Temp = GET_MSG(2138);
  195. SET_MSG (Msg_Text, Msg_Temp);
  196. error(2138); /* (nested comments) */
  197. }
  198. else {
  199. fwrite("*/", 2, 1, OUTPUTFILE);
  200. }
  201. continue;
  202. case LX_EQ:
  203. return(MAP_AND_FILL(L_MULTEQ));
  204. break;
  205. default:
  206. lex_token = L_MULT;
  207. break;
  208. }
  209. break;
  210. case LX_LT:
  211. switch(last_mapped = CHARMAP(GETCH())) {
  212. case LX_LT:
  213. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  214. return(MAP_AND_FILL(L_LSHFTEQ));
  215. }
  216. mapped_c = LX_LSHIFT; /* a retry through first_switch resumes in case LX_LSHIFT */
  217. lex_token = L_LSHIFT;
  218. break;
  219. case LX_EQ:
  220. return(MAP_AND_FILL(L_LTEQ));
  221. break;
  222. default:
  223. lex_token = L_LT;
  224. break;
  225. }
  226. break;
  227. case LX_LSHIFT:
  228. /*
  229. ** if the next char is not an =, then we unget and return,
  230. ** since the only way in here is if we broke on the char
  231. ** following '<<'. since we'll have already worked the handle_eos()
  232. ** code prior to getting here, we'll not see another eos,
  233. ** UNLESS i/o buffering is char by char. ???
  234. ** see also, LX_RSHIFT
  235. */
  236. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  237. return(MAP_AND_FILL(L_LSHFTEQ));
  238. }
  239. UNGETCH();
  240. return(MAP_AND_FILL(L_LSHIFT));
  241. break;
  242. case LX_GT:
  243. switch(last_mapped = CHARMAP(GETCH())) {
  244. case LX_EQ:
  245. return(MAP_AND_FILL(L_GTEQ));
  246. case LX_GT:
  247. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  248. return(MAP_AND_FILL(L_RSHFTEQ));
  249. }
  250. mapped_c = LX_RSHIFT; /* a retry through first_switch resumes in case LX_RSHIFT */
  251. lex_token = L_RSHIFT;
  252. break;
  253. default:
  254. lex_token = L_GT;
  255. break;
  256. }
  257. break;
  258. case LX_RSHIFT: /* counterpart of case LX_LSHIFT for '>>' */
  259. if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
  260. return(MAP_AND_FILL(L_RSHFTEQ));
  261. }
  262. UNGETCH();
  263. return(MAP_AND_FILL(L_RSHIFT));
  264. break;
  265. case LX_POUND:
  266. if( ! Prep ) {
  267. Msg_Temp = GET_MSG(2014);
  268. SET_MSG (Msg_Text, Msg_Temp);
  269. error(2014);/* # sign must be first non-whitespace */
  270. UNGETCH(); /* replace it */
  271. Linenumber--; /* do_newline counts a newline */
  272. do_newline(); /* may be a 'real' prepro line */
  273. }
  274. else {
  275. fwrite("#", 1, 1, OUTPUTFILE);
  276. }
  277. continue;
  278. break;
  279. case LX_EOS:
  280. if(PREVCH() == '\\') {
  281. if( ! Prep ) {
  282. if( ! checknl()) { /* ignore the new line */
  283. Msg_Temp = GET_MSG(2017);
  284. SET_MSG (Msg_Text, Msg_Temp);
  285. error(2017);/* illegal escape sequence */
  286. }
  287. }
  288. else {
  289. fputc('\\', OUTPUTFILE);
  290. fputc(get_non_eof(), OUTPUTFILE);
  291. }
  292. continue;
  293. }
  294. if(Macro_depth == 0) {
  295. if( ! io_eob()) { /* not the end of the buffer */
  296. continue;
  297. }
  298. if(fpop()) { /* have more files to read */
  299. continue;
  300. }
  301. return(MAP_AND_FILL(L_EOF)); /* all gone . . . */
  302. }
  303. handle_eos(); /* found end of macro */
  304. continue;
  305. break;
  306. case LX_DQUOTE:
  307. if( ! Prep ) {
  308. str_const();
  309. return(NOMAP_TOKEN(L_STRING));
  310. }
  311. prep_string('\"');
  312. continue;
  313. break;
  314. case LX_SQUOTE:
  315. if( ! Prep ) {
  316. return(MAP_TOKEN(char_const()));
  317. }
  318. prep_string('\'');
  319. continue;
  320. break;
  321. case LX_CR: /* ??? check for nl next */
  322. continue;
  323. break;
  324. case LX_NL:
  325. if(On_pound_line) {
  326. UNGETCH(); /* put the newline back and report no token */
  327. return(NOMAP_TOKEN(L_NOTOKEN));
  328. }
  329. if(Prep) {
  330. fputc('\n', OUTPUTFILE);
  331. }
  332. do_newline();
  333. continue;
  334. break;
  335. case LX_WHITE: /* skip all white space */
  336. if( ! Prep ) { /* check only once */
  337. do {
  338. ;
  339. } while(LXC_IS_WHITE(GETCH()));
  340. }
  341. else {
  342. UCHAR c;
  343. c = PREVCH();
  344. do {
  345. fputc(c, OUTPUTFILE);
  346. } while(LXC_IS_WHITE(c = GETCH()));
  347. }
  348. UNGETCH();
  349. continue;
  350. break;
  351. /* Note:
  352. * RCPP.EXE does not support DBCS code.
  353. * Therefore, we should display an error message.
  354. * IBM-J PTR 12JP-0092
  355. * MSHQ PTR xxxxx
  356. */
  357. case LX_LEADBYTE:
  358. if( ! Prep ) { /* check only once */
  359. Msg_Temp = GET_MSG(2018);
  360. SET_MSG (Msg_Text, Msg_Temp, PREVCH());
  361. error(2018);
  362. Msg_Temp = GET_MSG(2018);
  363. SET_MSG (Msg_Text, Msg_Temp, GETCH());
  364. error(2018);
  365. }
  366. else {
  367. fputc(PREVCH(), OUTPUTFILE);
  368. #ifdef DBCS // token_t yylex(void)
  369. fputc(get_non_eof(), OUTPUTFILE);
  370. #else
  371. fputc(GETCH(), OUTPUTFILE);
  372. #endif // DBCS
  373. }
  374. continue;
  375. break;
  376. case LX_ILL:
  377. if( ! Prep ) {
  378. Msg_Temp = GET_MSG(2018);
  379. SET_MSG (Msg_Text, Msg_Temp, PREVCH());
  380. error(2018);/* unknown character */
  381. } else {
  382. fputc(PREVCH(), OUTPUTFILE);
  383. }
  384. continue;
  385. break;
  386. case LX_BACKSLASH:
  387. if( ! Prep ) {
  388. if( ! checknl()) { /* ignore the new line */
  389. Msg_Temp = GET_MSG(2017);
  390. SET_MSG (Msg_Text, Msg_Temp);
  391. error(2017);/* illegal escape sequence */
  392. }
  393. }
  394. else {
  395. fputc('\\', OUTPUTFILE);
  396. fputc(get_non_eof(), OUTPUTFILE);
  397. }
  398. continue;
  399. break;
  400. case LX_DOT:
  401. dot_switch: /* re-read the character after '.' once a line splice or EOS is consumed */
  402. switch(last_mapped = CHARMAP(GETCH())) {
  403. case LX_BACKSLASH:
  404. if(checknl()) {
  405. goto dot_switch;
  406. }
  407. UNGETCH();
  408. break;
  409. case LX_EOS:
  410. if(handle_eos() == BACKSLASH_EOS) {
  411. break;
  412. }
  413. goto dot_switch;
  414. break;
  415. case LX_DOT:
  416. if( ! checkop('.') ) {
  417. Msg_Temp = GET_MSG(2142);
  418. SET_MSG (Msg_Text, Msg_Temp);
  419. error(2142);/* ellipsis requires three '.'s */
  420. }
  421. return(NOMAP_AND_FILL(L_ELLIPSIS));
  422. break;
  423. case LX_NUMBER:
  424. /*
  425. ** don't worry about getting correct hash value.
  426. ** The text equivalent of a real number is never
  427. ** hashed
  428. */
  429. Reuse_1[0] = '.';
  430. Reuse_1[1] = PREVCH();
  431. return(MAP_TOKEN(get_real(&Reuse_1[2])));
  432. break;
  433. }
  434. UNGETCH();
  435. return(MAP_AND_FILL(L_PERIOD));
  436. break;
  437. case LX_NOEXPAND:
  438. SKIPCH(); /* just skip length */
  439. continue;
  440. case LX_ID:
  441. {
  442. pdefn_t pdef;
  443. if(Macro_depth > 0) {
  444. if( ! lex_getid(PREVCH())) { /* zero result: skip the expansion attempt */
  445. goto avoid_expand;
  446. }
  447. }
  448. else {
  449. getid(PREVCH());
  450. }
  451. if( ((pdef = get_defined()) != 0)
  452. &&
  453. ( ! DEFN_EXPANDING(pdef))
  454. &&
  455. ( can_expand(pdef))
  456. ) {
  457. continue; /* can_expand() returned non-zero: go read the next character */
  458. }
  459. avoid_expand:
  460. if( ! Prep ) {
  461. /* M00BUG get near copy of identifier???? */
  462. HLN_NAME(yylval.yy_ident) = Reuse_1;
  463. HLN_HASH(yylval.yy_ident) = Reuse_1_hash;
  464. HLN_LENGTH(yylval.yy_ident) = (UCHAR)Reuse_1_length;
  465. return(L_IDENT);
  466. }
  467. else {
  468. fwrite(Reuse_1, Reuse_1_length - 1, 1, OUTPUTFILE); /* lex_getid's length counts the terminator, hence - 1 */
  469. return(NOMAP_TOKEN(L_NOTOKEN));
  470. }
  471. }
  472. continue; /* not reached: both branches above return */
  473. break;
  474. }
  475. /*
  476. ** all the multichar ( -> -- -= etc ) operands
  477. ** must come through here. we've gotten the next char,
  478. ** and not matched one of the possibilities, but we have to check
  479. ** for the end of the buffer character and act accordingly
  480. ** if it is the eob, then we handle it and go back for another try.
  481. ** otherwise, we unget the char we got, and return the base token.
  482. */
  483. if(last_mapped == LX_EOS) {
  484. if(handle_eos() != BACKSLASH_EOS) {
  485. goto first_switch; /* dispatch on mapped_c again and read a fresh look-ahead */
  486. }
  487. }
  488. UNGETCH(); /* cause we got an extra one to check */
  489. return(MAP_AND_FILL(lex_token));
  490. }
  491. }
  492. /************************************************************************
  493. **
  494. ** lex_getid: reads an identifier for the main lexer. The
  495. ** identifier is read into Reuse_1. This function should not handle
  496. ** an end of string if it is rescanning a macro expansion, because
  497. ** this could switch the context with regards to whether the macro
  498. ** is expandable or not. Similarly, the noexpand marker must only be
  499. ** allowed if a macro is being rescanned, otherwise let this character
  500. ** be caught as an illegal character in text
  501. ************************************************************************/
  502. int lex_getid(UCHAR c)
  503. {
  504. REG UCHAR *p;
  505. int length = 0;
  506. p = Reuse_1;
  507. *p++ = c;
  508. c &= HASH_MASK;
  509. for(;;) {
  510. while(LXC_IS_IDENT(*p = GETCH())) { /* collect character */
  511. c += (*p & HASH_MASK); /* hash it */
  512. p++;
  513. }
  514. if(CHARMAP(*p) == LX_NOEXPAND ) {
  515. length = (int)GETCH();
  516. continue;
  517. }
  518. UNGETCH();
  519. break; /* out of for loop - only way out */
  520. }
  521. if(p >= LIMIT(Reuse_1)) { /* is this error # correct? */
  522. Msg_Temp = GET_MSG(1067);
  523. SET_MSG (Msg_Text, Msg_Temp);
  524. fatal(1067);
  525. }
  526. if(((p - Reuse_1) > LIMIT_ID_LENGTH) && ( ! Prep )) {
  527. p = Reuse_1 + LIMIT_ID_LENGTH;
  528. *p = '\0';
  529. c = local_c_hash(Reuse_1);
  530. Msg_Temp = GET_MSG(4011);
  531. SET_MSG (Msg_Text, Msg_Temp, Reuse_1);
  532. warning(4011); /* id truncated */
  533. }
  534. else {
  535. *p = '\0'; /* terminates identifier for expandable check */
  536. }
  537. Reuse_1_hash = c;
  538. Reuse_1_length = (UCHAR)((p - Reuse_1) + 1);
  539. return(length != (p - Reuse_1));
  540. }