Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1102 lines
30 KiB

  1. /************************************************************************/
  2. /* */
  3. /* RCPP - Resource Compiler Pre-Processor for NT system */
  4. /* */
  5. /* SCANNER.C - Routines for token scanning */
  6. /* */
  7. /* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
  8. /* */
  9. /************************************************************************/
  10. #include "rc.h"
  11. #define ABS(x) ((x > 0) ? x : -1 * x)
#define ALERT_CHAR L'\007' /* ANSI alert character is ASCII BEL */

/*
** Count of newlines seen by do_newline(); it is incremented there for each
** newline character and handed to UpdateStatus().
*/
ULONG lCPPTotalLinenumber = 0;

extern int vfCurrFileType; //- Added for 16-bit file support.

/************************************************************************/
/* Local Function Prototypes */
/************************************************************************/
token_t c_size(long);       /* token type for a plain decimal constant */
int ctoi(int);              /* digit character to its numeric value */
int escape(int);            /* translate the character(s) after a backslash */
token_t get_real(PWCHAR);   /* gather fraction/exponent of a real number */
token_t l_size(long);       /* token type for an 'L' suffixed constant */
long matol(PWCHAR, int);    /* wide string to long in a given radix */
token_t uc_size(long);      /* token type for a plain octal/hex constant */
token_t ul_size(long);      /* token type for a 'U' suffixed constant */
void skip_1comment(void);   /* skip to the end of a C style comment */
  27. /************************************************************************/
  28. /* local_c_hash */
  29. /************************************************************************/
  30. hash_t
  31. local_c_hash(
  32. REG WCHAR *name
  33. )
  34. {
  35. REG hash_t i;
  36. i = 0;
  37. while(*name) {
  38. i += (*name & HASH_MASK);
  39. name++;
  40. }
  41. return(i);
  42. }
/************************************************************************
 * GETID - Get an identifier or keyword.
 *
 * ARGUMENTS
 *      c - the first character of the identifier (we know that we're
 *          given at least 1 id char)
 *
 * SIDE EFFECTS
 *      Reuse_W        receives the NUL-terminated identifier.
 *      Reuse_W_hash   receives its additive hash (each character masked
 *                     with HASH_MASK and summed).
 *      Reuse_W_length receives its length, counting the terminator.
 *      The character that ended the identifier is pushed back.
 *      Calls fatal(1067) if the identifier filled Reuse_W and fatal(1004)
 *      if io_eob() reports end of input in the middle of the identifier.
 ************************************************************************/
void
getid(
    REG UINT c
    )
{
    REG WCHAR *p;

    p = Reuse_W;
    *p++ = (WCHAR)c;
    c &= HASH_MASK;                     /* from here on 'c' is the running hash */

repeat:
    while(LXC_IS_IDENT(*p = GETCH())) { /* while it's an id char . . . */
        c += (*p & HASH_MASK);          /* hash it */
        p++;
    }
    /* *p now holds the first character that is not an identifier char */
    if(*p != EOS_CHAR) {
        /* backslash-newline inside an identifier: keep scanning */
        if((*p == L'\\') && (checknl())) {
            goto repeat;
        }
        UNGETCH();
        /*
        ** NOTE(review): this bound is only tested once scanning has
        ** stopped, after the characters have already been stored.
        */
        if(p >= LIMIT(Reuse_W)) {
            strcpy (Msg_Text, GET_MSG (1067));
            fatal(1067);
        }
        if( ((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
            /* too long: cut at LIMIT_ID_LENGTH and rehash what is left */
            p = Reuse_W + LIMIT_ID_LENGTH;
            *p++ = L'\0';
            c = local_c_hash(Reuse_W);
            Msg_Temp = GET_MSG (4011);
            SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
            warning(4011);              /* id truncated */
        } else {
            *p++ = L'\0';
        }
        Reuse_W_hash = (hash_t)c;
        Reuse_W_length = (UINT)(p - Reuse_W); /* p is one past the terminator */
        return;
    }
    /* hit EOS_CHAR: either the input is exhausted or we resume scanning */
    if(io_eob()) {                      /* end of file in middle of id */
        strcpy (Msg_Text, GET_MSG (1004));
        fatal(1004);
    }
    goto repeat;
}
/************************************************************************
** prep_string : outputs char/string constants when preprocessing only
**
** c is the opening quote character. Characters are collected in Reuse_W
** and written to OUTPUTFILE with myfwrite(); the buffer is flushed once
** the write pointer reaches &Reuse_W[MED_BUFFER - 1]. Scanning stops at
** the matching quote (which is written too) or at a newline (which is
** pushed back and not written).
************************************************************************/
void
prep_string(
    REG WCHAR c
    )
{
    REG WCHAR *p_buf;
    int term_char;

    p_buf = Reuse_W;
    term_char = c;
    *p_buf++ = c;       /* save the open quote */
    for(;;) {
        switch(CHARMAP(c = GETCH())) {
        case LX_DQUOTE:
        case LX_SQUOTE:
            if(c == (WCHAR)term_char) {
                *p_buf++ = (WCHAR)term_char;/* save the terminating quote */
                goto out_of_loop;
            }
            /* the other kind of quote is ordinary text; stored below */
            break;
        case LX_BACKSLASH:
            /*
            ** NOTE(review): 'c' is stored here and again by the common
            ** store after the switch, so it is written twice - confirm
            ** this is intended.
            */
            *p_buf++ = c;
            break;
        case LX_CR:
            continue;   /* carriage returns are dropped */
        case LX_NL:
            UNGETCH();
            goto out_of_loop;
        case LX_EOS:
            if(c == L'\\') {
                /* keep the backslash, then the character following it */
                *p_buf++ = c;
                c = get_non_eof();
                break;
            }
            handle_eos();
            continue;
        }
        *p_buf++ = c;
        /*
        ** NOTE(review): the LX_BACKSLASH and LX_EOS paths store two
        ** characters before this test runs - verify Reuse_W has room for
        ** the terminator written below in that case.
        */
        if(p_buf >= &Reuse_W[MED_BUFFER - 1]) {
            *p_buf = L'\0';
            myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
            p_buf = Reuse_W;
        }
    }
out_of_loop:
    *p_buf = L'\0';
    myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
}
  141. /************************************************************************
  142. ** char_const : gather up a character constant
  143. ** we're called after finding the openning single quote.
  144. ************************************************************************/
  145. token_t
  146. char_const(
  147. void
  148. )
  149. {
  150. REG WCHAR c;
  151. value_t value;
  152. token_t tok;
  153. tok = (token_t)(Jflag ? L_CUNSIGNED : L_CINTEGER);
  154. first_switch:
  155. switch(CHARMAP(c = GETCH())) {
  156. case LX_BACKSLASH:
  157. break;
  158. case LX_SQUOTE:
  159. strcpy (Msg_Text, GET_MSG (2137)); //"empty character constant"
  160. error(2137);
  161. value.v_long = 0;
  162. UNGETCH();
  163. break;
  164. case LX_EOS: /* ??? assumes i/o buffering > 1 char */
  165. if(handle_eos() != BACKSLASH_EOS) {
  166. goto first_switch;
  167. }
  168. value.v_long = escape(get_non_eof());
  169. if( tok == L_CUNSIGNED ) { /* don't sign extend */
  170. value.v_long &= 0xff;
  171. }
  172. break;
  173. case LX_NL:
  174. /* newline in character constant */
  175. strcpy (Msg_Text, GET_MSG (2001));
  176. error (2001);
  177. UNGETCH();
  178. /*
  179. ** FALLTHROUGH
  180. */
  181. default:
  182. value.v_long = c;
  183. break;
  184. }
  185. if((c = get_non_eof()) != L'\'') {
  186. strcpy (Msg_Text, GET_MSG (2015));
  187. error (2015); /* too many chars in constant */
  188. do {
  189. if(c == L'\n') {
  190. strcpy (Msg_Text, GET_MSG (2016));
  191. error(2016); /* missing closing ' */
  192. break;
  193. }
  194. } while((c = get_non_eof()) != L'\'');
  195. }
  196. yylval.yy_tree = build_const(tok, &value);
  197. return(tok);
  198. }
  199. /************************************************************************
  200. ** str_const : gather up a string constant
  201. ************************************************************************/
  202. void
  203. str_const(
  204. VOID
  205. )
  206. {
  207. REG WCHAR c;
  208. REG PWCHAR p_buf;
  209. int not_warned_yet = TRUE;
  210. p_buf = yylval.yy_string.str_ptr = Macro_buffer;
  211. /*
  212. ** Is it possible that reading this string during a rescan will
  213. ** overwrite the expansion being rescanned? No, because a macro
  214. ** expansion is limited to the top half of Macro_buffer.
  215. ** For Macro_depth > 0, this is like copying the string from
  216. ** somewhere in the top half of Macro_buffer to the bottom half
  217. ** of Macro_buffer.
  218. ** Note that the restriction on the size of an expanded macro is
  219. ** stricter than the limit on an L_STRING length. An expanded
  220. ** macro is limited to around 1019 bytes, but an L_STRING is
  221. ** limited to 2043 bytes.
  222. */
  223. for(;;) {
  224. switch(CHARMAP(c = GETCH())) {
  225. case LX_NL:
  226. UNGETCH();
  227. strcpy (Msg_Text, GET_MSG (2001));
  228. error(2001);
  229. /*
  230. ** FALLTHROUGH
  231. */
  232. case LX_DQUOTE:
  233. *p_buf++ = L'\0';
  234. yylval.yy_string.str_len = (USHORT)(p_buf-yylval.yy_string.str_ptr);
  235. return;
  236. break;
  237. case LX_EOS:
  238. if(handle_eos() != BACKSLASH_EOS) {
  239. continue;
  240. }
  241. if(InInclude) {
  242. break;
  243. }
  244. else {
  245. c = (WCHAR)escape(get_non_eof()); /* process escaped char */
  246. }
  247. break;
  248. }
  249. if(p_buf - Macro_buffer > LIMIT_STRING_LENGTH) {
  250. if( not_warned_yet ) {
  251. strcpy (Msg_Text, GET_MSG (4009));
  252. warning(4009); /* string too big, truncating */
  253. not_warned_yet = FALSE;
  254. }
  255. } else {
  256. *p_buf++ = c;
  257. }
  258. }
  259. }
/************************************************************************
** do_newline : does work after a newline has been found.
**
** Bumps Linenumber, then consumes what may legally start the next line:
** byte order marks, carriage returns, blank lines, white space, comments
** and '#' directives (handed to preprocess()). Returns with the first
** other character pushed back, or when io_eob() reports end of input.
************************************************************************/
void
do_newline(
    void
    )
{
    ++Linenumber;
    for(;;) {
        switch(CHARMAP(GETCH())) {
        case LX_BOM:    // ignore Byte Order Mark
            break;
        case LX_CR:
            break;
        case LX_POUND:
            preprocess();
            break;
        case LX_SLASH:
            /* a '/' that does not start a comment ends the scan */
            if( ! skip_comment()) {
                goto leave_do_newline;
            }
            break;
        case LX_NL:
            /* blank line: report progress whenever the masked count is 0 */
            if ((lCPPTotalLinenumber++ & RC_PREPROCESS_UPDATE) == 0)
                UpdateStatus(1, lCPPTotalLinenumber);
            Linenumber++;
            // must manually write '\r' with '\n' when writing 16-bit strings
            if( Prep ) {        /* preprocessing only */
                myfwrite(L"\r", sizeof(WCHAR), 1, OUTPUTFILE);
            }
            /*
            ** FALLTHROUGH
            */
        case LX_WHITE:
            if( Prep ) {        /* preprocessing only, output whitespace */
                myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
            } else {
                /* swallow the rest of the white space run */
                do {
                    ;
                } while(LXC_IS_WHITE(GETCH()));
                UNGETCH();
            }
            break;
        case LX_EOS:
            if(PREVCH() == EOS_CHAR || PREVCH() == CONTROL_Z) {
                if(io_eob()) {  /* leaves us pointing at a valid char */
                    return;
                }
                break;
            }
            /* otherwise: backslash-newline continues, anything else leaves */
            if(checknl()) {
                continue;
            }
            /* it's a backslash */
            /*
            ** FALLTHROUGH
            */
        default:        /* first non-white is not a '#', leave */
leave_do_newline:
            UNGETCH();
            return;
        }
    }
}
/************************************************************************
 * GETNUM - Get a number from the input stream.
 *
 * ARGUMENTS
 *      c - the first character of the number, already read by the caller
 *
 * RETURNS
 *      L_NOTOKEN when the text was only collected: at Exp_ptr when
 *      Tiny_lexer_nesting is non-zero, or written to OUTPUTFILE when
 *      Prep is set.
 *      Whatever get_real() returns when a '.' or an exponent follows the
 *      digits.
 *      Otherwise the integer token type chosen from the suffix and the
 *      value, with the constant node stored in yylval.yy_tree.
 *
 * SIDE EFFECTS -
 *      does push back on the input stream.
 *      uses buffer Reuse_W (or the buffer at Exp_ptr)
 *
 * DESCRIPTION -
 *      Accumulate the number according to the rules for each radix:
 *      a leading "0x" selects 16, any other leading '0' selects 8,
 *      everything else is 10. An optional L and/or U suffix is accepted
 *      in either order. The digits are converted with matol().
 *
 * AUTHOR - Ralph Ryan, Sept. 8, 1982
 *
 * MODIFICATIONS - none
 *
 ************************************************************************/
token_t
getnum(
    REG WCHAR c
    )
{
    REG WCHAR *p;
    WCHAR *start;
    int radix;
    token_t tok;
    value_t value;

    tok = L_CINTEGER;
    start = (Tiny_lexer_nesting ? Exp_ptr : Reuse_W);
    p = start;
    if( c == L'0' ) {
        c = get_non_eof();
        if( IS_X(c) ) {
            radix = 16;
            /* the "0x" prefix is only kept when the text is echoed */
            if( Prep ) {
                *p++ = L'0';
                *p++ = L'x';
            }
            for(c = get_non_eof(); LXC_IS_XDIGIT(c); c = get_non_eof()) {
                /* no check for overflow? */
                *p++ = c;
            }
            /* nothing stored at all means "0x" had no digits after it */
            if((p == Reuse_W) && (Tiny_lexer_nesting == 0)) {
                strcpy (Msg_Text, GET_MSG (2153));
                error(2153);
            }
            goto check_suffix;
        } else {
            radix = 8;
            *p++ = L'0';        /* for preprocessing or 0.xxx case */
        }
    } else {
        radix = 10;
    }

    while( LXC_IS_DIGIT((WCHAR)c) ) {
        *p++ = c;
        c = get_non_eof();
    }

    if( IS_DOT(c) || IS_E(c) ) {
        /* get_real() re-reads the '.' or 'e' itself */
        UNGETCH();
        return(get_real(p));
    }

check_suffix:
    /* suffix characters are stored only when the text is being echoed */
    if( IS_EL(c) ) {
        if( Prep ) {
            *p++ = c;
        }
        c = get_non_eof();
        if( IS_U(c) ) {
            if(Prep) {
                *p++ = c;
            }
            tok = L_LONGUNSIGNED;
        } else {
            tok = L_LONGINT;
            UNGETCH();
        }
    } else if( IS_U(c) ) {
        if( Prep ) {
            *p++ = c;
        }
        c = get_non_eof();
        if( IS_EL(c) ) {
            if( Prep ) {
                *p++ = c;
            }
            tok = L_LONGUNSIGNED;
        } else {
            tok = L_CUNSIGNED;
            UNGETCH();
        }
    } else {
        UNGETCH();
    }
    *p = L'\0';

    if( start == Exp_ptr ) {
        /* collected in place; just advance the expansion pointer */
        Exp_ptr = p;
        return(L_NOTOKEN);
    } else if( Prep ) {
        myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
        return(L_NOTOKEN);
    }

    value.v_long = matol(Reuse_W,radix);
    /* refine the token type; L_LONGUNSIGNED is left as it is */
    switch(tok) {
    case L_CINTEGER:
        tok = (radix == 10)
            ? c_size(value.v_long)
            : uc_size(value.v_long)
            ;
        break;
    case L_LONGINT:
        tok = l_size(value.v_long);
        break;
    case L_CUNSIGNED:
        tok = ul_size(value.v_long);
        break;
    }
    yylval.yy_tree = build_const(tok, &value);
    return(tok);
}
/************************************************************************
** get_real : gathers the real part/exponent of a real number.
** Input  : ptr to the null terminator of the whole part
** Output : L_NOTOKEN when the text was only collected (Tiny_lexer_nesting
**          non-zero, or Prep set and the text written to OUTPUTFILE);
**          otherwise L_CFLOAT for an F suffix, L_CLDOUBLE for an L
**          suffix, and L_CDOUBLE when there is no suffix.
**
** ASSUMES whole part is either at Exp_ptr or Reuse_W.
************************************************************************/
token_t
get_real(
    REG PWCHAR p
    )
{
    REG int c;
    token_t tok;

    c = get_non_eof();
    if(Cross_compile && (Tiny_lexer_nesting == 0)) {
        strcpy (Msg_Text, GET_MSG (4012));
        warning(4012);  /* float constant in cross compilation */
        Cross_compile = FALSE;  /* only one msg per file */
    }
    /*
    ** if the next char is a digit, then we've been called after
    ** finding a '.'. if this is true, then
    ** we want to find the fractional part of the number.
    ** if it's a '.', then we've been called after finding
    ** a whole part, and we want the fraction.
    */
    if( LXC_IS_DIGIT((WCHAR)c) || IS_DOT(c) ) {
        do {
            *p++ = (WCHAR)c;
            c = (int)get_non_eof();
        } while( LXC_IS_DIGIT((WCHAR)c) );
    }
    if( IS_E((WCHAR)c) ) {      /* now have found the exponent */
        *p++ = (WCHAR)c;        /* save the 'e' */
        c = (WCHAR)get_non_eof();       /* skip it */
        if( IS_SIGN(c) ) {      /* optional sign */
            *p++ = (WCHAR)c;    /* save the sign */
            c = (int)get_non_eof();
        }
        if( ! LXC_IS_DIGIT((WCHAR)c)) {
            if( ! Rflag ) {
                if(Tiny_lexer_nesting == 0) {
                    Msg_Temp = GET_MSG (2021);
                    SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, c);
                    error(2021); /* missing or malformed exponent */
                }
                *p++ = L'0';    /* supply a digit for the exponent */
            }
        } else {
            do {        /* gather the exponent */
                *p++ = (WCHAR)c;
                c = (int)get_non_eof();
            } while( LXC_IS_DIGIT((WCHAR)c) );
        }
    }
    /* the suffix is consumed either way, but stored only when echoing */
    if( IS_F((WCHAR)c) ) {
        tok = L_CFLOAT;
        if( Prep ) {
            *p++ = (WCHAR)c;
        }
    } else if( IS_EL((WCHAR)c) ) {
        tok = L_CLDOUBLE;
        if( Prep ) {
            *p++ = (WCHAR)c;
        }
    } else {
        UNGETCH();
        tok = L_CDOUBLE;
    }
    *p = L'\0';
    if( Tiny_lexer_nesting > 0 ) {
        Exp_ptr = p;
        return(L_NOTOKEN);
    }
    else if( Prep ) {
        myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
        return(L_NOTOKEN);
    }
    /*
    ** reals aren't used during preprocessing
    */
    return(tok);
}
  538. /************************************************************************
  539. ** matol : ascii to long, given a radix.
  540. ************************************************************************/
  541. long
  542. matol(
  543. REG PWCHAR p_start,
  544. REG int radix
  545. )
  546. {
  547. long result, old_result;
  548. unsigned int i;
  549. old_result = result = 0;
  550. while(*p_start) {
  551. result *= radix;
  552. i = ctoi(*p_start);
  553. if( ((int)i >= radix) && (! Prep) ) {
  554. Msg_Temp = GET_MSG (2020);
  555. SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, *p_start, radix);
  556. error(2020); /* illegal digit % for base % */
  557. }
  558. result += i;
  559. p_start++;
  560. if(radix == 10) {
  561. if(result < old_result) {
  562. p_start--; /* fix the string ptr since we have overflowed */
  563. break;
  564. }
  565. } else if(*p_start) {
  566. /*
  567. ** the loop is not finished.
  568. ** we will multiply by the radix again
  569. ** check the upper bits. if they're on, then
  570. ** that mult will overflow the value
  571. */
  572. if(radix == 8) {
  573. if(result & 0xe0000000) {
  574. break;
  575. }
  576. } else if(result & 0xf0000000) {
  577. break;
  578. }
  579. }
  580. old_result = result;
  581. }
  582. if(*p_start) {
  583. strcpy (Msg_Text, GET_MSG (2177));
  584. error(2177); /* constant too big */
  585. result = 0;
  586. }
  587. return(result);
  588. }
  589. /************************************************************************
  590. ** uc_size : returns 'int' or 'long' (virtual unsigned).
  591. ** if their are no bits in the upper part of the value,
  592. ** then it's an int. otherwise, it's a long.
  593. ** this is valid too if target sizeof(int) != sizeof(long).
  594. ** then L_CINTEGER and L_LONGINT are synonymous.
  595. ************************************************************************/
  596. token_t
  597. uc_size(
  598. long value
  599. )
  600. {
  601. return((token_t)((value > INT_MAX) ? L_CUNSIGNED : L_CINTEGER));
  602. }
  603. /************************************************************************
  604. ** c_size : returns 'int' or 'long' for signed numbers.
  605. ** if the sign bit of the lower word is on or any bits
  606. ** in the upper word are on, then we must use 'long'.
  607. ************************************************************************/
  608. token_t
  609. c_size(
  610. long value
  611. )
  612. {
  613. return((token_t)((ABS(value) > INT_MAX) ? L_LONGINT : L_CINTEGER));
  614. }
  615. /************************************************************************
  616. ** l_size : returns 'longint' or 'longunsigned' for long numbers.
  617. ** if the sign bit of the high word is on this is 'longunsigned';
  618. ************************************************************************/
  619. token_t
  620. l_size(
  621. long value
  622. )
  623. {
  624. return((token_t)((value > LONG_MAX) ? L_LONGUNSIGNED : L_LONGINT));
  625. }
  626. /************************************************************************
  627. ** ul_size : returns 'unsigned' or 'longunsigned' for unsigned numbers.
  628. ** if the number can't be represented as unsigned, it is promoted to
  629. ** unsignedlong.
  630. ************************************************************************/
  631. token_t
  632. ul_size(
  633. long value
  634. )
  635. {
  636. return((token_t)((ABS(value) > UINT_MAX-1) ? L_LONGUNSIGNED : L_CUNSIGNED));
  637. }
  638. /************************************************************************
  639. ** ctoi : character to int.
  640. ************************************************************************/
  641. int
  642. ctoi(
  643. int c
  644. )
  645. {
  646. if(LXC_IS_DIGIT((WCHAR)c)) {
  647. return(c - L'0');
  648. } else {
  649. return(towupper((WCHAR)c) - towupper(L'A') + 10);
  650. }
  651. }
/************************************************************************
 * ESCAPE - get an escaped character
 *
 * ARGUMENTS
 *      c - the character that followed the backslash
 *
 * RETURNS - value of escaped character
 *
 * SIDE EFFECTS - may push back input
 *
 * DESCRIPTION - An escape ( '\' ) was discovered in the input. Translate
 * the next symbol or symbols into an escape sequence:
 *      up to three octal digits, \x followed by hex digits, or one of
 *      the letters a b f n r t v. Any other character stands for itself.
 *      When fMacRsrcs is set, \n and \r swap their values.
 *
 * AUTHOR - Ralph Ryan, Sept. 7, 1982
 *
 * MODIFICATIONS - none
 *
 ************************************************************************/
int
escape(
    REG int c
    )
{
    REG int value;
    int cnt;

escape_again:
    if( LXC_IS_ODIGIT((WCHAR)c) ) {/* \ooo is an octal number, must fit into a byte */
        cnt = 1;
        /* 'c' is the first digit; take at most two more */
        for(value = ctoi(c), c = get_non_eof();
            (cnt < 3) && LXC_IS_ODIGIT((WCHAR)c);
            cnt++, c = get_non_eof()
            ) {
            value *= 8;
            value += ctoi(c);
        }
        if( ! Prep ) {
            if(value > 255) {
                Msg_Temp = GET_MSG (2022);
                SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, value);
                error (2022);
            }
        }
        UNGETCH();      /* give back the character that ended the digits */
        return((char)value);
    }
    switch( c ) {
    case L'a':
        return(ALERT_CHAR);
        break;
    case L'b':
        return(L'\b');
        break;
    case L'f':
        return(L'\f');
        break;
    case L'n':
        return fMacRsrcs ? (L'\r') : (L'\n');
        break;
    case L'r':
        return fMacRsrcs ? (L'\n') : (L'\r');
        break;
    case L't':
        return(L'\t');
        break;
    case L'v':
        return(L'\v');
        break;
    case L'x':
        cnt = 0;
        value = 0;
        c = get_non_eof();
        /*
        ** NOTE(review): this accepts up to three hex digits although the
        ** result is narrowed to a char below - confirm the limit.
        */
        while((cnt < 3) && LXC_IS_XDIGIT((WCHAR)c)) {
            value *= 16;
            value += ctoi(c);
            c = get_non_eof();
            cnt++;
        }
        if(cnt == 0) {
            /* "\x" with no hex digit after it */
            strcpy (Msg_Text, GET_MSG (2153));
            error (2153);
        }
        UNGETCH();
        return((char)value);    /* cast to get sign extend */
    default:
        if(c != L'\\') {
            return(c);
        } else {
            /* backslash-newline: splice the lines and start over */
            if(checknl()) {
                c = get_non_eof();
                goto escape_again;
            } else {
                return(c);
            }
        }
    }
}
  747. /************************************************************************
  748. * CHECKOP - Check whether the next input character matches the argument.
  749. *
  750. * ARGUMENTS
  751. * short op - the character to be checked against
  752. *
  753. * RETURNS
  754. * TRUE or FALSE
  755. *
  756. * SIDE EFFECTS
  757. * Will push character back onto the input if there is no match.
  758. *
  759. * DESCRIPTION
  760. * If the next input character matches op, return TRUE. Otherwise
  761. * push it back onto the input.
  762. *
  763. * AUTHOR - Ralph Ryan, Sept. 9, 1982
  764. *
  765. * MODIFICATIONS - none
  766. *
  767. ************************************************************************/
  768. int
  769. checkop(
  770. int op
  771. )
  772. {
  773. if(op == (int)get_non_eof()) {
  774. return(TRUE);
  775. }
  776. UNGETCH();
  777. return(FALSE);
  778. }
  779. /************************************************************************
  780. ** DumpSlashComment : while skipping a comment, output it.
  781. ************************************************************************/
  782. void
  783. DumpSlashComment(
  784. VOID
  785. )
  786. {
  787. if( ! Cflag ) {
  788. skip_NLonly();
  789. return;
  790. }
  791. myfwrite(L"//", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  792. for(;;) {
  793. WCHAR c;
  794. switch(CHARMAP(c = GETCH())) {
  795. // must manually write '\r' with '\n' when writing 16-bit strings
  796. //case LX_CR:
  797. // continue;
  798. case LX_EOS:
  799. handle_eos();
  800. continue;
  801. case LX_NL:
  802. UNGETCH();
  803. return;
  804. }
  805. myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
  806. }
  807. }
  808. /************************************************************************
  809. ** dump_comment : while skipping a comment, output it.
  810. ************************************************************************/
  811. void
  812. dump_comment(
  813. void
  814. )
  815. {
  816. if( ! Cflag ) {
  817. skip_1comment();
  818. return;
  819. }
  820. myfwrite(L"/*", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  821. for(;;) {
  822. WCHAR c;
  823. switch(CHARMAP(c = GETCH())) {
  824. case LX_STAR:
  825. if(checkop(L'/')) {
  826. myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  827. return;
  828. }
  829. break;
  830. case LX_EOS:
  831. handle_eos();
  832. continue;
  833. case LX_NL:
  834. Linenumber++;
  835. break; /* output below */
  836. // must manually write '\r' with '\n' when writing 16-bit strings
  837. //case LX_CR:
  838. // continue;
  839. }
  840. myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
  841. }
  842. }
  843. /************************************************************************/
  844. /* skip_comment() */
  845. /************************************************************************/
  846. int
  847. skip_comment(
  848. void
  849. )
  850. {
  851. if(checkop(L'*')) {
  852. skip_1comment();
  853. return(TRUE);
  854. } else if(checkop(L'/')) {
  855. skip_NLonly();
  856. return(TRUE);
  857. } else {
  858. return(FALSE);
  859. }
  860. }
/************************************************************************
** skip_1comment : we're called when we're already in a comment.
** we're looking for the comment close. we also count newlines
** and output them if we're preprocessing.
**
** Returns once the closing star-slash pair has been consumed.
************************************************************************/
void
skip_1comment(
    void
    )
{
    UINT c;

    for(;;) {
        c = GETCH();
        if(c == L'*') {
recheck:
            /* the previous character was a '*': is this the closing '/'? */
            c = GETCH();
            if(c == L'/') {     /* end of comment */
                return;
            } else if(c == L'*') {
                /*
                ** if we get another '*' go back and check for a slash
                */
                goto recheck;
            } else if(c == EOS_CHAR) {
                /* let handle_eos() deal with it, then look again */
                handle_eos();
                goto recheck;
            }
        }
        /*
        ** note we fall through here. we know this baby is not a '*'
        ** we used to unget the char and continue. since we check for
        ** another '*' inside the above test, we can fall through here
        ** without ungetting/getting and checking again.
        */
        if(c <= L'\n') {
            /*
            ** hopefully, the above test is less expensive than doing two tests
            */
            if(c == L'\n') {
                Linenumber++;
                if(Prep) {
                    /* keep the output line count in step with the input */
                    myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
                }
            } else if(c == EOS_CHAR) {
                handle_eos();
            }
        }
    }
}
  910. /************************************************************************
  911. ** skip_cwhite : while the current character is whitespace or a comment.
  912. ** a newline is NOT whitespace.
  913. ************************************************************************/
  914. WCHAR
  915. skip_cwhite(
  916. void
  917. )
  918. {
  919. REG WCHAR c;
  920. skip_cwhite_again:
  921. while((c = GETCH()) <= L'/') { /* many chars are above this */
  922. if(c == L'/') {
  923. if( ! skip_comment()) {
  924. return(L'/');
  925. }
  926. } else if(c > L' ') { /* char is between '!' and '.' */
  927. return(c);
  928. } else {
  929. switch(CHARMAP(c)) {
  930. case LX_EOS:
  931. handle_eos();
  932. break;
  933. case LX_WHITE:
  934. continue;
  935. break;
  936. case LX_CR:
  937. continue;
  938. break;
  939. default:
  940. return(c);
  941. break;
  942. }
  943. }
  944. }
  945. if((c == L'\\') && (checknl())) {
  946. goto skip_cwhite_again;
  947. }
  948. return(c);
  949. }
  950. /************************************************************************
  951. ** checknl : check for newline, skipping carriage return if there is one.
  952. ** also increments Linenumber, so this should be used by routines which
  953. ** will not push the newline back in such a way that rawtok() will be invoked,
  954. ** find the newline and do another increment.
  955. ************************************************************************/
  956. int
  957. checknl(
  958. void
  959. )
  960. {
  961. REG WCHAR c;
  962. for(;;) {
  963. c = GETCH();
  964. if(c > L'\r') {
  965. UNGETCH();
  966. return(FALSE);
  967. }
  968. switch(c) {
  969. case L'\n':
  970. Linenumber++;
  971. // must manually write '\r' with '\n' when writing 16-bit strings
  972. if( Prep ) {
  973. myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
  974. }
  975. return(TRUE);
  976. break;
  977. case L'\r':
  978. continue;
  979. break;
  980. case EOS_CHAR:
  981. handle_eos();
  982. PREVCH() = L'\\'; /* M00HACK - needs pushback */
  983. continue;
  984. break;
  985. default:
  986. UNGETCH();
  987. return(FALSE);
  988. break;
  989. }
  990. }
  991. }
  992. /************************************************************************
  993. ** get_non_eof : get a real char.
  994. ************************************************************************/
  995. WCHAR
  996. get_non_eof(
  997. void
  998. )
  999. {
  1000. WCHAR c;
  1001. get_non_eof_again:
  1002. while((c = GETCH()) <= L'\r') {
  1003. if(c == L'\r') {
  1004. continue;
  1005. } else if(c != EOS_CHAR) {
  1006. break;
  1007. }
  1008. if(Tiny_lexer_nesting > 0) {
  1009. break;
  1010. }
  1011. handle_eos();
  1012. }
  1013. if((c == L'\\') && (checknl())) {
  1014. goto get_non_eof_again;
  1015. }
  1016. return(c);
  1017. }