Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1132 lines
25 KiB

  1. /************************************************************************/
  2. /* */
  3. /* RCPP - Resource Compiler Pre-Processor for NT system */
  4. /* */
  5. /* SCANNER.C - Routines for token scanning */
  6. /* */
  7. /* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
  8. /* */
  9. /************************************************************************/
  10. #include <stdio.h>
  11. #include <ctype.h>
  12. #include <limits.h>
  13. #include "rcpptype.h"
  14. #include "rcppdecl.h"
  15. #include "rcppext.h"
  16. #include "p0defs.h"
  17. #include "charmap.h"
  18. #include "grammar.h"
  19. #include "rcunicod.h"
  /*
  ** ABS : absolute value of an arithmetic expression.
  ** The argument and the whole expansion are parenthesized so that a
  ** compound argument such as ABS(a - b) expands correctly.
  ** The argument is evaluated twice; do not pass expressions with side effects.
  */
  #define ABS(x) (((x) > 0) ? (x) : -(x))
  #define ALERT_CHAR '\007' /* ANSI alert character is ASCII BEL */
  22. extern int vfCurrFileType; //- Added for 16-bit file support.
  23. /************************************************************************/
  24. /* Local Function Prototypes */
  25. /************************************************************************/
  26. token_t c_size(long);
  27. int ctoi(int);
  28. int escape(int);
  29. token_t get_real(PUCHAR);
  30. token_t l_size(long);
  31. long matol(PUCHAR, int);
  32. token_t uc_size(long);
  33. token_t ul_size(long);
  34. void skip_1comment(void);
  35. /************************************************************************/
  36. /* local_c_hash */
  37. /************************************************************************/
  38. hash_t local_c_hash(REG char *name)
  39. {
  40. REG hash_t i;
  41. i = 0;
  42. while(*name) {
  43. i += (*name & HASH_MASK);
  44. name++;
  45. }
  46. return(i);
  47. }
  48. /************************************************************************
  49. * GETID - Get an identifier or keyword.
  50. * (we know that we're given at least 1 id char)
  51. * in addition, we'll hash the value using 'c'.
  52. ************************************************************************/
  53. void getid(REG UINT c)
  54. {
  55. REG UCHAR *p;
  56. p = Reuse_1;
  57. *p++ = (UCHAR)c;
  58. c &= HASH_MASK;
  59. repeat:
  60. while(LXC_IS_IDENT(*p = GETCH())) { /* while it's an id char . . . */
  61. c += (*p & HASH_MASK); /* hash it */
  62. p++;
  63. }
  64. if(*p != EOS_CHAR) {
  65. if((*p == '\\') && (checknl())) {
  66. goto repeat;
  67. }
  68. UNGETCH();
  69. if(p >= LIMIT(Reuse_1)) {
  70. Msg_Temp = GET_MSG (1067);
  71. SET_MSG (Msg_Text, Msg_Temp);
  72. fatal(1067);
  73. }
  74. if( ((p - Reuse_1) > LIMIT_ID_LENGTH) && ( ! Prep )) {
  75. p = Reuse_1 + LIMIT_ID_LENGTH;
  76. *p++ = '\0';
  77. c = local_c_hash(Reuse_1);
  78. Msg_Temp = GET_MSG (4011);
  79. SET_MSG (Msg_Text, Msg_Temp, Reuse_1);
  80. warning(4011); /* id truncated */
  81. }
  82. else {
  83. *p++ = '\0';
  84. }
  85. Reuse_1_hash = (UCHAR)c;
  86. Reuse_1_length = (UCHAR)(p - Reuse_1);
  87. return;
  88. }
  89. if(io_eob()) { /* end of file in middle of id */
  90. Msg_Temp = GET_MSG (1004);
  91. SET_MSG (Msg_Text, Msg_Temp);
  92. fatal(1004);
  93. }
  94. goto repeat;
  95. }
  96. /************************************************************************
  97. ** prep_string : outputs char/string constants when preprocessing only
  98. ************************************************************************/
  99. void prep_string(REG UCHAR c)
  100. {
  101. REG char *p_buf;
  102. int term_char;
  103. p_buf = Reuse_1;
  104. term_char = c;
  105. *p_buf++ = c; /* save the open quote */
  106. //-
  107. //- The following check was added to support 16-bit files.
  108. //- For the 8-bit file, the code has not changed at all.
  109. //-
  110. if (vfCurrFileType == DFT_FILE_IS_8_BIT) {
  111. for(;;) {
  112. switch(CHARMAP(c = GETCH())) {
  113. case LX_DQUOTE:
  114. case LX_SQUOTE:
  115. if(c == (UCHAR)term_char) {
  116. *p_buf++ = (UCHAR)term_char;/* save the terminating quote */
  117. goto out_of_loop;
  118. }
  119. break;
  120. case LX_BACKSLASH:
  121. case LX_LEADBYTE:
  122. *p_buf++ = c;
  123. c = get_non_eof();
  124. break;
  125. case LX_CR:
  126. continue;
  127. case LX_NL:
  128. UNGETCH();
  129. goto out_of_loop;
  130. case LX_EOS:
  131. if(c == '\\') {
  132. *p_buf++ = c;
  133. c = get_non_eof();
  134. break;
  135. }
  136. handle_eos();
  137. continue;
  138. break;
  139. }
  140. *p_buf++ = c;
  141. if(p_buf == &Reuse_1[MED_BUFFER - 1]) {
  142. *p_buf = '\0';
  143. fwrite(Reuse_1, (size_t)(p_buf - Reuse_1), 1, OUTPUTFILE);
  144. p_buf = Reuse_1;
  145. }
  146. }
  147. } else {
  148. WCHAR wchCurr;
  149. for(;;) {
  150. wchCurr = (WCHAR)wchCheckWideChar ();
  151. c = GETCH (); //- Make sure buffers increment normally.
  152. if (wchCurr < 127) {
  153. switch(CHARMAP(c)) {
  154. case LX_DQUOTE:
  155. case LX_SQUOTE:
  156. if(c == (UCHAR)term_char) {
  157. *p_buf++ = (UCHAR)term_char;/* save the quote */
  158. goto out_of_loop;
  159. }
  160. break;
  161. case LX_BACKSLASH:
  162. *p_buf++ = c;
  163. break;
  164. case LX_CR:
  165. continue;
  166. case LX_NL:
  167. UNGETCH();
  168. goto out_of_loop;
  169. case LX_EOS:
  170. handle_eos ();
  171. continue;
  172. }
  173. *p_buf++ = c;
  174. } else {
  175. //- Write out as 6 octal characters.
  176. //- This is the safest way to do it.
  177. *p_buf++ = '\\';
  178. *p_buf++ = (CHAR)('0' + ((wchCurr >> 15) & 7));
  179. *p_buf++ = (CHAR)('0' + ((wchCurr >> 12) & 7));
  180. *p_buf++ = (CHAR)('0' + ((wchCurr >> 9) & 7));
  181. *p_buf++ = (CHAR)('0' + ((wchCurr >> 6) & 7));
  182. *p_buf++ = (CHAR)('0' + ((wchCurr >> 3) & 7));
  183. *p_buf++ = (CHAR)('0' + (wchCurr & 7));
  184. }
  185. if(p_buf > &Reuse_1[MED_BUFFER - 10]) {
  186. *p_buf = '\0';
  187. fwrite(Reuse_1, (size_t)(p_buf - Reuse_1), 1, OUTPUTFILE);
  188. p_buf = Reuse_1;
  189. }
  190. }
  191. }
  192. out_of_loop:
  193. *p_buf = '\0';
  194. fwrite(Reuse_1, (size_t)(p_buf - Reuse_1), 1, OUTPUTFILE);
  195. }
  196. /************************************************************************
  197. ** char_const : gather up a character constant
  198. ** we're called after finding the openning single quote.
  199. ************************************************************************/
  200. token_t char_const(void)
  201. {
  202. REG UCHAR c;
  203. value_t value;
  204. token_t tok;
  205. tok = (token_t)(Jflag ? L_CUNSIGNED : L_CINTEGER);
  206. first_switch:
  207. switch(CHARMAP(c = GETCH())) {
  208. case LX_BACKSLASH:
  209. break;
  210. case LX_SQUOTE:
  211. Msg_Temp = GET_MSG (2137);
  212. SET_MSG (Msg_Text, Msg_Temp); /* empty character constant */
  213. error(2137);
  214. value.v_long = 0;
  215. UNGETCH();
  216. break;
  217. case LX_EOS: /* ??? assumes i/o buffering > 1 char */
  218. if(handle_eos() != BACKSLASH_EOS) {
  219. goto first_switch;
  220. }
  221. value.v_long = escape(get_non_eof());
  222. if( tok == L_CUNSIGNED ) { /* don't sign extend */
  223. value.v_long &= 0xff;
  224. }
  225. break;
  226. case LX_NL:
  227. /* newline in character constant */
  228. Msg_Temp = GET_MSG (2001);
  229. SET_MSG(Msg_Text, Msg_Temp);
  230. error (2001);
  231. UNGETCH();
  232. /*
  233. ** FALLTHROUGH
  234. */
  235. default:
  236. value.v_long = c;
  237. break;
  238. }
  239. if((c = get_non_eof()) != '\'') {
  240. Msg_Temp = GET_MSG (2015);
  241. SET_MSG (Msg_Text, Msg_Temp);
  242. error (2015); /* too many chars in constant */
  243. do {
  244. if(c == '\n') {
  245. Msg_Temp = GET_MSG (2016);
  246. SET_MSG (Msg_Text, Msg_Temp);
  247. error(2016); /* missing closing ' */
  248. break;
  249. }
  250. } while((c = get_non_eof()) != '\'');
  251. }
  252. yylval.yy_tree = build_const(tok, &value);
  253. return(tok);
  254. }
  255. /************************************************************************
  256. ** str_const : gather up a string constant
  257. ************************************************************************/
  258. void str_const(VOID)
  259. {
  260. REG UCHAR c;
  261. REG PUCHAR p_buf;
  262. int not_warned_yet = TRUE;
  263. p_buf = yylval.yy_string.str_ptr = Macro_buffer;
  264. /*
  265. ** Is it possible that reading this string during a rescan will
  266. ** overwrite the expansion being rescanned? No, because a macro
  267. ** expansion is limited to the top half of Macro_buffer.
  268. ** For Macro_depth > 0, this is like copying the string from
  269. ** somewhere in the top half of Macro_buffer to the bottom half
  270. ** of Macro_buffer.
  271. ** Note that the restriction on the size of an expanded macro is
  272. ** stricter than the limit on an L_STRING length. An expanded
  273. ** macro is limited to around 1019 bytes, but an L_STRING is
  274. ** limited to 2043 bytes.
  275. */
  276. for(;;) {
  277. switch(CHARMAP(c = GETCH())) {
  278. case LX_NL:
  279. UNGETCH();
  280. Msg_Temp = GET_MSG (2001);
  281. SET_MSG (Msg_Text, Msg_Temp);
  282. error(2001);
  283. /*
  284. ** FALLTHROUGH
  285. */
  286. case LX_DQUOTE:
  287. *p_buf++ = '\0';
  288. yylval.yy_string.str_len = (USHORT)(p_buf-yylval.yy_string.str_ptr);
  289. return;
  290. break;
  291. case LX_LEADBYTE:
  292. *p_buf++ = c;
  293. c = get_non_eof();
  294. break;
  295. case LX_EOS:
  296. if(handle_eos() != BACKSLASH_EOS) {
  297. continue;
  298. }
  299. if(InInclude) {
  300. break;
  301. }
  302. else {
  303. c = (UCHAR)escape(get_non_eof()); /* process escaped char */
  304. }
  305. break;
  306. }
  307. if(p_buf - Macro_buffer > LIMIT_STRING_LENGTH) {
  308. if( not_warned_yet ) {
  309. Msg_Temp = GET_MSG (4009);
  310. SET_MSG (Msg_Text, Msg_Temp);
  311. warning(4009); /* string too big, truncating */
  312. not_warned_yet = FALSE;
  313. }
  314. }
  315. else {
  316. *p_buf++ = c;
  317. }
  318. }
  319. }
  320. /************************************************************************
  321. ** do_newline : does work after a newline has been found.
  322. ************************************************************************/
  323. void do_newline()
  324. {
  325. ++Linenumber;
  326. for(;;) {
  327. switch(CHARMAP(GETCH())) {
  328. case LX_CR:
  329. break;
  330. case LX_POUND:
  331. preprocess();
  332. break;
  333. case LX_SLASH:
  334. if( ! skip_comment()) {
  335. goto leave_do_newline;
  336. }
  337. break;
  338. case LX_NL:
  339. Linenumber++;
  340. /*
  341. ** FALLTHROUGH
  342. */
  343. case LX_WHITE:
  344. if( Prep ) { /* preprocessing only, output whitespace */
  345. fputc(PREVCH(), OUTPUTFILE);
  346. }
  347. else {
  348. do {
  349. ;
  350. } while(LXC_IS_WHITE(GETCH()));
  351. UNGETCH();
  352. }
  353. break;
  354. case LX_EOS:
  355. if(PREVCH() == EOS_CHAR || PREVCH() == CONTROL_Z) {
  356. if(io_eob()) { /* leaves us pointing at a valid char */
  357. return;
  358. }
  359. break;
  360. }
  361. if(checknl()) {
  362. continue;
  363. }
  364. /* it's a backslash */
  365. /*
  366. ** FALLTHROUGH
  367. */
  368. default: /* first non-white is not a '#', leave */
  369. leave_do_newline:
  370. UNGETCH();
  371. return;
  372. }
  373. }
  374. }
  375. /************************************************************************
  376. * GETNUM - Get a number from the input stream.
  377. *
  378. * ARGUMENTS
  379. * radix - the radix of the number to be accumulated. Can only be 8, 10,
  380. * or 16
  381. * pval - a pointer to a VALUE union to be filled in with the value
  382. *
  383. * RETURNS - type of the token (L_CINTEGER or L_CFLOAT)
  384. *
  385. * SIDE EFFECTS -
  386. * does push back on the input stream.
  387. * writes into pval by reference
  388. * uses buffer Reuse_1
  389. *
  390. * DESCRIPTION -
  391. * Accumulate the number according to the rules for each radix.
  392. * Set up the format string according to the radix (or distinguish
  393. * integer from float if radix is 10) and convert to binary.
  394. *
  395. * AUTHOR - Ralph Ryan, Sept. 8, 1982
  396. *
  397. * MODIFICATIONS - none
  398. *
  399. ************************************************************************/
  400. token_t getnum(REG UCHAR c)
  401. {
  402. REG char *p;
  403. UCHAR *start;
  404. int radix;
  405. token_t tok;
  406. value_t value;
  407. tok = L_CINTEGER;
  408. start = (Tiny_lexer_nesting ? Exp_ptr : Reuse_1);
  409. p = start;
  410. if( c == '0' ) {
  411. c = get_non_eof();
  412. if( IS_X(c) ) {
  413. radix = 16;
  414. if( Prep ) {
  415. *p++ = '0';
  416. *p++ = 'x';
  417. }
  418. for(c = get_non_eof(); LXC_IS_XDIGIT(c); c = get_non_eof()) {
  419. /* no check for overflow? */
  420. *p++ = c;
  421. }
  422. if((p == Reuse_1) && (Tiny_lexer_nesting == 0)) {
  423. Msg_Temp = GET_MSG (2153);
  424. SET_MSG (Msg_Text, Msg_Temp);
  425. error(2153);
  426. }
  427. goto check_suffix;
  428. }
  429. else {
  430. radix = 8;
  431. *p++ = '0'; /* for preprocessing or 0.xxx case */
  432. }
  433. }
  434. else {
  435. radix = 10;
  436. }
  437. while( LXC_IS_DIGIT(c) ) {
  438. *p++ = c;
  439. c = get_non_eof();
  440. }
  441. if( IS_DOT(c) || IS_E(c) ) {
  442. UNGETCH();
  443. return(get_real(p));
  444. }
  445. check_suffix:
  446. if( IS_EL(c) ) {
  447. if( Prep ) {
  448. *p++ = c;
  449. }
  450. c = get_non_eof();
  451. if( IS_U(c) ) {
  452. if(Prep) {
  453. *p++ = c;
  454. }
  455. tok = L_LONGUNSIGNED;
  456. }
  457. else {
  458. tok = L_LONGINT;
  459. UNGETCH();
  460. }
  461. }
  462. else if( IS_U(c) ) {
  463. if( Prep ) {
  464. *p++ = c;
  465. }
  466. c = get_non_eof();
  467. if( IS_EL(c) ) {
  468. if( Prep ) {
  469. *p++ = c;
  470. }
  471. tok = L_LONGUNSIGNED;
  472. }
  473. else {
  474. tok = L_CUNSIGNED;
  475. UNGETCH();
  476. }
  477. }
  478. else {
  479. UNGETCH();
  480. }
  481. *p = '\0';
  482. if( start == Exp_ptr ) {
  483. Exp_ptr = p;
  484. return(L_NOTOKEN);
  485. }
  486. else if( Prep ) {
  487. fwrite( Reuse_1, (size_t)(p - Reuse_1), 1, OUTPUTFILE);
  488. return(L_NOTOKEN);
  489. }
  490. value.v_long = matol(Reuse_1,radix);
  491. switch(tok) {
  492. case L_CINTEGER:
  493. tok = (radix == 10)
  494. ? c_size(value.v_long)
  495. : uc_size(value.v_long)
  496. ;
  497. break;
  498. case L_LONGINT:
  499. tok = l_size(value.v_long);
  500. break;
  501. case L_CUNSIGNED:
  502. tok = ul_size(value.v_long);
  503. break;
  504. }
  505. yylval.yy_tree = build_const(tok, &value);
  506. return(tok);
  507. }
  508. /************************************************************************
  509. ** get_real : gathers the real part/exponent of a real number.
  510. ** Input : ptr to the null terminator of the whole part
  511. ** pointer to receive value.
  512. ** Output : L_CFLOAT
  513. **
  514. ** ASSUMES whole part is either at Exp_ptr or Reuse_1.
  515. ************************************************************************/
  516. token_t get_real(REG PUCHAR p)
  517. {
  518. REG int c;
  519. token_t tok;
  520. c = get_non_eof();
  521. if(Cross_compile && (Tiny_lexer_nesting == 0)) {
  522. Msg_Temp = GET_MSG (4012);
  523. SET_MSG (Msg_Text, Msg_Temp);
  524. warning(4012); /* float constant in cross compilation */
  525. Cross_compile = FALSE; /* only one msg per file */
  526. }
  527. /*
  528. ** if the next char is a digit, then we've been called after
  529. ** finding a '.'. if this is true, then
  530. ** we want to find the fractional part of the number.
  531. ** if it's a '.', then we've been called after finding
  532. ** a whole part, and we want the fraction.
  533. */
  534. if( LXC_IS_DIGIT(c) || IS_DOT(c) ) {
  535. do {
  536. *p++ = (UCHAR)c;
  537. c = (int)get_non_eof();
  538. } while( LXC_IS_DIGIT(c) );
  539. }
  540. if( IS_E(c) ) { /* now have found the exponent */
  541. *p++ = (UCHAR)c; /* save the 'e' */
  542. c = (UCHAR)get_non_eof(); /* skip it */
  543. if( IS_SIGN(c) ) { /* optional sign */
  544. *p++ = (UCHAR)c; /* save the sign */
  545. c = (int)get_non_eof();
  546. }
  547. if( ! LXC_IS_DIGIT(c)) {
  548. if( ! Rflag ) {
  549. if(Tiny_lexer_nesting == 0) {
  550. Msg_Temp = GET_MSG (2021);
  551. SET_MSG (Msg_Text, Msg_Temp, c);
  552. error(2021); /* missing or malformed exponent */
  553. }
  554. *p++ = '0';
  555. }
  556. }
  557. else {
  558. do { /* gather the exponent */
  559. *p++ = (UCHAR)c;
  560. c = (int)get_non_eof();
  561. } while( LXC_IS_DIGIT(c) );
  562. }
  563. }
  564. if( IS_F(c) ) {
  565. tok = L_CFLOAT;
  566. if( Prep ) {
  567. *p++ = (UCHAR)c;
  568. }
  569. }
  570. else if( IS_EL(c) ) {
  571. tok = L_CLDOUBLE;
  572. if( Prep ) {
  573. *p++ = (UCHAR)c;
  574. }
  575. }
  576. else {
  577. UNGETCH();
  578. tok = L_CDOUBLE;
  579. }
  580. *p = '\0';
  581. if( Tiny_lexer_nesting > 0 ) {
  582. Exp_ptr = p;
  583. return(L_NOTOKEN);
  584. }
  585. else if( Prep ) {
  586. fwrite( Reuse_1, (size_t)(p - Reuse_1), 1, OUTPUTFILE);
  587. return(L_NOTOKEN);
  588. }
  589. /*
  590. ** reals aren't used during preprocessing
  591. */
  592. return(tok);
  593. }
  594. /************************************************************************
  595. ** matol : ascii to long, given a radix.
  596. ************************************************************************/
  597. long matol(REG PUCHAR p_start,REG int radix)
  598. {
  599. long result, old_result;
  600. unsigned int i;
  601. old_result = result = 0;
  602. while(*p_start) {
  603. result *= radix;
  604. i = ctoi(*p_start);
  605. if( ((int)i >= radix) && (! Prep) ) {
  606. Msg_Temp = GET_MSG (2020);
  607. SET_MSG (Msg_Text, Msg_Temp, *p_start, radix);
  608. error(2020); /* illegal digit % for base % */
  609. }
  610. result += i;
  611. p_start++;
  612. if(radix == 10) {
  613. if(result < old_result) {
  614. p_start--; /* fix the string ptr since we have overflowed */
  615. break;
  616. }
  617. }
  618. else if(*p_start) {
  619. /*
  620. ** the loop is not finished.
  621. ** we will multiply by the radix again
  622. ** check the upper bits. if they're on, then
  623. ** that mult will overflow the value
  624. */
  625. if(radix == 8) {
  626. if(result & 0xe0000000) {
  627. break;
  628. }
  629. }
  630. else if(result & 0xf0000000) {
  631. break;
  632. }
  633. }
  634. old_result = result;
  635. }
  636. if(*p_start) {
  637. Msg_Temp = GET_MSG (2177);
  638. SET_MSG (Msg_Text, Msg_Temp);
  639. error(2177); /* constant too big */
  640. result = 0;
  641. }
  642. return(result);
  643. }
  644. /************************************************************************
  645. ** uc_size : returns 'int' or 'long' (virtual unsigned).
  646. ** if their are no bits in the upper part of the value,
  647. ** then it's an int. otherwise, it's a long.
  648. ** this is valid too if target sizeof(int) != sizeof(long).
  649. ** then L_CINTEGER and L_LONGINT are synonymous.
  650. ************************************************************************/
  651. token_t uc_size(long value)
  652. {
  653. return((token_t)((value > INT_MAX) ? L_CUNSIGNED : L_CINTEGER));
  654. }
  655. /************************************************************************
  656. ** c_size : returns 'int' or 'long' for signed numbers.
  657. ** if the sign bit of the lower word is on or any bits
  658. ** in the upper word are on, then we must use 'long'.
  659. ************************************************************************/
  660. token_t c_size(long value)
  661. {
  662. return((token_t)((ABS(value) > INT_MAX) ? L_LONGINT : L_CINTEGER));
  663. }
  664. /************************************************************************
  665. ** l_size : returns 'longint' or 'longunsigned' for long numbers.
  666. ** if the sign bit of the high word is on this is 'longunsigned';
  667. ************************************************************************/
  668. token_t l_size(long value)
  669. {
  670. return((token_t)((value > LONG_MAX) ? L_LONGUNSIGNED : L_LONGINT));
  671. }
  672. /************************************************************************
  673. ** ul_size : returns 'unsigned' or 'longunsigned' for unsigned numbers.
  674. ** if the number can't be represented as unsigned, it is promoted to
  675. ** unsignedlong.
  676. ************************************************************************/
  677. token_t ul_size(long value)
  678. {
  679. return((token_t)((ABS(value) > UINT_MAX-1) ? L_LONGUNSIGNED : L_CUNSIGNED));
  680. }
  /************************************************************************
  ** ctoi : character to int.
  ** Decimal digits map to 0..9.  Any other character maps to its distance
  ** from 'A' after upper-casing, plus 10 (so 'a'/'A' give 10).
  ************************************************************************/
  int ctoi(int c)
  {
      return(LXC_IS_DIGIT(c) ? (c - '0') : (toupper(c) - toupper('A') + 10));
  }
  693. /************************************************************************
  694. * ESCAPE - get an escaped character
  695. *
  696. * ARGUMENTS - none
  697. *
  698. * RETURNS - value of escaped character
  699. *
  700. * SIDE EFFECTS - may push back input
  701. *
  702. * DESCRIPTION - An escape ( '\' ) was discovered in the input. Translate
  703. * the next symbol or symbols into an escape sequence.
  704. *
  705. * AUTHOR - Ralph Ryan, Sept. 7, 1982
  706. *
  707. * MODIFICATIONS - none
  708. *
  709. ************************************************************************/
  710. int escape(REG int c)
  711. {
  712. REG int value;
  713. int cnt;
  714. escape_again:
  715. if( LXC_IS_ODIGIT(c) ) {/* \ooo is an octal number, must fit into a byte */
  716. cnt = 1;
  717. for(value = ctoi(c), c = get_non_eof();
  718. (cnt < 3) && LXC_IS_ODIGIT(c);
  719. cnt++, c = get_non_eof()
  720. ) {
  721. value *= 8;
  722. value += ctoi(c);
  723. }
  724. if( ! Prep ) {
  725. if(value > 255) {
  726. Msg_Temp = GET_MSG (2022);
  727. SET_MSG (Msg_Text, Msg_Temp, value);
  728. error (2022);
  729. }
  730. }
  731. UNGETCH();
  732. return((char)value);
  733. }
  734. switch( c ) {
  735. case 'a':
  736. return(ALERT_CHAR);
  737. break;
  738. case 'b':
  739. return('\b');
  740. break;
  741. case 'f':
  742. return('\f');
  743. break;
  744. case 'n':
  745. return('\n');
  746. break;
  747. case 'r':
  748. return('\r');
  749. break;
  750. case 't':
  751. return('\t');
  752. break;
  753. case 'v':
  754. return('\v');
  755. break;
  756. case 'x':
  757. cnt = 0;
  758. value = 0;
  759. c = get_non_eof();
  760. while((cnt < 3) && LXC_IS_XDIGIT(c)) {
  761. value *= 16;
  762. value += ctoi(c);
  763. c = get_non_eof();
  764. cnt++;
  765. }
  766. if(cnt == 0) {
  767. Msg_Temp = GET_MSG (2153);
  768. SET_MSG (Msg_Text, Msg_Temp);
  769. error (2153);
  770. }
  771. UNGETCH();
  772. return((char)value); /* cast to get sign extend */
  773. default:
  774. if(c != '\\') {
  775. return(c);
  776. }
  777. else {
  778. if(checknl()) {
  779. c = get_non_eof();
  780. goto escape_again;
  781. }
  782. else {
  783. return(c);
  784. }
  785. }
  786. }
  787. }
  788. /************************************************************************
  789. * CHECKOP - Check whether the next input character matches the argument.
  790. *
  791. * ARGUMENTS
  792. * short op - the character to be checked against
  793. *
  794. * RETURNS
  795. * TRUE or FALSE
  796. *
  797. * SIDE EFFECTS
  798. * Will push character back onto the input if there is no match.
  799. *
  800. * DESCRIPTION
  801. * If the next input character matches op, return TRUE. Otherwise
  802. * push it back onto the input.
  803. *
  804. * AUTHOR - Ralph Ryan, Sept. 9, 1982
  805. *
  806. * MODIFICATIONS - none
  807. *
  808. ************************************************************************/
  809. int checkop(int op)
  810. {
  811. if(op == (int)get_non_eof()) {
  812. return(TRUE);
  813. }
  814. UNGETCH();
  815. return(FALSE);
  816. }
  817. /************************************************************************
  818. ** DumpSlashComment : while skipping a comment, output it.
  819. ************************************************************************/
  820. void DumpSlashComment(VOID)
  821. {
  822. if( ! Cflag ) {
  823. skip_NLonly();
  824. return;
  825. }
  826. fwrite("//", 2, 1, OUTPUTFILE);
  827. for(;;) {
  828. REG UCHAR c;
  829. switch(CHARMAP(c = GETCH())) {
  830. case LX_CR:
  831. continue;
  832. case LX_EOS:
  833. handle_eos();
  834. continue;
  835. case LX_NL:
  836. UNGETCH();
  837. return;
  838. }
  839. fputc(c, OUTPUTFILE);
  840. }
  841. }
  842. /************************************************************************
  843. ** dump_comment : while skipping a comment, output it.
  844. ************************************************************************/
  845. void dump_comment()
  846. {
  847. if( ! Cflag ) {
  848. skip_1comment();
  849. return;
  850. }
  851. fwrite("/*", 2, 1, OUTPUTFILE);
  852. for(;;) {
  853. REG UCHAR c;
  854. switch(CHARMAP(c = GETCH())) {
  855. case LX_STAR:
  856. if(checkop('/')) {
  857. fwrite("*/", 2, 1, OUTPUTFILE);
  858. return;
  859. }
  860. break;
  861. case LX_EOS:
  862. handle_eos();
  863. continue;
  864. case LX_NL:
  865. Linenumber++;
  866. break; /* output below */
  867. case LX_CR:
  868. continue;
  869. }
  870. fputc(c, OUTPUTFILE);
  871. }
  872. }
  873. /************************************************************************/
  874. /* skip_comment() */
  875. /************************************************************************/
  876. int skip_comment(void)
  877. {
  878. if(checkop('*')) {
  879. skip_1comment();
  880. return(TRUE);
  881. }
  882. else if(checkop('/')) {
  883. skip_NLonly();
  884. return(TRUE);
  885. }
  886. else {
  887. return(FALSE);
  888. }
  889. }
  890. /************************************************************************
  891. ** skip_1comment : we're called when we're already in a comment.
  892. ** we're looking for the comment close. we also count newlines
  893. ** and output them if we're preprocessing.
  894. ************************************************************************/
  895. void skip_1comment(void)
  896. {
  897. UINT c;
  898. for(;;) {
  899. c = GETCH();
  900. if(c == '*') {
  901. recheck:
  902. c = GETCH();
  903. if(c == '/') { /* end of comment */
  904. return;
  905. }
  906. else if(c == '*') {
  907. /*
  908. ** if we get another '*' go back and check for a slash
  909. */
  910. goto recheck;
  911. }
  912. else if(c == EOS_CHAR) {
  913. handle_eos();
  914. goto recheck;
  915. }
  916. }
  917. /*
  918. ** note we fall through here. we know this baby is not a '*'
  919. ** we used to unget the char and continue. since we check for
  920. ** another '*' inside the above test, we can fall through here
  921. ** without ungetting/getting and checking again.
  922. */
  923. if(c <= '\n') {
  924. /*
  925. ** hopefully, the above test is less expensive than doing two tests
  926. */
  927. if(c == '\n') {
  928. Linenumber++;
  929. if(Prep) {
  930. fputc('\n', OUTPUTFILE);
  931. }
  932. }
  933. else if(c == EOS_CHAR) {
  934. handle_eos();
  935. }
  936. }
  937. }
  938. }
  939. /************************************************************************
  940. ** skip_cwhite : while the current character is whitespace or a comment.
  941. ** a newline is NOT whitespace.
  942. ************************************************************************/
  943. UCHAR skip_cwhite(void)
  944. {
  945. REG UCHAR c;
  946. skip_cwhite_again:
  947. while((c = GETCH()) <= '/') { /* many chars are above this */
  948. if(c == '/') {
  949. if( ! skip_comment()) {
  950. return('/');
  951. }
  952. }
  953. else if(c > ' ') { /* char is between '!' and '.' */
  954. return(c);
  955. }
  956. else {
  957. switch(CHARMAP(c)) {
  958. case LX_EOS:
  959. handle_eos();
  960. break;
  961. case LX_WHITE:
  962. continue;
  963. break;
  964. case LX_CR:
  965. continue;
  966. break;
  967. default:
  968. return(c);
  969. break;
  970. }
  971. }
  972. }
  973. if((c == '\\') && (checknl())) {
  974. goto skip_cwhite_again;
  975. }
  976. return(c);
  977. }
  978. /************************************************************************
  979. ** checknl : check for newline, skipping carriage return if there is one.
  980. ** also increments Linenumber, so this should be used by routines which
  981. ** will not push the newline back in such a way that rawtok() will be invoked,
  982. ** find the newline and do another increment.
  983. ************************************************************************/
  984. int checknl(void)
  985. {
  986. REG UCHAR c;
  987. for(;;) {
  988. c = GETCH();
  989. if(c > '\r') {
  990. UNGETCH();
  991. return(FALSE);
  992. }
  993. switch(c) {
  994. case '\n':
  995. Linenumber++;
  996. if( Prep ) {
  997. fputc('\n', OUTPUTFILE);
  998. }
  999. return(TRUE);
  1000. break;
  1001. case '\r':
  1002. continue;
  1003. break;
  1004. case EOS_CHAR:
  1005. handle_eos();
  1006. PREVCH() = '\\'; /* M00HACK - needs pushback */
  1007. continue;
  1008. break;
  1009. default:
  1010. UNGETCH();
  1011. return(FALSE);
  1012. break;
  1013. }
  1014. }
  1015. }
  1016. /************************************************************************
  1017. ** get_non_eof : get a real char.
  1018. ************************************************************************/
  1019. UCHAR get_non_eof(void)
  1020. {
  1021. UCHAR c;
  1022. get_non_eof_again:
  1023. while((c = GETCH()) <= '\r') {
  1024. if(c == '\r') {
  1025. continue;
  1026. }
  1027. else if(c != EOS_CHAR) {
  1028. break;
  1029. }
  1030. if(Tiny_lexer_nesting > 0) {
  1031. break;
  1032. }
  1033. handle_eos();
  1034. }
  1035. if((c == '\\') && (checknl())) {
  1036. goto get_non_eof_again;
  1037. }
  1038. return(c);
  1039. }