Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1383 lines
43 KiB

  1. /* Copyright (C) Boris Nikolaus, Germany, 1996-1997. All rights reserved. */
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <ctype.h>
  5. #include <stdarg.h>
  6. #include <string.h>
  7. #include "defs.h"
  8. #include "parser.h"
  /* ---- generator-wide state ---- */
  /* nonzero: output_rhs2() brackets inserted C code with #line directives */
  9. int linedirective = 1;
  /* NOTE(review): not referenced in this part of the file; presumably */
  /* selects const-qualified arguments together with conststr -- confirm */
  10. int constargs = 0;
  /* text emitted in front of the input-state parameter type in the */
  /* generated prototypes (see create_inc(), create_vardefs()) */
  11. char *conststr = "";
  /* lower-case and upper-case prefixes of all generated identifiers; */
  /* replaced by set_module() */
  12. char *ll = "ll";
  13. char *LL = "LL";
  /* line counter for the generated c file, advanced by output() and */
  /* output_line() */
  14. int outline = 1;
  /* outfilename is named in #line directives by output_line(); */
  /* incfilename is named in the #include emitted by create_vardefs() */
  15. char outfilename[256];
  16. char incfilename[256];
  /* streams written by output() (fout) and incput() (finc) */
  17. FILE *fout, *finc;
  /* name of the start symbol (set_start()); NULL if none was given */
  18. char *startsym = NULL;
  /* prefix of the #defines of the terminals (set_prefix()) */
  19. char *prefix = "";
  /* symbol table filled by create_symbol(); nsymbols entries are in use */
  20. item_t symbols[4096];
  21. int nsymbols = 0;
  /* flattened rules: add_items() terminates every alternative with */
  /* (item_t *)1, end_rule() terminates every definition with NULL */
  22. item_t *items[32768];
  23. int nitems = 0;
  /* list of known tags maintained by find_tag() */
  24. char *tags[256];
  25. int ntags = 0;
  /* NOTE(review): check, ncheck, expected_lr, found_lr and optimizer are */
  /* not referenced in this part of the file; presumably they belong to */
  /* the left-recursion check and command line handling -- confirm */
  26. item_t *check[4096];
  27. int ncheck = 0;
  28. int expected_lr = 0;
  29. int found_lr = 0;
  30. int optimizer = 0;
  /* if usetypes is set, create_inc() typedefs the STYPE and TERM types */
  /* from the types prefixed with USETYPES instead of declaring them */
  31. char *usetypes = NULL;
  32. char *USETYPES = NULL;
  /* singly linked list of rhs nodes; add_rules2() uses it as the list of */
  /* rhs parts that still have to be processed while the rules are saved */
  /* for the left-recursion search */
  33. /* list for checking for left-recursions */
  34. typedef struct rhslst_s {
  /* next pending node, NULL at the end of the list */
  35. struct rhslst_s *next;
  /* rhs part to process; add_rules2() skips a NULL rhs */
  36. struct rhs_s *rhs;
  37. } rhslst_t;
  /* forward declarations of functions that are called before their */
  /* definition in this file */
  38. item_t *get_symbol(char *identifier);
  39. char *convert(char *ccode);
  40. int output_rhs2(char *identifier, struct rhs_s *rhs, int *number, int depth, int *count, int *nextcount);
  41. void add_rules2(item_t **istk, int istkp, rhslst_t *c);
  /* NOTE(review): check_lr() is not defined in this part of the file */
  42. void check_lr(item_t *symbol);
  /* declare getopt() and its variables ourselves unless HAS_GETOPT is */
  /* defined */
  43. #ifndef HAS_GETOPT
  44. extern int getopt(int argc, char **argv, const char *opts);
  45. extern char *optarg;
  46. extern int optind;
  47. #endif
  /* NOTE(review): open_file() is not defined in this part of the file */
  48. void open_file(char *file);
  49. /* print an error message */
  50. void
  51. error(LLPOS *pos, char *fmt, ...)
  52. {
  53. va_list args;
  54. va_start(args, fmt);
  55. llverror(stderr, pos, fmt, args);
  56. va_end(args);
  57. }
  58. /* error function required by parser generator */
  59. void
  60. llverror(FILE *f, LLPOS *pos, char *fmt, va_list args)
  61. {
  62. if (pos && pos->line && pos->file)
  63. fprintf(f, "Error at line %d of \"%s\": ", pos->line,
  64. pos->file);
  65. vfprintf(f, fmt, args);
  66. putc('\n', f);
  67. exit(1);
  68. }
  69. /* write to c file, count lines for #line directive */
  70. void
  71. output(char *fmt, ...)
  72. {
  73. char buf[32768];
  74. char *p;
  75. va_list args;
  76. va_start(args, fmt);
  77. vsprintf(buf, fmt, args);
  78. fputs(buf, fout);
  79. #if 1
  80. fflush(fout);
  81. #endif
  82. for (p = buf; (p = strchr(p, '\n')); ) {
  83. outline++;
  84. p++;
  85. }
  86. va_end(args);
  87. }
  88. /* emit #line directive for lines of generated file */
  89. void
  90. output_line()
  91. {
  92. fprintf(fout, "#line %d \"%s\"\n", ++outline, outfilename);
  93. }
  94. /* write to h file */
  95. void
  96. incput(char *fmt, ...)
  97. {
  98. va_list args;
  99. va_start(args, fmt);
  100. vfprintf(finc, fmt, args);
  101. #if 1
  102. fflush(fout);
  103. #endif
  104. va_end(args);
  105. }
  106. /* emit a call for one item into c file */
  107. void
  108. output_call(int number, item_t *item, char *args)
  109. {
  110. char *ident;
  111. char *pre;
  112. /* get identifier of item */
  113. if (item->altidentifier)
  114. ident = item->altidentifier;
  115. else
  116. ident = item->identifier;
  117. /* terminal symbol? then use llterm(), otherwise name of non-terminal */
  118. if (!item->isnonterm) {
  119. /* if identifier is a single character, then use these character as */
  120. /* token value, otherwise add prefix to get the name of the #define */
  121. if (*ident == '\'')
  122. pre = "";
  123. else
  124. pre = prefix;
  125. /* call llterm to check the next token; */
  126. /* if item has a tag, use an LLSTYPE argument to get the item's val */
  127. if (item->tag) {
  128. output("%sterm(%s%s, &%slval, &%sstate_%d, &%sstate_%d)",
  129. ll, pre, ident, ll, ll, number - 1, ll, number);
  130. } else {
  131. output("%sterm(%s%s, (%sSTYPE *)0, &%sstate_%d, &%sstate_%d)",
  132. ll, pre, ident, LL, ll, number - 1, ll, number);
  133. }
  134. } else {
  135. /* call non-terminal function for parsing the non-terminal; */
  136. /* if item has a tag, use an tagtyped argument to get the item's val; */
  137. /* if item has arguments, add this argument list */
  138. if (item->tag) {
  139. if (*args)
  140. output("%s_%s(&%satt_%d, &%sstate_%d, &%sstate_%d, %s)",
  141. ll, ident, ll, number, ll, number - 1, ll, number, args);
  142. else
  143. output("%s_%s(&%satt_%d, &%sstate_%d, &%sstate_%d)",
  144. ll, ident, ll, number, ll, number - 1, ll, number);
  145. } else {
  146. if (*args)
  147. output("%s_%s(&%sstate_%d, &%sstate_%d, %s)",
  148. ll, ident, ll, number - 1, ll, number, args);
  149. else
  150. output("%s_%s(&%sstate_%d, &%sstate_%d)",
  151. ll, ident, ll, number - 1, ll, number);
  152. }
  153. }
  154. }
  155. /* emit all actions needed for parsing a rhs */
  156. void
  157. output_rhs(char *identifier, struct rhs_s *rhs)
  158. {
  159. int count[32], nextcount = 2;
  160. int i, number = 1;
  161. count[1] = 1;
  162. /* we need a state and some debugs when entering */
  163. output("%sSTATE %sstate_0;\n", LL, ll);
  164. output("%sDEBUG_ENTER(\"%s\");\n", LL, identifier);
  165. output("\n%sstate_0 = *%sin;\n", ll, ll);
  166. /* one define for the LLFAILED() macro */
  167. output("#undef failed\n#define failed failed1\n");
  168. /* output rhs parsing actions */
  169. i = output_rhs2(identifier, rhs, &number, 1, count, &nextcount);
  170. /* print needed closing braces */
  171. if (i) {
  172. for (; i > 0; i--)
  173. output("}");
  174. output("\n");
  175. }
  176. /* leave (successful) this parsing function */
  177. output("%sDEBUG_LEAVE(\"%s\", 1);\n", LL, identifier);
  178. output("return 1;\n");
  179. /* leave unsuccessful this parsing function */
  180. output("failed1: %sDEBUG_LEAVE(\"%s\", 0);\n", LL, identifier);
  181. output("return 0;\n");
  182. }
  /* Recursively emit the parsing actions of one rhs node into the c file. */
  /* identifier - name of the non-terminal being generated (used in the */
  /*              emitted debug calls) */
  /* rhs        - node to emit; NULL stands for an empty rule */
  /* number     - in/out: number of the next item; an eItem advances it, */
  /*              an eAlternative list resets it after every alternative */
  /* depth      - nesting depth; count[depth] is the number of the failed */
  /*              label to jump to when parsing fails at this depth */
  /* count      - label numbers per depth; an eAlternative list stores a */
  /*              new number at count[depth + 1] (no bounds check here, */
  /*              the caller's array has to be large enough) */
  /* nextcount  - in/out: next unused label number */
  /* Returns the number of braces that were opened in the emitted code */
  /* and still have to be closed by the caller. */
  /* Calls abort() if the type of rhs is handled by none of the cases. */
  183. /* emit actions needed for parsing a rhs */
  184. int
  185. output_rhs2(char *identifier, struct rhs_s *rhs, int *number, int depth, int *count, int *nextcount)
  186. {
  187. int i, j, n;
  188. item_t *item;
  189. struct rhs_s *r;
  190. /* empty rule? then completed */
  191. if (!rhs) {
  192. output("*%sout = %sstate_%d;\n", ll, ll, *number - 1);
  193. return 0;
  194. }
  195. /* check type of rhs */
  196. switch (rhs->type) {
  197. case eCCode:
  198. /* some C code statements shall be inserted: */
  199. /* prefixed by #line directive if desired, dump the code, and */
  200. /* use another #line directive to reference the generated file again */
  201. /* return 1 brace which need to be closed */
  202. if (linedirective)
  203. output("#line %d \"%s\"\n", rhs->u.ccode.line, rhs->u.ccode.file);
  204. output("{%s\n", convert(rhs->u.ccode.ccode));
  205. if (linedirective)
  206. output_line();
  207. return 1;
  208. case eItem:
  209. /* one item shall be parsed: */
  210. /* get vars for a new state and for the item's value (if needed), */
  211. /* dump the call for parsing the item, branch to the corresponding */
  212. /* failed label if the parsing failed, copy the item's value if */
  213. /* there's any and increment the number of the items */
  214. /* return 1 brace which need to be closed */
  215. output("{%sSTATE %sstate_%d;", LL, ll, *number);
  216. item = get_symbol(rhs->u.item.identifier);
  217. if (item->tag)
  218. output("%s %satt_%d;", item->tag, ll, *number);
  219. output("\nif (!");
  220. output_call(*number, item, rhs->u.item.args);
  221. output(") goto failed%d;\n", count[depth]);
  /* tagged terminal: copy the token value; if the tag is in the tag */
  /* list the union member named after the tag is used, otherwise the */
  /* whole lval */
  222. if (!item->isnonterm && item->tag) {
  223. for (i = 0; i < ntags; i++)
  224. if (!strcmp(tags[i], item->tag))
  225. break;
  226. if (i >= ntags)
  227. output("%satt_%d = %slval;\n", ll, *number, ll);
  228. else
  229. output("%satt_%d = %slval._%s;\n", ll, *number, ll, item->tag);
  230. }
  231. (*number)++;
  232. return 1;
  233. case eSequence:
  234. /* a sequence of items shall be parsed: */
  235. /* output all items of this sequence and return the counted number of */
  236. /* braces to be closed */
  237. /* dump copy of last output state before last ccode or at end */
  /* i counts the open braces, j is set once the copy of the output */
  /* state has been dumped */
  238. i = j = 0;
  239. for (; rhs; rhs = rhs->u.sequence.next) {
  240. if (!j) {
  /* r is NULL after this loop if only C code elements remain */
  241. for (r = rhs; r; r = r->u.sequence.next) {
  242. if (r->u.sequence.element->type != eCCode)
  243. break;
  244. }
  245. if (!r) {
  246. output("*%sout = %sstate_%d;\n", ll, ll, *number - 1);
  247. j = 1;
  248. }
  249. }
  250. i += output_rhs2(identifier, rhs->u.sequence.element, number,
  251. depth, count, nextcount);
  252. }
  253. if (!j)
  254. output("*%sout = %sstate_%d;\n", ll, ll, *number - 1);
  255. return i;
  256. case eAlternative:
  257. /* a list of alternatives shall be parsed: */
  258. /* if there's only one alternative, parse this one alternative */
  259. /* otherwise we need to emit some backtracking code: */
  260. /* - a define for the LLFAILED macro */
  261. /* - a current position into the input stream, */
  262. /* - a current stack position for the backtracking, */
  263. /* - a stack check (and resize if required), */
  264. /* - a switch statement for the alternatives, */
  265. /* - a case entry for each alternative, */
  266. /* - a debug statement for each alternative, */
  267. /* - the actions of each alternative, */
  268. /* - closing braces for the actions */
  269. /* - a default case in the switch statement for failure of parsing */
  270. /* by any alternative */
  271. /* - a failed label for start of backtracking */
  272. /* - code for backtracking (resetting the position into the input */
  273. /* stream, resetting the stack position */
  274. /* - two braces later be closed */
  275. if (!rhs->u.alternative.next)
  276. return output_rhs2(identifier, rhs->u.alternative.element, number,
  277. depth, count, nextcount);
  /* take a new failed label number for this list of alternatives */
  278. count[depth + 1] = (*nextcount)++;
  279. output("#undef failed\n#define failed failed%d\n",
  280. count[depth + 1]);
  281. output("{unsigned %spos%d = %scpos, %sstp%d = %scstp;\n",
  282. ll, depth, ll, ll, depth, ll);
  283. output("%sCHECKSTK;\n", LL);
  284. output("for (;;) {\n");
  285. output("switch (%sstk[%scstp++]) {\n", ll, ll);
  /* n remembers the item number all alternatives start with, */
  /* j is the number of the current alternative */
  286. n = *number;
  287. j = 1;
  288. for (; rhs; rhs = rhs->u.alternative.next) {
  289. output("case %d: case -%d:\n", j, j);
  290. output("%sDEBUG_ALTERNATIVE(\"%s\", %d);\n", LL, identifier, j);
  291. i = output_rhs2(identifier, rhs->u.alternative.element, number,
  292. depth + 1, count, nextcount);
  293. output("break;\n");
  294. if (i) {
  295. for (; i > 0; i--)
  296. output("}");
  297. output("\n");
  298. }
  /* the next alternative numbers its items from the same start */
  299. *number = n;
  300. j++;
  301. }
  302. output("default:\n");
  303. output("%sstk[--%scstp] = 1;\n", ll, ll);
  304. output("goto failed%d;\n", count[depth]);
  305. output("failed%d:\n", count[depth + 1]);
  306. output("%sDEBUG_BACKTRACKING(\"%s\");\n", LL, identifier);
  307. output("if (%sstk[--%scstp] < 0) %sstk[%scstp] = 0; else %sstk[%scstp]++;\n", ll, ll, ll, ll, ll, ll);
  308. output("%scpos = %spos%d; %scstp = %sstp%d;\n", ll, ll, depth, ll, ll, depth);
  309. output("continue;\n");
  310. output("} break;\n");
  311. return 2;
  /* disabled: code generation for bounded repetitions */
  312. #if 0
  313. case eBounded:
  314. /* this code does not work due to a design bug I wanted to parse */
  315. /* EBNF repetions; will fix it when I've time or need */
  316. count[depth + 1] = (*nextcount)++;
  317. output("#undef failed\n#define failed failed%d\n",
  318. count[depth + 1]);
  319. output("{unsigned %spos%d = %scpos, %sstp%d = %scstp;\n",
  320. ll, depth, ll, ll, depth, ll);
  321. output("int %sm, %sn, %ss, %sl = %d, %su = ",
  322. ll, ll, ll, ll, rhs->u.bounded.bounds.lower, ll);
  323. if (rhs->u.bounded.bounds.upper)
  324. output("%d;\n", rhs->u.bounded.bounds.upper);
  325. else
  326. output("INT_MAX - 1;\n");
  327. if (rhs->u.bounded.items->type == eNode)
  328. output("int %sf = 1;\n", ll);
  329. output("%sCHECKSTK;\n", LL);
  330. output("%ss = (%sstk[%scstp] > 0 ? 1 : -1);\n", ll, ll, ll);
  331. output("if (!(%sn = %sstk[%scstp++]) || %sn * %ss > %su - %sl + 1) {\n",
  332. ll, ll, ll, ll, ll, ll, ll);
  333. output("%sstk[--%scstp] = 1; %scpos = %spos%d; %scstp = %sstp%d; goto failed%d; }\n",
  334. ll, ll, ll, ll, depth, ll, ll, depth, count[depth]);
  335. output("for (%sm = %sn = %su + 1 - %sn * %ss; %sn; %sn--) {\n",
  336. ll, ll, ll, ll, ll, ll, ll);
  337. output("%sDEBUG_ITERATION(\"%s\", %sm - %sn + 1);\n",
  338. LL, identifier, ll, ll);
  339. n = *number;
  340. if (rhs->u.bounded.items->type == eNode) {
  341. output("if (!%sf) {\n", ll);
  342. i = output_rhs2(identifier, rhs->u.bounded.items->u.node.left,
  343. number, depth + 1, count, nextcount) + 1;
  344. output("%sf = 1;\n", ll);
  345. if (i) {
  346. for (; i > 0; i--)
  347. output("}");
  348. output("\n");
  349. }
  350. i = output_rhs2(identifier, rhs->u.bounded.items->u.node.right,
  351. number, depth + 1, count, nextcount);
  352. if (i) {
  353. for (; i > 0; i--)
  354. output("}");
  355. output("\n");
  356. }
  357. output("%sf = 0;\n", ll);
  358. } else {
  359. i = output_rhs2(identifier, rhs->u.bounded.items, number,
  360. depth + 1, count, nextcount);
  361. if (i) {
  362. for (; i > 0; i--)
  363. output("}");
  364. output("\n");
  365. }
  366. }
  367. *number = n;
  368. output("} failed%d:\n", count[depth + 1]);
  369. if (rhs->u.bounded.items->type == eNode) {
  370. output("if (%sf || %sm - %sn < %sl || (%sstk[%sstp%d] < 0 && %sn)) {\n",
  371. ll, ll, ll, ll, ll, ll, depth, ll);
  372. } else {
  373. output("if (%sm - %sn < %sl || (%sstk[%sstp%d] < 0 && %sn)) {\n",
  374. ll, ll, ll, ll, ll, depth, ll);
  375. }
  376. output("%sstk[%sstp%d] = 1; %scpos = %spos%d; %scstp = %sstp%d; goto failed%d; }\n",
  377. ll, ll, depth, ll, ll, depth, ll, ll, depth, count[depth]);
  378. output("%sstk[%sstp%d] = (%su - %sm + %sn + 1) * %ss;\n",
  379. ll, ll, depth, ll, ll, ll, ll);
  380. return 1;
  381. #endif
  382. }
  /* only reached if the type of rhs matched none of the cases above */
  383. abort();
  384. /*NOTREACHED*/
  385. }
  386. /* save the name of the start symbol */
  387. void
  388. set_start(char *startstr)
  389. {
  390. startsym = startstr;
  391. }
  392. /* save the prefix to be used for the #defines of the terminals */
  393. void
  394. set_prefix(char *prefixstr)
  395. {
  396. prefix = prefixstr;
  397. }
  398. /* save the prefix to be used for the generated functions, macros and types */
  399. void
  400. set_module(char *modulestr)
  401. {
  402. char *p;
  403. ll = strdup(modulestr);
  404. for (p = ll; *p; p++)
  405. *p = (char)tolower(*p);
  406. LL = strdup(modulestr);
  407. for (p = LL; *p; p++)
  408. *p = (char)toupper(*p);
  409. }
  410. /* find the tag in the list of tag declarations or add it if it's new */
  411. char *
  412. find_tag(char *tag)
  413. {
  414. int i;
  415. for (i = 0; i < ntags; i++)
  416. if (!strcmp(tags[i], tag))
  417. return tags[i];
  418. return tags[ntags++] = tag;
  419. }
  420. /* create an lhs symbol or an terminal symbol */
  421. item_t *
  422. create_symbol(int isnonterm, int isexternal, char *tag, char *identifier, char *altidentifier,
  423. char **args)
  424. {
  425. symbols[nsymbols].isnonterm = isnonterm;
  426. symbols[nsymbols].isexternal = isexternal;
  427. symbols[nsymbols].tag = tag;
  428. symbols[nsymbols].identifier = identifier;
  429. symbols[nsymbols].altidentifier = altidentifier;
  430. symbols[nsymbols].args = args;
  431. symbols[nsymbols].items = NULL;
  432. symbols[nsymbols].empty = 0;
  433. return symbols + nsymbols++;
  434. }
  435. /* search a symbol */
  436. item_t *
  437. find_symbol(char *identifier)
  438. {
  439. int i;
  440. for (i = 0; i < nsymbols; i++) {
  441. if (!strcmp(symbols[i].identifier, identifier))
  442. return symbols + i;
  443. }
  444. return NULL;
  445. }
  446. /* search a symbol or add it if it's new */
  447. item_t *
  448. get_symbol(char *identifier)
  449. {
  450. item_t *item;
  451. char *ident;
  452. item = find_symbol(identifier);
  453. if (!item) {
  454. if (*identifier == '\"') {
  455. ident = strdup(identifier + 1);
  456. ident[strlen(ident) - 1] = 0;
  457. item = create_symbol(0, 0, NULL, identifier, ident, NULL);
  458. } else {
  459. item = create_symbol(*identifier != '\'', 0, NULL, identifier, NULL,
  460. NULL);
  461. }
  462. }
  463. return item;
  464. }
  465. /* start the definition of the rhs of a symbol */
  466. void
  467. start_rule(item_t *symbol)
  468. {
  469. if (!symbol->isnonterm)
  470. error(NULL, "symbol %s is a terminal\n", symbol->identifier);
  471. if (symbol->items)
  472. error(NULL, "symbol %s twice defined\n", symbol->identifier);
  473. symbol->items = items + nitems;
  474. }
  475. /* add rhs items of one alternative to current definition */
  476. void
  477. add_items(item_t **i, int n)
  478. {
  479. if (nitems + n + 1 > sizeof(items) / sizeof(*items))
  480. error(NULL, "out of item space\n");
  481. while (n--)
  482. items[nitems++] = *i++;
  483. items[nitems++] = (item_t *)1; /* end-of-alternative */
  484. }
  485. /* finish current definition */
  486. void
  487. end_rule(item_t *item)
  488. {
  489. if (nitems >= sizeof(items) / sizeof(*items))
  490. error(NULL, "out of item space\n");
  491. items[nitems++] = NULL; /* end-of-definition */
  492. }
  493. /* save the rules for lr-recursion search */
  494. void
  495. add_rules(item_t *item, struct rhs_s *rhs)
  496. {
  497. item_t *istk[64];
  498. rhslst_t l;
  499. start_rule(item);
  500. l.next = NULL;
  501. l.rhs = rhs;
  502. add_rules2(istk, 0, &l);
  503. end_rule(item);
  504. }
  505. /* save the rules for lr-recursion search */
  506. void
  507. add_rules2(item_t **istk, int istkp, rhslst_t *c)
  508. {
  509. struct rhs_s *rhs;
  510. item_t *item;
  511. rhslst_t l, ll;
  512. if (!c) {
  513. add_items(istk, istkp);
  514. return;
  515. }
  516. rhs = c->rhs;
  517. c = c->next;
  518. if (!rhs) {
  519. add_rules2(istk, istkp, c);
  520. return;
  521. }
  522. switch (rhs->type) {
  523. case eCCode:
  524. add_rules2(istk, istkp, c);
  525. break;
  526. case eItem:
  527. item = get_symbol(rhs->u.item.identifier);
  528. istk[istkp++] = item;
  529. add_rules2(istk, istkp, c);
  530. break;
  531. case eSequence:
  532. if (rhs->u.sequence.next) {
  533. l.next = c;
  534. l.rhs = rhs->u.sequence.next;
  535. ll.next = &l;
  536. ll.rhs = rhs->u.sequence.element;
  537. add_rules2(istk, istkp, &ll);
  538. } else {
  539. l.next = c;
  540. l.rhs = rhs->u.sequence.element;
  541. add_rules2(istk, istkp, &l);
  542. }
  543. break;
  544. case eAlternative:
  545. l.next = c;
  546. l.rhs = rhs->u.alternative.element;
  547. add_rules2(istk, istkp, &l);
  548. if (rhs->u.alternative.next) {
  549. l.rhs = rhs->u.alternative.next;
  550. add_rules2(istk, istkp, &l);
  551. }
  552. break;
  553. #if 0
  554. case eBounded:
  555. if (rhs->u.bounded.items->type == eNode) {
  556. if (!rhs->u.bounded.bounds.lower)
  557. add_rules2(istk, istkp, c);
  558. if (rhs->u.bounded.bounds.lower <= 1 &&
  559. (rhs->u.bounded.bounds.upper >= 1 ||
  560. !rhs->u.bounded.bounds.upper)) {
  561. l.next = c;
  562. l.rhs = rhs->u.bounded.items->u.node.right;
  563. add_rules2(istk, istkp, &l);
  564. }
  565. if (rhs->u.bounded.bounds.lower <= 2 &&
  566. (rhs->u.bounded.bounds.upper >= 2 ||
  567. !rhs->u.bounded.bounds.upper)) {
  568. l.next = c;
  569. l.rhs = rhs->u.bounded.items->u.node.right;
  570. ll.next = &l;
  571. ll.rhs = rhs->u.bounded.items->u.node.left;
  572. lll.next = &ll;
  573. lll.rhs = rhs->u.bounded.items->u.node.right;
  574. add_rules2(istk, istkp, &lll);
  575. }
  576. } else {
  577. if (!rhs->u.bounded.bounds.lower)
  578. add_rules2(istk, istkp, c);
  579. if (rhs->u.bounded.bounds.lower <= 1 &&
  580. (rhs->u.bounded.bounds.upper >= 1 ||
  581. !rhs->u.bounded.bounds.upper)) {
  582. l.next = c;
  583. l.rhs = rhs->u.bounded.items;
  584. add_rules2(istk, istkp, &l);
  585. }
  586. }
  587. break;
  588. case eNode:
  589. abort();
  590. #endif
  591. }
  592. }
  593. /* convert some C code containing special vars ($1..$n, $$, $<1..$<n, $<<, */
  594. /* $>1..$>n, $>>, @1..@n, @@) into real C code */
  595. char *
  596. convert(char *ccode)
  597. {
  598. static char buffer[4096];
  599. char *p = buffer;
  600. while (*ccode) {
  601. if (*ccode == '$') {
  602. if (ccode[1] == '$') {
  603. sprintf(p, "(*%sret)", ll);
  604. p += strlen(p);
  605. ccode += 2;
  606. continue;
  607. } else if (ccode[1] == '<') {
  608. if (ccode[2] == '<') {
  609. sprintf(p, "(*%sin)", ll);
  610. p += strlen(p);
  611. ccode += 3;
  612. continue;
  613. } else if (isdigit(ccode[2])) {
  614. sprintf(p, "%sstate_%d",
  615. ll, strtol(ccode + 2, &ccode, 10) - 1);
  616. p += strlen(p);
  617. continue;
  618. }
  619. } else if (ccode[1] == '>') {
  620. if (ccode[2] == '>') {
  621. sprintf(p, "(*%sout)", ll);
  622. p += strlen(p);
  623. ccode += 3;
  624. continue;
  625. } else if (isdigit(ccode[2])) {
  626. sprintf(p, "%sstate_%d", ll, strtol(ccode + 2, &ccode, 10));
  627. p += strlen(p);
  628. continue;
  629. }
  630. } else if (isdigit(ccode[1])) {
  631. sprintf(p, "%satt_%d", ll, strtol(ccode + 1, &ccode, 10));
  632. p += strlen(p);
  633. continue;
  634. } else {
  635. ccode++;
  636. sprintf(p, "%sarg_", ll);
  637. p += strlen(p);
  638. }
  639. } else if (*ccode == '@') {
  640. if (ccode[1] == '@') {
  641. sprintf(p, "%sstate_0.pos", ll);
  642. p += strlen(p);
  643. ccode += 2;
  644. continue;
  645. } else if (isdigit(ccode[1])) {
  646. sprintf(p, "%sstate_%d.pos", ll, strtol(ccode + 1, &ccode, 10));
  647. p += strlen(p);
  648. continue;
  649. }
  650. }
  651. *p++ = *ccode++;
  652. }
  653. *p = 0;
  654. return strdup(buffer);
  655. }
  656. /* create start of include file */
  657. void
  658. create_inc()
  659. {
  660. int i, termnr;
  661. if (usetypes) {
  662. incput("typedef %sSTYPE %sSTYPE;\n", USETYPES, LL);
  663. incput("typedef %sTERM %sTERM;\n", USETYPES, LL);
  664. } else {
  665. incput("typedef union %sSTYPE{\n", LL);
  666. for (i = 0; i < ntags; i++)
  667. incput("\t%s _%s;\n", tags[i], tags[i]);
  668. incput("} %sSTYPE;\n", LL);
  669. incput("typedef struct %sTERM {\n", LL);
  670. incput("\tint token;\n");
  671. incput("\t%sSTYPE lval;\n", LL);
  672. incput("\t%sPOS pos;\n", LL);
  673. incput("} %sTERM;\n", LL);
  674. }
  675. incput("void %sscanner(%sTERM **tokens, unsigned *ntokens);\n", ll, LL);
  676. incput("int %sparser(%sTERM *tokens, unsigned ntokens, %s%sSTATE *%sin, %sSTATE *llout);\n",
  677. ll, LL, conststr, LL, ll, LL);
  678. incput("void %sprinterror(FILE *f);\n", ll);
  679. incput("void %sverror(FILE *f, %sPOS *pos, char *fmt, va_list args);\n",
  680. ll, LL);
  681. incput("void %serror(FILE *f, %sPOS *pos, char *fmt, ...);\n", ll, LL);
  682. incput("int %sgettoken(int *token, %sSTYPE *lval, %sPOS *pos);\n",
  683. ll, LL, LL);
  684. incput("#if %sDEBUG > 0\n", LL);
  685. incput("void %sdebug_init();\n", ll);
  686. incput("#endif\n");
  687. if (!usetypes) {
  688. termnr = 257;
  689. for (i = 0; i < nsymbols; i++) {
  690. if (!symbols[i].isnonterm && *symbols[i].identifier != '\'')
  691. incput("#define %s%s %d\n", prefix,
  692. symbols[i].altidentifier ? symbols[i].altidentifier :
  693. symbols[i].identifier, termnr++);
  694. }
  695. }
  696. }
  697. /* create start of c file */
  698. void
  699. create_vardefs()
  700. {
  701. output("#include <stdio.h>\n");
  702. output("#include <stdlib.h>\n");
  703. output("#include <stdarg.h>\n");
  704. output("#include <limits.h>\n");
  705. output("#include \"%s\"\n\n", incfilename);
  706. output("int %scpos;\n", ll);
  707. output("int *%sstk;\n", ll);
  708. output("unsigned %sstksize;\n", ll);
  709. output("unsigned %scstp = 1;\n", ll);
  710. output("%sTERM *%stokens;\n", LL, ll);
  711. output("int %sntokens;\n", ll);
  712. output("char %serrormsg[256];\n", ll);
  713. output("%sPOS %serrorpos;\n", LL, ll);
  714. output("int %sepos;\n", ll);
  715. output("%sSTYPE %slval;\n", LL, ll);
  716. output("\n");
  717. output("int %sterm(int token, %sSTYPE *lval, %s%sSTATE *%sin, %sSTATE *%sout);\n", ll, LL, conststr, LL, ll, LL, ll);
  718. output("void %sfailed(%sPOS *pos, char *fmt, ...);\n", ll, LL);
  719. output("void %sresizestk();\n", ll);
  720. output("#define %sCHECKSTK do{if (%scstp + 1 >= %sstksize) %sresizestk();}while(/*CONSTCOND*/0)\n", LL, ll, ll, ll);
  721. output("#define %sFAILED(_err) do{%sfailed _err; goto failed;}while(/*CONSTCOND*/0)\n", LL, ll);
  722. output("#define %sCUTOFF do{unsigned i; for (i = %sstp; i < %scstp; i++) if (%sstk[i] > 0) %sstk[i] = -%sstk[i];}while(/*CONSTCOND*/0)\n", LL, ll, ll, ll, ll, ll);
  723. output("#define %sCUTTHIS do{if (%sstk[%sstp] > 0) %sstk[%sstp] = -%sstk[%sstp];}while(/*CONSTCOND*/0)\n", LL, ll, ll, ll, ll, ll, ll);
  724. output("#define %sCUTALL do{unsigned i; for (i = 0; i < %scstp; i++) if (%sstk[i] > 0) %sstk[i] = -%sstk[i];}while(/*CONSTCOND*/0)\n", LL, ll, ll, ll, ll);
  725. output("\n");
  726. output("#if %sDEBUG > 0\n", LL);
  727. output("int %sdebug;\n", ll);
  728. output("int last_linenr;\n");
  729. output("char *last_file = \"\";\n");
  730. output("#define %sDEBUG_ENTER(_ident) %sdebug_enter(_ident)\n", LL, ll);
  731. output("#define %sDEBUG_LEAVE(_ident,_succ) %sdebug_leave(_ident,_succ)\n", LL, ll);
  732. output("#define %sDEBUG_ALTERNATIVE(_ident,_alt) %sdebug_alternative(_ident,_alt)\n", LL, ll);
  733. output("#define %sDEBUG_ITERATION(_ident,_num) %sdebug_iteration(_ident,_num)\n", LL, ll);
  734. output("#define %sDEBUG_TOKEN(_exp,_pos) %sdebug_token(_exp,_pos)\n", LL, ll);
  735. output("#define %sDEBUG_ANYTOKEN(_pos) %sdebug_anytoken(_pos)\n", LL, ll);
  736. output("#define %sDEBUG_BACKTRACKING(_ident) %sdebug_backtracking(_ident)\n", LL, ll);
  737. output("void %sdebug_init();\n", ll);
  738. output("void %sdebug_enter(char *ident);\n", ll);
  739. output("void %sdebug_leave(char *ident, int succ);\n", ll);
  740. output("void %sdebug_alternative(char *ident, int alt);\n", ll);
  741. output("void %sdebug_token(int expected, unsigned pos);\n", ll);
  742. output("void %sdebug_anytoken(unsigned pos);\n", ll);
  743. output("void %sdebug_backtracking(char *ident);\n", ll);
  744. output("void %sprinttoken(%sTERM *token, char *identifier, FILE *f);\n", ll, LL);
  745. output("#else\n");
  746. output("#define %sDEBUG_ENTER(_ident)\n", LL);
  747. output("#define %sDEBUG_LEAVE(_ident,_succ)\n", LL);
  748. output("#define %sDEBUG_ALTERNATIVE(_ident,_alt)\n", LL);
  749. output("#define %sDEBUG_ITERATION(_ident,_num)\n", LL);
  750. output("#define %sDEBUG_TOKEN(_exp,_pos)\n", LL);
  751. output("#define %sDEBUG_ANYTOKEN(_pos)\n", LL);
  752. output("#define %sDEBUG_BACKTRACKING(_ident)\n", LL);
  753. output("#endif\n");
  754. output("\n");
  755. }
  756. /* create end of c file */
  757. void
  758. create_trailer()
  759. {
  760. int i, j;
  761. char *p, *q;
  762. char buffer[256];
  763. if (startsym) {
  764. output("int\n");
  765. output("%sparser(%sTERM *tokens, unsigned ntokens, %s%sSTATE *%sin, %sSTATE *%sout)\n", ll, LL, conststr, LL, ll, LL, ll);
  766. output("{\n");
  767. output("unsigned i;\n");
  768. output("%sDEBUG_ENTER(\"%sparser\");\n", LL, ll);
  769. output("%stokens = tokens; %sntokens = ntokens;\n", ll, ll);
  770. output("for (i = 0; i < %sstksize; i++) %sstk[i] = 1;\n", ll, ll);
  771. output("%scstp = 1; %scpos = 0; %sepos = 0; *%serrormsg = 0;\n",
  772. ll, ll, ll, ll);
  773. output("#if %sDEBUG > 0\n", LL);
  774. output("last_linenr = 0; last_file = \"\";\n");
  775. output("#endif\n");
  776. output("{unsigned %spos1 = %scpos, %sstp1 = %scstp;\n", ll, ll, ll, ll);
  777. output("%sCHECKSTK;\n", LL);
  778. output("for (;;) {\n");
  779. output("switch (%sstk[%scstp++]) {\n", ll, ll);
  780. output("case 1: case -1:\n");
  781. output("if (!%s_%s(%sin, %sout)) goto failed2;\n",
  782. ll, startsym, ll, ll);
  783. output("if (%scpos != %sntokens) goto failed2;\n", ll, ll);
  784. output("break;\n");
  785. output("default:\n");
  786. output("%sstk[--%scstp] = 1;\n", ll, ll);
  787. output("goto failed1;\n");
  788. output("failed2:\n");
  789. output("%sDEBUG_BACKTRACKING(\"%sparser\");\n", LL, ll);
  790. output("if (%sstk[--%scstp] < 0) %sstk[%scstp] = 0; else %sstk[%scstp]++;\n", ll, ll, ll, ll, ll, ll);
  791. output("%scpos = %spos1; %scstp = %sstp1;\n", ll, ll, ll, ll);
  792. output("continue;\n");
  793. output("} break;\n");
  794. output("}}\n");
  795. output("%sDEBUG_LEAVE(\"%sparser\", 1);\n", LL, ll);
  796. output("return 1;\n");
  797. output("failed1:\n");
  798. output("%sDEBUG_LEAVE(\"%sparser\", 0);\n", LL, ll);
  799. output("return 0;\n");
  800. output("}\n");
  801. output("\n");
  802. }
  803. output("int\n");
  804. output("%sterm(int token, %sSTYPE *lval, %s%sSTATE *%sin, %sSTATE *%sout)\n", ll, LL, conststr, LL, ll, LL, ll);
  805. output("{\n");
  806. output("#if %sDEBUG > 0\n", LL);
  807. output("\tif (%sdebug > 0 && (%stokens[%scpos].pos.line > last_linenr || strcmp(%stokens[%scpos].pos.file, last_file))) {\n", ll, ll, ll, ll, ll);
  808. output("\tfprintf(stderr, \"File \\\"%%s\\\", Line %%5d \\r\",\n");
  809. output("\t\t%stokens[%scpos].pos.file, %stokens[%scpos].pos.line);\n", ll, ll, ll, ll);
  810. output("\tlast_linenr = %stokens[%scpos].pos.line / 10 * 10 + 9;\n", ll, ll);
  811. output("\tlast_file = %stokens[%scpos].pos.file;\n", ll, ll);
  812. output("\t}\n");
  813. output("#endif\n");
  814. output("\tif (%sstk[%scstp] != 1 && %sstk[%scstp] != -1) {\n", ll, ll, ll, ll);
  815. output("\t\t%sDEBUG_BACKTRACKING(\"%sterm\");\n", LL, ll);
  816. output("\t\t%sstk[%scstp] = 1;\n", ll, ll);
  817. output("\t\treturn 0;\n");
  818. output("\t}\n");
  819. output("\t%sDEBUG_TOKEN(token, %scpos);\n", LL, ll);
  820. output("\tif (%scpos < %sntokens && %stokens[%scpos].token == token) {\n", ll, ll, ll, ll);
  821. output("\t\tif (lval)\n");
  822. output("\t\t\t*lval = %stokens[%scpos].lval;\n", ll, ll);
  823. output("\t\t*%sout = *%sin;\n", ll, ll);
  824. output("\t\t%sout->pos = %stokens[%scpos].pos;\n", ll, ll, ll);
  825. output("\t\t%scpos++;\n", ll);
  826. output("\t\t%sCHECKSTK;\n", LL);
  827. output("\t\t%scstp++;\n", ll);
  828. output("\t\treturn 1;\n");
  829. output("\t}\n");
  830. output("\t%sfailed(&%stokens[%scpos].pos, NULL);\n", ll, ll, ll);
  831. output("\t%sstk[%scstp] = 1;\n", ll, ll);
  832. output("\treturn 0;\n");
  833. output("}\n");
  834. output("\n");
  835. output("int\n");
  836. output("%sanyterm(%sSTYPE *lval, %s%sSTATE *%sin, %sSTATE *%sout)\n", ll, LL, conststr, LL, ll, LL, ll);
  837. output("{\n");
  838. output("#if %sDEBUG > 0\n", LL);
  839. output("\tif (%sdebug > 0 && (%stokens[%scpos].pos.line > last_linenr || strcmp(%stokens[%scpos].pos.file, last_file))) {\n", ll, ll, ll, ll, ll);
  840. output("\tfprintf(stderr, \"File \\\"%%s\\\", Line %%5d \\r\",\n");
  841. output("\t\t%stokens[%scpos].pos.file, %stokens[%scpos].pos.line);\n", ll, ll, ll, ll);
  842. output("\tlast_linenr = %stokens[%scpos].pos.line / 10 * 10 + 9;\n", ll, ll);
  843. output("\tlast_file = %stokens[%scpos].pos.file;\n", ll, ll);
  844. output("\t}\n");
  845. output("#endif\n");
  846. output("\tif (%sstk[%scstp] != 1 && %sstk[%scstp] != -1) {\n", ll, ll, ll, ll);
  847. output("\t\t%sDEBUG_BACKTRACKING(\"%sanyterm\");\n", LL, ll);
  848. output("\t\t%sstk[%scstp] = 1;\n", ll, ll);
  849. output("\t\treturn 0;\n");
  850. output("\t}\n");
  851. output("\t%sDEBUG_ANYTOKEN(%scpos);\n", LL, ll);
  852. output("\tif (%scpos < %sntokens) {\n", ll, ll);
  853. output("\t\tif (lval)\n");
  854. output("\t\t\t*lval = %stokens[%scpos].lval;\n", ll, ll);
  855. output("\t\t*%sout = *%sin;\n", ll, ll);
  856. output("\t\t%sout->pos = %stokens[%scpos].pos;\n", ll, ll, ll);
  857. output("\t\t%scpos++;\n", ll);
  858. output("\t\t%sCHECKSTK;\n", LL);
  859. output("\t\t%scstp++;\n", ll);
  860. output("\t\treturn 1;\n");
  861. output("\t}\n");
  862. output("\t%sfailed(&%stokens[%scpos].pos, NULL);\n", ll, ll, ll);
  863. output("\t%sstk[%scstp] = 1;\n", ll, ll);
  864. output("\treturn 0;\n");
  865. output("}\n");
  866. output("void\n");
  867. output("%sscanner(%sTERM **tokens, unsigned *ntokens)\n", ll, LL);
  868. output("{\n");
  869. output("\tunsigned i = 0;\n");
  870. output("#if %sDEBUG > 0\n", LL);
  871. output("\tint line = -1;\n");
  872. output("#endif\n");
  873. output("\n");
  874. output("\t*ntokens = 1024;\n");
  875. output("\t*tokens = (%sTERM *)malloc(*ntokens * sizeof(%sTERM));\n", LL, LL);
  876. output("\twhile (%sgettoken(&(*tokens)[i].token, &(*tokens)[i].lval, &(*tokens)[i].pos)) {\n", ll);
  877. output("#if %sDEBUG > 0\n", LL);
  878. output("\t\tif (%sdebug > 0 && (*tokens)[i].pos.line > line) {\n", ll);
  879. output("\t\t\tline = (*tokens)[i].pos.line / 10 * 10 + 9;\n");
  880. output("\t\t\tfprintf(stderr, \"File \\\"%%s\\\", Line %%5d \\r\",\n");
  881. output("\t\t\t\t(*tokens)[i].pos.file, (*tokens)[i].pos.line);\n");
  882. output("\t\t}\n");
  883. output("#endif\n");
  884. output("\t\tif (++i >= *ntokens) {\n");
  885. output("\t\t\t*ntokens *= 2;\n");
  886. output("\t\t\t*tokens = (%sTERM *)realloc(*tokens, *ntokens * sizeof(%sTERM));\n", LL, LL);
  887. output("\t\t}\n");
  888. output("\t}\n");
  889. output("\t(*tokens)[i].token = 0;\n");
  890. output("\t*ntokens = i;\n");
  891. output("#if %sDEBUG > 0\n", LL);
  892. output("\t%sdebug_init();\n", ll);
  893. output("#endif\n");
  894. output("\t%sresizestk();\n", ll);
  895. output("}\n");
  896. output("\n");
  897. output("void\n");
  898. output("%sfailed(%sPOS *pos, char *fmt, ...)\n", ll, LL);
  899. output("{\n");
  900. output("\tva_list args;\n");
  901. output("\n");
  902. output("\tva_start(args, fmt);\n");
  903. output("\tif (%scpos > %sepos || %scpos == %sepos && !*%serrormsg) {\n", ll, ll, ll, ll, ll);
  904. output("\t\t%sepos = %scpos;\n", ll, ll);
  905. output("\t\tif (fmt)\n");
  906. output("\t\t\tvsprintf(%serrormsg, fmt, args);\n", ll);
  907. output("\t\telse\n");
  908. output("\t\t\t*%serrormsg = 0;\n", ll);
  909. output("\t\t%serrorpos = *pos;\n", ll);
  910. output("\t}\n");
  911. output("\tva_end(args);\n");
  912. output("}\n");
  913. output("\n");
  914. output("void\n");
  915. output("%sprinterror(FILE *f)\n", ll);
  916. output("{\n");
  917. output("#if %sDEBUG > 0\n", LL);
  918. output("\tfputs(\" \\r\", stderr);\n");
  919. output("#endif\n");
  920. output("\tif (*%serrormsg)\n", ll);
  921. output("\t\t%serror(f, &%serrorpos, %serrormsg);\n", ll, ll, ll);
  922. output("\telse\n");
  923. output("\t\t%serror(f, &%serrorpos, \"Syntax error\");\n", ll, ll);
  924. output("}\n");
  925. output("\n");
  926. output("void\n");
  927. output("%serror(FILE *f, %sPOS *pos, char *fmt, ...)\n", ll, LL);
  928. output("{\n");
  929. output("\tva_list args;\n");
  930. output("\tva_start(args, fmt);\n");
  931. output("\t%sverror(f, pos, fmt, args);\n", ll);
  932. output("\tva_end(args);\n");
  933. output("}\n");
  934. output("\n");
  935. output("void\n");
  936. output("%sresizestk()\n", ll);
  937. output("{\n");
  938. output("\tunsigned i;\n");
  939. output("\n");
  940. output("\tif (%scstp + 1 >= %sstksize) {\n", ll, ll);
  941. output("\t\ti = %sstksize;\n", ll);
  942. output("\t\tif (!%sstksize)\n", ll);
  943. output("\t\t\t%sstk = (int *)malloc((%sstksize = 4096) * sizeof(int));\n", ll, ll);
  944. output("\t\telse\n");
  945. output("\t\t\t%sstk = (int *)realloc(%sstk, (%sstksize *= 2) * sizeof(int));\n", ll, ll, ll);
  946. output("\t\tfor (; i < %sstksize; i++)\n", ll);
  947. output("\t\t\t%sstk[i] = 1;\n", ll);
  948. output("\t}\n");
  949. output("}\n");
  950. output("\n");
  951. output("#if %sDEBUG > 0\n", LL);
  952. output("int %sdepth;\n", ll);
  953. output("char *%stokentab[] = {\n", ll);
  954. for (i = 0; i < 257; i++) {
  955. if (i == 0)
  956. output("\"EOF\"");
  957. else if (i == '\\' || i == '\"')
  958. output(",\"'\\%c'\"", i);
  959. else if (i >= 32 && i < 127)
  960. output(",\"'%c'\"", i);
  961. else if (i < 257)
  962. output(",\"#%d\"", i);
  963. if ((i % 8) == 7)
  964. output("\n");
  965. }
  966. for (j = 0; j < nsymbols; j++) {
  967. if (!symbols[j].isnonterm && *symbols[j].identifier != '\'') {
  968. for (p = symbols[j].identifier, q = buffer; *p; p++) {
  969. if (*p == '\"' || *p == '\\')
  970. *q++ = '\\';
  971. *q++ = *p;
  972. }
  973. *q = 0;
  974. output(",\"%s\"", buffer);
  975. if ((i++ % 8) == 7)
  976. output("\n");
  977. }
  978. }
  979. if (i % 8)
  980. output("\n");
  981. output("};\n");
  982. output("\n");
  983. output("void\n");
  984. output("%sdebug_init()\n", ll);
  985. output("{\n");
  986. output("\tchar *p;\n");
  987. output("\tp = getenv(\"%sDEBUG\");\n", LL);
  988. output("\tif (p)\n");
  989. output("\t\t%sdebug = atoi(p);\n", ll);
  990. output("}\n");
  991. output("\n");
  992. output("void\n");
  993. output("%sdebug_enter(char *ident)\n", ll);
  994. output("{\n");
  995. output("\tint i;\n");
  996. output("\n");
  997. output("\tif (%sdebug < 2)\n", ll);
  998. output("\t\treturn;\n");
  999. output("\tfor (i = 0; i < %sdepth; i++)\n", ll);
  1000. output("\t\tfputs(\"| \", stdout);\n");
  1001. output("\tprintf(\"/--- trying rule %%s\\n\", ident);\n");
  1002. output("\t%sdepth++;\n", ll);
  1003. output("}\n");
  1004. output("\n");
  1005. output("void\n");
  1006. output("%sdebug_leave(char *ident, int succ)\n", ll);
  1007. output("{\n");
  1008. output("\tint i;\n");
  1009. output("\n");
  1010. output("\tif (%sdebug < 2)\n", ll);
  1011. output("\t\treturn;\n");
  1012. output("\t%sdepth--;\n", ll);
  1013. output("\tfor (i = 0; i < %sdepth; i++)\n", ll);
  1014. output("\t\tfputs(\"| \", stdout);\n");
  1015. output("\tif (succ)\n");
  1016. output("\t\tprintf(\"\\\\--- succeeded to apply rule %%s\\n\", ident);\n");
  1017. output("\telse\n");
  1018. output("\t\tprintf(\"\\\\--- failed to apply rule %%s\\n\", ident);\n");
  1019. output("}\n");
  1020. output("\n");
  1021. output("void\n");
  1022. output("%sdebug_alternative(char *ident, int alt)\n", ll);
  1023. output("{\n");
  1024. output("\tint i;\n");
  1025. output("\n");
  1026. output("\tif (%sdebug < 2)\n", ll);
  1027. output("\t\treturn;\n");
  1028. output("\tfor (i = 0; i < %sdepth - 1; i++)\n", ll);
  1029. output("\t\tfputs(\"| \", stdout);\n");
  1030. output("\tprintf(\">--- trying alternative %%d for rule %%s\\n\", alt, ident);\n");
  1031. output("}\n");
  1032. output("\n");
  1033. output("%sdebug_iteration(char *ident, int num)\n", ll);
  1034. output("{\n");
  1035. output("\tint i;\n");
  1036. output("\n");
  1037. output("\tif (%sdebug < 2)\n", ll);
  1038. output("\t\treturn;\n");
  1039. output("\tfor (i = 0; i < %sdepth - 1; i++)\n", ll);
  1040. output("\t\tfputs(\"| \", stdout);\n");
  1041. output("\tprintf(\">--- trying iteration %%d for rule %%s\\n\", num, ident);\n");
  1042. output("}\n");
  1043. output("\n");
  1044. output("void\n");
  1045. output("%sdebug_token(int expected, unsigned pos)\n", ll);
  1046. output("{\n");
  1047. output("\tint i;\n");
  1048. output("\n");
  1049. output("\tif (%sdebug < 2)\n", ll);
  1050. output("\t\treturn;\n");
  1051. output("\tfor (i = 0; i < %sdepth; i++)\n", ll);
  1052. output("\t\tfputs(\"| \", stdout);\n");
  1053. output("\tif (pos < %sntokens && expected == %stokens[pos].token)\n", ll, ll);
  1054. output("\t\tprintf(\" found token \");\n");
  1055. output("\telse\n");
  1056. output("\t\tprintf(\" expected token %%s, found token \", %stokentab[expected]);\n", ll);
  1057. output("\tif (pos >= %sntokens)\n", ll);
  1058. output("\t\tprintf(\"<EOF>\");\n");
  1059. output("\telse\n");
  1060. output("\t\t%sprinttoken(%stokens + pos, %stokentab[%stokens[pos].token], stdout);\n", ll, ll, ll, ll);
  1061. output("\tputchar('\\n');\n");
  1062. output("}\n");
  1063. output("\n");
  1064. output("void\n");
  1065. output("%sdebug_anytoken(unsigned pos)\n", ll);
  1066. output("{\n");
  1067. output("\tint i;\n");
  1068. output("\n");
  1069. output("\tif (%sdebug < 2)\n", ll);
  1070. output("\t\treturn;\n");
  1071. output("\tfor (i = 0; i < %sdepth; i++)\n", ll);
  1072. output("\t\tfputs(\"| \", stdout);\n");
  1073. output("\tprintf(\" found token \");\n");
  1074. output("\tif (pos >= %sntokens)\n", ll);
  1075. output("\t\tprintf(\"<EOF>\");\n");
  1076. output("\telse\n");
  1077. output("\t\t%sprinttoken(%stokens + pos, %stokentab[%stokens[pos].token], stdout);\n", ll, ll, ll, ll);
  1078. output("\tputchar('\\n');\n");
  1079. output("}\n");
  1080. output("\n");
  1081. output("void\n");
  1082. output("%sdebug_backtracking(char *ident)\n", ll);
  1083. output("{\n");
  1084. output("\tint i;\n");
  1085. output("\n");
  1086. output("\tif (%sdebug < 2)\n", ll);
  1087. output("\t\treturn;\n");
  1088. output("\tfor (i = 0; i < %sdepth; i++)\n", ll);
  1089. output("\t\tfputs(\"| \", stdout);\n");
  1090. output("\tprintf(\" backtracking rule %%s\\n\", ident);\n");
  1091. output("}\n");
  1092. output("\n");
  1093. output("#endif\n");
  1094. }
  1095. /* search for left recursion and complain about if they are unexpected */
  1096. void
  1097. search_leftrecursion()
  1098. {
  1099. int i;
  1100. item_t **item;
  1101. int done;
  1102. int empty;
  1103. /* check for missing rules */
  1104. for (i = 0; i < nsymbols; i++) {
  1105. if (symbols[i].isnonterm && !symbols[i].isexternal && !symbols[i].items)
  1106. error(NULL, "missing rule for symbol %s\n", symbols[i].identifier);
  1107. }
  1108. /* mark rules that may be empty */
  1109. do {
  1110. done = 1;
  1111. for (i = 0; i < nsymbols; i++) {
  1112. if (symbols[i].empty || !symbols[i].isnonterm ||
  1113. symbols[i].isexternal)
  1114. continue;
  1115. item = symbols[i].items;
  1116. do {
  1117. empty = 1;
  1118. for (; *item != (item_t *)1; item++) {
  1119. if (!(*item)->empty)
  1120. empty = 0;
  1121. }
  1122. item++;
  1123. if (empty) {
  1124. symbols[i].empty = 1;
  1125. done = 0;
  1126. }
  1127. } while (*item);
  1128. }
  1129. } while (!done);
  1130. /* check every rule for left recursion */
  1131. for (i = 0; i < nsymbols; i++)
  1132. symbols[i].checked = 0;
  1133. for (i = 0; i < nsymbols; i++) {
  1134. if (!symbols[i].checked)
  1135. check_lr(symbols + i);
  1136. }
  1137. if (found_lr > expected_lr) {
  1138. fprintf(stderr, "Found %d left recursions, exiting\n", found_lr);
  1139. exit(1);
  1140. }
  1141. }
  1142. /* check one rule for left recursion */
  1143. void
  1144. check_lr(item_t *symbol)
  1145. {
  1146. int i;
  1147. item_t **item;
  1148. int try_flag;
  1149. if (!symbol->isnonterm || symbol->isexternal)
  1150. return;
  1151. for (i = 0; i < ncheck; i++) {
  1152. if (check[i] == symbol) {
  1153. if (++found_lr > expected_lr) {
  1154. fprintf(stderr, "Error: found left recursion: ");
  1155. for (; i < ncheck; i++)
  1156. fprintf(stderr, "%s->", check[i]->identifier);
  1157. fprintf(stderr, "%s\n", symbol->identifier);
  1158. }
  1159. return;
  1160. }
  1161. }
  1162. check[ncheck++] = symbol;
  1163. item = symbol->items;
  1164. do {
  1165. try_flag = 1;
  1166. for (; *item != (item_t *)1; item++) {
  1167. if (try_flag)
  1168. check_lr(*item);
  1169. try_flag = try_flag && (*item)->empty;
  1170. }
  1171. item++;
  1172. } while (*item);
  1173. ncheck--;
  1174. }
  1175. /* main program */
  1176. int
  1177. __cdecl main(int argc, char **argv)
  1178. {
  1179. extern int optind;
  1180. int c;
  1181. LLSTATE in, out;
  1182. LLTERM *tokens;
  1183. unsigned ntokens;
  1184. char *p;
  1185. /* parse option args */
  1186. while ((c = getopt(argc, argv, "i:Olt:c")) != EOF) {
  1187. switch (c) {
  1188. case 'i':
  1189. expected_lr = atoi(optarg);
  1190. break;
  1191. case 'l':
  1192. linedirective = 0;
  1193. break;
  1194. case 'O':
  1195. optimizer = 1;
  1196. break;
  1197. case 't':
  1198. usetypes = strdup(optarg);
  1199. for (p = usetypes; *p; p++)
  1200. *p = (char)tolower(*p);
  1201. USETYPES = strdup(optarg);
  1202. for (p = USETYPES; *p; p++)
  1203. *p = (char)toupper(*p);
  1204. break;
  1205. case 'c':
  1206. constargs = 1;
  1207. conststr = "const ";
  1208. break;
  1209. default:
  1210. usage:
  1211. fprintf(stderr, "Usage: %s [-i #ignore_lr] [-l] [-c] [-t from_prefix] filename.ll\n", argv[0]);
  1212. exit(1);
  1213. }
  1214. }
  1215. if (argc != optind + 1)
  1216. goto usage;
  1217. /* open input file and output files */
  1218. open_file(argv[optind]);
  1219. strcpy(outfilename, argv[optind]);
  1220. if (strlen(outfilename) > 3 &&
  1221. !strcmp(outfilename + strlen(outfilename) - 3, ".ll"))
  1222. outfilename[strlen(outfilename) - 3] = 0;
  1223. strcat(outfilename, ".c");
  1224. fout = fopen(outfilename, "w");
  1225. if (!fout) {
  1226. perror(outfilename);
  1227. exit(1);
  1228. }
  1229. fprintf(fout, "/* Copyright (C) Boris Nikolaus, Germany, 1996-1997. All rights reserved. */\n\n");
  1230. strcpy(incfilename, argv[optind]);
  1231. if (strlen(incfilename) > 3 &&
  1232. !strcmp(incfilename + strlen(incfilename) - 3, ".ll"))
  1233. incfilename[strlen(incfilename) - 3] = 0;
  1234. strcat(incfilename, ".h");
  1235. finc = fopen(incfilename, "w");
  1236. if (!finc) {
  1237. perror(incfilename);
  1238. exit(1);
  1239. }
  1240. fprintf(finc, "/* Copyright (C) Boris Nikolaus, Germany, 1996-1997. All rights reserved. */\n\n");
  1241. /* scan and parse the parser description */
  1242. llscanner(&tokens, &ntokens);
  1243. if (!llparser(tokens, ntokens, &in, &out))
  1244. llprinterror(stderr);
  1245. /* check for left recursions */
  1246. search_leftrecursion();
  1247. /* optimize */
  1248. if (optimizer)
  1249. /*create_firstsets()*/ ;
  1250. /* create end of c file and header file */
  1251. create_trailer();
  1252. create_inc();
  1253. /* finished! */
  1254. fclose(fout);
  1255. fclose(finc);
  1256. return 0;
  1257. }
  1258. /* why is this function not in MS libc? */
  1259. #ifndef HAS_GETOPT
  1260. char *optarg;
  1261. int optind = 1;
  1262. static int optpos = 1;
  1263. int getopt(int argc, char **argv, const char *options) {
  1264. char *p, *q;
  1265. optarg = NULL;
  1266. /* find start of next option */
  1267. do {
  1268. if (optind >= argc)
  1269. return EOF;
  1270. if (*argv[optind] != '-' && *argv[optind] != '/')
  1271. return EOF;
  1272. p = argv[optind] + optpos++;
  1273. if (!*p) {
  1274. optind++;
  1275. optpos = 1;
  1276. }
  1277. } while (!*p);
  1278. /* find option in option string */
  1279. q = strchr(options, *p);
  1280. if (!q)
  1281. return '?';
  1282. /* set optarg for parameterized option and adjust optind and optpos for next call */
  1283. if (q[1] == ':') {
  1284. if (p[1]) {
  1285. optarg = p + 1;
  1286. optind++;
  1287. optpos = 1;
  1288. } else if (++optind < argc) {
  1289. optarg = argv[optind];
  1290. optind++;
  1291. optpos = 1;
  1292. } else {
  1293. return '?';
  1294. }
  1295. }
  1296. /* return found option */
  1297. return *p;
  1298. }
  1299. #endif