Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

670 lines
19 KiB

  1. /*****************************************************************************
  2. * mktable - table-building program to ease table maintenance problems
  3. *
  4. * DESCRIPTION
  5. * Several parts of the FORTRAN compiler need large tables.
  6. * For example, the lexer contains tables of keywords and multicharacter
  7. * tokens; the intrinsic-function handler contains a table of all the
  8. * FORTRAN intrinsic functions.
  9. * Maintaining these tables can be aggravating, since they are typically
  10. * large and involve lots of drudge work (like changing many sequentially-
  11. * numbered macro definitions) to modify.
  12. *
  13. * `mktable' can be used to build tables automatically as part of the
  14. * usual compiler building process. Its usages and semantics are as
  15. * follows.
  16. *
  17. * `mktable' takes a "table" file on its standard input. Each line of
  18. * the table file has one of the following forms:
  19. *
  20. * # commentary information
  21. * "key-string" [index-macro-name [arbitrary-stuff]]
  22. * <blank line>
  23. *
  24. * The key string and arbitrary-stuff form the contents of a single
  25. * table record. The index-macro-name is #define'd to be the index
  26. * of the given record in the table. If the index-macro-name is absent or
  27. * is an empty string ("") then no macro definition is produced for the
  28. * record.
  29. *
  30. * `mktable' produces its output on four files:
  31. * mktable.keys: the key string
  32. * mktable.defs: #define <index_macro_name> <index to mktable.keys>
  33. * mktable.indx: contains the initialization part of a definition
  34. * for an index array for key-letter indexed tables,
  35. * or the initialization part of a collision-resolution
  36. * table for linear-list hashed tables.
  37. * (not generated for sorted or _open-addressed tables.)
  38. * mktable.info: contains arbitrary-stuff
  39. *
  40. * For example, if the table to be defined were named "symtab" and the
  41. * table being constructed was of the "sorted" type (suitable for binary
  42. * search),
  43. *
  44. * # contents of symtab:
  45. * "alpha" ST_ALPHA 2, 4, MONADIC
  46. * "gamma" ST_GAMMA 2, 3, MONADIC
  47. * "delta" ST_DELTA 2, 1, DYADIC
  48. * "epsilon"
  49. *
  50. * then `mktable' produces the following in mktable.keys:
  51. *
  52. * "alpha","delta","epsilon","gamma"
  53. *
  54. * and the following in mktable.defs:
  55. *
  56. * #define ST_ALPHA 0
  57. * #define ST_DELTA 1
  58. * #define ST_GAMMA 2
  59. *
  60. * and in mktable.info :
  61. *
  62. * {2, 4, MONADIC}, {2, 1, DYADIC}, {0}, {2, 3, MONADIC}
  63. *
  64. * The files might be included in a C source program in the
  65. * following way:
  66. *
  67. * #include "mktable.defs"
  68. * ...
  69. * char *symname[] = {
  70. * # include "mktable.keys"
  71. * };
  72. * struct syminfo
  73. * {
  74. * int size;
  75. * int cycles;
  76. * int arity;
  77. * };
  78. * struct syminfo symtab[] = {
  79. * # include "mktable.info"
  80. * };
  81. *
  82. * The `mktable' command itself is used in one of the following ways:
  83. *
  84. * mktable "open" size <tablefile
  85. * This form creates an _open-addressed hash table, keyed on
  86. * the string fields at the beginning of each record in the
  87. * table file. The hash function used is the absolute value
  88. * of the sum of all the characters in a key, modulo the table
  89. * size. The collision resolution function is simply one plus
  90. * the last hash, modulo the table size.
  91. * Since some of the entries in the hash table may be empty,
  92. * and `mktable' has no way of knowing how to fill them,
  93. * one of the records supplied by the user will be replicated
  94. * in the empty entries with its key value set to NULL.
  95. * "table.c" will be created with the hash table itself, and
  96. * "table.h" will be created with index-macro definitions that
  97. * may be used to index directly into the table in "table.c".
  98. *
  99. * mktable "hashed" size <tablefile
  100. * This form creates a hash table keyed on the string fields
  101. * at the beginning of each table file record. The hash function
  102. * is the absolute value of the sum of all the characters in a
  103. * key, modulo the table size. Collision resolution is handled
  104. * with linear chaining, as follows: If two keys hash to the
  105. * same table location, the first one will be placed in the table,
  106. * and the corresponding entry of the collision resolution vector
  107. * will contain the (integer) index of the next table slot to be
  108. * checked for the hash synonym. When the collision resolution
  109. * vector entry is -1, the end of the chain has been reached.
  110. * Note that since all entries are stored in the main table, the
  111. * `size' must be at least as large as the number of entries.
  112. * As with _open addressing, some slots in the table may be
  113. * padded with a replicated entry (key value set to NULL).
  114. * "table.c" receives the hash table. "table.h" receives the
  115. * index-macro definitions that will index into the table in
  116. * "table.c". "tabindex.c" receives the conflict resolution
  117. * vector.
  118. *
  119. * mktable "sorted" <tablefile
  120. * This form creates a table sorted in ascending order, keyed
  121. * on the string fields at the beginning of each record in the
  122. * table file. Comparisons are ordered according to the ASCII
  123. * values of the characters being compared.
  124. * "table.c" will be created with the sorted table itself, and
  125. * "table.h" will be created with index-macro definitions that
  126. * may be used to index directly into the table in "table.c".
  127. *
  128. * mktable "key-letter" <tablefile
  129. * This form creates a key-letter-indexed table.
  130. * The string fields serve as the
  131. * key letter. An auxiliary table indexed from 'A' to 'Z'+1
  132. * gives the starting index of all the entries whose keys begin
  133. * with each letter (the last entry duplicates the entry for 'Z').
  134. * "table.c" will contain the sorted table. "tabindex.c" will
  135. * contain the auxiliary index table information. "table.h" will
  136. * contain the index-macro definitions that may be used to index
  137. * directly into the "table.c" table.
  138. * Note that key-letter tables are sorted in a peculiar way;
  139. * in ascending order by first letter of the key, but descending
  140. * order by the remainder of the key. This is required by
  141. * FORTRAN, to insure that longer keywords are matched before
  142. * shorter keywords that are initial substrings of the longer
  143. * keywords.
  144. * Also note that the key strings themselves are missing the first
  145. * char, since by indexing into the table, we are always assured
  146. * of having matched the first char.
  147. *
  148. * AUTHOR
  149. * February, 1984 Allen Akin
  150. *
  151. * MODIFICATIONS
  152. * March 8, 1984 Allen Akin
  153. * Added linear-list resolved hashing.
  154. *****************************************************************************/
  155. #include <stdio.h>
  156. #include <ctype.h>
  157. #include <stdlib.h>
  158. #include <string.h>
  159. #define MAXRECORDS 300 /* maximum-size table we can handle */
  160. #define MAXLINE 82 /* maximum line length (incl "\n\0") */
  161. #define HASHED 0 /* flag used by table loader */
  162. #define LINEAR 1 /* ditto */
  163. #define OPENADDR 2 /* ditto */
  164. #define KEYFILE "mktable.key" /* name of table output file */
  165. #define DEFFILE "mktable.def" /* name of index defs output file */
  166. #define INDEXFILE "mktable.ind" /* name of table index output file */
  167. #define INFOFILE "mktable.inf" /* gots the infos in it */
  168. typedef struct rec {
  169. char *key; /* key-string field */
  170. char *id; /* index macro identifier */
  171. char *other; /* other stuff in the record - output untouched */
  172. struct rec *link; /* pointer to next record in hash synonyms list */
  173. } Rec_t;
  174. int Upper = 0;
  175. FILE *Fkeys, *Findex, *Fdefs, *Finfo;
  176. /************************************************************************/
  177. /* Function Prototypes */
  178. /************************************************************************/
  179. void main (int argc, char **argv);
  180. void usage (void);
  181. void error(char * message);
  182. void open_addr(int size);
  183. void hash_linear(int size);
  184. void sorted(void);
  185. void key_letter(void);
  186. int load(Rec_t *record, int method, int size);
  187. void startoutput(void);
  188. void endoutput(void);
  189. void outrec(Rec_t *rec);
  190. void outdef(char *name, int value);
  191. void outinx(int value);
  192. void sortrec(Rec_t **rptr, int size);
  193. int hash(register char *name);
  194. /************************************************************************/
  195. /* Program code */
  196. /************************************************************************/
  197. void __cdecl
  198. main (
  199. int argc,
  200. char **argv
  201. )
  202. {
  203. if (argc <= 1)
  204. usage();
  205. if(strcmp(argv[1], "-U") == 0) {
  206. Upper = 1;
  207. argv++;
  208. argc--;
  209. }
  210. if (strcmp(argv[1], "open") == 0) {
  211. if (argc != 3)
  212. usage();
  213. open_addr(atoi(argv[2]));
  214. } else if (strcmp(argv[1], "hashed") == 0) {
  215. if (argc != 3)
  216. usage();
  217. hash_linear(atoi(argv[2]));
  218. } else if (strcmp(argv[1], "sorted") == 0) {
  219. if (argc != 2)
  220. usage();
  221. sorted();
  222. } else if (strcmp(argv[1], "key-letter") == 0) {
  223. if (argc != 2)
  224. usage();
  225. key_letter();
  226. } else
  227. usage();
  228. exit(0);
  229. }
  230. void
  231. usage (
  232. void
  233. )
  234. {
  235. error("usage: mktable (open SIZE | hashed SIZE | sorted | key-letter) <table-master");
  236. }
  237. void
  238. error(
  239. char * message
  240. )
  241. {
  242. fprintf(stderr, "%s\n", message);
  243. exit(1);
  244. }
  245. void
  246. open_addr(
  247. int size
  248. )
  249. {
  250. register Rec_t *record; /* points to array storing all records */
  251. Rec_t defrec; /* "default" record for empty array slot */
  252. register int i;
  253. if (size <= 0)
  254. error("hash table size specified is less than zero");
  255. if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
  256. error("insufficient memory for hash table");
  257. for (i = 0; i < size; ++i)
  258. record[i].key = NULL;
  259. if (load(record, OPENADDR, size) == 0)
  260. error("couldn't find any input records");
  261. defrec.key = NULL;
  262. defrec.id = NULL;
  263. for (i = 0; i < size; ++i)
  264. if (record[i].key != NULL)
  265. break;
  266. defrec.other = record[i].other;
  267. startoutput();
  268. for (i = 0; i < size; ++i) {
  269. if (record[i].key == NULL) {
  270. outrec(&defrec);
  271. } else {
  272. outrec(&record[i]);
  273. outdef(record[i].id, i);
  274. }
  275. }
  276. endoutput();
  277. _unlink(INDEXFILE);
  278. }
  279. void
  280. hash_linear(
  281. int size
  282. )
  283. {
  284. register Rec_t *record, /* stores some records, all buckets */
  285. *rp;
  286. Rec_t defrec; /* default record for empty hash table slots */
  287. register int i,
  288. nextslot, /* next empty slot in main hash table */
  289. prev;
  290. if (size <= 0)
  291. error("hash table size specified is less than zero");
  292. if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
  293. error("insufficient memory for hash table");
  294. for (i = 0; i < size; ++i) {
  295. record[i].key = NULL;
  296. record[i].link = NULL;
  297. }
  298. if ((i = load(record, HASHED, size)) == 0)
  299. error("couldn't find any input records");
  300. if (i > size)
  301. error("too many records to hold in table");
  302. defrec.key = NULL;
  303. defrec.id = NULL;
  304. for (i = 0; i < size; ++i) {
  305. if (record[i].key != NULL)
  306. break;
  307. }
  308. defrec.other = record[i].other;
  309. defrec.link = NULL;
  310. /*
  311. * The `load' routine has built a hash table `record'.
  312. * Each entry in `record' is either empty (key == NULL) or contains a record.
  313. * Each record may have a NULL link field, or a link field that points to
  314. * a hash synonym.
  315. * With this section of code, we rearrange the linked lists of hash synonyms
  316. * so that all the entries are stored in `record'.
  317. */
  318. nextslot = 0;
  319. for (i = 0; i < size; ++i) {
  320. if ((record[i].key != NULL) &&
  321. (record[i].link != NULL) &&
  322. ((record[i].link < record) || (record[i].link >= (record + size))))
  323. {
  324. for (prev = i, rp = record[i].link; rp != NULL; rp = rp->link) {
  325. while (record[nextslot].key != NULL)
  326. ++nextslot;
  327. record[prev].link = &record[nextslot];
  328. record[nextslot] = *rp;
  329. prev = nextslot;
  330. }
  331. }
  332. }
  333. startoutput();
  334. for (i = 0; i < size; ++i) {
  335. if (record[i].key == NULL) {
  336. outrec(&defrec);
  337. outinx(-1);
  338. } else {
  339. outrec(&record[i]);
  340. if (record[i].link == NULL)
  341. outinx(-1);
  342. else
  343. outinx(record[i].link - record); /* cvt. to inx in table */
  344. outdef(record[i].id, i);
  345. }
  346. }
  347. endoutput();
  348. }
  349. void
  350. sorted(
  351. void
  352. )
  353. {
  354. Rec_t record[MAXRECORDS],
  355. *rptr[MAXRECORDS];
  356. register int i, size;
  357. size = load(record, LINEAR, MAXRECORDS);
  358. for (i = 0; i < size; ++i)
  359. rptr[i] = &record[i];
  360. sortrec(rptr, size);
  361. startoutput();
  362. for (i = 0; i < size; ++i) {
  363. outrec(rptr[i]);
  364. outdef(rptr[i]->id, i);
  365. }
  366. endoutput();
  367. _unlink(INDEXFILE);
  368. }
  369. void
  370. key_letter(
  371. void
  372. )
  373. {
  374. Rec_t record[MAXRECORDS],
  375. *rptr[MAXRECORDS],
  376. *temp;
  377. register int i, size, j, k, l;
  378. register char lastletter;
  379. size = load(record, LINEAR, MAXRECORDS);
  380. for (i = 0; i < size; ++i)
  381. rptr[i] = &record[i];
  382. sortrec(rptr, size);
  383. for (i = 0; i < size; i = j) {
  384. for (j = i; j < size; ++j) {
  385. if (rptr[i]->key[0] != rptr[j]->key[0])
  386. break;
  387. }
  388. l = j - 1;
  389. for (k = i; k < l; ++k, --l) {
  390. temp = rptr[k];
  391. rptr[k] = rptr[l];
  392. rptr[l] = temp;
  393. }
  394. }
  395. startoutput();
  396. lastletter = (char)((Upper ? 'A' : '_') - 1);
  397. for (i = 0; i < size; ++i)
  398. {
  399. while (rptr[i]->key[0] > lastletter) {
  400. outinx(i);
  401. ++lastletter;
  402. }
  403. outrec(rptr[i]);
  404. outdef(rptr[i]->id, i);
  405. }
  406. for (; lastletter < (char)((Upper ? 'Z' : 'z') + 1); ++lastletter)
  407. outinx(size);
  408. endoutput();
  409. }
  410. int
  411. load(
  412. Rec_t *record,
  413. int method,
  414. int size
  415. )
  416. {
  417. char *line;
  418. register char *p;
  419. int rec, h, chainlen, maxchainlen = 0, collisions = 0;
  420. Rec_t r;
  421. for (rec = 0; ; ++rec)
  422. {
  423. if ((line = malloc(MAXLINE)) == NULL)
  424. error("insufficient memory to load records");
  425. if (fgets(line, MAXLINE, stdin) == NULL)
  426. break;
  427. if (rec >= size)
  428. error("too many records to handle");
  429. r.key = r.id = r.other = NULL;
  430. r.link = NULL;
  431. for (p = line; *p && isspace(*p); ++p)
  432. ;
  433. if (*p != '"') {
  434. free(line);
  435. --rec;
  436. continue;
  437. }
  438. r.key = ++p;
  439. for (; *p != '"'; ++p) {
  440. if(Upper && (islower(*p)))
  441. *p = (char)toupper(*p);
  442. }
  443. *p++ = '\0';
  444. for (; *p && isspace(*p); ++p) /* skip space key and id */
  445. ;
  446. if (*p == '"' && *(p + 1) == '"') { /* no id */
  447. r.id = NULL;
  448. p += 2;
  449. } else if (*p) {
  450. r.id = p++; /* id start */
  451. for (; *p && ( ! isspace(*p)); ++p) /* til first space */
  452. ;
  453. if(*p) {
  454. *p++ = '\0'; /* terminate id */
  455. }
  456. }
  457. for (; *p && isspace(*p); ++p) /* skip space til other info */
  458. ;
  459. if(*p) {
  460. r.other = p++;
  461. for (; *p != '\n' && *p != '\0'; ++p)
  462. ;
  463. *p = '\0';
  464. }
  465. if (method == LINEAR) {
  466. record[rec] = r;
  467. } else if (method == OPENADDR) {
  468. chainlen = 0;
  469. for(h = hash(r.key) % size; record[h].key; h = (h+1) % size) {
  470. ++chainlen;
  471. ++collisions;
  472. }
  473. maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
  474. record[h] = r;
  475. } else { /* method == HASHED */
  476. Rec_t *rp;
  477. h = hash(r.key) % size;
  478. if (record[h].key == NULL) {
  479. record[h] = r;
  480. } else {
  481. if ((rp = (Rec_t *)malloc(sizeof(Rec_t))) == NULL)
  482. error("insufficient memory to store all records");
  483. *rp = record[h];
  484. r.link = rp;
  485. record[h] = r;
  486. ++collisions;
  487. chainlen = 1;
  488. for (rp = &record[h]; rp->link != NULL; rp = rp->link)
  489. ++chainlen;
  490. maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
  491. }
  492. }
  493. }
  494. if (method == HASHED || method == OPENADDR)
  495. fprintf(stderr, "%d collisions, max chain length %d\n", collisions, maxchainlen);
  496. return rec;
  497. }
  498. void
  499. startoutput(
  500. void
  501. )
  502. {
  503. if ((Fkeys = fopen(KEYFILE, "w")) == NULL)
  504. error("can't open keys output file");
  505. if ((Findex = fopen(INDEXFILE, "w")) == NULL)
  506. error("can't open index output file");
  507. if ((Fdefs = fopen(DEFFILE, "w")) == NULL)
  508. error("can't open definitions output file");
  509. if ((Finfo = fopen(INFOFILE, "w")) == NULL)
  510. error("can't open info output file");
  511. }
  512. void
  513. endoutput(
  514. void
  515. )
  516. {
  517. fclose(Fkeys);
  518. fclose(Findex);
  519. fclose(Fdefs);
  520. fclose(Finfo);
  521. }
  522. void outrec(Rec_t *rec)
  523. {
  524. if (rec->key == NULL)
  525. fprintf(Fkeys, "NULL,\n");
  526. else
  527. fprintf(Fkeys, "\"%s\",\n", ((rec->key) + 1));
  528. if (rec->other == NULL)
  529. fprintf(Finfo, "{0},\n");
  530. else
  531. fprintf(Finfo, "{%s},\n", rec->other);
  532. }
  533. void
  534. outdef(
  535. char *name,
  536. int value
  537. )
  538. {
  539. if (name != NULL)
  540. fprintf(Fdefs, "#define %s %d\n", name, value);
  541. }
  542. void
  543. outinx(
  544. int value
  545. )
  546. {
  547. fprintf(Findex, "%d,\n", value);
  548. }
  549. /*
  550. * Following code defines the hash function used in `mktable' and in
  551. * the compiler. Since we must guarantee they are the same function,
  552. * we use a single source file.
  553. *
  554. * `mktable' does not use the standard include file that the compiler
  555. * uses, so we define the allowable register declarations here.
  556. */
  557. #define REG1 register
  558. #define REG2 register
  559. #define REG3 register
  560. void
  561. sortrec(
  562. Rec_t **rptr,
  563. int size
  564. )
  565. {
  566. register int j, i, gap;
  567. Rec_t *temp;
  568. for (gap = size / 2; gap > 0; gap /= 2) {
  569. for (i = gap; i < size; ++i) {
  570. for (j = i - gap; j >= 0; j -= gap) {
  571. if (strcmp(rptr[j]->key, rptr[j + gap]->key) <= 0)
  572. break;
  573. temp = rptr[j];
  574. rptr[j] = rptr[j + gap];
  575. rptr[j + gap] = temp;
  576. }
  577. }
  578. }
  579. }
  580. int
  581. hash(
  582. register char *name
  583. )
  584. {
  585. register int i;
  586. i = 0;
  587. while(*name) {
  588. i += *name++ ;
  589. }
  590. return(i) ;
  591. }