windows-server-2003/sdktools/rcdll/mktable.c


								/*****************************************************************************

								 * mktable - table-building program to ease table maintenance problems

								 *

								 * DESCRIPTION

								 *  Several parts of the FORTRAN compiler need large tables.

								 *  For example, the lexer contains tables of keywords and multicharacter

								 *  tokens; the intrinsic-function handler contains a table of all the

								 *  FORTRAN intrinsic functions.

								 *  Maintaining these tables can be aggravating, since they are typically

								 *  large and involve lots of drudge work (like changing many sequentially-

								 *  numbered macro definitions) to modify.

								 *

								 *  `mktable' can be used to build tables automatically as part of the

								 *  usual compiler building process.  Its usages and semantics are as

								 *  follows.

								 *

								 *  `mktable' takes a "table" file on its standard input.  Each line of

								 *  the table file has one of the following forms:

								 *

								 *      # commentary information

								 *      "key-string" [index-macro-name [arbitrary-stuff]]

								 *      <blank line>

								 *

								 *  The key string and arbitrary-stuff form the contents of a single

								 *  table record.  The index-macro-name is #define'd to be the index

								 *  of the given record in the table.  If the index-macro-name is absent or

								 *  is an empty string ("") then no macro definition is produced for the

								 *  record.

								 *

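								 *  `mktable' produces its output on four files.  (The names listed
								 *  below are the historical ones; the names actually used are set by
								 *  the KEYFILE, DEFFILE, INDEXFILE and INFOFILE macros in this file:
								 *  "mktable.key", "mktable.def", "mktable.ind" and "mktable.inf".)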

								 *      mktable.keys: the key string

								 *      mktable.defs: #define <index_macro_name> <index to mktable.keys>

								 *      mktable.indx: contains the initialization part of a definition

								 *          for an index array for key-letter indexed tables,

								 *          or the initialization part of a collision-resolution

								 *          table for linear-list hashed tables.

								 *          (not generated for sorted or _open-addressed tables.)

								 *      mktable.info: contains arbitrary-stuff

								 *

								 *  For example, if the table to be defined were named "symtab" and the

								 *  table being constructed was of the "sorted" type (suitable for binary

								 *  search),

								 *

								 *      # contents of symtab:

								 *      "alpha" ST_ALPHA    2, 4, MONADIC

								 *      "gamma" ST_GAMMA    2, 3, MONADIC

								 *      "delta" ST_DELTA    2, 1, DYADIC

								 *      "epsilon"

								 *

								 *  then `mktable' produces the following in mktable.keys:

								 *

								 *      "alpha","delta","epsilon","gamma"

								 *

								 *  and the following in mktable.defs:

								 *

								 *      #define ST_ALPHA 0

								 *      #define ST_DELTA 1

								 *      #define ST_GAMMA 2

								 *

								 *  and in mktable.info :

								 *

								 *      {2, 4, MONADIC}, {2, 1, DYADIC}, {0}, {2, 3, MONADIC}

								 *

								 *  The files might be included in a C source program in the

								 *  following way:

								 *

								 *      #include "mktable.defs"

								 *      ...

								 *      char    *symname[] = {

								 *      #   include "mktable.keys"

								 *          };

								 *      struct syminfo

								 *          {

								 *          int size;

								 *          int cycles;

								 *          int arity;

								 *          };

								 *      struct syminfo symtab[] = {

								 *      #   include "mktable.info"

								 *          };

								 *

								 *  The `mktable' command itself is used in one of the following ways:

								 *

								 *  mktable "open" size <tablefile

								 *      This form creates an _open-addressed hash table, keyed on

								 *      the string fields at the beginning of each record in the

								 *      table file.  The hash function used is the absolute value

								 *      of the sum of all the characters in a key, modulo the table

								 *      size.  The collision resolution function is simply one plus

								 *      the last hash, modulo the table size.

								 *      Since some of the entries in the hash table may be empty,

								 *      and `mktable' has no way of knowing how to fill them,

								 *      one of the records supplied by the user will be replicated

								 *      in the empty entries with its key value set to NULL.

								 *      "table.c" will be created with the hash table itself, and

								 *      "table.h" will be created with index-macro definitions that

								 *      may be used to index directly into the table in "table.c".

								 *

								 *  mktable "hashed" size <tablefile

								 *      This form creates a hash table keyed on the string fields

								 *      at the beginning of each table file record.  The hash function

								 *      is the absolute value of the sum of all the characters in a

								 *      key, modulo the table size.  Collision resolution is handled

								 *      with linear chaining, as follows:  If two keys hash to the

								 *      same table location, the first one will be placed in the table,

								 *      and the corresponding entry of the collision resolution vector

								 *      will contain the (integer) index of the next table slot to be

								 *      checked for the hash synonym.  When the collision resolution

								 *      vector entry is -1, the end of the chain has been reached.

								 *      Note that since all entries are stored in the main table, the

								 *      `size' must be at least as large as the number of entries.

								 *      As with _open addressing, some slots in the table may be

								 *      padded with a replicated entry (key value set to NULL).

								 *      "table.c" receives the hash table.  "table.h" receives the

								 *      index-macro definitions that will index into the table in

								 *      "table.c".  "tabindex.c" receives the conflict resolution

								 *      vector.

								 *

								 *  mktable "sorted" <tablefile

								 *      This form creates a table sorted in ascending order, keyed

								 *      on the string fields at the beginning of each record in the

								 *      table file.  Comparisons are ordered according to the ASCII

								 *      values of the characters being compared.

								 *      "table.c" will be created with the sorted table itself, and

								 *      "table.h" will be created with index-macro definitions that

								 *      may be used to index directly into the table in "table.c".

								 *

								 *  mktable "key-letter" <tablefile

								 *      This form creates a key-letter-indexed table.

								 *      The string fields serve as the

								 *      key letter.  An auxiliary table indexed from 'A' to 'Z'+1

								 *      gives the starting index of all the entries whose keys begin

								 *      with each letter (the last entry duplicates the entry for 'Z').

								 *      "table.c" will contain the sorted table.  "tabindex.c" will

								 *      contain the auxiliary index table information.  "table.h" will

								 *      contain the index-macro definitions that may be used to index

								 *      directly into the "table.c" table.

								 *      Note that key-letter tables are sorted in a peculiar way;

								 *      in ascending order by first letter of the key, but descending

								 *      order by the remainder of the key.  This is required by

								 *      FORTRAN, to ensure that longer keywords are matched before

								 *      shorter keywords that are initial substrings of the longer

								 *      keywords.

								 *      Also note that the key strings themselves are missing the first

								 *      char, since by indexing into the table, we are always assured

								 *      of having matched the first char.

								 *

								 * AUTHOR

								 *      February, 1984      Allen Akin

								 *

								 * MODIFICATIONS

								 *  March 8, 1984       Allen Akin

								 *      Added linear-list resolved hashing.

								 *****************************************************************************/


								#include <stdio.h>

								#include <ctype.h>

								#include <stdlib.h>

								#include <string.h>


								#define MAXRECORDS  300     /* maximum-size table we can handle */

								#define MAXLINE     82      /* maximum line length (incl "\n\0") */


								#define HASHED      0       /* flag used by table loader */

								#define LINEAR      1       /* ditto */

								#define OPENADDR    2       /* ditto */


								#define KEYFILE         "mktable.key"   /* name of table output file */

								#define DEFFILE         "mktable.def"   /* name of index defs output file */

								#define INDEXFILE       "mktable.ind"   /* name of table index output file */

								#define INFOFILE        "mktable.inf"   /* gots the infos in it */


								/*
								 * One table record as parsed by load().  The three string fields point
								 * into the line buffer that load() allocated for the input line; any of
								 * `id' and `other' may be NULL when the line did not supply them.
								 */
								typedef struct rec {

								    char *key;      /* key-string field (NULL marks an empty hash slot) */

								    char *id;       /* index macro identifier, or NULL for none */

								    char *other;    /* other stuff in the record - output untouched */

								    struct rec *link;   /* pointer to next record in hash synonyms list */

								} Rec_t;


								/*
								 * Set to 1 by main() when the "-U" option is given.  When set, load()
								 * upper-cases key strings and key_letter() starts its index at 'A'
								 * instead of '_'.
								 */
								int Upper = 0;


								/* Output streams, opened by startoutput() and closed by endoutput(). */
								FILE *Fkeys, *Findex, *Fdefs, *Finfo;


								/************************************************************************/

								/* Function Prototypes                          */

								/************************************************************************/

								void main (int argc, char **argv);      /* parse arguments, dispatch */

								void usage (void);                      /* print usage via error() */

								void error(char * message);             /* print message to stderr, exit(1) */

								void open_addr(int size);               /* build open-addressed hash table */

								void hash_linear(int size);             /* build chained hash table */

								void sorted(void);                      /* build sorted table */

								void key_letter(void);                  /* build key-letter indexed table */

								int load(Rec_t *record, int method, int size);  /* read records from stdin */

								void startoutput(void);                 /* open the four output files */

								void endoutput(void);                   /* close the four output files */

								void outrec(Rec_t *rec);                /* write key and info of one record */

								void outdef(char *name, int value);     /* write one #define line */

								void outinx(int value);                 /* write one index value */

								void sortrec(Rec_t **rptr, int size);   /* sort record pointers by key */

								int hash(register char *name);          /* sum of the characters of name */


								/************************************************************************/

								/* Program code                             */

								/************************************************************************/

								/*
								 * main - parse the command line and run the requested table builder.
								 *
								 * Accepted forms (an optional leading "-U" sets Upper):
								 *      mktable [-U] open SIZE
								 *      mktable [-U] hashed SIZE
								 *      mktable [-U] sorted
								 *      mktable [-U] key-letter
								 *
								 * Anything else prints the usage message and exits with status 1 (via
								 * usage()/error()).  On success the process exits with status 0.
								 */
								void  __cdecl
								main (
								    int argc,
								    char **argv
								    )
								{
								    if (argc <= 1)
								        usage();

								    if (strcmp(argv[1], "-U") == 0) {
								        Upper = 1;
								        argv++;
								        argc--;

								        /*
								         * "-U" with nothing after it leaves argv[1] pointing at the
								         * terminating NULL of the argument vector; bail out before it
								         * is handed to strcmp() below.
								         */
								        if (argc <= 1)
								            usage();
								    }

								    if (strcmp(argv[1], "open") == 0) {
								        if (argc != 3)
								            usage();
								        open_addr(atoi(argv[2]));
								    } else if (strcmp(argv[1], "hashed") == 0) {
								        if (argc != 3)
								            usage();
								        hash_linear(atoi(argv[2]));
								    } else if (strcmp(argv[1], "sorted") == 0) {
								        if (argc != 2)
								            usage();
								        sorted();
								    } else if (strcmp(argv[1], "key-letter") == 0) {
								        if (argc != 2)
								            usage();
								        key_letter();
								    } else
								        usage();

								    exit(0);
								}


								/*
								 * usage - report the command-line synopsis through error().
								 * error() terminates the program, so this does not return.
								 */
								void
								usage (
								    void
								    )
								{
								    char *synopsis =
								        "usage: mktable (open SIZE | hashed SIZE | sorted | key-letter) <table-master";

								    error(synopsis);
								}


								/*
								 * error - write `message' followed by a newline to stderr, then
								 * terminate the program with exit status 1.
								 */
								void
								error(
								    char * message
								    )
								{
								    fputs(message, stderr);
								    fputc('\n', stderr);
								    exit(1);
								}


								void

								open_addr(

								    int size

								    )

								{

								    register Rec_t *record;     /* points to array storing all records */

								    Rec_t defrec;               /* "default" record for empty array slot */

								    register int i;


								    if (size <= 0)

								        error("hash table size specified is less than zero");


								    if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)

								        error("insufficient memory for hash table");


								    for (i = 0; i < size; ++i)

								        record[i].key = NULL;


								    if (load(record, OPENADDR, size) == 0)

								        error("couldn't find any input records");


								    defrec.key = NULL;

								    defrec.id = NULL;

								    for (i = 0; i < size; ++i)

								    if (record[i].key != NULL)

								        break;

								    defrec.other = record[i].other;


								    startoutput();


								    for (i = 0; i < size; ++i) {

								        if (record[i].key == NULL) {

								            outrec(&defrec);

								        } else {

								            outrec(&record[i]);

								            outdef(record[i].id, i);

								        }

								    }


								    endoutput();

								    _unlink(INDEXFILE);

								}


								void

								hash_linear(

								    int size

								    )

								{

								    register Rec_t *record,     /* stores some records, all buckets */

								                    *rp;

								    Rec_t defrec;               /* default record for empty hash table slots */

								    register int i,

								                 nextslot,      /* next empty slot in main hash table */

								                 prev;


								    if (size <= 0)

								        error("hash table size specified is less than zero");


								    if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)

								        error("insufficient memory for hash table");


								    for (i = 0; i < size; ++i) {

								        record[i].key = NULL;

								        record[i].link = NULL;

								    }


								    if ((i = load(record, HASHED, size)) == 0)

								        error("couldn't find any input records");


								    if (i > size)

								        error("too many records to hold in table");


								    defrec.key = NULL;

								    defrec.id = NULL;

								    for (i = 0; i < size; ++i) {

								        if (record[i].key != NULL)

								            break;

								    }

								    defrec.other = record[i].other;

								    defrec.link = NULL;

								    /*

								     * The `load' routine has built a hash table `record'.

								     * Each entry in `record' is either empty (key == NULL) or contains a record.

								     * Each record may have a NULL link field, or a link field that points to

								     * a hash synonym.

								     * With this section of code, we rearrange the linked lists of hash synonyms

								     * so that all the entries are stored in `record'.

								     */

								    nextslot = 0;

								    for (i = 0; i < size; ++i) {

								        if ((record[i].key != NULL) &&

								            (record[i].link != NULL) &&

								            ((record[i].link < record) || (record[i].link >= (record + size))))

								        {

								            for (prev = i, rp = record[i].link; rp != NULL; rp = rp->link) {

								                while (record[nextslot].key != NULL)

								                    ++nextslot;

								                record[prev].link = &record[nextslot];

								                record[nextslot] = *rp;

								                prev = nextslot;

								            }

								        }

								    }


								    startoutput();


								    for (i = 0; i < size; ++i) {

								        if (record[i].key == NULL) {

								            outrec(&defrec);

								            outinx(-1);

								        } else {

								            outrec(&record[i]);

								            if (record[i].link == NULL)

								                outinx(-1);

								            else

								                outinx(record[i].link - record);    /* cvt. to inx in table */

								            outdef(record[i].id, i);

								        }

								    }


								    endoutput();

								}


								/*
								 * sorted - build a table sorted in ascending key order.
								 *
								 * Up to MAXRECORDS records are read from stdin, sorted by key with
								 * sortrec() (strcmp ordering), and written out together with a #define
								 * of each record's position.  No index file is needed for this table
								 * type, so the one created by startoutput() is deleted at the end.
								 */
								void
								sorted(
								    void
								    )
								{
								    Rec_t  record[MAXRECORDS],
								          *rptr[MAXRECORDS];    /* sorted view: pointers into record[] */
								    int i, size;

								    size = load(record, LINEAR, MAXRECORDS);

								    for (i = 0; i < size; ++i)
								        rptr[i] = &record[i];

								    sortrec(rptr, size);

								    startoutput();

								    for (i = 0; i < size; ++i) {
								        outrec(rptr[i]);
								        outdef(rptr[i]->id, i);
								    }

								    endoutput();

								    /* Standard remove() replaces the undeclared, MSVC-only _unlink(). */
								    remove(INDEXFILE);
								}


								/*
								 * key_letter - build a key-letter indexed table.
								 *
								 * Records are sorted ascending by key, then every run of keys sharing the
								 * same first character is reversed, giving ascending order by first
								 * character and descending order within each character.
								 *
								 * While the records are written, one index entry is emitted for every
								 * character value stepped over, starting one below 'A' (with -U) or '_'
								 * (without) and ending at 'Z' or 'z' respectively; entries for letters
								 * past the last key all receive the record count.
								 */
								void
								key_letter(
								    void
								    )
								{
								    Rec_t  table[MAXRECORDS];
								    Rec_t *order[MAXRECORDS];
								    Rec_t *swap;
								    int count, n;
								    int first, past;            /* bounds of the current same-letter run */
								    int lo, hi;
								    char letter;
								    char limit;

								    count = load(table, LINEAR, MAXRECORDS);

								    for (n = 0; n < count; ++n)
								        order[n] = &table[n];

								    sortrec(order, count);

								    /* Reverse each run of records whose keys start with the same character. */
								    first = 0;
								    while (first < count) {
								        past = first;
								        while (past < count && order[past]->key[0] == order[first]->key[0])
								            ++past;

								        lo = first;
								        hi = past - 1;
								        while (lo < hi) {
								            swap = order[lo];
								            order[lo] = order[hi];
								            order[hi] = swap;
								            ++lo;
								            --hi;
								        }

								        first = past;
								    }

								    startoutput();

								    letter = (char)((Upper ? 'A' : '_') - 1);
								    for (n = 0; n < count; ++n) {
								        /* Emit an index entry for each character up to this key's first one. */
								        while (order[n]->key[0] > letter) {
								            outinx(n);
								            ++letter;
								        }
								        outrec(order[n]);
								        outdef(order[n]->id, n);
								    }

								    /* Remaining letters have no records: point them past the end. */
								    limit = (char)((Upper ? 'Z' : 'z') + 1);
								    while (letter < limit) {
								        outinx(count);
								        ++letter;
								    }

								    endoutput();
								}


								/*
								 * load - read table records from stdin into `record'.
								 *
								 * Each input line is read into its own malloc'd buffer of MAXLINE bytes.
								 * A line whose first non-blank character is not a double quote (comment,
								 * blank line) is skipped.  Otherwise the line is split in place into
								 *      "key" [id ["other stuff"]]
								 * where an id written as "" means "no id".  The buffer of an accepted
								 * line is deliberately kept allocated because the record's string fields
								 * point into it.
								 *
								 * record: destination array of `size' entries.  For HASHED and OPENADDR
								 *         the caller must have set every key (and, for HASHED, link) to
								 *         NULL beforehand.
								 * method: LINEAR   - store records in input order;
								 *         OPENADDR - store at hash(key) % size, probing the next slot
								 *                    (wrapping) while the slot is occupied;
								 *         HASHED   - store at hash(key) % size; on collision the new
								 *                    record takes the slot and the previous occupant
								 *                    is moved to a malloc'd node linked behind it.
								 * size:   capacity of `record'.
								 *
								 * Returns the number of records stored.  For the two hashing methods a
								 * collision summary is printed on stderr.  Exits through error() when
								 * memory runs out, when more than `size' records are supplied, or when a
								 * key string has no closing quote.
								 */
								int
								load(
								    Rec_t *record,
								    int method,
								    int size
								    )
								{
								    char *line;
								    char *p;
								    int rec = 0;                /* number of records stored so far */
								    int h, chainlen, maxchainlen = 0, collisions = 0;
								    Rec_t r;

								    for (;;)
								    {
								        if ((line = malloc(MAXLINE)) == NULL)
								            error("insufficient memory to load records");

								        if (fgets(line, MAXLINE, stdin) == NULL) {
								            free(line);                 /* unused buffer from the EOF read */
								            break;
								        }

								        /* <ctype.h> functions require an unsigned char value. */
								        for (p = line; *p && isspace((unsigned char)*p); ++p)
								            ;
								        if (*p != '"') {                /* comment or blank line */
								            free(line);
								            continue;
								        }

								        /*
								         * Only real records count against the capacity, so comments or
								         * blank lines after a full table are not an error.
								         */
								        if (rec >= size)
								            error("too many records to handle");

								        r.key = r.id = r.other = NULL;
								        r.link = NULL;

								        r.key = ++p;
								        for (; *p != '"'; ++p) {
								            if (*p == '\0')             /* would run off the buffer */
								                error("missing closing quote on key string");
								            if (Upper && islower((unsigned char)*p))
								                *p = (char)toupper((unsigned char)*p);
								        }

								        *p++ = '\0';                    /* terminate key */

								        for (; *p && isspace((unsigned char)*p); ++p)   /* skip space key and id */
								            ;
								        if (*p == '"' && *(p + 1) == '"') {     /* no id */
								            r.id = NULL;
								            p += 2;
								        } else if (*p) {
								            r.id = p++;                         /* id start */
								            for (; *p && ( ! isspace((unsigned char)*p)); ++p) /* til first space */
								                ;
								            if (*p) {
								                *p++ = '\0';                    /* terminate id */
								            }
								        }

								        for (; *p && isspace((unsigned char)*p); ++p)   /* skip space til other info */
								            ;
								        if (*p) {
								            r.other = p++;
								            for (; *p != '\n' && *p != '\0'; ++p)
								                ;
								            *p = '\0';                  /* drop the trailing newline */
								        }

								        if (method == LINEAR) {
								            record[rec] = r;
								        } else if (method == OPENADDR) {
								            chainlen = 0;
								            /*
								             * abs() keeps the index non-negative should the character
								             * sum be negative (keys with bytes above 0x7f where char is
								             * signed).  rec < size guarantees a free slot exists.
								             */
								            for (h = abs(hash(r.key)) % size; record[h].key; h = (h + 1) % size) {
								                ++chainlen;
								                ++collisions;
								            }
								            maxchainlen = (chainlen < maxchainlen) ? maxchainlen : chainlen;
								            record[h] = r;
								        } else { /* method == HASHED */
								            Rec_t  *rp;

								            h = abs(hash(r.key)) % size;
								            if (record[h].key == NULL) {
								                record[h] = r;
								            } else {
								                /* Push the new record in front of the existing chain. */
								                if ((rp = (Rec_t *)malloc(sizeof(Rec_t))) == NULL)
								                    error("insufficient memory to store all records");
								                *rp = record[h];
								                r.link = rp;
								                record[h] = r;
								                ++collisions;
								                chainlen = 1;
								                for (rp = &record[h]; rp->link != NULL; rp = rp->link)
								                    ++chainlen;
								                maxchainlen = (chainlen < maxchainlen) ? maxchainlen : chainlen;
								            }
								        }

								        ++rec;
								    }

								    if (method == HASHED || method == OPENADDR)
								        fprintf(stderr, "%d collisions, max chain length %d\n", collisions, maxchainlen);

								    return rec;
								}


								/*
								 * startoutput - create (or truncate) the four output files and store
								 * their streams in Fkeys, Findex, Fdefs and Finfo.  Exits through
								 * error() as soon as one of them cannot be opened.
								 */
								void
								startoutput(
								    void
								    )
								{
								    Fkeys = fopen(KEYFILE, "w");
								    if (Fkeys == NULL)
								        error("can't open keys output file");

								    Findex = fopen(INDEXFILE, "w");
								    if (Findex == NULL)
								        error("can't open index output file");

								    Fdefs = fopen(DEFFILE, "w");
								    if (Fdefs == NULL)
								        error("can't open definitions output file");

								    Finfo = fopen(INFOFILE, "w");
								    if (Finfo == NULL)
								        error("can't open info output file");
								}


								void

								endoutput(

								    void

								    )

								{

								    fclose(Fkeys);

								    fclose(Findex);

								    fclose(Fdefs);

								    fclose(Finfo);

								}


								/*
								 * outrec - write one record to the key and info files.
								 *
								 * Key file:  NULL,          when the record has no key
								 *            "<key+1>",     otherwise - the first character of the key
								 *                           is always skipped.
								 * Info file: {0},           when the record has no `other' text
								 *            {<other>},     otherwise.
								 *
								 * NOTE(review): the first character is dropped for every table type,
								 * not only key-letter tables, and an empty key would be read one past
								 * its terminator - confirm this is what the consumers expect.
								 */
								void outrec(Rec_t *rec)
								{
								    char *key = rec->key;
								    char *info = rec->other;

								    if (key != NULL)
								        fprintf(Fkeys, "\"%s\",\n", key + 1);
								    else
								        fprintf(Fkeys, "NULL,\n");

								    if (info != NULL)
								        fprintf(Finfo, "{%s},\n", info);
								    else
								        fprintf(Finfo, "{0},\n");
								}


								/*
								 * outdef - write "#define <name> <value>" to the definitions file.
								 * Nothing is written when `name' is NULL (record without an id).
								 */
								void
								outdef(
								    char *name,
								    int value
								    )
								{
								    if (name == NULL)
								        return;

								    fprintf(Fdefs, "#define %s %d\n", name, value);
								}


								/*
								 * outinx - append one entry, "<value>," on a line of its own, to the
								 * index file.
								 */
								void
								outinx(
								    int value
								    )
								{
								    FILE *out = Findex;

								    fprintf(out, "%d,\n", value);
								}

								/*

								 * Following code defines the hash function used in `mktable' and in

								 * the compiler.  Since we must guarantee they are the same function,

								 * we use a single source file.

								 *

								 * `mktable' does not use the standard include file that the compiler

								 * uses, so we define the allowable register declarations here.

								 *

								 * NOTE(review): in this file sortrec() sits between this comment and

								 * hash(), and REG1..REG3 are not referenced by any code visible here;

								 * hash() spells out `register' directly.

								 */

								#define REG1 register

								#define REG2 register

								#define REG3 register


								/*
								 * sortrec - sort an array of record pointers into ascending key order.
								 *
								 * rptr: array of `size' pointers to records; reordered in place.
								 * size: number of entries in rptr.
								 *
								 * Keys are compared with strcmp().  The array is processed with a gap
								 * that starts at size / 2 and is halved until it reaches zero; for each
								 * gap, out-of-order pairs that are `gap' apart are swapped, walking
								 * backwards until a pair is already in order.
								 */
								void
								sortrec(
								    Rec_t **rptr,
								    int size
								    )
								{
								    int gap, top, lo;
								    Rec_t *held;

								    gap = size / 2;
								    while (gap > 0) {
								        for (top = gap; top < size; ++top) {
								            lo = top - gap;
								            while (lo >= 0 && strcmp(rptr[lo]->key, rptr[lo + gap]->key) > 0) {
								                held = rptr[lo];
								                rptr[lo] = rptr[lo + gap];
								                rptr[lo + gap] = held;
								                lo -= gap;
								            }
								        }
								        gap /= 2;
								    }
								}


								/*
								 * hash - return the sum of the character values in the NUL-terminated
								 * string `name' (0 for the empty string).  Callers reduce the result
								 * modulo their table size.
								 */
								int
								hash(
								    register char *name
								    )
								{
								    int total = 0;

								    for (; *name != '\0'; ++name)
								        total += *name;

								    return total;
								}