windows-server-2003/sdktools/rcdll/p0gettok.c

/***********************************************************************
* Microsoft (R) Windows (R) Resource Compiler
*
* Copyright (c) Microsoft Corporation.  All rights reserved.
*
* File Comments:
*
*
***********************************************************************/

#include "rc.h"

/************************************************************************
**  MAP_TOKEN : a token has two representations and additional information.
**      (ex: const has a basic token of L_CONST,
**           a mapped token of [L_TYPE | L_MODIFIER],
**           and info based on what the mapped token is)
**  MAP_AND_FILL : has two representations, but none of the extra info.
**      (ex: '<' has a basic token of L_LT and a mapped token of L_RELOP)
**  NOMAP_TOKEN : has one representation and additional info.
**      (ex: a string has basic and 'map' type L_STRING and ptrs to the actual str)
**  NOMAP_AND_FILL : has one representation and no additional info.
**      (ex: 'while' has basic and 'map' type L_WHILE)
**  The FILL versions fill the token with the basic token type.
************************************************************************/
/* Remember otok as the basic token; the expression yields otok itself. */
#define NOMAP_TOKEN(otok) \
        (Basic_token = (otok))

/* As NOMAP_TOKEN, and also store the basic token in yylval.yy_token. */
#define NOMAP_AND_FILL(otok) \
        (yylval.yy_token = NOMAP_TOKEN(otok))

/* Remember otok as the basic token; the expression yields TS_VALUE of it. */
#define MAP_TOKEN(otok) \
        (NOMAP_TOKEN(otok), TS_VALUE(Basic_token))

/* As MAP_TOKEN, and also store the basic token in yylval.yy_token. */
#define MAP_AND_FILL(otok) \
        (NOMAP_AND_FILL(otok), TS_VALUE(Basic_token))


/************************************************************************/
/* yylex - main tokenization routine                                    */
/*                                                                      */
/* Reads characters with GETCH(), classifies each through CHARMAP() and */
/* either returns a token or consumes the input and loops for more.     */
/* When Prep is set, most input is echoed to OUTPUTFILE with myfwrite() */
/* instead of being returned as a token.                                */
/*                                                                      */
/* Multi-character operators read one character of lookahead into       */
/* last_mapped. If it does not extend the operator, the case sets       */
/* lex_token and breaks out of the switch so the common tail can deal   */
/* with an end-of-buffer marker and push the lookahead back.            */
/*                                                                      */
/* Returns the token (mapped through the MAP_xxx/NOMAP_xxx macros),     */
/* L_EOF once all input is exhausted, or L_NOTOKEN at the end of a      */
/* '#' line / after echoing an identifier in Prep mode.                 */
/************************************************************************/

token_t
yylex(
    void
    )
{
    REG WCHAR           last_mapped;
    WCHAR               mapped_c;
    WCHAR               buf[5];
    /*
    **  NOTE(review): the outer switch has no default, so a character class
    **  not listed below would reach the tail with lex_token unset - confirm
    **  that CHARMAP() can only yield the classes handled here.
    */
    REG token_t         lex_token;

    for(;;) {
        last_mapped = mapped_c = CHARMAP(GETCH());
first_switch:
        switch(mapped_c) {
            case LX_EACH:
            case LX_ASCII:
                /*
                **  The three marker characters are copied straight to the
                **  output, but only when fAFXSymbols is set.  The comparisons
                **  are parenthesized as a group: without the parentheses &&
                **  bound only to the first comparison, so SYMDEFSTART and
                **  SYMDELIMIT were echoed even with fAFXSymbols off.
                */
                if (fAFXSymbols && (PREVCH() == SYMUSESTART || PREVCH() == SYMDEFSTART
                    || PREVCH() == SYMDELIMIT)) {
                    myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
                    continue;
                }
                error(2018, PREVCH());
                continue;

            /* single-character tokens with no mapped form */
            case LX_OBRACE:
                return(NOMAP_AND_FILL(L_LCURLY));

            case LX_CBRACE:
                return(NOMAP_AND_FILL(L_RCURLY));

            case LX_OBRACK:
                return(NOMAP_AND_FILL(L_LBRACK));

            case LX_CBRACK:
                return(NOMAP_AND_FILL(L_RBRACK));

            case LX_OPAREN:
                return(NOMAP_AND_FILL(L_LPAREN));

            case LX_CPAREN:
                return(NOMAP_AND_FILL(L_RPAREN));

            case LX_COMMA:
                return(NOMAP_AND_FILL(L_COMMA));

            case LX_QUEST:
                return(NOMAP_AND_FILL(L_QUEST));

            case LX_SEMI:
                return(NOMAP_AND_FILL(L_SEMI));

            case LX_TILDE:
                return(NOMAP_AND_FILL(L_TILDE));

            case LX_NUMBER:
                return(MAP_TOKEN(getnum(PREVCH())));

            /*
            **  operators that may be extended by the next character; the
            **  default arms fall out to the common tail after the switch
            */
            case LX_MINUS:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_MINUSEQ));

                    case LX_GT:
                        return(MAP_AND_FILL(L_POINTSTO));

                    case LX_MINUS:
                        return(MAP_AND_FILL(L_DECR));

                    default:
                        lex_token = L_MINUS;
                        break;
                }
                break;

            case LX_PLUS:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_PLUSEQ));

                    case LX_PLUS:
                        return(MAP_AND_FILL(L_INCR));

                    default:
                        lex_token = L_PLUS;
                        break;
                }
                break;

            case LX_AND:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_ANDEQ));

                    case LX_AND:
                        return(MAP_AND_FILL(L_ANDAND));

                    default:
                        lex_token = L_AND;
                        break;
                }
                break;

            case LX_OR:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_OREQ));

                    case LX_OR:
                        return(MAP_AND_FILL(L_OROR));

                    default:
                        lex_token = L_OR;
                        break;
                }
                break;

            case LX_COLON:
                return(NOMAP_AND_FILL(L_COLON));

            case LX_HAT:
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_XOREQ));
                }
                lex_token = L_XOR;
                break;

            case LX_PERCENT:
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_MODEQ));
                }
                lex_token = L_MOD;
                break;

            case LX_EQ:
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_EQUALS));
                }
                lex_token = L_ASSIGN;
                break;

            case LX_BANG:
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_NOTEQ));
                }
                lex_token = L_EXCLAIM;
                break;

            case LX_SLASH:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_STAR:           /* start of a block comment */
                        dump_comment();
                        continue;

                    case LX_SLASH:          /* start of a line comment */
                        DumpSlashComment();
                        continue;

                    case LX_EQ:
                        return(MAP_AND_FILL(L_DIVEQ));

                    default:
                        lex_token = L_DIV;
                        break;
                }
                break;

            case LX_STAR:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_SLASH:
                        /* a comment terminator outside of any comment */
                        if( ! Prep ) {
                            error(2138); /* (nested comments) */
                        } else {
                            myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
                        }
                        continue;

                    case LX_EQ:
                        return(MAP_AND_FILL(L_MULTEQ));

                    default:
                        lex_token = L_MULT;
                        break;
                }
                break;

            case LX_LT:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_LT:
                        if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                            return(MAP_AND_FILL(L_LSHFTEQ));
                        }
                        /*
                        **  remember that '<<' has been seen, so that a retry
                        **  from the tail re-enters at case LX_LSHIFT
                        */
                        mapped_c = LX_LSHIFT;
                        lex_token = L_LSHIFT;
                        break;

                    case LX_EQ:
                        return(MAP_AND_FILL(L_LTEQ));

                    default:
                        lex_token = L_LT;
                        break;
                }
                break;

            case LX_LSHIFT:
                /*
                **  if the next char is not an =, then we unget and return,
                **  since the only way in here is if we broke on the char
                **  following '<<'. since we'll have already worked the handle_eos()
                **  code prior to getting here, we'll not see another eos,
                **  UNLESS i/o buffering is char by char. ???
                **  see also, LX_RSHIFT
                */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_LSHFTEQ));
                }
                UNGETCH();
                return(MAP_AND_FILL(L_LSHIFT));

            case LX_GT:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_GTEQ));

                    case LX_GT:
                        if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                            return(MAP_AND_FILL(L_RSHFTEQ));
                        }
                        /* as for '<<': a retry re-enters at case LX_RSHIFT */
                        mapped_c = LX_RSHIFT;
                        lex_token = L_RSHIFT;
                        break;

                    default:
                        lex_token = L_GT;
                        break;
                }
                break;

            case LX_RSHIFT:
                /* reached only via first_switch after '>>'; see LX_LSHIFT */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_RSHFTEQ));
                }
                UNGETCH();
                return(MAP_AND_FILL(L_RSHIFT));

            case LX_POUND:
                if( ! Prep ) {
                    error(2014);/* # sign must be first non-whitespace */
                    UNGETCH();              /* replace it */
                    Linenumber--;   /* do_newline counts a newline */
                    do_newline();   /* may be a 'real' prepro line */
                } else {
                    myfwrite(L"#", sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;

            case LX_EOS:
                /* a backslash classified as EOS is handled like LX_BACKSLASH */
                if(PREVCH() == L'\\') {
                    if( ! Prep ) {
                        if( ! checknl()) {  /* ignore the new line */
                            error(2017);/* illegal escape sequence */
                        }
                    } else {
                        myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
                        *buf = get_non_eof();
                        myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
                    }
                    continue;
                }

                if(Macro_depth == 0) {
                    if( ! io_eob()) {       /* not the end of the buffer */
                        continue;
                    }
                    if(fpop()) {            /* have more files to read */
                        continue;
                    }
                    return(MAP_AND_FILL(L_EOF));    /* all gone . . . */
                }
                handle_eos();                       /* found end of macro */
                continue;

            case LX_DQUOTE:
                if( ! Prep ) {
                    str_const();
                    return(NOMAP_TOKEN(L_STRING));
                }
                prep_string(L'\"');
                continue;

            case LX_SQUOTE:
                if( ! Prep ) {
                    return(MAP_TOKEN(char_const()));
                }
                prep_string(L'\'');
                continue;

            case LX_CR:             /*  ??? check for nl next  */
                continue;

            case LX_NL:
                if(On_pound_line) {
                    /* leave the newline in the input and end the '#' line */
                    UNGETCH();
                    return(NOMAP_TOKEN(L_NOTOKEN));
                }
                if(Prep) {
                    // must manually write '\r' with '\n' when writing 16-bit strings
                    myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
                }
                do_newline();
                continue;

            case LX_WHITE:          /* skip all white space */
                if( ! Prep ) {      /* check only once */
                    do {
                        ;
                    } while(LXC_IS_WHITE(GETCH()));
                }
                else {
                    WCHAR   c;

                    /* echo each white space character as it is skipped */
                    c = PREVCH();
                    do {
                        myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
                    } while(LXC_IS_WHITE(c = GETCH()));
                }
                UNGETCH();          /* put back the first non-white character */
                continue;

            case LX_ILL:
                if( ! Prep ) {
                    error(2018, PREVCH());/* unknown character */
                } else {
                    myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;

            case LX_BACKSLASH:
                if( ! Prep ) {
                    if( ! checknl()) {      /* ignore the new line */
                        error(2017);/* illegal escape sequence */
                    }
                }
                else {
                    /* echo the backslash and the character that follows it */
                    myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
                    *buf = get_non_eof();
                    myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;

            case LX_DOT:
dot_switch:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_BACKSLASH:
                        if(checknl()) {
                            goto dot_switch;
                        }
                        /*
                        **  NOTE(review): this path ungets here and again
                        **  after the switch - confirm that two UNGETCH()
                        **  calls are intended.
                        */
                        UNGETCH();
                        break;

                    case LX_EOS:
                        if(handle_eos() == BACKSLASH_EOS) {
                            break;
                        }
                        goto dot_switch;

                    case LX_DOT:
                        if( ! checkop(L'.') ) {
                            error(2142);/* ellipsis requires three '.'s */
                        }
                        return(NOMAP_AND_FILL(L_ELLIPSIS));

                    case LX_NUMBER:
                        /*
                        **      don't worry about getting correct hash value.
                        **      The text equivalent of a real number is never
                        **      hashed
                        */
                        Reuse_W[0] = L'.';
                        Reuse_W[1] = PREVCH();
                        return(MAP_TOKEN(get_real(&Reuse_W[2])));
                }
                UNGETCH();
                return(MAP_AND_FILL(L_PERIOD));

            case LX_NOEXPAND:
                SKIPCH();                   /* just skip length */
                continue;

            case LX_ID:
                {
                    pdefn_t pdef;

                    if(Macro_depth > 0) {
                        /* a zero return from lex_getid() means: do not expand */
                        if( ! lex_getid(PREVCH())) {
                            goto avoid_expand;
                        }
                    }
                    else {
                        getid(PREVCH());
                    }

                    /*
                    **  a defined name that is not already being expanded and
                    **  that can_expand() accepts: go round again for the next
                    **  token instead of returning the identifier
                    */
                    if( ((pdef = get_defined()) != 0)
                        &&
                        ( ! DEFN_EXPANDING(pdef))
                        &&
                        ( can_expand(pdef))
                        ) {
                        continue;
                    }

avoid_expand:
                    if( ! Prep ) {
                        /* M00BUG get near copy of identifier???? */
                        HLN_NAME(yylval.yy_ident) = Reuse_W;
                        HLN_HASH(yylval.yy_ident) = Reuse_W_hash;
                        HLN_LENGTH(yylval.yy_ident) = (UINT)Reuse_W_length;
                        return(L_IDENT);
                    } else {
                        /* Reuse_W_length counts the terminator; do not write it */
                        myfwrite(Reuse_W, (Reuse_W_length - 1) * sizeof(WCHAR), 1, OUTPUTFILE);
                        return(NOMAP_TOKEN(L_NOTOKEN));
                    }
                }
                continue;
        }
        /*
        **  all the multichar ( -> -- -= etc ) operands
        **  must come through here. we've gotten the next char,
        **  and not matched one of the possibilities, but we have to check
        **  for the end of the buffer character and act accordingly
        **  if it is the eob, then we handle it and go back for another try.
        **  otherwise, we unget the char we got, and return the base token.
        */
        if(last_mapped == LX_EOS) {
            if(handle_eos() != BACKSLASH_EOS) {
                goto first_switch;
            }
        }
        UNGETCH();      /* cause we got an extra one to check */
        return(MAP_AND_FILL(lex_token));
    }
}


/************************************************************************
**
**      lex_getid: reads an identifier for the main lexer.  The
**              identifier is read into Reuse_W. This function should not
**              handle an end of string if it is rescanning a macro
**              expansion, because this could switch the context with
**              regards to whether the macro is expandable or not.
**              Similarly, the noexpand marker must only be allowed if a
**              macro is being rescanned, otherwise let this character
**              be caught as an illegal character in text
**
**      c:      first character of the identifier (already consumed).
**
**      On exit Reuse_W holds the null terminated identifier, Reuse_W_hash
**      its hash and Reuse_W_length its length including the terminator.
**
**      Returns non-zero when the identifier's length differs from the
**      length read after the last noexpand marker met while scanning
**      (0 if there was none); yylex() skips macro expansion on a zero
**      return.
************************************************************************/
int
lex_getid(
    WCHAR c
    )
{
    REG WCHAR   *p;
    int         length = 0;

    p = Reuse_W;
    *p++ = c;
    c &= HASH_MASK;             /* from here on c accumulates the hash */
    for(;;) {
        while(LXC_IS_IDENT(*p = GETCH())) { /* collect character */
            c += (*p & HASH_MASK);                      /* hash it */
            p++;
            /*
            **  stop before the next store can go past the limit; checking
            **  only after the loop (as before) let an over-long identifier
            **  write beyond it first.  fatal() is expected not to return.
            */
            if(p >= LIMIT(Reuse_W)) {
                fatal(1067);
            }
        }

        if(CHARMAP(*p) == LX_NOEXPAND ) {
            /* the character after the marker is a length; p is not advanced */
            length = (int)GETCH();
            continue;
        }

        UNGETCH();
        break;                          /* out of for loop  -  only way out */
    }

    if(p >= LIMIT(Reuse_W)) {   /* is this error # correct? */
        fatal(1067);
    }

    if(((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
        /* too long: cut it at the limit and hash the truncated text again */
        p = Reuse_W + LIMIT_ID_LENGTH;
        *p = L'\0';
        c = local_c_hash(Reuse_W);
        warning(4011, Reuse_W);  /* id truncated */
    } else {
        *p = L'\0';              /* terminates identifier for expandable check */
    }

    Reuse_W_hash = (hash_t)c;
    Reuse_W_length = (UINT)((p - Reuse_W) + 1);

    return(length != (p - Reuse_W));
}