// windows-server-2003/ds/security/authz/test/adl/adllexer.cpp


								/*++


								Copyright (c) 2000  Microsoft Corporation


								Module Name:


								   adllexer.cpp


								Abstract:


								   Implementation of the lexer for the ADL language


								Author:


								    t-eugenz - August 2000


								Environment:


								    User mode only.


								Revision History:


								    Created - August 2000


								--*/


								#include "adl.h"


								//
								// Internal codes for the special characters of the language. Every code
								// is greater than 65535, i.e. outside the WCHAR range, so it can never be
								// mistaken for an ordinary input character.
								//

								enum
								{
								    CHAR_COMMA      = 65538,
								    CHAR_QUOTE      = 65539,
								    CHAR_SEMICOLON  = 65540,
								    CHAR_OPENPAREN  = 65541,
								    CHAR_CLOSEPAREN = 65542,
								    CHAR_NULL       = 65543,
								    CHAR_NEWLINE    = 65544,
								    CHAR_RETURN     = 65545,
								    CHAR_TAB        = 65546,
								    CHAR_SPACE      = 65547,
								    CHAR_AT         = 65548,
								    CHAR_SLASH      = 65549,
								    CHAR_PERIOD     = 65550
								};


								//
								// States of the lexer DFA. NextToken starts every scan in
								// STATE_WHITESPACE and loops until it reaches STATE_DONE.
								//

								enum
								{
								    STATE_WHITESPACE = 0,
								    STATE_BEGIN      = 1,
								    STATE_IDENT      = 2,
								    STATE_QUOTE      = 3,
								    STATE_DONE       = 4
								};


								//
								// RESOLVE_CHAR(CHAR, MAP, ITER, ITEREND)
								//
								// Looks CHAR up in the special character map MAP. If it is found, the
								// expression yields the mapped special symbol (>65535); otherwise it
								// yields the regular character value.
								//
								// Side effects and caveats:
								//   - ITER is overwritten with the result of MAP.find(CHAR).
								//   - ITEREND must equal MAP.end().
								//   - CHAR is evaluated twice when it is not found in the map, so the
								//     argument must not have side effects (e.g. no ++ inside it).
								//
								// The definition is kept on a single line: a line continuation followed
								// by a blank line would leave the macro body empty.
								//

								#define RESOLVE_CHAR(CHAR, MAP, ITER, ITEREND) ((((ITER) = (MAP).find((CHAR))) == (ITEREND)) ? (CHAR) : (*(ITER)).second)


								AdlLexer::AdlLexer(IN       const WCHAR *input,
								                   IN OUT   AdlStatement *adlStat,
								                   IN       const PADL_LANGUAGE_SPEC pLang)
								/*++

								Routine Description:

								    Builds a lexer over the given input string. Resets the read position
								    and the token counter, then fills the two lookup tables used by
								    NextToken: one from the language's special characters to the internal
								    CHAR_* codes, and one from the language's keyword strings to the
								    TK_* token types.

								Arguments:

								    input   -   The string to tokenize. Only the pointer is stored.

								    adlStat -   The AdlStatement that NextToken hands every created token
								                to (token garbage collection)

								    pLang   -   The ADL language description supplying the special
								                characters, the keyword strings and the language type

								Return Value:

								    none

								--*/
								{
								    //
								    // Nothing has been read or returned yet
								    //

								    _tokCount = 0;
								    _position = 0;

								    _adlStat = adlStat;
								    _pLang = pLang;
								    _input = input;

								    //
								    // Special characters: end of input and whitespace
								    //

								    _mapCharCode[pLang->CH_NULL] = CHAR_NULL;
								    _mapCharCode[pLang->CH_SPACE] = CHAR_SPACE;
								    _mapCharCode[pLang->CH_TAB] = CHAR_TAB;
								    _mapCharCode[pLang->CH_NEWLINE] = CHAR_NEWLINE;
								    _mapCharCode[pLang->CH_RETURN] = CHAR_RETURN;

								    //
								    // Special characters: quote and punctuation
								    //

								    _mapCharCode[pLang->CH_QUOTE] = CHAR_QUOTE;
								    _mapCharCode[pLang->CH_COMMA] = CHAR_COMMA;
								    _mapCharCode[pLang->CH_SEMICOLON] = CHAR_SEMICOLON;
								    _mapCharCode[pLang->CH_OPENPAREN] = CHAR_OPENPAREN;
								    _mapCharCode[pLang->CH_CLOSEPAREN] = CHAR_CLOSEPAREN;
								    _mapCharCode[pLang->CH_AT] = CHAR_AT;
								    _mapCharCode[pLang->CH_SLASH] = CHAR_SLASH;
								    _mapCharCode[pLang->CH_PERIOD] = CHAR_PERIOD;

								    //
								    // The character map is complete; remember its end once so that
								    // RESOLVE_CHAR does not have to ask for it on every lookup
								    //

								    _iterEnd = _mapCharCode.end();

								    //
								    // Keyword strings of the language and the token type each one yields
								    //

								    _mapStringToken[pLang->SZ_TK_AND] = TK_AND;
								    _mapStringToken[pLang->SZ_TK_EXCEPT] = TK_EXCEPT;
								    _mapStringToken[pLang->SZ_TK_ON] = TK_ON;
								    _mapStringToken[pLang->SZ_TK_ALLOWED] = TK_ALLOWED;
								    _mapStringToken[pLang->SZ_TK_AS] = TK_AS;
								    _mapStringToken[pLang->SZ_TK_THIS_OBJECT] = TK_THIS_OBJECT;
								    _mapStringToken[pLang->SZ_TK_CONTAINERS] = TK_CONTAINERS;
								    _mapStringToken[pLang->SZ_TK_OBJECTS] = TK_OBJECTS;
								    _mapStringToken[pLang->SZ_TK_CONTAINERS_OBJECTS] = TK_CONTAINERS_OBJECTS;
								    _mapStringToken[pLang->SZ_TK_NO_PROPAGATE] = TK_NO_PROPAGATE;
								}


								DWORD AdlLexer::NextToken(OUT AdlToken **value)
								/*++

								Routine Description:

								    Retrieves the next token from the input string.

								    The very first call consumes no input: it only reports the grammar
								    (language) type taken from the language description. Every later call
								    runs a small DFA which begins in STATE_WHITESPACE and runs until it
								    reaches STATE_DONE:

								        STATE_WHITESPACE - skips spaces, tabs, newlines and returns. The
								                           end-of-input character ends the scan with
								                           token type 0.

								        STATE_BEGIN      - records where the token starts and classifies
								                           its first character: single-character token,
								                           opening quote, or start of an identifier.

								        STATE_IDENT      - collects characters until any special
								                           character is seen (that character is left
								                           unread).

								        STATE_QUOTE      - collects characters until the closing quote,
								                           which is consumed but not stored.

								    The collected text is wrapped in a new AdlToken. The token is handed to
								    the AdlStatement through AddToken and is also recorded through
								    SetErrorToken. If the text is found in the keyword map, the keyword's
								    token type replaces the type chosen by the DFA. That lookup is applied
								    to every token, including text that was written inside quotes.

								Arguments:

								    value   -   On the first call (language type) no token is created and
								                *value is set to NULL. On every later call a pointer to
								                the new token containing the string value is stored in
								                *value.

								Return Value:

								    DWORD   -   The token type, as #define'd by YACC in tokens.h. 0 is
								                returned when the end of the input is reached.

								Exceptions:

								    Throws AdlStatement::ERROR_UNTERMINATED_STRING if the end of input, a
								    tab, a newline or a return is found inside a quoted string.

								--*/
								{
								    //
								    // First token should be the grammar type. No AdlToken exists for it,
								    // so make sure the caller does not receive an uninitialized pointer.
								    //

								    if( _tokCount == 0 )
								    {
								        if( value != NULL )
								        {
								            *value = NULL;
								        }

								        _tokCount++;

								        return _pLang->dwLanguageType;
								    }

								    //
								    // Initial DFA state
								    //

								    DWORD state = STATE_WHITESPACE;
								    DWORD tokType = TK_ERROR;
								    DWORD dwTokStart = 0;

								    wstring curToken;

								    DWORD dwInput = RESOLVE_CHAR(_input[_position],
								                                 _mapCharCode,
								                                 _iter,
								                                 _iterEnd);

								    while( state != STATE_DONE )
								    {
								        //
								        // Set by a transition that consumes the current character; the
								        // read position is then advanced once, at the bottom of the loop
								        //

								        bool fConsume = false;

								        switch( state )
								        {

								        case STATE_WHITESPACE:

								            switch( dwInput )
								            {
								            case CHAR_NULL:

								                //
								                // End of input: token type 0, nothing is consumed
								                //

								                tokType = 0;
								                state = STATE_DONE;
								                break;

								            case CHAR_NEWLINE:
								            case CHAR_RETURN:
								            case CHAR_SPACE:
								            case CHAR_TAB:
								                fConsume = true;
								                break;

								            default:
								                state = STATE_BEGIN;
								                break;
								            }

								            break;

								        case STATE_BEGIN:

								            dwTokStart = _position;
								            tokType = TK_ERROR;

								            switch( dwInput )
								            {
								            case CHAR_NULL:
								                state = STATE_DONE;
								                break;

								            case CHAR_COMMA:
								            case CHAR_OPENPAREN:
								            case CHAR_CLOSEPAREN:
								            case CHAR_SEMICOLON:
								            case CHAR_AT:
								            case CHAR_SLASH:
								            case CHAR_PERIOD:

								                //
								                // Special single-char tokens differ only in their type
								                //

								                switch( dwInput )
								                {
								                case CHAR_COMMA:      tokType = TK_COMMA;      break;
								                case CHAR_OPENPAREN:  tokType = TK_OPENPAREN;  break;
								                case CHAR_CLOSEPAREN: tokType = TK_CLOSEPAREN; break;
								                case CHAR_SEMICOLON:  tokType = TK_SEMICOLON;  break;
								                case CHAR_AT:         tokType = TK_AT;         break;
								                case CHAR_SLASH:      tokType = TK_SLASH;      break;
								                case CHAR_PERIOD:     tokType = TK_PERIOD;     break;
								                }

								                //
								                // Same action for all special single-char tokens
								                //

								                curToken.append( &(_input[_position]), 1 );
								                fConsume = true;
								                state = STATE_DONE;
								                break;

								            case CHAR_QUOTE:

								                //
								                // The opening quote is consumed but not stored
								                //

								                fConsume = true;
								                state = STATE_QUOTE;
								                tokType = TK_IDENT;
								                break;

								            default:
								                state = STATE_IDENT;
								                tokType = TK_IDENT;
								                break;
								            }

								            break;

								        case STATE_IDENT:

								            switch( dwInput )
								            {
								            case CHAR_NULL:
								            case CHAR_COMMA:
								            case CHAR_OPENPAREN:
								            case CHAR_CLOSEPAREN:
								            case CHAR_SEMICOLON:
								            case CHAR_NEWLINE:
								            case CHAR_RETURN:
								            case CHAR_TAB:
								            case CHAR_SPACE:
								            case CHAR_AT:
								            case CHAR_SLASH:
								            case CHAR_PERIOD:
								            case CHAR_QUOTE:

								                //
								                // Any special character ends the identifier and is left
								                // in the input for the next call
								                //

								                state = STATE_DONE;
								                break;

								            default:
								                curToken.append( &(_input[_position]), 1 );
								                fConsume = true;
								                break;
								            }

								            break;

								        case STATE_QUOTE:

								            switch( dwInput )
								            {
								            case CHAR_NULL:
								            case CHAR_TAB:
								            case CHAR_NEWLINE:
								            case CHAR_RETURN:
								                throw AdlStatement::ERROR_UNTERMINATED_STRING;

								            case CHAR_QUOTE:

								                //
								                // The closing quote is consumed but not stored
								                //

								                fConsume = true;
								                state = STATE_DONE;
								                break;

								            default:
								                curToken.append( &(_input[_position]), 1 );
								                fConsume = true;
								                break;
								            }

								            break;

								        default:

								            //
								            // Should never get here, well-defined states
								            //

								            assert(FALSE);
								            break;
								        }

								        if( fConsume )
								        {
								            //
								            // _position is incremented in its own statement because
								            // RESOLVE_CHAR may evaluate its first argument twice
								            //

								            _position++;

								            dwInput = RESOLVE_CHAR(_input[_position],
								                                   _mapCharCode,
								                                   _iter,
								                                   _iterEnd);
								        }
								    }

								    //
								    // Done state was reached
								    // Export the string and column/row info in YACC-form here.
								    // The token records where it started and the position just before
								    // the current read position.
								    //

								    AdlToken *outVal;

								    outVal = new AdlToken(curToken.c_str(), dwTokStart, _position - 1);

								    _adlStat->AddToken(outVal);

								    //
								    // Check if the string is a special token, case-insensitive
								    //

								    if( _mapStringToken.find(outVal->GetValue()) != _mapStringToken.end() )
								    {
								        tokType = _mapStringToken[outVal->GetValue()];
								    }

								    *value = outVal;

								    //
								    // Set this token to be the error token. This way, if the string is
								    // not accepted by the parser, we know at which token the parser failed
								    // If another error occurs later, this value will be overwritten
								    //

								    _adlStat->SetErrorToken(outVal);

								    _tokCount++;

								    return tokType;
								}