Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

451 lines
11 KiB

/*++
Copyright (c) 2000 Microsoft Corporation
Module Name:
adllexer.cpp
Abstract:
Implementation of the lexer for the ADL language
Author:
t-eugenz - August 2000
Environment:
User mode only.
Revision History:
Created - August 2000
--*/
#include "adl.h"
//
// Internal codes for the characters that have a special meaning to the lexer.
// Every value is greater than 65535 (outside the WCHAR range), so a code can
// never collide with an ordinary input character. The AdlLexer constructor
// maps the language-specific characters (the CH_* fields of the language
// description) onto these codes, and NextToken switches on them after each
// input character has been passed through RESOLVE_CHAR.
//
#define CHAR_COMMA 65538
#define CHAR_QUOTE 65539
#define CHAR_SEMICOLON 65540
#define CHAR_OPENPAREN 65541
#define CHAR_CLOSEPAREN 65542
#define CHAR_NULL 65543
#define CHAR_NEWLINE 65544
#define CHAR_RETURN 65545
#define CHAR_TAB 65546
#define CHAR_SPACE 65547
#define CHAR_AT 65548
#define CHAR_SLASH 65549
#define CHAR_PERIOD 65550
//
// States of the lexer DFA driven by AdlLexer::NextToken:
//
//   STATE_WHITESPACE - initial state; skips newline, return, space and tab
//   STATE_BEGIN      - positioned on the first character of a token; decides
//                      which kind of token starts here
//   STATE_IDENT      - accumulating an unquoted identifier until a special
//                      character is reached
//   STATE_QUOTE      - accumulating the contents of a quoted string until
//                      the closing quote
//   STATE_DONE       - a token (or end of input) was recognized; the DFA
//                      loop terminates
//
#define STATE_WHITESPACE 0
#define STATE_BEGIN 1
#define STATE_IDENT 2
#define STATE_QUOTE 3
#define STATE_DONE 4
//
// If the character is found in the special character map, use the special
// symbol (>65535), otherwise use the regular character value
//
//   CHAR    - the character to classify; it appears twice in the expansion
//             (once in the lookup, once as the fallback result), so the
//             argument must not have side effects
//   MAP     - the map from special characters to CHAR_* codes
//   ITER    - scratch iterator; it is ASSIGNED the result of MAP.find(CHAR)
//             as a side effect of evaluating the macro
//   ITEREND - the end iterator of MAP, compared against ITER to detect
//             "not found"
//
#define RESOLVE_CHAR(CHAR, MAP, ITER, ITEREND) \
((((ITER) = (MAP).find((CHAR)) ) == (ITEREND) ) ? (CHAR) : (*(ITER)).second)
AdlLexer::AdlLexer(IN const WCHAR *input,
                   IN OUT AdlStatement *adlStat,
                   IN const PADL_LANGUAGE_SPEC pLang)
/*++

Routine Description:

    Builds a lexer over the given input string. Stores the input, the
    owning statement and the language description, resets the read position
    and the token counter, and fills the two lookup tables used by
    NextToken: the special-character table and the keyword table.

Arguments:

    input   - The string to tokenize

    adlStat - The AdlStatement instance; it is only stored here. NextToken
              later hands every token it creates to this object.

    pLang   - The ADL language description supplying the special characters
              (CH_*) and the keyword strings (SZ_TK_*)

Return Value:

    none

--*/
{
    //
    // Keep the collaborators and start at the beginning of the input, with
    // no tokens handed out yet
    //
    _adlStat  = adlStat;
    _pLang    = pLang;
    _input    = input;
    _tokCount = 0;
    _position = 0;

    //
    // Keyword table: language-specific keyword string -> parser token type
    //
    _mapStringToken[pLang->SZ_TK_AND]                = TK_AND;
    _mapStringToken[pLang->SZ_TK_EXCEPT]             = TK_EXCEPT;
    _mapStringToken[pLang->SZ_TK_ON]                 = TK_ON;
    _mapStringToken[pLang->SZ_TK_ALLOWED]            = TK_ALLOWED;
    _mapStringToken[pLang->SZ_TK_AS]                 = TK_AS;
    _mapStringToken[pLang->SZ_TK_THIS_OBJECT]        = TK_THIS_OBJECT;
    _mapStringToken[pLang->SZ_TK_CONTAINERS]         = TK_CONTAINERS;
    _mapStringToken[pLang->SZ_TK_OBJECTS]            = TK_OBJECTS;
    _mapStringToken[pLang->SZ_TK_CONTAINERS_OBJECTS] = TK_CONTAINERS_OBJECTS;
    _mapStringToken[pLang->SZ_TK_NO_PROPAGATE]       = TK_NO_PROPAGATE;

    //
    // Special-character table: language-specific character -> CHAR_* code.
    // The insertion order is significant only if the language description
    // uses the same character twice, in which case the later entry wins.
    //
    _mapCharCode[pLang->CH_NULL]       = CHAR_NULL;
    _mapCharCode[pLang->CH_SPACE]      = CHAR_SPACE;
    _mapCharCode[pLang->CH_TAB]        = CHAR_TAB;
    _mapCharCode[pLang->CH_NEWLINE]    = CHAR_NEWLINE;
    _mapCharCode[pLang->CH_RETURN]     = CHAR_RETURN;
    _mapCharCode[pLang->CH_QUOTE]      = CHAR_QUOTE;
    _mapCharCode[pLang->CH_COMMA]      = CHAR_COMMA;
    _mapCharCode[pLang->CH_SEMICOLON]  = CHAR_SEMICOLON;
    _mapCharCode[pLang->CH_OPENPAREN]  = CHAR_OPENPAREN;
    _mapCharCode[pLang->CH_CLOSEPAREN] = CHAR_CLOSEPAREN;
    _mapCharCode[pLang->CH_AT]         = CHAR_AT;
    _mapCharCode[pLang->CH_SLASH]      = CHAR_SLASH;
    _mapCharCode[pLang->CH_PERIOD]     = CHAR_PERIOD;

    //
    // The character table is complete now; cache its end iterator so that
    // RESOLVE_CHAR does not have to fetch it for every input character
    //
    _iterEnd = _mapCharCode.end();
}
DWORD AdlLexer::NextToken(OUT AdlToken **value)
/*++

Routine Description:

    Returns the next token of the input string.

    The very first call consumes no input and produces no token object: it
    only reports the grammar type of the language. Every later call runs a
    small DFA that starts in STATE_WHITESPACE and stops in STATE_DONE:

        WHITESPACE - skips newline, return, space and tab
        BEGIN      - recognizes single-character tokens, or enters IDENT
                     or QUOTE
        IDENT      - collects characters up to the next special character
        QUOTE      - collects characters up to the closing quote

    The resulting token is registered with the AdlStatement and recorded as
    its current error token before it is returned.

Arguments:

    value - Receives a pointer to the newly created token holding the
            string value. Not written on the very first call.

Return Value:

    DWORD - The token type, as #define'd by YACC in tokens.h. The first call
            returns the language's grammar type; 0 is returned at the end of
            the input.

    Throws AdlStatement::ERROR_UNTERMINATED_STRING when a quoted string
    reaches the end of input, a tab, a newline or a return before its
    closing quote.

--*/
{
    //
    // First token should be the grammar type; nothing is read from the input
    //
    if( _tokCount == 0 )
    {
        _tokCount++;
        return _pLang->dwLanguageType;
    }

    DWORD   dfaState   = STATE_WHITESPACE;
    DWORD   tokenType  = TK_ERROR;
    DWORD   tokenStart = 0;
    wstring lexeme;

    //
    // curChar always holds the resolved code of _input[_position]: it is
    // refreshed immediately after every advance of _position
    //
    DWORD curChar = RESOLVE_CHAR(_input[_position],
                                 _mapCharCode,
                                 _iter,
                                 _iterEnd);

    while( dfaState != STATE_DONE )
    {
        if( dfaState == STATE_WHITESPACE )
        {
            switch( curChar )
            {
            case CHAR_NULL:
                //
                // End of input: report token type 0
                //
                tokenType = 0;
                dfaState = STATE_DONE;
                break;

            case CHAR_NEWLINE:
            case CHAR_RETURN:
            case CHAR_SPACE:
            case CHAR_TAB:
                //
                // Skip one whitespace character
                //
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                break;

            default:
                //
                // Something else: a token starts here (nothing is consumed)
                //
                dfaState = STATE_BEGIN;
                break;
            }
        }
        else if( dfaState == STATE_BEGIN )
        {
            tokenStart = _position;
            tokenType = TK_ERROR;

            switch( curChar )
            {
            case CHAR_NULL:
                dfaState = STATE_DONE;
                break;

            case CHAR_QUOTE:
                //
                // Opening quote: consume it, it is not part of the value
                //
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                dfaState = STATE_QUOTE;
                tokenType = TK_IDENT;
                break;

            case CHAR_COMMA:
            case CHAR_OPENPAREN:
            case CHAR_CLOSEPAREN:
            case CHAR_SEMICOLON:
            case CHAR_AT:
            case CHAR_SLASH:
            case CHAR_PERIOD:
                //
                // Single-character token: pick its type ...
                //
                switch( curChar )
                {
                case CHAR_COMMA:
                    tokenType = TK_COMMA;
                    break;
                case CHAR_OPENPAREN:
                    tokenType = TK_OPENPAREN;
                    break;
                case CHAR_CLOSEPAREN:
                    tokenType = TK_CLOSEPAREN;
                    break;
                case CHAR_SEMICOLON:
                    tokenType = TK_SEMICOLON;
                    break;
                case CHAR_AT:
                    tokenType = TK_AT;
                    break;
                case CHAR_SLASH:
                    tokenType = TK_SLASH;
                    break;
                default:
                    tokenType = TK_PERIOD;
                    break;
                }

                //
                // ... then consume the character; the token is complete
                //
                lexeme.append( &(_input[_position]), 1 );
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                dfaState = STATE_DONE;
                break;

            default:
                //
                // Start of an unquoted identifier (nothing is consumed yet)
                //
                dfaState = STATE_IDENT;
                tokenType = TK_IDENT;
                break;
            }
        }
        else if( dfaState == STATE_IDENT )
        {
            switch( curChar )
            {
            case CHAR_NULL:
            case CHAR_NEWLINE:
            case CHAR_RETURN:
            case CHAR_TAB:
            case CHAR_SPACE:
            case CHAR_COMMA:
            case CHAR_SEMICOLON:
            case CHAR_OPENPAREN:
            case CHAR_CLOSEPAREN:
            case CHAR_AT:
            case CHAR_SLASH:
            case CHAR_PERIOD:
            case CHAR_QUOTE:
                //
                // Any special character ends the identifier; it is left in
                // the input for the next call
                //
                dfaState = STATE_DONE;
                break;

            default:
                lexeme.append( &(_input[_position]), 1 );
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                break;
            }
        }
        else if( dfaState == STATE_QUOTE )
        {
            switch( curChar )
            {
            case CHAR_NULL:
            case CHAR_TAB:
            case CHAR_NEWLINE:
            case CHAR_RETURN:
                //
                // A quoted string must be closed before any of these
                //
                throw AdlStatement::ERROR_UNTERMINATED_STRING;

            case CHAR_QUOTE:
                //
                // Closing quote: consume it, the token is complete
                //
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                dfaState = STATE_DONE;
                break;

            default:
                //
                // Everything else, including other special characters, is
                // part of the quoted value
                //
                lexeme.append( &(_input[_position]), 1 );
                _position++;
                curChar = RESOLVE_CHAR(_input[_position],
                                       _mapCharCode,
                                       _iter,
                                       _iterEnd);
                break;
            }
        }
        else
        {
            //
            // Should never get here, well-defined states
            //
            assert(FALSE);
        }
    }

    //
    // Done state was reached. Create the token with its text and its
    // start/end offsets, and hand it to the statement.
    // NOTE(review): for the end-of-input token STATE_BEGIN is never entered,
    // so the start offset stays 0 - confirm this is what consumers expect.
    //
    AdlToken *token = new AdlToken(lexeme.c_str(), tokenStart, _position - 1);

    _adlStat->AddToken(token);

    //
    // If the token text is one of the language keywords, the keyword's
    // token type replaces the type chosen by the DFA. How strings are
    // compared (e.g. case sensitivity) is decided by the keyword map itself.
    // NOTE(review): this check also applies to quoted strings - confirm
    // that a quoted keyword is meant to be treated as the keyword.
    //
    if( _mapStringToken.find(token->GetValue()) != _mapStringToken.end() )
    {
        tokenType = _mapStringToken[token->GetValue()];
    }

    *value = token;

    //
    // Set this token to be the error token. This way, if the string is
    // not accepted by the parser, we know at which token the parser failed.
    // If another error occurs later, this value will be overwritten.
    //
    _adlStat->SetErrorToken(token);

    _tokCount++;

    return tokenType;
}