|
|
/*++
Copyright (c) 1995 Microsoft Corporation
Module Name:
lexer.cxx
Abstract:
This file exports the class CQryLexer and other declarations that recognize the tokens in the string representation of the search filter. The format of the search filter is according to RFC 1960.
Author:
Shankara Shastry [ShankSh] 08-Jul-1996
*/ #ifndef _QRYLEXER_HXX
#define _QRYLEXER_HXX
//
// Size of the chunk by which the lexeme buffer is grown each time more
// memory is needed.
// NOTE(review): whether the unit is WCHARs or bytes depends on the allocation
// code in the implementation file - confirm there.
//
#define LEXEME_UNIT_LENGTH 256
//
// Allowable tokens in the search string. These are the values handed back to
// the caller through the pdwToken argument of the lexer.
//
#define TOKEN_ERROR 0
#define TOKEN_LPARAN 1
#define TOKEN_RPARAN 2
#define TOKEN_OR 3
#define TOKEN_AND 4
#define TOKEN_NOT 5
#define TOKEN_APPROX_EQ 6
#define TOKEN_EQ 7
#define TOKEN_LE 8
#define TOKEN_GE 9
#define TOKEN_PRESENT 10
#define TOKEN_ATTRTYPE 11
#define TOKEN_ATTRVAL 12
#define TOKEN_END 13
// TOKEN_ED is the historical misspelling of TOKEN_END; it is kept as an alias
// so that existing code using the old name keeps compiling.
#define TOKEN_ED TOKEN_END
// NOTE(review): TOKEN_START shares the value 0 with TOKEN_ERROR - confirm that
// no caller needs to tell the two apart.
#define TOKEN_START 0
//
// Final states of the DFA. Each value is 100 plus the value of the TOKEN_*
// constant of the same name (ERROR_STATE pairs with TOKEN_ERROR).
//
#define ERROR_STATE 100
#define STATE_LPARAN 101
#define STATE_RPARAN 102
#define STATE_OR 103
#define STATE_AND 104
#define STATE_NOT 105
#define STATE_APPROX_EQ 106
// STATE_APPORX_EQ is the historical misspelling of STATE_APPROX_EQ; it is
// kept as an alias so that existing code using the old name keeps compiling.
#define STATE_APPORX_EQ STATE_APPROX_EQ
#define STATE_EQ 107
#define STATE_LE 108
#define STATE_GE 109
#define STATE_PRESENT 110
#define STATE_ATTRTYPE 111
#define STATE_ATTRVAL 112
#define STATE_END 113
// Lowest final-state number; the non-final states are the small numbers used
// as row indices of the transition table.
// NOTE(review): presumably the lexer treats every state >= this value as
// final - confirm in the implementation file.
#define FINAL_STATES_BEGIN 100
// Since the lexical specification forces the lexer to have some knowledge
// of the grammar, there are two start states: one where recognizing an
// ATTRTYPE is valid and one where recognizing an ATTRVAL is valid. The DFA
// starts with ATTRTYPE_START_STATE and switches to ATTRVAL_START_STATE when
// an AttrType is recognized, and vice-versa.
// These values are also the row indices of the two start states in
// gStateTable.
#define ATTRTYPE_START_STATE 0
#define ATTRVAL_START_STATE 1
#define MAX_STATES 11 // No. of non-final states in the DFA, i.e. rows 0..10 of gStateTable
// No. of different groups of characters for which the DFA behaves differently.
// For example, all alphabetical characters generate the same behaviour and can
// be considered the same as far as the DFA is concerned. This is mainly to
// reduce the size of the table. Each class is one column of gStateTable.
#define MAX_CHAR_CLASSES 18
// Class (column) index that stands for all other characters not mentioned
// explicitly; GetCharClass() also returns it for characters >= 256.
#define OTHER_CHAR_CLASS 14
// Various actions associated with a particular entry in the DFA table. The
// id is the second member of each {next state, action id} entry; how each
// action is carried out is decided by CQryLexer::PerformAction(), which is
// not defined in this header.
// NOTE(review): going by the names, PUSHBACK_CHAR / PUSHBACK_2CHAR presumably
// un-read one / two input characters - confirm in PerformAction().
// No entry of gStateTable in this header uses ACTION_IGNORE_ESCAPECHAR.
#define ACTION_DEFAULT 0
#define ACTION_IGNORE_ESCAPECHAR 1
#define ACTION_PUSHBACK_CHAR 2
#define ACTION_PUSHBACK_2CHAR 3
/* The state transition table is a table Table[i,j] with i being the current
   state and j being the input character class; the value Table[i,j] is the
   structure containing the next state and the id of the action to be
   performed. States 0 and 1 are the starting states when recognizing
   AttrType and AttrVal respectively.

   Columns (j), left to right:
   '('  ')'  '|'  '&'  '!'  '~'  '='  '<'  '>'  '*'  '\'  'alpha'  'num'  '.'  'other'  '\0'  'space'  ';'

    0  {101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {100,0}, {  3,0}, {  4,0}, {  5,0}, {100,0}, {  7,0}, {  7,0}, {  8,0}, {100,0}, {100,0}, {113,0}, {  0,0}, {100,0},
    1  {101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {  2,0}, {  3,0}, {  4,0}, {  5,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {113,0}, {  1,0}, {  9,0},
    2  {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {106,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0},
    3  {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, { 10,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2},
    4  {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,0}, {100,0}, {100,0}, {100,0}, {108,2}, {108,2}, {108,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0},
    5  {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,0}, {100,0}, {100,0}, {100,0}, {109,2}, {109,2}, {109,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0},
    6  {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {100,0}, {  9,0}, {  9,0},
    7  {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,0}, {100,0}, {  7,0}, {  7,0}, {  7,0}, {111,2}, {  7,0}, {111,2}, {  7,0}, {  7,0},
    8  {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {100,0}, {111,2}, {111,2}, {  8,0}, {  8,0}, {111,2}, {111,2}, {111,2}, {111,2},
    9  {112,2}, {112,2}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {112,2}, {  9,0}, {  9,0},
   10  {100,0}, {110,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {107,3}, {107,3}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0},
*/
// Initializer for the DFA transition table (11 rows x MAX_CHAR_CLASSES
// columns). Row i is the current non-final state; column j is the character
// class of the input character; each entry is {next state, action id}.
// Next-state values of 100 and above are the final states (ERROR_STATE,
// STATE_LPARAN, ...); action ids are the ACTION_* constants.
//
// Every row of the macro body ends with a line-continuation backslash, which
// must be the very last character on its line. All numbers are written in
// decimal: the entry for state 3 on '*' is state 10 (previously spelled as
// the octal literal 012).
//
// NOTE(review): in state 7 the '>' column is {111,0} while the neighbouring
// '<' and '=' columns are {111,2} (push the character back) - confirm that
// the missing pushback is intended.
#define gStateTable { \
    /*  0 */ {{101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {100,0}, {  3,0}, {  4,0}, {  5,0}, {100,0}, {  7,0}, {  7,0}, {  8,0}, {100,0}, {100,0}, {113,0}, {  0,0}, {100,0}}, \
    /*  1 */ {{101,0}, {102,0}, {103,0}, {104,0}, {105,0}, {  2,0}, {  3,0}, {  4,0}, {  5,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {113,0}, {  1,0}, {  9,0}}, \
    /*  2 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {106,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}}, \
    /*  3 */ {{107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, { 10,0}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}, {107,2}}, \
    /*  4 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {108,0}, {100,0}, {100,0}, {100,0}, {108,2}, {108,2}, {108,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}}, \
    /*  5 */ {{100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {109,0}, {100,0}, {100,0}, {100,0}, {109,2}, {109,2}, {109,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}}, \
    /*  6 */ {{  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {100,0}, {  9,0}, {  9,0}}, \
    /*  7 */ {{111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,0}, {100,0}, {  7,0}, {  7,0}, {  7,0}, {111,2}, {  7,0}, {111,2}, {  7,0}, {  7,0}}, \
    /*  8 */ {{111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {111,2}, {100,0}, {111,2}, {111,2}, {  8,0}, {  8,0}, {111,2}, {111,2}, {111,2}, {111,2}}, \
    /*  9 */ {{112,2}, {112,2}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {  9,0}, {112,2}, {  9,0}, {  9,0}}, \
    /* 10 */ {{100,0}, {110,2}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {107,3}, {107,3}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}, {100,0}} \
}
// This is the table containing the character class to which a particular
// character belongs. It is used to index the state transition table:
// for each of the 256 possible character codes 0..255 it gives one of the
// columns of the state transition table.
// Class numbers: 0 '('  1 ')'  2 '|'  3 '&'  4 '!'  5 '~'  6 '='  7 '<'
// 8 '>'  9 '*'  10 backslash  11 alpha  12 num  13 '.'  14 other
// 15 NUL  16 space (blank and codes 9..13)  17 ';'.
// Most of the entries are 14, indicating that they are 'other'.
//
// Every row of the macro body ends with a line-continuation backslash, which
// must be the very last character on its line.
#define gCharClassTable { \
    /* 0x00 */ 15, 14, 14, 14, 14, 14, 14, 14, 14, 16, 16, 16, 16, 16, 14, 14, \
    /* 0x10 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0x20 */ 16,  4, 14, 14, 14, 14,  3, 14,  0,  1,  9, 14, 14, 14, 13, 14, \
    /* 0x30 */ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 14, 17,  7,  6,  8, 14, \
    /* 0x40 */ 14, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \
    /* 0x50 */ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 14, 10, 14, 14, 14, \
    /* 0x60 */ 14, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \
    /* 0x70 */ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 14,  2, 14,  5, 14, \
    /* 0x80 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0x90 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xA0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xB0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xC0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xD0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xE0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
    /* 0xF0 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \
}
// Helper declared here because CLexeme::GetLexeme() passes the lexeme buffer
// through it; the definition is not part of this header.
// NOTE(review): going by the name, it presumably strips white space from
// pszText and returns the resulting string - confirm whether it works in
// place and which white space (leading/trailing/all) it removes.
LPWSTR RemoveWhiteSpaces( LPWSTR pszText );
// One cell of the DFA transition table: where to go next and which action to
// run on the way. The member order matches the {next state, action id}
// brace-initializers used by gStateTable.
typedef struct DFA_STATE
{
    DWORD dwNextState;  // state entered after consuming the character
    DWORD dwActionId;   // one of the ACTION_* ids
} DFA_STATE;
// CLexeme maintains the lexeme corresponding to the current token.
//
// The constructor, destructor, PushNextChar() and PushBackChar() are defined
// in the implementation file.
class CLexeme
{
public:
    CLexeme();
    ~CLexeme();

    // NOTE(review): presumably appends wcNextChar to the lexeme, growing the
    // buffer when needed, and reports allocation failure through the HRESULT -
    // confirm in the implementation.
    HRESULT PushNextChar( WCHAR wcNextChar);

    // NOTE(review): presumably drops the most recently pushed character -
    // confirm in the implementation.
    HRESULT PushBackChar();

    // Starts a new lexeme by rewinding the index to 0. The buffer itself is
    // neither released nor cleared here.
    void ResetLexeme() { _dwIndex = 0; }

    // Returns the lexeme buffer after passing it through RemoveWhiteSpaces().
    // Declared without the redundant "CLexeme::" qualifier: a qualified name
    // on an in-class member declaration is ill-formed in standard C++ and is
    // only tolerated as a compiler extension.
    LPWSTR GetLexeme() { return (RemoveWhiteSpaces(_pszLexeme)); }

private:
    LPWSTR _pszLexeme;      // lexeme buffer
    DWORD _dwMaxLength;     // NOTE(review): presumably the current capacity of _pszLexeme - confirm
    DWORD _dwIndex;         // index into _pszLexeme; reset to 0 by ResetLexeme()
};
// CQryLexer maintains all the state information and returns the next token.
//
// Member declarations are written without the redundant "CQryLexer::"
// qualifier: a qualified name on an in-class member declaration is ill-formed
// in standard C++ and is only tolerated as a compiler extension.
class CQryLexer
{
public:
    // Initialize the lexer with the string szBuffer.
    CQryLexer(LPWSTR szBuffer);

    ~CQryLexer();

    // Return the next token and its value.
    HRESULT GetNextToken(LPWSTR *szToken, LPDWORD pdwToken);

    // NOTE(review): presumably returns the most recently recognized token and
    // its value without advancing - confirm in the implementation.
    HRESULT GetCurrentToken( LPWSTR *ppszToken, LPDWORD pdwToken );

private:
    WCHAR NextChar();

    void PushbackChar();

    // Maps a character to its character class, i.e. the column of the state
    // transition table to use for it.
    DWORD GetCharClass(WCHAR wc)
    {
        if (wc < 256)
            return (_pCharClassTable[wc]);
        else
            // some unicode character; put in the other class.
            return (OTHER_CHAR_CLASS);
    }

    // Given the currentState reached and the character just scanned and the
    // action id, perform the action
    HRESULT PerformAction( DWORD dwCurrState, WCHAR wcCurrChar, DWORD dwActionId );

    // NOTE(review): presumably maps a final state (STATE_*) to the matching
    // TOKEN_* value - confirm in the implementation.
    DWORD GetTokenFromState( DWORD dwCurrState );

    // The common DFA state transition table for all the instances of the class
    static DFA_STATE _pStateTable[][MAX_CHAR_CLASSES];

    // The common table mapping the characters to the character classes.
    static DWORD _pCharClassTable[];

    // Data members: the declaration order is kept unchanged because it fixes
    // the construction order.
    LPWSTR _Buffer;             // String being analysed
    LPWSTR _ptr;                // pointer to the next character to be analysed.
    DFA_STATE _currState;       // maintains the state information for the DFA
    DWORD _dwState;             // maintains the state information for the DFA
    DWORD _dwEndofString;       // To indicate end of pattern
    CLexeme _lexeme;
    DWORD _dwStateSave;         // maintains the state information for the DFA
    BOOL _bInitialized;
    BOOL _bGetNext;
};
#endif
|