/*++ Copyright (c) 1995 Microsoft Corporation Module Name: lexer.hxx Abstract: This module implements functions to recognize the tokens in the string repressentation of the search filter. The format of the search filter according to the RFC 1960. Author: Shankara Shastry [ShankSh] 08-Jul-1996 ++*/ #include "nds.hxx" #pragma hdrstop DFA_STATE CQryLexer::_pStateTable[MAX_STATES][MAX_CHAR_CLASSES] = gStateTable; DWORD CQryLexer::_pCharClassTable[] = gCharClassTable; //+--------------------------------------------------------------------------- // Function: CQryLexer // // Synopsis: Constructor: Allocate memory for the pattern and initialize // // Arguments: szBuffer: pattern // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- CQryLexer::CQryLexer( LPWSTR szBuffer ): _ptr(NULL), _Buffer(NULL), _dwEndofString(0), _dwState(ATTRTYPE_START_STATE), _lexeme() { _bInitialized = FALSE; if (!szBuffer || !*szBuffer) { return; } _Buffer = (LPWSTR) AllocADsMem( (wcslen(szBuffer)+1) * sizeof(WCHAR) ); if(_Buffer) wcscpy(_Buffer, szBuffer ); _ptr = _Buffer; } //+--------------------------------------------------------------------------- // Function: GetNextToken // // Synopsis: Give the next valid token // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- HRESULT CQryLexer::GetNextToken( LPWSTR *ppszToken, LPDWORD pdwToken ) { WCHAR wcNextChar; DWORD dwActionId; DFA_STATE dfaState; DWORD dwStartState = _dwState; // If there is no pattern if(!_ptr) { *pdwToken = TOKEN_ENDINPUT; RRETURN (S_OK); } // Start forming the lexeme. _lexeme.ResetLexeme(); *ppszToken = NULL; *pdwToken = TOKEN_ERROR; while (_dwState != ERROR_STATE && _dwState < FINAL_STATES_BEGIN) { // Get the character class from the character and then index the // state table wcNextChar = NextChar(); DWORD now = GetCharClass(wcNextChar); dwActionId = _pStateTable[_dwState][GetCharClass(wcNextChar)]. dwActionId; _dwState = _pStateTable[_dwState][GetCharClass(wcNextChar)]. dwNextState; if(_dwState == ERROR_STATE) { BAIL_ON_FAILURE (E_FAIL); } PerformAction(_dwState, wcNextChar, dwActionId ); } _bInitialized = TRUE; if(*pdwToken == TOKEN_ENDINPUT) RRETURN (S_OK); *ppszToken = _lexeme.GetLexeme(); *pdwToken = GetTokenFromState(_dwState); _dwStateSave = _dwState; // This is to set the start state for the next token to be recognized if(*pdwToken == TOKEN_ATTRTYPE) { _dwState = ATTRVAL_START_STATE; } else if (*pdwToken == TOKEN_ATTRVAL) { _dwState = ATTRTYPE_START_STATE; } else if (*pdwToken == TOKEN_PRESENT) { _dwState = ATTRTYPE_START_STATE; } else { _dwState = dwStartState; } RRETURN (S_OK); error: RRETURN (E_FAIL); } //+--------------------------------------------------------------------------- // Function: GetCurrentToken // // Synopsis: Give the current valid token, and do not advance unless // it is the first token // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- HRESULT CQryLexer::GetCurrentToken( LPWSTR *ppszToken, LPDWORD pdwToken ) { if (!_bInitialized) { HRESULT hr; hr = GetNextToken( ppszToken, pdwToken ); return hr; } else { *ppszToken = _lexeme.GetLexeme(); *pdwToken = GetTokenFromState(_dwStateSave); return (S_OK); } } //+--------------------------------------------------------------------------- // Function: NextChar // // Synopsis: Returns the next chaarcter in the pattern // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- WCHAR CQryLexer::NextChar() { if (_ptr == NULL || *_ptr == L'\0') { _dwEndofString = TRUE; return(L'\0'); } return(*_ptr++); } //+--------------------------------------------------------------------------- // Function: PushbackChar // // Synopsis: Puts back a character to the unrecognised pattern // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- void CQryLexer::PushbackChar() { if (_dwEndofString) { return; } _ptr--; } HRESULT CQryLexer::PerformAction( DWORD dwCurrState, WCHAR wcCurrChar, DWORD dwActionId ) { switch(dwActionId) { case ACTION_PUSHBACK_CHAR: PushbackChar(); break; case ACTION_PUSHBACK_2CHAR: PushbackChar(); PushbackChar(); _lexeme.PushBackChar(); break; case ACTION_IGNORE_ESCAPECHAR: break; case ACTION_DEFAULT: _lexeme.PushNextChar(wcCurrChar); break; } if(_dwState >= FINAL_STATES_BEGIN) _lexeme.PushNextChar(L'\0'); RRETURN (S_OK); } //+--------------------------------------------------------------------------- // Function: CQryLexer::GetTokenFromState // // Synopsis: // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- inline DWORD CQryLexer::GetTokenFromState( DWORD dwCurrState ) { return (dwCurrState - FINAL_STATES_BEGIN); } //+--------------------------------------------------------------------------- // Function: ~CQryLexer // // Synopsis: // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- CQryLexer::~CQryLexer() { if( _Buffer ) FreeADsMem (_Buffer); } //+--------------------------------------------------------------------------- // Function: CLexeme // // Synopsis: Constructor: Allocate memory for the pattern and initialize // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- CLexeme::CLexeme( ): _dwMaxLength(0), _dwIndex(0) { _pszLexeme = (LPWSTR) AllocADsMem(LEXEME_UNIT_LENGTH * sizeof(WCHAR)); if(_pszLexeme) _dwMaxLength = LEXEME_UNIT_LENGTH; } //+--------------------------------------------------------------------------- // Function: ~CLexeme // // Synopsis: Destructor // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- inline CLexeme::~CLexeme( ) { if(_pszLexeme) FreeADsMem(_pszLexeme); } //+--------------------------------------------------------------------------- // Function: PushNextChar // // Synopsis: Add the next character after making sure there is enough memory // // Arguments: // // Returns: // // Modifies: // // History: 07-09-96 ShankSh Created. // //---------------------------------------------------------------------------- HRESULT CLexeme::PushNextChar( WCHAR wcNextChar ) { if(_dwIndex >= _dwMaxLength) { _pszLexeme = (LPWSTR) ReallocADsMem( _pszLexeme, _dwMaxLength * sizeof(WCHAR), (_dwMaxLength + LEXEME_UNIT_LENGTH) * sizeof(WCHAR) ); BAIL_ON_NULL(_pszLexeme); _dwMaxLength += LEXEME_UNIT_LENGTH; } _pszLexeme[_dwIndex++] = wcNextChar; RRETURN (S_OK); error: RRETURN (E_FAIL); } HRESULT CLexeme::PushBackChar() { _pszLexeme[--_dwIndex] = '\0'; RRETURN (S_OK); } //+--------------------------------------------------------------------------- // // Function: RemoveWhiteSpaces // // Synopsis: Removes the leading and trailing white spaces // // Arguments: pszText Text strings from which the leading // and trailing white spaces are to be // removed // // Returns: LPWSTR Pointer to the modified string // // Modifies: // // History: 08-15-96 ShankSh Created. // //---------------------------------------------------------------------------- LPWSTR RemoveWhiteSpaces( LPWSTR pszText) { LPWSTR pChar; if(!pszText) return (pszText); while(*pszText && iswspace(*pszText)) pszText++; for(pChar = pszText + wcslen(pszText) - 1; pChar >= pszText; pChar--) { if(!iswspace(*pChar)) break; else *pChar = L'\0'; } return pszText; }