windows-server-2003/inetsrv/query/qutil/querylib/scanner.cxx


								//+---------------------------------------------------------------------------

								//

								//  Copyright (C) 1994-1998, Microsoft Corporation.

								//

								//  File:   SCANNER.CXX

								//

								//  Contents:   Implementation of CQueryScanner

								//

								//  History:    22-May-92   AmyA        Created.

								//              23-Jun-92   MikeHew     Added weight token recognition.

								//              17-May-94   t-jeffc     Added error info and reg ex support.

								//

								//----------------------------------------------------------------------------


								#include <pch.cxx>

								#pragma hdrstop


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::CQueryScanner, public

								//

								//  Synopsis:   Create a scanner from a string.

								//

								//  Arguments:  [buffer] -- the string to be scanned.

								//              [fLookForTextualKeywords] -- TRUE if the scanner should

								//                                           look for "and/or/not/near" in

								//                                           text form.

								//              [lcid]   -- language for and/or/not/near detection

								//              [fTreatPlusAsToken] -- TRUE if the scanner should treat the

								//                                           '+' character as a token (used

								//                                           in GroupBy parsing)

								//

								//  Notes:      This string is not copied, so the scanner does not own it.

								//              If the string is changed outside of the scanner, it will

								//              affect the information that is returned.

								//

								//  History:    30-Apr-92   AmyA        Created

								//

								//----------------------------------------------------------------------------


								CQueryScanner::CQueryScanner( WCHAR const * buffer,
								                              BOOL          fLookForTextualKeywords,
								                              LCID          lcid,
								                              BOOL          fTreatPlusAsToken )
								        : _text( buffer ),            // start of the current token
								          _pBuf( buffer ),            // the caller's (unowned) string
								          _pLookAhead( buffer ),      // scan position for the next token
								          _fLookForTextualKeywords( fLookForTextualKeywords ),
								          _fTreatPlusAsToken( fTreatPlusAsToken ),
								          _lcid( lcid )
								{
								    // Scan the first token right away so that the scanner is positioned
								    // on it as soon as construction completes.

								    Accept();
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcceptWord, public

								//

								//  Synopsis:   Consumes a single word out of a phrase

								//

								//  Requires:   Should be called after AcqWord

								//

								//  History:    15-Sep-92   BartoszM    Created

								//

								//----------------------------------------------------------------------------


								void CQueryScanner::AcceptWord()
								{
								    // Resume scanning from the current _text position rather than from
								    // wherever the previous Accept() left the look-ahead pointer, then
								    // scan the next token from there.

								    _pLookAhead = _text;

								    Accept();
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcceptColumn, public

								//

								//  Synopsis:   Consumes a column name out of a phrase

								//

								//  Requires:   Should be called after AcqColumn

								//

								//  History:    15-Sep-92   BartoszM    Created

								//

								//----------------------------------------------------------------------------


								void CQueryScanner::AcceptColumn()
								{
								    // A column name is consumed exactly the same way a word is.

								    AcceptWord();
								}


								// One textual keyword (e.g. "AND") together with the token it stands for.
								// The per-language tables in this file are arrays of these entries.
								struct SStringToken

								{

								    WCHAR *  pwcToken;   // keyword text; the tables store it in upper case

								    unsigned cwc;        // length of pwcToken in characters, without the terminator

								    Token    token;      // token reported when this keyword is matched

								};


								//
								// Per-language keyword tables.  Every table lists the same four
								// operators in the same order: AND, OR, NOT, NEAR (proximity).  The
								// length column is the literal's character count without its terminator.
								//

								// English
								static SStringToken s_EnglishStringTokens[] =
								{
								    { L"AND",    (sizeof(L"AND")    / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"OR",     (sizeof(L"OR")     / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"NOT",    (sizeof(L"NOT")    / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"NEAR",   (sizeof(L"NEAR")   / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// German
								static SStringToken s_GermanStringTokens[] =
								{
								    { L"UND",    (sizeof(L"UND")    / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"ODER",   (sizeof(L"ODER")   / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"NICHT",  (sizeof(L"NICHT")  / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"NAH",    (sizeof(L"NAH")    / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// French
								static SStringToken s_FrenchStringTokens[] =
								{
								    { L"ET",     (sizeof(L"ET")     / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"OU",     (sizeof(L"OU")     / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"SANS",   (sizeof(L"SANS")   / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"PRES",   (sizeof(L"PRES")   / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// Spanish
								static SStringToken s_SpanishStringTokens[] =
								{
								    { L"Y",      (sizeof(L"Y")      / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"O",      (sizeof(L"O")      / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"NO",     (sizeof(L"NO")     / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"CERCA",  (sizeof(L"CERCA")  / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// Dutch
								static SStringToken s_DutchStringTokens[] =
								{
								    { L"EN",     (sizeof(L"EN")     / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"OF",     (sizeof(L"OF")     / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"NIET",   (sizeof(L"NIET")   / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"NABIJ",  (sizeof(L"NABIJ")  / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// Swedish.  The proximity keyword contains character 0xc4 (capital A
								// with diaeresis), so it is spelled out as an explicit character array.
								static WCHAR aSwedishNear[] = { L'N', 0xc4, L'R', L'A', 0 };

								static SStringToken s_SwedishStringTokens[] =
								{
								    { L"OCH",    (sizeof(L"OCH")    / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"ELLER",  (sizeof(L"ELLER")  / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"INTE",   (sizeof(L"INTE")   / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { aSwedishNear, (sizeof(aSwedishNear) / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// Italian
								static SStringToken s_ItalianStringTokens[] =
								{
								    { L"E",      (sizeof(L"E")      / sizeof(WCHAR)) - 1, AND_TOKEN  },
								    { L"O",      (sizeof(L"O")      / sizeof(WCHAR)) - 1, OR_TOKEN   },
								    { L"NO",     (sizeof(L"NO")     / sizeof(WCHAR)) - 1, NOT_TOKEN  },
								    { L"VICINO", (sizeof(L"VICINO") / sizeof(WCHAR)) - 1, PROX_TOKEN },
								};

								// Number of entries in each table; the search loop uses this count for
								// whichever table it is handed, so all tables must be the same size.
								const unsigned cStringTokens = sizeof(s_EnglishStringTokens) /
								                               sizeof(s_EnglishStringTokens[0]);

								// Characters that are allowed to follow a textual keyword directly
								// (end of string is allowed as well).
								#define WORD_STR  L"{}!&|~*@#()[],\t=<>\n\"^ "


								//+---------------------------------------------------------------------------

								//

								//  Function:   InternalFindStringToken

								//

								//  Synopsis:   Looks for a textual token in plain text.

								//

								//  Arguments:  [pwcIn]   -- string to search

								//              [token]   -- returns the token found

								//              [cwc]     -- returns length of token found

								//              [pTokens] -- token array to use

								//

								//  Returns:    Pointer to token or 0 if none was found

								//

								//  History:    08-Feb-96   dlee        created

								//

								//----------------------------------------------------------------------------


								WCHAR * InternalFindStringToken(
								    WCHAR *        pwcIn,
								    Token &        token,
								    unsigned &     cwc,
								    SStringToken * pTokens )
								{
								    // Earliest acceptable keyword seen so far, over all table entries.
								    // [token] and [cwc] are only written when a keyword is accepted.

								    WCHAR * pwcBest = 0;

								    for ( unsigned iEntry = 0; iEntry < cStringTokens; iEntry++ )
								    {
								        SStringToken const & entry = pTokens[ iEntry ];

								        // Walk every occurrence of this keyword until one is properly
								        // delimited on both sides.

								        for ( WCHAR * pwcHit = wcsstr( pwcIn, entry.pwcToken );
								              0 != pwcHit;
								              pwcHit = wcsstr( pwcHit + 1, entry.pwcToken ) )
								        {
								            // The keyword must be followed by the end of the string or
								            // by one of the WORD_STR characters...

								            WCHAR const wcAfter = pwcHit[ entry.cwc ];

								            if ( ( 0 != wcAfter ) && ( 0 == wcschr( WORD_STR, wcAfter ) ) )
								                continue;

								            // ...and must either start the string or follow white space.

								            if ( ( pwcHit != pwcIn ) && !iswspace( pwcHit[ -1 ] ) )
								                continue;

								            // Keep it only if it is closer to the start of the string
								            // than what the earlier table entries produced.

								            if ( ( 0 == pwcBest ) || ( pwcHit < pwcBest ) )
								            {
								                pwcBest = pwcHit;
								                token   = entry.token;
								                cwc     = entry.cwc;
								            }

								            // The first delimited occurrence of this keyword is the only
								            // one that matters; move on to the next table entry.

								            break;
								        }
								    }

								    return pwcBest;
								} //InternalFindStringToken


								SStringToken * GetStringTokenArray(
								    LCID lcid )
								{
								    // English is used for LANG_NEUTRAL, LANG_ENGLISH and every language
								    // that has no table of its own.

								    SStringToken * pTable = s_EnglishStringTokens;

								    switch ( PRIMARYLANGID( LANGIDFROMLCID( lcid ) ) )
								    {
								        case LANG_GERMAN :   pTable = s_GermanStringTokens;   break;
								        case LANG_FRENCH :   pTable = s_FrenchStringTokens;   break;
								        case LANG_SPANISH :  pTable = s_SpanishStringTokens;  break;
								        case LANG_DUTCH :    pTable = s_DutchStringTokens;    break;
								        case LANG_SWEDISH :  pTable = s_SwedishStringTokens;  break;
								        case LANG_ITALIAN :  pTable = s_ItalianStringTokens;  break;

								        default :
								            break;
								    }

								    Win4Assert( 0 != pTable );

								    return pTable;
								}


								//+---------------------------------------------------------------------------

								//

								//  Function:   FindStringToken

								//

								//  Synopsis:   Looks for a textual token in plain text.  Always tries

								//              English, tries a different language depending on _lcid.

								//

								//  Arguments:  [pwcIn] -- string to search

								//              [token] -- returns the token found

								//              [cwc]   -- returns length of token found

								//

								//  Returns:    Pointer to token or 0 if none was found

								//

								//  History:    08-Feb-96   dlee        created

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::FindStringToken(
								    WCHAR *    pwcIn,
								    Token &    token,
								    unsigned & cwc )
								{
								    // First pass: the keyword table selected by the scanner's locale.

								    SStringToken * const pLocaleTable = GetStringTokenArray( _lcid );

								    WCHAR * const pwcLocale = InternalFindStringToken( pwcIn,
								                                                       token,
								                                                       cwc,
								                                                       pLocaleTable );

								    // If that table already was the English one there is nothing more
								    // to try.

								    if ( s_EnglishStringTokens == pLocaleTable )
								        return pwcLocale;

								    // Second pass: English keywords are recognized in every locale.

								    Token    tokEnglish;
								    unsigned cwcEnglish;

								    WCHAR * const pwcEnglish = InternalFindStringToken( pwcIn,
								                                                        tokEnglish,
								                                                        cwcEnglish,
								                                                        s_EnglishStringTokens );

								    // Keep the locale-specific result unless English matched and did so
								    // strictly earlier in the string (or was the only match).

								    if ( 0 == pwcEnglish )
								        return pwcLocale;

								    if ( ( 0 != pwcLocale ) && ( pwcLocale <= pwcEnglish ) )
								        return pwcLocale;

								    token = tokEnglish;
								    cwc   = cwcEnglish;

								    return pwcEnglish;
								} //FindStringToken


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::Accept, public

								//

								//  Synopsis:   Determines what the next token is.  Will advance _pLookAhead

								//              over the next token and white space.

								//

								//  Notes:      There are five different types of TEXT_TOKENS, Phrase, Path,

								//              Number, Column and Command.  Since the length of the token

								//              depends on which token it is, _pLookAhead is forwarded to the

								//              end of the longest, and _text is used to parse the token in the

								//              various Acq and Get methods.

								//

								//  History:    30-Apr-92   AmyA        Created

								//              19-May-92   AmyA        Added Guid hack

								//              23-Jun-92   MikeHew     Added weight token recognition.

								//              26-May-94   t-jeffc     Added more tokens; rearranged to

								//                                      support parsing errors

								//

								//----------------------------------------------------------------------------


								void CQueryScanner::Accept()

								{

								    EatWhiteSpace();


								    _text = _pLookAhead;


								    switch ( *_pLookAhead )

								    {

								    case '&':

								        _pLookAhead++;

								        _token = AND_TOKEN;

								        break;


								    case '*':

								        _pLookAhead++;


								        if ( *_pLookAhead == '*' )

								        {

								            _token = FUZ2_TOKEN;

								            _pLookAhead++;

								        }

								        else

								            _token = FUZZY_TOKEN;

								        break;


								    case '=':

								        _pLookAhead++;

								        _token = EQUAL_TOKEN;

								        break;


								    case '<':

								        _pLookAhead++;

								        if ( *_pLookAhead == '=' )

								        {

								            _token = LESS_EQUAL_TOKEN;

								            _pLookAhead++;

								        }

								        else

								            _token = LESS_TOKEN;

								        break;


								    case '>':

								        _pLookAhead++;

								        if ( *_pLookAhead == '=' )

								        {

								            _token = GREATER_EQUAL_TOKEN;

								            _pLookAhead++;

								        }

								        else

								            _token = GREATER_TOKEN;

								        break;


								    case '!':

								        _pLookAhead++;

								        if ( *_pLookAhead == '=' )

								        {

								            _token = NOT_EQUAL_TOKEN;

								            _pLookAhead++;

								        }

								        else

								        {

								            _token = NOT_TOKEN;

								        }

								        break;


								    case '|':

								        _pLookAhead++;

								        _token = OR_TOKEN;

								        break;


								    case '~':

								        _pLookAhead++;

								        _token = PROX_TOKEN;

								        break;


								    case '@':

								        _pLookAhead++;

								        _token = PROP_TOKEN;

								        break;


								    case '#':

								        _pLookAhead++;

								        _token = PROP_REGEX_TOKEN;

								        break;


								    case '(':

								        _pLookAhead++;

								        _token = OPEN_TOKEN;

								        break;


								    case ')':

								        _pLookAhead++;

								        _token = CLOSE_TOKEN;

								        break;


								    case '[':

								        _pLookAhead++;

								        _token = W_OPEN_TOKEN;

								        break;


								    case ']':

								        _pLookAhead++;

								        _token = W_CLOSE_TOKEN;

								        break;


								    case ',':

								        _pLookAhead++;

								        _token = COMMA_TOKEN;

								        break;


								    case '\0':

								    case 0x1A:  // CTRL-Z

								        _token = EOS_TOKEN;

								        break;


								    case '"':

								        _pLookAhead++;

								        _token = QUOTES_TOKEN;

								        break;


								    case '$':

								        _pLookAhead++;

								        _token = PROP_NATLANG_TOKEN;

								        break;


								    case '{':

								       _pLookAhead++;

								      _token = C_OPEN_TOKEN;

								      break;


								    case '}':

								       _pLookAhead++;

								      _token = C_CLOSE_TOKEN;

								      break;


								    case '^':

								    {

								        WCHAR wc = *(_pLookAhead + 1);


								        BOOL fOk = TRUE;


								        if (L'a' == wc) // all bits

								            _token = ALLOF_TOKEN;

								        else if (L's' == wc) // some bits

								            _token = SOMEOF_TOKEN;

								        else

								            fOk = FALSE;


								        if (fOk)

								        {

								            _pLookAhead += 2;

								            break;

								        }

								    }

								    // FALL THROUGH


								    case '+':

								        if (*_pLookAhead == L'+' && _fTreatPlusAsToken)

								        {

								            _pLookAhead++;

								            _token = PLUS_TOKEN;

								            break;

								        }

								    // FALL THROUGH


								    default:

								    {

								        // forwards pwcEnd over anything that could be in a phrase,

								        // which is the most inclusive of the TEXT_TOKENs.

								        // (except, for regex's and phrases in quotes - but they're

								        // handled separately)


								        WCHAR const *pwcEnd = _text + wcscspn( _text, PHRASE_STR );


								        if ( _fLookForTextualKeywords )

								        {

								            unsigned cwc = (unsigned) ( pwcEnd - _text );

								            cwc = __min( cwc, MAX_PATH * 2 );


								            // if a textual keyword is beyond 500 chars in the string,

								            // blow it off -- the workaround is to use the '&|~' version.


								            WCHAR awcBuf[ 1 + MAX_PATH * 2 ];

								            RtlCopyMemory( awcBuf, _text, cwc * sizeof WCHAR );

								            awcBuf[ cwc ] = 0;


								            ULONG cwcOut = LCMapString( _lcid,

								                                        LCMAP_UPPERCASE,

								                                        awcBuf,

								                                        cwc,

								                                        awcBuf,

								                                        cwc );

								            if ( cwcOut != cwc )

								                THROW( CException() );


								            Token token;

								            unsigned cwcToken;

								            WCHAR *pwcTok = FindStringToken( awcBuf, token, cwcToken );


								            if ( 0 != pwcTok )

								            {

								                // a textual token exists in the string


								                if ( pwcTok == awcBuf )

								                {

								                    // textual token at the start of the string


								                    _token = token;

								                    _pLookAhead = _text + cwcToken;

								                }

								                else

								                {

								                    // textual token in the middle of the string, stop the

								                    // current token at that point and get it next time

								                    // Accept() is called.


								                    _pLookAhead = _text + ( pwcTok - awcBuf );

								                    _token = TEXT_TOKEN;

								                }

								            }

								            else

								            {

								                _pLookAhead = pwcEnd;

								                _token = TEXT_TOKEN;

								            }

								        }

								        else

								        {

								            _pLookAhead = pwcEnd;

								            _token = TEXT_TOKEN;

								        }


								        break;

								    }

								    }

								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AllocReturnString, private inline

								//

								//  Synopsis:   Copies all of the relevant characters of the string that

								//              _text is pointing to and returns the new string.

								//

								//  History:    17 Apr 97   AlanW       Created

								//

								//----------------------------------------------------------------------------


								inline WCHAR * CQueryScanner::AllocReturnString( int cch )
								{
								    // Make a null-terminated copy of the first cch characters at _text.
								    // The caller becomes the owner of the returned array.

								    WCHAR * pwszCopy = new WCHAR [ cch + 1 ];

								    RtlCopyMemory( pwszCopy, _text, cch * sizeof(WCHAR) );
								    pwszCopy[ cch ] = L'\0';

								    // Step over what was copied and any white space that follows it.

								    for ( _text += cch; iswspace( *_text ); _text++ )
								        continue;

								    return pwszCopy;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqPath, public

								//

								//  Synopsis:   Copies all of the relevant characters of the string that

								//              _text is pointing to and returns the new string.  Will

								//              return 0 if _text is at end of whole TEXT_TOKEN.

								//

								//  Notes:      Since the string is copied, the caller of this function is

								//              responsible for freeing the memory occupied by the string.

								//              This method can be called several times before calling

								//              Accept(), so many paths can be acquired if they exist in the

								//              scanner.

								//

								//  History:    30-Apr-92   AmyA        Created

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqPath()
								{
								    if ( !IsEndOfTextToken() )
								    {
								        // The path is everything up to the first CMND_STR character.

								        int const cchPath = (int) wcscspn( _text, CMND_STR );

								        return AllocReturnString( cchPath );
								    }

								    // nothing left in the current text token

								    return 0;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqWord, public

								//

								//  Synopsis:   Copies the word that _text is pointing to and returns the

								//              new string. Positions _text after the word and whitespace.

								//              Returns 0 if at the end of a TEXT_TOKEN.

								//

								//  History:    29-Jun-92    MikeHew    Created.

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqWord()
								{
								    if ( IsEndOfTextToken() )
								        return 0;

								    // The word ends at the first white space character, or at the end
								    // of the text token, whichever comes first.

								    WCHAR const * pwcStop = _text;

								    for ( ; !iswspace( *pwcStop ) && ( pwcStop < _pLookAhead ); pwcStop++ )
								        continue;

								    unsigned const cwcWord = CiPtrToUint( pwcStop - _text );

								    return AllocReturnString( cwcWord );
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqColumn, public

								//

								//  Synopsis:   Copies a column name and returns the new string.  A column

								//              name is either a single word, or a quoted string.

								//              Positions _text after the word and whitespace.

								//

								//  Returns:    WCHAR* pointer to column name.  0 if no column name found.

								//

								//  History:    17 Apr 97    AlanW      Created.

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqColumn()
								{
								    WCHAR * pwszColumn = 0;

								    if ( QUOTES_TOKEN == _token )
								    {
								        // Quoted column name: step past the opening quote, copy up to
								        // the closing quote, then continue from where the quoted
								        // phrase ended.

								        Accept();
								        pwszColumn = AcqPhraseInQuotes();
								        _text = _pLookAhead;
								    }
								    else if ( !IsEndOfTextToken() )
								    {
								        // Unquoted column name: everything up to the first COLUMN_STR
								        // character.

								        int const cchColumn = (int) wcscspn( _text, COLUMN_STR );
								        pwszColumn = AllocReturnString( cchColumn );
								    }

								    return pwszColumn;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqPhrase, public

								//

								//  Synopsis:   Copies all of the relevant characters of the string that

								//              _text is pointing to and returns the new string.

								//              Returns 0 if at the end of a text token.

								//

								//  Notes:      Since the string is copied, the caller of this function is

								//              responsible for freeing the memory occupied by the string.

								//              The difference between this function and AcqPath is that this

								//              should only be called once before calling Accept().

								//

								//  History:    30-Apr-92   AmyA        Created

								//              09-May-96   DwightKr    Strip trailing white space

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqPhrase()
								{
								    if ( IsEndOfTextToken() )
								        return 0;

								    //
								    //  _pLookAhead points at the first character of the NEXT token, so
								    //  the phrase's last character is the one before it.  Back up over
								    //  trailing white space, but never past the first character.
								    //

								    WCHAR const * pwcLast = _pLookAhead - 1;

								    for ( ; ( pwcLast > _text ) && iswspace( *pwcLast ); pwcLast-- )
								        continue;

								    unsigned const cwcPhrase = CiPtrToUint( pwcLast - _text ) + 1;

								    // Hand back a null-terminated copy; the caller owns it.

								    WCHAR * pwszPhrase = new WCHAR [ cwcPhrase + 1 ];

								    RtlCopyMemory( pwszPhrase, _text, cwcPhrase * sizeof( WCHAR ) );
								    pwszPhrase[ cwcPhrase ] = 0;

								    return pwszPhrase;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqRegEx, public

								//

								//  Synopsis:   Copies all of the relevant characters of the string that

								//              _text is pointing to and returns the new string.  Matches

								//              the longest string possible.  An unquoted regex ends at the

								//              end of the string, at a space, or at a ')' that is neither

								//              the first character nor preceded by '|'.  A regex that

								//              starts with a quote runs to its closing quote; the quotes

								//              themselves are not copied.

								//              Returns 0 if the regex is empty.

								//

								//  Notes:      Since the string is copied, the caller of this function is

								//              responsible for freeing the memory occupied by the string.

								//              Because some regex characters are duplicated in the query

								//              language, _pLookAhead is ignored (and actually reset) in

								//              this operation.  Like AcqPhrase(), this should be called only

								//              once before Accept().

								//

								//  History:    10-May-94   t-jeffc     Created

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqRegEx()
								{
								    // A regex that opens with a quote runs up to its closing quote.

								    BOOL const fQuoted = ( L'"' == *_text );

								    WCHAR const * pwcEnd = fQuoted ? ( _text + 1 ) : _text;

								    // Find the end of the regex.  On exit pwcEnd points one past the
								    // last character that belongs to it (closing quote included).

								    for ( ;; pwcEnd++ )
								    {
								        WCHAR const wc = *pwcEnd;

								        if ( 0 == wc )
								        {
								            // a quoted regex must be closed before the string ends

								            if ( fQuoted )
								                THROW( CException( QPARSE_E_UNEXPECTED_EOS ) );

								            break;
								        }

								        if ( fQuoted )
								        {
								            // inside quotes only the closing quote ends the regex

								            if ( L'"' == wc )
								            {
								                pwcEnd++;
								                break;
								            }
								        }
								        else if ( L' ' == wc )
								        {
								            break;
								        }
								        else if ( ( L')' == wc ) &&
								                  ( pwcEnd != _text ) &&
								                  ( L'|' != pwcEnd[ -1 ] ) )
								        {
								            // a ')' ends the regex unless it is the very first
								            // character or comes right after a '|'

								            break;
								        }
								    }

								    if ( _text == pwcEnd )
								        return 0;

								    // The look-ahead pointer set by Accept() is replaced by the end
								    // found above.

								    _pLookAhead = pwcEnd;

								    // Copy the regex, leaving out the surrounding quotes if there
								    // were any.

								    unsigned cwcRegEx = CiPtrToUint( _pLookAhead - _text );
								    WCHAR const * pwcSource = _text;

								    if ( fQuoted )
								    {
								        Win4Assert( cwcRegEx >= 2 );
								        cwcRegEx -= 2;
								        pwcSource++;
								    }

								    WCHAR * pwszRegEx = new WCHAR[ cwcRegEx + 1 ];

								    RtlCopyMemory( pwszRegEx, pwcSource, cwcRegEx * sizeof( WCHAR ) );
								    pwszRegEx[ cwcRegEx ] = 0;

								    return pwszRegEx;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqPhraseInQuotes, public

								//

								//  Synopsis:   Copies all characters until a matching " is found, or until

								//              the end of string.  Embedded quotes are escaped with a quote:

								//              "Bill ""the man"" Gates"

								//

								//  Notes:      Since the string is copied, the caller of this function is

								//              responsible for freeing the memory occupied by the string.

								//

								//  History:    18-Jan-95   SitaramR        Created

								//               3-Jul-96   dlee            added embedded quotes

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqPhraseInQuotes()
								{
								    // Locate the end of the phrase: the first quote that is not
								    // immediately followed by another quote, or the end of the string.

								    WCHAR const * pwcClose = _text;

								    while ( 0 != *pwcClose )
								    {
								        if ( L'"' == *pwcClose )
								        {
								            if ( L'"' != pwcClose[ 1 ] )
								                break;

								            pwcClose++;     // first quote of a doubled "" pair
								        }

								        pwcClose++;
								    }

								    // The copy can only be shorter than the raw span, so the raw
								    // length is a sufficient buffer size.

								    unsigned const cwcRaw = CiPtrToUint( pwcClose - _text );

								    WCHAR * const pwszPhrase = new WCHAR [ cwcRaw + 1 ];

								    // Copy the phrase.  After each copied character a quote that
								    // follows it is skipped, which collapses every doubled "" pair
								    // into a single quote.

								    WCHAR * pwcDst = pwszPhrase;

								    for ( WCHAR const * pwcSrc = _text; pwcSrc < pwcClose; )
								    {
								        *pwcDst++ = *pwcSrc++;

								        if ( L'"' == *pwcSrc )
								            pwcSrc++;
								    }

								    *pwcDst = 0;

								    // Scanning resumes after the closing quote, or at the end of the
								    // string when the phrase was never closed.

								    _pLookAhead = ( L'"' == *pwcClose ) ? ( pwcClose + 1 ) : pwcClose;

								    return pwszPhrase;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetNumber, public

								//

								//  Synopsis:   If _text is at the end of the TEXT_TOKEN, returns FALSE.

								//              If not, puts the ULONG from the scanner into number and

								//              returns TRUE.

								//

								//  Arguments:  [number] -- the ULONG which will be changed and passed back

								//                          out as the ULONG from the scanner.

								//              [fAtEnd] -- returns TRUE if at the end of the scanned string

								//

								//  Notes:      May be called several times in a loop before Accept() is

								//              called.

								//

								//  History:    11-May-92   AmyA        Created

								//

								//----------------------------------------------------------------------------

								BOOL CQueryScanner::GetNumber( ULONG & number, BOOL & fAtEnd )
								{
								    //
								    // Only an unsigned decimal or 0x-prefixed hex number is accepted.
								    // A leading '-' (or any other non-digit) already fails iswdigit.
								    //

								    if ( IsEndOfTextToken() || !iswdigit(*_text) )
								        return FALSE;

								    //
								    // Parse through a local cursor so that _text is only moved once the
								    // number has been accepted.  A failed parse therefore leaves the
								    // scanner position untouched, matching the signed overloads.
								    //

								    WCHAR const * pwcDigits = _text;
								    ULONG base = 10;

								    // is this a hex number?

								    if ( L'0' == pwcDigits[0] &&
								         ( L'x' == pwcDigits[1] || L'X' == pwcDigits[1] ) )
								    {
								        pwcDigits += 2;
								        base = 16;

								        //
								        // wcstoul itself skips white space and accepts a sign, so insist
								        // on a hex digit here; otherwise "0x-5" or "0x 5" would be
								        // taken as numbers.
								        //

								        if ( !iswxdigit( *pwcDigits ) )
								            return FALSE;
								    }

								    WCHAR * pwcEnd = 0;

								    number = wcstoul( pwcDigits, &pwcEnd, base );

								    //
								    // Reject if no digits were consumed, or if the digits are followed by
								    // a '.', which means the text looks like a real number.
								    //

								    if ( ( pwcEnd == pwcDigits ) || ( L'.' == *pwcEnd ) )
								        return FALSE;

								    // Accepted: consume the number and any white space that follows it.

								    while ( iswspace( *pwcEnd ) )
								        pwcEnd++;

								    _text = pwcEnd;

								    fAtEnd = ( 0 == *_text );

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetNumber, public

								//

								//  Synopsis:   If _text is at the end of the TEXT_TOKEN, returns FALSE.

								//              If not, puts the LONG from the scanner into number and

								//              returns TRUE.

								//

								//  Arguments:  [number] -- the LONG which will be changed and passed back

								//                          out as the LONG from the scanner.

								//              [fAtEnd] -- returns TRUE if at the end of the scanned string

								//

								//  Notes:      May be called several times in a loop before Accept() is

								//              called.

								//

								//  History:    96-Jan-15   DwightKr    Created

								//

								//----------------------------------------------------------------------------

								BOOL CQueryScanner::GetNumber( LONG & number, BOOL & fAtEnd )
								{
								    // Remember the starting position so any failure can put _text back.

								    WCHAR const * const pwcSaved = _text;

								    BOOL const fNegative = ( L'-' == *_text );

								    //
								    // Largest magnitude that fits in a LONG.  The negative range reaches
								    // one further than the positive range.
								    //

								    ULONG ulLimit = (ULONG) LONG_MAX;

								    if ( fNegative )
								    {
								        ulLimit++;
								        _text++;        // step over the sign
								    }

								    //
								    // Parse the magnitude with the unsigned overload, then reject values
								    // that would overflow/underflow a LONG.
								    //

								    ULONG ulMagnitude;

								    if ( !GetNumber( ulMagnitude, fAtEnd ) || ( ulMagnitude > ulLimit ) )
								    {
								        _text = pwcSaved;
								        return FALSE;
								    }

								    if ( !fNegative )
								        number = (LONG) ulMagnitude;
								    else if ( ulLimit == ulMagnitude )
								        number = LONG_MIN;      // magnitude has no positive LONG counterpart
								    else
								        number = - (LONG) ulMagnitude;

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetNumber, public

								//

								//  Synopsis:   If _text is at the end of the TEXT_TOKEN, returns FALSE.

								//              If not, puts the unsigned _int64 from the scanner into number and

								//              returns TRUE.

								//

								//  Arguments:  [number] -- the unsigned _int64 which will be changed and passed back

								//                          out as the unsigned _int64 from the scanner.

								//              [fAtEnd] -- returns TRUE if at the end of the scanned string

								//

								//  Notes:      May be called several times in a loop before Accept() is

								//              called.

								//

								//  History:    27-Feb-96   dlee        Created

								//

								//----------------------------------------------------------------------------

								BOOL CQueryScanner::GetNumber( unsigned _int64 & number, BOOL & fAtEnd )
								{
								    //
								    // Only an unsigned decimal or 0x-prefixed hex number is accepted.
								    // A leading '-' (or any other non-digit) already fails iswdigit.
								    //

								    if ( IsEndOfTextToken() || !iswdigit(*_text) )
								        return FALSE;

								    //
								    // Parse through a local cursor so that _text is only moved once the
								    // number has been accepted.  A failed parse therefore leaves the
								    // scanner position untouched, matching the signed overloads.
								    //

								    WCHAR const * pwcDigits = _text;
								    ULONG base = 10;

								    // is this a hex number?

								    if ( L'0' == pwcDigits[0] &&
								         ( L'x' == pwcDigits[1] || L'X' == pwcDigits[1] ) )
								    {
								        pwcDigits += 2;
								        base = 16;

								        //
								        // _wcstoui64 itself skips white space and accepts a sign, so
								        // insist on a hex digit here; otherwise "0x-5" or "0x 5" would
								        // be taken as numbers.
								        //

								        if ( !iswxdigit( *pwcDigits ) )
								            return FALSE;
								    }

								    WCHAR * pwcEnd = 0;

								    number = _wcstoui64( pwcDigits, &pwcEnd, base );

								    //
								    // Reject if no digits were consumed, or if the digits are followed by
								    // a '.', which means the text looks like a real number.
								    //

								    if ( ( pwcEnd == pwcDigits ) || ( L'.' == *pwcEnd ) )
								        return FALSE;

								    // Accepted: consume the number and any white space that follows it.

								    while ( iswspace( *pwcEnd ) )
								        pwcEnd++;

								    _text = pwcEnd;

								    fAtEnd = ( 0 == *_text );

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetNumber, public

								//

								//  Synopsis:   If _text is at the end of the TEXT_TOKEN, returns FALSE.

								//              If not, puts the _int64 from the scanner into number and

								//              returns TRUE.

								//

								//  Arguments:  [number] -- the _int64 which will be changed and passed back

								//                          out as the _int64 from the scanner.

								//              [fAtEnd] -- returns TRUE if at the end of the scanned string

								//

								//  Notes:      May be called several times in a loop before Accept() is

								//              called.

								//

								//  History:    27-Feb-96   dlee        Created

								//

								//----------------------------------------------------------------------------

								BOOL CQueryScanner::GetNumber( _int64 & number, BOOL & fAtEnd )
								{
								    // Remember the starting position so any failure can put _text back.

								    WCHAR const * const pwcSaved = _text;

								    BOOL const fNegative = ( L'-' == *_text );

								    //
								    // Largest magnitude that fits in an _int64.  The negative range
								    // reaches one further than the positive range.
								    //

								    unsigned _int64 ullLimit = (unsigned _int64) _I64_MAX;

								    if ( fNegative )
								    {
								        ullLimit++;
								        _text++;        // step over the sign
								    }

								    //
								    // Parse the magnitude with the unsigned overload, then reject values
								    // that would overflow/underflow an _int64.
								    //

								    unsigned _int64 ullMagnitude;

								    if ( !GetNumber( ullMagnitude, fAtEnd ) || ( ullMagnitude > ullLimit ) )
								    {
								        _text = pwcSaved;
								        return FALSE;
								    }

								    if ( !fNegative )
								        number = (_int64) ullMagnitude;
								    else if ( ullLimit == ullMagnitude )
								        number = _I64_MIN;      // magnitude has no positive _int64 counterpart
								    else
								        number = -((_int64) ullMagnitude);

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetNumber, public

								//

								//  Synopsis:   If _text is at the end of the TEXT_TOKEN, returns FALSE.

								//              If not, puts the double from the scanner into number and

								//              returns TRUE.

								//

								//  Arguments:  [number] -- the double which will be changed and passed back

								//                          out as the double from the scanner.

								//

								//  Notes:      May be called several times in a loop before Accept() is

								//              called.

								//

								//  History:    96-Jan-15   DwightKr    Created

								//

								//----------------------------------------------------------------------------

								BOOL CQueryScanner::GetNumber( double & number )
								{
								    // Must be inside the text token and positioned on a digit.

								    if ( IsEndOfTextToken() || !iswdigit(*_text) )
								        return FALSE;

								    //
								    // %n stores how many characters the %lf conversion consumed; it does
								    // not add to swscanf's return value, so success is still exactly 1.
								    //

								    int cwcUsed = 0;

								    if ( swscanf( _text, L"%lf%n", &number, &cwcUsed ) != 1 )
								    {
								        return FALSE;
								    }

								    //
								    // Consume the number itself.  Without this step _text would still
								    // point at the first digit, the white-space loop below would never
								    // run, and a repeated call would return the same value again.
								    //

								    _text += cwcUsed;

								    while ( iswspace(*_text) != 0 )
								        _text++;

								    return TRUE;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::GetCommandChar, public

								//

								//  Synopsis:   Returns the command character pointed to by _text and advances

								//              _text.  If the command can't be uniquely determined by the

								//              first character, each subsequent call will return the next

								//              character in the word.  After the command has been determined,

								//              AcceptCommand() should be called and then operand parsing may begin.

								//

								//  History:    14-May-92   AmyA        Created

								//              16-May-94   t-jeffc     Returns one character at a time to

								//                                      support more commands

								//

								//----------------------------------------------------------------------------


								WCHAR CQueryScanner::GetCommandChar()
								{
								    // Nothing left in the text token: report 0.

								    if ( IsEndOfTextToken() )
								        return 0;

								    // Consume one character and hand it back lower-cased.

								    WCHAR const wcNext = *_text++;

								    return towlower( wcNext );
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcceptCommand, public

								//

								//  Synopsis:   Advances _text past any characters in the command.

								//              Used when enough command characters have been

								//              read to uniquely determine the command and begin parsing

								//              the operands.

								//

								//  History:    16-May-94   t-jeffc         Created

								//

								//----------------------------------------------------------------------------


								void  CQueryScanner::AcceptCommand()
								{
								    //
								    // Length of the run starting at _text that contains no character
								    // from CMND_STR.
								    //

								    int const cwcRest = wcscspn( _text, CMND_STR );

								    // Step over that run and restart the look-ahead from the new position.

								    _pLookAhead = ( _text += cwcRest );

								    Accept();
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::ResetBuffer, public

								//

								//  Synopsis:   Puts a new string into _pBuf and resets _pLookAhead

								//              accordingly.

								//

								//  Arguments:  [buffer] -- the new string for _pBuf

								//

								//  History:    05-May-92   AmyA        Created

								//

								//----------------------------------------------------------------------------


								void CQueryScanner::ResetBuffer( WCHAR const * buffer )
								{
								    //
								    // Point the scanner at the new string (the pointer is stored, the
								    // characters are not copied here) and restart the look-ahead at its
								    // first character.
								    //

								    _pLookAhead = _pBuf = buffer;

								    Accept();
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::EatWhiteSpace, private

								//

								//  Synopsis:   Advances _pLookAhead past any white space in the string.

								//

								//  History:    29-Apr-92   AmyA        Created

								//

								//----------------------------------------------------------------------------


								void CQueryScanner::EatWhiteSpace()
								{
								    // Step _pLookAhead forward until it rests on a non-white-space character.

								    for ( ; 0 != iswspace( *_pLookAhead ); _pLookAhead++ )
								    {
								    }
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::IsEndOfTextToken, private

								//

								//  Synopsis:   Returns TRUE if the current token is not a TEXT_TOKEN or

								//              if the string starting at _text to _pLookAhead contains

								//              nothing but whitespace.

								//

								//  History:    27-May-94   t-jeffc     Created

								//

								//----------------------------------------------------------------------------


								BOOL CQueryScanner::IsEndOfTextToken()
								{
								    //
								    // Text remains only when the current token is a TEXT_TOKEN and _text
								    // has not yet reached _pLookAhead.
								    //

								    BOOL const fTextRemains = ( TEXT_TOKEN == _token ) &&
								                              ( _text < _pLookAhead );

								    return !fTextRemains;
								}


								//+---------------------------------------------------------------------------

								//

								//  Member:     CQueryScanner::AcqLine, public

								//

								//  Synopsis:   Copies all of the remaining characters on the line;

								//              return 0 if _text is at end of whole TEXT_TOKEN.

								//

								//  Arguments:  [fParseQuotes] -- if TRUE, initial and final quotes are removed

								//

								//  Notes:      Since the string is copied, the caller of this function is

								//              responsible for freeing the memory occupied by the string.

								//              This method can be called several times before calling

								//              Accept(), so many paths can be acquired if they exist in the

								//              scanner.

								//

								//  History:    96-Jan-03   DwightKr    Created

								//              96-Feb-26   DwightKr    Allow lines to be quoted

								//

								//----------------------------------------------------------------------------


								WCHAR * CQueryScanner::AcqLine( BOOL fParseQuotes )
								{
								    // Nothing left to acquire.

								    if ( 0 == *_text )
								        return 0;

								    //
								    // Length of the remaining text with trailing \r, \n and other white
								    // space (any character <= ' ') stripped off.
								    //

								    unsigned cwcLine = wcslen(_text);

								    for ( ; cwcLine > 0 && _text[cwcLine-1] <= L' '; cwcLine-- )
								    {
								    }

								    //
								    // When asked to, drop an opening quote and, if the line also ends
								    // with a quote, that closing quote as well.  Note that this moves
								    // _text past the opening quote.
								    //

								    if ( fParseQuotes && ( L'"' == _text[0] ) && ( cwcLine > 1 ) )
								    {
								        if ( L'"' == _text[cwcLine-1] )
								            cwcLine--;

								        _text++;
								        cwcLine--;
								    }

								    // Copy the line into a new buffer; the caller owns and frees it.

								    WCHAR * const pwcLine = new WCHAR [ cwcLine + 1 ];

								    RtlCopyMemory( pwcLine, _text, cwcLine * sizeof(WCHAR) );
								    pwcLine[cwcLine] = 0;

								    //
								    // NOTE(review): the look-ahead is placed on the last copied character
								    // rather than one past it, and when cwcLine is 0 it ends up one
								    // character before _text.  Presumably callers rely on this -- confirm
								    // before changing.
								    //

								    _pLookAhead = _text + cwcLine - 1;

								    return pwcLine;
								} //AcqLine