//+--------------------------------------------------------------------------- // // Copyright (C) 1994-1998, Microsoft Corporation. // // File: SCANNER.CXX // // Contents: Implementation of CQueryScanner // // History: 22-May-92 AmyA Created. // 23-Jun-92 MikeHew Added weight token recognition. // 17-May-94 t-jeffc Added error info and reg ex support. // //---------------------------------------------------------------------------- #include #pragma hdrstop //+--------------------------------------------------------------------------- // // Member: CQueryScanner::CQueryScanner, public // // Synopsis: Create a scanner from a string. // // Arguments: [buffer] -- the string to be scanned. // [fLookForTextualKeywords] -- TRUE if the scanner should // look for "and/or/not/near" in // text form. // [lcid] -- language for and/or/not/near detection // [fTreatPlusAsToken] -- TRUE if the scanner should treat the // '+' character as a token (used // in GroupBy parsing) // // Notes: This string is not copied, so the scanner does not own it. // If the string is changed outside of the scanner, it will // affect the information that is returned. // // History: 30-Apr-92 AmyA Created // //---------------------------------------------------------------------------- CQueryScanner::CQueryScanner( WCHAR const * buffer, BOOL fLookForTextualKeywords, LCID lcid, BOOL fTreatPlusAsToken ) : _text( buffer ), _pBuf( buffer ), _pLookAhead( buffer ), _fLookForTextualKeywords( fLookForTextualKeywords ), _fTreatPlusAsToken( fTreatPlusAsToken ), _lcid( lcid ) { Accept(); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcceptWord, public // // Synopsis: Consumes a single word out of a phrase // // Requires: Should be called after AcqWord // // History: 15-Sep-92 BartoszM Created // //---------------------------------------------------------------------------- void CQueryScanner::AcceptWord() { _pLookAhead = _text; Accept(); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcceptColumn, public // // Synopsis: Consumes a column name out of a phrase // // Requires: Should be called after AcqColumn // // History: 15-Sep-92 BartoszM Created // //---------------------------------------------------------------------------- void CQueryScanner::AcceptColumn() { AcceptWord(); } struct SStringToken { WCHAR * pwcToken; unsigned cwc; Token token; }; static SStringToken s_EnglishStringTokens[] = { { L"AND", (sizeof L"AND" / sizeof WCHAR) - 1, AND_TOKEN }, { L"OR", (sizeof L"OR" / sizeof WCHAR) - 1, OR_TOKEN }, { L"NOT", (sizeof L"NOT" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"NEAR", (sizeof L"NEAR" / sizeof WCHAR) - 1, PROX_TOKEN }, }; static SStringToken s_GermanStringTokens[] = { { L"UND", (sizeof L"UND" / sizeof WCHAR) - 1, AND_TOKEN }, { L"ODER", (sizeof L"ODER" / sizeof WCHAR) - 1, OR_TOKEN }, { L"NICHT", (sizeof L"NICHT" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"NAH", (sizeof L"NAH" / sizeof WCHAR) - 1, PROX_TOKEN }, }; static SStringToken s_FrenchStringTokens[] = { { L"ET", (sizeof L"ET" / sizeof WCHAR) - 1, AND_TOKEN }, { L"OU", (sizeof L"OU" / sizeof WCHAR) - 1, OR_TOKEN }, { L"SANS", (sizeof L"SANS" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"PRES", (sizeof L"PRES" / sizeof WCHAR) - 1, PROX_TOKEN }, }; static SStringToken s_SpanishStringTokens[] = { { L"Y", (sizeof L"Y" / sizeof WCHAR) - 1, AND_TOKEN }, { L"O", (sizeof L"O" / sizeof WCHAR) - 1, OR_TOKEN }, { L"NO", (sizeof L"NO" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"CERCA", (sizeof L"CERCA" / sizeof WCHAR) - 1, PROX_TOKEN }, }; static SStringToken s_DutchStringTokens[] = { { L"EN", (sizeof L"EN" / sizeof WCHAR) - 1, AND_TOKEN }, { L"OF", (sizeof L"OF" / sizeof WCHAR) - 1, OR_TOKEN }, { L"NIET", (sizeof L"NIET" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"NABIJ", (sizeof L"NABIJ" / sizeof WCHAR) - 1, PROX_TOKEN }, }; static WCHAR aSwedishNear[] = { L'N', 0xc4, L'R', L'A', 0 }; static SStringToken s_SwedishStringTokens[] = { { L"OCH", (sizeof L"OCH" / sizeof WCHAR) - 1, AND_TOKEN }, { L"ELLER", (sizeof L"ELLER" / sizeof WCHAR) - 1, OR_TOKEN }, { L"INTE", (sizeof L"INTE" / sizeof WCHAR) - 1, NOT_TOKEN }, { aSwedishNear, 4, PROX_TOKEN }, }; static SStringToken s_ItalianStringTokens[] = { { L"E", (sizeof L"E" / sizeof WCHAR) - 1, AND_TOKEN }, { L"O", (sizeof L"O" / sizeof WCHAR) - 1, OR_TOKEN }, { L"NO", (sizeof L"NO" / sizeof WCHAR) - 1, NOT_TOKEN }, { L"VICINO", (sizeof L"VICINO" / sizeof WCHAR) - 1, PROX_TOKEN }, }; const unsigned cStringTokens = sizeof(s_EnglishStringTokens) / sizeof(s_EnglishStringTokens[0]); #define WORD_STR L"{}!&|~*@#()[],\t=<>\n\"^ " //+--------------------------------------------------------------------------- // // Function: InternalFindStringToken // // Synopsis: Looks for a textual token in plain text. // // Arguments: [pwcIn] -- string to search // [token] -- returns the token found // [cwc] -- returns length of token found // [pTokens] -- token array to use // // Returns: Pointer to token or 0 if none was found // // History: 08-Feb-96 dlee created // //---------------------------------------------------------------------------- WCHAR * InternalFindStringToken( WCHAR * pwcIn, Token & token, unsigned & cwc, SStringToken * pTokens ) { // for each of and/or/not/near WCHAR *pwcOut = 0; for ( unsigned i = 0; i < cStringTokens; i++ ) { WCHAR *pwcStr = wcsstr( pwcIn, pTokens[i].pwcToken ); while ( pwcStr ) { // found a match -- does it have white space on either side? WCHAR wcBeyond = * (pwcStr + pTokens[i].cwc); if ( ( ( 0 == wcBeyond ) || ( wcschr( WORD_STR, wcBeyond ) ) ) && ( ( pwcStr == pwcIn ) || ( iswspace( * ( pwcStr - 1 ) ) ) ) ) { // if the first match found or the match closest to the // beginning of the string, use it. if ( ( 0 == pwcOut ) || ( pwcStr < pwcOut ) ) { pwcOut = pwcStr; token = pTokens[i].token; cwc = pTokens[i].cwc; } break; } pwcStr = wcsstr( pwcStr + 1, pTokens[i].pwcToken ); } } return pwcOut; } //InternalFindStringToken SStringToken * GetStringTokenArray( LCID lcid ) { SStringToken *pTokens; switch ( PRIMARYLANGID( LANGIDFROMLCID( lcid ) ) ) { case LANG_GERMAN : pTokens = s_GermanStringTokens; break; case LANG_FRENCH : pTokens = s_FrenchStringTokens; break; case LANG_SPANISH : pTokens = s_SpanishStringTokens; break; case LANG_DUTCH : pTokens = s_DutchStringTokens; break; case LANG_SWEDISH : pTokens = s_SwedishStringTokens; break; case LANG_ITALIAN : pTokens = s_ItalianStringTokens; break; case LANG_NEUTRAL : case LANG_ENGLISH : default : pTokens = s_EnglishStringTokens; break; } Win4Assert( 0 != pTokens ); return pTokens; } //+--------------------------------------------------------------------------- // // Function: FindStringToken // // Synopsis: Looks for a textual token in plain text. Always tries // English, tries a different language depending on _lcid. // // Arguments: [pwcIn] -- string to search // [token] -- returns the token found // [cwc] -- returns length of token found // // Returns: Pointer to token or 0 if none was found // // History: 08-Feb-96 dlee created // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::FindStringToken( WCHAR * pwcIn, Token & token, unsigned & cwc ) { SStringToken * pTokens = GetStringTokenArray( _lcid ); WCHAR * pwcToken = InternalFindStringToken( pwcIn, token, cwc, pTokens ); // if the search above wasn't in English, try English too. if ( pTokens != s_EnglishStringTokens ) { unsigned cwcEnglish; Token tokenEnglish; WCHAR * pwcEnglish = InternalFindStringToken( pwcIn, tokenEnglish, cwcEnglish, s_EnglishStringTokens ); // If there is no language-specific match or the English match // occurs before the language-specific match, use the English // match. if ( ( 0 != pwcEnglish ) && ( ( 0 == pwcToken ) || ( pwcEnglish < pwcToken ) ) ) { pwcToken = pwcEnglish; token = tokenEnglish; cwc = cwcEnglish; } } return pwcToken; } //FindStringToken //+--------------------------------------------------------------------------- // // Member: CQueryScanner::Accept, public // // Synopsis: Determines what the next token is. Will advance _pLookAhead // over the next token and white space. // // Notes: There are five different types of TEXT_TOKENS, Phrase, Path, // Number, Column and Command. Since the length of the token // depends on which token it is, _pLookAhead is forwarded to the // end of the longest, and _text is used to parse the token in the // various Acq and Get methods. // // History: 30-Apr-92 AmyA Created // 19-May-92 AmyA Added Guid hack // 23-Jun-92 MikeHew Added weight token recognition. // 26-May-94 t-jeffc Added more tokens; rearranged to // support parsing errors // //---------------------------------------------------------------------------- void CQueryScanner::Accept() { EatWhiteSpace(); _text = _pLookAhead; switch ( *_pLookAhead ) { case '&': _pLookAhead++; _token = AND_TOKEN; break; case '*': _pLookAhead++; if ( *_pLookAhead == '*' ) { _token = FUZ2_TOKEN; _pLookAhead++; } else _token = FUZZY_TOKEN; break; case '=': _pLookAhead++; _token = EQUAL_TOKEN; break; case '<': _pLookAhead++; if ( *_pLookAhead == '=' ) { _token = LESS_EQUAL_TOKEN; _pLookAhead++; } else _token = LESS_TOKEN; break; case '>': _pLookAhead++; if ( *_pLookAhead == '=' ) { _token = GREATER_EQUAL_TOKEN; _pLookAhead++; } else _token = GREATER_TOKEN; break; case '!': _pLookAhead++; if ( *_pLookAhead == '=' ) { _token = NOT_EQUAL_TOKEN; _pLookAhead++; } else { _token = NOT_TOKEN; } break; case '|': _pLookAhead++; _token = OR_TOKEN; break; case '~': _pLookAhead++; _token = PROX_TOKEN; break; case '@': _pLookAhead++; _token = PROP_TOKEN; break; case '#': _pLookAhead++; _token = PROP_REGEX_TOKEN; break; case '(': _pLookAhead++; _token = OPEN_TOKEN; break; case ')': _pLookAhead++; _token = CLOSE_TOKEN; break; case '[': _pLookAhead++; _token = W_OPEN_TOKEN; break; case ']': _pLookAhead++; _token = W_CLOSE_TOKEN; break; case ',': _pLookAhead++; _token = COMMA_TOKEN; break; case '\0': case 0x1A: // CTRL-Z _token = EOS_TOKEN; break; case '"': _pLookAhead++; _token = QUOTES_TOKEN; break; case '$': _pLookAhead++; _token = PROP_NATLANG_TOKEN; break; case '{': _pLookAhead++; _token = C_OPEN_TOKEN; break; case '}': _pLookAhead++; _token = C_CLOSE_TOKEN; break; case '^': { WCHAR wc = *(_pLookAhead + 1); BOOL fOk = TRUE; if (L'a' == wc) // all bits _token = ALLOF_TOKEN; else if (L's' == wc) // some bits _token = SOMEOF_TOKEN; else fOk = FALSE; if (fOk) { _pLookAhead += 2; break; } } // FALL THROUGH case '+': if (*_pLookAhead == L'+' && _fTreatPlusAsToken) { _pLookAhead++; _token = PLUS_TOKEN; break; } // FALL THROUGH default: { // forwards pwcEnd over anything that could be in a phrase, // which is the most inclusive of the TEXT_TOKENs. // (except, for regex's and phrases in quotes - but they're // handled separately) WCHAR const *pwcEnd = _text + wcscspn( _text, PHRASE_STR ); if ( _fLookForTextualKeywords ) { unsigned cwc = (unsigned) ( pwcEnd - _text ); cwc = __min( cwc, MAX_PATH * 2 ); // if a textual keyword is beyond 500 chars in the string, // blow it off -- the workaround is to use the '&|~' version. WCHAR awcBuf[ 1 + MAX_PATH * 2 ]; RtlCopyMemory( awcBuf, _text, cwc * sizeof WCHAR ); awcBuf[ cwc ] = 0; ULONG cwcOut = LCMapString( _lcid, LCMAP_UPPERCASE, awcBuf, cwc, awcBuf, cwc ); if ( cwcOut != cwc ) THROW( CException() ); Token token; unsigned cwcToken; WCHAR *pwcTok = FindStringToken( awcBuf, token, cwcToken ); if ( 0 != pwcTok ) { // a textual token exists in the string if ( pwcTok == awcBuf ) { // textual token at the start of the string _token = token; _pLookAhead = _text + cwcToken; } else { // textual token in the middle of the string, stop the // current token at that point and get it next time // Accept() is called. _pLookAhead = _text + ( pwcTok - awcBuf ); _token = TEXT_TOKEN; } } else { _pLookAhead = pwcEnd; _token = TEXT_TOKEN; } } else { _pLookAhead = pwcEnd; _token = TEXT_TOKEN; } break; } } } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AllocReturnString, private inline // // Synopsis: Copies all of the relevant characters of the string that // _text is pointing to and returns the new string. // // History: 17 Apr 97 AlanW Created // //---------------------------------------------------------------------------- inline WCHAR * CQueryScanner::AllocReturnString( int cch ) { WCHAR * newBuf = new WCHAR [ cch + 1 ]; RtlCopyMemory ( newBuf, _text, cch * sizeof(WCHAR)); newBuf[cch] = L'\0'; _text += cch; while ( iswspace(*_text) ) _text++; return newBuf; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqPath, public // // Synopsis: Copies all of the relevant characters of the string that // _text is pointing to and returns the new string. Will // return 0 if _text is at end of whole TEXT_TOKEN. // // Notes: Since the string is copied, the caller of this function is // responsible for freeing the memory occupied by the string. // This method can be called several times before calling // Accept(), so many paths can be acquired if they exist in the // scanner. // // History: 30-Apr-92 AmyA Created // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqPath() { if ( IsEndOfTextToken() ) return 0; // how many characters follow _text that are not in CMND_STR? int count = wcscspn( _text, CMND_STR ); return AllocReturnString( count ); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqWord, public // // Synopsis: Copies the word that _text is pointing to and returns the // new string. Positions _text after the word and whitespace. // Returns 0 if at the end of a TEXT_TOKEN. // // History: 29-Jun-92 MikeHew Created. // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqWord() { if ( IsEndOfTextToken() ) return 0; WCHAR const * pEnd = _text; while ( !iswspace(*pEnd) && pEnd < _pLookAhead ) pEnd++; unsigned count = CiPtrToUint( pEnd - _text ); return AllocReturnString( count ); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqColumn, public // // Synopsis: Copies a column name and returns the new string. A column // name is either a single word, or a quoted string. // Positions _text after the word and whitespace. // // Returns: WCHAR* pointer to column name. 0 if no column name found. // // History: 17 Apr 97 AlanW Created. // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqColumn() { if ( QUOTES_TOKEN == _token) { Accept(); WCHAR * pwszOut = AcqPhraseInQuotes(); _text = _pLookAhead; return pwszOut; } if ( IsEndOfTextToken() ) return 0; int count = wcscspn( _text, COLUMN_STR ); return AllocReturnString( count ); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqPhrase, public // // Synopsis: Copies all of the relevant characters of the string that // _text is pointing to and returns the new string. // Returns 0 if at the end of a text token. // // Notes: Since the string is copied, the caller of this function is // responsible for freeing the memory occupied by the string. // The difference between this function and AcqPath is that this // should only be called once before calling Accept(). // // History: 30-Apr-92 AmyA Created // 09-May-96 DwightKr Strip trailing white space // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqPhrase() { if( IsEndOfTextToken() ) return 0; // // Strip trailing white-space from the end of the phrase. _pLookAhead // points to the first character of the NEXT phrase. // WCHAR const * pEnd = _pLookAhead - 1; while ( (pEnd > _text) && iswspace(*pEnd) ) { pEnd--; } unsigned count = CiPtrToUint( pEnd - _text ) + 1; WCHAR * newBuf = new WCHAR [ count + 1 ]; RtlCopyMemory( newBuf, _text, count * sizeof( WCHAR ) ); newBuf[count] = 0; return newBuf; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqRegEx, public // // Synopsis: Copies all of the relevant characters of the string that // _text is pointing to and returns the new string. Matches // the longest string possible - the only restriction is that // the regex can not contain any of the characters in REGEX_STR // outside of <> braces (which may be nested). // Returns 0 if the regex is empty. // // Notes: Since the string is copied, the caller of this function is // responsible for freeing the memory occupied by the string. // Because some regex characters are duplicated in the query // language, _pLookAhead is ignored (and actually reset) in // this operation. Like AcqPhrase(), this should be called only // once before Accept(). // // History: 10-May-94 t-jeffc Created // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqRegEx() { WCHAR const * pEnd = _text; BOOL fDone = FALSE; BOOL fQuoted = FALSE; if ( *pEnd == L'"' ) { fQuoted = TRUE; pEnd++; } // scan the string - stop at \0 or if any REGEX_STR characters are // found outside of braces // for( ;; ) { switch( *pEnd ) { case '\0': if ( fQuoted ) THROW( CException( QPARSE_E_UNEXPECTED_EOS ) ); fDone = TRUE; break; case ' ': if ( !fQuoted ) fDone = TRUE; break; case ')': if ( !fQuoted ) { if ( ( pEnd != _text ) && ( '|' != (*(pEnd-1)) ) ) fDone = TRUE; } break; case '"': if ( fQuoted ) { pEnd++; fDone = TRUE; } break; default: break; } // switch( *pEnd ) if( fDone ) break; pEnd++; } if( _text == pEnd ) return 0; // set _pLookAhead _pLookAhead = pEnd; // copy the string unsigned count = CiPtrToUint( _pLookAhead - _text ); if ( fQuoted ) { Win4Assert( count >= 2 ); count -= 2; } WCHAR * newBuf = new WCHAR[ count + 1 ]; RtlCopyMemory( newBuf, _text + (fQuoted ? 1 : 0), count * sizeof( WCHAR ) ); newBuf[ count ] = 0; return newBuf; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqPhraseInQuotes, public // // Synopsis: Copies all characters until a matching " is found, or until // the end of string. Embedded quotes are escaped with a quote: // "Bill ""the man"" Gates" // // Notes: Since the string is copied, the caller of this function is // responsible for freeing the memory occupied by the string. // // History: 18-Jan-95 SitaramR Created // 3-Jul-96 dlee added embedded quotes // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqPhraseInQuotes() { WCHAR const * pEnd = _text; do { if ( 0 == *pEnd ) break; if ( L'"' == *pEnd ) { if ( L'"' == *(pEnd+1) ) pEnd++; else break; } pEnd++; } while ( TRUE ); unsigned count = CiPtrToUint( pEnd - _text ); WCHAR * newBuf = new WCHAR [ count + 1 ]; WCHAR * pwcNewBuf = newBuf; WCHAR const * pStart = _text; // copy the string, but remove the extra quote characters while ( pStart < pEnd ) { *pwcNewBuf++ = *pStart++; if ( L'"' == *pStart ) pStart++; } *pwcNewBuf = 0; if ( *pEnd == L'"' ) _pLookAhead = pEnd + 1; else _pLookAhead = pEnd; return newBuf; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the ULONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the ULONG which will be changed and passed back // out as the ULONG from the scanner. // [fAtEnd] -- returns TRUE if at the end of the scanned string // // Notes: May be called several times in a loop before Accept() is // called. // // History: 11-May-92 AmyA Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::GetNumber( ULONG & number, BOOL & fAtEnd ) { if ( IsEndOfTextToken() || !iswdigit(*_text) || (*_text == L'-') ) return FALSE; // is this a hex number? ULONG base = 10; if (_text[0] == L'0' && (_text[1] == L'x' || _text[1] == L'X')) { _text += 2; base = 16; } const WCHAR * pwcStart = _text; number = wcstoul( _text, (WCHAR **)(&_text), base ); // looks like a real number? if ( ( pwcStart == _text ) || ( L'.' == *_text ) ) return FALSE; while ( iswspace(*_text) ) _text++; fAtEnd = ( 0 == *_text ); return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the LONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the LONG which will be changed and passed back // out as the LONG from the scanner. // [fAtEnd] -- returns TRUE if at the end of the scanned string // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Jan-15 DwightKr Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::GetNumber( LONG & number, BOOL & fAtEnd ) { WCHAR *text = (WCHAR *) _text; BOOL IsNegative = FALSE; ULONG ulMax = (ULONG) LONG_MAX; if ( L'-' == _text[0] ) { IsNegative = TRUE; ulMax++; // can represent 1 more negative than positive. _text++; } ULONG ulNumber; if ( !GetNumber( ulNumber, fAtEnd ) ) { _text = text; return FALSE; } // Signed number overflow/underflow if ( ulNumber > ulMax ) { _text = text; return FALSE; } if ( IsNegative ) { if ( ulMax == ulNumber ) number = LONG_MIN; else number = - (LONG) ulNumber; } else { number = (LONG) ulNumber; } return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the ULONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the ULONG which will be changed and passed back // out as the ULONG from the scanner. // [fAtEnd] -- returns TRUE if at the end of the scanned string // // Notes: May be called several times in a loop before Accept() is // called. // // History: 27-Feb-96 dlee Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::GetNumber( unsigned _int64 & number, BOOL & fAtEnd ) { if ( IsEndOfTextToken() || !iswdigit(*_text) || (*_text == L'-') ) return FALSE; // is this a hex number? ULONG base = 10; if (_text[0] == L'0' && (_text[1] == L'x' || _text[1] == L'X')) { _text += 2; base = 16; } const WCHAR * pwcStart = _text; number = _wcstoui64( _text, (WCHAR **)(&_text), base ); // looks like a real number? if ( ( pwcStart == _text ) || ( L'.' == *_text ) ) return FALSE; while ( iswspace(*_text) ) _text++; fAtEnd = ( 0 == *_text ); return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the LONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the LONG which will be changed and passed back // out as the LONG from the scanner. // [fAtEnd] -- returns TRUE if at the end of the scanned string // // Notes: May be called several times in a loop before Accept() is // called. // // History: 27-Feb-96 dlee Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::GetNumber( _int64 & number, BOOL & fAtEnd ) { WCHAR *text = (WCHAR *) _text; BOOL IsNegative = FALSE; unsigned _int64 ullMax = (unsigned _int64) _I64_MAX; if ( L'-' == _text[0] ) { IsNegative = TRUE; ullMax++; // can represent 1 more negative than positive. _text++; } unsigned _int64 ullNumber; if ( !GetNumber( ullNumber, fAtEnd ) ) { _text = text; return FALSE; } // Signed number overflow/underflow if ( ullNumber > ullMax ) { _text = text; return FALSE; } if ( IsNegative ) { if ( ullMax == ullNumber ) number = _I64_MIN; else number = -((_int64) ullNumber); } else { number = (_int64) ullNumber; } return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetNumber, public // // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE. // If not, puts the LONG from the scanner into number and // returns TRUE. // // Arguments: [number] -- the double which will be changed and passed back // out as the double from the scanner. // // Notes: May be called several times in a loop before Accept() is // called. // // History: 96-Jan-15 DwightKr Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::GetNumber( double & number ) { if ( IsEndOfTextToken() || !iswdigit(*_text) ) return FALSE; if ( swscanf( _text, L"%lf", &number ) != 1 ) { return FALSE; } while ( iswspace(*_text) != 0 ) _text++; return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::GetCommandChar, public // // Synopsis: Returns the command character pointed to by _text and advances // _text. If the command can't be uniquely determined by the // first character, each subsequent call will return the next // character in the word. After the command has been determined, // AcceptCommand() should be called and then operand parsing may begin. // // History: 14-May-92 AmyA Created // 16-May-94 t-jeffc Returns one character at a time to // support more commands // //---------------------------------------------------------------------------- WCHAR CQueryScanner::GetCommandChar() { if( IsEndOfTextToken() ) return 0; WCHAR chCommand = _text[0]; _text++; return towlower( chCommand ); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcceptCommand, public // // Synopsis: Advances _text past any characters in the command. // Used when enough command characters have been // read to uniquely determine the command and begin parsing // the operands. // // History: 16-May-94 t-jeffc Created // //---------------------------------------------------------------------------- void CQueryScanner::AcceptCommand() { int cChars = wcscspn( _text, CMND_STR ); // how many characters follow // _text that are not in CMND_STR _text += cChars; _pLookAhead = _text; Accept(); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::ResetBuffer, public // // Synopsis: Puts a new string into _pBuf and resets _pLookAhead // accordingly. // // Arguments: [buffer] -- the new string for _pBuf // // History: 05-May-92 AmyA Created // //---------------------------------------------------------------------------- void CQueryScanner::ResetBuffer( WCHAR const * buffer ) { _pBuf = buffer; _pLookAhead = _pBuf; Accept(); } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::EatWhiteSpace, private // // Synopsis: Advances _pLookAhead past any white space in the string. // // History: 29-Apr-92 AmyA Created // //---------------------------------------------------------------------------- void CQueryScanner::EatWhiteSpace() { while ( iswspace(*_pLookAhead) != 0 ) _pLookAhead++; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::IsEndOfTextToken, private // // Synopsis: Returns TRUE if the current token is not a TEXT_TOKEN or // if the string starting at _text to _pLookAhead contains // nothing but whitespace. // // History: 27-May-94 t-jeffc Created // //---------------------------------------------------------------------------- BOOL CQueryScanner::IsEndOfTextToken() { if( _token == TEXT_TOKEN && _text < _pLookAhead ) return FALSE; else return TRUE; } //+--------------------------------------------------------------------------- // // Member: CQueryScanner::AcqLine, public // // Synopsis: Copies all of the remaining characters on the line; // return 0 if _text is at end of whole TEXT_TOKEN. // // Arguments: [fParseQuotes] -- if TRUE, initial and final quotes are removed // // Notes: Since the string is copied, the caller of this function is // responsible for freeing the memory occupied by the string. // This method can be called several times before calling // Accept(), so many paths can be acquired if they exist in the // scanner. // // History: 96-Jan-03 DwightKr Created // 96-Feb-26 DwightKr Allow lines to be quoted // //---------------------------------------------------------------------------- WCHAR * CQueryScanner::AcqLine( BOOL fParseQuotes ) { if ( *_text == L'\0' ) return 0; unsigned cwcBuffer = wcslen(_text); // // If there are \r, \n, or other white space at the end of the string, // strip it off // while ( cwcBuffer > 0 && _text[cwcBuffer-1] <= L' ' ) cwcBuffer--; if ( fParseQuotes ) { // // If there is a pair of quotes delimiting this line, strip them off // if ( (L'"' == _text[0]) && (cwcBuffer > 1) ) { if ( L'"' == _text[cwcBuffer-1] ) cwcBuffer--; _text++; cwcBuffer--; } } WCHAR *pText = new WCHAR [ cwcBuffer + 1 ]; RtlCopyMemory( pText, _text, cwcBuffer * sizeof(WCHAR) ); pText[cwcBuffer] = 0; _pLookAhead = _text + cwcBuffer - 1; return pText; } //AcqLine