mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
562 lines
14 KiB
562 lines
14 KiB
//+---------------------------------------------------------------------------
|
|
//
|
|
// Copyright (C) 1996, Microsoft Corporation.
|
|
//
|
|
// File: tokstr.cxx
|
|
//
|
|
// Contents: Used to break down a string into its tokens
|
|
//
|
|
// History: 96/Feb/13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#include <pch.cxx>
|
|
#pragma hdrstop
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Method: CTokenizeString::CTokenizeString - public constructor
|
|
//
|
|
// History: 96/Jan/23 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
CTokenizeString::CTokenizeString( WCHAR const * wcsString ) :
|
|
_wcsString(wcsString),
|
|
_wcsCurrentToken(wcsString),
|
|
_wcsNextToken(wcsString)
|
|
{
|
|
Accept();
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Method: CTokenizeString::Accept - public
|
|
//
|
|
// History: 96/Jan/23 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CTokenizeString::Accept()
|
|
{
|
|
EatWhiteSpace();
|
|
|
|
_wcsCurrentToken = _wcsNextToken;
|
|
|
|
switch ( *_wcsCurrentToken )
|
|
{
|
|
case L'"':
|
|
_wcsNextToken++;
|
|
_token = QUOTES_TOKEN;
|
|
break;
|
|
|
|
case L'{':
|
|
_wcsNextToken++;
|
|
_token = C_OPEN_TOKEN;
|
|
break;
|
|
|
|
case L'}':
|
|
_wcsNextToken++;
|
|
_token = C_CLOSE_TOKEN;
|
|
break;
|
|
|
|
case L',':
|
|
_wcsNextToken++;
|
|
_token = COMMA_TOKEN;
|
|
break;
|
|
|
|
case 0:
|
|
_token = EOS_TOKEN;
|
|
break;
|
|
|
|
default:
|
|
_wcsNextToken = _wcsCurrentToken + wcscspn( _wcsCurrentToken, WORD_STR );
|
|
_token = TEXT_TOKEN;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString:AcqWord, public
|
|
//
|
|
// Synopsis: Copies the word that _wcsCurrentToken is pointing to and
|
|
// returns the new string. Positions _wcsCurrentToken after
|
|
// the word and whitespace. Returns 0 if at the end of a
|
|
// TEXT_TOKEN.
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
WCHAR * CTokenizeString::AcqWord()
|
|
{
|
|
if ( IsEndOfTextToken() )
|
|
return 0;
|
|
|
|
WCHAR const * pEnd = _wcsNextToken;
|
|
|
|
int cwcToken = (int)(pEnd - _wcsCurrentToken + 1);
|
|
|
|
WCHAR * newBuf = new WCHAR [ cwcToken ];
|
|
RtlCopyMemory( newBuf, _wcsCurrentToken, cwcToken * sizeof(WCHAR));
|
|
newBuf[cwcToken-1] = 0;
|
|
|
|
_wcsCurrentToken = pEnd;
|
|
while ( iswspace(*_wcsCurrentToken) )
|
|
_wcsCurrentToken++;
|
|
|
|
return newBuf;
|
|
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::GetNumber, public
|
|
//
|
|
// Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
|
|
// If not, puts the unsigned _int64 from the scanner into number
|
|
// and returns TRUE.
|
|
//
|
|
// Arguments: [number] -- the unsigned _int64 which will be changed and
|
|
// passed back out as the ULONG from the scanner.
|
|
//
|
|
// Notes: May be called several times in a loop before Accept() is
|
|
// called.
|
|
//
|
|
// History: 96-Feb-13 AmyA Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
BOOL CTokenizeString::GetNumber( unsigned _int64 & number )
|
|
{
|
|
ULONG base = 10;
|
|
WCHAR const * wcsCurrentToken = _wcsCurrentToken;
|
|
|
|
if ( IsEndOfTextToken() ||
|
|
!iswdigit(*_wcsCurrentToken) ||
|
|
(*_wcsCurrentToken == L'-') )
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
if ( _wcsCurrentToken[0] == L'0' &&
|
|
(_wcsCurrentToken[1] == L'x' || _wcsCurrentToken[1] == L'X'))
|
|
{
|
|
_wcsCurrentToken += 2;
|
|
base = 16;
|
|
}
|
|
|
|
number = _wcstoui64( _wcsCurrentToken, (WCHAR **)(&_wcsCurrentToken), base );
|
|
|
|
//
|
|
// looks like a real number?
|
|
//
|
|
|
|
if ( ( wcsCurrentToken == _wcsCurrentToken ) ||
|
|
( L'.' == *_wcsCurrentToken ) )
|
|
{
|
|
_wcsCurrentToken = wcsCurrentToken;
|
|
return FALSE;
|
|
}
|
|
|
|
while ( iswspace(*_wcsCurrentToken) )
|
|
_wcsCurrentToken++;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::GetNumber, public
|
|
//
|
|
// Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
|
|
// If not, puts the _int64 from the scanner into number and
|
|
// returns TRUE.
|
|
//
|
|
// Arguments: [number] -- the _int64 which will be changed and passed back
|
|
// out as the _int64 from the scanner.
|
|
//
|
|
// Notes: May be called several times in a loop before Accept() is
|
|
// called.
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
BOOL CTokenizeString::GetNumber( _int64 & number )
|
|
{
|
|
WCHAR *text = (WCHAR *) _wcsCurrentToken;
|
|
BOOL IsNegative = FALSE;
|
|
if ( L'-' == _wcsCurrentToken[0] )
|
|
{
|
|
IsNegative = TRUE;
|
|
_wcsCurrentToken++;
|
|
}
|
|
|
|
unsigned _int64 ui64Number;
|
|
if ( !GetNumber( ui64Number ) )
|
|
{
|
|
_wcsCurrentToken = text;
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
if ( IsNegative )
|
|
{
|
|
if ( ui64Number > 0x8000000000000000L )
|
|
{
|
|
_wcsCurrentToken = text;
|
|
return FALSE;
|
|
}
|
|
|
|
number = -((_int64) ui64Number);
|
|
}
|
|
else
|
|
{
|
|
number = (_int64) ui64Number;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::GetNumber, public
|
|
//
|
|
// Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
|
|
// If not, puts the LONG from the scanner into number and
|
|
// returns TRUE.
|
|
//
|
|
// Arguments: [number] -- the double which will be changed and passed back
|
|
// out as the double from the scanner.
|
|
//
|
|
// Notes: May be called several times in a loop before Accept() is
|
|
// called.
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
BOOL CTokenizeString::GetNumber( double & number )
|
|
{
|
|
if ( IsEndOfTextToken() ||
|
|
((L'-' != *_wcsCurrentToken) &&
|
|
(iswdigit(*_wcsCurrentToken) == 0) )
|
|
)
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
if ( swscanf( _wcsCurrentToken, L"%lf", &number ) != 1 )
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
while ( iswspace(*_wcsCurrentToken) != 0 )
|
|
_wcsCurrentToken++;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::GetGUID, public
|
|
//
|
|
// Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
|
|
// If not, puts the guid into guid & returns TRUE;
|
|
//
|
|
// Arguments: [guid] -- the guid which will be changed and passed back
|
|
// out as the output from the scanner.
|
|
//
|
|
// Notes: May be called several times in a loop before Accept() is
|
|
// called.
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
BOOL CTokenizeString::GetGUID( GUID & guid )
|
|
{
|
|
if ( IsEndOfTextToken() || !iswdigit(*_wcsCurrentToken) )
|
|
return FALSE;
|
|
|
|
|
|
// 0123456789 123456789 123456789 123456
|
|
// A guid MUST have the syntax XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
|
|
//
|
|
|
|
//
|
|
// Don't use wsscanf. We're scanning into *bytes*, but wsscanf assumes
|
|
// result locations are *dwords*. Thus a write to the last few bytes of
|
|
// the guid writes over other memory!
|
|
//
|
|
WCHAR wcsGuid[37];
|
|
RtlZeroMemory( wcsGuid, sizeof(wcsGuid) );
|
|
wcsncpy( wcsGuid, _wcsCurrentToken, 36 );
|
|
|
|
if ( wcsGuid[8] != L'-' )
|
|
return FALSE;
|
|
|
|
wcsGuid[8] = 0;
|
|
WCHAR * pwcStart = &wcsGuid[0];
|
|
WCHAR * pwcEnd;
|
|
guid.Data1 = wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[8] ) // Non-digit found before wcsGuid[8]
|
|
return FALSE;
|
|
|
|
if ( wcsGuid[13] != L'-' )
|
|
return FALSE;
|
|
|
|
wcsGuid[13] = 0;
|
|
pwcStart = &wcsGuid[9];
|
|
guid.Data2 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[13] )
|
|
return FALSE;
|
|
|
|
|
|
if ( wcsGuid[18] != L'-' )
|
|
return FALSE;
|
|
|
|
wcsGuid[18] = 0;
|
|
pwcStart = &wcsGuid[14];
|
|
guid.Data3 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[18] )
|
|
return FALSE;
|
|
|
|
WCHAR wc = wcsGuid[21];
|
|
wcsGuid[21] = 0;
|
|
pwcStart = &wcsGuid[19];
|
|
guid.Data4[0] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[21] )
|
|
return FALSE;
|
|
|
|
wcsGuid[21] = wc;
|
|
|
|
if ( wcsGuid[23] != L'-' )
|
|
return FALSE;
|
|
|
|
wcsGuid[23] = 0;
|
|
pwcStart = &wcsGuid[21];
|
|
guid.Data4[1] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[23] )
|
|
return FALSE;
|
|
|
|
for ( unsigned i = 0; i < 6; i++ )
|
|
{
|
|
wc = wcsGuid[26+i*2];
|
|
wcsGuid[26+i*2] = 0;
|
|
pwcStart = &wcsGuid[24+i*2];
|
|
guid.Data4[2+i] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
|
|
if ( pwcEnd < &wcsGuid[26+i*2] )
|
|
return FALSE;
|
|
|
|
wcsGuid[26+i*2] = wc;
|
|
}
|
|
|
|
_wcsCurrentToken += 36;
|
|
|
|
_wcsNextToken = _wcsCurrentToken;
|
|
|
|
EatWhiteSpace();
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::AcqPhrase, public
|
|
//
|
|
// Synopsis: gets all characters up to end-of-line or next quote
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
WCHAR * CTokenizeString::AcqPhrase()
|
|
{
|
|
//
|
|
// Find the closing "
|
|
//
|
|
|
|
WCHAR const * wcsClosingQuote = _wcsCurrentToken;
|
|
|
|
do
|
|
{
|
|
if ( 0 == *wcsClosingQuote )
|
|
break;
|
|
|
|
if ( L'"' == *wcsClosingQuote )
|
|
{
|
|
if ( L'"' == *(wcsClosingQuote+1) )
|
|
wcsClosingQuote++;
|
|
else
|
|
break;
|
|
}
|
|
|
|
wcsClosingQuote++;
|
|
} while ( TRUE );
|
|
|
|
//
|
|
// We've found the closing quote. Build a buffer big enough to
|
|
// contain the string.
|
|
//
|
|
ULONG cwcToken = (ULONG)(wcsClosingQuote - _wcsCurrentToken + 1);
|
|
XArray<WCHAR> wcsToken( cwcToken );
|
|
|
|
//
|
|
// copy the string, but remove the extra quote characters
|
|
//
|
|
WCHAR * pwcNewBuf = wcsToken.GetPointer();
|
|
WCHAR const * pStart = _wcsCurrentToken;
|
|
|
|
while ( pStart < wcsClosingQuote )
|
|
{
|
|
*pwcNewBuf++ = *pStart++;
|
|
if ( L'"' == *pStart )
|
|
pStart++;
|
|
}
|
|
|
|
*pwcNewBuf = 0;
|
|
|
|
_wcsCurrentToken += cwcToken - 1;
|
|
_wcsNextToken = _wcsCurrentToken;
|
|
|
|
EatWhiteSpace();
|
|
|
|
return wcsToken.Acquire();
|
|
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Member: CTokenizeString::AcqVector, public
|
|
//
|
|
// Synopsis: Gets each of the vector elements upto the next }
|
|
//
|
|
// History: 96-Feb-13 DwightKr Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
void CTokenizeString::AcqVector( PROPVARIANT & propVariant )
|
|
{
|
|
//
|
|
// Determine the VT type of this vector.
|
|
//
|
|
|
|
GUID guid;
|
|
_int64 i64Value;
|
|
double dblValue;
|
|
|
|
if ( GetGUID( guid ) )
|
|
{
|
|
propVariant.vt = VT_CLSID | VT_VECTOR;
|
|
propVariant.cauuid.cElems = 0;
|
|
|
|
CDynArrayInPlace<GUID> pElems;
|
|
|
|
do
|
|
{
|
|
Accept();
|
|
|
|
pElems.Add( guid, propVariant.cauuid.cElems );
|
|
propVariant.cauuid.cElems++;
|
|
|
|
if ( LookAhead() == COMMA_TOKEN )
|
|
{
|
|
Accept();
|
|
}
|
|
|
|
} while ( GetGUID( guid ) );
|
|
|
|
propVariant.cauuid.pElems = pElems.Acquire();
|
|
}
|
|
else if ( GetNumber( i64Value ) )
|
|
{
|
|
propVariant.vt = VT_I8 | VT_VECTOR;
|
|
propVariant.cah.cElems = 0;
|
|
|
|
CDynArrayInPlace<_int64> pElems;
|
|
|
|
do
|
|
{
|
|
Accept();
|
|
|
|
pElems.Add( i64Value, propVariant.cah.cElems );
|
|
propVariant.cah.cElems++;
|
|
|
|
if ( LookAhead() == COMMA_TOKEN )
|
|
{
|
|
Accept();
|
|
}
|
|
|
|
} while ( GetNumber( i64Value ) );
|
|
|
|
propVariant.cah.pElems = (LARGE_INTEGER *) pElems.Acquire();
|
|
}
|
|
else if ( GetNumber( dblValue ) )
|
|
{
|
|
propVariant.vt = VT_R8 | VT_VECTOR;
|
|
propVariant.cadbl.cElems = 0;
|
|
|
|
CDynArrayInPlace<double> pElems;
|
|
do
|
|
{ Accept();
|
|
|
|
pElems.Add( dblValue, propVariant.cadbl.cElems );
|
|
propVariant.cadbl.cElems++;
|
|
|
|
if ( LookAhead() == COMMA_TOKEN )
|
|
{
|
|
Accept();
|
|
}
|
|
|
|
} while ( GetNumber( dblValue ) );
|
|
|
|
propVariant.cadbl.pElems = pElems.Acquire();
|
|
}
|
|
else
|
|
{
|
|
propVariant.vt = VT_LPWSTR | VT_VECTOR;
|
|
CDynArrayInPlace<WCHAR *> pElems;
|
|
propVariant.calpwstr.cElems = 0;
|
|
|
|
while ( (LookAhead() != C_CLOSE_TOKEN) &&
|
|
(LookAhead() != EOS_TOKEN)
|
|
)
|
|
{
|
|
//
|
|
// If its a quoted string, get everything between the quotes.
|
|
//
|
|
if ( LookAhead() == QUOTES_TOKEN )
|
|
{
|
|
Accept(); // Skip over the quote
|
|
pElems.Add(AcqPhrase(), propVariant.calpwstr.cElems );
|
|
Accept(); // Skip over the string
|
|
|
|
if ( LookAhead() != QUOTES_TOKEN )
|
|
{
|
|
THROW( CHTXException(MSG_CI_HTX_MISSING_QUOTE, 0, 0) );
|
|
}
|
|
Accept(); // Skip over the quote
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Get the next word
|
|
//
|
|
|
|
pElems.Add( AcqWord(), propVariant.calpwstr.cElems );
|
|
Accept(); // Skip over the string
|
|
}
|
|
|
|
propVariant.calpwstr.cElems++;
|
|
if ( LookAhead() == COMMA_TOKEN )
|
|
{
|
|
Accept();
|
|
}
|
|
}
|
|
|
|
propVariant.calpwstr.pElems = pElems.Acquire();
|
|
}
|
|
}
|