Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

562 lines
14 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Copyright (C) 1996, Microsoft Corporation.
  4. //
  5. // File: tokstr.cxx
  6. //
  7. // Contents: Used to break down a string into its tokens
  8. //
  9. // History: 96/Feb/13 DwightKr Created
  10. //
  11. //----------------------------------------------------------------------------
  12. #include <pch.cxx>
  13. #pragma hdrstop
  14. //+---------------------------------------------------------------------------
  15. //
  16. // Method: CTokenizeString::CTokenizeString - public constructor
  17. //
  18. // History: 96/Jan/23 DwightKr Created
  19. //
  20. //----------------------------------------------------------------------------
  21. CTokenizeString::CTokenizeString( WCHAR const * wcsString ) :
  22. _wcsString(wcsString),
  23. _wcsCurrentToken(wcsString),
  24. _wcsNextToken(wcsString)
  25. {
  26. Accept();
  27. }
  28. //+---------------------------------------------------------------------------
  29. //
  30. // Method: CTokenizeString::Accept - public
  31. //
  32. // History: 96/Jan/23 DwightKr Created
  33. //
  34. //----------------------------------------------------------------------------
  35. void CTokenizeString::Accept()
  36. {
  37. EatWhiteSpace();
  38. _wcsCurrentToken = _wcsNextToken;
  39. switch ( *_wcsCurrentToken )
  40. {
  41. case L'"':
  42. _wcsNextToken++;
  43. _token = QUOTES_TOKEN;
  44. break;
  45. case L'{':
  46. _wcsNextToken++;
  47. _token = C_OPEN_TOKEN;
  48. break;
  49. case L'}':
  50. _wcsNextToken++;
  51. _token = C_CLOSE_TOKEN;
  52. break;
  53. case L',':
  54. _wcsNextToken++;
  55. _token = COMMA_TOKEN;
  56. break;
  57. case 0:
  58. _token = EOS_TOKEN;
  59. break;
  60. default:
  61. _wcsNextToken = _wcsCurrentToken + wcscspn( _wcsCurrentToken, WORD_STR );
  62. _token = TEXT_TOKEN;
  63. break;
  64. }
  65. }
  66. //+---------------------------------------------------------------------------
  67. //
  68. // Member: CTokenizeString:AcqWord, public
  69. //
  70. // Synopsis: Copies the word that _wcsCurrentToken is pointing to and
  71. // returns the new string. Positions _wcsCurrentToken after
  72. // the word and whitespace. Returns 0 if at the end of a
  73. // TEXT_TOKEN.
  74. //
  75. // History: 96-Feb-13 DwightKr Created.
  76. //
  77. //----------------------------------------------------------------------------
  78. WCHAR * CTokenizeString::AcqWord()
  79. {
  80. if ( IsEndOfTextToken() )
  81. return 0;
  82. WCHAR const * pEnd = _wcsNextToken;
  83. int cwcToken = (int)(pEnd - _wcsCurrentToken + 1);
  84. WCHAR * newBuf = new WCHAR [ cwcToken ];
  85. RtlCopyMemory( newBuf, _wcsCurrentToken, cwcToken * sizeof(WCHAR));
  86. newBuf[cwcToken-1] = 0;
  87. _wcsCurrentToken = pEnd;
  88. while ( iswspace(*_wcsCurrentToken) )
  89. _wcsCurrentToken++;
  90. return newBuf;
  91. }
  92. //+---------------------------------------------------------------------------
  93. //
  94. // Member: CTokenizeString::GetNumber, public
  95. //
  96. // Synopsis: If _text is at the end of the TEXT_TOKEN, returns FALSE.
  97. // If not, puts the unsigned _int64 from the scanner into number
  98. // and returns TRUE.
  99. //
  100. // Arguments: [number] -- the unsigned _int64 which will be changed and
  101. // passed back out as the ULONG from the scanner.
  102. //
  103. // Notes: May be called several times in a loop before Accept() is
  104. // called.
  105. //
  106. // History: 96-Feb-13 AmyA Created
  107. //
  108. //----------------------------------------------------------------------------
  109. BOOL CTokenizeString::GetNumber( unsigned _int64 & number )
  110. {
  111. ULONG base = 10;
  112. WCHAR const * wcsCurrentToken = _wcsCurrentToken;
  113. if ( IsEndOfTextToken() ||
  114. !iswdigit(*_wcsCurrentToken) ||
  115. (*_wcsCurrentToken == L'-') )
  116. {
  117. return FALSE;
  118. }
  119. if ( _wcsCurrentToken[0] == L'0' &&
  120. (_wcsCurrentToken[1] == L'x' || _wcsCurrentToken[1] == L'X'))
  121. {
  122. _wcsCurrentToken += 2;
  123. base = 16;
  124. }
  125. number = _wcstoui64( _wcsCurrentToken, (WCHAR **)(&_wcsCurrentToken), base );
  126. //
  127. // looks like a real number?
  128. //
  129. if ( ( wcsCurrentToken == _wcsCurrentToken ) ||
  130. ( L'.' == *_wcsCurrentToken ) )
  131. {
  132. _wcsCurrentToken = wcsCurrentToken;
  133. return FALSE;
  134. }
  135. while ( iswspace(*_wcsCurrentToken) )
  136. _wcsCurrentToken++;
  137. return TRUE;
  138. }
  139. //+---------------------------------------------------------------------------
  140. //
  141. // Member: CTokenizeString::GetNumber, public
  142. //
  143. // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
  144. // If not, puts the _int64 from the scanner into number and
  145. // returns TRUE.
  146. //
  147. // Arguments: [number] -- the _int64 which will be changed and passed back
  148. // out as the _int64 from the scanner.
  149. //
  150. // Notes: May be called several times in a loop before Accept() is
  151. // called.
  152. //
  153. // History: 96-Feb-13 DwightKr Created
  154. //
  155. //----------------------------------------------------------------------------
  156. BOOL CTokenizeString::GetNumber( _int64 & number )
  157. {
  158. WCHAR *text = (WCHAR *) _wcsCurrentToken;
  159. BOOL IsNegative = FALSE;
  160. if ( L'-' == _wcsCurrentToken[0] )
  161. {
  162. IsNegative = TRUE;
  163. _wcsCurrentToken++;
  164. }
  165. unsigned _int64 ui64Number;
  166. if ( !GetNumber( ui64Number ) )
  167. {
  168. _wcsCurrentToken = text;
  169. return FALSE;
  170. }
  171. if ( IsNegative )
  172. {
  173. if ( ui64Number > 0x8000000000000000L )
  174. {
  175. _wcsCurrentToken = text;
  176. return FALSE;
  177. }
  178. number = -((_int64) ui64Number);
  179. }
  180. else
  181. {
  182. number = (_int64) ui64Number;
  183. }
  184. return TRUE;
  185. }
  186. //+---------------------------------------------------------------------------
  187. //
  188. // Member: CTokenizeString::GetNumber, public
  189. //
  190. // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
  191. // If not, puts the LONG from the scanner into number and
  192. // returns TRUE.
  193. //
  194. // Arguments: [number] -- the double which will be changed and passed back
  195. // out as the double from the scanner.
  196. //
  197. // Notes: May be called several times in a loop before Accept() is
  198. // called.
  199. //
  200. // History: 96-Feb-13 DwightKr Created
  201. //
  202. //----------------------------------------------------------------------------
  203. BOOL CTokenizeString::GetNumber( double & number )
  204. {
  205. if ( IsEndOfTextToken() ||
  206. ((L'-' != *_wcsCurrentToken) &&
  207. (iswdigit(*_wcsCurrentToken) == 0) )
  208. )
  209. {
  210. return FALSE;
  211. }
  212. if ( swscanf( _wcsCurrentToken, L"%lf", &number ) != 1 )
  213. {
  214. return FALSE;
  215. }
  216. while ( iswspace(*_wcsCurrentToken) != 0 )
  217. _wcsCurrentToken++;
  218. return TRUE;
  219. }
  220. //+---------------------------------------------------------------------------
  221. //
  222. // Member: CTokenizeString::GetGUID, public
  223. //
  224. // Synopsis: If _wcsCurrentToken is at the end of the TEXT_TOKEN, returns FALSE.
  225. // If not, puts the guid into guid & returns TRUE;
  226. //
  227. // Arguments: [guid] -- the guid which will be changed and passed back
  228. // out as the output from the scanner.
  229. //
  230. // Notes: May be called several times in a loop before Accept() is
  231. // called.
  232. //
  233. // History: 96-Feb-13 DwightKr Created
  234. //
  235. //----------------------------------------------------------------------------
  236. BOOL CTokenizeString::GetGUID( GUID & guid )
  237. {
  238. if ( IsEndOfTextToken() || !iswdigit(*_wcsCurrentToken) )
  239. return FALSE;
  240. // 0123456789 123456789 123456789 123456
  241. // A guid MUST have the syntax XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
  242. //
  243. //
  244. // Don't use wsscanf. We're scanning into *bytes*, but wsscanf assumes
  245. // result locations are *dwords*. Thus a write to the last few bytes of
  246. // the guid writes over other memory!
  247. //
  248. WCHAR wcsGuid[37];
  249. RtlZeroMemory( wcsGuid, sizeof(wcsGuid) );
  250. wcsncpy( wcsGuid, _wcsCurrentToken, 36 );
  251. if ( wcsGuid[8] != L'-' )
  252. return FALSE;
  253. wcsGuid[8] = 0;
  254. WCHAR * pwcStart = &wcsGuid[0];
  255. WCHAR * pwcEnd;
  256. guid.Data1 = wcstoul( pwcStart, &pwcEnd, 16 );
  257. if ( pwcEnd < &wcsGuid[8] ) // Non-digit found before wcsGuid[8]
  258. return FALSE;
  259. if ( wcsGuid[13] != L'-' )
  260. return FALSE;
  261. wcsGuid[13] = 0;
  262. pwcStart = &wcsGuid[9];
  263. guid.Data2 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 );
  264. if ( pwcEnd < &wcsGuid[13] )
  265. return FALSE;
  266. if ( wcsGuid[18] != L'-' )
  267. return FALSE;
  268. wcsGuid[18] = 0;
  269. pwcStart = &wcsGuid[14];
  270. guid.Data3 = (USHORT)wcstoul( pwcStart, &pwcEnd, 16 );
  271. if ( pwcEnd < &wcsGuid[18] )
  272. return FALSE;
  273. WCHAR wc = wcsGuid[21];
  274. wcsGuid[21] = 0;
  275. pwcStart = &wcsGuid[19];
  276. guid.Data4[0] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
  277. if ( pwcEnd < &wcsGuid[21] )
  278. return FALSE;
  279. wcsGuid[21] = wc;
  280. if ( wcsGuid[23] != L'-' )
  281. return FALSE;
  282. wcsGuid[23] = 0;
  283. pwcStart = &wcsGuid[21];
  284. guid.Data4[1] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
  285. if ( pwcEnd < &wcsGuid[23] )
  286. return FALSE;
  287. for ( unsigned i = 0; i < 6; i++ )
  288. {
  289. wc = wcsGuid[26+i*2];
  290. wcsGuid[26+i*2] = 0;
  291. pwcStart = &wcsGuid[24+i*2];
  292. guid.Data4[2+i] = (unsigned char)wcstoul( pwcStart, &pwcEnd, 16 );
  293. if ( pwcEnd < &wcsGuid[26+i*2] )
  294. return FALSE;
  295. wcsGuid[26+i*2] = wc;
  296. }
  297. _wcsCurrentToken += 36;
  298. _wcsNextToken = _wcsCurrentToken;
  299. EatWhiteSpace();
  300. return TRUE;
  301. }
  302. //+---------------------------------------------------------------------------
  303. //
  304. // Member: CTokenizeString::AcqPhrase, public
  305. //
  306. // Synopsis: gets all characters up to end-of-line or next quote
  307. //
  308. // History: 96-Feb-13 DwightKr Created
  309. //
  310. //----------------------------------------------------------------------------
  311. WCHAR * CTokenizeString::AcqPhrase()
  312. {
  313. //
  314. // Find the closing "
  315. //
  316. WCHAR const * wcsClosingQuote = _wcsCurrentToken;
  317. do
  318. {
  319. if ( 0 == *wcsClosingQuote )
  320. break;
  321. if ( L'"' == *wcsClosingQuote )
  322. {
  323. if ( L'"' == *(wcsClosingQuote+1) )
  324. wcsClosingQuote++;
  325. else
  326. break;
  327. }
  328. wcsClosingQuote++;
  329. } while ( TRUE );
  330. //
  331. // We've found the closing quote. Build a buffer big enough to
  332. // contain the string.
  333. //
  334. ULONG cwcToken = (ULONG)(wcsClosingQuote - _wcsCurrentToken + 1);
  335. XArray<WCHAR> wcsToken( cwcToken );
  336. //
  337. // copy the string, but remove the extra quote characters
  338. //
  339. WCHAR * pwcNewBuf = wcsToken.GetPointer();
  340. WCHAR const * pStart = _wcsCurrentToken;
  341. while ( pStart < wcsClosingQuote )
  342. {
  343. *pwcNewBuf++ = *pStart++;
  344. if ( L'"' == *pStart )
  345. pStart++;
  346. }
  347. *pwcNewBuf = 0;
  348. _wcsCurrentToken += cwcToken - 1;
  349. _wcsNextToken = _wcsCurrentToken;
  350. EatWhiteSpace();
  351. return wcsToken.Acquire();
  352. }
  353. //+---------------------------------------------------------------------------
  354. //
  355. // Member: CTokenizeString::AcqVector, public
  356. //
  357. // Synopsis: Gets each of the vector elements upto the next }
  358. //
  359. // History: 96-Feb-13 DwightKr Created
  360. //
  361. //----------------------------------------------------------------------------
  362. void CTokenizeString::AcqVector( PROPVARIANT & propVariant )
  363. {
  364. //
  365. // Determine the VT type of this vector.
  366. //
  367. GUID guid;
  368. _int64 i64Value;
  369. double dblValue;
  370. if ( GetGUID( guid ) )
  371. {
  372. propVariant.vt = VT_CLSID | VT_VECTOR;
  373. propVariant.cauuid.cElems = 0;
  374. CDynArrayInPlace<GUID> pElems;
  375. do
  376. {
  377. Accept();
  378. pElems.Add( guid, propVariant.cauuid.cElems );
  379. propVariant.cauuid.cElems++;
  380. if ( LookAhead() == COMMA_TOKEN )
  381. {
  382. Accept();
  383. }
  384. } while ( GetGUID( guid ) );
  385. propVariant.cauuid.pElems = pElems.Acquire();
  386. }
  387. else if ( GetNumber( i64Value ) )
  388. {
  389. propVariant.vt = VT_I8 | VT_VECTOR;
  390. propVariant.cah.cElems = 0;
  391. CDynArrayInPlace<_int64> pElems;
  392. do
  393. {
  394. Accept();
  395. pElems.Add( i64Value, propVariant.cah.cElems );
  396. propVariant.cah.cElems++;
  397. if ( LookAhead() == COMMA_TOKEN )
  398. {
  399. Accept();
  400. }
  401. } while ( GetNumber( i64Value ) );
  402. propVariant.cah.pElems = (LARGE_INTEGER *) pElems.Acquire();
  403. }
  404. else if ( GetNumber( dblValue ) )
  405. {
  406. propVariant.vt = VT_R8 | VT_VECTOR;
  407. propVariant.cadbl.cElems = 0;
  408. CDynArrayInPlace<double> pElems;
  409. do
  410. { Accept();
  411. pElems.Add( dblValue, propVariant.cadbl.cElems );
  412. propVariant.cadbl.cElems++;
  413. if ( LookAhead() == COMMA_TOKEN )
  414. {
  415. Accept();
  416. }
  417. } while ( GetNumber( dblValue ) );
  418. propVariant.cadbl.pElems = pElems.Acquire();
  419. }
  420. else
  421. {
  422. propVariant.vt = VT_LPWSTR | VT_VECTOR;
  423. CDynArrayInPlace<WCHAR *> pElems;
  424. propVariant.calpwstr.cElems = 0;
  425. while ( (LookAhead() != C_CLOSE_TOKEN) &&
  426. (LookAhead() != EOS_TOKEN)
  427. )
  428. {
  429. //
  430. // If its a quoted string, get everything between the quotes.
  431. //
  432. if ( LookAhead() == QUOTES_TOKEN )
  433. {
  434. Accept(); // Skip over the quote
  435. pElems.Add(AcqPhrase(), propVariant.calpwstr.cElems );
  436. Accept(); // Skip over the string
  437. if ( LookAhead() != QUOTES_TOKEN )
  438. {
  439. THROW( CHTXException(MSG_CI_HTX_MISSING_QUOTE, 0, 0) );
  440. }
  441. Accept(); // Skip over the quote
  442. }
  443. else
  444. {
  445. //
  446. // Get the next word
  447. //
  448. pElems.Add( AcqWord(), propVariant.calpwstr.cElems );
  449. Accept(); // Skip over the string
  450. }
  451. propVariant.calpwstr.cElems++;
  452. if ( LookAhead() == COMMA_TOKEN )
  453. {
  454. Accept();
  455. }
  456. }
  457. propVariant.calpwstr.pElems = pElems.Acquire();
  458. }
  459. }