Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1435 lines
37 KiB

  1. /**********************************************************************/
  2. /** Microsoft Windows NT **/
  3. /** Copyright(c) Microsoft Corp., 1994 **/
  4. /**********************************************************************/
  5. /*
  6. string.cxx
  7. This module contains a light weight string class
  8. FILE HISTORY:
  9. Johnl 15-Aug-1994 Created
  10. MuraliK 27-Feb-1995 Modified to be a standalone module with buffer.
  11. MuraliK 2-June-1995 Made into separate library
  12. */
  13. #include "precomp.hxx"
  14. //
  15. // Normal includes only for this module to be active
  16. //
  17. # include <opt_time.h>
  18. extern "C" {
  19. # include <nt.h>
  20. # include <ntrtl.h>
  21. # include <nturtl.h>
  22. # include <windows.h>
  23. };
  24. # include "dbgutil.h"
  25. # include <string.hxx>
  26. # include <auxctrs.h>
  27. # include <tchar.h>
  28. # include <mbstring.h>
  29. //
  30. // String globals
  31. //
  32. typedef UCHAR * ( __cdecl * PFNSTRCASE ) ( UCHAR * );
  33. typedef INT ( __cdecl * PFNSTRNICMP ) ( const UCHAR *, const UCHAR *, size_t );
  34. typedef INT ( __cdecl * PFNSTRICMP ) ( const UCHAR *, const UCHAR * );
  35. typedef size_t ( __cdecl * PFNSTRLEN ) ( const UCHAR * );
  36. typedef UCHAR * (__cdecl * PFNSTRRCHR) (const UCHAR *, UINT);
  37. PFNSTRCASE g_pfnStrupr = _mbsupr;
  38. PFNSTRCASE g_pfnStrlwr = _mbslwr;
  39. PFNSTRNICMP g_pfnStrnicmp = _mbsnicmp;
  40. PFNSTRICMP g_pfnStricmp = _mbsicmp;
  41. PFNSTRLEN g_pfnStrlen = _mbslen;
  42. PFNSTRRCHR g_pfnStrrchr = _mbsrchr;
  43. BOOL g_fFavorDBCS = FALSE;
  44. #define UTF8_HACK_KEY "System\\CurrentControlSet\\Services\\InetInfo\\Parameters"
  45. #define UTF8_HACK_VALUE "FavorDBCS"
  46. //
  47. // Private Definitions
  48. //
  49. //
  50. // When appending data, this is the extra amount we request to avoid
  51. // reallocations
  52. //
  53. #define STR_SLOP 128
  54. //
  55. // Converts a value between zero and fifteen to the appropriate hex digit
  56. //
  57. #define HEXDIGIT( nDigit ) \
  58. (TCHAR)((nDigit) > 9 ? \
  59. (nDigit) - 10 + 'A' \
  60. : (nDigit) + '0')
  61. //
  62. // Converts a single hex digit to its decimal equivalent
  63. //
  64. #define TOHEX( ch ) \
  65. ((ch) > '9' ? \
  66. (ch) >= 'a' ? \
  67. (ch) - 'a' + 10 : \
  68. (ch) - 'A' + 10 \
  69. : (ch) - '0')
  70. /*******************************************************************
  71. NAME: STR::STR
  72. SYNOPSIS: Construct a string object
  73. ENTRY: Optional object initializer
  74. NOTES: If the object is not valid (i.e. !IsValid()) then GetLastError
  75. should be called.
  76. The object is guaranteed to construct successfully if nothing
  77. or NULL is passed as the initializer.
  78. ********************************************************************/
  79. // Inlined in string.hxx
  80. VOID
  81. STR::AuxInit( const BYTE * pInit )
  82. {
  83. BOOL fRet;
  84. if ( pInit )
  85. {
  86. INT cbCopy = (::strlen( (const CHAR * ) pInit ) + 1) * sizeof(CHAR);
  87. fRet = Resize( cbCopy );
  88. if ( fRet ) {
  89. CopyMemory( QueryPtr(), pInit, cbCopy );
  90. m_cchLen = (cbCopy)/sizeof(CHAR) - 1;
  91. } else {
  92. BUFFER::SetValid( FALSE);
  93. }
  94. } else {
  95. *((CHAR *) QueryPtr()) = '\0';
  96. m_cchLen = 0;
  97. }
  98. return;
  99. } // STR::AuxInit()
  100. /*******************************************************************
  101. NAME: STR::AuxAppend
  102. SYNOPSIS: Appends the string onto this one.
  103. ENTRY: Object to append
  104. ********************************************************************/
  105. BOOL STR::AuxAppend( const BYTE * pStr, UINT cbStr, BOOL fAddSlop )
  106. {
  107. DBG_ASSERT( pStr != NULL );
  108. UINT cbThis = QueryCB();
  109. //
  110. // Only resize when we have to. When we do resize, we tack on
  111. // some extra space to avoid extra reallocations.
  112. //
  113. // Note: QuerySize returns the requested size of the string buffer,
  114. // *not* the strlen of the buffer
  115. //
  116. AcIncrement( CacStringAppend);
  117. if ( QuerySize() < cbThis + cbStr + sizeof(CHAR) )
  118. {
  119. if ( !Resize( cbThis + cbStr + (fAddSlop ? STR_SLOP : sizeof(CHAR) )) )
  120. return FALSE;
  121. }
  122. // copy the exact string and append a null character
  123. memcpy( (BYTE *) QueryPtr() + cbThis,
  124. pStr,
  125. cbStr);
  126. m_cchLen += cbStr/sizeof(CHAR);
  127. *((CHAR *) QueryPtr() + m_cchLen) = '\0'; // append an explicit null char
  128. return TRUE;
  129. } // STR::AuxAppend()
  130. #if 0
  131. // STR::SetLen() is inlined now
  132. BOOL
  133. STR::SetLen( IN DWORD cchLen)
  134. /*++
  135. Truncates the length of the string stored in this buffer
  136. to specified value.
  137. --*/
  138. {
  139. if ( cchLen >= QuerySize()) {
  140. // the buffer itself is not sufficient for this length. return error.
  141. return ( FALSE);
  142. }
  143. // null terminate the string at specified location
  144. *((CHAR *) QueryPtr() + cchLen) = '\0';
  145. m_cchLen = cchLen;
  146. return ( TRUE);
  147. } // STR::SetLen()
  148. #endif // 0
  149. /*******************************************************************
  150. NAME: STR::LoadString
  151. SYNOPSIS: Loads a string resource from this module's string table
  152. or from the system string table
  153. ENTRY: dwResID - System error or module string ID
  154. lpszModuleName - name of the module from which to load.
  155. If NULL, then load the string from system table.
  156. ********************************************************************/
  157. BOOL STR::LoadString( IN DWORD dwResID,
  158. IN LPCTSTR lpszModuleName, // Optional
  159. IN DWORD dwLangID // Optional
  160. )
  161. {
  162. BOOL fReturn = FALSE;
  163. INT cch;
  164. //
  165. // If lpszModuleName is NULL, load the string from system's string table.
  166. //
  167. if ( lpszModuleName == NULL) {
  168. BYTE * pchBuff = NULL;
  169. //
  170. // Call the appropriate function so we don't have to do the Unicode
  171. // conversion
  172. //
  173. cch = ::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER |
  174. FORMAT_MESSAGE_IGNORE_INSERTS |
  175. FORMAT_MESSAGE_MAX_WIDTH_MASK |
  176. FORMAT_MESSAGE_FROM_SYSTEM,
  177. NULL,
  178. dwResID,
  179. dwLangID,
  180. (LPSTR) &pchBuff,
  181. 1024,
  182. NULL );
  183. if ( cch ) {
  184. fReturn = Copy( (LPCSTR) pchBuff, cch );
  185. }
  186. //
  187. // Free the buffer FormatMessage allocated
  188. //
  189. if ( cch )
  190. {
  191. ::LocalFree( (VOID*) pchBuff );
  192. }
  193. } else {
  194. CHAR ach[STR_MAX_RES_SIZE];
  195. cch = ::LoadStringA( GetModuleHandle( lpszModuleName),
  196. dwResID,
  197. (CHAR *) ach,
  198. sizeof(ach));
  199. if ( cch )
  200. {
  201. fReturn = Copy( (LPSTR) ach, cch );
  202. }
  203. }
  204. return ( fReturn);
  205. } // STR::LoadString()
  206. BOOL STR::LoadString( IN DWORD dwResID,
  207. IN HMODULE hModule
  208. )
  209. {
  210. DBG_ASSERT( hModule != NULL );
  211. BOOL fReturn = FALSE;
  212. INT cch;
  213. CHAR ach[STR_MAX_RES_SIZE];
  214. cch = ::LoadStringA(hModule,
  215. dwResID,
  216. (CHAR *) ach,
  217. sizeof(ach));
  218. if ( cch ) {
  219. fReturn = Copy( (LPSTR) ach, cch );
  220. }
  221. return ( fReturn);
  222. } // STR::LoadString()
  223. BOOL
  224. STR::FormatString(
  225. IN DWORD dwResID,
  226. IN LPCTSTR apszInsertParams[],
  227. IN LPCTSTR lpszModuleName,
  228. IN DWORD cbMaxMsg
  229. )
  230. {
  231. DWORD cch;
  232. LPSTR pchBuff;
  233. BOOL fRet = FALSE;
  234. cch = ::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER |
  235. FORMAT_MESSAGE_ARGUMENT_ARRAY |
  236. FORMAT_MESSAGE_FROM_HMODULE,
  237. GetModuleHandle( lpszModuleName ),
  238. dwResID,
  239. 0,
  240. (LPSTR) &pchBuff,
  241. cbMaxMsg * sizeof(WCHAR),
  242. (va_list *) apszInsertParams );
  243. if ( cch )
  244. {
  245. fRet = Copy( (LPCSTR) pchBuff, cch );
  246. ::LocalFree( (VOID*) pchBuff );
  247. }
  248. /* INTRINSA suppress = uninitialized */
  249. return fRet;
  250. }
  251. /*******************************************************************
  252. NAME: STR::Escape
  253. SYNOPSIS: Replaces non-ASCII characters with their hex equivalent
  254. NOTES:
  255. HISTORY:
  256. Johnl 17-Aug-1994 Created
  257. ********************************************************************/
  258. BOOL STR::Escape( VOID )
  259. {
  260. CHAR * pch = QueryStr();
  261. int i = 0;
  262. CHAR ch;
  263. DBG_ASSERT( pch );
  264. while ( ch = pch[i] )
  265. {
  266. //
  267. // Escape characters that are in the non-printable range
  268. // but ignore CR and LF
  269. //
  270. if ( (((ch >= 0) && (ch <= 32)) ||
  271. ((ch >= 128) && (ch <= 159))||
  272. (ch == '%') || (ch == '?') || (ch == '+') || (ch == '&') ||
  273. (ch == '#')) &&
  274. !(ch == '\n' || ch == '\r') )
  275. {
  276. if ( !Resize( QuerySize() + 2 * sizeof(CHAR) ))
  277. return FALSE;
  278. //
  279. // Resize can change the base pointer
  280. //
  281. pch = QueryStr();
  282. //
  283. // Insert the escape character
  284. //
  285. pch[i] = '%';
  286. //
  287. // Insert a space for the two hex digits (memory can overlap)
  288. //
  289. /* INTRINSA suppress = uninitialized */
  290. ::memmove( &pch[i+3],
  291. &pch[i+1],
  292. (::strlen( &pch[i+1] ) + 1) * sizeof(CHAR));
  293. //
  294. // Convert the low then the high character to hex
  295. //
  296. UINT nDigit = (UINT)(ch % 16);
  297. pch[i+2] = HEXDIGIT( nDigit );
  298. ch /= 16;
  299. nDigit = (UINT)(ch % 16);
  300. pch[i+1] = HEXDIGIT( nDigit );
  301. i += 3;
  302. }
  303. else
  304. i++;
  305. }
  306. m_cchLen = ::strlen( QueryStr()); // to be safe recalc the new length
  307. return TRUE;
  308. } // STR::Escape()
  309. /*******************************************************************
  310. NAME: STR::EscapeSpaces
  311. SYNOPSIS: Replaces all spaces with their hex equivalent
  312. NOTES:
  313. HISTORY:
  314. Johnl 17-Aug-1994 Created
  315. ********************************************************************/
  316. BOOL STR::EscapeSpaces( VOID )
  317. {
  318. CHAR * pch = QueryStr();
  319. CHAR * pchTmp;
  320. int i = 0;
  321. DBG_ASSERT( pch );
  322. while ( pchTmp = strchr( pch + i, ' ' ))
  323. {
  324. i = DIFF( pchTmp - QueryStr() );
  325. if ( !Resize( QuerySize() + 2 * sizeof(CHAR) ))
  326. return FALSE;
  327. //
  328. // Resize can change the base pointer
  329. //
  330. pch = QueryStr();
  331. //
  332. // Insert the escape character
  333. //
  334. pch[i] = '%';
  335. //
  336. // Insert a space for the two hex digits (memory can overlap)
  337. //
  338. ::memmove( &pch[i+3],
  339. &pch[i+1],
  340. (::strlen( &pch[i+1] ) + 1) * sizeof(CHAR));
  341. //
  342. // This routine only replaces spaces
  343. //
  344. pch[i+1] = '2';
  345. pch[i+2] = '0';
  346. }
  347. //
  348. // If i is zero then no spaces were found
  349. //
  350. if ( i != 0 )
  351. {
  352. m_cchLen = ::strlen( QueryStr()); // to be safe recalc the new length
  353. }
  354. return TRUE;
  355. } // STR::EscapeSpaces()
  356. /*******************************************************************
  357. NAME: STR::Unescape
  358. SYNOPSIS: Replaces hex escapes with the Latin-1 equivalent
  359. NOTES: This is a Unicode only method
  360. HISTORY:
  361. Johnl 17-Aug-1994 Created
  362. ********************************************************************/
  363. BOOL STR::Unescape( VOID )
  364. {
  365. CHAR *pScan;
  366. CHAR *pDest;
  367. CHAR *pNextScan;
  368. wchar_t wch;
  369. DWORD dwLen;
  370. BOOL fChanged = FALSE;
  371. pDest = pScan = strchr( QueryStr(), '%');
  372. while (pScan)
  373. {
  374. if ( (pScan[1] == 'u' || pScan[1] == 'U') &&
  375. ::isxdigit( (UCHAR)pScan[2] ) &&
  376. ::isxdigit( (UCHAR)pScan[3] ) &&
  377. ::isxdigit( (UCHAR)pScan[4] ) &&
  378. ::isxdigit( (UCHAR)pScan[5] ) )
  379. {
  380. wch = TOHEX(pScan[2]) * 4096 + TOHEX(pScan[3]) * 256;
  381. wch += TOHEX(pScan[4]) * 16 + TOHEX(pScan[5]);
  382. dwLen = WideCharToMultiByte( CP_ACP,
  383. 0,
  384. &wch,
  385. 1,
  386. (LPSTR) pDest,
  387. 2,
  388. NULL,
  389. NULL );
  390. pDest += dwLen;
  391. pScan += 6;
  392. fChanged = TRUE;
  393. }
  394. else if ( ::isxdigit( (UCHAR)pScan[1] ) && // WinSE 4944
  395. ::isxdigit( (UCHAR)pScan[2] ))
  396. {
  397. *pDest = TOHEX(pScan[1]) * 16 + TOHEX(pScan[2]);
  398. pDest ++;
  399. pScan += 3;
  400. fChanged = TRUE;
  401. }
  402. else // Not an escaped char, just a '%'
  403. {
  404. if (fChanged)
  405. *pDest = *pScan;
  406. pDest++;
  407. pScan++;
  408. }
  409. //
  410. // Copy all the information between this and the next escaped char
  411. //
  412. pNextScan = strchr( pScan, '%');
  413. if (fChanged) // pScan!=pDest, so we have to copy the char's
  414. {
  415. if (!pNextScan) // That was the last '%' in the string
  416. {
  417. ::memmove( pDest,
  418. pScan,
  419. (::strlen( pScan ) + 1) * sizeof(CHAR)); // +1 to copy '\0'
  420. }
  421. else // There is another '%', and it is not back to back with this one
  422. if (dwLen = DIFF(pNextScan - pScan))
  423. {
  424. ::memmove( pDest,
  425. pScan,
  426. dwLen * sizeof(CHAR));
  427. pDest += dwLen;
  428. }
  429. }
  430. pScan = pNextScan;
  431. }
  432. if ( fChanged )
  433. {
  434. m_cchLen = ::strlen( QueryStr()); // for safety recalc the length
  435. }
  436. return TRUE;
  437. }
  438. BOOL
  439. STR::CopyToBuffer( WCHAR * lpszBuffer, LPDWORD lpcch) const
  440. /*++
  441. Description:
  442. Copies the string into the WCHAR buffer passed in if the buffer
  443. is sufficient to hold the translated string.
  444. If the buffer is small, the function returns small and sets *lpcch
  445. to contain the required number of characters.
  446. Arguments:
  447. lpszBuffer pointer to WCHAR buffer which on return contains
  448. the UNICODE version of string on success.
  449. lpcch pointer to DWORD containing the length of the buffer.
  450. If *lpcch == 0 then the function returns TRUE with
  451. the count of characters required stored in *lpcch.
  452. Also in this case lpszBuffer is not affected.
  453. Returns:
  454. TRUE on success.
  455. FALSE on failure. Use GetLastError() for further details.
  456. History:
  457. MuraliK 11-30-94
  458. --*/
  459. {
  460. BOOL fReturn = TRUE;
  461. if ( lpcch == NULL) {
  462. SetLastError( ERROR_INVALID_PARAMETER);
  463. return ( FALSE);
  464. }
  465. if ( *lpcch == 0) {
  466. //
  467. // Inquiring the size of buffer alone
  468. //
  469. *lpcch = QueryCCH() + 1; // add one character for terminating null
  470. } else {
  471. //
  472. // Copy after conversion from ANSI to Unicode
  473. //
  474. int iRet;
  475. iRet = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED,
  476. QueryStrA(), QueryCCH() + 1,
  477. lpszBuffer, (int )*lpcch);
  478. if ( iRet == 0 || iRet != (int ) *lpcch) {
  479. //
  480. // Error in conversion.
  481. //
  482. fReturn = FALSE;
  483. }
  484. }
  485. return ( fReturn);
  486. } // STR::CopyToBuffer()
  487. BOOL
  488. STR::CopyToBuffer( CHAR * lpszBuffer, LPDWORD lpcch) const
  489. /*++
  490. Description:
  491. Copies the string into the CHAR buffer passed in if the buffer
  492. is sufficient to hold the translated string.
  493. If the buffer is small, the function returns small and sets *lpcch
  494. to contain the required number of characters.
  495. Arguments:
  496. lpszBuffer pointer to CHAR buffer which on return contains
  497. the string on success.
  498. lpcch pointer to DWORD containing the length of the buffer.
  499. If *lpcch == 0 then the function returns TRUE with
  500. the count of characters required stored in *lpcch.
  501. Also in this case lpszBuffer is not affected.
  502. Returns:
  503. TRUE on success.
  504. FALSE on failure. Use GetLastError() for further details.
  505. History:
  506. MuraliK 20-Nov-1996
  507. --*/
  508. {
  509. BOOL fReturn = TRUE;
  510. if ( lpcch == NULL) {
  511. SetLastError( ERROR_INVALID_PARAMETER);
  512. return ( FALSE);
  513. }
  514. register DWORD cch = QueryCCH() + 1;
  515. if ( (*lpcch >= cch) && ( NULL != lpszBuffer)) {
  516. DBG_ASSERT( lpszBuffer);
  517. CopyMemory( lpszBuffer, QueryStrA(), cch);
  518. } else {
  519. DBG_ASSERT( (NULL == lpszBuffer) || (*lpcch < cch));
  520. SetLastError( ERROR_INSUFFICIENT_BUFFER);
  521. fReturn = FALSE;
  522. }
  523. *lpcch = cch;
  524. return ( fReturn);
  525. } // STR::CopyToBuffer()
  526. BOOL
  527. STR::SafeCopy( const CHAR * pchInit )
  528. {
  529. DWORD cchLen = 0;
  530. char cFirstByte = '\0';
  531. BOOL bReturn = TRUE;
  532. if ( QueryPtr() ) {
  533. cFirstByte = *(QueryStr());
  534. cchLen = m_cchLen;
  535. *(QueryStr()) = '\0';
  536. m_cchLen = 0;
  537. }
  538. if (pchInit != NULL) {
  539. bReturn = AuxAppend( (const BYTE *) pchInit, ::strlen( pchInit ), FALSE );
  540. if (!bReturn && QueryPtr()) {
  541. *(QueryStr()) = cFirstByte;
  542. m_cchLen = cchLen;
  543. }
  544. }
  545. return bReturn;
  546. }
  547. /*******************************************************************
  548. NAME: ::CollapseWhite
  549. SYNOPSIS: Collapses white space starting at the passed pointer.
  550. RETURNS: Returns a pointer to the next chunk of white space or the
  551. end of the string.
  552. NOTES: This is a Unicode only method
  553. HISTORY:
  554. Johnl 24-Aug-1994 Created
  555. ********************************************************************/
  556. WCHAR * CollapseWhite( WCHAR * pch )
  557. {
  558. LPWSTR pchStart = pch;
  559. while ( ISWHITE( *pch ) )
  560. pch++;
  561. ::memmove( pchStart,
  562. pch,
  563. DIFF(pch - pchStart) );
  564. while ( *pch && !ISWHITE( *pch ))
  565. pch++;
  566. return pch;
  567. } // CollapseWhite()
  568. //
  569. // Private constants.
  570. //
  571. #define ACTION_NOTHING 0x00000000
  572. #define ACTION_EMIT_CH 0x00010000
  573. #define ACTION_EMIT_DOT_CH 0x00020000
  574. #define ACTION_EMIT_DOT_DOT_CH 0x00030000
  575. #define ACTION_BACKUP 0x00040000
  576. #define ACTION_MASK 0xFFFF0000
  577. //
  578. // Private globals.
  579. //
  580. INT p_StateTable[16] =
  581. {
  582. // state 0
  583. 0 , // other
  584. 0 , // "."
  585. 4 , // EOS
  586. 1 , // "\"
  587. // state 1
  588. 0 , // other
  589. 2 , // "."
  590. 4 , // EOS
  591. 1 , // "\"
  592. // state 2
  593. 0 , // other
  594. 3 , // "."
  595. 4 , // EOS
  596. 1 , // "\"
  597. // state 3
  598. 0 , // other
  599. 0 , // "."
  600. 4 , // EOS
  601. 1 // "\"
  602. };
  603. INT p_ActionTable[16] =
  604. {
  605. // state 0
  606. ACTION_EMIT_CH, // other
  607. ACTION_EMIT_CH, // "."
  608. ACTION_EMIT_CH, // EOS
  609. ACTION_EMIT_CH, // "\"
  610. // state 1
  611. ACTION_EMIT_CH, // other
  612. ACTION_NOTHING, // "."
  613. ACTION_EMIT_CH, // EOS
  614. ACTION_NOTHING, // "\"
  615. // state 2
  616. ACTION_EMIT_DOT_CH, // other
  617. ACTION_NOTHING, // "."
  618. ACTION_EMIT_CH, // EOS
  619. ACTION_NOTHING, // "\"
  620. // state 3
  621. ACTION_EMIT_DOT_DOT_CH, // other
  622. ACTION_EMIT_DOT_DOT_CH, // "."
  623. ACTION_BACKUP, // EOS
  624. ACTION_BACKUP // "\"
  625. };
  626. // since max states = 4, we calculat the index by multiplying with 4.
  627. # define IndexFromState( st) ( (st) * 4)
  628. // the following table provides the index for various ISA Latin1 characters
  629. // in the incoming URL.
  630. // It assumes that the URL is ISO Latin1 == ASCII
  631. INT p_rgIndexForChar[] = {
  632. 2, // null char
  633. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 thru 10
  634. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 11 thru 20
  635. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21 thru 30
  636. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 31 thru 40
  637. 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, // 41 thru 50 46 = '.' 47 = '/'
  638. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 51 thru 60
  639. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 61 thru 70
  640. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 71 thru 80
  641. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 81 thru 90
  642. 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, // 91 thru 100 92 = '\\'
  643. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 101 thru 110
  644. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 111 thru 120
  645. 0, 0, 0, 0, 0, 0, 0, 0 // 121 thru 128
  646. };
  647. #define IS_UTF8_TRAILBYTE(ch) (((ch) & 0xc0) == 0x80)
  648. /*******************************************************************
  649. NAME: IsUTF8URL
  650. ENTRY: pszPath - The path to sanitize.
  651. HISTORY:
  652. atsusk 06-Jan-1998 Created.
  653. ********************************************************************/
  654. BOOL IsUTF8URL(CHAR * pszPath)
  655. {
  656. CHAR ch;
  657. if ( g_fFavorDBCS )
  658. {
  659. return ( MultiByteToWideChar( CP_ACP,
  660. MB_ERR_INVALID_CHARS,
  661. pszPath,
  662. -1,
  663. NULL,
  664. 0) == 0);
  665. }
  666. while (ch = *pszPath++) {
  667. if (ch & 0x80) {
  668. wchar_t wch;
  669. int iLen;
  670. BOOL bDefault = FALSE;
  671. char chTrail1;
  672. char chTrail2;
  673. chTrail1 = *pszPath++;
  674. if (chTrail1) {
  675. chTrail2 = *pszPath;
  676. } else {
  677. chTrail2 = 0;
  678. }
  679. if ( ((ch & 0xF0) == 0xE0) &&
  680. IS_UTF8_TRAILBYTE(chTrail1) &&
  681. IS_UTF8_TRAILBYTE(chTrail2) ) {
  682. // handle three byte case
  683. // 1110xxxx 10xxxxxx 10xxxxxx
  684. wch = (wchar_t) (((ch & 0x0f) << 12) |
  685. ((chTrail1 & 0x3f) << 6) |
  686. (chTrail2 & 0x3f));
  687. pszPath++;
  688. } else
  689. if ( ((ch & 0xE0) == 0xC0) &&
  690. IS_UTF8_TRAILBYTE(chTrail1) ) {
  691. // handle two byte case
  692. // 110xxxxx 10xxxxxx
  693. wch = (wchar_t) (((ch & 0x1f) << 6) | (chTrail1 & 0x3f));
  694. } else
  695. return FALSE;
  696. iLen = WideCharToMultiByte( CP_ACP,
  697. 0,
  698. &wch,
  699. 1,
  700. NULL,
  701. 0,
  702. NULL,
  703. &bDefault );
  704. if (bDefault == TRUE || iLen == 0 || iLen > 2)
  705. return FALSE;
  706. }
  707. }
  708. return TRUE;
  709. } // IsUTF8URL()
  710. /*******************************************************************
  711. NAME: CanonURL
  712. SYNOPSIS: Sanitizes a path by removing bogus path elements.
  713. As expected, "/./" entries are simply removed, and
  714. "/../" entries are removed along with the previous
  715. path element.
  716. To maintain compatibility with URL path semantics
  717. additional transformations are required. All backward
  718. slashes "\\" are converted to forward slashes. Any
  719. repeated forward slashes (such as "///") are mapped to
  720. single backslashes.
  721. A state table (see the p_StateTable global at the
  722. beginning of this file) is used to perform most of
  723. the transformations. The table's rows are indexed
  724. by current state, and the columns are indexed by
  725. the current character's "class" (either slash, dot,
  726. NULL, or other). Each entry in the table consists
  727. of the new state tagged with an action to perform.
  728. See the ACTION_* constants for the valid action
  729. codes.
  730. ENTRY: pszPath - The path to sanitize.
  731. fIsDBCSLocale - Indicates the server is in a
  732. locale that uses DBCS.
  733. HISTORY:
  734. KeithMo 07-Sep-1994 Created.
  735. MuraliK 28-Apr-1995 Adopted this for symbolic paths
  736. ********************************************************************/
  737. INT
  738. CanonURL(
  739. CHAR * pszPath,
  740. BOOL fIsDBCSLocale
  741. )
  742. {
  743. UCHAR * pszSrc;
  744. UCHAR * pszDest;
  745. DWORD ch;
  746. INT index;
  747. BOOL fDBCS = FALSE;
  748. DWORD cchMultiByte = 0;
  749. DBG_ASSERT( pszPath != NULL );
  750. //
  751. // Always look for UTF8 except when DBCS characters are detected
  752. //
  753. BOOL fScanForUTF8 = IsUTF8URL(pszPath);
  754. // If fScanForUTF8 is true, this URL is UTF8. don't recognize DBCS.
  755. if (fIsDBCSLocale && fScanForUTF8) {
  756. fIsDBCSLocale = FALSE;
  757. }
  758. //
  759. // Start our scan at the first character
  760. //
  761. pszSrc = pszDest = (UCHAR *) pszPath;
  762. //
  763. // State 0 is the initial state.
  764. //
  765. index = 0; // State = 0
  766. //
  767. // Loop until we enter state 4 (the final, accepting state).
  768. //
  769. do {
  770. //
  771. // Grab the next character from the path and compute its
  772. // next state. While we're at it, map any forward
  773. // slashes to backward slashes.
  774. //
  775. index = IndexFromState( p_StateTable[index]); // 4 = # states
  776. ch = (DWORD ) *pszSrc++;
  777. //
  778. // If this is a DBCS trailing byte - skip it
  779. //
  780. if ( !fIsDBCSLocale )
  781. {
  782. index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
  783. }
  784. else
  785. {
  786. if ( fDBCS )
  787. {
  788. //
  789. // If this is a 0 terminator, we need to set next
  790. // state accordingly
  791. //
  792. if ( ch == 0 )
  793. {
  794. index += p_rgIndexForChar[ ch ];
  795. }
  796. //
  797. // fDBCS == TRUE means this byte was a trail byte.
  798. // index is implicitly set to zero.
  799. //
  800. fDBCS = FALSE;
  801. }
  802. else
  803. {
  804. index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
  805. if ( IsDBCSLeadByte( (UCHAR)ch ) )
  806. {
  807. //
  808. // This is a lead byte, so the next is a trail.
  809. //
  810. fDBCS = TRUE;
  811. }
  812. }
  813. }
  814. //
  815. // Interesting UTF8 characters always have the top bit set
  816. //
  817. if ( (ch & 0x80) && fScanForUTF8 )
  818. {
  819. wchar_t wch;
  820. UCHAR mbstr[2];
  821. //
  822. // This is a UTF8 character, convert it here.
  823. // index is implicitly set to zero.
  824. //
  825. if ( cchMultiByte < 2 )
  826. {
  827. char chTrail1;
  828. char chTrail2;
  829. chTrail1 = *pszSrc;
  830. if (chTrail1) {
  831. chTrail2 = *(pszSrc+1);
  832. } else {
  833. chTrail2 = 0;
  834. }
  835. wch = 0;
  836. if ((ch & 0xf0) == 0xe0)
  837. {
  838. // handle three byte case
  839. // 1110xxxx 10xxxxxx 10xxxxxx
  840. wch = (wchar_t) (((ch & 0x0f) << 12) |
  841. ((chTrail1 & 0x3f) << 6) |
  842. (chTrail2 & 0x3f));
  843. cchMultiByte = WideCharToMultiByte( CP_ACP,
  844. 0,
  845. &wch,
  846. 1,
  847. (LPSTR) mbstr,
  848. 2,
  849. NULL,
  850. NULL );
  851. ch = mbstr[0];
  852. pszSrc += (3 - cchMultiByte);
  853. // WinSE 12843: Security Fix, Index should be updated for this character
  854. index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
  855. } else if ((ch & 0xe0) == 0xc0)
  856. {
  857. // handle two byte case
  858. // 110xxxxx 10xxxxxx
  859. wch = (wchar_t) (((ch & 0x1f) << 6) | (chTrail1 & 0x3f));
  860. cchMultiByte = WideCharToMultiByte( CP_ACP,
  861. 0,
  862. &wch,
  863. 1,
  864. (LPSTR) mbstr,
  865. 2,
  866. NULL,
  867. NULL );
  868. ch = mbstr[0];
  869. pszSrc += (2 - cchMultiByte);
  870. // WinSE 12843: Security Fix, Index should be updated for this character
  871. index += (( ch >= 0x80) ? 0 : p_rgIndexForChar[ch]);
  872. }
  873. } else {
  874. //
  875. // get ready to emit 2nd byte of converted character
  876. //
  877. ch = mbstr[1];
  878. cchMultiByte = 0;
  879. }
  880. }
  881. //
  882. // Perform the action associated with the state.
  883. //
  884. switch( p_ActionTable[index] )
  885. {
  886. case ACTION_EMIT_DOT_DOT_CH :
  887. *pszDest++ = '.';
  888. /* fall through */
  889. case ACTION_EMIT_DOT_CH :
  890. *pszDest++ = '.';
  891. /* fall through */
  892. case ACTION_EMIT_CH :
  893. *pszDest++ = (CHAR ) ch;
  894. /* fall through */
  895. case ACTION_NOTHING :
  896. break;
  897. case ACTION_BACKUP :
  898. if( (pszDest > ( (UCHAR *) pszPath + 1 ) ) && (*pszPath == '/'))
  899. {
  900. pszDest--;
  901. DBG_ASSERT( *pszDest == '/' );
  902. *pszDest = '\0';
  903. pszDest = (UCHAR *) strrchr( pszPath, '/') + 1;
  904. }
  905. *pszDest = '\0';
  906. break;
  907. default :
  908. DBG_ASSERT( !"Invalid action code in state table!" );
  909. index = IndexFromState(0) + 2; // move to invalid state
  910. DBG_ASSERT( p_StateTable[index] == 4);
  911. *pszDest++ = '\0';
  912. break;
  913. }
  914. } while( p_StateTable[index] != 4 );
  915. //
  916. // point to terminating nul
  917. //
  918. if (p_ActionTable[index] == ACTION_EMIT_CH) {
  919. pszDest--;
  920. }
  921. DBG_ASSERT(*pszDest == '\0' && pszDest > (UCHAR*) pszPath);
  922. return DIFF(pszDest - (UCHAR*)pszPath);
  923. } // CanonURL()
  924. DWORD
  925. InitializeStringFunctions(
  926. VOID
  927. )
  928. /*++
  929. Initializes the string function pointers depending on the system code page.
  930. If the code page doesn't have multi-byte characters, then pointers
  931. resolve to regular single byte functions. Otherwise, they resolve to more
  932. expense multi-byte functions.
  933. Arguments:
  934. None
  935. Returns:
  936. 0 if successful, else Win32 Error
  937. --*/
  938. {
  939. CPINFO CodePageInfo;
  940. BOOL bRet;
  941. HKEY hKey;
  942. DWORD dwRet;
  943. bRet = GetCPInfo( CP_ACP, &CodePageInfo );
  944. if ( bRet && CodePageInfo.MaxCharSize == 1 )
  945. {
  946. g_pfnStrlwr = (PFNSTRCASE) _strlwr;
  947. g_pfnStrupr = (PFNSTRCASE) _strupr;
  948. g_pfnStrnicmp = (PFNSTRNICMP) _strnicmp;
  949. g_pfnStricmp = (PFNSTRICMP) _stricmp;
  950. g_pfnStrlen = (PFNSTRLEN) strlen;
  951. g_pfnStrrchr = (PFNSTRRCHR) strrchr;
  952. }
  953. //
  954. // Do we need to hack for Korean?
  955. //
  956. dwRet = RegOpenKeyEx( HKEY_LOCAL_MACHINE,
  957. UTF8_HACK_KEY,
  958. 0,
  959. KEY_READ,
  960. &hKey );
  961. if ( dwRet == ERROR_SUCCESS )
  962. {
  963. DWORD dwValue = 0;
  964. DWORD cbValue = sizeof( dwValue );
  965. dwRet = RegQueryValueEx( hKey,
  966. UTF8_HACK_VALUE,
  967. NULL,
  968. NULL,
  969. (LPBYTE) &dwValue,
  970. &cbValue );
  971. if ( dwRet == ERROR_SUCCESS )
  972. {
  973. g_fFavorDBCS = !!dwValue;
  974. }
  975. DBG_REQUIRE( RegCloseKey( hKey ) == ERROR_SUCCESS );
  976. }
  977. return ERROR_SUCCESS;
  978. }
  979. UCHAR *
  980. IISstrupr(
  981. UCHAR * pszString
  982. )
  983. /*++
  984. Wrapper for strupr() call.
  985. Arguments:
  986. pszString - String to uppercase
  987. Returns:
  988. Pointer to string uppercased
  989. --*/
  990. {
  991. DBG_ASSERT( g_pfnStrupr != NULL );
  992. return g_pfnStrupr( pszString );
  993. }
  994. UCHAR *
  995. IISstrlwr(
  996. UCHAR * pszString
  997. )
  998. /*++
  999. Wrapper for strlwr() call.
  1000. Arguments:
  1001. pszString - String to lowercase
  1002. Returns:
  1003. Pointer to string lowercased
  1004. --*/
  1005. {
  1006. DBG_ASSERT( g_pfnStrlwr != NULL );
  1007. return g_pfnStrlwr( pszString );
  1008. }
  1009. size_t
  1010. IISstrlen(
  1011. UCHAR * pszString
  1012. )
  1013. /*++
  1014. Wrapper for strlen() call.
  1015. Arguments:
  1016. pszString - String to check
  1017. Returns:
  1018. Length of string
  1019. --*/
  1020. {
  1021. DBG_ASSERT( g_pfnStrlen != NULL );
  1022. return g_pfnStrlen( pszString );
  1023. }
  1024. INT
  1025. IISstrnicmp(
  1026. UCHAR * pszString1,
  1027. UCHAR * pszString2,
  1028. size_t size
  1029. )
  1030. /*++
  1031. Wrapper for strnicmp() call.
  1032. Arguments:
  1033. pszString1 - String1
  1034. pszString2 - String2
  1035. size - # characters to compare upto
  1036. Returns:
  1037. 0 if equal, -1 if pszString1 < pszString2, else 1
  1038. --*/
  1039. {
  1040. DBG_ASSERT( g_pfnStrnicmp != NULL );
  1041. return g_pfnStrnicmp( pszString1, pszString2, size );
  1042. }
  1043. INT
  1044. IISstricmp(
  1045. UCHAR * pszString1,
  1046. UCHAR * pszString2
  1047. )
  1048. /*++
  1049. Wrapper for stricmp() call.
  1050. Arguments:
  1051. pszString1 - String1
  1052. pszString2 - String2
  1053. Returns:
  1054. 0 if equal, -1 if pszString1 < pszString2, else 1
  1055. --*/
  1056. {
  1057. DBG_ASSERT( g_pfnStricmp != NULL );
  1058. return g_pfnStricmp( pszString1, pszString2 );
  1059. }
  1060. // like strncpy, but doesn't pad the end of the string with zeroes, which
  1061. // is expensive when `source' is short and `count' is large
  1062. char *
  1063. IISstrncpy(
  1064. char * dest,
  1065. const char * source,
  1066. size_t count)
  1067. {
  1068. char *start = dest;
  1069. while (count && (*dest++ = *source++)) /* copy string */
  1070. count--;
  1071. if (count) /* append one zero */
  1072. *dest = '\0';
  1073. return(start);
  1074. }
  1075. UCHAR *
  1076. IISstrrchr(
  1077. const UCHAR * pszString,
  1078. UINT c
  1079. )
  1080. /*++
  1081. Wrapper for strrchr() call.
  1082. Arguments:
  1083. pszString - String
  1084. c - Character to find.
  1085. Returns:
  1086. pointer to the char or NULL.
  1087. --*/
  1088. {
  1089. DBG_ASSERT( g_pfnStrrchr != NULL );
  1090. return g_pfnStrrchr( pszString, c );
  1091. }