Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

461 lines
15 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1996 - 1998.
  5. //
  6. // File: string.cxx
  7. //
  8. // Contents: Yet another string class and support functions
  9. //
  10. // History: 96/Jan/3 DwightKr Created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #pragma hdrstop
  15. #include <locale.h>
  16. //+---------------------------------------------------------------------------
  17. //
  18. // Member: CVirtualString::CVirtualString - public constructor
  19. //
  20. // Synopsis: Initializes the string by virtually allocating a buffer.
  21. //
  22. // History: 96/Jan/03 DwightKr Created.
  23. //
  24. //----------------------------------------------------------------------------
  25. CVirtualString::CVirtualString( unsigned cwcBuffer )
  26. : _wcsString(0),
  27. _wcsEnd(0),
  28. _cwcBuffer(cwcBuffer),
  29. _pwcLastCommitted(0)
  30. {
  31. _wcsString = new WCHAR[ _cwcBuffer ];
  32. _pwcLastCommitted = _wcsString + _cwcBuffer - 1;
  33. _wcsEnd = _wcsString;
  34. *_wcsEnd = 0;
  35. } //CVirtualString
  36. //+---------------------------------------------------------------------------
  37. //
  38. // Member: CVirtualString::~CVirtualString - public destructor
  39. //
  40. // Synopsis: Releases virtual memory associated with this buffer
  41. //
  42. // History: 96/Jan/03 DwightKr Created.
  43. //
  44. //----------------------------------------------------------------------------
  45. CVirtualString::~CVirtualString()
  46. {
  47. delete [] _wcsString;
  48. } //~CVirtualString
  49. //+---------------------------------------------------------------------------
  50. //
  51. // Member: CVirtualString::GrowBuffer, private
  52. //
  53. // Synopsis: Commits or re-allocates the string as needed
  54. //
  55. // Arguments: [cwcValue] - # of WCHARs by which to grow the buffer
  56. //
  57. // History: 96/Mar/25 dlee Created from DwightKr's StrCat
  58. //
  59. //----------------------------------------------------------------------------
  60. void CVirtualString::GrowBuffer( ULONG cwcValue )
  61. {
  62. unsigned cwcNewString = cwcValue + 1;
  63. unsigned cwcOldString = CiPtrToUint( _wcsEnd - _wcsString );
  64. unsigned cwcRemaining = CiPtrToUint( _cwcBuffer - cwcOldString );
  65. Win4Assert( _cwcBuffer >= cwcOldString );
  66. if ( cwcRemaining < cwcNewString )
  67. {
  68. DWORD cwcOldBuffer = _cwcBuffer;
  69. DWORD cwcNewBuffer = _cwcBuffer;
  70. do
  71. {
  72. cwcNewBuffer *= 2;
  73. cwcRemaining = cwcNewBuffer - cwcOldString;
  74. }
  75. while ( cwcRemaining < cwcNewString );
  76. XArray<WCHAR> xTemp( cwcNewBuffer );
  77. RtlCopyMemory( xTemp.GetPointer(),
  78. _wcsString,
  79. _cwcBuffer * sizeof WCHAR );
  80. delete [] _wcsString;
  81. _wcsString = xTemp.Acquire();
  82. _cwcBuffer = cwcNewBuffer;
  83. _wcsEnd = _wcsString + cwcOldString;
  84. _pwcLastCommitted = _wcsString + _cwcBuffer - 1;
  85. }
  86. } //GrowBuffer
  87. // if TRUE, the character doesn't need to be URL escaped
  88. static const BYTE g_afNoEscape[128] =
  89. {
  90. FALSE, // 00 (NUL)
  91. FALSE, // 01 (SOH)
  92. FALSE, // 02 (STX)
  93. FALSE, // 03 (ETX)
  94. FALSE, // 04 (EOT)
  95. FALSE, // 05 (ENQ)
  96. FALSE, // 06 (ACK)
  97. FALSE, // 07 (BEL)
  98. FALSE, // 08 (BS)
  99. FALSE, // 09 (HT)
  100. FALSE, // 0A (LF)
  101. FALSE, // 0B (VT)
  102. FALSE, // 0C (FF)
  103. FALSE, // 0D (CR)
  104. FALSE, // 0E (SI)
  105. FALSE, // 0F (SO)
  106. FALSE, // 10 (DLE)
  107. FALSE, // 11 (DC1)
  108. FALSE, // 12 (DC2)
  109. FALSE, // 13 (DC3)
  110. FALSE, // 14 (DC4)
  111. FALSE, // 15 (NAK)
  112. FALSE, // 16 (SYN)
  113. FALSE, // 17 (ETB)
  114. FALSE, // 18 (CAN)
  115. FALSE, // 19 (EM)
  116. FALSE, // 1A (SUB)
  117. FALSE, // 1B (ESC)
  118. FALSE, // 1C (FS)
  119. FALSE, // 1D (GS)
  120. FALSE, // 1E (RS)
  121. FALSE, // 1F (US)
  122. FALSE, // 20 SPACE
  123. FALSE, // 21 !
  124. FALSE, // 22 "
  125. FALSE, // 23 #
  126. FALSE, // 24 $
  127. FALSE, // 25 %
  128. FALSE, // 26 &
  129. FALSE, // 27 '
  130. FALSE, // 28 (
  131. FALSE, // 29 )
  132. FALSE, // 2A *
  133. FALSE, // 2B +
  134. FALSE, // 2C ,
  135. FALSE, // 2D -
  136. TRUE, // 2E .
  137. TRUE, // 2F /
  138. TRUE, // 30 0
  139. TRUE, // 31 1
  140. TRUE, // 32 2
  141. TRUE, // 33 3
  142. TRUE, // 34 4
  143. TRUE, // 35 5
  144. TRUE, // 36 6
  145. TRUE, // 37 7
  146. TRUE, // 38 8
  147. TRUE, // 39 9
  148. TRUE, // 3A :
  149. FALSE, // 3B ;
  150. FALSE, // 3C <
  151. TRUE, // 3D =
  152. FALSE, // 3E >
  153. FALSE, // 3F ?
  154. FALSE, // 40 @
  155. TRUE, // 41 A
  156. TRUE, // 42 B
  157. TRUE, // 43 C
  158. TRUE, // 44 D
  159. TRUE, // 45 E
  160. TRUE, // 46 F
  161. TRUE, // 47 G
  162. TRUE, // 48 H
  163. TRUE, // 49 I
  164. TRUE, // 4A J
  165. TRUE, // 4B K
  166. TRUE, // 4C L
  167. TRUE, // 4D M
  168. TRUE, // 4E N
  169. TRUE, // 4F O
  170. TRUE, // 50 P
  171. TRUE, // 51 Q
  172. TRUE, // 52 R
  173. TRUE, // 53 S
  174. TRUE, // 54 T
  175. TRUE, // 55 U
  176. TRUE, // 56 V
  177. TRUE, // 57 W
  178. TRUE, // 58 X
  179. TRUE, // 59 Y
  180. TRUE, // 5A Z
  181. FALSE, // 5B [
  182. FALSE, // 5C \
  183. FALSE, // 5D ]
  184. FALSE, // 5E ^
  185. FALSE, // 5F _
  186. FALSE, // 60 `
  187. TRUE, // 61 a
  188. TRUE, // 62 b
  189. TRUE, // 63 c
  190. TRUE, // 64 d
  191. TRUE, // 65 e
  192. TRUE, // 66 f
  193. TRUE, // 67 g
  194. TRUE, // 68 h
  195. TRUE, // 69 i
  196. TRUE, // 6A j
  197. TRUE, // 6B k
  198. TRUE, // 6C l
  199. TRUE, // 6D m
  200. TRUE, // 6E n
  201. TRUE, // 6F o
  202. TRUE, // 70 p
  203. TRUE, // 71 q
  204. TRUE, // 72 r
  205. TRUE, // 73 s
  206. TRUE, // 74 t
  207. TRUE, // 75 u
  208. TRUE, // 76 v
  209. TRUE, // 77 w
  210. TRUE, // 78 x
  211. TRUE, // 79 y
  212. TRUE, // 7A z
  213. FALSE, // 7B {
  214. FALSE, // 7C |
  215. FALSE, // 7D }
  216. FALSE, // 7E ~
  217. FALSE, // 7F (DEL)
  218. };
  219. static const unsigned cNoEscape = sizeof g_afNoEscape / sizeof g_afNoEscape[0];
  220. //+---------------------------------------------------------------------------
  221. //
  222. // Function: IsNoUrlEscape
  223. //
  224. // Synopsis: Determines if a character doesn't need URL escaping.
  225. //
  226. // Arguments: [c] -- Character to test, T must be unsigned.
  227. //
  228. // Returns: TRUE if c doesn't need URL escaping.
  229. //
  230. // History: 98/Apr/22 dlee Created.
  231. //
  232. //----------------------------------------------------------------------------
  233. template<class T> inline BOOL IsNoUrlEscape( T c )
  234. {
  235. if ( c < cNoEscape )
  236. return g_afNoEscape[ c ];
  237. return FALSE;
  238. } //IsNoUrlEscape
  239. //+---------------------------------------------------------------------------
  240. //
  241. // Function: URLEscapeW
  242. //
  243. // Synopsis: Appends an escaped version of a string to a virtual string.
  244. //
  245. // History: 96/Apr/03 dlee Created from DwightKr's code
  246. // 96/May/21 DwightKr Escape spaces
  247. // 97/Nov/19 AlanW Allow %ummmm escape codes
  248. //
  249. //----------------------------------------------------------------------------
  250. void URLEscapeW( WCHAR const * wcsString,
  251. CVirtualString & StrResult,
  252. ULONG ulCodepage,
  253. BOOL fConvertSpaceToPlus )
  254. {
  255. BOOL fTryConvertMB = TRUE;
  256. //
  257. // All spaces are converted to plus signs (+), percents are doubled,
  258. // Non alphanumeric characters are represented by their
  259. // hexadecimal ASCII equivalents.
  260. //
  261. Win4Assert( wcsString != 0 );
  262. while ( *wcsString != 0 )
  263. {
  264. //
  265. // Spaces can be treated differently on either size of the ?.
  266. // Spaces before the ? (the URI) needs to have spaces escaped;
  267. // those AFTER the ? can be EITHER escaped, or changed to a +.
  268. // Use either '+' or the % escape depending upon fConverSpaceToPlus
  269. //
  270. if ( IsNoUrlEscape( *wcsString ) )
  271. {
  272. StrResult.CharCat( *wcsString );
  273. }
  274. else if ( L' ' == *wcsString )
  275. {
  276. if ( fConvertSpaceToPlus )
  277. StrResult.CharCat( L'+' );
  278. else
  279. StrResult.StrCat( L"%20", 3 );
  280. }
  281. else if ( L'%' == *wcsString )
  282. {
  283. StrResult.StrCat( L"%%", 2 );
  284. }
  285. else if ( *wcsString < 0x80 )
  286. {
  287. StrResult.CharCat( L'%' );
  288. unsigned hiNibble = ((*wcsString) & 0xf0) >> 4;
  289. unsigned loNibble = (*wcsString) & 0x0f;
  290. StrResult.CharCat( hiNibble > 9 ? (hiNibble-10 + L'A') : (hiNibble + L'0') );
  291. StrResult.CharCat( loNibble > 9 ? (loNibble-10 + L'A') : (loNibble + L'0') );
  292. }
  293. else
  294. {
  295. Win4Assert( *wcsString >= 0x80 );
  296. //
  297. // We encountered a character outside the ASCII range.
  298. // Try counverting the Unicode string to multi-byte. If the
  299. // conversion succeeds, continue by converting to 8 bit characters.
  300. // Otherwise, convert this and any other Unicode characters to the
  301. // %ummmm escape.
  302. //
  303. if ( fTryConvertMB )
  304. {
  305. ULONG cchString = wcslen(wcsString);
  306. XArray<BYTE> pszString( cchString*2 );
  307. BOOL fUsedDefaultChar = FALSE;
  308. DWORD cbConvert;
  309. ULONG cbString = pszString.Count();
  310. do
  311. {
  312. cbConvert = WideCharToMultiByte( ulCodepage,
  313. #if (WINVER >= 0x0500)
  314. WC_NO_BEST_FIT_CHARS |
  315. #endif // (WINVER >= 0x0500)
  316. WC_COMPOSITECHECK |
  317. WC_DEFAULTCHAR,
  318. wcsString,
  319. cchString,
  320. (CHAR *) pszString.Get(),
  321. cbString,
  322. 0,
  323. &fUsedDefaultChar );
  324. if ( 0 == cbConvert )
  325. {
  326. Win4Assert( cbString > 0 );
  327. if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
  328. {
  329. cbString += cbString;
  330. delete pszString.Acquire();
  331. pszString.Init(cbString);
  332. }
  333. else if ( GetLastError() == ERROR_INVALID_PARAMETER )
  334. {
  335. // Presumably unknown code page.
  336. fUsedDefaultChar = TRUE;
  337. break;
  338. }
  339. else
  340. {
  341. THROW( CException() );
  342. }
  343. }
  344. } while ( 0 == cbConvert );
  345. if ( ! fUsedDefaultChar )
  346. {
  347. URLEscapeMToW(pszString.Get(), cbConvert, StrResult, fConvertSpaceToPlus );
  348. return;
  349. }
  350. fTryConvertMB = FALSE;
  351. }
  352. // Convert to an escaped Unicode character
  353. StrResult.StrCat( L"%u", 2 );
  354. USHORT wch = *wcsString;
  355. unsigned iNibble = (wch & 0xf000) >> 12;
  356. StrResult.CharCat( iNibble > 9 ? (iNibble-10 + L'A') : (iNibble + L'0') );
  357. iNibble = (wch & 0x0f00) >> 8;
  358. StrResult.CharCat( iNibble > 9 ? (iNibble-10 + L'A') : (iNibble + L'0') );
  359. iNibble = (wch & 0x00f0) >> 4;
  360. StrResult.CharCat( iNibble > 9 ? (iNibble-10 + L'A') : (iNibble + L'0') );
  361. iNibble = wch & 0x000f;
  362. StrResult.CharCat( iNibble > 9 ? (iNibble-10 + L'A') : (iNibble + L'0') );
  363. }
  364. wcsString++;
  365. }
  366. } //URLEscapeW
  367. //+---------------------------------------------------------------------------
  368. //
  369. // Function: URLEscapeMToW
  370. //
  371. // Synopsis: Appends an escaped version of a string to a virtual string.
  372. // The string is 'pseudo-UniCode'. A multi-byte input string
  373. // is converted to a UniCode URL, which is implicitly ASCII.
  374. //
  375. // History: 96/Apr/03 dlee Created from DwightKr's code
  376. // 96/May/21 DwightKr Escape spaces
  377. // 96-Sep-17 KyleP Modified URLEscapeW
  378. //
  379. //----------------------------------------------------------------------------
  380. void URLEscapeMToW( BYTE const * psz,
  381. unsigned cc,
  382. CVirtualString & StrResult,
  383. BOOL fConvertSpaceToPlus )
  384. {
  385. //
  386. // All spaces are converted to plus signs (+), percents are doubled,
  387. // Non alphanumeric characters are represented by their
  388. // hexadecimal ASCII equivalents.
  389. //
  390. Win4Assert( psz != 0 );
  391. for( unsigned i = 0; i < cc; i++ )
  392. {
  393. //
  394. // Spaces can be treated differently on either size of the ?.
  395. // Spaces before the ? (the URI) needs to have spaces escaped;
  396. // those AFTER the ? can be EITHER escaped, or changed to a +.
  397. // Use either '+' or the % escape depending upon fConverSpaceToPlus
  398. //
  399. if ( IsNoUrlEscape( psz[i] ) )
  400. {
  401. StrResult.CharCat( (WCHAR)psz[i] );
  402. }
  403. else if ( L' ' == psz[i] )
  404. {
  405. if ( fConvertSpaceToPlus )
  406. StrResult.CharCat( L'+' );
  407. else
  408. StrResult.StrCat( L"%20", 3 );
  409. }
  410. else if ( L'%' == psz[i] )
  411. {
  412. StrResult.StrCat( L"%%", 2 );
  413. }
  414. else
  415. {
  416. StrResult.CharCat( L'%' );
  417. unsigned hiNibble = ((psz[i]) & 0xf0) >> 4;
  418. unsigned loNibble = (psz[i]) & 0x0f;
  419. StrResult.CharCat( hiNibble > 9 ? (hiNibble-10 + L'A') : (hiNibble + L'0') );
  420. StrResult.CharCat( loNibble > 9 ? (loNibble-10 + L'A') : (loNibble + L'0') );
  421. }
  422. }
  423. } //URLEscapeMToW