Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

322 lines
8.9 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1996 - 1998.
  5. //
  6. // File: cgiesc.cxx
  7. //
  8. // Contents: WEB CGI escape & unescape classes
  9. //
  10. // History: 96/Jan/3 DwightKr Created
  11. //
  12. //----------------------------------------------------------------------------
  13. #include <pch.cxx>
  14. #pragma hdrstop
  15. #include <cgiesc.hxx>
  16. //+---------------------------------------------------------------------------
  17. //
  18. // Function: DecodeURLEscapes - Decode URL escapes
  19. //
  20. // Synopsis: Removes the escape characters from a string, converting to
  21. // Unicode along the way.
  22. //
  23. // Arguments: [pIn] - string to convert
  24. // [l] - length of string in chars, updated on return
  25. // [pOut] - converted string
  26. // [ulCodePage] - code page for translation
  27. //
  28. //----------------------------------------------------------------------------
  29. void DecodeURLEscapes( BYTE * pIn, ULONG & l, WCHAR * pOut, ULONG ulCodePage )
  30. {
  31. WCHAR * p2 = pOut;
  32. WCHAR c1;
  33. WCHAR c2;
  34. XArray<BYTE> xDeferBuf;
  35. BYTE * pDefer = 0;
  36. ULONG l2 = l;
  37. for( ; l2; l2-- )
  38. {
  39. BOOL fSaveAsUnicode = FALSE;
  40. // Convert ASCII to corresponding character
  41. // If Latin-1 character, save for MB translation, accumulate char
  42. // If Unicode escape, flush accumulated chars and save converted char
  43. c1 = *pIn;
  44. //
  45. // Spaces are escaped by converting them into plus signs.
  46. // Convert them back.
  47. //
  48. if ( c1 == '+' )
  49. {
  50. c1 = ' ';
  51. pIn++;
  52. }
  53. else if (c1 == '%')
  54. {
  55. //
  56. // Special characters are converted to values of the format %XY
  57. // where XY is the HEX code for the ASCII character.
  58. //
  59. // A percent sign is transmitted as %%.
  60. //
  61. if (*(pIn+1) == '%')
  62. {
  63. c1 = '%';
  64. pIn += 2;
  65. l2--;
  66. }
  67. else if (l2 >= 3)
  68. {
  69. pIn++;
  70. c1 = (WCHAR) toupper(*pIn);
  71. c2 = (WCHAR) toupper(*(pIn+1));
  72. if ( c1 == 'U' && l2 >= 6 )
  73. {
  74. // Unicode escape, %uxxxx
  75. c1 = c2;
  76. c2 = (WCHAR) toupper(*(pIn+2));
  77. WCHAR c3 = (WCHAR) toupper(*(pIn+3));
  78. WCHAR c4 = (WCHAR) toupper(*(pIn+4));
  79. if ( isxdigit( c1 ) && isxdigit( c2 ) &&
  80. isxdigit( c3 ) && isxdigit( c4 ) )
  81. {
  82. c1 = ((c1 >= 'A') ? (c1-'A')+10 : c1-'0') << 12;
  83. c1 += ((c2 >= 'A') ? (c2-'A')+10 : c2-'0') << 8;
  84. c1 += ((c3 >= 'A') ? (c3-'A')+10 : c3-'0') << 4;
  85. c1 += ((c4 >= 'A') ? (c4-'A')+10 : c4-'0');
  86. if ( pDefer )
  87. {
  88. unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
  89. cchDefer = MultiByteToWideChar( ulCodePage,
  90. 0,
  91. (char *) xDeferBuf.GetPointer(),
  92. cchDefer,
  93. p2,
  94. cchDefer );
  95. Win4Assert( cchDefer != 0 );
  96. pDefer = 0;
  97. p2 += cchDefer;
  98. }
  99. pIn += 5;
  100. l2 -= 5;
  101. fSaveAsUnicode = TRUE;
  102. }
  103. else
  104. {
  105. c1 = '%';
  106. }
  107. }
  108. else if ( isxdigit( c1 ) && isxdigit( c2 ) )
  109. {
  110. c1 = ( ((c1 >= 'A') ? (c1-'A')+10 : c1-'0')*16 +
  111. ((c2 >= 'A') ? (c2-'A')+10 : c2-'0') );
  112. pIn += 2;
  113. l2 -= 2;
  114. if ( c1 >= 0x80 && 0 == pDefer )
  115. {
  116. // The character needs to be deferred for MBCS
  117. // translation.
  118. if (xDeferBuf.GetPointer() == 0)
  119. {
  120. xDeferBuf.Init( l2+1 );
  121. }
  122. pDefer = xDeferBuf.GetPointer();
  123. }
  124. }
  125. else
  126. c1 = '%';
  127. }
  128. else
  129. {
  130. pIn++;
  131. if ( c1 >= 0x80 && 0 == pDefer )
  132. {
  133. // The character needs to be deferred for MBCS
  134. // translation.
  135. if (xDeferBuf.GetPointer() == 0)
  136. {
  137. xDeferBuf.Init( l2+1 );
  138. }
  139. pDefer = xDeferBuf.GetPointer();
  140. }
  141. }
  142. }
  143. else
  144. {
  145. pIn++;
  146. }
  147. if (! fSaveAsUnicode)
  148. {
  149. if ( c1 >= 0x80 && 0 == pDefer )
  150. {
  151. // The character needs to be deferred for MBCS
  152. // translation.
  153. if (xDeferBuf.GetPointer() == 0)
  154. {
  155. xDeferBuf.Init( l2+1 );
  156. }
  157. pDefer = xDeferBuf.GetPointer();
  158. }
  159. }
  160. else
  161. {
  162. Win4Assert( pDefer == 0 );
  163. }
  164. if (pDefer)
  165. {
  166. Win4Assert( c1 < 0x100 );
  167. *pDefer++ = (BYTE) c1;
  168. }
  169. else
  170. {
  171. *p2++ = c1;
  172. }
  173. }
  174. if ( pDefer )
  175. {
  176. unsigned cchDefer = CiPtrToUint( pDefer - xDeferBuf.GetPointer() );
  177. cchDefer = MultiByteToWideChar( ulCodePage,
  178. 0,
  179. (char *) xDeferBuf.GetPointer(),
  180. cchDefer,
  181. p2,
  182. cchDefer );
  183. Win4Assert( cchDefer != 0 );
  184. pDefer = 0;
  185. p2 += cchDefer;
  186. }
  187. *p2 = 0;
  188. l = CiPtrToUlong( p2 - pOut );
  189. }
  190. void DecodeEscapes( WCHAR * p, ULONG & l )
  191. {
  192. DecodeEscapes( p, l, p );
  193. }
  194. void DecodeEscapes( WCHAR * pIn, ULONG & l, WCHAR * pOut )
  195. {
  196. WCHAR * p2;
  197. int c1;
  198. int c2;
  199. ULONG l2 = l;
  200. for( p2=pOut; l2; l2-- )
  201. {
  202. //
  203. // Spaces are escaped by converting them into plus signs.
  204. // Convert them back.
  205. //
  206. if ( *pIn == L'+' )
  207. {
  208. *p2++ = L' ';
  209. pIn++;
  210. }
  211. else if (*pIn == L'%')
  212. {
  213. //
  214. // Special characters are converted to values of the format %XY
  215. // where XY is the HEX code for the ASCII character.
  216. //
  217. // A percent sign is transmitted as %%.
  218. //
  219. if (*(pIn+1) == L'%')
  220. {
  221. *p2++ = L'%';
  222. pIn += 2;
  223. l2--;
  224. }
  225. else if (l2 > 2)
  226. {
  227. pIn++;
  228. c1=towupper(*pIn);
  229. c2=towupper(*(pIn+1));
  230. if ( isxdigit( c1 ) && isxdigit( c2 ) )
  231. {
  232. *p2++ = ( ((c1 >= L'A') ? (c1-L'A')+10 : c1-L'0')*16 +
  233. ((c2 >= L'A') ? (c2-L'A')+10 : c2-L'0')
  234. );
  235. pIn += 2;
  236. l2 -= 2;
  237. }
  238. else
  239. *p2++ = L'%';
  240. }
  241. else
  242. {
  243. *p2++ = *pIn++;
  244. }
  245. }
  246. else
  247. {
  248. *p2++ = *pIn++;
  249. }
  250. }
  251. *p2 = 0;
  252. l = CiPtrToUlong( p2 - pOut );
  253. }
  254. //+---------------------------------------------------------------------------
  255. //
  256. // Function: DecodeHtmlNumeric - decode HTML numeric entity
  257. //
  258. // Synopsis: Looks for sequences like "&#12345;" and converts in-place
  259. // to a single unicode character.
  260. //
  261. // Arguments: [pIn] - string to convert
  262. //
  263. //----------------------------------------------------------------------------
  264. void DecodeHtmlNumeric( WCHAR * pIn )
  265. {
  266. pIn = wcschr( pIn, L'&' );
  267. WCHAR * p2 = pIn;
  268. while (pIn && *pIn)
  269. {
  270. if (*pIn == L'&' && pIn[1] == L'#')
  271. {
  272. pIn += 2;
  273. USHORT ch = 0;
  274. while (*pIn && *pIn != L';')
  275. {
  276. if (*pIn >= L'0' && *pIn <= L'9')
  277. ch = ch*10 + (*pIn - L'0');
  278. pIn++;
  279. }
  280. if (*pIn)
  281. pIn++;
  282. *p2++ = ch;
  283. }
  284. else
  285. {
  286. *p2++ = *pIn++;
  287. }
  288. }
  289. if (p2)
  290. *p2 = 0;
  291. }