Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2330 lines
67 KiB

  1. /*++
  2. Copyright (c) 1995 Microsoft Corporation
  3. Module Name:
  4. parseurl.cxx
  5. Abstract:
  6. Contains functions to parse the basic URLs - FTP, Gopher, HTTP.
  7. An URL parser simply acts as a macro: it must break out the protocol-specific
  8. information from the URL and initiate opening the identified resource: all
  9. this can be accomplished by calling the relevant Internet protocol APIs.
  10. Code in this module is based on RFC1738
  11. Contents:
  12. IsValidUrl
  13. DoesSchemeRequireSlashes
  14. ParseUrl
  15. CrackUrl
  16. EncodeUrlPath
  17. (HexCharToNumber)
  18. (NumberToHexChar)
  19. DecodeUrl
  20. DecodeUrlInSitu
  21. DecodeUrlStringInSitu
  22. GetUrlAddressInfo
  23. GetUrlAddress
  24. MapUrlSchemeName
  25. MapUrlScheme
  26. MapUrlSchemeToName
  27. Author:
  28. Richard L Firth (rfirth) 26-Apr-1995
  29. Environment:
  30. Win32(s) user-mode DLL
  31. Revision History:
  32. 26-Apr-1995
  33. Created
  34. --*/
  35. #include <wininetp.h>
  36. //
  37. // private manifests
  38. //
  // RESERVED is defined as an alias of SAFE, so SafetyList entries written as
  // RESERVED carry exactly the same flag value as entries written as SAFE.
  39. #define RESERVED SAFE
  40. //
  41. // private macros
  42. //
  43. //#define HEX_CHAR_TO_NUMBER(ch) \
  44. // ((ch <= '9') \
  45. // ? (ch - '0') \
  46. // : ((ch >= 'a') \
  47. // ? ((ch - 'a') + 10) \
  48. // : ((ch - 'A') + 10)))
  //
  // NUMBER_TO_HEX_CHAR - maps a value in the range 0..15 to its upper-case
  // hexadecimal digit ('0'..'9', 'A'..'F'). The argument is evaluated twice,
  // so it must not have side effects.
  //
  #define NUMBER_TO_HEX_CHAR(n) \
      (((n) > 9) ? (((char)(n) - 10) + 'A') : ((char)(n) + '0'))
  //
  // IS_UNSAFE_URL_CHARACTER / IS_UNSAFE_URL_WIDECHARACTER - evaluate to non-zero
  // if the character must be treated as unsafe for the given scheme flag(s):
  //
  //  - anything at or below 0x20, or at or above 0x7f, is always unsafe
  //  - otherwise the character's SafetyList entry (indexed by Char - 0x21) is
  //    tested against UNSAFE and the caller-supplied Scheme flag(s)
  //
  // The range tests come first so SafetyList is only indexed with 0x21..0x7e.
  // Scheme is parenthesized so that an expression argument (e.g. a conditional)
  // is combined with UNSAFE as a whole. The character argument is evaluated
  // more than once, so it must not have side effects.
  //
  #define IS_UNSAFE_URL_CHARACTER(Char, Scheme) \
      (((UCHAR)(Char) <= 0x20) || ((UCHAR)(Char) >= 0x7f) \
      || (SafetyList[(Char) - 0x21] & (UNSAFE | (Scheme))))
  #define IS_UNSAFE_URL_WIDECHARACTER(wChar, Scheme) \
      (((WCHAR)(wChar) <= 0x0020) || ((WCHAR)(wChar) >= 0x007f) \
      || (SafetyList[(wChar) - 0x0021] & (UNSAFE | (Scheme))))
  57. //
  58. // private types
  59. //
  60. //
  61. // private prototypes
  62. //
  63. PRIVATE
  64. char
  65. HexCharToNumber(
  66. IN char ch
  67. );
  68. PRIVATE
  69. char
  70. NumberToHexChar(
  71. IN int Number
  72. );
  73. //
  74. // private data
  75. //
  76. //
  77. // SafetyList - the list of characters above 0x20 and below 0x7f that are
  78. // classified as safe, unsafe or scheme-specific. Safe characters do not need
  79. // to be escaped for any URL scheme. Unsafe characters must be escaped for all
  80. // URL schemes. Scheme-specific characters need only be escaped for the relevant
  81. // scheme(s)
  82. //
  // Indexed by (character - 0x21): the first entry describes '!' (0x21) and the
  // last entry describes '~' (0x7E). The IS_UNSAFE_URL_* macros handle every
  // character outside that range themselves and never index this table for it.
  // An entry carrying HOSTNAME is reported as unsafe when a caller tests with
  // the HOSTNAME flag (as the IsValidHostName* routines do).
  83. const
  84. PRIVATE
  85. UCHAR
  86. SafetyList[] = {
  87. //
  88. // UNSAFE: 0x00..0x20
  89. //
  90. SAFE | HOSTNAME, // 0x21 (!)
  91. UNSAFE, // 0x22 (")
  92. UNSAFE, // 0x23 (#)
  93. SAFE | HOSTNAME, // 0x24 ($)
  94. UNSAFE, // 0x25 (%)
  95. RESERVED | HOSTNAME, // 0x26 (&)
  96. SAFE | HOSTNAME, // 0x27 (')
  97. SAFE | HOSTNAME, // 0x28 (()
  98. SAFE | HOSTNAME, // 0x29 ())
  99. SAFE | HOSTNAME, // 0x2A (*)
  100. SCHEME_GOPHER | HOSTNAME, // 0x2B (+)
  101. SAFE | HOSTNAME, // 0x2C (,)
  102. SAFE, // 0x2D (-)
  103. SAFE, // 0x2E (.)
  104. RESERVED | HOSTNAME, // 0x2F (/)
  105. SAFE, // 0x30 (0)
  106. SAFE, // 0x31 (1)
  107. SAFE, // 0x32 (2)
  108. SAFE, // 0x33 (3)
  109. SAFE, // 0x34 (4)
  110. SAFE, // 0x35 (5)
  111. SAFE, // 0x36 (6)
  112. SAFE, // 0x37 (7)
  113. SAFE, // 0x38 (8)
  114. SAFE, // 0x39 (9)
  115. RESERVED | HOSTNAME, // 0x3A (:)
  116. RESERVED | HOSTNAME, // 0x3B (;)
  117. UNSAFE, // 0x3C (<)
  118. RESERVED | HOSTNAME, // 0x3D (=)
  119. UNSAFE, // 0x3E (>)
  120. RESERVED | SCHEME_GOPHER | HOSTNAME, // 0x3F (?)
  121. RESERVED | HOSTNAME, // 0x40 (@)
  122. SAFE, // 0x41 (A)
  123. SAFE, // 0x42 (B)
  124. SAFE, // 0x43 (C)
  125. SAFE, // 0x44 (D)
  126. SAFE, // 0x45 (E)
  127. SAFE, // 0x46 (F)
  128. SAFE, // 0x47 (G)
  129. SAFE, // 0x48 (H)
  130. SAFE, // 0x49 (I)
  131. SAFE, // 0x4A (J)
  132. SAFE, // 0x4B (K)
  133. SAFE, // 0x4C (L)
  134. SAFE, // 0x4D (M)
  135. SAFE, // 0x4E (N)
  136. SAFE, // 0x4F (O)
  137. SAFE, // 0x50 (P)
  138. SAFE, // 0x51 (Q)
  139. SAFE, // 0x52 (R)
  140. SAFE, // 0x53 (S)
  141. SAFE, // 0x54 (T)
  142. SAFE, // 0x55 (U)
  143. SAFE, // 0x56 (V)
  144. SAFE, // 0x57 (W)
  145. SAFE, // 0x58 (X)
  146. SAFE, // 0x59 (Y)
  147. SAFE, // 0x5A (Z)
  148. UNSAFE, // 0x5B ([)
  149. UNSAFE, // 0x5C (\)
  150. UNSAFE, // 0x5D (])
  151. UNSAFE, // 0x5E (^)
  152. SAFE, // 0x5F (_)
  153. UNSAFE, // 0x60 (`)
  154. SAFE, // 0x61 (a)
  155. SAFE, // 0x62 (b)
  156. SAFE, // 0x63 (c)
  157. SAFE, // 0x64 (d)
  158. SAFE, // 0x65 (e)
  159. SAFE, // 0x66 (f)
  160. SAFE, // 0x67 (g)
  161. SAFE, // 0x68 (h)
  162. SAFE, // 0x69 (i)
  163. SAFE, // 0x6A (j)
  164. SAFE, // 0x6B (k)
  165. SAFE, // 0x6C (l)
  166. SAFE, // 0x6D (m)
  167. SAFE, // 0x6E (n)
  168. SAFE, // 0x6F (o)
  169. SAFE, // 0x70 (p)
  170. SAFE, // 0x71 (q)
  171. SAFE, // 0x72 (r)
  172. SAFE, // 0x73 (s)
  173. SAFE, // 0x74 (t)
  174. SAFE, // 0x75 (u)
  175. SAFE, // 0x76 (v)
  176. SAFE, // 0x77 (w)
  177. SAFE, // 0x78 (x)
  178. SAFE, // 0x79 (y)
  179. SAFE, // 0x7A (z)
  180. UNSAFE, // 0x7B ({)
  181. UNSAFE, // 0x7C (|)
  182. UNSAFE, // 0x7D (})
  183. UNSAFE // 0x7E (~)
  184. //
  185. // UNSAFE: 0x7F..0xFF
  186. //
  187. };
  188. INT ByteCountForLeadUtf8Byte(char ch)
  189. {
  190. static const int aiByteCountForFirstZero[] = {1,1,2,3,4,5,6,1}; // the final 1 shouldn't happen on a proper UTF-8 string
  191. DWORD dwFirstZeroBit = 0;
  192. BYTE chMask = 0x80; // binary 1000 0000
  193. // While the mask reveals a non-zero and we haven't counted zeroes past
  194. //the range of aiByteCountForLeadNibbleInUtf8[], look for a zero.
  195. while ((char)chMask & ch
  196. && dwFirstZeroBit < ARRAY_ELEMENTS(aiByteCountForFirstZero)-1 )
  197. {
  198. dwFirstZeroBit++;
  199. chMask = chMask >> 1;
  200. }
  201. return aiByteCountForFirstZero[dwFirstZeroBit];
  202. }
  203. LPSTR Utf8StrChr( LPSTR pString, LPSTR pEnd, char chTarget)
  204. {
  205. while( pString < pEnd && *pString != '\0')
  206. {
  207. if (*pString == chTarget)
  208. return pString;
  209. pString += ByteCountForLeadUtf8Byte(*pString);
  210. }
  211. return NULL;
  212. };
  213. LPSTR Utf8StrChrEx( LPSTR pString, LPSTR pEnd, char chTarget1, char chTarget2)
  214. {
  215. while( pString < pEnd && *pString != '\0')
  216. {
  217. if (*pString == chTarget1
  218. || *pString == chTarget2)
  219. {
  220. return pString;
  221. }
  222. pString += ByteCountForLeadUtf8Byte(*pString);
  223. }
  224. return NULL;
  225. };
  226. //
  227. // UrlSchemeList - the list of schemes that we support
  228. //
  // One row of UrlSchemeList: everything this module records about a scheme.
  229. typedef struct {
  230. LPSTR SchemeName; // scheme text without ':' or slashes (NULL in the default row)
  231. DWORD SchemeLength; // number of characters in SchemeName
  232. INTERNET_SCHEME SchemeType; // INTERNET_SCHEME_* value reported for this scheme
  233. DWORD SchemeFlags; // SCHEME_* flag for this scheme (SCHEME_HTTP for http and https)
  234. BOOL NeedSlashes; // TRUE if the scheme name must be followed by "://"
  235. DWORD OpenFlags; // additional open flags (WINHTTP_FLAG_SECURE for https)
  236. } URL_SCHEME_INFO;
  237. const
  238. PRIVATE
  239. URL_SCHEME_INFO
  240. UrlSchemeList[] = {
  241. NULL, 0, INTERNET_SCHEME_DEFAULT, 0, FALSE, 0,
  242. "http", 4, INTERNET_SCHEME_HTTP, SCHEME_HTTP, TRUE, 0,
  243. "https", 5, INTERNET_SCHEME_HTTPS, SCHEME_HTTP, TRUE, WINHTTP_FLAG_SECURE,
  244. };
  245. #define NUMBER_OF_URL_SCHEMES ARRAY_ELEMENTS(UrlSchemeList)
  246. BOOL ScanSchemes(LPTSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
  247. {
  248. for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
  249. {
  250. if ((UrlSchemeList[i].SchemeLength == ccStr)
  251. && (strnicmp(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
  252. {
  253. *pwResult = i;
  254. return TRUE;
  255. }
  256. }
  257. return FALSE;
  258. }
  259. //
  260. // functions
  261. //
  262. BOOL
  263. IsValidUrl(
  264. IN LPCSTR lpszUrl
  265. )
  266. /*++
  267. Routine Description:
  268. Determines whether an URL has a valid format
  269. Arguments:
  270. lpszUrl - pointer to URL to check.
  271. Assumes: 1. lpszUrl is non-NULL, non-empty string
  272. Return Value:
  273. BOOL
  274. --*/
  275. {
  276. INET_ASSERT(lpszUrl != NULL);
  277. INET_ASSERT(*lpszUrl != '\0');
  278. while (*lpszUrl != '\0') {
  279. if (IS_UNSAFE_URL_CHARACTER(*lpszUrl, SCHEME_ANY)) {
  280. return FALSE;
  281. }
  282. ++lpszUrl;
  283. }
  284. return TRUE;
  285. }
  286. BOOL
  287. IsValidHostNameW(
  288. IN LPCWSTR lpwszHostName,
  289. IN DWORD dwFlags
  290. )
  291. /*++
  292. Routine Description:
  293. Determines whether an hostname has valid chars in it
  294. Arguments:
  295. lpwszHostName - Pointer to hostname to check. Assumes lpwszHostName
  296. is non-NULL and points to a non-empty UNICODE string.
  297. dwFlags - Flags that modify validation.
  298. If IVH_DISALLOW_IPV6_SCOPE_ID is set then an IPv6 literal
  299. address containing a scope ID will be invalid
  300. Return Value:
  301. BOOL
  302. --*/
  303. {
  304. SOCKADDR_IN6 Address;
  305. INT Error;
  306. INT AddressLength;
  307. BOOL bAllowScopeID = ((dwFlags & IVHN_DISALLOW_IPV6_SCOPE_ID) == 0);
  308. INET_ASSERT(lpwszHostName != NULL);
  309. // first check if this is a valid IPv4 iteral
  310. AddressLength = (INT)sizeof(Address);
  311. Error = _I_WSAStringToAddressW((LPWSTR)lpwszHostName, AF_INET, NULL, (LPSOCKADDR)&Address, &AddressLength);
  312. if (Error == 0) {
  313. return TRUE;
  314. }
  315. // now check if this is a valid IPv6 literal
  316. AddressLength = sizeof(Address);
  317. Error = _I_WSAStringToAddressW((LPWSTR)lpwszHostName, AF_INET6, NULL, (LPSOCKADDR)&Address, &AddressLength);
  318. if (Error == 0) {
  319. // is an IPv6 literal but we also require surrounding brackets
  320. if ((*lpwszHostName == L'[') && (*(lpwszHostName+lstrlenW(lpwszHostName)-1) == L']')) {
  321. // check scope ID situation
  322. if (bAllowScopeID) {
  323. return TRUE;
  324. } else {
  325. if (Address.sin6_scope_id == 0) {
  326. return TRUE;
  327. }
  328. }
  329. }
  330. }
  331. // not a literal address so do strict bad character checking
  332. while (*lpwszHostName != L'\0') {
  333. if (IS_UNSAFE_URL_WIDECHARACTER(*lpwszHostName, HOSTNAME)) {
  334. return FALSE;
  335. }
  336. ++lpwszHostName;
  337. }
  338. return TRUE;
  339. }
  340. BOOL
  341. IsValidHostNameA(
  342. IN LPCSTR lpszHostName,
  343. IN DWORD dwFlags
  344. )
  345. /*++
  346. Routine Description:
  347. Determines whether an hostname has valid chars in it
  348. Arguments:
  349. lpszHostName - pointer to Hostname to check.
  350. lpszHostName - Pointer to hostname to check. Assumes lpszHostName
  351. is non-NULL and points to a non-empty ASCII string.
  352. dwFlags - Flags that modify validation.
  353. If IVH_DISALLOW_IPV6_SCOPE_ID is set then an IPv6 literal
  354. address containing a scope ID will be invalid
  355. Return Value:
  356. BOOL
  357. --*/
  358. {
  359. SOCKADDR_IN6 Address;
  360. INT Error;
  361. INT AddressLength;
  362. BOOL bAllowScopeID = ((dwFlags & IVHN_DISALLOW_IPV6_SCOPE_ID) == 0);
  363. INET_ASSERT(lpszHostName != NULL);
  364. // first check if this is a valid IPv4 iteral
  365. AddressLength = sizeof(Address);
  366. Error = _I_WSAStringToAddressA((LPSTR)lpszHostName, AF_INET, NULL, (LPSOCKADDR)&Address, &AddressLength);
  367. if (Error == 0) {
  368. return TRUE;
  369. }
  370. // now check if this is a valid IPv6 literal
  371. AddressLength = sizeof(Address);
  372. Error = _I_WSAStringToAddressA((LPSTR)lpszHostName, AF_INET6, NULL, (LPSOCKADDR)&Address, &AddressLength);
  373. if (Error == 0) {
  374. // is an IPv6 literal but we also require surrounding brackets
  375. if ((*lpszHostName == '[') && (*(lpszHostName+lstrlen(lpszHostName)-1) == ']')) {
  376. // check scope ID situation
  377. if (bAllowScopeID) {
  378. return TRUE;
  379. } else {
  380. if (Address.sin6_scope_id == 0) {
  381. return TRUE;
  382. }
  383. }
  384. }
  385. }
  386. // not a literal address so do strict bad character checking
  387. while (*lpszHostName != '\0') {
  388. if (IS_UNSAFE_URL_CHARACTER(*lpszHostName, HOSTNAME)) {
  389. return FALSE;
  390. }
  391. ++lpszHostName;
  392. }
  393. return TRUE;
  394. }
  395. BOOL
  396. DoesSchemeRequireSlashes(
  397. IN LPSTR lpszScheme,
  398. IN DWORD dwSchemeLength,
  399. IN BOOL bHasHostName
  400. )
  401. /*++
  402. Routine Description:
  403. Determines whether a protocol scheme requires slashes
  404. Arguments:
  405. lpszScheme - pointer to protocol scheme in question
  406. (does not include ':' or slashes, just scheme name)
  407. dwUrlLength - if not 0, string length of lpszScheme
  408. Return Value:
  409. BOOL
  410. --*/
  411. {
  412. DWORD i;
  413. //
  414. // if dwSchemeLength is 0 then lpszUrl is ASCIIZ. Find its length
  415. //
  416. if (dwSchemeLength == 0) {
  417. dwSchemeLength = strlen(lpszScheme);
  418. }
  419. if (ScanSchemes(lpszScheme, dwSchemeLength, &i))
  420. {
  421. return UrlSchemeList[i].NeedSlashes;
  422. }
  423. return bHasHostName;
  424. }
  425. DWORD
  426. CrackUrl(
  427. IN OUT LPSTR lpszUrl,
  428. IN DWORD dwUrlLength,
  429. IN BOOL bEscape,
  430. OUT LPINTERNET_SCHEME lpSchemeType OPTIONAL,
  431. OUT LPSTR* lpszSchemeName OPTIONAL,
  432. OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
  433. OUT LPSTR* lpszHostName OPTIONAL,
  434. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  435. IN BOOL fUnescapeHostName,
  436. OUT LPINTERNET_PORT lpServerPort OPTIONAL,
  437. OUT LPSTR* lpszUserName OPTIONAL,
  438. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  439. OUT LPSTR* lpszPassword OPTIONAL,
  440. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  441. OUT LPSTR* lpszUrlPath OPTIONAL,
  442. OUT LPDWORD lpdwUrlPathLength OPTIONAL,
  443. OUT LPSTR* lpszExtraInfo OPTIONAL,
  444. OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
  445. OUT LPBOOL pHavePort
  446. )
  447. /*++
  448. Routine Description:
  449. Cracks an URL into its constituent parts
  450. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  451. then the accompanying lpdw field must also be supplied
  452. bEscape is no longer used/supported and must always be false.
  453. Arguments:
  454. lpszUrl - pointer to URL to crack. This buffer WILL BE
  455. OVERWRITTEN if it contains escape sequences that
  456. we will convert back to ANSI characters and
  457. fUnescapeHostName == TRUE
  458. dwUrlLength - if not 0, string length of lpszUrl
  459. bEscape - TRUE if we are to escape the url-path
  460. lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP
  461. lpszSchemeName - returned scheme name
  462. lpdwSchemeNameLength - length of scheme name
  463. lpszHostName - returned host name
  464. lpdwHostNameLength - length of host name buffer
  465. lpServerPort - returned server port if present in the URL, else 0
  466. lpszUserName - returned user name if present
  467. lpdwUserNameLength - length of user name buffer
  468. lpszPassword - returned password if present
  469. lpdwPasswordLength - length of password buffer
  470. lpszUrlPath - returned, canonicalized URL path
  471. lpdwUrlPathLength - length of url-path buffer
  472. lpszExtraInfo - returned search string or intra-page link if present
  473. lpdwExtraInfoLength - length of extra info buffer
  474. pHavePort - returned boolean indicating whether port was specified
  475. Return Value:
  476. DWORD
  477. Success - ERROR_SUCCESS
  478. Failure - ERROR_WINHTTP_UNRECOGNIZED_SCHEME
  479. --*/
  480. {
  481. DWORD error = ERROR_WINHTTP_INTERNAL_ERROR;
  482. DWORD schemeLength;
  483. INTERNET_SCHEME schemeType;
  484. LPSTR pCursor, pEnd;
  485. if(bEscape)
  486. {
  487. INET_ASSERT(!"bEscape==TRUE no longer supported for parseurl.cxx::CrackUrl()");
  488. error = ERROR_INVALID_PARAMETER;
  489. goto quit;
  490. };
  491. //
  492. // if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length
  493. //
  494. if (dwUrlLength == 0) {
  495. dwUrlLength = strlen(lpszUrl);
  496. }
  497. pCursor = lpszUrl;
  498. pEnd = lpszUrl + dwUrlLength;
  499. //
  500. // extract the scheme (ex: "SCHEME://host/path...")
  501. //
  502. pEnd = Utf8StrChr(pCursor, pEnd, ':');
  503. if (pEnd == NULL)
  504. {
  505. error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
  506. goto quit;
  507. }
  508. schemeLength = (DWORD)(pEnd - pCursor);
  509. //
  510. // We now point to the scheme with pCursor.. extract some info about it
  511. //
  512. DWORD i;
  513. int skip;
  514. BOOL needSlashes;
  515. BOOL haveSlashes;
  516. needSlashes = FALSE;
  517. haveSlashes = FALSE;
  518. schemeType = INTERNET_SCHEME_UNKNOWN;
  519. if (ScanSchemes(pCursor, schemeLength, &i))
  520. {
  521. schemeType = UrlSchemeList[i].SchemeType;
  522. needSlashes = UrlSchemeList[i].NeedSlashes;
  523. }
  524. else
  525. {
  526. error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
  527. goto quit;
  528. }
  529. skip = 1; // skip ':'
  530. if ((dwUrlLength - schemeLength > 3) && (memcmp(&lpszUrl[schemeLength], "://", 3) == 0))
  531. {
  532. skip = 3; // skip "://"
  533. haveSlashes = TRUE;
  534. }
  535. //
  536. // If we don't have slashes, make sure we don't need them.
  537. // If we have slashes, make sure they are required.
  538. //
  539. if( (haveSlashes || needSlashes) && !(haveSlashes && needSlashes))
  540. {
  541. error = ERROR_WINHTTP_INVALID_URL;
  542. goto quit;
  543. }
  544. //
  545. // We've parsed the scheme, so set up that result.
  546. //
  547. if (ARGUMENT_PRESENT(lpSchemeType)) {
  548. *lpSchemeType = schemeType;
  549. }
  550. if (ARGUMENT_PRESENT(lpszSchemeName)) {
  551. *lpszSchemeName = lpszUrl;
  552. *lpdwSchemeNameLength = schemeLength;
  553. }
  554. //
  555. // Now crack the rest of the URL
  556. //
  557. lpszUrl += schemeLength + skip;
  558. dwUrlLength -= schemeLength + skip;
  559. error = GetUrlAddress(&lpszUrl,
  560. &dwUrlLength,
  561. lpszUserName,
  562. lpdwUserNameLength,
  563. lpszPassword,
  564. lpdwPasswordLength,
  565. lpszHostName,
  566. lpdwHostNameLength,
  567. fUnescapeHostName,
  568. lpServerPort,
  569. pHavePort
  570. );
  571. if (error != ERROR_SUCCESS)
  572. goto quit;
  573. if (ARGUMENT_PRESENT(lpszExtraInfo))
  574. {
  575. pCursor = Utf8StrChrEx(lpszUrl, lpszUrl+dwUrlLength, '#', '?');
  576. if (pCursor == NULL)
  577. pCursor = lpszUrl+dwUrlLength;
  578. *lpszExtraInfo = pCursor;
  579. *lpdwExtraInfoLength = (DWORD)(lpszUrl+dwUrlLength-pCursor);
  580. dwUrlLength -= *lpdwExtraInfoLength;
  581. }
  582. //
  583. // If the user didn't ask for the extra info, it is returned appended to the url path.
  584. //
  585. if (ARGUMENT_PRESENT(lpszUrlPath))
  586. {
  587. *lpszUrlPath = lpszUrl;
  588. *lpdwUrlPathLength = dwUrlLength;
  589. }
  590. quit:
  591. return error;
  592. }
  593. #define DEFAULT_REALLOC_SIZE 1024
  594. DWORD
  595. EncodeUrlPath(
  596. IN DWORD Flags,
  597. IN DWORD SchemeFlags,
  598. IN LPSTR UrlPath,
  599. IN DWORD UrlPathLength,
  600. OUT LPSTR* pEncodedUrlPath,
  601. IN OUT LPDWORD EncodedUrlPathLength
  602. )
  603. /*++
  604. Routine Description:
  605. Encodes an URL-path. That is, escapes the string. Creates a new URL-path in
  606. which all the 'unsafe' and reserved characters for this scheme have been
  607. converted to escape sequences
  608. Arguments:
  609. Flags - controlling expansion
  610. SchemeFlags - which scheme we are encoding for -
  611. SCHEME_HTTP, etc.
  612. UrlPath - pointer to the unescaped string
  613. UrlPathLength - length of Url
  614. EncodedUrlPath - pointer to buffer where encoded URL will be
  615. written
  616. EncodedUrlPathLength - IN: size of EncodedUrlPath
  617. OUT: number of bytes written to EncodedUrlPath
  618. Return Value:
  619. DWORD
  620. Success - ERROR_SUCCESS
  621. Failure - ERROR_INSUFFICIENT_BUFFER
  622. UrlPathLength not large enough to store encoded URL path
  623. --*/
  624. {
  625. DWORD error;
  626. DWORD len;
  627. len = *EncodedUrlPathLength;
  628. LPSTR EncodedUrlPath = *pEncodedUrlPath;
  629. UCHAR ch;
  630. UNREFERENCED_PARAMETER(UrlPathLength);
  631. while(0 != (ch = (UCHAR)*UrlPath++))
  632. {
  633. //
  634. // check whether this character is safe. For now, we encode all unsafe
  635. // and scheme-specific characters the same way (i.e. irrespective of
  636. // scheme)
  637. //
  638. // We are allowing '/' to be copied unmodified
  639. //
  640. if (len < 3)
  641. {
  642. LPSTR pStr = (LPSTR)REALLOCATE_MEMORY(*pEncodedUrlPath, *EncodedUrlPathLength+DEFAULT_REALLOC_SIZE);
  643. if (pStr)
  644. {
  645. EncodedUrlPath = pStr+*EncodedUrlPathLength-len;
  646. *pEncodedUrlPath = pStr;
  647. len += DEFAULT_REALLOC_SIZE;
  648. *EncodedUrlPathLength += DEFAULT_REALLOC_SIZE;
  649. }
  650. else
  651. {
  652. goto error;
  653. }
  654. }
  655. if (IS_UNSAFE_URL_CHARACTER(ch, SchemeFlags)
  656. && !((ch == '/') && (Flags & NO_ENCODE_PATH_SEP)))
  657. {
  658. *EncodedUrlPath++ = '%';
  659. //*EncodedUrlPath++ = NumberToHexChar((int)ch / 16);
  660. *EncodedUrlPath++ = (CHAR)NUMBER_TO_HEX_CHAR((int)ch / 16);
  661. //*EncodedUrlPath++ = NumberToHexChar((int)ch % 16);
  662. *EncodedUrlPath++ = (CHAR)NUMBER_TO_HEX_CHAR((int)ch % 16);
  663. len -= 2; // extra --len below
  664. }
  665. else
  666. {
  667. *EncodedUrlPath++ = (signed char)ch;
  668. }
  669. --len;
  670. }
  671. *EncodedUrlPath = '\0';
  672. *EncodedUrlPathLength -= len;
  673. error = ERROR_SUCCESS;
  674. quit:
  675. return error;
  676. error:
  677. error = ERROR_NOT_ENOUGH_MEMORY;
  678. goto quit;
  679. }
  680. PRIVATE
  681. char
  682. HexCharToNumber(
  683. IN char ch
  684. )
  685. /*++
  686. Routine Description:
  687. Converts an ANSI character in the range '0'..'9' 'A'..'F' 'a'..'f' to its
  688. corresponding hexadecimal value (0..f)
  689. Arguments:
  690. ch - character to convert
  691. Return Value:
  692. char
  693. hexadecimal value of ch, as an 8-bit (signed) character value
  694. --*/
  695. {
  696. return (CHAR)((ch <= '9') ? (ch - '0')
  697. : ((ch >= 'a') ? ((ch - 'a') + 10) : ((ch - 'A') + 10)));
  698. }
  699. PRIVATE
  700. char
  701. NumberToHexChar(
  702. IN int Number
  703. )
  704. /*++
  705. Routine Description:
  706. Converts a number in the range 0..15 to its ASCII character hex representation
  707. ('0'..'F')
  708. Arguments:
  709. Number - to convert
  710. Return Value:
  711. char
  712. character in above range
  713. --*/
  714. {
  715. return (Number <= 9) ? (char)('0' + Number) : (char)('A' + (Number - 10));
  716. }
  717. DWORD
  718. DecodeUrl(
  719. IN LPSTR Url,
  720. IN DWORD UrlLength,
  721. OUT LPSTR DecodedString,
  722. IN OUT LPDWORD DecodedLength
  723. )
  724. /*++
  725. Routine Description:
  726. Converts an URL string with embedded escape sequences (%xx) to a counted
  727. string
  728. It is safe to pass the same pointer for the string to convert, and the
  729. buffer for the converted results: if the current character is not escaped,
  730. it just gets overwritten, else the input pointer is moved ahead 2 characters
  731. further than the output pointer, which is benign
  732. Arguments:
  733. Url - pointer to URL string to convert
  734. UrlLength - number of characters in UrlString
  735. DecodedString - pointer to buffer that receives converted string
  736. DecodedLength - IN: number of characters in buffer
  737. OUT: number of characters converted
  738. Return Value:
  739. DWORD
  740. Success - ERROR_SUCCESS
  741. Failure - ERROR_WINHTTP_INVALID_URL
  742. UrlString couldn't be converted
  743. ERROR_INSUFFICIENT_BUFFER
  744. ConvertedString isn't large enough to hold all the converted
  745. UrlString
  746. --*/
  747. {
  748. DWORD bufferRemaining;
  749. bufferRemaining = *DecodedLength;
  750. while (UrlLength && bufferRemaining) {
  751. char ch;
  752. if (*Url == '%') {
  753. //
  754. // BUGBUG - would %00 ever appear in an URL?
  755. //
  756. ++Url;
  757. if (isxdigit(*Url)) {
  758. ch = HexCharToNumber(*Url++) << 4;
  759. if (isxdigit(*Url)) {
  760. ch |= HexCharToNumber(*Url++);
  761. } else {
  762. return ERROR_WINHTTP_INVALID_URL;
  763. }
  764. } else {
  765. return ERROR_WINHTTP_INVALID_URL;
  766. }
  767. UrlLength -= 3;
  768. } else {
  769. ch = *Url++;
  770. --UrlLength;
  771. }
  772. *DecodedString++ = ch;
  773. --bufferRemaining;
  774. }
  775. if (UrlLength == 0) {
  776. *DecodedLength -= bufferRemaining;
  777. return ERROR_SUCCESS;
  778. } else {
  779. return ERROR_INSUFFICIENT_BUFFER;
  780. }
  781. }
  782. DWORD
  783. DecodeUrlInSitu(
  784. IN LPSTR BufferAddress,
  785. IN OUT LPDWORD BufferLength
  786. )
  787. /*++
  788. Routine Description:
  789. Decodes an URL string, if it contains escape sequences. The conversion is
  790. done in place, since we know that a string containing escapes is longer than
  791. the string with escape sequences (3 bytes) converted to characters (1 byte)
  792. Arguments:
  793. BufferAddress - pointer to the string to convert
  794. BufferLength - IN: number of characters to convert
  795. OUT: length of converted string
  796. Return Value:
  797. DWORD
  798. Success - ERROR_SUCCESS
  799. Failure - ERROR_WINHTTP_INVALID_URL
  800. ERROR_INSUFFICIENT_BUFFER
  801. --*/
  802. {
  803. DWORD stringLength;
  804. stringLength = *BufferLength;
  805. if (memchr(BufferAddress, '%', stringLength)) {
  806. return DecodeUrl(BufferAddress,
  807. stringLength,
  808. BufferAddress,
  809. BufferLength
  810. );
  811. } else {
  812. //
  813. // no escape character in the string, just return success
  814. //
  815. return ERROR_SUCCESS;
  816. }
  817. }
  818. DWORD
  819. DecodeUrlStringInSitu(
  820. IN LPSTR BufferAddress,
  821. IN OUT LPDWORD BufferLength
  822. )
  823. /*++
  824. Routine Description:
  825. Performs DecodeUrlInSitu() on a string and zero terminates it
  826. Assumes: 1. Even if no decoding is performed, *BufferLength is large enough
  827. to fit an extra '\0' character
  828. Arguments:
  829. BufferAddress - pointer to the string to convert
  830. BufferLength - IN: number of characters to convert
  831. OUT: length of converted string, excluding '\0'
  832. Return Value:
  833. DWORD
  834. Success - ERROR_SUCCESS
  835. Failure - ERROR_WINHTTP_INVALID_URL
  836. ERROR_INSUFFICIENT_BUFFER
  837. --*/
  838. {
  839. DWORD error;
  840. error = DecodeUrlInSitu(BufferAddress, BufferLength);
  841. if (error == ERROR_SUCCESS) {
  842. BufferAddress[*BufferLength] = '\0';
  843. }
  844. return error;
  845. }
  846. DWORD
  847. GetUrlAddressInfo(
  848. IN OUT LPSTR* Url,
  849. IN OUT LPDWORD UrlLength,
  850. OUT LPSTR* PartOne,
  851. OUT LPDWORD PartOneLength,
  852. OUT LPBOOL PartOneEscape,
  853. OUT LPSTR* PartTwo,
  854. OUT LPDWORD PartTwoLength,
  855. OUT LPBOOL PartTwoEscape
  856. )
  857. /*++
  858. Routine Description:
  859. Given a string of the form foo:bar, splits them into 2 counted strings about
  860. the ':' character. The address string may or may not contain a ':'.
  861. This function is intended to split into substrings the host:port and
  862. username:password strings commonly used in Internet address specifications
  863. and by association, in URLs
  864. Modified to handle IPv6 literal addresses in URLs surrounded by brackets "[ ]" as per
  865. RFC 2732. Input of "[foo]:bar" is now considered equivalent to "foo:bar". The brackets
  866. ARE returned as part of a string and counted.
  867. Arguments:
  868. Url - pointer to pointer to string containing URL. On output
  869. this is advanced past the address parts
  870. UrlLength - pointer to length of URL in UrlString. On output this is
  871. reduced by the number of characters parsed
  872. PartOne - pointer which will receive first part of address string
  873. PartOneLength - pointer which will receive length of first part of address
  874. string
  875. PartOneEscape - TRUE on output if PartOne contains escape sequences
  876. PartTwo - pointer which will receive second part of address string
  877. PartTwoLength - pointer which will receive length of second part of address
  878. string
  879. PartOneEscape - TRUE on output if PartTwo contains escape sequences
  880. Return Value:
  881. DWORD
  882. Success - ERROR_SUCCESS
  883. Failure - ERROR_WINHTTP_INVALID_URL
  884. --*/
  885. {
  886. LPSTR pString;
  887. LPSTR pColon;
  888. DWORD partLength;
  889. LPBOOL partEscape;
  890. DWORD length;
  891. //
  892. // parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>]
  893. //
  894. pString = *Url;
  895. pColon = NULL;
  896. partLength = 0;
  897. *PartOne = pString;
  898. *PartOneLength = 0;
  899. *PartOneEscape = FALSE;
  900. *PartTwoEscape = FALSE;
  901. partEscape = PartOneEscape;
  902. length = *UrlLength;
  903. if ((length != 0) && (*pString == '[')) {
  904. //
  905. // If the first part starts with a '[' then we assume it's an IPv6
  906. // literal address and it must be terminated with a ']'.
  907. //
  908. // Note we DO NOT output PartOneEscape == TRUE if there is a % in
  909. // the IPv6 literal address designating a Scope ID.
  910. //
  911. *PartOne = pString;
  912. for (;;) {
  913. if(*pString & ~0x7F)
  914. return ERROR_WINHTTP_INVALID_URL;
  915. ++partLength;
  916. ++pString;
  917. --length;
  918. if (length == 0) {
  919. return ERROR_WINHTTP_INVALID_URL;
  920. }
  921. if (*pString == ']') {
  922. ++partLength;
  923. break;
  924. }
  925. }
  926. ++pString;
  927. --length;
  928. //
  929. // If there's more, then there should be a colon or forward slash
  930. // We allow http://[addr]/...
  931. // http://[addr]:port/...
  932. // not
  933. // http://[addr]junk/...
  934. //
  935. if (length != 0) {
  936. if ((*pString != ':') &&
  937. (*pString != '/'))
  938. return ERROR_WINHTTP_INVALID_URL;
  939. }
  940. }
  941. while ((*pString != '/') && (*pString != '\0') && (length != 0)) {
  942. if (*pString == '%') {
  943. //
  944. // if there is a % in the string then it *must* (RFC 1738) be the
  945. // start of an escape sequence. This function just reports the
  946. // address of the substrings and their lengths; calling functions
  947. // must handle the escape sequences (i.e. it is their responsibility
  948. // to decide where to put the results)
  949. //
  950. *partEscape = TRUE;
  951. }
  952. if (*pString == ':') {
  953. if (pColon != NULL) {
  954. //
  955. // we don't expect more than 1 ':'
  956. //
  957. return ERROR_WINHTTP_INVALID_URL;
  958. }
  959. pColon = pString;
  960. *PartOneLength = partLength;
  961. if (partLength == 0) {
  962. *PartOne = NULL;
  963. }
  964. partLength = 0;
  965. partEscape = PartTwoEscape;
  966. } else {
  967. ++partLength;
  968. }
  969. if(*pString & ~0x7F)
  970. return ERROR_WINHTTP_INVALID_URL;
  971. ++pString;
  972. --length;
  973. }
  974. //
  975. // we either ended on the host (or user) name or the port number (or
  976. // password), one of which we don't know the length of
  977. //
  978. if (pColon == NULL) {
  979. *PartOneLength = partLength;
  980. *PartTwo = NULL;
  981. *PartTwoLength = 0;
  982. *PartTwoEscape = FALSE;
  983. } else {
  984. *PartTwoLength = partLength;
  985. *PartTwo = pColon + 1;
  986. //
  987. // in both the <user>:<password> and <host>:<port> cases, we cannot have
  988. // the second part without the first, although both parts being zero
  989. // length is OK (host name will be sorted out elsewhere, but (for now,
  990. // at least) I am allowing <>:<> for username:password, since I don't
  991. // see it expressly disallowed in the RFC. I may be revisiting this code
  992. // later...)
  993. //
  994. // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif
  995. // if ((*PartOneLength == 0) && (partLength != 0)) {
  996. // return ERROR_WINHTTP_INVALID_URL;
  997. // }
  998. }
  999. //
  1000. // update the URL pointer and length remaining
  1001. //
  1002. *Url = pString;
  1003. *UrlLength = length;
  1004. return ERROR_SUCCESS;
  1005. }
  1006. DWORD
  1007. GetUrlAddress(
  1008. IN OUT LPSTR* lpszUrl,
  1009. OUT LPDWORD lpdwUrlLength,
  1010. OUT LPSTR* lpszUserName OPTIONAL,
  1011. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  1012. OUT LPSTR* lpszPassword OPTIONAL,
  1013. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  1014. OUT LPSTR* lpszHostName OPTIONAL,
  1015. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  1016. IN BOOL fUnescapeHostName,
  1017. OUT LPINTERNET_PORT lpPort OPTIONAL,
  1018. OUT LPBOOL pHavePort
  1019. )
  1020. /*++
  1021. Routine Description:
  1022. This function extracts any and all parts of the address information for a
  1023. generic URL. If any of the address parts contain escaped characters (%nn)
  1024. then they are converted in situ
  1025. The generic addressing format (RFC 1738) is:
  1026. <user>:<password>@<host>:<port>
  1027. The addressing information cannot contain a password without a user name,
  1028. or a port without a host name
  1029. NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name!
  1030. (e.g. http://:0/-http-gw-internal-/menu.gif)
  1031. Although only the lpszUrl and lpdwUrlLength fields are required, the address
  1032. parts will be checked for presence and completeness
  1033. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  1034. then the accompanying lpdw field must also be supplied
  1035. Arguments:
  1036. lpszUrl - IN: pointer to the URL to parse
  1037. OUT: URL remaining after address information
  1038. N.B. The url-path is NOT canonicalized (unescaped)
  1039. because it may contain protocol-specific information
  1040. which must be parsed out by the protocol-specific
  1041. parser
  1042. lpdwUrlLength - returned length of the remainder of the URL after the
  1043. address information
  1044. lpszUserName - returned pointer to the user name
  1045. This parameter can be omitted by those protocol parsers
  1046. that do not require or expect user names in the URL
  1047. lpdwUserNameLength - returned length of the user name part
  1048. This parameter can be omitted by those protocol parsers
  1049. that do not require or expect user names in the URL
  1050. lpszPassword - returned pointer to the password
  1051. This parameter can be omitted by those protocol parsers
  1052. that do not require or expect user passwords in the URL
  1053. lpdwPasswordLength - returned length of the password
  1054. This parameter can be omitted by those protocol parsers
  1055. that do not require or expect user passwords in the URL
  1056. lpszHostName - returned pointer to the host name
  1057. This parameter can be omitted by those protocol parsers
  1058. that do not require the host name info
  1059. lpdwHostNameLength - returned length of the host name
  1060. This parameter can be omitted by those protocol parsers
  1061. that do not require the host name info
  1062. lpPort - returned value of the port field
  1063. This parameter can be omitted by those protocol parsers
  1064. that do not require or expect user port number
  1065. pHavePort - returned boolean indicating whether a port was specified
  1066. in the URL or not. This value is not returned if the
  1067. lpPort parameter is omitted.
  1068. Return Value:
  1069. DWORD
  1070. Success - ERROR_SUCCESS
  1071. Failure - ERROR_WINHTTP_INVALID_URL
  1072. We could not parse some part of the address info, or we
  1073. found address info where the protocol parser didn't expect
  1074. any
  1075. ERROR_INSUFFICIENT_BUFFER
  1076. We could not convert an escaped string
  1077. --*/
  1078. {
  1079. DWORD error = ERROR_WINHTTP_INTERNAL_ERROR;
  1080. DWORD urlLength;
  1081. LPSTR pUrl;
  1082. BOOL part1Escape;
  1083. BOOL part2Escape;
  1084. char portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
  1085. DWORD portNumberLength;
  1086. LPSTR pPortNumber;
  1087. LPSTR hostName;
  1088. DWORD hostNameLength;
  1089. pUrl = *lpszUrl;
  1090. urlLength = strlen(pUrl);
  1091. char *pHead, *pTail;
  1092. //
  1093. // check to see if there is an '@' separating user name & password. If we
  1094. // see a '/' or get to the end of the string before we see the '@' then
  1095. // there is no username:password part
  1096. //
  1097. char *pAt, *pSlash;
  1098. pHead = pUrl;
  1099. pTail = pHead + urlLength;
  1100. pSlash = Utf8StrChr(pHead, pTail, '/');
  1101. if (pSlash == NULL)
  1102. pSlash = pTail;
  1103. pAt = Utf8StrChr(pHead, pSlash, '@');
  1104. {
  1105. char *pUsername, *pPassword;
  1106. int iUsernameLength, iPasswordLength;
  1107. pUsername = pSlash;
  1108. pPassword = pSlash;
  1109. iUsernameLength = 0;
  1110. iPasswordLength = 0;
  1111. if (pAt != NULL)
  1112. {
  1113. pUsername = pHead;
  1114. pPassword = Utf8StrChr( pUsername, pAt, ':'); // still a ':' ahead of the actual password..
  1115. if (pPassword == NULL)
  1116. pPassword = pAt;
  1117. iUsernameLength = (DWORD)(pPassword - pUsername);
  1118. if (*pPassword == ':')
  1119. pPassword++;
  1120. iPasswordLength = (DWORD)(pAt - pPassword);
  1121. pHead = pAt + 1;
  1122. }
  1123. if (ARGUMENT_PRESENT(lpszUserName))
  1124. {
  1125. INET_ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  1126. *lpszUserName = pUsername;
  1127. *lpdwUserNameLength = iUsernameLength;
  1128. }
  1129. if (ARGUMENT_PRESENT(lpszPassword))
  1130. {
  1131. INET_ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
  1132. *lpszPassword = pPassword;
  1133. *lpdwPasswordLength = iPasswordLength;
  1134. }
  1135. }
  1136. //
  1137. // now get the host name and the optional port
  1138. //
  1139. pUrl = pHead;
  1140. urlLength = (DWORD)(pTail - pHead);
  1141. pPortNumber = portNumber;
  1142. portNumberLength = sizeof(portNumber);
  1143. error = GetUrlAddressInfo(&pUrl,
  1144. &urlLength,
  1145. &hostName,
  1146. &hostNameLength,
  1147. &part1Escape,
  1148. &pPortNumber,
  1149. &portNumberLength,
  1150. &part2Escape
  1151. );
  1152. if (error != ERROR_SUCCESS)
  1153. goto done;
  1154. //
  1155. // the URL address information MUST contain the host name
  1156. //
  1157. if ((hostName == NULL) || (hostNameLength == 0))
  1158. {
  1159. error = ERROR_WINHTTP_INVALID_URL;
  1160. goto done;
  1161. }
  1162. if (ARGUMENT_PRESENT(lpszHostName))
  1163. {
  1164. INET_ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));
  1165. //
  1166. // if the host name contains escaped characters, convert them in situ
  1167. //
  1168. if (part1Escape && fUnescapeHostName)
  1169. {
  1170. error = DecodeUrlInSitu(hostName, &hostNameLength);
  1171. if (error != ERROR_SUCCESS)
  1172. goto done;
  1173. }
  1174. *lpszHostName = hostName;
  1175. *lpdwHostNameLength = hostNameLength;
  1176. }
  1177. //
  1178. // if there is a port field, convert it if there are escaped characters,
  1179. // check it for valid numeric characters, and convert it to a number
  1180. //
  1181. if (portNumberLength != 0)
  1182. {
  1183. DWORD i;
  1184. DWORD port;
  1185. INET_ASSERT(pPortNumber != NULL);
  1186. //
  1187. // We can ignore part2Escape because below we detect
  1188. //non-digits in the port.
  1189. //
  1190. //
  1191. // ensure all characters in the port number buffer are numeric, and
  1192. // calculate the port number at the same time
  1193. //
  1194. for (i = 0, port = 0; i < portNumberLength; ++i, ++pPortNumber)
  1195. {
  1196. if (!isdigit(*pPortNumber))
  1197. {
  1198. error = ERROR_WINHTTP_INVALID_URL;
  1199. goto done;
  1200. }
  1201. port = port * 10 + (int)(*pPortNumber - '0');
  1202. // We won't allow ports larger than 65535 ((2^16)-1)
  1203. // We have to check this every time to make sure that someone
  1204. // doesn't try to overflow a DWORD.
  1205. if (port > 65535)
  1206. {
  1207. error = ERROR_WINHTTP_INVALID_URL;
  1208. goto done;
  1209. }
  1210. }
  1211. if (ARGUMENT_PRESENT(lpPort))
  1212. *lpPort = (INTERNET_PORT)port;
  1213. if (ARGUMENT_PRESENT(pHavePort))
  1214. *pHavePort = TRUE;
  1215. }
  1216. else
  1217. {
  1218. if (ARGUMENT_PRESENT(lpPort))
  1219. *lpPort = INTERNET_INVALID_PORT_NUMBER;
  1220. if (ARGUMENT_PRESENT(pHavePort))
  1221. *pHavePort = FALSE;
  1222. }
  1223. //
  1224. // update the URL pointer and the length of the url-path
  1225. //
  1226. *lpszUrl = pUrl;
  1227. *lpdwUrlLength = urlLength;
  1228. error = ERROR_SUCCESS;
  1229. done:
  1230. return error;
  1231. }
  1232. INTERNET_SCHEME
  1233. MapUrlSchemeName(
  1234. IN LPSTR lpszSchemeName,
  1235. IN DWORD dwSchemeNameLength
  1236. )
  1237. /*++
  1238. Routine Description:
  1239. Maps a scheme name/length to a scheme name type
  1240. Arguments:
  1241. lpszSchemeName - pointer to name of scheme to map
  1242. dwSchemeNameLength - length of scheme (if -1, lpszSchemeName is ASCIZ)
  1243. Return Value:
  1244. INTERNET_SCHEME
  1245. --*/
  1246. {
  1247. if (dwSchemeNameLength == (DWORD)-1) {
  1248. dwSchemeNameLength = (DWORD)lstrlen(lpszSchemeName);
  1249. }
  1250. DWORD i;
  1251. if (ScanSchemes(lpszSchemeName, dwSchemeNameLength, &i))
  1252. {
  1253. return UrlSchemeList[i].SchemeType;
  1254. }
  1255. return INTERNET_SCHEME_UNKNOWN;
  1256. }
  1257. LPSTR
  1258. MapUrlScheme(
  1259. IN INTERNET_SCHEME Scheme,
  1260. OUT LPDWORD lpdwSchemeNameLength
  1261. )
  1262. /*++
  1263. Routine Description:
  1264. Maps the enumerated scheme name type to the name
  1265. Arguments:
  1266. Scheme - enumerated scheme type to map
  1267. lpdwSchemeNameLength - pointer to returned length of scheme name
  1268. Return Value:
  1269. LPSTR - pointer to scheme name or NULL
  1270. --*/
  1271. {
  1272. if ((Scheme >= INTERNET_SCHEME_FIRST)
  1273. && (Scheme <= INTERNET_SCHEME_LAST))
  1274. {
  1275. *lpdwSchemeNameLength = UrlSchemeList[Scheme].SchemeLength;
  1276. return UrlSchemeList[Scheme].SchemeName;
  1277. }
  1278. *lpdwSchemeNameLength = 0;
  1279. return NULL;
  1280. }
  1281. LPSTR
  1282. MapUrlSchemeToName(
  1283. IN INTERNET_SCHEME Scheme
  1284. )
  1285. /*++
  1286. Routine Description:
  1287. Maps the enumerated scheme name type to the name
  1288. Arguments:
  1289. Scheme - enumerated scheme type to map
  1290. Return Value:
  1291. LPSTR - pointer to scheme name or NULL
  1292. --*/
  1293. {
  1294. if ((Scheme >= INTERNET_SCHEME_FIRST)
  1295. && (Scheme <= INTERNET_SCHEME_LAST)) {
  1296. return UrlSchemeList[Scheme].SchemeName;
  1297. }
  1298. return NULL;
  1299. }
  1300. //
  1301. //
  1302. // UnsafeInPathAndQueryFlags flag in table set to 1 if symbol is unsafe for path or query
  1303. // question mark treated as safe
  1304. // this table is fater then SafetyList because it requires no substraction and no masking
  1305. // and only one bound checking to access it
  1306. //
  1307. //
  1308. const
  1309. PRIVATE
  1310. BYTE
  1311. UnsafeInPathAndQueryFlags[128] = {
  1312. // 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
  1313. // xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx
  1314. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1315. // 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
  1316. // xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx
  1317. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1318. // 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
  1319. // ! " # $ % & ' ( ) * + , - . /
  1320. 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1321. // 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
  1322. // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  1323. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
  1324. // 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
  1325. // @ A B C D E F G H I J K L M N O
  1326. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1327. // 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
  1328. // P Q R S T U V W X Y Z [ \ ] ^ _
  1329. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
  1330. // 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
  1331. // ` a b c d e f g h i j k l m n o
  1332. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1333. // 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
  1334. // p q r s t u v w x y z { | } ~ xx
  1335. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1
  1336. };
  //
  // ADD_HEX_TO_STRING - writes the byte ch at pStr as the three characters
  // "%hh" (hex digits looked up in hexArray) and advances pStr past them.
  //
  // For use inside ConvertUnicodeToMultiByte only: the expansion refers to
  // an array/pointer named hexArray that must be visible at the point of
  // use. ch is evaluated exactly once. The expansion is a complete
  // brace-enclosed block, so call sites are written WITHOUT a trailing
  // semicolon.
  //

  #define ADD_HEX_TO_STRING(pStr, ch)                 \
      {                                               \
          UCHAR escapedByte = (UCHAR)(ch);            \
          *pStr++ = '%';                              \
          *pStr++ = hexArray[escapedByte >> 4];       \
          *pStr++ = hexArray[escapedByte & 0x0f];     \
      }
  1352. /*
  1353. * ConvertUnicodeToMultiByte:
  1354. *
  1355. dwFlags:
  1356. WINHTTP_FLAG_VALID_HOSTNAME only for server name; fast conversion is performed, no escaping
  1357. WINHTTP_FLAG_NULL_CODEPAGE assumes string contains only ASCII chars, fast conversion is performed
  1358. WINHTTP_FLAG_ESCAPE_PERCENT if escaping enabled, escape percent as well
  1359. WINHTTP_FLAG_ESCAPE_DISABLE disable escaping (if WINHTTP_FLAG_VALID_HOSTNAME not set)
  1360. WINHTTP_FLAG_ESCAPE_DISABLE_QUERY if escaping enabled escape path part, but do not escape query
  1361. */
  1362. DWORD
  1363. ConvertUnicodeToMultiByte(
  1364. LPCWSTR lpszObjectName,
  1365. DWORD dwCodePage,
  1366. MEMORYPACKET* pmp,
  1367. DWORD dwFlags)
  1368. {
  1369. static CHAR* hexArray = "0123456789ABCDEF";
  1370. DWORD dwError = ERROR_SUCCESS;
  1371. BOOL bPureAscii = TRUE;
  1372. BOOL bTreatPercentAsSafe = (dwFlags & WINHTTP_FLAG_ESCAPE_PERCENT) ? FALSE : TRUE;
  1373. BOOL bNeedEscaping = (dwFlags & WINHTTP_FLAG_ESCAPE_DISABLE) ? FALSE : TRUE;
  1374. BOOL bEscapeQuery = (dwFlags & WINHTTP_FLAG_ESCAPE_DISABLE_QUERY) ? FALSE : TRUE;
  1375. //determine size of string and/or safe characters
  1376. DWORD dwUnsafeChars = 0;
  1377. DWORD dwUnicodeUrlSize;
  1378. if (dwFlags & WINHTTP_FLAG_VALID_HOSTNAME)
  1379. {
  1380. bNeedEscaping = FALSE;
  1381. if (!IsValidHostNameW(lpszObjectName, 0)) { // 0 == allow v6 literal scope ids
  1382. dwError = ERROR_WINHTTP_INVALID_URL;
  1383. goto done;
  1384. }
  1385. dwUnicodeUrlSize = lstrlenW(lpszObjectName)+1;
  1386. }
  1387. else if ((dwFlags & WINHTTP_FLAG_NULL_CODEPAGE) && !bNeedEscaping)
  1388. {
  1389. //if no escaping needed there is no need to calcaulate num of unsafe char
  1390. dwUnicodeUrlSize = lstrlenW(lpszObjectName)+1;
  1391. }
  1392. else
  1393. {
  1394. // optimization to check for unsafe characters, and optimize the common case.
  1395. // calculate the length, and while parsing the string, check if there are unsafeChars
  1396. PCWSTR pwStr;
  1397. if (bTreatPercentAsSafe)
  1398. for(pwStr = lpszObjectName; *pwStr; ++pwStr)
  1399. {
  1400. UINT16 wc = *pwStr;
  1401. if (wc <= 0x7f)
  1402. {
  1403. if (UnsafeInPathAndQueryFlags[wc] && (wc != L'%'))
  1404. ++dwUnsafeChars;
  1405. }
  1406. else
  1407. {
  1408. bPureAscii = FALSE;
  1409. ++dwUnsafeChars;
  1410. }
  1411. }
  1412. else
  1413. for(pwStr = lpszObjectName; *pwStr; ++pwStr)
  1414. {
  1415. UINT16 wc = *pwStr;
  1416. if (wc <= 0x7f)
  1417. {
  1418. if (UnsafeInPathAndQueryFlags[wc])
  1419. ++dwUnsafeChars;
  1420. }
  1421. else
  1422. {
  1423. bPureAscii = FALSE;
  1424. ++dwUnsafeChars;
  1425. }
  1426. }
  1427. dwUnicodeUrlSize = (DWORD)(pwStr-lpszObjectName+1);
  1428. }
  1429. //convert to MBCS
  1430. if (bPureAscii)
  1431. {
  1432. pmp->dwAlloc = dwUnicodeUrlSize;
  1433. if (bNeedEscaping)
  1434. pmp->dwAlloc += 2 * dwUnsafeChars;
  1435. pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
  1436. if (!pmp->psStr)
  1437. {
  1438. pmp->dwAlloc = 0;
  1439. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1440. goto done;
  1441. }
  1442. PSTR pStr = pmp->psStr;
  1443. if (bNeedEscaping)
  1444. {
  1445. UCHAR chPercent = bTreatPercentAsSafe ? (UCHAR)'%' : (UCHAR)0;
  1446. if (bEscapeQuery)
  1447. for (; *lpszObjectName; ++lpszObjectName)
  1448. {
  1449. UCHAR ch = (UCHAR)*lpszObjectName;
  1450. if (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent))
  1451. *pStr++ = ch;
  1452. else
  1453. {
  1454. ADD_HEX_TO_STRING (pStr, ch)
  1455. }
  1456. }
  1457. else
  1458. for (; *lpszObjectName && (*lpszObjectName != L'?'); ++lpszObjectName)
  1459. {
  1460. UCHAR ch = (UCHAR)*lpszObjectName;
  1461. if (!UnsafeInPathAndQueryFlags[ch] || ch == chPercent)
  1462. *pStr++ = ch;
  1463. else
  1464. {
  1465. ADD_HEX_TO_STRING (pStr, ch)
  1466. }
  1467. }
  1468. }
  1469. for (; *lpszObjectName; ++lpszObjectName)
  1470. *pStr++ = (CHAR)*lpszObjectName;
  1471. *pStr = '\0';
  1472. pmp->dwSize = (DWORD)(pStr - pmp->psStr);
  1473. }
  1474. else if (dwCodePage == CP_UTF8)
  1475. {
  1476. //converts to UTF8 and performs escaping at same time
  1477. pmp->dwAlloc = dwUnicodeUrlSize + (bNeedEscaping ? 8 : 2) * dwUnsafeChars; //yep, some extra allocation possible
  1478. pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
  1479. if (!pmp->psStr)
  1480. {
  1481. pmp->dwAlloc = 0;
  1482. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1483. goto done;
  1484. }
  1485. PSTR pStr = pmp->psStr;
  1486. if (bNeedEscaping)
  1487. {
  1488. WCHAR wcPercent = bTreatPercentAsSafe ? L'%' : (WCHAR)0;
  1489. WCHAR wcQMark = bEscapeQuery ? (WCHAR)0 : L'?';
  1490. for (; *lpszObjectName && (*lpszObjectName != wcQMark); ++lpszObjectName)
  1491. {
  1492. UINT16 wc = *lpszObjectName;
  1493. if (wc <= 0x007f) // encode to one byte
  1494. {
  1495. if (!UnsafeInPathAndQueryFlags[wc] || wc == wcPercent)
  1496. *pStr++ = (CHAR)wc;
  1497. else
  1498. {
  1499. ADD_HEX_TO_STRING (pStr, wc)
  1500. }
  1501. }
  1502. else if (wc <= 0x07FF) //encode to two bytes
  1503. {
  1504. ADD_HEX_TO_STRING (pStr, 0xC0 | (wc >> 6))
  1505. ADD_HEX_TO_STRING (pStr, 0x80 | (wc & 0x3F))
  1506. }
  1507. else //encode to three bytes
  1508. {
  1509. ADD_HEX_TO_STRING (pStr, 0xe0 | (wc >> 12))
  1510. ADD_HEX_TO_STRING (pStr, 0x80 | ((wc >> 6) & 0x3F))
  1511. ADD_HEX_TO_STRING (pStr, 0x80 | (wc & 0x3F))
  1512. }
  1513. }
  1514. }
  1515. for (; *lpszObjectName; ++lpszObjectName)
  1516. {
  1517. UINT16 wc = *lpszObjectName;
  1518. if (wc <= 0x007f) // encode to one byte
  1519. {
  1520. *pStr++ = (CHAR)wc;
  1521. }
  1522. else if (wc <= 0x07FF) //encode to two bytes
  1523. {
  1524. *pStr++ = (CHAR)(0xC0 | (wc >> 6));
  1525. *pStr++ = (CHAR)(0x80 | (wc & 0x3F));
  1526. //*(WORD*)pStr = (WORD)0x80C0 | (wc >> 6) | ((wc & 0x3F) << 8);
  1527. //pStr += 2;
  1528. }
  1529. else //encode to three bytes
  1530. {
  1531. *pStr++ = (CHAR)(0xe0 | (wc >> 12));
  1532. *pStr++ = (CHAR)(0x80 | ((wc >> 6) & 0x3F));
  1533. *pStr++ = (CHAR)(0x80 | (wc & 0x3F));
  1534. //DWORD tmp = 0x8080e0 | (wc >> 12) | ((wc << 2) & 0x3f00) | (((DWORD)wc << 16) & 0x3f0000);
  1535. //*(DWORD*)pStr = tmp;
  1536. //pStr += 3;
  1537. }
  1538. }
  1539. *pStr = '\0';
  1540. pmp->dwSize = (DWORD)(pStr - pmp->psStr);
  1541. }
  1542. else
  1543. {
  1544. //last and final, so not to loose perf don't set dwCodePage to values other then CP_UTF8 :)
  1545. // convert with WideCharToMultiByte()
  1546. pmp->dwAlloc = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, NULL, 0, NULL, NULL);
  1547. if (!pmp->dwAlloc)
  1548. {
  1549. dwError = GetLastError();
  1550. goto done;
  1551. }
  1552. pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
  1553. if (!pmp->psStr)
  1554. {
  1555. pmp->dwAlloc = 0;
  1556. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1557. goto done;
  1558. }
  1559. //find out if query is present
  1560. PCHAR pchQMInConverted = NULL;
  1561. DWORD dwQuerySize;
  1562. if (bNeedEscaping)
  1563. {
  1564. WCHAR* pQM = wcschr(lpszObjectName, L'?');
  1565. if (pQM)
  1566. {
  1567. DWORD dwPathSize = 0;
  1568. if (pQM != lpszObjectName)
  1569. {
  1570. dwPathSize = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, (DWORD)(pQM - lpszObjectName), pmp->psStr, pmp->dwAlloc, NULL, NULL);
  1571. if (!dwPathSize)
  1572. {
  1573. dwError = GetLastError();
  1574. goto done;
  1575. }
  1576. }
  1577. dwQuerySize = WideCharToMultiByte(dwCodePage, 0, pQM, dwUnicodeUrlSize - (DWORD)(pQM - lpszObjectName), pmp->psStr + dwPathSize, pmp->dwAlloc - dwPathSize, NULL, NULL);
  1578. if (!dwQuerySize)
  1579. {
  1580. dwError = GetLastError();
  1581. goto done;
  1582. }
  1583. --dwQuerySize;
  1584. pmp->dwSize = dwPathSize + dwQuerySize;
  1585. pchQMInConverted = pmp->psStr + dwPathSize;
  1586. }
  1587. }
  1588. if (!pchQMInConverted)
  1589. {
  1590. pmp->dwSize = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, pmp->psStr, pmp->dwAlloc, NULL, NULL);
  1591. if (!pmp->dwSize)
  1592. {
  1593. dwError = GetLastError();
  1594. goto done;
  1595. }
  1596. else
  1597. --(pmp->dwSize);
  1598. }
  1599. if (bNeedEscaping)
  1600. {
  1601. //collect information about code page
  1602. DWORD dwCharSize = 1;
  1603. if (dwCodePage != CP_UTF7)
  1604. {
  1605. CPINFO CPInfo;
  1606. if (!GetCPInfo(dwCodePage, &CPInfo))
  1607. {
  1608. dwError = GetLastError();
  1609. goto done;
  1610. }
  1611. dwCharSize = CPInfo.MaxCharSize;
  1612. }
  1613. UCHAR chPercent = bTreatPercentAsSafe ? '%' : (UCHAR)0;
  1614. if (dwCharSize == 1)
  1615. {
  1616. dwUnsafeChars = 0;
  1617. //calculate number of unsafe chars
  1618. PSTR pStop = pchQMInConverted ? pchQMInConverted : (pmp->psStr + pmp->dwSize);
  1619. PSTR pStr = pmp->psStr;
  1620. //this loop counts unsafe chars in path, count '?' as well
  1621. for(; pStr != pStop; ++pStr)
  1622. {
  1623. UCHAR ch = *pStr;
  1624. if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)) || (ch == '?'))
  1625. ++dwUnsafeChars;
  1626. }
  1627. //this loop counts unsafe chars in query, do not count '?'
  1628. for(; *pStr; ++pStr)
  1629. {
  1630. UCHAR ch = *pStr;
  1631. if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)))
  1632. ++dwUnsafeChars;
  1633. }
  1634. if (dwUnsafeChars == 0)
  1635. goto done;
  1636. //make new allocation
  1637. DWORD dwNewAlloc = pmp->dwAlloc + dwUnsafeChars*2;
  1638. LPSTR pDest, pNewStr;
  1639. pNewStr = pDest = (LPSTR)ALLOCATE_FIXED_MEMORY(dwNewAlloc);
  1640. if (!pDest)
  1641. {
  1642. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1643. goto done;
  1644. }
  1645. //escaping
  1646. //escape path part
  1647. pStr = pmp->psStr;
  1648. for(; pStr != pStop; ++pStr)
  1649. {
  1650. UCHAR ch = *pStr;
  1651. if ((ch <= 0x7F) && ((!UnsafeInPathAndQueryFlags[ch] && (ch != '?')) || (ch == chPercent)))
  1652. *pDest++ = ch;
  1653. else
  1654. {
  1655. ADD_HEX_TO_STRING (pDest, ch)
  1656. }
  1657. }
  1658. //escape query part
  1659. for(; *pStr; ++pStr)
  1660. {
  1661. UCHAR ch = *pStr;
  1662. if ((ch <= 0x7F) && (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent)))
  1663. *pDest++ = ch;
  1664. else
  1665. {
  1666. ADD_HEX_TO_STRING (pDest, ch)
  1667. }
  1668. }
  1669. *pDest = '\0';
  1670. FREE_FIXED_MEMORY(pmp->psStr);
  1671. pmp->psStr = pNewStr;
  1672. pmp->dwSize = (DWORD)(pDest-pNewStr);
  1673. pmp->dwAlloc = dwNewAlloc;
  1674. }
  1675. else
  1676. {
  1677. //well, string is mbcs
  1678. dwUnsafeChars = 0;
  1679. //calculate number of unsafe chars
  1680. PSTR pStop = pchQMInConverted ? pchQMInConverted : (pmp->psStr + pmp->dwSize);
  1681. PSTR pStr = pmp->psStr;
  1682. //this loop counts unsafe chars in path, count '?' as well
  1683. while (pStr != pStop)
  1684. {
  1685. UCHAR ch = *pStr;
  1686. if (IsDBCSLeadByteEx(dwCodePage, ch))
  1687. {
  1688. //do not allow percent here
  1689. if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch] || (ch == '?'))
  1690. ++dwUnsafeChars;
  1691. ++pStr;
  1692. ch = *pStr;
  1693. if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch] || (ch == '?'))
  1694. ++dwUnsafeChars;
  1695. ++pStr;
  1696. }
  1697. else
  1698. {
  1699. if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)) || (ch == '?'))
  1700. ++dwUnsafeChars;
  1701. ++pStr;
  1702. }
  1703. }
  1704. //this loop counts unsafe chars in query, do not count '?'
  1705. while(*pStr)
  1706. {
  1707. UCHAR ch = *pStr;
  1708. if (IsDBCSLeadByteEx(dwCodePage, ch))
  1709. {
  1710. //do not allow percent here
  1711. if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch])
  1712. ++dwUnsafeChars;
  1713. ++pStr;
  1714. ch = *pStr;
  1715. if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch])
  1716. ++dwUnsafeChars;
  1717. ++pStr;
  1718. }
  1719. else
  1720. {
  1721. if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)))
  1722. ++dwUnsafeChars;
  1723. ++pStr;
  1724. }
  1725. }
  1726. if (dwUnsafeChars == 0)
  1727. goto done;
  1728. //make new allocation
  1729. DWORD dwNewAlloc = pmp->dwAlloc + dwUnsafeChars*2;
  1730. LPSTR pDest, pNewStr;
  1731. pNewStr = pDest = (LPSTR)ALLOCATE_FIXED_MEMORY(dwNewAlloc);
  1732. if (!pDest)
  1733. {
  1734. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1735. goto done;
  1736. }
  1737. //escaping
  1738. //escape path part
  1739. pStr = pmp->psStr;
  1740. while (pStr != pStop)
  1741. {
  1742. UCHAR ch = *pStr;
  1743. if (IsDBCSLeadByteEx(dwCodePage, ch))
  1744. {
  1745. //do not allow percent here
  1746. if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch] && (ch != '?'))
  1747. *pDest++ = ch;
  1748. else
  1749. {
  1750. ADD_HEX_TO_STRING (pDest, ch)
  1751. }
  1752. ++pStr;
  1753. ch = *pStr;
  1754. if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch] && (ch != '?'))
  1755. *pDest++ = ch;
  1756. else
  1757. {
  1758. ADD_HEX_TO_STRING (pDest, ch)
  1759. }
  1760. ++pStr;
  1761. }
  1762. else
  1763. {
  1764. if ((ch <= 0x7F) && ((!UnsafeInPathAndQueryFlags[ch] && (ch != '?')) || (ch == chPercent)))
  1765. *pDest++ = ch;
  1766. else
  1767. {
  1768. ADD_HEX_TO_STRING (pDest, ch)
  1769. }
  1770. ++pStr;
  1771. }
  1772. }
  1773. //escape query part
  1774. while (*pStr)
  1775. {
  1776. UCHAR ch = *pStr;
  1777. if (IsDBCSLeadByteEx(dwCodePage, ch))
  1778. {
  1779. //do not allow percent here
  1780. if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch])
  1781. *pDest++ = ch;
  1782. else
  1783. {
  1784. ADD_HEX_TO_STRING (pDest, ch)
  1785. }
  1786. ++pStr;
  1787. ch = *pStr;
  1788. if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch])
  1789. *pDest++ = ch;
  1790. else
  1791. {
  1792. ADD_HEX_TO_STRING (pDest, ch)
  1793. }
  1794. ++pStr;
  1795. }
  1796. else
  1797. {
  1798. if ((ch <= 0x7F) && (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent)))
  1799. *pDest++ = ch;
  1800. else
  1801. {
  1802. ADD_HEX_TO_STRING (pDest, ch)
  1803. }
  1804. ++pStr;
  1805. }
  1806. }
  1807. *pDest = '\0';
  1808. FREE_FIXED_MEMORY(pmp->psStr);
  1809. pmp->psStr = pNewStr;
  1810. pmp->dwSize = (DWORD)(pDest-pNewStr);
  1811. pmp->dwAlloc = dwNewAlloc;
  1812. }
  1813. }
  1814. }
  1815. done:
  1816. if (pmp->psStr)
  1817. pmp->dwAlloc = (pmp->dwAlloc > MP_MAX_STACK_USE) ? pmp->dwAlloc : MP_MAX_STACK_USE+1;// to force FREE in ~MEMORYPACKET
  1818. return dwError;
  1819. }