Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1731 lines
47 KiB

  1. /*++
  2. Copyright (c) 1995 Microsoft Corporation
  3. Module Name:
  4. parseurl.cxx
  5. Abstract:
  6. Contains functions to parse the basic URLs - FTP, Gopher, HTTP.
  7. An URL parser simply acts as a macro: it must break out the protocol-specific
  8. information from the URL and initiate opening the identified resource: all
  9. this can be accomplished by calling the relevant Internet protocol APIs.
  10. Code in this module is based on RFC1738
  11. Contents:
  12. IsValidUrl
  13. DoesSchemeRequireSlashes
  14. ParseUrl
  15. CrackUrl
  16. EncodeUrlPath
  17. (HexCharToNumber)
  18. (NumberToHexChar)
  19. DecodeUrl
  20. DecodeUrlInSitu
  21. DecodeUrlStringInSitu
  22. GetUrlAddressInfo
  23. GetUrlAddress
  24. MapUrlSchemeName
  25. MapUrlScheme
  26. MapUrlSchemeToName
  27. Author:
  28. Richard L Firth (rfirth) 26-Apr-1995
  29. Environment:
  30. Win32(s) user-mode DLL
  31. Revision History:
  32. 26-Apr-1995
  33. Created
  34. --*/
  35. #include <wininetp.h>
  36. //
  37. // private manifests
  38. //
  //
  // RESERVED is an alias for SAFE: SafetyList entries tagged RESERVED carry
  // exactly the same bits as entries tagged SAFE
  //

  #define RESERVED    SAFE

  //
  // private macros
  //

  //
  // (the former HEX_CHAR_TO_NUMBER macro is superseded by the HexCharToNumber()
  // function in this file)
  //

  //
  // NUMBER_TO_HEX_CHAR - maps n <= 9 to '0' + n and any larger n to
  // 'A' + (n - 10). The argument is evaluated more than once
  //

  #define NUMBER_TO_HEX_CHAR(n) \
      (((n) <= 9) ? ((char)(n) + '0') : (((char)(n) - 10) + 'A'))

  //
  // IS_UNSAFE_URL_CHARACTER - non-zero if Char is <= 0x20, >= 0x7f, or its
  // SafetyList entry has the UNSAFE bit or any of the Scheme bits set. The
  // table lookup is only reached for 0x21..0x7e because the range tests
  // short-circuit first. Char is evaluated more than once, so do not pass an
  // expression with side effects. Scheme is parenthesized so that any
  // expression can be supplied for it
  //

  #define IS_UNSAFE_URL_CHARACTER(Char, Scheme) \
      (((UCHAR)(Char) <= 0x20) || ((UCHAR)(Char) >= 0x7f) \
      || (SafetyList[(Char) - 0x21] & (UNSAFE | (Scheme))))

  //
  // IS_UNSAFE_URL_WIDECHARACTER - as IS_UNSAFE_URL_CHARACTER, for a WCHAR
  //

  #define IS_UNSAFE_URL_WIDECHARACTER(wChar, Scheme) \
      (((WCHAR)(wChar) <= 0x0020) || ((WCHAR)(wChar) >= 0x007f) \
      || (SafetyList[(wChar) - 0x0021] & (UNSAFE | (Scheme))))
  57. //
  58. // private types
  59. //
  60. //
  61. // private prototypes
  62. //
  63. PRIVATE
  64. char
  65. HexCharToNumber(
  66. IN char ch
  67. );
  68. PRIVATE
  69. char
  70. NumberToHexChar(
  71. IN int Number
  72. );
  73. //
  74. // private data
  75. //
  76. //
  77. // SafetyList - the list of characters above 0x20 and below 0x7f that are
  78. // classified as safe, unsafe or scheme-specific. Safe characters do not need
  79. // to be escaped for any URL scheme. Unsafe characters must be escaped for all
  80. // URL schemes. Scheme-specific characters need only be escaped for the relevant
  81. // scheme(s)
  82. //
  83. const
  84. PRIVATE
  85. UCHAR
  86. SafetyList[] = {
  87. //
  88. // UNSAFE: 0x00..0x20
  89. //
  90. SAFE | HOSTNAME, // 0x21 (!)
  91. UNSAFE, // 0x22 (")
  92. UNSAFE, // 0x23 (#)
  93. SAFE | HOSTNAME, // 0x24 ($)
  94. UNSAFE, // 0x25 (%)
  95. RESERVED | HOSTNAME, // 0x26 (&)
  96. SAFE | HOSTNAME, // 0x27 (')
  97. SAFE | HOSTNAME, // 0x28 (()
  98. SAFE | HOSTNAME, // 0x29 ())
  99. SAFE | HOSTNAME, // 0x2A (*)
  100. SCHEME_GOPHER | HOSTNAME, // 0x2B (+)
  101. SAFE | HOSTNAME, // 0x2C (,)
  102. SAFE, // 0x2D (-)
  103. SAFE, // 0x2E (.)
  104. RESERVED | HOSTNAME, // 0x2F (/)
  105. SAFE, // 0x30 (0)
  106. SAFE, // 0x31 (1)
  107. SAFE, // 0x32 (2)
  108. SAFE, // 0x33 (3)
  109. SAFE, // 0x34 (4)
  110. SAFE, // 0x35 (5)
  111. SAFE, // 0x36 (6)
  112. SAFE, // 0x37 (7)
  113. SAFE, // 0x38 (8)
  114. SAFE, // 0x39 (9)
  115. RESERVED | HOSTNAME, // 0x3A (:)
  116. RESERVED | HOSTNAME, // 0x3B (;)
  117. UNSAFE, // 0x3C (<)
  118. RESERVED | HOSTNAME, // 0x3D (=)
  119. UNSAFE, // 0x3E (>)
  120. RESERVED | SCHEME_GOPHER | HOSTNAME, // 0x3F (?)
  121. RESERVED | HOSTNAME, // 0x40 (@)
  122. SAFE, // 0x41 (A)
  123. SAFE, // 0x42 (B)
  124. SAFE, // 0x43 (C)
  125. SAFE, // 0x44 (D)
  126. SAFE, // 0x45 (E)
  127. SAFE, // 0x46 (F)
  128. SAFE, // 0x47 (G)
  129. SAFE, // 0x48 (H)
  130. SAFE, // 0x49 (I)
  131. SAFE, // 0x4A (J)
  132. SAFE, // 0x4B (K)
  133. SAFE, // 0x4C (L)
  134. SAFE, // 0x4D (M)
  135. SAFE, // 0x4E (N)
  136. SAFE, // 0x4F (O)
  137. SAFE, // 0x50 (P)
  138. SAFE, // 0x51 (Q)
  139. SAFE, // 0x42 (R)
  140. SAFE, // 0x43 (S)
  141. SAFE, // 0x44 (T)
  142. SAFE, // 0x45 (U)
  143. SAFE, // 0x46 (V)
  144. SAFE, // 0x47 (W)
  145. SAFE, // 0x48 (X)
  146. SAFE, // 0x49 (Y)
  147. SAFE, // 0x5A (Z)
  148. UNSAFE, // 0x5B ([)
  149. UNSAFE, // 0x5C (\)
  150. UNSAFE, // 0x5D (])
  151. UNSAFE, // 0x5E (^)
  152. SAFE, // 0x5F (_)
  153. UNSAFE, // 0x60 (`)
  154. SAFE, // 0x61 (a)
  155. SAFE, // 0x62 (b)
  156. SAFE, // 0x63 (c)
  157. SAFE, // 0x64 (d)
  158. SAFE, // 0x65 (e)
  159. SAFE, // 0x66 (f)
  160. SAFE, // 0x67 (g)
  161. SAFE, // 0x68 (h)
  162. SAFE, // 0x69 (i)
  163. SAFE, // 0x6A (j)
  164. SAFE, // 0x6B (k)
  165. SAFE, // 0x6C (l)
  166. SAFE, // 0x6D (m)
  167. SAFE, // 0x6E (n)
  168. SAFE, // 0x6F (o)
  169. SAFE, // 0x70 (p)
  170. SAFE, // 0x71 (q)
  171. SAFE, // 0x72 (r)
  172. SAFE, // 0x73 (s)
  173. SAFE, // 0x74 (t)
  174. SAFE, // 0x75 (u)
  175. SAFE, // 0x76 (v)
  176. SAFE, // 0x77 (w)
  177. SAFE, // 0x78 (x)
  178. SAFE, // 0x79 (y)
  179. SAFE, // 0x7A (z)
  180. UNSAFE, // 0x7B ({)
  181. UNSAFE, // 0x7C (|)
  182. UNSAFE, // 0x7D (})
  183. UNSAFE // 0x7E (~)
  184. //
  185. // UNSAFE: 0x7F..0xFF
  186. //
  187. };
  188. //
  189. // UrlSchemeList - the list of schemes that we support
  190. //
  191. typedef struct {
  192. LPSTR SchemeName;
  193. DWORD SchemeLength;
  194. INTERNET_SCHEME SchemeType;
  195. DWORD SchemeFlags;
  196. BOOL NeedSlashes;
  197. DWORD OpenFlags;
  198. } URL_SCHEME_INFO;
  199. const
  200. PRIVATE
  201. URL_SCHEME_INFO
  202. UrlSchemeList[] = {
  203. NULL, 0, INTERNET_SCHEME_DEFAULT, 0, FALSE, 0,
  204. "http", 4, INTERNET_SCHEME_HTTP, SCHEME_HTTP, TRUE, 0,
  205. "https", 5, INTERNET_SCHEME_HTTPS, SCHEME_HTTP, TRUE, WINHTTP_FLAG_SECURE,
  206. };
  207. #define NUMBER_OF_URL_SCHEMES ARRAY_ELEMENTS(UrlSchemeList)
  208. BOOL ScanSchemes(LPTSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
  209. {
  210. for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
  211. {
  212. if ((UrlSchemeList[i].SchemeLength == ccStr)
  213. && (strnicmp(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
  214. {
  215. *pwResult = i;
  216. return TRUE;
  217. }
  218. }
  219. return FALSE;
  220. }
  221. //
  222. // functions
  223. //
  224. BOOL
  225. IsValidUrl(
  226. IN LPCSTR lpszUrl
  227. )
  228. /*++
  229. Routine Description:
  230. Determines whether an URL has a valid format
  231. Arguments:
  232. lpszUrl - pointer to URL to check.
  233. Assumes: 1. lpszUrl is non-NULL, non-empty string
  234. Return Value:
  235. BOOL
  236. --*/
  237. {
  238. INET_ASSERT(lpszUrl != NULL);
  239. INET_ASSERT(*lpszUrl != '\0');
  240. while (*lpszUrl != '\0') {
  241. if (IS_UNSAFE_URL_CHARACTER(*lpszUrl, SCHEME_ANY)) {
  242. return FALSE;
  243. }
  244. ++lpszUrl;
  245. }
  246. return TRUE;
  247. }
  248. BOOL
  249. DoesSchemeRequireSlashes(
  250. IN LPSTR lpszScheme,
  251. IN DWORD dwSchemeLength,
  252. IN BOOL bHasHostName
  253. )
  254. /*++
  255. Routine Description:
  256. Determines whether a protocol scheme requires slashes
  257. Arguments:
  258. lpszScheme - pointer to protocol scheme in question
  259. (does not include ':' or slashes, just scheme name)
  260. dwUrlLength - if not 0, string length of lpszScheme
  261. Return Value:
  262. BOOL
  263. --*/
  264. {
  265. DWORD i;
  266. //
  267. // if dwSchemeLength is 0 then lpszUrl is ASCIIZ. Find its length
  268. //
  269. if (dwSchemeLength == 0) {
  270. dwSchemeLength = strlen(lpszScheme);
  271. }
  272. if (ScanSchemes(lpszScheme, dwSchemeLength, &i))
  273. {
  274. return UrlSchemeList[i].NeedSlashes;
  275. }
  276. return bHasHostName;
  277. }
  278. DWORD
  279. CrackUrl(
  280. IN OUT LPSTR lpszUrl,
  281. IN DWORD dwUrlLength,
  282. IN BOOL bEscape,
  283. OUT LPINTERNET_SCHEME lpSchemeType OPTIONAL,
  284. OUT LPSTR* lpszSchemeName OPTIONAL,
  285. OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
  286. OUT LPSTR* lpszHostName OPTIONAL,
  287. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  288. OUT LPINTERNET_PORT lpServerPort OPTIONAL,
  289. OUT LPSTR* lpszUserName OPTIONAL,
  290. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  291. OUT LPSTR* lpszPassword OPTIONAL,
  292. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  293. OUT LPSTR* lpszUrlPath OPTIONAL,
  294. OUT LPDWORD lpdwUrlPathLength OPTIONAL,
  295. OUT LPSTR* lpszExtraInfo OPTIONAL,
  296. OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
  297. OUT LPBOOL pHavePort
  298. )
  299. /*++
  300. Routine Description:
  301. Cracks an URL into its constituent parts
  302. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  303. then the accompanying lpdw field must also be supplied
  304. Arguments:
  305. lpszUrl - pointer to URL to crack. This buffer WILL BE
  306. OVERWRITTEN if it contains escape sequences that
  307. we will convert back to ANSI characters
  308. dwUrlLength - if not 0, string length of lpszUrl
  309. bEscape - TRUE if we are to escape the url-path
  310. lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP
  311. lpszSchemeName - returned scheme name
  312. lpdwSchemeNameLength - length of scheme name
  313. lpszHostName - returned host name
  314. lpdwHostNameLength - length of host name buffer
  315. lpServerPort - returned server port if present in the URL, else 0
  316. lpszUserName - returned user name if present
  317. lpdwUserNameLength - length of user name buffer
  318. lpszPassword - returned password if present
  319. lpdwPasswordLength - length of password buffer
  320. lpszUrlPath - returned, canonicalized URL path
  321. lpdwUrlPathLength - length of url-path buffer
  322. lpszExtraInfo - returned search string or intra-page link if present
  323. lpdwExtraInfoLength - length of extra info buffer
  324. pHavePort - returned boolean indicating whether port was specified
  325. Return Value:
  326. DWORD
  327. Success - ERROR_SUCCESS
  328. Failure - ERROR_WINHTTP_UNRECOGNIZED_SCHEME
  329. --*/
  330. {
  331. DWORD error;
  332. DWORD schemeLength;
  333. INTERNET_SCHEME schemeType;
  334. //
  335. // if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length
  336. //
  337. if (dwUrlLength == 0) {
  338. dwUrlLength = strlen(lpszUrl);
  339. }
  340. //
  341. // get parser based on the protocol name
  342. //
  343. for (schemeLength = 0; lpszUrl[schemeLength] != ':'; ++schemeLength) {
  344. if ((dwUrlLength == 0) || (lpszUrl[schemeLength] == '\0')) {
  345. //
  346. // no ':' in URL? Bogus (dude)
  347. //
  348. error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
  349. goto quit;
  350. }
  351. --dwUrlLength;
  352. }
  353. DWORD i;
  354. int skip;
  355. BOOL isGeneric;
  356. BOOL needSlashes;
  357. BOOL haveSlashes;
  358. isGeneric = FALSE;
  359. needSlashes = FALSE;
  360. haveSlashes = FALSE;
  361. schemeType = INTERNET_SCHEME_UNKNOWN;
  362. if (ScanSchemes(lpszUrl, schemeLength, &i))
  363. {
  364. schemeType = UrlSchemeList[i].SchemeType;
  365. needSlashes = UrlSchemeList[i].NeedSlashes;
  366. }
  367. else
  368. {
  369. error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
  370. goto quit;
  371. }
  372. skip = 1; // skip ':'
  373. if ((dwUrlLength > 3) && (memcmp(&lpszUrl[schemeLength], "://", 3) == 0)) {
  374. skip = 3; // skip "://"
  375. haveSlashes = TRUE;
  376. }
  377. //
  378. // If we don't have slashes, make sure we don't need them.
  379. // If we have slashes, make sure they are required.
  380. //
  381. if ((!haveSlashes && !needSlashes) || (haveSlashes && needSlashes)) {
  382. if (ARGUMENT_PRESENT(lpSchemeType)) {
  383. *lpSchemeType = schemeType;
  384. }
  385. if (ARGUMENT_PRESENT(lpszSchemeName)) {
  386. *lpszSchemeName = lpszUrl;
  387. *lpdwSchemeNameLength = schemeLength;
  388. }
  389. lpszUrl += schemeLength + skip;
  390. dwUrlLength -= skip;
  391. if (isGeneric) {
  392. if (ARGUMENT_PRESENT(lpszUserName)) {
  393. *lpszUserName = NULL;
  394. *lpdwUserNameLength = 0;
  395. }
  396. if (ARGUMENT_PRESENT(lpszPassword)) {
  397. *lpszPassword = NULL;
  398. *lpdwPasswordLength = 0;
  399. }
  400. if (ARGUMENT_PRESENT(lpszHostName)) {
  401. *lpszHostName = NULL;
  402. *lpdwHostNameLength = 0;
  403. }
  404. if (ARGUMENT_PRESENT(lpServerPort)) {
  405. *lpServerPort = 0;
  406. }
  407. error = ERROR_SUCCESS;
  408. } else {
  409. error = GetUrlAddress(&lpszUrl,
  410. &dwUrlLength,
  411. lpszUserName,
  412. lpdwUserNameLength,
  413. lpszPassword,
  414. lpdwPasswordLength,
  415. lpszHostName,
  416. lpdwHostNameLength,
  417. lpServerPort,
  418. pHavePort
  419. );
  420. }
  421. if (bEscape && (error == ERROR_SUCCESS)) {
  422. error = DecodeUrlInSitu(lpszUrl, &dwUrlLength);
  423. }
  424. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszExtraInfo)) {
  425. *lpdwExtraInfoLength = 0;
  426. for (i = 0; i < (int)dwUrlLength; i++) {
  427. if (lpszUrl[i] == '?' || lpszUrl[i] == '#') {
  428. *lpszExtraInfo = &lpszUrl[i];
  429. *lpdwExtraInfoLength = dwUrlLength - i;
  430. dwUrlLength -= *lpdwExtraInfoLength;
  431. }
  432. }
  433. }
  434. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszUrlPath)) {
  435. *lpszUrlPath = lpszUrl;
  436. *lpdwUrlPathLength = dwUrlLength;
  437. }
  438. } else {
  439. error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
  440. }
  441. quit:
  442. return error;
  443. }
  444. #define DEFAULT_REALLOC_SIZE 1024
  445. DWORD
  446. EncodeUrlPath(
  447. IN DWORD Flags,
  448. IN DWORD SchemeFlags,
  449. IN LPSTR UrlPath,
  450. IN DWORD UrlPathLength,
  451. OUT LPSTR* pEncodedUrlPath,
  452. IN OUT LPDWORD EncodedUrlPathLength
  453. )
  454. /*++
  455. Routine Description:
  456. Encodes an URL-path. That is, escapes the string. Creates a new URL-path in
  457. which all the 'unsafe' and reserved characters for this scheme have been
  458. converted to escape sequences
  459. Arguments:
  460. Flags - controlling expansion
  461. SchemeFlags - which scheme we are encoding for -
  462. SCHEME_HTTP, etc.
  463. UrlPath - pointer to the unescaped string
  464. UrlPathLength - length of Url
  465. EncodedUrlPath - pointer to buffer where encoded URL will be
  466. written
  467. EncodedUrlPathLength - IN: size of EncodedUrlPath
  468. OUT: number of bytes written to EncodedUrlPath
  469. Return Value:
  470. DWORD
  471. Success - ERROR_SUCCESS
  472. Failure - ERROR_INSUFFICIENT_BUFFER
  473. UrlPathLength not large enough to store encoded URL path
  474. --*/
  475. {
  476. DWORD error;
  477. DWORD len;
  478. len = *EncodedUrlPathLength;
  479. LPSTR EncodedUrlPath = *pEncodedUrlPath;
  480. UCHAR ch;
  481. while(ch = (UCHAR)*UrlPath++)
  482. {
  483. //
  484. // check whether this character is safe. For now, we encode all unsafe
  485. // and scheme-specific characters the same way (i.e. irrespective of
  486. // scheme)
  487. //
  488. // We are allowing '/' to be copied unmodified
  489. //
  490. if (len < 3)
  491. {
  492. LPSTR pStr = (LPSTR)REALLOCATE_MEMORY(*pEncodedUrlPath, *EncodedUrlPathLength+DEFAULT_REALLOC_SIZE, LMEM_MOVEABLE);
  493. if (pStr)
  494. {
  495. EncodedUrlPath = pStr+*EncodedUrlPathLength-len;
  496. *pEncodedUrlPath = pStr;
  497. len += DEFAULT_REALLOC_SIZE;
  498. *EncodedUrlPathLength += DEFAULT_REALLOC_SIZE;
  499. }
  500. else
  501. {
  502. goto error;
  503. }
  504. }
  505. if (IS_UNSAFE_URL_CHARACTER(ch, SchemeFlags)
  506. && !((ch == '/') && (Flags & NO_ENCODE_PATH_SEP)))
  507. {
  508. *EncodedUrlPath++ = '%';
  509. //*EncodedUrlPath++ = NumberToHexChar((int)ch / 16);
  510. *EncodedUrlPath++ = NUMBER_TO_HEX_CHAR((int)ch / 16);
  511. //*EncodedUrlPath++ = NumberToHexChar((int)ch % 16);
  512. *EncodedUrlPath++ = NUMBER_TO_HEX_CHAR((int)ch % 16);
  513. len -= 2; // extra --len below
  514. }
  515. else
  516. {
  517. *EncodedUrlPath++ = (signed char)ch;
  518. }
  519. --len;
  520. }
  521. *EncodedUrlPath = '\0';
  522. *EncodedUrlPathLength -= len;
  523. error = ERROR_SUCCESS;
  524. quit:
  525. return error;
  526. error:
  527. error = ERROR_INSUFFICIENT_BUFFER;
  528. goto quit;
  529. }
  530. PRIVATE
  531. char
  532. HexCharToNumber(
  533. IN char ch
  534. )
  535. /*++
  536. Routine Description:
  537. Converts an ANSI character in the range '0'..'9' 'A'..'F' 'a'..'f' to its
  538. corresponding hexadecimal value (0..f)
  539. Arguments:
  540. ch - character to convert
  541. Return Value:
  542. char
  543. hexadecimal value of ch, as an 8-bit (signed) character value
  544. --*/
  545. {
  546. return (ch <= '9') ? (ch - '0')
  547. : ((ch >= 'a') ? ((ch - 'a') + 10) : ((ch - 'A') + 10));
  548. }
  549. PRIVATE
  550. char
  551. NumberToHexChar(
  552. IN int Number
  553. )
  554. /*++
  555. Routine Description:
  556. Converts a number in the range 0..15 to its ASCII character hex representation
  557. ('0'..'F')
  558. Arguments:
  559. Number - to convert
  560. Return Value:
  561. char
  562. character in above range
  563. --*/
  564. {
  565. return (Number <= 9) ? (char)('0' + Number) : (char)('A' + (Number - 10));
  566. }
  567. DWORD
  568. DecodeUrl(
  569. IN LPSTR Url,
  570. IN DWORD UrlLength,
  571. OUT LPSTR DecodedString,
  572. IN OUT LPDWORD DecodedLength
  573. )
  574. /*++
  575. Routine Description:
  576. Converts an URL string with embedded escape sequences (%xx) to a counted
  577. string
  578. It is safe to pass the same pointer for the string to convert, and the
  579. buffer for the converted results: if the current character is not escaped,
  580. it just gets overwritten, else the input pointer is moved ahead 2 characters
  581. further than the output pointer, which is benign
  582. Arguments:
  583. Url - pointer to URL string to convert
  584. UrlLength - number of characters in UrlString
  585. DecodedString - pointer to buffer that receives converted string
  586. DecodedLength - IN: number of characters in buffer
  587. OUT: number of characters converted
  588. Return Value:
  589. DWORD
  590. Success - ERROR_SUCCESS
  591. Failure - ERROR_WINHTTP_INVALID_URL
  592. UrlString couldn't be converted
  593. ERROR_INSUFFICIENT_BUFFER
  594. ConvertedString isn't large enough to hold all the converted
  595. UrlString
  596. --*/
  597. {
  598. DWORD bufferRemaining;
  599. bufferRemaining = *DecodedLength;
  600. while (UrlLength && bufferRemaining) {
  601. char ch;
  602. if (*Url == '%') {
  603. //
  604. // BUGBUG - would %00 ever appear in an URL?
  605. //
  606. ++Url;
  607. if (isxdigit(*Url)) {
  608. ch = HexCharToNumber(*Url++) << 4;
  609. if (isxdigit(*Url)) {
  610. ch |= HexCharToNumber(*Url++);
  611. } else {
  612. return ERROR_WINHTTP_INVALID_URL;
  613. }
  614. } else {
  615. return ERROR_WINHTTP_INVALID_URL;
  616. }
  617. UrlLength -= 3;
  618. } else {
  619. ch = *Url++;
  620. --UrlLength;
  621. }
  622. *DecodedString++ = ch;
  623. --bufferRemaining;
  624. }
  625. if (UrlLength == 0) {
  626. *DecodedLength -= bufferRemaining;
  627. return ERROR_SUCCESS;
  628. } else {
  629. return ERROR_INSUFFICIENT_BUFFER;
  630. }
  631. }
  632. DWORD
  633. DecodeUrlInSitu(
  634. IN LPSTR BufferAddress,
  635. IN OUT LPDWORD BufferLength
  636. )
  637. /*++
  638. Routine Description:
  639. Decodes an URL string, if it contains escape sequences. The conversion is
  640. done in place, since we know that a string containing escapes is longer than
  641. the string with escape sequences (3 bytes) converted to characters (1 byte)
  642. Arguments:
  643. BufferAddress - pointer to the string to convert
  644. BufferLength - IN: number of characters to convert
  645. OUT: length of converted string
  646. Return Value:
  647. DWORD
  648. Success - ERROR_SUCCESS
  649. Failure - ERROR_WINHTTP_INVALID_URL
  650. ERROR_INSUFFICIENT_BUFFER
  651. --*/
  652. {
  653. DWORD stringLength;
  654. stringLength = *BufferLength;
  655. if (memchr(BufferAddress, '%', stringLength)) {
  656. return DecodeUrl(BufferAddress,
  657. stringLength,
  658. BufferAddress,
  659. BufferLength
  660. );
  661. } else {
  662. //
  663. // no escape character in the string, just return success
  664. //
  665. return ERROR_SUCCESS;
  666. }
  667. }
  668. DWORD
  669. DecodeUrlStringInSitu(
  670. IN LPSTR BufferAddress,
  671. IN OUT LPDWORD BufferLength
  672. )
  673. /*++
  674. Routine Description:
  675. Performs DecodeUrlInSitu() on a string and zero terminates it
  676. Assumes: 1. Even if no decoding is performed, *BufferLength is large enough
  677. to fit an extra '\0' character
  678. Arguments:
  679. BufferAddress - pointer to the string to convert
  680. BufferLength - IN: number of characters to convert
  681. OUT: length of converted string, excluding '\0'
  682. Return Value:
  683. DWORD
  684. Success - ERROR_SUCCESS
  685. Failure - ERROR_WINHTTP_INVALID_URL
  686. ERROR_INSUFFICIENT_BUFFER
  687. --*/
  688. {
  689. DWORD error;
  690. error = DecodeUrlInSitu(BufferAddress, BufferLength);
  691. if (error == ERROR_SUCCESS) {
  692. BufferAddress[*BufferLength] = '\0';
  693. }
  694. return error;
  695. }
  696. DWORD
  697. GetUrlAddressInfo(
  698. IN OUT LPSTR* Url,
  699. IN OUT LPDWORD UrlLength,
  700. OUT LPSTR* PartOne,
  701. OUT LPDWORD PartOneLength,
  702. OUT LPBOOL PartOneEscape,
  703. OUT LPSTR* PartTwo,
  704. OUT LPDWORD PartTwoLength,
  705. OUT LPBOOL PartTwoEscape
  706. )
  707. /*++
  708. Routine Description:
  709. Given a string of the form foo:bar, splits them into 2 counted strings about
  710. the ':' character. The address string may or may not contain a ':'.
  711. This function is intended to split into substrings the host:port and
  712. username:password strings commonly used in Internet address specifications
  713. and by association, in URLs
  714. Arguments:
  715. Url - pointer to pointer to string containing URL. On output
  716. this is advanced past the address parts
  717. UrlLength - pointer to length of URL in UrlString. On output this is
  718. reduced by the number of characters parsed
  719. PartOne - pointer which will receive first part of address string
  720. PartOneLength - pointer which will receive length of first part of address
  721. string
  722. PartOneEscape - TRUE on output if PartOne contains escape sequences
  723. PartTwo - pointer which will receive second part of address string
  724. PartTwoLength - pointer which will receive length of second part of address
  725. string
  726. PartOneEscape - TRUE on output if PartTwo contains escape sequences
  727. Return Value:
  728. DWORD
  729. Success - ERROR_SUCCESS
  730. Failure - ERROR_WINHTTP_INVALID_URL
  731. --*/
  732. {
  733. LPSTR pString;
  734. LPSTR pColon;
  735. DWORD partLength;
  736. LPBOOL partEscape;
  737. DWORD length;
  738. //
  739. // parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>]
  740. //
  741. pString = *Url;
  742. pColon = NULL;
  743. partLength = 0;
  744. *PartOne = pString;
  745. *PartOneLength = 0;
  746. *PartOneEscape = FALSE;
  747. *PartTwoEscape = FALSE;
  748. partEscape = PartOneEscape;
  749. length = *UrlLength;
  750. while ((*pString != '/') && (*pString != '\0') && (length != 0)) {
  751. if (*pString == '%') {
  752. //
  753. // if there is a % in the string then it *must* (RFC 1738) be the
  754. // start of an escape sequence. This function just reports the
  755. // address of the substrings and their lengths; calling functions
  756. // must handle the escape sequences (i.e. it is their responsibility
  757. // to decide where to put the results)
  758. //
  759. *partEscape = TRUE;
  760. }
  761. if (*pString == ':') {
  762. if (pColon != NULL) {
  763. //
  764. // we don't expect more than 1 ':'
  765. //
  766. return ERROR_WINHTTP_INVALID_URL;
  767. }
  768. pColon = pString;
  769. *PartOneLength = partLength;
  770. if (partLength == 0) {
  771. *PartOne = NULL;
  772. }
  773. partLength = 0;
  774. partEscape = PartTwoEscape;
  775. } else {
  776. ++partLength;
  777. }
  778. ++pString;
  779. --length;
  780. }
  781. //
  782. // we either ended on the host (or user) name or the port number (or
  783. // password), one of which we don't know the length of
  784. //
  785. if (pColon == NULL) {
  786. *PartOneLength = partLength;
  787. *PartTwo = NULL;
  788. *PartTwoLength = 0;
  789. *PartTwoEscape = FALSE;
  790. } else {
  791. *PartTwoLength = partLength;
  792. *PartTwo = pColon + 1;
  793. //
  794. // in both the <user>:<password> and <host>:<port> cases, we cannot have
  795. // the second part without the first, although both parts being zero
  796. // length is OK (host name will be sorted out elsewhere, but (for now,
  797. // at least) I am allowing <>:<> for username:password, since I don't
  798. // see it expressly disallowed in the RFC. I may be revisiting this code
  799. // later...)
  800. //
  801. // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif
  802. // if ((*PartOneLength == 0) && (partLength != 0)) {
  803. // return ERROR_WINHTTP_INVALID_URL;
  804. // }
  805. }
  806. //
  807. // update the URL pointer and length remaining
  808. //
  809. *Url = pString;
  810. *UrlLength = length;
  811. return ERROR_SUCCESS;
  812. }
  813. DWORD
  814. GetUrlAddress(
  815. IN OUT LPSTR* lpszUrl,
  816. OUT LPDWORD lpdwUrlLength,
  817. OUT LPSTR* lpszUserName OPTIONAL,
  818. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  819. OUT LPSTR* lpszPassword OPTIONAL,
  820. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  821. OUT LPSTR* lpszHostName OPTIONAL,
  822. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  823. OUT LPINTERNET_PORT lpPort OPTIONAL,
  824. OUT LPBOOL pHavePort
  825. )
  826. /*++
  827. Routine Description:
  828. This function extracts any and all parts of the address information for a
  829. generic URL. If any of the address parts contain escaped characters (%nn)
  830. then they are converted in situ
  831. The generic addressing format (RFC 1738) is:
  832. <user>:<password>@<host>:<port>
  833. The addressing information cannot contain a password without a user name,
  834. or a port without a host name
  835. NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name!
  836. (e.g. http://:0/-http-gw-internal-/menu.gif)
  837. Although only the lpszUrl and lpdwUrlLength fields are required, the address
  838. parts will be checked for presence and completeness
  839. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  840. then the accompanying lpdw field must also be supplied
  841. Arguments:
  842. lpszUrl - IN: pointer to the URL to parse
  843. OUT: URL remaining after address information
  844. N.B. The url-path is NOT canonicalized (unescaped)
  845. because it may contain protocol-specific information
  846. which must be parsed out by the protocol-specific
  847. parser
  848. lpdwUrlLength - returned length of the remainder of the URL after the
  849. address information
  850. lpszUserName - returned pointer to the user name
  851. This parameter can be omitted by those protocol parsers
  852. that do not require or expect user names in the URL
  853. lpdwUserNameLength - returned length of the user name part
  854. This parameter can be omitted by those protocol parsers
  855. that do not require or expect user names in the URL
  856. lpszPassword - returned pointer to the password
  857. This parameter can be omitted by those protocol parsers
  858. that do not require or expect user passwords in the URL
  859. lpdwPasswordLength - returned length of the password
  860. This parameter can be omitted by those protocol parsers
  861. that do not require or expect user passwords in the URL
  862. lpszHostName - returned pointer to the host name
  863. This parameter can be omitted by those protocol parsers
  864. that do not require the host name info
  865. lpdwHostNameLength - returned length of the host name
  866. This parameter can be omitted by those protocol parsers
  867. that do not require the host name info
  868. lpPort - returned value of the port field
  869. This parameter can be omitted by those protocol parsers
  870. that do not require or expect user port number
  871. pHavePort - returned boolean indicating whether a port was specified
  872. in the URL or not. This value is not returned if the
  873. lpPort parameter is omitted.
  874. Return Value:
  875. DWORD
  876. Success - ERROR_SUCCESS
  877. Failure - ERROR_WINHTTP_INVALID_URL
  878. We could not parse some part of the address info, or we
  879. found address info where the protocol parser didn't expect
  880. any
  881. ERROR_INSUFFICIENT_BUFFER
  882. We could not convert an escaped string
  883. --*/
  884. {
  885. LPSTR pAt;
  886. DWORD urlLength;
  887. LPSTR pUrl;
  888. BOOL part1Escape;
  889. BOOL part2Escape;
  890. char portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
  891. DWORD portNumberLength;
  892. LPSTR pPortNumber;
  893. DWORD error;
  894. LPSTR hostName;
  895. DWORD hostNameLength;
  896. pUrl = *lpszUrl;
  897. urlLength = strlen(pUrl);
  898. //
  899. // check to see if there is an '@' separating user name & password. If we
  900. // see a '/' or get to the end of the string before we see the '@' then
  901. // there is no username:password part
  902. //
  903. pAt = NULL;
  904. for (DWORD i = 0; i < urlLength; ++i) {
  905. if (pUrl[i] == '/') {
  906. break;
  907. } else if (pUrl[i] == '@') {
  908. pAt = &pUrl[i];
  909. break;
  910. }
  911. }
  912. if (pAt != NULL) {
  913. DWORD addressPartLength;
  914. LPSTR userName;
  915. DWORD userNameLength;
  916. LPSTR password;
  917. DWORD passwordLength;
  918. addressPartLength = (DWORD) (pAt - pUrl);
  919. urlLength -= addressPartLength;
  920. error = GetUrlAddressInfo(&pUrl,
  921. &addressPartLength,
  922. &userName,
  923. &userNameLength,
  924. &part1Escape,
  925. &password,
  926. &passwordLength,
  927. &part2Escape
  928. );
  929. if (error != ERROR_SUCCESS) {
  930. return error;
  931. }
  932. //
  933. // ensure there is no address information unparsed before the '@'
  934. //
  935. INET_ASSERT(addressPartLength == 0);
  936. INET_ASSERT(pUrl == pAt);
  937. if (ARGUMENT_PRESENT(lpszUserName)) {
  938. INET_ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  939. //
  940. // convert the user name in situ
  941. //
  942. if (part1Escape) {
  943. INET_ASSERT(userName != NULL);
  944. INET_ASSERT(userNameLength != 0);
  945. error = DecodeUrlInSitu(userName, &userNameLength);
  946. if (error != ERROR_SUCCESS) {
  947. return error;
  948. }
  949. }
  950. *lpszUserName = userName;
  951. *lpdwUserNameLength = userNameLength;
  952. }
  953. if (ARGUMENT_PRESENT(lpszPassword)) {
  954. //
  955. // convert the password in situ
  956. //
  957. if (part2Escape) {
  958. INET_ASSERT(userName != NULL);
  959. INET_ASSERT(userNameLength != 0);
  960. INET_ASSERT(password != NULL);
  961. INET_ASSERT(passwordLength != 0);
  962. error = DecodeUrlInSitu(password, &passwordLength);
  963. if (error != ERROR_SUCCESS) {
  964. return error;
  965. }
  966. }
  967. *lpszPassword = password;
  968. *lpdwPasswordLength = passwordLength;
  969. }
  970. //
  971. // the URL pointer now points at the host:port fields (remember that
  972. // ExtractAddressParts() must have bumped pUrl up to the end of the
  973. // password field (if present) which ends at pAt)
  974. //
  975. ++pUrl;
  976. //
  977. // similarly, bump urlLength to account for the '@'
  978. //
  979. --urlLength;
  980. } else {
  981. //
  982. // no '@' therefore no username or password
  983. //
  984. if (ARGUMENT_PRESENT(lpszUserName)) {
  985. INET_ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  986. *lpszUserName = NULL;
  987. *lpdwUserNameLength = 0;
  988. }
  989. if (ARGUMENT_PRESENT(lpszPassword)) {
  990. INET_ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
  991. *lpszPassword = NULL;
  992. *lpdwPasswordLength = 0;
  993. }
  994. }
  995. //
  996. // now get the host name and the optional port
  997. //
  998. pPortNumber = portNumber;
  999. portNumberLength = sizeof(portNumber);
  1000. error = GetUrlAddressInfo(&pUrl,
  1001. &urlLength,
  1002. &hostName,
  1003. &hostNameLength,
  1004. &part1Escape,
  1005. &pPortNumber,
  1006. &portNumberLength,
  1007. &part2Escape
  1008. );
  1009. if (error != ERROR_SUCCESS) {
  1010. return error;
  1011. }
  1012. //
  1013. // the URL address information MUST contain the host name
  1014. //
  1015. // if ((hostName == NULL) || (hostNameLength == 0)) {
  1016. // return ERROR_WINHTTP_INVALID_URL;
  1017. // }
  1018. if (ARGUMENT_PRESENT(lpszHostName)) {
  1019. INET_ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));
  1020. //
  1021. // if the host name contains escaped characters, convert them in situ
  1022. //
  1023. if (part1Escape) {
  1024. error = DecodeUrlInSitu(hostName, &hostNameLength);
  1025. if (error != ERROR_SUCCESS) {
  1026. return error;
  1027. }
  1028. }
  1029. *lpszHostName = hostName;
  1030. *lpdwHostNameLength = hostNameLength;
  1031. }
  1032. //
  1033. // if there is a port field, convert it if there are escaped characters,
  1034. // check it for valid numeric characters, and convert it to a number
  1035. //
  1036. if (ARGUMENT_PRESENT(lpPort)) {
  1037. if (portNumberLength != 0) {
  1038. DWORD i;
  1039. DWORD port;
  1040. INET_ASSERT(pPortNumber != NULL);
  1041. if (part2Escape) {
  1042. error = DecodeUrlInSitu(pPortNumber, &portNumberLength);
  1043. if (error != ERROR_SUCCESS) {
  1044. return error;
  1045. }
  1046. }
  1047. //
  1048. // ensure all characters in the port number buffer are numeric, and
  1049. // calculate the port number at the same time
  1050. //
  1051. for (i = 0, port = 0; i < portNumberLength; ++i) {
  1052. if (!isdigit(*pPortNumber)) {
  1053. return ERROR_WINHTTP_INVALID_URL;
  1054. }
  1055. port = port * 10 + (int)(*pPortNumber++ - '0');
  1056. // We won't allow ports larger than 65535 ((2^16)-1)
  1057. // We have to check this every time to make sure that someone
  1058. // doesn't try to overflow a DWORD.
  1059. if (port > 65535)
  1060. {
  1061. return ERROR_WINHTTP_INVALID_URL;
  1062. }
  1063. }
  1064. *lpPort = (INTERNET_PORT)port;
  1065. if (ARGUMENT_PRESENT(pHavePort)) {
  1066. *pHavePort = TRUE;
  1067. }
  1068. } else {
  1069. *lpPort = INTERNET_INVALID_PORT_NUMBER;
  1070. if (ARGUMENT_PRESENT(pHavePort)) {
  1071. *pHavePort = FALSE;
  1072. }
  1073. }
  1074. }
  1075. //
  1076. // update the URL pointer and the length of the url-path
  1077. //
  1078. *lpszUrl = pUrl;
  1079. *lpdwUrlLength = urlLength;
  1080. return ERROR_SUCCESS;
  1081. }
  1082. INTERNET_SCHEME
  1083. MapUrlSchemeName(
  1084. IN LPSTR lpszSchemeName,
  1085. IN DWORD dwSchemeNameLength
  1086. )
  1087. /*++
  1088. Routine Description:
  1089. Maps a scheme name/length to a scheme name type
  1090. Arguments:
  1091. lpszSchemeName - pointer to name of scheme to map
  1092. dwSchemeNameLength - length of scheme (if -1, lpszSchemeName is ASCIZ)
  1093. Return Value:
  1094. INTERNET_SCHEME
  1095. --*/
  1096. {
  1097. if (dwSchemeNameLength == (DWORD)-1) {
  1098. dwSchemeNameLength = (DWORD)lstrlen(lpszSchemeName);
  1099. }
  1100. DWORD i;
  1101. if (ScanSchemes(lpszSchemeName, dwSchemeNameLength, &i))
  1102. {
  1103. return UrlSchemeList[i].SchemeType;
  1104. }
  1105. return INTERNET_SCHEME_UNKNOWN;
  1106. }
  1107. LPSTR
  1108. MapUrlScheme(
  1109. IN INTERNET_SCHEME Scheme,
  1110. OUT LPDWORD lpdwSchemeNameLength
  1111. )
  1112. /*++
  1113. Routine Description:
  1114. Maps the enumerated scheme name type to the name
  1115. Arguments:
  1116. Scheme - enumerated scheme type to map
  1117. lpdwSchemeNameLength - pointer to returned length of scheme name
  1118. Return Value:
  1119. LPSTR - pointer to scheme name or NULL
  1120. --*/
  1121. {
  1122. if ((Scheme >= INTERNET_SCHEME_FIRST)
  1123. && (Scheme <= INTERNET_SCHEME_LAST))
  1124. {
  1125. *lpdwSchemeNameLength = UrlSchemeList[Scheme].SchemeLength;
  1126. return UrlSchemeList[Scheme].SchemeName;
  1127. }
  1128. *lpdwSchemeNameLength = 0;
  1129. return NULL;
  1130. }
  1131. LPSTR
  1132. MapUrlSchemeToName(
  1133. IN INTERNET_SCHEME Scheme
  1134. )
  1135. /*++
  1136. Routine Description:
  1137. Maps the enumerated scheme name type to the name
  1138. Arguments:
  1139. Scheme - enumerated scheme type to map
  1140. Return Value:
  1141. LPSTR - pointer to scheme name or NULL
  1142. --*/
  1143. {
  1144. if ((Scheme >= INTERNET_SCHEME_FIRST)
  1145. && (Scheme <= INTERNET_SCHEME_LAST)) {
  1146. return UrlSchemeList[Scheme].SchemeName;
  1147. }
  1148. return NULL;
  1149. }
  1150. /*
  1151. * ConvertUnicodeToMultiByte:
  1152. *
  1153. * dwFlags: WINHTTP_FLAG_NULL_CODEPAGE-> assumes correctly encoded string packaged into UTF8, no escaping done.
  1154. WINHTTP_FLAG_VALID_HOSTNAME-> only for server name
  1155. only the previous flag valid for server name passed in here.
  1156. if both of these are not specified, then
  1157. if dwCodePage is not INVALID, it'll be used to convert unicode string to ANSI.
  1158. else UTF8 will be used.
  1159. if ESCAPE && ESCAPE_PERCENT is specified, the ANSI url will be escaped (incl. %) else it will be escaped w/o
  1160. escaping %s.
  1161. */
  1162. DWORD
  1163. ConvertUnicodeToMultiByte(
  1164. LPCWSTR lpszObjectName,
  1165. DWORD dwCodePage,
  1166. MEMORYPACKET* pmp,
  1167. DWORD dwFlags)
  1168. {
  1169. DWORD dwError = ERROR_SUCCESS;
  1170. LPSTR pStr;
  1171. WCHAR wc;
  1172. LPCWSTR pwStr;
  1173. BOOL bStrip0s = TRUE;
  1174. DWORD dwUnicodeUrlSize;
  1175. //determine size of string and/or safe characters
  1176. if ((dwFlags & WINHTTP_FLAG_NULL_CODEPAGE) ||
  1177. (dwFlags & WINHTTP_FLAG_VALID_HOSTNAME))
  1178. {
  1179. if (dwFlags & WINHTTP_FLAG_VALID_HOSTNAME)
  1180. {
  1181. for (pwStr = lpszObjectName; wc = *pwStr; ++pwStr)
  1182. {
  1183. if (IS_UNSAFE_URL_WIDECHARACTER(wc, HOSTNAME))
  1184. {
  1185. dwError = ERROR_WINHTTP_INVALID_URL;
  1186. goto done;
  1187. }
  1188. }
  1189. pmp->dwAlloc = dwUnicodeUrlSize = (DWORD)(pwStr-lpszObjectName+1);
  1190. }
  1191. else
  1192. {
  1193. pmp->dwAlloc = dwUnicodeUrlSize = lstrlenW(lpszObjectName)+1;
  1194. }
  1195. }
  1196. else
  1197. {
  1198. DWORD dwUnsafeChars = 0;
  1199. // optimization to check for unsafe characters, and optimize the common case.
  1200. // calculate the length, and while parsing the string, check if there are unsafeChars
  1201. for(pwStr = lpszObjectName; wc = *pwStr; ++pwStr)
  1202. {
  1203. if (IS_UNSAFE_URL_WIDECHARACTER(wc, 0))
  1204. ++dwUnsafeChars;
  1205. }
  1206. dwUnicodeUrlSize = (DWORD)(pwStr-lpszObjectName+1);
  1207. if (dwUnsafeChars == 0)
  1208. {
  1209. pmp->dwAlloc = dwUnicodeUrlSize;
  1210. }
  1211. else
  1212. {
  1213. bStrip0s = FALSE;
  1214. }
  1215. }
  1216. //convert to MBCS
  1217. if (bStrip0s)
  1218. {
  1219. INET_ASSERT(pmp->dwAlloc);
  1220. pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
  1221. if (!pmp->psStr)
  1222. {
  1223. pmp->dwAlloc = 0;
  1224. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1225. goto done;
  1226. }
  1227. pmp->dwSize = pmp->dwAlloc-1;
  1228. for (pStr = pmp->psStr; wc = *lpszObjectName; ++lpszObjectName)
  1229. {
  1230. *(pStr)++ = (CHAR)wc;
  1231. }
  1232. *pStr = '\0';
  1233. }
  1234. else
  1235. {
  1236. // convert with WideCharToMultiByte()
  1237. pmp->dwAlloc = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, NULL, 0, NULL, NULL);
  1238. if (pmp->dwAlloc)
  1239. {
  1240. pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
  1241. if (!pmp->psStr)
  1242. {
  1243. pmp->dwAlloc = 0;
  1244. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1245. goto done;
  1246. }
  1247. pmp->dwSize = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, pmp->psStr, pmp->dwAlloc, NULL, NULL);
  1248. if (!pmp->dwSize)
  1249. {
  1250. dwError = GetLastError();
  1251. goto done;
  1252. }
  1253. else
  1254. pmp->dwSize -= 1;
  1255. }
  1256. else
  1257. {
  1258. dwError = GetLastError();
  1259. goto done;
  1260. }
  1261. }
  1262. //escaping
  1263. if (dwFlags & WINHTTP_FLAG_DEFAULT_ESCAPE)
  1264. {
  1265. INET_ASSERT (! (dwFlags & WINHTTP_FLAG_VALID_HOSTNAME));
  1266. static CHAR* hexArray = "0123456789ABCDEF";
  1267. UCHAR ch;
  1268. DWORD dwUnsafeChars = 0;
  1269. DWORD dwNewAlloc;
  1270. LPSTR pDest, pNewStr;
  1271. for(pStr = pmp->psStr; ch = *pStr; pStr = CharNextExA((WORD)dwCodePage, pStr, 0))
  1272. {
  1273. if (IS_UNSAFE_URL_CHARACTER(ch, SCHEME_HTTP))
  1274. ++dwUnsafeChars;
  1275. else if(ch == '?')
  1276. break;
  1277. }
  1278. if (dwUnsafeChars == 0)
  1279. goto done;
  1280. dwNewAlloc = pmp->dwAlloc + dwUnsafeChars*2;
  1281. pNewStr = pDest = (LPSTR)ALLOCATE_FIXED_MEMORY(dwNewAlloc);
  1282. if (!pDest)
  1283. {
  1284. dwError = ERROR_NOT_ENOUGH_MEMORY;
  1285. goto done;
  1286. }
  1287. BOOL bEscapePercent = (dwFlags & WINHTTP_FLAG_ESCAPE_PERCENT) ? TRUE : FALSE;
  1288. BOOL bHitQuery = FALSE;
  1289. LPSTR pNext;
  1290. BOOL bLead;
  1291. for (pStr = pmp->psStr; ch = *pStr;)
  1292. {
  1293. pNext = CharNextExA((WORD)dwCodePage, pStr, 0);
  1294. bLead = TRUE;
  1295. do
  1296. {
  1297. ch = *pStr;
  1298. if (IS_UNSAFE_URL_CHARACTER(ch, SCHEME_HTTP)
  1299. && (!bLead || (ch != '%') || bEscapePercent) )
  1300. {
  1301. *pDest++ = '%';
  1302. *pDest++ = hexArray[ch>>4];
  1303. *pDest++ = hexArray[ch & 0x0f];
  1304. }
  1305. else
  1306. {
  1307. *pDest++ = ch;
  1308. if ((ch == '?') && bLead)
  1309. {
  1310. bHitQuery = TRUE;
  1311. ++pStr;
  1312. INET_ASSERT(pStr == pNext);
  1313. break;
  1314. }
  1315. }
  1316. bLead = FALSE;
  1317. }
  1318. while (++pStr != pNext);
  1319. if (bHitQuery)
  1320. break;
  1321. }
  1322. if (bHitQuery)
  1323. {
  1324. for ( ; ch = *pStr; pStr++)
  1325. {
  1326. *pDest++ = ch;
  1327. }
  1328. }
  1329. *pDest = '\0';
  1330. FREE_FIXED_MEMORY(pmp->psStr);
  1331. pmp->psStr = pNewStr;
  1332. pmp->dwSize = (DWORD)(pDest-pNewStr);
  1333. pmp->dwAlloc = dwNewAlloc;
  1334. }
  1335. done:
  1336. if (pmp->psStr)
  1337. pmp->dwAlloc = (pmp->dwAlloc > MP_MAX_STACK_USE) ? pmp->dwAlloc : MP_MAX_STACK_USE+1;// to force FREE in ~MEMORYPACKET
  1338. return dwError;
  1339. }