Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

8887 lines
231 KiB

  1. /*++
  2. Copyright (c) 1994 Microsoft Corporation
  3. Module Name:
  4. urlpars.cpp
  5. Abstract:
  6. Contains all the worker routines for Combine and Canonicalize
  7. Contents:
  8. (ConvertChar)
  9. Author:
  10. Zeke Lucas (zekel) 16-Dez-96
  11. Ahsan Kabir (akabir): UrlCombine parser rewritten in July-Sept98
  12. Environment:
  13. Win32(s) user-mode DLL
  14. Revision History:
  15. there is about one percent of this derived
  16. from the Spyglass or MSHTML/WININET codebase
  17. --*/
  18. #include "priv.h"
  19. #include <shstr.h>
  20. #ifdef UNIX
  21. #include <shlobj.h>
  22. #endif
  23. #include <intshcut.h>
  24. #ifdef _X86_
  25. #include <w95wraps.h>
  26. #endif
  27. #include <shlwapip.h>
  28. #ifdef UNIX
  29. #include "unixstuff.h"
  30. #endif
  31. #include <wininet.h>
  32. #define DM_PERF 0 // perf stats
  33. #define PF_LOGSCHEMEHITS 0x00000001
  34. #ifndef CPP_FUNCTIONS
  35. #define CPP_FUNCTIONS
  36. #include <crtfree.h>
  37. #endif
  38. #define USE_FAST_PARSER
  39. #ifdef DEBUG
  40. //#define PROOFREAD_PARSES
  41. #endif
  // Same limits as wininet. URLs are not inherently bounded like this, but the
  // rest of the product assumes these limits, so the parser honors them too.
  #define INTERNET_MAX_PATH_LENGTH 2048
  #define INTERNET_MAX_SCHEME_LENGTH 32
  #define HEX_ESCAPE L'%'
  #define HEX_ESCAPE_A '%'
  // Writes a terminator at pch. The expansion is parenthesized so that it
  // stays a single expression wherever it is used.
  #define TERMSTR(pch) (*(pch) = L'\0')
  // (WCHAR) 8 is backspace
  #define DEADSEGCHAR ((WCHAR) 8)
  // Marks the segment that starts at pch as dead (see IsLiveSegment).
  #define KILLSEG(pch) (*(pch) = DEADSEGCHAR)
  // Characters the parser cares about.
  #define CR L'\r'
  #define LF L'\n'
  #define TAB L'\t'
  #define SPC L' '
  #define SLASH L'/'
  #define WHACK L'\\'
  #define QUERY L'?'
  #define POUND L'#'
  #define SEMICOLON L';'
  #define COLON L':'
  #define BAR L'|'
  #define DOT L'.'
  #define AT L'@'
  // Per-scheme flags.
  #define UPF_SCHEME_OPAQUE 0x00000001 // should not be treated as hierarchical
  #define UPF_SCHEME_INTERNET 0x00000002
  #define UPF_SCHEME_NOHISTORY 0x00000004
  #define UPF_SCHEME_CONVERT 0x00000008 // treat slashes and whacks as equivalent
  #define UPF_SCHEME_DONTCORRECT 0x00000010 // don't try to autocorrect to this scheme
  // Segment flags.
  #define UPF_SEG_ABSOLUTE 0x00000100 // the initial segment is the root
  #define UPF_SEG_LOCKFIRST 0x00000200 // this is for file parsing
  #define UPF_SEG_EMPTYSEG 0x00000400 // this was an empty string, but is still important
  #define UPF_EXSEG_DIRECTORY 0x00001000 // the final segment is a "directory" (trailing slash)
  #define UPF_FILEISPATHURL 0x10000000 // this is for file paths; don't unescape because they are actually DOS paths
  //
  // The masks are for inheritance purposes during BlendParts:
  // if you inherit a part you inherit the flags under that part's mask.
  //
  #define UPF_SCHEME_MASK 0x000000FF
  #define UPF_SEG_MASK 0x00000F00
  #define UPF_EXSEG_MASK 0x0000F000
  // right now these masks are unused, and can be recycled
  #define UPF_SERVER_MASK 0x000F0000
  #define UPF_QUERY_MASK 0x0F000000
  85. extern "C" int _StrCmpNA(LPCSTR lpStr1, LPCSTR lpStr2, int nChar, BOOL fMBCS);
  86. extern "C" LPSTR _StrChrA(LPCSTR lpStart, WORD wMatch, BOOL fMBCS);
  87. typedef struct _UrlParts
  88. {
  89. DWORD dwFlags;
  90. LPWSTR pszScheme;
  91. URL_SCHEME eScheme;
  92. LPWSTR pszServer;
  93. LPWSTR pszSegments;
  94. DWORD cSegments;
  95. LPWSTR pszExtraSegs;
  96. DWORD cExtraSegs;
  97. LPWSTR pszQuery;
  98. LPWSTR pszFragment;
  99. } URLPARTS, *PURLPARTS;
  100. HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags);
  101. HRESULT SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags);
  102. // Ansi wrappers might overwrite the unicode core's return value
  103. // We should try to prevent that
  104. HRESULT ReconcileHresults(HRESULT hr1, HRESULT hr2)
  105. {
  106. return (hr2==S_OK) ? hr1 : hr2;
  107. }
  108. PRIVATE CONST WORD isSafe[96] =
  109. /* Bit 0 alphadigit -- 'a' to 'z', '0' to '9', 'A' to 'Z'
  110. ** Bit 1 Hex -- '0' to '9', 'a' to 'f', 'A' to 'F'
  111. ** Bit 2 valid scheme -- alphadigit | "-" | "." | "+"
  112. ** Bit 3 mark -- "%" | "$"| "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" | ","
  113. */
  114. /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
  115. // {0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 4, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
  116. // IE4 BETA1: allow + through unmolested. Should consider other options
  117. // post beta1. 12feb97 tonyci
  118. {0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 12, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
  119. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 8, 0, 8, 0, 0, /* 3x 0123456789:;<=>? */
  120. 8, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x @ABCDEFGHIJKLMNO */
  121. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 8, /* 5X PQRSTUVWXYZ[\]^_ */
  122. 0, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x `abcdefghijklmno */
  123. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 8, 0}; /* 7X pqrstuvwxyz{|}~ DEL */
  124. PRIVATE const WCHAR hex[] = L"0123456789ABCDEF";
  125. PRIVATE inline BOOL IsSafe(WCHAR ch, WORD mask)
  126. {
  127. if(((ch > 31 ) && (ch < 128) && (isSafe[ch - 32] & mask)))
  128. return TRUE;
  129. return FALSE;
  130. }
  // Character class helpers built on IsSafe(); the second argument is the
  // isSafe[] bit mask being tested.
  #define IsAlphaDigit(c) IsSafe(c, 1)
  #define IsHex(c) IsSafe(c, 2)
  #define IsValidSchemeCharA(c) IsSafe(c, 5)
  // Anything above 0xff counts as safe; otherwise alphadigit or mark.
  // The argument is parenthesized so that expression arguments compare as a whole.
  #define IsSafePathChar(c) (((c) > 0xff) || IsSafe(c, 9))
  #define IsUpper(c) ((c) >= 'A' && (c) <= 'Z')
  136. PRIVATE inline BOOL IsAsciiCharW(WCHAR ch)
  137. {
  138. return (!(ch >> 8) && ((CHAR) ch));
  139. }
  140. PRIVATE inline WCHAR Ascii_ToLowerW(WCHAR ch)
  141. {
  142. return (ch >= L'A' && ch <= L'Z') ? (ch - L'A' + L'a') : ch;
  143. }
  144. BOOL IsValidSchemeCharW(WCHAR ch)
  145. {
  146. if(IsAsciiCharW(ch))
  147. return IsSafe( (CHAR) ch, 5);
  148. return FALSE;
  149. }
  150. WCHAR const c_szHttpScheme[] = L"http";
  151. WCHAR const c_szFileScheme[] = L"file";
  152. WCHAR const c_szFTPScheme[] = L"ftp";
  153. WCHAR const c_szGopherScheme[] = L"gopher";
  154. WCHAR const c_szMailToScheme[] = L"mailto";
  155. WCHAR const c_szNewsScheme[] = L"news";
  156. WCHAR const c_szNNTPScheme[] = L"nntp";
  157. WCHAR const c_szTelnetScheme[] = L"telnet";
  158. WCHAR const c_szWAISScheme[] = L"wais";
  159. WCHAR const c_szMkScheme[] = L"mk";
  160. WCHAR const c_szHttpsScheme[] = L"https";
  161. WCHAR const c_szLocalScheme[] = L"local";
  162. WCHAR const c_szShellScheme[] = L"shell";
  163. WCHAR const c_szJSScheme[] = L"javascript";
  164. WCHAR const c_szVSScheme[] = L"vbscript";
  165. WCHAR const c_szAboutScheme[] = L"about";
  166. WCHAR const c_szSnewsScheme[] = L"snews";
  167. WCHAR const c_szResScheme[] = L"res";
  168. WCHAR const c_szRootedScheme[] = L"ms-shell-rooted";
  169. WCHAR const c_szIDListScheme[] = L"ms-shell-idlist";
  170. WCHAR const c_szMsHelpScheme[] = L"hcp";
  171. const struct
  172. {
  173. LPCWSTR pszScheme;
  174. URL_SCHEME eScheme;
  175. DWORD cchScheme;
  176. DWORD dwFlags;
  177. } g_mpUrlSchemeTypes[] =
  178. {
  179. // Because we use a linear search, sort this in the order of
  180. // most common usage.
  181. { c_szHttpScheme, URL_SCHEME_HTTP, SIZECHARS(c_szHttpScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  182. { c_szFileScheme, URL_SCHEME_FILE, SIZECHARS(c_szFileScheme) - 1, UPF_SCHEME_CONVERT},
  183. { c_szFTPScheme, URL_SCHEME_FTP, SIZECHARS(c_szFTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  184. { c_szHttpsScheme, URL_SCHEME_HTTPS, SIZECHARS(c_szHttpsScheme) -1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT|UPF_SCHEME_DONTCORRECT},
  185. { c_szNewsScheme, URL_SCHEME_NEWS, SIZECHARS(c_szNewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  186. { c_szMailToScheme, URL_SCHEME_MAILTO, SIZECHARS(c_szMailToScheme) - 1, UPF_SCHEME_OPAQUE},
  187. { c_szGopherScheme, URL_SCHEME_GOPHER, SIZECHARS(c_szGopherScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  188. { c_szNNTPScheme, URL_SCHEME_NNTP, SIZECHARS(c_szNNTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  189. { c_szTelnetScheme, URL_SCHEME_TELNET, SIZECHARS(c_szTelnetScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  190. { c_szWAISScheme, URL_SCHEME_WAIS, SIZECHARS(c_szWAISScheme) - 1, 0},
  191. { c_szMkScheme, URL_SCHEME_MK, SIZECHARS(c_szMkScheme) - 1, UPF_SCHEME_NOHISTORY},
  192. { c_szShellScheme, URL_SCHEME_SHELL, SIZECHARS(c_szShellScheme) - 1, UPF_SCHEME_OPAQUE},
  193. { c_szLocalScheme, URL_SCHEME_LOCAL, SIZECHARS(c_szLocalScheme) - 1, 0},
  194. { c_szJSScheme, URL_SCHEME_JAVASCRIPT,SIZECHARS(c_szJSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  195. { c_szVSScheme, URL_SCHEME_VBSCRIPT, SIZECHARS(c_szVSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  196. { c_szSnewsScheme, URL_SCHEME_SNEWS, SIZECHARS(c_szSnewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  197. { c_szAboutScheme, URL_SCHEME_ABOUT, SIZECHARS(c_szAboutScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  198. { c_szResScheme, URL_SCHEME_RES, SIZECHARS(c_szResScheme) - 1, UPF_SCHEME_NOHISTORY},
  199. { c_szRootedScheme, URL_SCHEME_MSSHELLROOTED, SIZECHARS(c_szRootedScheme) - 1, 0},
  200. { c_szIDListScheme, URL_SCHEME_MSSHELLIDLIST, SIZECHARS(c_szIDListScheme) - 1, 0},
  201. { c_szMsHelpScheme, URL_SCHEME_MSHELP, SIZECHARS(c_szMsHelpScheme) - 1, 0},
  202. };
  203. PRIVATE int _StrCmpNMixed(LPCSTR psz, LPCWSTR pwz, DWORD cch)
  204. {
  205. int iRet = 0;
  206. //
  207. // we dont have to real mbcs conversion here because we are
  208. // guaranteed to have only ascii chars here
  209. //
  210. for (;cch; psz++, pwz++, cch--)
  211. {
  212. WCHAR ch = *psz;
  213. if (ch != *pwz)
  214. {
  215. //
  216. // this makes it case insensitive
  217. if (IsUpper(ch) && (ch + 32) == *pwz)
  218. continue;
  219. if(ch > *pwz)
  220. iRet = 1;
  221. else
  222. iRet = -1;
  223. break;
  224. }
  225. }
  226. return iRet;
  227. }
  //*** g_iScheme -- one-element MRU cache for g_mpUrlSchemeTypes
  // DESCRIPTION
  //  GetSchemeTypeAndFlags is called many times for the same scheme. When
  //  the scheme is the 0th table entry that is no big deal, but for later
  //  entries the linear search isn't very good, so the index of the last
  //  hit is remembered. Even for the most common (by far) case of "http"
  //  (0th entry) this *still* wins, due to the cheaper StrCmpC and the
  //  skipped loop.
  // NOTES
  //  g_iScheme refs/sets are atomic so no lock is needed
  int g_iScheme;      // index of the last table entry we hit
  #ifdef DEBUG
  int g_cSTTot;
  int g_cSTHit;
  int g_cSTHit0;
  #endif
  241. //
  242. // all of the pszScheme to nScheme functions are necessary at this point
  243. // because some parsing is vioent, and some is necessarily soft
  244. //
  245. PRIVATE URL_SCHEME
  246. GetSchemeTypeAndFlagsW(LPCWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  247. {
  248. DWORD i;
  249. ASSERT(pszScheme);
  250. #ifdef DEBUG
  251. if ((g_cSTTot % 10) == 0)
  252. TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
  253. #endif
  254. DBEXEC(TRUE, g_cSTTot++);
  255. // check cache 1st
  256. i = g_iScheme;
  257. if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  258. && StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
  259. {
  260. DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
  261. Lhit:
  262. if (pdwFlags)
  263. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  264. // update cache (unconditionally)
  265. g_iScheme = i;
  266. return g_mpUrlSchemeTypes[i].eScheme;
  267. }
  268. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  269. {
  270. if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  271. && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  272. goto Lhit;
  273. }
  274. if (pdwFlags)
  275. {
  276. *pdwFlags = 0;
  277. }
  278. return URL_SCHEME_UNKNOWN;
  279. }
  280. PRIVATE URL_SCHEME
  281. GetSchemeTypeAndFlagsA(LPCSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  282. {
  283. DWORD i;
  284. ASSERT(pszScheme);
  285. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  286. {
  287. if(0 == _StrCmpNMixed(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  288. {
  289. if (pdwFlags)
  290. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  291. return g_mpUrlSchemeTypes[i].eScheme;
  292. }
  293. }
  294. if (pdwFlags)
  295. {
  296. *pdwFlags = 0;
  297. }
  298. return URL_SCHEME_UNKNOWN;
  299. }
  300. PRIVATE DWORD GetSchemeFlags(URL_SCHEME eScheme)
  301. {
  302. DWORD i;
  303. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  304. {
  305. if(eScheme == g_mpUrlSchemeTypes[i].eScheme)
  306. {
  307. return g_mpUrlSchemeTypes[i].dwFlags;
  308. }
  309. }
  310. return 0;
  311. }
  312. /*----------------------------------------------------------
  313. Purpose: Return the scheme ordinal type (URL_SCHEME_*) based on the
  314. URL string.
  315. Returns: URL_SCHEME_ ordinal
  316. Cond: --
  317. */
  318. PRIVATE inline BOOL IsSameSchemeW(LPCWSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
  319. {
  320. ASSERT(pszLocal);
  321. ASSERT(pszGlobal);
  322. ASSERT(cch);
  323. return !StrCmpNIW(pszLocal, pszGlobal, cch);
  324. }
  325. PRIVATE BOOL IsSameSchemeA(LPCSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
  326. {
  327. ASSERT(pszLocal);
  328. ASSERT(pszGlobal);
  329. ASSERT(cch);
  330. return !_StrCmpNMixed(pszLocal, pszGlobal, cch);
  331. }
  332. PRIVATE URL_SCHEME
  333. SchemeTypeFromStringA(
  334. LPCSTR psz,
  335. DWORD cch)
  336. {
  337. DWORD i;
  338. // psz is a counted string (by cch), not a null-terminated string,
  339. // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRA.
  340. ASSERT(IS_VALID_READ_BUFFER(psz, CHAR, cch));
  341. ASSERT(cch);
  342. // We use a linear search. A binary search wouldn't pay off
  343. // because the list isn't big enough, and we can sort the list
  344. // according to the most popular protocol schemes and pay off
  345. // bigger.
  346. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  347. {
  348. if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
  349. IsSameSchemeA(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
  350. return g_mpUrlSchemeTypes[i].eScheme;
  351. }
  352. return URL_SCHEME_UNKNOWN;
  353. }
  354. PRIVATE URL_SCHEME
  355. SchemeTypeFromStringW(
  356. LPCWSTR psz,
  357. DWORD cch)
  358. {
  359. DWORD i;
  360. // psz is a counted string (by cch), not a null-terminated string,
  361. // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRW.
  362. ASSERT(IS_VALID_READ_BUFFER(psz, WCHAR, cch));
  363. ASSERT(cch);
  364. // We use a linear search. A binary search wouldn't pay off
  365. // because the list isn't big enough, and we can sort the list
  366. // according to the most popular protocol schemes and pay off
  367. // bigger.
  368. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  369. {
  370. if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
  371. IsSameSchemeW(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
  372. return g_mpUrlSchemeTypes[i].eScheme;
  373. }
  374. return URL_SCHEME_UNKNOWN;
  375. }
  376. //
  377. // these are used during path fumbling that i do
  378. // each string between a path delimiter ( '/' or '\')
  379. // is a segment. we dont ever really care about
  380. // empty ("") segments, so it is best to use
  381. // NextLiveSegment().
  382. //
  383. inline PRIVATE LPWSTR
  384. NextSegment(LPWSTR psz)
  385. {
  386. ASSERT (psz);
  387. return psz + lstrlenW(psz) + 1;
  388. }
  // A segment is live when its pointer is non-NULL and its first character is
  // not the DEADSEGCHAR marker. The argument is fully parenthesized so that
  // expression arguments such as (p + 1) are dereferenced as a whole.
  #define IsLiveSegment(p) ((p) && *(p) != DEADSEGCHAR)
  390. PRIVATE LPWSTR
  391. NextLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
  392. {
  393. if(pszSeg) do
  394. {
  395. //
  396. // count the number of dead segments that we skip.
  397. // if the segment isnt dead, then we can just skip one,
  398. // the current one.
  399. //
  400. DWORD cSkip;
  401. for (cSkip = 0; (*pszSeg) == DEADSEGCHAR; pszSeg++, cSkip++);
  402. cSkip = cSkip ? cSkip : 1;
  403. if((*piSeg) + cSkip < cSegs)
  404. {
  405. pszSeg = NextSegment(pszSeg);
  406. (*piSeg) += cSkip;
  407. }
  408. else
  409. pszSeg = NULL;
  410. } while (pszSeg && (*pszSeg == DEADSEGCHAR));
  411. return pszSeg;
  412. }
  413. PRIVATE LPWSTR
  414. LastLiveSegment(LPWSTR pszSeg, DWORD cSegs, BOOL fFailIfFirst)
  415. {
  416. DWORD iSeg = 0;
  417. LPWSTR pszLast = NULL;
  418. BOOL fLastIsFirst = FALSE;
  419. if(cSegs)
  420. {
  421. if(IsLiveSegment(pszSeg))
  422. {
  423. pszLast = pszSeg;
  424. fLastIsFirst = TRUE;
  425. }
  426. while(pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs))
  427. {
  428. if(!pszLast)
  429. fLastIsFirst = TRUE;
  430. else
  431. fLastIsFirst = FALSE;
  432. pszLast = pszSeg;
  433. }
  434. if(fFailIfFirst && fLastIsFirst)
  435. pszLast = NULL;
  436. }
  437. return pszLast;
  438. }
  439. PRIVATE LPWSTR
  440. FirstLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
  441. {
  442. ASSERT(piSeg);
  443. *piSeg = 0;
  444. if(!pszSeg || !cSegs)
  445. return NULL;
  446. if(!IsLiveSegment(pszSeg))
  447. pszSeg = NextLiveSegment(pszSeg, piSeg, cSegs);
  448. return pszSeg;
  449. }
  450. inline BOOL IsDosDrive(LPCWSTR p)
  451. {
  452. return (*p && p[1] == COLON);
  453. }
  454. inline BOOL IsDosPath(LPCWSTR p)
  455. {
  456. return (*p == WHACK || IsDosDrive(p));
  457. }
  458. inline BOOL IsDriveUrl(const WCHAR *p)
  459. {
  460. return (*p && p[1] == BAR);
  461. }
  462. inline BOOL IsDrive(LPCWSTR p)
  463. {
  464. return (IsDosDrive(p) || IsDriveUrl(p));
  465. }
  466. inline BOOL IsSeparator(const WCHAR *p)
  467. {
  468. return (*p == SLASH || *p == WHACK );
  469. }
  470. inline BOOL IsAbsolute(const WCHAR *p)
  471. {
  472. #ifndef UNIX
  473. return (IsSeparator(p) || IsDrive(p));
  474. #else
  475. return (IsSeparator(p)) ;
  476. #endif
  477. }
  // UNC detection is delegated to PathIsUNCW.
  #define IsUNC(pszPathW) PathIsUNCW(pszPathW)
  479. inline BOOL IsDot(LPCWSTR p) // if p == "." return TRUE
  480. {
  481. return (*p == DOT && !p[1]);
  482. }
  483. inline BOOL IsDotDot(LPCWSTR p) // if p == ".." return TRUE
  484. {
  485. return (*p == DOT && p[1] == DOT && !p[2]);
  486. }
  487. //+---------------------------------------------------------------------------
  488. //
  489. // Method: ConvertChar
  490. //
  491. // Synopsis:
  492. //
  493. // Arguments: [szStr] --
  494. // [cIn] --
  495. // [cOut] --
  496. //
  497. // Returns:
  498. //
  499. // History: 03-20-96 JoeS (Joe Souza) Created
  500. //
  501. // Notes:
  502. //
  503. //----------------------------------------------------------------------------
  504. static void ConvertChar(LPWSTR ptr, WCHAR cIn, WCHAR cOut, BOOL fProtectExtra)
  505. {
  506. while (*ptr)
  507. {
  508. if (fProtectExtra && (*ptr == QUERY || *ptr == POUND ))
  509. {
  510. break;
  511. }
  512. if (*ptr == cIn)
  513. {
  514. *ptr = cOut;
  515. }
  516. ptr++;
  517. }
  518. }
  519. PUBLIC void WininetFixFileSlashes(WCHAR *p)
  520. {
  521. // NB: This function assumes that p points to a file URL.
  522. // The file URL *MUST* be of the form "file://...".
  523. // HTParse() guarantees that this will be so.
  524. int schemelen = 0;
  525. schemelen = SIZECHARS(L"file://") - 1;
  526. /* In UNIX system, we don't need to convert the SLASH to WHACK */
  527. if (p && lstrlenW(p) > schemelen)
  528. {
  529. #ifdef UNIX
  530. ConvertChar(p + schemelen, WHACK, SLASH, TRUE);
  531. #else
  532. ConvertChar(p + schemelen, SLASH, WHACK, TRUE);
  533. #endif
  534. }
  535. }
  536. //
  537. // in the URL spec, it says that all whitespace should be ignored
  538. // due to the fact that it is possible to introduce
  539. // new whitespace and eliminate other whitespace
  540. // however, we are only going to strip out TAB CR LF
  541. // because we consider SPACE's to be significant.
  542. //
  543. PRIVATE inline BOOL IsInsignificantWhite(WCHAR ch)
  544. {
  545. return (ch == TAB ||
  546. ch == CR ||
  547. ch == LF);
  548. }
  // Everything up to and including the space character (32) is white.
  #define IsWhite(c) (((DWORD) (c) <= 32) ? TRUE : FALSE)
  550. PRIVATE void TrimAndStripInsignificantWhite(WCHAR *psz)
  551. {
  552. ASSERT(psz);
  553. if(*psz)
  554. {
  555. LPCWSTR pszSrc = psz;
  556. LPWSTR pszDest = psz;
  557. LPWSTR pszLastSpace = NULL;
  558. // first trim the front side by just moving the source pointer.
  559. while(*pszSrc && IsWhite(*pszSrc)) {
  560. pszSrc++;
  561. }
  562. //
  563. // Copy the body stripping "insignificant" white spaces.
  564. // Remember the last white space to trim trailing space later.
  565. //
  566. while (*pszSrc)
  567. {
  568. if(IsInsignificantWhite(*pszSrc)) {
  569. pszSrc++;
  570. } else {
  571. if (IsWhite(*pszSrc)) {
  572. if (pszLastSpace==NULL) {
  573. pszLastSpace = pszDest;
  574. }
  575. } else {
  576. pszLastSpace = NULL;
  577. }
  578. *pszDest++ = *pszSrc++;
  579. }
  580. }
  581. // Trim the trailing space
  582. if (pszLastSpace) {
  583. *pszLastSpace = L'\0';
  584. } else {
  585. *pszDest = L'\0';
  586. }
  587. }
  588. #if 0
  589. // APPCOMPAT - NETSCAPE compatibility - zekel 29-JAN-97
  590. // we want to leave one space in the string
  591. if(*psz== L'\0')
  592. {
  593. psz[0] = SPC;
  594. psz[1] = L'\0';
  595. }
  596. #endif
  597. }
  598. struct EXTKEY
  599. {
  600. PCSTR szExt;
  601. PCWSTR wszExt;
  602. DWORD cchExt;
  603. };
  604. const EXTKEY ExtTable[] = {
  605. { ".html", L".html", ARRAYSIZE(".html") - 1 },
  606. { ".htm", L".htm", ARRAYSIZE(".htm") - 1 },
  607. { ".xml", L".xml", ARRAYSIZE(".xml") - 1 },
  608. { ".doc", L".doc", ARRAYSIZE(".doc") - 1 },
  609. { ".xls", L".xls", ARRAYSIZE(".xls") - 1 },
  610. { ".ppt", L".ppt", ARRAYSIZE(".ppt") - 1 },
  611. { ".rtf", L".rtf", ARRAYSIZE(".rtf") - 1 },
  612. { ".dot", L".dot", ARRAYSIZE(".dot") - 1 },
  613. { ".xlw", L".xlw", ARRAYSIZE(".xlw") - 1 },
  614. { ".pps", L".pps", ARRAYSIZE(".pps") - 1 },
  615. { ".xlt", L".xlt", ARRAYSIZE(".xlt") - 1 },
  616. { ".hta", L".hta", ARRAYSIZE(".hta") - 1 },
  617. { ".pot", L".pot", ARRAYSIZE(".pot") - 1 }
  618. };
  619. inline BOOL CompareExtA(PCSTR psz, DWORD_PTR cch)
  620. {
  621. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  622. {
  623. if (ExtTable[i].cchExt>cch)
  624. continue;
  625. if (!StrCmpNIA(psz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].szExt, ExtTable[i].cchExt))
  626. return TRUE;
  627. }
  628. return FALSE;
  629. }
  630. inline BOOL CompareExtW(PCWSTR pwsz, DWORD_PTR cch)
  631. {
  632. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  633. {
  634. if (ExtTable[i].cchExt>cch)
  635. continue;
  636. if (!StrCmpNIW(pwsz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].wszExt, ExtTable[i].cchExt))
  637. return TRUE;
  638. }
  639. return FALSE;
  640. }
  641. PRIVATE LPCSTR FindFragmentA(LPCSTR psz, BOOL fMBCS, BOOL fIsFile)
  642. {
  643. CHAR *pch = _StrChrA(psz, POUND, fMBCS);
  644. if(pch && fIsFile)
  645. {
  646. CHAR *pchQuery = _StrChrA(psz, QUERY, fMBCS);
  647. if (pchQuery && (pchQuery < pch))
  648. goto exit;
  649. do
  650. {
  651. LONG_PTR cch = pch - psz;
  652. // REARCHITECT: we shouldn't hardcode ".htm".
  653. // #s are significant in dospaths - zekel 9-JUL-97
  654. // so we want to check the path in front and make sure
  655. // that it is an html file. we believe this heuristic should work
  656. // in about 99% of all cases.
  657. //
  658. // if it is not an html file it is not a hash
  659. if (CompareExtA(pch, cch))
  660. {
  661. break;
  662. }
  663. } while (pch = _StrChrA(++pch, POUND, fMBCS));
  664. }
  665. exit:
  666. return pch;
  667. }
  668. PRIVATE LPCWSTR FindFragmentW(LPCWSTR psz, BOOL fIsFile)
  669. {
  670. WCHAR *pch = StrChrW(psz, POUND);
  671. if(pch && fIsFile)
  672. {
  673. WCHAR *pchQuery = StrChrW(psz, QUERY);
  674. if (pchQuery && (pchQuery < pch))
  675. goto exit;
  676. do
  677. {
  678. LONG_PTR cch = pch - psz;
  679. // REARCHITECT: we shouldn't hardcode ".htm".
  680. // #s are significant in dospaths - zekel 9-JUL-97
  681. // so we want to check the path in front and make sure
  682. // that it is an html file. we believe this heuristic should work
  683. // in about 99% of all cases.
  684. //
  685. // if it is not an html file it is not a hash
  686. if (CompareExtW(pch, cch))
  687. {
  688. break;
  689. }
  690. } while (pch = StrChrW(++pch, POUND));
  691. }
  692. exit:
  693. return pch;
  694. }
  695. PRIVATE VOID BreakFragment(LPWSTR *ppsz, PURLPARTS parts)
  696. {
  697. ASSERT(ppsz);
  698. ASSERT(*ppsz);
  699. //
  700. // Opaque URLs are not allowed to use fragments - zekel 27-feb-97
  701. // Is it possible for an opaque URL to use a fragment?
  702. // right now we assume not. i suspect so but will leave it this way for now
  703. // this is especially important to javascript and vbscript
  704. // FEATURE: this might be worth investigation, but probably can't change this code
  705. //
  706. if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
  707. return;
  708. WCHAR *pch = (LPWSTR) FindFragmentW(*ppsz, parts->eScheme == URL_SCHEME_FILE);
  709. if (pch)
  710. {
  711. TERMSTR(pch);
  712. parts->pszFragment = pch +1;
  713. }
  714. }
  715. PRIVATE inline BOOL IsUrlPrefixA(LPCSTR psz)
  716. {
  717. //
  718. // Optimized for this particular case. Notice that most of it
  719. // will be lego-ized out anyway.
  720. //
  721. if (psz[0]=='u' || psz[0]=='U') {
  722. if (psz[1]=='r' || psz[1]=='R') {
  723. if (psz[2]=='l' || psz[2]=='L') {
  724. return TRUE;
  725. }
  726. }
  727. }
  728. return FALSE;
  729. // return !StrCmpNIA(psz, c_szURLPrefixA, c_cchURLPrefix);
  730. }
  731. PRIVATE inline BOOL IsUrlPrefixW(LPCWSTR psz)
  732. {
  733. //
  734. // Optimized for this particular case. Notice that most of it
  735. // will be lego-ized out anyway.
  736. //
  737. if (psz[0]==L'u' || psz[0]==L'U') {
  738. if (psz[1]==L'r' || psz[1]==L'R') {
  739. if (psz[2]==L'l' || psz[2]==L'L') {
  740. return TRUE;
  741. }
  742. }
  743. }
  744. return FALSE;
  745. // return !StrCmpNIW(psz, c_szURLPrefixW, c_cchURLPrefix);
  746. }
  747. //
  748. // if FindScheme() succeeds, it returns a pointer to the scheme,
  749. // and the cch holds the count of chars for the scheme
  750. // if it fails, and cch is non-zero then cch is how much should be skipped.
  751. // this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
  752. //
  753. LPCSTR FindSchemeA(LPCSTR psz, LPDWORD pcchScheme)
  754. {
  755. LPCSTR pch;
  756. DWORD cch;
  757. ASSERT(pcchScheme);
  758. ASSERT(psz);
  759. *pcchScheme = 0;
  760. for (pch = psz, cch = 0; *pch; pch++, cch++)
  761. {
  762. if (*pch == ':')
  763. {
  764. if (IsUrlPrefixA(psz))
  765. {
  766. psz = pch +1;
  767. // set pcchScheme to skip past "URL:"
  768. *pcchScheme = cch + 1;
  769. // reset cch for the scheme len
  770. cch = -1;
  771. continue;
  772. }
  773. else
  774. {
  775. //
  776. // Scheme found if it is at least two characters
  777. if(cch > 1)
  778. {
  779. *pcchScheme = cch;
  780. return psz;
  781. }
  782. break;
  783. }
  784. }
  785. if(!IsValidSchemeCharA(*pch))
  786. break;
  787. }
  788. return NULL;
  789. }
  790. //
  791. // FindSchemeW() around for Perf reasons for ParseURL()
  792. // Any changes in either FindScheme() needs to reflected in the other
  793. //
  794. LPCWSTR FindSchemeW(LPCWSTR psz, LPDWORD pcchScheme, BOOL fAllowSemicolon = FALSE)
  795. {
  796. LPCWSTR pch;
  797. DWORD cch;
  798. ASSERT(pcchScheme);
  799. ASSERT(psz);
  800. *pcchScheme = 0;
  801. for (pch = psz, cch = 0; *pch; pch++, cch++)
  802. {
  803. if (*pch == L':' ||
  804. // Autocorrect permits a semicolon typo
  805. (fAllowSemicolon && *pch == L';'))
  806. {
  807. if (IsUrlPrefixW(psz))
  808. {
  809. psz = pch +1;
  810. // set pcchScheme to skip past "URL:"
  811. *pcchScheme = cch + 1;
  812. // reset cch for the scheme len
  813. cch = -1;
  814. continue;
  815. }
  816. else
  817. {
  818. //
  819. // Scheme found if it is at least two characters
  820. if(cch > 1)
  821. {
  822. *pcchScheme = cch;
  823. return psz;
  824. }
  825. break;
  826. }
  827. }
  828. if(!IsValidSchemeCharW(*pch))
  829. break;
  830. }
  831. return NULL;
  832. }
  833. PRIVATE DWORD
  834. CountSlashes(LPCWSTR *ppsz)
  835. {
  836. DWORD cSlashes = 0;
  837. LPCWSTR pch = *ppsz;
  838. while (IsSeparator(pch))
  839. {
  840. *ppsz = pch;
  841. pch++;
  842. cSlashes++;
  843. }
  844. return cSlashes;
  845. }
  846. PRIVATE LPCWSTR
  847. FindDosPath(LPCWSTR psz)
  848. {
  849. if (IsDosDrive(psz) || IsUNC(psz))
  850. {
  851. return psz;
  852. }
  853. else
  854. {
  855. DWORD cch;
  856. LPCWSTR pszScheme = FindSchemeW(psz, &cch);
  857. if (pszScheme && URL_SCHEME_FILE == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
  858. {
  859. LPCWSTR pch = psz + cch + 1;
  860. DWORD c = CountSlashes(&pch);
  861. switch (c)
  862. {
  863. case 2:
  864. if(IsDosDrive(++pch))
  865. return pch;
  866. break;
  867. case 4:
  868. return --pch;
  869. }
  870. }
  871. }
  872. return NULL;
  873. }
  874. /*+++
  875. WininetCopyUrlForParse()
  876. this copies the url and prepends a "file://" if necessary
  877. This should never be called except from wininet
  878. everyone else should be calling UrlCreateFromPath()
  879. Parameters
  880. IN -
  881. pszDst the destination buffer
  882. pszSrc source buffer
  883. OUT -
  884. pszDst is filled with a Live URL
  885. Returns
  886. VOID
  887. NOTE - Assume "file:" if no scheme and it looks like fully-qualified file path.
  888. ---*/
  889. static const WCHAR c_szFileSchemeString[] = L"file://";
  890. PRIVATE HRESULT
  891. WininetCopyUrlForParse(PSHSTRW pstrDst, LPCWSTR pszSrc)
  892. {
  893. #ifndef UNIX
  894. if (IsDrive(pszSrc) || IsUNC(pszSrc))
  895. {
  896. //
  897. // NOTE: the first SetStr will always succeed
  898. // because the default buffer is more than "file://"
  899. pstrDst->SetStr(c_szFileSchemeString);
  900. return pstrDst->Append(pszSrc);
  901. }
  902. else
  903. #endif /* !UNIX */
  904. return pstrDst->SetStr(pszSrc);
  905. }
  906. PRIVATE HRESULT
  907. CopyUrlForParse(LPCWSTR pszUrl, PSHSTRW pstrUrl, DWORD dwFlags)
  908. {
  909. LPCWSTR pch;
  910. HRESULT hr;
  911. //
  912. // now we will make copies of the URLs so that we can rip them apart
  913. // WininetCopyUrlForParse() will prepend a file: if it wants...
  914. //
  915. if(dwFlags & URL_WININET_COMPATIBILITY)
  916. {
  917. hr = WininetCopyUrlForParse(pstrUrl, pszUrl);
  918. }
  919. else if(pch = FindDosPath(pszUrl))
  920. {
  921. hr = SHUrlCreateFromPath(pch, pstrUrl, dwFlags);
  922. }
  923. else
  924. {
  925. hr = pstrUrl->SetStr(pszUrl);
  926. }
  927. // Trim leading and trailing whitespace
  928. // Remove tab and CRLF characters. Netscape does this.
  929. if(SUCCEEDED(hr))
  930. TrimAndStripInsignificantWhite(pstrUrl->GetInplaceStr());
  931. return hr;
  932. }
  933. PRIVATE VOID BreakScheme(LPWSTR *ppsz, PURLPARTS parts)
  934. {
  935. if(!**ppsz || IsDrive(*ppsz))
  936. return;
  937. DWORD cch;
  938. //
  939. // if FindScheme() succeeds, it returns a pointer to the scheme,
  940. // and the cch holds the count of chars for the scheme
  941. // if it fails, and cch is none zero then cch is how much should be skipped.
  942. // this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
  943. //
  944. if(NULL != (parts->pszScheme = (LPWSTR) FindSchemeW(*ppsz, &cch)))
  945. {
  946. parts->pszScheme[cch] = '\0';
  947. CharLowerW(parts->pszScheme);
  948. // put the pointer past the scheme for next Break()
  949. *ppsz = parts->pszScheme + cch + 1;
  950. #ifdef DEBUG
  951. if (g_dwPrototype & PF_LOGSCHEMEHITS)
  952. {
  953. // this is for logging of url schemes, to make sure that we have the right order
  954. int c = GetPrivateProfileIntW(L"SchemeHits", parts->pszScheme, 0, L"UrlPars.ini");
  955. WCHAR szc[25];
  956. wsprintfW(szc, L"%d", ++c);
  957. WritePrivateProfileStringW(L"SchemeHits", parts->pszScheme, szc, L"UrlPars.ini");
  958. }
  959. #endif //DEBUG
  960. parts->eScheme = GetSchemeTypeAndFlagsW(parts->pszScheme, cch, &parts->dwFlags);
  961. }
  962. else if (cch)
  963. *ppsz += cch + 1;
  964. }
  965. PRIVATE VOID BreakQuery(LPWSTR *ppsz, PURLPARTS parts)
  966. {
  967. WCHAR *pch;
  968. if(!**ppsz)
  969. return;
  970. if(parts->dwFlags & UPF_SCHEME_OPAQUE)
  971. return;
  972. pch = StrChrW(*ppsz, QUERY);
  973. //
  974. // APPCOMPAT NETSCAPE COMPATBILITY - zekel - 27-JAN-97
  975. // we will also get http://foo#frag?query
  976. // even tho legally it should be http://foo?query#frag
  977. // of course we will put it back together the right way.
  978. //
  979. if(!pch && parts->pszFragment)
  980. pch = StrChrW(parts->pszFragment, QUERY);
  981. // found our query string...
  982. if (pch)
  983. {
  984. TERMSTR(pch);
  985. parts->pszQuery = pch + 1;
  986. }
  987. }
  988. PRIVATE VOID MkBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  989. {
  990. //
  991. // NOTE: we dont convert WHACKs to SLASHs because mk can be of the
  992. // form <mk:@class:\\Server\Share\file.itl/path/in/the/file.gif
  993. // and we want to preserve the DOS/UNC path as it is
  994. //
  995. if (**ppsz == TEXT('@'))
  996. {
  997. WCHAR *pch;
  998. // treat everything to separator as host
  999. //
  1000. parts->pszServer = *ppsz;
  1001. pch = StrChrW(*ppsz ,SLASH);
  1002. if (pch)
  1003. {
  1004. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1005. TERMSTR(pch);
  1006. *ppsz = pch + 1;
  1007. }
  1008. else
  1009. *ppsz += lstrlenW(*ppsz);
  1010. }
  1011. }
  1012. PRIVATE VOID DefaultBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  1013. {
  1014. if (**ppsz == SLASH)
  1015. {
  1016. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1017. (*ppsz)++;
  1018. if (**ppsz == SLASH)
  1019. {
  1020. // we have a winner!
  1021. WCHAR * pch;
  1022. parts->pszServer = (*ppsz) + 1;
  1023. pch = StrChrW(parts->pszServer, SLASH);
  1024. if(pch)
  1025. {
  1026. TERMSTR(pch);
  1027. *ppsz = pch + 1;
  1028. }
  1029. else
  1030. *ppsz = *ppsz + lstrlenW(*ppsz);
  1031. }
  1032. }
  1033. else if(parts->pszScheme)
  1034. parts->dwFlags |= UPF_SCHEME_OPAQUE;
  1035. }
  1036. PRIVATE VOID FileBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  1037. {
  1038. LPWSTR pch;
  1039. // CountSlashes() will set *ppsz to the last slash
  1040. DWORD cSlashes = CountSlashes((LPCWSTR *)ppsz);
  1041. if(cSlashes || IsDrive(*ppsz))
  1042. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1043. switch (cSlashes)
  1044. {
  1045. case 0:
  1046. break;
  1047. case 4:
  1048. // we identify file://\\UNC as a true DOS path with no escaped characters
  1049. parts->dwFlags |= UPF_FILEISPATHURL;
  1050. // fall through
  1051. case 2:
  1052. if(IsDrive((*ppsz) + 1))
  1053. {
  1054. // this is a root drive
  1055. TERMSTR(*ppsz);
  1056. parts->pszServer = *ppsz;
  1057. (*ppsz)++;
  1058. // we identify file://C:\PATH as a true DOS path with no escaped characters
  1059. parts->dwFlags |= UPF_FILEISPATHURL;
  1060. break;
  1061. } //else fallthru to UNC handling
  1062. // fall through
  1063. case 5:
  1064. case 6:
  1065. //
  1066. // cases like "file:////..." or "file://///..."
  1067. // we see this as a UNC path
  1068. // lets set the server
  1069. //
  1070. parts->pszServer = ++(*ppsz);
  1071. for(pch = *ppsz; *pch && !IsSeparator(pch); pch++);
  1072. if(pch && *pch)
  1073. {
  1074. TERMSTR(pch);
  1075. *ppsz = pch + 1;
  1076. }
  1077. else
  1078. *ppsz = pch + lstrlenW(pch);
  1079. break;
  1080. case 1:
  1081. //
  1082. //we think of "file:/..." as on the local machine
  1083. // so we have zero length pszServer
  1084. //
  1085. case 3:
  1086. //
  1087. //we think of file:///... as properly normalized on the local machine
  1088. // so we have zero length pszServer
  1089. //
  1090. default:
  1091. // there is just too many, we pretend that there is just one and ignore
  1092. // the rest
  1093. TERMSTR(*ppsz);
  1094. parts->pszServer = *ppsz;
  1095. (*ppsz)++;
  1096. break;
  1097. }
  1098. // detect file://localserver/c:/path
  1099. if(parts->pszServer && !StrCmpIW(parts->pszServer, L"localhost"))
  1100. parts->pszServer = NULL;
  1101. }
  1102. PRIVATE VOID BreakServer(LPWSTR *ppsz, PURLPARTS parts, BOOL fConvert)
  1103. {
  1104. if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
  1105. return;
  1106. //
  1107. // APPCOMPAT - we pretend that whacks are the equiv of slashes - zekel 17-MAR-97
  1108. // this is because the internet uses slashes and DOS
  1109. // uses whacks. so for useability's sake we allow both.
  1110. // but not in all cases. in particular, the "mk:" stream
  1111. // protocol depends upon the buggy behavior of one of IE30's
  1112. // many URL parsers treating relative URLs with whacks as one
  1113. // segment.
  1114. // NOTE: IE30 had inconsistent behavior WRT URLs. so we handled
  1115. // this case differently depending on when we saw, looked, touched, or
  1116. // played with these URLs. wininet would always convert, but mshtml
  1117. // sometimes would other times not.
  1118. //
  1119. // with MK: we cannot convert the base, or the relative
  1120. // but in breakpath we have to allow for the use of WHACK
  1121. // to indicate a root path
  1122. //
  1123. // we dont have to fProtectExtra because query and fragments
  1124. // are already broken off if necessary.
  1125. if (fConvert)
  1126. ConvertChar(*ppsz, WHACK, SLASH, FALSE);
  1127. switch(parts->eScheme)
  1128. {
  1129. case URL_SCHEME_FILE:
  1130. FileBreakServer(ppsz, parts);
  1131. break;
  1132. case URL_SCHEME_MK:
  1133. MkBreakServer(ppsz, parts);
  1134. break;
  1135. default:
  1136. DefaultBreakServer(ppsz, parts);
  1137. break;
  1138. }
  1139. }
  1140. PRIVATE VOID DefaultBreakSegments(LPWSTR psz, PURLPARTS parts)
  1141. {
  1142. WCHAR *pch;
  1143. while (pch = StrChrW(psz, SLASH))
  1144. {
  1145. parts->cSegments++;
  1146. TERMSTR(pch);
  1147. psz = pch + 1;
  1148. }
  1149. if(!*psz || IsDot(psz) || IsDotDot(psz))
  1150. {
  1151. if (!*psz && parts->cSegments > 1)
  1152. parts->cSegments--;
  1153. parts->dwFlags |= UPF_EXSEG_DIRECTORY;
  1154. }
  1155. }
  1156. PRIVATE VOID DefaultBreakPath(LPWSTR *ppsz, PURLPARTS parts)
  1157. {
  1158. if(!**ppsz)
  1159. return;
  1160. //
  1161. // this will keep the drive letter from being backed up over
  1162. // during canonicalization. if we want keep the UNC share
  1163. // from being backed up we should do it here
  1164. // or in FileBreakServer() similarly
  1165. //
  1166. if(IsDrive(*ppsz))
  1167. {
  1168. parts->dwFlags |= UPF_SEG_LOCKFIRST;
  1169. // also convert "c|" to "c:"
  1170. }
  1171. parts->pszSegments = *ppsz;
  1172. parts->cSegments = 1;
  1173. if(!(parts->dwFlags & UPF_SCHEME_OPAQUE))
  1174. DefaultBreakSegments(parts->pszSegments, parts);
  1175. }
  1176. PRIVATE VOID BreakPath(LPWSTR *ppsz, PURLPARTS parts)
  1177. {
  1178. if(!**ppsz)
  1179. return;
  1180. if (parts->dwFlags & UPF_SCHEME_OPAQUE)
  1181. {
  1182. parts->pszSegments = *ppsz;
  1183. parts->cSegments = 1;
  1184. }
  1185. else
  1186. {
  1187. //
  1188. // we only need to check for absolute when there was
  1189. // no server segment. if there was a server segment,
  1190. // then absolute has already been set, and we need
  1191. // to preserve any separators that exist in the path
  1192. //
  1193. if(!parts->pszServer && IsSeparator(*ppsz))
  1194. {
  1195. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1196. (*ppsz)++;
  1197. }
  1198. DefaultBreakPath(ppsz, parts);
  1199. }
  1200. }
  1201. BOOL _ShouldBreakBase(PURLPARTS parts, LPCWSTR pszBase)
  1202. {
  1203. if (pszBase)
  1204. {
  1205. if (!parts->pszScheme)
  1206. return TRUE;
  1207. DWORD cch;
  1208. LPCWSTR pszScheme = FindSchemeW(pszBase, &cch);
  1209. // this means that this will only optimize on known schemes
  1210. // if both urls use URL_SCHEME_UNKNOWN...then we parse both.
  1211. if (pszScheme && parts->eScheme == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
  1212. return TRUE;
  1213. }
  1214. return FALSE;
  1215. }
  1216. /*+++
  1217. BreakUrl()
  1218. Break a URL into its constituent parts
  1219. Parameters
  1220. IN -
  1221. the URL to crack open, need not be fully qualified
  1222. OUT -
  1223. parts absolute or relative may be nonzero (but not both).
  1224. host, anchor and access may be nonzero if they were specified.
  1225. Any which are nonzero point to zero terminated strings.
  1226. Returns
  1227. VOID
  1228. Details -
  1229. WARNING !! function munges the incoming buffer
  1230. ---*/
  1231. #define BreakUrl(s, p) BreakUrls(s, p, NULL, NULL, NULL, 0)
  1232. //
  1233. // **BreakUrls()**
  1234. // RETURNS
  1235. // S_OK if the two urls need to be blended
  1236. // S_FALSE if pszUrl is absolute, or there is no pszBase
  1237. // failure some sort of memory allocation error
  1238. //
  1239. PRIVATE HRESULT
  1240. BreakUrls(LPWSTR pszUrl, PURLPARTS parts, LPCWSTR pszBase, PSHSTRW pstrBase, PURLPARTS partsBase, DWORD dwFlags)
  1241. {
  1242. HRESULT hr = S_FALSE;
  1243. ASSERT(pszUrl && parts);
  1244. ZeroMemory(parts, SIZEOF(URLPARTS));
  1245. if(!*pszUrl)
  1246. parts->dwFlags |= UPF_SEG_EMPTYSEG;
  1247. //
  1248. // WARNING: this order is specific, according to the proposed standard
  1249. //
  1250. if(*pszUrl || pszBase)
  1251. {
  1252. BOOL fConvert;
  1253. BreakScheme(&pszUrl, parts);
  1254. BreakFragment(&pszUrl, parts);
  1255. BreakQuery(&pszUrl, parts);
  1256. //
  1257. // this is the first time that we need to access
  1258. // pszBase if it exists, so this is when we copy and parse
  1259. //
  1260. if (_ShouldBreakBase(parts, pszBase))
  1261. {
  1262. hr = CopyUrlForParse(pszBase, pstrBase, dwFlags);
  1263. // this will be some kind of memory error
  1264. if(FAILED(hr))
  1265. return hr;
  1266. // ASSERT(hr != S_FALSE);
  1267. BreakUrl(pstrBase->GetInplaceStr(), partsBase);
  1268. fConvert = (partsBase->dwFlags & UPF_SCHEME_CONVERT);
  1269. }
  1270. else
  1271. fConvert = (parts->dwFlags & UPF_SCHEME_CONVERT);
  1272. BreakServer(&pszUrl, parts, fConvert);
  1273. BreakPath(&pszUrl, parts);
  1274. }
  1275. return hr;
  1276. }
  1277. /*+++
  1278. BlendParts() & all dependent Blend* functions
  1279. Blends the parts structures into one, taking the relevant
  1280. bits from each one and dumping the unused data.
  1281. Parameters
  1282. IN -
  1283. partsUrl the primary or relative parts - Takes precedence
  1284. partsBase the base or referrers parts
  1285. OUT -
  1286. partsOut the combined result
  1287. Returns
  1288. VOID -
  1289. NOTE: this will frequently NULL out the entire partsBase.
  1290. ---*/
  1291. PRIVATE VOID
  1292. BlendScheme(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1293. {
  1294. if(partsUrl->pszScheme)
  1295. {
  1296. LPCWSTR pszScheme = partsOut->pszScheme = partsUrl->pszScheme;
  1297. URL_SCHEME eScheme = partsOut->eScheme = partsUrl->eScheme;
  1298. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SCHEME_MASK);
  1299. //
  1300. // this checks to make sure that these are the same scheme, and
  1301. // that the scheme is allowed to be used in relative URLs
  1302. // file: is not allowed to because of weirdness with drive letters
  1303. // and \\UNC\shares
  1304. //
  1305. if ((eScheme && (eScheme != partsBase->eScheme) || eScheme == URL_SCHEME_FILE) ||
  1306. (!partsBase->pszScheme) ||
  1307. (partsBase->pszScheme && StrCmpW(pszScheme, partsBase->pszScheme)))
  1308. {
  1309. // they are different schemes. DUMP partsBase.
  1310. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1311. }
  1312. }
  1313. else
  1314. {
  1315. partsOut->pszScheme = partsBase->pszScheme;
  1316. partsOut->eScheme = partsBase->eScheme;
  1317. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SCHEME_MASK);
  1318. }
  1319. }
  1320. PRIVATE VOID
  1321. BlendServer(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1322. {
  1323. ASSERT(partsUrl && partsBase && partsOut);
  1324. //
  1325. // if we have different hosts then everything but the pszAccess is DUMPED
  1326. //
  1327. if(partsUrl->pszServer)
  1328. {
  1329. partsOut->pszServer = partsUrl->pszServer;
  1330. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SERVER_MASK);
  1331. if ((partsBase->pszServer && StrCmpW(partsUrl->pszServer, partsBase->pszServer)))
  1332. {
  1333. // they are different Servers. DUMP partsBase.
  1334. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1335. }
  1336. }
  1337. else
  1338. {
  1339. partsOut->pszServer = partsBase->pszServer;
  1340. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_SERVER_MASK);
  1341. }
  1342. }
  1343. PRIVATE VOID
  1344. BlendPath(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1345. {
  1346. ASSERT(partsUrl && partsBase && partsOut);
  1347. if (partsUrl->dwFlags & UPF_SEG_ABSOLUTE)
  1348. {
  1349. if((partsBase->dwFlags & UPF_SEG_LOCKFIRST) &&
  1350. !(partsUrl->dwFlags & UPF_SEG_LOCKFIRST))
  1351. {
  1352. // this keeps the drive letters when necessary
  1353. partsOut->pszSegments = partsBase->pszSegments;
  1354. partsOut->cSegments = 1; // only keep the first segment
  1355. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK) ;
  1356. partsOut->pszExtraSegs = partsUrl->pszSegments;
  1357. partsOut->cExtraSegs = partsUrl->cSegments;
  1358. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK);
  1359. }
  1360. else
  1361. {
  1362. // just use the absolute path
  1363. partsOut->pszSegments = partsUrl->pszSegments;
  1364. partsOut->cSegments = partsUrl->cSegments;
  1365. partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1366. }
  1367. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1368. }
  1369. else if ((partsBase->dwFlags & UPF_SEG_ABSOLUTE))
  1370. {
  1371. // Adopt path not name
  1372. partsOut->pszSegments = partsBase->pszSegments;
  1373. partsOut->cSegments = partsBase->cSegments;
  1374. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK );
  1375. if(partsUrl->cSegments || partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
  1376. {
  1377. //
  1378. // this a relative path that needs to be combined
  1379. //
  1380. partsOut->pszExtraSegs = partsUrl->pszSegments;
  1381. partsOut->cExtraSegs = partsUrl->cSegments;
  1382. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK );
  1383. if (!(partsBase->dwFlags & UPF_EXSEG_DIRECTORY))
  1384. {
  1385. //
  1386. // knock off the file name segment
  1387. // as long as the it isnt the first or the first is not locked
  1388. // or it isnt a dotdot. in the case of http://site/dir/, dir/ is
  1389. // not actually killed, only the NULL terminator following it is.
  1390. //
  1391. LPWSTR pszLast = LastLiveSegment(partsOut->pszSegments, partsOut->cSegments, partsOut->dwFlags & UPF_SEG_LOCKFIRST);
  1392. if(pszLast && !IsDotDot(pszLast))
  1393. {
  1394. if(partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
  1395. partsOut->dwFlags |= UPF_EXSEG_DIRECTORY;
  1396. KILLSEG(pszLast);
  1397. }
  1398. }
  1399. }
  1400. else
  1401. partsOut->dwFlags |= (partsBase->dwFlags & UPF_EXSEG_MASK);
  1402. }
  1403. else if (partsUrl->cSegments)
  1404. {
  1405. partsOut->pszSegments = partsUrl->pszSegments;
  1406. partsOut->cSegments = partsUrl->cSegments;
  1407. partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1408. }
  1409. else if (partsBase->cSegments)
  1410. {
  1411. partsOut->pszSegments = partsBase->pszSegments;
  1412. partsOut->cSegments = partsBase->cSegments;
  1413. partsOut->dwFlags |= (partsBase->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1414. }
  1415. // regardless, we want to zero if we have relative segs
  1416. if (partsUrl->cSegments)
  1417. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1418. }
  1419. PRIVATE VOID
  1420. BlendQuery(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1421. {
  1422. if(partsUrl->pszQuery)
  1423. {
  1424. LPCWSTR pszQuery = partsOut->pszQuery = partsUrl->pszQuery;
  1425. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Query_MASK);
  1426. if ((partsBase->pszQuery && StrCmpW(pszQuery, partsBase->pszQuery)))
  1427. {
  1428. // they are different Querys. DUMP partsBase.
  1429. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1430. }
  1431. }
  1432. else
  1433. {
  1434. partsOut->pszQuery = partsBase->pszQuery;
  1435. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Query_MASK);
  1436. }
  1437. }
  1438. PRIVATE VOID
  1439. BlendFragment(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1440. {
  1441. if(partsUrl->pszFragment || partsUrl->cSegments)
  1442. {
  1443. LPCWSTR pszFragment = partsOut->pszFragment = partsUrl->pszFragment;
  1444. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Fragment_MASK);
  1445. if ((partsBase->pszFragment && StrCmpW(pszFragment, partsBase->pszFragment)))
  1446. {
  1447. // they are different Fragments. DUMP partsBase.
  1448. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1449. }
  1450. }
  1451. else
  1452. {
  1453. partsOut->pszFragment = partsBase->pszFragment;
  1454. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Fragment_MASK);
  1455. }
  1456. }
  1457. PRIVATE VOID
  1458. BlendParts(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1459. {
  1460. //
  1461. // partsUrl always takes priority over partsBase
  1462. //
  1463. ASSERT(partsUrl && partsBase && partsOut);
  1464. ZeroMemory(partsOut, SIZEOF(URLPARTS));
  1465. BlendScheme( partsUrl, partsBase, partsOut);
  1466. BlendServer( partsUrl, partsBase, partsOut);
  1467. BlendPath( partsUrl, partsBase, partsOut);
  1468. BlendQuery( partsUrl, partsBase, partsOut);
  1469. BlendFragment( partsUrl, partsBase, partsOut);
  1470. }
  1471. PRIVATE VOID
  1472. CanonServer(PURLPARTS parts)
  1473. {
  1474. //
  1475. // we only do stuff if this server is an internet style
  1476. // server. that way it uses FQDNs and IP port numbers
  1477. //
  1478. if (parts->pszServer && (parts->dwFlags & UPF_SCHEME_INTERNET))
  1479. {
  1480. LPWSTR pszName = StrRChrW(parts->pszServer, NULL, L'@');
  1481. if(!pszName)
  1482. pszName = parts->pszServer;
  1483. // this should just point to the FQDN:Port
  1484. CharLowerW(pszName);
  1485. //
  1486. // Ignore default port numbers, and trailing dots on FQDNs
  1487. // which will only cause identical adresses to look different
  1488. //
  1489. {
  1490. WCHAR *pch = StrChrW(pszName, COLON);
  1491. if (pch && parts->eScheme)
  1492. {
  1493. BOOL fIgnorePort = FALSE;
  1494. //
  1495. // FEATURE we should actually be getting this from
  1496. // the services file to find out the default protocol port
  1497. // but we dont think that most people will change them - zekel 17-Dec-96
  1498. //
  1499. switch(parts->eScheme)
  1500. {
  1501. case URL_SCHEME_HTTP:
  1502. if(StrCmpW(pch, L":80") == 0)
  1503. fIgnorePort = TRUE;
  1504. break;
  1505. case URL_SCHEME_FTP:
  1506. if(StrCmpW(pch, L":21") == 0)
  1507. fIgnorePort = TRUE;
  1508. break;
  1509. case URL_SCHEME_GOPHER:
  1510. if(StrCmpW(pch, L":70") == 0)
  1511. fIgnorePort = TRUE;
  1512. break;
  1513. case URL_SCHEME_HTTPS:
  1514. if(StrCmpW(pch, L":443") == 0)
  1515. fIgnorePort = TRUE;
  1516. break;
  1517. default:
  1518. break;
  1519. }
  1520. if(fIgnorePort)
  1521. TERMSTR(pch); // It is the default: ignore it
  1522. }
  1523. }
  1524. }
  1525. }
  1526. PRIVATE VOID
  1527. CanonCombineSegs(PURLPARTS parts)
  1528. {
  1529. ASSERT(parts);
  1530. ASSERT(parts->pszExtraSegs && parts->cExtraSegs);
  1531. LPWSTR pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
  1532. LPWSTR pszExtra = parts->pszExtraSegs;
  1533. DWORD iExtra = 0;
  1534. DWORD cExtras = parts->cExtraSegs;
  1535. if(!IsLiveSegment(pszExtra))
  1536. pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
  1537. while(pszExtra && IsDotDot(pszExtra))
  1538. {
  1539. if (pszLast)
  1540. KILLSEG(pszLast);
  1541. KILLSEG(pszExtra);
  1542. pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
  1543. pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
  1544. }
  1545. }
  1546. PRIVATE VOID
  1547. CanonSegments(LPWSTR pszSeg,
  1548. DWORD cSegs,
  1549. BOOL fLockFirst)
  1550. {
  1551. DWORD iSeg = 0;
  1552. LPWSTR pszLastSeg = NULL;
  1553. LPWSTR pszFirstSeg = pszSeg;
  1554. BOOL fLastIsFirst = TRUE;
  1555. BOOL fFirstSeg = TRUE;
  1556. ASSERT (pszSeg && cSegs);
  1557. pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
  1558. while (pszSeg)
  1559. {
  1560. if(IsDot(pszSeg))
  1561. {
  1562. // if it is just a "." we can discard the segment
  1563. KILLSEG(pszSeg);
  1564. }
  1565. else if(IsDotDot(pszSeg))
  1566. {
  1567. // if it is ".." then we discard it and the last seg
  1568. //
  1569. // if we are at the first (root) or
  1570. // the last is the root and it is locked
  1571. // then we dont want to do anything
  1572. //
  1573. if(pszLastSeg && !IsDotDot(pszLastSeg) && !(fLastIsFirst && fLockFirst))
  1574. {
  1575. KILLSEG(pszLastSeg);
  1576. pszLastSeg = NULL;
  1577. KILLSEG(pszSeg);
  1578. }
  1579. }
  1580. if(IsLiveSegment(pszSeg))
  1581. {
  1582. if(!pszLastSeg && fFirstSeg)
  1583. fLastIsFirst = TRUE;
  1584. else
  1585. fLastIsFirst = FALSE;
  1586. pszLastSeg = pszSeg;
  1587. fFirstSeg = FALSE;
  1588. }
  1589. else
  1590. {
  1591. pszLastSeg = LastLiveSegment(pszFirstSeg, iSeg, fLockFirst);
  1592. }
  1593. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1594. }
  1595. }
  1596. PRIVATE VOID
  1597. CanonPath(PURLPARTS parts)
  1598. {
  1599. ASSERT(parts);
  1600. if(parts->cSegments)
  1601. CanonSegments(parts->pszSegments, parts->cSegments, (parts->dwFlags & UPF_SEG_LOCKFIRST));
  1602. if(parts->cExtraSegs)
  1603. CanonSegments(parts->pszExtraSegs, parts->cExtraSegs, FALSE);
  1604. if(parts->cExtraSegs)
  1605. CanonCombineSegs(parts);
  1606. }
  1607. PRIVATE VOID
  1608. CanonParts(PURLPARTS parts)
  1609. {
  1610. ASSERT(parts);
  1611. //CanonScheme(parts);
  1612. CanonServer(parts);
  1613. CanonPath(parts);
  1614. //CanonQuery(parts);
  1615. //CanonFragment(parts);
  1616. }
  1617. PRIVATE HRESULT
  1618. BuildScheme(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1619. {
  1620. HRESULT hr = S_OK;
  1621. ASSERT(parts && pstr);
  1622. if(parts->pszScheme)
  1623. {
  1624. hr = pstr->Append(parts->pszScheme);
  1625. if(SUCCEEDED(hr))
  1626. hr = pstr->Append(COLON);
  1627. }
  1628. return hr;
  1629. }
  1630. PRIVATE HRESULT
  1631. BuildServer(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1632. {
  1633. HRESULT hr = S_OK;
  1634. ASSERT(parts && pstr);
  1635. switch(parts->eScheme)
  1636. {
  1637. case URL_SCHEME_MK:
  1638. // CraigC's "mk:" has no // but acts like it does
  1639. break;
  1640. case URL_SCHEME_FILE:
  1641. if ((dwFlags & URL_WININET_COMPATIBILITY) || (dwFlags & URL_FILE_USE_PATHURL))
  1642. {
  1643. if(parts->pszServer && *parts->pszServer)
  1644. hr = pstr->Append(L"////");
  1645. else if (parts->pszSegments && IsDrive(parts->pszSegments))
  1646. hr = pstr->Append(SLASH);
  1647. else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
  1648. hr = pstr->Append(L"//");
  1649. }
  1650. else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
  1651. hr = pstr->Append(L"//");
  1652. break;
  1653. default:
  1654. if(parts->pszServer && SUCCEEDED(hr))
  1655. hr = pstr->Append(L"//");
  1656. break;
  1657. }
  1658. if(parts->pszServer && SUCCEEDED(hr))
  1659. hr = pstr->Append(parts->pszServer);
  1660. return hr;
  1661. }
  1662. PRIVATE HRESULT
  1663. BuildSegments(LPWSTR pszSeg, DWORD cSegs, PSHSTRW pstr, BOOL fRoot, BOOL *pfSlashLast)
  1664. {
  1665. DWORD iSeg = 0;
  1666. HRESULT hr = S_FALSE;
  1667. *pfSlashLast = FALSE;
  1668. ASSERT(pszSeg && pstr);
  1669. pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
  1670. if(!fRoot && pszSeg)
  1671. {
  1672. hr = pstr->Append(pszSeg);
  1673. if(SUCCEEDED(hr))
  1674. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1675. else
  1676. pszSeg = NULL;
  1677. }
  1678. while (pszSeg)
  1679. {
  1680. hr = pstr->Append(SLASH);
  1681. if(SUCCEEDED(hr) && *pszSeg)
  1682. {
  1683. hr = pstr->Append(pszSeg);
  1684. *pfSlashLast = FALSE;
  1685. }
  1686. else
  1687. *pfSlashLast = TRUE;
  1688. if(SUCCEEDED(hr))
  1689. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1690. else
  1691. break;
  1692. }
  1693. return hr;
  1694. }
  1695. PRIVATE HRESULT
  1696. BuildPath(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1697. {
  1698. HRESULT hr = S_OK;
  1699. BOOL fSlashLast = FALSE;
  1700. DWORD iSeg;
  1701. LPWSTR pszSegFirst = NULL;
  1702. ASSERT(parts && pstr);
  1703. if(parts->cSegments)
  1704. {
  1705. hr = BuildSegments(parts->pszSegments, parts->cSegments, pstr, parts->dwFlags & UPF_SEG_ABSOLUTE, &fSlashLast);
  1706. if (fSlashLast)
  1707. pstr->Append(SLASH);
  1708. }
  1709. if(SUCCEEDED(hr) && parts->cExtraSegs)
  1710. {
  1711. BOOL f = fSlashLast;
  1712. hr = BuildSegments(parts->pszExtraSegs, parts->cExtraSegs, pstr, !fSlashLast, &fSlashLast);
  1713. if (fSlashLast)
  1714. pstr->Append(SLASH);
  1715. if (hr == S_FALSE)
  1716. fSlashLast = f;
  1717. }
  1718. // trailing slash on a server name for IIS
  1719. if( !fSlashLast &&
  1720. (
  1721. (parts->dwFlags & UPF_EXSEG_DIRECTORY) ||
  1722. // if this is just a server name by itself
  1723. (!FirstLiveSegment(parts->pszSegments, &iSeg, parts->cSegments) &&
  1724. !FirstLiveSegment(parts->pszExtraSegs, &iSeg, parts->cExtraSegs) &&
  1725. parts->dwFlags & UPF_SEG_ABSOLUTE)
  1726. )
  1727. )
  1728. {
  1729. hr = pstr->Append(SLASH);
  1730. }
  1731. return hr;
  1732. }
  1733. PRIVATE HRESULT
  1734. BuildQuery(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1735. {
  1736. HRESULT hr = S_OK;
  1737. ASSERT(parts && pstr);
  1738. if(parts->pszQuery)
  1739. {
  1740. hr = pstr->Append(QUERY);
  1741. if(SUCCEEDED(hr))
  1742. hr = pstr->Append(parts->pszQuery);
  1743. }
  1744. return hr;
  1745. }
  1746. PRIVATE HRESULT
  1747. BuildFragment(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1748. {
  1749. HRESULT hr = S_OK;
  1750. ASSERT(parts && pstr);
  1751. if(parts->pszFragment)
  1752. {
  1753. hr = pstr->Append(POUND);
  1754. if(SUCCEEDED(hr))
  1755. hr = pstr->Append(parts->pszFragment);
  1756. }
  1757. return hr;
  1758. }
  1759. PRIVATE HRESULT
  1760. BuildUrl(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1761. {
  1762. HRESULT hr;
  1763. ASSERT(parts && pstr);
  1764. if(
  1765. (SUCCEEDED(hr = BuildScheme(parts, dwFlags, pstr))) &&
  1766. (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstr))) &&
  1767. (SUCCEEDED(hr = BuildPath(parts, dwFlags, pstr))) &&
  1768. (SUCCEEDED(hr = BuildQuery(parts, dwFlags, pstr)))
  1769. )
  1770. hr = BuildFragment(parts, dwFlags, pstr);
  1771. return hr;
  1772. }
  1773. /*+++
  1774. SHUrlEscape()
  1775. Escapes a URL
  1776. right now, i am only escaping stuff in the Path part of the URL
  1777. Parameters
  1778. IN -
  1779. pszUrl URL to examine
  1780. pstrOut SHSTR destination
  1781. dwFlags the relevant URL_* flags,
  1782. Returns
  1783. HRESULT -
  1784. SUCCESS S_OK
  1785. ERROR only E_OUTOFMEMORY
  1786. Helper Routines
  1787. Escape*(part) each part gets its own escape routine (ie EscapeScheme)
  1788. EscapeSpaces will only escape spaces (WININET compatibility mostly)
  1789. EscapeSegmentsGetNeededSize gets the required size of destination buffer for all path segments
  1790. EscapeLiveSegment does the work of escaping each path segment
  1791. ---*/
  1792. PRIVATE HRESULT
  1793. EscapeSpaces(LPCWSTR psz, PSHSTRW pstr, DWORD dwFlags)
  1794. {
  1795. HRESULT hr = S_OK;
  1796. LPCWSTR pch;
  1797. DWORD cSpaces = 0;
  1798. ASSERT(psz && pstr);
  1799. pstr->Reset();
  1800. for (pch = psz; *pch; pch++)
  1801. {
  1802. if (*pch == SPC)
  1803. cSpaces++;
  1804. }
  1805. if(cSpaces)
  1806. {
  1807. hr = pstr->SetSize(lstrlenW(psz) + cSpaces * 2 + 1);
  1808. if(SUCCEEDED(hr))
  1809. {
  1810. LPWSTR pchOut = pstr->GetInplaceStr();
  1811. for (pch = psz; *pch; pch++)
  1812. {
  1813. if ((*pch == POUND || *pch == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  1814. {
  1815. StrCpyW(pchOut, pch);
  1816. pchOut += lstrlenW(pchOut);
  1817. break;
  1818. }
  1819. if (*pch == SPC)
  1820. {
  1821. *pchOut++ = HEX_ESCAPE;
  1822. *pchOut++ = L'2';
  1823. *pchOut++ = L'0';
  1824. }
  1825. else
  1826. {
  1827. *pchOut++ = *pch;
  1828. }
  1829. }
  1830. TERMSTR(pchOut);
  1831. }
  1832. }
  1833. else
  1834. hr = pstr->SetStr(psz);
  1835. return hr;
  1836. }
  1837. inline PRIVATE HRESULT
  1838. EscapeScheme(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1839. {
  1840. ASSERT(partsUrl && partsOut);
  1841. partsOut->pszScheme = partsUrl->pszScheme;
  1842. partsOut->eScheme = partsUrl->eScheme;
  1843. return S_OK;
  1844. }
  1845. inline PRIVATE HRESULT
  1846. EscapeServer(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1847. {
  1848. ASSERT(partsUrl && partsOut);
  1849. partsOut->pszServer = partsUrl->pszServer;
  1850. return S_OK;
  1851. }
  1852. inline PRIVATE HRESULT
  1853. EscapeQuery(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1854. {
  1855. ASSERT(partsUrl && partsOut);
  1856. partsOut->pszQuery = partsUrl->pszQuery;
  1857. return S_OK;
  1858. }
  1859. inline PRIVATE HRESULT
  1860. EscapeFragment(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1861. {
  1862. ASSERT(partsUrl && partsOut);
  1863. partsOut->pszFragment = partsUrl->pszFragment;
  1864. return S_OK;
  1865. }
  1866. PRIVATE BOOL
  1867. GetEscapeStringSize(LPWSTR psz, DWORD dwFlags, LPDWORD pcch)
  1868. {
  1869. BOOL fResize = FALSE;
  1870. ASSERT(psz);
  1871. ASSERT(pcch);
  1872. for (*pcch = 0; *psz; psz++)
  1873. {
  1874. (*pcch)++;
  1875. if(!IsSafePathChar(*psz) ||
  1876. ((dwFlags & URL_ESCAPE_PERCENT) && (*psz == HEX_ESCAPE)))
  1877. {
  1878. fResize = TRUE;
  1879. *pcch += 2;
  1880. }
  1881. }
  1882. // for the NULL term
  1883. (*pcch)++;
  1884. return fResize;
  1885. }
  1886. PRIVATE DWORD
  1887. EscapeSegmentsGetNeededSize(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags)
  1888. {
  1889. DWORD cchNeeded = 0;
  1890. BOOL fResize = FALSE;
  1891. LPWSTR pszSeg;
  1892. DWORD iSeg;
  1893. ASSERT(pszSegments && cSegs);
  1894. pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
  1895. while (IsLiveSegment(pszSeg))
  1896. {
  1897. DWORD cch;
  1898. if(GetEscapeStringSize(pszSeg, dwFlags, &cch))
  1899. fResize = TRUE;
  1900. cchNeeded += cch;
  1901. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1902. }
  1903. return fResize ? cchNeeded : 0;
  1904. }
  1905. PRIVATE VOID
  1906. EscapeString(LPCWSTR pszSeg, DWORD dwFlags, LPWSTR *ppchOut)
  1907. {
  1908. LPWSTR pchIn; // This pointer has been trusted to not modify it's contents, just iterate.
  1909. LPWSTR pchOut = *ppchOut;
  1910. WCHAR ch;
  1911. for (pchIn = (LPWSTR)pszSeg; *pchIn; pchIn++)
  1912. {
  1913. ch = *pchIn;
  1914. if (!IsSafePathChar(ch) ||
  1915. ((dwFlags & URL_ESCAPE_PERCENT) && (ch == HEX_ESCAPE)))
  1916. {
  1917. *pchOut++ = HEX_ESCAPE;
  1918. *pchOut++ = hex[(ch >> 4) & 15];
  1919. *pchOut++ = hex[ch & 15];
  1920. }
  1921. else
  1922. *pchOut++ = *pchIn;
  1923. }
  1924. TERMSTR(pchOut);
  1925. // move past the terminator
  1926. pchOut++;
  1927. *ppchOut = pchOut;
  1928. }
  1929. PRIVATE HRESULT
  1930. EscapeSegments(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1931. {
  1932. DWORD cchNeeded;
  1933. HRESULT hr = S_OK;
  1934. ASSERT(pszSegments && cSegs && partsOut && pstr);
  1935. cchNeeded = EscapeSegmentsGetNeededSize(pszSegments, cSegs, dwFlags);
  1936. if(cchNeeded)
  1937. {
  1938. ASSERT(pstr);
  1939. hr = pstr->SetSize(cchNeeded);
  1940. if(SUCCEEDED(hr))
  1941. {
  1942. LPWSTR pchOut = pstr->GetInplaceStr();
  1943. LPWSTR pszSeg;
  1944. DWORD iSeg;
  1945. partsOut->pszSegments = pchOut;
  1946. partsOut->cSegments = 0;
  1947. pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
  1948. while (IsLiveSegment(pszSeg))
  1949. {
  1950. EscapeString(pszSeg, dwFlags, &pchOut);
  1951. partsOut->cSegments++;
  1952. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1953. }
  1954. }
  1955. }
  1956. else
  1957. {
  1958. partsOut->cSegments = cSegs;
  1959. partsOut->pszSegments = pszSegments;
  1960. }
  1961. return hr;
  1962. }
  1963. PRIVATE HRESULT
  1964. EscapePath(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1965. {
  1966. HRESULT hr = S_OK;
  1967. ASSERT(partsUrl && partsOut && pstr);
  1968. if(partsUrl->cSegments)
  1969. {
  1970. hr = EscapeSegments(partsUrl->pszSegments, partsUrl->cSegments, dwFlags, partsOut, pstr);
  1971. }
  1972. else
  1973. {
  1974. partsOut->cSegments = 0;
  1975. partsOut->pszSegments = NULL;
  1976. }
  1977. return hr;
  1978. }
  1979. HRESULT
  1980. SHUrlEscape (LPCWSTR pszUrl,
  1981. PSHSTRW pstrOut,
  1982. DWORD dwFlags)
  1983. {
  1984. #ifdef TESTING_SPACES_ONLY
  1985. return EscapeSpaces(pszUrl, pstrOut, dwFlags);
  1986. #else //TESTING_SPACES_ONLY
  1987. SHSTRW strUrl;
  1988. HRESULT hr;
  1989. ASSERT(pszUrl && pstrOut);
  1990. if(!pszUrl || !pstrOut)
  1991. return E_INVALIDARG;
  1992. //
  1993. // EscapeSpaces is remarkably poor,
  1994. // but so is this kind of functionality...
  1995. // it doesnt do any kind of real parsing, it
  1996. // only looks for spaces and escapes them...
  1997. //
  1998. if(dwFlags & URL_ESCAPE_SPACES_ONLY)
  1999. return EscapeSpaces(pszUrl, pstrOut, dwFlags);
  2000. // We are just passed a segment so we only want to
  2001. // escape that and nothing else. Don't look for
  2002. // URL pieces.
  2003. if(dwFlags & URL_ESCAPE_SEGMENT_ONLY)
  2004. {
  2005. URLPARTS partsOut;
  2006. SHSTRW strTemp;
  2007. EscapeSegments((LPWSTR)pszUrl, 1, dwFlags, &partsOut, &strTemp);
  2008. pstrOut->SetStr(partsOut.pszSegments);
  2009. return S_OK;
  2010. }
  2011. pstrOut->Reset();
  2012. hr = strUrl.SetStr(pszUrl);
  2013. if(SUCCEEDED(hr))
  2014. {
  2015. URLPARTS partsUrl, partsOut;
  2016. SHSTRW strPath;
  2017. BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
  2018. ZeroMemory(&partsOut, SIZEOF(URLPARTS));
  2019. //
  2020. // NOTE the only function here that is really active right now is the EscapePath
  2021. // if some other part needs to be escaped, then add a new SHSTR in the 4th param
  2022. // and change the appropriate subroutine
  2023. //
  2024. if(
  2025. (SUCCEEDED(hr = EscapeScheme(&partsUrl, dwFlags, &partsOut, NULL)))
  2026. && (SUCCEEDED(hr = EscapeServer(&partsUrl, dwFlags, &partsOut, NULL)))
  2027. && (SUCCEEDED(hr = EscapePath(&partsUrl, dwFlags, &partsOut, &strPath)))
  2028. && (SUCCEEDED(hr = EscapeQuery(&partsUrl, dwFlags, &partsOut, NULL)))
  2029. && (SUCCEEDED(hr = EscapeFragment(&partsUrl, dwFlags, &partsOut, NULL)))
  2030. )
  2031. {
  2032. partsOut.dwFlags = partsUrl.dwFlags;
  2033. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2034. }
  2035. }
  2036. else
  2037. hr = E_OUTOFMEMORY;
  2038. return hr;
  2039. #endif //TESTING_SPACES_ONLY
  2040. }
  /*+++
  SHUrlUnescape()
  Unescapes a string in place. This is OK because
  the unescaped string can never be longer than the original.
  Parameters
  IN -
  psz string to unescape in place
  dwFlags the relevant URL_* flags
  Returns
  HRESULT -
  SUCCESS S_OK
  ERROR does not return an error right now
  Helper Routines
  HexToWord takes a hex digit and returns a WORD with its value, or -1
  IsEscapedOctetA/W looks at a ptr for "%XX" where X is a hex digit
  TranslateEscapedOctetA/W translates "%XX" to an 8 bit char
  ---*/
  2058. PRIVATE WORD
  2059. HexToWord(WCHAR ch)
  2060. {
  2061. if(ch >= TEXT('0') && ch <= TEXT('9'))
  2062. return (WORD) ch - TEXT('0');
  2063. if(ch >= TEXT('A') && ch <= TEXT('F'))
  2064. return (WORD) ch - TEXT('A') + 10;
  2065. if(ch >= TEXT('a') && ch <= TEXT('f'))
  2066. return (WORD) ch - TEXT('a') + 10;
  2067. ASSERT(FALSE); //we have tried to use a non-hex number
  2068. return (WORD) -1;
  2069. }
  2070. PRIVATE BOOL inline
  2071. IsEscapedOctetW(LPCWSTR pch)
  2072. {
  2073. return (pch[0] == HEX_ESCAPE && IsHex(pch[1]) && IsHex(pch[2])) ? TRUE : FALSE;
  2074. }
  2075. PRIVATE BOOL inline
  2076. IsEscapedOctetA(LPCSTR pch)
  2077. {
  2078. return (pch[0] == HEX_ESCAPE_A && IsHex((WCHAR)pch[1]) && IsHex((WCHAR)pch[2])) ? TRUE : FALSE;
  2079. }
  2080. PRIVATE WCHAR
  2081. TranslateEscapedOctetW(LPCWSTR pch)
  2082. {
  2083. WCHAR ch;
  2084. ASSERT(IsEscapedOctetW(pch));
  2085. pch++;
  2086. ch = (WCHAR) HexToWord(*pch++) * 16; // hi nibble
  2087. ch += HexToWord(*pch); // lo nibble
  2088. return ch;
  2089. }
  2090. PRIVATE CHAR
  2091. TranslateEscapedOctetA(LPCSTR pch)
  2092. {
  2093. CHAR ch;
  2094. ASSERT(IsEscapedOctetA(pch));
  2095. pch++;
  2096. ch = (CHAR) HexToWord(*pch++) * 16; // hi nibble
  2097. ch += HexToWord(*pch); // lo nibble
  2098. return ch;
  2099. }
  2100. HRESULT SHUrlUnescapeA(LPSTR psz, DWORD dwFlags)
  2101. {
  2102. CHAR *pchSrc = psz;
  2103. CHAR *pchDst = psz;
  2104. while (*pchSrc)
  2105. {
  2106. if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  2107. {
  2108. StrCpyA(pchDst, pchSrc);
  2109. pchDst += lstrlenA(pchDst);
  2110. break;
  2111. }
  2112. if (IsEscapedOctetA(pchSrc))
  2113. {
  2114. CHAR ch = TranslateEscapedOctetA(pchSrc);
  2115. *pchDst++ = ch;
  2116. pchSrc += 3; // enuff for "%XX"
  2117. }
  2118. else
  2119. {
  2120. *pchDst++ = *pchSrc++;
  2121. }
  2122. }
  2123. TERMSTR(pchDst);
  2124. return S_OK;
  2125. }
  2126. HRESULT SHUrlUnescapeW(LPWSTR psz, DWORD dwFlags)
  2127. {
  2128. WCHAR *pchSrc = psz;
  2129. WCHAR *pchDst = psz;
  2130. while (*pchSrc)
  2131. {
  2132. if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  2133. {
  2134. StrCpyW(pchDst, pchSrc);
  2135. pchDst += lstrlenW(pchDst);
  2136. break;
  2137. }
  2138. if (IsEscapedOctetW(pchSrc))
  2139. {
  2140. WCHAR ch = TranslateEscapedOctetW(pchSrc);
  2141. *pchDst++ = ch;
  2142. pchSrc += 3; // enuff for "%XX"
  2143. }
  2144. else
  2145. {
  2146. *pchDst++ = *pchSrc++;
  2147. }
  2148. }
  2149. TERMSTR(pchDst);
  2150. return S_OK;
  2151. }
  2152. PRIVATE HRESULT
  2153. BuildDosPath(PURLPARTS parts, PSHSTRW pstrOut, DWORD dwFlags)
  2154. {
  2155. HRESULT hr;
  2156. // this will disable a preceding slash when there is a drive
  2157. if(parts->pszSegments && IsDrive(parts->pszSegments))
  2158. parts->dwFlags = (parts->dwFlags & ~UPF_SEG_ABSOLUTE);
  2159. // if there is a zero length server then
  2160. // we skip building it
  2161. if(parts->pszServer && !*parts->pszServer)
  2162. parts->pszServer = NULL;
  2163. // this prevents all the special file goo checking
  2164. parts->eScheme = URL_SCHEME_UNKNOWN;
  2165. //
  2166. // then go ahead and put the path together
  2167. if( (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstrOut))) &&
  2168. (!parts->cSegments || SUCCEEDED(hr = BuildPath(parts, dwFlags, pstrOut)))
  2169. )
  2170. {
  2171. // then decode it cuz paths arent escaped
  2172. if (IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2173. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2174. else
  2175. #ifndef UNIX
  2176. ConvertChar(pstrOut->GetInplaceStr(), SLASH, WHACK, TRUE);
  2177. #else
  2178. ConvertChar(pstrOut->GetInplaceStr(), WHACK, SLASH, TRUE);
  2179. #endif
  2180. if(IsFlagClear(parts->dwFlags, UPF_FILEISPATHURL))
  2181. SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
  2182. if(IsDriveUrl(*pstrOut))
  2183. {
  2184. LPWSTR pszTemp = pstrOut->GetInplaceStr();
  2185. pszTemp[1] = COLON;
  2186. }
  2187. }
  2188. return hr;
  2189. }
  2190. HRESULT
  2191. SHPathCreateFromUrl(LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
  2192. {
  2193. HRESULT hr;
  2194. SHSTRW strUrl;
  2195. ASSERT(pszUrl && pstrOut);
  2196. pstrOut->Reset();
  2197. hr = strUrl.SetStr(pszUrl);
  2198. if(SUCCEEDED(hr))
  2199. {
  2200. URLPARTS partsUrl;
  2201. // first we need to break it open
  2202. BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
  2203. // then we make sure it is a file:
  2204. if(partsUrl.eScheme == URL_SCHEME_FILE)
  2205. {
  2206. hr = BuildDosPath(&partsUrl, pstrOut, dwFlags);
  2207. }
  2208. else
  2209. hr = E_INVALIDARG;
  2210. }
  2211. return hr;
  2212. }
  2213. HRESULT
  2214. SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags)
  2215. {
  2216. HRESULT hr;
  2217. SHSTRW strPath;
  2218. ASSERT(pszPath && pstrOut);
  2219. if(PathIsURLW(pszPath))
  2220. {
  2221. if(SUCCEEDED(hr = pstrOut->SetStr(pszPath)))
  2222. return S_FALSE;
  2223. else
  2224. return hr;
  2225. }
  2226. pstrOut->Reset();
  2227. hr = strPath.SetStr(pszPath);
  2228. TrimAndStripInsignificantWhite(strPath.GetInplaceStr());
  2229. if(SUCCEEDED(hr))
  2230. {
  2231. URLPARTS partsIn, partsOut;
  2232. SHSTRW strEscapedPath, strEscapedServer;
  2233. LPWSTR pch = strPath.GetInplaceStr();
  2234. ZeroMemory(&partsIn, SIZEOF(URLPARTS));
  2235. partsIn.pszScheme = (LPWSTR)c_szFileScheme;
  2236. partsIn.eScheme = URL_SCHEME_FILE;
  2237. partsIn.dwFlags = UPF_SCHEME_CONVERT;
  2238. // first break the path
  2239. BreakFragment(&pch, &partsIn);
  2240. BreakServer(&pch, &partsIn, TRUE);
  2241. BreakPath(&pch, &partsIn);
  2242. partsOut = partsIn;
  2243. // then escape the path if we arent using path URLs
  2244. if (IsFlagClear(dwFlags, URL_FILE_USE_PATHURL))
  2245. {
  2246. hr = EscapePath(&partsIn, dwFlags | URL_ESCAPE_PERCENT, &partsOut, &strEscapedPath);
  2247. if(SUCCEEDED(hr) && partsOut.pszServer)
  2248. {
  2249. //
  2250. // i am treating the pszServer exactly like a path segment
  2251. //
  2252. DWORD cchNeeded;
  2253. if(GetEscapeStringSize(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &cchNeeded) &&
  2254. SUCCEEDED(hr = strEscapedServer.SetSize(cchNeeded)))
  2255. {
  2256. pch = strEscapedServer.GetInplaceStr();
  2257. EscapeString(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &pch);
  2258. partsOut.pszServer = strEscapedServer.GetInplaceStr();
  2259. }
  2260. }
  2261. }
  2262. if(!partsOut.pszServer && IsFlagSet(partsOut.dwFlags, UPF_SEG_ABSOLUTE))
  2263. partsOut.pszServer = L"";
  2264. // then build the URL
  2265. if(SUCCEEDED(hr))
  2266. {
  2267. if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2268. {
  2269. if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
  2270. hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
  2271. }
  2272. else
  2273. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2274. }
  2275. if (SUCCEEDED(hr) && (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)))
  2276. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2277. }
  2278. return hr;
  2279. }
  2280. /*+++
  2281. SHUrlParse()
  2282. Canonicalize an URL
  2283. or Combine and Canonicalize two URLs
  2284. Parameters
  2285. IN -
  2286. pszBase the base or referring URL, may be NULL
  2287. pszUrl the relative URL
  2288. dwFlags the relevant URL_* flags,
  2289. Returns
  2290. HRESULT -
  2291. SUCCESS S_OK
  2292. ERROR appropriate error, usually just E_OUTOFMEMORY;
  2293. NOTE: pszUrl will always take precedence over pszBase.
  2294. ---*/
  2295. HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
  2296. {
  2297. HRESULT hr = S_OK;
  2298. URLPARTS partsUrl, partsOut, partsBase;
  2299. SHSTRW strBase;
  2300. SHSTRW strUrl;
  2301. ASSERT(pszUrl);
  2302. ASSERT(pstrOut);
  2303. TraceMsgW(TF_URL | TF_FUNC, "entering SHUrlParse(%s, %s, 0x%X", pszBase,pszUrl ? pszUrl : L"NULL", dwFlags);
  2304. pstrOut->Reset();
  2305. //
  2306. // Don't bother parsing if all we have in an inter-page link as the
  2307. // pszUrl and no pszBase to parse
  2308. //
  2309. if (pszUrl[0] == POUND && (!pszBase || !*pszBase))
  2310. {
  2311. hr = pstrOut->SetStr(pszUrl);
  2312. goto quit;
  2313. }
  2314. //
  2315. // for Perf reasons we want to parse the relative url first.
  2316. // if it is an absolute URL, we need never look at the base.
  2317. //
  2318. hr = CopyUrlForParse(pszUrl, &strUrl, dwFlags);
  2319. if(FAILED(hr))
  2320. goto quit;
  2321. // -- Cybersitter compat ----
  2322. // Some bug fix broke the original parser. No time to go back and
  2323. // fix it, but since we know what to expect, we'll return this straight instead.
  2324. // Basically, when we canonicalize ://, we produce :///
  2325. if (!StrCmpW(strUrl, L"://"))
  2326. {
  2327. hr = pstrOut->SetStr(L":///");
  2328. goto quit;
  2329. }
  2330. //
  2331. // BreakUrls will decide if it is necessary to look at the relative
  2332. //
  2333. hr = BreakUrls(strUrl.GetInplaceStr(), &partsUrl, pszBase, &strBase, &partsBase, dwFlags);
  2334. if(FAILED(hr))
  2335. goto quit;
  2336. if(S_OK == hr) {
  2337. //
  2338. // this is where the real combination logic happens
  2339. // this first parts is the one that takes precedence
  2340. //
  2341. BlendParts(&partsUrl, &partsBase, &partsOut);
  2342. }
  2343. else
  2344. partsOut = partsUrl;
  2345. //
  2346. // we will now do the work of putting it together
  2347. // if these fail, it is because we are out of memory.
  2348. //
  2349. if (!(dwFlags & URL_DONT_SIMPLIFY))
  2350. CanonParts(&partsOut);
  2351. if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2352. {
  2353. if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
  2354. hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
  2355. }
  2356. else
  2357. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2358. if(SUCCEEDED(hr))
  2359. {
  2360. if (dwFlags & URL_UNESCAPE)
  2361. SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
  2362. if (dwFlags & URL_ESCAPE_SPACES_ONLY || dwFlags & URL_ESCAPE_UNSAFE)
  2363. {
  2364. //
  2365. // we are going to reuse strUrl here
  2366. //
  2367. hr = strUrl.SetStr(*pstrOut);
  2368. if(SUCCEEDED(hr))
  2369. hr = SHUrlEscape(strUrl, pstrOut, dwFlags);
  2370. }
  2371. }
  2372. if (SUCCEEDED(hr) &&
  2373. (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)) &&
  2374. (partsOut.eScheme == URL_SCHEME_FILE))
  2375. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2376. quit:
  2377. if(FAILED(hr))
  2378. {
  2379. pstrOut->Reset();
  2380. TraceMsg(TF_URL | TF_FUNC, TEXT("FAILED SHUrlParse() hr = 0x%X\n"), hr);
  2381. }
  2382. else
  2383. TraceMsgW(TF_URL | TF_FUNC, "SUCCEEDED SHUrlParse() %s\n", (LPCWSTR)*pstrOut);
  2384. return hr;
  2385. }
  2386. typedef struct _LOGON {
  2387. LPWSTR pszUser;
  2388. LPWSTR pszPass;
  2389. LPWSTR pszHost;
  2390. LPWSTR pszPort;
  2391. } LOGON, *PLOGON;
  2392. PRIVATE void
  2393. BreakLogon(LPWSTR psz, PLOGON plo)
  2394. {
  2395. ASSERT(psz);
  2396. ASSERT(plo);
  2397. WCHAR *pch = StrChrW(psz, L'@');
  2398. if(pch)
  2399. {
  2400. TERMSTR(pch);
  2401. plo->pszHost = pch + 1;
  2402. plo->pszUser = psz;
  2403. pch = StrChrW(psz, COLON);
  2404. if (pch)
  2405. {
  2406. TERMSTR(pch);
  2407. plo->pszPass = pch + 1;
  2408. }
  2409. }
  2410. else
  2411. plo->pszHost = psz;
  2412. pch = StrChrW(plo->pszHost, COLON);
  2413. if (pch)
  2414. {
  2415. TERMSTR(pch);
  2416. plo->pszPort = pch + 1;
  2417. }
  2418. }
  2419. PRIVATE HRESULT
  2420. InternetGetPart(DWORD dwPart, PURLPARTS parts, PSHSTRW pstr, DWORD dwFlags)
  2421. {
  2422. HRESULT hr = E_FAIL;
  2423. if(parts->pszServer)
  2424. {
  2425. LOGON lo = {0};
  2426. BreakLogon(parts->pszServer, &lo);
  2427. switch (dwPart)
  2428. {
  2429. case URL_PART_HOSTNAME:
  2430. hr = pstr->Append(lo.pszHost);
  2431. break;
  2432. case URL_PART_USERNAME:
  2433. hr = pstr->Append(lo.pszUser);
  2434. break;
  2435. case URL_PART_PASSWORD:
  2436. hr = pstr->Append(lo.pszPass);
  2437. break;
  2438. case URL_PART_PORT:
  2439. hr = pstr->Append(lo.pszPort);
  2440. break;
  2441. default:
  2442. ASSERT(FALSE);
  2443. }
  2444. }
  2445. return hr;
  2446. }
  2447. PRIVATE HRESULT
  2448. SHUrlGetPart(PSHSTRW pstrIn, PSHSTRW pstrOut, DWORD dwPart, DWORD dwFlags)
  2449. {
  2450. ASSERT(pstrIn);
  2451. ASSERT(pstrOut);
  2452. ASSERT(dwPart);
  2453. HRESULT hr = S_OK;
  2454. URLPARTS parts;
  2455. BreakUrl(pstrIn->GetInplaceStr(), &parts);
  2456. if(dwFlags & URL_PARTFLAG_KEEPSCHEME)
  2457. {
  2458. hr = pstrOut->SetStr(parts.pszScheme);
  2459. if(SUCCEEDED(hr))
  2460. hr = pstrOut->Append(COLON);
  2461. }
  2462. else
  2463. pstrOut->Reset();
  2464. if(SUCCEEDED(hr))
  2465. {
  2466. switch (dwPart)
  2467. {
  2468. case URL_PART_SCHEME:
  2469. hr = pstrOut->SetStr(parts.pszScheme);
  2470. break;
  2471. case URL_PART_HOSTNAME:
  2472. if (parts.eScheme == URL_SCHEME_FILE)
  2473. {
  2474. hr = pstrOut->SetStr(parts.pszServer);
  2475. break;
  2476. }
  2477. // else fall through
  2478. case URL_PART_USERNAME:
  2479. case URL_PART_PASSWORD:
  2480. case URL_PART_PORT:
  2481. if(parts.dwFlags & UPF_SCHEME_INTERNET)
  2482. {
  2483. hr = InternetGetPart(dwPart, &parts, pstrOut, dwFlags);
  2484. }
  2485. else
  2486. hr = E_FAIL;
  2487. break;
  2488. case URL_PART_QUERY:
  2489. hr = pstrOut->SetStr(parts.pszQuery);
  2490. break;
  2491. default:
  2492. ASSERT(FALSE);
  2493. hr = E_UNEXPECTED;
  2494. }
  2495. }
  2496. return hr;
  2497. }
  2498. #define c_szURLPrefixesKey "Software\\Microsoft\\Windows\\CurrentVersion\\URL\\Prefixes"
  2499. const WCHAR c_szDefaultURLPrefixKey[] = L"Software\\Microsoft\\Windows\\CurrentVersion\\URL\\DefaultPrefix";
  2500. PRIVATE inline LPCWSTR SkipLeadingSlashes(LPCWSTR psz)
  2501. {
  2502. // Skip two leading slashes.
  2503. if (psz[0] == SLASH && psz[1] == SLASH)
  2504. psz += 2;
  2505. return psz;
  2506. }
  2507. PRIVATE HRESULT
  2508. UrlGuessScheme(LPCWSTR pszUrl, PSHSTRW pstr)
  2509. {
  2510. HRESULT hr = S_FALSE;
  2511. ASSERT(pszUrl && pstr);
  2512. HKEY hkeyPrefixes;
  2513. if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, c_szURLPrefixesKey, 0, KEY_QUERY_VALUE, &hkeyPrefixes)
  2514. == ERROR_SUCCESS)
  2515. {
  2516. DWORD dwiValue;
  2517. CHAR rgchValueName[MAX_PATH];
  2518. DWORD cchValueName = SIZECHARS(rgchValueName);
  2519. DWORD dwType;
  2520. CHAR rgchPrefix[MAX_PATH];
  2521. DWORD cbPrefix = SIZEOF(rgchPrefix);
  2522. // need to get past the initial two slashes if applicable
  2523. pszUrl = SkipLeadingSlashes(pszUrl);
  2524. for (dwiValue = 0;
  2525. RegEnumValueA(hkeyPrefixes, dwiValue, rgchValueName,
  2526. &cchValueName, NULL, &dwType, (PBYTE)rgchPrefix,
  2527. &cbPrefix) == ERROR_SUCCESS;
  2528. dwiValue++)
  2529. {
  2530. WCHAR wszValue[MAX_PATH];
  2531. MultiByteToWideChar(CP_ACP, 0, rgchValueName, -1, wszValue, ARRAYSIZE(wszValue));
  2532. // we check to make sure that we match and there is something more
  2533. if (!StrCmpNIW(pszUrl, wszValue, cchValueName) && pszUrl[cchValueName])
  2534. {
  2535. MultiByteToWideChar(CP_ACP, 0, rgchPrefix, -1, wszValue, ARRAYSIZE(wszValue));
  2536. if(SUCCEEDED(hr = pstr->SetStr(wszValue)))
  2537. hr = pstr->Append(pszUrl);
  2538. break;
  2539. }
  2540. cchValueName = SIZECHARS(rgchValueName);
  2541. cbPrefix = SIZEOF(rgchPrefix);
  2542. }
  2543. RegCloseKey(hkeyPrefixes);
  2544. }
  2545. return(hr);
  2546. }
  2547. /*----------------------------------------------------------
  2548. Purpose: Grabs the default URL prefix in the registry and applies
  2549. it to the given URL.
  2550. Returns: S_OK
  2551. S_FALSE if there is no default prefix
  2552. */
  2553. const WCHAR c_szDefaultScheme[] = L"http://";
  2554. HRESULT
  2555. UrlApplyDefaultScheme(
  2556. LPCWSTR pszUrl,
  2557. PSHSTRW pstr)
  2558. {
  2559. HRESULT hr = S_FALSE;
  2560. WCHAR szDef[MAX_PATH];
  2561. DWORD cbSize = SIZEOF(szDef);
  2562. ASSERT(pszUrl && pstr);
  2563. ASSERT(!PathIsURLW(pszUrl));
  2564. DWORD dwType;
  2565. if (NO_ERROR == SHRegGetUSValueW(c_szDefaultURLPrefixKey, NULL, &dwType, (LPVOID)szDef, &cbSize, TRUE, (LPVOID)c_szDefaultScheme, SIZEOF(c_szDefaultScheme)))
  2566. {
  2567. pszUrl = SkipLeadingSlashes(pszUrl);
  2568. if(SUCCEEDED(hr = pstr->SetStr(szDef)))
  2569. hr = pstr->Append(pszUrl);
  2570. }
  2571. return hr;
  2572. }
  2573. /*----------------------------------------------------------
  2574. Purpose: Guesses a URL protocol based upon a list in the registry,
  2575. compared to the first few characters of the given
  2576. URL suffix.
  2577. Returns: S_OK if a URL protocol is determined
  2578. S_FALSE if there were no problems but no prefix was prepended
  2579. */
  2580. HRESULT
  2581. SHUrlApplyScheme(
  2582. LPCWSTR pszUrl,
  2583. PSHSTRW pstrOut,
  2584. DWORD dwFlags)
  2585. {
  2586. HRESULT hr = S_FALSE;
  2587. ASSERT(IS_VALID_STRING_PTRW(pszUrl, -1));
  2588. //
  2589. // if there is already scheme there, we do nothing
  2590. // unless the caller insists. this is to support
  2591. // a string that looks like www.foo.com:8001.
  2592. // this is a site that needs to be guessed at but
  2593. // it also could be a valid scheme since '.' and '-'
  2594. // are both valid scheme chars.
  2595. //
  2596. DWORD cch;
  2597. if((dwFlags & URL_APPLY_FORCEAPPLY) || !FindSchemeW(pszUrl, &cch))
  2598. {
  2599. if(dwFlags & URL_APPLY_GUESSSCHEME)
  2600. hr = UrlGuessScheme(pszUrl, pstrOut);
  2601. if (hr != S_OK && (dwFlags & URL_APPLY_GUESSFILE))
  2602. {
  2603. LPCWSTR psz = FindDosPath(pszUrl);
  2604. // only change hr if we actually converted.
  2605. if(psz && SUCCEEDED(SHUrlCreateFromPath(psz, pstrOut, 0)))
  2606. hr = S_OK;
  2607. }
  2608. if (hr != S_OK && (dwFlags & URL_APPLY_DEFAULT || !dwFlags))
  2609. hr = UrlApplyDefaultScheme(pszUrl, pstrOut);
  2610. }
  2611. return hr;
  2612. }
  2613. PRIVATE HRESULT
  2614. CopyOutA(PSHSTRA pstr, LPSTR psz, LPDWORD pcch)
  2615. {
  2616. HRESULT hr = S_OK;
  2617. DWORD cch;
  2618. ASSERT(pstr);
  2619. ASSERT(psz);
  2620. ASSERT(pcch);
  2621. cch = pstr->GetLen();
  2622. if((*pcch > cch) && psz)
  2623. lstrcpyA(psz, pstr->GetStr());
  2624. else
  2625. hr = E_POINTER;
  2626. *pcch = cch + (FAILED(hr) ? 1 : 0);
  2627. return hr;
  2628. }
  2629. //*** StrCopyOutW --
  2630. // NOTES
  2631. // WARNING: must match semantics of CopyOutW! (esp. the *pcchOut part)
  2632. PRIVATE HRESULT
  2633. StrCopyOutW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut)
  2634. {
  2635. DWORD cch;
  2636. cch = lstrlenW(pszIn);
  2637. if (cch < *pcchOut && pszOut) {
  2638. *pcchOut = cch;
  2639. StrCpyW(pszOut, pszIn);
  2640. return S_OK;
  2641. }
  2642. else {
  2643. *pcchOut = cch + 1;
  2644. return E_POINTER;
  2645. }
  2646. }
  2647. //***
  2648. // NOTES
  2649. // WARNING: StrCopyOutW must match this func, so if you change this change
  2650. // it too
  2651. PRIVATE HRESULT
  2652. CopyOutW(PSHSTRW pstr, LPWSTR psz, LPDWORD pcch)
  2653. {
  2654. HRESULT hr = S_OK;
  2655. DWORD cch;
  2656. ASSERT(pstr);
  2657. ASSERT(psz);
  2658. ASSERT(pcch);
  2659. cch = pstr->GetLen();
  2660. if((*pcch > cch) && psz)
  2661. StrCpyW(psz, pstr->GetStr());
  2662. else
  2663. hr = E_POINTER;
  2664. *pcch = cch + (FAILED(hr) ? 1 : 0);
  2665. return hr;
  2666. }
  2667. LWSTDAPI
  2668. UrlCanonicalizeA(LPCSTR pszIn,
  2669. LPSTR pszOut,
  2670. LPDWORD pcchOut,
  2671. DWORD dwFlags)
  2672. {
  2673. HRESULT hr;
  2674. SHSTRA straOut;
  2675. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCanonicalizeA: Caller passed invalid pszIn");
  2676. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCanonicalizeA: Caller passed invalid pcchOut");
  2677. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCanonicalizeA: Caller passed invalid pszOut");
  2678. #ifdef DEBUG
  2679. if (pcchOut)
  2680. {
  2681. if (pszOut == pszIn)
  2682. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2683. else
  2684. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2685. }
  2686. #endif
  2687. if (!pszIn
  2688. || !pszOut
  2689. || !pcchOut
  2690. || !*pcchOut)
  2691. {
  2692. hr = E_INVALIDARG;
  2693. }
  2694. else
  2695. {
  2696. hr = UrlCombineA("", pszIn, pszOut, pcchOut, dwFlags);
  2697. }
  2698. return hr;
  2699. }
  2700. LWSTDAPI
  2701. UrlEscapeA(LPCSTR pszIn,
  2702. LPSTR pszOut,
  2703. LPDWORD pcchOut,
  2704. DWORD dwFlags)
  2705. {
  2706. HRESULT hr;
  2707. SHSTRA straOut;
  2708. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlEscapeA: Caller passed invalid pszin");
  2709. RIPMSG(NULL!=pcchOut, "UrlEscapeA: Caller passed invalid pcchOut");
  2710. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlEscapeA: Caller passed invalid pszOut");
  2711. #ifdef DEBUG
  2712. if (pcchOut)
  2713. {
  2714. if (pszOut==pszOut)
  2715. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2716. else
  2717. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2718. }
  2719. #endif
  2720. if (!pszIn || !pszOut ||
  2721. !pcchOut || !*pcchOut)
  2722. hr = E_INVALIDARG;
  2723. else
  2724. {
  2725. SHSTRW strwOut;
  2726. SHSTRW strUrl;
  2727. if(SUCCEEDED(strUrl.SetStr(pszIn)))
  2728. hr = SHUrlEscape(strUrl, &strwOut, dwFlags);
  2729. else
  2730. hr = E_OUTOFMEMORY;
  2731. if(SUCCEEDED(hr))
  2732. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  2733. }
  2734. if(SUCCEEDED(hr))
  2735. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  2736. return hr;
  2737. }
  2738. LWSTDAPI
  2739. UrlGetPartA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
  2740. {
  2741. HRESULT hr;
  2742. SHSTRA straOut;
  2743. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlGetPartA: Caller passed invalid pszIn");
  2744. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartA: Caller passed invalid pcchOut");
  2745. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlGetPartA: Caller passed invalid pszOut");
  2746. #ifdef DEBUG
  2747. if (pcchOut)
  2748. {
  2749. if (pszOut==pszIn)
  2750. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2751. else
  2752. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2753. }
  2754. #endif
  2755. if (!pszIn || !pszOut ||
  2756. !pcchOut || !*pcchOut || dwPart == URL_PART_NONE)
  2757. hr = E_INVALIDARG;
  2758. else
  2759. {
  2760. SHSTRW strwOut;
  2761. SHSTRW strwIn;
  2762. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  2763. hr = SHUrlGetPart(&strwIn, &strwOut, dwPart, dwFlags);
  2764. else
  2765. hr = E_OUTOFMEMORY;
  2766. if(SUCCEEDED(hr))
  2767. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  2768. }
  2769. if(SUCCEEDED(hr))
  2770. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  2771. return hr;
  2772. }
  2773. LWSTDAPI_(BOOL) UrlIsA(LPCSTR pszURL, URLIS UrlIs)
  2774. {
  2775. BOOL fRet = FALSE;
  2776. RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlIsA: Caller passed invalid pszURL");
  2777. if(pszURL)
  2778. {
  2779. DWORD cchScheme, dwFlags;
  2780. LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
  2781. if(pszScheme)
  2782. {
  2783. URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
  2784. switch (UrlIs)
  2785. {
  2786. case URLIS_URL:
  2787. fRet = TRUE;
  2788. break;
  2789. case URLIS_OPAQUE:
  2790. fRet = (dwFlags & UPF_SCHEME_OPAQUE);
  2791. break;
  2792. case URLIS_NOHISTORY:
  2793. fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
  2794. break;
  2795. case URLIS_FILEURL:
  2796. fRet = (eScheme == URL_SCHEME_FILE);
  2797. break;
  2798. default:
  2799. // if it cant be done quck and dirty
  2800. // then we need to thunk to the wide version
  2801. SHSTRW strUrl;
  2802. if (SUCCEEDED(strUrl.SetStr(pszURL)))
  2803. {
  2804. fRet = UrlIsW(strUrl, UrlIs);
  2805. }
  2806. }
  2807. }
  2808. }
  2809. return fRet;
  2810. }
  2811. LWSTDAPI_(BOOL) UrlIsW(LPCWSTR pszURL, URLIS UrlIs)
  2812. {
  2813. BOOL fRet = FALSE;
  2814. RIPMSG(NULL!=pszURL && IS_VALID_STRING_PTRW(pszURL, -1), "UrlIsW: Caller passed invalid pszURL");
  2815. if(pszURL)
  2816. {
  2817. DWORD cchScheme, dwFlags;
  2818. LPCWSTR pszScheme = FindSchemeW(pszURL, &cchScheme);
  2819. if(pszScheme)
  2820. {
  2821. SHSTRW str;
  2822. URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
  2823. switch (UrlIs)
  2824. {
  2825. case URLIS_URL:
  2826. fRet = TRUE;
  2827. break;
  2828. case URLIS_OPAQUE:
  2829. fRet = (dwFlags & UPF_SCHEME_OPAQUE);
  2830. break;
  2831. case URLIS_NOHISTORY:
  2832. fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
  2833. break;
  2834. case URLIS_FILEURL:
  2835. fRet = (eScheme == URL_SCHEME_FILE);
  2836. break;
  2837. case URLIS_APPLIABLE:
  2838. if (eScheme == URL_SCHEME_UNKNOWN)
  2839. {
  2840. if (S_OK == UrlGuessScheme(pszURL, &str))
  2841. fRet = TRUE;
  2842. }
  2843. break;
  2844. // these cases need a broken URL
  2845. case URLIS_DIRECTORY:
  2846. case URLIS_HASQUERY:
  2847. {
  2848. URLPARTS parts;
  2849. if (SUCCEEDED(str.SetStr(pszURL))
  2850. && SUCCEEDED(BreakUrl(str.GetInplaceStr(), &parts)))
  2851. {
  2852. switch(UrlIs)
  2853. {
  2854. case URLIS_DIRECTORY:
  2855. // if the last seg has a trailing slash, or
  2856. // if there are no path segments at all...
  2857. fRet = (!parts.cSegments || (parts.dwFlags & UPF_EXSEG_DIRECTORY));
  2858. break;
  2859. case URLIS_HASQUERY:
  2860. fRet = (parts.pszQuery && *parts.pszQuery);
  2861. break;
  2862. default:
  2863. ASSERT(FALSE);
  2864. break;
  2865. }
  2866. }
  2867. }
  2868. break;
  2869. default:
  2870. AssertMsg(FALSE, "UrlIs() called with invalid flag");
  2871. }
  2872. }
  2873. }
  2874. return fRet;
  2875. }
  2876. LWSTDAPI_(BOOL) UrlIsOpaqueA(LPCSTR pszURL)
  2877. {
  2878. return UrlIsA(pszURL, URLIS_OPAQUE);
  2879. }
  2880. LWSTDAPI_(BOOL) UrlIsOpaqueW(LPCWSTR pszURL)
  2881. {
  2882. return UrlIsW(pszURL, URLIS_OPAQUE);
  2883. }
  2884. LWSTDAPI_(BOOL) UrlIsNoHistoryA(LPCSTR pszURL)
  2885. {
  2886. return UrlIsA(pszURL, URLIS_NOHISTORY);
  2887. }
  2888. LWSTDAPI_(BOOL) UrlIsNoHistoryW(LPCWSTR pszURL)
  2889. {
  2890. return UrlIsW(pszURL, URLIS_NOHISTORY);
  2891. }
  2892. LWSTDAPI_(LPCSTR) UrlGetLocationA(LPCSTR pszURL)
  2893. {
  2894. CPINFO cpinfo;
  2895. BOOL fMBCS = (GetCPInfo(CP_ACP, &cpinfo) && cpinfo.LeadByte[0]);
  2896. RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlGetLocationA: Caller passed invalid pszURL");
  2897. if(pszURL)
  2898. {
  2899. DWORD cchScheme, dwFlags;
  2900. LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
  2901. if(pszScheme)
  2902. {
  2903. URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
  2904. return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentA(pszURL, fMBCS, (eScheme == URL_SCHEME_FILE));
  2905. }
  2906. }
  2907. return NULL;
  2908. }
  2909. LWSTDAPI_(LPCWSTR) UrlGetLocationW(LPCWSTR wzURL)
  2910. {
  2911. RIPMSG(wzURL && IS_VALID_STRING_PTRW(wzURL, -1), "UrlGetLocationW: Caller passed invalid wzURL");
  2912. if(wzURL)
  2913. {
  2914. DWORD cchScheme, dwFlags;
  2915. LPCWSTR pszScheme = FindSchemeW(wzURL, &cchScheme);
  2916. if(pszScheme)
  2917. {
  2918. URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
  2919. return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentW(wzURL, (eScheme == URL_SCHEME_FILE));
  2920. }
  2921. }
  2922. return NULL;
  2923. }
  2924. LWSTDAPI_(int) UrlCompareA(LPCSTR psz1, LPCSTR psz2, BOOL fIgnoreSlash)
  2925. {
  2926. RIPMSG(psz1 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz1");
  2927. RIPMSG(psz2 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz2");
  2928. if (psz1 && psz2)
  2929. {
  2930. SHSTRW str1, str2;
  2931. if(SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
  2932. SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)) )
  2933. {
  2934. if(fIgnoreSlash)
  2935. {
  2936. LPWSTR pch;
  2937. pch = str1.GetInplaceStr() + str1.GetLen() - 1;
  2938. if(*pch == SLASH)
  2939. TERMSTR(pch);
  2940. pch = str2.GetInplaceStr() + str2.GetLen() - 1;
  2941. if(*pch == SLASH)
  2942. TERMSTR(pch);
  2943. }
  2944. return StrCmpW(str1, str2);
  2945. }
  2946. }
  2947. return lstrcmpA(psz1, psz2);
  2948. }
  2949. LWSTDAPI
  2950. UrlUnescapeA(LPSTR pszUrl, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  2951. {
  2952. RIPMSG(pszUrl && IS_VALID_STRING_PTRA(pszUrl, -1), "UrlUnescapeA: Caller passed invalid pszUrl");
  2953. if(dwFlags & URL_UNESCAPE_INPLACE)
  2954. {
  2955. return SHUrlUnescapeA(pszUrl, dwFlags);
  2956. }
  2957. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeA: Caller passed invalid pcchOut");
  2958. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlUnescapeA: Caller passed invalid pszOut");
  2959. #ifdef DEBUG
  2960. if (pcchOut)
  2961. {
  2962. if (pszOut==pszUrl)
  2963. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2964. else
  2965. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2966. }
  2967. #endif
  2968. if (!pszUrl
  2969. || !pcchOut
  2970. || !*pcchOut
  2971. || !pszOut)
  2972. {
  2973. return E_INVALIDARG;
  2974. }
  2975. SHSTRA str;
  2976. HRESULT hr = str.SetStr(pszUrl);
  2977. if(SUCCEEDED(hr))
  2978. {
  2979. SHUrlUnescapeA(str.GetInplaceStr(), dwFlags);
  2980. hr = CopyOutA(&str, pszOut, pcchOut);
  2981. }
  2982. return hr;
  2983. }
  2984. LWSTDAPI
  2985. PathCreateFromUrlA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  2986. {
  2987. HRESULT hr;
  2988. SHSTRA straOut;
  2989. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "PathCreateFromUrlA: Caller passed invalid pszIn");
  2990. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlA: Caller passed invalid pcchOut");
  2991. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "PathCreateFromUrlA: Caller passed invalid pszOut");
  2992. #ifdef DEBUG
  2993. if (pcchOut)
  2994. {
  2995. if (pszOut==pszIn)
  2996. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2997. else
  2998. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2999. }
  3000. #endif
  3001. if (!pszIn || !pszOut ||
  3002. !pcchOut || !*pcchOut )
  3003. hr = E_INVALIDARG;
  3004. else
  3005. {
  3006. SHSTRW strwOut;
  3007. SHSTRW strwIn;
  3008. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3009. hr = SHPathCreateFromUrl(strwIn, &strwOut, dwFlags);
  3010. else
  3011. hr = E_OUTOFMEMORY;
  3012. if(SUCCEEDED(hr))
  3013. hr = straOut.SetStr(strwOut);
  3014. }
  3015. if(SUCCEEDED(hr) )
  3016. hr = CopyOutA(&straOut, pszOut, pcchOut);
  3017. return hr;
  3018. }
  3019. LWSTDAPI
  3020. UrlCreateFromPathA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3021. {
  3022. HRESULT hr;
  3023. SHSTRA straOut;
  3024. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCreateFromPathA: Caller passed invalid pszIn");
  3025. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathA: Caller passed invalid pcchOut");
  3026. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCreateFromPathA: Caller passed invalid pszOut");
  3027. #ifdef DEBUG
  3028. if (pcchOut)
  3029. {
  3030. if (pszOut==pszIn)
  3031. DEBUGWhackPathStringA(pszOut, *pcchOut);
  3032. else
  3033. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  3034. }
  3035. #endif
  3036. if (!pszIn || !pszOut ||
  3037. !pcchOut || !*pcchOut )
  3038. hr = E_INVALIDARG;
  3039. else
  3040. {
  3041. SHSTRW strwOut;
  3042. SHSTRW strwIn;
  3043. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3044. hr = SHUrlCreateFromPath(strwIn, &strwOut, dwFlags);
  3045. else
  3046. hr = E_OUTOFMEMORY;
  3047. if(SUCCEEDED(hr))
  3048. {
  3049. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  3050. }
  3051. }
  3052. if(SUCCEEDED(hr) )
  3053. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  3054. return hr;
  3055. }
  3056. LWSTDAPI
  3057. UrlApplySchemeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3058. {
  3059. HRESULT hr;
  3060. SHSTRA straOut;
  3061. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlApplySchemeA: Caller passed invalid pszIn");
  3062. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeA: Caller passed invalid pcchOut");
  3063. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlApplySchemeA: Caller passed invalid pszOut");
  3064. #ifdef DEBUG
  3065. if (pcchOut)
  3066. {
  3067. if (pszOut==pszIn)
  3068. DEBUGWhackPathStringA(pszOut, *pcchOut);
  3069. else
  3070. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  3071. }
  3072. #endif
  3073. if (!pszIn || !pszOut ||
  3074. !pcchOut || !*pcchOut )
  3075. hr = E_INVALIDARG;
  3076. else
  3077. {
  3078. SHSTRW strwOut;
  3079. SHSTRW strwIn;
  3080. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3081. hr = SHUrlApplyScheme(strwIn, &strwOut, dwFlags);
  3082. else
  3083. hr = E_OUTOFMEMORY;
  3084. if(S_OK == (hr))
  3085. hr = straOut.SetStr(strwOut);
  3086. }
  3087. if(S_OK == (hr))
  3088. hr = CopyOutA(&straOut, pszOut, pcchOut);
  3089. return hr;
  3090. }
  // PERF_CACHE
  //*** g_szUCCanon -- 1-element cache for UrlCanonicalizeW
  // DESCRIPTION
  //  It turns out that a large number of our calls a) are for the same URL,
  //  and b) have pszOut (canonical) == pszIn (raw), so we cache the most
  //  recent result.
  // NOTES
  //  g_lockUC is taken with InterlockedExchange(&g_lockUC, 1) and released
  //  with InterlockedExchange(&g_lockUC, 0) in UrlCanonicalizeW; a caller
  //  that does not get the lock simply skips the cache.
  LONG g_lockUC;
  WCHAR g_szUCCanon[64]; // post-canon guy (also used for pre-canon check)
  DWORD g_dwUCFlags; // dwFlags of the call that produced g_szUCCanon
  #ifdef DEBUG
  int g_cUCTot, g_cUCHit; // debug counters: total calls / cache hits
  #endif
  3102. LWSTDAPI
  3103. UrlCanonicalizeW(LPCWSTR pszUrl,
  3104. LPWSTR pszCanonicalized,
  3105. LPDWORD pcchCanonicalized,
  3106. DWORD dwFlags)
  3107. {
  3108. HRESULT hr;
  3109. SHSTRW strwOut;
  3110. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlCanonicalizeW: Caller passed invalid pszUrl");
  3111. RIPMSG(NULL!=pcchCanonicalized && IS_VALID_WRITE_PTR(pcchCanonicalized, DWORD), "UrlCanonicalizeW: Caller passed invalid pcchCanonicalized");
  3112. RIPMSG(NULL==pcchCanonicalized || (pszCanonicalized && IS_VALID_WRITE_BUFFER(pszCanonicalized, char, *pcchCanonicalized)), "UrlCanonicalizeW: Caller passed invalid pszCanonicalized");
  3113. #ifdef DEBUG
  3114. if (pcchCanonicalized)
  3115. {
  3116. if (pszCanonicalized == pszUrl)
  3117. DEBUGWhackPathStringW(pszCanonicalized, *pcchCanonicalized);
  3118. else
  3119. DEBUGWhackPathBufferW(pszCanonicalized, *pcchCanonicalized);
  3120. }
  3121. #endif
  3122. if (!pszUrl
  3123. || !pszCanonicalized
  3124. || !pcchCanonicalized
  3125. || !*pcchCanonicalized)
  3126. {
  3127. hr = E_INVALIDARG;
  3128. }
  3129. else
  3130. {
  3131. #ifdef DEBUG
  3132. if ((g_cUCTot % 10) == 0)
  3133. TraceMsg(DM_PERF, "uc: tot=%d hit=%d", g_cUCTot, g_cUCHit);
  3134. #endif
  3135. DBEXEC(TRUE, g_cUCTot++);
  3136. // try the cache 1st
  3137. if (InterlockedExchange(&g_lockUC, 1) == 0) {
  3138. hr = E_FAIL;
  3139. if ((g_dwUCFlags==dwFlags)
  3140. &&
  3141. (!(dwFlags & URL_ESCAPE_PERCENT))
  3142. &&
  3143. StrCmpCW(pszUrl, g_szUCCanon) == 0)
  3144. {
  3145. DBEXEC(TRUE, g_cUCHit++);
  3146. DWORD cchTmp = *pcchCanonicalized;
  3147. hr = StrCopyOutW(g_szUCCanon, pszCanonicalized, pcchCanonicalized);
  3148. if (FAILED(hr))
  3149. *pcchCanonicalized = cchTmp; // restore!
  3150. }
  3151. InterlockedExchange(&g_lockUC, 0);
  3152. if (SUCCEEDED(hr))
  3153. return hr;
  3154. }
  3155. hr = UrlCombineW(L"", pszUrl, pszCanonicalized, pcchCanonicalized, dwFlags);
  3156. if (SUCCEEDED(hr) && *pcchCanonicalized < ARRAYSIZE(g_szUCCanon)) {
  3157. if (InterlockedExchange(&g_lockUC, 1) == 0) {
  3158. StrCpyW(g_szUCCanon, pszCanonicalized);
  3159. g_dwUCFlags = dwFlags;
  3160. InterlockedExchange(&g_lockUC, 0);
  3161. }
  3162. }
  3163. }
  3164. return hr;
  3165. }
  3166. LWSTDAPI
  3167. UrlEscapeW(LPCWSTR pszUrl,
  3168. LPWSTR pszEscaped,
  3169. LPDWORD pcchEscaped,
  3170. DWORD dwFlags)
  3171. {
  3172. HRESULT hr;
  3173. SHSTRW strwOut;
  3174. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlEscapeW: Caller passed invalid pszUrl");
  3175. RIPMSG(NULL!=pcchEscaped && IS_VALID_WRITE_PTR(pcchEscaped, DWORD), "UrlEscapeW: Caller passed invalid pcchEscaped");
  3176. RIPMSG(pszEscaped && (NULL==pcchEscaped || IS_VALID_WRITE_BUFFER(pszEscaped, WCHAR, *pcchEscaped)), "UrlEscapeW: Caller passed invalid pszEscaped");
  3177. #ifdef DEBUG
  3178. if (pcchEscaped)
  3179. {
  3180. if (pszEscaped==pszUrl)
  3181. DEBUGWhackPathStringW(pszEscaped, *pcchEscaped);
  3182. else
  3183. DEBUGWhackPathBufferW(pszEscaped, *pcchEscaped);
  3184. }
  3185. #endif
  3186. if (!pszUrl || !pszEscaped ||
  3187. !pcchEscaped || !*pcchEscaped)
  3188. hr = E_INVALIDARG;
  3189. else
  3190. {
  3191. hr = SHUrlEscape(pszUrl, &strwOut, dwFlags);
  3192. }
  3193. if(SUCCEEDED(hr) )
  3194. hr = CopyOutW(&strwOut, pszEscaped, pcchEscaped);
  3195. return hr;
  3196. }
  3197. LWSTDAPI_(int) UrlCompareW(LPCWSTR psz1, LPCWSTR psz2, BOOL fIgnoreSlash)
  3198. {
  3199. RIPMSG(psz1 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz1");
  3200. RIPMSG(psz2 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz2");
  3201. if (psz1 && psz2)
  3202. {
  3203. SHSTRW str1, str2;
  3204. if( SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
  3205. SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)))
  3206. {
  3207. if(fIgnoreSlash)
  3208. {
  3209. LPWSTR pch;
  3210. pch = str1.GetInplaceStr() + str1.GetLen() - 1;
  3211. if(*pch == SLASH)
  3212. TERMSTR(pch);
  3213. pch = str2.GetInplaceStr() + str2.GetLen() - 1;
  3214. if(*pch == SLASH)
  3215. TERMSTR(pch);
  3216. }
  3217. return StrCmpW(str1, str2);
  3218. }
  3219. }
  3220. return StrCmpW(psz1, psz2);
  3221. }
  3222. LWSTDAPI
  3223. UrlUnescapeW(LPWSTR pszUrl, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3224. {
  3225. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlUnescapeW: Caller passed invalid pszUrl");
  3226. if(dwFlags & URL_UNESCAPE_INPLACE)
  3227. {
  3228. return SHUrlUnescapeW(pszUrl, dwFlags);
  3229. }
  3230. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeW: Caller passed invalid pcchOut");
  3231. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlUnescapeW: Caller passed invalid pszOut");
  3232. #ifdef DEBUG
  3233. if (pcchOut)
  3234. {
  3235. if (pszOut==pszUrl)
  3236. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3237. else
  3238. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3239. }
  3240. #endif
  3241. if (!pszUrl
  3242. || !pcchOut
  3243. || !*pcchOut
  3244. || !pszOut)
  3245. {
  3246. return E_INVALIDARG;
  3247. }
  3248. SHSTRW str;
  3249. HRESULT hr = str.SetStr(pszUrl);
  3250. if(SUCCEEDED(hr))
  3251. {
  3252. SHUrlUnescapeW(str.GetInplaceStr(), dwFlags);
  3253. hr = CopyOutW(&str, pszOut, pcchOut);
  3254. }
  3255. return hr;
  3256. }
  3257. LWSTDAPI
  3258. PathCreateFromUrlW
  3259. (LPCWSTR pszIn,
  3260. LPWSTR pszOut,
  3261. LPDWORD pcchOut,
  3262. DWORD dwFlags)
  3263. {
  3264. HRESULT hr;
  3265. SHSTRW strOut;
  3266. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "PathCreateFromUrlW: Caller passed invalid pszIn");
  3267. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlW: Caller passed invalid pcchOut");
  3268. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "PathCreateFromUrlW: Caller passed invalid pszOut");
  3269. #ifdef DEBUG
  3270. if (pcchOut)
  3271. {
  3272. if (pszOut==pszIn)
  3273. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3274. else
  3275. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3276. }
  3277. #endif
  3278. if (!pszIn || !pszOut ||
  3279. !pcchOut || !*pcchOut )
  3280. hr = E_INVALIDARG;
  3281. else
  3282. hr = SHPathCreateFromUrl(pszIn, &strOut, dwFlags);
  3283. if(SUCCEEDED(hr) )
  3284. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3285. return hr;
  3286. }
  3287. LWSTDAPI
  3288. UrlCreateFromPathW
  3289. (LPCWSTR pszIn,
  3290. LPWSTR pszOut,
  3291. LPDWORD pcchOut,
  3292. DWORD dwFlags)
  3293. {
  3294. HRESULT hr;
  3295. SHSTRW strOut;
  3296. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlCreateFromPathW: Caller passed invalid pszIn");
  3297. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathW: Caller passed invalid pcchOut");
  3298. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlCreateFromPathW: Caller passed invalid pszOut");
  3299. #ifdef DEBUG
  3300. if (pcchOut)
  3301. {
  3302. if (pszOut==pszIn)
  3303. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3304. else
  3305. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3306. }
  3307. #endif
  3308. if (!pszIn || !pszOut ||
  3309. !pcchOut || !*pcchOut )
  3310. hr = E_INVALIDARG;
  3311. else
  3312. hr = SHUrlCreateFromPath(pszIn, &strOut, dwFlags);
  3313. if(SUCCEEDED(hr) )
  3314. hr = ReconcileHresults(hr, CopyOutW(&strOut, pszOut, pcchOut));
  3315. return hr;
  3316. }
  3317. LWSTDAPI
  3318. UrlGetPartW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
  3319. {
  3320. SHSTRW strIn, strOut;
  3321. HRESULT hr;
  3322. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlGetPartW: Caller passed invalid pszIn");
  3323. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartW: Caller passed invalid pcchOut");
  3324. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlGetPartW: Caller passed invalid pszOut");
  3325. #ifdef DEBUG
  3326. if (pcchOut)
  3327. {
  3328. if (pszOut==pszIn)
  3329. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3330. else
  3331. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3332. }
  3333. #endif
  3334. if (!pszIn || !pszOut ||
  3335. !pcchOut || !*pcchOut || !dwPart)
  3336. hr = E_INVALIDARG;
  3337. else if (SUCCEEDED(hr = strIn.SetStr(pszIn)))
  3338. hr = SHUrlGetPart(&strIn, &strOut, dwPart, dwFlags);
  3339. if(SUCCEEDED(hr) )
  3340. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3341. return hr;
  3342. }
  3343. LWSTDAPI
  3344. UrlApplySchemeW
  3345. (LPCWSTR pszIn,
  3346. LPWSTR pszOut,
  3347. LPDWORD pcchOut,
  3348. DWORD dwFlags)
  3349. {
  3350. HRESULT hr;
  3351. SHSTRW strOut;
  3352. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlApplySchemeW: Caller passed invalid pszIn");
  3353. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeW: Caller passed invalid pcchOut");
  3354. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlApplySchemeW: Caller passed invalid pszOut");
  3355. #ifdef DEBUG
  3356. if (pcchOut)
  3357. {
  3358. if (pszOut==pszIn)
  3359. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3360. else
  3361. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3362. }
  3363. #endif
  3364. if (!pszIn || !pszOut ||
  3365. !pcchOut || !*pcchOut )
  3366. hr = E_INVALIDARG;
  3367. else
  3368. hr = SHUrlApplyScheme(pszIn, &strOut, dwFlags);
  3369. if(S_OK == (hr))
  3370. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3371. return hr;
  3372. }
  //
  // this is the same table used by both URLMON and WININET's cache
  //
  // 256-entry byte substitution table.  _HashData() indexes it with
  // (current hash byte XOR input byte) to produce the next hash byte.
  // The values must not be changed or reordered: they determine every hash
  // this module produces.
  //
  const static BYTE Translate[256] =
  {
  1, 14,110, 25, 97,174,132,119,138,170,125,118, 27,233,140, 51,
  87,197,177,107,234,169, 56, 68, 30, 7,173, 73,188, 40, 36, 65,
  49,213,104,190, 57,211,148,223, 48,115, 15, 2, 67,186,210, 28,
  12,181,103, 70, 22, 58, 75, 78,183,167,238,157,124,147,172,144,
  176,161,141, 86, 60, 66,128, 83,156,241, 79, 46,168,198, 41,254,
  178, 85,253,237,250,154,133, 88, 35,206, 95,116,252,192, 54,221,
  102,218,255,240, 82,106,158,201, 61, 3, 89, 9, 42,155,159, 93,
  166, 80, 50, 34,175,195,100, 99, 26,150, 16,145, 4, 33, 8,189,
  121, 64, 77, 72,208,245,130,122,143, 55,105,134, 29,164,185,194,
  193,239,101,242, 5,171,126, 11, 74, 59,137,228,108,191,232,139,
  6, 24, 81, 20,127, 17, 91, 92,251,151,225,207, 21, 98,113,112,
  84,226, 18,214,199,187, 13, 32, 94,220,224,212,247,204,196, 43,
  249,236, 45,244,111,182,153,136,129, 90,217,202, 19,165,231, 71,
  230,142, 96,227, 62,179,246,114,162, 53,160,215,205,180, 47,109,
  44, 38, 31,149,135, 0,216, 52, 63, 23, 37, 69, 39,117,146,184,
  163,200,222,235,248,243,219, 10,152,131,123,229,203, 76,120,209
  };
  3395. PRIVATE void _HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
  3396. {
  3397. DWORD i, j;
  3398. // seed the hash
  3399. for (i = cbHash; i-- > 0;)
  3400. pbHash[i] = (BYTE) i;
  3401. // do the hash
  3402. for (j = cbData; j-- > 0;)
  3403. {
  3404. for (i = cbHash; i-- > 0;)
  3405. pbHash[i] = Translate[pbHash[i] ^ pbData[j]];
  3406. }
  3407. }
  3408. LWSTDAPI
  3409. HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
  3410. {
  3411. RIPMSG(pbData && IS_VALID_READ_BUFFER(pbData, BYTE, cbData), "HashData: Caller passed invalid pbData");
  3412. RIPMSG(pbHash && IS_VALID_WRITE_BUFFER(pbHash, BYTE, cbHash), "HashData: Caller passed invalid pbHash");
  3413. if (pbData && pbHash)
  3414. {
  3415. _HashData(pbData, cbData, pbHash, cbHash);
  3416. return S_OK;
  3417. }
  3418. return E_INVALIDARG;
  3419. }
  3420. LWSTDAPI
  3421. UrlHashA(LPCSTR psz, LPBYTE pb, DWORD cb)
  3422. {
  3423. HRESULT hr = E_INVALIDARG;
  3424. RIPMSG(psz && IS_VALID_STRING_PTRA(psz, -1), "UrlHashA: Caller passed invalid psz");
  3425. RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashA: Caller passed invalid pb");
  3426. if (psz && pb)
  3427. {
  3428. _HashData((LPBYTE) psz, lstrlenA(psz), pb, cb);
  3429. return S_OK;
  3430. }
  3431. return hr;
  3432. }
  3433. LWSTDAPI
  3434. UrlHashW(LPCWSTR psz, LPBYTE pb, DWORD cb)
  3435. {
  3436. HRESULT hr;
  3437. RIPMSG(psz && IS_VALID_STRING_PTRW(psz, -1), "UrlHashW: Caller passed invalid psz");
  3438. RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashW: Caller passed invalid pb");
  3439. if (psz && pb)
  3440. {
  3441. SHSTRA str;
  3442. if (SUCCEEDED( hr = str.SetStr(psz)))
  3443. hr = UrlHashA(str, pb, cb);
  3444. }
  3445. else
  3446. {
  3447. hr = E_INVALIDARG;
  3448. }
  3449. return hr;
  3450. }
  3451. /***************************** ParseURL Functions *****************************/
  // These were originally in URL.DLL and were later moved to shlwapi;
  // they were brought over from url.c so that the code could be reused.
  3454. // ParseURL now does no MBCS thunks, to keep it fast.
  3455. //
  3456. // declarations for ParseURL() APIs
  3457. //
  3458. typedef const PARSEDURLA CPARSEDURLA;
  3459. typedef const PARSEDURLA * PCPARSEDURLA;
  3460. typedef const PARSEDURLW CPARSEDURLW;
  3461. typedef const PARSEDURLW * PCPARSEDURLW;
  3462. #ifdef DEBUG
  3463. BOOL
  3464. IsValidPCPARSEDURLA(
  3465. LPCSTR pcszURL,
  3466. PCPARSEDURLA pcpu)
  3467. {
  3468. return(IS_VALID_READ_PTR(pcpu, CPARSEDURLA) &&
  3469. (IS_VALID_STRING_PTRA(pcpu->pszProtocol, -1) &&
  3470. EVAL(IsStringContainedA(pcszURL, pcpu->pszProtocol)) &&
  3471. EVAL(pcpu->cchProtocol < (UINT)lstrlenA(pcpu->pszProtocol))) &&
  3472. (IS_VALID_STRING_PTRA(pcpu->pszSuffix, -1) &&
  3473. EVAL(IsStringContainedA(pcszURL, pcpu->pszSuffix)) &&
  3474. EVAL(pcpu->cchSuffix <= (UINT)lstrlenA(pcpu->pszSuffix))) &&
  3475. EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenA(pcszURL)));
  3476. }
  3477. BOOL
  3478. IsValidPCPARSEDURLW(
  3479. LPCWSTR pcszURL,
  3480. PCPARSEDURLW pcpu)
  3481. {
  3482. return(IS_VALID_READ_PTR(pcpu, CPARSEDURLW) &&
  3483. (IS_VALID_STRING_PTRW(pcpu->pszProtocol, -1) &&
  3484. EVAL(IsStringContainedW(pcszURL, pcpu->pszProtocol)) &&
  3485. EVAL(pcpu->cchProtocol < (UINT)lstrlenW(pcpu->pszProtocol))) &&
  3486. (IS_VALID_STRING_PTRW(pcpu->pszSuffix, -1) &&
  3487. EVAL(IsStringContainedW(pcszURL, pcpu->pszSuffix)) &&
  3488. EVAL(pcpu->cchSuffix <= (UINT)lstrlenW(pcpu->pszSuffix))) &&
  3489. EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenW(pcszURL)));
  3490. }
  3491. #endif
  3492. /*----------------------------------------------------------
  3493. Purpose: Parse the given path into the PARSEDURL structure.
  3494. ******
  3495. ****** This function must not do any extraneous
  3496. ****** things. It must be small and fast.
  3497. ******
  3498. Returns: NOERROR if a valid URL format
  3499. URL_E_INVALID_SYNTAX if not
  3500. Cond: --
  3501. */
  3502. STDMETHODIMP
  3503. ParseURLA(
  3504. LPCSTR pcszURL,
  3505. PPARSEDURLA ppu)
  3506. {
  3507. HRESULT hr = E_INVALIDARG;
  3508. RIP(IS_VALID_STRING_PTRA(pcszURL, -1));
  3509. RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLA));
  3510. if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
  3511. {
  3512. DWORD cch;
  3513. hr = URL_E_INVALID_SYNTAX; // assume error
  3514. ppu->pszProtocol = FindSchemeA(pcszURL, &cch);
  3515. if(ppu->pszProtocol)
  3516. {
  3517. ppu->cchProtocol = cch;
  3518. // Determine protocol scheme number
  3519. ppu->nScheme = SchemeTypeFromStringA(ppu->pszProtocol, cch);
  3520. ppu->pszSuffix = ppu->pszProtocol + cch + 1;
  3521. //
  3522. // APPCOMPAT - Backwards compatibility - zekel 28-feb-97
  3523. // ParseURL() believes in file: urls like "file://C:\foo\bar"
  3524. // and some pieces of code will use it to get the Dos Path.
  3525. // new code should always call PathCreateFromUrl() to
  3526. // get the dos path of a file: URL.
  3527. //
  3528. // i am leaving this behavior in case some compat stuff is out there.
  3529. //
  3530. if (URL_SCHEME_FILE == ppu->nScheme &&
  3531. '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
  3532. {
  3533. // Yes; skip the "//"
  3534. ppu->pszSuffix += 2;
  3535. #ifndef UNIX
  3536. // FOR UNIX: If we have /vobs/build, we don't want to make
  3537. // There might be a third slash. Skip it.
  3538. if ('/' == *ppu->pszSuffix)
  3539. ppu->pszSuffix++;
  3540. #endif
  3541. }
  3542. ppu->cchSuffix = lstrlenA(ppu->pszSuffix);
  3543. hr = S_OK;
  3544. }
  3545. }
  3546. #ifdef DEBUG
  3547. if (hr == S_OK)
  3548. {
  3549. CHAR rgchDebugProtocol[MAX_PATH];
  3550. CHAR rgchDebugSuffix[MAX_PATH];
  3551. // (+ 1) for null terminator.
  3552. lstrcpynA(rgchDebugProtocol, ppu->pszProtocol,
  3553. min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
  3554. // (+ 1) for null terminator.
  3555. lstrcpynA(rgchDebugSuffix, ppu->pszSuffix,
  3556. min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
  3557. TraceMsgA(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
  3558. rgchDebugProtocol,
  3559. rgchDebugSuffix,
  3560. pcszURL);
  3561. }
  3562. else
  3563. {
  3564. TraceMsgA(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
  3565. }
  3566. #endif
  3567. ASSERT(FAILED(hr) ||
  3568. EVAL(IsValidPCPARSEDURLA(pcszURL, ppu)));
  3569. return(hr);
  3570. }
  3571. /*----------------------------------------------------------
  3572. Purpose: Parse the given path into the PARSEDURL structure.
  3573. ******
  3574. ****** This function must not do any extraneous
  3575. ****** things. It must be small and fast.
  3576. ******
  3577. Returns: NOERROR if a valid URL format
  3578. URL_E_INVALID_SYNTAX if not
  3579. Cond: --
  3580. */
  3581. STDMETHODIMP
  3582. ParseURLW(
  3583. LPCWSTR pcszURL,
  3584. PPARSEDURLW ppu)
  3585. {
  3586. HRESULT hr = E_INVALIDARG;
  3587. RIP(IS_VALID_STRING_PTRW(pcszURL, -1));
  3588. RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLW));
  3589. if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
  3590. {
  3591. DWORD cch;
  3592. hr = URL_E_INVALID_SYNTAX; // assume error
  3593. ppu->pszProtocol = FindSchemeW(pcszURL, &cch);
  3594. if(ppu->pszProtocol)
  3595. {
  3596. ppu->cchProtocol = cch;
  3597. // Determine protocol scheme number
  3598. ppu->nScheme = SchemeTypeFromStringW(ppu->pszProtocol, cch);
  3599. ppu->pszSuffix = ppu->pszProtocol + cch + 1;
  3600. //
  3601. // APPCOMPAT - Backwards compatibility - zekel 28-feb-97
  3602. // ParseURL() believes in file: urls like "file://C:\foo\bar"
  3603. // and some pieces of code will use it to get the Dos Path.
  3604. // new code should always call PathCreateFromUrl() to
  3605. // get the dos path of a file: URL.
  3606. //
  3607. // i am leaving this behavior in case some compat stuff is out there.
  3608. //
  3609. if (URL_SCHEME_FILE == ppu->nScheme &&
  3610. '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
  3611. {
  3612. // Yes; skip the "//"
  3613. ppu->pszSuffix += 2;
  3614. #ifndef UNIX
  3615. // There might be a third slash. Skip it.
  3616. // IEUNIX - On UNIX, it's a root directory, so don't skip it!
  3617. if ('/' == *ppu->pszSuffix)
  3618. ppu->pszSuffix++;
  3619. #endif
  3620. }
  3621. ppu->cchSuffix = lstrlenW(ppu->pszSuffix);
  3622. hr = S_OK;
  3623. }
  3624. }
  3625. #ifdef DEBUG
  3626. if (hr==S_OK)
  3627. {
  3628. WCHAR rgchDebugProtocol[MAX_PATH];
  3629. WCHAR rgchDebugSuffix[MAX_PATH];
  3630. // (+ 1) for null terminator.
  3631. StrCpyNW(rgchDebugProtocol, ppu->pszProtocol,
  3632. min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
  3633. // (+ 1) for null terminator.
  3634. StrCpyNW(rgchDebugSuffix, ppu->pszSuffix,
  3635. min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
  3636. TraceMsg(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
  3637. rgchDebugProtocol,
  3638. rgchDebugSuffix,
  3639. pcszURL);
  3640. }
  3641. else
  3642. {
  3643. TraceMsg(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
  3644. }
  3645. #endif
  3646. ASSERT(FAILED(hr) ||
  3647. EVAL(IsValidPCPARSEDURLW(pcszURL, ppu)));
  3648. return(hr);
  3649. }
  3650. #ifdef USE_FAST_PARSER
  3651. // GetSchemeTypeAndFlagsSpecialW
  3652. // performs the same behavior as GetSchemeTypeAndFlagsW plus, when successful
  3653. // copies the canonicalised form of the scheme back.
  3654. PRIVATE URL_SCHEME
  3655. GetSchemeTypeAndFlagsSpecialW(LPWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  3656. {
  3657. DWORD i;
  3658. ASSERT(pszScheme);
  3659. #ifdef DEBUG
  3660. if ((g_cSTTot % 10) == 0)
  3661. TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
  3662. #endif
  3663. DBEXEC(TRUE, g_cSTTot++);
  3664. // check cache 1st
  3665. i = g_iScheme;
  3666. if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  3667. && StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
  3668. {
  3669. DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
  3670. Lhit:
  3671. if (pdwFlags)
  3672. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  3673. // update cache (unconditionally)
  3674. g_iScheme = i;
  3675. // We need to do this because the scheme might not be canonicalised
  3676. memcpy(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme*sizeof(WCHAR));
  3677. return g_mpUrlSchemeTypes[i].eScheme;
  3678. }
  3679. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  3680. {
  3681. if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  3682. && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  3683. goto Lhit;
  3684. }
  3685. if (pdwFlags)
  3686. {
  3687. *pdwFlags = 0;
  3688. }
  3689. return URL_SCHEME_UNKNOWN;
  3690. }
  // URL_STRING --------------------------------------------------------------------------------------
  // is a container for the combined URL. It attempts to construct a string from the information
  // fed into it. If there is not enough buffer space available, it will measure how much additional
  // space will be required to hold the string.

  // NOTE(review): wszBogus is not referenced in the nearby code; confirm where it is used.
  WCHAR wszBogus[] = L"";

  // US_* are the various modes of transforming characters fed into the container.
  // US_NOTHING         do nothing to the character.
  // US_UNESCAPE        turn entries of the form %xx into the unescaped form
  // US_ESCAPE_UNSAFE   transform invalid path characters into %xx sequences
  // US_ESCAPE_SPACES   transform only spaces into %20 sequences
  enum
  {
      US_NOTHING,
      US_UNESCAPE,
      US_ESCAPE_UNSAFE,
      US_ESCAPE_SPACES
  };
  3708. class URL_STRING
  3709. {
  3710. protected:
  3711. URL_SCHEME _eScheme;
  3712. DWORD _ccWork, _ccMark, _ccLastWhite, _ccQuery, _ccFragment, _ccBuffer, _dwSchemeInfo;
  3713. DWORD _dwOldFlags, _dwFlags, _dwMode;
  3714. BOOL _fFixSlashes, _fExpecting, _fError;
  3715. WCHAR _wchLast, _wszInternalString[256];
  3716. PWSTR _pszWork;
  3717. VOID baseAccept(WCHAR wch);
  3718. VOID TrackWhiteSpace(WCHAR wch);
  3719. public:
  3720. URL_STRING(DWORD dwFlags);
  3721. ~URL_STRING();
  3722. VOID CleanAccept(WCHAR wch);
  3723. VOID Accept(WCHAR wch);
  3724. VOID Accept(PWSTR a_psz);
  3725. VOID Contract(BOOL fContractLevel = TRUE);
  3726. VOID TrimEndWhiteSpace();
  3727. PWSTR GetStart();
  3728. LONG GetTotalLength();
  3729. BOOL AnyProblems();
  3730. VOID NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo);
  3731. VOID AddSchemeNote(DWORD a_dwSchemeInfo);
  3732. DWORD GetSchemeNotes();
  3733. URL_SCHEME QueryScheme();
  3734. VOID Mark();
  3735. VOID ClearMark();
  3736. VOID EraseMarkedText();
  3737. DWORD CompareMarkWith(PWSTR psz);
  3738. DWORD CompareLast(PCWSTR psz, DWORD cc);
  3739. VOID EnableMunging();
  3740. VOID DisableMunging();
  3741. VOID DisableSlashFixing();
  3742. VOID BackupFlags();
  3743. VOID RestoreFlags();
  3744. VOID AddFlagNote(DWORD dwFlag);
  3745. VOID NotifyQuery();
  3746. VOID NotifyFragment();
  3747. VOID DropQuery();
  3748. VOID DropFragment();
  3749. };
  3750. // -------------------------------------------------------------------------------
  3751. URL_STRING::URL_STRING(DWORD dwFlags)
  3752. {
  3753. _ccBuffer = ARRAYSIZE(_wszInternalString);
  3754. _ccWork = 1;
  3755. _pszWork = _wszInternalString;
  3756. _ccQuery = _ccFragment = _ccMark = 0;
  3757. _eScheme = URL_SCHEME_UNKNOWN;
  3758. _dwOldFlags = _dwFlags = dwFlags;
  3759. _dwMode = US_NOTHING;
  3760. _fFixSlashes = TRUE;
  3761. _fError = _fExpecting = FALSE;
  3762. }
  3763. URL_STRING::~URL_STRING()
  3764. {
  3765. if (_ccBuffer > ARRAYSIZE(_wszInternalString))
  3766. {
  3767. LocalFree(_pszWork);
  3768. }
  3769. }
  3770. // -------------------------------------------------------------------------------
  3771. // These are the standard functions used for adding characters to an url.
  3772. VOID URL_STRING::baseAccept(WCHAR wch)
  3773. {
  3774. _pszWork[_ccWork-1] = (_fFixSlashes
  3775. ? ((wch!=WHACK) ? wch : SLASH)
  3776. : wch);
  3777. _ccWork++;
  3778. if (_ccWork>_ccBuffer)
  3779. {
  3780. if (!_fError)
  3781. {
  3782. PWSTR psz = (PWSTR)LocalAlloc(LPTR, 2*_ccBuffer*sizeof(WCHAR));
  3783. if (!psz)
  3784. {
  3785. _ccWork--;
  3786. _fError = TRUE;
  3787. return;
  3788. }
  3789. memcpy(psz, _pszWork, (_ccWork-1)*sizeof(WCHAR));
  3790. if (_ccBuffer>ARRAYSIZE(_wszInternalString))
  3791. {
  3792. LocalFree(_pszWork);
  3793. }
  3794. _ccBuffer *= 2;
  3795. _pszWork = psz;
  3796. }
  3797. else
  3798. {
  3799. _ccWork--;
  3800. }
  3801. }
  3802. }
  3803. VOID URL_STRING::TrackWhiteSpace(WCHAR wch)
  3804. {
  3805. if (IsWhite(wch))
  3806. {
  3807. if (!_ccLastWhite)
  3808. {
  3809. _ccLastWhite = _ccWork;
  3810. }
  3811. }
  3812. else
  3813. {
  3814. _ccLastWhite = 0;
  3815. }
  3816. }
// -- URL_STRING::Accept ----------------------------
// Based on the current munging mode, transform the character into the
// desired form and add it to the string.
//   wch - the next character of the url being built.
// Whitespace tracking always sees the incoming character first; _dwMode then
// decides whether wch is stored unchanged, decoded from a %XX sequence, or
// written out as a %XX sequence.
VOID URL_STRING::Accept(WCHAR wch)
{
    TrackWhiteSpace(wch);
    switch (_dwMode)
    {
    case US_NOTHING:
        break;
    case US_UNESCAPE:
        // _fExpecting is set once a '%' has been seen; _wchLast holds the first
        // hex digit after it (L'\0' until that digit arrives).
        if (_fExpecting)
        {
            if (!IsHex(wch))
            {
                // Not an escape after all: emit the held-back '%' and first
                // digit (if any), then fall out of the switch to store wch.
                baseAccept(HEX_ESCAPE);
                if (_wchLast!=L'\0')
                {
                    baseAccept(_wchLast);
                }
                _fExpecting = FALSE;
                break;
            }
            else if (_wchLast!=L'\0')
            {
                // Second hex digit: combine both digits into the decoded character.
                wch = (HexToWord(_wchLast)*16) + HexToWord(wch);
                TrackWhiteSpace(wch);
                _fExpecting = FALSE;
                if ((wch==WHACK) && _fFixSlashes)
                {
                    // A decoded backslash is stored literally; slash fixing is
                    // switched off just for this one character.
                    _fFixSlashes = FALSE;
                    baseAccept(wch);
                    _fFixSlashes = TRUE;
                    return;
                }
                break;
            }
            else
            {
                // First hex digit: hold on to it until the second one shows up.
                _wchLast = wch;
            }
            return;
        }
        if (wch==HEX_ESCAPE)
        {
            // Start of a possible %XX sequence; nothing is stored yet.
            _fExpecting = TRUE;
            _wchLast = L'\0';
            return;
        }
        break;
    case US_ESCAPE_UNSAFE:
        // Slashes, fixable backslashes and safe path characters pass through.
        // '%' counts as safe unless URL_ESCAPE_PERCENT is set.
        if ((wch==SLASH)
            ||
            (wch==WHACK && _fFixSlashes)
            ||
            (IsSafePathChar(wch) && (wch!=HEX_ESCAPE || !(_dwFlags & URL_ESCAPE_PERCENT))))
        {
            break;
        }
        // Everything else becomes %XX built from the low eight bits of wch.
        baseAccept(L'%');
        baseAccept(hex[(wch >> 4) & 15]);
        baseAccept(hex[wch & 15]);
        return;
    case US_ESCAPE_SPACES:
        if (wch==SPC)
        {
            baseAccept(L'%');
            baseAccept(L'2');
            baseAccept(L'0');
            return;
        }
        break;
    default:
        ASSERT(FALSE);
    }
    baseAccept(wch);
}
  3894. // -- Accept --------------------------------
  3895. // Accept only a string
  3896. VOID URL_STRING::Accept(PWSTR psz)
  3897. {
  3898. while (*psz)
  3899. {
  3900. Accept(*psz);
  3901. psz++;
  3902. }
  3903. }
// -- Contract
// Whenever we call Contract, we're pointing past the last separator. We want to
// omit the segment between this separator and the one before it.
// This should be used ONLY when we're examining the path segment of the urls.
//   fContractLevel - when FALSE and the last stored character is already a
//                    separator, nothing is removed.
// The scan never moves the write position to before the mark (_ccMark).
// For mk: urls only SLASH is a separator; for all other schemes WHACK counts too.
// NOTE(review): _ccEnd is a DWORD, so when _ccMark is 1 the test
// `_ccEnd>=_ccMark-1` cannot become false and the backward scan depends on a
// separator actually being present in the buffer - confirm that callers
// guarantee this.
VOID URL_STRING::Contract(BOOL fContractLevel)
{
    ASSERT(_ccWork && _ccMark);
    // _ccWork is 1 after wherever the next character will be placed
    // subtract +1 to derive what the last character in the url is
    DWORD _ccEnd = _ccWork-1 - 1;
    if (_eScheme!=URL_SCHEME_MK)
    {
        if (!fContractLevel && (_pszWork[_ccEnd]==SLASH || _pszWork[_ccEnd]==WHACK))
        {
            return;
        }
        // Walk backwards to the previous separator, or to the mark.
        do
        {
            _ccEnd--;
        }
        while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH && _pszWork[_ccEnd]!=WHACK);
    }
    else
    {
        if (!fContractLevel && (_pszWork[_ccEnd]==SLASH))
        {
            return;
        }
        do
        {
            _ccEnd--;
        }
        while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH);
    }
    if (_ccEnd<_ccMark-1)
    {
        // Ran past the mark: clamp so that nothing before it is removed.
        _ccEnd = _ccMark-1;
    }
    else
    {
        // Keep the separator that was found; writing resumes just after it.
        _ccEnd++;
    }
    _ccWork = _ccEnd + 1;
}
  3948. VOID URL_STRING::TrimEndWhiteSpace()
  3949. {
  3950. if (_ccLastWhite)
  3951. {
  3952. _ccWork = _ccLastWhite;
  3953. _ccLastWhite = 0;
  3954. }
  3955. }
// Add a character without applying the current munging mode or whitespace
// tracking; it goes straight to baseAccept (which still applies slash fixing).
VOID URL_STRING::CleanAccept(WCHAR wch)
{
    baseAccept(wch);
}
  3960. // -------------------------------------------------------------------------------
  3961. // These member functions return information about the url that is being formed
// Return the start of the work buffer. The text is not NUL-terminated by this
// call; use GetTotalLength for the number of characters stored.
PWSTR URL_STRING::GetStart()
{
    return _pszWork;
}
// Return the number of characters currently stored (_ccWork is kept one ahead
// of that count).
LONG URL_STRING::GetTotalLength()
{
    return _ccWork - 1;
}
// Return TRUE once growing the buffer has failed (see _fError in baseAccept).
BOOL URL_STRING::AnyProblems()
{
    return _fError;
}
  3974. // -------------------------------------------------------------------------------
// Record the scheme and its info flags for the url being built.
//   a_eScheme      - scheme of the url.
//   a_dwSchemeInfo - UPF_* flags describing that scheme.
// Slash fixing is turned on or off according to UPF_SCHEME_CONVERT.
VOID URL_STRING::NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo)
{
    _eScheme = a_eScheme;
    _dwSchemeInfo = a_dwSchemeInfo;
    _fFixSlashes = a_dwSchemeInfo & UPF_SCHEME_CONVERT;
}
// OR additional UPF_* flags into the recorded scheme info and re-derive slash
// fixing from the combined value.
VOID URL_STRING::AddSchemeNote(DWORD a_dwSchemeInfo)
{
    _dwSchemeInfo |= a_dwSchemeInfo;
    _fFixSlashes = _dwSchemeInfo & UPF_SCHEME_CONVERT;
}
// Return the scheme info flags recorded so far.
DWORD URL_STRING::GetSchemeNotes()
{
    return _dwSchemeInfo;
}
// Return the scheme recorded for the url being built.
URL_SCHEME URL_STRING::QueryScheme()
{
    return _eScheme;
}
  3994. // -------------------------------------------------------------------------------
// Remember the current write position. Only one mark exists; a later Mark
// replaces an earlier one.
VOID URL_STRING::Mark()
{
    _ccMark = _ccWork;
}
// Forget the mark without touching the text written since it was set.
VOID URL_STRING::ClearMark()
{
    _ccMark = 0;
}
  4003. VOID URL_STRING::EraseMarkedText()
  4004. {
  4005. if (_ccMark)
  4006. {
  4007. _ccWork = _ccMark;
  4008. _ccMark = 0;
  4009. }
  4010. }
  4011. DWORD URL_STRING::CompareMarkWith(PWSTR psz)
  4012. {
  4013. if (_ccMark)
  4014. {
  4015. *(_pszWork + _ccWork - 1) = L'\0';
  4016. return (StrCmpW(_pszWork + _ccMark - 1, psz));
  4017. }
  4018. // In other words, return that the string isn't present.
  4019. return 1;
  4020. }
  4021. DWORD URL_STRING::CompareLast(PCWSTR psz, DWORD cc)
  4022. {
  4023. if (_ccWork > cc)
  4024. {
  4025. return StrCmpNIW(_pszWork + _ccWork - 1 - cc, psz, cc);
  4026. }
  4027. return 1;
  4028. }
  4029. // -------------------------------------------------------------------------------
  4030. VOID URL_STRING::NotifyQuery()
  4031. {
  4032. if (!_ccQuery)
  4033. {
  4034. _ccQuery = _ccWork;
  4035. }
  4036. }
  4037. VOID URL_STRING::NotifyFragment()
  4038. {
  4039. if (!_ccFragment)
  4040. {
  4041. _ccFragment = _ccWork;
  4042. CleanAccept(POUND);
  4043. }
  4044. }
  4045. VOID URL_STRING::DropQuery()
  4046. {
  4047. if (_ccQuery)
  4048. {
  4049. _ccWork = _ccQuery;
  4050. _ccQuery = _ccFragment = 0;
  4051. }
  4052. }
  4053. VOID URL_STRING::DropFragment()
  4054. {
  4055. if (_ccFragment)
  4056. {
  4057. _ccWork = _ccFragment;
  4058. _ccFragment = 0;
  4059. }
  4060. }
  4061. // -------------------------------------------------------------------------------
  4062. // These member functions are for determining how the url's characters are going
  4063. // to be represented
  4064. VOID URL_STRING::EnableMunging()
  4065. {
  4066. _dwMode = US_NOTHING;
  4067. // For opaque urls, munge ONLY if we're explicitly asked to URL_ESCAPE or URL_UNESCAPE,
  4068. // but NOT URL_ESCAPE_SPACES_ONLY
  4069. // For query and fragment, never allow for URL_ESCAPE_UNSAFE and for
  4070. // others ONLY when URL_DONT_ESCAPE_EXTRA_INFO is specified
  4071. if ((_dwSchemeInfo & UPF_SCHEME_OPAQUE)
  4072. && (_dwFlags & URL_ESCAPE_SPACES_ONLY))
  4073. return;
  4074. if ((_ccQuery || _ccFragment)
  4075. && ((_dwFlags & (URL_DONT_ESCAPE_EXTRA_INFO | URL_ESCAPE_UNSAFE))))
  4076. return;
  4077. if (_dwFlags & URL_UNESCAPE)
  4078. {
  4079. _dwMode = US_UNESCAPE;
  4080. }
  4081. else if (_dwFlags & URL_ESCAPE_UNSAFE)
  4082. {
  4083. _dwMode = US_ESCAPE_UNSAFE;
  4084. }
  4085. else if (_dwFlags & URL_ESCAPE_SPACES_ONLY)
  4086. {
  4087. _dwMode = US_ESCAPE_SPACES;
  4088. }
  4089. }
// Switch to US_NOTHING so Accept stores characters without escaping or
// unescaping them.
VOID URL_STRING::DisableMunging()
{
    _dwMode = US_NOTHING;
}
// Stop converting backslashes to forward slashes in baseAccept.
VOID URL_STRING::DisableSlashFixing()
{
    _fFixSlashes = FALSE;
}
// OR extra URL_* flags into the working flags. The munging mode is not
// recomputed here; that happens on the next EnableMunging.
VOID URL_STRING::AddFlagNote(DWORD dwFlag)
{
    _dwFlags |= dwFlag;
}
// Save the current flags so RestoreFlags can put them back.
VOID URL_STRING::BackupFlags()
{
    _dwOldFlags = _dwFlags;
}
// Put back the flags held in _dwOldFlags (set by the constructor or by
// BackupFlags) and recompute the munging mode from them.
VOID URL_STRING::RestoreFlags()
{
    // Only file urls are expected to have changed the flags in the meantime.
    ASSERT((_eScheme==URL_SCHEME_FILE) || (_dwFlags==_dwOldFlags));
    _dwFlags = _dwOldFlags;
    EnableMunging();
}
  4112. // -------------------------------------------------------------------------------
  4113. // URL ------------------------------------------------------------------------------------
  4114. // The URL class is used to examine the base and relative URLs to determine what
  4115. // will go into the URL_STRING container. The difference should be clear:
  4116. // URL instances look, but don't touch. URL_STRINGs are used solely to build urls.
// Read-only cursor over one input url (base or relative). The Detect* members
// look at the text at the cursor; the Feed* members copy text into a
// URL_STRING and advance the cursor.
class URL
{
private:
    // _pszUrl is the start of the url (after leading whitespace); _pszWork is
    // the current cursor.
    PCWSTR _pszUrl, _pszWork;
    URL_SCHEME _eScheme;
    // UPF_* notes about the scheme, and the URL_* flags handed to Setup.
    DWORD _dwSchemeNotes, _dwFlags;
    BOOL _fPathCompressionOn;
    BOOL _fIgnoreQuery;

    // Character and prefix helpers.
    WCHAR SmallForm(WCHAR wch);
    BOOL IsAlpha(WCHAR ch);
    PCWSTR IsUrlPrefix(PCWSTR psz);
    BOOL IsLocalDrive(PCWSTR psz);
    BOOL IsQualifiedDrive(PCWSTR psz);
    BOOL DetectSymbols(WCHAR wch1, WCHAR wch2 = '\0', WCHAR wch3 = '\0');
    PCWSTR NextChar(PCWSTR psz);
    PCWSTR FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1 = '\0', WCHAR wchDelim2 = '\0', WCHAR wchDelim3 = '\0', WCHAR wchDelim4 = '\0');

    // Scheme-specific server detection and copying.
    BOOL DetectFileServer();
    BOOL DetectMkServer();
    BOOL DefaultDetectServer();
    VOID FeedDefaultServer(URL_STRING* pus);
    VOID FeedFileServer(URL_STRING* pus);
    VOID FeedFtpServer(URL_STRING* pus);
    VOID FeedHttpServer(URL_STRING* pus);
    VOID FeedMkServer(URL_STRING* pus);
    PCWSTR FeedPort(PCWSTR psz, URL_STRING* pus);

public:
    // Setup must be called before anything else; there is no constructor.
    VOID Setup(PCWSTR pszInUrl, DWORD a_dwFlags = 0);
    VOID Reset();
    BOOL IsReset();

    // Scheme handling.
    BOOL DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes = FALSE);
    VOID SetScheme(URL_SCHEME eScheme, DWORD dwFlag);
    URL_SCHEME GetScheme();
    VOID AddSchemeNote(DWORD dwFlag);
    DWORD GetSchemeNotes();

    // Questions about what is at the cursor.
    BOOL DetectServer();
    BOOL DetectAbsolutePath();
    BOOL DetectPath();
    BOOL DetectQueryOrFragment();
    BOOL DetectQuery();
    BOOL DetectLocalDrive();
    BOOL DetectSlash();
    BOOL DetectAnything();
    WCHAR PeekNext();

    // Copying parts of the url into a URL_STRING.
    VOID FeedPath(URL_STRING* pus, BOOL fMarkServer = TRUE);
    PCWSTR CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue);
    DWORD DetectDots(PCWSTR* ppsz);
    VOID StopPathCompression();
    VOID FeedServer(URL_STRING* pus);
    VOID FeedLocalDrive(URL_STRING* pus);
    VOID FeedQueryAndFragment(URL_STRING* pus);
    VOID IgnoreQuery();
};
  4169. // -------------------------------------------------------------------------------
  4170. VOID URL::Setup(PCWSTR pszInUrl, DWORD a_dwFlags)
  4171. {
  4172. while (*pszInUrl && IsWhite(*pszInUrl))
  4173. {
  4174. pszInUrl++;
  4175. }
  4176. _pszWork = _pszUrl = pszInUrl;
  4177. _eScheme = URL_SCHEME_UNKNOWN;
  4178. _dwSchemeNotes = 0;
  4179. _dwFlags = a_dwFlags;
  4180. _fPathCompressionOn = TRUE;
  4181. _fIgnoreQuery = FALSE;
  4182. }
// Park the cursor on the shared wszBogus string, which IsReset recognises.
VOID URL::Reset()
{
    _pszWork = wszBogus;
}
// Return TRUE when the cursor is parked on wszBogus, i.e. Reset was the last
// thing to move it. This is a pointer comparison, not a string comparison.
BOOL URL::IsReset()
{
    return (_pszWork==wszBogus);
}
  4191. // -------------------------------------------------------------------------------
  4192. inline WCHAR URL::SmallForm(WCHAR wch)
  4193. {
  4194. return (wch < L'A' || wch > L'Z') ? wch : (wch - L'A' + L'a');
  4195. }
  4196. inline BOOL URL::IsAlpha(WCHAR ch)
  4197. {
  4198. return ((ch >= 'a') && (ch <= 'z'))
  4199. ||
  4200. ((ch >= 'A') && (ch <= 'Z'));
  4201. }
  4202. inline PCWSTR URL::IsUrlPrefix(PCWSTR psz)
  4203. {
  4204. // We want to skip instances of "URL:"
  4205. psz = NextChar(psz);
  4206. if (*psz==L'u' || *psz==L'U')
  4207. {
  4208. psz = NextChar(psz+1);
  4209. if (*psz==L'r' || *psz==L'R')
  4210. {
  4211. psz = NextChar(psz+1);
  4212. if (*psz==L'l' || *psz==L'L')
  4213. {
  4214. psz = NextChar(psz+1);
  4215. if (*psz==COLON)
  4216. {
  4217. return NextChar(psz+1);
  4218. }
  4219. }
  4220. }
  4221. }
  4222. return NULL;
  4223. }
  4224. inline BOOL URL::IsLocalDrive(PCWSTR psz)
  4225. {
  4226. psz = NextChar(psz);
  4227. return (IsAlpha(*psz)
  4228. &&
  4229. ((*NextChar(psz+1)==COLON) || (*NextChar(psz+1)==BAR)));
  4230. }
  4231. // -- IsQualifiedDrive --------
  4232. // On Win32 systems, a qualified drive is either
  4233. // i. <letter>: or ii. \\UNC\
  4234. // Under unix, it's only /.
  4235. inline BOOL URL::IsQualifiedDrive(PCWSTR psz)
  4236. {
  4237. psz = NextChar(psz);
  4238. BOOL fResult = IsLocalDrive(psz);
  4239. if (!fResult && *psz==WHACK)
  4240. {
  4241. psz = NextChar(psz+1);
  4242. fResult = *psz==WHACK;
  4243. }
  4244. return fResult;
  4245. }
  4246. // -- DetectSymbols -------------
  4247. // This is used to help determine what part of the URL we have reached.
  4248. inline BOOL URL::DetectSymbols(WCHAR wch1, WCHAR wch2, WCHAR wch3)
  4249. {
  4250. ASSERT(_pszWork);
  4251. PCWSTR psz = NextChar(_pszWork);
  4252. return (*psz && (*psz==wch1 || *psz==wch2 || *psz==wch3));
  4253. }
// Return TRUE when the next significant character is '/' or '\'.
BOOL URL::DetectSlash()
{
    return DetectSymbols(SLASH, WHACK);
}
// Return TRUE when any significant character remains at the cursor.
BOOL URL::DetectAnything()
{
    return (*NextChar(_pszWork)!=L'\0');
}
  4262. // -- NextChar -------------------------------------
  4263. // We use NextChar instead of *psz because we want to
  4264. // ignore characters such as TAB, CR, etc.
  4265. inline PCWSTR URL::NextChar(PCWSTR psz)
  4266. {
  4267. while (IsInsignificantWhite(*psz))
  4268. {
  4269. psz++;
  4270. }
  4271. return psz;
  4272. }
  4273. WCHAR URL::PeekNext()
  4274. {
  4275. return (*NextChar(NextChar(_pszWork)+1));
  4276. }
  4277. // -------------------------------------------------------------------------------
  4278. inline PCWSTR URL::FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1, WCHAR wchDelim2, WCHAR wchDelim3, WCHAR wchDelim4)
  4279. {
  4280. psz = NextChar(psz);
  4281. while (*psz && *psz!=wchDelim1 && *psz!=wchDelim2 && *psz!=wchDelim3 && *psz!=wchDelim4)
  4282. {
  4283. pus->Accept(*psz);
  4284. psz = NextChar(psz+1);
  4285. }
  4286. return psz;
  4287. }
  4288. // -------------------------------------------------------------------------------
// Set the scheme and replace (not OR) the scheme notes.
VOID URL::SetScheme(URL_SCHEME eScheme, DWORD dwFlag)
{
    _eScheme = eScheme;
    _dwSchemeNotes = dwFlag;
}
// Return the scheme recorded for this url.
URL_SCHEME URL::GetScheme()
{
    return _eScheme;
}
// OR additional flags into the scheme notes.
VOID URL::AddSchemeNote(DWORD dwFlag)
{
    _dwSchemeNotes |= dwFlag;
}
// Return the scheme notes recorded for this url.
DWORD URL::GetSchemeNotes()
{
    return _dwSchemeNotes;
}
// Look for a scheme at the cursor and, if there is one, deal with it.
//   pus               - url under construction.
//   fReconcileSchemes - FALSE: copy the (lower-cased) scheme into pus and
//                       record it there. TRUE: pus already holds a scheme;
//                       only check that this url's scheme matches it.
// Returns TRUE when a scheme (explicit, or an implied file scheme for a drive
// or UNC path) was found. When reconciling and the schemes differ, this URL is
// Reset() instead of advancing.
BOOL URL::DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes)
{
    ASSERT(_pszWork);
    ASSERT(!fReconcileSchemes || (fReconcileSchemes && pus->QueryScheme()!=URL_SCHEME_FILE));
    PCWSTR psz = NextChar(_pszWork);
    BOOL fResult = (IsQualifiedDrive(_pszWork));
    if (fResult)
    {
        //
        // Detected a File URL that isn't explicitly marked as such, ie C:\foo,
        // in this case, we need to confirm that we're not overwriting
        // a fully qualified relative URL with an Accept("file:"), although
        // if the relative URL is the same scheme as the base, we now
        // need to make the BASE-file URL take precedence.
        //
        _eScheme = URL_SCHEME_FILE;
        if (!fReconcileSchemes)
        {
            pus->Accept((PWSTR)c_szFileScheme);
            pus->Accept(COLON);
            // NOTE(review): entry 1 of g_mpUrlSchemeTypes is presumably the
            // "file" scheme - confirm against the table.
            _dwSchemeNotes = g_mpUrlSchemeTypes[1].dwFlags;
            pus->NoteScheme(_eScheme, _dwSchemeNotes);
            pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
        }
        else if (pus->QueryScheme() != URL_SCHEME_FILE)
        {
            Reset();
        }
        goto exit;
    }
    for (;;)
    {
        // Run over the candidate scheme; it only counts if a colon follows.
        while (IsValidSchemeCharW(*psz))
        {
            psz = NextChar(psz + 1);
        }
        if (*psz!=COLON)
        {
            break;
        }
        if (IsUrlPrefix(_pszWork))
        {
            // However, we want to skip instances of URL:
            _pszWork = psz = NextChar(psz+1);
            continue;
        }
        DWORD ccScheme = 0;
        PCWSTR pszClone = NextChar(_pszWork);
        if (!fReconcileSchemes)
        {
            // Copy the scheme and its colon, lower-cased, into pus.
            while (pszClone<=psz)
            {
                pus->Accept(SmallForm(*pszClone));
                ccScheme++;
                pszClone = NextChar(pszClone+1);
            }
            _pszWork = pszClone;
            // Subtract one for the colon
            ccScheme--;
            // BUG BUG Since we're smallifying the scheme above, we might be able to
            // avoid calling this func, call GetSchemeTypeAndFlags instead.
            _eScheme = GetSchemeTypeAndFlagsSpecialW(pus->GetStart(), ccScheme, &_dwSchemeNotes);
            pus->NoteScheme(_eScheme, _dwSchemeNotes);
        }
        else
        {
            // Compare this scheme (and colon) with what pus already starts with.
            PWSTR pszKnownScheme = pus->GetStart();
            while (pszClone<=psz && SmallForm(*pszClone)==*pszKnownScheme)
            {
                pszClone = NextChar(pszClone+1);
                pszKnownScheme++;
            }
            if (pszClone<=psz)
            {
                // Stopped early: the schemes differ.
                Reset();
            }
            else
            {
                _pszWork = pszClone;
            }
        }
        fResult = TRUE;
        break;
    }
exit:
    return fResult;
}
  4393. // -------------------------------------------------------------------------------
  4394. BOOL URL::DetectServer()
  4395. {
  4396. ASSERT(_pszWork);
  4397. BOOL fRet;
  4398. switch (_eScheme)
  4399. {
  4400. case URL_SCHEME_FILE:
  4401. fRet = DetectFileServer();
  4402. break;
  4403. case URL_SCHEME_MK:
  4404. fRet = DetectMkServer();
  4405. break;
  4406. default:
  4407. fRet = DefaultDetectServer();
  4408. break;
  4409. }
  4410. return fRet;
  4411. }
// Return TRUE when a drive specification (letter plus ':' or '|') is at the
// cursor. The cursor is not moved.
BOOL URL::DetectLocalDrive()
{
    return IsLocalDrive(_pszWork);
}
  4416. BOOL URL::DetectFileServer()
  4417. {
  4418. ASSERT(_pszWork);
  4419. PCWSTR psz = _pszWork;
  4420. BOOL fResult = IsLocalDrive(_pszWork);
  4421. if (fResult)
  4422. {
  4423. _dwSchemeNotes |= UPF_FILEISPATHURL;
  4424. }
  4425. else
  4426. {
  4427. fResult = DetectSymbols(SLASH, WHACK);
  4428. }
  4429. return fResult;
  4430. }
  4431. BOOL URL::DetectMkServer()
  4432. {
  4433. ASSERT(_pszWork);
  4434. PCWSTR psz = NextChar(_pszWork);
  4435. BOOL fResult = (*psz==L'@');
  4436. if (fResult)
  4437. {
  4438. _pszWork = NextChar(psz + 1);
  4439. }
  4440. return fResult;
  4441. }
  4442. BOOL URL::DefaultDetectServer()
  4443. {
  4444. BOOL fResult = FALSE;
  4445. if (DetectSymbols(SLASH, WHACK))
  4446. {
  4447. PCWSTR psz = NextChar(_pszWork + 1);
  4448. fResult = ((*psz==SLASH) || (*psz==WHACK));
  4449. }
  4450. return fResult;
  4451. }
// Copy the server part of the url into pus using the routine that matches the
// scheme: file, mk, ftp and http/https have their own; everything else uses
// FeedDefaultServer.
VOID URL::FeedServer(URL_STRING* pus)
{
    ASSERT(_pszWork);
    switch (_eScheme)
    {
    case URL_SCHEME_FILE:
        FeedFileServer(pus);
        break;
    case URL_SCHEME_MK:
        FeedMkServer(pus);
        break;
    case URL_SCHEME_FTP:
        FeedFtpServer(pus);
        break;
    case URL_SCHEME_HTTP:
    case URL_SCHEME_HTTPS:
        FeedHttpServer(pus);
        break;
    default:
        FeedDefaultServer(pus);
        break;
    }
}
  4475. VOID URL::FeedMkServer(URL_STRING* pus)
  4476. {
  4477. ASSERT(_pszWork);
  4478. pus->EnableMunging();
  4479. pus->Accept(L'@');
  4480. _pszWork = FeedUntil(_pszWork, pus, SLASH);
  4481. if (!*_pszWork)
  4482. {
  4483. pus->TrimEndWhiteSpace();
  4484. }
  4485. else
  4486. {
  4487. _pszWork = NextChar(_pszWork+1);
  4488. }
  4489. pus->Accept(SLASH);
  4490. }
  4491. VOID URL::FeedLocalDrive(URL_STRING* pus)
  4492. {
  4493. pus->Accept(*NextChar(_pszWork));
  4494. _pszWork = NextChar(_pszWork+1);
  4495. pus->Accept(*_pszWork);
  4496. _pszWork = NextChar(_pszWork+1);
  4497. pus->DisableMunging();
  4498. }
  4499. VOID URL::FeedFileServer(URL_STRING* pus)
  4500. {
  4501. PCWSTR psz = NextChar(_pszWork);
  4502. // pus->BackupFlags();
  4503. while (*psz==SLASH || *psz==WHACK)
  4504. {
  4505. psz = NextChar(psz+1);
  4506. }
  4507. DWORD dwSlashes = (DWORD)(psz - _pszWork);
  4508. switch (dwSlashes)
  4509. {
  4510. case 4:
  4511. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4512. _dwSchemeNotes |= UPF_FILEISPATHURL;
  4513. // 4 to 6 slashes == 1 UNC
  4514. case 2:
  4515. if (IsLocalDrive(psz))
  4516. {
  4517. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4518. }
  4519. case 5:
  4520. case 6:
  4521. pus->Accept(SLASH);
  4522. pus->Accept(SLASH);
  4523. if (!IsLocalDrive(psz))
  4524. {
  4525. pus->EnableMunging();
  4526. psz = FeedUntil(psz, pus, SLASH, WHACK);
  4527. if (!*psz)
  4528. {
  4529. pus->TrimEndWhiteSpace();
  4530. Reset();
  4531. }
  4532. else
  4533. {
  4534. _pszWork = NextChar(psz+1);
  4535. }
  4536. }
  4537. else
  4538. {
  4539. _pszWork = psz;
  4540. }
  4541. pus->Accept(SLASH);
  4542. break;
  4543. // If there are no slashes, then it can't be a UNC.
  4544. case 0:
  4545. if (IsLocalDrive(psz))
  4546. {
  4547. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4548. }
  4549. // We think of "file:/" and "file:///" to be on the local machine
  4550. // And if there are more slashes than we typically handle, we'll treat them as 1.
  4551. case 1:
  4552. case 3:
  4553. // This is a not-good-case
  4554. default:
  4555. pus->Accept(SLASH);
  4556. pus->Accept(SLASH);
  4557. pus->Accept(SLASH);
  4558. _pszWork = NextChar(psz);
  4559. break;
  4560. }
  4561. }
// Copy the server part of an ftp: url into pus.
// Up to two leading slashes are copied, then an optional "user[:password]"
// part (case preserved), then the host name (lower-cased), then an optional
// port via FeedPort. The server part is always followed by a '/': either the
// separator found in the input or one supplied here.
VOID URL::FeedFtpServer(URL_STRING* pus)
{
    ASSERT(_pszWork);
    PCWSTR psz = NextChar(_pszWork);
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    pus->EnableMunging();
    // The following is a grotesque and gruesome hack. We need to preserve case for
    // embedded username/password
    _pszWork = psz;
    BOOL fPossibleUserPasswordCombo = FALSE;
    // Look ahead for an '@' before the server part ends.
    while (*psz && *psz!=SLASH && *psz!=POUND && *psz!=QUERY)
    {
        if (*psz==L'@')
        {
            fPossibleUserPasswordCombo = TRUE;
            break;
        }
        psz = NextChar(psz+1);
    }
    psz = _pszWork;
    if (fPossibleUserPasswordCombo)
    {
        // Copy everything before the '@' as is; the look-ahead above
        // guarantees that this loop finds it.
        while (*psz!=L'@')
        {
            pus->Accept(*psz);
            psz = NextChar(psz+1);
        }
    }
    // This still leaves the issue of slashes, colons, ?s, @s, and #s in passwords; I guess they
    // ought to be escaped. (You just can't win, sometimes.)
    while (*psz && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
    {
        pus->Accept(SmallForm(*psz));
        psz = NextChar(psz+1);
    }
    if (*psz==COLON)
    {
        psz = FeedPort(psz, pus);
    }
    pus->DisableMunging();
    _pszWork = psz;
    if (!*psz)
    {
        pus->TrimEndWhiteSpace();
        pus->Accept(SLASH);
    }
    else
    {
        if (*psz==QUERY || *psz==POUND)
        {
            // No path at all: supply the root slash, leave '?'/'#' for later.
            pus->Accept(SLASH);
        }
        else
        {
            // Copy the separator that ended the server part and step past it.
            pus->Accept(*psz);
            _pszWork = NextChar(psz+1);
        }
    }
}
// Copy the server part of an http:/https: url into pus.
// The host is copied lower-cased. If it turns out that an '@' follows (a
// "user[:password]@" prefix), the lower-cased text is erased and the part up
// to the '@' is copied again with its case preserved.
VOID URL::FeedHttpServer(URL_STRING* pus)
{
    // This is a version of FeedDefaultServer, stripped of non-essentials.
    // This includes a hack to enable username/password combos in http urls.
    ASSERT(_pszWork);
    PCWSTR psz = NextChar(_pszWork);
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    pus->EnableMunging();
    // WARNING! FeedPort also calls Mark(). Must be careful that they don't overlap.
    pus->Mark();
    // Remember where the host started in case it has to be copied again.
    PCWSTR pszRestart = psz;
    while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT)
    {
        pus->Accept(SmallForm(*psz));
        psz = NextChar(psz+1);
    }
    if (*psz==COLON)
    {
        // We either have a port or a password.
        PCWSTR pszPort = psz;
        // Look ahead (without copying) to see what ends the text after the colon.
        do
        {
            psz = NextChar(psz+1);
        }
        while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT);
        if (*psz!=AT)
        {
            // Not followed by '@', so the colon introduced a port.
            psz = FeedPort(pszPort, pus);
        }
    }
    if (*psz==AT)
    {
        // We've hit a username/password combo. So we have to undo our case-changing
        psz = pszRestart;
        pus->EraseMarkedText();
        while (*psz!=AT)
        {
            pus->Accept(*psz);
            psz = NextChar(psz+1);
        }
        // Now we carry on as before
        while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
        {
            pus->Accept(SmallForm(*psz));
            psz = NextChar(psz+1);
        }
        if (*psz==COLON)
        {
            psz = FeedPort(psz, pus);
        }
    }
    pus->ClearMark();
    pus->DisableMunging();
    _pszWork = psz;
    if (!*psz)
    {
        pus->TrimEndWhiteSpace();
        if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
        {
            pus->Accept(SLASH);
        }
    }
    else
    {
        if (*psz==QUERY || *psz==POUND)
        {
            // No path at all: supply the root slash, leave '?'/'#' for later.
            pus->Accept(SLASH);
        }
        else
        {
            // Copy the separator that ended the server part and step past it.
            pus->Accept(*psz);
            _pszWork = NextChar(psz+1);
        }
    }
}
// Copy the server part of a url whose scheme has no dedicated routine.
// For schemes flagged UPF_SCHEME_INTERNET the host is lower-cased and a port
// is handled via FeedPort. For all others slash fixing is turned off and the
// text up to the next '/' is copied unchanged.
VOID URL::FeedDefaultServer(URL_STRING* pus)
{
    ASSERT(_pszWork);
    PCWSTR psz = NextChar(_pszWork);
    if (!(_dwSchemeNotes & UPF_SCHEME_INTERNET))
    {
        pus->DisableSlashFixing();
    }
    // Copy up to two leading slashes.
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    if (*psz==WHACK || *psz==SLASH)
    {
        pus->Accept(*psz);
        psz = NextChar(psz+1);
    }
    if (_dwSchemeNotes & UPF_SCHEME_INTERNET)
    {
        pus->EnableMunging();
        while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
        {
            pus->Accept(SmallForm(*psz));
            psz = NextChar(psz+1);
        }
        if (*psz==COLON)
        {
            psz = FeedPort(psz, pus);
        }
        pus->DisableMunging();
    }
    else
    {
        while (*psz && *psz!=SLASH)
        {
            pus->Accept(*psz);
            psz = NextChar(psz+1);
        }
    }
    _pszWork = psz;
    if (!*psz)
    {
        pus->TrimEndWhiteSpace();
        // Known, non-opaque schemes get a root slash after a bare server.
        if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
        {
            pus->Accept(SLASH);
        }
    }
    else
    {
        if (*psz==QUERY || *psz==POUND)
        {
            // No path at all: supply the root slash, leave '?'/'#' for later.
            pus->Accept(SLASH);
        }
        else
        {
            // Copy the separator that ended the server part and step past it.
            pus->Accept(*psz);
            _pszWork = NextChar(psz+1);
        }
    }
}
  4776. PCWSTR URL::FeedPort(PCWSTR psz, URL_STRING* pus)
  4777. {
  4778. BOOL fIgnorePort = FALSE;
  4779. pus->Mark();
  4780. psz = FeedUntil(psz, pus, SLASH, WHACK, POUND, QUERY);
  4781. if (!(_dwFlags & URL_DONT_SIMPLIFY))
  4782. {
  4783. // Here, decide whether or not to ignore the port
  4784. // FEATURE we should actually be getting this from
  4785. // the services file to find out the default protocol port
  4786. // but we dont think that most people will change them - zekel 17-Dec-96
  4787. switch(_eScheme)
  4788. {
  4789. case URL_SCHEME_HTTP:
  4790. if (pus->CompareMarkWith(L":80")==0)
  4791. fIgnorePort = TRUE;
  4792. break;
  4793. case URL_SCHEME_HTTPS:
  4794. if (pus->CompareMarkWith(L":443")==0)
  4795. fIgnorePort = TRUE;
  4796. break;
  4797. case URL_SCHEME_FTP:
  4798. if (pus->CompareMarkWith(L":21")==0)
  4799. fIgnorePort = TRUE;
  4800. break;
  4801. case URL_SCHEME_GOPHER:
  4802. if (pus->CompareMarkWith(L":70")==0)
  4803. fIgnorePort = TRUE;
  4804. break;
  4805. }
  4806. }
  4807. if (fIgnorePort)
  4808. {
  4809. pus->EraseMarkedText();
  4810. }
  4811. else
  4812. {
  4813. pus->ClearMark();
  4814. }
  4815. return psz;
  4816. }
  4817. // -------------------------------------------------------------------------------
  4818. BOOL URL::DetectAbsolutePath()
  4819. {
  4820. BOOL fResult = FALSE;
  4821. if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
  4822. {
  4823. fResult = TRUE;
  4824. }
  4825. else if (DetectSymbols(SLASH, WHACK))
  4826. {
  4827. fResult = TRUE;
  4828. _pszWork = NextChar(_pszWork+1);
  4829. }
  4830. return fResult;
  4831. }
  4832. BOOL URL::DetectPath()
  4833. {
  4834. return (*NextChar(_pszWork) && !DetectSymbols(QUERY, POUND));
  4835. }
// Copy the path into pus.
//   pus         - url under construction.
//   fMarkServer - when TRUE, mark the current end of pus first. Contract will
//                 not remove anything before that mark.
// Opaque urls are copied whole. Otherwise the path is copied segment by
// segment and, while path compression is on, "." segments are dropped and ".."
// segments remove the preceding segment.
VOID URL::FeedPath(URL_STRING* pus, BOOL fMarkServer)
{
    ASSERT(_pszWork);
    PCWSTR psz = NextChar(_pszWork);
    if (fMarkServer)
    {
        pus->Mark();
    }
    if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
    {
        _pszWork = FeedUntil(psz, pus);
        pus->TrimEndWhiteSpace();
    }
    else
    {
        DWORD cDots;
        BOOL fContinue = TRUE;
        do
        {
            cDots = 0;
            // DetectDots advances psz even when it reports no dots, so keep
            // the segment start for CopySegment.
            PCWSTR pszTmp = psz;
            if (_fPathCompressionOn)
            {
                cDots = DetectDots(&psz);
            }
            if (cDots)
            {
                if (cDots==2)
                {
                    pus->Contract();
                }
                // The dot segment is consumed but not copied; `continue` goes
                // on to the loop condition.
                continue;
            }
            psz = CopySegment(pszTmp, pus, &fContinue);
        }
        while (fContinue);
        _pszWork = psz;
        if (!*_pszWork)
        {
            pus->TrimEndWhiteSpace();
        }
    }
}
  4879. // pfContinue indicates whether there's anything following that would
  4880. // be of relevance to a path
  4881. PCWSTR URL::CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue)
  4882. {
  4883. ASSERT(pfContinue);
  4884. BOOL fStop = FALSE;
  4885. psz = NextChar(psz);
  4886. while (!fStop)
  4887. {
  4888. switch (*psz)
  4889. {
  4890. case POUND:
  4891. if (_eScheme==URL_SCHEME_FILE)
  4892. {
  4893. // Since #s are valid for dos paths, we have to accept them except
  4894. // for when they follow a .htm/.html file (See FindFragmentA/W)
  4895. // However, some inconsistencies may still arise...
  4896. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  4897. {
  4898. if (!pus->CompareLast(ExtTable[i].wszExt, ExtTable[i].cchExt))
  4899. break;
  4900. }
  4901. // If we haven't found a matching file extension, we'll treat as a filename character.
  4902. if (i==ARRAYSIZE(ExtTable))
  4903. {
  4904. pus->Accept(*psz);
  4905. psz = NextChar(psz+1);
  4906. break;
  4907. }
  4908. }
  4909. goto next;
  4910. case QUERY:
  4911. // We're going to support query as a legitimate character in file urls.
  4912. // *sigh*
  4913. if (_eScheme==URL_SCHEME_FILE)
  4914. {
  4915. if (_fIgnoreQuery)
  4916. {
  4917. psz = wszBogus;
  4918. }
  4919. else
  4920. {
  4921. pus->CleanAccept(*psz);
  4922. psz = NextChar(psz+1);
  4923. break;
  4924. }
  4925. }
  4926. case L'\0':
  4927. next:
  4928. *pfContinue = FALSE;
  4929. fStop = TRUE;
  4930. break;
  4931. case SLASH:
  4932. case WHACK:
  4933. fStop = TRUE;
  4934. // fall through
  4935. default:
  4936. pus->Accept(*psz);
  4937. psz = NextChar(psz+1);
  4938. break;
  4939. }
  4940. }
  4941. return psz;
  4942. }
  4943. DWORD URL::DetectDots(PCWSTR* ppsz)
  4944. {
  4945. PCWSTR psz;
  4946. if (ppsz)
  4947. {
  4948. psz = *ppsz;
  4949. }
  4950. else
  4951. {
  4952. psz = NextChar(_pszWork);
  4953. }
  4954. DWORD cDots = 0;
  4955. if (*psz==DOT)
  4956. {
  4957. psz = NextChar(psz+1);
  4958. cDots++;
  4959. if (*psz==DOT)
  4960. {
  4961. psz = NextChar(psz+1);
  4962. cDots++;
  4963. }
  4964. switch (*psz)
  4965. {
  4966. case WHACK:
  4967. if (_eScheme==URL_SCHEME_MK)
  4968. {
  4969. cDots = 0;
  4970. }
  4971. case SLASH:
  4972. psz = NextChar(psz+1);
  4973. break;
  4974. case QUERY:
  4975. case POUND:
  4976. case L'\0':
  4977. break;
  4978. default:
  4979. cDots = 0;
  4980. break;
  4981. }
  4982. }
  4983. if (ppsz)
  4984. {
  4985. *ppsz = psz;
  4986. }
  4987. return cDots;
  4988. }
  4989. VOID URL::StopPathCompression()
  4990. {
  4991. _fPathCompressionOn = FALSE;
  4992. }
  4993. // -------------------------------------------------------------------------------
  4994. BOOL URL::DetectQueryOrFragment()
  4995. {
  4996. return (DetectSymbols(QUERY, POUND));
  4997. }
  4998. BOOL URL::DetectQuery()
  4999. {
  5000. return (DetectSymbols(QUERY));
  5001. }
  5002. VOID URL::IgnoreQuery()
  5003. {
  5004. ASSERT(_eScheme==URL_SCHEME_FILE);
  5005. _fIgnoreQuery = TRUE;
  5006. }
  5007. VOID URL::FeedQueryAndFragment(URL_STRING* pus)
  5008. {
  5009. ASSERT(_pszWork);
  5010. if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
  5011. {
  5012. PCWSTR psz = NextChar(_pszWork);
  5013. while (*psz)
  5014. {
  5015. pus->Accept(*psz);
  5016. psz = NextChar(psz+1);
  5017. }
  5018. _pszWork = psz;
  5019. return;
  5020. }
  5021. PCWSTR psz = NextChar(_pszWork);
  5022. // This is okay since *psz must equal { ? | # }
  5023. if (*psz==QUERY)
  5024. {
  5025. pus->CleanAccept(QUERY);
  5026. }
  5027. // By munging, I mean taking an URL of form http://a/b#c?d and producing http://a/b?d#c
  5028. // We do this by default; however, we won't do this when we've been passed a fragment only
  5029. // as a relative url
  5030. // Query's always override.
  5031. if (*psz==QUERY)
  5032. {
  5033. pus->DropQuery();
  5034. pus->NotifyQuery();
  5035. pus->EnableMunging();
  5036. psz = NextChar(psz+1);
  5037. while (*psz)
  5038. {
  5039. if (*psz==POUND)
  5040. {
  5041. pus->NotifyFragment();
  5042. }
  5043. else
  5044. {
  5045. pus->Accept(*psz);
  5046. }
  5047. psz = NextChar(psz+1);
  5048. }
  5049. }
  5050. else
  5051. {
  5052. // This line of code will determine whether we've been passed a fragment for a relative url
  5053. // For properly formed base urls, this won't matter.
  5054. BOOL fMunge = psz!=NextChar(_pszUrl);
  5055. pus->DropFragment();
  5056. pus->NotifyFragment();
  5057. pus->EnableMunging();
  5058. psz = NextChar(psz+1);
  5059. // The following line is bogus. It just keeps going until the end. Not good.
  5060. // We MAY or MAY NOT fix this, depending on how much people scream at me.
  5061. // This may be an issue for Netscape compatibility.
  5062. // What we could do is: when either query or fragment would be blank, preserve as is.
  5063. // This would minimise breaking compatibility across the board.
  5064. // -- AKABIR, 09/28/98
  5065. while ((*psz==QUERY && !fMunge) || *psz)
  5066. {
  5067. if (*psz==QUERY)
  5068. {
  5069. pus->CleanAccept(QUERY);
  5070. }
  5071. else
  5072. {
  5073. pus->Accept(*psz);
  5074. }
  5075. psz = NextChar(psz+1);
  5076. }
  5077. if (*psz==QUERY)
  5078. {
  5079. pus->DropFragment();
  5080. pus->NotifyQuery();
  5081. pus->CleanAccept(*psz);
  5082. psz = NextChar(psz+1);
  5083. while (*psz)
  5084. {
  5085. pus->Accept(*psz);
  5086. psz = NextChar(psz+1);
  5087. }
  5088. pus->TrimEndWhiteSpace();
  5089. pus->NotifyFragment();
  5090. psz = NextChar(_pszWork);
  5091. pus->CleanAccept(*psz);
  5092. psz = NextChar(psz+1);
  5093. while (*psz!=QUERY)
  5094. {
  5095. pus->Accept(*psz);
  5096. psz = NextChar(psz+1);
  5097. }
  5098. }
  5099. }
  5100. pus->TrimEndWhiteSpace();
  5101. pus->ClearMark();
  5102. }
  5103. // -------------------------------------------------------------------------------
  5104. HRESULT
  5105. BlendUrls(URL& urlBase, URL& urlRelative, URL_STRING* pusOut, DWORD dwFlags)
  5106. {
  5107. HRESULT hr = S_OK;
  5108. // -- SCHEME --------------------------------------------------------------------------
  5109. // Examine each url's scheme.
  5110. // We won't continue to use urlBase IF
  5111. // 1. their tokenized schemes are not identical
  5112. // 2. the scheme is a file
  5113. // 3. the actual string schemes are not identical
  5114. // this checks to make sure that these are the same scheme, and
  5115. // that the scheme is allowed to be used in relative URLs
  5116. // file: is not allowed to because of weirdness with drive letters
  5117. // and \\UNC\shares
  5118. BOOL fBaseServerDetected = FALSE, fRelativeServerDetected = FALSE;
  5119. BOOL fDetectAbsoluteRelPath = FALSE;
  5120. BOOL fDetectedRelScheme = urlRelative.DetectAndFeedScheme(pusOut);
  5121. BOOL fDetectedBaseScheme = FALSE;
  5122. if (fDetectedRelScheme
  5123. && ((pusOut->QueryScheme()==URL_SCHEME_FILE)
  5124. || (urlRelative.GetSchemeNotes() & UPF_SCHEME_OPAQUE)))
  5125. {
  5126. urlBase.Reset();
  5127. }
  5128. else if ((fDetectedBaseScheme = urlBase.DetectAndFeedScheme(pusOut, fDetectedRelScheme)))
  5129. {
  5130. if (!fDetectedRelScheme)
  5131. {
  5132. urlRelative.SetScheme(urlBase.GetScheme(), urlBase.GetSchemeNotes());
  5133. }
  5134. }
  5135. // We fall back on the original parser for those cases we don't handle yet.
  5136. // (dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)
  5137. if (((pusOut->QueryScheme()==URL_SCHEME_FILE)
  5138. || (!(fDetectedRelScheme || fDetectedBaseScheme)))
  5139. && ((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)))
  5140. {
  5141. hr = E_FAIL;
  5142. goto exit;
  5143. }
  5144. if ((pusOut->QueryScheme()==URL_SCHEME_UNKNOWN))
  5145. {
  5146. // BUG BUG For IE4 compat, we need to use the old parser. However
  5147. // if we're passed URL_PLUGGABLE_PROTOCOL, we'll use this parser.
  5148. if (!(dwFlags & URL_PLUGGABLE_PROTOCOL))
  5149. {
  5150. hr = E_FAIL;
  5151. goto exit;
  5152. }
  5153. urlRelative.StopPathCompression();
  5154. // Same schemes, so now we look at the base url to divine the opacity
  5155. if (urlBase.DetectAnything() && !urlBase.IsReset())
  5156. {
  5157. if (!urlBase.DetectSlash())
  5158. {
  5159. if (!urlRelative.DetectQueryOrFragment())
  5160. {
  5161. urlBase.Reset();
  5162. }
  5163. urlBase.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5164. urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5165. pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
  5166. }
  5167. }
  5168. else if (!urlRelative.DetectSlash())
  5169. {
  5170. // If urlBase is reset, that means the schemes are different,
  5171. // so we only have urlRelative to figure out opacity.
  5172. urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5173. pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
  5174. }
  5175. }
  5176. else if (pusOut->QueryScheme()==URL_SCHEME_FTP)
  5177. {
  5178. // For ftp urls, we'll assume that we're being passed properly formed urls.
  5179. // Some ftp sites allow backslashes in their object filenames, so we should
  5180. // allow access to these. Also, domain passwords would otherwise need escaping.
  5181. pusOut->DisableSlashFixing();
  5182. }
  5183. if (dwFlags & URL_DONT_SIMPLIFY)
  5184. {
  5185. urlBase.StopPathCompression();
  5186. urlRelative.StopPathCompression();
  5187. }
  5188. // -- SERVER --------------------------------------------------------------------------
  5189. // Decide on the server to use.
  5190. // Question: if urlBase and UrlRelative have the same explicit server, isn't it pointless
  5191. // to continue looking at url base anyway?
  5192. pusOut->EnableMunging();
  5193. if (!(pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE))
  5194. {
  5195. if (urlRelative.DetectServer()
  5196. && !(urlBase.DetectServer() && (urlRelative.PeekNext()!=SLASH) && (urlRelative.PeekNext()!=WHACK)))
  5197. {
  5198. fRelativeServerDetected = TRUE;
  5199. urlRelative.FeedServer(pusOut);
  5200. urlBase.Reset();
  5201. }
  5202. else if (urlBase.DetectServer())
  5203. {
  5204. fBaseServerDetected = TRUE;
  5205. urlBase.FeedServer(pusOut);
  5206. }
  5207. }
  5208. // -- PATH ----------------------------------------------------------------------------
  5209. // Figure out the path
  5210. // If the relative url has a path, and it starts with a slash/whack, forget about the
  5211. // base's path and stuff. Otherwise, inherit the base and attach the relative
  5212. // Potential problem: when rel path is empty, we expect to knock of the last base segment
  5213. if (pusOut->QueryScheme()==URL_SCHEME_FILE)
  5214. {
  5215. // Hack for back compat
  5216. // If the relative url consists of a query string, we'll append that to
  5217. // our resultant url, rather than the base's query string
  5218. if (urlRelative.DetectQuery())
  5219. {
  5220. urlBase.IgnoreQuery();
  5221. }
  5222. else
  5223. {
  5224. BOOL fResult1 = urlRelative.DetectAbsolutePath();
  5225. BOOL fResult2 = urlRelative.DetectLocalDrive();
  5226. if (fResult2)
  5227. {
  5228. urlBase.Reset();
  5229. urlRelative.FeedLocalDrive(pusOut);
  5230. if (urlRelative.DetectAbsolutePath())
  5231. {
  5232. pusOut->Accept(SLASH);
  5233. }
  5234. }
  5235. else
  5236. {
  5237. if (urlBase.DetectLocalDrive())
  5238. {
  5239. urlBase.FeedLocalDrive(pusOut);
  5240. if (fResult1)
  5241. {
  5242. pusOut->Accept(SLASH);
  5243. urlBase.Reset();
  5244. }
  5245. else if (urlBase.DetectAbsolutePath())
  5246. {
  5247. pusOut->Accept(SLASH);
  5248. }
  5249. }
  5250. else if (fResult1)
  5251. {
  5252. if (fRelativeServerDetected)
  5253. {
  5254. pusOut->Accept(SLASH);
  5255. }
  5256. urlBase.Reset();
  5257. }
  5258. }
  5259. }
  5260. }
  5261. else if (pusOut->QueryScheme()==URL_SCHEME_UNKNOWN)
  5262. {
  5263. if (pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE)
  5264. {
  5265. if (!urlRelative.DetectAnything())
  5266. {
  5267. urlRelative.Reset();
  5268. }
  5269. }
  5270. else
  5271. {
  5272. // This code fragment is for urls with unknown schemes, that are to be
  5273. // treated hierarchically. Note that the authority (which has been passed in
  5274. // already) is terminated with /, ?, or \0. The / is *optional*, and should be
  5275. // appended if and only if the urls being combined call for it.
  5276. if (urlBase.IsReset())
  5277. {
  5278. // At this point, we're examining only the relative url. We've been brought to
  5279. // a stop by the presence of /, ? or \0. So
  5280. if (urlRelative.DetectSlash() && !fDetectedRelScheme)
  5281. {
  5282. pusOut->Accept(SLASH);
  5283. }
  5284. }
  5285. else
  5286. {
  5287. // In this case, we have both the relative and base urls to look at.
  5288. // What's the terminator for the base url
  5289. if ((urlRelative.DetectSlash()
  5290. || (!urlBase.DetectAnything()
  5291. && urlRelative.DetectAnything()
  5292. && !urlRelative.DetectQuery()))
  5293. && !fDetectedRelScheme)
  5294. {
  5295. pusOut->Accept(SLASH);
  5296. }
  5297. }
  5298. }
  5299. }
  5300. pusOut->EnableMunging();
  5301. if ((fBaseServerDetected && (fDetectAbsoluteRelPath = urlRelative.DetectAbsolutePath())))
  5302. {
  5303. if (!fRelativeServerDetected)
  5304. {
  5305. pusOut->RestoreFlags();
  5306. }
  5307. if (fDetectAbsoluteRelPath && urlRelative.DetectDots(NULL))
  5308. {
  5309. urlRelative.StopPathCompression();
  5310. }
  5311. urlRelative.FeedPath(pusOut);
  5312. urlBase.Reset();
  5313. }
  5314. else if (urlBase.DetectPath())
  5315. {
  5316. urlBase.FeedPath(pusOut);
  5317. // We don't want to contract the base path's free segment if
  5318. // a. the scheme is opaque
  5319. // b. the relative url has a path
  5320. // c. the relative url has no path, just a fragment/query
  5321. if (!(urlBase.GetSchemeNotes() & UPF_SCHEME_OPAQUE))
  5322. {
  5323. pusOut->RestoreFlags();
  5324. if (urlRelative.DetectPath()
  5325. || !urlRelative.DetectQueryOrFragment())
  5326. {
  5327. if (urlRelative.DetectPath() || !fDetectedRelScheme)
  5328. {
  5329. pusOut->Contract(FALSE);
  5330. }
  5331. if (fDetectedRelScheme)
  5332. {
  5333. urlRelative.StopPathCompression();
  5334. }
  5335. urlRelative.FeedPath(pusOut, FALSE);
  5336. urlBase.Reset();
  5337. }
  5338. else
  5339. {
  5340. urlRelative.FeedPath(pusOut, FALSE);
  5341. }
  5342. }
  5343. else
  5344. {
  5345. urlRelative.StopPathCompression();
  5346. urlRelative.FeedPath(pusOut, FALSE);
  5347. }
  5348. }
  5349. else if (urlRelative.DetectPath())
  5350. {
  5351. if (!fRelativeServerDetected)
  5352. {
  5353. pusOut->RestoreFlags();
  5354. }
  5355. else if (urlRelative.DetectDots(NULL))
  5356. {
  5357. urlRelative.StopPathCompression();
  5358. }
  5359. urlRelative.FeedPath(pusOut);
  5360. urlBase.Reset();
  5361. }
  5362. pusOut->ClearMark();
  5363. pusOut->DisableSlashFixing();
  5364. // -- QUERY AND FRAGMENT -----------------------------------------------------------
  5365. // Figure out the query
  5366. if (urlBase.DetectQueryOrFragment())
  5367. {
  5368. urlBase.FeedQueryAndFragment(pusOut);
  5369. }
  5370. if (urlRelative.DetectQueryOrFragment())
  5371. {
  5372. urlRelative.FeedQueryAndFragment(pusOut);
  5373. }
  5374. pusOut->CleanAccept(L'\0');
  5375. if (pusOut->AnyProblems())
  5376. {
  5377. hr = E_OUTOFMEMORY;
  5378. }
  5379. exit:
  5380. return hr;
  5381. }
  5382. HRESULT
  5383. FormUrlCombineResultW(LPCWSTR pszBase,
  5384. LPCWSTR pszRelative,
  5385. LPWSTR pszCombined,
  5386. LPDWORD pcchCombined,
  5387. DWORD dwFlags)
  5388. {
  5389. if ((dwFlags & URL_ESCAPE_UNSAFE)
  5390. && (dwFlags & URL_ESCAPE_SPACES_ONLY))
  5391. {
  5392. // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
  5393. // Deactivate UNSAFE
  5394. dwFlags ^= URL_ESCAPE_UNSAFE;
  5395. }
  5396. DWORD dwTempFlags = dwFlags;
  5397. if (dwFlags & URL_UNESCAPE)
  5398. {
  5399. if (dwFlags & URL_ESCAPE_UNSAFE)
  5400. {
  5401. dwTempFlags ^= URL_ESCAPE_UNSAFE;
  5402. }
  5403. if (dwFlags & URL_ESCAPE_SPACES_ONLY)
  5404. {
  5405. dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
  5406. }
  5407. }
  5408. // Make a copy of the relative url if the client wants to either
  5409. // a. unescape and escape the URL (since roundtripping is not guaranteed), or
  5410. // b. use the same location for relative URL's buffer for the combined url
  5411. HRESULT hr;
  5412. URL curlBase, curlRelative;
  5413. curlBase.Setup((PWSTR)pszBase);
  5414. curlRelative.Setup((PWSTR)pszRelative);
  5415. URL_STRING us(dwTempFlags);
  5416. hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
  5417. if (SUCCEEDED(hr))
  5418. {
  5419. DWORD ccBuffer = us.GetTotalLength();
  5420. if ((dwFlags & URL_UNESCAPE)
  5421. && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
  5422. {
  5423. // No need to strip out URL_UNESCAPE
  5424. hr = UrlEscapeW(us.GetStart(), pszCombined, pcchCombined, dwFlags);
  5425. goto exit;
  5426. }
  5427. if (ccBuffer > *pcchCombined)
  5428. {
  5429. hr = E_POINTER;
  5430. }
  5431. else if (pszCombined)
  5432. {
  5433. memcpy(pszCombined, us.GetStart(), ccBuffer*sizeof(WCHAR));
  5434. // We return only the number of characters, not buffer size required.
  5435. ccBuffer--;
  5436. }
  5437. *pcchCombined = ccBuffer;
  5438. }
  5439. else if (hr==E_FAIL)
  5440. {
  5441. // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
  5442. // We fall back on the original parser for those cases we don't handle yet.
  5443. // We should do this if and only if the new parser
  5444. // doesn't handle the flags cited above
  5445. // or we're passed a pluggable protocol without the forcing flag.
  5446. SHSTRW strwOut;
  5447. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5448. if(SUCCEEDED(hr))
  5449. {
  5450. hr = ReconcileHresults(hr, CopyOutW(&strwOut, pszCombined, pcchCombined));
  5451. }
  5452. }
  5453. exit:
  5454. return hr;
  5455. }
  5456. HRESULT
  5457. FormUrlCombineResultA(LPCSTR pszBase,
  5458. LPCSTR pszRelative,
  5459. LPSTR pszCombined,
  5460. LPDWORD pcchCombined,
  5461. DWORD dwFlags)
  5462. {
  5463. if ((dwFlags & URL_ESCAPE_UNSAFE)
  5464. &&
  5465. (dwFlags & URL_ESCAPE_SPACES_ONLY))
  5466. {
  5467. // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
  5468. // Deactivate UNSAFE
  5469. dwFlags ^= URL_ESCAPE_UNSAFE;
  5470. }
  5471. // Make a copy of the relative url if the client wants to either
  5472. // a. unescape and escape the URL (since roundtripping is not guaranteed), or
  5473. // b. use the same location for relative URL's buffer for the combined url
  5474. SHSTRW strwBase;
  5475. SHSTRW strwRelative;
  5476. HRESULT hr;
  5477. if (!(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative))))
  5478. {
  5479. return E_OUTOFMEMORY;
  5480. }
  5481. DWORD dwTempFlags = dwFlags;
  5482. if (dwFlags & URL_UNESCAPE)
  5483. {
  5484. if (dwFlags & URL_ESCAPE_UNSAFE)
  5485. {
  5486. dwTempFlags ^= URL_ESCAPE_UNSAFE;
  5487. }
  5488. if (dwFlags & URL_ESCAPE_SPACES_ONLY)
  5489. {
  5490. dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
  5491. }
  5492. }
  5493. URL curlBase, curlRelative;
  5494. curlBase.Setup(strwBase);
  5495. curlRelative.Setup(strwRelative);
  5496. URL_STRING us(dwTempFlags);
  5497. hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
  5498. if (SUCCEEDED(hr))
  5499. {
  5500. SHSTRA straOut;
  5501. if ((dwFlags & URL_UNESCAPE)
  5502. && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
  5503. {
  5504. SHSTRW strwTemp;
  5505. // No need to strip out URL_UNESCAPE
  5506. hr = SHUrlEscape(us.GetStart(), &strwTemp, dwFlags);
  5507. hr = ReconcileHresults(hr, straOut.SetStr(strwTemp));
  5508. }
  5509. else
  5510. {
  5511. hr = straOut.SetStr(us.GetStart());
  5512. }
  5513. if (SUCCEEDED(hr))
  5514. {
  5515. hr = CopyOutA(&straOut, pszCombined, pcchCombined);
  5516. }
  5517. }
  5518. else if (hr==E_FAIL)
  5519. {
  5520. // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
  5521. // We fall back on the original parser for those cases we don't handle yet.
  5522. // We should do this if and only if the new parser
  5523. // doesn't handle the flags cited above
  5524. SHSTRW strwOut;
  5525. hr = SHUrlParse(strwBase, strwRelative, &strwOut, dwFlags);
  5526. if (SUCCEEDED(hr))
  5527. {
  5528. SHSTRA straOut;
  5529. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  5530. if(SUCCEEDED(hr))
  5531. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszCombined, pcchCombined));
  5532. }
  5533. }
  5534. return hr;
  5535. }
  5536. #ifdef PROOFREAD_PARSES
  5537. EXTERN_C DWORD g_dwProofMode;
  5538. enum
  5539. {
  5540. PP_COMPARE,
  5541. PP_ORIGINAL_ONLY,
  5542. PP_NEW_ONLY
  5543. };
  5544. //#define SHOW_MESSAGEBOX
  5545. VOID LogData(PWSTR pszMsg)
  5546. {
  5547. SHSTRA str;
  5548. str.SetStr(pszMsg);
  5549. CHAR szFileName[MAX_PATH];
  5550. DWORD dwSize = MAX_PATH;
  5551. CHAR szComputerName[MAX_PATH];
  5552. HANDLE hResultsFile = NULL;
  5553. strcpy(szFileName, "\\\\BANYAN\\IPTD\\AKABIR\\1315\\");
  5554. if (!GetComputerNameA(szComputerName, &dwSize))
  5555. {
  5556. goto exit;
  5557. }
  5558. lstrcatA(szFileName, szComputerName);
  5559. hResultsFile = CreateFileA( szFileName,
  5560. GENERIC_WRITE,
  5561. FILE_SHARE_WRITE | FILE_SHARE_READ,
  5562. NULL,
  5563. OPEN_ALWAYS,
  5564. 0,
  5565. NULL);
  5566. if (INVALID_HANDLE_VALUE == hResultsFile)
  5567. hResultsFile = NULL;
  5568. if (hResultsFile)
  5569. {
  5570. if (SetFilePointer(hResultsFile, 0, NULL, FILE_END)==0xFFFFFFFF)
  5571. {
  5572. goto exit;
  5573. }
  5574. DWORD dwFoo;
  5575. if (0==WriteFile(hResultsFile, (PVOID)(PSTR)str, lstrlenW(pszMsg), &dwFoo, NULL))
  5576. {
  5577. DWORD dwE = GetLastError();
  5578. }
  5579. }
  5580. exit:
  5581. if (hResultsFile)
  5582. {
  5583. CloseHandle(hResultsFile);
  5584. }
  5585. }
  5586. HRESULT ProofreadParses(HRESULT hr,
  5587. LPCWSTR pszBase,
  5588. LPCWSTR pszRelative,
  5589. LPWSTR pszCombined,
  5590. PDWORD pcchCombined,
  5591. DWORD dwFlags,
  5592. DWORD dwSize
  5593. )
  5594. {
  5595. static WCHAR szLast[2084];
  5596. SHSTRW strwOut;
  5597. switch(g_dwProofMode)
  5598. {
  5599. case PP_COMPARE:
  5600. {
  5601. HRESULT hr2 = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5602. WCHAR wstr[2084];
  5603. DWORD ccLen = min(2084, dwSize), ccUrl = SUCCEEDED(hr) ? *pcchCombined : 0;
  5604. if(SUCCEEDED(hr2))
  5605. {
  5606. hr2 = CopyOutW(&strwOut, wstr, &ccLen);
  5607. if (hr2 == E_POINTER && hr == E_POINTER)
  5608. {
  5609. goto exitpoint;
  5610. }
  5611. // Check if cached combine equals the new parser's result
  5612. if (!StrCmpW(pszCombined, szLast))
  5613. {
  5614. goto exitpoint;
  5615. }
  5616. // Check if cached combine equals the old parser's result
  5617. if (!StrCmpW(wstr, szLast))
  5618. {
  5619. *pcchCombined = ccLen;
  5620. StrCpyNW(pszCombined, wstr, ccLen + 1);
  5621. hr = hr2;
  5622. goto exitpoint;
  5623. }
  5624. if (SUCCEEDED(hr))
  5625. {
  5626. StrCpyNW(szLast, wstr, ccLen);
  5627. if (!StrCmpW(wstr, pszCombined))
  5628. {
  5629. goto exitpoint;
  5630. }
  5631. DWORD dwBogus;
  5632. if ((dwFlags & URL_ESCAPE_SPACES_ONLY) && !(dwFlags & URL_UNESCAPE))
  5633. {
  5634. PCWSTR psz = FindSchemeW(pszCombined, &dwBogus);
  5635. DWORD dw;
  5636. if (psz
  5637. &&
  5638. (URL_SCHEME_UNKNOWN
  5639. !=GetSchemeTypeAndFlagsW(psz, dwBogus, &dw))
  5640. &&
  5641. (dw & UPF_SCHEME_OPAQUE))
  5642. {
  5643. goto exitpoint;
  5644. }
  5645. }
  5646. // Filter
  5647. // base: "http://foo/bar/"
  5648. // rel: ""
  5649. // old: "http://foo/bar"
  5650. // new: "http://foo/bar/"
  5651. if ((*pszRelative==L'\0')
  5652. &&
  5653. (!StrCmpNW(pszCombined, wstr, ccLen))
  5654. &&
  5655. (ccUrl==(ccLen+1))
  5656. &&
  5657. (pszCombined[ccLen]==L'/'))
  5658. {
  5659. goto exitpoint;
  5660. }
  5661. // Filter
  5662. // base: "http://foo/bar/what?ho"
  5663. // rel: ""
  5664. // old: "http://foo/bar/?ho"
  5665. // new: "http://foo/bar/"
  5666. if ((*pszRelative==L'\0')
  5667. &&
  5668. (!StrCmpNW(pszCombined, wstr, ccUrl))
  5669. &&
  5670. (wstr[ccUrl]==QUERY))
  5671. {
  5672. goto exitpoint;
  5673. }
  5674. // Filter
  5675. // base: "http://foo/bar/what?ho"
  5676. // rel: "/"
  5677. // old: "http://foo"
  5678. // new: "http://foo/"
  5679. if ((*pszRelative==L'/')
  5680. &&
  5681. (!StrCmpNW(pszCombined, wstr, ccLen))
  5682. &&
  5683. (ccUrl==(ccLen+1))
  5684. &&
  5685. (pszCombined[ccLen]==L'/'))
  5686. {
  5687. goto exitpoint;
  5688. }
  5689. WCHAR wmsg[8192];
  5690. wnsprintfW(wmsg,
  5691. ARRAYSIZE(wmsg),
  5692. L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nOriginal result:\"%s\"\nNew result:\"%s\"\nUse original, not new, result?\n",
  5693. dwFlags,
  5694. pszBase,
  5695. pszRelative,
  5696. wstr,
  5697. pszCombined
  5698. );
  5699. #ifdef SHOW_MESSAGEBOX
  5700. if (IDYES==MessageBoxW(
  5701. NULL,
  5702. wmsg,
  5703. L"CONTACT AKABIR: URLCOMBINE FAILURE",
  5704. MB_YESNO | MB_ICONERROR | MB_TASKMODAL))
  5705. {
  5706. StrCpyNW(pszCombined, wstr, dwSize);
  5707. *pcchCombined = ccLen;
  5708. }
  5709. else
  5710. {
  5711. StrCpyNW(szLast, pszCombined, *pcchCombined);
  5712. }
  5713. #endif
  5714. LogData(wmsg);
  5715. }
  5716. else
  5717. {
  5718. WCHAR wmsg[8192];
  5719. wnsprintfW(wmsg,
  5720. ARRAYSIZE(wmsg),
  5721. L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nFAILED:%#x\nExpected:\"%s\"\n",
  5722. dwFlags,
  5723. pszBase,
  5724. pszRelative,
  5725. hr,
  5726. wstr);
  5727. #ifdef SHOW_MESSAGEBOX
  5728. MessageBoxW(
  5729. NULL,
  5730. wmsg,
  5731. L"CONTACT AKABIR: URLCOMBINE FAILURE",
  5732. MB_OK | MB_ICONERROR | MB_TASKMODAL);
  5733. #endif
  5734. StrCpyNW(pszCombined, wstr, dwSize);
  5735. *pcchCombined = ccLen;
  5736. LogData(wmsg);
  5737. }
  5738. hr = hr2;
  5739. }
  5740. }
  5741. break;
  5742. case PP_NEW_ONLY:
  5743. break;
  5744. case PP_ORIGINAL_ONLY:
  5745. {
  5746. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5747. if(SUCCEEDED(hr))
  5748. {
  5749. hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
  5750. }
  5751. }
  5752. break;
  5753. }
  5754. exitpoint:
  5755. return hr;
  5756. }
  5757. #endif //PROOFREAD_PARSES
  5758. LWSTDAPI
  5759. UrlCombineW(LPCWSTR pszBase,
  5760. LPCWSTR pszRelative,
  5761. LPWSTR pszCombined,
  5762. LPDWORD pcchCombined,
  5763. DWORD dwFlags)
  5764. {
  5765. HRESULT hr = E_INVALIDARG;
  5766. if (pszBase && pszRelative && pcchCombined)
  5767. {
  5768. RIP(IS_VALID_STRING_PTRW(pszBase, INTERNET_MAX_PATH_LENGTH));
  5769. RIP(IS_VALID_STRING_PTRW(pszRelative, INTERNET_MAX_PATH_LENGTH));
  5770. RIP(IS_VALID_WRITE_PTR(pcchCombined, DWORD));
  5771. RIP((!pszCombined || IS_VALID_WRITE_BUFFER(pszCombined, WCHAR, *pcchCombined)));
  5772. #ifdef PROOFREAD_PARSES
  5773. DWORD dwSize = *pcchCombined;
  5774. #endif
  5775. hr = FormUrlCombineResultW(pszBase, pszRelative, pszCombined, pcchCombined, dwFlags);
  5776. #ifdef PROOFREAD_PARSES
  5777. hr = ProofreadParses(hr, pszBase, pszRelative, pszCombined, pcchCombined, dwFlags, dwSize);
  5778. #endif
  5779. }
  5780. return hr;
  5781. }
  5782. LWSTDAPI
  5783. UrlCombineA(LPCSTR pszBase,
  5784. LPCSTR pszRelative,
  5785. LPSTR pszOut,
  5786. LPDWORD pcchOut,
  5787. DWORD dwFlags)
  5788. {
  5789. HRESULT hr;
  5790. if (!pszBase
  5791. || !pszRelative
  5792. || !pcchOut)
  5793. {
  5794. hr = E_INVALIDARG;
  5795. }
  5796. else
  5797. {
  5798. RIP(IS_VALID_STRING_PTRA(pszBase, INTERNET_MAX_PATH_LENGTH));
  5799. RIP(IS_VALID_STRING_PTRA(pszRelative, INTERNET_MAX_PATH_LENGTH));
  5800. RIP(IS_VALID_WRITE_PTR(pcchOut, DWORD));
  5801. RIP((!pszOut || IS_VALID_WRITE_BUFFER(pszOut, CHAR, *pcchOut)));
  5802. hr = FormUrlCombineResultA(pszBase, pszRelative, pszOut, pcchOut, dwFlags);
  5803. }
  5804. return hr;
  5805. }
  5806. #else // end USE_FAST_PARSER
  5807. LWSTDAPI
  5808. UrlCombineW(LPCWSTR pszBase,
  5809. LPCWSTR pszRelative,
  5810. LPWSTR pszCombined,
  5811. LPDWORD pcchCombined,
  5812. DWORD dwFlags)
  5813. {
  5814. HRESULT hr = E_INVALIDARG;
  5815. RIPMSG(pszBase && IS_VALID_STRING_PTRW(pszBase, -1), "UrlCombineW: Caller passed invalid pszBase");
  5816. RIPMSG(pszRelative && IS_VALID_STRING_PTRW(pszRelative, -1), "UrlCombineW: Caller passed invalid pszRelative");
  5817. RIPMSG(NULL!=pcchOut, "UrlCombineW: Caller passed invalid pcchOut");
  5818. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineW: Caller passed invalid pszOut");
  5819. #ifdef DEBUG
  5820. if (pcchOut)
  5821. {
  5822. if (pszOut == pszBase || pszOut == pszRelative)
  5823. DEBUGWhackPathStringW(pszOut, *pcchOut);
  5824. else
  5825. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  5826. }
  5827. #endif
  5828. if (pszBase && pszRelative && pcchCombined)
  5829. {
  5830. SHSTRW strwOut;
  5831. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5832. if(SUCCEEDED(hr))
  5833. {
  5834. hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
  5835. }
  5836. }
  5837. return hr;
  5838. }
  5839. LWSTDAPI
  5840. UrlCombineA(LPCSTR pszBase,
  5841. LPCSTR pszRelative,
  5842. LPSTR pszOut,
  5843. LPDWORD pcchOut,
  5844. DWORD dwFlags)
  5845. {
  5846. HRESULT hr;
  5847. SHSTRA straOut;
  5848. RIPMSG(pszBase && IS_VALID_STRING_PTRA(pszBase, -1), "UrlCombineA: Caller passed invalid pszBase");
  5849. RIPMSG(pszRelative && IS_VALID_STRING_PTRA(pszRelative, -1), "UrlCombineA: Caller passed invalid pszRelative");
  5850. RIPMSG(NULL!=pcchOut, "UrlCombineA: Caller passed invalid pcchOut");
  5851. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineA: Caller passed invalid pszOut");
  5852. #ifdef DEBUG
  5853. if (pcchOut)
  5854. {
  5855. if (pszOut == pszBase || pszOut == pszRelative)
  5856. DEBUGWhackPathStringA(pszOut, *pcchOut);
  5857. else
  5858. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  5859. }
  5860. #endif
  5861. if (!pszBase || !pszRelative || !pcchOut)
  5862. hr = E_INVALIDARG;
  5863. else
  5864. {
  5865. SHSTRW strwOut;
  5866. SHSTRW strwBase;
  5867. SHSTRW strwRelative;
  5868. if(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative)))
  5869. hr = SHUrlParse((LPWSTR) strwBase, (LPWSTR)strwRelative, &strwOut, dwFlags);
  5870. else
  5871. hr = E_OUTOFMEMORY;
  5872. if(SUCCEEDED(hr))
  5873. hr = straOut.SetStr(strwOut);
  5874. }
  5875. if(SUCCEEDED(hr) )
  5876. hr = CopyOutA(&straOut, pszOut, pcchOut);
  5877. return hr;
  5878. }
  5879. #endif // !USE_FAST_PARSER
  5880. //
  5881. // Combines the desired scheme with the string after the scheme with a : in between. For
  5882. // some protocols, a // is placed after the colon.
  5883. //
  5884. PRIVATE HRESULT ColonSlashSlashW
  5885. (
  5886. LPCWSTR pszScheme, // url protocol (lower-case)
  5887. LPCWSTR pszAfterScheme, // string to append after the protocol
  5888. LPWSTR pszTranslatedUrl, // output buffer
  5889. int cchMax // size of output buffer
  5890. )
  5891. {
  5892. StrCpyNW(pszTranslatedUrl, pszScheme, cchMax);
  5893. // Append : after scheme and possibly a // as well.
  5894. int cchScheme = lstrlenW(pszScheme);
  5895. if (cchMax > cchScheme + 3)
  5896. {
  5897. pszTranslatedUrl[cchScheme] = L':';
  5898. // Number of characters to skip over in the buffer (how many non alphanums originally
  5899. // followed the protocol)
  5900. int cchSkip = 0;
  5901. // Number of characters past the protocol: to skip over in the URL (Do we insert slashes?)
  5902. int cchSlashes = 0;
  5903. // Modify this conditional to include any other protocols to always follow with ://
  5904. // Right now, http, https and ftp are automatic
  5905. if (!StrCmpW(pszScheme, L"http") || !StrCmpW(pszScheme, L"ftp") || !StrCmpW(pszScheme, L"https") )
  5906. {
  5907. //
  5908. // When preparing to copy the contents of pszAfterScheme into pszUrl, we can
  5909. // skip over as many as 3 non alpha numeric characters, since we are adding ://
  5910. // to the protocol directly
  5911. //
  5912. while ((cchSkip < 3) && pszAfterScheme[cchSkip] && !IsCharAlphaNumericW(pszAfterScheme[cchSkip]))
  5913. {
  5914. cchSkip++;
  5915. }
  5916. pszTranslatedUrl[cchScheme+1] = L'/';
  5917. pszTranslatedUrl[cchScheme+2] = L'/';
  5918. pszTranslatedUrl[cchScheme+3] = L'\0';
  5919. cchSlashes = 2;
  5920. }
  5921. else
  5922. // some other protocol
  5923. {
  5924. // just skip over colon
  5925. cchSkip = 1;
  5926. pszTranslatedUrl[cchScheme+1] = L'\0';
  5927. }
  5928. // Copy the rest of the Url from the UrlBuffer into the Url
  5929. StrCatBuffW(pszTranslatedUrl, pszAfterScheme + cchSkip, cchMax);
  5930. }
  5931. return S_OK;
  5932. }
  5933. //
  5934. // Scans the url for a scheme and if it does not match the known schemes, then
  5935. // a closest match is found.
  5936. //
  5937. LWSTDAPI
  5938. UrlFixupW
  5939. (
  5940. LPCWSTR pcszUrl, // URL to correct
  5941. LPWSTR pszTranslatedUrl, // buffer for corrected url (can be same as pcszUrl)
  5942. DWORD cchMax // size of pszTranslatedUrl
  5943. )
  5944. {
  5945. HRESULT hr = S_OK;
  5946. //
  5947. // Find the scheme
  5948. //
  5949. WCHAR szScheme[INTERNET_MAX_SCHEME_LENGTH];
  5950. ULONG cchScheme = 0;
  5951. LPCWSTR pszScheme = FindSchemeW(pcszUrl, &cchScheme, TRUE);
  5952. if (NULL == pszScheme || cchScheme > (ARRAYSIZE(szScheme)-1))
  5953. {
  5954. // No scheme found
  5955. return S_FALSE;
  5956. }
  5957. for (ULONG cch=0; cch < cchScheme; ++cch, ++pszScheme)
  5958. {
  5959. szScheme[cch] = Ascii_ToLowerW(*pszScheme);
  5960. }
  5961. szScheme[cch] = L'\0';
  5962. LPCWSTR pszAfterScheme = pszScheme;
  5963. //
  5964. // If input and output buffers are the same, copy the stuff after the scheme
  5965. // to another buffer so it doesn't get clobbered when we recombine.
  5966. //
  5967. WCHAR szBuf[INTERNET_MAX_PATH_LENGTH];
  5968. if (pcszUrl == pszTranslatedUrl)
  5969. {
  5970. StrCpyNW(szBuf, pszAfterScheme, ARRAYSIZE(szBuf));
  5971. pszAfterScheme = szBuf;
  5972. }
  5973. //
  5974. // See if it matches any of our known schemes
  5975. //
  5976. BOOL fKnownScheme = FALSE;
  5977. for (ULONG i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); ++i)
  5978. {
  5979. if (StrCmpW(szScheme, g_mpUrlSchemeTypes[i].pszScheme) == 0)
  5980. {
  5981. fKnownScheme = TRUE;
  5982. break;
  5983. }
  5984. }
  5985. //
  5986. // If it matches a known scheme, then just fix :// if it's ftp or http
  5987. //
  5988. if (fKnownScheme ||
  5989. // Check for pluggable protocols too
  5990. NO_ERROR == SHGetValueW(HKEY_CLASSES_ROOT, szScheme, L"URL Protocol",
  5991. NULL, NULL, NULL))
  5992. {
  5993. ColonSlashSlashW(szScheme, pszAfterScheme, pszTranslatedUrl, cchMax);
  5994. return S_OK;
  5995. }
  5996. //
  5997. // Try to find a good match for the mispelled scheme
  5998. //
  5999. // These are weights used in the heuristic for the protocol matching
  6000. // iFloor is roughly the minimum percentage of characters that must match in
  6001. // order to make a change
  6002. const int cFloor = 60;
  6003. // A match in the first character has the greatest weight
  6004. const int cCorrectFirstChar = 150;
  6005. // Any other matched character
  6006. const int cCorrectChar = 100;
  6007. // The weight given to a character that only matches the preceding
  6008. // or subsequent character in the protocol
  6009. const int cOffByOneChar = 80;
  6010. // We penalize characters that are off by one, but if we have already
  6011. // observed the offset and subsequent characters continue the offset, we add this
  6012. const int cOffsetBonus = 20;
  6013. // The value of the best "match" found so far. Higher is a better match.
  6014. int iBestEval = 0;
  6015. // The protocol that's the best fit for the misspelled one
  6016. LPCWSTR pszBestMatch = NULL;
  6017. ULONG cchProt;
  6018. for (ULONG j = 0; j < ARRAYSIZE(g_mpUrlSchemeTypes); ++j)
  6019. {
  6020. // Is this one we don't correct to?
  6021. //
  6022. // Note: https is removed from this list. The potential for an intended "http" to
  6023. // be corrected to "https" is too high, and "http" is far more common. All this
  6024. // means is that if someone wants to get to an https site, they have to have it right.
  6025. //
  6026. if (IsFlagSet(g_mpUrlSchemeTypes[j].dwFlags, UPF_SCHEME_DONTCORRECT))
  6027. continue;
  6028. LPCWSTR pszProtocol = g_mpUrlSchemeTypes[j].pszScheme;
  6029. cchProt = g_mpUrlSchemeTypes[j].cchScheme;
  6030. // Evaluation of the fit of the currently tested protocol
  6031. int iEval = 0;
  6032. //
  6033. // Keep track of the positive or negative offset in the protocol
  6034. // such as "qhttp" instead of "http" or "elnet" instead of "telnet'
  6035. //
  6036. int iPosOffset = 0;
  6037. int iNegOffset = 0;
  6038. //
  6039. // The first character has the most weight. "htp" corrects
  6040. // to "http" and not "ftp" "ftt" corrects to "ftp"
  6041. //
  6042. if (*szScheme == *pszProtocol)
  6043. {
  6044. iEval += cCorrectFirstChar;
  6045. }
  6046. // Check for a negative offset
  6047. else if(*szScheme == pszProtocol[1])
  6048. {
  6049. iEval += cOffByOneChar;
  6050. iNegOffset = 1;
  6051. }
  6052. //
  6053. // We go through the characters in the protocol, even to the
  6054. // terminating null if iPosOffset == 1 (it is never more than 1)
  6055. // This is so the final "p" in "qhttp" gets a chance to be compared
  6056. //
  6057. for (i=1; i < cchProt + iPosOffset; i++)
  6058. {
  6059. // No points for null terminations matching
  6060. if (szScheme[i] == L'\0')
  6061. break;
  6062. //
  6063. // Check for adjacent character match
  6064. //
  6065. if (szScheme[i] == pszProtocol[i])
  6066. {
  6067. iEval += cCorrectChar;
  6068. }
  6069. else
  6070. {
  6071. if (szScheme[i] == pszProtocol[i - 1])
  6072. {
  6073. iEval += cOffByOneChar;
  6074. if (iPosOffset)
  6075. iEval += cOffsetBonus;
  6076. else
  6077. iPosOffset = 1;
  6078. }
  6079. else
  6080. {
  6081. if(szScheme[i] == pszProtocol[i + 1])
  6082. {
  6083. iEval += cOffByOneChar;
  6084. if (iNegOffset)
  6085. iEval += cOffsetBonus;
  6086. else
  6087. iNegOffset = 1;
  6088. }
  6089. }
  6090. }
  6091. }
  6092. // Divide the Evaluated value by the MAX(cchScheme, cchProt)
  6093. iEval = iEval / (cchScheme > cchProt ? cchScheme : cchProt);
  6094. // A new best match?
  6095. if (iEval > iBestEval)
  6096. {
  6097. iBestEval = iEval;
  6098. pszBestMatch = pszProtocol;
  6099. //
  6100. // If we found an unquestionably good match (only 1 non-firstchar typo),
  6101. // break out of the loop
  6102. //
  6103. if (iEval >= 100)
  6104. break;
  6105. }
  6106. }
  6107. // If a good enough match was found, then correct url
  6108. if (iBestEval >= cFloor)
  6109. {
  6110. ColonSlashSlashW(pszBestMatch, pszAfterScheme, pszTranslatedUrl,cchMax);
  6111. }
  6112. else
  6113. {
  6114. hr = S_FALSE;
  6115. }
  6116. return hr;
  6117. }
// This is a port of InternetCrackUrl from wininet.
// NTRAID:108139 akabir We REALLY NEED TO CLEAN THIS CODE UP.
// RAID 109209
// A lot of the stuff is redundant with the other code available, but we
// need to be careful not to cause any regressions. Thus, I'm leaving it in for now.
//
// UrlSchemeList - the list of schemes that we support
//
// URL_SCHEME_INFO describes one row of that list.
//
typedef struct {
    // scheme text without the trailing ':' (NULL in the leading default row)
    LPWSTR SchemeName;
    // number of characters in SchemeName; ScanSchemes compares this first
    DWORD SchemeLength;
    // scheme identifier that CrackUrl reports for this row
    SHINTERNET_SCHEME SchemeType;
    // TRUE when CrackUrl requires "//" after the ':' for this scheme
    BOOL NeedSlashes;
} URL_SCHEME_INFO;
  6132. #define UrlUnescapeInPlaceW(pszUrl, dwFlags) UrlUnescapeW(pszUrl, NULL, NULL, dwFlags | URL_UNESCAPE_INPLACE)
  6133. // NOTE MEGA REDUNDANCY. We could use the similar table above and check for opaque. However
  6134. // we'd have to modify that table
  6135. PRIVATE
  6136. URL_SCHEME_INFO
  6137. UrlSchemeList[] = {
  6138. NULL, 0, SHINTERNET_SCHEME_DEFAULT, FALSE,
  6139. L"ftp", 3, SHINTERNET_SCHEME_FTP, TRUE,
  6140. L"gopher", 6, SHINTERNET_SCHEME_GOPHER, TRUE,
  6141. L"http", 4, SHINTERNET_SCHEME_HTTP, TRUE,
  6142. L"https", 5, SHINTERNET_SCHEME_HTTPS, TRUE,
  6143. L"file", 4, SHINTERNET_SCHEME_FILE, TRUE,
  6144. L"news", 4, SHINTERNET_SCHEME_NEWS, FALSE,
  6145. L"mailto", 6, SHINTERNET_SCHEME_MAILTO, FALSE,
  6146. L"socks", 5, SHINTERNET_SCHEME_SOCKS, FALSE,
  6147. L"javascript", 10, SHINTERNET_SCHEME_JAVASCRIPT, FALSE,
  6148. L"vbscript", 8, SHINTERNET_SCHEME_VBSCRIPT, FALSE,
  6149. L"res", 3, SHINTERNET_SCHEME_RES, TRUE
  6150. };
// Number of rows in UrlSchemeList, the leading default row included
#define NUMBER_OF_URL_SCHEMES ARRAYSIZE(UrlSchemeList)
// swiped from wininet\macros.h
// Non-zero when c lies in L'0'..L'9'. Note that c is evaluated twice.
#define IsDigit(c) (((c) >= L'0') && ((c) <= L'9'))
// Non-zero when an OPTIONAL pointer argument was actually supplied (is not NULL)
#define ARGUMENT_PRESENT(ArgumentPointer) (\
(CHAR *)(ArgumentPointer) != (CHAR *)(NULL) )
  6156. BOOL ScanSchemes(LPWSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
  6157. {
  6158. for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
  6159. {
  6160. if ((UrlSchemeList[i].SchemeLength == ccStr)
  6161. && (StrCmpNIW(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
  6162. {
  6163. *pwResult = i;
  6164. return TRUE;
  6165. }
  6166. }
  6167. return FALSE;
  6168. }
  6169. #define ProbeWriteStringBufferW(a, b) ProbeWriteBuffer((LPVOID)a, (b*sizeof(WCHAR)));
  6170. #define PAGE_SIZE 4096
  6171. DWORD
  6172. ProbeWriteBuffer(
  6173. IN LPVOID lpBuffer,
  6174. IN DWORD dwBufferLength
  6175. )
  6176. /*++
  6177. Routine Description:
  6178. Probes a buffer for writeability. Used as part of API parameter validation,
  6179. this function tests the first and last locations in a buffer. This is not
  6180. as strict as the IsBadXPtr() Windows APIs, but it means we don't have to
  6181. test every location in the buffer
  6182. Arguments:
  6183. lpBuffer - pointer to buffer to test
  6184. dwBufferLength - length of buffer
  6185. Return Value:
  6186. DWORD
  6187. Success - ERROR_SUCCESS
  6188. Failure - ERROR_INVALID_PARAMETER
  6189. --*/
  6190. {
  6191. DWORD error;
  6192. //
  6193. // the buffer can be NULL if the probe length is 0. Otherwise, its an error
  6194. //
  6195. if (lpBuffer == NULL) {
  6196. error = (dwBufferLength == 0) ? ERROR_SUCCESS : ERROR_INVALID_PARAMETER;
  6197. } else if (dwBufferLength != 0) {
  6198. __try {
  6199. LPBYTE p;
  6200. LPBYTE end;
  6201. volatile BYTE b;
  6202. p = (LPBYTE)lpBuffer;
  6203. end = p + dwBufferLength - 1;
  6204. b = *end;
  6205. *end = b;
  6206. //
  6207. // visit every page in the buffer - it doesn't matter that we may
  6208. // test a character in the middle of a page
  6209. //
  6210. for (; p < end; p += PAGE_SIZE) {
  6211. b = *p;
  6212. *p = b;
  6213. }
  6214. error = ERROR_SUCCESS;
  6215. } __except(EXCEPTION_EXECUTE_HANDLER) {
  6216. error = ERROR_INVALID_PARAMETER;
  6217. }
  6218. ENDEXCEPT
  6219. } else {
  6220. //
  6221. // zero-length buffer
  6222. //
  6223. error = ERROR_SUCCESS;
  6224. }
  6225. return error;
  6226. }
  6227. DWORD
  6228. ProbeReadBuffer(
  6229. IN LPVOID lpBuffer,
  6230. IN DWORD dwBufferLength
  6231. )
  6232. /*++
  6233. Routine Description:
  6234. Probes a buffer for readability. Used as part of API parameter validation,
  6235. this function tests the first and last locations in a buffer. This is not
  6236. as strict as the IsBadXPtr() Windows APIs, but it means we don't have to
  6237. test every location in the buffer
  6238. Arguments:
  6239. lpBuffer - pointer to buffer to test
  6240. dwBufferLength - length of buffer
  6241. Return Value:
  6242. DWORD
  6243. Success - ERROR_SUCCESS
  6244. Failure - ERROR_INVALID_PARAMETER
  6245. --*/
  6246. {
  6247. DWORD error;
  6248. // the buffer can be NULL if the probe length is 0. Otherwise, its an error
  6249. if (lpBuffer == NULL) {
  6250. error = (dwBufferLength == 0) ? ERROR_SUCCESS : ERROR_INVALID_PARAMETER;
  6251. } else if (dwBufferLength != 0) {
  6252. __try {
  6253. LPBYTE p;
  6254. LPBYTE end;
  6255. volatile BYTE b;
  6256. p = (LPBYTE)lpBuffer;
  6257. end = p + dwBufferLength - 1;
  6258. b = *end;
  6259. //
  6260. // visit every page in the buffer - it doesn't matter that we may
  6261. // test a character in the middle of a page
  6262. //
  6263. for (; p < end; p += PAGE_SIZE) {
  6264. b = *p;
  6265. }
  6266. error = ERROR_SUCCESS;
  6267. } __except(EXCEPTION_EXECUTE_HANDLER) {
  6268. error = ERROR_INVALID_PARAMETER;
  6269. }
  6270. ENDEXCEPT
  6271. } else {
  6272. //
  6273. // zero-length buffer
  6274. //
  6275. error = ERROR_INVALID_PARAMETER;
  6276. }
  6277. return error;
  6278. }
  6279. DWORD
  6280. ProbeStringW(
  6281. IN LPWSTR lpString,
  6282. OUT LPDWORD lpdwStringLength
  6283. )
  6284. /*++
  6285. Routine Description:
  6286. Probes a wide string buffer for readability, and returns the length of the string
  6287. Arguments:
  6288. lpString - pointer to string to check
  6289. lpdwStringLength - returned length of string
  6290. Return Value:
  6291. DWORD
  6292. Success - ERROR_SUCCESS
  6293. Failure - ERROR_INVALID_PARAMETER
  6294. --*/
  6295. {
  6296. DWORD error;
  6297. DWORD length;
  6298. //
  6299. // initialize string length and return code
  6300. //
  6301. length = 0;
  6302. error = ERROR_SUCCESS;
  6303. //
  6304. // the buffer can be NULL
  6305. //
  6306. if (lpString != NULL) {
  6307. __try {
  6308. //
  6309. // unfortunately, for a string, we have to visit every location in
  6310. // the buffer to find the terminator
  6311. //
  6312. while (*lpString != '\0') {
  6313. ++length;
  6314. ++lpString;
  6315. }
  6316. } __except(EXCEPTION_EXECUTE_HANDLER) {
  6317. error = ERROR_INVALID_PARAMETER;
  6318. }
  6319. ENDEXCEPT
  6320. }
  6321. *lpdwStringLength = length;
  6322. return error;
  6323. }
  6324. DWORD
  6325. DecodeUrl(
  6326. IN LPWSTR Url,
  6327. IN DWORD UrlLength,
  6328. OUT LPWSTR DecodedString,
  6329. IN OUT LPDWORD DecodedLength
  6330. )
  6331. /*++
  6332. Routine Description:
  6333. Converts an URL string with embedded escape sequences (%xx) to a counted
  6334. string
  6335. It is safe to pass the same pointer for the string to convert, and the
  6336. buffer for the converted results: if the current character is not escaped,
  6337. it just gets overwritten, else the input pointer is moved ahead 2 characters
  6338. further than the output pointer, which is benign
  6339. Arguments:
  6340. Url - pointer to URL string to convert
  6341. UrlLength - number of characters in UrlString
  6342. DecodedString - pointer to buffer that receives converted string
  6343. DecodedLength - IN: number of characters in buffer
  6344. OUT: number of characters converted
  6345. Return Value:
  6346. DWORD
  6347. Success - ERROR_SUCCESS
  6348. Failure - ERROR_INTERNET_INVALID_URL
  6349. UrlString couldn't be converted
  6350. ERROR_INSUFFICIENT_BUFFER
  6351. ConvertedString isn't large enough to hold all the converted
  6352. UrlString
  6353. --*/
  6354. {
  6355. // NOTE We can replace this function with UrlUnescapeInPlace
  6356. DWORD bufferRemaining;
  6357. bufferRemaining = *DecodedLength;
  6358. while (UrlLength && bufferRemaining) {
  6359. WCHAR ch;
  6360. if (*Url == L'%') {
  6361. //
  6362. // REVIEW - would %00 ever appear in an URL?
  6363. //
  6364. if (IsHex(*(Url+1)) && IsHex(*(Url+2)))
  6365. {
  6366. ch = TranslateEscapedOctetW(Url);
  6367. Url += 3;
  6368. } else {
  6369. return ERROR_INTERNET_INVALID_URL;
  6370. }
  6371. UrlLength -= 3;
  6372. } else {
  6373. ch = *Url++;
  6374. --UrlLength;
  6375. }
  6376. *DecodedString++ = ch;
  6377. --bufferRemaining;
  6378. }
  6379. if (UrlLength == 0) {
  6380. *DecodedLength -= bufferRemaining;
  6381. return ERROR_SUCCESS;
  6382. } else {
  6383. return ERROR_INSUFFICIENT_BUFFER;
  6384. }
  6385. }
  6386. DWORD
  6387. DecodeUrlInSitu(
  6388. IN LPWSTR BufferAddress,
  6389. IN OUT LPDWORD BufferLength
  6390. )
  6391. /*++
  6392. Routine Description:
  6393. Decodes an URL string, if it contains escape sequences. The conversion is
  6394. done in place, since we know that a string containing escapes is longer than
  6395. the string with escape sequences (3 bytes) converted to characters (1 byte)
  6396. Arguments:
  6397. BufferAddress - pointer to the string to convert
  6398. BufferLength - IN: number of characters to convert
  6399. OUT: length of converted string
  6400. Return Value:
  6401. DWORD
  6402. Success - ERROR_SUCCESS
  6403. Failure - ERROR_INTERNET_INVALID_URL
  6404. ERROR_INSUFFICIENT_BUFFER
  6405. --*/
  6406. {
  6407. // NOTE We can replace this function with UrlUnescapeInPlace
  6408. DWORD stringLength = *BufferLength;
  6409. return DecodeUrl(BufferAddress,
  6410. stringLength,
  6411. BufferAddress,
  6412. BufferLength);
  6413. }
  6414. DWORD
  6415. GetUrlAddressInfo(
  6416. IN OUT LPWSTR* Url,
  6417. IN OUT LPDWORD UrlLength,
  6418. OUT LPWSTR* PartOne,
  6419. OUT LPDWORD PartOneLength,
  6420. OUT LPBOOL PartOneEscape,
  6421. OUT LPWSTR* PartTwo,
  6422. OUT LPDWORD PartTwoLength,
  6423. OUT LPBOOL PartTwoEscape
  6424. )
  6425. /*++
  6426. Routine Description:
  6427. Given a string of the form foo:bar, splits them into 2 counted strings about
  6428. the ':' character. The address string may or may not contain a ':'.
  6429. This function is intended to split into substrings the host:port and
  6430. username:password strings commonly used in Internet address specifications
  6431. and by association, in URLs
  6432. Arguments:
  6433. Url - pointer to pointer to string containing URL. On output
  6434. this is advanced past the address parts
  6435. UrlLength - pointer to length of URL in UrlString. On output this is
  6436. reduced by the number of characters parsed
  6437. PartOne - pointer which will receive first part of address string
  6438. PartOneLength - pointer which will receive length of first part of address
  6439. string
  6440. PartOneEscape - TRUE on output if PartOne contains escape sequences
  6441. PartTwo - pointer which will receive second part of address string
  6442. PartTwoLength - pointer which will receive length of second part of address
  6443. string
  6444. PartOneEscape - TRUE on output if PartTwo contains escape sequences
  6445. Return Value:
  6446. DWORD
  6447. Success - ERROR_SUCCESS
  6448. Failure - ERROR_INTERNET_INVALID_URL
  6449. --*/
  6450. {
  6451. LPWSTR pString;
  6452. LPWSTR pColon;
  6453. DWORD partLength;
  6454. LPBOOL partEscape;
  6455. DWORD length;
  6456. //
  6457. // parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>]
  6458. //
  6459. pString = *Url;
  6460. pColon = NULL;
  6461. partLength = 0;
  6462. *PartOne = pString;
  6463. *PartOneLength = 0;
  6464. *PartOneEscape = FALSE;
  6465. *PartTwoEscape = FALSE;
  6466. partEscape = PartOneEscape;
  6467. length = *UrlLength;
  6468. while ((*pString!=SLASH) && (*pString != L'\0') && (length != 0)) {
  6469. if (*pString==HEX_ESCAPE) {
  6470. // if there is a % in the string then it *must* (RFC 1738) be the
  6471. // start of an escape sequence. This function just reports the
  6472. // address of the substrings and their lengths; calling functions
  6473. // must handle the escape sequences (i.e. it is their responsibility
  6474. // to decide where to put the results)
  6475. //
  6476. *partEscape = TRUE;
  6477. }
  6478. if (*pString==COLON) {
  6479. if (pColon != NULL) {
  6480. //
  6481. // we don't expect more than 1 ':'
  6482. //
  6483. // ISSUE Note that passwords might contain colons, and thus not work in this
  6484. // case
  6485. return ERROR_INTERNET_INVALID_URL;
  6486. }
  6487. pColon = pString;
  6488. *PartOneLength = partLength;
  6489. if (partLength == 0) {
  6490. *PartOne = NULL;
  6491. }
  6492. partLength = 0;
  6493. partEscape = PartTwoEscape;
  6494. } else {
  6495. ++partLength;
  6496. }
  6497. ++pString;
  6498. --length;
  6499. }
  6500. //
  6501. // we either ended on the host (or user) name or the port number (or
  6502. // password), one of which we don't know the length of
  6503. //
  6504. if (pColon == NULL) {
  6505. *PartOneLength = partLength;
  6506. *PartTwo = NULL;
  6507. *PartTwoLength = 0;
  6508. *PartTwoEscape = FALSE;
  6509. } else {
  6510. *PartTwoLength = partLength;
  6511. *PartTwo = pColon + 1;
  6512. //
  6513. // in both the <user>:<password> and <host>:<port> cases, we cannot have
  6514. // the second part without the first, although both parts being zero
  6515. // length is OK (host name will be sorted out elsewhere, but (for now,
  6516. // at least) I am allowing <>:<> for username:password, since I don't
  6517. // see it expressly disallowed in the RFC. I may be revisiting this code
  6518. // later...)
  6519. //
  6520. // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif
  6521. // if ((*PartOneLength == 0) && (partLength != 0)) {
  6522. // return ERROR_INTERNET_INVALID_URL;
  6523. // }
  6524. }
  6525. //
  6526. // update the URL pointer and length remaining
  6527. //
  6528. *Url = pString;
  6529. *UrlLength = length;
  6530. return ERROR_SUCCESS;
  6531. }
  6532. DWORD
  6533. GetUrlAddress(
  6534. IN OUT LPWSTR* lpszUrl,
  6535. OUT LPDWORD lpdwUrlLength,
  6536. OUT LPWSTR* lpszUserName OPTIONAL,
  6537. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  6538. OUT LPWSTR* lpszPassword OPTIONAL,
  6539. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  6540. OUT LPWSTR* lpszHostName OPTIONAL,
  6541. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  6542. OUT LPSHINTERNET_PORT lpPort OPTIONAL,
  6543. OUT LPBOOL pHavePort
  6544. )
  6545. /*++
  6546. Routine Description:
  6547. This function extracts any and all parts of the address information for a
  6548. generic URL. If any of the address parts contain escaped characters (%nn)
  6549. then they are converted in situ
  6550. The generic addressing format (RFC 1738) is:
  6551. <user>:<password>@<host>:<port>
  6552. The addressing information cannot contain a password without a user name,
  6553. or a port without a host name
  6554. NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name!
  6555. (e.g. http://:0/-http-gw-internal-/menu.gif)
  6556. Although only the lpszUrl and lpdwUrlLength fields are required, the address
  6557. parts will be checked for presence and completeness
  6558. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  6559. then the accompanying lpdw field must also be supplied
  6560. Arguments:
  6561. lpszUrl - IN: pointer to the URL to parse
  6562. OUT: URL remaining after address information
  6563. N.B. The url-path is NOT canonicalized (unescaped)
  6564. because it may contain protocol-specific information
  6565. which must be parsed out by the protocol-specific
  6566. parser
  6567. lpdwUrlLength - returned length of the remainder of the URL after the
  6568. address information
  6569. lpszUserName - returned pointer to the user name
  6570. This parameter can be omitted by those protocol parsers
  6571. that do not require or expect user names in the URL
  6572. lpdwUserNameLength - returned length of the user name part
  6573. This parameter can be omitted by those protocol parsers
  6574. that do not require or expect user names in the URL
  6575. lpszPassword - returned pointer to the password
  6576. This parameter can be omitted by those protocol parsers
  6577. that do not require or expect user passwords in the URL
  6578. lpdwPasswordLength - returned length of the password
  6579. This parameter can be omitted by those protocol parsers
  6580. that do not require or expect user passwords in the URL
  6581. lpszHostName - returned pointer to the host name
  6582. This parameter can be omitted by those protocol parsers
  6583. that do not require the host name info
  6584. lpdwHostNameLength - returned length of the host name
  6585. This parameter can be omitted by those protocol parsers
  6586. that do not require the host name info
  6587. lpPort - returned value of the port field
  6588. This parameter can be omitted by those protocol parsers
  6589. that do not require or expect user port number
  6590. pHavePort - returned boolean indicating whether a port was specified
  6591. in the URL or not. This value is not returned if the
  6592. lpPort parameter is omitted.
  6593. Return Value:
  6594. DWORD
  6595. Success - ERROR_SUCCESS
  6596. Failure - ERROR_INTERNET_INVALID_URL
  6597. We could not parse some part of the address info, or we
  6598. found address info where the protocol parser didn't expect
  6599. any
  6600. ERROR_INSUFFICIENT_BUFFER
  6601. We could not convert an escaped string
  6602. --*/
  6603. {
  6604. LPWSTR pAt;
  6605. DWORD urlLength;
  6606. LPWSTR pUrl;
  6607. BOOL part1Escape;
  6608. BOOL part2Escape;
  6609. WCHAR portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
  6610. DWORD portNumberLength;
  6611. LPWSTR pPortNumber;
  6612. DWORD error;
  6613. LPWSTR hostName;
  6614. DWORD hostNameLength;
  6615. pUrl = *lpszUrl;
  6616. urlLength = lstrlenW(pUrl);
  6617. //
  6618. // check to see if there is an '@' separating user name & password. If we
  6619. // see a '/' or get to the end of the string before we see the '@' then
  6620. // there is no username:password part
  6621. //
  6622. pAt = NULL;
  6623. for (DWORD i = 0; i < urlLength; ++i) {
  6624. if (pUrl[i]==SLASH) {
  6625. break;
  6626. } else if (pUrl[i]==AT) {
  6627. pAt = &pUrl[i];
  6628. break;
  6629. }
  6630. }
  6631. if (pAt != NULL) {
  6632. DWORD addressPartLength;
  6633. LPWSTR userName;
  6634. DWORD userNameLength;
  6635. LPWSTR password;
  6636. DWORD passwordLength;
  6637. addressPartLength = (DWORD) (pAt - pUrl);
  6638. urlLength -= addressPartLength;
  6639. error = GetUrlAddressInfo(&pUrl,
  6640. &addressPartLength,
  6641. &userName,
  6642. &userNameLength,
  6643. &part1Escape,
  6644. &password,
  6645. &passwordLength,
  6646. &part2Escape
  6647. );
  6648. if (error != ERROR_SUCCESS) {
  6649. return error;
  6650. }
  6651. //
  6652. // ensure there is no address information unparsed before the '@'
  6653. //
  6654. ASSERT(addressPartLength == 0);
  6655. ASSERT(pUrl == pAt);
  6656. if (ARGUMENT_PRESENT(lpszUserName)) {
  6657. ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  6658. //
  6659. // convert the user name in situ
  6660. //
  6661. if (part1Escape) {
  6662. ASSERT(userName != NULL);
  6663. ASSERT(userNameLength != 0);
  6664. error = DecodeUrlInSitu(userName, &userNameLength);
  6665. if (error != ERROR_SUCCESS) {
  6666. return error;
  6667. }
  6668. }
  6669. *lpszUserName = userName;
  6670. *lpdwUserNameLength = userNameLength;
  6671. }
  6672. if (ARGUMENT_PRESENT(lpszPassword)) {
  6673. // convert the password in situ
  6674. if (part2Escape) {
  6675. ASSERT(userName != NULL);
  6676. ASSERT(userNameLength != 0);
  6677. ASSERT(password != NULL);
  6678. ASSERT(passwordLength != 0);
  6679. error = DecodeUrlInSitu(password, &passwordLength);
  6680. if (error != ERROR_SUCCESS) {
  6681. return error;
  6682. }
  6683. }
  6684. *lpszPassword = password;
  6685. *lpdwPasswordLength = passwordLength;
  6686. }
  6687. //
  6688. // the URL pointer now points at the host:port fields (remember that
  6689. // ExtractAddressParts() must have bumped pUrl up to the end of the
  6690. // password field (if present) which ends at pAt)
  6691. //
  6692. ++pUrl;
  6693. //
  6694. // similarly, bump urlLength to account for the '@'
  6695. //
  6696. --urlLength;
  6697. } else {
  6698. //
  6699. // no '@' therefore no username or password
  6700. //
  6701. if (ARGUMENT_PRESENT(lpszUserName)) {
  6702. ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  6703. *lpszUserName = NULL;
  6704. *lpdwUserNameLength = 0;
  6705. }
  6706. if (ARGUMENT_PRESENT(lpszPassword)) {
  6707. ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
  6708. *lpszPassword = NULL;
  6709. *lpdwPasswordLength = 0;
  6710. }
  6711. }
  6712. //
  6713. // now get the host name and the optional port
  6714. //
  6715. pPortNumber = portNumber;
  6716. portNumberLength = sizeof(portNumber);
  6717. error = GetUrlAddressInfo(&pUrl,
  6718. &urlLength,
  6719. &hostName,
  6720. &hostNameLength,
  6721. &part1Escape,
  6722. &pPortNumber,
  6723. &portNumberLength,
  6724. &part2Escape
  6725. );
  6726. if (error != ERROR_SUCCESS) {
  6727. return error;
  6728. }
  6729. //
  6730. // the URL address information MUST contain the host name
  6731. //
  6732. // if ((hostName == NULL) || (hostNameLength == 0)) {
  6733. // return ERROR_INTERNET_INVALID_URL;
  6734. // }
  6735. if (ARGUMENT_PRESENT(lpszHostName)) {
  6736. ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));
  6737. //
  6738. // if the host name contains escaped characters, convert them in situ
  6739. //
  6740. if (part1Escape) {
  6741. error = DecodeUrlInSitu(hostName, &hostNameLength);
  6742. if (error != ERROR_SUCCESS) {
  6743. return error;
  6744. }
  6745. }
  6746. *lpszHostName = hostName;
  6747. *lpdwHostNameLength = hostNameLength;
  6748. }
  6749. //
  6750. // if there is a port field, convert it if there are escaped characters,
  6751. // check it for valid numeric characters, and convert it to a number
  6752. //
  6753. if (ARGUMENT_PRESENT(lpPort)) {
  6754. if (portNumberLength != 0) {
  6755. DWORD i;
  6756. DWORD port;
  6757. ASSERT(pPortNumber != NULL);
  6758. if (part2Escape) {
  6759. error = DecodeUrlInSitu(pPortNumber, &portNumberLength);
  6760. if (error != ERROR_SUCCESS) {
  6761. return error;
  6762. }
  6763. }
  6764. //
  6765. // ensure all characters in the port number buffer are numeric, and
  6766. // calculate the port number at the same time
  6767. //
  6768. for (i = 0, port = 0; i < portNumberLength; ++i) {
  6769. if (!IsDigit(*pPortNumber)) {
  6770. return ERROR_INTERNET_INVALID_URL;
  6771. }
  6772. port = port * 10 + (int)(*pPortNumber++ - L'0');
  6773. // We won't allow ports larger than 65535 ((2^16)-1)
  6774. // We have to check this every time to make sure that someone
  6775. // doesn't try to overflow a DWORD.
  6776. if (port > 65535)
  6777. {
  6778. return ERROR_INTERNET_INVALID_URL;
  6779. }
  6780. }
  6781. *lpPort = (SHINTERNET_PORT)port;
  6782. if (ARGUMENT_PRESENT(pHavePort)) {
  6783. *pHavePort = TRUE;
  6784. }
  6785. } else {
  6786. *lpPort = INTERNET_INVALID_PORT_NUMBER;
  6787. if (ARGUMENT_PRESENT(pHavePort)) {
  6788. *pHavePort = FALSE;
  6789. }
  6790. }
  6791. }
  6792. //
  6793. // update the URL pointer and the length of the url-path
  6794. //
  6795. *lpszUrl = pUrl;
  6796. *lpdwUrlLength = urlLength;
  6797. return ERROR_SUCCESS;
  6798. }
  6799. DWORD
  6800. CrackUrl(
  6801. IN OUT LPWSTR lpszUrl,
  6802. IN DWORD dwUrlLength,
  6803. IN BOOL bEscape,
  6804. OUT LPSHINTERNET_SCHEME lpSchemeType OPTIONAL,
  6805. OUT LPWSTR* lpszSchemeName OPTIONAL,
  6806. OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
  6807. OUT LPWSTR* lpszHostName OPTIONAL,
  6808. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  6809. OUT LPSHINTERNET_PORT lpServerPort OPTIONAL,
  6810. OUT LPWSTR* lpszUserName OPTIONAL,
  6811. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  6812. OUT LPWSTR* lpszPassword OPTIONAL,
  6813. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  6814. OUT LPWSTR* lpszUrlPath OPTIONAL,
  6815. OUT LPDWORD lpdwUrlPathLength OPTIONAL,
  6816. OUT LPWSTR* lpszExtraInfo OPTIONAL,
  6817. OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
  6818. OUT LPBOOL pHavePort
  6819. )
  6820. /*++
  6821. Routine Description:
  6822. Cracks an URL into its constituent parts
  6823. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  6824. then the accompanying lpdw field must also be supplied
  6825. Arguments:
  6826. lpszUrl - pointer to URL to crack. This buffer WILL BE
  6827. OVERWRITTEN if it contains escape sequences that
  6828. we will convert back to ANSI characters
  6829. dwUrlLength - if not 0, string length of lpszUrl
  6830. bEscape - TRUE if we are to escape the url-path
  6831. lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP
  6832. lpszSchemeName - returned scheme name
  6833. lpdwSchemeNameLength - length of scheme name
  6834. lpszHostName - returned host name
  6835. lpdwHostNameLength - length of host name buffer
  6836. lpServerPort - returned server port if present in the URL, else 0
  6837. lpszUserName - returned user name if present
  6838. lpdwUserNameLength - length of user name buffer
  6839. lpszPassword - returned password if present
  6840. lpdwPasswordLength - length of password buffer
  6841. lpszUrlPath - returned, canonicalized URL path
  6842. lpdwUrlPathLength - length of url-path buffer
  6843. lpszExtraInfo - returned search string or intra-page link if present
  6844. lpdwExtraInfoLength - length of extra info buffer
  6845. pHavePort - returned boolean indicating whether port was specified
  6846. Return Value:
  6847. DWORD
  6848. Success - ERROR_SUCCESS
  6849. Failure - ERROR_INTERNET_UNRECOGNIZED_SCHEME
  6850. --*/
  6851. {
  6852. DWORD error;
  6853. DWORD schemeLength;
  6854. SHINTERNET_SCHEME schemeType;
  6855. //
  6856. // if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length
  6857. //
  6858. if (dwUrlLength == 0) {
  6859. dwUrlLength = lstrlenW(lpszUrl);
  6860. }
  6861. //
  6862. // get parser based on the protocol name
  6863. //
  6864. for (schemeLength = 0; lpszUrl[schemeLength]!=COLON; ++schemeLength) {
  6865. if ((dwUrlLength == 0) || (lpszUrl[schemeLength] == '\0')) {
  6866. //
  6867. // no ':' in URL? Bogus (dude)
  6868. //
  6869. error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
  6870. goto quit;
  6871. }
  6872. --dwUrlLength;
  6873. }
  6874. DWORD i;
  6875. int skip;
  6876. BOOL isGeneric;
  6877. BOOL needSlashes;
  6878. BOOL haveSlashes;
  6879. isGeneric = FALSE;
  6880. needSlashes = FALSE;
  6881. haveSlashes = FALSE;
  6882. schemeType = SHINTERNET_SCHEME_UNKNOWN;
  6883. if (ScanSchemes(lpszUrl, schemeLength, &i))
  6884. {
  6885. schemeType = UrlSchemeList[i].SchemeType;
  6886. needSlashes = UrlSchemeList[i].NeedSlashes;
  6887. }
  6888. skip = 1; // skip ':'
  6889. if ((dwUrlLength > 3) && (StrCmpNIW(&lpszUrl[schemeLength], L"://", 3) == 0)) {
  6890. skip = 3; // skip "://"
  6891. haveSlashes = TRUE;
  6892. }
  6893. if (schemeType == SHINTERNET_SCHEME_FILE)
  6894. isGeneric = TRUE;
  6895. if (schemeType == SHINTERNET_SCHEME_NEWS ||
  6896. schemeType == SHINTERNET_SCHEME_UNKNOWN) {
  6897. //
  6898. // urls can be hierarchical or opaque. if the slashes
  6899. // exist, then we should assume hierarchical
  6900. // when we dont know the scheme or it is news:.
  6901. // otherwise it is opaque (isGeneric)
  6902. //
  6903. needSlashes = haveSlashes;
  6904. isGeneric = !haveSlashes;
  6905. }
  6906. //
  6907. // If we don't have slashes, make sure we don't need them.
  6908. // If we have slashes, make sure they are required.
  6909. //
  6910. if ((!haveSlashes && !needSlashes) || (haveSlashes && needSlashes)) {
  6911. if (ARGUMENT_PRESENT(lpSchemeType)) {
  6912. *lpSchemeType = schemeType;
  6913. }
  6914. if (ARGUMENT_PRESENT(lpszSchemeName)) {
  6915. *lpszSchemeName = lpszUrl;
  6916. *lpdwSchemeNameLength = schemeLength;
  6917. }
  6918. lpszUrl += schemeLength + skip;
  6919. dwUrlLength -= skip;
  6920. if (SHINTERNET_SCHEME_RES == schemeType) {
  6921. if (ARGUMENT_PRESENT(lpszUserName)) {
  6922. *lpszUserName = NULL;
  6923. *lpdwUserNameLength = 0;
  6924. }
  6925. if (ARGUMENT_PRESENT(lpszPassword)) {
  6926. *lpszPassword = NULL;
  6927. *lpdwPasswordLength = 0;
  6928. }
  6929. if (ARGUMENT_PRESENT(lpServerPort)) {
  6930. *lpServerPort = 0;
  6931. }
  6932. PWSTR psz = lpszUrl;
  6933. while (*lpszUrl && *lpszUrl!=SLASH)
  6934. lpszUrl++;
  6935. if (ARGUMENT_PRESENT(lpszHostName)) {
  6936. *lpszHostName = psz;
  6937. *lpdwHostNameLength = (DWORD)(lpszUrl - psz);
  6938. dwUrlLength -= *lpdwHostNameLength;
  6939. error = DecodeUrlInSitu(*lpszHostName, lpdwHostNameLength);
  6940. }
  6941. } else if (isGeneric) {
  6942. if (ARGUMENT_PRESENT(lpszUserName)) {
  6943. *lpszUserName = NULL;
  6944. *lpdwUserNameLength = 0;
  6945. }
  6946. if (ARGUMENT_PRESENT(lpszPassword)) {
  6947. *lpszPassword = NULL;
  6948. *lpdwPasswordLength = 0;
  6949. }
  6950. if (ARGUMENT_PRESENT(lpszHostName)) {
  6951. *lpszHostName = NULL;
  6952. *lpdwHostNameLength = 0;
  6953. }
  6954. if (ARGUMENT_PRESENT(lpServerPort)) {
  6955. *lpServerPort = 0;
  6956. }
  6957. error = ERROR_SUCCESS;
  6958. } else {
  6959. error = GetUrlAddress(&lpszUrl,
  6960. &dwUrlLength,
  6961. lpszUserName,
  6962. lpdwUserNameLength,
  6963. lpszPassword,
  6964. lpdwPasswordLength,
  6965. lpszHostName,
  6966. lpdwHostNameLength,
  6967. lpServerPort,
  6968. pHavePort
  6969. );
  6970. }
  6971. if (bEscape && (error == ERROR_SUCCESS)) {
  6972. error = DecodeUrlInSitu(lpszUrl, &dwUrlLength);
  6973. }
  6974. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszExtraInfo)) {
  6975. *lpdwExtraInfoLength = 0;
  6976. for (i = 0; i < (int)dwUrlLength; i++) {
  6977. if (lpszUrl[i] == '?' || lpszUrl[i] == '#') {
  6978. *lpszExtraInfo = &lpszUrl[i];
  6979. *lpdwExtraInfoLength = dwUrlLength - i;
  6980. dwUrlLength -= *lpdwExtraInfoLength;
  6981. }
  6982. }
  6983. }
  6984. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszUrlPath)) {
  6985. *lpszUrlPath = lpszUrl;
  6986. *lpdwUrlPathLength = dwUrlLength;
  6987. }
  6988. } else {
  6989. error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
  6990. }
  6991. quit:
  6992. return error;
  6993. }
  6994. BOOL
  6995. WINAPI
  6996. UrlCrackW(
  6997. IN LPCWSTR lpszUrl,
  6998. IN DWORD dwUrlLength,
  6999. IN DWORD dwFlags,
  7000. IN LPSHURL_COMPONENTSW lpUrlComponents
  7001. )
  7002. /*++
  7003. Routine Description:
  7004. Cracks an URL into its constituent parts. Optionally escapes the url-path.
  7005. We assume that the user has supplied large enough buffers for the various
  7006. URL parts
  7007. Arguments:
  7008. lpszUrl - pointer to URL to crack
  7009. dwUrlLength - 0 if lpszUrl is ASCIIZ string, else length of lpszUrl
  7010. dwFlags - flags controlling operation
  7011. lpUrlComponents - pointer to URL_COMPONENTS
  7012. Return Value:
  7013. BOOL
  7014. Success - TRUE
  7015. Failure - FALSE. Call GetLastError() for more info
  7016. --*/
  7017. {
  7018. DWORD error = ERROR_SUCCESS;
  7019. // validate parameters
  7020. if (ARGUMENT_PRESENT(lpszUrl)) {
  7021. if (!dwUrlLength) {
  7022. error = ProbeStringW((LPWSTR)lpszUrl, &dwUrlLength);
  7023. } else if (IsBadReadPtr((LPVOID)lpszUrl, dwUrlLength*sizeof(WCHAR))) {
  7024. error = ERROR_INVALID_PARAMETER;
  7025. }
  7026. } else {
  7027. error = ERROR_INVALID_PARAMETER;
  7028. }
  7029. if (error != ERROR_SUCCESS)
  7030. {
  7031. goto quit;
  7032. }
  7033. if (IsBadWritePtr(lpUrlComponents, sizeof(*lpUrlComponents))
  7034. || (lpUrlComponents->dwStructSize != sizeof(*lpUrlComponents)))
  7035. {
  7036. error = ERROR_INVALID_PARAMETER;
  7037. goto quit;
  7038. }
  7039. //
  7040. // we only allow two flags for this API
  7041. //
  7042. if (dwFlags & ~(ICU_ESCAPE | ICU_DECODE)) {
  7043. error = ERROR_INVALID_PARAMETER;
  7044. goto quit;
  7045. }
  7046. //
  7047. // get the individual components to return. If they reference a buffer then
  7048. // check it for writeability
  7049. //
  7050. LPWSTR lpUrl;
  7051. LPWSTR urlCopy;
  7052. SHINTERNET_SCHEME schemeType;
  7053. LPWSTR schemeName;
  7054. DWORD schemeNameLength;
  7055. LPWSTR hostName;
  7056. DWORD hostNameLength;
  7057. SHINTERNET_PORT nPort;
  7058. LPWSTR userName;
  7059. DWORD userNameLength;
  7060. LPWSTR password;
  7061. DWORD passwordLength;
  7062. LPWSTR urlPath;
  7063. DWORD urlPathLength;
  7064. LPWSTR extraInfo;
  7065. DWORD extraInfoLength;
  7066. BOOL copyComponent;
  7067. BOOL havePort;
  7068. copyComponent = FALSE;
  7069. schemeName = lpUrlComponents->lpszScheme;
  7070. schemeNameLength = lpUrlComponents->dwSchemeLength;
  7071. if ((schemeName != NULL) && (schemeNameLength != 0)) {
  7072. error = ProbeWriteStringBufferW((LPVOID)schemeName, schemeNameLength);
  7073. if (error != ERROR_SUCCESS) {
  7074. goto quit;
  7075. }
  7076. *schemeName = '\0';
  7077. copyComponent = TRUE;
  7078. }
  7079. hostName = lpUrlComponents->lpszHostName;
  7080. hostNameLength = lpUrlComponents->dwHostNameLength;
  7081. if ((hostName != NULL) && (hostNameLength != 0)) {
  7082. error = ProbeWriteStringBufferW((LPVOID)hostName, hostNameLength);
  7083. if (error != ERROR_SUCCESS) {
  7084. goto quit;
  7085. }
  7086. *hostName = '\0';
  7087. copyComponent = TRUE;
  7088. }
  7089. userName = lpUrlComponents->lpszUserName;
  7090. userNameLength = lpUrlComponents->dwUserNameLength;
  7091. if ((userName != NULL) && (userNameLength != 0)) {
  7092. error = ProbeWriteStringBufferW((LPVOID)userName, userNameLength);
  7093. if (error != ERROR_SUCCESS) {
  7094. goto quit;
  7095. }
  7096. *userName = '\0';
  7097. copyComponent = TRUE;
  7098. }
  7099. password = lpUrlComponents->lpszPassword;
  7100. passwordLength = lpUrlComponents->dwPasswordLength;
  7101. if ((password != NULL) && (passwordLength != 0)) {
  7102. error = ProbeWriteStringBufferW((LPVOID)password, passwordLength);
  7103. if (error != ERROR_SUCCESS) {
  7104. goto quit;
  7105. }
  7106. *password = '\0';
  7107. copyComponent = TRUE;
  7108. }
  7109. urlPath = lpUrlComponents->lpszUrlPath;
  7110. urlPathLength = lpUrlComponents->dwUrlPathLength;
  7111. if ((urlPath != NULL) && (urlPathLength != 0)) {
  7112. error = ProbeWriteStringBufferW((LPVOID)urlPath, urlPathLength);
  7113. if (error != ERROR_SUCCESS) {
  7114. goto quit;
  7115. }
  7116. *urlPath = '\0';
  7117. copyComponent = TRUE;
  7118. }
  7119. extraInfo = lpUrlComponents->lpszExtraInfo;
  7120. extraInfoLength = lpUrlComponents->dwExtraInfoLength;
  7121. if ((extraInfo != NULL) && (extraInfoLength != 0)) {
  7122. error = ProbeWriteStringBufferW((LPVOID)extraInfo, extraInfoLength);
  7123. if (error != ERROR_SUCCESS) {
  7124. goto quit;
  7125. }
  7126. *extraInfo = '\0';
  7127. copyComponent = TRUE;
  7128. }
  7129. //
  7130. // we can only escape or decode the URL if the caller has provided us with
  7131. // buffers to write the escaped strings into
  7132. //
  7133. if (dwFlags & (ICU_ESCAPE | ICU_DECODE)) {
  7134. if (!copyComponent) {
  7135. error = ERROR_INVALID_PARAMETER;
  7136. goto quit;
  7137. }
  7138. //
  7139. // create a copy of the URL. CrackUrl() will modify this in situ. We
  7140. // need to copy the results back to the user's buffer(s)
  7141. //
  7142. DWORD dw = dwUrlLength;
  7143. if (!dw)
  7144. {
  7145. dw = lstrlenW(lpszUrl);
  7146. }
  7147. urlCopy = new WCHAR[dw+1];
  7148. if (urlCopy == NULL) {
  7149. error = ERROR_NOT_ENOUGH_MEMORY;
  7150. goto quit;
  7151. }
  7152. memcpy(urlCopy, lpszUrl, (dw+1)*sizeof(WCHAR));
  7153. lpUrl = urlCopy;
  7154. } else {
  7155. lpUrl = (LPWSTR)lpszUrl;
  7156. urlCopy = NULL;
  7157. }
  7158. //
  7159. // crack the URL into its constituent parts
  7160. //
  7161. error = CrackUrl(lpUrl,
  7162. dwUrlLength,
  7163. (dwFlags & ICU_ESCAPE) ? TRUE : FALSE,
  7164. &schemeType,
  7165. &schemeName,
  7166. &schemeNameLength,
  7167. &hostName,
  7168. &hostNameLength,
  7169. &nPort,
  7170. &userName,
  7171. &userNameLength,
  7172. &password,
  7173. &passwordLength,
  7174. &urlPath,
  7175. &urlPathLength,
  7176. extraInfoLength ? &extraInfo : NULL,
  7177. extraInfoLength ? &extraInfoLength : 0,
  7178. &havePort
  7179. );
  7180. if (error != ERROR_SUCCESS) {
  7181. goto crack_error;
  7182. }
  7183. BOOL copyFailure;
  7184. copyFailure = FALSE;
  7185. //
  7186. // update the URL_COMPONENTS structure based on the results, and what was
  7187. // asked for
  7188. //
  7189. if (lpUrlComponents->lpszScheme != NULL) {
  7190. if (lpUrlComponents->dwSchemeLength > schemeNameLength) {
  7191. memcpy(lpUrlComponents->lpszScheme, schemeName, schemeNameLength*sizeof(WCHAR));
  7192. lpUrlComponents->lpszScheme[schemeNameLength] = '\0';
  7193. if (dwFlags & ICU_DECODE) {
  7194. UrlUnescapeInPlaceW(lpUrlComponents->lpszScheme, 0);
  7195. }
  7196. } else {
  7197. ++schemeNameLength;
  7198. copyFailure = TRUE;
  7199. }
  7200. lpUrlComponents->dwSchemeLength = schemeNameLength;
  7201. } else if (lpUrlComponents->dwSchemeLength != 0) {
  7202. lpUrlComponents->lpszScheme = schemeName;
  7203. lpUrlComponents->dwSchemeLength = schemeNameLength;
  7204. }
  7205. if (lpUrlComponents->lpszHostName != NULL) {
  7206. if (lpUrlComponents->dwHostNameLength > hostNameLength) {
  7207. memcpy(lpUrlComponents->lpszHostName, hostName, hostNameLength*sizeof(WCHAR));
  7208. lpUrlComponents->lpszHostName[hostNameLength] = '\0';
  7209. if (dwFlags & ICU_DECODE) {
  7210. UrlUnescapeInPlaceW(lpUrlComponents->lpszHostName, 0);
  7211. }
  7212. } else {
  7213. ++hostNameLength;
  7214. copyFailure = TRUE;
  7215. }
  7216. lpUrlComponents->dwHostNameLength = hostNameLength;
  7217. } else if (lpUrlComponents->dwHostNameLength != 0) {
  7218. lpUrlComponents->lpszHostName = hostName;
  7219. lpUrlComponents->dwHostNameLength = hostNameLength;
  7220. }
  7221. if (lpUrlComponents->lpszUserName != NULL) {
  7222. if (lpUrlComponents->dwUserNameLength > userNameLength) {
  7223. memcpy(lpUrlComponents->lpszUserName, userName, userNameLength*sizeof(WCHAR));
  7224. lpUrlComponents->lpszUserName[userNameLength] = '\0';
  7225. if (dwFlags & ICU_DECODE) {
  7226. UrlUnescapeInPlaceW(lpUrlComponents->lpszUserName, 0);
  7227. }
  7228. } else {
  7229. ++userNameLength;
  7230. copyFailure = TRUE;
  7231. }
  7232. lpUrlComponents->dwUserNameLength = userNameLength;
  7233. } else if (lpUrlComponents->dwUserNameLength != 0) {
  7234. lpUrlComponents->lpszUserName = userName;
  7235. lpUrlComponents->dwUserNameLength = userNameLength;
  7236. }
  7237. if (lpUrlComponents->lpszPassword != NULL) {
  7238. if (lpUrlComponents->dwPasswordLength > passwordLength) {
  7239. memcpy(lpUrlComponents->lpszPassword, password, passwordLength*sizeof(WCHAR));
  7240. lpUrlComponents->lpszPassword[passwordLength] = '\0';
  7241. if (dwFlags & ICU_DECODE) {
  7242. UrlUnescapeInPlaceW(lpUrlComponents->lpszPassword, 0);
  7243. }
  7244. } else {
  7245. ++passwordLength;
  7246. copyFailure = TRUE;
  7247. }
  7248. lpUrlComponents->dwPasswordLength = passwordLength;
  7249. } else if (lpUrlComponents->dwPasswordLength != 0) {
  7250. lpUrlComponents->lpszPassword = password;
  7251. lpUrlComponents->dwPasswordLength = passwordLength;
  7252. }
  7253. if (lpUrlComponents->lpszUrlPath != NULL) {
  7254. if(schemeType == SHINTERNET_SCHEME_FILE)
  7255. {
  7256. //
  7257. // for file: urls we return the path component
  7258. // as a valid dos path.
  7259. //
  7260. copyFailure = FAILED(PathCreateFromUrlW(lpUrl, lpUrlComponents->lpszUrlPath, &(lpUrlComponents->dwUrlPathLength), 0));
  7261. }
  7262. else if (lpUrlComponents->dwUrlPathLength > urlPathLength) {
  7263. memcpy(lpUrlComponents->lpszUrlPath, urlPath, urlPathLength*sizeof(WCHAR));
  7264. lpUrlComponents->lpszUrlPath[urlPathLength] = '\0';
  7265. if (dwFlags & ICU_DECODE) {
  7266. UrlUnescapeInPlaceW(lpUrlComponents->lpszUrlPath, 0);
  7267. }
  7268. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7269. } else {
  7270. ++urlPathLength;
  7271. copyFailure = TRUE;
  7272. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7273. }
  7274. } else if (lpUrlComponents->dwUrlPathLength != 0) {
  7275. lpUrlComponents->lpszUrlPath = urlPath;
  7276. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7277. }
  7278. if (lpUrlComponents->lpszExtraInfo != NULL) {
  7279. if (lpUrlComponents->dwExtraInfoLength > extraInfoLength) {
  7280. memcpy(lpUrlComponents->lpszExtraInfo, extraInfo, extraInfoLength*sizeof(WCHAR));
  7281. lpUrlComponents->lpszExtraInfo[extraInfoLength] = '\0';
  7282. if (dwFlags & ICU_DECODE) {
  7283. UrlUnescapeInPlaceW(lpUrlComponents->lpszExtraInfo, 0);
  7284. }
  7285. } else {
  7286. ++extraInfoLength;
  7287. copyFailure = TRUE;
  7288. }
  7289. lpUrlComponents->dwExtraInfoLength = extraInfoLength;
  7290. } else if (lpUrlComponents->dwExtraInfoLength != 0) {
  7291. lpUrlComponents->lpszExtraInfo = extraInfo;
  7292. lpUrlComponents->dwExtraInfoLength = extraInfoLength;
  7293. }
  7294. //
  7295. // we may have failed to copy one or more components because we didn't have
  7296. // enough buffer space.
  7297. //
  7298. // N.B. Don't change error below here. If need be, move this test lower
  7299. //
  7300. if (copyFailure) {
  7301. error = ERROR_INSUFFICIENT_BUFFER;
  7302. }
  7303. //
  7304. // copy the scheme type
  7305. //
  7306. lpUrlComponents->nScheme = schemeType;
  7307. //
  7308. // convert 0 port (not in URL) to default value for scheme
  7309. //
  7310. if (nPort == INTERNET_INVALID_PORT_NUMBER && !havePort) {
  7311. switch (schemeType) {
  7312. case SHINTERNET_SCHEME_FTP:
  7313. nPort = INTERNET_DEFAULT_FTP_PORT;
  7314. break;
  7315. case SHINTERNET_SCHEME_GOPHER:
  7316. nPort = INTERNET_DEFAULT_GOPHER_PORT;
  7317. break;
  7318. case SHINTERNET_SCHEME_HTTP:
  7319. nPort = INTERNET_DEFAULT_HTTP_PORT;
  7320. break;
  7321. case SHINTERNET_SCHEME_HTTPS:
  7322. nPort = INTERNET_DEFAULT_HTTPS_PORT;
  7323. break;
  7324. }
  7325. }
  7326. lpUrlComponents->nPort = nPort;
  7327. crack_error:
  7328. if (urlCopy != NULL) {
  7329. delete [] urlCopy;
  7330. }
  7331. quit:
  7332. // return HRESULT_FROM_WIN32(error);
  7333. if (error!=ERROR_SUCCESS)
  7334. {
  7335. SetLastError(error);
  7336. }
  7337. return error==ERROR_SUCCESS;
  7338. }