Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

8800 lines
238 KiB

  1. /*++
  2. Copyright (c) 1994 Microsoft Corporation
  3. Module Name:
  4. urlpars.cpp
  5. Abstract:
  6. Contains all the worker routines for Combine and Canonicalize
  7. Contents:
  8. (ConvertChar)
  9. Author:
  10. Zeke Lucas (zekel) 16-Dez-96
  11. Ahsan Kabir (akabir): UrlCombine parser rewritten in July-Sept98
  12. Environment:
  13. Win32(s) user-mode DLL
  14. Revision History:
  15. there is about one percent of this derived
  16. from the Spyglass or MSHTML/WININET codebase
  17. --*/
  18. #include "priv.h"
  19. #include <shstr.h>
  20. #ifdef UNIX
  21. #include <shlobj.h>
  22. #endif
  23. #include <intshcut.h>
  24. #include <shlwapip.h>
  25. #ifdef UNIX
  26. #include "unixstuff.h"
  27. #endif
  28. #include <wininet.h>
  29. #define DM_PERF 0 // perf stats
  30. #define PF_LOGSCHEMEHITS 0x00000001
  31. #ifndef CPP_FUNCTIONS
  32. #define CPP_FUNCTIONS
  33. #include <crtfree.h>
  34. #endif
  35. #define USE_FAST_PARSER
  36. #ifdef DEBUG
  37. //#define PROOFREAD_PARSES
  38. #endif
  39. // Same as in wininet; however, this is only theoretical, since urls aren't necessarily so
  40. // constrained. However, this is true throughout the product, so we'll have to do this.
  // Length limits used by this parser (rationale is in the comment just above).
  41. #define INTERNET_MAX_PATH_LENGTH 2048
  42. #define INTERNET_MAX_SCHEME_LENGTH 32
  // The escape introducer character, in wide and narrow form.
  43. #define HEX_ESCAPE L'%'
  44. #define HEX_ESCAPE_A '%'
  // Stores a wide NUL at pch, cutting the string off at that position.
  45. #define TERMSTR(pch) *(pch) = L'\0'
  46. // (WCHAR) 8 is backspace
  47. #define DEADSEGCHAR ((WCHAR) 8)
  // Stores DEADSEGCHAR at pch; segment walkers treat a segment that starts
  // with DEADSEGCHAR as dead (see IsLiveSegment / NextLiveSegment).
  48. #define KILLSEG(pch) *(pch) = DEADSEGCHAR
  // Named wide-character constants for the delimiters the parser looks for.
  49. #define CR L'\r'
  50. #define LF L'\n'
  51. #define TAB L'\t'
  52. #define SPC L' '
  53. #define SLASH L'/'
  54. #define WHACK L'\\'
  55. #define QUERY L'?'
  56. #define POUND L'#'
  57. #define SEMICOLON L';'
  58. #define COLON L':'
  59. #define BAR L'|'
  60. #define DOT L'.'
  61. #define AT L'@'
  // URLPARTS.dwFlags bits. The low byte describes the scheme, the next nibbles
  // describe the segments and extra segments (see the *_MASK values below).
  62. #define UPF_SCHEME_OPAQUE 0x00000001 // should not be treated as hierarchical
  63. #define UPF_SCHEME_INTERNET 0x00000002
  64. #define UPF_SCHEME_NOHISTORY 0x00000004
  65. #define UPF_SCHEME_CONVERT 0x00000008 // treat slashes and whacks as equiv
  66. #define UPF_SCHEME_DONTCORRECT 0x00000010 // Don't try to autocorrect to this scheme
  67. #define UPF_SEG_ABSOLUTE 0x00000100 // the initial segment is the root
  68. #define UPF_SEG_LOCKFIRST 0x00000200 // this is for file parsing
  69. #define UPF_SEG_EMPTYSEG 0x00000400 // this was an empty string, but is still important
  70. #define UPF_EXSEG_DIRECTORY 0x00001000 // the final segment is a "directory" (trailing slash)
  71. #define UPF_FILEISPATHURL 0x10000000 // this is for file paths, don't unescape because they are actually DOS paths
  72. //
  73. // the masks are for inheritance purposes during BlendParts
  74. // if you inherit that part you inherit that mask
  75. //
  76. #define UPF_SCHEME_MASK 0x000000FF
  77. #define UPF_SEG_MASK 0x00000F00
  78. #define UPF_EXSEG_MASK 0x0000F000
  79. // right now these masks are unused, and can be recycled
  80. #define UPF_SERVER_MASK 0x000F0000
  81. #define UPF_QUERY_MASK 0x0F000000
  82. extern "C" int _StrCmpNA(LPCSTR lpStr1, LPCSTR lpStr2, int nChar, BOOL fMBCS);
  83. extern "C" LPSTR _StrChrA(LPCSTR lpStart, WORD wMatch, BOOL fMBCS);
  // The pieces of one URL after it has been broken apart in place.
  // The string members point into the caller's writable URL buffer; the
  // Break*() routines store NUL terminators in that buffer as they go.
  84. typedef struct _UrlParts
  85. {
  // UPF_* flags (scheme flags come from g_mpUrlSchemeTypes)
  86. DWORD dwFlags;
  // scheme text, NUL-terminated and lowercased by BreakScheme(); NULL if none
  87. LPWSTR pszScheme;
  // URL_SCHEME_* ordinal for pszScheme
  88. URL_SCHEME eScheme;
  // server/host text, set by the *BreakServer() routines
  89. LPWSTR pszServer;
  // NOTE(review): presumably the path segments and their count; the code
  // that fills these is outside this view -- confirm
  90. LPWSTR pszSegments;
  91. DWORD cSegments;
  // NOTE(review): presumably additional trailing segments; confirm
  92. LPWSTR pszExtraSegs;
  93. DWORD cExtraSegs;
  // text after '?', set by BreakQuery()
  94. LPWSTR pszQuery;
  // text after '#', set by BreakFragment()
  95. LPWSTR pszFragment;
  96. } URLPARTS, *PURLPARTS;
  97. HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags);
  98. HRESULT SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags);
  99. // Ansi wrappers might overwrite the unicode core's return value
  100. // We should try to prevent that
  101. HRESULT ReconcileHresults(HRESULT hr1, HRESULT hr2)
  102. {
  103. return (hr2==S_OK) ? hr1 : hr2;
  104. }
  // Classification bitmask for the characters 0x20..0x7F, indexed by (ch - 32).
  // IsSafe() is the only reader; the bit meanings are listed below.
  105. PRIVATE CONST WORD isSafe[96] =
  106. /* Bit 0 alphadigit -- 'a' to 'z', '0' to '9', 'A' to 'Z'
  107. ** Bit 1 Hex -- '0' to '9', 'a' to 'f', 'A' to 'F'
  108. ** Bit 2 valid scheme -- alphadigit | "-" | "." | "+"
  109. ** Bit 3 mark -- "%" | "$"| "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" | ","
  110. */
  111. /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
  112. // {0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 4, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
  113. // IE4 BETA1: allow + through unmolested. Should consider other options
  114. // post beta1. 12feb97 tonyci
  115. {0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 12, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
  116. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 8, 0, 8, 0, 0, /* 3x 0123456789:;<=>? */
  117. 8, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x @ABCDEFGHIJKLMNO */
  118. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 8, /* 5X PQRSTUVWXYZ[\]^_ */
  119. 0, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x `abcdefghijklmno */
  120. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 8, 0}; /* 7X pqrstuvwxyz{|}~ DEL */
  // Upper-case hexadecimal digits, indexed by nibble value.
  121. PRIVATE const WCHAR hex[] = L"0123456789ABCDEF";
  122. PRIVATE inline BOOL IsSafe(WCHAR ch, WORD mask)
  123. {
  124. if(((ch > 31 ) && (ch < 128) && (isSafe[ch - 32] & mask)))
  125. return TRUE;
  126. return FALSE;
  127. }
  // Character-class helpers built on IsSafe(); the numeric masks are the
  // bit values documented on isSafe[] (1 alphadigit, 2 hex, 4 scheme, 8 mark).
  // Every use of the argument is parenthesized so that an expression argument
  // (e.g. a conditional) cannot re-associate with the operators in the macro.
  // Note: IsSafePathChar evaluates its argument twice.
  #define IsAlphaDigit(c) IsSafe((c), 1)
  #define IsHex(c) IsSafe((c), 2)
  #define IsValidSchemeCharA(c) IsSafe((c), 5)
  #define IsSafePathChar(c) (((c) > 0xff) || IsSafe((c), 9))
  #define IsUpper(c) ((c) >= 'A' && (c) <= 'Z')
  133. PRIVATE inline BOOL IsAsciiCharW(WCHAR ch)
  134. {
  135. return (!(ch >> 8) && ((CHAR) ch));
  136. }
  137. PRIVATE inline WCHAR Ascii_ToLowerW(WCHAR ch)
  138. {
  139. return (ch >= L'A' && ch <= L'Z') ? (ch - L'A' + L'a') : ch;
  140. }
  141. BOOL IsValidSchemeCharW(WCHAR ch)
  142. {
  143. if(IsAsciiCharW(ch))
  144. return IsSafe( (CHAR) ch, 5);
  145. return FALSE;
  146. }
  147. WCHAR const c_szHttpScheme[] = L"http";
  148. WCHAR const c_szFileScheme[] = L"file";
  149. WCHAR const c_szFTPScheme[] = L"ftp";
  150. WCHAR const c_szGopherScheme[] = L"gopher";
  151. WCHAR const c_szMailToScheme[] = L"mailto";
  152. WCHAR const c_szNewsScheme[] = L"news";
  153. WCHAR const c_szNNTPScheme[] = L"nntp";
  154. WCHAR const c_szTelnetScheme[] = L"telnet";
  155. WCHAR const c_szWAISScheme[] = L"wais";
  156. WCHAR const c_szMkScheme[] = L"mk";
  157. WCHAR const c_szHttpsScheme[] = L"https";
  158. WCHAR const c_szLocalScheme[] = L"local";
  159. WCHAR const c_szShellScheme[] = L"shell";
  160. WCHAR const c_szJSScheme[] = L"javascript";
  161. WCHAR const c_szVSScheme[] = L"vbscript";
  162. WCHAR const c_szAboutScheme[] = L"about";
  163. WCHAR const c_szSnewsScheme[] = L"snews";
  164. WCHAR const c_szResScheme[] = L"res";
  165. WCHAR const c_szRootedScheme[] = L"ms-shell-rooted";
  166. WCHAR const c_szIDListScheme[] = L"ms-shell-idlist";
  167. WCHAR const c_szMsHelpScheme[] = L"hcp";
  // Table of the schemes this parser knows about. Each row holds the scheme
  // text, its URL_SCHEME_* ordinal, the scheme length (SIZECHARS(...) - 1,
  // presumably the character count without the terminator), and UPF_SCHEME_*
  // flags. The GetSchemeTypeAndFlags*/SchemeTypeFromString* routines search it.
  168. const struct
  169. {
  170. LPCWSTR pszScheme;
  171. URL_SCHEME eScheme;
  172. DWORD cchScheme;
  173. DWORD dwFlags;
  174. } g_mpUrlSchemeTypes[] =
  175. {
  176. // Because we use a linear search, sort this in the order of
  177. // most common usage.
  178. { c_szHttpScheme, URL_SCHEME_HTTP, SIZECHARS(c_szHttpScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  179. { c_szFileScheme, URL_SCHEME_FILE, SIZECHARS(c_szFileScheme) - 1, UPF_SCHEME_CONVERT},
  180. { c_szFTPScheme, URL_SCHEME_FTP, SIZECHARS(c_szFTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  181. { c_szHttpsScheme, URL_SCHEME_HTTPS, SIZECHARS(c_szHttpsScheme) -1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT|UPF_SCHEME_DONTCORRECT},
  182. { c_szNewsScheme, URL_SCHEME_NEWS, SIZECHARS(c_szNewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  183. { c_szMailToScheme, URL_SCHEME_MAILTO, SIZECHARS(c_szMailToScheme) - 1, UPF_SCHEME_OPAQUE},
  184. { c_szGopherScheme, URL_SCHEME_GOPHER, SIZECHARS(c_szGopherScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  185. { c_szNNTPScheme, URL_SCHEME_NNTP, SIZECHARS(c_szNNTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  186. { c_szTelnetScheme, URL_SCHEME_TELNET, SIZECHARS(c_szTelnetScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  187. { c_szWAISScheme, URL_SCHEME_WAIS, SIZECHARS(c_szWAISScheme) - 1, 0},
  188. { c_szMkScheme, URL_SCHEME_MK, SIZECHARS(c_szMkScheme) - 1, UPF_SCHEME_NOHISTORY},
  189. { c_szShellScheme, URL_SCHEME_SHELL, SIZECHARS(c_szShellScheme) - 1, UPF_SCHEME_OPAQUE},
  190. { c_szLocalScheme, URL_SCHEME_LOCAL, SIZECHARS(c_szLocalScheme) - 1, 0},
  191. { c_szJSScheme, URL_SCHEME_JAVASCRIPT,SIZECHARS(c_szJSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  192. { c_szVSScheme, URL_SCHEME_VBSCRIPT, SIZECHARS(c_szVSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  193. { c_szSnewsScheme, URL_SCHEME_SNEWS, SIZECHARS(c_szSnewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
  194. { c_szAboutScheme, URL_SCHEME_ABOUT, SIZECHARS(c_szAboutScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
  195. { c_szResScheme, URL_SCHEME_RES, SIZECHARS(c_szResScheme) - 1, UPF_SCHEME_NOHISTORY},
  196. { c_szRootedScheme, URL_SCHEME_MSSHELLROOTED, SIZECHARS(c_szRootedScheme) - 1, 0},
  197. { c_szIDListScheme, URL_SCHEME_MSSHELLIDLIST, SIZECHARS(c_szIDListScheme) - 1, 0},
  198. { c_szMsHelpScheme, URL_SCHEME_MSHELP, SIZECHARS(c_szMsHelpScheme) - 1, 0},
  199. };
  200. PRIVATE int _StrCmpNMixed(LPCSTR psz, LPCWSTR pwz, DWORD cch)
  201. {
  202. int iRet = 0;
  203. //
  204. // we dont have to real mbcs conversion here because we are
  205. // guaranteed to have only ascii chars here
  206. //
  207. for (;cch; psz++, pwz++, cch--)
  208. {
  209. WCHAR ch = *psz;
  210. if (ch != *pwz)
  211. {
  212. //
  213. // this makes it case insensitive
  214. if (IsUpper(ch) && (ch + 32) == *pwz)
  215. continue;
  216. if(ch > *pwz)
  217. iRet = 1;
  218. else
  219. iRet = -1;
  220. break;
  221. }
  222. }
  223. return iRet;
  224. }
  225. //*** g_iScheme -- cache for g_mpUrlSchemeTypes
  226. // DESCRIPTION
  227. // we call GetSchemeTypeAndFlags many times for the same scheme. if
  228. // it's the 0th table entry, no biggee. if it's a later entry linear
  229. // search isnt very good. add a 1-element MRU cache. even for the most common
  230. // (by far) case of "http" (0th entry), we *still* win due to the cheaper
  231. // StrCmpC and skipped loop.
  232. // NOTES
  233. // g_iScheme refs/sets are atomic so no need for lock
  234. int g_iScheme; // last guy we hit
  235. #ifdef DEBUG
  236. int g_cSTTot, g_cSTHit, g_cSTHit0;
  237. #endif
  238. //
  239. // all of the pszScheme to nScheme functions are necessary at this point
  240. // because some parsing is violent, and some is necessarily soft
  241. //
  242. PRIVATE URL_SCHEME
  243. GetSchemeTypeAndFlagsW(LPCWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  244. {
  245. DWORD i;
  246. ASSERT(pszScheme);
  247. #ifdef DEBUG
  248. if ((g_cSTTot % 10) == 0)
  249. TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
  250. #endif
  251. DBEXEC(TRUE, g_cSTTot++);
  252. // check cache 1st
  253. i = g_iScheme;
  254. if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  255. && StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
  256. {
  257. DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
  258. Lhit:
  259. if (pdwFlags)
  260. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  261. // update cache (unconditionally)
  262. g_iScheme = i;
  263. return g_mpUrlSchemeTypes[i].eScheme;
  264. }
  265. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  266. {
  267. if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  268. && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  269. goto Lhit;
  270. }
  271. if (pdwFlags)
  272. {
  273. *pdwFlags = 0;
  274. }
  275. return URL_SCHEME_UNKNOWN;
  276. }
  277. PRIVATE URL_SCHEME
  278. GetSchemeTypeAndFlagsA(LPCSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  279. {
  280. DWORD i;
  281. ASSERT(pszScheme);
  282. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  283. {
  284. if(0 == _StrCmpNMixed(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  285. {
  286. if (pdwFlags)
  287. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  288. return g_mpUrlSchemeTypes[i].eScheme;
  289. }
  290. }
  291. if (pdwFlags)
  292. {
  293. *pdwFlags = 0;
  294. }
  295. return URL_SCHEME_UNKNOWN;
  296. }
  297. /*----------------------------------------------------------
  298. Purpose: Return the scheme ordinal type (URL_SCHEME_*) based on the
  299. URL string.
  300. Returns: URL_SCHEME_ ordinal
  301. Cond: --
  302. */
  303. PRIVATE inline BOOL IsSameSchemeW(LPCWSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
  304. {
  305. ASSERT(pszLocal);
  306. ASSERT(pszGlobal);
  307. ASSERT(cch);
  308. return !StrCmpNIW(pszLocal, pszGlobal, cch);
  309. }
  310. PRIVATE BOOL IsSameSchemeA(LPCSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
  311. {
  312. ASSERT(pszLocal);
  313. ASSERT(pszGlobal);
  314. ASSERT(cch);
  315. return !_StrCmpNMixed(pszLocal, pszGlobal, cch);
  316. }
  317. PRIVATE URL_SCHEME
  318. SchemeTypeFromStringA(
  319. LPCSTR psz,
  320. DWORD cch)
  321. {
  322. DWORD i;
  323. // psz is a counted string (by cch), not a null-terminated string,
  324. // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRA.
  325. ASSERT(IS_VALID_READ_BUFFER(psz, CHAR, cch));
  326. ASSERT(cch);
  327. // We use a linear search. A binary search wouldn't pay off
  328. // because the list isn't big enough, and we can sort the list
  329. // according to the most popular protocol schemes and pay off
  330. // bigger.
  331. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  332. {
  333. if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
  334. IsSameSchemeA(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
  335. return g_mpUrlSchemeTypes[i].eScheme;
  336. }
  337. return URL_SCHEME_UNKNOWN;
  338. }
  339. PRIVATE URL_SCHEME
  340. SchemeTypeFromStringW(
  341. LPCWSTR psz,
  342. DWORD cch)
  343. {
  344. DWORD i;
  345. // psz is a counted string (by cch), not a null-terminated string,
  346. // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRW.
  347. ASSERT(IS_VALID_READ_BUFFER(psz, WCHAR, cch));
  348. ASSERT(cch);
  349. // We use a linear search. A binary search wouldn't pay off
  350. // because the list isn't big enough, and we can sort the list
  351. // according to the most popular protocol schemes and pay off
  352. // bigger.
  353. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  354. {
  355. if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
  356. IsSameSchemeW(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
  357. return g_mpUrlSchemeTypes[i].eScheme;
  358. }
  359. return URL_SCHEME_UNKNOWN;
  360. }
  361. //
  362. // these are used during path fumbling that i do
  363. // each string between a path delimiter ( '/' or '\')
  364. // is a segment. we dont ever really care about
  365. // empty ("") segments, so it is best to use
  366. // NextLiveSegment().
  367. //
  368. inline PRIVATE LPWSTR
  369. NextSegment(LPWSTR psz)
  370. {
  371. ASSERT (psz);
  372. return psz + lstrlenW(psz) + 1;
  373. }
  // Non-zero when p is non-NULL and does not start with DEADSEGCHAR.
  // The argument is parenthesized at the dereference so that expression
  // arguments (e.g. psz + 1) are dereferenced as a whole.
  #define IsLiveSegment(p) ((p) && *(p) != DEADSEGCHAR)
  375. PRIVATE LPWSTR
  376. NextLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
  377. {
  378. if(pszSeg) do
  379. {
  380. //
  381. // count the number of dead segments that we skip.
  382. // if the segment isnt dead, then we can just skip one,
  383. // the current one.
  384. //
  385. DWORD cSkip;
  386. for (cSkip = 0; (*pszSeg) == DEADSEGCHAR; pszSeg++, cSkip++);
  387. cSkip = cSkip ? cSkip : 1;
  388. if((*piSeg) + cSkip < cSegs)
  389. {
  390. pszSeg = NextSegment(pszSeg);
  391. (*piSeg) += cSkip;
  392. }
  393. else
  394. pszSeg = NULL;
  395. } while (pszSeg && (*pszSeg == DEADSEGCHAR));
  396. return pszSeg;
  397. }
  398. PRIVATE LPWSTR
  399. LastLiveSegment(LPWSTR pszSeg, DWORD cSegs, BOOL fFailIfFirst)
  400. {
  401. DWORD iSeg = 0;
  402. LPWSTR pszLast = NULL;
  403. BOOL fLastIsFirst = FALSE;
  404. if(cSegs)
  405. {
  406. if(IsLiveSegment(pszSeg))
  407. {
  408. pszLast = pszSeg;
  409. fLastIsFirst = TRUE;
  410. }
  411. while(pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs))
  412. {
  413. if(!pszLast)
  414. fLastIsFirst = TRUE;
  415. else
  416. fLastIsFirst = FALSE;
  417. pszLast = pszSeg;
  418. }
  419. if(fFailIfFirst && fLastIsFirst)
  420. pszLast = NULL;
  421. }
  422. return pszLast;
  423. }
  424. PRIVATE LPWSTR
  425. FirstLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
  426. {
  427. ASSERT(piSeg);
  428. *piSeg = 0;
  429. if(!pszSeg || !cSegs)
  430. return NULL;
  431. if(!IsLiveSegment(pszSeg))
  432. pszSeg = NextLiveSegment(pszSeg, piSeg, cSegs);
  433. return pszSeg;
  434. }
  435. inline BOOL IsDosDrive(LPCWSTR p)
  436. {
  437. return (*p && p[1] == COLON);
  438. }
  439. inline BOOL IsDosPath(LPCWSTR p)
  440. {
  441. return (*p == WHACK || IsDosDrive(p));
  442. }
  443. inline BOOL IsDriveUrl(const WCHAR *p)
  444. {
  445. return (*p && p[1] == BAR);
  446. }
  447. inline BOOL IsDrive(LPCWSTR p)
  448. {
  449. return (IsDosDrive(p) || IsDriveUrl(p));
  450. }
  451. inline BOOL IsSeparator(const WCHAR *p)
  452. {
  453. return (*p == SLASH || *p == WHACK );
  454. }
  455. inline BOOL IsAbsolute(const WCHAR *p)
  456. {
  457. #ifndef UNIX
  458. return (IsSeparator(p) || IsDrive(p));
  459. #else
  460. return (IsSeparator(p)) ;
  461. #endif
  462. }
  463. #define IsUNC(pathW) PathIsUNCW(pathW)
  464. inline BOOL IsDot(LPCWSTR p) // if p == "." return TRUE
  465. {
  466. return (*p == DOT && !p[1]);
  467. }
  468. inline BOOL IsDotDot(LPCWSTR p) // if p == ".." return TRUE
  469. {
  470. return (*p == DOT && p[1] == DOT && !p[2]);
  471. }
  472. //+---------------------------------------------------------------------------
  473. //
  474. // Method: ConvertChar
  475. //
  476. // Synopsis:
  477. //
  478. // Arguments: [szStr] --
  479. // [cIn] --
  480. // [cOut] --
  481. //
  482. // Returns:
  483. //
  484. // History: 03-20-96 JoeS (Joe Souza) Created
  485. //
  486. // Notes:
  487. //
  488. //----------------------------------------------------------------------------
  489. static void ConvertChar(LPWSTR ptr, WCHAR cIn, WCHAR cOut, BOOL fProtectExtra)
  490. {
  491. while (*ptr)
  492. {
  493. if (fProtectExtra && (*ptr == QUERY || *ptr == POUND ))
  494. {
  495. break;
  496. }
  497. if (*ptr == cIn)
  498. {
  499. *ptr = cOut;
  500. }
  501. ptr++;
  502. }
  503. }
  504. PUBLIC void WininetFixFileSlashes(WCHAR *p)
  505. {
  506. // NB: This function assumes that p points to a file URL.
  507. // The file URL *MUST* be of the form "file://...".
  508. // HTParse() guarantees that this will be so.
  509. int schemelen = 0;
  510. schemelen = SIZECHARS(L"file://") - 1;
  511. /* In UNIX system, we don't need to convert the SLASH to WHACK */
  512. if (p && lstrlenW(p) > schemelen)
  513. {
  514. #ifdef UNIX
  515. ConvertChar(p + schemelen, WHACK, SLASH, TRUE);
  516. #else
  517. ConvertChar(p + schemelen, SLASH, WHACK, TRUE);
  518. #endif
  519. }
  520. }
  521. //
  522. // in the URL spec, it says that all whitespace should be ignored
  523. // due to the fact that it is possible to introduce
  524. // new whitespace and eliminate other whitespace
  525. // however, we are only going to strip out TAB CR LF
  526. // because we consider SPACE's to be significant.
  527. //
  528. PRIVATE inline BOOL IsInsignificantWhite(WCHAR ch)
  529. {
  530. return (ch == TAB ||
  531. ch == CR ||
  532. ch == LF);
  533. }
  534. #define IsWhite(c) ((DWORD) (c) > 32 ? FALSE : TRUE)
  535. PRIVATE void TrimAndStripInsignificantWhite(WCHAR *psz)
  536. {
  537. ASSERT(psz);
  538. if(*psz)
  539. {
  540. LPCWSTR pszSrc = psz;
  541. LPWSTR pszDest = psz;
  542. LPWSTR pszLastSpace = NULL;
  543. // first trim the front side by just moving the source pointer.
  544. while(*pszSrc && IsWhite(*pszSrc)) {
  545. pszSrc++;
  546. }
  547. //
  548. // Copy the body stripping "insignificant" white spaces.
  549. // Remember the last white space to trim trailing space later.
  550. //
  551. while (*pszSrc)
  552. {
  553. if(IsInsignificantWhite(*pszSrc)) {
  554. pszSrc++;
  555. } else {
  556. if (IsWhite(*pszSrc)) {
  557. if (pszLastSpace==NULL) {
  558. pszLastSpace = pszDest;
  559. }
  560. } else {
  561. pszLastSpace = NULL;
  562. }
  563. *pszDest++ = *pszSrc++;
  564. }
  565. }
  566. // Trim the trailing space
  567. if (pszLastSpace) {
  568. *pszLastSpace = L'\0';
  569. } else {
  570. *pszDest = L'\0';
  571. }
  572. }
  573. }
  // One file extension in narrow and wide form, plus its length in characters
  // (ARRAYSIZE of the narrow literal minus the terminator).
  574. struct EXTKEY
  575. {
  576. PCSTR szExt;
  577. PCWSTR wszExt;
  578. DWORD cchExt;
  579. };
  // Extensions that CompareExtA/CompareExtW look for immediately before a
  // given position; FindFragmentA/W use them to decide whether a '#' in a
  // file URL starts a fragment.
  580. const EXTKEY ExtTable[] = {
  581. { ".html", L".html", ARRAYSIZE(".html") - 1 },
  582. { ".htm", L".htm", ARRAYSIZE(".htm") - 1 },
  583. { ".xml", L".xml", ARRAYSIZE(".xml") - 1 },
  584. { ".doc", L".doc", ARRAYSIZE(".doc") - 1 },
  585. { ".xls", L".xls", ARRAYSIZE(".xls") - 1 },
  586. { ".ppt", L".ppt", ARRAYSIZE(".ppt") - 1 },
  587. { ".rtf", L".rtf", ARRAYSIZE(".rtf") - 1 },
  588. { ".dot", L".dot", ARRAYSIZE(".dot") - 1 },
  589. { ".xlw", L".xlw", ARRAYSIZE(".xlw") - 1 },
  590. { ".pps", L".pps", ARRAYSIZE(".pps") - 1 },
  591. { ".xlt", L".xlt", ARRAYSIZE(".xlt") - 1 },
  592. { ".hta", L".hta", ARRAYSIZE(".hta") - 1 },
  593. { ".pot", L".pot", ARRAYSIZE(".pot") - 1 },
  594. { ".pdf", L".pdf", ARRAYSIZE(".pdf") - 1 }
  595. };
  596. inline BOOL CompareExtA(PCSTR psz, DWORD_PTR cch)
  597. {
  598. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  599. {
  600. if (ExtTable[i].cchExt>cch)
  601. continue;
  602. if (!StrCmpNIA(psz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].szExt, ExtTable[i].cchExt))
  603. return TRUE;
  604. }
  605. return FALSE;
  606. }
  607. inline BOOL CompareExtW(PCWSTR pwsz, DWORD_PTR cch)
  608. {
  609. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  610. {
  611. if (ExtTable[i].cchExt>cch)
  612. continue;
  613. if (!StrCmpNIW(pwsz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].wszExt, ExtTable[i].cchExt))
  614. return TRUE;
  615. }
  616. return FALSE;
  617. }
  618. PRIVATE LPCSTR FindFragmentA(LPCSTR psz, BOOL fMBCS, BOOL fIsFile)
  619. {
  620. CHAR *pch = _StrChrA(psz, POUND, fMBCS);
  621. if(pch && fIsFile)
  622. {
  623. CHAR *pchQuery = _StrChrA(psz, QUERY, fMBCS);
  624. if (pchQuery && (pchQuery < pch))
  625. goto exit;
  626. do
  627. {
  628. LONG_PTR cch = pch - psz;
  629. // REARCHITECT: we shouldn't hardcode ".htm".
  630. // #s are significant in dospaths - zekel 9-JUL-97
  631. // so we want to check the path in front and make sure
  632. // that it is an html file. we believe this heuristic should work
  633. // in about 99% of all cases.
  634. //
  635. // if it is not an html file it is not a hash
  636. if (CompareExtA(pch, cch))
  637. {
  638. break;
  639. }
  640. } while (pch = _StrChrA(++pch, POUND, fMBCS));
  641. }
  642. exit:
  643. return pch;
  644. }
  645. PRIVATE LPCWSTR FindFragmentW(LPCWSTR psz, BOOL fIsFile)
  646. {
  647. WCHAR *pch = StrChrW(psz, POUND);
  648. if(pch && fIsFile)
  649. {
  650. WCHAR *pchQuery = StrChrW(psz, QUERY);
  651. if (pchQuery && (pchQuery < pch))
  652. goto exit;
  653. do
  654. {
  655. LONG_PTR cch = pch - psz;
  656. // REARCHITECT: we shouldn't hardcode ".htm".
  657. // #s are significant in dospaths - zekel 9-JUL-97
  658. // so we want to check the path in front and make sure
  659. // that it is an html file. we believe this heuristic should work
  660. // in about 99% of all cases.
  661. //
  662. // if it is not an html file it is not a hash
  663. if (CompareExtW(pch, cch))
  664. {
  665. break;
  666. }
  667. } while (pch = StrChrW(++pch, POUND));
  668. }
  669. exit:
  670. return pch;
  671. }
  672. PRIVATE VOID BreakFragment(LPWSTR *ppsz, PURLPARTS parts)
  673. {
  674. ASSERT(ppsz);
  675. ASSERT(*ppsz);
  676. //
  677. // Opaque URLs are not allowed to use fragments - zekel 27-feb-97
  678. // Is it possible for an opaque URL to use a fragment?
  679. // right now we assume not. i suspect so but will leave it this way for now
  680. // this is especially important to javascript and vbscript
  681. // FEATURE: this might be worth investigation, but probably can't change this code
  682. //
  683. if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
  684. return;
  685. WCHAR *pch = (LPWSTR) FindFragmentW(*ppsz, parts->eScheme == URL_SCHEME_FILE);
  686. if (pch)
  687. {
  688. TERMSTR(pch);
  689. parts->pszFragment = pch +1;
  690. }
  691. }
  692. PRIVATE inline BOOL IsUrlPrefixA(LPCSTR psz)
  693. {
  694. //
  695. // Optimized for this particular case. Notice that most of it
  696. // will be lego-ized out anyway.
  697. //
  698. if (psz[0]=='u' || psz[0]=='U') {
  699. if (psz[1]=='r' || psz[1]=='R') {
  700. if (psz[2]=='l' || psz[2]=='L') {
  701. return TRUE;
  702. }
  703. }
  704. }
  705. return FALSE;
  706. // return !StrCmpNIA(psz, c_szURLPrefixA, c_cchURLPrefix);
  707. }
  708. PRIVATE inline BOOL IsUrlPrefixW(LPCWSTR psz)
  709. {
  710. //
  711. // Optimized for this particular case. Notice that most of it
  712. // will be lego-ized out anyway.
  713. //
  714. if (psz[0]==L'u' || psz[0]==L'U') {
  715. if (psz[1]==L'r' || psz[1]==L'R') {
  716. if (psz[2]==L'l' || psz[2]==L'L') {
  717. return TRUE;
  718. }
  719. }
  720. }
  721. return FALSE;
  722. // return !StrCmpNIW(psz, c_szURLPrefixW, c_cchURLPrefix);
  723. }
  724. //
  725. // if FindScheme() succeeds, it returns a pointer to the scheme,
  726. // and the cch holds the count of chars for the scheme
  727. // if it fails, and cch is non-zero then cch is how much should be skipped.
  728. // this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
  729. //
  730. LPCSTR FindSchemeA(LPCSTR psz, LPDWORD pcchScheme)
  731. {
  732. LPCSTR pch;
  733. DWORD cch;
  734. ASSERT(pcchScheme);
  735. ASSERT(psz);
  736. *pcchScheme = 0;
  737. for (pch = psz, cch = 0; *pch; pch++, cch++)
  738. {
  739. if (*pch == ':')
  740. {
  741. if (IsUrlPrefixA(psz))
  742. {
  743. psz = pch +1;
  744. // set pcchScheme to skip past "URL:"
  745. *pcchScheme = cch + 1;
  746. // reset cch for the scheme len
  747. cch = -1;
  748. continue;
  749. }
  750. else
  751. {
  752. //
  753. // Scheme found if it is at least two characters
  754. if(cch > 1)
  755. {
  756. *pcchScheme = cch;
  757. return psz;
  758. }
  759. break;
  760. }
  761. }
  762. if(!IsValidSchemeCharA(*pch))
  763. break;
  764. }
  765. return NULL;
  766. }
  767. //
  768. // FindSchemeW() around for Perf reasons for ParseURL()
  769. // Any changes in either FindScheme() needs to reflected in the other
  770. //
  771. LPCWSTR FindSchemeW(LPCWSTR psz, LPDWORD pcchScheme, BOOL fAllowSemicolon = FALSE)
  772. {
  773. LPCWSTR pch;
  774. DWORD cch;
  775. ASSERT(pcchScheme);
  776. ASSERT(psz);
  777. *pcchScheme = 0;
  778. for (pch = psz, cch = 0; *pch; pch++, cch++)
  779. {
  780. if (*pch == L':' ||
  781. // Autocorrect permits a semicolon typo
  782. (fAllowSemicolon && *pch == L';'))
  783. {
  784. if (IsUrlPrefixW(psz))
  785. {
  786. psz = pch +1;
  787. // set pcchScheme to skip past "URL:"
  788. *pcchScheme = cch + 1;
  789. // reset cch for the scheme len
  790. cch = -1;
  791. continue;
  792. }
  793. else
  794. {
  795. //
  796. // Scheme found if it is at least two characters
  797. if(cch > 1)
  798. {
  799. *pcchScheme = cch;
  800. return psz;
  801. }
  802. break;
  803. }
  804. }
  805. if(!IsValidSchemeCharW(*pch))
  806. break;
  807. }
  808. return NULL;
  809. }
  810. PRIVATE DWORD
  811. CountSlashes(LPCWSTR *ppsz)
  812. {
  813. DWORD cSlashes = 0;
  814. LPCWSTR pch = *ppsz;
  815. while (IsSeparator(pch))
  816. {
  817. *ppsz = pch;
  818. pch++;
  819. cSlashes++;
  820. }
  821. return cSlashes;
  822. }
  823. PRIVATE LPCWSTR
  824. FindDosPath(LPCWSTR psz)
  825. {
  826. if (IsDosDrive(psz) || IsUNC(psz))
  827. {
  828. return psz;
  829. }
  830. else
  831. {
  832. DWORD cch;
  833. LPCWSTR pszScheme = FindSchemeW(psz, &cch);
  834. if (pszScheme && URL_SCHEME_FILE == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
  835. {
  836. LPCWSTR pch = psz + cch + 1;
  837. DWORD c = CountSlashes(&pch);
  838. switch (c)
  839. {
  840. case 2:
  841. if(IsDosDrive(++pch))
  842. return pch;
  843. break;
  844. case 4:
  845. return --pch;
  846. }
  847. }
  848. }
  849. return NULL;
  850. }
  851. /*+++
  852. WininetCopyUrlForParse()
  853. this copies the url and prepends a "file://" if necessary
  854. This should never be called except from wininet
  855. everyone else should be calling UrlCreateFromPath()
  856. Parameters
  857. IN -
  858. pszDst the destination buffer
  859. pszSrc source buffer
  860. OUT -
  861. pszDst is filled with a Live URL
  862. Returns
  863. VOID
  864. NOTE - Assume "file:" if no scheme and it looks like fully-qualified file path.
  865. ---*/
  866. static const WCHAR c_szFileSchemeString[] = L"file://";
  867. PRIVATE HRESULT
  868. WininetCopyUrlForParse(PSHSTRW pstrDst, LPCWSTR pszSrc)
  869. {
  870. #ifndef UNIX
  871. if (IsDrive(pszSrc) || IsUNC(pszSrc))
  872. {
  873. //
  874. // NOTE: the first SetStr will always succeed
  875. // because the default buffer is more than "file://"
  876. pstrDst->SetStr(c_szFileSchemeString);
  877. return pstrDst->Append(pszSrc);
  878. }
  879. else
  880. #endif /* !UNIX */
  881. return pstrDst->SetStr(pszSrc);
  882. }
  883. PRIVATE HRESULT
  884. CopyUrlForParse(LPCWSTR pszUrl, PSHSTRW pstrUrl, DWORD dwFlags)
  885. {
  886. LPCWSTR pch;
  887. HRESULT hr;
  888. //
  889. // now we will make copies of the URLs so that we can rip them apart
  890. // WininetCopyUrlForParse() will prepend a file: if it wants...
  891. //
  892. if(dwFlags & URL_WININET_COMPATIBILITY)
  893. {
  894. hr = WininetCopyUrlForParse(pstrUrl, pszUrl);
  895. }
  896. else if(pch = FindDosPath(pszUrl))
  897. {
  898. hr = SHUrlCreateFromPath(pch, pstrUrl, dwFlags);
  899. }
  900. else
  901. {
  902. hr = pstrUrl->SetStr(pszUrl);
  903. }
  904. // Trim leading and trailing whitespace
  905. // Remove tab and CRLF characters. Netscape does this.
  906. if(SUCCEEDED(hr))
  907. TrimAndStripInsignificantWhite(pstrUrl->GetInplaceStr());
  908. return hr;
  909. }
  910. PRIVATE VOID BreakScheme(LPWSTR *ppsz, PURLPARTS parts)
  911. {
  912. if(!**ppsz || IsDrive(*ppsz))
  913. return;
  914. DWORD cch;
  915. //
  916. // if FindScheme() succeeds, it returns a pointer to the scheme,
  917. // and the cch holds the count of chars for the scheme
  918. // if it fails, and cch is none zero then cch is how much should be skipped.
  919. // this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
  920. //
  921. if(NULL != (parts->pszScheme = (LPWSTR) FindSchemeW(*ppsz, &cch)))
  922. {
  923. parts->pszScheme[cch] = '\0';
  924. CharLowerW(parts->pszScheme);
  925. // put the pointer past the scheme for next Break()
  926. *ppsz = parts->pszScheme + cch + 1;
  927. #ifdef DEBUG
  928. if (g_dwPrototype & PF_LOGSCHEMEHITS)
  929. {
  930. // this is for logging of url schemes, to make sure that we have the right order
  931. int c = GetPrivateProfileIntW(L"SchemeHits", parts->pszScheme, 0, L"UrlPars.ini");
  932. WCHAR szc[25];
  933. StringCchPrintfW(szc, ARRAYSIZE(szc), L"%d", ++c);
  934. WritePrivateProfileStringW(L"SchemeHits", parts->pszScheme, szc, L"UrlPars.ini");
  935. }
  936. #endif //DEBUG
  937. parts->eScheme = GetSchemeTypeAndFlagsW(parts->pszScheme, cch, &parts->dwFlags);
  938. }
  939. else if (cch)
  940. *ppsz += cch + 1;
  941. }
  942. PRIVATE VOID BreakQuery(LPWSTR *ppsz, PURLPARTS parts)
  943. {
  944. WCHAR *pch;
  945. if(!**ppsz)
  946. return;
  947. if(parts->dwFlags & UPF_SCHEME_OPAQUE)
  948. return;
  949. pch = StrChrW(*ppsz, QUERY);
  950. //
  951. // APPCOMPAT NETSCAPE COMPATBILITY - zekel - 27-JAN-97
  952. // we will also get http://foo#frag?query
  953. // even tho legally it should be http://foo?query#frag
  954. // of course we will put it back together the right way.
  955. //
  956. if(!pch && parts->pszFragment)
  957. pch = StrChrW(parts->pszFragment, QUERY);
  958. // found our query string...
  959. if (pch)
  960. {
  961. TERMSTR(pch);
  962. parts->pszQuery = pch + 1;
  963. }
  964. }
  965. PRIVATE VOID MkBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  966. {
  967. //
  968. // NOTE: we dont convert WHACKs to SLASHs because mk can be of the
  969. // form <mk:@class:\\Server\Share\file.itl/path/in/the/file.gif
  970. // and we want to preserve the DOS/UNC path as it is
  971. //
  972. if (**ppsz == TEXT('@'))
  973. {
  974. WCHAR *pch;
  975. // treat everything to separator as host
  976. //
  977. parts->pszServer = *ppsz;
  978. pch = StrChrW(*ppsz ,SLASH);
  979. if (pch)
  980. {
  981. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  982. TERMSTR(pch);
  983. *ppsz = pch + 1;
  984. }
  985. else
  986. *ppsz += lstrlenW(*ppsz);
  987. }
  988. }
  989. PRIVATE VOID DefaultBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  990. {
  991. if (**ppsz == SLASH)
  992. {
  993. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  994. (*ppsz)++;
  995. if (**ppsz == SLASH)
  996. {
  997. // we have a winner!
  998. WCHAR * pch;
  999. parts->pszServer = (*ppsz) + 1;
  1000. pch = StrChrW(parts->pszServer, SLASH);
  1001. if(pch)
  1002. {
  1003. TERMSTR(pch);
  1004. *ppsz = pch + 1;
  1005. }
  1006. else
  1007. *ppsz = *ppsz + lstrlenW(*ppsz);
  1008. }
  1009. }
  1010. else if(parts->pszScheme)
  1011. parts->dwFlags |= UPF_SCHEME_OPAQUE;
  1012. }
  1013. PRIVATE VOID FileBreakServer(LPWSTR *ppsz, PURLPARTS parts)
  1014. {
  1015. LPWSTR pch;
  1016. // CountSlashes() will set *ppsz to the last slash
  1017. DWORD cSlashes = CountSlashes((LPCWSTR *)ppsz);
  1018. if(cSlashes || IsDrive(*ppsz))
  1019. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1020. switch (cSlashes)
  1021. {
  1022. case 0:
  1023. break;
  1024. case 4:
  1025. // we identify file://\\UNC as a true DOS path with no escaped characters
  1026. parts->dwFlags |= UPF_FILEISPATHURL;
  1027. // fall through
  1028. case 2:
  1029. if(IsDrive((*ppsz) + 1))
  1030. {
  1031. // this is a root drive
  1032. TERMSTR(*ppsz);
  1033. parts->pszServer = *ppsz;
  1034. (*ppsz)++;
  1035. // we identify file://C:\PATH as a true DOS path with no escaped characters
  1036. parts->dwFlags |= UPF_FILEISPATHURL;
  1037. break;
  1038. } //else fallthru to UNC handling
  1039. // fall through
  1040. case 5:
  1041. case 6:
  1042. //
  1043. // cases like "file:////..." or "file://///..."
  1044. // we see this as a UNC path
  1045. // lets set the server
  1046. //
  1047. parts->pszServer = ++(*ppsz);
  1048. for(pch = *ppsz; *pch && !IsSeparator(pch); pch++);
  1049. if(pch && *pch)
  1050. {
  1051. TERMSTR(pch);
  1052. *ppsz = pch + 1;
  1053. }
  1054. else
  1055. *ppsz = pch + lstrlenW(pch);
  1056. break;
  1057. case 1:
  1058. //
  1059. //we think of "file:/..." as on the local machine
  1060. // so we have zero length pszServer
  1061. //
  1062. case 3:
  1063. //
  1064. //we think of file:///... as properly normalized on the local machine
  1065. // so we have zero length pszServer
  1066. //
  1067. default:
  1068. // there is just too many, we pretend that there is just one and ignore
  1069. // the rest
  1070. TERMSTR(*ppsz);
  1071. parts->pszServer = *ppsz;
  1072. (*ppsz)++;
  1073. break;
  1074. }
  1075. // detect file://localserver/c:/path
  1076. if(parts->pszServer && !StrCmpIW(parts->pszServer, L"localhost"))
  1077. parts->pszServer = NULL;
  1078. }
  1079. PRIVATE VOID BreakServer(LPWSTR *ppsz, PURLPARTS parts, BOOL fConvert)
  1080. {
  1081. if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
  1082. return;
  1083. //
  1084. // APPCOMPAT - we pretend that whacks are the equiv of slashes - zekel 17-MAR-97
  1085. // this is because the internet uses slashes and DOS
  1086. // uses whacks. so for useability's sake we allow both.
  1087. // but not in all cases. in particular, the "mk:" stream
  1088. // protocol depends upon the buggy behavior of one of IE30's
  1089. // many URL parsers treating relative URLs with whacks as one
  1090. // segment.
  1091. // NOTE: IE30 had inconsistent behavior WRT URLs. so we handled
  1092. // this case differently depending on when we saw, looked, touched, or
  1093. // played with these URLs. wininet would always convert, but mshtml
  1094. // sometimes would other times not.
  1095. //
  1096. // with MK: we cannot convert the base, or the relative
  1097. // but in breakpath we have to allow for the use of WHACK
  1098. // to indicate a root path
  1099. //
  1100. // we dont have to fProtectExtra because query and fragments
  1101. // are already broken off if necessary.
  1102. if (fConvert)
  1103. ConvertChar(*ppsz, WHACK, SLASH, FALSE);
  1104. switch(parts->eScheme)
  1105. {
  1106. case URL_SCHEME_FILE:
  1107. FileBreakServer(ppsz, parts);
  1108. break;
  1109. case URL_SCHEME_MK:
  1110. MkBreakServer(ppsz, parts);
  1111. break;
  1112. default:
  1113. DefaultBreakServer(ppsz, parts);
  1114. break;
  1115. }
  1116. }
  1117. PRIVATE VOID DefaultBreakSegments(LPWSTR psz, PURLPARTS parts)
  1118. {
  1119. WCHAR *pch;
  1120. while (pch = StrChrW(psz, SLASH))
  1121. {
  1122. parts->cSegments++;
  1123. TERMSTR(pch);
  1124. psz = pch + 1;
  1125. }
  1126. if(!*psz || IsDot(psz) || IsDotDot(psz))
  1127. {
  1128. if (!*psz && parts->cSegments > 1)
  1129. parts->cSegments--;
  1130. parts->dwFlags |= UPF_EXSEG_DIRECTORY;
  1131. }
  1132. }
  1133. PRIVATE VOID DefaultBreakPath(LPWSTR *ppsz, PURLPARTS parts)
  1134. {
  1135. if(!**ppsz)
  1136. return;
  1137. //
  1138. // this will keep the drive letter from being backed up over
  1139. // during canonicalization. if we want keep the UNC share
  1140. // from being backed up we should do it here
  1141. // or in FileBreakServer() similarly
  1142. //
  1143. if(IsDrive(*ppsz))
  1144. {
  1145. parts->dwFlags |= UPF_SEG_LOCKFIRST;
  1146. // also convert "c|" to "c:"
  1147. }
  1148. parts->pszSegments = *ppsz;
  1149. parts->cSegments = 1;
  1150. if(!(parts->dwFlags & UPF_SCHEME_OPAQUE))
  1151. DefaultBreakSegments(parts->pszSegments, parts);
  1152. }
  1153. PRIVATE VOID BreakPath(LPWSTR *ppsz, PURLPARTS parts)
  1154. {
  1155. if(!**ppsz)
  1156. return;
  1157. if (parts->dwFlags & UPF_SCHEME_OPAQUE)
  1158. {
  1159. parts->pszSegments = *ppsz;
  1160. parts->cSegments = 1;
  1161. }
  1162. else
  1163. {
  1164. //
  1165. // we only need to check for absolute when there was
  1166. // no server segment. if there was a server segment,
  1167. // then absolute has already been set, and we need
  1168. // to preserve any separators that exist in the path
  1169. //
  1170. if(!parts->pszServer && IsSeparator(*ppsz))
  1171. {
  1172. parts->dwFlags |= UPF_SEG_ABSOLUTE;
  1173. (*ppsz)++;
  1174. }
  1175. DefaultBreakPath(ppsz, parts);
  1176. }
  1177. }
  1178. BOOL _ShouldBreakBase(PURLPARTS parts, LPCWSTR pszBase)
  1179. {
  1180. if (pszBase)
  1181. {
  1182. if (!parts->pszScheme)
  1183. return TRUE;
  1184. DWORD cch;
  1185. LPCWSTR pszScheme = FindSchemeW(pszBase, &cch);
  1186. // this means that this will only optimize on known schemes
  1187. // if both urls use URL_SCHEME_UNKNOWN...then we parse both.
  1188. if (pszScheme && parts->eScheme == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
  1189. return TRUE;
  1190. }
  1191. return FALSE;
  1192. }
  1193. /*+++
  1194. BreakUrl()
  1195. Break a URL into its constituent parts
  1196. Parameters
  1197. IN -
  1198. the URL to crack open, need not be fully qualified
  1199. OUT -
  1200. parts absolute or relative may be nonzero (but not both).
  1201. host, anchor and access may be nonzero if they were specified.
  1202. Any which are nonzero point to zero terminated strings.
  1203. Returns
  1204. VOID
  1205. Details -
  1206. WARNING !! function munges the incoming buffer
  1207. ---*/
  1208. #define BreakUrl(s, p) BreakUrls(s, p, NULL, NULL, NULL, 0)
  1209. //
  1210. // **BreakUrls()**
  1211. // RETURNS
  1212. // S_OK if the two urls need to be blended
  1213. // S_FALSE if pszUrl is absolute, or there is no pszBase
  1214. // failure some sort of memory allocation error
  1215. //
  1216. PRIVATE HRESULT
  1217. BreakUrls(LPWSTR pszUrl, PURLPARTS parts, LPCWSTR pszBase, PSHSTRW pstrBase, PURLPARTS partsBase, DWORD dwFlags)
  1218. {
  1219. HRESULT hr = S_FALSE;
  1220. ASSERT(pszUrl && parts);
  1221. ZeroMemory(parts, SIZEOF(URLPARTS));
  1222. if(!*pszUrl)
  1223. parts->dwFlags |= UPF_SEG_EMPTYSEG;
  1224. //
  1225. // WARNING: this order is specific, according to the proposed standard
  1226. //
  1227. if(*pszUrl || pszBase)
  1228. {
  1229. BOOL fConvert;
  1230. BreakScheme(&pszUrl, parts);
  1231. BreakFragment(&pszUrl, parts);
  1232. BreakQuery(&pszUrl, parts);
  1233. //
  1234. // this is the first time that we need to access
  1235. // pszBase if it exists, so this is when we copy and parse
  1236. //
  1237. if (_ShouldBreakBase(parts, pszBase))
  1238. {
  1239. hr = CopyUrlForParse(pszBase, pstrBase, dwFlags);
  1240. // this will be some kind of memory error
  1241. if(FAILED(hr))
  1242. return hr;
  1243. // ASSERT(hr != S_FALSE);
  1244. BreakUrl(pstrBase->GetInplaceStr(), partsBase);
  1245. fConvert = (partsBase->dwFlags & UPF_SCHEME_CONVERT);
  1246. }
  1247. else
  1248. fConvert = (parts->dwFlags & UPF_SCHEME_CONVERT);
  1249. BreakServer(&pszUrl, parts, fConvert);
  1250. BreakPath(&pszUrl, parts);
  1251. }
  1252. return hr;
  1253. }
  1254. /*+++
  1255. BlendParts() & all dependent Blend* functions
  1256. Blends the parts structures into one, taking the relevant
  1257. bits from each one and dumping the unused data.
  1258. Parameters
  1259. IN -
  1260. partsUrl the primary or relative parts - Takes precedence
  1261. partsBase the base or referrers parts
  1262. OUT -
  1263. partsOut the combined result
  1264. Returns
  1265. VOID -
  1266. NOTE: this will frequently NULL out the entire partsBase.
  1267. ---*/
  1268. PRIVATE VOID
  1269. BlendScheme(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1270. {
  1271. if(partsUrl->pszScheme)
  1272. {
  1273. LPCWSTR pszScheme = partsOut->pszScheme = partsUrl->pszScheme;
  1274. URL_SCHEME eScheme = partsOut->eScheme = partsUrl->eScheme;
  1275. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SCHEME_MASK);
  1276. //
  1277. // this checks to make sure that these are the same scheme, and
  1278. // that the scheme is allowed to be used in relative URLs
  1279. // file: is not allowed to because of weirdness with drive letters
  1280. // and \\UNC\shares
  1281. //
  1282. if ((eScheme && (eScheme != partsBase->eScheme) || eScheme == URL_SCHEME_FILE) ||
  1283. (!partsBase->pszScheme) ||
  1284. (partsBase->pszScheme && StrCmpW(pszScheme, partsBase->pszScheme)))
  1285. {
  1286. // they are different schemes. DUMP partsBase.
  1287. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1288. }
  1289. }
  1290. else
  1291. {
  1292. partsOut->pszScheme = partsBase->pszScheme;
  1293. partsOut->eScheme = partsBase->eScheme;
  1294. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SCHEME_MASK);
  1295. }
  1296. }
  1297. PRIVATE VOID
  1298. BlendServer(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1299. {
  1300. ASSERT(partsUrl && partsBase && partsOut);
  1301. //
  1302. // if we have different hosts then everything but the pszAccess is DUMPED
  1303. //
  1304. if(partsUrl->pszServer)
  1305. {
  1306. partsOut->pszServer = partsUrl->pszServer;
  1307. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SERVER_MASK);
  1308. if ((partsBase->pszServer && StrCmpW(partsUrl->pszServer, partsBase->pszServer)))
  1309. {
  1310. // they are different Servers. DUMP partsBase.
  1311. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1312. }
  1313. }
  1314. else
  1315. {
  1316. partsOut->pszServer = partsBase->pszServer;
  1317. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_SERVER_MASK);
  1318. }
  1319. }
  1320. PRIVATE VOID
  1321. BlendPath(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1322. {
  1323. ASSERT(partsUrl && partsBase && partsOut);
  1324. if (partsUrl->dwFlags & UPF_SEG_ABSOLUTE)
  1325. {
  1326. if((partsBase->dwFlags & UPF_SEG_LOCKFIRST) &&
  1327. !(partsUrl->dwFlags & UPF_SEG_LOCKFIRST))
  1328. {
  1329. // this keeps the drive letters when necessary
  1330. partsOut->pszSegments = partsBase->pszSegments;
  1331. partsOut->cSegments = 1; // only keep the first segment
  1332. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK) ;
  1333. partsOut->pszExtraSegs = partsUrl->pszSegments;
  1334. partsOut->cExtraSegs = partsUrl->cSegments;
  1335. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK);
  1336. }
  1337. else
  1338. {
  1339. // just use the absolute path
  1340. partsOut->pszSegments = partsUrl->pszSegments;
  1341. partsOut->cSegments = partsUrl->cSegments;
  1342. partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1343. }
  1344. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1345. }
  1346. else if ((partsBase->dwFlags & UPF_SEG_ABSOLUTE))
  1347. {
  1348. // Adopt path not name
  1349. partsOut->pszSegments = partsBase->pszSegments;
  1350. partsOut->cSegments = partsBase->cSegments;
  1351. partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK );
  1352. if(partsUrl->cSegments || partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
  1353. {
  1354. //
  1355. // this a relative path that needs to be combined
  1356. //
  1357. partsOut->pszExtraSegs = partsUrl->pszSegments;
  1358. partsOut->cExtraSegs = partsUrl->cSegments;
  1359. partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK );
  1360. if (!(partsBase->dwFlags & UPF_EXSEG_DIRECTORY))
  1361. {
  1362. //
  1363. // knock off the file name segment
  1364. // as long as the it isnt the first or the first is not locked
  1365. // or it isnt a dotdot. in the case of http://site/dir/, dir/ is
  1366. // not actually killed, only the NULL terminator following it is.
  1367. //
  1368. LPWSTR pszLast = LastLiveSegment(partsOut->pszSegments, partsOut->cSegments, partsOut->dwFlags & UPF_SEG_LOCKFIRST);
  1369. if(pszLast && !IsDotDot(pszLast))
  1370. {
  1371. if(partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
  1372. partsOut->dwFlags |= UPF_EXSEG_DIRECTORY;
  1373. KILLSEG(pszLast);
  1374. }
  1375. }
  1376. }
  1377. else
  1378. partsOut->dwFlags |= (partsBase->dwFlags & UPF_EXSEG_MASK);
  1379. }
  1380. else if (partsUrl->cSegments)
  1381. {
  1382. partsOut->pszSegments = partsUrl->pszSegments;
  1383. partsOut->cSegments = partsUrl->cSegments;
  1384. partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1385. }
  1386. else if (partsBase->cSegments)
  1387. {
  1388. partsOut->pszSegments = partsBase->pszSegments;
  1389. partsOut->cSegments = partsBase->cSegments;
  1390. partsOut->dwFlags |= (partsBase->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
  1391. }
  1392. // regardless, we want to zero if we have relative segs
  1393. if (partsUrl->cSegments)
  1394. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1395. }
  1396. PRIVATE VOID
  1397. BlendQuery(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1398. {
  1399. if(partsUrl->pszQuery)
  1400. {
  1401. LPCWSTR pszQuery = partsOut->pszQuery = partsUrl->pszQuery;
  1402. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Query_MASK);
  1403. if ((partsBase->pszQuery && StrCmpW(pszQuery, partsBase->pszQuery)))
  1404. {
  1405. // they are different Querys. DUMP partsBase.
  1406. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1407. }
  1408. }
  1409. else
  1410. {
  1411. partsOut->pszQuery = partsBase->pszQuery;
  1412. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Query_MASK);
  1413. }
  1414. }
  1415. PRIVATE VOID
  1416. BlendFragment(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1417. {
  1418. if(partsUrl->pszFragment || partsUrl->cSegments)
  1419. {
  1420. LPCWSTR pszFragment = partsOut->pszFragment = partsUrl->pszFragment;
  1421. // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Fragment_MASK);
  1422. if ((partsBase->pszFragment && StrCmpW(pszFragment, partsBase->pszFragment)))
  1423. {
  1424. // they are different Fragments. DUMP partsBase.
  1425. ZeroMemory(partsBase, SIZEOF(URLPARTS));
  1426. }
  1427. }
  1428. else
  1429. {
  1430. partsOut->pszFragment = partsBase->pszFragment;
  1431. // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Fragment_MASK);
  1432. }
  1433. }
  1434. PRIVATE VOID
  1435. BlendParts(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
  1436. {
  1437. //
  1438. // partsUrl always takes priority over partsBase
  1439. //
  1440. ASSERT(partsUrl && partsBase && partsOut);
  1441. ZeroMemory(partsOut, SIZEOF(URLPARTS));
  1442. BlendScheme( partsUrl, partsBase, partsOut);
  1443. BlendServer( partsUrl, partsBase, partsOut);
  1444. BlendPath( partsUrl, partsBase, partsOut);
  1445. BlendQuery( partsUrl, partsBase, partsOut);
  1446. BlendFragment( partsUrl, partsBase, partsOut);
  1447. }
  1448. PRIVATE VOID
  1449. CanonServer(PURLPARTS parts)
  1450. {
  1451. //
  1452. // we only do stuff if this server is an internet style
  1453. // server. that way it uses FQDNs and IP port numbers
  1454. //
  1455. if (parts->pszServer && (parts->dwFlags & UPF_SCHEME_INTERNET))
  1456. {
  1457. LPWSTR pszName = StrRChrW(parts->pszServer, NULL, L'@');
  1458. if(!pszName)
  1459. pszName = parts->pszServer;
  1460. // this should just point to the FQDN:Port
  1461. CharLowerW(pszName);
  1462. //
  1463. // Ignore default port numbers, and trailing dots on FQDNs
  1464. // which will only cause identical adresses to look different
  1465. //
  1466. {
  1467. WCHAR *pch = StrChrW(pszName, COLON);
  1468. if (pch && parts->eScheme)
  1469. {
  1470. BOOL fIgnorePort = FALSE;
  1471. //
  1472. // FEATURE we should actually be getting this from
  1473. // the services file to find out the default protocol port
  1474. // but we dont think that most people will change them - zekel 17-Dec-96
  1475. //
  1476. switch(parts->eScheme)
  1477. {
  1478. case URL_SCHEME_HTTP:
  1479. if(StrCmpW(pch, L":80") == 0)
  1480. fIgnorePort = TRUE;
  1481. break;
  1482. case URL_SCHEME_FTP:
  1483. if(StrCmpW(pch, L":21") == 0)
  1484. fIgnorePort = TRUE;
  1485. break;
  1486. case URL_SCHEME_GOPHER:
  1487. if(StrCmpW(pch, L":70") == 0)
  1488. fIgnorePort = TRUE;
  1489. break;
  1490. case URL_SCHEME_HTTPS:
  1491. if(StrCmpW(pch, L":443") == 0)
  1492. fIgnorePort = TRUE;
  1493. break;
  1494. default:
  1495. break;
  1496. }
  1497. if(fIgnorePort)
  1498. TERMSTR(pch); // It is the default: ignore it
  1499. }
  1500. }
  1501. }
  1502. }
  1503. PRIVATE VOID
  1504. CanonCombineSegs(PURLPARTS parts)
  1505. {
  1506. ASSERT(parts);
  1507. ASSERT(parts->pszExtraSegs && parts->cExtraSegs);
  1508. LPWSTR pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
  1509. LPWSTR pszExtra = parts->pszExtraSegs;
  1510. DWORD iExtra = 0;
  1511. DWORD cExtras = parts->cExtraSegs;
  1512. if(!IsLiveSegment(pszExtra))
  1513. pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
  1514. while(pszExtra && IsDotDot(pszExtra))
  1515. {
  1516. if (pszLast)
  1517. KILLSEG(pszLast);
  1518. KILLSEG(pszExtra);
  1519. pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
  1520. pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
  1521. }
  1522. }
  1523. PRIVATE VOID
  1524. CanonSegments(LPWSTR pszSeg,
  1525. DWORD cSegs,
  1526. BOOL fLockFirst)
  1527. {
  1528. DWORD iSeg = 0;
  1529. LPWSTR pszLastSeg = NULL;
  1530. LPWSTR pszFirstSeg = pszSeg;
  1531. BOOL fLastIsFirst = TRUE;
  1532. BOOL fFirstSeg = TRUE;
  1533. ASSERT (pszSeg && cSegs);
  1534. pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
  1535. while (pszSeg)
  1536. {
  1537. if(IsDot(pszSeg))
  1538. {
  1539. // if it is just a "." we can discard the segment
  1540. KILLSEG(pszSeg);
  1541. }
  1542. else if(IsDotDot(pszSeg))
  1543. {
  1544. // if it is ".." then we discard it and the last seg
  1545. //
  1546. // if we are at the first (root) or
  1547. // the last is the root and it is locked
  1548. // then we dont want to do anything
  1549. //
  1550. if(pszLastSeg && !IsDotDot(pszLastSeg) && !(fLastIsFirst && fLockFirst))
  1551. {
  1552. KILLSEG(pszLastSeg);
  1553. pszLastSeg = NULL;
  1554. KILLSEG(pszSeg);
  1555. }
  1556. }
  1557. if(IsLiveSegment(pszSeg))
  1558. {
  1559. if(!pszLastSeg && fFirstSeg)
  1560. fLastIsFirst = TRUE;
  1561. else
  1562. fLastIsFirst = FALSE;
  1563. pszLastSeg = pszSeg;
  1564. fFirstSeg = FALSE;
  1565. }
  1566. else
  1567. {
  1568. pszLastSeg = LastLiveSegment(pszFirstSeg, iSeg, fLockFirst);
  1569. }
  1570. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1571. }
  1572. }
  1573. PRIVATE VOID
  1574. CanonPath(PURLPARTS parts)
  1575. {
  1576. ASSERT(parts);
  1577. if(parts->cSegments)
  1578. CanonSegments(parts->pszSegments, parts->cSegments, (parts->dwFlags & UPF_SEG_LOCKFIRST));
  1579. if(parts->cExtraSegs)
  1580. CanonSegments(parts->pszExtraSegs, parts->cExtraSegs, FALSE);
  1581. if(parts->cExtraSegs)
  1582. CanonCombineSegs(parts);
  1583. }
  1584. PRIVATE VOID
  1585. CanonParts(PURLPARTS parts)
  1586. {
  1587. ASSERT(parts);
  1588. //CanonScheme(parts);
  1589. CanonServer(parts);
  1590. CanonPath(parts);
  1591. //CanonQuery(parts);
  1592. //CanonFragment(parts);
  1593. }
  1594. PRIVATE HRESULT
  1595. BuildScheme(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1596. {
  1597. HRESULT hr = S_OK;
  1598. ASSERT(parts && pstr);
  1599. if(parts->pszScheme)
  1600. {
  1601. hr = pstr->Append(parts->pszScheme);
  1602. if(SUCCEEDED(hr))
  1603. hr = pstr->Append(COLON);
  1604. }
  1605. return hr;
  1606. }
  1607. PRIVATE HRESULT
  1608. BuildServer(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1609. {
  1610. HRESULT hr = S_OK;
  1611. ASSERT(parts && pstr);
  1612. switch(parts->eScheme)
  1613. {
  1614. case URL_SCHEME_MK:
  1615. // CraigC's "mk:" has no // but acts like it does
  1616. break;
  1617. case URL_SCHEME_FILE:
  1618. if ((dwFlags & URL_WININET_COMPATIBILITY) || (dwFlags & URL_FILE_USE_PATHURL))
  1619. {
  1620. if(parts->pszServer && *parts->pszServer)
  1621. hr = pstr->Append(L"////");
  1622. else if (parts->pszSegments && IsDrive(parts->pszSegments))
  1623. hr = pstr->Append(SLASH);
  1624. else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
  1625. hr = pstr->Append(L"//");
  1626. }
  1627. else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
  1628. hr = pstr->Append(L"//");
  1629. break;
  1630. default:
  1631. if(parts->pszServer && SUCCEEDED(hr))
  1632. hr = pstr->Append(L"//");
  1633. break;
  1634. }
  1635. if(parts->pszServer && SUCCEEDED(hr))
  1636. hr = pstr->Append(parts->pszServer);
  1637. return hr;
  1638. }
  1639. PRIVATE HRESULT
  1640. BuildSegments(LPWSTR pszSeg, DWORD cSegs, PSHSTRW pstr, BOOL fRoot, BOOL *pfSlashLast)
  1641. {
  1642. DWORD iSeg = 0;
  1643. HRESULT hr = S_FALSE;
  1644. *pfSlashLast = FALSE;
  1645. ASSERT(pszSeg && pstr);
  1646. pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
  1647. if(!fRoot && pszSeg)
  1648. {
  1649. hr = pstr->Append(pszSeg);
  1650. if(SUCCEEDED(hr))
  1651. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1652. else
  1653. pszSeg = NULL;
  1654. }
  1655. while (pszSeg)
  1656. {
  1657. hr = pstr->Append(SLASH);
  1658. if(SUCCEEDED(hr) && *pszSeg)
  1659. {
  1660. hr = pstr->Append(pszSeg);
  1661. *pfSlashLast = FALSE;
  1662. }
  1663. else
  1664. *pfSlashLast = TRUE;
  1665. if(SUCCEEDED(hr))
  1666. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1667. else
  1668. break;
  1669. }
  1670. return hr;
  1671. }
  1672. PRIVATE HRESULT
  1673. BuildPath(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1674. {
  1675. HRESULT hr = S_OK;
  1676. BOOL fSlashLast = FALSE;
  1677. DWORD iSeg;
  1678. LPWSTR pszSegFirst = NULL;
  1679. ASSERT(parts && pstr);
  1680. if(parts->cSegments)
  1681. {
  1682. hr = BuildSegments(parts->pszSegments, parts->cSegments, pstr, parts->dwFlags & UPF_SEG_ABSOLUTE, &fSlashLast);
  1683. if (fSlashLast)
  1684. pstr->Append(SLASH);
  1685. }
  1686. if(SUCCEEDED(hr) && parts->cExtraSegs)
  1687. {
  1688. BOOL f = fSlashLast;
  1689. hr = BuildSegments(parts->pszExtraSegs, parts->cExtraSegs, pstr, !fSlashLast, &fSlashLast);
  1690. if (fSlashLast)
  1691. pstr->Append(SLASH);
  1692. if (hr == S_FALSE)
  1693. fSlashLast = f;
  1694. }
  1695. // trailing slash on a server name for IIS
  1696. if( !fSlashLast &&
  1697. (
  1698. (parts->dwFlags & UPF_EXSEG_DIRECTORY) ||
  1699. // if this is just a server name by itself
  1700. (!FirstLiveSegment(parts->pszSegments, &iSeg, parts->cSegments) &&
  1701. !FirstLiveSegment(parts->pszExtraSegs, &iSeg, parts->cExtraSegs) &&
  1702. parts->dwFlags & UPF_SEG_ABSOLUTE)
  1703. )
  1704. )
  1705. {
  1706. hr = pstr->Append(SLASH);
  1707. }
  1708. return hr;
  1709. }
  1710. PRIVATE HRESULT
  1711. BuildQuery(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1712. {
  1713. HRESULT hr = S_OK;
  1714. ASSERT(parts && pstr);
  1715. if(parts->pszQuery)
  1716. {
  1717. hr = pstr->Append(QUERY);
  1718. if(SUCCEEDED(hr))
  1719. hr = pstr->Append(parts->pszQuery);
  1720. }
  1721. return hr;
  1722. }
  1723. PRIVATE HRESULT
  1724. BuildFragment(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1725. {
  1726. HRESULT hr = S_OK;
  1727. ASSERT(parts && pstr);
  1728. if(parts->pszFragment)
  1729. {
  1730. hr = pstr->Append(POUND);
  1731. if(SUCCEEDED(hr))
  1732. hr = pstr->Append(parts->pszFragment);
  1733. }
  1734. return hr;
  1735. }
  1736. PRIVATE HRESULT
  1737. BuildUrl(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
  1738. {
  1739. HRESULT hr;
  1740. ASSERT(parts && pstr);
  1741. if(
  1742. (SUCCEEDED(hr = BuildScheme(parts, dwFlags, pstr))) &&
  1743. (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstr))) &&
  1744. (SUCCEEDED(hr = BuildPath(parts, dwFlags, pstr))) &&
  1745. (SUCCEEDED(hr = BuildQuery(parts, dwFlags, pstr)))
  1746. )
  1747. hr = BuildFragment(parts, dwFlags, pstr);
  1748. return hr;
  1749. }
  1750. /*+++
  1751. SHUrlEscape()
  1752. Escapes an URL
  1753. right now, i am only escaping stuff in the Path part of the URL
  1754. Parameters
  1755. IN -
  1756. pszUrl URL to examine
  1757. pstrOut SHSTR destination
  1758. dwFlags the relevant URL_* flags,
  1759. Returns
  1760. HRESULT -
  1761. SUCCESS S_OK
  1762. ERROR only E_OUTOFMEMORY
  1763. Helper Routines
  1764. Escape*(part) each part gets its own escape routine (ie EscapeScheme)
  1765. EscapeSpaces will only escape spaces (WININET compatibility mostly)
  1766. EscapeSegmentsGetNeededSize gets the required size of destination buffer for all path segments
  1767. EscapeLiveSegment does the work of escaping each path segment
  1768. ---*/
  1769. PRIVATE HRESULT
  1770. EscapeSpaces(LPCWSTR psz, PSHSTRW pstr, DWORD dwFlags)
  1771. {
  1772. HRESULT hr = S_OK;
  1773. LPCWSTR pch;
  1774. DWORD cSpaces = 0;
  1775. ASSERT(psz && pstr);
  1776. pstr->Reset();
  1777. for (pch = psz; *pch; pch++)
  1778. {
  1779. if (*pch == SPC)
  1780. cSpaces++;
  1781. }
  1782. if(cSpaces)
  1783. {
  1784. hr = pstr->SetSize(lstrlenW(psz) + cSpaces * 2 + 1);
  1785. if(SUCCEEDED(hr))
  1786. {
  1787. int cchRemaing = pstr->GetSize();
  1788. LPWSTR pchOut = pstr->GetInplaceStr();
  1789. for (pch = psz; *pch; pch++)
  1790. {
  1791. if ((*pch == POUND || *pch == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  1792. {
  1793. int cchCopied;
  1794. StringCchCopyW(pchOut, cchRemaing, pch);
  1795. cchCopied = lstrlenW(pchOut);
  1796. pchOut += cchCopied;
  1797. cchRemaing -= cchCopied;
  1798. break;
  1799. }
  1800. if (*pch == SPC)
  1801. {
  1802. *pchOut++ = HEX_ESCAPE;
  1803. *pchOut++ = L'2';
  1804. *pchOut++ = L'0';
  1805. cchRemaing -= 3;
  1806. }
  1807. else
  1808. {
  1809. *pchOut++ = *pch;
  1810. cchRemaing--;
  1811. }
  1812. ASSERT(cchRemaing >= 0);
  1813. }
  1814. TERMSTR(pchOut);
  1815. }
  1816. }
  1817. else
  1818. {
  1819. hr = pstr->SetStr(psz);
  1820. }
  1821. return hr;
  1822. }
  1823. inline PRIVATE HRESULT
  1824. EscapeScheme(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1825. {
  1826. ASSERT(partsUrl && partsOut);
  1827. partsOut->pszScheme = partsUrl->pszScheme;
  1828. partsOut->eScheme = partsUrl->eScheme;
  1829. return S_OK;
  1830. }
  1831. inline PRIVATE HRESULT
  1832. EscapeServer(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1833. {
  1834. ASSERT(partsUrl && partsOut);
  1835. partsOut->pszServer = partsUrl->pszServer;
  1836. return S_OK;
  1837. }
  1838. inline PRIVATE HRESULT
  1839. EscapeQuery(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1840. {
  1841. ASSERT(partsUrl && partsOut);
  1842. partsOut->pszQuery = partsUrl->pszQuery;
  1843. return S_OK;
  1844. }
  1845. inline PRIVATE HRESULT
  1846. EscapeFragment(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1847. {
  1848. ASSERT(partsUrl && partsOut);
  1849. partsOut->pszFragment = partsUrl->pszFragment;
  1850. return S_OK;
  1851. }
  1852. PRIVATE BOOL
  1853. GetEscapeStringSize(LPWSTR psz, DWORD dwFlags, LPDWORD pcch)
  1854. {
  1855. BOOL fResize = FALSE;
  1856. ASSERT(psz);
  1857. ASSERT(pcch);
  1858. for (*pcch = 0; *psz; psz++)
  1859. {
  1860. (*pcch)++;
  1861. if(!IsSafePathChar(*psz) ||
  1862. ((dwFlags & URL_ESCAPE_PERCENT) && (*psz == HEX_ESCAPE)))
  1863. {
  1864. fResize = TRUE;
  1865. *pcch += 2;
  1866. }
  1867. }
  1868. // for the NULL term
  1869. (*pcch)++;
  1870. return fResize;
  1871. }
  1872. PRIVATE DWORD
  1873. EscapeSegmentsGetNeededSize(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags)
  1874. {
  1875. DWORD cchNeeded = 0;
  1876. BOOL fResize = FALSE;
  1877. LPWSTR pszSeg;
  1878. DWORD iSeg;
  1879. ASSERT(pszSegments && cSegs);
  1880. pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
  1881. while (IsLiveSegment(pszSeg))
  1882. {
  1883. DWORD cch;
  1884. if(GetEscapeStringSize(pszSeg, dwFlags, &cch))
  1885. fResize = TRUE;
  1886. cchNeeded += cch;
  1887. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1888. }
  1889. return fResize ? cchNeeded : 0;
  1890. }
  1891. PRIVATE VOID
  1892. EscapeString(LPCWSTR pszSeg, DWORD dwFlags, LPWSTR *ppchOut)
  1893. {
  1894. LPWSTR pchIn; // This pointer has been trusted to not modify it's contents, just iterate.
  1895. LPWSTR pchOut = *ppchOut;
  1896. WCHAR ch;
  1897. for (pchIn = (LPWSTR)pszSeg; *pchIn; pchIn++)
  1898. {
  1899. ch = *pchIn;
  1900. if (!IsSafePathChar(ch) ||
  1901. ((dwFlags & URL_ESCAPE_PERCENT) && (ch == HEX_ESCAPE)))
  1902. {
  1903. *pchOut++ = HEX_ESCAPE;
  1904. *pchOut++ = hex[(ch >> 4) & 15];
  1905. *pchOut++ = hex[ch & 15];
  1906. }
  1907. else
  1908. *pchOut++ = *pchIn;
  1909. }
  1910. TERMSTR(pchOut);
  1911. // move past the terminator
  1912. pchOut++;
  1913. *ppchOut = pchOut;
  1914. }
  1915. PRIVATE HRESULT
  1916. EscapeSegments(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1917. {
  1918. DWORD cchNeeded;
  1919. HRESULT hr = S_OK;
  1920. ASSERT(pszSegments && cSegs && partsOut && pstr);
  1921. cchNeeded = EscapeSegmentsGetNeededSize(pszSegments, cSegs, dwFlags);
  1922. if(cchNeeded)
  1923. {
  1924. ASSERT(pstr);
  1925. hr = pstr->SetSize(cchNeeded);
  1926. if(SUCCEEDED(hr))
  1927. {
  1928. LPWSTR pchOut = pstr->GetInplaceStr();
  1929. LPWSTR pszSeg;
  1930. DWORD iSeg;
  1931. partsOut->pszSegments = pchOut;
  1932. partsOut->cSegments = 0;
  1933. pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
  1934. while (IsLiveSegment(pszSeg))
  1935. {
  1936. EscapeString(pszSeg, dwFlags, &pchOut);
  1937. partsOut->cSegments++;
  1938. pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
  1939. }
  1940. }
  1941. }
  1942. else
  1943. {
  1944. partsOut->cSegments = cSegs;
  1945. partsOut->pszSegments = pszSegments;
  1946. }
  1947. return hr;
  1948. }
  1949. PRIVATE HRESULT
  1950. EscapePath(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
  1951. {
  1952. HRESULT hr = S_OK;
  1953. ASSERT(partsUrl && partsOut && pstr);
  1954. if(partsUrl->cSegments)
  1955. {
  1956. hr = EscapeSegments(partsUrl->pszSegments, partsUrl->cSegments, dwFlags, partsOut, pstr);
  1957. }
  1958. else
  1959. {
  1960. partsOut->cSegments = 0;
  1961. partsOut->pszSegments = NULL;
  1962. }
  1963. return hr;
  1964. }
  1965. HRESULT
  1966. SHUrlEscape (LPCWSTR pszUrl,
  1967. PSHSTRW pstrOut,
  1968. DWORD dwFlags)
  1969. {
  1970. #ifdef TESTING_SPACES_ONLY
  1971. return EscapeSpaces(pszUrl, pstrOut, dwFlags);
  1972. #else //TESTING_SPACES_ONLY
  1973. SHSTRW strUrl;
  1974. HRESULT hr;
  1975. ASSERT(pszUrl && pstrOut);
  1976. if(!pszUrl || !pstrOut)
  1977. return E_INVALIDARG;
  1978. //
  1979. // EscapeSpaces is remarkably poor,
  1980. // but so is this kind of functionality...
  1981. // it doesnt do any kind of real parsing, it
  1982. // only looks for spaces and escapes them...
  1983. //
  1984. if(dwFlags & URL_ESCAPE_SPACES_ONLY)
  1985. return EscapeSpaces(pszUrl, pstrOut, dwFlags);
  1986. // We are just passed a segment so we only want to
  1987. // escape that and nothing else. Don't look for
  1988. // URL pieces.
  1989. if(dwFlags & URL_ESCAPE_SEGMENT_ONLY)
  1990. {
  1991. URLPARTS partsOut;
  1992. SHSTRW strTemp;
  1993. EscapeSegments((LPWSTR)pszUrl, 1, dwFlags, &partsOut, &strTemp);
  1994. pstrOut->SetStr(partsOut.pszSegments);
  1995. return S_OK;
  1996. }
  1997. pstrOut->Reset();
  1998. hr = strUrl.SetStr(pszUrl);
  1999. if(SUCCEEDED(hr))
  2000. {
  2001. URLPARTS partsUrl, partsOut;
  2002. SHSTRW strPath;
  2003. BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
  2004. ZeroMemory(&partsOut, SIZEOF(URLPARTS));
  2005. //
  2006. // NOTE the only function here that is really active right now is the EscapePath
  2007. // if some other part needs to be escaped, then add a new SHSTR in the 4th param
  2008. // and change the appropriate subroutine
  2009. //
  2010. if(
  2011. (SUCCEEDED(hr = EscapeScheme(&partsUrl, dwFlags, &partsOut, NULL)))
  2012. && (SUCCEEDED(hr = EscapeServer(&partsUrl, dwFlags, &partsOut, NULL)))
  2013. && (SUCCEEDED(hr = EscapePath(&partsUrl, dwFlags, &partsOut, &strPath)))
  2014. && (SUCCEEDED(hr = EscapeQuery(&partsUrl, dwFlags, &partsOut, NULL)))
  2015. && (SUCCEEDED(hr = EscapeFragment(&partsUrl, dwFlags, &partsOut, NULL)))
  2016. )
  2017. {
  2018. partsOut.dwFlags = partsUrl.dwFlags;
  2019. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2020. }
  2021. }
  2022. else
  2023. hr = E_OUTOFMEMORY;
  2024. return hr;
  2025. #endif //TESTING_SPACES_ONLY
  2026. }
/*+++
  SHUrlUnescape()
    Unescapes a string in place.  This is OK because
    the unescaped string is never longer than the original.
  Parameters
  IN -
    psz         string to unescape in place
    dwFlags     the relevant URL_* flags
  Returns
  HRESULT -
    SUCCESS     S_OK
    ERROR       does not return an error right now
  Helper Routines
    HexToWord                   takes a hex digit and returns a WORD with its value, or -1
    IsEscapedOctetA/W           looks at a ptr for "%XX" where X is a hex digit
    TranslateEscapedOctetA/W    translates "%XX" to an 8 bit char
---*/
  2044. PRIVATE WORD
  2045. HexToWord(WCHAR ch)
  2046. {
  2047. if(ch >= TEXT('0') && ch <= TEXT('9'))
  2048. return (WORD) ch - TEXT('0');
  2049. if(ch >= TEXT('A') && ch <= TEXT('F'))
  2050. return (WORD) ch - TEXT('A') + 10;
  2051. if(ch >= TEXT('a') && ch <= TEXT('f'))
  2052. return (WORD) ch - TEXT('a') + 10;
  2053. ASSERT(FALSE); //we have tried to use a non-hex number
  2054. return (WORD) -1;
  2055. }
  2056. PRIVATE BOOL inline
  2057. IsEscapedOctetW(LPCWSTR pch)
  2058. {
  2059. return (pch[0] == HEX_ESCAPE && IsHex(pch[1]) && IsHex(pch[2])) ? TRUE : FALSE;
  2060. }
  2061. PRIVATE BOOL inline
  2062. IsEscapedOctetA(LPCSTR pch)
  2063. {
  2064. return (pch[0] == HEX_ESCAPE_A && IsHex((WCHAR)pch[1]) && IsHex((WCHAR)pch[2])) ? TRUE : FALSE;
  2065. }
  2066. PRIVATE WCHAR
  2067. TranslateEscapedOctetW(LPCWSTR pch)
  2068. {
  2069. WCHAR ch;
  2070. ASSERT(IsEscapedOctetW(pch));
  2071. pch++;
  2072. ch = (WCHAR) HexToWord(*pch++) * 16; // hi nibble
  2073. ch += HexToWord(*pch); // lo nibble
  2074. return ch;
  2075. }
  2076. PRIVATE CHAR
  2077. TranslateEscapedOctetA(LPCSTR pch)
  2078. {
  2079. CHAR ch;
  2080. ASSERT(IsEscapedOctetA(pch));
  2081. pch++;
  2082. ch = (CHAR) HexToWord(*pch++) * 16; // hi nibble
  2083. ch += HexToWord(*pch); // lo nibble
  2084. return ch;
  2085. }
  2086. HRESULT SHUrlUnescapeA(LPSTR psz, DWORD dwFlags)
  2087. {
  2088. CHAR *pchSrc = psz;
  2089. CHAR *pchDst = psz;
  2090. while (*pchSrc)
  2091. {
  2092. if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  2093. {
  2094. while (*pchDst++ = *pchSrc++) {};
  2095. break;
  2096. }
  2097. if (IsEscapedOctetA(pchSrc))
  2098. {
  2099. CHAR ch = TranslateEscapedOctetA(pchSrc);
  2100. *pchDst++ = ch;
  2101. pchSrc += 3; // enuff for "%XX"
  2102. }
  2103. else
  2104. {
  2105. *pchDst++ = *pchSrc++;
  2106. }
  2107. }
  2108. TERMSTR(pchDst);
  2109. return S_OK;
  2110. }
  2111. HRESULT SHUrlUnescapeW(LPWSTR psz, DWORD dwFlags)
  2112. {
  2113. WCHAR *pchSrc = psz;
  2114. WCHAR *pchDst = psz;
  2115. while (*pchSrc)
  2116. {
  2117. if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
  2118. {
  2119. while (*pchDst++ = *pchSrc++) {};
  2120. break;
  2121. }
  2122. if (IsEscapedOctetW(pchSrc))
  2123. {
  2124. WCHAR ch = TranslateEscapedOctetW(pchSrc);
  2125. *pchDst++ = ch;
  2126. pchSrc += 3; // enuff for "%XX"
  2127. }
  2128. else
  2129. {
  2130. *pchDst++ = *pchSrc++;
  2131. }
  2132. }
  2133. TERMSTR(pchDst);
  2134. return S_OK;
  2135. }
  2136. PRIVATE HRESULT
  2137. BuildDosPath(PURLPARTS parts, PSHSTRW pstrOut, DWORD dwFlags)
  2138. {
  2139. HRESULT hr;
  2140. // this will disable a preceding slash when there is a drive
  2141. if(parts->pszSegments && IsDrive(parts->pszSegments))
  2142. parts->dwFlags = (parts->dwFlags & ~UPF_SEG_ABSOLUTE);
  2143. // if there is a zero length server then
  2144. // we skip building it
  2145. if(parts->pszServer && !*parts->pszServer)
  2146. parts->pszServer = NULL;
  2147. // this prevents all the special file goo checking
  2148. parts->eScheme = URL_SCHEME_UNKNOWN;
  2149. //
  2150. // then go ahead and put the path together
  2151. if( (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstrOut))) &&
  2152. (!parts->cSegments || SUCCEEDED(hr = BuildPath(parts, dwFlags, pstrOut)))
  2153. )
  2154. {
  2155. // then decode it cuz paths arent escaped
  2156. if (IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2157. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2158. else
  2159. #ifndef UNIX
  2160. ConvertChar(pstrOut->GetInplaceStr(), SLASH, WHACK, TRUE);
  2161. #else
  2162. ConvertChar(pstrOut->GetInplaceStr(), WHACK, SLASH, TRUE);
  2163. #endif
  2164. if(IsFlagClear(parts->dwFlags, UPF_FILEISPATHURL))
  2165. SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
  2166. if(IsDriveUrl(*pstrOut))
  2167. {
  2168. LPWSTR pszTemp = pstrOut->GetInplaceStr();
  2169. pszTemp[1] = COLON;
  2170. }
  2171. }
  2172. return hr;
  2173. }
  2174. HRESULT
  2175. SHPathCreateFromUrl(LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
  2176. {
  2177. HRESULT hr;
  2178. SHSTRW strUrl;
  2179. ASSERT(pszUrl && pstrOut);
  2180. pstrOut->Reset();
  2181. hr = strUrl.SetStr(pszUrl);
  2182. if(SUCCEEDED(hr))
  2183. {
  2184. URLPARTS partsUrl;
  2185. // first we need to break it open
  2186. BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
  2187. // then we make sure it is a file:
  2188. if(partsUrl.eScheme == URL_SCHEME_FILE)
  2189. {
  2190. hr = BuildDosPath(&partsUrl, pstrOut, dwFlags);
  2191. }
  2192. else
  2193. hr = E_INVALIDARG;
  2194. }
  2195. return hr;
  2196. }
  2197. HRESULT
  2198. SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags)
  2199. {
  2200. HRESULT hr;
  2201. SHSTRW strPath;
  2202. ASSERT(pszPath && pstrOut);
  2203. if(PathIsURLW(pszPath))
  2204. {
  2205. if(SUCCEEDED(hr = pstrOut->SetStr(pszPath)))
  2206. return S_FALSE;
  2207. else
  2208. return hr;
  2209. }
  2210. pstrOut->Reset();
  2211. hr = strPath.SetStr(pszPath);
  2212. TrimAndStripInsignificantWhite(strPath.GetInplaceStr());
  2213. if(SUCCEEDED(hr))
  2214. {
  2215. URLPARTS partsIn, partsOut;
  2216. SHSTRW strEscapedPath, strEscapedServer;
  2217. LPWSTR pch = strPath.GetInplaceStr();
  2218. ZeroMemory(&partsIn, SIZEOF(URLPARTS));
  2219. partsIn.pszScheme = (LPWSTR)c_szFileScheme;
  2220. partsIn.eScheme = URL_SCHEME_FILE;
  2221. partsIn.dwFlags = UPF_SCHEME_CONVERT;
  2222. // first break the path
  2223. BreakFragment(&pch, &partsIn);
  2224. BreakServer(&pch, &partsIn, TRUE);
  2225. BreakPath(&pch, &partsIn);
  2226. partsOut = partsIn;
  2227. // then escape the path if we arent using path URLs
  2228. if (IsFlagClear(dwFlags, URL_FILE_USE_PATHURL))
  2229. {
  2230. hr = EscapePath(&partsIn, dwFlags | URL_ESCAPE_PERCENT, &partsOut, &strEscapedPath);
  2231. if(SUCCEEDED(hr) && partsOut.pszServer)
  2232. {
  2233. //
  2234. // i am treating the pszServer exactly like a path segment
  2235. //
  2236. DWORD cchNeeded;
  2237. if(GetEscapeStringSize(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &cchNeeded) &&
  2238. SUCCEEDED(hr = strEscapedServer.SetSize(cchNeeded)))
  2239. {
  2240. pch = strEscapedServer.GetInplaceStr();
  2241. EscapeString(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &pch);
  2242. partsOut.pszServer = strEscapedServer.GetInplaceStr();
  2243. }
  2244. }
  2245. }
  2246. if(!partsOut.pszServer && IsFlagSet(partsOut.dwFlags, UPF_SEG_ABSOLUTE))
  2247. partsOut.pszServer = L"";
  2248. // then build the URL
  2249. if(SUCCEEDED(hr))
  2250. {
  2251. if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2252. {
  2253. if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
  2254. hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
  2255. }
  2256. else
  2257. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2258. }
  2259. if (SUCCEEDED(hr) && (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)))
  2260. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2261. }
  2262. return hr;
  2263. }
  2264. /*+++
  2265. SHUrlParse()
  2266. Canonicalize an URL
  2267. or Combine and Canonicalize two URLs
  2268. Parameters
  2269. IN -
  2270. pszBase the base or referring URL, may be NULL
  2271. pszUrl the relative URL
  2272. dwFlags the relevant URL_* flags,
  2273. Returns
  2274. HRESULT -
  2275. SUCCESS S_OK
  2276. ERROR appropriate error, usually just E_OUTOFMEMORY;
  2277. NOTE: pszUrl will always take precedence over pszBase.
  2278. ---*/
  2279. HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
  2280. {
  2281. HRESULT hr = S_OK;
  2282. URLPARTS partsUrl, partsOut, partsBase;
  2283. SHSTRW strBase;
  2284. SHSTRW strUrl;
  2285. ASSERT(pszUrl);
  2286. ASSERT(pstrOut);
  2287. TraceMsgW(TF_URL | TF_FUNC, "entering SHUrlParse(%s, %s, 0x%X", pszBase,pszUrl ? pszUrl : L"NULL", dwFlags);
  2288. pstrOut->Reset();
  2289. //
  2290. // Don't bother parsing if all we have in an inter-page link as the
  2291. // pszUrl and no pszBase to parse
  2292. //
  2293. if (pszUrl[0] == POUND && (!pszBase || !*pszBase))
  2294. {
  2295. hr = pstrOut->SetStr(pszUrl);
  2296. goto quit;
  2297. }
  2298. //
  2299. // for Perf reasons we want to parse the relative url first.
  2300. // if it is an absolute URL, we need never look at the base.
  2301. //
  2302. hr = CopyUrlForParse(pszUrl, &strUrl, dwFlags);
  2303. if(FAILED(hr))
  2304. goto quit;
  2305. // -- Cybersitter compat ----
  2306. // Some bug fix broke the original parser. No time to go back and
  2307. // fix it, but since we know what to expect, we'll return this straight instead.
  2308. // Basically, when we canonicalize ://, we produce :///
  2309. if (!StrCmpW(strUrl, L"://"))
  2310. {
  2311. hr = pstrOut->SetStr(L":///");
  2312. goto quit;
  2313. }
  2314. //
  2315. // BreakUrls will decide if it is necessary to look at the relative
  2316. //
  2317. hr = BreakUrls(strUrl.GetInplaceStr(), &partsUrl, pszBase, &strBase, &partsBase, dwFlags);
  2318. if(FAILED(hr))
  2319. goto quit;
  2320. if(S_OK == hr) {
  2321. //
  2322. // this is where the real combination logic happens
  2323. // this first parts is the one that takes precedence
  2324. //
  2325. BlendParts(&partsUrl, &partsBase, &partsOut);
  2326. }
  2327. else
  2328. partsOut = partsUrl;
  2329. //
  2330. // we will now do the work of putting it together
  2331. // if these fail, it is because we are out of memory.
  2332. //
  2333. if (!(dwFlags & URL_DONT_SIMPLIFY))
  2334. CanonParts(&partsOut);
  2335. if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
  2336. {
  2337. if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
  2338. hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
  2339. }
  2340. else
  2341. hr = BuildUrl(&partsOut, dwFlags, pstrOut);
  2342. if(SUCCEEDED(hr))
  2343. {
  2344. if (dwFlags & URL_UNESCAPE)
  2345. SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
  2346. if (dwFlags & URL_ESCAPE_SPACES_ONLY || dwFlags & URL_ESCAPE_UNSAFE)
  2347. {
  2348. //
  2349. // we are going to reuse strUrl here
  2350. //
  2351. hr = strUrl.SetStr(*pstrOut);
  2352. if(SUCCEEDED(hr))
  2353. hr = SHUrlEscape(strUrl, pstrOut, dwFlags);
  2354. }
  2355. }
  2356. if (SUCCEEDED(hr) &&
  2357. (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)) &&
  2358. (partsOut.eScheme == URL_SCHEME_FILE))
  2359. WininetFixFileSlashes(pstrOut->GetInplaceStr());
  2360. quit:
  2361. if(FAILED(hr))
  2362. {
  2363. pstrOut->Reset();
  2364. TraceMsg(TF_URL | TF_FUNC, TEXT("FAILED SHUrlParse() hr = 0x%X\n"), hr);
  2365. }
  2366. else
  2367. TraceMsgW(TF_URL | TF_FUNC, "SUCCEEDED SHUrlParse() %s\n", (LPCWSTR)*pstrOut);
  2368. return hr;
  2369. }
  2370. typedef struct _LOGON {
  2371. LPWSTR pszUser;
  2372. LPWSTR pszPass;
  2373. LPWSTR pszHost;
  2374. LPWSTR pszPort;
  2375. } LOGON, *PLOGON;
  2376. PRIVATE void
  2377. BreakLogon(LPWSTR psz, PLOGON plo)
  2378. {
  2379. ASSERT(psz);
  2380. ASSERT(plo);
  2381. WCHAR *pch = StrChrW(psz, L'@');
  2382. if(pch)
  2383. {
  2384. TERMSTR(pch);
  2385. plo->pszHost = pch + 1;
  2386. plo->pszUser = psz;
  2387. pch = StrChrW(psz, COLON);
  2388. if (pch)
  2389. {
  2390. TERMSTR(pch);
  2391. plo->pszPass = pch + 1;
  2392. }
  2393. }
  2394. else
  2395. plo->pszHost = psz;
  2396. pch = StrChrW(plo->pszHost, COLON);
  2397. if (pch)
  2398. {
  2399. TERMSTR(pch);
  2400. plo->pszPort = pch + 1;
  2401. }
  2402. }
  2403. PRIVATE HRESULT
  2404. InternetGetPart(DWORD dwPart, PURLPARTS parts, PSHSTRW pstr, DWORD dwFlags)
  2405. {
  2406. HRESULT hr = E_FAIL;
  2407. if(parts->pszServer)
  2408. {
  2409. LOGON lo = {0};
  2410. BreakLogon(parts->pszServer, &lo);
  2411. switch (dwPart)
  2412. {
  2413. case URL_PART_HOSTNAME:
  2414. hr = pstr->Append(lo.pszHost);
  2415. break;
  2416. case URL_PART_USERNAME:
  2417. hr = pstr->Append(lo.pszUser);
  2418. break;
  2419. case URL_PART_PASSWORD:
  2420. hr = pstr->Append(lo.pszPass);
  2421. break;
  2422. case URL_PART_PORT:
  2423. hr = pstr->Append(lo.pszPort);
  2424. break;
  2425. default:
  2426. ASSERT(FALSE);
  2427. }
  2428. }
  2429. return hr;
  2430. }
  2431. PRIVATE HRESULT
  2432. SHUrlGetPart(PSHSTRW pstrIn, PSHSTRW pstrOut, DWORD dwPart, DWORD dwFlags)
  2433. {
  2434. ASSERT(pstrIn);
  2435. ASSERT(pstrOut);
  2436. ASSERT(dwPart);
  2437. HRESULT hr = S_OK;
  2438. URLPARTS parts;
  2439. BreakUrl(pstrIn->GetInplaceStr(), &parts);
  2440. if(dwFlags & URL_PARTFLAG_KEEPSCHEME)
  2441. {
  2442. hr = pstrOut->SetStr(parts.pszScheme);
  2443. if(SUCCEEDED(hr))
  2444. hr = pstrOut->Append(COLON);
  2445. }
  2446. else
  2447. pstrOut->Reset();
  2448. if(SUCCEEDED(hr))
  2449. {
  2450. switch (dwPart)
  2451. {
  2452. case URL_PART_SCHEME:
  2453. hr = pstrOut->SetStr(parts.pszScheme);
  2454. break;
  2455. case URL_PART_HOSTNAME:
  2456. if (parts.eScheme == URL_SCHEME_FILE)
  2457. {
  2458. hr = pstrOut->SetStr(parts.pszServer);
  2459. break;
  2460. }
  2461. // else fall through
  2462. case URL_PART_USERNAME:
  2463. case URL_PART_PASSWORD:
  2464. case URL_PART_PORT:
  2465. if(parts.dwFlags & UPF_SCHEME_INTERNET)
  2466. {
  2467. hr = InternetGetPart(dwPart, &parts, pstrOut, dwFlags);
  2468. }
  2469. else
  2470. hr = E_FAIL;
  2471. break;
  2472. case URL_PART_QUERY:
  2473. hr = pstrOut->SetStr(parts.pszQuery);
  2474. break;
  2475. default:
  2476. ASSERT(FALSE);
  2477. hr = E_UNEXPECTED;
  2478. }
  2479. }
  2480. return hr;
  2481. }
  2482. #define c_szURLPrefixesKey "Software\\Microsoft\\Windows\\CurrentVersion\\URL\\Prefixes"
  2483. const WCHAR c_szDefaultURLPrefixKey[] = L"Software\\Microsoft\\Windows\\CurrentVersion\\URL\\DefaultPrefix";
  2484. PRIVATE inline LPCWSTR SkipLeadingSlashes(LPCWSTR psz)
  2485. {
  2486. // Skip two leading slashes.
  2487. if (psz[0] == SLASH && psz[1] == SLASH)
  2488. psz += 2;
  2489. return psz;
  2490. }
  2491. PRIVATE HRESULT
  2492. UrlGuessScheme(LPCWSTR pszUrl, PSHSTRW pstr)
  2493. {
  2494. HRESULT hr = S_FALSE;
  2495. ASSERT(pszUrl && pstr);
  2496. HKEY hkeyPrefixes;
  2497. if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, c_szURLPrefixesKey, 0, KEY_QUERY_VALUE, &hkeyPrefixes)
  2498. == ERROR_SUCCESS)
  2499. {
  2500. DWORD dwiValue;
  2501. CHAR rgchValueName[MAX_PATH];
  2502. DWORD cchValueName = SIZECHARS(rgchValueName);
  2503. DWORD dwType;
  2504. CHAR rgchPrefix[MAX_PATH];
  2505. DWORD cbPrefix = SIZEOF(rgchPrefix);
  2506. // need to get past the initial two slashes if applicable
  2507. pszUrl = SkipLeadingSlashes(pszUrl);
  2508. for (dwiValue = 0;
  2509. RegEnumValueA(hkeyPrefixes, dwiValue, rgchValueName,
  2510. &cchValueName, NULL, &dwType, (PBYTE)rgchPrefix,
  2511. &cbPrefix) == ERROR_SUCCESS;
  2512. dwiValue++)
  2513. {
  2514. WCHAR wszValue[MAX_PATH];
  2515. MultiByteToWideChar(CP_ACP, 0, rgchValueName, -1, wszValue, ARRAYSIZE(wszValue));
  2516. // we check to make sure that we match and there is something more
  2517. if (!StrCmpNIW(pszUrl, wszValue, cchValueName) && pszUrl[cchValueName])
  2518. {
  2519. MultiByteToWideChar(CP_ACP, 0, rgchPrefix, -1, wszValue, ARRAYSIZE(wszValue));
  2520. if(SUCCEEDED(hr = pstr->SetStr(wszValue)))
  2521. hr = pstr->Append(pszUrl);
  2522. break;
  2523. }
  2524. cchValueName = SIZECHARS(rgchValueName);
  2525. cbPrefix = SIZEOF(rgchPrefix);
  2526. }
  2527. RegCloseKey(hkeyPrefixes);
  2528. }
  2529. return(hr);
  2530. }
  2531. /*----------------------------------------------------------
  2532. Purpose: Grabs the default URL prefix in the registry and applies
  2533. it to the given URL.
  2534. Returns: S_OK
  2535. S_FALSE if there is no default prefix
  2536. */
  2537. const WCHAR c_szDefaultScheme[] = L"http://";
  2538. HRESULT
  2539. UrlApplyDefaultScheme(
  2540. LPCWSTR pszUrl,
  2541. PSHSTRW pstr)
  2542. {
  2543. HRESULT hr = S_FALSE;
  2544. WCHAR szDef[MAX_PATH];
  2545. DWORD cbSize = SIZEOF(szDef);
  2546. ASSERT(pszUrl && pstr);
  2547. ASSERT(!PathIsURLW(pszUrl));
  2548. DWORD dwType;
  2549. if (NO_ERROR == SHRegGetUSValueW(c_szDefaultURLPrefixKey, NULL, &dwType, (LPVOID)szDef, &cbSize, TRUE, (LPVOID)c_szDefaultScheme, SIZEOF(c_szDefaultScheme)))
  2550. {
  2551. pszUrl = SkipLeadingSlashes(pszUrl);
  2552. if(SUCCEEDED(hr = pstr->SetStr(szDef)))
  2553. hr = pstr->Append(pszUrl);
  2554. }
  2555. return hr;
  2556. }
  2557. /*----------------------------------------------------------
  2558. Purpose: Guesses a URL protocol based upon a list in the registry,
  2559. compared to the first few characters of the given
  2560. URL suffix.
  2561. Returns: S_OK if a URL protocol is determined
  2562. S_FALSE if there were no problems but no prefix was prepended
  2563. */
  2564. HRESULT
  2565. SHUrlApplyScheme(
  2566. LPCWSTR pszUrl,
  2567. PSHSTRW pstrOut,
  2568. DWORD dwFlags)
  2569. {
  2570. HRESULT hr = S_FALSE;
  2571. ASSERT(IS_VALID_STRING_PTRW(pszUrl, -1));
  2572. //
  2573. // if there is already scheme there, we do nothing
  2574. // unless the caller insists. this is to support
  2575. // a string that looks like www.foo.com:8001.
  2576. // this is a site that needs to be guessed at but
  2577. // it also could be a valid scheme since '.' and '-'
  2578. // are both valid scheme chars.
  2579. //
  2580. DWORD cch;
  2581. if((dwFlags & URL_APPLY_FORCEAPPLY) || !FindSchemeW(pszUrl, &cch))
  2582. {
  2583. if(dwFlags & URL_APPLY_GUESSSCHEME)
  2584. hr = UrlGuessScheme(pszUrl, pstrOut);
  2585. if (hr != S_OK && (dwFlags & URL_APPLY_GUESSFILE))
  2586. {
  2587. LPCWSTR psz = FindDosPath(pszUrl);
  2588. // only change hr if we actually converted.
  2589. if(psz && SUCCEEDED(SHUrlCreateFromPath(psz, pstrOut, 0)))
  2590. hr = S_OK;
  2591. }
  2592. if (hr != S_OK && (dwFlags & URL_APPLY_DEFAULT || !dwFlags))
  2593. hr = UrlApplyDefaultScheme(pszUrl, pstrOut);
  2594. }
  2595. return hr;
  2596. }
  2597. PRIVATE HRESULT
  2598. CopyOutA(PSHSTRA pstr, LPSTR psz, LPDWORD pcch)
  2599. {
  2600. HRESULT hr;
  2601. DWORD cch;
  2602. ASSERT(pstr);
  2603. ASSERT(psz);
  2604. ASSERT(pcch);
  2605. cch = pstr->GetLen();
  2606. if ((*pcch > cch) && psz)
  2607. {
  2608. hr = StringCchCopyA(psz, *pcch, pstr->GetStr());
  2609. }
  2610. else
  2611. {
  2612. hr = E_POINTER;
  2613. }
  2614. *pcch = cch + (FAILED(hr) ? 1 : 0);
  2615. return hr;
  2616. }
  2617. //*** StrCopyOutW --
  2618. // NOTES
  2619. // WARNING: must match semantics of CopyOutW! (esp. the *pcchOut part)
  2620. PRIVATE HRESULT
  2621. StrCopyOutW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut)
  2622. {
  2623. HRESULT hr;
  2624. DWORD cch;
  2625. cch = lstrlenW(pszIn);
  2626. if ((cch < *pcchOut) && pszOut)
  2627. {
  2628. hr = StringCchCopyW(pszOut, *pcchOut, pszIn);
  2629. }
  2630. else
  2631. {
  2632. hr = E_POINTER;
  2633. }
  2634. *pcchOut = cch + (FAILED(hr) ? 1 : 0);
  2635. return hr;
  2636. }
  2637. //***
  2638. // NOTES
  2639. // WARNING: StrCopyOutW must match this func, so if you change this change
  2640. // it too
  2641. PRIVATE HRESULT
  2642. CopyOutW(PSHSTRW pstr, LPWSTR psz, LPDWORD pcch)
  2643. {
  2644. HRESULT hr = S_OK;
  2645. DWORD cch;
  2646. ASSERT(pstr);
  2647. ASSERT(psz);
  2648. ASSERT(pcch);
  2649. cch = pstr->GetLen();
  2650. if((*pcch > cch) && psz)
  2651. {
  2652. StringCchCopyW(psz, *pcch, pstr->GetStr());
  2653. }
  2654. else
  2655. {
  2656. hr = E_POINTER;
  2657. }
  2658. *pcch = cch + (FAILED(hr) ? 1 : 0);
  2659. return hr;
  2660. }
  2661. LWSTDAPI
  2662. UrlCanonicalizeA(LPCSTR pszIn,
  2663. LPSTR pszOut,
  2664. LPDWORD pcchOut,
  2665. DWORD dwFlags)
  2666. {
  2667. HRESULT hr;
  2668. SHSTRA straOut;
  2669. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCanonicalizeA: Caller passed invalid pszIn");
  2670. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCanonicalizeA: Caller passed invalid pcchOut");
  2671. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCanonicalizeA: Caller passed invalid pszOut");
  2672. #ifdef DEBUG
  2673. if (pcchOut)
  2674. {
  2675. if (pszOut == pszIn)
  2676. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2677. else
  2678. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2679. }
  2680. #endif
  2681. if (!pszIn
  2682. || !pszOut
  2683. || !pcchOut
  2684. || !*pcchOut)
  2685. {
  2686. hr = E_INVALIDARG;
  2687. }
  2688. else
  2689. {
  2690. hr = UrlCombineA("", pszIn, pszOut, pcchOut, dwFlags);
  2691. }
  2692. return hr;
  2693. }
  2694. LWSTDAPI
  2695. UrlEscapeA(LPCSTR pszIn,
  2696. LPSTR pszOut,
  2697. LPDWORD pcchOut,
  2698. DWORD dwFlags)
  2699. {
  2700. HRESULT hr;
  2701. SHSTRA straOut;
  2702. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlEscapeA: Caller passed invalid pszin");
  2703. RIPMSG(NULL!=pcchOut, "UrlEscapeA: Caller passed invalid pcchOut");
  2704. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlEscapeA: Caller passed invalid pszOut");
  2705. #ifdef DEBUG
  2706. if (pcchOut)
  2707. {
  2708. if (pszOut==pszOut)
  2709. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2710. else
  2711. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2712. }
  2713. #endif
  2714. if (!pszIn || !pszOut ||
  2715. !pcchOut || !*pcchOut)
  2716. hr = E_INVALIDARG;
  2717. else
  2718. {
  2719. SHSTRW strwOut;
  2720. SHSTRW strUrl;
  2721. if(SUCCEEDED(strUrl.SetStr(pszIn)))
  2722. hr = SHUrlEscape(strUrl, &strwOut, dwFlags);
  2723. else
  2724. hr = E_OUTOFMEMORY;
  2725. if(SUCCEEDED(hr))
  2726. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  2727. }
  2728. if(SUCCEEDED(hr))
  2729. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  2730. return hr;
  2731. }
  2732. LWSTDAPI
  2733. UrlGetPartA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
  2734. {
  2735. HRESULT hr;
  2736. SHSTRA straOut;
  2737. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlGetPartA: Caller passed invalid pszIn");
  2738. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartA: Caller passed invalid pcchOut");
  2739. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlGetPartA: Caller passed invalid pszOut");
  2740. #ifdef DEBUG
  2741. if (pcchOut)
  2742. {
  2743. if (pszOut==pszIn)
  2744. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2745. else
  2746. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2747. }
  2748. #endif
  2749. if (!pszIn || !pszOut ||
  2750. !pcchOut || !*pcchOut || dwPart == URL_PART_NONE)
  2751. hr = E_INVALIDARG;
  2752. else
  2753. {
  2754. SHSTRW strwOut;
  2755. SHSTRW strwIn;
  2756. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  2757. hr = SHUrlGetPart(&strwIn, &strwOut, dwPart, dwFlags);
  2758. else
  2759. hr = E_OUTOFMEMORY;
  2760. if(SUCCEEDED(hr))
  2761. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  2762. }
  2763. if(SUCCEEDED(hr))
  2764. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  2765. return hr;
  2766. }
  2767. LWSTDAPI_(BOOL) UrlIsA(LPCSTR pszURL, URLIS UrlIs)
  2768. {
  2769. BOOL fRet = FALSE;
  2770. RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlIsA: Caller passed invalid pszURL");
  2771. if(pszURL)
  2772. {
  2773. DWORD cchScheme, dwFlags;
  2774. LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
  2775. if(pszScheme)
  2776. {
  2777. URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
  2778. switch (UrlIs)
  2779. {
  2780. case URLIS_URL:
  2781. fRet = TRUE;
  2782. break;
  2783. case URLIS_OPAQUE:
  2784. fRet = (dwFlags & UPF_SCHEME_OPAQUE);
  2785. break;
  2786. case URLIS_NOHISTORY:
  2787. fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
  2788. break;
  2789. case URLIS_FILEURL:
  2790. fRet = (eScheme == URL_SCHEME_FILE);
  2791. break;
  2792. default:
  2793. // if it cant be done quck and dirty
  2794. // then we need to thunk to the wide version
  2795. SHSTRW strUrl;
  2796. if (SUCCEEDED(strUrl.SetStr(pszURL)))
  2797. {
  2798. fRet = UrlIsW(strUrl, UrlIs);
  2799. }
  2800. }
  2801. }
  2802. }
  2803. return fRet;
  2804. }
  2805. LWSTDAPI_(BOOL) UrlIsW(LPCWSTR pszURL, URLIS UrlIs)
  2806. {
  2807. BOOL fRet = FALSE;
  2808. RIPMSG(NULL!=pszURL && IS_VALID_STRING_PTRW(pszURL, -1), "UrlIsW: Caller passed invalid pszURL");
  2809. if(pszURL)
  2810. {
  2811. DWORD cchScheme, dwFlags;
  2812. LPCWSTR pszScheme = FindSchemeW(pszURL, &cchScheme);
  2813. if(pszScheme)
  2814. {
  2815. SHSTRW str;
  2816. URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
  2817. switch (UrlIs)
  2818. {
  2819. case URLIS_URL:
  2820. fRet = TRUE;
  2821. break;
  2822. case URLIS_OPAQUE:
  2823. fRet = (dwFlags & UPF_SCHEME_OPAQUE);
  2824. break;
  2825. case URLIS_NOHISTORY:
  2826. fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
  2827. break;
  2828. case URLIS_FILEURL:
  2829. fRet = (eScheme == URL_SCHEME_FILE);
  2830. break;
  2831. case URLIS_APPLIABLE:
  2832. if (eScheme == URL_SCHEME_UNKNOWN)
  2833. {
  2834. if (S_OK == UrlGuessScheme(pszURL, &str))
  2835. fRet = TRUE;
  2836. }
  2837. break;
  2838. // these cases need a broken URL
  2839. case URLIS_DIRECTORY:
  2840. case URLIS_HASQUERY:
  2841. {
  2842. URLPARTS parts;
  2843. if (SUCCEEDED(str.SetStr(pszURL))
  2844. && SUCCEEDED(BreakUrl(str.GetInplaceStr(), &parts)))
  2845. {
  2846. switch(UrlIs)
  2847. {
  2848. case URLIS_DIRECTORY:
  2849. // if the last seg has a trailing slash, or
  2850. // if there are no path segments at all...
  2851. fRet = (!parts.cSegments || (parts.dwFlags & UPF_EXSEG_DIRECTORY));
  2852. break;
  2853. case URLIS_HASQUERY:
  2854. fRet = (parts.pszQuery && *parts.pszQuery);
  2855. break;
  2856. default:
  2857. ASSERT(FALSE);
  2858. break;
  2859. }
  2860. }
  2861. }
  2862. break;
  2863. default:
  2864. AssertMsg(FALSE, "UrlIs() called with invalid flag");
  2865. }
  2866. }
  2867. }
  2868. return fRet;
  2869. }
  2870. LWSTDAPI_(BOOL) UrlIsOpaqueA(LPCSTR pszURL)
  2871. {
  2872. return UrlIsA(pszURL, URLIS_OPAQUE);
  2873. }
  2874. LWSTDAPI_(BOOL) UrlIsOpaqueW(LPCWSTR pszURL)
  2875. {
  2876. return UrlIsW(pszURL, URLIS_OPAQUE);
  2877. }
  2878. LWSTDAPI_(BOOL) UrlIsNoHistoryA(LPCSTR pszURL)
  2879. {
  2880. return UrlIsA(pszURL, URLIS_NOHISTORY);
  2881. }
  2882. LWSTDAPI_(BOOL) UrlIsNoHistoryW(LPCWSTR pszURL)
  2883. {
  2884. return UrlIsW(pszURL, URLIS_NOHISTORY);
  2885. }
  2886. LWSTDAPI_(LPCSTR) UrlGetLocationA(LPCSTR pszURL)
  2887. {
  2888. CPINFO cpinfo;
  2889. BOOL fMBCS = (GetCPInfo(CP_ACP, &cpinfo) && cpinfo.LeadByte[0]);
  2890. RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlGetLocationA: Caller passed invalid pszURL");
  2891. if(pszURL)
  2892. {
  2893. DWORD cchScheme, dwFlags;
  2894. LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
  2895. if(pszScheme)
  2896. {
  2897. URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
  2898. return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentA(pszURL, fMBCS, (eScheme == URL_SCHEME_FILE));
  2899. }
  2900. }
  2901. return NULL;
  2902. }
  2903. LWSTDAPI_(LPCWSTR) UrlGetLocationW(LPCWSTR wzURL)
  2904. {
  2905. RIPMSG(wzURL && IS_VALID_STRING_PTRW(wzURL, -1), "UrlGetLocationW: Caller passed invalid wzURL");
  2906. if(wzURL)
  2907. {
  2908. DWORD cchScheme, dwFlags;
  2909. LPCWSTR pszScheme = FindSchemeW(wzURL, &cchScheme);
  2910. if(pszScheme)
  2911. {
  2912. URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
  2913. return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentW(wzURL, (eScheme == URL_SCHEME_FILE));
  2914. }
  2915. }
  2916. return NULL;
  2917. }
  2918. LWSTDAPI_(int) UrlCompareA(LPCSTR psz1, LPCSTR psz2, BOOL fIgnoreSlash)
  2919. {
  2920. RIPMSG(psz1 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz1");
  2921. RIPMSG(psz2 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz2");
  2922. if (psz1 && psz2)
  2923. {
  2924. SHSTRW str1, str2;
  2925. if(SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
  2926. SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)) )
  2927. {
  2928. if(fIgnoreSlash)
  2929. {
  2930. LPWSTR pch;
  2931. pch = str1.GetInplaceStr() + str1.GetLen() - 1;
  2932. if(*pch == SLASH)
  2933. TERMSTR(pch);
  2934. pch = str2.GetInplaceStr() + str2.GetLen() - 1;
  2935. if(*pch == SLASH)
  2936. TERMSTR(pch);
  2937. }
  2938. return StrCmpW(str1, str2);
  2939. }
  2940. }
  2941. return lstrcmpA(psz1, psz2);
  2942. }
  2943. LWSTDAPI
  2944. UrlUnescapeA(LPSTR pszUrl, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  2945. {
  2946. RIPMSG(pszUrl && IS_VALID_STRING_PTRA(pszUrl, -1), "UrlUnescapeA: Caller passed invalid pszUrl");
  2947. if(dwFlags & URL_UNESCAPE_INPLACE)
  2948. {
  2949. return SHUrlUnescapeA(pszUrl, dwFlags);
  2950. }
  2951. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeA: Caller passed invalid pcchOut");
  2952. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlUnescapeA: Caller passed invalid pszOut");
  2953. #ifdef DEBUG
  2954. if (pcchOut)
  2955. {
  2956. if (pszOut==pszUrl)
  2957. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2958. else
  2959. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2960. }
  2961. #endif
  2962. if (!pszUrl
  2963. || !pcchOut
  2964. || !*pcchOut
  2965. || !pszOut)
  2966. {
  2967. return E_INVALIDARG;
  2968. }
  2969. SHSTRA str;
  2970. HRESULT hr = str.SetStr(pszUrl);
  2971. if(SUCCEEDED(hr))
  2972. {
  2973. SHUrlUnescapeA(str.GetInplaceStr(), dwFlags);
  2974. hr = CopyOutA(&str, pszOut, pcchOut);
  2975. }
  2976. return hr;
  2977. }
  2978. LWSTDAPI
  2979. PathCreateFromUrlA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  2980. {
  2981. HRESULT hr;
  2982. SHSTRA straOut;
  2983. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "PathCreateFromUrlA: Caller passed invalid pszIn");
  2984. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlA: Caller passed invalid pcchOut");
  2985. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "PathCreateFromUrlA: Caller passed invalid pszOut");
  2986. #ifdef DEBUG
  2987. if (pcchOut)
  2988. {
  2989. if (pszOut==pszIn)
  2990. DEBUGWhackPathStringA(pszOut, *pcchOut);
  2991. else
  2992. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  2993. }
  2994. #endif
  2995. if (!pszIn || !pszOut ||
  2996. !pcchOut || !*pcchOut )
  2997. hr = E_INVALIDARG;
  2998. else
  2999. {
  3000. SHSTRW strwOut;
  3001. SHSTRW strwIn;
  3002. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3003. hr = SHPathCreateFromUrl(strwIn, &strwOut, dwFlags);
  3004. else
  3005. hr = E_OUTOFMEMORY;
  3006. if(SUCCEEDED(hr))
  3007. hr = straOut.SetStr(strwOut);
  3008. }
  3009. if(SUCCEEDED(hr) )
  3010. hr = CopyOutA(&straOut, pszOut, pcchOut);
  3011. return hr;
  3012. }
  3013. LWSTDAPI
  3014. UrlCreateFromPathA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3015. {
  3016. HRESULT hr;
  3017. SHSTRA straOut;
  3018. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCreateFromPathA: Caller passed invalid pszIn");
  3019. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathA: Caller passed invalid pcchOut");
  3020. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCreateFromPathA: Caller passed invalid pszOut");
  3021. #ifdef DEBUG
  3022. if (pcchOut)
  3023. {
  3024. if (pszOut==pszIn)
  3025. DEBUGWhackPathStringA(pszOut, *pcchOut);
  3026. else
  3027. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  3028. }
  3029. #endif
  3030. if (!pszIn || !pszOut ||
  3031. !pcchOut || !*pcchOut )
  3032. hr = E_INVALIDARG;
  3033. else
  3034. {
  3035. SHSTRW strwOut;
  3036. SHSTRW strwIn;
  3037. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3038. hr = SHUrlCreateFromPath(strwIn, &strwOut, dwFlags);
  3039. else
  3040. hr = E_OUTOFMEMORY;
  3041. if(SUCCEEDED(hr))
  3042. {
  3043. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  3044. }
  3045. }
  3046. if(SUCCEEDED(hr) )
  3047. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
  3048. return hr;
  3049. }
  3050. LWSTDAPI
  3051. UrlApplySchemeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3052. {
  3053. HRESULT hr;
  3054. SHSTRA straOut;
  3055. RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlApplySchemeA: Caller passed invalid pszIn");
  3056. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeA: Caller passed invalid pcchOut");
  3057. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlApplySchemeA: Caller passed invalid pszOut");
  3058. #ifdef DEBUG
  3059. if (pcchOut)
  3060. {
  3061. if (pszOut==pszIn)
  3062. DEBUGWhackPathStringA(pszOut, *pcchOut);
  3063. else
  3064. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  3065. }
  3066. #endif
  3067. if (!pszIn || !pszOut ||
  3068. !pcchOut || !*pcchOut )
  3069. hr = E_INVALIDARG;
  3070. else
  3071. {
  3072. SHSTRW strwOut;
  3073. SHSTRW strwIn;
  3074. if(SUCCEEDED(strwIn.SetStr(pszIn)))
  3075. hr = SHUrlApplyScheme(strwIn, &strwOut, dwFlags);
  3076. else
  3077. hr = E_OUTOFMEMORY;
  3078. if(S_OK == (hr))
  3079. hr = straOut.SetStr(strwOut);
  3080. }
  3081. if(S_OK == (hr))
  3082. hr = CopyOutA(&straOut, pszOut, pcchOut);
  3083. return hr;
  3084. }
  3085. // PERF_CACHE
  3086. //*** g_szUCCanon -- 1-element cache for UrlCanonicalizeW
  3087. // DESCRIPTION
  3088. // it turns out a large # of our calls a) are for the same thing,
  3089. // and b) have pszOut(canon)=pszIn(raw). so cache the most recent guy.
  3090. LONG g_lockUC;
  3091. WCHAR g_szUCCanon[64]; // post-canon guy (also used for pre-canon check)
  3092. DWORD g_dwUCFlags;
  3093. #ifdef DEBUG
  3094. int g_cUCTot, g_cUCHit;
  3095. #endif
  3096. LWSTDAPI
  3097. UrlCanonicalizeW(LPCWSTR pszUrl,
  3098. LPWSTR pszCanonicalized,
  3099. LPDWORD pcchCanonicalized,
  3100. DWORD dwFlags)
  3101. {
  3102. HRESULT hr;
  3103. SHSTRW strwOut;
  3104. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlCanonicalizeW: Caller passed invalid pszUrl");
  3105. RIPMSG(NULL!=pcchCanonicalized && IS_VALID_WRITE_PTR(pcchCanonicalized, DWORD), "UrlCanonicalizeW: Caller passed invalid pcchCanonicalized");
  3106. RIPMSG(NULL==pcchCanonicalized || (pszCanonicalized && IS_VALID_WRITE_BUFFER(pszCanonicalized, char, *pcchCanonicalized)), "UrlCanonicalizeW: Caller passed invalid pszCanonicalized");
  3107. #ifdef DEBUG
  3108. if (pcchCanonicalized)
  3109. {
  3110. if (pszCanonicalized == pszUrl)
  3111. DEBUGWhackPathStringW(pszCanonicalized, *pcchCanonicalized);
  3112. else
  3113. DEBUGWhackPathBufferW(pszCanonicalized, *pcchCanonicalized);
  3114. }
  3115. #endif
  3116. if (!pszUrl
  3117. || !pszCanonicalized
  3118. || !pcchCanonicalized
  3119. || !*pcchCanonicalized)
  3120. {
  3121. hr = E_INVALIDARG;
  3122. }
  3123. else
  3124. {
  3125. #ifdef DEBUG
  3126. if ((g_cUCTot % 10) == 0)
  3127. TraceMsg(DM_PERF, "uc: tot=%d hit=%d", g_cUCTot, g_cUCHit);
  3128. #endif
  3129. DBEXEC(TRUE, g_cUCTot++);
  3130. // try the cache 1st
  3131. if (InterlockedExchange(&g_lockUC, 1) == 0) {
  3132. hr = E_FAIL;
  3133. if ((g_dwUCFlags==dwFlags)
  3134. &&
  3135. (!(dwFlags & URL_ESCAPE_PERCENT))
  3136. &&
  3137. StrCmpCW(pszUrl, g_szUCCanon) == 0)
  3138. {
  3139. DBEXEC(TRUE, g_cUCHit++);
  3140. DWORD cchTmp = *pcchCanonicalized;
  3141. hr = StrCopyOutW(g_szUCCanon, pszCanonicalized, pcchCanonicalized);
  3142. if (FAILED(hr))
  3143. *pcchCanonicalized = cchTmp; // restore!
  3144. }
  3145. InterlockedExchange(&g_lockUC, 0);
  3146. if (SUCCEEDED(hr))
  3147. return hr;
  3148. }
  3149. hr = UrlCombineW(L"", pszUrl, pszCanonicalized, pcchCanonicalized, dwFlags);
  3150. if (SUCCEEDED(hr) && *pcchCanonicalized < ARRAYSIZE(g_szUCCanon)) {
  3151. if (InterlockedExchange(&g_lockUC, 1) == 0)
  3152. {
  3153. StringCchCopyW(g_szUCCanon, ARRAYSIZE(g_szUCCanon), pszCanonicalized);
  3154. g_dwUCFlags = dwFlags;
  3155. InterlockedExchange(&g_lockUC, 0);
  3156. }
  3157. }
  3158. }
  3159. return hr;
  3160. }
  3161. LWSTDAPI
  3162. UrlEscapeW(LPCWSTR pszUrl,
  3163. LPWSTR pszEscaped,
  3164. LPDWORD pcchEscaped,
  3165. DWORD dwFlags)
  3166. {
  3167. HRESULT hr;
  3168. SHSTRW strwOut;
  3169. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlEscapeW: Caller passed invalid pszUrl");
  3170. RIPMSG(NULL!=pcchEscaped && IS_VALID_WRITE_PTR(pcchEscaped, DWORD), "UrlEscapeW: Caller passed invalid pcchEscaped");
  3171. RIPMSG(pszEscaped && (NULL==pcchEscaped || IS_VALID_WRITE_BUFFER(pszEscaped, WCHAR, *pcchEscaped)), "UrlEscapeW: Caller passed invalid pszEscaped");
  3172. #ifdef DEBUG
  3173. if (pcchEscaped)
  3174. {
  3175. if (pszEscaped==pszUrl)
  3176. DEBUGWhackPathStringW(pszEscaped, *pcchEscaped);
  3177. else
  3178. DEBUGWhackPathBufferW(pszEscaped, *pcchEscaped);
  3179. }
  3180. #endif
  3181. if (!pszUrl || !pszEscaped ||
  3182. !pcchEscaped || !*pcchEscaped)
  3183. hr = E_INVALIDARG;
  3184. else
  3185. {
  3186. hr = SHUrlEscape(pszUrl, &strwOut, dwFlags);
  3187. }
  3188. if(SUCCEEDED(hr) )
  3189. hr = CopyOutW(&strwOut, pszEscaped, pcchEscaped);
  3190. return hr;
  3191. }
  3192. LWSTDAPI_(int) UrlCompareW(LPCWSTR psz1, LPCWSTR psz2, BOOL fIgnoreSlash)
  3193. {
  3194. RIPMSG(psz1 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz1");
  3195. RIPMSG(psz2 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz2");
  3196. if (psz1 && psz2)
  3197. {
  3198. SHSTRW str1, str2;
  3199. if( SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
  3200. SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)))
  3201. {
  3202. if(fIgnoreSlash)
  3203. {
  3204. LPWSTR pch;
  3205. pch = str1.GetInplaceStr() + str1.GetLen() - 1;
  3206. if(*pch == SLASH)
  3207. TERMSTR(pch);
  3208. pch = str2.GetInplaceStr() + str2.GetLen() - 1;
  3209. if(*pch == SLASH)
  3210. TERMSTR(pch);
  3211. }
  3212. return StrCmpW(str1, str2);
  3213. }
  3214. }
  3215. return StrCmpW(psz1, psz2);
  3216. }
  3217. LWSTDAPI
  3218. UrlUnescapeW(LPWSTR pszUrl, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
  3219. {
  3220. RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlUnescapeW: Caller passed invalid pszUrl");
  3221. if(dwFlags & URL_UNESCAPE_INPLACE)
  3222. {
  3223. return SHUrlUnescapeW(pszUrl, dwFlags);
  3224. }
  3225. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeW: Caller passed invalid pcchOut");
  3226. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlUnescapeW: Caller passed invalid pszOut");
  3227. #ifdef DEBUG
  3228. if (pcchOut)
  3229. {
  3230. if (pszOut==pszUrl)
  3231. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3232. else
  3233. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3234. }
  3235. #endif
  3236. if (!pszUrl
  3237. || !pcchOut
  3238. || !*pcchOut
  3239. || !pszOut)
  3240. {
  3241. return E_INVALIDARG;
  3242. }
  3243. SHSTRW str;
  3244. HRESULT hr = str.SetStr(pszUrl);
  3245. if(SUCCEEDED(hr))
  3246. {
  3247. SHUrlUnescapeW(str.GetInplaceStr(), dwFlags);
  3248. hr = CopyOutW(&str, pszOut, pcchOut);
  3249. }
  3250. return hr;
  3251. }
  3252. LWSTDAPI
  3253. PathCreateFromUrlW
  3254. (LPCWSTR pszIn,
  3255. LPWSTR pszOut,
  3256. LPDWORD pcchOut,
  3257. DWORD dwFlags)
  3258. {
  3259. HRESULT hr;
  3260. SHSTRW strOut;
  3261. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "PathCreateFromUrlW: Caller passed invalid pszIn");
  3262. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlW: Caller passed invalid pcchOut");
  3263. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "PathCreateFromUrlW: Caller passed invalid pszOut");
  3264. #ifdef DEBUG
  3265. if (pcchOut)
  3266. {
  3267. if (pszOut==pszIn)
  3268. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3269. else
  3270. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3271. }
  3272. #endif
  3273. if (!pszIn || !pszOut ||
  3274. !pcchOut || !*pcchOut )
  3275. hr = E_INVALIDARG;
  3276. else
  3277. hr = SHPathCreateFromUrl(pszIn, &strOut, dwFlags);
  3278. if(SUCCEEDED(hr) )
  3279. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3280. return hr;
  3281. }
  3282. LWSTDAPI
  3283. UrlCreateFromPathW
  3284. (LPCWSTR pszIn,
  3285. LPWSTR pszOut,
  3286. LPDWORD pcchOut,
  3287. DWORD dwFlags)
  3288. {
  3289. HRESULT hr;
  3290. SHSTRW strOut;
  3291. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlCreateFromPathW: Caller passed invalid pszIn");
  3292. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathW: Caller passed invalid pcchOut");
  3293. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlCreateFromPathW: Caller passed invalid pszOut");
  3294. #ifdef DEBUG
  3295. if (pcchOut)
  3296. {
  3297. if (pszOut==pszIn)
  3298. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3299. else
  3300. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3301. }
  3302. #endif
  3303. if (!pszIn || !pszOut ||
  3304. !pcchOut || !*pcchOut )
  3305. hr = E_INVALIDARG;
  3306. else
  3307. hr = SHUrlCreateFromPath(pszIn, &strOut, dwFlags);
  3308. if(SUCCEEDED(hr) )
  3309. hr = ReconcileHresults(hr, CopyOutW(&strOut, pszOut, pcchOut));
  3310. return hr;
  3311. }
  3312. LWSTDAPI
  3313. UrlGetPartW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
  3314. {
  3315. SHSTRW strIn, strOut;
  3316. HRESULT hr;
  3317. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlGetPartW: Caller passed invalid pszIn");
  3318. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartW: Caller passed invalid pcchOut");
  3319. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlGetPartW: Caller passed invalid pszOut");
  3320. #ifdef DEBUG
  3321. if (pcchOut)
  3322. {
  3323. if (pszOut==pszIn)
  3324. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3325. else
  3326. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3327. }
  3328. #endif
  3329. if (!pszIn || !pszOut ||
  3330. !pcchOut || !*pcchOut || !dwPart)
  3331. hr = E_INVALIDARG;
  3332. else if (SUCCEEDED(hr = strIn.SetStr(pszIn)))
  3333. hr = SHUrlGetPart(&strIn, &strOut, dwPart, dwFlags);
  3334. if(SUCCEEDED(hr) )
  3335. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3336. return hr;
  3337. }
  3338. LWSTDAPI
  3339. UrlApplySchemeW
  3340. (LPCWSTR pszIn,
  3341. LPWSTR pszOut,
  3342. LPDWORD pcchOut,
  3343. DWORD dwFlags)
  3344. {
  3345. HRESULT hr;
  3346. SHSTRW strOut;
  3347. RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlApplySchemeW: Caller passed invalid pszIn");
  3348. RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeW: Caller passed invalid pcchOut");
  3349. RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlApplySchemeW: Caller passed invalid pszOut");
  3350. #ifdef DEBUG
  3351. if (pcchOut)
  3352. {
  3353. if (pszOut==pszIn)
  3354. DEBUGWhackPathStringW(pszOut, *pcchOut);
  3355. else
  3356. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  3357. }
  3358. #endif
  3359. if (!pszIn || !pszOut ||
  3360. !pcchOut || !*pcchOut )
  3361. hr = E_INVALIDARG;
  3362. else
  3363. hr = SHUrlApplyScheme(pszIn, &strOut, dwFlags);
  3364. if(S_OK == (hr))
  3365. hr = CopyOutW(&strOut, pszOut, pcchOut);
  3366. return hr;
  3367. }
  3368. //
  3369. // this is the same table used by both URLMON and WININET's cache
  3370. //
  3371. const static BYTE Translate[256] =
  3372. {
  3373. 1, 14,110, 25, 97,174,132,119,138,170,125,118, 27,233,140, 51,
  3374. 87,197,177,107,234,169, 56, 68, 30, 7,173, 73,188, 40, 36, 65,
  3375. 49,213,104,190, 57,211,148,223, 48,115, 15, 2, 67,186,210, 28,
  3376. 12,181,103, 70, 22, 58, 75, 78,183,167,238,157,124,147,172,144,
  3377. 176,161,141, 86, 60, 66,128, 83,156,241, 79, 46,168,198, 41,254,
  3378. 178, 85,253,237,250,154,133, 88, 35,206, 95,116,252,192, 54,221,
  3379. 102,218,255,240, 82,106,158,201, 61, 3, 89, 9, 42,155,159, 93,
  3380. 166, 80, 50, 34,175,195,100, 99, 26,150, 16,145, 4, 33, 8,189,
  3381. 121, 64, 77, 72,208,245,130,122,143, 55,105,134, 29,164,185,194,
  3382. 193,239,101,242, 5,171,126, 11, 74, 59,137,228,108,191,232,139,
  3383. 6, 24, 81, 20,127, 17, 91, 92,251,151,225,207, 21, 98,113,112,
  3384. 84,226, 18,214,199,187, 13, 32, 94,220,224,212,247,204,196, 43,
  3385. 249,236, 45,244,111,182,153,136,129, 90,217,202, 19,165,231, 71,
  3386. 230,142, 96,227, 62,179,246,114,162, 53,160,215,205,180, 47,109,
  3387. 44, 38, 31,149,135, 0,216, 52, 63, 23, 37, 69, 39,117,146,184,
  3388. 163,200,222,235,248,243,219, 10,152,131,123,229,203, 76,120,209
  3389. };
  3390. PRIVATE void _HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
  3391. {
  3392. DWORD i, j;
  3393. // seed the hash
  3394. for (i = cbHash; i-- > 0;)
  3395. pbHash[i] = (BYTE) i;
  3396. // do the hash
  3397. for (j = cbData; j-- > 0;)
  3398. {
  3399. for (i = cbHash; i-- > 0;)
  3400. pbHash[i] = Translate[pbHash[i] ^ pbData[j]];
  3401. }
  3402. }
  3403. LWSTDAPI
  3404. HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
  3405. {
  3406. RIPMSG(pbData && IS_VALID_READ_BUFFER(pbData, BYTE, cbData), "HashData: Caller passed invalid pbData");
  3407. RIPMSG(pbHash && IS_VALID_WRITE_BUFFER(pbHash, BYTE, cbHash), "HashData: Caller passed invalid pbHash");
  3408. if (pbData && pbHash)
  3409. {
  3410. _HashData(pbData, cbData, pbHash, cbHash);
  3411. return S_OK;
  3412. }
  3413. return E_INVALIDARG;
  3414. }
  3415. LWSTDAPI
  3416. UrlHashA(LPCSTR psz, LPBYTE pb, DWORD cb)
  3417. {
  3418. HRESULT hr = E_INVALIDARG;
  3419. RIPMSG(psz && IS_VALID_STRING_PTRA(psz, -1), "UrlHashA: Caller passed invalid psz");
  3420. RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashA: Caller passed invalid pb");
  3421. if (psz && pb)
  3422. {
  3423. _HashData((LPBYTE) psz, lstrlenA(psz), pb, cb);
  3424. return S_OK;
  3425. }
  3426. return hr;
  3427. }
  3428. LWSTDAPI
  3429. UrlHashW(LPCWSTR psz, LPBYTE pb, DWORD cb)
  3430. {
  3431. HRESULT hr;
  3432. RIPMSG(psz && IS_VALID_STRING_PTRW(psz, -1), "UrlHashW: Caller passed invalid psz");
  3433. RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashW: Caller passed invalid pb");
  3434. if (psz && pb)
  3435. {
  3436. SHSTRA str;
  3437. if (SUCCEEDED( hr = str.SetStr(psz)))
  3438. hr = UrlHashA(str, pb, cb);
  3439. }
  3440. else
  3441. {
  3442. hr = E_INVALIDARG;
  3443. }
  3444. return hr;
  3445. }
  3446. /***************************** ParseURL Functions *****************************/
  3447. // these were originally in URL.DLL and then moved to shlwapi.
  3448. // i just added them from url.c for reuse of code.
  3449. // ParseURL now does no MBCS thunks, to keep it fast.
  3450. //
  3451. // declarations for ParseURL() APIs
  3452. //
  3453. typedef const PARSEDURLA CPARSEDURLA;
  3454. typedef const PARSEDURLA * PCPARSEDURLA;
  3455. typedef const PARSEDURLW CPARSEDURLW;
  3456. typedef const PARSEDURLW * PCPARSEDURLW;
  3457. #ifdef DEBUG
  3458. BOOL
  3459. IsValidPCPARSEDURLA(
  3460. LPCSTR pcszURL,
  3461. PCPARSEDURLA pcpu)
  3462. {
  3463. return(IS_VALID_READ_PTR(pcpu, CPARSEDURLA) &&
  3464. (IS_VALID_STRING_PTRA(pcpu->pszProtocol, -1) &&
  3465. EVAL(IsStringContainedA(pcszURL, pcpu->pszProtocol)) &&
  3466. EVAL(pcpu->cchProtocol < (UINT)lstrlenA(pcpu->pszProtocol))) &&
  3467. (IS_VALID_STRING_PTRA(pcpu->pszSuffix, -1) &&
  3468. EVAL(IsStringContainedA(pcszURL, pcpu->pszSuffix)) &&
  3469. EVAL(pcpu->cchSuffix <= (UINT)lstrlenA(pcpu->pszSuffix))) &&
  3470. EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenA(pcszURL)));
  3471. }
  3472. BOOL
  3473. IsValidPCPARSEDURLW(
  3474. LPCWSTR pcszURL,
  3475. PCPARSEDURLW pcpu)
  3476. {
  3477. return(IS_VALID_READ_PTR(pcpu, CPARSEDURLW) &&
  3478. (IS_VALID_STRING_PTRW(pcpu->pszProtocol, -1) &&
  3479. EVAL(IsStringContainedW(pcszURL, pcpu->pszProtocol)) &&
  3480. EVAL(pcpu->cchProtocol < (UINT)lstrlenW(pcpu->pszProtocol))) &&
  3481. (IS_VALID_STRING_PTRW(pcpu->pszSuffix, -1) &&
  3482. EVAL(IsStringContainedW(pcszURL, pcpu->pszSuffix)) &&
  3483. EVAL(pcpu->cchSuffix <= (UINT)lstrlenW(pcpu->pszSuffix))) &&
  3484. EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenW(pcszURL)));
  3485. }
  3486. #endif
  3487. /*----------------------------------------------------------
  3488. Purpose: Parse the given path into the PARSEDURL structure.
  3489. ******
  3490. ****** This function must not do any extraneous
  3491. ****** things. It must be small and fast.
  3492. ******
  3493. Returns: NOERROR if a valid URL format
  3494. URL_E_INVALID_SYNTAX if not
  3495. Cond: --
  3496. */
  3497. STDMETHODIMP
  3498. ParseURLA(
  3499. LPCSTR pcszURL,
  3500. PPARSEDURLA ppu)
  3501. {
  3502. HRESULT hr = E_INVALIDARG;
  3503. RIP(IS_VALID_STRING_PTRA(pcszURL, -1));
  3504. RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLA));
  3505. if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
  3506. {
  3507. DWORD cch;
  3508. hr = URL_E_INVALID_SYNTAX; // assume error
  3509. ppu->pszProtocol = FindSchemeA(pcszURL, &cch);
  3510. if(ppu->pszProtocol)
  3511. {
  3512. ppu->cchProtocol = cch;
  3513. // Determine protocol scheme number
  3514. ppu->nScheme = SchemeTypeFromStringA(ppu->pszProtocol, cch);
  3515. ppu->pszSuffix = ppu->pszProtocol + cch + 1;
  3516. //
  3517. // APPCOMPAT - Backwards compatibility - zekel 28-feb-97
  3518. // ParseURL() believes in file: urls like "file://C:\foo\bar"
  3519. // and some pieces of code will use it to get the Dos Path.
  3520. // new code should always call PathCreateFromUrl() to
  3521. // get the dos path of a file: URL.
  3522. //
  3523. // i am leaving this behavior in case some compat stuff is out there.
  3524. //
  3525. if (URL_SCHEME_FILE == ppu->nScheme &&
  3526. '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
  3527. {
  3528. // Yes; skip the "//"
  3529. ppu->pszSuffix += 2;
  3530. #ifndef UNIX
  3531. // FOR UNIX: If we have /vobs/build, we don't want to make
  3532. // There might be a third slash. Skip it.
  3533. if ('/' == *ppu->pszSuffix)
  3534. ppu->pszSuffix++;
  3535. #endif
  3536. }
  3537. ppu->cchSuffix = lstrlenA(ppu->pszSuffix);
  3538. hr = S_OK;
  3539. }
  3540. }
  3541. #ifdef DEBUG
  3542. if (hr == S_OK)
  3543. {
  3544. CHAR rgchDebugProtocol[MAX_PATH];
  3545. CHAR rgchDebugSuffix[MAX_PATH];
  3546. // (+ 1) for null terminator.
  3547. lstrcpynA(rgchDebugProtocol, ppu->pszProtocol,
  3548. min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
  3549. // (+ 1) for null terminator.
  3550. lstrcpynA(rgchDebugSuffix, ppu->pszSuffix,
  3551. min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
  3552. TraceMsgA(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
  3553. rgchDebugProtocol,
  3554. rgchDebugSuffix,
  3555. pcszURL);
  3556. }
  3557. else
  3558. {
  3559. TraceMsgA(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
  3560. }
  3561. #endif
  3562. ASSERT(FAILED(hr) ||
  3563. EVAL(IsValidPCPARSEDURLA(pcszURL, ppu)));
  3564. return(hr);
  3565. }
  3566. /*----------------------------------------------------------
  3567. Purpose: Parse the given path into the PARSEDURL structure.
  3568. ******
  3569. ****** This function must not do any extraneous
  3570. ****** things. It must be small and fast.
  3571. ******
  3572. Returns: NOERROR if a valid URL format
  3573. URL_E_INVALID_SYNTAX if not
  3574. Cond: --
  3575. */
  3576. STDMETHODIMP
  3577. ParseURLW(
  3578. LPCWSTR pcszURL,
  3579. PPARSEDURLW ppu)
  3580. {
  3581. HRESULT hr = E_INVALIDARG;
  3582. RIP(IS_VALID_STRING_PTRW(pcszURL, -1));
  3583. RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLW));
  3584. if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
  3585. {
  3586. DWORD cch;
  3587. hr = URL_E_INVALID_SYNTAX; // assume error
  3588. ppu->pszProtocol = FindSchemeW(pcszURL, &cch);
  3589. if(ppu->pszProtocol)
  3590. {
  3591. ppu->cchProtocol = cch;
  3592. // Determine protocol scheme number
  3593. ppu->nScheme = SchemeTypeFromStringW(ppu->pszProtocol, cch);
  3594. ppu->pszSuffix = ppu->pszProtocol + cch + 1;
  3595. //
  3596. // APPCOMPAT - Backwards compatibility - zekel 28-feb-97
  3597. // ParseURL() believes in file: urls like "file://C:\foo\bar"
  3598. // and some pieces of code will use it to get the Dos Path.
  3599. // new code should always call PathCreateFromUrl() to
  3600. // get the dos path of a file: URL.
  3601. //
  3602. // i am leaving this behavior in case some compat stuff is out there.
  3603. //
  3604. if (URL_SCHEME_FILE == ppu->nScheme &&
  3605. '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
  3606. {
  3607. // Yes; skip the "//"
  3608. ppu->pszSuffix += 2;
  3609. #ifndef UNIX
  3610. // There might be a third slash. Skip it.
  3611. // IEUNIX - On UNIX, it's a root directory, so don't skip it!
  3612. if ('/' == *ppu->pszSuffix)
  3613. ppu->pszSuffix++;
  3614. #endif
  3615. }
  3616. ppu->cchSuffix = lstrlenW(ppu->pszSuffix);
  3617. hr = S_OK;
  3618. }
  3619. }
  3620. #ifdef DEBUG
  3621. if (hr==S_OK)
  3622. {
  3623. WCHAR rgchDebugProtocol[MAX_PATH];
  3624. WCHAR rgchDebugSuffix[MAX_PATH];
  3625. // (+ 1) for null terminator.
  3626. StrCpyNW(rgchDebugProtocol, ppu->pszProtocol,
  3627. min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
  3628. // (+ 1) for null terminator.
  3629. StrCpyNW(rgchDebugSuffix, ppu->pszSuffix,
  3630. min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
  3631. TraceMsg(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
  3632. rgchDebugProtocol,
  3633. rgchDebugSuffix,
  3634. pcszURL);
  3635. }
  3636. else
  3637. {
  3638. TraceMsg(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
  3639. }
  3640. #endif
  3641. ASSERT(FAILED(hr) ||
  3642. EVAL(IsValidPCPARSEDURLW(pcszURL, ppu)));
  3643. return(hr);
  3644. }
  3645. #ifdef USE_FAST_PARSER
  3646. // GetSchemeTypeAndFlagsSpecialW
  3647. // performs the same behavior as GetSchemeTypeAndFlagsW plus, when successful
  3648. // copies the canonicalised form of the scheme back.
  3649. PRIVATE URL_SCHEME
  3650. GetSchemeTypeAndFlagsSpecialW(LPWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
  3651. {
  3652. DWORD i;
  3653. ASSERT(pszScheme);
  3654. #ifdef DEBUG
  3655. if ((g_cSTTot % 10) == 0)
  3656. TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
  3657. #endif
  3658. DBEXEC(TRUE, g_cSTTot++);
  3659. // check cache 1st
  3660. i = g_iScheme;
  3661. if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  3662. && StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
  3663. {
  3664. DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
  3665. Lhit:
  3666. if (pdwFlags)
  3667. *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
  3668. // update cache (unconditionally)
  3669. g_iScheme = i;
  3670. // We need to do this because the scheme might not be canonicalised
  3671. memcpy(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme*sizeof(WCHAR));
  3672. return g_mpUrlSchemeTypes[i].eScheme;
  3673. }
  3674. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
  3675. {
  3676. if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
  3677. && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
  3678. goto Lhit;
  3679. }
  3680. if (pdwFlags)
  3681. {
  3682. *pdwFlags = 0;
  3683. }
  3684. return URL_SCHEME_UNKNOWN;
  3685. }
  3686. // URL_STRING --------------------------------------------------------------------------------------
  3687. // is a container for the combined URL. It attempts to construct a string from the information
  3688. // fed into it. If there is not enough buffer space available, it will measure how much additional
  3689. // space will be required to hold the string.
  3690. WCHAR wszBogus[] = L"";
  3691. // US_* are the various modes of transforming characters fed into the container.
  3692. // US_NOTHING do nothing to the character.
  3693. // US_UNESCAPE turn entries of the form %xx into the unescaped form
  3694. // US_ESCAPE_UNSAFE transform invalid path characters into %xx sequences
  3695. // US_ESCAPE_SPACES transform only spaces in to %20 sequences
  3696. enum
  3697. {
  3698. US_NOTHING,
  3699. US_UNESCAPE,
  3700. US_ESCAPE_UNSAFE,
  3701. US_ESCAPE_SPACES
  3702. };
  3703. class URL_STRING
  3704. {
  3705. protected:
  3706. URL_SCHEME _eScheme;
  3707. DWORD _ccWork, _ccMark, _ccLastWhite, _ccQuery, _ccFragment, _ccBuffer, _dwSchemeInfo;
  3708. DWORD _dwOldFlags, _dwFlags, _dwMode;
  3709. BOOL _fFixSlashes, _fExpecting, _fError;
  3710. WCHAR _wchLast, _wszInternalString[256];
  3711. PWSTR _pszWork;
  3712. VOID baseAccept(WCHAR wch);
  3713. VOID TrackWhiteSpace(WCHAR wch);
  3714. public:
  3715. URL_STRING(DWORD dwFlags);
  3716. ~URL_STRING();
  3717. VOID CleanAccept(WCHAR wch);
  3718. VOID Accept(WCHAR wch);
  3719. VOID Accept(PWSTR a_psz);
  3720. VOID Contract(BOOL fContractLevel = TRUE);
  3721. VOID TrimEndWhiteSpace();
  3722. PWSTR GetStart();
  3723. LONG GetTotalLength();
  3724. BOOL AnyProblems();
  3725. VOID NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo);
  3726. VOID AddSchemeNote(DWORD a_dwSchemeInfo);
  3727. DWORD GetSchemeNotes();
  3728. URL_SCHEME QueryScheme();
  3729. VOID Mark();
  3730. VOID ClearMark();
  3731. VOID EraseMarkedText();
  3732. DWORD CompareMarkWith(PWSTR psz);
  3733. DWORD CompareLast(PCWSTR psz, DWORD cc);
  3734. VOID EnableMunging();
  3735. VOID DisableMunging();
  3736. VOID DisableSlashFixing();
  3737. VOID RestoreFlags();
  3738. VOID AddFlagNote(DWORD dwFlag);
  3739. VOID NotifyQuery();
  3740. VOID NotifyFragment();
  3741. VOID DropQuery();
  3742. VOID DropFragment();
  3743. };
  3744. // -------------------------------------------------------------------------------
  3745. URL_STRING::URL_STRING(DWORD dwFlags)
  3746. {
  3747. _ccBuffer = ARRAYSIZE(_wszInternalString);
  3748. _ccWork = 1;
  3749. _pszWork = _wszInternalString;
  3750. _ccQuery = _ccFragment = _ccMark = 0;
  3751. _eScheme = URL_SCHEME_UNKNOWN;
  3752. _dwOldFlags = _dwFlags = dwFlags;
  3753. _dwMode = US_NOTHING;
  3754. _fFixSlashes = TRUE;
  3755. _fError = _fExpecting = FALSE;
  3756. }
  3757. URL_STRING::~URL_STRING()
  3758. {
  3759. if (_ccBuffer > ARRAYSIZE(_wszInternalString))
  3760. {
  3761. LocalFree(_pszWork);
  3762. }
  3763. }
  3764. // -------------------------------------------------------------------------------
  3765. // These are the standard functions used for adding characters to an url.
  3766. VOID URL_STRING::baseAccept(WCHAR wch)
  3767. {
  3768. _pszWork[_ccWork-1] = (_fFixSlashes
  3769. ? ((wch!=WHACK) ? wch : SLASH)
  3770. : wch);
  3771. _ccWork++;
  3772. if (_ccWork>_ccBuffer)
  3773. {
  3774. if (!_fError)
  3775. {
  3776. PWSTR psz = (PWSTR)LocalAlloc(LPTR, 2*_ccBuffer*sizeof(WCHAR));
  3777. if (!psz)
  3778. {
  3779. _ccWork--;
  3780. _fError = TRUE;
  3781. return;
  3782. }
  3783. memcpy(psz, _pszWork, (_ccWork-1)*sizeof(WCHAR));
  3784. if (_ccBuffer>ARRAYSIZE(_wszInternalString))
  3785. {
  3786. LocalFree(_pszWork);
  3787. }
  3788. _ccBuffer *= 2;
  3789. _pszWork = psz;
  3790. }
  3791. else
  3792. {
  3793. _ccWork--;
  3794. }
  3795. }
  3796. }
  3797. VOID URL_STRING::TrackWhiteSpace(WCHAR wch)
  3798. {
  3799. if (IsWhite(wch))
  3800. {
  3801. if (!_ccLastWhite)
  3802. {
  3803. _ccLastWhite = _ccWork;
  3804. }
  3805. }
  3806. else
  3807. {
  3808. _ccLastWhite = 0;
  3809. }
  3810. }
  3811. // -- URL_STRING::Accept ----------------------------
  3812. // Based on the current munging mode, transform the character into the
  3813. // desired form and add it to the string.
  3814. VOID URL_STRING::Accept(WCHAR wch)
  3815. {
  3816. TrackWhiteSpace(wch);
  3817. switch (_dwMode)
  3818. {
  3819. case US_NOTHING:
  3820. break;
  3821. case US_UNESCAPE:
  3822. if (_fExpecting)
  3823. {
  3824. if (!IsHex(wch))
  3825. {
  3826. baseAccept(HEX_ESCAPE);
  3827. if (_wchLast!=L'\0')
  3828. {
  3829. baseAccept(_wchLast);
  3830. }
  3831. _fExpecting = FALSE;
  3832. break;
  3833. }
  3834. else if (_wchLast!=L'\0')
  3835. {
  3836. wch = (HexToWord(_wchLast)*16) + HexToWord(wch);
  3837. TrackWhiteSpace(wch);
  3838. _fExpecting = FALSE;
  3839. if ((wch==WHACK) && _fFixSlashes)
  3840. {
  3841. _fFixSlashes = FALSE;
  3842. baseAccept(wch);
  3843. _fFixSlashes = TRUE;
  3844. return;
  3845. }
  3846. break;
  3847. }
  3848. else
  3849. {
  3850. _wchLast = wch;
  3851. }
  3852. return;
  3853. }
  3854. if (wch==HEX_ESCAPE)
  3855. {
  3856. _fExpecting = TRUE;
  3857. _wchLast = L'\0';
  3858. return;
  3859. }
  3860. break;
  3861. case US_ESCAPE_UNSAFE:
  3862. if ((wch==SLASH)
  3863. ||
  3864. (wch==WHACK && _fFixSlashes)
  3865. ||
  3866. (IsSafePathChar(wch) && (wch!=HEX_ESCAPE || !(_dwFlags & URL_ESCAPE_PERCENT))))
  3867. {
  3868. break;
  3869. }
  3870. baseAccept(L'%');
  3871. baseAccept(hex[(wch >> 4) & 15]);
  3872. baseAccept(hex[wch & 15]);
  3873. return;
  3874. case US_ESCAPE_SPACES:
  3875. if (wch==SPC)
  3876. {
  3877. baseAccept(L'%');
  3878. baseAccept(L'2');
  3879. baseAccept(L'0');
  3880. return;
  3881. }
  3882. break;
  3883. default:
  3884. ASSERT(FALSE);
  3885. }
  3886. baseAccept(wch);
  3887. }
  3888. // -- Accept --------------------------------
  3889. // Accept only a string
  3890. VOID URL_STRING::Accept(PWSTR psz)
  3891. {
  3892. while (*psz)
  3893. {
  3894. Accept(*psz);
  3895. psz++;
  3896. }
  3897. }
  3898. // -- Contract
  3899. // Whenever we call Contract, we're pointing past the last separator. We want to
  3900. // omit the segment between this separator and the one before it.
  3901. // This should be used ONLY when we're examining the path segment of the urls.
  3902. VOID URL_STRING::Contract(BOOL fContractLevel)
  3903. {
  3904. ASSERT(_ccWork && _ccMark);
  3905. // _ccWork is 1 after wherever the next character will be placed
  3906. // subtract +1 to derive what the last character in the url is
  3907. DWORD _ccEnd = _ccWork-1 - 1;
  3908. if (_eScheme!=URL_SCHEME_MK)
  3909. {
  3910. if (!fContractLevel && (_pszWork[_ccEnd]==SLASH || _pszWork[_ccEnd]==WHACK))
  3911. {
  3912. return;
  3913. }
  3914. do
  3915. {
  3916. _ccEnd--;
  3917. }
  3918. while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH && _pszWork[_ccEnd]!=WHACK);
  3919. }
  3920. else
  3921. {
  3922. if (!fContractLevel && (_pszWork[_ccEnd]==SLASH))
  3923. {
  3924. return;
  3925. }
  3926. do
  3927. {
  3928. _ccEnd--;
  3929. }
  3930. while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH);
  3931. }
  3932. if (_ccEnd<_ccMark-1)
  3933. {
  3934. _ccEnd = _ccMark-1;
  3935. }
  3936. else
  3937. {
  3938. _ccEnd++;
  3939. }
  3940. _ccWork = _ccEnd + 1;
  3941. }
  3942. VOID URL_STRING::TrimEndWhiteSpace()
  3943. {
  3944. if (_ccLastWhite)
  3945. {
  3946. _ccWork = _ccLastWhite;
  3947. _ccLastWhite = 0;
  3948. }
  3949. }
  3950. VOID URL_STRING::CleanAccept(WCHAR wch)
  3951. {
  3952. baseAccept(wch);
  3953. }
  3954. // -------------------------------------------------------------------------------
  3955. // These member functions return information about the url that is being formed
  3956. PWSTR URL_STRING::GetStart()
  3957. {
  3958. return _pszWork;
  3959. }
  3960. LONG URL_STRING::GetTotalLength()
  3961. {
  3962. return _ccWork - 1;
  3963. }
  3964. BOOL URL_STRING::AnyProblems()
  3965. {
  3966. return _fError;
  3967. }
  3968. // -------------------------------------------------------------------------------
  3969. VOID URL_STRING::NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo)
  3970. {
  3971. _eScheme = a_eScheme;
  3972. _dwSchemeInfo = a_dwSchemeInfo;
  3973. _fFixSlashes = a_dwSchemeInfo & UPF_SCHEME_CONVERT;
  3974. }
  3975. VOID URL_STRING::AddSchemeNote(DWORD a_dwSchemeInfo)
  3976. {
  3977. _dwSchemeInfo |= a_dwSchemeInfo;
  3978. _fFixSlashes = _dwSchemeInfo & UPF_SCHEME_CONVERT;
  3979. }
  3980. DWORD URL_STRING::GetSchemeNotes()
  3981. {
  3982. return _dwSchemeInfo;
  3983. }
  3984. URL_SCHEME URL_STRING::QueryScheme()
  3985. {
  3986. return _eScheme;
  3987. }
  3988. // -------------------------------------------------------------------------------
  3989. VOID URL_STRING::Mark()
  3990. {
  3991. _ccMark = _ccWork;
  3992. }
  3993. VOID URL_STRING::ClearMark()
  3994. {
  3995. _ccMark = 0;
  3996. }
  3997. VOID URL_STRING::EraseMarkedText()
  3998. {
  3999. if (_ccMark)
  4000. {
  4001. _ccWork = _ccMark;
  4002. _ccMark = 0;
  4003. }
  4004. }
  4005. DWORD URL_STRING::CompareMarkWith(PWSTR psz)
  4006. {
  4007. if (_ccMark)
  4008. {
  4009. *(_pszWork + _ccWork - 1) = L'\0';
  4010. return (StrCmpW(_pszWork + _ccMark - 1, psz));
  4011. }
  4012. // In other words, return that the string isn't present.
  4013. return 1;
  4014. }
  4015. DWORD URL_STRING::CompareLast(PCWSTR psz, DWORD cc)
  4016. {
  4017. if (_ccWork > cc)
  4018. {
  4019. return StrCmpNIW(_pszWork + _ccWork - 1 - cc, psz, cc);
  4020. }
  4021. return 1;
  4022. }
  4023. // -------------------------------------------------------------------------------
  4024. VOID URL_STRING::NotifyQuery()
  4025. {
  4026. if (!_ccQuery)
  4027. {
  4028. _ccQuery = _ccWork;
  4029. }
  4030. }
  4031. VOID URL_STRING::NotifyFragment()
  4032. {
  4033. if (!_ccFragment)
  4034. {
  4035. _ccFragment = _ccWork;
  4036. CleanAccept(POUND);
  4037. }
  4038. }
  4039. VOID URL_STRING::DropQuery()
  4040. {
  4041. if (_ccQuery)
  4042. {
  4043. _ccWork = _ccQuery;
  4044. _ccQuery = _ccFragment = 0;
  4045. }
  4046. }
  4047. VOID URL_STRING::DropFragment()
  4048. {
  4049. if (_ccFragment)
  4050. {
  4051. _ccWork = _ccFragment;
  4052. _ccFragment = 0;
  4053. }
  4054. }
  4055. // -------------------------------------------------------------------------------
  4056. // These member functions are for determining how the url's characters are going
  4057. // to be represented
  4058. VOID URL_STRING::EnableMunging()
  4059. {
  4060. _dwMode = US_NOTHING;
  4061. // For opaque urls, munge ONLY if we're explicitly asked to URL_ESCAPE or URL_UNESCAPE,
  4062. // but NOT URL_ESCAPE_SPACES_ONLY
  4063. // For query and fragment, never allow for URL_ESCAPE_UNSAFE and for
  4064. // others ONLY when URL_DONT_ESCAPE_EXTRA_INFO is specified
  4065. if ((_dwSchemeInfo & UPF_SCHEME_OPAQUE)
  4066. && (_dwFlags & URL_ESCAPE_SPACES_ONLY))
  4067. return;
  4068. if ((_ccQuery || _ccFragment)
  4069. && ((_dwFlags & (URL_DONT_ESCAPE_EXTRA_INFO | URL_ESCAPE_UNSAFE))))
  4070. return;
  4071. if (_dwFlags & URL_UNESCAPE)
  4072. {
  4073. _dwMode = US_UNESCAPE;
  4074. }
  4075. else if (_dwFlags & URL_ESCAPE_UNSAFE)
  4076. {
  4077. _dwMode = US_ESCAPE_UNSAFE;
  4078. }
  4079. else if (_dwFlags & URL_ESCAPE_SPACES_ONLY)
  4080. {
  4081. _dwMode = US_ESCAPE_SPACES;
  4082. }
  4083. }
  4084. VOID URL_STRING::DisableMunging()
  4085. {
  4086. _dwMode = US_NOTHING;
  4087. }
  4088. VOID URL_STRING::DisableSlashFixing()
  4089. {
  4090. _fFixSlashes = FALSE;
  4091. }
  4092. VOID URL_STRING::AddFlagNote(DWORD dwFlag)
  4093. {
  4094. _dwFlags |= dwFlag;
  4095. }
  4096. VOID URL_STRING::RestoreFlags()
  4097. {
  4098. ASSERT((_eScheme==URL_SCHEME_FILE) || (_dwFlags==_dwOldFlags));
  4099. _dwFlags = _dwOldFlags;
  4100. EnableMunging();
  4101. }
  4102. // -------------------------------------------------------------------------------
  4103. // URL ------------------------------------------------------------------------------------
  4104. // The URL class is used to examine the base and relative URLs to determine what
  4105. // will go into the URL_STRING container. The difference should be clear:
  4106. // URL instances look, but don't touch. URL_STRINGs are used solely to build urls.
  4107. class URL
  4108. {
  4109. private:
  4110. PCWSTR _pszUrl, _pszWork;
  4111. URL_SCHEME _eScheme;
  4112. DWORD _dwSchemeNotes, _dwFlags;
  4113. BOOL _fPathCompressionOn;
  4114. BOOL _fIgnoreQuery;
  4115. WCHAR SmallForm(WCHAR wch);
  4116. BOOL IsAlpha(WCHAR ch);
  4117. PCWSTR IsUrlPrefix(PCWSTR psz);
  4118. BOOL IsLocalDrive(PCWSTR psz);
  4119. BOOL IsQualifiedDrive(PCWSTR psz);
  4120. BOOL DetectSymbols(WCHAR wch1, WCHAR wch2 = '\0', WCHAR wch3 = '\0');
  4121. PCWSTR NextChar(PCWSTR psz);
  4122. PCWSTR FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1 = '\0', WCHAR wchDelim2 = '\0', WCHAR wchDelim3 = '\0', WCHAR wchDelim4 = '\0');
  4123. BOOL DetectFileServer();
  4124. BOOL DetectMkServer();
  4125. BOOL DefaultDetectServer();
  4126. VOID FeedDefaultServer(URL_STRING* pus);
  4127. VOID FeedFileServer(URL_STRING* pus);
  4128. VOID FeedFtpServer(URL_STRING* pus);
  4129. VOID FeedHttpServer(URL_STRING* pus);
  4130. VOID FeedMkServer(URL_STRING* pus);
  4131. PCWSTR FeedPort(PCWSTR psz, URL_STRING* pus);
  4132. public:
  4133. VOID Setup(PCWSTR pszInUrl, DWORD a_dwFlags = 0);
  4134. VOID Reset();
  4135. BOOL IsReset();
  4136. BOOL DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes = FALSE);
  4137. VOID SetScheme(URL_SCHEME eScheme, DWORD dwFlag);
  4138. URL_SCHEME GetScheme();
  4139. VOID AddSchemeNote(DWORD dwFlag);
  4140. DWORD GetSchemeNotes();
  4141. BOOL DetectServer();
  4142. BOOL DetectAbsolutePath();
  4143. BOOL DetectPath();
  4144. BOOL DetectQueryOrFragment();
  4145. BOOL DetectQuery();
  4146. BOOL DetectLocalDrive();
  4147. BOOL DetectSlash();
  4148. BOOL DetectAnything();
  4149. WCHAR PeekNext();
  4150. VOID FeedPath(URL_STRING* pus, BOOL fMarkServer = TRUE);
  4151. PCWSTR CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue);
  4152. DWORD DetectDots(PCWSTR* ppsz);
  4153. VOID StopPathCompression();
  4154. VOID FeedServer(URL_STRING* pus);
  4155. VOID FeedLocalDrive(URL_STRING* pus);
  4156. VOID FeedQueryAndFragment(URL_STRING* pus);
  4157. VOID IgnoreQuery();
  4158. };
  4159. // -------------------------------------------------------------------------------
  4160. VOID URL::Setup(PCWSTR pszInUrl, DWORD a_dwFlags)
  4161. {
  4162. while (*pszInUrl && IsWhite(*pszInUrl))
  4163. {
  4164. pszInUrl++;
  4165. }
  4166. _pszWork = _pszUrl = pszInUrl;
  4167. _eScheme = URL_SCHEME_UNKNOWN;
  4168. _dwSchemeNotes = 0;
  4169. _dwFlags = a_dwFlags;
  4170. _fPathCompressionOn = TRUE;
  4171. _fIgnoreQuery = FALSE;
  4172. }
  4173. VOID URL::Reset()
  4174. {
  4175. _pszWork = wszBogus;
  4176. }
  4177. BOOL URL::IsReset()
  4178. {
  4179. return (_pszWork==wszBogus);
  4180. }
  4181. // -------------------------------------------------------------------------------
  4182. inline WCHAR URL::SmallForm(WCHAR wch)
  4183. {
  4184. return (wch < L'A' || wch > L'Z') ? wch : (wch - L'A' + L'a');
  4185. }
  4186. inline BOOL URL::IsAlpha(WCHAR ch)
  4187. {
  4188. return ((ch >= 'a') && (ch <= 'z'))
  4189. ||
  4190. ((ch >= 'A') && (ch <= 'Z'));
  4191. }
  4192. inline PCWSTR URL::IsUrlPrefix(PCWSTR psz)
  4193. {
  4194. // We want to skip instances of "URL:"
  4195. psz = NextChar(psz);
  4196. if (*psz==L'u' || *psz==L'U')
  4197. {
  4198. psz = NextChar(psz+1);
  4199. if (*psz==L'r' || *psz==L'R')
  4200. {
  4201. psz = NextChar(psz+1);
  4202. if (*psz==L'l' || *psz==L'L')
  4203. {
  4204. psz = NextChar(psz+1);
  4205. if (*psz==COLON)
  4206. {
  4207. return NextChar(psz+1);
  4208. }
  4209. }
  4210. }
  4211. }
  4212. return NULL;
  4213. }
  4214. inline BOOL URL::IsLocalDrive(PCWSTR psz)
  4215. {
  4216. psz = NextChar(psz);
  4217. return (IsAlpha(*psz)
  4218. &&
  4219. ((*NextChar(psz+1)==COLON) || (*NextChar(psz+1)==BAR)));
  4220. }
  4221. // -- IsQualifiedDrive --------
  4222. // On Win32 systems, a qualified drive is either
  4223. // i. <letter>: or ii. \\UNC\
  4224. // Under unix, it's only /.
  4225. inline BOOL URL::IsQualifiedDrive(PCWSTR psz)
  4226. {
  4227. psz = NextChar(psz);
  4228. BOOL fResult = IsLocalDrive(psz);
  4229. if (!fResult && *psz==WHACK)
  4230. {
  4231. psz = NextChar(psz+1);
  4232. fResult = *psz==WHACK;
  4233. }
  4234. return fResult;
  4235. }
  4236. // -- DetectSymbols -------------
  4237. // This is used to help determine what part of the URL we have reached.
  4238. inline BOOL URL::DetectSymbols(WCHAR wch1, WCHAR wch2, WCHAR wch3)
  4239. {
  4240. ASSERT(_pszWork);
  4241. PCWSTR psz = NextChar(_pszWork);
  4242. return (*psz && (*psz==wch1 || *psz==wch2 || *psz==wch3));
  4243. }
  4244. BOOL URL::DetectSlash()
  4245. {
  4246. return DetectSymbols(SLASH, WHACK);
  4247. }
  4248. BOOL URL::DetectAnything()
  4249. {
  4250. return (*NextChar(_pszWork)!=L'\0');
  4251. }
  4252. // -- NextChar -------------------------------------
  4253. // We use NextChar instead of *psz because we want to
  4254. // ignore characters such as TAB, CR, etc.
  4255. inline PCWSTR URL::NextChar(PCWSTR psz)
  4256. {
  4257. while (IsInsignificantWhite(*psz))
  4258. {
  4259. psz++;
  4260. }
  4261. return psz;
  4262. }
  4263. WCHAR URL::PeekNext()
  4264. {
  4265. return (*NextChar(NextChar(_pszWork)+1));
  4266. }
  4267. // -------------------------------------------------------------------------------
  4268. inline PCWSTR URL::FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1, WCHAR wchDelim2, WCHAR wchDelim3, WCHAR wchDelim4)
  4269. {
  4270. psz = NextChar(psz);
  4271. while (*psz && *psz!=wchDelim1 && *psz!=wchDelim2 && *psz!=wchDelim3 && *psz!=wchDelim4)
  4272. {
  4273. pus->Accept(*psz);
  4274. psz = NextChar(psz+1);
  4275. }
  4276. return psz;
  4277. }
  4278. // -------------------------------------------------------------------------------
  4279. VOID URL::SetScheme(URL_SCHEME eScheme, DWORD dwFlag)
  4280. {
  4281. _eScheme = eScheme;
  4282. _dwSchemeNotes = dwFlag;
  4283. }
  4284. URL_SCHEME URL::GetScheme()
  4285. {
  4286. return _eScheme;
  4287. }
  4288. VOID URL::AddSchemeNote(DWORD dwFlag)
  4289. {
  4290. _dwSchemeNotes |= dwFlag;
  4291. }
  4292. DWORD URL::GetSchemeNotes()
  4293. {
  4294. return _dwSchemeNotes;
  4295. }
  4296. BOOL URL::DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes)
  4297. {
  4298. ASSERT(_pszWork);
  4299. ASSERT(!fReconcileSchemes || (fReconcileSchemes && pus->QueryScheme()!=URL_SCHEME_FILE));
  4300. PCWSTR psz = NextChar(_pszWork);
  4301. BOOL fResult = (IsQualifiedDrive(_pszWork));
  4302. if (fResult)
  4303. {
  4304. //
  4305. // Detected a File URL that isn't explicitly marked as such, ie C:\foo,
  4306. // in this case, we need to confirm that we're not overwriting
  4307. // a fully qualified relative URL with an Accept("file:"), although
  4308. // if the relative URL is the same scheme as the base, we now
  4309. // need to make the BASE-file URL take precedence.
  4310. //
  4311. _eScheme = URL_SCHEME_FILE;
  4312. if (!fReconcileSchemes)
  4313. {
  4314. pus->Accept((PWSTR)c_szFileScheme);
  4315. pus->Accept(COLON);
  4316. _dwSchemeNotes = g_mpUrlSchemeTypes[1].dwFlags;
  4317. pus->NoteScheme(_eScheme, _dwSchemeNotes);
  4318. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4319. }
  4320. else if (pus->QueryScheme() != URL_SCHEME_FILE)
  4321. {
  4322. Reset();
  4323. }
  4324. goto exit;
  4325. }
  4326. for (;;)
  4327. {
  4328. while (IsValidSchemeCharW(*psz))
  4329. {
  4330. psz = NextChar(psz + 1);
  4331. }
  4332. if (*psz!=COLON)
  4333. {
  4334. break;
  4335. }
  4336. if (IsUrlPrefix(_pszWork))
  4337. {
  4338. // However, we want to skip instances of URL:
  4339. _pszWork = psz = NextChar(psz+1);
  4340. continue;
  4341. }
  4342. DWORD ccScheme = 0;
  4343. PCWSTR pszClone = NextChar(_pszWork);
  4344. if (!fReconcileSchemes)
  4345. {
  4346. while (pszClone<=psz)
  4347. {
  4348. pus->Accept(SmallForm(*pszClone));
  4349. ccScheme++;
  4350. pszClone = NextChar(pszClone+1);
  4351. }
  4352. _pszWork = pszClone;
  4353. // Subtract one for the colon
  4354. ccScheme--;
  4355. // BUG BUG Since we're smallifying the scheme above, we might be able to
  4356. // avoid calling this func, call GetSchemeTypeAndFlags instead.
  4357. _eScheme = GetSchemeTypeAndFlagsSpecialW(pus->GetStart(), ccScheme, &_dwSchemeNotes);
  4358. pus->NoteScheme(_eScheme, _dwSchemeNotes);
  4359. }
  4360. else
  4361. {
  4362. PWSTR pszKnownScheme = pus->GetStart();
  4363. while (pszClone<=psz && SmallForm(*pszClone)==*pszKnownScheme)
  4364. {
  4365. pszClone = NextChar(pszClone+1);
  4366. pszKnownScheme++;
  4367. }
  4368. if (pszClone<=psz)
  4369. {
  4370. Reset();
  4371. }
  4372. else
  4373. {
  4374. _pszWork = pszClone;
  4375. }
  4376. }
  4377. fResult = TRUE;
  4378. break;
  4379. }
  4380. exit:
  4381. return fResult;
  4382. }
  4383. // -------------------------------------------------------------------------------
  4384. BOOL URL::DetectServer()
  4385. {
  4386. ASSERT(_pszWork);
  4387. BOOL fRet;
  4388. switch (_eScheme)
  4389. {
  4390. case URL_SCHEME_FILE:
  4391. fRet = DetectFileServer();
  4392. break;
  4393. case URL_SCHEME_MK:
  4394. fRet = DetectMkServer();
  4395. break;
  4396. default:
  4397. fRet = DefaultDetectServer();
  4398. break;
  4399. }
  4400. return fRet;
  4401. }
  4402. BOOL URL::DetectLocalDrive()
  4403. {
  4404. return IsLocalDrive(_pszWork);
  4405. }
  4406. BOOL URL::DetectFileServer()
  4407. {
  4408. ASSERT(_pszWork);
  4409. PCWSTR psz = _pszWork;
  4410. BOOL fResult = IsLocalDrive(_pszWork);
  4411. if (fResult)
  4412. {
  4413. _dwSchemeNotes |= UPF_FILEISPATHURL;
  4414. }
  4415. else
  4416. {
  4417. fResult = DetectSymbols(SLASH, WHACK);
  4418. }
  4419. return fResult;
  4420. }
  4421. BOOL URL::DetectMkServer()
  4422. {
  4423. ASSERT(_pszWork);
  4424. PCWSTR psz = NextChar(_pszWork);
  4425. BOOL fResult = (*psz==L'@');
  4426. if (fResult)
  4427. {
  4428. _pszWork = NextChar(psz + 1);
  4429. }
  4430. return fResult;
  4431. }
  4432. BOOL URL::DefaultDetectServer()
  4433. {
  4434. BOOL fResult = FALSE;
  4435. if (DetectSymbols(SLASH, WHACK))
  4436. {
  4437. PCWSTR psz = NextChar(_pszWork + 1);
  4438. fResult = ((*psz==SLASH) || (*psz==WHACK));
  4439. }
  4440. return fResult;
  4441. }
  4442. VOID URL::FeedServer(URL_STRING* pus)
  4443. {
  4444. ASSERT(_pszWork);
  4445. switch (_eScheme)
  4446. {
  4447. case URL_SCHEME_FILE:
  4448. FeedFileServer(pus);
  4449. break;
  4450. case URL_SCHEME_MK:
  4451. FeedMkServer(pus);
  4452. break;
  4453. case URL_SCHEME_FTP:
  4454. FeedFtpServer(pus);
  4455. break;
  4456. case URL_SCHEME_HTTP:
  4457. case URL_SCHEME_HTTPS:
  4458. FeedHttpServer(pus);
  4459. break;
  4460. default:
  4461. FeedDefaultServer(pus);
  4462. break;
  4463. }
  4464. }
  4465. VOID URL::FeedMkServer(URL_STRING* pus)
  4466. {
  4467. ASSERT(_pszWork);
  4468. pus->EnableMunging();
  4469. pus->Accept(L'@');
  4470. _pszWork = FeedUntil(_pszWork, pus, SLASH);
  4471. if (!*_pszWork)
  4472. {
  4473. pus->TrimEndWhiteSpace();
  4474. }
  4475. else
  4476. {
  4477. _pszWork = NextChar(_pszWork+1);
  4478. }
  4479. pus->Accept(SLASH);
  4480. }
  4481. VOID URL::FeedLocalDrive(URL_STRING* pus)
  4482. {
  4483. pus->Accept(*NextChar(_pszWork));
  4484. _pszWork = NextChar(_pszWork+1);
  4485. pus->Accept(*_pszWork);
  4486. _pszWork = NextChar(_pszWork+1);
  4487. pus->DisableMunging();
  4488. }
  4489. VOID URL::FeedFileServer(URL_STRING* pus)
  4490. {
  4491. PCWSTR psz = NextChar(_pszWork);
  4492. while (*psz==SLASH || *psz==WHACK)
  4493. {
  4494. psz = NextChar(psz+1);
  4495. }
  4496. DWORD dwSlashes = (DWORD)(psz - _pszWork);
  4497. switch (dwSlashes)
  4498. {
  4499. case 4:
  4500. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4501. _dwSchemeNotes |= UPF_FILEISPATHURL;
  4502. // 4 to 6 slashes == 1 UNC
  4503. case 2:
  4504. if (IsLocalDrive(psz))
  4505. {
  4506. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4507. }
  4508. case 5:
  4509. case 6:
  4510. pus->Accept(SLASH);
  4511. pus->Accept(SLASH);
  4512. if (!IsLocalDrive(psz))
  4513. {
  4514. pus->EnableMunging();
  4515. psz = FeedUntil(psz, pus, SLASH, WHACK);
  4516. if (!*psz)
  4517. {
  4518. pus->TrimEndWhiteSpace();
  4519. Reset();
  4520. }
  4521. else
  4522. {
  4523. _pszWork = NextChar(psz+1);
  4524. }
  4525. }
  4526. else
  4527. {
  4528. _pszWork = psz;
  4529. }
  4530. pus->Accept(SLASH);
  4531. break;
  4532. // If there are no slashes, then it can't be a UNC.
  4533. case 0:
  4534. if (IsLocalDrive(psz))
  4535. {
  4536. pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
  4537. }
  4538. // We think of "file:/" and "file:///" to be on the local machine
  4539. // And if there are more slashes than we typically handle, we'll treat them as 1.
  4540. case 1:
  4541. case 3:
  4542. // This is a not-good-case
  4543. default:
  4544. pus->Accept(SLASH);
  4545. pus->Accept(SLASH);
  4546. pus->Accept(SLASH);
  4547. _pszWork = NextChar(psz);
  4548. break;
  4549. }
  4550. }
  4551. VOID URL::FeedFtpServer(URL_STRING* pus)
  4552. {
  4553. ASSERT(_pszWork);
  4554. PCWSTR psz = NextChar(_pszWork);
  4555. if (*psz==WHACK || *psz==SLASH)
  4556. {
  4557. pus->Accept(*psz);
  4558. psz = NextChar(psz+1);
  4559. }
  4560. if (*psz==WHACK || *psz==SLASH)
  4561. {
  4562. pus->Accept(*psz);
  4563. psz = NextChar(psz+1);
  4564. }
  4565. pus->EnableMunging();
  4566. // The following is a grotesque and gruesome hack. We need to preserve case for
  4567. // embedded username/password
  4568. _pszWork = psz;
  4569. BOOL fPossibleUserPasswordCombo = FALSE;
  4570. while (*psz && *psz!=SLASH && *psz!=POUND && *psz!=QUERY)
  4571. {
  4572. if (*psz==L'@')
  4573. {
  4574. fPossibleUserPasswordCombo = TRUE;
  4575. break;
  4576. }
  4577. psz = NextChar(psz+1);
  4578. }
  4579. psz = _pszWork;
  4580. if (fPossibleUserPasswordCombo)
  4581. {
  4582. while (*psz!=L'@')
  4583. {
  4584. pus->Accept(*psz);
  4585. psz = NextChar(psz+1);
  4586. }
  4587. }
  4588. // This still leaves the issue of slashes, colons, ?s, @s, and #s in passwords; I guess they
  4589. // ought to be escaped. (You just can't win, sometimes.)
  4590. while (*psz && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
  4591. {
  4592. pus->Accept(SmallForm(*psz));
  4593. psz = NextChar(psz+1);
  4594. }
  4595. if (*psz==COLON)
  4596. {
  4597. psz = FeedPort(psz, pus);
  4598. }
  4599. pus->DisableMunging();
  4600. _pszWork = psz;
  4601. if (!*psz)
  4602. {
  4603. pus->TrimEndWhiteSpace();
  4604. pus->Accept(SLASH);
  4605. }
  4606. else
  4607. {
  4608. if (*psz==QUERY || *psz==POUND)
  4609. {
  4610. pus->Accept(SLASH);
  4611. }
  4612. else
  4613. {
  4614. pus->Accept(*psz);
  4615. _pszWork = NextChar(psz+1);
  4616. }
  4617. }
  4618. }
  4619. VOID URL::FeedHttpServer(URL_STRING* pus)
  4620. {
  4621. // This is a version of FeedDefaultServer, stripped of non-essentials.
  4622. // This includes a hack to enable username/password combos in http urls.
  4623. ASSERT(_pszWork);
  4624. PCWSTR psz = NextChar(_pszWork);
  4625. if (*psz==WHACK || *psz==SLASH)
  4626. {
  4627. pus->Accept(*psz);
  4628. psz = NextChar(psz+1);
  4629. }
  4630. if (*psz==WHACK || *psz==SLASH)
  4631. {
  4632. pus->Accept(*psz);
  4633. psz = NextChar(psz+1);
  4634. }
  4635. pus->EnableMunging();
  4636. // WARNING! FeedPort also calls Mark(). Must be careful that they don't overlap.
  4637. pus->Mark();
  4638. PCWSTR pszRestart = psz;
  4639. while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT)
  4640. {
  4641. pus->Accept(SmallForm(*psz));
  4642. psz = NextChar(psz+1);
  4643. }
  4644. if (*psz==COLON)
  4645. {
  4646. // We either have a port or a password.
  4647. PCWSTR pszPort = psz;
  4648. do
  4649. {
  4650. psz = NextChar(psz+1);
  4651. }
  4652. while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT);
  4653. if (*psz!=AT)
  4654. {
  4655. psz = FeedPort(pszPort, pus);
  4656. }
  4657. }
  4658. if (*psz==AT)
  4659. {
  4660. // We've hit a username/password combo. So we have to undo our case-changing
  4661. psz = pszRestart;
  4662. pus->EraseMarkedText();
  4663. while (*psz!=AT)
  4664. {
  4665. pus->Accept(*psz);
  4666. psz = NextChar(psz+1);
  4667. }
  4668. // Now we carry on as before
  4669. while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
  4670. {
  4671. pus->Accept(SmallForm(*psz));
  4672. psz = NextChar(psz+1);
  4673. }
  4674. if (*psz==COLON)
  4675. {
  4676. psz = FeedPort(psz, pus);
  4677. }
  4678. }
  4679. pus->ClearMark();
  4680. pus->DisableMunging();
  4681. _pszWork = psz;
  4682. if (!*psz)
  4683. {
  4684. pus->TrimEndWhiteSpace();
  4685. if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
  4686. {
  4687. pus->Accept(SLASH);
  4688. }
  4689. }
  4690. else
  4691. {
  4692. if (*psz==QUERY || *psz==POUND)
  4693. {
  4694. pus->Accept(SLASH);
  4695. }
  4696. else
  4697. {
  4698. pus->Accept(*psz);
  4699. _pszWork = NextChar(psz+1);
  4700. }
  4701. }
  4702. }
  4703. VOID URL::FeedDefaultServer(URL_STRING* pus)
  4704. {
  4705. ASSERT(_pszWork);
  4706. PCWSTR psz = NextChar(_pszWork);
  4707. if (!(_dwSchemeNotes & UPF_SCHEME_INTERNET))
  4708. {
  4709. pus->DisableSlashFixing();
  4710. }
  4711. if (*psz==WHACK || *psz==SLASH)
  4712. {
  4713. pus->Accept(*psz);
  4714. psz = NextChar(psz+1);
  4715. }
  4716. if (*psz==WHACK || *psz==SLASH)
  4717. {
  4718. pus->Accept(*psz);
  4719. psz = NextChar(psz+1);
  4720. }
  4721. if (_dwSchemeNotes & UPF_SCHEME_INTERNET)
  4722. {
  4723. pus->EnableMunging();
  4724. while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
  4725. {
  4726. pus->Accept(SmallForm(*psz));
  4727. psz = NextChar(psz+1);
  4728. }
  4729. if (*psz==COLON)
  4730. {
  4731. psz = FeedPort(psz, pus);
  4732. }
  4733. pus->DisableMunging();
  4734. }
  4735. else
  4736. {
  4737. while (*psz && *psz!=SLASH)
  4738. {
  4739. pus->Accept(*psz);
  4740. psz = NextChar(psz+1);
  4741. }
  4742. }
  4743. _pszWork = psz;
  4744. if (!*psz)
  4745. {
  4746. pus->TrimEndWhiteSpace();
  4747. if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
  4748. {
  4749. pus->Accept(SLASH);
  4750. }
  4751. }
  4752. else
  4753. {
  4754. if (*psz==QUERY || *psz==POUND)
  4755. {
  4756. pus->Accept(SLASH);
  4757. }
  4758. else
  4759. {
  4760. pus->Accept(*psz);
  4761. _pszWork = NextChar(psz+1);
  4762. }
  4763. }
  4764. }
  4765. PCWSTR URL::FeedPort(PCWSTR psz, URL_STRING* pus)
  4766. {
  4767. BOOL fIgnorePort = FALSE;
  4768. pus->Mark();
  4769. psz = FeedUntil(psz, pus, SLASH, WHACK, POUND, QUERY);
  4770. if (!(_dwFlags & URL_DONT_SIMPLIFY))
  4771. {
  4772. // Here, decide whether or not to ignore the port
  4773. // FEATURE we should actually be getting this from
  4774. // the services file to find out the default protocol port
  4775. // but we dont think that most people will change them - zekel 17-Dec-96
  4776. switch(_eScheme)
  4777. {
  4778. case URL_SCHEME_HTTP:
  4779. if (pus->CompareMarkWith(L":80")==0)
  4780. fIgnorePort = TRUE;
  4781. break;
  4782. case URL_SCHEME_HTTPS:
  4783. if (pus->CompareMarkWith(L":443")==0)
  4784. fIgnorePort = TRUE;
  4785. break;
  4786. case URL_SCHEME_FTP:
  4787. if (pus->CompareMarkWith(L":21")==0)
  4788. fIgnorePort = TRUE;
  4789. break;
  4790. case URL_SCHEME_GOPHER:
  4791. if (pus->CompareMarkWith(L":70")==0)
  4792. fIgnorePort = TRUE;
  4793. break;
  4794. }
  4795. }
  4796. if (fIgnorePort)
  4797. {
  4798. pus->EraseMarkedText();
  4799. }
  4800. else
  4801. {
  4802. pus->ClearMark();
  4803. }
  4804. return psz;
  4805. }
  4806. // -------------------------------------------------------------------------------
  4807. BOOL URL::DetectAbsolutePath()
  4808. {
  4809. BOOL fResult = FALSE;
  4810. if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
  4811. {
  4812. fResult = TRUE;
  4813. }
  4814. else if (DetectSymbols(SLASH, WHACK))
  4815. {
  4816. fResult = TRUE;
  4817. _pszWork = NextChar(_pszWork+1);
  4818. }
  4819. return fResult;
  4820. }
  4821. BOOL URL::DetectPath()
  4822. {
  4823. return (*NextChar(_pszWork) && !DetectSymbols(QUERY, POUND));
  4824. }
  4825. VOID URL::FeedPath(URL_STRING* pus, BOOL fMarkServer)
  4826. {
  4827. ASSERT(_pszWork);
  4828. PCWSTR psz = NextChar(_pszWork);
  4829. if (fMarkServer)
  4830. {
  4831. pus->Mark();
  4832. }
  4833. if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
  4834. {
  4835. _pszWork = FeedUntil(psz, pus);
  4836. pus->TrimEndWhiteSpace();
  4837. }
  4838. else
  4839. {
  4840. DWORD cDots;
  4841. BOOL fContinue = TRUE;
  4842. do
  4843. {
  4844. cDots = 0;
  4845. PCWSTR pszTmp = psz;
  4846. if (_fPathCompressionOn)
  4847. {
  4848. cDots = DetectDots(&psz);
  4849. }
  4850. if (cDots)
  4851. {
  4852. if (cDots==2)
  4853. {
  4854. pus->Contract();
  4855. }
  4856. continue;
  4857. }
  4858. psz = CopySegment(pszTmp, pus, &fContinue);
  4859. }
  4860. while (fContinue);
  4861. _pszWork = psz;
  4862. if (!*_pszWork)
  4863. {
  4864. pus->TrimEndWhiteSpace();
  4865. }
  4866. }
  4867. }
  4868. // pfContinue indicates whether there's anything following that would
  4869. // be of relevance to a path
  4870. PCWSTR URL::CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue)
  4871. {
  4872. ASSERT(pfContinue);
  4873. BOOL fStop = FALSE;
  4874. psz = NextChar(psz);
  4875. while (!fStop)
  4876. {
  4877. switch (*psz)
  4878. {
  4879. case POUND:
  4880. if (_eScheme==URL_SCHEME_FILE)
  4881. {
  4882. // Since #s are valid for dos paths, we have to accept them except
  4883. // for when they follow a .htm/.html file (See FindFragmentA/W)
  4884. // However, some inconsistencies may still arise...
  4885. for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
  4886. {
  4887. if (!pus->CompareLast(ExtTable[i].wszExt, ExtTable[i].cchExt))
  4888. break;
  4889. }
  4890. // If we haven't found a matching file extension, we'll treat as a filename character.
  4891. if (i==ARRAYSIZE(ExtTable))
  4892. {
  4893. pus->Accept(*psz);
  4894. psz = NextChar(psz+1);
  4895. break;
  4896. }
  4897. }
  4898. goto next;
  4899. case QUERY:
  4900. // We're going to support query as a legitimate character in file urls.
  4901. // *sigh*
  4902. if (_eScheme==URL_SCHEME_FILE)
  4903. {
  4904. if (_fIgnoreQuery)
  4905. {
  4906. psz = wszBogus;
  4907. }
  4908. else
  4909. {
  4910. pus->CleanAccept(*psz);
  4911. psz = NextChar(psz+1);
  4912. break;
  4913. }
  4914. }
  4915. case L'\0':
  4916. next:
  4917. *pfContinue = FALSE;
  4918. fStop = TRUE;
  4919. break;
  4920. case SLASH:
  4921. case WHACK:
  4922. fStop = TRUE;
  4923. // fall through
  4924. default:
  4925. pus->Accept(*psz);
  4926. psz = NextChar(psz+1);
  4927. break;
  4928. }
  4929. }
  4930. return psz;
  4931. }
  4932. DWORD URL::DetectDots(PCWSTR* ppsz)
  4933. {
  4934. PCWSTR psz;
  4935. if (ppsz)
  4936. {
  4937. psz = *ppsz;
  4938. }
  4939. else
  4940. {
  4941. psz = NextChar(_pszWork);
  4942. }
  4943. DWORD cDots = 0;
  4944. if (*psz==DOT)
  4945. {
  4946. psz = NextChar(psz+1);
  4947. cDots++;
  4948. if (*psz==DOT)
  4949. {
  4950. psz = NextChar(psz+1);
  4951. cDots++;
  4952. }
  4953. switch (*psz)
  4954. {
  4955. case WHACK:
  4956. if (_eScheme==URL_SCHEME_MK)
  4957. {
  4958. cDots = 0;
  4959. }
  4960. case SLASH:
  4961. psz = NextChar(psz+1);
  4962. break;
  4963. case QUERY:
  4964. case POUND:
  4965. case L'\0':
  4966. break;
  4967. default:
  4968. cDots = 0;
  4969. break;
  4970. }
  4971. }
  4972. if (ppsz)
  4973. {
  4974. *ppsz = psz;
  4975. }
  4976. return cDots;
  4977. }
  4978. VOID URL::StopPathCompression()
  4979. {
  4980. _fPathCompressionOn = FALSE;
  4981. }
  4982. // -------------------------------------------------------------------------------
  4983. BOOL URL::DetectQueryOrFragment()
  4984. {
  4985. return (DetectSymbols(QUERY, POUND));
  4986. }
  4987. BOOL URL::DetectQuery()
  4988. {
  4989. return (DetectSymbols(QUERY));
  4990. }
  4991. VOID URL::IgnoreQuery()
  4992. {
  4993. ASSERT(_eScheme==URL_SCHEME_FILE);
  4994. _fIgnoreQuery = TRUE;
  4995. }
  // Appends the query and/or fragment that begin at _pszWork to pus.
  //
  //   pus - output string; receives the characters and the query/fragment
  //         notifications (NotifyQuery / NotifyFragment).
  //
  // Opaque schemes: everything that remains is passed to Accept() unchanged and
  // _pszWork is advanced to the terminator.
  // Other schemes: the text is scanned from NextChar(_pszWork); _pszWork itself
  // is not modified on this path.
  4996. VOID URL::FeedQueryAndFragment(URL_STRING* pus)
  4997. {
  4998. ASSERT(_pszWork);
  4999. if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
  5000. {
  5001. PCWSTR psz = NextChar(_pszWork);
  5002. while (*psz)
  5003. {
  5004. pus->Accept(*psz);
  5005. psz = NextChar(psz+1);
  5006. }
  5007. _pszWork = psz;
  5008. return;
  5009. }
  5010. PCWSTR psz = NextChar(_pszWork);
  5011. // This is okay since *psz must equal { ? | # }
  5012. if (*psz==QUERY)
  5013. {
  5014. pus->CleanAccept(QUERY);
  5015. }
  5016. // By munging, I mean taking an URL of form http://a/b#c?d and producing http://a/b?d#c
  5017. // We do this by default; however, we won't do this when we've been passed a fragment only
  5018. // as a relative url
  5019. // Query's always override.
        // Note the ordering: the '?' has already been handed to CleanAccept()
        // above, before DropQuery()/NotifyQuery() are called here.
  5020. if (*psz==QUERY)
  5021. {
  5022. pus->DropQuery();
  5023. pus->NotifyQuery();
  5024. pus->EnableMunging();
  5025. psz = NextChar(psz+1);
        // Copy the query text; a '#' is not copied, it only triggers
        // NotifyFragment(), after which copying continues.
  5026. while (*psz)
  5027. {
  5028. if (*psz==POUND)
  5029. {
  5030. pus->NotifyFragment();
  5031. }
  5032. else
  5033. {
  5034. pus->Accept(*psz);
  5035. }
  5036. psz = NextChar(psz+1);
  5037. }
  5038. }
  5039. else
  5040. {
  5041. // This line of code will determine whether we've been passed a fragment for a relative url
  5042. // For properly formed base urls, this won't matter.
  5043. BOOL fMunge = psz!=NextChar(_pszUrl);
  5044. pus->DropFragment();
  5045. pus->NotifyFragment();
  5046. pus->EnableMunging();
  5047. psz = NextChar(psz+1);
  5048. // The following line is bogus. It just keeps going until the end. Not good.
  5049. // We MAY or MAY NOT fix this, depending on how much people scream at me.
  5050. // This may be an issue for Netscape compatibility.
  5051. // What we could do is: when either query or fragment would be blank, preserve as is.
  5052. // This would minimise breaking compatibility across the board.
  5053. // -- AKABIR, 09/28/98
        // NOTE(review): QUERY is a non-zero character, so the condition below
        // is equivalent to "*psz != 0" regardless of fMunge.  The loop therefore
        // always stops on the terminator, and the "if (*psz==QUERY)" block that
        // follows cannot be entered.  Kept as is: the author's note above
        // describes this as known behaviour.
  5054. while ((*psz==QUERY && !fMunge) || *psz)
  5055. {
  5056. if (*psz==QUERY)
  5057. {
  5058. pus->CleanAccept(QUERY);
  5059. }
  5060. else
  5061. {
  5062. pus->Accept(*psz);
  5063. }
  5064. psz = NextChar(psz+1);
  5065. }
        // Intended munging step (http://a/b#c?d -> http://a/b?d#c): emit the
        // query first, then rescan from _pszWork to emit the fragment up to '?'.
  5066. if (*psz==QUERY)
  5067. {
  5068. pus->DropFragment();
  5069. pus->NotifyQuery();
  5070. pus->CleanAccept(*psz);
  5071. psz = NextChar(psz+1);
  5072. while (*psz)
  5073. {
  5074. pus->Accept(*psz);
  5075. psz = NextChar(psz+1);
  5076. }
  5077. pus->TrimEndWhiteSpace();
  5078. pus->NotifyFragment();
  5079. psz = NextChar(_pszWork);
  5080. pus->CleanAccept(*psz);
  5081. psz = NextChar(psz+1);
  5082. while (*psz!=QUERY)
  5083. {
  5084. pus->Accept(*psz);
  5085. psz = NextChar(psz+1);
  5086. }
  5087. }
  5088. }
  5089. pus->TrimEndWhiteSpace();
  5090. pus->ClearMark();
  5091. }
  5092. // -------------------------------------------------------------------------------
  // Combines urlRelative with urlBase into pusOut, working through the URL in
  // order: scheme, server, path, then query and fragment.
  //
  //   urlBase     - parsed base URL; Reset() is called on it whenever the base
  //                 must no longer contribute to the result.
  //   urlRelative - parsed relative URL.
  //   pusOut      - output string that accumulates the combined URL.
  //   dwFlags     - URL_* flags; the ones examined here are
  //                 URL_FILE_USE_PATHURL, URL_WININET_COMPATIBILITY,
  //                 URL_PLUGGABLE_PROTOCOL and URL_DONT_SIMPLIFY.
  //
  // Returns S_OK on success, E_OUTOFMEMORY when pusOut->AnyProblems() reports a
  // problem, and E_FAIL for the cases this parser does not handle (the callers
  // in this file treat E_FAIL as "use the original parser instead").
  5093. HRESULT
  5094. BlendUrls(URL& urlBase, URL& urlRelative, URL_STRING* pusOut, DWORD dwFlags)
  5095. {
  5096. HRESULT hr = S_OK;
  5097. // -- SCHEME --------------------------------------------------------------------------
  5098. // Examine each url's scheme.
  5099. // We won't continue to use urlBase IF
  5100. // 1. their tokenized schemes are not identical
  5101. // 2. the scheme is a file
  5102. // 3. the actual string schemes are not identical
  5103. // this checks to make sure that these are the same scheme, and
  5104. // that the scheme is allowed to be used in relative URLs
  5105. // file: is not allowed to because of weirdness with drive letters
  5106. // and \\UNC\shares
  5107. BOOL fBaseServerDetected = FALSE, fRelativeServerDetected = FALSE;
  5108. BOOL fDetectAbsoluteRelPath = FALSE;
  5109. BOOL fDetectedRelScheme = urlRelative.DetectAndFeedScheme(pusOut);
  5110. BOOL fDetectedBaseScheme = FALSE;
        // A relative URL that carries its own file: or opaque scheme stands
        // alone: the base is discarded.
  5111. if (fDetectedRelScheme
  5112. && ((pusOut->QueryScheme()==URL_SCHEME_FILE)
  5113. || (urlRelative.GetSchemeNotes() & UPF_SCHEME_OPAQUE)))
  5114. {
  5115. urlBase.Reset();
  5116. }
  5117. else if ((fDetectedBaseScheme = urlBase.DetectAndFeedScheme(pusOut, fDetectedRelScheme)))
  5118. {
        // The relative URL had no scheme of its own, so it inherits the base's.
  5119. if (!fDetectedRelScheme)
  5120. {
  5121. urlRelative.SetScheme(urlBase.GetScheme(), urlBase.GetSchemeNotes());
  5122. }
  5123. }
  5124. // We fall back on the original parser for those cases we don't handle yet.
  5125. // (dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)
  5126. if (((pusOut->QueryScheme()==URL_SCHEME_FILE)
  5127. || (!(fDetectedRelScheme || fDetectedBaseScheme)))
  5128. && ((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)))
  5129. {
  5130. hr = E_FAIL;
  5131. goto exit;
  5132. }
  5133. if ((pusOut->QueryScheme()==URL_SCHEME_UNKNOWN))
  5134. {
  5135. // BUG BUG For IE4 compat, we need to use the old parser. However
  5136. // if we're passed URL_PLUGGABLE_PROTOCOL, we'll use this parser.
  5137. if (!(dwFlags & URL_PLUGGABLE_PROTOCOL))
  5138. {
  5139. hr = E_FAIL;
  5140. goto exit;
  5141. }
  5142. urlRelative.StopPathCompression();
  5143. // Same schemes, so now we look at the base url to divine the opacity
  5144. if (urlBase.DetectAnything() && !urlBase.IsReset())
  5145. {
  5146. if (!urlBase.DetectSlash())
  5147. {
  5148. if (!urlRelative.DetectQueryOrFragment())
  5149. {
  5150. urlBase.Reset();
  5151. }
  5152. urlBase.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5153. urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5154. pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
  5155. }
  5156. }
  5157. else if (!urlRelative.DetectSlash())
  5158. {
  5159. // If urlBase is reset, that means the schemes are different,
  5160. // so we only have urlRelative to figure out opacity.
  5161. urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
  5162. pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
  5163. }
  5164. }
  5165. else if (pusOut->QueryScheme()==URL_SCHEME_FTP)
  5166. {
  5167. // For ftp urls, we'll assume that we're being passed properly formed urls.
  5168. // Some ftp sites allow backslashes in their object filenames, so we should
  5169. // allow access to these. Also, domain passwords would otherwise need escaping.
  5170. pusOut->DisableSlashFixing();
  5171. }
  5172. if (dwFlags & URL_DONT_SIMPLIFY)
  5173. {
  5174. urlBase.StopPathCompression();
  5175. urlRelative.StopPathCompression();
  5176. }
  5177. // -- SERVER --------------------------------------------------------------------------
  5178. // Decide on the server to use.
  5179. // Question: if urlBase and UrlRelative have the same explicit server, isn't it pointless
  5180. // to continue looking at url base anyway?
  5181. pusOut->EnableMunging();
  5182. if (!(pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE))
  5183. {
        // The relative server is used unless the base also has a server and
        // the character after the relative's current position is neither '/'
        // nor '\'.
  5184. if (urlRelative.DetectServer()
  5185. && !(urlBase.DetectServer() && (urlRelative.PeekNext()!=SLASH) && (urlRelative.PeekNext()!=WHACK)))
  5186. {
  5187. fRelativeServerDetected = TRUE;
  5188. urlRelative.FeedServer(pusOut);
  5189. urlBase.Reset();
  5190. }
  5191. else if (urlBase.DetectServer())
  5192. {
  5193. fBaseServerDetected = TRUE;
  5194. urlBase.FeedServer(pusOut);
  5195. }
  5196. }
  5197. // -- PATH ----------------------------------------------------------------------------
  5198. // Figure out the path
  5199. // If the relative url has a path, and it starts with a slash/whack, forget about the
  5200. // base's path and stuff. Otherwise, inherit the base and attach the relative
  5201. // Potential problem: when rel path is empty, we expect to knock of the last base segment
  5202. if (pusOut->QueryScheme()==URL_SCHEME_FILE)
  5203. {
  5204. // Hack for back compat
  5205. // If the relative url consists of a query string, we'll append that to
  5206. // our resultant url, rather than the base's query string
  5207. if (urlRelative.DetectQuery())
  5208. {
  5209. urlBase.IgnoreQuery();
  5210. }
  5211. else
  5212. {
        // fResult1: relative has an absolute path; fResult2: relative names
        // a local drive.
  5213. BOOL fResult1 = urlRelative.DetectAbsolutePath();
  5214. BOOL fResult2 = urlRelative.DetectLocalDrive();
  5215. if (fResult2)
  5216. {
  5217. urlBase.Reset();
  5218. urlRelative.FeedLocalDrive(pusOut);
  5219. if (urlRelative.DetectAbsolutePath())
  5220. {
  5221. pusOut->Accept(SLASH);
  5222. }
  5223. }
  5224. else
  5225. {
  5226. if (urlBase.DetectLocalDrive())
  5227. {
  5228. urlBase.FeedLocalDrive(pusOut);
  5229. if (fResult1)
  5230. {
  5231. pusOut->Accept(SLASH);
  5232. urlBase.Reset();
  5233. }
  5234. else if (urlBase.DetectAbsolutePath())
  5235. {
  5236. pusOut->Accept(SLASH);
  5237. }
  5238. }
  5239. else if (fResult1)
  5240. {
  5241. if (fRelativeServerDetected)
  5242. {
  5243. pusOut->Accept(SLASH);
  5244. }
  5245. urlBase.Reset();
  5246. }
  5247. }
  5248. }
  5249. }
  5250. else if (pusOut->QueryScheme()==URL_SCHEME_UNKNOWN)
  5251. {
  5252. if (pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE)
  5253. {
  5254. if (!urlRelative.DetectAnything())
  5255. {
  5256. urlRelative.Reset();
  5257. }
  5258. }
  5259. else
  5260. {
  5261. // This code fragment is for urls with unknown schemes, that are to be
  5262. // treated hierarchically. Note that the authority (which has been passed in
  5263. // already) is terminated with /, ?, or \0. The / is *optional*, and should be
  5264. // appended if and only if the urls being combined call for it.
  5265. if (urlBase.IsReset())
  5266. {
  5267. // At this point, we're examining only the relative url. We've been brought to
  5268. // a stop by the presence of /, ? or \0. So
  5269. if (urlRelative.DetectSlash() && !fDetectedRelScheme)
  5270. {
  5271. pusOut->Accept(SLASH);
  5272. }
  5273. }
  5274. else
  5275. {
  5276. // In this case, we have both the relative and base urls to look at.
  5277. // What's the terminator for the base url
  5278. if ((urlRelative.DetectSlash()
  5279. || (!urlBase.DetectAnything()
  5280. && urlRelative.DetectAnything()
  5281. && !urlRelative.DetectQuery()))
  5282. && !fDetectedRelScheme)
  5283. {
  5284. pusOut->Accept(SLASH);
  5285. }
  5286. }
  5287. }
  5288. }
  5289. pusOut->EnableMunging();
        // Case 1: the base supplied the server and the relative path is
        // absolute - the relative path replaces the base path entirely.
  5290. if ((fBaseServerDetected && (fDetectAbsoluteRelPath = urlRelative.DetectAbsolutePath())))
  5291. {
  5292. if (!fRelativeServerDetected)
  5293. {
  5294. pusOut->RestoreFlags();
  5295. }
        // fDetectAbsoluteRelPath is necessarily TRUE here (the enclosing
        // condition requires it), so only DetectDots() decides this branch.
  5296. if (fDetectAbsoluteRelPath && urlRelative.DetectDots(NULL))
  5297. {
  5298. urlRelative.StopPathCompression();
  5299. }
  5300. urlRelative.FeedPath(pusOut);
  5301. urlBase.Reset();
  5302. }
        // Case 2: the base has a path - feed it, then attach the relative path.
  5303. else if (urlBase.DetectPath())
  5304. {
  5305. urlBase.FeedPath(pusOut);
  5306. // We don't want to contract the base path's free segment if
  5307. // a. the scheme is opaque
  5308. // b. the relative url has a path
  5309. // c. the relative url has no path, just a fragment/query
  5310. if (!(urlBase.GetSchemeNotes() & UPF_SCHEME_OPAQUE))
  5311. {
  5312. pusOut->RestoreFlags();
  5313. if (urlRelative.DetectPath()
  5314. || !urlRelative.DetectQueryOrFragment())
  5315. {
  5316. if (urlRelative.DetectPath() || !fDetectedRelScheme)
  5317. {
  5318. pusOut->Contract(FALSE);
  5319. }
  5320. if (fDetectedRelScheme)
  5321. {
  5322. urlRelative.StopPathCompression();
  5323. }
  5324. urlRelative.FeedPath(pusOut, FALSE);
  5325. urlBase.Reset();
  5326. }
  5327. else
  5328. {
        // Relative is only a query/fragment: the base stays live so that
        // its query/fragment is still considered below.
  5329. urlRelative.FeedPath(pusOut, FALSE);
  5330. }
  5331. }
  5332. else
  5333. {
  5334. urlRelative.StopPathCompression();
  5335. urlRelative.FeedPath(pusOut, FALSE);
  5336. }
  5337. }
        // Case 3: only the relative URL has a path.
  5338. else if (urlRelative.DetectPath())
  5339. {
  5340. if (!fRelativeServerDetected)
  5341. {
  5342. pusOut->RestoreFlags();
  5343. }
  5344. else if (urlRelative.DetectDots(NULL))
  5345. {
  5346. urlRelative.StopPathCompression();
  5347. }
  5348. urlRelative.FeedPath(pusOut);
  5349. urlBase.Reset();
  5350. }
  5351. pusOut->ClearMark();
  5352. pusOut->DisableSlashFixing();
  5353. // -- QUERY AND FRAGMENT -----------------------------------------------------------
  5354. // Figure out the query
        // The base is fed first and the relative second; both calls go through
        // FeedQueryAndFragment(), which drops/overrides earlier query or
        // fragment text on pusOut as needed.
  5355. if (urlBase.DetectQueryOrFragment())
  5356. {
  5357. urlBase.FeedQueryAndFragment(pusOut);
  5358. }
  5359. if (urlRelative.DetectQueryOrFragment())
  5360. {
  5361. urlRelative.FeedQueryAndFragment(pusOut);
  5362. }
        // Terminate the output string.
  5363. pusOut->CleanAccept(L'\0');
  5364. if (pusOut->AnyProblems())
  5365. {
  5366. hr = E_OUTOFMEMORY;
  5367. }
  5368. exit:
  5369. return hr;
  5370. }
  5371. HRESULT
  5372. FormUrlCombineResultW(LPCWSTR pszBase,
  5373. LPCWSTR pszRelative,
  5374. LPWSTR pszCombined,
  5375. LPDWORD pcchCombined,
  5376. DWORD dwFlags)
  5377. {
  5378. if ((dwFlags & URL_ESCAPE_UNSAFE)
  5379. && (dwFlags & URL_ESCAPE_SPACES_ONLY))
  5380. {
  5381. // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
  5382. // Deactivate UNSAFE
  5383. dwFlags ^= URL_ESCAPE_UNSAFE;
  5384. }
  5385. DWORD dwTempFlags = dwFlags;
  5386. if (dwFlags & URL_UNESCAPE)
  5387. {
  5388. if (dwFlags & URL_ESCAPE_UNSAFE)
  5389. {
  5390. dwTempFlags ^= URL_ESCAPE_UNSAFE;
  5391. }
  5392. if (dwFlags & URL_ESCAPE_SPACES_ONLY)
  5393. {
  5394. dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
  5395. }
  5396. }
  5397. // Make a copy of the relative url if the client wants to either
  5398. // a. unescape and escape the URL (since roundtripping is not guaranteed), or
  5399. // b. use the same location for relative URL's buffer for the combined url
  5400. HRESULT hr;
  5401. URL curlBase, curlRelative;
  5402. curlBase.Setup((PWSTR)pszBase);
  5403. curlRelative.Setup((PWSTR)pszRelative);
  5404. URL_STRING us(dwTempFlags);
  5405. hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
  5406. if (SUCCEEDED(hr))
  5407. {
  5408. DWORD ccBuffer = us.GetTotalLength();
  5409. if ((dwFlags & URL_UNESCAPE)
  5410. && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
  5411. {
  5412. // No need to strip out URL_UNESCAPE
  5413. hr = UrlEscapeW(us.GetStart(), pszCombined, pcchCombined, dwFlags);
  5414. goto exit;
  5415. }
  5416. if (ccBuffer > *pcchCombined)
  5417. {
  5418. hr = E_POINTER;
  5419. }
  5420. else if (pszCombined)
  5421. {
  5422. memcpy(pszCombined, us.GetStart(), ccBuffer*sizeof(WCHAR));
  5423. // We return only the number of characters, not buffer size required.
  5424. ccBuffer--;
  5425. }
  5426. *pcchCombined = ccBuffer;
  5427. }
  5428. else if (hr==E_FAIL)
  5429. {
  5430. // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
  5431. // We fall back on the original parser for those cases we don't handle yet.
  5432. // We should do this if and only if the new parser
  5433. // doesn't handle the flags cited above
  5434. // or we're passed a pluggable protocol without the forcing flag.
  5435. SHSTRW strwOut;
  5436. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5437. if(SUCCEEDED(hr))
  5438. {
  5439. hr = ReconcileHresults(hr, CopyOutW(&strwOut, pszCombined, pcchCombined));
  5440. }
  5441. }
  5442. exit:
  5443. return hr;
  5444. }
  5445. HRESULT
  5446. FormUrlCombineResultA(LPCSTR pszBase,
  5447. LPCSTR pszRelative,
  5448. LPSTR pszCombined,
  5449. LPDWORD pcchCombined,
  5450. DWORD dwFlags)
  5451. {
  5452. if ((dwFlags & URL_ESCAPE_UNSAFE)
  5453. &&
  5454. (dwFlags & URL_ESCAPE_SPACES_ONLY))
  5455. {
  5456. // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
  5457. // Deactivate UNSAFE
  5458. dwFlags ^= URL_ESCAPE_UNSAFE;
  5459. }
  5460. // Make a copy of the relative url if the client wants to either
  5461. // a. unescape and escape the URL (since roundtripping is not guaranteed), or
  5462. // b. use the same location for relative URL's buffer for the combined url
  5463. SHSTRW strwBase;
  5464. SHSTRW strwRelative;
  5465. HRESULT hr;
  5466. if (!(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative))))
  5467. {
  5468. return E_OUTOFMEMORY;
  5469. }
  5470. DWORD dwTempFlags = dwFlags;
  5471. if (dwFlags & URL_UNESCAPE)
  5472. {
  5473. if (dwFlags & URL_ESCAPE_UNSAFE)
  5474. {
  5475. dwTempFlags ^= URL_ESCAPE_UNSAFE;
  5476. }
  5477. if (dwFlags & URL_ESCAPE_SPACES_ONLY)
  5478. {
  5479. dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
  5480. }
  5481. }
  5482. URL curlBase, curlRelative;
  5483. curlBase.Setup(strwBase);
  5484. curlRelative.Setup(strwRelative);
  5485. URL_STRING us(dwTempFlags);
  5486. hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
  5487. if (SUCCEEDED(hr))
  5488. {
  5489. SHSTRA straOut;
  5490. if ((dwFlags & URL_UNESCAPE)
  5491. && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
  5492. {
  5493. SHSTRW strwTemp;
  5494. // No need to strip out URL_UNESCAPE
  5495. hr = SHUrlEscape(us.GetStart(), &strwTemp, dwFlags);
  5496. hr = ReconcileHresults(hr, straOut.SetStr(strwTemp));
  5497. }
  5498. else
  5499. {
  5500. hr = straOut.SetStr(us.GetStart());
  5501. }
  5502. if (SUCCEEDED(hr))
  5503. {
  5504. hr = CopyOutA(&straOut, pszCombined, pcchCombined);
  5505. }
  5506. }
  5507. else if (hr==E_FAIL)
  5508. {
  5509. // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
  5510. // We fall back on the original parser for those cases we don't handle yet.
  5511. // We should do this if and only if the new parser
  5512. // doesn't handle the flags cited above
  5513. SHSTRW strwOut;
  5514. hr = SHUrlParse(strwBase, strwRelative, &strwOut, dwFlags);
  5515. if (SUCCEEDED(hr))
  5516. {
  5517. SHSTRA straOut;
  5518. hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
  5519. if(SUCCEEDED(hr))
  5520. hr = ReconcileHresults(hr, CopyOutA(&straOut, pszCombined, pcchCombined));
  5521. }
  5522. }
  5523. return hr;
  5524. }
  // Debug-only cross-checking of the new combine parser against the original
  // one.  Compiled only when PROOFREAD_PARSES is defined.
  5525. #ifdef PROOFREAD_PARSES
        // Selects the proofreading mode (one of the PP_* values below); defined
        // in another translation unit.
  5526. EXTERN_C DWORD g_dwProofMode;
        // Modes switched on by ProofreadParses().
  5527. enum
  5528. {
  5529. PP_COMPARE,
  5530. PP_ORIGINAL_ONLY,
  5531. PP_NEW_ONLY
  5532. };
        // Uncomment to get a message box for each mismatch in addition to the log.
  5533. //#define SHOW_MESSAGEBOX
  5534. VOID LogData(PWSTR pszMsg)
  5535. {
  5536. SHSTRA str;
  5537. str.SetStr(pszMsg);
  5538. CHAR szFileName[MAX_PATH];
  5539. DWORD dwSize = MAX_PATH;
  5540. CHAR szComputerName[MAX_PATH];
  5541. HANDLE hResultsFile = NULL;
  5542. strcpy(szFileName, "\\\\BANYAN\\IPTD\\AKABIR\\1315\\");
  5543. if (!GetComputerNameA(szComputerName, &dwSize))
  5544. {
  5545. goto exit;
  5546. }
  5547. lstrcatA(szFileName, szComputerName);
  5548. hResultsFile = CreateFileA( szFileName,
  5549. GENERIC_WRITE,
  5550. FILE_SHARE_WRITE | FILE_SHARE_READ,
  5551. NULL,
  5552. OPEN_ALWAYS,
  5553. 0,
  5554. NULL);
  5555. if (INVALID_HANDLE_VALUE == hResultsFile)
  5556. hResultsFile = NULL;
  5557. if (hResultsFile)
  5558. {
  5559. if (SetFilePointer(hResultsFile, 0, NULL, FILE_END)==0xFFFFFFFF)
  5560. {
  5561. goto exit;
  5562. }
  5563. DWORD dwFoo;
  5564. if (0==WriteFile(hResultsFile, (PVOID)(PSTR)str, lstrlenW(pszMsg), &dwFoo, NULL))
  5565. {
  5566. DWORD dwE = GetLastError();
  5567. }
  5568. }
  5569. exit:
  5570. if (hResultsFile)
  5571. {
  5572. CloseHandle(hResultsFile);
  5573. }
  5574. }
  // Debug-only cross-check of the new combine parser against the original one
  // (SHUrlParse), selected by g_dwProofMode:
  //   PP_COMPARE       - run the original parser as well, filter out known
  //                      benign differences, log anything else with LogData().
  //   PP_NEW_ONLY      - return hr unchanged.
  //   PP_ORIGINAL_ONLY - discard the new result and use the original parser.
  //
  //   hr           - result already produced by the new parser.
  //   pszCombined  - new parser's output; may be overwritten with the original
  //                  parser's output.
  //   pcchCombined - length associated with pszCombined; may be overwritten.
  //   dwSize       - size of pszCombined as passed in by the caller.
  //
  // Returns the HRESULT that the caller should report.
  5575. HRESULT ProofreadParses(HRESULT hr,
  5576. LPCWSTR pszBase,
  5577. LPCWSTR pszRelative,
  5578. LPWSTR pszCombined,
  5579. PDWORD pcchCombined,
  5580. DWORD dwFlags,
  5581. DWORD dwSize
  5582. )
  5583. {
        // NOTE(review): function-static buffer, shared by every call; it
        // remembers a previously seen result so repeats can be skipped.
  5584. static WCHAR szLast[2084];
  5585. SHSTRW strwOut;
  5586. switch(g_dwProofMode)
  5587. {
  5588. case PP_COMPARE:
  5589. {
  5590. HRESULT hr2 = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5591. WCHAR wstr[2084];
  5592. DWORD ccLen = min(2084, dwSize), ccUrl = SUCCEEDED(hr) ? *pcchCombined : 0;
  5593. if(SUCCEEDED(hr2))
  5594. {
  5595. hr2 = CopyOutW(&strwOut, wstr, &ccLen);
        // Both parsers agree that the buffer is too small: nothing to compare.
  5596. if (hr2 == E_POINTER && hr == E_POINTER)
  5597. {
  5598. goto exitpoint;
  5599. }
        // NOTE(review): wstr is written only by CopyOutW above; if that call
        // failed, what the comparisons below read depends on what CopyOutW
        // leaves in the buffer - confirm.
  5600. // Check if cached combine equals the new parser's result
  5601. if (!StrCmpW(pszCombined, szLast))
  5602. {
  5603. goto exitpoint;
  5604. }
  5605. // Check if cached combine equals the old parser's result
  5606. if (!StrCmpW(wstr, szLast))
  5607. {
  5608. *pcchCombined = ccLen;
  5609. StrCpyNW(pszCombined, wstr, ccLen + 1);
  5610. hr = hr2;
  5611. goto exitpoint;
  5612. }
  5613. if (SUCCEEDED(hr))
  5614. {
        // NOTE(review): this copy is bounded by ccLen whereas the copy of
        // the same string above uses ccLen + 1 - confirm which is intended.
  5615. StrCpyNW(szLast, wstr, ccLen);
  5616. if (!StrCmpW(wstr, pszCombined))
  5617. {
  5618. goto exitpoint;
  5619. }
  5620. DWORD dwBogus;
        // With URL_ESCAPE_SPACES_ONLY and without URL_UNESCAPE, differences
        // for known opaque schemes are not reported.
  5621. if ((dwFlags & URL_ESCAPE_SPACES_ONLY) && !(dwFlags & URL_UNESCAPE))
  5622. {
  5623. PCWSTR psz = FindSchemeW(pszCombined, &dwBogus);
  5624. DWORD dw;
  5625. if (psz
  5626. &&
  5627. (URL_SCHEME_UNKNOWN
  5628. !=GetSchemeTypeAndFlagsW(psz, dwBogus, &dw))
  5629. &&
  5630. (dw & UPF_SCHEME_OPAQUE))
  5631. {
  5632. goto exitpoint;
  5633. }
  5634. }
  5635. // Filter
  5636. // base: "http://foo/bar/"
  5637. // rel: ""
  5638. // old: "http://foo/bar"
  5639. // new: "http://foo/bar/"
  5640. if ((*pszRelative==L'\0')
  5641. &&
  5642. (!StrCmpNW(pszCombined, wstr, ccLen))
  5643. &&
  5644. (ccUrl==(ccLen+1))
  5645. &&
  5646. (pszCombined[ccLen]==L'/'))
  5647. {
  5648. goto exitpoint;
  5649. }
  5650. // Filter
  5651. // base: "http://foo/bar/what?ho"
  5652. // rel: ""
  5653. // old: "http://foo/bar/?ho"
  5654. // new: "http://foo/bar/"
  5655. if ((*pszRelative==L'\0')
  5656. &&
  5657. (!StrCmpNW(pszCombined, wstr, ccUrl))
  5658. &&
  5659. (wstr[ccUrl]==QUERY))
  5660. {
  5661. goto exitpoint;
  5662. }
  5663. // Filter
  5664. // base: "http://foo/bar/what?ho"
  5665. // rel: "/"
  5666. // old: "http://foo"
  5667. // new: "http://foo/"
  5668. if ((*pszRelative==L'/')
  5669. &&
  5670. (!StrCmpNW(pszCombined, wstr, ccLen))
  5671. &&
  5672. (ccUrl==(ccLen+1))
  5673. &&
  5674. (pszCombined[ccLen]==L'/'))
  5675. {
  5676. goto exitpoint;
  5677. }
        // An unfiltered difference: build the report and log it.
  5678. WCHAR wmsg[8192];
  5679. wnsprintfW(wmsg,
  5680. ARRAYSIZE(wmsg),
  5681. L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nOriginal result:\"%s\"\nNew result:\"%s\"\nUse original, not new, result?\n",
  5682. dwFlags,
  5683. pszBase,
  5684. pszRelative,
  5685. wstr,
  5686. pszCombined
  5687. );
  5688. #ifdef SHOW_MESSAGEBOX
  5689. if (IDYES==MessageBoxW(
  5690. NULL,
  5691. wmsg,
  5692. L"CONTACT AKABIR: URLCOMBINE FAILURE",
  5693. MB_YESNO | MB_ICONERROR | MB_TASKMODAL))
  5694. {
  5695. StrCpyNW(pszCombined, wstr, dwSize);
  5696. *pcchCombined = ccLen;
  5697. }
  5698. else
  5699. {
  5700. StrCpyNW(szLast, pszCombined, *pcchCombined);
  5701. }
  5702. #endif
  5703. LogData(wmsg);
  5704. }
  5705. else
  5706. {
        // The new parser failed where the original succeeded: report it and
        // hand back the original parser's output.
  5707. WCHAR wmsg[8192];
  5708. wnsprintfW(wmsg,
  5709. ARRAYSIZE(wmsg),
  5710. L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nFAILED:%#x\nExpected:\"%s\"\n",
  5711. dwFlags,
  5712. pszBase,
  5713. pszRelative,
  5714. hr,
  5715. wstr);
  5716. #ifdef SHOW_MESSAGEBOX
  5717. MessageBoxW(
  5718. NULL,
  5719. wmsg,
  5720. L"CONTACT AKABIR: URLCOMBINE FAILURE",
  5721. MB_OK | MB_ICONERROR | MB_TASKMODAL);
  5722. #endif
  5723. StrCpyNW(pszCombined, wstr, dwSize);
  5724. *pcchCombined = ccLen;
  5725. LogData(wmsg);
  5726. }
        // Reached after either branch above: the original parser's HRESULT
        // is what gets returned.
  5727. hr = hr2;
  5728. }
  5729. }
  5730. break;
  5731. case PP_NEW_ONLY:
  5732. break;
  5733. case PP_ORIGINAL_ONLY:
  5734. {
  5735. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5736. if(SUCCEEDED(hr))
  5737. {
  5738. hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
  5739. }
  5740. }
  5741. break;
  5742. }
  5743. exitpoint:
  5744. return hr;
  5745. }
  5746. #endif //PROOFREAD_PARSES
  5747. LWSTDAPI
  5748. UrlCombineW(LPCWSTR pszBase,
  5749. LPCWSTR pszRelative,
  5750. LPWSTR pszCombined,
  5751. LPDWORD pcchCombined,
  5752. DWORD dwFlags)
  5753. {
  5754. HRESULT hr = E_INVALIDARG;
  5755. if (pszBase && pszRelative && pcchCombined)
  5756. {
  5757. RIP(IS_VALID_STRING_PTRW(pszBase, INTERNET_MAX_PATH_LENGTH));
  5758. RIP(IS_VALID_STRING_PTRW(pszRelative, INTERNET_MAX_PATH_LENGTH));
  5759. RIP(IS_VALID_WRITE_PTR(pcchCombined, DWORD));
  5760. RIP((!pszCombined || IS_VALID_WRITE_BUFFER(pszCombined, WCHAR, *pcchCombined)));
  5761. #ifdef PROOFREAD_PARSES
  5762. DWORD dwSize = *pcchCombined;
  5763. #endif
  5764. hr = FormUrlCombineResultW(pszBase, pszRelative, pszCombined, pcchCombined, dwFlags);
  5765. #ifdef PROOFREAD_PARSES
  5766. hr = ProofreadParses(hr, pszBase, pszRelative, pszCombined, pcchCombined, dwFlags, dwSize);
  5767. #endif
  5768. }
  5769. return hr;
  5770. }
  5771. LWSTDAPI
  5772. UrlCombineA(LPCSTR pszBase,
  5773. LPCSTR pszRelative,
  5774. LPSTR pszOut,
  5775. LPDWORD pcchOut,
  5776. DWORD dwFlags)
  5777. {
  5778. HRESULT hr;
  5779. if (!pszBase
  5780. || !pszRelative
  5781. || !pcchOut)
  5782. {
  5783. hr = E_INVALIDARG;
  5784. }
  5785. else
  5786. {
  5787. RIP(IS_VALID_STRING_PTRA(pszBase, INTERNET_MAX_PATH_LENGTH));
  5788. RIP(IS_VALID_STRING_PTRA(pszRelative, INTERNET_MAX_PATH_LENGTH));
  5789. RIP(IS_VALID_WRITE_PTR(pcchOut, DWORD));
  5790. RIP((!pszOut || IS_VALID_WRITE_BUFFER(pszOut, CHAR, *pcchOut)));
  5791. hr = FormUrlCombineResultA(pszBase, pszRelative, pszOut, pcchOut, dwFlags);
  5792. }
  5793. return hr;
  5794. }
  5795. #else // end USE_FAST_PARSER
  5796. LWSTDAPI
  5797. UrlCombineW(LPCWSTR pszBase,
  5798. LPCWSTR pszRelative,
  5799. LPWSTR pszCombined,
  5800. LPDWORD pcchCombined,
  5801. DWORD dwFlags)
  5802. {
  5803. HRESULT hr = E_INVALIDARG;
  5804. RIPMSG(pszBase && IS_VALID_STRING_PTRW(pszBase, -1), "UrlCombineW: Caller passed invalid pszBase");
  5805. RIPMSG(pszRelative && IS_VALID_STRING_PTRW(pszRelative, -1), "UrlCombineW: Caller passed invalid pszRelative");
  5806. RIPMSG(NULL!=pcchOut, "UrlCombineW: Caller passed invalid pcchOut");
  5807. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineW: Caller passed invalid pszOut");
  5808. #ifdef DEBUG
  5809. if (pcchOut)
  5810. {
  5811. if (pszOut == pszBase || pszOut == pszRelative)
  5812. DEBUGWhackPathStringW(pszOut, *pcchOut);
  5813. else
  5814. DEBUGWhackPathBufferW(pszOut, *pcchOut);
  5815. }
  5816. #endif
  5817. if (pszBase && pszRelative && pcchCombined)
  5818. {
  5819. SHSTRW strwOut;
  5820. hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
  5821. if(SUCCEEDED(hr))
  5822. {
  5823. hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
  5824. }
  5825. }
  5826. return hr;
  5827. }
  5828. LWSTDAPI
  5829. UrlCombineA(LPCSTR pszBase,
  5830. LPCSTR pszRelative,
  5831. LPSTR pszOut,
  5832. LPDWORD pcchOut,
  5833. DWORD dwFlags)
  5834. {
  5835. HRESULT hr;
  5836. SHSTRA straOut;
  5837. RIPMSG(pszBase && IS_VALID_STRING_PTRA(pszBase, -1), "UrlCombineA: Caller passed invalid pszBase");
  5838. RIPMSG(pszRelative && IS_VALID_STRING_PTRA(pszRelative, -1), "UrlCombineA: Caller passed invalid pszRelative");
  5839. RIPMSG(NULL!=pcchOut, "UrlCombineA: Caller passed invalid pcchOut");
  5840. RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineA: Caller passed invalid pszOut");
  5841. #ifdef DEBUG
  5842. if (pcchOut)
  5843. {
  5844. if (pszOut == pszBase || pszOut == pszRelative)
  5845. DEBUGWhackPathStringA(pszOut, *pcchOut);
  5846. else
  5847. DEBUGWhackPathBufferA(pszOut, *pcchOut);
  5848. }
  5849. #endif
  5850. if (!pszBase || !pszRelative || !pcchOut)
  5851. hr = E_INVALIDARG;
  5852. else
  5853. {
  5854. SHSTRW strwOut;
  5855. SHSTRW strwBase;
  5856. SHSTRW strwRelative;
  5857. if(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative)))
  5858. hr = SHUrlParse((LPWSTR) strwBase, (LPWSTR)strwRelative, &strwOut, dwFlags);
  5859. else
  5860. hr = E_OUTOFMEMORY;
  5861. if(SUCCEEDED(hr))
  5862. hr = straOut.SetStr(strwOut);
  5863. }
  5864. if(SUCCEEDED(hr) )
  5865. hr = CopyOutA(&straOut, pszOut, pcchOut);
  5866. return hr;
  5867. }
  5868. #endif // !USE_FAST_PARSER
  5869. //
  5870. // Combines the desired scheme with the string after the scheme with a : in between. For
  5871. // some protocols, a // is placed after the colon.
  5872. //
  5873. PRIVATE HRESULT ColonSlashSlashW
  5874. (
  5875. LPCWSTR pszScheme, // url protocol (lower-case)
  5876. LPCWSTR pszAfterScheme, // string to append after the protocol
  5877. LPWSTR pszTranslatedUrl, // output buffer
  5878. int cchMax // size of output buffer
  5879. )
  5880. {
  5881. StrCpyNW(pszTranslatedUrl, pszScheme, cchMax);
  5882. // Append : after scheme and possibly a // as well.
  5883. int cchScheme = lstrlenW(pszScheme);
  5884. if (cchMax > cchScheme + 3)
  5885. {
  5886. pszTranslatedUrl[cchScheme] = L':';
  5887. // Number of characters to skip over in the buffer (how many non alphanums originally
  5888. // followed the protocol)
  5889. int cchSkip = 0;
  5890. // Number of characters past the protocol: to skip over in the URL (Do we insert slashes?)
  5891. int cchSlashes = 0;
  5892. // Modify this conditional to include any other protocols to always follow with ://
  5893. // Right now, http, https and ftp are automatic
  5894. if (!StrCmpW(pszScheme, L"http") || !StrCmpW(pszScheme, L"ftp") || !StrCmpW(pszScheme, L"https") )
  5895. {
  5896. //
  5897. // When preparing to copy the contents of pszAfterScheme into pszUrl, we can
  5898. // skip over as many as 3 non alpha numeric characters, since we are adding ://
  5899. // to the protocol directly
  5900. //
  5901. while ((cchSkip < 3) && pszAfterScheme[cchSkip] && !IsCharAlphaNumericW(pszAfterScheme[cchSkip]))
  5902. {
  5903. cchSkip++;
  5904. }
  5905. pszTranslatedUrl[cchScheme+1] = L'/';
  5906. pszTranslatedUrl[cchScheme+2] = L'/';
  5907. pszTranslatedUrl[cchScheme+3] = L'\0';
  5908. cchSlashes = 2;
  5909. }
  5910. else
  5911. // some other protocol
  5912. {
  5913. // just skip over colon
  5914. cchSkip = 1;
  5915. pszTranslatedUrl[cchScheme+1] = L'\0';
  5916. }
  5917. // Copy the rest of the Url from the UrlBuffer into the Url
  5918. StrCatBuffW(pszTranslatedUrl, pszAfterScheme + cchSkip, cchMax);
  5919. }
  5920. return S_OK;
  5921. }
  5922. //
  5923. // Scans the url for a scheme and if it does not match the known schemes, then
  5924. // a closest match is found.
  5925. //
  5926. LWSTDAPI
  5927. UrlFixupW
  5928. (
  5929. LPCWSTR pcszUrl, // URL to correct
  5930. LPWSTR pszTranslatedUrl, // buffer for corrected url (can be same as pcszUrl)
  5931. DWORD cchMax // size of pszTranslatedUrl
  5932. )
  5933. {
  5934. HRESULT hr = S_OK;
  5935. //
  5936. // Find the scheme
  5937. //
  5938. WCHAR szScheme[INTERNET_MAX_SCHEME_LENGTH];
  5939. ULONG cchScheme = 0;
  5940. LPCWSTR pszScheme = FindSchemeW(pcszUrl, &cchScheme, TRUE);
  5941. if (NULL == pszScheme || cchScheme > (ARRAYSIZE(szScheme)-1))
  5942. {
  5943. // No scheme found
  5944. return S_FALSE;
  5945. }
  5946. for (ULONG cch=0; cch < cchScheme; ++cch, ++pszScheme)
  5947. {
  5948. szScheme[cch] = Ascii_ToLowerW(*pszScheme);
  5949. }
  5950. szScheme[cch] = L'\0';
  5951. LPCWSTR pszAfterScheme = pszScheme;
  5952. //
  5953. // If input and output buffers are the same, copy the stuff after the scheme
  5954. // to another buffer so it doesn't get clobbered when we recombine.
  5955. //
  5956. WCHAR szBuf[INTERNET_MAX_PATH_LENGTH];
  5957. if (pcszUrl == pszTranslatedUrl)
  5958. {
  5959. StrCpyNW(szBuf, pszAfterScheme, ARRAYSIZE(szBuf));
  5960. pszAfterScheme = szBuf;
  5961. }
  5962. //
  5963. // See if it matches any of our known schemes
  5964. //
  5965. BOOL fKnownScheme = FALSE;
  5966. for (ULONG i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); ++i)
  5967. {
  5968. if (StrCmpW(szScheme, g_mpUrlSchemeTypes[i].pszScheme) == 0)
  5969. {
  5970. fKnownScheme = TRUE;
  5971. break;
  5972. }
  5973. }
  5974. //
  5975. // If it matches a known scheme, then just fix :// if it's ftp or http
  5976. //
  5977. if (fKnownScheme ||
  5978. // Check for pluggable protocols too
  5979. NO_ERROR == SHGetValueW(HKEY_CLASSES_ROOT, szScheme, L"URL Protocol",
  5980. NULL, NULL, NULL))
  5981. {
  5982. ColonSlashSlashW(szScheme, pszAfterScheme, pszTranslatedUrl, cchMax);
  5983. return S_OK;
  5984. }
  5985. //
  5986. // Try to find a good match for the mispelled scheme
  5987. //
  5988. // These are weights used in the heuristic for the protocol matching
  5989. // iFloor is roughly the minimum percentage of characters that must match in
  5990. // order to make a change
  5991. const int cFloor = 60;
  5992. // A match in the first character has the greatest weight
  5993. const int cCorrectFirstChar = 150;
  5994. // Any other matched character
  5995. const int cCorrectChar = 100;
  5996. // The weight given to a character that only matches the preceding
  5997. // or subsequent character in the protocol
  5998. const int cOffByOneChar = 80;
  5999. // We penalize characters that are off by one, but if we have already
  6000. // observed the offset and subsequent characters continue the offset, we add this
  6001. const int cOffsetBonus = 20;
  6002. // The value of the best "match" found so far. Higher is a better match.
  6003. int iBestEval = 0;
  6004. // The protocol that's the best fit for the misspelled one
  6005. LPCWSTR pszBestMatch = NULL;
  6006. ULONG cchProt;
  6007. for (ULONG j = 0; j < ARRAYSIZE(g_mpUrlSchemeTypes); ++j)
  6008. {
  6009. // Is this one we don't correct to?
  6010. //
  6011. // Note: https is removed from this list. The potential for an intended "http" to
  6012. // be corrected to "https" is too high, and "http" is far more common. All this
  6013. // means is that if someone wants to get to an https site, they have to have it right.
  6014. //
  6015. if (IsFlagSet(g_mpUrlSchemeTypes[j].dwFlags, UPF_SCHEME_DONTCORRECT))
  6016. continue;
  6017. LPCWSTR pszProtocol = g_mpUrlSchemeTypes[j].pszScheme;
  6018. cchProt = g_mpUrlSchemeTypes[j].cchScheme;
  6019. // Evaluation of the fit of the currently tested protocol
  6020. int iEval = 0;
  6021. //
  6022. // Keep track of the positive or negative offset in the protocol
  6023. // such as "qhttp" instead of "http" or "elnet" instead of "telnet'
  6024. //
  6025. int iPosOffset = 0;
  6026. int iNegOffset = 0;
  6027. //
  6028. // The first character has the most weight. "htp" corrects
  6029. // to "http" and not "ftp" "ftt" corrects to "ftp"
  6030. //
  6031. if (*szScheme == *pszProtocol)
  6032. {
  6033. iEval += cCorrectFirstChar;
  6034. }
  6035. // Check for a negative offset
  6036. else if(*szScheme == pszProtocol[1])
  6037. {
  6038. iEval += cOffByOneChar;
  6039. iNegOffset = 1;
  6040. }
  6041. //
  6042. // We go through the characters in the protocol, even to the
  6043. // terminating null if iPosOffset == 1 (it is never more than 1)
  6044. // This is so the final "p" in "qhttp" gets a chance to be compared
  6045. //
  6046. for (i=1; i < cchProt + iPosOffset; i++)
  6047. {
  6048. // No points for null terminations matching
  6049. if (szScheme[i] == L'\0')
  6050. break;
  6051. //
  6052. // Check for adjacent character match
  6053. //
  6054. if (szScheme[i] == pszProtocol[i])
  6055. {
  6056. iEval += cCorrectChar;
  6057. }
  6058. else
  6059. {
  6060. if (szScheme[i] == pszProtocol[i - 1])
  6061. {
  6062. iEval += cOffByOneChar;
  6063. if (iPosOffset)
  6064. iEval += cOffsetBonus;
  6065. else
  6066. iPosOffset = 1;
  6067. }
  6068. else
  6069. {
  6070. if(szScheme[i] == pszProtocol[i + 1])
  6071. {
  6072. iEval += cOffByOneChar;
  6073. if (iNegOffset)
  6074. iEval += cOffsetBonus;
  6075. else
  6076. iNegOffset = 1;
  6077. }
  6078. }
  6079. }
  6080. }
  6081. // Divide the Evaluated value by the MAX(cchScheme, cchProt)
  6082. iEval = iEval / (cchScheme > cchProt ? cchScheme : cchProt);
  6083. // A new best match?
  6084. if (iEval > iBestEval)
  6085. {
  6086. iBestEval = iEval;
  6087. pszBestMatch = pszProtocol;
  6088. //
  6089. // If we found an unquestionably good match (only 1 non-firstchar typo),
  6090. // break out of the loop
  6091. //
  6092. if (iEval >= 100)
  6093. break;
  6094. }
  6095. }
  6096. // If a good enough match was found, then correct url
  6097. if (iBestEval >= cFloor)
  6098. {
  6099. ColonSlashSlashW(pszBestMatch, pszAfterScheme, pszTranslatedUrl,cchMax);
  6100. }
  6101. else
  6102. {
  6103. hr = S_FALSE;
  6104. }
  6105. return hr;
  6106. }
  6107. // This is a port of InternetCrackUrl from wininet.
  6108. // NTRAID:108139 akabir We REALLY NEED TO CLEAN THIS CODE UP.
  6109. // RAID 109209
  6110. // A lot of the stuff is redundant with the other code available, but we
  6111. // need to be careful not to cause any regressions. Thus, I'm leaving it in for now.
  6112. //
  6113. // UrlSchemeList - the list of schemes that we support
  6114. //
  6115. typedef struct {
  6116. LPWSTR SchemeName;
  6117. DWORD SchemeLength;
  6118. SHINTERNET_SCHEME SchemeType;
  6119. BOOL NeedSlashes;
  6120. } URL_SCHEME_INFO;
  6121. #define UrlUnescapeInPlaceW(pszUrl, dwFlags) UrlUnescapeW(pszUrl, NULL, NULL, dwFlags | URL_UNESCAPE_INPLACE)
  6122. // NOTE MEGA REDUNDANCY. We could use the similar table above and check for opaque. However
  6123. // we'd have to modify that table
  6124. PRIVATE
  6125. URL_SCHEME_INFO
  6126. UrlSchemeList[] = {
  6127. NULL, 0, SHINTERNET_SCHEME_DEFAULT, FALSE,
  6128. L"ftp", 3, SHINTERNET_SCHEME_FTP, TRUE,
  6129. L"gopher", 6, SHINTERNET_SCHEME_GOPHER, TRUE,
  6130. L"http", 4, SHINTERNET_SCHEME_HTTP, TRUE,
  6131. L"https", 5, SHINTERNET_SCHEME_HTTPS, TRUE,
  6132. L"file", 4, SHINTERNET_SCHEME_FILE, TRUE,
  6133. L"news", 4, SHINTERNET_SCHEME_NEWS, FALSE,
  6134. L"mailto", 6, SHINTERNET_SCHEME_MAILTO, FALSE,
  6135. L"socks", 5, SHINTERNET_SCHEME_SOCKS, FALSE,
  6136. L"javascript", 10, SHINTERNET_SCHEME_JAVASCRIPT, FALSE,
  6137. L"vbscript", 8, SHINTERNET_SCHEME_VBSCRIPT, FALSE,
  6138. L"res", 3, SHINTERNET_SCHEME_RES, TRUE
  6139. };
  6140. #define NUMBER_OF_URL_SCHEMES ARRAYSIZE(UrlSchemeList)
  6141. // swiped from wininet\macros.h
  6142. #define IsDigit(c) (((c) >= L'0') && ((c) <= L'9'))
  6143. #define ARGUMENT_PRESENT(ArgumentPointer) (\
  6144. (CHAR *)(ArgumentPointer) != (CHAR *)(NULL) )
  6145. BOOL ScanSchemes(LPWSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
  6146. {
  6147. for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
  6148. {
  6149. if ((UrlSchemeList[i].SchemeLength == ccStr)
  6150. && (StrCmpNIW(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
  6151. {
  6152. *pwResult = i;
  6153. return TRUE;
  6154. }
  6155. }
  6156. return FALSE;
  6157. }
  6158. #define ProbeWriteStringBufferW(a, b) ProbeWriteBuffer((LPVOID)a, (b*sizeof(WCHAR)));
  6159. #define PAGE_SIZE 4096
  6160. DWORD
  6161. ProbeWriteBuffer(
  6162. IN LPVOID lpBuffer,
  6163. IN DWORD dwBufferLength
  6164. )
  6165. /*++
  6166. Routine Description:
  6167. Probes a buffer for writeability. Used as part of API parameter validation,
  6168. this function tests the first and last locations in a buffer. This is not
  6169. as strict as the IsBadXPtr() Windows APIs, but it means we don't have to
  6170. test every location in the buffer
  6171. Arguments:
  6172. lpBuffer - pointer to buffer to test
  6173. dwBufferLength - length of buffer
  6174. Return Value:
  6175. DWORD
  6176. Success - ERROR_SUCCESS
  6177. Failure - ERROR_INVALID_PARAMETER
  6178. --*/
  6179. {
  6180. DWORD error;
  6181. //
  6182. // the buffer can be NULL if the probe length is 0. Otherwise, its an error
  6183. //
  6184. if (lpBuffer == NULL) {
  6185. error = (dwBufferLength == 0) ? ERROR_SUCCESS : ERROR_INVALID_PARAMETER;
  6186. } else if (dwBufferLength != 0) {
  6187. __try {
  6188. LPBYTE p;
  6189. LPBYTE end;
  6190. volatile BYTE b;
  6191. p = (LPBYTE)lpBuffer;
  6192. end = p + dwBufferLength - 1;
  6193. b = *end;
  6194. *end = b;
  6195. //
  6196. // visit every page in the buffer - it doesn't matter that we may
  6197. // test a character in the middle of a page
  6198. //
  6199. for (; p < end; p += PAGE_SIZE) {
  6200. b = *p;
  6201. *p = b;
  6202. }
  6203. error = ERROR_SUCCESS;
  6204. } __except(EXCEPTION_EXECUTE_HANDLER) {
  6205. error = ERROR_INVALID_PARAMETER;
  6206. }
  6207. ENDEXCEPT
  6208. } else {
  6209. //
  6210. // zero-length buffer
  6211. //
  6212. error = ERROR_SUCCESS;
  6213. }
  6214. return error;
  6215. }
  6216. DWORD
  6217. ProbeStringW(
  6218. IN LPWSTR lpString,
  6219. OUT LPDWORD lpdwStringLength
  6220. )
  6221. /*++
  6222. Routine Description:
  6223. Probes a wide string buffer for readability, and returns the length of the string
  6224. Arguments:
  6225. lpString - pointer to string to check
  6226. lpdwStringLength - returned length of string
  6227. Return Value:
  6228. DWORD
  6229. Success - ERROR_SUCCESS
  6230. Failure - ERROR_INVALID_PARAMETER
  6231. --*/
  6232. {
  6233. DWORD error;
  6234. DWORD length;
  6235. //
  6236. // initialize string length and return code
  6237. //
  6238. length = 0;
  6239. error = ERROR_SUCCESS;
  6240. //
  6241. // the buffer can be NULL
  6242. //
  6243. if (lpString != NULL) {
  6244. __try {
  6245. //
  6246. // unfortunately, for a string, we have to visit every location in
  6247. // the buffer to find the terminator
  6248. //
  6249. while (*lpString != '\0') {
  6250. ++length;
  6251. ++lpString;
  6252. }
  6253. } __except(EXCEPTION_EXECUTE_HANDLER) {
  6254. error = ERROR_INVALID_PARAMETER;
  6255. }
  6256. ENDEXCEPT
  6257. }
  6258. *lpdwStringLength = length;
  6259. return error;
  6260. }
  6261. DWORD
  6262. DecodeUrl(
  6263. IN LPWSTR Url,
  6264. IN DWORD UrlLength,
  6265. OUT LPWSTR DecodedString,
  6266. IN OUT LPDWORD DecodedLength
  6267. )
  6268. /*++
  6269. Routine Description:
  6270. Converts an URL string with embedded escape sequences (%xx) to a counted
  6271. string
  6272. It is safe to pass the same pointer for the string to convert, and the
  6273. buffer for the converted results: if the current character is not escaped,
  6274. it just gets overwritten, else the input pointer is moved ahead 2 characters
  6275. further than the output pointer, which is benign
  6276. Arguments:
  6277. Url - pointer to URL string to convert
  6278. UrlLength - number of characters in UrlString
  6279. DecodedString - pointer to buffer that receives converted string
  6280. DecodedLength - IN: number of characters in buffer
  6281. OUT: number of characters converted
  6282. Return Value:
  6283. DWORD
  6284. Success - ERROR_SUCCESS
  6285. Failure - ERROR_INTERNET_INVALID_URL
  6286. UrlString couldn't be converted
  6287. ERROR_INSUFFICIENT_BUFFER
  6288. ConvertedString isn't large enough to hold all the converted
  6289. UrlString
  6290. --*/
  6291. {
  6292. // NOTE We can replace this function with UrlUnescapeInPlace
  6293. DWORD bufferRemaining;
  6294. bufferRemaining = *DecodedLength;
  6295. while (UrlLength && bufferRemaining) {
  6296. WCHAR ch;
  6297. if (*Url == L'%') {
  6298. //
  6299. // REVIEW - would %00 ever appear in an URL?
  6300. //
  6301. if (IsHex(*(Url+1)) && IsHex(*(Url+2)))
  6302. {
  6303. ch = TranslateEscapedOctetW(Url);
  6304. Url += 3;
  6305. } else {
  6306. return ERROR_INTERNET_INVALID_URL;
  6307. }
  6308. UrlLength -= 3;
  6309. } else {
  6310. ch = *Url++;
  6311. --UrlLength;
  6312. }
  6313. *DecodedString++ = ch;
  6314. --bufferRemaining;
  6315. }
  6316. if (UrlLength == 0) {
  6317. *DecodedLength -= bufferRemaining;
  6318. return ERROR_SUCCESS;
  6319. } else {
  6320. return ERROR_INSUFFICIENT_BUFFER;
  6321. }
  6322. }
  6323. DWORD
  6324. DecodeUrlInSitu(
  6325. IN LPWSTR BufferAddress,
  6326. IN OUT LPDWORD BufferLength
  6327. )
  6328. /*++
  6329. Routine Description:
  6330. Decodes an URL string, if it contains escape sequences. The conversion is
  6331. done in place, since we know that a string containing escapes is longer than
  6332. the string with escape sequences (3 bytes) converted to characters (1 byte)
  6333. Arguments:
  6334. BufferAddress - pointer to the string to convert
  6335. BufferLength - IN: number of characters to convert
  6336. OUT: length of converted string
  6337. Return Value:
  6338. DWORD
  6339. Success - ERROR_SUCCESS
  6340. Failure - ERROR_INTERNET_INVALID_URL
  6341. ERROR_INSUFFICIENT_BUFFER
  6342. --*/
  6343. {
  6344. // NOTE We can replace this function with UrlUnescapeInPlace
  6345. DWORD stringLength = *BufferLength;
  6346. return DecodeUrl(BufferAddress,
  6347. stringLength,
  6348. BufferAddress,
  6349. BufferLength);
  6350. }
  6351. DWORD
  6352. GetUrlAddressInfo(
  6353. IN OUT LPWSTR* Url,
  6354. IN OUT LPDWORD UrlLength,
  6355. OUT LPWSTR* PartOne,
  6356. OUT LPDWORD PartOneLength,
  6357. OUT LPBOOL PartOneEscape,
  6358. OUT LPWSTR* PartTwo,
  6359. OUT LPDWORD PartTwoLength,
  6360. OUT LPBOOL PartTwoEscape
  6361. )
  6362. /*++
  6363. Routine Description:
  6364. Given a string of the form foo:bar, splits them into 2 counted strings about
  6365. the ':' character. The address string may or may not contain a ':'.
  6366. This function is intended to split into substrings the host:port and
  6367. username:password strings commonly used in Internet address specifications
  6368. and by association, in URLs
  6369. Arguments:
  6370. Url - pointer to pointer to string containing URL. On output
  6371. this is advanced past the address parts
  6372. UrlLength - pointer to length of URL in UrlString. On output this is
  6373. reduced by the number of characters parsed
  6374. PartOne - pointer which will receive first part of address string
  6375. PartOneLength - pointer which will receive length of first part of address
  6376. string
  6377. PartOneEscape - TRUE on output if PartOne contains escape sequences
  6378. PartTwo - pointer which will receive second part of address string
  6379. PartTwoLength - pointer which will receive length of second part of address
  6380. string
  6381. PartOneEscape - TRUE on output if PartTwo contains escape sequences
  6382. Return Value:
  6383. DWORD
  6384. Success - ERROR_SUCCESS
  6385. Failure - ERROR_INTERNET_INVALID_URL
  6386. --*/
  6387. {
  6388. LPWSTR pString;
  6389. LPWSTR pColon;
  6390. DWORD partLength;
  6391. LPBOOL partEscape;
  6392. DWORD length;
  6393. //
  6394. // parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>]
  6395. //
  6396. pString = *Url;
  6397. pColon = NULL;
  6398. partLength = 0;
  6399. *PartOne = pString;
  6400. *PartOneLength = 0;
  6401. *PartOneEscape = FALSE;
  6402. *PartTwoEscape = FALSE;
  6403. partEscape = PartOneEscape;
  6404. length = *UrlLength;
  6405. while ((*pString!=SLASH) && (*pString != L'\0') && (length != 0)) {
  6406. if (*pString==HEX_ESCAPE) {
  6407. // if there is a % in the string then it *must* (RFC 1738) be the
  6408. // start of an escape sequence. This function just reports the
  6409. // address of the substrings and their lengths; calling functions
  6410. // must handle the escape sequences (i.e. it is their responsibility
  6411. // to decide where to put the results)
  6412. //
  6413. *partEscape = TRUE;
  6414. }
  6415. if (*pString==COLON) {
  6416. if (pColon != NULL) {
  6417. //
  6418. // we don't expect more than 1 ':'
  6419. //
  6420. // ISSUE Note that passwords might contain colons, and thus not work in this
  6421. // case
  6422. return ERROR_INTERNET_INVALID_URL;
  6423. }
  6424. pColon = pString;
  6425. *PartOneLength = partLength;
  6426. if (partLength == 0) {
  6427. *PartOne = NULL;
  6428. }
  6429. partLength = 0;
  6430. partEscape = PartTwoEscape;
  6431. } else {
  6432. ++partLength;
  6433. }
  6434. ++pString;
  6435. --length;
  6436. }
  6437. //
  6438. // we either ended on the host (or user) name or the port number (or
  6439. // password), one of which we don't know the length of
  6440. //
  6441. if (pColon == NULL) {
  6442. *PartOneLength = partLength;
  6443. *PartTwo = NULL;
  6444. *PartTwoLength = 0;
  6445. *PartTwoEscape = FALSE;
  6446. } else {
  6447. *PartTwoLength = partLength;
  6448. *PartTwo = pColon + 1;
  6449. //
  6450. // in both the <user>:<password> and <host>:<port> cases, we cannot have
  6451. // the second part without the first, although both parts being zero
  6452. // length is OK (host name will be sorted out elsewhere, but (for now,
  6453. // at least) I am allowing <>:<> for username:password, since I don't
  6454. // see it expressly disallowed in the RFC. I may be revisiting this code
  6455. // later...)
  6456. //
  6457. // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif
  6458. // if ((*PartOneLength == 0) && (partLength != 0)) {
  6459. // return ERROR_INTERNET_INVALID_URL;
  6460. // }
  6461. }
  6462. //
  6463. // update the URL pointer and length remaining
  6464. //
  6465. *Url = pString;
  6466. *UrlLength = length;
  6467. return ERROR_SUCCESS;
  6468. }
  6469. DWORD
  6470. GetUrlAddress(
  6471. IN OUT LPWSTR* lpszUrl,
  6472. OUT LPDWORD lpdwUrlLength,
  6473. OUT LPWSTR* lpszUserName OPTIONAL,
  6474. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  6475. OUT LPWSTR* lpszPassword OPTIONAL,
  6476. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  6477. OUT LPWSTR* lpszHostName OPTIONAL,
  6478. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  6479. OUT LPSHINTERNET_PORT lpPort OPTIONAL,
  6480. OUT LPBOOL pHavePort
  6481. )
  6482. /*++
  6483. Routine Description:
  6484. This function extracts any and all parts of the address information for a
  6485. generic URL. If any of the address parts contain escaped characters (%nn)
  6486. then they are converted in situ
  6487. The generic addressing format (RFC 1738) is:
  6488. <user>:<password>@<host>:<port>
  6489. The addressing information cannot contain a password without a user name,
  6490. or a port without a host name
  6491. NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name!
  6492. (e.g. http://:0/-http-gw-internal-/menu.gif)
  6493. Although only the lpszUrl and lpdwUrlLength fields are required, the address
  6494. parts will be checked for presence and completeness
  6495. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  6496. then the accompanying lpdw field must also be supplied
  6497. Arguments:
  6498. lpszUrl - IN: pointer to the URL to parse
  6499. OUT: URL remaining after address information
  6500. N.B. The url-path is NOT canonicalized (unescaped)
  6501. because it may contain protocol-specific information
  6502. which must be parsed out by the protocol-specific
  6503. parser
  6504. lpdwUrlLength - returned length of the remainder of the URL after the
  6505. address information
  6506. lpszUserName - returned pointer to the user name
  6507. This parameter can be omitted by those protocol parsers
  6508. that do not require or expect user names in the URL
  6509. lpdwUserNameLength - returned length of the user name part
  6510. This parameter can be omitted by those protocol parsers
  6511. that do not require or expect user names in the URL
  6512. lpszPassword - returned pointer to the password
  6513. This parameter can be omitted by those protocol parsers
  6514. that do not require or expect user passwords in the URL
  6515. lpdwPasswordLength - returned length of the password
  6516. This parameter can be omitted by those protocol parsers
  6517. that do not require or expect user passwords in the URL
  6518. lpszHostName - returned pointer to the host name
  6519. This parameter can be omitted by those protocol parsers
  6520. that do not require the host name info
  6521. lpdwHostNameLength - returned length of the host name
  6522. This parameter can be omitted by those protocol parsers
  6523. that do not require the host name info
  6524. lpPort - returned value of the port field
  6525. This parameter can be omitted by those protocol parsers
  6526. that do not require or expect user port number
  6527. pHavePort - returned boolean indicating whether a port was specified
  6528. in the URL or not. This value is not returned if the
  6529. lpPort parameter is omitted.
  6530. Return Value:
  6531. DWORD
  6532. Success - ERROR_SUCCESS
  6533. Failure - ERROR_INTERNET_INVALID_URL
  6534. We could not parse some part of the address info, or we
  6535. found address info where the protocol parser didn't expect
  6536. any
  6537. ERROR_INSUFFICIENT_BUFFER
  6538. We could not convert an escaped string
  6539. --*/
  6540. {
  6541. LPWSTR pAt;
  6542. DWORD urlLength;
  6543. LPWSTR pUrl;
  6544. BOOL part1Escape;
  6545. BOOL part2Escape;
  6546. WCHAR portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
  6547. DWORD portNumberLength;
  6548. LPWSTR pPortNumber;
  6549. DWORD error;
  6550. LPWSTR hostName;
  6551. DWORD hostNameLength;
  6552. pUrl = *lpszUrl;
  6553. urlLength = lstrlenW(pUrl);
  6554. //
  6555. // check to see if there is an '@' separating user name & password. If we
  6556. // see a '/' or get to the end of the string before we see the '@' then
  6557. // there is no username:password part
  6558. //
  6559. pAt = NULL;
  6560. for (DWORD i = 0; i < urlLength; ++i) {
  6561. if (pUrl[i]==SLASH) {
  6562. break;
  6563. } else if (pUrl[i]==AT) {
  6564. pAt = &pUrl[i];
  6565. break;
  6566. }
  6567. }
  6568. if (pAt != NULL) {
  6569. DWORD addressPartLength;
  6570. LPWSTR userName;
  6571. DWORD userNameLength;
  6572. LPWSTR password;
  6573. DWORD passwordLength;
  6574. addressPartLength = (DWORD) (pAt - pUrl);
  6575. urlLength -= addressPartLength;
  6576. error = GetUrlAddressInfo(&pUrl,
  6577. &addressPartLength,
  6578. &userName,
  6579. &userNameLength,
  6580. &part1Escape,
  6581. &password,
  6582. &passwordLength,
  6583. &part2Escape
  6584. );
  6585. if (error != ERROR_SUCCESS) {
  6586. return error;
  6587. }
  6588. //
  6589. // ensure there is no address information unparsed before the '@'
  6590. //
  6591. ASSERT(addressPartLength == 0);
  6592. ASSERT(pUrl == pAt);
  6593. if (ARGUMENT_PRESENT(lpszUserName)) {
  6594. ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  6595. //
  6596. // convert the user name in situ
  6597. //
  6598. if (part1Escape) {
  6599. ASSERT(userName != NULL);
  6600. ASSERT(userNameLength != 0);
  6601. error = DecodeUrlInSitu(userName, &userNameLength);
  6602. if (error != ERROR_SUCCESS) {
  6603. return error;
  6604. }
  6605. }
  6606. *lpszUserName = userName;
  6607. *lpdwUserNameLength = userNameLength;
  6608. }
  6609. if (ARGUMENT_PRESENT(lpszPassword)) {
  6610. // convert the password in situ
  6611. if (part2Escape) {
  6612. ASSERT(userName != NULL);
  6613. ASSERT(userNameLength != 0);
  6614. ASSERT(password != NULL);
  6615. ASSERT(passwordLength != 0);
  6616. error = DecodeUrlInSitu(password, &passwordLength);
  6617. if (error != ERROR_SUCCESS) {
  6618. return error;
  6619. }
  6620. }
  6621. *lpszPassword = password;
  6622. *lpdwPasswordLength = passwordLength;
  6623. }
  6624. //
  6625. // the URL pointer now points at the host:port fields (remember that
  6626. // ExtractAddressParts() must have bumped pUrl up to the end of the
  6627. // password field (if present) which ends at pAt)
  6628. //
  6629. ++pUrl;
  6630. //
  6631. // similarly, bump urlLength to account for the '@'
  6632. //
  6633. --urlLength;
  6634. } else {
  6635. //
  6636. // no '@' therefore no username or password
  6637. //
  6638. if (ARGUMENT_PRESENT(lpszUserName)) {
  6639. ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
  6640. *lpszUserName = NULL;
  6641. *lpdwUserNameLength = 0;
  6642. }
  6643. if (ARGUMENT_PRESENT(lpszPassword)) {
  6644. ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
  6645. *lpszPassword = NULL;
  6646. *lpdwPasswordLength = 0;
  6647. }
  6648. }
  6649. //
  6650. // now get the host name and the optional port
  6651. //
  6652. pPortNumber = portNumber;
  6653. portNumberLength = sizeof(portNumber);
  6654. error = GetUrlAddressInfo(&pUrl,
  6655. &urlLength,
  6656. &hostName,
  6657. &hostNameLength,
  6658. &part1Escape,
  6659. &pPortNumber,
  6660. &portNumberLength,
  6661. &part2Escape
  6662. );
  6663. if (error != ERROR_SUCCESS) {
  6664. return error;
  6665. }
  6666. //
  6667. // the URL address information MUST contain the host name
  6668. //
  6669. // if ((hostName == NULL) || (hostNameLength == 0)) {
  6670. // return ERROR_INTERNET_INVALID_URL;
  6671. // }
  6672. if (ARGUMENT_PRESENT(lpszHostName)) {
  6673. ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));
  6674. //
  6675. // if the host name contains escaped characters, convert them in situ
  6676. //
  6677. if (part1Escape) {
  6678. error = DecodeUrlInSitu(hostName, &hostNameLength);
  6679. if (error != ERROR_SUCCESS) {
  6680. return error;
  6681. }
  6682. }
  6683. *lpszHostName = hostName;
  6684. *lpdwHostNameLength = hostNameLength;
  6685. }
  6686. //
  6687. // if there is a port field, convert it if there are escaped characters,
  6688. // check it for valid numeric characters, and convert it to a number
  6689. //
  6690. if (ARGUMENT_PRESENT(lpPort)) {
  6691. if (portNumberLength != 0) {
  6692. DWORD i;
  6693. DWORD port;
  6694. ASSERT(pPortNumber != NULL);
  6695. if (part2Escape) {
  6696. error = DecodeUrlInSitu(pPortNumber, &portNumberLength);
  6697. if (error != ERROR_SUCCESS) {
  6698. return error;
  6699. }
  6700. }
  6701. //
  6702. // ensure all characters in the port number buffer are numeric, and
  6703. // calculate the port number at the same time
  6704. //
  6705. for (i = 0, port = 0; i < portNumberLength; ++i) {
  6706. if (!IsDigit(*pPortNumber)) {
  6707. return ERROR_INTERNET_INVALID_URL;
  6708. }
  6709. port = port * 10 + (int)(*pPortNumber++ - L'0');
  6710. // We won't allow ports larger than 65535 ((2^16)-1)
  6711. // We have to check this every time to make sure that someone
  6712. // doesn't try to overflow a DWORD.
  6713. if (port > 65535)
  6714. {
  6715. return ERROR_INTERNET_INVALID_URL;
  6716. }
  6717. }
  6718. *lpPort = (SHINTERNET_PORT)port;
  6719. if (ARGUMENT_PRESENT(pHavePort)) {
  6720. *pHavePort = TRUE;
  6721. }
  6722. } else {
  6723. *lpPort = INTERNET_INVALID_PORT_NUMBER;
  6724. if (ARGUMENT_PRESENT(pHavePort)) {
  6725. *pHavePort = FALSE;
  6726. }
  6727. }
  6728. }
  6729. //
  6730. // update the URL pointer and the length of the url-path
  6731. //
  6732. *lpszUrl = pUrl;
  6733. *lpdwUrlLength = urlLength;
  6734. return ERROR_SUCCESS;
  6735. }
  6736. DWORD
  6737. CrackUrl(
  6738. IN OUT LPWSTR lpszUrl,
  6739. IN DWORD dwUrlLength,
  6740. IN BOOL bEscape,
  6741. OUT LPSHINTERNET_SCHEME lpSchemeType OPTIONAL,
  6742. OUT LPWSTR* lpszSchemeName OPTIONAL,
  6743. OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
  6744. OUT LPWSTR* lpszHostName OPTIONAL,
  6745. OUT LPDWORD lpdwHostNameLength OPTIONAL,
  6746. OUT LPSHINTERNET_PORT lpServerPort OPTIONAL,
  6747. OUT LPWSTR* lpszUserName OPTIONAL,
  6748. OUT LPDWORD lpdwUserNameLength OPTIONAL,
  6749. OUT LPWSTR* lpszPassword OPTIONAL,
  6750. OUT LPDWORD lpdwPasswordLength OPTIONAL,
  6751. OUT LPWSTR* lpszUrlPath OPTIONAL,
  6752. OUT LPDWORD lpdwUrlPathLength OPTIONAL,
  6753. OUT LPWSTR* lpszExtraInfo OPTIONAL,
  6754. OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
  6755. OUT LPBOOL pHavePort
  6756. )
  6757. /*++
  6758. Routine Description:
  6759. Cracks an URL into its constituent parts
  6760. Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
  6761. then the accompanying lpdw field must also be supplied
  6762. Arguments:
  6763. lpszUrl - pointer to URL to crack. This buffer WILL BE
  6764. OVERWRITTEN if it contains escape sequences that
  6765. we will convert back to ANSI characters
  6766. dwUrlLength - if not 0, string length of lpszUrl
  6767. bEscape - TRUE if we are to escape the url-path
  6768. lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP
  6769. lpszSchemeName - returned scheme name
  6770. lpdwSchemeNameLength - length of scheme name
  6771. lpszHostName - returned host name
  6772. lpdwHostNameLength - length of host name buffer
  6773. lpServerPort - returned server port if present in the URL, else 0
  6774. lpszUserName - returned user name if present
  6775. lpdwUserNameLength - length of user name buffer
  6776. lpszPassword - returned password if present
  6777. lpdwPasswordLength - length of password buffer
  6778. lpszUrlPath - returned, canonicalized URL path
  6779. lpdwUrlPathLength - length of url-path buffer
  6780. lpszExtraInfo - returned search string or intra-page link if present
  6781. lpdwExtraInfoLength - length of extra info buffer
  6782. pHavePort - returned boolean indicating whether port was specified
  6783. Return Value:
  6784. DWORD
  6785. Success - ERROR_SUCCESS
  6786. Failure - ERROR_INTERNET_UNRECOGNIZED_SCHEME
  6787. --*/
  6788. {
  6789. DWORD error;
  6790. DWORD schemeLength;
  6791. SHINTERNET_SCHEME schemeType;
  6792. //
  6793. // if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length
  6794. //
  6795. if (dwUrlLength == 0) {
  6796. dwUrlLength = lstrlenW(lpszUrl);
  6797. }
  6798. //
  6799. // get parser based on the protocol name
  6800. //
  6801. for (schemeLength = 0; lpszUrl[schemeLength]!=COLON; ++schemeLength) {
  6802. if ((dwUrlLength == 0) || (lpszUrl[schemeLength] == '\0')) {
  6803. //
  6804. // no ':' in URL? Bogus (dude)
  6805. //
  6806. error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
  6807. goto quit;
  6808. }
  6809. --dwUrlLength;
  6810. }
  6811. DWORD i;
  6812. int skip;
  6813. BOOL isGeneric;
  6814. BOOL needSlashes;
  6815. BOOL haveSlashes;
  6816. isGeneric = FALSE;
  6817. needSlashes = FALSE;
  6818. haveSlashes = FALSE;
  6819. schemeType = SHINTERNET_SCHEME_UNKNOWN;
  6820. if (ScanSchemes(lpszUrl, schemeLength, &i))
  6821. {
  6822. schemeType = UrlSchemeList[i].SchemeType;
  6823. needSlashes = UrlSchemeList[i].NeedSlashes;
  6824. }
  6825. skip = 1; // skip ':'
  6826. if ((dwUrlLength > 3) && (StrCmpNIW(&lpszUrl[schemeLength], L"://", 3) == 0)) {
  6827. skip = 3; // skip "://"
  6828. haveSlashes = TRUE;
  6829. }
  6830. if (schemeType == SHINTERNET_SCHEME_FILE)
  6831. isGeneric = TRUE;
  6832. if (schemeType == SHINTERNET_SCHEME_NEWS ||
  6833. schemeType == SHINTERNET_SCHEME_UNKNOWN) {
  6834. //
  6835. // urls can be hierarchical or opaque. if the slashes
  6836. // exist, then we should assume hierarchical
  6837. // when we dont know the scheme or it is news:.
  6838. // otherwise it is opaque (isGeneric)
  6839. //
  6840. needSlashes = haveSlashes;
  6841. isGeneric = !haveSlashes;
  6842. }
  6843. //
  6844. // If we don't have slashes, make sure we don't need them.
  6845. // If we have slashes, make sure they are required.
  6846. //
  6847. if ((!haveSlashes && !needSlashes) || (haveSlashes && needSlashes)) {
  6848. if (ARGUMENT_PRESENT(lpSchemeType)) {
  6849. *lpSchemeType = schemeType;
  6850. }
  6851. if (ARGUMENT_PRESENT(lpszSchemeName)) {
  6852. *lpszSchemeName = lpszUrl;
  6853. *lpdwSchemeNameLength = schemeLength;
  6854. }
  6855. lpszUrl += schemeLength + skip;
  6856. dwUrlLength -= skip;
  6857. if (SHINTERNET_SCHEME_RES == schemeType) {
  6858. if (ARGUMENT_PRESENT(lpszUserName)) {
  6859. *lpszUserName = NULL;
  6860. *lpdwUserNameLength = 0;
  6861. }
  6862. if (ARGUMENT_PRESENT(lpszPassword)) {
  6863. *lpszPassword = NULL;
  6864. *lpdwPasswordLength = 0;
  6865. }
  6866. if (ARGUMENT_PRESENT(lpServerPort)) {
  6867. *lpServerPort = 0;
  6868. }
  6869. PWSTR psz = lpszUrl;
  6870. while (*lpszUrl && *lpszUrl!=SLASH)
  6871. lpszUrl++;
  6872. if (ARGUMENT_PRESENT(lpszHostName)) {
  6873. *lpszHostName = psz;
  6874. *lpdwHostNameLength = (DWORD)(lpszUrl - psz);
  6875. dwUrlLength -= *lpdwHostNameLength;
  6876. error = DecodeUrlInSitu(*lpszHostName, lpdwHostNameLength);
  6877. }
  6878. } else if (isGeneric) {
  6879. if (ARGUMENT_PRESENT(lpszUserName)) {
  6880. *lpszUserName = NULL;
  6881. *lpdwUserNameLength = 0;
  6882. }
  6883. if (ARGUMENT_PRESENT(lpszPassword)) {
  6884. *lpszPassword = NULL;
  6885. *lpdwPasswordLength = 0;
  6886. }
  6887. if (ARGUMENT_PRESENT(lpszHostName)) {
  6888. *lpszHostName = NULL;
  6889. *lpdwHostNameLength = 0;
  6890. }
  6891. if (ARGUMENT_PRESENT(lpServerPort)) {
  6892. *lpServerPort = 0;
  6893. }
  6894. error = ERROR_SUCCESS;
  6895. } else {
  6896. error = GetUrlAddress(&lpszUrl,
  6897. &dwUrlLength,
  6898. lpszUserName,
  6899. lpdwUserNameLength,
  6900. lpszPassword,
  6901. lpdwPasswordLength,
  6902. lpszHostName,
  6903. lpdwHostNameLength,
  6904. lpServerPort,
  6905. pHavePort
  6906. );
  6907. }
  6908. if (bEscape && (error == ERROR_SUCCESS)) {
  6909. error = DecodeUrlInSitu(lpszUrl, &dwUrlLength);
  6910. }
  6911. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszExtraInfo)) {
  6912. *lpdwExtraInfoLength = 0;
  6913. for (i = 0; i < (int)dwUrlLength; i++) {
  6914. if (lpszUrl[i] == '?' || lpszUrl[i] == '#') {
  6915. *lpszExtraInfo = &lpszUrl[i];
  6916. *lpdwExtraInfoLength = dwUrlLength - i;
  6917. dwUrlLength -= *lpdwExtraInfoLength;
  6918. }
  6919. }
  6920. }
  6921. if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszUrlPath)) {
  6922. *lpszUrlPath = lpszUrl;
  6923. *lpdwUrlPathLength = dwUrlLength;
  6924. }
  6925. } else {
  6926. error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
  6927. }
  6928. quit:
  6929. return error;
  6930. }
  6931. BOOL
  6932. WINAPI
  6933. UrlCrackW(
  6934. IN LPCWSTR lpszUrl,
  6935. IN DWORD dwUrlLength,
  6936. IN DWORD dwFlags,
  6937. IN LPSHURL_COMPONENTSW lpUrlComponents
  6938. )
  6939. /*++
  6940. Routine Description:
  6941. Cracks an URL into its constituent parts. Optionally escapes the url-path.
  6942. We assume that the user has supplied large enough buffers for the various
  6943. URL parts
  6944. Arguments:
  6945. lpszUrl - pointer to URL to crack
  6946. dwUrlLength - 0 if lpszUrl is ASCIIZ string, else length of lpszUrl
  6947. dwFlags - flags controlling operation
  6948. lpUrlComponents - pointer to URL_COMPONENTS
  6949. Return Value:
  6950. BOOL
  6951. Success - TRUE
  6952. Failure - FALSE. Call GetLastError() for more info
  6953. --*/
  6954. {
  6955. DWORD error = ERROR_SUCCESS;
  6956. // validate parameters
  6957. if (ARGUMENT_PRESENT(lpszUrl)) {
  6958. if (!dwUrlLength) {
  6959. error = ProbeStringW((LPWSTR)lpszUrl, &dwUrlLength);
  6960. } else if (IsBadReadPtr((LPVOID)lpszUrl, dwUrlLength*sizeof(WCHAR))) {
  6961. error = ERROR_INVALID_PARAMETER;
  6962. }
  6963. } else {
  6964. error = ERROR_INVALID_PARAMETER;
  6965. }
  6966. if (error != ERROR_SUCCESS)
  6967. {
  6968. goto quit;
  6969. }
  6970. if (IsBadWritePtr(lpUrlComponents, sizeof(*lpUrlComponents))
  6971. || (lpUrlComponents->dwStructSize != sizeof(*lpUrlComponents)))
  6972. {
  6973. error = ERROR_INVALID_PARAMETER;
  6974. goto quit;
  6975. }
  6976. //
  6977. // we only allow two flags for this API
  6978. //
  6979. if (dwFlags & ~(ICU_ESCAPE | ICU_DECODE)) {
  6980. error = ERROR_INVALID_PARAMETER;
  6981. goto quit;
  6982. }
  6983. //
  6984. // get the individual components to return. If they reference a buffer then
  6985. // check it for writeability
  6986. //
  6987. LPWSTR lpUrl;
  6988. LPWSTR urlCopy;
  6989. SHINTERNET_SCHEME schemeType;
  6990. LPWSTR schemeName;
  6991. DWORD schemeNameLength;
  6992. LPWSTR hostName;
  6993. DWORD hostNameLength;
  6994. SHINTERNET_PORT nPort;
  6995. LPWSTR userName;
  6996. DWORD userNameLength;
  6997. LPWSTR password;
  6998. DWORD passwordLength;
  6999. LPWSTR urlPath;
  7000. DWORD urlPathLength;
  7001. LPWSTR extraInfo;
  7002. DWORD extraInfoLength;
  7003. BOOL copyComponent;
  7004. BOOL havePort;
  7005. copyComponent = FALSE;
  7006. schemeName = lpUrlComponents->lpszScheme;
  7007. schemeNameLength = lpUrlComponents->dwSchemeLength;
  7008. if ((schemeName != NULL) && (schemeNameLength != 0)) {
  7009. error = ProbeWriteStringBufferW((LPVOID)schemeName, schemeNameLength);
  7010. if (error != ERROR_SUCCESS) {
  7011. goto quit;
  7012. }
  7013. *schemeName = '\0';
  7014. copyComponent = TRUE;
  7015. }
  7016. hostName = lpUrlComponents->lpszHostName;
  7017. hostNameLength = lpUrlComponents->dwHostNameLength;
  7018. if ((hostName != NULL) && (hostNameLength != 0)) {
  7019. error = ProbeWriteStringBufferW((LPVOID)hostName, hostNameLength);
  7020. if (error != ERROR_SUCCESS) {
  7021. goto quit;
  7022. }
  7023. *hostName = '\0';
  7024. copyComponent = TRUE;
  7025. }
  7026. userName = lpUrlComponents->lpszUserName;
  7027. userNameLength = lpUrlComponents->dwUserNameLength;
  7028. if ((userName != NULL) && (userNameLength != 0)) {
  7029. error = ProbeWriteStringBufferW((LPVOID)userName, userNameLength);
  7030. if (error != ERROR_SUCCESS) {
  7031. goto quit;
  7032. }
  7033. *userName = '\0';
  7034. copyComponent = TRUE;
  7035. }
  7036. password = lpUrlComponents->lpszPassword;
  7037. passwordLength = lpUrlComponents->dwPasswordLength;
  7038. if ((password != NULL) && (passwordLength != 0)) {
  7039. error = ProbeWriteStringBufferW((LPVOID)password, passwordLength);
  7040. if (error != ERROR_SUCCESS) {
  7041. goto quit;
  7042. }
  7043. *password = '\0';
  7044. copyComponent = TRUE;
  7045. }
  7046. urlPath = lpUrlComponents->lpszUrlPath;
  7047. urlPathLength = lpUrlComponents->dwUrlPathLength;
  7048. if ((urlPath != NULL) && (urlPathLength != 0)) {
  7049. error = ProbeWriteStringBufferW((LPVOID)urlPath, urlPathLength);
  7050. if (error != ERROR_SUCCESS) {
  7051. goto quit;
  7052. }
  7053. *urlPath = '\0';
  7054. copyComponent = TRUE;
  7055. }
  7056. extraInfo = lpUrlComponents->lpszExtraInfo;
  7057. extraInfoLength = lpUrlComponents->dwExtraInfoLength;
  7058. if ((extraInfo != NULL) && (extraInfoLength != 0)) {
  7059. error = ProbeWriteStringBufferW((LPVOID)extraInfo, extraInfoLength);
  7060. if (error != ERROR_SUCCESS) {
  7061. goto quit;
  7062. }
  7063. *extraInfo = '\0';
  7064. copyComponent = TRUE;
  7065. }
  7066. //
  7067. // we can only escape or decode the URL if the caller has provided us with
  7068. // buffers to write the escaped strings into
  7069. //
  7070. if (dwFlags & (ICU_ESCAPE | ICU_DECODE)) {
  7071. if (!copyComponent) {
  7072. error = ERROR_INVALID_PARAMETER;
  7073. goto quit;
  7074. }
  7075. //
  7076. // create a copy of the URL. CrackUrl() will modify this in situ. We
  7077. // need to copy the results back to the user's buffer(s)
  7078. //
  7079. DWORD dw = dwUrlLength;
  7080. if (!dw)
  7081. {
  7082. dw = lstrlenW(lpszUrl);
  7083. }
  7084. urlCopy = new WCHAR[dw+1];
  7085. if (urlCopy == NULL) {
  7086. error = ERROR_NOT_ENOUGH_MEMORY;
  7087. goto quit;
  7088. }
  7089. memcpy(urlCopy, lpszUrl, (dw+1)*sizeof(WCHAR));
  7090. lpUrl = urlCopy;
  7091. } else {
  7092. lpUrl = (LPWSTR)lpszUrl;
  7093. urlCopy = NULL;
  7094. }
  7095. //
  7096. // crack the URL into its constituent parts
  7097. //
  7098. error = CrackUrl(lpUrl,
  7099. dwUrlLength,
  7100. (dwFlags & ICU_ESCAPE) ? TRUE : FALSE,
  7101. &schemeType,
  7102. &schemeName,
  7103. &schemeNameLength,
  7104. &hostName,
  7105. &hostNameLength,
  7106. &nPort,
  7107. &userName,
  7108. &userNameLength,
  7109. &password,
  7110. &passwordLength,
  7111. &urlPath,
  7112. &urlPathLength,
  7113. extraInfoLength ? &extraInfo : NULL,
  7114. extraInfoLength ? &extraInfoLength : 0,
  7115. &havePort
  7116. );
  7117. if (error != ERROR_SUCCESS) {
  7118. goto crack_error;
  7119. }
  7120. BOOL copyFailure;
  7121. copyFailure = FALSE;
  7122. //
  7123. // update the URL_COMPONENTS structure based on the results, and what was
  7124. // asked for
  7125. //
  7126. if (lpUrlComponents->lpszScheme != NULL) {
  7127. if (lpUrlComponents->dwSchemeLength > schemeNameLength) {
  7128. memcpy(lpUrlComponents->lpszScheme, schemeName, schemeNameLength*sizeof(WCHAR));
  7129. lpUrlComponents->lpszScheme[schemeNameLength] = '\0';
  7130. if (dwFlags & ICU_DECODE) {
  7131. UrlUnescapeInPlaceW(lpUrlComponents->lpszScheme, 0);
  7132. }
  7133. } else {
  7134. ++schemeNameLength;
  7135. copyFailure = TRUE;
  7136. }
  7137. lpUrlComponents->dwSchemeLength = schemeNameLength;
  7138. } else if (lpUrlComponents->dwSchemeLength != 0) {
  7139. lpUrlComponents->lpszScheme = schemeName;
  7140. lpUrlComponents->dwSchemeLength = schemeNameLength;
  7141. }
  7142. if (lpUrlComponents->lpszHostName != NULL) {
  7143. if (lpUrlComponents->dwHostNameLength > hostNameLength) {
  7144. memcpy(lpUrlComponents->lpszHostName, hostName, hostNameLength*sizeof(WCHAR));
  7145. lpUrlComponents->lpszHostName[hostNameLength] = '\0';
  7146. if (dwFlags & ICU_DECODE) {
  7147. UrlUnescapeInPlaceW(lpUrlComponents->lpszHostName, 0);
  7148. }
  7149. } else {
  7150. ++hostNameLength;
  7151. copyFailure = TRUE;
  7152. }
  7153. lpUrlComponents->dwHostNameLength = hostNameLength;
  7154. } else if (lpUrlComponents->dwHostNameLength != 0) {
  7155. lpUrlComponents->lpszHostName = hostName;
  7156. lpUrlComponents->dwHostNameLength = hostNameLength;
  7157. }
  7158. if (lpUrlComponents->lpszUserName != NULL) {
  7159. if (lpUrlComponents->dwUserNameLength > userNameLength) {
  7160. memcpy(lpUrlComponents->lpszUserName, userName, userNameLength*sizeof(WCHAR));
  7161. lpUrlComponents->lpszUserName[userNameLength] = '\0';
  7162. if (dwFlags & ICU_DECODE) {
  7163. UrlUnescapeInPlaceW(lpUrlComponents->lpszUserName, 0);
  7164. }
  7165. } else {
  7166. ++userNameLength;
  7167. copyFailure = TRUE;
  7168. }
  7169. lpUrlComponents->dwUserNameLength = userNameLength;
  7170. } else if (lpUrlComponents->dwUserNameLength != 0) {
  7171. lpUrlComponents->lpszUserName = userName;
  7172. lpUrlComponents->dwUserNameLength = userNameLength;
  7173. }
  7174. if (lpUrlComponents->lpszPassword != NULL) {
  7175. if (lpUrlComponents->dwPasswordLength > passwordLength) {
  7176. memcpy(lpUrlComponents->lpszPassword, password, passwordLength*sizeof(WCHAR));
  7177. lpUrlComponents->lpszPassword[passwordLength] = '\0';
  7178. if (dwFlags & ICU_DECODE) {
  7179. UrlUnescapeInPlaceW(lpUrlComponents->lpszPassword, 0);
  7180. }
  7181. } else {
  7182. ++passwordLength;
  7183. copyFailure = TRUE;
  7184. }
  7185. lpUrlComponents->dwPasswordLength = passwordLength;
  7186. } else if (lpUrlComponents->dwPasswordLength != 0) {
  7187. lpUrlComponents->lpszPassword = password;
  7188. lpUrlComponents->dwPasswordLength = passwordLength;
  7189. }
  7190. if (lpUrlComponents->lpszUrlPath != NULL) {
  7191. if(schemeType == SHINTERNET_SCHEME_FILE)
  7192. {
  7193. //
  7194. // for file: urls we return the path component
  7195. // as a valid dos path.
  7196. //
  7197. copyFailure = FAILED(PathCreateFromUrlW(lpUrl, lpUrlComponents->lpszUrlPath, &(lpUrlComponents->dwUrlPathLength), 0));
  7198. }
  7199. else if (lpUrlComponents->dwUrlPathLength > urlPathLength) {
  7200. memcpy(lpUrlComponents->lpszUrlPath, urlPath, urlPathLength*sizeof(WCHAR));
  7201. lpUrlComponents->lpszUrlPath[urlPathLength] = '\0';
  7202. if (dwFlags & ICU_DECODE) {
  7203. UrlUnescapeInPlaceW(lpUrlComponents->lpszUrlPath, 0);
  7204. }
  7205. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7206. } else {
  7207. ++urlPathLength;
  7208. copyFailure = TRUE;
  7209. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7210. }
  7211. } else if (lpUrlComponents->dwUrlPathLength != 0) {
  7212. lpUrlComponents->lpszUrlPath = urlPath;
  7213. lpUrlComponents->dwUrlPathLength = urlPathLength;
  7214. }
  7215. if (lpUrlComponents->lpszExtraInfo != NULL) {
  7216. if (lpUrlComponents->dwExtraInfoLength > extraInfoLength) {
  7217. memcpy(lpUrlComponents->lpszExtraInfo, extraInfo, extraInfoLength*sizeof(WCHAR));
  7218. lpUrlComponents->lpszExtraInfo[extraInfoLength] = '\0';
  7219. if (dwFlags & ICU_DECODE) {
  7220. UrlUnescapeInPlaceW(lpUrlComponents->lpszExtraInfo, 0);
  7221. }
  7222. } else {
  7223. ++extraInfoLength;
  7224. copyFailure = TRUE;
  7225. }
  7226. lpUrlComponents->dwExtraInfoLength = extraInfoLength;
  7227. } else if (lpUrlComponents->dwExtraInfoLength != 0) {
  7228. lpUrlComponents->lpszExtraInfo = extraInfo;
  7229. lpUrlComponents->dwExtraInfoLength = extraInfoLength;
  7230. }
  7231. //
  7232. // we may have failed to copy one or more components because we didn't have
  7233. // enough buffer space.
  7234. //
  7235. // N.B. Don't change error below here. If need be, move this test lower
  7236. //
  7237. if (copyFailure) {
  7238. error = ERROR_INSUFFICIENT_BUFFER;
  7239. }
  7240. //
  7241. // copy the scheme type
  7242. //
  7243. lpUrlComponents->nScheme = schemeType;
  7244. //
  7245. // convert 0 port (not in URL) to default value for scheme
  7246. //
  7247. if (nPort == INTERNET_INVALID_PORT_NUMBER && !havePort) {
  7248. switch (schemeType) {
  7249. case SHINTERNET_SCHEME_FTP:
  7250. nPort = INTERNET_DEFAULT_FTP_PORT;
  7251. break;
  7252. case SHINTERNET_SCHEME_GOPHER:
  7253. nPort = INTERNET_DEFAULT_GOPHER_PORT;
  7254. break;
  7255. case SHINTERNET_SCHEME_HTTP:
  7256. nPort = INTERNET_DEFAULT_HTTP_PORT;
  7257. break;
  7258. case SHINTERNET_SCHEME_HTTPS:
  7259. nPort = INTERNET_DEFAULT_HTTPS_PORT;
  7260. break;
  7261. }
  7262. }
  7263. lpUrlComponents->nPort = nPort;
  7264. crack_error:
  7265. if (urlCopy != NULL) {
  7266. delete [] urlCopy;
  7267. }
  7268. quit:
  7269. // return HRESULT_FROM_WIN32(error);
  7270. if (error!=ERROR_SUCCESS)
  7271. {
  7272. SetLastError(error);
  7273. }
  7274. return error==ERROR_SUCCESS;
  7275. }