/*++ Copyright (c) 1994 Microsoft Corporation Module Name: urlpars.cpp Abstract: Contains all the worker routines for Combine and Canonicalize Contents: (ConvertChar) Author: Zeke Lucas (zekel) 16-Dez-96 Ahsan Kabir (akabir): UrlCombine parser rewritten in July-Sept98 Environment: Win32(s) user-mode DLL Revision History: there is about one percent of this derived from the Spyglass or MSHTML/WININET codebase --*/ #include "priv.h" #include #ifdef UNIX #include #endif #include #include #ifdef UNIX #include "unixstuff.h" #endif #include #define DM_PERF 0 // perf stats #define PF_LOGSCHEMEHITS 0x00000001 #ifndef CPP_FUNCTIONS #define CPP_FUNCTIONS #include #endif #define USE_FAST_PARSER #ifdef DEBUG //#define PROOFREAD_PARSES #endif // Same as in wininet; however, this is only theoretical, since urls aren't necessarily so // constrained. However, this is true throughout the product, so we'll have to do this. #define INTERNET_MAX_PATH_LENGTH 2048 #define INTERNET_MAX_SCHEME_LENGTH 32 #define HEX_ESCAPE L'%' #define HEX_ESCAPE_A '%' #define TERMSTR(pch) *(pch) = L'\0' // (WCHAR) 8 is backspace #define DEADSEGCHAR ((WCHAR) 8) #define KILLSEG(pch) *(pch) = DEADSEGCHAR #define CR L'\r' #define LF L'\n' #define TAB L'\t' #define SPC L' ' #define SLASH L'/' #define WHACK L'\\' #define QUERY L'?' #define POUND L'#' #define SEMICOLON L';' #define COLON L':' #define BAR L'|' #define DOT L'.' 
#define AT L'@' #define UPF_SCHEME_OPAQUE 0x00000001 // should not be treated as heriarchical #define UPF_SCHEME_INTERNET 0x00000002 #define UPF_SCHEME_NOHISTORY 0x00000004 #define UPF_SCHEME_CONVERT 0x00000008 // treat slashes and whacks as equiv #define UPF_SCHEME_DONTCORRECT 0x00000010 // Don't try to autocorrect to this scheme #define UPF_SEG_ABSOLUTE 0x00000100 // the initial segment is the root #define UPF_SEG_LOCKFIRST 0x00000200 // this is for file parsing #define UPF_SEG_EMPTYSEG 0x00000400 // this was an empty string, but is still important #define UPF_EXSEG_DIRECTORY 0x00001000 // the final segment is a "directory" (trailing slash) #define UPF_FILEISPATHURL 0x10000000 // this is for file paths, dont unescape because they are actually dos paths // // the masks are for inheritance purposes during BlendParts // if you inherit that part you inherit that mask // #define UPF_SCHEME_MASK 0x000000FF #define UPF_SEG_MASK 0x00000F00 #define UPF_EXSEG_MASK 0x0000F000 // right now these masks are unused, and can be recycled #define UPF_SERVER_MASK 0x000F0000 #define UPF_QUERY_MASK 0x0F000000 extern "C" int _StrCmpNA(LPCSTR lpStr1, LPCSTR lpStr2, int nChar, BOOL fMBCS); extern "C" LPSTR _StrChrA(LPCSTR lpStart, WORD wMatch, BOOL fMBCS); typedef struct _UrlParts { DWORD dwFlags; LPWSTR pszScheme; URL_SCHEME eScheme; LPWSTR pszServer; LPWSTR pszSegments; DWORD cSegments; LPWSTR pszExtraSegs; DWORD cExtraSegs; LPWSTR pszQuery; LPWSTR pszFragment; } URLPARTS, *PURLPARTS; HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags); HRESULT SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags); // Ansi wrappers might overwrite the unicode core's return value // We should try to prevent that HRESULT ReconcileHresults(HRESULT hr1, HRESULT hr2) { return (hr2==S_OK) ? 
//
// IsSafe() - classify a character against the isSafe[] table.
//
//   ch    the character to test
//   mask  one or more of the table bits
//         (1 alphadigit, 2 hex, 4 valid scheme, 8 mark)
//
// Returns TRUE when ch lies in the range the table covers (32..127)
// and its table entry has at least one of the bits in mask set.
//
PRIVATE inline BOOL IsSafe(WCHAR ch, WORD mask)
{
    // the table starts at SPC (32) and ends at DEL (127);
    // anything outside of that window is never considered safe.
    if (ch <= 31 || ch >= 128)
        return FALSE;

    return (isSafe[ch - 32] & mask) ? TRUE : FALSE;
}
//
// IsValidSchemeCharW() - TRUE if ch may appear inside a scheme name.
//
// Only characters accepted by IsAsciiCharW() are looked up; the mask 5
// selects the "alphadigit" (1) and "valid scheme" (4) bits of isSafe[].
//
BOOL IsValidSchemeCharW(WCHAR ch)
{
    // reject anything IsAsciiCharW() does not accept before
    // narrowing the character for the table lookup.
    if (!IsAsciiCharW(ch))
        return FALSE;

    return IsSafe((CHAR) ch, 5);
}
{ c_szHttpScheme, URL_SCHEME_HTTP, SIZECHARS(c_szHttpScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szFileScheme, URL_SCHEME_FILE, SIZECHARS(c_szFileScheme) - 1, UPF_SCHEME_CONVERT}, { c_szFTPScheme, URL_SCHEME_FTP, SIZECHARS(c_szFTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szHttpsScheme, URL_SCHEME_HTTPS, SIZECHARS(c_szHttpsScheme) -1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT|UPF_SCHEME_DONTCORRECT}, { c_szNewsScheme, URL_SCHEME_NEWS, SIZECHARS(c_szNewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szMailToScheme, URL_SCHEME_MAILTO, SIZECHARS(c_szMailToScheme) - 1, UPF_SCHEME_OPAQUE}, { c_szGopherScheme, URL_SCHEME_GOPHER, SIZECHARS(c_szGopherScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szNNTPScheme, URL_SCHEME_NNTP, SIZECHARS(c_szNNTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szTelnetScheme, URL_SCHEME_TELNET, SIZECHARS(c_szTelnetScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szWAISScheme, URL_SCHEME_WAIS, SIZECHARS(c_szWAISScheme) - 1, 0}, { c_szMkScheme, URL_SCHEME_MK, SIZECHARS(c_szMkScheme) - 1, UPF_SCHEME_NOHISTORY}, { c_szShellScheme, URL_SCHEME_SHELL, SIZECHARS(c_szShellScheme) - 1, UPF_SCHEME_OPAQUE}, { c_szLocalScheme, URL_SCHEME_LOCAL, SIZECHARS(c_szLocalScheme) - 1, 0}, { c_szJSScheme, URL_SCHEME_JAVASCRIPT,SIZECHARS(c_szJSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY}, { c_szVSScheme, URL_SCHEME_VBSCRIPT, SIZECHARS(c_szVSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY}, { c_szSnewsScheme, URL_SCHEME_SNEWS, SIZECHARS(c_szSnewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT}, { c_szAboutScheme, URL_SCHEME_ABOUT, SIZECHARS(c_szAboutScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY}, { c_szResScheme, URL_SCHEME_RES, SIZECHARS(c_szResScheme) - 1, UPF_SCHEME_NOHISTORY}, { c_szRootedScheme, URL_SCHEME_MSSHELLROOTED, SIZECHARS(c_szRootedScheme) - 1, 0}, { c_szIDListScheme, URL_SCHEME_MSSHELLIDLIST, SIZECHARS(c_szIDListScheme) - 1, 0}, { c_szMsHelpScheme, 
//
// _StrCmpNMixed() - compare cch chars of an ANSI string against a
// UNICODE string.
//
//   psz   counted ANSI string (the candidate scheme)
//   pwz   UNICODE string to compare against
//   cch   number of characters to compare
//
// Returns 0 if the first cch characters match, 1 if the first
// differing ANSI char is greater than its UNICODE counterpart,
// otherwise -1.
//
// NOTE: no real mbcs conversion is done; each ANSI char is simply
// widened.  the compare is case insensitive in one direction only:
// an upper case char in psz matches its lower case form in pwz.
//
PRIVATE int _StrCmpNMixed(LPCSTR psz, LPCWSTR pwz, DWORD cch)
{
    DWORD ich;

    for (ich = 0; ich < cch; ich++)
    {
        WCHAR chA = psz[ich];
        WCHAR chW = pwz[ich];

        if (chA == chW)
            continue;

        //  'A'..'Z' in psz is allowed to match 'a'..'z' in pwz
        if (IsUpper(chA) && (chA + 32) == chW)
            continue;

        return (chA > chW) ? 1 : -1;
    }

    return 0;
}
g_cSTHit0++ : g_cSTHit++); Lhit: if (pdwFlags) *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags; // update cache (unconditionally) g_iScheme = i; return g_mpUrlSchemeTypes[i].eScheme; } for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++) { if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme)) goto Lhit; } if (pdwFlags) { *pdwFlags = 0; } return URL_SCHEME_UNKNOWN; } PRIVATE URL_SCHEME GetSchemeTypeAndFlagsA(LPCSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags) { DWORD i; ASSERT(pszScheme); for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++) { if(0 == _StrCmpNMixed(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme)) { if (pdwFlags) *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags; return g_mpUrlSchemeTypes[i].eScheme; } } if (pdwFlags) { *pdwFlags = 0; } return URL_SCHEME_UNKNOWN; } /*---------------------------------------------------------- Purpose: Return the scheme ordinal type (URL_SCHEME_*) based on the URL string. Returns: URL_SCHEME_ ordinal Cond: -- */ PRIVATE inline BOOL IsSameSchemeW(LPCWSTR pszLocal, LPCWSTR pszGlobal, DWORD cch) { ASSERT(pszLocal); ASSERT(pszGlobal); ASSERT(cch); return !StrCmpNIW(pszLocal, pszGlobal, cch); } PRIVATE BOOL IsSameSchemeA(LPCSTR pszLocal, LPCWSTR pszGlobal, DWORD cch) { ASSERT(pszLocal); ASSERT(pszGlobal); ASSERT(cch); return !_StrCmpNMixed(pszLocal, pszGlobal, cch); } PRIVATE URL_SCHEME SchemeTypeFromStringA( LPCSTR psz, DWORD cch) { DWORD i; // psz is a counted string (by cch), not a null-terminated string, // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRA. ASSERT(IS_VALID_READ_BUFFER(psz, CHAR, cch)); ASSERT(cch); // We use a linear search. A binary search wouldn't pay off // because the list isn't big enough, and we can sort the list // according to the most popular protocol schemes and pay off // bigger. 
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++) { if(cch == g_mpUrlSchemeTypes[i].cchScheme && IsSameSchemeA(psz, g_mpUrlSchemeTypes[i].pszScheme, cch)) return g_mpUrlSchemeTypes[i].eScheme; } return URL_SCHEME_UNKNOWN; } PRIVATE URL_SCHEME SchemeTypeFromStringW( LPCWSTR psz, DWORD cch) { DWORD i; // psz is a counted string (by cch), not a null-terminated string, // so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRW. ASSERT(IS_VALID_READ_BUFFER(psz, WCHAR, cch)); ASSERT(cch); // We use a linear search. A binary search wouldn't pay off // because the list isn't big enough, and we can sort the list // according to the most popular protocol schemes and pay off // bigger. for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++) { if(cch == g_mpUrlSchemeTypes[i].cchScheme && IsSameSchemeW(psz, g_mpUrlSchemeTypes[i].pszScheme, cch)) return g_mpUrlSchemeTypes[i].eScheme; } return URL_SCHEME_UNKNOWN; } // // these are used during path fumbling that i do // each string between a path delimiter ( '/' or '\') // is a segment. we dont ever really care about // empty ("") segments, so it is best to use // NextLiveSegment(). // inline PRIVATE LPWSTR NextSegment(LPWSTR psz) { ASSERT (psz); return psz + lstrlenW(psz) + 1; } #define IsLiveSegment(p) ((p) && (*p) != DEADSEGCHAR) PRIVATE LPWSTR NextLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs) { if(pszSeg) do { // // count the number of dead segments that we skip. // if the segment isnt dead, then we can just skip one, // the current one. // DWORD cSkip; for (cSkip = 0; (*pszSeg) == DEADSEGCHAR; pszSeg++, cSkip++); cSkip = cSkip ? 
cSkip : 1; if((*piSeg) + cSkip < cSegs) { pszSeg = NextSegment(pszSeg); (*piSeg) += cSkip; } else pszSeg = NULL; } while (pszSeg && (*pszSeg == DEADSEGCHAR)); return pszSeg; } PRIVATE LPWSTR LastLiveSegment(LPWSTR pszSeg, DWORD cSegs, BOOL fFailIfFirst) { DWORD iSeg = 0; LPWSTR pszLast = NULL; BOOL fLastIsFirst = FALSE; if(cSegs) { if(IsLiveSegment(pszSeg)) { pszLast = pszSeg; fLastIsFirst = TRUE; } while(pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs)) { if(!pszLast) fLastIsFirst = TRUE; else fLastIsFirst = FALSE; pszLast = pszSeg; } if(fFailIfFirst && fLastIsFirst) pszLast = NULL; } return pszLast; } PRIVATE LPWSTR FirstLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs) { ASSERT(piSeg); *piSeg = 0; if(!pszSeg || !cSegs) return NULL; if(!IsLiveSegment(pszSeg)) pszSeg = NextLiveSegment(pszSeg, piSeg, cSegs); return pszSeg; } inline BOOL IsDosDrive(LPCWSTR p) { return (*p && p[1] == COLON); } inline BOOL IsDosPath(LPCWSTR p) { return (*p == WHACK || IsDosDrive(p)); } inline BOOL IsDriveUrl(const WCHAR *p) { return (*p && p[1] == BAR); } inline BOOL IsDrive(LPCWSTR p) { return (IsDosDrive(p) || IsDriveUrl(p)); } inline BOOL IsSeparator(const WCHAR *p) { return (*p == SLASH || *p == WHACK ); } inline BOOL IsAbsolute(const WCHAR *p) { #ifndef UNIX return (IsSeparator(p) || IsDrive(p)); #else return (IsSeparator(p)) ; #endif } #define IsUNC(pathW) PathIsUNCW(pathW) inline BOOL IsDot(LPCWSTR p) // if p == "." return TRUE { return (*p == DOT && !p[1]); } inline BOOL IsDotDot(LPCWSTR p) // if p == ".." 
return TRUE { return (*p == DOT && p[1] == DOT && !p[2]); } //+--------------------------------------------------------------------------- // // Method: ConvertChar // // Synopsis: // // Arguments: [szStr] -- // [cIn] -- // [cOut] -- // // Returns: // // History: 03-20-96 JoeS (Joe Souza) Created // // Notes: // //---------------------------------------------------------------------------- static void ConvertChar(LPWSTR ptr, WCHAR cIn, WCHAR cOut, BOOL fProtectExtra) { while (*ptr) { if (fProtectExtra && (*ptr == QUERY || *ptr == POUND )) { break; } if (*ptr == cIn) { *ptr = cOut; } ptr++; } } PUBLIC void WininetFixFileSlashes(WCHAR *p) { // NB: This function assumes that p points to a file URL. // The file URL *MUST* be of the form "file://...". // HTParse() guarantees that this will be so. int schemelen = 0; schemelen = SIZECHARS(L"file://") - 1; /* In UNIX system, we don't need to convert the SLASH to WHACK */ if (p && lstrlenW(p) > schemelen) { #ifdef UNIX ConvertChar(p + schemelen, WHACK, SLASH, TRUE); #else ConvertChar(p + schemelen, SLASH, WHACK, TRUE); #endif } } // // in the URL spec, it says that all whitespace should be ignored // due to the fact that it is possible to introduce // new whitespace and eliminate other whitespace // however, we are only going to strip out TAB CR LF // because we consider SPACE's to be significant. // PRIVATE inline BOOL IsInsignificantWhite(WCHAR ch) { return (ch == TAB || ch == CR || ch == LF); } #define IsWhite(c) ((DWORD) (c) > 32 ? FALSE : TRUE) PRIVATE void TrimAndStripInsignificantWhite(WCHAR *psz) { ASSERT(psz); if(*psz) { LPCWSTR pszSrc = psz; LPWSTR pszDest = psz; LPWSTR pszLastSpace = NULL; // first trim the front side by just moving the source pointer. while(*pszSrc && IsWhite(*pszSrc)) { pszSrc++; } // // Copy the body stripping "insignificant" white spaces. // Remember the last white space to trim trailing space later. 
// while (*pszSrc) { if(IsInsignificantWhite(*pszSrc)) { pszSrc++; } else { if (IsWhite(*pszSrc)) { if (pszLastSpace==NULL) { pszLastSpace = pszDest; } } else { pszLastSpace = NULL; } *pszDest++ = *pszSrc++; } } // Trim the trailing space if (pszLastSpace) { *pszLastSpace = L'\0'; } else { *pszDest = L'\0'; } } } struct EXTKEY { PCSTR szExt; PCWSTR wszExt; DWORD cchExt; }; const EXTKEY ExtTable[] = { { ".html", L".html", ARRAYSIZE(".html") - 1 }, { ".htm", L".htm", ARRAYSIZE(".htm") - 1 }, { ".xml", L".xml", ARRAYSIZE(".xml") - 1 }, { ".doc", L".doc", ARRAYSIZE(".doc") - 1 }, { ".xls", L".xls", ARRAYSIZE(".xls") - 1 }, { ".ppt", L".ppt", ARRAYSIZE(".ppt") - 1 }, { ".rtf", L".rtf", ARRAYSIZE(".rtf") - 1 }, { ".dot", L".dot", ARRAYSIZE(".dot") - 1 }, { ".xlw", L".xlw", ARRAYSIZE(".xlw") - 1 }, { ".pps", L".pps", ARRAYSIZE(".pps") - 1 }, { ".xlt", L".xlt", ARRAYSIZE(".xlt") - 1 }, { ".hta", L".hta", ARRAYSIZE(".hta") - 1 }, { ".pot", L".pot", ARRAYSIZE(".pot") - 1 }, { ".pdf", L".pdf", ARRAYSIZE(".pdf") - 1 } }; inline BOOL CompareExtA(PCSTR psz, DWORD_PTR cch) { for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++) { if (ExtTable[i].cchExt>cch) continue; if (!StrCmpNIA(psz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].szExt, ExtTable[i].cchExt)) return TRUE; } return FALSE; } inline BOOL CompareExtW(PCWSTR pwsz, DWORD_PTR cch) { for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++) { if (ExtTable[i].cchExt>cch) continue; if (!StrCmpNIW(pwsz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].wszExt, ExtTable[i].cchExt)) return TRUE; } return FALSE; } PRIVATE LPCSTR FindFragmentA(LPCSTR psz, BOOL fMBCS, BOOL fIsFile) { CHAR *pch = _StrChrA(psz, POUND, fMBCS); if(pch && fIsFile) { CHAR *pchQuery = _StrChrA(psz, QUERY, fMBCS); if (pchQuery && (pchQuery < pch)) goto exit; do { LONG_PTR cch = pch - psz; // REARCHITECT: we shouldn't hardcode ".htm". // #s are significant in dospaths - zekel 9-JUL-97 // so we want to check the path in front and make sure // that it is an html file. 
we believe this heuristic should work // in about 99% of all cases. // // if it is not an html file it is not a hash if (CompareExtA(pch, cch)) { break; } } while (pch = _StrChrA(++pch, POUND, fMBCS)); } exit: return pch; } PRIVATE LPCWSTR FindFragmentW(LPCWSTR psz, BOOL fIsFile) { WCHAR *pch = StrChrW(psz, POUND); if(pch && fIsFile) { WCHAR *pchQuery = StrChrW(psz, QUERY); if (pchQuery && (pchQuery < pch)) goto exit; do { LONG_PTR cch = pch - psz; // REARCHITECT: we shouldn't hardcode ".htm". // #s are significant in dospaths - zekel 9-JUL-97 // so we want to check the path in front and make sure // that it is an html file. we believe this heuristic should work // in about 99% of all cases. // // if it is not an html file it is not a hash if (CompareExtW(pch, cch)) { break; } } while (pch = StrChrW(++pch, POUND)); } exit: return pch; } PRIVATE VOID BreakFragment(LPWSTR *ppsz, PURLPARTS parts) { ASSERT(ppsz); ASSERT(*ppsz); // // Opaque URLs are not allowed to use fragments - zekel 27-feb-97 // Is it possible for an opaque URL to use a fragment? // right now we assume not. i suspect so but will leave it this way for now // this is especially important to javascript and vbscript // FEATURE: this might be worth investigation, but probably can't change this code // if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE) return; WCHAR *pch = (LPWSTR) FindFragmentW(*ppsz, parts->eScheme == URL_SCHEME_FILE); if (pch) { TERMSTR(pch); parts->pszFragment = pch +1; } } PRIVATE inline BOOL IsUrlPrefixA(LPCSTR psz) { // // Optimized for this particular case. Notice that most of it // will be lego-ized out anyway. // if (psz[0]=='u' || psz[0]=='U') { if (psz[1]=='r' || psz[1]=='R') { if (psz[2]=='l' || psz[2]=='L') { return TRUE; } } } return FALSE; // return !StrCmpNIA(psz, c_szURLPrefixA, c_cchURLPrefix); } PRIVATE inline BOOL IsUrlPrefixW(LPCWSTR psz) { // // Optimized for this particular case. Notice that most of it // will be lego-ized out anyway. 
// if (psz[0]==L'u' || psz[0]==L'U') { if (psz[1]==L'r' || psz[1]==L'R') { if (psz[2]==L'l' || psz[2]==L'L') { return TRUE; } } } return FALSE; // return !StrCmpNIW(psz, c_szURLPrefixW, c_cchURLPrefix); } // // if FindScheme() succeeds, it returns a pointer to the scheme, // and the cch holds the count of chars for the scheme // if it fails, and cch is non-zero then cch is how much should be skipped. // this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix. // LPCSTR FindSchemeA(LPCSTR psz, LPDWORD pcchScheme) { LPCSTR pch; DWORD cch; ASSERT(pcchScheme); ASSERT(psz); *pcchScheme = 0; for (pch = psz, cch = 0; *pch; pch++, cch++) { if (*pch == ':') { if (IsUrlPrefixA(psz)) { psz = pch +1; // set pcchScheme to skip past "URL:" *pcchScheme = cch + 1; // reset cch for the scheme len cch = -1; continue; } else { // // Scheme found if it is at least two characters if(cch > 1) { *pcchScheme = cch; return psz; } break; } } if(!IsValidSchemeCharA(*pch)) break; } return NULL; } // // FindSchemeW() around for Perf reasons for ParseURL() // Any changes in either FindScheme() needs to reflected in the other // LPCWSTR FindSchemeW(LPCWSTR psz, LPDWORD pcchScheme, BOOL fAllowSemicolon = FALSE) { LPCWSTR pch; DWORD cch; ASSERT(pcchScheme); ASSERT(psz); *pcchScheme = 0; for (pch = psz, cch = 0; *pch; pch++, cch++) { if (*pch == L':' || // Autocorrect permits a semicolon typo (fAllowSemicolon && *pch == L';')) { if (IsUrlPrefixW(psz)) { psz = pch +1; // set pcchScheme to skip past "URL:" *pcchScheme = cch + 1; // reset cch for the scheme len cch = -1; continue; } else { // // Scheme found if it is at least two characters if(cch > 1) { *pcchScheme = cch; return psz; } break; } } if(!IsValidSchemeCharW(*pch)) break; } return NULL; } PRIVATE DWORD CountSlashes(LPCWSTR *ppsz) { DWORD cSlashes = 0; LPCWSTR pch = *ppsz; while (IsSeparator(pch)) { *ppsz = pch; pch++; cSlashes++; } return cSlashes; } PRIVATE LPCWSTR FindDosPath(LPCWSTR psz) { if (IsDosDrive(psz) || 
IsUNC(psz)) { return psz; } else { DWORD cch; LPCWSTR pszScheme = FindSchemeW(psz, &cch); if (pszScheme && URL_SCHEME_FILE == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL)) { LPCWSTR pch = psz + cch + 1; DWORD c = CountSlashes(&pch); switch (c) { case 2: if(IsDosDrive(++pch)) return pch; break; case 4: return --pch; } } } return NULL; } /*+++ WininetCopyUrlForParse() this copies the url and prepends a "file://" if necessary This should never be called except from wininet everyone else should be calling UrlCreateFromPath() Parameters IN - pszDst the destination buffer pszSrc source buffer OUT - pszDst is filled with a Live URL Returns VOID NOTE - Assume "file:" if no scheme and it looks like fully-qualified file path. ---*/ static const WCHAR c_szFileSchemeString[] = L"file://"; PRIVATE HRESULT WininetCopyUrlForParse(PSHSTRW pstrDst, LPCWSTR pszSrc) { #ifndef UNIX if (IsDrive(pszSrc) || IsUNC(pszSrc)) { // // NOTE: the first SetStr will always succeed // because the default buffer is more than "file://" pstrDst->SetStr(c_szFileSchemeString); return pstrDst->Append(pszSrc); } else #endif /* !UNIX */ return pstrDst->SetStr(pszSrc); } PRIVATE HRESULT CopyUrlForParse(LPCWSTR pszUrl, PSHSTRW pstrUrl, DWORD dwFlags) { LPCWSTR pch; HRESULT hr; // // now we will make copies of the URLs so that we can rip them apart // WininetCopyUrlForParse() will prepend a file: if it wants... // if(dwFlags & URL_WININET_COMPATIBILITY) { hr = WininetCopyUrlForParse(pstrUrl, pszUrl); } else if(pch = FindDosPath(pszUrl)) { hr = SHUrlCreateFromPath(pch, pstrUrl, dwFlags); } else { hr = pstrUrl->SetStr(pszUrl); } // Trim leading and trailing whitespace // Remove tab and CRLF characters. Netscape does this. 
//
// BreakScheme() - break the scheme off of the front of the URL.
//
//   ppsz   IN/OUT  the remaining URL; advanced past "scheme:" (or
//                  past a bare "URL:" prefix) when one is found
//   parts  OUT     pszScheme, eScheme and dwFlags are set when a
//                  scheme is found
//
// Nothing is done for an empty string or one that starts with a
// drive.  WARNING: the scheme is terminated and lower cased in place.
//
PRIVATE VOID BreakScheme(LPWSTR *ppsz, PURLPARTS parts)
{
    if(!**ppsz || IsDrive(*ppsz))
        return;

    DWORD cch;

    //
    //  if FindScheme() succeeds, it returns a pointer to the scheme,
    //  and the cch holds the count of chars for the scheme
    //  if it fails, and cch is non-zero then cch is how much should be skipped.
    //  this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
    //
    if(NULL != (parts->pszScheme = (LPWSTR) FindSchemeW(*ppsz, &cch)))
    {
        //  overwrite the ':' so that the scheme stands alone
        parts->pszScheme[cch] = '\0';
        CharLowerW(parts->pszScheme);

        //  put the pointer past the scheme (and its ':') for next Break()
        *ppsz = parts->pszScheme + cch + 1;

#ifdef DEBUG
        if (g_dwPrototype & PF_LOGSCHEMEHITS)
        {
            // this is for logging of url schemes, to make sure that we have the right order
            int c = GetPrivateProfileIntW(L"SchemeHits", parts->pszScheme, 0, L"UrlPars.ini");
            WCHAR szc[25];
            StringCchPrintfW(szc, ARRAYSIZE(szc), L"%d", ++c);
            WritePrivateProfileStringW(L"SchemeHits", parts->pszScheme, szc, L"UrlPars.ini");
        }
#endif //DEBUG

        parts->eScheme = GetSchemeTypeAndFlagsW(parts->pszScheme, cch, &parts->dwFlags);
    }
    else if (cch)
    {
        //
        //  no scheme, only a "URL:" prefix.  on this path cch already
        //  includes the ':' (FindSchemeW sets it to the prefix length
        //  plus one), so skip exactly cch.  skipping cch + 1 would eat
        //  the first char of the path ("URL:/foo" -> "foo") and would
        //  step over the terminator for a bare "URL:".
        //
        *ppsz += cch;
    }
}
//
// DefaultBreakServer() - break the "//server" part off of the URL.
//
//   ppsz   IN/OUT  the remaining URL; left after the server and the
//                  slash that follows it, or at the end of the string
//   parts  OUT     pszServer is set when "//" introduces a server;
//                  dwFlags gets UPF_SEG_ABSOLUTE for a leading slash,
//                  or UPF_SCHEME_OPAQUE when there is a scheme but no
//                  leading slash
//
// WARNING: the slash that ends the server is terminated in place.
//
PRIVATE VOID DefaultBreakServer(LPWSTR *ppsz, PURLPARTS parts)
{
    if (**ppsz != SLASH)
    {
        // "scheme:something" without a slash is treated as opaque
        if (parts->pszScheme)
            parts->dwFlags |= UPF_SCHEME_OPAQUE;
        return;
    }

    // one slash makes it absolute
    parts->dwFlags |= UPF_SEG_ABSOLUTE;

    LPWSTR pszAfterFirst = (*ppsz) + 1;
    *ppsz = pszAfterFirst;

    if (*pszAfterFirst != SLASH)
        return;

    // two slashes, we have a winner!
    LPWSTR pszHost = pszAfterFirst + 1;
    parts->pszServer = pszHost;

    WCHAR *pchEnd = StrChrW(pszHost, SLASH);
    if (pchEnd)
    {
        TERMSTR(pchEnd);
        *ppsz = pchEnd + 1;
    }
    else
    {
        // the server is all that is left, move to the end of the string
        *ppsz = pszAfterFirst + lstrlenW(pszAfterFirst);
    }
}
as properly normalized on the local machine // so we have zero length pszServer // default: // there is just too many, we pretend that there is just one and ignore // the rest TERMSTR(*ppsz); parts->pszServer = *ppsz; (*ppsz)++; break; } // detect file://localserver/c:/path if(parts->pszServer && !StrCmpIW(parts->pszServer, L"localhost")) parts->pszServer = NULL; } PRIVATE VOID BreakServer(LPWSTR *ppsz, PURLPARTS parts, BOOL fConvert) { if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE) return; // // APPCOMPAT - we pretend that whacks are the equiv of slashes - zekel 17-MAR-97 // this is because the internet uses slashes and DOS // uses whacks. so for useability's sake we allow both. // but not in all cases. in particular, the "mk:" stream // protocol depends upon the buggy behavior of one of IE30's // many URL parsers treating relative URLs with whacks as one // segment. // NOTE: IE30 had inconsistent behavior WRT URLs. so we handled // this case differently depending on when we saw, looked, touched, or // played with these URLs. wininet would always convert, but mshtml // sometimes would other times not. // // with MK: we cannot convert the base, or the relative // but in breakpath we have to allow for the use of WHACK // to indicate a root path // // we dont have to fProtectExtra because query and fragments // are already broken off if necessary. 
//
// DefaultBreakSegments() - chop the path into segments at each SLASH.
//
//   psz    the path; every SLASH in it is replaced by a terminator
//   parts  cSegments is bumped once per SLASH found.  if the final
//          segment is empty, "." or "..", UPF_EXSEG_DIRECTORY is set;
//          an empty final segment is also not counted, as long as
//          more than one segment was counted.
//
PRIVATE VOID DefaultBreakSegments(LPWSTR psz, PURLPARTS parts)
{
    LPWSTR pszTail = psz;

    for (WCHAR *pchSlash = StrChrW(pszTail, SLASH);
         pchSlash;
         pchSlash = StrChrW(pszTail, SLASH))
    {
        TERMSTR(pchSlash);
        parts->cSegments++;
        pszTail = pchSlash + 1;
    }

    // pszTail is now the final segment
    BOOL fEmptyTail = (L'\0' == *pszTail);

    if (!fEmptyTail && !IsDot(pszTail) && !IsDotDot(pszTail))
        return;

    // a trailing slash leaves an empty segment behind, drop it
    if (fEmptyTail && parts->cSegments > 1)
        parts->cSegments--;

    parts->dwFlags |= UPF_EXSEG_DIRECTORY;
}
if (pszScheme && parts->eScheme == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL)) return TRUE; } return FALSE; } /*+++ BreakUrl() Break a URL for its consituent parts Parameters IN - the URL to crack open, need not be fully qualified OUT - parts absolute or relative may be nonzero (but not both). host, anchor and access may be nonzero if they were specified. Any which are nonzero point to zero terminated strings. Returns VOID Details - WARNING !! function munges the incoming buffer ---*/ #define BreakUrl(s, p) BreakUrls(s, p, NULL, NULL, NULL, 0) // // **BreakUrls()** // RETURNS // S_OK if the two urls need to be blended // S_FALSE if pszUrl is absolute, or there is no pszBase // failure some sort of memory allocation error // PRIVATE HRESULT BreakUrls(LPWSTR pszUrl, PURLPARTS parts, LPCWSTR pszBase, PSHSTRW pstrBase, PURLPARTS partsBase, DWORD dwFlags) { HRESULT hr = S_FALSE; ASSERT(pszUrl && parts); ZeroMemory(parts, SIZEOF(URLPARTS)); if(!*pszUrl) parts->dwFlags |= UPF_SEG_EMPTYSEG; // // WARNING: this order is specific, according to the proposed standard // if(*pszUrl || pszBase) { BOOL fConvert; BreakScheme(&pszUrl, parts); BreakFragment(&pszUrl, parts); BreakQuery(&pszUrl, parts); // // this is the first time that we need to access // pszBase if it exists, so this is when we copy and parse // if (_ShouldBreakBase(parts, pszBase)) { hr = CopyUrlForParse(pszBase, pstrBase, dwFlags); // this will be some kind of memory error if(FAILED(hr)) return hr; // ASSERT(hr != S_FALSE); BreakUrl(pstrBase->GetInplaceStr(), partsBase); fConvert = (partsBase->dwFlags & UPF_SCHEME_CONVERT); } else fConvert = (parts->dwFlags & UPF_SCHEME_CONVERT); BreakServer(&pszUrl, parts, fConvert); BreakPath(&pszUrl, parts); } return hr; } /*+++ BlendParts() & all dependant Blend* functions Blends the parts structures into one, taking the relavent bits from each one and dumping the unused data. 
Parameters
  IN -
    partsUrl    the primary or relative parts - Takes precedence
    partsBase   the base or referrer's parts

  OUT -
    partsOut    the combined result

  Returns
  VOID -

  NOTE:  this will frequently NULL out the entire partsBase.

---*/

//
//  BlendScheme()
//  Takes the scheme (and scheme flags) from partsUrl when it has one,
//  otherwise from partsBase.  When partsUrl supplies the scheme and it is
//  not usable relative to the base, partsBase is zeroed so that nothing
//  else is inherited from it.
//
PRIVATE VOID
BlendScheme(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    if(partsUrl->pszScheme)
    {
        LPCWSTR pszScheme = partsOut->pszScheme = partsUrl->pszScheme;
        URL_SCHEME eScheme = partsOut->eScheme = partsUrl->eScheme;

        partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SCHEME_MASK);

        //
        //  this checks to make sure that these are the same scheme, and
        //  that the scheme is allowed to be used in relative URLs
        //  file: is not allowed to because of weirdness with drive letters
        //  and \\UNC\shares
        //
        if ((eScheme && (eScheme != partsBase->eScheme) || eScheme == URL_SCHEME_FILE) ||
            (!partsBase->pszScheme) ||
            (partsBase->pszScheme && StrCmpW(pszScheme, partsBase->pszScheme)))
        {
            //  they are different schemes.  DUMP partsBase.
            ZeroMemory(partsBase, SIZEOF(URLPARTS));
        }
    }
    else
    {
        partsOut->pszScheme = partsBase->pszScheme;
        partsOut->eScheme = partsBase->eScheme;
        partsOut->dwFlags |= (partsBase->dwFlags & UPF_SCHEME_MASK);
    }
}

//
//  BlendServer()
//  Takes the server from partsUrl when present, otherwise from partsBase.
//  A server in partsUrl that differs from the base's server zeroes partsBase.
//
PRIVATE VOID
BlendServer(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    ASSERT(partsUrl && partsBase && partsOut);

    //
    //  if we have different hosts then everything but the pszAccess is DUMPED
    //
    if(partsUrl->pszServer)
    {
        partsOut->pszServer = partsUrl->pszServer;
        // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SERVER_MASK);

        if ((partsBase->pszServer && StrCmpW(partsUrl->pszServer, partsBase->pszServer)))
        {
            //  they are different Servers.  DUMP partsBase.
            ZeroMemory(partsBase, SIZEOF(URLPARTS));
        }
    }
    else
    {
        partsOut->pszServer = partsBase->pszServer;
        // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_SERVER_MASK);
    }
}

//
//  BlendPath()
//  Chooses the path segments for partsOut:
//    - absolute url path:   use it, but keep the base's locked first
//                           segment in front of it when only the base has one
//    - absolute base path:  keep the base segments and attach the url's
//                           segments as "extra" segments to be combined later
//    - otherwise:           whichever side has segments, url first
//  partsBase is zeroed whenever the url contributed segments (and always in
//  the absolute url case).
//
PRIVATE VOID
BlendPath(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    ASSERT(partsUrl && partsBase && partsOut);

    if (partsUrl->dwFlags & UPF_SEG_ABSOLUTE)
    {
        if((partsBase->dwFlags & UPF_SEG_LOCKFIRST) &&
            !(partsUrl->dwFlags & UPF_SEG_LOCKFIRST))
        {
            // this keeps the drive letters when necessary
            partsOut->pszSegments = partsBase->pszSegments;
            partsOut->cSegments = 1;  // only keep the first segment
            partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK) ;

            partsOut->pszExtraSegs = partsUrl->pszSegments;
            partsOut->cExtraSegs = partsUrl->cSegments;
            partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK);
        }
        else
        {
            //  just use the absolute path
            partsOut->pszSegments = partsUrl->pszSegments;
            partsOut->cSegments = partsUrl->cSegments;
            partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
        }

        ZeroMemory(partsBase, SIZEOF(URLPARTS));
    }
    else if ((partsBase->dwFlags & UPF_SEG_ABSOLUTE))
    {
        //  Adopt path not name
        partsOut->pszSegments = partsBase->pszSegments;
        partsOut->cSegments = partsBase->cSegments;
        partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK );

        if(partsUrl->cSegments || partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
        {
            //
            //  this is a relative path that needs to be combined
            //
            partsOut->pszExtraSegs = partsUrl->pszSegments;
            partsOut->cExtraSegs = partsUrl->cSegments;
            partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK );

            if (!(partsBase->dwFlags & UPF_EXSEG_DIRECTORY))
            {
                //
                //  knock off the file name segment
                //  as long as it isnt the first or the first is not locked
                //  or it isnt a dotdot.  in the case of http://site/dir/, dir/ is
                //  not actually killed, only the NULL terminator following it is.
                //
                LPWSTR pszLast = LastLiveSegment(partsOut->pszSegments, partsOut->cSegments, partsOut->dwFlags & UPF_SEG_LOCKFIRST);

                if(pszLast && !IsDotDot(pszLast))
                {
                    // an empty url keeps the result a directory once the name is gone
                    if(partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
                        partsOut->dwFlags |= UPF_EXSEG_DIRECTORY;

                    KILLSEG(pszLast);
                }
            }
        }
        else
            partsOut->dwFlags |= (partsBase->dwFlags & UPF_EXSEG_MASK);
    }
    else if (partsUrl->cSegments)
    {
        partsOut->pszSegments = partsUrl->pszSegments;
        partsOut->cSegments = partsUrl->cSegments;
        partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
    }
    else if (partsBase->cSegments)
    {
        partsOut->pszSegments = partsBase->pszSegments;
        partsOut->cSegments = partsBase->cSegments;
        partsOut->dwFlags |= (partsBase->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
    }

    //  regardless, we want to zero if we have relative segs
    if (partsUrl->cSegments)
        ZeroMemory(partsBase, SIZEOF(URLPARTS));
}

//
//  BlendQuery()
//  Takes the query from partsUrl when present, otherwise from partsBase.
//  A query in partsUrl that differs from the base's query zeroes partsBase.
//
PRIVATE VOID
BlendQuery(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    if(partsUrl->pszQuery)
    {
        LPCWSTR pszQuery = partsOut->pszQuery = partsUrl->pszQuery;
        // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Query_MASK);

        if ((partsBase->pszQuery && StrCmpW(pszQuery, partsBase->pszQuery)))
        {
            //  they are different Querys.  DUMP partsBase.
            ZeroMemory(partsBase, SIZEOF(URLPARTS));
        }
    }
    else
    {
        partsOut->pszQuery = partsBase->pszQuery;
        // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Query_MASK);
    }
}

//
//  BlendFragment()
//  Uses partsUrl's fragment (possibly NULL) when the url has a fragment or
//  any segments; otherwise inherits the base's fragment.
//
PRIVATE VOID
BlendFragment(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    if(partsUrl->pszFragment || partsUrl->cSegments)
    {
        LPCWSTR pszFragment = partsOut->pszFragment = partsUrl->pszFragment;
        // NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Fragment_MASK);

        // NOTE(review): pszFragment may be NULL here when only cSegments was set;
        // this relies on BlendPath() having zeroed partsBase in that case - confirm
        if ((partsBase->pszFragment && StrCmpW(pszFragment, partsBase->pszFragment)))
        {
            //  they are different Fragments.  DUMP partsBase.
            ZeroMemory(partsBase, SIZEOF(URLPARTS));
        }
    }
    else
    {
        partsOut->pszFragment = partsBase->pszFragment;
        // NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Fragment_MASK);
    }
}

//
//  BlendParts()
//  Zeroes partsOut and blends scheme, server, path, query and fragment in
//  that order.  The order matters: each step may zero partsBase, which
//  changes what the following steps can inherit.
//
PRIVATE VOID
BlendParts(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
{
    //
    //  partsUrl always takes priority over partsBase
    //
    ASSERT(partsUrl && partsBase && partsOut);

    ZeroMemory(partsOut, SIZEOF(URLPARTS));

    BlendScheme( partsUrl,  partsBase,  partsOut);
    BlendServer( partsUrl,  partsBase,  partsOut);
    BlendPath( partsUrl,  partsBase,  partsOut);
    BlendQuery( partsUrl,  partsBase,  partsOut);
    BlendFragment( partsUrl,  partsBase,  partsOut);
}

//
//  CanonServer()
//  For internet style schemes only: lowercases the server string starting
//  at the last '@' (or at its beginning when there is none) and cuts off
//  the port when it is the default one for http, ftp, gopher or https.
//
PRIVATE VOID
CanonServer(PURLPARTS parts)
{
    //
    //  we only do stuff if this server is an internet style
    //  server.  that way it uses FQDNs and IP port numbers
    //
    if (parts->pszServer && (parts->dwFlags & UPF_SCHEME_INTERNET))
    {
        LPWSTR pszName = StrRChrW(parts->pszServer, NULL, L'@');

        if(!pszName)
            pszName = parts->pszServer;

        //  this should just point to the FQDN:Port
        CharLowerW(pszName);

        //
        //  Ignore default port numbers, and trailing dots on FQDNs
        //  which will only cause identical addresses to look different
        //
        {
            WCHAR *pch = StrChrW(pszName, COLON);

            if (pch && parts->eScheme)
            {
                BOOL fIgnorePort = FALSE;

                //
                //  FEATURE we should actually be getting this from
                //  the services file to find out the default protocol port
                //  but we dont think that most people will change them - zekel 17-Dec-96
                //
                switch(parts->eScheme)
                {
                case URL_SCHEME_HTTP:
                    if(StrCmpW(pch, L":80") == 0)
                        fIgnorePort = TRUE;
                    break;

                case URL_SCHEME_FTP:
                    if(StrCmpW(pch, L":21") == 0)
                        fIgnorePort = TRUE;
                    break;

                case URL_SCHEME_GOPHER:
                    if(StrCmpW(pch, L":70") == 0)
                        fIgnorePort = TRUE;
                    break;

                case URL_SCHEME_HTTPS:
                    if(StrCmpW(pch, L":443") == 0)
                        fIgnorePort = TRUE;
                    break;

                default:
                    break;
                }

                if(fIgnorePort)
                    TERMSTR(pch);  // It is the default: ignore it
            }
        }
    }
}

//
//  CanonCombineSegs()
//  Resolves leading ".." entries of the extra segments against the base
//  segments: for each one, the last live base segment (if any) and the
//  ".." itself are killed.
//
PRIVATE VOID
CanonCombineSegs(PURLPARTS parts)
{
    ASSERT(parts);
    ASSERT(parts->pszExtraSegs && parts->cExtraSegs);

    LPWSTR pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
    LPWSTR pszExtra = parts->pszExtraSegs;
    DWORD iExtra = 0;
    DWORD cExtras = parts->cExtraSegs;

    // start at the first extra segment that is still live
    if(!IsLiveSegment(pszExtra))
        pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);

    while(pszExtra && IsDotDot(pszExtra))
    {
        if (pszLast)
            KILLSEG(pszLast);

        KILLSEG(pszExtra);

        pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
        pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
    }
}

//
//  CanonSegments()
//  Walks the live segments, killing "." segments and resolving ".."
//  against the previous live segment.  fLockFirst protects the first
//  segment from being removed by a "..".
//
PRIVATE VOID
CanonSegments(LPWSTR pszSeg, DWORD cSegs, BOOL fLockFirst)
{
    DWORD iSeg = 0;
    LPWSTR pszLastSeg = NULL;
    LPWSTR pszFirstSeg = pszSeg;
    BOOL fLastIsFirst = TRUE;
    BOOL fFirstSeg = TRUE;

    ASSERT (pszSeg && cSegs);

    pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);

    while (pszSeg)
    {
        if(IsDot(pszSeg))
        {
            //  if it is just a "." we can discard the segment
            KILLSEG(pszSeg);
        }
        else if(IsDotDot(pszSeg))
        {
            //  if it is ".." then we discard it and the last seg

            //
            //  if we are at the first (root) or
            //  the last is the root and it is locked
            //  then we dont want to do anything
            //
            if(pszLastSeg && !IsDotDot(pszLastSeg) && !(fLastIsFirst && fLockFirst))
            {
                KILLSEG(pszLastSeg);
                pszLastSeg = NULL;
                KILLSEG(pszSeg);
            }
        }

        if(IsLiveSegment(pszSeg))
        {
            // the segment survived: it becomes the candidate for a following ".."
            if(!pszLastSeg && fFirstSeg)
                fLastIsFirst = TRUE;
            else
                fLastIsFirst = FALSE;

            pszLastSeg = pszSeg;
            fFirstSeg = FALSE;
        }
        else
        {
            // the segment was killed: look back for the last one still alive
            pszLastSeg = LastLiveSegment(pszFirstSeg, iSeg, fLockFirst);
        }

        pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
    }
}

//
//  CanonPath()
//  Canonicalizes the segments and the extra segments separately, then
//  combines them so that leading ".." extras eat into the base segments.
//
PRIVATE VOID
CanonPath(PURLPARTS parts)
{
    ASSERT(parts);

    if(parts->cSegments)
        CanonSegments(parts->pszSegments, parts->cSegments, (parts->dwFlags & UPF_SEG_LOCKFIRST));

    // the extra segments never have a locked first segment
    if(parts->cExtraSegs)
        CanonSegments(parts->pszExtraSegs, parts->cExtraSegs, FALSE);

    if(parts->cExtraSegs)
        CanonCombineSegs(parts);
}

//
//  CanonParts()
//  Canonicalizes the parts in place.  Only the server and the path are
//  touched right now.
//
PRIVATE VOID
CanonParts(PURLPARTS parts)
{
    ASSERT(parts);

    //CanonScheme(parts);
    CanonServer(parts);
    CanonPath(parts);
    //CanonQuery(parts);
    //CanonFragment(parts);
}

PRIVATE HRESULT
BuildScheme(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    // Appends "<scheme>:" to pstr when the parts carry a scheme.
    // dwFlags is not used here.
    HRESULT hr = S_OK;

    ASSERT(parts && pstr);

    if(parts->pszScheme)
    {
        hr = pstr->Append(parts->pszScheme);
        if(SUCCEEDED(hr))
            hr = pstr->Append(COLON);
    }

    return hr;
}

//
//  BuildServer()
//  Appends the scheme dependent slash prefix followed by the server
//  name (when there is one) to pstr.  Returns the result of the last Append.
//
PRIVATE HRESULT
BuildServer(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    HRESULT hr = S_OK;

    ASSERT(parts && pstr);

    switch(parts->eScheme)
    {
    case URL_SCHEME_MK:
        // CraigC's "mk:" has no // but acts like it does
        break;

    case URL_SCHEME_FILE:
        if ((dwFlags & URL_WININET_COMPATIBILITY) || (dwFlags & URL_FILE_USE_PATHURL))
        {
            // non-empty server: "////", drive path: "/", other absolute path: "//"
            if(parts->pszServer && *parts->pszServer)
                hr = pstr->Append(L"////");
            else if (parts->pszSegments && IsDrive(parts->pszSegments))
                hr = pstr->Append(SLASH);
            else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
                hr = pstr->Append(L"//");
        }
        else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
            hr = pstr->Append(L"//");
        break;

    default:
        if(parts->pszServer && SUCCEEDED(hr))
            hr = pstr->Append(L"//");
        break;
    }

    if(parts->pszServer && SUCCEEDED(hr))
        hr = pstr->Append(parts->pszServer);

    return hr;
}

//
//  BuildSegments()
//  Appends the live segments to pstr, separated by SLASH.  When fRoot is
//  FALSE the first live segment is appended without a leading slash.
//
//  *pfSlashLast is set to TRUE when the last loop pass appended only a
//  slash (empty segment) or failed to append the slash.
//
//  Returns S_FALSE when nothing was appended, otherwise the result of the
//  last Append.
//
PRIVATE HRESULT
BuildSegments(LPWSTR pszSeg, DWORD cSegs, PSHSTRW pstr, BOOL fRoot, BOOL *pfSlashLast)
{
    DWORD iSeg = 0;
    HRESULT hr = S_FALSE;

    *pfSlashLast = FALSE;

    ASSERT(pszSeg && pstr);

    pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);

    if(!fRoot && pszSeg)
    {
        hr = pstr->Append(pszSeg);

        if(SUCCEEDED(hr))
            pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
        else
            pszSeg = NULL;
    }

    while (pszSeg)
    {
        hr = pstr->Append(SLASH);

        if(SUCCEEDED(hr) && *pszSeg)
        {
            hr = pstr->Append(pszSeg);
            *pfSlashLast = FALSE;
        }
        else
            *pfSlashLast = TRUE;

        if(SUCCEEDED(hr))
            pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
        else
            break;
    }

    return hr;
}

//
//  BuildPath()
//  Appends the segments, then the extra segments, then a trailing slash
//  when the path is a directory or is an absolute path without any live
//  segment.
//
//  NOTE(review): the results of the two Append(SLASH) calls that follow
//  BuildSegments() are not checked.
//
PRIVATE HRESULT
BuildPath(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    HRESULT hr = S_OK;
    BOOL fSlashLast = FALSE;
    DWORD iSeg;
    LPWSTR pszSegFirst = NULL;

    ASSERT(parts && pstr);

    if(parts->cSegments)
    {
        hr = BuildSegments(parts->pszSegments, parts->cSegments, pstr, parts->dwFlags & UPF_SEG_ABSOLUTE, &fSlashLast);

        if (fSlashLast)
            pstr->Append(SLASH);
    }

    if(SUCCEEDED(hr) && parts->cExtraSegs)
    {
        BOOL f = fSlashLast;

        // the extras start with a slash unless the segments already ended on one
        hr = BuildSegments(parts->pszExtraSegs, parts->cExtraSegs, pstr, !fSlashLast, &fSlashLast);

        if (fSlashLast)
            pstr->Append(SLASH);

        // S_FALSE means no extra segment was appended: keep the previous state
        if (hr == S_FALSE)
            fSlashLast = f;
    }

    //  trailing slash on a server name for IIS
    if( !fSlashLast &&
        (
          (parts->dwFlags & UPF_EXSEG_DIRECTORY) ||
          //  if this is just a server name by itself
          (!FirstLiveSegment(parts->pszSegments, &iSeg, parts->cSegments) &&
           !FirstLiveSegment(parts->pszExtraSegs, &iSeg, parts->cExtraSegs) &&
           parts->dwFlags & UPF_SEG_ABSOLUTE)
        )
      )
    {
        hr = pstr->Append(SLASH);
    }

    return hr;
}

//
//  BuildQuery()
//  Appends "?<query>" to pstr when the parts carry a query.
//
PRIVATE HRESULT
BuildQuery(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    HRESULT hr = S_OK;

    ASSERT(parts && pstr);

    if(parts->pszQuery)
    {
        hr = pstr->Append(QUERY);
        if(SUCCEEDED(hr))
            hr = pstr->Append(parts->pszQuery);
    }

    return hr;
}

//
//  BuildFragment()
//  Appends "#<fragment>" to pstr when the parts carry a fragment.
//
PRIVATE HRESULT
BuildFragment(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    HRESULT hr = S_OK;

    ASSERT(parts && pstr);

    if(parts->pszFragment)
    {
        hr = pstr->Append(POUND);
        if(SUCCEEDED(hr))
            hr = pstr->Append(parts->pszFragment);
    }

    return hr;
}

//
//  BuildUrl()
//  Appends scheme, server, path, query and fragment to pstr, in that
//  order, stopping at the first failure.
//
PRIVATE HRESULT
BuildUrl(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
{
    HRESULT hr;

    ASSERT(parts && pstr);

    if( (SUCCEEDED(hr = BuildScheme(parts, dwFlags, pstr))) &&
        (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstr))) &&
        (SUCCEEDED(hr = BuildPath(parts, dwFlags, pstr))) &&
        (SUCCEEDED(hr = BuildQuery(parts, dwFlags, pstr))) )
        hr = BuildFragment(parts, dwFlags, pstr);

    return hr;
}

/*+++

  SHUrlEscape()
    Escapes a URL
    right now, i am only escaping stuff in the Path part of the URL

  Parameters
  IN -
    pszUrl      URL to examine
    pstrOut     SHSTR destination
    dwFlags     the relevant URL_* flags,

  Returns
  HRESULT -
    SUCCESS     S_OK
    ERROR       only E_OUTOFMEMORY

  Helper Routines
    Escape*(part)               each part gets its own escape routine (ie EscapeScheme)
    EscapeSpaces                will only escape spaces (WININET compatibility mostly)
    EscapeSegmentsGetNeededSize gets the required size of destination buffer for all path segments
    EscapeLiveSegment           does the work of escaping
each path segment ---*/ PRIVATE HRESULT EscapeSpaces(LPCWSTR psz, PSHSTRW pstr, DWORD dwFlags) { HRESULT hr = S_OK; LPCWSTR pch; DWORD cSpaces = 0; ASSERT(psz && pstr); pstr->Reset(); for (pch = psz; *pch; pch++) { if (*pch == SPC) cSpaces++; } if(cSpaces) { hr = pstr->SetSize(lstrlenW(psz) + cSpaces * 2 + 1); if(SUCCEEDED(hr)) { int cchRemaing = pstr->GetSize(); LPWSTR pchOut = pstr->GetInplaceStr(); for (pch = psz; *pch; pch++) { if ((*pch == POUND || *pch == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO)) { int cchCopied; StringCchCopyW(pchOut, cchRemaing, pch); cchCopied = lstrlenW(pchOut); pchOut += cchCopied; cchRemaing -= cchCopied; break; } if (*pch == SPC) { *pchOut++ = HEX_ESCAPE; *pchOut++ = L'2'; *pchOut++ = L'0'; cchRemaing -= 3; } else { *pchOut++ = *pch; cchRemaing--; } ASSERT(cchRemaing >= 0); } TERMSTR(pchOut); } } else { hr = pstr->SetStr(psz); } return hr; } inline PRIVATE HRESULT EscapeScheme(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { ASSERT(partsUrl && partsOut); partsOut->pszScheme = partsUrl->pszScheme; partsOut->eScheme = partsUrl->eScheme; return S_OK; } inline PRIVATE HRESULT EscapeServer(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { ASSERT(partsUrl && partsOut); partsOut->pszServer = partsUrl->pszServer; return S_OK; } inline PRIVATE HRESULT EscapeQuery(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { ASSERT(partsUrl && partsOut); partsOut->pszQuery = partsUrl->pszQuery; return S_OK; } inline PRIVATE HRESULT EscapeFragment(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { ASSERT(partsUrl && partsOut); partsOut->pszFragment = partsUrl->pszFragment; return S_OK; } PRIVATE BOOL GetEscapeStringSize(LPWSTR psz, DWORD dwFlags, LPDWORD pcch) { BOOL fResize = FALSE; ASSERT(psz); ASSERT(pcch); for (*pcch = 0; *psz; psz++) { (*pcch)++; if(!IsSafePathChar(*psz) || ((dwFlags & URL_ESCAPE_PERCENT) && (*psz == HEX_ESCAPE))) { fResize = TRUE; *pcch += 2; } 
} // for the NULL term (*pcch)++; return fResize; } PRIVATE DWORD EscapeSegmentsGetNeededSize(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags) { DWORD cchNeeded = 0; BOOL fResize = FALSE; LPWSTR pszSeg; DWORD iSeg; ASSERT(pszSegments && cSegs); pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs); while (IsLiveSegment(pszSeg)) { DWORD cch; if(GetEscapeStringSize(pszSeg, dwFlags, &cch)) fResize = TRUE; cchNeeded += cch; pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs); } return fResize ? cchNeeded : 0; } PRIVATE VOID EscapeString(LPCWSTR pszSeg, DWORD dwFlags, LPWSTR *ppchOut) { LPWSTR pchIn; // This pointer has been trusted to not modify it's contents, just iterate. LPWSTR pchOut = *ppchOut; WCHAR ch; for (pchIn = (LPWSTR)pszSeg; *pchIn; pchIn++) { ch = *pchIn; if (!IsSafePathChar(ch) || ((dwFlags & URL_ESCAPE_PERCENT) && (ch == HEX_ESCAPE))) { *pchOut++ = HEX_ESCAPE; *pchOut++ = hex[(ch >> 4) & 15]; *pchOut++ = hex[ch & 15]; } else *pchOut++ = *pchIn; } TERMSTR(pchOut); // move past the terminator pchOut++; *ppchOut = pchOut; } PRIVATE HRESULT EscapeSegments(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { DWORD cchNeeded; HRESULT hr = S_OK; ASSERT(pszSegments && cSegs && partsOut && pstr); cchNeeded = EscapeSegmentsGetNeededSize(pszSegments, cSegs, dwFlags); if(cchNeeded) { ASSERT(pstr); hr = pstr->SetSize(cchNeeded); if(SUCCEEDED(hr)) { LPWSTR pchOut = pstr->GetInplaceStr(); LPWSTR pszSeg; DWORD iSeg; partsOut->pszSegments = pchOut; partsOut->cSegments = 0; pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs); while (IsLiveSegment(pszSeg)) { EscapeString(pszSeg, dwFlags, &pchOut); partsOut->cSegments++; pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs); } } } else { partsOut->cSegments = cSegs; partsOut->pszSegments = pszSegments; } return hr; } PRIVATE HRESULT EscapePath(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr) { HRESULT hr = S_OK; ASSERT(partsUrl && partsOut && pstr); if(partsUrl->cSegments) { hr = 
EscapeSegments(partsUrl->pszSegments, partsUrl->cSegments, dwFlags, partsOut, pstr); } else { partsOut->cSegments = 0; partsOut->pszSegments = NULL; } return hr; } HRESULT SHUrlEscape (LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags) { #ifdef TESTING_SPACES_ONLY return EscapeSpaces(pszUrl, pstrOut, dwFlags); #else //TESTING_SPACES_ONLY SHSTRW strUrl; HRESULT hr; ASSERT(pszUrl && pstrOut); if(!pszUrl || !pstrOut) return E_INVALIDARG; // // EscapeSpaces is remarkably poor, // but so is this kind of functionality... // it doesnt do any kind of real parsing, it // only looks for spaces and escapes them... // if(dwFlags & URL_ESCAPE_SPACES_ONLY) return EscapeSpaces(pszUrl, pstrOut, dwFlags); // We are just passed a segment so we only want to // escape that and nothing else. Don't look for // URL pieces. if(dwFlags & URL_ESCAPE_SEGMENT_ONLY) { URLPARTS partsOut; SHSTRW strTemp; EscapeSegments((LPWSTR)pszUrl, 1, dwFlags, &partsOut, &strTemp); pstrOut->SetStr(partsOut.pszSegments); return S_OK; } pstrOut->Reset(); hr = strUrl.SetStr(pszUrl); if(SUCCEEDED(hr)) { URLPARTS partsUrl, partsOut; SHSTRW strPath; BreakUrl(strUrl.GetInplaceStr(), &partsUrl); ZeroMemory(&partsOut, SIZEOF(URLPARTS)); // // NOTE the only function here that is really active right now is the EscapePath // if some other part needs to be escaped, then add a new SHSTR in the 4th param // and change the appropriate subroutine // if( (SUCCEEDED(hr = EscapeScheme(&partsUrl, dwFlags, &partsOut, NULL))) && (SUCCEEDED(hr = EscapeServer(&partsUrl, dwFlags, &partsOut, NULL))) && (SUCCEEDED(hr = EscapePath(&partsUrl, dwFlags, &partsOut, &strPath))) && (SUCCEEDED(hr = EscapeQuery(&partsUrl, dwFlags, &partsOut, NULL))) && (SUCCEEDED(hr = EscapeFragment(&partsUrl, dwFlags, &partsOut, NULL))) ) { partsOut.dwFlags = partsUrl.dwFlags; hr = BuildUrl(&partsOut, dwFlags, pstrOut); } } else hr = E_OUTOFMEMORY; return hr; #endif //TESTING_SPACES_ONLY } /*+++ SHUrlUnescape() Unescapes a string in place. 
this is ok because it should never grow Parameters IN - psz string to unescape inplace dwFlags the relevant URL_* flags, Returns HRESULT - SUCCESS S_OK ERROR DOESNT error right now Helper Routines HexToWord takes a hexdigit and returns WORD with the right number or -1 IsEscapedChar looks at a ptr for "%XX" where X is a hexdigit TranslateEscapedChar translates "%XX" to an 8 bit char ---*/ PRIVATE WORD HexToWord(WCHAR ch) { if(ch >= TEXT('0') && ch <= TEXT('9')) return (WORD) ch - TEXT('0'); if(ch >= TEXT('A') && ch <= TEXT('F')) return (WORD) ch - TEXT('A') + 10; if(ch >= TEXT('a') && ch <= TEXT('f')) return (WORD) ch - TEXT('a') + 10; ASSERT(FALSE); //we have tried to use a non-hex number return (WORD) -1; } PRIVATE BOOL inline IsEscapedOctetW(LPCWSTR pch) { return (pch[0] == HEX_ESCAPE && IsHex(pch[1]) && IsHex(pch[2])) ? TRUE : FALSE; } PRIVATE BOOL inline IsEscapedOctetA(LPCSTR pch) { return (pch[0] == HEX_ESCAPE_A && IsHex((WCHAR)pch[1]) && IsHex((WCHAR)pch[2])) ? TRUE : FALSE; } PRIVATE WCHAR TranslateEscapedOctetW(LPCWSTR pch) { WCHAR ch; ASSERT(IsEscapedOctetW(pch)); pch++; ch = (WCHAR) HexToWord(*pch++) * 16; // hi nibble ch += HexToWord(*pch); // lo nibble return ch; } PRIVATE CHAR TranslateEscapedOctetA(LPCSTR pch) { CHAR ch; ASSERT(IsEscapedOctetA(pch)); pch++; ch = (CHAR) HexToWord(*pch++) * 16; // hi nibble ch += HexToWord(*pch); // lo nibble return ch; } HRESULT SHUrlUnescapeA(LPSTR psz, DWORD dwFlags) { CHAR *pchSrc = psz; CHAR *pchDst = psz; while (*pchSrc) { if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO)) { while (*pchDst++ = *pchSrc++) {}; break; } if (IsEscapedOctetA(pchSrc)) { CHAR ch = TranslateEscapedOctetA(pchSrc); *pchDst++ = ch; pchSrc += 3; // enuff for "%XX" } else { *pchDst++ = *pchSrc++; } } TERMSTR(pchDst); return S_OK; } HRESULT SHUrlUnescapeW(LPWSTR psz, DWORD dwFlags) { WCHAR *pchSrc = psz; WCHAR *pchDst = psz; while (*pchSrc) { if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & 
URL_DONT_ESCAPE_EXTRA_INFO)) { while (*pchDst++ = *pchSrc++) {}; break; } if (IsEscapedOctetW(pchSrc)) { WCHAR ch = TranslateEscapedOctetW(pchSrc); *pchDst++ = ch; pchSrc += 3; // enuff for "%XX" } else { *pchDst++ = *pchSrc++; } } TERMSTR(pchDst); return S_OK; } PRIVATE HRESULT BuildDosPath(PURLPARTS parts, PSHSTRW pstrOut, DWORD dwFlags) { HRESULT hr; // this will disable a preceding slash when there is a drive if(parts->pszSegments && IsDrive(parts->pszSegments)) parts->dwFlags = (parts->dwFlags & ~UPF_SEG_ABSOLUTE); // if there is a zero length server then // we skip building it if(parts->pszServer && !*parts->pszServer) parts->pszServer = NULL; // this prevents all the special file goo checking parts->eScheme = URL_SCHEME_UNKNOWN; // // then go ahead and put the path together if( (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstrOut))) && (!parts->cSegments || SUCCEEDED(hr = BuildPath(parts, dwFlags, pstrOut))) ) { // then decode it cuz paths arent escaped if (IsFlagSet(dwFlags, URL_FILE_USE_PATHURL)) WininetFixFileSlashes(pstrOut->GetInplaceStr()); else #ifndef UNIX ConvertChar(pstrOut->GetInplaceStr(), SLASH, WHACK, TRUE); #else ConvertChar(pstrOut->GetInplaceStr(), WHACK, SLASH, TRUE); #endif if(IsFlagClear(parts->dwFlags, UPF_FILEISPATHURL)) SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags); if(IsDriveUrl(*pstrOut)) { LPWSTR pszTemp = pstrOut->GetInplaceStr(); pszTemp[1] = COLON; } } return hr; } HRESULT SHPathCreateFromUrl(LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags) { HRESULT hr; SHSTRW strUrl; ASSERT(pszUrl && pstrOut); pstrOut->Reset(); hr = strUrl.SetStr(pszUrl); if(SUCCEEDED(hr)) { URLPARTS partsUrl; // first we need to break it open BreakUrl(strUrl.GetInplaceStr(), &partsUrl); // then we make sure it is a file: if(partsUrl.eScheme == URL_SCHEME_FILE) { hr = BuildDosPath(&partsUrl, pstrOut, dwFlags); } else hr = E_INVALIDARG; } return hr; } HRESULT SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags) { HRESULT hr; SHSTRW strPath; 
ASSERT(pszPath && pstrOut); if(PathIsURLW(pszPath)) { if(SUCCEEDED(hr = pstrOut->SetStr(pszPath))) return S_FALSE; else return hr; } pstrOut->Reset(); hr = strPath.SetStr(pszPath); TrimAndStripInsignificantWhite(strPath.GetInplaceStr()); if(SUCCEEDED(hr)) { URLPARTS partsIn, partsOut; SHSTRW strEscapedPath, strEscapedServer; LPWSTR pch = strPath.GetInplaceStr(); ZeroMemory(&partsIn, SIZEOF(URLPARTS)); partsIn.pszScheme = (LPWSTR)c_szFileScheme; partsIn.eScheme = URL_SCHEME_FILE; partsIn.dwFlags = UPF_SCHEME_CONVERT; // first break the path BreakFragment(&pch, &partsIn); BreakServer(&pch, &partsIn, TRUE); BreakPath(&pch, &partsIn); partsOut = partsIn; // then escape the path if we arent using path URLs if (IsFlagClear(dwFlags, URL_FILE_USE_PATHURL)) { hr = EscapePath(&partsIn, dwFlags | URL_ESCAPE_PERCENT, &partsOut, &strEscapedPath); if(SUCCEEDED(hr) && partsOut.pszServer) { // // i am treating the pszServer exactly like a path segment // DWORD cchNeeded; if(GetEscapeStringSize(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &cchNeeded) && SUCCEEDED(hr = strEscapedServer.SetSize(cchNeeded))) { pch = strEscapedServer.GetInplaceStr(); EscapeString(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &pch); partsOut.pszServer = strEscapedServer.GetInplaceStr(); } } } if(!partsOut.pszServer && IsFlagSet(partsOut.dwFlags, UPF_SEG_ABSOLUTE)) partsOut.pszServer = L""; // then build the URL if(SUCCEEDED(hr)) { if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL)) { if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString))) hr = BuildDosPath(&partsOut, pstrOut, dwFlags); } else hr = BuildUrl(&partsOut, dwFlags, pstrOut); } if (SUCCEEDED(hr) && (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY))) WininetFixFileSlashes(pstrOut->GetInplaceStr()); } return hr; } /*+++ SHUrlParse() Canonicalize an URL or Combine and Canonicalize two URLs Parameters IN - pszBase the base or referring URL, may be NULL pszUrl the relative URL dwFlags the relevant URL_* 
flags, Returns HRESULT - SUCCESS S_OK ERROR appropriate error, usually just E_OUTOFMEMORY; NOTE: pszUrl will always take precedence over pszBase. ---*/ HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags) { HRESULT hr = S_OK; URLPARTS partsUrl, partsOut, partsBase; SHSTRW strBase; SHSTRW strUrl; ASSERT(pszUrl); ASSERT(pstrOut); TraceMsgW(TF_URL | TF_FUNC, "entering SHUrlParse(%s, %s, 0x%X", pszBase,pszUrl ? pszUrl : L"NULL", dwFlags); pstrOut->Reset(); // // Don't bother parsing if all we have in an inter-page link as the // pszUrl and no pszBase to parse // if (pszUrl[0] == POUND && (!pszBase || !*pszBase)) { hr = pstrOut->SetStr(pszUrl); goto quit; } // // for Perf reasons we want to parse the relative url first. // if it is an absolute URL, we need never look at the base. // hr = CopyUrlForParse(pszUrl, &strUrl, dwFlags); if(FAILED(hr)) goto quit; // -- Cybersitter compat ---- // Some bug fix broke the original parser. No time to go back and // fix it, but since we know what to expect, we'll return this straight instead. // Basically, when we canonicalize ://, we produce :/// if (!StrCmpW(strUrl, L"://")) { hr = pstrOut->SetStr(L":///"); goto quit; } // // BreakUrls will decide if it is necessary to look at the relative // hr = BreakUrls(strUrl.GetInplaceStr(), &partsUrl, pszBase, &strBase, &partsBase, dwFlags); if(FAILED(hr)) goto quit; if(S_OK == hr) { // // this is where the real combination logic happens // this first parts is the one that takes precedence // BlendParts(&partsUrl, &partsBase, &partsOut); } else partsOut = partsUrl; // // we will now do the work of putting it together // if these fail, it is because we are out of memory. 
// if (!(dwFlags & URL_DONT_SIMPLIFY)) CanonParts(&partsOut); if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL)) { if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString))) hr = BuildDosPath(&partsOut, pstrOut, dwFlags); } else hr = BuildUrl(&partsOut, dwFlags, pstrOut); if(SUCCEEDED(hr)) { if (dwFlags & URL_UNESCAPE) SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags); if (dwFlags & URL_ESCAPE_SPACES_ONLY || dwFlags & URL_ESCAPE_UNSAFE) { // // we are going to reuse strUrl here // hr = strUrl.SetStr(*pstrOut); if(SUCCEEDED(hr)) hr = SHUrlEscape(strUrl, pstrOut, dwFlags); } } if (SUCCEEDED(hr) && (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)) && (partsOut.eScheme == URL_SCHEME_FILE)) WininetFixFileSlashes(pstrOut->GetInplaceStr()); quit: if(FAILED(hr)) { pstrOut->Reset(); TraceMsg(TF_URL | TF_FUNC, TEXT("FAILED SHUrlParse() hr = 0x%X\n"), hr); } else TraceMsgW(TF_URL | TF_FUNC, "SUCCEEDED SHUrlParse() %s\n", (LPCWSTR)*pstrOut); return hr; } typedef struct _LOGON { LPWSTR pszUser; LPWSTR pszPass; LPWSTR pszHost; LPWSTR pszPort; } LOGON, *PLOGON; PRIVATE void BreakLogon(LPWSTR psz, PLOGON plo) { ASSERT(psz); ASSERT(plo); WCHAR *pch = StrChrW(psz, L'@'); if(pch) { TERMSTR(pch); plo->pszHost = pch + 1; plo->pszUser = psz; pch = StrChrW(psz, COLON); if (pch) { TERMSTR(pch); plo->pszPass = pch + 1; } } else plo->pszHost = psz; pch = StrChrW(plo->pszHost, COLON); if (pch) { TERMSTR(pch); plo->pszPort = pch + 1; } } PRIVATE HRESULT InternetGetPart(DWORD dwPart, PURLPARTS parts, PSHSTRW pstr, DWORD dwFlags) { HRESULT hr = E_FAIL; if(parts->pszServer) { LOGON lo = {0}; BreakLogon(parts->pszServer, &lo); switch (dwPart) { case URL_PART_HOSTNAME: hr = pstr->Append(lo.pszHost); break; case URL_PART_USERNAME: hr = pstr->Append(lo.pszUser); break; case URL_PART_PASSWORD: hr = pstr->Append(lo.pszPass); break; case URL_PART_PORT: hr = pstr->Append(lo.pszPort); break; default: ASSERT(FALSE); } } return hr; } PRIVATE HRESULT SHUrlGetPart(PSHSTRW 
pstrIn, PSHSTRW pstrOut, DWORD dwPart, DWORD dwFlags) { ASSERT(pstrIn); ASSERT(pstrOut); ASSERT(dwPart); HRESULT hr = S_OK; URLPARTS parts; BreakUrl(pstrIn->GetInplaceStr(), &parts); if(dwFlags & URL_PARTFLAG_KEEPSCHEME) { hr = pstrOut->SetStr(parts.pszScheme); if(SUCCEEDED(hr)) hr = pstrOut->Append(COLON); } else pstrOut->Reset(); if(SUCCEEDED(hr)) { switch (dwPart) { case URL_PART_SCHEME: hr = pstrOut->SetStr(parts.pszScheme); break; case URL_PART_HOSTNAME: if (parts.eScheme == URL_SCHEME_FILE) { hr = pstrOut->SetStr(parts.pszServer); break; } // else fall through case URL_PART_USERNAME: case URL_PART_PASSWORD: case URL_PART_PORT: if(parts.dwFlags & UPF_SCHEME_INTERNET) { hr = InternetGetPart(dwPart, &parts, pstrOut, dwFlags); } else hr = E_FAIL; break; case URL_PART_QUERY: hr = pstrOut->SetStr(parts.pszQuery); break; default: ASSERT(FALSE); hr = E_UNEXPECTED; } } return hr; } #define c_szURLPrefixesKey "Software\\Microsoft\\Windows\\CurrentVersion\\URL\\Prefixes" const WCHAR c_szDefaultURLPrefixKey[] = L"Software\\Microsoft\\Windows\\CurrentVersion\\URL\\DefaultPrefix"; PRIVATE inline LPCWSTR SkipLeadingSlashes(LPCWSTR psz) { // Skip two leading slashes. 
if (psz[0] == SLASH && psz[1] == SLASH) psz += 2; return psz; } PRIVATE HRESULT UrlGuessScheme(LPCWSTR pszUrl, PSHSTRW pstr) { HRESULT hr = S_FALSE; ASSERT(pszUrl && pstr); HKEY hkeyPrefixes; if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, c_szURLPrefixesKey, 0, KEY_QUERY_VALUE, &hkeyPrefixes) == ERROR_SUCCESS) { DWORD dwiValue; CHAR rgchValueName[MAX_PATH]; DWORD cchValueName = SIZECHARS(rgchValueName); DWORD dwType; CHAR rgchPrefix[MAX_PATH]; DWORD cbPrefix = SIZEOF(rgchPrefix); // need to get past the initial two slashes if applicable pszUrl = SkipLeadingSlashes(pszUrl); for (dwiValue = 0; RegEnumValueA(hkeyPrefixes, dwiValue, rgchValueName, &cchValueName, NULL, &dwType, (PBYTE)rgchPrefix, &cbPrefix) == ERROR_SUCCESS; dwiValue++) { WCHAR wszValue[MAX_PATH]; MultiByteToWideChar(CP_ACP, 0, rgchValueName, -1, wszValue, ARRAYSIZE(wszValue)); // we check to make sure that we match and there is something more if (!StrCmpNIW(pszUrl, wszValue, cchValueName) && pszUrl[cchValueName]) { MultiByteToWideChar(CP_ACP, 0, rgchPrefix, -1, wszValue, ARRAYSIZE(wszValue)); if(SUCCEEDED(hr = pstr->SetStr(wszValue))) hr = pstr->Append(pszUrl); break; } cchValueName = SIZECHARS(rgchValueName); cbPrefix = SIZEOF(rgchPrefix); } RegCloseKey(hkeyPrefixes); } return(hr); } /*---------------------------------------------------------- Purpose: Grabs the default URL prefix in the registry and applies it to the given URL. 
Returns: S_OK S_FALSE if there is no default prefix */ const WCHAR c_szDefaultScheme[] = L"http://"; HRESULT UrlApplyDefaultScheme( LPCWSTR pszUrl, PSHSTRW pstr) { HRESULT hr = S_FALSE; WCHAR szDef[MAX_PATH]; DWORD cbSize = SIZEOF(szDef); ASSERT(pszUrl && pstr); ASSERT(!PathIsURLW(pszUrl)); DWORD dwType; if (NO_ERROR == SHRegGetUSValueW(c_szDefaultURLPrefixKey, NULL, &dwType, (LPVOID)szDef, &cbSize, TRUE, (LPVOID)c_szDefaultScheme, SIZEOF(c_szDefaultScheme))) { pszUrl = SkipLeadingSlashes(pszUrl); if(SUCCEEDED(hr = pstr->SetStr(szDef))) hr = pstr->Append(pszUrl); } return hr; } /*---------------------------------------------------------- Purpose: Guesses a URL protocol based upon a list in the registry, compared to the first few characters of the given URL suffix. Returns: S_OK if a URL protocol is determined S_FALSE if there were no problems but no prefix was prepended */ HRESULT SHUrlApplyScheme( LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags) { HRESULT hr = S_FALSE; ASSERT(IS_VALID_STRING_PTRW(pszUrl, -1)); // // if there is already scheme there, we do nothing // unless the caller insists. this is to support // a string that looks like www.foo.com:8001. // this is a site that needs to be guessed at but // it also could be a valid scheme since '.' and '-' // are both valid scheme chars. // DWORD cch; if((dwFlags & URL_APPLY_FORCEAPPLY) || !FindSchemeW(pszUrl, &cch)) { if(dwFlags & URL_APPLY_GUESSSCHEME) hr = UrlGuessScheme(pszUrl, pstrOut); if (hr != S_OK && (dwFlags & URL_APPLY_GUESSFILE)) { LPCWSTR psz = FindDosPath(pszUrl); // only change hr if we actually converted. 
if(psz && SUCCEEDED(SHUrlCreateFromPath(psz, pstrOut, 0))) hr = S_OK; } if (hr != S_OK && (dwFlags & URL_APPLY_DEFAULT || !dwFlags)) hr = UrlApplyDefaultScheme(pszUrl, pstrOut); } return hr; } PRIVATE HRESULT CopyOutA(PSHSTRA pstr, LPSTR psz, LPDWORD pcch) { HRESULT hr; DWORD cch; ASSERT(pstr); ASSERT(psz); ASSERT(pcch); cch = pstr->GetLen(); if ((*pcch > cch) && psz) { hr = StringCchCopyA(psz, *pcch, pstr->GetStr()); } else { hr = E_POINTER; } *pcch = cch + (FAILED(hr) ? 1 : 0); return hr; } //*** StrCopyOutW -- // NOTES // WARNING: must match semantics of CopyOutW! (esp. the *pcchOut part) PRIVATE HRESULT StrCopyOutW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut) { HRESULT hr; DWORD cch; cch = lstrlenW(pszIn); if ((cch < *pcchOut) && pszOut) { hr = StringCchCopyW(pszOut, *pcchOut, pszIn); } else { hr = E_POINTER; } *pcchOut = cch + (FAILED(hr) ? 1 : 0); return hr; } //*** // NOTES // WARNING: StrCopyOutW must match this func, so if you change this change // it too PRIVATE HRESULT CopyOutW(PSHSTRW pstr, LPWSTR psz, LPDWORD pcch) { HRESULT hr = S_OK; DWORD cch; ASSERT(pstr); ASSERT(psz); ASSERT(pcch); cch = pstr->GetLen(); if((*pcch > cch) && psz) { StringCchCopyW(psz, *pcch, pstr->GetStr()); } else { hr = E_POINTER; } *pcch = cch + (FAILED(hr) ? 
1 : 0); return hr; } LWSTDAPI UrlCanonicalizeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCanonicalizeA: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCanonicalizeA: Caller passed invalid pcchOut"); RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCanonicalizeA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut == pszIn) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut) { hr = E_INVALIDARG; } else { hr = UrlCombineA("", pszIn, pszOut, pcchOut, dwFlags); } return hr; } LWSTDAPI UrlEscapeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlEscapeA: Caller passed invalid pszin"); RIPMSG(NULL!=pcchOut, "UrlEscapeA: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlEscapeA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszOut) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strUrl; if(SUCCEEDED(strUrl.SetStr(pszIn))) hr = SHUrlEscape(strUrl, &strwOut, dwFlags); else hr = E_OUTOFMEMORY; if(SUCCEEDED(hr)) hr = ReconcileHresults(hr, straOut.SetStr(strwOut)); } if(SUCCEEDED(hr)) hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut)); return hr; } LWSTDAPI UrlGetPartA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlGetPartA: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartA: Caller 
passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlGetPartA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut || dwPart == URL_PART_NONE) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strwIn; if(SUCCEEDED(strwIn.SetStr(pszIn))) hr = SHUrlGetPart(&strwIn, &strwOut, dwPart, dwFlags); else hr = E_OUTOFMEMORY; if(SUCCEEDED(hr)) hr = ReconcileHresults(hr, straOut.SetStr(strwOut)); } if(SUCCEEDED(hr)) hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut)); return hr; } LWSTDAPI_(BOOL) UrlIsA(LPCSTR pszURL, URLIS UrlIs) { BOOL fRet = FALSE; RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlIsA: Caller passed invalid pszURL"); if(pszURL) { DWORD cchScheme, dwFlags; LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme); if(pszScheme) { URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags); switch (UrlIs) { case URLIS_URL: fRet = TRUE; break; case URLIS_OPAQUE: fRet = (dwFlags & UPF_SCHEME_OPAQUE); break; case URLIS_NOHISTORY: fRet = (dwFlags & UPF_SCHEME_NOHISTORY); break; case URLIS_FILEURL: fRet = (eScheme == URL_SCHEME_FILE); break; default: // if it cant be done quck and dirty // then we need to thunk to the wide version SHSTRW strUrl; if (SUCCEEDED(strUrl.SetStr(pszURL))) { fRet = UrlIsW(strUrl, UrlIs); } } } } return fRet; } LWSTDAPI_(BOOL) UrlIsW(LPCWSTR pszURL, URLIS UrlIs) { BOOL fRet = FALSE; RIPMSG(NULL!=pszURL && IS_VALID_STRING_PTRW(pszURL, -1), "UrlIsW: Caller passed invalid pszURL"); if(pszURL) { DWORD cchScheme, dwFlags; LPCWSTR pszScheme = FindSchemeW(pszURL, &cchScheme); if(pszScheme) { SHSTRW str; URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags); switch (UrlIs) { case URLIS_URL: fRet = TRUE; break; case URLIS_OPAQUE: fRet = (dwFlags & UPF_SCHEME_OPAQUE); break; 
case URLIS_NOHISTORY: fRet = (dwFlags & UPF_SCHEME_NOHISTORY); break; case URLIS_FILEURL: fRet = (eScheme == URL_SCHEME_FILE); break; case URLIS_APPLIABLE: if (eScheme == URL_SCHEME_UNKNOWN) { if (S_OK == UrlGuessScheme(pszURL, &str)) fRet = TRUE; } break; // these cases need a broken URL case URLIS_DIRECTORY: case URLIS_HASQUERY: { URLPARTS parts; if (SUCCEEDED(str.SetStr(pszURL)) && SUCCEEDED(BreakUrl(str.GetInplaceStr(), &parts))) { switch(UrlIs) { case URLIS_DIRECTORY: // if the last seg has a trailing slash, or // if there are no path segments at all... fRet = (!parts.cSegments || (parts.dwFlags & UPF_EXSEG_DIRECTORY)); break; case URLIS_HASQUERY: fRet = (parts.pszQuery && *parts.pszQuery); break; default: ASSERT(FALSE); break; } } } break; default: AssertMsg(FALSE, "UrlIs() called with invalid flag"); } } } return fRet; } LWSTDAPI_(BOOL) UrlIsOpaqueA(LPCSTR pszURL) { return UrlIsA(pszURL, URLIS_OPAQUE); } LWSTDAPI_(BOOL) UrlIsOpaqueW(LPCWSTR pszURL) { return UrlIsW(pszURL, URLIS_OPAQUE); } LWSTDAPI_(BOOL) UrlIsNoHistoryA(LPCSTR pszURL) { return UrlIsA(pszURL, URLIS_NOHISTORY); } LWSTDAPI_(BOOL) UrlIsNoHistoryW(LPCWSTR pszURL) { return UrlIsW(pszURL, URLIS_NOHISTORY); } LWSTDAPI_(LPCSTR) UrlGetLocationA(LPCSTR pszURL) { CPINFO cpinfo; BOOL fMBCS = (GetCPInfo(CP_ACP, &cpinfo) && cpinfo.LeadByte[0]); RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlGetLocationA: Caller passed invalid pszURL"); if(pszURL) { DWORD cchScheme, dwFlags; LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme); if(pszScheme) { URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags); return (dwFlags & UPF_SCHEME_OPAQUE) ? 
NULL : FindFragmentA(pszURL, fMBCS, (eScheme == URL_SCHEME_FILE)); } } return NULL; } LWSTDAPI_(LPCWSTR) UrlGetLocationW(LPCWSTR wzURL) { RIPMSG(wzURL && IS_VALID_STRING_PTRW(wzURL, -1), "UrlGetLocationW: Caller passed invalid wzURL"); if(wzURL) { DWORD cchScheme, dwFlags; LPCWSTR pszScheme = FindSchemeW(wzURL, &cchScheme); if(pszScheme) { URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags); return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentW(wzURL, (eScheme == URL_SCHEME_FILE)); } } return NULL; } LWSTDAPI_(int) UrlCompareA(LPCSTR psz1, LPCSTR psz2, BOOL fIgnoreSlash) { RIPMSG(psz1 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz1"); RIPMSG(psz2 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz2"); if (psz1 && psz2) { SHSTRW str1, str2; if(SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) && SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)) ) { if(fIgnoreSlash) { LPWSTR pch; pch = str1.GetInplaceStr() + str1.GetLen() - 1; if(*pch == SLASH) TERMSTR(pch); pch = str2.GetInplaceStr() + str2.GetLen() - 1; if(*pch == SLASH) TERMSTR(pch); } return StrCmpW(str1, str2); } } return lstrcmpA(psz1, psz2); } LWSTDAPI UrlUnescapeA(LPSTR pszUrl, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { RIPMSG(pszUrl && IS_VALID_STRING_PTRA(pszUrl, -1), "UrlUnescapeA: Caller passed invalid pszUrl"); if(dwFlags & URL_UNESCAPE_INPLACE) { return SHUrlUnescapeA(pszUrl, dwFlags); } RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeA: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlUnescapeA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszUrl) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszUrl || !pcchOut || !*pcchOut || !pszOut) { return E_INVALIDARG; } SHSTRA str; HRESULT 
hr = str.SetStr(pszUrl); if(SUCCEEDED(hr)) { SHUrlUnescapeA(str.GetInplaceStr(), dwFlags); hr = CopyOutA(&str, pszOut, pcchOut); } return hr; } LWSTDAPI PathCreateFromUrlA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "PathCreateFromUrlA: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlA: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "PathCreateFromUrlA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strwIn; if(SUCCEEDED(strwIn.SetStr(pszIn))) hr = SHPathCreateFromUrl(strwIn, &strwOut, dwFlags); else hr = E_OUTOFMEMORY; if(SUCCEEDED(hr)) hr = straOut.SetStr(strwOut); } if(SUCCEEDED(hr) ) hr = CopyOutA(&straOut, pszOut, pcchOut); return hr; } LWSTDAPI UrlCreateFromPathA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCreateFromPathA: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathA: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCreateFromPathA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strwIn; if(SUCCEEDED(strwIn.SetStr(pszIn))) hr = SHUrlCreateFromPath(strwIn, &strwOut, dwFlags); else hr = E_OUTOFMEMORY; if(SUCCEEDED(hr)) { hr = ReconcileHresults(hr, 
straOut.SetStr(strwOut)); } } if(SUCCEEDED(hr) ) hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut)); return hr; } LWSTDAPI UrlApplySchemeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlApplySchemeA: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeA: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlApplySchemeA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strwIn; if(SUCCEEDED(strwIn.SetStr(pszIn))) hr = SHUrlApplyScheme(strwIn, &strwOut, dwFlags); else hr = E_OUTOFMEMORY; if(S_OK == (hr)) hr = straOut.SetStr(strwOut); } if(S_OK == (hr)) hr = CopyOutA(&straOut, pszOut, pcchOut); return hr; } // PERF_CACHE //*** g_szUCCanon -- 1-element cache for UrlCanonicalizeW // DESCRIPTION // it turns out a large # of our calls a) are for the same thing, // and b) have pszOut(canon)=pszIn(raw). so cache the most recent guy. 
LONG g_lockUC; WCHAR g_szUCCanon[64]; // post-canon guy (also used for pre-canon check) DWORD g_dwUCFlags; #ifdef DEBUG int g_cUCTot, g_cUCHit; #endif LWSTDAPI UrlCanonicalizeW(LPCWSTR pszUrl, LPWSTR pszCanonicalized, LPDWORD pcchCanonicalized, DWORD dwFlags) { HRESULT hr; SHSTRW strwOut; RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlCanonicalizeW: Caller passed invalid pszUrl"); RIPMSG(NULL!=pcchCanonicalized && IS_VALID_WRITE_PTR(pcchCanonicalized, DWORD), "UrlCanonicalizeW: Caller passed invalid pcchCanonicalized"); RIPMSG(NULL==pcchCanonicalized || (pszCanonicalized && IS_VALID_WRITE_BUFFER(pszCanonicalized, char, *pcchCanonicalized)), "UrlCanonicalizeW: Caller passed invalid pszCanonicalized"); #ifdef DEBUG if (pcchCanonicalized) { if (pszCanonicalized == pszUrl) DEBUGWhackPathStringW(pszCanonicalized, *pcchCanonicalized); else DEBUGWhackPathBufferW(pszCanonicalized, *pcchCanonicalized); } #endif if (!pszUrl || !pszCanonicalized || !pcchCanonicalized || !*pcchCanonicalized) { hr = E_INVALIDARG; } else { #ifdef DEBUG if ((g_cUCTot % 10) == 0) TraceMsg(DM_PERF, "uc: tot=%d hit=%d", g_cUCTot, g_cUCHit); #endif DBEXEC(TRUE, g_cUCTot++); // try the cache 1st if (InterlockedExchange(&g_lockUC, 1) == 0) { hr = E_FAIL; if ((g_dwUCFlags==dwFlags) && (!(dwFlags & URL_ESCAPE_PERCENT)) && StrCmpCW(pszUrl, g_szUCCanon) == 0) { DBEXEC(TRUE, g_cUCHit++); DWORD cchTmp = *pcchCanonicalized; hr = StrCopyOutW(g_szUCCanon, pszCanonicalized, pcchCanonicalized); if (FAILED(hr)) *pcchCanonicalized = cchTmp; // restore! 
} InterlockedExchange(&g_lockUC, 0); if (SUCCEEDED(hr)) return hr; } hr = UrlCombineW(L"", pszUrl, pszCanonicalized, pcchCanonicalized, dwFlags); if (SUCCEEDED(hr) && *pcchCanonicalized < ARRAYSIZE(g_szUCCanon)) { if (InterlockedExchange(&g_lockUC, 1) == 0) { StringCchCopyW(g_szUCCanon, ARRAYSIZE(g_szUCCanon), pszCanonicalized); g_dwUCFlags = dwFlags; InterlockedExchange(&g_lockUC, 0); } } } return hr; } LWSTDAPI UrlEscapeW(LPCWSTR pszUrl, LPWSTR pszEscaped, LPDWORD pcchEscaped, DWORD dwFlags) { HRESULT hr; SHSTRW strwOut; RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlEscapeW: Caller passed invalid pszUrl"); RIPMSG(NULL!=pcchEscaped && IS_VALID_WRITE_PTR(pcchEscaped, DWORD), "UrlEscapeW: Caller passed invalid pcchEscaped"); RIPMSG(pszEscaped && (NULL==pcchEscaped || IS_VALID_WRITE_BUFFER(pszEscaped, WCHAR, *pcchEscaped)), "UrlEscapeW: Caller passed invalid pszEscaped"); #ifdef DEBUG if (pcchEscaped) { if (pszEscaped==pszUrl) DEBUGWhackPathStringW(pszEscaped, *pcchEscaped); else DEBUGWhackPathBufferW(pszEscaped, *pcchEscaped); } #endif if (!pszUrl || !pszEscaped || !pcchEscaped || !*pcchEscaped) hr = E_INVALIDARG; else { hr = SHUrlEscape(pszUrl, &strwOut, dwFlags); } if(SUCCEEDED(hr) ) hr = CopyOutW(&strwOut, pszEscaped, pcchEscaped); return hr; } LWSTDAPI_(int) UrlCompareW(LPCWSTR psz1, LPCWSTR psz2, BOOL fIgnoreSlash) { RIPMSG(psz1 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz1"); RIPMSG(psz2 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz2"); if (psz1 && psz2) { SHSTRW str1, str2; if( SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) && SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0))) { if(fIgnoreSlash) { LPWSTR pch; pch = str1.GetInplaceStr() + str1.GetLen() - 1; if(*pch == SLASH) TERMSTR(pch); pch = str2.GetInplaceStr() + str2.GetLen() - 1; if(*pch == SLASH) TERMSTR(pch); } return StrCmpW(str1, str2); } } return StrCmpW(psz1, 
psz2); } LWSTDAPI UrlUnescapeW(LPWSTR pszUrl, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlUnescapeW: Caller passed invalid pszUrl"); if(dwFlags & URL_UNESCAPE_INPLACE) { return SHUrlUnescapeW(pszUrl, dwFlags); } RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeW: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlUnescapeW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszUrl) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, *pcchOut); } #endif if (!pszUrl || !pcchOut || !*pcchOut || !pszOut) { return E_INVALIDARG; } SHSTRW str; HRESULT hr = str.SetStr(pszUrl); if(SUCCEEDED(hr)) { SHUrlUnescapeW(str.GetInplaceStr(), dwFlags); hr = CopyOutW(&str, pszOut, pcchOut); } return hr; } LWSTDAPI PathCreateFromUrlW (LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRW strOut; RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "PathCreateFromUrlW: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlW: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "PathCreateFromUrlW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else hr = SHPathCreateFromUrl(pszIn, &strOut, dwFlags); if(SUCCEEDED(hr) ) hr = CopyOutW(&strOut, pszOut, pcchOut); return hr; } LWSTDAPI UrlCreateFromPathW (LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRW strOut; RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlCreateFromPathW: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), 
"UrlCreateFromPathW: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlCreateFromPathW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else hr = SHUrlCreateFromPath(pszIn, &strOut, dwFlags); if(SUCCEEDED(hr) ) hr = ReconcileHresults(hr, CopyOutW(&strOut, pszOut, pcchOut)); return hr; } LWSTDAPI UrlGetPartW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags) { SHSTRW strIn, strOut; HRESULT hr; RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlGetPartW: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartW: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlGetPartW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, *pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut || !dwPart) hr = E_INVALIDARG; else if (SUCCEEDED(hr = strIn.SetStr(pszIn))) hr = SHUrlGetPart(&strIn, &strOut, dwPart, dwFlags); if(SUCCEEDED(hr) ) hr = CopyOutW(&strOut, pszOut, pcchOut); return hr; } LWSTDAPI UrlApplySchemeW (LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRW strOut; RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlApplySchemeW: Caller passed invalid pszIn"); RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeW: Caller passed invalid pcchOut"); RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlApplySchemeW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut==pszIn) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, 
*pcchOut); } #endif if (!pszIn || !pszOut || !pcchOut || !*pcchOut ) hr = E_INVALIDARG; else hr = SHUrlApplyScheme(pszIn, &strOut, dwFlags); if(S_OK == (hr)) hr = CopyOutW(&strOut, pszOut, pcchOut); return hr; } // // this is the same table used by both URLMON and WININET's cache // const static BYTE Translate[256] = { 1, 14,110, 25, 97,174,132,119,138,170,125,118, 27,233,140, 51, 87,197,177,107,234,169, 56, 68, 30, 7,173, 73,188, 40, 36, 65, 49,213,104,190, 57,211,148,223, 48,115, 15, 2, 67,186,210, 28, 12,181,103, 70, 22, 58, 75, 78,183,167,238,157,124,147,172,144, 176,161,141, 86, 60, 66,128, 83,156,241, 79, 46,168,198, 41,254, 178, 85,253,237,250,154,133, 88, 35,206, 95,116,252,192, 54,221, 102,218,255,240, 82,106,158,201, 61, 3, 89, 9, 42,155,159, 93, 166, 80, 50, 34,175,195,100, 99, 26,150, 16,145, 4, 33, 8,189, 121, 64, 77, 72,208,245,130,122,143, 55,105,134, 29,164,185,194, 193,239,101,242, 5,171,126, 11, 74, 59,137,228,108,191,232,139, 6, 24, 81, 20,127, 17, 91, 92,251,151,225,207, 21, 98,113,112, 84,226, 18,214,199,187, 13, 32, 94,220,224,212,247,204,196, 43, 249,236, 45,244,111,182,153,136,129, 90,217,202, 19,165,231, 71, 230,142, 96,227, 62,179,246,114,162, 53,160,215,205,180, 47,109, 44, 38, 31,149,135, 0,216, 52, 63, 23, 37, 69, 39,117,146,184, 163,200,222,235,248,243,219, 10,152,131,123,229,203, 76,120,209 }; PRIVATE void _HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash) { DWORD i, j; // seed the hash for (i = cbHash; i-- > 0;) pbHash[i] = (BYTE) i; // do the hash for (j = cbData; j-- > 0;) { for (i = cbHash; i-- > 0;) pbHash[i] = Translate[pbHash[i] ^ pbData[j]]; } } LWSTDAPI HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash) { RIPMSG(pbData && IS_VALID_READ_BUFFER(pbData, BYTE, cbData), "HashData: Caller passed invalid pbData"); RIPMSG(pbHash && IS_VALID_WRITE_BUFFER(pbHash, BYTE, cbHash), "HashData: Caller passed invalid pbHash"); if (pbData && pbHash) { _HashData(pbData, cbData, pbHash, cbHash); return S_OK; } 
return E_INVALIDARG; } LWSTDAPI UrlHashA(LPCSTR psz, LPBYTE pb, DWORD cb) { HRESULT hr = E_INVALIDARG; RIPMSG(psz && IS_VALID_STRING_PTRA(psz, -1), "UrlHashA: Caller passed invalid psz"); RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashA: Caller passed invalid pb"); if (psz && pb) { _HashData((LPBYTE) psz, lstrlenA(psz), pb, cb); return S_OK; } return hr; } LWSTDAPI UrlHashW(LPCWSTR psz, LPBYTE pb, DWORD cb) { HRESULT hr; RIPMSG(psz && IS_VALID_STRING_PTRW(psz, -1), "UrlHashW: Caller passed invalid psz"); RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashW: Caller passed invalid pb"); if (psz && pb) { SHSTRA str; if (SUCCEEDED( hr = str.SetStr(psz))) hr = UrlHashA(str, pb, cb); } else { hr = E_INVALIDARG; } return hr; } /***************************** ParseURL Functions *****************************/ // these were originally in URL.DLL and then moved to shlwapi. // i just added them from url.c for reuse of code. // ParseURL now does no MBCS thunks, to keep it fast. // // declarations for ParseURL() APIs // typedef const PARSEDURLA CPARSEDURLA; typedef const PARSEDURLA * PCPARSEDURLA; typedef const PARSEDURLW CPARSEDURLW; typedef const PARSEDURLW * PCPARSEDURLW; #ifdef DEBUG BOOL IsValidPCPARSEDURLA( LPCSTR pcszURL, PCPARSEDURLA pcpu) { return(IS_VALID_READ_PTR(pcpu, CPARSEDURLA) && (IS_VALID_STRING_PTRA(pcpu->pszProtocol, -1) && EVAL(IsStringContainedA(pcszURL, pcpu->pszProtocol)) && EVAL(pcpu->cchProtocol < (UINT)lstrlenA(pcpu->pszProtocol))) && (IS_VALID_STRING_PTRA(pcpu->pszSuffix, -1) && EVAL(IsStringContainedA(pcszURL, pcpu->pszSuffix)) && EVAL(pcpu->cchSuffix <= (UINT)lstrlenA(pcpu->pszSuffix))) && EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenA(pcszURL))); } BOOL IsValidPCPARSEDURLW( LPCWSTR pcszURL, PCPARSEDURLW pcpu) { return(IS_VALID_READ_PTR(pcpu, CPARSEDURLW) && (IS_VALID_STRING_PTRW(pcpu->pszProtocol, -1) && EVAL(IsStringContainedW(pcszURL, pcpu->pszProtocol)) && EVAL(pcpu->cchProtocol < (UINT)lstrlenW(pcpu->pszProtocol))) && 
(IS_VALID_STRING_PTRW(pcpu->pszSuffix, -1) && EVAL(IsStringContainedW(pcszURL, pcpu->pszSuffix)) && EVAL(pcpu->cchSuffix <= (UINT)lstrlenW(pcpu->pszSuffix))) && EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenW(pcszURL))); } #endif /*---------------------------------------------------------- Purpose: Parse the given path into the PARSEDURL structure. ****** ****** This function must not do any extraneous ****** things. It must be small and fast. ****** Returns: NOERROR if a valid URL format URL_E_INVALID_SYNTAX if not Cond: -- */ STDMETHODIMP ParseURLA( LPCSTR pcszURL, PPARSEDURLA ppu) { HRESULT hr = E_INVALIDARG; RIP(IS_VALID_STRING_PTRA(pcszURL, -1)); RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLA)); if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize) { DWORD cch; hr = URL_E_INVALID_SYNTAX; // assume error ppu->pszProtocol = FindSchemeA(pcszURL, &cch); if(ppu->pszProtocol) { ppu->cchProtocol = cch; // Determine protocol scheme number ppu->nScheme = SchemeTypeFromStringA(ppu->pszProtocol, cch); ppu->pszSuffix = ppu->pszProtocol + cch + 1; // // APPCOMPAT - Backwards compatibility - zekel 28-feb-97 // ParseURL() believes in file: urls like "file://C:\foo\bar" // and some pieces of code will use it to get the Dos Path. // new code should always call PathCreateFromUrl() to // get the dos path of a file: URL. // // i am leaving this behavior in case some compat stuff is out there. // if (URL_SCHEME_FILE == ppu->nScheme && '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1]) { // Yes; skip the "//" ppu->pszSuffix += 2; #ifndef UNIX // FOR UNIX: If we have /vobs/build, we don't want to make // There might be a third slash. Skip it. if ('/' == *ppu->pszSuffix) ppu->pszSuffix++; #endif } ppu->cchSuffix = lstrlenA(ppu->pszSuffix); hr = S_OK; } } #ifdef DEBUG if (hr == S_OK) { CHAR rgchDebugProtocol[MAX_PATH]; CHAR rgchDebugSuffix[MAX_PATH]; // (+ 1) for null terminator. 
lstrcpynA(rgchDebugProtocol, ppu->pszProtocol, min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol))); // (+ 1) for null terminator. lstrcpynA(rgchDebugSuffix, ppu->pszSuffix, min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix))); TraceMsgA(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".", rgchDebugProtocol, rgchDebugSuffix, pcszURL); } else { TraceMsgA(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL); } #endif ASSERT(FAILED(hr) || EVAL(IsValidPCPARSEDURLA(pcszURL, ppu))); return(hr); } /*---------------------------------------------------------- Purpose: Parse the given path into the PARSEDURL structure. ****** ****** This function must not do any extraneous ****** things. It must be small and fast. ****** Returns: NOERROR if a valid URL format URL_E_INVALID_SYNTAX if not Cond: -- */ STDMETHODIMP ParseURLW( LPCWSTR pcszURL, PPARSEDURLW ppu) { HRESULT hr = E_INVALIDARG; RIP(IS_VALID_STRING_PTRW(pcszURL, -1)); RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLW)); if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize) { DWORD cch; hr = URL_E_INVALID_SYNTAX; // assume error ppu->pszProtocol = FindSchemeW(pcszURL, &cch); if(ppu->pszProtocol) { ppu->cchProtocol = cch; // Determine protocol scheme number ppu->nScheme = SchemeTypeFromStringW(ppu->pszProtocol, cch); ppu->pszSuffix = ppu->pszProtocol + cch + 1; // // APPCOMPAT - Backwards compatibility - zekel 28-feb-97 // ParseURL() believes in file: urls like "file://C:\foo\bar" // and some pieces of code will use it to get the Dos Path. // new code should always call PathCreateFromUrl() to // get the dos path of a file: URL. // // i am leaving this behavior in case some compat stuff is out there. // if (URL_SCHEME_FILE == ppu->nScheme && '/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1]) { // Yes; skip the "//" ppu->pszSuffix += 2; #ifndef UNIX // There might be a third slash. Skip it. // IEUNIX - On UNIX, it's a root directory, so don't skip it! 
if ('/' == *ppu->pszSuffix) ppu->pszSuffix++; #endif } ppu->cchSuffix = lstrlenW(ppu->pszSuffix); hr = S_OK; } } #ifdef DEBUG if (hr==S_OK) { WCHAR rgchDebugProtocol[MAX_PATH]; WCHAR rgchDebugSuffix[MAX_PATH]; // (+ 1) for null terminator. StrCpyNW(rgchDebugProtocol, ppu->pszProtocol, min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol))); // (+ 1) for null terminator. StrCpyNW(rgchDebugSuffix, ppu->pszSuffix, min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix))); TraceMsg(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".", rgchDebugProtocol, rgchDebugSuffix, pcszURL); } else { TraceMsg(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL); } #endif ASSERT(FAILED(hr) || EVAL(IsValidPCPARSEDURLW(pcszURL, ppu))); return(hr); } #ifdef USE_FAST_PARSER // GetSchemeTypeAndFlagsSpecialW // performs the same behavior as GetSchemeTypeAndFlagsW plus, when successful // copies the canonicalised form of the scheme back. PRIVATE URL_SCHEME GetSchemeTypeAndFlagsSpecialW(LPWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags) { DWORD i; ASSERT(pszScheme); #ifdef DEBUG if ((g_cSTTot % 10) == 0) TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0); #endif DBEXEC(TRUE, g_cSTTot++); // check cache 1st i = g_iScheme; if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme && StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0) { DBEXEC(TRUE, i == 0 ? 
g_cSTHit0++ : g_cSTHit++); Lhit: if (pdwFlags) *pdwFlags = g_mpUrlSchemeTypes[i].dwFlags; // update cache (unconditionally) g_iScheme = i; // We need to do this because the scheme might not be canonicalised memcpy(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme*sizeof(WCHAR)); return g_mpUrlSchemeTypes[i].eScheme; } for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++) { if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme && 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme)) goto Lhit; } if (pdwFlags) { *pdwFlags = 0; } return URL_SCHEME_UNKNOWN; } // URL_STRING -------------------------------------------------------------------------------------- // is a container for the combined URL. It attempts to construct a string from the information // fed into it. If there is not enough buffer space available, it will measure how much additional // space will be required to hold the string. WCHAR wszBogus[] = L""; // US_* are the various modes of transforming characters fed into the container. // US_NOTHING do nothing to the character. 
// US_UNESCAPE turn entries of the form %xx into the unescaped form // US_ESCAPE_UNSAFE transform invalid path characters into %xx sequences // US_ESCAPE_SPACES transform only spaces in to %20 sequences enum { US_NOTHING, US_UNESCAPE, US_ESCAPE_UNSAFE, US_ESCAPE_SPACES }; class URL_STRING { protected: URL_SCHEME _eScheme; DWORD _ccWork, _ccMark, _ccLastWhite, _ccQuery, _ccFragment, _ccBuffer, _dwSchemeInfo; DWORD _dwOldFlags, _dwFlags, _dwMode; BOOL _fFixSlashes, _fExpecting, _fError; WCHAR _wchLast, _wszInternalString[256]; PWSTR _pszWork; VOID baseAccept(WCHAR wch); VOID TrackWhiteSpace(WCHAR wch); public: URL_STRING(DWORD dwFlags); ~URL_STRING(); VOID CleanAccept(WCHAR wch); VOID Accept(WCHAR wch); VOID Accept(PWSTR a_psz); VOID Contract(BOOL fContractLevel = TRUE); VOID TrimEndWhiteSpace(); PWSTR GetStart(); LONG GetTotalLength(); BOOL AnyProblems(); VOID NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo); VOID AddSchemeNote(DWORD a_dwSchemeInfo); DWORD GetSchemeNotes(); URL_SCHEME QueryScheme(); VOID Mark(); VOID ClearMark(); VOID EraseMarkedText(); DWORD CompareMarkWith(PWSTR psz); DWORD CompareLast(PCWSTR psz, DWORD cc); VOID EnableMunging(); VOID DisableMunging(); VOID DisableSlashFixing(); VOID RestoreFlags(); VOID AddFlagNote(DWORD dwFlag); VOID NotifyQuery(); VOID NotifyFragment(); VOID DropQuery(); VOID DropFragment(); }; // ------------------------------------------------------------------------------- URL_STRING::URL_STRING(DWORD dwFlags) { _ccBuffer = ARRAYSIZE(_wszInternalString); _ccWork = 1; _pszWork = _wszInternalString; _ccQuery = _ccFragment = _ccMark = 0; _eScheme = URL_SCHEME_UNKNOWN; _dwOldFlags = _dwFlags = dwFlags; _dwMode = US_NOTHING; _fFixSlashes = TRUE; _fError = _fExpecting = FALSE; } URL_STRING::~URL_STRING() { if (_ccBuffer > ARRAYSIZE(_wszInternalString)) { LocalFree(_pszWork); } } // ------------------------------------------------------------------------------- // These are the standard functions used for adding characters to 
// an url.

// Stores one character at the current write position and advances it.  A
// backslash is stored as a forward slash while slash fixing is on.  When the
// buffer becomes full it is replaced by a zero-initialised heap buffer of twice
// the size; if that allocation fails, _fError is set and, from then on, every
// further character overwrites the last slot instead of advancing.
VOID URL_STRING::baseAccept(WCHAR wch)
{
    WCHAR wchStored = wch;
    if (_fFixSlashes && wch == WHACK)
    {
        wchStored = SLASH;
    }
    _pszWork[_ccWork-1] = wchStored;
    _ccWork++;

    if (_ccWork <= _ccBuffer)
    {
        return;                     // still room for the next character
    }

    if (_fError)
    {
        _ccWork--;                  // an earlier growth failed; stay pinned
        return;
    }

    PWSTR pszGrown = (PWSTR)LocalAlloc(LPTR, 2*_ccBuffer*sizeof(WCHAR));
    if (!pszGrown)
    {
        _ccWork--;
        _fError = TRUE;
        return;
    }

    memcpy(pszGrown, _pszWork, (_ccWork-1)*sizeof(WCHAR));
    if (_ccBuffer > ARRAYSIZE(_wszInternalString))
    {
        // The old buffer was itself heap allocated.
        LocalFree(_pszWork);
    }
    _ccBuffer *= 2;
    _pszWork = pszGrown;
}

// Remembers where the current trailing run of whitespace began (as a _ccWork
// position) so that TrimEndWhiteSpace() can cut it off; any non-white
// character forgets the run.
VOID URL_STRING::TrackWhiteSpace(WCHAR wch)
{
    if (!IsWhite(wch))
    {
        _ccLastWhite = 0;
        return;
    }

    if (_ccLastWhite == 0)
    {
        _ccLastWhite = _ccWork;
    }
}

// -- URL_STRING::Accept ----------------------------
// Based on the current munging mode, transform the character into the
// desired form and add it to the string.
//
//   US_NOTHING        stored as is
//   US_UNESCAPE       "%xy" with two hex digits is stored as the single
//                     character it encodes; a '%' followed by something else is
//                     stored literally
//   US_ESCAPE_UNSAFE  everything except '/', a fixable '\' and safe path
//                     characters is stored as %XX built from the low byte
//   US_ESCAPE_SPACES  a space is stored as "%20"
VOID URL_STRING::Accept(WCHAR wch)
{
    TrackWhiteSpace(wch);

    if (_dwMode == US_UNESCAPE)
    {
        if (!_fExpecting)
        {
            if (wch == HEX_ESCAPE)
            {
                // Start of a possible %xx sequence: hold it back for now.
                _fExpecting = TRUE;
                _wchLast = L'\0';
                return;
            }
        }
        else if (!IsHex(wch))
        {
            // Not an escape after all: emit what was held back, then fall
            // out to store wch itself.
            baseAccept(HEX_ESCAPE);
            if (_wchLast != L'\0')
            {
                baseAccept(_wchLast);
            }
            _fExpecting = FALSE;
        }
        else if (_wchLast == L'\0')
        {
            // First hex digit: keep it and wait for the second.
            _wchLast = wch;
            return;
        }
        else
        {
            // Second hex digit: combine the pair into one character.
            wch = (HexToWord(_wchLast)*16) + HexToWord(wch);
            TrackWhiteSpace(wch);
            _fExpecting = FALSE;
            if ((wch == WHACK) && _fFixSlashes)
            {
                // An escaped backslash must survive as a backslash.
                _fFixSlashes = FALSE;
                baseAccept(wch);
                _fFixSlashes = TRUE;
                return;
            }
        }
    }
    else if (_dwMode == US_ESCAPE_UNSAFE)
    {
        if (!((wch == SLASH)
              || (wch == WHACK && _fFixSlashes)
              || (IsSafePathChar(wch) && (wch != HEX_ESCAPE || !(_dwFlags & URL_ESCAPE_PERCENT)))))
        {
            baseAccept(L'%');
            baseAccept(hex[(wch >> 4) & 15]);
            baseAccept(hex[wch & 15]);
            return;
        }
    }
    else if (_dwMode == US_ESCAPE_SPACES)
    {
        if (wch == SPC)
        {
            baseAccept(L'%');
            baseAccept(L'2');
            baseAccept(L'0');
            return;
        }
    }
    else if (_dwMode != US_NOTHING)
    {
        ASSERT(FALSE);
    }

    baseAccept(wch);
}

// -- Accept --------------------------------
// Accept only a string
VOID URL_STRING::Accept(PWSTR psz)
{
    for (; *psz; psz++)
    {
        Accept(*psz);
    }
}

// -- Contract
// Whenever we call Contract, we're pointing
past the last separator. We want to // omit the segment between this separator and the one before it. // This should be used ONLY when we're examining the path segment of the urls. VOID URL_STRING::Contract(BOOL fContractLevel) { ASSERT(_ccWork && _ccMark); // _ccWork is 1 after wherever the next character will be placed // subtract +1 to derive what the last character in the url is DWORD _ccEnd = _ccWork-1 - 1; if (_eScheme!=URL_SCHEME_MK) { if (!fContractLevel && (_pszWork[_ccEnd]==SLASH || _pszWork[_ccEnd]==WHACK)) { return; } do { _ccEnd--; } while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH && _pszWork[_ccEnd]!=WHACK); } else { if (!fContractLevel && (_pszWork[_ccEnd]==SLASH)) { return; } do { _ccEnd--; } while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH); } if (_ccEnd<_ccMark-1) { _ccEnd = _ccMark-1; } else { _ccEnd++; } _ccWork = _ccEnd + 1; } VOID URL_STRING::TrimEndWhiteSpace() { if (_ccLastWhite) { _ccWork = _ccLastWhite; _ccLastWhite = 0; } } VOID URL_STRING::CleanAccept(WCHAR wch) { baseAccept(wch); } // ------------------------------------------------------------------------------- // These member functions return information about the url that is being formed PWSTR URL_STRING::GetStart() { return _pszWork; } LONG URL_STRING::GetTotalLength() { return _ccWork - 1; } BOOL URL_STRING::AnyProblems() { return _fError; } // ------------------------------------------------------------------------------- VOID URL_STRING::NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo) { _eScheme = a_eScheme; _dwSchemeInfo = a_dwSchemeInfo; _fFixSlashes = a_dwSchemeInfo & UPF_SCHEME_CONVERT; } VOID URL_STRING::AddSchemeNote(DWORD a_dwSchemeInfo) { _dwSchemeInfo |= a_dwSchemeInfo; _fFixSlashes = _dwSchemeInfo & UPF_SCHEME_CONVERT; } DWORD URL_STRING::GetSchemeNotes() { return _dwSchemeInfo; } URL_SCHEME URL_STRING::QueryScheme() { return _eScheme; } // ------------------------------------------------------------------------------- VOID URL_STRING::Mark() { 
_ccMark = _ccWork; } VOID URL_STRING::ClearMark() { _ccMark = 0; } VOID URL_STRING::EraseMarkedText() { if (_ccMark) { _ccWork = _ccMark; _ccMark = 0; } } DWORD URL_STRING::CompareMarkWith(PWSTR psz) { if (_ccMark) { *(_pszWork + _ccWork - 1) = L'\0'; return (StrCmpW(_pszWork + _ccMark - 1, psz)); } // In other words, return that the string isn't present. return 1; } DWORD URL_STRING::CompareLast(PCWSTR psz, DWORD cc) { if (_ccWork > cc) { return StrCmpNIW(_pszWork + _ccWork - 1 - cc, psz, cc); } return 1; } // ------------------------------------------------------------------------------- VOID URL_STRING::NotifyQuery() { if (!_ccQuery) { _ccQuery = _ccWork; } } VOID URL_STRING::NotifyFragment() { if (!_ccFragment) { _ccFragment = _ccWork; CleanAccept(POUND); } } VOID URL_STRING::DropQuery() { if (_ccQuery) { _ccWork = _ccQuery; _ccQuery = _ccFragment = 0; } } VOID URL_STRING::DropFragment() { if (_ccFragment) { _ccWork = _ccFragment; _ccFragment = 0; } } // ------------------------------------------------------------------------------- // These member functions are for determining how the url's characters are going // to be represented VOID URL_STRING::EnableMunging() { _dwMode = US_NOTHING; // For opaque urls, munge ONLY if we're explicitly asked to URL_ESCAPE or URL_UNESCAPE, // but NOT URL_ESCAPE_SPACES_ONLY // For query and fragment, never allow for URL_ESCAPE_UNSAFE and for // others ONLY when URL_DONT_ESCAPE_EXTRA_INFO is specified if ((_dwSchemeInfo & UPF_SCHEME_OPAQUE) && (_dwFlags & URL_ESCAPE_SPACES_ONLY)) return; if ((_ccQuery || _ccFragment) && ((_dwFlags & (URL_DONT_ESCAPE_EXTRA_INFO | URL_ESCAPE_UNSAFE)))) return; if (_dwFlags & URL_UNESCAPE) { _dwMode = US_UNESCAPE; } else if (_dwFlags & URL_ESCAPE_UNSAFE) { _dwMode = US_ESCAPE_UNSAFE; } else if (_dwFlags & URL_ESCAPE_SPACES_ONLY) { _dwMode = US_ESCAPE_SPACES; } } VOID URL_STRING::DisableMunging() { _dwMode = US_NOTHING; } VOID URL_STRING::DisableSlashFixing() { _fFixSlashes = FALSE; } VOID 
URL_STRING::AddFlagNote(DWORD dwFlag) { _dwFlags |= dwFlag; } VOID URL_STRING::RestoreFlags() { ASSERT((_eScheme==URL_SCHEME_FILE) || (_dwFlags==_dwOldFlags)); _dwFlags = _dwOldFlags; EnableMunging(); } // ------------------------------------------------------------------------------- // URL ------------------------------------------------------------------------------------ // The URL class is used to examine the base and relative URLs to determine what // will go into the URL_STRING container. The difference should be clear: // URL instances look, but don't touch. URL_STRINGs are used solely to build urls. class URL { private: PCWSTR _pszUrl, _pszWork; URL_SCHEME _eScheme; DWORD _dwSchemeNotes, _dwFlags; BOOL _fPathCompressionOn; BOOL _fIgnoreQuery; WCHAR SmallForm(WCHAR wch); BOOL IsAlpha(WCHAR ch); PCWSTR IsUrlPrefix(PCWSTR psz); BOOL IsLocalDrive(PCWSTR psz); BOOL IsQualifiedDrive(PCWSTR psz); BOOL DetectSymbols(WCHAR wch1, WCHAR wch2 = '\0', WCHAR wch3 = '\0'); PCWSTR NextChar(PCWSTR psz); PCWSTR FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1 = '\0', WCHAR wchDelim2 = '\0', WCHAR wchDelim3 = '\0', WCHAR wchDelim4 = '\0'); BOOL DetectFileServer(); BOOL DetectMkServer(); BOOL DefaultDetectServer(); VOID FeedDefaultServer(URL_STRING* pus); VOID FeedFileServer(URL_STRING* pus); VOID FeedFtpServer(URL_STRING* pus); VOID FeedHttpServer(URL_STRING* pus); VOID FeedMkServer(URL_STRING* pus); PCWSTR FeedPort(PCWSTR psz, URL_STRING* pus); public: VOID Setup(PCWSTR pszInUrl, DWORD a_dwFlags = 0); VOID Reset(); BOOL IsReset(); BOOL DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes = FALSE); VOID SetScheme(URL_SCHEME eScheme, DWORD dwFlag); URL_SCHEME GetScheme(); VOID AddSchemeNote(DWORD dwFlag); DWORD GetSchemeNotes(); BOOL DetectServer(); BOOL DetectAbsolutePath(); BOOL DetectPath(); BOOL DetectQueryOrFragment(); BOOL DetectQuery(); BOOL DetectLocalDrive(); BOOL DetectSlash(); BOOL DetectAnything(); WCHAR PeekNext(); VOID FeedPath(URL_STRING* pus, BOOL 
fMarkServer = TRUE); PCWSTR CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue); DWORD DetectDots(PCWSTR* ppsz); VOID StopPathCompression(); VOID FeedServer(URL_STRING* pus); VOID FeedLocalDrive(URL_STRING* pus); VOID FeedQueryAndFragment(URL_STRING* pus); VOID IgnoreQuery(); }; // ------------------------------------------------------------------------------- VOID URL::Setup(PCWSTR pszInUrl, DWORD a_dwFlags) { while (*pszInUrl && IsWhite(*pszInUrl)) { pszInUrl++; } _pszWork = _pszUrl = pszInUrl; _eScheme = URL_SCHEME_UNKNOWN; _dwSchemeNotes = 0; _dwFlags = a_dwFlags; _fPathCompressionOn = TRUE; _fIgnoreQuery = FALSE; } VOID URL::Reset() { _pszWork = wszBogus; } BOOL URL::IsReset() { return (_pszWork==wszBogus); } // ------------------------------------------------------------------------------- inline WCHAR URL::SmallForm(WCHAR wch) { return (wch < L'A' || wch > L'Z') ? wch : (wch - L'A' + L'a'); } inline BOOL URL::IsAlpha(WCHAR ch) { return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')); } inline PCWSTR URL::IsUrlPrefix(PCWSTR psz) { // We want to skip instances of "URL:" psz = NextChar(psz); if (*psz==L'u' || *psz==L'U') { psz = NextChar(psz+1); if (*psz==L'r' || *psz==L'R') { psz = NextChar(psz+1); if (*psz==L'l' || *psz==L'L') { psz = NextChar(psz+1); if (*psz==COLON) { return NextChar(psz+1); } } } } return NULL; } inline BOOL URL::IsLocalDrive(PCWSTR psz) { psz = NextChar(psz); return (IsAlpha(*psz) && ((*NextChar(psz+1)==COLON) || (*NextChar(psz+1)==BAR))); } // -- IsQualifiedDrive -------- // On Win32 systems, a qualified drive is either // i. : or ii. \\UNC\ // Under unix, it's only /. inline BOOL URL::IsQualifiedDrive(PCWSTR psz) { psz = NextChar(psz); BOOL fResult = IsLocalDrive(psz); if (!fResult && *psz==WHACK) { psz = NextChar(psz+1); fResult = *psz==WHACK; } return fResult; } // -- DetectSymbols ------------- // This is used to help determine what part of the URL we have reached. 
inline BOOL URL::DetectSymbols(WCHAR wch1, WCHAR wch2, WCHAR wch3) { ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); return (*psz && (*psz==wch1 || *psz==wch2 || *psz==wch3)); } BOOL URL::DetectSlash() { return DetectSymbols(SLASH, WHACK); } BOOL URL::DetectAnything() { return (*NextChar(_pszWork)!=L'\0'); } // -- NextChar ------------------------------------- // We use NextChar instead of *psz because we want to // ignore characters such as TAB, CR, etc. inline PCWSTR URL::NextChar(PCWSTR psz) { while (IsInsignificantWhite(*psz)) { psz++; } return psz; } WCHAR URL::PeekNext() { return (*NextChar(NextChar(_pszWork)+1)); } // ------------------------------------------------------------------------------- inline PCWSTR URL::FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1, WCHAR wchDelim2, WCHAR wchDelim3, WCHAR wchDelim4) { psz = NextChar(psz); while (*psz && *psz!=wchDelim1 && *psz!=wchDelim2 && *psz!=wchDelim3 && *psz!=wchDelim4) { pus->Accept(*psz); psz = NextChar(psz+1); } return psz; } // ------------------------------------------------------------------------------- VOID URL::SetScheme(URL_SCHEME eScheme, DWORD dwFlag) { _eScheme = eScheme; _dwSchemeNotes = dwFlag; } URL_SCHEME URL::GetScheme() { return _eScheme; } VOID URL::AddSchemeNote(DWORD dwFlag) { _dwSchemeNotes |= dwFlag; } DWORD URL::GetSchemeNotes() { return _dwSchemeNotes; } BOOL URL::DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes) { ASSERT(_pszWork); ASSERT(!fReconcileSchemes || (fReconcileSchemes && pus->QueryScheme()!=URL_SCHEME_FILE)); PCWSTR psz = NextChar(_pszWork); BOOL fResult = (IsQualifiedDrive(_pszWork)); if (fResult) { // // Detected a File URL that isn't explicitly marked as such, ie C:\foo, // in this case, we need to confirm that we're not overwriting // a fully qualified relative URL with an Accept("file:"), although // if the relative URL is the same scheme as the base, we now // need to make the BASE-file URL take precedence. 
// _eScheme = URL_SCHEME_FILE; if (!fReconcileSchemes) { pus->Accept((PWSTR)c_szFileScheme); pus->Accept(COLON); _dwSchemeNotes = g_mpUrlSchemeTypes[1].dwFlags; pus->NoteScheme(_eScheme, _dwSchemeNotes); pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE); } else if (pus->QueryScheme() != URL_SCHEME_FILE) { Reset(); } goto exit; } for (;;) { while (IsValidSchemeCharW(*psz)) { psz = NextChar(psz + 1); } if (*psz!=COLON) { break; } if (IsUrlPrefix(_pszWork)) { // However, we want to skip instances of URL: _pszWork = psz = NextChar(psz+1); continue; } DWORD ccScheme = 0; PCWSTR pszClone = NextChar(_pszWork); if (!fReconcileSchemes) { while (pszClone<=psz) { pus->Accept(SmallForm(*pszClone)); ccScheme++; pszClone = NextChar(pszClone+1); } _pszWork = pszClone; // Subtract one for the colon ccScheme--; // BUG BUG Since we're smallifying the scheme above, we might be able to // avoid calling this func, call GetSchemeTypeAndFlags instead. _eScheme = GetSchemeTypeAndFlagsSpecialW(pus->GetStart(), ccScheme, &_dwSchemeNotes); pus->NoteScheme(_eScheme, _dwSchemeNotes); } else { PWSTR pszKnownScheme = pus->GetStart(); while (pszClone<=psz && SmallForm(*pszClone)==*pszKnownScheme) { pszClone = NextChar(pszClone+1); pszKnownScheme++; } if (pszClone<=psz) { Reset(); } else { _pszWork = pszClone; } } fResult = TRUE; break; } exit: return fResult; } // ------------------------------------------------------------------------------- BOOL URL::DetectServer() { ASSERT(_pszWork); BOOL fRet; switch (_eScheme) { case URL_SCHEME_FILE: fRet = DetectFileServer(); break; case URL_SCHEME_MK: fRet = DetectMkServer(); break; default: fRet = DefaultDetectServer(); break; } return fRet; } BOOL URL::DetectLocalDrive() { return IsLocalDrive(_pszWork); } BOOL URL::DetectFileServer() { ASSERT(_pszWork); PCWSTR psz = _pszWork; BOOL fResult = IsLocalDrive(_pszWork); if (fResult) { _dwSchemeNotes |= UPF_FILEISPATHURL; } else { fResult = DetectSymbols(SLASH, WHACK); } return fResult; } BOOL 
URL::DetectMkServer() { ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); BOOL fResult = (*psz==L'@'); if (fResult) { _pszWork = NextChar(psz + 1); } return fResult; } BOOL URL::DefaultDetectServer() { BOOL fResult = FALSE; if (DetectSymbols(SLASH, WHACK)) { PCWSTR psz = NextChar(_pszWork + 1); fResult = ((*psz==SLASH) || (*psz==WHACK)); } return fResult; } VOID URL::FeedServer(URL_STRING* pus) { ASSERT(_pszWork); switch (_eScheme) { case URL_SCHEME_FILE: FeedFileServer(pus); break; case URL_SCHEME_MK: FeedMkServer(pus); break; case URL_SCHEME_FTP: FeedFtpServer(pus); break; case URL_SCHEME_HTTP: case URL_SCHEME_HTTPS: FeedHttpServer(pus); break; default: FeedDefaultServer(pus); break; } } VOID URL::FeedMkServer(URL_STRING* pus) { ASSERT(_pszWork); pus->EnableMunging(); pus->Accept(L'@'); _pszWork = FeedUntil(_pszWork, pus, SLASH); if (!*_pszWork) { pus->TrimEndWhiteSpace(); } else { _pszWork = NextChar(_pszWork+1); } pus->Accept(SLASH); } VOID URL::FeedLocalDrive(URL_STRING* pus) { pus->Accept(*NextChar(_pszWork)); _pszWork = NextChar(_pszWork+1); pus->Accept(*_pszWork); _pszWork = NextChar(_pszWork+1); pus->DisableMunging(); } VOID URL::FeedFileServer(URL_STRING* pus) { PCWSTR psz = NextChar(_pszWork); while (*psz==SLASH || *psz==WHACK) { psz = NextChar(psz+1); } DWORD dwSlashes = (DWORD)(psz - _pszWork); switch (dwSlashes) { case 4: pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE); _dwSchemeNotes |= UPF_FILEISPATHURL; // 4 to 6 slashes == 1 UNC case 2: if (IsLocalDrive(psz)) { pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE); } case 5: case 6: pus->Accept(SLASH); pus->Accept(SLASH); if (!IsLocalDrive(psz)) { pus->EnableMunging(); psz = FeedUntil(psz, pus, SLASH, WHACK); if (!*psz) { pus->TrimEndWhiteSpace(); Reset(); } else { _pszWork = NextChar(psz+1); } } else { _pszWork = psz; } pus->Accept(SLASH); break; // If there are no slashes, then it can't be a UNC. 
case 0: if (IsLocalDrive(psz)) { pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE); } // We think of "file:/" and "file:///" to be on the local machine // And if there are more slashes than we typically handle, we'll treat them as 1. case 1: case 3: // This is a not-good-case default: pus->Accept(SLASH); pus->Accept(SLASH); pus->Accept(SLASH); _pszWork = NextChar(psz); break; } } VOID URL::FeedFtpServer(URL_STRING* pus) { ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } pus->EnableMunging(); // The following is a grotesque and gruesome hack. We need to preserve case for // embedded username/password _pszWork = psz; BOOL fPossibleUserPasswordCombo = FALSE; while (*psz && *psz!=SLASH && *psz!=POUND && *psz!=QUERY) { if (*psz==L'@') { fPossibleUserPasswordCombo = TRUE; break; } psz = NextChar(psz+1); } psz = _pszWork; if (fPossibleUserPasswordCombo) { while (*psz!=L'@') { pus->Accept(*psz); psz = NextChar(psz+1); } } // This still leaves the issue of slashes, colons, ?s, @s, and #s in passwords; I guess they // ought to be escaped. (You just can't win, sometimes.) while (*psz && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND) { pus->Accept(SmallForm(*psz)); psz = NextChar(psz+1); } if (*psz==COLON) { psz = FeedPort(psz, pus); } pus->DisableMunging(); _pszWork = psz; if (!*psz) { pus->TrimEndWhiteSpace(); pus->Accept(SLASH); } else { if (*psz==QUERY || *psz==POUND) { pus->Accept(SLASH); } else { pus->Accept(*psz); _pszWork = NextChar(psz+1); } } } VOID URL::FeedHttpServer(URL_STRING* pus) { // This is a version of FeedDefaultServer, stripped of non-essentials. // This includes a hack to enable username/password combos in http urls. 
ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } pus->EnableMunging(); // WARNING! FeedPort also calls Mark(). Must be careful that they don't overlap. pus->Mark(); PCWSTR pszRestart = psz; while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT) { pus->Accept(SmallForm(*psz)); psz = NextChar(psz+1); } if (*psz==COLON) { // We either have a port or a password. PCWSTR pszPort = psz; do { psz = NextChar(psz+1); } while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT); if (*psz!=AT) { psz = FeedPort(pszPort, pus); } } if (*psz==AT) { // We've hit a username/password combo. So we have to undo our case-changing psz = pszRestart; pus->EraseMarkedText(); while (*psz!=AT) { pus->Accept(*psz); psz = NextChar(psz+1); } // Now we carry on as before while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND) { pus->Accept(SmallForm(*psz)); psz = NextChar(psz+1); } if (*psz==COLON) { psz = FeedPort(psz, pus); } } pus->ClearMark(); pus->DisableMunging(); _pszWork = psz; if (!*psz) { pus->TrimEndWhiteSpace(); if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE)) { pus->Accept(SLASH); } } else { if (*psz==QUERY || *psz==POUND) { pus->Accept(SLASH); } else { pus->Accept(*psz); _pszWork = NextChar(psz+1); } } } VOID URL::FeedDefaultServer(URL_STRING* pus) { ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); if (!(_dwSchemeNotes & UPF_SCHEME_INTERNET)) { pus->DisableSlashFixing(); } if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } if (*psz==WHACK || *psz==SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } if (_dwSchemeNotes & UPF_SCHEME_INTERNET) { pus->EnableMunging(); while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND) { 
pus->Accept(SmallForm(*psz)); psz = NextChar(psz+1); } if (*psz==COLON) { psz = FeedPort(psz, pus); } pus->DisableMunging(); } else { while (*psz && *psz!=SLASH) { pus->Accept(*psz); psz = NextChar(psz+1); } } _pszWork = psz; if (!*psz) { pus->TrimEndWhiteSpace(); if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE)) { pus->Accept(SLASH); } } else { if (*psz==QUERY || *psz==POUND) { pus->Accept(SLASH); } else { pus->Accept(*psz); _pszWork = NextChar(psz+1); } } } PCWSTR URL::FeedPort(PCWSTR psz, URL_STRING* pus) { BOOL fIgnorePort = FALSE; pus->Mark(); psz = FeedUntil(psz, pus, SLASH, WHACK, POUND, QUERY); if (!(_dwFlags & URL_DONT_SIMPLIFY)) { // Here, decide whether or not to ignore the port // FEATURE we should actually be getting this from // the services file to find out the default protocol port // but we dont think that most people will change them - zekel 17-Dec-96 switch(_eScheme) { case URL_SCHEME_HTTP: if (pus->CompareMarkWith(L":80")==0) fIgnorePort = TRUE; break; case URL_SCHEME_HTTPS: if (pus->CompareMarkWith(L":443")==0) fIgnorePort = TRUE; break; case URL_SCHEME_FTP: if (pus->CompareMarkWith(L":21")==0) fIgnorePort = TRUE; break; case URL_SCHEME_GOPHER: if (pus->CompareMarkWith(L":70")==0) fIgnorePort = TRUE; break; } } if (fIgnorePort) { pus->EraseMarkedText(); } else { pus->ClearMark(); } return psz; } // ------------------------------------------------------------------------------- BOOL URL::DetectAbsolutePath() { BOOL fResult = FALSE; if (_dwSchemeNotes & UPF_SCHEME_OPAQUE) { fResult = TRUE; } else if (DetectSymbols(SLASH, WHACK)) { fResult = TRUE; _pszWork = NextChar(_pszWork+1); } return fResult; } BOOL URL::DetectPath() { return (*NextChar(_pszWork) && !DetectSymbols(QUERY, POUND)); } VOID URL::FeedPath(URL_STRING* pus, BOOL fMarkServer) { ASSERT(_pszWork); PCWSTR psz = NextChar(_pszWork); if (fMarkServer) { pus->Mark(); } if (_dwSchemeNotes & UPF_SCHEME_OPAQUE) { _pszWork = FeedUntil(psz, pus); 
pus->TrimEndWhiteSpace(); } else { DWORD cDots; BOOL fContinue = TRUE; do { cDots = 0; PCWSTR pszTmp = psz; if (_fPathCompressionOn) { cDots = DetectDots(&psz); } if (cDots) { if (cDots==2) { pus->Contract(); } continue; } psz = CopySegment(pszTmp, pus, &fContinue); } while (fContinue); _pszWork = psz; if (!*_pszWork) { pus->TrimEndWhiteSpace(); } } } // pfContinue indicates whether there's anything following that would // be of relevance to a path PCWSTR URL::CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue) { ASSERT(pfContinue); BOOL fStop = FALSE; psz = NextChar(psz); while (!fStop) { switch (*psz) { case POUND: if (_eScheme==URL_SCHEME_FILE) { // Since #s are valid for dos paths, we have to accept them except // for when they follow a .htm/.html file (See FindFragmentA/W) // However, some inconsistencies may still arise... for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++) { if (!pus->CompareLast(ExtTable[i].wszExt, ExtTable[i].cchExt)) break; } // If we haven't found a matching file extension, we'll treat as a filename character. if (i==ARRAYSIZE(ExtTable)) { pus->Accept(*psz); psz = NextChar(psz+1); break; } } goto next; case QUERY: // We're going to support query as a legitimate character in file urls. 
// *sigh* if (_eScheme==URL_SCHEME_FILE) { if (_fIgnoreQuery) { psz = wszBogus; } else { pus->CleanAccept(*psz); psz = NextChar(psz+1); break; } } case L'\0': next: *pfContinue = FALSE; fStop = TRUE; break; case SLASH: case WHACK: fStop = TRUE; // fall through default: pus->Accept(*psz); psz = NextChar(psz+1); break; } } return psz; } DWORD URL::DetectDots(PCWSTR* ppsz) { PCWSTR psz; if (ppsz) { psz = *ppsz; } else { psz = NextChar(_pszWork); } DWORD cDots = 0; if (*psz==DOT) { psz = NextChar(psz+1); cDots++; if (*psz==DOT) { psz = NextChar(psz+1); cDots++; } switch (*psz) { case WHACK: if (_eScheme==URL_SCHEME_MK) { cDots = 0; } case SLASH: psz = NextChar(psz+1); break; case QUERY: case POUND: case L'\0': break; default: cDots = 0; break; } } if (ppsz) { *ppsz = psz; } return cDots; } VOID URL::StopPathCompression() { _fPathCompressionOn = FALSE; } // ------------------------------------------------------------------------------- BOOL URL::DetectQueryOrFragment() { return (DetectSymbols(QUERY, POUND)); } BOOL URL::DetectQuery() { return (DetectSymbols(QUERY)); } VOID URL::IgnoreQuery() { ASSERT(_eScheme==URL_SCHEME_FILE); _fIgnoreQuery = TRUE; } VOID URL::FeedQueryAndFragment(URL_STRING* pus) { ASSERT(_pszWork); if (_dwSchemeNotes & UPF_SCHEME_OPAQUE) { PCWSTR psz = NextChar(_pszWork); while (*psz) { pus->Accept(*psz); psz = NextChar(psz+1); } _pszWork = psz; return; } PCWSTR psz = NextChar(_pszWork); // This is okay since *psz must equal { ? | # } if (*psz==QUERY) { pus->CleanAccept(QUERY); } // By munging, I mean taking an URL of form http://a/b#c?d and producing http://a/b?d#c // We do this by default; however, we won't do this when we've been passed a fragment only // as a relative url // Query's always override. 
if (*psz==QUERY) { pus->DropQuery(); pus->NotifyQuery(); pus->EnableMunging(); psz = NextChar(psz+1); while (*psz) { if (*psz==POUND) { pus->NotifyFragment(); } else { pus->Accept(*psz); } psz = NextChar(psz+1); } } else { // This line of code will determine whether we've been passed a fragment for a relative url // For properly formed base urls, this won't matter. BOOL fMunge = psz!=NextChar(_pszUrl); pus->DropFragment(); pus->NotifyFragment(); pus->EnableMunging(); psz = NextChar(psz+1); // The following line is bogus. It just keeps going until the end. Not good. // We MAY or MAY NOT fix this, depending on how much people scream at me. // This may be an issue for Netscape compatibility. // What we could do is: when either query or fragment would be blank, preserve as is. // This would minimise breaking compatibility across the board. // -- AKABIR, 09/28/98 while ((*psz==QUERY && !fMunge) || *psz) { if (*psz==QUERY) { pus->CleanAccept(QUERY); } else { pus->Accept(*psz); } psz = NextChar(psz+1); } if (*psz==QUERY) { pus->DropFragment(); pus->NotifyQuery(); pus->CleanAccept(*psz); psz = NextChar(psz+1); while (*psz) { pus->Accept(*psz); psz = NextChar(psz+1); } pus->TrimEndWhiteSpace(); pus->NotifyFragment(); psz = NextChar(_pszWork); pus->CleanAccept(*psz); psz = NextChar(psz+1); while (*psz!=QUERY) { pus->Accept(*psz); psz = NextChar(psz+1); } } } pus->TrimEndWhiteSpace(); pus->ClearMark(); } // ------------------------------------------------------------------------------- HRESULT BlendUrls(URL& urlBase, URL& urlRelative, URL_STRING* pusOut, DWORD dwFlags) { HRESULT hr = S_OK; // -- SCHEME -------------------------------------------------------------------------- // Examine each url's scheme. // We won't continue to use urlBase IF // 1. their tokenized schemes are not identical // 2. the scheme is a file // 3. 
the actual string schemes are not identical // this checks to make sure that these are the same scheme, and // that the scheme is allowed to be used in relative URLs // file: is not allowed to because of weirdness with drive letters // and \\UNC\shares BOOL fBaseServerDetected = FALSE, fRelativeServerDetected = FALSE; BOOL fDetectAbsoluteRelPath = FALSE; BOOL fDetectedRelScheme = urlRelative.DetectAndFeedScheme(pusOut); BOOL fDetectedBaseScheme = FALSE; if (fDetectedRelScheme && ((pusOut->QueryScheme()==URL_SCHEME_FILE) || (urlRelative.GetSchemeNotes() & UPF_SCHEME_OPAQUE))) { urlBase.Reset(); } else if ((fDetectedBaseScheme = urlBase.DetectAndFeedScheme(pusOut, fDetectedRelScheme))) { if (!fDetectedRelScheme) { urlRelative.SetScheme(urlBase.GetScheme(), urlBase.GetSchemeNotes()); } } // We fall back on the original parser for those cases we don't handle yet. // (dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY) if (((pusOut->QueryScheme()==URL_SCHEME_FILE) || (!(fDetectedRelScheme || fDetectedBaseScheme))) && ((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY))) { hr = E_FAIL; goto exit; } if ((pusOut->QueryScheme()==URL_SCHEME_UNKNOWN)) { // BUG BUG For IE4 compat, we need to use the old parser. However // if we're passed URL_PLUGGABLE_PROTOCOL, we'll use this parser. if (!(dwFlags & URL_PLUGGABLE_PROTOCOL)) { hr = E_FAIL; goto exit; } urlRelative.StopPathCompression(); // Same schemes, so now we look at the base url to divine the opacity if (urlBase.DetectAnything() && !urlBase.IsReset()) { if (!urlBase.DetectSlash()) { if (!urlRelative.DetectQueryOrFragment()) { urlBase.Reset(); } urlBase.AddSchemeNote(UPF_SCHEME_OPAQUE); urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE); pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE); } } else if (!urlRelative.DetectSlash()) { // If urlBase is reset, that means the schemes are different, // so we only have urlRelative to figure out opacity. 
urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE); pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE); } } else if (pusOut->QueryScheme()==URL_SCHEME_FTP) { // For ftp urls, we'll assume that we're being passed properly formed urls. // Some ftp sites allow backslashes in their object filenames, so we should // allow access to these. Also, domain passwords would otherwise need escaping. pusOut->DisableSlashFixing(); } if (dwFlags & URL_DONT_SIMPLIFY) { urlBase.StopPathCompression(); urlRelative.StopPathCompression(); } // -- SERVER -------------------------------------------------------------------------- // Decide on the server to use. // Question: if urlBase and UrlRelative have the same explicit server, isn't it pointless // to continue looking at url base anyway? pusOut->EnableMunging(); if (!(pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE)) { if (urlRelative.DetectServer() && !(urlBase.DetectServer() && (urlRelative.PeekNext()!=SLASH) && (urlRelative.PeekNext()!=WHACK))) { fRelativeServerDetected = TRUE; urlRelative.FeedServer(pusOut); urlBase.Reset(); } else if (urlBase.DetectServer()) { fBaseServerDetected = TRUE; urlBase.FeedServer(pusOut); } } // -- PATH ---------------------------------------------------------------------------- // Figure out the path // If the relative url has a path, and it starts with a slash/whack, forget about the // base's path and stuff. 
Otherwise, inherit the base and attach the relative // Potential problem: when rel path is empty, we expect to knock of the last base segment if (pusOut->QueryScheme()==URL_SCHEME_FILE) { // Hack for back compat // If the relative url consists of a query string, we'll append that to // our resultant url, rather than the base's query string if (urlRelative.DetectQuery()) { urlBase.IgnoreQuery(); } else { BOOL fResult1 = urlRelative.DetectAbsolutePath(); BOOL fResult2 = urlRelative.DetectLocalDrive(); if (fResult2) { urlBase.Reset(); urlRelative.FeedLocalDrive(pusOut); if (urlRelative.DetectAbsolutePath()) { pusOut->Accept(SLASH); } } else { if (urlBase.DetectLocalDrive()) { urlBase.FeedLocalDrive(pusOut); if (fResult1) { pusOut->Accept(SLASH); urlBase.Reset(); } else if (urlBase.DetectAbsolutePath()) { pusOut->Accept(SLASH); } } else if (fResult1) { if (fRelativeServerDetected) { pusOut->Accept(SLASH); } urlBase.Reset(); } } } } else if (pusOut->QueryScheme()==URL_SCHEME_UNKNOWN) { if (pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE) { if (!urlRelative.DetectAnything()) { urlRelative.Reset(); } } else { // This code fragment is for urls with unknown schemes, that are to be // treated hierarchically. Note that the authority (which has been passed in // already) is terminated with /, ?, or \0. The / is *optional*, and should be // appended if and only if the urls being combined call for it. if (urlBase.IsReset()) { // At this point, we're examining only the relative url. We've been brought to // a stop by the presence of /, ? or \0. So if (urlRelative.DetectSlash() && !fDetectedRelScheme) { pusOut->Accept(SLASH); } } else { // In this case, we have both the relative and base urls to look at. 
// What's the terminator for the base url if ((urlRelative.DetectSlash() || (!urlBase.DetectAnything() && urlRelative.DetectAnything() && !urlRelative.DetectQuery())) && !fDetectedRelScheme) { pusOut->Accept(SLASH); } } } } pusOut->EnableMunging(); if ((fBaseServerDetected && (fDetectAbsoluteRelPath = urlRelative.DetectAbsolutePath()))) { if (!fRelativeServerDetected) { pusOut->RestoreFlags(); } if (fDetectAbsoluteRelPath && urlRelative.DetectDots(NULL)) { urlRelative.StopPathCompression(); } urlRelative.FeedPath(pusOut); urlBase.Reset(); } else if (urlBase.DetectPath()) { urlBase.FeedPath(pusOut); // We don't want to contract the base path's free segment if // a. the scheme is opaque // b. the relative url has a path // c. the relative url has no path, just a fragment/query if (!(urlBase.GetSchemeNotes() & UPF_SCHEME_OPAQUE)) { pusOut->RestoreFlags(); if (urlRelative.DetectPath() || !urlRelative.DetectQueryOrFragment()) { if (urlRelative.DetectPath() || !fDetectedRelScheme) { pusOut->Contract(FALSE); } if (fDetectedRelScheme) { urlRelative.StopPathCompression(); } urlRelative.FeedPath(pusOut, FALSE); urlBase.Reset(); } else { urlRelative.FeedPath(pusOut, FALSE); } } else { urlRelative.StopPathCompression(); urlRelative.FeedPath(pusOut, FALSE); } } else if (urlRelative.DetectPath()) { if (!fRelativeServerDetected) { pusOut->RestoreFlags(); } else if (urlRelative.DetectDots(NULL)) { urlRelative.StopPathCompression(); } urlRelative.FeedPath(pusOut); urlBase.Reset(); } pusOut->ClearMark(); pusOut->DisableSlashFixing(); // -- QUERY AND FRAGMENT ----------------------------------------------------------- // Figure out the query if (urlBase.DetectQueryOrFragment()) { urlBase.FeedQueryAndFragment(pusOut); } if (urlRelative.DetectQueryOrFragment()) { urlRelative.FeedQueryAndFragment(pusOut); } pusOut->CleanAccept(L'\0'); if (pusOut->AnyProblems()) { hr = E_OUTOFMEMORY; } exit: return hr; } HRESULT FormUrlCombineResultW(LPCWSTR pszBase, LPCWSTR pszRelative, LPWSTR 
pszCombined, LPDWORD pcchCombined, DWORD dwFlags) { if ((dwFlags & URL_ESCAPE_UNSAFE) && (dwFlags & URL_ESCAPE_SPACES_ONLY)) { // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE // Deactivate UNSAFE dwFlags ^= URL_ESCAPE_UNSAFE; } DWORD dwTempFlags = dwFlags; if (dwFlags & URL_UNESCAPE) { if (dwFlags & URL_ESCAPE_UNSAFE) { dwTempFlags ^= URL_ESCAPE_UNSAFE; } if (dwFlags & URL_ESCAPE_SPACES_ONLY) { dwTempFlags ^= URL_ESCAPE_SPACES_ONLY; } } // Make a copy of the relative url if the client wants to either // a. unescape and escape the URL (since roundtripping is not guaranteed), or // b. use the same location for relative URL's buffer for the combined url HRESULT hr; URL curlBase, curlRelative; curlBase.Setup((PWSTR)pszBase); curlRelative.Setup((PWSTR)pszRelative); URL_STRING us(dwTempFlags); hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags); if (SUCCEEDED(hr)) { DWORD ccBuffer = us.GetTotalLength(); if ((dwFlags & URL_UNESCAPE) && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY))) { // No need to strip out URL_UNESCAPE hr = UrlEscapeW(us.GetStart(), pszCombined, pcchCombined, dwFlags); goto exit; } if (ccBuffer > *pcchCombined) { hr = E_POINTER; } else if (pszCombined) { memcpy(pszCombined, us.GetStart(), ccBuffer*sizeof(WCHAR)); // We return only the number of characters, not buffer size required. ccBuffer--; } *pcchCombined = ccBuffer; } else if (hr==E_FAIL) { // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY))); // We fall back on the original parser for those cases we don't handle yet. // We should do this if and only if the new parser // doesn't handle the flags cited above // or we're passed a pluggable protocol without the forcing flag. 
SHSTRW strwOut; hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags); if(SUCCEEDED(hr)) { hr = ReconcileHresults(hr, CopyOutW(&strwOut, pszCombined, pcchCombined)); } } exit: return hr; } HRESULT FormUrlCombineResultA(LPCSTR pszBase, LPCSTR pszRelative, LPSTR pszCombined, LPDWORD pcchCombined, DWORD dwFlags) { if ((dwFlags & URL_ESCAPE_UNSAFE) && (dwFlags & URL_ESCAPE_SPACES_ONLY)) { // In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE // Deactivate UNSAFE dwFlags ^= URL_ESCAPE_UNSAFE; } // Make a copy of the relative url if the client wants to either // a. unescape and escape the URL (since roundtripping is not guaranteed), or // b. use the same location for relative URL's buffer for the combined url SHSTRW strwBase; SHSTRW strwRelative; HRESULT hr; if (!(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative)))) { return E_OUTOFMEMORY; } DWORD dwTempFlags = dwFlags; if (dwFlags & URL_UNESCAPE) { if (dwFlags & URL_ESCAPE_UNSAFE) { dwTempFlags ^= URL_ESCAPE_UNSAFE; } if (dwFlags & URL_ESCAPE_SPACES_ONLY) { dwTempFlags ^= URL_ESCAPE_SPACES_ONLY; } } URL curlBase, curlRelative; curlBase.Setup(strwBase); curlRelative.Setup(strwRelative); URL_STRING us(dwTempFlags); hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags); if (SUCCEEDED(hr)) { SHSTRA straOut; if ((dwFlags & URL_UNESCAPE) && (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY))) { SHSTRW strwTemp; // No need to strip out URL_UNESCAPE hr = SHUrlEscape(us.GetStart(), &strwTemp, dwFlags); hr = ReconcileHresults(hr, straOut.SetStr(strwTemp)); } else { hr = straOut.SetStr(us.GetStart()); } if (SUCCEEDED(hr)) { hr = CopyOutA(&straOut, pszCombined, pcchCombined); } } else if (hr==E_FAIL) { // ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY))); // We fall back on the original parser for those cases we don't handle yet. 
// We should do this if and only if the new parser // doesn't handle the flags cited above SHSTRW strwOut; hr = SHUrlParse(strwBase, strwRelative, &strwOut, dwFlags); if (SUCCEEDED(hr)) { SHSTRA straOut; hr = ReconcileHresults(hr, straOut.SetStr(strwOut)); if(SUCCEEDED(hr)) hr = ReconcileHresults(hr, CopyOutA(&straOut, pszCombined, pcchCombined)); } } return hr; } #ifdef PROOFREAD_PARSES EXTERN_C DWORD g_dwProofMode; enum { PP_COMPARE, PP_ORIGINAL_ONLY, PP_NEW_ONLY }; //#define SHOW_MESSAGEBOX VOID LogData(PWSTR pszMsg) { SHSTRA str; str.SetStr(pszMsg); CHAR szFileName[MAX_PATH]; DWORD dwSize = MAX_PATH; CHAR szComputerName[MAX_PATH]; HANDLE hResultsFile = NULL; strcpy(szFileName, "\\\\BANYAN\\IPTD\\AKABIR\\1315\\"); if (!GetComputerNameA(szComputerName, &dwSize)) { goto exit; } lstrcatA(szFileName, szComputerName); hResultsFile = CreateFileA( szFileName, GENERIC_WRITE, FILE_SHARE_WRITE | FILE_SHARE_READ, NULL, OPEN_ALWAYS, 0, NULL); if (INVALID_HANDLE_VALUE == hResultsFile) hResultsFile = NULL; if (hResultsFile) { if (SetFilePointer(hResultsFile, 0, NULL, FILE_END)==0xFFFFFFFF) { goto exit; } DWORD dwFoo; if (0==WriteFile(hResultsFile, (PVOID)(PSTR)str, lstrlenW(pszMsg), &dwFoo, NULL)) { DWORD dwE = GetLastError(); } } exit: if (hResultsFile) { CloseHandle(hResultsFile); } } HRESULT ProofreadParses(HRESULT hr, LPCWSTR pszBase, LPCWSTR pszRelative, LPWSTR pszCombined, PDWORD pcchCombined, DWORD dwFlags, DWORD dwSize ) { static WCHAR szLast[2084]; SHSTRW strwOut; switch(g_dwProofMode) { case PP_COMPARE: { HRESULT hr2 = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags); WCHAR wstr[2084]; DWORD ccLen = min(2084, dwSize), ccUrl = SUCCEEDED(hr) ? 
*pcchCombined : 0; if(SUCCEEDED(hr2)) { hr2 = CopyOutW(&strwOut, wstr, &ccLen); if (hr2 == E_POINTER && hr == E_POINTER) { goto exitpoint; } // Check if cached combine equals the new parser's result if (!StrCmpW(pszCombined, szLast)) { goto exitpoint; } // Check if cached combine equals the old parser's result if (!StrCmpW(wstr, szLast)) { *pcchCombined = ccLen; StrCpyNW(pszCombined, wstr, ccLen + 1); hr = hr2; goto exitpoint; } if (SUCCEEDED(hr)) { StrCpyNW(szLast, wstr, ccLen); if (!StrCmpW(wstr, pszCombined)) { goto exitpoint; } DWORD dwBogus; if ((dwFlags & URL_ESCAPE_SPACES_ONLY) && !(dwFlags & URL_UNESCAPE)) { PCWSTR psz = FindSchemeW(pszCombined, &dwBogus); DWORD dw; if (psz && (URL_SCHEME_UNKNOWN !=GetSchemeTypeAndFlagsW(psz, dwBogus, &dw)) && (dw & UPF_SCHEME_OPAQUE)) { goto exitpoint; } } // Filter // base: "http://foo/bar/" // rel: "" // old: "http://foo/bar" // new: "http://foo/bar/" if ((*pszRelative==L'\0') && (!StrCmpNW(pszCombined, wstr, ccLen)) && (ccUrl==(ccLen+1)) && (pszCombined[ccLen]==L'/')) { goto exitpoint; } // Filter // base: "http://foo/bar/what?ho" // rel: "" // old: "http://foo/bar/?ho" // new: "http://foo/bar/" if ((*pszRelative==L'\0') && (!StrCmpNW(pszCombined, wstr, ccUrl)) && (wstr[ccUrl]==QUERY)) { goto exitpoint; } // Filter // base: "http://foo/bar/what?ho" // rel: "/" // old: "http://foo" // new: "http://foo/" if ((*pszRelative==L'/') && (!StrCmpNW(pszCombined, wstr, ccLen)) && (ccUrl==(ccLen+1)) && (pszCombined[ccLen]==L'/')) { goto exitpoint; } WCHAR wmsg[8192]; wnsprintfW(wmsg, ARRAYSIZE(wmsg), L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nOriginal result:\"%s\"\nNew result:\"%s\"\nUse original, not new, result?\n", dwFlags, pszBase, pszRelative, wstr, pszCombined ); #ifdef SHOW_MESSAGEBOX if (IDYES==MessageBoxW( NULL, wmsg, L"CONTACT AKABIR: URLCOMBINE FAILURE", MB_YESNO | MB_ICONERROR | MB_TASKMODAL)) { StrCpyNW(pszCombined, wstr, dwSize); *pcchCombined = ccLen; } else { StrCpyNW(szLast, pszCombined, *pcchCombined); } #endif 
LogData(wmsg); } else { WCHAR wmsg[8192]; wnsprintfW(wmsg, ARRAYSIZE(wmsg), L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nFAILED:%#x\nExpected:\"%s\"\n", dwFlags, pszBase, pszRelative, hr, wstr); #ifdef SHOW_MESSAGEBOX MessageBoxW( NULL, wmsg, L"CONTACT AKABIR: URLCOMBINE FAILURE", MB_OK | MB_ICONERROR | MB_TASKMODAL); #endif StrCpyNW(pszCombined, wstr, dwSize); *pcchCombined = ccLen; LogData(wmsg); } hr = hr2; } } break; case PP_NEW_ONLY: break; case PP_ORIGINAL_ONLY: { hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags); if(SUCCEEDED(hr)) { hr = CopyOutW(&strwOut, pszCombined, pcchCombined); } } break; } exitpoint: return hr; } #endif //PROOFREAD_PARSES LWSTDAPI UrlCombineW(LPCWSTR pszBase, LPCWSTR pszRelative, LPWSTR pszCombined, LPDWORD pcchCombined, DWORD dwFlags) { HRESULT hr = E_INVALIDARG; if (pszBase && pszRelative && pcchCombined) { RIP(IS_VALID_STRING_PTRW(pszBase, INTERNET_MAX_PATH_LENGTH)); RIP(IS_VALID_STRING_PTRW(pszRelative, INTERNET_MAX_PATH_LENGTH)); RIP(IS_VALID_WRITE_PTR(pcchCombined, DWORD)); RIP((!pszCombined || IS_VALID_WRITE_BUFFER(pszCombined, WCHAR, *pcchCombined))); #ifdef PROOFREAD_PARSES DWORD dwSize = *pcchCombined; #endif hr = FormUrlCombineResultW(pszBase, pszRelative, pszCombined, pcchCombined, dwFlags); #ifdef PROOFREAD_PARSES hr = ProofreadParses(hr, pszBase, pszRelative, pszCombined, pcchCombined, dwFlags, dwSize); #endif } return hr; } LWSTDAPI UrlCombineA(LPCSTR pszBase, LPCSTR pszRelative, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; if (!pszBase || !pszRelative || !pcchOut) { hr = E_INVALIDARG; } else { RIP(IS_VALID_STRING_PTRA(pszBase, INTERNET_MAX_PATH_LENGTH)); RIP(IS_VALID_STRING_PTRA(pszRelative, INTERNET_MAX_PATH_LENGTH)); RIP(IS_VALID_WRITE_PTR(pcchOut, DWORD)); RIP((!pszOut || IS_VALID_WRITE_BUFFER(pszOut, CHAR, *pcchOut))); hr = FormUrlCombineResultA(pszBase, pszRelative, pszOut, pcchOut, dwFlags); } return hr; } #else // end USE_FAST_PARSER LWSTDAPI UrlCombineW(LPCWSTR pszBase, LPCWSTR pszRelative, 
LPWSTR pszCombined, LPDWORD pcchCombined, DWORD dwFlags) { HRESULT hr = E_INVALIDARG; RIPMSG(pszBase && IS_VALID_STRING_PTRW(pszBase, -1), "UrlCombineW: Caller passed invalid pszBase"); RIPMSG(pszRelative && IS_VALID_STRING_PTRW(pszRelative, -1), "UrlCombineW: Caller passed invalid pszRelative"); RIPMSG(NULL!=pcchOut, "UrlCombineW: Caller passed invalid pcchOut"); RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineW: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut == pszBase || pszOut == pszRelative) DEBUGWhackPathStringW(pszOut, *pcchOut); else DEBUGWhackPathBufferW(pszOut, *pcchOut); } #endif if (pszBase && pszRelative && pcchCombined) { SHSTRW strwOut; hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags); if(SUCCEEDED(hr)) { hr = CopyOutW(&strwOut, pszCombined, pcchCombined); } } return hr; } LWSTDAPI UrlCombineA(LPCSTR pszBase, LPCSTR pszRelative, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags) { HRESULT hr; SHSTRA straOut; RIPMSG(pszBase && IS_VALID_STRING_PTRA(pszBase, -1), "UrlCombineA: Caller passed invalid pszBase"); RIPMSG(pszRelative && IS_VALID_STRING_PTRA(pszRelative, -1), "UrlCombineA: Caller passed invalid pszRelative"); RIPMSG(NULL!=pcchOut, "UrlCombineA: Caller passed invalid pcchOut"); RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineA: Caller passed invalid pszOut"); #ifdef DEBUG if (pcchOut) { if (pszOut == pszBase || pszOut == pszRelative) DEBUGWhackPathStringA(pszOut, *pcchOut); else DEBUGWhackPathBufferA(pszOut, *pcchOut); } #endif if (!pszBase || !pszRelative || !pcchOut) hr = E_INVALIDARG; else { SHSTRW strwOut; SHSTRW strwBase; SHSTRW strwRelative; if(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative))) hr = SHUrlParse((LPWSTR) strwBase, (LPWSTR)strwRelative, &strwOut, dwFlags); else hr = E_OUTOFMEMORY; if(SUCCEEDED(hr)) hr = straOut.SetStr(strwOut); } if(SUCCEEDED(hr) ) hr = CopyOutA(&straOut, 
pszOut, pcchOut); return hr; } #endif // !USE_FAST_PARSER // // Combines the desired scheme with the string after the scheme with a : in between. For // some protocols, a // is placed after the colon. // PRIVATE HRESULT ColonSlashSlashW ( LPCWSTR pszScheme, // url protocol (lower-case) LPCWSTR pszAfterScheme, // string to append after the protocol LPWSTR pszTranslatedUrl, // output buffer int cchMax // size of output buffer ) { StrCpyNW(pszTranslatedUrl, pszScheme, cchMax); // Append : after scheme and possibly a // as well. int cchScheme = lstrlenW(pszScheme); if (cchMax > cchScheme + 3) { pszTranslatedUrl[cchScheme] = L':'; // Number of characters to skip over in the buffer (how many non alphanums originally // followed the protocol) int cchSkip = 0; // Number of characters past the protocol: to skip over in the URL (Do we insert slashes?) int cchSlashes = 0; // Modify this conditional to include any other protocols to always follow with :// // Right now, http, https and ftp are automatic if (!StrCmpW(pszScheme, L"http") || !StrCmpW(pszScheme, L"ftp") || !StrCmpW(pszScheme, L"https") ) { // // When preparing to copy the contents of pszAfterScheme into pszUrl, we can // skip over as many as 3 non alpha numeric characters, since we are adding :// // to the protocol directly // while ((cchSkip < 3) && pszAfterScheme[cchSkip] && !IsCharAlphaNumericW(pszAfterScheme[cchSkip])) { cchSkip++; } pszTranslatedUrl[cchScheme+1] = L'/'; pszTranslatedUrl[cchScheme+2] = L'/'; pszTranslatedUrl[cchScheme+3] = L'\0'; cchSlashes = 2; } else // some other protocol { // just skip over colon cchSkip = 1; pszTranslatedUrl[cchScheme+1] = L'\0'; } // Copy the rest of the Url from the UrlBuffer into the Url StrCatBuffW(pszTranslatedUrl, pszAfterScheme + cchSkip, cchMax); } return S_OK; } // // Scans the url for a scheme and if it does not match the known schemes, then // a closest match is found. 
// LWSTDAPI UrlFixupW ( LPCWSTR pcszUrl, // URL to correct LPWSTR pszTranslatedUrl, // buffer for corrected url (can be same as pcszUrl) DWORD cchMax // size of pszTranslatedUrl ) { HRESULT hr = S_OK; // // Find the scheme // WCHAR szScheme[INTERNET_MAX_SCHEME_LENGTH]; ULONG cchScheme = 0; LPCWSTR pszScheme = FindSchemeW(pcszUrl, &cchScheme, TRUE); if (NULL == pszScheme || cchScheme > (ARRAYSIZE(szScheme)-1)) { // No scheme found return S_FALSE; } for (ULONG cch=0; cch < cchScheme; ++cch, ++pszScheme) { szScheme[cch] = Ascii_ToLowerW(*pszScheme); } szScheme[cch] = L'\0'; LPCWSTR pszAfterScheme = pszScheme; // // If input and output buffers are the same, copy the stuff after the scheme // to another buffer so it doesn't get clobbered when we recombine. // WCHAR szBuf[INTERNET_MAX_PATH_LENGTH]; if (pcszUrl == pszTranslatedUrl) { StrCpyNW(szBuf, pszAfterScheme, ARRAYSIZE(szBuf)); pszAfterScheme = szBuf; } // // See if it matches any of our known schemes // BOOL fKnownScheme = FALSE; for (ULONG i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); ++i) { if (StrCmpW(szScheme, g_mpUrlSchemeTypes[i].pszScheme) == 0) { fKnownScheme = TRUE; break; } } // // If it matches a known scheme, then just fix :// if it's ftp or http // if (fKnownScheme || // Check for pluggable protocols too NO_ERROR == SHGetValueW(HKEY_CLASSES_ROOT, szScheme, L"URL Protocol", NULL, NULL, NULL)) { ColonSlashSlashW(szScheme, pszAfterScheme, pszTranslatedUrl, cchMax); return S_OK; } // // Try to find a good match for the mispelled scheme // // These are weights used in the heuristic for the protocol matching // iFloor is roughly the minimum percentage of characters that must match in // order to make a change const int cFloor = 60; // A match in the first character has the greatest weight const int cCorrectFirstChar = 150; // Any other matched character const int cCorrectChar = 100; // The weight given to a character that only matches the preceding // or subsequent character in the protocol const int 
cOffByOneChar = 80; // We penalize characters that are off by one, but if we have already // observed the offset and subsequent characters continue the offset, we add this const int cOffsetBonus = 20; // The value of the best "match" found so far. Higher is a better match. int iBestEval = 0; // The protocol that's the best fit for the misspelled one LPCWSTR pszBestMatch = NULL; ULONG cchProt; for (ULONG j = 0; j < ARRAYSIZE(g_mpUrlSchemeTypes); ++j) { // Is this one we don't correct to? // // Note: https is removed from this list. The potential for an intended "http" to // be corrected to "https" is too high, and "http" is far more common. All this // means is that if someone wants to get to an https site, they have to have it right. // if (IsFlagSet(g_mpUrlSchemeTypes[j].dwFlags, UPF_SCHEME_DONTCORRECT)) continue; LPCWSTR pszProtocol = g_mpUrlSchemeTypes[j].pszScheme; cchProt = g_mpUrlSchemeTypes[j].cchScheme; // Evaluation of the fit of the currently tested protocol int iEval = 0; // // Keep track of the positive or negative offset in the protocol // such as "qhttp" instead of "http" or "elnet" instead of "telnet' // int iPosOffset = 0; int iNegOffset = 0; // // The first character has the most weight. 
"htp" corrects // to "http" and not "ftp" "ftt" corrects to "ftp" // if (*szScheme == *pszProtocol) { iEval += cCorrectFirstChar; } // Check for a negative offset else if(*szScheme == pszProtocol[1]) { iEval += cOffByOneChar; iNegOffset = 1; } // // We go through the characters in the protocol, even to the // terminating null if iPosOffset == 1 (it is never more than 1) // This is so the final "p" in "qhttp" gets a chance to be compared // for (i=1; i < cchProt + iPosOffset; i++) { // No points for null terminations matching if (szScheme[i] == L'\0') break; // // Check for adjacent character match // if (szScheme[i] == pszProtocol[i]) { iEval += cCorrectChar; } else { if (szScheme[i] == pszProtocol[i - 1]) { iEval += cOffByOneChar; if (iPosOffset) iEval += cOffsetBonus; else iPosOffset = 1; } else { if(szScheme[i] == pszProtocol[i + 1]) { iEval += cOffByOneChar; if (iNegOffset) iEval += cOffsetBonus; else iNegOffset = 1; } } } } // Divide the Evaluated value by the MAX(cchScheme, cchProt) iEval = iEval / (cchScheme > cchProt ? cchScheme : cchProt); // A new best match? if (iEval > iBestEval) { iBestEval = iEval; pszBestMatch = pszProtocol; // // If we found an unquestionably good match (only 1 non-firstchar typo), // break out of the loop // if (iEval >= 100) break; } } // If a good enough match was found, then correct url if (iBestEval >= cFloor) { ColonSlashSlashW(pszBestMatch, pszAfterScheme, pszTranslatedUrl,cchMax); } else { hr = S_FALSE; } return hr; } // This is a port of InternetCrackUrl from wininet. // NTRAID:108139 akabir We REALLY NEED TO CLEAN THIS CODE UP. // RAID 109209 // A lot of the stuff is redundant with the other code available, but we // need to be careful not to cause any regressions. Thus, I'm leaving it in for now. 
// // UrlSchemeList - the list of schemes that we support // typedef struct { LPWSTR SchemeName; DWORD SchemeLength; SHINTERNET_SCHEME SchemeType; BOOL NeedSlashes; } URL_SCHEME_INFO; #define UrlUnescapeInPlaceW(pszUrl, dwFlags) UrlUnescapeW(pszUrl, NULL, NULL, dwFlags | URL_UNESCAPE_INPLACE) // NOTE MEGA REDUNDANCY. We could use the similar table above and check for opaque. However // we'd have to modify that table PRIVATE URL_SCHEME_INFO UrlSchemeList[] = { NULL, 0, SHINTERNET_SCHEME_DEFAULT, FALSE, L"ftp", 3, SHINTERNET_SCHEME_FTP, TRUE, L"gopher", 6, SHINTERNET_SCHEME_GOPHER, TRUE, L"http", 4, SHINTERNET_SCHEME_HTTP, TRUE, L"https", 5, SHINTERNET_SCHEME_HTTPS, TRUE, L"file", 4, SHINTERNET_SCHEME_FILE, TRUE, L"news", 4, SHINTERNET_SCHEME_NEWS, FALSE, L"mailto", 6, SHINTERNET_SCHEME_MAILTO, FALSE, L"socks", 5, SHINTERNET_SCHEME_SOCKS, FALSE, L"javascript", 10, SHINTERNET_SCHEME_JAVASCRIPT, FALSE, L"vbscript", 8, SHINTERNET_SCHEME_VBSCRIPT, FALSE, L"res", 3, SHINTERNET_SCHEME_RES, TRUE }; #define NUMBER_OF_URL_SCHEMES ARRAYSIZE(UrlSchemeList) // swiped from wininet\macros.h #define IsDigit(c) (((c) >= L'0') && ((c) <= L'9')) #define ARGUMENT_PRESENT(ArgumentPointer) (\ (CHAR *)(ArgumentPointer) != (CHAR *)(NULL) ) BOOL ScanSchemes(LPWSTR pszToCheck, DWORD ccStr, PDWORD pwResult) { for (DWORD i=0; i[:] or [:] (i.e. [:] // pString = *Url; pColon = NULL; partLength = 0; *PartOne = pString; *PartOneLength = 0; *PartOneEscape = FALSE; *PartTwoEscape = FALSE; partEscape = PartOneEscape; length = *UrlLength; while ((*pString!=SLASH) && (*pString != L'\0') && (length != 0)) { if (*pString==HEX_ESCAPE) { // if there is a % in the string then it *must* (RFC 1738) be the // start of an escape sequence. This function just reports the // address of the substrings and their lengths; calling functions // must handle the escape sequences (i.e. 
it is their responsibility // to decide where to put the results) // *partEscape = TRUE; } if (*pString==COLON) { if (pColon != NULL) { // // we don't expect more than 1 ':' // // ISSUE Note that passwords might contain colons, and thus not work in this // case return ERROR_INTERNET_INVALID_URL; } pColon = pString; *PartOneLength = partLength; if (partLength == 0) { *PartOne = NULL; } partLength = 0; partEscape = PartTwoEscape; } else { ++partLength; } ++pString; --length; } // // we either ended on the host (or user) name or the port number (or // password), one of which we don't know the length of // if (pColon == NULL) { *PartOneLength = partLength; *PartTwo = NULL; *PartTwoLength = 0; *PartTwoEscape = FALSE; } else { *PartTwoLength = partLength; *PartTwo = pColon + 1; // // in both the : and : cases, we cannot have // the second part without the first, although both parts being zero // length is OK (host name will be sorted out elsewhere, but (for now, // at least) I am allowing <>:<> for username:password, since I don't // see it expressly disallowed in the RFC. I may be revisiting this code // later...) // // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif // if ((*PartOneLength == 0) && (partLength != 0)) { // return ERROR_INTERNET_INVALID_URL; // } } // // update the URL pointer and length remaining // *Url = pString; *UrlLength = length; return ERROR_SUCCESS; } DWORD GetUrlAddress( IN OUT LPWSTR* lpszUrl, OUT LPDWORD lpdwUrlLength, OUT LPWSTR* lpszUserName OPTIONAL, OUT LPDWORD lpdwUserNameLength OPTIONAL, OUT LPWSTR* lpszPassword OPTIONAL, OUT LPDWORD lpdwPasswordLength OPTIONAL, OUT LPWSTR* lpszHostName OPTIONAL, OUT LPDWORD lpdwHostNameLength OPTIONAL, OUT LPSHINTERNET_PORT lpPort OPTIONAL, OUT LPBOOL pHavePort ) /*++ Routine Description: This function extracts any and all parts of the address information for a generic URL. 
If any of the address parts contain escaped characters (%nn) then they are converted in situ The generic addressing format (RFC 1738) is: :@: The addressing information cannot contain a password without a user name, or a port without a host name NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name! (e.g. http://:0/-http-gw-internal-/menu.gif) Although only the lpszUrl and lpdwUrlLength fields are required, the address parts will be checked for presence and completeness Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName) then the accompanying lpdw field must also be supplied Arguments: lpszUrl - IN: pointer to the URL to parse OUT: URL remaining after address information N.B. The url-path is NOT canonicalized (unescaped) because it may contain protocol-specific information which must be parsed out by the protocol-specific parser lpdwUrlLength - returned length of the remainder of the URL after the address information lpszUserName - returned pointer to the user name This parameter can be omitted by those protocol parsers that do not require or expect user names in the URL lpdwUserNameLength - returned length of the user name part This parameter can be omitted by those protocol parsers that do not require or expect user names in the URL lpszPassword - returned pointer to the password This parameter can be omitted by those protocol parsers that do not require or expect user passwords in the URL lpdwPasswordLength - returned length of the password This parameter can be omitted by those protocol parsers that do not require or expect user passwords in the URL lpszHostName - returned pointer to the host name This parameter can be omitted by those protocol parsers that do not require the host name info lpdwHostNameLength - returned length of the host name This parameter can be omitted by those protocol parsers that do not require the host name info lpPort - returned value of the port field This parameter can be omitted by 
those protocol parsers that do not require or expect user port number pHavePort - returned boolean indicating whether a port was specified in the URL or not. This value is not returned if the lpPort parameter is omitted. Return Value: DWORD Success - ERROR_SUCCESS Failure - ERROR_INTERNET_INVALID_URL We could not parse some part of the address info, or we found address info where the protocol parser didn't expect any ERROR_INSUFFICIENT_BUFFER We could not convert an escaped string --*/ { LPWSTR pAt; DWORD urlLength; LPWSTR pUrl; BOOL part1Escape; BOOL part2Escape; WCHAR portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1]; DWORD portNumberLength; LPWSTR pPortNumber; DWORD error; LPWSTR hostName; DWORD hostNameLength; pUrl = *lpszUrl; urlLength = lstrlenW(pUrl); // // check to see if there is an '@' separating user name & password. If we // see a '/' or get to the end of the string before we see the '@' then // there is no username:password part // pAt = NULL; for (DWORD i = 0; i < urlLength; ++i) { if (pUrl[i]==SLASH) { break; } else if (pUrl[i]==AT) { pAt = &pUrl[i]; break; } } if (pAt != NULL) { DWORD addressPartLength; LPWSTR userName; DWORD userNameLength; LPWSTR password; DWORD passwordLength; addressPartLength = (DWORD) (pAt - pUrl); urlLength -= addressPartLength; error = GetUrlAddressInfo(&pUrl, &addressPartLength, &userName, &userNameLength, &part1Escape, &password, &passwordLength, &part2Escape ); if (error != ERROR_SUCCESS) { return error; } // // ensure there is no address information unparsed before the '@' // ASSERT(addressPartLength == 0); ASSERT(pUrl == pAt); if (ARGUMENT_PRESENT(lpszUserName)) { ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength)); // // convert the user name in situ // if (part1Escape) { ASSERT(userName != NULL); ASSERT(userNameLength != 0); error = DecodeUrlInSitu(userName, &userNameLength); if (error != ERROR_SUCCESS) { return error; } } *lpszUserName = userName; *lpdwUserNameLength = userNameLength; } if (ARGUMENT_PRESENT(lpszPassword)) { 
// convert the password in situ if (part2Escape) { ASSERT(userName != NULL); ASSERT(userNameLength != 0); ASSERT(password != NULL); ASSERT(passwordLength != 0); error = DecodeUrlInSitu(password, &passwordLength); if (error != ERROR_SUCCESS) { return error; } } *lpszPassword = password; *lpdwPasswordLength = passwordLength; } // // the URL pointer now points at the host:port fields (remember that // ExtractAddressParts() must have bumped pUrl up to the end of the // password field (if present) which ends at pAt) // ++pUrl; // // similarly, bump urlLength to account for the '@' // --urlLength; } else { // // no '@' therefore no username or password // if (ARGUMENT_PRESENT(lpszUserName)) { ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength)); *lpszUserName = NULL; *lpdwUserNameLength = 0; } if (ARGUMENT_PRESENT(lpszPassword)) { ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength)); *lpszPassword = NULL; *lpdwPasswordLength = 0; } } // // now get the host name and the optional port // pPortNumber = portNumber; portNumberLength = sizeof(portNumber); error = GetUrlAddressInfo(&pUrl, &urlLength, &hostName, &hostNameLength, &part1Escape, &pPortNumber, &portNumberLength, &part2Escape ); if (error != ERROR_SUCCESS) { return error; } // // the URL address information MUST contain the host name // // if ((hostName == NULL) || (hostNameLength == 0)) { // return ERROR_INTERNET_INVALID_URL; // } if (ARGUMENT_PRESENT(lpszHostName)) { ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength)); // // if the host name contains escaped characters, convert them in situ // if (part1Escape) { error = DecodeUrlInSitu(hostName, &hostNameLength); if (error != ERROR_SUCCESS) { return error; } } *lpszHostName = hostName; *lpdwHostNameLength = hostNameLength; } // // if there is a port field, convert it if there are escaped characters, // check it for valid numeric characters, and convert it to a number // if (ARGUMENT_PRESENT(lpPort)) { if (portNumberLength != 0) { DWORD i; DWORD port; ASSERT(pPortNumber != NULL); if 
(part2Escape) { error = DecodeUrlInSitu(pPortNumber, &portNumberLength); if (error != ERROR_SUCCESS) { return error; } } // // ensure all characters in the port number buffer are numeric, and // calculate the port number at the same time // for (i = 0, port = 0; i < portNumberLength; ++i) { if (!IsDigit(*pPortNumber)) { return ERROR_INTERNET_INVALID_URL; } port = port * 10 + (int)(*pPortNumber++ - L'0'); // We won't allow ports larger than 65535 ((2^16)-1) // We have to check this every time to make sure that someone // doesn't try to overflow a DWORD. if (port > 65535) { return ERROR_INTERNET_INVALID_URL; } } *lpPort = (SHINTERNET_PORT)port; if (ARGUMENT_PRESENT(pHavePort)) { *pHavePort = TRUE; } } else { *lpPort = INTERNET_INVALID_PORT_NUMBER; if (ARGUMENT_PRESENT(pHavePort)) { *pHavePort = FALSE; } } } // // update the URL pointer and the length of the url-path // *lpszUrl = pUrl; *lpdwUrlLength = urlLength; return ERROR_SUCCESS; } DWORD CrackUrl( IN OUT LPWSTR lpszUrl, IN DWORD dwUrlLength, IN BOOL bEscape, OUT LPSHINTERNET_SCHEME lpSchemeType OPTIONAL, OUT LPWSTR* lpszSchemeName OPTIONAL, OUT LPDWORD lpdwSchemeNameLength OPTIONAL, OUT LPWSTR* lpszHostName OPTIONAL, OUT LPDWORD lpdwHostNameLength OPTIONAL, OUT LPSHINTERNET_PORT lpServerPort OPTIONAL, OUT LPWSTR* lpszUserName OPTIONAL, OUT LPDWORD lpdwUserNameLength OPTIONAL, OUT LPWSTR* lpszPassword OPTIONAL, OUT LPDWORD lpdwPasswordLength OPTIONAL, OUT LPWSTR* lpszUrlPath OPTIONAL, OUT LPDWORD lpdwUrlPathLength OPTIONAL, OUT LPWSTR* lpszExtraInfo OPTIONAL, OUT LPDWORD lpdwExtraInfoLength OPTIONAL, OUT LPBOOL pHavePort ) /*++ Routine Description: Cracks an URL into its constituent parts Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName) then the accompanying lpdw field must also be supplied Arguments: lpszUrl - pointer to URL to crack. 
This buffer WILL BE OVERWRITTEN if it contains escape sequences that we will convert back to ANSI characters dwUrlLength - if not 0, string length of lpszUrl bEscape - TRUE if we are to escape the url-path lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP lpszSchemeName - returned scheme name lpdwSchemeNameLength - length of scheme name lpszHostName - returned host name lpdwHostNameLength - length of host name buffer lpServerPort - returned server port if present in the URL, else 0 lpszUserName - returned user name if present lpdwUserNameLength - length of user name buffer lpszPassword - returned password if present lpdwPasswordLength - length of password buffer lpszUrlPath - returned, canonicalized URL path lpdwUrlPathLength - length of url-path buffer lpszExtraInfo - returned search string or intra-page link if present lpdwExtraInfoLength - length of extra info buffer pHavePort - returned boolean indicating whether port was specified Return Value: DWORD Success - ERROR_SUCCESS Failure - ERROR_INTERNET_UNRECOGNIZED_SCHEME --*/ { DWORD error; DWORD schemeLength; SHINTERNET_SCHEME schemeType; // // if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length // if (dwUrlLength == 0) { dwUrlLength = lstrlenW(lpszUrl); } // // get parser based on the protocol name // for (schemeLength = 0; lpszUrl[schemeLength]!=COLON; ++schemeLength) { if ((dwUrlLength == 0) || (lpszUrl[schemeLength] == '\0')) { // // no ':' in URL? 
Bogus (dude) // error = ERROR_INTERNET_UNRECOGNIZED_SCHEME; goto quit; } --dwUrlLength; } DWORD i; int skip; BOOL isGeneric; BOOL needSlashes; BOOL haveSlashes; isGeneric = FALSE; needSlashes = FALSE; haveSlashes = FALSE; schemeType = SHINTERNET_SCHEME_UNKNOWN; if (ScanSchemes(lpszUrl, schemeLength, &i)) { schemeType = UrlSchemeList[i].SchemeType; needSlashes = UrlSchemeList[i].NeedSlashes; } skip = 1; // skip ':' if ((dwUrlLength > 3) && (StrCmpNIW(&lpszUrl[schemeLength], L"://", 3) == 0)) { skip = 3; // skip "://" haveSlashes = TRUE; } if (schemeType == SHINTERNET_SCHEME_FILE) isGeneric = TRUE; if (schemeType == SHINTERNET_SCHEME_NEWS || schemeType == SHINTERNET_SCHEME_UNKNOWN) { // // urls can be hierarchical or opaque. if the slashes // exist, then we should assume hierarchical // when we dont know the scheme or it is news:. // otherwise it is opaque (isGeneric) // needSlashes = haveSlashes; isGeneric = !haveSlashes; } // // If we don't have slashes, make sure we don't need them. // If we have slashes, make sure they are required. 
// if ((!haveSlashes && !needSlashes) || (haveSlashes && needSlashes)) { if (ARGUMENT_PRESENT(lpSchemeType)) { *lpSchemeType = schemeType; } if (ARGUMENT_PRESENT(lpszSchemeName)) { *lpszSchemeName = lpszUrl; *lpdwSchemeNameLength = schemeLength; } lpszUrl += schemeLength + skip; dwUrlLength -= skip; if (SHINTERNET_SCHEME_RES == schemeType) { if (ARGUMENT_PRESENT(lpszUserName)) { *lpszUserName = NULL; *lpdwUserNameLength = 0; } if (ARGUMENT_PRESENT(lpszPassword)) { *lpszPassword = NULL; *lpdwPasswordLength = 0; } if (ARGUMENT_PRESENT(lpServerPort)) { *lpServerPort = 0; } PWSTR psz = lpszUrl; while (*lpszUrl && *lpszUrl!=SLASH) lpszUrl++; if (ARGUMENT_PRESENT(lpszHostName)) { *lpszHostName = psz; *lpdwHostNameLength = (DWORD)(lpszUrl - psz); dwUrlLength -= *lpdwHostNameLength; error = DecodeUrlInSitu(*lpszHostName, lpdwHostNameLength); } } else if (isGeneric) { if (ARGUMENT_PRESENT(lpszUserName)) { *lpszUserName = NULL; *lpdwUserNameLength = 0; } if (ARGUMENT_PRESENT(lpszPassword)) { *lpszPassword = NULL; *lpdwPasswordLength = 0; } if (ARGUMENT_PRESENT(lpszHostName)) { *lpszHostName = NULL; *lpdwHostNameLength = 0; } if (ARGUMENT_PRESENT(lpServerPort)) { *lpServerPort = 0; } error = ERROR_SUCCESS; } else { error = GetUrlAddress(&lpszUrl, &dwUrlLength, lpszUserName, lpdwUserNameLength, lpszPassword, lpdwPasswordLength, lpszHostName, lpdwHostNameLength, lpServerPort, pHavePort ); } if (bEscape && (error == ERROR_SUCCESS)) { error = DecodeUrlInSitu(lpszUrl, &dwUrlLength); } if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszExtraInfo)) { *lpdwExtraInfoLength = 0; for (i = 0; i < (int)dwUrlLength; i++) { if (lpszUrl[i] == '?' 
|| lpszUrl[i] == '#') { *lpszExtraInfo = &lpszUrl[i]; *lpdwExtraInfoLength = dwUrlLength - i; dwUrlLength -= *lpdwExtraInfoLength; } } } if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszUrlPath)) { *lpszUrlPath = lpszUrl; *lpdwUrlPathLength = dwUrlLength; } } else { error = ERROR_INTERNET_UNRECOGNIZED_SCHEME; } quit: return error; } BOOL WINAPI UrlCrackW( IN LPCWSTR lpszUrl, IN DWORD dwUrlLength, IN DWORD dwFlags, IN LPSHURL_COMPONENTSW lpUrlComponents ) /*++ Routine Description: Cracks an URL into its constituent parts. Optionally escapes the url-path. We assume that the user has supplied large enough buffers for the various URL parts Arguments: lpszUrl - pointer to URL to crack dwUrlLength - 0 if lpszUrl is ASCIIZ string, else length of lpszUrl dwFlags - flags controlling operation lpUrlComponents - pointer to URL_COMPONENTS Return Value: BOOL Success - TRUE Failure - FALSE. Call GetLastError() for more info --*/ { DWORD error = ERROR_SUCCESS; // validate parameters if (ARGUMENT_PRESENT(lpszUrl)) { if (!dwUrlLength) { error = ProbeStringW((LPWSTR)lpszUrl, &dwUrlLength); } else if (IsBadReadPtr((LPVOID)lpszUrl, dwUrlLength*sizeof(WCHAR))) { error = ERROR_INVALID_PARAMETER; } } else { error = ERROR_INVALID_PARAMETER; } if (error != ERROR_SUCCESS) { goto quit; } if (IsBadWritePtr(lpUrlComponents, sizeof(*lpUrlComponents)) || (lpUrlComponents->dwStructSize != sizeof(*lpUrlComponents))) { error = ERROR_INVALID_PARAMETER; goto quit; } // // we only allow two flags for this API // if (dwFlags & ~(ICU_ESCAPE | ICU_DECODE)) { error = ERROR_INVALID_PARAMETER; goto quit; } // // get the individual components to return. 
If they reference a buffer then // check it for writeability // LPWSTR lpUrl; LPWSTR urlCopy; SHINTERNET_SCHEME schemeType; LPWSTR schemeName; DWORD schemeNameLength; LPWSTR hostName; DWORD hostNameLength; SHINTERNET_PORT nPort; LPWSTR userName; DWORD userNameLength; LPWSTR password; DWORD passwordLength; LPWSTR urlPath; DWORD urlPathLength; LPWSTR extraInfo; DWORD extraInfoLength; BOOL copyComponent; BOOL havePort; copyComponent = FALSE; schemeName = lpUrlComponents->lpszScheme; schemeNameLength = lpUrlComponents->dwSchemeLength; if ((schemeName != NULL) && (schemeNameLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)schemeName, schemeNameLength); if (error != ERROR_SUCCESS) { goto quit; } *schemeName = '\0'; copyComponent = TRUE; } hostName = lpUrlComponents->lpszHostName; hostNameLength = lpUrlComponents->dwHostNameLength; if ((hostName != NULL) && (hostNameLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)hostName, hostNameLength); if (error != ERROR_SUCCESS) { goto quit; } *hostName = '\0'; copyComponent = TRUE; } userName = lpUrlComponents->lpszUserName; userNameLength = lpUrlComponents->dwUserNameLength; if ((userName != NULL) && (userNameLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)userName, userNameLength); if (error != ERROR_SUCCESS) { goto quit; } *userName = '\0'; copyComponent = TRUE; } password = lpUrlComponents->lpszPassword; passwordLength = lpUrlComponents->dwPasswordLength; if ((password != NULL) && (passwordLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)password, passwordLength); if (error != ERROR_SUCCESS) { goto quit; } *password = '\0'; copyComponent = TRUE; } urlPath = lpUrlComponents->lpszUrlPath; urlPathLength = lpUrlComponents->dwUrlPathLength; if ((urlPath != NULL) && (urlPathLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)urlPath, urlPathLength); if (error != ERROR_SUCCESS) { goto quit; } *urlPath = '\0'; copyComponent = TRUE; } extraInfo = lpUrlComponents->lpszExtraInfo; extraInfoLength = 
lpUrlComponents->dwExtraInfoLength; if ((extraInfo != NULL) && (extraInfoLength != 0)) { error = ProbeWriteStringBufferW((LPVOID)extraInfo, extraInfoLength); if (error != ERROR_SUCCESS) { goto quit; } *extraInfo = '\0'; copyComponent = TRUE; } // // we can only escape or decode the URL if the caller has provided us with // buffers to write the escaped strings into // if (dwFlags & (ICU_ESCAPE | ICU_DECODE)) { if (!copyComponent) { error = ERROR_INVALID_PARAMETER; goto quit; } // // create a copy of the URL. CrackUrl() will modify this in situ. We // need to copy the results back to the user's buffer(s) // DWORD dw = dwUrlLength; if (!dw) { dw = lstrlenW(lpszUrl); } urlCopy = new WCHAR[dw+1]; if (urlCopy == NULL) { error = ERROR_NOT_ENOUGH_MEMORY; goto quit; } memcpy(urlCopy, lpszUrl, (dw+1)*sizeof(WCHAR)); lpUrl = urlCopy; } else { lpUrl = (LPWSTR)lpszUrl; urlCopy = NULL; } // // crack the URL into its constituent parts // error = CrackUrl(lpUrl, dwUrlLength, (dwFlags & ICU_ESCAPE) ? TRUE : FALSE, &schemeType, &schemeName, &schemeNameLength, &hostName, &hostNameLength, &nPort, &userName, &userNameLength, &password, &passwordLength, &urlPath, &urlPathLength, extraInfoLength ? &extraInfo : NULL, extraInfoLength ? 
&extraInfoLength : 0, &havePort ); if (error != ERROR_SUCCESS) { goto crack_error; } BOOL copyFailure; copyFailure = FALSE; // // update the URL_COMPONENTS structure based on the results, and what was // asked for // if (lpUrlComponents->lpszScheme != NULL) { if (lpUrlComponents->dwSchemeLength > schemeNameLength) { memcpy(lpUrlComponents->lpszScheme, schemeName, schemeNameLength*sizeof(WCHAR)); lpUrlComponents->lpszScheme[schemeNameLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszScheme, 0); } } else { ++schemeNameLength; copyFailure = TRUE; } lpUrlComponents->dwSchemeLength = schemeNameLength; } else if (lpUrlComponents->dwSchemeLength != 0) { lpUrlComponents->lpszScheme = schemeName; lpUrlComponents->dwSchemeLength = schemeNameLength; } if (lpUrlComponents->lpszHostName != NULL) { if (lpUrlComponents->dwHostNameLength > hostNameLength) { memcpy(lpUrlComponents->lpszHostName, hostName, hostNameLength*sizeof(WCHAR)); lpUrlComponents->lpszHostName[hostNameLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszHostName, 0); } } else { ++hostNameLength; copyFailure = TRUE; } lpUrlComponents->dwHostNameLength = hostNameLength; } else if (lpUrlComponents->dwHostNameLength != 0) { lpUrlComponents->lpszHostName = hostName; lpUrlComponents->dwHostNameLength = hostNameLength; } if (lpUrlComponents->lpszUserName != NULL) { if (lpUrlComponents->dwUserNameLength > userNameLength) { memcpy(lpUrlComponents->lpszUserName, userName, userNameLength*sizeof(WCHAR)); lpUrlComponents->lpszUserName[userNameLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszUserName, 0); } } else { ++userNameLength; copyFailure = TRUE; } lpUrlComponents->dwUserNameLength = userNameLength; } else if (lpUrlComponents->dwUserNameLength != 0) { lpUrlComponents->lpszUserName = userName; lpUrlComponents->dwUserNameLength = userNameLength; } if (lpUrlComponents->lpszPassword != NULL) { if 
(lpUrlComponents->dwPasswordLength > passwordLength) { memcpy(lpUrlComponents->lpszPassword, password, passwordLength*sizeof(WCHAR)); lpUrlComponents->lpszPassword[passwordLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszPassword, 0); } } else { ++passwordLength; copyFailure = TRUE; } lpUrlComponents->dwPasswordLength = passwordLength; } else if (lpUrlComponents->dwPasswordLength != 0) { lpUrlComponents->lpszPassword = password; lpUrlComponents->dwPasswordLength = passwordLength; } if (lpUrlComponents->lpszUrlPath != NULL) { if(schemeType == SHINTERNET_SCHEME_FILE) { // // for file: urls we return the path component // as a valid dos path. // copyFailure = FAILED(PathCreateFromUrlW(lpUrl, lpUrlComponents->lpszUrlPath, &(lpUrlComponents->dwUrlPathLength), 0)); } else if (lpUrlComponents->dwUrlPathLength > urlPathLength) { memcpy(lpUrlComponents->lpszUrlPath, urlPath, urlPathLength*sizeof(WCHAR)); lpUrlComponents->lpszUrlPath[urlPathLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszUrlPath, 0); } lpUrlComponents->dwUrlPathLength = urlPathLength; } else { ++urlPathLength; copyFailure = TRUE; lpUrlComponents->dwUrlPathLength = urlPathLength; } } else if (lpUrlComponents->dwUrlPathLength != 0) { lpUrlComponents->lpszUrlPath = urlPath; lpUrlComponents->dwUrlPathLength = urlPathLength; } if (lpUrlComponents->lpszExtraInfo != NULL) { if (lpUrlComponents->dwExtraInfoLength > extraInfoLength) { memcpy(lpUrlComponents->lpszExtraInfo, extraInfo, extraInfoLength*sizeof(WCHAR)); lpUrlComponents->lpszExtraInfo[extraInfoLength] = '\0'; if (dwFlags & ICU_DECODE) { UrlUnescapeInPlaceW(lpUrlComponents->lpszExtraInfo, 0); } } else { ++extraInfoLength; copyFailure = TRUE; } lpUrlComponents->dwExtraInfoLength = extraInfoLength; } else if (lpUrlComponents->dwExtraInfoLength != 0) { lpUrlComponents->lpszExtraInfo = extraInfo; lpUrlComponents->dwExtraInfoLength = extraInfoLength; } // // we may have failed to copy one 
or more components because we didn't have // enough buffer space. // // N.B. Don't change error below here. If need be, move this test lower // if (copyFailure) { error = ERROR_INSUFFICIENT_BUFFER; } // // copy the scheme type // lpUrlComponents->nScheme = schemeType; // // convert 0 port (not in URL) to default value for scheme // if (nPort == INTERNET_INVALID_PORT_NUMBER && !havePort) { switch (schemeType) { case SHINTERNET_SCHEME_FTP: nPort = INTERNET_DEFAULT_FTP_PORT; break; case SHINTERNET_SCHEME_GOPHER: nPort = INTERNET_DEFAULT_GOPHER_PORT; break; case SHINTERNET_SCHEME_HTTP: nPort = INTERNET_DEFAULT_HTTP_PORT; break; case SHINTERNET_SCHEME_HTTPS: nPort = INTERNET_DEFAULT_HTTPS_PORT; break; } } lpUrlComponents->nPort = nPort; crack_error: if (urlCopy != NULL) { delete [] urlCopy; } quit: // return HRESULT_FROM_WIN32(error); if (error!=ERROR_SUCCESS) { SetLastError(error); } return error==ERROR_SUCCESS; }