You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1230 lines
33 KiB
1230 lines
33 KiB
//+---------------------------------------------------------------------------
|
|
//
|
|
// Microsoft Windows
|
|
// Copyright (C) Microsoft Corporation, 1992 - 1995.
|
|
//
|
|
// File: curl.cxx
|
|
//
|
|
// Contents: handle url parsing and context urls parsing
|
|
//
|
|
// Classes:
|
|
//
|
|
// Functions:
|
|
//
|
|
// History: 2-20-96 JohannP (Johann Posch) Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
#include <iapp.h>
|
|
#include <shlwapi.h>
|
|
#include <shlwapip.h>
|
|
|
|
// Perf/debug logging tags referenced by the PerfDbgLog* calls in this file:
//   tagCUrl    - used by the CUrl member functions
//   tagCUrlApi - used by the ConstructURL helper API
PerfDbgTag(tagCUrl, "Urlmon", "Log CUrl", DEB_PROT);
PerfDbgTag(tagCUrlApi, "Urlmon", "Log CUrl API", DEB_ASYNCAPIS);
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: IsStreamEnabled
|
|
//
|
|
// Synopsis: returns TRUE iff the registry key for the mk: protocol is set
|
|
//
|
|
// Arguments: (none)
|
|
//
|
|
// Returns:
|
|
//
|
|
// History: 6-7-96 craigc Created
|
|
//
|
|
// Notes:
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
// Global switch for the UTF-8 server-name workaround; declared here, defined
// elsewhere. Consulted by CUrl::ParseUrl.
extern BOOL g_bGlobalUTF8hackEnabled;

// Forward declaration; the definition appears later in this file.
BOOL StringContainsHighAnsiW(LPCWSTR);
|
|
|
|
//
// IsStreamEnabled
//
// Returns TRUE iff the value "MkEnabled" under
// HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Internet Explorer holds the string
// "yes" (case-insensitive compare).
//
// The answer is cached in function-local statics once the value has been read
// successfully. If the key or the value cannot be read, FALSE is returned and
// nothing is cached, so the registry is consulted again on the next call.
//
BOOL IsStreamEnabled()
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "IsStreamEnabled",
                NULL
                ));

    HKEY hk;
    char szBuf[256];
    DWORD dwType;
    DWORD dwSize;
    const char szKey[] = "SOFTWARE\\Microsoft\\Internet Explorer";
    const char szValue[] = "MkEnabled";
    const char szYes[] = "yes";

    static BOOL fChecked = FALSE;
    static BOOL fEnabled = FALSE;

    // NOTE(review): mxs is constructed afresh on every call, so this lock
    // presumably cannot serialize concurrent callers against each other --
    // confirm whether a shared (static/global) mutex was intended.
    CMutexSem mxs;
    CLock lck(mxs);

    if (fChecked)
    {
        DEBUG_LEAVE(fEnabled);
        return fEnabled;
    }

    if (RegOpenKey(HKEY_LOCAL_MACHINE, szKey, &hk) != ERROR_SUCCESS)
    {
        DEBUG_LEAVE(FALSE);
        return( FALSE );
    }

    // Registry strings are not guaranteed to be stored with a terminating
    // NUL, so reserve one byte of the buffer and terminate it ourselves
    // before handing it to lstrcmpi().
    dwSize = sizeof(szBuf) - 1;
    if (RegQueryValueEx( hk, szValue, NULL, &dwType, (BYTE*)szBuf, &dwSize ) != ERROR_SUCCESS)
    {
        RegCloseKey( hk );

        DEBUG_LEAVE(FALSE);
        return( FALSE );
    }
    RegCloseKey( hk );

    // On success dwSize is the number of bytes copied, which is at most
    // sizeof(szBuf) - 1, so this index is always inside the buffer.
    szBuf[dwSize] = '\0';

    fEnabled = (dwSize && (lstrcmpi( szYes, szBuf ) == 0));
    fChecked = TRUE;

    DEBUG_LEAVE(fEnabled);
    return fEnabled;
}
|
|
|
|
//
|
|
// GetUrlScheme() returns one of the URL_SCHEME_* constants as
|
|
// defined in shlwapip.h
|
|
// example "http://foo" returns URL_SCHEME_HTTP
|
|
//
|
|
//
// GetUrlScheme
//
// Maps a URL string to its URL_SCHEME_* value by handing it to ParseURL().
// A NULL pointer, or a string ParseURL() rejects, yields URL_SCHEME_INVALID.
//
DWORD GetUrlScheme(IN LPCTSTR pcszUrl)
{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "GetUrlScheme",
                "%#x",
                pcszUrl
                ));

    // Pessimistic default; only overwritten when parsing succeeds.
    DWORD dwScheme = URL_SCHEME_INVALID;

    if (pcszUrl != NULL)
    {
        PARSEDURL parsed;
        parsed.cbSize = sizeof(parsed);

        if (SUCCEEDED(ParseURL(pcszUrl, &parsed)))
        {
            dwScheme = parsed.nScheme;
        }
    }

    DEBUG_LEAVE(dwScheme);
    return dwScheme;
}
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Method: CUrl::CUrl
|
|
//
|
|
// Synopsis:
|
|
//
|
|
// Arguments: (none)
|
|
//
|
|
// Returns:
|
|
//
|
|
// History: 2-20-96 JohannP (Johann Posch) Created
|
|
//
|
|
// Notes:
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
//
// CUrl::CUrl
//
// Puts the object into its empty state: no buffers are allocated here.
// Storage is obtained later by CUrlInitBasic() / CUrlInitAll() / ParseUrl().
//
CUrl::CUrl()
{
    DEBUG_ENTER((DBG_APP,
                None,
                "CUrl::CUrl",
                "this=%#x",
                this
                ));

    PerfDbgLog(tagCUrl, this, "+CUrl::CUrl");

    // The basic allocation unit and the fields carved out of it.
    _pBasicAllocUnit = NULL;
    _pszServerName   = NULL;
    _pszUserName     = NULL;
    _pszPassword     = NULL;
    _pszProtocol     = NULL;
    _pszBaseURL      = NULL;

    // Separately allocated URL buffers.
    _pszPartURL = NULL;
    _pszFullURL = NULL;
    _pszObject  = NULL;

    // Parse results.
    _dwProto = 0;
    _ipPort  = 0;

    // State for the UTF-8 server-name workaround.
    _pszUTF8ServerName = NULL;
    _dwServerCodePage  = 0;
    _fUTF8hack         = FALSE;

    PerfDbgLog(tagCUrl, this, "-CUrl::CUrl");

    DEBUG_LEAVE(0);
}
|
|
|
|
//
// CUrl::CUrlInitBasic
//
// Allocates the single "basic" buffer and carves it into fixed-size fields:
//
//     _pszServerName - URL_FIELD_SIZE
//     _pszUserName   - URL_FIELD_SIZE
//     _pszPassword   - URL_FIELD_SIZE
//     _pszProtocol   - 12
//     _pszBaseURL    - dwBaseUrlSize + 1
//
// Every field is initialised to the empty string.
//
// Arguments: [dwBaseUrlSize] -- capacity wanted for the base URL, not
//                               counting the terminating NUL
//
// Returns:   TRUE on success; FALSE if the allocation fails or the requested
//            size would overflow a DWORD.
//
BOOL
CUrl::CUrlInitBasic(DWORD dwBaseUrlSize)
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::CUrlInitBasic",
                "this=%#x, %#x",
                this, dwBaseUrlSize
                ));

    const DWORD cbProtocol = 12;    // size of the scheme field
    const DWORD cbFixed = (3 * (URL_FIELD_SIZE)) + cbProtocol + 1;

    BOOL fRet = FALSE;

    // Refuse sizes whose sum would wrap around: a wrapped total would
    // produce a buffer far smaller than the caller believes it has.
    if (dwBaseUrlSize <= ((DWORD)-1) - cbFixed)
    {
        DWORD dwBasicUnitSize = cbFixed + dwBaseUrlSize;
        _pBasicAllocUnit = new char[dwBasicUnitSize];

        if( _pBasicAllocUnit )
        {
            _pszServerName = _pBasicAllocUnit;
            _pszUserName = _pszServerName + URL_FIELD_SIZE;
            _pszPassword = _pszUserName + URL_FIELD_SIZE;
            _pszProtocol = _pszPassword + URL_FIELD_SIZE;
            _pszBaseURL = _pszProtocol + cbProtocol;

            *_pszServerName = '\0';
            *_pszUserName = '\0';
            *_pszPassword = '\0';
            *_pszProtocol = '\0';
            *_pszBaseURL = '\0';

            fRet = TRUE;
        }
    }

    DEBUG_LEAVE(fRet);
    return fRet;
}
|
|
|
|
//
// CUrl::CUrlInitAll
//
// Backward-compatible initialisation: sizes everything for MAX_URL_SIZE.
// Calls CUrlInitBasic(MAX_URL_SIZE) and then allocates _pszPartURL,
// _pszFullURL and _pszObject with MAX_URL_SIZE + 1 bytes each, all set to
// the empty string.
//
// Returns: TRUE on success. If any of the three URL buffers cannot be
//          allocated, those that were are freed, all three pointers are
//          reset to NULL and FALSE is returned.
//
BOOL
CUrl::CUrlInitAll()
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::CUrlInitAll",
                "this=%#x",
                this
                ));

    BOOL fRet = CUrlInitBasic(MAX_URL_SIZE);

    if( fRet )
    {
        _pszPartURL = new char[MAX_URL_SIZE + 1];
        _pszFullURL = new char[MAX_URL_SIZE + 1];
        _pszObject = new char[MAX_URL_SIZE + 1];

        if( !_pszPartURL || !_pszFullURL || !_pszObject )
        {
            // The buffers come from new[], so they must go back through
            // delete[] (the destructor does the same).
            if (_pszPartURL)
                delete [] _pszPartURL;

            if (_pszFullURL)
                delete [] _pszFullURL;

            if (_pszObject)
                delete [] _pszObject;

            _pszPartURL = NULL;
            _pszFullURL = NULL;
            _pszObject = NULL;

            fRet = FALSE;
        }
        else
        {
            *_pszPartURL = '\0';
            *_pszFullURL = '\0';
            *_pszObject = '\0';
        }
    }

    DEBUG_LEAVE(fRet);
    return fRet;
}
|
|
|
|
//
// CUrl::~CUrl
//
// Frees every buffer the object owns. The server/user/password/protocol/
// base-URL fields are carved out of _pBasicAllocUnit (see CUrlInitBasic) and
// therefore are not freed individually.
//
CUrl::~CUrl()
{
    DEBUG_ENTER((DBG_APP,
                None,
                "CUrl::~CUrl",
                "this=%#x",
                this
                ));

    // delete [] on a null pointer is a no-op, so no guards are needed.
    delete [] _pBasicAllocUnit;
    delete [] _pszPartURL;
    delete [] _pszFullURL;
    delete [] _pszObject;
    delete [] _pszUTF8ServerName;

    DEBUG_LEAVE(0);
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: ParseUrl
|
|
//
|
|
// Synopsis: Breaks down a URL and puts servername, objectname and port
|
|
// into the download structure.
|
|
//
|
|
// Arguments:
|
|
//
|
|
//
|
|
// Returns: TRUE if the URL was successfully parsed.
|
|
//
|
|
// History: Created Unknown
|
|
// 02-20-95 JohannP (Johann Posch) Created Class
|
|
// 03-20-95 JoeS (Joe Souza) Special FILE: syntaxes
|
|
//
|
|
// Notes: URL should have already been parsed earlier by ConstructURL.
|
|
// This function will crack the URL.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
//
// CUrl::ParseUrl
//
// Builds _pszFullURL from _pszBaseURL (combined with _pszPartURL when that is
// non-empty, i.e. on redirects), strips the fragment, determines _dwProto and
// then cracks the URL:
//
//   - file: URLs are turned into a path in _pszObject via PathCreateFromUrl.
//   - everything else goes through InternetCrackUrl, which fills
//     _pszProtocol, _pszServerName, _pszUserName, _pszPassword, _pszObject
//     and _ipPort.
//
// For http/https, when fUTF8Required and g_bGlobalUTF8hackEnabled are both
// set and the host name of pwzUrl contains characters >= 0x80, the host name
// is converted to dwCodePage (the ANSI code page if dwCodePage is CP_UTF8),
// the original server name is saved in _pszUTF8ServerName, _pszServerName is
// replaced by the converted name and _pszFullURL is rebuilt from it.
//
// Arguments: [fUTF8Required] -- caller wants the UTF-8 host-name workaround
//            [pwzUrl]        -- wide-character URL the host name is taken from
//            [dwCodePage]    -- code page for the host-name conversion
//
// Returns:   TRUE if the URL was successfully parsed.
//
// NOTE(review): in the non-redirect path, buffers that already exist are
// reused without re-checking their size -- confirm they are always at least
// strlen(_pszBaseURL) + 1 bytes.
//
BOOL CUrl::ParseUrl(BOOL fUTF8Required, LPCWSTR pwzUrl, DWORD dwCodePage)
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::ParseUrl",
                "this=%#x, %B",
                this, fUTF8Required
                ));

    PerfDbgLog1(tagCUrl, this, "+CUrl::ParseUrl Base:[%s]", _pszBaseURL);
    BOOL fRet = TRUE;
    // Zero-initialised so that fields read after a failed InternetCrackUrl
    // (nPort) have a defined value.
    URL_COMPONENTS url = {0};
    DWORD cchFullURL;
    CHAR * pch;
    DWORD dwFullUrlLen;

    _fUTF8hack = FALSE; // possibly set TRUE later for the non-redirect codepath.

    if (!_pszBaseURL)
    {
        // CUrlInitBasic()/CUrlInitAll() has not been called successfully.
        fRet = FALSE;
        goto Exit;
    }

    if (_pszPartURL && _pszPartURL[0] != '\0' ) // This string will be set for redirects.
    {
        //
        // _pszFullURL and _pszObject have to be re-allocated, since the
        // size can grow.
        //
        dwFullUrlLen = strlen(_pszBaseURL) + strlen(_pszPartURL) + 1;
        if(dwFullUrlLen > MAX_URL_SIZE)
        {
            dwFullUrlLen = MAX_URL_SIZE + 1;
        }

        // Allocate unconditionally: buffers that are still NULL are created
        // here just as the non-redirect path does.
        delete [] _pszFullURL;
        _pszFullURL = new char[dwFullUrlLen];

        delete [] _pszObject;
        _pszObject = new char[dwFullUrlLen];

        if( !_pszFullURL || !_pszObject )
        {
            fRet = FALSE;
            goto Exit;
        }

        cchFullURL = dwFullUrlLen;

        if(FAILED(UrlCombine(_pszBaseURL, _pszPartURL, _pszFullURL, &cchFullURL, URL_FILE_USE_PATHURL)))
        {
            fRet = FALSE;
            PProtAssert(FALSE && "Combine failed in ParseUrl!\n");

            goto Exit;
        }
    }
    else
    {
        // FullURL is BaseURL
        dwFullUrlLen = strlen(_pszBaseURL) + 1;
        if( !_pszFullURL )
        {
            _pszFullURL = new char[dwFullUrlLen];
        }

        if( !_pszObject )
        {
            _pszObject = new char[dwFullUrlLen];
        }

        if( !_pszFullURL || !_pszObject )
        {
            fRet = FALSE;
            goto Exit;
        }

        lstrcpy(_pszFullURL, _pszBaseURL);
    }

    // Trim off intra-page link.
    //
    // NB: Don't use ExtraInfo below to do this because you will
    // also lose the search string this way.
    //
    // Also, we need to do this before we decode the URL below,
    // so that we don't trim off the wrong '#' if there was one
    // encoded in the URL path.
    //
    // UrlGetLocation() will intelligently find the fragment;
    // some schemes do not use the # as a fragment identifier.
    // It returns a pointer to the #.
    //
    pch = (CHAR *)UrlGetLocation(_pszFullURL);
    if (pch)
    {
        *pch = TEXT('\0');
    }

    _dwProto = ProtoFromString(_pszFullURL);

    if (_dwProto == DLD_PROTOCOL_NONE)
    {
        fRet = FALSE;
        goto Exit;
    }

    if(DLD_PROTOCOL_FILE == _dwProto)
    {
        //
        // dwFullUrlLen is used as the capacity of _pszObject here.
        //
        DWORD cchObject = dwFullUrlLen;

        // do file stuff here
        fRet = SUCCEEDED(PathCreateFromUrl(_pszFullURL, _pszObject, &cchObject, 0));
    }
    else
    {
        //
        // BUGBUG - InternetCrackUrl alters the original url - zekel - 25-JUL-97
        // ICU is poorly behaved, and it unescapes the server and username
        // components in situ regardless of whether it was requested or not.
        // This means that if you pass in http://host%76/, the url on return
        // is http://hostv76/. It happens that if you create the URL from the
        // components given, you will get the correct URL, but crack doesn't
        // understand all URLs. It is too late in the game to change the
        // behavior of ICU, because wininet internally depends on the behavior.
        // So our solution is to create a temp buffer that can be messed with
        // and then throw it away after we are done.
        //
        LPSTR pszTemp = StrDup(_pszFullURL);

        if (pszTemp)
        {
            url.dwStructSize = sizeof(url);

            url.lpszScheme = _pszProtocol;
            url.dwSchemeLength = 12;

            url.lpszHostName = _pszServerName;
            url.dwHostNameLength = URL_FIELD_SIZE;

            url.lpszUserName = _pszUserName;
            url.dwUserNameLength = URL_FIELD_SIZE;

            url.lpszPassword = _pszPassword;
            url.dwPasswordLength = URL_FIELD_SIZE;

            url.lpszUrlPath = _pszObject;
            url.dwUrlPathLength = dwFullUrlLen;

            url.lpszExtraInfo = NULL;
            url.dwExtraInfoLength = 0;

            fRet = InternetCrackUrl(pszTemp, 0, (_dwProto == DLD_PROTOCOL_STREAM ? ICU_DECODE : 0), &url);

            _ipPort = url.nPort;

            /*
            Code to pass in an MBCS servername to wininet always when this fix enabled
            to get around the UTF8-servername bugs. - I-DNS fix.
            */

            if( fUTF8Required
                && g_bGlobalUTF8hackEnabled
                && fRet
                && ((_dwProto == DLD_PROTOCOL_HTTP)
                    || (_dwProto == DLD_PROTOCOL_HTTPS))
              )
            {
                DWORD dwHostname = MAX_URL_SIZE;
                WCHAR* pwzHostname = new WCHAR[MAX_URL_SIZE];
                char* pszHostname = new char[MAX_URL_SIZE];
                HRESULT hrTemp;
                BOOL bUsedDefaultChar;

                // This is NOT a loop - just an urlmon-style coding convention to avoid deep if-else-nesting.
                do
                {
                    fRet = FALSE;

                    if (!pwzHostname || !pszHostname)
                    {
                        break;
                    }

                    hrTemp = UrlGetPartW(pwzUrl, pwzHostname, &dwHostname, URL_PART_HOSTNAME, 0);
                    if (FAILED(hrTemp))
                    {
                        break;
                    }

                    if (!StringContainsHighAnsiW(pwzHostname))
                    {
                        // home free! - no high ansi in servername.
                        fRet = TRUE;
                        break;
                    }

                    if (dwCodePage == CP_UTF8)
                    {
                        dwCodePage = GetACP();
                    }

                    // This fix cannot be ported to IE downlevel versions because WC_NO_BEST_FIT_CHARS is not
                    // supported on all OS versions.
                    if (0 == WideCharToMultiByte(dwCodePage, WC_NO_BEST_FIT_CHARS, pwzHostname, -1,
                                                 pszHostname, MAX_URL_SIZE, NULL, &bUsedDefaultChar)
                        || bUsedDefaultChar)
                    {
                        fRet = FALSE;
                        break;
                    }

                    // Cache the UTF8 servername if we need it.
                    // This field is set only once ( not on redirects ),
                    // so release only in destructor.

                    _pszUTF8ServerName = new char[url.dwHostNameLength+1];
                    if (!_pszUTF8ServerName)
                    {
                        break;
                    }

                    lstrcpy(_pszUTF8ServerName, _pszServerName);

                    // now clobber it with the MBCS servername

                    // Compat: match side-effect of calling InternetCrackUrl

                    dwHostname = URL_FIELD_SIZE;
                    hrTemp = UrlUnescapeA(pszHostname, _pszServerName, &dwHostname, 0);
                    if(FAILED(hrTemp))
                    {
                        break;
                    }

                    // now put the original _pszFullURL back together with the MBCS servername
                    // instead of the UTF8 servername since wininet will have this.

                    url.lpszHostName = _pszServerName;
                    url.dwHostNameLength = dwHostname;

                    url.lpszUserName = NULL;
                    url.dwUserNameLength = 0;

                    url.lpszPassword = NULL;
                    url.dwPasswordLength = 0;

                    if (!InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
                    {
                        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
                        {
                            break;
                        }

                        // Retry once with a buffer of the size reported back.
                        delete [] _pszFullURL;
                        _pszFullURL = new char[++dwFullUrlLen];

                        if (!_pszFullURL || !InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
                        {
                            break;
                        }
                    }

                    // At this point, everything is right.
                    // 1. _pszFullURL has the same url as wininet.
                    // 2. _pszServerName has the MBCS hostname
                    // 3. _pszUTF8ServerName has the UTF8 hostname that would have gone to the proxy (if needed)

                    _dwServerCodePage = dwCodePage;
                    _fUTF8hack = TRUE;

                    fRet = TRUE;
                    break;
                }
                while(TRUE);

                if (pwzHostname)
                    delete [] pwzHostname;
                if (pszHostname)
                    delete [] pszHostname;
            }

            LocalFree(pszTemp);
        }
        else
            fRet = FALSE;
    }

Exit:

    PerfDbgLog1(tagCUrl, this, "-CUrl::ParseUrl Full:[%s]", _pszFullURL);

    DEBUG_LEAVE(fRet);
    return(fRet);
}
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Method: CUrl::ProtoFromString
|
|
//
|
|
// Synopsis:
|
|
//
|
|
// Arguments: [lpszProtocol] --
|
|
//
|
|
// Returns:
|
|
//
|
|
// History: Created Unknown
|
|
// 2-20-96 JohannP (Johann Posch) Modified for class
|
|
//
|
|
// Notes:
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
//
// CUrl::ProtoFromString
//
// Translates the scheme of the given URL string (as reported by
// GetUrlScheme) into the matching DLD_PROTOCOL_* value.
//
// The mk: scheme maps to DLD_PROTOCOL_STREAM only while IsStreamEnabled()
// reports TRUE. Any scheme not listed below yields DLD_PROTOCOL_NONE.
//
DWORD CUrl::ProtoFromString(LPSTR lpszProtocol)
{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "CUrl::ProtoFromString",
                "this=%#x, %.80q",
                this, lpszProtocol
                ));

    // Schemes that translate one-to-one.
    static const struct
    {
        DWORD dwScheme;
        DWORD dwProto;
    }
    s_aSchemeMap[] =
    {
        { URL_SCHEME_HTTPS,  DLD_PROTOCOL_HTTPS  },
        { URL_SCHEME_HTTP,   DLD_PROTOCOL_HTTP   },
        { URL_SCHEME_FTP,    DLD_PROTOCOL_FTP    },
        { URL_SCHEME_GOPHER, DLD_PROTOCOL_GOPHER },
        { URL_SCHEME_FILE,   DLD_PROTOCOL_FILE   },
        { URL_SCHEME_LOCAL,  DLD_PROTOCOL_LOCAL  },
    };

    DWORD dwProto = DLD_PROTOCOL_NONE;
    PerfDbgLog1(tagCUrl, this, "CUrl::ProtoFromString [%s]", lpszProtocol);

    const DWORD dwScheme = GetUrlScheme(lpszProtocol);

    if (dwScheme == (DWORD)URL_SCHEME_MK)
    {
        // mk: is only honoured when enabled in the registry.
        if (IsStreamEnabled())
        {
            dwProto = DLD_PROTOCOL_STREAM;
        }
    }
    else
    {
        for (unsigned int i = 0; i < sizeof(s_aSchemeMap) / sizeof(s_aSchemeMap[0]); ++i)
        {
            if (s_aSchemeMap[i].dwScheme == dwScheme)
            {
                dwProto = s_aSchemeMap[i].dwProto;
                break;
            }
        }
    }

    DEBUG_LEAVE(dwProto);
    return dwProto;
}
|
|
|
|
|
|
// Helper API's
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: ConstructURL
|
|
//
|
|
// Synopsis:
|
|
//
|
|
// Arguments: [pBC] -- Pointer to BindCtx
|
|
// [pURLBase] -- Pointer to Base URL [IN]
|
|
// [pURLRelative] -- Pointer to Relative URL [IN]
|
|
// [pURLFull] -- Pointer to resultant complete URL [OUT]
|
|
//
|
|
// Returns:
|
|
//
|
|
// History: 02-21-96 JoeS (Joe Souza) Created
|
|
//
|
|
// Notes:
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
//
// ConstructURL
//
// Produces a full URL in pwzURLFull from pwzURLRelative:
//
//   1. A context moniker is chosen: pmkContext if given; otherwise the
//      SZ_URLCONTEXT object registered on pBC (if pBC is given); otherwise
//      pmkToLeft (only reached when both pmkContext and pBC are NULL).
//   2. If that moniker is a URL moniker, its display name is used as the base
//      URL and combined with pwzURLRelative via OInetCombineUrl.
//   3. Without a base URL: if CU_CANONICALIZE is set the relative URL is
//      canonicalized via OInetParseUrl; otherwise it is copied unchanged.
//
// CU_STANDARD_FORM clears URL_FILE_USE_PATHURL from the flags handed to the
// combine/parse calls.
//
// Arguments: [pBC]            -- bind context (may be NULL)
//            [pmkContext]     -- context moniker (may be NULL)
//            [pmkToLeft]      -- moniker to the left (may be NULL)
//            [pwzURLRelative] -- relative URL [IN]
//            [pwzURLFull]     -- resulting URL [OUT]
//            [cURLSize]       -- size of pwzURLFull; the plain-copy path
//                                divides it by the character size, i.e.
//                                treats it as a byte count
//            [dwFlags]        -- CU_* flags
//
// Returns:   NOERROR on success, E_INVALIDARG for a NULL/empty buffer
//            argument, MK_E_SYNTAX if combining/canonicalizing fails or
//            yields an empty string.
//
// NOTE(review): the combine/canonicalize paths tell OInetCombineUrl /
// OInetParseUrl that pwzURLFull holds MAX_URL_SIZE characters regardless of
// cURLSize -- confirm all callers supply a buffer at least that large.
//
STDAPI ConstructURL(LPBC pBC, LPMONIKER pmkContext, LPMONIKER pmkToLeft,
                    LPWSTR pwzURLRelative, LPWSTR pwzURLFull, DWORD cURLSize,
                    DWORD dwFlags)
{
    DEBUG_ENTER_API((DBG_API,
                Hresult,
                "ConstructURL",
                "%#x, %#x, %#x, %.80wq, %.80wq, %#x, %#x",
                pBC, pmkContext, pmkToLeft, pwzURLRelative, pwzURLFull, cURLSize, dwFlags
                ));

    PerfDbgLog2(tagCUrlApi, NULL, "+ConstructURL (rel:%ws, pmk:%lx)", pwzURLRelative, pmkContext);
    HRESULT hr = NOERROR;
    DWORD dwMnk = 0;
    LPMONIKER pmkCtx = NULL;
    BOOL fReleaseCtx = FALSE;   // TRUE when pmkCtx carries a reference we own
    LPWSTR wzURLBase = NULL;
    DWORD cchURLFull;
    DWORD cchCopy;
    BOOL bParseOk = FALSE;
    DWORD dwCUFlags = URL_FILE_USE_PATHURL;

    if (dwFlags & CU_STANDARD_FORM)
    {
        dwCUFlags = 0;
    }

    if (!pwzURLRelative || !pwzURLFull || !cURLSize)
    {
        hr = E_INVALIDARG;
        goto ConstructExit;
    }

    pwzURLFull[0] = 0;

    if (!pmkContext && pBC)
    {
        // No Context Moniker was specified, so try to get one of those.

        hr = pBC->GetObjectParam(SZ_URLCONTEXT, (IUnknown **)&pmkCtx);
        if (hr != NOERROR)
        {
            pmkCtx = NULL;
        }
        else if (pmkCtx)
        {
            // GetObjectParam hands back an AddRef'ed pointer; remember to
            // drop that reference on the way out.
            fReleaseCtx = TRUE;
        }
    }
    else if (pmkContext)
    {
        pmkCtx = pmkContext;
    }
    else if (pmkToLeft)
    {
        pmkCtx = pmkToLeft;
    }

    if (pmkCtx)
    {
        // There is a Context Moniker. Make sure it is a URL moniker and
        // if it is, get the base URL from it.

        pmkCtx->IsSystemMoniker(&dwMnk);
        if (dwMnk == MKSYS_URLMONIKER)
        {
            hr = pmkCtx->GetDisplayName(pBC, NULL, &wzURLBase);
        }
    }

    // Failures while looking for a base URL are not fatal; they simply mean
    // there is no base (wzURLBase stays NULL).
    hr = NOERROR;

    if (wzURLBase)
    {
        DWORD dwSizeIn = MAX_URL_SIZE;
        cchURLFull = MAX_URL_SIZE;
        bParseOk = SUCCEEDED(OInetCombineUrl(wzURLBase, pwzURLRelative, dwCUFlags, pwzURLFull, dwSizeIn, &cchURLFull, 0));
    }
    else if (dwFlags & CU_CANONICALIZE)
    {
        DWORD dwSizeIn = MAX_URL_SIZE;
        cchURLFull = MAX_URL_SIZE;
        bParseOk = SUCCEEDED(OInetParseUrl(pwzURLRelative,PARSE_CANONICALIZE, dwCUFlags, pwzURLFull, dwSizeIn,&cchURLFull,0));
    }
    else
    {
        // We did not combine a relative and a base URL, and caller
        // does not want to canonicalize, so we just copy the given URL
        // into the return buffer.
#ifndef unix
        cchCopy = cURLSize / 2;
#else
        cchCopy = cURLSize / sizeof(wchar_t);
#endif /* unix */
        wcsncpy(pwzURLFull, pwzURLRelative, cchCopy);

        // wcsncpy leaves the destination unterminated when the source does
        // not fit; make sure the caller always gets a terminated string.
        if (cchCopy)
        {
            pwzURLFull[cchCopy - 1] = 0;
        }
        goto ConstructExit;
    }

    if (!bParseOk || !wcslen(pwzURLFull))
    {
        hr = MK_E_SYNTAX;
    }

ConstructExit:

    if (wzURLBase)
    {
        // NOTE(review): the string comes from IMoniker::GetDisplayName;
        // confirm that this module's operator delete is the matching
        // deallocator for it.
        delete wzURLBase;
    }

    if (fReleaseCtx)
    {
        pmkCtx->Release();
    }

    PerfDbgLog2(tagCUrlApi, NULL, "-ConstructURL [%ws], hr:%lx", pwzURLFull, hr);

    DEBUG_LEAVE_API(hr);
    return hr;
}
|
|
|
|
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// UTF-8 code from wininet written by RFirth
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
DWORD
CountUnicodeToUtf8(
    IN LPCWSTR pwszIn,
    IN DWORD dwInLen,
    IN BOOL bEncode
    )

/*++

Routine Description:

    Count number of BYTEs required for UTF-8 conversion of UNICODE string.
    Exactly dwInLen characters are examined; embedded NULs do not stop the
    count.

    Per input character:
        0x0000..0x007F  1 byte (never hex encoded)
        0x0080..0x07FF  2 UTF-8 bytes
        0x0800..0xFFFF  3 UTF-8 bytes
    With bEncode set, each UTF-8 byte of a non-ASCII character is counted as
    3 output bytes ("%XX").

Arguments:

    pwszIn  - pointer to input wide-character string

    dwInLen - number of characters in pwszIn; 0 yields a count of 0

    bEncode - TRUE if we are to hex encode characters >= 0x80

Return Value:

    DWORD - number of BYTEs required for conversion

--*/

{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "CountUnicodeToUtf8",
                "%.80wq, %#x, %B",
                pwszIn, dwInLen, bEncode
                ));

    PProtAssert(pwszIn != NULL);
    PProtAssert(dwInLen != 0);

    DWORD dwCount = 0;
    const DWORD cbPerOctet = bEncode ? 3 : 1;

    //
    // Test the length before touching the input, so that a zero length
    // cannot wrap the counter and walk off the end of the string.
    //

    for (; dwInLen != 0; --dwInLen) {

        const WORD wchar = *pwszIn++;

        if (wchar < 0x0080) {
            dwCount += 1;
        } else if (wchar < 0x0800) {
            dwCount += 2 * cbPerOctet;
        } else {
            dwCount += 3 * cbPerOctet;
        }
    }

    DEBUG_LEAVE(dwCount);
    return dwCount;
}
|
|
|
|
DWORD
|
|
ConvertUnicodeToUtf8(
|
|
IN LPCWSTR pwszIn,
|
|
IN DWORD dwInLen,
|
|
OUT LPBYTE pszOut,
|
|
IN DWORD dwOutLen,
|
|
IN BOOL bEncode
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Convert a string of UNICODE characters to UTF-8:
|
|
|
|
0000000000000000..0000000001111111: 0xxxxxxx
|
|
0000000010000000..0000011111111111: 110xxxxx 10xxxxxx
|
|
0000100000000000..1111111111111111: 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
Arguments:
|
|
|
|
pwszIn - pointer to input wide-character string
|
|
|
|
dwInLen - number of CHARACTERS in pwszIn INCLUDING terminating NUL
|
|
|
|
pszOut - pointer to output narrow-character buffer
|
|
|
|
dwOutLen - number of BYTEs in pszOut
|
|
|
|
bEncode - TRUE if we are to hex encode characters >= 0x80
|
|
|
|
Return Value:
|
|
|
|
DWORD
|
|
Success - ERROR_SUCCESS
|
|
|
|
Failure - ERROR_INSUFFICIENT_BUFFER
|
|
Not enough space in pszOut to store results
|
|
|
|
--*/
|
|
|
|
{
|
|
DEBUG_ENTER((DBG_APP,
|
|
Dword,
|
|
"CountUnicodeToUtf8",
|
|
"%.80wq, %#x, %#x, %#x, %B",
|
|
pwszIn, dwInLen, pszOut, dwOutLen, bEncode
|
|
));
|
|
|
|
PProtAssert(pwszIn != NULL);
|
|
PProtAssert((int)dwInLen > 0);
|
|
PProtAssert(pszOut != NULL);
|
|
PProtAssert((int)dwOutLen > 0);
|
|
|
|
DWORD outputSize = bEncode ? 3 : 1;
|
|
static char hexArray[] = "0123456789ABCDEF";
|
|
|
|
while (dwInLen-- && dwOutLen) {
|
|
|
|
WORD wchar = *pwszIn++;
|
|
BYTE bchar;
|
|
|
|
if (wchar <= 0x007F) {
|
|
*pszOut++ = (BYTE)(wchar);
|
|
--dwOutLen;
|
|
continue;
|
|
}
|
|
|
|
BYTE lead = ((wchar >= 0x0800) ? 0xE0 : 0xC0);
|
|
int shift = ((wchar >= 0x0800) ? 12 : 6);
|
|
|
|
bchar = lead | (BYTE)(wchar >> shift);
|
|
if (bEncode) {
|
|
*pszOut++ = '%';
|
|
*pszOut++ = hexArray[bchar >> 4];
|
|
bchar = hexArray[bchar & 0x0F];
|
|
}
|
|
*pszOut++ = bchar;
|
|
|
|
if (wchar >= 0x0800) {
|
|
bchar = 0x80 | (BYTE)((wchar >> 6) & 0x003F);
|
|
if (bEncode) {
|
|
*pszOut++ = '%';
|
|
*pszOut++ = hexArray[bchar >> 4];
|
|
bchar = hexArray[bchar & 0x0F];
|
|
}
|
|
*pszOut++ = bchar;
|
|
}
|
|
|
|
bchar = 0x80 | (BYTE)(wchar & 0x003F);
|
|
if (bEncode) {
|
|
*pszOut++ = '%';
|
|
*pszOut++ = hexArray[bchar >> 4];
|
|
bchar = hexArray[bchar & 0x0F];
|
|
}
|
|
*pszOut++ = bchar;
|
|
}
|
|
|
|
DEBUG_LEAVE(ERROR_SUCCESS);
|
|
return ERROR_SUCCESS;
|
|
}
|
|
|
|
BOOL
StringContainsHighAnsi(
    IN LPSTR pszIn,
    IN DWORD dwInLen
    )

/*++

Routine Description:

    Reports whether the leading part of a string holds any character with the
    high bit set (0x80..0xFF). Scanning ends at the first such character, at
    the string terminator, at the first '?' (start of the query portion), or
    once dwInLen characters have been looked at - whichever comes first.

Arguments:

    pszIn   - pointer to string to test

    dwInLen - maximum number of characters of pszIn to examine

Return Value:

    BOOL
        TRUE    - a high-ANSI character was found in the scanned range

        FALSE   - no high-ANSI character was found in the scanned range

--*/

{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "StringContainsHighAnsi",
                "%.80q, %#x",
                pszIn, dwInLen
                ));

    PProtAssert(pszIn != NULL);
    PProtAssert(dwInLen != 0);

    BOOL fHighAnsi = FALSE;

    // only the base portion (everything before '?') is of interest
    for (; dwInLen != 0; --dwInLen, ++pszIn) {

        const char ch = *pszIn;

        if (ch == '\0' || ch == '?') {
            break;
        }
        if (ch & 0x80) {
            fHighAnsi = TRUE;
            break;
        }
    }

    DEBUG_LEAVE(fHighAnsi);
    return fHighAnsi;
}
|
|
|
|
|
|
BOOL
StringContainsHighAnsiW(
    IN LPCWSTR pwzIn
    )
/*--
    Unicode version of StringContainsHighAnsi(): returns TRUE if any
    character before the terminator or the first L'?' is >= 0x80.
--*/
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "StringContainsHighAnsiW",
                "%.80wq",
                pwzIn
                ));

    PProtAssert(pwzIn != NULL);

    BOOL fHighAnsi = FALSE;

    // only the base portion (everything before '?') is of interest
    for (LPCWSTR pwz = pwzIn; *pwz != 0 && *pwz != L'?'; ++pwz)
    {
        if (*pwz >= 0x80)
        {
            fHighAnsi = TRUE;
            break;
        }
    }

    DEBUG_LEAVE(fHighAnsi);
    return fHighAnsi;
}
|
|
|
|
|
|
//
// ConvertUnicodeUrl
//
// Converts a wide-character URL into pszTo.
//
// If fUTF8Enabled is set and the base portion of the URL (everything before
// the first '?') contains a character >= 0x80, *pfUTF8Required is set to
// TRUE, the base portion is written as hex-encoded UTF-8 and the query
// portion (from '?' on) is converted with W2A using dwCodePage.
//
// In every other case - and whenever the UTF-8 form does not fit into cchTo
// characters, or a temporary buffer cannot be allocated, or the conversion
// fails - the whole URL is converted with W2A (IE4 behaviour).
//
// Arguments: [pwzFrom]        -- URL to convert
//            [pszTo]          -- output buffer
//            [cchTo]          -- size of pszTo in characters
//            [dwCodePage]     -- code page for the multi-byte conversion
//            [fUTF8Enabled]   -- allow the UTF-8 form
//            [pfUTF8Required] -- [OUT] TRUE if the base portion contains
//                                characters >= 0x80 and fUTF8Enabled is set;
//                                this stays TRUE even when the multi-byte
//                                fallback is used
//
// Returns:   always TRUE
//
BOOL
ConvertUnicodeUrl(
    LPCWSTR pwzFrom,
    LPSTR pszTo,
    INT cchTo,
    DWORD dwCodePage,
    BOOL fUTF8Enabled,
    BOOL* pfUTF8Required
    )
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "ConvertUnicodeUrl",
                "%.80wq, %.80q, %#x, %#x, %B",
                pwzFrom, pszTo, cchTo, dwCodePage, fUTF8Enabled
                ));

    BOOL fSuccess = FALSE;

    *pfUTF8Required = FALSE;

    if( fUTF8Enabled && StringContainsHighAnsiW(pwzFrom))
    {
        *pfUTF8Required = TRUE;

        //
        // Find the end of the base portion (first '?' or end of string) and
        // count its length in wide characters.
        //
        DWORD dwBaseUrlLen = 0;
        LPCWSTR pBase = pwzFrom;
        while( *pBase && *pBase != '?')
        {
            pBase++;
            dwBaseUrlLen++;
        }

        DWORD dwMBQryUrlLen = 0;    // query portion length (multi-byte)
        LPSTR pszQry = NULL;        // multi-byte query string
        if (*pBase)
        {
            //
            // There is a query portion. Convert it into pszTo for now to
            // learn its multi-byte length; it is copied out of pszTo again
            // below, before pszTo receives the final result. Whether every
            // character could be mapped is not checked here.
            //
            W2A(pBase, pszTo, cchTo, dwCodePage);
            dwMBQryUrlLen = strlen(pszTo);
            pszQry = pszTo;
        }

        // Bytes needed for the hex-encoded UTF-8 base portion.
        DWORD dwUTF8Len = CountUnicodeToUtf8(pwzFrom, dwBaseUrlLen, TRUE);

        // Buffer size = UTF8_BaseURL_Len + MultiByte_Query_Len + '\0'
        DWORD dwUTFBufferSize = dwUTF8Len + dwMBQryUrlLen + 1;

        //
        // The result, including its terminator, must fit into the cchTo
        // characters of pszTo; otherwise fall back to the multi-byte form.
        //
        if( cchTo > 0 && dwUTFBufferSize <= (DWORD)cchTo )
        {
            char* pszUTF8 = new char[dwUTFBufferSize];
            if( pszUTF8 )
            {
                // Zero-fill so the assembled string is always terminated.
                memset(pszUTF8, 0, dwUTFBufferSize);

                DWORD dwError = ConvertUnicodeToUtf8( pwzFrom,
                                                      dwBaseUrlLen,
                                                      (LPBYTE)pszUTF8,
                                                      dwUTF8Len,
                                                      TRUE );
                if( dwError == ERROR_SUCCESS )
                {
                    // Append the multi-byte query string, if any.
                    if( pszQry )
                    {
                        LPSTR pszURL = pszUTF8 + dwUTF8Len;
                        while( dwMBQryUrlLen-- )
                        {
                            *pszURL++ = *pszQry++;
                        }
                    }

                    // Hand the assembled string back in pszTo.
                    StrCpyN(pszTo, pszUTF8, dwUTFBufferSize);
                    fSuccess = TRUE;
                }

                delete [] pszUTF8;
            }
        }
    }

    if (!fSuccess)
    {
        // Unicode->Multibyte: IE4 behaviour.
        W2A(pwzFrom, pszTo, cchTo, dwCodePage);
    }

    DEBUG_LEAVE(TRUE);
    return TRUE;
}
|