//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1992 - 1995. // // File: curl.cxx // // Contents: handle url parsing and context urls parsing // // Classes: // // Functions: // // History: 2-20-96 JohannP (Johann Posch) Created // //---------------------------------------------------------------------------- #include #include #include PerfDbgTag(tagCUrl, "Urlmon", "Log CUrl", DEB_PROT); PerfDbgTag(tagCUrlApi, "Urlmon", "Log CUrl API", DEB_ASYNCAPIS); //+--------------------------------------------------------------------------- // // Function: IsStreamEnabled // // Synopsis: returns TRUE iff the registry key for the mk: protocol is set // // Arguments: (none) // // Returns: // // History: 6-7-96 craigc Created // // Notes: // //---------------------------------------------------------------------------- extern BOOL g_bGlobalUTF8hackEnabled; BOOL StringContainsHighAnsiW(LPCWSTR); BOOL IsStreamEnabled() { DEBUG_ENTER((DBG_APP, Bool, "IsStreamEnabled", NULL )); HKEY hk; char szBuf[256]; DWORD dwType; DWORD dwSize; const char szKey[] = "SOFTWARE\\Microsoft\\Internet Explorer"; const char szValue[] = "MkEnabled"; const char szYes[] = "yes"; static BOOL fChecked = FALSE; static BOOL fEnabled = FALSE; CMutexSem mxs; CLock lck(mxs); if (fChecked) { DEBUG_LEAVE(fEnabled); return fEnabled; } if (RegOpenKey(HKEY_LOCAL_MACHINE, szKey, &hk) != ERROR_SUCCESS) { DEBUG_LEAVE(FALSE); return( FALSE ); } dwSize = sizeof(szBuf); if (RegQueryValueEx( hk, szValue, NULL, &dwType, (BYTE*)szBuf, &dwSize ) != ERROR_SUCCESS) { RegCloseKey( hk ); DEBUG_LEAVE(FALSE); return( FALSE ); } RegCloseKey( hk ); fEnabled = (dwSize && (lstrcmpi( szYes, szBuf ) == 0)); fChecked = TRUE; DEBUG_LEAVE(fEnabled); return fEnabled; } // // GetUrlScheme() returns one of the URL_SCHEME_* constants as // defined in shlwapip.h // example "http://foo" returns URL_SCHEME_HTTP // DWORD GetUrlScheme(IN LPCTSTR pcszUrl) { DEBUG_ENTER((DBG_APP, Dword, "GetUrlScheme", "%#x", pcszUrl )); if(pcszUrl) { PARSEDURL pu; pu.cbSize = sizeof(pu); if(SUCCEEDED(ParseURL(pcszUrl, &pu))) { DEBUG_LEAVE(pu.nScheme); return pu.nScheme; } } DEBUG_LEAVE(URL_SCHEME_INVALID); return URL_SCHEME_INVALID; } //+--------------------------------------------------------------------------- // // Method: CUrl::CUrl // // Synopsis: // // Arguments: (none) // // Returns: // // History: 2-20-96 JohannP (Johann Posch) Created // // Notes: // //---------------------------------------------------------------------------- CUrl::CUrl() { DEBUG_ENTER((DBG_APP, None, "CUrl::CUrl", "this=%#x", this )); PerfDbgLog(tagCUrl, this, "+CUrl::CUrl"); _pszBaseURL = NULL; _pszPartURL = NULL; _pszFullURL = NULL; _pszProtocol = NULL; _pszServerName = NULL; _pszUserName = NULL; _pszPassword = NULL; _pszObject = NULL; _pBasicAllocUnit = NULL; _ipPort = 0; _dwProto = 0; _fUTF8hack = FALSE; _pszUTF8ServerName = NULL; _dwServerCodePage = 0; PerfDbgLog(tagCUrl, this, "-CUrl::CUrl"); DEBUG_LEAVE(0); } BOOL CUrl::CUrlInitBasic(DWORD dwBaseUrlSize) { DEBUG_ENTER((DBG_APP, Bool, "CUrl::CUrlInitBasic", "this=%#x, %#x", this, dwBaseUrlSize )); // // basic allocation contains the following fields: // // _pszServerName - URL_FIELD_SIZE // _pszUserName - URL_FIELD_SIZE // _pszPassword - URL_FIELD_SIZE // _pszProtocol - 12 // _pszBaseURL - dwBaseUrlSize + 1 // BOOL fRet = FALSE; DWORD dwBasicUnitSize = (3 * (URL_FIELD_SIZE)) + 12 + dwBaseUrlSize + 1; _pBasicAllocUnit = new char[dwBasicUnitSize]; if( _pBasicAllocUnit ) { _pszServerName = _pBasicAllocUnit; _pszUserName = _pszServerName + URL_FIELD_SIZE; _pszPassword = _pszUserName + URL_FIELD_SIZE; _pszProtocol = _pszPassword + URL_FIELD_SIZE; _pszBaseURL = _pszProtocol + 12; *_pszServerName = '\0'; *_pszUserName = '\0'; *_pszPassword = '\0'; *_pszProtocol = '\0'; *_pszBaseURL = '\0'; fRet = TRUE; } DEBUG_LEAVE(fRet); return fRet; } BOOL CUrl::CUrlInitAll() { DEBUG_ENTER((DBG_APP, Bool, "CUrl::CUrlInitAll", "this=%#x", this )); // backword compatibility init all with URL_MAX_LENGTH BOOL fRet = FALSE; fRet = CUrlInitBasic(MAX_URL_SIZE); if( fRet ) { _pszPartURL = new char[MAX_URL_SIZE + 1]; _pszFullURL = new char[MAX_URL_SIZE + 1]; _pszObject = new char[MAX_URL_SIZE + 1]; if( !_pszPartURL || !_pszFullURL || !_pszObject ) { if (_pszPartURL) delete _pszPartURL; if (_pszFullURL) delete _pszFullURL; if (_pszObject) delete _pszObject; _pszPartURL = NULL; _pszFullURL = NULL; _pszObject = NULL; fRet = FALSE; } else { *_pszPartURL = '\0'; *_pszFullURL = '\0'; *_pszObject = '\0'; } } DEBUG_LEAVE(fRet); return fRet; } CUrl::~CUrl() { DEBUG_ENTER((DBG_APP, None, "CUrl::~CUrl", "this=%#x", this )); if( _pBasicAllocUnit ) { delete [] _pBasicAllocUnit; } if( _pszPartURL ) { delete [] _pszPartURL; } if( _pszFullURL ) { delete [] _pszFullURL; } if( _pszObject ) { delete [] _pszObject; } if( _pszUTF8ServerName ) { delete [] _pszUTF8ServerName; } DEBUG_LEAVE(0); } //+--------------------------------------------------------------------------- // // Function: ParseUrl // // Synopsis: Breaks down a URL and puts servername, objectname and port // into the download structure. // // Arguments: // // // Returns: TRUE if the URL was successfully parsed. // // History: Created Unknown // 02-20-95 JohannP (Johann Posch) Created Class // 03-20-95 JoeS (Joe Souza) Special FILE: syntaxes // // Notes: URL should have already been parsed earlier by ConstructURL. // This function will crack the URL. // //---------------------------------------------------------------------------- BOOL CUrl::ParseUrl(BOOL fUTF8Required, LPCWSTR pwzUrl, DWORD dwCodePage) { DEBUG_ENTER((DBG_APP, Bool, "CUrl::ParseUrl", "this=%#x, %B", this, fUTF8Required )); PerfDbgLog1(tagCUrl, this, "+CUrl::ParseUrl Base:[%s]", _pszBaseURL); BOOL fRet = TRUE; URL_COMPONENTS url; DWORD cchFullURL; LPSTR szTemp; CHAR * pch; DWORD dwFullUrlLen; _fUTF8hack = FALSE; //possibly set true later in the function for non-redirect codepath. if (_pszPartURL && _pszPartURL[0] != '\0' ) // This string will be set for redirects. { // // we need re-alloc _pszFullURL and _pszObject, since // the the size can grow! // dwFullUrlLen = strlen(_pszBaseURL) + strlen(_pszPartURL) + 1; if(dwFullUrlLen > MAX_URL_SIZE) { dwFullUrlLen = MAX_URL_SIZE + 1; } if( _pszFullURL ) { delete [] _pszFullURL; _pszFullURL = NULL; _pszFullURL = new char[dwFullUrlLen]; } if( _pszObject ) { delete [] _pszObject; _pszObject = NULL; _pszObject = new char[dwFullUrlLen]; } if( !_pszFullURL || !_pszObject ) { fRet = FALSE; goto Exit; } cchFullURL = dwFullUrlLen; if(FAILED(UrlCombine(_pszBaseURL, _pszPartURL, _pszFullURL, &cchFullURL, URL_FILE_USE_PATHURL))) { fRet = FALSE; PProtAssert(FALSE && "Combine failed in ParseUrl!\n"); goto Exit; } } else { // FullURL is BaseURL dwFullUrlLen = strlen(_pszBaseURL) + 1; if( !_pszFullURL ) { _pszFullURL = new char[dwFullUrlLen]; } if( !_pszObject ) { _pszObject = new char[dwFullUrlLen]; } if( !_pszFullURL || !_pszObject ) { fRet = FALSE; goto Exit; } lstrcpy(_pszFullURL, _pszBaseURL); } // Trim off intra-page link. // // NB: Don't use ExtraInfo below to do this because you will // also lose search string this way. // // Also, we need to do this before we decode the URL below, // so that we don't trim off the wrong '#' if there was one // encoded in the URL path. // // UrlGetLocation() will intelligently find the fragment // some schemes do not use the # as a fragment identifier. // it returns a pointer to the # // if(pch = (CHAR *)UrlGetLocation(_pszFullURL)) { *pch = TEXT('\0'); } _dwProto = ProtoFromString(_pszFullURL); if (_dwProto == DLD_PROTOCOL_NONE) { fRet = FALSE; goto Exit; } if(DLD_PROTOCOL_FILE == _dwProto) { // // at this point, _pszFullURL and _pszObject should be all // allocated with size of dwFullUrlLen // DWORD cchObject = dwFullUrlLen; //do file stuff here fRet = SUCCEEDED(PathCreateFromUrl(_pszFullURL, _pszObject, &cchObject, 0)); } else { // // BUGBUG - InternetCrackUrl alters the original url - zekel - 25-JUL-97 // ICU is poorly behaved, and it unescapes the server and username // components insitu regardless of whether it was requested or not // this means that if you pass in http://host%76/, the url on return // is http://hostv76/. it happens that if you create the URL from the // components given, you will get the correct URL, but crack doesnt understand // all URLs. it is too late in the game to change the behavior of ICU, // because wininet internally depends on the behavior. // so our solution is to create a temp buffer that can be messed with // and then throw it away after we are done. // LPSTR pszTemp = StrDup(_pszFullURL); if (pszTemp) { url.dwStructSize = sizeof(url); url.lpszScheme = _pszProtocol; url.dwSchemeLength =12; url.lpszHostName = _pszServerName; url.dwHostNameLength = URL_FIELD_SIZE; url.lpszUserName = _pszUserName; url.dwUserNameLength = URL_FIELD_SIZE; url.lpszPassword = _pszPassword; url.dwPasswordLength = URL_FIELD_SIZE; url.lpszUrlPath = _pszObject; url.dwUrlPathLength = dwFullUrlLen; url.lpszExtraInfo = NULL; url.dwExtraInfoLength = 0; fRet = InternetCrackUrl(pszTemp, 0, (_dwProto == DLD_PROTOCOL_STREAM ? ICU_DECODE : 0), &url); _ipPort = url.nPort; /* Code to pass in an MBCS servername to wininet always when this fix enabled to get around the UTF8-servername bugs. - I-DNS fix. */ if( fUTF8Required && g_bGlobalUTF8hackEnabled && fRet && ((_dwProto == DLD_PROTOCOL_HTTP) || (_dwProto == DLD_PROTOCOL_HTTPS)) ) { DWORD dwHostname = MAX_URL_SIZE; WCHAR* pwzHostname = new WCHAR[MAX_URL_SIZE]; char* pszHostname = new char[MAX_URL_SIZE]; HRESULT hrTemp; BOOL bUsedDefaultChar; // This is NOT a loop - just an urlmon-style coding convention to avoid deep if-else-nesting. do { fRet = FALSE; if (!pwzHostname || !pszHostname) { break; } hrTemp = UrlGetPartW(pwzUrl, pwzHostname, &dwHostname, URL_PART_HOSTNAME, 0); if (FAILED(hrTemp)) { break; } if (!StringContainsHighAnsiW(pwzHostname)) { // home free! - no high ansi in servername. fRet = TRUE; break; } if (dwCodePage == CP_UTF8) { dwCodePage = GetACP(); } //This fix cannot be ported to IE downlevel versions because WC_NO_BEST_FIT_CHARS is not //supported on all OS versions. if (0 == WideCharToMultiByte(dwCodePage, WC_NO_BEST_FIT_CHARS, pwzHostname, -1, pszHostname, MAX_URL_SIZE, NULL, &bUsedDefaultChar) || bUsedDefaultChar) { fRet = false; break; } // Cache the UTF8 servername if we need it. // This field is set only once ( not on redirects ), // so release only in destructor. _pszUTF8ServerName = new char[url.dwHostNameLength+1]; if (!_pszUTF8ServerName) { break; } lstrcpy(_pszUTF8ServerName, _pszServerName); // now clobber it with the MBCS servername //Compat: match side-effect of calling InternetCrackUrl dwHostname = URL_FIELD_SIZE; hrTemp = UrlUnescapeA(pszHostname, _pszServerName, &dwHostname, 0); if(FAILED(hrTemp)) { break; } // now put the original _pszFullURL back together with the MBCS servername // instead of the UTF8 servername since wininet will have this. url.lpszHostName = _pszServerName; url.dwHostNameLength = dwHostname; url.lpszUserName = NULL; url.dwUserNameLength = 0; url.lpszPassword = NULL; url.dwPasswordLength = 0; if (!InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen)) { if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { break; } delete [] _pszFullURL; _pszFullURL = new char[++dwFullUrlLen]; if (!_pszFullURL || !InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen)) { break; } } // At this point, everything is right. // 1. _pszFullURL has the same url as wininet. // 2. _pszServerName has the MBCS hostname // 3. _pszUTF8ServerName has the UTF8 hostname that would have gone to the proxy (if needed) _dwServerCodePage = dwCodePage; _fUTF8hack = TRUE; fRet = TRUE; break; } while(TRUE); if (pwzHostname) delete [] pwzHostname; if (pszHostname) delete [] pszHostname; } LocalFree(pszTemp); } else fRet = FALSE; } Exit: PerfDbgLog1(tagCUrl, this, "-CUrl::ParseUrl Full:[%s]", _pszFullURL); DEBUG_LEAVE(fRet); return(fRet); } //+--------------------------------------------------------------------------- // // Method: CUrl::ProtoFromString // // Synopsis: // // Arguments: [lpszProtocol] -- // // Returns: // // History: Created Unknown // 2-20-96 JohannP (Johann Posch) Modified for class // // Notes: // //---------------------------------------------------------------------------- DWORD CUrl::ProtoFromString(LPSTR lpszProtocol) { DEBUG_ENTER((DBG_APP, Dword, "CUrl::ProtoFromString", "this=%#x, %.80q", this, lpszProtocol )); DWORD dwRetVal = DLD_PROTOCOL_NONE; PerfDbgLog1(tagCUrl, this, "CUrl::ProtoFromString [%s]", lpszProtocol); switch (GetUrlScheme(lpszProtocol)) { case URL_SCHEME_HTTPS: dwRetVal = DLD_PROTOCOL_HTTPS; break; case URL_SCHEME_HTTP: dwRetVal = DLD_PROTOCOL_HTTP; break; case URL_SCHEME_FTP: dwRetVal = DLD_PROTOCOL_FTP; break; case URL_SCHEME_GOPHER: dwRetVal = DLD_PROTOCOL_GOPHER; break; case URL_SCHEME_FILE: dwRetVal = DLD_PROTOCOL_FILE; break; case URL_SCHEME_LOCAL: dwRetVal = DLD_PROTOCOL_LOCAL; break; case URL_SCHEME_MK: if(IsStreamEnabled()) dwRetVal = DLD_PROTOCOL_STREAM; break; } DEBUG_LEAVE(dwRetVal); return dwRetVal; } // Helper API's //+--------------------------------------------------------------------------- // // Function: ConstructURL // // Synopsis: // // Arguments: [pBC] -- Pointer to BindCtx // [pURLBase] -- Pointer to Base URL [IN] // [pURLRelative] -- Pointer to Relative URL [IN] // [pURLFull] -- Pointer to resultant complete URL [OUT] // // Returns: // // History: 02-21-96 JoeS (Joe Souza) Created // // Notes: // //---------------------------------------------------------------------------- STDAPI ConstructURL(LPBC pBC, LPMONIKER pmkContext, LPMONIKER pmkToLeft, LPWSTR pwzURLRelative, LPWSTR pwzURLFull, DWORD cURLSize, DWORD dwFlags) { DEBUG_ENTER_API((DBG_API, Hresult, "ConstructURL", "%#x, %#x, %#x, %.80wq, %.80wq, %#x, %#x", pBC, pmkContext, pmkToLeft, pwzURLRelative, pwzURLFull, cURLSize, dwFlags )); PerfDbgLog2(tagCUrlApi, NULL, "+ConstructURL (rel:%ws, pmk:%lx)", pwzURLRelative, pmkContext); HRESULT hr = NOERROR; DWORD dwMnk = 0; LPMONIKER pmkCtx = NULL; LPWSTR wzURLBase = NULL; WCHAR wszURLFull[MAX_URL_SIZE + 1]; DWORD cchURLFull; DWORD cbSize; BOOL bParseOk = FALSE; DWORD dwCUFlags = URL_FILE_USE_PATHURL; if (dwFlags & CU_STANDARD_FORM) { dwCUFlags = 0; } if (!pwzURLRelative || !pwzURLFull || !cURLSize) { hr = E_INVALIDARG; goto ConstructExit; } pwzURLFull[0] = 0; if (!pmkContext && pBC) { // No Context Moniker was specified, so try to get one of those. hr = pBC->GetObjectParam(SZ_URLCONTEXT, (IUnknown **)&pmkCtx); if (hr != NOERROR) { pmkCtx = NULL; } } else if (pmkContext) { pmkCtx = pmkContext; } else if (pmkToLeft) { pmkCtx = pmkToLeft; } if (pmkCtx) { // There is a Context Moniker. Make sure it is a URL moniker and // if it is, get the base URL from it. pmkCtx->IsSystemMoniker(&dwMnk); if (dwMnk == MKSYS_URLMONIKER) { hr = pmkCtx->GetDisplayName(pBC, NULL, &wzURLBase); } } hr = NOERROR; if (wzURLBase) { DWORD dwSizeIn = MAX_URL_SIZE; cchURLFull = MAX_URL_SIZE; bParseOk = SUCCEEDED(OInetCombineUrl(wzURLBase, pwzURLRelative, dwCUFlags, pwzURLFull, dwSizeIn, &cchURLFull, 0)); } else if (dwFlags & CU_CANONICALIZE) { DWORD dwSizeIn = MAX_URL_SIZE; cchURLFull = MAX_URL_SIZE; bParseOk = SUCCEEDED(OInetParseUrl(pwzURLRelative,PARSE_CANONICALIZE, dwCUFlags, pwzURLFull, dwSizeIn,&cchURLFull,0)); } else { // We did not combine a relative and a base URL, and caller // does not want to canonicalize, so we just copy the given URL // into the return buffer. #ifndef unix wcsncpy(pwzURLFull, pwzURLRelative, cURLSize / 2); #else wcsncpy(pwzURLFull, pwzURLRelative, cURLSize / sizeof(wchar_t)); #endif /* unix */ goto ConstructExit; } if (!bParseOk || !wcslen(pwzURLFull)) { hr = MK_E_SYNTAX; } ConstructExit: if (wzURLBase) { delete wzURLBase; } PerfDbgLog2(tagCUrlApi, NULL, "-ConstructURL [%ws], hr:%lx", pwzURLFull, hr); DEBUG_LEAVE_API(hr); return hr; } //+--------------------------------------------------------------------------- // // UTF-8 code from wininet written by RFirth // //---------------------------------------------------------------------------- DWORD CountUnicodeToUtf8( IN LPCWSTR pwszIn, IN DWORD dwInLen, IN BOOL bEncode ) /*++ Routine Description: Count number of BYTEs required for UTF-8 conversion of UNICODE string. Count is terminated after dwInLen characters Arguments: pwszIn - pointer to input wide-character string dwInLen - number of characters in pwszIn bEncode - TRUE if we are to hex encode characters >= 0x80 Return Value: DWORD - number of BYTEs required for conversion --*/ { DEBUG_ENTER((DBG_APP, Dword, "CountUnicodeToUtf8", "%.80wq, %#x, %B", pwszIn, dwInLen, bEncode )); PProtAssert(pwszIn != NULL); PProtAssert(dwInLen != 0); DWORD dwCount = 0; DWORD oneCharLen = bEncode ? 3 : 1; DWORD twoCharLen = 2 * oneCharLen; // // N.B. code arranged to reduce number of jumps in loop to 1 (while) // do { WORD wchar = *pwszIn++; dwCount += (wchar & 0xF800) ? oneCharLen : 0; dwCount += ((wchar & 0xFF80) ? 0xFFFFFFFF : 0) & (twoCharLen - 1); ++dwCount; } while (--dwInLen != 0); DEBUG_LEAVE(dwCount); return dwCount; } DWORD ConvertUnicodeToUtf8( IN LPCWSTR pwszIn, IN DWORD dwInLen, OUT LPBYTE pszOut, IN DWORD dwOutLen, IN BOOL bEncode ) /*++ Routine Description: Convert a string of UNICODE characters to UTF-8: 0000000000000000..0000000001111111: 0xxxxxxx 0000000010000000..0000011111111111: 110xxxxx 10xxxxxx 0000100000000000..1111111111111111: 1110xxxx 10xxxxxx 10xxxxxx Arguments: pwszIn - pointer to input wide-character string dwInLen - number of CHARACTERS in pwszIn INCLUDING terminating NUL pszOut - pointer to output narrow-character buffer dwOutLen - number of BYTEs in pszOut bEncode - TRUE if we are to hex encode characters >= 0x80 Return Value: DWORD Success - ERROR_SUCCESS Failure - ERROR_INSUFFICIENT_BUFFER Not enough space in pszOut to store results --*/ { DEBUG_ENTER((DBG_APP, Dword, "CountUnicodeToUtf8", "%.80wq, %#x, %#x, %#x, %B", pwszIn, dwInLen, pszOut, dwOutLen, bEncode )); PProtAssert(pwszIn != NULL); PProtAssert((int)dwInLen > 0); PProtAssert(pszOut != NULL); PProtAssert((int)dwOutLen > 0); DWORD outputSize = bEncode ? 3 : 1; static char hexArray[] = "0123456789ABCDEF"; while (dwInLen-- && dwOutLen) { WORD wchar = *pwszIn++; BYTE bchar; if (wchar <= 0x007F) { *pszOut++ = (BYTE)(wchar); --dwOutLen; continue; } BYTE lead = ((wchar >= 0x0800) ? 0xE0 : 0xC0); int shift = ((wchar >= 0x0800) ? 12 : 6); bchar = lead | (BYTE)(wchar >> shift); if (bEncode) { *pszOut++ = '%'; *pszOut++ = hexArray[bchar >> 4]; bchar = hexArray[bchar & 0x0F]; } *pszOut++ = bchar; if (wchar >= 0x0800) { bchar = 0x80 | (BYTE)((wchar >> 6) & 0x003F); if (bEncode) { *pszOut++ = '%'; *pszOut++ = hexArray[bchar >> 4]; bchar = hexArray[bchar & 0x0F]; } *pszOut++ = bchar; } bchar = 0x80 | (BYTE)(wchar & 0x003F); if (bEncode) { *pszOut++ = '%'; *pszOut++ = hexArray[bchar >> 4]; bchar = hexArray[bchar & 0x0F]; } *pszOut++ = bchar; } DEBUG_LEAVE(ERROR_SUCCESS); return ERROR_SUCCESS; } BOOL StringContainsHighAnsi( IN LPSTR pszIn, IN DWORD dwInLen ) /*++ Routine Description: Determine if string contains ANSI characters in range 0x80..0xFF. Search is stopped when we hit the first high-ANSI character, when we hit the terminator or when we have decremented dwInLen to zero Arguments: pszIn - pointer to string to test dwInLen - length of pszIn Return Value: BOOL TRUE - pszIn contains one or more high-ANSI characters FALSE - pszIn (or substring of length dwInLen) does not contain high-ANSI characters --*/ { DEBUG_ENTER((DBG_APP, Bool, "StringContainsHighAnsi", "%.80q, %#x", pszIn, dwInLen )); PProtAssert(pszIn != NULL); PProtAssert(dwInLen != 0); // only need to search the base portion while (dwInLen-- && *pszIn && *pszIn != '?') { if (*pszIn++ & 0x80) { DEBUG_LEAVE(TRUE); return TRUE; } } DEBUG_LEAVE(FALSE); return FALSE; } BOOL StringContainsHighAnsiW( IN LPCWSTR pwzIn ) /*-- Unicode version of StringContainsHighAnsi() --*/ { DEBUG_ENTER((DBG_APP, Bool, "StringContainsHighAnsiW", "%.80wq", pwzIn )); PProtAssert(pwzIn != NULL); // only need to search the base portion while (*pwzIn && *pwzIn != L'?') { if (*pwzIn >= 0x80) { DEBUG_LEAVE(TRUE); return TRUE; } pwzIn++; } DEBUG_LEAVE(FALSE); return FALSE; } BOOL ConvertUnicodeUrl( LPCWSTR pwzFrom, LPSTR pszTo, INT cchTo, DWORD dwCodePage, BOOL fUTF8Enabled, BOOL* pfUTF8Required ) { DEBUG_ENTER((DBG_APP, Bool, "ConvertUnicodeUrl", "%.80wq, %.80q, %#x, %#x, %B", pwzFrom, pszTo, cchTo, dwCodePage, fUTF8Enabled )); BOOL fSuccess = FALSE; // // In multibyte string, if we have any bytes(in the base url portion) // over 0x80, we will have to convert the base portion to utf-8 // (leave the query portion as multi-byte) // // S_FALSE from the conversion above indicates that some wide chars couldn't be // mapped to the destination code page *pfUTF8Required = FALSE; if( fUTF8Enabled && StringContainsHighAnsiW(pwzFrom)) { *pfUTF8Required = TRUE; // utf-8 conversion // // do we have a query portion? (by searching the UNICODE URL string // for '?') also we can get the UNICODED string's BasePortion // URL Length // DWORD dwBaseUrlLen = 0; // BasePortion length (in UNICODE) LPWSTR pBase = (LPWSTR) pwzFrom; while( *pBase && *pBase != '?') { pBase++; dwBaseUrlLen++; } DWORD dwMBQryUrlLen = 0; // QueryPortion length (in Multibyte) LPSTR pszQry = NULL; // multibyte query string if (*pBase) { // // we have a query portion, need to get length of // multi-byte query portion // In this case, we don't care whether or not pszTo is able to convert // everything W2A(pBase, pszTo, cchTo, dwCodePage); dwMBQryUrlLen = strlen(pszTo); pszQry = pszTo; } // // we are converting the base portion of UNICODE URL to UTF-8 // count UTF-8 string length for base url // DWORD dwUTF8Len = CountUnicodeToUtf8(pwzFrom, dwBaseUrlLen, TRUE); // // allocate buffer for whole converted string // Buffer size = UTF8_BaseURL_Len + MultiByte_Query_Len + '\0' // DWORD dwUTFBufferSize = dwUTF8Len + dwMBQryUrlLen + 1; // // the size can not exceed incoming buffer size ccTo // if( dwUTFBufferSize > (DWORD)(cchTo + 1) ) { // // fallback to IE4 behavior - sending multi-byte string // goto cleanup; } char* pszUTF8 = new char[dwUTFBufferSize]; if( !pszUTF8 ) { // // if we failed to allocate, we automatically // fallback to IE4 behavior - sending multi-byte string // goto cleanup; } memset(pszUTF8, 0, dwUTFBufferSize); // Coverting UNICODE->UTF8 DWORD dwError; dwError = ConvertUnicodeToUtf8( pwzFrom, dwBaseUrlLen, (LPBYTE)pszUTF8, dwUTF8Len, TRUE ); if( dwError != ERROR_SUCCESS ) { // // if we failed, delete temp string and fallback to IE4 // behavior - sending multi-byte string // delete [] pszUTF8; goto cleanup; } // // copy over the Multi-byte query string to final buffer // if( pszQry ) { LPSTR pszURL = pszUTF8 + dwUTF8Len; while( dwMBQryUrlLen-- ) { *pszURL = *pszQry; pszURL++; pszQry++; } } // // we are done, copy the content from temp buffer to // szTo // StrCpyN(pszTo, pszUTF8, dwUTFBufferSize); // delete temp utf8 buffer delete [] pszUTF8; fSuccess = TRUE; } cleanup: if (!fSuccess) { // Unicode->Multibyte // IE4 behaviour. Shoot. W2A(pwzFrom, pszTo, cchTo, dwCodePage); } DEBUG_LEAVE(TRUE); return TRUE; }