windows-server-2003/inetcore/urlmon/iapp/curl.cxx

//+---------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1992 - 1995.
//
//  File:       curl.cxx
//
//  Contents:   handle url parsing and context urls parsing
//
//  Classes:
//
//  Functions:
//
//  History:    2-20-96   JohannP (Johann Posch)   Created
//
//----------------------------------------------------------------------------
#include <iapp.h>
#include <shlwapi.h>
#include <shlwapip.h>

PerfDbgTag(tagCUrl,    "Urlmon", "Log CUrl",     DEB_PROT);
PerfDbgTag(tagCUrlApi, "Urlmon", "Log CUrl API", DEB_ASYNCAPIS);

//+---------------------------------------------------------------------------
//
//  Function: IsStreamEnabled
//
//  Synopsis: returns TRUE iff the "MkEnabled" value under
//            HKLM\SOFTWARE\Microsoft\Internet Explorer holds the string
//            "yes" (compared case-insensitively with lstrcmpi)
//
//  Arguments:  (none)
//
//  Returns:    TRUE  - the value exists and equals "yes"
//              FALSE - the key or value could not be read, or the value
//                      holds anything else
//
//  History:   6-7-96   craigc          Created
//
//  Notes:      The result of a successful registry read is cached in
//              function-local statics. A failed open/query is NOT cached,
//              so the registry is consulted again on the next call.
//
//----------------------------------------------------------------------------
extern BOOL g_bGlobalUTF8hackEnabled;
BOOL StringContainsHighAnsiW(LPCWSTR);

BOOL IsStreamEnabled()
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "IsStreamEnabled",
                NULL
                ));

    HKEY    hk;
    char    szBuf[256];
    DWORD   dwType;
    DWORD   dwSize;
    const char    szKey[] = "SOFTWARE\\Microsoft\\Internet Explorer";
    const char    szValue[] = "MkEnabled";
    const char    szYes[] = "yes";

    static BOOL   fChecked = FALSE;
    static BOOL   fEnabled = FALSE;

    // NOTE(review): mxs is constructed afresh on every call. If CMutexSem is
    // an ordinary per-instance mutex this lock does not serialize concurrent
    // callers against each other - confirm against the CMutexSem definition.
    CMutexSem mxs;
    CLock lck(mxs);

    if (fChecked)
    {
        DEBUG_LEAVE(fEnabled);
        return fEnabled;
    }

    if (RegOpenKey(HKEY_LOCAL_MACHINE, szKey, &hk) != ERROR_SUCCESS)
    {
        DEBUG_LEAVE(FALSE);
        return( FALSE );
    }

    // Offer one byte less than the buffer holds: the registry does not
    // guarantee that string data was stored with a terminating NUL, so we
    // keep room to add one ourselves before comparing.
    dwSize = sizeof(szBuf) - 1;
    if (RegQueryValueEx( hk, szValue, NULL, &dwType, (BYTE*)szBuf, &dwSize ) != ERROR_SUCCESS)
    {
        RegCloseKey( hk );

        DEBUG_LEAVE(FALSE);
        return( FALSE );
    }
    RegCloseKey( hk );

    // dwSize <= sizeof(szBuf) - 1 here, so this index is always in bounds.
    szBuf[dwSize] = '\0';

    fEnabled = (dwSize && (lstrcmpi( szYes, szBuf ) == 0));
    fChecked = TRUE;

    DEBUG_LEAVE(fEnabled);
    return fEnabled;
}

//
//  GetUrlScheme() maps a URL string onto one of the URL_SCHEME_* constants
//  defined in shlwapip.h, using ParseURL() to identify the scheme.
//  example "http://foo" returns URL_SCHEME_HTTP
//  A NULL pointer, or a string ParseURL() rejects, yields URL_SCHEME_INVALID.
//
DWORD GetUrlScheme(IN LPCTSTR pcszUrl)
{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "GetUrlScheme",
                "%#x",
                pcszUrl
                ));

    // Assume failure; overwritten only when the parse succeeds.
    DWORD dwScheme = URL_SCHEME_INVALID;

    if (pcszUrl != NULL)
    {
        PARSEDURL parsed;
        parsed.cbSize = sizeof(parsed);

        if (SUCCEEDED(ParseURL(pcszUrl, &parsed)))
        {
            dwScheme = parsed.nScheme;
        }
    }

    DEBUG_LEAVE(dwScheme);
    return dwScheme;
}


//+---------------------------------------------------------------------------
//
//  Method:     CUrl::CUrl
//
//  Synopsis:   Puts the object into its empty state: every buffer pointer
//              is NULL and port, protocol, code page and the UTF8-hack
//              flag are cleared. No memory is allocated here; see
//              CUrlInitBasic / CUrlInitAll.
//
//  Arguments:  (none)
//
//  Returns:
//
//  History:    2-20-96   JohannP (Johann Posch)   Created
//
//  Notes:
//
//----------------------------------------------------------------------------
CUrl::CUrl()
{
    DEBUG_ENTER((DBG_APP,
                None,
                "CUrl::CUrl",
                "this=%#x",
                this
                ));

    PerfDbgLog(tagCUrl, this, "+CUrl::CUrl");

    // The single allocation that backs the fixed-size fields, and the
    // pointers CUrlInitBasic carves out of it.
    _pBasicAllocUnit = NULL;
    _pszServerName   = NULL;
    _pszUserName     = NULL;
    _pszPassword     = NULL;
    _pszProtocol     = NULL;
    _pszBaseURL      = NULL;

    // Buffers that are allocated individually.
    _pszPartURL        = NULL;
    _pszFullURL        = NULL;
    _pszObject         = NULL;
    _pszUTF8ServerName = NULL;

    // Scalar state.
    _dwProto          = 0;
    _ipPort           = 0;
    _dwServerCodePage = 0;
    _fUTF8hack        = FALSE;

    PerfDbgLog(tagCUrl, this, "-CUrl::CUrl");

    DEBUG_LEAVE(0);
}

//
//  CUrl::CUrlInitBasic
//
//  Allocates one contiguous block and carves the fixed-size fields plus the
//  base-URL buffer out of it. Each field starts out as an empty string.
//
//  dwBaseUrlSize - capacity of the base-URL buffer in characters, not
//                  counting the terminating NUL (one extra byte is added).
//
//  Returns TRUE on success; FALSE if the requested size cannot be
//  represented in a DWORD or the allocation fails.
//
BOOL
CUrl::CUrlInitBasic(DWORD dwBaseUrlSize)
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::CUrlInitBasic",
                "this=%#x, %#x",
                this, dwBaseUrlSize
                ));

    //
    // basic allocation contains the following fields:
    //
    //  _pszServerName - URL_FIELD_SIZE
    //  _pszUserName   - URL_FIELD_SIZE
    //  _pszPassword   - URL_FIELD_SIZE
    //  _pszProtocol   - 12
    //  _pszBaseURL    - dwBaseUrlSize + 1
    //
    BOOL fRet = FALSE;
    const DWORD dwFixedSize = (3 * (URL_FIELD_SIZE)) + 12 + 1;

    // Refuse sizes whose sum would wrap around a DWORD; a wrapped value
    // would yield a block far smaller than the fields laid out below.
    if (dwBaseUrlSize <= ((DWORD)-1) - dwFixedSize)
    {
        _pBasicAllocUnit = new char[dwFixedSize + dwBaseUrlSize];
    }

    if( _pBasicAllocUnit )
    {
        _pszServerName = _pBasicAllocUnit;
        _pszUserName   = _pszServerName + URL_FIELD_SIZE;
        _pszPassword   = _pszUserName   + URL_FIELD_SIZE;
        _pszProtocol   = _pszPassword   + URL_FIELD_SIZE;
        _pszBaseURL    = _pszProtocol   + 12;

        *_pszServerName = '\0';
        *_pszUserName   = '\0';
        *_pszPassword   = '\0';
        *_pszProtocol   = '\0';
        *_pszBaseURL    = '\0';

        fRet = TRUE;
    }

    DEBUG_LEAVE(fRet);
    return fRet;
}

//
//  CUrl::CUrlInitAll
//
//  Runs CUrlInitBasic(MAX_URL_SIZE) and additionally allocates the part-URL,
//  full-URL and object buffers at MAX_URL_SIZE + 1 bytes each, all set to
//  the empty string.
//
//  Returns TRUE when every allocation succeeded. If any of the three extra
//  buffers cannot be allocated, all three are released and reset to NULL and
//  FALSE is returned (the basic allocation is left in place).
//
BOOL
CUrl::CUrlInitAll()
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::CUrlInitAll",
                "this=%#x",
                this
                ));

    // backward compatibility: init everything with MAX_URL_SIZE
    BOOL fRet = CUrlInitBasic(MAX_URL_SIZE);

    if( fRet )
    {
        _pszPartURL = new char[MAX_URL_SIZE + 1];
        _pszFullURL = new char[MAX_URL_SIZE + 1];
        _pszObject  = new char[MAX_URL_SIZE + 1];

        if( !_pszPartURL || !_pszFullURL || !_pszObject )
        {
            // These came from new[], so they must go back through
            // delete [] (the destructor does the same).
            if (_pszPartURL)
                delete [] _pszPartURL;

            if (_pszFullURL)
                delete [] _pszFullURL;

            if (_pszObject)
                delete [] _pszObject;

            _pszPartURL = NULL;
            _pszFullURL = NULL;
            _pszObject  = NULL;

            fRet = FALSE;
        }
        else
        {
            *_pszPartURL = '\0';
            *_pszFullURL = '\0';
            *_pszObject  = '\0';
        }
    }

    DEBUG_LEAVE(fRet);
    return fRet;
}

//
//  CUrl::~CUrl
//
//  Releases every buffer the object owns. The fixed-size fields all live
//  inside _pBasicAllocUnit, so only that block and the individually
//  allocated buffers are freed.
//
CUrl::~CUrl()
{
    DEBUG_ENTER((DBG_APP,
                None,
                "CUrl::~CUrl",
                "this=%#x",
                this
                ));

    // Individually allocated buffers.
    if (_pszUTF8ServerName != NULL)
        delete [] _pszUTF8ServerName;

    if (_pszObject != NULL)
        delete [] _pszObject;

    if (_pszFullURL != NULL)
        delete [] _pszFullURL;

    if (_pszPartURL != NULL)
        delete [] _pszPartURL;

    // Backing store for server name, user name, password, protocol and
    // base URL.
    if (_pBasicAllocUnit != NULL)
        delete [] _pBasicAllocUnit;

    DEBUG_LEAVE(0);
}

//+---------------------------------------------------------------------------
//
//  Function:   ParseUrl
//
//  Synopsis:   Builds _pszFullURL (the base URL, or the base URL combined
//              with the part URL when one is set), strips the fragment,
//              determines the protocol and breaks the URL down into
//              servername, username, password, objectname and port.
//
//  Arguments:  [fUTF8Required] -- when TRUE, and the global UTF8 hack is
//                                 enabled, and the protocol is HTTP/HTTPS,
//                                 the servername is re-derived from pwzUrl
//                                 in multi-byte form (see below)
//              [pwzUrl]        -- wide-character URL; only read on the
//                                 UTF8-hack path, to extract the hostname
//              [dwCodePage]    -- code page for the hostname conversion;
//                                 CP_UTF8 is replaced by GetACP()
//
//  Returns:    TRUE if the URL was successfully parsed.
//
//  History:    Created    Unknown
//              02-20-95   JohannP (Johann Posch)   Created Class
//              03-20-95   JoeS    (Joe Souza)      Special FILE: syntaxes
//
//  Notes:      URL should have already been parsed earlier by ConstructURL.
//              This function will crack the URL.
//              Returns FALSE without touching anything else if the base URL
//              buffer has not been allocated.
//
//----------------------------------------------------------------------------
BOOL CUrl::ParseUrl(BOOL fUTF8Required, LPCWSTR pwzUrl, DWORD dwCodePage)
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "CUrl::ParseUrl",
                "this=%#x, %B",
                this, fUTF8Required
                ));

    PerfDbgLog1(tagCUrl, this, "+CUrl::ParseUrl Base:[%s]", _pszBaseURL);
    BOOL            fRet = TRUE;
    URL_COMPONENTS  url;
    DWORD           cchFullURL;
    CHAR *          pch;
    DWORD           dwFullUrlLen;

    _fUTF8hack = FALSE; //possibly set true later in the function for non-redirect codepath.

    if (!_pszBaseURL)
    {
        // The basic allocation never happened (or failed); every path
        // below would call strlen() on a NULL pointer.
        fRet = FALSE;
        goto Exit;
    }

    if (_pszPartURL && _pszPartURL[0] != '\0' )  // This string will be set for redirects.
    {
        //
        // we need to re-alloc _pszFullURL and _pszObject, since
        // the size can grow!
        //
        dwFullUrlLen = strlen(_pszBaseURL) + strlen(_pszPartURL) + 1;
        if(dwFullUrlLen > MAX_URL_SIZE)
        {
            dwFullUrlLen = MAX_URL_SIZE + 1;
        }

        // Note: a buffer that is currently NULL is not allocated here, so
        // the check below then fails the call.
        if( _pszFullURL )
        {
            delete [] _pszFullURL;
            _pszFullURL = NULL;
            _pszFullURL = new char[dwFullUrlLen];
        }

        if( _pszObject )
        {
            delete [] _pszObject;
            _pszObject = NULL;
            _pszObject = new char[dwFullUrlLen];
        }

        if( !_pszFullURL || !_pszObject  )
        {
            fRet = FALSE;
            goto Exit;
        }

        cchFullURL = dwFullUrlLen;

        if(FAILED(UrlCombine(_pszBaseURL, _pszPartURL, _pszFullURL, &cchFullURL, URL_FILE_USE_PATHURL)))
        {
            fRet = FALSE;
            PProtAssert(FALSE && "Combine failed in ParseUrl!\n");

            goto Exit;
        }
    }
    else
    {
        // FullURL is BaseURL
        dwFullUrlLen = strlen(_pszBaseURL) + 1;
        if( !_pszFullURL )
        {
            _pszFullURL = new char[dwFullUrlLen];
        }

        if( !_pszObject )
        {
            _pszObject = new char[dwFullUrlLen];
        }

        if( !_pszFullURL || !_pszObject  )
        {
            fRet = FALSE;
            goto Exit;
        }

        // NOTE(review): when the buffers already existed they are reused
        // as-is; this assumes they are at least dwFullUrlLen bytes - confirm
        // against the callers' initialization sequence.
        lstrcpy(_pszFullURL, _pszBaseURL);
    }

    // Trim off intra-page link.
    //
    // NB: Don't use ExtraInfo below to do this because you will
    // also lose search string this way.
    //
    // Also, we need to do this before we decode the URL below,
    // so that we don't trim off the wrong '#' if there was one
    // encoded in the URL path.
    //
    //  UrlGetLocation() will intelligently find the fragment
    //  some schemes do not use the # as a fragment identifier.
    //  it returns a pointer to the #
    //

    pch = (CHAR *)UrlGetLocation(_pszFullURL);
    if (pch != NULL)
    {
        *pch = TEXT('\0');
    }


    _dwProto = ProtoFromString(_pszFullURL);

    if (_dwProto == DLD_PROTOCOL_NONE)
    {
        fRet = FALSE;
        goto Exit;
    }


    if(DLD_PROTOCOL_FILE == _dwProto)
    {
        //
        // at this point, _pszFullURL and _pszObject should be all
        // allocated with size of dwFullUrlLen
        //
        DWORD cchObject = dwFullUrlLen;

        //do file stuff here
        fRet = SUCCEEDED(PathCreateFromUrl(_pszFullURL, _pszObject, &cchObject, 0));

    }
    else
    {
        //
        //  BUGBUG - InternetCrackUrl alters the original url - zekel - 25-JUL-97
        //  ICU is poorly behaved, and it unescapes the server and username
        //  components insitu regardless of whether it was requested or not
        //  this means that if you pass in http://host%76/, the url on return
        //  is http://hostv76/.  it happens that if you create the URL from the
        //  components given, you will get the correct URL, but crack doesnt understand
        //  all URLs.  it is too late in the game to change the behavior of ICU,
        //  because wininet internally depends on the behavior.
        //  so our solution is to create a temp buffer that can be messed with
        //  and then throw it away after we are done.
        //
        LPSTR pszTemp = StrDup(_pszFullURL);

        if (pszTemp)
        {
            url.dwStructSize = sizeof(url);

            url.lpszScheme = _pszProtocol;
            url.dwSchemeLength =12;

            url.lpszHostName = _pszServerName;
            url.dwHostNameLength = URL_FIELD_SIZE;

            url.lpszUserName = _pszUserName;
            url.dwUserNameLength = URL_FIELD_SIZE;

            url.lpszPassword = _pszPassword;
            url.dwPasswordLength = URL_FIELD_SIZE;

            url.lpszUrlPath = _pszObject;
            url.dwUrlPathLength = dwFullUrlLen;

            url.lpszExtraInfo = NULL;
            url.dwExtraInfoLength = 0;

            fRet = InternetCrackUrl(pszTemp, 0, (_dwProto == DLD_PROTOCOL_STREAM ? ICU_DECODE : 0), &url);

            // url.nPort is an output field that this function never
            // initializes itself; only trust it when the crack succeeded.
            if (fRet)
            {
                _ipPort = url.nPort;
            }

        /*
            Code to pass in an MBCS servername to wininet always when this fix enabled
            to get around the UTF8-servername bugs. - I-DNS fix.
         */

            if( fUTF8Required
                && g_bGlobalUTF8hackEnabled
                && fRet
                && ((_dwProto == DLD_PROTOCOL_HTTP)
                    || (_dwProto == DLD_PROTOCOL_HTTPS))
              )
            {
                DWORD dwHostname = MAX_URL_SIZE;
                WCHAR* pwzHostname = new WCHAR[MAX_URL_SIZE];
                char* pszHostname = new char[MAX_URL_SIZE];
                HRESULT hrTemp;
                BOOL bUsedDefaultChar;

                // This is NOT a loop - just an urlmon-style coding convention to avoid deep if-else-nesting.
                // Every early 'break' leaves fRet == FALSE.
                do
                {
                    fRet = FALSE;

                    if (!pwzHostname || !pszHostname)
                    {
                        break;
                    }

                    hrTemp = UrlGetPartW(pwzUrl, pwzHostname, &dwHostname, URL_PART_HOSTNAME, 0);
                    if (FAILED(hrTemp))
                    {
                        break;
                    }

                    if (!StringContainsHighAnsiW(pwzHostname))
                    {
                        // home free! - no high ansi in servername.
                        fRet = TRUE;
                        break;
                    }

                    if (dwCodePage == CP_UTF8)
                    {
                        dwCodePage = GetACP();
                    }

                    //This fix cannot be ported to IE downlevel versions because WC_NO_BEST_FIT_CHARS is not
                    //supported on all OS versions.
                    // bUsedDefaultChar is only examined when the conversion
                    // itself succeeded (short-circuit ||).
                    if (0 == WideCharToMultiByte(dwCodePage, WC_NO_BEST_FIT_CHARS, pwzHostname, -1,
                                pszHostname, MAX_URL_SIZE, NULL, &bUsedDefaultChar)
                        || bUsedDefaultChar)
                    {
                        fRet = FALSE;
                        break;
                    }

                    // Cache the UTF8 servername if we need it.
                    // This field is set only once ( not on redirects ),
                    // so release only in destructor.
                    //
                    // NOTE(review): nothing in this function enforces the
                    // "only once" assumption; if this path runs a second
                    // time on the same object the previous buffer appears
                    // to be leaked. Not freed here because other code may
                    // still hold the old pointer - confirm with callers.

                    _pszUTF8ServerName = new char[url.dwHostNameLength+1];
                    if (!_pszUTF8ServerName)
                    {
                        break;
                    }

                    lstrcpy(_pszUTF8ServerName, _pszServerName);

                    // now clobber it with the MBCS servername

                    //Compat: match side-effect of calling InternetCrackUrl

                    dwHostname = URL_FIELD_SIZE;
                    hrTemp = UrlUnescapeA(pszHostname, _pszServerName, &dwHostname, 0);
                    if(FAILED(hrTemp))
                    {
                        break;
                    }

                    // now put the original _pszFullURL back together with the MBCS servername
                    // instead of the UTF8 servername since wininet will have this.

                    url.lpszHostName = _pszServerName;
                    url.dwHostNameLength = dwHostname;

                    url.lpszUserName = NULL;
                    url.dwUserNameLength = 0;

                    url.lpszPassword = NULL;
                    url.dwPasswordLength = 0;

                    if (!InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
                    {
                        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
                        {
                            break;
                        }

                        // Grow the buffer to the size reported back in
                        // dwFullUrlLen (plus one) and retry once.
                        delete [] _pszFullURL;
                        _pszFullURL = new char[++dwFullUrlLen];

                        if (!_pszFullURL || !InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
                        {
                            break;
                        }
                    }

                    // At this point, everything is right.
                    // 1. _pszFullURL has the same url as wininet.
                    // 2. _pszServerName has the MBCS hostname
                    // 3. _pszUTF8ServerName has the UTF8 hostname that would have gone to the proxy (if needed)

                    _dwServerCodePage = dwCodePage;
                    _fUTF8hack = TRUE;

                    fRet = TRUE;
                    break;
                }
                while(TRUE);

                if (pwzHostname)
                    delete [] pwzHostname;
                if (pszHostname)
                    delete [] pszHostname;
            }

            // StrDup() memory is released with LocalFree().
            LocalFree(pszTemp);
        }
        else
            fRet = FALSE;
    }


Exit:

    PerfDbgLog1(tagCUrl, this, "-CUrl::ParseUrl Full:[%s]", _pszFullURL);

    DEBUG_LEAVE(fRet);
    return(fRet);
}

//+---------------------------------------------------------------------------
//
//  Method:     CUrl::ProtoFromString
//
//  Synopsis:   Translates the scheme of a URL string into the matching
//              DLD_PROTOCOL_* value.
//
//  Arguments:  [lpszProtocol] -- URL string whose scheme is examined
//                                (handed to GetUrlScheme)
//
//  Returns:    DLD_PROTOCOL_HTTPS / HTTP / FTP / GOPHER / FILE / LOCAL for
//              the corresponding scheme; DLD_PROTOCOL_STREAM for mk: when
//              IsStreamEnabled() is TRUE; DLD_PROTOCOL_NONE otherwise.
//
//  History:    Created   Unknown
//              2-20-96   JohannP (Johann Posch)   Modified for class
//
//  Notes:
//
//----------------------------------------------------------------------------
DWORD CUrl::ProtoFromString(LPSTR lpszProtocol)
{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "CUrl::ProtoFromString",
                "this=%#x, %.80q",
                this, lpszProtocol
                ));

    PerfDbgLog1(tagCUrl, this, "CUrl::ProtoFromString [%s]", lpszProtocol);

    DWORD       dwProto  = DLD_PROTOCOL_NONE;
    const DWORD dwScheme = GetUrlScheme(lpszProtocol);

    if (dwScheme == URL_SCHEME_HTTPS)
    {
        dwProto = DLD_PROTOCOL_HTTPS;
    }
    else if (dwScheme == URL_SCHEME_HTTP)
    {
        dwProto = DLD_PROTOCOL_HTTP;
    }
    else if (dwScheme == URL_SCHEME_FTP)
    {
        dwProto = DLD_PROTOCOL_FTP;
    }
    else if (dwScheme == URL_SCHEME_GOPHER)
    {
        dwProto = DLD_PROTOCOL_GOPHER;
    }
    else if (dwScheme == URL_SCHEME_FILE)
    {
        dwProto = DLD_PROTOCOL_FILE;
    }
    else if (dwScheme == URL_SCHEME_LOCAL)
    {
        dwProto = DLD_PROTOCOL_LOCAL;
    }
    else if (dwScheme == URL_SCHEME_MK)
    {
        // mk: only counts as a protocol when the registry switch is on;
        // the switch is consulted for this scheme alone.
        if (IsStreamEnabled())
        {
            dwProto = DLD_PROTOCOL_STREAM;
        }
    }
    // any other scheme: dwProto stays DLD_PROTOCOL_NONE

    DEBUG_LEAVE(dwProto);
    return dwProto;
}


// Helper API's

//+---------------------------------------------------------------------------
//
//  Function:   ConstructURL
//
//  Synopsis:   Produces a full URL from a (possibly relative) URL and an
//              optional context moniker that supplies the base URL.
//
//  Arguments:  [pBC] -- Pointer to BindCtx; queried for SZ_URLCONTEXT when
//                       no context moniker is passed in
//              [pmkContext] -- Context moniker, may be NULL
//              [pmkToLeft] -- Used as context only when both pmkContext
//                             and pBC are NULL
//              [pwzURLRelative] -- Pointer to Relative URL [IN]
//              [pwzURLFull] -- Pointer to resultant complete URL [OUT]
//              [cURLSize] -- Size of the pwzURLFull buffer in BYTES
//              [dwFlags] -- CU_STANDARD_FORM drops URL_FILE_USE_PATHURL;
//                           CU_CANONICALIZE canonicalizes when there is
//                           no base URL to combine with
//
//  Returns:    NOERROR on success (including the plain-copy case),
//              E_INVALIDARG for NULL pointers or a buffer too small to hold
//              even one character,
//              MK_E_SYNTAX if combining/canonicalizing fails or yields an
//              empty string.
//
//  History:    02-21-96    JoeS    (Joe Souza)     Created
//
//  Notes:      The output is never written beyond cURLSize bytes and is
//              always NUL-terminated; the plain-copy case truncates
//              silently if the source does not fit.
//
//----------------------------------------------------------------------------
STDAPI ConstructURL(LPBC pBC, LPMONIKER pmkContext, LPMONIKER pmkToLeft,
            LPWSTR pwzURLRelative, LPWSTR pwzURLFull, DWORD cURLSize,
            DWORD dwFlags)
{
    DEBUG_ENTER_API((DBG_API,
                Hresult,
                "ConstructURL",
                "%#x, %#x, %#x, %.80wq, %.80wq, %#x, %#x",
                pBC, pmkContext, pmkToLeft, pwzURLRelative, pwzURLFull, cURLSize, dwFlags
                ));

    PerfDbgLog2(tagCUrlApi, NULL, "+ConstructURL (rel:%ws, pmk:%lx)", pwzURLRelative, pmkContext);
    HRESULT     hr = NOERROR;
    DWORD       dwMnk = 0;
    LPMONIKER   pmkCtx = NULL;
    BOOL        fReleaseCtx = FALSE;    // TRUE when pmkCtx came from the bind context
    LPWSTR      wzURLBase = NULL;
    DWORD       cchURLFull;
    DWORD       cchCaller;              // caller's buffer capacity in characters
    DWORD       cchOut;                 // capacity handed to combine/parse
    BOOL        bParseOk = FALSE;
    DWORD       dwCUFlags = URL_FILE_USE_PATHURL;

    if (dwFlags & CU_STANDARD_FORM)
    {
        dwCUFlags = 0;
    }

    if (!pwzURLRelative || !pwzURLFull || !cURLSize)
    {
        hr = E_INVALIDARG;
        goto ConstructExit;
    }

    // cURLSize is a byte count. A buffer that cannot hold a single
    // character cannot even take the terminator written below.
    cchCaller = cURLSize / sizeof(WCHAR);
    if (cchCaller == 0)
    {
        hr = E_INVALIDARG;
        goto ConstructExit;
    }

    // Never tell the combine/parse helpers that the buffer is larger than
    // what the caller actually provided.
    cchOut = (cchCaller < MAX_URL_SIZE) ? cchCaller : MAX_URL_SIZE;

    pwzURLFull[0] = 0;

    if (!pmkContext && pBC)
    {
        // No Context Moniker was specified, so try to get one of those.

        hr = pBC->GetObjectParam(SZ_URLCONTEXT, (IUnknown **)&pmkCtx);
        if (hr != NOERROR)
        {
            pmkCtx = NULL;
        }
        else if (pmkCtx)
        {
            // GetObjectParam hands back a counted reference; remember to
            // drop it before returning.
            fReleaseCtx = TRUE;
        }
    }
    else if (pmkContext)
    {
        pmkCtx = pmkContext;
    }
    else if (pmkToLeft)
    {
        pmkCtx = pmkToLeft;
    }

    if (pmkCtx)
    {
        // There is a Context Moniker.  Make sure it is a URL moniker and
        // if it is, get the base URL from it.

        pmkCtx->IsSystemMoniker(&dwMnk);
        if (dwMnk == MKSYS_URLMONIKER)
        {
            hr = pmkCtx->GetDisplayName(pBC, NULL, &wzURLBase);
        }
    }

    // Failing to obtain a base URL is not an error; we simply continue
    // without one.
    hr = NOERROR;

    if (wzURLBase)
    {
        cchURLFull = cchOut;
        bParseOk = SUCCEEDED(OInetCombineUrl(wzURLBase, pwzURLRelative, dwCUFlags, pwzURLFull, cchOut, &cchURLFull, 0));

    }
    else if (dwFlags & CU_CANONICALIZE)
    {
        cchURLFull = cchOut;
        bParseOk = SUCCEEDED(OInetParseUrl(pwzURLRelative,PARSE_CANONICALIZE, dwCUFlags, pwzURLFull, cchOut,&cchURLFull,0));
    }
    else
    {
        // We did not combine a relative and a base URL, and caller
        // does not want to canonicalize, so we just copy the given URL
        // into the return buffer.
        wcsncpy(pwzURLFull, pwzURLRelative, cchCaller);

        // wcsncpy leaves the destination unterminated when the source
        // fills it completely.
        pwzURLFull[cchCaller - 1] = 0;
        goto ConstructExit;
    }


    if (!bParseOk || !wcslen(pwzURLFull))
    {
        hr = MK_E_SYNTAX;
    }

ConstructExit:

    if (wzURLBase)
    {
        // NOTE(review): the display name is released with operator delete,
        // which presumes the URL moniker allocates it compatibly - confirm
        // against the moniker's GetDisplayName implementation.
        delete wzURLBase;
    }

    if (fReleaseCtx)
    {
        pmkCtx->Release();
    }

    PerfDbgLog2(tagCUrlApi, NULL, "-ConstructURL [%ws], hr:%lx", pwzURLFull, hr);

    DEBUG_LEAVE_API(hr);
    return hr;
}


//+---------------------------------------------------------------------------
//
// UTF-8 code from wininet written by RFirth
//
//----------------------------------------------------------------------------
DWORD
CountUnicodeToUtf8(
    IN LPCWSTR pwszIn,
    IN DWORD dwInLen,
    IN BOOL bEncode
    )

/*++

Routine Description:

    Count number of BYTEs required for UTF-8 conversion of UNICODE string. Count
    is terminated after dwInLen characters

        0x0000..0x007F: 1 byte, always emitted as-is
        0x0080..0x07FF: 2 UTF-8 bytes
        0x0800..0xFFFF: 3 UTF-8 bytes

    When bEncode is TRUE every UTF-8 byte of a non-ASCII character is counted
    as 3 output bytes (a "%XX" escape).

Arguments:

    pwszIn  - pointer to input wide-character string

    dwInLen - number of characters in pwszIn

    bEncode - TRUE if we are to hex encode characters >= 0x80

Return Value:

    DWORD   - number of BYTEs required for conversion; 0 if pwszIn is NULL
              or dwInLen is 0

--*/

{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "CountUnicodeToUtf8",
                "%.80wq, %#x, %B",
                pwszIn, dwInLen, bEncode
                ));

    PProtAssert(pwszIn != NULL);
    PProtAssert(dwInLen != 0);

    DWORD dwCount = 0;
    DWORD oneCharLen = bEncode ? 3 : 1;
    DWORD twoCharLen = 2 * oneCharLen;

    if (pwszIn != NULL) {

        //
        // Test the length BEFORE touching the input: a zero length must
        // yield zero, not wrap the counter and walk off the buffer.
        //

        for (; dwInLen != 0; --dwInLen) {

            WORD wchar = *pwszIn++;

            if (wchar < 0x0080) {
                dwCount += 1;
            } else if (wchar < 0x0800) {
                dwCount += twoCharLen;
            } else {
                dwCount += twoCharLen + oneCharLen;
            }
        }
    }

    DEBUG_LEAVE(dwCount);
    return dwCount;
}

DWORD
ConvertUnicodeToUtf8(
    IN LPCWSTR pwszIn,
    IN DWORD dwInLen,
    OUT LPBYTE pszOut,
    IN DWORD dwOutLen,
    IN BOOL bEncode
    )

/*++

Routine Description:

    Convert a string of UNICODE characters to UTF-8:

        0000000000000000..0000000001111111: 0xxxxxxx
        0000000010000000..0000011111111111: 110xxxxx 10xxxxxx
        0000100000000000..1111111111111111: 1110xxxx 10xxxxxx 10xxxxxx

    When bEncode is TRUE each UTF-8 byte of a character >= 0x80 is written
    as a three-character "%XX" escape (upper-case hex); characters below
    0x80 are always copied unchanged.

    Each 16-bit input unit is converted on its own. The output is NOT
    NUL-terminated unless the input range itself contains a NUL.

Arguments:

    pwszIn      - pointer to input wide-character string

    dwInLen     - number of CHARACTERS of pwszIn to convert

    pszOut      - pointer to output narrow-character buffer

    dwOutLen    - number of BYTEs in pszOut

    bEncode     - TRUE if we are to hex encode characters >= 0x80

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_INSUFFICIENT_BUFFER
                    Not enough space in pszOut to store results. Characters
                    that fit completely have already been written; no
                    character is ever written partially.

--*/

{
    DEBUG_ENTER((DBG_APP,
                Dword,
                "ConvertUnicodeToUtf8",
                "%.80wq, %#x, %#x, %#x, %B",
                pwszIn, dwInLen, pszOut, dwOutLen, bEncode
                ));

    PProtAssert(pwszIn != NULL);
    PProtAssert((int)dwInLen > 0);
    PProtAssert(pszOut != NULL);
    PProtAssert((int)dwOutLen > 0);

    // output bytes produced per UTF-8 byte of a non-ASCII character
    const DWORD outputSize = bEncode ? 3 : 1;
    static char hexArray[] = "0123456789ABCDEF";

    while (dwInLen--) {

        WORD wchar = *pwszIn++;
        BYTE bchar;
        DWORD cbNeeded;

        //
        // work out how much room this character takes and make sure it is
        // there before writing anything for it
        //

        if (wchar <= 0x007F) {
            cbNeeded = 1;
        } else if (wchar < 0x0800) {
            cbNeeded = 2 * outputSize;
        } else {
            cbNeeded = 3 * outputSize;
        }

        if (cbNeeded > dwOutLen) {

            DEBUG_LEAVE(ERROR_INSUFFICIENT_BUFFER);
            return ERROR_INSUFFICIENT_BUFFER;
        }
        dwOutLen -= cbNeeded;

        if (wchar <= 0x007F) {
            *pszOut++ = (BYTE)(wchar);
            continue;
        }

        // lead byte: 110xxxxx for two-byte, 1110xxxx for three-byte forms
        BYTE lead = ((wchar >= 0x0800) ? 0xE0 : 0xC0);
        int shift = ((wchar >= 0x0800) ? 12 : 6);

        bchar = lead | (BYTE)(wchar >> shift);
        if (bEncode) {
            *pszOut++ = '%';
            *pszOut++ = hexArray[bchar >> 4];
            bchar = hexArray[bchar & 0x0F];
        }
        *pszOut++ = bchar;

        // middle continuation byte, three-byte form only
        if (wchar >= 0x0800) {
            bchar = 0x80 | (BYTE)((wchar >> 6) & 0x003F);
            if (bEncode) {
                *pszOut++ = '%';
                *pszOut++ = hexArray[bchar >> 4];
                bchar = hexArray[bchar & 0x0F];
            }
            *pszOut++ = bchar;
        }

        // final continuation byte
        bchar = 0x80 | (BYTE)(wchar & 0x003F);
        if (bEncode) {
            *pszOut++ = '%';
            *pszOut++ = hexArray[bchar >> 4];
            bchar = hexArray[bchar & 0x0F];
        }
        *pszOut++ = bchar;
    }

    DEBUG_LEAVE(ERROR_SUCCESS);
    return ERROR_SUCCESS;
}

BOOL
StringContainsHighAnsi(
    IN LPSTR pszIn,
    IN DWORD dwInLen
    )

/*++

Routine Description:

    Report whether the base portion of a narrow string holds any byte with
    the high bit set (0x80..0xFF). Scanning stops at the first such byte, at
    the NUL terminator, at the first '?' or after dwInLen bytes, whichever
    comes first.

Arguments:

    pszIn   - pointer to string to test

    dwInLen - maximum number of bytes of pszIn to examine

Return Value:

    BOOL
        TRUE    - a high-ANSI byte was found before any stop condition

        FALSE   - no high-ANSI byte in the examined range

--*/

{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "StringContainsHighAnsi",
                "%.80q, %#x",
                pszIn, dwInLen
                ));

    PProtAssert(pszIn != NULL);
    PProtAssert(dwInLen != 0);

    BOOL  fHighAnsi = FALSE;
    DWORD ich;

    // only the part in front of the query ('?') is of interest
    for (ich = 0; ich < dwInLen; ++ich) {

        const char ch = pszIn[ich];

        if (ch == '\0' || ch == '?') {
            break;
        }
        if (ch & 0x80) {
            fHighAnsi = TRUE;
            break;
        }
    }

    DEBUG_LEAVE(fHighAnsi);
    return fHighAnsi;
}


BOOL
StringContainsHighAnsiW(
    IN LPCWSTR pwzIn
    )
/*--
    Unicode version of StringContainsHighAnsi()

    Returns TRUE if any character >= 0x80 occurs before the terminating NUL
    or the first '?', whichever comes first; FALSE otherwise.
--*/
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "StringContainsHighAnsiW",
                "%.80wq",
                pwzIn
                ));

    PProtAssert(pwzIn != NULL);

    BOOL    fHighAnsi = FALSE;
    LPCWSTR pwzScan;

    // only the part in front of the query ('?') is of interest
    for (pwzScan = pwzIn; *pwzScan != L'\0' && *pwzScan != L'?'; ++pwzScan)
    {
        if (*pwzScan >= 0x80)
        {
            fHighAnsi = TRUE;
            break;
        }
    }

    DEBUG_LEAVE(fHighAnsi);
    return fHighAnsi;
}


//
//  ConvertUnicodeUrl
//
//  Converts a wide-character URL into the narrow buffer pszTo.
//
//  If fUTF8Enabled is set and the part of the URL in front of the first '?'
//  contains a character >= 0x80, that base part is written as %XX-escaped
//  UTF-8 while the query part (from the '?' on) is converted with
//  dwCodePage. In every other case, and whenever the UTF-8 route cannot be
//  completed (result too large, allocation or conversion failure), the
//  whole URL is converted with dwCodePage via W2A.
//
//  pwzFrom        - source URL
//  pszTo / cchTo  - destination buffer and the size passed on to W2A
//  dwCodePage     - code page for the multi-byte conversions
//  fUTF8Enabled   - allows the UTF-8 route
//  pfUTF8Required - receives TRUE when UTF-8 is enabled and the base part
//                   holds high characters (even if the fallback was taken),
//                   FALSE otherwise
//
//  Always returns TRUE.
//
BOOL
ConvertUnicodeUrl(
    LPCWSTR pwzFrom, 
    LPSTR   pszTo,
    INT     cchTo,
    DWORD   dwCodePage,
    BOOL    fUTF8Enabled,
    BOOL*   pfUTF8Required
    
    )
{
    DEBUG_ENTER((DBG_APP,
                Bool,
                "ConvertUnicodeUrl",
                "%.80wq, %.80q, %#x, %#x, %B",
                pwzFrom, pszTo, cchTo, dwCodePage, fUTF8Enabled
                ));

    BOOL fConverted = FALSE;

    *pfUTF8Required = FALSE;

    if (fUTF8Enabled && StringContainsHighAnsiW(pwzFrom))
    {
        *pfUTF8Required = TRUE;

        //
        // Locate the query part: pwzQuery ends up on the '?' or on the
        // terminating NUL. Everything in front of it is the base part.
        //
        LPCWSTR pwzQuery = pwzFrom;
        while (*pwzQuery != L'\0' && *pwzQuery != L'?')
        {
            ++pwzQuery;
        }
        DWORD cchBase = (DWORD)(pwzQuery - pwzFrom);    // base length in wide chars

        //
        // If there is a query part, convert it to multi-byte first. pszTo
        // doubles as scratch space for it; whether every character could be
        // converted does not matter here.
        //
        DWORD cbQuery  = 0;
        LPSTR pszQuery = NULL;
        if (*pwzQuery != L'\0')
        {
            W2A(pwzQuery, pszTo, cchTo, dwCodePage);
            cbQuery  = strlen(pszTo);
            pszQuery = pszTo;
        }

        //
        // Size of the result: escaped UTF-8 base + multi-byte query + NUL.
        //
        DWORD cbUtf8Base = CountUnicodeToUtf8(pwzFrom, cchBase, TRUE);
        DWORD cbTotal    = cbUtf8Base + cbQuery + 1;

        //
        // The result must fit the incoming buffer; otherwise fall back to
        // the IE4 behaviour of sending the multi-byte string.
        // NOTE(review): the bound is cchTo + 1, which presumes pszTo really
        // has room for cchTo characters plus a terminator - confirm with
        // the callers.
        //
        if (cbTotal <= (DWORD)(cchTo + 1))
        {
            char* pszScratch = new char[cbTotal];

            // allocation failure also ends in the multi-byte fallback
            if (pszScratch)
            {
                memset(pszScratch, 0, cbTotal);

                if (ConvertUnicodeToUtf8(pwzFrom,
                                         cchBase,
                                         (LPBYTE)pszScratch,
                                         cbUtf8Base,
                                         TRUE) == ERROR_SUCCESS)
                {
                    // append the multi-byte query behind the UTF-8 base
                    if (pszQuery)
                    {
                        memcpy(pszScratch + cbUtf8Base, pszQuery, cbQuery);
                    }

                    // hand the assembled string back in pszTo
                    StrCpyN(pszTo, pszScratch, cbTotal);
                    fConverted = TRUE;
                }

                delete [] pszScratch;
            }
        }
    }

    if (!fConverted)
    {
        // Unicode->Multibyte 
        // IE4 behaviour. Shoot.
        W2A(pwzFrom, pszTo, cchTo, dwCodePage);
    }

    DEBUG_LEAVE(TRUE);
    return TRUE;
}