You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2330 lines
67 KiB
2330 lines
67 KiB
/*++

Copyright (c) 1995 Microsoft Corporation

Module Name:

    parseurl.cxx

Abstract:

    Contains functions to parse the basic URLs - FTP, Gopher, HTTP.

    A URL parser simply acts as a macro: it must break out the protocol-specific
    information from the URL and initiate opening the identified resource: all
    this can be accomplished by calling the relevant Internet protocol APIs.

    Code in this module is based on RFC 1738

    Contents:
        IsValidUrl
        DoesSchemeRequireSlashes
        ParseUrl
        CrackUrl
        EncodeUrlPath
        (HexCharToNumber)
        (NumberToHexChar)
        DecodeUrl
        DecodeUrlInSitu
        DecodeUrlStringInSitu
        GetUrlAddressInfo
        GetUrlAddress
        MapUrlSchemeName
        MapUrlScheme
        MapUrlSchemeToName

Author:

    Richard L Firth (rfirth) 26-Apr-1995

Environment:

    Win32(s) user-mode DLL

Revision History:

    26-Apr-1995
        Created

--*/
|
|
|
|
#include <wininetp.h>
|
|
|
|
//
|
|
// private manifests
|
|
//
|
|
|
|
//
// RESERVED - alias for SAFE: SafetyList entries tagged RESERVED carry exactly
// the same flag value as entries tagged SAFE
//
#define RESERVED SAFE
|
|
|
|
//
|
|
// private macros
|
|
//
|
|
|
|
//#define HEX_CHAR_TO_NUMBER(ch) \
|
|
// ((ch <= '9') \
|
|
// ? (ch - '0') \
|
|
// : ((ch >= 'a') \
|
|
// ? ((ch - 'a') + 10) \
|
|
// : ((ch - 'A') + 10)))
|
|
|
|
//
// NUMBER_TO_HEX_CHAR - maps a nibble value (0..15) to its upper-case ASCII
// hexadecimal digit ('0'..'9', 'A'..'F'). The argument is evaluated more than
// once, so it must be free of side effects
//

#define NUMBER_TO_HEX_CHAR(n) \
    ((char)(n) + (((n) <= 9) ? '0' : ('A' - 10)))
|
|
|
|
#define IS_UNSAFE_URL_CHARACTER(Char, Scheme) \
|
|
(((UCHAR)(Char) <= 0x20) || ((UCHAR)(Char) >= 0x7f) \
|
|
|| (SafetyList[(Char) - 0x21] & (UNSAFE | Scheme)))
|
|
|
|
#define IS_UNSAFE_URL_WIDECHARACTER(wChar, Scheme) \
|
|
(((WCHAR)(wChar) <= 0x0020) || ((WCHAR)(wChar) >= 0x007f) \
|
|
|| (SafetyList[(wChar) - 0x0021] & (UNSAFE | Scheme)))
|
|
|
|
//
|
|
// private types
|
|
//
|
|
|
|
//
|
|
// private prototypes
|
|
//
|
|
|
|
// converts one hexadecimal digit character to its numeric value (0..15)
PRIVATE
char
HexCharToNumber(
    IN char ch
    );
|
|
|
|
// converts a number in the range 0..15 to its hexadecimal digit character
PRIVATE
char
NumberToHexChar(
    IN int Number
    );
|
|
|
|
|
|
//
|
|
// private data
|
|
//
|
|
|
|
//
|
|
// SafetyList - the list of characters above 0x20 and below 0x7f that are
|
|
// classified as safe, unsafe or scheme-specific. Safe characters do not need
|
|
// to be escaped for any URL scheme. Unsafe characters must be escaped for all
|
|
// URL schemes. Scheme-specific characters need only be escaped for the relevant
|
|
// scheme(s)
|
|
//
|
|
|
|
const
|
|
PRIVATE
|
|
UCHAR
|
|
SafetyList[] = {
|
|
|
|
//
|
|
// UNSAFE: 0x00..0x20
|
|
//
|
|
|
|
SAFE | HOSTNAME, // 0x21 (!)
|
|
UNSAFE, // 0x22 (")
|
|
UNSAFE, // 0x23 (#)
|
|
SAFE | HOSTNAME, // 0x24 ($)
|
|
UNSAFE, // 0x25 (%)
|
|
RESERVED | HOSTNAME, // 0x26 (&)
|
|
SAFE | HOSTNAME, // 0x27 (')
|
|
SAFE | HOSTNAME, // 0x28 (()
|
|
SAFE | HOSTNAME, // 0x29 ())
|
|
SAFE | HOSTNAME, // 0x2A (*)
|
|
SCHEME_GOPHER | HOSTNAME, // 0x2B (+)
|
|
SAFE | HOSTNAME, // 0x2C (,)
|
|
SAFE, // 0x2D (-)
|
|
SAFE, // 0x2E (.)
|
|
RESERVED | HOSTNAME, // 0x2F (/)
|
|
SAFE, // 0x30 (0)
|
|
SAFE, // 0x31 (1)
|
|
SAFE, // 0x32 (2)
|
|
SAFE, // 0x33 (3)
|
|
SAFE, // 0x34 (4)
|
|
SAFE, // 0x35 (5)
|
|
SAFE, // 0x36 (6)
|
|
SAFE, // 0x37 (7)
|
|
SAFE, // 0x38 (8)
|
|
SAFE, // 0x39 (9)
|
|
RESERVED | HOSTNAME, // 0x3A (:)
|
|
RESERVED | HOSTNAME, // 0x3B (;)
|
|
UNSAFE, // 0x3C (<)
|
|
RESERVED | HOSTNAME, // 0x3D (=)
|
|
UNSAFE, // 0x3E (>)
|
|
RESERVED | SCHEME_GOPHER | HOSTNAME, // 0x3F (?)
|
|
RESERVED | HOSTNAME, // 0x40 (@)
|
|
SAFE, // 0x41 (A)
|
|
SAFE, // 0x42 (B)
|
|
SAFE, // 0x43 (C)
|
|
SAFE, // 0x44 (D)
|
|
SAFE, // 0x45 (E)
|
|
SAFE, // 0x46 (F)
|
|
SAFE, // 0x47 (G)
|
|
SAFE, // 0x48 (H)
|
|
SAFE, // 0x49 (I)
|
|
SAFE, // 0x4A (J)
|
|
SAFE, // 0x4B (K)
|
|
SAFE, // 0x4C (L)
|
|
SAFE, // 0x4D (M)
|
|
SAFE, // 0x4E (N)
|
|
SAFE, // 0x4F (O)
|
|
SAFE, // 0x50 (P)
|
|
SAFE, // 0x51 (Q)
|
|
SAFE, // 0x42 (R)
|
|
SAFE, // 0x43 (S)
|
|
SAFE, // 0x44 (T)
|
|
SAFE, // 0x45 (U)
|
|
SAFE, // 0x46 (V)
|
|
SAFE, // 0x47 (W)
|
|
SAFE, // 0x48 (X)
|
|
SAFE, // 0x49 (Y)
|
|
SAFE, // 0x5A (Z)
|
|
UNSAFE, // 0x5B ([)
|
|
UNSAFE, // 0x5C (\)
|
|
UNSAFE, // 0x5D (])
|
|
UNSAFE, // 0x5E (^)
|
|
SAFE, // 0x5F (_)
|
|
UNSAFE, // 0x60 (`)
|
|
SAFE, // 0x61 (a)
|
|
SAFE, // 0x62 (b)
|
|
SAFE, // 0x63 (c)
|
|
SAFE, // 0x64 (d)
|
|
SAFE, // 0x65 (e)
|
|
SAFE, // 0x66 (f)
|
|
SAFE, // 0x67 (g)
|
|
SAFE, // 0x68 (h)
|
|
SAFE, // 0x69 (i)
|
|
SAFE, // 0x6A (j)
|
|
SAFE, // 0x6B (k)
|
|
SAFE, // 0x6C (l)
|
|
SAFE, // 0x6D (m)
|
|
SAFE, // 0x6E (n)
|
|
SAFE, // 0x6F (o)
|
|
SAFE, // 0x70 (p)
|
|
SAFE, // 0x71 (q)
|
|
SAFE, // 0x72 (r)
|
|
SAFE, // 0x73 (s)
|
|
SAFE, // 0x74 (t)
|
|
SAFE, // 0x75 (u)
|
|
SAFE, // 0x76 (v)
|
|
SAFE, // 0x77 (w)
|
|
SAFE, // 0x78 (x)
|
|
SAFE, // 0x79 (y)
|
|
SAFE, // 0x7A (z)
|
|
UNSAFE, // 0x7B ({)
|
|
UNSAFE, // 0x7C (|)
|
|
UNSAFE, // 0x7D (})
|
|
UNSAFE // 0x7E (~)
|
|
|
|
//
|
|
// UNSAFE: 0x7F..0xFF
|
|
//
|
|
|
|
};
|
|
|
|
|
|
//
// ByteCountForLeadUtf8Byte - returns how many bytes to step over for a
// sequence that starts with ch, judged by the number of leading 1 bits in ch:
// none -> 1, one -> 1, two -> 2, ... six -> 6, seven or more -> 1
//

INT ByteCountForLeadUtf8Byte(char ch)
{
    // indexed by the position of the first zero bit, counting from the top bit
    static const int aiByteCountForFirstZero[] = {1,1,2,3,4,5,6,1}; // the final 1 shouldn't happen on a proper UTF-8 string

    const BYTE bLead = (BYTE)ch;
    DWORD dwLeadingOnes = 0;

    // Walk a single-bit mask down from the top bit, counting set bits until a
    // zero bit is met or the last slot of aiByteCountForFirstZero[] is reached.
    for (BYTE bMask = 0x80;     // binary 1000 0000
         ((bLead & bMask) != 0)
            && (dwLeadingOnes < ARRAY_ELEMENTS(aiByteCountForFirstZero)-1);
         bMask = bMask >> 1)
    {
        ++dwLeadingOnes;
    }

    return aiByteCountForFirstZero[dwLeadingOnes];
}
|
|
|
|
|
|
//
// Utf8StrChr - scans [pString, pEnd) for the first byte equal to chTarget,
// examining only the first byte of each sequence (the step size comes from
// ByteCountForLeadUtf8Byte). Stops early at a '\0'. Returns a pointer to the
// match, or NULL if there is none
//

LPSTR Utf8StrChr( LPSTR pString, LPSTR pEnd, char chTarget)
{
    LPSTR pScan;

    for (pScan = pString;
         pScan < pEnd && *pScan != '\0';
         pScan += ByteCountForLeadUtf8Byte(*pScan))
    {
        if (*pScan == chTarget)
        {
            return pScan;
        }
    }

    return NULL;
}
|
|
|
|
|
|
//
// Utf8StrChrEx - as Utf8StrChr, but matches either of two target bytes:
// returns a pointer to the first sequence-leading byte in [pString, pEnd) that
// equals chTarget1 or chTarget2, or NULL if a '\0' or pEnd is reached first
//

LPSTR Utf8StrChrEx( LPSTR pString, LPSTR pEnd, char chTarget1, char chTarget2)
{
    LPSTR pScan;

    for (pScan = pString;
         pScan < pEnd && *pScan != '\0';
         pScan += ByteCountForLeadUtf8Byte(*pScan))
    {
        const char chCurrent = *pScan;

        if (chCurrent == chTarget1 || chCurrent == chTarget2)
        {
            return pScan;
        }
    }

    return NULL;
}
|
|
|
|
|
|
//
|
|
// UrlSchemeList - the list of schemes that we support
|
|
//
|
|
|
|
typedef struct {
|
|
LPSTR SchemeName;
|
|
DWORD SchemeLength;
|
|
INTERNET_SCHEME SchemeType;
|
|
DWORD SchemeFlags;
|
|
BOOL NeedSlashes;
|
|
DWORD OpenFlags;
|
|
} URL_SCHEME_INFO;
|
|
|
|
|
|
const
|
|
PRIVATE
|
|
URL_SCHEME_INFO
|
|
UrlSchemeList[] = {
|
|
NULL, 0, INTERNET_SCHEME_DEFAULT, 0, FALSE, 0,
|
|
"http", 4, INTERNET_SCHEME_HTTP, SCHEME_HTTP, TRUE, 0,
|
|
"https", 5, INTERNET_SCHEME_HTTPS, SCHEME_HTTP, TRUE, WINHTTP_FLAG_SECURE,
|
|
};
|
|
|
|
#define NUMBER_OF_URL_SCHEMES ARRAY_ELEMENTS(UrlSchemeList)
|
|
|
|
//
// ScanSchemes - looks up a counted scheme name in UrlSchemeList.
//
//  pszToCheck  - scheme name to find (need not be zero-terminated)
//  ccStr       - number of characters in pszToCheck
//  pwResult    - receives the index of the matching UrlSchemeList entry;
//                not written when no entry matches
//
// Returns TRUE if an entry with the same length and the same text (ignoring
// case) exists, else FALSE. A zero-length name matches the default entry,
// whose SchemeLength is 0.
//

BOOL ScanSchemes(LPTSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
{
    for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
    {
        if (UrlSchemeList[i].SchemeLength != ccStr)
        {
            continue;
        }

        //
        // Equal lengths of zero already constitute a match. Short-circuit so
        // that the default entry's NULL SchemeName is never handed to
        // strnicmp(), which is entitled to reject a NULL string even when the
        // count is 0
        //

        if ((ccStr == 0)
        || (strnicmp(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
        {
            *pwResult = i;
            return TRUE;
        }
    }
    return FALSE;
}
|
|
|
|
//
|
|
// functions
|
|
//
|
|
|
|
|
|
BOOL
IsValidUrl(
    IN LPCSTR lpszUrl
    )

/*++

Routine Description:

    Checks that every character of a URL is acceptable without escaping, i.e.
    that none is classified as unsafe for SCHEME_ANY

Arguments:

    lpszUrl - pointer to zero-terminated URL to check.

              Assumes: 1. lpszUrl is non-NULL, non-empty string

Return Value:

    BOOL
        TRUE    - no unsafe character was found

        FALSE   - at least one character is unsafe

--*/

{
    INET_ASSERT(lpszUrl != NULL);
    INET_ASSERT(*lpszUrl != '\0');

    for (LPCSTR pch = lpszUrl; *pch != '\0'; ++pch) {
        if (IS_UNSAFE_URL_CHARACTER(*pch, SCHEME_ANY)) {
            return FALSE;
        }
    }
    return TRUE;
}
|
|
|
|
|
|
BOOL
IsValidHostNameW(
    IN LPCWSTR lpwszHostName,
    IN DWORD dwFlags
    )

/*++

Routine Description:

    Determines whether a hostname has valid chars in it. A string that parses
    as an IPv4 literal, or as a bracketed IPv6 literal, is accepted outright;
    anything else must consist solely of characters that are not unsafe for
    HOSTNAME

Arguments:

    lpwszHostName   - Pointer to hostname to check. Assumes lpwszHostName
                      is non-NULL and points to a non-empty UNICODE string.

    dwFlags         - Flags that modify validation.
                      If IVHN_DISALLOW_IPV6_SCOPE_ID is set then an IPv6 literal
                      address containing a scope ID will be invalid

Return Value:

    BOOL
        TRUE    - hostname is acceptable

        FALSE   - hostname contains a character that is not allowed

--*/

{
    SOCKADDR_IN6 Address;
    INT AddressLength;
    BOOL bAllowScopeID = ((dwFlags & IVHN_DISALLOW_IPV6_SCOPE_ID) == 0);

    INET_ASSERT(lpwszHostName != NULL);

    // an IPv4 literal is always acceptable

    AddressLength = (INT)sizeof(Address);
    if (_I_WSAStringToAddressW((LPWSTR)lpwszHostName, AF_INET, NULL, (LPSOCKADDR)&Address, &AddressLength) == 0) {
        return TRUE;
    }

    // an IPv6 literal is acceptable only when it is wrapped in brackets and,
    // if scope IDs are disallowed, carries a zero scope ID

    AddressLength = sizeof(Address);
    if (_I_WSAStringToAddressW((LPWSTR)lpwszHostName, AF_INET6, NULL, (LPSOCKADDR)&Address, &AddressLength) == 0) {

        BOOL bBracketed = (*lpwszHostName == L'[')
                       && (*(lpwszHostName+lstrlenW(lpwszHostName)-1) == L']');

        if (bBracketed && (bAllowScopeID || (Address.sin6_scope_id == 0))) {
            return TRUE;
        }
    }

    // not an accepted literal address so do strict bad character checking

    for (; *lpwszHostName != L'\0'; ++lpwszHostName) {
        if (IS_UNSAFE_URL_WIDECHARACTER(*lpwszHostName, HOSTNAME)) {
            return FALSE;
        }
    }
    return TRUE;
}
|
|
|
|
BOOL
IsValidHostNameA(
    IN LPCSTR lpszHostName,
    IN DWORD dwFlags
    )

/*++

Routine Description:

    Determines whether a hostname has valid chars in it. A string that parses
    as an IPv4 literal, or as a bracketed IPv6 literal, is accepted outright;
    anything else must consist solely of characters that are not unsafe for
    HOSTNAME

Arguments:

    lpszHostName    - Pointer to hostname to check. Assumes lpszHostName
                      is non-NULL and points to a non-empty ASCII string.

    dwFlags         - Flags that modify validation.
                      If IVHN_DISALLOW_IPV6_SCOPE_ID is set then an IPv6 literal
                      address containing a scope ID will be invalid

Return Value:

    BOOL
        TRUE    - hostname is acceptable

        FALSE   - hostname contains a character that is not allowed

--*/

{
    SOCKADDR_IN6 Address;
    INT AddressLength;
    BOOL bAllowScopeID = ((dwFlags & IVHN_DISALLOW_IPV6_SCOPE_ID) == 0);

    INET_ASSERT(lpszHostName != NULL);

    // an IPv4 literal is always acceptable

    AddressLength = sizeof(Address);
    if (_I_WSAStringToAddressA((LPSTR)lpszHostName, AF_INET, NULL, (LPSOCKADDR)&Address, &AddressLength) == 0) {
        return TRUE;
    }

    // an IPv6 literal is acceptable only when it is wrapped in brackets and,
    // if scope IDs are disallowed, carries a zero scope ID

    AddressLength = sizeof(Address);
    if (_I_WSAStringToAddressA((LPSTR)lpszHostName, AF_INET6, NULL, (LPSOCKADDR)&Address, &AddressLength) == 0) {

        BOOL bBracketed = (*lpszHostName == '[')
                       && (*(lpszHostName+lstrlen(lpszHostName)-1) == ']');

        if (bBracketed && (bAllowScopeID || (Address.sin6_scope_id == 0))) {
            return TRUE;
        }
    }

    // not an accepted literal address so do strict bad character checking

    for (; *lpszHostName != '\0'; ++lpszHostName) {
        if (IS_UNSAFE_URL_CHARACTER(*lpszHostName, HOSTNAME)) {
            return FALSE;
        }
    }
    return TRUE;
}
|
|
|
|
|
|
BOOL
DoesSchemeRequireSlashes(
    IN LPSTR lpszScheme,
    IN DWORD dwSchemeLength,
    IN BOOL bHasHostName
    )

/*++

Routine Description:

    Determines whether a protocol scheme requires slashes

Arguments:

    lpszScheme      - pointer to protocol scheme in question
                      (does not include ':' or slashes, just scheme name)

    dwSchemeLength  - if not 0, string length of lpszScheme; if 0, lpszScheme
                      is zero-terminated and its length is measured here

    bHasHostName    - value returned when the scheme is not in UrlSchemeList

Return Value:

    BOOL
        NeedSlashes of the matching UrlSchemeList entry, or bHasHostName if
        the scheme is not recognized

--*/

{
    DWORD schemeIndex;
    DWORD cchScheme = dwSchemeLength;

    //
    // a length of 0 means lpszScheme is ASCIIZ. Find its length
    //

    if (cchScheme == 0) {
        cchScheme = strlen(lpszScheme);
    }

    return ScanSchemes(lpszScheme, cchScheme, &schemeIndex)
        ? UrlSchemeList[schemeIndex].NeedSlashes
        : bHasHostName;
}
|
|
|
|
|
|
DWORD
CrackUrl(
    IN OUT LPSTR lpszUrl,
    IN DWORD dwUrlLength,
    IN BOOL bEscape,
    OUT LPINTERNET_SCHEME lpSchemeType OPTIONAL,
    OUT LPSTR* lpszSchemeName OPTIONAL,
    OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
    OUT LPSTR* lpszHostName OPTIONAL,
    OUT LPDWORD lpdwHostNameLength OPTIONAL,
    IN BOOL fUnescapeHostName,
    OUT LPINTERNET_PORT lpServerPort OPTIONAL,
    OUT LPSTR* lpszUserName OPTIONAL,
    OUT LPDWORD lpdwUserNameLength OPTIONAL,
    OUT LPSTR* lpszPassword OPTIONAL,
    OUT LPDWORD lpdwPasswordLength OPTIONAL,
    OUT LPSTR* lpszUrlPath OPTIONAL,
    OUT LPDWORD lpdwUrlPathLength OPTIONAL,
    OUT LPSTR* lpszExtraInfo OPTIONAL,
    OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
    OUT LPBOOL pHavePort
    )

/*++

Routine Description:

    Cracks an URL into its constituent parts. The returned strings are
    pointers into lpszUrl together with their lengths; nothing is copied

    Assumes:    1. If one of the optional lpsz fields is present (e.g. lpszUserName)
                   then the accompanying lpdw field must also be supplied

    bEscape is no longer used/supported and must always be false.

Arguments:

    lpszUrl                 - pointer to URL to crack. This buffer WILL BE
                              OVERWRITTEN if it contains escape sequences that
                              we will convert back to ANSI characters and
                              fUnescapeHostName == TRUE

    dwUrlLength             - if not 0, string length of lpszUrl

    bEscape                 - must be FALSE; TRUE is rejected with
                              ERROR_INVALID_PARAMETER

    lpSchemeType            - returned scheme type - e.g. INTERNET_SCHEME_HTTP

    lpszSchemeName          - returned scheme name

    lpdwSchemeNameLength    - length of scheme name

    lpszHostName            - returned host name

    lpdwHostNameLength      - length of host name buffer

    fUnescapeHostName       - passed through to GetUrlAddress

    lpServerPort            - returned server port if present in the URL, else 0

    lpszUserName            - returned user name if present

    lpdwUserNameLength      - length of user name buffer

    lpszPassword            - returned password if present

    lpdwPasswordLength      - length of password buffer

    lpszUrlPath             - returned URL path: whatever follows the address
                              part, up to but excluding the extra info when
                              lpszExtraInfo is supplied

    lpdwUrlPathLength       - length of url-path buffer

    lpszExtraInfo           - returned search string or intra-page link if
                              present: starts at the first '#' or '?'

    lpdwExtraInfoLength     - length of extra info buffer

    pHavePort               - returned boolean indicating whether port was specified

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_INVALID_PARAMETER
                    bEscape was TRUE

                  ERROR_WINHTTP_UNRECOGNIZED_SCHEME
                    no ':' was found, or the scheme is not in UrlSchemeList

                  ERROR_WINHTTP_INVALID_URL
                    "://" is present but not wanted by the scheme, or wanted
                    but not present

                  any error returned by GetUrlAddress

--*/

{
    //
    // all locals are declared (and, where needed, initialized) up front so
    // that no 'goto quit' jumps across an initialization
    //

    DWORD error = ERROR_WINHTTP_INTERNAL_ERROR;
    DWORD schemeLength;
    DWORD schemeIndex;
    INTERNET_SCHEME schemeType = INTERNET_SCHEME_UNKNOWN;
    BOOL needSlashes = FALSE;
    BOOL haveSlashes = FALSE;
    int skip;
    LPSTR pSchemeEnd;

    if (bEscape) {
        INET_ASSERT(!"bEscape==TRUE no longer supported for parseurl.cxx::CrackUrl()");
        error = ERROR_INVALID_PARAMETER;
        goto quit;
    }

    //
    // a length of 0 means lpszUrl is ASCIIZ. Find its length
    //

    if (dwUrlLength == 0) {
        dwUrlLength = strlen(lpszUrl);
    }

    //
    // extract the scheme (ex: "SCHEME://host/path..."): it runs from the start
    // of the URL up to the first ':'
    //

    pSchemeEnd = Utf8StrChr(lpszUrl, lpszUrl + dwUrlLength, ':');
    if (pSchemeEnd == NULL) {
        error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
        goto quit;
    }

    schemeLength = (DWORD)(pSchemeEnd - lpszUrl);

    //
    // look the scheme up; only schemes in UrlSchemeList are accepted
    //

    if (!ScanSchemes(lpszUrl, schemeLength, &schemeIndex)) {
        error = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
        goto quit;
    }

    schemeType = UrlSchemeList[schemeIndex].SchemeType;
    needSlashes = UrlSchemeList[schemeIndex].NeedSlashes;

    //
    // work out how much separator follows the scheme name: "://" (only
    // recognized when something follows it) or just ':'
    //

    if ((dwUrlLength - schemeLength > 3) && (memcmp(&lpszUrl[schemeLength], "://", 3) == 0)) {
        skip = 3;   // skip "://"
        haveSlashes = TRUE;
    } else {
        skip = 1;   // skip ':'
    }

    //
    // Slashes must be present exactly when the scheme requires them
    //

    if ((!haveSlashes) != (!needSlashes)) {
        error = ERROR_WINHTTP_INVALID_URL;
        goto quit;
    }

    //
    // We've parsed the scheme, so set up that result.
    //

    if (ARGUMENT_PRESENT(lpSchemeType)) {
        *lpSchemeType = schemeType;
    }
    if (ARGUMENT_PRESENT(lpszSchemeName)) {
        *lpszSchemeName = lpszUrl;
        *lpdwSchemeNameLength = schemeLength;
    }

    //
    // Now crack the rest of the URL: step over the scheme and its separator,
    // then let GetUrlAddress consume user name, password, host and port
    //

    lpszUrl += schemeLength + skip;
    dwUrlLength -= schemeLength + skip;

    error = GetUrlAddress(&lpszUrl,
                          &dwUrlLength,
                          lpszUserName,
                          lpdwUserNameLength,
                          lpszPassword,
                          lpdwPasswordLength,
                          lpszHostName,
                          lpdwHostNameLength,
                          fUnescapeHostName,
                          lpServerPort,
                          pHavePort
                          );

    if (error != ERROR_SUCCESS) {
        goto quit;
    }

    //
    // the extra info, if requested, is split off the tail of what remains
    //

    if (ARGUMENT_PRESENT(lpszExtraInfo)) {

        LPSTR pRemainderEnd = lpszUrl + dwUrlLength;
        LPSTR pExtra = Utf8StrChrEx(lpszUrl, pRemainderEnd, '#', '?');

        if (pExtra == NULL) {
            pExtra = pRemainderEnd;
        }

        *lpszExtraInfo = pExtra;
        *lpdwExtraInfoLength = (DWORD)(pRemainderEnd - pExtra);
        dwUrlLength -= *lpdwExtraInfoLength;
    }

    //
    // If the user didn't ask for the extra info, it is returned appended to the url path.
    //

    if (ARGUMENT_PRESENT(lpszUrlPath)) {
        *lpszUrlPath = lpszUrl;
        *lpdwUrlPathLength = dwUrlLength;
    }

quit:

    return error;
}
|
|
|
|
#define DEFAULT_REALLOC_SIZE 1024
|
|
|
|
DWORD
|
|
EncodeUrlPath(
|
|
IN DWORD Flags,
|
|
IN DWORD SchemeFlags,
|
|
IN LPSTR UrlPath,
|
|
IN DWORD UrlPathLength,
|
|
OUT LPSTR* pEncodedUrlPath,
|
|
IN OUT LPDWORD EncodedUrlPathLength
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Encodes an URL-path. That is, escapes the string. Creates a new URL-path in
|
|
which all the 'unsafe' and reserved characters for this scheme have been
|
|
converted to escape sequences
|
|
|
|
Arguments:
|
|
|
|
Flags - controlling expansion
|
|
|
|
SchemeFlags - which scheme we are encoding for -
|
|
SCHEME_HTTP, etc.
|
|
|
|
UrlPath - pointer to the unescaped string
|
|
|
|
UrlPathLength - length of Url
|
|
|
|
EncodedUrlPath - pointer to buffer where encoded URL will be
|
|
written
|
|
|
|
EncodedUrlPathLength - IN: size of EncodedUrlPath
|
|
OUT: number of bytes written to EncodedUrlPath
|
|
|
|
Return Value:
|
|
|
|
DWORD
|
|
Success - ERROR_SUCCESS
|
|
|
|
Failure - ERROR_INSUFFICIENT_BUFFER
|
|
UrlPathLength not large enough to store encoded URL path
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD error;
|
|
DWORD len;
|
|
|
|
len = *EncodedUrlPathLength;
|
|
LPSTR EncodedUrlPath = *pEncodedUrlPath;
|
|
UCHAR ch;
|
|
|
|
UNREFERENCED_PARAMETER(UrlPathLength);
|
|
|
|
while(0 != (ch = (UCHAR)*UrlPath++))
|
|
{
|
|
//
|
|
// check whether this character is safe. For now, we encode all unsafe
|
|
// and scheme-specific characters the same way (i.e. irrespective of
|
|
// scheme)
|
|
//
|
|
// We are allowing '/' to be copied unmodified
|
|
//
|
|
|
|
if (len < 3)
|
|
{
|
|
LPSTR pStr = (LPSTR)REALLOCATE_MEMORY(*pEncodedUrlPath, *EncodedUrlPathLength+DEFAULT_REALLOC_SIZE);
|
|
|
|
if (pStr)
|
|
{
|
|
EncodedUrlPath = pStr+*EncodedUrlPathLength-len;
|
|
*pEncodedUrlPath = pStr;
|
|
len += DEFAULT_REALLOC_SIZE;
|
|
*EncodedUrlPathLength += DEFAULT_REALLOC_SIZE;
|
|
}
|
|
else
|
|
{
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
if (IS_UNSAFE_URL_CHARACTER(ch, SchemeFlags)
|
|
&& !((ch == '/') && (Flags & NO_ENCODE_PATH_SEP)))
|
|
{
|
|
*EncodedUrlPath++ = '%';
|
|
//*EncodedUrlPath++ = NumberToHexChar((int)ch / 16);
|
|
*EncodedUrlPath++ = (CHAR)NUMBER_TO_HEX_CHAR((int)ch / 16);
|
|
//*EncodedUrlPath++ = NumberToHexChar((int)ch % 16);
|
|
*EncodedUrlPath++ = (CHAR)NUMBER_TO_HEX_CHAR((int)ch % 16);
|
|
len -= 2; // extra --len below
|
|
}
|
|
else
|
|
{
|
|
*EncodedUrlPath++ = (signed char)ch;
|
|
}
|
|
--len;
|
|
}
|
|
|
|
*EncodedUrlPath = '\0';
|
|
*EncodedUrlPathLength -= len;
|
|
error = ERROR_SUCCESS;
|
|
|
|
quit:
|
|
return error;
|
|
|
|
error:
|
|
error = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto quit;
|
|
}
|
|
|
|
|
|
PRIVATE
char
HexCharToNumber(
    IN char ch
    )

/*++

Routine Description:

    Converts an ANSI character in the range '0'..'9' 'A'..'F' 'a'..'f' to its
    corresponding hexadecimal value (0..f). The character is not validated:
    callers are expected to have checked it (e.g. with isxdigit) first

Arguments:

    ch  - character to convert

Return Value:

    char
        hexadecimal value of ch, as an 8-bit (signed) character value

--*/

{
    // anything up to '9' is taken to be a decimal digit
    if (ch <= '9') {
        return (CHAR)(ch - '0');
    }

    // lower-case letters sort after upper-case ones
    if (ch >= 'a') {
        return (CHAR)((ch - 'a') + 10);
    }

    return (CHAR)((ch - 'A') + 10);
}
|
|
|
|
|
|
PRIVATE
char
NumberToHexChar(
    IN int Number
    )

/*++

Routine Description:

    Converts a number in the range 0..15 to its ASCII character hex representation
    ('0'..'F')

Arguments:

    Number  - to convert

Return Value:

    char
        character in above range

--*/

{
    if (Number <= 9) {
        return (char)('0' + Number);
    }

    return (char)('A' + (Number - 10));
}
|
|
|
|
|
|
DWORD
DecodeUrl(
    IN LPSTR Url,
    IN DWORD UrlLength,
    OUT LPSTR DecodedString,
    IN OUT LPDWORD DecodedLength
    )

/*++

Routine Description:

    Converts an URL string with embedded escape sequences (%xx) to a counted
    string

    It is safe to pass the same pointer for the string to convert, and the
    buffer for the converted results: if the current character is not escaped,
    it just gets overwritten, else the input pointer is moved ahead 2 characters
    further than the output pointer, which is benign

Arguments:

    Url             - pointer to URL string to convert

    UrlLength       - number of characters in Url

    DecodedString   - pointer to buffer that receives converted string

    DecodedLength   - IN: number of characters in buffer
                      OUT: number of characters converted (only updated on
                      success)

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_WINHTTP_INVALID_URL
                    Url couldn't be converted: a '%' is not followed, within
                    the UrlLength characters, by two hexadecimal digits

                  ERROR_INSUFFICIENT_BUFFER
                    DecodedString isn't large enough to hold all the converted
                    Url

--*/

{
    DWORD bufferRemaining;

    bufferRemaining = *DecodedLength;
    while (UrlLength && bufferRemaining) {

        char ch;

        if (*Url == '%') {

            //
            // BUGBUG - would %00 ever appear in an URL?
            //

            //
            // the whole "%xx" must lie inside the counted input. Without this
            // check a '%' in the last two positions would read past the end
            // of the input and wrap UrlLength below zero
            //

            if (UrlLength < 3) {
                return ERROR_WINHTTP_INVALID_URL;
            }

            //
            // isxdigit() is only defined for unsigned char values (and EOF),
            // so never pass it a plain, possibly negative, char
            //

            if (!isxdigit((unsigned char)Url[1])
            || !isxdigit((unsigned char)Url[2])) {
                return ERROR_WINHTTP_INVALID_URL;
            }

            // both digits are read before the output byte is written, which
            // keeps the in-place (Url == DecodedString) case safe
            ch = (char)((HexCharToNumber(Url[1]) << 4) | HexCharToNumber(Url[2]));
            Url += 3;
            UrlLength -= 3;
        } else {
            ch = *Url++;
            --UrlLength;
        }
        *DecodedString++ = ch;
        --bufferRemaining;
    }

    // input left over means the output buffer ran out first
    if (UrlLength == 0) {
        *DecodedLength -= bufferRemaining;
        return ERROR_SUCCESS;
    } else {
        return ERROR_INSUFFICIENT_BUFFER;
    }
}
|
|
|
|
|
|
DWORD
DecodeUrlInSitu(
    IN LPSTR BufferAddress,
    IN OUT LPDWORD BufferLength
    )

/*++

Routine Description:

    Decodes an URL string, if it contains escape sequences. The conversion is
    done in place, since we know that a string containing escapes is longer than
    the string with escape sequences (3 bytes) converted to characters (1 byte)

Arguments:

    BufferAddress   - pointer to the string to convert

    BufferLength    - IN: number of characters to convert
                      OUT: length of converted string (left unchanged when the
                      string contains no '%')

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_WINHTTP_INVALID_URL
                  ERROR_INSUFFICIENT_BUFFER

--*/

{
    DWORD stringLength = *BufferLength;

    //
    // no escape character in the string: nothing to convert, just return
    // success
    //

    if (memchr(BufferAddress, '%', stringLength) == NULL) {
        return ERROR_SUCCESS;
    }

    // source and destination are the same buffer
    return DecodeUrl(BufferAddress,
                     stringLength,
                     BufferAddress,
                     BufferLength
                     );
}
|
|
|
|
|
|
DWORD
DecodeUrlStringInSitu(
    IN LPSTR BufferAddress,
    IN OUT LPDWORD BufferLength
    )

/*++

Routine Description:

    Performs DecodeUrlInSitu() on a string and zero terminates it

    Assumes:    1. Even if no decoding is performed, *BufferLength is large enough
                   to fit an extra '\0' character

Arguments:

    BufferAddress   - pointer to the string to convert

    BufferLength    - IN: number of characters to convert
                      OUT: length of converted string, excluding '\0'

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_WINHTTP_INVALID_URL
                  ERROR_INSUFFICIENT_BUFFER

--*/

{
    DWORD error = DecodeUrlInSitu(BufferAddress, BufferLength);

    // on failure the buffer is left unterminated
    if (error != ERROR_SUCCESS) {
        return error;
    }

    BufferAddress[*BufferLength] = '\0';
    return error;
}
|
|
|
|
|
|
DWORD
GetUrlAddressInfo(
    IN OUT LPSTR* Url,
    IN OUT LPDWORD UrlLength,
    OUT LPSTR* PartOne,
    OUT LPDWORD PartOneLength,
    OUT LPBOOL PartOneEscape,
    OUT LPSTR* PartTwo,
    OUT LPDWORD PartTwoLength,
    OUT LPBOOL PartTwoEscape
    )

/*++

Routine Description:

    Given a string of the form foo:bar, splits them into 2 counted strings about
    the ':' character. The address string may or may not contain a ':'.

    This function is intended to split into substrings the host:port and
    username:password strings commonly used in Internet address specifications
    and by association, in URLs

    Modified to handle IPv6 literal addresses in URLs surrounded by brackets "[ ]" as per
    RFC 2732. Input of "[foo]:bar" is now considered equivalent to "foo:bar". The brackets
    ARE returned as part of a string and counted.

    Scanning stops at the first '/', at a '\0', or when *UrlLength characters
    have been consumed, whichever comes first. Characters with the high bit set
    are rejected

Arguments:

    Url             - pointer to pointer to string containing URL. On output
                      this is advanced past the address parts

    UrlLength       - pointer to length of URL in UrlString. On output this is
                      reduced by the number of characters parsed

    PartOne         - pointer which will receive first part of address string
                      (NULL if a ':' is found with nothing before it)

    PartOneLength   - pointer which will receive length of first part of address
                      string

    PartOneEscape   - TRUE on output if PartOne contains escape sequences

    PartTwo         - pointer which will receive second part of address string
                      (NULL if there is no ':')

    PartTwoLength   - pointer which will receive length of second part of address
                      string

    PartTwoEscape   - TRUE on output if PartTwo contains escape sequences

    On failure Url and UrlLength are not updated, and PartTwo/PartTwoLength are
    not written

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_WINHTTP_INVALID_URL

--*/

{
    LPSTR pString;
    LPSTR pColon;
    DWORD partLength;
    LPBOOL partEscape;
    DWORD length;

    //
    // parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>])
    //

    pString = *Url;
    pColon = NULL;
    partLength = 0;
    *PartOne = pString;
    *PartOneLength = 0;
    *PartOneEscape = FALSE;
    *PartTwoEscape = FALSE;
    partEscape = PartOneEscape;
    length = *UrlLength;

    if ((length != 0) && (*pString == '[')) {

        //
        // If the first part starts with a '[' then we assume it's an IPv6
        // literal address and it must be terminated with a ']'.
        //
        // Note we DO NOT output PartOneEscape == TRUE if there is a % in
        // the IPv6 literal address designating a Scope ID.
        //

        *PartOne = pString;
        for (;;) {

            if (*pString & ~0x7F) {
                return ERROR_WINHTTP_INVALID_URL;
            }

            ++partLength;
            ++pString;
            --length;
            if (length == 0) {

                // ran out of input before the closing ']'

                return ERROR_WINHTTP_INVALID_URL;
            }
            if (*pString == ']') {
                ++partLength;
                break;
            }
        }
        ++pString;
        --length;

        //
        // If there's more, then there should be a colon or forward slash
        // We allow http://[addr]/...
        //          http://[addr]:port/...
        // not
        //          http://[addr]junk/...
        //

        if (length != 0) {
            if ((*pString != ':') && (*pString != '/')) {
                return ERROR_WINHTTP_INVALID_URL;
            }
        }
    }

    //
    // The remaining count is tested BEFORE the character is read, so a counted
    // string that is not zero-terminated is never read past its end
    //

    while ((length != 0) && (*pString != '/') && (*pString != '\0')) {
        if (*pString == '%') {

            //
            // if there is a % in the string then it *must* (RFC 1738) be the
            // start of an escape sequence. This function just reports the
            // address of the substrings and their lengths; calling functions
            // must handle the escape sequences (i.e. it is their responsibility
            // to decide where to put the results)
            //

            *partEscape = TRUE;
        }
        if (*pString == ':') {
            if (pColon != NULL) {

                //
                // we don't expect more than 1 ':'
                //

                return ERROR_WINHTTP_INVALID_URL;
            }

            // the first part is complete; start counting the second part

            pColon = pString;
            *PartOneLength = partLength;
            if (partLength == 0) {
                *PartOne = NULL;
            }
            partLength = 0;
            partEscape = PartTwoEscape;
        } else {
            ++partLength;
        }

        if (*pString & ~0x7F) {
            return ERROR_WINHTTP_INVALID_URL;
        }

        ++pString;
        --length;
    }

    //
    // we either ended on the host (or user) name or the port number (or
    // password), one of which we don't know the length of
    //

    if (pColon == NULL) {
        *PartOneLength = partLength;
        *PartTwo = NULL;
        *PartTwoLength = 0;
        *PartTwoEscape = FALSE;
    } else {
        *PartTwoLength = partLength;
        *PartTwo = pColon + 1;

        //
        // in both the <user>:<password> and <host>:<port> cases, we cannot have
        // the second part without the first, although both parts being zero
        // length is OK (host name will be sorted out elsewhere, but (for now,
        // at least) I am allowing <>:<> for username:password, since I don't
        // see it expressly disallowed in the RFC. I may be revisiting this code
        // later...)
        //
        // N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif

//      if ((*PartOneLength == 0) && (partLength != 0)) {
//          return ERROR_WINHTTP_INVALID_URL;
//      }
    }

    //
    // update the URL pointer and length remaining
    //

    *Url = pString;
    *UrlLength = length;

    return ERROR_SUCCESS;
}
|
|
|
|
|
|
DWORD
GetUrlAddress(
    IN OUT LPSTR* lpszUrl,
    OUT LPDWORD lpdwUrlLength,
    OUT LPSTR* lpszUserName OPTIONAL,
    OUT LPDWORD lpdwUserNameLength OPTIONAL,
    OUT LPSTR* lpszPassword OPTIONAL,
    OUT LPDWORD lpdwPasswordLength OPTIONAL,
    OUT LPSTR* lpszHostName OPTIONAL,
    OUT LPDWORD lpdwHostNameLength OPTIONAL,
    IN BOOL fUnescapeHostName,
    OUT LPINTERNET_PORT lpPort OPTIONAL,
    OUT LPBOOL pHavePort
    )

/*++

Routine Description:

    Splits the address portion of a generic URL into its parts. The generic
    addressing format (RFC 1738) is:

        <user>:<password>@<host>:<port>

    The user name and password are located by searching for an '@' that
    appears before the first '/' (or before the end of the string). They are
    returned as pointer/length pairs into the caller's buffer and are NOT
    unescaped here.

    The host name and port are split by GetUrlAddressInfo. A host name is
    mandatory; a port is optional. Only the host name is ever unescaped in
    situ, and only when fUnescapeHostName is TRUE, lpszHostName is supplied
    and the host name contained a '%'.

    Assumes: 1. If one of the optional lpsz fields is present (e.g.
                lpszUserName) then the accompanying lpdw field must also be
                supplied

Arguments:

    lpszUrl             - IN: pointer to the (NUL-terminated) URL to parse,
                              positioned at the start of the address info
                          OUT: URL remaining after the address information.
                              Only written on success.

                          N.B. The url-path is NOT canonicalized (unescaped)
                          because it may contain protocol-specific
                          information which must be parsed out by the
                          protocol-specific parser

    lpdwUrlLength       - returned length of the remainder of the URL after
                          the address information. Only written on success

    lpszUserName        - returned pointer to the user name. If the URL has
                          no '@' before the first '/', this points at that
                          '/' (or at the terminator) and the length is 0

    lpdwUserNameLength  - returned length of the user name part

    lpszPassword        - returned pointer to the password (the text between
                          ':' and '@'). Same "not present" convention as
                          lpszUserName

    lpdwPasswordLength  - returned length of the password

    lpszHostName        - returned pointer to the host name

    lpdwHostNameLength  - returned length of the host name (after unescaping,
                          if unescaping was performed)

    fUnescapeHostName   - TRUE if %xx sequences in the host name should be
                          decoded in situ before the host name is returned

    lpPort              - returned value of the port field, or
                          INTERNET_INVALID_PORT_NUMBER if the URL has no port

    pHavePort           - returned boolean indicating whether a port was
                          specified in the URL. Written whenever it is
                          supplied, independently of lpPort

Return Value:

    DWORD
        Success - ERROR_SUCCESS

        Failure - ERROR_WINHTTP_INVALID_URL
                    The host name is missing, or the port contains a
                    non-digit or is larger than 65535

                  Any error returned by GetUrlAddressInfo or DecodeUrlInSitu

--*/

{
    DWORD error = ERROR_WINHTTP_INTERNAL_ERROR;

    BOOL hostHasEscape;
    BOOL portHasEscape;
    char portBuffer[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
    LPSTR pPortText;
    DWORD portTextLength;
    LPSTR hostName;
    DWORD hostNameLength;

    LPSTR pUrl = *lpszUrl;
    DWORD urlLength = (DWORD)strlen(pUrl);

    char *pHead = pUrl;
    char *pTail = pHead + urlLength;
    char *pSlash;
    char *pAt;

    //
    // an '@' only separates credentials from the host if it appears before
    // the first '/' (or before the end of the string when there is no '/')
    //

    pSlash = Utf8StrChr(pHead, pTail, '/');
    if (pSlash == NULL)
    {
        pSlash = pTail;
    }
    pAt = Utf8StrChr(pHead, pSlash, '@');

    {
        //
        // default: no credentials; zero-length strings anchored at the end
        // of the authority part
        //

        char *pUsername = pSlash;
        char *pPassword = pSlash;
        DWORD usernameLength = 0;
        DWORD passwordLength = 0;

        if (pAt != NULL)
        {
            pUsername = pHead;

            //
            // the search result still points at the ':' that precedes the
            // password; with no ':' the password is empty and sits at '@'
            //

            pPassword = Utf8StrChr(pUsername, pAt, ':');
            if (pPassword == NULL)
            {
                pPassword = pAt;
            }
            usernameLength = (DWORD)(pPassword - pUsername);

            if (*pPassword == ':')
            {
                pPassword++;
            }
            passwordLength = (DWORD)(pAt - pPassword);

            //
            // host[:port] starts right after the '@'
            //

            pHead = pAt + 1;
        }

        if (ARGUMENT_PRESENT(lpszUserName))
        {
            INET_ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
            *lpszUserName = pUsername;
            *lpdwUserNameLength = usernameLength;
        }

        if (ARGUMENT_PRESENT(lpszPassword))
        {
            INET_ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
            *lpszPassword = pPassword;
            *lpdwPasswordLength = passwordLength;
        }
    }

    //
    // now get the host name and the optional port
    //

    pUrl = pHead;
    urlLength = (DWORD)(pTail - pHead);

    pPortText = portBuffer;
    portTextLength = sizeof(portBuffer);

    error = GetUrlAddressInfo(&pUrl,
                              &urlLength,
                              &hostName,
                              &hostNameLength,
                              &hostHasEscape,
                              &pPortText,
                              &portTextLength,
                              &portHasEscape
                              );
    if (error != ERROR_SUCCESS)
    {
        goto done;
    }

    //
    // the URL address information MUST contain the host name
    //

    if ((hostName == NULL) || (hostNameLength == 0))
    {
        error = ERROR_WINHTTP_INVALID_URL;
        goto done;
    }

    if (ARGUMENT_PRESENT(lpszHostName))
    {
        INET_ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));

        //
        // if the host name contains escaped characters, convert them in situ
        //

        if (hostHasEscape && fUnescapeHostName)
        {
            error = DecodeUrlInSitu(hostName, &hostNameLength);
            if (error != ERROR_SUCCESS)
            {
                goto done;
            }
        }
        *lpszHostName = hostName;
        *lpdwHostNameLength = hostNameLength;
    }

    //
    // if there is a port field, check it for valid numeric characters and
    // convert it to a number. portHasEscape can be ignored: a '%' is not a
    // digit, so an escaped port is rejected by the digit check below
    //

    if (portTextLength != 0)
    {
        DWORD i;
        DWORD port = 0;

        INET_ASSERT(pPortText != NULL);

        for (i = 0; i < portTextLength; ++i, ++pPortText)
        {
            //
            // <ctype.h> classifiers are only defined for unsigned char
            // values (and EOF), so never hand them a possibly-negative char
            //

            if (!isdigit((unsigned char)*pPortText))
            {
                error = ERROR_WINHTTP_INVALID_URL;
                goto done;
            }
            port = port * 10 + (DWORD)(*pPortText - '0');

            //
            // ports larger than 65535 ((2^16)-1) are not allowed. Checking
            // after every digit also stops a long digit string from
            // overflowing the DWORD accumulator
            //

            if (port > 65535)
            {
                error = ERROR_WINHTTP_INVALID_URL;
                goto done;
            }
        }

        if (ARGUMENT_PRESENT(lpPort))
        {
            *lpPort = (INTERNET_PORT)port;
        }
        if (ARGUMENT_PRESENT(pHavePort))
        {
            *pHavePort = TRUE;
        }
    }
    else
    {
        if (ARGUMENT_PRESENT(lpPort))
        {
            *lpPort = INTERNET_INVALID_PORT_NUMBER;
        }
        if (ARGUMENT_PRESENT(pHavePort))
        {
            *pHavePort = FALSE;
        }
    }

    //
    // update the URL pointer and the length of the url-path
    //

    *lpszUrl = pUrl;
    *lpdwUrlLength = urlLength;

    error = ERROR_SUCCESS;

done:

    return error;
}
|
|
|
|
|
|
INTERNET_SCHEME
MapUrlSchemeName(
    IN LPSTR lpszSchemeName,
    IN DWORD dwSchemeNameLength
    )

/*++

Routine Description:

    Looks a scheme name up in the scheme list and returns its scheme type

Arguments:

    lpszSchemeName      - pointer to name of scheme to map

    dwSchemeNameLength  - length of scheme name; pass (DWORD)-1 if
                          lpszSchemeName is NUL-terminated and the length
                          should be measured here

Return Value:

    INTERNET_SCHEME
        The SchemeType of the matching UrlSchemeList entry, or
        INTERNET_SCHEME_UNKNOWN if ScanSchemes finds no match

--*/

{
    DWORD schemeIndex;
    DWORD nameLength = dwSchemeNameLength;

    //
    // (DWORD)-1 is the "measure it for me" sentinel
    //

    if (nameLength == (DWORD)-1)
    {
        nameLength = (DWORD)lstrlen(lpszSchemeName);
    }

    if (!ScanSchemes(lpszSchemeName, nameLength, &schemeIndex))
    {
        return INTERNET_SCHEME_UNKNOWN;
    }
    return UrlSchemeList[schemeIndex].SchemeType;
}
|
|
|
|
|
|
LPSTR
MapUrlScheme(
    IN INTERNET_SCHEME Scheme,
    OUT LPDWORD lpdwSchemeNameLength
    )

/*++

Routine Description:

    Returns the name, and the length of the name, that UrlSchemeList holds
    for an enumerated scheme type

Arguments:

    Scheme                  - enumerated scheme type to map; used directly as
                              the index into UrlSchemeList

    lpdwSchemeNameLength    - pointer to returned length of scheme name; set
                              to 0 when the scheme is out of range

Return Value:

    LPSTR - pointer to scheme name, or NULL if Scheme is not within
            [INTERNET_SCHEME_FIRST, INTERNET_SCHEME_LAST]

--*/

{
    BOOL schemeInRange = (Scheme >= INTERNET_SCHEME_FIRST)
                      && (Scheme <= INTERNET_SCHEME_LAST);

    if (!schemeInRange)
    {
        *lpdwSchemeNameLength = 0;
        return NULL;
    }

    *lpdwSchemeNameLength = UrlSchemeList[Scheme].SchemeLength;
    return UrlSchemeList[Scheme].SchemeName;
}
|
|
|
|
|
|
LPSTR
MapUrlSchemeToName(
    IN INTERNET_SCHEME Scheme
    )

/*++

Routine Description:

    Returns the name that UrlSchemeList holds for an enumerated scheme type.
    Same lookup as MapUrlScheme, for callers that do not need the length

Arguments:

    Scheme  - enumerated scheme type to map

Return Value:

    LPSTR - pointer to scheme name, or NULL if Scheme is not within
            [INTERNET_SCHEME_FIRST, INTERNET_SCHEME_LAST]

--*/

{
    DWORD ignoredLength;

    return MapUrlScheme(Scheme, &ignoredLength);
}
|
|
|
|
|
|
|
|
|
|
//
|
|
//
|
|
// UnsafeInPathAndQueryFlags flag in table set to 1 if symbol is unsafe for path or query
|
|
// question mark treated as safe
|
|
// this table is fater then SafetyList because it requires no substraction and no masking
|
|
// and only one bound checking to access it
|
|
//
|
|
//
|
|
const
|
|
PRIVATE
|
|
BYTE
|
|
UnsafeInPathAndQueryFlags[128] = {
|
|
// 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
|
|
// xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
// 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
|
|
// xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
// 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
|
|
// ! " # $ % & ' ( ) * + , - . /
|
|
1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
// 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
|
|
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
|
|
|
|
// 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
|
|
// @ A B C D E F G H I J K L M N O
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
// 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
|
|
// P Q R S T U V W X Y Z [ \ ] ^ _
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
|
|
|
|
// 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
|
|
// ` a b c d e f g h i j k l m n o
|
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
// 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
|
|
// p q r s t u v w x y z { | } ~ xx
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
//
// ADD_HEX_TO_STRING - appends the low 8 bits of ch to the string at pStr as
// a three-character "%hh" escape (upper-case hex) and advances pStr past it.
//
// Expects a 16-entry hex digit table named hexArray to be in scope at the
// point of use; intended for use inside ConvertUnicodeToMultiByte only.
// Expands to a brace-enclosed block, so call sites use it without a
// trailing semicolon.
//

#define ADD_HEX_TO_STRING(pStr, ch)                       \
    {                                                     \
        UCHAR hexByte_ = (UCHAR)(ch);                     \
        *(pStr)++ = '%';                                  \
        *(pStr)++ = hexArray[hexByte_ >> 4];              \
        *(pStr)++ = hexArray[hexByte_ & 0x0f];            \
    }
|
|
//#define ADD_HEX_TO_STRING(pStr, ch) \
|
|
// { UCHAR c = (UCHAR)ch; *(DWORD*)pStr = (DWORD)'%' + ((DWORD)(hexArray[c>>4]) << 8) + ((DWORD)(hexArray[c & 0x0f]) << 16); \
|
|
// pStr += 3; }
|
|
|
|
|
|
|
|
/*
|
|
* ConvertUnicodeToMultiByte:
|
|
*
|
|
|
|
dwFlags:
|
|
|
|
WINHTTP_FLAG_VALID_HOSTNAME only for server name; fast conversion is performed, no escaping
|
|
WINHTTP_FLAG_NULL_CODEPAGE assumes string contains only ASCII chars, fast conversion is performed
|
|
WINHTTP_FLAG_ESCAPE_PERCENT if escaping enabled, escape percent as well
|
|
WINHTTP_FLAG_ESCAPE_DISABLE disable escaping (if WINHTTP_FLAG_VALID_HOSTNAME not set)
|
|
WINHTTP_FLAG_ESCAPE_DISABLE_QUERY if escaping enabled escape path part, but do not escape query
|
|
|
|
*/
|
|
|
|
DWORD
|
|
ConvertUnicodeToMultiByte(
|
|
LPCWSTR lpszObjectName,
|
|
DWORD dwCodePage,
|
|
MEMORYPACKET* pmp,
|
|
DWORD dwFlags)
|
|
{
|
|
static CHAR* hexArray = "0123456789ABCDEF";
|
|
|
|
DWORD dwError = ERROR_SUCCESS;
|
|
BOOL bPureAscii = TRUE;
|
|
BOOL bTreatPercentAsSafe = (dwFlags & WINHTTP_FLAG_ESCAPE_PERCENT) ? FALSE : TRUE;
|
|
BOOL bNeedEscaping = (dwFlags & WINHTTP_FLAG_ESCAPE_DISABLE) ? FALSE : TRUE;
|
|
BOOL bEscapeQuery = (dwFlags & WINHTTP_FLAG_ESCAPE_DISABLE_QUERY) ? FALSE : TRUE;
|
|
|
|
//determine size of string and/or safe characters
|
|
DWORD dwUnsafeChars = 0;
|
|
DWORD dwUnicodeUrlSize;
|
|
|
|
if (dwFlags & WINHTTP_FLAG_VALID_HOSTNAME)
|
|
{
|
|
bNeedEscaping = FALSE;
|
|
|
|
if (!IsValidHostNameW(lpszObjectName, 0)) { // 0 == allow v6 literal scope ids
|
|
dwError = ERROR_WINHTTP_INVALID_URL;
|
|
goto done;
|
|
}
|
|
|
|
dwUnicodeUrlSize = lstrlenW(lpszObjectName)+1;
|
|
|
|
}
|
|
else if ((dwFlags & WINHTTP_FLAG_NULL_CODEPAGE) && !bNeedEscaping)
|
|
{
|
|
//if no escaping needed there is no need to calcaulate num of unsafe char
|
|
dwUnicodeUrlSize = lstrlenW(lpszObjectName)+1;
|
|
}
|
|
else
|
|
{
|
|
// optimization to check for unsafe characters, and optimize the common case.
|
|
// calculate the length, and while parsing the string, check if there are unsafeChars
|
|
PCWSTR pwStr;
|
|
|
|
if (bTreatPercentAsSafe)
|
|
for(pwStr = lpszObjectName; *pwStr; ++pwStr)
|
|
{
|
|
UINT16 wc = *pwStr;
|
|
if (wc <= 0x7f)
|
|
{
|
|
if (UnsafeInPathAndQueryFlags[wc] && (wc != L'%'))
|
|
++dwUnsafeChars;
|
|
}
|
|
else
|
|
{
|
|
bPureAscii = FALSE;
|
|
++dwUnsafeChars;
|
|
}
|
|
}
|
|
else
|
|
for(pwStr = lpszObjectName; *pwStr; ++pwStr)
|
|
{
|
|
UINT16 wc = *pwStr;
|
|
if (wc <= 0x7f)
|
|
{
|
|
if (UnsafeInPathAndQueryFlags[wc])
|
|
++dwUnsafeChars;
|
|
}
|
|
else
|
|
{
|
|
bPureAscii = FALSE;
|
|
++dwUnsafeChars;
|
|
}
|
|
}
|
|
|
|
dwUnicodeUrlSize = (DWORD)(pwStr-lpszObjectName+1);
|
|
}
|
|
|
|
//convert to MBCS
|
|
if (bPureAscii)
|
|
{
|
|
pmp->dwAlloc = dwUnicodeUrlSize;
|
|
if (bNeedEscaping)
|
|
pmp->dwAlloc += 2 * dwUnsafeChars;
|
|
|
|
pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
|
|
|
|
if (!pmp->psStr)
|
|
{
|
|
pmp->dwAlloc = 0;
|
|
dwError = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto done;
|
|
}
|
|
|
|
PSTR pStr = pmp->psStr;
|
|
if (bNeedEscaping)
|
|
{
|
|
UCHAR chPercent = bTreatPercentAsSafe ? (UCHAR)'%' : (UCHAR)0;
|
|
|
|
if (bEscapeQuery)
|
|
for (; *lpszObjectName; ++lpszObjectName)
|
|
{
|
|
UCHAR ch = (UCHAR)*lpszObjectName;
|
|
if (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent))
|
|
*pStr++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pStr, ch)
|
|
}
|
|
}
|
|
else
|
|
for (; *lpszObjectName && (*lpszObjectName != L'?'); ++lpszObjectName)
|
|
{
|
|
UCHAR ch = (UCHAR)*lpszObjectName;
|
|
if (!UnsafeInPathAndQueryFlags[ch] || ch == chPercent)
|
|
*pStr++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pStr, ch)
|
|
}
|
|
}
|
|
}
|
|
|
|
for (; *lpszObjectName; ++lpszObjectName)
|
|
*pStr++ = (CHAR)*lpszObjectName;
|
|
*pStr = '\0';
|
|
|
|
pmp->dwSize = (DWORD)(pStr - pmp->psStr);
|
|
}
|
|
else if (dwCodePage == CP_UTF8)
|
|
{
|
|
//converts to UTF8 and performs escaping at same time
|
|
pmp->dwAlloc = dwUnicodeUrlSize + (bNeedEscaping ? 8 : 2) * dwUnsafeChars; //yep, some extra allocation possible
|
|
|
|
pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
|
|
|
|
if (!pmp->psStr)
|
|
{
|
|
pmp->dwAlloc = 0;
|
|
dwError = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto done;
|
|
}
|
|
|
|
PSTR pStr = pmp->psStr;
|
|
|
|
if (bNeedEscaping)
|
|
{
|
|
WCHAR wcPercent = bTreatPercentAsSafe ? L'%' : (WCHAR)0;
|
|
WCHAR wcQMark = bEscapeQuery ? (WCHAR)0 : L'?';
|
|
|
|
for (; *lpszObjectName && (*lpszObjectName != wcQMark); ++lpszObjectName)
|
|
{
|
|
UINT16 wc = *lpszObjectName;
|
|
if (wc <= 0x007f) // encode to one byte
|
|
{
|
|
if (!UnsafeInPathAndQueryFlags[wc] || wc == wcPercent)
|
|
*pStr++ = (CHAR)wc;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pStr, wc)
|
|
}
|
|
}
|
|
else if (wc <= 0x07FF) //encode to two bytes
|
|
{
|
|
ADD_HEX_TO_STRING (pStr, 0xC0 | (wc >> 6))
|
|
ADD_HEX_TO_STRING (pStr, 0x80 | (wc & 0x3F))
|
|
}
|
|
else //encode to three bytes
|
|
{
|
|
ADD_HEX_TO_STRING (pStr, 0xe0 | (wc >> 12))
|
|
ADD_HEX_TO_STRING (pStr, 0x80 | ((wc >> 6) & 0x3F))
|
|
ADD_HEX_TO_STRING (pStr, 0x80 | (wc & 0x3F))
|
|
}
|
|
}
|
|
}
|
|
|
|
for (; *lpszObjectName; ++lpszObjectName)
|
|
{
|
|
UINT16 wc = *lpszObjectName;
|
|
if (wc <= 0x007f) // encode to one byte
|
|
{
|
|
*pStr++ = (CHAR)wc;
|
|
}
|
|
else if (wc <= 0x07FF) //encode to two bytes
|
|
{
|
|
*pStr++ = (CHAR)(0xC0 | (wc >> 6));
|
|
*pStr++ = (CHAR)(0x80 | (wc & 0x3F));
|
|
//*(WORD*)pStr = (WORD)0x80C0 | (wc >> 6) | ((wc & 0x3F) << 8);
|
|
//pStr += 2;
|
|
}
|
|
else //encode to three bytes
|
|
{
|
|
*pStr++ = (CHAR)(0xe0 | (wc >> 12));
|
|
*pStr++ = (CHAR)(0x80 | ((wc >> 6) & 0x3F));
|
|
*pStr++ = (CHAR)(0x80 | (wc & 0x3F));
|
|
//DWORD tmp = 0x8080e0 | (wc >> 12) | ((wc << 2) & 0x3f00) | (((DWORD)wc << 16) & 0x3f0000);
|
|
//*(DWORD*)pStr = tmp;
|
|
//pStr += 3;
|
|
}
|
|
}
|
|
|
|
*pStr = '\0';
|
|
|
|
pmp->dwSize = (DWORD)(pStr - pmp->psStr);
|
|
}
|
|
else
|
|
{
|
|
//last and final, so not to loose perf don't set dwCodePage to values other then CP_UTF8 :)
|
|
// convert with WideCharToMultiByte()
|
|
|
|
pmp->dwAlloc = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, NULL, 0, NULL, NULL);
|
|
if (!pmp->dwAlloc)
|
|
{
|
|
dwError = GetLastError();
|
|
goto done;
|
|
}
|
|
|
|
pmp->psStr = (LPSTR)ALLOCATE_FIXED_MEMORY(pmp->dwAlloc);
|
|
|
|
if (!pmp->psStr)
|
|
{
|
|
pmp->dwAlloc = 0;
|
|
dwError = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto done;
|
|
}
|
|
|
|
//find out if query is present
|
|
PCHAR pchQMInConverted = NULL;
|
|
DWORD dwQuerySize;
|
|
if (bNeedEscaping)
|
|
{
|
|
WCHAR* pQM = wcschr(lpszObjectName, L'?');
|
|
if (pQM)
|
|
{
|
|
DWORD dwPathSize = 0;
|
|
if (pQM != lpszObjectName)
|
|
{
|
|
dwPathSize = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, (DWORD)(pQM - lpszObjectName), pmp->psStr, pmp->dwAlloc, NULL, NULL);
|
|
|
|
if (!dwPathSize)
|
|
{
|
|
dwError = GetLastError();
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
dwQuerySize = WideCharToMultiByte(dwCodePage, 0, pQM, dwUnicodeUrlSize - (DWORD)(pQM - lpszObjectName), pmp->psStr + dwPathSize, pmp->dwAlloc - dwPathSize, NULL, NULL);
|
|
|
|
if (!dwQuerySize)
|
|
{
|
|
dwError = GetLastError();
|
|
goto done;
|
|
}
|
|
|
|
--dwQuerySize;
|
|
|
|
pmp->dwSize = dwPathSize + dwQuerySize;
|
|
pchQMInConverted = pmp->psStr + dwPathSize;
|
|
}
|
|
}
|
|
|
|
if (!pchQMInConverted)
|
|
{
|
|
pmp->dwSize = WideCharToMultiByte(dwCodePage, 0, lpszObjectName, dwUnicodeUrlSize, pmp->psStr, pmp->dwAlloc, NULL, NULL);
|
|
|
|
if (!pmp->dwSize)
|
|
{
|
|
dwError = GetLastError();
|
|
goto done;
|
|
}
|
|
else
|
|
--(pmp->dwSize);
|
|
}
|
|
|
|
if (bNeedEscaping)
|
|
{
|
|
//collect information about code page
|
|
DWORD dwCharSize = 1;
|
|
|
|
if (dwCodePage != CP_UTF7)
|
|
{
|
|
CPINFO CPInfo;
|
|
if (!GetCPInfo(dwCodePage, &CPInfo))
|
|
{
|
|
dwError = GetLastError();
|
|
goto done;
|
|
}
|
|
dwCharSize = CPInfo.MaxCharSize;
|
|
}
|
|
|
|
UCHAR chPercent = bTreatPercentAsSafe ? '%' : (UCHAR)0;
|
|
|
|
if (dwCharSize == 1)
|
|
{
|
|
dwUnsafeChars = 0;
|
|
|
|
//calculate number of unsafe chars
|
|
PSTR pStop = pchQMInConverted ? pchQMInConverted : (pmp->psStr + pmp->dwSize);
|
|
|
|
PSTR pStr = pmp->psStr;
|
|
//this loop counts unsafe chars in path, count '?' as well
|
|
for(; pStr != pStop; ++pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)) || (ch == '?'))
|
|
++dwUnsafeChars;
|
|
}
|
|
//this loop counts unsafe chars in query, do not count '?'
|
|
for(; *pStr; ++pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)))
|
|
++dwUnsafeChars;
|
|
}
|
|
|
|
if (dwUnsafeChars == 0)
|
|
goto done;
|
|
|
|
//make new allocation
|
|
DWORD dwNewAlloc = pmp->dwAlloc + dwUnsafeChars*2;
|
|
LPSTR pDest, pNewStr;
|
|
pNewStr = pDest = (LPSTR)ALLOCATE_FIXED_MEMORY(dwNewAlloc);
|
|
if (!pDest)
|
|
{
|
|
dwError = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto done;
|
|
}
|
|
|
|
//escaping
|
|
|
|
//escape path part
|
|
pStr = pmp->psStr;
|
|
for(; pStr != pStop; ++pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if ((ch <= 0x7F) && ((!UnsafeInPathAndQueryFlags[ch] && (ch != '?')) || (ch == chPercent)))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
}
|
|
//escape query part
|
|
for(; *pStr; ++pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if ((ch <= 0x7F) && (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent)))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
}
|
|
*pDest = '\0';
|
|
|
|
FREE_FIXED_MEMORY(pmp->psStr);
|
|
pmp->psStr = pNewStr;
|
|
pmp->dwSize = (DWORD)(pDest-pNewStr);
|
|
pmp->dwAlloc = dwNewAlloc;
|
|
}
|
|
else
|
|
{
|
|
//well, string is mbcs
|
|
|
|
dwUnsafeChars = 0;
|
|
|
|
//calculate number of unsafe chars
|
|
PSTR pStop = pchQMInConverted ? pchQMInConverted : (pmp->psStr + pmp->dwSize);
|
|
|
|
PSTR pStr = pmp->psStr;
|
|
|
|
//this loop counts unsafe chars in path, count '?' as well
|
|
while (pStr != pStop)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if (IsDBCSLeadByteEx(dwCodePage, ch))
|
|
{
|
|
//do not allow percent here
|
|
if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch] || (ch == '?'))
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
ch = *pStr;
|
|
if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch] || (ch == '?'))
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
}
|
|
else
|
|
{
|
|
if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)) || (ch == '?'))
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
}
|
|
}
|
|
//this loop counts unsafe chars in query, do not count '?'
|
|
while(*pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if (IsDBCSLeadByteEx(dwCodePage, ch))
|
|
{
|
|
//do not allow percent here
|
|
if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch])
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
ch = *pStr;
|
|
if ((ch > 0x7F) || UnsafeInPathAndQueryFlags[ch])
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
}
|
|
else
|
|
{
|
|
if ((ch > 0x7F) || (UnsafeInPathAndQueryFlags[ch] && (ch != chPercent)))
|
|
++dwUnsafeChars;
|
|
++pStr;
|
|
}
|
|
}
|
|
|
|
if (dwUnsafeChars == 0)
|
|
goto done;
|
|
|
|
//make new allocation
|
|
DWORD dwNewAlloc = pmp->dwAlloc + dwUnsafeChars*2;
|
|
LPSTR pDest, pNewStr;
|
|
pNewStr = pDest = (LPSTR)ALLOCATE_FIXED_MEMORY(dwNewAlloc);
|
|
if (!pDest)
|
|
{
|
|
dwError = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto done;
|
|
}
|
|
|
|
//escaping
|
|
|
|
//escape path part
|
|
pStr = pmp->psStr;
|
|
while (pStr != pStop)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if (IsDBCSLeadByteEx(dwCodePage, ch))
|
|
{
|
|
//do not allow percent here
|
|
if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch] && (ch != '?'))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
ch = *pStr;
|
|
if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch] && (ch != '?'))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
}
|
|
else
|
|
{
|
|
if ((ch <= 0x7F) && ((!UnsafeInPathAndQueryFlags[ch] && (ch != '?')) || (ch == chPercent)))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
}
|
|
}
|
|
|
|
//escape query part
|
|
while (*pStr)
|
|
{
|
|
UCHAR ch = *pStr;
|
|
if (IsDBCSLeadByteEx(dwCodePage, ch))
|
|
{
|
|
//do not allow percent here
|
|
if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch])
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
ch = *pStr;
|
|
if ((ch <= 0x7F) && !UnsafeInPathAndQueryFlags[ch])
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
}
|
|
else
|
|
{
|
|
if ((ch <= 0x7F) && (!UnsafeInPathAndQueryFlags[ch] || (ch == chPercent)))
|
|
*pDest++ = ch;
|
|
else
|
|
{
|
|
ADD_HEX_TO_STRING (pDest, ch)
|
|
}
|
|
++pStr;
|
|
}
|
|
}
|
|
|
|
*pDest = '\0';
|
|
|
|
FREE_FIXED_MEMORY(pmp->psStr);
|
|
pmp->psStr = pNewStr;
|
|
pmp->dwSize = (DWORD)(pDest-pNewStr);
|
|
pmp->dwAlloc = dwNewAlloc;
|
|
}
|
|
}
|
|
}
|
|
|
|
done:
|
|
if (pmp->psStr)
|
|
pmp->dwAlloc = (pmp->dwAlloc > MP_MAX_STACK_USE) ? pmp->dwAlloc : MP_MAX_STACK_USE+1;// to force FREE in ~MEMORYPACKET
|
|
|
|
return dwError;
|
|
}
|
|
|
|
|