You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
4688 lines
133 KiB
4688 lines
133 KiB
/*++
|
|
|
|
Copyright (c) 1998-2002 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
C14n.c
|
|
|
|
Abstract:
|
|
|
|
URL canonicalization (c14n) routines
|
|
|
|
Author:
|
|
|
|
George V. Reilly (GeorgeRe) 22-Mar-2002
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#include <precomp.h>
|
|
|
|
#include "c14np.h"
|
|
|
|
#if defined(ALLOC_PRAGMA) && defined(KERNEL_PRIV)
|
|
|
|
#pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfig)
|
|
#pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfigEncoding)
|
|
#pragma alloc_text( PAGE, HttpUnescapePercentHexEncoding)
|
|
#pragma alloc_text( PAGE, HttppPopCharHostNameUtf8)
|
|
#pragma alloc_text( PAGE, HttppPopCharHostNameDbcs)
|
|
#pragma alloc_text( PAGE, HttppPopCharHostNameAnsi)
|
|
#pragma alloc_text( PAGE, HttpCopyHost)
|
|
#pragma alloc_text( PAGE, HttppCopyHostByType)
|
|
#pragma alloc_text( PAGE, HttpValidateHostname)
|
|
#pragma alloc_text( PAGE, HttppPopCharAbsPathUtf8)
|
|
#pragma alloc_text( PAGE, HttppPopCharAbsPathDbcs)
|
|
#pragma alloc_text( PAGE, HttppPopCharAbsPathAnsi)
|
|
#pragma alloc_text( PAGE, HttppPopCharQueryString)
|
|
#pragma alloc_text( PAGE, HttppCopyUrlByType)
|
|
#pragma alloc_text( PAGE, HttpCopyUrl)
|
|
#pragma alloc_text( PAGE, HttpCleanAndCopyUrl)
|
|
#pragma alloc_text( PAGE, HttppCleanAndCopyUrlByType)
|
|
#pragma alloc_text( PAGE, HttpFindUrlToken)
|
|
#pragma alloc_text( PAGE, HttppParseIPv6Address)
|
|
#pragma alloc_text( PAGE, HttppPrintIpAddressW)
|
|
#pragma alloc_text( PAGE, HttpParseUrl)
|
|
#pragma alloc_text( PAGE, HttpNormalizeParsedUrl)
|
|
|
|
|
|
#endif // ALLOC_PRAGMA && KERNEL_PRIV
|
|
|
|
#if 0 // Non-Pageable Functions
|
|
NOT PAGEABLE --
|
|
#endif // Non-Pageable Functions
|
|
|
|
|
|
|
|
VOID
|
|
HttpInitializeDefaultUrlC14nConfig(
|
|
PURL_C14N_CONFIG pCfg
|
|
)
|
|
{
|
|
PAGED_CODE();
|
|
|
|
pCfg->HostnameDecodeOrder = UrlDecode_Utf8_Else_Dbcs_Else_Ansi;
|
|
pCfg->AbsPathDecodeOrder = UrlDecode_Utf8;
|
|
pCfg->EnableNonUtf8 = FALSE;
|
|
pCfg->FavorUtf8 = FALSE;
|
|
pCfg->EnableDbcs = FALSE;
|
|
pCfg->PercentUAllowed = DEFAULT_C14N_PERCENT_U_ALLOWED;
|
|
pCfg->AllowRestrictedChars = DEFAULT_C14N_ALLOW_RESTRICTED_CHARS;
|
|
pCfg->CodePage = 0;
|
|
pCfg->UrlMaxLength = DEFAULT_C14N_URL_MAX_LENGTH;
|
|
pCfg->UrlSegmentMaxLength = DEFAULT_C14N_URL_SEGMENT_MAX_LENGTH;
|
|
pCfg->UrlSegmentMaxCount = DEFAULT_C14N_URL_SEGMENT_MAX_COUNT;
|
|
pCfg->MaxLabelLength = DEFAULT_C14N_MAX_LABEL_LENGTH;
|
|
pCfg->MaxHostnameLength = DEFAULT_C14N_MAX_HOSTNAME_LENGTH;
|
|
|
|
} // HttpInitializeDefaultUrlC14nConfig
|
|
|
|
|
|
|
|
VOID
|
|
HttpInitializeDefaultUrlC14nConfigEncoding(
|
|
PURL_C14N_CONFIG pCfg,
|
|
BOOLEAN EnableNonUtf8,
|
|
BOOLEAN FavorUtf8,
|
|
BOOLEAN EnableDbcs
|
|
)
|
|
{
|
|
PAGED_CODE();
|
|
|
|
HttpInitializeDefaultUrlC14nConfig(pCfg);
|
|
|
|
pCfg->EnableNonUtf8 = EnableNonUtf8;
|
|
pCfg->FavorUtf8 = FavorUtf8;
|
|
pCfg->EnableDbcs = EnableDbcs;
|
|
|
|
if (EnableNonUtf8)
|
|
{
|
|
if (FavorUtf8)
|
|
{
|
|
pCfg->AbsPathDecodeOrder = (EnableDbcs
|
|
? UrlDecode_Utf8_Else_Dbcs
|
|
: UrlDecode_Utf8_Else_Ansi);
|
|
}
|
|
else
|
|
{
|
|
pCfg->AbsPathDecodeOrder = (EnableDbcs
|
|
? UrlDecode_Dbcs_Else_Utf8
|
|
: UrlDecode_Ansi_Else_Utf8);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pCfg->AbsPathDecodeOrder = UrlDecode_Utf8;
|
|
}
|
|
|
|
} // HttpInitializeDefaultUrlC14nConfigEncoding
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Convert '%NN' or '%uNNNN' to a ULONG.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
PercentUAllowed - Accept '%uNNNN' notation?
|
|
pOutChar - decoded character
|
|
pBytesToSkip - number of bytes consumed from pSourceChar;
|
|
will be 3 for %NN and 6 for %uNNNN.
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttpUnescapePercentHexEncoding(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN BOOLEAN PercentUAllowed,
|
|
OUT PULONG pOutChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
ULONG Result, i, NumDigits;
|
|
PCUCHAR pHexDigits;
|
|
|
|
PAGED_CODE();
|
|
|
|
if (SourceLength < STRLEN_LIT("%NN"))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpUnescapePercentHexEncoding(%p): "
|
|
"Length too short, %lu.\n",
|
|
pSourceChar, SourceLength
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
else if (pSourceChar[0] != PERCENT)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpUnescapePercentHexEncoding(%p): "
|
|
"Starts with 0x%02lX, not '%%'.\n",
|
|
pSourceChar, (ULONG) pSourceChar[0]
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
if (pSourceChar[1] != 'u' && pSourceChar[1] != 'U')
|
|
{
|
|
// RFC 2396 says that an "escaped octet is encoded as a character
|
|
// triplet, consisting of the percent character '%' followed by
|
|
// the two hexadecimal digits representing the octet code."
|
|
|
|
pHexDigits = pSourceChar + STRLEN_LIT("%");
|
|
NumDigits = 2;
|
|
*pBytesToSkip = STRLEN_LIT("%NN");
|
|
}
|
|
else
|
|
{
|
|
// This is the %uNNNN notation generated by JavaScript's escape() fn
|
|
|
|
if (! PercentUAllowed)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpUnescapePercentHexEncoding(%p): "
|
|
"%%uNNNN forbidden.\n",
|
|
pSourceChar, SourceLength
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
else if (SourceLength < STRLEN_LIT("%uNNNN"))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpUnescapePercentHexEncoding(%p): "
|
|
"Length %lu too short for %%uNNNN.\n",
|
|
pSourceChar, SourceLength
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
pHexDigits = pSourceChar + STRLEN_LIT("%u");
|
|
NumDigits = 4;
|
|
*pBytesToSkip = STRLEN_LIT("%uNNNN");
|
|
}
|
|
|
|
ASSERT(*pBytesToSkip <= SourceLength);
|
|
|
|
Result = 0;
|
|
|
|
for (i = 0; i < NumDigits; ++i)
|
|
{
|
|
ULONG Char = pHexDigits[i];
|
|
ULONG Digit;
|
|
|
|
//
|
|
// HexToChar() inlined. Note: in ASCII, '0' < 'A' < 'a' and there are
|
|
// no gaps in ranges '0'..'9', 'A'..'F', and 'a'..'f' (unlike EBCDIC,
|
|
// which has gaps between 'I'/'J', 'R'/'S', 'i'/'j', and 'r'/'s').
|
|
//
|
|
|
|
C_ASSERT('0' < 'A' && 'A' < 'a');
|
|
C_ASSERT('9' - '0' == 10 - 1);
|
|
C_ASSERT('F' - 'A' == 6 - 1);
|
|
C_ASSERT('f' - 'a' == 6 - 1);
|
|
|
|
if (! IS_HTTP_HEX(Char))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpUnescapePercentHexEncoding(%p): "
|
|
"Invalid hex character[%lu], 0x%02lX.\n",
|
|
pSourceChar, i, Char
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
else if ('a' <= Char)
|
|
{
|
|
ASSERT('a' <= Char && Char <= 'f');
|
|
Digit = Char - 'a' + 0xA;
|
|
}
|
|
else if ('A' <= Char)
|
|
{
|
|
ASSERT('A' <= Char && Char <= 'F');
|
|
Digit = Char - 'A' + 0xA;
|
|
}
|
|
else
|
|
{
|
|
ASSERT('0' <= Char && Char <= '9');
|
|
Digit = Char - '0';
|
|
}
|
|
|
|
ASSERT(Digit < 0x10);
|
|
|
|
Result = (Result << 4) | Digit;
|
|
}
|
|
|
|
*pOutChar = Result;
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttpUnescapePercentHexEncoding
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1-4 bytes from pSourceChar, treating it as raw UTF-8.
|
|
This routine is only suitable for the hostname part of an HTTP URL,
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharHostNameUtf8(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
Status = HttpUtf8RawBytesToUnicode(
|
|
pSourceChar,
|
|
SourceLength,
|
|
pUnicodeChar,
|
|
pBytesToSkip
|
|
);
|
|
|
|
return Status;
|
|
|
|
} // HttppPopCharHostNameUtf8
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1-2 bytes from pSourceChar and converts it from raw DBCS to Unicode.
|
|
This routine is only suitable for the hostname part of an HTTP URL.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharHostNameDbcs(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
ULONG AnsiCharSize;
|
|
WCHAR WideChar;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
if (! IS_DBCS_LEAD_BYTE(pSourceChar[0]))
|
|
{
|
|
AnsiCharSize = 1;
|
|
}
|
|
else
|
|
{
|
|
if (SourceLength < 2)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharHostNameDbcs(%p): "
|
|
"ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
|
|
pSourceChar, *pSourceChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
AnsiCharSize = 2;
|
|
}
|
|
|
|
Status = RtlMultiByteToUnicodeN(
|
|
&WideChar,
|
|
sizeof(WCHAR),
|
|
NULL,
|
|
(PCSTR) pSourceChar,
|
|
AnsiCharSize
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharHostNameDbcs(%p): "
|
|
"MultiByteToUnicode(%lu) failed, %s.\n",
|
|
pSourceChar, AnsiCharSize, HttpStatusToString(Status)
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
*pUnicodeChar = WideChar;
|
|
*pBytesToSkip = AnsiCharSize;
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttppPopCharHostNameDbcs
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1 bytes from pSourceChar and converts it from raw ANSI to Unicode.
|
|
This routine is only suitable for the hostname part of an HTTP URL.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharHostNameAnsi(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
|
|
#if !DBG
|
|
UNREFERENCED_PARAMETER(SourceLength);
|
|
#endif // !DBG
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
*pUnicodeChar = AnsiToUnicodeMap[pSourceChar[0]];
|
|
*pBytesToSkip = 1;
|
|
|
|
Status = (0 != *pUnicodeChar)
|
|
? STATUS_SUCCESS
|
|
: STATUS_OBJECT_PATH_SYNTAX_BAD;
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharHostNameAnsi(%p): "
|
|
"No mapping for %lu.\n",
|
|
pSourceChar, *pSourceChar
|
|
));
|
|
}
|
|
|
|
return Status;
|
|
|
|
} // HttppPopCharHostNameAnsi
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Common tail function called at the end of the HttppPopCharAbsPath*()
|
|
functions, to minimize code replication.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
UnicodeChar - decoded character
|
|
BytesToSkip - number of characters consumed from pSourceChar
|
|
pUnicodeChar - where to put UnicodeChar result
|
|
pBytesToSkip - where to put BytesToSkip result
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
__inline
|
|
NTSTATUS
|
|
HttppPopCharAbsPathCommonTail(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN ULONG UnicodeChar,
|
|
IN ULONG BytesToSkip,
|
|
IN BOOLEAN AllowRestrictedChars,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
#if !DBG
|
|
UNREFERENCED_PARAMETER(pSourceChar);
|
|
UNREFERENCED_PARAMETER(SourceLength);
|
|
#endif // !DBG
|
|
|
|
//
|
|
// Special handling for characters in the 8-bit range.
|
|
// May want to look at BytesToSkip to distinguish between
|
|
// raw and hex-escaped/UTF-8-encoded data.
|
|
//
|
|
// In particular, should we allow %2F or %u002F as alternate
|
|
// represenations of '/' in a URL? Why would anyone have a legitimate
|
|
// need to escape a slash character?
|
|
//
|
|
|
|
if (UnicodeChar < 0x100)
|
|
{
|
|
// Transform backslashes to forward slashes
|
|
|
|
if (BACK_SLASH == UnicodeChar)
|
|
{
|
|
UnicodeChar = FORWARD_SLASH;
|
|
}
|
|
else if (!AllowRestrictedChars && IS_URL_INVALID(UnicodeChar))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathCommonTail(%p): "
|
|
"Invalid character, U+%04X.\n",
|
|
pSourceChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
// CODEWORK: should we allow hex-escaped "restricted" or "unwise"
|
|
// characters at all?
|
|
}
|
|
|
|
ASSERT(BytesToSkip <= SourceLength);
|
|
|
|
*pBytesToSkip = BytesToSkip;
|
|
*pUnicodeChar = UnicodeChar;
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttppPopCharAbsPathCommonTail
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1-12 bytes from pSourceChar. Handle hex-escaped UTF-8 encoding.
|
|
This routine is only suitable for the /abspath part of an HTTP URL.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharAbsPathUtf8(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN BOOLEAN PercentUAllowed,
|
|
IN BOOLEAN AllowRestrictedChars,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
ULONG UnicodeChar;
|
|
ULONG BytesToSkip;
|
|
ULONG Temp;
|
|
ULONG OctetCount;
|
|
UCHAR Octets[4];
|
|
UCHAR LeadByte;
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
//
|
|
// validate it as a valid URL character
|
|
//
|
|
|
|
if (! IS_URL_TOKEN(pSourceChar[0]))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"first char, 0x%02lX, isn't URL token\n",
|
|
pSourceChar, (ULONG) pSourceChar[0]
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
//
|
|
// need to unescape hex encoding, '%NN' or '%uNNNN'?
|
|
//
|
|
|
|
if (PERCENT != pSourceChar[0])
|
|
{
|
|
UnicodeChar = pSourceChar[0];
|
|
BytesToSkip = 1;
|
|
|
|
//
|
|
// All octets with bit7 set MUST be hex-escaped.
|
|
// Do NOT accept literals with hi-bit set.
|
|
//
|
|
|
|
if (UnicodeChar > ASCII_MAX)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid hi-bit literal, 0x%02lX.\n",
|
|
pSourceChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
goto unslash;
|
|
}
|
|
|
|
Status = HttpUnescapePercentHexEncoding(
|
|
pSourceChar,
|
|
SourceLength,
|
|
PercentUAllowed,
|
|
&UnicodeChar,
|
|
&BytesToSkip
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid hex encoding.\n",
|
|
pSourceChar
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
//
|
|
// If we consumed '%uNNNN', don't attempt any UTF-8 decoding
|
|
//
|
|
|
|
if (STRLEN_LIT("%uNNNN") == BytesToSkip)
|
|
goto unslash;
|
|
|
|
ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
|
|
ASSERT(UnicodeChar <= 0xFF);
|
|
|
|
Octets[0] = LeadByte = (UCHAR) UnicodeChar;
|
|
|
|
OctetCount = UTF8_OCTET_COUNT(LeadByte);
|
|
|
|
if (0 == OctetCount)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid lead byte, 0x%02lX.\n",
|
|
pSourceChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
ASSERT(OctetCount <= sizeof(Octets) / sizeof(Octets[0]));
|
|
|
|
BytesToSkip = OctetCount * STRLEN_LIT("%NN");
|
|
|
|
if (BytesToSkip > SourceLength)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"%lu octets is not enough for %lu-byte UTF-8 encoding.\n",
|
|
pSourceChar, OctetCount, SourceLength
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
if (OctetCount == 1)
|
|
{
|
|
#if DBG
|
|
// Singleton: no trail bytes
|
|
|
|
Status = HttpUtf8RawBytesToUnicode(
|
|
Octets,
|
|
OctetCount,
|
|
&UnicodeChar,
|
|
&Temp
|
|
);
|
|
|
|
ASSERT(STATUS_SUCCESS == Status);
|
|
ASSERT(UnicodeChar == LeadByte);
|
|
ASSERT(1 == Temp);
|
|
#endif // DBG
|
|
}
|
|
else
|
|
{
|
|
ULONG i;
|
|
|
|
//
|
|
// Decode the hex-escaped trail bytes
|
|
//
|
|
|
|
for (i = 1; i < OctetCount; ++i)
|
|
{
|
|
ULONG TrailChar;
|
|
UCHAR TrailByte;
|
|
|
|
Status = HttpUnescapePercentHexEncoding(
|
|
pSourceChar + i * STRLEN_LIT("%NN"),
|
|
STRLEN_LIT("%NN"),
|
|
FALSE, // do not allow %uNNNN for trail bytes
|
|
&TrailChar,
|
|
&Temp
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid hex-encoded trail byte[%lu].\n",
|
|
pSourceChar, i
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
ASSERT(STRLEN_LIT("%NN") == Temp);
|
|
ASSERT(TrailChar <= 0xFF);
|
|
|
|
Octets[i] = TrailByte = (UCHAR) TrailChar;
|
|
|
|
if (! IS_UTF8_TRAILBYTE(TrailByte))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid trail byte[%lu], 0x%02lX.\n",
|
|
pSourceChar, i, TrailChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Decode the raw UTF-8 bytes
|
|
//
|
|
|
|
Status = HttpUtf8RawBytesToUnicode(
|
|
Octets,
|
|
OctetCount,
|
|
&UnicodeChar,
|
|
&Temp
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathUtf8(%p): "
|
|
"Invalid UTF-8 sequence.\n",
|
|
pSourceChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
}
|
|
|
|
unslash:
|
|
|
|
ASSERT(NT_SUCCESS(Status));
|
|
|
|
return HttppPopCharAbsPathCommonTail(
|
|
pSourceChar,
|
|
SourceLength,
|
|
UnicodeChar,
|
|
BytesToSkip,
|
|
AllowRestrictedChars,
|
|
pUnicodeChar,
|
|
pBytesToSkip
|
|
);
|
|
|
|
} // HttppPopCharAbsPathUtf8
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1-6 bytes from pSourceChar. Handle hex-escaped DBCS encoding.
|
|
This routine is only suitable for the /abspath part of an HTTP URL.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharAbsPathDbcs(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN BOOLEAN PercentUAllowed,
|
|
IN BOOLEAN AllowRestrictedChars,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
ULONG UnicodeChar;
|
|
WCHAR WideChar;
|
|
ULONG BytesToSkip;
|
|
UCHAR AnsiChar[2];
|
|
ULONG AnsiCharSize;
|
|
UCHAR LeadByte;
|
|
UCHAR SecondByte = 0;
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
if (! IS_URL_TOKEN(pSourceChar[0]))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"first char, 0x%02lX, isn't URL token\n",
|
|
pSourceChar, (ULONG) pSourceChar[0]
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
if (PERCENT != pSourceChar[0])
|
|
{
|
|
// Note: unlike UTF-8, we allow literal bytes whose top bit is set
|
|
|
|
UnicodeChar = pSourceChar[0];
|
|
BytesToSkip = 1;
|
|
}
|
|
else
|
|
{
|
|
// need to unescape hex encoding, '%NN' or '%uNNNN'
|
|
|
|
Status = HttpUnescapePercentHexEncoding(
|
|
pSourceChar,
|
|
SourceLength,
|
|
PercentUAllowed,
|
|
&UnicodeChar,
|
|
&BytesToSkip
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"Invalid hex encoding.\n",
|
|
pSourceChar
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
//
|
|
// If we consumed '%uNNNN', don't attempt DBCS-to-Unicode conversion
|
|
//
|
|
|
|
if (STRLEN_LIT("%uNNNN") == BytesToSkip)
|
|
goto unslash;
|
|
|
|
ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
|
|
ASSERT(UnicodeChar <= 0xFF);
|
|
}
|
|
|
|
LeadByte = (UCHAR) UnicodeChar;
|
|
AnsiChar[0] = LeadByte;
|
|
|
|
if (! IS_DBCS_LEAD_BYTE(LeadByte))
|
|
{
|
|
AnsiCharSize = 1;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// This is a double-byte character.
|
|
//
|
|
|
|
ASSERT(BytesToSkip <= SourceLength);
|
|
|
|
if (BytesToSkip == SourceLength)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
|
|
pSourceChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
AnsiCharSize = 2;
|
|
SecondByte = pSourceChar[BytesToSkip];
|
|
|
|
if (PERCENT != SecondByte)
|
|
{
|
|
BytesToSkip += 1;
|
|
}
|
|
else
|
|
{
|
|
ULONG TrailChar;
|
|
ULONG Temp;
|
|
|
|
if (BytesToSkip + STRLEN_LIT("%NN") > SourceLength)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"ERROR: no space for DBCS hex-encoded suffix\n",
|
|
pSourceChar
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
Status = HttpUnescapePercentHexEncoding(
|
|
pSourceChar + BytesToSkip,
|
|
SourceLength - BytesToSkip,
|
|
FALSE, // no %uNNNN allowed here
|
|
&TrailChar,
|
|
&Temp
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"Invalid hex encoding of trail byte.\n",
|
|
pSourceChar
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
ASSERT(STRLEN_LIT("%NN") == Temp);
|
|
ASSERT(TrailChar <= 0xFF);
|
|
|
|
SecondByte = (UCHAR) TrailChar;
|
|
BytesToSkip += STRLEN_LIT("%NN");
|
|
}
|
|
|
|
AnsiChar[1] = SecondByte;
|
|
}
|
|
|
|
Status = RtlMultiByteToUnicodeN(
|
|
&WideChar,
|
|
sizeof(WCHAR),
|
|
NULL,
|
|
(PCHAR) &AnsiChar[0],
|
|
AnsiCharSize
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"MultiByteToUnicode(%lu) failed, %s.\n",
|
|
pSourceChar, AnsiCharSize, HttpStatusToString(Status)
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
UnicodeChar = WideChar;
|
|
|
|
#if DBG
|
|
//
|
|
// Describe conversion in debug spew.
|
|
//
|
|
|
|
if (1 == AnsiCharSize)
|
|
{
|
|
UlTraceVerbose(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"converted %02X to U+%04lX '%C'\n",
|
|
pSourceChar,
|
|
LeadByte,
|
|
UnicodeChar,
|
|
UnicodeChar
|
|
));
|
|
}
|
|
else
|
|
{
|
|
ASSERT(2 == AnsiCharSize);
|
|
|
|
UlTraceVerbose(PARSER, (
|
|
"http!HttppPopCharAbsPathDbcs(%p): "
|
|
"converted %02X %02X to U+%04lX '%C'\n",
|
|
pSourceChar,
|
|
LeadByte,
|
|
SecondByte,
|
|
UnicodeChar,
|
|
UnicodeChar
|
|
));
|
|
}
|
|
#endif // DBG
|
|
|
|
unslash:
|
|
|
|
ASSERT(NT_SUCCESS(Status));
|
|
|
|
return HttppPopCharAbsPathCommonTail(
|
|
pSourceChar,
|
|
SourceLength,
|
|
UnicodeChar,
|
|
BytesToSkip,
|
|
AllowRestrictedChars,
|
|
pUnicodeChar,
|
|
pBytesToSkip
|
|
);
|
|
|
|
} // HttppPopCharAbsPathDbcs
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1-6 bytes from pSourceChar. Handle hex-escaped ANSI encoding.
|
|
This routine is only suitable for the /abspath part of an HTTP URL.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharAbsPathAnsi(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN BOOLEAN PercentUAllowed,
|
|
IN BOOLEAN AllowRestrictedChars,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
NTSTATUS Status = STATUS_SUCCESS;
|
|
ULONG UnicodeChar;
|
|
ULONG BytesToSkip;
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(SourceLength > 0);
|
|
|
|
//
|
|
// DBCS and ANSI decoders must allow any raw byte whose top bit
|
|
// is set (0x80-0xFF)
|
|
//
|
|
|
|
if (! IS_URL_TOKEN(pSourceChar[0]) &&
|
|
!(0x80 & pSourceChar[0]))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathAnsi(%p): "
|
|
"first char, 0x%02lX, isn't URL token\n",
|
|
pSourceChar, (ULONG) pSourceChar[0]
|
|
));
|
|
|
|
RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
|
|
}
|
|
|
|
if (PERCENT != pSourceChar[0])
|
|
{
|
|
// Note: unlike UTF-8, we allow literal bytes whose top bit is set
|
|
|
|
UnicodeChar = AnsiToUnicodeMap[ pSourceChar[0] ];
|
|
BytesToSkip = 1;
|
|
}
|
|
else
|
|
{
|
|
// need to unescape hex encoding, '%NN' or '%uNNNN'
|
|
|
|
Status = HttpUnescapePercentHexEncoding(
|
|
pSourceChar,
|
|
SourceLength,
|
|
PercentUAllowed,
|
|
&UnicodeChar,
|
|
&BytesToSkip
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppPopCharAbsPathAnsi(%p): "
|
|
"Invalid hex encoding.\n",
|
|
pSourceChar
|
|
));
|
|
|
|
return Status;
|
|
}
|
|
|
|
//
|
|
// If we consumed '%uNNNN', don't attempt Ansi-to-Unicode conversion
|
|
//
|
|
|
|
if (STRLEN_LIT("%uNNNN") != BytesToSkip)
|
|
{
|
|
ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
|
|
ASSERT(UnicodeChar <= 0xFF);
|
|
|
|
UnicodeChar = AnsiToUnicodeMap[(UCHAR) UnicodeChar];
|
|
}
|
|
}
|
|
|
|
ASSERT(NT_SUCCESS(Status));
|
|
|
|
return HttppPopCharAbsPathCommonTail(
|
|
pSourceChar,
|
|
SourceLength,
|
|
UnicodeChar,
|
|
BytesToSkip,
|
|
AllowRestrictedChars,
|
|
pUnicodeChar,
|
|
pBytesToSkip
|
|
);
|
|
|
|
} // HttppPopCharAbsPathAnsi
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Consume 1 bytes from pSourceChar and returns it unaltered.
|
|
This routine is only suitable for the ?querystring part of an HTTP URL,
|
|
which we do not interpret.
|
|
|
|
CODEWORK: don't 'convert' querystring to Unicode. Send it up verbatim.
|
|
|
|
Arguments:
|
|
|
|
pSourceChar - Input buffer
|
|
SourceLength - Length of pSourceChar, in bytes
|
|
pUnicodeChar - decoded character
|
|
pBytesToSkip - number of characters consumed from pSourceChar
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttppPopCharQueryString(
|
|
IN PCUCHAR pSourceChar,
|
|
IN ULONG SourceLength,
|
|
IN BOOLEAN PercentUAllowed,
|
|
IN BOOLEAN AllowRestrictedChars,
|
|
OUT PULONG pUnicodeChar,
|
|
OUT PULONG pBytesToSkip
|
|
)
|
|
{
|
|
PAGED_CODE();
|
|
|
|
UNREFERENCED_PARAMETER(SourceLength);
|
|
UNREFERENCED_PARAMETER(PercentUAllowed);
|
|
UNREFERENCED_PARAMETER(AllowRestrictedChars);
|
|
|
|
*pUnicodeChar = *pSourceChar;
|
|
*pBytesToSkip = 1;
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttppPopCharQueryString
|
|
|
|
|
|
|
|
//
|
|
// a cool local helper macro
|
|
//
|
|
|
|
#define EMIT_CHAR(ch, pDest, BytesCopied, Status, AllowRestrictedChars) \
|
|
do \
|
|
{ \
|
|
WCHAR HighSurrogate, LowSurrogate; \
|
|
\
|
|
if ((ch) > LOW_NONCHAR_BITS) \
|
|
{ \
|
|
Status = HttpUcs4toUtf16((ch), \
|
|
&HighSurrogate, &LowSurrogate); \
|
|
\
|
|
if (! NT_SUCCESS(Status)) \
|
|
goto end; \
|
|
\
|
|
*pDest++ = HighSurrogate; \
|
|
*pDest++ = LowSurrogate; \
|
|
BytesCopied += 2 * sizeof(WCHAR); \
|
|
} \
|
|
else \
|
|
{ \
|
|
ASSERT(ch < HIGH_SURROGATE_START \
|
|
|| LOW_SURROGATE_END < ch); \
|
|
\
|
|
if ( IS_UNICODE_NONCHAR((ch)) ) \
|
|
{ \
|
|
UlTraceError(PARSER, ( \
|
|
"http!HttpUcs4toUtf16(): " \
|
|
"Non-character code point, U+%04lX.\n", \
|
|
(ch) )); \
|
|
\
|
|
Status = STATUS_INVALID_PARAMETER; \
|
|
goto end; \
|
|
} \
|
|
\
|
|
*pDest++ = (WCHAR) (ch); \
|
|
BytesCopied += sizeof(WCHAR); \
|
|
} \
|
|
\
|
|
/* Can probably omit this test */ \
|
|
if (BytesCopied > UNICODE_STRING_MAX_BYTE_LEN) \
|
|
{ \
|
|
Status = STATUS_DATA_OVERRUN; \
|
|
goto end; \
|
|
} \
|
|
} while (0, 0)
|
|
|
|
|
|
#define EMIT_LITERAL_CHAR(ch, pDest, BytesCopied) \
|
|
do \
|
|
{ \
|
|
ASSERT(IS_ASCII(ch)); \
|
|
\
|
|
*pDest++ = (WCHAR) (ch); \
|
|
BytesCopied += sizeof(WCHAR); \
|
|
} while (0, 0)
|
|
|
|
|
|
#define HttppUrlEncodingToString(UrlEncoding) \
|
|
((UrlEncoding == UrlDecode_Ansi) \
|
|
? "Ansi" \
|
|
: (UrlEncoding == UrlDecode_Dbcs) \
|
|
? "Dbcs" \
|
|
: "Utf8")
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Copies a hostname, converting it to Unicode
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttpCopyHost(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied,
|
|
OUT PURL_ENCODING_TYPE pHostnameEncodingType
|
|
)
|
|
{
|
|
NTSTATUS Status = STATUS_UNSUCCESSFUL;
|
|
ULONG DecodeOrder = pCfg->HostnameDecodeOrder;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pCfg);
|
|
ASSERT(NULL != pDestination);
|
|
ASSERT(NULL != pSource);
|
|
ASSERT(NULL != pBytesCopied);
|
|
ASSERT(NULL != pHostnameEncodingType);
|
|
|
|
if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpCopyHost: invalid DecodeOrder, 0x%lX\n",
|
|
DecodeOrder
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
for ( ;
|
|
0 != DecodeOrder && !NT_SUCCESS(Status);
|
|
DecodeOrder >>= UrlDecode_Shift
|
|
)
|
|
{
|
|
ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
|
|
|
|
switch (UrlEncoding)
|
|
{
|
|
default:
|
|
ASSERT(! "Impossible UrlDecodeOrder");
|
|
|
|
case UrlDecode_None:
|
|
break;
|
|
|
|
case UrlDecode_Ansi:
|
|
case UrlDecode_Dbcs:
|
|
case UrlDecode_Utf8:
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCopyHost(%s, Src=%p, %lu)\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
pSource, SourceLength
|
|
));
|
|
|
|
Status = HttppCopyHostByType(
|
|
(URL_ENCODING_TYPE) UrlEncoding,
|
|
pDestination,
|
|
pSource,
|
|
SourceLength,
|
|
pBytesCopied
|
|
);
|
|
|
|
if (NT_SUCCESS(Status))
|
|
{
|
|
*pHostnameEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCopyHost(%s): "
|
|
"(%lu) '%.*s' -> (%lu) '%ls'\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
SourceLength, SourceLength, pSource,
|
|
*pBytesCopied/sizeof(WCHAR), pDestination
|
|
));
|
|
}
|
|
|
|
break;
|
|
};
|
|
}
|
|
|
|
return Status;
|
|
|
|
} // HttpCopyHost
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Copies a hostname, converting it to Unicode
|
|
|
|
CODEWORK: Handle ACE-encoded hostnames
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttppCopyHostByType(
|
|
IN URL_ENCODING_TYPE UrlEncoding,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
PWSTR pDest;
|
|
PCUCHAR pChar;
|
|
ULONG BytesCopied;
|
|
ULONG UnicodeChar;
|
|
ULONG CharToSkip;
|
|
PFN_POPCHAR_HOSTNAME pfnPopChar;
|
|
|
|
if (UrlEncoding_Ansi == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharHostNameAnsi;
|
|
else if (UrlEncoding_Dbcs == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharHostNameDbcs;
|
|
else if (UrlEncoding_Utf8 == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharHostNameUtf8;
|
|
else
|
|
{
|
|
ASSERT(! "Invalid UrlEncoding");
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
|
|
pDest = pDestination;
|
|
BytesCopied = 0;
|
|
|
|
pChar = pSource;
|
|
|
|
while ((int)SourceLength > 0)
|
|
{
|
|
UnicodeChar = *pChar;
|
|
|
|
if (IS_ASCII(UnicodeChar))
|
|
{
|
|
CharToSkip = 1;
|
|
}
|
|
else
|
|
{
|
|
Status = (*pfnPopChar)(
|
|
pChar,
|
|
SourceLength,
|
|
&UnicodeChar,
|
|
&CharToSkip
|
|
);
|
|
|
|
if (NT_SUCCESS(Status) == FALSE)
|
|
goto end;
|
|
}
|
|
|
|
ASSERT(CharToSkip <= SourceLength);
|
|
|
|
EMIT_CHAR(
|
|
UnicodeChar,
|
|
pDest,
|
|
BytesCopied,
|
|
Status,
|
|
FALSE
|
|
);
|
|
|
|
pChar += CharToSkip;
|
|
SourceLength -= CharToSkip;
|
|
}
|
|
|
|
//
|
|
// terminate the string, it hasn't been done in the loop
|
|
//
|
|
|
|
ASSERT((pDest-1)[0] != UNICODE_NULL);
|
|
|
|
pDest[0] = UNICODE_NULL;
|
|
*pBytesCopied = BytesCopied;
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
|
|
end:
|
|
return Status;
|
|
|
|
} // HttppCopyHostByType
|
|
|
|
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Validates that a hostname is well-formed
|
|
|
|
CODEWORK: For future IDN (International Domain Names) work,
|
|
we may need to handle raw UTF-8 or ACE hostnames.
|
|
|
|
Note: if the validation algorithm changes here, it may be necessary
|
|
to update HttpParseUrl() too.
|
|
|
|
Arguments:
|
|
|
|
pHostname - the hostname
|
|
HostnameLength - length of hostname, in bytes
|
|
HostnameType - Source of the hostname: Host header, AbsUri, or
|
|
synthesized from the transport's local IP address
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS if valid
|
|
|
|
--*/
|
|
|
|
NTSTATUS
|
|
HttpValidateHostname(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN PCUCHAR pHostname,
|
|
IN ULONG HostnameLength,
|
|
IN HOSTNAME_TYPE HostnameType,
|
|
OUT PSHORT pAddressType
|
|
)
|
|
{
|
|
PCUCHAR pChar;
|
|
PCUCHAR pLabel;
|
|
PCUCHAR pEnd = pHostname + HostnameLength;
|
|
PCSTR pTerminator;
|
|
NTSTATUS Status;
|
|
USHORT Port;
|
|
struct in_addr IPv4Address;
|
|
struct in6_addr IPv6Address;
|
|
BOOLEAN AlphaLabel;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pCfg);
|
|
ASSERT(NULL != pHostname);
|
|
ASSERT(NULL != pAddressType);
|
|
|
|
if (0 == HostnameLength)
|
|
{
|
|
// RFC 2616, 14.23 "Host" says that the Host header can be empty
|
|
if (Hostname_HostHeader == HostnameType)
|
|
goto end;
|
|
|
|
// It is an error for empty hostnames to appear elsewhere
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: empty hostname\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Is this an IPv6 literal address, per RFC 2732?
|
|
|
|
if ('[' == *pHostname)
|
|
{
|
|
// Empty brackets?
|
|
if (HostnameLength < STRLEN_LIT("[0]") || ']' == pHostname[1])
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: IPv6 address too short\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
for (pChar = pHostname + STRLEN_LIT("["); pChar < pEnd; ++pChar)
|
|
{
|
|
if (']' == *pChar)
|
|
break;
|
|
|
|
//
|
|
// Dots are allowed because the last 32 bits may be represented
|
|
// in IPv4 dotted-octet notation. We do not accept Scope IDs
|
|
// (indicated by '%') in hostnames.
|
|
//
|
|
|
|
if (IS_HTTP_HEX(*pChar) || ':' == *pChar || '.' == *pChar)
|
|
continue;
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid char in IPv6 address, 0x%02X '%c', "
|
|
"after '%.*s'\n",
|
|
*pChar,
|
|
IS_HTTP_PRINT(*pChar) ? *pChar : '?',
|
|
DIFF(pChar - pHostname),
|
|
pHostname
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (pChar == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: No ']' for IPv6 address\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(pChar < pEnd);
|
|
ASSERT(']' == *pChar);
|
|
|
|
// Let the RTL routine do the hard work of parsing IPv6 addrs
|
|
Status = RtlIpv6StringToAddressA(
|
|
(PCSTR) pHostname + STRLEN_LIT("["),
|
|
&pTerminator,
|
|
&IPv6Address
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid IPv6 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
if (pTerminator != (PCSTR) pChar)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid IPv6 terminator, 0x%02X '%c'\n",
|
|
*pTerminator,
|
|
IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
*pAddressType = TDI_ADDRESS_TYPE_IP6;
|
|
|
|
// Skip the terminating ']'
|
|
pChar += STRLEN_LIT("]");
|
|
|
|
// Any chars after the ']'?
|
|
if (pChar == pEnd)
|
|
{
|
|
ASSERT(DIFF(pEnd - pHostname) <= pCfg->MaxHostnameLength);
|
|
goto end;
|
|
}
|
|
|
|
ASSERT(pChar < pEnd);
|
|
|
|
if (':' == *pChar)
|
|
goto port;
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid char after IPv6 ']', 0x%02X '%c'\n",
|
|
*pChar,
|
|
IS_HTTP_PRINT(*pChar) ? *pChar : '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
//
|
|
// It must be a domain name or an IPv4 literal. We'll try to treat
|
|
// it as a domain name first. If it turns out to be all-numeric,
|
|
// we'll try decoding it as an IPv4 literal. We'll see if the name
|
|
// is well-formed, but we will not do a DNS lookup to see if it exists,
|
|
// as that would be much too expensive.
|
|
//
|
|
|
|
AlphaLabel = FALSE;
|
|
pLabel = pHostname;
|
|
|
|
for (pChar = pHostname; pChar < pEnd; ++pChar)
|
|
{
|
|
if (':' == *pChar)
|
|
{
|
|
if (pChar == pHostname)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: empty hostname\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
|
|
// exit the loop
|
|
break;
|
|
}
|
|
|
|
if ('.' == *pChar)
|
|
{
|
|
ULONG LabelLength = DIFF(pChar - pLabel);
|
|
|
|
// There must be at least one char in the label
|
|
if (0 == LabelLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: empty label\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Label can't have more than 63 chars
|
|
if (LabelLength > pCfg->MaxLabelLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: overlong label, %lu\n",
|
|
LabelLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Reset for the next label
|
|
pLabel = pChar + STRLEN_LIT(".");
|
|
|
|
continue;
|
|
}
|
|
|
|
// CODEWORK: handle DBCS characters
|
|
|
|
if (!IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
|
|
{
|
|
if (!IS_HTTP_DIGIT(*pChar))
|
|
AlphaLabel = TRUE;
|
|
|
|
if (pChar > pLabel)
|
|
continue;
|
|
|
|
// The first char of a label cannot be a hyphen. (Underscore?)
|
|
if ('-' == *pChar)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"'-' at beginning of label\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid char in hostname, 0x%02X '%c', "
|
|
"after '%.*s'\n",
|
|
*pChar,
|
|
IS_HTTP_PRINT(*pChar) ? *pChar : '?',
|
|
DIFF(pChar - pHostname),
|
|
pHostname
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
|
|
} // loop through hostname
|
|
|
|
|
|
ASSERT(pChar == pEnd || ':' == *pChar);
|
|
|
|
if (AlphaLabel)
|
|
{
|
|
*pAddressType = 0;
|
|
}
|
|
else
|
|
{
|
|
// Let's see if it's a valid IPv4 address
|
|
Status = RtlIpv4StringToAddressA(
|
|
(PCSTR) pHostname,
|
|
TRUE, // strict => 4 dotted decimal octets
|
|
&pTerminator,
|
|
&IPv4Address
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid IPv4 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
if (pTerminator != (PCSTR) pChar)
|
|
{
|
|
ASSERT(pTerminator < (PCSTR) pChar);
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid IPv4 address after %lu chars, "
|
|
"0x%02X, '%c'\n",
|
|
DIFF(pTerminator - (PCSTR) pHostname),
|
|
*pTerminator,
|
|
IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
*pAddressType = TDI_ADDRESS_TYPE_IP;
|
|
}
|
|
|
|
port:
|
|
|
|
//
|
|
// Parse the port number
|
|
//
|
|
|
|
// Check for overlong hostnames
|
|
if (DIFF(pChar - pHostname) > pCfg->MaxHostnameLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: overlong hostname, %lu\n",
|
|
DIFF(pChar - pHostname)
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (pChar == pEnd)
|
|
goto end;
|
|
|
|
ASSERT(pHostname < pChar && pChar < pEnd);
|
|
ASSERT(':' == *pChar);
|
|
|
|
pChar += STRLEN_LIT(":");
|
|
|
|
ASSERT(pChar <= pEnd);
|
|
|
|
// RFC 2616, section 3.2.2 "http URL", says:
|
|
// "If the port is empty or not given, port 80 is assumed".
|
|
if (pChar == pEnd)
|
|
{
|
|
Port = 80;
|
|
goto end;
|
|
}
|
|
|
|
Status = HttpAnsiStringToUShort(
|
|
pChar,
|
|
pEnd - pChar, // <port> must occupy all remaining chars
|
|
FALSE, // no leading zeros permitted
|
|
10,
|
|
(PUCHAR*) &pTerminator,
|
|
&Port
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: "
|
|
"Invalid port number, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(pTerminator == (PCSTR) pEnd);
|
|
|
|
if (0 == Port)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpValidateHostname: Port must not be zero.\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
end:
|
|
RETURN(STATUS_SUCCESS);
|
|
|
|
} // HttpValidateHostname
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
Convert to unicode
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttpCopyUrl(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied,
|
|
OUT PURL_ENCODING_TYPE pUrlEncodingType
|
|
)
|
|
{
|
|
NTSTATUS Status = STATUS_UNSUCCESSFUL;
|
|
ULONG DecodeOrder = pCfg->AbsPathDecodeOrder;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pDestination);
|
|
ASSERT(NULL != pSource);
|
|
ASSERT(NULL != pBytesCopied);
|
|
ASSERT(NULL != pUrlEncodingType);
|
|
|
|
if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpCopyUrl: invalid DecodeOrder, 0x%lX\n",
|
|
DecodeOrder
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
for ( ;
|
|
0 != DecodeOrder && !NT_SUCCESS(Status);
|
|
DecodeOrder >>= UrlDecode_Shift
|
|
)
|
|
{
|
|
ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
|
|
|
|
switch (UrlEncoding)
|
|
{
|
|
default:
|
|
ASSERT(! "Impossible UrlDecodeOrder");
|
|
|
|
case UrlDecode_None:
|
|
break;
|
|
|
|
case UrlDecode_Ansi:
|
|
case UrlDecode_Dbcs:
|
|
case UrlDecode_Utf8:
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCopyUrl(%s, Src=%p, %lu)\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
pSource, SourceLength
|
|
));
|
|
|
|
Status = HttppCopyUrlByType(
|
|
pCfg,
|
|
(URL_ENCODING_TYPE) UrlEncoding,
|
|
pDestination,
|
|
pSource,
|
|
SourceLength,
|
|
pBytesCopied
|
|
);
|
|
|
|
if (NT_SUCCESS(Status))
|
|
{
|
|
*pUrlEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCopyUrl(%s): "
|
|
"(%lu) '%.*s' -> (%lu) '%ls'\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
SourceLength, SourceLength, pSource,
|
|
*pBytesCopied/sizeof(WCHAR), pDestination
|
|
));
|
|
}
|
|
|
|
break;
|
|
};
|
|
}
|
|
|
|
return Status;
|
|
|
|
} // HttpCopyUrl
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
This function can be told to copy UTF-8, ANSI, or DBCS URLs.
|
|
|
|
Convert to Unicode
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttppCopyUrlByType(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN URL_ENCODING_TYPE UrlEncoding,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied
|
|
)
|
|
{
|
|
PWSTR pDest;
|
|
PCUCHAR pChar;
|
|
ULONG BytesCopied;
|
|
ULONG UnicodeChar;
|
|
ULONG CharToSkip;
|
|
#if DBG
|
|
NTSTATUS Status;
|
|
PFN_POPCHAR_ABSPATH pfnPopChar;
|
|
PWSTR pSegment = pDestination;
|
|
ULONG SegmentCount = 0;
|
|
#endif // DBG
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
#if DBG
|
|
if (UrlEncoding_Ansi == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathAnsi;
|
|
else if (UrlEncoding_Dbcs == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathDbcs;
|
|
else if (UrlEncoding_Utf8 == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathUtf8;
|
|
else
|
|
{
|
|
ASSERT(! "Invalid UrlEncoding");
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
#else // !DBG
|
|
UNREFERENCED_PARAMETER(pCfg);
|
|
UNREFERENCED_PARAMETER(UrlEncoding);
|
|
#endif // DBG
|
|
|
|
pDest = pDestination;
|
|
BytesCopied = 0;
|
|
|
|
pChar = pSource;
|
|
CharToSkip = 1;
|
|
|
|
while ((int)SourceLength > 0)
|
|
{
|
|
ULONG NextUnicodeChar = FastPopChars[*pChar];
|
|
|
|
//
|
|
// Grab the next character.
|
|
//
|
|
// All clean chars have a non-zero entry in FastPopChars[].
|
|
// All clean chars are in the US-ASCII range, 0-127.
|
|
//
|
|
|
|
ASSERT(0 != NextUnicodeChar);
|
|
ASSERT(IS_ASCII(NextUnicodeChar));
|
|
|
|
#if DBG
|
|
Status = (*pfnPopChar)(
|
|
pChar,
|
|
SourceLength,
|
|
pCfg->PercentUAllowed,
|
|
pCfg->AllowRestrictedChars,
|
|
&UnicodeChar,
|
|
&CharToSkip
|
|
);
|
|
|
|
ASSERT(NT_SUCCESS(Status));
|
|
ASSERT(UnicodeChar == NextUnicodeChar);
|
|
ASSERT(CharToSkip == 1);
|
|
#endif // !DBG
|
|
|
|
UnicodeChar = (WCHAR) NextUnicodeChar;
|
|
CharToSkip = 1;
|
|
|
|
#if DBG
|
|
// Because HttpFindUrlToken() marks as dirty any URLs that
|
|
// (appear to) have too many segments or overlong segments,
|
|
// we should never hit these assertions
|
|
if (FORWARD_SLASH == UnicodeChar)
|
|
{
|
|
ULONG SegmentLength = DIFF(pDest - pSegment);
|
|
|
|
// The segment length should be within bounds
|
|
|
|
ASSERT(SegmentLength > 0 || pDestination == pSegment);
|
|
ASSERT(SegmentLength
|
|
<= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));
|
|
|
|
pSegment = pDest;
|
|
++SegmentCount;
|
|
|
|
// There should not be too many segments
|
|
ASSERT(SegmentCount <= pCfg->UrlSegmentMaxCount);
|
|
}
|
|
#endif // DBG
|
|
|
|
EMIT_LITERAL_CHAR(UnicodeChar, pDest, BytesCopied);
|
|
|
|
pChar += CharToSkip;
|
|
SourceLength -= CharToSkip;
|
|
}
|
|
|
|
//
|
|
// terminate the string, it hasn't been done in the loop
|
|
//
|
|
|
|
ASSERT((pDest-1)[0] != UNICODE_NULL);
|
|
|
|
pDest[0] = UNICODE_NULL;
|
|
*pBytesCopied = BytesCopied;
|
|
|
|
ASSERT(DIFF(pDest - pSegment) > 0);
|
|
ASSERT(DIFF(pDest - pSegment)
|
|
<= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));
|
|
ASSERT(SegmentCount < pCfg->UrlSegmentMaxCount);
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttppCopyUrlByType
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
|
|
Unescape
|
|
Convert backslash to forward slash
|
|
Remove double slashes (empty directiories names) - e.g. // or \\
|
|
Handle /./
|
|
Handle /../
|
|
Convert to unicode
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
|
|
Note: Any changes to this code may require changes for the fast path code too.
|
|
The fast path is HttpCopyUrl.
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttpCleanAndCopyUrl(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN URL_PART UrlPart,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied,
|
|
OUT PWSTR * ppQueryString OPTIONAL,
|
|
OUT PURL_ENCODING_TYPE pUrlEncodingType
|
|
)
|
|
{
|
|
NTSTATUS Status = STATUS_UNSUCCESSFUL;
|
|
ULONG DecodeOrder = pCfg->AbsPathDecodeOrder;
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pDestination);
|
|
ASSERT(NULL != pSource);
|
|
ASSERT(NULL != pBytesCopied);
|
|
ASSERT(NULL != pUrlEncodingType);
|
|
|
|
if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpCleanAndCopyUrl: invalid DecodeOrder, 0x%lX\n",
|
|
DecodeOrder
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
for ( ;
|
|
0 != DecodeOrder && !NT_SUCCESS(Status);
|
|
DecodeOrder >>= UrlDecode_Shift
|
|
)
|
|
{
|
|
ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
|
|
|
|
switch (UrlEncoding)
|
|
{
|
|
default:
|
|
ASSERT(! "Impossible UrlDecodeOrder");
|
|
|
|
case UrlDecode_None:
|
|
break;
|
|
|
|
case UrlDecode_Ansi:
|
|
case UrlDecode_Dbcs:
|
|
case UrlDecode_Utf8:
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCleanAndCopyUrl(%s, Src=%p, %lu)\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
pSource, SourceLength
|
|
));
|
|
|
|
Status = HttppCleanAndCopyUrlByType(
|
|
pCfg,
|
|
(URL_ENCODING_TYPE) UrlEncoding,
|
|
UrlPart,
|
|
pDestination,
|
|
pSource,
|
|
SourceLength,
|
|
pBytesCopied,
|
|
ppQueryString
|
|
);
|
|
|
|
if (NT_SUCCESS(Status))
|
|
{
|
|
*pUrlEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpCleanAndCopyUrl(%s): "
|
|
"(%lu) '%.*s' -> (%lu) '%ls'\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
SourceLength, SourceLength, pSource,
|
|
*pBytesCopied/sizeof(WCHAR), pDestination
|
|
));
|
|
}
|
|
|
|
break;
|
|
};
|
|
}
|
|
|
|
return Status;
|
|
|
|
} // HttpCleanAndCopyUrl
|
|
|
|
|
|
|
|
//
|
|
// HttppCleanAndCopyUrlByType() uses StateFromStateAndToken[][] and
|
|
// ActionFromStateAndToken[][] to handle "//", "/./", and "/../" productions.
|
|
//
|
|
|
|
#define TOK_STATE(state, other, dot, eos, slash) \
|
|
{ \
|
|
URL_STATE_ ## other, \
|
|
URL_STATE_ ## dot, \
|
|
URL_STATE_ ## eos, \
|
|
URL_STATE_ ## slash \
|
|
}
|
|
|
|
//
|
|
// CanonStateFromStateAndToken[][] is used by HttpParseUrl() to reject
|
|
// "//", "/./", and "/../" sequences, as these URLs are supposed to
|
|
// be in canonical form already.
|
|
//
|
|
|
|
const URL_STATE
|
|
CanonStateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
|
|
{
|
|
// State \ Token: Other '.' EOS '/'
|
|
TOK_STATE( START, START, START, END, SLASH),
|
|
TOK_STATE( SLASH, START, SLASH_DOT, END, ERROR),
|
|
TOK_STATE( SLASH_DOT, START, SLASH_DOT_DOT, END, ERROR),
|
|
TOK_STATE( SLASH_DOT_DOT, START, START, ERROR, ERROR),
|
|
|
|
TOK_STATE( END, END, END, END, END),
|
|
TOK_STATE( ERROR, ERROR, ERROR, ERROR, ERROR)
|
|
};
|
|
|
|
|
|
//
|
|
// StateFromStateAndToken[][] says which new state to transition to given
|
|
// the current state and the token we saw. Used by HttppCleanAndCopyUrlByType()
|
|
//
|
|
|
|
const URL_STATE
|
|
StateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
|
|
{
|
|
// State \ Token: Other '.' EOS '/'
|
|
TOK_STATE( START, START, START, END, SLASH),
|
|
TOK_STATE( SLASH, START, SLASH_DOT, END, SLASH),
|
|
TOK_STATE( SLASH_DOT, START, SLASH_DOT_DOT, END, SLASH),
|
|
TOK_STATE( SLASH_DOT_DOT, START, START, END, SLASH),
|
|
|
|
TOK_STATE( END, END, END, END, END),
|
|
TOK_STATE( ERROR, ERROR, ERROR, ERROR, ERROR)
|
|
};
|
|
|
|
|
|
//
|
|
// ActionFromStateAndToken[][] says what action to perform based on
|
|
// the current state and the current token
|
|
//
|
|
|
|
#define NEW_ACTION(state, other, dot, eos, slash) \
|
|
{ \
|
|
ACTION_ ## other, \
|
|
ACTION_ ## dot, \
|
|
ACTION_ ## eos, \
|
|
ACTION_ ## slash \
|
|
}
|
|
|
|
const URL_ACTION
|
|
ActionFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
|
|
{
|
|
// State \ Token: Other '.' EOS '/'
|
|
NEW_ACTION(START, EMIT_CH, EMIT_CH, NOTHING, EMIT_CH),
|
|
NEW_ACTION(SLASH, EMIT_CH, NOTHING, NOTHING, NOTHING),
|
|
NEW_ACTION(SLASH_DOT, EMIT_DOT_CH, NOTHING, NOTHING, NOTHING),
|
|
NEW_ACTION(SLASH_DOT_DOT, EMIT_DOT_DOT_CH,
|
|
EMIT_DOT_DOT_CH, BACKUP, BACKUP),
|
|
|
|
NEW_ACTION(END, NOTHING, NOTHING, NOTHING, NOTHING)
|
|
};
|
|
|
|
|
|
#if DBG
|
|
|
|
PCSTR
|
|
HttppUrlActionToString(
|
|
URL_ACTION Action)
|
|
{
|
|
switch (Action)
|
|
{
|
|
case ACTION_NOTHING: return "NOTHING";
|
|
case ACTION_EMIT_CH: return "EMIT_CH";
|
|
case ACTION_EMIT_DOT_CH: return "EMIT_DOT_CH";
|
|
case ACTION_EMIT_DOT_DOT_CH: return "EMIT_DOT_DOT_CH";
|
|
case ACTION_BACKUP: return "BACKUP";
|
|
case ACTION_MAX: return "MAX";
|
|
default:
|
|
ASSERT(! "Invalid URL_ACTION");
|
|
return "ACTION_???";
|
|
}
|
|
} // HttppUrlActionToString
|
|
|
|
|
|
PCSTR
|
|
HttppUrlStateToString(
|
|
URL_STATE UrlState)
|
|
{
|
|
switch (UrlState)
|
|
{
|
|
case URL_STATE_START: return "START";
|
|
case URL_STATE_SLASH: return "SLASH";
|
|
case URL_STATE_SLASH_DOT: return "SLASH_DOT";
|
|
case URL_STATE_SLASH_DOT_DOT: return "SLASH_DOT_DOT";
|
|
case URL_STATE_END: return "END";
|
|
case URL_STATE_ERROR: return "ERROR";
|
|
case URL_STATE_MAX: return "MAX";
|
|
default:
|
|
ASSERT(! "Invalid URL_STATE");
|
|
return "URL_STATE_???";
|
|
}
|
|
} // HttppUrlStateToString
|
|
|
|
|
|
PCSTR
|
|
HttppUrlTokenToString(
|
|
URL_STATE_TOKEN UrlToken)
|
|
{
|
|
switch (UrlToken)
|
|
{
|
|
case URL_TOKEN_OTHER: return "OTHER";
|
|
case URL_TOKEN_DOT: return "DOT";
|
|
case URL_TOKEN_EOS: return "EOS";
|
|
case URL_TOKEN_SLASH: return "SLASH";
|
|
case URL_TOKEN_MAX: return "MAX";
|
|
default:
|
|
ASSERT(! "Invalid URL_STATE_TOKEN");
|
|
return "URL_TOKEN_???";
|
|
}
|
|
} // HttppUrlTokenToString
|
|
|
|
#endif // DBG
|
|
|
|
|
|
PCSTR
|
|
HttpSiteTypeToString(
|
|
HTTP_URL_SITE_TYPE SiteType
|
|
)
|
|
{
|
|
switch (SiteType)
|
|
{
|
|
case HttpUrlSite_None: return "None";
|
|
case HttpUrlSite_Name: return "Name";
|
|
case HttpUrlSite_IP: return "IP";
|
|
case HttpUrlSite_NamePlusIP: return "Name+IP";
|
|
case HttpUrlSite_WeakWildcard: return "Weak";
|
|
case HttpUrlSite_StrongWildcard: return "Strong";
|
|
case HttpUrlSite_Max: return "Max";
|
|
default:
|
|
ASSERT(! "Invalid HTTP_URL_SITE_TYPE");
|
|
return "????";
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
|
|
This function can be told to clean up UTF-8, ANSI, or DBCS URLs.
|
|
|
|
Unescape
|
|
Convert backslash to forward slash
|
|
Remove double slashes (empty directiories names) - e.g. // or \\
|
|
Handle /./
|
|
Handle /../
|
|
Convert to unicode
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - Completion status.
|
|
|
|
Note: Any changes to this code may require changes for the fast path code too.
|
|
The fast path is HttppCopyUrlByType.
|
|
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttppCleanAndCopyUrlByType(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN URL_ENCODING_TYPE UrlEncoding,
|
|
IN URL_PART UrlPart,
|
|
OUT PWSTR pDestination,
|
|
IN PCUCHAR pSource,
|
|
IN ULONG SourceLength,
|
|
OUT PULONG pBytesCopied,
|
|
OUT PWSTR * ppQueryString OPTIONAL
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
PWSTR pDest;
|
|
PCUCHAR pChar;
|
|
ULONG CharToSkip;
|
|
ULONG BytesCopied;
|
|
PWSTR pQueryString;
|
|
URL_STATE UrlState = URL_STATE_START;
|
|
URL_STATE_TOKEN UrlToken = URL_TOKEN_OTHER;
|
|
URL_ACTION Action = ACTION_NOTHING;
|
|
ULONG UnicodeChar;
|
|
BOOLEAN MakeCanonical;
|
|
PWCHAR pFastPopChar;
|
|
PFN_POPCHAR_ABSPATH pfnPopChar;
|
|
PWSTR pSegment = pDestination;
|
|
ULONG SegmentCount = 0;
|
|
BOOLEAN TestSegment = FALSE;
|
|
#if DBG
|
|
ULONG OriginalSourceLength = SourceLength;
|
|
#endif
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(UrlPart_AbsPath == UrlPart);
|
|
|
|
if (UrlEncoding_Ansi == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathAnsi;
|
|
else if (UrlEncoding_Dbcs == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathDbcs;
|
|
else if (UrlEncoding_Utf8 == UrlEncoding)
|
|
pfnPopChar = &HttppPopCharAbsPathUtf8;
|
|
else
|
|
{
|
|
ASSERT(! "Invalid UrlEncoding");
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(FORWARD_SLASH == *pSource);
|
|
|
|
pDest = pDestination;
|
|
pQueryString = NULL;
|
|
BytesCopied = 0;
|
|
|
|
pChar = pSource;
|
|
CharToSkip = 0;
|
|
|
|
UrlState = 0;
|
|
|
|
MakeCanonical = (BOOLEAN) (UrlPart == UrlPart_AbsPath);
|
|
|
|
if (UrlEncoding == UrlEncoding_Utf8 && UrlPart != UrlPart_QueryString)
|
|
{
|
|
pFastPopChar = FastPopChars;
|
|
}
|
|
else
|
|
{
|
|
pFastPopChar = DummyPopChars;
|
|
}
|
|
|
|
while (SourceLength > 0)
|
|
{
|
|
//
|
|
// advance ! it's at the top of the loop to enable ANSI_NULL to
|
|
// come through ONCE
|
|
//
|
|
|
|
ASSERT(CharToSkip <= SourceLength);
|
|
|
|
pChar += CharToSkip;
|
|
SourceLength -= CharToSkip;
|
|
|
|
//
|
|
// well? have we hit the end?
|
|
//
|
|
|
|
if (SourceLength == 0)
|
|
{
|
|
UnicodeChar = UNICODE_NULL;
|
|
CharToSkip = 1;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Nope. Peek briefly to see if we hit the query string
|
|
//
|
|
|
|
if (UrlPart == UrlPart_AbsPath && pChar[0] == QUESTION_MARK)
|
|
{
|
|
ASSERT(pQueryString == NULL);
|
|
|
|
//
|
|
// remember its location
|
|
//
|
|
|
|
pQueryString = pDest;
|
|
|
|
//
|
|
// let it fall through ONCE to the canonical
|
|
// in order to handle a trailing "/.." like
|
|
// "http://foobar:80/foo/bar/..?v=1&v2"
|
|
//
|
|
|
|
TestSegment = TRUE;
|
|
UnicodeChar = QUESTION_MARK;
|
|
CharToSkip = 1;
|
|
|
|
//
|
|
// now we are cleaning the query string
|
|
//
|
|
|
|
UrlPart = UrlPart_QueryString;
|
|
|
|
UlTraceVerbose(PARSER, ("QueryString @ %p\n", pQueryString));
|
|
|
|
//
|
|
// cannot use fast path for PopChar anymore
|
|
//
|
|
|
|
pFastPopChar = DummyPopChars;
|
|
|
|
pfnPopChar = HttppPopCharQueryString;
|
|
}
|
|
else
|
|
{
|
|
ULONG NextUnicodeChar = pFastPopChar[*pChar];
|
|
|
|
//
|
|
// Grab the next character. Try to be fast for the
|
|
// normal character case. Otherwise call PopChar.
|
|
//
|
|
|
|
if (NextUnicodeChar == 0)
|
|
{
|
|
Status = (*pfnPopChar)(
|
|
pChar,
|
|
SourceLength,
|
|
pCfg->PercentUAllowed,
|
|
pCfg->AllowRestrictedChars,
|
|
&UnicodeChar,
|
|
&CharToSkip
|
|
);
|
|
|
|
if (NT_SUCCESS(Status) == FALSE)
|
|
goto end;
|
|
}
|
|
else
|
|
{
|
|
#if DBG
|
|
Status = (*pfnPopChar)(
|
|
pChar,
|
|
SourceLength,
|
|
pCfg->PercentUAllowed,
|
|
pCfg->AllowRestrictedChars,
|
|
&UnicodeChar,
|
|
&CharToSkip
|
|
);
|
|
|
|
ASSERT(NT_SUCCESS(Status));
|
|
ASSERT(UnicodeChar == NextUnicodeChar);
|
|
ASSERT(CharToSkip == 1);
|
|
#endif // DBG
|
|
UnicodeChar = (WCHAR) NextUnicodeChar;
|
|
CharToSkip = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!MakeCanonical)
|
|
{
|
|
UrlToken = (UnicodeChar == UNICODE_NULL)
|
|
? URL_TOKEN_EOS
|
|
: URL_TOKEN_OTHER;
|
|
TestSegment = FALSE;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// now use the state machine to make it canonical.
|
|
//
|
|
|
|
//
|
|
// did we just hit the query string? this will only happen once
|
|
// that we take this branch after hitting it, as we stop
|
|
// processing after hitting it.
|
|
//
|
|
|
|
if (UrlPart == UrlPart_QueryString)
|
|
{
|
|
//
|
|
// treat this just like we hit a NULL, EOS.
|
|
//
|
|
|
|
ASSERT(QUESTION_MARK == UnicodeChar);
|
|
|
|
UrlToken = URL_TOKEN_EOS;
|
|
TestSegment = TRUE;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// otherwise based the new state off of the char we
|
|
// just popped.
|
|
//
|
|
|
|
switch (UnicodeChar)
|
|
{
|
|
case UNICODE_NULL:
|
|
UrlToken = URL_TOKEN_EOS;
|
|
TestSegment = TRUE;
|
|
break;
|
|
|
|
case DOT:
|
|
UrlToken = URL_TOKEN_DOT;
|
|
TestSegment = FALSE;
|
|
break;
|
|
|
|
case FORWARD_SLASH:
|
|
UrlToken = URL_TOKEN_SLASH;
|
|
TestSegment = TRUE;
|
|
break;
|
|
|
|
default:
|
|
UrlToken = URL_TOKEN_OTHER;
|
|
TestSegment = FALSE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
Action = ActionFromStateAndToken[UrlState][UrlToken];
|
|
|
|
IF_DEBUG2BOTH(PARSER, VERBOSE)
|
|
{
|
|
ULONG i;
|
|
UCHAR HexBuff[5*12 + 10];
|
|
PUCHAR p = HexBuff;
|
|
UCHAR Byte;
|
|
|
|
ASSERT(CharToSkip <= 4 * STRLEN_LIT("%NN"));
|
|
|
|
// Generate something like
|
|
// "[25 65 32 25 38 30 25 39 35] '%e2%80%95'"
|
|
|
|
*p++ = '[';
|
|
|
|
for (i = 0; i < CharToSkip; ++i)
|
|
{
|
|
const static char hexArray[] = "0123456789ABCDEF";
|
|
|
|
Byte = pChar[i];
|
|
*p++ = hexArray[Byte >> 4];
|
|
*p++ = hexArray[Byte & 0xf];
|
|
*p++ = ' ';
|
|
}
|
|
|
|
p[-1] = ']'; // overwrite last ' '
|
|
*p++ = ' ';
|
|
*p++ = '\'';
|
|
|
|
for (i = 0; i < CharToSkip; ++i)
|
|
{
|
|
Byte = pChar[i];
|
|
*p++ = (IS_HTTP_PRINT(Byte) ? Byte : '?');
|
|
}
|
|
|
|
*p++ = '\'';
|
|
*p++ = '\0';
|
|
|
|
ASSERT(DIFF(p - HexBuff) <= DIMENSION(HexBuff));
|
|
|
|
UlTrace(PARSER,
|
|
("http!HttppCleanAndCopyUrlByType(%s): "
|
|
"(%lu) %s -> U+%04lX '%c': "
|
|
"[%s][%s] -> %s, %s%s\n",
|
|
HttppUrlEncodingToString(UrlEncoding),
|
|
CharToSkip, HexBuff,
|
|
UnicodeChar,
|
|
IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
|
|
? (UCHAR) UnicodeChar : '?',
|
|
HttppUrlStateToString(UrlState),
|
|
HttppUrlTokenToString(UrlToken),
|
|
HttppUrlStateToString(
|
|
StateFromStateAndToken[UrlState][UrlToken]),
|
|
HttppUrlActionToString(Action),
|
|
TestSegment ? ", TestSegment" : ""
|
|
));
|
|
|
|
} // IF_DEBUG2BOTH(PARSER, VERBOSE)
|
|
|
|
//
|
|
// Segment length and segment count checks
|
|
//
|
|
|
|
if (TestSegment)
|
|
{
|
|
ULONG SegmentLength = DIFF(pDest - pSegment);
|
|
|
|
ASSERT(pSegment <= pDest);
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttppCleanAndCopyUrlByType: "
|
|
"Segment[%lu] %p (%lu) = '%.*ls'\n",
|
|
SegmentCount, pSegment, SegmentLength,
|
|
SegmentLength, pSegment
|
|
));
|
|
|
|
// Reject if segment too long
|
|
if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppCleanAndCopyUrlByType: "
|
|
"Segment too long: %lu\n",
|
|
SegmentLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_DEVICE_REQUEST);
|
|
}
|
|
|
|
pSegment = pDest;
|
|
|
|
// Reject if too many path segments
|
|
|
|
if (Action != ACTION_NOTHING)
|
|
{
|
|
if (pSegment == pDestination)
|
|
{
|
|
SegmentCount = 0;
|
|
}
|
|
else if (++SegmentCount > pCfg->UrlSegmentMaxCount)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppCleanAndCopyUrlByType: "
|
|
"Too many segments: %lu\n",
|
|
SegmentCount
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_DEVICE_REQUEST);
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Perform the action associated with the state.
|
|
//
|
|
|
|
switch (Action)
|
|
{
|
|
case ACTION_EMIT_DOT_DOT_CH:
|
|
|
|
EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);
|
|
|
|
// fall through
|
|
|
|
case ACTION_EMIT_DOT_CH:
|
|
|
|
EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);
|
|
|
|
// fall through
|
|
|
|
case ACTION_EMIT_CH:
|
|
|
|
|
|
EMIT_CHAR(
|
|
UnicodeChar,
|
|
pDest,
|
|
BytesCopied,
|
|
Status,
|
|
pCfg->AllowRestrictedChars
|
|
);
|
|
|
|
// fall through
|
|
|
|
case ACTION_NOTHING:
|
|
break;
|
|
|
|
case ACTION_BACKUP:
|
|
|
|
//
|
|
// pDest currently points 1 past the last '/'. backup over it and
|
|
// find the preceding '/', set pDest to 1 past that one.
|
|
//
|
|
|
|
//
|
|
// backup to the '/'
|
|
//
|
|
|
|
pDest -= 1;
|
|
BytesCopied -= sizeof(WCHAR);
|
|
|
|
ASSERT(pDest[0] == FORWARD_SLASH);
|
|
|
|
//
|
|
// are we at the start of the string? that's bad, can't go back!
|
|
//
|
|
|
|
if (pDest == pDestination)
|
|
{
|
|
ASSERT(BytesCopied == 0);
|
|
|
|
UlTraceError(PARSER, (
|
|
"http!HttppCleanAndCopyUrl: "
|
|
"Can't back up for \"/../\"\n"
|
|
));
|
|
|
|
Status = STATUS_OBJECT_PATH_INVALID;
|
|
goto end;
|
|
}
|
|
|
|
//
|
|
// back up over the '/'
|
|
//
|
|
|
|
pDest -= 1;
|
|
BytesCopied -= sizeof(WCHAR);
|
|
|
|
ASSERT(pDest > pDestination);
|
|
|
|
//
|
|
// now find the previous slash
|
|
//
|
|
|
|
while (pDest > pDestination && pDest[0] != FORWARD_SLASH)
|
|
{
|
|
pDest -= 1;
|
|
BytesCopied -= sizeof(WCHAR);
|
|
}
|
|
|
|
//
|
|
// Adjust segment trackers downwards
|
|
//
|
|
|
|
pSegment = pDest;
|
|
|
|
if (pSegment == pDestination)
|
|
SegmentCount = 0;
|
|
else
|
|
--SegmentCount;
|
|
|
|
//
|
|
// we already have a slash, so don't have to store one.
|
|
//
|
|
|
|
ASSERT(pDest[0] == FORWARD_SLASH);
|
|
|
|
//
|
|
// simply skip it, as if we had emitted it just now
|
|
//
|
|
|
|
pDest += 1;
|
|
BytesCopied += sizeof(WCHAR);
|
|
|
|
break;
|
|
|
|
default:
|
|
ASSERT(!"http!HttppCleanAndCopyUrl: "
|
|
"Invalid action code in state table!");
|
|
Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
|
|
goto end;
|
|
}
|
|
|
|
//
|
|
// Just hit the query string ?
|
|
//
|
|
|
|
if (MakeCanonical && UrlPart == UrlPart_QueryString)
|
|
{
|
|
//
|
|
// Stop canonical processing
|
|
//
|
|
|
|
MakeCanonical = FALSE;
|
|
|
|
//
|
|
// Need to emit the '?', it wasn't emitted above
|
|
//
|
|
|
|
ASSERT(ActionFromStateAndToken[UrlState][UrlToken]
|
|
!= ACTION_EMIT_CH);
|
|
|
|
//
|
|
// remember its location (in case we backed up)
|
|
//
|
|
|
|
pQueryString = pDest;
|
|
|
|
EMIT_LITERAL_CHAR(QUESTION_MARK, pDest, BytesCopied);
|
|
|
|
// reset
|
|
UrlToken = URL_TOKEN_OTHER;
|
|
UrlState = URL_STATE_START;
|
|
}
|
|
|
|
// update the URL state
|
|
UrlState = StateFromStateAndToken[UrlState][UrlToken];
|
|
|
|
ASSERT(URL_STATE_ERROR != UrlState);
|
|
}
|
|
|
|
//
|
|
// terminate the string, it hasn't been done in the loop
|
|
//
|
|
|
|
ASSERT((pDest-1)[0] != UNICODE_NULL);
|
|
|
|
pDest[0] = UNICODE_NULL;
|
|
*pBytesCopied = BytesCopied;
|
|
|
|
if (BytesCopied > pCfg->UrlMaxLength * sizeof(WCHAR))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttppCleanAndCopyUrlByType: "
|
|
"URL too long: %lu\n",
|
|
BytesCopied
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_DEVICE_REQUEST);
|
|
}
|
|
|
|
if (ppQueryString != NULL)
|
|
{
|
|
*ppQueryString = pQueryString;
|
|
}
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttppCleanAndCopyUrlByType: "
|
|
"(%lu) '%.*s' -> (%lu) '%.*ls', %squerystring\n",
|
|
OriginalSourceLength,
|
|
OriginalSourceLength, pSource,
|
|
BytesCopied/sizeof(WCHAR),
|
|
BytesCopied/sizeof(WCHAR), pDestination,
|
|
pQueryString != NULL ? "" : "no "
|
|
));
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
|
|
end:
|
|
return Status;
|
|
|
|
} // HttppCleanAndCopyUrlByType
|
|
|
|
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
A utility routine to find a Url token. We take an input pointer, skip any
|
|
preceding LWS, then scan the token until we find either LWS or a CRLF
|
|
pair. We also mark the request to have a "Clean" Url
|
|
|
|
Arguments:
|
|
|
|
pBuffer - Buffer to search for token.
|
|
BufferLength - Length of data pointed to by pBuffer.
|
|
ppTokenStart - Where to return the start of the token, if we locate
|
|
its delimiter.
|
|
pTokenLength - Where to return the length of the token.
|
|
pRawUrlClean - where to return cleanliness of URL
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS if no parsing errors in the URL.
|
|
We also return, in *ppTokenStart, a pointer to the token we found,
|
|
or NULL if we don't find a whitespace-delimited token.
|
|
pRawUrlClean flag may be set.
|
|
|
|
--*/
|
|
NTSTATUS
|
|
HttpFindUrlToken(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN PCUCHAR pBuffer,
|
|
IN ULONG BufferLength,
|
|
OUT PUCHAR* ppTokenStart,
|
|
OUT PULONG pTokenLength,
|
|
OUT PBOOLEAN pRawUrlClean
|
|
)
|
|
{
|
|
PCUCHAR pTokenStart;
|
|
PCUCHAR pSegment;
|
|
UCHAR CurrentChar;
|
|
UCHAR PreviousChar;
|
|
ULONG SegmentCount = 0;
|
|
ULONG TokenLength;
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pBuffer);
|
|
ASSERT(NULL != ppTokenStart);
|
|
ASSERT(NULL != pTokenLength);
|
|
ASSERT(NULL != pRawUrlClean);
|
|
|
|
//
|
|
// Assume Clean RawUrl
|
|
//
|
|
|
|
*pRawUrlClean = TRUE;
|
|
*ppTokenStart = NULL;
|
|
*pTokenLength = 0;
|
|
|
|
//
|
|
// First, skip any preceding LWS.
|
|
//
|
|
|
|
while (BufferLength > 0 && IS_HTTP_LWS(*pBuffer))
|
|
{
|
|
pBuffer++;
|
|
BufferLength--;
|
|
}
|
|
|
|
// If we stopped because we ran out of buffer, bail.
|
|
if (BufferLength == 0)
|
|
{
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
pTokenStart = pBuffer;
|
|
PreviousChar = ANSI_NULL;
|
|
|
|
// This will usually point to a '/', but it won't if this is an AbsURI.
|
|
// It doesn't really matter, since only a few borderline cases will
|
|
// be marked as dirty that might not otherwise be.
|
|
pSegment = pBuffer;
|
|
|
|
// Now skip over the token, until we see either LWS or a CR or LF.
|
|
|
|
while ( BufferLength != 0 )
|
|
{
|
|
CurrentChar = *pBuffer;
|
|
|
|
// must check for WS [ \t\r\n] first, since \t, \r, & \n are CTL chars!
|
|
if ( IS_HTTP_WS_TOKEN(CurrentChar) )
|
|
{
|
|
break;
|
|
}
|
|
|
|
if ( IS_HTTP_CTL(CurrentChar) )
|
|
{
|
|
*pRawUrlClean = FALSE;
|
|
*ppTokenStart = NULL;
|
|
|
|
UlTraceError(PARSER, (
|
|
"http!HttpFindUrlToken: "
|
|
"Found control char: %02X\n",
|
|
CurrentChar
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_DEVICE_REQUEST);
|
|
}
|
|
|
|
//
|
|
// URL is NOT clean if it contains any of the following patterns
|
|
//
|
|
// a. back slash "\"
|
|
// b. dot, forward slash | forward slash, forward slash "./" | "//"
|
|
// c. forward slash, dot | dot, dot "/." | ".."
|
|
// d. question mark (querystring) "?"
|
|
// e. percent (hex escape) "%"
|
|
// f. raw bytes with high bit set, >= 0x80
|
|
//
|
|
// These are conservative estimates of "Clean"; some clean URLs may not
|
|
// be marked as clean. For such URLs, we'll skip the fast path but at
|
|
// no loss of functionality.
|
|
//
|
|
|
|
if ( IS_URL_DIRTY(CurrentChar) )
|
|
{
|
|
// Only do the checks if it's still clean
|
|
if (*pRawUrlClean)
|
|
{
|
|
if (CurrentChar == FORWARD_SLASH || CurrentChar == DOT)
|
|
{
|
|
if (PreviousChar == FORWARD_SLASH || PreviousChar == DOT)
|
|
{
|
|
*pRawUrlClean = FALSE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*pRawUrlClean = FALSE;
|
|
}
|
|
}
|
|
|
|
if (CurrentChar == FORWARD_SLASH)
|
|
{
|
|
ULONG SegmentLength = DIFF(pBuffer - pSegment);
|
|
|
|
// If the segment contains %-hex-escaped chars, it may become
|
|
// acceptably short after PopChar() processing. Let
|
|
// HttppCleanAndCopyUrlByType() figure it out.
|
|
|
|
if (SegmentLength > pCfg->UrlSegmentMaxLength)
|
|
*pRawUrlClean = FALSE;
|
|
|
|
pSegment = pBuffer;
|
|
|
|
// If this is an AbsURI, instead of an AbsPath, the
|
|
// segment count will be higher, because of the two slashes
|
|
// before the hostname. Also, "/../", "/./", and "//"
|
|
// minimization will reduce the final count of segments.
|
|
// Again, let HttppCleanAndCopyUrlByType() figure it out.
|
|
|
|
if (++SegmentCount > pCfg->UrlSegmentMaxCount)
|
|
*pRawUrlClean = FALSE;
|
|
}
|
|
}
|
|
|
|
PreviousChar = CurrentChar;
|
|
pBuffer++;
|
|
BufferLength--;
|
|
}
|
|
|
|
// See why we stopped.
|
|
if (0 == BufferLength)
|
|
{
|
|
*pRawUrlClean = FALSE;
|
|
|
|
// Ran out of buffer before end of token.
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
ASSERT(IS_HTTP_WS_TOKEN(*pBuffer));
|
|
|
|
TokenLength = DIFF(pBuffer - pTokenStart);
|
|
|
|
if (0 == TokenLength)
|
|
{
|
|
UlTraceError(PARSER, ("http!HttpFindUrlToken: Found empty token\n"));
|
|
|
|
RETURN(STATUS_INVALID_DEVICE_REQUEST);
|
|
}
|
|
|
|
// Check the final segment
|
|
if (DIFF(pBuffer - pSegment) > pCfg->UrlSegmentMaxLength)
|
|
*pRawUrlClean = FALSE;
|
|
|
|
if (++SegmentCount > pCfg->UrlSegmentMaxCount)
|
|
*pRawUrlClean = FALSE;
|
|
|
|
if (TokenLength > pCfg->UrlMaxLength)
|
|
*pRawUrlClean = FALSE;
|
|
|
|
// Success! Set the token length and return the start of the token.
|
|
*pTokenLength = TokenLength;
|
|
*ppTokenStart = (PUCHAR) pTokenStart;
|
|
|
|
return STATUS_SUCCESS;
|
|
|
|
} // HttpFindUrlToken
|
|
|
|
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Parse an IPv6 address from a Unicode buffer. Must be delimited by [].
|
|
May contain a scope ID.
|
|
|
|
Arguments:
|
|
|
|
pBuffer - Buffer to parse. Must point to '['.
|
|
BufferLength - Length of data pointed to by pBuffer.
|
|
ScopeIdAllowed - if TRUE, an optional scope ID may be present
|
|
pSockAddr6 - Where to return the parsed IPv6 address
|
|
ppEnd - On success, points to character after ']'
|
|
|
|
Return Value:
|
|
|
|
STATUS_SUCCESS if no parsing errors in the IPv6 address.
|
|
|
|
--*/
|
|
NTSTATUS
|
|
HttppParseIPv6Address(
|
|
IN PCWSTR pBuffer,
|
|
IN ULONG BufferLength,
|
|
IN BOOLEAN ScopeIdAllowed,
|
|
OUT PSOCKADDR_IN6 pSockAddr6,
|
|
OUT PCWSTR* ppEnd
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
PCWSTR pEnd = pBuffer + BufferLength;
|
|
PCWSTR pChar;
|
|
PWSTR pTerminator;
|
|
ULONG ScopeTemp;
|
|
|
|
ASSERT(NULL != pBuffer);
|
|
ASSERT(0 < BufferLength);
|
|
ASSERT(NULL != pSockAddr6);
|
|
ASSERT(NULL != ppEnd);
|
|
|
|
RtlZeroMemory(pSockAddr6, sizeof(*pSockAddr6));
|
|
*ppEnd = NULL;
|
|
|
|
pSockAddr6->sin6_family = TDI_ADDRESS_TYPE_IP6;
|
|
|
|
// Caller guarantees this
|
|
ASSERT(L'[' == *pBuffer);
|
|
|
|
// Empty brackets?
|
|
if (BufferLength < WCSLEN_LIT(L"[0]") || L']' == pBuffer[1])
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: IPv6 address too short\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
for (pChar = pBuffer + WCSLEN_LIT(L"["); pChar < pEnd; ++pChar)
|
|
{
|
|
if (IS_ASCII(*pChar))
|
|
{
|
|
if (L']' == *pChar || L'%' == *pChar)
|
|
break;
|
|
|
|
// Dots are allowed because the last 32 bits may be represented
|
|
// in IPv4 dotted-octet notation
|
|
if (IS_HTTP_HEX(*pChar) || L':' == *pChar || L'.' == *pChar)
|
|
continue;
|
|
}
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"Invalid char in IPv6 address, U+%04X '%c', "
|
|
"after %lu chars, '%.*ls'\n",
|
|
*pChar,
|
|
IS_ANSI(*pChar) && IS_HTTP_PRINT(*pChar) ? *pChar : '?',
|
|
DIFF(pChar - pBuffer),
|
|
DIFF(pChar - pBuffer),
|
|
pBuffer
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (pChar == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: No ']' for IPv6 address\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(pChar < pEnd);
|
|
ASSERT(L']' == *pChar || L'%' == *pChar);
|
|
|
|
// Let the RTL routine do the hard work of parsing IPv6 addrs
|
|
Status = RtlIpv6StringToAddressW(
|
|
pBuffer + WCSLEN_LIT(L"["),
|
|
&pTerminator,
|
|
&pSockAddr6->sin6_addr
|
|
);
|
|
|
|
if (! NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"Invalid IPv6 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
if (pTerminator != pChar)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"Invalid IPv6 terminator, U+%04X, '%c'\n",
|
|
*pTerminator,
|
|
IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
|
|
? *pTerminator
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Is a scopeid present?
|
|
|
|
if (L'%' != *pChar)
|
|
{
|
|
ASSERT(L']' == *pChar);
|
|
pSockAddr6->sin6_scope_id = 0;
|
|
}
|
|
else
|
|
{
|
|
PCWSTR pScopeEnd;
|
|
|
|
// Skip the '%' denoting a scope ID
|
|
pChar += WCSLEN_LIT(L"%");
|
|
|
|
if (!ScopeIdAllowed)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: No scope ID allowed\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (pChar == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"No IPv6 scope ID after '%%'\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
pScopeEnd = pChar;
|
|
|
|
do
|
|
{
|
|
if (*pScopeEnd < L'0' || *pScopeEnd > L'9')
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"Invalid digit in IPv6 scope ID, "
|
|
"U+%04X, '%c'\n",
|
|
*pScopeEnd,
|
|
IS_ANSI(*pScopeEnd) && IS_HTTP_PRINT(*pScopeEnd)
|
|
? *pScopeEnd
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
} while (++pScopeEnd < pEnd && L']' != *pScopeEnd);
|
|
|
|
ASSERT(pScopeEnd > pChar);
|
|
|
|
if (pScopeEnd == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"No ']' after IPv6 scope ID\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(L']' == *pScopeEnd);
|
|
|
|
Status = HttpWideStringToULong(
|
|
pChar,
|
|
pScopeEnd - pChar,
|
|
FALSE, // no leading zeros permitted
|
|
10,
|
|
&pTerminator,
|
|
&ScopeTemp
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppParseIPv6Address: "
|
|
"Invalid scopeID, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Scope ID does not get swapped to Network Byte Order
|
|
*(UNALIGNED64 ULONG *)&pSockAddr6->sin6_scope_id =
|
|
ScopeTemp;
|
|
|
|
ASSERT(pTerminator == pScopeEnd);
|
|
|
|
pChar = pScopeEnd;
|
|
|
|
} // '%' handling
|
|
|
|
ASSERT(pChar < pEnd);
|
|
ASSERT(L']' == *pChar);
|
|
|
|
// Skip the terminating ']'
|
|
pChar += WCSLEN_LIT(L"]");
|
|
|
|
*ppEnd = pChar;
|
|
|
|
RETURN(STATUS_SUCCESS);
|
|
|
|
} // HttppParseIPv6Address
|
|
|
|
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Print an IPv4 or IPv6 address as Unicode.
|
|
|
|
Arguments:
|
|
|
|
pSockAddr - The IP address to print
|
|
pBuffer - Buffer to print to. Assumed to be large enough.
|
|
|
|
Return Value:
|
|
|
|
Number of wide chars printed (the length)
|
|
|
|
--*/
|
|
|
|
ULONG
|
|
HttppPrintIpAddressW(
|
|
IN PSOCKADDR pSockAddr,
|
|
OUT PWSTR pBuffer
|
|
)
|
|
{
|
|
PWSTR pResult = pBuffer;
|
|
|
|
HTTP_FILL_BUFFER(pBuffer, MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN);
|
|
|
|
if (TDI_ADDRESS_TYPE_IP == pSockAddr->sa_family)
|
|
{
|
|
PSOCKADDR_IN pAddr4 = (PSOCKADDR_IN) pSockAddr;
|
|
|
|
pResult = RtlIpv4AddressToStringW(&pAddr4->sin_addr, pResult);
|
|
}
|
|
else if (TDI_ADDRESS_TYPE_IP6 == pSockAddr->sa_family)
|
|
{
|
|
PSOCKADDR_IN6 pAddr6 = (PSOCKADDR_IN6) pSockAddr;
|
|
|
|
*pResult++ = L'[';
|
|
pResult = RtlIpv6AddressToStringW(&pAddr6->sin6_addr, pResult);
|
|
// CODEWORK: Handle scope ID
|
|
*pResult++ = L']';
|
|
}
|
|
else
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttppPrintIpAddressW(): invalid sa_family, %hd\n",
|
|
pSockAddr->sa_family
|
|
));
|
|
|
|
ASSERT(! "Invalid SockAddr Family");
|
|
}
|
|
|
|
*pResult = UNICODE_NULL;
|
|
|
|
return DIFF(pResult - pBuffer);
|
|
|
|
} // HttppPrintIpAddressW
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
This checks to see if the URL is well-formed.
|
|
A well-formed URL has a scheme ("http" or "https"),
|
|
a valid hostname (including + and * wildcards, IPv4, and IPv6 literals),
|
|
a port, and a well-formed abspath.
|
|
|
|
* Must check that the URL is well-formed and in canonical form; e.g.,
|
|
- Disallow /../ and /./
|
|
- Disallow invalid characters, including invalid Unicode surrogate
|
|
pairs. The URL is already in Unicode, so it's not a question of
|
|
using the IS_URL_TOKEN() macro.
|
|
|
|
|
|
Arguments:
|
|
pCfg - configuration parameters
|
|
pUrl - Unicode string containing URL (not assumed to be
|
|
zero-terminated)
|
|
UrlLength - length of pUrl, in WCHARs
|
|
TrailingSlashReqd - if TRUE, pUrl must end in '/'
|
|
ForceRoutingIP - if TRUE and the hostname is an IPv4 or IPv6 literal,
|
|
pParsedUrl->Normalized will be cleared, to force
|
|
HttpNormalizeParsedUrl() to rewrite the URL as
|
|
http://IP:port:IP/path
|
|
pParsedUrl - on successful exit, the components of the URL
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS
|
|
|
|
--***************************************************************************/
|
|
|
|
NTSTATUS
|
|
HttpParseUrl(
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN PCWSTR pUrl,
|
|
IN ULONG UrlLength,
|
|
IN BOOLEAN TrailingSlashReqd,
|
|
IN BOOLEAN ForceRoutingIP,
|
|
OUT PHTTP_PARSED_URL pParsedUrl
|
|
)
|
|
{
|
|
NTSTATUS Status;
|
|
ULONG PreviousChar;
|
|
ULONG UnicodeChar;
|
|
PCWSTR pEnd = pUrl + UrlLength;
|
|
PCWSTR pHostname;
|
|
PCWSTR pChar;
|
|
PCWSTR pLabel;
|
|
PCWSTR pSlash;
|
|
PCWSTR pSegment;
|
|
PWSTR pTerminator;
|
|
BOOLEAN AlphaLabel;
|
|
BOOLEAN TestSegment;
|
|
BOOLEAN MoreChars;
|
|
BOOLEAN LastCharHack;
|
|
ULONG SegmentCount;
|
|
URL_STATE UrlState;
|
|
URL_STATE_TOKEN UrlToken;
|
|
URL_ACTION Action;
|
|
WCHAR IpAddr[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
|
|
ULONG Length;
|
|
|
|
//
|
|
// Sanity check.
|
|
//
|
|
|
|
PAGED_CODE();
|
|
|
|
ASSERT(NULL != pCfg);
|
|
ASSERT(NULL != pUrl);
|
|
ASSERT(0 < UrlLength && UrlLength <= UNICODE_STRING_MAX_WCHAR_LEN);
|
|
ASSERT(FALSE == TrailingSlashReqd || TRUE == TrailingSlashReqd);
|
|
ASSERT(FALSE == ForceRoutingIP || TRUE == ForceRoutingIP);
|
|
ASSERT(NULL != pParsedUrl);
|
|
|
|
RtlZeroMemory(pParsedUrl, sizeof(*pParsedUrl));
|
|
|
|
pParsedUrl->Signature = HTTP_PARSED_URL_SIGNATURE;
|
|
pParsedUrl->pFullUrl = (PWSTR) pUrl;
|
|
pParsedUrl->UrlLength = (USHORT) UrlLength;
|
|
pParsedUrl->Normalized = TRUE;
|
|
pParsedUrl->TrailingSlashReqd = TrailingSlashReqd;
|
|
|
|
// This is the shortest possible valid URL
|
|
|
|
if (UrlLength < WCSLEN_LIT(L"http://*:1/"))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: Url too short, %lu, %.*ls\n",
|
|
UrlLength, UrlLength, pUrl
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Check the scheme
|
|
|
|
if (0 == wcsncmp(pUrl, L"http://", WCSLEN_LIT(L"http://")))
|
|
{
|
|
pParsedUrl->Secure = FALSE;
|
|
pHostname = pUrl + WCSLEN_LIT(L"http://");
|
|
}
|
|
else if (0 == wcsncmp(pUrl, L"https://", WCSLEN_LIT(L"https://")))
|
|
{
|
|
pParsedUrl->Secure = TRUE;
|
|
pHostname = pUrl + WCSLEN_LIT(L"https://");
|
|
}
|
|
else
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: invalid scheme, %.*ls\n",
|
|
UrlLength, pUrl
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
pParsedUrl->pHostname = (PWSTR) pHostname;
|
|
|
|
// Is a trailing slash present, if required?
|
|
|
|
if (TrailingSlashReqd && L'/' != pUrl[UrlLength - 1])
|
|
{
|
|
// No, then the URL will have to be rewritten
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
//
|
|
// The hostname validation code below looks a lot like that in
|
|
// HttpValidateHostname(). However, it is sufficiently different
|
|
// (WCHAR vs. UCHAR, Host+IP, Scope IDs, compulsory ports, etc) that
|
|
// it is not easy to combine them into one routine. If the hostname
|
|
// validation code is changed here, it may be necessary to change it
|
|
// in HttpValidateHostname() too, or vice versa.
|
|
//
|
|
|
|
// Check for weak (http://*:port/) and strong (http://+:port/) wildcards
|
|
|
|
if (L'*' == *pHostname || L'+' == *pHostname)
|
|
{
|
|
pParsedUrl->SiteType = (L'*' == *pHostname)
|
|
? HttpUrlSite_WeakWildcard
|
|
: HttpUrlSite_StrongWildcard;
|
|
|
|
pChar = pHostname + WCSLEN_LIT(L"*");
|
|
|
|
ASSERT(pChar < pEnd);
|
|
|
|
// The wildcard must be followed by ":port"
|
|
if (L':' == *pChar)
|
|
goto port;
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No port in '%c' wildcard address\n",
|
|
*pHostname
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Is this an IPv6 literal address, per RFC 2732?
|
|
|
|
if (L'[' == *pHostname)
|
|
{
|
|
pParsedUrl->SiteType = HttpUrlSite_IP;
|
|
|
|
Status = HttppParseIPv6Address(
|
|
pHostname,
|
|
DIFF(pEnd - pHostname),
|
|
TRUE, // scope ID allowed
|
|
&pParsedUrl->SockAddr6,
|
|
&pChar);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid IPv6 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);
|
|
ASSERT(pChar > pHostname);
|
|
|
|
// There must be a port
|
|
if (pChar == pEnd || L':' != *pChar)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No port after IPv6 address\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
//
|
|
// There are so many legitimate ways to write an IPv6 literal
|
|
// that we can't assume that a valid IPv6 literal is normalized.
|
|
// Since we do string comparisons, we'll have to rewrite the URL
|
|
// if the Normalized flag is not set.
|
|
//
|
|
|
|
Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);
|
|
|
|
if (Length != DIFF_USHORT(pChar - pHostname)
|
|
|| 0 != _wcsnicmp(pHostname, IpAddr, Length))
|
|
{
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
goto port;
|
|
|
|
} // IPv6
|
|
|
|
//
|
|
// It must be a domain name or an IPv4 literal. We'll try to treat
|
|
// it as a domain name first. If the labels turn out to be all-numeric,
|
|
// we'll try decoding it as an IPv4 literal.
|
|
//
|
|
|
|
AlphaLabel = FALSE;
|
|
pLabel = pHostname;
|
|
|
|
for (pChar = pHostname; pChar < pEnd; ++pChar)
|
|
{
|
|
if (L':' == *pChar)
|
|
{
|
|
if (pChar == pHostname)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: empty hostname\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Have we seen any non-digits?
|
|
if (AlphaLabel)
|
|
{
|
|
ASSERT(0 == pParsedUrl->SockAddr.sa_family);
|
|
pParsedUrl->SiteType = HttpUrlSite_Name;
|
|
goto port;
|
|
}
|
|
|
|
pParsedUrl->SiteType = HttpUrlSite_IP;
|
|
pParsedUrl->SockAddr4.sin_family = TDI_ADDRESS_TYPE_IP;
|
|
ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family);
|
|
|
|
// Let's see if it's a valid IPv4 address
|
|
Status = RtlIpv4StringToAddressW(
|
|
pHostname,
|
|
TRUE, // strict => 4 dotted decimal octets
|
|
&pTerminator,
|
|
&pParsedUrl->SockAddr4.sin_addr
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid IPv4 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
if (pTerminator != pChar)
|
|
{
|
|
ASSERT(pTerminator < pChar);
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid IPv4 address after %lu chars, "
|
|
"U+%04X, '%c'\n",
|
|
DIFF(pTerminator - pHostname),
|
|
*pTerminator,
|
|
IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
|
|
? *pTerminator
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);
|
|
|
|
if (Length != DIFF_USHORT(pChar - pHostname)
|
|
|| 0 != _wcsnicmp(pHostname, IpAddr, Length))
|
|
{
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
goto port;
|
|
|
|
} // ':' handling
|
|
|
|
if (L'.' == *pChar)
|
|
{
|
|
ULONG LabelLength = DIFF(pChar - pLabel);
|
|
|
|
// There must be at least one char in the label
|
|
if (0 == LabelLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: empty label\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Label can't have more than 63 chars
|
|
if (LabelLength > pCfg->MaxLabelLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: overlong label, %lu\n",
|
|
LabelLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Reset for the next label
|
|
pLabel = pChar + WCSLEN_LIT(L".");
|
|
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// All chars above 0xFF are considered valid
|
|
//
|
|
|
|
if (!IS_ANSI(*pChar) || !IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
|
|
{
|
|
if (!IS_ANSI(*pChar) || !IS_HTTP_DIGIT(*pChar))
|
|
AlphaLabel = TRUE;
|
|
|
|
if (pChar > pLabel)
|
|
continue;
|
|
|
|
// The first char of a label cannot be a hyphen. (Underscore?)
|
|
if (L'-' == *pChar)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: '-' at beginning of label\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid char in hostname, U+%04X '%c',"
|
|
" after %lu chars, '%.*s'\n",
|
|
*pChar,
|
|
IS_ANSI(*pChar) && IS_HTTP_PRINT(*pChar) ? *pChar : '?',
|
|
DIFF(pChar - pHostname),
|
|
DIFF(pChar - pHostname),
|
|
pHostname
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
|
|
} // hostname
|
|
|
|
|
|
//
|
|
// If we got here, we fell off the end of the buffer,
|
|
// without finding a ':' for the port
|
|
//
|
|
|
|
ASSERT(pChar == pEnd);
|
|
|
|
UlTraceError(PARSER, ("http!HttpParseUrl: No port\n"));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
|
|
|
|
port:
|
|
|
|
//
|
|
// Parse the port number
|
|
//
|
|
|
|
ASSERT(pHostname < pChar && pChar < pEnd);
|
|
ASSERT(L':' == *pChar);
|
|
|
|
pParsedUrl->HostnameLength = DIFF_USHORT(pChar - pHostname);
|
|
|
|
// First, check for overlong hostnames
|
|
if (pParsedUrl->HostnameLength > pCfg->MaxHostnameLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: overlong hostname, %hu\n",
|
|
pParsedUrl->HostnameLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Skip the ':' denoting a port number
|
|
pChar += WCSLEN_LIT(L":");
|
|
|
|
if (pChar == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No port after ':'\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// Search for the '/' or second ':' that terminates the port number
|
|
|
|
pSlash = pChar;
|
|
pParsedUrl->pPort = (PWSTR) pSlash;
|
|
|
|
do
|
|
{
|
|
if (*pSlash < L'0' || *pSlash > L'9')
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid digit in port, U+%04X, '%c'\n",
|
|
*pSlash,
|
|
IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
|
|
? *pSlash
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
} while (++pSlash < pEnd && L'/' != *pSlash && L':' != *pSlash);
|
|
|
|
ASSERT(pSlash > pChar);
|
|
|
|
pParsedUrl->PortLength = DIFF_USHORT(pSlash - pChar);
|
|
|
|
if (pSlash == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No '/' (or second ':') after port\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(L'/' == *pSlash || L':' == *pSlash);
|
|
|
|
Status = HttpWideStringToUShort(
|
|
pChar,
|
|
pParsedUrl->PortLength,
|
|
FALSE, // no leading zeros permitted
|
|
10,
|
|
&pTerminator,
|
|
&pParsedUrl->PortNumber
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid port number, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (0 == pParsedUrl->PortNumber)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: Port must not be zero.\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(pTerminator == pSlash);
|
|
|
|
pChar = pSlash;
|
|
|
|
goto routing_IP; // so /W4 won't complain about an unreferenced label
|
|
|
|
|
|
routing_IP:
|
|
|
|
//
|
|
// Is this a Host+IP site; i.e., is there a Routing IP address
|
|
// after the port number?
|
|
//
|
|
|
|
if (L'/' == *pChar)
|
|
{
|
|
pParsedUrl->pRoutingIP = NULL;
|
|
pParsedUrl->RoutingIPLength = 0;
|
|
ASSERT(0 == pParsedUrl->RoutingAddr.sa_family);
|
|
|
|
//
|
|
// If the hostname is an IP literal, but there is no routing IP
|
|
// (i.e., http://IP:port/path), we must rewrite the URL as
|
|
// http://IP:port:IP/path; i.e., explicitly use the hostname IP
|
|
// as the routing IP.
|
|
//
|
|
|
|
if (ForceRoutingIP && 0 != pParsedUrl->SockAddr.sa_family)
|
|
{
|
|
ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
|
|
|| TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);
|
|
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
goto parse_path;
|
|
}
|
|
|
|
ASSERT(L':' == *pChar);
|
|
|
|
if (HttpUrlSite_WeakWildcard == pParsedUrl->SiteType
|
|
|| HttpUrlSite_StrongWildcard == pParsedUrl->SiteType)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Can't have Routing IPs on Wildcard sites\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
pChar += WCSLEN_LIT(L":");
|
|
|
|
if (pChar == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No IP address after second ':'\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
pParsedUrl->pRoutingIP = (PWSTR) pChar;
|
|
|
|
ASSERT(HttpUrlSite_NamePlusIP != pParsedUrl->SiteType);
|
|
|
|
if (HttpUrlSite_Name == pParsedUrl->SiteType)
|
|
{
|
|
pParsedUrl->SiteType = HttpUrlSite_NamePlusIP;
|
|
}
|
|
|
|
//
|
|
// Is the Routing IP an IPv6 literal?
|
|
//
|
|
|
|
if (L'[' == *pChar)
|
|
{
|
|
if (TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Can't have http://IPv4:port:[IPv6]\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family
|
|
|| 0 == pParsedUrl->SockAddr.sa_family);
|
|
|
|
Status = HttppParseIPv6Address(
|
|
pChar,
|
|
DIFF(pEnd - pChar),
|
|
TRUE, // scope ID allowed
|
|
&pParsedUrl->RoutingAddr6,
|
|
&pSlash);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid Host+IPv6 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->RoutingAddr.sa_family);
|
|
ASSERT(pSlash > pChar);
|
|
|
|
// There must be a slash
|
|
if (pSlash == pEnd || L'/' != *pSlash)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: '/' expected after Host+IPv6.\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// CODEWORK: Should we care if RoutingAddr6 != SockAddr6?
|
|
|
|
pParsedUrl->RoutingIPLength = DIFF_USHORT(pSlash - pChar);
|
|
|
|
Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);
|
|
|
|
if (Length != pParsedUrl->RoutingIPLength
|
|
|| 0 != _wcsnicmp(pChar, IpAddr, Length))
|
|
{
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
pChar = pSlash;
|
|
|
|
goto parse_path;
|
|
}
|
|
|
|
|
|
//
|
|
// No, then it must be an IPv4 literal
|
|
//
|
|
|
|
if (TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: Can't have http://[IPv6]:port:IPv4\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
|
|
|| 0 == pParsedUrl->SockAddr.sa_family);
|
|
|
|
// Search for the terminating '/'
|
|
|
|
pSlash = pChar;
|
|
|
|
do
|
|
{
|
|
if ((L'0' <= *pSlash && *pSlash <= L'9') || L'.' == *pSlash)
|
|
continue;
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid character in Host+IPv4, U+%04X, '%c'\n",
|
|
*pSlash,
|
|
IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
|
|
? *pSlash
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
|
|
} while (++pSlash < pEnd && L'/' != *pSlash);
|
|
|
|
ASSERT(pSlash > pChar);
|
|
|
|
if (pSlash == pEnd)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: No '/' after Host+IPv4\n"
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
ASSERT(L'/' == *pSlash);
|
|
|
|
Status = RtlIpv4StringToAddressW(
|
|
pChar,
|
|
TRUE, // strict => 4 dotted decimal octets
|
|
&pTerminator,
|
|
&pParsedUrl->RoutingAddr4.sin_addr
|
|
);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid Host+IPv4 address, %s\n",
|
|
HttpStatusToString(Status)
|
|
));
|
|
|
|
RETURN(Status);
|
|
}
|
|
|
|
if (pTerminator != pSlash)
|
|
{
|
|
ASSERT(pTerminator < pSlash);
|
|
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"Invalid Host+IPv4 address after %lu chars, "
|
|
"U+%04X, '%c'\n",
|
|
DIFF(pTerminator - pChar),
|
|
*pTerminator,
|
|
IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
|
|
? *pTerminator
|
|
: '?'
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
// CODEWORK: Should we care if RoutingAddr4 != SockAddr4
|
|
|
|
pParsedUrl->RoutingIPLength = DIFF_USHORT(pSlash - pChar);
|
|
pParsedUrl->RoutingAddr4.sin_family = TDI_ADDRESS_TYPE_IP;
|
|
|
|
Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);
|
|
|
|
if (Length != pParsedUrl->RoutingIPLength
|
|
|| 0 != _wcsnicmp(pChar, IpAddr, Length))
|
|
{
|
|
pParsedUrl->Normalized = FALSE;
|
|
}
|
|
|
|
pChar = pSlash;
|
|
|
|
|
|
|
|
parse_path:
|
|
|
|
//
|
|
// Parse the abspath
|
|
//
|
|
|
|
ASSERT(pParsedUrl->pRoutingIP == NULL || pParsedUrl->RoutingIPLength > 0);
|
|
ASSERT(pHostname < pChar && pChar < pEnd);
|
|
ASSERT(L'/' == *pChar);
|
|
|
|
pParsedUrl->pAbsPath = (PWSTR) pChar;
|
|
pParsedUrl->AbsPathLength = DIFF_USHORT(pEnd - pChar);
|
|
|
|
if (pParsedUrl->AbsPathLength > pCfg->UrlMaxLength)
|
|
{
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"AbsPath is too long: %lu\n",
|
|
pParsedUrl->AbsPathLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
UrlState = URL_STATE_START;
|
|
UrlToken = URL_TOKEN_OTHER;
|
|
Action = ACTION_NOTHING;
|
|
pSegment = pChar;
|
|
TestSegment = FALSE;
|
|
LastCharHack = FALSE;
|
|
MoreChars = TRUE;
|
|
PreviousChar = UNICODE_NULL;
|
|
UnicodeChar = *pChar;
|
|
SegmentCount = 0;
|
|
|
|
//
|
|
// Loop through all the characters in pAbsPath, plus one or two
|
|
// special ones at the end.
|
|
//
|
|
|
|
while (MoreChars)
|
|
{
|
|
switch (UnicodeChar)
|
|
{
|
|
case UNICODE_NULL:
|
|
UrlToken = URL_TOKEN_EOS;
|
|
TestSegment = TRUE;
|
|
break;
|
|
|
|
case DOT:
|
|
UrlToken = URL_TOKEN_DOT;
|
|
TestSegment = FALSE;
|
|
break;
|
|
|
|
case FORWARD_SLASH:
|
|
UrlToken = URL_TOKEN_SLASH;
|
|
TestSegment = TRUE;
|
|
break;
|
|
|
|
case PERCENT: // no hex escapes
|
|
case STAR: // no wildcards
|
|
case QUESTION_MARK: // no wildcards or querystrings
|
|
case BACK_SLASH: // no C string escapes
|
|
UlTraceError(PARSER,
|
|
("http!HttpParseUrl: invalid '%c' char in path\n",
|
|
(UCHAR) UnicodeChar
|
|
));
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
|
|
default:
|
|
UrlToken = URL_TOKEN_OTHER;
|
|
TestSegment = FALSE;
|
|
break;
|
|
}
|
|
|
|
UlTraceVerbose(PARSER,
|
|
("http!HttpParseUrl: "
|
|
"[%lu] U+%04lX '%c' %p: [%s][%s] -> %s, %s\n",
|
|
DIFF(pChar - pParsedUrl->pAbsPath),
|
|
UnicodeChar,
|
|
IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
|
|
? (UCHAR) UnicodeChar : '?',
|
|
pChar,
|
|
HttppUrlStateToString(UrlState),
|
|
HttppUrlTokenToString(UrlToken),
|
|
HttppUrlStateToString(
|
|
CanonStateFromStateAndToken[UrlState][UrlToken]),
|
|
TestSegment ? ", TestSegment" : ""
|
|
));
|
|
|
|
//
|
|
// Reject control characters
|
|
//
|
|
|
|
if (!LastCharHack
|
|
&& !pCfg->AllowRestrictedChars
|
|
&& IS_ANSI(UnicodeChar)
|
|
&& IS_URL_INVALID(UnicodeChar))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl: "
|
|
"Invalid character, U+%04lX, in path.\n",
|
|
UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
//
|
|
// Check that (high-surrogate, low-surrogate) come in pairs
|
|
//
|
|
|
|
if (HIGH_SURROGATE_START <= PreviousChar
|
|
&& PreviousChar <= HIGH_SURROGATE_END)
|
|
{
|
|
if (UnicodeChar < LOW_SURROGATE_START
|
|
|| UnicodeChar > LOW_SURROGATE_END)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl: "
|
|
"Illegal surrogate pair, U+%04lX, U+%04lX.\n",
|
|
PreviousChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
}
|
|
else if (LOW_SURROGATE_START <= UnicodeChar
|
|
&& UnicodeChar <= LOW_SURROGATE_END)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl: "
|
|
"Non-high surrogate, U+%04lX, "
|
|
"before low surrogate, U+%04lX.\n",
|
|
PreviousChar, UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
if (URL_STATE_ERROR == CanonStateFromStateAndToken[UrlState][UrlToken])
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl: "
|
|
"Error state from %s,%s in path, after U+%04lX.\n",
|
|
HttppUrlStateToString(UrlState),
|
|
HttppUrlTokenToString(UrlToken),
|
|
UnicodeChar
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
UrlState = CanonStateFromStateAndToken[UrlState][UrlToken];
|
|
|
|
//
|
|
// Check segment limits
|
|
//
|
|
|
|
if (TestSegment)
|
|
{
|
|
ULONG SegmentLength = DIFF(pChar - pSegment);
|
|
|
|
// The CanonStateFromStateAndToken checks should prevent
|
|
// empty segments, among other things
|
|
ASSERT(SegmentLength > 0 || pChar == pSegment);
|
|
|
|
// Reject if segment too long
|
|
if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl(): "
|
|
"Segment too long: %lu\n",
|
|
SegmentLength
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
|
|
pSegment = pChar;
|
|
|
|
// Reject if too many path segments
|
|
if (++SegmentCount > pCfg->UrlSegmentMaxCount)
|
|
{
|
|
UlTraceError(PARSER, (
|
|
"http!HttpParseUrl(): "
|
|
"Too many segments: %lu\n",
|
|
SegmentCount
|
|
));
|
|
|
|
RETURN(STATUS_INVALID_PARAMETER);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Are there any more path characters?
|
|
//
|
|
|
|
PreviousChar = UnicodeChar;
|
|
|
|
if (++pChar < pEnd)
|
|
{
|
|
UnicodeChar = *pChar;
|
|
}
|
|
else if (!LastCharHack)
|
|
{
|
|
// Want to make sure that the last segment is tested.
|
|
// If there's no trailing slash, we'll enter here twice;
|
|
// otherwise once
|
|
|
|
if (TrailingSlashReqd && FORWARD_SLASH != PreviousChar)
|
|
{
|
|
// First, fake a trailing slash, if needed
|
|
UnicodeChar = FORWARD_SLASH;
|
|
}
|
|
else
|
|
{
|
|
// Second, always finish up with UNICODE_NULL
|
|
UnicodeChar = UNICODE_NULL;
|
|
LastCharHack = TRUE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Terminate the loop
|
|
MoreChars = FALSE;
|
|
}
|
|
|
|
} // while (MoreChars)
|
|
|
|
RETURN(STATUS_SUCCESS);
|
|
|
|
} // HttpParseUrl
|
|
|
|
|
|
|
|
/***************************************************************************++
|
|
|
|
Routine Description:
|
|
Some URLs parsed by HttpParseUrl() will not be considered normalized
|
|
if they have IP literals, Routing IPs, or no trailing slash.
|
|
This routine will build a fully normalized URL and (possibly) free the
|
|
old one
|
|
|
|
Arguments:
|
|
pParsedUrl - On entry, points to a URL parsed by HttpParseUrl();
|
|
On successful exit, points to a normalized URL.
|
|
pCfg - configuration parameters
|
|
ForceCopy - if TRUE, will always make a new, normalized URL
|
|
FreeOriginalUrl - if FALSE, will never free the original URL.
|
|
The caller must manage the memory.
|
|
ForceRoutingIP - if TRUE and the hostname is an IPv4 or IPv6 literal,
|
|
the URL will be rewritten in the form
|
|
http://IP:port:IP/path
|
|
PoolType - PagedPool or NonPagedPool
|
|
PoolTag - Tag used to allocate pUrl
|
|
|
|
Return Value:
|
|
|
|
NTSTATUS - STATUS_SUCCESS or STATUS_NO_MEMORY
|
|
|
|
--***************************************************************************/
|
|
NTSTATUS
|
|
HttpNormalizeParsedUrl(
|
|
IN OUT PHTTP_PARSED_URL pParsedUrl,
|
|
IN PURL_C14N_CONFIG pCfg,
|
|
IN BOOLEAN ForceCopy,
|
|
IN BOOLEAN FreeOriginalUrl,
|
|
IN BOOLEAN ForceRoutingIP,
|
|
IN POOL_TYPE PoolType,
|
|
IN ULONG PoolTag
|
|
)
|
|
{
|
|
HTTP_PARSED_URL ParsedUrl = *pParsedUrl;
|
|
NTSTATUS Status = STATUS_SUCCESS;
|
|
|
|
ASSERT(HTTP_PARSED_URL_SIGNATURE == ParsedUrl.Signature);
|
|
|
|
if (ParsedUrl.Normalized && !ForceCopy)
|
|
{
|
|
// nothing to do
|
|
}
|
|
else
|
|
{
|
|
PWSTR pResult;
|
|
WCHAR HostAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
|
|
WCHAR RoutingAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
|
|
ULONG SchemeLength;
|
|
ULONG HostAddrLength;
|
|
ULONG HostnameLength;
|
|
ULONG RoutingAddrLength;
|
|
ULONG AbsPathLength;
|
|
ULONG Length;
|
|
ULONG TrailingSlashLength;
|
|
PCWSTR pUrl;
|
|
|
|
pUrl = ParsedUrl.pFullUrl;
|
|
|
|
SchemeLength = DIFF(ParsedUrl.pHostname - ParsedUrl.pFullUrl);
|
|
|
|
// Calculate HostAddrLength and HostnameLength (mutually exclusive)
|
|
if (0 != ParsedUrl.SockAddr.sa_family)
|
|
{
|
|
HostAddrLength = HttppPrintIpAddressW(
|
|
&ParsedUrl.SockAddr,
|
|
HostAddrString
|
|
);
|
|
HostnameLength = 0;
|
|
}
|
|
else
|
|
{
|
|
HostAddrLength = 0;
|
|
HostAddrString[0] = UNICODE_NULL;
|
|
HostnameLength = ParsedUrl.HostnameLength;
|
|
}
|
|
|
|
// Calculate RoutingAddrLength
|
|
if (0 != ParsedUrl.RoutingAddr.sa_family)
|
|
{
|
|
RoutingAddrLength = WCSLEN_LIT(L":")
|
|
+ HttppPrintIpAddressW(
|
|
&ParsedUrl.RoutingAddr,
|
|
RoutingAddrString
|
|
);
|
|
}
|
|
else if (ForceRoutingIP && 0 != ParsedUrl.SockAddr.sa_family)
|
|
{
|
|
// We must rewrite http://IP:port/path as http://IP:port:IP/path
|
|
RoutingAddrLength = WCSLEN_LIT(L":") + HostAddrLength;
|
|
wcscpy(RoutingAddrString, HostAddrString);
|
|
}
|
|
else
|
|
{
|
|
RoutingAddrLength = 0;
|
|
RoutingAddrString[0] = UNICODE_NULL;
|
|
}
|
|
|
|
AbsPathLength = ParsedUrl.AbsPathLength;
|
|
|
|
ASSERT(AbsPathLength > 0);
|
|
|
|
if (ParsedUrl.TrailingSlashReqd
|
|
&& FORWARD_SLASH != ParsedUrl.pAbsPath[AbsPathLength-1])
|
|
{
|
|
TrailingSlashLength = WCSLEN_LIT(L"/");
|
|
}
|
|
else
|
|
{
|
|
TrailingSlashLength = 0;
|
|
}
|
|
|
|
Length = SchemeLength
|
|
+ HostAddrLength
|
|
+ HostnameLength
|
|
+ WCSLEN_LIT(L":") + ParsedUrl.PortLength
|
|
+ RoutingAddrLength
|
|
+ AbsPathLength
|
|
+ TrailingSlashLength;
|
|
|
|
pResult = (PWSTR) HTTPP_ALLOC(
|
|
PoolType,
|
|
(Length + 1) * sizeof(WCHAR),
|
|
PoolTag
|
|
);
|
|
|
|
|
|
if (NULL == pResult)
|
|
{
|
|
Status = STATUS_NO_MEMORY;
|
|
// Do not destroy the old URL. Let caller handle it.
|
|
}
|
|
else
|
|
{
|
|
PWSTR pDest = pResult;
|
|
|
|
#define WCSNCPY(pSrc, Length) \
|
|
RtlCopyMemory(pDest, (pSrc), (Length) * sizeof(WCHAR)); \
|
|
pDest += (Length)
|
|
|
|
#define WCSNCPY2(pField, Length) \
|
|
WCSNCPY(ParsedUrl.pField, Length)
|
|
|
|
#define WCSNCPY_LIT(Lit) \
|
|
WCSNCPY(Lit, WCSLEN_LIT(Lit))
|
|
|
|
WCSNCPY2(pFullUrl, SchemeLength);
|
|
|
|
if (0 != HostnameLength)
|
|
{
|
|
ASSERT(0 == HostAddrLength);
|
|
WCSNCPY2(pHostname, HostnameLength);
|
|
}
|
|
else
|
|
{
|
|
ASSERT(0 != HostAddrLength);
|
|
WCSNCPY(HostAddrString, HostAddrLength);
|
|
}
|
|
|
|
WCSNCPY_LIT(L":");
|
|
WCSNCPY2(pPort, ParsedUrl.PortLength);
|
|
|
|
if (RoutingAddrLength > 0)
|
|
{
|
|
WCSNCPY_LIT(L":");
|
|
WCSNCPY(
|
|
RoutingAddrString,
|
|
RoutingAddrLength - WCSLEN_LIT(L":")
|
|
);
|
|
}
|
|
|
|
WCSNCPY2(pAbsPath, AbsPathLength);
|
|
|
|
if (TrailingSlashLength > 0)
|
|
{
|
|
WCSNCPY_LIT(L"/");
|
|
}
|
|
|
|
ASSERT(DIFF(pDest - pResult) == Length);
|
|
|
|
*pDest = UNICODE_NULL;
|
|
|
|
Status = HttpParseUrl(
|
|
pCfg,
|
|
pResult,
|
|
Length,
|
|
ParsedUrl.TrailingSlashReqd,
|
|
ForceRoutingIP,
|
|
&ParsedUrl
|
|
);
|
|
|
|
ASSERT(STATUS_SUCCESS == Status);
|
|
ASSERT(ParsedUrl.Normalized);
|
|
|
|
if (FreeOriginalUrl)
|
|
HTTPP_FREE((PVOID) pUrl, PoolTag);
|
|
|
|
// Write the updated local copy back to the caller's HTTP_PARSED_URL
|
|
*pParsedUrl = ParsedUrl;
|
|
}
|
|
}
|
|
|
|
return Status;
|
|
|
|
} // HttpNormalizeParsedUrl
|