windows-server-2003/net/http/common/c14n.c

/*++

Copyright (c) 1998-2002 Microsoft Corporation

Module Name:

    C14n.c

Abstract:

    URL canonicalization (c14n) routines

Author:

    George V. Reilly (GeorgeRe)     22-Mar-2002

Revision History:

--*/

#include <precomp.h>

#include "c14np.h"

//
// When both ALLOC_PRAGMA and KERNEL_PRIV are defined, place each routine
// named below in the PAGE code section.
//

#if defined(ALLOC_PRAGMA) && defined(KERNEL_PRIV)

#pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfig)
#pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfigEncoding)
#pragma alloc_text( PAGE, HttpUnescapePercentHexEncoding)
#pragma alloc_text( PAGE, HttppPopCharHostNameUtf8)
#pragma alloc_text( PAGE, HttppPopCharHostNameDbcs)
#pragma alloc_text( PAGE, HttppPopCharHostNameAnsi)
#pragma alloc_text( PAGE, HttpCopyHost)
#pragma alloc_text( PAGE, HttppCopyHostByType)
#pragma alloc_text( PAGE, HttpValidateHostname)
#pragma alloc_text( PAGE, HttppPopCharAbsPathUtf8)
#pragma alloc_text( PAGE, HttppPopCharAbsPathDbcs)
#pragma alloc_text( PAGE, HttppPopCharAbsPathAnsi)
#pragma alloc_text( PAGE, HttppPopCharQueryString)
#pragma alloc_text( PAGE, HttppCopyUrlByType)
#pragma alloc_text( PAGE, HttpCopyUrl)
#pragma alloc_text( PAGE, HttpCleanAndCopyUrl)
#pragma alloc_text( PAGE, HttppCleanAndCopyUrlByType)
#pragma alloc_text( PAGE, HttpFindUrlToken)
#pragma alloc_text( PAGE, HttppParseIPv6Address)
#pragma alloc_text( PAGE, HttppPrintIpAddressW)
#pragma alloc_text( PAGE, HttpParseUrl)
#pragma alloc_text( PAGE, HttpNormalizeParsedUrl)


#endif // ALLOC_PRAGMA && KERNEL_PRIV

#if 0   // Non-Pageable Functions
NOT PAGEABLE -- 
#endif // Non-Pageable Functions


/***************************************************************************++

Routine Description:

    Fill in a URL canonicalization configuration with default settings.

Arguments:

    pCfg    - configuration to initialize; each field assigned in the body
              is overwritten

Return Value:

    None

--***************************************************************************/

VOID
HttpInitializeDefaultUrlC14nConfig(
    PURL_C14N_CONFIG pCfg
    )
{
    PAGED_CODE();

    //
    // Size limits, taken from the DEFAULT_C14N_* constants.
    //

    pCfg->MaxHostnameLength     = DEFAULT_C14N_MAX_HOSTNAME_LENGTH;
    pCfg->MaxLabelLength        = DEFAULT_C14N_MAX_LABEL_LENGTH;
    pCfg->UrlMaxLength          = DEFAULT_C14N_URL_MAX_LENGTH;
    pCfg->UrlSegmentMaxLength   = DEFAULT_C14N_URL_SEGMENT_MAX_LENGTH;
    pCfg->UrlSegmentMaxCount    = DEFAULT_C14N_URL_SEGMENT_MAX_COUNT;

    //
    // Character acceptance policy.
    //

    pCfg->AllowRestrictedChars  = DEFAULT_C14N_ALLOW_RESTRICTED_CHARS;
    pCfg->PercentUAllowed       = DEFAULT_C14N_PERCENT_U_ALLOWED;

    //
    // Encoding: the abspath is decoded as UTF-8 only and all of the
    // non-UTF-8 switches start out off; the hostname tries UTF-8, then
    // DBCS, then ANSI.
    //

    pCfg->CodePage              = 0;
    pCfg->EnableDbcs            = FALSE;
    pCfg->FavorUtf8             = FALSE;
    pCfg->EnableNonUtf8         = FALSE;
    pCfg->AbsPathDecodeOrder    = UrlDecode_Utf8;
    pCfg->HostnameDecodeOrder   = UrlDecode_Utf8_Else_Dbcs_Else_Ansi;

} // HttpInitializeDefaultUrlC14nConfig


/***************************************************************************++

Routine Description:

    Initialize a URL canonicalization configuration with the defaults from
    HttpInitializeDefaultUrlC14nConfig, then record the three encoding
    switches and pick the matching abspath decode order.

Arguments:

    pCfg            - configuration to initialize
    EnableNonUtf8   - if FALSE, the abspath is decoded as UTF-8 only
    FavorUtf8       - when non-UTF-8 is enabled: try UTF-8 first (TRUE)
                      or the non-UTF-8 decoder first (FALSE)
    EnableDbcs      - when non-UTF-8 is enabled: the non-UTF-8 decoder is
                      DBCS (TRUE) or ANSI (FALSE)

Return Value:

    None

--***************************************************************************/

VOID
HttpInitializeDefaultUrlC14nConfigEncoding(
    PURL_C14N_CONFIG    pCfg,
    BOOLEAN             EnableNonUtf8,
    BOOLEAN             FavorUtf8,
    BOOLEAN             EnableDbcs
    )
{
    PAGED_CODE();

    HttpInitializeDefaultUrlC14nConfig(pCfg);

    pCfg->EnableDbcs        = EnableDbcs;
    pCfg->FavorUtf8         = FavorUtf8;
    pCfg->EnableNonUtf8     = EnableNonUtf8;

    if (! EnableNonUtf8)
    {
        // FavorUtf8 and EnableDbcs have no effect on the decode order here
        pCfg->AbsPathDecodeOrder = UrlDecode_Utf8;
    }
    else if (FavorUtf8)
    {
        if (EnableDbcs)
        {
            pCfg->AbsPathDecodeOrder = UrlDecode_Utf8_Else_Dbcs;
        }
        else
        {
            pCfg->AbsPathDecodeOrder = UrlDecode_Utf8_Else_Ansi;
        }
    }
    else
    {
        if (EnableDbcs)
        {
            pCfg->AbsPathDecodeOrder = UrlDecode_Dbcs_Else_Utf8;
        }
        else
        {
            pCfg->AbsPathDecodeOrder = UrlDecode_Ansi_Else_Utf8;
        }
    }

} // HttpInitializeDefaultUrlC14nConfigEncoding


/***************************************************************************++

Routine Description:

    Convert '%NN' or '%uNNNN' to a ULONG.

    The input is treated as '%uNNNN' when the byte following '%' is 'u'
    or 'U'; that form is rejected unless PercentUAllowed is set.

Arguments:

    pSourceChar     - Input buffer; must begin with '%'
    SourceLength    - Length of pSourceChar, in bytes
    PercentUAllowed - Accept '%uNNNN' notation?
    pOutChar        - decoded character; written only on success
    pBytesToSkip    - number of bytes consumed from pSourceChar;
                      will be 3 for %NN and 6 for %uNNNN.
                      It is stored as soon as the escape form is known,
                      so it may already have been written when a bad hex
                      digit makes the routine fail.

Return Value:

    STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD

--***************************************************************************/

NTSTATUS
HttpUnescapePercentHexEncoding(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  BOOLEAN PercentUAllowed,
    OUT PULONG  pOutChar,
    OUT PULONG  pBytesToSkip
    )
{
    ULONG   Result, i, NumDigits;
    PCUCHAR pHexDigits;

    PAGED_CODE();

    // The shortest legal escape is '%NN'; this also makes it safe to
    // read pSourceChar[0] and pSourceChar[1] below.

    if (SourceLength < STRLEN_LIT("%NN"))
    {
        UlTraceError(PARSER, (
                    "http!HttpUnescapePercentHexEncoding(%p): "
                    "Length too short, %lu.\n",
                    pSourceChar, SourceLength
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }
    else if (pSourceChar[0] != PERCENT)
    {
        UlTraceError(PARSER, (
                    "http!HttpUnescapePercentHexEncoding(%p): "
                    "Starts with 0x%02lX, not '%%'.\n",
                    pSourceChar, (ULONG) pSourceChar[0]
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    if (pSourceChar[1] != 'u'  &&  pSourceChar[1] != 'U')
    {
        // RFC 2396 says that an "escaped octet is encoded as a character
        // triplet, consisting of the percent character '%' followed by
        // the two hexadecimal digits representing the octet code."

        pHexDigits    = pSourceChar + STRLEN_LIT("%");
        NumDigits     = 2;
        *pBytesToSkip = STRLEN_LIT("%NN");
    }
    else
    {
        // This is the %uNNNN notation generated by JavaScript's escape() fn

        if (! PercentUAllowed)
        {
            // The format string has a single %p conversion, so pass
            // exactly one argument.

            UlTraceError(PARSER, (
                        "http!HttpUnescapePercentHexEncoding(%p): "
                        "%%uNNNN forbidden.\n",
                        pSourceChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }
        else if (SourceLength < STRLEN_LIT("%uNNNN"))
        {
            UlTraceError(PARSER, (
                        "http!HttpUnescapePercentHexEncoding(%p): "
                        "Length %lu too short for %%uNNNN.\n",
                        pSourceChar, SourceLength
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }

        pHexDigits    = pSourceChar + STRLEN_LIT("%u");
        NumDigits     = 4;
        *pBytesToSkip = STRLEN_LIT("%uNNNN");
    }

    ASSERT(*pBytesToSkip <= SourceLength);

    // Accumulate the hex digits, most significant nibble first.

    Result = 0;

    for (i = 0;  i < NumDigits;  ++i)
    {
        ULONG Char = pHexDigits[i];
        ULONG Digit;

        //
        // HexToChar() inlined. Note: in ASCII, '0' < 'A' < 'a' and there are
        // no gaps in ranges '0'..'9', 'A'..'F', and 'a'..'f' (unlike EBCDIC,
        // which has gaps between 'I'/'J', 'R'/'S', 'i'/'j', and 'r'/'s').
        //

        C_ASSERT('0' < 'A'  &&  'A' < 'a');
        C_ASSERT('9' - '0'  == 10 - 1);
        C_ASSERT('F' - 'A'  ==  6 - 1);
        C_ASSERT('f' - 'a'  ==  6 - 1);

        if (! IS_HTTP_HEX(Char))
        {
            UlTraceError(PARSER, (
                        "http!HttpUnescapePercentHexEncoding(%p): "
                        "Invalid hex character[%lu], 0x%02lX.\n",
                        pSourceChar, i, Char
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }
        else if ('a' <= Char)
        {
            ASSERT('a' <= Char  &&  Char <= 'f');
            Digit = Char - 'a' + 0xA;
        }
        else if ('A' <= Char)
        {
            ASSERT('A' <= Char  &&  Char <= 'F');
            Digit = Char - 'A' + 0xA;
        }
        else
        {
            ASSERT('0' <= Char  &&  Char <= '9');
            Digit = Char - '0';
        }

        ASSERT(Digit < 0x10);

        Result = (Result << 4)  |  Digit;
    }

    *pOutChar = Result;

    return STATUS_SUCCESS;

} // HttpUnescapePercentHexEncoding


/***************************************************************************++

Routine Description:

    Decode one character from pSourceChar, treating the input as raw
    UTF-8. The conversion itself is delegated to HttpUtf8RawBytesToUnicode.
    This routine is only suitable for the hostname part of an HTTP URL.

Arguments:

    pSourceChar     - Input buffer
    SourceLength    - Length of pSourceChar, in bytes; must be non-zero
    pUnicodeChar    - decoded character
    pBytesToSkip    - amount of pSourceChar consumed, as reported by
                      HttpUtf8RawBytesToUnicode

Return Value:

    The status returned by HttpUtf8RawBytesToUnicode.

--***************************************************************************/

NTSTATUS
HttppPopCharHostNameUtf8(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    PAGED_CODE();

    ASSERT(SourceLength > 0);

    // Nothing hostname-specific to add: hand the raw bytes straight
    // to the UTF-8 decoder and pass its verdict back unchanged.

    return HttpUtf8RawBytesToUnicode(
                pSourceChar,
                SourceLength,
                pUnicodeChar,
                pBytesToSkip
                );

} // HttppPopCharHostNameUtf8


/***************************************************************************++

Routine Description:

    Consume 1-2 bytes from pSourceChar and convert them from raw DBCS to
    Unicode. Two bytes are consumed when the first byte is a DBCS lead
    byte, otherwise one.
    This routine is only suitable for the hostname part of an HTTP URL.

Arguments:

    pSourceChar     - Input buffer
    SourceLength    - Length of pSourceChar, in bytes; must be non-zero
    pUnicodeChar    - decoded character; written only on success
    pBytesToSkip    - number of bytes consumed from pSourceChar (1 or 2);
                      written only on success

Return Value:

    STATUS_SUCCESS on success.
    STATUS_OBJECT_PATH_SYNTAX_BAD if a DBCS lead byte is the last byte
    of the input.
    Otherwise, the failure status returned by RtlMultiByteToUnicodeN.

--***************************************************************************/

NTSTATUS
HttppPopCharHostNameDbcs(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    NTSTATUS Status;
    ULONG    AnsiCharSize;
    WCHAR    WideChar;

    PAGED_CODE();

    ASSERT(SourceLength > 0);

    if (! IS_DBCS_LEAD_BYTE(pSourceChar[0]))
    {
        AnsiCharSize = 1;
    }
    else
    {
        // A lead byte needs a trail byte after it

        if (SourceLength < 2)
        {
            // Cast to ULONG so the argument matches the %lX conversion,
            // as the other traces in this file do.

            UlTraceError(PARSER, (
                        "http!HttppPopCharHostNameDbcs(%p): "
                        "ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
                        pSourceChar, (ULONG) *pSourceChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }

        AnsiCharSize = 2;
    }

    // Convert exactly one multibyte character into one WCHAR

    Status = RtlMultiByteToUnicodeN(
                    &WideChar,
                    sizeof(WCHAR),
                    NULL,
                    (PCSTR) pSourceChar,
                    AnsiCharSize
                    );

    if (!NT_SUCCESS(Status))
    {
        UlTraceError(PARSER, (
                    "http!HttppPopCharHostNameDbcs(%p): "
                    "MultiByteToUnicode(%lu) failed, %s.\n",
                    pSourceChar, AnsiCharSize, HttpStatusToString(Status)
                    ));

        return Status;
    }

    *pUnicodeChar = WideChar;
    *pBytesToSkip = AnsiCharSize;

    return STATUS_SUCCESS;

} // HttppPopCharHostNameDbcs


/***************************************************************************++

Routine Description:

    Consume 1 byte from pSourceChar and convert it from raw ANSI to
    Unicode by looking it up in AnsiToUnicodeMap. A map entry of zero is
    treated as "no mapping".
    This routine is only suitable for the hostname part of an HTTP URL.

Arguments:

    pSourceChar     - Input buffer
    SourceLength    - Length of pSourceChar, in bytes; must be non-zero
                      (only checked by an ASSERT)
    pUnicodeChar    - decoded character; written even on failure, in
                      which case it is zero
    pBytesToSkip    - number of bytes consumed from pSourceChar; always
                      set to 1

Return Value:

    STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD

--***************************************************************************/

NTSTATUS
HttppPopCharHostNameAnsi(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    NTSTATUS Status;

#if !DBG
    // SourceLength is only referenced by the ASSERT below
    UNREFERENCED_PARAMETER(SourceLength);
#endif // !DBG

    PAGED_CODE();

    ASSERT(SourceLength > 0);

    *pUnicodeChar = AnsiToUnicodeMap[pSourceChar[0]];
    *pBytesToSkip = 1;

    Status = (0 != *pUnicodeChar)
                ? STATUS_SUCCESS
                : STATUS_OBJECT_PATH_SYNTAX_BAD;

    if (!NT_SUCCESS(Status))
    {
        // Cast to ULONG so the argument matches the %lu conversion,
        // as the other traces in this file do.

        UlTraceError(PARSER, (
                    "http!HttppPopCharHostNameAnsi(%p): "
                    "No mapping for %lu.\n",
                    pSourceChar, (ULONG) *pSourceChar
                    ));
    }

    return Status;

} // HttppPopCharHostNameAnsi


/***************************************************************************++

Routine Description:

    Common tail function called at the end of the HttppPopCharAbsPath*()
    functions, to minimize code replication.

    For decoded characters below 0x100, a backslash is turned into a
    forward slash; any other character for which IS_URL_INVALID is true
    is rejected unless AllowRestrictedChars is set. Characters at or
    above 0x100 are passed through unchanged.

Arguments:

    pSourceChar          - Input buffer (used for tracing only)
    SourceLength         - Length of pSourceChar, in bytes (used by an
                           ASSERT only)
    UnicodeChar          - decoded character
    BytesToSkip          - number of bytes consumed from pSourceChar
    AllowRestrictedChars - if TRUE, skip the IS_URL_INVALID check
    pUnicodeChar         - where to put UnicodeChar result; written only
                           on success
    pBytesToSkip         - where to put BytesToSkip result; written only
                           on success

Return Value:

    STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD

--***************************************************************************/

__inline
NTSTATUS
HttppPopCharAbsPathCommonTail(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  ULONG   UnicodeChar,
    IN  ULONG   BytesToSkip,
    IN  BOOLEAN AllowRestrictedChars,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
#if !DBG
    // Both are referenced only by tracing/ASSERT in this routine
    UNREFERENCED_PARAMETER(pSourceChar);
    UNREFERENCED_PARAMETER(SourceLength);
#endif // !DBG

    //
    // Special handling for characters in the 8-bit range.
    // May want to look at BytesToSkip to distinguish between
    // raw and hex-escaped/UTF-8-encoded data.
    //
    // In particular, should we allow %2F or %u002F as alternate
    // representations of '/' in a URL? Why would anyone have a legitimate
    // need to escape a slash character?
    //

    if (UnicodeChar < 0x100)
    {
        // Transform backslashes to forward slashes

        if (BACK_SLASH == UnicodeChar)
        {
            UnicodeChar = FORWARD_SLASH;
        }
        else if (!AllowRestrictedChars  &&  IS_URL_INVALID(UnicodeChar))
        {
            // UnicodeChar is a ULONG, so use the 'l' size prefix like
            // the other U+ traces in this file.

            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathCommonTail(%p): "
                        "Invalid character, U+%04lX.\n",
                        pSourceChar, UnicodeChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }

        // CODEWORK: should we allow hex-escaped "restricted" or "unwise"
        // characters at all?
    }

    ASSERT(BytesToSkip <= SourceLength);

    *pBytesToSkip = BytesToSkip;
    *pUnicodeChar = UnicodeChar;

    return STATUS_SUCCESS;

} // HttppPopCharAbsPathCommonTail


/***************************************************************************++

Routine Description:

    Consume 1-12 bytes from pSourceChar. Handle hex-escaped UTF-8 encoding.
    This routine is only suitable for the /abspath part of an HTTP URL.

    Three input forms are handled:
      - a literal byte (must be <= ASCII_MAX), consumed as-is;
      - '%uNNNN' (if PercentUAllowed), taken as the code point directly,
        with no UTF-8 decoding;
      - '%NN' as a UTF-8 lead byte, followed by as many '%NN' trail bytes
        as the lead byte calls for; the collected octets are then decoded
        by HttpUtf8RawBytesToUnicode.

    The decoded character is finally passed through
    HttppPopCharAbsPathCommonTail.

Arguments:

    pSourceChar          - Input buffer
    SourceLength         - Length of pSourceChar, in bytes; must be
                           non-zero
    PercentUAllowed      - Accept '%uNNNN' notation for the first escape?
                           (never accepted for trail bytes)
    AllowRestrictedChars - forwarded to HttppPopCharAbsPathCommonTail
    pUnicodeChar         - decoded character
    pBytesToSkip         - number of bytes consumed from pSourceChar

Return Value:

    STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD

--***************************************************************************/

NTSTATUS
HttppPopCharAbsPathUtf8(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  BOOLEAN PercentUAllowed,
    IN  BOOLEAN AllowRestrictedChars,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    NTSTATUS Status;
    ULONG    UnicodeChar;
    ULONG    BytesToSkip;
    ULONG    Temp;
    ULONG    OctetCount;
    UCHAR    Octets[4];
    UCHAR    LeadByte;

    //
    // Sanity check.
    //

    PAGED_CODE();

    ASSERT(SourceLength > 0);

    //
    // validate it as a valid URL character
    //

    if (! IS_URL_TOKEN(pSourceChar[0]))
    {
        UlTraceError(PARSER, (
            "http!HttppPopCharAbsPathUtf8(%p): "
            "first char, 0x%02lX, isn't URL token\n",
            pSourceChar, (ULONG) pSourceChar[0]
            ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    //
    // need to unescape hex encoding, '%NN' or '%uNNNN'?
    //

    if (PERCENT != pSourceChar[0])
    {
        UnicodeChar = pSourceChar[0];
        BytesToSkip = 1;

        //
        // All octets with bit7 set MUST be hex-escaped.
        // Do NOT accept literals with hi-bit set.
        //

        if (UnicodeChar > ASCII_MAX)
        {
            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathUtf8(%p): "
                        "Invalid hi-bit literal, 0x%02lX.\n",
                        pSourceChar, UnicodeChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }

        Status = STATUS_SUCCESS;
        goto unslash;
    }

    Status = HttpUnescapePercentHexEncoding(
                    pSourceChar,
                    SourceLength,
                    PercentUAllowed,
                    &UnicodeChar,
                    &BytesToSkip
                    );

    if (! NT_SUCCESS(Status))
    {
        UlTraceError(PARSER, (
            "http!HttppPopCharAbsPathUtf8(%p): "
            "Invalid hex encoding.\n",
            pSourceChar
            ));

        return Status;
    }

    //
    // If we consumed '%uNNNN', don't attempt any UTF-8 decoding
    //

    if (STRLEN_LIT("%uNNNN") == BytesToSkip)
        goto unslash;

    ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
    ASSERT(UnicodeChar <= 0xFF);

    Octets[0] = LeadByte = (UCHAR) UnicodeChar;

    // A count of zero means the byte cannot start a UTF-8 sequence

    OctetCount = UTF8_OCTET_COUNT(LeadByte);

    if (0 == OctetCount)
    {
        UlTraceError(PARSER, (
                    "http!HttppPopCharAbsPathUtf8(%p): "
                    "Invalid lead byte, 0x%02lX.\n",
                    pSourceChar, UnicodeChar
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    ASSERT(OctetCount <= sizeof(Octets) / sizeof(Octets[0]));

    // Every octet of the sequence must be spelled as '%NN'

    BytesToSkip = OctetCount * STRLEN_LIT("%NN");

    if (BytesToSkip > SourceLength)
    {
        // The message reads "<available> octets is not enough for
        // <needed>-byte UTF-8 encoding", so SourceLength comes first.

        UlTraceError(PARSER, (
                    "http!HttppPopCharAbsPathUtf8(%p): "
                    "%lu octets is not enough for %lu-byte UTF-8 encoding.\n",
                    pSourceChar, SourceLength, OctetCount
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    if (OctetCount == 1)
    {
#if DBG
        // Singleton: no trail bytes. UnicodeChar already holds the
        // result; in debug builds, cross-check it against the decoder.

        Status = HttpUtf8RawBytesToUnicode(
                        Octets,
                        OctetCount,
                        &UnicodeChar,
                        &Temp
                        );

        ASSERT(STATUS_SUCCESS == Status);
        ASSERT(UnicodeChar == LeadByte);
        ASSERT(1 == Temp);
#endif // DBG
    }
    else
    {
        ULONG i;

        //
        // Decode the hex-escaped trail bytes
        //

        for (i = 1;  i < OctetCount;  ++i)
        {
            ULONG TrailChar;
            UCHAR TrailByte;

            Status = HttpUnescapePercentHexEncoding(
                            pSourceChar  +  i * STRLEN_LIT("%NN"),
                            STRLEN_LIT("%NN"),
                            FALSE,  // do not allow %uNNNN for trail bytes
                            &TrailChar,
                            &Temp
                            );

            if (! NT_SUCCESS(Status))
            {
                UlTraceError(PARSER, (
                            "http!HttppPopCharAbsPathUtf8(%p): "
                            "Invalid hex-encoded trail byte[%lu].\n",
                            pSourceChar, i
                            ));

                return Status;
            }

            ASSERT(STRLEN_LIT("%NN") == Temp);
            ASSERT(TrailChar <= 0xFF);

            Octets[i] = TrailByte = (UCHAR) TrailChar;

            if (! IS_UTF8_TRAILBYTE(TrailByte))
            {
                UlTraceError(PARSER, (
                            "http!HttppPopCharAbsPathUtf8(%p): "
                            "Invalid trail byte[%lu], 0x%02lX.\n",
                            pSourceChar, i, TrailChar
                            ));

                RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
            }
        }

        //
        // Decode the raw UTF-8 bytes
        //

        Status = HttpUtf8RawBytesToUnicode(
                        Octets,
                        OctetCount,
                        &UnicodeChar,
                        &Temp
                        );

        if (! NT_SUCCESS(Status))
        {
            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathUtf8(%p): "
                        "Invalid UTF-8 sequence.\n",
                        pSourceChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }
    }

unslash:

    ASSERT(NT_SUCCESS(Status));

    return HttppPopCharAbsPathCommonTail(
                pSourceChar,
                SourceLength,
                UnicodeChar,
                BytesToSkip,
                AllowRestrictedChars,
                pUnicodeChar,
                pBytesToSkip
                );

} // HttppPopCharAbsPathUtf8


/***************************************************************************++

Routine Description:

    Consume 1-6 bytes from pSourceChar. Handle hex-escaped DBCS encoding.
    This routine is only suitable for the /abspath part of an HTTP URL.

    The first byte may be a literal or a '%NN' escape; '%uNNNN' (if
    PercentUAllowed) is taken as the code point directly, with no DBCS
    conversion. If the first byte is a DBCS lead byte, a second byte is
    consumed, either as a literal (taken as-is) or as a '%NN' escape.
    The resulting 1 or 2 bytes are converted by RtlMultiByteToUnicodeN and
    the result is passed through HttppPopCharAbsPathCommonTail.

Arguments:

    pSourceChar          - Input buffer
    SourceLength         - Length of pSourceChar, in bytes; must be
                           non-zero
    PercentUAllowed      - Accept '%uNNNN' notation for the first escape?
                           (never accepted for the trail byte)
    AllowRestrictedChars - forwarded to HttppPopCharAbsPathCommonTail
    pUnicodeChar         - decoded character
    pBytesToSkip         - number of bytes consumed from pSourceChar

Return Value:

    STATUS_SUCCESS, STATUS_OBJECT_PATH_SYNTAX_BAD, or the failure status
    returned by RtlMultiByteToUnicodeN.

--***************************************************************************/

NTSTATUS
HttppPopCharAbsPathDbcs(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  BOOLEAN PercentUAllowed,
    IN  BOOLEAN AllowRestrictedChars,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    NTSTATUS Status;
    ULONG    UnicodeChar;
    WCHAR    WideChar;
    ULONG    BytesToSkip;
    UCHAR    AnsiChar[2];
    ULONG    AnsiCharSize;
    UCHAR    LeadByte;
    UCHAR    SecondByte = 0;

    //
    // Sanity check.
    //

    PAGED_CODE();

    ASSERT(SourceLength > 0);

    //
    // DBCS and ANSI decoders must allow any raw byte whose top bit
    // is set (0x80-0xFF), so such bytes are exempt from the URL token
    // check, exactly as in HttppPopCharAbsPathAnsi.
    //

    if (! IS_URL_TOKEN(pSourceChar[0])     &&
        !(0x80 & pSourceChar[0]))
    {
        UlTraceError(PARSER, (
                    "http!HttppPopCharAbsPathDbcs(%p): "
                    "first char, 0x%02lX, isn't URL token\n",
                    pSourceChar, (ULONG) pSourceChar[0]
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    if (PERCENT != pSourceChar[0])
    {
        // Note: unlike UTF-8, we allow literal bytes whose top bit is set

        UnicodeChar = pSourceChar[0];
        BytesToSkip = 1;
    }
    else
    {
        // need to unescape hex encoding, '%NN' or '%uNNNN'

        Status = HttpUnescapePercentHexEncoding(
                        pSourceChar,
                        SourceLength,
                        PercentUAllowed,
                        &UnicodeChar,
                        &BytesToSkip
                        );

        if (! NT_SUCCESS(Status))
        {
            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathDbcs(%p): "
                        "Invalid hex encoding.\n",
                        pSourceChar
                        ));

            return Status;
        }

        //
        // If we consumed '%uNNNN', don't attempt DBCS-to-Unicode conversion
        //

        if (STRLEN_LIT("%uNNNN") == BytesToSkip)
            goto unslash;

        ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
        ASSERT(UnicodeChar <= 0xFF);
    }

    LeadByte    = (UCHAR) UnicodeChar;
    AnsiChar[0] = LeadByte;

    if (! IS_DBCS_LEAD_BYTE(LeadByte))
    {
        AnsiCharSize = 1;
    }
    else
    {
        //
        // This is a double-byte character.
        //

        ASSERT(BytesToSkip <= SourceLength);

        // There must be at least one more source byte for the trail byte

        if (BytesToSkip == SourceLength)
        {
            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathDbcs(%p): "
                        "ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
                        pSourceChar, UnicodeChar
                        ));

            RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
        }

        AnsiCharSize = 2;
        SecondByte   = pSourceChar[BytesToSkip];

        if (PERCENT != SecondByte)
        {
            // Literal trail byte: used as-is

            BytesToSkip += 1;
        }
        else
        {
            ULONG TrailChar;
            ULONG Temp;

            if (BytesToSkip + STRLEN_LIT("%NN") > SourceLength)
            {
                UlTraceError(PARSER, (
                            "http!HttppPopCharAbsPathDbcs(%p): "
                            "ERROR: no space for DBCS hex-encoded suffix\n",
                            pSourceChar
                            ));

                RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
            }

            Status = HttpUnescapePercentHexEncoding(
                            pSourceChar  + BytesToSkip,
                            SourceLength - BytesToSkip,
                            FALSE,          // no %uNNNN allowed here
                            &TrailChar,
                            &Temp
                            );

            if (! NT_SUCCESS(Status))
            {
                UlTraceError(PARSER, (
                            "http!HttppPopCharAbsPathDbcs(%p): "
                            "Invalid hex encoding of trail byte.\n",
                            pSourceChar
                            ));

                return Status;
            }

            ASSERT(STRLEN_LIT("%NN") == Temp);
            ASSERT(TrailChar <= 0xFF);

            SecondByte   = (UCHAR) TrailChar;
            BytesToSkip += STRLEN_LIT("%NN");
        }

        AnsiChar[1] = SecondByte;
    }

    // Convert the collected 1 or 2 bytes into a single WCHAR

    Status = RtlMultiByteToUnicodeN(
                    &WideChar,
                    sizeof(WCHAR),
                    NULL,
                    (PCHAR) &AnsiChar[0],
                    AnsiCharSize
                    );

    if (!NT_SUCCESS(Status))
    {
        UlTraceError(PARSER, (
                    "http!HttppPopCharAbsPathDbcs(%p): "
                    "MultiByteToUnicode(%lu) failed, %s.\n",
                    pSourceChar, AnsiCharSize, HttpStatusToString(Status)
                    ));

        return Status;
    }

    UnicodeChar = WideChar;

#if DBG
    //
    // Describe conversion in debug spew.
    //

    if (1 == AnsiCharSize)
    {
        UlTraceVerbose(PARSER, (
            "http!HttppPopCharAbsPathDbcs(%p): "
            "converted %02X to U+%04lX '%C'\n",
            pSourceChar,
            LeadByte,
            UnicodeChar,
            UnicodeChar
            ));
    }
    else
    {
        ASSERT(2 == AnsiCharSize);

        UlTraceVerbose(PARSER, (
            "http!HttppPopCharAbsPathDbcs(%p): "
            "converted %02X %02X to U+%04lX '%C'\n",
            pSourceChar,
            LeadByte,
            SecondByte,
            UnicodeChar,
            UnicodeChar
            ));
    }
#endif // DBG

unslash:

    ASSERT(NT_SUCCESS(Status));

    return HttppPopCharAbsPathCommonTail(
                pSourceChar,
                SourceLength,
                UnicodeChar,
                BytesToSkip,
                AllowRestrictedChars,
                pUnicodeChar,
                pBytesToSkip
                );

} // HttppPopCharAbsPathDbcs


/***************************************************************************++

Routine Description:

    Consume 1-6 bytes from pSourceChar. Handle hex-escaped ANSI encoding.
    This routine is only suitable for the /abspath part of an HTTP URL.

    A literal byte or a '%NN' escape is translated through
    AnsiToUnicodeMap; a '%uNNNN' escape (if PercentUAllowed) is taken as
    the code point directly. The result is then passed through
    HttppPopCharAbsPathCommonTail.

Arguments:

    pSourceChar          - Input buffer
    SourceLength         - Length of pSourceChar, in bytes; must be
                           non-zero
    PercentUAllowed      - Accept '%uNNNN' notation?
    AllowRestrictedChars - forwarded to HttppPopCharAbsPathCommonTail
    pUnicodeChar         - decoded character
    pBytesToSkip         - number of bytes consumed from pSourceChar

Return Value:

    STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD

--***************************************************************************/

NTSTATUS
HttppPopCharAbsPathAnsi(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  BOOLEAN PercentUAllowed,
    IN  BOOLEAN AllowRestrictedChars,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    NTSTATUS Status;
    ULONG    DecodedChar;
    ULONG    Consumed;
    UCHAR    FirstByte;

    PAGED_CODE();

    ASSERT(SourceLength > 0);

    FirstByte = pSourceChar[0];

    //
    // The first byte has to be a URL token, except that DBCS and ANSI
    // decoders must allow any raw byte whose top bit is set (0x80-0xFF).
    //

    if (! (IS_URL_TOKEN(FirstByte)  ||  (0x80 & FirstByte)))
    {
        UlTraceError(PARSER, (
                    "http!HttppPopCharAbsPathAnsi(%p): "
                    "first char, 0x%02lX, isn't URL token\n",
                    pSourceChar, (ULONG) FirstByte
                    ));

        RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
    }

    if (PERCENT == FirstByte)
    {
        // Hex-escaped input: '%NN' or '%uNNNN'

        Status = HttpUnescapePercentHexEncoding(
                        pSourceChar,
                        SourceLength,
                        PercentUAllowed,
                        &DecodedChar,
                        &Consumed
                        );

        if (! NT_SUCCESS(Status))
        {
            UlTraceError(PARSER, (
                        "http!HttppPopCharAbsPathAnsi(%p): "
                        "Invalid hex encoding.\n",
                        pSourceChar
                        ));

            return Status;
        }

        // Only a '%NN' octet goes through the ANSI table; a '%uNNNN'
        // value is left untouched.

        if (STRLEN_LIT("%uNNNN") != Consumed)
        {
            ASSERT(STRLEN_LIT("%NN") == Consumed);
            ASSERT(DecodedChar <= 0xFF);

            DecodedChar = AnsiToUnicodeMap[(UCHAR) DecodedChar];
        }
    }
    else
    {
        // Literal byte; unlike UTF-8, the top bit may be set

        DecodedChar = AnsiToUnicodeMap[FirstByte];
        Consumed    = 1;
        Status      = STATUS_SUCCESS;
    }

    ASSERT(NT_SUCCESS(Status));

    return HttppPopCharAbsPathCommonTail(
                pSourceChar,
                SourceLength,
                DecodedChar,
                Consumed,
                AllowRestrictedChars,
                pUnicodeChar,
                pBytesToSkip
                );

} // HttppPopCharAbsPathAnsi


/***************************************************************************++

Routine Description:

    Consume 1 byte from pSourceChar and return it unaltered.
    This routine is only suitable for the ?querystring part of an HTTP URL,
    which we do not interpret.

    CODEWORK: don't 'convert' querystring to Unicode. Send it up verbatim.

Arguments:

    pSourceChar          - Input buffer
    SourceLength         - Length of pSourceChar, in bytes (unused)
    PercentUAllowed      - unused
    AllowRestrictedChars - unused
    pUnicodeChar         - receives the first byte of pSourceChar
    pBytesToSkip         - number of bytes consumed from pSourceChar;
                           always set to 1

Return Value:

    STATUS_SUCCESS, always.

--***************************************************************************/

NTSTATUS
HttppPopCharQueryString(
    IN  PCUCHAR pSourceChar,
    IN  ULONG   SourceLength,
    IN  BOOLEAN PercentUAllowed,
    IN  BOOLEAN AllowRestrictedChars,
    OUT PULONG  pUnicodeChar,
    OUT PULONG  pBytesToSkip
    )
{
    PAGED_CODE();

    // These parameters are not used by the body of this routine.

    UNREFERENCED_PARAMETER(AllowRestrictedChars);
    UNREFERENCED_PARAMETER(PercentUAllowed);
    UNREFERENCED_PARAMETER(SourceLength);

    // Pass the byte through untouched, widened to a ULONG.

    *pBytesToSkip = 1;
    *pUnicodeChar = pSourceChar[0];

    return STATUS_SUCCESS;

} // HttppPopCharQueryString


//
// EMIT_CHAR: local helper macro that appends one decoded code point to a
// WCHAR output cursor.
//
//  ch                      - code point to emit (evaluated several times,
//                            so it must be free of side effects)
//  pDest                   - WCHAR cursor lvalue; advanced by 1 or 2 WCHARs
//  BytesCopied             - byte-count lvalue; increased by the number of
//                            bytes stored
//  Status                  - NTSTATUS lvalue; assigned when an error occurs
//  AllowRestrictedChars    - accepted, but not referenced by the expansion
//
// A value above LOW_NONCHAR_BITS is handed to HttpUcs4toUtf16() and the two
// resulting surrogate WCHARs are stored. Any other value is stored as a
// single WCHAR unless IS_UNICODE_NONCHAR() rejects it.
//
// On failure the macro assigns Status and executes `goto end`, so every
// function that uses it must define an `end:` label. Failure statuses are
// whatever HttpUcs4toUtf16() returns, STATUS_INVALID_PARAMETER for a
// non-character, or STATUS_DATA_OVERRUN once BytesCopied exceeds
// UNICODE_STRING_MAX_BYTE_LEN.
//
// NOTE(review): `while (0, 0)` presumably sidesteps a "conditional
// expression is constant" compiler warning -- confirm before changing it.
//

#define EMIT_CHAR(ch, pDest, BytesCopied, Status, AllowRestrictedChars) \
    do                                                          \
    {                                                           \
        WCHAR HighSurrogate, LowSurrogate;                      \
                                                                \
        if ((ch) > LOW_NONCHAR_BITS)                            \
        {                                                       \
            Status = HttpUcs4toUtf16((ch),                      \
                            &HighSurrogate, &LowSurrogate);     \
                                                                \
            if (! NT_SUCCESS(Status))                           \
                goto end;                                       \
                                                                \
            *pDest++ = HighSurrogate;                           \
            *pDest++ = LowSurrogate;                            \
            BytesCopied += 2 * sizeof(WCHAR);                   \
        }                                                       \
        else                                                    \
        {                                                       \
            /* (ch) is parenthesized so that an expression   */ \
            /* argument cannot rebind around '<' and '||'.   */ \
            ASSERT((ch) < HIGH_SURROGATE_START                  \
                    ||   LOW_SURROGATE_END < (ch));             \
                                                                \
            if ( IS_UNICODE_NONCHAR((ch)) )                     \
            {                                                   \
                UlTraceError(PARSER, (                          \
                    "http!HttpUcs4toUtf16(): "                  \
                    "Non-character code point, U+%04lX.\n",     \
                    (ch) ));                                    \
                                                                \
                Status = STATUS_INVALID_PARAMETER;              \
                goto end;                                       \
            }                                                   \
                                                                \
            *pDest++ = (WCHAR) (ch);                            \
            BytesCopied += sizeof(WCHAR);                       \
        }                                                       \
                                                                \
        /* Can probably omit this test */                       \
        if (BytesCopied > UNICODE_STRING_MAX_BYTE_LEN)          \
        {                                                       \
            Status = STATUS_DATA_OVERRUN;                       \
            goto end;                                           \
        }                                                       \
    } while (0, 0)


//
// EMIT_LITERAL_CHAR: stores one character, which must satisfy IS_ASCII(),
// at the WCHAR cursor pDest, then advances the cursor by one WCHAR and
// BytesCopied by sizeof(WCHAR). Unlike EMIT_CHAR there is no failure path.
//
#define EMIT_LITERAL_CHAR(ch, pDest, BytesCopied)               \
    do                                                          \
    {                                                           \
        ASSERT(IS_ASCII(ch));                                   \
                                                                \
        *pDest = (WCHAR) (ch);                                  \
        ++pDest;                                                \
        BytesCopied += sizeof(WCHAR);                           \
    } while (0, 0)


//
// Maps a UrlDecode_* value to a short name for trace output:
// UrlDecode_Ansi -> "Ansi", UrlDecode_Dbcs -> "Dbcs", anything else -> "Utf8".
// The argument may be evaluated twice; it is parenthesized so that an
// expression such as (Order & Mask) compares as a whole.
//
#define HttppUrlEncodingToString(UrlEncoding)                   \
    (((UrlEncoding) == UrlDecode_Ansi)                          \
        ? "Ansi"                                                \
        : ((UrlEncoding) == UrlDecode_Dbcs)                     \
            ? "Dbcs"                                            \
            : "Utf8")


/***************************************************************************++

Routine Description:

    Copies a hostname, converting it to Unicode.

    The encodings packed into pCfg->HostnameDecodeOrder are tried one at
    a time, lowest-order slot first, until HttppCopyHostByType() succeeds
    or the list is exhausted.

Arguments:

    pCfg                    - Supplies HostnameDecodeOrder, a packed list
                              of UrlDecode_* values occupying
                              UrlDecode_Shift bits per slot
    pDestination            - Receives the converted hostname
    pSource                 - Raw hostname bytes
    SourceLength            - Length of pSource, in bytes
    pBytesCopied            - Passed through to HttppCopyHostByType(),
                              which stores the number of bytes written
    pHostnameEncodingType   - On success, receives the encoding that
                              decoded the hostname

Return Value:

    STATUS_INVALID_PARAMETER if HostnameDecodeOrder is zero or has bits
    set outside UrlDecode_MaxMask; otherwise the status of the last
    conversion attempt (STATUS_UNSUCCESSFUL if none was made).

--***************************************************************************/
NTSTATUS
HttpCopyHost(
    IN      PURL_C14N_CONFIG    pCfg,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied,
    OUT     PURL_ENCODING_TYPE  pHostnameEncodingType
    )
{
    NTSTATUS Status         = STATUS_UNSUCCESSFUL;
    ULONG    DecodeOrder;

    PAGED_CODE();

    ASSERT(NULL != pCfg);
    ASSERT(NULL != pDestination);
    ASSERT(NULL != pSource);
    ASSERT(NULL != pBytesCopied);
    ASSERT(NULL != pHostnameEncodingType);

    // Read the configuration only after pCfg has been asserted non-NULL,
    // so that the assertion fires before the dereference does.
    DecodeOrder = pCfg->HostnameDecodeOrder;

    if (0 == DecodeOrder  ||  DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
    {
        UlTraceError(PARSER,
                    ("http!HttpCopyHost: invalid DecodeOrder, 0x%lX\n",
                    DecodeOrder
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Peel one encoding slot off the low end of DecodeOrder per iteration;
    // stop as soon as one of them converts the hostname successfully.
    for ( ;
         0 != DecodeOrder  &&  !NT_SUCCESS(Status);
         DecodeOrder >>= UrlDecode_Shift
        )
    {
        ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);

        switch (UrlEncoding)
        {
            default:
                ASSERT(! "Impossible UrlDecodeOrder");
                // fall through: treat an unknown slot like an empty one

            case UrlDecode_None:
                break;

            case UrlDecode_Ansi:
            case UrlDecode_Dbcs:
            case UrlDecode_Utf8:

                UlTraceVerbose(PARSER,
                                ("http!HttpCopyHost(%s, Src=%p, %lu)\n",
                                HttppUrlEncodingToString(UrlEncoding),
                                pSource, SourceLength
                                ));

                Status = HttppCopyHostByType(
                            (URL_ENCODING_TYPE) UrlEncoding,
                            pDestination,
                            pSource,
                            SourceLength,
                            pBytesCopied
                            );

                if (NT_SUCCESS(Status))
                {
                    *pHostnameEncodingType = (URL_ENCODING_TYPE) UrlEncoding;

                    UlTraceVerbose(PARSER,
                                    ("http!HttpCopyHost(%s): "
                                     "(%lu) '%.*s' -> (%lu) '%ls'\n",
                                     HttppUrlEncodingToString(UrlEncoding),
                                     SourceLength, SourceLength, pSource,
                                     *pBytesCopied/sizeof(WCHAR), pDestination
                                    ));
                }

                break;
        }
    }

    return Status;

} // HttpCopyHost


/***************************************************************************++

Routine Description:

    Copies a hostname, converting it to Unicode using one specific
    encoding.

    Bytes that satisfy IS_ASCII() are emitted directly; every other byte
    is decoded by the pop-char routine selected for UrlEncoding. The
    output is NUL-terminated, but the terminator is not counted in
    *pBytesCopied.

    No destination size is passed in, so the caller must supply a buffer
    large enough for the converted hostname and its terminator.

    CODEWORK: Handle ACE-encoded hostnames

Arguments:

    UrlEncoding     - UrlEncoding_Ansi, UrlEncoding_Dbcs, or UrlEncoding_Utf8
    pDestination    - Receives the NUL-terminated Unicode hostname
    pSource         - Raw hostname bytes
    SourceLength    - Length of pSource, in bytes
    pBytesCopied    - On success, receives the number of bytes written to
                      pDestination, excluding the terminator. Not written
                      on failure.

Return Value:

    STATUS_SUCCESS, STATUS_INVALID_PARAMETER for an unknown UrlEncoding,
    or the failure status produced by the pop-char routine or EMIT_CHAR.

--***************************************************************************/
NTSTATUS
HttppCopyHostByType(
    IN      URL_ENCODING_TYPE   UrlEncoding,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied
    )
{
    NTSTATUS                Status;
    PWSTR                   pDest;
    PCUCHAR                 pChar;
    ULONG                   BytesCopied;
    ULONG                   UnicodeChar;
    ULONG                   CharToSkip;
    PFN_POPCHAR_HOSTNAME    pfnPopChar;

    //
    // Sanity check.
    //

    PAGED_CODE();

    // Pick the decoder used for non-ASCII bytes.
    if (UrlEncoding_Ansi == UrlEncoding)
        pfnPopChar = &HttppPopCharHostNameAnsi;
    else if (UrlEncoding_Dbcs == UrlEncoding)
        pfnPopChar = &HttppPopCharHostNameDbcs;
    else if (UrlEncoding_Utf8 == UrlEncoding)
        pfnPopChar = &HttppPopCharHostNameUtf8;
    else
    {
        ASSERT(! "Invalid UrlEncoding");
        RETURN(STATUS_INVALID_PARAMETER);
    }

    pDest = pDestination;
    BytesCopied = 0;

    pChar = pSource;

    // The signed comparison also ends the loop if the subtraction at the
    // bottom ever wraps SourceLength below zero.
    while ((int)SourceLength > 0)
    {
        UnicodeChar = *pChar;

        if (IS_ASCII(UnicodeChar))
        {
            // Plain ASCII needs no decoding
            CharToSkip = 1;
        }
        else
        {
            Status = (*pfnPopChar)(
                            pChar,
                            SourceLength,
                            &UnicodeChar,
                            &CharToSkip
                            );

            if (NT_SUCCESS(Status) == FALSE)
                goto end;
        }

        ASSERT(CharToSkip <= SourceLength);

        // Stores one or two WCHARs; jumps to `end` with Status set on error
        EMIT_CHAR(
            UnicodeChar,
            pDest,
            BytesCopied,
            Status,
            FALSE
            );

        pChar += CharToSkip;
        SourceLength -= CharToSkip;
    }

    //
    // terminate the string, it hasn't been done in the loop
    //

    // Only inspect the previous WCHAR if one was actually written;
    // for an empty hostname pDest still points at the start of the buffer.
    ASSERT(pDest == pDestination  ||  (pDest-1)[0] != UNICODE_NULL);

    pDest[0] = UNICODE_NULL;
    *pBytesCopied = BytesCopied;

    Status = STATUS_SUCCESS;


end:
    return Status;

}   // HttppCopyHostByType


/*++

Routine Description:

    Validates that a hostname is well-formed. Three shapes are accepted,
    each optionally followed by ":<port>":

      - an IPv6 literal in square brackets, per RFC 2732
      - a dotted domain name built from non-empty labels
      - an IPv4 literal (chosen when every label is all-digits)

    No DNS lookup is performed.

    CODEWORK: For future IDN (International Domain Names) work,
    we may need to handle raw UTF-8 or ACE hostnames.

    Note: if the validation algorithm changes here, it may be necessary
    to update HttpParseUrl() too.

Arguments:

    pCfg            - Supplies MaxLabelLength and MaxHostnameLength
    pHostname       - the hostname
    HostnameLength  - length of hostname, in bytes
    HostnameType    - Source of the hostname: Host header, AbsUri, or
                        synthesized from the transport's local IP address
    pAddressType    - Receives TDI_ADDRESS_TYPE_IP6 for an IPv6 literal,
                        TDI_ADDRESS_TYPE_IP for an IPv4 literal, and 0
                        otherwise (including the empty Host header case)

Return Value:

    STATUS_SUCCESS if valid. Otherwise STATUS_INVALID_PARAMETER, or the
    failure status returned by RtlIpv6StringToAddressA() or
    RtlIpv4StringToAddressA().

--*/

NTSTATUS
HttpValidateHostname(
    IN      PURL_C14N_CONFIG    pCfg,
    IN      PCUCHAR             pHostname,
    IN      ULONG               HostnameLength,
    IN      HOSTNAME_TYPE       HostnameType,
    OUT     PSHORT              pAddressType
    )
{
    PCUCHAR         pChar;
    PCUCHAR         pLabel;
    PCUCHAR         pEnd = pHostname + HostnameLength;
    PCSTR           pTerminator;
    NTSTATUS        Status;
    USHORT          Port;
    struct in_addr  IPv4Address;
    struct in6_addr IPv6Address;
    BOOLEAN         AlphaLabel;

    PAGED_CODE();

    ASSERT(NULL != pCfg);
    ASSERT(NULL != pHostname);
    ASSERT(NULL != pAddressType);

    // Give the output a defined value on every path, including the
    // empty Host header success path below, which sets nothing else.
    *pAddressType = 0;

    if (0 == HostnameLength)
    {
        // RFC 2616, 14.23 "Host" says that the Host header can be empty
        if (Hostname_HostHeader == HostnameType)
            goto end;

        // It is an error for empty hostnames to appear elsewhere
        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: empty hostname\n"
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Is this an IPv6 literal address, per RFC 2732?

    if ('[' == *pHostname)
    {
        // Empty brackets?
        if (HostnameLength < STRLEN_LIT("[0]")  ||  ']' == pHostname[1])
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: IPv6 address too short\n"
                         ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        // Scan for the closing ']' while checking the character set.
        // Finding ']' inside the buffer also guarantees that the RTL
        // parser below stops before pEnd.
        for (pChar = pHostname + STRLEN_LIT("[");  pChar < pEnd;  ++pChar)
        {
            if (']' == *pChar)
                break;

            //
            // Dots are allowed because the last 32 bits may be represented
            // in IPv4 dotted-octet notation. We do not accept Scope IDs
            // (indicated by '%') in hostnames.
            //

            if (IS_HTTP_HEX(*pChar)  ||  ':' == *pChar  ||  '.' == *pChar)
                continue;

            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: "
                        "Invalid char in IPv6 address, 0x%02X '%c', "
                        "after '%.*s'\n",
                        *pChar,
                        IS_HTTP_PRINT(*pChar) ? *pChar : '?',
                        DIFF(pChar - pHostname),
                        pHostname
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        if (pChar == pEnd)
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: No ']' for IPv6 address\n"
                         ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        ASSERT(pChar < pEnd);
        ASSERT(']' == *pChar);

        // Let the RTL routine do the hard work of parsing IPv6 addrs
        Status = RtlIpv6StringToAddressA(
                    (PCSTR) pHostname + STRLEN_LIT("["),
                    &pTerminator,
                    &IPv6Address
                    );

        if (! NT_SUCCESS(Status))
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: "
                        "Invalid IPv6 address, %s\n",
                        HttpStatusToString(Status)
                        ));

            RETURN(Status);
        }

        // The parser must have consumed everything up to the ']'
        if (pTerminator != (PCSTR) pChar)
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: "
                        "Invalid IPv6 terminator, 0x%02X '%c'\n",
                        *pTerminator,
                        IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        *pAddressType = TDI_ADDRESS_TYPE_IP6;

        // Skip the terminating ']'
        pChar += STRLEN_LIT("]");

        // Any chars after the ']'?
        if (pChar == pEnd)
        {
            ASSERT(DIFF(pEnd - pHostname) <= pCfg->MaxHostnameLength);
            goto end;
        }

        ASSERT(pChar < pEnd);

        if (':' == *pChar)
            goto port;

        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: "
                    "Invalid char after IPv6 ']', 0x%02X '%c'\n",
                    *pChar,
                    IS_HTTP_PRINT(*pChar) ? *pChar : '?'
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    //
    // It must be a domain name or an IPv4 literal. We'll try to treat
    // it as a domain name first. If it turns out to be all-numeric,
    // we'll try decoding it as an IPv4 literal. We'll see if the name
    // is well-formed, but we will not do a DNS lookup to see if it exists,
    // as that would be much too expensive.
    //

    AlphaLabel = FALSE;
    pLabel     = pHostname;

    for (pChar = pHostname;  pChar < pEnd;  ++pChar)
    {
        if (':' == *pChar)
        {
            if (pChar == pHostname)
            {
                UlTraceError(PARSER,
                            ("http!HttpValidateHostname: empty hostname\n"
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }


            // exit the loop
            break;
        }

        if ('.' == *pChar)
        {
            ULONG LabelLength = DIFF(pChar - pLabel);

            // There must be at least one char in the label
            if (0 == LabelLength)
            {
                UlTraceError(PARSER,
                            ("http!HttpValidateHostname: empty label\n"
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Label can't have more than 63 chars
            if (LabelLength > pCfg->MaxLabelLength)
            {
                UlTraceError(PARSER,
                            ("http!HttpValidateHostname: overlong label, %lu\n",
                             LabelLength
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Reset for the next label
            pLabel = pChar + STRLEN_LIT(".");

            continue;
        }

        // CODEWORK: handle DBCS characters

        if (!IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
        {
            // Any legal non-digit rules out an IPv4 literal
            if (!IS_HTTP_DIGIT(*pChar))
                AlphaLabel = TRUE;

            if (pChar > pLabel)
                continue;

            // The first char of a label cannot be a hyphen. (Underscore?)
            if ('-' == *pChar)
            {
                UlTraceError(PARSER,
                            ("http!HttpValidateHostname: "
                             "'-' at beginning of label\n"
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            continue;
        }

        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: "
                     "Invalid char in hostname, 0x%02X '%c', "
                     "after '%.*s'\n",
                     *pChar,
                     IS_HTTP_PRINT(*pChar) ? *pChar : '?',
                     DIFF(pChar - pHostname),
                     pHostname
                    ));

        RETURN(STATUS_INVALID_PARAMETER);

    } // loop through hostname


    ASSERT(pChar == pEnd  ||  ':' == *pChar);

    //
    // The loop measures a label only when it reaches the '.' that ends
    // it, so the final label (ended by ':' or by the end of the buffer)
    // has not been length-checked yet. It is allowed to be empty, i.e.
    // a trailing '.', exactly as the loop has always permitted.
    //

    {
        ULONG LastLabelLength = DIFF(pChar - pLabel);

        if (LastLabelLength > pCfg->MaxLabelLength)
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: overlong label, %lu\n",
                         LastLabelLength
                         ));

            RETURN(STATUS_INVALID_PARAMETER);
        }
    }

    if (AlphaLabel)
    {
        *pAddressType = 0;
    }
    else
    {
        // Let's see if it's a valid IPv4 address
        Status = RtlIpv4StringToAddressA(
                    (PCSTR) pHostname,
                    TRUE,           // strict => 4 dotted decimal octets
                    &pTerminator,
                    &IPv4Address
                    );

        if (!NT_SUCCESS(Status))
        {
            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: "
                        "Invalid IPv4 address, %s\n",
                        HttpStatusToString(Status)
                        ));

            RETURN(Status);
        }

        // The parser must stop exactly where our own scan stopped
        if (pTerminator != (PCSTR) pChar)
        {
            ASSERT(pTerminator < (PCSTR) pChar);

            UlTraceError(PARSER,
                        ("http!HttpValidateHostname: "
                        "Invalid IPv4 address after %lu chars, "
                        "0x%02X, '%c'\n",
                        DIFF(pTerminator - (PCSTR) pHostname),
                        *pTerminator,
                        IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        *pAddressType = TDI_ADDRESS_TYPE_IP;
    }

port:

    //
    // Parse the port number
    //

    // Check for overlong hostnames
    if (DIFF(pChar - pHostname) > pCfg->MaxHostnameLength)
    {
        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: overlong hostname, %lu\n",
                     DIFF(pChar - pHostname)
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    if (pChar == pEnd)
        goto end;

    ASSERT(pHostname < pChar  &&  pChar < pEnd);
    ASSERT(':' == *pChar);

    pChar += STRLEN_LIT(":");

    ASSERT(pChar <= pEnd);

    // RFC 2616, section 3.2.2 "http URL", says:
    // "If the port is empty or not given, port 80 is assumed".
    // Port is local to this routine; it is validated but not returned.
    if (pChar == pEnd)
    {
        Port = 80;
        goto end;
    }

    Status = HttpAnsiStringToUShort(
                pChar,
                pEnd - pChar,   // <port> must occupy all remaining chars
                FALSE,          // no leading zeros permitted
                10,
                (PUCHAR*) &pTerminator,
                &Port
                );

    if (!NT_SUCCESS(Status))
    {
        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: "
                    "Invalid port number, %s\n",
                    HttpStatusToString(Status)
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(pTerminator == (PCSTR) pEnd);

    if (0 == Port)
    {
        UlTraceError(PARSER,
                    ("http!HttpValidateHostname: Port must not be zero.\n"
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

end:
    RETURN(STATUS_SUCCESS);

} // HttpValidateHostname


/***************************************************************************++

Routine Description:

    Converts a URL to Unicode by trying, in turn, each encoding packed
    into pCfg->AbsPathDecodeOrder (lowest-order slot first) until
    HttppCopyUrlByType() succeeds or no encodings remain.

Arguments:

    pCfg                - Supplies AbsPathDecodeOrder; also handed to
                          HttppCopyUrlByType()
    pDestination        - Receives the converted URL
    pSource             - Raw URL bytes
    SourceLength        - Length of pSource, in bytes
    pBytesCopied        - Passed through to HttppCopyUrlByType()
    pUrlEncodingType    - On success, receives the encoding that worked

Return Value:

    NTSTATUS - STATUS_INVALID_PARAMETER for a zero or out-of-range decode
    order; otherwise the status of the last conversion attempt.


--***************************************************************************/
NTSTATUS
HttpCopyUrl(
    IN      PURL_C14N_CONFIG    pCfg,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied,
    OUT     PURL_ENCODING_TYPE  pUrlEncodingType
    )
{
    ULONG    RemainingOrder = pCfg->AbsPathDecodeOrder;
    NTSTATUS Status         = STATUS_UNSUCCESSFUL;

    PAGED_CODE();

    ASSERT(NULL != pDestination);
    ASSERT(NULL != pSource);
    ASSERT(NULL != pBytesCopied);
    ASSERT(NULL != pUrlEncodingType);

    // Reject an empty order, or one with bits beyond the defined slots
    if (0 == RemainingOrder
        ||  (RemainingOrder & UrlDecode_MaxMask) != RemainingOrder)
    {
        UlTraceError(PARSER,
                    ("http!HttpCopyUrl: invalid DecodeOrder, 0x%lX\n",
                    RemainingOrder
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    while (0 != RemainingOrder  &&  !NT_SUCCESS(Status))
    {
        // Take the lowest slot and shift it out for the next pass
        ULONG Encoding = (RemainingOrder & UrlDecode_Mask);

        RemainingOrder >>= UrlDecode_Shift;

        // An empty slot is simply skipped
        if (UrlDecode_None == Encoding)
            continue;

        // Anything other than the three known encodings is a logic error;
        // it is skipped just like an empty slot
        if (UrlDecode_Ansi != Encoding
            &&  UrlDecode_Dbcs != Encoding
            &&  UrlDecode_Utf8 != Encoding)
        {
            ASSERT(! "Impossible UrlDecodeOrder");
            continue;
        }

        UlTraceVerbose(PARSER,
                        ("http!HttpCopyUrl(%s, Src=%p, %lu)\n",
                        HttppUrlEncodingToString(Encoding),
                        pSource, SourceLength
                        ));

        Status = HttppCopyUrlByType(
                    pCfg,
                    (URL_ENCODING_TYPE) Encoding,
                    pDestination,
                    pSource,
                    SourceLength,
                    pBytesCopied
                    );

        if (!NT_SUCCESS(Status))
            continue;

        *pUrlEncodingType = (URL_ENCODING_TYPE) Encoding;

        UlTraceVerbose(PARSER,
                        ("http!HttpCopyUrl(%s): "
                         "(%lu) '%.*s' -> (%lu) '%ls'\n",
                         HttppUrlEncodingToString(Encoding),
                         SourceLength, SourceLength, pSource,
                         *pBytesCopied/sizeof(WCHAR), pDestination
                        ));
    }

    return Status;

} // HttpCopyUrl


/***************************************************************************++

Routine Description:

    Widens an already-clean URL to Unicode, one byte at a time, using the
    FastPopChars[] lookup table. Every input byte is expected to have a
    non-zero, US-ASCII entry in that table.

    In DBG builds each byte is additionally run through the pop-char
    routine for UrlEncoding (UTF-8, ANSI, or DBCS) and the result is
    asserted to match the table; segment length and count are asserted
    against pCfg as well. In non-DBG builds pCfg and UrlEncoding are
    not used.

Arguments:

    pCfg            - Canonicalization settings (DBG builds only)
    UrlEncoding     - Encoding used for cross-checking (DBG builds only)
    pDestination    - Receives the NUL-terminated Unicode URL
    pSource         - Raw URL bytes
    SourceLength    - Length of pSource, in bytes
    pBytesCopied    - Receives the number of bytes written to
                      pDestination, excluding the terminator

Return Value:

    NTSTATUS - STATUS_SUCCESS; DBG builds return STATUS_INVALID_PARAMETER
    for an unknown UrlEncoding.


--***************************************************************************/
NTSTATUS
HttppCopyUrlByType(
    IN      PURL_C14N_CONFIG    pCfg,
    IN      URL_ENCODING_TYPE   UrlEncoding,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied
    )
{
    PWSTR               pOut     = pDestination;
    PCUCHAR             pIn      = pSource;
    ULONG               BytesOut = 0;
    ULONG               Skip     = 1;
    ULONG               UnicodeChar;
#if DBG
    NTSTATUS            Status;
    PFN_POPCHAR_ABSPATH pfnPopChar;
    PWSTR               pSegment = pDestination;
    ULONG               SegmentCount = 0;
#endif // DBG

    //
    // Sanity check.
    //

    PAGED_CODE();

#if DBG
    // Choose the slow-path decoder that the fast path is verified against
    switch (UrlEncoding)
    {
        case UrlEncoding_Ansi:
            pfnPopChar = &HttppPopCharAbsPathAnsi;
            break;

        case UrlEncoding_Dbcs:
            pfnPopChar = &HttppPopCharAbsPathDbcs;
            break;

        case UrlEncoding_Utf8:
            pfnPopChar = &HttppPopCharAbsPathUtf8;
            break;

        default:
            ASSERT(! "Invalid UrlEncoding");
            RETURN(STATUS_INVALID_PARAMETER);
    }
#else  // !DBG
    UNREFERENCED_PARAMETER(pCfg);
    UNREFERENCED_PARAMETER(UrlEncoding);
#endif // DBG

    for ( ;  (int) SourceLength > 0;  pIn += Skip, SourceLength -= Skip)
    {
        //
        // Look up the next character.
        //
        // All clean chars have a non-zero entry in FastPopChars[].
        // All clean chars are in the US-ASCII range, 0-127.
        //

        ULONG CleanChar = FastPopChars[*pIn];

        ASSERT(0 != CleanChar);
        ASSERT(IS_ASCII(CleanChar));

#if DBG
        // Cross-check: the full decoder must agree with the table
        Status = (*pfnPopChar)(
                        pIn,
                        SourceLength,
                        pCfg->PercentUAllowed,
                        pCfg->AllowRestrictedChars,
                        &UnicodeChar,
                        &Skip
                        );

        ASSERT(NT_SUCCESS(Status));
        ASSERT(UnicodeChar == CleanChar);
        ASSERT(Skip == 1);
#endif // DBG

        UnicodeChar = (WCHAR) CleanChar;
        Skip = 1;

#if DBG
        // Because HttpFindUrlToken() marks as dirty any URLs that
        // (appear to) have too many segments or overlong segments,
        // we should never hit these assertions
        if (FORWARD_SLASH == UnicodeChar)
        {
            ULONG SegmentLength = DIFF(pOut - pSegment);

            // The segment length should be within bounds

            ASSERT(SegmentLength > 0  ||  pDestination == pSegment);
            ASSERT(SegmentLength
                            <= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));

            pSegment = pOut;
            ++SegmentCount;

            // There should not be too many segments
            ASSERT(SegmentCount <= pCfg->UrlSegmentMaxCount);
        }
#endif // DBG

        EMIT_LITERAL_CHAR(UnicodeChar, pOut, BytesOut);
    }

    //
    // terminate the string, it hasn't been done in the loop
    //

    ASSERT((pOut-1)[0] != UNICODE_NULL);

    pOut[0] = UNICODE_NULL;
    *pBytesCopied = BytesOut;

    // Final segment: same bounds as the segments checked inside the loop
    ASSERT(DIFF(pOut - pSegment) > 0);
    ASSERT(DIFF(pOut - pSegment)
                <= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));
    ASSERT(SegmentCount < pCfg->UrlSegmentMaxCount);

    return STATUS_SUCCESS;

}   // HttppCopyUrlByType


/***************************************************************************++

Routine Description:

    Slow-path URL canonicalization. Tries each encoding packed into
    pCfg->AbsPathDecodeOrder (lowest-order slot first) by calling
    HttppCleanAndCopyUrlByType() until one succeeds. The intended
    canonicalization steps are:

        Unescape
        Convert backslash to forward slash
        Remove double slashes (empty directory names) - e.g. // or \\
        Handle /./
        Handle /../
        Convert to unicode

Arguments:

    pCfg                - Supplies AbsPathDecodeOrder; also handed to
                          HttppCleanAndCopyUrlByType()
    UrlPart             - Passed through to HttppCleanAndCopyUrlByType()
    pDestination        - Receives the canonicalized Unicode URL
    pSource             - Raw URL bytes
    SourceLength        - Length of pSource, in bytes
    pBytesCopied        - Passed through to HttppCleanAndCopyUrlByType()
    ppQueryString       - Optional; passed through to
                          HttppCleanAndCopyUrlByType()
    pUrlEncodingType    - On success, receives the encoding that worked

Return Value:

    NTSTATUS - STATUS_INVALID_PARAMETER for a zero or out-of-range decode
    order; otherwise the status of the last conversion attempt.


Note: Any changes to this code may require changes for the fast path code too.
      The fast path is HttpCopyUrl.

--***************************************************************************/
NTSTATUS
HttpCleanAndCopyUrl(
    IN      PURL_C14N_CONFIG    pCfg,
    IN      URL_PART            UrlPart,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied,
    OUT     PWSTR *             ppQueryString OPTIONAL,
    OUT     PURL_ENCODING_TYPE  pUrlEncodingType
    )
{
    ULONG    RemainingOrder = pCfg->AbsPathDecodeOrder;
    NTSTATUS Status         = STATUS_UNSUCCESSFUL;

    PAGED_CODE();

    ASSERT(NULL != pDestination);
    ASSERT(NULL != pSource);
    ASSERT(NULL != pBytesCopied);
    ASSERT(NULL != pUrlEncodingType);

    // Reject an empty order, or one with bits beyond the defined slots
    if (0 == RemainingOrder
        ||  (RemainingOrder & UrlDecode_MaxMask) != RemainingOrder)
    {
        UlTraceError(PARSER,
                    ("http!HttpCleanAndCopyUrl: invalid DecodeOrder, 0x%lX\n",
                    RemainingOrder
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    while (0 != RemainingOrder  &&  !NT_SUCCESS(Status))
    {
        // Take the lowest slot and shift it out for the next pass
        ULONG Encoding = (RemainingOrder & UrlDecode_Mask);

        RemainingOrder >>= UrlDecode_Shift;

        // An empty slot is simply skipped
        if (UrlDecode_None == Encoding)
            continue;

        // Anything other than the three known encodings is a logic error;
        // it is skipped just like an empty slot
        if (UrlDecode_Ansi != Encoding
            &&  UrlDecode_Dbcs != Encoding
            &&  UrlDecode_Utf8 != Encoding)
        {
            ASSERT(! "Impossible UrlDecodeOrder");
            continue;
        }

        UlTraceVerbose(PARSER,
                        ("http!HttpCleanAndCopyUrl(%s, Src=%p, %lu)\n",
                        HttppUrlEncodingToString(Encoding),
                        pSource, SourceLength
                        ));

        Status = HttppCleanAndCopyUrlByType(
                    pCfg,
                    (URL_ENCODING_TYPE) Encoding,
                    UrlPart,
                    pDestination,
                    pSource,
                    SourceLength,
                    pBytesCopied,
                    ppQueryString
                    );

        if (!NT_SUCCESS(Status))
            continue;

        *pUrlEncodingType = (URL_ENCODING_TYPE) Encoding;

        UlTraceVerbose(PARSER,
                        ("http!HttpCleanAndCopyUrl(%s): "
                         "(%lu) '%.*s' -> (%lu) '%ls'\n",
                         HttppUrlEncodingToString(Encoding),
                         SourceLength, SourceLength, pSource,
                         *pBytesCopied/sizeof(WCHAR), pDestination
                        ));
    }

    return Status;

} // HttpCleanAndCopyUrl


//
// HttppCleanAndCopyUrlByType() uses StateFromStateAndToken[][] and
// ActionFromStateAndToken[][] to handle "//", "/./", and "/../" productions.
//
// TOK_STATE() builds one row of a state-transition table. Its last four
// arguments are token-pasted onto URL_STATE_ and emitted in the column
// order: other, '.', end-of-string, '/'. The leading `state` argument does
// not appear in the expansion; it only labels the row for the reader.
//

#define TOK_STATE(state, other, dot, eos, slash)    \
    {                                               \
        URL_STATE_ ## other,                        \
        URL_STATE_ ## dot,                          \
        URL_STATE_ ## eos,                          \
        URL_STATE_ ## slash                         \
    }

//
// CanonStateFromStateAndToken[][] is used by HttpParseUrl() to reject
// "//", "/./", and "/../" sequences, as these URLs are supposed to
// be in canonical form already.
//
// Each row is the current state; each column is the token just seen.
// A '/' arriving in SLASH, SLASH_DOT, or SLASH_DOT_DOT goes to ERROR, as
// does end-of-string in SLASH_DOT_DOT. END and ERROR are absorbing: every
// token leaves them unchanged.
//
// NOTE(review): rows and columns are positional, so their order presumably
// has to match the URL_STATE and URL_TOKEN enumerations -- confirm against
// the header before reordering anything.
//

const URL_STATE
CanonStateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
{
//             State \ Token:  Other       '.'             EOS     '/'
    TOK_STATE( START,          START,      START,          END,    SLASH),
    TOK_STATE( SLASH,          START,      SLASH_DOT,      END,    ERROR),
    TOK_STATE( SLASH_DOT,      START,      SLASH_DOT_DOT,  END,    ERROR),
    TOK_STATE( SLASH_DOT_DOT,  START,      START,          ERROR,  ERROR),

    TOK_STATE( END,            END,        END,            END,    END),
    TOK_STATE( ERROR,          ERROR,      ERROR,          ERROR,  ERROR)
};


//
// StateFromStateAndToken[][] says which new state to transition to given
// the current state and the token we saw. Used by HttppCleanAndCopyUrlByType()
//
// Unlike CanonStateFromStateAndToken[][], no transition out of START,
// SLASH, SLASH_DOT, or SLASH_DOT_DOT leads to ERROR: a '/' always moves to
// SLASH and end-of-string always moves to END. ERROR is only reachable
// from ERROR itself.
//

const URL_STATE
StateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
{
//             State \ Token:  Other       '.'             EOS     '/'
    TOK_STATE( START,          START,      START,          END,    SLASH),
    TOK_STATE( SLASH,          START,      SLASH_DOT,      END,    SLASH),
    TOK_STATE( SLASH_DOT,      START,      SLASH_DOT_DOT,  END,    SLASH),
    TOK_STATE( SLASH_DOT_DOT,  START,      START,          END,    SLASH),

    TOK_STATE( END,            END,        END,            END,    END),
    TOK_STATE( ERROR,          ERROR,      ERROR,          ERROR,  ERROR)
};


//
// ActionFromStateAndToken[][] says what action to perform based on
// the current state and the current token
//
// NEW_ACTION() builds one table row by token-pasting its last four
// arguments onto ACTION_, in the column order: other, '.', end-of-string,
// '/'. The leading `state` argument does not appear in the expansion; it
// only labels the row.
//

#define NEW_ACTION(state, other, dot, eos, slash)   \
    {                                               \
        ACTION_ ## other,                           \
        ACTION_ ## dot,                             \
        ACTION_ ## eos,                             \
        ACTION_ ## slash                            \
    }

//
// Only five rows are listed here; there is no row for the ERROR state that
// the two state tables include. Rows left out of an initializer are
// zero-filled by the compiler.
// NOTE(review): that equals ACTION_NOTHING only if ACTION_NOTHING is 0 --
// confirm against the URL_ACTION enumeration.
//
const URL_ACTION
ActionFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
{
//             State \ Token: Other        '.'             EOS     '/'
    NEW_ACTION(START,         EMIT_CH,     EMIT_CH,        NOTHING, EMIT_CH),
    NEW_ACTION(SLASH,         EMIT_CH,     NOTHING,        NOTHING, NOTHING),
    NEW_ACTION(SLASH_DOT,     EMIT_DOT_CH, NOTHING,        NOTHING, NOTHING),
    NEW_ACTION(SLASH_DOT_DOT, EMIT_DOT_DOT_CH,
                                          EMIT_DOT_DOT_CH, BACKUP,  BACKUP),

    NEW_ACTION(END,           NOTHING,     NOTHING,        NOTHING, NOTHING)
};


#if DBG

//
// Debug helper: returns the name of a URL_ACTION enumerator, without the
// ACTION_ prefix, for use in trace output. Asserts on and returns
// "ACTION_???" for a value that is not a known enumerator.
//
PCSTR
HttppUrlActionToString(
    URL_ACTION Action)
{
    PCSTR pszAction;

    switch (Action)
    {
    case ACTION_NOTHING:
        pszAction = "NOTHING";
        break;

    case ACTION_EMIT_CH:
        pszAction = "EMIT_CH";
        break;

    case ACTION_EMIT_DOT_CH:
        pszAction = "EMIT_DOT_CH";
        break;

    case ACTION_EMIT_DOT_DOT_CH:
        pszAction = "EMIT_DOT_DOT_CH";
        break;

    case ACTION_BACKUP:
        pszAction = "BACKUP";
        break;

    case ACTION_MAX:
        pszAction = "MAX";
        break;

    default:
        ASSERT(! "Invalid URL_ACTION");
        pszAction = "ACTION_???";
        break;
    }

    return pszAction;

} // HttppUrlActionToString


//
// Debug helper: returns the name of a URL_STATE enumerator, without the
// URL_STATE_ prefix, for use in trace output. Asserts on and returns
// "URL_STATE_???" for a value that is not a known enumerator.
//
PCSTR
HttppUrlStateToString(
    URL_STATE UrlState)
{
    PCSTR pszState;

    switch (UrlState)
    {
    case URL_STATE_START:
        pszState = "START";
        break;

    case URL_STATE_SLASH:
        pszState = "SLASH";
        break;

    case URL_STATE_SLASH_DOT:
        pszState = "SLASH_DOT";
        break;

    case URL_STATE_SLASH_DOT_DOT:
        pszState = "SLASH_DOT_DOT";
        break;

    case URL_STATE_END:
        pszState = "END";
        break;

    case URL_STATE_ERROR:
        pszState = "ERROR";
        break;

    case URL_STATE_MAX:
        pszState = "MAX";
        break;

    default:
        ASSERT(! "Invalid URL_STATE");
        pszState = "URL_STATE_???";
        break;
    }

    return pszState;

} // HttppUrlStateToString


//
// Debug helper: returns the name of a URL_STATE_TOKEN enumerator, without
// the URL_TOKEN_ prefix, for use in trace output. Asserts on and returns
// "URL_TOKEN_???" for a value that is not a known enumerator.
//
PCSTR
HttppUrlTokenToString(
    URL_STATE_TOKEN UrlToken)
{
    PCSTR pszToken;

    switch (UrlToken)
    {
    case URL_TOKEN_OTHER:
        pszToken = "OTHER";
        break;

    case URL_TOKEN_DOT:
        pszToken = "DOT";
        break;

    case URL_TOKEN_EOS:
        pszToken = "EOS";
        break;

    case URL_TOKEN_SLASH:
        pszToken = "SLASH";
        break;

    case URL_TOKEN_MAX:
        pszToken = "MAX";
        break;

    default:
        ASSERT(! "Invalid URL_STATE_TOKEN");
        pszToken = "URL_TOKEN_???";
        break;
    }

    return pszToken;

} // HttppUrlTokenToString

#endif // DBG


//
// Returns a short display name for an HTTP_URL_SITE_TYPE value. Asserts on
// and returns "????" for a value that is not a known enumerator.
//
PCSTR
HttpSiteTypeToString(
    HTTP_URL_SITE_TYPE SiteType
    )
{
    PCSTR pszSiteType;

    switch (SiteType)
    {
    case HttpUrlSite_None:
        pszSiteType = "None";
        break;

    case HttpUrlSite_Name:
        pszSiteType = "Name";
        break;

    case HttpUrlSite_IP:
        pszSiteType = "IP";
        break;

    case HttpUrlSite_NamePlusIP:
        pszSiteType = "Name+IP";
        break;

    case HttpUrlSite_WeakWildcard:
        pszSiteType = "Weak";
        break;

    case HttpUrlSite_StrongWildcard:
        pszSiteType = "Strong";
        break;

    case HttpUrlSite_Max:
        pszSiteType = "Max";
        break;

    default:
        ASSERT(! "Invalid HTTP_URL_SITE_TYPE");
        pszSiteType = "????";
        break;
    }

    return pszSiteType;
}


/***************************************************************************++

Routine Description:

    This function can be told to clean up UTF-8, ANSI, or DBCS URLs.

    Unescape
    Convert backslash to forward slash
    Remove double slashes (empty directory names) - e.g. // or \\
    Handle /./
    Handle /../
    Convert to unicode

    The source is decoded one character at a time by an encoding-specific
    PopChar routine. Each decoded character is classified as a token
    (Other, '.', '/', EOS) and fed through ActionFromStateAndToken[][] and
    StateFromStateAndToken[][], which decide what to write to the
    destination. When an unescaped '?' is found in the source, it is treated
    as EOS for the abspath, canonicalization is switched off, and the rest
    of the source is decoded with HttppPopCharQueryString and copied without
    path minimization.

    Segment length and segment count limits from pCfg are enforced as each
    segment is completed; the overall length limit is enforced at the end.

Arguments:

    pCfg            - Canonicalization settings. This routine reads
                      PercentUAllowed, AllowRestrictedChars,
                      UrlSegmentMaxLength, UrlSegmentMaxCount and
                      UrlMaxLength.
    UrlEncoding     - Selects the PopChar routine used for the abspath:
                      UrlEncoding_Ansi, UrlEncoding_Dbcs or UrlEncoding_Utf8.
    UrlPart         - Asserted to be UrlPart_AbsPath on entry.
    pDestination    - Receives the cleaned URL as wide characters,
                      UNICODE_NULL-terminated on success. No buffer size is
                      passed in. NOTE(review): the caller presumably
                      guarantees the buffer is large enough -- confirm.
    pSource         - Raw source bytes. Asserted to start with '/'.
    SourceLength    - Number of bytes at pSource.
    pBytesCopied    - Receives the number of bytes (not WCHARs) emitted to
                      pDestination, excluding the terminator. Written only
                      once the whole source has been processed; it is
                      written before the UrlMaxLength check, so it is also
                      set when that check fails.
    ppQueryString   - Optional. On success, receives the position in
                      pDestination at which the '?' was emitted, or NULL if
                      the source held no query string.

Return Value:

    NTSTATUS - Completion status.

    STATUS_SUCCESS                  - URL cleaned and copied.
    STATUS_INVALID_PARAMETER        - UrlEncoding is not a supported value.
    STATUS_INVALID_DEVICE_REQUEST   - a segment is too long, there are too
                                      many segments, or the result exceeds
                                      pCfg->UrlMaxLength.
    STATUS_OBJECT_PATH_INVALID      - a "/.." tried to back up past the root.
    STATUS_OBJECT_PATH_SYNTAX_BAD   - unknown action code in the state table.
    Any failure status returned by the PopChar routine is passed through.
    NOTE(review): EMIT_CHAR is also handed Status and so presumably can fail
    too -- confirm against the macro definition.

Note: Any changes to this code may require changes for the fast path code too.
      The fast path is HttppCopyUrlByType.


--***************************************************************************/
NTSTATUS
HttppCleanAndCopyUrlByType(
    IN      PURL_C14N_CONFIG    pCfg,
    IN      URL_ENCODING_TYPE   UrlEncoding,
    IN      URL_PART            UrlPart,
    OUT     PWSTR               pDestination,
    IN      PCUCHAR             pSource,
    IN      ULONG               SourceLength,
    OUT     PULONG              pBytesCopied,
    OUT     PWSTR *             ppQueryString OPTIONAL
    )
{
    NTSTATUS            Status;
    PWSTR               pDest;
    PCUCHAR             pChar;
    ULONG               CharToSkip;
    ULONG               BytesCopied;
    PWSTR               pQueryString;
    URL_STATE           UrlState = URL_STATE_START;
    URL_STATE_TOKEN     UrlToken = URL_TOKEN_OTHER;
    URL_ACTION          Action = ACTION_NOTHING;
    ULONG               UnicodeChar;
    BOOLEAN             MakeCanonical;
    PWCHAR              pFastPopChar;
    PFN_POPCHAR_ABSPATH pfnPopChar;
    PWSTR               pSegment = pDestination;
    ULONG               SegmentCount = 0;
    BOOLEAN             TestSegment = FALSE;
#if DBG
    ULONG               OriginalSourceLength = SourceLength;
#endif

    //
    // Sanity check.
    //

    PAGED_CODE();

    ASSERT(UrlPart_AbsPath == UrlPart);

    //
    // Pick the routine that decodes one character of the abspath for the
    // requested encoding.
    //

    if (UrlEncoding_Ansi == UrlEncoding)
        pfnPopChar = &HttppPopCharAbsPathAnsi;
    else if (UrlEncoding_Dbcs == UrlEncoding)
        pfnPopChar = &HttppPopCharAbsPathDbcs;
    else if (UrlEncoding_Utf8 == UrlEncoding)
        pfnPopChar = &HttppPopCharAbsPathUtf8;
    else
    {
        ASSERT(! "Invalid UrlEncoding");
        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(FORWARD_SLASH == *pSource);

    pDest = pDestination;
    pQueryString = NULL;
    BytesCopied = 0;

    pChar = pSource;
    CharToSkip = 0;

    // NOTE(review): the literal 0 is presumably URL_STATE_START, which the
    // declaration above already assigned -- confirm; this looks redundant.
    UrlState = 0;

    MakeCanonical = (BOOLEAN) (UrlPart == UrlPart_AbsPath);

    //
    // pFastPopChar is a lookup table indexed by the raw source byte. A
    // non-zero entry is used directly as the decoded character; a zero entry
    // means the PopChar routine has to be called (see the loop below).
    // NOTE(review): DummyPopChars presumably holds only zeros so that
    // PopChar is always called -- confirm.
    //

    if (UrlEncoding == UrlEncoding_Utf8 && UrlPart != UrlPart_QueryString)
    {
        pFastPopChar = FastPopChars;
    }
    else
    {
        pFastPopChar = DummyPopChars;
    }

    while (SourceLength > 0)
    {
        //
        // advance !  it's at the top of the loop to enable ANSI_NULL to
        // come through ONCE
        //

        ASSERT(CharToSkip <= SourceLength);

        pChar += CharToSkip;
        SourceLength -= CharToSkip;

        //
        // well?  have we hit the end?
        //

        if (SourceLength == 0)
        {
            // Synthesize one NUL so that the state machine sees EOS. The
            // loop condition then ends the loop after this iteration.
            UnicodeChar = UNICODE_NULL;
            CharToSkip = 1;
        }
        else
        {
            //
            // Nope.  Peek briefly to see if we hit the query string
            //

            if (UrlPart == UrlPart_AbsPath  &&  pChar[0] == QUESTION_MARK)
            {
                ASSERT(pQueryString == NULL);

                //
                // remember its location
                //

                pQueryString = pDest;

                //
                // let it fall through ONCE to the canonical
                // in order to handle a trailing "/.." like
                // "http://foobar:80/foo/bar/..?v=1&v2"
                //

                TestSegment = TRUE;
                UnicodeChar = QUESTION_MARK;
                CharToSkip  = 1;

                //
                // now we are cleaning the query string
                //

                UrlPart = UrlPart_QueryString;

                UlTraceVerbose(PARSER, ("QueryString @ %p\n", pQueryString));

                //
                // cannot use fast path for PopChar anymore
                //

                pFastPopChar = DummyPopChars;

                pfnPopChar = HttppPopCharQueryString;
            }
            else
            {
                ULONG NextUnicodeChar = pFastPopChar[*pChar];

                //
                // Grab the next character. Try to be fast for the
                // normal character case. Otherwise call PopChar.
                //

                if (NextUnicodeChar == 0)
                {
                    Status = (*pfnPopChar)(
                                    pChar,
                                    SourceLength,
                                    pCfg->PercentUAllowed,
                                    pCfg->AllowRestrictedChars,
                                    &UnicodeChar,
                                    &CharToSkip
                                    );

                    if (NT_SUCCESS(Status) == FALSE)
                        goto end;
                }
                else
                {
#if DBG
                    // Checked builds cross-check the fast table against the
                    // full PopChar routine.
                    Status = (*pfnPopChar)(
                                    pChar,
                                    SourceLength,
                                    pCfg->PercentUAllowed,
                                    pCfg->AllowRestrictedChars,
                                    &UnicodeChar,
                                    &CharToSkip
                                    );

                    ASSERT(NT_SUCCESS(Status));
                    ASSERT(UnicodeChar == NextUnicodeChar);
                    ASSERT(CharToSkip == 1);
#endif // DBG
                    UnicodeChar = (WCHAR) NextUnicodeChar;
                    CharToSkip = 1;
                }
            }
        }

        //
        // Classify the character as a token. TestSegment is set whenever
        // the token closes a path segment ('/', EOS, or the '?' that starts
        // the query string).
        //

        if (!MakeCanonical)
        {
            // Query string: only EOS matters; nothing is minimized.
            UrlToken = (UnicodeChar == UNICODE_NULL)
                            ? URL_TOKEN_EOS
                            : URL_TOKEN_OTHER;
            TestSegment = FALSE;
        }
        else
        {
            //
            // now use the state machine to make it canonical.
            //

            //
            // did we just hit the query string?  this will only happen once
            // that we take this branch after hitting it, as we stop
            // processing after hitting it.
            //

            if (UrlPart == UrlPart_QueryString)
            {
                //
                // treat this just like we hit a NULL, EOS.
                //

                ASSERT(QUESTION_MARK == UnicodeChar);

                UrlToken = URL_TOKEN_EOS;
                TestSegment = TRUE;
            }
            else
            {
                //
                // otherwise based the new state off of the char we
                // just popped.
                //

                switch (UnicodeChar)
                {
                case UNICODE_NULL:
                    UrlToken = URL_TOKEN_EOS;
                    TestSegment = TRUE;
                    break;

                case DOT:
                    UrlToken = URL_TOKEN_DOT;
                    TestSegment = FALSE;
                    break;

                case FORWARD_SLASH:
                    UrlToken = URL_TOKEN_SLASH;
                    TestSegment = TRUE;
                    break;

                default:
                    UrlToken = URL_TOKEN_OTHER;
                    TestSegment = FALSE;
                    break;
                }
            }
        }

        Action = ActionFromStateAndToken[UrlState][UrlToken];

        IF_DEBUG2BOTH(PARSER, VERBOSE)
        {
            ULONG   i;
            UCHAR   HexBuff[5*12 + 10];
            PUCHAR  p = HexBuff;
            UCHAR   Byte;

            // NOTE(review): on the synthesized-EOS iteration SourceLength is
            // 0 and CharToSkip is 1, so the loops below read pChar[0], one
            // byte past the stated source length -- confirm that byte is
            // always readable.

            ASSERT(CharToSkip <= 4 * STRLEN_LIT("%NN"));

            // Generate something like
            // "[25 65 32 25 38 30 25 39 35] '%e2%80%95'"

            *p++ = '[';

            for (i = 0;  i < CharToSkip;  ++i)
            {
                const static char hexArray[] = "0123456789ABCDEF";

                Byte = pChar[i];
                *p++ = hexArray[Byte >> 4];
                *p++ = hexArray[Byte & 0xf];
                *p++ = ' ';
            }
            
            p[-1] = ']'; // overwrite last ' '
            *p++ = ' ';
            *p++ = '\'';

            for (i = 0;  i < CharToSkip;  ++i)
            {
                Byte = pChar[i];
                *p++ = (IS_HTTP_PRINT(Byte) ? Byte : '?');
            }

            *p++ = '\'';
            *p++ = '\0';

            ASSERT(DIFF(p - HexBuff) <= DIMENSION(HexBuff));
            
            UlTrace(PARSER,
                    ("http!HttppCleanAndCopyUrlByType(%s): "
                     "(%lu) %s -> U+%04lX '%c': "
                     "[%s][%s] -> %s, %s%s\n",
                     HttppUrlEncodingToString(UrlEncoding),
                     CharToSkip, HexBuff,
                     UnicodeChar,
                     IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
                        ? (UCHAR) UnicodeChar : '?',
                     HttppUrlStateToString(UrlState),
                     HttppUrlTokenToString(UrlToken),
                     HttppUrlStateToString(
                         StateFromStateAndToken[UrlState][UrlToken]),
                     HttppUrlActionToString(Action),
                     TestSegment ? ", TestSegment" : ""
                    ));

        } // IF_DEBUG2BOTH(PARSER, VERBOSE)

        //
        // Segment length and segment count checks
        //

        if (TestSegment)
        {
            // pSegment marks where the segment now being closed began in
            // the destination, so the length is measured on decoded output.
            ULONG SegmentLength = DIFF(pDest - pSegment);

            ASSERT(pSegment <= pDest);

            UlTraceVerbose(PARSER,
                            ("http!HttppCleanAndCopyUrlByType: "
                             "Segment[%lu] %p (%lu) = '%.*ls'\n",
                             SegmentCount, pSegment, SegmentLength,
                             SegmentLength, pSegment
                            ));

            // Reject if segment too long
            // (the extra WCSLEN_LIT(L"/") allows for one '/' being included
            // in the measured span)
            if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
            {
                UlTraceError(PARSER, (
                            "http!HttppCleanAndCopyUrlByType: "
                            "Segment too long: %lu\n",
                            SegmentLength
                            ));

                RETURN(STATUS_INVALID_DEVICE_REQUEST);
            }

            pSegment = pDest;

            // Reject if too many path segments

            // Tokens whose action is NOTHING (for example the second '/' of
            // "//") do not count as a new segment.
            if (Action != ACTION_NOTHING)
            {
                if (pSegment == pDestination)
                {
                    SegmentCount = 0;
                }
                else if (++SegmentCount > pCfg->UrlSegmentMaxCount)
                {
                    UlTraceError(PARSER, (
                                "http!HttppCleanAndCopyUrlByType: "
                                "Too many segments: %lu\n",
                                SegmentCount
                                ));

                    RETURN(STATUS_INVALID_DEVICE_REQUEST);
                }
            }
        }

        //
        //  Perform the action associated with the state.
        //
        //  The three EMIT cases deliberately fall through into one another:
        //  EMIT_DOT_DOT_CH writes '.', '.', then the character; EMIT_DOT_CH
        //  writes '.', then the character; EMIT_CH writes the character.
        //

        switch (Action)
        {
        case ACTION_EMIT_DOT_DOT_CH:

            EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);

            // fall through

        case ACTION_EMIT_DOT_CH:

            EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);

            // fall through

        case ACTION_EMIT_CH:


            EMIT_CHAR(
                UnicodeChar,
                pDest,
                BytesCopied,
                Status,
                pCfg->AllowRestrictedChars
                );

            // fall through

        case ACTION_NOTHING:
            break;

        case ACTION_BACKUP:

            //
            // pDest currently points 1 past the last '/'.  backup over it and
            // find the preceding '/', set pDest to 1 past that one.
            //

            //
            // backup to the '/'
            //

            pDest       -= 1;
            BytesCopied -= sizeof(WCHAR);

            ASSERT(pDest[0] == FORWARD_SLASH);

            //
            // are we at the start of the string?  that's bad, can't go back!
            //

            if (pDest == pDestination)
            {
                ASSERT(BytesCopied == 0);

                UlTraceError(PARSER, (
                            "http!HttppCleanAndCopyUrl: "
                            "Can't back up for \"/../\"\n"
                            ));

                Status = STATUS_OBJECT_PATH_INVALID;
                goto end;
            }

            //
            // back up over the '/'
            //

            pDest       -= 1;
            BytesCopied -= sizeof(WCHAR);

            ASSERT(pDest > pDestination);

            //
            // now find the previous slash
            //

            while (pDest > pDestination  &&  pDest[0] != FORWARD_SLASH)
            {
                pDest       -= 1;
                BytesCopied -= sizeof(WCHAR);
            }

            //
            // Adjust segment trackers downwards
            //

            pSegment = pDest;

            if (pSegment == pDestination)
                SegmentCount = 0;
            else
                --SegmentCount;

            //
            // we already have a slash, so don't have to store one.
            //

            ASSERT(pDest[0] == FORWARD_SLASH);

            //
            // simply skip it, as if we had emitted it just now
            //

            pDest       += 1;
            BytesCopied += sizeof(WCHAR);

            break;

        default:
            ASSERT(!"http!HttppCleanAndCopyUrl: "
                    "Invalid action code in state table!");
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

        //
        // Just hit the query string ?
        //

        if (MakeCanonical && UrlPart == UrlPart_QueryString)
        {
            //
            // Stop canonical processing
            //

            MakeCanonical = FALSE;

            //
            // Need to emit the '?', it wasn't emitted above
            //

            ASSERT(ActionFromStateAndToken[UrlState][UrlToken]
                    != ACTION_EMIT_CH);

            //
            // remember its location (in case we backed up)
            //
            
            pQueryString = pDest;

            EMIT_LITERAL_CHAR(QUESTION_MARK, pDest, BytesCopied);

            // reset
            // (so that the state update just below leaves the machine in
            // START for the remainder of the query string)
            UrlToken = URL_TOKEN_OTHER;
            UrlState = URL_STATE_START;
        }

        // update the URL state
        UrlState = StateFromStateAndToken[UrlState][UrlToken];

        ASSERT(URL_STATE_ERROR != UrlState);
    }

    //
    // terminate the string, it hasn't been done in the loop
    //

    ASSERT((pDest-1)[0] != UNICODE_NULL);

    pDest[0] = UNICODE_NULL;
    *pBytesCopied = BytesCopied;

    // UrlMaxLength is compared in WCHARs; BytesCopied is in bytes.
    if (BytesCopied > pCfg->UrlMaxLength * sizeof(WCHAR))
    {
        UlTraceError(PARSER, (
                    "http!HttppCleanAndCopyUrlByType: "
                    "URL too long: %lu\n",
                    BytesCopied
                    ));

        RETURN(STATUS_INVALID_DEVICE_REQUEST);
    }

    if (ppQueryString != NULL)
    {
        *ppQueryString = pQueryString;
    }

    // OriginalSourceLength exists only under #if DBG.
    // NOTE(review): this relies on UlTraceVerbose compiling away in
    // non-DBG builds -- confirm.
    UlTraceVerbose(PARSER,
                    ("http!HttppCleanAndCopyUrlByType: "
                     "(%lu) '%.*s' -> (%lu) '%.*ls', %squerystring\n",
                     OriginalSourceLength,
                     OriginalSourceLength, pSource,
                     BytesCopied/sizeof(WCHAR),
                     BytesCopied/sizeof(WCHAR), pDestination,
                     pQueryString != NULL ? "" : "no "
                    ));

    Status = STATUS_SUCCESS;


end:
    return Status;

}   // HttppCleanAndCopyUrlByType


/*++

Routine Description:

    Locates the URL token at the front of a buffer. Leading LWS is skipped,
    then bytes are consumed until a whitespace-token byte is found
    (IS_HTTP_WS_TOKEN: space, tab, CR or LF). While scanning, the routine
    also decides whether the raw URL is "clean", i.e. whether the caller may
    take the fast copy path instead of full canonicalization.

    The cleanliness test is deliberately conservative: a clean URL may be
    reported as dirty, which costs only the fast path.

Arguments:

    pCfg            - Supplies UrlSegmentMaxLength, UrlSegmentMaxCount and
                      UrlMaxLength, which are used for the cleanliness test.
    pBuffer         - Buffer to search for the token.
    BufferLength    - Number of bytes available at pBuffer.
    ppTokenStart    - Receives the start of the token if its trailing
                      delimiter was found; otherwise NULL.
    pTokenLength    - Receives the token length in bytes, or 0 if no
                      complete token was found.
    pRawUrlClean    - Receives TRUE if the token passed every cleanliness
                      test, FALSE otherwise.

Return Value:

    STATUS_SUCCESS if no parsing error was found. That includes the case
    where the buffer ran out before the token's delimiter was seen, in which
    case *ppTokenStart is NULL.

    STATUS_INVALID_DEVICE_REQUEST if the token holds a control character or
    is empty.

--*/
NTSTATUS
HttpFindUrlToken(
    IN  PURL_C14N_CONFIG    pCfg,
    IN  PCUCHAR             pBuffer,
    IN  ULONG               BufferLength,
    OUT PUCHAR*             ppTokenStart,
    OUT PULONG              pTokenLength,
    OUT PBOOLEAN            pRawUrlClean
    )
{
    PCUCHAR pUrlStart;
    PCUCHAR pSegStart;
    UCHAR   Ch;
    UCHAR   PrevCh = ANSI_NULL;
    ULONG   NumSegments = 0;
    ULONG   UrlLength;

    //
    // Sanity check.
    //

    PAGED_CODE();

    ASSERT(NULL != pBuffer);
    ASSERT(NULL != ppTokenStart);
    ASSERT(NULL != pTokenLength);
    ASSERT(NULL != pRawUrlClean);

    //
    // Start optimistic: clean URL, no token located yet.
    //

    *pRawUrlClean = TRUE;
    *ppTokenStart = NULL;
    *pTokenLength = 0;

    //
    // Step over leading linear whitespace.
    //

    while (BufferLength > 0)
    {
        if (! IS_HTTP_LWS(*pBuffer))
            break;

        ++pBuffer;
        --BufferLength;
    }

    // Nothing but whitespace (or nothing at all): no token yet.
    if (0 == BufferLength)
    {
        return STATUS_SUCCESS;
    }

    pUrlStart = pBuffer;

    // For an AbsPath this is the leading '/'. For an AbsURI it is not, which
    // at worst marks a few borderline URLs dirty that need not have been.
    pSegStart = pBuffer;

    //
    // Walk the token one byte at a time until a whitespace delimiter.
    //

    for ( ;  BufferLength != 0;  ++pBuffer, --BufferLength)
    {
        Ch = *pBuffer;

        // Test whitespace before CTL: \t, \r and \n are CTL characters too.
        if (IS_HTTP_WS_TOKEN(Ch))
        {
            break;
        }

        if (IS_HTTP_CTL(Ch))
        {
            *pRawUrlClean = FALSE;
            *ppTokenStart = NULL;

            UlTraceError(PARSER, (
                    "http!HttpFindUrlToken: "
                    "Found control char: %02X\n",
                    Ch
                    ));

            RETURN(STATUS_INVALID_DEVICE_REQUEST);
        }

        //
        // A URL stops being clean as soon as it contains any of:
        //
        //   - a backslash                      "\"
        //   - "./" or "//"
        //   - "/." or ".."
        //   - a question mark (query string)   "?"
        //   - a percent sign (hex escape)      "%"
        //   - a raw byte >= 0x80
        //
        // '/' and '.' are dirty only when they directly follow another '/'
        // or '.'; every other character flagged by IS_URL_DIRTY is dirty on
        // sight.
        //

        if (IS_URL_DIRTY(Ch))
        {
            BOOLEAN IsSlash = (BOOLEAN) (FORWARD_SLASH == Ch);

            // Once dirty the verdict never reverts, so skip the test.
            if (*pRawUrlClean)
            {
                if (!IsSlash  &&  DOT != Ch)
                {
                    *pRawUrlClean = FALSE;
                }
                else if (FORWARD_SLASH == PrevCh  ||  DOT == PrevCh)
                {
                    *pRawUrlClean = FALSE;
                }
            }

            if (IsSlash)
            {
                // An over-long raw segment may still shrink once its %-hex
                // escapes are decoded, so it is marked dirty rather than
                // rejected; HttppCleanAndCopyUrlByType() gives the verdict.
                if (DIFF(pBuffer - pSegStart) > pCfg->UrlSegmentMaxLength)
                    *pRawUrlClean = FALSE;

                pSegStart = pBuffer;

                // The raw count can overstate the final one: an AbsURI
                // carries two extra slashes before the hostname, and "/../",
                // "/./" and "//" minimization removes segments. Again the
                // slow path gives the verdict.
                ++NumSegments;

                if (NumSegments > pCfg->UrlSegmentMaxCount)
                    *pRawUrlClean = FALSE;
            }
        }

        PrevCh = Ch;
    }

    // Buffer exhausted before the delimiter: the token is incomplete.
    if (0 == BufferLength)
    {
        *pRawUrlClean = FALSE;

        return STATUS_SUCCESS;
    }

    ASSERT(IS_HTTP_WS_TOKEN(*pBuffer));

    UrlLength = DIFF(pBuffer - pUrlStart);

    if (0 == UrlLength)
    {
        UlTraceError(PARSER, ("http!HttpFindUrlToken: Found empty token\n"));

        RETURN(STATUS_INVALID_DEVICE_REQUEST);
    }

    // Account for the final segment, then apply all three limits.
    ++NumSegments;

    if (DIFF(pBuffer - pSegStart) > pCfg->UrlSegmentMaxLength
        ||  NumSegments > pCfg->UrlSegmentMaxCount
        ||  UrlLength > pCfg->UrlMaxLength)
    {
        *pRawUrlClean = FALSE;
    }

    // Success: hand back the token's start and length.
    *pTokenLength = UrlLength;
    *ppTokenStart = (PUCHAR) pUrlStart;

    return STATUS_SUCCESS;

}   // HttpFindUrlToken


/*++

Routine Description:

    Parses a bracketed IPv6 literal from a Unicode buffer into a
    SOCKADDR_IN6. The literal may carry a decimal scope ID, introduced by
    '%', just before the closing ']'.

    The text is first scanned to check that it contains only characters
    that can appear in an IPv6 literal and that a terminating ']' or '%'
    exists inside the buffer. Parsing of the address itself is then done by
    RtlIpv6StringToAddressW.

Arguments:

    pBuffer         - Text to parse. Must point to '['.
    BufferLength    - Number of WCHARs available at pBuffer.
    ScopeIdAllowed  - If TRUE, an optional scope ID may be present; if
                      FALSE, a '%' is an error.
    pSockAddr6      - Receives the parsed address. It is zeroed and its
                      family is set on entry, before any validation.
    ppEnd           - Set to NULL on entry. On success, points to the
                      character after ']'.

Return Value:

    STATUS_SUCCESS if the literal parsed cleanly.
    STATUS_INVALID_PARAMETER for malformed input.
    Otherwise, the failure status from RtlIpv6StringToAddressW.

--*/
NTSTATUS
HttppParseIPv6Address(
    IN  PCWSTR          pBuffer,
    IN  ULONG           BufferLength,
    IN  BOOLEAN         ScopeIdAllowed,
    OUT PSOCKADDR_IN6   pSockAddr6,
    OUT PCWSTR*         ppEnd
    )
{
    NTSTATUS    Status;
    PCWSTR      pLimit = pBuffer + BufferLength;
    PCWSTR      pCursor;
    PWSTR       pParseStop;
    ULONG       ScopeId;

    ASSERT(NULL != pBuffer);
    ASSERT(0 < BufferLength);
    ASSERT(NULL != pSockAddr6);
    ASSERT(NULL != ppEnd);

    RtlZeroMemory(pSockAddr6, sizeof(*pSockAddr6));
    *ppEnd = NULL;

    pSockAddr6->sin6_family = TDI_ADDRESS_TYPE_IP6;

    // The caller has already seen the opening bracket.
    ASSERT(L'[' == *pBuffer);

    // The shortest acceptable literal has the shape "[0]"; "[]" is rejected.
    if (BufferLength < WCSLEN_LIT(L"[0]")  ||  L']' == pBuffer[1])
    {
        UlTraceError(PARSER,
                    ("http!HttppParseIPv6Address: IPv6 address too short\n"
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    //
    // Pre-scan: find the ']' or '%' that ends the address part, rejecting
    // any character that cannot belong to an IPv6 literal.
    //

    pCursor = pBuffer + WCSLEN_LIT(L"[");

    while (pCursor < pLimit)
    {
        BOOLEAN Acceptable = FALSE;

        if (IS_ASCII(*pCursor))
        {
            if (L']' == *pCursor  ||  L'%' == *pCursor)
                break;

            // Hex digits and ':' make up the address proper. '.' is allowed
            // too because the last 32 bits may be written in IPv4
            // dotted-octet notation.
            Acceptable = (BOOLEAN) (IS_HTTP_HEX(*pCursor)
                                    ||  L':' == *pCursor
                                    ||  L'.' == *pCursor);
        }

        if (!Acceptable)
        {
            UlTraceError(PARSER,
                        ("http!HttppParseIPv6Address: "
                        "Invalid char in IPv6 address, U+%04X '%c', "
                        "after %lu chars, '%.*ls'\n",
                        *pCursor,
                        IS_ANSI(*pCursor) && IS_HTTP_PRINT(*pCursor)
                            ? *pCursor : '?',
                        DIFF(pCursor - pBuffer),
                        DIFF(pCursor - pBuffer),
                        pBuffer
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        ++pCursor;
    }

    if (pCursor == pLimit)
    {
        UlTraceError(PARSER,
                    ("http!HttppParseIPv6Address: No ']' for IPv6 address\n"
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(pCursor < pLimit);
    ASSERT(L']' == *pCursor  ||  L'%' == *pCursor);

    // Hand the address text to the RTL parser.
    Status = RtlIpv6StringToAddressW(
                pBuffer + WCSLEN_LIT(L"["),
                &pParseStop,
                &pSockAddr6->sin6_addr
                );

    if (! NT_SUCCESS(Status))
    {
        UlTraceError(PARSER,
                    ("http!HttppParseIPv6Address: "
                    "Invalid IPv6 address, %s\n",
                    HttpStatusToString(Status)
                    ));

        RETURN(Status);
    }

    // The RTL parser must have stopped exactly where the pre-scan did.
    if (pParseStop != pCursor)
    {
        UlTraceError(PARSER,
                    ("http!HttppParseIPv6Address: "
                    "Invalid IPv6 terminator, U+%04X, '%c'\n",
                    *pParseStop,
                    IS_ANSI(*pParseStop) && IS_HTTP_PRINT(*pParseStop)
                        ? *pParseStop
                        : '?'
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    if (L'%' == *pCursor)
    {
        //
        // Scope ID: "%<decimal digits>" followed by ']'.
        //

        PCWSTR pScopeEnd;

        // Step over the '%'
        pCursor += WCSLEN_LIT(L"%");

        if (!ScopeIdAllowed)
        {
            UlTraceError(PARSER,
                        ("http!HttppParseIPv6Address: No scope ID allowed\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        if (pCursor == pLimit)
        {
            UlTraceError(PARSER,
                        ("http!HttppParseIPv6Address: "
                         "No IPv6 scope ID after '%%'\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        // At least one character is examined, so "%]" fails as a bad digit.
        pScopeEnd = pCursor;

        for (;;)
        {
            if (! (L'0' <= *pScopeEnd  &&  *pScopeEnd <= L'9'))
            {
                UlTraceError(PARSER,
                            ("http!HttppParseIPv6Address: "
                            "Invalid digit in IPv6 scope ID, "
                            "U+%04X, '%c'\n",
                            *pScopeEnd,
                            IS_ANSI(*pScopeEnd) && IS_HTTP_PRINT(*pScopeEnd)
                                ? *pScopeEnd
                                : '?'
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            ++pScopeEnd;

            if (pScopeEnd >= pLimit  ||  L']' == *pScopeEnd)
                break;
        }

        ASSERT(pScopeEnd > pCursor);

        if (pScopeEnd == pLimit)
        {
            UlTraceError(PARSER,
                        ("http!HttppParseIPv6Address: "
                         "No ']' after IPv6 scope ID\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        ASSERT(L']' == *pScopeEnd);

        Status = HttpWideStringToULong(
                    pCursor,
                    pScopeEnd - pCursor,
                    FALSE,          // no leading zeros permitted
                    10,
                    &pParseStop,
                    &ScopeId
                    );

        if (!NT_SUCCESS(Status))
        {
            UlTraceError(PARSER,
                        ("http!HttppParseIPv6Address: "
                        "Invalid scopeID, %s\n",
                        HttpStatusToString(Status)
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        // The scope ID is stored as-is; it is not swapped to network byte
        // order.
        *(UNALIGNED64 ULONG *)&pSockAddr6->sin6_scope_id = 
            ScopeId;

        ASSERT(pParseStop == pScopeEnd);

        pCursor = pScopeEnd;
    }
    else
    {
        // No scope ID present.
        ASSERT(L']' == *pCursor);
        pSockAddr6->sin6_scope_id = 0;
    }

    ASSERT(pCursor < pLimit);
    ASSERT(L']' == *pCursor);

    // Report the position just past the closing bracket.
    pCursor += WCSLEN_LIT(L"]");

    *ppEnd = pCursor;

    RETURN(STATUS_SUCCESS);

} // HttppParseIPv6Address


/*++

Routine Description:

    Formats an IPv4 or IPv6 socket address as Unicode text. IPv6 addresses
    are wrapped in square brackets; the scope ID is not printed. For any
    other address family the routine traces an error, asserts, and produces
    an empty string.

Arguments:

    pSockAddr       - The IP address to print
    pBuffer         - Buffer to print to. Assumed to be large enough; the
                      result is UNICODE_NULL-terminated.

Return Value:

    Number of wide chars printed (the length), not counting the terminator.

--*/

ULONG
HttppPrintIpAddressW(
    IN  PSOCKADDR           pSockAddr,
    OUT PWSTR               pBuffer
    )
{
    PWSTR pCursor = pBuffer;

    HTTP_FILL_BUFFER(pBuffer, MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN);

    switch (pSockAddr->sa_family)
    {
    case TDI_ADDRESS_TYPE_IP:

        // IPv4: printed with no decoration.
        pCursor = RtlIpv4AddressToStringW(
                        &((PSOCKADDR_IN) pSockAddr)->sin_addr,
                        pCursor
                        );
        break;

    case TDI_ADDRESS_TYPE_IP6:

        // IPv6: the literal goes inside "[...]".
        pCursor[0] = L'[';
        pCursor = RtlIpv6AddressToStringW(
                        &((PSOCKADDR_IN6) pSockAddr)->sin6_addr,
                        pCursor + 1
                        );
        // CODEWORK: Handle scope ID
        pCursor[0] = L']';
        pCursor += 1;
        break;

    default:

        UlTraceError(PARSER,
                     ("http!HttppPrintIpAddressW(): invalid sa_family, %hd\n",
                      pSockAddr->sa_family
                     ));

        ASSERT(! "Invalid SockAddr Family");
        break;
    }

    *pCursor = UNICODE_NULL;

    return DIFF(pCursor - pBuffer);

} // HttppPrintIpAddressW


/***************************************************************************++

Routine Description:
    This checks to see if the URL is well-formed and records the location
    and length of each of its components in pParsedUrl.
    A well-formed URL has a scheme ("http" or "https"),
    a valid hostname (including + and * wildcards, IPv4, and IPv6 literals),
    a port, optionally a Routing IP after a second ':'
    (http://host:port:IP/path), and a well-formed abspath.

 *  Must check that the URL is well-formed and in canonical form; e.g.,
        - Disallow /../ and /./
        - Disallow invalid characters, including invalid Unicode surrogate
          pairs. The URL is already in Unicode, so it's not a question of
          using the IS_URL_TOKEN() macro.
        - Disallow '%', '*', '?', and '\' in the abspath.

    pParsedUrl->Normalized starts out TRUE and is cleared if
        - TrailingSlashReqd is set but the URL does not end in '/',
        - an IP literal (hostname or Routing IP) is not spelled exactly
          as HttppPrintIpAddressW() would print it (case-insensitive), or
        - ForceRoutingIP is set, the hostname is an IP literal, and there
          is no Routing IP.
    A URL that is valid but not Normalized must be rewritten with
    HttpNormalizeParsedUrl() before it is used for string comparisons.


Arguments:
    pCfg                - configuration parameters
    pUrl                - Unicode string containing URL (not assumed to be
                            zero-terminated)
    UrlLength           - length of pUrl, in WCHARs
    TrailingSlashReqd   - if TRUE and pUrl does not end in '/', the URL is
                            validated as though a trailing '/' were present
                            and pParsedUrl->Normalized is cleared
    ForceRoutingIP      - if TRUE and the hostname is an IPv4 or IPv6 literal,
                            pParsedUrl->Normalized will be cleared, to force
                            HttpNormalizeParsedUrl() to rewrite the URL as
                            http://IP:port:IP/path
    pParsedUrl          - on successful exit, the components of the URL.
                            The pointers refer into pUrl; nothing is copied.
                            On failure, the contents are partially filled in
                            and must not be relied upon.

Return Value:

    NTSTATUS    - STATUS_SUCCESS if the URL is well-formed;
                  STATUS_INVALID_PARAMETER for most malformed URLs;
                  otherwise the failure status returned by
                  HttppParseIPv6Address() or RtlIpv4StringToAddressW()
                  for a bad IP literal.

--***************************************************************************/

NTSTATUS
HttpParseUrl(
    IN  PURL_C14N_CONFIG    pCfg,
    IN  PCWSTR              pUrl,
    IN  ULONG               UrlLength,
    IN  BOOLEAN             TrailingSlashReqd,
    IN  BOOLEAN             ForceRoutingIP,
    OUT PHTTP_PARSED_URL    pParsedUrl
    )
{
    NTSTATUS            Status;
    ULONG               PreviousChar;
    ULONG               UnicodeChar;
    PCWSTR              pEnd = pUrl + UrlLength;
    PCWSTR              pHostname;
    PCWSTR              pChar;
    PCWSTR              pLabel;
    PCWSTR              pSlash;
    PCWSTR              pSegment;
    PWSTR               pTerminator;
    BOOLEAN             AlphaLabel;
    BOOLEAN             TestSegment;
    BOOLEAN             MoreChars;
    BOOLEAN             LastCharHack;
    ULONG               SegmentCount;
    URL_STATE           UrlState;
    URL_STATE_TOKEN     UrlToken;
    URL_ACTION          Action;
    WCHAR               IpAddr[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
    ULONG               Length;

    //
    // Sanity check.
    //

    PAGED_CODE();

    ASSERT(NULL != pCfg);
    ASSERT(NULL != pUrl);
    ASSERT(0 < UrlLength  &&  UrlLength <= UNICODE_STRING_MAX_WCHAR_LEN);
    ASSERT(FALSE == TrailingSlashReqd  ||  TRUE == TrailingSlashReqd);
    ASSERT(FALSE == ForceRoutingIP  ||  TRUE == ForceRoutingIP);
    ASSERT(NULL != pParsedUrl);

    RtlZeroMemory(pParsedUrl, sizeof(*pParsedUrl));

    pParsedUrl->Signature           = HTTP_PARSED_URL_SIGNATURE;
    pParsedUrl->pFullUrl            = (PWSTR) pUrl;
    pParsedUrl->UrlLength           = (USHORT) UrlLength;
    pParsedUrl->Normalized          = TRUE;
    pParsedUrl->TrailingSlashReqd   = TrailingSlashReqd;

    // This is the shortest possible valid URL

    if (UrlLength < WCSLEN_LIT(L"http://*:1/"))
    {
        UlTraceError(PARSER,
                     ("http!HttpParseUrl: Url too short, %lu, %.*ls\n",
                      UrlLength, UrlLength, pUrl
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Check the scheme (case-sensitive; only lowercase is accepted)

    if (0 == wcsncmp(pUrl, L"http://",  WCSLEN_LIT(L"http://")))
    {
        pParsedUrl->Secure = FALSE;
        pHostname = pUrl + WCSLEN_LIT(L"http://");
    }
    else if (0 == wcsncmp(pUrl, L"https://", WCSLEN_LIT(L"https://")))
    {
        pParsedUrl->Secure = TRUE;
        pHostname = pUrl + WCSLEN_LIT(L"https://");
    }
    else
    {
        UlTraceError(PARSER,
                     ("http!HttpParseUrl: invalid scheme, %.*ls\n",
                      UrlLength, pUrl
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    pParsedUrl->pHostname = (PWSTR) pHostname;

    // Is a trailing slash present, if required?

    if (TrailingSlashReqd  &&  L'/' != pUrl[UrlLength - 1])
    {
        // No, then the URL will have to be rewritten
        pParsedUrl->Normalized = FALSE;
    }

    //
    // The hostname validation code below looks a lot like that in
    // HttpValidateHostname(). However, it is sufficiently different
    // (WCHAR vs. UCHAR, Host+IP, Scope IDs, compulsory ports, etc) that
    // it is not easy to combine them into one routine. If the hostname
    // validation code is changed here, it may be necessary to change it
    // in HttpValidateHostname() too, or vice versa.
    //

    // Check for weak (http://*:port/) and strong (http://+:port/) wildcards

    if (L'*' == *pHostname  ||  L'+' == *pHostname)
    {
        pParsedUrl->SiteType = (L'*' == *pHostname)
                                    ? HttpUrlSite_WeakWildcard
                                    : HttpUrlSite_StrongWildcard;

        pChar = pHostname + WCSLEN_LIT(L"*");

        // Guaranteed by the minimum-length check above
        ASSERT(pChar < pEnd);

        // The wildcard must be followed by ":port"
        if (L':' == *pChar)
            goto port;

        UlTraceError(PARSER,
                    ("http!HttpParseUrl: No port in '%c' wildcard address\n",
                     *pHostname
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Is this an IPv6 literal address, per RFC 2732?

    if (L'[' == *pHostname)
    {
        pParsedUrl->SiteType = HttpUrlSite_IP;

        Status = HttppParseIPv6Address(
                        pHostname,
                        DIFF(pEnd - pHostname),
                        TRUE,      // scope ID allowed
                        &pParsedUrl->SockAddr6,
                        &pChar);

        if (!NT_SUCCESS(Status))
        {
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: "
                        "Invalid IPv6 address, %s\n",
                        HttpStatusToString(Status)
                        ));

            RETURN(Status);
        }

        ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);
        ASSERT(pChar > pHostname);

        // There must be a port
        if (pChar == pEnd  ||  L':' != *pChar)
        {
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: No port after IPv6 address\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        //
        // There are so many legitimate ways to write an IPv6 literal
        // that we can't assume that a valid IPv6 literal is normalized.
        // Since we do string comparisons, we'll have to rewrite the URL
        // if the Normalized flag is not set.
        //

        Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);

        if (Length != DIFF_USHORT(pChar - pHostname)
                || 0 != _wcsnicmp(pHostname, IpAddr, Length))
        {
            pParsedUrl->Normalized = FALSE;
        }

        goto port;

    } // IPv6

    //
    // It must be a domain name or an IPv4 literal. We'll try to treat
    // it as a domain name first. If the labels turn out to be all-numeric,
    // we'll try decoding it as an IPv4 literal.
    //

    AlphaLabel = FALSE;
    pLabel     = pHostname;

    for (pChar = pHostname;  pChar < pEnd;  ++pChar)
    {
        if (L':' == *pChar)
        {
            if (pChar == pHostname)
            {
                UlTraceError(PARSER,
                            ("http!HttpParseUrl: empty hostname\n"
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Have we seen any non-digits?
            if (AlphaLabel)
            {
                ASSERT(0 == pParsedUrl->SockAddr.sa_family);
                pParsedUrl->SiteType = HttpUrlSite_Name;
                goto port;
            }

            pParsedUrl->SiteType = HttpUrlSite_IP;
            pParsedUrl->SockAddr4.sin_family = TDI_ADDRESS_TYPE_IP;
            ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family);

            // Let's see if it's a valid IPv4 address
            Status = RtlIpv4StringToAddressW(
                        pHostname,
                        TRUE,           // strict => 4 dotted decimal octets
                        &pTerminator,
                        &pParsedUrl->SockAddr4.sin_addr
                        );

            if (!NT_SUCCESS(Status))
            {
                UlTraceError(PARSER,
                            ("http!HttpParseUrl: "
                            "Invalid IPv4 address, %s\n",
                            HttpStatusToString(Status)
                            ));

                RETURN(Status);
            }

            // The literal must extend all the way to the ':'
            if (pTerminator != pChar)
            {
                ASSERT(pTerminator < pChar);

                UlTraceError(PARSER,
                            ("http!HttpParseUrl: "
                            "Invalid IPv4 address after %lu chars, "
                            "U+%04X, '%c'\n",
                            DIFF(pTerminator - pHostname),
                            *pTerminator,
                            IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
                                ? *pTerminator
                                : '?'
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Is the literal spelled the way we would print it?
            Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);

            if (Length != DIFF_USHORT(pChar - pHostname)
                    || 0 != _wcsnicmp(pHostname, IpAddr, Length))
            {
                pParsedUrl->Normalized = FALSE;
            }

            goto port;

        } // ':' handling

        if (L'.' == *pChar)
        {
            ULONG LabelLength = DIFF(pChar - pLabel);

            // There must be at least one char in the label
            if (0 == LabelLength)
            {
                UlTraceError(PARSER,
                            ("http!HttpParseUrl: empty label\n"
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Label can't be longer than the configured maximum
            if (LabelLength > pCfg->MaxLabelLength)
            {
                UlTraceError(PARSER,
                            ("http!HttpParseUrl: overlong label, %lu\n",
                             LabelLength
                             ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            // Reset for the next label
            pLabel = pChar + WCSLEN_LIT(L".");

            continue;
        }

        //
        // All chars above 0xFF are considered valid
        //

        if (!IS_ANSI(*pChar)  ||  !IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
        {
            // Anything other than an ANSI digit makes this a name,
            // not a candidate IPv4 literal
            if (!IS_ANSI(*pChar)  ||  !IS_HTTP_DIGIT(*pChar))
                AlphaLabel = TRUE;

            if (pChar > pLabel)
                continue;

            // The first char of a label cannot be a hyphen. (Underscore?)
            if (L'-' == *pChar)
            {
                UlTraceError(PARSER,
                            ("http!HttpParseUrl: '-' at beginning of label\n"
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            continue;
        }

        // pHostname is a wide string, so it must be printed with %.*ls
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                     "Invalid char in hostname, U+%04X '%c',"
                     " after %lu chars, '%.*ls'\n",
                     *pChar,
                     IS_ANSI(*pChar) && IS_HTTP_PRINT(*pChar) ? *pChar : '?',
                     DIFF(pChar - pHostname),
                     DIFF(pChar - pHostname),
                     pHostname
                    ));

        RETURN(STATUS_INVALID_PARAMETER);

    } // hostname


    //
    // If we got here, we fell off the end of the buffer,
    // without finding a ':' for the port
    //

    ASSERT(pChar == pEnd);

    UlTraceError(PARSER, ("http!HttpParseUrl: No port\n"));

    RETURN(STATUS_INVALID_PARAMETER);


port:

    //
    // Parse the port number. On entry, pChar points at the ':'
    // that follows the hostname.
    //

    ASSERT(pHostname < pChar  &&  pChar < pEnd);
    ASSERT(L':' == *pChar);

    pParsedUrl->HostnameLength  = DIFF_USHORT(pChar - pHostname);

    // First, check for overlong hostnames
    if (pParsedUrl->HostnameLength > pCfg->MaxHostnameLength)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: overlong hostname, %hu\n",
                     pParsedUrl->HostnameLength
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Skip the ':' denoting a port number
    pChar += WCSLEN_LIT(L":");

    if (pChar == pEnd)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: No port after ':'\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Search for the '/' or second ':' that terminates the port number

    pSlash = pChar;
    pParsedUrl->pPort = (PWSTR) pSlash;

    do
    {
        if (*pSlash < L'0'  ||  *pSlash > L'9')
        {
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: "
                        "Invalid digit in port, U+%04X, '%c'\n",
                        *pSlash,
                        IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
                            ? *pSlash
                            : '?'
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }
    } while (++pSlash < pEnd  &&  L'/' != *pSlash  &&  L':' != *pSlash);

    ASSERT(pSlash > pChar);

    pParsedUrl->PortLength = DIFF_USHORT(pSlash - pChar);

    if (pSlash == pEnd)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: No '/' (or second ':') after port\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(L'/' == *pSlash  ||  L':' == *pSlash);

    Status = HttpWideStringToUShort(
                pChar,
                pParsedUrl->PortLength,
                FALSE,          // no leading zeros permitted
                10,
                &pTerminator,
                &pParsedUrl->PortNumber
                );

    if (!NT_SUCCESS(Status))
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                    "Invalid port number, %s\n",
                    HttpStatusToString(Status)
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    if (0 == pParsedUrl->PortNumber)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: Port must not be zero.\n"
                     ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(pTerminator == pSlash);

    pChar = pSlash;

    goto routing_IP;    // so /W4 won't complain about an unreferenced label


routing_IP:

    //
    // Is this a Host+IP site; i.e., is there a Routing IP address
    // after the port number?
    //

    if (L'/' == *pChar)
    {
        pParsedUrl->pRoutingIP      = NULL;
        pParsedUrl->RoutingIPLength = 0;
        ASSERT(0 == pParsedUrl->RoutingAddr.sa_family);

        //
        // If the hostname is an IP literal, but there is no routing IP
        // (i.e., http://IP:port/path), we must rewrite the URL as
        // http://IP:port:IP/path; i.e., explicitly use the hostname IP
        // as the routing IP.
        //

        if (ForceRoutingIP  &&  0 != pParsedUrl->SockAddr.sa_family)
        {
            ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
                    ||  TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);

            pParsedUrl->Normalized = FALSE;
        }

        goto parse_path;
    }

    ASSERT(L':' == *pChar);

    if (HttpUrlSite_WeakWildcard == pParsedUrl->SiteType
        ||  HttpUrlSite_StrongWildcard == pParsedUrl->SiteType)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                     "Can't have Routing IPs on Wildcard sites\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // Skip the second ':'
    pChar += WCSLEN_LIT(L":");

    if (pChar == pEnd)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: No IP address after second ':'\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    pParsedUrl->pRoutingIP = (PWSTR) pChar;

    ASSERT(HttpUrlSite_NamePlusIP != pParsedUrl->SiteType);

    if (HttpUrlSite_Name == pParsedUrl->SiteType)
    {
        pParsedUrl->SiteType = HttpUrlSite_NamePlusIP;
    }

    //
    // Is the Routing IP an IPv6 literal?
    //

    if (L'[' == *pChar)
    {
        // The address families of hostname and Routing IP must agree
        if (TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family)
        {
            UlTraceError(PARSER,
                       ("http!HttpParseUrl: "
                        "Can't have http://IPv4:port:[IPv6]\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family
                ||  0 == pParsedUrl->SockAddr.sa_family);

        Status = HttppParseIPv6Address(
                        pChar,
                        DIFF(pEnd - pChar),
                        TRUE,      // scope ID allowed
                        &pParsedUrl->RoutingAddr6,
                        &pSlash);

        if (!NT_SUCCESS(Status))
        {
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: "
                        "Invalid Host+IPv6 address, %s\n",
                        HttpStatusToString(Status)
                        ));

            RETURN(Status);
        }

        ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->RoutingAddr.sa_family);
        ASSERT(pSlash > pChar);

        // There must be a slash
        if (pSlash == pEnd  ||  L'/' != *pSlash)
        {
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: '/' expected after Host+IPv6.\n"
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        // CODEWORK: Should we care if RoutingAddr6 != SockAddr6?

        pParsedUrl->RoutingIPLength = DIFF_USHORT(pSlash - pChar);

        // Is the literal spelled the way we would print it?
        Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);

        if (Length != pParsedUrl->RoutingIPLength
                || 0 != _wcsnicmp(pChar, IpAddr, Length))
        {
            pParsedUrl->Normalized = FALSE;
        }

        pChar = pSlash;

        goto parse_path;
    }


    //
    // No, then it must be an IPv4 literal
    //

    if (TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: Can't have http://[IPv6]:port:IPv4\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
            ||  0 == pParsedUrl->SockAddr.sa_family);

    // Search for the terminating '/'

    pSlash = pChar;

    do
    {
        // Only digits and dots may appear before the '/'
        if ((L'0' <= *pSlash  &&  *pSlash <= L'9')  ||  L'.' == *pSlash)
            continue;

        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                    "Invalid character in Host+IPv4, U+%04X, '%c'\n",
                    *pSlash,
                    IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
                        ? *pSlash
                        : '?'
                    ));

        RETURN(STATUS_INVALID_PARAMETER);

    } while (++pSlash < pEnd  &&  L'/' != *pSlash);

    ASSERT(pSlash > pChar);

    if (pSlash == pEnd)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: No '/' after Host+IPv4\n"
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    ASSERT(L'/' == *pSlash);

    Status = RtlIpv4StringToAddressW(
                pChar,
                TRUE,           // strict => 4 dotted decimal octets
                &pTerminator,
                &pParsedUrl->RoutingAddr4.sin_addr
                );

    if (!NT_SUCCESS(Status))
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                    "Invalid Host+IPv4 address, %s\n",
                    HttpStatusToString(Status)
                    ));

        RETURN(Status);
    }

    // The literal must extend all the way to the '/'
    if (pTerminator != pSlash)
    {
        ASSERT(pTerminator < pSlash);

        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                    "Invalid Host+IPv4 address after %lu chars, "
                    "U+%04X, '%c'\n",
                    DIFF(pTerminator - pChar),
                    *pTerminator,
                    IS_ANSI(*pTerminator)  &&  IS_HTTP_PRINT(*pTerminator)
                        ? *pTerminator
                        : '?'
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    // CODEWORK: Should we care if RoutingAddr4 != SockAddr4

    pParsedUrl->RoutingIPLength         = DIFF_USHORT(pSlash - pChar);
    pParsedUrl->RoutingAddr4.sin_family = TDI_ADDRESS_TYPE_IP;

    // Is the literal spelled the way we would print it?
    Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);

    if (Length != pParsedUrl->RoutingIPLength
            || 0 != _wcsnicmp(pChar, IpAddr, Length))
    {
        pParsedUrl->Normalized = FALSE;
    }

    pChar = pSlash;


parse_path:

    //
    // Parse the abspath. On entry, pChar points at its leading '/'.
    //

    ASSERT(pParsedUrl->pRoutingIP == NULL  ||  pParsedUrl->RoutingIPLength > 0);
    ASSERT(pHostname < pChar  &&  pChar < pEnd);
    ASSERT(L'/' == *pChar);

    pParsedUrl->pAbsPath        = (PWSTR) pChar;
    pParsedUrl->AbsPathLength   = DIFF_USHORT(pEnd - pChar);

    if (pParsedUrl->AbsPathLength > pCfg->UrlMaxLength)
    {
        UlTraceError(PARSER,
                    ("http!HttpParseUrl: "
                    "AbsPath is too long: %lu\n",
                    pParsedUrl->AbsPathLength
                    ));

        RETURN(STATUS_INVALID_PARAMETER);
    }

    UrlState        = URL_STATE_START;
    UrlToken        = URL_TOKEN_OTHER;
    Action          = ACTION_NOTHING;
    pSegment        = pChar;
    TestSegment     = FALSE;
    LastCharHack    = FALSE;
    MoreChars       = TRUE;
    PreviousChar    = UNICODE_NULL;
    UnicodeChar     = *pChar;
    SegmentCount    = 0;

    //
    // Loop through all the characters in pAbsPath, plus one or two
    // special ones at the end.
    //

    while (MoreChars)
    {
        // Classify the character for the canonicalization state machine
        switch (UnicodeChar)
        {
        case UNICODE_NULL:
            UrlToken = URL_TOKEN_EOS;
            TestSegment = TRUE;
            break;

        case DOT:
            UrlToken = URL_TOKEN_DOT;
            TestSegment = FALSE;
            break;

        case FORWARD_SLASH:
            UrlToken = URL_TOKEN_SLASH;
            TestSegment = TRUE;
            break;

        case PERCENT:           // no hex escapes
        case STAR:              // no wildcards
        case QUESTION_MARK:     // no wildcards or querystrings
        case BACK_SLASH:        // no C string escapes
            UlTraceError(PARSER,
                        ("http!HttpParseUrl: invalid '%c' char in path\n",
                         (UCHAR) UnicodeChar
                        ));
            RETURN(STATUS_INVALID_PARAMETER);

        default:
            UrlToken = URL_TOKEN_OTHER;
            TestSegment = FALSE;
            break;
        }

        UlTraceVerbose(PARSER,
                        ("http!HttpParseUrl: "
                         "[%lu] U+%04lX '%c' %p: [%s][%s] -> %s, %s\n",
                         DIFF(pChar - pParsedUrl->pAbsPath),
                         UnicodeChar,
                         IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
                            ? (UCHAR) UnicodeChar : '?',
                         pChar,
                         HttppUrlStateToString(UrlState),
                         HttppUrlTokenToString(UrlToken),
                         HttppUrlStateToString(
                             CanonStateFromStateAndToken[UrlState][UrlToken]),
                         TestSegment ? ", TestSegment" : ""
                        ));

        //
        // Reject control characters. The synthetic UNICODE_NULL appended
        // at the end (LastCharHack) is exempt.
        //

        if (!LastCharHack
                &&  !pCfg->AllowRestrictedChars
                &&  IS_ANSI(UnicodeChar)
                &&  IS_URL_INVALID(UnicodeChar))
        {
            UlTraceError(PARSER, (
                        "http!HttpParseUrl: "
                        "Invalid character, U+%04lX, in path.\n",
                        UnicodeChar
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        //
        // Check that (high-surrogate, low-surrogate) come in pairs
        //

        if (HIGH_SURROGATE_START <= PreviousChar
                && PreviousChar <= HIGH_SURROGATE_END)
        {
            if (UnicodeChar < LOW_SURROGATE_START
                    ||  UnicodeChar > LOW_SURROGATE_END)
            {
                UlTraceError(PARSER, (
                            "http!HttpParseUrl: "
                            "Illegal surrogate pair, U+%04lX, U+%04lX.\n",
                            PreviousChar, UnicodeChar
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }
        }
        else if (LOW_SURROGATE_START <= UnicodeChar
                    &&  UnicodeChar <= LOW_SURROGATE_END)
        {
            UlTraceError(PARSER, (
                        "http!HttpParseUrl: "
                        "Non-high surrogate, U+%04lX, "
                        "before low surrogate, U+%04lX.\n",
                        PreviousChar, UnicodeChar
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        if (URL_STATE_ERROR == CanonStateFromStateAndToken[UrlState][UrlToken])
        {
            UlTraceError(PARSER, (
                        "http!HttpParseUrl: "
                        "Error state from %s,%s in path, after U+%04lX.\n",
                         HttppUrlStateToString(UrlState),
                         HttppUrlTokenToString(UrlToken),
                        UnicodeChar
                        ));

            RETURN(STATUS_INVALID_PARAMETER);
        }

        UrlState = CanonStateFromStateAndToken[UrlState][UrlToken];

        //
        // Check segment limits
        //

        if (TestSegment)
        {
            ULONG SegmentLength = DIFF(pChar - pSegment);

            // The CanonStateFromStateAndToken checks should prevent
            // empty segments, among other things
            ASSERT(SegmentLength > 0  ||  pChar == pSegment);

            // Reject if segment too long. SegmentLength is measured from
            // the previous '/', hence the extra WCSLEN_LIT(L"/").
            if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
            {
                UlTraceError(PARSER, (
                            "http!HttpParseUrl(): "
                            "Segment too long: %lu\n",
                            SegmentLength
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }

            pSegment = pChar;

            // Reject if too many path segments
            if (++SegmentCount > pCfg->UrlSegmentMaxCount)
            {
                UlTraceError(PARSER, (
                            "http!HttpParseUrl(): "
                            "Too many segments: %lu\n",
                            SegmentCount
                            ));

                RETURN(STATUS_INVALID_PARAMETER);
            }
        }

        //
        // Are there any more path characters?
        //

        PreviousChar = UnicodeChar;

        if (++pChar < pEnd)
        {
            UnicodeChar = *pChar;
        }
        else if (!LastCharHack)
        {
            // Want to make sure that the last segment is tested.
            // If there's no trailing slash, we'll enter here twice;
            // otherwise once

            if (TrailingSlashReqd  &&  FORWARD_SLASH != PreviousChar)
            {
                // First, fake a trailing slash, if needed
                UnicodeChar = FORWARD_SLASH;
            }
            else
            {
                // Second, always finish up with UNICODE_NULL
                UnicodeChar = UNICODE_NULL;
                LastCharHack = TRUE;
            }
        }
        else
        {
            // Terminate the loop
            MoreChars = FALSE;
        }

    } // while (MoreChars)

    RETURN(STATUS_SUCCESS);

} // HttpParseUrl


/***************************************************************************++

Routine Description:
    Some URLs parsed by HttpParseUrl() will not be considered normalized
    if they have IP literals, Routing IPs, or no trailing slash.
    This routine will build a fully normalized URL and (possibly) free the
    old one.

    The normalized URL is assembled in a newly allocated buffer as
        scheme  host  ':' port  [':' RoutingIP]  abspath  ['/']
    where an IP-literal host and the Routing IP are printed with
    HttppPrintIpAddressW(). The new string is then re-parsed with
    HttpParseUrl() so that all pointers in the parsed URL refer to the
    new buffer.

    On any failure, the caller's parsed URL and original URL string are
    left untouched; nothing is freed on the caller's behalf.

Arguments:
    pParsedUrl          - On entry, points to a URL parsed by HttpParseUrl();
                            On successful exit, points to a normalized URL.
    pCfg                - configuration parameters
    ForceCopy           - if TRUE, will always make a new, normalized URL
    FreeOriginalUrl     - if FALSE, will never free the original URL.
                            The caller must manage the memory.
    ForceRoutingIP      - if TRUE and the hostname is an IPv4 or IPv6 literal,
                            the URL will be rewritten in the form
                            http://IP:port:IP/path
    PoolType            - PagedPool or NonPagedPool
    PoolTag             - Tag used to allocate pUrl

Return Value:

    NTSTATUS        - STATUS_SUCCESS,
                      STATUS_NO_MEMORY if the new URL could not be allocated,
                      or the failure status from HttpParseUrl() if the
                      rebuilt URL is rejected (e.g., appending the trailing
                      slash makes the abspath exceed the configured maximum).

--***************************************************************************/
NTSTATUS
HttpNormalizeParsedUrl(
    IN OUT PHTTP_PARSED_URL pParsedUrl,
    IN     PURL_C14N_CONFIG pCfg,
    IN     BOOLEAN          ForceCopy,
    IN     BOOLEAN          FreeOriginalUrl,
    IN     BOOLEAN          ForceRoutingIP,
    IN     POOL_TYPE        PoolType,
    IN     ULONG            PoolTag
    )
{
    // Work on a local copy; the caller's structure is only updated on success
    HTTP_PARSED_URL ParsedUrl   = *pParsedUrl;
    NTSTATUS        Status      = STATUS_SUCCESS;

    ASSERT(HTTP_PARSED_URL_SIGNATURE == ParsedUrl.Signature);

    if (ParsedUrl.Normalized  &&  !ForceCopy)
    {
        // nothing to do
    }
    else
    {
        PWSTR   pResult;
        WCHAR   HostAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
        WCHAR   RoutingAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
        ULONG   SchemeLength;
        ULONG   HostAddrLength;
        ULONG   HostnameLength;
        ULONG   RoutingAddrLength;
        ULONG   AbsPathLength;
        ULONG   Length;
        ULONG   TrailingSlashLength;
        PCWSTR  pUrl;

        // Remember the original string; ParsedUrl is overwritten by the
        // re-parse below
        pUrl = ParsedUrl.pFullUrl;

        // "http://" or "https://"
        SchemeLength = DIFF(ParsedUrl.pHostname - ParsedUrl.pFullUrl);

        // Calculate HostAddrLength and HostnameLength (mutually exclusive)
        if (0 != ParsedUrl.SockAddr.sa_family)
        {
            HostAddrLength = HttppPrintIpAddressW(
                                    &ParsedUrl.SockAddr,
                                    HostAddrString
                                    );
            HostnameLength = 0;
        }
        else
        {
            HostAddrLength = 0;
            HostAddrString[0] = UNICODE_NULL;
            HostnameLength = ParsedUrl.HostnameLength;
        }

        // Calculate RoutingAddrLength; when non-zero, it includes the
        // ':' that separates the port from the Routing IP
        if (0 != ParsedUrl.RoutingAddr.sa_family)
        {
            RoutingAddrLength = WCSLEN_LIT(L":")
                                + HttppPrintIpAddressW(
                                        &ParsedUrl.RoutingAddr,
                                        RoutingAddrString
                                        );
        }
        else if (ForceRoutingIP  &&  0 != ParsedUrl.SockAddr.sa_family)
        {
            // We must rewrite http://IP:port/path as http://IP:port:IP/path
            RoutingAddrLength = WCSLEN_LIT(L":") + HostAddrLength;
            wcscpy(RoutingAddrString, HostAddrString);
        }
        else
        {
            RoutingAddrLength = 0;
            RoutingAddrString[0] = UNICODE_NULL;
        }

        AbsPathLength = ParsedUrl.AbsPathLength;

        ASSERT(AbsPathLength > 0);

        // Append a '/' if one is required but missing
        if (ParsedUrl.TrailingSlashReqd
                && FORWARD_SLASH != ParsedUrl.pAbsPath[AbsPathLength-1])
        {
            TrailingSlashLength = WCSLEN_LIT(L"/");
        }
        else
        {
            TrailingSlashLength = 0;
        }

        // Total length in WCHARs, excluding the terminating UNICODE_NULL
        Length = SchemeLength
                    + HostAddrLength
                    + HostnameLength
                    + WCSLEN_LIT(L":") + ParsedUrl.PortLength
                    + RoutingAddrLength
                    + AbsPathLength
                    + TrailingSlashLength;

        pResult = (PWSTR) HTTPP_ALLOC(
                                PoolType,
                                (Length + 1) * sizeof(WCHAR),
                                PoolTag
                                );


        if (NULL == pResult)
        {
            Status = STATUS_NO_MEMORY;
            // Do not destroy the old URL. Let caller handle it.
        }
        else
        {
            PWSTR pDest = pResult;

//
// Helpers that append to pDest and advance it. Each expands to TWO
// statements, so they must only be used as full statements inside braces.
//

#define WCSNCPY(pSrc, Length)                               \
    RtlCopyMemory(pDest, (pSrc), (Length) * sizeof(WCHAR)); \
    pDest += (Length)

#define WCSNCPY2(pField, Length)                            \
    WCSNCPY(ParsedUrl.pField, Length)

#define WCSNCPY_LIT(Lit)                                    \
    WCSNCPY(Lit, WCSLEN_LIT(Lit))

            WCSNCPY2(pFullUrl, SchemeLength);

            if (0 != HostnameLength)
            {
                ASSERT(0 == HostAddrLength);
                WCSNCPY2(pHostname, HostnameLength);
            }
            else
            {
                ASSERT(0 != HostAddrLength);
                WCSNCPY(HostAddrString, HostAddrLength);
            }

            WCSNCPY_LIT(L":");
            WCSNCPY2(pPort, ParsedUrl.PortLength);

            if (RoutingAddrLength > 0)
            {
                WCSNCPY_LIT(L":");
                WCSNCPY(
                    RoutingAddrString,
                    RoutingAddrLength - WCSLEN_LIT(L":")
                    );
            }

            WCSNCPY2(pAbsPath, AbsPathLength);

            if (TrailingSlashLength > 0)
            {
                WCSNCPY_LIT(L"/");
            }

            ASSERT(DIFF(pDest - pResult) == Length);

            *pDest = UNICODE_NULL;

            // Re-parse so that every pointer in ParsedUrl refers to pResult
            Status = HttpParseUrl(
                        pCfg,
                        pResult,
                        Length,
                        ParsedUrl.TrailingSlashReqd,
                        ForceRoutingIP,
                        &ParsedUrl
                        );

            ASSERT(STATUS_SUCCESS == Status);

            if (!NT_SUCCESS(Status))
            {
                //
                // The rebuilt URL was rejected. As in the out-of-memory
                // case, do not destroy the old URL or the caller's parsed
                // URL; just discard the new buffer. Let caller handle it.
                //
                HTTPP_FREE(pResult, PoolTag);
            }
            else
            {
                ASSERT(ParsedUrl.Normalized);

                if (FreeOriginalUrl)
                    HTTPP_FREE((PVOID) pUrl, PoolTag);

                // Write the updated local copy back to the caller's
                // HTTP_PARSED_URL
                *pParsedUrl = ParsedUrl;
            }
        }
    }

    return Status;

} // HttpNormalizeParsedUrl