windows-xp/Source/XPSP1/NT/inetsrv/iis/iisrearc/ul/drv/parsep.h


								/*++


								Copyright (c) 1998-2001 Microsoft Corporation


								Module Name:


								    parsep.h


								Abstract:


								    Contains the private constants, table layouts, macros, prototypes and
								    inline helpers used by the kernel mode HTTP parsing code.


								Author:


								    Henry Sanders (henrysa)       04-May-1998


								Revision History:


								--*/


								#ifndef _PARSEP_H_

								#define _PARSEP_H_


								#ifdef __cplusplus

								extern "C" {

								#endif


								//

								// External variables.

								//
								// NlsLeadByteInfo is defined outside this header. PopChar (below) indexes
								// it with a byte value and treats a nonzero entry as meaning that the byte
								// is the lead byte of a double-byte (DBCS) character.
								//


								extern PUSHORT NlsLeadByteInfo;


								//

								// Constants

								//


								// Length in bytes of the string "HTTP/1.1", not counting the NUL.
								#define MIN_VERSION_SIZE    (sizeof("HTTP/1.1") - 1)


								// Size of the RawVerb buffer in LONG_VERB_ENTRY: the length of
								// "PROPPATCH" plus its terminating NUL (no "- 1" here).
								#define MAX_VERB_LENGTH     (sizeof("PROPPATCH"))


								// The eight ASCII bytes of "HTTP/1.1" and "HTTP/1.0" packed into one
								// 64-bit value with the first character in the least significant byte.
								#define HTTP_11_VERSION 0x312e312f50545448

								#define HTTP_10_VERSION 0x302e312f50545448


								// ANDing a byte with 0xdf clears bit 0x20, which maps ASCII 'a'-'z' onto
								// 'A'-'Z'. This mask does that for all eight bytes of a ULONGLONG.
								#define UPCASE_MASK ((ULONGLONG)0xdfdfdfdfdfdfdfdf)


								// Capacity of the header name storage in HEADER_MAP_ENTRY, expressed
								// first in ULONGLONGs and then in bytes.
								#define MAX_HEADER_LONG_COUNT           (3)

								#define MAX_HEADER_LENGTH               (MAX_HEADER_LONG_COUNT * sizeof(ULONGLONG))


								// NOTE(review): presumably the number of HEADER_INDEX_ENTRY slots, one per
								// letter of the alphabet - the table itself is not in this header; confirm.
								#define NUMBER_HEADER_INDICIES          (26)


								// NOTE(review): presumably the number of HEADER_HINT_INDEX_ENTRY slots -
								// the table itself is not in this header; confirm.
								#define NUMBER_HEADER_HINT_INDICIES     (8)


								//

								// Default Server: header if none provided by the application.

								// The _LENGTH value is the string length without the terminating NUL.

								//


								#define DEFAULT_SERVER_HDR          "Microsoft-IIS/6.0"

								#define DEFAULT_SERVER_HDR_LENGTH   (sizeof(DEFAULT_SERVER_HDR) - sizeof(CHAR))


								//

								// One second in 100ns system time units. Used for generating

								// Date: headers.

								//


								#define ONE_SECOND                  10000000


								//

								// Connection: header values and their sizes. Each _LENGTH value is the

								// string length without the terminating NUL.

								//


								#define CONN_CLOSE_HDR              "close"

								#define CONN_CLOSE_HDR_LENGTH       (sizeof(CONN_CLOSE_HDR) - sizeof(CHAR))


								#define CONN_KEEPALIVE_HDR          "keep-alive"

								#define CONN_KEEPALIVE_HDR_LENGTH   (sizeof(CONN_KEEPALIVE_HDR) - sizeof(CHAR))


								//

								// These are backwards because of little endian.

								//
								// Each group is a multi-character constant, the number of bytes it
								// describes, and a mask to AND with the data before comparing.
								// NOTE(review): the code that uses these is not in this header;
								// presumably it is the absolute-URL check ("http://" / "https://").
								//


								// "HTTP"; the 0xdf mask bytes clear bit 0x20 in every byte, so the
								// comparison ignores ASCII case.
								#define HTTP_PREFIX         'PTTH'

								#define HTTP_PREFIX_SIZE    4

								#define HTTP_PREFIX_MASK    0xdfdfdfdf


								// "://"; only three bytes are significant, so the mask zeroes the
								// most significant byte.
								#define HTTP_PREFIX1        '\0//:'

								#define HTTP_PREFIX1_SIZE   3

								#define HTTP_PREFIX1_MASK   0x00ffffff


								// "S://"; only the low byte (the 'S') has bit 0x20 cleared, the
								// other three bytes are compared exactly.
								#define HTTP_PREFIX2        '//:S'

								#define HTTP_PREFIX2_SIZE   4

								#define HTTP_PREFIX2_MASK   0xffffffdf


								//
								// Signature of the per-header handler stored in
								// HEADER_MAP_ENTRY.pServerHandler.
								//
								// NOTE(review): judging by the parameter names only, the handler is
								// given the request being built, a pointer into the raw request bytes
								// with the number of bytes available, and the id of the matched header,
								// and reports how many bytes it consumed through pBytesTaken - confirm
								// against the handler implementations.
								//
								typedef NTSTATUS (*PFN_SERVER_HEADER_HANDLER)(

								                        PUL_INTERNAL_REQUEST    pRequest,

								                        PUCHAR                  pHttpRequest,

								                        ULONG                   HttpRequestLength,

								                        HTTP_HEADER_ID          HeaderID,

								                        ULONG *                 pBytesTaken

								                        );


								//
								// Signature of the per-header handler stored in
								// HEADER_MAP_ENTRY.pClientHandler.
								//
								// NOTE(review): judging by the parameter names only, the handler
								// receives the known-header array, head/tail cursors into an output
								// buffer with its remaining size, and the raw header bytes, and reports
								// how many bytes it consumed through pBytesTaken - confirm against the
								// handler implementations.
								//
								typedef NTSTATUS (*PFN_CLIENT_HEADER_HANDLER)(

								                    PHTTP_KNOWN_HEADER  pKnownHeaders,

								                    PUCHAR              *pOutBufferHead,

								                    PUCHAR              *pOutBufferTail,

								                    PULONG              BytesAvailable,

								                    PUCHAR              pHeader,

								                    ULONG               HeaderLength,

								                    HTTP_HEADER_ID      HeaderID,

								                    ULONG  *            pBytesTaken

								                    );


								//

								// Structure of the fast verb lookup table. The table consists of a series of

								// entries where each entry contains an HTTP verb represented as a ulonglong,

								// a mask to use for comparing that verb, the length of the verb and the

								// translated id.

								//
								// Entries are normally built with CREATE_FAST_VERB_ENTRY, which relies
								// on the member order below.
								//


								typedef struct _FAST_VERB_ENTRY

								{

								    // The verb text followed by one space. Char has room for eight
								    // characters plus the NUL of the string initializer; LongLong views
								    // the first eight bytes as a single integer.
								    union

								    {

								        UCHAR       Char[sizeof(ULONGLONG)+1];

								        ULONGLONG   LongLong;

								    }           RawVerb;

								    // All-ones in the low RawVerbLength bytes, zero above them.
								    ULONGLONG   RawVerbMask;

								    // Length of the verb including its trailing space.
								    ULONG       RawVerbLength;

								    // The HTTP_VERB value this text maps to.
								    HTTP_VERB   TranslatedVerb;


								} FAST_VERB_ENTRY, *PFAST_VERB_ENTRY;


								//

								// Structure of the all verb lookup table. This table holds all verbs that

								// we understand, including those that are too long to fit in the fast

								// verb table.

								//
								// Entries are normally built with CREATE_LONG_VERB_ENTRY, which relies
								// on the member order below.
								//


								typedef struct _LONG_VERB_ENTRY

								{

								    // Length of the verb text, not counting the NUL and with no
								    // trailing space.
								    ULONG       RawVerbLength;

								    // The verb text; MAX_VERB_LENGTH leaves room for "PROPPATCH" and
								    // its terminating NUL.
								    UCHAR       RawVerb[MAX_VERB_LENGTH];

								    // The HTTP_VERB value this text maps to.
								    HTTP_VERB   TranslatedVerb;


								} LONG_VERB_ENTRY, *PLONG_VERB_ENTRY;


								//

								// Structure for a header map entry. Each header map entry contains a

								// header name and a series of masks to use in checking that header.

								//
								// Entries are normally built with CREATE_HEADER_MAP_ENTRY, which relies
								// on the member order below.
								//


								typedef struct _HEADER_MAP_ENTRY

								{

								    // Length of the header name text in bytes, without the NUL.
								    ULONG                      HeaderLength;

								    // Number of ULONGLONGs needed to hold HeaderLength bytes
								    // (rounded up).
								    ULONG                      ArrayCount;

								    // ArrayCount expressed in bytes (ArrayCount * 8).
								    ULONG                      MinBytesNeeded;

								    // The header name, addressable as bytes or as ULONGLONGs.
								    // NOTE(review): the initializer stores the same text here and in
								    // MixedCaseHeader; presumably the init code upcases this copy -
								    // confirm.
								    union

								    {

								        UCHAR                  HeaderChar[MAX_HEADER_LENGTH];

								        ULONGLONG              HeaderLong[MAX_HEADER_LONG_COUNT];

								    }                          Header;

								    // One comparison mask per HeaderLong element. Zero in the static
								    // initializer; created by the init code (see CREATE_HEADER_MASK).
								    ULONGLONG                  HeaderMask[MAX_HEADER_LONG_COUNT];

								    // The header name exactly as written in the table definition.
								    UCHAR                      MixedCaseHeader[MAX_HEADER_LENGTH];


								    // Id of the header this entry describes.
								    HTTP_HEADER_ID             HeaderID;

								    // NOTE(review): presumably TRUE when the header is generated
								    // automatically rather than supplied by the application - confirm.
								    BOOLEAN                    AutoGenerate;

								    // Handlers invoked for this header.
								    PFN_SERVER_HEADER_HANDLER  pServerHandler;

								    PFN_CLIENT_HEADER_HANDLER  pClientHandler;

								    // NOTE(review): presumably this entry's slot in the header hint
								    // index table - confirm the "no hint" value against the table code.
								    LONG                       HintIndex;


								}  HEADER_MAP_ENTRY, *PHEADER_MAP_ENTRY;


								//

								// Structure for a header index table entry.

								//
								// NOTE(review): presumably pHeaderMap points at the first of Count
								// consecutive HEADER_MAP_ENTRYs for this index slot - the table
								// setup is not in this header; confirm.
								//


								typedef struct _HEADER_INDEX_ENTRY

								{

								    PHEADER_MAP_ENTRY   pHeaderMap;

								    ULONG               Count;


								} HEADER_INDEX_ENTRY, *PHEADER_INDEX_ENTRY;


								//

								// Structure for a header hint index table entry.

								//
								// NOTE(review): presumably c is the first character of the header
								// that pHeaderMap describes, used as a quick pre-check - the code
								// that fills and reads this table is not in this header; confirm.
								//


								typedef struct _HEADER_HINT_INDEX_ENTRY

								{

								    PHEADER_MAP_ENTRY   pHeaderMap;

								    UCHAR               c;


								} HEADER_HINT_INDEX_ENTRY, *PHEADER_HINT_INDEX_ENTRY, **PPHEADER_HINT_INDEX_ENTRY;


								//
								// Produces the comparison mask for one ULONGLONG of a header map entry.
								//
								//   hlength    - length of the header name in bytes
								//   maskoffset - byte offset of the END of the ULONGLONG being masked
								//                (8 for the first one, 16 for the second, ...)
								//
								// A ULONGLONG that lies entirely inside the name gets the full
								// UPCASE_MASK, one that lies entirely past the name gets 0, and the one
								// the name ends in keeps UPCASE_MASK bytes only for the bytes the name
								// occupies. The result still has to be touched up for non-alphabetic
								// characters.
								//
								#define CREATE_HEADER_MASK(hlength, maskoffset) \
								    (((maskoffset) <= (hlength)) \
								        ? UPCASE_MASK \
								        : ((((maskoffset) - (hlength)) < 8) \
								            ? (UPCASE_MASK >> (((maskoffset) - (hlength)) * (ULONGLONG)8)) \
								            : 0))


								//
								// Expands to the static initializer of one HEADER_MAP_ENTRY.
								//
								// 'header' is the bare header name token (e.g. Host:); it is stringized
								// to fill both the Header and MixedCaseHeader members. The three leading
								// values are the name length, the number of ULONGLONGs needed to hold it
								// (rounded up) and that count expressed in bytes. The HeaderMask words
								// are left zero here and are created by the init code.
								//
								#define CREATE_HEADER_MAP_ENTRY(header, ID, auto, serverhandler, clienthandler, HintIndex)\
								{                                                        \
								    (sizeof(#header) - 1),                               \
								    ((sizeof(#header) - 1 + 7) / 8),                     \
								    (((sizeof(#header) - 1 + 7) / 8) * 8),               \
								    { #header },                                         \
								    { 0, 0, 0 },                                         \
								    { #header },                                         \
								    ID,                                                  \
								    auto,                                                \
								    serverhandler,                                       \
								    clienthandler,                                       \
								    HintIndex                                            \
								}


								//
								// Expands to the static initializer of one FAST_VERB_ENTRY.
								//
								// The stored text is the verb followed by the separating space. That is
								// why the sizeof expressions below are used as-is: the byte sizeof counts
								// for the NUL stands in for the space, so subtracting 1 would only mean
								// adding it back. The mask keeps exactly that many low-order bytes.
								//
								#define CREATE_FAST_VERB_ENTRY(verb)                                   \
								    {                                                                  \
								        { #verb " " },                                                 \
								        (0xffffffffffffffff >> (64 - (sizeof(#verb) * 8))),            \
								        sizeof(#verb),                                                 \
								        HttpVerb##verb                                                 \
								    }


								//
								// Expands to the static initializer of one LONG_VERB_ENTRY: the verb
								// length (without the NUL and without a trailing space), the verb text
								// itself and the matching HttpVerb<verb> value.
								//
								#define CREATE_LONG_VERB_ENTRY(verb) \
								    { (sizeof(#verb) - 1), #verb, HttpVerb##verb }


								//
								// Nonzero when the top two bits of ch are 10, i.e. ch has the form
								// 10xxxxxx that marks a UTF-8 continuation (trail) byte.
								//
								#define IS_UTF8_TRAILBYTE(ch)      (0x80 == (0xc0 & (ch)))


								//
								// Request parsing routines implemented outside this header. All of
								// them take the request being built and a pointer/length pair that
								// describes raw request bytes, and return an NTSTATUS.
								//

								//
								// NOTE(review): pHostPtr and BytesTaken are annotated IN but are a
								// pointer-to-pointer and a pointer-to-count, which look like outputs -
								// confirm against the implementation.
								//
								NTSTATUS

								CheckForAbsoluteUrl(

								    IN  PUL_INTERNAL_REQUEST    pRequest,

								    IN  PUCHAR                  pURL,

								    IN  ULONG                   URLLength,

								    IN  PUCHAR *                pHostPtr,

								    IN  ULONG  *                BytesTaken

								    );


								//
								// NOTE(review): presumably matches the verb at the start of the
								// request using the FAST_VERB_ENTRY / LONG_VERB_ENTRY tables and
								// reports the bytes consumed through pBytesTaken - confirm.
								//
								NTSTATUS

								LookupVerb(

								    IN  PUL_INTERNAL_REQUEST    pRequest,

								    IN  PUCHAR                  pHttpRequest,

								    IN  ULONG                   HttpRequestLength,

								    OUT ULONG  *                pBytesTaken

								    );


								//
								// Same parameters as UlParseHeader plus pHeaderHintMap, a header
								// map entry supplied by the caller as a hint.
								//
								NTSTATUS

								UlParseHeaderWithHint(

								    IN  PUL_INTERNAL_REQUEST    pRequest,

								    IN  PUCHAR                  pHttpRequest,

								    IN  ULONG                   HttpRequestLength,

								    IN  PHEADER_MAP_ENTRY       pHeaderHintMap,

								    OUT ULONG  *                pBytesTaken

								    );


								NTSTATUS

								UlParseHeader(

								    IN  PUL_INTERNAL_REQUEST    pRequest,

								    IN  PUCHAR                  pHttpRequest,

								    IN  ULONG                   HttpRequestLength,

								    OUT ULONG  *                pBytesTaken

								    );


								//
								// Takes an array of header map entries (pCurrentHeaderMap with
								// HeaderMapCount elements) in addition to the raw request bytes.
								//
								NTSTATUS

								UlLookupHeader(

								    IN  PUL_INTERNAL_REQUEST    pRequest,

								    IN  PUCHAR                  pHttpRequest,

								    IN  ULONG                   HttpRequestLength,

								    IN  PHEADER_MAP_ENTRY       pCurrentHeaderMap,

								    IN  ULONG                   HeaderMapCount,

								    OUT ULONG  *                pBytesTaken

								    );


								//
								// Identifies which component of a URL is being processed. PopChar
								// applies different validation and decoding rules to QueryString than
								// to the other parts.
								//
								typedef enum _URL_PART
								{
								    Scheme      = 0,
								    HostName    = 1,
								    AbsPath     = 2,
								    QueryString = 3

								} URL_PART;


								//
								// Character encoding assumed for the bytes of a URL. PopChar decodes
								// UrlTypeUtf8 itself and hands the other two to
								// RtlMultiByteToUnicodeN; UrlTypeDbcs additionally allows a
								// lead/trail byte pair.
								//
								typedef enum _URL_TYPE
								{
								    UrlTypeUtf8 = 0,
								    UrlTypeAnsi = 1,
								    UrlTypeDbcs = 2

								} URL_TYPE;


								//
								// URL cleaning routines implemented outside this header.
								//
								// NOTE(review): judging by the parameter names and annotations only,
								// these convert SourceLength bytes at pSource into wide characters at
								// pDestination for the given URL part, and report the bytes written,
								// the start of the query string (optional) and a hash of the URL -
								// confirm against the implementation.
								//
								NTSTATUS

								UlpCleanAndCopyUrl(

								    IN      URL_PART    UrlPart,

								    IN OUT  PWSTR       pDestination,

								    IN      PUCHAR      pSource,

								    IN      ULONG       SourceLength,

								    OUT     PULONG      pBytesCopied,

								    OUT     PWSTR *     ppQueryString OPTIONAL,

								    OUT     PULONG      pUrlHash

								    );


								//
								// Same parameters as UlpCleanAndCopyUrl with the source encoding
								// (UrlType) given explicitly.
								//
								NTSTATUS

								UlpCleanAndCopyUrlByType(

								    IN      URL_TYPE    UrlType,

								    IN      URL_PART    UrlPart,

								    IN OUT  PWSTR       pDestination,

								    IN      PUCHAR      pSource,

								    IN      ULONG       SourceLength,

								    OUT     PULONG      pBytesCopied,

								    OUT     PWSTR *     ppQueryString OPTIONAL,

								    OUT     PULONG      pUrlHash

								    );


								//
								// Decodes one %xx escape. PopChar calls it with pChar pointing at
								// the '%', advances three bytes per successful call and takes the
								// decoded byte from *pOutChar. It takes no length argument.
								//
								NTSTATUS

								Unescape(

								    IN  PUCHAR pChar,

								    OUT PUCHAR pOutChar

								    );


								//
								// PopChar
								//
								// Decodes the next character of a URL, starting at pChar, into a
								// single WCHAR.
								//
								// PopChar is used only if the string is not UTF-8, or UrlPart != QueryString,
								// or the current character is '%' or its high bit is set.  In all other cases,
								// the FastPopChars table is used for fast conversion.
								//
								// Arguments:
								//
								//     UrlType      - encoding of the source bytes (UTF-8, ANSI or DBCS).
								//     UrlPart      - which part of the URL pChar belongs to. Query
								//                    string bytes are copied through undecoded; every
								//                    other part is validated and unescaped.
								//     pChar        - the source bytes. No length is passed, so the
								//                    caller must make sure the bytes a %xx run can
								//                    reach (up to nine) are readable.
								//     pUnicodeChar - receives the decoded character.
								//     pCharToSkip  - receives the number of source bytes consumed.
								//
								// Return Value:
								//
								//     STATUS_SUCCESS when a character was decoded. On any failure
								//     (STATUS_OBJECT_PATH_SYNTAX_BAD from the checks here, or whatever
								//     Unescape / RtlMultiByteToUnicodeN returned) neither output
								//     parameter is written.
								//
								// UTF-8 handling: only percent-escaped 2 and 3 byte sequences are
								// decoded. Any other byte with the high bit set is rejected, and so
								// are overlong (non-shortest-form) sequences such as %C0%AF, which
								// would otherwise be an alternate spelling of '/'.
								//

								__inline
								NTSTATUS
								FASTCALL
								PopChar(
								    IN URL_TYPE UrlType,
								    IN URL_PART UrlPart,
								    IN PUCHAR pChar,
								    OUT WCHAR * pUnicodeChar,
								    OUT PULONG pCharToSkip
								    )
								{
								    NTSTATUS Status;
								    WCHAR   UnicodeChar;
								    UCHAR   Char;
								    UCHAR   Trail1;
								    UCHAR   Trail2;
								    ULONG   CharToSkip;

								    //
								    // Sanity check.
								    //

								    PAGED_CODE();

								    //
								    // validate it as a valid url character
								    //

								    if (UrlPart != QueryString)
								    {
								        if (IS_URL_TOKEN(pChar[0]) == FALSE)
								        {
								            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								            UlTrace(PARSER, (
								                "ul!PopChar(pChar = %p) first char isn't URL token\n",
								                pChar
								                ));

								            goto end;
								        }
								    }
								    else
								    {
								        //
								        // Allow anything but linefeed in the query string.
								        //

								        if (pChar[0] == LF)
								        {
								            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								            UlTrace(PARSER, (
								                "ul!PopChar(pChar = %p) linefeed in query string\n",
								                pChar
								                ));

								            goto end;
								        }

								        UnicodeChar = (USHORT) pChar[0];
								        CharToSkip = 1;

								        // skip all the decoding stuff
								        goto slash;
								    }

								    //
								    // need to unescape ?
								    //
								    // can't decode the query string.  that would be lossy decoding
								    // as '=' and '&' characters might be encoded, but have meaning
								    // to the usermode parser.
								    //

								    if (pChar[0] == '%')
								    {
								        Status = Unescape(pChar, &Char);
								        if (NT_SUCCESS(Status) == FALSE)
								        {
								            goto end;
								        }
								        CharToSkip = 3;
								    }
								    else
								    {
								        Char = pChar[0];
								        CharToSkip = 1;
								    }

								    if (UrlType == UrlTypeUtf8)
								    {
								        //
								        // convert to unicode, checking for utf8 .
								        //
								        // 3 byte runs are the largest we can have.  16 bits in UCS-2 =
								        // 3 bytes of (4+4,2+6,2+6) where it's code + char.
								        // for a total of 6+6+4 char bits = 16 bits.
								        //

								        //
								        // NOTE: we'll only bother to decode utf if it was escaped
								        // thus the (CharToSkip == 3)
								        //
								        if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0))
								        {
								            // 3 byte run
								            //

								            // Unescape the next 2 trail bytes
								            //

								            Status = Unescape(pChar+CharToSkip, &Trail1);
								            if (NT_SUCCESS(Status) == FALSE)
								            {
								                goto end;
								            }

								            CharToSkip += 3; // %xx

								            Status = Unescape(pChar+CharToSkip, &Trail2);
								            if (NT_SUCCESS(Status) == FALSE)
								            {
								                goto end;
								            }

								            CharToSkip += 3; // %xx

								            if (IS_UTF8_TRAILBYTE(Trail1) == FALSE ||
								                IS_UTF8_TRAILBYTE(Trail2) == FALSE)
								            {
								                // bad utf!
								                //
								                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								                UlTrace(PARSER, (
								                            "ul!PopChar( 0x%x 0x%x ) bad trail bytes\n",
								                            Trail1,
								                            Trail2
								                            ));

								                goto end;
								            }

								            // handle three byte case
								            // 1110xxxx 10xxxxxx 10xxxxxx

								            UnicodeChar = (USHORT) (((Char & 0x0f) << 12) |
								                                    ((Trail1 & 0x3f) << 6) |
								                                    (Trail2 & 0x3f));

								            //
								            // Anything below U+0800 fits in one or two bytes, so a
								            // three byte encoding of it is overlong and must not be
								            // accepted as an alias of the short form.
								            //

								            if (UnicodeChar < 0x0800)
								            {
								                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								                UlTrace(PARSER, (
								                            "ul!PopChar( 0x%x ) overlong 3 byte utf8 run\n",
								                            UnicodeChar
								                            ));

								                goto end;
								            }
								        }
								        else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0))
								        {
								            // 2 byte run
								            //

								            // Unescape the next 1 trail byte
								            //

								            Status = Unescape(pChar+CharToSkip, &Trail1);
								            if (NT_SUCCESS(Status) == FALSE)
								            {
								                goto end;
								            }

								            CharToSkip += 3; // %xx

								            if (IS_UTF8_TRAILBYTE(Trail1) == FALSE)
								            {
								                // bad utf!
								                //
								                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								                UlTrace(PARSER, (
								                            "ul!PopChar( 0x%x ) bad trail byte\n",
								                            Trail1
								                            ));

								                goto end;
								            }

								            // handle two byte case
								            // 110xxxxx 10xxxxxx

								            UnicodeChar = (USHORT) (((Char & 0x1f) << 6) |
								                                    (Trail1 & 0x3f));

								            //
								            // Anything below U+0080 is plain ASCII, so a two byte
								            // encoding of it (lead byte 0xC0 or 0xC1) is overlong.
								            // Without this check %C0%AF would decode to '/'.
								            //

								            if (UnicodeChar < 0x0080)
								            {
								                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								                UlTrace(PARSER, (
								                            "ul!PopChar( 0x%x ) overlong 2 byte utf8 run\n",
								                            UnicodeChar
								                            ));

								                goto end;
								            }
								        }

								        // now this can either be unescaped high-bit (bad)
								        // or escaped high-bit.  (also bad)
								        //
								        // thus not checking CharToSkip
								        //

								        else if ((Char & 0x80) == 0x80)
								        {
								            // high bit set !  bad utf!
								            //
								            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;

								            UlTrace(PARSER, (
								                        "ul!PopChar( 0x%x ) ERROR: high bit set! bad utf!\n",
								                        Char
								                        ));

								            goto end;
								        }
								        //
								        // Normal character (again either escaped or unescaped)
								        //
								        else
								        {
								            //
								            // Simple conversion to unicode, it's 7-bit ascii.
								            //

								            UnicodeChar = (USHORT)Char;
								        }
								    }
								    else // UrlType != UrlTypeUtf8
								    {
								        UCHAR AnsiChar[2];
								        ULONG AnsiCharSize;

								        //
								        // Convert ANSI character to Unicode.
								        // If the UrlType is UrlTypeDbcs, then we may have
								        // a DBCS lead/trail pair.
								        //

								        if (UrlType == UrlTypeDbcs && NlsLeadByteInfo[Char])
								        {
								            //
								            // This is a double-byte character.
								            //
								            // NOTE(review): the trail byte is always fetched with
								            // Unescape, so presumably a trail byte that is not
								            // written as %xx fails here - confirm this is intended.
								            //

								            AnsiCharSize = 2;
								            AnsiChar[0] = Char;

								            Status = Unescape(pChar+CharToSkip, &AnsiChar[1]);
								            if (!NT_SUCCESS(Status))
								            {
								                goto end;
								            }

								            CharToSkip += 3; // %xx
								        }
								        else
								        {
								            //
								            // This is a single-byte character.
								            //

								            AnsiCharSize = 1;
								            AnsiChar[0] = Char;
								        }

								        Status = RtlMultiByteToUnicodeN(
								                        &UnicodeChar,
								                        sizeof(WCHAR),
								                        NULL,
								                        (PCHAR) &AnsiChar[0],
								                        AnsiCharSize
								                        );

								        if (!NT_SUCCESS(Status))
								        {
								            goto end;
								        }
								    }

								slash:
								    //
								    // turn backslashes into forward slashes
								    //

								    if (UrlPart != QueryString && UnicodeChar == L'\\')
								    {
								        UnicodeChar = L'/';
								    }
								    else if (UnicodeChar == UNICODE_NULL)
								    {
								        //
								        // we pop'd a NULL.  bad!
								        //
								        Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
								        goto end;
								    }

								    //
								    // Outputs are written only on the success path.
								    //

								    *pCharToSkip  = CharToSkip;
								    *pUnicodeChar = UnicodeChar;

								    Status = STATUS_SUCCESS;

								end:
								    return Status;

								}   // PopChar


								// Call this only after the entire request has been parsed

								//
								// NOTE(review): the implementation is not in this header; presumably
								// it produces the cleaned-up URL for pRequest - confirm.
								//

								NTSTATUS

								UlpCookUrl(

								    IN  PUL_INTERNAL_REQUEST    pRequest

								    );


								//
								// NOTE(review): judging by the signature only, this parses an HTTP
								// version out of StringLength bytes at pString into *pVersion; the
								// meaning of the ULONG return value (presumably the number of bytes
								// consumed) must be confirmed against the implementation.
								//
								ULONG

								UlpParseHttpVersion(

								    PUCHAR pString,

								    ULONG  StringLength,

								    PHTTP_VERSION pVersion

								    );


								#ifdef __cplusplus

								}; // extern "C"

								#endif


								#endif // _PARSEP_H_