windows-server-2003/inetcore/winhttp/v5/http/hdrparse.cxx

#include <wininetp.h>
#include <perfdiag.hxx>
#include "httpp.h"

//
// HTTP_HEADER_PARSER implementation
//

HTTP_HEADER_PARSER::HTTP_HEADER_PARSER(
    IN LPSTR szHeaders,
    IN DWORD cbHeaders
    ) : HTTP_HEADERS()

/*++

Routine Description:

    Constructor for the HTTP_HEADER_PARSER object.  Calls ParseHeaders to
      build a parsed version of the header string passed in.

Arguments:

    szHeaders      - pointer to the headers to parse

    cbHeaders      - length of the headers

Return Value:

    None.

--*/

{
    DWORD dwBytesScaned = 0;
    BOOL fFoundCompleteLine;
    BOOL fFoundEndOfHeaders;
    DWORD error;

    error = ParseHeaders(
        szHeaders,
        cbHeaders,
        TRUE, // Eof
        &dwBytesScaned,
        &fFoundCompleteLine,
        &fFoundEndOfHeaders
        );

    INET_ASSERT(error == ERROR_SUCCESS);
    INET_ASSERT(fFoundCompleteLine);
    INET_ASSERT(fFoundEndOfHeaders);
}


BOOL
HTTP_HEADER_PARSER::ParseStatusLine(
    IN LPSTR lpHeaderBase,
    IN DWORD dwBufferLength,
    IN BOOL fEof,
    IN OUT DWORD *lpdwBufferLengthScanned,
    OUT DWORD *lpdwStatusCode,
    OUT DWORD *lpdwMajorVersion,
    OUT DWORD *lpdwMinorVersion
    )

/*++

Routine Description:

    Parses the Status line of an HTTP server response.  Takes care of adding the status
     line to HTTP header array.

Arguments:

    lpszHeader      - pointer to the header to check

    dwHeaderLength  - length of the header

Return Value:

    BOOL  - TRUE if line was successively parsed and processed, FALSE otherwise

--*/

{

#define BEFORE_VERSION_NUMBERS 0
#define MAJOR_VERSION_NUMBER   1
#define MINOR_VERSION_NUMBER   2
#define STATUS_CODE_NUMBER     3
#define AFTER_STATUS_CODE      4
#define MAX_STATUS_INTS        4

    LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
    LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
    DWORD dwBytesScanned = 0;
    DWORD dwStatusLineSize = 0;
    LPSTR lpszStatusLine;
    int ver_state = BEFORE_VERSION_NUMBERS;
    DWORD adwStatusInts[MAX_STATUS_INTS];
    BOOL success = TRUE;

    for ( int i = 0; i < MAX_STATUS_INTS; i++)
        adwStatusInts[i] = 0;

    lpszStatusLine = response;

    //
    // While walking the Status Line looking for terminating \r\n,
    //   we extract the Major.Minor Versions and Status Code in that order.
    //   text and spaces will lie between/before/after the three numbers
    //   but the idea is to remeber which number we're calculating based on a numeric state
    //   If all goes well the loop will churn out an array with the 3 numbers plugged in as DWORDs
    //

    while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
    {
        // below should be wrapped in while (response[i] != ' ') to be more robust???
        switch (ver_state)
        {
            case BEFORE_VERSION_NUMBERS:
                if (*response == '/')
                {
                    INET_ASSERT(ver_state == BEFORE_VERSION_NUMBERS);
                    ver_state++; // = MAJOR_VERSION_NUMBER
                }
                else if (*response == ' ')
                {
                    ver_state = STATUS_CODE_NUMBER;
                }

                break;

            case MAJOR_VERSION_NUMBER:

                if (*response == '.')
                {
                    INET_ASSERT(ver_state == MAJOR_VERSION_NUMBER);
                    ver_state++; // = MINOR_VERSION_NUMBER
                    break;
                }
                // fall through

            case MINOR_VERSION_NUMBER:

                if (*response == ' ')
                {
                    INET_ASSERT(ver_state == MINOR_VERSION_NUMBER);
                    ver_state++; // = STATUS_CODE_NUMBER
                    break;
                }
                // fall through

            case STATUS_CODE_NUMBER:

                if (isdigit(*response)) {
                    int val = *response - '0';
                    adwStatusInts[ver_state] = adwStatusInts[ver_state] * 10 + val;
                }
                else if ( adwStatusInts[STATUS_CODE_NUMBER] > 0 )
                {
                    //
                    // we eat spaces before status code is found,
                    //  once we have the status code we can go on to the next
                    //  state on the next non-digit. This is done
                    //  to cover cases with several spaces between version
                    //  and the status code number.
                    //

                    INET_ASSERT(ver_state == STATUS_CODE_NUMBER);
                    ver_state++; // = AFTER_STATUS_CODE
                    break;
                } else if (!isspace(*response)) {
                    adwStatusInts[ver_state] = (DWORD)-1;
                }

                break;

            case AFTER_STATUS_CODE:
                break;

        }

        ++response;
        ++dwBytesScanned;
    }

    dwStatusLineSize = dwBytesScanned;

    if (response == lpszEnd) {

        //
        // response now points one past the end of the buffer. We may be looking
        // over the edge...
        //
        // if we're at the end of the connection then the server sent us an
        // incorrectly formatted response. Probably an error.
        //
        // Otherwise its a partial response. We need more
        //


        DEBUG_PRINT(HTTP,
                    INFO,
                    ("found end of short response in status line\n"
                    ));

        success = fEof ? TRUE : FALSE;

        //
        // if we really hit the end of the response then update the amount of
        // headers scanned
        //

        if (!success) {
            dwBytesScanned = 0;
        }

        goto quit;

    }

    while ((response < lpszEnd)
    && ((*response == '\r') || (*response == ' '))) {
        ++response;
        ++dwBytesScanned;
    }

    if (response == lpszEnd) {

        //
        // hit end of buffer without finding LF
        //

        success = FALSE;

        DEBUG_PRINT(HTTP,
                    WARNING,
                    ("hit end of buffer without finding LF\n"
                    ));

        goto quit;

    } else if (*response == '\n') {
        ++response;
        ++dwBytesScanned;

        //
        // if we found the empty line then we are done
        //

        success = TRUE;
    }


    INET_ASSERT(success);

    //
    // Now we have our parsed header to add to the array
    //

    HEADER_STRING * freeHeader;
    DWORD iSlot;

    freeHeader = FindFreeSlot(&iSlot);
    if (freeHeader == NULL) {
        INET_ASSERT(FALSE);
        success = FALSE;
        goto quit;
    } else {
        INET_ASSERT(iSlot == 0); // status line should always be first
        freeHeader->CreateOffsetString((DWORD)(lpszStatusLine - lpHeaderBase), dwStatusLineSize);
        freeHeader->SetHash(0); // status line has no hash value.
    }


quit:

    *lpdwStatusCode    = adwStatusInts[STATUS_CODE_NUMBER];
    *lpdwMajorVersion  = adwStatusInts[MAJOR_VERSION_NUMBER];
    *lpdwMinorVersion  = adwStatusInts[MINOR_VERSION_NUMBER];

    *lpdwBufferLengthScanned += dwBytesScanned;

    return success;
}

DWORD
HTTP_HEADER_PARSER::ParseHeaders(
    IN LPSTR lpHeaderBase,
    IN DWORD dwBufferLength,
    IN BOOL fEof,
    IN OUT DWORD *lpdwBufferLengthScanned,
    OUT LPBOOL pfFoundCompleteLine,
    OUT LPBOOL pfFoundEndOfHeaders
    )

/*++

Routine Description:

    Loads headers into HTTP_HEADERS member for subsequent parsing.

    Parses string based headers and adds their parts to an internally stored
    array of HTTP_HEADERS.

    Input is assumed to be well formed Header Name/Value pairs, each deliminated
    by ':' and '\r\n'.

Arguments:

    lpszHeader      - pointer to the header to check

    dwHeaderLength  - length of the header

Return Value:

    None.

--*/


{

    LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
    LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
    DWORD dwBytesScanned = 0;
    BOOL success = FALSE;
    DWORD error = ERROR_SUCCESS;

    *pfFoundEndOfHeaders  = FALSE;

    //
    // Each iteration of the following loop
    // walks an HTTP header line of the form:
    //  HeaderName: HeaderValue\r\n
    //

    do
    {
        DWORD dwHash = HEADER_HASH_SEED;
        LPSTR lpszHeaderName;
        DWORD dwHeaderNameLength = 0;
        DWORD dwHeaderLineLength = 0;
        DWORD dwPreviousAmountOfBytesScanned = dwBytesScanned;

        //
        // Remove leading whitespace from header
        //

        while ( (response < lpszEnd) && ((*response == ' ') || (*response == '\t')) )
        {
            ++response;
            ++dwBytesScanned;
        }

        //
        // Scan for HeaderName:
        //

        lpszHeaderName = response;
        dwPreviousAmountOfBytesScanned = dwBytesScanned;

        while ((response < lpszEnd) && (*response != ':') && (*response != '\r') && (*response != '\n'))
        {
            //
            // This code incapsulates CalculateHashNoCase as an optimization,
            //   we attempt to calculate the Hash value as we parse the header.
            //

            CHAR ch = *response;

            if ((ch >= 'A') && (ch <= 'Z')) {
                ch = MAKE_LOWER(ch);
            }
            dwHash += (DWORD)(dwHash << 5) + ch;

            ++response;
            ++dwBytesScanned;
        }

        dwHeaderNameLength = (DWORD) (response - lpszHeaderName);

        //
        // catch bogus responses: if we find what looks like one of a (very)
        // small set of HTML tags, then assume the previous header was the
        // last
        //

        if ((dwHeaderNameLength >= sizeof("<HTML>") - 1)
            && (*lpszHeaderName == '<')
            && (!strnicmp(lpszHeaderName, "<HTML>", sizeof("<HTML>") - 1)
                || !strnicmp(lpszHeaderName, "<HEAD>", sizeof("<HEAD>") - 1))) {
            *pfFoundEndOfHeaders  = TRUE;
            break;
        }

        //
        // Keep scanning till end of the line.
        //

        while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
        {
            ++response;
            ++dwBytesScanned;
        }

        dwHeaderLineLength = (DWORD) (response - lpszHeaderName); // note: this headerLINElength

        if (response == lpszEnd) {

            //
            // response now points one past the end of the buffer. We may be looking
            // over the edge...
            //
            // if we're at the end of the connection then the server sent us an
            // incorrectly formatted response. Probably an error.
            //
            // Otherwise its a partial response. We need more
            //


            DEBUG_PRINT(HTTP,
                        INFO,
                        ("found end of short response\n"
                        ));

            success = fEof ? TRUE : FALSE;

            //
            // if we really hit the end of the response then update the amount of
            // headers scanned
            //

            if (!success) {
                dwBytesScanned = dwPreviousAmountOfBytesScanned;
            }

            break;

        }
        else
        {

            //
            // we reached a CR or LF. This is the end of this current header. Find
            // the start of the next one
            //

            //
            // first, strip off any trailing spaces from the current header. We do
            // this by simply reducing the string length. We only look for space
            // and tab characters. Only do this if we have a non-zero length header
            //

            if (dwHeaderLineLength != 0) {
                for (int i = -1; response[i] == ' ' || response[i] == '\t'; --i) {
                    --dwHeaderLineLength;
                }
            }

            INET_ASSERT((int)dwHeaderLineLength >= 0);

            //
            // some servers respond with "\r\r\n". Lame
            // A new twist: "\r \r\n". Lamer
            //

            while ((response < lpszEnd)
            && ((*response == '\r') || (*response == ' '))) {
                ++response;
                ++dwBytesScanned;
            }
            if (response == lpszEnd) {

                //
                // hit end of buffer without finding LF
                //

                success = FALSE;

                DEBUG_PRINT(HTTP,
                            WARNING,
                            ("hit end of buffer without finding LF\n"
                            ));

                //
                // get more data, reparse this line
                //

                dwBytesScanned = dwPreviousAmountOfBytesScanned;
                break;
            } else if (*response == '\n') {
                ++response;
                ++dwBytesScanned;

                //
                // if we found the empty line then we are done
                //

                if (dwHeaderLineLength == 0) {
                    *pfFoundEndOfHeaders  = TRUE;
                    break;
                }

                success = TRUE;
            }
        }

        //
        // Now we have our parsed header to add to the array
        //

        HEADER_STRING * freeHeader;
        DWORD iSlot;

        freeHeader = FindFreeSlot(&iSlot);
        if (freeHeader == NULL) {
            error = GetError();

            INET_ASSERT(error != ERROR_SUCCESS);
            goto quit;

        } else {
            freeHeader->CreateOffsetString((DWORD) (lpszHeaderName - lpHeaderBase), dwHeaderLineLength);
            freeHeader->SetHash(dwHash);
        }


        //CHAR szTemp[256];
        //
        //memcpy(szTemp, lpszHeaderName, dwHeaderLineLength);
        //lpszHeaderName[dwHeaderLineLength] = '\0';

        //DEBUG_PRINT(HTTP,
        //    INFO,
        //    ("ParseHeaders: adding=%q\n", lpszHeaderName
        //    ));


        //
        // Now see if this is a known header we are adding, if so then we note that fact
        //

        DWORD dwKnownQueryIndex;

        if (HeaderMatch(dwHash, lpszHeaderName, dwHeaderNameLength, &dwKnownQueryIndex) )
        {
            freeHeader->SetNextKnownIndex(FastAdd(dwKnownQueryIndex, iSlot));
        }
    } while (TRUE);

quit:

    *lpdwBufferLengthScanned += dwBytesScanned;
    *pfFoundCompleteLine = success;

    return error;
}