/*--
Copyright (c) 1995-1998  Microsoft Corporation
Module Name: PARSER.CPP
Author: Arul Menezes
Abstract: HTTP request parser
--*/
#include "pch.h"
#pragma hdrstop

#include "httpd.h"

// This could be written as a state-machine parser, but for now I'm
// keeping it simple and slow :-(

// Methods
// Request-line verbs. ParseMethod compares the incoming method token against
// these with memcmp, i.e. case-sensitively.
const char cszGET[] = "GET";
const char cszHEAD[] = "HEAD";
const char cszPOST[] = "POST";
// General headers
// Header-name tokens carry their trailing ':'. ParseHeaders matches them
// case-insensitively (equal length + _memicmp) through the rgHeaders table.
const char cszConnection[] = "Connection:";
//const char cszDate[] = "Date:";
//const char cszPragma[] = "Pragma:";
// Request headers
const char cszAuthorization[] = "Authorization:";
const char cszIfModifiedSince[] = "If-Modified-Since:";
//const char cszReferer[] = "Referer:";
//const char cszUserAgent[] = "User-Agent:";
const char cszCookie[] = "Cookie:";
const char cszAccept[] = "Accept:";
// Entity Headers
const char cszContentLength[] = "Content-Length:";
const char cszContentType[] = "Content-Type:";

// other Header tokens
// const char cszHTTPVER[] = "HTTP/%d.%d"; //
// NOTE(review): cszHTTPVER is not referenced in the visible part of this file;
// presumably it is consumed elsewhere (e.g. by SetHTTPVersion) -- confirm.
const char cszHTTPVER[] = "HTTP/";
// Authorization scheme names; ParseAuthorization compares them with strcmpi
// (case-insensitive).
const char cszBasic[] = "Basic";
const char cszNTLM[] = "NTLM";

// PFNPARSE(x): address of the member function CHttpRequest::Parse<x>.
// The qualified-id must NOT be parenthesised: standard C++ only forms a
// pointer to member from the exact spelling &Class::member (the parenthesised
// form is accepted only as a compiler extension).
#define PFNPARSE(x) &CHttpRequest::Parse##x
// TABLEENTRY(csz, id, pfn): one HEADERDESC initializer. csz must be a char
// array (not a pointer) so that sizeof(csz)-1 is the string length without
// the terminating NUL.
#define TABLEENTRY(csz, id, pfn)    { csz, sizeof(csz)-1, id, PFNPARSE(pfn) }
#define AUTH_FILTER_DONE     0x1000     // no more filter calls to SF_AUTH after the 1st one in a session

typedef (CHttpRequest::*PFNPARSEPROC)(PCSTR pszTok, TOKEN idHeader);

typedef struct tagHeaderDesc
{
    const char*     sz;
    int             iLen;
    TOKEN           id;
    PFNPARSEPROC    pfn;
} HEADERDESC;


// Dispatch table used by ParseHeaders: each row maps a header name (with its
// trailing ':') to the CHttpRequest member function that parses its value.
// The table is scanned linearly and is terminated by an all-zero sentinel row;
// ParseHeaders stops on sz == 0 and treats pfn == 0 as "no handler, ignore".
// Commented-out rows are headers that are currently not parsed.
const HEADERDESC rgHeaders[] =
{
    //{ cszGET, sizeof(cszGET), TOK_GET, &CHttpRequest::ParseMethod },
// Methods
//  TABLEENTRY(cszGET,  TOK_GET,  Method),
//  TABLEENTRY(cszHEAD, TOK_HEAD, Method),
//  TABLEENTRY(cszPOST, TOK_POST, Method),
// General headers
    TABLEENTRY(cszConnection, TOK_CONNECTION, Connection),
    //TABLEENTRY(cszDate,     TOK_DATE,   Date),
    //TABLEENTRY(cszPragma, TOK_PRAGMA, Pragma),
// Request headers
    TABLEENTRY(cszCookie,   TOK_COOKIE, Cookie),
    TABLEENTRY(cszAccept,   TOK_ACCEPT, Accept),
    //TABLEENTRY(cszReferer,  TOK_REFERER Referer),
    //TABLEENTRY(cszUserAgent,TOK_UAGENT, UserAgent),
    TABLEENTRY(cszAuthorization,  TOK_AUTH,  Authorization),
    TABLEENTRY(cszIfModifiedSince,TOK_IFMOD, IfModifiedSince),
// Entity Headers
    //TABLEENTRY(cszContentEncoding, TOK_ENCODING Encoding),
    TABLEENTRY(cszContentType,  TOK_TYPE,    ContentType),
    TABLEENTRY(cszContentLength,TOK_LENGTH,  ContentLength),
    // sentinel: terminates the scan in ParseHeaders
    { 0, 0, (TOKEN)0, 0}
};

// Parse the request line and then all the headers, line by line.
//
// Steps, in order:
//   1. read the method token and hand the rest of the request line to ParseMethod;
//   2. for each following "Name:" token, look it up in rgHeaders and call its
//      handler (a handler returning FALSE is traced and otherwise ignored);
//   3. consume the blank line after the headers;
//   4. verify a method and URL were obtained, run the PREPROC_HEADERS filter,
//      map the URL to a path, run the URL_MAP filter, record the file extension;
//   5. run the authentication filter unless AUTH_FILTER_DONE is already set in
//      m_dwAuthFlags.
//
// Returns TRUE on success. Returns FALSE with m_rs = STATUS_BADREQ when the
// request is malformed, and FALSE (m_rs not set here) when a filter call or
// AuthenticateFilter fails.
BOOL CHttpRequest::ParseHeaders()
{
    DEBUG_CODE_INIT;
    PSTR pszTok = NULL;
    PWSTR pwszTemp = NULL;
    int i = 0, iLen = 0;
    BOOL ret = FALSE;

    // first whitespace-delimited token of the request is the method
    if (!m_bufRequest.NextTokenWS(&pszTok, &iLen))
    {
        m_rs = STATUS_BADREQ;
        myleave(287);
    }

    // ParseMethod also consumes the URL and the optional HTTP version
    if (! ParseMethod(pszTok,iLen))
    {
        m_rs = STATUS_BADREQ;
        myleave(288);
    }

    if (!m_bufRequest.NextLine())
    {
        m_rs = STATUS_BADREQ;
        myleave(290);
    }

    // outer-loop. one header per iteration
    while (m_bufRequest.NextTokenColon(&pszTok, &iLen))
    {
        // compare token with tokens in table; the loop ends either on a match
        // or on the sentinel row (sz == 0, pfn == 0)
        for (i=0; rgHeaders[i].sz; i++)
        {
            //TraceTag(ttidWebServer, "Comparing %s %d %d", rgHeaders[i].sz, rgHeaders[i].iLen, rgHeaders[i].pfn);
            if ( (rgHeaders[i].iLen == iLen) &&
                 0==_memicmp(rgHeaders[i].sz, pszTok, iLen) )
                break;
        }
        if (rgHeaders[i].pfn)
        {
            TraceTag(ttidWebServer, "Parsing %s", rgHeaders[i].sz);
            // call the specific function to parse this header.
            // A failing handler does not fail the request.
            if (! ((this->*(rgHeaders[i].pfn))(pszTok, rgHeaders[i].id)) )
            {
                TraceTag(ttidWebServer, "Parser: failed to parse %s -- IGNORING", rgHeaders[i].sz);
            }
        }
        else
        {
            TraceTag(ttidWebServer, "Ignoring header %s", pszTok);
        }
        if (!m_bufRequest.NextLine())
        {
            m_rs = STATUS_BADREQ;
            myleave(290);
        }
    }

    if (!m_bufRequest.NextLine()) // eat the blank line
    {
        m_rs = STATUS_BADREQ;
        myleave(290);
    }
    TraceTag(ttidWebServer, "Parser: DONE");

    // check what we got. The URL is checked too: ParseMethod can leave
    // m_pszURL unset when the URL could not be decoded, and it is handed to
    // URLAtoPathW below.
    if (!m_pszMethod || !m_idMethod || !m_pszURL)
    {
        TraceTag(ttidWebServer, "Parser: missing URL or method, illformatted Request-line");
        m_rs = STATUS_BADREQ;
        myleave(291);
    }

    // Once we've read the request line, give filter shot at modifying the
    // remaining headers.
    if (g_pVars->m_fFilters &&
        ! CallFilter(SF_NOTIFY_PREPROC_HEADERS))
        myleave(292);


    m_wszPath = g_pVars->m_pVroots->URLAtoPathW(m_pszURL, &m_dwPermissions, &m_AuthLevelReqd,&m_VRootScriptType,&m_pszPathInfo,&m_wszVRootUserList);

    if (g_pVars->m_fFilters &&
        ! CallFilter(SF_NOTIFY_URL_MAP))
        myleave(293);

    // get extension (from the last '.' of the mapped path, dot included)
    if (m_wszPath && (pwszTemp = wcsrchr(m_wszPath, '.')))
        m_wszExt = MySzDupW(pwszTemp);


    // As per the docs, the filter gets ONLY 1 call per session to notify
    // it of this event.  m_dwAuthFlags is remembered from session to session.

    // Like IIS, it always is called, even if Vroots is AUTH_PUBLIC already and
    // even if no security has been enabled.

    if ( g_pVars->m_fFilters && ! (m_dwAuthFlags & AUTH_FILTER_DONE))
    {
        if ( ! AuthenticateFilter())
            myleave(294);
    }
    m_dwAuthFlags |= AUTH_FILTER_DONE;

    ret = TRUE;
    done:
    // only report a failure when there actually was one
    if (!ret)
    {
        TraceTag(ttidWebServer, "Parse headers failed, err = %d",err);
    }
    return ret;
}

// Parse the request line: "<method> <URL> [HTTP/x.y]".
//
// pszMethod / cbMethod: the method token and its length, already extracted
// by the caller. The remainder of the line (URL plus optional version) is
// pulled from m_bufRequest here.
//
// Stores a copy of the method in m_pszMethod, classifies it into m_idMethod,
// has MyCrackURL decode the URL, and sets m_dwVersion (HTTP 0.9 when no
// version is present).
//
// Returns TRUE on success; FALSE when the rest of the request line cannot be
// read or the URL cannot be decoded.
BOOL CHttpRequest::ParseMethod(PCSTR pszMethod, int cbMethod)
{
    DEBUG_CODE_INIT;
    PSTR pszTok = NULL, pszTok2 = NULL;
    int iLen = 0;
    BOOL fUrlOk = FALSE;
    BOOL ret = FALSE;

// save method
    m_pszMethod = MySzDupA(pszMethod);

// classify the verb. The length must match exactly: comparing only cbMethod
// bytes would accept any prefix ("G", or an empty token) as GET and would read
// past the end of the constant for longer tokens.
    if (cbMethod == (int)(sizeof(cszGET)-1) && 0 == memcmp(cszGET,pszMethod,cbMethod))
        m_idMethod = TOK_GET;
    else if (cbMethod == (int)(sizeof(cszHEAD)-1) && 0 == memcmp(cszHEAD,pszMethod,cbMethod))
        m_idMethod = TOK_HEAD;
    else if (cbMethod == (int)(sizeof(cszPOST)-1) && 0 == memcmp(cszPOST,pszMethod,cbMethod))
        m_idMethod = TOK_POST;
    else
        m_idMethod = TOK_UNKNOWN_VERB;

// get URL and HTTP/x.y together (allows for spaces in URL like Netscape sends)
    if (!m_bufRequest.NextTokenEOL(&pszTok, &iLen))
        myretleave(FALSE, 201);

// separate out the HTTP/x.y: the version is whatever follows the LAST space.
// The space is temporarily overwritten with a NUL so the URL is terminated.
    pszTok2 = strrchr(pszTok, ' ');
    if (pszTok2)
    {
        *pszTok2 = 0;
        iLen = (INT)((INT_PTR)(pszTok2-pszTok));
        pszTok2++;
    }

// clean up & parse the URL; the outcome is acted on only after the request
// buffer has been restored below
    fUrlOk = MyCrackURL(pszTok, iLen);

// get version (optional. HTTP 0.9 wont have this)
    if (!pszTok2)
        m_dwVersion = MAKELONG(9, 0);
    else
    {
        //  int iMajor, iMinor;
        //  sscanf(pszTok2, cszHTTPVER, &iMajor, &iMinor);
        //  m_dwVersion = MAKELONG(iMinor, iMajor);
        SetHTTPVersion(pszTok2, &m_dwVersion);

        pszTok2[-1] = ' ';  // reset this to a space
    }

// a URL that could not be decoded makes the request line unusable
    if (!fUrlOk)
        myretleave(FALSE, 202);

    ret = TRUE;

    done:
    // emitted on every exit path, success included
    TraceTag(ttidWebServer, "end ParseMethod (iGLE=%d iErr=%d)", GLE(err),err);
    return ret;
}

// We assume a raw URL in the form that we receive in the HTTP headers (no scheme, port number etc)
// We extract the path, extra-path, and query
//
// pszRawURL: NUL-terminated raw URL; iLen: its length in chars.
//
// On success sets:
//   m_pszQueryString - copy of everything after the first '?', if any
//   m_pszPathInfo    - when extensions are enabled and the URL embeds ".dll/"
//                      or ".asp/", copy of the tail starting at that '/'
//   m_pszURL         - copy of the decoded URL without query and path-info
//
// Returns TRUE on success; FALSE when an allocation or the URL
// canonicalization fails (m_pszURL is not assigned in that case).
BOOL CHttpRequest::MyCrackURL(PSTR pszRawURL, int iLen)
{
    DEBUG_CODE_INIT;
    BOOL ret = FALSE;
    PSTR  pszDecodedURL=0, pszTemp=0, pszPartiallyDecodedURL=0;
    int iLen2 = 0;
    // Both working buffers are sized like the input: including the NULL terminator
    DWORD cchPartiallyDecodedURL = iLen + 1;
    DWORD cchDecodedURL = iLen + 1;

    // decode URL (convert escape sequences etc)
    if (NULL == (pszPartiallyDecodedURL = MyRgAllocNZ(CHAR, cchPartiallyDecodedURL)))
        myleave(382);
    if (NULL == (pszDecodedURL = MyRgAllocNZ(CHAR, cchDecodedURL)))
        myleave(382);

    // BUG FIX 393235 - When InternetCanonicalizeUrlA() is told to decode a URL and process the meta
    // directories, it does them in the wrong order. Passing it:
    // http://localhost:2869/upnphost/%2e./%2e./%2e./%2e./%2e./%2e./boot.ini
    // results in:
    // http://localhost:2869/upnphost/../../../../../../boot.ini
    // which is clearly not safe. To work around this, we call it twice - once to decode the URL, and
    // a second time to process the meta directories.

    // First, decode the URL (ICU_NO_META: leave "." / ".." sequences alone)
    if (!InternetCanonicalizeUrlA(pszRawURL,
                                  pszPartiallyDecodedURL,
                                  &cchPartiallyDecodedURL,
                                  ICU_NO_ENCODE | ICU_DECODE | ICU_BROWSER_MODE | ICU_NO_META))
    {
        TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
        myleave(383);
    }

    // Second, process the meta directories
    if (!InternetCanonicalizeUrlA(pszPartiallyDecodedURL,
                                  pszDecodedURL,
                                  &cchDecodedURL,
                                  ICU_NO_ENCODE | ICU_BROWSER_MODE))
    {
        TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
        myleave(384);
    }


    // get query string, then cut it off the URL
    pszTemp = strchr(pszDecodedURL, '?');
    if (pszTemp)
    {
        m_pszQueryString = MySzDupA(pszTemp+1);
        *pszTemp = 0;
    }


    // Searching for an embedded ISAPI dll name, ie /www/isapi.dll/a/b.
    // We load the file /www/isapi.dll and set PATH_INFO to /a/b
    // Embedded ASP file names are handled similarly.
    // sizeof(".dll/") - 2 == 4 is the offset of the '/' that follows the
    // extension: path-info starts there and the URL is truncated there.
    if (g_pVars->m_fExtensions)
    {
        if (NULL != (pszTemp = strstr(pszDecodedURL,".dll/")))
        {
            m_pszPathInfo = MySzDupA(pszTemp + sizeof(".dll/") - 2);
            pszTemp[sizeof(".dll/") - 2] = 0;
        }
        else if (NULL != (pszTemp = strstr(pszDecodedURL,".asp/")))
        {
            m_pszPathInfo = MySzDupA(pszTemp + sizeof(".asp/") - 2);
            pszTemp[sizeof(".asp/") - 2] = 0;
        }
    }

    // save a copy of the cleaned up URL (MINUS query!)
    // SPECIAL HACK: alloc one extra char in case we have to send a redirect back (see request.cpp)

    iLen2 = (int)strlen(pszDecodedURL);
    m_pszURL = MySzAllocA(1+iLen2);
    if (NULL == m_pszURL)       // do not copy into a failed allocation
        myleave(385);
    Nstrcpy(m_pszURL, pszDecodedURL, iLen2); // copy null-term too.

    ret = TRUE;
    done:
    MyFree(pszDecodedURL);
    MyFree(pszPartiallyDecodedURL);
    // Report the narrow strings produced here. The wide m_wszPath / m_wszExt
    // members do not belong under a narrow %s and are not set by this function.
    TraceTag(ttidWebServer, "end MyCrackURL(%s) url=%s pathinfo=%s query=%s (iGLE=%d iErr=%d)\r\n",
                               pszRawURL, m_pszURL, m_pszPathInfo, m_pszQueryString, GLE(err), err);

    return ret;

}

// Handler for the "Content-Length:" header.
//
// Reads the first whitespace-delimited token after the header name and, when
// it starts with decimal digits, stores their value in m_dwContentLength
// (trailing non-digit characters are ignored, as with atoi).
//
// The value is parsed by hand rather than with atoi: m_dwContentLength is
// assigned from it as an unsigned quantity, so a negative value such as "-1"
// would turn into a huge length, and atoi's behavior on overflow is undefined.
// Values are limited to 0x7FFFFFFF, the largest value atoi could have returned.
//
// pszMethod and id are unused.
// Returns TRUE when a length was stored or the header had no value; FALSE
// (m_dwContentLength left untouched) when the value is not a non-negative
// decimal number in range. The caller only traces a FALSE result.
BOOL CHttpRequest::ParseContentLength(PCSTR pszMethod, TOKEN id)
{
    const DWORD dwMaxLength = 0x7FFFFFFF;
    PSTR pszTok = 0;
    int iLen = 0;
    int cDigits = 0;
    DWORD dwLength = 0;

// get length (first token after "Content-Length:")
    if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
        return TRUE;    // no value on the line: nothing to record

    for (; pszTok[cDigits] >= '0' && pszTok[cDigits] <= '9'; cDigits++)
    {
        const DWORD dwDigit = (DWORD)(pszTok[cDigits] - '0');

        // would dwLength*10 + dwDigit exceed the limit?
        if (dwLength > (dwMaxLength - dwDigit) / 10)
            return FALSE;
        dwLength = dwLength * 10 + dwDigit;
    }

    // no leading digit at all (e.g. "-1", "abc")
    if (0 == cDigits)
        return FALSE;

    m_dwContentLength = dwLength;
    return TRUE;

}

// Handler for the "Cookie:" header: keeps a copy of the header value in
// m_pszCookie. pszMethod and id are unused. Always returns TRUE; a header
// without a value simply leaves m_pszCookie as it was.
BOOL CHttpRequest::ParseCookie(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = NULL;
    int cchValue = 0;

    // the value is the token NextTokenEOL yields for the rest of this line
    if (!m_bufRequest.NextTokenEOL(&pszValue, &cchValue))
        return TRUE;

    if (pszValue != NULL && cchValue != 0)
        m_pszCookie = MySzDupA(pszValue);

    return TRUE;
}

// Handler for the "Accept:" header: keeps a copy of the header value in
// m_pszAccept. pszMethod and id are unused. Always returns TRUE; a header
// without a value simply leaves m_pszAccept as it was.
BOOL CHttpRequest::ParseAccept(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = NULL;
    int cchValue = 0;

    // the value is the token NextTokenEOL yields for the rest of this line
    if (!m_bufRequest.NextTokenEOL(&pszValue, &cchValue))
        return TRUE;

    if (pszValue != NULL && cchValue != 0)
        m_pszAccept = MySzDupA(pszValue);

    return TRUE;
}


// Handler for the "Content-Type:" header: keeps a copy of the first
// whitespace-delimited token of the value in m_pszContentType.
// pszMethod and id are unused. Always returns TRUE; a header without a value
// simply leaves m_pszContentType as it was.
BOOL CHttpRequest::ParseContentType(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = NULL;
    int cchValue = 0;

    // only the first token after the header name is kept
    if (!m_bufRequest.NextTokenWS(&pszValue, &cchValue))
        return TRUE;

    if (pszValue != NULL && cchValue != 0)
        m_pszContentType = MySzDupA(pszValue);

    return TRUE;
}

// scanf-style pattern for a date of the form
// "<weekday>, <dd> <mon> <yyyy> <hh>:<mm>:<ss> GMT; length=<n>".
// In the visible code it appears only in a commented-out sscanf call;
// NOTE(review): check for other users before removing.
const char cszDateParseFmt[] = " %*3s, %02hd %3s %04hd %02hd:%02hd:%02hd GMT; length=%d";

// Handler for the "If-Modified-Since:" header.
//
// Takes the rest of the line, lets SetHTTPDate split it into a SYSTEMTIME, a
// month name and m_dwIfModifiedLength, resolves the month name through
// rgMonth and stores the result as a FILETIME in m_ftIfModifiedSince.
//
// pszMethod and id are unused.
// Returns TRUE only when m_ftIfModifiedSince was actually written; FALSE when
// the header has no value, the date does not parse, the month is unknown or
// the time cannot be converted. The caller only traces a FALSE result.
BOOL CHttpRequest::ParseIfModifiedSince(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int iLen = 0;
    int i = 0;
    // zero-filled so the failure trace below never reads an unterminated or
    // uninitialized buffer when SetHTTPDate fails before writing the month
    char szMonth[10] = {0};
    SYSTEMTIME st;
    ZEROMEM(&st);

// get the date (rest of line after If-Modified-Since)
// BUGBUG: Note we are handling only one date format (the "recommended" one)
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        //  i = sscanf(pszTok, cszDateParseFmt, &st.wDay, &szMonth, &st.wYear, &st.wHour, &st.wMinute, &st.wSecond, &m_dwIfModifiedLength);
        if ( SetHTTPDate(pszTok,szMonth,&st,&m_dwIfModifiedLength))
        {
            // try to match month
            for (i=0; rgMonth[i]; i++)
            {
                if (0==strcmpi(szMonth, rgMonth[i]))
                {
                    // the index into rgMonth is used directly as the month number
                    st.wMonth = (WORD)i;
                    // convert to filetime & store; an out-of-range date makes
                    // the conversion fail, which must not be reported as success
                    if (SystemTimeToFileTime(&st, &m_ftIfModifiedSince))
                        return TRUE;
                    break;
                }
            }
        }
        TraceTag(ttidWebServer, "Failed to parse If-Modified-Since(%s) Parsed: day=%02d month=%s(%d) year=%04d time=%02d:%02d:%02d len=%d\r\n",
                              pszTok, st.wDay, szMonth, i, st.wYear, st.wHour, st.wMinute, st.wSecond, m_dwIfModifiedLength);
    }
    return FALSE;
}



// Handler for the "Authorization:" header.
// Note:  No filter calls to SF_NOTIFY_AUTHENT in this fcn
//
// Reads the scheme token (copied to m_pszAuthType) and then the credential
// token that follows it:
//   - "Basic" with basic auth enabled: HandleBasicAuth; on success the granted
//     level is OR-ed into m_dwAuthFlags.
//   - "NTLM" with NTLM enabled: HandleNTLMAuth.
//   - anything else: the still-encoded credential token is copied to
//     m_pszRawRemoteUser for later use.
//
// pszMethod and id are unused.
// Returns TRUE when a Basic/NTLM header was processed -- including when the
// credentials were rejected (exit codes 93 / 96) -- and FALSE when a token is
// missing or the scheme is unknown/disabled. The caller only traces FALSE.
BOOL CHttpRequest::ParseAuthorization(PCSTR pszMethod, TOKEN id)
{
    DEBUG_CODE_INIT;
    BOOL ret = FALSE;
    PSTR pszTok=0;
    int iLen=0;

// get the auth scheme (first token after "Authorization:")
    if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
        myretleave(FALSE, 91);

    m_pszAuthType = MySzDupA(pszTok);

    if (g_pVars->m_fBasicAuth && 0==strcmpi(pszTok, cszBasic))
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 92);


        // rejected credentials are not a parse error, hence TRUE
        if (!HandleBasicAuth(pszTok, &m_pszRemoteUser, &m_pszPassword,
                             &m_AuthLevelGranted, &m_NTLMState,m_wszVRootUserList))
            myretleave(TRUE, 93);

        TraceTag(ttidWebServer, "Basic Auth SUCCESS");
        m_dwAuthFlags |= m_AuthLevelGranted;
        ret = TRUE;
    }

    else if (g_pVars->m_fNTLMAuth && 0==strcmpi(pszTok, cszNTLM))
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 95);

        // rejected credentials are not a parse error, hence TRUE
        if (!HandleNTLMAuth(pszTok))
            myretleave(TRUE, 96);

        TraceTag(ttidWebServer, "NTLM Auth SUCCESS");
        ret = TRUE;
    }

    // We read in this data anyway.  A filter could theoretically set an Access-denied
    // even if neither NTLM nor basic were set.  AuthenticateFilter will handle
    // this data later in that case.
    // We store data in m_pszRawRemoteUser because it hasn't been Base64 decoded yet
    else
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 97);


        m_pszRawRemoteUser = MySzDupA(pszTok);
        // check the pointer that was just assigned (not m_pszRemoteUser, which
        // this branch never sets)
        if (NULL == m_pszRawRemoteUser)
            myretleave(FALSE, 98);

        TraceTag(ttidWebServer, "Unknown authorization type requested OR requested type not enabled");
    }

    done:
    // emitted on every exit path, success included; err/ret tell them apart
    TraceTag(ttidWebServer, "Auth FAILED (err=%d ret=%d)", err, ret);

    return ret;
}

// Handler for the "Connection:" header: sets m_fKeepAlive when the first
// token of the value equals cszKeepAlive (case-insensitive). The flag is never
// cleared here. pszMethod and id are unused. Always returns TRUE.
BOOL CHttpRequest::ParseConnection(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = NULL;
    int cchValue = 0;

    // first whitespace-delimited token after the header name
    if (!m_bufRequest.NextTokenWS(&pszValue, &cchValue))
        return TRUE;

    if (pszValue != NULL && cchValue != 0 && 0 == strcmpi(pszValue, cszKeepAlive))
        m_fKeepAlive = TRUE;

    return TRUE;
}