Copyright (c) 1995-1998 Microsoft Corporation Module Name: PARSER.CPP Author: Arul Menezes Abstract: HTTP request parser --*/ #include "pch.h"
#pragma hdrstop
#include "httpd.h"
// This could be written as a state-machine parser, but for now I'm
// keeping it simple and slow :-(
// Methods
// ---- HTTP methods recognised on the request line ----
const char cszGET[]  = "GET";
const char cszHEAD[] = "HEAD";
const char cszPOST[] = "POST";

// ---- General headers ----
const char cszConnection[] = "Connection:";
//const char cszDate[]   = "Date:";
//const char cszPragma[] = "Pragma:";

// ---- Request headers ----
const char cszAuthorization[]   = "Authorization:";
const char cszIfModifiedSince[] = "If-Modified-Since:";
//const char cszReferer[]   = "Referer:";
//const char cszUserAgent[] = "User-Agent:";
const char cszCookie[] = "Cookie:";
const char cszAccept[] = "Accept:";

// ---- Entity headers ----
const char cszContentLength[] = "Content-Length:";
const char cszContentType[]   = "Content-Type:";

// ---- Other header tokens ----
// (the printf-style variant below was used by a sscanf-based version parse)
// const char cszHTTPVER[] = "HTTP/%d.%d";
const char cszHTTPVER[] = "HTTP/";
const char cszBasic[]   = "Basic";
const char cszNTLM[]    = "NTLM";
#define PFNPARSE(x) &(CHttpRequest::Parse##x)
#define TABLEENTRY(csz, id, pfn) { csz, sizeof(csz)-1, id, PFNPARSE(pfn) }
#define AUTH_FILTER_DONE 0x1000 // no more filter calls to SF_AUTH after the 1st one in a session
typedef (CHttpRequest::*PFNPARSEPROC)(PCSTR pszTok, TOKEN idHeader);
typedef struct tagHeaderDesc { const char* sz; int iLen; TOKEN id; PFNPARSEPROC pfn; } HEADERDESC;
// Table of recognised headers and their handlers. CHttpRequest::ParseHeaders
// scans it linearly and stops at the all-zero terminator entry.
const HEADERDESC rgHeaders[] =
{
    // Methods (not table driven)
    //{ cszGET, sizeof(cszGET), TOK_GET, &CHttpRequest::ParseMethod },

    // General headers
    TABLEENTRY(cszConnection,      TOK_CONNECTION, Connection),
    //TABLEENTRY(cszDate, TOK_DATE, Date),
    //TABLEENTRY(cszPragma, TOK_PRAGMA, Pragma),

    // Request headers
    TABLEENTRY(cszCookie,          TOK_COOKIE,     Cookie),
    TABLEENTRY(cszAccept,          TOK_ACCEPT,     Accept),
    //TABLEENTRY(cszReferer, TOK_REFERER Referer),
    //TABLEENTRY(cszUserAgent,TOK_UAGENT, UserAgent),
    TABLEENTRY(cszAuthorization,   TOK_AUTH,       Authorization),
    TABLEENTRY(cszIfModifiedSince, TOK_IFMOD,      IfModifiedSince),

    // Entity headers
    //TABLEENTRY(cszContentEncoding, TOK_ENCODING Encoding),
    TABLEENTRY(cszContentType,     TOK_TYPE,       ContentType),
    TABLEENTRY(cszContentLength,   TOK_LENGTH,     ContentLength),

    // Terminator
    { 0, 0, (TOKEN)0, 0}
};
// Parse the request line and then all the headers, line by line.
//
// After parsing, the URL is mapped to a physical path, the file extension is
// recorded, and (when filters are enabled) the pre-processing, URL-map and
// authentication filter notifications are issued.
//
// Returns TRUE on success. On a malformed request m_rs is set to
// STATUS_BADREQ and FALSE is returned; FALSE is also returned when a filter
// call fails.
BOOL CHttpRequest::ParseHeaders()
{
    DEBUG_CODE_INIT;
    PSTR  pszTok;
    PWSTR pwszTemp;
    int   i, iLen;
    BOOL  ret = FALSE;

    // Request line: the first whitespace-delimited token is the method;
    // ParseMethod reads the rest of that line itself.
    if (!m_bufRequest.NextTokenWS(&pszTok, &iLen))
    {
        m_rs = STATUS_BADREQ;
        myleave(287);
    }
    if (!ParseMethod(pszTok, iLen))
    {
        m_rs = STATUS_BADREQ;
        myleave(288);
    }
    if (!m_bufRequest.NextLine())
    {
        m_rs = STATUS_BADREQ;
        myleave(290);
    }

    // outer-loop. one header per iteration
    while (m_bufRequest.NextTokenColon(&pszTok, &iLen))
    {
        // compare token with tokens in table (case-insensitive, exact length)
        for (i = 0; rgHeaders[i].sz; i++)
        {
            //TraceTag(ttidWebServer, "Comparing %s %d %d", rgHeaders[i].sz, rgHeaders[i].iLen, rgHeaders[i].pfn);
            if ((rgHeaders[i].iLen == iLen) && 0 == _memicmp(rgHeaders[i].sz, pszTok, iLen))
                break;
        }

        // On no match i indexes the terminator entry, whose pfn is 0.
        if (rgHeaders[i].pfn)
        {
            TraceTag(ttidWebServer, "Parsing %s", rgHeaders[i].sz);
            // call the specific function to parse this header.
            // A handler failure is logged and otherwise ignored.
            if (!((this->*(rgHeaders[i].pfn))(pszTok, rgHeaders[i].id)))
            {
                TraceTag(ttidWebServer, "Parser: failed to parse %s -- IGNORING", rgHeaders[i].sz);
            }
        }
        else
        {
            TraceTag(ttidWebServer, "Ignoring header %s", pszTok);
        }

        if (!m_bufRequest.NextLine())
        {
            m_rs = STATUS_BADREQ;
            myleave(290);
        }
    }

    if (!m_bufRequest.NextLine()) // eat the blank line
    {
        m_rs = STATUS_BADREQ;
        myleave(290);
    }
    TraceTag(ttidWebServer, "Parser: DONE");

    // check what we got. m_pszURL is checked as well: it stays unset when
    // URL cracking fails, and it is handed to URLAtoPathW below.
    if (!m_pszMethod || !m_pszURL || !m_idMethod)
    {
        TraceTag(ttidWebServer, "Parser: missing URL or method, illformatted Request-line");
        m_rs = STATUS_BADREQ;
        myleave(291);
    }

    // Once we've read the request line, give filter shot at modifying the
    // remaining headers.
    if (g_pVars->m_fFilters && !CallFilter(SF_NOTIFY_PREPROC_HEADERS))
        myleave(292);

    m_wszPath = g_pVars->m_pVroots->URLAtoPathW(m_pszURL, &m_dwPermissions, &m_AuthLevelReqd,&m_VRootScriptType,&m_pszPathInfo,&m_wszVRootUserList);

    if (g_pVars->m_fFilters && !CallFilter(SF_NOTIFY_URL_MAP))
        myleave(293);

    // get extension (everything from the last '.' in the mapped path)
    if (m_wszPath && (pwszTemp = wcsrchr(m_wszPath, '.')))
        m_wszExt = MySzDupW(pwszTemp);

    // As per the docs, the filter gets ONLY 1 call per session to notify
    // it of this event. m_dwAuthFlags is remembered from session to session.
    // Like IIS, it always is called, even if Vroots is AUTH_PUBLIC already and
    // even if no security has been enabled.
    if (g_pVars->m_fFilters && !(m_dwAuthFlags & AUTH_FILTER_DONE))
    {
        if (!AuthenticateFilter())
            myleave(294);
    }
    m_dwAuthFlags |= AUTH_FILTER_DONE;

    ret = TRUE;

done:
    // Only report a failure when there was one; this trace used to be
    // emitted on the success path too.
    if (!ret)
    {
        TraceTag(ttidWebServer, "Parse headers failed, err = %d",err);
    }
    return ret;
}
// Parse the request line: record the method, crack the URL, and read the
// optional HTTP version.
//
//   pszMethod - method token from the request line
//   cbMethod  - length of that token in bytes
//
// Returns TRUE on success, FALSE if the rest of the request line cannot be
// read or the URL cannot be cracked.
BOOL CHttpRequest::ParseMethod(PCSTR pszMethod, int cbMethod)
{
    DEBUG_CODE_INIT;
    PSTR pszTok, pszTok2;
    int  iLen;
    BOOL fUrlOk;
    BOOL ret = FALSE;

    // save method
    m_pszMethod = MySzDupA(pszMethod);

    // The token must match a known verb in full. Comparing only cbMethod
    // bytes (without checking the length) let prefixes such as "G" or an
    // empty token match GET, and could read past the end of the constant for
    // longer tokens.
    if (cbMethod == (int)(sizeof(cszGET) - 1) && 0 == memcmp(cszGET, pszMethod, cbMethod))
        m_idMethod = TOK_GET;
    else if (cbMethod == (int)(sizeof(cszHEAD) - 1) && 0 == memcmp(cszHEAD, pszMethod, cbMethod))
        m_idMethod = TOK_HEAD;
    else if (cbMethod == (int)(sizeof(cszPOST) - 1) && 0 == memcmp(cszPOST, pszMethod, cbMethod))
        m_idMethod = TOK_POST;
    else
        m_idMethod = TOK_UNKNOWN_VERB;

    // get URL and HTTP/x.y together (allows for spaces in URL like Netscape sends)
    if (!m_bufRequest.NextTokenEOL(&pszTok, &iLen))
        myretleave(FALSE, 201);

    // separate out the HTTP/x.y: temporarily terminate the URL at the last space
    pszTok2 = strrchr(pszTok, ' ');
    if (pszTok2)
    {
        *pszTok2 = 0;
        iLen = (INT)((INT_PTR)(pszTok2 - pszTok));
        pszTok2++;
    }

    // clean up & parse the URL. The result is acted on only after the
    // request buffer has been restored below.
    fUrlOk = MyCrackURL(pszTok, iLen);

    // get version (optional. HTTP 0.9 wont have this)
    if (!pszTok2)
    {
        m_dwVersion = MAKELONG(9, 0);
    }
    else
    {
        // int iMajor, iMinor;
        // sscanf(pszTok2, cszHTTPVER, &iMajor, &iMinor);
        // m_dwVersion = MAKELONG(iMinor, iMajor);
        SetHTTPVersion(pszTok2, &m_dwVersion);
        pszTok2[-1] = ' '; // reset this to a space
    }

    // A URL that could not be cracked is a parse failure; previously the
    // result was ignored.
    if (!fUrlOk)
        myretleave(FALSE, 202);

    ret = TRUE;

done:
    TraceTag(ttidWebServer, "end ParseMethod (iGLE=%d iErr=%d)", GLE(err),err);
    return ret;
}
// We assume a raw URL in the form that we receive in the HTTP headers (no scheme, port number etc)
// We extract the path, extra-path, and query
//
//   pszRawURL - URL text from the request line
//   iLen      - length of pszRawURL in characters, excluding the terminator
//
// On success m_pszURL holds the decoded URL without the query string;
// m_pszQueryString and m_pszPathInfo are set when those parts are present.
// Returns TRUE on success, FALSE on allocation or canonicalization failure.
BOOL CHttpRequest::MyCrackURL(PSTR pszRawURL, int iLen)
{
    DEBUG_CODE_INIT;
    BOOL  ret = FALSE;
    PSTR  pszDecodedURL = 0, pszTemp = 0, pszPartiallyDecodedURL = 0;
    int   iLen2;
    DWORD cchDecodedURL = iLen + 1;           // including the NULL terminator
    DWORD cchPartiallyDecodedURL = iLen + 1;

    // decode URL (convert escape sequences etc)
    if (NULL == (pszPartiallyDecodedURL = MyRgAllocNZ(CHAR, cchPartiallyDecodedURL)))
        myleave(382);
    if (NULL == (pszDecodedURL = MyRgAllocNZ(CHAR, cchDecodedURL)))
        myleave(382);

    // BUG FIX 393235 - When InternetCanonicalizeUrlA() is told to decode a URL and process the meta
    // directories, it does them in the wrong order. Passing it:
    //      http://localhost:2869/upnphost/%2e./%2e./%2e./%2e./%2e./%2e./boot.ini
    // results in:
    //      http://localhost:2869/upnphost/../../../../../../boot.ini
    // which is clearly not safe. To work around this, we call it twice - once to decode the URL, and
    // a second time to process the meta directories.

    // First, decode the URL
    if (!InternetCanonicalizeUrlA(pszRawURL, pszPartiallyDecodedURL, (DWORD*)&cchPartiallyDecodedURL, ICU_NO_ENCODE | ICU_DECODE | ICU_BROWSER_MODE | ICU_NO_META))
    {
        TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
        myleave(383);
    }

    // Second, process the meta directories
    if (!InternetCanonicalizeUrlA(pszPartiallyDecodedURL, pszDecodedURL, (DWORD*)&cchDecodedURL, ICU_NO_ENCODE | ICU_BROWSER_MODE))
    {
        TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
        myleave(384);
    }

    // get query string: everything after the first '?', which is then cut off
    pszTemp = strchr(pszDecodedURL, '?');
    if (pszTemp)
    {
        m_pszQueryString = MySzDupA(pszTemp+1);
        *pszTemp = 0;
    }

    // Searching for an embedded ISAPI dll name, ie /wwww/isapi.dll/a/b.
    // We load the file /www/isapi.dll and set PATH_INFO to /a/b
    // Embedded ASP file names are handled similarly.
    // sizeof(".dll/") - 2 is the offset of the '/' that follows the extension,
    // so PATH_INFO keeps its leading slash and the URL is cut right after
    // the extension.
    if (g_pVars->m_fExtensions)
    {
        if (pszTemp = strstr(pszDecodedURL,".dll/"))
        {
            m_pszPathInfo = MySzDupA(pszTemp + sizeof(".dll/") - 2);
            pszTemp[sizeof(".dll/") - 2] = 0;
        }
        else if (pszTemp = strstr(pszDecodedURL,".asp/"))
        {
            m_pszPathInfo = MySzDupA(pszTemp + sizeof(".asp/") - 2);
            pszTemp[sizeof(".asp/") - 2] = 0;
        }
    }

    // save a copy of the cleaned up URL (MINUS query!)
    // SPECIAL HACK: alloc one extra char in case we have to send a redirect back (see request.cpp)
    iLen2 = (int)strlen(pszDecodedURL);
    m_pszURL = MySzAllocA(1+iLen2);
    // Do not copy into a failed allocation.
    if (NULL == m_pszURL)
        myleave(385);
    Nstrcpy(m_pszURL, pszDecodedURL, iLen2); // copy null-term too.

    ret = TRUE;

done:
    MyFree(pszDecodedURL);
    MyFree(pszPartiallyDecodedURL);
    TraceTag(ttidWebServer, "end MyCrackURL(%s) path=%s ext=%s query=%s (iGLE=%d iErr=%d)\r\n", pszRawURL, m_wszPath, m_wszExt, m_pszQueryString, GLE(err), err);
    return ret;
}
// Handler for the Content-Length header.
//
// Reads the first token after "Content-Length:" and stores it in
// m_dwContentLength. The value comes from the client, so it is accepted only
// if it starts with a decimal digit and fits in a DWORD; a signed or
// overflowing value (e.g. "-1", which atoi would turn into a huge unsigned
// length) leaves m_dwContentLength untouched.
//
// Both parameters are unused. Always returns TRUE.
BOOL CHttpRequest::ParseContentLength(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get length (first token after "Content-Length:")
    if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
    {
        const DWORD dwMax    = 0xFFFFFFFF;
        DWORD       dwLength = 0;
        BOOL        fValid   = FALSE;

        // Accumulate leading decimal digits, stopping at the first
        // non-digit (as atoi did) and rejecting values that would overflow.
        for (const char* pch = pszTok; *pch >= '0' && *pch <= '9'; pch++)
        {
            const DWORD dwDigit = (DWORD)(*pch - '0');
            if (dwLength > (dwMax - dwDigit) / 10)
            {
                fValid = FALSE;
                break;
            }
            dwLength = dwLength * 10 + dwDigit;
            fValid = TRUE;
        }

        if (fValid)
        {
            m_dwContentLength = dwLength;
        }
        else
        {
            TraceTag(ttidWebServer, "Parser: invalid Content-Length (%s) -- IGNORING", pszTok);
        }
    }
    return TRUE;
}
// Handler for the Cookie header.
//
// Stores a copy of the rest of the header line in m_pszCookie. If the
// header appears more than once the last one wins; the earlier copy is
// released first so it is not leaked.
//
// Both parameters are unused. Always returns TRUE.
BOOL CHttpRequest::ParseCookie(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get cookie (upto \r\n after "Cookie:")
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        if (m_pszCookie)
        {
            MyFree(m_pszCookie);
        }
        m_pszCookie = MySzDupA(pszTok);
    }
    return TRUE;
}
// Handler for the Accept header.
//
// Stores a copy of the rest of the header line in m_pszAccept. If the
// header appears more than once the last one wins; the earlier copy is
// released first so it is not leaked.
//
// Both parameters are unused. Always returns TRUE.
BOOL CHttpRequest::ParseAccept(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get accept list (upto \r\n after "Accept:")
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        if (m_pszAccept)
        {
            MyFree(m_pszAccept);
        }
        m_pszAccept = MySzDupA(pszTok);
    }
    return TRUE;
}
// Handler for the Content-Type header.
//
// Stores a copy of the first token after the header name in
// m_pszContentType. If the header appears more than once the last one wins;
// the earlier copy is released first so it is not leaked.
//
// Both parameters are unused. Always returns TRUE.
BOOL CHttpRequest::ParseContentType(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;

    // get type (first token after "Content-Type:")
    if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
    {
        if (m_pszContentType)
        {
            MyFree(m_pszContentType);
        }
        m_pszContentType = MySzDupA(pszTok);
    }
    return TRUE;
}
// scanf-style format for an If-Modified-Since value: a skipped 3-letter
// weekday, then day, month name, year, hh:mm:ss, "GMT" and a "length=" field.
// Within this part of the file it is only referenced by the commented-out
// sscanf call in ParseIfModifiedSince.
const char cszDateParseFmt[] = " %*3s, %02hd %3s %04hd %02hd:%02hd:%02hd GMT; length=%d";
// Handler for the If-Modified-Since header.
//
// Parses the rest of the header line with SetHTTPDate, maps the month name
// to its index in rgMonth, and stores the result as a FILETIME in
// m_ftIfModifiedSince (any length value goes to m_dwIfModifiedLength).
//
// Both parameters are unused. Returns TRUE only when the date was parsed
// and converted; FALSE otherwise.
BOOL CHttpRequest::ParseIfModifiedSince(PCSTR pszMethod, TOKEN id)
{
    PSTR pszTok = 0;
    int  iLen = 0;
    int  i = 0;
    char szMonth[10];
    SYSTEMTIME st;
    ZEROMEM(&st);

    // Start with an empty month so the failure trace below never prints an
    // uninitialized buffer when SetHTTPDate fails.
    szMonth[0] = 0;

    // get the date (rest of line after If-Modified-Since)
    // BUGBUG: Note we are handling only one date format (the "recommended" one)
    if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
    {
        // i = sscanf(pszTok, cszDateParseFmt, &st.wDay, &szMonth, &st.wYear, &st.wHour, &st.wMinute, &st.wSecond, &m_dwIfModifiedLength);
        if (SetHTTPDate(pszTok, szMonth, &st, &m_dwIfModifiedLength))
        {
            // try to match month
            for (i = 0; rgMonth[i]; i++)
            {
                if (0 == strcmpi(szMonth, rgMonth[i]))
                {
                    st.wMonth = (WORD)i;
                    // convert to filetime & store. A failed conversion
                    // (e.g. out-of-range fields) is a parse failure rather
                    // than a success.
                    if (SystemTimeToFileTime(&st, &m_ftIfModifiedSince))
                        return TRUE;
                    break;
                }
            }
        }
        TraceTag(ttidWebServer, "Failed to parse If-Modified-Since(%s) Parsed: day=%02d month=%s(%d) year=%04d time=%02d:%02d:%02d len=%d\r\n", pszTok, st.wDay, szMonth, i, st.wYear, st.wHour, st.wMinute, st.wSecond, m_dwIfModifiedLength);
    }
    return FALSE;
}
// Handler for the Authorization header.
// Note: No filter calls to SF_NOTIFY_AUTHENT in this fcn
//
// Records the scheme in m_pszAuthType, then:
//   - Basic (if enabled): passes the credentials token to HandleBasicAuth
//     and, on success, ORs the granted level into m_dwAuthFlags.
//   - NTLM (if enabled): passes the token to HandleNTLMAuth.
//   - anything else: keeps the raw token in m_pszRawRemoteUser.
//
// Both parameters are unused. Returns TRUE when Basic/NTLM data was handed
// to its handler (even if that handler failed), FALSE when a token is
// missing, an allocation fails, or the scheme is unknown or disabled.
BOOL CHttpRequest::ParseAuthorization(PCSTR pszMethod, TOKEN id)
{
    DEBUG_CODE_INIT;
    BOOL ret = FALSE;
    PSTR pszTok = 0;
    int  iLen = 0;

    // get the auth scheme (first token after "Authorization:")
    if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
        myretleave(FALSE, 91);

    m_pszAuthType = MySzDupA(pszTok);

    if (g_pVars->m_fBasicAuth && 0==strcmpi(pszTok, cszBasic))
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 92);

        if (!HandleBasicAuth(pszTok, &m_pszRemoteUser, &m_pszPassword, &m_AuthLevelGranted, &m_NTLMState,m_wszVRootUserList))
            myretleave(TRUE, 93);

        TraceTag(ttidWebServer, "Basic Auth SUCCESS");
        m_dwAuthFlags |= m_AuthLevelGranted;
        ret = TRUE;
    }
    else if (g_pVars->m_fNTLMAuth && 0==strcmpi(pszTok, cszNTLM))
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 95);

        if (!HandleNTLMAuth(pszTok))
            myretleave(TRUE, 96);

        TraceTag(ttidWebServer, "NTLM Auth SUCCESS");
        ret = TRUE;
    }
    // We read in this data anyway. A filter could theoretically set an Access-denied
    // even if neither NTLM nor basic were set. AuthenticateFilter will handle
    // this data later in that case.
    // We store data in m_pszRawRemoteUser because it hasn't been Base64 decoded yet
    else
    {
        // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
        if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
            myretleave(FALSE, 97);

        m_pszRawRemoteUser = MySzDupA(pszTok);
        // Check the pointer that was just assigned. The previous code tested
        // m_pszRemoteUser, which this branch never sets.
        if (NULL == m_pszRawRemoteUser)
            myretleave(FALSE, 98);

        TraceTag(ttidWebServer, "Unknown authorization type requested OR requested type not enabled");
    }

done:
    TraceTag(ttidWebServer, "Auth FAILED (err=%d ret=%d)", err, ret);
    return ret;
}
// Handler for the Connection header.
//
// Looks at the first token after the header name and sets m_fKeepAlive when
// it matches cszKeepAlive (case-insensitive). The flag is never cleared
// here. Both parameters are unused. Always returns TRUE.
BOOL CHttpRequest::ParseConnection(PCSTR pszMethod, TOKEN id)
{
    PSTR pszValue = 0;
    int  cchValue = 0;

    // Nothing to do when there is no usable token after the header name.
    if (!m_bufRequest.NextTokenWS(&pszValue, &cchValue) || !pszValue || !cchValue)
    {
        return TRUE;
    }

    if (0 == strcmpi(pszValue, cszKeepAlive))
    {
        m_fKeepAlive = TRUE;
    }
    return TRUE;
}