Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

508 lines
15 KiB

  1. /*--
  2. Copyright (c) 1995-1998 Microsoft Corporation
  3. Module Name: PARSER.CPP
  4. Author: Arul Menezes
  5. Abstract: HTTP request parser
  6. --*/
  7. #include "pch.h"
  8. #pragma hdrstop
  9. #include "httpd.h"
  10. // This could be written as a state-machine parser, but for now I'm
  11. // keeping it simple and slow :-(
  12. // Methods
  13. const char cszGET[] = "GET";
  14. const char cszHEAD[] = "HEAD";
  15. const char cszPOST[] = "POST";
  16. // General headers
  17. const char cszConnection[] = "Connection:";
  18. //const char cszDate[] = "Date:";
  19. //const char cszPragma[] = "Pragma:";
  20. // Request headers
  21. const char cszAuthorization[] = "Authorization:";
  22. const char cszIfModifiedSince[] = "If-Modified-Since:";
  23. //const char cszReferer[] = "Referer:";
  24. //const char cszUserAgent[] = "User-Agent:";
  25. const char cszCookie[] = "Cookie:";
  26. const char cszAccept[] = "Accept:";
  27. // Entity Headers
  28. const char cszContentLength[] = "Content-Length:";
  29. const char cszContentType[] = "Content-Type:";
  30. // other Header tokens
  31. // const char cszHTTPVER[] = "HTTP/%d.%d"; //
  32. const char cszHTTPVER[] = "HTTP/";
  33. const char cszBasic[] = "Basic";
  34. const char cszNTLM[] = "NTLM";
  35. #define PFNPARSE(x) &(CHttpRequest::Parse##x)
  36. #define TABLEENTRY(csz, id, pfn) { csz, sizeof(csz)-1, id, PFNPARSE(pfn) }
  37. #define AUTH_FILTER_DONE 0x1000 // no more filter calls to SF_AUTH after the 1st one in a session
  38. typedef (CHttpRequest::*PFNPARSEPROC)(PCSTR pszTok, TOKEN idHeader);
  39. typedef struct tagHeaderDesc
  40. {
  41. const char* sz;
  42. int iLen;
  43. TOKEN id;
  44. PFNPARSEPROC pfn;
  45. } HEADERDESC;
  46. const HEADERDESC rgHeaders[] =
  47. {
  48. //{ cszGET, sizeof(cszGET), TOK_GET, &CHttpRequest::ParseMethod },
  49. // Methods
  50. // TABLEENTRY(cszGET, TOK_GET, Method),
  51. // TABLEENTRY(cszHEAD, TOK_HEAD, Method),
  52. // TABLEENTRY(cszPOST, TOK_POST, Method),
  53. // General headers
  54. TABLEENTRY(cszConnection, TOK_CONNECTION, Connection),
  55. //TABLEENTRY(cszDate, TOK_DATE, Date),
  56. //TABLEENTRY(cszPragma, TOK_PRAGMA, Pragma),
  57. // Request headers
  58. TABLEENTRY(cszCookie, TOK_COOKIE, Cookie),
  59. TABLEENTRY(cszAccept, TOK_ACCEPT, Accept),
  60. //TABLEENTRY(cszReferer, TOK_REFERER Referer),
  61. //TABLEENTRY(cszUserAgent,TOK_UAGENT, UserAgent),
  62. TABLEENTRY(cszAuthorization, TOK_AUTH, Authorization),
  63. TABLEENTRY(cszIfModifiedSince,TOK_IFMOD, IfModifiedSince),
  64. // Entity Headers
  65. //TABLEENTRY(cszContentEncoding, TOK_ENCODING Encoding),
  66. TABLEENTRY(cszContentType, TOK_TYPE, ContentType),
  67. TABLEENTRY(cszContentLength,TOK_LENGTH, ContentLength),
  68. { 0, 0, (TOKEN)0, 0}
  69. };
  70. // Parse all the headers, line by line
  71. BOOL CHttpRequest::ParseHeaders()
  72. {
  73. DEBUG_CODE_INIT;
  74. PSTR pszTok;
  75. PWSTR pwszTemp;
  76. PSTR pszPathInfo = NULL;
  77. int i, iLen;
  78. BOOL ret = FALSE;
  79. if (!m_bufRequest.NextTokenWS(&pszTok, &iLen))
  80. {
  81. m_rs = STATUS_BADREQ;
  82. myleave(287);
  83. }
  84. if (! ParseMethod(pszTok,iLen))
  85. {
  86. m_rs = STATUS_BADREQ;
  87. myleave(288);
  88. }
  89. if (!m_bufRequest.NextLine())
  90. {
  91. m_rs = STATUS_BADREQ;
  92. myleave(290);
  93. }
  94. // outer-loop. one header per iteration
  95. while (m_bufRequest.NextTokenColon(&pszTok, &iLen))
  96. {
  97. // compare token with tokens in table
  98. for (i=0; rgHeaders[i].sz; i++)
  99. {
  100. //TraceTag(ttidWebServer, "Comparing %s %d %d", rgHeaders[i].sz, rgHeaders[i].iLen, rgHeaders[i].pfn);
  101. if ( (rgHeaders[i].iLen == iLen) &&
  102. 0==_memicmp(rgHeaders[i].sz, pszTok, iLen) )
  103. break;
  104. }
  105. if (rgHeaders[i].pfn)
  106. {
  107. TraceTag(ttidWebServer, "Parsing %s", rgHeaders[i].sz);
  108. // call the specific function to parse this header.
  109. if (! ((this->*(rgHeaders[i].pfn))(pszTok, rgHeaders[i].id)) )
  110. {
  111. TraceTag(ttidWebServer, "Parser: failed to parse %s -- IGNORING", rgHeaders[i].sz);
  112. }
  113. }
  114. else
  115. {
  116. TraceTag(ttidWebServer, "Ignoring header %s", pszTok);
  117. }
  118. if (!m_bufRequest.NextLine())
  119. {
  120. m_rs = STATUS_BADREQ;
  121. myleave(290);
  122. }
  123. }
  124. if (!m_bufRequest.NextLine()) // eat the blank line
  125. {
  126. m_rs = STATUS_BADREQ;
  127. myleave(290);
  128. }
  129. TraceTag(ttidWebServer, "Parser: DONE");
  130. // check what we got
  131. if (!m_pszMethod || !m_idMethod)
  132. {
  133. TraceTag(ttidWebServer, "Parser: missing URL or method, illformatted Request-line");
  134. m_rs = STATUS_BADREQ;
  135. myleave(291);
  136. }
  137. // Once we've read the request line, give filter shot at modifying the
  138. // remaining headers.
  139. if (g_pVars->m_fFilters &&
  140. ! CallFilter(SF_NOTIFY_PREPROC_HEADERS))
  141. myleave(292);
  142. m_wszPath = g_pVars->m_pVroots->URLAtoPathW(m_pszURL, &m_dwPermissions, &m_AuthLevelReqd,&m_VRootScriptType,&m_pszPathInfo,&m_wszVRootUserList);
  143. if (g_pVars->m_fFilters &&
  144. ! CallFilter(SF_NOTIFY_URL_MAP))
  145. myleave(293);
  146. // get extension
  147. if (m_wszPath && (pwszTemp = wcsrchr(m_wszPath, '.')))
  148. m_wszExt = MySzDupW(pwszTemp);
  149. // As per the docs, the filter gets ONLY 1 call per session to notify
  150. // it of this event. m_dwAuthFlags is remembered from session to session.
  151. // Like IIS, it always is called, even if Vroots is AUTH_PUBLIC already and
  152. // even if no security has been enabled.
  153. if ( g_pVars->m_fFilters && ! (m_dwAuthFlags & AUTH_FILTER_DONE))
  154. {
  155. if ( ! AuthenticateFilter())
  156. myleave(294);
  157. }
  158. m_dwAuthFlags |= AUTH_FILTER_DONE;
  159. ret = TRUE;
  160. done:
  161. TraceTag(ttidWebServer, "Parse headers failed, err = %d",err);
  162. return ret;
  163. }
  164. BOOL CHttpRequest::ParseMethod(PCSTR pszMethod, int cbMethod)
  165. {
  166. DEBUG_CODE_INIT;
  167. PSTR pszTok, pszTok2;
  168. int iLen;
  169. BOOL ret;
  170. // save method
  171. m_pszMethod = MySzDupA(pszMethod);
  172. if (0 == memcmp(cszGET,pszMethod,cbMethod))
  173. m_idMethod = TOK_GET;
  174. else if (0 == memcmp(cszHEAD,pszMethod,cbMethod))
  175. m_idMethod = TOK_HEAD;
  176. else if (0 == memcmp(cszPOST,pszMethod,cbMethod))
  177. m_idMethod = TOK_POST;
  178. else
  179. m_idMethod = TOK_UNKNOWN_VERB;
  180. // get URL and HTTP/x.y together (allows for spaces in URL like Netscape sends)
  181. if (!m_bufRequest.NextTokenEOL(&pszTok, &iLen))
  182. myretleave(FALSE, 201);
  183. // seperate out the HTTP/x.y
  184. if (pszTok2 = strrchr(pszTok, ' '))
  185. {
  186. *pszTok2 = 0;
  187. iLen = (INT)((INT_PTR)(pszTok2-pszTok));
  188. pszTok2++;
  189. }
  190. // clean up & parse the URL
  191. MyCrackURL(pszTok, iLen);
  192. // get version (optional. HTTP 0.9 wont have this)
  193. if (!pszTok2)
  194. m_dwVersion = MAKELONG(9, 0);
  195. else
  196. {
  197. // int iMajor, iMinor;
  198. // sscanf(pszTok2, cszHTTPVER, &iMajor, &iMinor);
  199. // m_dwVersion = MAKELONG(iMinor, iMajor);
  200. SetHTTPVersion(pszTok2, &m_dwVersion);
  201. pszTok2[-1] = ' '; // reset this to a space
  202. }
  203. ret = TRUE;
  204. done:
  205. TraceTag(ttidWebServer, "end ParseMethod (iGLE=%d iErr=%d)", GLE(err),err);
  206. return ret;
  207. }
  208. // We assume a raw URL in the form that we receive in the HTTP headers (no scheme, port number etc)
  209. // We extract the path, extra-path, and query
  210. BOOL CHttpRequest::MyCrackURL(PSTR pszRawURL, int iLen)
  211. {
  212. DEBUG_CODE_INIT;
  213. BOOL ret = FALSE;
  214. PSTR pszDecodedURL=0, pszTemp=0, pszPartiallyDecodedURL=0;
  215. int iLen2;
  216. DWORD cchDecodedURL = iLen + 1; // including the NULL terminator
  217. DWORD cchPartiallyDecodedURL = iLen + 1;
  218. // decode URL (convert escape sequences etc)
  219. if (NULL == (pszPartiallyDecodedURL = MyRgAllocNZ(CHAR, cchDecodedURL)))
  220. myleave(382);
  221. if (NULL == (pszDecodedURL = MyRgAllocNZ(CHAR, cchPartiallyDecodedURL)))
  222. myleave(382);
  223. // BUG FIX 393235 - When InternetCanonicalizeUrlA() is told to decode a URL and process the meta
  224. // directories, it does them in the wrong order. Passing it:
  225. // http://localhost:2869/upnphost/%2e./%2e./%2e./%2e./%2e./%2e./boot.ini
  226. // results in:
  227. // http://localhost:2869/upnphost/../../../../../../boot.ini
  228. // which is clearly not safe. To work around this, we call it twice - once to decode the URL, and
  229. // a second time to process the meta directories.
  230. // First, decode the URL
  231. if (!InternetCanonicalizeUrlA(pszRawURL,
  232. pszPartiallyDecodedURL,
  233. (DWORD*)&cchPartiallyDecodedURL,
  234. ICU_NO_ENCODE | ICU_DECODE | ICU_BROWSER_MODE | ICU_NO_META))
  235. {
  236. TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
  237. myleave(383);
  238. }
  239. // Second, process the meta directories
  240. if (!InternetCanonicalizeUrlA(pszPartiallyDecodedURL,
  241. pszDecodedURL,
  242. (DWORD*)&cchDecodedURL,
  243. ICU_NO_ENCODE | ICU_BROWSER_MODE))
  244. {
  245. TraceTag(ttidWebServer, "CHttpRequest::MyCrackURL - InternetCanonicalizeUrlA failed with GLE=%d\n", GetLastError());
  246. myleave(384);
  247. }
  248. // get query string
  249. if (pszTemp = strchr(pszDecodedURL, '?'))
  250. {
  251. m_pszQueryString = MySzDupA(pszTemp+1);
  252. *pszTemp = 0;
  253. }
  254. // Searching for an embedded ISAPI dll name, ie /wwww/isapi.dll/a/b.
  255. // We load the file /www/isapi.dll and set PATH_INFO to /a/b
  256. // Emebbed ASP file names are handled similiarly.
  257. if (g_pVars->m_fExtensions)
  258. {
  259. if (pszTemp = strstr(pszDecodedURL,".dll/"))
  260. {
  261. m_pszPathInfo = MySzDupA(pszTemp + sizeof(".dll/") - 2);
  262. pszTemp[sizeof(".dll/") - 2] = 0;
  263. }
  264. else if (pszTemp = strstr(pszDecodedURL,".asp/"))
  265. {
  266. m_pszPathInfo = MySzDupA(pszTemp + sizeof(".asp/") - 2);
  267. pszTemp[sizeof(".asp/") - 2] = 0;
  268. }
  269. }
  270. // save a copy of the cleaned up URL (MINUS query!)
  271. // SPECIAL HACK: alloc one extra char in case we have to send a redirect back (see request.cpp)
  272. iLen2 = strlen(pszDecodedURL);
  273. m_pszURL = MySzAllocA(1+iLen2);
  274. Nstrcpy(m_pszURL, pszDecodedURL, iLen2); // copy null-term too.
  275. ret = TRUE;
  276. done:
  277. MyFree(pszDecodedURL);
  278. MyFree(pszPartiallyDecodedURL);
  279. TraceTag(ttidWebServer, "end MyCrackURL(%s) path=%s ext=%s query=%s (iGLE=%d iErr=%d)\r\n",
  280. pszRawURL, m_wszPath, m_wszExt, m_pszQueryString, GLE(err), err);
  281. return ret;
  282. }
  283. BOOL CHttpRequest::ParseContentLength(PCSTR pszMethod, TOKEN id)
  284. {
  285. PSTR pszTok = 0;
  286. int iLen = 0;
  287. // get length (first token after "Content-Type;")
  288. if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
  289. {
  290. m_dwContentLength = atoi(pszTok);
  291. }
  292. return TRUE;
  293. }
  294. BOOL CHttpRequest::ParseCookie(PCSTR pszMethod, TOKEN id)
  295. {
  296. PSTR pszTok = 0;
  297. int iLen = 0;
  298. // get cookie (upto \r\n after "Cookies;")
  299. if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
  300. {
  301. m_pszCookie = MySzDupA(pszTok);
  302. }
  303. return TRUE;
  304. }
  305. BOOL CHttpRequest::ParseAccept(PCSTR pszMethod, TOKEN id)
  306. {
  307. PSTR pszTok = 0;
  308. int iLen = 0;
  309. // get cookie (upto \r\n after "Cookies;")
  310. if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
  311. {
  312. m_pszAccept = MySzDupA(pszTok);
  313. }
  314. return TRUE;
  315. }
  316. BOOL CHttpRequest::ParseContentType(PCSTR pszMethod, TOKEN id)
  317. {
  318. PSTR pszTok = 0;
  319. int iLen = 0;
  320. // get type (first token after "Content-Type;")
  321. if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
  322. {
  323. m_pszContentType = MySzDupA(pszTok);
  324. }
  325. return TRUE;
  326. }
  327. const char cszDateParseFmt[] = " %*3s, %02hd %3s %04hd %02hd:%02hd:%02hd GMT; length=%d";
  328. BOOL CHttpRequest::ParseIfModifiedSince(PCSTR pszMethod, TOKEN id)
  329. {
  330. PSTR pszTok = 0;
  331. int iLen = 0;
  332. int i = 0;
  333. char szMonth[10];
  334. SYSTEMTIME st;
  335. ZEROMEM(&st);
  336. // get the date (rest of line after If-Modified-Since)
  337. // BUGBUG: Note we are handling only one date format (the "reccomended" one)
  338. if (m_bufRequest.NextTokenEOL(&pszTok, &iLen) && pszTok && iLen)
  339. {
  340. // i = sscanf(pszTok, cszDateParseFmt, &st.wDay, &szMonth, &st.wYear, &st.wHour, &st.wMinute, &st.wSecond, &m_dwIfModifiedLength);
  341. if ( SetHTTPDate(pszTok,szMonth,&st,&m_dwIfModifiedLength))
  342. {
  343. // try to match month
  344. for (i=0; rgMonth[i]; i++)
  345. {
  346. if (0==strcmpi(szMonth, rgMonth[i]))
  347. {
  348. st.wMonth = (WORD)i;
  349. // convert to filetime & store
  350. SystemTimeToFileTime(&st, &m_ftIfModifiedSince);
  351. return TRUE;
  352. }
  353. }
  354. }
  355. TraceTag(ttidWebServer, "Failed to parse If-Modified-Since(%s) Parsed: day=%02d month=%s(%d) year=%04d time=%02d:%02d:%02d len=%d\r\n",
  356. pszTok, st.wDay, szMonth, i, st.wYear, st.wHour, st.wMinute, st.wSecond, m_dwIfModifiedLength);
  357. }
  358. return FALSE;
  359. }
  360. // Note: No filter calls to SF_NOTIFY_AUTHENT in this fcn
  361. BOOL CHttpRequest::ParseAuthorization(PCSTR pszMethod, TOKEN id)
  362. {
  363. DEBUG_CODE_INIT;
  364. BOOL ret = FALSE;
  365. PSTR pszTok=0;
  366. int iLen=0;
  367. // get the auth scheme (first token after "Authorization;")
  368. if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
  369. myretleave(FALSE, 91);
  370. m_pszAuthType = MySzDupA(pszTok);
  371. if (g_pVars->m_fBasicAuth && 0==strcmpi(pszTok, cszBasic))
  372. {
  373. // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
  374. if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
  375. myretleave(FALSE, 92);
  376. if (!HandleBasicAuth(pszTok, &m_pszRemoteUser, &m_pszPassword,
  377. &m_AuthLevelGranted, &m_NTLMState,m_wszVRootUserList))
  378. myretleave(TRUE, 93);
  379. TraceTag(ttidWebServer, "Basic Auth SUCCESS");
  380. m_dwAuthFlags |= m_AuthLevelGranted;
  381. ret = TRUE;
  382. }
  383. else if (g_pVars->m_fNTLMAuth && 0==strcmpi(pszTok, cszNTLM))
  384. {
  385. // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
  386. if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
  387. myretleave(FALSE, 95);
  388. if (!HandleNTLMAuth(pszTok))
  389. myretleave(TRUE, 96);
  390. TraceTag(ttidWebServer, "NTLM Auth SUCCESS");
  391. ret = TRUE;
  392. }
  393. // We read in this data anyway. A filter could theoretically set an Access-denied
  394. // even if neither NTLM or basic weren't set. AuthenticateFilter will handle
  395. // this data later in that case.
  396. // We store data in m_pszRawRemoteUser because it hasn't been Base64 decoded yet
  397. else
  398. {
  399. // get the scheme auth data (second token) [NOTE: cant get 2 tokens at once!!]
  400. if (!m_bufRequest.NextTokenWS(&pszTok, &iLen) || !pszTok || !iLen)
  401. myretleave(FALSE, 97);
  402. m_pszRawRemoteUser = MySzDupA(pszTok);
  403. if (NULL == m_pszRemoteUser)
  404. myretleave(FALSE, 98);
  405. TraceTag(ttidWebServer, "Unknown authorization type requested OR requested type not enabled");
  406. }
  407. done:
  408. TraceTag(ttidWebServer, "Auth FAILED (err=%d ret=%d)", err, ret);
  409. return ret;
  410. }
  411. BOOL CHttpRequest::ParseConnection(PCSTR pszMethod, TOKEN id)
  412. {
  413. PSTR pszTok = 0;
  414. int iLen = 0;
  415. // get first token after "Connnection;"
  416. if (m_bufRequest.NextTokenWS(&pszTok, &iLen) && pszTok && iLen)
  417. {
  418. if (0==strcmpi(pszTok, cszKeepAlive))
  419. m_fKeepAlive = TRUE;
  420. }
  421. return TRUE;
  422. }