Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

674 lines
16 KiB

  1. /*++
  2. Copyright (c) 1998-2001 Microsoft Corporation
  3. Module Name:
  4. parsep.h
  5. Abstract:
  6. Contains all of the kernel mode HTTP parsing code.
  7. Author:
  8. Henry Sanders (henrysa) 04-May-1998
  9. Revision History:
  10. --*/
  11. #ifndef _PARSEP_H_
  12. #define _PARSEP_H_
  13. #ifdef __cplusplus
  14. extern "C" {
  15. #endif
  16. //
  17. // External variables.
  18. //
  // PopChar (later in this header) indexes this table by byte value and treats
  // a nonzero entry as a DBCS lead byte. The table is defined outside this file.
  19. extern PUSHORT NlsLeadByteInfo;
  20. //
  21. // Constants
  22. //
  // Byte count of "HTTP/1.1" excluding the terminating NUL.
  23. #define MIN_VERSION_SIZE (sizeof("HTTP/1.1") - 1)
  // No "- 1" here: the size includes the terminating NUL of "PROPPATCH".
  24. #define MAX_VERB_LENGTH (sizeof("PROPPATCH"))
  // The eight ASCII bytes of "HTTP/1.1" and "HTTP/1.0" packed into a 64-bit
  // value with the first character ('H', 0x48) in the least significant byte.
  25. #define HTTP_11_VERSION 0x312e312f50545448
  26. #define HTTP_10_VERSION 0x302e312f50545448
  // 0xdf in every byte: ANDing clears bit 0x20 of each byte, which maps ASCII
  // 'a'-'z' onto 'A'-'Z'. Non-alphabetic bytes are altered as well, hence the
  // "touched up" remark on CREATE_HEADER_MASK.
  27. #define UPCASE_MASK ((ULONGLONG)0xdfdfdfdfdfdfdfdf)
  // A header name is stored as at most MAX_HEADER_LONG_COUNT ULONGLONGs;
  // MAX_HEADER_LENGTH is that capacity expressed in bytes.
  28. #define MAX_HEADER_LONG_COUNT (3)
  29. #define MAX_HEADER_LENGTH (MAX_HEADER_LONG_COUNT * sizeof(ULONGLONG))
  // NOTE(review): the tables sized by these two counts are not in this header;
  // 26 presumably means one index slot per letter A-Z -- confirm at the tables.
  30. #define NUMBER_HEADER_INDICIES (26)
  31. #define NUMBER_HEADER_HINT_INDICIES (8)
  32. //
  33. // Default Server: header if none provided by the application.
  34. //
  35. #define DEFAULT_SERVER_HDR "Microsoft-IIS/6.0"
  // Length in bytes, excluding the terminating NUL.
  36. #define DEFAULT_SERVER_HDR_LENGTH (sizeof(DEFAULT_SERVER_HDR) - sizeof(CHAR))
  37. //
  38. // One second in 100ns system time units. Used for generating
  39. // Date: headers.
  40. //
  41. #define ONE_SECOND 10000000
  42. //
  43. // Size of Connection: header values
  44. //
  // Both *_LENGTH values exclude the terminating NUL.
  45. #define CONN_CLOSE_HDR "close"
  46. #define CONN_CLOSE_HDR_LENGTH (sizeof(CONN_CLOSE_HDR) - sizeof(CHAR))
  47. #define CONN_KEEPALIVE_HDR "keep-alive"
  48. #define CONN_KEEPALIVE_HDR_LENGTH (sizeof(CONN_KEEPALIVE_HDR) - sizeof(CHAR))
  49. //
  50. // These are backwards because of little endian.
  51. //
  // Read right to left the three constants spell "HTTP", "://" and "S://".
  // The value of a multi-character constant is implementation-defined in C,
  // so these rely on the compiler this code was written for.
  // Masks: HTTP_PREFIX_MASK clears bit 0x20 in all four bytes (same case fold
  // as UPCASE_MASK); HTTP_PREFIX1_MASK keeps only the low three bytes;
  // HTTP_PREFIX2_MASK clears bit 0x20 in the low byte only.
  // NOTE(review): the low byte is presumably the 'S' position -- confirm
  // against the comparison code that uses these.
  52. #define HTTP_PREFIX 'PTTH'
  53. #define HTTP_PREFIX_SIZE 4
  54. #define HTTP_PREFIX_MASK 0xdfdfdfdf
  55. #define HTTP_PREFIX1 '\0//:'
  56. #define HTTP_PREFIX1_SIZE 3
  57. #define HTTP_PREFIX1_MASK 0x00ffffff
  58. #define HTTP_PREFIX2 '//:S'
  59. #define HTTP_PREFIX2_SIZE 4
  60. #define HTTP_PREFIX2_MASK 0xffffffdf
  //
  // Handler signature stored in HEADER_MAP_ENTRY.pServerHandler. The handler
  // receives the request object, a byte buffer with its length and the ID of
  // the header, and reports a byte count through pBytesTaken.
  // NOTE(review): presumably pHttpRequest points at the header's value and
  // *pBytesTaken is the number of bytes consumed -- confirm at the handlers.
  //
  61. typedef NTSTATUS (*PFN_SERVER_HEADER_HANDLER)(
  62. PUL_INTERNAL_REQUEST pRequest,
  63. PUCHAR pHttpRequest,
  64. ULONG HttpRequestLength,
  65. HTTP_HEADER_ID HeaderID,
  66. ULONG * pBytesTaken
  67. );
  //
  // Handler signature stored in HEADER_MAP_ENTRY.pClientHandler. Unlike the
  // server variant it takes a known-header array, head/tail pointers into an
  // output buffer and a pointer to the bytes available, in addition to the
  // header bytes, their length, the header ID and the pBytesTaken result.
  // NOTE(review): ownership and update rules for pOutBufferHead/pOutBufferTail
  // and BytesAvailable are not visible here -- confirm at the handlers.
  //
  68. typedef NTSTATUS (*PFN_CLIENT_HEADER_HANDLER)(
  69. PHTTP_KNOWN_HEADER pKnownHeaders,
  70. PUCHAR *pOutBufferHead,
  71. PUCHAR *pOutBufferTail,
  72. PULONG BytesAvailable,
  73. PUCHAR pHeader,
  74. ULONG HeaderLength,
  75. HTTP_HEADER_ID HeaderID,
  76. ULONG * pBytesTaken
  77. );
  78. //
  79. // Structure of the fast verb lookup table. The table consists of a series of
  80. // entries where each entry contains an HTTP verb represented as a ulonglong,
  81. // a mask to use for comparing that verb, the length of the verb and the
  82. // translated id.
  83. //
  // Entries are built positionally by CREATE_FAST_VERB_ENTRY, so the member
  // order below must not change.
  //
  84. typedef struct _FAST_VERB_ENTRY
  85. {
  86. union
  87. {
  // One byte larger than the ULONGLONG view: the initializer is the verb
  // followed by a space, and a 7-character verb plus the space plus the
  // literal's NUL needs 9 bytes.
  88. UCHAR Char[sizeof(ULONGLONG)+1];
  // The first eight bytes of the same text viewed as a single 64-bit value.
  89. ULONGLONG LongLong;
  90. } RawVerb;
  // Mask selecting the low RawVerbLength bytes of LongLong.
  91. ULONGLONG RawVerbMask;
  // Set from sizeof(#verb), i.e. the verb's characters plus the trailing space.
  92. ULONG RawVerbLength;
  93. HTTP_VERB TranslatedVerb;
  94. } FAST_VERB_ENTRY, *PFAST_VERB_ENTRY;
  95. //
  96. // Structure of the all verb lookup table. This table holds all verbs that
  97. // we understand, including those that are too long to fit in the fast
  98. // verb table.
  99. //
  // Entries are built positionally by CREATE_LONG_VERB_ENTRY, so the member
  // order below must not change.
  //
  100. typedef struct _LONG_VERB_ENTRY
  101. {
  // Set from sizeof(#verb) - 1: the verb's length without the NUL (and, unlike
  // the fast table, without a trailing space).
  102. ULONG RawVerbLength;
  // MAX_VERB_LENGTH is sizeof("PROPPATCH"), so there is room for that verb
  // including its terminating NUL.
  103. UCHAR RawVerb[MAX_VERB_LENGTH];
  104. HTTP_VERB TranslatedVerb;
  105. } LONG_VERB_ENTRY, *PLONG_VERB_ENTRY;
  106. //
  107. // Structure for a header map entry. Each header map entry contains a
  108. // header name and a series of masks to use in checking that header name.
  109. //
  // Entries are built positionally by CREATE_HEADER_MAP_ENTRY, so the member
  // order below must not change.
  //
  110. typedef struct _HEADER_MAP_ENTRY
  111. {
  // Length of the header name text, without the terminating NUL.
  112. ULONG HeaderLength;
  // Number of ULONGLONGs needed to cover HeaderLength bytes (rounded up).
  113. ULONG ArrayCount;
  // ArrayCount * 8: the name length rounded up to a whole number of ULONGLONGs.
  114. ULONG MinBytesNeeded;
  115. union
  116. {
  // The header name as bytes and, overlaid, as 64-bit words.
  // NOTE(review): both this and MixedCaseHeader are initialized from the same
  // text; presumably the init code upcases this copy -- confirm.
  117. UCHAR HeaderChar[MAX_HEADER_LENGTH];
  118. ULONGLONG HeaderLong[MAX_HEADER_LONG_COUNT];
  119. } Header;
  // Initialized to zero by the macro; per the macro's comment the real mask
  // values are created by the init code.
  120. ULONGLONG HeaderMask[MAX_HEADER_LONG_COUNT];
  // The header name exactly as written in the table definition.
  121. UCHAR MixedCaseHeader[MAX_HEADER_LENGTH];
  122. HTTP_HEADER_ID HeaderID;
  123. BOOLEAN AutoGenerate;
  124. PFN_SERVER_HEADER_HANDLER pServerHandler;
  125. PFN_CLIENT_HEADER_HANDLER pClientHandler;
  126. LONG HintIndex;
  127. } HEADER_MAP_ENTRY, *PHEADER_MAP_ENTRY;
  128. //
  129. // Structure for a header index table entry.
  130. //
  // Pairs a pointer to header map entries with a count.
  // NOTE(review): presumably pHeaderMap is an array of Count entries, matching
  // the (pCurrentHeaderMap, HeaderMapCount) arguments of UlLookupHeader --
  // confirm where the index table is built.
  //
  131. typedef struct _HEADER_INDEX_ENTRY
  132. {
  133. PHEADER_MAP_ENTRY pHeaderMap;
  134. ULONG Count;
  135. } HEADER_INDEX_ENTRY, *PHEADER_INDEX_ENTRY;
  136. //
  137. // Structure for a header hint index table entry.
  138. //
  // Pairs a single header map entry pointer with one character.
  // NOTE(review): presumably c is the first character of the hinted header's
  // name -- confirm where the hint table is populated.
  //
  139. typedef struct _HEADER_HINT_INDEX_ENTRY
  140. {
  141. PHEADER_MAP_ENTRY pHeaderMap;
  142. UCHAR c;
  143. } HEADER_HINT_INDEX_ENTRY, *PHEADER_HINT_INDEX_ENTRY, **PPHEADER_HINT_INDEX_ENTRY;
  144. //
  145. // A (complex) macro to create a mask for a header map entry,
  146. // given the header length and the mask offset (in bytes). This
  147. // mask will need to be touched up for non-alphabetic characters.
  148. //
  // Three cases, evaluated in order:
  //   hlength > maskoffset          -> the full UPCASE_MASK
  //   maskoffset - hlength >= 8     -> 0 (also keeps the shift below under 64 bits)
  //   otherwise                     -> UPCASE_MASK shifted right by
  //                                    (maskoffset - hlength) bytes, leaving
  //                                    only the low-order bytes set
  // Both arguments are expanded more than once, so pass side-effect-free
  // expressions.
  //
  149. #define CREATE_HEADER_MASK(hlength, maskoffset) \
  150. ((hlength) > (maskoffset) ? UPCASE_MASK : \
  151. (((maskoffset) - (hlength)) >= 8 ? 0 : \
  152. (UPCASE_MASK >> ( ((maskoffset) - (hlength)) * (ULONGLONG)8))))
  153. //
  154. // Macro for creating header map entries. The mask entries are created
  155. // by the init code.
  156. //
  // Expands to a positional initializer for HEADER_MAP_ENTRY, in member order:
  //   HeaderLength    = length of the stringized header, without the NUL
  //   ArrayCount      = that length divided by 8, rounded up
  //   MinBytesNeeded  = ArrayCount * 8
  //   Header          = the stringized header text
  //   HeaderMask      = all zero
  //   MixedCaseHeader = the stringized header text again
  //   HeaderID, AutoGenerate, pServerHandler, pClientHandler, HintIndex
  //                   = the remaining macro arguments, in that order
  // The header argument is stringized with #, so pass it without quotes.
  //
  157. #define CREATE_HEADER_MAP_ENTRY(header, ID, auto, serverhandler, clienthandler, HintIndex)\
  158. { \
  159. \
  160. sizeof(#header) - 1, \
  161. ((sizeof(#header) - 1) / 8) + \
  162. (((sizeof(#header) - 1) % 8) == 0 ? 0 : 1), \
  163. (((sizeof(#header) - 1) / 8) + \
  164. (((sizeof(#header) - 1) % 8) == 0 ? 0 : 1)) * 8, \
  165. { #header }, \
  166. { 0, 0, 0}, \
  167. { #header }, \
  168. ID, \
  169. auto, \
  170. serverhandler, \
  171. clienthandler, \
  172. HintIndex \
  173. }
  174. //
  175. // Macro for defining fast verb table entries. Note that we don't subtract 1
  176. // from the various sizeof occurrences because we'd just have to add it back
  177. // in to account for the separating space.
  178. //
  // Expands to a positional initializer for FAST_VERB_ENTRY:
  //   RawVerb        = the stringized verb followed by one space
  //   RawVerbMask    = all-ones shifted right so that only the low
  //                    sizeof(#verb) bytes remain set
  //   RawVerbLength  = sizeof(#verb) (verb characters plus the space)
  //   TranslatedVerb = HttpVerb<verb>, formed by token pasting
  // The shift count is (8 - sizeof(#verb)) * 8, so this is only meaningful for
  // verbs of at most 7 characters; longer verbs belong in the long verb table.
  //
  179. #define CREATE_FAST_VERB_ENTRY(verb) { {#verb " "}, \
  180. (0xffffffffffffffff >> \
  181. ((8 - (sizeof(#verb))) * 8)), \
  182. (sizeof(#verb)), HttpVerb##verb }
  183. //
  184. // Macro for defining all verb table entries.
  185. //
  // Expands to a positional initializer for LONG_VERB_ENTRY: the verb length
  // without the NUL, the stringized verb, and HttpVerb<verb> formed by token
  // pasting.
  //
  186. #define CREATE_LONG_VERB_ENTRY(verb) { sizeof(#verb) - 1, \
  187. #verb,\
  188. HttpVerb##verb }
  // True when the top two bits of ch are 10, i.e. ch has the form 10xxxxxx
  // (0x80-0xBF), which is how PopChar recognizes a UTF-8 continuation byte.
  189. #define IS_UTF8_TRAILBYTE(ch) (((ch) & 0xc0) == 0x80)
  //
  // Parser entry points. Only the prototypes are visible in this header; the
  // notes below describe the signatures and flag what needs confirming in the
  // implementation file.
  //
  // CheckForAbsoluteUrl takes a URL buffer and its length.
  // NOTE(review): pHostPtr and BytesTaken are pointers annotated IN but look
  // like outputs -- confirm against the implementation.
  //
  190. NTSTATUS
  191. CheckForAbsoluteUrl(
  192. IN PUL_INTERNAL_REQUEST pRequest,
  193. IN PUCHAR pURL,
  194. IN ULONG URLLength,
  195. IN PUCHAR * pHostPtr,
  196. IN ULONG * BytesTaken
  197. );
  //
  // LookupVerb takes the request bytes and their length and reports a byte
  // count through pBytesTaken.
  //
  198. NTSTATUS
  199. LookupVerb(
  200. IN PUL_INTERNAL_REQUEST pRequest,
  201. IN PUCHAR pHttpRequest,
  202. IN ULONG HttpRequestLength,
  203. OUT ULONG * pBytesTaken
  204. );
  //
  // Same shape as UlParseHeader, with one extra argument: a header map entry
  // to use as a hint.
  //
  205. NTSTATUS
  206. UlParseHeaderWithHint(
  207. IN PUL_INTERNAL_REQUEST pRequest,
  208. IN PUCHAR pHttpRequest,
  209. IN ULONG HttpRequestLength,
  210. IN PHEADER_MAP_ENTRY pHeaderHintMap,
  211. OUT ULONG * pBytesTaken
  212. );
  //
  // UlParseHeader takes the request bytes and their length and reports a byte
  // count through pBytesTaken.
  //
  213. NTSTATUS
  214. UlParseHeader(
  215. IN PUL_INTERNAL_REQUEST pRequest,
  216. IN PUCHAR pHttpRequest,
  217. IN ULONG HttpRequestLength,
  218. OUT ULONG * pBytesTaken
  219. );
  //
  // UlLookupHeader additionally takes a header map pointer and a count.
  // NOTE(review): presumably pCurrentHeaderMap is an array of HeaderMapCount
  // entries to search -- confirm against the implementation.
  //
  220. NTSTATUS
  221. UlLookupHeader(
  222. IN PUL_INTERNAL_REQUEST pRequest,
  223. IN PUCHAR pHttpRequest,
  224. IN ULONG HttpRequestLength,
  225. IN PHEADER_MAP_ENTRY pCurrentHeaderMap,
  226. IN ULONG HeaderMapCount,
  227. OUT ULONG * pBytesTaken
  228. );
  //
  // Which part of a URL is being processed. Within this header only
  // QueryString is tested explicitly: PopChar copies query-string bytes
  // through without unescaping and does not convert backslashes in them.
  //
  229. typedef enum _URL_PART
  230. {
  231. Scheme,
  232. HostName,
  233. AbsPath,
  234. QueryString
  235. } URL_PART;
  //
  // How PopChar interprets bytes outside the query string:
  //   UrlTypeUtf8 - escaped multi-byte sequences are decoded as UTF-8
  //   UrlTypeAnsi - each byte is converted with RtlMultiByteToUnicodeN
  //   UrlTypeDbcs - as UrlTypeAnsi, but a byte flagged in NlsLeadByteInfo is
  //                 combined with the following escaped byte
  //
  236. typedef enum _URL_TYPE
  237. {
  238. UrlTypeUtf8,
  239. UrlTypeAnsi,
  240. UrlTypeDbcs
  241. } URL_TYPE;
  //
  // UlpCleanAndCopyUrl takes a source byte buffer with its length and a wide
  // character destination, and has three outputs: a byte count, an optional
  // query-string pointer and a URL hash.
  // NOTE(review): the required size of pDestination is not visible here --
  // confirm against the implementation and its callers.
  //
  242. NTSTATUS
  243. UlpCleanAndCopyUrl(
  244. IN URL_PART UrlPart,
  245. IN OUT PWSTR pDestination,
  246. IN PUCHAR pSource,
  247. IN ULONG SourceLength,
  248. OUT PULONG pBytesCopied,
  249. OUT PWSTR * ppQueryString OPTIONAL,
  250. OUT PULONG pUrlHash
  251. );
  //
  // Same parameters as UlpCleanAndCopyUrl, preceded by an explicit URL_TYPE.
  //
  252. NTSTATUS
  253. UlpCleanAndCopyUrlByType(
  254. IN URL_TYPE UrlType,
  255. IN URL_PART UrlPart,
  256. IN OUT PWSTR pDestination,
  257. IN PUCHAR pSource,
  258. IN ULONG SourceLength,
  259. OUT PULONG pBytesCopied,
  260. OUT PWSTR * ppQueryString OPTIONAL,
  261. OUT PULONG pUrlHash
  262. );
  //
  // Unescape produces one byte in *pOutChar from the text at pChar. PopChar
  // calls it on "%xx" escapes and advances three bytes after each success.
  // No length is passed, so the caller must guarantee the bytes are readable.
  //
  263. NTSTATUS
  264. Unescape(
  265. IN PUCHAR pChar,
  266. OUT PUCHAR pOutChar
  267. );
  268. //
  269. // PopChar is used only if the string is not UTF-8, or UrlPart != QueryString,
  270. // or the current character is '%' or its high bit is set. In all other cases,
  271. // the FastPopChars table is used for fast conversion.
  272. //
  273. __inline
  274. NTSTATUS
  275. FASTCALL
  276. PopChar(
  277. IN URL_TYPE UrlType,
  278. IN URL_PART UrlPart,
  279. IN PUCHAR pChar,
  280. OUT WCHAR * pUnicodeChar,
  281. OUT PULONG pCharToSkip
  282. )
  283. {
  284. NTSTATUS Status;
  285. WCHAR UnicodeChar;
  286. UCHAR Char;
  287. UCHAR Trail1;
  288. UCHAR Trail2;
  289. ULONG CharToSkip;
  290. //
  291. // Sanity check.
  292. //
  293. PAGED_CODE();
  294. //
  295. // validate it as a valid url character
  296. //
  297. if (UrlPart != QueryString)
  298. {
  299. if (IS_URL_TOKEN(pChar[0]) == FALSE)
  300. {
  301. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  302. UlTrace(PARSER, (
  303. "ul!PopChar(pChar = %p) first char isn't URL token\n",
  304. pChar
  305. ));
  306. goto end;
  307. }
  308. }
  309. else
  310. {
  311. //
  312. // Allow anything but linefeed in the query string.
  313. //
  314. if (pChar[0] == LF)
  315. {
  316. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  317. UlTrace(PARSER, (
  318. "ul!PopChar(pChar = %p) linefeed in query string\n",
  319. pChar
  320. ));
  321. goto end;
  322. }
  323. UnicodeChar = (USHORT) pChar[0];
  324. CharToSkip = 1;
  325. // skip all the decoding stuff
  326. goto slash;
  327. }
  328. //
  329. // need to unescape ?
  330. //
  331. // can't decode the query string. that would be lossy decodeing
  332. // as '=' and '&' characters might be encoded, but have meaning
  333. // to the usermode parser.
  334. //
  335. if (pChar[0] == '%')
  336. {
  337. Status = Unescape(pChar, &Char);
  338. if (NT_SUCCESS(Status) == FALSE)
  339. goto end;
  340. CharToSkip = 3;
  341. }
  342. else
  343. {
  344. Char = pChar[0];
  345. CharToSkip = 1;
  346. }
  347. if (UrlType == UrlTypeUtf8)
  348. {
  349. //
  350. // convert to unicode, checking for utf8 .
  351. //
  352. // 3 byte runs are the largest we can have. 16 bits in UCS-2 =
  353. // 3 bytes of (4+4,2+6,2+6) where it's code + char.
  354. // for a total of 6+6+4 char bits = 16 bits.
  355. //
  356. //
  357. // NOTE: we'll only bother to decode utf if it was escaped
  358. // thus the (CharToSkip == 3)
  359. //
  360. if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0))
  361. {
  362. // 3 byte run
  363. //
  364. // Unescape the next 2 trail bytes
  365. //
  366. Status = Unescape(pChar+CharToSkip, &Trail1);
  367. if (NT_SUCCESS(Status) == FALSE)
  368. goto end;
  369. CharToSkip += 3; // %xx
  370. Status = Unescape(pChar+CharToSkip, &Trail2);
  371. if (NT_SUCCESS(Status) == FALSE)
  372. goto end;
  373. CharToSkip += 3; // %xx
  374. if (IS_UTF8_TRAILBYTE(Trail1) == FALSE ||
  375. IS_UTF8_TRAILBYTE(Trail2) == FALSE)
  376. {
  377. // bad utf!
  378. //
  379. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  380. UlTrace(PARSER, (
  381. "ul!PopChar( 0x%x 0x%x ) bad trail bytes\n",
  382. Trail1,
  383. Trail2
  384. ));
  385. goto end;
  386. }
  387. // handle three byte case
  388. // 1110xxxx 10xxxxxx 10xxxxxx
  389. UnicodeChar = (USHORT) (((Char & 0x0f) << 12) |
  390. ((Trail1 & 0x3f) << 6) |
  391. (Trail2 & 0x3f));
  392. }
  393. else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0))
  394. {
  395. // 2 byte run
  396. //
  397. // Unescape the next 1 trail byte
  398. //
  399. Status = Unescape(pChar+CharToSkip, &Trail1);
  400. if (NT_SUCCESS(Status) == FALSE)
  401. goto end;
  402. CharToSkip += 3; // %xx
  403. if (IS_UTF8_TRAILBYTE(Trail1) == FALSE)
  404. {
  405. // bad utf!
  406. //
  407. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  408. UlTrace(PARSER, (
  409. "ul!PopChar( 0x%x ) bad trail byte\n",
  410. Trail1
  411. ));
  412. goto end;
  413. }
  414. // handle two byte case
  415. // 110xxxxx 10xxxxxx
  416. UnicodeChar = (USHORT) (((Char & 0x1f) << 6) |
  417. (Trail1 & 0x3f));
  418. }
  419. // now this can either be unescaped high-bit (bad)
  420. // or escaped high-bit. (also bad)
  421. //
  422. // thus not checking CharToSkip
  423. //
  424. else if ((Char & 0x80) == 0x80)
  425. {
  426. // high bit set ! bad utf!
  427. //
  428. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  429. UlTrace(PARSER, (
  430. "ul!PopChar( 0x%x ) ERROR: high bit set! bad utf!\n",
  431. Char
  432. ));
  433. goto end;
  434. }
  435. //
  436. // Normal character (again either escaped or unescaped)
  437. //
  438. else
  439. {
  440. //
  441. // Simple conversion to unicode, it's 7-bit ascii.
  442. //
  443. UnicodeChar = (USHORT)Char;
  444. }
  445. }
  446. else // UrlType != UrlTypeUtf8
  447. {
  448. UCHAR AnsiChar[2];
  449. ULONG AnsiCharSize;
  450. //
  451. // Convert ANSI character to Unicode.
  452. // If the UrlType is UrlTypeDbcs, then we may have
  453. // a DBCS lead/trail pair.
  454. //
  455. if (UrlType == UrlTypeDbcs && NlsLeadByteInfo[Char])
  456. {
  457. //
  458. // This is a double-byte character.
  459. //
  460. AnsiCharSize = 2;
  461. AnsiChar[0] = Char;
  462. Status = Unescape(pChar+CharToSkip, &AnsiChar[1]);
  463. if (!NT_SUCCESS(Status))
  464. {
  465. goto end;
  466. }
  467. CharToSkip += 3; // %xx
  468. }
  469. else
  470. {
  471. //
  472. // This is a single-byte character.
  473. //
  474. AnsiCharSize = 1;
  475. AnsiChar[0] = Char;
  476. }
  477. Status = RtlMultiByteToUnicodeN(
  478. &UnicodeChar,
  479. sizeof(WCHAR),
  480. NULL,
  481. (PCHAR) &AnsiChar[0],
  482. AnsiCharSize
  483. );
  484. if (!NT_SUCCESS(Status))
  485. {
  486. goto end;
  487. }
  488. }
  489. slash:
  490. //
  491. // turn backslashes into forward slashes
  492. //
  493. if (UrlPart != QueryString && UnicodeChar == L'\\')
  494. {
  495. UnicodeChar = L'/';
  496. }
  497. else if (UnicodeChar == UNICODE_NULL)
  498. {
  499. //
  500. // we pop'd a NULL. bad!
  501. //
  502. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  503. goto end;
  504. }
  505. *pCharToSkip = CharToSkip;
  506. *pUnicodeChar = UnicodeChar;
  507. Status = STATUS_SUCCESS;
  508. end:
  509. return Status;
  510. } // PopChar
  511. // Call this only after the entire request has been parsed
  512. //
  513. NTSTATUS
  514. UlpCookUrl(
  515. IN PUL_INTERNAL_REQUEST pRequest
  516. );
  //
  // UlpParseHttpVersion takes a byte string with its length and a pointer to
  // an HTTP_VERSION, and returns a ULONG.
  // NOTE(review): the meaning of the return value (presumably the number of
  // bytes consumed, with 0 on failure) is not visible here -- confirm against
  // the implementation.
  //
  517. ULONG
  518. UlpParseHttpVersion(
  519. PUCHAR pString,
  520. ULONG StringLength,
  521. PHTTP_VERSION pVersion
  522. );
  523. #ifdef __cplusplus
  524. }; // extern "C"
  525. #endif
  526. #endif // _PARSEP_H_