Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

4688 lines
133 KiB

  1. /*++
  2. Copyright (c) 1998-2002 Microsoft Corporation
  3. Module Name:
  4. C14n.c
  5. Abstract:
  6. URL canonicalization (c14n) routines
  7. Author:
  8. George V. Reilly (GeorgeRe) 22-Mar-2002
  9. Revision History:
  10. --*/
  11. #include <precomp.h>
  12. #include "c14np.h"
  13. #if defined(ALLOC_PRAGMA) && defined(KERNEL_PRIV)
  14. #pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfig)
  15. #pragma alloc_text( PAGE, HttpInitializeDefaultUrlC14nConfigEncoding)
  16. #pragma alloc_text( PAGE, HttpUnescapePercentHexEncoding)
  17. #pragma alloc_text( PAGE, HttppPopCharHostNameUtf8)
  18. #pragma alloc_text( PAGE, HttppPopCharHostNameDbcs)
  19. #pragma alloc_text( PAGE, HttppPopCharHostNameAnsi)
  20. #pragma alloc_text( PAGE, HttpCopyHost)
  21. #pragma alloc_text( PAGE, HttppCopyHostByType)
  22. #pragma alloc_text( PAGE, HttpValidateHostname)
  23. #pragma alloc_text( PAGE, HttppPopCharAbsPathUtf8)
  24. #pragma alloc_text( PAGE, HttppPopCharAbsPathDbcs)
  25. #pragma alloc_text( PAGE, HttppPopCharAbsPathAnsi)
  26. #pragma alloc_text( PAGE, HttppPopCharQueryString)
  27. #pragma alloc_text( PAGE, HttppCopyUrlByType)
  28. #pragma alloc_text( PAGE, HttpCopyUrl)
  29. #pragma alloc_text( PAGE, HttpCleanAndCopyUrl)
  30. #pragma alloc_text( PAGE, HttppCleanAndCopyUrlByType)
  31. #pragma alloc_text( PAGE, HttpFindUrlToken)
  32. #pragma alloc_text( PAGE, HttppParseIPv6Address)
  33. #pragma alloc_text( PAGE, HttppPrintIpAddressW)
  34. #pragma alloc_text( PAGE, HttpParseUrl)
  35. #pragma alloc_text( PAGE, HttpNormalizeParsedUrl)
  36. #endif // ALLOC_PRAGMA && KERNEL_PRIV
  37. #if 0 // Non-Pageable Functions
  38. NOT PAGEABLE --
  39. #endif // Non-Pageable Functions
  40. VOID
  41. HttpInitializeDefaultUrlC14nConfig(
  42. PURL_C14N_CONFIG pCfg
  43. )
  44. {
  45. PAGED_CODE();
  46. pCfg->HostnameDecodeOrder = UrlDecode_Utf8_Else_Dbcs_Else_Ansi;
  47. pCfg->AbsPathDecodeOrder = UrlDecode_Utf8;
  48. pCfg->EnableNonUtf8 = FALSE;
  49. pCfg->FavorUtf8 = FALSE;
  50. pCfg->EnableDbcs = FALSE;
  51. pCfg->PercentUAllowed = DEFAULT_C14N_PERCENT_U_ALLOWED;
  52. pCfg->AllowRestrictedChars = DEFAULT_C14N_ALLOW_RESTRICTED_CHARS;
  53. pCfg->CodePage = 0;
  54. pCfg->UrlMaxLength = DEFAULT_C14N_URL_MAX_LENGTH;
  55. pCfg->UrlSegmentMaxLength = DEFAULT_C14N_URL_SEGMENT_MAX_LENGTH;
  56. pCfg->UrlSegmentMaxCount = DEFAULT_C14N_URL_SEGMENT_MAX_COUNT;
  57. pCfg->MaxLabelLength = DEFAULT_C14N_MAX_LABEL_LENGTH;
  58. pCfg->MaxHostnameLength = DEFAULT_C14N_MAX_HOSTNAME_LENGTH;
  59. } // HttpInitializeDefaultUrlC14nConfig
  60. VOID
  61. HttpInitializeDefaultUrlC14nConfigEncoding(
  62. PURL_C14N_CONFIG pCfg,
  63. BOOLEAN EnableNonUtf8,
  64. BOOLEAN FavorUtf8,
  65. BOOLEAN EnableDbcs
  66. )
  67. {
  68. PAGED_CODE();
  69. HttpInitializeDefaultUrlC14nConfig(pCfg);
  70. pCfg->EnableNonUtf8 = EnableNonUtf8;
  71. pCfg->FavorUtf8 = FavorUtf8;
  72. pCfg->EnableDbcs = EnableDbcs;
  73. if (EnableNonUtf8)
  74. {
  75. if (FavorUtf8)
  76. {
  77. pCfg->AbsPathDecodeOrder = (EnableDbcs
  78. ? UrlDecode_Utf8_Else_Dbcs
  79. : UrlDecode_Utf8_Else_Ansi);
  80. }
  81. else
  82. {
  83. pCfg->AbsPathDecodeOrder = (EnableDbcs
  84. ? UrlDecode_Dbcs_Else_Utf8
  85. : UrlDecode_Ansi_Else_Utf8);
  86. }
  87. }
  88. else
  89. {
  90. pCfg->AbsPathDecodeOrder = UrlDecode_Utf8;
  91. }
  92. } // HttpInitializeDefaultUrlC14nConfigEncoding
  93. /***************************************************************************++
  94. Routine Description:
  95. Convert '%NN' or '%uNNNN' to a ULONG.
  96. Arguments:
  97. pSourceChar - Input buffer
  98. SourceLength - Length of pSourceChar, in bytes
  99. PercentUAllowed - Accept '%uNNNN' notation?
  100. pOutChar - decoded character
  101. pBytesToSkip - number of bytes consumed from pSourceChar;
  102. will be 3 for %NN and 6 for %uNNNN.
  103. Return Value:
  104. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  105. --***************************************************************************/
  106. NTSTATUS
  107. HttpUnescapePercentHexEncoding(
  108. IN PCUCHAR pSourceChar,
  109. IN ULONG SourceLength,
  110. IN BOOLEAN PercentUAllowed,
  111. OUT PULONG pOutChar,
  112. OUT PULONG pBytesToSkip
  113. )
  114. {
  115. ULONG Result, i, NumDigits;
  116. PCUCHAR pHexDigits;
  117. PAGED_CODE();
  118. if (SourceLength < STRLEN_LIT("%NN"))
  119. {
  120. UlTraceError(PARSER, (
  121. "http!HttpUnescapePercentHexEncoding(%p): "
  122. "Length too short, %lu.\n",
  123. pSourceChar, SourceLength
  124. ));
  125. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  126. }
  127. else if (pSourceChar[0] != PERCENT)
  128. {
  129. UlTraceError(PARSER, (
  130. "http!HttpUnescapePercentHexEncoding(%p): "
  131. "Starts with 0x%02lX, not '%%'.\n",
  132. pSourceChar, (ULONG) pSourceChar[0]
  133. ));
  134. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  135. }
  136. if (pSourceChar[1] != 'u' && pSourceChar[1] != 'U')
  137. {
  138. // RFC 2396 says that an "escaped octet is encoded as a character
  139. // triplet, consisting of the percent character '%' followed by
  140. // the two hexadecimal digits representing the octet code."
  141. pHexDigits = pSourceChar + STRLEN_LIT("%");
  142. NumDigits = 2;
  143. *pBytesToSkip = STRLEN_LIT("%NN");
  144. }
  145. else
  146. {
  147. // This is the %uNNNN notation generated by JavaScript's escape() fn
  148. if (! PercentUAllowed)
  149. {
  150. UlTraceError(PARSER, (
  151. "http!HttpUnescapePercentHexEncoding(%p): "
  152. "%%uNNNN forbidden.\n",
  153. pSourceChar, SourceLength
  154. ));
  155. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  156. }
  157. else if (SourceLength < STRLEN_LIT("%uNNNN"))
  158. {
  159. UlTraceError(PARSER, (
  160. "http!HttpUnescapePercentHexEncoding(%p): "
  161. "Length %lu too short for %%uNNNN.\n",
  162. pSourceChar, SourceLength
  163. ));
  164. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  165. }
  166. pHexDigits = pSourceChar + STRLEN_LIT("%u");
  167. NumDigits = 4;
  168. *pBytesToSkip = STRLEN_LIT("%uNNNN");
  169. }
  170. ASSERT(*pBytesToSkip <= SourceLength);
  171. Result = 0;
  172. for (i = 0; i < NumDigits; ++i)
  173. {
  174. ULONG Char = pHexDigits[i];
  175. ULONG Digit;
  176. //
  177. // HexToChar() inlined. Note: in ASCII, '0' < 'A' < 'a' and there are
  178. // no gaps in ranges '0'..'9', 'A'..'F', and 'a'..'f' (unlike EBCDIC,
  179. // which has gaps between 'I'/'J', 'R'/'S', 'i'/'j', and 'r'/'s').
  180. //
  181. C_ASSERT('0' < 'A' && 'A' < 'a');
  182. C_ASSERT('9' - '0' == 10 - 1);
  183. C_ASSERT('F' - 'A' == 6 - 1);
  184. C_ASSERT('f' - 'a' == 6 - 1);
  185. if (! IS_HTTP_HEX(Char))
  186. {
  187. UlTraceError(PARSER, (
  188. "http!HttpUnescapePercentHexEncoding(%p): "
  189. "Invalid hex character[%lu], 0x%02lX.\n",
  190. pSourceChar, i, Char
  191. ));
  192. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  193. }
  194. else if ('a' <= Char)
  195. {
  196. ASSERT('a' <= Char && Char <= 'f');
  197. Digit = Char - 'a' + 0xA;
  198. }
  199. else if ('A' <= Char)
  200. {
  201. ASSERT('A' <= Char && Char <= 'F');
  202. Digit = Char - 'A' + 0xA;
  203. }
  204. else
  205. {
  206. ASSERT('0' <= Char && Char <= '9');
  207. Digit = Char - '0';
  208. }
  209. ASSERT(Digit < 0x10);
  210. Result = (Result << 4) | Digit;
  211. }
  212. *pOutChar = Result;
  213. return STATUS_SUCCESS;
  214. } // HttpUnescapePercentHexEncoding
  215. /***************************************************************************++
  216. Routine Description:
  217. Consume 1-4 bytes from pSourceChar, treating it as raw UTF-8.
  218. This routine is only suitable for the hostname part of an HTTP URL,
  219. Arguments:
  220. pSourceChar - Input buffer
  221. SourceLength - Length of pSourceChar, in bytes
  222. pUnicodeChar - decoded character
  223. pBytesToSkip - number of characters consumed from pSourceChar
  224. Return Value:
  225. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  226. --***************************************************************************/
  227. NTSTATUS
  228. HttppPopCharHostNameUtf8(
  229. IN PCUCHAR pSourceChar,
  230. IN ULONG SourceLength,
  231. OUT PULONG pUnicodeChar,
  232. OUT PULONG pBytesToSkip
  233. )
  234. {
  235. NTSTATUS Status;
  236. PAGED_CODE();
  237. ASSERT(SourceLength > 0);
  238. Status = HttpUtf8RawBytesToUnicode(
  239. pSourceChar,
  240. SourceLength,
  241. pUnicodeChar,
  242. pBytesToSkip
  243. );
  244. return Status;
  245. } // HttppPopCharHostNameUtf8
  246. /***************************************************************************++
  247. Routine Description:
  248. Consume 1-2 bytes from pSourceChar and converts it from raw DBCS to Unicode.
  249. This routine is only suitable for the hostname part of an HTTP URL.
  250. Arguments:
  251. pSourceChar - Input buffer
  252. SourceLength - Length of pSourceChar, in bytes
  253. pUnicodeChar - decoded character
  254. pBytesToSkip - number of characters consumed from pSourceChar
  255. Return Value:
  256. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  257. --***************************************************************************/
  258. NTSTATUS
  259. HttppPopCharHostNameDbcs(
  260. IN PCUCHAR pSourceChar,
  261. IN ULONG SourceLength,
  262. OUT PULONG pUnicodeChar,
  263. OUT PULONG pBytesToSkip
  264. )
  265. {
  266. NTSTATUS Status;
  267. ULONG AnsiCharSize;
  268. WCHAR WideChar;
  269. PAGED_CODE();
  270. ASSERT(SourceLength > 0);
  271. if (! IS_DBCS_LEAD_BYTE(pSourceChar[0]))
  272. {
  273. AnsiCharSize = 1;
  274. }
  275. else
  276. {
  277. if (SourceLength < 2)
  278. {
  279. UlTraceError(PARSER, (
  280. "http!HttppPopCharHostNameDbcs(%p): "
  281. "ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
  282. pSourceChar, *pSourceChar
  283. ));
  284. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  285. }
  286. AnsiCharSize = 2;
  287. }
  288. Status = RtlMultiByteToUnicodeN(
  289. &WideChar,
  290. sizeof(WCHAR),
  291. NULL,
  292. (PCSTR) pSourceChar,
  293. AnsiCharSize
  294. );
  295. if (!NT_SUCCESS(Status))
  296. {
  297. UlTraceError(PARSER, (
  298. "http!HttppPopCharHostNameDbcs(%p): "
  299. "MultiByteToUnicode(%lu) failed, %s.\n",
  300. pSourceChar, AnsiCharSize, HttpStatusToString(Status)
  301. ));
  302. return Status;
  303. }
  304. *pUnicodeChar = WideChar;
  305. *pBytesToSkip = AnsiCharSize;
  306. return STATUS_SUCCESS;
  307. } // HttppPopCharHostNameDbcs
  308. /***************************************************************************++
  309. Routine Description:
  310. Consume 1 bytes from pSourceChar and converts it from raw ANSI to Unicode.
  311. This routine is only suitable for the hostname part of an HTTP URL.
  312. Arguments:
  313. pSourceChar - Input buffer
  314. SourceLength - Length of pSourceChar, in bytes
  315. pUnicodeChar - decoded character
  316. pBytesToSkip - number of characters consumed from pSourceChar
  317. Return Value:
  318. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  319. --***************************************************************************/
  320. NTSTATUS
  321. HttppPopCharHostNameAnsi(
  322. IN PCUCHAR pSourceChar,
  323. IN ULONG SourceLength,
  324. OUT PULONG pUnicodeChar,
  325. OUT PULONG pBytesToSkip
  326. )
  327. {
  328. NTSTATUS Status;
  329. #if !DBG
  330. UNREFERENCED_PARAMETER(SourceLength);
  331. #endif // !DBG
  332. PAGED_CODE();
  333. ASSERT(SourceLength > 0);
  334. *pUnicodeChar = AnsiToUnicodeMap[pSourceChar[0]];
  335. *pBytesToSkip = 1;
  336. Status = (0 != *pUnicodeChar)
  337. ? STATUS_SUCCESS
  338. : STATUS_OBJECT_PATH_SYNTAX_BAD;
  339. if (!NT_SUCCESS(Status))
  340. {
  341. UlTraceError(PARSER, (
  342. "http!HttppPopCharHostNameAnsi(%p): "
  343. "No mapping for %lu.\n",
  344. pSourceChar, *pSourceChar
  345. ));
  346. }
  347. return Status;
  348. } // HttppPopCharHostNameAnsi
  349. /***************************************************************************++
  350. Routine Description:
  351. Common tail function called at the end of the HttppPopCharAbsPath*()
  352. functions, to minimize code replication.
  353. Arguments:
  354. pSourceChar - Input buffer
  355. SourceLength - Length of pSourceChar, in bytes
  356. UnicodeChar - decoded character
  357. BytesToSkip - number of characters consumed from pSourceChar
  358. pUnicodeChar - where to put UnicodeChar result
  359. pBytesToSkip - where to put BytesToSkip result
  360. Return Value:
  361. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  362. --***************************************************************************/
  363. __inline
  364. NTSTATUS
  365. HttppPopCharAbsPathCommonTail(
  366. IN PCUCHAR pSourceChar,
  367. IN ULONG SourceLength,
  368. IN ULONG UnicodeChar,
  369. IN ULONG BytesToSkip,
  370. IN BOOLEAN AllowRestrictedChars,
  371. OUT PULONG pUnicodeChar,
  372. OUT PULONG pBytesToSkip
  373. )
  374. {
  375. #if !DBG
  376. UNREFERENCED_PARAMETER(pSourceChar);
  377. UNREFERENCED_PARAMETER(SourceLength);
  378. #endif // !DBG
  379. //
  380. // Special handling for characters in the 8-bit range.
  381. // May want to look at BytesToSkip to distinguish between
  382. // raw and hex-escaped/UTF-8-encoded data.
  383. //
  384. // In particular, should we allow %2F or %u002F as alternate
  385. // represenations of '/' in a URL? Why would anyone have a legitimate
  386. // need to escape a slash character?
  387. //
  388. if (UnicodeChar < 0x100)
  389. {
  390. // Transform backslashes to forward slashes
  391. if (BACK_SLASH == UnicodeChar)
  392. {
  393. UnicodeChar = FORWARD_SLASH;
  394. }
  395. else if (!AllowRestrictedChars && IS_URL_INVALID(UnicodeChar))
  396. {
  397. UlTraceError(PARSER, (
  398. "http!HttppPopCharAbsPathCommonTail(%p): "
  399. "Invalid character, U+%04X.\n",
  400. pSourceChar, UnicodeChar
  401. ));
  402. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  403. }
  404. // CODEWORK: should we allow hex-escaped "restricted" or "unwise"
  405. // characters at all?
  406. }
  407. ASSERT(BytesToSkip <= SourceLength);
  408. *pBytesToSkip = BytesToSkip;
  409. *pUnicodeChar = UnicodeChar;
  410. return STATUS_SUCCESS;
  411. } // HttppPopCharAbsPathCommonTail
  412. /***************************************************************************++
  413. Routine Description:
  414. Consume 1-12 bytes from pSourceChar. Handle hex-escaped UTF-8 encoding.
  415. This routine is only suitable for the /abspath part of an HTTP URL.
  416. Arguments:
  417. pSourceChar - Input buffer
  418. SourceLength - Length of pSourceChar, in bytes
  419. pUnicodeChar - decoded character
  420. pBytesToSkip - number of characters consumed from pSourceChar
  421. Return Value:
  422. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  423. --***************************************************************************/
  424. NTSTATUS
  425. HttppPopCharAbsPathUtf8(
  426. IN PCUCHAR pSourceChar,
  427. IN ULONG SourceLength,
  428. IN BOOLEAN PercentUAllowed,
  429. IN BOOLEAN AllowRestrictedChars,
  430. OUT PULONG pUnicodeChar,
  431. OUT PULONG pBytesToSkip
  432. )
  433. {
  434. NTSTATUS Status;
  435. ULONG UnicodeChar;
  436. ULONG BytesToSkip;
  437. ULONG Temp;
  438. ULONG OctetCount;
  439. UCHAR Octets[4];
  440. UCHAR LeadByte;
  441. //
  442. // Sanity check.
  443. //
  444. PAGED_CODE();
  445. ASSERT(SourceLength > 0);
  446. //
  447. // validate it as a valid URL character
  448. //
  449. if (! IS_URL_TOKEN(pSourceChar[0]))
  450. {
  451. UlTraceError(PARSER, (
  452. "http!HttppPopCharAbsPathUtf8(%p): "
  453. "first char, 0x%02lX, isn't URL token\n",
  454. pSourceChar, (ULONG) pSourceChar[0]
  455. ));
  456. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  457. }
  458. //
  459. // need to unescape hex encoding, '%NN' or '%uNNNN'?
  460. //
  461. if (PERCENT != pSourceChar[0])
  462. {
  463. UnicodeChar = pSourceChar[0];
  464. BytesToSkip = 1;
  465. //
  466. // All octets with bit7 set MUST be hex-escaped.
  467. // Do NOT accept literals with hi-bit set.
  468. //
  469. if (UnicodeChar > ASCII_MAX)
  470. {
  471. UlTraceError(PARSER, (
  472. "http!HttppPopCharAbsPathUtf8(%p): "
  473. "Invalid hi-bit literal, 0x%02lX.\n",
  474. pSourceChar, UnicodeChar
  475. ));
  476. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  477. }
  478. Status = STATUS_SUCCESS;
  479. goto unslash;
  480. }
  481. Status = HttpUnescapePercentHexEncoding(
  482. pSourceChar,
  483. SourceLength,
  484. PercentUAllowed,
  485. &UnicodeChar,
  486. &BytesToSkip
  487. );
  488. if (! NT_SUCCESS(Status))
  489. {
  490. UlTraceError(PARSER, (
  491. "http!HttppPopCharAbsPathUtf8(%p): "
  492. "Invalid hex encoding.\n",
  493. pSourceChar
  494. ));
  495. return Status;
  496. }
  497. //
  498. // If we consumed '%uNNNN', don't attempt any UTF-8 decoding
  499. //
  500. if (STRLEN_LIT("%uNNNN") == BytesToSkip)
  501. goto unslash;
  502. ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
  503. ASSERT(UnicodeChar <= 0xFF);
  504. Octets[0] = LeadByte = (UCHAR) UnicodeChar;
  505. OctetCount = UTF8_OCTET_COUNT(LeadByte);
  506. if (0 == OctetCount)
  507. {
  508. UlTraceError(PARSER, (
  509. "http!HttppPopCharAbsPathUtf8(%p): "
  510. "Invalid lead byte, 0x%02lX.\n",
  511. pSourceChar, UnicodeChar
  512. ));
  513. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  514. }
  515. ASSERT(OctetCount <= sizeof(Octets) / sizeof(Octets[0]));
  516. BytesToSkip = OctetCount * STRLEN_LIT("%NN");
  517. if (BytesToSkip > SourceLength)
  518. {
  519. UlTraceError(PARSER, (
  520. "http!HttppPopCharAbsPathUtf8(%p): "
  521. "%lu octets is not enough for %lu-byte UTF-8 encoding.\n",
  522. pSourceChar, OctetCount, SourceLength
  523. ));
  524. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  525. }
  526. if (OctetCount == 1)
  527. {
  528. #if DBG
  529. // Singleton: no trail bytes
  530. Status = HttpUtf8RawBytesToUnicode(
  531. Octets,
  532. OctetCount,
  533. &UnicodeChar,
  534. &Temp
  535. );
  536. ASSERT(STATUS_SUCCESS == Status);
  537. ASSERT(UnicodeChar == LeadByte);
  538. ASSERT(1 == Temp);
  539. #endif // DBG
  540. }
  541. else
  542. {
  543. ULONG i;
  544. //
  545. // Decode the hex-escaped trail bytes
  546. //
  547. for (i = 1; i < OctetCount; ++i)
  548. {
  549. ULONG TrailChar;
  550. UCHAR TrailByte;
  551. Status = HttpUnescapePercentHexEncoding(
  552. pSourceChar + i * STRLEN_LIT("%NN"),
  553. STRLEN_LIT("%NN"),
  554. FALSE, // do not allow %uNNNN for trail bytes
  555. &TrailChar,
  556. &Temp
  557. );
  558. if (! NT_SUCCESS(Status))
  559. {
  560. UlTraceError(PARSER, (
  561. "http!HttppPopCharAbsPathUtf8(%p): "
  562. "Invalid hex-encoded trail byte[%lu].\n",
  563. pSourceChar, i
  564. ));
  565. return Status;
  566. }
  567. ASSERT(STRLEN_LIT("%NN") == Temp);
  568. ASSERT(TrailChar <= 0xFF);
  569. Octets[i] = TrailByte = (UCHAR) TrailChar;
  570. if (! IS_UTF8_TRAILBYTE(TrailByte))
  571. {
  572. UlTraceError(PARSER, (
  573. "http!HttppPopCharAbsPathUtf8(%p): "
  574. "Invalid trail byte[%lu], 0x%02lX.\n",
  575. pSourceChar, i, TrailChar
  576. ));
  577. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  578. }
  579. }
  580. //
  581. // Decode the raw UTF-8 bytes
  582. //
  583. Status = HttpUtf8RawBytesToUnicode(
  584. Octets,
  585. OctetCount,
  586. &UnicodeChar,
  587. &Temp
  588. );
  589. if (! NT_SUCCESS(Status))
  590. {
  591. UlTraceError(PARSER, (
  592. "http!HttppPopCharAbsPathUtf8(%p): "
  593. "Invalid UTF-8 sequence.\n",
  594. pSourceChar
  595. ));
  596. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  597. }
  598. }
  599. unslash:
  600. ASSERT(NT_SUCCESS(Status));
  601. return HttppPopCharAbsPathCommonTail(
  602. pSourceChar,
  603. SourceLength,
  604. UnicodeChar,
  605. BytesToSkip,
  606. AllowRestrictedChars,
  607. pUnicodeChar,
  608. pBytesToSkip
  609. );
  610. } // HttppPopCharAbsPathUtf8
  611. /***************************************************************************++
  612. Routine Description:
  613. Consume 1-6 bytes from pSourceChar. Handle hex-escaped DBCS encoding.
  614. This routine is only suitable for the /abspath part of an HTTP URL.
  615. Arguments:
  616. pSourceChar - Input buffer
  617. SourceLength - Length of pSourceChar, in bytes
  618. pUnicodeChar - decoded character
  619. pBytesToSkip - number of characters consumed from pSourceChar
  620. Return Value:
  621. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  622. --***************************************************************************/
  623. NTSTATUS
  624. HttppPopCharAbsPathDbcs(
  625. IN PCUCHAR pSourceChar,
  626. IN ULONG SourceLength,
  627. IN BOOLEAN PercentUAllowed,
  628. IN BOOLEAN AllowRestrictedChars,
  629. OUT PULONG pUnicodeChar,
  630. OUT PULONG pBytesToSkip
  631. )
  632. {
  633. NTSTATUS Status;
  634. ULONG UnicodeChar;
  635. WCHAR WideChar;
  636. ULONG BytesToSkip;
  637. UCHAR AnsiChar[2];
  638. ULONG AnsiCharSize;
  639. UCHAR LeadByte;
  640. UCHAR SecondByte = 0;
  641. //
  642. // Sanity check.
  643. //
  644. PAGED_CODE();
  645. ASSERT(SourceLength > 0);
  646. if (! IS_URL_TOKEN(pSourceChar[0]))
  647. {
  648. UlTraceError(PARSER, (
  649. "http!HttppPopCharAbsPathDbcs(%p): "
  650. "first char, 0x%02lX, isn't URL token\n",
  651. pSourceChar, (ULONG) pSourceChar[0]
  652. ));
  653. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  654. }
  655. if (PERCENT != pSourceChar[0])
  656. {
  657. // Note: unlike UTF-8, we allow literal bytes whose top bit is set
  658. UnicodeChar = pSourceChar[0];
  659. BytesToSkip = 1;
  660. }
  661. else
  662. {
  663. // need to unescape hex encoding, '%NN' or '%uNNNN'
  664. Status = HttpUnescapePercentHexEncoding(
  665. pSourceChar,
  666. SourceLength,
  667. PercentUAllowed,
  668. &UnicodeChar,
  669. &BytesToSkip
  670. );
  671. if (! NT_SUCCESS(Status))
  672. {
  673. UlTraceError(PARSER, (
  674. "http!HttppPopCharAbsPathDbcs(%p): "
  675. "Invalid hex encoding.\n",
  676. pSourceChar
  677. ));
  678. return Status;
  679. }
  680. //
  681. // If we consumed '%uNNNN', don't attempt DBCS-to-Unicode conversion
  682. //
  683. if (STRLEN_LIT("%uNNNN") == BytesToSkip)
  684. goto unslash;
  685. ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
  686. ASSERT(UnicodeChar <= 0xFF);
  687. }
  688. LeadByte = (UCHAR) UnicodeChar;
  689. AnsiChar[0] = LeadByte;
  690. if (! IS_DBCS_LEAD_BYTE(LeadByte))
  691. {
  692. AnsiCharSize = 1;
  693. }
  694. else
  695. {
  696. //
  697. // This is a double-byte character.
  698. //
  699. ASSERT(BytesToSkip <= SourceLength);
  700. if (BytesToSkip == SourceLength)
  701. {
  702. UlTraceError(PARSER, (
  703. "http!HttppPopCharAbsPathDbcs(%p): "
  704. "ERROR: DBCS lead byte, 0x%02lX, at end of string\n",
  705. pSourceChar, UnicodeChar
  706. ));
  707. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  708. }
  709. AnsiCharSize = 2;
  710. SecondByte = pSourceChar[BytesToSkip];
  711. if (PERCENT != SecondByte)
  712. {
  713. BytesToSkip += 1;
  714. }
  715. else
  716. {
  717. ULONG TrailChar;
  718. ULONG Temp;
  719. if (BytesToSkip + STRLEN_LIT("%NN") > SourceLength)
  720. {
  721. UlTraceError(PARSER, (
  722. "http!HttppPopCharAbsPathDbcs(%p): "
  723. "ERROR: no space for DBCS hex-encoded suffix\n",
  724. pSourceChar
  725. ));
  726. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  727. }
  728. Status = HttpUnescapePercentHexEncoding(
  729. pSourceChar + BytesToSkip,
  730. SourceLength - BytesToSkip,
  731. FALSE, // no %uNNNN allowed here
  732. &TrailChar,
  733. &Temp
  734. );
  735. if (! NT_SUCCESS(Status))
  736. {
  737. UlTraceError(PARSER, (
  738. "http!HttppPopCharAbsPathDbcs(%p): "
  739. "Invalid hex encoding of trail byte.\n",
  740. pSourceChar
  741. ));
  742. return Status;
  743. }
  744. ASSERT(STRLEN_LIT("%NN") == Temp);
  745. ASSERT(TrailChar <= 0xFF);
  746. SecondByte = (UCHAR) TrailChar;
  747. BytesToSkip += STRLEN_LIT("%NN");
  748. }
  749. AnsiChar[1] = SecondByte;
  750. }
  751. Status = RtlMultiByteToUnicodeN(
  752. &WideChar,
  753. sizeof(WCHAR),
  754. NULL,
  755. (PCHAR) &AnsiChar[0],
  756. AnsiCharSize
  757. );
  758. if (!NT_SUCCESS(Status))
  759. {
  760. UlTraceError(PARSER, (
  761. "http!HttppPopCharAbsPathDbcs(%p): "
  762. "MultiByteToUnicode(%lu) failed, %s.\n",
  763. pSourceChar, AnsiCharSize, HttpStatusToString(Status)
  764. ));
  765. return Status;
  766. }
  767. UnicodeChar = WideChar;
  768. #if DBG
  769. //
  770. // Describe conversion in debug spew.
  771. //
  772. if (1 == AnsiCharSize)
  773. {
  774. UlTraceVerbose(PARSER, (
  775. "http!HttppPopCharAbsPathDbcs(%p): "
  776. "converted %02X to U+%04lX '%C'\n",
  777. pSourceChar,
  778. LeadByte,
  779. UnicodeChar,
  780. UnicodeChar
  781. ));
  782. }
  783. else
  784. {
  785. ASSERT(2 == AnsiCharSize);
  786. UlTraceVerbose(PARSER, (
  787. "http!HttppPopCharAbsPathDbcs(%p): "
  788. "converted %02X %02X to U+%04lX '%C'\n",
  789. pSourceChar,
  790. LeadByte,
  791. SecondByte,
  792. UnicodeChar,
  793. UnicodeChar
  794. ));
  795. }
  796. #endif // DBG
  797. unslash:
  798. ASSERT(NT_SUCCESS(Status));
  799. return HttppPopCharAbsPathCommonTail(
  800. pSourceChar,
  801. SourceLength,
  802. UnicodeChar,
  803. BytesToSkip,
  804. AllowRestrictedChars,
  805. pUnicodeChar,
  806. pBytesToSkip
  807. );
  808. } // HttppPopCharAbsPathDbcs
  809. /***************************************************************************++
  810. Routine Description:
  811. Consume 1-6 bytes from pSourceChar. Handle hex-escaped ANSI encoding.
  812. This routine is only suitable for the /abspath part of an HTTP URL.
  813. Arguments:
  814. pSourceChar - Input buffer
  815. SourceLength - Length of pSourceChar, in bytes
  816. pUnicodeChar - decoded character
  817. pBytesToSkip - number of characters consumed from pSourceChar
  818. Return Value:
  819. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  820. --***************************************************************************/
  821. NTSTATUS
  822. HttppPopCharAbsPathAnsi(
  823. IN PCUCHAR pSourceChar,
  824. IN ULONG SourceLength,
  825. IN BOOLEAN PercentUAllowed,
  826. IN BOOLEAN AllowRestrictedChars,
  827. OUT PULONG pUnicodeChar,
  828. OUT PULONG pBytesToSkip
  829. )
  830. {
  831. NTSTATUS Status = STATUS_SUCCESS;
  832. ULONG UnicodeChar;
  833. ULONG BytesToSkip;
  834. //
  835. // Sanity check.
  836. //
  837. PAGED_CODE();
  838. ASSERT(SourceLength > 0);
  839. //
  840. // DBCS and ANSI decoders must allow any raw byte whose top bit
  841. // is set (0x80-0xFF)
  842. //
  843. if (! IS_URL_TOKEN(pSourceChar[0]) &&
  844. !(0x80 & pSourceChar[0]))
  845. {
  846. UlTraceError(PARSER, (
  847. "http!HttppPopCharAbsPathAnsi(%p): "
  848. "first char, 0x%02lX, isn't URL token\n",
  849. pSourceChar, (ULONG) pSourceChar[0]
  850. ));
  851. RETURN(STATUS_OBJECT_PATH_SYNTAX_BAD);
  852. }
  853. if (PERCENT != pSourceChar[0])
  854. {
  855. // Note: unlike UTF-8, we allow literal bytes whose top bit is set
  856. UnicodeChar = AnsiToUnicodeMap[ pSourceChar[0] ];
  857. BytesToSkip = 1;
  858. }
  859. else
  860. {
  861. // need to unescape hex encoding, '%NN' or '%uNNNN'
  862. Status = HttpUnescapePercentHexEncoding(
  863. pSourceChar,
  864. SourceLength,
  865. PercentUAllowed,
  866. &UnicodeChar,
  867. &BytesToSkip
  868. );
  869. if (! NT_SUCCESS(Status))
  870. {
  871. UlTraceError(PARSER, (
  872. "http!HttppPopCharAbsPathAnsi(%p): "
  873. "Invalid hex encoding.\n",
  874. pSourceChar
  875. ));
  876. return Status;
  877. }
  878. //
  879. // If we consumed '%uNNNN', don't attempt Ansi-to-Unicode conversion
  880. //
  881. if (STRLEN_LIT("%uNNNN") != BytesToSkip)
  882. {
  883. ASSERT(STRLEN_LIT("%NN") == BytesToSkip);
  884. ASSERT(UnicodeChar <= 0xFF);
  885. UnicodeChar = AnsiToUnicodeMap[(UCHAR) UnicodeChar];
  886. }
  887. }
  888. ASSERT(NT_SUCCESS(Status));
  889. return HttppPopCharAbsPathCommonTail(
  890. pSourceChar,
  891. SourceLength,
  892. UnicodeChar,
  893. BytesToSkip,
  894. AllowRestrictedChars,
  895. pUnicodeChar,
  896. pBytesToSkip
  897. );
  898. } // HttppPopCharAbsPathAnsi
  899. /***************************************************************************++
  900. Routine Description:
  901. Consume 1 bytes from pSourceChar and returns it unaltered.
  902. This routine is only suitable for the ?querystring part of an HTTP URL,
  903. which we do not interpret.
  904. CODEWORK: don't 'convert' querystring to Unicode. Send it up verbatim.
  905. Arguments:
  906. pSourceChar - Input buffer
  907. SourceLength - Length of pSourceChar, in bytes
  908. pUnicodeChar - decoded character
  909. pBytesToSkip - number of characters consumed from pSourceChar
  910. Return Value:
  911. STATUS_SUCCESS or STATUS_OBJECT_PATH_SYNTAX_BAD
  912. --***************************************************************************/
  913. NTSTATUS
  914. HttppPopCharQueryString(
  915. IN PCUCHAR pSourceChar,
  916. IN ULONG SourceLength,
  917. IN BOOLEAN PercentUAllowed,
  918. IN BOOLEAN AllowRestrictedChars,
  919. OUT PULONG pUnicodeChar,
  920. OUT PULONG pBytesToSkip
  921. )
  922. {
  923. PAGED_CODE();
  924. UNREFERENCED_PARAMETER(SourceLength);
  925. UNREFERENCED_PARAMETER(PercentUAllowed);
  926. UNREFERENCED_PARAMETER(AllowRestrictedChars);
  927. *pUnicodeChar = *pSourceChar;
  928. *pBytesToSkip = 1;
  929. return STATUS_SUCCESS;
  930. } // HttppPopCharQueryString
  931. //
  932. // a cool local helper macro
  933. //
  934. #define EMIT_CHAR(ch, pDest, BytesCopied, Status, AllowRestrictedChars) \
  935. do \
  936. { \
  937. WCHAR HighSurrogate, LowSurrogate; \
  938. \
  939. if ((ch) > LOW_NONCHAR_BITS) \
  940. { \
  941. Status = HttpUcs4toUtf16((ch), \
  942. &HighSurrogate, &LowSurrogate); \
  943. \
  944. if (! NT_SUCCESS(Status)) \
  945. goto end; \
  946. \
  947. *pDest++ = HighSurrogate; \
  948. *pDest++ = LowSurrogate; \
  949. BytesCopied += 2 * sizeof(WCHAR); \
  950. } \
  951. else \
  952. { \
  953. ASSERT(ch < HIGH_SURROGATE_START \
  954. || LOW_SURROGATE_END < ch); \
  955. \
  956. if ( IS_UNICODE_NONCHAR((ch)) ) \
  957. { \
  958. UlTraceError(PARSER, ( \
  959. "http!HttpUcs4toUtf16(): " \
  960. "Non-character code point, U+%04lX.\n", \
  961. (ch) )); \
  962. \
  963. Status = STATUS_INVALID_PARAMETER; \
  964. goto end; \
  965. } \
  966. \
  967. *pDest++ = (WCHAR) (ch); \
  968. BytesCopied += sizeof(WCHAR); \
  969. } \
  970. \
  971. /* Can probably omit this test */ \
  972. if (BytesCopied > UNICODE_STRING_MAX_BYTE_LEN) \
  973. { \
  974. Status = STATUS_DATA_OVERRUN; \
  975. goto end; \
  976. } \
  977. } while (0, 0)
  978. #define EMIT_LITERAL_CHAR(ch, pDest, BytesCopied) \
  979. do \
  980. { \
  981. ASSERT(IS_ASCII(ch)); \
  982. \
  983. *pDest++ = (WCHAR) (ch); \
  984. BytesCopied += sizeof(WCHAR); \
  985. } while (0, 0)
  986. #define HttppUrlEncodingToString(UrlEncoding) \
  987. ((UrlEncoding == UrlDecode_Ansi) \
  988. ? "Ansi" \
  989. : (UrlEncoding == UrlDecode_Dbcs) \
  990. ? "Dbcs" \
  991. : "Utf8")
  992. /***************************************************************************++
  993. Routine Description:
  994. Copies a hostname, converting it to Unicode
  995. Arguments:
  996. Return Value:
  997. NTSTATUS - Completion status.
  998. --***************************************************************************/
  999. NTSTATUS
  1000. HttpCopyHost(
  1001. IN PURL_C14N_CONFIG pCfg,
  1002. OUT PWSTR pDestination,
  1003. IN PCUCHAR pSource,
  1004. IN ULONG SourceLength,
  1005. OUT PULONG pBytesCopied,
  1006. OUT PURL_ENCODING_TYPE pHostnameEncodingType
  1007. )
  1008. {
  1009. NTSTATUS Status = STATUS_UNSUCCESSFUL;
  1010. ULONG DecodeOrder = pCfg->HostnameDecodeOrder;
  1011. PAGED_CODE();
  1012. ASSERT(NULL != pCfg);
  1013. ASSERT(NULL != pDestination);
  1014. ASSERT(NULL != pSource);
  1015. ASSERT(NULL != pBytesCopied);
  1016. ASSERT(NULL != pHostnameEncodingType);
  1017. if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
  1018. {
  1019. UlTraceError(PARSER,
  1020. ("http!HttpCopyHost: invalid DecodeOrder, 0x%lX\n",
  1021. DecodeOrder
  1022. ));
  1023. RETURN(STATUS_INVALID_PARAMETER);
  1024. }
  1025. for ( ;
  1026. 0 != DecodeOrder && !NT_SUCCESS(Status);
  1027. DecodeOrder >>= UrlDecode_Shift
  1028. )
  1029. {
  1030. ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
  1031. switch (UrlEncoding)
  1032. {
  1033. default:
  1034. ASSERT(! "Impossible UrlDecodeOrder");
  1035. case UrlDecode_None:
  1036. break;
  1037. case UrlDecode_Ansi:
  1038. case UrlDecode_Dbcs:
  1039. case UrlDecode_Utf8:
  1040. UlTraceVerbose(PARSER,
  1041. ("http!HttpCopyHost(%s, Src=%p, %lu)\n",
  1042. HttppUrlEncodingToString(UrlEncoding),
  1043. pSource, SourceLength
  1044. ));
  1045. Status = HttppCopyHostByType(
  1046. (URL_ENCODING_TYPE) UrlEncoding,
  1047. pDestination,
  1048. pSource,
  1049. SourceLength,
  1050. pBytesCopied
  1051. );
  1052. if (NT_SUCCESS(Status))
  1053. {
  1054. *pHostnameEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
  1055. UlTraceVerbose(PARSER,
  1056. ("http!HttpCopyHost(%s): "
  1057. "(%lu) '%.*s' -> (%lu) '%ls'\n",
  1058. HttppUrlEncodingToString(UrlEncoding),
  1059. SourceLength, SourceLength, pSource,
  1060. *pBytesCopied/sizeof(WCHAR), pDestination
  1061. ));
  1062. }
  1063. break;
  1064. };
  1065. }
  1066. return Status;
  1067. } // HttpCopyHost
  1068. /***************************************************************************++
  1069. Routine Description:
  1070. Copies a hostname, converting it to Unicode
  1071. CODEWORK: Handle ACE-encoded hostnames
  1072. Arguments:
  1073. Return Value:
  1074. NTSTATUS - Completion status.
  1075. --***************************************************************************/
  1076. NTSTATUS
  1077. HttppCopyHostByType(
  1078. IN URL_ENCODING_TYPE UrlEncoding,
  1079. OUT PWSTR pDestination,
  1080. IN PCUCHAR pSource,
  1081. IN ULONG SourceLength,
  1082. OUT PULONG pBytesCopied
  1083. )
  1084. {
  1085. NTSTATUS Status;
  1086. PWSTR pDest;
  1087. PCUCHAR pChar;
  1088. ULONG BytesCopied;
  1089. ULONG UnicodeChar;
  1090. ULONG CharToSkip;
  1091. PFN_POPCHAR_HOSTNAME pfnPopChar;
  1092. if (UrlEncoding_Ansi == UrlEncoding)
  1093. pfnPopChar = &HttppPopCharHostNameAnsi;
  1094. else if (UrlEncoding_Dbcs == UrlEncoding)
  1095. pfnPopChar = &HttppPopCharHostNameDbcs;
  1096. else if (UrlEncoding_Utf8 == UrlEncoding)
  1097. pfnPopChar = &HttppPopCharHostNameUtf8;
  1098. else
  1099. {
  1100. ASSERT(! "Invalid UrlEncoding");
  1101. RETURN(STATUS_INVALID_PARAMETER);
  1102. }
  1103. //
  1104. // Sanity check.
  1105. //
  1106. PAGED_CODE();
  1107. pDest = pDestination;
  1108. BytesCopied = 0;
  1109. pChar = pSource;
  1110. while ((int)SourceLength > 0)
  1111. {
  1112. UnicodeChar = *pChar;
  1113. if (IS_ASCII(UnicodeChar))
  1114. {
  1115. CharToSkip = 1;
  1116. }
  1117. else
  1118. {
  1119. Status = (*pfnPopChar)(
  1120. pChar,
  1121. SourceLength,
  1122. &UnicodeChar,
  1123. &CharToSkip
  1124. );
  1125. if (NT_SUCCESS(Status) == FALSE)
  1126. goto end;
  1127. }
  1128. ASSERT(CharToSkip <= SourceLength);
  1129. EMIT_CHAR(
  1130. UnicodeChar,
  1131. pDest,
  1132. BytesCopied,
  1133. Status,
  1134. FALSE
  1135. );
  1136. pChar += CharToSkip;
  1137. SourceLength -= CharToSkip;
  1138. }
  1139. //
  1140. // terminate the string, it hasn't been done in the loop
  1141. //
  1142. ASSERT((pDest-1)[0] != UNICODE_NULL);
  1143. pDest[0] = UNICODE_NULL;
  1144. *pBytesCopied = BytesCopied;
  1145. Status = STATUS_SUCCESS;
  1146. end:
  1147. return Status;
  1148. } // HttppCopyHostByType
  1149. /*++
  1150. Routine Description:
  1151. Validates that a hostname is well-formed
  1152. CODEWORK: For future IDN (International Domain Names) work,
  1153. we may need to handle raw UTF-8 or ACE hostnames.
  1154. Note: if the validation algorithm changes here, it may be necessary
  1155. to update HttpParseUrl() too.
  1156. Arguments:
  1157. pHostname - the hostname
  1158. HostnameLength - length of hostname, in bytes
  1159. HostnameType - Source of the hostname: Host header, AbsUri, or
  1160. synthesized from the transport's local IP address
  1161. Return Value:
  1162. STATUS_SUCCESS if valid
  1163. --*/
  1164. NTSTATUS
  1165. HttpValidateHostname(
  1166. IN PURL_C14N_CONFIG pCfg,
  1167. IN PCUCHAR pHostname,
  1168. IN ULONG HostnameLength,
  1169. IN HOSTNAME_TYPE HostnameType,
  1170. OUT PSHORT pAddressType
  1171. )
  1172. {
  1173. PCUCHAR pChar;
  1174. PCUCHAR pLabel;
  1175. PCUCHAR pEnd = pHostname + HostnameLength;
  1176. PCSTR pTerminator;
  1177. NTSTATUS Status;
  1178. USHORT Port;
  1179. struct in_addr IPv4Address;
  1180. struct in6_addr IPv6Address;
  1181. BOOLEAN AlphaLabel;
  1182. PAGED_CODE();
  1183. ASSERT(NULL != pCfg);
  1184. ASSERT(NULL != pHostname);
  1185. ASSERT(NULL != pAddressType);
  1186. if (0 == HostnameLength)
  1187. {
  1188. // RFC 2616, 14.23 "Host" says that the Host header can be empty
  1189. if (Hostname_HostHeader == HostnameType)
  1190. goto end;
  1191. // It is an error for empty hostnames to appear elsewhere
  1192. UlTraceError(PARSER,
  1193. ("http!HttpValidateHostname: empty hostname\n"
  1194. ));
  1195. RETURN(STATUS_INVALID_PARAMETER);
  1196. }
  1197. // Is this an IPv6 literal address, per RFC 2732?
  1198. if ('[' == *pHostname)
  1199. {
  1200. // Empty brackets?
  1201. if (HostnameLength < STRLEN_LIT("[0]") || ']' == pHostname[1])
  1202. {
  1203. UlTraceError(PARSER,
  1204. ("http!HttpValidateHostname: IPv6 address too short\n"
  1205. ));
  1206. RETURN(STATUS_INVALID_PARAMETER);
  1207. }
  1208. for (pChar = pHostname + STRLEN_LIT("["); pChar < pEnd; ++pChar)
  1209. {
  1210. if (']' == *pChar)
  1211. break;
  1212. //
  1213. // Dots are allowed because the last 32 bits may be represented
  1214. // in IPv4 dotted-octet notation. We do not accept Scope IDs
  1215. // (indicated by '%') in hostnames.
  1216. //
  1217. if (IS_HTTP_HEX(*pChar) || ':' == *pChar || '.' == *pChar)
  1218. continue;
  1219. UlTraceError(PARSER,
  1220. ("http!HttpValidateHostname: "
  1221. "Invalid char in IPv6 address, 0x%02X '%c', "
  1222. "after '%.*s'\n",
  1223. *pChar,
  1224. IS_HTTP_PRINT(*pChar) ? *pChar : '?',
  1225. DIFF(pChar - pHostname),
  1226. pHostname
  1227. ));
  1228. RETURN(STATUS_INVALID_PARAMETER);
  1229. }
  1230. if (pChar == pEnd)
  1231. {
  1232. UlTraceError(PARSER,
  1233. ("http!HttpValidateHostname: No ']' for IPv6 address\n"
  1234. ));
  1235. RETURN(STATUS_INVALID_PARAMETER);
  1236. }
  1237. ASSERT(pChar < pEnd);
  1238. ASSERT(']' == *pChar);
  1239. // Let the RTL routine do the hard work of parsing IPv6 addrs
  1240. Status = RtlIpv6StringToAddressA(
  1241. (PCSTR) pHostname + STRLEN_LIT("["),
  1242. &pTerminator,
  1243. &IPv6Address
  1244. );
  1245. if (! NT_SUCCESS(Status))
  1246. {
  1247. UlTraceError(PARSER,
  1248. ("http!HttpValidateHostname: "
  1249. "Invalid IPv6 address, %s\n",
  1250. HttpStatusToString(Status)
  1251. ));
  1252. RETURN(Status);
  1253. }
  1254. if (pTerminator != (PCSTR) pChar)
  1255. {
  1256. UlTraceError(PARSER,
  1257. ("http!HttpValidateHostname: "
  1258. "Invalid IPv6 terminator, 0x%02X '%c'\n",
  1259. *pTerminator,
  1260. IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
  1261. ));
  1262. RETURN(STATUS_INVALID_PARAMETER);
  1263. }
  1264. *pAddressType = TDI_ADDRESS_TYPE_IP6;
  1265. // Skip the terminating ']'
  1266. pChar += STRLEN_LIT("]");
  1267. // Any chars after the ']'?
  1268. if (pChar == pEnd)
  1269. {
  1270. ASSERT(DIFF(pEnd - pHostname) <= pCfg->MaxHostnameLength);
  1271. goto end;
  1272. }
  1273. ASSERT(pChar < pEnd);
  1274. if (':' == *pChar)
  1275. goto port;
  1276. UlTraceError(PARSER,
  1277. ("http!HttpValidateHostname: "
  1278. "Invalid char after IPv6 ']', 0x%02X '%c'\n",
  1279. *pChar,
  1280. IS_HTTP_PRINT(*pChar) ? *pChar : '?'
  1281. ));
  1282. RETURN(STATUS_INVALID_PARAMETER);
  1283. }
  1284. //
  1285. // It must be a domain name or an IPv4 literal. We'll try to treat
  1286. // it as a domain name first. If it turns out to be all-numeric,
  1287. // we'll try decoding it as an IPv4 literal. We'll see if the name
  1288. // is well-formed, but we will not do a DNS lookup to see if it exists,
  1289. // as that would be much too expensive.
  1290. //
  1291. AlphaLabel = FALSE;
  1292. pLabel = pHostname;
  1293. for (pChar = pHostname; pChar < pEnd; ++pChar)
  1294. {
  1295. if (':' == *pChar)
  1296. {
  1297. if (pChar == pHostname)
  1298. {
  1299. UlTraceError(PARSER,
  1300. ("http!HttpValidateHostname: empty hostname\n"
  1301. ));
  1302. RETURN(STATUS_INVALID_PARAMETER);
  1303. }
  1304. // exit the loop
  1305. break;
  1306. }
  1307. if ('.' == *pChar)
  1308. {
  1309. ULONG LabelLength = DIFF(pChar - pLabel);
  1310. // There must be at least one char in the label
  1311. if (0 == LabelLength)
  1312. {
  1313. UlTraceError(PARSER,
  1314. ("http!HttpValidateHostname: empty label\n"
  1315. ));
  1316. RETURN(STATUS_INVALID_PARAMETER);
  1317. }
  1318. // Label can't have more than 63 chars
  1319. if (LabelLength > pCfg->MaxLabelLength)
  1320. {
  1321. UlTraceError(PARSER,
  1322. ("http!HttpValidateHostname: overlong label, %lu\n",
  1323. LabelLength
  1324. ));
  1325. RETURN(STATUS_INVALID_PARAMETER);
  1326. }
  1327. // Reset for the next label
  1328. pLabel = pChar + STRLEN_LIT(".");
  1329. continue;
  1330. }
  1331. // CODEWORK: handle DBCS characters
  1332. if (!IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
  1333. {
  1334. if (!IS_HTTP_DIGIT(*pChar))
  1335. AlphaLabel = TRUE;
  1336. if (pChar > pLabel)
  1337. continue;
  1338. // The first char of a label cannot be a hyphen. (Underscore?)
  1339. if ('-' == *pChar)
  1340. {
  1341. UlTraceError(PARSER,
  1342. ("http!HttpValidateHostname: "
  1343. "'-' at beginning of label\n"
  1344. ));
  1345. RETURN(STATUS_INVALID_PARAMETER);
  1346. }
  1347. continue;
  1348. }
  1349. UlTraceError(PARSER,
  1350. ("http!HttpValidateHostname: "
  1351. "Invalid char in hostname, 0x%02X '%c', "
  1352. "after '%.*s'\n",
  1353. *pChar,
  1354. IS_HTTP_PRINT(*pChar) ? *pChar : '?',
  1355. DIFF(pChar - pHostname),
  1356. pHostname
  1357. ));
  1358. RETURN(STATUS_INVALID_PARAMETER);
  1359. } // loop through hostname
  1360. ASSERT(pChar == pEnd || ':' == *pChar);
  1361. if (AlphaLabel)
  1362. {
  1363. *pAddressType = 0;
  1364. }
  1365. else
  1366. {
  1367. // Let's see if it's a valid IPv4 address
  1368. Status = RtlIpv4StringToAddressA(
  1369. (PCSTR) pHostname,
  1370. TRUE, // strict => 4 dotted decimal octets
  1371. &pTerminator,
  1372. &IPv4Address
  1373. );
  1374. if (!NT_SUCCESS(Status))
  1375. {
  1376. UlTraceError(PARSER,
  1377. ("http!HttpValidateHostname: "
  1378. "Invalid IPv4 address, %s\n",
  1379. HttpStatusToString(Status)
  1380. ));
  1381. RETURN(Status);
  1382. }
  1383. if (pTerminator != (PCSTR) pChar)
  1384. {
  1385. ASSERT(pTerminator < (PCSTR) pChar);
  1386. UlTraceError(PARSER,
  1387. ("http!HttpValidateHostname: "
  1388. "Invalid IPv4 address after %lu chars, "
  1389. "0x%02X, '%c'\n",
  1390. DIFF(pTerminator - (PCSTR) pHostname),
  1391. *pTerminator,
  1392. IS_HTTP_PRINT(*pTerminator) ? *pTerminator : '?'
  1393. ));
  1394. RETURN(STATUS_INVALID_PARAMETER);
  1395. }
  1396. *pAddressType = TDI_ADDRESS_TYPE_IP;
  1397. }
  1398. port:
  1399. //
  1400. // Parse the port number
  1401. //
  1402. // Check for overlong hostnames
  1403. if (DIFF(pChar - pHostname) > pCfg->MaxHostnameLength)
  1404. {
  1405. UlTraceError(PARSER,
  1406. ("http!HttpValidateHostname: overlong hostname, %lu\n",
  1407. DIFF(pChar - pHostname)
  1408. ));
  1409. RETURN(STATUS_INVALID_PARAMETER);
  1410. }
  1411. if (pChar == pEnd)
  1412. goto end;
  1413. ASSERT(pHostname < pChar && pChar < pEnd);
  1414. ASSERT(':' == *pChar);
  1415. pChar += STRLEN_LIT(":");
  1416. ASSERT(pChar <= pEnd);
  1417. // RFC 2616, section 3.2.2 "http URL", says:
  1418. // "If the port is empty or not given, port 80 is assumed".
  1419. if (pChar == pEnd)
  1420. {
  1421. Port = 80;
  1422. goto end;
  1423. }
  1424. Status = HttpAnsiStringToUShort(
  1425. pChar,
  1426. pEnd - pChar, // <port> must occupy all remaining chars
  1427. FALSE, // no leading zeros permitted
  1428. 10,
  1429. (PUCHAR*) &pTerminator,
  1430. &Port
  1431. );
  1432. if (!NT_SUCCESS(Status))
  1433. {
  1434. UlTraceError(PARSER,
  1435. ("http!HttpValidateHostname: "
  1436. "Invalid port number, %s\n",
  1437. HttpStatusToString(Status)
  1438. ));
  1439. RETURN(STATUS_INVALID_PARAMETER);
  1440. }
  1441. ASSERT(pTerminator == (PCSTR) pEnd);
  1442. if (0 == Port)
  1443. {
  1444. UlTraceError(PARSER,
  1445. ("http!HttpValidateHostname: Port must not be zero.\n"
  1446. ));
  1447. RETURN(STATUS_INVALID_PARAMETER);
  1448. }
  1449. end:
  1450. RETURN(STATUS_SUCCESS);
  1451. } // HttpValidateHostname
  1452. /***************************************************************************++
  1453. Routine Description:
  1454. Convert to unicode
  1455. Arguments:
  1456. Return Value:
  1457. NTSTATUS - Completion status.
  1458. --***************************************************************************/
  1459. NTSTATUS
  1460. HttpCopyUrl(
  1461. IN PURL_C14N_CONFIG pCfg,
  1462. OUT PWSTR pDestination,
  1463. IN PCUCHAR pSource,
  1464. IN ULONG SourceLength,
  1465. OUT PULONG pBytesCopied,
  1466. OUT PURL_ENCODING_TYPE pUrlEncodingType
  1467. )
  1468. {
  1469. NTSTATUS Status = STATUS_UNSUCCESSFUL;
  1470. ULONG DecodeOrder = pCfg->AbsPathDecodeOrder;
  1471. PAGED_CODE();
  1472. ASSERT(NULL != pDestination);
  1473. ASSERT(NULL != pSource);
  1474. ASSERT(NULL != pBytesCopied);
  1475. ASSERT(NULL != pUrlEncodingType);
  1476. if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
  1477. {
  1478. UlTraceError(PARSER,
  1479. ("http!HttpCopyUrl: invalid DecodeOrder, 0x%lX\n",
  1480. DecodeOrder
  1481. ));
  1482. RETURN(STATUS_INVALID_PARAMETER);
  1483. }
  1484. for ( ;
  1485. 0 != DecodeOrder && !NT_SUCCESS(Status);
  1486. DecodeOrder >>= UrlDecode_Shift
  1487. )
  1488. {
  1489. ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
  1490. switch (UrlEncoding)
  1491. {
  1492. default:
  1493. ASSERT(! "Impossible UrlDecodeOrder");
  1494. case UrlDecode_None:
  1495. break;
  1496. case UrlDecode_Ansi:
  1497. case UrlDecode_Dbcs:
  1498. case UrlDecode_Utf8:
  1499. UlTraceVerbose(PARSER,
  1500. ("http!HttpCopyUrl(%s, Src=%p, %lu)\n",
  1501. HttppUrlEncodingToString(UrlEncoding),
  1502. pSource, SourceLength
  1503. ));
  1504. Status = HttppCopyUrlByType(
  1505. pCfg,
  1506. (URL_ENCODING_TYPE) UrlEncoding,
  1507. pDestination,
  1508. pSource,
  1509. SourceLength,
  1510. pBytesCopied
  1511. );
  1512. if (NT_SUCCESS(Status))
  1513. {
  1514. *pUrlEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
  1515. UlTraceVerbose(PARSER,
  1516. ("http!HttpCopyUrl(%s): "
  1517. "(%lu) '%.*s' -> (%lu) '%ls'\n",
  1518. HttppUrlEncodingToString(UrlEncoding),
  1519. SourceLength, SourceLength, pSource,
  1520. *pBytesCopied/sizeof(WCHAR), pDestination
  1521. ));
  1522. }
  1523. break;
  1524. };
  1525. }
  1526. return Status;
  1527. } // HttpCopyUrl
  1528. /***************************************************************************++
  1529. Routine Description:
  1530. This function can be told to copy UTF-8, ANSI, or DBCS URLs.
  1531. Convert to Unicode
  1532. Arguments:
  1533. Return Value:
  1534. NTSTATUS - Completion status.
  1535. --***************************************************************************/
  1536. NTSTATUS
  1537. HttppCopyUrlByType(
  1538. IN PURL_C14N_CONFIG pCfg,
  1539. IN URL_ENCODING_TYPE UrlEncoding,
  1540. OUT PWSTR pDestination,
  1541. IN PCUCHAR pSource,
  1542. IN ULONG SourceLength,
  1543. OUT PULONG pBytesCopied
  1544. )
  1545. {
  1546. PWSTR pDest;
  1547. PCUCHAR pChar;
  1548. ULONG BytesCopied;
  1549. ULONG UnicodeChar;
  1550. ULONG CharToSkip;
  1551. #if DBG
  1552. NTSTATUS Status;
  1553. PFN_POPCHAR_ABSPATH pfnPopChar;
  1554. PWSTR pSegment = pDestination;
  1555. ULONG SegmentCount = 0;
  1556. #endif // DBG
  1557. //
  1558. // Sanity check.
  1559. //
  1560. PAGED_CODE();
  1561. #if DBG
  1562. if (UrlEncoding_Ansi == UrlEncoding)
  1563. pfnPopChar = &HttppPopCharAbsPathAnsi;
  1564. else if (UrlEncoding_Dbcs == UrlEncoding)
  1565. pfnPopChar = &HttppPopCharAbsPathDbcs;
  1566. else if (UrlEncoding_Utf8 == UrlEncoding)
  1567. pfnPopChar = &HttppPopCharAbsPathUtf8;
  1568. else
  1569. {
  1570. ASSERT(! "Invalid UrlEncoding");
  1571. RETURN(STATUS_INVALID_PARAMETER);
  1572. }
  1573. #else // !DBG
  1574. UNREFERENCED_PARAMETER(pCfg);
  1575. UNREFERENCED_PARAMETER(UrlEncoding);
  1576. #endif // DBG
  1577. pDest = pDestination;
  1578. BytesCopied = 0;
  1579. pChar = pSource;
  1580. CharToSkip = 1;
  1581. while ((int)SourceLength > 0)
  1582. {
  1583. ULONG NextUnicodeChar = FastPopChars[*pChar];
  1584. //
  1585. // Grab the next character.
  1586. //
  1587. // All clean chars have a non-zero entry in FastPopChars[].
  1588. // All clean chars are in the US-ASCII range, 0-127.
  1589. //
  1590. ASSERT(0 != NextUnicodeChar);
  1591. ASSERT(IS_ASCII(NextUnicodeChar));
  1592. #if DBG
  1593. Status = (*pfnPopChar)(
  1594. pChar,
  1595. SourceLength,
  1596. pCfg->PercentUAllowed,
  1597. pCfg->AllowRestrictedChars,
  1598. &UnicodeChar,
  1599. &CharToSkip
  1600. );
  1601. ASSERT(NT_SUCCESS(Status));
  1602. ASSERT(UnicodeChar == NextUnicodeChar);
  1603. ASSERT(CharToSkip == 1);
  1604. #endif // !DBG
  1605. UnicodeChar = (WCHAR) NextUnicodeChar;
  1606. CharToSkip = 1;
  1607. #if DBG
  1608. // Because HttpFindUrlToken() marks as dirty any URLs that
  1609. // (appear to) have too many segments or overlong segments,
  1610. // we should never hit these assertions
  1611. if (FORWARD_SLASH == UnicodeChar)
  1612. {
  1613. ULONG SegmentLength = DIFF(pDest - pSegment);
  1614. // The segment length should be within bounds
  1615. ASSERT(SegmentLength > 0 || pDestination == pSegment);
  1616. ASSERT(SegmentLength
  1617. <= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));
  1618. pSegment = pDest;
  1619. ++SegmentCount;
  1620. // There should not be too many segments
  1621. ASSERT(SegmentCount <= pCfg->UrlSegmentMaxCount);
  1622. }
  1623. #endif // DBG
  1624. EMIT_LITERAL_CHAR(UnicodeChar, pDest, BytesCopied);
  1625. pChar += CharToSkip;
  1626. SourceLength -= CharToSkip;
  1627. }
  1628. //
  1629. // terminate the string, it hasn't been done in the loop
  1630. //
  1631. ASSERT((pDest-1)[0] != UNICODE_NULL);
  1632. pDest[0] = UNICODE_NULL;
  1633. *pBytesCopied = BytesCopied;
  1634. ASSERT(DIFF(pDest - pSegment) > 0);
  1635. ASSERT(DIFF(pDest - pSegment)
  1636. <= pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"));
  1637. ASSERT(SegmentCount < pCfg->UrlSegmentMaxCount);
  1638. return STATUS_SUCCESS;
  1639. } // HttppCopyUrlByType
  1640. /***************************************************************************++
  1641. Routine Description:
  1642. Unescape
  1643. Convert backslash to forward slash
  1644. Remove double slashes (empty directiories names) - e.g. // or \\
  1645. Handle /./
  1646. Handle /../
  1647. Convert to unicode
  1648. Arguments:
  1649. Return Value:
  1650. NTSTATUS - Completion status.
  1651. Note: Any changes to this code may require changes for the fast path code too.
  1652. The fast path is HttpCopyUrl.
  1653. --***************************************************************************/
  1654. NTSTATUS
  1655. HttpCleanAndCopyUrl(
  1656. IN PURL_C14N_CONFIG pCfg,
  1657. IN URL_PART UrlPart,
  1658. OUT PWSTR pDestination,
  1659. IN PCUCHAR pSource,
  1660. IN ULONG SourceLength,
  1661. OUT PULONG pBytesCopied,
  1662. OUT PWSTR * ppQueryString OPTIONAL,
  1663. OUT PURL_ENCODING_TYPE pUrlEncodingType
  1664. )
  1665. {
  1666. NTSTATUS Status = STATUS_UNSUCCESSFUL;
  1667. ULONG DecodeOrder = pCfg->AbsPathDecodeOrder;
  1668. PAGED_CODE();
  1669. ASSERT(NULL != pDestination);
  1670. ASSERT(NULL != pSource);
  1671. ASSERT(NULL != pBytesCopied);
  1672. ASSERT(NULL != pUrlEncodingType);
  1673. if (0 == DecodeOrder || DecodeOrder != (DecodeOrder & UrlDecode_MaxMask))
  1674. {
  1675. UlTraceError(PARSER,
  1676. ("http!HttpCleanAndCopyUrl: invalid DecodeOrder, 0x%lX\n",
  1677. DecodeOrder
  1678. ));
  1679. RETURN(STATUS_INVALID_PARAMETER);
  1680. }
  1681. for ( ;
  1682. 0 != DecodeOrder && !NT_SUCCESS(Status);
  1683. DecodeOrder >>= UrlDecode_Shift
  1684. )
  1685. {
  1686. ULONG UrlEncoding = (DecodeOrder & UrlDecode_Mask);
  1687. switch (UrlEncoding)
  1688. {
  1689. default:
  1690. ASSERT(! "Impossible UrlDecodeOrder");
  1691. case UrlDecode_None:
  1692. break;
  1693. case UrlDecode_Ansi:
  1694. case UrlDecode_Dbcs:
  1695. case UrlDecode_Utf8:
  1696. UlTraceVerbose(PARSER,
  1697. ("http!HttpCleanAndCopyUrl(%s, Src=%p, %lu)\n",
  1698. HttppUrlEncodingToString(UrlEncoding),
  1699. pSource, SourceLength
  1700. ));
  1701. Status = HttppCleanAndCopyUrlByType(
  1702. pCfg,
  1703. (URL_ENCODING_TYPE) UrlEncoding,
  1704. UrlPart,
  1705. pDestination,
  1706. pSource,
  1707. SourceLength,
  1708. pBytesCopied,
  1709. ppQueryString
  1710. );
  1711. if (NT_SUCCESS(Status))
  1712. {
  1713. *pUrlEncodingType = (URL_ENCODING_TYPE) UrlEncoding;
  1714. UlTraceVerbose(PARSER,
  1715. ("http!HttpCleanAndCopyUrl(%s): "
  1716. "(%lu) '%.*s' -> (%lu) '%ls'\n",
  1717. HttppUrlEncodingToString(UrlEncoding),
  1718. SourceLength, SourceLength, pSource,
  1719. *pBytesCopied/sizeof(WCHAR), pDestination
  1720. ));
  1721. }
  1722. break;
  1723. };
  1724. }
  1725. return Status;
  1726. } // HttpCleanAndCopyUrl
  //
  // HttppCleanAndCopyUrlByType() uses StateFromStateAndToken[][] and
  // ActionFromStateAndToken[][] to handle "//", "/./", and "/../" productions.
  //
  // TOK_STATE builds one row of a state-transition table: a braced list of
  // the four next states, in the column order Other, '.', EOS, '/'. Each
  // state is given by its short name; the URL_STATE_ prefix is pasted on.
  // The first argument names the row's own state. It is there for the
  // reader only and does not appear in the expansion.
  //

  #define TOK_STATE(state, other, dot, eos, slash)            \
      { URL_STATE_ ## other, URL_STATE_ ## dot,               \
        URL_STATE_ ## eos,   URL_STATE_ ## slash }
  1738. //
  1739. // CanonStateFromStateAndToken[][] is used by HttpParseUrl() to reject
  1740. // "//", "/./", and "/../" sequences, as these URLs are supposed to
  1741. // be in canonical form already.
  1742. //
  1743. const URL_STATE
  1744. CanonStateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
  1745. {
  1746. // State \ Token: Other '.' EOS '/'
  1747. TOK_STATE( START, START, START, END, SLASH),
  1748. TOK_STATE( SLASH, START, SLASH_DOT, END, ERROR),
  1749. TOK_STATE( SLASH_DOT, START, SLASH_DOT_DOT, END, ERROR),
  1750. TOK_STATE( SLASH_DOT_DOT, START, START, ERROR, ERROR),
  1751. TOK_STATE( END, END, END, END, END),
  1752. TOK_STATE( ERROR, ERROR, ERROR, ERROR, ERROR)
  1753. };
  1754. //
  1755. // StateFromStateAndToken[][] says which new state to transition to given
  1756. // the current state and the token we saw. Used by HttppCleanAndCopyUrlByType()
  1757. //
  1758. const URL_STATE
  1759. StateFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
  1760. {
  1761. // State \ Token: Other '.' EOS '/'
  1762. TOK_STATE( START, START, START, END, SLASH),
  1763. TOK_STATE( SLASH, START, SLASH_DOT, END, SLASH),
  1764. TOK_STATE( SLASH_DOT, START, SLASH_DOT_DOT, END, SLASH),
  1765. TOK_STATE( SLASH_DOT_DOT, START, START, END, SLASH),
  1766. TOK_STATE( END, END, END, END, END),
  1767. TOK_STATE( ERROR, ERROR, ERROR, ERROR, ERROR)
  1768. };
  //
  // ActionFromStateAndToken[][] says what action to perform based on
  // the current state and the current token
  //
  // NEW_ACTION builds one row of that table: a braced list of the four
  // actions, in the column order Other, '.', EOS, '/'. Each action is given
  // by its short name; the ACTION_ prefix is pasted on. The first argument
  // names the row's state for the reader only and is not expanded.
  //

  #define NEW_ACTION(state, other, dot, eos, slash)           \
      { ACTION_ ## other, ACTION_ ## dot,                     \
        ACTION_ ## eos,   ACTION_ ## slash }
  1780. const URL_ACTION
  1781. ActionFromStateAndToken[URL_STATE_MAX][URL_TOKEN_MAX] =
  1782. {
  1783. // State \ Token: Other '.' EOS '/'
  1784. NEW_ACTION(START, EMIT_CH, EMIT_CH, NOTHING, EMIT_CH),
  1785. NEW_ACTION(SLASH, EMIT_CH, NOTHING, NOTHING, NOTHING),
  1786. NEW_ACTION(SLASH_DOT, EMIT_DOT_CH, NOTHING, NOTHING, NOTHING),
  1787. NEW_ACTION(SLASH_DOT_DOT, EMIT_DOT_DOT_CH,
  1788. EMIT_DOT_DOT_CH, BACKUP, BACKUP),
  1789. NEW_ACTION(END, NOTHING, NOTHING, NOTHING, NOTHING)
  1790. };
  1791. #if DBG
  1792. PCSTR
  1793. HttppUrlActionToString(
  1794. URL_ACTION Action)
  1795. {
  1796. switch (Action)
  1797. {
  1798. case ACTION_NOTHING: return "NOTHING";
  1799. case ACTION_EMIT_CH: return "EMIT_CH";
  1800. case ACTION_EMIT_DOT_CH: return "EMIT_DOT_CH";
  1801. case ACTION_EMIT_DOT_DOT_CH: return "EMIT_DOT_DOT_CH";
  1802. case ACTION_BACKUP: return "BACKUP";
  1803. case ACTION_MAX: return "MAX";
  1804. default:
  1805. ASSERT(! "Invalid URL_ACTION");
  1806. return "ACTION_???";
  1807. }
  1808. } // HttppUrlActionToString
  1809. PCSTR
  1810. HttppUrlStateToString(
  1811. URL_STATE UrlState)
  1812. {
  1813. switch (UrlState)
  1814. {
  1815. case URL_STATE_START: return "START";
  1816. case URL_STATE_SLASH: return "SLASH";
  1817. case URL_STATE_SLASH_DOT: return "SLASH_DOT";
  1818. case URL_STATE_SLASH_DOT_DOT: return "SLASH_DOT_DOT";
  1819. case URL_STATE_END: return "END";
  1820. case URL_STATE_ERROR: return "ERROR";
  1821. case URL_STATE_MAX: return "MAX";
  1822. default:
  1823. ASSERT(! "Invalid URL_STATE");
  1824. return "URL_STATE_???";
  1825. }
  1826. } // HttppUrlStateToString
  1827. PCSTR
  1828. HttppUrlTokenToString(
  1829. URL_STATE_TOKEN UrlToken)
  1830. {
  1831. switch (UrlToken)
  1832. {
  1833. case URL_TOKEN_OTHER: return "OTHER";
  1834. case URL_TOKEN_DOT: return "DOT";
  1835. case URL_TOKEN_EOS: return "EOS";
  1836. case URL_TOKEN_SLASH: return "SLASH";
  1837. case URL_TOKEN_MAX: return "MAX";
  1838. default:
  1839. ASSERT(! "Invalid URL_STATE_TOKEN");
  1840. return "URL_TOKEN_???";
  1841. }
  1842. } // HttppUrlTokenToString
  1843. #endif // DBG
  1844. PCSTR
  1845. HttpSiteTypeToString(
  1846. HTTP_URL_SITE_TYPE SiteType
  1847. )
  1848. {
  1849. switch (SiteType)
  1850. {
  1851. case HttpUrlSite_None: return "None";
  1852. case HttpUrlSite_Name: return "Name";
  1853. case HttpUrlSite_IP: return "IP";
  1854. case HttpUrlSite_NamePlusIP: return "Name+IP";
  1855. case HttpUrlSite_WeakWildcard: return "Weak";
  1856. case HttpUrlSite_StrongWildcard: return "Strong";
  1857. case HttpUrlSite_Max: return "Max";
  1858. default:
  1859. ASSERT(! "Invalid HTTP_URL_SITE_TYPE");
  1860. return "????";
  1861. }
  1862. }
  1863. /***************************************************************************++
  1864. Routine Description:
  1865. This function can be told to clean up UTF-8, ANSI, or DBCS URLs.
  1866. Unescape
  1867. Convert backslash to forward slash
  1868. Remove double slashes (empty directory names) - e.g. // or \\
  1869. Handle /./
  1870. Handle /../
  1871. Convert to unicode
  1872. Arguments:
  1873. Return Value:
  1874. NTSTATUS - Completion status.
  1875. Note: Any changes to this code may require changes for the fast path code too.
  1876. The fast path is HttppCopyUrlByType.
  1877. --***************************************************************************/
  1878. NTSTATUS
  1879. HttppCleanAndCopyUrlByType(
  1880. IN PURL_C14N_CONFIG pCfg,
  1881. IN URL_ENCODING_TYPE UrlEncoding,
  1882. IN URL_PART UrlPart,
  1883. OUT PWSTR pDestination,
  1884. IN PCUCHAR pSource,
  1885. IN ULONG SourceLength,
  1886. OUT PULONG pBytesCopied,
  1887. OUT PWSTR * ppQueryString OPTIONAL
  1888. )
  1889. {
  1890. NTSTATUS Status;
  1891. PWSTR pDest;
  1892. PCUCHAR pChar;
  1893. ULONG CharToSkip;
  1894. ULONG BytesCopied;
  1895. PWSTR pQueryString;
  1896. URL_STATE UrlState = URL_STATE_START;
  1897. URL_STATE_TOKEN UrlToken = URL_TOKEN_OTHER;
  1898. URL_ACTION Action = ACTION_NOTHING;
  1899. ULONG UnicodeChar;
  1900. BOOLEAN MakeCanonical;
  1901. PWCHAR pFastPopChar;
  1902. PFN_POPCHAR_ABSPATH pfnPopChar;
  1903. PWSTR pSegment = pDestination;
  1904. ULONG SegmentCount = 0;
  1905. BOOLEAN TestSegment = FALSE;
  1906. #if DBG
  1907. ULONG OriginalSourceLength = SourceLength;
  1908. #endif
  1909. //
  1910. // Sanity check.
  1911. //
  1912. PAGED_CODE();
  1913. ASSERT(UrlPart_AbsPath == UrlPart);
  1914. if (UrlEncoding_Ansi == UrlEncoding)
  1915. pfnPopChar = &HttppPopCharAbsPathAnsi;
  1916. else if (UrlEncoding_Dbcs == UrlEncoding)
  1917. pfnPopChar = &HttppPopCharAbsPathDbcs;
  1918. else if (UrlEncoding_Utf8 == UrlEncoding)
  1919. pfnPopChar = &HttppPopCharAbsPathUtf8;
  1920. else
  1921. {
  1922. ASSERT(! "Invalid UrlEncoding");
  1923. RETURN(STATUS_INVALID_PARAMETER);
  1924. }
  1925. ASSERT(FORWARD_SLASH == *pSource);
  1926. pDest = pDestination;
  1927. pQueryString = NULL;
  1928. BytesCopied = 0;
  1929. pChar = pSource;
  1930. CharToSkip = 0;
  1931. UrlState = 0;
  1932. MakeCanonical = (BOOLEAN) (UrlPart == UrlPart_AbsPath);
  1933. if (UrlEncoding == UrlEncoding_Utf8 && UrlPart != UrlPart_QueryString)
  1934. {
  1935. pFastPopChar = FastPopChars;
  1936. }
  1937. else
  1938. {
  1939. pFastPopChar = DummyPopChars;
  1940. }
  1941. while (SourceLength > 0)
  1942. {
  1943. //
  1944. // advance ! it's at the top of the loop to enable ANSI_NULL to
  1945. // come through ONCE
  1946. //
  1947. ASSERT(CharToSkip <= SourceLength);
  1948. pChar += CharToSkip;
  1949. SourceLength -= CharToSkip;
  1950. //
  1951. // well? have we hit the end?
  1952. //
  1953. if (SourceLength == 0)
  1954. {
  1955. UnicodeChar = UNICODE_NULL;
  1956. CharToSkip = 1;
  1957. }
  1958. else
  1959. {
  1960. //
  1961. // Nope. Peek briefly to see if we hit the query string
  1962. //
  1963. if (UrlPart == UrlPart_AbsPath && pChar[0] == QUESTION_MARK)
  1964. {
  1965. ASSERT(pQueryString == NULL);
  1966. //
  1967. // remember its location
  1968. //
  1969. pQueryString = pDest;
  1970. //
  1971. // let it fall through ONCE to the canonical
  1972. // in order to handle a trailing "/.." like
  1973. // "http://foobar:80/foo/bar/..?v=1&v2"
  1974. //
  1975. TestSegment = TRUE;
  1976. UnicodeChar = QUESTION_MARK;
  1977. CharToSkip = 1;
  1978. //
  1979. // now we are cleaning the query string
  1980. //
  1981. UrlPart = UrlPart_QueryString;
  1982. UlTraceVerbose(PARSER, ("QueryString @ %p\n", pQueryString));
  1983. //
  1984. // cannot use fast path for PopChar anymore
  1985. //
  1986. pFastPopChar = DummyPopChars;
  1987. pfnPopChar = HttppPopCharQueryString;
  1988. }
  1989. else
  1990. {
  1991. ULONG NextUnicodeChar = pFastPopChar[*pChar];
  1992. //
  1993. // Grab the next character. Try to be fast for the
  1994. // normal character case. Otherwise call PopChar.
  1995. //
  1996. if (NextUnicodeChar == 0)
  1997. {
  1998. Status = (*pfnPopChar)(
  1999. pChar,
  2000. SourceLength,
  2001. pCfg->PercentUAllowed,
  2002. pCfg->AllowRestrictedChars,
  2003. &UnicodeChar,
  2004. &CharToSkip
  2005. );
  2006. if (NT_SUCCESS(Status) == FALSE)
  2007. goto end;
  2008. }
  2009. else
  2010. {
  2011. #if DBG
  2012. Status = (*pfnPopChar)(
  2013. pChar,
  2014. SourceLength,
  2015. pCfg->PercentUAllowed,
  2016. pCfg->AllowRestrictedChars,
  2017. &UnicodeChar,
  2018. &CharToSkip
  2019. );
  2020. ASSERT(NT_SUCCESS(Status));
  2021. ASSERT(UnicodeChar == NextUnicodeChar);
  2022. ASSERT(CharToSkip == 1);
  2023. #endif // DBG
  2024. UnicodeChar = (WCHAR) NextUnicodeChar;
  2025. CharToSkip = 1;
  2026. }
  2027. }
  2028. }
  2029. if (!MakeCanonical)
  2030. {
  2031. UrlToken = (UnicodeChar == UNICODE_NULL)
  2032. ? URL_TOKEN_EOS
  2033. : URL_TOKEN_OTHER;
  2034. TestSegment = FALSE;
  2035. }
  2036. else
  2037. {
  2038. //
  2039. // now use the state machine to make it canonical.
  2040. //
  2041. //
  2042. // did we just hit the query string? this will only happen once
  2043. // that we take this branch after hitting it, as we stop
  2044. // processing after hitting it.
  2045. //
  2046. if (UrlPart == UrlPart_QueryString)
  2047. {
  2048. //
  2049. // treat this just like we hit a NULL, EOS.
  2050. //
  2051. ASSERT(QUESTION_MARK == UnicodeChar);
  2052. UrlToken = URL_TOKEN_EOS;
  2053. TestSegment = TRUE;
  2054. }
  2055. else
  2056. {
  2057. //
  2058. // otherwise based the new state off of the char we
  2059. // just popped.
  2060. //
  2061. switch (UnicodeChar)
  2062. {
  2063. case UNICODE_NULL:
  2064. UrlToken = URL_TOKEN_EOS;
  2065. TestSegment = TRUE;
  2066. break;
  2067. case DOT:
  2068. UrlToken = URL_TOKEN_DOT;
  2069. TestSegment = FALSE;
  2070. break;
  2071. case FORWARD_SLASH:
  2072. UrlToken = URL_TOKEN_SLASH;
  2073. TestSegment = TRUE;
  2074. break;
  2075. default:
  2076. UrlToken = URL_TOKEN_OTHER;
  2077. TestSegment = FALSE;
  2078. break;
  2079. }
  2080. }
  2081. }
  2082. Action = ActionFromStateAndToken[UrlState][UrlToken];
  2083. IF_DEBUG2BOTH(PARSER, VERBOSE)
  2084. {
  2085. ULONG i;
  2086. UCHAR HexBuff[5*12 + 10];
  2087. PUCHAR p = HexBuff;
  2088. UCHAR Byte;
  2089. ASSERT(CharToSkip <= 4 * STRLEN_LIT("%NN"));
  2090. // Generate something like
  2091. // "[25 65 32 25 38 30 25 39 35] '%e2%80%95'"
  2092. *p++ = '[';
  2093. for (i = 0; i < CharToSkip; ++i)
  2094. {
  2095. const static char hexArray[] = "0123456789ABCDEF";
  2096. Byte = pChar[i];
  2097. *p++ = hexArray[Byte >> 4];
  2098. *p++ = hexArray[Byte & 0xf];
  2099. *p++ = ' ';
  2100. }
  2101. p[-1] = ']'; // overwrite last ' '
  2102. *p++ = ' ';
  2103. *p++ = '\'';
  2104. for (i = 0; i < CharToSkip; ++i)
  2105. {
  2106. Byte = pChar[i];
  2107. *p++ = (IS_HTTP_PRINT(Byte) ? Byte : '?');
  2108. }
  2109. *p++ = '\'';
  2110. *p++ = '\0';
  2111. ASSERT(DIFF(p - HexBuff) <= DIMENSION(HexBuff));
  2112. UlTrace(PARSER,
  2113. ("http!HttppCleanAndCopyUrlByType(%s): "
  2114. "(%lu) %s -> U+%04lX '%c': "
  2115. "[%s][%s] -> %s, %s%s\n",
  2116. HttppUrlEncodingToString(UrlEncoding),
  2117. CharToSkip, HexBuff,
  2118. UnicodeChar,
  2119. IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
  2120. ? (UCHAR) UnicodeChar : '?',
  2121. HttppUrlStateToString(UrlState),
  2122. HttppUrlTokenToString(UrlToken),
  2123. HttppUrlStateToString(
  2124. StateFromStateAndToken[UrlState][UrlToken]),
  2125. HttppUrlActionToString(Action),
  2126. TestSegment ? ", TestSegment" : ""
  2127. ));
  2128. } // IF_DEBUG2BOTH(PARSER, VERBOSE)
  2129. //
  2130. // Segment length and segment count checks
  2131. //
  2132. if (TestSegment)
  2133. {
  2134. ULONG SegmentLength = DIFF(pDest - pSegment);
  2135. ASSERT(pSegment <= pDest);
  2136. UlTraceVerbose(PARSER,
  2137. ("http!HttppCleanAndCopyUrlByType: "
  2138. "Segment[%lu] %p (%lu) = '%.*ls'\n",
  2139. SegmentCount, pSegment, SegmentLength,
  2140. SegmentLength, pSegment
  2141. ));
  2142. // Reject if segment too long
  2143. if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
  2144. {
  2145. UlTraceError(PARSER, (
  2146. "http!HttppCleanAndCopyUrlByType: "
  2147. "Segment too long: %lu\n",
  2148. SegmentLength
  2149. ));
  2150. RETURN(STATUS_INVALID_DEVICE_REQUEST);
  2151. }
  2152. pSegment = pDest;
  2153. // Reject if too many path segments
  2154. if (Action != ACTION_NOTHING)
  2155. {
  2156. if (pSegment == pDestination)
  2157. {
  2158. SegmentCount = 0;
  2159. }
  2160. else if (++SegmentCount > pCfg->UrlSegmentMaxCount)
  2161. {
  2162. UlTraceError(PARSER, (
  2163. "http!HttppCleanAndCopyUrlByType: "
  2164. "Too many segments: %lu\n",
  2165. SegmentCount
  2166. ));
  2167. RETURN(STATUS_INVALID_DEVICE_REQUEST);
  2168. }
  2169. }
  2170. }
  2171. //
  2172. // Perform the action associated with the state.
  2173. //
  2174. switch (Action)
  2175. {
  2176. case ACTION_EMIT_DOT_DOT_CH:
  2177. EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);
  2178. // fall through
  2179. case ACTION_EMIT_DOT_CH:
  2180. EMIT_LITERAL_CHAR(DOT, pDest, BytesCopied);
  2181. // fall through
  2182. case ACTION_EMIT_CH:
  2183. EMIT_CHAR(
  2184. UnicodeChar,
  2185. pDest,
  2186. BytesCopied,
  2187. Status,
  2188. pCfg->AllowRestrictedChars
  2189. );
  2190. // fall through
  2191. case ACTION_NOTHING:
  2192. break;
  2193. case ACTION_BACKUP:
  2194. //
  2195. // pDest currently points 1 past the last '/'. backup over it and
  2196. // find the preceding '/', set pDest to 1 past that one.
  2197. //
  2198. //
  2199. // backup to the '/'
  2200. //
  2201. pDest -= 1;
  2202. BytesCopied -= sizeof(WCHAR);
  2203. ASSERT(pDest[0] == FORWARD_SLASH);
  2204. //
  2205. // are we at the start of the string? that's bad, can't go back!
  2206. //
  2207. if (pDest == pDestination)
  2208. {
  2209. ASSERT(BytesCopied == 0);
  2210. UlTraceError(PARSER, (
  2211. "http!HttppCleanAndCopyUrl: "
  2212. "Can't back up for \"/../\"\n"
  2213. ));
  2214. Status = STATUS_OBJECT_PATH_INVALID;
  2215. goto end;
  2216. }
  2217. //
  2218. // back up over the '/'
  2219. //
  2220. pDest -= 1;
  2221. BytesCopied -= sizeof(WCHAR);
  2222. ASSERT(pDest > pDestination);
  2223. //
  2224. // now find the previous slash
  2225. //
  2226. while (pDest > pDestination && pDest[0] != FORWARD_SLASH)
  2227. {
  2228. pDest -= 1;
  2229. BytesCopied -= sizeof(WCHAR);
  2230. }
  2231. //
  2232. // Adjust segment trackers downwards
  2233. //
  2234. pSegment = pDest;
  2235. if (pSegment == pDestination)
  2236. SegmentCount = 0;
  2237. else
  2238. --SegmentCount;
  2239. //
  2240. // we already have a slash, so don't have to store one.
  2241. //
  2242. ASSERT(pDest[0] == FORWARD_SLASH);
  2243. //
  2244. // simply skip it, as if we had emitted it just now
  2245. //
  2246. pDest += 1;
  2247. BytesCopied += sizeof(WCHAR);
  2248. break;
  2249. default:
  2250. ASSERT(!"http!HttppCleanAndCopyUrl: "
  2251. "Invalid action code in state table!");
  2252. Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
  2253. goto end;
  2254. }
  2255. //
  2256. // Just hit the query string ?
  2257. //
  2258. if (MakeCanonical && UrlPart == UrlPart_QueryString)
  2259. {
  2260. //
  2261. // Stop canonical processing
  2262. //
  2263. MakeCanonical = FALSE;
  2264. //
  2265. // Need to emit the '?', it wasn't emitted above
  2266. //
  2267. ASSERT(ActionFromStateAndToken[UrlState][UrlToken]
  2268. != ACTION_EMIT_CH);
  2269. //
  2270. // remember its location (in case we backed up)
  2271. //
  2272. pQueryString = pDest;
  2273. EMIT_LITERAL_CHAR(QUESTION_MARK, pDest, BytesCopied);
  2274. // reset
  2275. UrlToken = URL_TOKEN_OTHER;
  2276. UrlState = URL_STATE_START;
  2277. }
  2278. // update the URL state
  2279. UrlState = StateFromStateAndToken[UrlState][UrlToken];
  2280. ASSERT(URL_STATE_ERROR != UrlState);
  2281. }
  2282. //
  2283. // terminate the string, it hasn't been done in the loop
  2284. //
  2285. ASSERT((pDest-1)[0] != UNICODE_NULL);
  2286. pDest[0] = UNICODE_NULL;
  2287. *pBytesCopied = BytesCopied;
  2288. if (BytesCopied > pCfg->UrlMaxLength * sizeof(WCHAR))
  2289. {
  2290. UlTraceError(PARSER, (
  2291. "http!HttppCleanAndCopyUrlByType: "
  2292. "URL too long: %lu\n",
  2293. BytesCopied
  2294. ));
  2295. RETURN(STATUS_INVALID_DEVICE_REQUEST);
  2296. }
  2297. if (ppQueryString != NULL)
  2298. {
  2299. *ppQueryString = pQueryString;
  2300. }
  2301. UlTraceVerbose(PARSER,
  2302. ("http!HttppCleanAndCopyUrlByType: "
  2303. "(%lu) '%.*s' -> (%lu) '%.*ls', %squerystring\n",
  2304. OriginalSourceLength,
  2305. OriginalSourceLength, pSource,
  2306. BytesCopied/sizeof(WCHAR),
  2307. BytesCopied/sizeof(WCHAR), pDestination,
  2308. pQueryString != NULL ? "" : "no "
  2309. ));
  2310. Status = STATUS_SUCCESS;
  2311. end:
  2312. return Status;
  2313. } // HttppCleanAndCopyUrlByType
  2314. /*++
  2315. Routine Description:
  2316. A utility routine to find a Url token. We take an input pointer, skip any
  2317. preceding LWS, then scan the token until we find either LWS or a CRLF
  2318. pair. We also mark the request to have a "Clean" Url
  2319. Arguments:
  2320. pBuffer - Buffer to search for token.
  2321. BufferLength - Length of data pointed to by pBuffer.
  2322. ppTokenStart - Where to return the start of the token, if we locate
  2323. its delimiter.
  2324. pTokenLength - Where to return the length of the token.
  2325. pRawUrlClean - where to return cleanliness of URL
  2326. Return Value:
  2327. STATUS_SUCCESS if no parsing errors in the URL.
  2328. We also return, in *ppTokenStart, a pointer to the token we found,
  2329. or NULL if we don't find a whitespace-delimited token.
  2330. pRawUrlClean flag may be set.
  2331. --*/
  2332. NTSTATUS
  2333. HttpFindUrlToken(
  2334. IN PURL_C14N_CONFIG pCfg,
  2335. IN PCUCHAR pBuffer,
  2336. IN ULONG BufferLength,
  2337. OUT PUCHAR* ppTokenStart,
  2338. OUT PULONG pTokenLength,
  2339. OUT PBOOLEAN pRawUrlClean
  2340. )
  2341. {
  2342. PCUCHAR pTokenStart;
  2343. PCUCHAR pSegment;
  2344. UCHAR CurrentChar;
  2345. UCHAR PreviousChar;
  2346. ULONG SegmentCount = 0;
  2347. ULONG TokenLength;
  2348. //
  2349. // Sanity check.
  2350. //
  2351. PAGED_CODE();
  2352. ASSERT(NULL != pBuffer);
  2353. ASSERT(NULL != ppTokenStart);
  2354. ASSERT(NULL != pTokenLength);
  2355. ASSERT(NULL != pRawUrlClean);
  2356. //
  2357. // Assume Clean RawUrl
  2358. //
  2359. *pRawUrlClean = TRUE;
  2360. *ppTokenStart = NULL;
  2361. *pTokenLength = 0;
  2362. //
  2363. // First, skip any preceding LWS.
  2364. //
  2365. while (BufferLength > 0 && IS_HTTP_LWS(*pBuffer))
  2366. {
  2367. pBuffer++;
  2368. BufferLength--;
  2369. }
  2370. // If we stopped because we ran out of buffer, bail.
  2371. if (BufferLength == 0)
  2372. {
  2373. return STATUS_SUCCESS;
  2374. }
  2375. pTokenStart = pBuffer;
  2376. PreviousChar = ANSI_NULL;
  2377. // This will usually point to a '/', but it won't if this is an AbsURI.
  2378. // It doesn't really matter, since only a few borderline cases will
  2379. // be marked as dirty that might not otherwise be.
  2380. pSegment = pBuffer;
  2381. // Now skip over the token, until we see either LWS or a CR or LF.
  2382. while ( BufferLength != 0 )
  2383. {
  2384. CurrentChar = *pBuffer;
  2385. // must check for WS [ \t\r\n] first, since \t, \r, & \n are CTL chars!
  2386. if ( IS_HTTP_WS_TOKEN(CurrentChar) )
  2387. {
  2388. break;
  2389. }
  2390. if ( IS_HTTP_CTL(CurrentChar) )
  2391. {
  2392. *pRawUrlClean = FALSE;
  2393. *ppTokenStart = NULL;
  2394. UlTraceError(PARSER, (
  2395. "http!HttpFindUrlToken: "
  2396. "Found control char: %02X\n",
  2397. CurrentChar
  2398. ));
  2399. RETURN(STATUS_INVALID_DEVICE_REQUEST);
  2400. }
  2401. //
  2402. // URL is NOT clean if it contains any of the following patterns
  2403. //
  2404. // a. back slash "\"
  2405. // b. dot, forward slash | forward slash, forward slash "./" | "//"
  2406. // c. forward slash, dot | dot, dot "/." | ".."
  2407. // d. question mark (querystring) "?"
  2408. // e. percent (hex escape) "%"
  2409. // f. raw bytes with high bit set, >= 0x80
  2410. //
  2411. // These are conservative estimates of "Clean"; some clean URLs may not
  2412. // be marked as clean. For such URLs, we'll skip the fast path but at
  2413. // no loss of functionality.
  2414. //
  2415. if ( IS_URL_DIRTY(CurrentChar) )
  2416. {
  2417. // Only do the checks if it's still clean
  2418. if (*pRawUrlClean)
  2419. {
  2420. if (CurrentChar == FORWARD_SLASH || CurrentChar == DOT)
  2421. {
  2422. if (PreviousChar == FORWARD_SLASH || PreviousChar == DOT)
  2423. {
  2424. *pRawUrlClean = FALSE;
  2425. }
  2426. }
  2427. else
  2428. {
  2429. *pRawUrlClean = FALSE;
  2430. }
  2431. }
  2432. if (CurrentChar == FORWARD_SLASH)
  2433. {
  2434. ULONG SegmentLength = DIFF(pBuffer - pSegment);
  2435. // If the segment contains %-hex-escaped chars, it may become
  2436. // acceptably short after PopChar() processing. Let
  2437. // HttppCleanAndCopyUrlByType() figure it out.
  2438. if (SegmentLength > pCfg->UrlSegmentMaxLength)
  2439. *pRawUrlClean = FALSE;
  2440. pSegment = pBuffer;
  2441. // If this is an AbsURI, instead of an AbsPath, the
  2442. // segment count will be higher, because of the two slashes
  2443. // before the hostname. Also, "/../", "/./", and "//"
  2444. // minimization will reduce the final count of segments.
  2445. // Again, let HttppCleanAndCopyUrlByType() figure it out.
  2446. if (++SegmentCount > pCfg->UrlSegmentMaxCount)
  2447. *pRawUrlClean = FALSE;
  2448. }
  2449. }
  2450. PreviousChar = CurrentChar;
  2451. pBuffer++;
  2452. BufferLength--;
  2453. }
  2454. // See why we stopped.
  2455. if (0 == BufferLength)
  2456. {
  2457. *pRawUrlClean = FALSE;
  2458. // Ran out of buffer before end of token.
  2459. return STATUS_SUCCESS;
  2460. }
  2461. ASSERT(IS_HTTP_WS_TOKEN(*pBuffer));
  2462. TokenLength = DIFF(pBuffer - pTokenStart);
  2463. if (0 == TokenLength)
  2464. {
  2465. UlTraceError(PARSER, ("http!HttpFindUrlToken: Found empty token\n"));
  2466. RETURN(STATUS_INVALID_DEVICE_REQUEST);
  2467. }
  2468. // Check the final segment
  2469. if (DIFF(pBuffer - pSegment) > pCfg->UrlSegmentMaxLength)
  2470. *pRawUrlClean = FALSE;
  2471. if (++SegmentCount > pCfg->UrlSegmentMaxCount)
  2472. *pRawUrlClean = FALSE;
  2473. if (TokenLength > pCfg->UrlMaxLength)
  2474. *pRawUrlClean = FALSE;
  2475. // Success! Set the token length and return the start of the token.
  2476. *pTokenLength = TokenLength;
  2477. *ppTokenStart = (PUCHAR) pTokenStart;
  2478. return STATUS_SUCCESS;
  2479. } // HttpFindUrlToken
  2480. /*++
  2481. Routine Description:
  2482. Parse an IPv6 address from a Unicode buffer. Must be delimited by [].
  2483. May contain a scope ID.
  2484. Arguments:
  2485. pBuffer - Buffer to parse. Must point to '['.
  2486. BufferLength - Length of data pointed to by pBuffer.
  2487. ScopeIdAllowed - if TRUE, an optional scope ID may be present
  2488. pSockAddr6 - Where to return the parsed IPv6 address
  2489. ppEnd - On success, points to character after ']'
  2490. Return Value:
  2491. STATUS_SUCCESS if no parsing errors in the IPv6 address.
  2492. --*/
  2493. NTSTATUS
  2494. HttppParseIPv6Address(
  2495. IN PCWSTR pBuffer,
  2496. IN ULONG BufferLength,
  2497. IN BOOLEAN ScopeIdAllowed,
  2498. OUT PSOCKADDR_IN6 pSockAddr6,
  2499. OUT PCWSTR* ppEnd
  2500. )
  2501. {
  2502. NTSTATUS Status;
  2503. PCWSTR pEnd = pBuffer + BufferLength;
  2504. PCWSTR pChar;
  2505. PWSTR pTerminator;
  2506. ULONG ScopeTemp;
  2507. ASSERT(NULL != pBuffer);
  2508. ASSERT(0 < BufferLength);
  2509. ASSERT(NULL != pSockAddr6);
  2510. ASSERT(NULL != ppEnd);
  2511. RtlZeroMemory(pSockAddr6, sizeof(*pSockAddr6));
  2512. *ppEnd = NULL;
  2513. pSockAddr6->sin6_family = TDI_ADDRESS_TYPE_IP6;
  2514. // Caller guarantees this
  2515. ASSERT(L'[' == *pBuffer);
  2516. // Empty brackets?
  2517. if (BufferLength < WCSLEN_LIT(L"[0]") || L']' == pBuffer[1])
  2518. {
  2519. UlTraceError(PARSER,
  2520. ("http!HttppParseIPv6Address: IPv6 address too short\n"
  2521. ));
  2522. RETURN(STATUS_INVALID_PARAMETER);
  2523. }
  2524. for (pChar = pBuffer + WCSLEN_LIT(L"["); pChar < pEnd; ++pChar)
  2525. {
  2526. if (IS_ASCII(*pChar))
  2527. {
  2528. if (L']' == *pChar || L'%' == *pChar)
  2529. break;
  2530. // Dots are allowed because the last 32 bits may be represented
  2531. // in IPv4 dotted-octet notation
  2532. if (IS_HTTP_HEX(*pChar) || L':' == *pChar || L'.' == *pChar)
  2533. continue;
  2534. }
  2535. UlTraceError(PARSER,
  2536. ("http!HttppParseIPv6Address: "
  2537. "Invalid char in IPv6 address, U+%04X '%c', "
  2538. "after %lu chars, '%.*ls'\n",
  2539. *pChar,
  2540. IS_ANSI(*pChar) && IS_HTTP_PRINT(*pChar) ? *pChar : '?',
  2541. DIFF(pChar - pBuffer),
  2542. DIFF(pChar - pBuffer),
  2543. pBuffer
  2544. ));
  2545. RETURN(STATUS_INVALID_PARAMETER);
  2546. }
  2547. if (pChar == pEnd)
  2548. {
  2549. UlTraceError(PARSER,
  2550. ("http!HttppParseIPv6Address: No ']' for IPv6 address\n"
  2551. ));
  2552. RETURN(STATUS_INVALID_PARAMETER);
  2553. }
  2554. ASSERT(pChar < pEnd);
  2555. ASSERT(L']' == *pChar || L'%' == *pChar);
  2556. // Let the RTL routine do the hard work of parsing IPv6 addrs
  2557. Status = RtlIpv6StringToAddressW(
  2558. pBuffer + WCSLEN_LIT(L"["),
  2559. &pTerminator,
  2560. &pSockAddr6->sin6_addr
  2561. );
  2562. if (! NT_SUCCESS(Status))
  2563. {
  2564. UlTraceError(PARSER,
  2565. ("http!HttppParseIPv6Address: "
  2566. "Invalid IPv6 address, %s\n",
  2567. HttpStatusToString(Status)
  2568. ));
  2569. RETURN(Status);
  2570. }
  2571. if (pTerminator != pChar)
  2572. {
  2573. UlTraceError(PARSER,
  2574. ("http!HttppParseIPv6Address: "
  2575. "Invalid IPv6 terminator, U+%04X, '%c'\n",
  2576. *pTerminator,
  2577. IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
  2578. ? *pTerminator
  2579. : '?'
  2580. ));
  2581. RETURN(STATUS_INVALID_PARAMETER);
  2582. }
  2583. // Is a scopeid present?
  2584. if (L'%' != *pChar)
  2585. {
  2586. ASSERT(L']' == *pChar);
  2587. pSockAddr6->sin6_scope_id = 0;
  2588. }
  2589. else
  2590. {
  2591. PCWSTR pScopeEnd;
  2592. // Skip the '%' denoting a scope ID
  2593. pChar += WCSLEN_LIT(L"%");
  2594. if (!ScopeIdAllowed)
  2595. {
  2596. UlTraceError(PARSER,
  2597. ("http!HttppParseIPv6Address: No scope ID allowed\n"
  2598. ));
  2599. RETURN(STATUS_INVALID_PARAMETER);
  2600. }
  2601. if (pChar == pEnd)
  2602. {
  2603. UlTraceError(PARSER,
  2604. ("http!HttppParseIPv6Address: "
  2605. "No IPv6 scope ID after '%%'\n"
  2606. ));
  2607. RETURN(STATUS_INVALID_PARAMETER);
  2608. }
  2609. pScopeEnd = pChar;
  2610. do
  2611. {
  2612. if (*pScopeEnd < L'0' || *pScopeEnd > L'9')
  2613. {
  2614. UlTraceError(PARSER,
  2615. ("http!HttppParseIPv6Address: "
  2616. "Invalid digit in IPv6 scope ID, "
  2617. "U+%04X, '%c'\n",
  2618. *pScopeEnd,
  2619. IS_ANSI(*pScopeEnd) && IS_HTTP_PRINT(*pScopeEnd)
  2620. ? *pScopeEnd
  2621. : '?'
  2622. ));
  2623. RETURN(STATUS_INVALID_PARAMETER);
  2624. }
  2625. } while (++pScopeEnd < pEnd && L']' != *pScopeEnd);
  2626. ASSERT(pScopeEnd > pChar);
  2627. if (pScopeEnd == pEnd)
  2628. {
  2629. UlTraceError(PARSER,
  2630. ("http!HttppParseIPv6Address: "
  2631. "No ']' after IPv6 scope ID\n"
  2632. ));
  2633. RETURN(STATUS_INVALID_PARAMETER);
  2634. }
  2635. ASSERT(L']' == *pScopeEnd);
  2636. Status = HttpWideStringToULong(
  2637. pChar,
  2638. pScopeEnd - pChar,
  2639. FALSE, // no leading zeros permitted
  2640. 10,
  2641. &pTerminator,
  2642. &ScopeTemp
  2643. );
  2644. if (!NT_SUCCESS(Status))
  2645. {
  2646. UlTraceError(PARSER,
  2647. ("http!HttppParseIPv6Address: "
  2648. "Invalid scopeID, %s\n",
  2649. HttpStatusToString(Status)
  2650. ));
  2651. RETURN(STATUS_INVALID_PARAMETER);
  2652. }
  2653. // Scope ID does not get swapped to Network Byte Order
  2654. *(UNALIGNED64 ULONG *)&pSockAddr6->sin6_scope_id =
  2655. ScopeTemp;
  2656. ASSERT(pTerminator == pScopeEnd);
  2657. pChar = pScopeEnd;
  2658. } // '%' handling
  2659. ASSERT(pChar < pEnd);
  2660. ASSERT(L']' == *pChar);
  2661. // Skip the terminating ']'
  2662. pChar += WCSLEN_LIT(L"]");
  2663. *ppEnd = pChar;
  2664. RETURN(STATUS_SUCCESS);
  2665. } // HttppParseIPv6Address
  2666. /*++
  2667. Routine Description:
  2668. Print an IPv4 or IPv6 address as Unicode.
  2669. Arguments:
  2670. pSockAddr - The IP address to print
  2671. pBuffer - Buffer to print to. Assumed to be large enough.
  2672. Return Value:
  2673. Number of wide chars printed (the length)
  2674. --*/
  2675. ULONG
  2676. HttppPrintIpAddressW(
  2677. IN PSOCKADDR pSockAddr,
  2678. OUT PWSTR pBuffer
  2679. )
  2680. {
  2681. PWSTR pResult = pBuffer;
  2682. HTTP_FILL_BUFFER(pBuffer, MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN);
  2683. if (TDI_ADDRESS_TYPE_IP == pSockAddr->sa_family)
  2684. {
  2685. PSOCKADDR_IN pAddr4 = (PSOCKADDR_IN) pSockAddr;
  2686. pResult = RtlIpv4AddressToStringW(&pAddr4->sin_addr, pResult);
  2687. }
  2688. else if (TDI_ADDRESS_TYPE_IP6 == pSockAddr->sa_family)
  2689. {
  2690. PSOCKADDR_IN6 pAddr6 = (PSOCKADDR_IN6) pSockAddr;
  2691. *pResult++ = L'[';
  2692. pResult = RtlIpv6AddressToStringW(&pAddr6->sin6_addr, pResult);
  2693. // CODEWORK: Handle scope ID
  2694. *pResult++ = L']';
  2695. }
  2696. else
  2697. {
  2698. UlTraceError(PARSER,
  2699. ("http!HttppPrintIpAddressW(): invalid sa_family, %hd\n",
  2700. pSockAddr->sa_family
  2701. ));
  2702. ASSERT(! "Invalid SockAddr Family");
  2703. }
  2704. *pResult = UNICODE_NULL;
  2705. return DIFF(pResult - pBuffer);
  2706. } // HttppPrintIpAddressW
  2707. /***************************************************************************++
  2708. Routine Description:
  2709. This checks to see if the URL is well-formed.
  2710. A well-formed URL has a scheme ("http" or "https"),
  2711. a valid hostname (including + and * wildcards, IPv4, and IPv6 literals),
  2712. a port, and a well-formed abspath.
  2713. * Must check that the URL is well-formed and in canonical form; e.g.,
  2714. - Disallow /../ and /./
  2715. - Disallow invalid characters, including invalid Unicode surrogate
  2716. pairs. The URL is already in Unicode, so it's not a question of
  2717. using the IS_URL_TOKEN() macro.
  2718. Arguments:
  2719. pCfg - configuration parameters
  2720. pUrl - Unicode string containing URL (not assumed to be
  2721. zero-terminated)
  2722. UrlLength - length of pUrl, in WCHARs
  2723. TrailingSlashReqd - if TRUE, pUrl must end in '/'
  2724. ForceRoutingIP - if TRUE and the hostname is an IPv4 or IPv6 literal,
  2725. pParsedUrl->Normalized will be cleared, to force
  2726. HttpNormalizeParsedUrl() to rewrite the URL as
  2727. http://IP:port:IP/path
  2728. pParsedUrl - on successful exit, the components of the URL
  2729. Return Value:
  2730. NTSTATUS
  2731. --***************************************************************************/
  2732. NTSTATUS
  2733. HttpParseUrl(
  2734. IN PURL_C14N_CONFIG pCfg,
  2735. IN PCWSTR pUrl,
  2736. IN ULONG UrlLength,
  2737. IN BOOLEAN TrailingSlashReqd,
  2738. IN BOOLEAN ForceRoutingIP,
  2739. OUT PHTTP_PARSED_URL pParsedUrl
  2740. )
  2741. {
  2742. NTSTATUS Status;
  2743. ULONG PreviousChar;
  2744. ULONG UnicodeChar;
  2745. PCWSTR pEnd = pUrl + UrlLength;
  2746. PCWSTR pHostname;
  2747. PCWSTR pChar;
  2748. PCWSTR pLabel;
  2749. PCWSTR pSlash;
  2750. PCWSTR pSegment;
  2751. PWSTR pTerminator;
  2752. BOOLEAN AlphaLabel;
  2753. BOOLEAN TestSegment;
  2754. BOOLEAN MoreChars;
  2755. BOOLEAN LastCharHack;
  2756. ULONG SegmentCount;
  2757. URL_STATE UrlState;
  2758. URL_STATE_TOKEN UrlToken;
  2759. URL_ACTION Action;
  2760. WCHAR IpAddr[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
  2761. ULONG Length;
  2762. //
  2763. // Sanity check.
  2764. //
  2765. PAGED_CODE();
  2766. ASSERT(NULL != pCfg);
  2767. ASSERT(NULL != pUrl);
  2768. ASSERT(0 < UrlLength && UrlLength <= UNICODE_STRING_MAX_WCHAR_LEN);
  2769. ASSERT(FALSE == TrailingSlashReqd || TRUE == TrailingSlashReqd);
  2770. ASSERT(FALSE == ForceRoutingIP || TRUE == ForceRoutingIP);
  2771. ASSERT(NULL != pParsedUrl);
  2772. RtlZeroMemory(pParsedUrl, sizeof(*pParsedUrl));
  2773. pParsedUrl->Signature = HTTP_PARSED_URL_SIGNATURE;
  2774. pParsedUrl->pFullUrl = (PWSTR) pUrl;
  2775. pParsedUrl->UrlLength = (USHORT) UrlLength;
  2776. pParsedUrl->Normalized = TRUE;
  2777. pParsedUrl->TrailingSlashReqd = TrailingSlashReqd;
  2778. // This is the shortest possible valid URL
  2779. if (UrlLength < WCSLEN_LIT(L"http://*:1/"))
  2780. {
  2781. UlTraceError(PARSER,
  2782. ("http!HttpParseUrl: Url too short, %lu, %.*ls\n",
  2783. UrlLength, UrlLength, pUrl
  2784. ));
  2785. RETURN(STATUS_INVALID_PARAMETER);
  2786. }
  2787. // Check the scheme
  2788. if (0 == wcsncmp(pUrl, L"http://", WCSLEN_LIT(L"http://")))
  2789. {
  2790. pParsedUrl->Secure = FALSE;
  2791. pHostname = pUrl + WCSLEN_LIT(L"http://");
  2792. }
  2793. else if (0 == wcsncmp(pUrl, L"https://", WCSLEN_LIT(L"https://")))
  2794. {
  2795. pParsedUrl->Secure = TRUE;
  2796. pHostname = pUrl + WCSLEN_LIT(L"https://");
  2797. }
  2798. else
  2799. {
  2800. UlTraceError(PARSER,
  2801. ("http!HttpParseUrl: invalid scheme, %.*ls\n",
  2802. UrlLength, pUrl
  2803. ));
  2804. RETURN(STATUS_INVALID_PARAMETER);
  2805. }
  2806. pParsedUrl->pHostname = (PWSTR) pHostname;
  2807. // Is a trailing slash present, if required?
  2808. if (TrailingSlashReqd && L'/' != pUrl[UrlLength - 1])
  2809. {
  2810. // No, then the URL will have to be rewritten
  2811. pParsedUrl->Normalized = FALSE;
  2812. }
  2813. //
  2814. // The hostname validation code below looks a lot like that in
  2815. // HttpValidateHostname(). However, it is sufficiently different
  2816. // (WCHAR vs. UCHAR, Host+IP, Scope IDs, compulsory ports, etc) that
  2817. // it is not easy to combine them into one routine. If the hostname
  2818. // validation code is changed here, it may be necessary to change it
  2819. // in HttpValidateHostname() too, or vice versa.
  2820. //
  2821. // Check for weak (http://*:port/) and strong (http://+:port/) wildcards
  2822. if (L'*' == *pHostname || L'+' == *pHostname)
  2823. {
  2824. pParsedUrl->SiteType = (L'*' == *pHostname)
  2825. ? HttpUrlSite_WeakWildcard
  2826. : HttpUrlSite_StrongWildcard;
  2827. pChar = pHostname + WCSLEN_LIT(L"*");
  2828. ASSERT(pChar < pEnd);
  2829. // The wildcard must be followed by ":port"
  2830. if (L':' == *pChar)
  2831. goto port;
  2832. UlTraceError(PARSER,
  2833. ("http!HttpParseUrl: No port in '%c' wildcard address\n",
  2834. *pHostname
  2835. ));
  2836. RETURN(STATUS_INVALID_PARAMETER);
  2837. }
  2838. // Is this an IPv6 literal address, per RFC 2732?
  2839. if (L'[' == *pHostname)
  2840. {
  2841. pParsedUrl->SiteType = HttpUrlSite_IP;
  2842. Status = HttppParseIPv6Address(
  2843. pHostname,
  2844. DIFF(pEnd - pHostname),
  2845. TRUE, // scope ID allowed
  2846. &pParsedUrl->SockAddr6,
  2847. &pChar);
  2848. if (!NT_SUCCESS(Status))
  2849. {
  2850. UlTraceError(PARSER,
  2851. ("http!HttpParseUrl: "
  2852. "Invalid IPv6 address, %s\n",
  2853. HttpStatusToString(Status)
  2854. ));
  2855. RETURN(Status);
  2856. }
  2857. ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);
  2858. ASSERT(pChar > pHostname);
  2859. // There must be a port
  2860. if (pChar == pEnd || L':' != *pChar)
  2861. {
  2862. UlTraceError(PARSER,
  2863. ("http!HttpParseUrl: No port after IPv6 address\n"
  2864. ));
  2865. RETURN(STATUS_INVALID_PARAMETER);
  2866. }
  2867. //
  2868. // There are so many legitimate ways to write an IPv6 literal
  2869. // that we can't assume that a valid IPv6 literal is normalized.
  2870. // Since we do string comparisons, we'll have to rewrite the URL
  2871. // if the Normalized flag is not set.
  2872. //
  2873. Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);
  2874. if (Length != DIFF_USHORT(pChar - pHostname)
  2875. || 0 != _wcsnicmp(pHostname, IpAddr, Length))
  2876. {
  2877. pParsedUrl->Normalized = FALSE;
  2878. }
  2879. goto port;
  2880. } // IPv6
  2881. //
  2882. // It must be a domain name or an IPv4 literal. We'll try to treat
  2883. // it as a domain name first. If the labels turn out to be all-numeric,
  2884. // we'll try decoding it as an IPv4 literal.
  2885. //
  2886. AlphaLabel = FALSE;
  2887. pLabel = pHostname;
  2888. for (pChar = pHostname; pChar < pEnd; ++pChar)
  2889. {
  2890. if (L':' == *pChar)
  2891. {
  2892. if (pChar == pHostname)
  2893. {
  2894. UlTraceError(PARSER,
  2895. ("http!HttpParseUrl: empty hostname\n"
  2896. ));
  2897. RETURN(STATUS_INVALID_PARAMETER);
  2898. }
  2899. // Have we seen any non-digits?
  2900. if (AlphaLabel)
  2901. {
  2902. ASSERT(0 == pParsedUrl->SockAddr.sa_family);
  2903. pParsedUrl->SiteType = HttpUrlSite_Name;
  2904. goto port;
  2905. }
  2906. pParsedUrl->SiteType = HttpUrlSite_IP;
  2907. pParsedUrl->SockAddr4.sin_family = TDI_ADDRESS_TYPE_IP;
  2908. ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family);
  2909. // Let's see if it's a valid IPv4 address
  2910. Status = RtlIpv4StringToAddressW(
  2911. pHostname,
  2912. TRUE, // strict => 4 dotted decimal octets
  2913. &pTerminator,
  2914. &pParsedUrl->SockAddr4.sin_addr
  2915. );
  2916. if (!NT_SUCCESS(Status))
  2917. {
  2918. UlTraceError(PARSER,
  2919. ("http!HttpParseUrl: "
  2920. "Invalid IPv4 address, %s\n",
  2921. HttpStatusToString(Status)
  2922. ));
  2923. RETURN(Status);
  2924. }
  2925. if (pTerminator != pChar)
  2926. {
  2927. ASSERT(pTerminator < pChar);
  2928. UlTraceError(PARSER,
  2929. ("http!HttpParseUrl: "
  2930. "Invalid IPv4 address after %lu chars, "
  2931. "U+%04X, '%c'\n",
  2932. DIFF(pTerminator - pHostname),
  2933. *pTerminator,
  2934. IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
  2935. ? *pTerminator
  2936. : '?'
  2937. ));
  2938. RETURN(STATUS_INVALID_PARAMETER);
  2939. }
  2940. Length = HttppPrintIpAddressW(&pParsedUrl->SockAddr, IpAddr);
  2941. if (Length != DIFF_USHORT(pChar - pHostname)
  2942. || 0 != _wcsnicmp(pHostname, IpAddr, Length))
  2943. {
  2944. pParsedUrl->Normalized = FALSE;
  2945. }
  2946. goto port;
  2947. } // ':' handling
  2948. if (L'.' == *pChar)
  2949. {
  2950. ULONG LabelLength = DIFF(pChar - pLabel);
  2951. // There must be at least one char in the label
  2952. if (0 == LabelLength)
  2953. {
  2954. UlTraceError(PARSER,
  2955. ("http!HttpParseUrl: empty label\n"
  2956. ));
  2957. RETURN(STATUS_INVALID_PARAMETER);
  2958. }
  2959. // Label can't have more than 63 chars
  2960. if (LabelLength > pCfg->MaxLabelLength)
  2961. {
  2962. UlTraceError(PARSER,
  2963. ("http!HttpParseUrl: overlong label, %lu\n",
  2964. LabelLength
  2965. ));
  2966. RETURN(STATUS_INVALID_PARAMETER);
  2967. }
  2968. // Reset for the next label
  2969. pLabel = pChar + WCSLEN_LIT(L".");
  2970. continue;
  2971. }
  2972. //
  2973. // All chars above 0xFF are considered valid
  2974. //
  2975. if (!IS_ANSI(*pChar) || !IS_URL_ILLEGAL_COMPUTERNAME(*pChar))
  2976. {
  2977. if (!IS_ANSI(*pChar) || !IS_HTTP_DIGIT(*pChar))
  2978. AlphaLabel = TRUE;
  2979. if (pChar > pLabel)
  2980. continue;
  2981. // The first char of a label cannot be a hyphen. (Underscore?)
  2982. if (L'-' == *pChar)
  2983. {
  2984. UlTraceError(PARSER,
  2985. ("http!HttpParseUrl: '-' at beginning of label\n"
  2986. ));
  2987. RETURN(STATUS_INVALID_PARAMETER);
  2988. }
  2989. continue;
  2990. }
  2991. UlTraceError(PARSER,
  2992. ("http!HttpParseUrl: "
  2993. "Invalid char in hostname, U+%04X '%c',"
  2994. " after %lu chars, '%.*s'\n",
  2995. *pChar,
  2996. IS_ANSI(*pChar) && IS_HTTP_PRINT(*pChar) ? *pChar : '?',
  2997. DIFF(pChar - pHostname),
  2998. DIFF(pChar - pHostname),
  2999. pHostname
  3000. ));
  3001. RETURN(STATUS_INVALID_PARAMETER);
  3002. } // hostname
  3003. //
  3004. // If we got here, we fell off the end of the buffer,
  3005. // without finding a ':' for the port
  3006. //
  3007. ASSERT(pChar == pEnd);
  3008. UlTraceError(PARSER, ("http!HttpParseUrl: No port\n"));
  3009. RETURN(STATUS_INVALID_PARAMETER);
  3010. port:
  3011. //
  3012. // Parse the port number
  3013. //
  3014. ASSERT(pHostname < pChar && pChar < pEnd);
  3015. ASSERT(L':' == *pChar);
  3016. pParsedUrl->HostnameLength = DIFF_USHORT(pChar - pHostname);
  3017. // First, check for overlong hostnames
  3018. if (pParsedUrl->HostnameLength > pCfg->MaxHostnameLength)
  3019. {
  3020. UlTraceError(PARSER,
  3021. ("http!HttpParseUrl: overlong hostname, %hu\n",
  3022. pParsedUrl->HostnameLength
  3023. ));
  3024. RETURN(STATUS_INVALID_PARAMETER);
  3025. }
  3026. // Skip the ':' denoting a port number
  3027. pChar += WCSLEN_LIT(L":");
  3028. if (pChar == pEnd)
  3029. {
  3030. UlTraceError(PARSER,
  3031. ("http!HttpParseUrl: No port after ':'\n"
  3032. ));
  3033. RETURN(STATUS_INVALID_PARAMETER);
  3034. }
  3035. // Search for the '/' or second ':' that terminates the port number
  3036. pSlash = pChar;
  3037. pParsedUrl->pPort = (PWSTR) pSlash;
  3038. do
  3039. {
  3040. if (*pSlash < L'0' || *pSlash > L'9')
  3041. {
  3042. UlTraceError(PARSER,
  3043. ("http!HttpParseUrl: "
  3044. "Invalid digit in port, U+%04X, '%c'\n",
  3045. *pSlash,
  3046. IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
  3047. ? *pSlash
  3048. : '?'
  3049. ));
  3050. RETURN(STATUS_INVALID_PARAMETER);
  3051. }
  3052. } while (++pSlash < pEnd && L'/' != *pSlash && L':' != *pSlash);
  3053. ASSERT(pSlash > pChar);
  3054. pParsedUrl->PortLength = DIFF_USHORT(pSlash - pChar);
  3055. if (pSlash == pEnd)
  3056. {
  3057. UlTraceError(PARSER,
  3058. ("http!HttpParseUrl: No '/' (or second ':') after port\n"
  3059. ));
  3060. RETURN(STATUS_INVALID_PARAMETER);
  3061. }
  3062. ASSERT(L'/' == *pSlash || L':' == *pSlash);
  3063. Status = HttpWideStringToUShort(
  3064. pChar,
  3065. pParsedUrl->PortLength,
  3066. FALSE, // no leading zeros permitted
  3067. 10,
  3068. &pTerminator,
  3069. &pParsedUrl->PortNumber
  3070. );
  3071. if (!NT_SUCCESS(Status))
  3072. {
  3073. UlTraceError(PARSER,
  3074. ("http!HttpParseUrl: "
  3075. "Invalid port number, %s\n",
  3076. HttpStatusToString(Status)
  3077. ));
  3078. RETURN(STATUS_INVALID_PARAMETER);
  3079. }
  3080. if (0 == pParsedUrl->PortNumber)
  3081. {
  3082. UlTraceError(PARSER,
  3083. ("http!HttpParseUrl: Port must not be zero.\n"
  3084. ));
  3085. RETURN(STATUS_INVALID_PARAMETER);
  3086. }
  3087. ASSERT(pTerminator == pSlash);
  3088. pChar = pSlash;
  3089. goto routing_IP; // so /W4 won't complain about an unreferenced label
  3090. routing_IP:
  3091. //
  3092. // Is this a Host+IP site; i.e., is there a Routing IP address
  3093. // after the port number?
  3094. //
  3095. if (L'/' == *pChar)
  3096. {
  3097. pParsedUrl->pRoutingIP = NULL;
  3098. pParsedUrl->RoutingIPLength = 0;
  3099. ASSERT(0 == pParsedUrl->RoutingAddr.sa_family);
  3100. //
  3101. // If the hostname is an IP literal, but there is no routing IP
  3102. // (i.e., http://IP:port/path), we must rewrite the URL as
  3103. // http://IP:port:IP/path; i.e., explicitly use the hostname IP
  3104. // as the routing IP.
  3105. //
  3106. if (ForceRoutingIP && 0 != pParsedUrl->SockAddr.sa_family)
  3107. {
  3108. ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
  3109. || TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family);
  3110. pParsedUrl->Normalized = FALSE;
  3111. }
  3112. goto parse_path;
  3113. }
  3114. ASSERT(L':' == *pChar);
  3115. if (HttpUrlSite_WeakWildcard == pParsedUrl->SiteType
  3116. || HttpUrlSite_StrongWildcard == pParsedUrl->SiteType)
  3117. {
  3118. UlTraceError(PARSER,
  3119. ("http!HttpParseUrl: "
  3120. "Can't have Routing IPs on Wildcard sites\n"
  3121. ));
  3122. RETURN(STATUS_INVALID_PARAMETER);
  3123. }
  3124. pChar += WCSLEN_LIT(L":");
  3125. if (pChar == pEnd)
  3126. {
  3127. UlTraceError(PARSER,
  3128. ("http!HttpParseUrl: No IP address after second ':'\n"
  3129. ));
  3130. RETURN(STATUS_INVALID_PARAMETER);
  3131. }
  3132. pParsedUrl->pRoutingIP = (PWSTR) pChar;
  3133. ASSERT(HttpUrlSite_NamePlusIP != pParsedUrl->SiteType);
  3134. if (HttpUrlSite_Name == pParsedUrl->SiteType)
  3135. {
  3136. pParsedUrl->SiteType = HttpUrlSite_NamePlusIP;
  3137. }
  3138. //
  3139. // Is the Routing IP an IPv6 literal?
  3140. //
  3141. if (L'[' == *pChar)
  3142. {
  3143. if (TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family)
  3144. {
  3145. UlTraceError(PARSER,
  3146. ("http!HttpParseUrl: "
  3147. "Can't have http://IPv4:port:[IPv6]\n"
  3148. ));
  3149. RETURN(STATUS_INVALID_PARAMETER);
  3150. }
  3151. ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family
  3152. || 0 == pParsedUrl->SockAddr.sa_family);
  3153. Status = HttppParseIPv6Address(
  3154. pChar,
  3155. DIFF(pEnd - pChar),
  3156. TRUE, // scope ID allowed
  3157. &pParsedUrl->RoutingAddr6,
  3158. &pSlash);
  3159. if (!NT_SUCCESS(Status))
  3160. {
  3161. UlTraceError(PARSER,
  3162. ("http!HttpParseUrl: "
  3163. "Invalid Host+IPv6 address, %s\n",
  3164. HttpStatusToString(Status)
  3165. ));
  3166. RETURN(Status);
  3167. }
  3168. ASSERT(TDI_ADDRESS_TYPE_IP6 == pParsedUrl->RoutingAddr.sa_family);
  3169. ASSERT(pSlash > pChar);
  3170. // There must be a slash
  3171. if (pSlash == pEnd || L'/' != *pSlash)
  3172. {
  3173. UlTraceError(PARSER,
  3174. ("http!HttpParseUrl: '/' expected after Host+IPv6.\n"
  3175. ));
  3176. RETURN(STATUS_INVALID_PARAMETER);
  3177. }
  3178. // CODEWORK: Should we care if RoutingAddr6 != SockAddr6?
  3179. pParsedUrl->RoutingIPLength = DIFF_USHORT(pSlash - pChar);
  3180. Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);
  3181. if (Length != pParsedUrl->RoutingIPLength
  3182. || 0 != _wcsnicmp(pChar, IpAddr, Length))
  3183. {
  3184. pParsedUrl->Normalized = FALSE;
  3185. }
  3186. pChar = pSlash;
  3187. goto parse_path;
  3188. }
  3189. //
  3190. // No, then it must be an IPv4 literal
  3191. //
  3192. if (TDI_ADDRESS_TYPE_IP6 == pParsedUrl->SockAddr.sa_family)
  3193. {
  3194. UlTraceError(PARSER,
  3195. ("http!HttpParseUrl: Can't have http://[IPv6]:port:IPv4\n"
  3196. ));
  3197. RETURN(STATUS_INVALID_PARAMETER);
  3198. }
  3199. ASSERT(TDI_ADDRESS_TYPE_IP == pParsedUrl->SockAddr.sa_family
  3200. || 0 == pParsedUrl->SockAddr.sa_family);
  3201. // Search for the terminating '/'
  3202. pSlash = pChar;
  3203. do
  3204. {
  3205. if ((L'0' <= *pSlash && *pSlash <= L'9') || L'.' == *pSlash)
  3206. continue;
  3207. UlTraceError(PARSER,
  3208. ("http!HttpParseUrl: "
  3209. "Invalid character in Host+IPv4, U+%04X, '%c'\n",
  3210. *pSlash,
  3211. IS_ANSI(*pSlash) && IS_HTTP_PRINT(*pSlash)
  3212. ? *pSlash
  3213. : '?'
  3214. ));
  3215. RETURN(STATUS_INVALID_PARAMETER);
  3216. } while (++pSlash < pEnd && L'/' != *pSlash);
  3217. ASSERT(pSlash > pChar);
  3218. if (pSlash == pEnd)
  3219. {
  3220. UlTraceError(PARSER,
  3221. ("http!HttpParseUrl: No '/' after Host+IPv4\n"
  3222. ));
  3223. RETURN(STATUS_INVALID_PARAMETER);
  3224. }
  3225. ASSERT(L'/' == *pSlash);
  3226. Status = RtlIpv4StringToAddressW(
  3227. pChar,
  3228. TRUE, // strict => 4 dotted decimal octets
  3229. &pTerminator,
  3230. &pParsedUrl->RoutingAddr4.sin_addr
  3231. );
  3232. if (!NT_SUCCESS(Status))
  3233. {
  3234. UlTraceError(PARSER,
  3235. ("http!HttpParseUrl: "
  3236. "Invalid Host+IPv4 address, %s\n",
  3237. HttpStatusToString(Status)
  3238. ));
  3239. RETURN(Status);
  3240. }
  3241. if (pTerminator != pSlash)
  3242. {
  3243. ASSERT(pTerminator < pSlash);
  3244. UlTraceError(PARSER,
  3245. ("http!HttpParseUrl: "
  3246. "Invalid Host+IPv4 address after %lu chars, "
  3247. "U+%04X, '%c'\n",
  3248. DIFF(pTerminator - pChar),
  3249. *pTerminator,
  3250. IS_ANSI(*pTerminator) && IS_HTTP_PRINT(*pTerminator)
  3251. ? *pTerminator
  3252. : '?'
  3253. ));
  3254. RETURN(STATUS_INVALID_PARAMETER);
  3255. }
  3256. // CODEWORK: Should we care if RoutingAddr4 != SockAddr4
  3257. pParsedUrl->RoutingIPLength = DIFF_USHORT(pSlash - pChar);
  3258. pParsedUrl->RoutingAddr4.sin_family = TDI_ADDRESS_TYPE_IP;
  3259. Length = HttppPrintIpAddressW(&pParsedUrl->RoutingAddr, IpAddr);
  3260. if (Length != pParsedUrl->RoutingIPLength
  3261. || 0 != _wcsnicmp(pChar, IpAddr, Length))
  3262. {
  3263. pParsedUrl->Normalized = FALSE;
  3264. }
  3265. pChar = pSlash;
  3266. parse_path:
  3267. //
  3268. // Parse the abspath
  3269. //
  3270. ASSERT(pParsedUrl->pRoutingIP == NULL || pParsedUrl->RoutingIPLength > 0);
  3271. ASSERT(pHostname < pChar && pChar < pEnd);
  3272. ASSERT(L'/' == *pChar);
  3273. pParsedUrl->pAbsPath = (PWSTR) pChar;
  3274. pParsedUrl->AbsPathLength = DIFF_USHORT(pEnd - pChar);
  3275. if (pParsedUrl->AbsPathLength > pCfg->UrlMaxLength)
  3276. {
  3277. UlTraceError(PARSER,
  3278. ("http!HttpParseUrl: "
  3279. "AbsPath is too long: %lu\n",
  3280. pParsedUrl->AbsPathLength
  3281. ));
  3282. RETURN(STATUS_INVALID_PARAMETER);
  3283. }
  3284. UrlState = URL_STATE_START;
  3285. UrlToken = URL_TOKEN_OTHER;
  3286. Action = ACTION_NOTHING;
  3287. pSegment = pChar;
  3288. TestSegment = FALSE;
  3289. LastCharHack = FALSE;
  3290. MoreChars = TRUE;
  3291. PreviousChar = UNICODE_NULL;
  3292. UnicodeChar = *pChar;
  3293. SegmentCount = 0;
  3294. //
  3295. // Loop through all the characters in pAbsPath, plus one or two
  3296. // special ones at the end.
  3297. //
  3298. while (MoreChars)
  3299. {
  3300. switch (UnicodeChar)
  3301. {
  3302. case UNICODE_NULL:
  3303. UrlToken = URL_TOKEN_EOS;
  3304. TestSegment = TRUE;
  3305. break;
  3306. case DOT:
  3307. UrlToken = URL_TOKEN_DOT;
  3308. TestSegment = FALSE;
  3309. break;
  3310. case FORWARD_SLASH:
  3311. UrlToken = URL_TOKEN_SLASH;
  3312. TestSegment = TRUE;
  3313. break;
  3314. case PERCENT: // no hex escapes
  3315. case STAR: // no wildcards
  3316. case QUESTION_MARK: // no wildcards or querystrings
  3317. case BACK_SLASH: // no C string escapes
  3318. UlTraceError(PARSER,
  3319. ("http!HttpParseUrl: invalid '%c' char in path\n",
  3320. (UCHAR) UnicodeChar
  3321. ));
  3322. RETURN(STATUS_INVALID_PARAMETER);
  3323. default:
  3324. UrlToken = URL_TOKEN_OTHER;
  3325. TestSegment = FALSE;
  3326. break;
  3327. }
  3328. UlTraceVerbose(PARSER,
  3329. ("http!HttpParseUrl: "
  3330. "[%lu] U+%04lX '%c' %p: [%s][%s] -> %s, %s\n",
  3331. DIFF(pChar - pParsedUrl->pAbsPath),
  3332. UnicodeChar,
  3333. IS_ANSI(UnicodeChar) && IS_HTTP_PRINT(UnicodeChar)
  3334. ? (UCHAR) UnicodeChar : '?',
  3335. pChar,
  3336. HttppUrlStateToString(UrlState),
  3337. HttppUrlTokenToString(UrlToken),
  3338. HttppUrlStateToString(
  3339. CanonStateFromStateAndToken[UrlState][UrlToken]),
  3340. TestSegment ? ", TestSegment" : ""
  3341. ));
  3342. //
  3343. // Reject control characters
  3344. //
  3345. if (!LastCharHack
  3346. && !pCfg->AllowRestrictedChars
  3347. && IS_ANSI(UnicodeChar)
  3348. && IS_URL_INVALID(UnicodeChar))
  3349. {
  3350. UlTraceError(PARSER, (
  3351. "http!HttpParseUrl: "
  3352. "Invalid character, U+%04lX, in path.\n",
  3353. UnicodeChar
  3354. ));
  3355. RETURN(STATUS_INVALID_PARAMETER);
  3356. }
  3357. //
  3358. // Check that (high-surrogate, low-surrogate) come in pairs
  3359. //
  3360. if (HIGH_SURROGATE_START <= PreviousChar
  3361. && PreviousChar <= HIGH_SURROGATE_END)
  3362. {
  3363. if (UnicodeChar < LOW_SURROGATE_START
  3364. || UnicodeChar > LOW_SURROGATE_END)
  3365. {
  3366. UlTraceError(PARSER, (
  3367. "http!HttpParseUrl: "
  3368. "Illegal surrogate pair, U+%04lX, U+%04lX.\n",
  3369. PreviousChar, UnicodeChar
  3370. ));
  3371. RETURN(STATUS_INVALID_PARAMETER);
  3372. }
  3373. }
  3374. else if (LOW_SURROGATE_START <= UnicodeChar
  3375. && UnicodeChar <= LOW_SURROGATE_END)
  3376. {
  3377. UlTraceError(PARSER, (
  3378. "http!HttpParseUrl: "
  3379. "Non-high surrogate, U+%04lX, "
  3380. "before low surrogate, U+%04lX.\n",
  3381. PreviousChar, UnicodeChar
  3382. ));
  3383. RETURN(STATUS_INVALID_PARAMETER);
  3384. }
  3385. if (URL_STATE_ERROR == CanonStateFromStateAndToken[UrlState][UrlToken])
  3386. {
  3387. UlTraceError(PARSER, (
  3388. "http!HttpParseUrl: "
  3389. "Error state from %s,%s in path, after U+%04lX.\n",
  3390. HttppUrlStateToString(UrlState),
  3391. HttppUrlTokenToString(UrlToken),
  3392. UnicodeChar
  3393. ));
  3394. RETURN(STATUS_INVALID_PARAMETER);
  3395. }
  3396. UrlState = CanonStateFromStateAndToken[UrlState][UrlToken];
  3397. //
  3398. // Check segment limits
  3399. //
  3400. if (TestSegment)
  3401. {
  3402. ULONG SegmentLength = DIFF(pChar - pSegment);
  3403. // The CanonStateFromStateAndToken checks should prevent
  3404. // empty segments, among other things
  3405. ASSERT(SegmentLength > 0 || pChar == pSegment);
  3406. // Reject if segment too long
  3407. if (SegmentLength > pCfg->UrlSegmentMaxLength + WCSLEN_LIT(L"/"))
  3408. {
  3409. UlTraceError(PARSER, (
  3410. "http!HttpParseUrl(): "
  3411. "Segment too long: %lu\n",
  3412. SegmentLength
  3413. ));
  3414. RETURN(STATUS_INVALID_PARAMETER);
  3415. }
  3416. pSegment = pChar;
  3417. // Reject if too many path segments
  3418. if (++SegmentCount > pCfg->UrlSegmentMaxCount)
  3419. {
  3420. UlTraceError(PARSER, (
  3421. "http!HttpParseUrl(): "
  3422. "Too many segments: %lu\n",
  3423. SegmentCount
  3424. ));
  3425. RETURN(STATUS_INVALID_PARAMETER);
  3426. }
  3427. }
  3428. //
  3429. // Are there any more path characters?
  3430. //
  3431. PreviousChar = UnicodeChar;
  3432. if (++pChar < pEnd)
  3433. {
  3434. UnicodeChar = *pChar;
  3435. }
  3436. else if (!LastCharHack)
  3437. {
  3438. // Want to make sure that the last segment is tested.
  3439. // If there's no trailing slash, we'll enter here twice;
  3440. // otherwise once
  3441. if (TrailingSlashReqd && FORWARD_SLASH != PreviousChar)
  3442. {
  3443. // First, fake a trailing slash, if needed
  3444. UnicodeChar = FORWARD_SLASH;
  3445. }
  3446. else
  3447. {
  3448. // Second, always finish up with UNICODE_NULL
  3449. UnicodeChar = UNICODE_NULL;
  3450. LastCharHack = TRUE;
  3451. }
  3452. }
  3453. else
  3454. {
  3455. // Terminate the loop
  3456. MoreChars = FALSE;
  3457. }
  3458. } // while (MoreChars)
  3459. RETURN(STATUS_SUCCESS);
  3460. } // HttpParseUrl
  3461. /***************************************************************************++
  3462. Routine Description:
  3463. Some URLs parsed by HttpParseUrl() will not be considered normalized
  3464. if they have IP literals, Routing IPs, or no trailing slash.
  3465. This routine will build a fully normalized URL and (possibly) free the
  3466. old one
  3467. Arguments:
  3468. pParsedUrl - On entry, points to a URL parsed by HttpParseUrl();
  3469. On successful exit, points to a normalized URL.
  3470. pCfg - configuration parameters
  3471. ForceCopy - if TRUE, will always make a new, normalized URL
  3472. FreeOriginalUrl - if FALSE, will never free the original URL.
  3473. The caller must manage the memory.
  3474. ForceRoutingIP - if TRUE and the hostname is an IPv4 or IPv6 literal,
  3475. the URL will be rewritten in the form
  3476. http://IP:port:IP/path
  3477. PoolType - PagedPool or NonPagedPool
  3478. PoolTag - Tag used to allocate pUrl
  3479. Return Value:
  3480. NTSTATUS - STATUS_SUCCESS or STATUS_NO_MEMORY
  3481. --***************************************************************************/
  3482. NTSTATUS
  3483. HttpNormalizeParsedUrl(
  3484. IN OUT PHTTP_PARSED_URL pParsedUrl,
  3485. IN PURL_C14N_CONFIG pCfg,
  3486. IN BOOLEAN ForceCopy,
  3487. IN BOOLEAN FreeOriginalUrl,
  3488. IN BOOLEAN ForceRoutingIP,
  3489. IN POOL_TYPE PoolType,
  3490. IN ULONG PoolTag
  3491. )
  3492. {
  3493. HTTP_PARSED_URL ParsedUrl = *pParsedUrl;
  3494. NTSTATUS Status = STATUS_SUCCESS;
  3495. ASSERT(HTTP_PARSED_URL_SIGNATURE == ParsedUrl.Signature);
  3496. if (ParsedUrl.Normalized && !ForceCopy)
  3497. {
  3498. // nothing to do
  3499. }
  3500. else
  3501. {
  3502. PWSTR pResult;
  3503. WCHAR HostAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
  3504. WCHAR RoutingAddrString[MAX_IP_ADDR_PLUS_BRACKETS_STRING_LEN];
  3505. ULONG SchemeLength;
  3506. ULONG HostAddrLength;
  3507. ULONG HostnameLength;
  3508. ULONG RoutingAddrLength;
  3509. ULONG AbsPathLength;
  3510. ULONG Length;
  3511. ULONG TrailingSlashLength;
  3512. PCWSTR pUrl;
  3513. pUrl = ParsedUrl.pFullUrl;
  3514. SchemeLength = DIFF(ParsedUrl.pHostname - ParsedUrl.pFullUrl);
  3515. // Calculate HostAddrLength and HostnameLength (mutually exclusive)
  3516. if (0 != ParsedUrl.SockAddr.sa_family)
  3517. {
  3518. HostAddrLength = HttppPrintIpAddressW(
  3519. &ParsedUrl.SockAddr,
  3520. HostAddrString
  3521. );
  3522. HostnameLength = 0;
  3523. }
  3524. else
  3525. {
  3526. HostAddrLength = 0;
  3527. HostAddrString[0] = UNICODE_NULL;
  3528. HostnameLength = ParsedUrl.HostnameLength;
  3529. }
  3530. // Calculate RoutingAddrLength
  3531. if (0 != ParsedUrl.RoutingAddr.sa_family)
  3532. {
  3533. RoutingAddrLength = WCSLEN_LIT(L":")
  3534. + HttppPrintIpAddressW(
  3535. &ParsedUrl.RoutingAddr,
  3536. RoutingAddrString
  3537. );
  3538. }
  3539. else if (ForceRoutingIP && 0 != ParsedUrl.SockAddr.sa_family)
  3540. {
  3541. // We must rewrite http://IP:port/path as http://IP:port:IP/path
  3542. RoutingAddrLength = WCSLEN_LIT(L":") + HostAddrLength;
  3543. wcscpy(RoutingAddrString, HostAddrString);
  3544. }
  3545. else
  3546. {
  3547. RoutingAddrLength = 0;
  3548. RoutingAddrString[0] = UNICODE_NULL;
  3549. }
  3550. AbsPathLength = ParsedUrl.AbsPathLength;
  3551. ASSERT(AbsPathLength > 0);
  3552. if (ParsedUrl.TrailingSlashReqd
  3553. && FORWARD_SLASH != ParsedUrl.pAbsPath[AbsPathLength-1])
  3554. {
  3555. TrailingSlashLength = WCSLEN_LIT(L"/");
  3556. }
  3557. else
  3558. {
  3559. TrailingSlashLength = 0;
  3560. }
  3561. Length = SchemeLength
  3562. + HostAddrLength
  3563. + HostnameLength
  3564. + WCSLEN_LIT(L":") + ParsedUrl.PortLength
  3565. + RoutingAddrLength
  3566. + AbsPathLength
  3567. + TrailingSlashLength;
  3568. pResult = (PWSTR) HTTPP_ALLOC(
  3569. PoolType,
  3570. (Length + 1) * sizeof(WCHAR),
  3571. PoolTag
  3572. );
  3573. if (NULL == pResult)
  3574. {
  3575. Status = STATUS_NO_MEMORY;
  3576. // Do not destroy the old URL. Let caller handle it.
  3577. }
  3578. else
  3579. {
  3580. PWSTR pDest = pResult;
  3581. #define WCSNCPY(pSrc, Length) \
  3582. RtlCopyMemory(pDest, (pSrc), (Length) * sizeof(WCHAR)); \
  3583. pDest += (Length)
  3584. #define WCSNCPY2(pField, Length) \
  3585. WCSNCPY(ParsedUrl.pField, Length)
  3586. #define WCSNCPY_LIT(Lit) \
  3587. WCSNCPY(Lit, WCSLEN_LIT(Lit))
  3588. WCSNCPY2(pFullUrl, SchemeLength);
  3589. if (0 != HostnameLength)
  3590. {
  3591. ASSERT(0 == HostAddrLength);
  3592. WCSNCPY2(pHostname, HostnameLength);
  3593. }
  3594. else
  3595. {
  3596. ASSERT(0 != HostAddrLength);
  3597. WCSNCPY(HostAddrString, HostAddrLength);
  3598. }
  3599. WCSNCPY_LIT(L":");
  3600. WCSNCPY2(pPort, ParsedUrl.PortLength);
  3601. if (RoutingAddrLength > 0)
  3602. {
  3603. WCSNCPY_LIT(L":");
  3604. WCSNCPY(
  3605. RoutingAddrString,
  3606. RoutingAddrLength - WCSLEN_LIT(L":")
  3607. );
  3608. }
  3609. WCSNCPY2(pAbsPath, AbsPathLength);
  3610. if (TrailingSlashLength > 0)
  3611. {
  3612. WCSNCPY_LIT(L"/");
  3613. }
  3614. ASSERT(DIFF(pDest - pResult) == Length);
  3615. *pDest = UNICODE_NULL;
  3616. Status = HttpParseUrl(
  3617. pCfg,
  3618. pResult,
  3619. Length,
  3620. ParsedUrl.TrailingSlashReqd,
  3621. ForceRoutingIP,
  3622. &ParsedUrl
  3623. );
  3624. ASSERT(STATUS_SUCCESS == Status);
  3625. ASSERT(ParsedUrl.Normalized);
  3626. if (FreeOriginalUrl)
  3627. HTTPP_FREE((PVOID) pUrl, PoolTag);
  3628. // Write the updated local copy back to the caller's HTTP_PARSED_URL
  3629. *pParsedUrl = ParsedUrl;
  3630. }
  3631. }
  3632. return Status;
  3633. } // HttpNormalizeParsedUrl