/*++ Copyright (c) 1998-2002 Microsoft Corporation Module Name: C14n.h Abstract: URL canonicalization (c14n) routines Author: George V. Reilly (GeorgeRe) 10-Apr-2002 Revision History: --*/ #ifndef _C14N_H_ #define _C14N_H_ #define DEFAULT_C14N_ENABLE_NON_UTF8_URL TRUE #define DEFAULT_C14N_FAVOR_UTF8_URL TRUE #define DEFAULT_C14N_ENABLE_DBCS_URL FALSE #define DEFAULT_C14N_PERCENT_U_ALLOWED TRUE #define DEFAULT_C14N_ALLOW_RESTRICTED_CHARS FALSE // Maximum length of the AbsPath of a URL, in chars #define DEFAULT_C14N_URL_MAX_LENGTH UNICODE_STRING_MAX_WCHAR_LEN #ifndef MAX_PATH #define MAX_PATH 260 #endif // Maximum length of an individual segment within a URL #define DEFAULT_C14N_URL_SEGMENT_MAX_LENGTH MAX_PATH #define C14N_URL_SEGMENT_UNLIMITED_LENGTH (0xFFFFFFFE - STRLEN_LIT("/")) // Maximum number of path segments within a URL #define DEFAULT_C14N_URL_SEGMENT_MAX_COUNT 255 #define C14N_URL_SEGMENT_UNLIMITED_COUNT C14N_URL_SEGMENT_UNLIMITED_LENGTH // Maximum length of a label within a hostname; e.g., "www.example.com" // has three labels, with "example" being the longest at 7 octets. #define DEFAULT_C14N_MAX_LABEL_LENGTH 63 // Maximum length of a hostname #define DEFAULT_C14N_MAX_HOSTNAME_LENGTH 255 typedef enum _URL_PART { UrlPart_Scheme, UrlPart_HostName, UrlPart_UserInfo, UrlPart_AbsPath, UrlPart_QueryString, UrlPart_Fragment } URL_PART; typedef enum _URL_DECODE_ORDER { UrlDecode_Shift = 2, UrlDecode_Mask = ((1 << UrlDecode_Shift) - 1), #define URL_DECODE2(D1, D2) \ ( UrlDecode_##D1 | (UrlDecode_##D2 << UrlDecode_Shift)) #define URL_DECODE3(D1, D2, D3) \ ( URL_DECODE2(D1, D2) | (UrlDecode_##D3 << (2 * UrlDecode_Shift))) UrlDecode_None = 0, // The following are the only valid permutations UrlDecode_Ansi = 1, UrlDecode_Dbcs = 2, UrlDecode_Utf8 = 3, UrlDecode_Ansi_Else_Dbcs = URL_DECODE2(Ansi, Dbcs), UrlDecode_Ansi_Else_Dbcs_Else_Utf8 = URL_DECODE3(Ansi, Dbcs, Utf8), UrlDecode_Ansi_Else_Utf8 = URL_DECODE2(Ansi, Utf8), UrlDecode_Ansi_Else_Utf8_Else_Dbcs = URL_DECODE3(Ansi, Utf8, Dbcs), UrlDecode_Dbcs_Else_Ansi = URL_DECODE2(Dbcs, Ansi), UrlDecode_Dbcs_Else_Ansi_Else_Utf8 = URL_DECODE3(Dbcs, Ansi, Utf8), UrlDecode_Dbcs_Else_Utf8 = URL_DECODE2(Dbcs, Utf8), UrlDecode_Dbcs_Else_Utf8_Else_Ansi = URL_DECODE3(Dbcs, Utf8, Ansi), UrlDecode_Utf8_Else_Ansi = URL_DECODE2(Utf8, Ansi), UrlDecode_Utf8_Else_Ansi_Else_Dbcs = URL_DECODE3(Utf8, Ansi, Dbcs), UrlDecode_Utf8_Else_Dbcs = URL_DECODE2(Utf8, Dbcs), UrlDecode_Utf8_Else_Dbcs_Else_Ansi = URL_DECODE3(Utf8, Dbcs, Ansi), UrlDecode_MaxMask = URL_DECODE3(Mask, Mask, Mask) #undef URL_DECODE2 #undef URL_DECODE3 // UrlDecode_Utf8_Else_Dbcs_Else_Ansi means: // * First attempt to decode the URL as UTF-8. // * If that fails, attempt to decode it as DBCS. // * If that too fails, attempt to decode it as ANSI. } URL_DECODE_ORDER, *PURL_DECODE_ORDER; typedef enum _URL_ENCODING_TYPE { UrlEncoding_Ansi = UrlDecode_Ansi, UrlEncoding_Dbcs = UrlDecode_Dbcs, UrlEncoding_Utf8 = UrlDecode_Utf8 } URL_ENCODING_TYPE, *PURL_ENCODING_TYPE; typedef struct _URL_C14N_CONFIG { URL_DECODE_ORDER HostnameDecodeOrder; URL_DECODE_ORDER AbsPathDecodeOrder; BOOLEAN EnableNonUtf8; BOOLEAN FavorUtf8; BOOLEAN EnableDbcs; BOOLEAN PercentUAllowed; BOOLEAN AllowRestrictedChars; ULONG CodePage; ULONG UrlMaxLength; ULONG UrlSegmentMaxLength; ULONG UrlSegmentMaxCount; ULONG MaxLabelLength; ULONG MaxHostnameLength; } URL_C14N_CONFIG, *PURL_C14N_CONFIG; typedef enum { HttpUrlSite_None = 0, HttpUrlSite_Name, // named site HttpUrlSite_IP, // IPv4 or IPv6 literal hostname HttpUrlSite_NamePlusIP, // named site with Routing IP HttpUrlSite_WeakWildcard, // hostname = '*' HttpUrlSite_StrongWildcard, // hostname = '+' HttpUrlSite_Max } HTTP_URL_SITE_TYPE, *PHTTP_URL_SITE_TYPE; #define HTTP_PARSED_URL_SIGNATURE MAKE_SIGNATURE('PUrl') #define HTTP_PARSED_URL_SIGNATURE_X \ MAKE_FREE_SIGNATURE(HTTP_PARSED_URL_SIGNATURE) #define IS_VALID_HTTP_PARSED_URL(p) \ ((p) && ((p)->Signature == HTTP_PARSED_URL_SIGNATURE)) typedef struct _HTTP_PARSED_URL { ULONG Signature; // HTTP_PARSED_URL_SIGNATURE HTTP_URL_SITE_TYPE SiteType; // Name, IP, or Weak/StrongWildCard // // These strings all point into the same buffer, of the form // "http://hostname:port/abs/path/" or // "http://hostname:port:IP/abs/path/". // PWSTR pFullUrl; // points to "http" or "https" PWSTR pHostname; // point to "hostname" PWSTR pPort; // point to "port" PWSTR pRoutingIP; // point to "IP" or NULL PWSTR pAbsPath; // points to "/abs/path" USHORT UrlLength; // length of pFullUrl USHORT HostnameLength; // length of pHostname USHORT PortLength; // length of pPort USHORT RoutingIPLength;// length of pRoutingIP USHORT AbsPathLength; // length of pAbsPath USHORT PortNumber; // value of pPort BOOLEAN Secure; // http or httpS? BOOLEAN Normalized; // In normalized form? BOOLEAN TrailingSlashReqd; // If TRUE => directory prefix union { SOCKADDR SockAddr; // Look at SockAddr.sa_family SOCKADDR_IN SockAddr4; // set if == TDI_ADDRESS_TYPE_IP SOCKADDR_IN6 SockAddr6; // set if == TDI_ADDRESS_TYPE_IP6 }; union { SOCKADDR RoutingAddr; // Look at RoutingAddr.sa_family SOCKADDR_IN RoutingAddr4; // set if == TDI_ADDRESS_TYPE_IP SOCKADDR_IN6 RoutingAddr6; // set if == TDI_ADDRESS_TYPE_IP6 }; } HTTP_PARSED_URL, *PHTTP_PARSED_URL; typedef enum _HOSTNAME_TYPE { Hostname_AbsUri = 1, // from Request-line Hostname_HostHeader, // from Host header Hostname_Transport // synthesized from transport's local IP address } HOSTNAME_TYPE, *PHOSTNAME_TYPE; VOID HttpInitializeDefaultUrlC14nConfig( PURL_C14N_CONFIG pCfg ); VOID HttpInitializeDefaultUrlC14nConfigEncoding( PURL_C14N_CONFIG pCfg, BOOLEAN EnableNonUtf8, BOOLEAN FavorUtf8, BOOLEAN EnableDbcs ); NTSTATUS HttpUnescapePercentHexEncoding( IN PCUCHAR pSourceChar, IN ULONG SourceLength, IN BOOLEAN PercentUAllowed, OUT PULONG pOutChar, OUT PULONG pBytesToSkip ); NTSTATUS HttpValidateHostname( IN PURL_C14N_CONFIG pCfg, IN PCUCHAR pHostname, IN ULONG HostnameLength, IN HOSTNAME_TYPE HostnameType, OUT PSHORT pAddressType ); NTSTATUS HttpCopyHost( IN PURL_C14N_CONFIG pCfg, OUT PWSTR pDestination, IN PCUCHAR pSource, IN ULONG SourceLength, OUT PULONG pBytesCopied, OUT PURL_ENCODING_TYPE pUrlEncodingType ); NTSTATUS HttpCopyUrl( IN PURL_C14N_CONFIG pCfg, OUT PWSTR pDestination, IN PCUCHAR pSource, IN ULONG SourceLength, OUT PULONG pBytesCopied, OUT PURL_ENCODING_TYPE pUrlEncoding ); NTSTATUS HttpCleanAndCopyUrl( IN PURL_C14N_CONFIG pCfg, IN URL_PART UrlPart, OUT PWSTR pDestination, IN PCUCHAR pSource, IN ULONG SourceLength, OUT PULONG pBytesCopied, OUT PWSTR * ppQueryString OPTIONAL, OUT PURL_ENCODING_TYPE pUrlEncoding ); NTSTATUS HttpFindUrlToken( IN PURL_C14N_CONFIG pCfg, IN PCUCHAR pBuffer, IN ULONG BufferLength, OUT PUCHAR* ppTokenStart, OUT PULONG pTokenLength, OUT PBOOLEAN pRawUrlClean ); NTSTATUS HttpParseUrl( IN PURL_C14N_CONFIG pCfg, IN PCWSTR pUrl, IN ULONG UrlLength, IN BOOLEAN TrailingSlashReqd, IN BOOLEAN ForceRoutingIP, OUT PHTTP_PARSED_URL pParsedUrl ); NTSTATUS HttpNormalizeParsedUrl( IN OUT PHTTP_PARSED_URL pParsedUrl, IN PURL_C14N_CONFIG pCfg, IN BOOLEAN ForceCopy, IN BOOLEAN FreeOriginalUrl, IN BOOLEAN ForceRoutingIP, IN POOL_TYPE PoolType, IN ULONG PoolTag ); PCSTR HttpSiteTypeToString( HTTP_URL_SITE_TYPE SiteType ); #endif // _C14N_H_