|
|
/*
* U R L . C P P * * Url normalization/canonicalization * * Stolen from the IIS5 project 'iis5\svcs\iisrlt\string.cxx' and * cleaned up to fit in with the DAV sources. * * Copyright 1986-1997 Microsoft Corporation, All Rights Reserved */
#include "_davprs.h"
#include "xemit.h"
// URI Escaping --------------------------------------------------------------
//
// gc_mpbchCharToHalfByte - map a ASCII-encoded char representing a single hex
// digit to a half-byte value. Used to convert hex represented strings into a
// binary representation.
//
// Reference values:
//
// '0' = 49, 0x31;
// 'A' = 65, 0x41;
// 'a' = 97, 0x61;
//
DEC_CONST BYTE gc_mpbchCharToHalfByte[] = { 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7, 0x8,0x9,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0xa,0xb,0xc,0xd,0xe,0xf,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, // Caps here.
0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0xa,0xb,0xc,0xd,0xe,0xf,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, // Lowercase here.
0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, };
// Switches a wide char to a half-byte hex value. The incoming char
// MUST be in the "ASCII-encoded hex digit" range: 0-9, A-F, a-f.
//
inline BYTE BCharToHalfByte(WCHAR wch) { AssertSz (!(wch & 0xFF00), "BCharToHalfByte: char upper bits non-zero"); AssertSz (iswxdigit(wch), "BCharToHalfByte: Char out of hex digit range.");
return gc_mpbchCharToHalfByte[wch]; };
// gc_mpwchhbHalfByteToChar - map a half-byte (low nibble) value to the
// correspoding ASCII-encoded wide char. Used to convert a single byte
// into a hex string representation.
//
const WCHAR gc_mpwchhbHalfByteToChar[] = { L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7', L'8', L'9', L'A', L'B', L'C', L'D', L'E', L'F', };
// Switches a half-byte to an ACSII-encoded wide char.
// NOTE: The caller must mask out the "other half" of the byte!
//
inline WCHAR WchHalfByteToWideChar(BYTE b) { AssertSz (!(b & 0xF0), "WchHalfByteToWideChar: byte upper bits non-zero.");
return gc_mpwchhbHalfByteToChar[b]; };
// gc_mpchhbHalfByteToChar - map a half-byte (low nibble) value to the
// correspoding ASCII-encoded wide char. Used to convert a single byte
// into a hex string representation.
//
const CHAR gc_mpchhbHalfByteToChar[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', };
// Switches a half-byte to an ACSII-encoded wide char.
// NOTE: The caller must mask out the "other half" of the byte!
//
inline CHAR ChHalfByteToWideChar(BYTE b) { AssertSz (!(b & 0xF0), "ChHalfByteToWideChar: byte upper bits non-zero.");
return gc_mpchhbHalfByteToChar[b]; };
// Note on HttpUriEscape and HttpUriUnescape
//
// These functions do the HTTP URL escaping and Unescaping equivalent to
// the one done by IIS. DAVEX URLs are escaped and unescaped thru a different
// sets of routines in _urlesc subsystem. The rule is whenever we sent out
// an Exchange HTTP wire URL, you should go thru the function in the
// _urlesc. Right now old UrlEscape and UrlUnescape routines are routed
// through those. However there exist cases where we need to do the
// IIS style escape and unescape. One scenario is when we forward the
// URLs to ISAPIs, where we use the HttpUriUnescape and HttpUriEscape functions.
// File system DAV also uses HttpUriEscape and HttpUriUnescape.
//
// HttpUriEscape()
//
// This function is immigrated from iis5\svcs\w3\server\dirlist.cpp's
// We should do the same URL escaping as IIS does.
//
// Replaces all "bad" characters with their ASCII hex equivalent
//
VOID __fastcall HttpUriEscape ( /* [in] */ LPCSTR pszSrc, /* [out] */ auto_heap_ptr<CHAR>& pszDst) { enum { URL_BUF_INCREMENT = 16 };
// It is important that we operate on unsigned character, as otherwise
// checks below simply do not work correctly. E.g. UTF-8 characters will
// not get escaped, etc.
//
UCHAR uch;
UINT cbDst; UINT cbSrc; UINT ibDst; UINT ibSrc;
// Set cbSrc to account for the string length of
// the url including the NULL
//
Assert(pszSrc); cbSrc = static_cast<UINT>(strlen (pszSrc) + 1);
// Allocate enough space for the expanded url -- and
// lets be a bit optimistic
//
cbDst = max (cbSrc + URL_BUF_INCREMENT, MAX_PATH); pszDst = static_cast<LPSTR>(g_heap.Alloc(cbDst));
for (ibSrc = 0, ibDst = 0; ibSrc < cbSrc; ibSrc++) { uch = pszSrc[ibSrc];
// Make sure we always have space to expand this character.
// Since we have allocated extra space to begin with, we should
// never have the scenario where we do a realloc just for the
// last char.
//
if (ibDst + 2 >= cbDst) // enough space for three more chars
{ // Destiniation buffer is not large enough, reallocate
// to get more space.
//
cbDst += URL_BUF_INCREMENT; pszDst.realloc (cbDst); }
// Escape characters that are in the non-printable range
// but ignore CR and LF.
//
// The inclusive ranges escaped are...
//
// 0x01 - 0x20 /* First non-printable range */
// 0x80 - 0xBF /* Trailing bytes of UTF8 sequence */
// 0xC0 - 0xDF /* Leading byte of UTF8 two byte sequence */
// 0xE0 - 0xEF /* Leading byte of UTF8 three byte sequence */
//
if ((((uch >= 0x01) && (uch <= 0x20)) /* First non-printable range */ || ((uch >= 0x80) && (uch <= 0xEF)) /* UTF8 sequence bytes */ || (uch == '%') || (uch == '?') || (uch == '+') || (uch == '&') || (uch == '#')) && !(uch == '\n' || uch == '\r')) { // Insert the escape character
//
pszDst[ibDst + 0] = '%';
// Convert the low then the high character to hex
//
BYTE bDigit = static_cast<BYTE>(uch % 16); pszDst[ibDst + 2] = ChHalfByteToWideChar (bDigit); bDigit = static_cast<BYTE>((uch/16) % 16); pszDst[ibDst + 1] = ChHalfByteToWideChar (bDigit);
// Adjust for the two extra characters for this sequence
//
ibDst += 3; } else { pszDst[ibDst] = uch; ibDst += 1; } }
UrlTrace ("Url: UriEscape(): escaped url: %hs\n", pszDst.get()); return; }
// HttpUriUnescape()
//
// This function is immigrated from iis5\svcs\w3\server\dirlist.cpp's
// We should do the same URL unescaping as IIS does.
//
// Replaces all escaped characters with their byte equivalent
//
//
VOID __fastcall HttpUriUnescape ( /* [in] */ const LPCSTR pszUrl, /* [out] */ LPSTR pszUnescaped) { LPCSTR pch; LPSTR pchNew;
Assert (pszUrl); Assert (pszUnescaped);
pch = pszUrl; pchNew = pszUnescaped;
while (*pch) { // If this is a valid byte-stuffed character, unpack it. For us
// to really unpack it, we need the sequence to be valid.
//
// NOTE: we stole this code from IIS at one point, so we are
// pretty sure this is consistant with their behavior.
//
if (('%' == pch[0]) && ('\0' != pch[1]) && ('\0' != pch[2]) && isxdigit(pch[1]) && isxdigit(pch[2])) {
#pragma warning(disable:4244)
// IMPORTANT: when we do this processing, there is no specific
// machine/byte ordering assumed. The HEX digit is represented
// as a %xx, and the first char is multiplied by sixteen and
// then second char is added in.
//
UrlTrace ("HttpUriEscape () - unescaping: %hc%hc%hc\n", pch[0], pch[1], pch[2]); *pchNew = (BCharToHalfByte(pch[1]) * 16) + BCharToHalfByte(pch[2]); pch += 3;
#pragma warning(default:4244)
} else { *pchNew = *pch++; }
// If a NULL character was byte-stuffed, then that is the end of
// the url and we can stop processing now. Otherwise, path modifications
// could be used to bypass a NULL.
//
if ('\0' == *pchNew) { break; }
pchNew++; }
// Close the new URI
//
*pchNew = '\0';
UrlTrace ("HttpUriEscape() - resulting destination: \"%hs\"\n", pszUnescaped); }
// Prefix stripping ----------------------------------------------------------
//
SCODE __fastcall ScStripAndCheckHttpPrefix ( /* [in] */ const IEcb& ecb, /* [in/out] */ LPCWSTR * ppwszRequest) { SCODE sc = S_OK;
Assert (ppwszRequest); Assert (*ppwszRequest); LPCWSTR pwszRequest = *ppwszRequest;
// See if the servername matches
//
LPCWSTR pwsz; UINT cch;
// If the forward request URI is fully qualified, strip it to
// an absolute URI
//
cch = ecb.CchUrlPrefixW (&pwsz); if (!_wcsnicmp (pwsz, pwszRequest, cch)) { pwszRequest += cch; cch = ecb.CchGetServerNameW (&pwsz); if (_wcsnicmp (pwsz, pwszRequest, cch)) { sc = E_DAV_BAD_DESTINATION; DebugTrace ("ScStripAndCheckHttpPrefix(): server does not match 0x%08lX\n", sc); goto ret; }
// If the server name matched, make sure that if the
// next thing is a port number that it is ":80".
//
pwszRequest += cch; if (*pwszRequest == L':') { cch = ecb.CchUrlPortW (&pwsz); if (_wcsnicmp (pwsz, pwszRequest, cch)) { sc = E_DAV_BAD_DESTINATION; DebugTrace ("ScStripAndCheckHttpPrefix(): port does not match 0x%08lX\n", sc); goto ret; } pwszRequest += cch; } }
*ppwszRequest = pwszRequest;
ret:
return sc; }
LPCWSTR __fastcall PwszUrlStrippedOfPrefix ( /* [in] */ LPCWSTR pwszUrl) { Assert (pwszUrl);
// Skip past the "http://" of the url
//
if (L'/' != *pwszUrl) { // If the first slash occurance is a double slash, then
// move past the end of it.
//
LPWSTR pwszSlash = wcschr (pwszUrl, L'/'); while (pwszSlash && (L'/' == pwszSlash[1])) { // Skip past the host/server name
//
pwszSlash += 2; while (NULL != (pwszSlash = wcschr (pwszSlash, L'/'))) { UrlTrace ("Url: PwszUrlStrippedOfPrefix(): normalizing: " "skipping %d chars of '%S'\n", pwszSlash - pwszUrl, pwszUrl);
pwszUrl = pwszSlash; break; } break; } }
return pwszUrl; }
// Storage path to UTF8 url translation --------------------------------------
//
SCODE __fastcall ScUTF8UrlFromStoragePath ( /* [in] */ const IEcbBase & ecb, /* [in] */ LPCWSTR pwszPath, /* [out] */ LPSTR pszUrl, /* [in/out] */ UINT * pcbUrl, /* [in] */ LPCWSTR pwszServer) { CStackBuffer<WCHAR,MAX_PATH> pwszUrl; SCODE sc = S_OK; UINT cbUrl; UINT cchUrl;
// Assume one skinny character will be represented by one wide character,
// Note that callers are indicating available space including 0 termination.
//
cchUrl = *pcbUrl; if (!pwszUrl.resize(cchUrl * sizeof(WCHAR))) return E_OUTOFMEMORY;
sc = ScUrlFromStoragePath (ecb, pwszPath, pwszUrl.get(), &cchUrl, pwszServer); if (S_FALSE == sc) { if (!pwszUrl.resize(cchUrl * sizeof(WCHAR))) return E_OUTOFMEMORY;
sc = ScUrlFromStoragePath (ecb, pwszPath, pwszUrl.get(), &cchUrl, pwszServer); } if (S_OK != sc) { // There is no reason to fail because for being short of buffer - we gave as
// much as we were asked for
//
Assert(S_FALSE != sc); DebugTrace( "ScUrlFromStoragePath() - ScUrlFromStoragePath() failed 0x%08lX\n", sc ); goto ret; }
// Find out the length of buffer needed for the UTF-8
// version of the URL. Functions above return the length
// including '\0' termination, so number of charasters
// to convert will always be more than zero.
//
Assert(0 < cchUrl); cbUrl = WideCharToMultiByte(CP_UTF8, 0, pwszUrl.get(), cchUrl, NULL, 0, NULL, NULL); if (0 == cbUrl) { sc = HRESULT_FROM_WIN32(GetLastError()); DebugTrace( "ScUTF8UrlFromStoragePath() - WideCharToMultiByte() failed 0x%08lX\n", sc ); goto ret; }
if (*pcbUrl < cbUrl) { sc = S_FALSE; *pcbUrl = cbUrl; goto ret; } else { // Convert the URL to skinny including 0 termination
//
cbUrl = WideCharToMultiByte( CP_UTF8, 0, pwszUrl.get(), cchUrl, pszUrl, cbUrl, NULL, NULL); if (0 == cbUrl) { sc = HRESULT_FROM_WIN32(GetLastError()); DebugTrace( "ScUrlFromStoragePath() - WideCharToMultiByte() failed 0x%08lX\n", sc ); goto ret; }
*pcbUrl = cbUrl; }
ret:
if (FAILED(sc)) { // Zero out the return in the case of failure
//
*pcbUrl = 0; } return sc; }
// Redirect url construction -------------------------------------------------
//
SCODE __fastcall ScConstructRedirectUrl ( /* [in] */ const IEcb& ecb, /* [in] */ BOOL fNeedSlash, /* [out] */ LPSTR * ppszUrl, /* [in] */ LPCWSTR pwszServer ) { SCODE sc;
auto_heap_ptr<CHAR> pszEscapedUrl; // We will need to escape the url we construct, so we will store it there
CStackBuffer<CHAR,MAX_PATH> pszLocation; LPCSTR pszQueryString; UINT cchQueryString; LPCWSTR pwsz; UINT cch;
// This request needs to be redirected. Allocate
// enough space for the URI and an extra trailing
// slash and a null terminator.
//
pwsz = ecb.LpwszPathTranslated(); pszQueryString = ecb.LpszQueryString(); cchQueryString = static_cast<UINT>(strlen(pszQueryString));
// Make a best guess. We allow for additional trailing '/'
// here (thus we show one character less than we actually
// have to the functions bellow).
//
cch = pszLocation.celems() - 1; sc = ::ScUTF8UrlFromStoragePath (ecb, pwsz, pszLocation.get(), &cch, pwszServer); if (S_FALSE == sc) { // Try again. Also do not forget that we may
// add trailing '/' later, thus allow space for
// it too.
//
if (!pszLocation.resize(cch + 1)) return E_OUTOFMEMORY;
sc = ::ScUTF8UrlFromStoragePath (ecb, pwsz, pszLocation.get(), &cch, pwszServer); } if (S_OK != sc) { // We gave sufficient space, we must not be asked for more
//
Assert(S_FALSE != sc); DebugTrace("ScConstructRedirectUrl() - ScUTF8UrlFromStoragePath() failed with error 0x%08lX\n", sc); goto ret; }
// The translation above results in a URI that does not
// have a trailing slash. So if one is required, do that
// here.
//
// The value of cch at this point includes the
// null-termination character. So we need to look
// back two characters instead of one.
//
//$ DBCS: Since we are always spitting back UTF8, I don't think
// forward-slash characters are likely to be an issue here. So
// there should be no need for a DBCS lead byte check to determine
// if a slash is required.
//
Assert (0 == pszLocation[cch - 1]); if (fNeedSlash && ('/' != pszLocation[cch - 2])) { pszLocation[cch - 1] = '/'; pszLocation[cch] = '\0'; } //
//$ DBCS: end.
// Escape the URL
//
HttpUriEscape (pszLocation.get(), pszEscapedUrl);
// Copy the query string if we have got one
//
if (cchQueryString) { cch = static_cast<UINT>(strlen(pszEscapedUrl.get())); pszEscapedUrl.realloc(cch + cchQueryString + 2); // One for the '?' and one for zero termination.
pszEscapedUrl[cch] = '?'; memcpy(pszEscapedUrl.get() + cch + 1, pszQueryString, cchQueryString); pszEscapedUrl[cch + 1 + cchQueryString] = '\0'; } *ppszUrl = pszEscapedUrl.relinquish();
ret:
return sc; }
// Virtual roots -------------------------------------------------------------
//
/*
* FIsVRoot() * * Purpose: * * Returns TRUE iif the specified URI is the VRoot * * Parameters: * * pmu [in] method utility function * pszURI [in] URI to check */ BOOL __fastcall CMethUtil::FIsVRoot (LPCWSTR pwszURI) { LPCWSTR pwsz; LPCWSTR pwszUnused;
Assert(pwszURI); UINT cch = static_cast<UINT>(wcslen (pwszURI));
// The virtual root as determined by CchGetVirtualRoot(),
// will truncate the trailing slash, if any.
//
pwsz = pwszURI + (cch ? cch - 1 : 0); if (L'/' == *pwsz) { cch -= 1; }
return (cch == CchGetVirtualRootW(&pwszUnused)); }
// Path conflicts ------------------------------------------------------------
//
BOOL __fastcall FSizedPathConflict ( /* [in] */ LPCWSTR pwszSrc, /* [in] */ UINT cchSrc, /* [in] */ LPCWSTR pwszDst, /* [in] */ UINT cchDst) { // For which ever path is shorter, see if it is
// a proper subdir of the longer.
//
if ((0 == cchSrc) || (0 == cchDst)) { DebugTrace ("Dav: Url: FSizedPathConflict(): zero length path is " "always in conflict!\n"); return TRUE; } if (cchDst < cchSrc) { // When the destination is shorter, if the paths
// match up to the full length of the destination
// and the last character or the one immediately
// following the destination is a backslash, then
// the paths are conflicting.
//
if (!_wcsnicmp (pwszSrc, pwszDst, cchDst)) { if ((L'\\' == *(pwszDst + cchDst - 1)) || (L'\\' == *(pwszSrc + cchDst)) || //$$DAVEX BUG: We could get here in a case where we have:
// pwszSrc = \\.\ExchangeIfs\Private Folders/this/is/my/path
// pwszDest = \\.\ExchangeIfs\Private Folders
// The two comparisons above balk on this. Add the two
// comparisons below to handle this case properly.
(L'/' == *(pwszDst + cchDst - 1)) || (L'/' == *(pwszSrc + cchDst))) { DebugTrace ("Dav: Url: FSizedPathConflict(): destination is " "parent to source\n"); return TRUE; } } } else if (cchSrc < cchDst) { // When the source is shorter, if the paths
// match up to the full length of the source
// and the last character or the one immediately
// following the source is a backslash, then
// the paths are conflicting.
//
if (!_wcsnicmp (pwszSrc, pwszDst, cchSrc)) { if ((L'\\' == *(pwszSrc + cchSrc - 1)) || (L'\\' == *(pwszDst + cchSrc)) || //$$DAVEX BUG: We could get here in a case where we have:
// pwszSrc = \\.\ExchangeIfs\Private Folders/this/is/my/path
// pwszDest = \\.\ExchangeIfs\Private Folders
// The two comparisons above balk on this. Add the two
// comparisons below to handle this case properly.
(L'/' == *(pwszSrc + cchSrc - 1)) || (L'/' == *(pwszDst + cchSrc))) { DebugTrace ("Dav: Url: FSizedPathConflict(): source is parent " "to destination\n"); return TRUE; } } } else { // If the paths are the same length, and are infact
// equal, why do anything?
//
if (!_wcsicmp (pwszSrc, pwszDst)) { DebugTrace ("Dav: Url: FSizedPathConflict(): source and " "destination refer to same\n"); return TRUE; } } return FALSE; }
BOOL __fastcall FPathConflict ( /* [in] */ LPCWSTR pwszSrc, /* [in] */ LPCWSTR pwszDst) { Assert (pwszSrc); Assert (pwszDst);
UINT cchSrc = static_cast<UINT>(wcslen (pwszSrc)); UINT cchDst = static_cast<UINT>(wcslen (pwszDst));
return FSizedPathConflict (pwszSrc, cchSrc, pwszDst, cchDst); }
BOOL __fastcall FIsImmediateParentUrl (LPCWSTR pwszParent, LPCWSTR pwszChild) { LPCWSTR pwsz;
Assert(pwszChild); UINT cchChild = static_cast<UINT>(wcslen (pwszChild)); UINT cchMatch;
// Skip back from the end of the child until the last
// path segment has been reached
//
pwsz = pwszChild + cchChild - 1;
// Child may terminate in a slash, trim it if need be
//
if (*pwsz == L'/') { --pwsz; }
// Ok, now we can try and isolate the last segment
//
for (; pwsz > pwszChild; --pwsz) { if (*pwsz == L'/') { break; } }
// See if the parent and child match up to this point
//
cchMatch = static_cast<UINT>(pwsz - pwszChild); if (!_wcsnicmp (pwszParent, pwszChild, cchMatch)) { // Make sure that the parent doesn't trail off onto another
// branch of the tree, and yes these asserts are DBCS correct.
//
Assert ((*(pwszParent + cchMatch) == L'\0') || ((*(pwszParent + cchMatch) == L'/') && (*(pwszParent + cchMatch + 1) == L'\0')));
return TRUE; }
return FALSE; }
SCODE ScAddTitledHref (CEmitterNode& enParent, IMethUtil * pmu, LPCWSTR pwszTag, LPCWSTR pwszPath, BOOL fCollection, CVRoot* pcvrTranslate) { auto_heap_ptr<CHAR> pszUriEscaped; CEmitterNode en; SCODE sc = S_OK;
// Just see if we have the path and tag to process
//
Assert(pwszTag); Assert(pwszPath);
sc = ScWireUrlFromStoragePath (pmu, pwszPath, fCollection, pcvrTranslate, pszUriEscaped); if (FAILED (sc)) goto ret;
sc = enParent.ScAddUTF8Node (pwszTag, en, pszUriEscaped.get()); if (FAILED (sc)) goto ret;
ret:
return sc; }
|