///////////////////////////////////////////////////////////////////////////// // Copyright (C) 1993-1996 Microsoft Corporation. All Rights Reserved. // // MODULE: URL.cpp // // PURPOSE: All the URL parsing routines THOR would ever need. // #include "pch.hxx" #include "strconst.h" #include "urltest.h" #include "url.h" #include "xpcomm.h" #include #include #include "mimeole.h" #include #include #include "imnact.h" #include "demand.h" #include // // FUNCTION: URL_ParseNewsUrls // // PURPOSE: Takes a URL passed to the news view and validates it. If the // URL is valid, then the server, group, and article-id are // returned as appropriate. // // PARAMETERS: // pszURL - Pointer to the URL to parse. // ppszServer - Name of the server, this function allocates the memory. // puPort - Port number on the server to use. // ppszGroup - Name of the group, this function allocates the memory. // ppszArticle - Article id, this function allocates the memory. // pfSecure - whether to use SSL to connect // // RETURN VALUE: // Returns S_OK if the URL is valid, or an appropriate error code // otherwise. // // COMMENTS: // The URLs that are valid for news are: // // news: // news: // news:// (for Netscape compatibility) // news:/// (for URL.DLL compatibility) // news:/// // news:/// // nntp://:// // // $LOCALIZE - Need a separate code path for DBCS HRESULT URL_ParseNewsUrls(LPTSTR pszURL, LPTSTR* ppszServer, LPUINT puPort, LPTSTR* ppszGroup, LPTSTR* ppszArticle, LPBOOL pfSecure) { HRESULT hr; UINT cchBuffer ; LPTSTR pszBuffer, pszTemp; Assert(pszURL != NULL); // Allocate a temp buffer to work with. cchBuffer = lstrlen(pszURL) + sizeof(TCHAR); if (!MemAlloc((LPVOID*)&pszBuffer, cchBuffer)) return E_OUTOFMEMORY; ZeroMemory(pszBuffer, cchBuffer); // Loop through the URL looking for the first ":". We're trying to discern // what the prefix is - either "nntp" or "news". pszTemp = pszURL; while (*pszTemp && *pszTemp != TEXT(':')) pszTemp++; CopyMemory(pszBuffer, pszURL, ((LPBYTE) pszTemp - (LPBYTE) pszURL)); *ppszServer = NULL; *ppszGroup = NULL; *ppszArticle = NULL; *puPort = (UINT) -1; *pfSecure = FALSE; if (0 == lstrcmpi(pszBuffer, c_szURLNews)) { // The URL starts with "news:", so advance the pointer past the ":" // and pass what's left to the appropriate parser. pszTemp++; hr = URL_ParseNEWS(pszTemp, ppszServer, ppszGroup, ppszArticle); } else if (0 == lstrcmpi(pszBuffer, c_szURLNNTP)) { // The URL starts with "nntp:", so advance the pointer past the ":" // and pass what's left to the appropriate parser. pszTemp++; hr = URL_ParseNNTP(pszTemp, ppszServer, puPort, ppszGroup, ppszArticle); } else if (0 == lstrcmpi(pszBuffer, c_szURLSnews)) { // The URL starts with "snews:", so advance the pointer past the ":" // and pass what's left to the appropriate parser. pszTemp++; *pfSecure = TRUE; hr = URL_ParseNEWS(pszTemp, ppszServer, ppszGroup, ppszArticle); } else { // this protocol is not a supported NEWS protocol hr = INET_E_UNKNOWN_PROTOCOL; } MemFree(pszBuffer); return hr; } // $LOCALIZE - Need a separate code path for DBCS HRESULT URL_ParseNEWS(LPTSTR pszURL, LPTSTR* ppszServer, LPTSTR* ppszGroup, LPTSTR* ppszArticle) { LPTSTR pszBuffer; LPTSTR pszBegin; UINT cch = 0; if (pszURL == NULL || *pszURL == '\0') return INET_E_INVALID_URL; // First check to see if a server has been specified. If so, then the // first two characters will be "//". if (*pszURL == TEXT('/')) { // Make sure there are two '/' pszURL++; if (*pszURL != TEXT('/')) return INET_E_INVALID_URL; pszURL++; pszBegin = pszURL; // Ok, got a server name. Find the end and copy it to ppszServer. while (*pszURL && (*pszURL != TEXT('/'))) pszURL++; cch = (UINT) ((LPBYTE) pszURL - (LPBYTE) pszBegin) + sizeof(TCHAR); if (cch <= 1) return S_OK; // bug 12467 if (!MemAlloc((LPVOID*) ppszServer, cch)) return E_OUTOFMEMORY; ZeroMemory(*ppszServer, cch); CopyMemory(*ppszServer, pszBegin, cch - sizeof(TCHAR)); // if we found the last '/' skip over it if (*pszURL) pszURL++; // // NOTE: This code makes the following URLs valid, taking us to the // root node for the server. // // news:// // news:/// // // The first form is necessary for compatibility with Netscape, and // the second form is necessary because URL.DLL adds the trailing // slash before passing the first form to us. // // If we're at the end, fake a news://server/* URL. if (!*pszURL) pszURL = (LPTSTR)g_szAsterisk; } // The difference between a group and article string is that the article // must have "@" in it somewhere. if (!lstrlen(pszURL)) { if (*ppszServer) { MemFree(*ppszServer); *ppszServer = 0; } return INET_E_INVALID_URL; } ULONG cchURL = lstrlen(pszURL)+1; if (!MemAlloc((LPVOID*) &pszBuffer, cchURL * sizeof(TCHAR))) { if (*ppszServer) { MemFree(*ppszServer); *ppszServer = 0; } return INET_E_INVALID_URL; } StrCpyN(pszBuffer, pszURL, cchURL); while (*pszURL && *pszURL != TEXT('@')) pszURL++; if (*pszURL == TEXT('@')) { // This is an article *ppszGroup = NULL; *ppszArticle = pszBuffer; } else { *ppszGroup = pszBuffer; *ppszArticle = NULL; } return S_OK; } // $LOCALIZE - Need a separate code path for DBCS // Validates a URL of the form NNTP://:// HRESULT URL_ParseNNTP(LPTSTR pszURL, LPTSTR* ppszServer, LPUINT puPort, LPTSTR* ppszGroup, LPTSTR* ppszArticle) { LPTSTR pszTemp; UINT cch; HRESULT hrReturn = S_OK; Assert(pszURL != NULL); if (pszURL == NULL || *pszURL == '\0') return INET_E_INVALID_URL; // Make sure there are leading "//" if (*pszURL != TEXT('/')) return INET_E_INVALID_URL; pszURL++; if (*pszURL != TEXT('/')) return INET_E_INVALID_URL; pszURL++; pszTemp = pszURL; // Search for the host name. while (*pszTemp && (*pszTemp != TEXT('/')) && (*pszTemp != TEXT(':'))) pszTemp++; if (*pszTemp != TEXT('/') && *pszTemp != TEXT(':')) return INET_E_INVALID_URL; // Copy the host name to the server return value cch = (UINT) ((LPBYTE) pszTemp - (LPBYTE) pszURL) + sizeof(TCHAR); if (cch <= 1) return INET_E_INVALID_URL; if (!MemAlloc((LPVOID*) ppszServer, cch)) return E_OUTOFMEMORY; ZeroMemory(*ppszServer, cch); CopyMemory(*ppszServer, pszURL, (LPBYTE) pszTemp - (LPBYTE) pszURL); if (*pszTemp == TEXT(':')) { // The URL specified a port, so parse that puppy out. pszTemp++; pszURL = pszTemp; while (*pszTemp && (*pszTemp != TEXT('/'))) pszTemp++; cch = (UINT) ((LPBYTE) pszTemp - (LPBYTE) pszURL); if (cch <= 1) { hrReturn = INET_E_INVALID_URL; goto error; } *puPort = StrToInt(pszURL); } if (*pszTemp != TEXT('/')) { hrReturn = INET_E_INVALID_URL; goto error; } // Get the newsgroup name pszTemp++; // Pass the '/' pszURL = pszTemp; while (*pszTemp && (*pszTemp != TEXT('/'))) pszTemp++; if (*pszTemp != TEXT('/')) { hrReturn = INET_E_INVALID_URL; goto error; } // Copy the group name to the group return value cch = (UINT) ((LPBYTE) pszTemp - (LPBYTE) pszURL) + sizeof(TCHAR); if (cch <= 0) { hrReturn = INET_E_INVALID_URL; goto error; } if (!MemAlloc((LPVOID*) ppszGroup, cch)) return (E_OUTOFMEMORY); ZeroMemory(*ppszGroup, cch); CopyMemory(*ppszGroup, pszURL, (LPBYTE) pszTemp - (LPBYTE) pszURL); // Now copy from here to the end of the string as the article id pszTemp++; cch = lstrlen(pszTemp) + 1; if (cch <= 0) { hrReturn = INET_E_INVALID_URL; goto error; } if (!MemAlloc((LPVOID*) ppszArticle, cch*sizeof(TCHAR))) return (E_OUTOFMEMORY); StrCpyN(*ppszArticle, pszTemp, cch); return (S_OK); error: if (*ppszServer) MemFree(*ppszServer); if (*ppszGroup) MemFree(*ppszGroup); if (*ppszArticle) MemFree(*ppszArticle); *ppszServer = NULL; *ppszGroup = NULL; *ppszArticle = NULL; *puPort = (UINT) -1; return (hrReturn); } static const TCHAR c_szColon[] = ":"; static const TCHAR c_szQuestion[] = "?"; static const TCHAR c_szEquals[] = "="; static const TCHAR c_szAmpersand[] = "&"; static const TCHAR c_szBody[] = "body"; static const TCHAR c_szBcc[] = "bcc"; // // FUNCTION: URL_ParseMailTo() // // PURPOSE: This function takes a mailto: URL and determines if it is a valid // URL for mail. The function then fill in a pMsg from the URL // // PARAMETERS: // pszURL - The URL to parse. // pMsg - The LPMIMEMESSAGE to fill in from the URL. // // RETURN VALUE: // Returns S_OK if the URL is a valid mail URL and the message is filled, // or an appropriate HRESULT describing why the function failed. // // COMMENTS: // Right now the only valid URL is // mailto: // HRESULT URL_ParseMailTo(LPTSTR pszURL, LPMIMEMESSAGE pMsg) { CStringParser sp; HRESULT hr; HADDRESS hAddress; LPMIMEADDRESSTABLE pAddrTable = 0; sp.Init(pszURL, lstrlen(pszURL), 0); if (sp.ChParse(c_szColon)) { // verify that this is a "mailto:" URL if (lstrcmpi(sp.PszValue(), c_szURLMailTo)) return INET_E_UNKNOWN_PROTOCOL; hr = pMsg->GetAddressTable(&pAddrTable); if (FAILED(hr)) return(hr); Assert(pAddrTable != NULL); sp.ChParse(c_szQuestion); if (sp.CchValue()) { // opie says it's cool that I'm about to clobber his buffer UrlUnescapeInPlace((LPTSTR)sp.PszValue(), 0); pAddrTable->Append(IAT_TO, IET_DECODED, sp.PszValue(), NULL, &hAddress); } while (sp.ChParse(c_szEquals)) { LPTSTR pszAttr = StringDup(sp.PszValue()); if (pszAttr) { sp.ChParse(c_szAmpersand); if (sp.CchValue()) { UrlUnescapeInPlace((LPTSTR)sp.PszValue(), 0); // are we trying to set the body? if (!lstrcmpi(c_szBody, pszAttr)) { LPSTREAM pStream; if (SUCCEEDED(MimeOleCreateVirtualStream(&pStream))) { if (SUCCEEDED(pStream->Write(sp.PszValue(), lstrlen(sp.PszValue()) * sizeof(TCHAR), NULL))) { pMsg->SetTextBody(TXT_PLAIN, IET_DECODED, NULL, pStream, NULL); } pStream->Release(); } } else if (0 == lstrcmpi(c_szCC, pszAttr)) { pAddrTable->Append(IAT_CC, IET_DECODED, sp.PszValue(), NULL, &hAddress); } else if (0 == lstrcmpi(c_szBcc, pszAttr)) { pAddrTable->Append(IAT_BCC, IET_DECODED, sp.PszValue(), NULL, &hAddress); } else if (0 == lstrcmpi(c_szTo, pszAttr)) { pAddrTable->Append(IAT_TO, IET_DECODED, sp.PszValue(), NULL, &hAddress); } else { // just stuff the prop into the message MimeOleSetBodyPropA(pMsg, HBODY_ROOT, pszAttr, NOFLAGS, sp.PszValue()); } } MemFree(pszAttr); } } pAddrTable->Release(); } return S_OK; } #define MAX_SUBSTR_SIZE CCHMAX_DISPLAY_NAME typedef struct tagURLSub { LPCTSTR szTag; DWORD dwType; } URLSUB; const static URLSUB c_UrlSub[] = { {TEXT("{SUB_CLCID}"), URLSUB_CLCID}, {TEXT("{SUB_PRD}"), URLSUB_PRD}, {TEXT("{SUB_PVER}"), URLSUB_PVER}, {TEXT("{SUB_NAME}"), URLSUB_NAME}, {TEXT("{SUB_EMAIL}"), URLSUB_EMAIL}, }; HRESULT URLSubstitutionA(LPCSTR pszUrlIn, LPSTR pszUrlOut, DWORD cchSize, DWORD dwSubstitutions, IImnAccount *pCertAccount) { HRESULT hr = S_OK; DWORD dwIndex; CHAR szTempUrl[INTERNET_MAX_URL_LENGTH]; Assert(cchSize <= ARRAYSIZE(szTempUrl)); // We will truncate anything longer than INTERNET_MAX_URL_LENGTH StrCpyN(szTempUrl, pszUrlIn, ARRAYSIZE(szTempUrl)); for (dwIndex = 0; dwIndex < ARRAYSIZE(c_UrlSub); dwIndex++) { while (dwSubstitutions & c_UrlSub[dwIndex].dwType) { LPSTR pszTag = StrStrA(szTempUrl, c_UrlSub[dwIndex].szTag); if (pszTag) { TCHAR szCopyUrl[INTERNET_MAX_URL_LENGTH]; TCHAR szSubStr[MAX_SUBSTR_SIZE]; // The Substitution // Copy URL Before Substitution. CopyMemory(szCopyUrl, szTempUrl, (int)min((pszTag - szTempUrl), sizeof(szCopyUrl))); szCopyUrl[(pszTag - szTempUrl)/sizeof(CHAR)] = TEXT('\0'); pszTag += lstrlen(c_UrlSub[dwIndex].szTag); switch (c_UrlSub[dwIndex].dwType) { case URLSUB_CLCID: { LCID lcid = GetUserDefaultLCID(); wnsprintf(szSubStr, ARRAYSIZE(szSubStr), "%#04lx", lcid); } break; case URLSUB_PRD: StrCpyN(szSubStr, c_szUrlSubPRD, ARRAYSIZE(szSubStr)); break; case URLSUB_PVER: StrCpyN(szSubStr, c_szUrlSubPVER, ARRAYSIZE(szSubStr)); break; case URLSUB_NAME: case URLSUB_EMAIL: { IImnAccount *pAccount = NULL; hr = E_FAIL; if(pCertAccount) { hr = pCertAccount->GetPropSz((c_UrlSub[dwIndex].dwType == URLSUB_NAME) ? AP_SMTP_DISPLAY_NAME : AP_SMTP_EMAIL_ADDRESS, szSubStr, ARRAYSIZE(szSubStr)); } else if (g_pAcctMan && SUCCEEDED(g_pAcctMan->GetDefaultAccount(ACCT_MAIL, &pAccount))) { hr = pAccount->GetPropSz((c_UrlSub[dwIndex].dwType == URLSUB_NAME) ? AP_SMTP_DISPLAY_NAME : AP_SMTP_EMAIL_ADDRESS, szSubStr, ARRAYSIZE(szSubStr)); pAccount->Release(); } if (FAILED(hr)) return hr; } break; default: szSubStr[0] = TEXT('\0'); Assert(FALSE); // Not Impl. hr = E_NOTIMPL; break; } // Add the Substitution String to the end (will become the middle) StrCatBuff(szCopyUrl, szSubStr, ARRAYSIZE(szCopyUrl)); // Add the rest of the URL after the substitution substring. StrCatBuff(szCopyUrl, pszTag, ARRAYSIZE(szCopyUrl)); StrCpyN(szTempUrl, szCopyUrl, ARRAYSIZE(szTempUrl)); } else break; // This will allow us to replace all the occurances of this string. } } StrCpyN(pszUrlOut, szTempUrl, cchSize); return hr; } HRESULT URLSubLoadStringA(UINT idRes, LPSTR pszUrlOut, DWORD cchSizeOut, DWORD dwSubstitutions, IImnAccount *pCertAccount) { HRESULT hr = E_FAIL; CHAR szTempUrl[INTERNET_MAX_URL_LENGTH]; if (LoadStringA(g_hLocRes, idRes, szTempUrl, ARRAYSIZE(szTempUrl))) hr = URLSubstitutionA(szTempUrl, pszUrlOut, cchSizeOut, dwSubstitutions, pCertAccount); return hr; } HRESULT HrConvertStringToUnicode(UINT uiSrcCodePage, CHAR *pSrcStr, UINT cSrcSize, WCHAR *pDstStr, UINT cDstSize) { IMultiLanguage *pMLang = NULL; IMLangConvertCharset *pMLangConv = NULL; HRESULT hr = E_FAIL; IF_FAILEXIT(hr = CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER, IID_IMultiLanguage, (void**)&pMLang)); IF_FAILEXIT(hr = pMLang->CreateConvertCharset(uiSrcCodePage, 1200, NULL, &pMLangConv)); hr = pMLangConv->DoConversionToUnicode(pSrcStr, &cSrcSize, pDstStr, &cDstSize); exit: ReleaseObj(pMLangConv); ReleaseObj(pMLang); return hr; } static const char c_szBaseFmt[]="\n\r"; static const char c_szBaseFileFmt[]="\n\r"; static const WCHAR c_wszBaseFmt[]=L"\n\r"; static const WCHAR c_wszBaseFileFmt[]=L"\n\r"; HRESULT HrCreateBasedWebPage(LPWSTR pwszUrl, LPSTREAM *ppstmHtml) { HRESULT hr; LPSTREAM pstm = NULL, pstmCopy = NULL, pstmTemp = NULL; CHAR szBase[MAX_PATH+50], szCopy[MAX_PATH]; WCHAR wszBase[MAX_PATH+50], wszCopy[MAX_PATH]; ULONG cb, cbTemp; BOOL fLittleEndian; LPSTR pszUrl = NULL, pszStream = NULL, pszCharset = NULL; LPWSTR pwszStream = NULL, pwszTempUrl = NULL; BOOL fIsURL = PathIsURLW(pwszUrl), fForceUnicode, fIsUnicode; IF_FAILEXIT(hr = MimeOleCreateVirtualStream(&pstmCopy)); // Are we a file or a URL? if(fIsURL) { // Since we have a url, then must be ansi IF_NULLEXIT(pszUrl = PszToANSI(CP_ACP, pwszUrl)); // we can not write to this pstm, so we have pstmCopy. IF_FAILEXIT(hr = URLOpenBlockingStream(NULL, pszUrl, &pstm, 0, NULL)); if (S_OK == HrIsStreamUnicode(pstm, &fLittleEndian)) { BYTE rgb[2]; IF_FAILEXIT(hr = pstm->Read(rgb, 2, &cb)); Assert(2 == cb); IF_FAILEXIT(hr = pstmCopy->Write(rgb, 2, NULL)); wnsprintfW((LPWSTR)wszBase, ARRAYSIZE(wszBase), c_wszBaseFmt, pwszUrl); IF_FAILEXIT(hr = pstmCopy->Write(wszBase, lstrlenW(wszBase) * sizeof(WCHAR), NULL)); } else { wnsprintf(szBase, ARRAYSIZE(szBase), c_szBaseFmt, pszUrl); IF_FAILEXIT(hr = pstmCopy->Write(szBase, lstrlen(szBase), NULL)); } } else { // If filename can't be converted to ansi, then we must do this in UNICODE // even if the stationery itself is normally ansi. IF_NULLEXIT(pszUrl = PszToANSI(CP_ACP, pwszUrl)); IF_NULLEXIT(pwszTempUrl = PszToUnicode(CP_ACP, pszUrl)); fForceUnicode = (0 != StrCmpW(pwszUrl, pwszTempUrl)); IF_FAILEXIT(hr = CreateStreamOnHFileW(pwszUrl, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL, &pstm)); fIsUnicode = (S_OK == HrIsStreamUnicode(pstm, &fLittleEndian)); if (fForceUnicode || fIsUnicode) { BYTE bUniMark = 0xFF; IF_FAILEXIT(hr = pstmCopy->Write(&bUniMark, sizeof(bUniMark), NULL)); bUniMark = 0xFE; IF_FAILEXIT(hr = pstmCopy->Write(&bUniMark, sizeof(bUniMark), NULL)); StrCpyNW(wszCopy, pwszUrl, ARRAYSIZE(wszCopy)); PathRemoveFileSpecW(wszCopy); wnsprintfW((LPWSTR)wszBase, ARRAYSIZE(wszBase), c_wszBaseFileFmt, wszCopy); IF_FAILEXIT(hr = pstmCopy->Write(wszBase, lstrlenW(wszBase) * sizeof(WCHAR), NULL)); } else { StrCpyN(szCopy, pszUrl, ARRAYSIZE(szCopy)); PathRemoveFileSpec(szCopy); wnsprintf((LPSTR)szBase, ARRAYSIZE(szBase), c_szBaseFileFmt, szCopy); IF_FAILEXIT(hr = pstmCopy->Write(szBase, lstrlen(szBase), NULL)); } if (fIsUnicode) { WCHAR bom; IF_FAILEXIT(hr = pstm->Read(&bom, 2, &cb)); Assert(2 == cb); } // This is an ANSI stream that we are forcing into UNICODE // This area will only occur if we are streaming a file else if (fForceUnicode) { LARGE_INTEGER pos = {0}; UINT uiHtmlCodepage = 0; Assert(!fIsURL); // In order for the file name to write to the stream properly, we // must convert the stream to unicode before we copy. // Get the charset GetHtmlCharset(pstm, &pszCharset); if(pszCharset) { INETCSETINFO CSetInfo = {0}; HCHARSET hCharset = NULL; if (SUCCEEDED(MimeOleFindCharset(pszCharset, &hCharset))) { if(SUCCEEDED(MimeOleGetCharsetInfo(hCharset,&CSetInfo))) uiHtmlCodepage = CSetInfo.cpiInternet; } } IF_FAILEXIT(hr = HrRewindStream(pstm)); // Allocate enough to read ANSI IF_FAILEXIT(hr = HrSafeGetStreamSize(pstm, &cb)); IF_NULLEXIT(MemAlloc((LPVOID*)&pszStream, cb+1)); // Read in ANSI IF_FAILEXIT(hr = pstm->Read(pszStream, cb, &cbTemp)); Assert(cbTemp == cb); pszStream[cb] = 0; // Alloc enough for the unicode conversion. Assume that each // ANSI char will be one unicode char IF_NULLEXIT(MemAlloc((LPVOID*)&pwszStream, (cb+1)*sizeof(WCHAR))); //Convert including null, if the fancy call fails, we should at least continue //with the old dumb way. if(!uiHtmlCodepage || FAILED(HrConvertStringToUnicode(uiHtmlCodepage, pszStream, cb+1, pwszStream, cb+1))) MultiByteToWideChar(CP_ACP, 0, pszStream, cb+1, pwszStream, cb+1); // Create a new stream IF_FAILEXIT(hr = MimeOleCreateVirtualStream(&pstmTemp)); IF_FAILEXIT(hr = pstmTemp->Write(pwszStream, lstrlenW(pwszStream)*sizeof(WCHAR), &cb)); IF_FAILEXIT(hr = HrRewindStream(pstmTemp)); ReplaceInterface(pstm, pstmTemp); } } IF_FAILEXIT(hr = HrCopyStream(pstm, pstmCopy, &cb)); IF_FAILEXIT(hr = HrRewindStream(pstmCopy)); *ppstmHtml=pstmCopy; pstmCopy->AddRef(); exit: ReleaseObj(pstm); ReleaseObj(pstmTemp); ReleaseObj(pstmCopy); MemFree(pszUrl); MemFree(pszStream); MemFree(pwszStream); MemFree(pwszTempUrl); MemFree(pszCharset); return hr; }