Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1230 lines
33 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1992 - 1995.
  5. //
  6. // File: curl.cxx
  7. //
  8. // Contents: handle url parsing and context urls parsing
  9. //
  10. // Classes:
  11. //
  12. // Functions:
  13. //
  14. // History: 2-20-96 JohannP (Johann Posch) Created
  15. //
  16. //----------------------------------------------------------------------------
  17. #include <iapp.h>
  18. #include <shlwapi.h>
  19. #include <shlwapip.h>
  20. PerfDbgTag(tagCUrl, "Urlmon", "Log CUrl", DEB_PROT);
  21. PerfDbgTag(tagCUrlApi, "Urlmon", "Log CUrl API", DEB_ASYNCAPIS);
  22. //+---------------------------------------------------------------------------
  23. //
  24. // Function: IsStreamEnabled
  25. //
  26. // Synopsis: returns TRUE iff the registry key for the mk: protocol is set
  27. //
  28. // Arguments: (none)
  29. //
  30. // Returns:
  31. //
  32. // History: 6-7-96 craigc Created
  33. //
  34. // Notes:
  35. //
  36. //----------------------------------------------------------------------------
  37. extern BOOL g_bGlobalUTF8hackEnabled;
  38. BOOL StringContainsHighAnsiW(LPCWSTR);
  39. BOOL IsStreamEnabled()
  40. {
  41. DEBUG_ENTER((DBG_APP,
  42. Bool,
  43. "IsStreamEnabled",
  44. NULL
  45. ));
  46. HKEY hk;
  47. char szBuf[256];
  48. DWORD dwType;
  49. DWORD dwSize;
  50. const char szKey[] = "SOFTWARE\\Microsoft\\Internet Explorer";
  51. const char szValue[] = "MkEnabled";
  52. const char szYes[] = "yes";
  53. static BOOL fChecked = FALSE;
  54. static BOOL fEnabled = FALSE;
  55. CMutexSem mxs;
  56. CLock lck(mxs);
  57. if (fChecked)
  58. {
  59. DEBUG_LEAVE(fEnabled);
  60. return fEnabled;
  61. }
  62. if (RegOpenKey(HKEY_LOCAL_MACHINE, szKey, &hk) != ERROR_SUCCESS)
  63. {
  64. DEBUG_LEAVE(FALSE);
  65. return( FALSE );
  66. }
  67. dwSize = sizeof(szBuf);
  68. if (RegQueryValueEx( hk, szValue, NULL, &dwType, (BYTE*)szBuf, &dwSize ) != ERROR_SUCCESS)
  69. {
  70. RegCloseKey( hk );
  71. DEBUG_LEAVE(FALSE);
  72. return( FALSE );
  73. }
  74. RegCloseKey( hk );
  75. fEnabled = (dwSize && (lstrcmpi( szYes, szBuf ) == 0));
  76. fChecked = TRUE;
  77. DEBUG_LEAVE(fEnabled);
  78. return fEnabled;
  79. }
  80. //
  81. // GetUrlScheme() returns one of the URL_SCHEME_* constants as
  82. // defined in shlwapip.h
  83. // example "http://foo" returns URL_SCHEME_HTTP
  84. //
  85. DWORD GetUrlScheme(IN LPCTSTR pcszUrl)
  86. {
  87. DEBUG_ENTER((DBG_APP,
  88. Dword,
  89. "GetUrlScheme",
  90. "%#x",
  91. pcszUrl
  92. ));
  93. if(pcszUrl)
  94. {
  95. PARSEDURL pu;
  96. pu.cbSize = sizeof(pu);
  97. if(SUCCEEDED(ParseURL(pcszUrl, &pu)))
  98. {
  99. DEBUG_LEAVE(pu.nScheme);
  100. return pu.nScheme;
  101. }
  102. }
  103. DEBUG_LEAVE(URL_SCHEME_INVALID);
  104. return URL_SCHEME_INVALID;
  105. }
  106. //+---------------------------------------------------------------------------
  107. //
  108. // Method: CUrl::CUrl
  109. //
  110. // Synopsis:
  111. //
  112. // Arguments: (none)
  113. //
  114. // Returns:
  115. //
  116. // History: 2-20-96 JohannP (Johann Posch) Created
  117. //
  118. // Notes:
  119. //
  120. //----------------------------------------------------------------------------
  121. CUrl::CUrl()
  122. {
  123. DEBUG_ENTER((DBG_APP,
  124. None,
  125. "CUrl::CUrl",
  126. "this=%#x",
  127. this
  128. ));
  129. PerfDbgLog(tagCUrl, this, "+CUrl::CUrl");
  130. _pszBaseURL = NULL;
  131. _pszPartURL = NULL;
  132. _pszFullURL = NULL;
  133. _pszProtocol = NULL;
  134. _pszServerName = NULL;
  135. _pszUserName = NULL;
  136. _pszPassword = NULL;
  137. _pszObject = NULL;
  138. _pBasicAllocUnit = NULL;
  139. _ipPort = 0;
  140. _dwProto = 0;
  141. _fUTF8hack = FALSE;
  142. _pszUTF8ServerName = NULL;
  143. _dwServerCodePage = 0;
  144. PerfDbgLog(tagCUrl, this, "-CUrl::CUrl");
  145. DEBUG_LEAVE(0);
  146. }
  147. BOOL
  148. CUrl::CUrlInitBasic(DWORD dwBaseUrlSize)
  149. {
  150. DEBUG_ENTER((DBG_APP,
  151. Bool,
  152. "CUrl::CUrlInitBasic",
  153. "this=%#x, %#x",
  154. this, dwBaseUrlSize
  155. ));
  156. //
  157. // basic allocation contains the following fields:
  158. //
  159. // _pszServerName - URL_FIELD_SIZE
  160. // _pszUserName - URL_FIELD_SIZE
  161. // _pszPassword - URL_FIELD_SIZE
  162. // _pszProtocol - 12
  163. // _pszBaseURL - dwBaseUrlSize + 1
  164. //
  165. BOOL fRet = FALSE;
  166. DWORD dwBasicUnitSize = (3 * (URL_FIELD_SIZE)) + 12 + dwBaseUrlSize + 1;
  167. _pBasicAllocUnit = new char[dwBasicUnitSize];
  168. if( _pBasicAllocUnit )
  169. {
  170. _pszServerName = _pBasicAllocUnit;
  171. _pszUserName = _pszServerName + URL_FIELD_SIZE;
  172. _pszPassword = _pszUserName + URL_FIELD_SIZE;
  173. _pszProtocol = _pszPassword + URL_FIELD_SIZE;
  174. _pszBaseURL = _pszProtocol + 12;
  175. *_pszServerName = '\0';
  176. *_pszUserName = '\0';
  177. *_pszPassword = '\0';
  178. *_pszProtocol = '\0';
  179. *_pszBaseURL = '\0';
  180. fRet = TRUE;
  181. }
  182. DEBUG_LEAVE(fRet);
  183. return fRet;
  184. }
  185. BOOL
  186. CUrl::CUrlInitAll()
  187. {
  188. DEBUG_ENTER((DBG_APP,
  189. Bool,
  190. "CUrl::CUrlInitAll",
  191. "this=%#x",
  192. this
  193. ));
  194. // backword compatibility init all with URL_MAX_LENGTH
  195. BOOL fRet = FALSE;
  196. fRet = CUrlInitBasic(MAX_URL_SIZE);
  197. if( fRet )
  198. {
  199. _pszPartURL = new char[MAX_URL_SIZE + 1];
  200. _pszFullURL = new char[MAX_URL_SIZE + 1];
  201. _pszObject = new char[MAX_URL_SIZE + 1];
  202. if( !_pszPartURL || !_pszFullURL || !_pszObject )
  203. {
  204. if (_pszPartURL)
  205. delete _pszPartURL;
  206. if (_pszFullURL)
  207. delete _pszFullURL;
  208. if (_pszObject)
  209. delete _pszObject;
  210. _pszPartURL = NULL;
  211. _pszFullURL = NULL;
  212. _pszObject = NULL;
  213. fRet = FALSE;
  214. }
  215. else
  216. {
  217. *_pszPartURL = '\0';
  218. *_pszFullURL = '\0';
  219. *_pszObject = '\0';
  220. }
  221. }
  222. DEBUG_LEAVE(fRet);
  223. return fRet;
  224. }
  225. CUrl::~CUrl()
  226. {
  227. DEBUG_ENTER((DBG_APP,
  228. None,
  229. "CUrl::~CUrl",
  230. "this=%#x",
  231. this
  232. ));
  233. if( _pBasicAllocUnit )
  234. {
  235. delete [] _pBasicAllocUnit;
  236. }
  237. if( _pszPartURL )
  238. {
  239. delete [] _pszPartURL;
  240. }
  241. if( _pszFullURL )
  242. {
  243. delete [] _pszFullURL;
  244. }
  245. if( _pszObject )
  246. {
  247. delete [] _pszObject;
  248. }
  249. if( _pszUTF8ServerName )
  250. {
  251. delete [] _pszUTF8ServerName;
  252. }
  253. DEBUG_LEAVE(0);
  254. }
  255. //+---------------------------------------------------------------------------
  256. //
  257. // Function: ParseUrl
  258. //
  259. // Synopsis: Breaks down a URL and puts servername, objectname and port
  260. // into the download structure.
  261. //
  262. // Arguments:
  263. //
  264. //
  265. // Returns: TRUE if the URL was successfully parsed.
  266. //
  267. // History: Created Unknown
  268. // 02-20-95 JohannP (Johann Posch) Created Class
  269. // 03-20-95 JoeS (Joe Souza) Special FILE: syntaxes
  270. //
  271. // Notes: URL should have already been parsed earlier by ConstructURL.
  272. // This function will crack the URL.
  273. //
  274. //----------------------------------------------------------------------------
  275. BOOL CUrl::ParseUrl(BOOL fUTF8Required, LPCWSTR pwzUrl, DWORD dwCodePage)
  276. {
  277. DEBUG_ENTER((DBG_APP,
  278. Bool,
  279. "CUrl::ParseUrl",
  280. "this=%#x, %B",
  281. this, fUTF8Required
  282. ));
  283. PerfDbgLog1(tagCUrl, this, "+CUrl::ParseUrl Base:[%s]", _pszBaseURL);
  284. BOOL fRet = TRUE;
  285. URL_COMPONENTS url;
  286. DWORD cchFullURL;
  287. LPSTR szTemp;
  288. CHAR * pch;
  289. DWORD dwFullUrlLen;
  290. _fUTF8hack = FALSE; //possibly set true later in the function for non-redirect codepath.
  291. if (_pszPartURL && _pszPartURL[0] != '\0' ) // This string will be set for redirects.
  292. {
  293. //
  294. // we need re-alloc _pszFullURL and _pszObject, since
  295. // the the size can grow!
  296. //
  297. dwFullUrlLen = strlen(_pszBaseURL) + strlen(_pszPartURL) + 1;
  298. if(dwFullUrlLen > MAX_URL_SIZE)
  299. {
  300. dwFullUrlLen = MAX_URL_SIZE + 1;
  301. }
  302. if( _pszFullURL )
  303. {
  304. delete [] _pszFullURL;
  305. _pszFullURL = NULL;
  306. _pszFullURL = new char[dwFullUrlLen];
  307. }
  308. if( _pszObject )
  309. {
  310. delete [] _pszObject;
  311. _pszObject = NULL;
  312. _pszObject = new char[dwFullUrlLen];
  313. }
  314. if( !_pszFullURL || !_pszObject )
  315. {
  316. fRet = FALSE;
  317. goto Exit;
  318. }
  319. cchFullURL = dwFullUrlLen;
  320. if(FAILED(UrlCombine(_pszBaseURL, _pszPartURL, _pszFullURL, &cchFullURL, URL_FILE_USE_PATHURL)))
  321. {
  322. fRet = FALSE;
  323. PProtAssert(FALSE && "Combine failed in ParseUrl!\n");
  324. goto Exit;
  325. }
  326. }
  327. else
  328. {
  329. // FullURL is BaseURL
  330. dwFullUrlLen = strlen(_pszBaseURL) + 1;
  331. if( !_pszFullURL )
  332. {
  333. _pszFullURL = new char[dwFullUrlLen];
  334. }
  335. if( !_pszObject )
  336. {
  337. _pszObject = new char[dwFullUrlLen];
  338. }
  339. if( !_pszFullURL || !_pszObject )
  340. {
  341. fRet = FALSE;
  342. goto Exit;
  343. }
  344. lstrcpy(_pszFullURL, _pszBaseURL);
  345. }
  346. // Trim off intra-page link.
  347. //
  348. // NB: Don't use ExtraInfo below to do this because you will
  349. // also lose search string this way.
  350. //
  351. // Also, we need to do this before we decode the URL below,
  352. // so that we don't trim off the wrong '#' if there was one
  353. // encoded in the URL path.
  354. //
  355. // UrlGetLocation() will intelligently find the fragment
  356. // some schemes do not use the # as a fragment identifier.
  357. // it returns a pointer to the #
  358. //
  359. if(pch = (CHAR *)UrlGetLocation(_pszFullURL))
  360. {
  361. *pch = TEXT('\0');
  362. }
  363. _dwProto = ProtoFromString(_pszFullURL);
  364. if (_dwProto == DLD_PROTOCOL_NONE)
  365. {
  366. fRet = FALSE;
  367. goto Exit;
  368. }
  369. if(DLD_PROTOCOL_FILE == _dwProto)
  370. {
  371. //
  372. // at this point, _pszFullURL and _pszObject should be all
  373. // allocated with size of dwFullUrlLen
  374. //
  375. DWORD cchObject = dwFullUrlLen;
  376. //do file stuff here
  377. fRet = SUCCEEDED(PathCreateFromUrl(_pszFullURL, _pszObject, &cchObject, 0));
  378. }
  379. else
  380. {
  381. //
  382. // BUGBUG - InternetCrackUrl alters the original url - zekel - 25-JUL-97
  383. // ICU is poorly behaved, and it unescapes the server and username
  384. // components insitu regardless of whether it was requested or not
  385. // this means that if you pass in http://host%76/, the url on return
  386. // is http://hostv76/. it happens that if you create the URL from the
  387. // components given, you will get the correct URL, but crack doesnt understand
  388. // all URLs. it is too late in the game to change the behavior of ICU,
  389. // because wininet internally depends on the behavior.
  390. // so our solution is to create a temp buffer that can be messed with
  391. // and then throw it away after we are done.
  392. //
  393. LPSTR pszTemp = StrDup(_pszFullURL);
  394. if (pszTemp)
  395. {
  396. url.dwStructSize = sizeof(url);
  397. url.lpszScheme = _pszProtocol;
  398. url.dwSchemeLength =12;
  399. url.lpszHostName = _pszServerName;
  400. url.dwHostNameLength = URL_FIELD_SIZE;
  401. url.lpszUserName = _pszUserName;
  402. url.dwUserNameLength = URL_FIELD_SIZE;
  403. url.lpszPassword = _pszPassword;
  404. url.dwPasswordLength = URL_FIELD_SIZE;
  405. url.lpszUrlPath = _pszObject;
  406. url.dwUrlPathLength = dwFullUrlLen;
  407. url.lpszExtraInfo = NULL;
  408. url.dwExtraInfoLength = 0;
  409. fRet = InternetCrackUrl(pszTemp, 0, (_dwProto == DLD_PROTOCOL_STREAM ? ICU_DECODE : 0), &url);
  410. _ipPort = url.nPort;
  411. /*
  412. Code to pass in an MBCS servername to wininet always when this fix enabled
  413. to get around the UTF8-servername bugs. - I-DNS fix.
  414. */
  415. if( fUTF8Required
  416. && g_bGlobalUTF8hackEnabled
  417. && fRet
  418. && ((_dwProto == DLD_PROTOCOL_HTTP)
  419. || (_dwProto == DLD_PROTOCOL_HTTPS))
  420. )
  421. {
  422. DWORD dwHostname = MAX_URL_SIZE;
  423. WCHAR* pwzHostname = new WCHAR[MAX_URL_SIZE];
  424. char* pszHostname = new char[MAX_URL_SIZE];
  425. HRESULT hrTemp;
  426. BOOL bUsedDefaultChar;
  427. // This is NOT a loop - just an urlmon-style coding convention to avoid deep if-else-nesting.
  428. do
  429. {
  430. fRet = FALSE;
  431. if (!pwzHostname || !pszHostname)
  432. {
  433. break;
  434. }
  435. hrTemp = UrlGetPartW(pwzUrl, pwzHostname, &dwHostname, URL_PART_HOSTNAME, 0);
  436. if (FAILED(hrTemp))
  437. {
  438. break;
  439. }
  440. if (!StringContainsHighAnsiW(pwzHostname))
  441. {
  442. // home free! - no high ansi in servername.
  443. fRet = TRUE;
  444. break;
  445. }
  446. if (dwCodePage == CP_UTF8)
  447. {
  448. dwCodePage = GetACP();
  449. }
  450. //This fix cannot be ported to IE downlevel versions because WC_NO_BEST_FIT_CHARS is not
  451. //supported on all OS versions.
  452. if (0 == WideCharToMultiByte(dwCodePage, WC_NO_BEST_FIT_CHARS, pwzHostname, -1,
  453. pszHostname, MAX_URL_SIZE, NULL, &bUsedDefaultChar)
  454. || bUsedDefaultChar)
  455. {
  456. fRet = false;
  457. break;
  458. }
  459. // Cache the UTF8 servername if we need it.
  460. // This field is set only once ( not on redirects ),
  461. // so release only in destructor.
  462. _pszUTF8ServerName = new char[url.dwHostNameLength+1];
  463. if (!_pszUTF8ServerName)
  464. {
  465. break;
  466. }
  467. lstrcpy(_pszUTF8ServerName, _pszServerName);
  468. // now clobber it with the MBCS servername
  469. //Compat: match side-effect of calling InternetCrackUrl
  470. dwHostname = URL_FIELD_SIZE;
  471. hrTemp = UrlUnescapeA(pszHostname, _pszServerName, &dwHostname, 0);
  472. if(FAILED(hrTemp))
  473. {
  474. break;
  475. }
  476. // now put the original _pszFullURL back together with the MBCS servername
  477. // instead of the UTF8 servername since wininet will have this.
  478. url.lpszHostName = _pszServerName;
  479. url.dwHostNameLength = dwHostname;
  480. url.lpszUserName = NULL;
  481. url.dwUserNameLength = 0;
  482. url.lpszPassword = NULL;
  483. url.dwPasswordLength = 0;
  484. if (!InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
  485. {
  486. if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
  487. {
  488. break;
  489. }
  490. delete [] _pszFullURL;
  491. _pszFullURL = new char[++dwFullUrlLen];
  492. if (!_pszFullURL || !InternetCreateUrl(&url, 0, _pszFullURL, &dwFullUrlLen))
  493. {
  494. break;
  495. }
  496. }
  497. // At this point, everything is right.
  498. // 1. _pszFullURL has the same url as wininet.
  499. // 2. _pszServerName has the MBCS hostname
  500. // 3. _pszUTF8ServerName has the UTF8 hostname that would have gone to the proxy (if needed)
  501. _dwServerCodePage = dwCodePage;
  502. _fUTF8hack = TRUE;
  503. fRet = TRUE;
  504. break;
  505. }
  506. while(TRUE);
  507. if (pwzHostname)
  508. delete [] pwzHostname;
  509. if (pszHostname)
  510. delete [] pszHostname;
  511. }
  512. LocalFree(pszTemp);
  513. }
  514. else
  515. fRet = FALSE;
  516. }
  517. Exit:
  518. PerfDbgLog1(tagCUrl, this, "-CUrl::ParseUrl Full:[%s]", _pszFullURL);
  519. DEBUG_LEAVE(fRet);
  520. return(fRet);
  521. }
  522. //+---------------------------------------------------------------------------
  523. //
  524. // Method: CUrl::ProtoFromString
  525. //
  526. // Synopsis:
  527. //
  528. // Arguments: [lpszProtocol] --
  529. //
  530. // Returns:
  531. //
  532. // History: Created Unknown
  533. // 2-20-96 JohannP (Johann Posch) Modified for class
  534. //
  535. // Notes:
  536. //
  537. //----------------------------------------------------------------------------
  538. DWORD CUrl::ProtoFromString(LPSTR lpszProtocol)
  539. {
  540. DEBUG_ENTER((DBG_APP,
  541. Dword,
  542. "CUrl::ProtoFromString",
  543. "this=%#x, %.80q",
  544. this, lpszProtocol
  545. ));
  546. DWORD dwRetVal = DLD_PROTOCOL_NONE;
  547. PerfDbgLog1(tagCUrl, this, "CUrl::ProtoFromString [%s]", lpszProtocol);
  548. switch (GetUrlScheme(lpszProtocol))
  549. {
  550. case URL_SCHEME_HTTPS:
  551. dwRetVal = DLD_PROTOCOL_HTTPS;
  552. break;
  553. case URL_SCHEME_HTTP:
  554. dwRetVal = DLD_PROTOCOL_HTTP;
  555. break;
  556. case URL_SCHEME_FTP:
  557. dwRetVal = DLD_PROTOCOL_FTP;
  558. break;
  559. case URL_SCHEME_GOPHER:
  560. dwRetVal = DLD_PROTOCOL_GOPHER;
  561. break;
  562. case URL_SCHEME_FILE:
  563. dwRetVal = DLD_PROTOCOL_FILE;
  564. break;
  565. case URL_SCHEME_LOCAL:
  566. dwRetVal = DLD_PROTOCOL_LOCAL;
  567. break;
  568. case URL_SCHEME_MK:
  569. if(IsStreamEnabled())
  570. dwRetVal = DLD_PROTOCOL_STREAM;
  571. break;
  572. }
  573. DEBUG_LEAVE(dwRetVal);
  574. return dwRetVal;
  575. }
  576. // Helper API's
  577. //+---------------------------------------------------------------------------
  578. //
  579. // Function: ConstructURL
  580. //
  581. // Synopsis:
  582. //
  583. // Arguments: [pBC] -- Pointer to BindCtx
  584. // [pURLBase] -- Pointer to Base URL [IN]
  585. // [pURLRelative] -- Pointer to Relative URL [IN]
  586. // [pURLFull] -- Pointer to resultant complete URL [OUT]
  587. //
  588. // Returns:
  589. //
  590. // History: 02-21-96 JoeS (Joe Souza) Created
  591. //
  592. // Notes:
  593. //
  594. //----------------------------------------------------------------------------
  595. STDAPI ConstructURL(LPBC pBC, LPMONIKER pmkContext, LPMONIKER pmkToLeft,
  596. LPWSTR pwzURLRelative, LPWSTR pwzURLFull, DWORD cURLSize,
  597. DWORD dwFlags)
  598. {
  599. DEBUG_ENTER_API((DBG_API,
  600. Hresult,
  601. "ConstructURL",
  602. "%#x, %#x, %#x, %.80wq, %.80wq, %#x, %#x",
  603. pBC, pmkContext, pmkToLeft, pwzURLRelative, pwzURLFull, cURLSize, dwFlags
  604. ));
  605. PerfDbgLog2(tagCUrlApi, NULL, "+ConstructURL (rel:%ws, pmk:%lx)", pwzURLRelative, pmkContext);
  606. HRESULT hr = NOERROR;
  607. DWORD dwMnk = 0;
  608. LPMONIKER pmkCtx = NULL;
  609. LPWSTR wzURLBase = NULL;
  610. WCHAR wszURLFull[MAX_URL_SIZE + 1];
  611. DWORD cchURLFull;
  612. DWORD cbSize;
  613. BOOL bParseOk = FALSE;
  614. DWORD dwCUFlags = URL_FILE_USE_PATHURL;
  615. if (dwFlags & CU_STANDARD_FORM)
  616. {
  617. dwCUFlags = 0;
  618. }
  619. if (!pwzURLRelative || !pwzURLFull || !cURLSize)
  620. {
  621. hr = E_INVALIDARG;
  622. goto ConstructExit;
  623. }
  624. pwzURLFull[0] = 0;
  625. if (!pmkContext && pBC)
  626. {
  627. // No Context Moniker was specified, so try to get one of those.
  628. hr = pBC->GetObjectParam(SZ_URLCONTEXT, (IUnknown **)&pmkCtx);
  629. if (hr != NOERROR)
  630. {
  631. pmkCtx = NULL;
  632. }
  633. }
  634. else if (pmkContext)
  635. {
  636. pmkCtx = pmkContext;
  637. }
  638. else if (pmkToLeft)
  639. {
  640. pmkCtx = pmkToLeft;
  641. }
  642. if (pmkCtx)
  643. {
  644. // There is a Context Moniker. Make sure it is a URL moniker and
  645. // if it is, get the base URL from it.
  646. pmkCtx->IsSystemMoniker(&dwMnk);
  647. if (dwMnk == MKSYS_URLMONIKER)
  648. {
  649. hr = pmkCtx->GetDisplayName(pBC, NULL, &wzURLBase);
  650. }
  651. }
  652. hr = NOERROR;
  653. if (wzURLBase)
  654. {
  655. DWORD dwSizeIn = MAX_URL_SIZE;
  656. cchURLFull = MAX_URL_SIZE;
  657. bParseOk = SUCCEEDED(OInetCombineUrl(wzURLBase, pwzURLRelative, dwCUFlags, pwzURLFull, dwSizeIn, &cchURLFull, 0));
  658. }
  659. else if (dwFlags & CU_CANONICALIZE)
  660. {
  661. DWORD dwSizeIn = MAX_URL_SIZE;
  662. cchURLFull = MAX_URL_SIZE;
  663. bParseOk = SUCCEEDED(OInetParseUrl(pwzURLRelative,PARSE_CANONICALIZE, dwCUFlags, pwzURLFull, dwSizeIn,&cchURLFull,0));
  664. }
  665. else
  666. {
  667. // We did not combine a relative and a base URL, and caller
  668. // does not want to canonicalize, so we just copy the given URL
  669. // into the return buffer.
  670. #ifndef unix
  671. wcsncpy(pwzURLFull, pwzURLRelative, cURLSize / 2);
  672. #else
  673. wcsncpy(pwzURLFull, pwzURLRelative, cURLSize / sizeof(wchar_t));
  674. #endif /* unix */
  675. goto ConstructExit;
  676. }
  677. if (!bParseOk || !wcslen(pwzURLFull))
  678. {
  679. hr = MK_E_SYNTAX;
  680. }
  681. ConstructExit:
  682. if (wzURLBase)
  683. {
  684. delete wzURLBase;
  685. }
  686. PerfDbgLog2(tagCUrlApi, NULL, "-ConstructURL [%ws], hr:%lx", pwzURLFull, hr);
  687. DEBUG_LEAVE_API(hr);
  688. return hr;
  689. }
  690. //+---------------------------------------------------------------------------
  691. //
  692. // UTF-8 code from wininet written by RFirth
  693. //
  694. //----------------------------------------------------------------------------
  695. DWORD
  696. CountUnicodeToUtf8(
  697. IN LPCWSTR pwszIn,
  698. IN DWORD dwInLen,
  699. IN BOOL bEncode
  700. )
  701. /*++
  702. Routine Description:
  703. Count number of BYTEs required for UTF-8 conversion of UNICODE string. Count
  704. is terminated after dwInLen characters
  705. Arguments:
  706. pwszIn - pointer to input wide-character string
  707. dwInLen - number of characters in pwszIn
  708. bEncode - TRUE if we are to hex encode characters >= 0x80
  709. Return Value:
  710. DWORD - number of BYTEs required for conversion
  711. --*/
  712. {
  713. DEBUG_ENTER((DBG_APP,
  714. Dword,
  715. "CountUnicodeToUtf8",
  716. "%.80wq, %#x, %B",
  717. pwszIn, dwInLen, bEncode
  718. ));
  719. PProtAssert(pwszIn != NULL);
  720. PProtAssert(dwInLen != 0);
  721. DWORD dwCount = 0;
  722. DWORD oneCharLen = bEncode ? 3 : 1;
  723. DWORD twoCharLen = 2 * oneCharLen;
  724. //
  725. // N.B. code arranged to reduce number of jumps in loop to 1 (while)
  726. //
  727. do {
  728. WORD wchar = *pwszIn++;
  729. dwCount += (wchar & 0xF800) ? oneCharLen : 0;
  730. dwCount += ((wchar & 0xFF80) ? 0xFFFFFFFF : 0) & (twoCharLen - 1);
  731. ++dwCount;
  732. } while (--dwInLen != 0);
  733. DEBUG_LEAVE(dwCount);
  734. return dwCount;
  735. }
  736. DWORD
  737. ConvertUnicodeToUtf8(
  738. IN LPCWSTR pwszIn,
  739. IN DWORD dwInLen,
  740. OUT LPBYTE pszOut,
  741. IN DWORD dwOutLen,
  742. IN BOOL bEncode
  743. )
  744. /*++
  745. Routine Description:
  746. Convert a string of UNICODE characters to UTF-8:
  747. 0000000000000000..0000000001111111: 0xxxxxxx
  748. 0000000010000000..0000011111111111: 110xxxxx 10xxxxxx
  749. 0000100000000000..1111111111111111: 1110xxxx 10xxxxxx 10xxxxxx
  750. Arguments:
  751. pwszIn - pointer to input wide-character string
  752. dwInLen - number of CHARACTERS in pwszIn INCLUDING terminating NUL
  753. pszOut - pointer to output narrow-character buffer
  754. dwOutLen - number of BYTEs in pszOut
  755. bEncode - TRUE if we are to hex encode characters >= 0x80
  756. Return Value:
  757. DWORD
  758. Success - ERROR_SUCCESS
  759. Failure - ERROR_INSUFFICIENT_BUFFER
  760. Not enough space in pszOut to store results
  761. --*/
  762. {
  763. DEBUG_ENTER((DBG_APP,
  764. Dword,
  765. "CountUnicodeToUtf8",
  766. "%.80wq, %#x, %#x, %#x, %B",
  767. pwszIn, dwInLen, pszOut, dwOutLen, bEncode
  768. ));
  769. PProtAssert(pwszIn != NULL);
  770. PProtAssert((int)dwInLen > 0);
  771. PProtAssert(pszOut != NULL);
  772. PProtAssert((int)dwOutLen > 0);
  773. DWORD outputSize = bEncode ? 3 : 1;
  774. static char hexArray[] = "0123456789ABCDEF";
  775. while (dwInLen-- && dwOutLen) {
  776. WORD wchar = *pwszIn++;
  777. BYTE bchar;
  778. if (wchar <= 0x007F) {
  779. *pszOut++ = (BYTE)(wchar);
  780. --dwOutLen;
  781. continue;
  782. }
  783. BYTE lead = ((wchar >= 0x0800) ? 0xE0 : 0xC0);
  784. int shift = ((wchar >= 0x0800) ? 12 : 6);
  785. bchar = lead | (BYTE)(wchar >> shift);
  786. if (bEncode) {
  787. *pszOut++ = '%';
  788. *pszOut++ = hexArray[bchar >> 4];
  789. bchar = hexArray[bchar & 0x0F];
  790. }
  791. *pszOut++ = bchar;
  792. if (wchar >= 0x0800) {
  793. bchar = 0x80 | (BYTE)((wchar >> 6) & 0x003F);
  794. if (bEncode) {
  795. *pszOut++ = '%';
  796. *pszOut++ = hexArray[bchar >> 4];
  797. bchar = hexArray[bchar & 0x0F];
  798. }
  799. *pszOut++ = bchar;
  800. }
  801. bchar = 0x80 | (BYTE)(wchar & 0x003F);
  802. if (bEncode) {
  803. *pszOut++ = '%';
  804. *pszOut++ = hexArray[bchar >> 4];
  805. bchar = hexArray[bchar & 0x0F];
  806. }
  807. *pszOut++ = bchar;
  808. }
  809. DEBUG_LEAVE(ERROR_SUCCESS);
  810. return ERROR_SUCCESS;
  811. }
  812. BOOL
  813. StringContainsHighAnsi(
  814. IN LPSTR pszIn,
  815. IN DWORD dwInLen
  816. )
  817. /*++
  818. Routine Description:
  819. Determine if string contains ANSI characters in range 0x80..0xFF. Search is
  820. stopped when we hit the first high-ANSI character, when we hit the terminator
  821. or when we have decremented dwInLen to zero
  822. Arguments:
  823. pszIn - pointer to string to test
  824. dwInLen - length of pszIn
  825. Return Value:
  826. BOOL
  827. TRUE - pszIn contains one or more high-ANSI characters
  828. FALSE - pszIn (or substring of length dwInLen) does not contain
  829. high-ANSI characters
  830. --*/
  831. {
  832. DEBUG_ENTER((DBG_APP,
  833. Bool,
  834. "StringContainsHighAnsi",
  835. "%.80q, %#x",
  836. pszIn, dwInLen
  837. ));
  838. PProtAssert(pszIn != NULL);
  839. PProtAssert(dwInLen != 0);
  840. // only need to search the base portion
  841. while (dwInLen-- && *pszIn && *pszIn != '?') {
  842. if (*pszIn++ & 0x80) {
  843. DEBUG_LEAVE(TRUE);
  844. return TRUE;
  845. }
  846. }
  847. DEBUG_LEAVE(FALSE);
  848. return FALSE;
  849. }
  850. BOOL
  851. StringContainsHighAnsiW(
  852. IN LPCWSTR pwzIn
  853. )
  854. /*--
  855. Unicode version of StringContainsHighAnsi()
  856. --*/
  857. {
  858. DEBUG_ENTER((DBG_APP,
  859. Bool,
  860. "StringContainsHighAnsiW",
  861. "%.80wq",
  862. pwzIn
  863. ));
  864. PProtAssert(pwzIn != NULL);
  865. // only need to search the base portion
  866. while (*pwzIn && *pwzIn != L'?')
  867. {
  868. if (*pwzIn >= 0x80)
  869. {
  870. DEBUG_LEAVE(TRUE);
  871. return TRUE;
  872. }
  873. pwzIn++;
  874. }
  875. DEBUG_LEAVE(FALSE);
  876. return FALSE;
  877. }
  878. BOOL
  879. ConvertUnicodeUrl(
  880. LPCWSTR pwzFrom,
  881. LPSTR pszTo,
  882. INT cchTo,
  883. DWORD dwCodePage,
  884. BOOL fUTF8Enabled,
  885. BOOL* pfUTF8Required
  886. )
  887. {
  888. DEBUG_ENTER((DBG_APP,
  889. Bool,
  890. "ConvertUnicodeUrl",
  891. "%.80wq, %.80q, %#x, %#x, %B",
  892. pwzFrom, pszTo, cchTo, dwCodePage, fUTF8Enabled
  893. ));
  894. BOOL fSuccess = FALSE;
  895. //
  896. // In multibyte string, if we have any bytes(in the base url portion)
  897. // over 0x80, we will have to convert the base portion to utf-8
  898. // (leave the query portion as multi-byte)
  899. //
  900. // S_FALSE from the conversion above indicates that some wide chars couldn't be
  901. // mapped to the destination code page
  902. *pfUTF8Required = FALSE;
  903. if( fUTF8Enabled && StringContainsHighAnsiW(pwzFrom))
  904. {
  905. *pfUTF8Required = TRUE;
  906. // utf-8 conversion
  907. //
  908. // do we have a query portion? (by searching the UNICODE URL string
  909. // for '?') also we can get the UNICODED string's BasePortion
  910. // URL Length
  911. //
  912. DWORD dwBaseUrlLen = 0; // BasePortion length (in UNICODE)
  913. LPWSTR pBase = (LPWSTR) pwzFrom;
  914. while( *pBase && *pBase != '?')
  915. {
  916. pBase++;
  917. dwBaseUrlLen++;
  918. }
  919. DWORD dwMBQryUrlLen = 0; // QueryPortion length (in Multibyte)
  920. LPSTR pszQry = NULL; // multibyte query string
  921. if (*pBase)
  922. {
  923. //
  924. // we have a query portion, need to get length of
  925. // multi-byte query portion
  926. // In this case, we don't care whether or not pszTo is able to convert
  927. // everything
  928. W2A(pBase, pszTo, cchTo, dwCodePage);
  929. dwMBQryUrlLen = strlen(pszTo);
  930. pszQry = pszTo;
  931. }
  932. //
  933. // we are converting the base portion of UNICODE URL to UTF-8
  934. // count UTF-8 string length for base url
  935. //
  936. DWORD dwUTF8Len = CountUnicodeToUtf8(pwzFrom, dwBaseUrlLen, TRUE);
  937. //
  938. // allocate buffer for whole converted string
  939. // Buffer size = UTF8_BaseURL_Len + MultiByte_Query_Len + '\0'
  940. //
  941. DWORD dwUTFBufferSize = dwUTF8Len + dwMBQryUrlLen + 1;
  942. //
  943. // the size can not exceed incoming buffer size ccTo
  944. //
  945. if( dwUTFBufferSize > (DWORD)(cchTo + 1) )
  946. {
  947. //
  948. // fallback to IE4 behavior - sending multi-byte string
  949. //
  950. goto cleanup;
  951. }
  952. char* pszUTF8 = new char[dwUTFBufferSize];
  953. if( !pszUTF8 )
  954. {
  955. //
  956. // if we failed to allocate, we automatically
  957. // fallback to IE4 behavior - sending multi-byte string
  958. //
  959. goto cleanup;
  960. }
  961. memset(pszUTF8, 0, dwUTFBufferSize);
  962. // Coverting UNICODE->UTF8
  963. DWORD dwError;
  964. dwError = ConvertUnicodeToUtf8( pwzFrom,
  965. dwBaseUrlLen,
  966. (LPBYTE)pszUTF8,
  967. dwUTF8Len,
  968. TRUE );
  969. if( dwError != ERROR_SUCCESS )
  970. {
  971. //
  972. // if we failed, delete temp string and fallback to IE4
  973. // behavior - sending multi-byte string
  974. //
  975. delete [] pszUTF8;
  976. goto cleanup;
  977. }
  978. //
  979. // copy over the Multi-byte query string to final buffer
  980. //
  981. if( pszQry )
  982. {
  983. LPSTR pszURL = pszUTF8 + dwUTF8Len;
  984. while( dwMBQryUrlLen-- )
  985. {
  986. *pszURL = *pszQry;
  987. pszURL++;
  988. pszQry++;
  989. }
  990. }
  991. //
  992. // we are done, copy the content from temp buffer to
  993. // szTo
  994. //
  995. StrCpyN(pszTo, pszUTF8, dwUTFBufferSize);
  996. // delete temp utf8 buffer
  997. delete [] pszUTF8;
  998. fSuccess = TRUE;
  999. }
  1000. cleanup:
  1001. if (!fSuccess)
  1002. {
  1003. // Unicode->Multibyte
  1004. // IE4 behaviour. Shoot.
  1005. W2A(pwzFrom, pszTo, cchTo, dwCodePage);
  1006. }
  1007. DEBUG_LEAVE(TRUE);
  1008. return TRUE;
  1009. }