Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

527 lines
12 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name :
  4. linkload.cpp
  5. Abstract:
  6. Link loader class definitions. It uses wininet API
  7. to load the web page from the internet.
  8. Author:
  9. Michael Cheuk (mcheuk) 22-Nov-1996
  10. Project:
  11. Link Checker
  12. Revision History:
  13. --*/
  14. #include "stdafx.h"
  15. #include "linkload.h"
  16. #include "link.h"
  17. #ifdef _DEBUG
  18. #define new DEBUG_NEW
  19. #undef THIS_FILE
  20. static char THIS_FILE[] = __FILE__;
  21. #endif
  22. // Constants (file-scope limits and buffer sizes used by CLinkLoader::LoadHTTP below)
  23. const int iMaxRedirectCount_c = 3; // redirect limit that LoadHTTP compares against its iRedirectCount argument
  24. const UINT nReadFileBufferSize_c = 4096; // element count of the TCHAR buffer LoadHTTP hands to InternetReadFile
  25. const UINT nQueryResultBufferSize_c = 1024; // element count of the TCHAR buffer LoadHTTP hands to HttpQueryInfoA
  26. BOOL
  27. CLinkLoader::Create(
  28. const CString& strUserAgent,
  29. const CString& strAdditonalHeaders
  30. )
  31. /*++
  32. Routine Description:
  33. One time link loader create funtion
  34. Arguments:
  35. strUserAgent - HTTP user agent name
  36. strAdditonalHeaders - addtional HTTP headers
  37. Return Value:
  38. BOOL - TRUE if success. FALSE otherwise.
  39. --*/
  40. {
  41. // Make sure wininet.dll is loaded
  42. ASSERT(CWininet::IsLoaded());
  43. if(!CWininet::IsLoaded())
  44. {
  45. return FALSE;
  46. }
  47. // Save the additional header
  48. m_strAdditionalHeaders = strAdditonalHeaders;
  49. // Open an internet session
  50. m_hInternetSession = CWininet::InternetOpenA(
  51. strUserAgent,
  52. PRE_CONFIG_INTERNET_ACCESS,
  53. NULL,
  54. INTERNET_INVALID_PORT_NUMBER,
  55. 0);
  56. #ifdef _DEBUG
  57. if(!m_hInternetSession)
  58. {
  59. TRACE(_T("CLinkLoader::Create() - InternetOpen() failed. GetLastError() = %d\n"),
  60. GetLastError());
  61. }
  62. #endif
  63. return (m_hInternetSession != NULL);
  64. } // CLinkLoader::Create
  65. BOOL
  66. CLinkLoader::ChangeProperties(
  67. const CString& strUserAgent,
  68. const CString& strAdditionalHeaders
  69. )
  70. /*++
  71. Routine Description:
  72. Change the loader properties
  73. Arguments:
  74. strUserAgent - HTTP user agent name
  75. strAdditonalHeaders - addtional HTTP headers
  76. Return Value:
  77. BOOL - TRUE if success. FALSE otherwise.
  78. --*/
  79. {
  80. if(m_hInternetSession)
  81. {
  82. // Close the previous internet session and
  83. // call Create() again
  84. VERIFY(CWininet::InternetCloseHandle(m_hInternetSession));
  85. return Create(strUserAgent, strAdditionalHeaders);
  86. }
  87. return FALSE;
  88. } // CLinkLoader::ChangeProperties
  89. BOOL
  90. CLinkLoader::Load(
  91. CLink& link,
  92. BOOL fReadFile
  93. )
  94. /*++
  95. Routine Description:
  96. Load a web link
  97. Arguments:
  98. link - reference to the result link object
  99. fReadFile - read the file and save it in the link object
  100. Return Value:
  101. BOOL - TRUE if success. FALSE otherwise.
  102. --*/
  103. {
  104. // Make sure we have a session avaiable
  105. ASSERT(m_hInternetSession);
  106. if(!m_hInternetSession)
  107. {
  108. return FALSE;
  109. }
  110. // Crack the URL
  111. TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
  112. TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
  113. URL_COMPONENTS urlcomp;
  114. memset(&urlcomp, 0, sizeof(urlcomp));
  115. urlcomp.dwStructSize = sizeof(urlcomp);
  116. urlcomp.lpszHostName = (LPTSTR) &szHostName;
  117. urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;
  118. urlcomp.lpszUrlPath = (LPTSTR) &szUrlPath;
  119. urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;
  120. if(!CWininet::InternetCrackUrlA(link.GetURL(), link.GetURL().GetLength(), NULL, &urlcomp))
  121. {
  122. TRACE(_T("CLinkLoader::Load() - InternetCrackUrl() failed. GetLastError() = %d\n"),
  123. GetLastError());
  124. return FALSE;
  125. }
  126. // Make sure we have a valid (non zero length) URL path
  127. if(_tcslen(szUrlPath) == 0)
  128. {
  129. _tprintf(szUrlPath, "%s", _TCHAR('/'));
  130. }
  131. // Call the appropriate load funtion for different URL schemes
  132. if(urlcomp.nScheme == INTERNET_SCHEME_HTTP)
  133. {
  134. return LoadHTTP(link, fReadFile, szHostName, szUrlPath);
  135. }
  136. else if(urlcomp.nScheme >= INTERNET_SCHEME_FTP &&
  137. urlcomp.nScheme <= INTERNET_SCHEME_HTTPS)
  138. {
  139. return LoadURL(link);
  140. }
  141. else
  142. {
  143. TRACE(_T("CLinkLoader::Load() - unsupport URL scheme(%d)\n"), urlcomp.nScheme);
  144. link.SetState(CLink::eUnsupport);
  145. return FALSE;
  146. }
  147. } // CLinkLoader::Load
  148. BOOL
  149. CLinkLoader::LoadURL(
  150. CLink& link
  151. )
  152. /*++
  153. Routine Description:
  154. Load a URL (non-HTTP) link
  155. Arguments:
  156. link - reference to the result link object
  157. Return Value:
  158. BOOL - TRUE if success. FALSE otherwise.
  159. --*/
  160. {
  161. // Use InternetOpenUrl for all URL scheme except HTTP
  162. CAutoInternetHandle hOpenURL;
  163. hOpenURL = CWininet::InternetOpenUrlA(
  164. m_hInternetSession,
  165. link.GetURL(),
  166. NULL,
  167. 0,
  168. INTERNET_FLAG_DONT_CACHE,
  169. 0);
  170. if(!hOpenURL)
  171. {
  172. TRACE(_T("CLinkLoader::LoadURL() - InternetOpenUrlA() failed."));
  173. return WininetFailed(link);
  174. }
  175. else
  176. {
  177. link.SetState(CLink::eValidURL);
  178. return TRUE;
  179. }
  180. } // CLinkLoader::LoadURL
  181. BOOL
  182. CLinkLoader::LoadHTTP(
  183. CLink& link,
  184. BOOL fReadFile,
  185. LPCTSTR lpszHostName,
  186. LPCTSTR lpszUrlPath,
  187. int iRedirectCount /* = 0 */
  188. )
  189. /*++
  190. Routine Description:
  191. Load a HTTP link
  192. Arguments:
  193. link - reference to the result link object
  194. fReadFile - read the file and save it in the link object
  195. lpszHostName - hostname
  196. lpszUrlPath - URL path
  197. iRedirectCount - Looping count. It is used to keep track the
  198. the number of redirection for current link.
  199. Return Value:
  200. BOOL - TRUE if success. FALSE otherwise.
  201. --*/
  202. {
  203. // Open an http session
  204. CAutoInternetHandle hHttpSession;
  205. hHttpSession = CWininet::InternetConnectA(
  206. m_hInternetSession, // hInternetSession
  207. lpszHostName, // lpszServerName
  208. INTERNET_INVALID_PORT_NUMBER, // nServerPort
  209. _T(""), // lpszUsername
  210. _T(""), // lpszPassword
  211. INTERNET_SERVICE_HTTP, // dwService
  212. 0, // dwFlags
  213. 0); // dwContext
  214. if(!hHttpSession)
  215. {
  216. TRACE(_T("CLinkLoader::LoadHTTP() - InternetConnect() failed."));
  217. return WininetFailed(link);
  218. }
  219. // Open an http request
  220. CAutoInternetHandle hHttpRequest;
  221. hHttpRequest = CWininet::HttpOpenRequestA(
  222. hHttpSession, // hHttpSession
  223. _T("GET"), // lpszVerb
  224. lpszUrlPath, // lpszObjectName
  225. HTTP_VERSION, // lpszVersion
  226. link.GetBase(), // lpszReferer
  227. NULL, // lpszAcceptTypes
  228. INTERNET_FLAG_NO_AUTO_REDIRECT | INTERNET_FLAG_DONT_CACHE, // dwFlags
  229. 0); // dwContext
  230. if(!hHttpRequest)
  231. {
  232. TRACE(_T("CLinkLoader::LoadHTTP() - HttpOpenRequest() failed."));
  233. return WininetFailed(link);
  234. }
  235. // Sent the http request
  236. if(!CWininet::HttpSendRequestA(
  237. hHttpRequest, // hHttpRequest
  238. m_strAdditionalHeaders, // lpszHeaders
  239. (DWORD)-1, // dwHeadersLength
  240. 0, // lpOptional
  241. 0)) // dwOptionalLength
  242. {
  243. TRACE(_T("CLinkLoader::LoadHTTP() - HttpSendRequest() failed."));
  244. return WininetFailed(link);
  245. }
  246. TCHAR szQueryResult[nQueryResultBufferSize_c];
  247. DWORD dwQueryLength = sizeof(szQueryResult);
  248. // Check the result status code
  249. if(!CWininet::HttpQueryInfoA(
  250. hHttpRequest, // hHttpRequest
  251. HTTP_QUERY_STATUS_CODE, // dwInfoLevel
  252. szQueryResult, // lpvBuffer
  253. &dwQueryLength, // lpdwBufferLength
  254. NULL)) // lpdwIndex
  255. {
  256. TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
  257. return WininetFailed(link);
  258. }
  259. // Check for 301 Move Permanently or 302 Move Temporarily
  260. if(_ttoi(szQueryResult) == 301 || _ttoi(szQueryResult) == 302)
  261. {
  262. // We can only redirect iMaxRedirectCount_c times
  263. if(iRedirectCount > iMaxRedirectCount_c)
  264. {
  265. return FALSE;
  266. }
  267. // Get the new location
  268. dwQueryLength = sizeof(szQueryResult);
  269. if(!CWininet::HttpQueryInfoA(
  270. hHttpRequest, // hHttpRequest
  271. HTTP_QUERY_LOCATION, // dwInfoLevel
  272. szQueryResult, // lpvBuffer
  273. &dwQueryLength, // lpdwBufferLength
  274. NULL)) // lpdwIndex
  275. {
  276. TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
  277. return WininetFailed(link);
  278. }
  279. // We only update the URL in link object if
  280. // we are redirecting from http://hostname/xyz to http://hostname/xyz/
  281. if(link.GetURL().GetLength() + 1 == (int)dwQueryLength &&
  282. link.GetURL().GetAt(link.GetURL().GetLength() - 1) != _TCHAR('/') &&
  283. szQueryResult[dwQueryLength - 1] == _TCHAR('/'))
  284. {
  285. link.SetURL(szQueryResult);
  286. }
  287. // Crack the URL & call LoadHTTP again
  288. TCHAR szHostName[INTERNET_MAX_HOST_NAME_LENGTH];
  289. TCHAR szUrlPath[INTERNET_MAX_URL_LENGTH];
  290. // Crack the URL
  291. URL_COMPONENTS urlcomp;
  292. memset(&urlcomp, 0, sizeof(urlcomp));
  293. urlcomp.dwStructSize = sizeof(urlcomp);
  294. urlcomp.lpszHostName = (LPTSTR) &szHostName;
  295. urlcomp.dwHostNameLength = INTERNET_MAX_HOST_NAME_LENGTH;
  296. urlcomp.lpszUrlPath = (LPTSTR) &szUrlPath;
  297. urlcomp.dwUrlPathLength = INTERNET_MAX_URL_LENGTH;
  298. VERIFY(CWininet::InternetCrackUrlA(szQueryResult, dwQueryLength, NULL, &urlcomp));
  299. return LoadHTTP(link, fReadFile, szHostName, szUrlPath, ++iRedirectCount);
  300. }
  301. // Update the HTTP status code
  302. link.SetStatusCode(_ttoi(szQueryResult));
  303. // If the status code is not 2xx. it is a invalid link
  304. if(szQueryResult[0] != '2')
  305. {
  306. link.SetState(CLink::eInvalidHTTP);
  307. // Get the new location
  308. dwQueryLength = sizeof(szQueryResult);
  309. if(CWininet::HttpQueryInfoA(
  310. hHttpRequest, // hHttpRequest
  311. HTTP_QUERY_STATUS_TEXT, // dwInfoLevel
  312. szQueryResult, // lpvBuffer
  313. &dwQueryLength, // lpdwBufferLength
  314. NULL)) // lpdwIndex
  315. {
  316. link.SetStatusText(szQueryResult);
  317. }
  318. return FALSE;
  319. }
  320. // Now we have a valid http link
  321. link.SetState(CLink::eValidHTTP);
  322. // If we are not reading the file, we can return now
  323. if(!fReadFile)
  324. {
  325. return TRUE;
  326. }
  327. // Check the result content-type
  328. dwQueryLength = sizeof(szQueryResult);
  329. if(!CWininet::HttpQueryInfoA(
  330. hHttpRequest, // hHttpRequest
  331. HTTP_QUERY_CONTENT_TYPE,// dwInfoLevel
  332. szQueryResult, // lpvBuffer
  333. &dwQueryLength, // lpdwBufferLength
  334. NULL)) // lpdwIndex
  335. {
  336. TRACE(_T("CLinkLoader::LoadHTTP() - HttpQueryInfo() failed."));
  337. return WininetFailed(link);
  338. }
  339. // We only load the html text for parsing
  340. if(!_tcsstr(szQueryResult, _T("text/html")) )
  341. {
  342. return TRUE;
  343. }
  344. link.SetContentType(CLink::eText);
  345. CString strBuffer;
  346. TCHAR buf[nReadFileBufferSize_c];
  347. DWORD dwBytesRead;
  348. // Load the text html in a loop
  349. do
  350. {
  351. memset(buf, 0, sizeof(buf));
  352. if(CWininet::InternetReadFile(
  353. hHttpRequest, // hFile
  354. buf, // lpBuffer
  355. sizeof(buf), // dwNumberOfBytesToRead
  356. &dwBytesRead)) // lpNumberOfBytesRead
  357. {
  358. strBuffer += buf;
  359. }
  360. else
  361. {
  362. TRACE(_T("CLinkLoader::LoadHTTP() - InternetReadFile() failed."));
  363. return WininetFailed(link);
  364. }
  365. }
  366. while(dwBytesRead);
  367. // Set the InternetReadFile result in the link object
  368. link.SetData(strBuffer);
  369. return TRUE;
  370. } // CLinkLoader::LoadHTTP
  371. BOOL
  372. CLinkLoader::WininetFailed(
  373. CLink& link
  374. )
  375. /*++
  376. Routine Description:
  377. Wininet failed clean up subroutine
  378. Arguments:
  379. link - reference to the result link object
  380. Return Value:
  381. BOOL - Alway return TRUE
  382. --*/
  383. {
  384. link.SetState(CLink::eInvalidWininet);
  385. link.SetStatusCode(GetLastError());
  386. TRACE(_T(" GetLastError() = %d\n"), link.GetStatusCode());
  387. LPTSTR lpMsgBuf;
  388. if(FormatMessage(
  389. FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | FORMAT_MESSAGE_FROM_SYSTEM,
  390. CWininet::GetWininetModule(),
  391. GetLastError(),
  392. MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
  393. (LPTSTR) &lpMsgBuf,
  394. 0,
  395. NULL) > 0)
  396. {
  397. link.SetStatusText(lpMsgBuf);
  398. LocalFree(lpMsgBuf);
  399. }
  400. return FALSE;
  401. } // CLinkLoader::WininetFailed