Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

611 lines
10 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name :
  4. lcmgr.cpp
  5. Abstract:
  6. Link checker manager class implementation. This class provides the
  7. interfaces for creating and customizing the worker thread (link
  8. checking thread).
  9. NOTE: You should only have a single instance of CLinkCheckerMgr.
  10. Author:
  11. Michael Cheuk (mcheuk)
  12. Project:
  13. Link Checker
  14. Revision History:
  15. --*/
  16. #include "stdafx.h"
  17. #include "lcmgr.h"
  18. #include "enumdir.h"
  19. #include "proglog.h"
  20. #ifdef _DEBUG
  21. #define new DEBUG_NEW
  22. #undef THIS_FILE
  23. static char THIS_FILE[] = __FILE__;
  24. #endif
  25. // Constants: prefixes for the progress-log entries written before a link is parsed / loaded (TODO: put this in resource)
  26. const CString strParsing_c(_T("Parsing"));
  27. const CString strLoading_c(_T("Loading"));
  28. //------------------------------------------------------------------
  29. // Global fucntion for retrieve the link checker manager
  30. //
  31. // Global link checker manager pointer
  32. CLinkCheckerMgr* g_pLinkCheckerMgr = NULL;
  33. CLinkCheckerMgr&
  34. GetLinkCheckerMgr(
  35. )
  36. /*++
  37. Routine Description:
  38. Global fucntion for retrieve the link checker manager
  39. Arguments:
  40. N/A
  41. Return Value:
  42. CLinkCheckMgr& - reference to the link checker manager
  43. --*/
  44. {
  45. ASSERT(g_pLinkCheckerMgr);
  46. return *g_pLinkCheckerMgr;
  47. }
  48. //------------------------------------------------------------------
  49. // CLinkCheckerMgr implementation
  50. //
  51. CLinkCheckerMgr::CLinkCheckerMgr(
  52. )
  53. /*++
  54. Routine Description:
  55. Constructor.
  56. Arguments:
  57. N/A
  58. Return Value:
  59. N/A
  60. --*/
  61. {
  62. ASSERT(g_pLinkCheckerMgr == NULL);
  63. g_pLinkCheckerMgr = this;
  64. m_fWininetLoaded = FALSE;
  65. m_fInitialized = FALSE;
  66. m_lWorkerThreadRunning = -1;
  67. m_lTerminatingThread = -1;
  68. m_hWorkerThread = NULL;
  69. m_pProgressLog = NULL;
  70. } // CLinkCheckerMgr::CLinkCheckerMgr
  71. CLinkCheckerMgr::~CLinkCheckerMgr(
  72. )
  73. /*++
  74. Routine Description:
  75. Destructor.
  76. Arguments:
  77. N/A
  78. Return Value:
  79. N/A
  80. --*/
  81. {
  82. // The worker must be terminated
  83. ASSERT(!IsWorkerThreadRunning());
  84. // Nuke the global pointer
  85. ASSERT(g_pLinkCheckerMgr);
  86. g_pLinkCheckerMgr = NULL;
  87. } // CLinkCheckerMgr::~CLinkCheckerMgr
  88. BOOL
  89. CLinkCheckerMgr::LoadWininet(
  90. )
  91. /*++
  92. Routine Description:
  93. Load wininet.dll. This must be called before initialize()
  94. Arguments:
  95. N/A
  96. Return Value:
  97. BOOL - TRUE if success. FALSE otherwise.
  98. --*/
  99. {
  100. // Make sure LoadWininet() only call once
  101. ASSERT(!m_fWininetLoaded);
  102. if(m_fWininetLoaded)
  103. {
  104. return FALSE;
  105. }
  106. m_fWininetLoaded = TRUE;
  107. return m_Wininet.Load();
  108. } // CLinkCheckerMgr::LoadWininet
  109. BOOL
  110. CLinkCheckerMgr::Initialize(
  111. CProgressLog* pProgressLog
  112. )
  113. /*++
  114. Routine Description:
  115. Initialize the link checker manager. The link checker manager
  116. will initialize the link loader, link parser, ...etc
  117. Arguments:
  118. pProgressLog - pointer to an instance of progress logging object
  119. Return Value:
  120. BOOL - TRUE if success. FALSE otherwise.
  121. --*/
  122. {
  123. // Make sure Initialize() only call once
  124. ASSERT(!m_fInitialized);
  125. if(m_fInitialized)
  126. {
  127. return FALSE;
  128. }
  129. m_fInitialized = TRUE;
  130. // pProgressLog is ok to be NULL
  131. m_pProgressLog = pProgressLog;
  132. // Create the link loader
  133. if(!m_Loader.Create(_T(""), _T("")))
  134. {
  135. return FALSE;
  136. }
  137. // Create the error log
  138. if(!m_ErrLog.Create())
  139. {
  140. return FALSE;
  141. }
  142. // Set the local host name in the paser
  143. m_Parser.SetLocalHostName(GetUserOptions().GetHostName());
  144. return TRUE;
  145. } // CLinkCheckerMgr::Initialize
  146. BOOL
  147. CLinkCheckerMgr::BeginWorkerThread(
  148. )
  149. /*++
  150. Routine Description:
  151. Begin the link checking thread
  152. Arguments:
  153. N/A
  154. Return Value:
  155. BOOL - TRUE if success. FALSE otherwise.
  156. --*/
  157. {
  158. // Start 1 thread only
  159. if(IsWorkerThreadRunning())
  160. {
  161. return FALSE;
  162. }
  163. CWinThread* pWorkerThread = ::AfxBeginThread((AFX_THREADPROC)WorkerThreadForwarder, NULL);
  164. if(pWorkerThread == NULL)
  165. {
  166. return FALSE;
  167. }
  168. else
  169. {
  170. m_hWorkerThread = pWorkerThread->m_hThread;
  171. return TRUE;
  172. }
  173. } // CLinkCheckerMgr::BeginWorkerThread
  174. void
  175. CLinkCheckerMgr::SignalWorkerThreadToTerminate(
  176. )
  177. /*++
  178. Routine Description:
  179. Signal the worker thread to terminate
  180. Arguments:
  181. N/A
  182. Return Value:
  183. N/A
  184. --*/
  185. {
  186. if(IsWorkerThreadRunning() && !IsThreadTerminating())
  187. {
  188. InterlockedIncrement(&m_lTerminatingThread);
  189. }
  190. } // CLinkCheckerMgr::SignalWorkerThreadToTerminate
  191. UINT
  192. CLinkCheckerMgr::WorkerThreadForwarder(
  193. LPVOID pParam
  194. )
  195. /*++
  196. Routine Description:
  197. Worker thread entry point
  198. Arguments:
  199. pParam - unused
  200. Return Value:
  201. UINT - unsed
  202. --*/
  203. {
  204. // Now IsWorkerThreadRunnig() return TRUE
  205. InterlockedIncrement(&GetLinkCheckerMgr().m_lWorkerThreadRunning);
  206. UINT nRet = GetLinkCheckerMgr().WorkerThread(pParam);
  207. // Now IsWorkerThreadRunnig() return FLASE
  208. InterlockedDecrement(&GetLinkCheckerMgr().m_lWorkerThreadRunning);
  209. // Notify the progress log, the worker thread is completed
  210. if(GetLinkCheckerMgr().m_pProgressLog)
  211. {
  212. // Possible deadlock. Use message instead ?
  213. GetLinkCheckerMgr().m_pProgressLog->WorkerThreadComplete();
  214. }
  215. return nRet;
  216. } // CLinkCheckerMgr::WorkerThreadForwarder
  217. UINT
  218. CLinkCheckerMgr::WorkerThread(
  219. LPVOID pParam
  220. )
  221. /*++
  222. Routine Description:
  223. Actual worker thread function
  224. Arguments:
  225. pParam - unused
  226. Return Value:
  227. UINT - unsed
  228. --*/
  229. {
  230. UNUSED_ALWAYS(pParam);
  231. // Write the error log header
  232. m_ErrLog.WriteHeader();
  233. // Go thru all the combination of browser & language
  234. POSITION PosBrowser;
  235. CBrowserInfo BrowserInfo;
  236. POSITION PosLanguage;
  237. CLanguageInfo LanguageInfo;
  238. PosBrowser = GetUserOptions().GetAvailableBrowsers().GetHeadSelectedPosition();
  239. do
  240. {
  241. // Get the next browser
  242. BrowserInfo = GetUserOptions().GetAvailableBrowsers().GetNextSelected(PosBrowser);
  243. m_ErrLog.SetBrowser(BrowserInfo.GetName());
  244. // Reset language position
  245. PosLanguage = GetUserOptions().GetAvailableLanguages().GetHeadSelectedPosition();
  246. do
  247. {
  248. // Get the language
  249. LanguageInfo = GetUserOptions().GetAvailableLanguages().GetNextSelected(PosLanguage);
  250. m_ErrLog.SetLanguage(LanguageInfo.GetName());
  251. // Change the loader properties
  252. CString strAdditionalHeaders;
  253. strAdditionalHeaders.Format(_T("Accept: */*\r\nAccept-Language: %s"), LanguageInfo.GetAcceptName());
  254. if(!m_Loader.ChangeProperties(BrowserInfo.GetUserAgent(), strAdditionalHeaders))
  255. {
  256. return 1;
  257. }
  258. // Remove everything in the look up table
  259. m_Lookup.RemoveAll();
  260. // *EITHER* We are checking for virtual directories
  261. const CVirtualDirInfoList& DirInfoList = GetUserOptions().GetDirectoryList();
  262. int iSize = DirInfoList.GetCount();
  263. if(DirInfoList.GetCount() > 0)
  264. {
  265. POSITION Pos = DirInfoList.GetHeadPosition();
  266. // For each user input directory
  267. for(int i=0; !IsThreadTerminating() && i<iSize; i++)
  268. {
  269. CEnumerateDirTree Eumerator(DirInfoList.GetNext(Pos));
  270. CString strURL;
  271. // For each file in this directory tree, create an empty
  272. // stack with one file in
  273. while(!IsThreadTerminating() && Eumerator.Next(strURL))
  274. {
  275. CheckThisURL(strURL);
  276. }
  277. }
  278. }
  279. // *OR* We are checking for URL path
  280. const CStringList& URLList = GetUserOptions().GetURLList();
  281. iSize = URLList.GetCount();
  282. if(iSize > 0)
  283. {
  284. POSITION Pos = URLList.GetHeadPosition();
  285. for(int i=0; !IsThreadTerminating() && i<iSize; i++)
  286. {
  287. CheckThisURL(URLList.GetNext(Pos));
  288. }
  289. }
  290. }while(!IsThreadTerminating() && PosLanguage != NULL);
  291. }while(!IsThreadTerminating() && PosBrowser != NULL);
  292. // Write the error log footer
  293. m_ErrLog.WriteFooter();
  294. return 1;
  295. } // CLinkCheckerMgr::WorkerThread
  296. void
  297. CLinkCheckerMgr::CheckThisURL(
  298. LPCTSTR lpszURL
  299. )
  300. /*++
  301. Routine Description:
  302. Check this URL. This is the core of link checking.
  303. Arguments:
  304. lpszURL - URL to check
  305. Return Value:
  306. N/A
  307. --*/
  308. {
  309. // Create a link object for the input
  310. CLink Link(lpszURL, _T("Link Checker"), lpszURL, TRUE);
  311. // If not found in the lookup table
  312. if(!m_Lookup.Get(Link.GetURL(), Link))
  313. {
  314. if(m_pProgressLog)
  315. {
  316. CString strLog;
  317. strLog.Format(_T("Loading %s"), Link.GetURL());
  318. m_pProgressLog->Log(strLog);
  319. TRACE(_T("%s\n"), strLog);
  320. }
  321. // Load it ( with ReadFile )
  322. int iRet = m_Loader.Load(Link, TRUE);
  323. // Set the load time in the object
  324. Link.SetTime(CTime::GetCurrentTime());
  325. // Update the lookup table with this link
  326. m_Lookup.Add(Link.GetURL(), Link);
  327. }
  328. ASSERT(Link.GetState() != CLink::eUnit);
  329. // If the link is invalid, write to error log & return
  330. if(Link.GetState() == CLink::eInvalidHTTP ||
  331. Link.GetState() == CLink::eInvalidWininet)
  332. {
  333. m_ErrLog.Write(Link);
  334. return;
  335. }
  336. // If the link is not a text file, nothing
  337. // to parse
  338. if(Link.GetContentType() != CLink::eText)
  339. {
  340. return;
  341. }
  342. if(m_pProgressLog)
  343. {
  344. CString strLog;
  345. strLog.Format(_T("%s %s"), strParsing_c, Link.GetURL());
  346. m_pProgressLog->Log(strLog);
  347. TRACE(_T("%s\n"), strLog);
  348. }
  349. // Add the links in this html to the stack
  350. CLinkPtrList List;
  351. m_Parser.Parse(Link.GetData(), Link.GetURL(), List);
  352. // While the link stack is not empty
  353. while(!IsThreadTerminating() && List.GetCount() > 0)
  354. {
  355. // Pop a new link
  356. CLink* pLink = List.GetHead();
  357. List.RemoveHead();
  358. // If not found in the lookup table
  359. if(!m_Lookup.Get(pLink->GetURL(), *pLink))
  360. {
  361. if(m_pProgressLog)
  362. {
  363. CString strLog;
  364. strLog.Format(_T("%s %s"), strLoading_c, pLink->GetURL());
  365. m_pProgressLog->Log(strLog);
  366. TRACE(_T("%s\n"), strLog);
  367. }
  368. // Load it
  369. m_Loader.Load(*pLink, FALSE);
  370. // Set the load time in the object
  371. pLink->SetTime(CTime::GetCurrentTime());
  372. // Update the lookup table with this link
  373. m_Lookup.Add(pLink->GetURL(), *pLink);
  374. }
  375. // Make sure all the links were initialized
  376. ASSERT(pLink->GetState() != CLink::eUnit);
  377. // If the link is invalid, write to error log & return
  378. if(pLink->GetState() == CLink::eInvalidHTTP ||
  379. pLink->GetState() == CLink::eInvalidWininet)
  380. {
  381. m_ErrLog.Write(*pLink);
  382. }
  383. delete pLink;
  384. }
  385. } // CLinkCheckerMgr::CheckThisURL
  386. void
  387. CLinkCheckerMgr::ChangeBackSlash(
  388. LPTSTR lpsz
  389. )
  390. /*++
  391. Routine Description:
  392. Static functions for changing '\' to '/' in string
  393. Arguments:
  394. lpsz - input string pointer
  395. Return Value:
  396. N/A
  397. --*/
  398. {
  399. lpsz = _tcschr(lpsz, _TUCHAR('\\'));
  400. while(lpsz != NULL)
  401. {
  402. lpsz[0] = _TCHAR('/');
  403. lpsz = _tcschr(lpsz, _TUCHAR('\\'));
  404. }
  405. } // CLinkCheckerMgr::ChangeBackSlash
  406. void
  407. CLinkCheckerMgr::ChangeBackSlash(
  408. CString& str
  409. )
  410. /*++
  411. Routine Description:
  412. Static functions for changing '\' to '/' in string
  413. Arguments:
  414. str - input string
  415. Return Value:
  416. N/A
  417. --*/
  418. {
  419. LPTSTR lpsz = str.GetBuffer(str.GetLength());
  420. ChangeBackSlash(lpsz);
  421. str.ReleaseBuffer();
  422. } // CLinkCheckerMgr::ChangeBackSlash