Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

602 lines
16 KiB

  1. /*++
  2. Copyright (c) 1995-1997 Microsoft Corporation
  3. Module Name:
  4. poller.c
  5. Abstract:
  6. This module polls the resource list
  7. Author:
  8. John Vert (jvert) 5-Dec-1995
  9. Revision History:
  10. Sivaprasad Padisetty (sivapad) 06-18-1997 Added the COM support
  11. --*/
  12. #include "nt.h"
  13. #include "ntrtl.h"
  14. #include "nturtl.h"
  15. #include "resmonp.h"
  16. #include "stdio.h"
  17. #define RESMON_MODULE RESMON_MODULE_POLLER
  18. //
  19. // Global data defined by this module
  20. //
  21. BOOL RmpShutdown = FALSE;
  22. //
  23. // The following critical section protects both insertion of new event lists
  24. // onto the event listhead, as well as adding new events to a given event list.
  25. // This could be broken into one critical section for each purpose. The latter
  26. // critical section would be part of each event list. The former would use the
  27. // following lock.
  28. //
  29. CRITICAL_SECTION RmpEventListLock; // Lock for processing event lists
  30. //
  31. // Function prototypes local to this module
  32. //
  33. DWORD
  34. RmpComputeNextTimeout(
  35. IN PPOLL_EVENT_LIST EventList
  36. );
  37. DWORD
  38. RmpPollList(
  39. IN PPOLL_EVENT_LIST EventList
  40. );
  41. VOID
  42. RmpPollBucket(
  43. IN PMONITOR_BUCKET Bucket
  44. );
  45. DWORD
  46. RmpPollerThread(
  47. IN LPVOID Context
  48. )
  49. /*++
  50. Routine Description:
  51. Thread startup routine for the polling thread. The way this works, is that
  52. other parts of the resource monitor add events to the list of events that
  53. is being processed by this thread. When they are done, they signal this
  54. thread, which makes a copy of the new lists, and then waits for an event to
  55. happen or a timeout occurs.
  56. Arguments:
  57. Context - A pointer to the POLL_EVENT_LIST for this thread.
  58. Return Value:
  59. Win32 error code.
  60. Note:
  61. This code assumes that the EventList pointed to by Context does NOT go
  62. away while this thread is running. Further it assumes that the ResourceList
  63. pointed to by the given EventList does not go away or change.
  64. --*/
  65. {
  66. DWORD Timeout;
  67. DWORD Status;
  68. PPOLL_EVENT_LIST NewEventList = (PPOLL_EVENT_LIST)Context;
  69. POLL_EVENT_LIST waitEventList; // Event list outstanding
  70. DWORD WaitFailed = 0;
  71. //
  72. // Zero the local copy event list structure.
  73. //
  74. ZeroMemory( &waitEventList, sizeof(POLL_EVENT_LIST) );
  75. //
  76. // Don't allow system failures to generate popups.
  77. //
  78. SetErrorMode( SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX );
  79. //
  80. // Create notification event to indicate that this list
  81. // has changed.
  82. //
  83. NewEventList->ListNotify = CreateEvent(NULL,
  84. FALSE,
  85. FALSE,
  86. NULL);
  87. if (NewEventList->ListNotify == NULL) {
  88. CL_UNEXPECTED_ERROR(GetLastError());
  89. }
  90. RmpAddPollEvent(NewEventList, NewEventList->ListNotify, NULL);
  91. //
  92. // Make a copy of the NewEventList first time through.
  93. //
  94. AcquireEventListLock( NewEventList );
  95. CopyMemory( &waitEventList,
  96. NewEventList,
  97. sizeof(POLL_EVENT_LIST)
  98. );
  99. ReleaseEventListLock( NewEventList );
  100. try_again:
  101. //
  102. // Compute initial timeout.
  103. //
  104. Timeout = RmpComputeNextTimeout( NewEventList );
  105. //
  106. // There are four functions performed by this thread...
  107. //
  108. // 1. Handle timers for polling.
  109. // 2. Handle list notification changes and updates to the number of
  110. // events handled by the WaitForMultipleObjects.
  111. // 3. Handle events set by resource DLL's to deliver asynchronous
  112. // event (failure) notifications.
  113. // 4. Handle a shutdown request.
  114. //
  115. // N.B. Handles cannot go away while we are waiting... it is therefore
  116. // best to set the event for the ListNotify event so we can redo the
  117. // wait event list.
  118. //
  119. while (TRUE) {
  120. //
  121. // Wait for any of the events to be signaled.
  122. //
  123. CL_ASSERT(waitEventList.Handle[0] == NewEventList->ListNotify);
  124. Status = WaitForMultipleObjects(waitEventList.EventCount,
  125. &waitEventList.Handle[0],
  126. FALSE,
  127. Timeout);
  128. if (Status == WAIT_TIMEOUT) {
  129. //
  130. // Time period has elapsed, go poll everybody
  131. //
  132. Timeout = RmpPollList( NewEventList );
  133. WaitFailed = 0;
  134. } else {
  135. //
  136. // If the first event is signaled, which is the ListNotify event,
  137. // then the list changed or a new poll event was added.
  138. //
  139. if ( Status == WAIT_OBJECT_0 ) {
  140. if (RmpShutdown) {
  141. //
  142. // Exit the poller thread, this will notify the main thread
  143. // to clean up and shutdown.
  144. //
  145. break;
  146. }
  147. get_new_list:
  148. WaitFailed = 0;
  149. //
  150. // The list has changed or we have a new event to wait for,
  151. // recompute a new timeout and make a copy of the new event list
  152. //
  153. AcquireEventListLock( NewEventList );
  154. CopyMemory( &waitEventList,
  155. NewEventList,
  156. sizeof(POLL_EVENT_LIST)
  157. );
  158. ReleaseEventListLock( NewEventList );
  159. Timeout = RmpComputeNextTimeout( NewEventList );
  160. } else if ( Status == WAIT_FAILED ) {
  161. //
  162. // We've probably signaled an event, and closed the handle
  163. // already. Wait on the Notify Event for just a little bit.
  164. // If that event fires, then copy a new event list. But only
  165. // try this 100 times.
  166. //
  167. if ( ++WaitFailed < 100 ) {
  168. Status = WaitForSingleObject( waitEventList.ListNotify,
  169. 100 );
  170. if ( RmpShutdown ) {
  171. break;
  172. }
  173. if ( Status == WAIT_TIMEOUT ) {
  174. continue;
  175. } else {
  176. goto get_new_list;
  177. }
  178. } else {
  179. Status = GetLastError();
  180. break;
  181. }
  182. } else {
  183. //
  184. // One of the resource events was signaled!
  185. //
  186. WaitFailed = 0;
  187. CL_ASSERT( WAIT_OBJECT_0 == 0 );
  188. RmpResourceEventSignaled( &waitEventList,
  189. Status );
  190. Timeout = RmpComputeNextTimeout( NewEventList );
  191. }
  192. }
  193. }
  194. ClRtlLogPrint( LOG_NOISE,
  195. "[RM] PollerThread stopping. Shutdown = %1!u!, Status = %2!u!, "
  196. "WaitFailed = %3!u!, NotifyEvent address = %4!u!.\n",
  197. RmpShutdown,
  198. Status,
  199. WaitFailed,
  200. waitEventList.ListNotify);
  201. #if 1 // RodGa - this is for debug only!
  202. WaitFailed = 0;
  203. if ( Status == ERROR_INVALID_HANDLE ) {
  204. DWORD i;
  205. for ( i = 0; i < waitEventList.EventCount; i++ ) {
  206. ClRtlLogPrint( LOG_NOISE, "[RM] Event address %1!u!, index %2!u!.\n",
  207. waitEventList.Handle[i], i);
  208. Status = WaitForSingleObject( waitEventList.Handle[i], 10 );
  209. if ( (Status == WAIT_FAILED) &&
  210. (GetLastError() == ERROR_INVALID_HANDLE) )
  211. {
  212. ClRtlLogPrint( LOG_UNUSUAL, "[RM] Event address %1!u!, index %2!u! is bad. Removing...\n",
  213. waitEventList.Handle[i], i);
  214. RmpRemovePollEvent( waitEventList.Handle[i] );
  215. //
  216. // Copy new list... and try again.
  217. //
  218. AcquireEventListLock( NewEventList );
  219. CopyMemory( &waitEventList,
  220. NewEventList,
  221. sizeof(POLL_EVENT_LIST)
  222. );
  223. ReleaseEventListLock( NewEventList );
  224. goto try_again;
  225. }
  226. }
  227. }
  228. #endif
  229. CL_ASSERT( NewEventList->ListNotify );
  230. CL_ASSERT( waitEventList.ListNotify == NewEventList->ListNotify );
  231. CloseHandle( NewEventList->ListNotify );
  232. return(0);
  233. } // RmpPollerThread
  234. DWORD
  235. RmpComputeNextTimeout(
  236. IN PPOLL_EVENT_LIST EventList
  237. )
  238. /*++
  239. Routine Description:
  240. Searches the resource list to determine the number of milliseconds
  241. until the next poll event.
  242. Arguments:
  243. None.
  244. Return Value:
  245. 0 - A poll interval has already elapsed.
  246. INFINITE - No resources to poll
  247. number of milliseconds until the next poll event.
  248. --*/
  249. {
  250. DWORD Timeout;
  251. PMONITOR_BUCKET Bucket;
  252. DWORDLONG NextDueTime;
  253. DWORDLONG CurrentTime;
  254. DWORDLONG WaitTime;
  255. AcquireEventListLock( EventList );
  256. if (!IsListEmpty(&EventList->BucketListHead)) {
  257. Bucket = CONTAINING_RECORD(EventList->BucketListHead.Flink,
  258. MONITOR_BUCKET,
  259. BucketList);
  260. NextDueTime = Bucket->DueTime;
  261. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  262. MONITOR_BUCKET,
  263. BucketList);
  264. while (&Bucket->BucketList != &EventList->BucketListHead) {
  265. if (Bucket->DueTime < NextDueTime) {
  266. NextDueTime = Bucket->DueTime;
  267. }
  268. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  269. MONITOR_BUCKET,
  270. BucketList);
  271. }
  272. //
  273. // Compute the number of milliseconds from the current time
  274. // until the next due time. This is our timeout value.
  275. //
  276. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  277. if (NextDueTime > CurrentTime) {
  278. WaitTime = NextDueTime - CurrentTime;
  279. CL_ASSERT(WaitTime < (DWORDLONG)0xffffffff * 10000); // check for excessive value
  280. Timeout = (ULONG)(WaitTime / 10000);
  281. } else {
  282. //
  283. // The next poll time has already passed, timeout immediately
  284. // and go poll the list.
  285. //
  286. Timeout = 0;
  287. }
  288. } else {
  289. //
  290. // Nothing to poll, so wait on the ListNotify event forever.
  291. //
  292. Timeout = INFINITE;
  293. }
  294. ReleaseEventListLock( EventList );
  295. return(Timeout);
  296. } // RmpComputeNextTimeout
  297. DWORD
  298. RmpPollList(
  299. IN PPOLL_EVENT_LIST EventList
  300. )
  301. /*++
  302. Routine Description:
  303. Polls all resources in the resource list whose timeouts have
  304. expired. Recomputes the next timeout interval for each polled
  305. resource.
  306. Arguments:
  307. None.
  308. Return Value:
  309. The number of milliseconds until the next poll event.
  310. --*/
  311. {
  312. ULONG i;
  313. DWORD Timeout = INFINITE;
  314. DWORDLONG NextDueTime;
  315. DWORDLONG CurrentTime;
  316. DWORDLONG WaitTime;
  317. PMONITOR_BUCKET Bucket;
  318. AcquireEventListLock( EventList );
  319. if (!IsListEmpty(&EventList->BucketListHead)) {
  320. Bucket = CONTAINING_RECORD(EventList->BucketListHead.Flink,
  321. MONITOR_BUCKET,
  322. BucketList);
  323. NextDueTime = Bucket->DueTime;
  324. while (&Bucket->BucketList != &EventList->BucketListHead) {
  325. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  326. if (CurrentTime >= Bucket->DueTime) {
  327. //
  328. // This poll interval has expired. Compute the
  329. // next poll interval and poll this bucket now.
  330. //
  331. CL_ASSERT( Bucket->Period != 0 );
  332. Bucket->DueTime = CurrentTime + Bucket->Period;
  333. RmpPollBucket(Bucket);
  334. }
  335. //
  336. // If this bucket is the closest upcoming event,
  337. // update NextDueTime.
  338. //
  339. if (Bucket->DueTime < NextDueTime) {
  340. NextDueTime = Bucket->DueTime;
  341. }
  342. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  343. MONITOR_BUCKET,
  344. BucketList);
  345. }
  346. //
  347. // Compute new timeout value in milliseconds
  348. //
  349. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  350. if (CurrentTime > NextDueTime) {
  351. //
  352. // The next timeout has already expired
  353. //
  354. WaitTime = Timeout = 0;
  355. } else {
  356. WaitTime = NextDueTime - CurrentTime;
  357. CL_ASSERT(WaitTime < (DWORDLONG)0xffffffff * 10000); // check for excessive value
  358. Timeout = (ULONG)(WaitTime / 10000);
  359. }
  360. }
  361. ReleaseEventListLock( EventList );
  362. return(Timeout);
  363. } // RmpPollList
  364. VOID
  365. RmpPollBucket(
  366. IN PMONITOR_BUCKET Bucket
  367. )
  368. /*++
  369. Routine Description:
  370. Polls all the resources in a given bucket. Updates their state and notifies
  371. cluster manager as appropriate.
  372. Arguments:
  373. Bucket - Supplies the bucket containing the list of resources to be polled.
  374. Return Value:
  375. None.
  376. --*/
  377. {
  378. PLIST_ENTRY CurrentEntry;
  379. PRESOURCE Resource;
  380. BOOL Success = TRUE;
  381. CurrentEntry = Bucket->ResourceList.Flink;
  382. while (CurrentEntry != &Bucket->ResourceList) {
  383. Resource = CONTAINING_RECORD(CurrentEntry,RESOURCE,ListEntry);
  384. //
  385. // The EventList Lock protects concurrent calls to individual
  386. // resources. The EventList Lock was taken out in RmpPollList.
  387. // If we increase the granularity of locking, and lock the resource
  388. // then we'd add a lock here.
  389. //
  390. if (Resource->State == ClusterResourceOnline) {
  391. //
  392. // A resource that is online alternates between LooksAlive
  393. // and IsAlive polling by doing an IsAlive poll instead of
  394. // a LooksAlive poll every IsAliveCount iterations.
  395. //
  396. Resource->IsAliveCount += 1;
  397. CL_ASSERT( Resource->IsAliveRollover != 0 );
  398. if (Resource->IsAliveCount == Resource->IsAliveRollover) {
  399. //
  400. // Poll the IsAlive entrypoint.
  401. //
  402. RmpSetMonitorState(RmonIsAlivePoll, Resource);
  403. #ifdef COMRES
  404. Success = RESMON_ISALIVE (Resource) ;
  405. #else
  406. Success = (Resource->IsAlive)(Resource->Id);
  407. #endif
  408. RmpSetMonitorState(RmonIdle, NULL);
  409. //
  410. // If this was successful, then we will perform the LooksAlive
  411. // test next time. Otherwise, we do the IsAlive check again.
  412. //
  413. if (Success) {
  414. Resource->IsAliveCount = 0;
  415. } else {
  416. --Resource->IsAliveCount;
  417. }
  418. } else {
  419. //
  420. // Poll the LooksAlive entrypoint.
  421. //
  422. if ( Resource->EventHandle == NULL ) {
  423. RmpSetMonitorState(RmonLooksAlivePoll,Resource);
  424. #ifdef COMRES
  425. Success = RESMON_LOOKSALIVE (Resource) ;
  426. #else
  427. Success = (Resource->LooksAlive)(Resource->Id);
  428. #endif
  429. RmpSetMonitorState(RmonIdle, NULL);
  430. }
  431. if ( !Success ) {
  432. RmpSetMonitorState(RmonIsAlivePoll, Resource);
  433. #ifdef COMRES
  434. Success = RESMON_ISALIVE (Resource) ;
  435. #else
  436. Success = (Resource->IsAlive)(Resource->Id);
  437. #endif
  438. RmpSetMonitorState(RmonIdle, NULL);
  439. }
  440. }
  441. if (!Success) {
  442. //
  443. // The resource has failed. Mark it as Failed and notify
  444. // the cluster manager.
  445. //
  446. Resource->State = ClusterResourceFailed;
  447. RmpPostNotify(Resource, NotifyResourceStateChange);
  448. }
  449. }
  450. CurrentEntry = CurrentEntry->Flink;
  451. }
  452. } // RmpPollBucket
  453. VOID
  454. RmpSignalPoller(
  455. IN PPOLL_EVENT_LIST EventList
  456. )
  457. /*++
  458. Routine Description:
  459. Interface to notify the poller thread that the resource list has
  460. been changed or a new event has been added to the poll event list.
  461. The poller thread should get a new event list and recompute its timeouts.
  462. Arguments:
  463. EventList - the event list that is to be notified.
  464. Return Value:
  465. None.
  466. --*/
  467. {
  468. BOOL Success;
  469. if (EventList->ListNotify != NULL) {
  470. Success = SetEvent(EventList->ListNotify);
  471. CL_ASSERT(Success);
  472. }
  473. } // RmpSignalPoller