Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

640 lines
19 KiB

  1. /*++
  2. Copyright (c) 1995-1997 Microsoft Corporation
  3. Module Name:
  4. poller.c
  5. Abstract:
  6. This module polls the resource list
  7. Author:
  8. John Vert (jvert) 5-Dec-1995
  9. Revision History:
  10. Sivaprasad Padisetty (sivapad) 06-18-1997 Added the COM support
  11. --*/
  12. #include "nt.h"
  13. #include "ntrtl.h"
  14. #include "nturtl.h"
  15. #include "resmonp.h"
  16. #include "stdio.h"
  17. #define RESMON_MODULE RESMON_MODULE_POLLER
  //
  // Global data defined by this module
  //

  //
  // Shutdown flag. RmpPollerThread reads it after a failed wait and leaves its
  // loop when it is nonzero; it also appears in the thread's exit log message.
  // Nothing in the code shown here writes it after this initialization.
  //
  BOOL RmpShutdown = FALSE;
  //
  // The following critical section protects both insertion of new event lists
  // onto the event listhead, as well as adding new events to a given event list.
  // This could be broken into one critical section for each purpose. The latter
  // critical section would be part of each event list. The former would use the
  // following lock.
  //
  // NOTE(review): the routines in this module take the lock through
  // AcquireEventListLock( EventList ) / ReleaseEventListLock( EventList );
  // whether those resolve to this critical section is not visible here - confirm.
  //
  CRITICAL_SECTION RmpEventListLock; // Lock for processing event lists
  //
  // Function prototypes local to this module
  //

  //
  // Returns the number of milliseconds until the earliest bucket due time on
  // the event list, 0 if that time has already passed, or INFINITE if the
  // list has no buckets.
  //
  DWORD
  RmpComputeNextTimeout(
      IN PPOLL_EVENT_LIST EventList
      );

  //
  // Polls every bucket on the event list whose due time has passed and
  // returns the number of milliseconds until the next poll is due.
  //
  DWORD
  RmpPollList(
      IN PPOLL_EVENT_LIST EventList
      );

  //
  // Runs the LooksAlive/IsAlive checks for the online resources in one bucket.
  //
  VOID
  RmpPollBucket(
      IN PMONITOR_BUCKET Bucket
      );
  DWORD
  RmpPollerThread(
      IN LPVOID Context
      )
  /*++
  Routine Description:
      Thread startup routine for the polling thread. The way this works, is that
      other parts of the resource monitor add events to the list of events that
      is being processed by this thread. When they are done, they signal this
      thread, which makes a copy of the new lists, and then waits for an event to
      happen or a timeout occurs.

      The thread creates two events of its own and adds them to the list before
      anything else: ListNotify (asserted to be Handle[0] of the wait copy) and
      hEventShutdown (the wait loop treats index 1 as the shutdown request).
      Both handles are closed and cleared before the routine returns.
  Arguments:
      Context - A pointer to the POLL_EVENT_LIST for this thread.
  Return Value:
      Always 0. The last wait status / error code is only written to the log.
  Note:
      This code assumes that the EventList pointed to by Context does NOT go
      away while this thread is running. Further it assumes that the ResourceList
      pointed to by the given EventList does not go away or change.
  --*/
  {
      DWORD Timeout;
      DWORD Status;
      PPOLL_EVENT_LIST NewEventList = (PPOLL_EVENT_LIST)Context;
      POLL_EVENT_LIST waitEventList; // Event list outstanding
      DWORD WaitFailed = 0;          // WAIT_FAILED results seen since the last good wait
      //
      // Zero the local copy event list structure.
      //
      ZeroMemory( &waitEventList, sizeof(POLL_EVENT_LIST) );
      //
      // Don't allow system failures to generate popups.
      //
      SetErrorMode( SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX );
      //
      // Create notification event to indicate that this list
      // has changed.
      //
      NewEventList->ListNotify = CreateEvent(NULL,
                                             FALSE,
                                             FALSE,
                                             NULL);
      if (NewEventList->ListNotify == NULL) {
          CL_UNEXPECTED_ERROR(GetLastError());
      }
      //
      // NOTE(review): if CL_UNEXPECTED_ERROR returns, a NULL handle is added
      // to the poll list here - confirm that the macro does not return.
      //
      RmpAddPollEvent(NewEventList, NewEventList->ListNotify, NULL);
      //
      // Create a shutdown event
      //
      NewEventList->hEventShutdown = CreateEvent( NULL,   // Security attributes
                                                  FALSE,  // Auto reset event
                                                  FALSE,  // Nonsignaled initial state
                                                  NULL ); // Name
      if ( NewEventList->hEventShutdown == NULL )
      {
          CL_UNEXPECTED_ERROR(GetLastError());
      }
      //
      // Add the shutdown event to the poll list
      //
      RmpAddPollEvent( NewEventList, NewEventList->hEventShutdown, NULL );
      //
      // Make a copy of the NewEventList first time through. The wait below
      // always uses this private copy, never the shared list.
      //
      AcquireEventListLock( NewEventList );
      CopyMemory( &waitEventList,
                  NewEventList,
                  sizeof(POLL_EVENT_LIST)
                  );
      ReleaseEventListLock( NewEventList );
  try_again:
      //
      // Compute initial timeout. Control also comes back here from the
      // bad-handle recovery code after the wait loop.
      //
      Timeout = RmpComputeNextTimeout( NewEventList );
      //
      // There are four functions performed by this thread...
      //
      // 1. Handle timers for polling.
      // 2. Handle list notification changes and updates to the number of
      //    events handled by the WaitForMultipleObjects.
      // 3. Handle events set by resource DLL's to deliver asynchronous
      //    event (failure) notifications.
      // 4. Handle a shutdown request.
      //
      // N.B. Handles cannot go away while we are waiting... it is therefore
      // best to set the event for the ListNotify event so we can redo the
      // wait event list.
      //
      while (TRUE) {
          //
          // Wait for any of the events to be signaled.
          //
          CL_ASSERT(waitEventList.Handle[0] == NewEventList->ListNotify);
          Status = WaitForMultipleObjects(waitEventList.EventCount,
                                          &waitEventList.Handle[0],
                                          FALSE,
                                          Timeout);
          if (Status == WAIT_TIMEOUT) {
              //
              // Time period has elapsed, go poll everybody
              //
              Timeout = RmpPollList( NewEventList );
              WaitFailed = 0;
          } else {
              //
              // If the first event is signaled, which is the ListNotify event,
              // then the list changed or a new poll event was added.
              //
              if ( Status == WAIT_OBJECT_0 ) {
  get_new_list:
                  //
                  // Also entered by goto from the WAIT_FAILED branch below.
                  //
                  WaitFailed = 0;
                  //
                  // The list has changed or we have a new event to wait for,
                  // recompute a new timeout and make a copy of the new event list
                  //
                  AcquireEventListLock( NewEventList );
                  CopyMemory( &waitEventList,
                              NewEventList,
                              sizeof(POLL_EVENT_LIST)
                              );
                  ReleaseEventListLock( NewEventList );
                  Timeout = RmpComputeNextTimeout( NewEventList );
              } else if ( Status == WAIT_OBJECT_0 + 1 ) {
                  //
                  // This thread has been asked to shutdown, so exit.
                  //
                  ClRtlLogPrint(LOG_NOISE, "[RM] RmpPollerThread: Asked to exit...\n");
                  break;
              }
              else if ( Status == WAIT_FAILED ) {
                  //
                  // We've probably signaled an event, and closed the handle
                  // already. Wait on the Notify Event for just a little bit.
                  // If that event fires, then copy a new event list. But only
                  // try this 100 times.
                  //
                  if ( ++WaitFailed < 100 ) {
                      Status = WaitForSingleObject( waitEventList.ListNotify,
                                                    100 );
                      if ( RmpShutdown ) {
                          break;
                      }
                      if ( Status == WAIT_TIMEOUT ) {
                          //
                          // No list update yet; retry the multi-object wait
                          // with WaitFailed still counting up.
                          //
                          continue;
                      } else {
                          //
                          // NOTE(review): any result other than WAIT_TIMEOUT
                          // lands here, including WAIT_FAILED on ListNotify
                          // itself, and get_new_list resets WaitFailed to 0.
                          //
                          goto get_new_list;
                      }
                  } else {
                      //
                      // Give up. Replace WAIT_FAILED with the actual error code
                      // so the code after the loop can test for
                      // ERROR_INVALID_HANDLE.
                      //
                      Status = GetLastError();
                      break;
                  }
              } else {
                  //
                  // One of the resource events was signaled!
                  //
                  WaitFailed = 0;
                  CL_ASSERT( WAIT_OBJECT_0 == 0 );
                  RmpResourceEventSignaled( &waitEventList,
                                            Status );
                  Timeout = RmpComputeNextTimeout( NewEventList );
              }
          }
      }
      ClRtlLogPrint( LOG_NOISE,
                     "[RM] PollerThread stopping. Shutdown = %1!u!, Status = %2!u!, "
                     "WaitFailed = %3!u!, NotifyEvent address = %4!u!.\n",
                     RmpShutdown,
                     Status,
                     WaitFailed,
                     waitEventList.ListNotify);
  #if 1 // RodGa - this is for debug only!
      //
      // If the loop gave up because of an invalid handle, probe each handle in
      // the wait copy with a short wait. The first one that fails with
      // ERROR_INVALID_HANDLE is removed from the shared list, the copy is
      // refreshed, and the thread goes back to try_again instead of exiting.
      //
      WaitFailed = 0;
      if ( Status == ERROR_INVALID_HANDLE ) {
          DWORD i;
          for ( i = 0; i < waitEventList.EventCount; i++ ) {
              ClRtlLogPrint( LOG_NOISE, "[RM] Event address %1!u!, index %2!u!.\n",
                             waitEventList.Handle[i], i);
              Status = WaitForSingleObject( waitEventList.Handle[i], 10 );
              if ( (Status == WAIT_FAILED) &&
                   (GetLastError() == ERROR_INVALID_HANDLE) )
              {
                  ClRtlLogPrint( LOG_UNUSUAL, "[RM] Event address %1!u!, index %2!u! is bad. Removing...\n",
                                 waitEventList.Handle[i], i);
                  RmpRemovePollEvent( NewEventList, waitEventList.Handle[i] );
                  //
                  // Copy new list... and try again.
                  //
                  AcquireEventListLock( NewEventList );
                  CopyMemory( &waitEventList,
                              NewEventList,
                              sizeof(POLL_EVENT_LIST)
                              );
                  ReleaseEventListLock( NewEventList );
                  goto try_again;
              }
          }
      }
  #endif
      //
      // The thread is exiting: close the two events it created and clear the
      // fields (RmpSignalPoller skips the signal when ListNotify is NULL).
      //
      CL_ASSERT( NewEventList->ListNotify );
      CL_ASSERT( waitEventList.ListNotify == NewEventList->ListNotify );
      CloseHandle( NewEventList->ListNotify );
      NewEventList->ListNotify = NULL;
      CL_ASSERT( NewEventList->hEventShutdown );
      CloseHandle( NewEventList->hEventShutdown );
      NewEventList->hEventShutdown = NULL;
      return(0);
  } // RmpPollerThread
  253. DWORD
  254. RmpComputeNextTimeout(
  255. IN PPOLL_EVENT_LIST EventList
  256. )
  257. /*++
  258. Routine Description:
  259. Searches the resource list to determine the number of milliseconds
  260. until the next poll event.
  261. Arguments:
  262. None.
  263. Return Value:
  264. 0 - A poll interval has already elapsed.
  265. INFINITE - No resources to poll
  266. number of milliseconds until the next poll event.
  267. --*/
  268. {
  269. DWORD Timeout;
  270. PMONITOR_BUCKET Bucket;
  271. DWORDLONG NextDueTime;
  272. DWORDLONG CurrentTime;
  273. DWORDLONG WaitTime;
  274. AcquireEventListLock( EventList );
  275. if (!IsListEmpty(&EventList->BucketListHead)) {
  276. Bucket = CONTAINING_RECORD(EventList->BucketListHead.Flink,
  277. MONITOR_BUCKET,
  278. BucketList);
  279. NextDueTime = Bucket->DueTime;
  280. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  281. MONITOR_BUCKET,
  282. BucketList);
  283. while (&Bucket->BucketList != &EventList->BucketListHead) {
  284. if (Bucket->DueTime < NextDueTime) {
  285. NextDueTime = Bucket->DueTime;
  286. }
  287. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  288. MONITOR_BUCKET,
  289. BucketList);
  290. }
  291. //
  292. // Compute the number of milliseconds from the current time
  293. // until the next due time. This is our timeout value.
  294. //
  295. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  296. if (NextDueTime > CurrentTime) {
  297. WaitTime = NextDueTime - CurrentTime;
  298. CL_ASSERT(WaitTime < (DWORDLONG)0xffffffff * 10000); // check for excessive value
  299. Timeout = (ULONG)(WaitTime / 10000);
  300. } else {
  301. //
  302. // The next poll time has already passed, timeout immediately
  303. // and go poll the list.
  304. //
  305. Timeout = 0;
  306. }
  307. } else {
  308. //
  309. // Nothing to poll, so wait on the ListNotify event forever.
  310. //
  311. Timeout = INFINITE;
  312. }
  313. ReleaseEventListLock( EventList );
  314. return(Timeout);
  315. } // RmpComputeNextTimeout
  316. DWORD
  317. RmpPollList(
  318. IN PPOLL_EVENT_LIST EventList
  319. )
  320. /*++
  321. Routine Description:
  322. Polls all resources in the resource list whose timeouts have
  323. expired. Recomputes the next timeout interval for each polled
  324. resource.
  325. Arguments:
  326. None.
  327. Return Value:
  328. The number of milliseconds until the next poll event.
  329. --*/
  330. {
  331. ULONG i;
  332. DWORD Timeout = INFINITE;
  333. DWORDLONG NextDueTime;
  334. DWORDLONG CurrentTime;
  335. DWORDLONG WaitTime;
  336. PMONITOR_BUCKET Bucket;
  337. AcquireEventListLock( EventList );
  338. if (!IsListEmpty(&EventList->BucketListHead)) {
  339. Bucket = CONTAINING_RECORD(EventList->BucketListHead.Flink,
  340. MONITOR_BUCKET,
  341. BucketList);
  342. NextDueTime = Bucket->DueTime;
  343. while (&Bucket->BucketList != &EventList->BucketListHead) {
  344. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  345. if (CurrentTime >= Bucket->DueTime) {
  346. //
  347. // This poll interval has expired. Compute the
  348. // next poll interval and poll this bucket now.
  349. //
  350. CL_ASSERT( Bucket->Period != 0 );
  351. Bucket->DueTime = CurrentTime + Bucket->Period;
  352. RmpPollBucket(Bucket);
  353. }
  354. //
  355. // If this bucket is the closest upcoming event,
  356. // update NextDueTime.
  357. //
  358. if (Bucket->DueTime < NextDueTime) {
  359. NextDueTime = Bucket->DueTime;
  360. }
  361. Bucket = CONTAINING_RECORD(Bucket->BucketList.Flink,
  362. MONITOR_BUCKET,
  363. BucketList);
  364. }
  365. //
  366. // Compute new timeout value in milliseconds
  367. //
  368. GetSystemTimeAsFileTime((LPFILETIME)&CurrentTime);
  369. if (CurrentTime > NextDueTime) {
  370. //
  371. // The next timeout has already expired
  372. //
  373. WaitTime = Timeout = 0;
  374. } else {
  375. WaitTime = NextDueTime - CurrentTime;
  376. CL_ASSERT(WaitTime < (DWORDLONG)0xffffffff * 10000); // check for excessive value
  377. Timeout = (ULONG)(WaitTime / 10000);
  378. }
  379. }
  380. ReleaseEventListLock( EventList );
  381. return(Timeout);
  382. } // RmpPollList
  383. VOID
  384. RmpPollBucket(
  385. IN PMONITOR_BUCKET Bucket
  386. )
  387. /*++
  388. Routine Description:
  389. Polls all the resources in a given bucket. Updates their state and notifies
  390. cluster manager as appropriate.
  391. Arguments:
  392. Bucket - Supplies the bucket containing the list of resources to be polled.
  393. Return Value:
  394. None.
  395. --*/
  396. {
  397. PLIST_ENTRY CurrentEntry;
  398. PRESOURCE Resource;
  399. BOOL Success = TRUE;
  400. PRM_DUE_TIME_ENTRY pDueTimeEntry;
  401. CurrentEntry = Bucket->ResourceList.Flink;
  402. while (CurrentEntry != &Bucket->ResourceList) {
  403. Resource = CONTAINING_RECORD(CurrentEntry,RESOURCE,ListEntry);
  404. //
  405. // The EventList Lock protects concurrent calls to individual
  406. // resources. The EventList Lock was taken out in RmpPollList.
  407. // If we increase the granularity of locking, and lock the resource
  408. // then we'd add a lock here.
  409. //
  410. if (Resource->State == ClusterResourceOnline) {
  411. //
  412. // A resource that is online alternates between LooksAlive
  413. // and IsAlive polling by doing an IsAlive poll instead of
  414. // a LooksAlive poll every IsAliveCount iterations.
  415. //
  416. Resource->IsAliveCount += 1;
  417. CL_ASSERT( Resource->IsAliveRollover != 0 );
  418. if (Resource->IsAliveCount == Resource->IsAliveRollover) {
  419. //
  420. // Poll the IsAlive entrypoint.
  421. //
  422. RmpSetMonitorState(RmonIsAlivePoll, Resource);
  423. pDueTimeEntry = RmpInsertDeadlockMonitorList ( Resource->DllName,
  424. Resource->ResourceType,
  425. Resource->ResourceName,
  426. L"Is alive" );
  427. #ifdef COMRES
  428. Success = RESMON_ISALIVE (Resource) ;
  429. #else
  430. Success = (Resource->IsAlive)(Resource->Id);
  431. #endif
  432. RmpRemoveDeadlockMonitorList ( pDueTimeEntry );
  433. RmpSetMonitorState(RmonIdle, NULL);
  434. //
  435. // If this was successful, then we will perform the LooksAlive
  436. // test next time. Otherwise, we do the IsAlive check again.
  437. //
  438. if (Success) {
  439. Resource->IsAliveCount = 0;
  440. } else {
  441. --Resource->IsAliveCount;
  442. }
  443. } else {
  444. //
  445. // Poll the LooksAlive entrypoint.
  446. //
  447. if ( Resource->EventHandle == NULL ) {
  448. RmpSetMonitorState(RmonLooksAlivePoll,Resource);
  449. pDueTimeEntry = RmpInsertDeadlockMonitorList ( Resource->DllName,
  450. Resource->ResourceType,
  451. Resource->ResourceName,
  452. L"Looks alive" );
  453. #ifdef COMRES
  454. Success = RESMON_LOOKSALIVE (Resource) ;
  455. #else
  456. Success = (Resource->LooksAlive)(Resource->Id);
  457. #endif
  458. RmpSetMonitorState(RmonIdle, NULL);
  459. RmpRemoveDeadlockMonitorList ( pDueTimeEntry );
  460. }
  461. if ( !Success ) {
  462. RmpSetMonitorState(RmonIsAlivePoll, Resource);
  463. pDueTimeEntry = RmpInsertDeadlockMonitorList ( Resource->DllName,
  464. Resource->ResourceType,
  465. Resource->ResourceName,
  466. L"Is alive (looksalive fail)" );
  467. #ifdef COMRES
  468. Success = RESMON_ISALIVE (Resource) ;
  469. #else
  470. Success = (Resource->IsAlive)(Resource->Id);
  471. #endif
  472. RmpSetMonitorState(RmonIdle, NULL);
  473. RmpRemoveDeadlockMonitorList ( pDueTimeEntry );
  474. }
  475. }
  476. if (!Success) {
  477. //
  478. // The resource has failed. Mark it as Failed and notify
  479. // the cluster manager.
  480. //
  481. Resource->State = ClusterResourceFailed;
  482. RmpPostNotify(Resource, NotifyResourceStateChange);
  483. }
  484. }
  485. CurrentEntry = CurrentEntry->Flink;
  486. }
  487. } // RmpPollBucket
  488. VOID
  489. RmpSignalPoller(
  490. IN PPOLL_EVENT_LIST EventList
  491. )
  492. /*++
  493. Routine Description:
  494. Interface to notify the poller thread that the resource list has
  495. been changed or a new event has been added to the poll event list.
  496. The poller thread should get a new event list and recompute its timeouts.
  497. Arguments:
  498. EventList - the event list that is to be notified.
  499. Return Value:
  500. None.
  501. --*/
  502. {
  503. BOOL Success;
  504. if (EventList->ListNotify != NULL) {
  505. Success = SetEvent(EventList->ListNotify);
  506. CL_ASSERT(Success);
  507. }
  508. } // RmpSignalPoller