Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

568 lines
15 KiB

  /*++

  Copyright (c) 1996  Microsoft Corporation

  Module Name:

      rmnotify.c

  Abstract:

      Interfaces with the resource monitor to detect notification
      of resource state changes.

  Author:

      John Vert (jvert) 12-Jan-1996

  Revision History:

  --*/
  12. #include "fmp.h"
  13. #define LOG_MODULE RMNOTIFY
  14. //
  15. // Local Data
  16. //
  17. CL_QUEUE NotifyQueue;
  18. typedef struct {
  19. LIST_ENTRY Linkage;
  20. RM_EVENT_TYPE EventType;
  21. union {
  22. struct {
  23. RM_NOTIFY_KEY NotifyKey;
  24. CLUSTER_RESOURCE_STATE NewState;
  25. } ResourceTransition;
  26. struct {
  27. RM_NOTIFY_KEY NotifyKey;
  28. } ResourceResuscitate;
  29. } Parameters;
  30. } RM_EVENT, *PRM_EVENT;
  31. HANDLE RmNotifyThread;
  32. //
  33. // Local Functions
  34. //
  35. DWORD
  36. FmpRmWorkerThread(
  37. IN LPVOID lpThreadParameter
  38. );
  39. VOID
  40. FmpRmWorkItemHandler(
  41. IN PCLRTL_WORK_ITEM WorkItem,
  42. IN DWORD Ignored1,
  43. IN DWORD Ignored2,
  44. IN ULONG_PTR Ignored3
  45. );
  46. DWORD
  47. FmpRmDoHandleCriticalResourceStateChange(
  48. IN PRM_EVENT pEvent
  49. );
  50. DWORD
  51. FmpInitializeNotify(
  52. VOID
  53. )
  54. /*++
  55. Routine Description:
  56. Initialization routine for notification engine
  57. Arguments:
  58. None.
  59. Return Value:
  60. ERROR_SUCCESS if successful
  61. Win32 error code otherwise.
  62. --*/
  63. {
  64. DWORD ThreadId;
  65. DWORD Status;
  66. Status = ClRtlInitializeQueue(&NotifyQueue);
  67. if (Status != ERROR_SUCCESS) {
  68. CL_LOGFAILURE(Status);
  69. return(Status);
  70. }
  71. RmNotifyThread = CreateThread(NULL,
  72. 0,
  73. FmpRmWorkerThread,
  74. NULL,
  75. 0,
  76. &ThreadId);
  77. if (RmNotifyThread == NULL) {
  78. CsInconsistencyHalt(GetLastError());
  79. }
  80. return(ERROR_SUCCESS);
  81. }
  82. DWORD
  83. FmpRmWorkerThread(
  84. IN LPVOID lpThreadParameter
  85. )
  86. /*++
  87. Routine Description:
  88. This thread processes deferred Resource Monitor events.
  89. Arguments:
  90. lpThreadParameter - not used.
  91. Return Value:
  92. None.
  93. --*/
  94. {
  95. DWORD status = ERROR_SUCCESS;
  96. PRM_EVENT event;
  97. PLIST_ENTRY entry;
  98. while (TRUE)
  99. {
  100. entry = ClRtlRemoveHeadQueue(&NotifyQueue);
  101. if ( entry == NULL ) {
  102. break;
  103. }
  104. event = CONTAINING_RECORD(entry,
  105. RM_EVENT,
  106. Linkage);
  107. if (event->EventType == RmWorkerTerminate)
  108. {
  109. LocalFree(event);
  110. break;
  111. }
  112. status = FmpRmDoHandleCriticalResourceStateChange(event);
  113. if (status != ERROR_SUCCESS)
  114. {
  115. break;
  116. }
  117. }
  118. return(status);
  119. }
  #if 0
  //
  // NOTE: this routine is compiled out. It is retained as the former
  // delayed-work-queue handler for Resource Monitor events.
  //
  VOID
  FmpRmWorkItemHandler(
      IN PCLRTL_WORK_ITEM WorkItem,
      IN DWORD Ignored1,
      IN DWORD Ignored2,
      IN ULONG_PTR Ignored3
      )
  /*++

  Routine Description:

      Work item callback that processes one RM_EVENT. The event is taken
      from WorkItem->Context; both the event and the work item are freed
      before returning.

      If the FM groups are not yet initialized, the routine waits (in
      500 ms steps, at most 50 retries) for form-phase processing to
      finish and then processes the event with gQuoChangeLock held shared.

  Arguments:

      WorkItem - the work item; its Context is the PRM_EVENT to process.

      Ignored1, Ignored2, Ignored3 - not used.

  Return Value:

      None.

  --*/
  {
      PFM_RESOURCE resource;
      ULONG_PTR notifyKey;
      PRM_EVENT event;
      DWORD status;
      BOOL bQuoChangeLockHeld = FALSE;
      //
      // The retry budget lives at function scope. It used to be declared
      // inside the block that the ChkFMState label re-enters, which reset
      // it to 50 on every retry and made the "waited for too long" branch
      // unreachable.
      //
      DWORD dwRetryCount = 50;

      event = (PRM_EVENT)WorkItem->Context;

      // It is assumed that NotifyKey is always the first field of the struct
      // within the union in RM_EVENT
      notifyKey = event->Parameters.ResourceResuscitate.NotifyKey;

      resource = FmpFindResourceByNotifyKey(
                      notifyKey
                      );
      if (resource == NULL) {
          ClRtlLogPrint(LOG_NOISE,
              "[FM] FmpRmWorkItemHandler, bad resource NotifyKey %1!u!\n",
              notifyKey
              );
          goto FnExit;
      }

      ClRtlLogPrint(LOG_NOISE,
          "[FM] FmpRmWorkItemHandler, Resource=<%1!ws!>, Event=%2!u!\n",
          OmObjectId(resource),
          event->EventType);

  ChkFMState:
      if (!FmpFMGroupsInited)
      {
          ACQUIRE_SHARED_LOCK(gQuoChangeLock);

          //FmFormNewClusterPhaseProcessing is in progress
          if (FmpFMFormPhaseProcessing)
          {
              ClRtlLogPrint(LOG_CRITICAL,
                  "[FM] FmpRmWorkItemHandler, resource notification from quorum resource "
                  "during phase processing. Sleep and retry\n");
              RELEASE_LOCK(gQuoChangeLock);
              Sleep(500);
              if (dwRetryCount--)
                  goto ChkFMState;
              else
              {
                  ClRtlLogPrint(LOG_CRITICAL,
                      "[FM] FmpRmWorkItemHandler, waited for too long\n");
                  //terminate the process
                  CL_ASSERT(FALSE);
              }
          }
          else
          {
              // Lock stays held for the rest of the routine; released at FnExit.
              bQuoChangeLockHeld = TRUE;
          }
          //this can only come from the quorum resource
          CL_ASSERT(resource->QuorumResource);
      }

      switch(event->EventType) {

      case ResourceTransition:
          {
              CLUSTER_RESOURCE_STATE newState =
                  event->Parameters.ResourceTransition.NewState;

              FmpHandleResourceTransition(
                  resource,
                  newState
                  );
              break;
          }

  #if 0
      //
      // Chittur Subbaraman (chitturs) - 4/19/99
      //
      // Commenting out - case ResourceResuscitate is not called from anywhere.
      //
      case ResourceResuscitate:
          ClRtlLogPrint(LOG_NOISE,
              "[FM] FmpRmWorkItemHandler, processing ResourceResuscitate event\n");
          FmpAcquireLocalResourceLock( resource );
          FmpRestartResourceTree( resource );
          FmpReleaseLocalResourceLock( resource );
          break;

      //
      // Chittur Subbaraman (chitturs) - 4/19/99
      //
      // Commenting out - case RmUpdateResource is now handled by FmpWorkerThread.
      //
      case RmUpdateResource:
          //
          // Now tell the resource monitor about the changes.
          //
          FmpAcquireLocalResourceLock( resource );
          status = FmpRmChangeResourceParams( resource );
          FmpReleaseLocalResourceLock( resource );
          if ( status != ERROR_SUCCESS ) {
              ClRtlLogPrint(LOG_UNUSUAL,
                  "[FM] FmpRmWorkerThread, failed to change resource "
                  "parameters for %1!ws!, error %2!u!.\n",
                  OmObjectId(resource),
                  status );
          }
          break;

      //
      // Chittur Subbaraman (chitturs) - 4/19/99
      //
      // Commenting out - Since the producer of this notification is commented
      // out in fmreg.c.
      //
      case RmRestartResource:
          FmpAcquireLocalResourceLock( resource );
          status = FmpRmCloseResource( resource );
          if ( status == ERROR_SUCCESS ) {
              if ( resource->Flags & RESOURCE_SEPARATE_MONITOR ) {
                  resource->Flags &= ~RESOURCE_SEPARATE_MONITOR;
              } else {
                  resource->Flags |= RESOURCE_SEPARATE_MONITOR;
              }
              status = FmpRmCreateResource( resource );
              if ( status != ERROR_SUCCESS ) {
                  ClRtlLogPrint(LOG_UNUSUAL,
                      "[FM] FmpRmWorkItemhandler: Separate resource monitor "
                      "changed for '%1!ws!', but failed to re-open the resource, "
                      "error %2!u!.\n",
                      OmObjectId(resource),
                      status );
              }
          } else {
              ClRtlLogPrint(LOG_UNUSUAL,
                  "[FM] FmpRmWorkItemHandler :Separate resource monitor "
                  "changed for '%1!ws!', but failed to close the resource, "
                  "error %2!u!.\n",
                  OmObjectId(resource),
                  status );
          }
          FmpReleaseLocalResourceLock( resource );
          break;
  #endif

      default:
          ClRtlLogPrint(LOG_NOISE,
              "[FM] FmpRmWorkerThread, Unknown event type %1!u!\n",
              event->EventType
              );
          break;
      }

  FnExit:
      if (bQuoChangeLockHeld) {
          RELEASE_LOCK(gQuoChangeLock);
      }
      // Both allocations are owned by this handler once it runs.
      LocalFree(event);
      LocalFree(WorkItem);
      ClRtlLogPrint(LOG_NOISE,"[FM] FmpRmWorkItemHandler: Exit\n");
      return;
  }
  #endif
  278. BOOL
  279. FmpPostNotification(
  280. IN RM_NOTIFY_KEY NotifyKey,
  281. IN DWORD NotifyEvent,
  282. IN CLUSTER_RESOURCE_STATE CurrentState
  283. )
  284. /*++
  285. Routine Description:
  286. Callback routine used by resource monitor for resource state
  287. change notification. This routine queues the notification to
  288. a worker thread for deferred processing.
  289. Arguments:
  290. NotifyKey - Supplies the notification key for the resource
  291. that changed
  292. NotifyEvent - The event type.
  293. CurrentState - Supplies the (new) current state of the resource
  294. Return Value:
  295. TRUE - continue receiving notifications
  296. FALSE - abort notifications
  297. --*/
  298. {
  299. PRM_EVENT event;
  300. event = LocalAlloc(LMEM_FIXED, sizeof(RM_EVENT));
  301. if (event != NULL) {
  302. ClRtlLogPrint(LOG_NOISE,
  303. "[FM] NotifyCallBackRoutine: enqueuing event\n");
  304. event->EventType = NotifyEvent;
  305. event->Parameters.ResourceTransition.NotifyKey = NotifyKey;
  306. event->Parameters.ResourceTransition.NewState = CurrentState;
  307. //
  308. // Enqueue the event for the worker thread.
  309. //
  310. ClRtlInsertTailQueue(&NotifyQueue, &event->Linkage);
  311. }
  312. return(TRUE);
  313. }
  314. DWORD
  315. FmpRmDoHandleCriticalResourceStateChange(
  316. IN PRM_EVENT pEvent
  317. )
  318. /*++
  319. Routine Description:
  320. Does an interlocked decrement of the gdwQuoBlockingResources variable.
  321. Handle the transition of the quorum resource state via a separate
  322. thread.
  323. Arguments:
  324. pEvent - The Resource Monitor Event
  325. Return Value:
  326. ERROR_SUCCESS on success, a Win32 error code otherwise.
  327. Comments:
  328. DO NOT hold any locks (such as group lock, gQuoChangeLock, etc.)
  329. in this function. You could deadlock the system quite easily.
  330. --*/
  331. {
  332. RM_NOTIFY_KEY NotifyKey;
  333. DWORD dwOldBlockingFlag;
  334. PFM_RESOURCE pResource;
  335. DWORD status = ERROR_SUCCESS;
  336. PCLRTL_WORK_ITEM pWorkItem;
  337. CLUSTER_RESOURCE_STATE
  338. NewState = pEvent->Parameters.ResourceTransition.NewState;
  339. //
  340. // Chittur Subbaraman (chitturs) - 4/19/99
  341. //
  342. // This function decrements the blocking resources count when the
  343. // resource state has stabilized. It is important to do this
  344. // decrement in a non-blocking mode so that the quorum resource
  345. // does not get caught forever waiting for this count to go down to
  346. // zero in the offline call, FmpRmOfflineResource. This code was
  347. // originally located in FmpHandleResourceTransition and was moved
  348. // here since you could run out of FmpRmWorkItemHandler threads
  349. // (which service the CsDelayedWorkQueue) since all of them could
  350. // get blocked on the local resource lock in
  351. // FmpHandleResourceTransition and consequently any new notifications
  352. // from resmon which could potentially decrement this count will
  353. // not get serviced.
  354. //
  355. NotifyKey = pEvent->Parameters.ResourceResuscitate.NotifyKey;
  356. pResource = FmpFindResourceByNotifyKey(
  357. NotifyKey
  358. );
  359. if ( pResource == NULL ) {
  360. ClRtlLogPrint(LOG_UNUSUAL,
  361. "[FM] FmpRmDoHandleCriticalResourceStateChange, bad resource NotifyKey %1!u!\n",
  362. NotifyKey
  363. );
  364. goto FnExit;
  365. }
  366. if ( pEvent->EventType != ResourceTransition )
  367. {
  368. goto FnExit;
  369. }
  370. if ( pResource->QuorumResource )
  371. {
  372. //
  373. // Chittur Subbaraman (chitturs) - 6/25/99
  374. //
  375. // If this resource is the quorum resource, then let
  376. // FmpHandleResourceTransition take care of the sync notifications.
  377. // Note that this function only does the notifications for the
  378. // non-quorum resources as well as does the decrement on the
  379. // blocking resources count. The decrement MUST be done
  380. // without holding any locks to avoid potential deadlocks with
  381. // the quorum resource offline getting stuck in FmpRmOfflineResource
  382. // waiting for the blocking resources count to go to 0.
  383. // As far as the quorum resource goes, the sync notifications
  384. // must be done with gQuoChangeLock held since we want to
  385. // synchronize with other threads such as the FmCheckQuorumState
  386. // called by the DM node down handler. FmpHandleResourceTransition
  387. // does hold the gQuoChangeLock.
  388. //
  389. // Note also that for the quorum resource a separate thread
  390. // handles the resource transition since if we depend on the
  391. // worker threads servicing the CsDelayedWorkQueue to do this,
  392. // this notification could be starved from being processed since
  393. // some thread could hold the group lock and be stuck in the
  394. // resource onlining waiting for the quorum resource to go
  395. // online and all the worker threads servicing the CsDelayedWorkQueue
  396. // could be blocked on the group lock preventing the propagation
  397. // of the quorum resource state.
  398. //
  399. FmpCreateResStateChangeHandler( pResource, NewState, pResource->State );
  400. LocalFree( pEvent );
  401. goto FnExit;
  402. }
  403. pWorkItem = LocalAlloc( LMEM_FIXED, sizeof( CLRTL_WORK_ITEM ) );
  404. if ( pWorkItem == NULL )
  405. {
  406. status = ERROR_NOT_ENOUGH_MEMORY;
  407. CL_UNEXPECTED_ERROR( status );
  408. goto FnExit;
  409. }
  410. //
  411. // Comments from sunitas: Call the synchronous notifications.
  412. // This is done before the count is decremented as the synchronous
  413. // callbacks like the registry replication must get a chance to
  414. // finish before the quorum resource state is allowed to change.
  415. //
  416. // Note, there is no synchronization here with the resmon's
  417. // online/offline code. They are using the LocalResourceLocks.
  418. //
  419. FmpCallResourceNotifyCb( pResource, NewState );
  420. dwOldBlockingFlag = InterlockedExchange( &pResource->BlockingQuorum, 0 );
  421. if ( dwOldBlockingFlag ) {
  422. ClRtlLogPrint(LOG_NOISE,
  423. "[FM] FmpRmDoHandleCriticalResourceStateChange: call InterlockedDecrement on gdwQuoBlockingResources, Resource %1!ws!\n",
  424. OmObjectId(pResource));
  425. InterlockedDecrement( &gdwQuoBlockingResources );
  426. }
  427. //post a work item to the fm worker thread to handle the rest
  428. OmReferenceObject(pResource);
  429. FmpPostWorkItem(FM_EVENT_RES_RESOURCE_TRANSITION,
  430. pResource,
  431. NewState);
  432. #if 0
  433. ClRtlInitializeWorkItem( pWorkItem, FmpRmWorkItemHandler, (PVOID) pEvent );
  434. status = ClRtlPostItemWorkQueue( CsDelayedWorkQueue, pWorkItem, 0, 0 );
  435. if ( status )
  436. {
  437. LocalFree( pWorkItem );
  438. CL_UNEXPECTED_ERROR( status );
  439. }
  440. #endif
  441. FnExit:
  442. return( status );
  443. }