Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

835 lines
27 KiB

  1. /*++
  2. Copyright (c) 1996-1997 Microsoft Corporation
  3. Module Name:
  4. resfail.c
  5. Abstract:
  6. Cluster resource state management routines.
  7. Author:
  8. Mike Massa (mikemas) 14-Jan-1996
  9. Revision History:
  10. --*/
  11. #include "fmp.h"
  12. #define LOG_MODULE RESFAIL
  13. // globals
  14. //
  15. // Local Functions
  16. //
  17. DWORD
  18. FmpHandleResStateChangeProc(
  19. IN LPVOID pContext
  20. );
  21. VOID
  22. FmpHandleResourceFailure(
  23. IN PFM_RESOURCE pResource
  24. )
  25. /*++
  26. Routine Description:
  27. Handles resource failure notifications from the resource monitor.
  28. Arguments:
  29. Resource - The resource which has failed.
  30. Return Value:
  31. None.
  32. Note:
  33. This routine is only called if the resource was online at the time of
  34. the failure.
  35. --*/
  36. {
  37. DWORD dwStatus;
  38. BOOL bRestartGroup = TRUE;
  39. DWORD tickCount;
  40. DWORD withinFailurePeriod;
  41. CsLogEvent2(LOG_CRITICAL,
  42. FM_RESOURCE_FAILURE,
  43. OmObjectName(pResource),
  44. OmObjectName(pResource->Group));
  45. ClRtlLogPrint(LOG_NOISE,
  46. "[FM] FmpHandleResourceFailure: taking resource %1!ws! and dependents offline\n",
  47. OmObjectId(pResource));
  48. if ( pResource->State == ClusterResourceOnline )
  49. {
  50. ClRtlLogPrint(LOG_NOISE,
  51. "[FM] Resource %1!ws! failed, but still online!\n",
  52. OmObjectId(pResource));
  53. }
  54. // SS: We handle the failure of the quorum resource specially
  55. // since other resources rely on it and may be blocked waiting
  56. // for the quorum resource to come online.
  57. ++ pResource->NumberOfFailures;
  58. switch ( pResource->RestartAction )
  59. {
  60. case RestartNot:
  61. FmpTerminateResource( pResource );
  62. //
  63. // No action if FM is shutting down
  64. //
  65. if ( FmpShutdown ) return;
  66. // Don't do anything.
  67. // However, if this is a quorum resource cause it to halt
  68. if (pResource->QuorumResource)
  69. {
  70. //cleanup quorum resource and cause the node to halt
  71. if (pResource->RestartAction == RestartNot)
  72. {
  73. FmpCleanupQuorumResource(pResource);
  74. CsInconsistencyHalt(ERROR_QUORUM_RESOURCE_ONLINE_FAILED);
  75. }
  76. }
  77. break;
  78. case RestartLocal:
  79. // fall through is correct for this case
  80. bRestartGroup = FALSE;
  81. case RestartGroup:
  82. //
  83. // If the number of failures is too high, then don't restart locally.
  84. // If this was a local restart then don't notify FM so that Group
  85. // doesn't move because of this guy; otherwise notify the FM that the
  86. // group has failed.
  87. //
  88. //
  89. // Get our current time, in milliseconds.
  90. //
  91. tickCount = GetTickCount();
  92. //
  93. // Compute a boolean that tells if we are withing the allotted
  94. // failure period.
  95. //
  96. withinFailurePeriod = ( ((tickCount - pResource->FailureTime) <=
  97. pResource->RestartPeriod) ? TRUE : FALSE);
  98. //
  99. // If it's been a long time since our last failure, then
  100. // get the current time of this failure, and reset the count
  101. // of failures.
  102. //
  103. if ( !withinFailurePeriod ) {
  104. pResource->FailureTime = tickCount;
  105. pResource->NumberOfFailures = 1;
  106. }
  107. if ( pResource->NumberOfFailures <= pResource->RestartThreshold )
  108. {
  109. FmpTerminateResource( pResource );
  110. //
  111. // No restart if FM is shutting down or if the group is marked for a failover.
  112. //
  113. if ( ( FmpShutdown ) ||
  114. ( pResource->Group->dwStructState & FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL ) )
  115. {
  116. ClRtlLogPrint(LOG_UNUSUAL,
  117. "[FM] FmpHandleResourceFailure: No restart tree on resource %1!ws!...\n",
  118. OmObjectId(pResource));
  119. return;
  120. }
  121. FmpRestartResourceTree( pResource );
  122. pResource->Group->dwStructState |= FM_GROUP_STRUCT_MARKED_FOR_COMPLETION_EVENT;
  123. FmpCheckForGroupCompletionEvent(pResource->Group);
  124. }
  125. else if ( bRestartGroup )
  126. {
  127. //
  128. // No restart if FM is shutting down or if the group is marked for a failover.
  129. //
  130. if ( ( FmpShutdown ) ||
  131. ( pResource->Group->dwStructState & FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL ) )
  132. {
  133. ClRtlLogPrint(LOG_UNUSUAL,
  134. "[FM] FmpHandleResourceFailure: No group failure handling for resource %1!ws!...\n",
  135. OmObjectId(pResource));
  136. FmpTerminateResource( pResource );
  137. return;
  138. }
  139. //
  140. // Handle the group failure and notify the resources if we decide to failover the
  141. // group.
  142. //
  143. FmpHandleGroupFailure( pResource->Group, pResource );
  144. ClusterEvent( CLUSTER_EVENT_GROUP_FAILED, pResource->Group );
  145. }
  146. else
  147. {
  148. FmpTerminateResource( pResource );
  149. //
  150. // No restart if FM is shutting down or if the group is marked for a failover.
  151. //
  152. if ( ( FmpShutdown ) ||
  153. ( pResource->Group->dwStructState & FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL ) )
  154. {
  155. ClRtlLogPrint(LOG_UNUSUAL,
  156. "[FM] FmpHandleResourceFailure: No delayed restart on resource %1!ws!...\n",
  157. OmObjectId(pResource));
  158. return;
  159. }
  160. ClRtlLogPrint(LOG_NOISE,
  161. "[FM] RestartLocal: resource %1!ws! has exceeded its restart limit!\n",
  162. OmObjectId(pResource));
  163. if (pResource->QuorumResource)
  164. {
  165. FmpCleanupQuorumResource(pResource);
  166. CsInconsistencyHalt(ERROR_QUORUM_RESOURCE_ONLINE_FAILED);
  167. }
  168. // Start a timer for which will attempt to restart the resource later
  169. FmpDelayedStartRes(pResource);
  170. }
  171. break;
  172. default:
  173. ClRtlLogPrint(LOG_NOISE,"[FM] FmpHandleResourceFailure: unknown restart action! Value = %1!u!\n",
  174. pResource->RestartAction);
  175. }
  176. return;
  177. } // FmpHandleResourceFailure
  178. VOID
  179. FmpHandleResourceTransition(
  180. IN PFM_RESOURCE Resource,
  181. IN CLUSTER_RESOURCE_STATE NewState
  182. )
  183. /*++
  184. Routine Description:
  185. Takes appropriate action based on resource state transitions indicated
  186. by the Resource Monitor.
  187. Arguments:
  188. Resource - The resource which has transitioned.
  189. NewState - The new state of Resource.
  190. Return Value:
  191. None.
  192. --*/
  193. {
  194. DWORD status;
  195. DWORD dwOldBlockingFlag;
  196. ChkFMState:
  197. ACQUIRE_SHARED_LOCK(gQuoChangeLock);
  198. if (!FmpFMGroupsInited)
  199. {
  200. DWORD dwRetryCount = 50;
  201. //FmFormNewClusterPhaseProcessing is in progress
  202. if (FmpFMFormPhaseProcessing)
  203. {
  204. ClRtlLogPrint(LOG_CRITICAL,
  205. "[FM] FmpHandleResourceTransition: resource notification from quorum resource "
  206. "during phase processing. Sleep and retry\n");
  207. RELEASE_LOCK(gQuoChangeLock);
  208. Sleep(500);
  209. if (dwRetryCount--)
  210. goto ChkFMState;
  211. else
  212. {
  213. ClRtlLogPrint(LOG_CRITICAL,
  214. "[FM] FmpHandleResourceTransition: waited for too long\n");
  215. //terminate the process
  216. CL_ASSERT(FALSE);
  217. CsInconsistencyHalt(ERROR_CLUSTER_NODE_DOWN);
  218. }
  219. }
  220. //this can only come from the quorum resource
  221. CL_ASSERT(Resource->QuorumResource);
  222. }
  223. // if this is from the quorum resource, we need to do some special handling
  224. // protect the check for quorum resource by acquiring the shared lock
  225. if (Resource->QuorumResource)
  226. {
  227. //
  228. // Chittur Subbaraman (chitturs) - 6/25/99
  229. //
  230. // Handle the sync notifications for the quorum resource. This is
  231. // done here instead of in FmpRmDoInterlockedDecrement since we
  232. // need to hold the gQuoChangeLock for this to synchronize with
  233. // other threads such as the FmCheckQuorumState called by the DM
  234. // node down handler. Note that FmpRmDoInterLockedDecrement needs
  235. // to be done with NO LOCKS held since it easily runs into deadlock
  236. // situations in which the quorum resource offline is waiting to
  237. // have the blocking resources count go to 0 and FmpRmDoInterLockedDecrement
  238. // which alone can make this count to 0 could be stuck waiting for
  239. // the lock.
  240. //
  241. DWORD dwBlockingFlag = InterlockedExchange( &Resource->BlockingQuorum, 0 );
  242. CL_ASSERT( dwBlockingFlag == FALSE );
  243. FmpCallResourceNotifyCb( Resource, NewState );
  244. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  245. }
  246. else
  247. {
  248. FmpAcquireLocalResourceLock(Resource);
  249. }
  250. ClRtlLogPrint(
  251. NewState == ClusterResourceFailed ? LOG_UNUSUAL : LOG_NOISE,
  252. "[FM] FmpHandleResourceTransition: Resource Name = %1!ws! [%2!ws!] old state=%3!u! new state=%4!u!\n",
  253. OmObjectId(Resource),
  254. OmObjectName(Resource),
  255. Resource->State,
  256. NewState
  257. );
  258. if ( Resource->State == NewState )
  259. {
  260. ClRtlLogPrint(LOG_NOISE,
  261. "[FM] FmpHandleResourceTransition: Resource %1!ws! already in state=%2!u!\n",
  262. OmObjectId(Resource),
  263. NewState );
  264. goto FnExit;
  265. }
  266. //
  267. // Chittur Subbaraman (chitturs) - 7/8/2001
  268. //
  269. // A resource which is in waiting state cannot receive a notification from resource monitor. If such
  270. // a notification is received, discard it as a stale notification. If not, you take wrong actions
  271. // on a resource which is in waiting state (such as restart) and mess up FM's notification processing.
  272. //
  273. if ( Resource->Flags & RESOURCE_WAITING )
  274. {
  275. ClRtlLogPrint(LOG_UNUSUAL,
  276. "[FM] FmpHandleResourceTransition: Resource %1!ws! [%2!ws!] is in waiting state, discarding notification as stale\n",
  277. OmObjectId(Resource),
  278. OmObjectName(Resource));
  279. goto FnExit;
  280. }
  281. switch (Resource->State) {
  282. case ClusterResourceOnline:
  283. // if there is a resource failure, then let the worker thread handle it
  284. // if there is a state change call the resource state change handler
  285. if (Resource->State != NewState)
  286. FmpPropagateResourceState( Resource, NewState );
  287. if (NewState == ClusterResourceFailed)
  288. {
  289. if (Resource->QuorumResource)
  290. {
  291. RELEASE_LOCK(gQuoLock);
  292. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  293. ClusterResourceOnline);
  294. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  295. }
  296. else
  297. {
  298. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  299. ClusterResourceOnline);
  300. }
  301. }
  302. else
  303. {
  304. CL_ASSERT( (NewState == ClusterResourceOnline) ||
  305. (NewState == ClusterResourceOffline) );
  306. }
  307. break;
  308. case ClusterResourceFailed:
  309. if (Resource->State != NewState)
  310. FmpPropagateResourceState( Resource, NewState );
  311. break;
  312. case ClusterResourceOfflinePending:
  313. //SS: a resource cannot go from one pending state to another
  314. CL_ASSERT( NewState < ClusterResourcePending )
  315. // fall through
  316. case ClusterResourceOffline:
  317. //
  318. // Because this resource is now unstuck... there may be other
  319. // pending threads waiting to clear up. If not, they'll just get
  320. // stuck again, until the next notification.
  321. //
  322. switch ( NewState ) {
  323. case ClusterResourceFailed:
  324. if ( Resource->State != NewState )
  325. FmpPropagateResourceState( Resource, NewState );
  326. // if it is the quorum resource handle the locking appropriately
  327. if (Resource->QuorumResource)
  328. {
  329. //
  330. // Chittur Subbaraman (chitturs) - 9/20/99
  331. //
  332. // Release and reacquire the gQuoLock to maintain
  333. // locking order between group lock and gQuoLock.
  334. //
  335. RELEASE_LOCK(gQuoLock);
  336. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  337. ClusterResourceOffline);
  338. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  339. }
  340. else
  341. {
  342. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  343. ClusterResourceOffline);
  344. }
  345. break;
  346. case ClusterResourceOffline:
  347. if ( Resource->Group->OwnerNode == NmLocalNode )
  348. {
  349. if ( Resource->State != NewState )
  350. {
  351. FmpPropagateResourceState( Resource, NewState );
  352. }
  353. // if it is the quorum resource handle the locking appropriately
  354. if (Resource->QuorumResource)
  355. {
  356. //
  357. // Chittur Subbaraman (chitturs) - 9/20/99
  358. //
  359. // Release and reacquire the gQuoLock to maintain
  360. // locking order between group lock and gQuoLock.
  361. //
  362. RELEASE_LOCK(gQuoLock);
  363. FmpProcessResourceEvents(Resource, ClusterResourceOffline,
  364. ClusterResourceOfflinePending);
  365. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  366. }
  367. else
  368. {
  369. FmpProcessResourceEvents(Resource, ClusterResourceOffline,
  370. ClusterResourceOfflinePending);
  371. }
  372. }
  373. else
  374. {
  375. if ( Resource->State != NewState )
  376. {
  377. FmpPropagateResourceState( Resource, NewState );
  378. }
  379. }
  380. break;
  381. default:
  382. if ( Resource->State != NewState ) {
  383. FmpPropagateResourceState( Resource, NewState );
  384. }
  385. break;
  386. }
  387. break;
  388. case ClusterResourceOnlinePending:
  389. //SS: a resource cannot go from one pending state to another
  390. CL_ASSERT( NewState < ClusterResourcePending )
  391. //
  392. // Because this resource is now unstuck... there may be other
  393. // pending threads waiting to clear up. If not, they'll just get
  394. // stuck again, until the next notification.
  395. //
  396. switch ( NewState ) {
  397. case ClusterResourceFailed:
  398. //
  399. // Make sure we go through full failure recovery.
  400. //
  401. //SS: dont know why the state is being set to online
  402. //it could be online pending
  403. //Resource->State = ClusterResourceOnline;
  404. ClRtlLogPrint(LOG_UNUSUAL,
  405. "[FM] FmpHandleResourceTransition: Resource failed, post a work item\n");
  406. if (Resource->State != NewState)
  407. FmpPropagateResourceState( Resource, NewState );
  408. // since this is the quorum Resource handle locking appropriately
  409. if (Resource->QuorumResource)
  410. {
  411. //
  412. // Chittur Subbaraman (chitturs) - 9/20/99
  413. //
  414. // Release and reacquire the gQuoLock to maintain
  415. // locking order between group lock and gQuoLock.
  416. //
  417. RELEASE_LOCK(gQuoLock);
  418. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  419. ClusterResourceOnlinePending);
  420. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  421. }
  422. else
  423. {
  424. FmpProcessResourceEvents(Resource, ClusterResourceFailed,
  425. ClusterResourceOnlinePending);
  426. }
  427. break;
  428. case ClusterResourceOnline:
  429. if (Resource->Group->OwnerNode == NmLocalNode) {
  430. //Call FmpPropagateResourceState without holding the group
  431. //lock for the quorum resource
  432. FmpPropagateResourceState( Resource, NewState );
  433. // since this is the quorum Resource fork another thread
  434. if (Resource->QuorumResource)
  435. {
  436. //
  437. // Chittur Subbaraman (chitturs) - 9/20/99
  438. //
  439. // Release and reacquire the gQuoLock to maintain
  440. // locking order between group lock and gQuoLock.
  441. //
  442. RELEASE_LOCK(gQuoLock);
  443. FmpProcessResourceEvents(Resource, ClusterResourceOnline,
  444. ClusterResourceOnlinePending);
  445. ACQUIRE_EXCLUSIVE_LOCK(gQuoLock);
  446. }
  447. else
  448. {
  449. FmpProcessResourceEvents(Resource, ClusterResourceOnline,
  450. ClusterResourceOnlinePending);
  451. }
  452. } else {
  453. FmpPropagateResourceState( Resource, NewState );
  454. }
  455. break;
  456. default:
  457. if (Resource->State != NewState)
  458. FmpPropagateResourceState( Resource, NewState );
  459. break;
  460. }
  461. break;
  462. case ClusterResourceInitializing:
  463. default:
  464. if (Resource->State != NewState)
  465. FmpPropagateResourceState( Resource, NewState );
  466. CL_ASSERT(Resource->State == NewState);
  467. }
  468. FnExit:
  469. if (Resource->QuorumResource) {
  470. RELEASE_LOCK(gQuoLock);
  471. } else {
  472. FmpReleaseLocalResourceLock(Resource);
  473. }
  474. RELEASE_LOCK(gQuoChangeLock);
  475. return;
  476. }
  477. /****
  478. @func DWORD | FmpCreateResNotificationHandler| This creates a new
  479. thread to handle state change notifications for the given resource.
  480. @parm IN PFM_RESOURCE | pResource | Pointer to the resource.
  481. @parm IN CLUSTER_RESOURCE_STATE | OldState | The old state of the
  482. resource from which it transitioned.
  483. @parm IN CLUSTER_RESOURCE_STATE | NewState | The new state of the
  484. resource.
  485. @comm This routine creates a thread to perform all the pending work
  486. when the resource changes state that cannot be performed within
  487. FmpHandleResourceTransition to avoid deadlocks and that cannot
  488. be deffered to the FmpWorkerThread because of serialization issues.
  489. In particular, it is used to handle state transition work for the
  490. quorum resource since other resources depend on the quorum resource
  491. and cannot come online till the state of the quorum becomes online.
  492. For instance, the quorum resource may be coming offline as a part
  493. of move while another resource if in FmpWorkerThread() calling
  494. FmpOffline/OnlineWaitingTree(). For the quorum resource to come
  495. online again (that happens by signalling the move pending thread)
  496. so that FmpWorkerThread can make progress its events will have
  497. to be handled separately.
  498. @rdesc Returns a result code. ERROR_SUCCESS on success.
  499. @xref <f FmpHandleResStateChangeProc>
  500. ****/
  501. DWORD FmpCreateResStateChangeHandler(
  502. IN PFM_RESOURCE pResource,
  503. IN CLUSTER_RESOURCE_STATE NewState,
  504. IN CLUSTER_RESOURCE_STATE OldState)
  505. {
  506. HANDLE hThread = NULL;
  507. DWORD dwThreadId;
  508. PRESOURCE_STATE_CHANGE pResStateContext = NULL;
  509. DWORD dwStatus = ERROR_SUCCESS;
  510. //reference the resource
  511. //the thread will dereference it, if the thread is successfully
  512. //created
  513. ClRtlLogPrint(LOG_NOISE,
  514. "[FM] FmpCreateResStateChangeHandler: Entry\r\n");
  515. OmReferenceObject(pResource);
  516. pResStateContext = LocalAlloc(LMEM_FIXED, sizeof(RESOURCE_STATE_CHANGE));
  517. if (!pResStateContext)
  518. {
  519. dwStatus = GetLastError();
  520. CL_UNEXPECTED_ERROR(dwStatus);
  521. goto FnExit;
  522. }
  523. pResStateContext->pResource = pResource;
  524. pResStateContext->OldState = OldState;
  525. pResStateContext->NewState = NewState;
  526. hThread = CreateThread( NULL, 0, FmpHandleResStateChangeProc,
  527. pResStateContext, 0, &dwThreadId );
  528. if ( hThread == NULL )
  529. {
  530. dwStatus = GetLastError();
  531. CL_UNEXPECTED_ERROR(dwStatus);
  532. // if the function failed to create the thread, cleanup the
  533. // state that the thread would have cleaned
  534. //deref the object if the thread is not created successfully
  535. OmDereferenceObject(pResource);
  536. LocalFree(pResStateContext);
  537. goto FnExit;
  538. }
  539. FnExit:
  540. //do general cleanup
  541. if (hThread)
  542. CloseHandle(hThread);
  543. ClRtlLogPrint(LOG_NOISE,
  544. "[FM] FmpCreateResStateChangeHandler: Exit, status %1!u!\r\n",
  545. dwStatus);
  546. return(dwStatus);
  547. }
  548. /****
  549. @func DWORD | FmpHandleResStateChangeProc| This thread procedure
  550. handles all the post processing for the resource transitions
  551. for the quorum resource.
  552. @parm IN LPVOID | pContext | A pointer to PRESOURCE_STATE_CHANGE
  553. structure.
  554. @comm This thread handles a resource change notification postprocessing.
  555. Significantly for quorum resource so that quorum resource
  556. state change notifications are not handled by the single
  557. FmpWorkThread() [that causes deadlock - if the quorum
  558. notification resource is queued behind a notification whose
  559. handling requires tha quorum resource be online]..
  560. @rdesc Returns a result code. ERROR_SUCCESS on success.
  561. @xref <f FmpCreateResStateChangeHandler)
  562. ****/
  563. DWORD
  564. FmpHandleResStateChangeProc(
  565. IN LPVOID pContext
  566. )
  567. {
  568. PRESOURCE_STATE_CHANGE pResStateChange = pContext;
  569. CL_ASSERT( pResStateChange );
  570. ClRtlLogPrint(LOG_NOISE,
  571. "[FM] FmpHandleResStateChangeProc: Entry...\r\n");
  572. FmpHandleResourceTransition( pResStateChange->pResource,
  573. pResStateChange->NewState );
  574. OmDereferenceObject( pResStateChange->pResource );
  575. LocalFree( pResStateChange );
  576. ClRtlLogPrint(LOG_NOISE,
  577. "[FM] FmpHandleResStateChangeProc: Exit...\r\n");
  578. return( ERROR_SUCCESS );
  579. }
  580. DWORD
  581. FmpDelayedStartRes(
  582. IN PFM_RESOURCE pResource
  583. )
  584. /*++
  585. Routine Description:
  586. Starts a timer for the resource. FmpDelayedRestartCb function will be
  587. invoked at the expiry of timer..
  588. Arguments:
  589. pResource - The resource which has transitioned.
  590. Return Value:
  591. ERROR_SUCCESS if successful, WIN32 errorcode otherwise.
  592. Note that no delayed restart attempts are made if the resource is a quorum resource.
  593. --*/
  594. {
  595. DWORD dwStatus = ERROR_SUCCESS;
  596. ClRtlLogPrint(LOG_NOISE,
  597. "[FM] FmpDelayedRestartRes:Entry for resource %1!ws!\n",
  598. OmObjectId(pResource));
  599. if( (pResource->RetryPeriodOnFailure != CLUSTER_RESOURCE_DEFAULT_RETRY_PERIOD_ON_FAILURE ) &&
  600. !(pResource->QuorumResource) )
  601. {
  602. // Check if there is already a timer running for this resource
  603. if(pResource->hTimer == NULL)
  604. {
  605. pResource->hTimer = CreateWaitableTimer(NULL, FALSE, NULL);
  606. if (!(pResource->hTimer))
  607. {
  608. // not a fatal error but log it
  609. ClRtlLogPrint(LOG_UNUSUAL,
  610. "[FM] FmpDelayedRestartRes: failed to create the watchdog timer for resource %1!ws!\n",
  611. OmObjectId(pResource));
  612. }
  613. else{
  614. ClRtlLogPrint(LOG_NOISE,
  615. "[FM] FmpDelayedRestartRes: Adding watchdog timer for resource %1!ws!, period=%2!u!\n",
  616. OmObjectId(pResource),
  617. pResource->RetryPeriodOnFailure);
  618. // make sure resource struct won't go away if resource is deleted before the timer fires
  619. OmReferenceObject(pResource);
  620. //register the timer with the periodic activity timer thread
  621. dwStatus = AddTimerActivity(pResource->hTimer, pResource->RetryPeriodOnFailure, 0, FmpDelayedRestartCb, pResource);
  622. if (dwStatus != ERROR_SUCCESS)
  623. {
  624. ClRtlLogPrint(LOG_CRITICAL,
  625. "[FM] FmpDelayedRestartRes: AddTimerActivity failed with error %1!u!\n",
  626. dwStatus);
  627. CloseHandle(pResource->hTimer);
  628. pResource->hTimer = NULL;
  629. }
  630. }
  631. }
  632. }
  633. return dwStatus;
  634. }
  635. VOID
  636. FmpDelayedRestartCb(
  637. IN HANDLE hTimer,
  638. IN PVOID pContext)
  639. /*++
  640. Routine Description
  641. This is invoked by timer activity thread to attempt a restart on
  642. a failed resource.
  643. Arguments
  644. pContext - a pointer to PFM_RESOURCE
  645. Return Value
  646. ERROR_SUCCESS on success, a WIN32 error code otherwise.
  647. --*/
  648. {
  649. PFM_RESOURCE pResource;
  650. pResource=(PFM_RESOURCE)pContext;
  651. ClRtlLogPrint(LOG_NOISE,
  652. "[FM] FmpDelayedRestartCb: Entry for resource %1!ws! \n",
  653. OmObjectId(pResource));
  654. OmReferenceObject(pResource);
  655. FmpPostWorkItem(FM_EVENT_RES_RETRY_TIMER,
  656. pResource,
  657. 0);
  658. OmDereferenceObject(pResource);
  659. return;
  660. }