Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

949 lines
27 KiB

  1. /*++
  2. Copyright (c) 2001 Microsoft Corporation
  3. Module Name:
  4. Monitor.c
  5. Abstract:
  6. Routines for interfacing with the Resource Monitor process
  7. Author:
  8. John Vert (jvert) 3-Jan-1996
  9. Revision History:
  10. --*/
  11. #include "fmp.h"
  12. //
  13. // Global data
  14. //
  15. CRITICAL_SECTION FmpMonitorLock;
  16. //
  17. // Local function prototypes
  18. //
  19. DWORD
  20. FmpInitializeResourceMonitorNotify(
  21. VOID
  22. );
  23. DWORD
  24. FmpRmNotifyThread(
  25. IN LPVOID lpThreadParameter
  26. );
  27. PRESMON
  28. FmpCreateMonitor(
  29. LPWSTR DebugPrefix,
  30. BOOL SeparateMonitor
  31. )
  32. /*++
  33. Routine Description:
  34. Creates a new monitor process and initiates the RPC communication
  35. with it.
  36. Arguments:
  37. None.
  38. Return Value:
  39. Pointer to the resource monitor structure if successful.
  40. NULL otherwise.
  41. --*/
  42. {
  43. #define FM_MAX_RESMON_COMMAND_LINE_SIZE 128
  44. SECURITY_ATTRIBUTES Security;
  45. HANDLE WaitArray[2];
  46. HANDLE ThreadHandle;
  47. HANDLE Event = NULL;
  48. HANDLE FileMapping = NULL;
  49. STARTUPINFO StartupInfo;
  50. PROCESS_INFORMATION ProcessInfo;
  51. PROCESS_INFORMATION DebugInfo;
  52. BOOL Success;
  53. WCHAR CommandBuffer[FM_MAX_RESMON_COMMAND_LINE_SIZE];
  54. PWCHAR resmonCmdLine = CommandBuffer;
  55. TCHAR DebugLine[512];
  56. TCHAR *Binding;
  57. RPC_BINDING_HANDLE RpcBinding;
  58. DWORD Status;
  59. PRESMON Monitor;
  60. DWORD ThreadId;
  61. DWORD Retry = 0;
  62. DWORD creationFlags;
  63. //
  64. // Recover any DLL files left impartially upgraded.
  65. //
  66. FmpRecoverResourceDLLFiles ();
  67. Monitor = LocalAlloc(LMEM_ZEROINIT, sizeof(RESMON));
  68. if (Monitor == NULL) {
  69. ClRtlLogPrint(LOG_UNUSUAL,
  70. "[FM] Failed to allocate a Monitor structure.\n");
  71. return(NULL);
  72. }
  73. Monitor->Shutdown = FALSE;
  74. Monitor->Signature = FMP_RESMON_SIGNATURE;
  75. //
  76. // Create an event and a file mapping object to be passed to
  77. // the Resource Monitor process. The event is for the Resource
  78. // Monitor to signal its initialization is complete. The file
  79. // mapping is for creating the shared memory region between
  80. // the Resource Monitor and the cluster manager.
  81. //
  82. Security.nLength = sizeof(Security);
  83. Security.lpSecurityDescriptor = NULL;
  84. Security.bInheritHandle = TRUE;
  85. Event = CreateEvent(&Security,
  86. TRUE,
  87. FALSE,
  88. NULL);
  89. if (Event == NULL) {
  90. Status = GetLastError();
  91. ClRtlLogPrint(LOG_UNUSUAL,
  92. "[FM] Failed to create a ResMon event, error %1!u!.\n",
  93. Status);
  94. goto create_failed;
  95. }
  96. Security.nLength = sizeof(Security);
  97. Security.lpSecurityDescriptor = NULL;
  98. Security.bInheritHandle = TRUE;
  99. FileMapping = CreateFileMapping(INVALID_HANDLE_VALUE,
  100. &Security,
  101. PAGE_READWRITE,
  102. 0,
  103. sizeof(MONITOR_STATE),
  104. NULL);
  105. if (FileMapping == NULL) {
  106. Status = GetLastError();
  107. ClRtlLogPrint(LOG_UNUSUAL,
  108. "[FM] File Mapping for ResMon failed, error = %1!u!.\n",
  109. Status);
  110. goto create_failed;
  111. }
  112. //
  113. // Create our own (read-only) view of the shared memory section
  114. //
  115. Monitor->SharedState = MapViewOfFile(FileMapping,
  116. FILE_MAP_READ | FILE_MAP_WRITE,
  117. 0,
  118. 0,
  119. 0);
  120. if (Monitor->SharedState == NULL) {
  121. Status = GetLastError();
  122. ClRtlLogPrint(LOG_UNUSUAL,
  123. "[FM] Mapping shared state for ResMon failed, error %1!u!.\n",
  124. Status);
  125. goto create_failed;
  126. }
  127. ZeroMemory( Monitor->SharedState, sizeof(MONITOR_STATE) );
  128. if ( !CsDebugResmon && DebugPrefix != NULL && *DebugPrefix != UNICODE_NULL ) {
  129. Monitor->SharedState->ResmonStop = TRUE;
  130. }
  131. //
  132. // build cmd line for Resource Monitor process
  133. //
  134. wsprintf(resmonCmdLine,
  135. TEXT("resrcmon -e %d -m %d -p %d"),
  136. Event,
  137. FileMapping,
  138. GetCurrentProcessId() );
  139. if ( CsDebugResmon ) {
  140. wcscat( resmonCmdLine, L" -d" );
  141. if ( CsResmonDebugCmd ) {
  142. DWORD cmdLineSize = wcslen( resmonCmdLine );
  143. DWORD debugCmdSize = wcslen( CsResmonDebugCmd );
  144. //
  145. // make sure our static buffer is large enough; 4 includes the
  146. // space, 2 double quotes and. 5 adds in the terminating NULL.
  147. //
  148. if (( cmdLineSize + debugCmdSize ) > ( FM_MAX_RESMON_COMMAND_LINE_SIZE - 4 )) {
  149. resmonCmdLine = LocalAlloc(LMEM_FIXED,
  150. ( cmdLineSize + debugCmdSize + 5 ) * sizeof( WCHAR ));
  151. if ( resmonCmdLine != NULL ) {
  152. wcscpy( resmonCmdLine, CommandBuffer );
  153. wcscat( resmonCmdLine, L" \"" );
  154. wcscat( resmonCmdLine, CsResmonDebugCmd );
  155. wcscat( resmonCmdLine, L"\"" );
  156. } else {
  157. ClRtlLogPrint(LOG_UNUSUAL,
  158. "[FM] Unable to allocate space for debug command line\n");
  159. resmonCmdLine = CommandBuffer;
  160. }
  161. } else {
  162. wcscat( resmonCmdLine, L" \"" );
  163. wcscat( resmonCmdLine, CsResmonDebugCmd );
  164. wcscat( resmonCmdLine, L"\"" );
  165. }
  166. }
  167. }
  168. //
  169. // Attempt to start ResMon process.
  170. //
  171. retry_resmon_start:
  172. ZeroMemory(&StartupInfo, sizeof(StartupInfo));
  173. StartupInfo.cb = sizeof(StartupInfo);
  174. creationFlags = DETACHED_PROCESS; // so ctrl-c won't kill it
  175. Success = CreateProcess(NULL,
  176. resmonCmdLine,
  177. NULL,
  178. NULL,
  179. FALSE, // Inherit handles
  180. creationFlags,
  181. NULL,
  182. NULL,
  183. &StartupInfo,
  184. &ProcessInfo);
  185. if (!Success) {
  186. Status = GetLastError();
  187. ClRtlLogPrint(LOG_UNUSUAL,
  188. "[FM] Failed to create resmon process, error %1!u!.\n",
  189. Status);
  190. CL_LOGFAILURE(Status);
  191. goto create_failed;
  192. } else if ( CsDebugResmon && !CsResmonDebugCmd ) {
  193. ClRtlLogPrint(LOG_CRITICAL,
  194. "[FM] Waiting for debugger to connect to resmon process %1!u!\n",
  195. ProcessInfo.dwProcessId);
  196. }
  197. CloseHandle(ProcessInfo.hThread); // don't need this
  198. //
  199. // Wait for the ResMon process to terminate, or for it to signal
  200. // its startup event.
  201. //
  202. WaitArray[0] = Event;
  203. WaitArray[1] = ProcessInfo.hProcess;
  204. Status = WaitForMultipleObjects(2,
  205. WaitArray,
  206. FALSE,
  207. INFINITE);
  208. if (Status == WAIT_FAILED) {
  209. Status = GetLastError();
  210. ClRtlLogPrint(LOG_UNUSUAL,
  211. "[FM] Wait for ResMon to start failed, error %1!u!.\n",
  212. Status);
  213. goto create_failed;
  214. }
  215. if (Status == ( WAIT_OBJECT_0 + 1 )) {
  216. if ( ++Retry > 1 ) {
  217. //
  218. // The resource monitor terminated prematurely.
  219. //
  220. GetExitCodeProcess(ProcessInfo.hProcess, &Status);
  221. ClRtlLogPrint(LOG_UNUSUAL,
  222. "[FM] ResMon terminated prematurely, error %1!u!.\n",
  223. Status);
  224. goto create_failed;
  225. } else {
  226. goto retry_resmon_start;
  227. }
  228. } else {
  229. //
  230. // The resource monitor has successfully initialized
  231. //
  232. CL_ASSERT(Status == 0);
  233. Monitor->Process = ProcessInfo.hProcess;
  234. //
  235. // invoke the DebugPrefix process only if we're not already debugging
  236. // the resmon process
  237. //
  238. if ( CsDebugResmon && DebugPrefix && *DebugPrefix != UNICODE_NULL ) {
  239. ClRtlLogPrint(LOG_UNUSUAL,
  240. "[FM] -debugresmon overrides DebugPrefix property\n");
  241. }
  242. if ( !CsDebugResmon && ( DebugPrefix != NULL ) && ( *DebugPrefix != UNICODE_NULL )) {
  243. wsprintf(DebugLine, TEXT("%ws -p %d"), DebugPrefix, ProcessInfo.dwProcessId);
  244. ZeroMemory(&StartupInfo, sizeof(StartupInfo));
  245. StartupInfo.cb = sizeof(StartupInfo);
  246. StartupInfo.lpDesktop = TEXT("WinSta0\\Default");
  247. Success = CreateProcess(NULL,
  248. DebugLine,
  249. NULL,
  250. NULL,
  251. FALSE, // Inherit handles
  252. CREATE_NEW_CONSOLE,
  253. NULL,
  254. NULL,
  255. &StartupInfo,
  256. &DebugInfo);
  257. Monitor->SharedState->ResmonStop = FALSE;
  258. if ( !Success ) {
  259. Status = GetLastError();
  260. ClRtlLogPrint(LOG_UNUSUAL,
  261. "[FM] ResMon debug start failed, error %1!u!.\n",
  262. Status);
  263. } else {
  264. CloseHandle(DebugInfo.hThread); // don't need this
  265. CloseHandle(DebugInfo.hProcess); // don't need this
  266. }
  267. }
  268. }
  269. CloseHandle(Event);
  270. CloseHandle(FileMapping);
  271. Event = NULL;
  272. FileMapping = NULL;
  273. //
  274. // Initiate RPC with resource monitor process
  275. //
  276. wsprintf(resmonCmdLine, TEXT("resrcmon%d"), ProcessInfo.dwProcessId);
  277. Status = RpcStringBindingCompose(TEXT("e76ea56d-453f-11cf-bfec-08002be23f2f"),
  278. TEXT("ncalrpc"),
  279. NULL,
  280. resmonCmdLine,
  281. NULL,
  282. &Binding);
  283. if (Status != RPC_S_OK) {
  284. ClRtlLogPrint(LOG_UNUSUAL,
  285. "[FM] ResMon RPC binding compose failed, error %1!u!.\n",
  286. Status);
  287. goto create_failed;
  288. }
  289. Status = RpcBindingFromStringBinding(Binding, &Monitor->Binding);
  290. if (Status != RPC_S_OK) {
  291. ClRtlLogPrint(LOG_UNUSUAL,
  292. "[FM] ResMon RPC binding creation failed, error %1!u!.\n",
  293. Status);
  294. goto create_failed;
  295. }
  296. RpcStringFree(&Binding);
  297. //
  298. // Start notification thread.
  299. //
  300. Monitor->NotifyThread = CreateThread(NULL,
  301. 0,
  302. FmpRmNotifyThread,
  303. Monitor,
  304. 0,
  305. &ThreadId);
  306. if (Monitor->NotifyThread == NULL) {
  307. Status = GetLastError();
  308. ClRtlLogPrint(LOG_UNUSUAL,
  309. "[FM] Creation of notify thread for ResMon failed, error %1!u!.\n",
  310. Status);
  311. goto create_failed;
  312. }
  313. Monitor->RefCount = 2;
  314. if ( resmonCmdLine != CommandBuffer ) {
  315. LocalFree( resmonCmdLine );
  316. }
  317. return(Monitor);
  318. create_failed:
  319. if ( Monitor->NotifyThread != NULL ) {
  320. CloseHandle( Monitor->NotifyThread );
  321. }
  322. LocalFree( Monitor );
  323. if ( FileMapping != NULL ) {
  324. CloseHandle( FileMapping );
  325. }
  326. if ( Event != NULL ) {
  327. CloseHandle( Event );
  328. }
  329. if ( resmonCmdLine != CommandBuffer ) {
  330. LocalFree( resmonCmdLine );
  331. }
  332. SetLastError(Status);
  333. return(NULL);
  334. } // FmpCreateMonitor
  335. VOID
  336. FmpShutdownMonitor(
  337. IN PRESMON Monitor
  338. )
  339. /*++
  340. Routine Description:
  341. Performs a clean shutdown of the Resource Monitor process.
  342. Note that this does not make any changes to the state of
  343. any resources being monitored by the Resource Monitor, it
  344. only asks the Resource Monitor to clean up and terminate.
  345. Arguments:
  346. None.
  347. Return Value:
  348. None.
  349. --*/
  350. {
  351. DWORD Status;
  352. CL_ASSERT(Monitor != NULL);
  353. FmpAcquireMonitorLock();
  354. if ( Monitor->Shutdown ) {
  355. return;
  356. }
  357. Monitor->Shutdown = TRUE;
  358. FmpReleaseMonitorLock();
  359. //
  360. // RPC to the server process to tell it to shutdown.
  361. //
  362. RmShutdownProcess(Monitor->Binding);
  363. //
  364. // Wait for the process to exit so that the monitor fully cleans up the resources if necessary.
  365. //
  366. if ( Monitor->Process ) {
  367. Status = WaitForSingleObject(Monitor->Process, FM_MONITOR_SHUTDOWN_TIMEOUT);
  368. if ( Status != WAIT_OBJECT_0 ) {
  369. ClRtlLogPrint(LOG_ERROR,"[FM] Failed to shutdown resource monitor.\n");
  370. TerminateProcess( Monitor->Process, 1 );
  371. }
  372. CloseHandle(Monitor->Process);
  373. Monitor->Process = NULL;
  374. }
  375. RpcBindingFree(&Monitor->Binding);
  376. //
  377. // Wait for the notify thread to exit, but just a little bit.
  378. //
  379. if ( Monitor->NotifyThread ) {
  380. Status = WaitForSingleObject(Monitor->NotifyThread,
  381. FM_RPC_TIMEOUT*2); // Increased timeout to try to ensure RPC completes
  382. if ( Status != WAIT_OBJECT_0 ) {
  383. ; // call removed: Terminate Thread( Monitor->NotifyThread, 1 );
  384. // Bad call to make since terminating threads on NT can cause real problems.
  385. }
  386. CloseHandle(Monitor->NotifyThread);
  387. Monitor->NotifyThread = NULL;
  388. }
  389. //
  390. // Clean up shared memory mapping
  391. //
  392. UnmapViewOfFile(Monitor->SharedState);
  393. if ( InterlockedDecrement(&Monitor->RefCount) == 0 ) {
  394. PVOID caller, callersCaller;
  395. RtlGetCallersAddress(
  396. &caller,
  397. &callersCaller );
  398. ClRtlLogPrint(LOG_NOISE,
  399. "[FMY] Freeing monitor structure (1) %1!lx!, caller %2!lx!, callerscaller %3!lx!\n",
  400. Monitor, caller, callersCaller );
  401. LocalFree(Monitor);
  402. }
  403. return;
  404. } // FmpShutdownMonitor
  405. DWORD
  406. FmpRmNotifyThread(
  407. IN LPVOID lpThreadParameter
  408. )
  409. /*++
  410. Routine Description:
  411. This is the thread that receives resource monitor notifications.
  412. Arguments:
  413. lpThreadParameter - Pointer to resource monitor structure.
  414. Return Value:
  415. None.
  416. --*/
  417. {
  418. PRESMON Monitor;
  419. PRESMON NewMonitor;
  420. RM_NOTIFY_KEY NotifyKey;
  421. DWORD NotifyEvent;
  422. DWORD Status;
  423. CLUSTER_RESOURCE_STATE CurrentState;
  424. BOOL Success;
  425. Monitor = lpThreadParameter;
  426. //
  427. // Loop forever picking up resource monitor notifications.
  428. // When the resource monitor returns FALSE, it indicates
  429. // that shutdown is occurring.
  430. //
  431. do {
  432. try {
  433. Success = RmNotifyChanges(Monitor->Binding,
  434. &NotifyKey,
  435. &NotifyEvent,
  436. (LPDWORD)&CurrentState);
  437. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  438. //
  439. // RPC communications failure, treat it as a shutdown.
  440. //
  441. Status = GetExceptionCode();
  442. ClRtlLogPrint(LOG_NOISE,
  443. "[FM] NotifyChanges got an RPC failure, %1!u!.\n",
  444. Status);
  445. Success = FALSE;
  446. }
  447. if (Success) {
  448. Success = FmpPostNotification(NotifyKey, NotifyEvent, CurrentState);
  449. } else {
  450. //
  451. // If we are shutting down... then this is okay.
  452. //
  453. if ( FmpShutdown ||
  454. Monitor->Shutdown ) {
  455. break;
  456. }
  457. //
  458. // We will try to start a new resource monitor. If this fails,
  459. // then shutdown the cluster service.
  460. //
  461. ClRtlLogPrint(LOG_ERROR,
  462. "[FM] Resource monitor terminated!\n");
  463. ClRtlLogPrint(LOG_ERROR,
  464. "[FM] Last resource monitor state: %1!u!, resource %2!u!.\n",
  465. Monitor->SharedState->State,
  466. Monitor->SharedState->ActiveResource);
  467. CsLogEvent(LOG_UNUSUAL, FM_EVENT_RESMON_DIED);
  468. //
  469. // Use a worker thread to start new resource monitor(s).
  470. //
  471. if (FmpCreateMonitorRestartThread(Monitor))
  472. CsInconsistencyHalt(ERROR_INVALID_STATE);
  473. }
  474. } while ( Success );
  475. ClRtlLogPrint(LOG_NOISE,"[FM] RmNotifyChanges returned\n");
  476. if ( InterlockedDecrement( &Monitor->RefCount ) == 0 ) {
  477. ClRtlLogPrint(LOG_NOISE,
  478. "[FMY] Freeing monitor structure (2) %1!lx!\n",
  479. Monitor );
  480. LocalFree( Monitor );
  481. }
  482. return(0);
  483. } // FmpRmNotifyThread
  484. BOOL
  485. FmpFindMonitorResource(
  486. IN PRESMON OldMonitor,
  487. IN PMONITOR_RESOURCE_ENUM *PtrEnumResource,
  488. IN PFM_RESOURCE Resource,
  489. IN LPCWSTR Name
  490. )
  491. /*++
  492. Routine Description:
  493. Finds all resources that were managed by the old resource monitor and
  494. starts them under the new resource monitor. Or adds them to the list
  495. of resources to be restarted.
  496. Arguments:
  497. OldMonitor - pointer to the old resource monitor structure.
  498. PtrEnumResource - pointer to a pointer to a resource enum structure.
  499. Resource - the current resource being enumerated.
  500. Name - name of the current resource.
  501. Return Value:
  502. TRUE - if we should continue enumeration.
  503. FALSE - otherwise.
  504. Notes:
  505. Nothing in the old resource monitor structure should be used.
  506. --*/
  507. {
  508. DWORD status;
  509. BOOL returnNow = FALSE;
  510. PMONITOR_RESOURCE_ENUM enumResource = *PtrEnumResource;
  511. PMONITOR_RESOURCE_ENUM newEnumResource;
  512. DWORD dwOldBlockingFlag;
  513. if ( Resource->Monitor == OldMonitor ) {
  514. if ( enumResource->fCreateMonitors == FALSE ) goto skip_monitor_creation;
  515. //
  516. // If this is not the quorum resource and it is blocking the
  517. // quorum resource, then fix it up now.
  518. //
  519. dwOldBlockingFlag = InterlockedExchange( &Resource->BlockingQuorum, 0 );
  520. if ( dwOldBlockingFlag ) {
  521. ClRtlLogPrint(LOG_NOISE,
  522. "[FM] RestartMonitor: call InterlockedDecrement on gdwQuoBlockingResources, Resource %1!ws!\n",
  523. OmObjectId(Resource));
  524. InterlockedDecrement(&gdwQuoBlockingResources);
  525. }
  526. //
  527. // If the resource had been previously create in Resmon, then recreate
  528. // it with a new resource monitor.
  529. //
  530. if ( Resource->Flags & RESOURCE_CREATED ) {
  531. // Note - this will create a new resource monitor as needed.
  532. status = FmpRmCreateResource(Resource);
  533. if ( status != ERROR_SUCCESS ) {
  534. ClRtlLogPrint(LOG_ERROR,"[FM] Failed to restart resource %1!ws!. Error %2!u!.\n",
  535. Name, status );
  536. return(TRUE);
  537. }
  538. } else {
  539. return(TRUE);
  540. }
  541. } else {
  542. return(TRUE);
  543. }
  544. skip_monitor_creation:
  545. //
  546. // If we successfully recreated a resource monitor, then add it to the
  547. // list of resources to indicate failure.
  548. //
  549. if ( enumResource->CurrentIndex >= enumResource->EntryCount ) {
  550. newEnumResource = LocalReAlloc( enumResource,
  551. MONITOR_RESOURCE_SIZE( enumResource->EntryCount +
  552. ENUM_GROW_SIZE ),
  553. LMEM_MOVEABLE );
  554. if ( newEnumResource == NULL ) {
  555. ClRtlLogPrint(LOG_ERROR,
  556. "[FM] Failed re-allocating resource enum to restart resource monitor!\n");
  557. return(FALSE);
  558. }
  559. enumResource = newEnumResource;
  560. enumResource->EntryCount += ENUM_GROW_SIZE;
  561. *PtrEnumResource = newEnumResource;
  562. }
  563. enumResource->Entry[enumResource->CurrentIndex] = Resource;
  564. ++enumResource->CurrentIndex;
  565. return(TRUE);
  566. } // FmpFindMonitorResource
  567. BOOL
  568. FmpRestartMonitor(
  569. PRESMON OldMonitor
  570. )
  571. /*++
  572. Routine Description:
  573. Creates a new monitor process and initiates the RPC communication
  574. with it. Restarts all resources that were attached to the old monitor
  575. process.
  576. Arguments:
  577. OldMonitor - pointer to the old resource monitor structure.
  578. Return Value:
  579. TRUE if successful.
  580. FALSE otherwise.
  581. Notes:
  582. The old monitor structure is deallocated when done.
  583. --*/
  584. {
  585. DWORD enumSize;
  586. DWORD i;
  587. DWORD status;
  588. PMONITOR_RESOURCE_ENUM enumResource;
  589. PFM_RESOURCE resource;
  590. DWORD dwOldBlockingFlag;
  591. FmpAcquireMonitorLock();
  592. if ( FmpShutdown ) {
  593. FmpReleaseMonitorLock();
  594. return(TRUE);
  595. }
  596. enumSize = MONITOR_RESOURCE_SIZE( ENUM_GROW_SIZE );
  597. enumResource = LocalAlloc( LMEM_ZEROINIT, enumSize );
  598. if ( enumResource == NULL ) {
  599. ClRtlLogPrint(LOG_ERROR,
  600. "[FM] Failed allocating resource enum to restart resource monitor!\n");
  601. FmpReleaseMonitorLock();
  602. CsInconsistencyHalt(ERROR_NOT_ENOUGH_MEMORY);
  603. return(FALSE);
  604. }
  605. enumResource->EntryCount = ENUM_GROW_SIZE;
  606. enumResource->CurrentIndex = 0;
  607. enumResource->fCreateMonitors = FALSE;
  608. //
  609. // Enumerate all resources controlled by the old resource monitor so that we can invoke the
  610. // handlers registered for those resources. Both preoffline and postoffline handlers are
  611. // invoked prior to monitor shutdown so that the assumption made about underlying resource
  612. // access (such as quorum disk access) remain valid in a graceful monitor shutdown case.
  613. // We would issue a specific shutdown command in the case of a graceful shutdown occurring
  614. // as a part of resource DLL upgrade.
  615. //
  616. OmEnumObjects( ObjectTypeResource,
  617. (OM_ENUM_OBJECT_ROUTINE)FmpFindMonitorResource,
  618. OldMonitor,
  619. &enumResource );
  620. for ( i = 0; i < enumResource->CurrentIndex; i++ ) {
  621. resource = enumResource->Entry[i];
  622. if ( ( resource->PersistentState == ClusterResourceOnline ) &&
  623. ( resource->Group->OwnerNode == NmLocalNode ) ) {
  624. OmNotifyCb( resource, NOTIFY_RESOURCE_PREOFFLINE );
  625. OmNotifyCb( resource, NOTIFY_RESOURCE_POSTOFFLINE );
  626. }
  627. }
  628. FmpShutdownMonitor( OldMonitor );
  629. if ( FmpDefaultMonitor == OldMonitor ) {
  630. FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
  631. if ( FmpDefaultMonitor == NULL ) {
  632. LocalFree( enumResource );
  633. FmpReleaseMonitorLock();
  634. CsInconsistencyHalt(GetLastError());
  635. return(FALSE);
  636. }
  637. }
  638. enumResource->CurrentIndex = 0;
  639. enumResource->fCreateMonitors = TRUE;
  640. //
  641. // Enumerate all resources controlled by the old resource monitor,
  642. // and connect them into the new resource monitor.
  643. //
  644. OmEnumObjects( ObjectTypeResource,
  645. (OM_ENUM_OBJECT_ROUTINE)FmpFindMonitorResource,
  646. OldMonitor,
  647. &enumResource );
  648. //
  649. // First set each resource in the list to the Offline state.
  650. //
  651. for ( i = 0; i < enumResource->CurrentIndex; i++ ) {
  652. resource = enumResource->Entry[i];
  653. //
  654. // If the resource is owned by the local system, then do it.
  655. //
  656. if ( resource->Group->OwnerNode == NmLocalNode ) {
  657. resource->State = ClusterResourceOffline;
  658. //
  659. // If this is not the quorum resource and it is blocking the
  660. // quorum resource, then fix it up now.
  661. //
  662. dwOldBlockingFlag = InterlockedExchange( &resource->BlockingQuorum, 0 );
  663. if ( dwOldBlockingFlag ) {
  664. ClRtlLogPrint(LOG_NOISE,
  665. "[FM] RestartMonitor: call InterlockedDecrement on gdwQuoBlockingResources, Resource %1!ws!\n",
  666. OmObjectId(resource));
  667. InterlockedDecrement(&gdwQuoBlockingResources);
  668. }
  669. }
  670. }
  671. //
  672. // Find the quorum resource - if present bring online first.
  673. //
  674. for ( i = 0; i < enumResource->CurrentIndex; i++ ) {
  675. resource = enumResource->Entry[i];
  676. //
  677. // If the resource is owned by the local system and is the
  678. // quorum resource, then do it.
  679. //
  680. if ( (resource->Group->OwnerNode == NmLocalNode) &&
  681. resource->QuorumResource ) {
  682. FmpRestartResourceTree( resource );
  683. }
  684. }
  685. //
  686. // Now restart the rest of the resources in the list.
  687. //
  688. for ( i = 0; i < enumResource->CurrentIndex; i++ ) {
  689. resource = enumResource->Entry[i];
  690. //
  691. // If the resource is owned by the local system, then do it.
  692. //
  693. if ( (resource->Group->OwnerNode == NmLocalNode) &&
  694. !resource->QuorumResource ) {
  695. FmpRestartResourceTree( resource );
  696. }
  697. }
  698. FmpReleaseMonitorLock();
  699. //
  700. // Don't delete the old monitor block until we've reset the resources
  701. // to point to the new resource monitor block.
  702. // Better to get an RPC failure, rather than some form of ACCVIO.
  703. //
  704. LocalFree( enumResource );
  705. if ( InterlockedDecrement( &OldMonitor->RefCount ) == 0 ) {
  706. #if 0
  707. PVOID caller, callersCaller;
  708. RtlGetCallersAddress(
  709. &caller,
  710. &callersCaller );
  711. ClRtlLogPrint(LOG_NOISE,
  712. "[FMY] Freeing monitor structure (3) %1!lx!, caller %2!lx!, callerscaller %3!lx!\n",
  713. OldMonitor, caller, callersCaller );
  714. #endif
  715. LocalFree( OldMonitor );
  716. }
  717. return(TRUE);
  718. } // FmpRestartMonitor
  719. /****
  720. @func DWORD | FmpCreateMonitorRestartThread| This creates a new
  721. thread to restart a monitor.
  722. @parm IN PRESMON | pMonitor| Pointer to the resource monitor that n
  723. needs to be restarted.
  724. @comm A monitor needs to be started in a separate thread as it
  725. decrements the gquoblockingrescount for resources therein.
  726. This cannot be done by fmpworkerthread because that causes
  727. deadlocks if other items, like failure handling, being
  728. processed by the fmpworkerthread are waiting for work that
  729. will done by the items, like restart monitor, still in queue.
  730. @rdesc Returns a result code. ERROR_SUCCESS on success.
  731. ****/
  732. DWORD FmpCreateMonitorRestartThread(
  733. IN PRESMON pMonitor
  734. )
  735. {
  736. HANDLE hThread = NULL;
  737. DWORD dwThreadId;
  738. DWORD dwStatus = ERROR_SUCCESS;
  739. ClRtlLogPrint(LOG_NOISE,
  740. "[FM] FmpCreateMonitorRestartThread: Entry\r\n");
  741. //reference the resource
  742. //the thread will dereference it
  743. InterlockedIncrement( &pMonitor->RefCount );
  744. hThread = CreateThread( NULL, 0, FmpRestartMonitor,
  745. pMonitor, 0, &dwThreadId );
  746. if ( hThread == NULL )
  747. {
  748. dwStatus = GetLastError();
  749. CL_UNEXPECTED_ERROR(dwStatus);
  750. goto FnExit;
  751. }
  752. FnExit:
  753. //do general cleanup
  754. if (hThread)
  755. CloseHandle(hThread);
  756. ClRtlLogPrint(LOG_NOISE,
  757. "[FM] FmpCreateMonitorRestartThread: Exit, status %1!u!\r\n",
  758. dwStatus);
  759. return(dwStatus);
  760. }