Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2669 lines
69 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. fminit.c
  5. Abstract:
  6. Initialization for the Failover Manager component of the
  7. NT Cluster Service
  8. Author:
  9. John Vert (jvert) 7-Feb-1996
  10. Rod Gamache (rodga) 14-Mar-1996
  11. Revision History:
  12. --*/
  13. #include "..\nm\nmp.h" /* For NmpEnumNodeDefinitions */
  14. #ifdef LOG_CURRENT_MODULE
  15. #undef LOG_CURRENT_MODULE
  16. #endif
  17. #include "fmp.h"
  18. #define LOG_MODULE FMINIT
  19. // The order in which the locks should be acquired is
  20. // 1) gQuoChangeLock
  21. // 2) GroupLock
  22. // 3) gQuoLock
  23. // 4) GumLocks
  24. // 4*) gResTypeLock - this lock is acquired inside gum updates
  25. // 5) gLockDmpRoot
  26. // 6) pLog->Lock
  27. //A lock for synchronizing online/offline with respect to the quorum
  28. //resource
  29. //This lock is held in exclusive mode when bringing the quorum resource
  30. //online/offline and in shared mode when other resources are brought online
  31. //offline
  32. #if NO_SHARED_LOCKS
  33. CRITICAL_SECTION gQuoLock;
  34. #else
  35. RTL_RESOURCE gQuoLock;
  36. #endif
  37. //A lock for synchronizing changes to the resource->quorumresource field
  38. //and allowing changes to the quorum resource's group in form phase1
  39. // and phase 2 of fm.
  40. #if NO_SHARED_LOCKS
  41. CRITICAL_SECTION gQuoChangeLock;
  42. #else
  43. RTL_RESOURCE gQuoChangeLock;
  44. #endif
  45. //A lock for synchronizing changes to the resource type field entries.
  46. //shared by all resource types.
  47. #if NO_SHARED_LOCKS
  48. CRITICAL_SECTION gResTypeLock;
  49. #else
  50. RTL_RESOURCE gResTypeLock;
  51. #endif
  52. GUM_DISPATCH_ENTRY FmGumDispatchTable[] = {
  53. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeResourceName},
  54. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeGroupName},
  55. {1, FmpUpdateDeleteResource},
  56. {1, FmpUpdateDeleteGroup},
  57. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateAddDependency},
  58. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateRemoveDependency},
  59. {1, FmpUpdateChangeClusterName},
  60. {3, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeQuorumResource},
  61. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateResourceState},
  62. {3, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupState},
  63. {4, (PGUM_DISPATCH_ROUTINE1)EpUpdateClusWidePostEvent},
  64. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupNode},
  65. {3, (PGUM_DISPATCH_ROUTINE1)FmpUpdatePossibleNodeForResType},
  66. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupIntendedOwner},
  67. {1, (PGUM_DISPATCH_ROUTINE1)FmpUpdateAssignOwnerToGroups},
  68. {1, (PGUM_DISPATCH_ROUTINE1)FmpUpdateApproveJoin},
  69. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateCompleteGroupMove},
  70. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateCheckAndSetGroupOwner},
  71. {2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateUseRandomizedNodeListForGroups}
  72. };
  73. #define WINDOW_TIMEOUT (15*60*1000) // Try every 15 minutes
  74. //
  75. // Global data initialized in this module
  76. //
  77. PRESMON FmpDefaultMonitor = NULL;
  78. DWORD FmpInitialized = FALSE;
  79. DWORD FmpFMOnline = FALSE;
  80. DWORD FmpFMGroupsInited = FALSE;
  81. DWORD FmpFMFormPhaseProcessing = FALSE; //this is set to true when form new cluster phase processing starts
  82. BOOL FmpShutdown = FALSE;
  83. BOOL FmpMajorEvent = FALSE; // Signals a major event while joining
  84. DWORD FmpQuorumOnLine = FALSE;
  85. HANDLE FmpShutdownEvent;
  86. HANDLE FmpTimerThread;
  87. HANDLE ghQuoOnlineEvent = NULL; // the event that is signalled when the quorum res is online
  88. DWORD gdwQuoBlockingResources = 0; // the number of resources in pending stated which prevent the quorum res state change
  89. PFM_NODE gFmpNodeArray = NULL;
  90. // 185575: remove unique RPC binding handles
  91. //CRITICAL_SECTION FmpBindingLock;
  92. //
  93. // Local functions
  94. //
  95. BOOL
  96. FmpEnumNodes(
  97. OUT DWORD *pStatus,
  98. IN PVOID Context2,
  99. IN PNM_NODE Node,
  100. IN LPCWSTR Name
  101. );
  102. DWORD
  103. FmpJoinPendingThread(
  104. IN LPVOID Context
  105. );
  106. DWORD FmpGetJoinApproval();
  107. static
  108. DWORD
  109. FmpBuildForceQuorumInfo(
  110. IN LPCWSTR pszNodesIn,
  111. OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
  112. );
  113. static
  114. void
  115. FmpDeleteForceQuorumInfo(
  116. IN OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
  117. );
  118. DWORD
  119. WINAPI
  120. FmInitialize(
  121. VOID
  122. )
  123. /*++
  124. Routine Description:
  125. Initializes the failover manager
  126. Arguments:
  127. None
  128. Return Value:
  129. ERROR_SUCCESS if successful.
  130. Win32 error code otherwise.
  131. --*/
  132. {
  133. DWORD Status;
  134. OM_OBJECT_TYPE_INITIALIZE ObjectTypeInit;
  135. DWORD NodeId;
  136. CL_ASSERT(!FmpInitialized);
  137. if ( FmpInitialized ) {
  138. return(ERROR_SUCCESS);
  139. }
  140. Status = EpRegisterEventHandler(CLUSTER_EVENT_ALL,FmpEventHandler);
  141. if (Status != ERROR_SUCCESS) {
  142. CsInconsistencyHalt( Status );
  143. }
  144. //register for synchronous node down notifications
  145. Status = EpRegisterSyncEventHandler(CLUSTER_EVENT_NODE_DOWN_EX,
  146. FmpSyncEventHandler);
  147. if (Status != ERROR_SUCCESS){
  148. CsInconsistencyHalt( Status );
  149. }
  150. //
  151. // Initialize Critical Sections.
  152. //
  153. InitializeCriticalSection( &FmpResourceLock );
  154. InitializeCriticalSection( &FmpGroupLock );
  155. InitializeCriticalSection( &FmpMonitorLock );
  156. // 185575: remove unique RPC binding handles
  157. // InitializeCriticalSection( &FmpBindingLock );
  158. // initialize the quorum lock
  159. // This is used to synchronize online/offlines of other resources
  160. // with respect to the quorum resource
  161. INITIALIZE_LOCK(gQuoLock);
  162. //this is used to check/change the resource->quorum value
  163. //This synchronization is needed between the resource transition
  164. //processing that needs to do special processing for quorum
  165. //resource and the gum update handler to change the quorum resource
  166. INITIALIZE_LOCK(gQuoChangeLock);
  167. //Initialize the restype lock
  168. INITIALIZE_LOCK(gResTypeLock);
  169. // create a unnamed event that is used for waiting for quorum resource
  170. // to go online
  171. // This is a manual reset event and is initialized to unsignalled state.
  172. // When the quorum resource goes to pending state this is manually reset
  173. // to unsignalled state. When the quorum resource goes online it is set
  174. // to signalled state
  175. ghQuoOnlineEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
  176. if (!ghQuoOnlineEvent)
  177. {
  178. CL_UNEXPECTED_ERROR((Status = GetLastError()));
  179. return(Status);
  180. }
  181. gFmpNodeArray = (PFM_NODE)LocalAlloc(LMEM_FIXED,
  182. (sizeof(FM_NODE) * (NmGetMaxNodeId() + 1))
  183. );
  184. if (gFmpNodeArray == NULL) {
  185. Status = ERROR_NOT_ENOUGH_MEMORY;
  186. CL_UNEXPECTED_ERROR(Status);
  187. CsInconsistencyHalt(Status);
  188. return(Status);
  189. }
  190. //initialize it and the RPC binding table
  191. for (NodeId = ClusterMinNodeId; NodeId <= NmMaxNodeId; ++NodeId)
  192. {
  193. FmpRpcBindings[NodeId] = NULL;
  194. FmpRpcQuorumBindings[NodeId] = NULL;
  195. gFmpNodeArray[NodeId].dwNodeDownProcessingInProgress = 0;
  196. }
  197. //
  198. // Initialize the FM work queue.
  199. //
  200. Status = ClRtlInitializeQueue( &FmpWorkQueue );
  201. if (Status != ERROR_SUCCESS) {
  202. CsInconsistencyHalt(Status);
  203. return(Status);
  204. }
  205. //
  206. // Create a pending event notification.
  207. //
  208. FmpShutdownEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
  209. if ( FmpShutdownEvent == NULL ) {
  210. return(GetLastError());
  211. }
  212. //
  213. // Initialize Group Types.
  214. //
  215. ObjectTypeInit.Name = FMP_GROUP_NAME;
  216. ObjectTypeInit.Signature = FMP_GROUP_SIGNATURE;
  217. ObjectTypeInit.ObjectSize = sizeof(FM_GROUP);
  218. ObjectTypeInit.DeleteObjectMethod = FmpGroupLastReference;
  219. Status = OmCreateType( ObjectTypeGroup,
  220. &ObjectTypeInit );
  221. if ( Status != ERROR_SUCCESS ) {
  222. CsInconsistencyHalt(Status);
  223. return(Status);
  224. }
  225. //
  226. // Initialize Resource Types.
  227. //
  228. ObjectTypeInit.Name = FMP_RESOURCE_NAME;
  229. ObjectTypeInit.Signature = FMP_RESOURCE_SIGNATURE;
  230. ObjectTypeInit.ObjectSize = sizeof(FM_RESOURCE);
  231. ObjectTypeInit.DeleteObjectMethod = FmpResourceLastReference;
  232. Status = OmCreateType( ObjectTypeResource,
  233. &ObjectTypeInit );
  234. if ( Status != ERROR_SUCCESS ) {
  235. CsInconsistencyHalt(Status);
  236. return(Status);
  237. }
  238. //
  239. // Initialize ResType Types.
  240. //
  241. ObjectTypeInit.Name = FMP_RESOURCE_TYPE_NAME;
  242. ObjectTypeInit.Signature = FMP_RESOURCE_TYPE_SIGNATURE;
  243. ObjectTypeInit.ObjectSize = sizeof(FM_RESTYPE);
  244. ObjectTypeInit.DeleteObjectMethod = FmpResTypeLastRef;
  245. Status = OmCreateType( ObjectTypeResType,
  246. &ObjectTypeInit );
  247. if ( Status != ERROR_SUCCESS ) {
  248. CsInconsistencyHalt(Status);
  249. return(Status);
  250. }
  251. //
  252. // Initialize the Notify thread.
  253. //
  254. Status = FmpInitializeNotify();
  255. if (Status != ERROR_SUCCESS) {
  256. CsInconsistencyHalt(Status);
  257. return(Status);
  258. }
  259. //
  260. // Initialize the FM worker thread.
  261. //
  262. Status = FmpStartWorkerThread();
  263. if ( Status != ERROR_SUCCESS ) {
  264. CsInconsistencyHalt(Status);
  265. return(Status);
  266. }
  267. FmpInitialized = TRUE;
  268. return(ERROR_SUCCESS);
  269. } // FmInitialize
  270. BOOL
  271. FmpEnumGroupsInit(
  272. IN PVOID Context1,
  273. IN PVOID Context2,
  274. IN PFM_GROUP Group,
  275. IN LPCWSTR Name
  276. )
  277. /*++
  278. Routine Description:
  279. Group enumeration callback for FM join. This phase completes initialization
  280. of every group.
  281. Arguments:
  282. Context1 - Not used.
  283. Context2 - Not used.
  284. Group - Supplies the group.
  285. Name - Supplies the group's name.
  286. Return Value:
  287. TRUE - to indicate that the enumeration should continue.
  288. FALSE - to indicate that the enumeration should not continue.
  289. --*/
  290. {
  291. //
  292. // Finish initializing the group.
  293. //
  294. FmpCompleteInitGroup( Group );
  295. return(TRUE);
  296. } // FmpEnumGroupsInit
  297. BOOL
  298. FmpEnumFixupResources(
  299. IN PCLUSTERVERSIONINFO pClusterVersionInfo,
  300. IN PVOID Context2,
  301. IN PFM_GROUP Group,
  302. IN LPCWSTR Name
  303. )
  304. /*++
  305. Routine Description:
  306. Group enumeration callback for FM join. This phase completes initialization
  307. of every group.
  308. Arguments:
  309. Context1 - Not used.
  310. Context2 - Not used.
  311. Group - Supplies the group.
  312. Name - Supplies the group's name.
  313. Return Value:
  314. TRUE - to indicate that the enumeration should continue.
  315. FALSE - to indicate that the enumeration should not continue.
  316. --*/
  317. {
  318. PLIST_ENTRY listEntry;
  319. PFM_RESOURCE Resource;
  320. FmpAcquireLocalGroupLock( Group );
  321. //
  322. // For each resource in the Group, make sure it gets an
  323. // opportunity to do fixups.
  324. //
  325. for ( listEntry = Group->Contains.Flink;
  326. listEntry != &(Group->Contains);
  327. listEntry = listEntry->Flink ) {
  328. Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  329. FmpRmResourceControl( Resource,
  330. CLUSCTL_RESOURCE_CLUSTER_VERSION_CHANGED,
  331. (LPBYTE)pClusterVersionInfo,
  332. pClusterVersionInfo->dwVersionInfoSize,
  333. NULL,
  334. 0,
  335. NULL,
  336. NULL
  337. );
  338. }
  339. FmpReleaseLocalGroupLock( Group);
  340. return(TRUE);
  341. } // FmpEnumFixupResources
  342. BOOL
  343. FmpEnumJoinGroupsMove(
  344. IN LPBOOL Deferred,
  345. IN PVOID Context2,
  346. IN PFM_GROUP Group,
  347. IN LPCWSTR Name
  348. )
  349. /*++
  350. Routine Description:
  351. Group enumeration callback for FM join. Queries the preferred owners
  352. groups and moves those that belong on this system and that can move.
  353. Arguments:
  354. Deferred - TRUE if a move was deferred because of Failback Window. Must
  355. be FALSE on first call.
  356. Context2 - Not used.
  357. Group - Supplies the group.
  358. Name - Supplies the group's name.
  359. Return Value:
  360. TRUE - to indicate that the enumeration should continue.
  361. FALSE - to indicate that the enumeration should not continue.
  362. --*/
  363. {
  364. PLIST_ENTRY listEntry;
  365. PPREFERRED_ENTRY preferredEntry;
  366. SYSTEMTIME localTime;
  367. BOOL failBackWindowOkay = FALSE;
  368. DWORD threadId;
  369. DWORD status;
  370. GetLocalTime( &localTime );
  371. FmpAcquireLocalGroupLock( Group );
  372. //
  373. // Adjust ending time if needed.
  374. //
  375. if ( Group->FailbackWindowStart > Group->FailbackWindowEnd ) {
  376. Group->FailbackWindowEnd += 24;
  377. if ( Group->FailbackWindowStart > localTime.wHour ) {
  378. localTime.wHour += 24;
  379. }
  380. }
  381. //
  382. // If the Failback start and end times are valid, then check if we need
  383. // to start a timer thread to move the group at the appropriate time.
  384. //
  385. if ( (Group->FailbackType == GroupFailback) &&
  386. ((Group->FailbackWindowStart != Group->FailbackWindowEnd) &&
  387. (localTime.wHour >= Group->FailbackWindowStart) &&
  388. (localTime.wHour < Group->FailbackWindowEnd)) ||
  389. (Group->FailbackWindowStart == Group->FailbackWindowEnd) ) {
  390. failBackWindowOkay = TRUE;
  391. }
  392. //
  393. // Check if we need to move the group.
  394. //
  395. if ( !IsListEmpty( &Group->PreferredOwners ) ) {
  396. listEntry = Group->PreferredOwners.Flink;
  397. preferredEntry = CONTAINING_RECORD( listEntry,
  398. PREFERRED_ENTRY,
  399. PreferredLinkage );
  400. //
  401. // Move group if:
  402. // 0. Remote system is paused, and we're not OR
  403. // 1. Our system is in the preferred list and the owner node is not OR
  404. // 2. Group is Offline or Group is Online/PartialOnline and it can
  405. // failback AND
  406. // 3. Group's preferred list is ordered and our system is higher
  407. //
  408. if ( Group->OwnerNode == NULL ) {
  409. // Should we shoot ourselves because we got an incomplete snapshot
  410. // of the joint attempt.
  411. CsInconsistencyHalt(ERROR_CLUSTER_JOIN_ABORTED);
  412. } else if ( Group->OwnerNode != NmLocalNode) {
  413. if (((NmGetNodeState(NmLocalNode) != ClusterNodePaused) &&
  414. (NmGetNodeState(Group->OwnerNode) == ClusterNodePaused)) ||
  415. (FmpInPreferredList(Group, NmLocalNode, FALSE, NULL) &&
  416. !FmpInPreferredList( Group, Group->OwnerNode, FALSE, NULL)) ||
  417. ((((Group->State == ClusterGroupOnline) ||
  418. (Group->State == ClusterGroupPartialOnline)) &&
  419. (Group->FailbackType == FailbackOkay) ||
  420. (Group->State == ClusterGroupOffline)) &&
  421. ((Group->OrderedOwners) &&
  422. (FmpHigherInPreferredList(Group, NmLocalNode, Group->OwnerNode)))) ) {
  423. if ( failBackWindowOkay ) {
  424. PNM_NODE OwnerNode = Group->OwnerNode;
  425. status = FmcMoveGroupRequest( Group, NmLocalNode );
  426. if ( ( status == ERROR_SUCCESS ) || ( status == ERROR_IO_PENDING ) ) {
  427. //
  428. // Chittur Subbaraman (chitturs) - 7/31/2000
  429. //
  430. // Log an event indicating an impending failback.
  431. //
  432. CsLogEvent3( LOG_NOISE,
  433. FM_EVENT_GROUP_FAILBACK,
  434. OmObjectName(Group),
  435. OmObjectName(OwnerNode),
  436. OmObjectName(NmLocalNode) );
  437. }
  438. FmpAcquireLocalGroupLock( Group );
  439. } else {
  440. //
  441. // Start timer thread if not already running. If it fails,
  442. // what possibly can we do?
  443. //
  444. if ( FmpTimerThread == NULL ) {
  445. FmpTimerThread = CreateThread( NULL,
  446. 0,
  447. FmpJoinPendingThread,
  448. NULL,
  449. 0,
  450. &threadId );
  451. }
  452. *Deferred = TRUE;
  453. }
  454. }
  455. }
  456. }
  457. FmpReleaseLocalGroupLock( Group );
  458. return(TRUE);
  459. } // FmpEnumJoinGroups
  460. BOOL
  461. FmpEnumSignalGroups(
  462. IN PVOID Context1,
  463. IN PVOID Context2,
  464. IN PFM_GROUP Group,
  465. IN LPCWSTR Name
  466. )
  467. /*++
  468. Routine Description:
  469. Group enumeration callback to indicate state change on all groups
  470. and resources.
  471. For the quorum resource, if we're forming a cluster, we'll also
  472. fixup information that was not available when the resource was created.
  473. Arguments:
  474. Context1 - Pointer to a BOOL that is TRUE if this is a FormCluster.
  475. FALSE otherwise.
  476. Context2 - Not used.
  477. Group - Supplies the group.
  478. Name - Supplies the group's name.
  479. Return Value:
  480. TRUE - to indicate that the enumeration should continue.
  481. FALSE - to indicate that the enumeration should not continue.
  482. --*/
  483. {
  484. PLIST_ENTRY listEntry;
  485. PFM_RESOURCE resource;
  486. BOOL formCluster = *(PBOOL)Context1;
  487. DWORD status;
  488. BOOL quorumGroup = FALSE;
  489. //
  490. // For each resource in the group, generate an event notification.
  491. //
  492. for (listEntry = Group->Contains.Flink;
  493. listEntry != &(Group->Contains);
  494. listEntry = listEntry->Flink ) {
  495. resource = CONTAINING_RECORD( listEntry,
  496. FM_RESOURCE,
  497. ContainsLinkage );
  498. //
  499. // If this is the quorum resource and we're performing a Form
  500. // Cluster, then fixup the quorum resource info.
  501. //
  502. if ( resource->QuorumResource ) {
  503. status = FmpFixupResourceInfo( resource );
  504. quorumGroup = TRUE;
  505. if ( status != ERROR_SUCCESS ) {
  506. ClRtlLogPrint( LOG_NOISE,
  507. "[FM] Warning, failed to fixup quorum resource %1!ws!, error %2!u!.\n",
  508. OmObjectId(resource),
  509. status );
  510. }
  511. }
  512. if ( resource->State == ClusterResourceOnline ) {
  513. ClusterEvent( CLUSTER_EVENT_RESOURCE_ONLINE, resource );
  514. } else {
  515. ClusterEvent( CLUSTER_EVENT_RESOURCE_OFFLINE, resource );
  516. }
  517. }
  518. if ( quorumGroup ) {
  519. status = FmpFixupGroupInfo( Group );
  520. if ( status != ERROR_SUCCESS ) {
  521. ClRtlLogPrint( LOG_NOISE,
  522. "[FM] Warning, failed to fixup quorum group %1!ws!, error %2!u!.\n",
  523. OmObjectId( Group ),
  524. status );
  525. }
  526. }
  527. if ( Group->State == ClusterGroupOnline ) {
  528. ClusterEvent( CLUSTER_EVENT_GROUP_ONLINE, Group );
  529. } else {
  530. ClusterEvent( CLUSTER_EVENT_GROUP_OFFLINE, Group );
  531. }
  532. return(TRUE);
  533. } // FmpEnumSignalGroups
  534. DWORD
  535. FmpJoinPendingThread(
  536. IN LPVOID Context
  537. )
  538. /*++
  539. Routine Description:
  540. Thread to keep trying to move groups, as long we are blocked by a
  541. FailbackWindow problem. This thread runs every 15 minutes to attempt to
  542. move Groups.
  543. Arguments:
  544. Context - Not used.
  545. Return Value:
  546. ERROR_SUCCESS.
  547. --*/
  548. {
  549. DWORD status;
  550. BOOL deferred;
  551. //
  552. // As long as we have deferred Group moves, keep going.
  553. do {
  554. status = WaitForSingleObject( FmpShutdownEvent, WINDOW_TIMEOUT );
  555. if ( FmpShutdown ) {
  556. goto finished;
  557. }
  558. deferred = FALSE;
  559. //
  560. // For each group, see if it should be moved to the local system.
  561. //
  562. OmEnumObjects( ObjectTypeGroup,
  563. FmpEnumJoinGroupsMove,
  564. &deferred,
  565. NULL );
  566. } while ( (status != WAIT_FAILED) && deferred );
  567. finished:
  568. CloseHandle( FmpTimerThread );
  569. FmpTimerThread = NULL;
  570. return(ERROR_SUCCESS);
  571. } // FmpJoinPendingThread
  572. DWORD
  573. WINAPI
  574. FmGetQuorumResource(
  575. OUT PFM_GROUP *ppQuoGroup,
  576. OUT LPDWORD lpdwSignature OPTIONAL
  577. )
  578. /*++
  579. Routine Description:
  580. Find the quorum resource, arbitrate it and return a name that can be
  581. used to open the device in order to perform reads. Optionally,
  582. return the signature of the quorum disk.
  583. There are 3 items that we need:
  584. 1. The name of the quorum resource.
  585. 2. The name of the Group that the quorum resource is a member of.
  586. 3. The resource type for the quorum resource.
  587. Arguments:
  588. ppQuoGroup - Supplies a pointer to a buffer into which the
  589. quorum group info is returned.
  590. lpdwSignature - An optional argument which is used to return
  591. the signature of the quorum disk from the cluster hive.
  592. Return Value:
  593. ERROR_SUCCESS if successful.
  594. A Win32 error code on failure.
  595. --*/
  596. {
  597. LPWSTR quorumId = NULL;
  598. LPWSTR groupId = NULL;
  599. LPCWSTR stringId;
  600. LPWSTR containsString = NULL;
  601. PFM_GROUP group = NULL;
  602. PFM_RESOURCE resource = NULL;
  603. HDMKEY hGroupKey;
  604. DWORD groupIdSize = 0;
  605. DWORD idMaxSize = 0;
  606. DWORD idSize = 0;
  607. DWORD status;
  608. DWORD keyIndex;
  609. DWORD stringIndex;
  610. PCLUS_FORCE_QUORUM_INFO pForceQuorumInfo = NULL;
  611. *ppQuoGroup = NULL;
  612. //
  613. // Get the quorum resource value.
  614. //
  615. status = DmQuerySz( DmQuorumKey,
  616. CLUSREG_NAME_QUORUM_RESOURCE,
  617. (LPWSTR*)&quorumId,
  618. &idMaxSize,
  619. &idSize );
  620. if ( status != ERROR_SUCCESS ) {
  621. ClRtlLogPrint(LOG_ERROR,
  622. "[FM] Failed to get quorum resource, error %1!u!.\n",
  623. status);
  624. goto FnExit;
  625. }
  626. //
  627. // Chittur Subbaraman (chitturs) - 10/30/98
  628. //
  629. // If the user is forcing a database restore operation, you
  630. // also need to verify whether the quorum disk signature in
  631. // the registry matches that in the disk itself. So, go get
  632. // the signature from the Cluster\Resources\quorumId\Parameters
  633. // key
  634. //
  635. if ( lpdwSignature != NULL ) {
  636. status = FmpGetQuorumDiskSignature( quorumId, lpdwSignature );
  637. if ( status != ERROR_SUCCESS ) {
  638. //
  639. // This is not a fatal error. So log an error and go on.
  640. //
  641. ClRtlLogPrint(LOG_ERROR,
  642. "[FM] Failed to get quorum disk signature, error %1!u!.\n",
  643. status);
  644. }
  645. }
  646. //
  647. // Initialize the default Resource Monitor
  648. //
  649. if ( FmpDefaultMonitor == NULL ) {
  650. FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
  651. }
  652. if (FmpDefaultMonitor == NULL) {
  653. status = GetLastError();
  654. CsInconsistencyHalt(status);
  655. goto FnExit;
  656. }
  657. //
  658. // Now find the group that the quorum resource is a member of.
  659. //
  660. idMaxSize = 0;
  661. idSize = 0;
  662. for ( keyIndex = 0; ; keyIndex++ )
  663. {
  664. status = FmpRegEnumerateKey( DmGroupsKey,
  665. keyIndex,
  666. &groupId,
  667. &groupIdSize );
  668. if ( status == ERROR_NO_MORE_ITEMS )
  669. {
  670. break;
  671. }
  672. if (status != ERROR_SUCCESS)
  673. {
  674. continue;
  675. }
  676. //open the group key
  677. hGroupKey = DmOpenKey( DmGroupsKey,
  678. groupId,
  679. KEY_READ );
  680. if (!hGroupKey)
  681. continue;
  682. //
  683. // Get the contains string.
  684. //
  685. status = DmQueryMultiSz( hGroupKey,
  686. CLUSREG_NAME_GRP_CONTAINS,
  687. &containsString,
  688. &idMaxSize,
  689. &idSize );
  690. DmCloseKey(hGroupKey);
  691. if ( status != ERROR_SUCCESS )
  692. continue;
  693. for ( stringIndex = 0; ; stringIndex++ )
  694. {
  695. stringId = ClRtlMultiSzEnum( containsString,
  696. idSize/sizeof(WCHAR),
  697. stringIndex );
  698. if ( stringId == NULL ) {
  699. break;
  700. }
  701. if ( lstrcmpiW( stringId, quorumId ) == 0 )
  702. {
  703. // We will now create the group, which will also
  704. // create the resource, and the resource type.
  705. //
  706. // TODO - this will also create all resources
  707. // within the group. What should we do about that?
  708. // We could require the quorum resource to be in
  709. // a group by itself! (rodga) 17-June-1996.
  710. //
  711. group = FmpCreateGroup( groupId,
  712. FALSE );
  713. if (CsNoQuorum)
  714. FmpSetGroupPersistentState(group, ClusterGroupOffline);
  715. break;
  716. }
  717. }
  718. //if we found the group, thre is no need to search for more
  719. if (group != NULL)
  720. break;
  721. }
  722. //
  723. // Check if we found the Quorum resource's group.
  724. //
  725. if ( group == NULL )
  726. {
  727. ClRtlLogPrint(LOG_NOISE,
  728. "[FM] Did not find group for quorum resource.\n");
  729. status = ERROR_GROUP_NOT_FOUND;
  730. goto FnExit;
  731. }
  732. //
  733. // Get the quorum resource structure.
  734. //
  735. resource = OmReferenceObjectById( ObjectTypeResource, quorumId );
  736. if ( resource == NULL )
  737. {
  738. ClRtlLogPrint(LOG_ERROR,
  739. "[FM] Failed to find quorum resource object.\n");
  740. status = ERROR_RESOURCE_NOT_FOUND;
  741. goto FnExit;
  742. }
  743. resource->QuorumResource = TRUE;
  744. if (!CsNoQuorum)
  745. {
  746. ClRtlLogPrint(LOG_ERROR,
  747. "[FM] Arbitrate for quorum resource id %1!ws!.\n",
  748. OmObjectId(resource));
  749. //
  750. // First finish initializing the quorum resource.
  751. //
  752. if ( resource->Monitor == NULL )
  753. {
  754. status = FmpInitializeResource( resource, TRUE );
  755. if ( status != ERROR_SUCCESS )
  756. {
  757. ClRtlLogPrint(LOG_UNUSUAL,
  758. "[FM] Error completing initialization of quorum resource '%1!ws!, error %2!u!.\n",
  759. OmObjectId(resource),
  760. status );
  761. goto FnExit;
  762. }
  763. }
  764. //
  765. // If we have a force quorum of nodes then drop a control code to the
  766. // resource with the list of nodes. This must be done before
  767. // arbitrate. First we build force quorum info - this makes sure that the node list is valid etc.
  768. // Note that the list can be NULL.
  769. //
  770. if ( CsForceQuorum ) {
  771. ClRtlLogPrint(LOG_NOISE,
  772. "[FM] force quorum specified, sending CLUSCTL_RESOURCE_FORCE_QUORUM == 0x%1!08lx!\n",
  773. CLUSCTL_RESOURCE_FORCE_QUORUM );
  774. status = FmpBuildForceQuorumInfo( CsForceQuorumNodes,
  775. &pForceQuorumInfo );
  776. if ( status != ERROR_SUCCESS ) {
  777. ClRtlLogPrint(LOG_UNUSUAL,
  778. "[FM] Error building force quorum info for resource '%1!ws!, error %2!u!.\n",
  779. OmObjectId(resource),
  780. status );
  781. goto FnExit;
  782. }
  783. status = FmpRmResourceControl( resource,
  784. CLUSCTL_RESOURCE_FORCE_QUORUM,
  785. (LPBYTE)pForceQuorumInfo,
  786. pForceQuorumInfo->dwSize,
  787. NULL,
  788. 0,
  789. NULL,
  790. NULL );
  791. //
  792. // Tolerate ERROR_INVALID_FUNCTION since this just means that the
  793. // resource doesn't handle it.
  794. //
  795. if ( status == ERROR_INVALID_FUNCTION )
  796. status = ERROR_SUCCESS;
  797. if ( status != ERROR_SUCCESS ) {
  798. ClRtlLogPrint(LOG_ERROR,
  799. "[FM] Resource control for Force Quorum for resource '%1!ws! encountered error %2!u!.\n",
  800. OmObjectId(resource),
  801. status );
  802. goto FnExit;
  803. }
  804. }
  805. //
  806. // Now arbitrate for the resource.
  807. //
  808. status = FmpRmArbitrateResource( resource );
  809. }
  810. FnExit:
  811. if ( status == ERROR_SUCCESS ) {
  812. ClRtlLogPrint(LOG_NOISE,
  813. "[FM] FmGetQuorumResource successful\r\n");
  814. *ppQuoGroup = group;
  815. }
  816. else
  817. {
  818. ClRtlLogPrint(LOG_ERROR,
  819. "[FM] FmGetQuorumResource failed, error %1!u!.\n",
  820. status);
  821. //the group will be cleaned by fmshutdown()
  822. }
  823. if (pForceQuorumInfo) FmpDeleteForceQuorumInfo( &pForceQuorumInfo );
  824. if (resource) OmDereferenceObject(resource);
  825. if (quorumId) LocalFree(quorumId);
  826. if (groupId) LocalFree(groupId);
  827. //
  828. // Chittur Subbaraman (chitturs) - 10/05/98
  829. // Fix memory leak
  830. //
  831. if (containsString) LocalFree(containsString);
  832. return(status);
  833. } // FmGetQuorumResource
  834. DWORD
  835. WINAPI
  836. FmFindQuorumResource(
  837. OUT PFM_RESOURCE *ppResource
  838. )
  839. /*++
  840. Routine Description:
  841. Finds the quorum resource and returns a pointer to the resource
  842. object.
  843. Arguments:
  844. *ppResource - A pointer to the Quorum resource object is returned in this.
  845. Return Value:
  846. ERROR_SUCCESS if successful.
  847. A Win32 error code on failure.
  848. --*/
  849. {
  850. DWORD dwError = ERROR_SUCCESS;
  851. //enumerate all the resources
  852. *ppResource = NULL;
  853. OmEnumObjects( ObjectTypeResource,
  854. FmpFindQuorumResource,
  855. ppResource,
  856. NULL );
  857. if ( *ppResource == NULL )
  858. {
  859. dwError = ERROR_RESOURCE_NOT_FOUND;
  860. CL_LOGCLUSERROR(FM_QUORUM_RESOURCE_NOT_FOUND);
  861. }
  862. return(dwError);
  863. }
  864. DWORD WINAPI FmFindQuorumOwnerNodeId(IN PFM_RESOURCE pResource)
  865. {
  866. DWORD dwNodeId;
  867. CL_ASSERT(pResource->Group->OwnerNode != NULL);
  868. dwNodeId = NmGetNodeId(pResource->Group->OwnerNode);
  869. return (dwNodeId);
  870. }
  871. BOOL
  872. FmpReturnResourceType(
  873. IN OUT PFM_RESTYPE *FoundResourceType,
  874. IN LPCWSTR ResourceTypeName,
  875. IN PFM_RESTYPE ResourceType,
  876. IN LPCWSTR Name
  877. )
  878. /*++
  879. Routine Description:
  880. Group enumeration callback for FM join. Queries the preferred owners
  881. groups and moves those that belong on this system and that can move.
  882. Arguments:
  883. ResourceType - Returns the found ResourceType, if found.
  884. Context2 - The input resource type name to find.
  885. Resource - Supplies the current ResourceType.
  886. Name - Supplies the ResourceType's name.
  887. Return Value:
  888. TRUE - to indicate that the enumeration should continue.
  889. FALSE - to indicate that the enumeration should not continue.
  890. --*/
  891. {
  892. if ( lstrcmpiW( Name, ResourceTypeName ) == 0 ) {
  893. OmReferenceObject( ResourceType );
  894. *FoundResourceType = ResourceType;
  895. return(FALSE);
  896. }
  897. return(TRUE);
  898. } // FmpReturnResourceType
  899. DWORD
  900. WINAPI
  901. FmFormNewClusterPhase1(
  902. IN PFM_GROUP pQuoGroup
  903. )
  904. /*++
  905. Routine Description:
  906. Destroys the quorum group that was created. The quorum resource is left
  907. behind and its group adjusted according to the new logs.
  908. Arguments:
  909. None.
  910. Returns:
  911. ERROR_SUCCESS if successful
  912. Win32 errorcode otherwise.
  913. --*/
  914. {
  915. DWORD status;
  916. ClRtlLogPrint(LOG_NOISE,
  917. "[FM] FmFormNewClusterPhase1, Entry. Quorum quorum will be deleted\r\n");
  918. //
  919. // Enable the GUM.
  920. //
  921. GumReceiveUpdates(FALSE,
  922. GumUpdateFailoverManager,
  923. FmpGumReceiveUpdates,
  924. NULL,
  925. sizeof(FmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
  926. FmGumDispatchTable,
  927. FmpGumVoteHandler);
  928. //Acquire the exclusive lock for the quorum
  929. // This is done so that we can ignore any resource transition events from
  930. // the quorum resource between phase 1 and phase 2 of FM initialization on Form
  931. ACQUIRE_EXCLUSIVE_LOCK(gQuoChangeLock);
  932. FmpFMFormPhaseProcessing = TRUE;
  933. //release the quorum lock
  934. RELEASE_LOCK(gQuoChangeLock);
  935. //the group lock will be freed by FmpDestroyGroup
  936. FmpAcquireLocalGroupLock( pQuoGroup );
  937. //destroy the quorum group object, dont bring the quorum resource online/offline
  938. //All resources in the quorum group must get deleted, except the quorum resource
  939. //All resources in the quorum group must get recreated in FmFormNewClusterPhase2.
  940. //The quorum group is removed from the group list, hence it will be recreated in phase2.
  941. //Since the quorum resource must not get deleted we will increment its ref count
  942. //This is because in phase 2 it is not created and its ref count is not incremented at create
  943. //By the time it is put on the contains list, we expect the resource count to be 2.
  944. OmReferenceObject(gpQuoResource);
  945. status = FmpDestroyGroup(pQuoGroup, TRUE);
  946. //We prefer that the quorum group is deleted
  947. //since after rollback the old group may no longer exist and we
  948. //dont want it to be on the group list
  949. gpQuoResource->Group = NULL;
  950. OmDereferenceObject(pQuoGroup);
  951. return(status);
  952. } // FmFormNewClusterPhase1
  953. DWORD
  954. WINAPI
  955. FmFormNewClusterPhase2(
  956. VOID
  957. )
  958. /*++
  959. Routine Description:
  960. Bring the Failover Manager Online, this means claiming all groups and
  961. finishing the initialization of resources.
  962. Arguments:
  963. None.
  964. Returns:
  965. ERROR_SUCCESS if successful
  966. Win32 errorcode otherwise.
  967. --*/
  968. {
  969. DWORD status;
  970. BOOL formCluster = TRUE;
  971. PFM_GROUP group;
  972. PFM_RESOURCE pQuoResource=NULL;
  973. CLUSTERVERSIONINFO ClusterVersionInfo;
  974. PCLUSTERVERSIONINFO pClusterVersionInfo = NULL;
  975. PGROUP_ENUM MyGroups = NULL;
  976. BOOL QuorumGroup;
  977. ClRtlLogPrint(LOG_NOISE,
  978. "[FM] FmFormNewClusterPhase2, Entry.\r\n");
  979. //
  980. // Initialize resource types
  981. //
  982. status = FmpInitResourceTypes();
  983. if (status != ERROR_SUCCESS) {
  984. CsInconsistencyHalt(status);
  985. goto error_exit;
  986. }
  987. //
  988. // Initialize Groups,
  989. //
  990. status = FmpInitGroups( FALSE );
  991. if (status != ERROR_SUCCESS) {
  992. goto error_exit;
  993. }
  994. // refigure out the state for the quorum group
  995. status = FmFindQuorumResource(&pQuoResource);
  996. if (status != ERROR_SUCCESS)
  997. {
  998. goto error_exit;
  999. }
  1000. //
  1001. // Set the state of the quorum group depending upon the state of
  1002. // the quorum resource
  1003. //
  1004. //now we should enable resource events to come in for the quorum resource as well
  1005. ACQUIRE_EXCLUSIVE_LOCK(gQuoChangeLock);
  1006. FmpFMFormPhaseProcessing = FALSE;
  1007. group = pQuoResource->Group;
  1008. group->State = FmpGetGroupState(group, TRUE);
  1009. OmDereferenceObject(pQuoResource);
  1010. //if the noquorum flag is set, dont bring the quorum group online
  1011. if (CsNoQuorum)
  1012. FmpSetGroupPersistentState(pQuoResource->Group, ClusterGroupOffline);
  1013. RELEASE_LOCK(gQuoChangeLock);
  1014. //
  1015. // Initialize the default Resource Monitor
  1016. //
  1017. if ( FmpDefaultMonitor == NULL ) {
  1018. FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
  1019. }
  1020. if (FmpDefaultMonitor == NULL) {
  1021. status = GetLastError();
  1022. ClRtlLogPrint(LOG_ERROR,
  1023. "[FM] Failed to create default resource monitor on Form.\n");
  1024. goto error_exit;
  1025. }
  1026. if (NmLocalNodeVersionChanged)
  1027. {
  1028. //initialize the version information
  1029. CsGetClusterVersionInfo(&ClusterVersionInfo);
  1030. pClusterVersionInfo = &ClusterVersionInfo;
  1031. }
  1032. //enable votes and gum updates since the fixups for
  1033. //resource types require that
  1034. FmpFMGroupsInited = TRUE;
  1035. //
  1036. // The resource type possible node list is built
  1037. // using a voting protocol, hence we need to
  1038. // fix it up since the vote could have been conducted
  1039. // while this node was down.
  1040. // Also call the resource type control code if the
  1041. // local node version has changed
  1042. //
  1043. status = FmpFixupResourceTypesPhase1(FALSE, NmLocalNodeVersionChanged,
  1044. pClusterVersionInfo);
  1045. if (status != ERROR_SUCCESS) {
  1046. CsInconsistencyHalt(status);
  1047. goto error_exit;
  1048. }
  1049. //
  1050. // Find and sort all known groups
  1051. //
  1052. status = FmpEnumSortGroups(&MyGroups, NULL, &QuorumGroup);
  1053. if (status != ERROR_SUCCESS) {
  1054. goto error_exit;
  1055. }
  1056. //
  1057. // Find the state of the Groups.
  1058. //
  1059. FmpGetGroupListState( MyGroups );
  1060. //
  1061. // Set the Group owner.
  1062. //
  1063. FmpSetGroupEnumOwner( MyGroups, NmLocalNode, NULL, QuorumGroup, NULL );
  1064. //
  1065. // For each group, finish initialization of all groups and resources.
  1066. //
  1067. OmEnumObjects( ObjectTypeGroup,
  1068. FmpEnumGroupsInit,
  1069. NULL,
  1070. NULL );
  1071. // if the resource type is not supported, remove it from the possible
  1072. // owners list of all resources of that type
  1073. status = FmpFixupPossibleNodesForResources(FALSE);
  1074. if (status != ERROR_SUCCESS) {
  1075. CsInconsistencyHalt(status);
  1076. return(status);
  1077. }
  1078. if (NmLocalNodeVersionChanged)
  1079. {
  1080. //
  1081. // For each group, allow all resources to do any fixups
  1082. // they might need to do to the cluster registry to
  1083. // run in a mixed mode cluster.
  1084. //
  1085. // Get the version info
  1086. OmEnumObjects( ObjectTypeGroup,
  1087. FmpEnumFixupResources,
  1088. &ClusterVersionInfo,
  1089. NULL );
  1090. }
  1091. //
  1092. // Take ownership of all the groups in the system. This also completes
  1093. // the initialization of all resources.
  1094. //
  1095. status = FmpClaimAllGroups(MyGroups);
  1096. if (status != ERROR_SUCCESS) {
  1097. ClRtlLogPrint(LOG_CRITICAL,"[FM] FmpClaimAllGroups failed %1!d!\n",status);
  1098. goto error_exit;
  1099. }
  1100. //
  1101. // Cleanup
  1102. //
  1103. FmpDeleteEnum(MyGroups);
  1104. FmpFMOnline = TRUE;
  1105. //
  1106. // Signal a state change for every group and resource!
  1107. //
  1108. OmEnumObjects( ObjectTypeGroup,
  1109. FmpEnumSignalGroups,
  1110. &formCluster,
  1111. NULL );
  1112. //
  1113. // Chittur Subbaraman (chitturs) - 5/3/2000
  1114. //
  1115. // Make sure the phase 2 notifications are delivered only after all initialization is
  1116. // complete. This includes fixing up the possible owners of the quorum resource by
  1117. // FmpEnumSignalGroups. Once phase 2 notifications are delivered, resource type DLLs
  1118. // would be free to issue cluster API calls into FM and the lack of possible owners should
  1119. // not be the reason to reject these calls.
  1120. //
  1121. status = FmpFixupResourceTypesPhase2(FALSE, NmLocalNodeVersionChanged,
  1122. pClusterVersionInfo);
  1123. if (status != ERROR_SUCCESS) {
  1124. CsInconsistencyHalt( status );
  1125. goto error_exit;
  1126. }
  1127. ClRtlLogPrint(LOG_NOISE,"[FM] FmFormNewClusterPhase2 complete.\n");
  1128. return(ERROR_SUCCESS);
  1129. error_exit:
  1130. if (MyGroups) FmpDeleteEnum(MyGroups);
  1131. FmpShutdown = TRUE;
  1132. FmpFMOnline = FALSE;
  1133. FmpCleanupGroups(FALSE);
  1134. if (FmpDefaultMonitor != NULL) {
  1135. FmpShutdownMonitor( FmpDefaultMonitor );
  1136. FmpDefaultMonitor = NULL;
  1137. }
  1138. FmpShutdown = FALSE;
  1139. return(status);
  1140. } // FmFormNewClusterPhase2
  1141. DWORD
  1142. WINAPI
  1143. FmJoinPhase1(
  1144. VOID
  1145. )
  1146. /*++
  1147. Routine Description:
  1148. Performs the FM initialization and join procedure. This creates skeletal
  1149. groups and resources, which are not fully initialized. After the API is
  1150. fully enabled (in Phase 2) we will finish initialization of the groups
  1151. and resources (which causes the resource monitors to run and opens
  1152. the resource DLL's.
  1153. Arguments:
  1154. None.
  1155. Return Value:
  1156. ERROR_SUCCESS if successful
  1157. Win32 errorcode otherwise.
  1158. --*/
  1159. {
  1160. DWORD status;
  1161. DWORD sequence;
  1162. //
  1163. // Enable Gum updates.
  1164. //
  1165. GumReceiveUpdates(TRUE,
  1166. GumUpdateFailoverManager,
  1167. FmpGumReceiveUpdates,
  1168. NULL,
  1169. sizeof(FmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
  1170. FmGumDispatchTable,
  1171. FmpGumVoteHandler);
  1172. retry:
  1173. status = GumBeginJoinUpdate(GumUpdateFailoverManager, &sequence);
  1174. if (status != ERROR_SUCCESS) {
  1175. ClRtlLogPrint(LOG_CRITICAL,
  1176. "[FM] GumBeginJoinUpdate failed %1!d!\n",
  1177. status);
  1178. return(status);
  1179. }
  1180. //
  1181. // Build up all the FM data structures for resource types.
  1182. //
  1183. //
  1184. // Initialize resource types
  1185. //
  1186. status = FmpInitResourceTypes();
  1187. if (status != ERROR_SUCCESS) {
  1188. CsInconsistencyHalt(status);
  1189. return(status);
  1190. }
  1191. //
  1192. // Initialize Groups, but don't fully initialize them yet.
  1193. //
  1194. status = FmpInitGroups( FALSE );
  1195. if (status != ERROR_SUCCESS) {
  1196. return(status);
  1197. }
  1198. //
  1199. // Initialize the default Resource Monitor. This step must be done before end join update
  1200. // since this node can receive certain updates such as s_GumCollectVoteFromNode immediately
  1201. // after GumEndJoinUpdate which may need the services of the default monitor.
  1202. //
  1203. if ( FmpDefaultMonitor == NULL ) {
  1204. FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
  1205. }
  1206. if ( FmpDefaultMonitor == NULL ) {
  1207. status = GetLastError();
  1208. CsInconsistencyHalt(status);
  1209. return(status);
  1210. }
  1211. //
  1212. // Get the group and resource state from each node which is online.
  1213. //
  1214. status = ERROR_SUCCESS;
  1215. OmEnumObjects( ObjectTypeNode,
  1216. FmpEnumNodes,
  1217. &status,
  1218. NULL );
  1219. if (status == ERROR_SUCCESS) {
  1220. FmpFMGroupsInited = TRUE;
  1221. // Gum Update handlers for resource and group state changes
  1222. // can process the updates now.
  1223. status = GumEndJoinUpdate(sequence,
  1224. GumUpdateFailoverManager,
  1225. FmUpdateJoin,
  1226. 0,
  1227. NULL);
  1228. if (status == ERROR_CLUSTER_DATABASE_SEQMISMATCH) {
  1229. ClRtlLogPrint(LOG_UNUSUAL,
  1230. "[FM] GumEndJoinUpdate with sequence %1!d! failed with a sequence mismatch\n",
  1231. sequence);
  1232. } else if (status != ERROR_SUCCESS) {
  1233. ClRtlLogPrint(LOG_CRITICAL,
  1234. "[FM] GumEndJoinUpdate with sequence %1!d! failed with status %2!d!\n",
  1235. sequence,
  1236. status);
  1237. }
  1238. } else {
  1239. ClRtlLogPrint(LOG_UNUSUAL,
  1240. "[FM] FmJoin: FmpEnumNodes failed %1!d!\n",
  1241. status);
  1242. return(status); // we will loop forever without this
  1243. }
  1244. if (status != ERROR_SUCCESS) {
  1245. //
  1246. // clean up resources
  1247. //
  1248. FmpShutdown = TRUE;
  1249. FmpCleanupGroups(FALSE);
  1250. FmpShutdown = FALSE;
  1251. //
  1252. // Better luck next time!
  1253. //
  1254. goto retry;
  1255. }
  1256. ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase1 complete.\n");
  1257. return(ERROR_SUCCESS);
  1258. } // FmJoinPhase1
  1259. DWORD
  1260. WINAPI
  1261. FmJoinPhase2(
  1262. VOID
  1263. )
  1264. /*++
  1265. Routine Description:
  1266. Performs the second phase of FM initialization and join procedure.
  1267. Finish creation of resources by allowing the resource monitors to be
  1268. created. Claim any groups which should failback to this node.
  1269. Arguments:
  1270. None.
  1271. Return Value:
  1272. ERROR_SUCCESS if successful
  1273. Win32 errorcode otherwise.
  1274. --*/
  1275. {
  1276. DWORD status;
  1277. CLUSTERVERSIONINFO ClusterVersionInfo;
  1278. PCLUSTERVERSIONINFO pClusterVersionInfo = NULL;
  1279. DWORD dwRetryCount=60;//try for atleast a minute
  1280. GetJoinApproval:
  1281. status = FmpGetJoinApproval();
  1282. if (status == ERROR_RETRY)
  1283. {
  1284. // if the other nodes have pending work to do
  1285. //after this node last died and are not willing
  1286. // to accept it back till that is over, we will stall
  1287. // the join
  1288. //sleep for a second
  1289. dwRetryCount--;
  1290. if (dwRetryCount)
  1291. {
  1292. Sleep(1000);
  1293. goto GetJoinApproval;
  1294. }
  1295. else
  1296. {
  1297. ClRtlLogPrint(LOG_CRITICAL,
  1298. "[FM] FmJoinPhase2 : timed out trying to get join approval.\n");
  1299. CsInconsistencyHalt(status);
  1300. }
  1301. }
  1302. if (NmLocalNodeVersionChanged)
  1303. {
  1304. //initialize the cluster versioninfo structure
  1305. CsGetClusterVersionInfo(&ClusterVersionInfo);
  1306. pClusterVersionInfo = &ClusterVersionInfo;
  1307. }
  1308. //
  1309. // The resource type possible node list is built
  1310. // using a voting protocol, hence we need to
  1311. // fix it up since the vote could have been conducted
  1312. // while this node was down.
  1313. //
  1314. status = FmpFixupResourceTypesPhase1(TRUE, NmLocalNodeVersionChanged,
  1315. pClusterVersionInfo);
  1316. if (status != ERROR_SUCCESS) {
  1317. CsInconsistencyHalt(status);
  1318. return(status);
  1319. }
  1320. //
  1321. // For each group, finish initialization of all groups and resources.
  1322. //
  1323. OmEnumObjects( ObjectTypeGroup,
  1324. FmpEnumGroupsInit,
  1325. NULL,
  1326. NULL );
  1327. // if the resource type is not supported, remove it from the possible
  1328. // owners list of all resources of that type
  1329. status = FmpFixupPossibleNodesForResources(TRUE);
  1330. if (status != ERROR_SUCCESS) {
  1331. CsInconsistencyHalt(status);
  1332. return(status);
  1333. }
  1334. if (NmLocalNodeVersionChanged)
  1335. {
  1336. //
  1337. // For each group, allow all resources to do any fixups
  1338. // they might need to do to the cluster registry to
  1339. // run in a mixed mode cluster.
  1340. //
  1341. OmEnumObjects( ObjectTypeGroup,
  1342. FmpEnumFixupResources,
  1343. &ClusterVersionInfo,
  1344. NULL );
  1345. }
  1346. //
  1347. // The FM is now in sync with everybody else.
  1348. //
  1349. FmpFMOnline = TRUE;
  1350. if ( FmpMajorEvent ) {
  1351. return(ERROR_NOT_READY);
  1352. }
  1353. status = FmpFixupResourceTypesPhase2(TRUE, NmLocalNodeVersionChanged,
  1354. pClusterVersionInfo);
  1355. if (status != ERROR_SUCCESS) {
  1356. CsInconsistencyHalt(status);
  1357. return(status);
  1358. }
  1359. ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase2 complete, now online!\n");
  1360. return(ERROR_SUCCESS);
  1361. } // FmJoinPhase2
  1362. VOID
  1363. FmJoinPhase3(
  1364. VOID
  1365. )
  1366. /*++
  1367. Routine Description:
  1368. Handles any group moves and resource/group state change signaling as
  1369. a part of join. This MUST be done only AFTER the extended node state
  1370. is UP.
  1371. Arguments:
  1372. None.
  1373. Return Value:
  1374. None.
  1375. --*/
  1376. {
  1377. BOOL formCluster = FALSE;
  1378. DWORD deferred = FALSE;
  1379. ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase3 entry...\n");
  1380. //
  1381. // Chittur Subbaraman (chitturs) - 10/28/99
  1382. //
  1383. //
  1384. // For each group, see if it should be moved to the local system.
  1385. //
  1386. OmEnumObjects( ObjectTypeGroup,
  1387. FmpEnumJoinGroupsMove,
  1388. &deferred,
  1389. NULL );
  1390. //
  1391. // Signal a state change for every group and resource!
  1392. //
  1393. OmEnumObjects( ObjectTypeGroup,
  1394. FmpEnumSignalGroups,
  1395. &formCluster,
  1396. NULL );
  1397. ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase3 exit...\n");
  1398. } // FmJoinPhase3
  1399. BOOL
  1400. FmpFindQuorumResource(
  1401. IN OUT PFM_RESOURCE *QuorumResource,
  1402. IN PVOID Context2,
  1403. IN PFM_RESOURCE Resource,
  1404. IN LPCWSTR Name
  1405. )
  1406. /*++
  1407. Routine Description:
  1408. Group enumeration callback for FM findquorumresource.
  1409. Arguments:
  1410. QuorumResource - Returns the found quorum resource, if found.
  1411. Context2 - Not used.
  1412. Resource - Supplies the current resource.
  1413. Name - Supplies the Resource's name.
  1414. Return Value:
  1415. TRUE - to indicate that the enumeration should continue.
  1416. FALSE - to indicate that the enumeration should not continue.
  1417. --*/
  1418. {
  1419. if ( Resource->QuorumResource ) {
  1420. OmReferenceObject( Resource );
  1421. *QuorumResource = Resource;
  1422. return(FALSE);
  1423. }
  1424. return(TRUE);
  1425. } // FmpFindQuorumResource
  1426. BOOL
  1427. FmArbitrateQuorumResource(
  1428. VOID
  1429. )
  1430. /*++
  1431. Routine Description:
  1432. Arguments:
  1433. Return Value:
  1434. TRUE - if the quorum resource was successfully arbitrated and acquired.
  1435. FALSE - it the quorum resource was not successfully arbitrated.
  1436. --*/
  1437. {
  1438. PFM_RESOURCE resource = NULL;
  1439. DWORD status;
  1440. WCHAR localComputerName[MAX_COMPUTERNAME_LENGTH + 1];
  1441. DWORD localComputerNameSize = MAX_COMPUTERNAME_LENGTH + 1;
  1442. //
  1443. // Next try to find the Quorum resource.
  1444. //
  1445. FmFindQuorumResource(&resource);
  1446. if ( resource == NULL ) {
  1447. SetLastError(ERROR_RESOURCE_NOT_FOUND);
  1448. return(FALSE);
  1449. }
  1450. //
  1451. // Now arbitrate for the resource.
  1452. //
  1453. status = FmpRmArbitrateResource( resource );
  1454. if ( status == ERROR_SUCCESS ) {
  1455. ClRtlLogPrint(LOG_NOISE,
  1456. "[FM] Successfully arbitrated quorum resource %1!ws!.\n",
  1457. OmObjectId(resource));
  1458. ClRtlLogPrint(LOG_NOISE,
  1459. "[FM] FMArbitrateQuoRes: Current State %1!u! State=%2!u! Owner %3!u!\r\n",
  1460. resource->PersistentState,
  1461. resource->State,
  1462. NmGetNodeId((resource->Group)->OwnerNode));
  1463. ClRtlLogPrint(LOG_NOISE,
  1464. "[FM] FMArbitrateQuoRes: Group state :Current State %1!u! State=%2!u! Owner %3!u!\r\n",
  1465. resource->Group->PersistentState,
  1466. resource->Group->State,
  1467. NmGetNodeId((resource->Group)->OwnerNode));
  1468. //
  1469. // The quorum resource will be brought online by REGROUP.
  1470. //
  1471. // RNG: what happens if we can't online the quorum resource?
  1472. // A: The node will halt.
  1473. //SS: dereference the object referenced by fmfindquorumresource
  1474. OmDereferenceObject(resource);
  1475. return(TRUE);
  1476. } else {
  1477. ClRtlLogPrint(LOG_ERROR,
  1478. "[FM] Failed to arbitrate quorum resource %1!ws!, error %2!u!.\n",
  1479. OmObjectId(resource),
  1480. status);
  1481. //SS: dereference the object referenced by fmfindquorumresource
  1482. OmDereferenceObject(resource);
  1483. return(FALSE);
  1484. }
  1485. } // FmArbitrateQuorumResource
  1486. BOOL
  1487. FmpEnumHoldIO(
  1488. IN PVOID Context1,
  1489. IN PVOID Context2,
  1490. IN PFM_RESTYPE ResType,
  1491. IN LPCWSTR Name
  1492. )
  1493. /*++
  1494. Routine Description:
  1495. Send a HOLD_IO control code to all resource types of class STORAGE.
  1496. Arguments:
  1497. Context1 - Not used.
  1498. Context2 - Not used.
  1499. ResType - Supplies the Resource Type.
  1500. Name - Supplies the Resource Type's name.
  1501. Return Value:
  1502. TRUE - to indicate that the enumeration should continue.
  1503. FALSE - to indicate that the enumeration should not continue.
  1504. --*/
  1505. {
  1506. DWORD dwStatus;
  1507. DWORD bytesReturned;
  1508. DWORD bytesRequired;
  1509. if ( ResType->Class == CLUS_RESCLASS_STORAGE ) {
  1510. ClRtlLogPrint(LOG_NOISE,
  1511. "[FM] Hold IO for storage resource type: %1!ws!\n",
  1512. Name );
  1513. // Hold IO for this resource type
  1514. dwStatus = FmpRmResourceTypeControl(
  1515. Name,
  1516. CLUSCTL_RESOURCE_TYPE_HOLD_IO,
  1517. NULL,
  1518. 0,
  1519. NULL,
  1520. 0,
  1521. &bytesReturned,
  1522. &bytesRequired );
  1523. ClRtlLogPrint(LOG_NOISE,
  1524. "[FM] Resource DLL Hold IO returned status %1!u!\n",
  1525. dwStatus );
  1526. }
  1527. return(TRUE);
  1528. } // FmpEnumHoldIO
  1529. VOID
  1530. FmHoldIO(
  1531. VOID
  1532. )
  1533. /*++
  1534. Routine Description:
  1535. This routine holds all I/O for all storage class resource types.
  1536. It does this by calling the resource dll with a
  1537. CLUSCTL_RESOURCE_TYPE_HOLD_IO resource type control code.
  1538. Inputs:
  1539. None
  1540. Outputs:
  1541. None
  1542. --*/
  1543. {
  1544. OmEnumObjects( ObjectTypeResType,
  1545. FmpEnumHoldIO,
  1546. NULL,
  1547. NULL );
  1548. return;
  1549. } // FmHoldIO
  1550. BOOL
  1551. FmpEnumResumeIO(
  1552. IN PVOID Context1,
  1553. IN PVOID Context2,
  1554. IN PFM_RESTYPE ResType,
  1555. IN LPCWSTR Name
  1556. )
  1557. /*++
  1558. Routine Description:
  1559. Send a RESUME_IO control code to all resource types of class STORAGE.
  1560. Arguments:
  1561. Context1 - Not used.
  1562. Context2 - Not used.
  1563. ResType - Supplies the Resource Type.
  1564. Name - Supplies the Resource Type's name.
  1565. Return Value:
  1566. TRUE - to indicate that the enumeration should continue.
  1567. FALSE - to indicate that the enumeration should not continue.
  1568. --*/
  1569. {
  1570. DWORD dwStatus;
  1571. DWORD bytesReturned;
  1572. DWORD bytesRequired;
  1573. if ( ResType->Class == CLUS_RESCLASS_STORAGE ) {
  1574. ClRtlLogPrint(LOG_NOISE,
  1575. "[FM] Resume IO for storage Resource Type %1!ws!\n",
  1576. Name );
  1577. // Resume IO for this resource type
  1578. dwStatus = FmpRmResourceTypeControl(
  1579. Name,
  1580. CLUSCTL_RESOURCE_TYPE_RESUME_IO,
  1581. NULL,
  1582. 0,
  1583. NULL,
  1584. 0,
  1585. &bytesReturned,
  1586. &bytesRequired );
  1587. ClRtlLogPrint(LOG_NOISE,
  1588. "[FM] Resource DLL Resume IO returned status %1!u!\n",
  1589. dwStatus );
  1590. }
  1591. return(TRUE);
  1592. } // FmpEnumResumeIO
  1593. VOID
  1594. FmResumeIO(
  1595. VOID
  1596. )
  1597. /*++
  1598. Routine Description:
  1599. This routine resumes all I/O for all storage class resource types.
  1600. It does this by calling the resource dll with a
  1601. CLUSCTL_RESOURCE_TYPE_RESUME_IO resource type control code.
  1602. Inputs:
  1603. None
  1604. Outputs:
  1605. None
  1606. --*/
  1607. {
  1608. OmEnumObjects( ObjectTypeResType,
  1609. FmpEnumResumeIO,
  1610. NULL,
  1611. NULL );
  1612. return;
  1613. } // FmResumeIO
  1614. BOOL
  1615. FmpEnumNodes(
  1616. OUT DWORD *pStatus,
  1617. IN PVOID Context2,
  1618. IN PNM_NODE Node,
  1619. IN LPCWSTR Name
  1620. )
  1621. /*++
  1622. Routine Description:
  1623. Node enumeration callback for FM join. Queries the state
  1624. of owned groups and resources for each online node.
  1625. Arguments:
  1626. pStatus - Returns any error that may occur.
  1627. Context2 - Not used
  1628. Node - Supplies the node.
  1629. Name - Supplies the node's name.
  1630. Return Value:
  1631. TRUE - to indicate that the enumeration should continue.
  1632. FALSE - to indicate that the enumeration should not continue.
  1633. --*/
  1634. {
  1635. DWORD Status;
  1636. DWORD NodeId;
  1637. PGROUP_ENUM NodeGroups = NULL;
  1638. PRESOURCE_ENUM NodeResources = NULL;
  1639. DWORD i;
  1640. PFM_GROUP Group;
  1641. PFM_RESOURCE Resource;
  1642. if (Node == NmLocalNode) {
  1643. CL_ASSERT(NmGetNodeState(Node) != ClusterNodeUp);
  1644. return(TRUE);
  1645. }
  1646. //
  1647. // Enumerate all other node's group states. This includes all nodes
  1648. // that are up, as well as nodes that are paused.
  1649. //
  1650. if ((NmGetNodeState(Node) == ClusterNodeUp) ||
  1651. (NmGetNodeState(Node) == ClusterNodePaused)){
  1652. NodeId = NmGetNodeId(Node);
  1653. CL_ASSERT(Session[NodeId] != NULL);
  1654. Status = FmsQueryOwnedGroups(Session[NodeId],
  1655. &NodeGroups,
  1656. &NodeResources);
  1657. if (Status != ERROR_SUCCESS) {
  1658. ClRtlLogPrint(LOG_UNUSUAL,
  1659. "[FM] FmsQueryOwnedGroups to node %1!ws! failed %2!d!\n",
  1660. OmObjectId(Node),
  1661. Status);
  1662. *pStatus = Status;
  1663. return(FALSE);
  1664. }
  1665. //
  1666. // Enumerate the groups and set their owner and state.
  1667. //
  1668. for (i=0; i < NodeGroups->EntryCount; i++) {
  1669. Group = OmReferenceObjectById(ObjectTypeGroup,
  1670. NodeGroups->Entry[i].Id);
  1671. if (Group == NULL) {
  1672. ClRtlLogPrint(LOG_UNUSUAL,
  1673. "[FM] FmpEnumNodes: group %1!ws! not found\n",
  1674. NodeGroups->Entry[i].Id);
  1675. } else {
  1676. if ( FmpInPreferredList( Group, Node, FALSE, NULL ) ) {
  1677. ClRtlLogPrint(LOG_NOISE,
  1678. "[FM] Setting group %1!ws! owner to node %2!ws!, state %3!d!\n",
  1679. OmObjectId(Group),
  1680. OmObjectId(Node),
  1681. NodeGroups->Entry[i].State);
  1682. } else {
  1683. ClRtlLogPrint(LOG_NOISE,
  1684. "[FM] Init, Node %1!ws! is not in group %2!ws!.\n",
  1685. OmObjectId(Node),
  1686. OmObjectId(Group));
  1687. }
  1688. OmReferenceObject( Node );
  1689. Group->OwnerNode = Node;
  1690. Group->State = NodeGroups->Entry[i].State;
  1691. Group->StateSequence = NodeGroups->Entry[i].StateSequence;
  1692. OmDereferenceObject(Group);
  1693. }
  1694. MIDL_user_free(NodeGroups->Entry[i].Id);
  1695. }
  1696. MIDL_user_free(NodeGroups);
  1697. //
  1698. // Enumerate the resources and set their current state.
  1699. //
  1700. for (i=0; i < NodeResources->EntryCount; i++) {
  1701. Resource = OmReferenceObjectById(ObjectTypeResource,
  1702. NodeResources->Entry[i].Id);
  1703. if (Resource == NULL) {
  1704. ClRtlLogPrint(LOG_UNUSUAL,
  1705. "[FM] FmpEnumNodes: resource %1!ws! not found\n",
  1706. NodeResources->Entry[i].Id);
  1707. } else {
  1708. ClRtlLogPrint(LOG_NOISE,
  1709. "[FM] Setting resource %1!ws! state to %2!d!\n",
  1710. OmObjectId(Resource),
  1711. NodeResources->Entry[i].State);
  1712. Resource->State = NodeResources->Entry[i].State;
  1713. Resource->StateSequence = NodeResources->Entry[i].StateSequence;
  1714. OmDereferenceObject(Resource);
  1715. }
  1716. MIDL_user_free(NodeResources->Entry[i].Id);
  1717. }
  1718. MIDL_user_free(NodeResources);
  1719. }
  1720. return(TRUE);
  1721. } // FmpEnumNodes
  1722. VOID
  1723. WINAPI
  1724. FmShutdown(
  1725. VOID
  1726. )
  1727. /*++
  1728. Routine Description:
  1729. Shuts down the Failover Manager
  1730. Arguments:
  1731. None
  1732. Return Value:
  1733. None.
  1734. --*/
  1735. {
  1736. DWORD i;
  1737. if ( !FmpInitialized ) {
  1738. return;
  1739. }
  1740. FmpInitialized = FALSE;
  1741. ClRtlLogPrint(LOG_UNUSUAL,
  1742. "[FM] Shutdown: Failover Manager requested to shutdown.\n");
  1743. //
  1744. // For now, we really can't delete these critical sections. There is a
  1745. // race condition where the FM is shutting down and someone is walking
  1746. // the lists. Keep this critical sections around... just in case.
  1747. //
  1748. //DeleteCriticalSection( &FmpResourceLock );
  1749. //DeleteCriticalSection( &FmpGroupLock );
  1750. //DeleteCriticalSection( &FmpMonitorLock );
  1751. if ( FmpDefaultMonitor != NULL ) {
  1752. FmpShutdownMonitor(FmpDefaultMonitor);
  1753. FmpDefaultMonitor = NULL;
  1754. }
  1755. CloseHandle( FmpShutdownEvent );
  1756. #if 0 // RNG - don't run the risk of other threads using these handles
  1757. for ( i = ClusterMinNodeId; i <= NmMaxNodeId; i++ ) {
  1758. if ( FmpRpcBindings[i] != NULL ) {
  1759. ClMsgDeleteRpcBinding( FmpRpcBindings[i] );
  1760. FmpRpcBindings[i] = NULL;
  1761. }
  1762. if ( FmpRpcQuorumBindings[i] != NULL ) {
  1763. ClMsgDeleteRpcBinding( FmpRpcQuorumBindings[i] );
  1764. FmpRpcQuorumBindings[i] = NULL;
  1765. }
  1766. }
  1767. #endif
  1768. ClRtlDeleteQueue( &FmpWorkQueue );
  1769. return;
  1770. } // FmShutdown
  1771. VOID
  1772. WINAPI
  1773. FmShutdownGroups(
  1774. VOID
  1775. )
  1776. /*++
  1777. Routine Description:
  1778. Moves or takes offline all groups owned by this node.
  1779. Arguments:
  1780. None
  1781. Return Value:
  1782. None.
  1783. --*/
  1784. {
  1785. ClRtlLogPrint(LOG_UNUSUAL,
  1786. "[FM] Shutdown: Failover Manager requested to shutdown groups.\n");
  1787. //if we didnt initialize, we dont have to do anything
  1788. if (!FmpInitialized)
  1789. return;
  1790. //
  1791. // Use the Group Lock to synchronize the shutdown
  1792. //
  1793. FmpAcquireGroupLock();
  1794. //if shutdown is already in progress, return
  1795. if ( FmpShutdown) {
  1796. FmpReleaseGroupLock();
  1797. return;
  1798. }
  1799. FmpShutdown = TRUE;
  1800. FmpFMOnline = FALSE;
  1801. FmpReleaseGroupLock();
  1802. //
  1803. // Now cleanup all Groups/Resources.
  1804. //
  1805. FmpCleanupGroups(TRUE);
  1806. return;
  1807. } // FmShutdownGroups
  1808. /****
  1809. @func DWORD | FmBringQuorumOnline| This routine finds the quorum resource and
  1810. brings it online.
  1811. @comm This is called by the FmFormClusterPhase 1.
  1812. @xref
  1813. ****/
  1814. DWORD FmBringQuorumOnline()
  1815. {
  1816. PFM_RESOURCE pQuoResource;
  1817. DWORD dwError=ERROR_SUCCESS;
  1818. //
  1819. // Synchronize with shutdown.
  1820. //
  1821. FmpAcquireGroupLock();
  1822. if ( FmpShutdown ) {
  1823. FmpReleaseGroupLock();
  1824. return(ERROR_SUCCESS);
  1825. }
  1826. if ((dwError = FmFindQuorumResource(&pQuoResource)) != ERROR_SUCCESS)
  1827. {
  1828. ClRtlLogPrint(LOG_UNUSUAL,
  1829. "[Fm] FmpBringQuorumOnline : failed to find resource 0x%1!08lx!\r\n",
  1830. dwError);
  1831. goto FnExit;
  1832. }
  1833. //mark yourself as owner
  1834. if ( pQuoResource->Group->OwnerNode != NULL )
  1835. {
  1836. OmDereferenceObject( pQuoResource->Group->OwnerNode );
  1837. }
  1838. OmReferenceObject( NmLocalNode );
  1839. pQuoResource->Group->OwnerNode = NmLocalNode;
  1840. //prepare the group for onlining it
  1841. FmpPrepareGroupForOnline(pQuoResource->Group);
  1842. dwError = FmpOnlineResource(pQuoResource, TRUE);
  1843. //SS:decrement the ref count on the quorum resource object
  1844. //provided by fmfindquorumresource
  1845. OmDereferenceObject(pQuoResource);
  1846. FnExit:
  1847. FmpReleaseGroupLock();
  1848. return(dwError);
  1849. }
  1850. /****
  1851. @func DWORD | FmpGetQuorumDiskSignature | Get the signature of
  1852. the quorum disk from the cluster hive.
  1853. @parm IN LPWSTR | lpQuorumId | Identifier of the quorum resource.
  1854. @parm OUT LPDWORD | lpdwSignature | Quorum disk signature.
  1855. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  1856. @comm This function attempts to open the Resources\lpQuorumId\Parameters
  1857. key under the cluster hive and read the quorum disk signature.
  1858. @xref <f FmGetQuorumResource>
  1859. ****/
  1860. DWORD
  1861. FmpGetQuorumDiskSignature(
  1862. IN LPCWSTR lpQuorumId,
  1863. OUT LPDWORD lpdwSignature
  1864. )
  1865. {
  1866. HDMKEY hQuorumResKey = NULL;
  1867. HDMKEY hQuorumResParametersKey = NULL;
  1868. DWORD dwStatus = ERROR_SUCCESS;
  1869. //
  1870. // Chittur Subbaraman (chitturs) - 10/30/98
  1871. //
  1872. hQuorumResKey = DmOpenKey( DmResourcesKey,
  1873. lpQuorumId,
  1874. KEY_READ );
  1875. if ( hQuorumResKey != NULL )
  1876. {
  1877. //
  1878. // Open up the Parameters key
  1879. //
  1880. hQuorumResParametersKey = DmOpenKey( hQuorumResKey,
  1881. CLUSREG_KEYNAME_PARAMETERS,
  1882. KEY_READ );
  1883. DmCloseKey( hQuorumResKey );
  1884. if ( hQuorumResParametersKey != NULL )
  1885. {
  1886. //
  1887. // Read the disk signature value
  1888. //
  1889. dwStatus = DmQueryDword( hQuorumResParametersKey,
  1890. CLUSREG_NAME_PHYSDISK_SIGNATURE,
  1891. lpdwSignature,
  1892. NULL );
  1893. DmCloseKey( hQuorumResParametersKey );
  1894. } else
  1895. {
  1896. dwStatus = GetLastError();
  1897. }
  1898. } else
  1899. {
  1900. dwStatus = GetLastError();
  1901. }
  1902. //
  1903. // If you failed, then reset the signature to 0 so that the
  1904. // caller won't take any actions based on an invalid signature.
  1905. //
  1906. if ( dwStatus != ERROR_SUCCESS )
  1907. {
  1908. *lpdwSignature = 0;
  1909. }
  1910. return( dwStatus );
  1911. }
  1912. DWORD FmpGetJoinApproval()
  1913. {
  1914. DWORD dwStatus;
  1915. LPCWSTR pszNodeId;
  1916. DWORD dwNodeLen;
  1917. pszNodeId = OmObjectId(NmLocalNode);
  1918. dwNodeLen = (lstrlenW(pszNodeId)+1)*sizeof(WCHAR);
  1919. dwStatus = GumSendUpdateEx(
  1920. GumUpdateFailoverManager,
  1921. FmUpdateApproveJoin,
  1922. 1,
  1923. dwNodeLen,
  1924. pszNodeId);
  1925. return(dwStatus);
  1926. }
  1927. /****
  1928. @func DWORD | FmpBuildForceQuorumInfo | Build the force quorum info that
  1929. will be passed to the resource DLL via a control code. This
  1930. involves enumerating nodes and checking that the nodes that make up
  1931. the list passed on the command line are all valid cluster nodes.
  1932. @parm IN LPCWSTR | pszNodesIn | Comma separated list of node names. If
  1933. this is NULL then the routine just fills the quorum info structure
  1934. with 0 and a NULL node list.
  1935. @parm OUT PCLUS_FORCE_QUORUM_INFO | pForceQuorumInfo | Structure that gets
  1936. filled in with info
  1937. @rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
  1938. @comm Assumes NmInitialize was called prior to calling this routine.
  1939. @xref <f FmpBuildForceQuorumInfo>
  1940. ****/
  1941. static
  1942. DWORD
  1943. FmpBuildForceQuorumInfo(
  1944. IN LPCWSTR pszNodesIn,
  1945. OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
  1946. )
  1947. {
  1948. WCHAR *pszOut = NULL;
  1949. WCHAR *pszComma = NULL;
  1950. DWORD status = ERROR_SUCCESS;
  1951. PNM_NODE_ENUM2 pNodeEnum = NULL;
  1952. int iCurrLen = 0, iOffset = 0;
  1953. DWORD dwNodeIndex;
  1954. DWORD dwSize;
  1955. PCLUS_FORCE_QUORUM_INFO pForceQuorumInfo = NULL;
  1956. // Need to allocate a structure that can hold the nodes list.
  1957. //
  1958. dwSize = sizeof( CLUS_FORCE_QUORUM_INFO ) + sizeof( WCHAR ) * (wcslen( pszNodesIn ) + 1);
  1959. pForceQuorumInfo = LocalAlloc( LMEM_FIXED, dwSize );
  1960. if ( pForceQuorumInfo == NULL ) {
  1961. status = ERROR_NOT_ENOUGH_MEMORY;
  1962. goto ErrorExit;
  1963. }
  1964. ZeroMemory( pForceQuorumInfo, dwSize );
  1965. pForceQuorumInfo->dwSize = dwSize;
  1966. pForceQuorumInfo->dwNodeBitMask = 0;
  1967. pForceQuorumInfo->dwMaxNumberofNodes = 0;
  1968. if ( pszNodesIn == NULL ) {
  1969. pForceQuorumInfo->multiszNodeList[0] = L'\0';
  1970. goto ret;
  1971. }
  1972. // Now get the enumeration of all cluster nodes so we can check we have
  1973. // valid nodes in the list.
  1974. //
  1975. status = NmpEnumNodeDefinitions( &pNodeEnum );
  1976. if ( status != ERROR_SUCCESS )
  1977. goto ErrorExit;
  1978. // Go through all the nodes we have and ensure that they are cluster nodes.
  1979. // Get the corresponding ID and incorporate in the bitmask
  1980. //
  1981. do {
  1982. pszComma = wcschr( pszNodesIn, (int) L',');
  1983. if ( pszComma == NULL )
  1984. iCurrLen = wcslen( pszNodesIn );
  1985. else
  1986. iCurrLen = (int) (pszComma - pszNodesIn);
  1987. // At this point pszNodesIn is the start of a node name, iCurrLen chars long
  1988. // or iCurrLen is 0 in which case we have ,, in the input stream.
  1989. //
  1990. if (iCurrLen > 0) {
  1991. // Work out if this node is part of the cluster and if so get its
  1992. // ID and setup the bitmask.
  1993. //
  1994. for ( dwNodeIndex = 0; dwNodeIndex < pNodeEnum->NodeCount; dwNodeIndex++ ) {
  1995. int iNodeNameLen = wcslen( pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
  1996. ClRtlLogPrint( LOG_NOISE, "[Fm] FmpBuildForceQuorumInfo: trying %1\r\n",
  1997. pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
  1998. if ( _wcsnicmp( pNodeEnum->NodeList[ dwNodeIndex ].NodeName,
  1999. pszNodesIn,
  2000. max(iCurrLen, iNodeNameLen) ) == 0 ) {
  2001. ClRtlLogPrint( LOG_NOISE, "[Fm] FmpBuildForceQuorumInfo: got match %1\r\n",
  2002. pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
  2003. // Set the mask and max nodes and break - ignore duplicates.
  2004. //
  2005. if ( !(pForceQuorumInfo->dwNodeBitMask & (1 << dwNodeIndex)) ) {
  2006. pForceQuorumInfo->dwMaxNumberofNodes += 1;
  2007. pForceQuorumInfo->dwNodeBitMask |= 1 << dwNodeIndex;
  2008. wcscpy( &pForceQuorumInfo->multiszNodeList[iOffset], pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
  2009. iOffset += wcslen( pNodeEnum->NodeList[ dwNodeIndex ].NodeName ) + 1;
  2010. }
  2011. break;
  2012. }
  2013. }
  2014. if ( dwNodeIndex == pNodeEnum->NodeCount ) {
  2015. ClRtlLogPrint( LOG_UNUSUAL, "[Fm] FmpBuildForceQuorumInfo: no match for %1\r\n", pszNodesIn );
  2016. status = ERROR_INVALID_PARAMETER;
  2017. goto ErrorExit;
  2018. }
  2019. } else if ( pszComma != NULL ) {
  2020. ClRtlLogPrint( LOG_ERROR,
  2021. "[Fm] FmpBuildForceQuorumInfo: iCurrLen was 0 so ,, was in node list: %1\r\n",
  2022. CsForceQuorumNodes );
  2023. status = ERROR_INVALID_PARAMETER;
  2024. goto ErrorExit;
  2025. }
  2026. pszNodesIn = pszComma + 1;
  2027. } while ( pszComma != NULL);
  2028. pForceQuorumInfo->multiszNodeList[ iOffset ] = L'\0';
  2029. goto ret;
  2030. ErrorExit:
  2031. if ( pForceQuorumInfo != NULL ) {
  2032. LocalFree( pForceQuorumInfo );
  2033. pForceQuorumInfo = NULL;
  2034. }
  2035. ret:
  2036. if ( pNodeEnum != NULL ) {
  2037. ClNetFreeNodeEnum( pNodeEnum );
  2038. }
  2039. if ( status == ERROR_SUCCESS ) {
  2040. *ppForceQuorumInfo = pForceQuorumInfo;
  2041. }
  2042. return status;
  2043. }
  2044. static
  2045. void
  2046. FmpDeleteForceQuorumInfo(
  2047. IN OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
  2048. )
  2049. {
  2050. (void) LocalFree( *ppForceQuorumInfo );
  2051. *ppForceQuorumInfo = NULL;
  2052. }