Source code of Windows XP (NT5)

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. group.c
  5. Abstract:
  6. Cluster group management routines.
  7. Author:
  8. Rod Gamache (rodga) 8-Mar-1996
  9. Notes:
  10. WARNING: All of the routines in this file assume that the group
  11. lock is held when they are called.
  12. Revision History:
  13. --*/
  14. #include "fmp.h"
  15. #define LOG_MODULE GROUP
  16. //
  17. // Global Data
  18. //
  19. CRITICAL_SECTION FmpGroupLock;
  20. //
  21. // Local function prototypes
  22. //
  23. /////////////////////////////////////////////////////////////////////////////
  24. //
  25. // Group Management Routines
  26. //
  27. /////////////////////////////////////////////////////////////////////////////
  28. BOOL
  29. FmpInPreferredList(
  30. IN PFM_GROUP Group,
  31. IN PNM_NODE Node,
  32. IN BOOL bRecalc,
  33. IN PFM_RESOURCE pRefResource
  34. )
  35. /*++
  36. Routine Description:
  37. Check if a node is in the preferred list for the Group.
  38. Arguments:
  39. Group - Pointer to the group object with the preferred owners list.
  40. Node - The Node to check for.
  41. bRecalc - If set to TRUE, we recalculate the preferred list for the group
  42. based on the possible node list for the reference resource.
  43. pRefResource - If NULL, we walk all the resources in the
  44. group and calculate their possible node list to see
  45. if it has since expanded due to the fact that dlls
  46. were copied to nodes.
  47. Return Value:
  48. TRUE - if the node is in the list.
  49. FALSE - if the node is NOT in the list.
  50. --*/
  51. {
  52. PLIST_ENTRY listEntry;
  53. PPREFERRED_ENTRY preferredEntry;
  54. BOOL bRet = FALSE;
  55. //
  56. // For each entry in the Preferred list, it must exist in the possible
  57. // list.
  58. //
  59. ChkInPrefList:
  60. for ( listEntry = Group->PreferredOwners.Flink;
  61. listEntry != &(Group->PreferredOwners);
  62. listEntry = listEntry->Flink ) {
  63. preferredEntry = CONTAINING_RECORD( listEntry,
  64. PREFERRED_ENTRY,
  65. PreferredLinkage );
  66. if ( preferredEntry->PreferredNode == Node ) {
  67. return(TRUE);
  68. }
  69. }
  70. if (bRecalc)
  71. {
  72. PFM_RESOURCE pResource;
  73. DWORD dwStatus;
  74. LPWSTR lpszOwners = NULL;
  75. DWORD dwMaxSize=0;
  76. HDMKEY hGroupKey;
  77. DWORD dwSize = 0;
  78. hGroupKey = DmOpenKey(DmGroupsKey, OmObjectId(Group),
  79. KEY_READ);
  80. if (hGroupKey == NULL)
  81. {
  82. dwStatus = GetLastError();
  83. ClRtlLogPrint(LOG_CRITICAL,
  84. "[FM] FmInPreferredList: Couldnt open group key\r\n",
  85. dwStatus);
  86. CL_UNEXPECTED_ERROR(dwStatus);
  87. goto FnExit;
  88. }
  89. //the group preferred list must not be set by the user
90. //if it is, then there is no point in doing this recalculation
  91. dwStatus = DmQueryMultiSz( hGroupKey,
  92. CLUSREG_NAME_GRP_PREFERRED_OWNERS,
  93. &lpszOwners,
  94. &dwMaxSize,
  95. &dwSize );
  96. if (lpszOwners)
  97. LocalFree(lpszOwners);
  98. DmCloseKey(hGroupKey);
  99. if (dwStatus == ERROR_FILE_NOT_FOUND)
  100. {
  101. DWORD dwUserModified;
  102. for (listEntry = Group->Contains.Flink;
  103. listEntry != &(Group->Contains);
  104. listEntry = listEntry->Flink)
  105. {
  106. pResource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  107. //the resource possible node list must not be set by the user
  108. //if it is, then we can skip this resource
  109. dwStatus = DmQueryDword( pResource->RegistryKey,
  110. CLUSREG_NAME_RES_USER_MODIFIED_POSSIBLE_LIST,
  111. &dwUserModified,
  112. NULL );
  113. if (dwStatus == ERROR_FILE_NOT_FOUND)
  114. {
  115. FmpSetPossibleNodeForResType(OmObjectId(pResource->Type),
  116. TRUE);
  117. if (FmpInPossibleListForResType(pResource->Type,Node) &&
  118. !FmpInPossibleListForResource(pResource, Node))
  119. {
  120. //add to the resource possible node list
121. //this will in turn add the node to the pref list of the group
  122. FmChangeResourceNode(pResource, Node, TRUE);
  123. }
  124. }
  125. }
126. //set bRecalc to be FALSE so that we don't evaluate this again
  127. bRecalc = FALSE;
  128. goto ChkInPrefList;
  129. }
  130. }
  131. FnExit:
  132. return(bRet);
  133. } // FmpInPreferredList
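//
// Usage sketch (illustrative only, not original code): a hypothetical
// caller validating a proposed owner before allowing a move. With
// bRecalc == FALSE this is a cheap membership test; TRUE may trigger
// the recalculation path above.
//
//     if ( !FmpInPreferredList( group, proposedNode, FALSE, NULL ) ) {
//         return(ERROR_INVALID_PARAMETER);   // hypothetical rejection
//     }
//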
  134. BOOL
  135. FmpHigherInPreferredList(
  136. IN PFM_GROUP Group,
  137. IN PNM_NODE Node1,
  138. IN PNM_NODE Node2
  139. )
  140. /*++
  141. Routine Description:
  142. Check if Node1 is higher (in priority) in the preferred owners list than
143. Node2.
  144. Arguments:
  145. Group - Pointer to the group object with the preferred owners list.
  146. Node1 - The Node that should be higher in the list.
  147. Node2 - The Node that should be lower in the list.
  148. Return Value:
  149. TRUE - if Node1 is higher in the list.
  150. FALSE - if Node2 is higher in the list, or Node1 is not in the list at all.
  151. --*/
  152. {
  153. PLIST_ENTRY listEntry;
  154. PPREFERRED_ENTRY preferredEntry;
  155. DWORD orderedOwners = 0;
  156. //
  157. // For each entry in the Preferred list, check whether Node1 or Node2 is
  158. // higher.
  159. //
  160. for ( listEntry = Group->PreferredOwners.Flink;
161. listEntry != &(Group->PreferredOwners) &&
162. orderedOwners < Group->OrderedOwners;
  163. listEntry = listEntry->Flink ) {
  164. preferredEntry = CONTAINING_RECORD( listEntry,
  165. PREFERRED_ENTRY,
  166. PreferredLinkage );
  167. if ( preferredEntry->PreferredNode == Node1 ) {
  168. return(TRUE);
  169. }
  170. if ( preferredEntry->PreferredNode == Node2 ) {
  171. return(FALSE);
  172. }
  173. orderedOwners++;
  174. }
  175. return(FALSE);
  176. } // FmpHigherInPreferredList
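//
// Note (illustrative): only the first Group->OrderedOwners entries are
// consulted, so two nodes that both sit in the unordered tail of the
// list compare as FALSE. A minimal sketch with hypothetical names:
//
//     if ( FmpHigherInPreferredList( group, candidate, currentOwner ) ) {
//         // candidate outranks the current owner in the ordered portion
//     }
//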
  177. DWORD
  178. FmpSetPreferredEntry(
  179. IN PFM_GROUP Group,
  180. IN PNM_NODE Node
  181. )
  182. /*++
  183. Routine Description:
  184. Add a node to the preferred list for the Group.
  185. Arguments:
  186. Group - Pointer to the group object with the preferred owners list.
  187. Node - The Node to add.
  188. Return Value:
  189. ERROR_SUCCESS if node is added.
  190. ERROR_NOT_ENOUGH_MEMORY on failure.
  191. --*/
  192. {
  193. PLIST_ENTRY listEntry;
  194. PPREFERRED_ENTRY preferredEntry;
  195. //
  196. // Make sure entry is not already present in list.
  197. //
  198. if ( FmpInPreferredList( Group, Node, FALSE, NULL ) ) {
  199. return(ERROR_SUCCESS);
  200. }
  201. //
  202. // Create the Preferred Owners List entry.
  203. //
  204. preferredEntry = LocalAlloc( LMEM_FIXED, sizeof(PREFERRED_ENTRY) );
  205. if ( preferredEntry == NULL ) {
  206. ClRtlLogPrint( LOG_ERROR,
  207. "[FM] Error allocating preferred owner entry for group %1!ws!. Stopped adding.\n",
  208. OmObjectId(Group));
  209. return(ERROR_NOT_ENOUGH_MEMORY);
  210. }
  211. //
  212. // Create the preferred owner entry and keep a reference on the node object.
  213. //
  214. OmReferenceObject( Node );
  215. preferredEntry->PreferredNode = Node;
  216. InsertTailList( &Group->PreferredOwners,
  217. &preferredEntry->PreferredLinkage );
  218. return(ERROR_SUCCESS);
  219. } // FmpSetPreferredEntry
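//
// Sketch (hypothetical caller, group lock held): seeding a group's
// preferred owners. The routine is idempotent, and per the header the
// only failure mode is ERROR_NOT_ENOUGH_MEMORY.
//
//     dwStatus = FmpSetPreferredEntry( group, node );
//     if ( dwStatus != ERROR_SUCCESS ) {
//         return(dwStatus);
//     }
//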
  220. BOOL FmpFindNodeThatMightBeAddedToPrefList(
  221. IN PFM_GROUP pGroup,
  222. IN PNM_NODE *pDestNode,
  223. IN PVOID pNode,
  224. IN LPCWSTR szName)
  225. {
  226. BOOL bRet = TRUE; //assume we will continue enumeration
  227. *pDestNode = NULL;
  228. //if this node is not up or if this is the local node, continue
  229. if ((pNode == NmLocalNode) || (NmGetNodeState(pNode) != ClusterNodeUp))
  230. {
  231. return(bRet);
  232. }
  233. if (FmpInPreferredList(pGroup, pNode, TRUE, NULL))
  234. {
  235. bRet = FALSE;
  236. *pDestNode = pNode;
  237. }
  238. return(bRet);
  239. }
  240. PNM_NODE
  241. FmpFindAnotherNode(
  242. IN PFM_GROUP Group,
  243. IN BOOL bChooseMostPreferredNode
  244. )
  245. /*++
  246. Routine Description:
  247. Check if another node is up that can take the group.
  248. Arguments:
  249. Group - Pointer to the group object we're checking.
  250. bChooseMostPreferredNode - Whether to choose the most preferred node or not.
  251. Return Value:
  252. Pointer to node object that the group can move to.
  253. NULL if another system is not found.
  254. --*/
  255. {
  256. PLIST_ENTRY listEntry;
  257. PPREFERRED_ENTRY preferredEntry;
  258. PNM_NODE first = NULL;
  259. BOOLEAN flag = FALSE;
  260. //
  261. // First, let us give the anti-affinity algorithm a shot at picking the node.
  262. //
  263. first = FmpGetNodeNotHostingUndesiredGroups ( Group,
  264. TRUE ); // Rule out local node
  265. if ( first != NULL )
  266. {
  267. goto FnExit;
  268. }
  269. //
  270. // For each entry in the Preferred list, find a system (other than the
  271. // local system that is up).
  272. //
  273. if ( bChooseMostPreferredNode )
  274. {
  275. first = FmpGetNonLocalPreferredNode( Group );
  276. //
277. // In this case, in which the move is user-initiated, give the randomized
278. // preferred list algorithm a chance to pick the node. Note that if the randomized
279. // algorithm cannot pick a node, it returns the supplied suggested node itself.
  280. //
  281. if ( first != NULL )
  282. {
  283. first = FmpPickNodeFromPreferredListAtRandom ( Group,
  284. first, // Suggested default
285. TRUE, // Don't choose local node
  286. TRUE ); // Check whether randomization
  287. // should be disabled
  288. }
  289. }
  290. else
  291. {
  292. for ( listEntry = Group->PreferredOwners.Flink;
  293. listEntry != &(Group->PreferredOwners);
  294. listEntry = listEntry->Flink ) {
  295. preferredEntry = CONTAINING_RECORD( listEntry,
  296. PREFERRED_ENTRY,
  297. PreferredLinkage );
  298. if ( (preferredEntry->PreferredNode != NmLocalNode) &&
  299. (NmGetExtendedNodeState(preferredEntry->PreferredNode) == ClusterNodeUp) ) {
  300. if (flag == TRUE)
  301. return(preferredEntry->PreferredNode);
  302. else if (first == NULL)
  303. first = preferredEntry->PreferredNode;
  304. } else if (preferredEntry->PreferredNode == NmLocalNode) {
  305. flag = TRUE;
  306. }
  307. }
  308. }
309. //if we couldn't find a node, we retry since the user might have
310. //expanded the possible node list for a resource type since then;
311. //if the group preferred list is not set by the user,
312. //we recalculate it since it could have changed
  313. if (first == NULL)
  314. {
  315. LPWSTR lpszOwners = NULL;
  316. DWORD dwMaxSize=0;
  317. HDMKEY hGroupKey;
  318. DWORD dwSize = 0;
  319. DWORD dwStatus;
  320. hGroupKey = DmOpenKey(DmGroupsKey, OmObjectId(Group),
  321. KEY_READ);
  322. if (hGroupKey == NULL)
  323. {
  324. dwStatus = GetLastError();
  325. ClRtlLogPrint(LOG_CRITICAL,
  326. "[FM] FmInPreferredList: Couldnt open group key\r\n",
  327. dwStatus);
  328. CL_UNEXPECTED_ERROR(dwStatus);
  329. goto FnExit;
  330. }
  331. //the group preferred list must not be set by the user
332. //if it is, then there is no point in doing this recalculation
  333. dwStatus = DmQueryMultiSz( hGroupKey,
  334. CLUSREG_NAME_GRP_PREFERRED_OWNERS,
  335. &lpszOwners,
  336. &dwMaxSize,
  337. &dwSize );
  338. if (lpszOwners)
  339. LocalFree(lpszOwners);
  340. DmCloseKey(hGroupKey);
  341. if (dwStatus == ERROR_FILE_NOT_FOUND)
  342. OmEnumObjects(ObjectTypeNode, FmpFindNodeThatMightBeAddedToPrefList,
  343. Group, &first);
  344. }
  345. FnExit:
  346. return(first);
  347. } // FmpFindAnotherNode
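//
// Failover sketch (illustrative, names hypothetical): picking a target
// for a group that can no longer run here. Per the comments above, TRUE
// favors the most preferred (possibly randomized) node for
// user-initiated moves; FALSE favors the node after the local node.
//
//     PNM_NODE target = FmpFindAnotherNode( group, FALSE );
//     if ( target == NULL ) {
//         // no other node can currently host the group
//     }
//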
  348. PNM_NODE
  349. FmpGetPreferredNode(
  350. IN PFM_GROUP Group
  351. )
  352. /*++
  353. Routine Description:
  354. Find best node that can take the group
  355. Arguments:
  356. Group - Pointer to the group object we're checking.
  357. Return Value:
  358. Pointer to node object that the group can move to.
  359. NULL if another system is not found.
  360. --*/
  361. {
  362. PLIST_ENTRY listEntry;
  363. PPREFERRED_ENTRY preferredEntry;
  364. PNM_NODE pNode = NULL;
  365. //
  366. // First, let us give the anti-affinity algorithm a shot at picking the node.
  367. //
  368. pNode = FmpGetNodeNotHostingUndesiredGroups ( Group,
  369. FALSE ); // Don't rule out local node
  370. if ( pNode != NULL )
  371. {
  372. return ( pNode );
  373. }
  374. //
  375. // For each entry in the Preferred list, find a system that is up.
  376. //
  377. for ( listEntry = Group->PreferredOwners.Flink;
  378. listEntry != &(Group->PreferredOwners);
  379. listEntry = listEntry->Flink ) {
  380. preferredEntry = CONTAINING_RECORD( listEntry,
  381. PREFERRED_ENTRY,
  382. PreferredLinkage );
  383. if (NmGetNodeState(preferredEntry->PreferredNode) == ClusterNodeUp ) {
  384. return(preferredEntry->PreferredNode);
  385. }
  386. }
  387. return(NULL);
  388. } // FmpGetPreferredNode
  389. PNM_NODE
  390. FmpGetNonLocalPreferredNode(
  391. IN PFM_GROUP Group
  392. )
  393. /*++
  394. Routine Description:
  395. Find best node that can take the group which is not the local node.
  396. Arguments:
  397. Group - Pointer to the group object we're checking.
  398. Return Value:
  399. Pointer to node object that the group can move to.
  400. NULL if another system is not found.
  401. --*/
  402. {
  403. PLIST_ENTRY listEntry;
  404. PPREFERRED_ENTRY preferredEntry;
  405. //
  406. // For each entry in the Preferred list, find a system (other than the
  407. // local system that is up).
  408. //
  409. for ( listEntry = Group->PreferredOwners.Flink;
  410. listEntry != &(Group->PreferredOwners);
  411. listEntry = listEntry->Flink ) {
  412. preferredEntry = CONTAINING_RECORD( listEntry,
  413. PREFERRED_ENTRY,
  414. PreferredLinkage );
  415. if ( preferredEntry->PreferredNode == NmLocalNode ) {
  416. continue;
  417. }
  418. if (NmGetNodeState(preferredEntry->PreferredNode) == ClusterNodeUp ) {
  419. return(preferredEntry->PreferredNode);
  420. }
  421. }
  422. return(NULL);
  423. } // FmpGetNonLocalPreferredNode
  424. BOOL
  425. FmpIsGroupQuiet(
  426. IN PFM_GROUP Group,
  427. IN CLUSTER_GROUP_STATE WantedState
  428. )
  429. /*++
  430. Routine Description:
  431. Checks if the group has any pending resources.
  432. Arguments:
  433. Group - the Group to check.
  434. WantedState - the state the Group wants to get to.
  435. Return Value:
  436. TRUE - if the Group is not doing anything now.
  437. FALSE otherwise.
  438. --*/
  439. {
  440. DWORD status;
  441. PLIST_ENTRY listEntry;
  442. PFM_RESOURCE Resource;
  443. if ( Group->MovingList ) {
  444. return(FALSE);
  445. }
  446. //
  447. // Check all of the resources contained within this group.
  448. //
  449. for ( listEntry = Group->Contains.Flink;
  450. listEntry != &(Group->Contains);
  451. listEntry = listEntry->Flink ) {
  452. Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  453. switch ( WantedState ) {
  454. case ClusterGroupOnline:
  455. // if resource is pending, then offline pending is bad
  456. if ( Resource->State == ClusterResourceOfflinePending ) {
  457. return(FALSE);
  458. }
  459. break;
  460. case ClusterGroupOffline:
  461. // if resource is pending, then online pending is bad
  462. if ( Resource->State == ClusterResourceOnlinePending ) {
  463. return(FALSE);
  464. }
  465. break;
  466. default:
  467. // any pending state is bad
  468. if ( Resource->State >= ClusterResourcePending ) {
  469. return(FALSE);
  470. }
  471. break;
  472. }
  473. }
  474. return(TRUE);
  475. } // FmpIsGroupQuiet
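//
// Guard sketch (illustrative): callers such as FmpOnlineGroup below use
// this check to reject state changes while a contained resource is
// still pending in the conflicting direction.
//
//     if ( !FmpIsGroupQuiet( group, ClusterGroupOnline ) ) {
//         return(ERROR_INVALID_STATE);
//     }
//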
  476. VOID
  477. FmpSetGroupPersistentState(
  478. IN PFM_GROUP Group,
  479. IN CLUSTER_GROUP_STATE State
  480. )
  481. /*++
  482. Routine Description:
  483. Sets the PersistentState of a Group. This includes the registry.
  484. Arguments:
  485. Group - The Group to set the state for.
  486. State - The new state for the Group.
487. Returns:
488. None. The new state is recorded in the group object and, for the
489. online/offline cases, persisted to the registry.
  490. Notes:
  491. The LocalGroupLock must be held.
  492. --*/
  493. {
  494. DWORD persistentState;
  495. LPWSTR persistentStateName = CLUSREG_NAME_GRP_PERSISTENT_STATE;
  496. if (!gbIsQuoResEnoughSpace)
  497. return;
  498. FmpAcquireLocalGroupLock( Group );
  499. //
  500. // If the current state has changed, then do the work. Otherwise,
  501. // skip the effort.
  502. //
  503. if ( Group->PersistentState != State ) {
  504. Group->PersistentState = State;
  505. CL_ASSERT( Group->RegistryKey != NULL );
  506. //
  507. // Set the new value, but only if it is online or offline.
  508. //
  509. if ( State == ClusterGroupOnline ) {
  510. persistentState = 1;
  511. DmSetValue( Group->RegistryKey,
  512. persistentStateName,
  513. REG_DWORD,
  514. (LPBYTE)&persistentState,
  515. sizeof(DWORD) );
  516. } else if ( State == ClusterGroupOffline ) {
  517. persistentState = 0;
  518. DmSetValue( Group->RegistryKey,
  519. persistentStateName,
  520. REG_DWORD,
  521. (LPBYTE)&persistentState,
  522. sizeof(DWORD) );
  523. }
  524. }
  525. FmpReleaseLocalGroupLock( Group );
  526. } // FmpSetGroupPersistentState
  527. DWORD
  528. FmpOnlineGroup(
  529. IN PFM_GROUP Group,
  530. IN BOOL ForceOnline
  531. )
  532. /*++
  533. Routine Description:
  534. Bring the specified group online. This means bringing all of the
  535. individual resources contained within the group online. This is an
  536. atomic operation - so either all resources contained within the group
  537. are brought online, or none of them are.
  538. Arguments:
  539. Group - Supplies a pointer to the group structure to bring online.
  540. ForceOnline - TRUE if all resources in the Group should be forced online.
541. Returns:
  542. ERROR_SUCCESS if the request was successful.
  543. A Win32 error code on failure.
  544. --*/
  545. {
  546. DWORD status, retstatus = ERROR_SUCCESS;
  547. PLIST_ENTRY listEntry;
  548. PFM_RESOURCE Resource;
  549. BOOL bPending = FALSE;
  550. ClRtlLogPrint(LOG_NOISE,
  551. "[FM] OnlineGroup for %1!ws! owner %2!d!\n",
  552. OmObjectId(Group), OmObjectId(Group->OwnerNode));
  553. FmpAcquireLocalGroupLock( Group );
  554. //
  555. // Check if we are the owner... if not, return failure.
  556. //
  557. if ( gpQuoResource->Group != Group &&
  558. ((Group->OwnerNode != NmLocalNode) ||
  559. !FmpInPreferredList( Group, Group->OwnerNode, TRUE, NULL) ) ) {
  560. FmpReleaseLocalGroupLock( Group );
  561. return(ERROR_HOST_NODE_NOT_RESOURCE_OWNER);
  562. }
  563. //
  564. // Make sure the group is quiet
  565. //
  566. if ( !FmpIsGroupQuiet( Group, ClusterGroupOnline ) ) {
  567. FmpReleaseLocalGroupLock( Group );
  568. return(ERROR_INVALID_STATE);
  569. }
570. //if the quorum resource is in this group, bring it online first.
571. //This is called when a node goes down and its groups are
572. //being reclaimed; the order in which the resources are brought
573. //online is important
  574. if ( gpQuoResource->Group == Group)
  575. {
  576. //SS:: if the quorum resource is in the group, it must be
  577. //brought online irrespective of the persistent state
  578. //so we will pass in true here
  579. //Apps can mess with persistent state via the common
  580. //properties and then cause havoc so we need to force the
  581. //quorum resource online despite that
  582. status = FmpDoOnlineResource( gpQuoResource,
  583. TRUE );
  584. if ( (status != ERROR_SUCCESS) &&
  585. (status != ERROR_IO_PENDING) ) {
  586. ClRtlLogPrint(LOG_NOISE,
  587. "[FM] OnlineGroup: Failed on resource %1!ws!. Status %2!u!\n",
  588. OmObjectId(gpQuoResource),
  589. status);
  590. CL_UNEXPECTED_ERROR(status);
  591. }
  592. }
  593. //
  594. // Bring online all of the resources contained within this group.
  595. //
  596. for ( listEntry = Group->Contains.Flink;
  597. listEntry != &(Group->Contains);
  598. listEntry = listEntry->Flink ) {
  599. Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  600. status = FmpDoOnlineResource( Resource,
  601. ForceOnline );
  602. if (status == ERROR_IO_PENDING) {
  603. bPending = TRUE;
  604. }
  605. if ( (status != ERROR_SUCCESS) &&
  606. (status != ERROR_NODE_CANT_HOST_RESOURCE) &&
  607. (status != ERROR_IO_PENDING) ) {
  608. ClRtlLogPrint(LOG_NOISE,
  609. "[FM] OnlineGroup: Failed on resource %1!ws!. Status %2!u!\n",
  610. OmObjectId(Resource),
  611. status);
  612. retstatus = status;
  613. }
  614. }
  615. //
  616. // Normally bringing the resources online propagates the group state,
  617. // but in order to get the state right for a group with no resources,
  618. // manually propagate the state here.
  619. //
  620. FmpPropagateGroupState(Group);
  621. ClRtlLogPrint(LOG_NOISE,
  622. "[FM] OnlineGroup: setting group state to Online for %1!ws!\n",
  623. OmObjectId(Group));
  624. FmpReleaseLocalGroupLock( Group );
  625. if (retstatus == ERROR_SUCCESS) {
  626. if (bPending) {
  627. retstatus = ERROR_IO_PENDING;
  628. }
  629. }
  630. return(retstatus);
  631. } // FmpOnlineGroup
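//
// Caller sketch (illustrative): ERROR_IO_PENDING is a soft result here,
// meaning at least one resource is still coming online; it should be
// treated as in-progress rather than as a failure.
//
//     status = FmpOnlineGroup( group, FALSE );
//     if ( (status != ERROR_SUCCESS) && (status != ERROR_IO_PENDING) ) {
//         // genuine failure: log and propagate
//     }
//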
  632. DWORD
  633. FmpOfflineGroup(
  634. IN PFM_GROUP Group,
  635. IN BOOL OfflineQuorum,
  636. IN BOOL SetPersistent
  637. )
  638. /*++
  639. Routine Description:
  640. Bring the specified group offline. This means bringing all of the
  641. individual resources contained within the group offline.
  642. Arguments:
  643. Group - Supplies a pointer to the group structure to bring offline.
  644. OfflineQuorum - TRUE if any quorum resource in this group should
  645. be taken offline. FALSE if the quorum resource should be left online.
  646. SetPersistent - TRUE if the persistent state of each resource should be
  647. updated.
  648. Returns:
  649. ERROR_SUCCESS if the request was successful.
  650. A Win32 error code on failure.
  651. --*/
  652. {
  653. DWORD status;
  654. PLIST_ENTRY listEntry;
  655. PFM_RESOURCE Resource;
  656. DWORD returnStatus = ERROR_SUCCESS;
  657. PRESOURCE_ENUM ResourceEnum=NULL;
  658. DWORD i;
  659. FmpAcquireLocalGroupLock( Group );
  660. //if the group has been marked for delete, then fail this call
  661. if (!IS_VALID_FM_GROUP(Group))
  662. {
  663. FmpReleaseLocalGroupLock( Group);
  664. return (ERROR_GROUP_NOT_AVAILABLE);
  665. }
  666. ClRtlLogPrint(LOG_NOISE,
  667. "[FM] FmpOfflineGroup, Group=%1!ws!\n",
  668. OmObjectId(Group));
  669. //
  670. // Check if we are the owner... if not, return failure.
  671. //
  672. if ( Group->OwnerNode != NmLocalNode ) {
  673. returnStatus = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
  674. goto error_exit;
  675. }
  676. //
  677. // Make sure the group is quiet
  678. //
  679. if ( !FmpIsGroupQuiet( Group, ClusterGroupOffline ) ) {
  680. returnStatus = ERROR_INVALID_STATE;
  681. goto error_exit;
  682. }
  683. //
  684. // Get the list of resources in the group and their states.
  685. //
  686. returnStatus = FmpGetResourceList( &ResourceEnum, Group );
  687. if ( returnStatus != ERROR_SUCCESS ) {
  688. goto error_exit;
  689. }
  690. // offline all resources except the quorum resource
  691. for ( i = 0; i < ResourceEnum->EntryCount; i++ ) {
  692. Resource = OmReferenceObjectById( ObjectTypeResource,
  693. ResourceEnum->Entry[i].Id );
  694. if ( Resource == NULL ) {
  695. returnStatus = ERROR_RESOURCE_NOT_FOUND;
  696. goto error_exit;
  697. }
  698. //quorum resource is brought offline last
  699. if (Resource->QuorumResource)
  700. {
  701. OmDereferenceObject(Resource);
  702. continue;
  703. }
  704. if (SetPersistent) {
  705. FmpSetResourcePersistentState( Resource, ClusterResourceOffline );
  706. }
  707. status = FmpOfflineResource( Resource, FALSE);
  708. OmDereferenceObject( Resource );
  709. if ( (status != ERROR_SUCCESS) &&
  710. (status != ERROR_IO_PENDING) ) {
  711. returnStatus = status;
  712. goto error_exit;
  713. }
  714. if ( status == ERROR_IO_PENDING ) {
  715. returnStatus = ERROR_IO_PENDING;
  716. }
  717. }
718. // bring the quorum resource offline now, if asked to do so.
719. // This allows other resources to go offline and save their checkpoints.
720. // The quorum resource offline should block until the resources have
721. // finished saving their checkpoints.
  722. if (ResourceEnum->ContainsQuorum >= 0)
  723. {
  724. if (!OfflineQuorum)
  725. {
  726. //if the quorum resource should not be taken offline
  727. returnStatus = ERROR_QUORUM_RESOURCE;
  728. }
  729. else if (returnStatus == ERROR_SUCCESS)
  730. {
  731. CL_ASSERT((DWORD)ResourceEnum->ContainsQuorum < ResourceEnum->EntryCount);
  732. Resource = OmReferenceObjectById( ObjectTypeResource,
  733. ResourceEnum->Entry[ResourceEnum->ContainsQuorum].Id );
  734. if ( Resource == NULL ) {
  735. returnStatus = ERROR_RESOURCE_NOT_FOUND;
  736. goto error_exit;
  737. }
  738. ClRtlLogPrint(LOG_NOISE,
  739. "[FM] FmpOfflineGroup: Bring quorum resource offline\n");
  740. if ( !(Resource->Flags & RESOURCE_WAITING) ) {
  741. if (Resource->State != ClusterResourceOffline) {
  742. Resource->State = ClusterResourceOnline; // [HACKHACK]
  743. }
  744. status = FmpOfflineResource( Resource , FALSE);
  745. OmDereferenceObject( Resource );
  746. if ( (status != ERROR_SUCCESS) &&
  747. (status != ERROR_IO_PENDING) )
  748. {
  749. returnStatus = status;
  750. goto error_exit;
  751. }
  752. if ( status == ERROR_IO_PENDING )
  753. returnStatus = ERROR_IO_PENDING;
  754. } else {
  755. OmDereferenceObject( Resource );
  756. }
  757. }
  758. }
  759. //
  760. // Normally bringing the resources offline propagates the group state,
  761. // but in order to get the state right for a group with no resources,
  762. // manually propagate the state here.
  763. //
  764. if (SetPersistent)
  765. FmpPropagateGroupState(Group);
  766. error_exit:
  767. FmpReleaseLocalGroupLock( Group );
  768. if (ResourceEnum)
  769. FmpDeleteResourceEnum( ResourceEnum );
  770. return(returnStatus);
  771. } // FmpOfflineGroup
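//
// Caller sketch (illustrative): taking a group offline while leaving a
// quorum resource up, as shutdown phase 1 below does. A return of
// ERROR_QUORUM_RESOURCE means everything except the quorum resource was
// taken (or is going) offline.
//
//     status = FmpOfflineGroup( group, FALSE, FALSE );
//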
  772. CLUSTER_GROUP_STATE
  773. FmpGetGroupState(
  774. IN PFM_GROUP Group,
  775. IN BOOL IsNormalized
  776. )
  777. /*++
  778. Routine Description:
  779. Get the Group state, either normalized to ClusterGroupOnline or
  780. ClusterGroupOffline or not normalized.
  781. Arguments:
  782. Group - The Group we're interested in.
  783. IsNormalized - Should the Group state be normalized ?
  784. Returns:
  785. The current Group state which is one of (in increasing order of
  786. precedence)
  787. ClusterGroupOnline, ClusterGroupOffline
  788. ClusterGroupPartialOnline
  789. ClusterGroupPending (only if IsNormalized is FALSE)
  790. ClusterGroupFailed (only if IsNormalized is FALSE)
  791. --*/
  792. {
  793. PLIST_ENTRY listEntry;
  794. PFM_RESOURCE resource;
  795. CLUSTER_GROUP_STATE state;
  796. CLUSTER_RESOURCE_STATE firstResourceState;
  797. CLUSTER_RESOURCE_STATE resourceState;
  798. // Chittur Subbaraman (chitturs) - 09/16/98 (Modified this function
  799. // to work with IsNormalized flag)
  800. FmpAcquireLocalGroupLock( Group );
  801. if ( !IsListEmpty(&Group->Contains) ) {
  802. listEntry = Group->Contains.Flink;
  803. resource = CONTAINING_RECORD(listEntry,
  804. FM_RESOURCE,
  805. ContainsLinkage);
  806. //
  807. // Get the first resource's state
  808. //
  809. firstResourceState = resource->State;
  810. if ( IsNormalized == FALSE ) {
  811. BOOL IsPending = FALSE;
  812. BOOL IsPartialOnline = FALSE;
  813. //
  814. // First check whether any resource in the group has
  815. // failed. If so, set the group state to ClusterGroupFailed
  816. // and exit immediately. If no resource in the group has
  817. // failed, but at least one of them is in the pending state,
  818. // then set the group state to ClusterGroupPending and exit
  819. // immediately. If no resource in the group is in either
  820. // the failed or in the pending state, then check whether
  821. // some resources in the group are in online and some in the
  822. // offline state. Then, set the group state to
  823. // ClusterGroupPartialOnline and exit immediately.
  824. //
  825. for ( ;
  826. listEntry != &(Group->Contains);
  827. listEntry = listEntry->Flink ) {
  828. resource = CONTAINING_RECORD(listEntry,
  829. FM_RESOURCE,
  830. ContainsLinkage);
  831. resourceState = resource->State;
  832. if ( resourceState == ClusterResourceFailed ) {
  833. state = ClusterGroupFailed;
  834. //
  835. // This state has the highest precedence, so
  836. // exit immediately.
  837. //
  838. goto FnExit;
  839. } else if ( (resourceState == ClusterResourceOnlinePending) ||
  840. (resourceState == ClusterResourceOfflinePending) ) {
  841. IsPending = TRUE;
  842. } else {
  843. CL_ASSERT( (resourceState == ClusterResourceOffline) ||
  844. (resourceState == ClusterResourceOnline) ||
  845. (resourceState == ClusterResourceInitializing) );
  846. if ( resourceState == ClusterResourceInitializing ) {
  847. //
  848. // Normalize this state to offline state
  849. //
  850. resourceState = ClusterResourceOffline;
  851. }
  852. if ( firstResourceState == ClusterResourceInitializing ) {
  853. //
  854. // Normalize this state to offline state
  855. //
  856. firstResourceState = ClusterResourceOffline;
  857. }
  858. if ( firstResourceState != resourceState ) {
  859. IsPartialOnline = TRUE;
  860. }
  861. }
  862. }
  863. if ( IsPending == TRUE ) {
  864. state = ClusterGroupPending;
  865. //
  866. // This state has the next highest precedence after
  867. // ClusterGroupFailed state
  868. //
  869. goto FnExit;
  870. }
  871. if ( IsPartialOnline == TRUE ) {
  872. state = ClusterGroupPartialOnline;
  873. //
  874. // This state has the next highest precedence after
  875. // ClusterGroupFailed and ClusterGroupPending states
  876. //
  877. goto FnExit;
  878. }
  879. if ( firstResourceState == ClusterResourceOnline ) {
  880. state = ClusterGroupOnline;
  881. //
  882. // If the first resource is in an online state,
  883. // then the group state should be online.
  884. //
  885. goto FnExit;
  886. }
  887. if ( firstResourceState == ClusterResourceOffline ) {
  888. state = ClusterGroupOffline;
  889. //
  890. // If the first resource is in an offline state,
  891. // then the group state should be offline.
  892. //
  893. goto FnExit;
  894. }
  895. }
  896. //
  897. // The control gets here only if IsNormalized is TRUE
  898. //
  899. if ( (firstResourceState == ClusterResourceOnline) ||
  900. (firstResourceState == ClusterResourceOnlinePending) ) {
  901. state = ClusterGroupOnline;
  902. firstResourceState = ClusterResourceOnline;
  903. } else {
  904. CL_ASSERT( (firstResourceState == ClusterResourceOffline) ||
  905. (firstResourceState == ClusterResourceFailed) ||
  906. (firstResourceState == ClusterResourceOfflinePending) ||
  907. (firstResourceState == ClusterResourceInitializing) );
  908. state = ClusterGroupOffline;
  909. firstResourceState = ClusterResourceOffline;
  910. }
  911. //
  912. // Now check each resource to see if they match the first.
  913. //
  914. for (listEntry = Group->Contains.Flink;
  915. listEntry != &(Group->Contains);
  916. listEntry = listEntry->Flink ) {
  917. resource = CONTAINING_RECORD(listEntry,
  918. FM_RESOURCE,
  919. ContainsLinkage);
  920. resourceState = resource->State;
  921. //
  922. // Normalize pending states to their final state, and Failed and Initializing
  923. // to Offline.
  924. //
  925. if ( resourceState == ClusterResourceOnlinePending ) {
  926. resourceState = ClusterResourceOnline;
  927. } else if ( (resourceState == ClusterResourceOfflinePending) ||
  928. (resourceState == ClusterResourceFailed) ||
  929. (resourceState == ClusterResourceInitializing) ) {
  930. resourceState = ClusterResourceOffline;
  931. }
  932. //
  933. // We only need 1 resource that is not the same as the first resource
  934. // to be in a partially online state.
  935. //
  936. if ( firstResourceState != resourceState ) {
  937. state = ClusterGroupPartialOnline;
  938. break;
  939. }
  940. }
  941. } else {
  942. //
  943. // The group is empty, so I guess it must be offline.
  944. //
  945. state = Group->PersistentState;
  946. }
  947. FnExit:
  948. FmpReleaseLocalGroupLock( Group );
  949. return(state);
  950. } // FmpGetGroupState
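//
// Summary of the state precedence implemented above, highest first:
// ClusterGroupFailed, ClusterGroupPending, ClusterGroupPartialOnline,
// then Online/Offline taken from the first resource's normalized state.
// With IsNormalized == TRUE, each resource state is first collapsed to
// Online or Offline, so only Online, Offline, or PartialOnline results.
//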
  951. DWORD
  952. FmpPropagateGroupState(
  953. IN PFM_GROUP Group
  954. )
  955. /*++
  956. Routine Description:
  957. Set and propagate the state of the group to other components on the
  958. local system and to other systems in the cluster.
  959. Arguments:
  960. Group - The Group to propagate the state.
  961. Return:
  962. ERROR_SUCCESS if successful.
  963. A Win32 error code on failure.
  964. Notes:
  965. We will use the first resource's state to determine what should be the
  966. state for the whole group. If all resources match the state of the first
  967. resource, then that is the state of the Group. If any resource disagrees
  968. with the first resource, then the state is PartialOnline.
  969. --*/
  970. {
  971. GUM_GROUP_STATE groupState;
  972. LPCWSTR groupId;
  973. DWORD groupIdSize;
  974. DWORD status;
  975. PLIST_ENTRY listEntry;
  976. CLUSTER_RESOURCE_STATE firstResourceState;
  977. CLUSTER_GROUP_STATE state;
  978. FmpAcquireLocalGroupLock( Group );
  979. //
  980. // If we no longer own the Group, then just return now.
  981. //
  982. // This can happen when a resource goes offline (via a terminate), but
  983. // the group ownership has already migrated to another system.
  984. // We will assume that returning success is okay in this case.
  985. //
  986. if ( Group->OwnerNode != NmLocalNode ) {
  987. FmpReleaseLocalGroupLock( Group );
  988. return(ERROR_SUCCESS);
  989. }
  990. //
  991. // Chittur Subbaraman (chitturs) - 6/28/99
  992. //
  993. // If the group is marked for deletion, then don't do anything.
  994. //
  995. if ( !IS_VALID_FM_GROUP( Group ) ) {
  996. FmpReleaseLocalGroupLock( Group );
  997. return(ERROR_SUCCESS);
  998. }
  999. state = FmpGetGroupState( Group, TRUE );
  1000. //
  1001. // If the state has changed, then update the local system.
  1002. //
  1003. ++Group->StateSequence;
  1004. if ( state != Group->State ) {
  1005. Group->State = state;
  1006. switch ( state ) {
  1007. case ClusterGroupOnline:
  1008. case ClusterGroupPartialOnline:
  1009. ClusterEvent(CLUSTER_EVENT_GROUP_ONLINE, Group);
  1010. break;
  1011. case ClusterGroupOffline:
  1012. case ClusterGroupFailed:
  1013. ClusterEvent(CLUSTER_EVENT_GROUP_OFFLINE, Group);
  1014. break;
  1015. default:
  1016. break;
  1017. }
  1018. //
  1019. // Prepare to notify the other systems.
  1020. //
  1021. groupId = OmObjectId( Group );
  1022. groupIdSize = (lstrlenW( groupId ) + 1) * sizeof(WCHAR);
  1023. //
  1024. // Set Group state
  1025. //
  1026. groupState.State = state;
  1027. groupState.PersistentState = Group->PersistentState;
  1028. groupState.StateSequence = Group->StateSequence;
  1029. status = GumSendUpdateEx(GumUpdateFailoverManager,
  1030. FmUpdateGroupState,
  1031. 3,
  1032. groupIdSize,
  1033. groupId,
  1034. (lstrlenW(OmObjectId(NmLocalNode))+1)*sizeof(WCHAR),
  1035. OmObjectId(NmLocalNode),
  1036. sizeof(groupState),
  1037. &groupState);
  1038. ClRtlLogPrint(LOG_NOISE,
  1039. "[FM] FmpPropagateGroupState: Group %1!ws! state = %2!u!, persistent state = %3!u!\n",
  1040. OmObjectId(Group),
  1041. groupState.State,
  1042. groupState.PersistentState);
  1043. } else {
  1044. //
  1045. // Assume that the state didn't change, but the owning node did.
  1046. //
  1047. //
  1048. // Prepare to notify the other systems.
  1049. //
  1050. groupId = OmObjectId( Group );
  1051. groupIdSize = (lstrlenW( groupId ) + 1) * sizeof(WCHAR);
  1052. status = GumSendUpdateEx(GumUpdateFailoverManager,
  1053. FmUpdateGroupNode,
  1054. 2,
  1055. groupIdSize,
  1056. groupId,
  1057. (lstrlenW(OmObjectId(NmLocalNode))+1)*sizeof(WCHAR),
  1058. OmObjectId(NmLocalNode));
  1059. }
  1060. FmpReleaseLocalGroupLock( Group );
  1061. return(status);
  1062. } // FmpPropagateGroupState
  1063. DWORD
  1064. FmpPropagateFailureCount(
  1065. IN PFM_GROUP Group,
  1066. IN BOOL NewTime
  1067. )
  1068. /*++
  1069. Routine Description:
  1070. Propagate NumberOfFailures for the group to other systems in the cluster.
  1071. Arguments:
  1072. Group - The Group to propagate the state.
  1073. NewTime - TRUE if last failure time should be reset also. FALSE otherwise.
  1074. Return:
  1075. ERROR_SUCCESS if successful.
  1076. A Win32 error code on failure.
  1077. Notes:
  1078. The Local Group lock must be held.
  1079. --*/
  1080. {
  1081. PGUM_FAILURE_COUNT failureCount;
  1082. DWORD failureCountSize;
  1083. LPCWSTR groupId;
  1084. DWORD status;
  1085. //
  1086. // Prepare to notify the other systems.
  1087. //
  1088. groupId = OmObjectId( Group );
  1089. failureCountSize = sizeof(GUM_FAILURE_COUNT) - 1 +
  1090. ((lstrlenW(groupId) + 1) * sizeof(WCHAR));
  1091. failureCount = LocalAlloc(LMEM_FIXED, failureCountSize);
  1092. if ( failureCount == NULL ) {
  1093. return(ERROR_NOT_ENOUGH_MEMORY);
  1094. }
  1095. failureCount->Count = Group->NumberOfFailures;
  1096. failureCount->NewTime = (DWORD)NewTime;
  1097. wcscpy(&failureCount->GroupId[0], groupId);
  1098. status = GumSendUpdate( GumUpdateFailoverManager,
  1099. FmUpdateFailureCount,
  1100. failureCountSize,
  1101. failureCount );
  1102. LocalFree( failureCount );
  1103. return(status);
  1104. } // FmpPropagateFailureCount
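//
// Layout note (from the allocation above): GUM_FAILURE_COUNT carries a
// variable-length GroupId tail, so the buffer is sized as the fixed part
// plus (lstrlenW(groupId) + 1) * sizeof(WCHAR); the "- 1" offsets the
// placeholder element declared in the structure definition.
//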
  1105. PFM_GROUP
  1106. FmpCreateGroup(
  1107. IN LPWSTR GroupId,
  1108. IN BOOL Initialize
  1109. )
  1110. /*++
  1111. Routine Description:
  1112. Creates a new Group object.
  1113. Arguments:
  1114. GroupId - The Id of the new Group.
  1115. Initialize - TRUE if the Group should be initialized, FALSE otherwise.
  1116. Returns:
  1117. A non-NULL pointer to the Group if successful.
  1118. NULL - The Group could not be created.
  1119. Notes:
1120. 1) Passing Initialize as FALSE allows for creating the group and its
1121. resources, but complete initialization can happen later.
1122. 2) The Group List lock must be held.
1123. 3) If the Group is created, the reference count on the object is 1. If
1124. the group is not created (i.e., it already exists) then the reference count
1125. is not incremented and the caller may add a reference as needed.
  1126. --*/
  1127. {
  1128. PFM_GROUP group = NULL;
  1129. DWORD status = ERROR_SUCCESS;
  1130. BOOL Created;
  1131. //
  1132. // Open an existing group or create a new one.
  1133. //
  1134. group = OmCreateObject( ObjectTypeGroup,
  1135. GroupId,
  1136. NULL,
  1137. &Created);
  1138. if (group == NULL) {
  1139. return(NULL);
  1140. }
  1141. if (!Created) {
  1142. ClRtlLogPrint(LOG_NOISE,
  1143. "[FM] Opened existing group %1!ws!\n",
  1144. GroupId);
  1145. //this is the quorum group being recreated again,
  1146. if ((!FmpFMOnline) && (group->RegistryKey == NULL))
  1147. {
  1148. status = FmpInitializeGroup(group, Initialize);
  1149. }
  1150. OmDereferenceObject( group );
  1151. goto FnExit;
  1152. }
  1153. else
  1154. {
  1155. ClRtlLogPrint(LOG_NOISE,
  1156. "[FM] Creating group %1!ws!\n",
  1157. GroupId);
  1158. group->State = ClusterGroupOffline;
  1159. InitializeCriticalSection( &group->Lock );
  1160. group->dwStructState = FM_GROUP_STRUCT_CREATED;
  1161. //
1162. // Initialize the group.
1163. //
1164. status = FmpInitializeGroup( group, Initialize );
  1165. if ( status != ERROR_SUCCESS ) {
  1166. goto FnExit;
  1167. }
  1168. //
  1169. // Insert the group into its list.
  1170. //
  1171. status = OmInsertObject( group );
  1172. if ( status != ERROR_SUCCESS ) {
  1173. goto FnExit;
  1174. }
  1175. }
  1176. FnExit:
  1177. if (status != ERROR_SUCCESS)
  1178. {
  1179. FmpAcquireLocalGroupLock( group );
  1180. FmpDestroyGroup( group, FALSE );
  1181. SetLastError(status);
  1182. group = NULL;
  1183. }
  1184. return(group);
  1185. } // FmpCreateGroup
  1186. DWORD FmpInitializeGroup(
  1187. IN PFM_GROUP Group,
  1188. IN BOOL Initialize
  1189. )
  1190. {
  1191. DWORD status;
  1192. //
  1193. // Initialize the Group
  1194. //
  1195. InitializeListHead( &(Group->Contains) );
  1196. InitializeListHead( &(Group->PreferredOwners) );
  1197. InitializeListHead( &(Group->DmRundownList) );
  1198. InitializeListHead( &(Group->WaitQueue) );
  1199. Group->MovingList = NULL;
  1200. //
  1201. // Read the registry information if directed to do so.
  1202. //
  1203. status = FmpQueryGroupInfo( Group, Initialize );
  1204. if ( status != ERROR_SUCCESS ) {
  1205. ClRtlLogPrint(LOG_NOISE,
  1206. "[FM] FmpInitializeGroup: FmpQueryGroupInfo failed, status=%1!u!\n",
  1207. status);
  1208. }
  1209. return(status);
  1210. }
  1211. DWORD
  1212. FmpDestroyGroup(
  1213. IN PFM_GROUP Group,
  1214. IN BOOL bDeleteObjOnly
  1215. )
  1216. /*++
  1217. Routine Description:
  1218. Closes a group.
  1219. First, this routine verifies that all resources contained within
  1220. the Group are closed.
  1221. If the group is online, it is brought offline.
  1222. Note that the group object itself is not dereferenced here. This is
  1223. done so that FmpCleanupGroups can simply enumerate all the groups,
  1224. destroying each one in turn. This approach means a group may be
  1225. destroyed multiple times if there are outstanding references to it, but
  1226. that is not a problem since no work will be done on subsequent calls.
  1227. IF bDeleteObjOnly is TRUE, then the resource monitor is not invoked and
  1228. group state is not touched.
  1229. Arguments:
1230. Group - Supplies the group to destroy.
1231. bDeleteObjOnly - TRUE if only the group object should be cleaned up;
1232. the resource monitor is not invoked and the group
1233. state is not touched.
1234. Return Value:
1235. ERROR_SUCCESS if successful.
1236. A Win32 error code on failure.
  1237. Notes:
  1238. The LocalGroupLock MUST be held! This routine will release that lock
  1239. as part of cleanup.
  1240. --*/
  1241. {
  1242. PLIST_ENTRY listEntry;
  1243. PFM_RESOURCE Resource;
  1244. PPREFERRED_ENTRY preferredEntry;
  1245. DWORD status = ERROR_SUCCESS;
  1246. ClRtlLogPrint(LOG_NOISE,
  1247. "[FM] DestroyGroup: destroying %1!ws!\n",
  1248. OmObjectId(Group));
  1249. //
  1250. // Make sure there are no resources in the Group.
  1251. //
  1252. for ( listEntry = Group->Contains.Flink;
  1253. listEntry != &(Group->Contains);
  1254. ) {
  1255. Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  1256. listEntry = listEntry->Flink;
  1257. RemoveEntryList( &Resource->ContainsLinkage );
  1258. //dereference for removing from the contains list
  1259. OmDereferenceObject( Resource );
  1260. FmpAcquireLocalResourceLock( Resource );
  1261. if (!bDeleteObjOnly)
  1262. Resource->QuorumResource = FALSE;
  1263. FmpDestroyResource( Resource, bDeleteObjOnly );
  1264. //the reference count on the group wrt to being
  1265. //referenced by the resource is handled in FmpDestroyResource
  1266. }
  1267. CL_ASSERT(IsListEmpty(&Group->Contains));
  1268. //
  1269. //
  1270. // Make sure the preferred owners list is drained.
  1271. //
  1272. while ( !IsListEmpty( &Group->PreferredOwners ) ) {
  1273. listEntry = RemoveHeadList(&Group->PreferredOwners);
  1274. preferredEntry = CONTAINING_RECORD( listEntry,
  1275. PREFERRED_ENTRY,
  1276. PreferredLinkage );
  1277. OmDereferenceObject( preferredEntry->PreferredNode );
  1278. LocalFree( preferredEntry );
  1279. }
  1280. //
  1281. // Now that there are no remaining resources in this group
1282. // we're done, so remove it from its object type list.
  1283. //
  1284. status = OmRemoveObject( Group );
  1285. //
  1286. // Close the Group's registry key.
  1287. //
  1288. DmRundownList( &Group->DmRundownList );
  1289. if ( Group->RegistryKey != NULL ) {
  1290. DmCloseKey( Group->RegistryKey );
  1291. Group->RegistryKey = NULL;
  1292. Group->Initialized = FALSE;
  1293. }
  1294. //
  1295. // We must release the lock prior to the dereference, in case this is
  1296. // the last dereference of the object!
  1297. //
  1298. FmpReleaseLocalGroupLock( Group );
  1299. ClRtlLogPrint(LOG_NOISE,
  1300. "[FM] FmpDestroyGroup: Group %1!ws! destroyed.\n",
  1301. OmObjectId(Group));
  1302. OmDereferenceObject( Group );
  1303. return(status);
  1304. } // FmpDestroyGroup
  1305. ///////////////////////////////////////////////////////////////////////////
  1306. //
  1307. // Initialization/Cleanup Routines
  1308. //
  1309. ///////////////////////////////////////////////////////////////////////////
  1310. DWORD
  1311. FmpInitGroups(
  1312. IN BOOL Initialize
  1313. )
  1314. /*++
  1315. Routine Description:
  1316. Processes the Cluster group list in the registry. For each
  1317. group key found, a cluster group is created.
  1318. Arguments:
  1319. Initialize - TRUE if resources should be initialized. FALSE otherwise.
  1320. Return Value:
  1321. ERROR_SUCCESS if successful.
  1322. A Win32 error code on failure.
  1323. --*/
  1324. {
  1325. DWORD status;
  1326. DWORD keyIndex = 0;
  1327. LPWSTR groupId = NULL;
  1328. DWORD groupIdMaxSize = 0;
  1329. PFM_GROUP ignored;
  1330. ClRtlLogPrint(LOG_NOISE,"[FM] Processing groups list.\n");
  1331. FmpAcquireGroupLock();
  1332. //
  1333. // Enumerate the subkeys. Each subkey name corresponds to a group name.
  1334. //
  1335. for (keyIndex = 0; ; keyIndex++) {
  1336. status = FmpRegEnumerateKey( DmGroupsKey,
  1337. keyIndex,
  1338. &groupId,
  1339. &groupIdMaxSize
  1340. );
  1341. if (status == NO_ERROR) {
  1342. ignored = FmpCreateGroup( groupId,
  1343. Initialize );
  1344. continue;
  1345. }
  1346. if (status == ERROR_NO_MORE_ITEMS) {
  1347. status = NO_ERROR;
  1348. } else {
  1349. ClRtlLogPrint(LOG_NOISE,"[FM] EnumGroup error %1!u!\n", status);
  1350. }
  1351. break;
  1352. }
  1353. FmpReleaseGroupLock();
  1354. ClRtlLogPrint(LOG_NOISE,"[FM] All groups created.\n");
  1355. if (groupId != NULL) {
  1356. LocalFree(groupId);
  1357. }
  1358. return(status);
  1359. } // FmpInitGroups
  1360. DWORD
  1361. FmpCompleteInitGroup(
  1362. IN PFM_GROUP Group
  1363. )
  1364. /*++
  1365. Routine Description:
  1366. Finish initialization of all resources within the group.
  1367. Arguments:
  1368. Group - The group to finish initializing.
  1369. Return Value:
  1370. ERROR_SUCCESS if successful.
  1371. A Win32 error code on failure.
  1372. --*/
  1373. {
  1374. PLIST_ENTRY listEntry;
  1375. PFM_RESOURCE Resource;
  1376. FmpAcquireLocalGroupLock(Group);
  1377. //
  1378. // For each resource in the Group, make sure that it has been fully
  1379. // initialized.
  1380. //
  1381. for ( listEntry = Group->Contains.Flink;
  1382. listEntry != &(Group->Contains);
  1383. listEntry = listEntry->Flink ) {
  1384. Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  1385. FmpInitializeResource( Resource, TRUE );
  1386. }
  1387. FmpReleaseLocalGroupLock(Group);
  1388. return(ERROR_SUCCESS);
  1389. } // FmpCompleteInitGroup
  1390. DWORD
  1391. FmpCleanupGroupsWorker(
  1392. IN PFM_CLEANUP_INFO pFmCleanupInfo
  1393. )
  1394. /*++
  1395. Routine Description:
  1396. This routine walks through an enumerated list of all the groups
  1397. owned by the local node and tries to shut them down cleanly.
  1398. In the first phase it tries to bring
  1399. all resources offline except the quorum one.
1400. In the second phase it waits for each group to reach a stable state
1401. and then moves it. It tries to bring the quorum resource offline as
1402. well by moving the quorum group.
1403. Arguments:
1404. pFmCleanupInfo - ptr to a structure containing the groups to be
1405. offlined/moved and the time limit in which to do so.
  1406. Returns:
  1407. None.
  1408. Assumptions:
  1409. --*/
  1410. {
  1411. DWORD Status = ERROR_SUCCESS;
  1412. DWORD i;
  1413. PFM_GROUP pGroup;
  1414. PGROUP_ENUM pGroupEnum;
  1415. BOOL bContainsQuorumGroup;
  1416. BOOL bQuorumGroup = FALSE;
  1417. DWORD CleanupStatus = ERROR_SUCCESS;
  1418. ClRtlLogPrint(LOG_NOISE,
  1419. "[FM] FmpCleanupGroupsWorker: Entry\r\n");
  1420. //
  1421. // This is done in two passes. In the first pass, we offline/move all
  1422. // resources except the quorum resource. In the second pass, we offline/move
  1423. // everything and then destroy the group. This allows resources that are
  1424. // being shutdown to write to the registry and have the updates logged to
  1425. // the quorum disk.
  1426. //
  1427. pGroupEnum = pFmCleanupInfo->pGroupEnum;
  1428. bContainsQuorumGroup = pFmCleanupInfo->bContainsQuorumGroup;
  1429. // Now offline all of the non-quorum resources...
  1430. // but don't wait for them to finish. I.E. get as much work done as
  1431. // possible as fast as possible.
  1432. //
  1433. for ( i = 0; i < pGroupEnum->EntryCount; i++ )
  1434. {
  1435. pGroup = OmReferenceObjectById( ObjectTypeGroup,
  1436. pGroupEnum->Entry[i].Id );
  1437. //try and offline all resources except the quorum
  1438. //resource
  1439. Status = FmpCleanupGroupPhase1(pGroup, pFmCleanupInfo->dwTimeOut);
  1440. if ((Status != ERROR_IO_PENDING) && (Status != ERROR_SUCCESS) &&
  1441. (Status != ERROR_QUORUM_RESOURCE))
  1442. CleanupStatus = Status;
  1443. OmDereferenceObject(pGroup);
  1444. }
  1445. //this finishes the second phase of the cleanup on shutdown
  1446. //if the quorum group is in this list, skip it and process it
  1447. //at the end
  1448. if (CleanupStatus == ERROR_SUCCESS)
  1449. {
  1450. for ( i = 0; i < pGroupEnum->EntryCount; i++ )
  1451. {
  1452. pGroup = OmReferenceObjectById( ObjectTypeGroup,
  1453. pGroupEnum->Entry[i].Id );
  1454. if (gpQuoResource->Group == pGroup)
  1455. {
  1456. ClRtlLogPrint(LOG_NOISE,
  1457. "[FM] FmpCleanupGroupsWorker: Quorum group belongs to this node, process phase 2 later\r\n");
  1458. bQuorumGroup = TRUE;
  1459. OmDereferenceObject(pGroup);
  1460. continue;
  1461. }
  1462. //try and offline all groups, including the quorum resource
  1463. //also try and move the resource to other nodes
  1464. Status = FmpCleanupGroupPhase2(pGroup);
  1465. OmDereferenceObject(pGroup);
  1466. }
  1467. if (bQuorumGroup)
  1468. Status = FmpCleanupGroupPhase2(gpQuoResource->Group);
  1469. }
  1470. else
  1471. {
1472. //phase 1 didn't work for some reason
  1473. //try and offline the quorum resource alone.
  1474. //TODO::Should we also terminate all resources
  1475. // No way to terminate services ???
  1476. if (bContainsQuorumGroup)
  1477. FmpCleanupQuorumResource(gpQuoResource);
  1478. }
  1479. return(Status);
  1480. } // FmpCleanupGroupsWorker
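//
// Flow summary (restating the passes above): phase 1 starts offlines for
// every non-quorum resource without waiting, so checkpoints can still be
// logged to the quorum disk; phase 2 waits for stability, offlines the
// quorum resource, and moves each group, handling the quorum group last.
//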
  1481. DWORD
  1482. FmpCleanupGroupPhase1(
  1483. IN PFM_GROUP Group,
  1484. IN DWORD dwTimeOut
  1485. )
  1486. /*++
  1487. Routine Description:
  1488. This routine is the first phase for clean up all groups owned by the node
  1489. on shutdown.
  1490. In this phase, we try and bring all resources offline except the quorum
1491. resource. In this phase we don't block for the resources to reach a stable
1492. state.
1493. We give the group the shutdown timeout specified for the cluster
1494. to reach a stable state before we try to offline it. If it doesn't
1495. reach a stable state in this period, then we shut it down abruptly.
  1496. Arguments:
  1497. Group - The Group to offline.
  1498. Returns:
  1499. ERROR_SUCCESS if successful.
  1500. A Win32 error code on failure.
  1501. --*/
  1502. {
  1503. DWORD Status = ERROR_SUCCESS;
  1504. DWORD dwRetryCount = (2 * dwTimeOut)/1000;//we check after every 1/2 sec
  1505. ClRtlLogPrint(LOG_NOISE,
  1506. "[FM] FmpCleanupGroupsPhase1: Entry, Group = %1!ws!\r\n",
  1507. OmObjectId(Group));
  1508. ChkGroupState:
  1509. FmpAcquireLocalGroupLock( Group );
  1510. //
  1511. // Just offline the group
  1512. //
  1513. if ( Group->OwnerNode == NmLocalNode )
  1514. {
  1515. //
  1516. // Make sure the group is quiet
  1517. //
  1518. if ( !FmpIsGroupQuiet( Group, ClusterGroupOffline ) )
  1519. {
  1520. FmpReleaseLocalGroupLock( Group );
  1521. ClRtlLogPrint(LOG_NOISE,
  1522. "[FM] FmpCleanupGroupsPhase1: Group is not quiet, wait\r\n");
1523. //wait 1/2 second between checks, up to the shutdown timeout
  1524. Sleep(500);
  1525. if (dwRetryCount--)
  1526. goto ChkGroupState;
  1527. else
  1528. {
  1529. Status = ERROR_REQUEST_ABORTED;
  1530. goto FnExit;
  1531. }
  1532. }
  1533. //
  1534. // Just take the group offline. Don't wait, don't pass go...
  1535. //
  1536. // Dont take the quorum resource offline in phase 1
  1537. // The quorum resource must be the last one to be taken offline
  1538. Status = FmpOfflineGroup(Group, FALSE, FALSE);
  1539. }
  1540. FmpReleaseLocalGroupLock( Group );
  1541. FnExit:
  1542. ClRtlLogPrint(LOG_NOISE,
  1543. "[FM] FmpCleanupGroupsPhase1: Exit, status=%1!u!\r\n",
  1544. Status);
  1545. return(Status);
  1546. } // FmpCleanupGroupsPhase1
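//
// Timing note: dwRetryCount is (2 * dwTimeOut) / 1000 passes with a
// 500 ms sleep per pass, so a group that never quiets is given roughly
// dwTimeOut milliseconds before the offline attempt is abandoned with
// ERROR_REQUEST_ABORTED.
//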
  1547. DWORD
  1548. FmpCleanupGroupPhase2(
  1549. IN PFM_GROUP Group
  1550. )
  1551. /*++
  1552. Routine Description:
  1553. This routine is the second phase for clean up all groups owned by the node
  1554. on shutdown.
  1555. In this phase, we try and bring all resources offline including the quorum
  1556. resource. We also try to move the quorum resource
  1557. We give the group 10 seconds to reach a stable state before we try to
  1558. move it.
  1559. Arguments:
  1560. Group - The Group to offline.
  1561. Returns:
  1562. ERROR_SUCCESS if successful.
  1563. A Win32 error code on failure.
  1564. --*/
  1565. {
  1566. DWORD Status = ERROR_SUCCESS;
  1567. DWORD dwRetryCount= 120 * 12;
  1568. ClRtlLogPrint(LOG_NOISE,
  1569. "[FM] FmpCleanupGroupsPhase2: Entry, Group = %1!ws!\r\n",
  1570. OmObjectId(Group));
  1571. FmpAcquireLocalGroupLock( Group );
  1572. //
  1573. // Try to move the Group before destroying it if we own it.
  1574. //
  1575. if ( Group->OwnerNode == NmLocalNode )
  1576. {
  1577. //
  1578. // First make sure the group is really offline.
  1579. // In phase 1 we began the offline process... we need to check it here.
  1580. //
  1581. WaitSomeMore:
  1582. //
  1583. // [GorN] [10/05/1999]
  1584. // We need to wait for the quorum to go offline, otherwise
  1585. // the surviving node will not be able to arbitrate.
  1586. //
  1587. // FmpWaitForGroup keeps issuing RmOffline for the quorum,
  1588. // resrcmon returns ERROR_INVALID_STATE, for the second offline,
  1589. // since offline is already in progress.
  1590. //
1591. // This causes us to break out of this loop while the quorum resource
1592. // is still going offline.
  1593. //
  1594. // [HACKHACK] The following fix for the problem is a hack.
  1595. // It would be better either to make resmon return IO_PENDING when
  1596. // somebody is trying to offline the resource that is in offline pending
  1597. //
  1598. // Or not to call FmRmOffline the second time in FM.
  1599. //
  1600. Status = FmpOfflineGroup(Group, TRUE, FALSE);
  1601. if (Status == ERROR_IO_PENDING ||
  1602. (Status == ERROR_INVALID_STATE
  1603. && Group == gpQuoResource->Group) )
  1604. {
  1605. //FmpWaitForGroup() will release the lock
  1606. Status = FmpWaitForGroup(Group);
  1607. ClRtlLogPrint(LOG_NOISE,
  1608. "[FM] FmpCleanupGroupsPhase2: Sleep and retry\r\n");
  1609. Sleep(2*1000);
  1610. //Reacquire the group lock and check if the group is offline
  1611. FmpAcquireLocalGroupLock(Group);
  1612. if (dwRetryCount--)
  1613. goto WaitSomeMore;
  1614. }
  1615. else if (Status != ERROR_SUCCESS)
  1616. {
  1617. goto FnExit;
  1618. }
  1619. else
  1620. {
  1621. // The Move routine frees the LocalGroupLock!
  1622. FmpMoveGroup( Group, NULL, TRUE, NULL, TRUE );
  1623. FmpAcquireLocalGroupLock( Group );
  1624. }
  1625. }
  1626. FnExit:
  1627. FmpReleaseLocalGroupLock(Group);
  1628. ClRtlLogPrint(LOG_NOISE,
  1629. "[FM] FmpCleanupGroupsPhase2: Exit\n");
1630. return(Status);
  1631. } // FmpCleanupGroupsPhase2
  1632. BOOL
  1633. FmpEnumNodeState(
  1634. OUT DWORD *pStatus,
  1635. IN PVOID Context2,
  1636. IN PNM_NODE Node,
  1637. IN LPCWSTR Name
  1638. )
  1639. /*++
  1640. Routine Description:
  1641. Node enumeration callback for FM shutdown. Queries the state
  1642. of other nodes to see if any are up.
  1643. Arguments:
  1644. pStatus - Returns TRUE if other node is up.
  1645. Context2 - Not used
  1646. Node - Supplies the node.
  1647. Name - Supplies the node's name.
  1648. Return Value:
  1649. TRUE - to indicate that the enumeration should continue.
  1650. FALSE - to indicate that the enumeration should not continue.
  1651. --*/
  1652. {
  1660. if (Node == NmLocalNode) {
  1661. return(TRUE);
  1662. }
  1663. //
  1664. // Enumerate all other node's group states. This includes all nodes
  1665. // that are up, as well as nodes that are paused.
  1666. //
  1667. if ((NmGetNodeState(Node) == ClusterNodeUp) ||
  1668. (NmGetNodeState(Node) == ClusterNodePaused)){
  1669. *pStatus = TRUE;
  1670. return(FALSE);
  1671. }
  1672. return(TRUE);
  1673. } // FmpEnumNodeState
  1674. VOID
  1675. FmpCleanupGroups(
  1676. IN BOOL ClusterShutDownEvent
  1677. )
  1678. /*++
  1679. Routine Description:
  1680. This routine kicks off the cleanup of the FM layer.
1681. Arguments:
1682. ClusterShutDownEvent - TRUE if called for a cluster service shutdown, in which case service status checkpoints are announced while waiting.
  1683. Returns:
  1684. None.
  1685. --*/
  1686. {
  1687. DWORD Status;
  1688. DWORD dwTimeOut;
  1689. DWORD dwDefaultTimeOut;
  1690. HANDLE hCleanupThread;
  1691. DWORD otherNodesUp = FALSE;
  1692. DWORD dwThreadId;
  1693. DWORD i,dwTimeOutCount;
  1694. PGROUP_ENUM pGroupEnum;
  1695. BOOL bQuorumGroup = FALSE;
  1696. PFM_CLEANUP_INFO pFmCleanupInfo;
  1697. ClRtlLogPrint(LOG_NOISE,
  1698. "[FM] FmpCleanupGroups: Entry\r\n");
  1699. //
1700. // If we don't know the quorum resource,
1701. // then leave immediately.
  1702. //
  1703. if ( !gpQuoResource ) {
  1704. goto FnExit;
  1705. }
  1706. ACQUIRE_EXCLUSIVE_LOCK(gQuoChangeLock);
1707. //if this is called while FM form-phase processing is going on,
1708. //then the quorum group doesn't exist; other groups don't exist
1709. //either
  1710. if (FmpFMFormPhaseProcessing)
  1711. FmpCleanupQuorumResource(gpQuoResource);
  1712. else
1713. CL_ASSERT(gpQuoResource->Group != NULL);
  1714. RELEASE_LOCK(gQuoChangeLock);
  1715. //
  1716. // Find and sort all known groups, hold the group lock while enumerating
  1717. //
  1718. FmpAcquireGroupLock();
  1719. Status = FmpEnumSortGroups(&pGroupEnum, NmLocalNode, &bQuorumGroup);
  1720. FmpReleaseGroupLock();
  1721. if (Status != ERROR_SUCCESS) {
  1722. goto FnExit;
  1723. }
  1724. //
  1725. // See if any other node in the cluster is up...
  1726. // If so, we will use the default timeout value.
  1727. // Otherwise, we will use what we believe is a more reasonable time.
  1728. //
  1729. OmEnumObjects( ObjectTypeNode,
  1730. FmpEnumNodeState,
  1731. &otherNodesUp,
  1732. NULL );
  1733. dwDefaultTimeOut = CLUSTER_SHUTDOWN_TIMEOUT * 60; // default timeout (secs)
  1734. switch ( CsShutdownRequest ) {
  1735. case CsShutdownTypeShutdown:
  1736. if ( otherNodesUp ) {
  1737. dwTimeOut = 15; // other node will time us out quickly - say 15 secs
  1738. } else {
  1739. dwTimeOut = 30; // otherwise use 30 seconds
  1740. }
  1741. break;
  1742. default:
1743. // use the default value if the registry does not override it
  1744. dwDefaultTimeOut = CLUSTER_SHUTDOWN_TIMEOUT; // default timeout (mins)
  1745. Status = DmQueryDword( DmClusterParametersKey,
  1746. CLUSREG_NAME_CLUS_SHUTDOWN_TIMEOUT,
  1747. &dwTimeOut,
  1748. &dwDefaultTimeOut);
  1749. dwTimeOut *= 60; // convert to secs.
  1750. break;
  1751. }
  1752. //convert to msecs
  1753. dwTimeOut *= 1000;
  1754. pFmCleanupInfo = (PFM_CLEANUP_INFO)LocalAlloc(LMEM_FIXED, sizeof(FM_CLEANUP_INFO));
  1755. if (!pFmCleanupInfo)
  1756. {
  1757. Status = ERROR_NOT_ENOUGH_MEMORY;
  1758. goto FnExit;
  1759. }
  1760. pFmCleanupInfo->pGroupEnum = pGroupEnum;
  1761. pFmCleanupInfo->dwTimeOut = dwTimeOut; //in msecs
  1762. pFmCleanupInfo->bContainsQuorumGroup = bQuorumGroup;
  1763. //
  1764. // Start the worker thread to perform cleanup.
  1765. //
  1766. hCleanupThread = CreateThread( NULL,
  1767. 0,
  1768. FmpCleanupGroupsWorker,
  1769. pFmCleanupInfo,
  1770. 0,
  1771. &dwThreadId );
  1772. if ( hCleanupThread == NULL ) {
  1773. //SS: if we own the quorum resource should we cleanup the quorum resource
  1774. //this will avoid corruption
  1775. if (bQuorumGroup)
  1776. FmpCleanupQuorumResource(gpQuoResource);
  1777. goto FnExit;
  1778. }
1779. // Rohit (rjain): This path is taken when the Cluster Service is shutting
1780. // down. The ServiceStatus checkpoint is incremented after every WaitHint
1781. // units of time. For this, the waiting period of dwTimeOut is divided into
1782. // multiple waiting periods of dwWaitHint units each.
  1783. if((ClusterShutDownEvent==TRUE) && (dwTimeOut > CsServiceStatus.dwWaitHint))
  1784. {
  1785. dwTimeOutCount=dwTimeOut/CsServiceStatus.dwWaitHint;
  1786. ClRtlLogPrint(LOG_ERROR,
  1787. "[FM] FmpCleanupGroups: dwTimeOut=%1!u! dwTimoutCount=%2!u! waithint =%3!u! \r\n",
  1788. dwTimeOut,dwTimeOutCount, CsServiceStatus.dwWaitHint);
1789. for (i = 0; i < dwTimeOutCount; i++) {
  1790. Status = WaitForSingleObject(hCleanupThread, CsServiceStatus.dwWaitHint);
  1791. switch(Status) {
  1792. case WAIT_OBJECT_0:
  1793. //everything is fine
  1794. ClRtlLogPrint(LOG_NOISE,
  1795. "[FM] FmpCleanupGroups: Cleanup thread finished in time\r\n");
  1796. break;
  1797. case WAIT_TIMEOUT:
  1798. //should we terminate the thread
  1799. //try and clean up the quorum resource
  1800. //this will avoid corruption on the quorum disk
  1801. //TODO::Should we also terminate all resources
  1802. // No way to terminate services ???
  1803. if(i == (dwTimeOutCount-1)){
  1804. ClRtlLogPrint(LOG_UNUSUAL,
  1805. "[FM] FmpCleanupGroups: Timed out on the CleanupThread\r\n");
  1806. if (bQuorumGroup)
  1807. FmpCleanupQuorumResource(gpQuoResource);
  1808. }
  1809. break;
  1810. case WAIT_FAILED:
  1811. ClRtlLogPrint(LOG_UNUSUAL,
  1812. "[DM] FmpCleanupGroups: wait on CleanupEvent failed 0x%1!08lx!\r\n",
  1813. GetLastError());
  1814. break;
  1815. }
1816. if (Status == WAIT_OBJECT_0 || Status == WAIT_FAILED)
  1817. break;
  1818. CsServiceStatus.dwCheckPoint++;
  1819. CsAnnounceServiceStatus();
  1820. }
  1821. goto FnExit;
  1822. }
  1823. //
  1824. // Wait for the thread to complete or a timeout.
  1825. //
  1826. Status = WaitForSingleObject(hCleanupThread, dwTimeOut);
  1827. switch(Status) {
  1828. case WAIT_OBJECT_0:
  1829. //everything is fine
  1830. ClRtlLogPrint(LOG_NOISE,
  1831. "[FM] FmpCleanupGroups: Cleanup thread finished in time\r\n");
  1832. break;
  1833. case WAIT_TIMEOUT:
  1834. //should we terminate the thread
  1835. //try and clean up the quorum resource
  1836. //this will avoid corruption on the quorum disk
  1837. //TODO::Should we also terminate all resources
  1838. // No way to terminate services ???
  1839. ClRtlLogPrint(LOG_UNUSUAL,
  1840. "[FM] FmpCleanupGroups: Timed out on the CleanupThread\r\n");
  1841. if (bQuorumGroup)
  1842. FmpCleanupQuorumResource(gpQuoResource);
  1843. break;
  1844. case WAIT_FAILED:
  1845. ClRtlLogPrint(LOG_UNUSUAL,
  1846. "[DM] FmpCleanupGroups: wait on CleanupEvent failed 0x%1!08lx!\r\n",
  1847. GetLastError());
  1848. break;
  1849. }
  1850. FnExit:
1851. //SS: don't bother cleaning up, we are going to exit after this
1852. #if 0
1853. if (pGroupEnum) LocalFree(pGroupEnum);
  1854. #endif
  1855. ClRtlLogPrint(LOG_NOISE,
  1856. "[FM] FmpCleanupGroups: Exit\r\n");
  1857. return;
  1858. } // FmpCleanupGroups
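/*
 * [Editorial sketch - not part of the original source.] The
 * ClusterShutDownEvent path above shows the standard SCM shutdown idiom:
 * split one long wait into dwWaitHint-sized slices and bump dwCheckPoint
 * after each slice so the service controller sees forward progress. A
 * self-contained sketch, assuming the caller already registered the
 * handler and owns hSvcStatus/pSvcStatus (hypothetical names); inert
 * under #if 0.
 */
#if 0
#include <windows.h>

static DWORD
WaitWithCheckpoints(
    HANDLE hThread,                     // worker to wait for
    DWORD TotalMs,                      // overall wait budget
    SERVICE_STATUS_HANDLE hSvcStatus,
    SERVICE_STATUS *pSvcStatus
    )
{
    DWORD slices, i;
    DWORD rc = WAIT_TIMEOUT;
    if (pSvcStatus->dwWaitHint == 0) {
        return(WaitForSingleObject(hThread, TotalMs));
    }
    slices = TotalMs / pSvcStatus->dwWaitHint;
    for (i = 0; i < slices; i++) {
        rc = WaitForSingleObject(hThread, pSvcStatus->dwWaitHint);
        if (rc != WAIT_TIMEOUT) {
            break;                      // worker finished, or the wait failed
        }
        pSvcStatus->dwCheckPoint++;     // report progress to the SCM
        SetServiceStatus(hSvcStatus, pSvcStatus);
    }
    return(rc);
}
#endif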
  1859. DWORD
  1860. FmpCleanupQuorumResource(
  1861. IN PFM_RESOURCE Resource
  1862. )
  1863. /*++
  1864. Routine Description:
1865. This routine is for emergency cleanup of the quorum resource.
1866. In this phase, we don't try to acquire any locks. We just try to
1867. bring the quorum resource offline. Hopefully the API layer is already
1868. offline and nothing unusual is attempted on the quorum group/resource
1869. during this time. This should only be called during the shutdown of FM.
1870. Arguments:
1871. Resource - The quorum resource to take offline.
  1872. Returns:
  1873. ERROR_SUCCESS if successful.
  1874. A Win32 error code on failure.
  1875. --*/
  1876. {
  1877. DWORD status = ERROR_SUCCESS;
  1878. DWORD state;
  1879. ClRtlLogPrint(LOG_NOISE,
  1880. "[FM] FmpCleanupQuorum: Offline resource <%1!ws!> <%2!ws!>\n",
  1881. OmObjectName(Resource),
  1882. OmObjectId(Resource) );
  1883. //
  1884. // If the resource is already offline, then return immediately.
  1885. //
  1886. // We should not have to check if a resource has been initialized,
  1887. // since if it hasn't, then we will return because the pre-initialized
  1888. // state of a resource is Offline.
  1889. //
  1890. if ( Resource->State == ClusterResourceOffline ) {
  1891. //
  1892. // If this is the quorum resource, make sure any reservation
  1893. // threads are stopped!
  1894. //
  1895. FmpRmTerminateResource( Resource );
  1896. return(ERROR_SUCCESS);
  1897. }
  1898. if (Resource->State > ClusterResourcePending ) {
  1899. ClRtlLogPrint(LOG_NOISE,
  1900. "[FM] FmpCleanupQuorum: Offline resource <%1!ws!> is in pending state\n",
  1901. OmObjectName(Resource) );
  1902. FmpRmTerminateResource( Resource );
  1903. return(ERROR_SUCCESS);
  1904. }
  1905. //make sure the quorum logs can be flushed and closed
  1906. OmNotifyCb(Resource, NOTIFY_RESOURCE_PREOFFLINE);
  1907. //it may not be prudent to call offline without holding any locks
  1908. //just call terminate
  1909. FmpRmTerminateResource( Resource );
  1910. ClRtlLogPrint(LOG_NOISE,
  1911. "[FM] FmpCleanupQuorum: RmOfflineResource returns %1!u!\r\n",
  1912. status);
  1913. return(status);
  1914. }
  1915. DWORD
  1916. FmpMoveGroup(
  1917. IN PFM_GROUP Group,
  1918. IN PNM_NODE DestinationNode OPTIONAL,
  1919. IN BOOL ShutdownHandler,
  1920. OUT PNM_NODE *pChosenDestinationNode OPTIONAL,
  1921. IN BOOL bChooseMostPreferredNode
  1922. )
  1923. /*++
  1924. Routine Description:
  1925. Move the specified Group. This means taking all of the individual
  1926. resources contained within the group offline and requesting the
  1927. DestinationNode to bring the Group Online.
  1928. Arguments:
  1929. Group - Supplies a pointer to the group structure to move.
  1930. DestinationNode - Supplies the node object to move the group to. If not
  1931. present, then move it to 'highest' entry in the preferred list.
  1932. ShutdownHandler - TRUE if the shutdown handler is invoking this function.
  1933. pChosenDestinationNode - Set to the destination node of the move and
  1934. will be passed on to FmpCompleteMoveGroup, if necessary.
  1935. bChooseMostPreferredNode - If the destination node is not supplied,
  1936. indicates whether to choose the most preferred node or not.
  1937. Returns:
  1938. ERROR_SUCCESS if the request was successful.
  1939. A Win32 error code on failure.
  1940. Notes:
  1941. It is assumed that the Group and all contained resources are offline
  1942. from the requesting node when this call returns. The Group may or
  1943. may not be online on the DestinationNode, depending on whether the
  1944. online request succeeded. This means that the status return is merely
  1945. the status return for the Online request for the DestinationNode.
  1946. The LocalGroupLock MUST also be held. The LocalGroupLock is released
  1947. by this routine.
  1948. --*/
  1949. {
  1950. PNM_NODE node;
  1951. DWORD status;
  1952. PFM_RESOURCE resource;
  1953. PLIST_ENTRY listEntry;
  1954. PRESOURCE_ENUM resourceList=NULL;
  1955. DWORD dwMoveStatus = ERROR_SUCCESS;
  1956. ClRtlLogPrint(LOG_NOISE,
  1957. "[FM] FmpMoveGroup: Entry\r\n");
  1958. if ( !ShutdownHandler )
  1959. {
  1960. if ( !FmpFMOnline )
  1961. {
  1962. status = ERROR_CLUSTER_NODE_NOT_READY;
  1963. goto FnExit;
  1964. }
  1965. if ( FmpShutdown )
  1966. {
  1967. status = ERROR_SHUTDOWN_IN_PROGRESS;
  1968. goto FnExit;
  1969. }
  1970. }
  1971. //
  1972. // See which system owns the group in order to control the move request.
  1973. //
  1974. if ( Group->OwnerNode != NmLocalNode )
  1975. {
  1976. if ( Group->OwnerNode == NULL )
  1977. {
  1978. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  1979. goto FnExit;
  1980. }
  1981. //
  1982. // The other system owns the Group ... let them do the work.
  1983. //
  1984. ClRtlLogPrint(LOG_NOISE,
  1985. "[FM] FmpMoveGroup: Request node %1!ws! to move Group %2!ws!\n",
  1986. OmObjectId(Group->OwnerNode),
  1987. OmObjectId(Group));
  1988. // FmcMoveGroupRequest must release the Group lock.
  1989. status = FmcMoveGroupRequest( Group,
  1990. DestinationNode );
  1991. if ( status != ERROR_SUCCESS )
  1992. {
  1993. ClRtlLogPrint(LOG_NOISE,
  1994. "[FM] FmpMoveGroup: Requested system %1!ws! to move group %2!ws! failed with status %3!u!.\n",
  1995. OmObjectId(Group->OwnerNode),
  1996. OmObjectId(Group),
  1997. status);
  1998. }
  1999. FmpAcquireLocalGroupLock( Group );
  2000. goto FnExit;
  2001. }
  2002. else
  2003. {
  2004. //
  2005. // We control the move.
  2006. //
  2007. if ( !FmpIsGroupQuiet(Group, ClusterGroupStateUnknown) )
  2008. {
  2009. //
  2010. // If a move is pending or resources are pending,
  2011. // then return now.
  2012. //
  2013. ClRtlLogPrint(LOG_NOISE,
  2014. "[FM] FmpMoveGroup: Request to move group <%1!ws!> when it is busy.\n",
  2015. OmObjectName(Group) );
  2016. status = ERROR_INVALID_STATE;
  2017. goto FnExit;
  2018. }
  2019. if ( ARGUMENT_PRESENT( DestinationNode ) )
  2020. {
  2021. //
  2022. // Check if we are the destination... if so, we're done.
  2023. //
  2024. if ( NmLocalNode == DestinationNode )
  2025. {
  2026. status = ERROR_SUCCESS;
  2027. goto FnExit;
  2028. }
  2029. node = DestinationNode;
  2030. }
  2031. else
  2032. {
  2033. node = FmpFindAnotherNode( Group, bChooseMostPreferredNode );
  2034. if ( node == NULL )
  2035. {
  2036. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  2037. goto FnExit;
  2038. }
  2039. }
  2040. if ( ARGUMENT_PRESENT ( pChosenDestinationNode ) )
  2041. {
  2042. *pChosenDestinationNode = node;
  2043. }
  2044. ClRtlLogPrint(LOG_NOISE,
  2045. "[FM] FmpMoveGroup: Moving group %1!ws! to node %2!ws! (%3!d!)\n",
  2046. OmObjectId(Group),
  2047. OmObjectId(node),
  2048. NmGetNodeId(node));
  2049. //
  2050. // If the other system is not up, then fail now.
  2051. //
  2052. if ( NmGetExtendedNodeState(node) != ClusterNodeUp )
  2053. {
  2054. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  2055. goto FnExit;
  2056. }
  2057. //
  2058. // If the other system is not in the preferred list, then fail this
  2059. // now.
  2060. //
  2061. if ( !FmpInPreferredList( Group, node, TRUE, NULL) )
  2062. {
  2063. status = ERROR_CLUSTER_NODE_NOT_FOUND;
  2064. goto FnExit;
  2065. }
  2066. //
  2067. // Get the list of resources in the group and their states.
  2068. //
  2069. status = FmpGetResourceList( &resourceList, Group );
  2070. if ( status != ERROR_SUCCESS )
  2071. {
  2072. goto FnExit;
  2073. }
  2074. Group->MovingList = resourceList;
  2075. //
  2076. // At this point the other system should be up!
  2077. //
  2078. status = FmpOfflineResourceList( resourceList, TRUE );
  2079. //SS: avoid the window when the group lock is released
  2080. //and the moving flag is not set true
  2081. //moving will be continued in another thread context if pending is
  2082. //returned
  2083. if ( status != ERROR_SUCCESS )
  2084. {
  2085. goto FnRestore;
  2086. }
  2087. // for now make sure that the group state is propagated here
  2088. // In general it is propagated by the worker thread. Since
  2089. // the ownership is going to change, we want to make sure that the
  2090. // last known state is propagated from this node to others before
  2091. // that.
  2092. FmpPropagateGroupState(Group);
  2093. //
  2094. // Assume the other node is going to take ownership. This is done
  2095. // before, in case the Group state changes. We want to accept the
  2096. // Group/resource state changes from the remote system when they
  2097. // arrive. We've already verified that node is in the preferred list!
  2098. //
  2099. TESTPT(TpFailPreMoveWithNodeDown)
  2100. {
  2101. ClusterEvent( CLUSTER_EVENT_NODE_DOWN, node );
  2102. }
  2103. //
  2104. // Chittur Subbaraman (chitturs) - 5/18/99
  2105. //
  2106. // Modified to handle the move group request of a quorum group in
  2107. // case the destination node could not arbitrate for the quorum
  2108. // resource.
  2109. //
  2110. do
  2111. {
  2112. //
  2113. // Before making the RPC, set the intended owner of the group
  2114. //
  2115. FmpSetIntendedOwnerForGroup( Group, NmGetNodeId( node ) );
  2116. try {
  2117. ClRtlLogPrint(LOG_NOISE,
  2118. "[FM] FmpMoveGroup: Take group %2!ws! request to remote node %1!ws!\n",
  2119. OmObjectId(node),
  2120. OmObjectId(Group));
  2121. dwMoveStatus = status = FmcTakeGroupRequest( node, OmObjectId( Group ), resourceList );
  2122. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  2123. LPCWSTR pszNodeId;
  2124. LPCWSTR pszGroupId;
  2125. status = GetExceptionCode ();
  2126. ClRtlLogPrint(LOG_NOISE,
  2127. "[FM] FmpMoveGroup: Exception in FmcTakeGroupRequest %2!ws! request to remote node %1!ws!, status=%3!u!\n",
  2128. OmObjectId(node),
  2129. OmObjectId(Group),
  2130. status);
  2131. //
  2132. // An exception from RPC indicates that the other node is either dead
2133. // or insane. We don't know whether it took ownership or not.
  2134. // So, let the FM node down handler handle the group.
  2135. //
  2136. GumCommFailure( GumUpdateFailoverManager,
  2137. NmGetNodeId(node),
  2138. GetExceptionCode(),
  2139. TRUE );
  2140. //
  2141. // The new owner node that is now dead might have set the intended
  2142. // owner as NULL or it might not have set this. It might have
  2143. // set the owner node to himself or might not have.
  2144. // If it has set the owner node for this group as himself, then
  2145. // the FM node down handler will assume responsibility for this
  2146. // group. If the target node dies before it sets himself as the owner,
  2147. // then again, the FM node down handler will assume responsibility
  2148. // for the group. We wake up when the gum sync handling is over.
  2149. // Right now, the gum update for the owner node may still be in
2150. // progress so we can't be sure if that update was completed on
  2151. // all nodes.
  2152. //
  2153. //
  2154. // Chittur Subbaraman (chitturs) - 6/7/99
  2155. //
  2156. // Issue a GUM update to handle this group. Using this
  2157. // GUM update prevents any race condition with the
  2158. // node down processing code.
  2159. //
  2160. // TODO: This does not cover the case in which
  2161. // FmpTakeGroupRequest crashes after setting the
  2162. // intended owner to invalid ID. In such a case,
  2163. // the following handler won't take ownership of the
  2164. // group. Also, claim handler will not touch the
  2165. // group.
  2166. //
  2167. pszNodeId = OmObjectId( node );
  2168. pszGroupId = OmObjectId( Group );
  2169. GumSendUpdateEx( GumUpdateFailoverManager,
  2170. FmUpdateCompleteGroupMove,
  2171. 2,
  2172. (lstrlenW(pszNodeId)+1)*sizeof(WCHAR),
  2173. pszNodeId,
  2174. (lstrlenW(pszGroupId)+1)*sizeof(WCHAR),
  2175. pszGroupId);
  2176. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  2177. goto FnExit;
  2178. }
  2179. if ( status == ERROR_RETRY )
  2180. {
  2181. //
  2182. // The destination refused to take the quorum group since it
  2183. // did not win the arbitration. So let us see who won the
  2184. // arbitration.
  2185. //
  2186. DWORD dwSelectedQuorumOwnerId;
  2187. CL_ASSERT( Group == gpQuoResource->Group );
  2188. ClRtlLogPrint(LOG_NOISE,
  2189. "[FM] FmpMoveGroup: Remote node asked us to resend take group request for group %1!ws! to another node ...\n",
  2190. OmObjectId( Group ));
  2191. //
  2192. // Get the ID of the node which the MM believes is the best
  2193. // candidate to own the quorum resource. This is a call that
  2194. // blocks while RGP is in progress.
  2195. //
  2196. MMApproxArbitrationWinner( &dwSelectedQuorumOwnerId );
  2197. if ( ( dwSelectedQuorumOwnerId == NmGetNodeId( NmLocalNode ) ) ||
  2198. ( dwSelectedQuorumOwnerId == MM_INVALID_NODE ) )
  2199. {
  2200. //
  2201. // The local node is chosen by MM or no node is chosen by
  2202. // the MM. The latter case will happen if no RGP has
  2203. // occurred at the time this call is made. Let us see if we
  2204. // can arbitrate for the quorum resource.
  2205. //
  2206. status = FmpRmArbitrateResource( gpQuoResource );
  2207. if ( status != ERROR_SUCCESS )
  2208. {
  2209. //
  2210. // Too bad. We will halt and let FmpNodeDown handler
  2211. // handle the quorum group.
  2212. //
  2213. ClRtlLogPrint(LOG_CRITICAL,
  2214. "[FM] FmpMoveGroup: Local node %1!u! cannot arbitrate for quorum, Status = %1!u!...\n",
  2215. dwSelectedQuorumOwnerId,
  2216. status);
  2217. CsInconsistencyHalt( ERROR_QUORUM_RESOURCE_ONLINE_FAILED );
  2218. }
  2219. status = ERROR_RETRY;
  2220. break;
  2221. }
  2222. node = NmReferenceNodeById( dwSelectedQuorumOwnerId );
  2223. if ( node == NULL )
  2224. {
  2225. ClRtlLogPrint(LOG_CRITICAL,
  2226. "[FM] FmpMoveGroup: Selected node %1!u! cannot be referenced...\n",
  2227. dwSelectedQuorumOwnerId);
  2228. CsInconsistencyHalt( ERROR_QUORUM_RESOURCE_ONLINE_FAILED );
  2229. }
  2230. } // if
  2231. } while ( status == ERROR_RETRY );
  2232. TESTPT(TpFailPostMoveWithNodeDown)
  2233. {
  2234. ClusterEvent( CLUSTER_EVENT_NODE_DOWN, node );
  2235. }
  2236. CL_ASSERT( status != ERROR_IO_PENDING );
  2237. if ( status != ERROR_SUCCESS )
  2238. {
  2239. ClRtlLogPrint(LOG_NOISE,
  2240. "[FM] FmpMoveGroup: FmcTakeGroupRequest to node %1!ws! to take group %2!ws! failed, status %3!u!.\n",
  2241. OmObjectId(node),
  2242. OmObjectId(Group),
  2243. status );
  2244. goto FnRestore;
  2245. }
  2246. //
  2247. // If the group is empty, then generate a Group state change event.
  2248. //
  2249. if ( IsListEmpty( &Group->Contains ) )
  2250. {
  2251. ClusterWideEvent( CLUSTER_EVENT_GROUP_OFFLINE,
  2252. Group );
  2253. }
  2254. }
  2255. FnRestore:
  2256. if ((status != ERROR_SUCCESS) && (status != ERROR_IO_PENDING))
  2257. {
  2258. //
  2259. // Chittur Subbaraman (chitturs) - 3/22/2000
  2260. //
  2261. // Reset the group's intended owner to invalid node ID if the
  2262. // node down handler did not do that.
  2263. //
  2264. if ( dwMoveStatus != ERROR_SUCCESS )
  2265. {
  2266. if ( FmpSetIntendedOwnerForGroup( Group, ClusterInvalidNodeId )
  2267. == ERROR_CLUSTER_INVALID_NODE )
  2268. {
  2269. ClRtlLogPrint(LOG_NOISE,
  2270. "[FM] FmpMoveGroup: Group <%1!ws!> has already been processed by node down handler....\r\n",
  2271. OmObjectName(Group));
  2272. goto FnExit;
  2273. }
  2274. }
  2275. // the move failed
  2276. // In all failure cases we want to bring the resources
  2277. // back online
  2278. // if it is pending, then we let FmpCompleteMoveGroup finish
  2279. // the work
  2280. if (resourceList)
  2281. {
  2282. //
  2283. // Terminate all of the resources in the group.
  2284. //
  2285. FmpTerminateResourceList( resourceList );
  2286. //
  2287. // Chittur Subbaraman (chitturs) - 4/10/2000
  2288. //
  2289. // Make sure to online the quorum group even if this node is
  2290. // shutting down. This is necessary so that other groups
  2291. // can be brought offline during this node's shutdown. Note
  2292. // that FmpOnlineResourceList would only online a group
  2293. // during a shutdown if the group is the quorum group.
  2294. //
  2295. if ( FmpFMGroupsInited )
  2296. FmpOnlineResourceList( resourceList, Group );
  2297. }
  2298. }
  2299. FnExit:
  2300. ClRtlLogPrint(LOG_NOISE,
  2301. "[FM] FmpMoveGroup: Exit group <%1!ws!>, status = %2!u!\r\n",
  2302. OmObjectName(Group),
  2303. status);
  2304. if ( status != ERROR_IO_PENDING )
  2305. {
  2306. if (resourceList)
  2307. {
  2308. FmpDeleteResourceEnum( resourceList );
  2309. Group->MovingList = NULL;
  2310. }
  2311. }
  2312. if ( ( status == ERROR_SUCCESS ) || ( status == ERROR_IO_PENDING ) )
  2313. {
  2314. //
  2315. // Chittur Subbaraman (chitturs) - 4/13/99
  2316. //
  2317. // If the FmpDoMoveGroupOnFailure thread is also waiting to do the
  2318. // move, then tell that thread to take its hands off.
  2319. //
  2320. if ( Group->dwStructState & FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL )
  2321. {
  2322. Group->dwStructState |= FM_GROUP_STRUCT_MARKED_FOR_REGULAR_MOVE;
  2323. }
  2324. }
  2325. FmpReleaseLocalGroupLock( Group );
  2326. return(status);
  2327. } // FmpMoveGroup
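/*
 * [Editorial sketch - not part of the original source.] The
 * FmcTakeGroupRequest call above shows the RPC idiom this file relies on:
 * wrap the remote call in SEH, let I_RpcExceptionFilter classify the
 * exception, and treat a caught exception as "the callee may or may not
 * have run", to be reconciled out of band (here, via the GUM update). A
 * self-contained sketch; RemoteTakeGroup is a hypothetical client stub.
 * Inert under #if 0.
 */
#if 0
#include <windows.h>
#include <rpc.h>

DWORD RemoteTakeGroup(LPCWSTR GroupId); // hypothetical RPC client stub

static DWORD
CallRemoteTakeGroup(
    LPCWSTR GroupId
    )
{
    DWORD status;
    __try {
        status = RemoteTakeGroup(GroupId);
    } __except (I_RpcExceptionFilter(RpcExceptionCode())) {
        status = GetExceptionCode();    // e.g. RPC_S_CALL_FAILED
        //
        // The remote side may or may not have executed the request;
        // ownership must be reconciled by a separate protocol step.
        //
    }
    return(status);
}
#endif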
  2328. DWORD
  2329. FmpCompleteMoveGroup(
  2330. IN PFM_GROUP Group,
  2331. IN PNM_NODE DestinationNode
  2332. )
  2333. /*++
  2334. Routine Description:
  2335. This completes the move of a group by asking the other node to take
  2336. ownership.
  2337. This function is called by FmpMovePendingThread() after all the resources
  2338. are offline.
  2339. Arguments:
  2340. Group - Supplies a pointer to the group structure to move.
  2341. DestinationNode - Supplies the node object to move the group to. If not
  2342. present, then move it to 'highest' entry in the preferred list.
  2343. Returns:
  2344. ERROR_SUCCESS if the request was successful.
  2345. A Win32 error code on failure.
  2346. Notes:
  2347. It is assumed that the Group and all contained resources are offline
  2348. when this is called.
  2349. The LocalGroupLock MUST also be held. The LocalGroupLock is released
  2350. by this routine, especially before requesting a remote system to move
  2351. a group!
  2352. --*/
  2353. {
  2354. PNM_NODE node;
  2355. DWORD status = ERROR_SUCCESS;
  2356. PFM_RESOURCE resource;
  2357. PLIST_ENTRY listEntry;
  2358. PRESOURCE_ENUM resourceList=NULL;
  2359. DWORD dwMoveStatus = ERROR_SUCCESS;
  2360. ClRtlLogPrint(LOG_NOISE,
  2361. "[FM] CompleteMoveGroup: Entry for <%1!ws!>\r\n",
  2362. OmObjectName(Group) );
  2363. resourceList = Group->MovingList;
  2364. if ( resourceList == NULL ) {
  2365. ClRtlLogPrint( LOG_NOISE,
  2366. "[FM] CompleteMoveGroup: No moving list!\n" );
  2367. status = ERROR_SUCCESS;
  2368. goto FnRestore;
  2369. }
  2370. node = DestinationNode;
  2371. CL_ASSERT( node != NULL );
  2372. ClRtlLogPrint(LOG_NOISE,
  2373. "[FM] CompleteMoveGroup: Completing the move for group %1!ws! to node %2!ws! (%3!d!)\n",
  2374. OmObjectName(Group),
  2375. OmObjectId(node),
  2376. NmGetNodeId(node));
  2377. status = FmpOfflineResourceList( resourceList, TRUE );
  2378. if ( status != ERROR_SUCCESS ) {
2379. //by now the group must be offline!
2380. //if not, fail the move; the resource that fails to go
2381. //offline will force the other resources to come online
2382. //again.
2383. //how do we handle shutdowns?
  2384. goto FnRestore;
  2385. }
  2386. // for now make sure that the group state is propagated here
  2387. // In general it is propagated by the worker thread. Since
  2388. // the ownership is going to change, we want to make sure that the
  2389. // last known state is propagated from this node to others before
  2390. // that.
  2391. FmpPropagateGroupState(Group);
  2392. //
  2393. // Chittur Subbaraman (chitturs) - 10/01/1999
  2394. //
  2395. // If the other system is not up, then fail now. Note that this
  2396. // check must be done only AFTER ensuring that the group state
  2397. // is stable. Otherwise some funny corner cases can result.
  2398. // E.g., If the complete move operation is aborted when one or
  2399. // more resources are in offline pending state since the destination
  2400. // node went down, then you first terminate the resource list and
  2401. // then online the list. As a part of all this, the online pending
  2402. // or the online states of the resources could be propagated
  2403. // synchronously. Now, the offline notification from the previous
  2404. // offline attempt could come in and be processed by the FM worker
  2405. // thread way too late and you could have spurious resource states
  2406. // in FM while the real resource state is different. Another
  2407. // issue here is during the lengthy offline operation here, the
  2408. // destination node could go down and come back up soon after and
  2409. // so aborting the move may not be prudent in such a case.
  2410. //
  2411. // But, don't do this optimization for the quorum group. This is
  2412. // because once the quorum group is made offline, then MM
  2413. // could decide who the group owner is. So, you may not be able to
  2414. // bring the group online necessarily in this node. To avoid such
  2415. // a case, we let FmcTakeGroupRequest fail and then let either the
  2416. // retry loop here move the group somewhere else or let the
  2417. // FM node down handler decide on the group's owner consulting
  2418. // with MM.
  2419. //
  2420. if ( ( NmGetExtendedNodeState(node) != ClusterNodeUp ) &&
  2421. ( Group != gpQuoResource->Group ) )
  2422. {
  2423. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  2424. ClRtlLogPrint(LOG_NOISE,
  2425. "[FM] FmpCompleteMoveGroup: Restoring group <%1!ws!> on local node due to destination node unavailability...\n",
  2426. OmObjectName(Group));
  2427. goto FnRestore;
  2428. }
  2429. // SS::
  2430. // After this point the responsibility of failing the group
  2431. // back due to resource failures is with the destination code.
  2432. // If there is a failure to bring the resources online,
  2433. // the local restart policy on the destination node must kick
  2434. // in.
  2435. //
2436. // if there is an rpc failure to communicate with the other node,
2437. // I suppose we should bring the resources online here again.
2438. // However, rpc failures can be pretty non-descriptive - there is
2439. // no way to determine from rpc errors if the rpc call actually
2440. // executed on the remote side.
2441. //
2442. // but unless we are pretty careful about this and do what gum does
2443. // on rpc failures (banish the destination node), there is no way to
2444. // guarantee that both nodes don't retry to restart the group.
2445. // If the destination node begins the process of bringing resources
2446. // in the group online, FmsTakeGroupRequest must return success (note
2447. // it should not return ERROR_IO_PENDING); otherwise
2448. // it returns an error code and this node will bring the group back
2449. // to its previous state.
  2450. // Assume the other node is going to take ownership. This is done
  2451. // before, in case the Group state changes. We want to accept the
  2452. // Group/resource state changes from the remote system when they
  2453. // arrive. We've already verified that node is in the preferred list!
  2454. //
  2455. //we will reacquire the lock after making the rpc call
  2482. //
  2483. // Chittur Subbaraman (chitturs) - 5/18/99
  2484. //
  2485. // Modified to handle the move group request of a quorum group in
  2486. // case the destination node could not arbitrate for the quorum
  2487. // resource.
  2488. //
  2489. do
  2490. {
  2491. //
  2492. // Before making the RPC, set the intended owner of the group
  2493. //
  2494. FmpSetIntendedOwnerForGroup( Group, NmGetNodeId( node ) );
  2495. try {
  2496. ClRtlLogPrint(LOG_NOISE,
  2497. "[FM] FmpCompleteMoveGroup: Take group %2!ws! request to remote node %1!ws!\n",
  2498. OmObjectId(node),
  2499. OmObjectId(Group));
  2500. dwMoveStatus = status = FmcTakeGroupRequest( node, OmObjectId( Group ), resourceList );
  2501. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  2502. LPCWSTR pszNodeId;
  2503. LPCWSTR pszGroupId;
  2504. status = GetExceptionCode ();
  2505. ClRtlLogPrint(LOG_NOISE,
  2506. "[FM] FmpCompleteMoveGroup: Exception in FmcTakeGroupRequest %2!ws! request to remote node %1!ws!, status=%3!u!\n",
  2507. OmObjectId(node),
  2508. OmObjectId(Group),
  2509. status);
  2510. //
  2511. // An exception from RPC indicates that the other node is either dead
2512. // or insane. We don't know whether it took ownership or not.
  2513. // So, let the FM node down handler handle the group.
  2514. //
  2515. GumCommFailure( GumUpdateFailoverManager,
  2516. NmGetNodeId(node),
  2517. GetExceptionCode(),
  2518. TRUE );
  2519. //
  2520. // The new owner node that is now dead might have set the intended
  2521. // owner as NULL or it might not have set this. It might have
  2522. // set the owner node to himself or might not have.
  2523. // If it has set the owner node for this group as himself, then
  2524. // the FM node down handler will assume responsibility for this
  2525. // group. If the target node dies before it sets himself as the owner,
  2526. // then again, the FM node down handler will assume responsibility
  2527. // for the group. We wake up when the gum sync handling is over.
  2528. // Right now, the gum update for the owner node may still be in
2529. // progress so we can't be sure if that update was completed on
  2530. // all nodes.
  2531. //
  2532. //
  2533. // Chittur Subbaraman (chitturs) - 6/7/99
  2534. //
  2535. // Issue a GUM update to handle this group. Using this
  2536. // GUM update prevents any race condition with the
  2537. // node down processing code.
  2538. //
  2539. //
  2540. // TODO: This does not cover the case in which
  2541. // FmpTakeGroupRequest crashes after setting the
  2542. // intended owner to invalid ID. In such a case,
  2543. // the following handler won't take ownership of the
  2544. // group. Also, claim handler will not touch the
  2545. // group.
  2546. //
  2547. pszNodeId = OmObjectId( node );
  2548. pszGroupId = OmObjectId( Group );
  2549. GumSendUpdateEx( GumUpdateFailoverManager,
  2550. FmUpdateCompleteGroupMove,
  2551. 2,
  2552. (lstrlenW(pszNodeId)+1)*sizeof(WCHAR),
  2553. pszNodeId,
  2554. (lstrlenW(pszGroupId)+1)*sizeof(WCHAR),
  2555. pszGroupId);
  2556. status = ERROR_HOST_NODE_NOT_AVAILABLE;
  2557. goto FnExit;
  2558. }
  2559. if ( status == ERROR_RETRY )
  2560. {
  2561. //
  2562. // The destination refused to take the quorum group since it
  2563. // did not win the arbitration. So let us see who won the
  2564. // arbitration.
  2565. //
  2566. DWORD dwSelectedQuorumOwnerId;
  2567. CL_ASSERT( Group == gpQuoResource->Group );
  2568. ClRtlLogPrint(LOG_NOISE,
  2569. "[FM] FmpCompleteMoveGroup: Remote node asked us to resend take group request for group %1!ws! to another node ...\n",
  2570. OmObjectId( Group ));
  2571. //
  2572. // Get the ID of the node which the MM believes is the best
  2573. // candidate to own the quorum resource. This is a call that
  2574. // blocks while RGP is in progress.
  2575. //
  2576. MMApproxArbitrationWinner( &dwSelectedQuorumOwnerId );
  2577. if ( ( dwSelectedQuorumOwnerId == NmGetNodeId( NmLocalNode ) ) ||
  2578. ( dwSelectedQuorumOwnerId == MM_INVALID_NODE ) )
  2579. {
  2580. //
  2581. // The local node is chosen by MM or no node is chosen by
  2582. // the MM. The latter case will happen if no RGP has
  2583. // occurred at the time this call is made. Let us see if we
  2584. // can arbitrate for the quorum resource.
  2585. //
  2586. status = FmpRmArbitrateResource( gpQuoResource );
  2587. if ( status != ERROR_SUCCESS )
  2588. {
  2589. //
  2590. // Too bad. We will halt and let FmpNodeDown handler
  2591. // handle the quorum group.
  2592. //
  2593. ClRtlLogPrint(LOG_NOISE,
  2594. "[FM] FmpCompleteMoveGroup: Local node %1!u! cannot arbitrate for quorum group %3!ws!, Status = %2!u!...\n",
  2595. dwSelectedQuorumOwnerId,
  2596. status,
  2597. OmObjectId( Group ));
  2598. CsInconsistencyHalt( ERROR_QUORUM_RESOURCE_ONLINE_FAILED );
  2599. }
  2600. status = ERROR_RETRY;
  2601. break;
  2602. }
  2603. node = NmReferenceNodeById( dwSelectedQuorumOwnerId );
  2604. if ( node == NULL )
  2605. {
  2606. ClRtlLogPrint(LOG_CRITICAL,
  2607. "[FM] FmpCompleteMoveGroup: Selected node %1!u! cannot be referenced...\n",
  2608. dwSelectedQuorumOwnerId);
  2609. CsInconsistencyHalt( ERROR_QUORUM_RESOURCE_ONLINE_FAILED );
  2610. }
  2611. } // if
  2612. } while ( status == ERROR_RETRY );
  2613. // At this point, the onus of taking care of the group is with the
  2614. // destination node whether it means restarting the group or
  2615. // failing it back
  2616. FnRestore:
  2617. //if there is any failure try and restore the previous states
  2618. if ((status != ERROR_IO_PENDING) && (status != ERROR_SUCCESS))
  2619. {
  2620. //
  2621. // Chittur Subbaraman (chitturs) - 3/22/2000
  2622. //
  2623. // Reset the group's intended owner to invalid node ID if the
  2624. // node down handler did not do that.
  2625. //
  2626. if ( dwMoveStatus != ERROR_SUCCESS )
  2627. {
  2628. if ( FmpSetIntendedOwnerForGroup( Group, ClusterInvalidNodeId )
  2629. == ERROR_CLUSTER_INVALID_NODE )
  2630. {
  2631. ClRtlLogPrint(LOG_NOISE,
  2632. "[FM] FmpCompleteMoveGroup: Group <%1!ws!> has already been processed by node down handler....\r\n",
  2633. OmObjectName(Group));
  2634. goto FnExit;
  2635. }
  2636. }
  2637. if (resourceList)
  2638. {
  2639. FmpTerminateResourceList( resourceList );
  2640. //
  2641. // Chittur Subbaraman (chitturs) - 4/10/2000
  2642. //
  2643. // Make sure to online the quorum group even if this node is
  2644. // shutting down. This is necessary so that other groups
  2645. // can be brought offline during this node's shutdown. Note
  2646. // that FmpOnlineResourceList would only online a group
  2647. // during a shutdown if the group is the quorum group.
  2648. //
  2649. if ( FmpFMGroupsInited )
  2650. FmpOnlineResourceList( resourceList, Group );
  2651. }
  2652. } else
  2653. {
  2654. //
  2655. // Chittur Subbaraman (chitturs) - 4/19/99
  2656. //
  2657. // If the FmpDoMoveGroupOnFailure thread is also waiting to do the
  2658. // move, then tell that thread to take its hands off.
  2659. //
  2660. if ( Group->dwStructState & FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL )
  2661. {
  2662. Group->dwStructState |= FM_GROUP_STRUCT_MARKED_FOR_REGULAR_MOVE;
  2663. }
  2664. }
  2665. FnExit:
  2666. ClRtlLogPrint(LOG_NOISE,
  2667. "[FM] FmpCompleteMoveGroup: Exit, status = %1!u!\r\n",
  2668. status);
  2669. //if the status is success or some other error, clean up the resource list
  2670. if (status != ERROR_IO_PENDING)
  2671. {
  2672. if (resourceList)
  2673. {
  2674. FmpDeleteResourceEnum( resourceList );
  2675. Group->MovingList = NULL;
  2676. }
  2677. }
  2678. FmpReleaseLocalGroupLock( Group );
  2679. return(status);
  2680. } // FmpCompleteMoveGroup
  2681. DWORD
  2682. FmpMovePendingThread(
  2683. IN LPVOID Context
  2684. )
  2685. /*++
  2686. Routine Description:
  2687. Continue trying to move a group if ERROR_IO_PENDING is returned.
  2688. We need to perform this operation, because part way through a move
  2689. request, we could get a pending return status. The processing of the
  2690. request is halted and the pending status is returned. However, the
  2691. remainder of the move operation needs to be performed.
  2692. Arguments:
  2693. Context - Pointer to the MOVE_GROUP structure to move.
  2694. Returns:
  2695. ERROR_SUCCESS.
  2696. --*/
  2697. {
  2698. PMOVE_GROUP moveGroup = (PMOVE_GROUP)Context;
  2699. PFM_GROUP group;
  2700. PNM_NODE node;
  2701. DWORD status;
  2704. group = moveGroup->Group;
  2705. node = moveGroup->DestinationNode;
  2706. ClRtlLogPrint(LOG_NOISE,
  2707. "[FM] FmpMovePendingThread Entry.\n");
  2708. //
  2709. // We must attempt to finish the move request for this Group.
  2710. //
  2711. // We are waiting for a resource to go offline and it finally goes
  2712. // offline and the Group's pending event is set.
  2713. //
  2714. // Or we are waiting for cluster shutdown (FmpShutdownEvent)
  2715. //
  2716. WaitSomeMore:
  2717. //acquire the lock since fmpwaitforgroup() releases it
  2718. FmpAcquireLocalGroupLock( group );
  2719. status = FmpWaitForGroup(group);
  2720. if (status == ERROR_SHUTDOWN_IN_PROGRESS) {
  2721. //
  2722. // We've been asked to shutdown
  2723. //
  2724. } else if (status == ERROR_SUCCESS) {
  2725. //acquire the group lock before calling FmpCompleteMoveGroup
  2726. FmpAcquireLocalGroupLock( group );
  2727. status = FmpCompleteMoveGroup( group, node );
  2728. if ( status == ERROR_IO_PENDING ) {
2729. Sleep(500); // [HACKHACK] kludgy, I know, but a nicer solution might break something else
  2730. goto WaitSomeMore;
  2731. }
  2732. } else {
  2733. ClRtlLogPrint(LOG_UNUSUAL,
  2734. "[FM] FmpMovePendingThread got error %1!d! waiting for group to shutdown.\n",
  2735. status);
  2736. }
  2737. //
  2738. // We're done with the move now.
  2739. //
  2740. if ( status != ERROR_IO_PENDING ) {
  2741. CL_ASSERT( group->MovingList == NULL );
  2742. }
  2743. //
  2744. // Now dereference the Group and node object (if non-NULL) and
  2745. // free our local context.
  2746. //
  2747. OmDereferenceObject( group );
  2748. if ( node != NULL ) {
  2749. OmDereferenceObject( node );
  2750. }
  2751. LocalFree( Context );
  2752. ClRtlLogPrint(LOG_NOISE,
  2753. "[FM] FmpMovePendingThread Exit.\n");
  2754. return(ERROR_SUCCESS);
  2755. } // FmpMovePendingThread
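/*
 * [Editorial sketch - not part of the original source.] The WaitSomeMore
 * loop above retries the completion call for as long as it reports
 * ERROR_IO_PENDING, pausing briefly between attempts. The same shape,
 * self-contained; TryCompleteMove is a hypothetical callback. Inert
 * under #if 0.
 */
#if 0
#include <windows.h>

typedef DWORD (*COMPLETE_FN)(PVOID Ctx); // may return ERROR_IO_PENDING

static DWORD
CompletePendingMove(
    COMPLETE_FN TryCompleteMove,
    PVOID Ctx
    )
{
    DWORD status;
    do {
        status = TryCompleteMove(Ctx);
        if (status == ERROR_IO_PENDING) {
            Sleep(500);                 // mirrors the Sleep(500) above
        }
    } while (status == ERROR_IO_PENDING);
    return(status);
}
#endif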
  2756. DWORD
  2757. FmpCreateMovePendingThread(
  2758. IN PFM_GROUP Group,
  2759. IN PNM_NODE DestinationNode
  2760. )
  2761. /*++
  2762. Routine Description:
2763. Create a thread that will continue to call the move routine for a given
  2764. Group.
  2765. Arguments:
  2766. Group - A pointer to the Group to move.
  2767. DestinationNode - The destination node for the move request.
  2768. Returns:
  2769. ERROR_IO_PENDING if the thread was created successfully. This assumes
  2770. that this routine was called because of this error return.
  2771. A Win32 error code on failure.
  2772. --*/
  2773. {
  2774. HANDLE threadHandle=NULL;
  2775. DWORD threadId;
  2776. PMOVE_GROUP context=NULL;
  2777. DWORD status=ERROR_IO_PENDING; //assume success
  2778. FmpAcquireLocalGroupLock( Group );
  2779. if ( Group->OwnerNode != NmLocalNode ) {
  2780. status = ERROR_HOST_NODE_NOT_RESOURCE_OWNER;
  2781. goto FnExit;
  2782. }
  2783. //
  2784. // If there is a pending event, then the group is not available for any
  2785. // new requests.
  2786. //
  2787. if ( FmpIsGroupPending(Group) ) {
  2788. status = ERROR_GROUP_NOT_AVAILABLE;
  2789. goto FnExit;
  2790. }
  2791. context = LocalAlloc(LMEM_FIXED, sizeof(MOVE_GROUP));
  2792. if ( context == NULL ) {
  2793. status = ERROR_NOT_ENOUGH_MEMORY;
  2794. goto FnExit;
  2795. }
  2796. //
  2797. // Keep reference on the Group and node object (if present) while we
  2798. // retain pointers.
  2799. //
  2800. OmReferenceObject( Group );
  2801. if ( DestinationNode != NULL ) {
  2802. OmReferenceObject( DestinationNode );
  2803. }
  2804. //
  2805. // Fill in context fields
  2806. //
  2807. context->Group = Group;
  2808. context->DestinationNode = DestinationNode;
  2809. threadHandle = CreateThread( NULL,
  2810. 0,
  2811. FmpMovePendingThread,
  2812. context,
  2813. 0,
  2814. &threadId );
  2815. if ( threadHandle == NULL )
  2816. {
  2817. OmDereferenceObject( Group );
  2818. if ( DestinationNode != NULL ) {
  2819. OmDereferenceObject( DestinationNode );
  2820. }
  2821. status = GetLastError();
  2822. LocalFree(context);
  2823. goto FnExit;
  2824. }
  2825. FnExit:
  2826. if (threadHandle) CloseHandle( threadHandle );
  2827. FmpReleaseLocalGroupLock( Group );
  2828. return(status);
  2829. } // FmpCreateMovePendingThread
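/*
 * [Editorial sketch - not part of the original source.] The routine above
 * follows the usual Win32 ownership rule for thread contexts: once
 * CreateThread succeeds, the context (and the object references it
 * carries) belongs to the new thread, which frees it on exit; only on
 * CreateThread failure does the creator free it. A minimal self-contained
 * sketch with hypothetical WORK_CTX/Worker/StartWorker names; inert
 * under #if 0.
 */
#if 0
#include <windows.h>

typedef struct _WORK_CTX {
    int Arg;                            // payload (placeholder)
} WORK_CTX, *PWORK_CTX;

static DWORD WINAPI
Worker(
    LPVOID lpParameter
    )
{
    PWORK_CTX ctx = (PWORK_CTX)lpParameter;
    // ... do the deferred work with ctx->Arg ...
    LocalFree(ctx);                     // the thread owns the context now
    return(ERROR_SUCCESS);
}

static DWORD
StartWorker(
    int Arg
    )
{
    DWORD tid;
    HANDLE hThread;
    PWORK_CTX ctx = (PWORK_CTX)LocalAlloc(LMEM_FIXED, sizeof(WORK_CTX));
    if (ctx == NULL) {
        return(ERROR_NOT_ENOUGH_MEMORY);
    }
    ctx->Arg = Arg;
    hThread = CreateThread(NULL, 0, Worker, ctx, 0, &tid);
    if (hThread == NULL) {
        DWORD status = GetLastError();
        LocalFree(ctx);                 // thread never started; we clean up
        return(status);
    }
    CloseHandle(hThread);               // fire and forget
    return(ERROR_IO_PENDING);           // work continues asynchronously
}
#endif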
  2830. DWORD
  2831. FmpDoMoveGroup(
  2832. IN PFM_GROUP Group,
  2833. IN PNM_NODE DestinationNode,
  2834. IN BOOL bChooseMostPreferredNode
  2835. )
  2836. /*++
  2837. Routine Description:
  2838. This routine performs the action of moving a Group. This requires taking
  2839. a Group offline and then bringing the Group online. The Offline and
  2840. Online requests may pend, so we have to pick up the work in order to
  2841. complete the request. This means handling the offline pending case, since
  2842. the online pending request will eventually complete.
  2843. Arguments:
  2844. Group - The Group to move.
  2845. DestinationNode - The destination node for the move request.
  2846. bChooseMostPreferredNode - If the destination node is not supplied,
  2847. indicates whether to choose the most preferred node or not.
  2848. Returns:
  2849. ERROR_SUCCESS if successful.
  2850. A Win32 error code on failure.
  2851. --*/
  2852. {
  2853. DWORD status;
  2854. PNM_NODE node;
  2855. PNM_NODE ChosenDestinationNode = NULL;
  2856. //
  2857. // We can only support one request on this Group at a time.
  2858. //
  2859. ClRtlLogPrint(LOG_NOISE,
  2860. "[FM] FmpDoMoveGroup: Entry\r\n");
  2861. FmpAcquireLocalGroupLock( Group );
  2862. //if the group has been marked for delete, then fail this call
  2863. if (!IS_VALID_FM_GROUP(Group))
  2864. {
  2865. FmpReleaseLocalGroupLock( Group);
  2866. return (ERROR_GROUP_NOT_AVAILABLE);
  2867. }
  2868. if ( FmpIsGroupPending(Group) ) {
  2869. FmpReleaseLocalGroupLock( Group );
  2870. return(ERROR_GROUP_NOT_AVAILABLE);
  2871. }
  2872. node = Group->OwnerNode;
  2873. // Note: the local group lock is released by the FmpMoveGroup routine.
  2874. status = FmpMoveGroup( Group, DestinationNode, FALSE, &ChosenDestinationNode, bChooseMostPreferredNode );
  2875. //
  2876. // If we were the owner of the group and the request is pending, then
  2877. // start a thread to complete the move request.
  2878. //
  2879. if ( (node == NmLocalNode) &&
  2880. (status == ERROR_IO_PENDING) ) {
  2881. status = FmpCreateMovePendingThread( Group, ChosenDestinationNode );
  2882. }
  2883. //
  2884. // Chittur Subbaraman (chitturs) - 7/31/2000
  2885. //
  2886. // Log an event to the eventlog if the group is moving due to a failure.
  2887. //
  2888. if ( ( bChooseMostPreferredNode == FALSE ) &&
  2889. ( ( status == ERROR_SUCCESS ) || ( status == ERROR_IO_PENDING ) ) )
  2890. {
  2891. CsLogEvent3( LOG_NOISE,
  2892. FM_EVENT_GROUP_FAILOVER,
  2893. OmObjectName(Group),
  2894. OmObjectName(NmLocalNode),
  2895. OmObjectName(ChosenDestinationNode) );
  2896. }
  2897. ClRtlLogPrint(LOG_NOISE,
  2898. "[FM] FmpDoMoveGroup: Exit, status = %1!u!\r\n",
  2899. status);
  2900. return(status);
  2901. } // FmpDoMoveGroup
  2902. DWORD
  2903. FmpTakeGroupRequest(
  2904. IN PFM_GROUP Group,
  2905. IN PRESOURCE_ENUM ResourceList
  2906. )
  2907. /*++
  2908. Routine Description:
  2909. Performs a Take Group Request from (THE) remote system and returns
  2910. status for that request.
  2911. Arguments:
  2912. Group - The Group to take online locally.
  2913. ResourceList - The list of resources and their states.
  2914. Return Value:
  2915. ERROR_SUCCESS if successful.
  2916. A Win32 error code on error.
  2917. --*/
  2918. {
  2919. DWORD status = ERROR_SUCCESS;
  2920. ClRtlLogPrint(LOG_NOISE,
  2921. "[FM] FmpTakeGroupRequest: To take group '%1!ws!'.\n",
  2922. OmObjectId(Group) );
  2923. FmpAcquireLocalGroupLock( Group );
  2924. if ( !FmpFMOnline )
  2925. {
  2926. if (FmpShutdown)
  2927. status = ERROR_CLUSTER_NODE_SHUTTING_DOWN;
  2928. else
  2929. status = ERROR_CLUSTER_NODE_NOT_READY;
  2930. CL_LOGFAILURE(status);
  2931. ClRtlLogPrint(LOG_NOISE,
  2932. "[FM] FmpTakeGroupRequest: Group '%1!ws!' cannot be accepted, status=%2!u!...\n",
  2933. OmObjectId(Group),
  2934. status);
  2935. //
  2936. // Chittur Subbaraman (chitturs) - 7/5/2000
  2937. //
  2938. // Make sure you ask the source node to relocate the quorum group some place else
  2939. // after consulting with MM.
  2940. //
  2941. if ( gpQuoResource->Group == Group ) status = ERROR_RETRY;
  2942. goto FnExit;
  2943. }
2944. //everybody should be able to host the quorum group,
2945. //so we don't check the preferred owner list for this group
  2946. if ( ( gpQuoResource->Group != Group) &&
  2947. !FmpInPreferredList( Group, NmLocalNode, FALSE, NULL) )
  2948. {
  2949. //
  2950. // Nobody should ever ask us to take a group that can't run here.
  2951. //
  2952. status = ERROR_CLUSTER_NODE_NOT_FOUND;
  2953. CL_LOGFAILURE( status);
  2954. goto FnExit;
  2955. }
  2956. //
  2957. // Take ownership of the Group.
  2958. //
  2959. if ( Group->OwnerNode == NmLocalNode ) {
2960. //SS:://We are already the owner?? How did this happen?
  2961. status = ERROR_SUCCESS;
  2962. goto FnExit;
  2963. }
  2964. //
  2965. // Chittur Subbaraman (chitturs) - 5/18/99
  2966. //
  2967. // Handle quorum group in a special way. Make sure you can arbitrate
  2968. // for the quorum resource. If not, you could get killed when you
  2969. // try to bring it online and you fail.
  2970. //
  2971. if ( Group == gpQuoResource->Group )
  2972. {
  2973. status = FmpRmArbitrateResource( gpQuoResource );
  2974. if ( status != ERROR_SUCCESS )
  2975. {
  2976. ClRtlLogPrint(LOG_NOISE,
  2977. "[FM] FmpTakeGroupRequest: MM did not select local node %1!u! as the arbitration winner...\n\r",
  2978. NmLocalNodeId,
  2979. status);
  2980. status = ERROR_RETRY;
  2981. goto FnExit;
  2982. }
  2983. }
  2984. status = FmpSetOwnerForGroup( Group, NmLocalNode );
  2985. if ( status != ERROR_SUCCESS )
  2986. {
  2987. ClRtlLogPrint(LOG_NOISE,
  2988. "[FM] FmpTakeGroupRequest: Set owner GUM update returns %1!u! for group <%2!ws!>...\n\r",
  2989. status,
  2990. OmObjectId(Group));
  2991. if ( status == ERROR_GROUP_NOT_AVAILABLE )
  2992. {
  2993. //
  2994. // If the node down processing GUM handler has claimed ownership
  2995. // of this group, consider everything as being fine.
  2996. //
  2997. status = ERROR_SUCCESS;
  2998. }
  2999. goto FnExit;
  3000. }
  3001. FmpSetIntendedOwnerForGroup(Group, ClusterInvalidNodeId);
  3002. // prepare to bring this group online
  3003. FmpPrepareGroupForOnline( Group );
  3004. //
  3005. // Online what needs to be online.
  3006. //
  3007. // SS: Note that we ignore the error from FmpOnlineResourceList
  3008. // This is because at this point the onus of taking care of the group
  3009. // is with us.
  3010. //
  3011. FmpOnlineResourceList( ResourceList, Group );
  3012. FnExit:
  3013. FmpReleaseLocalGroupLock( Group );
  3014. ClRtlLogPrint(LOG_NOISE,
  3015. "[FM] FmpTakeGroupRequest: Exit for group <%1!ws!>, Status = %2!u!...\n",
  3016. OmObjectId(Group),
  3017. status);
  3018. return(status);
  3019. } // FmpTakeGroupRequest
  3020. DWORD
  3021. FmpUpdateChangeGroupName(
  3022. IN BOOL SourceNode,
  3023. IN LPCWSTR GroupId,
  3024. IN LPCWSTR NewName
  3025. )
  3026. /*++
  3027. Routine Description:
  3028. GUM dispatch routine for changing the friendly name of a group.
  3029. Arguments:
  3030. SourceNode - Supplies whether or not this node initiated the GUM update.
  3031. Not used.
3032. GroupId - Supplies the group ID.
  3033. NewName - Supplies the new friendly name.
  3034. Return Value:
  3035. ERROR_SUCCESS if successful.
  3036. Win32 error code otherwise.
  3037. --*/
  3038. {
  3039. PFM_GROUP Group;
  3040. DWORD Status;
  3041. //
  3042. // Chittur Subbaraman (chitturs) - 4/19/98
  3043. //
  3044. // If FM groups are not initialized or FM is shutting down, don't
  3045. // do anything.
  3046. //
  3047. if ( !FmpFMGroupsInited ||
  3048. FmpShutdown ) {
  3049. return(ERROR_SUCCESS);
  3050. }
  3051. Group = OmReferenceObjectById(ObjectTypeGroup, GroupId);
  3052. if (Group == NULL) {
  3053. return(ERROR_GROUP_NOT_FOUND);
  3054. }
  3055. Status = OmSetObjectName( Group, NewName);
  3056. if (Status == ERROR_SUCCESS) {
  3057. ClusterEvent(CLUSTER_EVENT_GROUP_PROPERTY_CHANGE, Group);
  3058. }
  3059. OmDereferenceObject(Group);
  3060. return(Status);
  3061. } // FmpUpdateChangeGroupName
  3062. BOOL
  3063. FmpEnumGroupNodeEvict(
  3064. IN PVOID Context1,
  3065. IN PVOID Context2,
  3066. IN PVOID Object,
  3067. IN LPCWSTR Name
  3068. )
  3069. /*++
  3070. Routine Description:
  3071. Group enumeration callback for removing node references when
  3072. a node is evicted.
  3073. Arguments:
  3074. Context1 - Supplies the node that is being evicted.
  3075. Context2 - not used
  3076. Object - Supplies a pointer to the group object
  3077. Name - Supplies the object name.
  3078. Return Value:
  3079. TRUE to continue enumeration
  3080. --*/
  3081. {
  3082. PFM_GROUP Group = (PFM_GROUP)Object;
  3083. PNM_NODE Node = (PNM_NODE)Context1;
  3084. PLIST_ENTRY listEntry;
  3085. PPREFERRED_ENTRY preferredEntry;
  3086. ClRtlLogPrint(LOG_NOISE,
  3087. "[FM] EnumGroupNodeEvict: Removing references to node %1!ws! from group %2!ws!\n",
  3088. OmObjectId(Node),
  3089. OmObjectId(Group));
  3090. FmpAcquireLocalGroupLock(Group);
  3091. //
  3092. // Walk the list of preferred owners. If this node is in the list, remove it.
  3093. //
  3094. for ( listEntry = Group->PreferredOwners.Flink;
  3095. listEntry != &(Group->PreferredOwners);
  3096. listEntry = listEntry->Flink ) {
  3097. preferredEntry = CONTAINING_RECORD( listEntry,
  3098. PREFERRED_ENTRY,
  3099. PreferredLinkage );
  3100. if ( preferredEntry->PreferredNode == Node ) {
  3101. RemoveEntryList(&preferredEntry->PreferredLinkage);
  3102. OmDereferenceObject(preferredEntry->PreferredNode);
  3103. LocalFree(preferredEntry);
  3104. break;
  3105. }
  3106. }
  3107. FmpReleaseLocalGroupLock(Group);
  3108. ClusterEvent(CLUSTER_EVENT_GROUP_PROPERTY_CHANGE, Group);
  3109. return(TRUE);
  3110. } // FmpEnumGroupNodeEvict
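/*
 * [Editorial sketch - not part of the original source.] The preferred-
 * owners walk above is the classic NT intrusive LIST_ENTRY idiom:
 * iterate Flink until you are back at the head, recover the container
 * with CONTAINING_RECORD, unlink, and free. A self-contained user-mode
 * sketch (OWNER_ENTRY and LocalAlloc'd entries are assumptions; the
 * unlink is written out because the kernel list macros are not in the
 * user-mode headers). Inert under #if 0.
 */
#if 0
#include <windows.h>

typedef struct _OWNER_ENTRY {
    LIST_ENTRY Linkage;                 // intrusive link
    DWORD NodeId;
} OWNER_ENTRY, *POWNER_ENTRY;

static VOID
RemoveOwner(
    PLIST_ENTRY Head,
    DWORD NodeId
    )
{
    PLIST_ENTRY listEntry;
    for ( listEntry = Head->Flink;
          listEntry != Head;
          listEntry = listEntry->Flink ) {
        POWNER_ENTRY ownerEntry = CONTAINING_RECORD( listEntry,
                                                     OWNER_ENTRY,
                                                     Linkage );
        if (ownerEntry->NodeId == NodeId) {
            // unlink (RemoveEntryList equivalent)
            listEntry->Blink->Flink = listEntry->Flink;
            listEntry->Flink->Blink = listEntry->Blink;
            LocalFree(ownerEntry);
            break;                      // each node appears at most once
        }
    }
}
#endif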
  3111. VOID
  3112. FmpSignalGroupWaiters(
  3113. IN PFM_GROUP Group
  3114. )
  3115. /*++
  3116. Routine Description:
  3117. Wakes up any threads waiting for this group to achieve a
  3118. stable state.
  3119. Arguments:
  3120. Group - Supplies the group.
  3121. Return Value:
  3122. None.
  3123. --*/
  3124. {
  3125. PLIST_ENTRY ListEntry;
  3126. PFM_WAIT_BLOCK WaitBlock;
  3127. FmpAcquireLocalGroupLock( Group );
  3128. while (!IsListEmpty(&Group->WaitQueue)) {
  3129. ListEntry = RemoveHeadList(&Group->WaitQueue);
  3130. WaitBlock = CONTAINING_RECORD(ListEntry,
  3131. FM_WAIT_BLOCK,
  3132. ListEntry);
  3133. WaitBlock->Status = ERROR_SUCCESS;
  3134. SetEvent(WaitBlock->hEvent);
  3135. }
  3136. FmpReleaseLocalGroupLock( Group );
  3137. }
  3138. DWORD
  3139. FmpWaitForGroup(
  3140. IN PFM_GROUP Group
  3141. )
  3142. /*++
  3143. Routine Description:
  3144. Waits for a group to reach a stable state.
  3145. Arguments:
  3146. Group - supplies the group
3147. Comments - Assumes that the group lock is held when this is called.
3148. This function releases the group lock before waiting.
  3149. Return Value:
  3150. ERROR_SUCCESS if successful
  3151. ERROR_SHUTDOWN_IN_PROGRESS if the cluster is being shutdown
  3152. Win32 error code otherwise
  3153. --*/
  3154. {
  3155. FM_WAIT_BLOCK WaitBlock;
  3156. HANDLE WaitArray[2];
  3157. DWORD Status;
  3158. CLUSTER_GROUP_STATE GroupState;
  3159. WaitBlock.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
  3160. if (WaitBlock.hEvent == NULL) {
  3161. FmpReleaseLocalGroupLock( Group );
  3162. return(GetLastError());
  3163. }
  3164. //
  3165. // Check to see if it transitioned before we got the lock.
  3166. //
3167. GroupState = FmpGetGroupState( Group, TRUE );
  3168. if ((GroupState == ClusterGroupOffline) ||
  3169. (GroupState == ClusterGroupOnline) ||
  3170. (GroupState == ClusterGroupFailed)) {
  3171. CloseHandle( WaitBlock.hEvent );
  3172. FmpReleaseLocalGroupLock( Group );
  3173. return(ERROR_SUCCESS);
  3174. }
  3175. //
  3176. // Chittur Subbaraman (chitturs) - 10/31/1999
  3177. //
  3178. // Now before waiting, really make sure one or more resources in the
  3179. // group is in pending state.
  3180. //
  3181. GroupState = FmpGetGroupState( Group, FALSE );
  3182. if ( GroupState != ClusterGroupPending ) {
  3183. CloseHandle( WaitBlock.hEvent );
  3184. FmpReleaseLocalGroupLock( Group );
  3185. ClRtlLogPrint(LOG_NOISE,
  3186. "[FM] FmpWaitForGroup: Group <%1!ws!> state is %2!d!, not waiting for event...\r\n",
  3187. OmObjectName(Group),
  3188. GroupState );
  3189. return( ERROR_SUCCESS );
  3190. }
  3191. //
  3192. // Add this wait block to the queue.
  3193. //
  3194. InsertTailList(&Group->WaitQueue, &WaitBlock.ListEntry);
  3195. FmpReleaseLocalGroupLock( Group );
  3196. //
  3197. // Wait for the group to become stable or for the cluster to shutdown.
  3198. //
  3199. WaitArray[0] = FmpShutdownEvent;
  3200. WaitArray[1] = WaitBlock.hEvent;
  3201. Status = WaitForMultipleObjects(2, WaitArray, FALSE, INFINITE);
  3202. CloseHandle(WaitBlock.hEvent);
3203. if (Status == WAIT_OBJECT_0) {
  3204. return(ERROR_SHUTDOWN_IN_PROGRESS);
  3205. } else {
  3206. return(WaitBlock.Status);
  3207. }
  3208. }
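//
// A minimal usage sketch, assuming only what the comments above state:
// FmpWaitForGroup expects the group lock to be held on entry and releases
// it on every path before returning or waiting. The helper name below is
// hypothetical.
//
static DWORD
FmpSketchWaitForStableGroup(
    IN PFM_GROUP Group
    )
{
    //
    // Acquire the lock; FmpWaitForGroup releases it before blocking, so no
    // release is needed here.
    //
    FmpAcquireLocalGroupLock( Group );

    return( FmpWaitForGroup( Group ) );
}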
  3209. /****
  3210. @func DWORD | FmpDeleteGroup| This makes the gum call to delete the
  3211. group.
  3212. @parm IN PFM_GROUP | pGroup | The group that must be deleted.
  3213. @comm The group lock must be held when calling this api.
  3214. @rdesc Returns a result code. ERROR_SUCCESS on success.
  3215. ****/
  3216. DWORD
  3217. FmpDeleteGroup(
  3218. IN PFM_GROUP pGroup)
  3219. {
  3220. PCWSTR pszGroupId;
  3221. DWORD dwBufSize;
  3222. DWORD dwGroupLen;
  3223. DWORD dwStatus;
  3224. pszGroupId = OmObjectId( pGroup );
  3225. dwGroupLen = (lstrlenW(pszGroupId)+1) * sizeof(WCHAR);
  3226. //
  3227. // Send message.
  3228. //
  3229. dwStatus = GumSendUpdateEx(GumUpdateFailoverManager,
  3230. FmUpdateDeleteGroup,
  3231. 1,
  3232. dwGroupLen,
  3233. pszGroupId);
  3234. return(dwStatus);
  3235. }
  3236. VOID
  3237. FmpGroupLastReference(
  3238. IN PFM_GROUP pGroup
  3239. )
  3240. /*++
  3241. Routine Description:
3242. Processing routine for the last dereference of a group object.
3243. All cleanup for a group should really be done here!
3244. Arguments:
3245. pGroup - pointer to the group being removed.
  3246. Return Value:
  3247. None.
  3248. --*/
  3249. {
  3250. if ( pGroup->OwnerNode != NULL )
  3251. OmDereferenceObject(pGroup->OwnerNode);
  3252. if (pGroup->dwStructState & FM_GROUP_STRUCT_CREATED)
  3253. DeleteCriticalSection(&pGroup->Lock);
  3254. return;
  3255. } // FmpGroupLastReference
  3256. DWORD
  3257. FmpDoMoveGroupOnFailure(
  3258. IN LPVOID pContext
  3259. )
  3260. /*++
  3261. Routine Description:
  3262. Move a group after ensuring that all resources in the group are
  3263. in stable state. This thread is forked from FmpHandleGroupFailure.
  3264. Arguments:
  3265. pContext - Pointer to the MOVE_GROUP structure to move.
  3266. Returns:
  3267. ERROR_SUCCESS.
  3268. --*/
  3269. {
  3270. PMOVE_GROUP pMoveGroup = ( PMOVE_GROUP ) pContext;
  3271. PFM_GROUP pGroup;
  3272. DWORD dwStatus;
  3273. PLIST_ENTRY pListEntry;
  3274. PFM_RESOURCE pResource;
  3275. //
  3276. // Chittur Subbaraman (chitturs) - 4/13/99
  3277. //
  3278. // This thread first waits until all the resources within the
  3279. // failed group are in stable state and then initiates the
  3280. // move.
  3281. //
  3282. pGroup = pMoveGroup->Group;
  3283. ClRtlLogPrint(LOG_NOISE,
  3284. "[FM] FmpDoMoveGroupOnFailure: Entry for Group <%1!ws!>...\n",
  3285. OmObjectId(pGroup));
  3286. TryAgain:
  3287. FmpAcquireLocalGroupLock( pGroup );
  3288. //
  3289. // This thread must yield if someone else takes responsibility for
  3290. // the move.
  3291. //
  3292. // Condition 1: Protects against the case in which someone moves
  3293. // the group to another node and back to you while this thread is
  3294. // sleeping (very rare, I agree).
  3295. //
  3296. // Condition 2: Protects against the common move case.
  3297. //
  3298. // Condition 3: Protects against the case in which the
  3299. // FmpMovePendingThread is waiting in FmpWaitForGroup while
  3300. // this thread got the resource lock and reached here.
  3301. //
  3302. if ( ( pGroup->dwStructState &
  3303. FM_GROUP_STRUCT_MARKED_FOR_REGULAR_MOVE ) ||
  3304. ( pGroup->OwnerNode != NmLocalNode ) ||
  3305. ( pGroup->MovingList != NULL ) )
  3306. {
  3307. ClRtlLogPrint(LOG_NOISE,
  3308. "[FM] FmpDoMoveGroupOnFailure: Group <%1!ws!> move being yielded to someone else who is moving it...\n",
  3309. OmObjectId(pGroup));
  3310. goto FnExit;
  3311. }
  3312. //
  3313. // If FM is shutting down, just exit.
  3314. //
  3315. if ( FmpShutdown )
  3316. {
  3317. ClRtlLogPrint(LOG_NOISE,
  3318. "[FM] FmpDoMoveGroupOnFailure: Giving up Group <%1!ws!> move. FM is shutting down ...\n",
  3319. OmObjectId(pGroup));
  3320. goto FnExit;
  3321. }
  3322. //
  3323. // If the group has been marked for delete, then also exit. This is
  3324. // just an optimization. FmpDoMoveGroup does this check also.
  3325. //
  3326. if ( !IS_VALID_FM_GROUP( pGroup ) )
  3327. {
  3328. ClRtlLogPrint(LOG_NOISE,
  3329. "[FM] FmpDoMoveGroupOnFailure: Group <%1!ws!> marked for delete. Exiting ...\n",
  3330. OmObjectId(pGroup));
  3331. goto FnExit;
  3332. }
  3333. //
  3334. // Wait until all resources within the group become stable.
  3335. //
  3336. for ( pListEntry = pGroup->Contains.Flink;
  3337. pListEntry != &(pGroup->Contains);
  3338. pListEntry = pListEntry->Flink )
  3339. {
  3340. pResource = CONTAINING_RECORD( pListEntry,
  3341. FM_RESOURCE,
  3342. ContainsLinkage );
  3343. if ( pResource->State > ClusterResourcePending )
  3344. {
  3345. FmpReleaseLocalGroupLock( pGroup );
  3346. Sleep ( 200 );
  3347. goto TryAgain;
  3348. }
  3349. }
  3350. //
  3351. // Initiate a move now that the group is quiet.
  3352. //
  3353. dwStatus = FmpDoMoveGroup( pGroup, NULL, FALSE );
  3354. ClRtlLogPrint(LOG_NOISE,
  3355. "[FM] FmpDoMoveGroupOnFailure: FmpDoMoveGroup returns %1!u!\n",
  3356. dwStatus);
  3357. FnExit:
  3358. LocalFree( pContext );
  3359. pGroup->dwStructState &=
  3360. ~( FM_GROUP_STRUCT_MARKED_FOR_MOVE_ON_FAIL | FM_GROUP_STRUCT_MARKED_FOR_REGULAR_MOVE );
  3361. FmpReleaseLocalGroupLock( pGroup );
  3362. OmDereferenceObject( pGroup );
  3363. ClRtlLogPrint(LOG_NOISE,
  3364. "[FM] FmpDoMoveGroupOnFailure Exit.\n");
  3365. return( ERROR_SUCCESS );
  3366. } // FmpDoMoveGroupOnFailure
  3367. /****
  3368. @func DWORD | FmpSetIntendedOwnerForGroup| This makes the gum call
  3369. to set the intended owner for the group before a move.
  3370. @parm IN PFM_GROUP | pGroup | The group whose intended owner
  3371. is to be set.
  3372. @comm The local group lock is held while making this call.
  3373. @rdesc Returns a result code. ERROR_SUCCESS on success.
  3374. ****/
  3375. DWORD FmpSetIntendedOwnerForGroup(
  3376. IN PFM_GROUP pGroup,
  3377. IN DWORD dwNodeId)
  3378. {
  3379. PCWSTR pszGroupId;
  3380. DWORD dwGroupLen;
  3381. DWORD dwStatus;
  3382. pszGroupId = OmObjectId( pGroup );
  3383. dwGroupLen = (lstrlenW(pszGroupId)+1) * sizeof(WCHAR);
  3384. //
  3385. // Send message.
  3386. //
  3387. dwStatus = GumSendUpdateEx(GumUpdateFailoverManager,
  3388. FmUpdateGroupIntendedOwner,
  3389. 2,
  3390. dwGroupLen,
  3391. pszGroupId,
  3392. sizeof(DWORD),
  3393. &dwNodeId
  3394. );
  3395. return(dwStatus);
  3396. }
  3397. /****
  3398. @func DWORD | FmpSetOwnerForGroup | On a move the new owner
  3399. node makes this gum call to inform all nodes that it
  3400. owns this particular group.
  3401. @parm IN PFM_GROUP | pGroup | The group whose owner must be set.
  3402. @parm IN PNM_NODE | pNode | The group's owner node.
  3403. @comm The local group lock is held while making this call.
  3404. @rdesc Returns a result code. ERROR_SUCCESS on success.
  3405. ****/
  3406. DWORD FmpSetOwnerForGroup(
  3407. IN PFM_GROUP pGroup,
  3408. IN PNM_NODE pNode
  3409. )
  3410. {
  3411. PCWSTR pszGroupId;
  3412. PCWSTR pszNodeId;
  3413. DWORD dwGroupLen;
  3414. DWORD dwNodeLen;
  3415. DWORD dwStatus;
  3416. pszGroupId = OmObjectId( pGroup );
  3417. dwGroupLen = (lstrlenW(pszGroupId)+1) * sizeof(WCHAR);
  3418. pszNodeId = OmObjectId(pNode);
  3419. dwNodeLen = (lstrlenW(pszNodeId)+1) * sizeof(WCHAR);
  3420. //
  3421. // Send message.
  3422. //
  3423. dwStatus = GumSendUpdateEx(GumUpdateFailoverManager,
  3424. FmUpdateCheckAndSetGroupOwner,
  3425. 2,
  3426. dwGroupLen,
  3427. pszGroupId,
  3428. dwNodeLen,
  3429. pszNodeId
  3430. );
  3431. return(dwStatus);
  3432. }
  3433. PNM_NODE
  3434. FmpGetNodeNotHostingUndesiredGroups(
  3435. IN PFM_GROUP pGroup,
  3436. IN BOOL fRuleOutLocalNode
  3437. )
  3438. /*++
  3439. Routine Description:
  3440. Find a preferred node that does not host groups with CLUSREG_NAME_GRP_ANTI_AFFINITY_CLASS_NAME
  3441. property set to the same value as the supplied group.
  3442. Arguments:
  3443. pGroup - Pointer to the group object we're checking.
  3444. fRuleOutLocalNode - Should the local node be considered or not.
  3445. Return Value:
  3446. Pointer to node object that satisfies the anti-affinity condition.
3447. NULL if a node cannot be found.
3448. Note:
3449. The antiaffinity property value is defined as a MULTI_SZ property. However, for this implementation
3450. we ignore all string values beyond the first one. The MULTI_SZ definition is to allow
  3451. future expansion of the algorithm implemented by this function.
  3452. --*/
  3453. {
  3454. PLIST_ENTRY plistEntry;
  3455. PPREFERRED_ENTRY pPreferredEntry;
  3456. GROUP_AFFINITY_NODE_INFO GroupAffinityNodeInfo;
  3457. PNM_NODE pNode = NULL;
  3458. DWORD dwIndex = 0, i;
  3459. DWORD dwClusterHighestVersion;
  3460. GroupAffinityNodeInfo.ppNmNodeList = NULL;
  3461. //
  3462. // Chittur Subbaraman (chitturs) - 3/6/2001
  3463. //
  3464. // This function works as follows. First, it makes a list of possible candidate nodes that the
  3465. // group can be hosted on. Next, it enumerates all groups in the cluster and for those
3466. // groups that have the AntiAffinityClassName property set, it will remove those groups'
  3467. // current owner nodes from the list of possible candidate nodes if they are present there.
  3468. // Note that this function will return a node only if the pruning has positively taken place.
  3469. // Else, it will return NULL.
  3470. //
  3471. // IMPORTANT NOTE: This function is called by all nodes from the node down processing FM
  3472. // GUM handler. For all nodes to reach exactly the same decision on the group placement,
  3473. // it is crucial that all nodes call this function for groups in exactly the same order.
  3474. // E.g., if node 1 was hosting groups A, B and C and it died, then all the remaining nodes
  3475. // must call this function first for group A, then for group B and finally for group C.
  3476. // This is because once group A is placed by this function, then group B's placement is
  3477. // influenced by group A's placement and similarly for groups B and C. This order is
3478. // ensured since every node's OM maintains groups in the same order: OM creates this
3479. // list by enumerating the group key (under Cluster\Groups), and that enumeration occurs in the
3480. // same order on all nodes.
  3481. //
  3482. //
  3483. // It is too bad that we can't hold any locks while enumerating groups and looking at the
  3484. // property field since that will soon result in a deadlock (since we can't hold group locks
  3485. // from within a GUM and this function is invoked from a GUM).
  3486. //
  3487. //
  3488. // If we are dealing with the mixed mode cluster or if the group does not have the antiaffinity
  3489. // property set, then don't do anything.
  3490. //
  3491. NmGetClusterOperationalVersion( &dwClusterHighestVersion,
  3492. NULL,
  3493. NULL );
  3494. if ( ( CLUSTER_GET_MAJOR_VERSION( dwClusterHighestVersion ) < NT51_MAJOR_VERSION ) ||
  3495. ( pGroup->lpszAntiAffinityClassName == NULL ) )
  3496. {
  3497. goto FnExit;
  3498. }
  3499. //
  3500. // Initialize the node list.
  3501. //
  3502. GroupAffinityNodeInfo.ppNmNodeList = LocalAlloc ( LPTR,
  3503. ClusterDefaultMaxNodes * sizeof ( PNM_NODE ) );
  3504. if ( GroupAffinityNodeInfo.ppNmNodeList == NULL )
  3505. {
  3506. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpGetNodeNotHostingUndesiredGroups: Failed in alloc, Status %1!d!\n",
  3507. GetLastError());
  3508. goto FnExit;
  3509. }
  3510. //
  3511. // For each entry in the preferred list, find a system that is up and that does not
  3512. // host any groups with an anti-affinity to the supplied group.
  3513. //
  3514. for ( plistEntry = pGroup->PreferredOwners.Flink;
  3515. plistEntry != &(pGroup->PreferredOwners);
  3516. plistEntry = plistEntry->Flink )
  3517. {
  3518. pPreferredEntry = CONTAINING_RECORD( plistEntry,
  3519. PREFERRED_ENTRY,
  3520. PreferredLinkage );
  3521. if ( NmGetNodeState( pPreferredEntry->PreferredNode ) == ClusterNodeUp )
  3522. {
  3523. if ( ( fRuleOutLocalNode ) &&
  3524. ( pPreferredEntry->PreferredNode == NmLocalNode ) ) continue;
  3525. GroupAffinityNodeInfo.ppNmNodeList[dwIndex] = pPreferredEntry->PreferredNode;
  3526. dwIndex ++;
  3527. }
  3528. } // for
  3529. //
  3530. // Initialize the other fields in the GroupAffinityNodeInfo structure.
  3531. //
  3532. GroupAffinityNodeInfo.pGroup = pGroup;
  3533. GroupAffinityNodeInfo.fDidPruningOccur = FALSE;
  3534. //
  3535. // Enumerate all the groups and rule out nodes that host groups with the supplied
  3536. // anti-affinity property set.
  3537. //
  3538. OmEnumObjects ( ObjectTypeGroup,
  3539. FmpCheckForAntiAffinityProperty,
  3540. pGroup->lpszAntiAffinityClassName,
  3541. &GroupAffinityNodeInfo );
  3542. //
  3543. // No pruning occurred so far. So, don't proceed further and let the caller decide on
  3544. // a best node for the group using some other algorithm.
  3545. //
  3546. if ( GroupAffinityNodeInfo.fDidPruningOccur == FALSE )
  3547. {
  3548. goto FnExit;
  3549. }
  3550. //
  3551. // Now, pick the first node from the list that is a valid node.
  3552. //
  3553. for ( i=0; i<ClusterDefaultMaxNodes; i++ )
  3554. {
  3555. if ( GroupAffinityNodeInfo.ppNmNodeList[i] != NULL )
  3556. {
  3557. pNode = GroupAffinityNodeInfo.ppNmNodeList[i];
  3558. ClRtlLogPrint(LOG_NOISE, "[FM] FmpGetNodeNotHostingUndesiredGroups: Choosing node %1!d! for group %2!ws! [%3!ws!]...\n",
  3559. NmGetNodeId(pNode),
  3560. OmObjectId(pGroup),
  3561. OmObjectName(pGroup));
  3562. goto FnExit;
  3563. }
  3564. } // for
  3565. FnExit:
  3566. LocalFree( GroupAffinityNodeInfo.ppNmNodeList );
  3567. return( pNode );
  3568. } // FmpGetNodeNotHostingUndesiredGroups
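//
// A worked sketch with hypothetical data: suppose the group being placed
// has AntiAffinityClassName L"SQL" and preferred owners 1, 2 and 3, all UP,
// so the candidate list starts as {1, 2, 3}. If another group of class
// L"SQL" is currently owned by node 2, FmpCheckForAntiAffinityProperty
// (below) clears slot 2 and sets fDidPruningOccur, and node 1 - the first
// surviving entry - is returned. If no other group of that class is hosted
// anywhere, no pruning occurs and NULL is returned, so the caller falls
// back to some other placement algorithm.
//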
  3569. BOOL
  3570. FmpCheckForAntiAffinityProperty(
  3571. IN LPCWSTR lpszAntiAffinityClassName,
  3572. IN PGROUP_AFFINITY_NODE_INFO pGroupAffinityNodeInfo,
  3573. IN PFM_GROUP pGroup,
  3574. IN LPCWSTR lpszGroupName
  3575. )
  3576. /*++
  3577. Routine Description:
  3578. Remove a node from the supplied node list if it hosts the supplied group with the supplied
  3579. anti-affinity property set.
  3580. Arguments:
  3581. lpszAntiAffinityClassName - The name property to check for.
3582. pGroupAffinityNodeInfo - Structure containing the list of nodes that may be pruned.
  3583. pGroup - Supplies the group.
  3584. lpszGroupName - Supplies the group's name.
  3585. Return Value:
  3586. TRUE - to indicate that the enumeration should continue.
  3587. FALSE - to indicate that the enumeration should not continue.
  3588. --*/
  3589. {
  3590. DWORD i;
  3591. //
3592. // If the supplied group does not have the anti-affinity property set, or if it has the
3593. // property set but it is not the same as the one we are checking against, or if it is the
3594. // same group we are interested in placing, then just return specifying that the
  3595. // enum should continue.
  3596. //
  3597. if ( ( pGroup->lpszAntiAffinityClassName == NULL ) ||
  3598. ( pGroup == pGroupAffinityNodeInfo->pGroup ) ||
  3599. ( lstrcmp ( lpszAntiAffinityClassName, pGroup->lpszAntiAffinityClassName ) != 0 ) )
  3600. {
  3601. goto FnExit;
  3602. }
  3603. //
  3604. // If you reached here, this means that the supplied group has the anti-affinity property
3605. // set and it is the same as the property we are checking against. So, prune the node list.
  3606. //
  3607. for ( i=0; i<ClusterDefaultMaxNodes; i++ )
  3608. {
  3609. if ( ( pGroupAffinityNodeInfo->ppNmNodeList[i] != NULL ) &&
  3610. ( pGroup->OwnerNode == pGroupAffinityNodeInfo->ppNmNodeList[i] ) )
  3611. {
  3612. ClRtlLogPrint(LOG_NOISE, "[FM] FmpCheckForAntiAffinityProperty: Pruning node %1!d! for group %2!ws! due to "
  3613. "group %3!ws!, AntiAffinityClassName=%4!ws!...\n",
  3614. NmGetNodeId(pGroupAffinityNodeInfo->ppNmNodeList[i]),
  3615. OmObjectId(pGroupAffinityNodeInfo->pGroup),
  3616. OmObjectId(pGroup),
  3617. lpszAntiAffinityClassName);
  3618. pGroupAffinityNodeInfo->ppNmNodeList[i] = NULL;
  3619. //
  3620. // Mark that pruning was attempted.
  3621. //
  3622. pGroupAffinityNodeInfo->fDidPruningOccur = TRUE;
  3623. goto FnExit;
  3624. } // if
  3625. } // for
  3626. FnExit:
  3627. return( TRUE );
  3628. } // FmpCheckForAntiAffinityProperty
  3629. PNM_NODE
  3630. FmpPickNodeFromPreferredListAtRandom(
  3631. IN PFM_GROUP pGroup,
  3632. IN PNM_NODE pSuggestedPreferredNode OPTIONAL,
  3633. IN BOOL fRuleOutLocalNode,
  3634. IN BOOL fCheckForDisablingRandomization
  3635. )
  3636. /*++
  3637. Routine Description:
  3638. Find a preferred node for the group that is UP in a random fashion.
  3639. Arguments:
  3640. pGroup - Pointer to the group object we're interested in.
  3641. pSuggestedPreferredNode - Suggested fallback option in case this random result is undesired. OPTIONAL
  3642. fRuleOutLocalNode - Should the local node be ruled out from consideration.
  3643. fCheckForDisablingRandomization - Check whether randomization should be disabled.
  3644. Return Value:
  3645. The preferred node that is picked.
3646. NULL if a node cannot be found.
  3647. Comments:
3648. This function is called from both FmpMoveGroup and FmpNodeDown. In the former case,
3649. we will have a non-NULL suggested preferred node, the rule-out-local-node option set, a check
3650. for the property setting that disables randomization, and a check for mixed mode clusters
3651. that disables randomization. In the latter case, these parameters are the opposite.
  3652. --*/
  3653. {
  3654. UUID uuId;
  3655. USHORT usHashValue;
  3656. PNM_NODE pNode = NULL, pSelectedNode = pSuggestedPreferredNode;
  3657. DWORD dwNodeId;
  3658. DWORD dwRetry = 0;
  3659. DWORD dwStatus;
  3660. DWORD dwDisabled = 0;
  3661. DWORD dwClusterHighestVersion;
  3662. //
  3663. // Chittur Subbaraman (chitturs) - 4/18/2001
  3664. //
  3665. if ( fCheckForDisablingRandomization )
  3666. {
  3667. //
  3668. // If you are here, this means you are coming as a part of a user-initiated move.
  3669. // Check whether the randomization applies.
  3670. //
  3671. //
3672. // First, check if we are operating in a mixed version cluster. If so, don't randomize.
  3673. //
  3674. NmGetClusterOperationalVersion( &dwClusterHighestVersion,
  3675. NULL,
  3676. NULL );
  3677. if ( CLUSTER_GET_MAJOR_VERSION( dwClusterHighestVersion ) <
  3678. NT51_MAJOR_VERSION )
  3679. {
  3680. return ( pSelectedNode );
  3681. }
  3682. //
  3683. // Next check if the user has turned off the randomization algorithm by setting
  3684. // HKLM\Cluster\DisableGroupPreferredOwnersRandomization DWORD to 1.
  3685. //
  3686. dwStatus = DmQueryDword( DmClusterParametersKey,
  3687. CLUSREG_NAME_DISABLE_GROUP_PREFERRED_OWNER_RANDOMIZATION,
  3688. &dwDisabled,
  3689. NULL );
  3690. if ( ( dwStatus == ERROR_SUCCESS ) &&
  3691. ( dwDisabled == 1 ) )
  3692. {
  3693. return ( pSelectedNode );
  3694. }
  3695. }
  3696. //
3697. // This function attempts to pick a node at random from the group's preferred owners list,
3698. // unless the caller suggests a preferred node that was explicitly set by the user. So, first
3699. // this function checks for that case and bails out if the condition is met. Otherwise, it
3700. // generates a random node ID between 1 and NmMaxNodeId and sees if (a) that node is in
3701. // the group's preferred list, and (b) that node is UP. If so, it picks that node. Note
3702. // that the function tries up to 25 times to pick a node and then gives up. If no
3703. // node is found, this function returns the suggested node, which in some cases could be
3704. // NULL.
  3705. //
  3706. ClRtlLogPrint(LOG_NOISE, "[FM] FmpPickNodeFromPreferredListAtRandom: Picking node for group %1!ws! [%2!ws!], suggested node %3!u!...\n",
  3707. OmObjectId(pGroup),
  3708. OmObjectName(pGroup),
  3709. (pSuggestedPreferredNode == NULL) ? 0:NmGetNodeId(pSuggestedPreferredNode));
  3710. if ( ( pSuggestedPreferredNode != NULL ) &&
  3711. ( FmpIsNodeUserPreferred ( pGroup, pSuggestedPreferredNode ) ) )
  3712. {
  3713. ClRtlLogPrint(LOG_NOISE, "[FM] FmpPickNodeFromPreferredListAtRandom: Node %2!u! for group %1!ws! is user preferred...\n",
  3714. OmObjectId(pGroup),
  3715. NmGetNodeId(pSuggestedPreferredNode));
  3716. goto FnExit;
  3717. }
  3718. if ( pGroup->lpszAntiAffinityClassName != NULL )
  3719. {
  3720. ClRtlLogPrint(LOG_NOISE, "[FM] FmpPickNodeFromPreferredListAtRandom: Group %1!ws! has antiaffinity property set...\n",
  3721. OmObjectId(pGroup));
  3722. goto FnExit;
  3723. }
  3724. //
3725. // Retry 25 times so that we have a good chance of getting a valid node. Note that we
3726. // scale the rand() output by NmMaxNodeId, whose value is equal to the node limit of
3727. // 16. So, to get a valid node in a smaller size cluster, we have to keep the retry
3728. // count reasonably high.
  3729. //
  3730. while ( dwRetry++ < 25 )
  3731. {
  3732. dwStatus = UuidFromString( ( LPWSTR ) OmObjectId(pGroup), &uuId );
  3733. if ( dwStatus != RPC_S_OK )
  3734. {
  3735. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpPickNodeFromPreferredListAtRandom: Unable to get UUID from string %1!ws!, Status %2!u!...\n",
  3736. OmObjectId(pGroup),
  3737. dwStatus);
  3738. goto FnExit;
  3739. }
  3740. usHashValue = UuidHash( &uuId, &dwStatus );
  3741. if ( dwStatus != RPC_S_OK )
  3742. {
  3743. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpPickNodeFromPreferredListAtRandom: Unable to get hash value for UUID %1!ws!, Status %2!u!...\n",
  3744. OmObjectId(pGroup),
  3745. dwStatus);
  3746. goto FnExit;
  3747. }
  3748. //
3749. // Seed the random number generator with a value that is as random as it gets.
  3750. //
  3751. srand( GetTickCount() * usHashValue * ( dwRetry + 1 ) );
  3752. //
  3753. // Find the node ID that is between ClusterMinNodeId and NmMaxNodeId. We use NmMaxNodeId
  3754. // here since there is no simple way to get the count of configured nodes. Note that we
  3755. // have to ensure that the node ID falls within this range, otherwise assertion trips
  3756. // in NmReferenceNodeById.
  3757. //
  3758. dwNodeId = ( DWORD ) ( ( double ) rand() / ( double ) ( RAND_MAX ) * NmMaxNodeId ) + 1;
  3759. if ( dwNodeId > NmMaxNodeId ) dwNodeId = NmMaxNodeId;
  3760. if ( dwNodeId < ClusterMinNodeId ) dwNodeId = ClusterMinNodeId;
  3761. //
  3762. // In case the caller asks you to rule out local node, do so.
  3763. //
  3764. if ( ( fRuleOutLocalNode ) && ( dwNodeId == NmLocalNodeId ) ) continue;
  3765. //
  3766. // Reference and dereference the node objects. Note that we are only interested in
  3767. // getting a pointer to the node object and we use the fact that the node in the preferred
  3768. // list must be referenced.
  3769. //
  3770. pNode = NmReferenceNodeById ( dwNodeId );
  3771. if ( pNode == NULL ) continue;
  3772. if ( ( FmpInPreferredList( pGroup, pNode, FALSE, NULL ) ) &&
  3773. ( NmGetExtendedNodeState( pNode ) == ClusterNodeUp ) )
  3774. {
  3775. pSelectedNode = pNode;
  3776. break;
  3777. }
  3778. OmDereferenceObject ( pNode );
  3779. pNode = NULL;
  3780. }// while
  3781. FnExit:
  3782. if ( pNode != NULL ) OmDereferenceObject ( pNode );
  3783. ClRtlLogPrint(LOG_NOISE, "[FM] FmpPickNodeFromPreferredListAtRandom: Selected node %2!u! for group %1!ws!...\n",
  3784. OmObjectId(pGroup),
  3785. (pSelectedNode == NULL) ? 0:NmGetNodeId(pSelectedNode));
  3786. return ( pSelectedNode );
3787. }// FmpPickNodeFromPreferredListAtRandom
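//
// A worked sketch of the node ID scaling used above, written against plain
// C only: rand()/RAND_MAX is mapped onto [1, MaxNodeId] and then clamped at
// both ends. With a hypothetical MaxNodeId of 16, rand() == 0 yields 1 and
// rand() == RAND_MAX yields 17, which the upper clamp pulls back to 16.
//
static DWORD
FmpSketchRandomNodeId(
    IN DWORD MinNodeId,     // e.g., ClusterMinNodeId
    IN DWORD MaxNodeId      // e.g., NmMaxNodeId
    )
{
    DWORD dwNodeId;

    dwNodeId = ( DWORD ) ( ( double ) rand() / ( double ) RAND_MAX * MaxNodeId ) + 1;

    if ( dwNodeId > MaxNodeId ) dwNodeId = MaxNodeId;
    if ( dwNodeId < MinNodeId ) dwNodeId = MinNodeId;

    return( dwNodeId );
}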
  3788. BOOL
  3789. FmpIsNodeUserPreferred(
  3790. IN PFM_GROUP pGroup,
  3791. IN PNM_NODE pPreferredNode
  3792. )
  3793. /*++
  3794. Routine Description:
  3795. Check whether the supplied node is set as a preferred node by the user.
  3796. Arguments:
  3797. pGroup - Pointer to the group object we're interested in.
  3798. pPreferredNode - Preferred node to check for.
  3799. Return Value:
  3800. TRUE - The supplied preferred node is user set.
  3801. FALSE otherwise
  3802. --*/
  3803. {
  3804. DWORD dwStatus;
  3805. BOOL fPreferredByUser = FALSE;
  3806. LPWSTR lpmszPreferredNodeList = NULL;
  3807. LPCWSTR lpszPreferredNode;
  3808. DWORD cbPreferredNodeList = 0;
  3809. DWORD cbBuffer = 0;
  3810. DWORD dwIndex;
  3811. PNM_NODE pNode;
  3812. //
  3813. // Look for any preferred owners set by the user
  3814. //
  3815. dwStatus = DmQueryMultiSz( pGroup->RegistryKey,
  3816. CLUSREG_NAME_GRP_PREFERRED_OWNERS,
  3817. &lpmszPreferredNodeList,
  3818. &cbBuffer,
  3819. &cbPreferredNodeList );
  3820. if ( dwStatus != ERROR_SUCCESS )
  3821. {
  3822. goto FnExit;
  3823. }
  3824. //
  3825. // Parse the multisz and check whether the supplied node exists in the list
  3826. //
  3827. for ( dwIndex = 0; ; dwIndex++ )
  3828. {
  3829. lpszPreferredNode = ClRtlMultiSzEnum( lpmszPreferredNodeList,
  3830. cbPreferredNodeList/sizeof(WCHAR),
  3831. dwIndex );
  3832. if ( lpszPreferredNode == NULL )
  3833. {
  3834. break;
  3835. }
  3836. pNode = OmReferenceObjectById( ObjectTypeNode,
  3837. lpszPreferredNode );
  3838. if ( pNode == NULL )
  3839. {
  3840. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpIsNodeUserPreferred: Unable to reference node %1!ws!, Status %2!u!...\n",
  3841. lpszPreferredNode,
3842. GetLastError()); // dwStatus still holds ERROR_SUCCESS here
  3843. continue;
  3844. }
  3845. if ( pNode == pPreferredNode )
  3846. {
  3847. fPreferredByUser = TRUE;
  3848. OmDereferenceObject ( pNode );
  3849. break;
  3850. }
  3851. OmDereferenceObject ( pNode );
  3852. } // for
  3853. FnExit:
  3854. LocalFree ( lpmszPreferredNodeList );
  3855. return ( fPreferredByUser );
  3856. }// FmpIsNodeUserPreferred
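//
// An illustrative sketch of the REG_MULTI_SZ layout parsed above, with
// hypothetical node IDs: the PreferredOwners value is a packed sequence of
// NUL-terminated strings followed by one extra NUL, e.g. L"1\0" L"3\0".
// ClRtlMultiSzEnum returns the dwIndex-th string or NULL past the end; the
// same walk can also be written directly, as below.
//
static VOID
FmpSketchWalkMultiSz(
    IN LPCWSTR lpmszList
    )
{
    LPCWSTR psz;

    for ( psz = lpmszList; *psz != L'\0'; psz += lstrlenW( psz ) + 1 )
    {
        //
        // Each iteration sees one node ID string, e.g. L"1", then L"3".
        //
    }
}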
  3857. DWORD
  3858. FmpPrepareGroupNodeList(
  3859. OUT PFM_GROUP_NODE_LIST *ppGroupNodeList
  3860. )
  3861. /*++
  3862. Routine Description:
  3863. Prepares a buffer containing the group ID and preferred owner node ID of all groups.
  3864. Arguments:
  3865. ppGroupNodeList - Pointer to a buffer containing group IDs and preferred nodes.
  3866. Return Value:
  3867. ERROR_SUCCESS on success
  3868. Win32 error code otherwise
  3869. --*/
  3870. {
  3871. DWORD cbBuffer = 512; // Let us try a 512 byte buffer to start with.
  3872. DWORD dwStatus;
  3873. DWORD dwDisabled = 0;
  3874. //
  3875. // First check if the user has turned off the randomization algorithm by setting
  3876. // HKLM\Cluster\DisableGroupPreferredOwnersRandomization DWORD to 1.
  3877. //
  3878. dwStatus = DmQueryDword( DmClusterParametersKey,
  3879. CLUSREG_NAME_DISABLE_GROUP_PREFERRED_OWNER_RANDOMIZATION,
  3880. &dwDisabled,
  3881. NULL );
  3882. if ( ( dwStatus == ERROR_SUCCESS ) &&
  3883. ( dwDisabled == 1 ) )
  3884. {
  3885. dwStatus = ERROR_CLUSTER_INVALID_REQUEST;
  3886. return ( dwStatus );
  3887. }
  3888. //
  3889. // This function allocates contiguous memory for a list so that the entire buffer
  3890. // can be passed on to GUM.
  3891. //
  3892. *ppGroupNodeList = LocalAlloc( LPTR, cbBuffer );
  3893. if ( *ppGroupNodeList == NULL )
  3894. {
  3895. dwStatus = GetLastError();
  3896. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpPrepareGroupNodeList: Memory alloc failed, Status %1!u!...\n",
  3897. dwStatus);
  3898. return ( dwStatus );
  3899. }
  3900. //
  3901. // Initialize the size of the list to the size of the header minus first element.
  3902. //
  3903. ( *ppGroupNodeList )->cbGroupNodeList = sizeof ( FM_GROUP_NODE_LIST ) -
  3904. sizeof ( FM_GROUP_NODE_LIST_ENTRY );
  3905. //
  3906. // Enumerate all the groups, find a possibly random preferred owner for each group and
  3907. // return all the info in the buffer.
  3908. //
  3909. return OmEnumObjects ( ObjectTypeGroup,
  3910. FmpAddGroupNodeToList,
  3911. ppGroupNodeList,
  3912. &cbBuffer );
  3913. }// FmpPrepareGroupNodeList
  3914. DWORD
  3915. FmpAddGroupNodeToList(
  3916. IN PFM_GROUP_NODE_LIST *ppGroupNodeList,
  3917. IN LPDWORD pcbBuffer,
  3918. IN PFM_GROUP pGroup,
  3919. IN LPCWSTR lpszGroupId
  3920. )
  3921. /*++
  3922. Routine Description:
  3923. Find a random preferred owner for the given group and add the info to a buffer.
  3924. Arguments:
  3925. ppGroupNodeList - Pointer to a buffer containing group IDs and preferred nodes.
  3926. pcbBuffer - Size of the buffer.
  3927. pGroup - Group whose preferred node is to be found.
  3928. lpszGroupId - ID of the group.
3929. Return Value:
3930. TRUE - to indicate that the enumeration should continue. Failures for a
3931. single group do not stop the enumeration.
  3932. --*/
  3933. {
  3934. PNM_NODE pNode;
  3935. PFM_GROUP_NODE_LIST_ENTRY pGroupNodeListEntry;
  3936. PFM_GROUP_NODE_LIST pBuffer;
  3937. PLIST_ENTRY pListEntry;
  3938. DWORD dwStatus;
  3939. //
  3940. // Get the group lock since you manipulate group lists here.
  3941. //
  3942. FmpAcquireLocalGroupLock ( pGroup );
  3943. //
3944. // Skip the quorum group since we cannot randomize its preferred owners list: MM has a
3945. // choke hold on the placement of the quorum group.
  3946. //
  3947. if ( pGroup == gpQuoResource->Group ) goto FnExit;
  3948. //
  3949. // Try to pick a preferred node list for the group at random.
  3950. //
  3951. pNode = FmpPickNodeFromPreferredListAtRandom( pGroup,
  3952. NULL, // No suggested preferred owner
  3953. FALSE, // Can choose local node
  3954. FALSE ); // Check whether randomization should be
  3955. // disabled
  3956. //
  3957. // If no node could be picked, bail out
  3958. //
  3959. if ( pNode == NULL ) goto FnExit;
  3960. //
  3961. // Check whether the allocated buffer is big enough to hold the new entry. Note that the
3962. // RHS of the comparison need not contain the NULL char size since we allocate 1 WCHAR for it in
  3963. // the FM_GROUP_NODE_LIST_ENTRY structure. Also, note that we have to see if the current
  3964. // buffer size is big enough to hold the padding for DWORD alignment.
  3965. //
  3966. if ( *pcbBuffer < ( ( *ppGroupNodeList )->cbGroupNodeList +
  3967. ( sizeof ( FM_GROUP_NODE_LIST_ENTRY ) +
  3968. lstrlenW ( lpszGroupId ) * sizeof ( WCHAR ) +
  3969. sizeof ( DWORD ) - 1
  3970. ) & ~( sizeof ( DWORD ) - 1 )
  3971. ) )
  3972. {
  3973. //
  3974. // Reallocate a bigger buffer
  3975. //
  3976. pBuffer = LocalAlloc( LPTR, 2 * ( *pcbBuffer ) );
  3977. if ( pBuffer == NULL )
  3978. {
  3979. dwStatus = GetLastError();
  3980. ClRtlLogPrint(LOG_CRITICAL, "[FM] FmpAddGroupNodeToList: Memory alloc failed, Status %1!u!...\n",
  3981. dwStatus);
  3982. goto FnExit;
  3983. }
  3984. ( *pcbBuffer ) *= 2;
  3985. //
  3986. // Copy the contents of the old list to the new list.
  3987. //
  3988. CopyMemory( pBuffer, *ppGroupNodeList, ( *ppGroupNodeList )->cbGroupNodeList );
  3989. LocalFree ( *ppGroupNodeList );
  3990. *ppGroupNodeList = pBuffer;
  3991. }
  3992. //
  3993. // Find the pointer to the beginning of the new list entry
  3994. //
  3995. pGroupNodeListEntry = ( PFM_GROUP_NODE_LIST_ENTRY )
  3996. ( ( LPBYTE ) ( *ppGroupNodeList ) +
  3997. ( *ppGroupNodeList )->cbGroupNodeList );
  3998. //
  3999. // Adjust the size of the list. As above, size of NULL char is excluded. Align the length
  4000. // to a multiple of DWORD since we want the PFM_GROUP_NODE_LIST_ENTRY structure to be
  4001. // DWORD aligned since the structure starts with a DWORD.
  4002. //
  4003. ( *ppGroupNodeList )->cbGroupNodeList += ( sizeof ( FM_GROUP_NODE_LIST_ENTRY ) +
  4004. lstrlenW ( lpszGroupId ) * sizeof ( WCHAR ) +
  4005. sizeof ( DWORD ) - 1 ) & ~( sizeof ( DWORD ) - 1 );
  4006. //
  4007. // Set the contents of the list entry
  4008. //
  4009. pGroupNodeListEntry->dwPreferredNodeId = NmGetNodeId ( pNode );
  4010. lstrcpy( pGroupNodeListEntry->szGroupId, lpszGroupId );
  4011. FnExit:
  4012. FmpReleaseLocalGroupLock( pGroup );
  4013. return ( TRUE );
4014. }// FmpAddGroupNodeToList
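//
// A worked sketch of the DWORD rounding used above: each variable length
// entry is padded up to a multiple of sizeof(DWORD) so that the next
// FM_GROUP_NODE_LIST_ENTRY starts DWORD aligned. For a hypothetical raw
// entry size of 30 bytes, ( 30 + 3 ) & ~3 yields 32.
//
static DWORD
FmpSketchRoundUpToDword(
    IN DWORD cbRaw
    )
{
    return( ( cbRaw + sizeof( DWORD ) - 1 ) & ~( sizeof( DWORD ) - 1 ) );
}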
  4015. PNM_NODE
  4016. FmpParseGroupNodeListForPreferredOwner(
  4017. IN PFM_GROUP pGroup,
  4018. IN PFM_GROUP_NODE_LIST pGroupNodeList,
  4019. IN PNM_NODE pSuggestedPreferredNode
  4020. )
  4021. /*++
  4022. Routine Description:
  4023. Parse the supplied group node list looking for a preferred node for the supplied group.
  4024. Arguments:
  4025. pGroup - The group whose preferred node must be found.
  4026. pGroupNodeList - The list contains preferred nodes of the group.
  4027. pSuggestedPreferredNode - Suggested preferred node fallback option.
  4028. Return Value:
  4029. The preferred node for the group.
  4030. --*/
  4031. {
  4032. PNM_NODE pSelectedNode = pSuggestedPreferredNode;
  4033. PFM_GROUP_NODE_LIST_ENTRY pGroupNodeListEntry;
  4034. BOOL fFoundGroup = FALSE;
  4035. PNM_NODE pNode = NULL;
  4036. DWORD dwStatus;
  4037. DWORD cbGroupNodeList;
  4038. //
4039. // If the suggested node is user preferred or if the group has an anti-affinity class name
  4040. // property set, don't do anything else. Just return the suggested owner.
  4041. //
  4042. if ( ( FmpIsNodeUserPreferred ( pGroup, pSuggestedPreferredNode ) ) ||
  4043. ( pGroup->lpszAntiAffinityClassName != NULL ) )
  4044. {
  4045. ClRtlLogPrint(LOG_NOISE, "[FM] FmpParseGroupNodeListForPreferredOwner: Node %2!u! for group %1!ws! is user preferred/antiaffinity property set...\n",
  4046. OmObjectId(pGroup),
  4047. NmGetNodeId(pSuggestedPreferredNode));
  4048. goto FnExit;
  4049. }
  4050. cbGroupNodeList = sizeof ( FM_GROUP_NODE_LIST ) -
  4051. sizeof ( FM_GROUP_NODE_LIST_ENTRY );
  4052. //
  4053. // Walk the supplied list looking for the group entry.
  4054. //
  4055. while ( cbGroupNodeList < pGroupNodeList->cbGroupNodeList )
  4056. {
  4057. pGroupNodeListEntry = ( PFM_GROUP_NODE_LIST_ENTRY ) ( ( LPBYTE ) pGroupNodeList +
  4058. cbGroupNodeList );
  4059. if ( lstrcmp( pGroupNodeListEntry->szGroupId, OmObjectId( pGroup ) ) == 0 )
  4060. {
  4061. fFoundGroup = TRUE;
  4062. break;
  4063. }
  4064. cbGroupNodeList += ( sizeof ( FM_GROUP_NODE_LIST_ENTRY ) +
  4065. lstrlenW ( pGroupNodeListEntry->szGroupId ) * sizeof ( WCHAR ) +
  4066. sizeof ( DWORD ) - 1 ) & ~( sizeof ( DWORD ) - 1 );
  4067. } // while
  4068. //
  4069. // Fallback to the suggested option if:
  4070. // (1) You did not find the group in the list
  4071. // (2) The preferred node for the group is invalid in the list
  4072. // (3) The preferred node for the group is down
  4073. //
  4074. if ( fFoundGroup == FALSE )
  4075. {
  4076. ClRtlLogPrint(LOG_NOISE, "[FM] FmpParseGroupNodeListForPreferredOwner: Did not find group %1!ws! in supplied list...\n",
  4077. OmObjectId(pGroup));
  4078. goto FnExit;
  4079. }
  4080. if ( ( pGroupNodeListEntry->dwPreferredNodeId == 0 ) ||
  4081. ( pGroupNodeListEntry->dwPreferredNodeId > NmMaxNodeId ) )
  4082. {
4083. ClRtlLogPrint(LOG_NOISE, "[FM] FmpParseGroupNodeListForPreferredOwner: Invalid node %1!u! for group %2!ws! in supplied list...\n",
  4084. pGroupNodeListEntry->dwPreferredNodeId,
  4085. OmObjectId(pGroup));
  4086. goto FnExit;
  4087. }
  4088. pNode = NmReferenceNodeById( pGroupNodeListEntry->dwPreferredNodeId );
  4089. if ( pNode == NULL )
  4090. {
  4091. dwStatus = GetLastError();
4092. ClRtlLogPrint(LOG_UNUSUAL, "[FM] FmpParseGroupNodeListForPreferredOwner: Unable to reference node %1!u! for group %2!ws!, Status %3!u!...\n",
  4093. pGroupNodeListEntry->dwPreferredNodeId,
  4094. OmObjectId(pGroup),
  4095. dwStatus);
  4096. goto FnExit;
  4097. }
  4098. if ( NmGetNodeState( pNode ) != ClusterNodeUp )
  4099. {
4100. ClRtlLogPrint(LOG_UNUSUAL, "[FM] FmpParseGroupNodeListForPreferredOwner: Preferred node %1!u! for group %2!ws! is not UP...\n",
  4101. pGroupNodeListEntry->dwPreferredNodeId,
  4102. OmObjectId(pGroup));
  4103. goto FnExit;
  4104. }
  4105. pSelectedNode = pNode;
  4106. ClRtlLogPrint(LOG_NOISE, "[FM] FmpParseGroupNodeListForPreferredOwner: Selected node %1!u! for group %2!ws! from supplied randomized list...\n",
  4107. pGroupNodeListEntry->dwPreferredNodeId,
  4108. OmObjectId(pGroup));
  4109. FnExit:
  4110. //
  4111. // Dereference the node object since we depend on the original reference added to the
  4112. // group's preferred owner when it was added to the group structure.
  4113. //
  4114. if ( pNode != NULL ) OmDereferenceObject( pNode );
  4115. return ( pSelectedNode );
  4116. }// FmpParseGroupNodeListForPreferredOwner
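//
// An illustrative sketch of the packed FM_GROUP_NODE_LIST layout walked
// above, with hypothetical contents: a fixed header holding the running
// byte count, followed by DWORD aligned variable length entries.
//
//   +-----------------+-------------------------------+-----
//   | cbGroupNodeList | dwPreferredNodeId | szGroupId | ...
//   +-----------------+-------------------------------+-----
//
// cbGroupNodeList counts every byte used so far (header plus entries), so
// the walk above stops once the running offset reaches it.
//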