Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1721 lines
52 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. grouparb.c
  5. Abstract:
  6. Cluster group arbitration and sorting routines.
  7. Author:
  8. Rod Gamache (rodga) 8-Mar-1996
  9. Revision History:
  10. --*/
  11. #include "fmp.h"
  12. #define LOG_MODULE GROUPARB
  13. //
  14. // Global data
  15. //
  16. //
  17. // Local function prototypes
  18. //
  19. typedef struct FM_GROUP_ENUM_DATA {
  20. DWORD Allocated;
  21. LPCWSTR pszOwnerNodeId;
  22. BOOL QuorumGroup;
  23. } FM_GROUP_ENUM_DATA, *PFM_GROUP_ENUM_DATA;
  24. BOOL
  25. FmpEnumGroups(
  26. IN OUT PGROUP_ENUM *Enum,
  27. IN PFM_GROUP_ENUM_DATA EnumData,
  28. IN PFM_GROUP Group,
  29. IN LPCWSTR Name
  30. );
  31. BOOL
  32. FmpEqualGroupLists(
  33. IN PGROUP_ENUM Group1,
  34. IN PGROUP_ENUM Group2
  35. );
  36. int
  37. _cdecl
  38. SortCompare(
  39. IN const void * Elem1,
  40. IN const void * Elem2
  41. );
  42. DWORD
  43. FmpEnumSortGroups(
  44. OUT PGROUP_ENUM *ReturnEnum,
  45. IN OPTIONAL LPCWSTR pszOwnerNodeId,
  46. OUT PBOOL QuorumGroup
  47. )
  48. /*++
  49. Routine Description:
  50. Enumerates and sorts the list of Groups.
  51. Arguments:
  52. ReturnEnum - Returns the requested objects.
  53. pszOwnerNodeId - If present, supplies the owner node to filter
  54. the list of groups. (i.e. if you supply this, you
  55. get a list of groups owned by that node)
  56. If not present, all groups are returned.
  57. QuorumGroup - Returns TRUE if the quorum resource in one of the groups
  58. returned in the ENUM.
  59. Return Value:
  60. ERROR_SUCCESS if successful.
  61. Win32 error code on error.
  62. --*/
  63. {
  64. DWORD status;
  65. PGROUP_ENUM groupEnum = NULL;
  66. FM_GROUP_ENUM_DATA EnumData;
  67. EnumData.Allocated = ENUM_GROW_SIZE;
  68. EnumData.pszOwnerNodeId = pszOwnerNodeId;
  69. EnumData.QuorumGroup = FALSE;
  70. groupEnum = LocalAlloc(LMEM_FIXED, GROUP_SIZE(ENUM_GROW_SIZE));
  71. if ( groupEnum == NULL ) {
  72. status = ERROR_NOT_ENOUGH_MEMORY;
  73. goto error_exit;
  74. }
  75. groupEnum->EntryCount = 0;
  76. //
  77. // Enumerate all groups, sort with Quorum Group first in the list.
  78. //
  79. OmEnumObjects(ObjectTypeGroup,
  80. FmpEnumGroups,
  81. &groupEnum,
  82. &EnumData);
  83. *ReturnEnum = groupEnum;
  84. *QuorumGroup = EnumData.QuorumGroup;
  85. return(ERROR_SUCCESS);
  86. error_exit:
  87. if ( groupEnum != NULL ) {
  88. LocalFree( groupEnum );
  89. }
  90. *ReturnEnum = NULL;
  91. *QuorumGroup = FALSE;
  92. return(status);
  93. } // FmpEnumSortGroups
  94. DWORD
  95. FmpGetGroupListState(
  96. PGROUP_ENUM GroupEnum
  97. )
  98. /*++
  99. Routine Description:
  100. This routine gets the Group state for each of the Groups in the list.
  101. Arguments:
  102. GroupEnum - The list of Groups we now own.
  103. Returns:
  104. ERROR_SUCCESS if successful.
  105. Win32 error code on failure.
  106. --*/
  107. {
  108. PFM_GROUP group;
  109. DWORD i;
  110. for ( i = 0; i < GroupEnum->EntryCount; i++ ) {
  111. group = OmReferenceObjectById( ObjectTypeGroup,
  112. GroupEnum->Entry[i].Id );
  113. if ( group == NULL ) {
  114. return(ERROR_GROUP_NOT_FOUND);
  115. }
  116. ClRtlLogPrint( LOG_NOISE,
  117. "[FM] GetGroupListState, Group <%1!ws!> state = %2!d!\n",
  118. OmObjectName(group), group->State );
  119. if ( (group->State == ClusterGroupFailed) ||
  120. (group->State == ClusterGroupPartialOnline) ) {
  121. GroupEnum->Entry[i].State = ClusterGroupOnline;
  122. } else {
  123. GroupEnum->Entry[i].State = group->State;
  124. }
  125. OmDereferenceObject( group );
  126. }
  127. return(ERROR_SUCCESS);
  128. } // FmpGetGroupListState
  129. DWORD
  130. FmpOnlineGroupList(
  131. IN PGROUP_ENUM GroupEnum,
  132. IN BOOL bPrepareQuoForOnline
  133. )
  134. /*++
  135. Routine Description:
  136. Brings online all Groups in the Enum list. If the quorum group
  137. is present in the list, then it must be first.
  138. Arguments:
  139. GroupEnum - The list of Groups to bring online.
  140. bPrepareQuoForOnline - Indicates whether the quorum resource should be
  141. forced prepared for onlining
  142. Returns:
  143. ERROR_SUCCESS if successful.
  144. Win32 error code on failure.
  145. --*/
  146. {
  147. PFM_GROUP group;
  148. DWORD status = ERROR_SUCCESS;
  149. int i;
  150. int iQuoGroup=-1;
  151. //
  152. // see if the quorum group is present in the list.
  153. //
  154. if ( NmGetNodeId(NmLocalNode) == NmGetNodeId( gpQuoResource->Group->OwnerNode ) )
  155. {
  156. for ( i = 0; (DWORD)i < GroupEnum->EntryCount; i++ )
  157. {
  158. if (!lstrcmpW(OmObjectId(gpQuoResource->Group), GroupEnum->Entry[i].Id))
  159. {
  160. iQuoGroup = i;
  161. break;
  162. }
  163. }
  164. }
  165. //if quorum group was found, bring it online first. It would normally
  166. //be first in the list.
  167. //the quorum group online must return success, or invalid state
  168. //because of the online pending quorum resource.
  169. //if the quorum resource needs to be brought online, it must
  170. //be brought into online or online pending state. This is
  171. // not required in fix quorum mode.
  172. if (iQuoGroup != -1)
  173. {
  174. ClRtlLogPrint(LOG_NOISE,
  175. "[FM] FmpOnlineGroupList: bring quorum group online\n");
  176. status = FmpOnlineGroupFromList(GroupEnum, iQuoGroup, bPrepareQuoForOnline);
  177. if ( status != ERROR_SUCCESS && status != ERROR_IO_PENDING)
  178. {
  179. ClRtlLogPrint(LOG_NOISE,
  180. "[FM] FmpOnlineGroupFromList: quorum online returned %1!u!.\n",
  181. status );
  182. CL_LOGFAILURE(status);
  183. }
  184. }
  185. // bring the non-quorum groups online
  186. for ( i = 0; (DWORD)i < GroupEnum->EntryCount; i++ )
  187. {
  188. //quorum resource should be online now
  189. if (i != iQuoGroup)
  190. FmpOnlineGroupFromList(GroupEnum, i, bPrepareQuoForOnline);
  191. }
  192. return(status);
  193. } // FmpOnlineGroupList
  194. DWORD FmpOnlineGroupFromList(
  195. IN PGROUP_ENUM GroupEnum,
  196. IN DWORD Index,
  197. IN BOOL bPrepareQuoForOnline
  198. )
  199. {
  200. PFM_GROUP group;
  201. DWORD status=ERROR_SUCCESS; //assume success
  202. PLIST_ENTRY listEntry;
  203. PFM_RESOURCE resource;
  204. group = OmReferenceObjectById( ObjectTypeGroup,
  205. GroupEnum->Entry[Index].Id );
  206. //
  207. // If we fail to find a group, then just continue.
  208. //
  209. if ( group == NULL ) {
  210. status = ERROR_GROUP_NOT_FOUND;
  211. return(status);
  212. }
  213. FmpAcquireLocalGroupLock( group );
  214. if (group->OwnerNode != NmLocalNode) {
  215. FmpReleaseLocalGroupLock( group );
  216. OmDereferenceObject(group);
  217. return (ERROR_HOST_NODE_NOT_RESOURCE_OWNER);
  218. }
  219. ClRtlLogPrint(LOG_NOISE,
  220. "[FM] FmpOnlineGroupFromList: Previous group state for %1!ws! is %2!u!\r\n",
  221. OmObjectId(group), GroupEnum->Entry[Index].State);
  222. //
  223. // First make sure the group has completed initialization.
  224. //
  225. FmpCompleteInitGroup( group );
  226. //
  227. // First check if the Group failed to initialize. If so,
  228. // then attempt a failover immediately.
  229. //
  230. if ( GroupEnum->Entry[Index].State == ClusterGroupPartialOnline ) {
  231. GroupEnum->Entry[Index].State = ClusterGroupOnline;
  232. }
  233. if (!bPrepareQuoForOnline)
  234. {
  235. //
  236. // Normalize the state of each resource within the group.
  237. // except the quorum resource - this is because at initialization
  238. // we dont want to touch the quorum resource
  239. //
  240. for ( listEntry = group->Contains.Flink;
  241. listEntry != &(group->Contains);
  242. listEntry = listEntry->Flink ) {
  243. resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
  244. if ( !resource->QuorumResource ) {
  245. // don't touch the quorum resource
  246. switch ( resource->State ) {
  247. // all active resources should be brought online.
  248. case ClusterResourceOnlinePending:
  249. case ClusterResourceOfflinePending:
  250. case ClusterResourceOnline:
  251. resource->State = ClusterResourceOffline;
  252. break;
  253. default:
  254. // otherwise do nothing
  255. break;
  256. }
  257. }
  258. }
  259. }
  260. FmpSignalGroupWaiters( group );
  261. if ( group->InitFailed ) {
  262. //
  263. // Bring the Group online... and then fail it!
  264. //
  265. ClRtlLogPrint(LOG_NOISE,
  266. "[FM] FmpOnlineGroupFromList: group->InitFailed is true for %1!ws!\n",
  267. OmObjectId(group));
  268. status = FmpOnlineGroup( group, FALSE );
  269. ClusterEvent( CLUSTER_EVENT_GROUP_FAILED, group );
  270. OmReferenceObject( group );
  271. FmpPostWorkItem( FM_EVENT_GROUP_FAILED, group, 0 );
  272. } else if ((group->PersistentState == ClusterGroupOnline) ||
  273. (GroupEnum->Entry[Index].State == ClusterGroupOnline) ||
  274. FmpIsAnyResourcePersistentStateOnline( group ) ) {
  275. //
  276. // Chittur Subbaraman (chitturs) - 01/07/2001
  277. //
  278. // Now bring the Group online if that is it's current state or if any one of the
  279. // resources in the group has an online persistent state. The third check is
  280. // required since it is possible for a group to have a persistent state of ClusterGroupOffline,
  281. // a state of ClusterGroupOffline and yet one or more resources in the group has a persistent
  282. // state of ClusterResourceOnline. This happens for a group in which the client never ever
  283. // calls OnlineGroup but calls OnlineResource for one or more resources in the group and you
  284. // reached this call either at the cluster service startup time or as a part of node down
  285. // processing when the source node died just after the group became ClusterGroupOffline
  286. // and before the destination node brought the appropriate resources within the group online.
  287. // In such a case, we still want to bring each resource that has a persistent state of
  288. // ClusterResourceOnline to online state. Note that it is tricky to muck with the group
  289. // persistent state in an OnlineResource call due to atomicity issues (we really need a
  290. // transaction to update both group and resource persistent states in one shot) and also
  291. // due to the fuzzy definition of group persistent state when the group has some resources
  292. // online and some offline.
  293. //
  294. ClRtlLogPrint(LOG_NOISE,
  295. "[FM] FmpOnlineGroupFromList: trying to bring group %1!ws! online\n",
  296. OmObjectId(group));
  297. status = FmpOnlineGroup( group, FALSE );
  298. if (status == ERROR_QUORUM_RESOURCE_ONLINE_FAILED)
  299. {
  300. PRESOURCE_ENUM pResourceEnum;
  301. // This fn is either called at startup or during
  302. // a node down event on claiming a group - so we must
  303. // try our darn best to bring resources
  304. // online after a quorum resource failure
  305. // With quorum resource failure the failure policy is
  306. // not invoked for resources so something must try to bring
  307. // these resources online. This is why we are adding this
  308. // here
  309. //
  310. // Get the list of resources in the group and their states.
  311. //
  312. status = FmpGetResourceList( &pResourceEnum, group );
  313. if ( status == ERROR_SUCCESS )
  314. {
  315. //submit a timer callback to try and bring these resources
  316. //online
  317. //the worker thread will clean up the resource list
  318. FmpSubmitRetryOnline(pResourceEnum, group);
  319. }
  320. }
  321. }
  322. FmpReleaseLocalGroupLock( group );
  323. OmDereferenceObject( group );
  324. return(status);
  325. } // FmpOnlineGroupFromList
  326. DWORD
  327. FmpOnlineResourceFromList(
  328. IN PRESOURCE_ENUM ResourceEnum,
  329. IN PFM_GROUP pGroup
  330. )
  331. /*++
  332. Routine Description:
  333. Brings online all resources in the Enum list.
  334. Arguments:
  335. ResourceEnum - The list of resources to bring online.
  336. Comments : This function is called from the worker thread. We
  337. dont assume that the resource hasnt changed groups since the
  338. work item was posted. The local resource lock is acquired and
  339. released for each resource.
  340. Returns:
  341. ERROR_SUCCESS if successful.
  342. Win32 error code on failure.
  343. --*/
  344. {
  345. PFM_RESOURCE resource;
  346. DWORD status;
  347. DWORD returnStatus = ERROR_SUCCESS;
  348. DWORD i;
  349. if ( !FmpFMOnline ||
  350. FmpShutdown ) {
  351. return(ERROR_INVALID_STATE);
  352. }
  353. //log an event saying we are trying on online a group
  354. if (pGroup)
  355. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_START_ONLINE, OmObjectName(pGroup));
  356. // if the quorum resource is contained in here, bring it online first
  357. if (ResourceEnum->ContainsQuorum >= 0)
  358. {
  359. CL_ASSERT((DWORD)ResourceEnum->ContainsQuorum < ResourceEnum->EntryCount);
  360. resource = OmReferenceObjectById( ObjectTypeResource,
  361. ResourceEnum->Entry[ResourceEnum->ContainsQuorum].Id );
  362. // the resource should not vanish, we are holding the group lock after all
  363. CL_ASSERT(resource != NULL);
  364. //
  365. // If we fail to find a resource, then just continue
  366. //
  367. if ( resource != NULL ) {
  368. //acquire the local resource lock
  369. FmpAcquireLocalResourceLock(resource);
  370. ClRtlLogPrint(LOG_NOISE,
  371. "[FM] FmpOnlineResourceFromList: Previous quorum resource state for %1!ws! is %2!u!\r\n",
  372. OmObjectId(resource), ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State);
  373. if ( (ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State == ClusterResourceOnline) ||
  374. (ResourceEnum->Entry[ResourceEnum->ContainsQuorum].State == ClusterResourceFailed) ) {
  375. //
  376. // Now bring the resource online if that is it's current state.
  377. //
  378. ClRtlLogPrint(LOG_NOISE,
  379. "[FM] FmpOnlineResourceFromList: trying to bring quorum resource %1!ws! online, state %2!u!\n",
  380. OmObjectId(resource),
  381. resource->State);
  382. status = FmpOnlineResource( resource, FALSE );
  383. if ( status != ERROR_SUCCESS ) {
  384. returnStatus = status;
  385. }
  386. }
  387. OmDereferenceObject( resource );
  388. FmpReleaseLocalResourceLock(resource);
  389. }
  390. }
  391. // SS::: TODO what happens to the persistent state of the
  392. // other resources - is it handled correctly - note that this is
  393. // called on moving a group
  394. // Will the restart policy do the right thing in terms of bringing
  395. // them online
  396. // if the quorum resource has failed, dont bother trying
  397. // to bring the rest of the resourcess online
  398. if ((returnStatus != ERROR_SUCCESS) && (returnStatus != ERROR_IO_PENDING))
  399. {
  400. FmpSubmitRetryOnline(ResourceEnum, pGroup);
  401. goto FnExit;
  402. }
  403. // bring online all of the other resources
  404. for ( i = 0; i < ResourceEnum->EntryCount; i++ ) {
  405. resource = OmReferenceObjectById( ObjectTypeResource,
  406. ResourceEnum->Entry[i].Id );
  407. //
  408. // If we fail to find a resource, then just continue.
  409. //
  410. if ( resource == NULL ) {
  411. status = ERROR_RESOURCE_NOT_FOUND;
  412. continue;
  413. }
  414. FmpAcquireLocalResourceLock(resource);
  415. //if the resource has been marked for delete, then dont let
  416. //it be brought online
  417. if (!IS_VALID_FM_RESOURCE(resource))
  418. {
  419. FmpReleaseLocalResourceLock( resource );
  420. OmDereferenceObject(resource);
  421. continue;
  422. }
  423. //quorum resource has already been handled
  424. if (resource->QuorumResource)
  425. {
  426. FmpReleaseLocalResourceLock( resource );
  427. OmDereferenceObject(resource);
  428. continue;
  429. }
  430. ClRtlLogPrint(LOG_NOISE,
  431. "[FM] FmpOnlineResourceFromList: Previous resource state for %1!ws! is %2!u!\r\n",
  432. OmObjectId(resource), ResourceEnum->Entry[i].State);
  433. if ( (ResourceEnum->Entry[i].State == ClusterResourceOnline) ||
  434. (ResourceEnum->Entry[i].State == ClusterResourceFailed) )
  435. {
  436. //
  437. // Now bring the resource online if that is it's current state.
  438. //
  439. ClRtlLogPrint(LOG_NOISE,
  440. "[FM] FmpOnlineResourceFromList: trying to bring resource %1!ws! online\n",
  441. OmObjectId(resource));
  442. status = FmpOnlineResource( resource, FALSE );
  443. if ( returnStatus == ERROR_SUCCESS )
  444. {
  445. returnStatus = status;
  446. }
  447. //if this resource didnt come online because the quorum resource
  448. //didnt come online, dont bother bringing the other resources online
  449. //just a waste of time
  450. if (status == ERROR_QUORUM_RESOURCE_ONLINE_FAILED)
  451. {
  452. //submit a timer callback to try and bring these resources
  453. //online
  454. FmpReleaseLocalResourceLock( resource );
  455. OmDereferenceObject( resource );
  456. FmpSubmitRetryOnline(ResourceEnum, pGroup);
  457. break;
  458. }
  459. }
  460. FmpReleaseLocalResourceLock( resource );
  461. OmDereferenceObject( resource );
  462. }
  463. FnExit:
  464. if (returnStatus == ERROR_IO_PENDING)
  465. {
  466. if (pGroup)
  467. pGroup->dwStructState |= FM_GROUP_STRUCT_MARKED_FOR_COMPLETION_EVENT;
  468. //the failed or success event will get logged later on
  469. }
  470. else if (returnStatus == ERROR_SUCCESS)
  471. {
  472. if (pGroup)
  473. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_COMPLETE_ONLINE, OmObjectName(pGroup));
  474. }
  475. else
  476. {
  477. //SS: log an event to say that the online process failed
  478. if (pGroup)
  479. FmpLogGroupInfoEvent1( FM_EVENT_GROUP_FAILED_ONLINE_OFFLINE, OmObjectName(pGroup));
  480. }
  481. ClRtlLogPrint(LOG_NOISE,
  482. "[FM] FmpOnlineResourceFromList: Exit, status=%1!u!\r\n",
  483. returnStatus);
  484. return(returnStatus);
  485. } // FmpOnlineResourceFromList
  486. BOOL
  487. FmpEqualGroupLists(
  488. IN PGROUP_ENUM Group1,
  489. IN PGROUP_ENUM Group2
  490. )
  491. /*++
  492. Routine Description:
  493. This routine verifies that two group lists are equal.
  494. Arguments:
  495. Group1 - The first group to compare.
  496. Group2 - The second group to compare.
  497. Returns:
  498. TRUE - if the two lists are equal.
  499. FALSE - otherwise.
  500. --*/
  501. {
  502. DWORD i;
  503. if ( (Group1 == NULL) ||
  504. (Group2 == NULL) ) {
  505. ClRtlLogPrint(LOG_NOISE,"[FM] One of the Group lists is NULL for equality check\n");
  506. return(FALSE);
  507. }
  508. if ( Group1->EntryCount != Group2->EntryCount ) {
  509. ClRtlLogPrint(LOG_NOISE,"[FM] Group entry counts not equal! Left: %1!u!, Right: %2!u!.\n",
  510. Group1->EntryCount, Group2->EntryCount);
  511. return(FALSE);
  512. }
  513. for ( i = 0; i < Group1->EntryCount; i++ ) {
  514. if ( lstrcmpiW(Group1->Entry[i].Id, Group2->Entry[i].Id) != 0 ) {
  515. ClRtlLogPrint(LOG_NOISE,"[FM] Group Lists do not have same names!\n");
  516. return(FALSE);
  517. }
  518. }
  519. return(TRUE);
  520. } // FmpEqualGroupLists
  521. BOOL
  522. FmpEnumGroups(
  523. IN OUT PGROUP_ENUM *Enum,
  524. IN PFM_GROUP_ENUM_DATA EnumData,
  525. IN PFM_GROUP Group,
  526. IN LPCWSTR Id
  527. )
  528. /*++
  529. Routine Description:
  530. Worker callback routine for the enumeration of Groups.
  531. This routine adds the specified Group to the list that is being
  532. generated.
  533. Arguments:
  534. Enum - The Group Enumeration list. Can be an output if a new list is
  535. allocated.
  536. EnumData - Supplies the current enumeration data structure.
  537. Group - The Group object being enumerated.
  538. Id - The Id of the Group object being enumerated.
  539. Returns:
  540. TRUE - to indicate that the enumeration should continue.
  541. Side Effects:
  542. Makes the quorum group first in the list.
  543. --*/
  544. {
  545. PGROUP_ENUM groupEnum;
  546. PGROUP_ENUM newEnum;
  547. DWORD newAllocated;
  548. DWORD index;
  549. LPWSTR newId;
  550. LPWSTR tmpId;
  551. DWORD status;
  552. PFM_RESOURCE quorumResource;
  553. //HACKHACK::
  554. //SS: Since this is invoked from within a gum call and
  555. // the owner node is changed only within a gum call
  556. // we wont acquire locks.
  557. // there is a window if the dead node is the source of a
  558. // move and if it does a move after it is declared dead by
  559. // other nodes, the target of move and the fmpassignownerstogroup
  560. // might both land up bringing the group online on two nodes
  561. // However, if we could be guaranteed virtual synchrony, then
  562. // the target of move wouldnt accept calls from a dead node and
  563. // we wont land up in this soup. Now, it is upto the xport layer
  564. // to provide this guarantee.
  565. // For now we acquire no locks
  566. //FmpAcquireLocalGroupLock( Group );
  567. if ((EnumData->pszOwnerNodeId != NULL) &&
  568. (lstrcmp(EnumData->pszOwnerNodeId, OmObjectId(Group->OwnerNode))) &&
  569. ((Group->pIntendedOwner == NULL) ||
  570. (lstrcmp(EnumData->pszOwnerNodeId, OmObjectId(Group->pIntendedOwner))))) {
  571. //
  572. // This group does not match the owner criteria
  573. //
  574. //FmpReleaseLocalGroupLock( Group );
  575. return(TRUE);
  576. }
  577. //FmpReleaseLocalGroupLock( Group );
  578. groupEnum = *Enum;
  579. if ( groupEnum->EntryCount >= EnumData->Allocated ) {
  580. //
  581. // Time to grow the GROUP_ENUM
  582. //
  583. newAllocated = EnumData->Allocated + ENUM_GROW_SIZE;
  584. newEnum = LocalAlloc(LMEM_FIXED, GROUP_SIZE(newAllocated));
  585. if ( newEnum == NULL ) {
  586. return(FALSE);
  587. }
  588. CopyMemory(newEnum, groupEnum, GROUP_SIZE(EnumData->Allocated));
  589. EnumData->Allocated = newAllocated;
  590. *Enum = newEnum;
  591. LocalFree(groupEnum);
  592. groupEnum = newEnum;
  593. }
  594. //
  595. // Initialize new entry
  596. //
  597. newId = LocalAlloc(LMEM_FIXED, (lstrlenW(Id)+1) * sizeof(WCHAR));
  598. if ( newId == NULL ) {
  599. CsInconsistencyHalt(ERROR_NOT_ENOUGH_MEMORY);
  600. }
  601. lstrcpyW(newId, Id);
  602. //
  603. // Find the quorum resource, and see if it is this group.
  604. //
  605. status = FmFindQuorumResource( &quorumResource );
  606. if ( status != ERROR_SUCCESS ) {
  607. CsInconsistencyHalt(status);
  608. }
  609. groupEnum->Entry[groupEnum->EntryCount].Id = newId;
  610. if ( quorumResource->Group == Group ) {
  611. // found the quorum resource group, put it first in the list.
  612. tmpId = groupEnum->Entry[0].Id;
  613. groupEnum->Entry[0].Id = newId;
  614. groupEnum->Entry[groupEnum->EntryCount].Id = tmpId;
  615. EnumData->QuorumGroup = TRUE;
  616. }
  617. ++groupEnum->EntryCount;
  618. OmDereferenceObject( quorumResource );
  619. return(TRUE);
  620. } // FmpEnumGroups
  621. DWORD
  622. FmpClaimAllGroups(
  623. PGROUP_ENUM MyGroups
  624. )
  625. /*++
  626. Routine Description:
  627. Takes ownership of all the groups defined in the cluster. This
  628. is used when a new cluster is being formed.
  629. Arguments:
  630. None.
  631. Return Value:
  632. ERROR_SUCCESS if successful
  633. Win32 errorcode otherwise
  634. --*/
  635. {
  636. //
  637. // Bring online any Group that needs to be online.
  638. //
  639. FmpOnlineGroupList( MyGroups, FALSE );
  640. return(ERROR_SUCCESS);
  641. }
  642. VOID
  643. FmpDeleteEnum(
  644. IN PGROUP_ENUM Enum
  645. )
  646. /*++
  647. Routine Description:
  648. This routine deletes an GROUP_ENUM and associated name strings.
  649. Arguments:
  650. Enum - The GROUP_ENUM to delete. This pointer can be NULL.
  651. Returns:
  652. None.
  653. Notes:
  654. This routine will take a NULL input pointer and just return.
  655. --*/
  656. {
  657. PGROUP_ENUM_ENTRY enumEntry;
  658. DWORD i;
  659. if ( Enum == NULL ) {
  660. return;
  661. }
  662. for ( i = 0; i < Enum->EntryCount; i++ ) {
  663. enumEntry = &Enum->Entry[i];
  664. LocalFree(enumEntry->Id);
  665. }
  666. LocalFree(Enum);
  667. return;
  668. } // FmpDeleteEnum
  669. /****
  670. @func VOID | FmpPrepareGroupForOnline| This routine sets the Group
  671. up for onlining it on this node post a failure of a node
  672. or at initialization.
  673. @parm IN PFM_GROUP | pGroup| A pointer to the group.
  674. @comm The group lock must be held. Except when called at bootstrapping
  675. by FmBringQuorumOnline.
  676. MUST BE CALLED ONLY BY THE OWNER NODE OF THE GROUP.
  677. @rdesc returns ERROR_SUCCESS if succesful else w32 error code.
  678. MUST BE CALLED ONLY BY THE OWNER NODE OF THE GROUP.
  679. ****/
  680. VOID FmpPrepareGroupForOnline(
  681. IN PFM_GROUP pGroup
  682. )
  683. {
  684. PLIST_ENTRY pListEntry;
  685. PFM_RESOURCE pResource;
  686. pGroup->State = ClusterGroupOffline;
  687. ++pGroup->StateSequence;
  688. //
  689. // Mark offline all of the resources contained within this group.
  690. //
  691. for (pListEntry = pGroup->Contains.Flink;
  692. pListEntry != &pGroup->Contains;
  693. pListEntry = pListEntry->Flink)
  694. {
  695. pResource = CONTAINING_RECORD(pListEntry, FM_RESOURCE, ContainsLinkage);
  696. pResource->State = ClusterResourceOffline;
  697. ++pResource->StateSequence;
  698. }
  699. }
  700. /****
  701. @func DWORD | FmpSetGroupEnumOwner| This routine sets the Group
  702. owner for all Groups in the list.
  703. @parm IN PGROUP_ENUM | pGroupEnum| The list of Groups.
  704. @parm IN PNM_NODE | pDefaultOwnerNode | A pointer to the default owner
  705. node.
  706. @parm IN LPCWSTR | pszDeadNodeId | The ID of the node that died. If
  707. this routine is being called other wise, this is set to NULL.
  708. @parm IN BOOL | bQuorumGroup | set to TRUE if the quorum group is
  709. on the list of groups.
  710. @parm IN PFM_GROUP_NODE_LIST | pGroupNodeList | The randomized suggested preferred
  711. owner for all groups.
  712. @comm If the group was in the process of moving and had an intended
  713. owner and the intended owner is not dead, the intended owner is
  714. allowed to take care of the group. Else, the first node on the
  715. preferred list that is up is chosen as the owner. If no such
  716. node exits, then the ownership is assigned to the default owner
  717. provided. This routine is called by the forming node at
  718. initialization to claimownership of all groups and by the gum
  719. update procedure FmpUpdateAssignOwnerToGroups.
  720. @rdesc returns ERROR_SUCCESS if succesful else w32 error code.
  721. ****/
  722. DWORD
  723. FmpSetGroupEnumOwner(
  724. IN PGROUP_ENUM pGroupEnum,
  725. IN PNM_NODE pDefaultOwnerNode,
  726. IN LPCWSTR pszDeadNodeId,
  727. IN BOOL bQuorumGroup,
  728. IN PFM_GROUP_NODE_LIST pGroupNodeList
  729. )
  730. {
  731. PFM_GROUP pGroup;
  732. DWORD i;
  733. DWORD dwStatus = ERROR_SUCCESS;
  734. PNM_NODE pOwnerNode;
  735. for ( i = 0; i < pGroupEnum->EntryCount; i++ )
  736. {
  737. pGroup = OmReferenceObjectById( ObjectTypeGroup,
  738. pGroupEnum->Entry[i].Id );
  739. if ( pGroup == NULL )
  740. {
  741. ClRtlLogPrint(LOG_NOISE,
  742. "[FM] FmpSetGroupEnumOwner: Group %1!ws! not found\n",
  743. pGroupEnum->Entry[i].Id);
  744. dwStatus = ERROR_GROUP_NOT_FOUND;
  745. goto FnExit;
  746. }
  747. //
  748. // SS: HACKHACK : cant get the group lock within a gum update
  749. // FmpAcquireLocalGroupLock( pGroup );
  750. //
  751. // SS: In case of a node death, see if there was an intended owner
  752. // if the intended owner is set and if the intended owner is
  753. // not the one that died then we use the normal procedure
  754. // else we let the intended owner take care of the group.
  755. //
  756. // Chittur Subbaraman (chitturs) - 7/26/99
  757. //
  758. // Condition 2: Means the group was being moved and FmpTakeGroupRequest
  759. // has not taken 100% responsibility for the group.
  760. //
  761. // Condition 3: Means the source node crashed and NOT the destination node.
  762. //
  763. // Added condition 4 to cover the case in which the source node of
  764. // the move crashed AFTER setting the intended owner as the
  765. // destination node and BEFORE the FmpTakeGroupRequest has set
  766. // the group ownership to the destination node.
  767. //
  768. // If the group's owner node and the group's intended owner node are
  769. // not the same, then let this GUM handler take care of assigning
  770. // the group ownership. This means that the FmpTakeGroupRequest
  771. // has not yet set the ownership for the group to the destination
  772. // node of the move. Now, once this GUM handler sets the
  773. // ownership for the group and then resets the intended owner to
  774. // NULL, FmpTakeGroupRequest which could follow behind this GUM handler
  775. // will not succeed in setting the ownership to the local node and that
  776. // will just return doing nothing. This is TRUE only for an NT5 cluster.
  777. // For a mixed-mode cluster, all bets are off.
  778. //
  779. if ( (pszDeadNodeId) &&
  780. (pGroup->pIntendedOwner != NULL) &&
  781. (lstrcmp ( OmObjectId ( pGroup->pIntendedOwner ), pszDeadNodeId ) ) &&
  782. (pGroup->OwnerNode == pGroup->pIntendedOwner) )
  783. {
  784. //
  785. // Chittur Subbaraman (chitturs) - 7/27/99
  786. //
  787. // Looks like this code inside "if" will never ever be
  788. // executed. Keeping it so as to make the changes minimal.
  789. //
  790. ClRtlLogPrint(LOG_NOISE,
  791. "[FM] FmpSetGroupEnumOwner: Group %1!ws! will be handled by node %2!ws!\n",
  792. OmObjectId(pGroup), OmObjectId(pGroup->pIntendedOwner));
  793. continue;
  794. }
  795. //
  796. // Find first preferred node that is UP, if we can't find any use
  797. // default OwnerNode
  798. //
  799. //
  800. // If this is the quorum group, then use the node that was selected
  801. // by the MM layer. The quorum group is the first entry in the list
  802. // and the Boolean QuorumGroup must be TRUE!
  803. //
  804. if ( (i == 0) && bQuorumGroup )
  805. {
  806. DWORD dwOwnerNodeId;
  807. //for the quorum group find the node that had last
  808. //arbitrated for it.
  809. //We do this by asking MM about it.
  810. //If there was no arbitration during the last regroup
  811. //but there was one in the one before that one, the
  812. //node that arbitrated is returned.
  813. //This node should be able to online the group.
  814. //We use MMApproxArbitrationWinner instead if
  815. // MMGetArbitrationWinner() since multiple-regroups
  816. // might occur before the FM handles the node down
  817. // event for this node.
  818. MMApproxArbitrationWinner( &dwOwnerNodeId );
  819. ClRtlLogPrint(LOG_NOISE,
  820. "[FM] FmpSetGroupEnumOwner:: MM suggests node %1!u! for quorum owner\r\n",
  821. dwOwnerNodeId);
  822. if ( dwOwnerNodeId != MM_INVALID_NODE )
  823. {
  824. pOwnerNode = NmReferenceNodeById( dwOwnerNodeId );
  825. //
  826. // We can't proceed in this strange situation.
  827. //
  828. if ( pOwnerNode == NULL )
  829. {
  830. CsInconsistencyHalt ( ERROR_CLUSTER_INVALID_NODE );
  831. } else
  832. {
  833. //
  834. // Dereference the node object rightaway so that you don't increment the ref count
  835. // twice, one here and one again down below.
  836. //
  837. OmDereferenceObject ( pOwnerNode );
  838. }
  839. }
  840. else
  841. {
  842. ClRtlLogPrint(LOG_CRITICAL,
  843. "[FM] FmpSetGroupEnumOwner:: MM returned MM_INVALID_NODE, chose the default target\r\n");
  844. //else just use the default target
  845. pOwnerNode = pDefaultOwnerNode;
  846. }
  847. }
  848. else
  849. {
  850. pOwnerNode = FmpGetPreferredNode(pGroup);
  851. if ( pOwnerNode == NULL )
  852. {
  853. pOwnerNode = pDefaultOwnerNode;
  854. }
  855. //
  856. // If the caller (GUM) has supplied a randomized preferred owner of the group, then
  857. // see if it can be used.
  858. //
  859. if ( pGroupNodeList != NULL )
  860. {
  861. pOwnerNode = FmpParseGroupNodeListForPreferredOwner( pGroup,
  862. pGroupNodeList,
  863. pOwnerNode );
  864. }
  865. }
  866. if ( pGroup->OwnerNode != NULL )
  867. {
  868. OmDereferenceObject( pGroup->OwnerNode );
  869. }
  870. OmReferenceObject( pOwnerNode );
  871. pGroup->OwnerNode = pOwnerNode;
  872. ClRtlLogPrint(LOG_NOISE,
  873. "[FM] FmpSetGroupEnumOwner: Group's %1!ws! new owner is node %2!ws!\n",
  874. OmObjectId(pGroup), OmObjectId(pOwnerNode));
  875. //FmpReleaseLocalGroupLock( pGroup );
  876. OmDereferenceObject(pGroup);
  877. }
  878. FnExit:
  879. return(dwStatus);
  880. } // FmpSetGroupEnumOwner
  881. DWORD
  882. FmpAssignOwnersToGroups(
  883. IN LPCWSTR pszNodeId,
  884. IN PFM_GROUP pGroup,
  885. IN PFM_GROUP_NODE_LIST pGroupNodeList
  886. )
  887. /*++
  888. Routine Description:
  889. Takes ownership of all the groups defined in the cluster that
  890. are owned by another node. This is used when a node fails.
  891. The current algorithm is very dumb and simple. Node with the
  892. lowest ID gets all the groups.
  893. Arguments:
  894. pszNodeId - Supplies the node ID that all the groups should be taken
  895. from.
  896. pGroup - Supplies the group which alone is to be claimed.
  897. pGroupNodeList - The randomized suggested preferred owner for all groups.
  898. Return Value:
  899. ERROR_SUCCESS if successful
  900. Win32 errorcode otherwise
  901. --*/
  902. {
  903. DWORD i;
  904. DWORD dwStatus;
  905. PGROUP_ENUM pNodeGroups = NULL;
  906. PNM_NODE pDefaultTarget = NULL;
  907. PNM_NODE pPausedTarget = NULL;
  908. BOOL bQuorumGroup;
  909. //
  910. // Acquire the global group lock
  911. //
  912. FmpAcquireGroupLock();
  913. //
  914. // Check if groups are initialized
  915. //
  916. if ( !FmpFMGroupsInited )
  917. {
  918. dwStatus = ERROR_SUCCESS;
  919. goto FnExit;
  920. }
  921. //
  922. // Find and sort all known groups
  923. //
  924. if ( pGroup == NULL )
  925. {
  926. dwStatus = FmpEnumSortGroups(&pNodeGroups, pszNodeId, &bQuorumGroup);
  927. } else
  928. {
  929. //
  930. // Chittur Subbaraman (chitturs) - 6/7/99
  931. //
  932. // This means you got here due to an RPC exception raised in
  933. // FmpTakeGroupRequest. So, see where this sole group goes.
  934. //
  935. dwStatus = FmpGetGroupInNodeGroupList(&pNodeGroups, pGroup, pszNodeId, &bQuorumGroup);
  936. }
  937. if (dwStatus != ERROR_SUCCESS)
  938. {
  939. CL_ASSERT(pNodeGroups == NULL);
  940. goto FnExit;
  941. }
  942. CL_ASSERT(pNodeGroups != NULL);
  943. //if no nodes were owned by this node, just return
  944. if (pNodeGroups->EntryCount == 0)
  945. {
  946. FmpDeleteEnum(pNodeGroups);
  947. goto FnExit;
  948. }
  949. //
  950. // Find the state of the Groups.
  951. //
  952. FmpGetGroupListState( pNodeGroups );
  953. //
  954. // Find the active node with the lowest ID to be the default
  955. // owner of these groups.
  956. //
  957. // If we can't find an active node then select the lowest node id for
  958. // a node that is paused.
  959. //
  960. CL_ASSERT(NmMaxNodeId != ClusterInvalidNodeId);
  961. CL_ASSERT(Session != NULL);
  962. for (i=ClusterMinNodeId; i<=NmMaxNodeId; i++)
  963. {
  964. pDefaultTarget = NmReferenceNodeById(i);
  965. if ( pDefaultTarget != NULL )
  966. {
  967. //if this node is up, there is no need to use a paused target
  968. if ( NmGetNodeState(pDefaultTarget) == ClusterNodeUp )
  969. {
  970. if ( pPausedTarget )
  971. {
  972. OmDereferenceObject(pPausedTarget);
  973. pPausedTarget = NULL;
  974. }
  975. //found a node, leave this loop
  976. break;
  977. }
  978. //node is not up, check if it paused
  979. //if is is paused and no other paused node has been found
  980. //set this one to be the lowest paused node
  981. if ( !pPausedTarget &&
  982. (NmGetNodeState(pDefaultTarget) == ClusterNodePaused) )
  983. {
  984. pPausedTarget = pDefaultTarget;
  985. }
  986. else
  987. {
  988. OmDereferenceObject(pDefaultTarget);
  989. }
  990. pDefaultTarget = NULL;
  991. }
  992. }
  993. if ( (pDefaultTarget == NULL) && (pPausedTarget == NULL) ) {
  994. //
  995. // There are no online/paused nodes, this node must be paused,
  996. // so don't do anything.
  997. //
  998. ClRtlLogPrint(LOG_CRITICAL,
  999. "[FM] FmpAssignOwnersToGroups - no online/paused nodes remaining\n");
  1000. //SS: then what are we doing here
  1001. FmpDeleteEnum(pNodeGroups);
  1002. goto FnExit;
  1003. }
  1004. //if no node is up, use the lowest paused node as the default owner for
  1005. //the groups
  1006. if ( pDefaultTarget == NULL )
  1007. {
  1008. pDefaultTarget = pPausedTarget;
  1009. }
  1010. ClRtlLogPrint(LOG_NOISE,
  1011. "[FM] FmpAssignOwnersToGroups - DefaultTarget is %1!ws!\n",
  1012. OmObjectId(pDefaultTarget));
  1013. //
  1014. // Chittur Subbaraman (chitturs) - 7/20/99
  1015. //
  1016. // Prepare the entire group list for subsequent online. You have
  1017. // to do this here to have a consistent resource state view
  1018. // among different nodes in the cluster since this is the GUM
  1019. // handler. Also, the DM node down handler which follows this
  1020. // GUM handler may think that the quorum resource is owned by
  1021. // this node and its state is online while it has not been
  1022. // brought online on this node. Note also the order of this
  1023. // call and the call to set the group ownership. THIS ORDER
  1024. // MUST BE FOLLOWED since we don't hold any groups lock here
  1025. // (since we are paranoid about deadlocks) and we don't want
  1026. // the FmCheckQuorumState function called as a part of the
  1027. // DM node down handler to think that the group is owned by
  1028. // this node and is also online on this node.
  1029. //
  1030. FmpPrepareGroupEnumForOnline( pNodeGroups );
  1031. //
  1032. // Set the Group owner.
  1033. //
  1034. FmpSetGroupEnumOwner( pNodeGroups,
  1035. pDefaultTarget,
  1036. pszNodeId,
  1037. bQuorumGroup,
  1038. pGroupNodeList );
  1039. //
  1040. // Chittur Subbaraman (chitturs) - 5/26/99
  1041. //
  1042. // Clear the intended owner fields of all the groups. This is done
  1043. // since there is no guarantee that FmpTakeGroupRequest will do this.
  1044. //
  1045. FmpResetGroupIntendedOwner( pNodeGroups );
  1046. //
  1047. // Chittur Subbaraman (chitturs) - 7/14/99
  1048. //
  1049. // Handle the online of group list containing the quorum resource with
  1050. // a separate thread and let the worker thread handle group lists
  1051. // not containing the quorum resource. This is necessary since it is
  1052. // possible that this node can take ownership at roughly the same
  1053. // time of a quorum group and a non-quorum group each resident
  1054. // in a different node due to back-to-back node crashes. In such a
  1055. // case, we can't order these groups for online globally with the
  1056. // quorum group first in the list. So, we don't want the worker thread
  1057. // to be "stuck" in FmpRmOnlineResource for the non-quorum group's
  1058. // resource waiting for the quorum group to be brought online since
  1059. // the quorum group online work item is queued behind the non-quorum
  1060. // group online work item.
  1061. //
  1062. if ( bQuorumGroup )
  1063. {
  1064. HANDLE hThread = NULL;
  1065. DWORD dwThreadId;
  1066. ClRtlLogPrint(LOG_NOISE,
  1067. "[FM] FmpAssignOwnersToGroups - Create thread to handle group list containing quorum group....\n"
  1068. );
  1069. hThread = CreateThread( NULL,
  1070. 0,
  1071. FmpBringQuorumGroupListOnline,
  1072. pNodeGroups,
  1073. 0,
  1074. &dwThreadId );
  1075. if ( hThread == NULL )
  1076. {
  1077. CL_UNEXPECTED_ERROR( GetLastError() );
  1078. OmDereferenceObject( pDefaultTarget );
  1079. goto FnExit;
  1080. }
  1081. CloseHandle( hThread );
  1082. } else
  1083. {
  1084. ClRtlLogPrint(LOG_NOISE,
  1085. "[FM] FmpAssignOwnersToGroups - Post work item to worker thread to handle group list containing non-quorum groups....\n"
  1086. );
  1087. FmpPostWorkItem(FM_EVENT_INTERNAL_ONLINE_GROUPLIST, pNodeGroups, 0);
  1088. }
  1089. OmDereferenceObject(pDefaultTarget);
  1090. FnExit:
  1091. //
  1092. // Release the global group lock
  1093. //
  1094. FmpReleaseGroupLock();
  1095. return(ERROR_SUCCESS);
  1096. }
  1097. /****
  1098. @func DWORD | FmpResetGroupIntendedOwner| This routine resets the
  1099. intended owner for all groups in the list.
  1100. @parm IN PGROUP_ENUM | pGroupEnum| The list of Groups.
  1101. @rdesc Returns ERROR_SUCCESS.
  1102. ****/
  1103. VOID
  1104. FmpResetGroupIntendedOwner(
  1105. IN PGROUP_ENUM pGroupEnum
  1106. )
  1107. {
  1108. DWORD i;
  1109. PFM_GROUP pGroup;
  1110. ClRtlLogPrint(LOG_NOISE,
  1111. "[FM] FmpResetGroupIntendedOwner: Entry.\n");
  1112. for ( i = 0; i < pGroupEnum->EntryCount; i++ )
  1113. {
  1114. pGroup = OmReferenceObjectById( ObjectTypeGroup,
  1115. pGroupEnum->Entry[i].Id );
  1116. if ( pGroup == NULL )
  1117. {
  1118. ClRtlLogPrint(LOG_UNUSUAL,
  1119. "[FM] FmpResetGroupIntendedOwner: Group %1!ws! not found\n");
  1120. continue;
  1121. }
  1122. pGroup->pIntendedOwner = NULL;
  1123. OmDereferenceObject( pGroup );
  1124. }
  1125. ClRtlLogPrint(LOG_NOISE,
  1126. "[FM] FmpResetGroupIntendedOwner: Exit.\n");
  1127. }
  1128. /****
  1129. @func DWORD | FmpGetGroupInNodeGroupList | This routine checks whether
  1130. the supplied group is to be included in the list to be brought
  1131. online.
  1132. @parm OUT PGROUP_ENUM | pReturnEnum | The group list possibly
  1133. containing the supplied group.
  1134. @parm IN PFM_GROUP | pGroup | The group which is to be brought online
  1135. possibly.
  1136. @parm IN LPCWSTR | pszDeadNodeId | The node ID of the dead node.
  1137. @parm OUT PBOOL | pbQuorumGroup | Does the group list contain the quorum group ?
  1138. @rdesc Returns ERROR_SUCCESS on success OR a Win32 error code on a
  1139. failure.
  1140. ****/
  1141. DWORD
  1142. FmpGetGroupInNodeGroupList(
  1143. OUT PGROUP_ENUM *pReturnEnum,
  1144. IN PFM_GROUP pGroup,
  1145. IN LPCWSTR pszDeadNodeId,
  1146. OUT PBOOL pbQuorumGroup
  1147. )
  1148. {
  1149. DWORD dwStatus = ERROR_SUCCESS;
  1150. PGROUP_ENUM pGroupEnum = NULL;
  1151. PFM_RESOURCE pQuoResource = NULL;
  1152. //
  1153. // Chittur Subbaraman (chitturs) - 6/7/99
  1154. //
  1155. // This function is only called if an RPC exception is raised in
  1156. // FmpTakeGroupRequest. This function will check to see whether this
  1157. // group is to be brought online in this node.
  1158. //
  1159. ClRtlLogPrint(LOG_NOISE,
  1160. "[FM] FmpGetGroupInNodeGroupList: Entry for group <%1!ws!>\n",
  1161. OmObjectId(pGroup));
  1162. *pbQuorumGroup = FALSE;
  1163. pGroupEnum = LocalAlloc( LPTR,
  1164. sizeof( GROUP_ENUM_ENTRY ) + sizeof( GROUP_ENUM ) );
  1165. if ( pGroupEnum == NULL )
  1166. {
  1167. dwStatus = ERROR_NOT_ENOUGH_MEMORY;
  1168. goto FnExit;
  1169. }
  1170. pGroupEnum->Entry[0].Id = pGroupEnum->Entry[1].Id = NULL;
  1171. pGroupEnum->EntryCount = 0;
  1172. //
  1173. // Check whether this group was in the dead node or was in the
  1174. // process of moving to the dead node.
  1175. //
  1176. if( ( pszDeadNodeId != NULL ) &&
  1177. ( lstrcmp ( pszDeadNodeId, OmObjectId ( pGroup->OwnerNode ) ) ) &&
  1178. ( ( pGroup->pIntendedOwner == NULL ) ||
  1179. ( ( lstrcmp ( pszDeadNodeId, OmObjectId ( pGroup->pIntendedOwner ) ) ) ) ) )
  1180. {
  1181. //
  1182. // This group does not match the owner criteria
  1183. //
  1184. dwStatus = ERROR_GROUP_NOT_AVAILABLE;
  1185. goto FnExit;
  1186. }
  1187. dwStatus = FmFindQuorumResource( &pQuoResource );
  1188. if ( dwStatus != ERROR_SUCCESS )
  1189. {
  1190. ClRtlLogPrint(LOG_CRITICAL,
  1191. "[FM] FmpGetGroupInNodeGroupList: Cannot find quorum resource, Status = %1!u!\n",
  1192. dwStatus);
  1193. CsInconsistencyHalt( dwStatus );
  1194. }
  1195. //
  1196. // Handle the quorum group first, if necessary. This is needed since
  1197. // otherwise you may not be able to bring the other group online.
  1198. //
  1199. if( ( pGroup != pQuoResource->Group ) &&
  1200. ( ( pszDeadNodeId == NULL ) ||
  1201. ( !lstrcmp ( pszDeadNodeId, OmObjectId ( pQuoResource->Group->OwnerNode ) ) ) ||
  1202. ( ( pQuoResource->Group->pIntendedOwner != NULL ) &&
  1203. ( !lstrcmp ( pszDeadNodeId, OmObjectId ( pQuoResource->Group->pIntendedOwner ) ) ) ) ) )
  1204. {
  1205. //
  1206. // The quorum group matches the owner criteria. Include it first
  1207. // in the list.
  1208. //
  1209. pGroupEnum->Entry[pGroupEnum->EntryCount].Id =
  1210. LocalAlloc( LMEM_FIXED, ( lstrlenW(OmObjectId(pQuoResource->Group)) + 1 ) * sizeof( WCHAR ) );
  1211. if ( pGroupEnum->Entry[pGroupEnum->EntryCount].Id == NULL )
  1212. {
  1213. dwStatus = ERROR_NOT_ENOUGH_MEMORY;
  1214. goto FnExit;
  1215. }
  1216. ClRtlLogPrint(LOG_NOISE,
  1217. "[FM] FmpGetGroupInNodeGroupList: Dead node contains quorum group also, including it...\n");
  1218. lstrcpyW( pGroupEnum->Entry[pGroupEnum->EntryCount].Id, OmObjectId( pQuoResource->Group ) );
  1219. pGroupEnum->EntryCount++;
  1220. *pbQuorumGroup = TRUE;
  1221. } else if ( pGroup == pQuoResource->Group )
  1222. {
  1223. *pbQuorumGroup = TRUE;
  1224. }
  1225. pGroupEnum->Entry[pGroupEnum->EntryCount].Id =
  1226. LocalAlloc( LMEM_FIXED, ( lstrlenW(OmObjectId(pGroup)) + 1 ) * sizeof( WCHAR ) );
  1227. if ( pGroupEnum->Entry[pGroupEnum->EntryCount].Id == NULL )
  1228. {
  1229. dwStatus = ERROR_NOT_ENOUGH_MEMORY;
  1230. goto FnExit;
  1231. }
  1232. lstrcpyW( pGroupEnum->Entry[pGroupEnum->EntryCount].Id, OmObjectId( pGroup ) );
  1233. pGroupEnum->EntryCount++;
  1234. *pReturnEnum = pGroupEnum;
  1235. OmDereferenceObject( pQuoResource );
  1236. ClRtlLogPrint(LOG_NOISE,
  1237. "[FM] FmpGetGroupInNodeGroupList: Exit with SUCCESS.\n");
  1238. return( ERROR_SUCCESS );
  1239. FnExit:
  1240. if ( pGroupEnum != NULL )
  1241. {
  1242. FmpDeleteEnum( pGroupEnum );
  1243. }
  1244. if ( pQuoResource != NULL )
  1245. {
  1246. OmDereferenceObject( pQuoResource );
  1247. }
  1248. *pReturnEnum = NULL;
  1249. ClRtlLogPrint(LOG_NOISE,
  1250. "[FM] FmpGetGroupInNodeGroupList: Exit, Status = %1!u!\n",
  1251. dwStatus);
  1252. return( dwStatus );
  1253. }
  1254. /****
  1255. @func VOID | FmpPrepareGroupEnumForOnline | Prepare a list of
  1256. groups for online.
  1257. @parm IN PGROUP_ENUM | pGroupEnum | The group list.
  1258. @rdesc None.
  1259. ****/
  1260. VOID
  1261. FmpPrepareGroupEnumForOnline(
  1262. IN PGROUP_ENUM pGroupEnum
  1263. )
  1264. {
  1265. PFM_GROUP pGroup = NULL;
  1266. DWORD i;
  1267. //
  1268. // Chittur Subbaraman (chitturs) - 6/21/99
  1269. //
  1270. // Prepare an entire group list for online.
  1271. //
  1272. ClRtlLogPrint(LOG_NOISE,
  1273. "[FM] FmpPrepareGroupEnumForOnline - Entry...\n");
  1274. for ( i=0; i<pGroupEnum->EntryCount; i++ )
  1275. {
  1276. pGroup = OmReferenceObjectById( ObjectTypeGroup,
  1277. pGroupEnum->Entry[i].Id );
  1278. //
  1279. // If we fail to find a group, then just continue.
  1280. //
  1281. if ( pGroup == NULL )
  1282. {
  1283. ClRtlLogPrint(LOG_UNUSUAL,
  1284. "[FM] FmpPrepareGroupEnumForOnline - Group %1!ws! cannot be found !\n",
  1285. pGroupEnum->Entry[i].Id);
  1286. continue;
  1287. }
  1288. ClRtlLogPrint(LOG_NOISE,
  1289. "[FM] FmpPrepareGroupEnumForOnline - Preparing group <%1!ws!> for online...\n",
  1290. pGroupEnum->Entry[i].Id);
  1291. FmpPrepareGroupForOnline( pGroup );
  1292. OmDereferenceObject ( pGroup );
  1293. }
  1294. ClRtlLogPrint(LOG_NOISE,
  1295. "[FM] FmpPrepareGroupEnumForOnline - Exit...\n");
  1296. }
  1297. /****
  1298. @func DWORD | FmpBringQuorumGroupListOnline | Bring a list of groups
  1299. containing the quorum group online.
  1300. @parm IN LPVOID | pContext | A pointer to the group list to be brought
  1301. online.
  1302. @rdesc Returns ERROR_SUCCESS.
  1303. ****/
  1304. DWORD
  1305. FmpBringQuorumGroupListOnline(
  1306. IN LPVOID pContext
  1307. )
  1308. {
  1309. PGROUP_ENUM pGroupList = NULL;
  1310. //
  1311. // Chittur Subbaraman (chitturs) - 7/14/99
  1312. //
  1313. // This function tries to bring a list of groups containing the quorum
  1314. // group online. Note that if the group's owner turns out to be some
  1315. // other node, this function will not online the group.
  1316. //
  1317. ClRtlLogPrint(LOG_NOISE,
  1318. "[FM] FmpBringQuorumGroupListOnline - Entry: Trying to online group list containing quorum group....\n"
  1319. );
  1320. pGroupList = pContext;
  1321. CL_ASSERT( pGroupList != NULL );
  1322. FmpOnlineGroupList( pGroupList, TRUE );
  1323. FmpDeleteEnum( pGroupList );
  1324. ClRtlLogPrint(LOG_NOISE,
  1325. "[FM] FmpBringQuorumGroupListOnline - Exit ....\n"
  1326. );
  1327. return( ERROR_SUCCESS );
  1328. }
  1329. /****
  1330. @func BOOL | FmpIsAnyResourcePersistentStateOnline | Is the persistent state of any
  1331. resource in the group online ?
  1332. @parm IN PFM_GROUP | pGroup | The group which is to be checked.
  1333. @rdesc TRUE if at least one resource's persistent state is ClusterResourceOnline, FALSE otherwise.
  1334. ****/
  1335. BOOL
  1336. FmpIsAnyResourcePersistentStateOnline(
  1337. IN PFM_GROUP pGroup
  1338. )
  1339. {
  1340. PFM_RESOURCE pResource;
  1341. PLIST_ENTRY pListEntry;
  1342. if ( CsNoQuorum ) return FALSE;
  1343. for ( pListEntry = pGroup->Contains.Flink;
  1344. pListEntry != &( pGroup->Contains );
  1345. pListEntry = pListEntry->Flink )
  1346. {
  1347. pResource = CONTAINING_RECORD( pListEntry,
  1348. FM_RESOURCE,
  1349. ContainsLinkage );
  1350. if ( pResource->PersistentState == ClusterResourceOnline )
  1351. {
  1352. ClRtlLogPrint(LOG_NOISE,
  1353. "[FM] FmpIsAnyResourcePersistentStateOnline: Persistent state of resource %1!ws! in group %2!ws! is online...\r\n",
  1354. OmObjectId(pResource),
  1355. OmObjectId(pGroup));
  1356. return ( TRUE );
  1357. }
  1358. } // for
  1359. ClRtlLogPrint(LOG_NOISE,
  1360. "[FM] FmpIsAnyResourcePersistentStateOnline: No resource in group %1!ws! has persistent state online...\r\n",
  1361. OmObjectId(pGroup));
  1362. return( FALSE );
  1363. } // FmpIsAnyResourcePersistentStateOnline