You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2781 lines
74 KiB
2781 lines
74 KiB
/*++
|
|
|
|
Copyright (c) 1996 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
fminit.c
|
|
|
|
Abstract:
|
|
|
|
Initialization for the Failover Manager component of the
|
|
NT Cluster Service
|
|
|
|
Author:
|
|
|
|
John Vert (jvert) 7-Feb-1996
|
|
Rod Gamache (rodga) 14-Mar-1996
|
|
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
#include "..\nm\nmp.h" /* For NmpEnumNodeDefinitions */
|
|
#ifdef LOG_CURRENT_MODULE
|
|
#undef LOG_CURRENT_MODULE
|
|
#endif
|
|
#include "fmp.h"
|
|
|
|
|
|
#define LOG_MODULE FMINIT
|
|
|
|
// The order in which the locks should be acquired is
|
|
// 1) gQuoChangeLock
|
|
// 2) GroupLock
|
|
// 3) gQuoLock
|
|
// 4) GumLocks
|
|
// 4*) gResTypeLock - this lock is acquired inside gum updates
|
|
// 5) gLockDmpRoot
|
|
// 6) pLog->Lock
|
|
|
|
|
|
// A lock for synchronizing online/offline with respect to the quorum
// resource.
// This lock is held in exclusive mode when bringing the quorum resource
// online/offline and in shared mode when other resources are brought
// online/offline.
|
|
#if NO_SHARED_LOCKS
|
|
CRITICAL_SECTION gQuoLock;
|
|
#else
|
|
RTL_RESOURCE gQuoLock;
|
|
#endif
|
|
|
|
//A lock for synchronizing changes to the resource->quorumresource field
|
|
//and allowing changes to the quorum resource's group in form phase1
|
|
// and phase 2 of fm.
|
|
#if NO_SHARED_LOCKS
|
|
CRITICAL_SECTION gQuoChangeLock;
|
|
#else
|
|
RTL_RESOURCE gQuoChangeLock;
|
|
#endif
|
|
|
|
//A lock for synchronizing changes to the resource type field entries.
|
|
//shared by all resource types.
|
|
#if NO_SHARED_LOCKS
|
|
CRITICAL_SECTION gResTypeLock;
|
|
#else
|
|
RTL_RESOURCE gResTypeLock;
|
|
#endif
|
|
|
|
|
|
GUM_DISPATCH_ENTRY FmGumDispatchTable[] = {
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeResourceName},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeGroupName},
|
|
{1, FmpUpdateDeleteResource},
|
|
{1, FmpUpdateDeleteGroup},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateAddDependency},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateRemoveDependency},
|
|
{1, FmpUpdateChangeClusterName},
|
|
{3, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeQuorumResource},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateResourceState},
|
|
{3, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupState},
|
|
{4, (PGUM_DISPATCH_ROUTINE1)EpUpdateClusWidePostEvent},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupNode},
|
|
{3, (PGUM_DISPATCH_ROUTINE1)FmpUpdatePossibleNodeForResType},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateGroupIntendedOwner},
|
|
{1, (PGUM_DISPATCH_ROUTINE1)FmpUpdateAssignOwnerToGroups},
|
|
{1, (PGUM_DISPATCH_ROUTINE1)FmpUpdateApproveJoin},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateCompleteGroupMove},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateCheckAndSetGroupOwner},
|
|
{2, (PGUM_DISPATCH_ROUTINE1)FmpUpdateUseRandomizedNodeListForGroups},
|
|
{5, (PGUM_DISPATCH_ROUTINE1)FmpUpdateChangeQuorumResource2},
|
|
};
|
|
|
|
|
|
#define WINDOW_TIMEOUT (15*60*1000) // Try every 15 minutes
|
|
|
|
//
|
|
// Global data initialized in this module
|
|
//
|
|
|
|
PRESMON FmpDefaultMonitor = NULL;
|
|
DWORD FmpInitialized = FALSE;
|
|
DWORD FmpFMOnline = FALSE;
|
|
DWORD FmpFMGroupsInited = FALSE;
|
|
DWORD FmpFMFormPhaseProcessing = FALSE; //this is set to true when form new cluster phase processing starts
|
|
BOOL FmpShutdown = FALSE;
|
|
BOOL FmpMajorEvent = FALSE; // Signals a major event while joining
|
|
DWORD FmpQuorumOnLine = FALSE;
|
|
|
|
HANDLE FmpShutdownEvent;
|
|
HANDLE FmpTimerThread;
|
|
|
|
HANDLE ghQuoOnlineEvent = NULL; // the event that is signalled when the quorum res is online
|
|
DWORD gdwQuoBlockingResources = 0; // the number of resources in pending stated which prevent the quorum res state change
|
|
|
|
PFM_NODE gFmpNodeArray = NULL;
|
|
|
|
// 185575: remove unique RPC binding handles
|
|
//CRITICAL_SECTION FmpBindingLock;
|
|
|
|
//
|
|
// Local functions
|
|
//
|
|
BOOL
|
|
FmpEnumNodes(
|
|
OUT DWORD *pStatus,
|
|
IN PVOID Context2,
|
|
IN PNM_NODE Node,
|
|
IN LPCWSTR Name
|
|
);
|
|
|
|
DWORD
|
|
FmpJoinPendingThread(
|
|
IN LPVOID Context
|
|
);
|
|
|
|
|
|
DWORD FmpGetJoinApproval();
|
|
|
|
static
|
|
DWORD
|
|
FmpBuildForceQuorumInfo(
|
|
IN LPCWSTR pszNodesIn,
|
|
OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
|
|
);
|
|
|
|
static
|
|
void
|
|
FmpDeleteForceQuorumInfo(
|
|
IN OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
|
|
);
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmInitialize(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Initializes the failover manager
|
|
|
|
Arguments:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful.
|
|
|
|
Win32 error code otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD Status;
|
|
OM_OBJECT_TYPE_INITIALIZE ObjectTypeInit;
|
|
DWORD NodeId;
|
|
|
|
CL_ASSERT(!FmpInitialized);
|
|
|
|
if ( FmpInitialized ) {
|
|
return(ERROR_SUCCESS);
|
|
}
|
|
Status = EpRegisterEventHandler(CLUSTER_EVENT_ALL,FmpEventHandler);
|
|
if (Status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt( Status );
|
|
}
|
|
|
|
//register for synchronous node down notifications
|
|
Status = EpRegisterSyncEventHandler(CLUSTER_EVENT_NODE_DOWN_EX,
|
|
FmpSyncEventHandler);
|
|
|
|
if (Status != ERROR_SUCCESS){
|
|
CsInconsistencyHalt( Status );
|
|
}
|
|
|
|
//
|
|
// Initialize Critical Sections.
|
|
//
|
|
|
|
InitializeCriticalSection( &FmpResourceLock );
|
|
InitializeCriticalSection( &FmpGroupLock );
|
|
InitializeCriticalSection( &FmpMonitorLock );
|
|
|
|
//
|
|
// Initialize the monitor list head
|
|
//
|
|
InitializeListHead ( &g_leFmpMonitorListHead );
|
|
|
|
// 185575: remove unique RPC binding handles
|
|
// InitializeCriticalSection( &FmpBindingLock );
|
|
|
|
// initialize the quorum lock
|
|
// This is used to synchronize online/offlines of other resources
|
|
// with respect to the quorum resource
|
|
INITIALIZE_LOCK(gQuoLock);
|
|
//this is used to check/change the resource->quorum value
|
|
//This synchronization is needed between the resource transition
|
|
//processing that needs to do special processing for quorum
|
|
//resource and the gum update handler to change the quorum resource
|
|
INITIALIZE_LOCK(gQuoChangeLock);
|
|
|
|
//Initialize the restype lock
|
|
INITIALIZE_LOCK(gResTypeLock);
|
|
|
|
// create a unnamed event that is used for waiting for quorum resource
|
|
// to go online
|
|
// This is a manual reset event and is initialized to unsignalled state.
|
|
// When the quorum resource goes to pending state this is manually reset
|
|
// to unsignalled state. When the quorum resource goes online it is set
|
|
// to signalled state
|
|
ghQuoOnlineEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
|
|
if (!ghQuoOnlineEvent)
|
|
{
|
|
CL_UNEXPECTED_ERROR((Status = GetLastError()));
|
|
return(Status);
|
|
|
|
}
|
|
|
|
gFmpNodeArray = (PFM_NODE)LocalAlloc(LMEM_FIXED,
|
|
(sizeof(FM_NODE) * (NmGetMaxNodeId() + 1))
|
|
);
|
|
|
|
if (gFmpNodeArray == NULL) {
|
|
Status = ERROR_NOT_ENOUGH_MEMORY;
|
|
CL_UNEXPECTED_ERROR(Status);
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
//initialize it and the RPC binding table
|
|
for (NodeId = ClusterMinNodeId; NodeId <= NmMaxNodeId; ++NodeId)
|
|
{
|
|
FmpRpcBindings[NodeId] = NULL;
|
|
FmpRpcQuorumBindings[NodeId] = NULL;
|
|
gFmpNodeArray[NodeId].dwNodeDownProcessingInProgress = 0;
|
|
gFmpNodeArray[NodeId].dwNodeDownProcessingThreadId = 0;
|
|
}
|
|
|
|
//
|
|
// Initialize the FM work queue.
|
|
//
|
|
Status = ClRtlInitializeQueue( &FmpWorkQueue );
|
|
if (Status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
//
|
|
// Create a pending event notification.
|
|
//
|
|
FmpShutdownEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
|
|
|
|
if ( FmpShutdownEvent == NULL ) {
|
|
return(GetLastError());
|
|
}
|
|
|
|
//
|
|
// Initialize Group Types.
|
|
//
|
|
ObjectTypeInit.Name = FMP_GROUP_NAME;
|
|
ObjectTypeInit.Signature = FMP_GROUP_SIGNATURE;
|
|
ObjectTypeInit.ObjectSize = sizeof(FM_GROUP);
|
|
ObjectTypeInit.DeleteObjectMethod = FmpGroupLastReference;
|
|
|
|
Status = OmCreateType( ObjectTypeGroup,
|
|
&ObjectTypeInit );
|
|
|
|
if ( Status != ERROR_SUCCESS ) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
//
|
|
// Initialize Resource Types.
|
|
//
|
|
ObjectTypeInit.Name = FMP_RESOURCE_NAME;
|
|
ObjectTypeInit.Signature = FMP_RESOURCE_SIGNATURE;
|
|
ObjectTypeInit.ObjectSize = sizeof(FM_RESOURCE);
|
|
ObjectTypeInit.DeleteObjectMethod = FmpResourceLastReference;
|
|
|
|
Status = OmCreateType( ObjectTypeResource,
|
|
&ObjectTypeInit );
|
|
|
|
if ( Status != ERROR_SUCCESS ) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
//
|
|
// Initialize ResType Types.
|
|
//
|
|
ObjectTypeInit.Name = FMP_RESOURCE_TYPE_NAME;
|
|
ObjectTypeInit.Signature = FMP_RESOURCE_TYPE_SIGNATURE;
|
|
ObjectTypeInit.ObjectSize = sizeof(FM_RESTYPE);
|
|
ObjectTypeInit.DeleteObjectMethod = FmpResTypeLastRef;
|
|
|
|
Status = OmCreateType( ObjectTypeResType,
|
|
&ObjectTypeInit );
|
|
|
|
if ( Status != ERROR_SUCCESS ) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
//
|
|
// Initialize the Notify thread.
|
|
//
|
|
Status = FmpInitializeNotify();
|
|
if (Status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Initialize the FM worker thread.
|
|
//
|
|
Status = FmpStartWorkerThread();
|
|
if ( Status != ERROR_SUCCESS ) {
|
|
CsInconsistencyHalt(Status);
|
|
return(Status);
|
|
}
|
|
|
|
FmpInitialized = TRUE;
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
} // FmInitialize
|
|
|
|
|
|
|
|
BOOL
|
|
FmpEnumGroupsInit(
|
|
IN PVOID Context1,
|
|
IN PVOID Context2,
|
|
IN PFM_GROUP Group,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback for FM join. This phase completes initialization
|
|
of every group.
|
|
|
|
Arguments:
|
|
|
|
Context1 - Not used.
|
|
|
|
Context2 - Not used.
|
|
|
|
Group - Supplies the group.
|
|
|
|
Name - Supplies the group's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
|
|
|
|
//
|
|
// Finish initializing the group.
|
|
//
|
|
FmpCompleteInitGroup( Group );
|
|
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumGroupsInit
|
|
|
|
BOOL
|
|
FmpEnumFixupResources(
|
|
IN PCLUSTERVERSIONINFO pClusterVersionInfo,
|
|
IN PVOID Context2,
|
|
IN PFM_GROUP Group,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback for FM join. This phase completes initialization
|
|
of every group.
|
|
|
|
Arguments:
|
|
|
|
Context1 - Not used.
|
|
|
|
Context2 - Not used.
|
|
|
|
Group - Supplies the group.
|
|
|
|
Name - Supplies the group's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
PLIST_ENTRY listEntry;
|
|
PFM_RESOURCE Resource;
|
|
|
|
FmpAcquireLocalGroupLock( Group );
|
|
|
|
//
|
|
// For each resource in the Group, make sure it gets an
|
|
// opportunity to do fixups.
|
|
//
|
|
for ( listEntry = Group->Contains.Flink;
|
|
listEntry != &(Group->Contains);
|
|
listEntry = listEntry->Flink ) {
|
|
|
|
Resource = CONTAINING_RECORD(listEntry, FM_RESOURCE, ContainsLinkage);
|
|
FmpRmResourceControl( Resource,
|
|
CLUSCTL_RESOURCE_CLUSTER_VERSION_CHANGED,
|
|
(LPBYTE)pClusterVersionInfo,
|
|
pClusterVersionInfo->dwVersionInfoSize,
|
|
NULL,
|
|
0,
|
|
NULL,
|
|
NULL
|
|
);
|
|
|
|
|
|
}
|
|
|
|
FmpReleaseLocalGroupLock( Group);
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumFixupResources
|
|
|
|
|
|
BOOL
|
|
FmpEnumJoinGroupsMove(
|
|
IN LPBOOL Deferred,
|
|
IN PVOID Context2,
|
|
IN PFM_GROUP Group,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback for FM join. Queries the preferred owners
|
|
groups and moves those that belong on this system and that can move.
|
|
|
|
Arguments:
|
|
|
|
Deferred - TRUE if a move was deferred because of Failback Window. Must
|
|
be FALSE on first call.
|
|
|
|
Context2 - Not used.
|
|
|
|
Group - Supplies the group.
|
|
|
|
Name - Supplies the group's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
PLIST_ENTRY listEntry;
|
|
PPREFERRED_ENTRY preferredEntry;
|
|
SYSTEMTIME localTime;
|
|
BOOL failBackWindowOkay = FALSE;
|
|
DWORD threadId;
|
|
DWORD status;
|
|
|
|
GetLocalTime( &localTime );
|
|
|
|
FmpAcquireLocalGroupLock( Group );
|
|
|
|
//
|
|
// Adjust ending time if needed.
|
|
//
|
|
if ( Group->FailbackWindowStart > Group->FailbackWindowEnd ) {
|
|
Group->FailbackWindowEnd += 24;
|
|
if ( Group->FailbackWindowStart > localTime.wHour ) {
|
|
localTime.wHour += 24;
|
|
}
|
|
}
|
|
|
|
//
|
|
// If the Failback start and end times are valid, then check if we need
|
|
// to start a timer thread to move the group at the appropriate time.
|
|
//
|
|
if ( (Group->FailbackType == GroupFailback) &&
|
|
((Group->FailbackWindowStart != Group->FailbackWindowEnd) &&
|
|
(localTime.wHour >= Group->FailbackWindowStart) &&
|
|
(localTime.wHour < Group->FailbackWindowEnd)) ||
|
|
(Group->FailbackWindowStart == Group->FailbackWindowEnd) ) {
|
|
failBackWindowOkay = TRUE;
|
|
}
|
|
|
|
//
|
|
// Check if we need to move the group.
|
|
//
|
|
if ( !IsListEmpty( &Group->PreferredOwners ) ) {
|
|
listEntry = Group->PreferredOwners.Flink;
|
|
preferredEntry = CONTAINING_RECORD( listEntry,
|
|
PREFERRED_ENTRY,
|
|
PreferredLinkage );
|
|
//
|
|
// Move group if:
|
|
// 0. Remote system is paused, and we're not OR
|
|
// 1. Our system is in the preferred list and the owner node is not OR
|
|
// 2. Group is Offline or Group is Online/PartialOnline and it can
|
|
// failback AND
|
|
// 3. Group's preferred list is ordered and our system is higher
|
|
//
|
|
|
|
if ( Group->OwnerNode == NULL ) {
|
|
// Should we shoot ourselves because we got an incomplete snapshot
|
|
// of the joint attempt.
|
|
CsInconsistencyHalt(ERROR_CLUSTER_JOIN_ABORTED);
|
|
} else if ( Group->OwnerNode != NmLocalNode) {
|
|
if (((NmGetNodeState(NmLocalNode) != ClusterNodePaused) &&
|
|
(NmGetNodeState(Group->OwnerNode) == ClusterNodePaused)) ||
|
|
|
|
(FmpInPreferredList(Group, NmLocalNode, FALSE, NULL) &&
|
|
!FmpInPreferredList( Group, Group->OwnerNode, FALSE, NULL)) ||
|
|
|
|
((((Group->State == ClusterGroupOnline) ||
|
|
(Group->State == ClusterGroupPartialOnline)) &&
|
|
(Group->FailbackType == FailbackOkay) ||
|
|
(Group->State == ClusterGroupOffline)) &&
|
|
((Group->OrderedOwners) &&
|
|
(FmpHigherInPreferredList(Group, NmLocalNode, Group->OwnerNode)))) ) {
|
|
if ( failBackWindowOkay ) {
|
|
PNM_NODE OwnerNode = Group->OwnerNode;
|
|
|
|
status = FmcMoveGroupRequest( Group, NmLocalNode );
|
|
if ( ( status == ERROR_SUCCESS ) || ( status == ERROR_IO_PENDING ) ) {
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 7/31/2000
|
|
//
|
|
// Log an event indicating an impending failback.
|
|
//
|
|
CsLogEvent3( LOG_NOISE,
|
|
FM_EVENT_GROUP_FAILBACK,
|
|
OmObjectName(Group),
|
|
OmObjectName(OwnerNode),
|
|
OmObjectName(NmLocalNode) );
|
|
}
|
|
FmpAcquireLocalGroupLock( Group );
|
|
} else {
|
|
//
|
|
// Start timer thread if not already running. If it fails,
|
|
// what possibly can we do?
|
|
//
|
|
if ( FmpTimerThread == NULL ) {
|
|
FmpTimerThread = CreateThread( NULL,
|
|
0,
|
|
FmpJoinPendingThread,
|
|
NULL,
|
|
0,
|
|
&threadId );
|
|
}
|
|
*Deferred = TRUE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
FmpReleaseLocalGroupLock( Group );
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumJoinGroups
|
|
|
|
|
|
|
|
BOOL
|
|
FmpEnumSignalGroups(
|
|
IN PVOID Context1,
|
|
IN PVOID Context2,
|
|
IN PFM_GROUP Group,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback to indicate state change on all groups
|
|
and resources.
|
|
|
|
For the quorum resource, if we're forming a cluster, we'll also
|
|
fixup information that was not available when the resource was created.
|
|
|
|
Arguments:
|
|
|
|
Context1 - Pointer to a BOOL that is TRUE if this is a FormCluster.
|
|
FALSE otherwise.
|
|
|
|
Context2 - Not used.
|
|
|
|
Group - Supplies the group.
|
|
|
|
Name - Supplies the group's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
PLIST_ENTRY listEntry;
|
|
PFM_RESOURCE resource;
|
|
BOOL formCluster = *(PBOOL)Context1;
|
|
DWORD status;
|
|
BOOL quorumGroup = FALSE;
|
|
|
|
//
|
|
// For each resource in the group, generate an event notification.
|
|
//
|
|
|
|
for (listEntry = Group->Contains.Flink;
|
|
listEntry != &(Group->Contains);
|
|
listEntry = listEntry->Flink ) {
|
|
resource = CONTAINING_RECORD( listEntry,
|
|
FM_RESOURCE,
|
|
ContainsLinkage );
|
|
//
|
|
// If this is the quorum resource and we're performing a Form
|
|
// Cluster, then fixup the quorum resource info.
|
|
//
|
|
if ( resource->QuorumResource ) {
|
|
status = FmpFixupResourceInfo( resource );
|
|
quorumGroup = TRUE;
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint( LOG_NOISE,
|
|
"[FM] Warning, failed to fixup quorum resource %1!ws!, error %2!u!.\n",
|
|
OmObjectId(resource),
|
|
status );
|
|
}
|
|
}
|
|
|
|
if ( resource->State == ClusterResourceOnline ) {
|
|
ClusterEvent( CLUSTER_EVENT_RESOURCE_ONLINE, resource );
|
|
} else {
|
|
ClusterEvent( CLUSTER_EVENT_RESOURCE_OFFLINE, resource );
|
|
}
|
|
}
|
|
|
|
if ( quorumGroup ) {
|
|
status = FmpFixupGroupInfo( Group );
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint( LOG_NOISE,
|
|
"[FM] Warning, failed to fixup quorum group %1!ws!, error %2!u!.\n",
|
|
OmObjectId( Group ),
|
|
status );
|
|
}
|
|
}
|
|
|
|
if ( Group->State == ClusterGroupOnline ) {
|
|
ClusterEvent( CLUSTER_EVENT_GROUP_ONLINE, Group );
|
|
} else {
|
|
ClusterEvent( CLUSTER_EVENT_GROUP_OFFLINE, Group );
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumSignalGroups
|
|
|
|
|
|
|
|
DWORD
|
|
FmpJoinPendingThread(
|
|
IN LPVOID Context
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Thread to keep trying to move groups, as long we are blocked by a
|
|
FailbackWindow problem. This thread runs every 15 minutes to attempt to
|
|
move Groups.
|
|
|
|
Arguments:
|
|
|
|
Context - Not used.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
BOOL deferred;
|
|
|
|
//
|
|
// As long as we have deferred Group moves, keep going.
|
|
do {
|
|
|
|
status = WaitForSingleObject( FmpShutdownEvent, WINDOW_TIMEOUT );
|
|
|
|
if ( FmpShutdown ) {
|
|
goto finished;
|
|
}
|
|
|
|
deferred = FALSE;
|
|
|
|
//
|
|
// For each group, see if it should be moved to the local system.
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumJoinGroupsMove,
|
|
&deferred,
|
|
NULL );
|
|
|
|
} while ( (status != WAIT_FAILED) && deferred );
|
|
|
|
finished:
|
|
|
|
CloseHandle( FmpTimerThread );
|
|
FmpTimerThread = NULL;
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
} // FmpJoinPendingThread
|
|
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmGetQuorumResource(
|
|
OUT PFM_GROUP *ppQuoGroup,
|
|
OUT LPDWORD lpdwSignature OPTIONAL
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Find the quorum resource, arbitrate it and return a name that can be
|
|
used to open the device in order to perform reads. Optionally,
|
|
return the signature of the quorum disk.
|
|
|
|
There are 3 items that we need:
|
|
|
|
1. The name of the quorum resource.
|
|
2. The name of the Group that the quorum resource is a member of.
|
|
3. The resource type for the quorum resource.
|
|
|
|
Arguments:
|
|
|
|
ppQuoGroup - Supplies a pointer to a buffer into which the
|
|
quorum group info is returned.
|
|
|
|
lpdwSignature - An optional argument which is used to return
|
|
the signature of the quorum disk from the cluster hive.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful.
|
|
|
|
A Win32 error code on failure.
|
|
|
|
--*/
|
|
|
|
{
|
|
LPWSTR quorumId = NULL;
|
|
LPWSTR groupId = NULL;
|
|
LPCWSTR stringId;
|
|
LPWSTR containsString = NULL;
|
|
PFM_GROUP group = NULL;
|
|
PFM_RESOURCE resource = NULL;
|
|
HDMKEY hGroupKey;
|
|
DWORD groupIdSize = 0;
|
|
DWORD idMaxSize = 0;
|
|
DWORD idSize = 0;
|
|
DWORD status;
|
|
DWORD keyIndex;
|
|
DWORD stringIndex;
|
|
|
|
*ppQuoGroup = NULL;
|
|
|
|
//
|
|
// Get the quorum resource value.
|
|
//
|
|
status = DmQuerySz( DmQuorumKey,
|
|
CLUSREG_NAME_QUORUM_RESOURCE,
|
|
(LPWSTR*)&quorumId,
|
|
&idMaxSize,
|
|
&idSize );
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Failed to get quorum resource, error %1!u!.\n",
|
|
status);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 10/30/98
|
|
//
|
|
// If the user is forcing a database restore operation, you
|
|
// also need to verify whether the quorum disk signature in
|
|
// the registry matches that in the disk itself. So, go get
|
|
// the signature from the Cluster\Resources\quorumId\Parameters
|
|
// key
|
|
//
|
|
if ( lpdwSignature != NULL ) {
|
|
status = FmpGetQuorumDiskSignature( quorumId, lpdwSignature );
|
|
if ( status != ERROR_SUCCESS ) {
|
|
//
|
|
// This is not a fatal error. So log an error and go on.
|
|
//
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Failed to get quorum disk signature, error %1!u!.\n",
|
|
status);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Initialize the default Resource Monitor
|
|
//
|
|
if ( FmpDefaultMonitor == NULL ) {
|
|
FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
|
|
}
|
|
|
|
if (FmpDefaultMonitor == NULL) {
|
|
status = GetLastError();
|
|
CsInconsistencyHalt(status);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Now find the group that the quorum resource is a member of.
|
|
//
|
|
idMaxSize = 0;
|
|
idSize = 0;
|
|
for ( keyIndex = 0; ; keyIndex++ )
|
|
{
|
|
status = FmpRegEnumerateKey( DmGroupsKey,
|
|
keyIndex,
|
|
&groupId,
|
|
&groupIdSize );
|
|
|
|
if ( status != ERROR_SUCCESS )
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL, "[FM] FmGetQuorumResource: FmpRegEnumerateKey returns %1!u!\n",
|
|
status);
|
|
break;
|
|
}
|
|
|
|
//open the group key
|
|
hGroupKey = DmOpenKey( DmGroupsKey,
|
|
groupId,
|
|
KEY_READ );
|
|
if (!hGroupKey)
|
|
continue;
|
|
//
|
|
// Get the contains string.
|
|
//
|
|
status = DmQueryMultiSz( hGroupKey,
|
|
CLUSREG_NAME_GRP_CONTAINS,
|
|
&containsString,
|
|
&idMaxSize,
|
|
&idSize );
|
|
DmCloseKey(hGroupKey);
|
|
|
|
if ( status != ERROR_SUCCESS )
|
|
continue;
|
|
for ( stringIndex = 0; ; stringIndex++ )
|
|
{
|
|
stringId = ClRtlMultiSzEnum( containsString,
|
|
idSize/sizeof(WCHAR),
|
|
stringIndex );
|
|
if ( stringId == NULL ) {
|
|
break;
|
|
}
|
|
if ( lstrcmpiW( stringId, quorumId ) == 0 )
|
|
{
|
|
// We will now create the group, which will also
|
|
// create the resource, and the resource type.
|
|
//
|
|
// TODO - this will also create all resources
|
|
// within the group. What should we do about that?
|
|
// We could require the quorum resource to be in
|
|
// a group by itself! (rodga) 17-June-1996.
|
|
//
|
|
group = FmpCreateGroup( groupId,
|
|
FALSE );
|
|
if (CsNoQuorum)
|
|
FmpSetGroupPersistentState(group, ClusterGroupOffline);
|
|
|
|
break;
|
|
}
|
|
}
|
|
//if we found the group, thre is no need to search for more
|
|
if (group != NULL)
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Check if we found the Quorum resource's group.
|
|
//
|
|
if ( group == NULL )
|
|
{
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Did not find group for quorum resource.\n");
|
|
status = ERROR_GROUP_NOT_FOUND;
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Get the quorum resource structure.
|
|
//
|
|
resource = OmReferenceObjectById( ObjectTypeResource, quorumId );
|
|
if ( resource == NULL )
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Failed to find quorum resource object.\n");
|
|
status = ERROR_RESOURCE_NOT_FOUND;
|
|
goto FnExit;
|
|
}
|
|
|
|
resource->QuorumResource = TRUE;
|
|
|
|
if (!CsNoQuorum)
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Arbitrate for quorum resource id %1!ws!.\n",
|
|
OmObjectId(resource));
|
|
|
|
//
|
|
// First finish initializing the quorum resource.
|
|
//
|
|
if ( resource->Monitor == NULL )
|
|
{
|
|
status = FmpInitializeResource( resource, TRUE );
|
|
if ( status != ERROR_SUCCESS )
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] Error completing initialization of quorum resource '%1!ws!, error %2!u!.\n",
|
|
OmObjectId(resource),
|
|
status );
|
|
goto FnExit;
|
|
}
|
|
}
|
|
|
|
if ( CsForceQuorum ) {
|
|
status = FmpSendForceQuorumControlToResource( resource );
|
|
if ( status != ERROR_SUCCESS ) {
|
|
// The routine does its own logging. Just bail.
|
|
goto FnExit;
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Now arbitrate for the resource.
|
|
//
|
|
status = FmpRmArbitrateResource( resource );
|
|
|
|
}
|
|
|
|
FnExit:
|
|
if ( status == ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] FmGetQuorumResource successful\n");
|
|
*ppQuoGroup = group;
|
|
}
|
|
else
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] FmGetQuorumResource failed, error %1!u!.\n",
|
|
status);
|
|
//the group will be cleaned by fmshutdown()
|
|
|
|
}
|
|
if (resource) OmDereferenceObject(resource);
|
|
if (quorumId) LocalFree(quorumId);
|
|
if (groupId) LocalFree(groupId);
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 10/05/98
|
|
// Fix memory leak
|
|
//
|
|
if (containsString) LocalFree(containsString);
|
|
return(status);
|
|
} // FmGetQuorumResource
|
|
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmpSendForceQuorumControlToResource(
|
|
PFM_RESOURCE resource )
|
|
{
|
|
PCLUS_FORCE_QUORUM_INFO pForceQuorumInfo = NULL;
|
|
DWORD status;
|
|
|
|
//
|
|
// If we have a force quorum (Majority Node Set) then drop a control code to the
|
|
// resource with the list of nodes. This must be done before
|
|
// arbitrate. First we build force quorum info - this makes sure that the node list is valid etc.
|
|
// Note that the list can be NULL.
|
|
//
|
|
status = FmpBuildForceQuorumInfo( CsForceQuorumNodes,
|
|
&pForceQuorumInfo );
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] Error building force quorum info for resource '%1!ws!, error %2!u!.\n",
|
|
OmObjectId(resource),
|
|
status );
|
|
goto FnExit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] sending CLUSCTL_RESOURCE_FORCE_QUORUM\n" );
|
|
|
|
status = FmpRmResourceControl( resource,
|
|
CLUSCTL_RESOURCE_FORCE_QUORUM,
|
|
(LPBYTE)pForceQuorumInfo,
|
|
pForceQuorumInfo->dwSize,
|
|
NULL,
|
|
0,
|
|
NULL,
|
|
NULL );
|
|
//
|
|
// Tolerate ERROR_INVALID_FUNCTION since this just means that the
|
|
// resource doesn't handle it.
|
|
//
|
|
if ( status == ERROR_INVALID_FUNCTION )
|
|
status = ERROR_SUCCESS;
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Resource control for Force Quorum for resource '%1!ws! encountered error %2!u!.\n",
|
|
OmObjectId(resource),
|
|
status );
|
|
}
|
|
|
|
FnExit:
|
|
if (pForceQuorumInfo) FmpDeleteForceQuorumInfo( &pForceQuorumInfo );
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
|
|
BOOL
|
|
WINAPI
|
|
FmpIsNodeInForceQuorumNodes(
|
|
IN LPCWSTR lpszNodeId )
|
|
{
|
|
BOOL result = FALSE;
|
|
PCLUS_FORCE_QUORUM_INFO pForceQuorumInfo = NULL;
|
|
DWORD dwNodeId;
|
|
PNM_NODE pNmNode = NULL;
|
|
DWORD status;
|
|
|
|
status = FmpBuildForceQuorumInfo( CsForceQuorumNodes,
|
|
&pForceQuorumInfo );
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] Error building force quorum info, error %1!u!.\n",
|
|
status );
|
|
goto FnExit;
|
|
}
|
|
|
|
pNmNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
lpszNodeId );
|
|
|
|
if (pNmNode == NULL) {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint( LOG_UNUSUAL,
|
|
"[FM] Node %1!ws! is not a member of this cluster. Cannot join.\n",
|
|
lpszNodeId );
|
|
goto FnExit;
|
|
}
|
|
|
|
dwNodeId = NmGetNodeId( pNmNode );
|
|
|
|
result = ( pForceQuorumInfo->dwNodeBitMask & ( 1 << dwNodeId )) != 0;
|
|
|
|
ClRtlLogPrint( LOG_NOISE,
|
|
"[FM] Node %1!ws! is %2!ws!in the ForceQuorumNodes list.\n",
|
|
lpszNodeId,
|
|
( result ? L"" : L"not " ));
|
|
|
|
FnExit:
|
|
if (pForceQuorumInfo) FmpDeleteForceQuorumInfo( &pForceQuorumInfo );
|
|
if ( pNmNode ) OmDereferenceObject( pNmNode );
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmFindQuorumResource(
|
|
OUT PFM_RESOURCE *ppResource
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Finds the quorum resource and returns a pointer to the resource
|
|
object.
|
|
|
|
Arguments:
|
|
|
|
*ppResource - A pointer to the Quorum resource object is returned in this.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful.
|
|
|
|
A Win32 error code on failure.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD dwError = ERROR_SUCCESS;
|
|
|
|
//enumerate all the resources
|
|
*ppResource = NULL;
|
|
|
|
OmEnumObjects( ObjectTypeResource,
|
|
FmpFindQuorumResource,
|
|
ppResource,
|
|
NULL );
|
|
|
|
if ( *ppResource == NULL )
|
|
{
|
|
dwError = ERROR_RESOURCE_NOT_FOUND;
|
|
CL_LOGCLUSERROR(FM_QUORUM_RESOURCE_NOT_FOUND);
|
|
}
|
|
|
|
return(dwError);
|
|
}
|
|
|
|
|
|
//
// Returns the node id of the node that owns the group containing the given
// resource. The group is asserted to have an owner node.
//
DWORD WINAPI FmFindQuorumOwnerNodeId(IN PFM_RESOURCE pResource)
{
    CL_ASSERT(pResource->Group->OwnerNode != NULL);

    return (NmGetNodeId(pResource->Group->OwnerNode));
}
|
|
|
|
|
|
|
|
BOOL
|
|
FmpReturnResourceType(
|
|
IN OUT PFM_RESTYPE *FoundResourceType,
|
|
IN LPCWSTR ResourceTypeName,
|
|
IN PFM_RESTYPE ResourceType,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback for FM join. Queries the preferred owners
|
|
groups and moves those that belong on this system and that can move.
|
|
|
|
Arguments:
|
|
|
|
ResourceType - Returns the found ResourceType, if found.
|
|
|
|
Context2 - The input resource type name to find.
|
|
|
|
Resource - Supplies the current ResourceType.
|
|
|
|
Name - Supplies the ResourceType's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
|
|
if ( lstrcmpiW( Name, ResourceTypeName ) == 0 ) {
|
|
OmReferenceObject( ResourceType );
|
|
*FoundResourceType = ResourceType;
|
|
return(FALSE);
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpReturnResourceType
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmFormNewClusterPhase1(
|
|
IN PFM_GROUP pQuoGroup
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Destroys the quorum group that was created. The quorum resource is left
|
|
behind and its group adjusted according to the new logs.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Returns:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 errorcode otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] FmFormNewClusterPhase1, Entry. Quorum quorum will be deleted\n");
|
|
|
|
//
|
|
// Enable the GUM.
|
|
//
|
|
GumReceiveUpdates(FALSE,
|
|
GumUpdateFailoverManager,
|
|
FmpGumReceiveUpdates,
|
|
NULL,
|
|
sizeof(FmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
|
|
FmGumDispatchTable,
|
|
FmpGumVoteHandler);
|
|
|
|
//Acquire the exclusive lock for the quorum
|
|
// This is done so that we can ignore any resource transition events from
|
|
// the quorum resource between phase 1 and phase 2 of FM initialization on Form
|
|
ACQUIRE_EXCLUSIVE_LOCK(gQuoChangeLock);
|
|
|
|
FmpFMFormPhaseProcessing = TRUE;
|
|
|
|
//release the quorum lock
|
|
RELEASE_LOCK(gQuoChangeLock);
|
|
|
|
//the group lock will be freed by FmpDestroyGroup
|
|
FmpAcquireLocalGroupLock( pQuoGroup );
|
|
|
|
//destroy the quorum group object, dont bring the quorum resource online/offline
|
|
//All resources in the quorum group must get deleted, except the quorum resource
|
|
//All resources in the quorum group must get recreated in FmFormNewClusterPhase2.
|
|
//The quorum group is removed from the group list, hence it will be recreated in phase2.
|
|
//Since the quorum resource must not get deleted we will increment its ref count
|
|
//This is because in phase 2 it is not created and its ref count is not incremented at create
|
|
//By the time it is put on the contains list, we expect the resource count to be 2.
|
|
OmReferenceObject(gpQuoResource);
|
|
status = FmpDestroyGroup(pQuoGroup, TRUE);
|
|
|
|
//We prefer that the quorum group is deleted
|
|
//since after rollback the old group may no longer exist and we
|
|
//dont want it to be on the group list
|
|
gpQuoResource->Group = NULL;
|
|
OmDereferenceObject(pQuoGroup);
|
|
|
|
return(status);
|
|
|
|
} // FmFormNewClusterPhase1
|
|
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmFormNewClusterPhase2(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Bring the Failover Manager Online, this means claiming all groups and
|
|
finishing the initialization of resources.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Returns:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 errorcode otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
BOOL formCluster = TRUE;
|
|
PFM_GROUP group;
|
|
PFM_RESOURCE pQuoResource=NULL;
|
|
CLUSTERVERSIONINFO ClusterVersionInfo;
|
|
PCLUSTERVERSIONINFO pClusterVersionInfo = NULL;
|
|
PGROUP_ENUM MyGroups = NULL;
|
|
BOOL QuorumGroup;
|
|
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] FmFormNewClusterPhase2, Entry.\n");
|
|
|
|
|
|
//
|
|
// Initialize resource types
|
|
//
|
|
status = FmpInitResourceTypes();
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Initialize Groups,
|
|
//
|
|
status = FmpInitGroups( FALSE );
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
// refigure out the state for the quorum group
|
|
status = FmFindQuorumResource(&pQuoResource);
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
goto error_exit;
|
|
}
|
|
//
|
|
// Set the state of the quorum group depending upon the state of
|
|
// the quorum resource
|
|
//
|
|
//now we should enable resource events to come in for the quorum resource as well
|
|
ACQUIRE_EXCLUSIVE_LOCK(gQuoChangeLock);
|
|
FmpFMFormPhaseProcessing = FALSE;
|
|
|
|
group = pQuoResource->Group;
|
|
group->State = FmpGetGroupState(group, TRUE);
|
|
OmDereferenceObject(pQuoResource);
|
|
|
|
//if the noquorum flag is set, dont bring the quorum group online
|
|
if (CsNoQuorum)
|
|
FmpSetGroupPersistentState(pQuoResource->Group, ClusterGroupOffline);
|
|
|
|
RELEASE_LOCK(gQuoChangeLock);
|
|
|
|
//
|
|
// Check if resource dll deadlock detection is enabled. This must be called only
|
|
// after FmpInitialized is set to TRUE.
|
|
//
|
|
FmCheckIsDeadlockDetectionEnabled ();
|
|
|
|
//
|
|
// Initialize the default Resource Monitor
|
|
//
|
|
if ( FmpDefaultMonitor == NULL ) {
|
|
FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
|
|
}
|
|
|
|
if (FmpDefaultMonitor == NULL) {
|
|
status = GetLastError();
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Failed to create default resource monitor on Form.\n");
|
|
goto error_exit;
|
|
}
|
|
|
|
|
|
if (NmLocalNodeVersionChanged)
|
|
{
|
|
//initialize the version information
|
|
CsGetClusterVersionInfo(&ClusterVersionInfo);
|
|
pClusterVersionInfo = &ClusterVersionInfo;
|
|
}
|
|
|
|
|
|
//enable votes and gum updates since the fixups for
|
|
//resource types require that
|
|
FmpFMGroupsInited = TRUE;
|
|
|
|
//
|
|
// The resource type possible node list is built
|
|
// using a voting protocol, hence we need to
|
|
// fix it up since the vote could have been conducted
|
|
// while this node was down.
|
|
// Also call the resource type control code if the
|
|
// local node version has changed
|
|
//
|
|
status = FmpFixupResourceTypesPhase1(FALSE, NmLocalNodeVersionChanged,
|
|
pClusterVersionInfo);
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
goto error_exit;
|
|
}
|
|
|
|
|
|
//
|
|
// Find and sort all known groups
|
|
//
|
|
status = FmpEnumSortGroups(&MyGroups, NULL, &QuorumGroup);
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
|
|
//
|
|
// Find the state of the Groups.
|
|
//
|
|
FmpGetGroupListState( MyGroups );
|
|
|
|
//
|
|
// Set the Group owner.
|
|
//
|
|
FmpSetGroupEnumOwner( MyGroups, NmLocalNode, NULL, QuorumGroup, NULL );
|
|
|
|
|
|
//
|
|
// For each group, finish initialization of all groups and resources.
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumGroupsInit,
|
|
NULL,
|
|
NULL );
|
|
|
|
// if the resource type is not supported, remove it from the possible
|
|
// owners list of all resources of that type
|
|
status = FmpFixupPossibleNodesForResources(FALSE);
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
if (NmLocalNodeVersionChanged)
|
|
{
|
|
|
|
//
|
|
// For each group, allow all resources to do any fixups
|
|
// they might need to do to the cluster registry to
|
|
// run in a mixed mode cluster.
|
|
//
|
|
// Get the version info
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumFixupResources,
|
|
&ClusterVersionInfo,
|
|
NULL );
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Take ownership of all the groups in the system. This also completes
|
|
// the initialization of all resources.
|
|
//
|
|
status = FmpClaimAllGroups(MyGroups);
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,"[FM] FmpClaimAllGroups failed %1!d!\n",status);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Cleanup
|
|
//
|
|
FmpDeleteEnum(MyGroups);
|
|
|
|
FmpFMOnline = TRUE;
|
|
|
|
//
|
|
// Signal a state change for every group and resource!
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumSignalGroups,
|
|
&formCluster,
|
|
NULL );
|
|
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 5/3/2000
|
|
//
|
|
// Make sure the phase 2 notifications are delivered only after all initialization is
|
|
// complete. This includes fixing up the possible owners of the quorum resource by
|
|
// FmpEnumSignalGroups. Once phase 2 notifications are delivered, resource type DLLs
|
|
// would be free to issue cluster API calls into FM and the lack of possible owners should
|
|
// not be the reason to reject these calls.
|
|
//
|
|
status = FmpFixupResourceTypesPhase2(FALSE, NmLocalNodeVersionChanged,
|
|
pClusterVersionInfo);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt( status );
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[FM] FmFormNewClusterPhase2 complete.\n");
|
|
return(ERROR_SUCCESS);
|
|
|
|
|
|
error_exit:
|
|
|
|
if (MyGroups) FmpDeleteEnum(MyGroups);
|
|
|
|
FmpShutdown = TRUE;
|
|
FmpFMOnline = FALSE;
|
|
|
|
FmpCleanupGroups(FALSE);
|
|
if (FmpDefaultMonitor != NULL) {
|
|
FmpShutdownMonitor( FmpDefaultMonitor );
|
|
FmpDefaultMonitor = NULL;
|
|
}
|
|
|
|
FmpShutdown = FALSE;
|
|
|
|
return(status);
|
|
|
|
|
|
|
|
} // FmFormNewClusterPhase2
|
|
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmJoinPhase1(
|
|
OUT DWORD *EndSeq
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Performs the FM initialization and join procedure. This creates skeletal
|
|
groups and resources, which are not fully initialized. After the API is
|
|
fully enabled (in Phase 2) we will finish initialization of the groups
|
|
and resources (which causes the resource monitors to run and opens
|
|
the resource DLL's.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 errorcode otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
DWORD sequence;
|
|
int retries = 0;
|
|
|
|
//
|
|
// Enable Gum updates.
|
|
//
|
|
GumReceiveUpdates(TRUE,
|
|
GumUpdateFailoverManager,
|
|
FmpGumReceiveUpdates,
|
|
NULL,
|
|
sizeof(FmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
|
|
FmGumDispatchTable,
|
|
FmpGumVoteHandler);
|
|
|
|
retry:
|
|
status = GumBeginJoinUpdate(GumUpdateFailoverManager, &sequence);
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] GumBeginJoinUpdate failed %1!d!\n",
|
|
status);
|
|
return(status);
|
|
}
|
|
|
|
//
|
|
// Build up all the FM data structures for resource types.
|
|
//
|
|
//
|
|
// Initialize resource types
|
|
//
|
|
status = FmpInitResourceTypes();
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
//
|
|
// Initialize Groups, but don't fully initialize them yet.
|
|
//
|
|
status = FmpInitGroups( FALSE );
|
|
if (status != ERROR_SUCCESS) {
|
|
return(status);
|
|
}
|
|
|
|
//
|
|
// Initialize the default Resource Monitor. This step must be done before end join update
|
|
// since this node can receive certain updates such as s_GumCollectVoteFromNode immediately
|
|
// after GumEndJoinUpdate which may need the services of the default monitor.
|
|
//
|
|
if ( FmpDefaultMonitor == NULL ) {
|
|
FmpDefaultMonitor = FmpCreateMonitor(NULL, FALSE);
|
|
}
|
|
if ( FmpDefaultMonitor == NULL ) {
|
|
status = GetLastError();
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
//
|
|
// Get the group and resource state from each node which is online.
|
|
//
|
|
status = ERROR_SUCCESS;
|
|
OmEnumObjects( ObjectTypeNode,
|
|
FmpEnumNodes,
|
|
&status,
|
|
NULL );
|
|
if (status == ERROR_SUCCESS) {
|
|
FmpFMGroupsInited = TRUE;
|
|
// Gum Update handlers for resource and group state changes
|
|
// can process the updates now.
|
|
status = GumEndJoinUpdate(sequence,
|
|
GumUpdateFailoverManager,
|
|
FmUpdateJoin,
|
|
0,
|
|
NULL);
|
|
if (status == ERROR_CLUSTER_DATABASE_SEQMISMATCH) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] GumEndJoinUpdate with sequence %1!d! failed with a sequence mismatch\n",
|
|
sequence);
|
|
} else if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] GumEndJoinUpdate with sequence %1!d! failed with status %2!d!\n",
|
|
sequence,
|
|
status);
|
|
}
|
|
} else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] FmJoin: FmpEnumNodes failed %1!d!\n",
|
|
status);
|
|
return(status);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
//
|
|
// clean up resources
|
|
//
|
|
FmpShutdown = TRUE;
|
|
FmpCleanupGroups(FALSE);
|
|
FmpShutdown = FALSE;
|
|
|
|
if ( retries++ < 3 ) {
|
|
ClRtlLogPrint(LOG_UNUSUAL, "[FM] FmJoinPhase1: retry %1!d!\n", retries);
|
|
goto retry;
|
|
}
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase1 complete.\n");
|
|
|
|
// Update EndSeq on success
|
|
*EndSeq = sequence;
|
|
|
|
//
|
|
// Check if resource dll deadlock detection is enabled. This must be called only
|
|
// after FmpInitialized is set to TRUE.
|
|
//
|
|
FmCheckIsDeadlockDetectionEnabled ();
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // FmJoinPhase1
|
|
|
|
|
|
DWORD
|
|
WINAPI
|
|
FmJoinPhase2(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Performs the second phase of FM initialization and join procedure.
|
|
Finish creation of resources by allowing the resource monitors to be
|
|
created. Claim any groups which should failback to this node.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 errorcode otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
CLUSTERVERSIONINFO ClusterVersionInfo;
|
|
PCLUSTERVERSIONINFO pClusterVersionInfo = NULL;
|
|
DWORD dwRetryCount=60;//try for atleast a minute
|
|
|
|
|
|
GetJoinApproval:
|
|
status = FmpGetJoinApproval();
|
|
|
|
if (status == ERROR_RETRY)
|
|
{
|
|
// if the other nodes have pending work to do
|
|
//after this node last died and are not willing
|
|
// to accept it back till that is over, we will stall
|
|
// the join
|
|
//sleep for a second
|
|
dwRetryCount--;
|
|
if (dwRetryCount)
|
|
{
|
|
Sleep(1000);
|
|
goto GetJoinApproval;
|
|
}
|
|
else
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] FmJoinPhase2 : timed out trying to get join approval.\n");
|
|
CsInconsistencyHalt(status);
|
|
}
|
|
}
|
|
|
|
|
|
if (NmLocalNodeVersionChanged)
|
|
{
|
|
//initialize the cluster versioninfo structure
|
|
CsGetClusterVersionInfo(&ClusterVersionInfo);
|
|
pClusterVersionInfo = &ClusterVersionInfo;
|
|
}
|
|
//
|
|
// The resource type possible node list is built
|
|
// using a voting protocol, hence we need to
|
|
// fix it up since the vote could have been conducted
|
|
// while this node was down.
|
|
//
|
|
status = FmpFixupResourceTypesPhase1(TRUE, NmLocalNodeVersionChanged,
|
|
pClusterVersionInfo);
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
|
|
//
|
|
// For each group, finish initialization of all groups and resources.
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumGroupsInit,
|
|
NULL,
|
|
NULL );
|
|
|
|
|
|
// if the resource type is not supported, remove it from the possible
|
|
// owners list of all resources of that type
|
|
status = FmpFixupPossibleNodesForResources(TRUE);
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
if (NmLocalNodeVersionChanged)
|
|
{
|
|
//
|
|
// For each group, allow all resources to do any fixups
|
|
// they might need to do to the cluster registry to
|
|
// run in a mixed mode cluster.
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumFixupResources,
|
|
&ClusterVersionInfo,
|
|
NULL );
|
|
}
|
|
//
|
|
// The FM is now in sync with everybody else.
|
|
//
|
|
FmpFMOnline = TRUE;
|
|
|
|
if ( FmpMajorEvent ) {
|
|
return(ERROR_NOT_READY);
|
|
}
|
|
|
|
// RAID 513705. Need to send force quorum control to the quorum resource at this point.
|
|
if ( CsForceQuorum ) {
|
|
ASSERT( gpQuoResource ); // Should I assert here, or add "gpQuoResource != NULL" to the
|
|
// if expression?
|
|
status = FmpSendForceQuorumControlToResource( gpQuoResource );
|
|
if ( status != ERROR_SUCCESS ) {
|
|
// The routine does its own logging. Just bail.
|
|
return status;
|
|
}
|
|
}
|
|
|
|
|
|
status = FmpFixupResourceTypesPhase2(TRUE, NmLocalNodeVersionChanged,
|
|
pClusterVersionInfo);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
CsInconsistencyHalt(status);
|
|
return(status);
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase2 complete, now online!\n");
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
} // FmJoinPhase2
|
|
|
|
VOID
|
|
FmJoinPhase3(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Handles any group moves and resource/group state change signaling as
|
|
a part of join. This MUST be done only AFTER the extended node state
|
|
is UP.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
--*/
|
|
{
|
|
BOOL formCluster = FALSE;
|
|
DWORD deferred = FALSE;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase3 entry...\n");
|
|
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 10/28/99
|
|
//
|
|
//
|
|
// For each group, see if it should be moved to the local system.
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumJoinGroupsMove,
|
|
&deferred,
|
|
NULL );
|
|
|
|
//
|
|
// Signal a state change for every group and resource!
|
|
//
|
|
OmEnumObjects( ObjectTypeGroup,
|
|
FmpEnumSignalGroups,
|
|
&formCluster,
|
|
NULL );
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[FM] FmJoinPhase3 exit...\n");
|
|
} // FmJoinPhase3
|
|
|
|
BOOL
|
|
FmpFindQuorumResource(
|
|
IN OUT PFM_RESOURCE *QuorumResource,
|
|
IN PVOID Context2,
|
|
IN PFM_RESOURCE Resource,
|
|
IN LPCWSTR Name
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Group enumeration callback for FM findquorumresource.
|
|
|
|
Arguments:
|
|
|
|
QuorumResource - Returns the found quorum resource, if found.
|
|
|
|
Context2 - Not used.
|
|
|
|
Resource - Supplies the current resource.
|
|
|
|
Name - Supplies the Resource's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
|
|
if ( Resource->QuorumResource ) {
|
|
OmReferenceObject( Resource );
|
|
*QuorumResource = Resource;
|
|
return(FALSE);
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpFindQuorumResource
|
|
|
|
|
|
|
|
BOOL
|
|
FmArbitrateQuorumResource(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Arguments:
|
|
|
|
Return Value:
|
|
|
|
TRUE - if the quorum resource was successfully arbitrated and acquired.
|
|
|
|
FALSE - it the quorum resource was not successfully arbitrated.
|
|
|
|
--*/
|
|
|
|
{
|
|
PFM_RESOURCE resource = NULL;
|
|
DWORD status;
|
|
WCHAR localComputerName[MAX_COMPUTERNAME_LENGTH + 1];
|
|
DWORD localComputerNameSize = MAX_COMPUTERNAME_LENGTH + 1;
|
|
|
|
//
|
|
// Next try to find the Quorum resource.
|
|
//
|
|
|
|
FmFindQuorumResource(&resource);
|
|
|
|
if ( resource == NULL ) {
|
|
SetLastError(ERROR_RESOURCE_NOT_FOUND);
|
|
return(FALSE);
|
|
}
|
|
|
|
//
|
|
// Now arbitrate for the resource.
|
|
//
|
|
status = FmpRmArbitrateResource( resource );
|
|
|
|
if ( status == ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Successfully arbitrated quorum resource %1!ws!.\n",
|
|
OmObjectId(resource));
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] FMArbitrateQuoRes: Current State %1!u! State=%2!u! Owner %3!u!\n",
|
|
resource->PersistentState,
|
|
resource->State,
|
|
NmGetNodeId((resource->Group)->OwnerNode));
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] FMArbitrateQuoRes: Group state :Current State %1!u! State=%2!u! Owner %3!u!\n",
|
|
resource->Group->PersistentState,
|
|
resource->Group->State,
|
|
NmGetNodeId((resource->Group)->OwnerNode));
|
|
//
|
|
// The quorum resource will be brought online by REGROUP.
|
|
//
|
|
// RNG: what happens if we can't online the quorum resource?
|
|
// A: The node will halt.
|
|
|
|
//SS: dereference the object referenced by fmfindquorumresource
|
|
OmDereferenceObject(resource);
|
|
|
|
return(TRUE);
|
|
} else {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[FM] Failed to arbitrate quorum resource %1!ws!, error %2!u!.\n",
|
|
OmObjectId(resource),
|
|
status);
|
|
//SS: dereference the object referenced by fmfindquorumresource
|
|
OmDereferenceObject(resource);
|
|
return(FALSE);
|
|
}
|
|
|
|
} // FmArbitrateQuorumResource
|
|
|
|
|
|
|
|
BOOL
|
|
FmpEnumHoldIO(
|
|
IN PVOID Context1,
|
|
IN PVOID Context2,
|
|
IN PFM_RESTYPE ResType,
|
|
IN LPCWSTR Name
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Send a HOLD_IO control code to all resource types of class STORAGE.
|
|
|
|
Arguments:
|
|
|
|
Context1 - Not used.
|
|
|
|
Context2 - Not used.
|
|
|
|
ResType - Supplies the Resource Type.
|
|
|
|
Name - Supplies the Resource Type's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
{
|
|
DWORD dwStatus;
|
|
DWORD bytesReturned;
|
|
DWORD bytesRequired;
|
|
|
|
if ( ResType->Class == CLUS_RESCLASS_STORAGE ) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Hold IO for storage resource type: %1!ws!\n",
|
|
Name );
|
|
|
|
// Hold IO for this resource type
|
|
dwStatus = FmpRmResourceTypeControl(
|
|
Name,
|
|
CLUSCTL_RESOURCE_TYPE_HOLD_IO,
|
|
NULL,
|
|
0,
|
|
NULL,
|
|
0,
|
|
&bytesReturned,
|
|
&bytesRequired );
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Resource DLL Hold IO returned status %1!u!\n",
|
|
dwStatus );
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumHoldIO
|
|
|
|
|
|
|
|
VOID
|
|
FmHoldIO(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine holds all I/O for all storage class resource types.
|
|
It does this by calling the resource dll with a
|
|
CLUSCTL_RESOURCE_TYPE_HOLD_IO resource type control code.
|
|
|
|
Inputs:
|
|
|
|
None
|
|
|
|
Outputs:
|
|
|
|
None
|
|
|
|
--*/
|
|
{
|
|
OmEnumObjects( ObjectTypeResType,
|
|
FmpEnumHoldIO,
|
|
NULL,
|
|
NULL );
|
|
return;
|
|
|
|
} // FmHoldIO
|
|
|
|
|
|
|
|
BOOL
|
|
FmpEnumResumeIO(
|
|
IN PVOID Context1,
|
|
IN PVOID Context2,
|
|
IN PFM_RESTYPE ResType,
|
|
IN LPCWSTR Name
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Send a RESUME_IO control code to all resource types of class STORAGE.
|
|
|
|
Arguments:
|
|
|
|
Context1 - Not used.
|
|
|
|
Context2 - Not used.
|
|
|
|
ResType - Supplies the Resource Type.
|
|
|
|
Name - Supplies the Resource Type's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
{
|
|
DWORD dwStatus;
|
|
DWORD bytesReturned;
|
|
DWORD bytesRequired;
|
|
|
|
if ( ResType->Class == CLUS_RESCLASS_STORAGE ) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Resume IO for storage Resource Type %1!ws!\n",
|
|
Name );
|
|
|
|
// Resume IO for this resource type
|
|
dwStatus = FmpRmResourceTypeControl(
|
|
Name,
|
|
CLUSCTL_RESOURCE_TYPE_RESUME_IO,
|
|
NULL,
|
|
0,
|
|
NULL,
|
|
0,
|
|
&bytesReturned,
|
|
&bytesRequired );
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Resource DLL Resume IO returned status %1!u!\n",
|
|
dwStatus );
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumResumeIO
|
|
|
|
|
|
|
|
VOID
|
|
FmResumeIO(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine resumes all I/O for all storage class resource types.
|
|
It does this by calling the resource dll with a
|
|
CLUSCTL_RESOURCE_TYPE_RESUME_IO resource type control code.
|
|
|
|
Inputs:
|
|
|
|
None
|
|
|
|
Outputs:
|
|
|
|
None
|
|
|
|
--*/
|
|
{
|
|
|
|
OmEnumObjects( ObjectTypeResType,
|
|
FmpEnumResumeIO,
|
|
NULL,
|
|
NULL );
|
|
return;
|
|
|
|
} // FmResumeIO
|
|
|
|
|
|
|
|
BOOL
|
|
FmpEnumNodes(
|
|
OUT DWORD *pStatus,
|
|
IN PVOID Context2,
|
|
IN PNM_NODE Node,
|
|
IN LPCWSTR Name
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Node enumeration callback for FM join. Queries the state
|
|
of owned groups and resources for each online node.
|
|
|
|
Arguments:
|
|
|
|
pStatus - Returns any error that may occur.
|
|
|
|
Context2 - Not used
|
|
|
|
Node - Supplies the node.
|
|
|
|
Name - Supplies the node's name.
|
|
|
|
Return Value:
|
|
|
|
TRUE - to indicate that the enumeration should continue.
|
|
FALSE - to indicate that the enumeration should not continue.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD Status;
|
|
DWORD NodeId;
|
|
PGROUP_ENUM NodeGroups = NULL;
|
|
PRESOURCE_ENUM NodeResources = NULL;
|
|
DWORD i;
|
|
PFM_GROUP Group;
|
|
PFM_RESOURCE Resource;
|
|
|
|
if (Node == NmLocalNode) {
|
|
CL_ASSERT(NmGetNodeState(Node) != ClusterNodeUp);
|
|
return(TRUE);
|
|
}
|
|
|
|
//
|
|
// Enumerate all other node's group states. This includes all nodes
|
|
// that are up, as well as nodes that are paused.
|
|
//
|
|
if ((NmGetNodeState(Node) == ClusterNodeUp) ||
|
|
(NmGetNodeState(Node) == ClusterNodePaused)){
|
|
NodeId = NmGetNodeId(Node);
|
|
CL_ASSERT(Session[NodeId] != NULL);
|
|
|
|
Status = FmsQueryOwnedGroups(Session[NodeId],
|
|
&NodeGroups,
|
|
&NodeResources);
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] FmsQueryOwnedGroups to node %1!ws! failed %2!d!\n",
|
|
OmObjectId(Node),
|
|
Status);
|
|
*pStatus = Status;
|
|
return(FALSE);
|
|
}
|
|
|
|
//
|
|
// Enumerate the groups and set their owner and state.
|
|
//
|
|
for (i=0; i < NodeGroups->EntryCount; i++) {
|
|
Group = OmReferenceObjectById(ObjectTypeGroup,
|
|
NodeGroups->Entry[i].Id);
|
|
if (Group == NULL) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] FmpEnumNodes: group %1!ws! not found\n",
|
|
NodeGroups->Entry[i].Id);
|
|
} else {
|
|
if ( FmpInPreferredList( Group, Node, FALSE, NULL ) ) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Setting group %1!ws! owner to node %2!ws!, state %3!d!\n",
|
|
OmObjectId(Group),
|
|
OmObjectId(Node),
|
|
NodeGroups->Entry[i].State);
|
|
} else {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Init, Node %1!ws! is not in group %2!ws!.\n",
|
|
OmObjectId(Node),
|
|
OmObjectId(Group));
|
|
}
|
|
OmReferenceObject( Node );
|
|
Group->OwnerNode = Node;
|
|
Group->State = NodeGroups->Entry[i].State;
|
|
Group->StateSequence = NodeGroups->Entry[i].StateSequence;
|
|
OmDereferenceObject(Group);
|
|
}
|
|
|
|
MIDL_user_free(NodeGroups->Entry[i].Id);
|
|
}
|
|
MIDL_user_free(NodeGroups);
|
|
|
|
//
|
|
// Enumerate the resources and set their current state.
|
|
//
|
|
for (i=0; i < NodeResources->EntryCount; i++) {
|
|
Resource = OmReferenceObjectById(ObjectTypeResource,
|
|
NodeResources->Entry[i].Id);
|
|
if (Resource == NULL) {
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] FmpEnumNodes: resource %1!ws! not found\n",
|
|
NodeResources->Entry[i].Id);
|
|
} else {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[FM] Setting resource %1!ws! state to %2!d!\n",
|
|
OmObjectId(Resource),
|
|
NodeResources->Entry[i].State);
|
|
Resource->State = NodeResources->Entry[i].State;
|
|
Resource->StateSequence = NodeResources->Entry[i].StateSequence;
|
|
OmDereferenceObject(Resource);
|
|
}
|
|
MIDL_user_free(NodeResources->Entry[i].Id);
|
|
}
|
|
MIDL_user_free(NodeResources);
|
|
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // FmpEnumNodes
|
|
|
|
|
|
|
|
VOID
|
|
WINAPI
|
|
FmShutdown(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Shuts down the Failover Manager
|
|
|
|
Arguments:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD i;
|
|
|
|
if ( !FmpInitialized ) {
|
|
return;
|
|
}
|
|
|
|
FmpInitialized = FALSE;
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] Shutdown: Failover Manager requested to shutdown.\n");
|
|
|
|
//
|
|
// For now, we really can't delete these critical sections. There is a
|
|
// race condition where the FM is shutting down and someone is walking
|
|
// the lists. Keep this critical sections around... just in case.
|
|
//
|
|
//DeleteCriticalSection( &FmpResourceLock );
|
|
//DeleteCriticalSection( &FmpGroupLock );
|
|
//DeleteCriticalSection( &FmpMonitorLock );
|
|
|
|
if ( FmpDefaultMonitor != NULL ) {
|
|
FmpShutdownMonitor(FmpDefaultMonitor);
|
|
FmpDefaultMonitor = NULL;
|
|
}
|
|
|
|
CloseHandle( FmpShutdownEvent );
|
|
|
|
#if 0 // RNG - don't run the risk of other threads using these handles
|
|
for ( i = ClusterMinNodeId; i <= NmMaxNodeId; i++ ) {
|
|
if ( FmpRpcBindings[i] != NULL ) {
|
|
ClMsgDeleteRpcBinding( FmpRpcBindings[i] );
|
|
FmpRpcBindings[i] = NULL;
|
|
}
|
|
if ( FmpRpcQuorumBindings[i] != NULL ) {
|
|
ClMsgDeleteRpcBinding( FmpRpcQuorumBindings[i] );
|
|
FmpRpcQuorumBindings[i] = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
ClRtlDeleteQueue( &FmpWorkQueue );
|
|
|
|
return;
|
|
|
|
} // FmShutdown
|
|
|
|
|
|
VOID
|
|
WINAPI
|
|
FmShutdownGroups(
|
|
VOID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Moves or takes offline all groups owned by this node.
|
|
|
|
Arguments:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[FM] Shutdown: Failover Manager requested to shutdown groups.\n");
|
|
|
|
//if we didnt initialize, we dont have to do anything
|
|
if (!FmpInitialized)
|
|
return;
|
|
//
|
|
// Use the Group Lock to synchronize the shutdown
|
|
//
|
|
FmpAcquireGroupLock();
|
|
|
|
//if shutdown is already in progress, return
|
|
if ( FmpShutdown) {
|
|
FmpReleaseGroupLock();
|
|
return;
|
|
}
|
|
|
|
|
|
FmpShutdown = TRUE;
|
|
FmpFMOnline = FALSE;
|
|
|
|
FmpReleaseGroupLock();
|
|
|
|
//
|
|
// Now cleanup all Groups/Resources.
|
|
//
|
|
FmpCleanupGroups(TRUE);
|
|
|
|
|
|
return;
|
|
|
|
} // FmShutdownGroups
|
|
|
|
|
|
|
|
/****
|
|
@func DWORD | FmBringQuorumOnline| This routine finds the quorum resource and
|
|
brings it online.
|
|
|
|
@comm This is called by the FmFormClusterPhase 1.
|
|
@xref
|
|
****/
|
|
DWORD FmBringQuorumOnline()
|
|
{
|
|
PFM_RESOURCE pQuoResource;
|
|
DWORD dwError=ERROR_SUCCESS;
|
|
|
|
//
|
|
// Synchronize with shutdown.
|
|
//
|
|
FmpAcquireGroupLock();
|
|
if ( FmpShutdown ) {
|
|
FmpReleaseGroupLock();
|
|
return(ERROR_SUCCESS);
|
|
}
|
|
|
|
if ((dwError = FmFindQuorumResource(&pQuoResource)) != ERROR_SUCCESS)
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[Fm] FmpBringQuorumOnline : failed to find resource 0x%1!08lx!\n",
|
|
dwError);
|
|
goto FnExit;
|
|
}
|
|
|
|
//mark yourself as owner
|
|
if ( pQuoResource->Group->OwnerNode != NULL )
|
|
{
|
|
OmDereferenceObject( pQuoResource->Group->OwnerNode );
|
|
}
|
|
|
|
OmReferenceObject( NmLocalNode );
|
|
pQuoResource->Group->OwnerNode = NmLocalNode;
|
|
|
|
//prepare the group for onlining it
|
|
FmpPrepareGroupForOnline(pQuoResource->Group);
|
|
dwError = FmpOnlineResource(pQuoResource, TRUE);
|
|
//SS:decrement the ref count on the quorum resource object
|
|
//provided by fmfindquorumresource
|
|
OmDereferenceObject(pQuoResource);
|
|
|
|
FnExit:
|
|
FmpReleaseGroupLock();
|
|
return(dwError);
|
|
|
|
}
|
|
|
|
/****
|
|
@func DWORD | FmpGetQuorumDiskSignature | Get the signature of
|
|
the quorum disk from the cluster hive.
|
|
|
|
@parm IN LPWSTR | lpQuorumId | Identifier of the quorum resource.
|
|
|
|
@parm OUT LPDWORD | lpdwSignature | Quorum disk signature.
|
|
|
|
@rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
|
|
|
|
@comm This function attempts to open the Resources\lpQuorumId\Parameters
|
|
key under the cluster hive and read the quorum disk signature.
|
|
|
|
@xref <f FmGetQuorumResource>
|
|
****/
|
|
DWORD
|
|
FmpGetQuorumDiskSignature(
|
|
IN LPCWSTR lpQuorumId,
|
|
OUT LPDWORD lpdwSignature
|
|
)
|
|
{
|
|
HDMKEY hQuorumResKey = NULL;
|
|
HDMKEY hQuorumResParametersKey = NULL;
|
|
DWORD dwStatus = ERROR_SUCCESS;
|
|
|
|
//
|
|
// Chittur Subbaraman (chitturs) - 10/30/98
|
|
//
|
|
hQuorumResKey = DmOpenKey( DmResourcesKey,
|
|
lpQuorumId,
|
|
KEY_READ );
|
|
if ( hQuorumResKey != NULL )
|
|
{
|
|
//
|
|
// Open up the Parameters key
|
|
//
|
|
hQuorumResParametersKey = DmOpenKey( hQuorumResKey,
|
|
CLUSREG_KEYNAME_PARAMETERS,
|
|
KEY_READ );
|
|
DmCloseKey( hQuorumResKey );
|
|
if ( hQuorumResParametersKey != NULL )
|
|
{
|
|
//
|
|
// Read the disk signature value
|
|
//
|
|
dwStatus = DmQueryDword( hQuorumResParametersKey,
|
|
CLUSREG_NAME_PHYSDISK_SIGNATURE,
|
|
lpdwSignature,
|
|
NULL );
|
|
DmCloseKey( hQuorumResParametersKey );
|
|
} else
|
|
{
|
|
dwStatus = GetLastError();
|
|
}
|
|
} else
|
|
{
|
|
dwStatus = GetLastError();
|
|
}
|
|
|
|
//
|
|
// If you failed, then reset the signature to 0 so that the
|
|
// caller won't take any actions based on an invalid signature.
|
|
//
|
|
if ( dwStatus != ERROR_SUCCESS )
|
|
{
|
|
*lpdwSignature = 0;
|
|
}
|
|
|
|
return( dwStatus );
|
|
}
|
|
|
|
|
|
DWORD FmpGetJoinApproval()
|
|
{
|
|
DWORD dwStatus;
|
|
LPCWSTR pszNodeId;
|
|
DWORD dwNodeLen;
|
|
|
|
|
|
pszNodeId = OmObjectId(NmLocalNode);
|
|
dwNodeLen = (lstrlenW(pszNodeId)+1)*sizeof(WCHAR);
|
|
|
|
dwStatus = GumSendUpdateEx(
|
|
GumUpdateFailoverManager,
|
|
FmUpdateApproveJoin,
|
|
1,
|
|
dwNodeLen,
|
|
pszNodeId);
|
|
|
|
return(dwStatus);
|
|
|
|
}
|
|
|
|
/****
|
|
@func DWORD | FmpBuildForceQuorumInfo | Build the force quorum info that
|
|
will be passed to the resource DLL via a control code. This
|
|
involves enumerating nodes and checking that the nodes that make up
|
|
the list passed on the command line are all valid cluster nodes.
|
|
|
|
@parm IN LPCWSTR | pszNodesIn | Comma separated list of node names. If
|
|
this is NULL then the routine just fills the quorum info structure
|
|
with 0 and a NULL node list.
|
|
|
|
@parm OUT PCLUS_FORCE_QUORUM_INFO* | ppForceQuorumInfo | Structure that gets
|
|
filled in with info
|
|
|
|
@rdesc Returns a Win32 error code on failure. ERROR_SUCCESS on success.
|
|
|
|
@comm Assumes NmInitialize was called prior to calling this routine.
|
|
|
|
@xref <f FmpBuildForceQuorumInfo>
|
|
****/
|
|
static
|
|
DWORD
|
|
FmpBuildForceQuorumInfo(
|
|
IN LPCWSTR pszNodesIn,
|
|
OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
|
|
)
|
|
{
|
|
WCHAR *pszOut = NULL;
|
|
WCHAR *pszComma = NULL;
|
|
DWORD status = ERROR_SUCCESS;
|
|
PNM_NODE_ENUM2 pNodeEnum = NULL;
|
|
int iCurrLen = 0, iOffset = 0;
|
|
DWORD dwNodeIndex;
|
|
DWORD dwSize;
|
|
PCLUS_FORCE_QUORUM_INFO pForceQuorumInfo = NULL;
|
|
|
|
// Need to allocate a structure that can hold the nodes list.
|
|
//
|
|
dwSize = sizeof( CLUS_FORCE_QUORUM_INFO ) + sizeof( WCHAR ) * (wcslen( pszNodesIn ) + 1);
|
|
pForceQuorumInfo = LocalAlloc( LMEM_FIXED, dwSize );
|
|
if ( pForceQuorumInfo == NULL ) {
|
|
status = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto ErrorExit;
|
|
}
|
|
ZeroMemory( pForceQuorumInfo, dwSize );
|
|
|
|
pForceQuorumInfo->dwSize = dwSize;
|
|
pForceQuorumInfo->dwNodeBitMask = 0;
|
|
pForceQuorumInfo->dwMaxNumberofNodes = 0;
|
|
|
|
if ( pszNodesIn == NULL ) {
|
|
pForceQuorumInfo->multiszNodeList[0] = L'\0';
|
|
goto ret;
|
|
}
|
|
|
|
ClRtlLogPrint( LOG_NOISE, "[Fm] FmpBuildForceQuorumInfo: pszNodesIn is %1!ws!\n",
|
|
pszNodesIn );
|
|
|
|
// Now get the enumeration of all cluster nodes so we can check we have
|
|
// valid nodes in the list.
|
|
//
|
|
status = NmpEnumNodeDefinitions( &pNodeEnum );
|
|
if ( status != ERROR_SUCCESS )
|
|
goto ErrorExit;
|
|
|
|
// Go through all the nodes we have and ensure that they are cluster nodes.
|
|
// Get the corresponding ID and incorporate in the bitmask
|
|
//
|
|
do {
|
|
pszComma = wcschr( pszNodesIn, (int) L',');
|
|
if ( pszComma == NULL )
|
|
iCurrLen = wcslen( pszNodesIn );
|
|
else
|
|
iCurrLen = (int) (pszComma - pszNodesIn);
|
|
|
|
// At this point pszNodesIn is the start of a node name, iCurrLen chars long
|
|
// or iCurrLen is 0 in which case we have ,, in the input stream.
|
|
//
|
|
if (iCurrLen > 0) {
|
|
|
|
// Work out if this node is part of the cluster and if so get its
|
|
// ID and setup the bitmask.
|
|
//
|
|
for ( dwNodeIndex = 0; dwNodeIndex < pNodeEnum->NodeCount; dwNodeIndex++ ) {
|
|
int iNodeNameLen = wcslen( pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
|
|
ClRtlLogPrint( LOG_NOISE, "[Fm] FmpBuildForceQuorumInfo: trying %1!ws!\n",
|
|
pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
|
|
|
|
if ( ClRtlStrNICmp( pNodeEnum->NodeList[ dwNodeIndex ].NodeName,
|
|
pszNodesIn,
|
|
max(iCurrLen, iNodeNameLen) ) == 0 ) {
|
|
// Using wcstoul here to get the nodeId rather than using
|
|
PWSTR ignore;
|
|
DWORD nodeId = wcstoul( pNodeEnum->NodeList[ dwNodeIndex ].NodeId, &ignore, 10 );
|
|
|
|
ClRtlLogPrint( LOG_NOISE, "[Fm] FmpBuildForceQuorumInfo: got match %1!ws!\n",
|
|
pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
|
|
|
|
// Set the mask and max nodes and break - ignore duplicates.
|
|
//
|
|
if ( !( pForceQuorumInfo->dwNodeBitMask & ( 1 << nodeId )) ) {
|
|
pForceQuorumInfo->dwMaxNumberofNodes += 1;
|
|
pForceQuorumInfo->dwNodeBitMask |= ( 1 << nodeId );
|
|
wcscpy( &pForceQuorumInfo->multiszNodeList[iOffset], pNodeEnum->NodeList[ dwNodeIndex ].NodeName );
|
|
iOffset += wcslen( pNodeEnum->NodeList[ dwNodeIndex ].NodeName ) + 1;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if ( dwNodeIndex == pNodeEnum->NodeCount ) {
|
|
ClRtlLogPrint( LOG_UNUSUAL, "[Fm] FmpBuildForceQuorumInfo: no match for %1!ws!\n", pszNodesIn );
|
|
status = ERROR_INVALID_PARAMETER;
|
|
goto ErrorExit;
|
|
}
|
|
} else if ( pszComma != NULL ) {
|
|
ClRtlLogPrint( LOG_UNUSUAL,
|
|
"[Fm] FmpBuildForceQuorumInfo: iCurrLen was 0 so ,, was in node list: %1!ws!\n",
|
|
CsForceQuorumNodes );
|
|
status = ERROR_INVALID_PARAMETER;
|
|
goto ErrorExit;
|
|
}
|
|
pszNodesIn = pszComma + 1;
|
|
} while ( pszComma != NULL);
|
|
pForceQuorumInfo->multiszNodeList[ iOffset ] = L'\0';
|
|
goto ret;
|
|
|
|
ErrorExit:
|
|
if ( pForceQuorumInfo != NULL ) {
|
|
LocalFree( pForceQuorumInfo );
|
|
pForceQuorumInfo = NULL;
|
|
}
|
|
ret:
|
|
if ( pNodeEnum != NULL ) {
|
|
ClNetFreeNodeEnum( pNodeEnum );
|
|
}
|
|
|
|
if ( status == ERROR_SUCCESS ) {
|
|
*ppForceQuorumInfo = pForceQuorumInfo;
|
|
|
|
ClRtlLogPrint( LOG_NOISE,
|
|
"[Fm] FmpBuildForceQuorumInfo: success; mask is 0x%1!08x!\n",
|
|
pForceQuorumInfo->dwNodeBitMask );
|
|
}
|
|
return status;
|
|
}
|
|
|
|
static
|
|
void
|
|
FmpDeleteForceQuorumInfo(
|
|
IN OUT PCLUS_FORCE_QUORUM_INFO* ppForceQuorumInfo
|
|
)
|
|
{
|
|
(void) LocalFree( *ppForceQuorumInfo );
|
|
*ppForceQuorumInfo = NULL;
|
|
}
|