Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

695 lines
16 KiB

/*++
Copyright (c) 1996-1999 Microsoft Corporation
Module Name:
member.c
Abstract:
Cluster membership management routines for the Node Manager.
Author:
Mike Massa (mikemas) 12-Mar-1996
Revision History:
--*/
#include "nmp.h"
#include <clusrtl.h>
//
// Data
//
//
// Set to TRUE by NmpMembershipInit once MMInit has succeeded.
// NmpMembershipShutdown calls MMShutdown only while this is TRUE and then
// resets it to FALSE.
//
BOOLEAN NmpMembershipCleanupOk = FALSE;
//
// Bitmask of the IDs of the nodes currently considered up. NmpMarkNodeUp
// adds to it; NmpMultiNodeDownEventHandler subtracts the downed nodes.
//
BITSET NmpUpNodeSet = 0;
//
// List of entries that NmpMultiNodeDownEventHandler drains when the leader
// node goes down, signalling each entry's LeaderChangeEvent. The static
// initializer is a placeholder; NmpMembershipInit calls InitializeListHead
// on the list before it is used.
//
LIST_ENTRY NmpLeaderChangeWaitList = {NULL, NULL};
//
// Routines
//
VOID
NmpMarkNodeUp(
    CL_NODE_ID NodeId
    )
/*++
Routine Description:
    Adds a node to the set of nodes currently considered up (NmpUpNodeSet).
Arguments:
    NodeId - ID of the node to add to the up node set.
Return Value:
    None.
Notes:
    Called with the NmpLock held.
--*/
{
    BitsetAdd(NmpUpNodeSet, NodeId);
}  // NmpMarkNodeUp
VOID
NmpNodeUpEventHandler(
    IN PNM_NODE Node
    )
/*++
Routine Description:
    Handles a node-up notification. The node is always added to the up
    node set. If the node is a remote node in the joining state, the
    joiner is additionally flagged as up (NmpJoinerUp).
Arguments:
    Node - The node that came up.
Return Value:
    None.
Notes:
    Called with the NmpLock held.
--*/
{
    NmpMarkNodeUp(Node->NodeId);

    //
    // Nothing more to do for the local node (the join code declares it
    // up) or for a node that is not in the middle of a join.
    //
    if ((Node == NmLocalNode) || (Node->State != ClusterNodeJoining)) {
        return;
    }

    ClRtlLogPrint(LOG_UNUSUAL,
        "[NMJOIN] Joining node %1!u! is now participating in the cluster membership.\n",
        Node->NodeId
        );

    //
    // Sanity-check the global join state before flagging the joiner as up.
    //
    CL_ASSERT(NmpJoinerNodeId == Node->NodeId);
    CL_ASSERT(Node->State == ClusterNodeJoining);
    CL_ASSERT(NmpJoinTimer == 0);
    CL_ASSERT(NmpJoinAbortPending == FALSE);
    CL_ASSERT(NmpJoinerUp == FALSE);

    NmpJoinerUp = TRUE;

}  // NmpNodeUpEventHandler
VOID
NmpNodeDownEventHandler(
    IN PNM_NODE Node
    )
/*++
Routine Description:
    Handles the failure of a single node by forwarding a one-member node
    set to NmpMultiNodeDownEventHandler. The status returned by that
    routine is ignored.
Arguments:
    Node - The node that went down.
Return Value:
    None.
--*/
{
    BITSET singleNodeSet = BitsetFromUnit(Node->NodeId);

    NmpMultiNodeDownEventHandler(singleNodeSet);

}  // NmpNodeDownEventHandler
DWORD
NmpMultiNodeDownEventHandler(
    IN BITSET DownedNodeSet
    )
/*++
Routine Description:
    Processes the failure of one or more nodes. In order, it:
      - removes the downed nodes from NmpUpNodeSet,
      - aborts or flags any in-progress join affected by the failure,
      - elects a new leader if the current leader is among the downed
        nodes and wakes every waiter on NmpLeaderChangeWaitList,
      - marks each downed node as ClusterNodeDown and takes its clusnet
        communication offline,
      - posts CLUSTER_EVENT_NODE_DOWN_EX for the whole set, then a
        CLUSTER_EVENT_NODE_DOWN per node, and cleans up RPC and network
        connectivity state for each downed node,
      - recomputes network and interface states if the local node has
        just become the leader.
Arguments:
    DownedNodeSet - Bitmask of the IDs of the nodes that went down. The
                    local copy may have the joiner's ID added to it if
                    the join sponsor died.
Return Value:
    ERROR_SUCCESS always.
Notes:
    Acquires and releases the NmpLock itself. NmpNodeChange reaches this
    routine through NmpNodeDownEventHandler while already holding the
    lock, so the lock is acquired a second time on that path
    (presumably it permits recursive acquisition - confirm).
--*/
{
    CL_NODE_ID                    i;
    PNM_NODE                      node;
    DWORD                         status;
    BOOLEAN                       iAmNewLeader = FALSE;
    PNM_LEADER_CHANGE_WAIT_ENTRY  waitEntry;
    PLIST_ENTRY                   listEntry;

    ClRtlLogPrint(LOG_NOISE, "[NM] Down node set: %1!04X!.\n", DownedNodeSet);

    NmpAcquireLock();

    //
    // Compute the new up node set
    //
    BitsetSubtract(NmpUpNodeSet, DownedNodeSet);

    ClRtlLogPrint(LOG_NOISE, "[NM] New up node set: %1!04X!.\n", NmpUpNodeSet);

    //
    // Check for failure of a joining node.
    //
    if (NmpJoinerNodeId != ClusterInvalidNodeId) {

        if (NmpJoinerNodeId == NmLocalNodeId) {
            //
            // The joining node is the local node. Halt.
            //
            ClRtlLogPrint(LOG_NOISE,
                "[NMJOIN] Aborting join because of change in membership.\n"
                );
            CsInconsistencyHalt(ERROR_CLUSTER_JOIN_ABORTED);
        }
        else if ( (BitsetIsMember(NmpJoinerNodeId, DownedNodeSet))
                  ||
                  ( (BitsetIsMember(NmpSponsorNodeId, DownedNodeSet)) &&
                    (!BitsetIsMember(NmpJoinerNodeId, NmpUpNodeSet))
                  )
                )
        {
            //
            // The joining node is down or the sponsor is down and the joiner
            // is not yet an active member. Cleanup the join state. If the
            // sponsor is down and the joiner is an active member, we will
            // clean up when we detect that the joiner has perished.
            //
            // The "not yet an active member" test must be made against
            // NmpUpNodeSet (which already excludes DownedNodeSet here).
            // Testing against DownedNodeSet would make the second clause
            // true whenever the sponsor is down, aborting the join of an
            // already-active joiner.
            //
            ClRtlLogPrint(LOG_NOISE,
                "[NMJOIN] Aborting join of node %1!u! sponsored by node %2!u!\n",
                NmpJoinerNodeId,
                NmpSponsorNodeId
                );

            //
            // Reset joiner state if sponsor died
            //
            if (BitsetIsMember(NmpSponsorNodeId, DownedNodeSet)) {
                node = NmpIdArray[NmpJoinerNodeId];
                node->State = ClusterNodeDown;
                // [GorN 4/3/2000]
                // Without a node down, cluadmin won't refresh the state.
                // If this code is to be changed to emit CLUSTER_NODE_CHANGE_EVENT or
                // some other event, NmpUpdateJoinAbort has to be changed as well,
                // so that we will have the same join cleanup behavior
                BitsetAdd(DownedNodeSet, NmpJoinerNodeId);
            }

            NmpJoinerNodeId = ClusterInvalidNodeId;
            NmpSponsorNodeId = ClusterInvalidNodeId;
            NmpJoinTimer = 0;
            NmpJoinAbortPending = FALSE;
            NmpJoinSequence = 0;
            NmpJoinerUp = FALSE;
            NmpJoinerOutOfSynch = FALSE;
        }
        else {
            //
            // Mark that the joiner is out of synch with the cluster
            // state. The sponsor will eventually abort the join.
            //
            ClRtlLogPrint(LOG_NOISE,
                "[NMJOIN] Joiner node %1!u! is now out of synch with the cluster state.\n",
                NmpJoinerNodeId
                );
            NmpJoinerOutOfSynch = TRUE;
        }
    }

    //
    // Check if the leader node went down
    //
    if (BitsetIsMember(NmpLeaderNodeId, DownedNodeSet)) {
        BOOL  isEventSet;

        //
        // Elect a new leader - active node with the smallest ID.
        //
        for (i = ClusterMinNodeId; i <= NmMaxNodeId; i++) {
            if (BitsetIsMember(i, NmpUpNodeSet)) {
                NmpLeaderNodeId = i;
                break;
            }
        }

        // The loop must have found an up node and exited via break.
        CL_ASSERT(i <= NmMaxNodeId);

        if (NmpLeaderNodeId == NmLocalNodeId) {
            //
            // The local node is the new leader.
            //
            ClRtlLogPrint(LOG_NOISE,
                "[NM] This node is the new leader.\n"
                );
            iAmNewLeader = TRUE;
        }
        else {
            ClRtlLogPrint(LOG_NOISE,
                "[NM] Node %1!u! is the new leader.\n",
                NmpLeaderNodeId
                );
        }

        //
        // Wake up any threads waiting for an RPC call to the leader to
        // complete.
        //
        while (!IsListEmpty(&NmpLeaderChangeWaitList)) {
            listEntry = RemoveHeadList(&NmpLeaderChangeWaitList);

            //
            // NULL out the entry's links to indicate that it has been
            // dequeued. The users of the notification feature depend
            // on this action.
            //
            listEntry->Flink = NULL; listEntry->Blink = NULL;

            //
            // Wake up the waiting thread.
            //
            waitEntry = (PNM_LEADER_CHANGE_WAIT_ENTRY) listEntry;
            isEventSet = SetEvent(waitEntry->LeaderChangeEvent);
            CL_ASSERT(isEventSet != 0);
        }
    }

    //
    // First recovery pass - clean up node states and disable communication
    //
    for (i = ClusterMinNodeId; i <= NmMaxNodeId; i++) {
        node = NmpIdArray[i];

        if ( (node != NULL) && (BitsetIsMember(i, DownedNodeSet)) ) {
            node->State = ClusterNodeDown;

            status = ClusnetOfflineNodeComm(
                         NmClusnetHandle,
                         node->NodeId
                         );

            CL_ASSERT(
                (status == ERROR_SUCCESS) ||
                (status == ERROR_CLUSTER_NODE_ALREADY_DOWN)
                );
        }
    }

    //
    // Inform the rest of the service that these nodes are gone
    //
    ClusterEventEx(
        CLUSTER_EVENT_NODE_DOWN_EX,
        EP_CONTEXT_VALID,
        ULongToPtr(DownedNodeSet)
        );

    //
    // Second recovery pass - clean up network states and issue old-style
    // node down events
    //
    for (i = ClusterMinNodeId; i <= NmMaxNodeId; i++) {
        node = NmpIdArray[i];

        if ( (node != NULL) && (BitsetIsMember(i, DownedNodeSet)) ) {
            //
            // Issue an individual node down event.
            //
            ClusterEvent(CLUSTER_EVENT_NODE_DOWN, node);

            //
            // Now do Intracluster RPC cleanup...
            //
            NmpTerminateRpcsToNode(node->NodeId);

            //
            // Update the network and interface information.
            //
            NmpUpdateNetworkConnectivityForDownNode(node);

            //
            // Log an event. Only the leader logs it.
            //
            if (NmpLeaderNodeId == NmLocalNodeId) {
                LPCWSTR nodeName = OmObjectName(node);

                CsLogEvent1(
                    LOG_UNUSUAL,
                    NM_EVENT_NODE_DOWN,
                    nodeName
                    );
            }
        }
    }

    //
    // If this node is the new leader, schedule a state computation for all
    // networks. State reports may have been received before this node
    // assumed leadership duties.
    //
    if (iAmNewLeader) {
        NmpRecomputeNT5NetworkAndInterfaceStates();
    }

    NmpReleaseLock();

    return(ERROR_SUCCESS);

}  // NmpMultiNodeDownEventHandler
DWORD
NmpNodeChange(
    IN DWORD NodeId,
    IN NODESTATUS NewStatus
    )
/*++
Routine Description:
    Callback registered with MMInit for single-node status changes.
    Looks up the node object for NodeId under the NmpLock and dispatches
    to the node-down or node-up handler according to NewStatus.
Arguments:
    NodeId    - ID of the node whose status changed. Asserted to lie in
                [ClusterMinNodeId, NmMaxNodeId].
    NewStatus - NODE_DOWN or NODE_UP.
Return Value:
    ERROR_SUCCESS always.
--*/
{
    PNM_NODE  changedNode;

    CL_ASSERT(
        (NodeId >= ClusterMinNodeId) &&
        (NodeId <= NmMaxNodeId)
        );

    NmpAcquireLock();

    changedNode = NmpIdArray[NodeId];
    CL_ASSERT(changedNode != NULL);

    if (changedNode != NULL) {
        if (NewStatus != NODE_DOWN) {
            // Anything other than NODE_DOWN is expected to be NODE_UP.
            CL_ASSERT(NewStatus == NODE_UP);
            NmpNodeUpEventHandler(changedNode);
        }
        else {
            NmpNodeDownEventHandler(changedNode);
        }
    }

    NmpReleaseLock();

    return(ERROR_SUCCESS);

}  // NmpNodeChange
VOID
NmpHoldIoEventHandler(
    VOID
    )
/*++
Routine Description:
    Callback registered with MMInit for hold-I/O requests. Logs the
    request; FmHoldIO is called only in builds that define
    HOLD_IO_IS_SAFE_NOW.
Return Value:
    None.
--*/
{
    ClRtlLogPrint(LOG_NOISE, "[NM] Holding I/O.\n");

#ifdef HOLD_IO_IS_SAFE_NOW
    FmHoldIO();
#endif

}  // NmpHoldIoEventHandler
VOID
NmpResumeIoEventHandler(
    VOID
    )
/*++
Routine Description:
    Callback registered with MMInit for resume-I/O requests. Logs the
    request; FmResumeIO is called only in builds that define
    HOLD_IO_IS_SAFE_NOW.
Return Value:
    None.
--*/
{
    ClRtlLogPrint(LOG_NOISE, "[NM] Resuming I/O.\n");

#ifdef HOLD_IO_IS_SAFE_NOW
    FmResumeIO();
#endif

}  // NmpResumeIoEventHandler
BOOL
NmpCheckQuorumEventHandler(
    VOID
    )
/*++
Routine Description:
    Callback registered with MMInit for quorum checks. If
    NmpCheckForNetwork reports no usable network, quorum is abdicated
    without arbitrating. Otherwise the result of
    FmArbitrateQuorumResource is returned.
Return Value:
    TRUE if FmArbitrateQuorumResource reported success.
    FALSE if arbitration failed or no network was detected.
--*/
{
    BOOL  ownsQuorum;

    //
    // daviddio 06/19/2000
    //
    // Before asking FM to arbitrate, determine if we have any
    // viable network interfaces. If not, return failure to MM
    // and allow other cluster nodes to arbitrate. The SCM
    // will restart the cluster service, so that if no nodes
    // successfully arbitrate, we will get another shot.
    //
    if (!NmpCheckForNetwork()) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[NM] Abdicating quorum because no valid network "
            "interfaces were detected.\n"
            );
        return(FALSE);
    }

    ClRtlLogPrint(LOG_NOISE,
        "[NM] Checking if we own the quorum resource.\n"
        );

    ownsQuorum = FmArbitrateQuorumResource();

    if (!ownsQuorum) {
        ClRtlLogPrint(LOG_NOISE,
            "[NM] We do not own the quorum resource, status %1!u!.\n",
            GetLastError()
            );
        //[GN] ClusnetHalt( NmClusnetHandle ); => NmpHaltEventHandler
        //
    }
    else {
        ClRtlLogPrint(LOG_NOISE,
            "[NM] We own the quorum resource.\n"
            );
    }

    return(ownsQuorum);

}  // NmpCheckQuorumEventHandler
void
NmpMsgCleanup1(
    IN DWORD DeadNodeId
    )
/*++
Routine Description:
    Callback registered with MMInit for phase 1 message cleanup. It only
    logs the ID of the dead node.
Arguments:
    DeadNodeId - ID of the node being cleaned up after.
Return Value:
    None.
--*/
{
    ClRtlLogPrint(LOG_NOISE,
        "[NM] Phase 1 message cleanup - node %1!u!.\n",
        DeadNodeId
        );

}  // NmpMsgCleanup1
void
NmpMsgCleanup2(
    IN BITSET DownedNodeSet
    )
/*++
Routine Description:
    Callback registered with MMInit for phase 2 message cleanup. Clears
    NmpCleanupIfJoinAborted when the current joiner is among the downed
    nodes, then synchronously posts CLUSTER_EVENT_NODE_DOWN_EX for the
    downed set.
Arguments:
    DownedNodeSet - Bitmask of the IDs of the nodes that went down.
Return Value:
    None.
--*/
{
    ClRtlLogPrint(LOG_NOISE,
        "[NM] Phase 2 message cleanup - node %1!04X!.\n",
        DownedNodeSet
        );

    NmpAcquireLock();

    if (NmpCleanupIfJoinAborted) {
        if ( (NmpJoinerNodeId != ClusterInvalidNodeId) &&
             BitsetIsMember(NmpJoinerNodeId, DownedNodeSet) )
        {
            //
            // Since the joiner is in the DownedNodeSet mask
            // the node down will be delivered on this node by a regroup
            // engine. No need for NmpUpdateAbortJoin to issue a node down.
            //
            NmpCleanupIfJoinAborted = FALSE;

            ClRtlLogPrint(LOG_NOISE,
                "[NM] NmpCleanupIfJoinAborted is set to false. Joiner - %1!u!.\n",
                NmpJoinerNodeId
                );
        }
    }

    NmpReleaseLock();

    //
    // Inform the rest of the service that these nodes are gone
    //
    ClusterSyncEventEx(
        CLUSTER_EVENT_NODE_DOWN_EX,
        EP_CONTEXT_VALID,
        ULongToPtr(DownedNodeSet)
        );

}  // NmpMsgCleanup2
VOID
NmpHaltEventHandler(
    IN DWORD HaltCode
    )
/*++
Routine Description:
    Callback registered with MMInit for halt requests.

    If HaltCode is MM_STOP_REQUESTED, performs a graceful stop: halts
    clusnet, logs SERVICE_SUCCESSFUL_TERMINATION, announces the
    SERVICE_STOPPED status and exits the process with ERROR_SUCCESS.

    For any other code, logs a critical message, halts clusnet, maps the
    halt code through MMMapHaltCodeToDosError and passes the result to
    CsInconsistencyHalt.
Arguments:
    HaltCode - Halt code supplied by the membership engine.
Return Value:
    None.
--*/
{
    // Do a graceful stop if we are shutting down
    if (HaltCode == MM_STOP_REQUESTED) {
        DWORD Status = ERROR_SUCCESS;

        ClRtlLogPrint(LOG_UNUSUAL,
            "[NM] Prompt shutdown is requested by a membership engine\n"
            );

        ClusnetHalt( NmClusnetHandle );

        CsLogEvent(LOG_NOISE, SERVICE_SUCCESSFUL_TERMINATION);

        CsServiceStatus.dwCurrentState = SERVICE_STOPPED;
        CsServiceStatus.dwControlsAccepted = 0;
        CsServiceStatus.dwCheckPoint = 0;
        CsServiceStatus.dwWaitHint = 0;
        CsServiceStatus.dwWin32ExitCode = Status;
        CsServiceStatus.dwServiceSpecificExitCode = Status;

        CsAnnounceServiceStatus();

        ExitProcess(Status);
    }
    else {
        //
        // The halt code is logged directly; the previous code also
        // formatted it into a local WCHAR buffer that was never read,
        // so that dead buffer has been removed.
        //
        ClRtlLogPrint(LOG_CRITICAL,
            "[NM] Halting this node due to membership or communications error. Halt code = %1!u!\n",
            HaltCode
            );

        ClusnetHalt( NmClusnetHandle );

        //
        // Adjust membership code to win32 error code. (If mapping exists)
        //
        HaltCode = MMMapHaltCodeToDosError( HaltCode );

        CsInconsistencyHalt(HaltCode);
    }

}  // NmpHaltEventHandler
void
NmpJoinFailed(
    void
    )
/*++
Routine Description:
    Callback registered with MMInit for join failures. Intentionally a
    no-op.
Return Value:
    None.
--*/
{
    /* Nothing to do. */
}  // NmpJoinFailed
DWORD
NmpGumUpdateHandler(
    IN DWORD Context,
    IN BOOL SourceNode,
    IN DWORD BufferLength,
    IN PVOID Buffer
    )
/*++
Routine Description:
    Handles GUM updates for membership events. Only NmUpdateJoinComplete
    is recognized; any other update type is logged and discarded.
Arguments:
    Context      - Supplies the update context. This is the message type.
    SourceNode   - Supplies whether or not the update originated on this
                   node. Not used by this routine.
    BufferLength - Supplies the length of the update. Not used by this
                   routine.
    Buffer       - Supplies a pointer to the update buffer; passed to
                   NmpUpdateJoinComplete.
Return Value:
    The status returned by NmpUpdateJoinComplete for a join-complete
    update.
    ERROR_SUCCESS for an unrecognized update type.
--*/
{
    if (Context != NmUpdateJoinComplete) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[NM] Discarding unknown gum request %1!u!\n",
            Context
            );
        return(ERROR_SUCCESS);
    }

    return(NmpUpdateJoinComplete(Buffer));

}  // NmpGumUpdateHandler
DWORD
NmpMembershipInit(
    VOID
    )
/*++
Routine Description:
    Initializes membership: prepares the leader-change wait list and
    initializes the membership engine via MMInit, registering this
    module's callbacks. On success, NmpMembershipCleanupOk is set so
    that NmpMembershipShutdown will call MMShutdown.
Return Value:
    ERROR_SUCCESS if MMInit returned MM_OK.
    Otherwise the MMInit status mapped through MMMapStatusToDosError.
--*/
{
    DWORD  mmStatus;

    ClRtlLogPrint(LOG_NOISE,"[NM] Initializing membership...\n");

    InitializeListHead(&NmpLeaderChangeWaitList);

    //
    // Initialize membership engine.
    //
    mmStatus = MMInit(
                   NmLocalNodeId,
                   NmMaxNodes,
                   NmpNodeChange,
                   NmpCheckQuorumEventHandler,
                   NmpHoldIoEventHandler,
                   NmpResumeIoEventHandler,
                   NmpMsgCleanup1,
                   NmpMsgCleanup2,
                   NmpHaltEventHandler,
                   NmpJoinFailed,
                   NmpMultiNodeDownEventHandler
                   );

    if (mmStatus == MM_OK) {
        NmpMembershipCleanupOk = TRUE;

        ClRtlLogPrint(LOG_NOISE,"[NM] Membership initialization complete.\n");

        return(ERROR_SUCCESS);
    }

    //
    // Initialization failed: report the Win32 equivalent of the status.
    //
    mmStatus = MMMapStatusToDosError(mmStatus);

    ClRtlLogPrint(LOG_CRITICAL,
        "[NM] Membership initialization failed, status %1!u!.\n",
        mmStatus
        );

    return(mmStatus);

}  // NmpMembershipInit
VOID
NmpMembershipShutdown(
    VOID
    )
/*++
Routine Description:
    Shuts down the membership engine if NmpMembershipInit completed
    successfully, then clears NmpMembershipCleanupOk so that a repeated
    call does nothing.
Return Value:
    None.
--*/
{
    if (!NmpMembershipCleanupOk) {
        return;
    }

    ClRtlLogPrint(LOG_NOISE,"[NM] Shutting down membership...\n");

    MMShutdown();

    NmpMembershipCleanupOk = FALSE;

    ClRtlLogPrint(LOG_NOISE,"[NM] Membership shutdown complete.\n");

}  // NmpMembershipShutdown