mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
5259 lines
143 KiB
5259 lines
143 KiB
/*++
|
|
|
|
Copyright (c) 1996-1999 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
nminit.c
|
|
|
|
Abstract:
|
|
|
|
Initialization, cluster join, and cluster form routines for the
|
|
Node Manager.
|
|
|
|
Author:
|
|
|
|
Mike Massa (mikemas)
|
|
|
|
Revision History:
|
|
|
|
6/03/96 Created.
|
|
|
|
--*/
|
|
|
|
/*
|
|
|
|
General Implementation Notes:
|
|
|
|
The functions DmBeginLocalUpdate, DmCommitLocalUpdate, and
|
|
DmAbortLocalUpdate cannot be called while holding the NM lock, or
|
|
a deadlock with the NmTimer thread may result during regroup when
|
|
disk writes are stalled. These functions attempt to write to the
|
|
quorum disk.
|
|
|
|
*/
|
|
|
|
|
|
#include "nmp.h"
|
|
|
|
|
|
//
|
|
// External Data
|
|
//
|
|
extern BOOL CsNoQuorum;
|
|
|
|
//
|
|
// Public Data
|
|
//
|
|
HANDLE NmClusnetHandle = NULL;
|
|
|
|
//
|
|
// Private Data
|
|
//
|
|
CRITICAL_SECTION NmpLock;
|
|
NM_STATE NmpState = NmStateOffline;
|
|
DWORD NmpActiveThreadCount = 0;
|
|
HANDLE NmpShutdownEvent = NULL;
|
|
CL_NODE_ID NmpJoinerNodeId = ClusterInvalidNodeId;
|
|
CL_NODE_ID NmpSponsorNodeId = ClusterInvalidNodeId;
|
|
DWORD NmpJoinTimer = 0;
|
|
BOOLEAN NmpJoinAbortPending = FALSE;
|
|
DWORD NmpJoinSequence = 0;
|
|
BOOLEAN NmpJoinerUp = FALSE;
|
|
BOOLEAN NmpJoinBeginInProgress = FALSE;
|
|
BOOLEAN NmpJoinerOutOfSynch = FALSE;
|
|
LPWSTR NmpClusnetEndpoint = NULL;
|
|
WCHAR NmpInvalidJoinerIdString[] = L"0";
|
|
CL_NODE_ID NmpLeaderNodeId = ClusterInvalidNodeId;
|
|
BOOL NmpCleanupIfJoinAborted = FALSE;
|
|
BOOL NmpSuccessfulMMJoin = FALSE;
|
|
DWORD NmpAddNodeId = ClusterInvalidNodeId;
|
|
LPWSTR NmpClusterInstanceId = NULL;
|
|
|
|
//externs
|
|
|
|
extern DWORD CsMyHighestVersion;
|
|
extern DWORD CsMyLowestVersion;
|
|
extern DWORD CsClusterHighestVersion;
|
|
extern DWORD CsClusterLowestVersion;
|
|
|
|
GUM_DISPATCH_ENTRY NmGumDispatchTable[] = {
|
|
{1, NmpUpdateCreateNode},
|
|
{1, NmpUpdatePauseNode},
|
|
{1, NmpUpdateResumeNode},
|
|
{1, NmpUpdateEvictNode},
|
|
{4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateCreateNetwork},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkName},
|
|
{1, NmpUpdateSetNetworkPriorityOrder},
|
|
{3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkCommonProperties},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateCreateInterface},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetInterfaceInfo},
|
|
{3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetInterfaceCommonProperties},
|
|
{1, NmpUpdateDeleteInterface},
|
|
{3, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinBegin},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinAbort},
|
|
//
|
|
// Version 2 (NT 5.0) extensions that are understood by NT4 SP4
|
|
//
|
|
{5, (PGUM_DISPATCH_ROUTINE1) NmpUpdateJoinBegin2},
|
|
{4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkAndInterfaceStates},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdatePerformFixups},
|
|
{5, (PGUM_DISPATCH_ROUTINE1) NmpUpdatePerformFixups2},
|
|
//
|
|
// Version 2 (NT 5.0) extensions that are not understood by NT4 SP4
|
|
// These may not be called in a mixed NT4/NT5 cluster.
|
|
//
|
|
{5, (PGUM_DISPATCH_ROUTINE1) NmpUpdateAddNode},
|
|
{2, (PGUM_DISPATCH_ROUTINE1) NmpUpdateExtendedNodeState},
|
|
//
|
|
// NT 5.1 extensions that are not understood by NT5 and
|
|
// earlier. NT5 nodes will ignore these updates without
|
|
// error.
|
|
//
|
|
{4, (PGUM_DISPATCH_ROUTINE1) NmpUpdateSetNetworkMulticastConfiguration},
|
|
};
|
|
|
|
//
|
|
// Local prototypes
|
|
//
|
|
DWORD
|
|
NmpCreateRpcBindings(
|
|
IN PNM_NODE Node
|
|
);
|
|
|
|
DWORD
|
|
NmpCreateClusterInstanceId(
|
|
VOID
|
|
);
|
|
|
|
//
|
|
// Component initialization routines.
|
|
//
|
|
DWORD
|
|
NmInitialize(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Initializes the Node Manager component.
|
|
|
|
Arguments:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
A Win32 status code.
|
|
|
|
Notes:
|
|
|
|
The local node object is created by this routine.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
OM_OBJECT_TYPE_INITIALIZE nodeTypeInitializer;
|
|
HDMKEY nodeKey = NULL;
|
|
DWORD nameSize = CS_MAX_NODE_NAME_LENGTH + 1;
|
|
HKEY serviceKey;
|
|
DWORD nodeIdSize = (CS_MAX_NODE_ID_LENGTH + 1) *
|
|
sizeof(WCHAR);
|
|
LPWSTR nodeIdString = NULL;
|
|
WSADATA wsaData;
|
|
WORD versionRequested;
|
|
int err;
|
|
ULONG ndx;
|
|
DWORD valueType;
|
|
NM_NODE_INFO2 nodeInfo;
|
|
WCHAR errorString[12];
|
|
DWORD eventCode = 0;
|
|
LPWSTR string;
|
|
|
|
|
|
CL_ASSERT(NmpState == NmStateOffline);
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Initializing...\n");
|
|
|
|
//
|
|
// Initialize globals.
|
|
//
|
|
InitializeCriticalSection(&NmpLock);
|
|
|
|
InitializeListHead(&NmpNodeList);
|
|
InitializeListHead(&NmpNetworkList);
|
|
InitializeListHead(&NmpInternalNetworkList);
|
|
InitializeListHead(&NmpDeletedNetworkList);
|
|
InitializeListHead(&NmpInterfaceList);
|
|
InitializeListHead(&NmpDeletedInterfaceList);
|
|
|
|
NmMaxNodes = ClusterDefaultMaxNodes;
|
|
NmMaxNodeId = ClusterMinNodeId + NmMaxNodes - 1;
|
|
|
|
|
|
//
|
|
// Initializing the RPC Recording/cancelling mechanism
|
|
// NOTE - This should move if NmMaxNodeId Definition above moves.
|
|
//
|
|
NmpIntraClusterRpcArr = LocalAlloc(LMEM_FIXED,
|
|
sizeof(NM_INTRACLUSTER_RPC_THREAD) * (NmMaxNodeId +1));
|
|
|
|
if(NmpIntraClusterRpcArr == NULL) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to allocate memory for RPC monitoring.\n"
|
|
);
|
|
status = ERROR_NOT_ENOUGH_MEMORY;
|
|
eventCode = CS_EVENT_ALLOCATION_FAILURE;
|
|
goto error_exit;
|
|
}
|
|
else {
|
|
ZeroMemory(NmpIntraClusterRpcArr,
|
|
sizeof(NM_INTRACLUSTER_RPC_THREAD) * (NmMaxNodeId + 1));
|
|
for(ndx = 0;ndx <= NmMaxNodeId;ndx++)
|
|
InitializeListHead(&NmpIntraClusterRpcArr[ndx]);
|
|
|
|
InitializeCriticalSection(&NmpRPCLock);
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Initialize the network configuration package.
|
|
//
|
|
ClNetInitialize(
|
|
ClNetPrint,
|
|
ClNetLogEvent,
|
|
ClNetLogEvent1,
|
|
ClNetLogEvent2,
|
|
ClNetLogEvent3
|
|
);
|
|
|
|
//
|
|
// Initialize WinSock
|
|
//
|
|
versionRequested = MAKEWORD(2,0);
|
|
|
|
err = WSAStartup(versionRequested, &wsaData);
|
|
|
|
if (err != 0) {
|
|
status = WSAGetLastError();
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, NM_EVENT_WSASTARTUP_FAILED, errorString);
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Failed to initialize Winsock, status %1!u!\n", status);
|
|
return(status);
|
|
}
|
|
|
|
if ( (LOBYTE(wsaData.wVersion) != 2) || (HIBYTE(wsaData.wVersion) != 0)) {
|
|
status = WSAVERNOTSUPPORTED;
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, NM_EVENT_WSASTARTUP_FAILED, errorString);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Found unexpected Windows Sockets version %1!u!\n",
|
|
wsaData.wVersion
|
|
);
|
|
WSACleanup();
|
|
return(status);
|
|
}
|
|
|
|
NmpShutdownEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
|
|
|
|
if (NmpShutdownEvent == NULL) {
|
|
status = GetLastError();
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, CS_EVENT_ALLOCATION_FAILURE, errorString);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to create shutdown event, status %1!u!\n",
|
|
status
|
|
);
|
|
WSACleanup();
|
|
return(status);
|
|
}
|
|
|
|
NmpState = NmStateOnlinePending;
|
|
|
|
//
|
|
// Get the name of this node.
|
|
//
|
|
if (!GetComputerName(&(NmLocalNodeName[0]), &nameSize)) {
|
|
status = GetLastError();
|
|
eventCode = NM_EVENT_GETCOMPUTERNAME_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to get local computername, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Local node name = %1!ws!.\n",
|
|
NmLocalNodeName
|
|
);
|
|
|
|
//
|
|
// Open a control channel to the Cluster Network driver
|
|
//
|
|
NmClusnetHandle = ClusnetOpenControlChannel(0);
|
|
|
|
if (NmClusnetHandle == NULL) {
|
|
status = GetLastError();
|
|
eventCode = NM_EVENT_CLUSNET_UNAVAILABLE;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to open a handle to the Cluster Network driver, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Tell the Cluster Network driver to shutdown when our handle is closed
|
|
// in case the Cluster Service crashes.
|
|
//
|
|
status = ClusnetEnableShutdownOnClose(NmClusnetHandle);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CLUSNET_ENABLE_SHUTDOWN_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to register Cluster Network shutdown trigger, status %1!u!\n",
|
|
status
|
|
);
|
|
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Allocate the node ID array.
|
|
//
|
|
CL_ASSERT(NmpIdArray == NULL);
|
|
|
|
NmpIdArray = LocalAlloc(
|
|
LMEM_FIXED,
|
|
(sizeof(PNM_NODE) * (NmMaxNodeId + 1))
|
|
);
|
|
|
|
if (NmpIdArray == NULL) {
|
|
status = ERROR_NOT_ENOUGH_MEMORY;
|
|
eventCode = CS_EVENT_ALLOCATION_FAILURE;
|
|
goto error_exit;
|
|
}
|
|
|
|
ZeroMemory(NmpIdArray, (sizeof(PNM_NODE) * (NmMaxNodeId + 1)));
|
|
|
|
//
|
|
// Create the node object type
|
|
//
|
|
ZeroMemory(&nodeTypeInitializer, sizeof(OM_OBJECT_TYPE_INITIALIZE));
|
|
nodeTypeInitializer.ObjectSize = sizeof(NM_NODE);
|
|
nodeTypeInitializer.Signature = NM_NODE_SIG;
|
|
nodeTypeInitializer.Name = L"Node";
|
|
nodeTypeInitializer.DeleteObjectMethod = NmpDestroyNodeObject;
|
|
|
|
status = OmCreateType(ObjectTypeNode, &nodeTypeInitializer);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = CS_EVENT_ALLOCATION_FAILURE;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to create node object type, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Get the local node ID from the local registry.
|
|
//
|
|
status = RegCreateKeyW(
|
|
HKEY_LOCAL_MACHINE,
|
|
CLUSREG_KEYNAME_CLUSSVC_PARAMETERS,
|
|
&serviceKey
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
CS_EVENT_REG_OPEN_FAILED,
|
|
CLUSREG_KEYNAME_CLUSSVC_PARAMETERS,
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to open cluster service parameters key, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
string = L"NodeId";
|
|
status = RegQueryValueExW(
|
|
serviceKey,
|
|
string,
|
|
0,
|
|
&valueType,
|
|
(LPBYTE) &(NmLocalNodeIdString[0]),
|
|
&nodeIdSize
|
|
);
|
|
|
|
RegCloseKey(serviceKey);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
CS_EVENT_REG_QUERY_FAILED,
|
|
string,
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to read local node ID from registry, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
if (valueType != REG_SZ) {
|
|
status = ERROR_INVALID_PARAMETER;
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
CS_EVENT_REG_QUERY_FAILED,
|
|
string,
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Local Node ID registry value is not of type REG_SZ.\n"
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Local node ID = %1!ws!.\n",
|
|
NmLocalNodeIdString
|
|
);
|
|
|
|
NmLocalNodeId = wcstoul(NmLocalNodeIdString, NULL, 10);
|
|
|
|
//
|
|
// Get information about the local node.
|
|
//
|
|
wcscpy(&(nodeInfo.NodeId[0]), NmLocalNodeIdString);
|
|
|
|
status = NmpGetNodeDefinition(&nodeInfo);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Create the local node object. We must do this here because GUM
|
|
// requires the local node object to initialize.
|
|
//
|
|
status = NmpCreateLocalNodeObject(&nodeInfo);
|
|
|
|
ClNetFreeNodeInfo(&nodeInfo);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Initialize the network and interface object types
|
|
//
|
|
status = NmpInitializeNetworks();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
status = NmpInitializeInterfaces();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Initialize net PnP handling
|
|
//
|
|
status = NmpInitializePnp();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// init the advise sink that tells when a connection object has been
|
|
// renamed
|
|
//
|
|
status = NmpInitializeConnectoidAdviseSink();
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Initialization complete.\n");
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
|
|
error_exit:
|
|
|
|
if (eventCode != 0) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
|
|
}
|
|
|
|
wsprintfW( &(errorString[0]), L"%u", status );
|
|
CsLogEvent1(LOG_CRITICAL, NM_INIT_FAILED, errorString);
|
|
|
|
ClRtlLogPrint(LOG_CRITICAL,"[NM] Initialization failed %1!d!\n",status);
|
|
|
|
NmShutdown();
|
|
|
|
return(status);
|
|
|
|
} // NmInitialize
|
|
|
|
|
|
VOID
|
|
NmShutdown(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Terminates all processing - shuts down all sources of work for
|
|
worker threads.
|
|
|
|
Arguments:
|
|
|
|
|
|
|
|
Return Value:
|
|
|
|
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
if (NmpState == NmStateOffline) {
|
|
return;
|
|
}
|
|
|
|
NmCloseConnectoidAdviseSink();
|
|
|
|
NmpShutdownPnp();
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Shutdown starting...\n");
|
|
|
|
NmpState = NmStateOfflinePending;
|
|
|
|
if (NmpActiveThreadCount > 0) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Waiting for %1!u! active threads to terminate...\n",
|
|
NmpActiveThreadCount
|
|
);
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = WaitForSingleObject(NmpShutdownEvent, INFINITE);
|
|
|
|
CL_ASSERT(status == WAIT_OBJECT_0);
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] All active threads have completed. Continuing shutdown...\n"
|
|
);
|
|
|
|
}
|
|
else {
|
|
NmpReleaseLock();
|
|
}
|
|
|
|
NmLeaveCluster();
|
|
|
|
NmpCleanupPnp();
|
|
|
|
if (NmLocalNode != NULL) {
|
|
NmpDeleteNodeObject(NmLocalNode, FALSE);
|
|
NmLocalNode = NULL;
|
|
}
|
|
|
|
if (NmpIdArray != NULL) {
|
|
LocalFree(NmpIdArray); NmpIdArray = NULL;
|
|
}
|
|
|
|
NmpFreeClusterKey();
|
|
|
|
if (NmpClusterInstanceId != NULL) {
|
|
MIDL_user_free(NmpClusterInstanceId);
|
|
NmpClusterInstanceId = NULL;
|
|
}
|
|
|
|
if (NmClusnetHandle != NULL) {
|
|
ClusnetCloseControlChannel(NmClusnetHandle);
|
|
NmClusnetHandle = NULL;
|
|
}
|
|
|
|
CloseHandle(NmpShutdownEvent); NmpShutdownEvent = NULL;
|
|
|
|
WSACleanup();
|
|
|
|
//
|
|
// As long as the GUM and Clusapi RPC interfaces cannot be
|
|
// shutdown, it is not safe to delete this critical section.
|
|
//
|
|
// DeleteCriticalSection(&NmpLock);
|
|
|
|
NmpState = NmStateOffline;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Shutdown complete.\n");
|
|
|
|
return;
|
|
|
|
} // NmShutdown
|
|
|
|
|
|
VOID
|
|
NmLeaveCluster(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
|
|
|
|
Arguments:
|
|
|
|
|
|
|
|
Return Value:
|
|
|
|
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
if (NmLocalNode != NULL) {
|
|
if ( (NmLocalNode->State == ClusterNodeUp) ||
|
|
(NmLocalNode->State == ClusterNodePaused) ||
|
|
(NmLocalNode->State == ClusterNodeJoining)
|
|
)
|
|
{
|
|
//
|
|
// Leave the cluster.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,"[NM] Leaving cluster.\n");
|
|
|
|
MMLeave();
|
|
|
|
#ifdef MM_IN_CLUSNET
|
|
|
|
status = ClusnetLeaveCluster(NmClusnetHandle);
|
|
CL_ASSERT(status == ERROR_SUCCESS);
|
|
|
|
#endif // MM_IN_CLUSNET
|
|
|
|
}
|
|
}
|
|
|
|
NmpMembershipShutdown();
|
|
|
|
NmpCleanupInterfaces();
|
|
|
|
NmpCleanupNetworks();
|
|
|
|
NmpCleanupNodes();
|
|
|
|
//
|
|
// Shutdown the Cluster Network driver.
|
|
//
|
|
if (NmClusnetHandle != NULL) {
|
|
DWORD status = ClusnetShutdown(NmClusnetHandle);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Shutdown of the Cluster Network driver failed, status %1!u!\n",
|
|
status
|
|
);
|
|
}
|
|
}
|
|
|
|
if (NmpClusnetEndpoint != NULL) {
|
|
MIDL_user_free(NmpClusnetEndpoint);
|
|
NmpClusnetEndpoint = NULL;
|
|
}
|
|
|
|
return;
|
|
|
|
} // NmLeaveCluster
|
|
|
|
|
|
DWORD
|
|
NmpCreateClusterObjects(
|
|
IN RPC_BINDING_HANDLE JoinSponsorBinding
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Creates objects to represent the cluster's nodes, networks, and
|
|
interfaces.
|
|
|
|
Arguments:
|
|
|
|
JoinSponsorBinding - A pointer to an RPC binding handle for the sponsor
|
|
node if this node is joining a cluster. NULL if
|
|
this node is forming a cluster.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if the routine is successful.
|
|
A Win32 error code otherwise.
|
|
|
|
Notes:
|
|
|
|
This routine MUST NOT be called with the NM lock held.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
PNM_NODE_ENUM2 nodeEnum = NULL;
|
|
PNM_NETWORK_ENUM networkEnum = NULL;
|
|
PNM_INTERFACE_ENUM2 interfaceEnum = NULL;
|
|
PNM_NODE node = NULL;
|
|
DWORD matchedNetworkCount = 0;
|
|
DWORD newNetworkCount = 0;
|
|
DWORD InitRetry = 2;
|
|
WCHAR errorString[12];
|
|
DWORD eventCode = 0;
|
|
BOOL renameConnectoids;
|
|
|
|
|
|
while ( InitRetry-- ) {
|
|
//
|
|
// Initialize the Cluster Network driver. This will clean up
|
|
// any old state that was left around from the last run of the
|
|
// Cluster Service. Note that the local node object is registered in
|
|
// this call.
|
|
//
|
|
status = ClusnetInitialize(
|
|
NmClusnetHandle,
|
|
NmLocalNodeId,
|
|
NmMaxNodes,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
break;
|
|
} else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Shutting down Cluster Network driver before retrying Initialization, status %1!u!\n",
|
|
status);
|
|
|
|
ClusnetShutdown( NmClusnetHandle );
|
|
}
|
|
};
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
eventCode = NM_EVENT_CLUSNET_INITIALIZE_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Initialization of the Cluster Network driver failed, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Tell the Cluster Network driver to reserve the Cluster Network
|
|
// endpoint on this node.
|
|
//
|
|
status = ClusnetReserveEndpoint(
|
|
NmClusnetHandle,
|
|
NmpClusnetEndpoint
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to reserve Clusnet Network endpoint %1!ws!, "
|
|
"status %2!u!\n", NmpClusnetEndpoint, status
|
|
);
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_CLUSNET_RESERVE_ENDPOINT_FAILED,
|
|
NmpClusnetEndpoint,
|
|
errorString
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Obtain the node portion of the cluster database.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Synchronizing node information.\n"
|
|
);
|
|
|
|
if (JoinSponsorBinding == NULL) {
|
|
status = NmpEnumNodeDefinitions(&nodeEnum);
|
|
}
|
|
else {
|
|
status = NmRpcEnumNodeDefinitions2(
|
|
JoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&nodeEnum
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CONFIG_SYNCH_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to synchronize node information, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Create the node objects.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Creating node objects.\n"
|
|
);
|
|
|
|
status = NmpCreateNodeObjects(nodeEnum);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Obtain the networks portion of the cluster database.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Synchronizing network information.\n"
|
|
);
|
|
|
|
if (JoinSponsorBinding == NULL) {
|
|
status = NmpEnumNetworkDefinitions(&networkEnum);
|
|
}
|
|
else {
|
|
status = NmRpcEnumNetworkDefinitions(
|
|
JoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&networkEnum
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CONFIG_SYNCH_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to synchronize network information, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Obtain the interfaces portion of the cluster database.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Synchronizing interface information.\n"
|
|
);
|
|
|
|
if (JoinSponsorBinding == NULL) {
|
|
status = NmpEnumInterfaceDefinitions(&interfaceEnum);
|
|
}
|
|
else {
|
|
status = NmRpcEnumInterfaceDefinitions2(
|
|
JoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&interfaceEnum
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CONFIG_SYNCH_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to synchronize interface information, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
if ( CsUpgrade ) {
|
|
//
|
|
// If this is an upgrade from NT4 to Whistler, then fix up the
|
|
// connectoid names so they align with the cluster network
|
|
// names.
|
|
//
|
|
// REMOVE THIS PORTION AFTER WHISTLER HAS SHIPPED.
|
|
//
|
|
if ( CLUSTER_GET_MAJOR_VERSION( NmLocalNode->HighestVersion ) <= NT4SP4_MAJOR_VERSION ) {
|
|
renameConnectoids = TRUE;
|
|
} else {
|
|
//
|
|
// upgrade from W2K to Whistler. Nothing should have changed but
|
|
// if it did, connectoids should have precedence
|
|
//
|
|
renameConnectoids = FALSE;
|
|
}
|
|
} else {
|
|
//
|
|
// THIS SECTION MUST ALWAYS BE HERE
|
|
//
|
|
// if forming, cluster network objects are renamed to its
|
|
// corresponding connectoid name. During a join, the opposite is true.
|
|
//
|
|
if ( JoinSponsorBinding ) {
|
|
renameConnectoids = TRUE;
|
|
} else {
|
|
renameConnectoids = FALSE;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Post a PnP notification ioctl. If we receive a PnP notification
|
|
// before we finish initializing, we must restart the process.
|
|
//
|
|
NmpWatchForPnpEvents();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Run the network configuration engine. This will update the
|
|
// cluster database.
|
|
//
|
|
status = NmpConfigureNetworks(
|
|
JoinSponsorBinding,
|
|
NmLocalNodeIdString,
|
|
NmLocalNodeName,
|
|
&networkEnum,
|
|
&interfaceEnum,
|
|
NmpClusnetEndpoint,
|
|
&matchedNetworkCount,
|
|
&newNetworkCount,
|
|
renameConnectoids
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to configure networks & interfaces, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Matched %1!u! networks, created %2!u! new networks.\n",
|
|
matchedNetworkCount,
|
|
newNetworkCount
|
|
);
|
|
|
|
//
|
|
// Get the updated network information from the database.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Resynchronizing network information.\n"
|
|
);
|
|
|
|
if (JoinSponsorBinding == NULL) {
|
|
status = NmpEnumNetworkDefinitions(&networkEnum);
|
|
}
|
|
else {
|
|
status = NmRpcEnumNetworkDefinitions(
|
|
JoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&networkEnum
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CONFIG_SYNCH_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to resynchronize network information, "
|
|
"status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Get the updated interface information from the database.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Resynchronizing interface information.\n"
|
|
);
|
|
|
|
if (JoinSponsorBinding == NULL) {
|
|
status = NmpEnumInterfaceDefinitions(&interfaceEnum);
|
|
}
|
|
else {
|
|
status = NmRpcEnumInterfaceDefinitions2(
|
|
JoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&interfaceEnum
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_CONFIG_SYNCH_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Unable to resynchronize interface information, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Create the network objects.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Creating network objects.\n"
|
|
);
|
|
|
|
status = NmpCreateNetworkObjects(networkEnum);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to create network objects, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Fixup the priorities of the internal networks if we are forming
|
|
// a cluster.
|
|
//
|
|
if (JoinSponsorBinding == NULL) {
|
|
DWORD networkCount;
|
|
PNM_NETWORK * networkList;
|
|
|
|
status = NmpEnumInternalNetworks(&networkCount, &networkList);
|
|
|
|
if ((status == ERROR_SUCCESS) && (networkCount > 0)) {
|
|
DWORD i;
|
|
HLOCALXSACTION xaction;
|
|
|
|
|
|
//
|
|
// Begin a transaction - this must not be done while holding
|
|
// the NM lock.
|
|
//
|
|
xaction = DmBeginLocalUpdate();
|
|
|
|
if (xaction == NULL) {
|
|
status = GetLastError();
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to start a transaction, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
status = NmpSetNetworkPriorityOrder(
|
|
networkCount,
|
|
networkList,
|
|
xaction
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
DmCommitLocalUpdate(xaction);
|
|
}
|
|
else {
|
|
DmAbortLocalUpdate(xaction);
|
|
goto error_exit;
|
|
}
|
|
|
|
for (i=0; i<networkCount; i++) {
|
|
if (networkList[i] != NULL) {
|
|
OmDereferenceObject(networkList[i]);
|
|
}
|
|
}
|
|
|
|
LocalFree(networkList);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Create the interface objects.
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Creating interface objects.\n"
|
|
);
|
|
|
|
status = NmpCreateInterfaceObjects(interfaceEnum);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to create interface objects, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
if (JoinSponsorBinding != NULL) {
|
|
//
|
|
// The node must have connectivity to all active cluster nodes
|
|
// in order to join a cluster.
|
|
//
|
|
PNM_NODE unreachableNode;
|
|
|
|
if (!NmpVerifyJoinerConnectivity(NmLocalNode, &unreachableNode)) {
|
|
status = ERROR_CLUSTER_NETWORK_NOT_FOUND;
|
|
CsLogEvent1(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_NODE_UNREACHABLE,
|
|
OmObjectName(unreachableNode),
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Joining node cannot communicate with all other "
|
|
"active nodes.\n"
|
|
);
|
|
goto error_exit;
|
|
}
|
|
}
|
|
|
|
status = NmpMembershipInit();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
|
|
error_exit:
|
|
|
|
if (eventCode != 0) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
|
|
}
|
|
|
|
if (nodeEnum != NULL) {
|
|
ClNetFreeNodeEnum(nodeEnum);
|
|
}
|
|
|
|
if (networkEnum != NULL) {
|
|
ClNetFreeNetworkEnum(networkEnum);
|
|
}
|
|
|
|
if (interfaceEnum != NULL) {
|
|
ClNetFreeInterfaceEnum(interfaceEnum);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmpCreateClusterObjects
|
|
|
|
//
|
|
// Routines common to joining and forming.
|
|
//
|
|
|
|
DWORD
|
|
NmpCreateClusterInstanceId(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Checks the cluster database for the cluster instance id. Creates
|
|
if not present.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
LPWSTR clusterInstanceId = NULL;
|
|
DWORD clusterInstanceIdBufSize = 0;
|
|
DWORD clusterInstanceIdSize = 0;
|
|
BOOLEAN found = FALSE;
|
|
UUID guid;
|
|
|
|
do {
|
|
|
|
status = NmpQueryString(
|
|
DmClusterParametersKey,
|
|
L"ClusterInstanceID",
|
|
REG_SZ,
|
|
&clusterInstanceId,
|
|
&clusterInstanceIdBufSize,
|
|
&clusterInstanceIdSize
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
found = TRUE;
|
|
} else {
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Cluster Instance ID not found in "
|
|
"cluster database, status %1!u!.\n",
|
|
status
|
|
);
|
|
|
|
status = UuidCreate(&guid);
|
|
if (status == RPC_S_OK) {
|
|
|
|
status = UuidToString(&guid, &clusterInstanceId);
|
|
if (status == RPC_S_OK) {
|
|
|
|
status = DmSetValue(
|
|
DmClusterParametersKey,
|
|
L"ClusterInstanceID",
|
|
REG_SZ,
|
|
(PBYTE) clusterInstanceId,
|
|
NM_WCSLEN(clusterInstanceId)
|
|
);
|
|
if (status != ERROR_SUCCESS) {
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to store Cluster Instance ID "
|
|
"in cluster database, status %1!u!.\n",
|
|
status
|
|
);
|
|
}
|
|
|
|
if (clusterInstanceId != NULL) {
|
|
RpcStringFree(&clusterInstanceId);
|
|
clusterInstanceId = NULL;
|
|
}
|
|
} else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to convert Cluster Instance ID "
|
|
"GUID into string, status %1!u!.\n",
|
|
status
|
|
);
|
|
}
|
|
|
|
} else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to create Cluster Instance ID GUID, "
|
|
"status %1!u!.\n",
|
|
status
|
|
);
|
|
}
|
|
}
|
|
} while ( !found && (status == ERROR_SUCCESS) );
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
|
|
CL_ASSERT(clusterInstanceId != NULL);
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (NmpClusterInstanceId == NULL) {
|
|
NmpClusterInstanceId = clusterInstanceId;
|
|
clusterInstanceId = NULL;
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
}
|
|
|
|
if (clusterInstanceId != NULL) {
|
|
midl_user_free(clusterInstanceId);
|
|
clusterInstanceId = NULL;
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmpCreateClusterInstanceId
|
|
|
|
//
|
|
// Routines for forming a new cluster.
|
|
//
|
|
|
|
DWORD
|
|
NmFormNewCluster(
|
|
VOID
|
|
)
|
|
{
|
|
DWORD status;
|
|
DWORD isPaused = FALSE;
|
|
DWORD pausedDefault = FALSE;
|
|
HDMKEY nodeKey;
|
|
DWORD valueLength, valueSize;
|
|
WCHAR errorString[12], errorString2[12];
|
|
DWORD eventCode = 0;
|
|
PLIST_ENTRY entry;
|
|
PNM_NETWORK network;
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Beginning cluster form process.\n"
|
|
);
|
|
|
|
//
|
|
// Since this node is forming the cluster, it is the leader.
|
|
//
|
|
NmpLeaderNodeId = NmLocalNodeId;
|
|
|
|
//
|
|
// Read the clusnet endpoint override value from the registry, if it
|
|
// exists.
|
|
//
|
|
if (NmpClusnetEndpoint != NULL) {
|
|
MIDL_user_free(NmpClusnetEndpoint);
|
|
NmpClusnetEndpoint = NULL;
|
|
}
|
|
|
|
valueLength = 0;
|
|
|
|
status = NmpQueryString(
|
|
DmClusterParametersKey,
|
|
L"ClusnetEndpoint",
|
|
REG_SZ,
|
|
&NmpClusnetEndpoint,
|
|
&valueLength,
|
|
&valueSize
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
USHORT endpoint;
|
|
|
|
//
|
|
// Validate the value
|
|
//
|
|
status = ClRtlTcpipStringToEndpoint(
|
|
NmpClusnetEndpoint,
|
|
&endpoint
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
CsLogEvent2(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_INVALID_CLUSNET_ENDPOINT,
|
|
NmpClusnetEndpoint,
|
|
CLUSNET_DEFAULT_ENDPOINT_STRING
|
|
);
|
|
ClRtlLogPrint(
|
|
LOG_CRITICAL,
|
|
"[NM] '%1!ws!' is not valid endpoint value. Using default value %2!ws!.\n",
|
|
NmpClusnetEndpoint,
|
|
CLUSNET_DEFAULT_ENDPOINT_STRING
|
|
);
|
|
MIDL_user_free(NmpClusnetEndpoint);
|
|
NmpClusnetEndpoint = NULL;
|
|
}
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
NmpClusnetEndpoint = MIDL_user_allocate(
|
|
NM_WCSLEN(CLUSNET_DEFAULT_ENDPOINT_STRING)
|
|
);
|
|
|
|
if (NmpClusnetEndpoint == NULL) {
|
|
status = ERROR_NOT_ENOUGH_MEMORY;
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, CS_EVENT_ALLOCATION_FAILURE, errorString);
|
|
return(status);
|
|
}
|
|
|
|
lstrcpyW(NmpClusnetEndpoint, CLUSNET_DEFAULT_ENDPOINT_STRING);
|
|
}
|
|
|
|
//
|
|
// Create the node, network, and interface objects
|
|
//
|
|
status = NmpCreateClusterObjects(NULL);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Perform version checking - check if we are compatible with the rest of the cluster
|
|
//
|
|
status = NmpIsNodeVersionAllowed(NmLocalNodeId, CsMyHighestVersion,
|
|
CsMyLowestVersion, FALSE);
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Version of Node %1!ws! is no longer compatible with other members of the cluster.\n",
|
|
NmLocalNodeIdString);
|
|
goto error_exit;
|
|
|
|
}
|
|
|
|
//If the forming node's version has changed, fix it up
|
|
status = NmpValidateNodeVersion(
|
|
NmLocalNodeIdString,
|
|
CsMyHighestVersion,
|
|
CsMyLowestVersion
|
|
);
|
|
|
|
if (status == ERROR_REVISION_MISMATCH)
|
|
{
|
|
//there was a version change, try and fix it up
|
|
status = NmpFormFixupNodeVersion(
|
|
NmLocalNodeIdString,
|
|
CsMyHighestVersion,
|
|
CsMyLowestVersion
|
|
);
|
|
NmLocalNodeVersionChanged = TRUE;
|
|
}
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
goto error_exit;
|
|
}
|
|
|
|
|
|
//
|
|
//at this point we ready to calculate the cluster version
|
|
//all the node versions are in the registry, the fixups have
|
|
//been made if necessary
|
|
//
|
|
NmpResetClusterVersion(FALSE);
|
|
|
|
NmpMulticastInitialize();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Forming cluster membership.\n"
|
|
);
|
|
|
|
status = MMJoin(
|
|
NmLocalNodeId,
|
|
NM_CLOCK_PERIOD,
|
|
NM_SEND_HB_RATE,
|
|
NM_RECV_HB_RATE,
|
|
NM_MM_JOIN_TIMEOUT
|
|
);
|
|
|
|
if (status != MM_OK) {
|
|
status = MMMapStatusToDosError(status);
|
|
eventCode = NM_EVENT_MM_FORM_FAILED;
|
|
ClRtlLogPrint(
|
|
LOG_CRITICAL,
|
|
"[NM] Membership form failed, status %1!u!. Unable to form a cluster.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
#ifdef MM_IN_CLUSNET
|
|
|
|
status = ClusnetFormCluster(
|
|
NmClusnetHandle,
|
|
NM_CLOCK_PERIOD,
|
|
NM_SEND_HB_RATE,
|
|
NM_RECV_HB_RATE
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(
|
|
LOG_CRITICAL,
|
|
"[NM] Failed to form a cluster, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
#endif // MM_IN_CLUSNET
|
|
|
|
//
|
|
// Check to see if we should come up in the paused state.
|
|
//
|
|
nodeKey = DmOpenKey(
|
|
DmNodesKey,
|
|
NmLocalNodeIdString,
|
|
KEY_READ
|
|
);
|
|
|
|
if (nodeKey != NULL) {
|
|
status = DmQueryDword(
|
|
nodeKey,
|
|
CLUSREG_NAME_NODE_PAUSED,
|
|
&isPaused,
|
|
&pausedDefault
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(
|
|
LOG_UNUSUAL,
|
|
"[NM] Unable to query Paused value for local node, status %1!u!.\n",
|
|
status
|
|
);
|
|
}
|
|
|
|
DmCloseKey(nodeKey);
|
|
}
|
|
else {
|
|
ClRtlLogPrint(
|
|
LOG_UNUSUAL,
|
|
"[NM] Unable to open database key to local node, status %1!u!. Unable to determine Pause state.\n",
|
|
status
|
|
);
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (isPaused) {
|
|
NmLocalNode->State = ClusterNodePaused;
|
|
} else {
|
|
NmLocalNode->State = ClusterNodeUp;
|
|
}
|
|
NmLocalNode->ExtendedState = ClusterNodeJoining;
|
|
|
|
NmpState = NmStateOnline;
|
|
|
|
NmpReleaseLock();
|
|
|
|
//
|
|
// If the cluster instance ID does not exist, create it now. The cluster
|
|
// instance ID should be in the database unless this is the first uplevel
|
|
// node.
|
|
//
|
|
NmpCreateClusterInstanceId();
|
|
|
|
//
|
|
// Create the cluster key.
|
|
//
|
|
status = NmpRegenerateClusterKey();
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Failed to generate cluster key, status %1!u!. "
|
|
"Allowing service to continue ...\n",
|
|
status
|
|
);
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// Enable communication for the local node.
|
|
//
|
|
status = ClusnetOnlineNodeComm(NmClusnetHandle, NmLocalNodeId);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
|
|
wsprintfW(&(errorString[0]), L"%u", NmLocalNodeId);
|
|
wsprintfW(&(errorString2[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_CLUSNET_ONLINE_COMM_FAILED,
|
|
errorString,
|
|
errorString2
|
|
);
|
|
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to enable communication for local node, status %1!u!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
GumReceiveUpdates(FALSE,
|
|
GumUpdateMembership,
|
|
NmpGumUpdateHandler,
|
|
NULL,
|
|
sizeof(NmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
|
|
NmGumDispatchTable,
|
|
NULL
|
|
);
|
|
|
|
//
|
|
// Enable network PnP event handling.
|
|
//
|
|
// If a PnP event occurred during the form process, an error code will
|
|
// be returned, which will abort startup of the service.
|
|
//
|
|
status = NmpEnablePnpEvents();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] A network PnP event occurred during form - abort.\n");
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Check if we formed without any viable networks. The form is still
|
|
// allowed, but we record an entry in the system event log.
|
|
//
|
|
if (!NmpCheckForNetwork()) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Formed cluster with no viable networks.\n"
|
|
);
|
|
CsLogEvent(LOG_UNUSUAL, NM_EVENT_FORM_WITH_NO_NETWORKS);
|
|
}
|
|
|
|
//
|
|
// Force a reconfiguration of multicast parameters and plumb
|
|
// the results in clusnet.
|
|
//
|
|
NmpAcquireLock();
|
|
|
|
if (NmpIsClusterMulticastReady(TRUE)) {
|
|
status = NmpStartMulticast(NULL);
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Failed to start multicast "
|
|
"on cluster networks, status %1!u!.\n",
|
|
status
|
|
);
|
|
//
|
|
// Not a de facto fatal error.
|
|
//
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
error_exit:
|
|
|
|
if (eventCode != 0) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmFormNewCluster
|
|
|
|
|
|
//
|
|
//
|
|
// Client-side routines for joining a cluster.
|
|
//
|
|
//
|
|
DWORD
|
|
NmJoinCluster(
|
|
IN RPC_BINDING_HANDLE SponsorBinding
|
|
)
|
|
{
|
|
DWORD status;
|
|
DWORD sponsorNodeId;
|
|
PNM_INTERFACE netInterface;
|
|
PNM_NETWORK network;
|
|
PNM_NODE node;
|
|
PLIST_ENTRY nodeEntry, ifEntry;
|
|
WCHAR errorString[12], errorString2[12];
|
|
DWORD eventCode = 0;
|
|
DWORD versionFlags = 0;
|
|
extern BOOLEAN bFormCluster;
|
|
DWORD retry;
|
|
BOOLEAN joinBegin3 = TRUE;
|
|
LPWSTR clusterInstanceId = NULL;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Beginning cluster join process.\n"
|
|
);
|
|
|
|
// GN: If a node tries to restart immediately after a clean shutdown,
|
|
// NmRpcJoinBegin2 can fail with ERROR_CLUSTER_NODE_UP. Since the regroup
|
|
// incident caused by this node might not be finished.
|
|
//
|
|
// If we are getting error CLUSTER_NODE_UP, we will keep retrying for
|
|
// 12 seconds, hoping that regroup will finish.
|
|
|
|
retry = 120 / 3; // We sleep for 3 seconds. Need to wait 2 minutes //
|
|
for (;;) {
|
|
//
|
|
// Get the join sequence number so we can tell if the cluster
|
|
// configuration changes during the join process. We overload the
|
|
// use of the NmpJoinSequence variable since it isn't used in the
|
|
// sponsor capacity until the node joins.
|
|
//
|
|
|
|
//
|
|
// Try NmRpcJoinBegin3. If it fails with an RPC procnum out of
|
|
// range error, the sponsor is a downlevel node. Revert to
|
|
// NmRpcJoinBegin2.
|
|
//
|
|
if (joinBegin3) {
|
|
|
|
// Only read the cluster instance ID from the registry on
|
|
// the first try.
|
|
if (clusterInstanceId == NULL) {
|
|
|
|
DWORD clusterInstanceIdBufSize = 0;
|
|
DWORD clusterInstanceIdSize = 0;
|
|
|
|
status = NmpQueryString(
|
|
DmClusterParametersKey,
|
|
L"ClusterInstanceID",
|
|
REG_SZ,
|
|
&clusterInstanceId,
|
|
&clusterInstanceIdBufSize,
|
|
&clusterInstanceIdSize
|
|
);
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to read cluster instance ID from database, status %1!u!.\n",
|
|
status
|
|
);
|
|
// Try to join with the downlevel interface. It is
|
|
// possible that this node was just upgraded and the
|
|
// last time it was in the cluster there was no
|
|
// cluster instance ID.
|
|
joinBegin3 = FALSE;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
status = NmRpcJoinBegin3(
|
|
SponsorBinding,
|
|
clusterInstanceId,
|
|
NmLocalNodeIdString,
|
|
NmLocalNodeName,
|
|
CsMyHighestVersion,
|
|
CsMyLowestVersion,
|
|
0, // joiner's major node version
|
|
0, // joiner's minor node version
|
|
L"", // joiner's CsdVersion
|
|
0, // joiner's product suite
|
|
&sponsorNodeId,
|
|
&NmpJoinSequence,
|
|
&NmpClusnetEndpoint
|
|
);
|
|
if (status == RPC_S_PROCNUM_OUT_OF_RANGE) {
|
|
// retry immediately with JoinBegin2
|
|
joinBegin3 = FALSE;
|
|
continue;
|
|
}
|
|
} else {
|
|
|
|
status = NmRpcJoinBegin2(
|
|
SponsorBinding,
|
|
NmLocalNodeIdString,
|
|
NmLocalNodeName,
|
|
CsMyHighestVersion,
|
|
CsMyLowestVersion,
|
|
&sponsorNodeId,
|
|
&NmpJoinSequence,
|
|
&NmpClusnetEndpoint
|
|
);
|
|
}
|
|
|
|
if ( ((status != ERROR_CLUSTER_NODE_UP
|
|
&& status != ERROR_CLUSTER_JOIN_IN_PROGRESS) ) || retry == 0 )
|
|
{
|
|
break;
|
|
}
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Unable to begin join, status %1!u!. Retrying ...\n",
|
|
status
|
|
);
|
|
CsServiceStatus.dwCheckPoint++;
|
|
CsAnnounceServiceStatus();
|
|
Sleep(3000);
|
|
--retry;
|
|
}
|
|
|
|
// Free the cluster instance ID string, if necessary.
|
|
if (clusterInstanceId != NULL) {
|
|
midl_user_free(clusterInstanceId);
|
|
}
|
|
|
|
// [GORN Jan/7/2000]
|
|
// If we are here, then we have already successfully talked to the sponsor
|
|
// via JoinVersion interface.
|
|
//
|
|
// We shouldn't try to form the cluster if NmRpcJoinBegin2 fails.
|
|
// Otherwise we may steal the quorum on the move [452108]
|
|
|
|
//
|
|
// Past this point we will not try to form a cluster
|
|
//
|
|
bFormCluster = FALSE;
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_BEGIN_JOIN_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Unable to begin join, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Sponsor node ID = %1!u!. Join sequence number = %2!u!, endpoint = %3!ws!.\n",
|
|
sponsorNodeId,
|
|
NmpJoinSequence,
|
|
NmpClusnetEndpoint
|
|
);
|
|
|
|
//
|
|
// Create all of the cluster objects for which we are responsible.
|
|
//
|
|
status = NmpCreateClusterObjects(SponsorBinding);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
goto error_exit;
|
|
}
|
|
|
|
// The local node version might have changed, fix it
|
|
// The sponsorer fixes it in the registry and tells other
|
|
// nodes about it, however the joining node is not a part
|
|
// of the cluster membership as yet.
|
|
// The local node structure is created early on in NmInitialize()
|
|
// hence it must get fixed up
|
|
if ((NmLocalNode->HighestVersion != CsMyHighestVersion) ||
|
|
(NmLocalNode->LowestVersion != CsMyLowestVersion))
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Local Node version changed probably due to upgrade/deinstall\n");
|
|
NmLocalNode->HighestVersion = CsMyHighestVersion;
|
|
NmLocalNode->LowestVersion = CsMyLowestVersion;
|
|
NmLocalNodeVersionChanged = TRUE;
|
|
}
|
|
|
|
//at this point we ready to calculate the cluster version
|
|
//all the node objects contain the correct node versions
|
|
NmpResetClusterVersion(FALSE);
|
|
|
|
NmpMulticastInitialize();
|
|
|
|
//
|
|
// Enable communication for the local node.
|
|
//
|
|
status = ClusnetOnlineNodeComm(NmClusnetHandle, NmLocalNodeId);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
wsprintfW(&(errorString[0]), L"%u", NmLocalNodeId);
|
|
wsprintfW(&(errorString2[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_CLUSNET_ONLINE_COMM_FAILED,
|
|
errorString,
|
|
errorString2
|
|
);
|
|
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Unable to enable communication for local node, status %1!u!.\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Fire up the intracluster RPC server so we can perform the membership
|
|
// join.
|
|
//
|
|
status = ClusterRegisterIntraclusterRpcInterface();
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
eventCode = CS_EVENT_RPC_INIT_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"ClusSvc: Error starting intracluster RPC server, Status = %1!u!\n",
|
|
status);
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Cycle through the list of cluster nodes and create mutual RPC bindings
|
|
// for the intracluster interface with each.
|
|
//
|
|
for (nodeEntry = NmpNodeList.Flink;
|
|
nodeEntry != &NmpNodeList;
|
|
nodeEntry = nodeEntry->Flink
|
|
)
|
|
{
|
|
node = CONTAINING_RECORD(nodeEntry, NM_NODE, Linkage);
|
|
|
|
if ( (node != NmLocalNode)
|
|
&&
|
|
( (node->State == ClusterNodeUp)
|
|
||
|
|
(node->State == ClusterNodePaused)
|
|
)
|
|
)
|
|
{
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Creating RPC bindings for member node %1!u!\n",
|
|
node->NodeId
|
|
);
|
|
|
|
//
|
|
//
|
|
// Cycle through the target node's interfaces
|
|
//
|
|
for (ifEntry = node->InterfaceList.Flink;
|
|
ifEntry != &(node->InterfaceList);
|
|
ifEntry = ifEntry->Flink
|
|
)
|
|
{
|
|
netInterface = CONTAINING_RECORD(
|
|
ifEntry,
|
|
NM_INTERFACE,
|
|
NodeLinkage
|
|
);
|
|
|
|
network = netInterface->Network;
|
|
|
|
if (NmpIsNetworkForInternalUse(network)) {
|
|
if ( (network->LocalInterface != NULL) &&
|
|
NmpIsInterfaceRegistered(network->LocalInterface) &&
|
|
NmpIsInterfaceRegistered(netInterface)
|
|
)
|
|
{
|
|
PNM_INTERFACE localInterface = network->LocalInterface;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Attempting to use network %1!ws! to "
|
|
"create bindings for node %2!u!\n",
|
|
OmObjectName(network),
|
|
node->NodeId
|
|
);
|
|
|
|
status = NmpSetNodeInterfacePriority(
|
|
node,
|
|
0xFFFFFFFF,
|
|
netInterface,
|
|
1
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
|
|
status = NmRpcCreateBinding(
|
|
SponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
(LPWSTR) OmObjectId(localInterface),
|
|
(LPWSTR) OmObjectId(node)
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
//
|
|
// Create RPC bindings for the target node.
|
|
//
|
|
status = NmpCreateRpcBindings(node);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Created binding for node "
|
|
"%1!u!\n",
|
|
node->NodeId
|
|
);
|
|
break;
|
|
}
|
|
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent3(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_JOIN_BIND_OUT_FAILED,
|
|
OmObjectName(node),
|
|
OmObjectName(network),
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Unable to create binding for "
|
|
"node %1!u!, status %2!u!.\n",
|
|
node->NodeId,
|
|
status
|
|
);
|
|
}
|
|
else {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent3(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_JOIN_BIND_IN_FAILED,
|
|
OmObjectName(node),
|
|
OmObjectName(network),
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Member node %1!u! failed to "
|
|
"create binding to us, status %2!u!\n",
|
|
node->NodeId,
|
|
status
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
wsprintfW(&(errorString[0]), L"%u", node->NodeId);
|
|
wsprintfW(&(errorString2[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_CLUSNET_SET_INTERFACE_PRIO_FAILED,
|
|
errorString,
|
|
errorString2
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to set interface priorities "
|
|
"for node %1!u!, status %2!u!\n",
|
|
node->NodeId,
|
|
status
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_UNREACHABLE;
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] No matching local interface for "
|
|
"network %1!ws!\n",
|
|
OmObjectName(netInterface->Network)
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_UNREACHABLE;
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Network %1!ws! is not used for internal "
|
|
"communication.\n",
|
|
OmObjectName(netInterface->Network)
|
|
);
|
|
}
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
//
|
|
// Cannot make contact with this node. The join fails.
|
|
//
|
|
CsLogEvent1(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_NODE_UNREACHABLE,
|
|
OmObjectName(node)
|
|
);
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Cluster node %1!u! is not reachable. Join "
|
|
"failed.\n",
|
|
node->NodeId
|
|
);
|
|
goto error_exit;
|
|
}
|
|
}
|
|
}
|
|
|
|
CL_ASSERT(status == ERROR_SUCCESS);
|
|
|
|
//
|
|
// run through the active nodes again, this time establishing
|
|
// security contexts to use in signing packets
|
|
//
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Establishing security contexts with all active nodes.\n"
|
|
);
|
|
|
|
for (nodeEntry = NmpNodeList.Flink;
|
|
nodeEntry != &NmpNodeList;
|
|
nodeEntry = nodeEntry->Flink
|
|
)
|
|
{
|
|
node = CONTAINING_RECORD(nodeEntry, NM_NODE, Linkage);
|
|
|
|
status = ClMsgCreateActiveNodeSecurityContext(NmpJoinSequence, node);
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent2(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_CREATE_SECURITY_CONTEXT_FAILED,
|
|
OmObjectName(node),
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Unable to establish security context for node %1!u!, status 0x%2!08X!\n",
|
|
node->NodeId,
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Finally, petition the sponsor for membership
|
|
//
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Petitioning to join cluster membership.\n"
|
|
);
|
|
|
|
#ifdef CLUSTER_TESTPOINT
|
|
TESTPT(TpFailJoinPetitionForMembership) {
|
|
status = 999999;
|
|
goto error_exit;
|
|
}
|
|
#endif
|
|
|
|
status = NmRpcPetitionForMembership(
|
|
SponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
//
|
|
// Our petition was denied.
|
|
//
|
|
eventCode = NM_EVENT_PETITION_FAILED;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Petition to join was denied %1!d!\n",
|
|
status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
#ifdef CLUSTER_TESTPOINT
|
|
TESTPT(TpFailNmJoin) {
|
|
status = 999999;
|
|
goto error_exit;
|
|
}
|
|
#endif
|
|
|
|
//
|
|
// Reset the interface priorities for all nodes to default to
|
|
// the priorities of the associated networks.
|
|
//
|
|
NmpAcquireLock();
|
|
|
|
for (ifEntry = NmpInterfaceList.Flink;
|
|
ifEntry != &NmpInterfaceList;
|
|
ifEntry = ifEntry->Flink
|
|
)
|
|
{
|
|
netInterface = CONTAINING_RECORD(ifEntry, NM_INTERFACE, Linkage);
|
|
network = netInterface->Network;
|
|
|
|
if ( NmpIsNetworkForInternalUse(network) &&
|
|
NmpIsInterfaceRegistered(netInterface)
|
|
)
|
|
{
|
|
status = ClusnetSetInterfacePriority(
|
|
NmClusnetHandle,
|
|
netInterface->Node->NodeId,
|
|
netInterface->Network->ShortId,
|
|
0
|
|
);
|
|
|
|
CL_ASSERT(status == ERROR_SUCCESS);
|
|
}
|
|
}
|
|
|
|
NmpState = NmStateOnline;
|
|
|
|
NmpReleaseLock();
|
|
|
|
//
|
|
// Invoke other components to create RPC bindings for each node.
|
|
//
|
|
|
|
//
|
|
// Enable our GUM update handler.
|
|
//
|
|
GumReceiveUpdates(
|
|
TRUE,
|
|
GumUpdateMembership,
|
|
NmpGumUpdateHandler,
|
|
NULL,
|
|
sizeof(NmGumDispatchTable)/sizeof(GUM_DISPATCH_ENTRY),
|
|
NmGumDispatchTable,
|
|
NULL
|
|
);
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
error_exit:
|
|
|
|
if (eventCode != 0) {
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmJoinCluster
|
|
|
|
|
|
BOOLEAN
|
|
NmpVerifyJoinerConnectivity(
|
|
IN PNM_NODE JoiningNode,
|
|
OUT PNM_NODE * UnreachableNode
|
|
)
|
|
{
|
|
PLIST_ENTRY entry;
|
|
PNM_NODE node;
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Verifying connectivity to active cluster nodes\n"
|
|
);
|
|
|
|
*UnreachableNode = NULL;
|
|
|
|
for (entry = NmpNodeList.Flink;
|
|
entry != &NmpNodeList;
|
|
entry = entry->Flink
|
|
)
|
|
{
|
|
node = CONTAINING_RECORD(
|
|
entry,
|
|
NM_NODE,
|
|
Linkage
|
|
);
|
|
|
|
if (NM_NODE_UP(node)) {
|
|
if (!NmpVerifyNodeConnectivity(JoiningNode, node, NULL)) {
|
|
*UnreachableNode = node;
|
|
return(FALSE);
|
|
}
|
|
}
|
|
}
|
|
|
|
return(TRUE);
|
|
|
|
} // NmpVerifyJoinerConnectivity
|
|
|
|
|
|
DWORD
|
|
NmGetJoinSequence(
|
|
VOID
|
|
)
|
|
{
|
|
DWORD sequence;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
sequence = NmpJoinSequence;
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(sequence);
|
|
|
|
} // NmGetJoinSequence
|
|
|
|
|
|
|
|
DWORD
|
|
NmJoinComplete(
|
|
OUT DWORD *EndSeq
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called by the initialization sequence once a
|
|
join has successfully completed and the node can transition
|
|
from ClusterNodeJoining to ClusterNodeOnline.
|
|
|
|
Arguments:
|
|
|
|
None
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 error otherwise.
|
|
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD Sequence;
|
|
DWORD Status;
|
|
PNM_JOIN_UPDATE JoinUpdate = NULL;
|
|
DWORD UpdateLength;
|
|
HDMKEY NodeKey = NULL;
|
|
DWORD Default = 0;
|
|
DWORD NumRetries=50;
|
|
DWORD eventCode = 0;
|
|
WCHAR errorString[12];
|
|
PNM_NETWORK_STATE_ENUM networkStateEnum = NULL;
|
|
PNM_NETWORK_STATE_INFO networkStateInfo;
|
|
PNM_INTERFACE_STATE_ENUM interfaceStateEnum = NULL;
|
|
PNM_INTERFACE_STATE_INFO interfaceStateInfo;
|
|
DWORD i;
|
|
PNM_NETWORK network;
|
|
PNM_INTERFACE netInterface;
|
|
PLIST_ENTRY entry;
|
|
DWORD moveCount;
|
|
BOOLEAN mcast;
|
|
|
|
|
|
UpdateLength = sizeof(NM_JOIN_UPDATE) +
|
|
(lstrlenW(OmObjectId(NmLocalNode))+1)*sizeof(WCHAR);
|
|
|
|
JoinUpdate = LocalAlloc(LMEM_FIXED, UpdateLength);
|
|
|
|
if (JoinUpdate == NULL) {
|
|
Status = ERROR_NOT_ENOUGH_MEMORY;
|
|
eventCode = CS_EVENT_ALLOCATION_FAILURE;
|
|
ClRtlLogPrint(LOG_CRITICAL, "[NMJOIN] Unable to allocate memory.\n");
|
|
goto error_exit;
|
|
}
|
|
|
|
JoinUpdate->JoinSequence = NmpJoinSequence;
|
|
|
|
lstrcpyW(JoinUpdate->NodeId, OmObjectId(NmLocalNode));
|
|
|
|
NodeKey = DmOpenKey(DmNodesKey, OmObjectId(NmLocalNode), KEY_READ);
|
|
|
|
if (NodeKey == NULL) {
|
|
Status = GetLastError();
|
|
wsprintfW(&(errorString[0]), L"%u", Status);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
CS_EVENT_REG_OPEN_FAILED,
|
|
OmObjectId(NmLocalNode),
|
|
errorString
|
|
);
|
|
ClRtlLogPrint(
|
|
LOG_CRITICAL,
|
|
"[NMJOIN] Unable to open database key to local node, status %1!u!.\n",
|
|
Status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
retry:
|
|
|
|
Status = GumBeginJoinUpdate(GumUpdateMembership, &Sequence);
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
eventCode = NM_EVENT_GENERAL_JOIN_ERROR;
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Get the leader node ID from the sponsor.
|
|
//
|
|
Status = NmRpcGetLeaderNodeId(
|
|
CsJoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&NmpLeaderNodeId
|
|
);
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
if (Status == ERROR_CALL_NOT_IMPLEMENTED) {
|
|
//
|
|
// The sponsor is an NT4 node. Make this node the leader.
|
|
//
|
|
NmpLeaderNodeId = NmLocalNodeId;
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to get leader node ID from sponsor, status %1!u!.\n",
|
|
Status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Node %1!u! is the leader.\n",
|
|
NmpLeaderNodeId
|
|
);
|
|
|
|
//
|
|
// Fetch the network and interface states from the sponsor
|
|
//
|
|
Status = NmRpcEnumNetworkAndInterfaceStates(
|
|
CsJoinSponsorBinding,
|
|
NmpJoinSequence,
|
|
NmLocalNodeIdString,
|
|
&networkStateEnum,
|
|
&interfaceStateEnum
|
|
);
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to get network and interface state values from sponsor, status %1!u!.\n",
|
|
Status
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
for (i=0; i<networkStateEnum->NetworkCount; i++) {
|
|
networkStateInfo = &(networkStateEnum->NetworkList[i]);
|
|
|
|
network = OmReferenceObjectById(
|
|
ObjectTypeNetwork,
|
|
networkStateInfo->Id
|
|
);
|
|
|
|
if (network == NULL) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Cannot find network %1!ws! to update state.\n",
|
|
networkStateInfo->Id
|
|
);
|
|
NmpReleaseLock();
|
|
NmpFreeNetworkStateEnum(networkStateEnum);
|
|
LocalFree(JoinUpdate);
|
|
DmCloseKey(NodeKey);
|
|
return(ERROR_CLUSTER_NETWORK_NOT_FOUND);
|
|
}
|
|
|
|
network->State = networkStateInfo->State;
|
|
|
|
OmDereferenceObject(network);
|
|
}
|
|
|
|
for (i=0; i<interfaceStateEnum->InterfaceCount; i++) {
|
|
interfaceStateInfo = &(interfaceStateEnum->InterfaceList[i]);
|
|
|
|
netInterface = OmReferenceObjectById(
|
|
ObjectTypeNetInterface,
|
|
interfaceStateInfo->Id
|
|
);
|
|
|
|
if (netInterface == NULL) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Cannot find interface %1!ws! to update state.\n",
|
|
interfaceStateInfo->Id
|
|
);
|
|
NmpReleaseLock();
|
|
NmpFreeInterfaceStateEnum(interfaceStateEnum);
|
|
LocalFree(JoinUpdate);
|
|
DmCloseKey(NodeKey);
|
|
return(ERROR_CLUSTER_NETINTERFACE_NOT_FOUND);
|
|
}
|
|
|
|
netInterface->State = interfaceStateInfo->State;
|
|
|
|
OmDereferenceObject(netInterface);
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
NmpFreeInterfaceStateEnum(interfaceStateEnum);
|
|
interfaceStateEnum = NULL;
|
|
|
|
|
|
//
|
|
// Check the registry to see if we should come up paused.
|
|
//
|
|
JoinUpdate->IsPaused = Default;
|
|
|
|
Status = DmQueryDword(NodeKey,
|
|
CLUSREG_NAME_NODE_PAUSED,
|
|
&JoinUpdate->IsPaused,
|
|
&Default);
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Unable to query Paused value for local node, status %1!u!.\n",
|
|
Status
|
|
);
|
|
}
|
|
|
|
Status = GumEndJoinUpdate(Sequence,
|
|
GumUpdateMembership,
|
|
NmUpdateJoinComplete,
|
|
UpdateLength,
|
|
JoinUpdate);
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
if (Status == ERROR_CLUSTER_JOIN_ABORTED) {
|
|
//
|
|
// The join was aborted by the cluster members. Don't retry.
|
|
//
|
|
CsLogEvent(LOG_CRITICAL, NM_EVENT_JOIN_ABORTED);
|
|
goto error_exit;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] GumEndJoinUpdate with sequence %1!d! failed %2!d!\n",
|
|
Sequence,
|
|
Status
|
|
);
|
|
|
|
if (--NumRetries == 0) {
|
|
CsLogEvent(LOG_CRITICAL, NM_EVENT_JOIN_ABANDONED);
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Tried to complete join too many times. Giving up.\n"
|
|
);
|
|
goto error_exit;
|
|
}
|
|
|
|
goto retry;
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (JoinUpdate->IsPaused != 0) {
|
|
//
|
|
// We should be coming up paused.
|
|
//
|
|
NmLocalNode->State = ClusterNodePaused;
|
|
} else {
|
|
//
|
|
// Set our state to online.
|
|
//
|
|
NmLocalNode->State = ClusterNodeUp;
|
|
}
|
|
|
|
//
|
|
// Remember whether this cluster meets multicast criteria.
|
|
//
|
|
mcast = NmpIsClusterMulticastReady(TRUE);
|
|
|
|
NmpReleaseLock();
|
|
|
|
//
|
|
// If the cluster instance ID does not exist, create it now. The cluster
|
|
// instance ID should be in the database unless this is the first uplevel
|
|
// node.
|
|
//
|
|
NmpCreateClusterInstanceId();
|
|
|
|
//
|
|
// Create the cluster key.
|
|
//
|
|
Status = NmpRegenerateClusterKey();
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Failed to generate cluster key, status %1!u!. "
|
|
"Allowing service to continue ...\n",
|
|
Status
|
|
);
|
|
Status = ERROR_SUCCESS;
|
|
}
|
|
|
|
//
|
|
// Finally, enable network PnP event handling.
|
|
//
|
|
// If a PnP event occured during the join process, an error code will
|
|
// be returned, which will abort startup of the service.
|
|
//
|
|
Status = NmpEnablePnpEvents();
|
|
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] A network PnP event occurred during join - abort.\n");
|
|
goto error_exit;
|
|
}
|
|
|
|
//
|
|
// Mark end sequence
|
|
*EndSeq = Sequence;
|
|
|
|
ClRtlLogPrint(LOG_NOISE, "[NMJOIN] Join complete, node now online\n");
|
|
|
|
if (mcast) {
|
|
Status = NmpRefreshClusterMulticastConfiguration();
|
|
if (Status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Failed to refresh multicast configuration "
|
|
"for cluster networks, status %1!u!.\n",
|
|
Status
|
|
);
|
|
//
|
|
// Not a de facto fatal error.
|
|
//
|
|
Status = ERROR_SUCCESS;
|
|
}
|
|
}
|
|
|
|
error_exit:
|
|
|
|
if (JoinUpdate != NULL) {
|
|
LocalFree(JoinUpdate);
|
|
}
|
|
|
|
if (NodeKey != NULL) {
|
|
DmCloseKey(NodeKey);
|
|
}
|
|
|
|
if (eventCode != 0) {
|
|
wsprintfW(&(errorString[0]), L"%u", Status);
|
|
CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
|
|
}
|
|
|
|
return(Status);
|
|
|
|
} // NmJoinComplete
|
|
|
|
|
|
//
|
|
// Server-side routines for sponsoring a joining node.
|
|
//
|
|
/*
|
|
|
|
Notes On Joining:
|
|
|
|
Only a single node may join the cluster at any time. A join begins with
|
|
a JoinBegin global update. A join completes successfully with a
|
|
JoinComplete global update. A join is aborted with a JoinAbort global
|
|
update.
|
|
|
|
A timer runs on the sponsor during a join. The timer is suspended
|
|
while the sponsor is performing work on behalf of the joiner. If the
|
|
timer expires, a worker thread is scheduled to initiate the abort
|
|
process.
|
|
|
|
|
|
If the sponsor goes down while a join is in progress, the node
|
|
down handling code on each remaining node will abort the join.
|
|
|
|
*/
|
|
|
|
error_status_t
|
|
s_NmRpcJoinBegin(
|
|
IN handle_t IDL_handle,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
OUT LPDWORD SponsorNodeId,
|
|
OUT LPDWORD JoinSequenceNumber,
|
|
OUT LPWSTR * ClusnetEndpoint
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Called by a joining node to begin the join process.
|
|
Issues a JoinBegin global update.
|
|
|
|
--*/
|
|
{
|
|
|
|
DWORD status=ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Request by node %1!ws! to begin joining, refused. Using obsolete join interface\n",
|
|
JoinerNodeId
|
|
);
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
WCHAR errorCode[16];
|
|
|
|
wsprintfW( errorCode, L"%u", status );
|
|
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_JOIN_REFUSED,
|
|
JoinerNodeId,
|
|
errorCode
|
|
);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcJoinBegin
|
|
|
|
//
|
|
// Server-side routines for sponsoring a joining node.
|
|
//
|
|
/*
|
|
|
|
Notes On Joining:
|
|
|
|
|
|
|
|
|
|
*/
|
|
//#pragma optimize("", off)
|
|
|
|
DWORD
|
|
NmpJoinBegin(
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
IN DWORD JoinerHighestVersion,
|
|
IN DWORD JoinerLowestVersion,
|
|
OUT LPDWORD SponsorNodeId,
|
|
OUT LPDWORD JoinSequenceNumber,
|
|
OUT LPWSTR * ClusnetEndpoint
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Called from s_NmRpcJoinBegin2 and s_NmRpcJoinBegin3.
|
|
Contains functionality common to both JoinBegin versions.
|
|
|
|
Notes:
|
|
|
|
Called with NM lock held and NmpLockedEnterApi already
|
|
called.
|
|
|
|
--*/
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
PNM_NODE joinerNode = NULL;
|
|
LPWSTR endpoint = NULL;
|
|
|
|
joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode == NULL) {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! is not a member of this cluster. Cannot join.\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
endpoint = MIDL_user_allocate(NM_WCSLEN(NmpClusnetEndpoint));
|
|
|
|
if (endpoint == NULL) {
|
|
status = ERROR_NOT_ENOUGH_MEMORY;
|
|
goto FnExit;
|
|
}
|
|
|
|
lstrcpyW(endpoint, NmpClusnetEndpoint);
|
|
|
|
if (NmpJoinBeginInProgress) {
|
|
status = ERROR_CLUSTER_JOIN_IN_PROGRESS;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! cannot join because a join is already in progress.\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
//validate the nodes version's number
|
|
//ie. check to see what the cluster database
|
|
//claims this node's version is vs what the node
|
|
//itself suggests
|
|
status = NmpValidateNodeVersion(
|
|
JoinerNodeId,
|
|
JoinerHighestVersion,
|
|
JoinerLowestVersion
|
|
);
|
|
|
|
//since this node joined, its version has changed
|
|
//this may happen due to upgrades or reinstall
|
|
//if this version cant join due to versioning,fail the join
|
|
if (status == ERROR_REVISION_MISMATCH) {
|
|
DWORD id = NmGetNodeId(joinerNode);
|
|
|
|
status = NmpIsNodeVersionAllowed(
|
|
id,
|
|
JoinerHighestVersion,
|
|
JoinerLowestVersion,
|
|
TRUE
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] The version of the cluster prevents Node %1!ws! from joining the cluster\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
}
|
|
else if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] The version of Node %1!ws! cannot be validated.\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Lock out other join attempts with this sponsor.
|
|
//
|
|
NmpJoinBeginInProgress = TRUE;
|
|
NmpSuccessfulMMJoin = FALSE;
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = GumSendUpdateEx(
|
|
GumUpdateMembership,
|
|
NmUpdateJoinBegin2,
|
|
5,
|
|
NM_WCSLEN(JoinerNodeId),
|
|
JoinerNodeId,
|
|
NM_WCSLEN(JoinerNodeName),
|
|
JoinerNodeName,
|
|
NM_WCSLEN(NmLocalNodeIdString),
|
|
NmLocalNodeIdString,
|
|
sizeof(DWORD),
|
|
&JoinerHighestVersion,
|
|
sizeof(DWORD),
|
|
&JoinerLowestVersion
|
|
);
|
|
|
|
NmpAcquireLock();
|
|
|
|
CL_ASSERT(NmpJoinBeginInProgress == TRUE);
|
|
NmpJoinBeginInProgress = FALSE;
|
|
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] JoinBegin2 update for node %1!ws! failed, status %2!u!\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
goto FnExit;
|
|
}
|
|
//
|
|
// Verify that the join is still in progress with
|
|
// this node as the sponsor.
|
|
//
|
|
if ( (NmpJoinerNodeId == joinerNode->NodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId)
|
|
)
|
|
{
|
|
//
|
|
// Give the joiner parameters for future
|
|
// join-related calls.
|
|
//
|
|
*SponsorNodeId = NmLocalNodeId;
|
|
*JoinSequenceNumber = NmpJoinSequence;
|
|
|
|
//
|
|
// Start the join timer
|
|
//
|
|
NmpJoinTimer = NM_JOIN_TIMEOUT;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Node %1!ws! has begun the join process.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
else
|
|
{
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Begin join of node %1!ws! was aborted\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
FnExit:
|
|
if (joinerNode) {
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
*ClusnetEndpoint = endpoint;
|
|
}
|
|
else {
|
|
WCHAR errorCode[16];
|
|
|
|
if (endpoint) MIDL_user_free(endpoint);
|
|
|
|
wsprintfW( errorCode, L"%u", status );
|
|
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_JOIN_REFUSED,
|
|
JoinerNodeId,
|
|
errorCode
|
|
);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmpJoinBegin
|
|
|
|
error_status_t
|
|
s_NmRpcJoinBegin2(
|
|
IN handle_t IDL_handle,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
IN DWORD JoinerHighestVersion,
|
|
IN DWORD JoinerLowestVersion,
|
|
OUT LPDWORD SponsorNodeId,
|
|
OUT LPDWORD JoinSequenceNumber,
|
|
OUT LPWSTR * ClusnetEndpoint
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Called by a joining node to begin the join process.
|
|
Issues a JoinBegin global update.
|
|
|
|
--*/
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
|
|
status = FmDoesQuorumAllowJoin();
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Quorum Characteristics prevent the node %1!ws! to from joining, Status=%2!u!.\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Processing request by node %1!ws! to begin joining (2).\n",
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (!NmpLockedEnterApi(NmStateOnline)) {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Cannot sponsor a joining node at this time.\n"
|
|
);
|
|
NmpReleaseLock();
|
|
return(status);
|
|
}
|
|
|
|
status = NmpJoinBegin(
|
|
JoinerNodeId,
|
|
JoinerNodeName,
|
|
JoinerHighestVersion,
|
|
JoinerLowestVersion,
|
|
SponsorNodeId,
|
|
JoinSequenceNumber,
|
|
ClusnetEndpoint
|
|
);
|
|
|
|
NmpLockedLeaveApi();
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcJoinBegin2
|
|
|
|
error_status_t
|
|
s_NmRpcJoinBegin3(
|
|
IN handle_t IDL_handle,
|
|
IN LPWSTR JoinerClusterInstanceId,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
IN DWORD JoinerHighestVersion,
|
|
IN DWORD JoinerLowestVersion,
|
|
IN DWORD JoinerMajorVersion,
|
|
IN DWORD JoinerMinorVersion,
|
|
IN LPWSTR JoinerCsdVersion,
|
|
IN DWORD JoinerProductSuite,
|
|
OUT LPDWORD SponsorNodeId,
|
|
OUT LPDWORD JoinSequenceNumber,
|
|
OUT LPWSTR * ClusnetEndpoint
|
|
)
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
|
|
LPWSTR clusterInstanceId = NULL;
|
|
DWORD clusterInstanceIdBufSize = 0;
|
|
DWORD clusterInstanceIdSize = 0;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Processing request by node %1!ws! to begin joining (3).\n",
|
|
JoinerNodeId
|
|
);
|
|
|
|
status = FmDoesQuorumAllowJoin();
|
|
if (status != ERROR_SUCCESS)
|
|
{
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Quorum Characteristics prevent node %1!ws! from joining, Status=%2!u!.\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
|
|
}
|
|
|
|
//
|
|
// Check our cluster instance ID against the joiner's.
|
|
//
|
|
if (NmpClusterInstanceId == NULL ||
|
|
lstrcmpiW(NmpClusterInstanceId, JoinerClusterInstanceId) != 0) {
|
|
|
|
WCHAR errorCode[16];
|
|
|
|
status = ERROR_CLUSTER_INSTANCE_ID_MISMATCH;
|
|
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Sponsor cluster instance ID %1!ws! does not match joiner cluster instance id %2!ws!.\n",
|
|
((NmpClusterInstanceId == NULL) ? L"<NULL>" : NmpClusterInstanceId),
|
|
JoinerClusterInstanceId
|
|
);
|
|
|
|
wsprintfW( errorCode, L"%u", status );
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_JOIN_REFUSED,
|
|
JoinerNodeId,
|
|
errorCode
|
|
);
|
|
|
|
return(status);
|
|
|
|
} else {
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Sponsor cluster instance ID matches joiner cluster instance id (%1!ws!).\n",
|
|
JoinerClusterInstanceId
|
|
);
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (!NmpLockedEnterApi(NmStateOnline)) {
|
|
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Cannot sponsor a joining node at this time.\n"
|
|
);
|
|
|
|
} else {
|
|
|
|
status = NmpJoinBegin(
|
|
JoinerNodeId,
|
|
JoinerNodeName,
|
|
JoinerHighestVersion,
|
|
JoinerLowestVersion,
|
|
SponsorNodeId,
|
|
JoinSequenceNumber,
|
|
ClusnetEndpoint
|
|
);
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcJoinBegin3
|
|
|
|
DWORD
|
|
NmpUpdateJoinBegin(
|
|
IN BOOL SourceNode,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
IN LPWSTR SponsorNodeId
|
|
)
|
|
{
|
|
DWORD status=ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Failing update to begin join of node %1!ws! with "
|
|
"sponsor %2!ws!. Using obsolete join interface.\n",
|
|
JoinerNodeId,
|
|
SponsorNodeId
|
|
);
|
|
|
|
return(status);
|
|
|
|
} // NmpUpdateJoinBegin
|
|
|
|
|
|
DWORD
|
|
NmpUpdateJoinBegin2(
|
|
IN BOOL SourceNode,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerNodeName,
|
|
IN LPWSTR SponsorNodeId,
|
|
IN LPDWORD JoinerHighestVersion,
|
|
IN LPDWORD JoinerLowestVersion
|
|
)
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
PNM_NODE sponsorNode=NULL;
|
|
PNM_NODE joinerNode=NULL;
|
|
HLOCALXSACTION hXsaction=NULL;
|
|
BOOLEAN lockAcquired = FALSE;
|
|
BOOLEAN fakeSuccess = FALSE;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Received update to begin join (2) of node %1!ws! with "
|
|
"sponsor %2!ws!.\n",
|
|
JoinerNodeId,
|
|
SponsorNodeId
|
|
);
|
|
|
|
//
|
|
// If running with -noquorum flag or if not online, don't sponsor
|
|
// any node.
|
|
//
|
|
if (CsNoQuorum || !NmpEnterApi(NmStateOnline)) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NM] Not in valid state to begin a join operation.\n"
|
|
);
|
|
return(ERROR_NODE_NOT_AVAILABLE);
|
|
}
|
|
|
|
//
|
|
// Find the sponsor node
|
|
//
|
|
sponsorNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
SponsorNodeId
|
|
);
|
|
|
|
if (sponsorNode == NULL) {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] JoinBegin update for node %1!ws! failed because "
|
|
"sponsor node %2!ws! is not a member of this cluster.\n",
|
|
JoinerNodeId,
|
|
SponsorNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Find the joiner node
|
|
//
|
|
joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode == NULL) {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! is not a member of this cluster. "
|
|
"Cannot join.\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
hXsaction = DmBeginLocalUpdate();
|
|
|
|
if (hXsaction == NULL) {
|
|
status = GetLastError();
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to start a transaction, status %1!u!\n",
|
|
status
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
NmpAcquireLock(); lockAcquired = TRUE;
|
|
|
|
if (!NM_NODE_UP(sponsorNode)) {
|
|
//
|
|
// [GorN 4/3/2000] See bug#98287
|
|
// This hack is a kludgy solution to a problem that
|
|
// a replay of this Gum update after the sponsor death
|
|
// will fail on all the nodes that didn't see the update.
|
|
//
|
|
fakeSuccess = TRUE;
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Sponsor node %1!ws! is not up. Join of node %2!ws! "
|
|
"failed.\n",
|
|
SponsorNodeId,
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Check that the joiner is really who we think it is.
|
|
//
|
|
if (lstrcmpiW( OmObjectName(joinerNode), JoinerNodeName)) {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! is not a member of this cluster. "
|
|
"Cannot join.\n",
|
|
JoinerNodeName
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Make sure the joiner is currently down.
|
|
//
|
|
if (joinerNode->State != ClusterNodeDown) {
|
|
status = ERROR_CLUSTER_NODE_UP;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! is not down. Cannot begin join.\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Make sure we aren't already in a join.
|
|
//
|
|
if (NmpJoinerNodeId != ClusterInvalidNodeId) {
|
|
status = ERROR_CLUSTER_JOIN_IN_PROGRESS;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! cannot begin join because a join is "
|
|
"already in progress for node %2!u!.\n",
|
|
JoinerNodeId,
|
|
NmpJoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
// Perform the version compatibility check.
|
|
//
|
|
status = NmpIsNodeVersionAllowed(
|
|
NmGetNodeId(joinerNode),
|
|
*JoinerHighestVersion,
|
|
*JoinerLowestVersion,
|
|
TRUE
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] The version of the cluster prevents Node %1!ws! "
|
|
"from joining the cluster\n",
|
|
JoinerNodeId
|
|
);
|
|
goto FnExit;
|
|
}
|
|
|
|
// Fix up the joiner's version number if needed.
|
|
//
|
|
|
|
status = NmpValidateNodeVersion(
|
|
JoinerNodeId,
|
|
*JoinerHighestVersion,
|
|
*JoinerLowestVersion
|
|
);
|
|
|
|
if (status == ERROR_REVISION_MISMATCH) {
|
|
//
|
|
// At this point, the registry contains the new
|
|
// versions for the joining code.
|
|
// The new node information should be reread
|
|
// from the registry before resetting the cluster
|
|
// version
|
|
// make sure the joiner gets the database from the
|
|
// sponsor after the fixups have occured
|
|
//
|
|
status = NmpJoinFixupNodeVersion(
|
|
hXsaction,
|
|
JoinerNodeId,
|
|
*JoinerHighestVersion,
|
|
*JoinerLowestVersion
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node %1!ws! failed to fixup its node version\r\n",
|
|
JoinerNodeId);
|
|
goto FnExit;
|
|
}
|
|
}
|
|
else if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] The verison of Node %1!ws! could not be validated\n",
|
|
JoinerNodeId);
|
|
goto FnExit;
|
|
}
|
|
|
|
//
|
|
//at this point we ready to calculate the cluster version
|
|
//all the node versions are in the registry, the fixups have
|
|
//been made if neccessary
|
|
//
|
|
NmpResetClusterVersion(TRUE);
|
|
|
|
//
|
|
// Enable communication to the joiner.
|
|
//
|
|
// This must be the last test that can fail before the join is allowed
|
|
// to proceed.
|
|
//
|
|
status = ClusnetOnlineNodeComm(NmClusnetHandle, joinerNode->NodeId);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
if (status != ERROR_CLUSTER_NODE_ALREADY_UP) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to enable communication for node %1!u!, "
|
|
"status %2!u!\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
goto FnExit;
|
|
}
|
|
else {
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Officially begin the join process
|
|
//
|
|
CL_ASSERT(NmpJoinTimer == 0);
|
|
CL_ASSERT(NmpJoinAbortPending == FALSE);
|
|
CL_ASSERT(NmpJoinerUp == FALSE);
|
|
CL_ASSERT(NmpSponsorNodeId == ClusterInvalidNodeId);
|
|
|
|
NmpJoinerNodeId = joinerNode->NodeId;
|
|
NmpSponsorNodeId = sponsorNode->NodeId;
|
|
NmpJoinerOutOfSynch = FALSE;
|
|
NmpJoinSequence = GumGetCurrentSequence(GumUpdateMembership);
|
|
|
|
joinerNode->State = ClusterNodeJoining;
|
|
|
|
ClusterEvent(
|
|
CLUSTER_EVENT_NODE_JOIN,
|
|
joinerNode
|
|
);
|
|
|
|
NmpCleanupIfJoinAborted = TRUE;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Node %1!ws! join sequence = %2!u!\n",
|
|
JoinerNodeId,
|
|
NmpJoinSequence
|
|
);
|
|
|
|
CL_ASSERT(status == ERROR_SUCCESS);
|
|
|
|
FnExit:
|
|
|
|
if (lockAcquired) {
|
|
NmpLockedLeaveApi();
|
|
NmpReleaseLock();
|
|
}
|
|
else {
|
|
NmpLeaveApi();
|
|
}
|
|
|
|
if (hXsaction != NULL) {
|
|
if (status == ERROR_SUCCESS) {
|
|
DmCommitLocalUpdate(hXsaction);
|
|
}
|
|
else {
|
|
DmAbortLocalUpdate(hXsaction);
|
|
}
|
|
}
|
|
|
|
if (joinerNode != NULL) {
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
|
|
if (sponsorNode != NULL) {
|
|
OmDereferenceObject(sponsorNode);
|
|
}
|
|
|
|
if (fakeSuccess) {
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
return(status);
|
|
|
|
} // NmpUpdateJoinBegin2
|
|
|
|
|
|
DWORD
|
|
NmpCreateRpcBindings(
|
|
IN PNM_NODE Node
|
|
)
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
//
|
|
// Create the default binding for the whole cluster service
|
|
//
|
|
status = ClMsgCreateDefaultRpcBinding(
|
|
Node, &Node->DefaultRpcBindingGeneration);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
return(status);
|
|
}
|
|
|
|
//
|
|
// Create private bindings for the NM's use.
|
|
// We create one for reporting network connectivity and one for
|
|
// performing network failure isolation. The NM uses the
|
|
// default binding for operations on behalf of joining nodes.
|
|
//
|
|
if (Node->ReportRpcBinding != NULL) {
|
|
//
|
|
// Reuse the old binding.
|
|
//
|
|
status = ClMsgVerifyRpcBinding(Node->ReportRpcBinding);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to verify RPC binding for node %1!u!, "
|
|
"status %2!u!.\n",
|
|
Node->NodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
}
|
|
}
|
|
else {
|
|
//
|
|
// Create a new binding
|
|
//
|
|
status = ClMsgCreateRpcBinding(
|
|
Node,
|
|
&(Node->ReportRpcBinding),
|
|
0 );
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to create RPC binding for node %1!u!, "
|
|
"status %2!u!.\n",
|
|
Node->NodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
}
|
|
}
|
|
|
|
if (Node->IsolateRpcBinding != NULL) {
|
|
//
|
|
// Reuse the old binding.
|
|
//
|
|
status = ClMsgVerifyRpcBinding(Node->IsolateRpcBinding);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to verify RPC binding for node %1!u!, "
|
|
"status %2!u!.\n",
|
|
Node->NodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
}
|
|
}
|
|
else {
|
|
//
|
|
// Create a new binding
|
|
//
|
|
status = ClMsgCreateRpcBinding(
|
|
Node,
|
|
&(Node->IsolateRpcBinding),
|
|
0 );
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NM] Failed to create RPC binding for node %1!u!, "
|
|
"status %2!u!.\n",
|
|
Node->NodeId,
|
|
status
|
|
);
|
|
return(status);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Call other components to create their private bindings
|
|
//
|
|
status = GumCreateRpcBindings(Node);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
return(status);
|
|
}
|
|
|
|
status = EvCreateRpcBindings(Node);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
return(status);
|
|
}
|
|
|
|
status = FmCreateRpcBindings(Node);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
return(status);
|
|
}
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
} // NmpCreateRpcBindings
|
|
|
|
|
|
error_status_t
|
|
s_NmRpcCreateBinding(
|
|
IN handle_t IDL_handle,
|
|
IN DWORD JoinSequence,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerInterfaceId,
|
|
IN LPWSTR MemberNodeId
|
|
)
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Processing CreateBinding request from joining node %1!ws! for member node %2!ws!\n",
|
|
JoinerNodeId,
|
|
MemberNodeId
|
|
);
|
|
|
|
if (NmpLockedEnterApi(NmStateOnlinePending)) {
|
|
|
|
PNM_NODE joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode != NULL) {
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId) &&
|
|
!NmpJoinAbortPending
|
|
)
|
|
{
|
|
PNM_NODE memberNode;
|
|
|
|
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
CL_ASSERT(NmpJoinerUp == FALSE);
|
|
CL_ASSERT(NmpJoinTimer != 0);
|
|
|
|
//
|
|
// Suspend the join timer while we are working on
|
|
// behalf of the joiner. This precludes an abort
|
|
// from occuring as well.
|
|
//
|
|
NmpJoinTimer = 0;
|
|
|
|
memberNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
MemberNodeId
|
|
);
|
|
|
|
if (memberNode != NULL) {
|
|
PNM_INTERFACE netInterface = OmReferenceObjectById(
|
|
ObjectTypeNetInterface,
|
|
JoinerInterfaceId
|
|
);
|
|
|
|
if (netInterface != NULL) {
|
|
if (memberNode == NmLocalNode) {
|
|
status = NmpCreateJoinerRpcBindings(
|
|
joinerNode,
|
|
netInterface
|
|
);
|
|
}
|
|
else {
|
|
if (NM_NODE_UP(memberNode)) {
|
|
DWORD joinSequence = NmpJoinSequence;
|
|
RPC_BINDING_HANDLE binding =
|
|
Session[memberNode->NodeId];
|
|
|
|
CL_ASSERT(binding != NULL);
|
|
|
|
NmpReleaseLock();
|
|
|
|
NmStartRpc(memberNode->NodeId);
|
|
status = NmRpcCreateJoinerBinding(
|
|
binding,
|
|
joinSequence,
|
|
JoinerNodeId,
|
|
JoinerInterfaceId
|
|
);
|
|
NmEndRpc(memberNode->NodeId);
|
|
if(status != RPC_S_OK) {
|
|
NmDumpRpcExtErrorInfo(status);
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_DOWN;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because member node %2!ws! is down.\n",
|
|
JoinerNodeId,
|
|
MemberNodeId
|
|
);
|
|
}
|
|
}
|
|
|
|
OmDereferenceObject(netInterface);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Can't create binding for joining node %1!ws! - interface %2!ws! doesn't exist.\n",
|
|
JoinerNodeId,
|
|
JoinerInterfaceId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(memberNode);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_FOUND;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because member node %2!ws! does not exist\n",
|
|
JoinerNodeId,
|
|
MemberNodeId
|
|
);
|
|
}
|
|
|
|
//
|
|
// Verify that the join is still in progress
|
|
//
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId)
|
|
)
|
|
{
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
CL_ASSERT(NmpJoinerUp == FALSE);
|
|
CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId);
|
|
CL_ASSERT(NmpJoinTimer == 0);
|
|
CL_ASSERT(NmpJoinAbortPending == FALSE);
|
|
|
|
//
|
|
// Restart the join timer.
|
|
//
|
|
NmpJoinTimer = NM_JOIN_TIMEOUT;
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because the join was aborted.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because the join was aborted.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because the node is not a member of the cluster.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcCreateBinding
|
|
|
|
|
|
error_status_t
|
|
s_NmRpcCreateJoinerBinding(
|
|
IN handle_t IDL_handle,
|
|
IN DWORD JoinSequence,
|
|
IN LPWSTR JoinerNodeId,
|
|
IN LPWSTR JoinerInterfaceId
|
|
)
|
|
/*++
|
|
|
|
Notes:
|
|
|
|
The sponsor is responsible for aborting the join on failure.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Processing CreateBinding request for joining node %1!ws!.\n",
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
PNM_NODE joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode != NULL) {
|
|
PNM_INTERFACE netInterface = OmReferenceObjectById(
|
|
ObjectTypeNetInterface,
|
|
JoinerInterfaceId
|
|
);
|
|
|
|
if (netInterface != NULL) {
|
|
//
|
|
// Verify that a join is still in progress.
|
|
//
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId)
|
|
)
|
|
{
|
|
status = NmpCreateJoinerRpcBindings(
|
|
joinerNode,
|
|
netInterface
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
WCHAR errorString[12];
|
|
|
|
wsprintfW(&(errorString[0]), L"%u", status);
|
|
CsLogEvent3(
|
|
LOG_UNUSUAL,
|
|
NM_EVENT_JOINER_BIND_FAILED,
|
|
OmObjectName(joinerNode),
|
|
OmObjectName(netInterface->Network),
|
|
errorString
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failing create bindings for joining node %1!ws! because the join was aborted\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(netInterface);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND;
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Can't create binding for joining node %1!ws! - no corresponding interface for joiner interface %2!ws!.\n",
|
|
JoinerNodeId,
|
|
JoinerInterfaceId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] CreateBinding call for joining node %1!ws! failed because the node is not a member of the cluster.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Not in valid state to process the request.\n"
|
|
);
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcCreateJoinerBinding
|
|
|
|
|
|
DWORD
|
|
NmpCreateJoinerRpcBindings(
|
|
IN PNM_NODE JoinerNode,
|
|
IN PNM_INTERFACE JoinerInterface
|
|
)
|
|
/*++
|
|
|
|
Notes:
|
|
|
|
Called with the NmpLock held.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
PNM_NETWORK network = JoinerInterface->Network;
|
|
CL_NODE_ID joinerNodeId = JoinerNode->NodeId;
|
|
|
|
|
|
CL_ASSERT(JoinerNode->NodeId == NmpJoinerNodeId);
|
|
CL_ASSERT(JoinerNode->State == ClusterNodeJoining);
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Creating bindings for joining node %1!u! using network %2!ws!\n",
|
|
joinerNodeId,
|
|
OmObjectName(JoinerInterface->Network)
|
|
);
|
|
|
|
//
|
|
// Make sure that this node has an interface on the target network.
|
|
//
|
|
|
|
if (NmpIsNetworkForInternalUse(network)) {
|
|
if (network->LocalInterface != NULL) {
|
|
if ( NmpIsInterfaceRegistered(JoinerInterface) &&
|
|
NmpIsInterfaceRegistered(network->LocalInterface)
|
|
|
|
)
|
|
{
|
|
status = NmpSetNodeInterfacePriority(
|
|
JoinerNode,
|
|
0xFFFFFFFF,
|
|
JoinerInterface,
|
|
1
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
PNM_INTERFACE localInterface = network->LocalInterface;
|
|
|
|
//
|
|
// Create intracluster RPC bindings for the petitioner.
|
|
// The MM relies on these to perform the join.
|
|
//
|
|
|
|
OmReferenceObject(localInterface);
|
|
OmReferenceObject(JoinerNode);
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = NmpCreateRpcBindings(JoinerNode);
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Unable to create RPC binding for "
|
|
"joining node %1!u!, status %2!u!.\n",
|
|
joinerNodeId,
|
|
status
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(JoinerNode);
|
|
OmDereferenceObject(localInterface);
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to set interface priority for "
|
|
"network %1!ws! (%2!ws!), status %3!u!\n",
|
|
OmObjectId(network),
|
|
OmObjectName(network),
|
|
status
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_UNREACHABLE;
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NETINTERFACE_NOT_FOUND;
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_UNREACHABLE;
|
|
}
|
|
|
|
if (status !=ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[NMJOIN] Failed to create binding for joining node %1!u! "
|
|
"on network %2!ws! (%3!ws!), status %4!u!\n",
|
|
joinerNodeId,
|
|
OmObjectId(network),
|
|
OmObjectName(network),
|
|
status
|
|
);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmpCreateJoinerRpcBinding
|
|
|
|
|
|
|
|
error_status_t
|
|
s_NmRpcPetitionForMembership(
|
|
IN handle_t IDL_handle,
|
|
IN DWORD JoinSequence,
|
|
IN LPCWSTR JoinerNodeId
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Server side of a join petition.
|
|
|
|
Arguments:
|
|
|
|
IDL_handle - RPC binding handle, not used.
|
|
|
|
JoinSequence - Supplies the sequence returned from NmRpcJoinBegin
|
|
|
|
JoinerNodeId - Supplies the ID of the node attempting to join.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 error otherwise.
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status;
|
|
PNM_NODE joinerNode;
|
|
|
|
|
|
#ifdef CLUSTER_TESTPOINT
|
|
TESTPT(TestpointJoinFailPetition) {
|
|
return(999999);
|
|
}
|
|
#endif
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Processing petition to join from node %1!ws!.\n",
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
|
|
joinerNode = OmReferenceObjectById(ObjectTypeNode, JoinerNodeId);
|
|
|
|
if (joinerNode != NULL) {
|
|
//
|
|
// Verify that the join is still in progress
|
|
//
|
|
//
|
|
// DavidDio 6/13/2000
|
|
// There is a small window where a begin join update can
|
|
// succeed during a regroup, but the regroup ends before
|
|
// the joining node petitions to join. In this case, the
|
|
// node will be marked out of sync. Aborting the join
|
|
// after MMJoin() is much more heavyweight than before,
|
|
// so check for this condition now. (Bug 125778).
|
|
//
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId) &&
|
|
(!NmpJoinAbortPending) &&
|
|
(!NmpJoinerOutOfSynch)
|
|
)
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL, "[NMJOIN] Performing join.\n");
|
|
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
CL_ASSERT(NmpJoinerUp == FALSE);
|
|
CL_ASSERT(NmpJoinTimer != 0);
|
|
|
|
//
|
|
// Call the MM to join this node to the cluster membership.
|
|
// Disable the join timer. Once the node becomes an active
|
|
// member, we won't need it anymore.
|
|
//
|
|
NmpJoinTimer = 0;
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = MMJoin(
|
|
joinerNode->NodeId,
|
|
NM_CLOCK_PERIOD,
|
|
NM_SEND_HB_RATE,
|
|
NM_RECV_HB_RATE,
|
|
NM_MM_JOIN_TIMEOUT
|
|
);
|
|
|
|
NmpAcquireLock();
|
|
|
|
//
|
|
// Verify that the join is still in progress
|
|
//
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId)
|
|
)
|
|
{
|
|
CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId);
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
CL_ASSERT(NmpJoinTimer == 0);
|
|
CL_ASSERT(NmpJoinAbortPending == FALSE);
|
|
|
|
// GorN 3/22/2000
|
|
// We hit a case when MMJoin has succeeded after a regroup
|
|
// that killed one of the nodes (not joiner and not sponsor)
|
|
// thus leaving the joiner out of sync
|
|
// We need to abourt the join in this case too
|
|
|
|
if (status != MM_OK || NmpJoinerOutOfSynch) {
|
|
status = MMMapStatusToDosError(status);
|
|
|
|
if (NmpJoinerOutOfSynch) {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
}
|
|
|
|
//
|
|
// Abort the join
|
|
//
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Petition to join by node %1!ws! failed, status %2!u!.\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
//
|
|
// If MMJoin was unsuccessful it initiates a banishing
|
|
// regroup. This regroup will deliver node down events
|
|
// on all nodes that saw hb's from the joiner.
|
|
//
|
|
// Calling MMBlockIfRegroupIsInProgress here will guarantee that
|
|
// Phase2 cleanup is complete on all nodes, before we
|
|
// call NmpJoinAbort.
|
|
//
|
|
NmpReleaseLock();
|
|
MMBlockIfRegroupIsInProgress();
|
|
NmpAcquireLock();
|
|
|
|
NmpJoinAbort(status, joinerNode);
|
|
}
|
|
else {
|
|
NmpSuccessfulMMJoin = TRUE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Petition to join by node %1!ws! succeeded.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
#ifdef MM_IN_CLUSNET
|
|
|
|
if (status == MM_OK) {
|
|
|
|
status = NmJoinNodeToCluster(joinerNodeId);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
|
|
DWORD clusnetStatus;
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!ws! failed, status %2!u!.\n",
|
|
JoinerNodeId,
|
|
status
|
|
);
|
|
|
|
CL_LOGFAILURE( status );
|
|
|
|
NmpReleaseLock();
|
|
|
|
MMEject(joinerNodeId);
|
|
|
|
NmpAcquireLock();
|
|
|
|
clusnetStatus = ClusnetOfflineNodeComm(
|
|
NmClusnetHandle,
|
|
joinerNodeId
|
|
);
|
|
CL_ASSERT(
|
|
(status == ERROR_SUCCESS) ||
|
|
(status == ERROR_CLUSTER_NODE_ALREADY_DOWN
|
|
);
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join completed successfully.\n"
|
|
);
|
|
}
|
|
}
|
|
|
|
#endif // MM_IN_CLUSNET
|
|
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Petition to join by node %1!ws! failed because the join was aborted.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Petition by node %1!ws! failed because the join was aborted\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Petition to join by %1!ws! failed because the node is not a cluster member\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Not in valid state to process the request.\n"
|
|
);
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcPetitionForMembership
|
|
|
|
|
|
error_status_t
|
|
s_NmRpcGetLeaderNodeId(
|
|
IN handle_t IDL_handle,
|
|
IN DWORD JoinSequence, OPTIONAL
|
|
IN LPWSTR JoinerNodeId, OPTIONAL
|
|
OUT LPDWORD LeaderNodeId
|
|
)
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
PNM_NODE joinerNode = NULL;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)){
|
|
joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode != NULL) {
|
|
if ( (JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId) &&
|
|
!NmpJoinAbortPending
|
|
)
|
|
{
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
|
|
*LeaderNodeId = NmpLeaderNodeId;
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] GetLeaderNodeId call for joining node %1!ws! failed because the join was aborted.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] GetLeaderNodeId call for joining node %1!ws! failed because the node is not a member of the cluster.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Not in valid state to process GetLeaderNodeId request.\n"
|
|
);
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmRpcGetLeaderNodeId
|
|
|
|
|
|
DWORD
|
|
NmpUpdateJoinComplete(
|
|
IN PNM_JOIN_UPDATE JoinUpdate
|
|
)
|
|
{
|
|
DWORD status;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Processing JoinComplete update from node %1!ws!\n",
|
|
JoinUpdate->NodeId
|
|
);
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
PNM_NODE joinerNode;
|
|
LPWSTR joinerIdString = JoinUpdate->NodeId;
|
|
|
|
|
|
joinerNode = OmReferenceObjectById(ObjectTypeNode, joinerIdString);
|
|
|
|
if (joinerNode != NULL) {
|
|
|
|
CL_ASSERT(joinerNode != NmLocalNode);
|
|
|
|
//
|
|
// Verify that the join is still in progress and nothing has
|
|
// changed.
|
|
//
|
|
if ( (JoinUpdate->JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId) &&
|
|
(joinerNode->State == ClusterNodeJoining) &&
|
|
NmpJoinerUp &&
|
|
!NmpJoinerOutOfSynch
|
|
)
|
|
{
|
|
PNM_INTERFACE netInterface;
|
|
PNM_NETWORK network;
|
|
PLIST_ENTRY ifEntry;
|
|
|
|
|
|
NmpJoinerNodeId = ClusterInvalidNodeId;
|
|
NmpSponsorNodeId = ClusterInvalidNodeId;
|
|
NmpJoinTimer = 0;
|
|
NmpJoinAbortPending = FALSE;
|
|
NmpJoinSequence = 0;
|
|
NmpJoinerUp = FALSE;
|
|
|
|
if (JoinUpdate->IsPaused != 0) {
|
|
//
|
|
// This node is coming up in the paused state.
|
|
//
|
|
joinerNode->State = ClusterNodePaused;
|
|
} else {
|
|
joinerNode->State = ClusterNodeUp;
|
|
}
|
|
|
|
joinerNode->ExtendedState = ClusterNodeJoining;
|
|
|
|
ClusterEvent(CLUSTER_EVENT_NODE_UP, (PVOID)joinerNode);
|
|
|
|
//
|
|
// Reset the interface priorities for this node.
|
|
//
|
|
for (ifEntry = joinerNode->InterfaceList.Flink;
|
|
ifEntry != &joinerNode->InterfaceList;
|
|
ifEntry = ifEntry->Flink
|
|
)
|
|
{
|
|
netInterface = CONTAINING_RECORD(
|
|
ifEntry,
|
|
NM_INTERFACE,
|
|
NodeLinkage
|
|
);
|
|
|
|
network = netInterface->Network;
|
|
|
|
if ( NmpIsNetworkForInternalUse(network) &&
|
|
NmpIsInterfaceRegistered(netInterface)
|
|
)
|
|
{
|
|
status = ClusnetSetInterfacePriority(
|
|
NmClusnetHandle,
|
|
joinerNode->NodeId,
|
|
network->ShortId,
|
|
0
|
|
);
|
|
|
|
CL_ASSERT(status == ERROR_SUCCESS);
|
|
}
|
|
}
|
|
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_JOIN_ABORTED;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!ws! cannot complete because the join was aborted\n",
|
|
joinerIdString
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
}
|
|
else {
|
|
status =ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!ws! cannot complete because the node is not a cluster member.\n",
|
|
joinerIdString
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Not in valid state to process JoinComplete update.\n"
|
|
);
|
|
}
|
|
|
|
//
|
|
// If the multicast shared key is based on the cluster service account
|
|
// password, we may need to refresh, since the password might have
|
|
// changed and the joiner will be running under the new password.
|
|
//
|
|
if (status == ERROR_SUCCESS) {
|
|
status = NmpMulticastRegenerateKey(NULL);
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NM] Failed to regenerate cluster network multicast "
|
|
"keys, status %1!u!.\n",
|
|
status
|
|
);
|
|
//
|
|
// Not a de facto fatal error.
|
|
//
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
// DavidDio 10/27/2000
|
|
// Bug 213781: NmpUpdateJoinComplete must always return ERROR_SUCCESS.
|
|
// Otherwise, there is a small window whereby GUM sequence numbers on
|
|
// remaining cluster nodes can fall out of sync. If the join should
|
|
// be aborted, return ERROR_SUCCESS but poison the joiner out-of-band.
|
|
if (status != ERROR_SUCCESS) {
|
|
DWORD dwJoinerId;
|
|
|
|
if (JoinUpdate->NodeId != NULL) {
|
|
dwJoinerId = wcstoul(JoinUpdate->NodeId, NULL, 10);
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!u! failed with status %2!u!. Initiating banishment.\n",
|
|
dwJoinerId,
|
|
status
|
|
);
|
|
NmAdviseNodeFailure(dwJoinerId, status);
|
|
} else {
|
|
dwJoinerId = ClusterInvalidNodeId;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!ws! failed with status %2!u!. Cannot initiate banishment as node id is unknown.\n",
|
|
dwJoinerId,
|
|
status
|
|
);
|
|
}
|
|
}
|
|
|
|
return(ERROR_SUCCESS);
|
|
|
|
} // NmpUpdateJoinComplete
|
|
|
|
|
|
DWORD
|
|
NmpUpdateJoinAbort(
|
|
IN BOOL SourceNode,
|
|
IN LPDWORD JoinSequence,
|
|
IN LPWSTR JoinerNodeId
|
|
)
|
|
/*++
|
|
|
|
Notes:
|
|
|
|
|
|
--*/
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Received update to abort join sequence %1!u! (joiner id %2!ws!).\n",
|
|
*JoinSequence,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
PNM_NODE joinerNode = OmReferenceObjectById(
|
|
ObjectTypeNode,
|
|
JoinerNodeId
|
|
);
|
|
|
|
if (joinerNode != NULL) {
|
|
//
|
|
// Check if the specified join is still in progress.
|
|
//
|
|
if ( (*JoinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == joinerNode->NodeId)
|
|
)
|
|
{
|
|
CL_ASSERT(NmpSponsorNodeId != ClusterInvalidNodeId);
|
|
CL_ASSERT(joinerNode->State == ClusterNodeJoining);
|
|
|
|
//
|
|
// Assumption:
|
|
//
|
|
// An abort cannot occur during the MM join process.
|
|
// If the joiner is not already up, it cannot come up
|
|
// during the abort processing.
|
|
//
|
|
// Assert condition may not be true with the current MM join code.
|
|
// Some nodes might have got monitor node and set
|
|
// NmpJoinerUp state to TRUE by the time the sponsor issued
|
|
// an abort update
|
|
//
|
|
//CL_ASSERT(NmpJoinerUp == FALSE);
|
|
|
|
if (NmpCleanupIfJoinAborted) {
|
|
|
|
NmpCleanupIfJoinAborted = FALSE;
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Issuing a node down event for %1!u!.\n",
|
|
joinerNode->NodeId
|
|
);
|
|
|
|
//
|
|
// This node is not yet active in the membership.
|
|
// Call the node down event handler to finish the abort.
|
|
//
|
|
|
|
//
|
|
// We will not call NmpMsgCleanup1 and NmpMsgCleanup2,
|
|
// because we cannot guarantee that they will get executed
|
|
// in a barrier style fashion
|
|
//
|
|
// !!! Lock will be acquired by NmpNodeDownEventHandler
|
|
// second time. Is it OK?
|
|
//
|
|
NmpNodeDownEventHandler(joinerNode);
|
|
} else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Node down was already issued for %1!u!.\n",
|
|
joinerNode->NodeId
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Ignoring old join abort update with sequence %1!u!.\n",
|
|
*JoinSequence
|
|
);
|
|
}
|
|
|
|
OmDereferenceObject(joinerNode);
|
|
status = ERROR_SUCCESS;
|
|
}
|
|
else {
|
|
status = ERROR_CLUSTER_NODE_NOT_MEMBER;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!ws! cannot be aborted because the node is not a cluster member.\n",
|
|
JoinerNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Not in valid state to process JoinAbort update.\n"
|
|
);
|
|
}
|
|
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // NmpUpdateJoinAbort
|
|
|
|
|
|
VOID
|
|
NmpJoinAbort(
|
|
DWORD AbortStatus,
|
|
PNM_NODE JoinerNode
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Issues a JoinAbort update.
|
|
|
|
Notes:
|
|
|
|
Called with the NmpLock held.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
DWORD joinSequence = NmpJoinSequence;
|
|
WCHAR errorString[12];
|
|
|
|
|
|
CL_ASSERT(NmpJoinerNodeId != ClusterInvalidNodeId);
|
|
CL_ASSERT(NmpSponsorNodeId == NmLocalNodeId);
|
|
CL_ASSERT(JoinerNode->State == ClusterNodeJoining);
|
|
|
|
if (AbortStatus == ERROR_TIMEOUT) {
|
|
wsprintfW(&(errorString[0]), L"%u", AbortStatus);
|
|
CsLogEvent1(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_JOIN_TIMED_OUT,
|
|
OmObjectName(JoinerNode)
|
|
);
|
|
}
|
|
else {
|
|
wsprintfW(&(errorString[0]), L"%u", AbortStatus);
|
|
CsLogEvent2(
|
|
LOG_CRITICAL,
|
|
NM_EVENT_SPONSOR_JOIN_ABORTED,
|
|
OmObjectName(JoinerNode),
|
|
errorString
|
|
);
|
|
}
|
|
|
|
//
|
|
// Assumption:
|
|
//
|
|
// An abort cannot occur during the MM join process. If the joiner
|
|
// is not already up, it cannot come up during the abort processing.
|
|
//
|
|
if (NmpSuccessfulMMJoin == FALSE) {
|
|
//
|
|
// The joining node has not become active yet. Issue
|
|
// an abort update.
|
|
//
|
|
DWORD joinSequence = NmpJoinSequence;
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Issuing update to abort join of node %1!u!.\n",
|
|
NmpJoinerNodeId
|
|
);
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = GumSendUpdateEx(
|
|
GumUpdateMembership,
|
|
NmUpdateJoinAbort,
|
|
2,
|
|
sizeof(DWORD),
|
|
&joinSequence,
|
|
NM_WCSLEN(OmObjectId(JoinerNode)),
|
|
OmObjectId(JoinerNode)
|
|
);
|
|
|
|
NmpAcquireLock();
|
|
}
|
|
else {
|
|
//
|
|
// The joining node is already active in the membership.
|
|
// Ask the MM to kick it out. The node down event will
|
|
// finish the abort process.
|
|
//
|
|
CL_NODE_ID joinerNodeId = NmpJoinerNodeId;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Ejecting joining node %1!u! from the cluster membership.\n",
|
|
NmpJoinerNodeId
|
|
);
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = MMEject(joinerNodeId);
|
|
|
|
NmpAcquireLock();
|
|
}
|
|
|
|
if (status != MM_OK) {
|
|
status = MMMapStatusToDosError(status);
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Update to abort join of node %1!u! failed, status %2!u!\n",
|
|
JoinerNode->NodeId,
|
|
status
|
|
);
|
|
|
|
//
|
|
// If the join is still pending, and this is the sponsor node,
|
|
// force a timeout to retry the abort. If we aren't the sponsor,
|
|
// there isn't much we can do.
|
|
//
|
|
if ( (joinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId == JoinerNode->NodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId)
|
|
)
|
|
{
|
|
NmpJoinTimer = 1;
|
|
NmpJoinAbortPending = FALSE;
|
|
}
|
|
}
|
|
|
|
return;
|
|
|
|
} // NmpJoinAbort
|
|
|
|
|
|
VOID
|
|
NmpJoinAbortWorker(
|
|
IN PCLRTL_WORK_ITEM WorkItem,
|
|
IN DWORD Status,
|
|
IN DWORD BytesTransferred,
|
|
IN ULONG_PTR IoContext
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Worker thread for aborting a join.
|
|
|
|
--*/
|
|
{
|
|
DWORD joinSequence = PtrToUlong(WorkItem->Context);
|
|
|
|
|
|
NmpAcquireLock();
|
|
|
|
//
|
|
// The active thread count was bumped up when this item was scheduled.
|
|
// No need to call NmpEnterApi().
|
|
//
|
|
|
|
//
|
|
// If the join is still pending, begin the abort process.
|
|
//
|
|
if ( (joinSequence == NmpJoinSequence) &&
|
|
(NmpJoinerNodeId != ClusterInvalidNodeId) &&
|
|
NmpJoinAbortPending
|
|
)
|
|
{
|
|
PNM_NODE joinerNode = NmpIdArray[NmpJoinerNodeId];
|
|
|
|
if (joinerNode != NULL) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Worker thread initiating abort of joining node %1!u!\n",
|
|
NmpJoinerNodeId
|
|
);
|
|
|
|
NmpJoinAbort(ERROR_TIMEOUT, joinerNode);
|
|
}
|
|
else {
|
|
CL_ASSERT(joinerNode != NULL);
|
|
}
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Skipping join abort, sequence to abort %1!u!, current join sequence %2!u!, "
|
|
"joiner node: %3!u! sponsor node: %4!u!\n",
|
|
joinSequence,
|
|
NmpJoinSequence,
|
|
NmpJoinerNodeId,
|
|
NmpSponsorNodeId
|
|
);
|
|
}
|
|
|
|
NmpLockedLeaveApi();
|
|
|
|
NmpReleaseLock();
|
|
|
|
LocalFree(WorkItem);
|
|
|
|
return;
|
|
|
|
} // NmpJoinAbortWorker
|
|
|
|
|
|
VOID
|
|
NmpJoinTimerTick(
|
|
IN DWORD MsTickInterval
|
|
)
|
|
/*++
|
|
|
|
Notes:
|
|
Called with NmpLock held.
|
|
|
|
--*/
|
|
{
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
//
|
|
// If we are sponsoring a join, update the timer.
|
|
//
|
|
if ( (NmpJoinerNodeId != ClusterInvalidNodeId) &&
|
|
(NmpSponsorNodeId == NmLocalNodeId) &&
|
|
!NmpJoinAbortPending &&
|
|
(NmpJoinTimer != 0)
|
|
)
|
|
{
|
|
//ClRtlLogPrint(LOG_NOISE,
|
|
// "[NMJOIN] Timer tick (%1!u! ms)\n",
|
|
// Interval
|
|
// );
|
|
|
|
if (NmpJoinTimer > MsTickInterval) {
|
|
NmpJoinTimer -= MsTickInterval;
|
|
}
|
|
else {
|
|
//
|
|
// The join has timed out. Schedule a worker thread to
|
|
// carry out the abort process.
|
|
//
|
|
PCLRTL_WORK_ITEM workItem;
|
|
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Join of node %1!u! has timed out.\n",
|
|
NmpJoinerNodeId
|
|
);
|
|
|
|
workItem = LocalAlloc(LMEM_FIXED, sizeof(CLRTL_WORK_ITEM));
|
|
|
|
if (workItem != NULL) {
|
|
DWORD status;
|
|
|
|
ClRtlInitializeWorkItem(
|
|
workItem,
|
|
NmpJoinAbortWorker,
|
|
ULongToPtr(NmpJoinSequence)
|
|
);
|
|
|
|
status = ClRtlPostItemWorkQueue(
|
|
CsDelayedWorkQueue,
|
|
workItem,
|
|
0,
|
|
0
|
|
);
|
|
|
|
if (status == ERROR_SUCCESS) {
|
|
//
|
|
// Stop the timer, flag that an abort is in progress,
|
|
// and account for the thread we just scheduled.
|
|
//
|
|
NmpJoinTimer = 0;
|
|
NmpJoinAbortPending = TRUE;
|
|
NmpActiveThreadCount++;
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to schedule abort of join, status %1!u!.\n",
|
|
status
|
|
);
|
|
LocalFree(workItem);
|
|
}
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] Failed to allocate memory for join abort.\n"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
NmpLockedLeaveApi();
|
|
}
|
|
|
|
return;
|
|
|
|
} // NmpJoinTimerTick
|
|
|
|
|
|
VOID
|
|
NmTimerTick(
|
|
IN DWORD MsTickInterval
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Implements all of the NM timers. Called on every tick of
|
|
the common NM/MM timer - currently every 300ms.
|
|
|
|
Arguments:
|
|
|
|
MsTickInterval - The number of milliseconds that have passed
|
|
since the last tick.
|
|
|
|
ReturnValue:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
NmpAcquireLock();
|
|
|
|
NmpNetworkTimerTick(MsTickInterval);
|
|
|
|
NmpJoinTimerTick(MsTickInterval);
|
|
|
|
#if DBG
|
|
|
|
// Addition for checking for hung RPC threads.
|
|
NmpRpcTimerTick(MsTickInterval);
|
|
|
|
#endif // DBG
|
|
|
|
NmpReleaseLock();
|
|
|
|
return;
|
|
|
|
} // NmTimerTick
|
|
|
|
|
|
error_status_t
|
|
s_JoinAddNode3(
|
|
IN handle_t IDL_handle,
|
|
IN LPCWSTR lpszNodeName,
|
|
IN DWORD dwNodeHighestVersion,
|
|
IN DWORD dwNodeLowestVersion,
|
|
IN DWORD dwNodeProductSuite
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Adds a new node to the cluster.
|
|
|
|
Arguments:
|
|
|
|
IDL_handle - RPC binding handle, not used.
|
|
|
|
lpszNodeName - Supplies the name of the new node.
|
|
|
|
dwNodeHighestVersion - The highest cluster version number that the
|
|
new node can support.
|
|
|
|
dwNodeLowestVersion - The lowest cluster version number that the
|
|
new node can support.
|
|
|
|
dwNodeProductSuite - The product suite type identifier for the new node.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 error code otherwise.
|
|
|
|
Notes:
|
|
|
|
This is a new routine in NT5. It performs the AddNode operation
|
|
correctly. It will never be invoked by an NT4 system. It cannot
|
|
be invoked if an NT4 node is in the cluster without violating
|
|
the license agreement.
|
|
|
|
The cluster registry APIs cannot be called while holding the NmpLock,
|
|
or a deadlock may occur.
|
|
|
|
--*/
|
|
{
|
|
DWORD status;
|
|
DWORD registryNodeLimit;
|
|
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Received request to add node '%1!ws!' to the cluster.\n",
|
|
lpszNodeName
|
|
);
|
|
|
|
//
|
|
// Read the necessary registry parameters before acquiring
|
|
// the NM lock.
|
|
//
|
|
status = DmQueryDword(
|
|
DmClusterParametersKey,
|
|
CLUSREG_NAME_MAX_NODES,
|
|
®istryNodeLimit,
|
|
NULL
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
registryNodeLimit = 0;
|
|
}
|
|
|
|
NmpAcquireLock();
|
|
|
|
if (NmpLockedEnterApi(NmStateOnline)) {
|
|
DWORD retryCount = 0;
|
|
|
|
//if this is the last node and it has been evicted
|
|
//but the cleanup hasnt completed and hence the
|
|
//service is up, then it should not entertain
|
|
//any new join requests
|
|
if (NmpLastNodeEvicted)
|
|
{
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] This node was evicted and hence is not in a valid state to process a "
|
|
"request to add a node to the cluster.\n"
|
|
);
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
NmpLockedLeaveApi();
|
|
goto FnExit;
|
|
}
|
|
|
|
|
|
while (TRUE) {
|
|
if (NmpLeaderNodeId == NmLocalNodeId) {
|
|
//
|
|
// This node is the leader, call the internal
|
|
// handler directly.
|
|
//
|
|
status = NmpAddNode(
|
|
lpszNodeName,
|
|
dwNodeHighestVersion,
|
|
dwNodeLowestVersion,
|
|
dwNodeProductSuite,
|
|
registryNodeLimit
|
|
);
|
|
}
|
|
else {
|
|
//
|
|
// Forward the request to the leader.
|
|
//
|
|
RPC_BINDING_HANDLE binding = Session[NmpLeaderNodeId];
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Forwarding request to add node '%1!ws!' "
|
|
"to the cluster to the leader (node %!u!).\n",
|
|
lpszNodeName,
|
|
NmpLeaderNodeId
|
|
);
|
|
|
|
CL_ASSERT(binding != NULL);
|
|
|
|
NmpReleaseLock();
|
|
|
|
status = NmRpcAddNode(
|
|
binding,
|
|
lpszNodeName,
|
|
dwNodeHighestVersion,
|
|
dwNodeLowestVersion,
|
|
dwNodeProductSuite
|
|
);
|
|
|
|
NmpAcquireLock();
|
|
}
|
|
|
|
//
|
|
// Check for the error codes that indicate either that
|
|
// another AddNode operation is in progress or that the
|
|
// leadership is changing. We will retry in these cases.
|
|
//
|
|
if ( (status != ERROR_CLUSTER_JOIN_IN_PROGRESS) &&
|
|
(status != ERROR_NODE_NOT_AVAILABLE)
|
|
) {
|
|
break;
|
|
}
|
|
|
|
//
|
|
// Sleep for 3 seconds and try again. We will give up and
|
|
// return the error after retrying for 2 minutes.
|
|
//
|
|
if (++retryCount > 40) {
|
|
break;
|
|
}
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] AddNode operation for node '%1!ws! delayed "
|
|
"waiting for competing AddNode operation to complete.\n",
|
|
lpszNodeName
|
|
);
|
|
|
|
NmpReleaseLock();
|
|
|
|
Sleep(3000);
|
|
|
|
NmpAcquireLock();
|
|
|
|
} // end while(TRUE)
|
|
|
|
NmpLockedLeaveApi();
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_UNUSUAL,
|
|
"[NMJOIN] This system is not in a valid state to process a "
|
|
"request to add a node to the cluster.\n"
|
|
);
|
|
status = ERROR_NODE_NOT_AVAILABLE;
|
|
}
|
|
|
|
FnExit:
|
|
NmpReleaseLock();
|
|
|
|
return(status);
|
|
|
|
} // s_NmJoinAddNode3
|
|
|
|
|
|
// This is used by setup of all highest major versions post 1.0
|
|
error_status_t
|
|
s_JoinAddNode2(
|
|
IN handle_t IDL_handle,
|
|
IN LPCWSTR lpszNodeName,
|
|
IN DWORD dwNodeHighestVersion,
|
|
IN DWORD dwNodeLowestVersion
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Adds a new node to the cluster database.
|
|
|
|
Arguments:
|
|
|
|
IDL_handle - RPC binding handle, not used.
|
|
|
|
lpszNodeName - Supplies the name of the new node.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 error code otherwise.
|
|
|
|
Notes:
|
|
|
|
This routine was defined in NT4-SP4. JoinAddNode3 is used by NT5. Since
|
|
it is impossible to install clustering using the NT4-SP4 software,
|
|
this routine should never be invoked.
|
|
|
|
--*/
|
|
|
|
{
|
|
CL_ASSERT(FALSE);
|
|
|
|
return(ERROR_CLUSTER_INCOMPATIBLE_VERSIONS);
|
|
}
|
|
|
|
error_status_t
|
|
s_JoinAddNode(
|
|
IN handle_t IDL_handle,
|
|
IN LPCWSTR lpszNodeName
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Adds a new node to the cluster database.
|
|
|
|
Arguments:
|
|
|
|
IDL_handle - RPC binding handle, not used.
|
|
|
|
lpszNodeName - Supplies the name of the new node.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS if successful
|
|
|
|
Win32 error code otherwise.
|
|
|
|
Notes:
|
|
|
|
This is the routine that NT4-SP3 setup invokes to add a new node to
|
|
a cluster. The combination of NT4-SP3 and NT5 is not supported.
|
|
|
|
--*/
|
|
|
|
{
|
|
return(ERROR_CLUSTER_INCOMPATIBLE_VERSIONS);
|
|
}
|
|
|
|
//
|
|
// The rest of the code is currently unused.
|
|
//
|
|
error_status_t
|
|
s_NmRpcDeliverJoinMessage(
|
|
IN handle_t IDL_handle,
|
|
IN UCHAR * Message,
|
|
IN DWORD MessageLength
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Server side of the RPC interface for delivering membership
|
|
join messages.
|
|
|
|
Arguments:
|
|
|
|
IDL_handle - RPC binding handle, not used.
|
|
|
|
buffer - Supplies a pointer to the message data.
|
|
|
|
length - Supplies the length of the message data.
|
|
|
|
Return Value:
|
|
|
|
ERROR_SUCCESS
|
|
|
|
--*/
|
|
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
|
|
#ifdef MM_IN_CLUSNET
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Delivering join message to Clusnet.\n"
|
|
);
|
|
|
|
status = ClusnetDeliverJoinMessage(
|
|
NmClusnetHandle,
|
|
Message,
|
|
MessageLength
|
|
);
|
|
|
|
#endif
|
|
return(status);
|
|
}
|
|
|
|
|
|
#ifdef MM_IN_CLUSNET
|
|
|
|
DWORD
|
|
NmpSendJoinMessage(
|
|
IN ULONG DestNodeMask,
|
|
IN PVOID Message,
|
|
IN ULONG MessageLength
|
|
)
|
|
{
|
|
DWORD status = ERROR_SUCCESS;
|
|
CL_NODE_ID node;
|
|
|
|
|
|
CL_ASSERT(NmMaxNodeId != ClusterInvalidNodeId);
|
|
|
|
for ( node = ClusterMinNodeId;
|
|
node <= NmMaxNodeId;
|
|
node++, (DestNodeMask >>= 1)
|
|
)
|
|
{
|
|
|
|
if (DestNodeMask & 0x1) {
|
|
if (node != NmLocalNodeId) {
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Sending join message to node %1!u!.\n",
|
|
node
|
|
);
|
|
|
|
status = NmRpcDeliverJoinMessage(
|
|
Session[node->NodeId],
|
|
Message,
|
|
MessageLength
|
|
);
|
|
|
|
if (status == RPC_S_CALL_FAILED_DNE) {
|
|
//
|
|
// Try again since the first call to a restarted
|
|
// RPC server will fail.
|
|
//
|
|
status = NmRpcDeliverJoinMessage(
|
|
Session[node->NodeId],
|
|
Message,
|
|
MessageLength
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Delivering join message to local node.\n"
|
|
);
|
|
|
|
status = ClusnetDeliverJoinMessage(
|
|
NmClusnetHandle,
|
|
Message,
|
|
MessageLength
|
|
);
|
|
}
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] send of join message to node %1!u! failed, status %2!u!\n",
|
|
node,
|
|
status
|
|
);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmpSendJoinMessage
|
|
|
|
|
|
DWORD
|
|
NmJoinNodeToCluster(
|
|
CL_NODE_ID joinerNodeId
|
|
)
|
|
{
|
|
DWORD status;
|
|
PVOID message = NULL;
|
|
ULONG messageLength;
|
|
ULONG destMask;
|
|
CLUSNET_JOIN_PHASE phase;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] Joining node %1!u! to the cluster.\n",
|
|
joinerNodeId
|
|
);
|
|
|
|
for (phase = ClusnetJoinPhase1; phase <= ClusnetJoinPhase4; phase++) {
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] JoinNode phase %1!u!\n",
|
|
phase
|
|
);
|
|
|
|
status = ClusnetJoinCluster(
|
|
NmClusnetHandle,
|
|
joinerNodeId,
|
|
phase,
|
|
NM_MM_JOIN_TIMEOUT,
|
|
&message,
|
|
&messageLength,
|
|
&destMask
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] JoinNode phase %1!u! failed, status %2!u!\n",
|
|
phase,
|
|
status
|
|
);
|
|
|
|
break;
|
|
}
|
|
|
|
status = NmpSendJoinMessage(
|
|
destMask,
|
|
message,
|
|
messageLength
|
|
);
|
|
|
|
if (status != ERROR_SUCCESS) {
|
|
DWORD abortStatus;
|
|
|
|
ClRtlLogPrint(LOG_NOISE,
|
|
"[NMJOIN] send join message failed %1!u!, aborting join of node %2!u!.\n",
|
|
status,
|
|
joinerNodeId
|
|
);
|
|
|
|
abortStatus = ClusnetJoinCluster(
|
|
NmClusnetHandle,
|
|
joinerNodeId,
|
|
ClusnetJoinPhaseAbort,
|
|
NM_MM_JOIN_TIMEOUT,
|
|
&message,
|
|
&messageLength,
|
|
&destMask
|
|
);
|
|
|
|
if (abortStatus == ERROR_SUCCESS) {
|
|
(VOID) NmpSendJoinMessage(
|
|
destMask,
|
|
message,
|
|
messageLength
|
|
);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (message != NULL) {
|
|
ClusnetEndJoinCluster(NmClusnetHandle, message);
|
|
}
|
|
|
|
return(status);
|
|
|
|
} // NmJoinNodeToCluster
|
|
|
|
|
|
#endif // MM_IN_CLUSNET
|
|
|