This module handles the initialization path where a newly booted node joins an existing cluster.
John Vert (jvert) 6/6/1996
--*/ #include "initp.h"
#include "lmcons.h"
#include "lmremutl.h"
#include "lmapibuf.h"
#include <clusverp.h>
// Delay values, in milliseconds, handed to JoinpConnectToSponsor. The connect
// thread sleeps for the given delay before it contacts the prospective sponsor.
#define JOIN_CLIENT_NO_DELAY 0 // delay for high-prio networks
#define JOIN_CLIENT_NETWORK_DELAY 1000 // delay for low-prio networks
#define JOIN_CLIENT_RESOURCE_DELAY 2000 // delay for cluster IP/netname
// Local types
//
// Local data
//

// Lock taken around updates to the join thread count and sponsor selection.
CRITICAL_SECTION    CsJoinLock;

// Manual-reset event that ClusterJoin waits on; set when a sponsor has been
// selected or when only ClusterJoin itself is still counted.
HANDLE              CsJoinEvent = NULL;

// Number of join participants still running. ClusterJoin sets it to 1 for
// itself before any connect thread is spawned.
DWORD               CsJoinThreadCount = 0;

// Failure recorded by a connect thread; set to
// ERROR_CLUSTER_INCOMPATIBLE_VERSIONS on a version mismatch.
DWORD               CsJoinStatus = ERROR_SUCCESS;

// Binding and name of the sponsor picked by the first successful connect
// thread.
RPC_BINDING_HANDLE  CsJoinSponsorBinding = NULL;
LPWSTR              CsJoinSponsorName = NULL;

//
// While another node is joining, we will keep track of any DM or FM updates.
//
BOOL                CsDmOrFmHasChanged = FALSE;
//
// Local function prototypes
//

// DmEnumKeys callback: tries each remote network interface address.
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY Key,
    IN PWSTR  NodeId,
    IN PVOID  Context
    );

// DmEnumKeys callback: tries each remote node by host name.
VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY Key,
    IN PWSTR  NodeId,
    IN PVOID  Context
    );

// Spawns a connect thread for one prospective sponsor.
VOID
JoinpConnectToSponsor(
    IN PWSTR SponsorName,
    IN DWORD Delay
    );

// Thread routine started by JoinpConnectToSponsor.
DWORD
WINAPI
JoinpConnectThread(
    LPVOID Parameter
    );

// Runs the join sequence through the chosen sponsor binding.
DWORD
JoinpAttemptJoin(
    LPWSTR             SponsorName,
    RPC_BINDING_HANDLE JoinMasterBinding
    );

// Object enumeration callback that marks node IDs already in use.
BOOL
JoinpAddNodeCallback(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    );

BOOL
JoinpEnumNetworksToSetPriority(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    );
DWORD ClusterJoin( VOID ) /*++
Routine Description:
Called to attempt to join a cluster that already exists.
Return Value:
ERROR_SUCCESS if successful
Win32 error code otherwise.
{ DWORD Status; LPWSTR ClusterIpAddress = NULL; LPWSTR ClusIpAddrResource = NULL; LPWSTR ClusterNameId = NULL; DWORD idMaxSize = 0; DWORD idSize = 0; HDMKEY hClusNameResKey = NULL; HDMKEY hClusIPAddrResKey = NULL;
// Try connecting using the cluster IP address first. get the cluster
// name resource, looking up its dependency for the cluster IP addr
Status = DmQuerySz(DmClusterParametersKey, CLUSREG_NAME_CLUS_CLUSTER_NAME_RES, &ClusterNameId, &idMaxSize, &idSize);
if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get cluster name resource, error %1!u!.\n", Status); goto error_exit; }
// open name resource key and read its DependsOn key
hClusNameResKey = DmOpenKey( DmResourcesKey, ClusterNameId, KEY_READ );
if ( hClusNameResKey == NULL ) {
Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to open Cluster Name resource key, error %1!u!.\n", Status); goto error_exit; }
// allocate enough space for the GUID and the Parameters string
idMaxSize = ( CS_NETWORK_ID_LENGTH + sizeof( CLUSREG_KEYNAME_PARAMETERS ) + 2) * sizeof(WCHAR); ClusIpAddrResource = LocalAlloc( LMEM_FIXED, idMaxSize );
if ( ClusIpAddrResource == NULL ) {
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] no memory for Cluster Ip address resource ID!\n"); goto error_exit; }
Status = DmQueryMultiSz(hClusNameResKey, CLUSREG_NAME_RES_DEPENDS_ON, &ClusIpAddrResource, &idMaxSize, &idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get Cluster Ip address resource ID, error %1!u!.\n", Status); goto error_exit; }
lstrcatW( ClusIpAddrResource, L"\\" ); lstrcatW( ClusIpAddrResource, CLUSREG_KEYNAME_PARAMETERS ); hClusIPAddrResKey = DmOpenKey( DmResourcesKey, ClusIpAddrResource, KEY_READ );
if ( hClusIPAddrResKey == NULL ) {
Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to open Cluster IP Address resource key, error %1!u!.\n", Status); goto error_exit; }
// get the IP Address; note that these value names are not defined
// in a global way. if they are changed, this code will break
idMaxSize = idSize = 0; Status = DmQuerySz(hClusIPAddrResKey, L"Address", &ClusterIpAddress, &idMaxSize, &idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get Cluster Ip address, error %1!u!.\n", Status); goto error_exit; }
// Spawn threads to find a sponsor. We will try the make connections using
// the cluster IP address, the IP address of each node on each network, and
// the name of each node in the cluster. The connects will proceed in
// parallel. We'll use the first one that succeeds.
CsJoinEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (CsJoinEvent == NULL) { Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to create join event, error %1!u!.\n", Status ); goto error_exit; }
CsJoinThreadCount = 1; InitializeCriticalSection(&CsJoinLock); EnterCriticalSection(&CsJoinLock);
DmEnumKeys(DmNetInterfacesKey, JoinpEnumNodesAndJoinByAddress, NULL);
DmEnumKeys(DmNodesKey, JoinpEnumNodesAndJoinByHostName, NULL);
// give the other threads a chance to start since using the cluster IP
// address to join with is problematic when the resource moves in the
// middle of a join
JoinpConnectToSponsor(ClusterIpAddress, JOIN_CLIENT_RESOURCE_DELAY);
//update status for scm
CsServiceStatus.dwCheckPoint++; CsAnnounceServiceStatus();
if(CsJoinThreadCount == 1) SetEvent(CsJoinEvent);
Status = WaitForSingleObject(CsJoinEvent, INFINITE); CL_ASSERT(Status == WAIT_OBJECT_0);
EnterCriticalSection(&CsJoinLock); ClRtlLogPrint(LOG_NOISE, "[JOIN] Got out of the join wait, CsJoinThreadCount = %1!u!.\n", CsJoinThreadCount );
if(--CsJoinThreadCount == 0) { CloseHandle(CsJoinEvent); DeleteCriticalSection(&CsJoinLock); } else LeaveCriticalSection(&CsJoinLock);
// All of the threads have failed or one of them made a connection,
// use it to join.
if (CsJoinSponsorBinding != NULL) { CL_ASSERT(CsJoinSponsorName != NULL);
ClRtlLogPrint(LOG_UNUSUAL, "[JOIN] Attempting join with sponsor %1!ws!.\n", CsJoinSponsorName );
// Chittur Subbaraman (chitturs) - 10/27/98
// If the database restore operation is requested, then
// refuse to join the cluster and return an error code.
if ( CsDatabaseRestore == TRUE ) { Status = ERROR_CLUSTER_NODE_UP; LocalFree(CsJoinSponsorName); goto error_exit; }
Status = JoinpAttemptJoin(CsJoinSponsorName, CsJoinSponsorBinding);
RpcBindingFree(&CsJoinSponsorBinding); LocalFree(CsJoinSponsorName); } else { //we couldnt create a binding to the sponsorer
if(CsJoinStatus == ERROR_SUCCESS) { //we did the version check in joinpconnectthread but for some reason
//couldnt produce a binding
Status = ERROR_BAD_NETPATH; ClRtlLogPrint(LOG_CRITICAL, "[JOIN] Unable to connect to any sponsor node.\n"); } else { Status = CsJoinStatus; }
// rajdas: If the join did not suceed due to version mismatch we shouldn't try to form a cluster.
// Bug ID: 152229
error_exit: if ( ClusterNameId ) { LocalFree( ClusterNameId ); }
if ( ClusterIpAddress ) { LocalFree( ClusterIpAddress ); }
if ( ClusIpAddrResource ) { LocalFree( ClusIpAddrResource ); }
if ( hClusNameResKey ) { DmCloseKey( hClusNameResKey ); }
if ( hClusIPAddrResKey ) { DmCloseKey( hClusIPAddrResKey ); }
return(Status); }
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY Key,
    IN PWSTR  NetInterfaceId,
    IN PVOID  Context
    )
/*++

Routine Description:

    DmEnumKeys callback. Attempts to establish an RPC connection to a
    specified node using its IP address. Interfaces that belong to the local
    node are skipped.

Arguments:

    Key - handle to the network interface key being enumerated. It is closed
          before this routine returns.

    NetInterfaceId - pointer to string representing net IF ID (guid).
                     Not referenced.

    Context - Not referenced (ClusterJoin passes NULL).

Return Value:

    None.

--*/
{
    DWORD   status;
    LPWSTR  NetIFNodeID = NULL;
    LPWSTR  NetIFIpAddress = NULL;
    LPWSTR  NetIFNetwork = NULL;
    HDMKEY  NetIFNetworkKey = NULL;
    DWORD   NetIFNetworkPriority;
    DWORD   idMaxSize = 0;
    DWORD   idSize = 0;

    //
    // get the NodeId Value from the NetIF key and if it's us,
    // skip this netIF
    //
    status = DmQuerySz(Key,
                       CLUSREG_NAME_NETIFACE_NODE,
                       &NetIFNodeID,
                       &idMaxSize,
                       &idSize);

    if ( status == ERROR_SUCCESS ) {

        if (lstrcmpiW(NetIFNodeID, NmLocalNodeIdString) != 0) {

            //
            // it's not us so get the address and try it...
            //
            idMaxSize = idSize = 0;
            status = DmQuerySz(Key,
                               CLUSREG_NAME_NETIFACE_ADDRESS,
                               &NetIFIpAddress,
                               &idMaxSize,
                               &idSize);

            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                              "[JOIN] failed to get NetInterface Address, error %1!u!.\n",
                              status);
                goto error_exit;
            }

            //
            // Determine the delay based on the network priority. If we
            // cannot find it in the cluster database, we still try to
            // connect to the sponsor assuming the lowest priority.
            //
            NetIFNetworkPriority = 0xFFFFFFFF;

            idMaxSize = idSize = 0;
            status = DmQuerySz(Key,
                               CLUSREG_NAME_NETIFACE_NETWORK,
                               &NetIFNetwork,
                               &idMaxSize,
                               &idSize);

            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                              "[JOIN] Failed to get NetInterface Network, error %1!u!.\n",
                              status);
                goto ConnectToSponsor;
            }

            NetIFNetworkKey = DmOpenKey(DmNetworksKey,
                                        NetIFNetwork,
                                        KEY_READ);

            if ( NetIFNetworkKey == NULL ) {
                //
                // Pick up the real failure code; status still holds
                // ERROR_SUCCESS from the query above.
                //
                status = GetLastError();
                ClRtlLogPrint(LOG_CRITICAL,
                              "[JOIN] Failed to open key for network %1!ws!, error %2!u!.\n",
                              NetIFNetwork,
                              status
                              );
                goto ConnectToSponsor;
            }

            status = DmQueryDword(NetIFNetworkKey,
                                  CLUSREG_NAME_NET_PRIORITY,
                                  &NetIFNetworkPriority,
                                  0);

            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                              "[JOIN] Failed to get NetInterface network priority, error %1!u!.\n",
                              status);
            }

ConnectToSponsor:
            //
            // attempt the join with this address
            //
            JoinpConnectToSponsor(NetIFIpAddress,
                                  JOIN_CLIENT_GET_NETWORK_DELAY(NetIFNetworkPriority));
        }

    } else {
        ClRtlLogPrint(LOG_CRITICAL,
                      "[JOIN] failed to get NetInterface Node ID, error %1!u!.\n",
                      status);
    }

error_exit:

    DmCloseKey(Key);

    //
    // Release everything acquired above, including the network name and the
    // network key, which were previously left allocated/open.
    //
    if ( NetIFNetworkKey ) {
        DmCloseKey( NetIFNetworkKey );
    }

    if ( NetIFNetwork ) {
        LocalFree( NetIFNetwork );
    }

    if ( NetIFNodeID ) {
        LocalFree( NetIFNodeID );
    }

    if ( NetIFIpAddress ) {
        LocalFree( NetIFIpAddress );
    }

    return;
}
VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY Key,
    IN PWSTR  NodeId,
    IN PVOID  Context
    )
/*++

Routine Description:

    DmEnumKeys callback. Attempts to establish an RPC connection to a
    specified node using its host name. The local node is skipped.

Arguments:

    Key - handle to the node key being enumerated. It is closed before this
          routine returns.

    NodeId - pointer to string representing node ID (number)

    Context - Not referenced (ClusterJoin passes NULL).

Return Value:

    None.

--*/
{
    DWORD   status;
    LPWSTR  nodeName = NULL;
    DWORD   nodeNameLen = 0;
    DWORD   nodeNameSize = 0;

    //
    // Try to connect if this is not us
    //
    if (lstrcmpiW(NodeId, NmLocalNodeIdString) != 0) {

        status = DmQuerySz(Key,
                           CLUSREG_NAME_NODE_NAME,
                           &nodeName,
                           &nodeNameLen,
                           &nodeNameSize);

        if (status == ERROR_SUCCESS) {
            //
            // JoinpConnectToSponsor copies the name, so it can be freed here.
            //
            JoinpConnectToSponsor(nodeName, JOIN_CLIENT_NETWORK_DELAY);
            LocalFree(nodeName);
        }
    }

    //
    // Close the enumerated key, matching what the other DmEnumKeys callback
    // in this module (JoinpEnumNodesAndJoinByAddress) does with its key.
    // NOTE(review): assumes DmEnumKeys leaves closing the key to the
    // callback - confirm against the DM enumeration contract.
    //
    DmCloseKey(Key);

    return;
}
VOID
JoinpConnectToSponsor(
    IN PWSTR SponsorName,
    IN DWORD Delay
    )
/*++

Routine Description:

    Spawns a thread (JoinpConnectThread) that attempts to establish an RPC
    connection to a specified node.

    ClusterJoin calls this routine, directly and through its enumeration
    callbacks, while it holds CsJoinLock; CsJoinThreadCount is updated here
    without taking the lock again.

Arguments:

    SponsorName - The name (or IP address) of the target sponsor. The string
                  is copied; the caller keeps ownership of its buffer.

    Delay - Milliseconds to wait before sending request

Return Value:

    None. Failures are logged and no thread is started.

--*/
{
    HANDLE                  threadHandle;
    DWORD                   status = ERROR_SUCCESS;
    DWORD                   threadId;
    PJOIN_SPONSOR_CONTEXT   context;

    ClRtlLogPrint(LOG_UNUSUAL,
                  "[JOIN] Spawning thread to connect to sponsor %1!ws!\n",
                  SponsorName
                  );

    //
    // Allocate the context and sponsor name buffer separately. If this
    // thread "wins" sponsorship, the name buffer will be reused.
    //
    context = LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, sizeof(*context) );

    if (context == NULL) {
        ClRtlLogPrint(LOG_CRITICAL,
                      "[JOIN] Failed to allocate memory.\n"
                      );
        return;
    }

    context->Name = LocalAlloc(
                        LMEM_FIXED | LMEM_ZEROINIT,
                        (lstrlenW(SponsorName) + 1 ) * sizeof(WCHAR)
                        );

    if (context->Name == NULL) {
        LocalFree(context);
        ClRtlLogPrint(LOG_CRITICAL,
                      "[JOIN] Failed to allocate memory for sponsor name.\n"
                      );
        return;
    }

    lstrcpyW(context->Name, SponsorName);
    context->Delay = Delay;

    //
    // Count the thread before it exists: the thread decrements the count
    // when it finishes, and the failure path below undoes this increment.
    //
    CsJoinThreadCount++;

    threadHandle = CreateThread(
                       NULL,
                       0,
                       JoinpConnectThread,
                       context,
                       0,
                       &threadId
                       );

    if (threadHandle != NULL) {
        //
        // The thread owns the context from here on; the handle is not needed.
        //
        CloseHandle(threadHandle);
    } else {
        status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
                      "[JOIN] Failed to spawn connect thread, error %1!u!.\n",
                      status
                      );

        --CsJoinThreadCount;
        LocalFree(context->Name);
        LocalFree(context);
    }

    return;

}  // JoinpConnectToSponsor
DWORD
WINAPI
VerifyJoinVersionData(
    LPWSTR  sponsorName
    )
/*++

Routine Description:

    Verify that the sponsor and the joiner are compatible. Connects to the
    sponsor's JoinVersion RPC interface, tries each configured RPC security
    package until the version exchange succeeds, records the package index
    that worked (if none has been recorded yet) and compares the version
    ranges.

Arguments:

    sponsorName - pointer to text string of sponsor to use

Return Value:

    ERROR_SUCCESS - if ok to continue join

    The status reported by the sponsor if the sponsor refused the join.

    ERROR_CLUSTER_INCOMPATIBLE_VERSIONS if the local version check fails.

    An RPC error code if the sponsor could not be contacted.

--*/
{
    DWORD               status;
    LPWSTR              bindingString = NULL;
    RPC_BINDING_HANDLE  bindingHandle = NULL;
    DWORD               SponsorNodeId;
    DWORD               ClusterHighestVersion;
    DWORD               ClusterLowestVersion;
    DWORD               JoinStatus = ERROR_SUCCESS;
    DWORD               packageIndex;

    //
    // Attempt to connect to the sponsor's JoinVersion RPC interface.
    //
    status = RpcStringBindingComposeW(
                 L"6e17aaa0-1a47-11d1-98bd-0000f875292e",
                 L"ncadg_ip_udp",
                 sponsorName,
                 NULL,
                 NULL,
                 &bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to compose JoinVersion string binding for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
        goto error_exit;
    }

    status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to build JoinVersion binding for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
        goto error_exit;
    }

    //
    // under load, the sponsor might take a while to respond back to the
    // joiner. The default timeout is at 30 secs and this seems to work
    // ok. Note that this means the sponsor has 30 secs to reply to either
    // the RPC request or ping. As long it makes any reply, then the joiner's
    // RPC will continue to wait and not time out the sponsor.
    //
    status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_JOINVERSION_RPC_COM_TIMEOUT );

    if (status != RPC_S_OK) {
        // Not fatal: log it and carry on with the default timeout.
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to set JoinVersion com timeout for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
    }

    status = RpcEpResolveBinding(bindingHandle, JoinVersion_v2_0_c_ifspec);

    if (status != RPC_S_OK) {
        if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
             (status == RPC_S_NOT_LISTENING) ||
             (status == EPT_S_NOT_REGISTERED)
           )
        {
            ClRtlLogPrint(LOG_NOISE,
                          "[JOIN] Sponsor %1!ws! is not available (JoinVersion), status=%2!u!.\n",
                          sponsorName,
                          status
                          );
        } else {
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[JOIN] Unable to resolve JoinVersion endpoint for sponsor %1!ws!, status %2!u!.\n",
                          sponsorName,
                          status
                          );
        }
        goto error_exit;
    }

    //
    // run through the list of RPC security packages, trying to establish a
    // security context with this binding.
    //
    for (packageIndex = 0;
         packageIndex < CsNumberOfRPCSecurityPackages;
         ++packageIndex )
    {
        status = RpcBindingSetAuthInfoW(bindingHandle,
                                        CsServiceDomainAccount,
                                        RPC_C_AUTHN_LEVEL_CONNECT,
                                        CsRPCSecurityPackage[ packageIndex ],
                                        NULL,
                                        RPC_C_AUTHZ_NAME);

        if (status != RPC_S_OK) {
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[JOIN] Unable to set JoinVersion AuthInfo using %1!ws! package, status %2!u!.\n",
                          CsRPCSecurityPackageName[packageIndex],
                          status);
            continue;
        }

        status = CsRpcGetJoinVersionData(bindingHandle,
                                         NmLocalNodeId,
                                         CsMyHighestVersion,
                                         CsMyLowestVersion,
                                         &SponsorNodeId,
                                         &ClusterHighestVersion,
                                         &ClusterLowestVersion,
                                         &JoinStatus);

        if ( status == RPC_S_OK ) {
            break;
        } else {
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[JOIN] Unable to get join version data from sponsor %1!ws! using "
                          "%2!ws! package, status %3!u!.\n",
                          sponsorName,
                          CsRPCSecurityPackageName[packageIndex],
                          status);
        }
    }

    //
    // jump out now if nothing work (as in the case of a form)
    //
    if ( status != ERROR_SUCCESS ) {
        goto error_exit;
    }

    //
    // use the join lock to set the RPC package index
    //
    EnterCriticalSection( &CsJoinLock );

    if ( CsRPCSecurityPackageIndex < 0 ) {
        CsRPCSecurityPackageIndex = packageIndex;
    }

    LeaveCriticalSection( &CsJoinLock );

    //
    // check the sponsor was in agreement with the join
    //
    if ( JoinStatus != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Sponsor %1!ws! has discontinued join, status %2!u!.\n",
                      sponsorName,
                      JoinStatus);

        if (JoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS) {
            ClRtlLogPrint(LOG_CRITICAL,
                          "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                          sponsorName,
                          CsMyHighestVersion,
                          CsMyLowestVersion,
                          ClusterHighestVersion,
                          ClusterLowestVersion);

            //
            // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
            //         threads meet the same fate, clussvc should not try to form a cluster.
            // BUG ID: 152229
            //
            CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
        }

        //
        // The RPC itself succeeded, so status is still RPC_S_OK here. Hand
        // the sponsor's refusal back so the caller does not go on to select
        // this sponsor.
        //
        status = JoinStatus;
        goto error_exit;
    }

    //
    // SS: we will leave this check because win2K clusters didnt do the
    // server side check, so the client must continue to do it
    //

    //
    // now check that it is ok to join. We want this node to run
    // at the highest level of compatibility possible. One of the
    // following conditions must be true:
    //
    // 1) the High versions match exactly (major and build number)
    // 2) our Highest matches the sponsor's Lowest exactly, downgrading
    //    the sponsor to our level of compatibility
    // 3) our Lowest matches the sponsor's Highest, downgrading ourselves
    //    to the sponsor's level of compatibility
    //
    // note that the minor (build) version must match as well. The previous
    // version numbers are "well known" and shouldn't change when a newer
    // version is available/implemented.
    //
    if ( CsMyHighestVersion == ClusterHighestVersion ||
         CsMyHighestVersion == ClusterLowestVersion  ||
         CsMyLowestVersion == ClusterHighestVersion
#if 1 // CLUSTER_BETA
         || CsNoVersionCheck
#endif
       )
    {
        status = ERROR_SUCCESS;

    } else {

        ClRtlLogPrint(LOG_CRITICAL,
                      "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                      sponsorName,
                      CsMyHighestVersion,
                      CsMyLowestVersion,
                      ClusterHighestVersion,
                      ClusterLowestVersion);

        //
        // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
        //         threads meet the same fate, clussvc should not try to form a cluster.
        // BUG ID: 152229
        //
        // Record the mismatch the same way the sponsor-side refusal above
        // does, and fail the check instead of returning the RPC success code.
        //
        CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
        status = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
    }

error_exit:

    if (bindingHandle != NULL) {
        RpcBindingFree(&bindingHandle);
    }

    //
    // The string binding was allocated by RpcStringBindingComposeW and must
    // be released with RpcStringFree.
    //
    if (bindingString != NULL) {
        RpcStringFreeW(&bindingString);
    }

    return status;
}
DWORD
WINAPI
JoinpConnectThread(
    LPVOID Parameter
    )
/*++

Routine Description:

    Thread routine started by JoinpConnectToSponsor. After the requested
    delay it verifies version compatibility with the prospective sponsor,
    builds a binding to the sponsor's ExtroCluster (join) RPC interface and,
    if no sponsor has been chosen yet, publishes the binding and name in
    CsJoinSponsorBinding / CsJoinSponsorName and signals CsJoinEvent.

    On every path the thread removes itself from CsJoinThreadCount under
    CsJoinLock. It signals CsJoinEvent when only one participant is left and
    tears down the event and the lock when it is the last one.

Arguments:

    Parameter - PJOIN_SPONSOR_CONTEXT carrying the sponsor name and the delay.
                The thread owns and frees the context. It also frees the name
                buffer, unless the name was handed over as CsJoinSponsorName.

Return Value:

    RPC_S_OK if a binding to the sponsor was established.

    RPC_S_CALL_FAILED_DNE if a sponsor had already been chosen after the
    delay.

    An RPC or Win32 error code otherwise.

--*/
{
    PJOIN_SPONSOR_CONTEXT   context = (PJOIN_SPONSOR_CONTEXT) Parameter;
    LPWSTR                  sponsorName = context->Name;
    DWORD                   status;
    LPWSTR                  bindingString = NULL;
    RPC_BINDING_HANDLE      bindingHandle = NULL;

    //
    // Sleep for the specified delay.
    //
    if (context->Delay > 0) {
        Sleep(context->Delay);
    }

    //
    // No need to send a sponsorship request if a sponsor has
    // already been chosen.
    //
    if (CsJoinSponsorBinding != NULL) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] No need to ask %1!ws! to sponsor us after delay of %2!u! milliseconds.\n",
                      sponsorName,
                      context->Delay
                      );
        status = RPC_S_CALL_FAILED_DNE;
        goto error_exit;
    }

    //
    // Try to connect to the specified node.
    //
    ClRtlLogPrint(LOG_UNUSUAL,
                  "[JOIN] Asking %1!ws! to sponsor us after delay of %2!u! milliseconds.\n",
                  sponsorName,
                  context->Delay
                  );

    //
    // connect to the JoinVersion interface first to see if we should progress
    // any further. since this is the first RPC call to the other node, we can
    // determine which security package should be used for the other interfaces.
    //
    status = VerifyJoinVersionData( sponsorName );

    if (status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] JoinVersion data for sponsor %1!ws! is invalid, status %2!u!.\n",
                      sponsorName,
                      status
                      );
        goto error_exit;
    }

    //
    // Attempt to connect to the sponsor's extrocluster (join) RPC interface.
    //
    status = RpcStringBindingComposeW(
                 L"ffe561b8-bf15-11cf-8c5e-08002bb49649",
                 L"ncadg_ip_udp",
                 sponsorName,
                 NULL,
                 NULL,
                 &bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to compose ExtroCluster string binding for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
        goto error_exit;
    }

    status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to build ExtroCluster binding for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
        goto error_exit;
    }

    //
    // under load, the sponsor might take a while to respond back to the
    // joiner. The default timeout is at 30 secs and this seems to work
    // ok. Note that this means the sponsor has 30 secs to reply to either
    // the RPC request or ping. As long it makes any reply, then the joiner's
    // RPC will continue to wait and not time out the sponsor.
    //
    status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_EXTROCLUSTER_RPC_COM_TIMEOUT );

    if (status != RPC_S_OK) {
        // Not fatal: log it and carry on with the default timeout.
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to set ExtroCluster com timeout for sponsor %1!ws!, status %2!u!.\n",
                      sponsorName,
                      status
                      );
    }

    status = RpcEpResolveBinding(bindingHandle, ExtroCluster_v2_0_c_ifspec);

    if (status != RPC_S_OK) {
        if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
             (status == RPC_S_NOT_LISTENING) ||
             (status == EPT_S_NOT_REGISTERED)
           )
        {
            ClRtlLogPrint(LOG_NOISE,
                          "[JOIN] Sponsor %1!ws! is not available (ExtroCluster), status=%2!u!.\n",
                          sponsorName,
                          status
                          );
        } else {
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[JOIN] Unable to resolve ExtroCluster endpoint for sponsor %1!ws!, status %2!u!.\n",
                          sponsorName,
                          status
                          );
        }
        goto error_exit;
    }

    //
    // establish a security context with this binding.
    //
    status = RpcBindingSetAuthInfoW(bindingHandle,
                                    CsServiceDomainAccount,
                                    RPC_C_AUTHN_LEVEL_CONNECT,
                                    CsRPCSecurityPackage[ CsRPCSecurityPackageIndex ],
                                    NULL,
                                    RPC_C_AUTHZ_NAME);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[JOIN] Unable to set ExtroCluster AuthInfo using %1!ws! package, status %2!u!.\n",
                      CsRPCSecurityPackageName[ CsRPCSecurityPackageIndex ],
                      status);
        goto error_exit;
    }

error_exit:

    //
    // Every path funnels through here so the thread count is always
    // decremented. Sponsor selection and the count update are done under
    // the join lock.
    //
    EnterCriticalSection(&CsJoinLock);

    if (status == RPC_S_OK) {
        if (CsJoinSponsorBinding == NULL) {
            //
            // This is the first successful connection. Ownership of the
            // binding and the name buffer moves to the globals.
            //
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[JOIN] Selecting %1!ws! as join sponsor.\n",
                          sponsorName
                          );

            CsJoinSponsorBinding = bindingHandle;
            bindingHandle = NULL;
            CsJoinSponsorName = sponsorName;
            sponsorName = NULL;
            SetEvent(CsJoinEvent);
        } else {
            ClRtlLogPrint(LOG_NOISE,
                          "[JOIN] Closing connection to sponsor %1!ws!.\n",
                          sponsorName
                          );
        }
    }

    if (--CsJoinThreadCount == 0) {
        //
        // Last participant out tears down the event and the lock. Give up
        // ownership first so the lock is not deleted while owned.
        //
        CloseHandle(CsJoinEvent);
        LeaveCriticalSection(&CsJoinLock);
        DeleteCriticalSection(&CsJoinLock);
    } else if (CsJoinThreadCount == 1) {
        //
        // Only one participant remains; signal the event so ClusterJoin
        // stops waiting even if no sponsor was found.
        //
        SetEvent(CsJoinEvent);
        LeaveCriticalSection(&CsJoinLock);
    } else {
        LeaveCriticalSection(&CsJoinLock);
    }

    if (bindingHandle != NULL) {
        RpcBindingFree(&bindingHandle);
    }

    //
    // The string binding was allocated by RpcStringBindingComposeW and must
    // be released with RpcStringFree.
    //
    if (bindingString != NULL) {
        RpcStringFreeW(&bindingString);
    }

    if (sponsorName != NULL) {
        LocalFree(sponsorName);
    }

    //
    // The context was allocated for this thread by JoinpConnectToSponsor.
    //
    LocalFree(context);

    return(status);

}  // JoinpConnectThread
// Runs the join sequence against the sponsor binding, one component at a time.
// Visible call order: NmJoinCluster, DmJoin, EpInitPhase1, ApiOnlineReadOnly,
// FmJoinPhase1, DmUpdateJoinCluster, NmJoinComplete, FmFixupAdminExt,
// NmPerformFixups, ApiOnline, FmJoinPhase2 and, unless CsNoRepEvtLogging is
// set, EvOnline.
// NOTE(review): the header comment opened on the next line has no visible
// closing delimiter, every #endif below lacks a visible matching #if, and the
// error_exit label plus the end of the function are not visible here. Treat
// this listing as incomplete and confirm against the full file.
DWORD JoinpAttemptJoin( LPWSTR SponsorName, RPC_BINDING_HANDLE JoinMasterBinding ) /*++
Routine Description:
Called to attempt to join a cluster that already exists.
SponsorName - The name (or IP address) of the target sponsor.
JoinMasterBinding - RPC binding to use to perform join.
Return Value:
ERROR_SUCCESS if successful
Win32 error code otherwise.
// NOTE(review): netStatus, tod, systemTime and network are declared below but
// are not referenced in the visible part of the body. SponsorName is not
// referenced in the visible part either.
{ DWORD Status; NET_API_STATUS netStatus; LPTIME_OF_DAY_INFO tod = NULL; SYSTEMTIME systemTime; PNM_NETWORK network; DWORD startseq, endseq;
// Each TESTPT block forces Status to 999999 and jumps to error_exit.
// NOTE(review): presumably these are fault-injection points compiled in for
// test builds only - confirm.
TESTPT(TpFailNmJoinCluster) { Status = 999999; goto error_exit; } #endif
Status = NmJoinCluster(JoinMasterBinding);
if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_UNUSUAL, "[JOIN] NmJoinCluster failed, status %1!u!.\n", Status ); goto error_exit; }
// Synchronize the registry database
TESTPT(TpFailDmJoin) { Status = 999999; goto error_exit; } #endif
// DmJoin fills in startseq; the visible code does not read it afterwards.
Status = DmJoin(JoinMasterBinding, &startseq);
if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] DmJoin failed, error %1!d!\n", Status); goto error_exit; }
// Initialize the event handler, needs to register with gum for cluster wide
// NOTE(review): this is the only failure path that returns directly instead of
// jumping to error_exit - confirm that skipping the cleanup is intended.
Status = EpInitPhase1(); if ( Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] EpInitPhase1 failed, Status = %1!u!\n", Status); return(Status); }
TESTPT(TpFailApiInitPhase1) { Status = 999999; goto error_exit; } #endif
// Bring the API online in read-only mode. There is no join phase for
// the API. The API is required by FmOnline, which starts the
// resource monitor.
Status = ApiOnlineReadOnly(); if ( Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] ApiOnlineReadOnly failed, error = %1!u!\n", Status); goto error_exit; }
TESTPT(TpFailFmJoinPhase1) { Status = 999999; goto error_exit; } #endif
//update status for scm
CsServiceStatus.dwCheckPoint++; CsAnnounceServiceStatus();
// Resynchronize the FM. We cannot enable the Groups until after the
// the API is fully operational. See below.
// endseq is passed here and again to NmJoinComplete further down.
Status = FmJoinPhase1(&endseq); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] FmJoinPhase1 failed, error %1!d!\n", Status); goto error_exit; }
TESTPT(TpFailDmUpdateJoinCluster) { Status = 999999; goto error_exit; } #endif
// Call the DM to hook the notifications for quorum resource and
//event handler
Status = DmUpdateJoinCluster(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] DmUpdateJoin failed, error = %1!u!\n", Status); goto error_exit; }
TESTPT(TpFailNmJoinComplete) { Status = 999999; goto error_exit; } #endif
// We are now fully online, call NM to globally change our state.
Status = NmJoinComplete(&endseq); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] NmJoinComplete failed, error %1!d!\n", Status); goto error_exit; }
//perform the fixup for the AdminExt value on both Nt4 and Nt5 nodes.
Status=FmFixupAdminExt(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] FmFixupAdminExt failed, error %1!d!\n", Status); goto error_exit; }
//perform the fixups after the registry is downloaded
//walk the list of fixups
Status = NmPerformFixups(NM_JOIN_FIXUP); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] NmPerformFixups failed, error %1!d!\n", Status); goto error_exit; }
TESTPT(TpFailApiInitPhase2) { Status = 999999; goto error_exit; } #endif
// Finally enable the full API.
Status = ApiOnline(); if ( Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] ApiOnline failed, error = %1!u!\n", Status); goto error_exit; }
TESTPT(TpFailFmJoinPhase2) { Status = 999999; goto error_exit; } #endif
//update status for scm
CsServiceStatus.dwCheckPoint++; CsAnnounceServiceStatus();
// Call back the Failover Manager to enable and move groups.
// The full registry is now available, so all groups/resources/resource
// types can be created (since they use the registry calls).
Status = FmJoinPhase2(); if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] FmJoinPhase2 failed, status %1!d!.\n", Status); goto error_exit; }
TESTPT(TpFailEvInitialize) { Status = 999999; goto error_exit; } #endif
// Finish initializing the cluster wide event logging
// ASSUMPTION: this is called after the NM has established cluster
// membership.
// An EvOnline failure is logged only; there is no jump to error_exit.
if (!CsNoRepEvtLogging) { Status = EvOnline(); //if this fails, we still start the cluster service
if ( Status != ERROR_SUCCESS ) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] Error calling EvOnline, Status = %1!u!\n", Status); } }
// NOTE(review): the log line below reads like the start of the failure cleanup
// path, but the success return, the error_exit label and the closing brace are
// not visible in this listing.
ClRtlLogPrint(LOG_NOISE, "[INIT] Cleaning up failed join attempt.\n");
BOOL
JoinpAddNodeCallback(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    )
/*++

Routine Description:

    Callback enumeration routine for adding a new node. This callback
    figures out what node IDs are available by striking out the ID of each
    node it is called for.

Arguments:

    Context1 - Supplies a pointer to an array of BOOLs, indexed by node ID.
               The entry for the enumerated node is set to FALSE.

    Context2 - Not used.

    Object - A pointer to the node object.

    Name - The node name. Not used.

Return Value:

    TRUE always.

--*/
{
    PBOOL   availableIds = (PBOOL)Context1;
    DWORD   nodeId = NmGetNodeId(Object);

    CL_ASSERT(NmIsValidNodeId(nodeId));

    // This ID is taken by an existing node.
    availableIds[nodeId] = FALSE;

    return(TRUE);
}