|
|
/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
join.c
Abstract:
This module handles the initialization path where a newly booted node joins an existing cluster.
Author:
John Vert (jvert) 6/6/1996
Revision History:
--*/ #include "initp.h"
#include "lmcons.h"
#include "lmremutl.h"
#include "lmapibuf.h"
#include <clusverp.h>
//
// Local types
//

//
// Pairs a sponsor name with a network ID string.
// NOTE(review): no code visible in this part of the file references this
// type - confirm it is still needed.
//
typedef struct {
    LPWSTR  Name;
    LPWSTR  NetworkId;
} JOIN_SPONSOR_CONTEXT, *PJOIN_SPONSOR_CONTEXT;
//
// Local data
//

//
// Lock taken around updates to the join thread count, the selection of the
// sponsor binding/name and the choice of RPC security package index.
//
CRITICAL_SECTION    CsJoinLock;

//
// Event ClusterJoin waits on. It is signalled when a connect thread selects
// a sponsor or when the thread count drops to one.
//
HANDLE              CsJoinEvent = NULL;

//
// Number of threads still taking part in the sponsor search. ClusterJoin
// starts it at one (for itself) and each spawned connect thread adds one.
// Whoever decrements it to zero tears down CsJoinEvent and CsJoinLock.
//
DWORD               CsJoinThreadCount = 0;

//
// Set to ERROR_CLUSTER_INCOMPATIBLE_VERSIONS when a contacted sponsor turns
// out to be version-incompatible with this node.
//
DWORD               CsJoinStatus=ERROR_SUCCESS;

//
// Binding and name of the first sponsor a connect thread managed to reach.
//
RPC_BINDING_HANDLE  CsJoinSponsorBinding = NULL;
LPWSTR              CsJoinSponsorName = NULL;
//
// Local function prototypes
//

//
// Key enumeration callbacks: each receives the enumerated key, the name of
// that key and a caller supplied context.
//
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY Key,
    IN PWSTR  NetInterfaceId,
    IN PVOID  Context
    );

VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY Key,
    IN PWSTR  NodeId,
    IN PVOID  Context
    );

//
// Sponsor connection helpers.
//
VOID
JoinpConnectToSponsor(
    IN PWSTR SponsorName
    );

DWORD
WINAPI
JoinpConnectThread(
    LPVOID Parameter
    );

DWORD
JoinpAttemptJoin(
    LPWSTR             SponsorName,
    RPC_BINDING_HANDLE JoinMasterBinding
    );

//
// Object enumeration callbacks.
//
BOOL
JoinpAddNodeCallback(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    );

BOOL
JoinpEnumNetworksToSetPriority(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    );
DWORD ClusterJoin( VOID ) /*++
Routine Description:
Called to attempt to join a cluster that already exists.
Arguments:
None
Return Value:
ERROR_SUCCESS if successful
Win32 error code otherwise.
--*/
{ DWORD Status; LPWSTR ClusterIpAddress = NULL; LPWSTR ClusIpAddrResource = NULL; LPWSTR ClusterNameId = NULL; DWORD idMaxSize = 0; DWORD idSize = 0; HDMKEY hClusNameResKey = NULL; HDMKEY hClusIPAddrResKey = NULL;
//
// Try connecting using the cluster IP address first. get the cluster
// name resource, looking up its dependency for the cluster IP addr
//
Status = DmQuerySz(DmClusterParametersKey, CLUSREG_NAME_CLUS_CLUSTER_NAME_RES, &ClusterNameId, &idMaxSize, &idSize);
if (Status != ERROR_SUCCESS) { ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get cluster name resource, error %1!u!.\n", Status); goto error_exit; }
//
// open name resource key and read its DependsOn key
//
hClusNameResKey = DmOpenKey( DmResourcesKey, ClusterNameId, KEY_READ );
if ( hClusNameResKey == NULL ) {
Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to open Cluster Name resource key, error %1!u!.\n", Status); goto error_exit; }
//
// allocate enough space for the GUID and the Parameters string
//
idMaxSize = ( CS_NETWORK_ID_LENGTH + sizeof( CLUSREG_KEYNAME_PARAMETERS ) + 2) * sizeof(WCHAR); ClusIpAddrResource = LocalAlloc( LMEM_FIXED, idMaxSize );
if ( ClusIpAddrResource == NULL ) {
Status = ERROR_NOT_ENOUGH_MEMORY;
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] no memory for Cluster Ip address resource ID!\n"); goto error_exit; }
Status = DmQueryMultiSz(hClusNameResKey, CLUSREG_NAME_RES_DEPENDS_ON, &ClusIpAddrResource, &idMaxSize, &idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get Cluster Ip address resource ID, error %1!u!.\n", Status); goto error_exit; }
lstrcatW( ClusIpAddrResource, L"\\" ); lstrcatW( ClusIpAddrResource, CLUSREG_KEYNAME_PARAMETERS ); hClusIPAddrResKey = DmOpenKey( DmResourcesKey, ClusIpAddrResource, KEY_READ );
if ( hClusIPAddrResKey == NULL ) {
Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to open Cluster IP Address resource key, error %1!u!.\n", Status); goto error_exit; }
//
// get the IP Address; note that these value names are not defined
// in a global way. if they are changed, this code will break
//
idMaxSize = idSize = 0; Status = DmQuerySz(hClusIPAddrResKey, L"Address", &ClusterIpAddress, &idMaxSize, &idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to get Cluster Ip address, error %1!u!.\n", Status); goto error_exit; }
//
// Spawn threads to find a sponsor. We will try the make connections using
// the cluster IP address, the IP address of each node on each network, and
// the name of each node in the cluster. The connects will proceed in
// parallel. We'll use the first one that succeeds.
//
CsJoinEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (CsJoinEvent == NULL) { Status = GetLastError(); ClRtlLogPrint(LOG_CRITICAL, "[JOIN] failed to create join event, error %1!u!.\n", Status ); goto error_exit; }
CsJoinThreadCount = 1; InitializeCriticalSection(&CsJoinLock); EnterCriticalSection(&CsJoinLock);
DmEnumKeys(DmNetInterfacesKey, JoinpEnumNodesAndJoinByAddress, NULL);
DmEnumKeys(DmNodesKey, JoinpEnumNodesAndJoinByHostName, NULL);
//
// give the other threads a chance to start since using the cluster IP
// address to join with is problematic when the resource moves in the
// middle of a join
//
Sleep( 1000 ); JoinpConnectToSponsor(ClusterIpAddress);
//update status for scm
CsServiceStatus.dwCheckPoint++; CsAnnounceServiceStatus();
if(CsJoinThreadCount == 1) SetEvent(CsJoinEvent);
LeaveCriticalSection(&CsJoinLock);
Status = WaitForSingleObject(CsJoinEvent, INFINITE); CL_ASSERT(Status == WAIT_OBJECT_0);
EnterCriticalSection(&CsJoinLock); ClRtlLogPrint(LOG_NOISE, "[JOIN] Got out of the join wait, CsJoinThreadCount = %1!u!.\n", CsJoinThreadCount );
if(--CsJoinThreadCount == 0) { CloseHandle(CsJoinEvent); DeleteCriticalSection(&CsJoinLock); } else LeaveCriticalSection(&CsJoinLock);
//
// All of the threads have failed or one of them made a connection,
// use it to join.
//
if (CsJoinSponsorBinding != NULL) { CL_ASSERT(CsJoinSponsorName != NULL);
ClRtlLogPrint(LOG_UNUSUAL, "[JOIN] Attempting join with sponsor %1!ws!.\n", CsJoinSponsorName );
//
// Chittur Subbaraman (chitturs) - 10/27/98
//
// If the database restore operation is requested, then
// refuse to join the cluster and return an error code.
//
if ( CsDatabaseRestore == TRUE ) { Status = ERROR_CLUSTER_NODE_UP; LocalFree(CsJoinSponsorName); goto error_exit; }
Status = JoinpAttemptJoin(CsJoinSponsorName, CsJoinSponsorBinding);
RpcBindingFree(&CsJoinSponsorBinding); LocalFree(CsJoinSponsorName); } else { Status = ERROR_BAD_NETPATH; ClRtlLogPrint(LOG_CRITICAL, "[JOIN] Unable to connect to any sponsor node.\n" );
//
// rajdas: If the join did not suceed due to version mismatch we shouldn't try to form a cluster.
// Bug ID: 152229
//
if(CsJoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS) bFormCluster = FALSE; }
error_exit: if ( ClusterNameId ) { LocalFree( ClusterNameId ); }
if ( ClusterIpAddress ) { LocalFree( ClusterIpAddress ); }
if ( ClusIpAddrResource ) { LocalFree( ClusIpAddrResource ); }
if ( hClusNameResKey ) { DmCloseKey( hClusNameResKey ); }
if ( hClusIPAddrResKey ) { DmCloseKey( hClusIPAddrResKey ); }
return(Status); }
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY Key,
    IN PWSTR  NetInterfaceId,
    IN PVOID  Context
    )
/*++

Routine Description:

    Key enumeration callback. Reads the owning node ID of a network
    interface and, unless the interface belongs to the local node, starts
    a sponsor connection attempt against the interface's address.

Arguments:

    Key - handle to the network interface key; closed before returning

    NetInterfaceId - string naming the network interface (not used)

    Context - not used

Return Value:

    None

--*/
{
    LPWSTR  ownerNodeId = NULL;
    DWORD   ownerBufferSize = 0;
    DWORD   ownerLength = 0;
    LPWSTR  ifAddress = NULL;
    DWORD   addressBufferSize = 0;
    DWORD   addressLength = 0;
    DWORD   status;

    status = DmQuerySz(Key,
                       CLUSREG_NAME_NETIFACE_NODE,
                       &ownerNodeId,
                       &ownerBufferSize,
                       &ownerLength);

    if ( status != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to get NetInterface Node ID, error %1!u!.\n",
                   status);
    } else if (lstrcmpiW(ownerNodeId, NmLocalNodeIdString) != 0) {
        //
        // The interface lives on some other node: fetch its address and
        // try that node as a sponsor.
        //
        status = DmQuerySz(Key,
                           CLUSREG_NAME_NETIFACE_ADDRESS,
                           &ifAddress,
                           &addressBufferSize,
                           &addressLength);

        if ( status == ERROR_SUCCESS ) {
            JoinpConnectToSponsor(ifAddress);
        } else {
            ClRtlLogPrint(LOG_CRITICAL,
                       "[JOIN] failed to get NetInterface Address, error %1!u!.\n",
                       status);
        }
    }

    DmCloseKey(Key);

    if ( ownerNodeId ) {
        LocalFree( ownerNodeId );
    }

    if ( ifAddress ) {
        LocalFree( ifAddress );
    }

    return;
}
VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY Key,
    IN PWSTR  NodeId,
    IN PVOID  Context
    )
/*++

Routine Description:

    Key enumeration callback. For every node other than the local one,
    reads the node's name and starts a sponsor connection attempt
    against it.

Arguments:

    Key - handle to the node key; closed before returning

    NodeId - string naming the node; compared against the local node ID

    Context - not used

Return Value:

    None

--*/
{
    LPWSTR  hostName = NULL;
    DWORD   hostNameBufferSize = 0;
    DWORD   hostNameLength = 0;

    //
    // Skip ourselves; a name that cannot be read is silently skipped too.
    //
    if (lstrcmpiW(NodeId, NmLocalNodeIdString) != 0 &&
        DmQuerySz(Key,
                  CLUSREG_NAME_NODE_NAME,
                  &hostName,
                  &hostNameBufferSize,
                  &hostNameLength) == ERROR_SUCCESS) {

        JoinpConnectToSponsor(hostName);
        LocalFree(hostName);
    }

    DmCloseKey(Key);

    return;
}
VOID
JoinpConnectToSponsor(
    IN PWSTR SponsorName
    )
/*++

Routine Description:

    Spawns a thread (JoinpConnectThread) that attempts to establish an RPC
    connection to the specified sponsor. The thread is handed its own heap
    copy of the sponsor name.

    CsJoinThreadCount is updated here without acquiring CsJoinLock.
    ClusterJoin holds the lock around its calls to this routine.
    NOTE(review): this assumes the key enumeration callbacks run on the
    calling thread - confirm.

Arguments:

    SponsorName - The name (or IP address) of the target sponsor.

Return Value:

    None. Failures are logged and the sponsor is simply not tried.

--*/
{
    HANDLE  threadHandle;
    DWORD   status = ERROR_SUCCESS;
    DWORD   threadId;
    LPWSTR  name;

    ClRtlLogPrint(LOG_UNUSUAL,
               "[JOIN] Spawning thread to connect to sponsor %1!ws!\n",
               SponsorName
               );

    name = LocalAlloc( LMEM_FIXED, (lstrlenW(SponsorName) + 1 ) * sizeof(WCHAR) );

    if (name == NULL) {
        ClRtlLogPrint(LOG_CRITICAL, "[JOIN] Failed to allocate memory.\n" );
        return;
    }

    lstrcpyW(name, SponsorName);

    //
    // Count the thread before it exists so that its own decrement can
    // never run ahead of this increment.
    //
    CsJoinThreadCount++;

    threadHandle = CreateThread(
                       NULL,
                       0,
                       JoinpConnectThread,
                       name,
                       0,
                       &threadId
                       );

    if (threadHandle != NULL) {
        //
        // The thread now owns the name copy and frees it (or hands it to
        // CsJoinSponsorName) itself.
        //
        CloseHandle(threadHandle);
    } else {
        status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] Failed to spawn connect thread, error %1!u!.\n",
                   status
                   );

        --CsJoinThreadCount;

        //
        // No thread took ownership of the copy, so release it here.
        //
        LocalFree(name);
    }

    return;

}  // JoinpConnectToSponsor
DWORD
WINAPI
VerifyJoinVersionData(
    LPWSTR sponsorName
    )
/*++

Routine Description:

    Verify that the sponsor and the joiner are compatible.

    Binds to the sponsor's JoinVersion RPC interface, exchanges version
    data and checks both the sponsor's verdict and the local compatibility
    rules. When authenticated RPC is in use, the configured security
    packages are tried in order and the index of the first one that works
    is recorded in CsRPCSecurityPackageIndex (if not already set).

Arguments:

    sponsorName - pointer to text string of sponsor to use

Return Value:

    ERROR_SUCCESS - if ok to continue join

    ERROR_CLUSTER_INCOMPATIBLE_VERSIONS - if the version data does not
        match; CsJoinStatus is set to the same value

    The status returned by the sponsor if it declined the join, or an RPC
    error code if the sponsor could not be contacted.

--*/
{
    DWORD               status;
    LPWSTR              bindingString = NULL;
    RPC_BINDING_HANDLE  bindingHandle = NULL;
    DWORD               SponsorNodeId;
    DWORD               ClusterHighestVersion;
    DWORD               ClusterLowestVersion;
    DWORD               JoinStatus;
    //
    // Initialized because it is copied into CsRPCSecurityPackageIndex below
    // even when authenticated RPC is off and the package loop never runs.
    //
    DWORD               packageIndex = 0;

    //
    // Attempt to connect to the sponsor's JoinVersion RPC interface.
    //
    status = RpcStringBindingComposeW(
                 L"6e17aaa0-1a47-11d1-98bd-0000f875292e",
                 L"ncadg_ip_udp",
                 sponsorName,
                 NULL,
                 NULL,
                 &bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to compose JoinVersion string binding for sponsor %1!ws!, status %2!u!.\n",
                   sponsorName,
                   status
                   );
        goto error_exit;
    }

    status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);

    RpcStringFreeW(&bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to build JoinVersion binding for sponsor %1!ws!, status %2!u!.\n",
                   sponsorName,
                   status
                   );
        goto error_exit;
    }

    //
    // under load, the sponsor might take a while to respond back to the
    // joiner. The default timeout is at 30 secs and this seems to work
    // ok. Note that this means the sponsor has 30 secs to reply to either
    // the RPC request or ping. As long it makes any reply, then the joiner's
    // RPC will continue to wait and not time out the sponsor.
    //
    // Failure to set the timeout is logged but is not fatal.
    //
    status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_JOINVERSION_RPC_COM_TIMEOUT );

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to set JoinVersion com timeout for sponsor %1!ws!, status %2!u!.\n",
                   sponsorName,
                   status
                   );
    }

    status = RpcEpResolveBinding(bindingHandle, JoinVersion_v2_0_c_ifspec);

    if (status != RPC_S_OK) {
        if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
             (status == RPC_S_NOT_LISTENING) ||
             (status == EPT_S_NOT_REGISTERED)
           )
        {
            ClRtlLogPrint(LOG_NOISE,
                       "[JOIN] Sponsor %1!ws! is not available (JoinVersion), status=%2!u!.\n",
                       sponsorName,
                       status
                       );
        } else {
            ClRtlLogPrint(LOG_UNUSUAL,
                       "[JOIN] Unable to resolve JoinVersion endpoint for sponsor %1!ws!, status %2!u!.\n",
                       sponsorName,
                       status
                       );
        }
        goto error_exit;
    }

    if ( CsUseAuthenticatedRPC ) {

        //
        // With an empty package list the loop below never executes. Start
        // from a failure status so that case bails out instead of going on
        // to consume version data that was never filled in.
        //
        status = RPC_S_UNKNOWN_AUTHN_SERVICE;

        //
        // run through the list of RPC security packages, trying to establish
        // a security context with this binding.
        //
        for (packageIndex = 0;
             packageIndex < CsNumberOfRPCSecurityPackages;
             ++packageIndex )
        {
            status = RpcBindingSetAuthInfoW(bindingHandle,
                                            CsServiceDomainAccount,
                                            RPC_C_AUTHN_LEVEL_CONNECT,
                                            CsRPCSecurityPackage[ packageIndex ],
                                            NULL,
                                            RPC_C_AUTHZ_NAME);

            if (status != RPC_S_OK) {
                ClRtlLogPrint(LOG_UNUSUAL,
                           "[JOIN] Unable to set JoinVersion AuthInfo using %1!ws! package, status %2!u!.\n",
                           CsRPCSecurityPackageName[packageIndex],
                           status);
                continue;
            }

            status = CsRpcGetJoinVersionData(bindingHandle,
                                             NmLocalNodeId,
                                             CsMyHighestVersion,
                                             CsMyLowestVersion,
                                             &SponsorNodeId,
                                             &ClusterHighestVersion,
                                             &ClusterLowestVersion,
                                             &JoinStatus);

            if ( status == RPC_S_OK ) {
                break;
            } else {
                ClRtlLogPrint(LOG_UNUSUAL,
                           "[JOIN] Unable to get join version data from sponsor %1!ws! using %2!ws! package, status %3!u!.\n",
                           sponsorName,
                           CsRPCSecurityPackageName[packageIndex],
                           status);
            }
        }
    } else {

        //
        // get the version data from the sponsor and determine if we
        // should continue to join
        //
        status = CsRpcGetJoinVersionData(bindingHandle,
                                         NmLocalNodeId,
                                         CsMyHighestVersion,
                                         CsMyLowestVersion,
                                         &SponsorNodeId,
                                         &ClusterHighestVersion,
                                         &ClusterLowestVersion,
                                         &JoinStatus);

        if ( status != RPC_S_OK ) {
            ClRtlLogPrint(LOG_UNUSUAL,
                       "[JOIN] Unable to get join version data from sponsor %1!ws!, status %2!u!.\n",
                       sponsorName,
                       status);
        }
    }

    //
    // jump out now if nothing worked (as in the case of a form)
    //
    if ( status != ERROR_SUCCESS ) {
        goto error_exit;
    }

    //
    // use the join lock to set the RPC package index
    //
    EnterCriticalSection( &CsJoinLock );

    if ( CsRPCSecurityPackageIndex < 0 ) {
        CsRPCSecurityPackageIndex = packageIndex;
    }

    LeaveCriticalSection( &CsJoinLock );

    //
    // check the sponsor was in agreement with the join
    //
    if ( JoinStatus != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Sponsor %1!ws! has discontinued join, status %2!u!.\n",
                   sponsorName,
                   JoinStatus);

        if (JoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS) {
            ClRtlLogPrint(LOG_CRITICAL,
                       "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                       sponsorName,
                       CsMyHighestVersion,
                       CsMyLowestVersion,
                       ClusterHighestVersion,
                       ClusterLowestVersion);

            //
            // rajdas: In this case I have managed to contact a sponsor, but
            // there is a version mismatch. If all the join threads meet the
            // same fate, clussvc should not try to form a cluster.
            // BUG ID: 152229
            //
            CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
        }

        //
        // The RPC itself succeeded, so status is still RPC_S_OK here.
        // Report the sponsor's refusal to the caller; otherwise the caller
        // would treat a rejected join as permission to continue.
        //
        status = JoinStatus;
        goto error_exit;
    }

    // SS: we will leave this check because win2K clusters didnt do the
    // server side check, so the client must continue to do it
    //
    // now check that it is ok to join. We want this node to run
    // at the highest level of compatibility possible. One of the
    // following conditions must be true:
    //
    // 1) the High versions match exactly (major and build number)
    // 2) our Highest matches the sponsor's Lowest exactly, downgrading
    //    the sponsor to our level of compatibility
    // 3) our Lowest matches the sponsor's Highest, downgrading ourselves
    //    to the sponsor's level of compatibility
    //
    // note that the minor (build) version must match as well. The previous
    // version numbers are "well known" and shouldn't change when a newer
    // version is available/implemented.
    //
    if ( CsMyHighestVersion == ClusterHighestVersion ||
         CsMyHighestVersion == ClusterLowestVersion  ||
         CsMyLowestVersion == ClusterHighestVersion
#if 1 // CLUSTER_BETA
         || CsNoVersionCheck
#endif
       )
    {
        status = ERROR_SUCCESS;
    } else {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                   sponsorName,
                   CsMyHighestVersion,
                   CsMyLowestVersion,
                   ClusterHighestVersion,
                   ClusterLowestVersion);

        status = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;

        //
        // rajdas: In this case I have managed to contact a sponsor, but
        // there is a version mismatch. If all the join threads meet the
        // same fate, clussvc should not try to form a cluster.
        // BUG ID: 152229
        //
        CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
    }

error_exit:

    if (bindingHandle != NULL) {
        RpcBindingFree(&bindingHandle);
    }

    return status;
}
DWORD
WINAPI
JoinpConnectThread(
    LPVOID Parameter
    )
/*++

Routine Description:

    Thread routine that tries to reach one prospective sponsor. It checks
    version compatibility first and then prepares a binding to the
    sponsor's ExtroCluster (join) RPC interface. The first thread to get
    this far publishes its binding and sponsor name for ClusterJoin and
    signals the join event.

Arguments:

    Parameter - heap allocated sponsor name (or IP address) string. This
        thread frees it unless it becomes CsJoinSponsorName.

Return Value:

    RPC_S_OK if a usable binding to the sponsor was prepared.

    An RPC or Win32 error code otherwise.

--*/
{
    LPWSTR              sponsor = Parameter;
    LPWSTR              stringBinding = NULL;
    RPC_BINDING_HANDLE  sponsorBinding = NULL;
    DWORD               status;

    ClRtlLogPrint(LOG_UNUSUAL,
               "[JOIN] Asking %1!ws! to sponsor us.\n",
               sponsor
               );

    //
    // The JoinVersion interface is contacted first: it tells us whether
    // going any further makes sense, and as the first RPC to the other
    // node it settles which security package the other interfaces use.
    //
    status = VerifyJoinVersionData( sponsor );

    if (status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] JoinVersion data for sponsor %1!ws! is invalid, status %2!u!.\n",
                   sponsor,
                   status
                   );
        goto connect_done;
    }

    //
    // Now build a binding to the sponsor's extrocluster (join) interface.
    //
    status = RpcStringBindingComposeW(
                 L"ffe561b8-bf15-11cf-8c5e-08002bb49649",
                 L"ncadg_ip_udp",
                 sponsor,
                 NULL,
                 NULL,
                 &stringBinding);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to compose ExtroCluster string binding for sponsor %1!ws!, status %2!u!.\n",
                   sponsor,
                   status
                   );
        goto connect_done;
    }

    status = RpcBindingFromStringBindingW(stringBinding, &sponsorBinding);

    RpcStringFreeW(&stringBinding);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to build ExtroCluster binding for sponsor %1!ws!, status %2!u!.\n",
                   sponsor,
                   status
                   );
        goto connect_done;
    }

    //
    // A loaded sponsor may be slow to answer, so the com timeout is
    // adjusted; as long as the sponsor replies to either the request or a
    // ping within that time the call keeps waiting. Not being able to set
    // the timeout is only logged.
    //
    status = RpcMgmtSetComTimeout( sponsorBinding, CLUSTER_EXTROCLUSTER_RPC_COM_TIMEOUT );

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to set ExtroCluster com timeout for sponsor %1!ws!, status %2!u!.\n",
                   sponsor,
                   status
                   );
    }

    status = RpcEpResolveBinding(sponsorBinding, ExtroCluster_v2_0_c_ifspec);

    switch (status) {

    case RPC_S_OK:
        break;

    case RPC_S_SERVER_UNAVAILABLE:
    case RPC_S_NOT_LISTENING:
    case EPT_S_NOT_REGISTERED:
        ClRtlLogPrint(LOG_NOISE,
                   "[JOIN] Sponsor %1!ws! is not available (ExtroCluster), status=%2!u!.\n",
                   sponsor,
                   status
                   );
        goto connect_done;

    default:
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Unable to resolve ExtroCluster endpoint for sponsor %1!ws!, status %2!u!.\n",
                   sponsor,
                   status
                   );
        goto connect_done;
    }

    if ( CsUseAuthenticatedRPC ) {

        //
        // Attach the security package picked during the version check.
        //
        status = RpcBindingSetAuthInfoW(sponsorBinding,
                                        CsServiceDomainAccount,
                                        RPC_C_AUTHN_LEVEL_CONNECT,
                                        CsRPCSecurityPackage[ CsRPCSecurityPackageIndex ],
                                        NULL,
                                        RPC_C_AUTHZ_NAME);

        if (status != RPC_S_OK) {
            ClRtlLogPrint(LOG_UNUSUAL,
                       "[JOIN] Unable to set ExtroCluster AuthInfo using %1!ws! package, status %2!u!.\n",
                       CsRPCSecurityPackageName[ CsRPCSecurityPackageIndex ],
                       status);
        }
    }

connect_done:

    //
    // Both the success and the failure paths arrive here.
    //
    EnterCriticalSection(&CsJoinLock);

    if (status == RPC_S_OK && CsJoinSponsorBinding == NULL) {
        //
        // Nobody got here before us: hand our binding and name over to
        // ClusterJoin and wake it up.
        //
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] Selecting %1!ws! as join sponsor.\n",
                   sponsor
                   );

        CsJoinSponsorBinding = sponsorBinding;
        sponsorBinding = NULL;
        CsJoinSponsorName = sponsor;
        sponsor = NULL;
        SetEvent(CsJoinEvent);
    } else if (status == RPC_S_OK) {
        ClRtlLogPrint(LOG_NOISE,
                   "[JOIN] Closing connection to sponsor %1!ws!.\n",
                   sponsor
                   );
    }

    //
    // Retire this thread from the count. The very last thread destroys the
    // event and the lock (which is therefore not released). When exactly
    // one thread is left the event is signalled before the lock is
    // released.
    //
    --CsJoinThreadCount;

    if (CsJoinThreadCount == 0) {
        CloseHandle(CsJoinEvent);
        DeleteCriticalSection(&CsJoinLock);
    } else {
        if (CsJoinThreadCount == 1) {
            SetEvent(CsJoinEvent);
        }
        LeaveCriticalSection(&CsJoinLock);
    }

    //
    // Whatever was not handed over above is still ours to release.
    //
    if (sponsorBinding != NULL) {
        RpcBindingFree(&sponsorBinding);
    }

    if (sponsor != NULL) {
        LocalFree(sponsor);
    }

    return(status);

}  // JoinpConnectThread
DWORD
JoinpAttemptJoin(
    LPWSTR             SponsorName,
    RPC_BINDING_HANDLE JoinMasterBinding
    )
/*++

Routine Description:

    Called to attempt to join a cluster that already exists.

    Runs the join steps in order: NmJoinCluster, DmJoin, EpInitPhase1,
    ApiOnlineReadOnly, FmJoinPhase1, DmUpdateJoinCluster, NmJoinComplete,
    FmFixupAdminExt, NmPerformFixups, ApiOnline, FmJoinPhase2 and
    (unless disabled) EvOnline. A failure in any step other than EvOnline
    aborts the join and calls ClusterLeave to clean up.

Arguments:

    SponsorName - The name (or IP address) of the target sponsor.
        Not referenced by this routine.

    JoinMasterBinding - RPC binding to use to perform join.

Return Value:

    ERROR_SUCCESS if successful

    Win32 error code otherwise.

--*/
{
    DWORD   Status;
    DWORD   startseq, endseq;

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = NmJoinCluster(JoinMasterBinding);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] NmJoinCluster failed, status %1!u!.\n",
                   Status
                   );
        goto error_exit;
    }

    //
    // Synchronize the registry database
    //
#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmJoin) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = DmJoin(JoinMasterBinding, &startseq);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] DmJoin failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

    //
    // Initialize the event handler, needs to register with gum for cluster
    // wide events.
    //
    Status = EpInitPhase1();

    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] EpInitPhase1 failed, Status = %1!u!\n",
                   Status);
        //
        // Go through the common cleanup like every other failed step; this
        // path used to return directly and skipped ClusterLeave.
        //
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Bring the API online in read-only mode. There is no join phase for
    // the API. The API is required by FmOnline, which starts the
    // resource monitor.
    //
    Status = ApiOnlineReadOnly();

    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] ApiOnlineReadOnly failed, error = %1!u!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Resynchronize the FM. We cannot enable the Groups until after the
    // the API is fully operational. See below.
    //
    Status = FmJoinPhase1();

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmJoinPhase1 failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmUpdateJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Call the DM to hook the notifications for quorum resource and
    // event handler
    //
    Status = DmUpdateJoinCluster();

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] DmUpdateJoin failed, error = %1!u!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinComplete) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // We are now fully online, call NM to globally change our state.
    //
    Status = NmJoinComplete(&endseq);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] NmJoinComplete failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#if 0
    //
    // This check is flawed. Network state updates can occur during
    // the join process, causing this check to fail unnecessarily.
    //
    if (startseq + GUM_UPDATE_JOINSEQUENCE != endseq) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] Sequence mismatch, start %1!d! end %2!d!\n",
                   startseq,
                   endseq);
        Status = ERROR_CLUSTER_DATABASE_SEQMISMATCH;
        goto error_exit;
    }
#endif // 0

    //perform the fixup for the AdminExt value on both Nt4 and Nt5 nodes.
    Status = FmFixupAdminExt();

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmFixupAdminExt failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

    //perform the fixups after the registry is downloaded
    //walk the list of fixups
    Status = NmPerformFixups(NM_JOIN_FIXUP);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] NmPerformFixups failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Finally enable the full API.
    //
    Status = ApiOnline();

    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] ApiOnline failed, error = %1!u!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Call back the Failover Manager to enable and move groups.
    // The full registry is now available, so all groups/resources/resource
    // types can be created (since they use the registry calls).
    //
    Status = FmJoinPhase2();

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmJoinPhase2 failed, status %1!d!.\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailEvInitialize) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Finish initializing the cluster wide event logging
    //
    // ASSUMPTION: this is called after the NM has established cluster
    // membership.
    //
    if (!CsNoRepEvtLogging) {
        Status = EvOnline();

        //if this fails, we still start the cluster service
        if ( Status != ERROR_SUCCESS ) {
            ClRtlLogPrint(LOG_CRITICAL,
                       "[JOIN] Error calling EvOnline, Status = %1!u!\n",
                       Status);
        }
    }

    return(ERROR_SUCCESS);

error_exit:

    ClRtlLogPrint(LOG_NOISE, "[INIT] Cleaning up failed join attempt.\n");

    ClusterLeave();

    return(Status);

}
BOOL
JoinpAddNodeCallback(
    IN PVOID   Context1,
    IN PVOID   Context2,
    IN PVOID   Object,
    IN LPCWSTR Name
    )
/*++

Routine Description:

    Enumeration callback used when adding a new node. It marks the ID of
    the enumerated node as taken so the caller can work out which node
    IDs are still free.

Arguments:

    Context1 - Pointer to an array of BOOLs indexed by node ID. The entry
        for the enumerated node is set to FALSE.

    Context2 - Not used.

    Object - A pointer to the node object.

    Name - The node name. Not used.

Return Value:

    TRUE, always.

--*/
{
    PBOOL   idIsFree = (PBOOL)Context1;
    DWORD   nodeId = NmGetNodeId(Object);

    CL_ASSERT(NmIsValidNodeId(nodeId));

    idIsFree[nodeId] = FALSE;

    return(TRUE);
}
|