Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

1447 lines
39 KiB

/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
join.c
Abstract:
This module handles the initialization path where a newly booted
node joins an existing cluster.
Author:
John Vert (jvert) 6/6/1996
Revision History:
--*/
#include "initp.h"
#include "lmcons.h"
#include "lmremutl.h"
#include "lmapibuf.h"
#include <clusverp.h>
//
// Delays, in milliseconds, that a connect thread waits before it asks a
// prospective sponsor to sponsor the join. JoinpConnectThread passes the
// value to Sleep().
//
#define JOIN_CLIENT_NO_DELAY 0 // delay for high-prio networks
#define JOIN_CLIENT_NETWORK_DELAY 1000 // delay for low-prio networks
#define JOIN_CLIENT_RESOURCE_DELAY 2000 // delay for cluster IP/netname
//
// Maps a network priority to a connect delay: priority 1 gets no delay,
// every other value (including the 0xFFFFFFFF default that
// JoinpEnumNodesAndJoinByAddress uses when the priority cannot be read)
// gets JOIN_CLIENT_NETWORK_DELAY.
//
#define JOIN_CLIENT_GET_NETWORK_DELAY(_NetworkPrio) \
(((_NetworkPrio) == 1) ? JOIN_CLIENT_NO_DELAY : JOIN_CLIENT_NETWORK_DELAY)
//
// Local types
//
//
// Argument block handed to each JoinpConnectThread. It is allocated by
// JoinpConnectToSponsor and freed by the thread.
//
typedef struct {
DWORD Delay; // milliseconds the thread sleeps before contacting the sponsor
LPWSTR Name; // LocalAlloc'd sponsor name/address; freed by the thread unless it becomes CsJoinSponsorName
} JOIN_SPONSOR_CONTEXT, *PJOIN_SPONSOR_CONTEXT;
//
// Local data
//
// Held while CsJoinThreadCount is changed and while a sponsor is selected.
// Initialized in ClusterJoin; deleted by whichever thread drops
// CsJoinThreadCount to zero.
CRITICAL_SECTION CsJoinLock;
// Manual-reset event created in ClusterJoin. Signalled when a sponsor has
// been selected or when only one counted thread remains.
HANDLE CsJoinEvent = NULL;
// Number of threads still using the join state: ClusterJoin counts itself
// as one, and each spawned connect thread adds one.
DWORD CsJoinThreadCount = 0;
// Set to ERROR_CLUSTER_INCOMPATIBLE_VERSIONS by VerifyJoinVersionData when a
// sponsor was reached but its versions do not match.
DWORD CsJoinStatus=ERROR_SUCCESS;
// Binding and name of the first sponsor a connect thread connected to;
// both stay NULL until a thread wins.
RPC_BINDING_HANDLE CsJoinSponsorBinding = NULL;
LPWSTR CsJoinSponsorName = NULL;
// While another node is joining, we will keep track of any DM or FM updates.
// NOTE(review): not referenced anywhere in the visible part of this file.
BOOL CsDmOrFmHasChanged = FALSE;
//
// Local function prototypes
//
// DmEnumKeys callback: spawns a connect thread for the IP address of a
// network interface that belongs to another node.
VOID
JoinpEnumNodesAndJoinByAddress(
IN HDMKEY Key,
IN PWSTR NodeId,
IN PVOID Context
);
// DmEnumKeys callback: spawns a connect thread for another node's host name.
VOID
JoinpEnumNodesAndJoinByHostName(
IN HDMKEY Key,
IN PWSTR NodeId,
IN PVOID Context
);
// Spawns one JoinpConnectThread for the given sponsor name or address.
VOID
JoinpConnectToSponsor(
IN PWSTR SponsorName,
IN DWORD Delay
);
// Thread routine: waits for its delay, then tries to bind to one sponsor.
DWORD WINAPI
JoinpConnectThread(
LPVOID Parameter
);
// Runs the join sequence through an established sponsor binding.
DWORD
JoinpAttemptJoin(
LPWSTR SponsorName,
RPC_BINDING_HANDLE JoinMasterBinding
);
// Enumeration callback that marks an existing node's ID as unavailable.
BOOL
JoinpAddNodeCallback(
IN PVOID Context1,
IN PVOID Context2,
IN PVOID Object,
IN LPCWSTR Name
);
// NOTE(review): no definition of this routine is visible in this part of
// the file; presumably it is defined further down or is stale.
BOOL
JoinpEnumNetworksToSetPriority(
IN PVOID Context1,
IN PVOID Context2,
IN PVOID Object,
IN LPCWSTR Name
);
DWORD
ClusterJoin(
VOID
)
/*++
Routine Description:
Called to attempt to join a cluster that already exists.
Arguments:
None
Return Value:
ERROR_SUCCESS if successful
Win32 error code otherwise.
--*/
{
DWORD Status;
LPWSTR ClusterIpAddress = NULL;
LPWSTR ClusIpAddrResource = NULL;
LPWSTR ClusterNameId = NULL;
DWORD idMaxSize = 0;
DWORD idSize = 0;
HDMKEY hClusNameResKey = NULL;
HDMKEY hClusIPAddrResKey = NULL;
//
// Try connecting using the cluster IP address first. get the cluster
// name resource, looking up its dependency for the cluster IP addr
//
Status = DmQuerySz(DmClusterParametersKey,
CLUSREG_NAME_CLUS_CLUSTER_NAME_RES,
&ClusterNameId,
&idMaxSize,
&idSize);
if (Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to get cluster name resource, error %1!u!.\n",
Status);
goto error_exit;
}
//
// open name resource key and read its DependsOn key
//
hClusNameResKey = DmOpenKey( DmResourcesKey, ClusterNameId, KEY_READ );
if ( hClusNameResKey == NULL ) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to open Cluster Name resource key, error %1!u!.\n",
Status);
goto error_exit;
}
//
// allocate enough space for the GUID and the Parameters string
//
idMaxSize = ( CS_NETWORK_ID_LENGTH + sizeof( CLUSREG_KEYNAME_PARAMETERS ) + 2)
* sizeof(WCHAR);
ClusIpAddrResource = LocalAlloc( LMEM_FIXED, idMaxSize );
if ( ClusIpAddrResource == NULL ) {
Status = ERROR_NOT_ENOUGH_MEMORY;
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] no memory for Cluster Ip address resource ID!\n");
goto error_exit;
}
Status = DmQueryMultiSz(hClusNameResKey,
CLUSREG_NAME_RES_DEPENDS_ON,
&ClusIpAddrResource,
&idMaxSize,
&idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to get Cluster Ip address resource ID, error %1!u!.\n",
Status);
goto error_exit;
}
lstrcatW( ClusIpAddrResource, L"\\" );
lstrcatW( ClusIpAddrResource, CLUSREG_KEYNAME_PARAMETERS );
hClusIPAddrResKey = DmOpenKey( DmResourcesKey, ClusIpAddrResource, KEY_READ );
if ( hClusIPAddrResKey == NULL ) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to open Cluster IP Address resource key, error %1!u!.\n",
Status);
goto error_exit;
}
//
// get the IP Address; note that these value names are not defined
// in a global way. if they are changed, this code will break
//
idMaxSize = idSize = 0;
Status = DmQuerySz(hClusIPAddrResKey,
L"Address",
&ClusterIpAddress,
&idMaxSize,
&idSize);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to get Cluster Ip address, error %1!u!.\n",
Status);
goto error_exit;
}
//
// Spawn threads to find a sponsor. We will try the make connections using
// the cluster IP address, the IP address of each node on each network, and
// the name of each node in the cluster. The connects will proceed in
// parallel. We'll use the first one that succeeds.
//
CsJoinEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (CsJoinEvent == NULL) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] failed to create join event, error %1!u!.\n",
Status
);
goto error_exit;
}
CsJoinThreadCount = 1;
InitializeCriticalSection(&CsJoinLock);
EnterCriticalSection(&CsJoinLock);
DmEnumKeys(DmNetInterfacesKey, JoinpEnumNodesAndJoinByAddress, NULL);
DmEnumKeys(DmNodesKey, JoinpEnumNodesAndJoinByHostName, NULL);
//
// give the other threads a chance to start since using the cluster IP
// address to join with is problematic when the resource moves in the
// middle of a join
//
JoinpConnectToSponsor(ClusterIpAddress, JOIN_CLIENT_RESOURCE_DELAY);
//update status for scm
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
if(CsJoinThreadCount == 1)
SetEvent(CsJoinEvent);
LeaveCriticalSection(&CsJoinLock);
Status = WaitForSingleObject(CsJoinEvent, INFINITE);
CL_ASSERT(Status == WAIT_OBJECT_0);
EnterCriticalSection(&CsJoinLock);
ClRtlLogPrint(LOG_NOISE,
"[JOIN] Got out of the join wait, CsJoinThreadCount = %1!u!.\n",
CsJoinThreadCount
);
if(--CsJoinThreadCount == 0) {
CloseHandle(CsJoinEvent);
DeleteCriticalSection(&CsJoinLock);
}
else
LeaveCriticalSection(&CsJoinLock);
//
// All of the threads have failed or one of them made a connection,
// use it to join.
//
if (CsJoinSponsorBinding != NULL) {
CL_ASSERT(CsJoinSponsorName != NULL);
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Attempting join with sponsor %1!ws!.\n",
CsJoinSponsorName
);
//
// Chittur Subbaraman (chitturs) - 10/27/98
//
// If the database restore operation is requested, then
// refuse to join the cluster and return an error code.
//
if ( CsDatabaseRestore == TRUE ) {
Status = ERROR_CLUSTER_NODE_UP;
LocalFree(CsJoinSponsorName);
goto error_exit;
}
Status = JoinpAttemptJoin(CsJoinSponsorName, CsJoinSponsorBinding);
RpcBindingFree(&CsJoinSponsorBinding);
LocalFree(CsJoinSponsorName);
}
else {
//we couldnt create a binding to the sponsorer
if(CsJoinStatus == ERROR_SUCCESS) {
//we did the version check in joinpconnectthread but for some reason
//couldnt produce a binding
Status = ERROR_BAD_NETPATH;
ClRtlLogPrint(LOG_CRITICAL,
"[JOIN] Unable to connect to any sponsor node.\n");
}
else {
Status = CsJoinStatus;
}
// rajdas: If the join did not suceed due to version mismatch we shouldn't try to form a cluster.
// Bug ID: 152229
//
if(CsJoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS)
bFormCluster = FALSE;
}
error_exit:
if ( ClusterNameId ) {
LocalFree( ClusterNameId );
}
if ( ClusterIpAddress ) {
LocalFree( ClusterIpAddress );
}
if ( ClusIpAddrResource ) {
LocalFree( ClusIpAddrResource );
}
if ( hClusNameResKey ) {
DmCloseKey( hClusNameResKey );
}
if ( hClusIPAddrResKey ) {
DmCloseKey( hClusIPAddrResKey );
}
return(Status);
}
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY Key,
    IN PWSTR NetInterfaceId,
    IN PVOID Context
    )
/*++
Routine Description:
    Enumeration callback for the network interfaces key. If the interface
    belongs to a node other than the local one, a connect thread is
    spawned for the interface's IP address. The connect delay is chosen
    from the priority of the interface's network; if the priority cannot
    be read, the low-priority delay is used.

Arguments:
    Key - handle to the network interface key being enumerated. It is
        closed by this routine.
    NetInterfaceId - pointer to string representing net IF ID (guid).
        Not used.
    Context - not used.

Return Value:
    None
--*/
{
    DWORD   status;
    LPWSTR  NetIFNodeID = NULL;
    LPWSTR  NetIFIpAddress = NULL;
    LPWSTR  NetIFNetwork = NULL;
    HDMKEY  NetIFNetworkKey = NULL;
    DWORD   NetIFNetworkPriority;
    DWORD   idMaxSize = 0;
    DWORD   idSize = 0;

    //
    // get the NodeId Value from the NetIF key and if it's us,
    // skip this netIF
    //
    status = DmQuerySz(Key,
                       CLUSREG_NAME_NETIFACE_NODE,
                       &NetIFNodeID,
                       &idMaxSize,
                       &idSize);
    if ( status == ERROR_SUCCESS ) {
        if (lstrcmpiW(NetIFNodeID, NmLocalNodeIdString) != 0) {
            //
            // it's not us so get the address and try it...
            //
            idMaxSize = idSize = 0;
            status = DmQuerySz(Key,
                               CLUSREG_NAME_NETIFACE_ADDRESS,
                               &NetIFIpAddress,
                               &idMaxSize,
                               &idSize);
            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                    "[JOIN] failed to get NetInterface Address, error %1!u!.\n",
                    status);
                goto error_exit;
            }

            //
            // Determine the delay based on the network priority. If we
            // cannot find it in the cluster database, we still try to
            // connect to the sponsor assuming the lowest priority.
            //
            NetIFNetworkPriority = 0xFFFFFFFF;
            idMaxSize = idSize = 0;
            status = DmQuerySz(Key,
                               CLUSREG_NAME_NETIFACE_NETWORK,
                               &NetIFNetwork,
                               &idMaxSize,
                               &idSize);
            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                    "[JOIN] Failed to get NetInterface Network, error %1!u!.\n",
                    status);
                goto ConnectToSponsor;
            }

            NetIFNetworkKey = DmOpenKey(DmNetworksKey,
                                        NetIFNetwork,
                                        KEY_READ);
            if ( NetIFNetworkKey == NULL ) {
                // DmOpenKey reports failure through the last-error value;
                // fetch it so the log shows the real error rather than
                // the success status left by the previous query.
                status = GetLastError();
                ClRtlLogPrint(LOG_CRITICAL,
                    "[JOIN] Failed to open key for network %1!ws!, error %2!u!.\n",
                    NetIFNetwork, status
                    );
                goto ConnectToSponsor;
            }

            status = DmQueryDword(NetIFNetworkKey,
                                  CLUSREG_NAME_NET_PRIORITY,
                                  &NetIFNetworkPriority,
                                  0);
            if ( status != ERROR_SUCCESS ) {
                ClRtlLogPrint(LOG_CRITICAL,
                    "[JOIN] Failed to get NetInterface network priority, error %1!u!.\n",
                    status);
            }

ConnectToSponsor:
            //
            // attempt the join with this address
            //
            JoinpConnectToSponsor(NetIFIpAddress,
                                  JOIN_CLIENT_GET_NETWORK_DELAY(NetIFNetworkPriority));
        }
    }
    else {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] failed to get NetInterface Node ID, error %1!u!.\n",
            status);
    }

error_exit:
    DmCloseKey(Key);
    if ( NetIFNetworkKey ) {
        DmCloseKey( NetIFNetworkKey );
    }
    if ( NetIFNodeID ) {
        LocalFree( NetIFNodeID );
    }
    if ( NetIFIpAddress ) {
        LocalFree( NetIFIpAddress );
    }
    if ( NetIFNetwork ) {
        LocalFree( NetIFNetwork );
    }
    return;
}
VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY Key,
    IN PWSTR NodeId,
    IN PVOID Context
    )
/*++
Routine Description:
    Enumeration callback for the nodes key. For every node other than the
    local one, reads the node's name and spawns a connect thread that
    tries that host name as a sponsor. Nothing is done if the name cannot
    be read.

Arguments:
    Key - handle to the node key being enumerated. It is closed by this
        routine.
    NodeId - pointer to string representing node ID (number)
    Context - not used.

Return Value:
    None
--*/
{
    LPWSTR  sponsorHostName = NULL;
    DWORD   bufferSize = 0;
    DWORD   valueSize = 0;
    BOOL    isRemoteNode;

    //
    // Only other nodes are candidates; the name is looked up only for them.
    //
    isRemoteNode = (lstrcmpiW(NodeId, NmLocalNodeIdString) != 0);

    if (isRemoteNode &&
        DmQuerySz(Key,
                  CLUSREG_NAME_NODE_NAME,
                  &sponsorHostName,
                  &bufferSize,
                  &valueSize) == ERROR_SUCCESS)
    {
        JoinpConnectToSponsor(sponsorHostName, JOIN_CLIENT_NETWORK_DELAY);
        LocalFree(sponsorHostName);
    }

    DmCloseKey(Key);
}
VOID
JoinpConnectToSponsor(
    IN PWSTR SponsorName,
    IN DWORD Delay
    )
/*++
Routine Description:
    Spawns a JoinpConnectThread that will try to connect to the given
    sponsor after the given delay. CsJoinThreadCount is incremented for
    the new thread and restored if the thread cannot be created.
    Failures are logged only.

Arguments:
    SponsorName - The name (or IP address) of the target sponsor. The
        string is copied; the caller keeps ownership of its buffer.
    Delay - Milliseconds the thread waits before sending the request

Return Value:
    None
--*/
{
    PJOIN_SPONSOR_CONTEXT   threadArgs;
    HANDLE                  connectThread;
    DWORD                   connectThreadId;
    DWORD                   spawnError;

    ClRtlLogPrint(LOG_UNUSUAL,
        "[JOIN] Spawning thread to connect to sponsor %1!ws!\n",
        SponsorName
        );

    //
    // The argument block and the name copy are separate allocations: a
    // thread that wins sponsorship hands its name buffer over for reuse
    // and frees only the block.
    //
    threadArgs = LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, sizeof(JOIN_SPONSOR_CONTEXT) );
    if (threadArgs == NULL) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Failed to allocate memory.\n"
            );
        return;
    }

    threadArgs->Name = LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT,
                                   (lstrlenW(SponsorName) + 1 ) * sizeof(WCHAR) );
    if (threadArgs->Name == NULL) {
        LocalFree(threadArgs);
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Failed to allocate memory for sponsor name.\n"
            );
        return;
    }

    lstrcpyW(threadArgs->Name, SponsorName);
    threadArgs->Delay = Delay;

    //
    // Count the thread before it exists so that it can never observe a
    // count that does not include itself.
    //
    CsJoinThreadCount++;
    connectThread = CreateThread(
                        NULL,
                        0,
                        JoinpConnectThread,
                        threadArgs,
                        0,
                        &connectThreadId
                        );
    if (connectThread == NULL) {
        spawnError = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Failed to spawn connect thread, error %1!u!.\n",
            spawnError
            );
        --CsJoinThreadCount;
        LocalFree(threadArgs->Name);
        LocalFree(threadArgs);
        return;
    }

    // The thread runs detached; its handle is not needed.
    CloseHandle(connectThread);
    return;
} // JoinpConnectToSponsor
DWORD WINAPI
VerifyJoinVersionData(
    LPWSTR sponsorName
    )
/*++
Routine Description:
    Verify that the sponsor and the joiner are compatible.

    Binds to the sponsor's JoinVersion RPC interface, tries each
    configured RPC security package in turn until the version exchange
    succeeds, records the index of the package that worked (if none has
    been recorded yet), and then checks both the sponsor's verdict and
    the version numbers it returned.

    CsJoinStatus is set to ERROR_CLUSTER_INCOMPATIBLE_VERSIONS when a
    sponsor was reached but the versions do not match.

Arguments:
    sponsorName - pointer to text string of sponsor to use

Return Value:
    ERROR_SUCCESS - if ok to continue join
    The sponsor's join status if the sponsor refused the join
    ERROR_CLUSTER_INCOMPATIBLE_VERSIONS if the local version check fails
    An RPC status if the sponsor could not be reached
--*/
{
    DWORD               status;
    LPWSTR              bindingString = NULL;
    RPC_BINDING_HANDLE  bindingHandle = NULL;
    DWORD               SponsorNodeId = 0;
    DWORD               ClusterHighestVersion = 0;
    DWORD               ClusterLowestVersion = 0;
    DWORD               JoinStatus = ERROR_SUCCESS;
    DWORD               packageIndex;

    //
    // Attempt to connect to the sponsor's JoinVersion RPC interface.
    //
    status = RpcStringBindingComposeW(
                 L"6e17aaa0-1a47-11d1-98bd-0000f875292e",
                 L"ncadg_ip_udp",
                 sponsorName,
                 NULL,
                 NULL,
                 &bindingString);
    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to compose JoinVersion string binding for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
        goto error_exit;
    }

    status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);
    RpcStringFreeW(&bindingString);
    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to build JoinVersion binding for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
        goto error_exit;
    }

    //
    // under load, the sponsor might take a while to respond back to the
    // joiner. The default timeout is at 30 secs and this seems to work
    // ok. Note that this means the sponsor has 30 secs to reply to either
    // the RPC request or ping. As long it makes any reply, then the joiner's
    // RPC will continue to wait and not time out the sponsor.
    //
    // A failure to set the timeout is logged but is not fatal.
    //
    status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_JOINVERSION_RPC_COM_TIMEOUT );
    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to set JoinVersion com timeout for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
    }

    status = RpcEpResolveBinding(bindingHandle, JoinVersion_v2_0_c_ifspec);
    if (status != RPC_S_OK) {
        if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
             (status == RPC_S_NOT_LISTENING) ||
             (status == EPT_S_NOT_REGISTERED)
           )
        {
            ClRtlLogPrint(LOG_NOISE,
                "[JOIN] Sponsor %1!ws! is not available (JoinVersion), status=%2!u!.\n",
                sponsorName,
                status
                );
        }
        else {
            ClRtlLogPrint(LOG_UNUSUAL,
                "[JOIN] Unable to resolve JoinVersion endpoint for sponsor %1!ws!, status %2!u!.\n",
                sponsorName,
                status
                );
        }
        goto error_exit;
    }

    //
    // run through the list of RPC security packages, trying to establish a
    // security context with this binding.
    //
    for (packageIndex = 0;
         packageIndex < CsNumberOfRPCSecurityPackages;
         ++packageIndex )
    {
        status = RpcBindingSetAuthInfoW(bindingHandle,
                                        CsServiceDomainAccount,
                                        RPC_C_AUTHN_LEVEL_CONNECT,
                                        CsRPCSecurityPackage[ packageIndex ],
                                        NULL,
                                        RPC_C_AUTHZ_NAME);
        if (status != RPC_S_OK) {
            ClRtlLogPrint(LOG_UNUSUAL,
                "[JOIN] Unable to set JoinVersion AuthInfo using %1!ws! package, status %2!u!.\n",
                CsRPCSecurityPackageName[packageIndex],
                status);
            continue;
        }

        status = CsRpcGetJoinVersionData(bindingHandle,
                                         NmLocalNodeId,
                                         CsMyHighestVersion,
                                         CsMyLowestVersion,
                                         &SponsorNodeId,
                                         &ClusterHighestVersion,
                                         &ClusterLowestVersion,
                                         &JoinStatus);
        if ( status == RPC_S_OK ) {
            break;
        } else {
            ClRtlLogPrint(LOG_UNUSUAL,
                "[JOIN] Unable to get join version data from sponsor %1!ws! using "
                "%2!ws! package, status %3!u!.\n",
                sponsorName,
                CsRPCSecurityPackageName[packageIndex],
                status);
        }
    }

    //
    // jump out now if nothing work (as in the case of a form)
    //
    if ( status != ERROR_SUCCESS ) {
        goto error_exit;
    }

    //
    // use the join lock to set the RPC package index
    //
    EnterCriticalSection( &CsJoinLock );
    if ( CsRPCSecurityPackageIndex < 0 ) {
        CsRPCSecurityPackageIndex = packageIndex;
    }
    LeaveCriticalSection( &CsJoinLock );

    //
    // check the sponsor was in agreement with the join
    //
    if ( JoinStatus != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Sponsor %1!ws! has discontinued join, status %2!u!.\n",
            sponsorName,
            JoinStatus);

        if (JoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS)
        {
            ClRtlLogPrint(LOG_CRITICAL,
                "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                sponsorName,
                CsMyHighestVersion,
                CsMyLowestVersion,
                ClusterHighestVersion,
                ClusterLowestVersion);
            //
            // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
            // threads meet the same fate, clussvc should not try to form a cluster.
            // BUG ID: 152229
            //
            CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
        }

        //
        // The RPC itself succeeded, so status still holds RPC_S_OK here.
        // Hand the sponsor's refusal back to the caller; otherwise the
        // caller would treat this sponsor as acceptable.
        //
        status = JoinStatus;
        goto error_exit;
    }

    // SS: we will leave this check because win2K clusters didnt do the
    // server side check, so the client must continue to do it
    //
    // now check that it is ok to join. We want this node to run
    // at the highest level of compatibility possible. One of the
    // following conditions must be true:
    //
    // 1) the High versions match exactly (major and build number)
    // 2) our Highest matches the sponsor's Lowest exactly, downgrading
    //    the sponsor to our level of compatibility
    // 3) our Lowest matches the sponsor's Highest, downgrading ourselves
    //    to the sponsor's level of compatibility
    //
    // note that the minor (build) version must match as well. The previous
    // version numbers are "well known" and shouldn't change when a newer
    // version is available/implemented.
    //
    if ( CsMyHighestVersion == ClusterHighestVersion ||
         CsMyHighestVersion == ClusterLowestVersion ||
         CsMyLowestVersion == ClusterHighestVersion
#if 1 // CLUSTER_BETA
         || CsNoVersionCheck
#endif
       )
    {
        status = ERROR_SUCCESS;
    } else {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
            sponsorName,
            CsMyHighestVersion,
            CsMyLowestVersion,
            ClusterHighestVersion,
            ClusterLowestVersion);
        status = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
        //
        // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
        // threads meet the same fate, clussvc should not try to form a cluster.
        // BUG ID: 152229
        //
        CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
    }

error_exit:
    if (bindingHandle != NULL) {
        RpcBindingFree(&bindingHandle);
    }
    return status;
}
DWORD WINAPI
JoinpConnectThread(
LPVOID Parameter
)
/*++
Routine Description:
Thread routine started by JoinpConnectToSponsor. After sleeping for the
requested delay, it verifies version compatibility with one prospective
sponsor and builds an authenticated binding to that sponsor's
ExtroCluster (join) interface. The first thread to succeed publishes its
binding and sponsor name in CsJoinSponsorBinding / CsJoinSponsorName and
signals CsJoinEvent.
Every thread decrements CsJoinThreadCount on the way out. The thread that
drops the count to zero closes CsJoinEvent and deletes CsJoinLock; the
thread that drops it to one signals CsJoinEvent.
Arguments:
Parameter - pointer to a JOIN_SPONSOR_CONTEXT. The context is freed by
this routine; the name buffer is freed too unless it was handed over
to CsJoinSponsorName.
Return Value:
RPC_S_OK if a binding to the sponsor was built (whether or not this
thread's binding was the one selected).
RPC_S_CALL_FAILED_DNE if a sponsor had already been chosen when the
delay expired.
Otherwise the failing status from VerifyJoinVersionData or an RPC call.
--*/
{
PJOIN_SPONSOR_CONTEXT context = (PJOIN_SPONSOR_CONTEXT) Parameter;
LPWSTR sponsorName = context->Name;
DWORD status;
LPWSTR bindingString = NULL;
RPC_BINDING_HANDLE bindingHandle = NULL;
// NOTE(review): setEvent is never read or written below.
BOOL setEvent = FALSE;
//
// Sleep for the specified delay.
//
if (context->Delay > 0) {
Sleep(context->Delay);
}
//
// No need to send a sponsorship request if a sponsor has
// already been chosen.
//
// This is an early check made without taking CsJoinLock; the decision
// that selects a sponsor is made again under the lock at error_exit.
//
if (CsJoinSponsorBinding != NULL) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] No need to ask %1!ws! to sponsor us after delay of %2!u! milliseconds.\n",
sponsorName, context->Delay
);
status = RPC_S_CALL_FAILED_DNE;
goto error_exit;
}
//
// Try to connect to the specified node.
//
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Asking %1!ws! to sponsor us after delay of %2!u! milliseconds.\n",
sponsorName, context->Delay
);
//
// connect to the JoinVersion interface first to see if we should progress
// any further. since this is the first RPC call to the other node, we can
// determine which security package should be used for the other interfaces.
//
status = VerifyJoinVersionData( sponsorName );
if (status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] JoinVersion data for sponsor %1!ws! is invalid, status %2!u!.\n",
sponsorName,
status
);
goto error_exit;
}
//
// Attempt to connect to the sponsor's extrocluster (join) RPC interface.
//
status = RpcStringBindingComposeW(
L"ffe561b8-bf15-11cf-8c5e-08002bb49649",
L"ncadg_ip_udp",
sponsorName,
NULL,
NULL,
&bindingString);
if (status != RPC_S_OK) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Unable to compose ExtroCluster string binding for sponsor %1!ws!, status %2!u!.\n",
sponsorName,
status
);
goto error_exit;
}
status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);
RpcStringFreeW(&bindingString);
if (status != RPC_S_OK) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Unable to build ExtroCluster binding for sponsor %1!ws!, status %2!u!.\n",
sponsorName,
status
);
goto error_exit;
}
//
// under load, the sponsor might take a while to respond back to the
// joiner. The default timeout is at 30 secs and this seems to work
// ok. Note that this means the sponsor has 30 secs to reply to either
// the RPC request or ping. As long it makes any reply, then the joiner's
// RPC will continue to wait and not time out the sponsor.
//
// A failure here is only logged; status is overwritten by the next call.
//
status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_EXTROCLUSTER_RPC_COM_TIMEOUT );
if (status != RPC_S_OK) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Unable to set ExtroCluster com timeout for sponsor %1!ws!, status %2!u!.\n",
sponsorName,
status
);
}
status = RpcEpResolveBinding(bindingHandle, ExtroCluster_v2_0_c_ifspec);
if (status != RPC_S_OK) {
if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
(status == RPC_S_NOT_LISTENING) ||
(status == EPT_S_NOT_REGISTERED)
)
{
ClRtlLogPrint(LOG_NOISE,
"[JOIN] Sponsor %1!ws! is not available (ExtroCluster), status=%2!u!.\n",
sponsorName,
status
);
}
else {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Unable to resolve ExtroCluster endpoint for sponsor %1!ws!, status %2!u!.\n",
sponsorName,
status
);
}
goto error_exit;
}
//
// establish a security context with this binding.
//
// The package index used here is the one VerifyJoinVersionData records
// after a successful version exchange.
//
status = RpcBindingSetAuthInfoW(bindingHandle,
CsServiceDomainAccount,
RPC_C_AUTHN_LEVEL_CONNECT,
CsRPCSecurityPackage[ CsRPCSecurityPackageIndex ],
NULL,
RPC_C_AUTHZ_NAME);
if (status != RPC_S_OK) {
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Unable to set ExtroCluster AuthInfo using %1!ws! package, status %2!u!.\n",
CsRPCSecurityPackageName[ CsRPCSecurityPackageIndex ],
status);
goto error_exit;
}
//
// Common exit for success and failure. Everything from here to the
// thread-count bookkeeping runs under CsJoinLock.
//
error_exit:
EnterCriticalSection(&CsJoinLock);
if (status == RPC_S_OK) {
if (CsJoinSponsorBinding == NULL) {
//
// This is the first successful connection.
//
ClRtlLogPrint(LOG_UNUSUAL,
"[JOIN] Selecting %1!ws! as join sponsor.\n",
sponsorName
);
// Hand the binding and the name buffer over to the globals; clearing
// the locals keeps the cleanup below from freeing them.
CsJoinSponsorBinding = bindingHandle;
bindingHandle = NULL;
CsJoinSponsorName = sponsorName;
sponsorName = NULL;
SetEvent(CsJoinEvent);
}
else {
ClRtlLogPrint(LOG_NOISE,
"[JOIN] Closing connection to sponsor %1!ws!.\n",
sponsorName
);
}
}
// Last user of the join state: tear down the event and the lock. The lock
// is deleted while still held and is not left on this path.
if (--CsJoinThreadCount == 0) {
CloseHandle(CsJoinEvent);
DeleteCriticalSection(&CsJoinLock);
}
// Only one counted thread remains: signal the event so that a waiter in
// ClusterJoin is released even though no sponsor may have been found.
else if (CsJoinThreadCount == 1) {
SetEvent(CsJoinEvent);
LeaveCriticalSection(&CsJoinLock);
}
else
LeaveCriticalSection(&CsJoinLock);
// Release whatever was not handed over to the globals above.
if (bindingHandle != NULL) {
RpcBindingFree(&bindingHandle);
}
if (sponsorName != NULL) {
LocalFree(sponsorName);
}
LocalFree(context);
return(status);
} // JoinpConnectThread
DWORD
JoinpAttemptJoin(
    LPWSTR SponsorName,
    RPC_BINDING_HANDLE JoinMasterBinding
    )
/*++
Routine Description:
    Called to attempt to join a cluster that already exists.

    Runs the join steps in order through the sponsor binding:
    NmJoinCluster, DmJoin, EpInitPhase1, ApiOnlineReadOnly, FmJoinPhase1,
    DmUpdateJoinCluster, NmJoinComplete, FmFixupAdminExt, NmPerformFixups,
    ApiOnline, FmJoinPhase2 and, unless replicated event logging is
    disabled, EvOnline. A failure in any step other than EvOnline aborts
    the join and calls ClusterLeave() to clean up.

Arguments:
    SponsorName - The name (or IP address) of the target sponsor. Not
        used by this routine.
    JoinMasterBinding - RPC binding to use to perform join.

Return Value:
    ERROR_SUCCESS if successful
    Win32 error code otherwise.
--*/
{
    DWORD   Status;
    DWORD   startseq, endseq;

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = NmJoinCluster(JoinMasterBinding);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] NmJoinCluster failed, status %1!u!.\n",
            Status
            );
        goto error_exit;
    }

    //
    // Synchronize the registry database
    //
#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmJoin) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = DmJoin(JoinMasterBinding, &startseq);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] DmJoin failed, error %1!d!\n",
            Status);
        goto error_exit;
    }

    //
    // Initialize the event handler, needs to register with gum for cluster wide
    //events.
    //
    // A failure here goes through the same cleanup as every other step;
    // returning directly would skip ClusterLeave() even though the NM and
    // DM join steps above have already completed.
    //
    Status = EpInitPhase1();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] EpInitPhase1 failed, Status = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Bring the API online in read-only mode. There is no join phase for
    // the API. The API is required by FmOnline, which starts the
    // resource monitor.
    //
    Status = ApiOnlineReadOnly();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] ApiOnlineReadOnly failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Resynchronize the FM. We cannot enable the Groups until after the
    // the API is fully operational. See below.
    //
    Status = FmJoinPhase1(&endseq);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] FmJoinPhase1 failed, error %1!d!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmUpdateJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    // Call the DM to hook the notifications for quorum resource and
    //event handler
    Status = DmUpdateJoinCluster();
    if (Status != ERROR_SUCCESS)
    {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] DmUpdateJoin failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinComplete) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // We are now fully online, call NM to globally change our state.
    //
    Status = NmJoinComplete(&endseq);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] NmJoinComplete failed, error %1!d!\n",
            Status);
        goto error_exit;
    }

    //perform the fixup for the AdminExt value on both Nt4 and Nt5 nodes.
    Status=FmFixupAdminExt();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] FmFixupAdminExt failed, error %1!d!\n",
            Status);
        goto error_exit;
    }

    //perform the fixups after the registry is downloaded
    //walk the list of fixups
    Status = NmPerformFixups(NM_JOIN_FIXUP);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] NmPerformFixups failed, error %1!d!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Finally enable the full API.
    //
    Status = ApiOnline();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] ApiOnline failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Call back the Failover Manager to enable and move groups.
    // The full registry is now available, so all groups/resources/resource
    // types can be created (since they use the registry calls).
    //
    Status = FmJoinPhase2();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] FmJoinPhase2 failed, status %1!d!.\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailEvInitialize) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Finish initializing the cluster wide event logging
    //
    // ASSUMPTION: this is called after the NM has established cluster
    // membership.
    //
    if (!CsNoRepEvtLogging)
    {
        Status = EvOnline();
        //if this fails, we still start the cluster service
        if ( Status != ERROR_SUCCESS ) {
            ClRtlLogPrint(LOG_CRITICAL,
                "[JOIN] Error calling EvOnline, Status = %1!u!\n",
                Status);
        }
    }

    return(ERROR_SUCCESS);

error_exit:
    ClRtlLogPrint(LOG_NOISE, "[INIT] Cleaning up failed join attempt.\n");
    ClusterLeave();
    return(Status);
}
BOOL
JoinpAddNodeCallback(
    IN PVOID Context1,
    IN PVOID Context2,
    IN PVOID Object,
    IN LPCWSTR Name
    )
/*++
Routine Description:
    Enumeration callback used when adding a new node. Marks the ID of the
    enumerated node as taken so that the caller can tell which node IDs
    are still free.

Arguments:
    Context1 - Supplies a pointer to an array of BOOLs indexed by node ID.
        The entry for the enumerated node is set to FALSE.
    Context2 - Not used.
    Object - A pointer to the node object.
    Name - The node name. Not used.

Return Value:
    TRUE
--*/
{
    PBOOL   idIsAvailable = (PBOOL)Context1;
    DWORD   nodeId = NmGetNodeId(Object);

    CL_ASSERT(NmIsValidNodeId(nodeId));

    // This ID is already in use by an existing node.
    idIsAvailable[nodeId] = FALSE;

    return(TRUE);
}