/*++

Copyright (c) 1996  Microsoft Corporation

Module Name:

    join.c

Abstract:

    This module handles the initialization path where a newly booted
    node joins an existing cluster.

Author:

    John Vert (jvert) 6/6/1996

Revision History:

--*/
#include "initp.h"
#include "lmcons.h"
#include "lmremutl.h"
#include "lmapibuf.h"

#include <clusverp.h>


//
// Local types
//
//
// Pairs a name string with a network ID string.
// NOTE(review): nothing in the visible portion of this file references this
// type -- confirm whether it is still used elsewhere.
//
typedef struct {
    LPWSTR   Name;          // presumably the sponsor name or address -- TODO confirm
    LPWSTR   NetworkId;     // presumably the ID of the network used to reach it -- TODO confirm
} JOIN_SPONSOR_CONTEXT, *PJOIN_SPONSOR_CONTEXT;


//
// Local data
//
// Guards CsJoinThreadCount and the sponsor selection below. Initialized in
// ClusterJoin; deleted by whichever thread drops CsJoinThreadCount to zero.
CRITICAL_SECTION    CsJoinLock;
//
// Manual-reset event that ClusterJoin waits on. It is signaled when a connect
// thread selects a sponsor, or when CsJoinThreadCount is down to one.
HANDLE              CsJoinEvent = NULL;
//
// Number of threads still using CsJoinLock/CsJoinEvent: one for ClusterJoin
// itself plus one for every connect thread that was successfully spawned.
DWORD               CsJoinThreadCount = 0;
//
// Set to ERROR_CLUSTER_INCOMPATIBLE_VERSIONS by VerifyJoinVersionData when a
// sponsor was reached but the versions do not match. ClusterJoin consults it
// when no sponsor connection could be made.
DWORD               CsJoinStatus=ERROR_SUCCESS;
//
// RPC binding and name of the first sponsor that a connect thread managed to
// bind to. Both are written by JoinpConnectThread while holding CsJoinLock.
RPC_BINDING_HANDLE  CsJoinSponsorBinding = NULL;
LPWSTR              CsJoinSponsorName = NULL;


//
// Local function prototypes
//
//
// Key enumeration callback passed to DmEnumKeys for the network interfaces
// key: starts a connect attempt to the address of each interface that does
// not belong to the local node. (The definition names the second parameter
// NetInterfaceId.)
//
VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY  Key,
    IN PWSTR   NodeId,
    IN PVOID   Context
    );

//
// Key enumeration callback passed to DmEnumKeys for the nodes key: starts a
// connect attempt to each node other than the local one, by node name.
//
VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY  Key,
    IN PWSTR   NodeId,
    IN PVOID   Context
    );

//
// Spawns a JoinpConnectThread for the given sponsor name or address.
//
VOID
JoinpConnectToSponsor(
    IN PWSTR   SponsorName
    );

//
// Thread routine: tries to bind to one sponsor and, if it is the first to
// succeed, publishes the binding in CsJoinSponsorBinding.
//
DWORD WINAPI
JoinpConnectThread(
    LPVOID   Parameter
    );

//
// Runs the join sequence against the selected sponsor binding.
//
DWORD
JoinpAttemptJoin(
    LPWSTR               SponsorName,
    RPC_BINDING_HANDLE   JoinMasterBinding
    );

//
// Enumeration callback that marks the enumerated node's ID as unavailable in
// a caller-supplied BOOL array.
//
BOOL
JoinpAddNodeCallback(
    IN PVOID Context1,
    IN PVOID Context2,
    IN PVOID Object,
    IN LPCWSTR Name
    );

//
// NOTE(review): no definition of this routine is visible in this portion of
// the file.
//
BOOL
JoinpEnumNetworksToSetPriority(
    IN PVOID Context1,
    IN PVOID Context2,
    IN PVOID Object,
    IN LPCWSTR Name
    );


DWORD
ClusterJoin(
    VOID
    )
/*++

Routine Description:

    Called to attempt to join a cluster that already exists.

    The cluster IP address is read from the cluster database (cluster name
    resource -> first DependsOn entry -> Parameters\Address). Connect
    threads are then spawned for the address of every remote network
    interface, the name of every remote node, and the cluster IP address.
    The routine waits until one thread has selected a sponsor or all of
    them have finished, and then performs the join through the selected
    sponsor binding.

Arguments:

    None

Return Value:

    ERROR_SUCCESS if successful

    ERROR_BAD_NETPATH if no sponsor could be contacted

    ERROR_CLUSTER_NODE_UP if a sponsor was found but a database restore
    was requested

    Win32 error code otherwise.

--*/

{
    DWORD Status;
    LPWSTR ClusterIpAddress = NULL;
    LPWSTR ClusIpAddrResource = NULL;
    LPWSTR ClusterNameId = NULL;
    DWORD idMaxSize = 0;
    DWORD idSize = 0;
    HDMKEY hClusNameResKey = NULL;
    HDMKEY hClusIPAddrResKey = NULL;

    //
    // Try connecting using the cluster IP address first. get the cluster
    // name resource, looking up its dependency for the cluster IP addr
    //

    Status = DmQuerySz(DmClusterParametersKey,
                       CLUSREG_NAME_CLUS_CLUSTER_NAME_RES,
                       &ClusterNameId,
                       &idMaxSize,
                       &idSize);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to get cluster name resource, error %1!u!.\n",
                   Status);
        goto error_exit;
    }

    //
    // open name resource key and read its DependsOn key
    //

    hClusNameResKey = DmOpenKey( DmResourcesKey, ClusterNameId, KEY_READ );

    if ( hClusNameResKey == NULL ) {

        Status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to open Cluster Name resource key, error %1!u!.\n",
                   Status);
        goto error_exit;
    }

    //
    // allocate enough space for the GUID and the Parameters string
    //

    idMaxSize = ( CS_NETWORK_ID_LENGTH + sizeof( CLUSREG_KEYNAME_PARAMETERS ) + 2)
        * sizeof(WCHAR);
    ClusIpAddrResource = LocalAlloc( LMEM_FIXED, idMaxSize );

    if ( ClusIpAddrResource == NULL ) {

        Status = ERROR_NOT_ENOUGH_MEMORY;

        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] no memory for Cluster Ip address resource ID!\n");
        goto error_exit;
    }

    Status = DmQueryMultiSz(hClusNameResKey,
                            CLUSREG_NAME_RES_DEPENDS_ON,
                            &ClusIpAddrResource,
                            &idMaxSize,
                            &idSize);

    if ( Status != ERROR_SUCCESS ) {

        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to get Cluster Ip address resource ID, error %1!u!.\n",
                   Status);
        goto error_exit;
    }

    //
    // Build "<first DependsOn entry>\Parameters" in place.
    //
    // NOTE(review): if DmQueryMultiSz replaces the buffer with one sized
    // only for the value, these appends could overrun it -- confirm
    // DmQueryMultiSz's reallocation behavior.
    //
    lstrcatW( ClusIpAddrResource, L"\\" );
    lstrcatW( ClusIpAddrResource, CLUSREG_KEYNAME_PARAMETERS );
    hClusIPAddrResKey = DmOpenKey( DmResourcesKey, ClusIpAddrResource, KEY_READ );

    if ( hClusIPAddrResKey == NULL ) {

        Status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to open Cluster IP Address resource key, error %1!u!.\n",
                   Status);
        goto error_exit;
    }

    //
    // get the IP Address; note that these value names are not defined
    // in a global way. if they are changed, this code will break
    //

    idMaxSize = idSize = 0;
    Status = DmQuerySz(hClusIPAddrResKey,
                       L"Address",
                       &ClusterIpAddress,
                       &idMaxSize,
                       &idSize);

    if ( Status != ERROR_SUCCESS ) {

        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] failed to get Cluster Ip address, error %1!u!.\n",
                   Status);
        goto error_exit;
    }

    //
    // Spawn threads to find a sponsor. We will try to make connections using
    // the cluster IP address, the IP address of each node on each network, and
    // the name of each node in the cluster. The connects will proceed in
    // parallel. We'll use the first one that succeeds.
    //
    CsJoinEvent = CreateEvent(NULL, TRUE, FALSE, NULL);

    if (CsJoinEvent == NULL) {
        Status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] failed to create join event, error %1!u!.\n",
            Status
            );
        goto error_exit;
    }

    //
    // The count starts at one to account for this thread. The lock is held
    // while the connect threads are spawned, so none of them can reach its
    // own completion processing (which takes the same lock) until spawning
    // is done.
    //
    CsJoinThreadCount = 1;
    InitializeCriticalSection(&CsJoinLock);
    EnterCriticalSection(&CsJoinLock);

    DmEnumKeys(DmNetInterfacesKey, JoinpEnumNodesAndJoinByAddress, NULL);

    DmEnumKeys(DmNodesKey, JoinpEnumNodesAndJoinByHostName, NULL);

    //
    // give the other threads a chance to start since using the cluster IP
    // address to join with is problematic when the resource moves in the
    // middle of a join
    //
    Sleep( 1000 );
    JoinpConnectToSponsor(ClusterIpAddress);

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // If no connect thread could be spawned, nobody else will ever signal
    // the event, so signal it here to keep the wait below from hanging.
    //
    if (CsJoinThreadCount == 1) {
        SetEvent(CsJoinEvent);
    }

    LeaveCriticalSection(&CsJoinLock);

    Status = WaitForSingleObject(CsJoinEvent, INFINITE);
    CL_ASSERT(Status == WAIT_OBJECT_0);


    EnterCriticalSection(&CsJoinLock);
    ClRtlLogPrint(LOG_NOISE,
        "[JOIN] Got out of the join wait, CsJoinThreadCount = %1!u!.\n",
        CsJoinThreadCount
        );

    //
    // Drop this thread's reference. Whoever drops the last reference (this
    // thread or the last connect thread) tears down the event and the lock.
    //
    if (--CsJoinThreadCount == 0) {
        CloseHandle(CsJoinEvent);
        DeleteCriticalSection(&CsJoinLock);
    }
    else {
        LeaveCriticalSection(&CsJoinLock);
    }

    //
    // All of the threads have failed or one of them made a connection,
    // use it to join.
    //
    if (CsJoinSponsorBinding != NULL) {
        CL_ASSERT(CsJoinSponsorName != NULL);

        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Attempting join with sponsor %1!ws!.\n",
            CsJoinSponsorName
            );

        //
        //  Chittur Subbaraman (chitturs) - 10/27/98
        //
        //  If the database restore operation is requested, then
        //  refuse to join the cluster and return an error code.
        //
        if ( CsDatabaseRestore == TRUE ) {
            Status = ERROR_CLUSTER_NODE_UP;

            //
            // Release the sponsor binding as well as the name; the binding
            // is not going to be used and would otherwise be leaked.
            //
            RpcBindingFree(&CsJoinSponsorBinding);
            LocalFree(CsJoinSponsorName);
            goto error_exit;
        }


        Status = JoinpAttemptJoin(CsJoinSponsorName, CsJoinSponsorBinding);

        RpcBindingFree(&CsJoinSponsorBinding);
        LocalFree(CsJoinSponsorName);
    }
    else {
        Status = ERROR_BAD_NETPATH;
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Unable to connect to any sponsor node.\n"
            );

        //
        // rajdas: If the join did not succeed due to version mismatch we
        // shouldn't try to form a cluster.
        // Bug ID: 152229
        //
        if (CsJoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS) {
            bFormCluster = FALSE;
        }
    }


error_exit:
    if ( ClusterNameId ) {
        LocalFree( ClusterNameId );
    }

    if ( ClusterIpAddress ) {
        LocalFree( ClusterIpAddress );
    }

    if ( ClusIpAddrResource ) {
        LocalFree( ClusIpAddrResource );
    }

    if ( hClusNameResKey ) {
        DmCloseKey( hClusNameResKey );
    }

    if ( hClusIPAddrResKey ) {
        DmCloseKey( hClusIPAddrResKey );
    }

    return(Status);
}


VOID
JoinpEnumNodesAndJoinByAddress(
    IN HDMKEY  Key,
    IN PWSTR   NetInterfaceId,
    IN PVOID   Context
    )

/*++

Routine Description:

    Key enumeration callback for the network interfaces key. If the
    interface does not belong to the local node, a connect attempt to the
    interface's address is started.

Arguments:

    Key - handle to the network interface key; closed before returning

    NetInterfaceId - pointer to string representing net IF ID (guid);
                     not used

    Context - not used

Return Value:

    None

--*/

{
    LPWSTR      ownerNodeId = NULL;
    LPWSTR      interfaceAddress = NULL;
    DWORD       bufferSize = 0;
    DWORD       dataSize = 0;
    DWORD       rc;

    //
    // Find out which node owns this interface.
    //
    rc = DmQuerySz(Key,
                   CLUSREG_NAME_NETIFACE_NODE,
                   &ownerNodeId,
                   &bufferSize,
                   &dataSize);

    if ( rc != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] failed to get NetInterface Node ID, error %1!u!.\n",
             rc);
        goto cleanup;
    }

    //
    // Our own interfaces are of no use as a sponsor.
    //
    if ( lstrcmpiW(ownerNodeId, NmLocalNodeIdString) == 0 ) {
        goto cleanup;
    }

    //
    // A remote interface: fetch its address and try it.
    //
    bufferSize = 0;
    dataSize = 0;
    rc = DmQuerySz(Key,
                   CLUSREG_NAME_NETIFACE_ADDRESS,
                   &interfaceAddress,
                   &bufferSize,
                   &dataSize);

    if ( rc == ERROR_SUCCESS ) {
        JoinpConnectToSponsor(interfaceAddress);
    }
    else {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] failed to get NetInterface Address, error %1!u!.\n",
             rc);
    }

cleanup:
    DmCloseKey(Key);

    if ( ownerNodeId != NULL ) {
        LocalFree( ownerNodeId );
    }

    if ( interfaceAddress != NULL ) {
        LocalFree( interfaceAddress );
    }

    return;
}


VOID
JoinpEnumNodesAndJoinByHostName(
    IN HDMKEY  Key,
    IN PWSTR   NodeId,
    IN PVOID   Context
    )

/*++

Routine Description:

    Key enumeration callback for the nodes key. For every node other than
    the local one, a connect attempt using the node's name is started.

Arguments:

    Key - handle to the node key; closed before returning

    NodeId - pointer to string representing node ID (number)

    Context - not used

Return Value:

    None

--*/

{
    LPWSTR      hostName = NULL;
    DWORD       bufferSize = 0;
    DWORD       dataSize = 0;
    BOOL        isLocalNode;

    isLocalNode = (lstrcmpiW(NodeId, NmLocalNodeIdString) == 0);

    //
    // Only remote nodes whose name can be read are worth a connect attempt.
    // The name query is skipped entirely for the local node.
    //
    if ( !isLocalNode &&
         DmQuerySz(Key,
                   CLUSREG_NAME_NODE_NAME,
                   &hostName,
                   &bufferSize,
                   &dataSize) == ERROR_SUCCESS )
    {
        JoinpConnectToSponsor(hostName);
        LocalFree(hostName);
    }

    DmCloseKey(Key);

    return;
}


VOID
JoinpConnectToSponsor(
    IN PWSTR   SponsorName
    )
/*++

Routine Description:

    Spawns a thread (JoinpConnectThread) that attempts to establish an RPC
    connection to the specified sponsor.

    The thread is handed a private heap copy of SponsorName, which it
    either frees or publishes as CsJoinSponsorName. CsJoinThreadCount is
    incremented for every thread that is successfully created; ClusterJoin
    holds CsJoinLock around its calls to this routine.

Arguments:

    SponsorName - The name (or IP address) of the target sponsor. The
                  caller keeps ownership of this string.

Return Value:

    None. Allocation and thread creation failures are logged and
    otherwise ignored.

--*/

{
    HANDLE                  threadHandle;
    DWORD                   status;
    DWORD                   threadId;
    LPWSTR                  name;


    ClRtlLogPrint(LOG_UNUSUAL,
       "[JOIN] Spawning thread to connect to sponsor %1!ws!\n",
        SponsorName
        );

    name = LocalAlloc( LMEM_FIXED, (lstrlenW(SponsorName) + 1 ) * sizeof(WCHAR) );

    if (name != NULL) {
        lstrcpyW(name, SponsorName);

        //
        // Count the thread before it starts so the count is never too low
        // while the thread is running.
        //
        CsJoinThreadCount++;

        threadHandle = CreateThread(
                           NULL,
                           0,
                           JoinpConnectThread,
                           name,
                           0,
                           &threadId
                           );

        if (threadHandle != NULL) {
            //
            // The thread now owns the name copy; the handle is not needed.
            //
            CloseHandle(threadHandle);
        }
        else {
            status = GetLastError();
            ClRtlLogPrint(LOG_CRITICAL,
                "[JOIN] Failed to spawn connect thread, error %1!u!.\n",
                status
                );

            --CsJoinThreadCount;

            //
            // No thread exists to take ownership of the name copy, so it
            // has to be released here.
            //
            LocalFree(name);
        }
    }
    else {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Failed to allocate memory.\n"
            );
    }

    return;

}  // JoinpConnectToSponsor


DWORD WINAPI
VerifyJoinVersionData(
    LPWSTR  sponsorName
    )

/*++

Routine Description:

    Verify that the sponsor and the joiner are compatible.

    Binds to the sponsor's JoinVersion RPC interface, exchanges version
    data with it and checks both the sponsor's verdict and the version
    numbers. When authenticated RPC is in use, the security packages are
    tried in order and the index of the first one that works is recorded
    in CsRPCSecurityPackageIndex (if no index has been recorded yet).

    On a version mismatch CsJoinStatus is set to
    ERROR_CLUSTER_INCOMPATIBLE_VERSIONS.

Arguments:

    sponsorName - pointer to text string of sponsor to use

Return Value:

    ERROR_SUCCESS - if ok to continue join

    ERROR_CLUSTER_INCOMPATIBLE_VERSIONS - if the version numbers do not
        allow a join

    The status reported by the sponsor if it refused the join, or the RPC
    status of the call that failed.

--*/

{
    DWORD                   status;
    LPWSTR                  bindingString = NULL;
    RPC_BINDING_HANDLE      bindingHandle = NULL;
    DWORD                   SponsorNodeId;
    DWORD                   ClusterHighestVersion;
    DWORD                   ClusterLowestVersion;
    DWORD                   JoinStatus;

    //
    // Only the authenticated path assigns a package index. Start from a
    // defined value so the unauthenticated path does not store an
    // uninitialized one in CsRPCSecurityPackageIndex below.
    //
    DWORD                   packageIndex = 0;

    //
    // Attempt to connect to the sponsor's JoinVersion RPC interface.
    //
    status = RpcStringBindingComposeW(
                 L"6e17aaa0-1a47-11d1-98bd-0000f875292e",
                 L"ncadg_ip_udp",
                 sponsorName,
                 NULL,
                 NULL,
                 &bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to compose JoinVersion string binding for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
        goto error_exit;
    }

    status = RpcBindingFromStringBindingW(bindingString, &bindingHandle);

    RpcStringFreeW(&bindingString);

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to build JoinVersion binding for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
        goto error_exit;
    }

    //
    // under load, the sponsor might take a while to respond back to the
    // joiner. The default timeout is at 30 secs and this seems to work
    // ok. Note that this means the sponsor has 30 secs to reply to either
    // the RPC request or ping. As long it makes any reply, then the joiner's
    // RPC will continue to wait and not time out the sponsor.
    //
    // A failure to set the timeout is logged but is not fatal.
    //

    status = RpcMgmtSetComTimeout( bindingHandle, CLUSTER_JOINVERSION_RPC_COM_TIMEOUT );

    if (status != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN] Unable to set JoinVersion com timeout for sponsor %1!ws!, status %2!u!.\n",
            sponsorName,
            status
            );
    }

    status = RpcEpResolveBinding(bindingHandle, JoinVersion_v2_0_c_ifspec);

    if (status != RPC_S_OK) {
        if ( (status == RPC_S_SERVER_UNAVAILABLE) ||
             (status == RPC_S_NOT_LISTENING) ||
             (status == EPT_S_NOT_REGISTERED)
           )
        {
            ClRtlLogPrint(LOG_NOISE,
                "[JOIN] Sponsor %1!ws! is not available (JoinVersion), status=%2!u!.\n",
                sponsorName,
                status
                );
        }
        else {
            ClRtlLogPrint(LOG_UNUSUAL,
                "[JOIN] Unable to resolve JoinVersion endpoint for sponsor %1!ws!, status %2!u!.\n",
                sponsorName,
                status
                );
        }
        goto error_exit;
    }

    if ( CsUseAuthenticatedRPC ) {
        //
        // run through the list of RPC security packages, trying to establish
        // a security context with this binding.
        //
        // NOTE(review): if CsNumberOfRPCSecurityPackages is zero the loop
        // body never runs, status is still RPC_S_OK from the endpoint
        // resolution and the version data below is uninitialized --
        // confirm that the package list can never be empty here.
        //

        for (packageIndex = 0;
             packageIndex < CsNumberOfRPCSecurityPackages;
             ++packageIndex )
        {
            status = RpcBindingSetAuthInfoW(bindingHandle,
                                            CsServiceDomainAccount,
                                            RPC_C_AUTHN_LEVEL_CONNECT,
                                            CsRPCSecurityPackage[ packageIndex ],
                                            NULL,
                                            RPC_C_AUTHZ_NAME);

            if (status != RPC_S_OK) {
                ClRtlLogPrint(LOG_UNUSUAL,
                           "[JOIN] Unable to set JoinVersion AuthInfo using %1!ws! package, status %2!u!.\n",
                            CsRPCSecurityPackageName[packageIndex],
                            status);
                continue;
            }

            status = CsRpcGetJoinVersionData(bindingHandle,
                                             NmLocalNodeId,
                                             CsMyHighestVersion,
                                             CsMyLowestVersion,
                                             &SponsorNodeId,
                                             &ClusterHighestVersion,
                                             &ClusterLowestVersion,
                                             &JoinStatus);

            if ( status == RPC_S_OK ) {
                break;
            } else {
                ClRtlLogPrint(LOG_UNUSUAL,
                           "[JOIN] Unable to get join version data from sponsor %1!ws! using %2!ws! package, status %3!u!.\n",
                            sponsorName,
                            CsRPCSecurityPackageName[packageIndex],
                            status);
            }
        }
    } else {

        //
        // get the version data from the sponsor and determine if we
        // should continue to join
        //

        status = CsRpcGetJoinVersionData(bindingHandle,
                                         NmLocalNodeId,
                                         CsMyHighestVersion,
                                         CsMyLowestVersion,
                                         &SponsorNodeId,
                                         &ClusterHighestVersion,
                                         &ClusterLowestVersion,
                                         &JoinStatus);

        if ( status != RPC_S_OK ) {
            ClRtlLogPrint(LOG_UNUSUAL,
                       "[JOIN] Unable to get join version data from sponsor %1!ws!, status %2!u!.\n",
                        sponsorName,
                        status);
        }
    }

    //
    // jump out now if nothing worked (as in the case of a form)
    //
    if ( status != ERROR_SUCCESS ) {
        goto error_exit;
    }

    //
    // use the join lock to set the RPC package index; the first thread
    // that gets this far decides which package is used
    //
    EnterCriticalSection( &CsJoinLock );

    if ( CsRPCSecurityPackageIndex < 0 ) {
        CsRPCSecurityPackageIndex = packageIndex;
    }

    LeaveCriticalSection( &CsJoinLock );

    //
    // check the sponsor was in agreement with the join
    //
    if ( JoinStatus != ERROR_SUCCESS ) {
        ClRtlLogPrint(LOG_UNUSUAL,
            "[JOIN]  Sponsor %1!ws! has discontinued join, status %2!u!.\n",
            sponsorName,
            JoinStatus);
        if (JoinStatus == ERROR_CLUSTER_INCOMPATIBLE_VERSIONS)
        {
            ClRtlLogPrint(LOG_CRITICAL,
                "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
                sponsorName,
                CsMyHighestVersion,
                CsMyLowestVersion,
                ClusterHighestVersion,
                ClusterLowestVersion);
            //
            // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
            // threads meet the same fate, clussvc should not try to form a cluster.
            // BUG ID: 152229
            //
            CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;

        }

        //
        // The RPC itself succeeded, so status is still RPC_S_OK at this
        // point. Return the sponsor's verdict instead; otherwise the
        // caller would treat a refused join as permission to continue.
        //
        status = JoinStatus;
        goto error_exit;
    }

    // SS: we will leave this check because win2K clusters didn't do the
    // server side check, so the client must continue to do it
    //
    // now check that it is ok to join. We want this node to run
    // at the highest level of compatibility possible. One of the
    // following conditions must be true:
    //
    // 1) the High versions match exactly (major and build number)
    // 2) our Highest matches the sponsor's Lowest exactly, downgrading
    //    the sponsor to our level of compatibility
    // 3) our Lowest matches the sponsor's Highest, downgrading ourselves
    //    to the sponsor's level of compatibility
    //
    // note that the minor (build) version must match as well. The previous
    // version numbers are "well known" and shouldn't change when a newer
    // version is available/implemented.
    //

    if ( CsMyHighestVersion == ClusterHighestVersion ||
         CsMyHighestVersion == ClusterLowestVersion  ||
         CsMyLowestVersion == ClusterHighestVersion
#if 1 // CLUSTER_BETA
         || CsNoVersionCheck
#endif
         )
    {
        status = ERROR_SUCCESS;

    } else {

        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] Join version data from sponsor %1!ws! doesn't match: JH: 0x%2!08X! JL: 0x%3!08X! SH: 0x%4!08X! SL: 0x%5!08X!.\n",
            sponsorName,
            CsMyHighestVersion,
            CsMyLowestVersion,
            ClusterHighestVersion,
            ClusterLowestVersion);

        status = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;

        //
        // rajdas: In this case I have managed to contact a sponsor, but there is a version mismatch. If all the join
        // threads meet the same fate, clussvc should not try to form a cluster.
        // BUG ID: 152229
        //
        CsJoinStatus = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
    }

error_exit:
    if (bindingHandle != NULL) {
        RpcBindingFree(&bindingHandle);
    }

    return status;
}

DWORD WINAPI
JoinpConnectThread(
    LPVOID   Parameter
    )
/*++

Routine Description:

    Thread routine spawned by JoinpConnectToSponsor. Checks version
    compatibility with one candidate sponsor and then builds an RPC binding
    to the sponsor's ExtroCluster (join) interface. The first thread to
    succeed publishes its binding and sponsor name in CsJoinSponsorBinding
    and CsJoinSponsorName and signals CsJoinEvent.

    On the way out every thread drops its reference on CsJoinThreadCount.
    The thread that drops the last reference tears down CsJoinEvent and
    CsJoinLock; the thread that leaves only one reference signals
    CsJoinEvent.

Arguments:

    Parameter - heap-allocated wide string holding the name (or address)
                of the candidate sponsor. The thread owns it and frees it
                unless it is published as CsJoinSponsorName.

Return Value:

    RPC_S_OK if a binding to the sponsor was set up, otherwise the status
    of the step that failed.

--*/
{
    LPWSTR                  candidate = Parameter;
    LPWSTR                  stringBinding = NULL;
    RPC_BINDING_HANDLE      candidateBinding = NULL;
    DWORD                   rc;
    DWORD                   threadsLeft;

    //
    // Try to connect to the specified node.
    //
    ClRtlLogPrint(LOG_UNUSUAL, 
       "[JOIN] Asking %1!ws! to sponsor us.\n",
        candidate
        );

    //
    // The JoinVersion interface comes first: it tells us whether there is
    // any point in continuing, and since it is the first RPC to the other
    // node it also settles which security package the other interfaces use.
    //
    rc = VerifyJoinVersionData( candidate );

    if (rc != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL, 
            "[JOIN] JoinVersion data for sponsor %1!ws! is invalid, status %2!u!.\n",
            candidate,
            rc
            );
        goto finished;
    }

    //
    // Now build a binding to the sponsor's ExtroCluster (join) interface.
    //
    rc = RpcStringBindingComposeW(
             L"ffe561b8-bf15-11cf-8c5e-08002bb49649",
             L"ncadg_ip_udp",
             candidate,
             NULL,
             NULL,
             &stringBinding);

    if (rc != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL, 
            "[JOIN] Unable to compose ExtroCluster string binding for sponsor %1!ws!, status %2!u!.\n",
            candidate,
            rc
            );
        goto finished;
    }

    rc = RpcBindingFromStringBindingW(stringBinding, &candidateBinding);

    RpcStringFreeW(&stringBinding);

    if (rc != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL, 
            "[JOIN] Unable to build ExtroCluster binding for sponsor %1!ws!, status %2!u!.\n",
            candidate,
            rc
            );
        goto finished;
    }

    //
    // A loaded sponsor may be slow to answer. The default timeout of 30
    // seconds seems to work: the sponsor has that long to answer either the
    // RPC request or a ping, and as long as it answers at all the joiner
    // keeps waiting. Failing to set the timeout is logged but not fatal.
    //
    rc = RpcMgmtSetComTimeout( candidateBinding, CLUSTER_EXTROCLUSTER_RPC_COM_TIMEOUT );

    if (rc != RPC_S_OK) {
        ClRtlLogPrint(LOG_UNUSUAL, 
            "[JOIN] Unable to set ExtroCluster com timeout for sponsor %1!ws!, status %2!u!.\n",
            candidate,
            rc
            );
    }

    rc = RpcEpResolveBinding(candidateBinding, ExtroCluster_v2_0_c_ifspec);

    if (rc != RPC_S_OK) {
        switch (rc) {

        case RPC_S_SERVER_UNAVAILABLE:
        case RPC_S_NOT_LISTENING:
        case EPT_S_NOT_REGISTERED:
            ClRtlLogPrint(LOG_NOISE, 
                "[JOIN] Sponsor %1!ws! is not available (ExtroCluster), status=%2!u!.\n",
                candidate,
                rc
                );
            break;

        default:
            ClRtlLogPrint(LOG_UNUSUAL, 
                "[JOIN] Unable to resolve ExtroCluster endpoint for sponsor %1!ws!, status %2!u!.\n",
                candidate,
                rc
                );
            break;
        }

        goto finished;
    }

    if ( CsUseAuthenticatedRPC ) {

        //
        // Establish a security context using the package recorded in
        // CsRPCSecurityPackageIndex.
        //
        rc = RpcBindingSetAuthInfoW(candidateBinding,
                                    CsServiceDomainAccount,
                                    RPC_C_AUTHN_LEVEL_CONNECT,
                                    CsRPCSecurityPackage[ CsRPCSecurityPackageIndex ],
                                    NULL,
                                    RPC_C_AUTHZ_NAME);

        if (rc != RPC_S_OK) {
            ClRtlLogPrint(LOG_UNUSUAL,
                       "[JOIN] Unable to set ExtroCluster AuthInfo using %1!ws! package, status %2!u!.\n",
                        CsRPCSecurityPackageName[ CsRPCSecurityPackageIndex ],
                        rc);
        }
    }

finished:

    EnterCriticalSection(&CsJoinLock);

    if (rc == RPC_S_OK) {
        if (CsJoinSponsorBinding != NULL) {
            //
            // Somebody else got there first; our binding is released below.
            //
            ClRtlLogPrint(LOG_NOISE, 
                "[JOIN] Closing connection to sponsor %1!ws!.\n",
                candidate
                );
        }
        else {
            //
            // This is the first successful connection. Ownership of the
            // binding and of the name moves to the globals.
            //
            ClRtlLogPrint(LOG_UNUSUAL, 
                "[JOIN] Selecting %1!ws! as join sponsor.\n",
                candidate
                );

            CsJoinSponsorBinding = candidateBinding;
            candidateBinding = NULL;
            CsJoinSponsorName = candidate;
            candidate = NULL;
            SetEvent(CsJoinEvent);
        }
    }

    //
    // Drop this thread's reference. On the last reference the event and the
    // lock are destroyed (the lock is not released first). When exactly one
    // reference remains the event is signaled before the lock is released.
    //
    threadsLeft = --CsJoinThreadCount;

    if (threadsLeft == 0) {
        CloseHandle(CsJoinEvent);
        DeleteCriticalSection(&CsJoinLock);
    }
    else {
        if (threadsLeft == 1) {
            SetEvent(CsJoinEvent);
        }
        LeaveCriticalSection(&CsJoinLock);
    }

    if (candidateBinding != NULL) {
        RpcBindingFree(&candidateBinding);
    }

    if (candidate != NULL) {
        LocalFree(candidate);
    }

    return(rc);

}  // JoinpConnectThread



DWORD
JoinpAttemptJoin(
    LPWSTR               SponsorName,
    RPC_BINDING_HANDLE   JoinMasterBinding
    )
/*++

Routine Description:

    Called to attempt to join a cluster that already exists.

    Runs the join sequence in order: NmJoinCluster, DmJoin, EpInitPhase1,
    ApiOnlineReadOnly, FmJoinPhase1, DmUpdateJoinCluster, NmJoinComplete,
    FmFixupAdminExt, NmPerformFixups, ApiOnline, FmJoinPhase2 and, unless
    replicated event logging is disabled, EvOnline. The first step that
    fails aborts the join and ClusterLeave is called to clean up. An
    EvOnline failure is logged but does not fail the join.

Arguments:

    SponsorName - The name (or IP address) of the target sponsor. Not
                  used by this routine.

    JoinMasterBinding - RPC binding to use to perform join.

Return Value:

    ERROR_SUCCESS if successful

    Win32 error code otherwise.

--*/

{
    DWORD Status;
    DWORD startseq, endseq;


#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = NmJoinCluster(JoinMasterBinding);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                   "[JOIN] NmJoinCluster failed, status %1!u!.\n",
                   Status
                   );
        goto error_exit;
    }

    //
    // Synchronize the registry database
    //
#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmJoin) {
        Status = 999999;
        goto error_exit;
    }
#endif

    Status = DmJoin(JoinMasterBinding, &startseq);

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] DmJoin failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

    //
    // Initialize the event handler, needs to register with gum for cluster
    // wide events.
    //
    Status = EpInitPhase1();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] EpInitPhase1 failed, Status = %1!u!\n",
                   Status);

        //
        // NmJoinCluster and DmJoin have already succeeded at this point,
        // so take the common cleanup path like every other failure below
        // instead of returning with the join half done.
        //
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Bring the API online in read-only mode. There is no join phase for
    // the API. The API is required by FmOnline, which starts the
    // resource monitor.
    //
    Status = ApiOnlineReadOnly();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] ApiOnlineReadOnly failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase1) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Resynchronize the FM. We cannot enable the Groups until after the
    // the API is fully operational. See below.
    //
    Status = FmJoinPhase1();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmJoinPhase1 failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailDmUpdateJoinCluster) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Call the DM to hook the notifications for quorum resource and
    // event handler
    //
    Status = DmUpdateJoinCluster();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] DmUpdateJoin failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailNmJoinComplete) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // We are now fully online, call NM to globally change our state.
    //
    Status = NmJoinComplete(&endseq);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] NmJoinComplete failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#if 0
//
// This check is flawed. Network state updates can occur during
// the join process, causing this check to fail unnecessarily.
//
    if (startseq + GUM_UPDATE_JOINSEQUENCE != endseq) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] Sequence mismatch, start %1!d! end %2!d!\n",
                   startseq, endseq);
        Status = ERROR_CLUSTER_DATABASE_SEQMISMATCH;
        goto error_exit;
    }
#endif // 0

    //perform the fixup for the AdminExt value on both Nt4 and Nt5 nodes.
    Status = FmFixupAdminExt();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmFixupAdminExt failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }


    //perform the fixups after the registry is downloaded
    //walk the list of fixups
    Status = NmPerformFixups(NM_JOIN_FIXUP);
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] NmPerformFixups failed, error %1!d!\n",
                   Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailApiInitPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //
    // Finally enable the full API.
    //
    Status = ApiOnline();
    if ( Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
            "[JOIN] ApiOnline failed, error = %1!u!\n",
            Status);
        goto error_exit;
    }

#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailFmJoinPhase2) {
        Status = 999999;
        goto error_exit;
    }
#endif

    //update status for scm
    CsServiceStatus.dwCheckPoint++;
    CsAnnounceServiceStatus();

    //
    // Call back the Failover Manager to enable and move groups.
    // The full registry is now available, so all groups/resources/resource
    // types can be created (since they use the registry calls).
    //
    Status = FmJoinPhase2();
    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_CRITICAL,
                   "[JOIN] FmJoinPhase2 failed, status %1!d!.\n",
                   Status);
        goto error_exit;
    }


#ifdef CLUSTER_TESTPOINT
    TESTPT(TpFailEvInitialize) {
        Status = 999999;
        goto error_exit;
    }
#endif
    //
    // Finish initializing the cluster wide event logging
    //
    // ASSUMPTION: this is called after the NM has established cluster
    // membership.
    //
    if (!CsNoRepEvtLogging)
    {
        Status = EvOnline();
        //if this fails, we still start the cluster service
        if ( Status != ERROR_SUCCESS ) {
            ClRtlLogPrint(LOG_CRITICAL,
                "[JOIN] Error calling EvOnline, Status = %1!u!\n",
                Status);
        }
    }

    return(ERROR_SUCCESS);


error_exit:

    ClRtlLogPrint(LOG_NOISE, "[INIT] Cleaning up failed join attempt.\n");

    ClusterLeave();

    return(Status);

}





BOOL
JoinpAddNodeCallback(
    IN PVOID Context1,
    IN PVOID Context2,
    IN PVOID Object,
    IN LPCWSTR Name
    )
/*++

Routine Description:

    Enumeration callback used when adding a new node. Marks the node ID
    of the enumerated node as taken so that the caller can work out which
    node IDs are still available.

Arguments:

    Context1 - Supplies a pointer to an array of BOOLs indexed by node ID.
        The entry for the enumerated node is set to FALSE.

    Context2 - Not used.

    Object - A pointer to the node object.

    Name - The node name. Not used.

Return Value:

     TRUE

--*/

{
    PBOOL idIsFree = (PBOOL)Context1;
    DWORD nodeId = NmGetNodeId(Object);

    CL_ASSERT(NmIsValidNodeId(nodeId));

    //
    // This ID is already in use by an existing node.
    //
    idIsFree[nodeId] = FALSE;

    return(TRUE);
}