windows-server-2003/base/cluster/clusnet/xport/chbeat.c

/*++

Copyright (c) 1996  Microsoft Corporation

Module Name:

    chbeat.c

Abstract:

    membership state heart beat code. Tracks node availability through
    exchanging heart beat messages with nodes that are marked as alive.

Author:

    Charlie Wickham (charlwi) 05-Mar-1997

Environment:

    Kernel Mode

Revision History:

--*/

#include "precomp.h"
#pragma hdrstop
#include "chbeat.tmh"

#include "clusvmsg.h"
#include "stdio.h"

/* External */

/* Static */

//
// heart beat structures - heart beats are driven by a timer and DPC
// routine. In order to synchronize the shutdown of the DPC, we also need two
// flags, an event and a spin lock.
//

// Periodic timer armed by CnpStartHeartBeats and cancelled by
// CnpStopHeartBeats.
KTIMER HeartBeatTimer;
// DPC object queued by the timer; bound to CnpHeartBeatDpc at init time.
KDPC HeartBeatDpc;
// Synchronization event that CnpStopHeartBeats waits on when it finds the
// DPC running; the DPC sets it when it observes HeartBeatEnabled == FALSE.
KEVENT HeartBeatDpcFinished;
// TRUE between CnpStartHeartBeats and CnpStopHeartBeats. Written and read
// under HeartBeatLock.
BOOLEAN HeartBeatEnabled = FALSE;
// Set to TRUE by the DPC (under HeartBeatLock) once it has decided to run.
BOOLEAN HeartBeatDpcRunning = FALSE;
// Lock protecting the two flags above and the clussvc termination state.
CN_LOCK HeartBeatLock;

#if 0

Heart Beating Explained

ClockTicks are incremented every HEART_BEAT_PERIOD millisecs. SendTicks are the
number of ticks that go by before sending HBs.

The check for received HB msgs is done in the tick just before HB msgs are
sent. Interface Lost HB ticks are in terms of heart beat check periods and
therefore are incremented only during the check period. An interface is failed
when the number of Interface Lost HB ticks have passed and no HB message has
been received on that interface.

Likewise, Node Lost HB Ticks are in terms of heart beat check periods and are
incremented during the check period. After all interfaces have failed on a
node, Node Lost HB ticks must pass without an interface going back online
before a node down event is issued.  Note that a node's comm state is set to
offline when all interfaces have failed.

#endif

//
// Default tick counts. A tick is one firing of the heartbeat timer, i.e.
// HEART_BEAT_PERIOD milliseconds. NOTE(review): the wall-clock figures in the
// trailing comments depend on HEART_BEAT_PERIOD, which is defined elsewhere;
// confirm they still match.
//
#define CLUSNET_HEART_BEAT_SEND_TICKS           2       // every 1.2 secs
#define CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS 3       // after 3 secs
#define CLUSNET_NODE_LOST_HEART_BEAT_TICKS      6       // after 6.6 secs

// Running tick counter used by the heartbeat DPC to decide when to send.
ULONG HeartBeatClockTicks;
// Tick value at which heartbeats are sent.
ULONG HeartBeatSendTicks = CLUSNET_HEART_BEAT_SEND_TICKS;
// Missed check periods after which an interface is failed.
ULONG HBInterfaceLostHBTicks = CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS;
// Missed check periods (on all interfaces) after which a node down event
// is issued.
ULONG HBNodeLostHBTicks = CLUSNET_NODE_LOST_HEART_BEAT_TICKS;

//
// Unicast Heartbeat Data
//
// Even with multicast heartbeats, unicast heartbeats must be supported
// for backwards compatibility.
//

//
// This array records all the nodes that need to have a HB sent to another
// node. This array is not protected by a lock since it is only used with the
// heartbeat DPC routine.
//

//
// One pending unicast heartbeat: the target node/network plus the sequence
// and ack numbers captured from the interface when the HB was scheduled.
//
typedef struct _INTERFACE_HEARTBEAT_INFO {
    CL_NODE_ID NodeId;          // target node
    CL_NETWORK_ID NetworkId;    // network to send on
    ULONG SeqNumber;            // interface's SequenceToSend
    ULONG AckNumber;            // interface's LastSequenceReceived
} INTERFACE_HEARTBEAT_INFO, *PINTERFACE_HEARTBEAT_INFO;

// Initial element count of the array and the amount it grows by when full.
#define InterfaceHBInfoInitialLength            16
#define InterfaceHBInfoLengthIncrement          4

// Array allocated in CxInitializeHeartBeat, grown on demand in
// CnpSendUcastHB and freed in CxUnloadHeartBeat.
PINTERFACE_HEARTBEAT_INFO InterfaceHeartBeatInfo = NULL;
ULONG InterfaceHBInfoCount;         // running count while sending HBs
ULONG InterfaceHBInfoCurrentLength; // current length of HB info array

LARGE_INTEGER HBTime;       // HB time in relative sys time
// HBTime is negative (relative time), so this is half a heartbeat period
// expressed as a positive value in the same units.
#define MAX_DPC_SKEW    ( -HBTime.QuadPart / 2 )

//
// Outerscreen mask. This is set by clussvc's membership manager in user
// mode. As it changes, MM drops down the set outerscreen Ioctl to update
// clusnet's notion of this mask. Clusnet uses this mask to determine the
// validity of a received heart beat. If the sending node is not part
// of the mask, then it is sent a poison packet and the received event
// is not passed on to other consumers. If it is a legitimate PP, then
// we generate the proper event.
//
// Note: MM type definitions and macros have been moved to cnpdef.h for
//       general usage.
//
// The outerscreen uses the same representation as any other cluster screen.
typedef CX_CLUSTERSCREEN CX_OUTERSCREEN;

// Current outerscreen mask as last set by the membership manager.
CX_OUTERSCREEN MMOuterscreen;


//
// Multicast Heartbeat Data
//
//
// One pending multicast heartbeat per network: the multicast group to send
// to, the per-node sequence/ack data and the set of nodes being targeted.
//
typedef struct _NETWORK_MCAST_HEARTBEAT_INFO {
    CL_NETWORK_ID        NetworkId;     // network this entry describes
    PCNP_MULTICAST_GROUP McastGroup;    // referenced when the entry is claimed
    // per-node seq/ack numbers, indexed directly by node ID
    CX_HB_NODE_INFO      NodeInfo[ClusterDefaultMaxNodes+ClusterMinNodeId];
    CX_CLUSTERSCREEN     McastTarget;   // nodes included in this heartbeat
} NETWORK_MCAST_HEARTBEAT_INFO, *PNETWORK_MCAST_HEARTBEAT_INFO;

// Initial element count of the array and the amount it grows by when full.
#define NetworkHBInfoInitialLength            4
#define NetworkHBInfoLengthIncrement          4

// Array allocated in CxInitializeHeartBeat, grown on demand in
// CnpSendMcastHB and freed in CxUnloadHeartBeat.
PNETWORK_MCAST_HEARTBEAT_INFO NetworkHeartBeatInfo = NULL;
ULONG NetworkHBInfoCount;         // running count while sending HBs
ULONG NetworkHBInfoCurrentLength; // current length of HB info array

// NOTE(review): presumably the network currently preferred for multicast
// traffic; it is not referenced in the visible part of this file - confirm.
CL_NETWORK_ID     MulticastBestNetwork = ClusterAnyNetworkId;

// Current multicast epoch; see CnpUpdateMulticastEpoch.
ULONG CxMulticastEpoch = 0;

//
// Declarations for Clussvc to Clusnet Heartbeating.
//
// NOTE(review): the timeout, action and tick-count variables are consumed
// by the clussvc hang check (CnpCheckClussvcHang), which is outside the
// visible part of this file; their exact semantics should be confirmed there.
ULONG             ClussvcClusnetHbTimeoutTicks = 0;
ClussvcHangAction ClussvcClusnetHbTimeoutAction = ClussvcHangActionDisable;
ULONG             ClussvcClusnetHbTickCount = 0;
// When TRUE the heartbeat DPC stops sending/receiving heartbeats. Reset to
// FALSE by CnpStartHeartBeats; read under HeartBeatLock.
BOOLEAN           ClussvcTerminateStopHbs = FALSE;
// Work item pre-allocated by CnpStartHeartBeats and freed by
// CnpStopHeartBeats if it is still present.
PIO_WORKITEM      ClussvcTerminateWorkItem = NULL;
// Parameters for the Clussvc to Clusnet Heartbeating bugcheck. These are
// for informational purposes only and should not otherwise be used. For
// instance, the process object is dereferenced immediately after the
// pointer is determined.
PEPROCESS         ClussvcProcessObject = NULL;
ULONG             ClussvcClusnetHbTimeoutSeconds = 0;

/* Forward */

// Allocates the heartbeat info arrays and initializes the timer, DPC,
// event and lock.
NTSTATUS
CxInitializeHeartBeat(
    void
    );

// Frees the heartbeat info arrays at driver unload.
VOID
CxUnloadHeartBeat(
    VOID
    );

// Timer DPC routine that drives heartbeat sending and checking.
VOID
CnpHeartBeatDpc(
    PKDPC DpcObject,
    PVOID DeferredContext,
    PVOID Arg1,
    PVOID Arg2
    );

// Per-node callback: schedules heartbeats on the interfaces of a live,
// non-local node.
BOOLEAN
CnpWalkNodesToSendHeartBeats(
    IN  PCNP_NODE   UpdateNode,
    IN  PVOID       UpdateContext,
    IN  CN_IRQL     NodeTableIrql
    );

// Per-node callback: checks the interfaces of a live, non-local node for
// received heartbeats and issues a node down event when warranted.
BOOLEAN
CnpWalkNodesToCheckForHeartBeats(
    IN  PCNP_NODE   UpdateNode,
    IN  PVOID       UpdateContext,
    IN  CN_IRQL     NodeTableIrql
    );

// Per-interface callback: records heartbeat data for one interface and
// releases the interface's network lock.
VOID
CnpSendHBs(
    IN  PCNP_INTERFACE   UpdateInterface
    );

// Defined beyond the visible part of this file.
NTSTATUS
CxSetOuterscreen(
    IN  ULONG Outerscreen
    );

// Defined beyond the visible part of this file.
VOID
CnpReceivePoisonPacket(
    IN  PCNP_NETWORK   Network,
    IN  CL_NODE_ID SourceNodeId,
    IN  ULONG SeqNumber
    );

// Defined beyond the visible part of this file.
VOID
CnpUpdateMulticastEpoch(
    ULONG NewEpoch
    );

// Defined beyond the visible part of this file.
VOID
CnpCheckClussvcHang(
    VOID
    );

// Work item routine; defined beyond the visible part of this file.
VOID
CnpLogClussvcHangAndTerminate(    
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID          Context
    );

// Work item routine; defined beyond the visible part of this file.
VOID
CnpLogClussvcHang(
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID          Context
    );

/* End Forward */


#ifdef ALLOC_PRAGMA

//
// Place the init routine in the INIT section and the unload routine in the
// PAGE section. Every other routine in this file stays in the default
// section; the heartbeat paths run from a DPC and take spin locks.
//
#pragma alloc_text(INIT, CxInitializeHeartBeat)
#pragma alloc_text(PAGE, CxUnloadHeartBeat)

#endif // ALLOC_PRAGMA


NTSTATUS
CxInitializeHeartBeat(
    void
    )

/*++

Routine Description:

    Init the mechanisms used to send and monitor heart beats: allocate
    the unicast (per-interface) and multicast (per-network) heartbeat
    info arrays at their initial lengths, then initialize the heartbeat
    timer, DPC, DPC-finished event and lock.

    If either allocation fails, nothing remains allocated on return and
    the recorded array lengths are zero, so the array pointers and their
    lengths never disagree.

Arguments:

    None

Return Value:

    STATUS_INSUFFICIENT_RESOURCES if allocation fails.
    STATUS_SUCCESS otherwise.

--*/

{
    // allocate the interface info array
    InterfaceHBInfoCount = 0;
    InterfaceHBInfoCurrentLength = InterfaceHBInfoInitialLength;
    
    if (InterfaceHBInfoCurrentLength > 0) {
        InterfaceHeartBeatInfo = CnAllocatePool(
                                     InterfaceHBInfoCurrentLength 
                                     * sizeof(INTERFACE_HEARTBEAT_INFO)
                                     );
        if (InterfaceHeartBeatInfo == NULL) {
            // keep the recorded length consistent with the NULL array
            InterfaceHBInfoCurrentLength = 0;
            return(STATUS_INSUFFICIENT_RESOURCES);
        }
    }

    // allocate the network info array
    NetworkHBInfoCount = 0;
    NetworkHBInfoCurrentLength = NetworkHBInfoInitialLength;

    if (NetworkHBInfoCurrentLength > 0) {
        NetworkHeartBeatInfo = CnAllocatePool(
                                   NetworkHBInfoCurrentLength
                                   * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                                   );
        if (NetworkHeartBeatInfo == NULL) {

            // undo the interface array allocation so that a failed
            // initialization does not leave pool allocated. The pointer
            // is cleared, so a later CxUnloadHeartBeat remains safe.
            NetworkHBInfoCurrentLength = 0;

            if (InterfaceHeartBeatInfo != NULL) {
                CnFreePool(InterfaceHeartBeatInfo);
                InterfaceHeartBeatInfo = NULL;
            }
            InterfaceHBInfoCurrentLength = 0;

            return(STATUS_INSUFFICIENT_RESOURCES);
        }
        RtlZeroMemory(
            NetworkHeartBeatInfo, 
            NetworkHBInfoCurrentLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
            );
    }

    KeInitializeTimer( &HeartBeatTimer );
    KeInitializeDpc( &HeartBeatDpc, CnpHeartBeatDpc, NULL );
    KeInitializeEvent( &HeartBeatDpcFinished, SynchronizationEvent, FALSE );
    CnInitializeLock( &HeartBeatLock, CNP_HBEAT_LOCK );

    MEMLOG( MemLogInitHB, 0, 0 );

    return(STATUS_SUCCESS);

} // CxInitializeHeartBeat


VOID
CxUnloadHeartBeat(
    VOID
    )
/*++

Routine Description:

    Called during clusnet driver unload. Free any data structures
    allocated to send and monitor heartbeats.

Arguments:

    None

Return Value:

    None

--*/
{
    PAGED_CODE();

    if (InterfaceHeartBeatInfo != NULL) {
        CnFreePool(InterfaceHeartBeatInfo);
        InterfaceHeartBeatInfo = NULL;
    }

    if (NetworkHeartBeatInfo != NULL) {
        CnFreePool(NetworkHeartBeatInfo);
        NetworkHeartBeatInfo = NULL;
    }

    return;

} // CxUnloadHeartBeat


NTSTATUS
CnpStartHeartBeats(
    VOID
    )

/*++

Routine Description:

    Start heart beating with the nodes that are marked alive and have
    an interface marked either OnlinePending or Online.

Arguments:

    None

Return Value:

    STATUS_INSUFFICIENT_RESOURCES if the workitem allocation fails

--*/

{
    BOOLEAN TimerInserted;
    CN_IRQL OldIrql;
    ULONG period = HEART_BEAT_PERIOD;

    //
    // Pre-allocate a workitem in case we need an emergency
    // termination of the cluster service due to a user-mode
    // hang.
    // No need to take the lock before the allocation and
    // assignment, since below is the first place the lock 
    // is acquired as the service starts.
    CnAssert(ClussvcTerminateWorkItem == NULL);
    ClussvcTerminateWorkItem = IoAllocateWorkItem(CnDeviceObject);
    if (ClussvcTerminateWorkItem == NULL) {
        CnTrace(HBEAT_EVENT, HbTraceTerminateWorkItemAlloc,
            "[HB] Failed to pre-allocate clussvc termination "
            "workitem.\n"
            );
        return(STATUS_INSUFFICIENT_RESOURCES);
    }

    CnAcquireLock( &HeartBeatLock, &OldIrql );

    HBTime.QuadPart = Int32x32To64( HEART_BEAT_PERIOD, -10000 );

    TimerInserted = KeSetTimerEx(&HeartBeatTimer,
                                 HBTime,
                                 HEART_BEAT_PERIOD,
                                 &HeartBeatDpc);

    HeartBeatEnabled = TRUE;
    ClussvcTerminateStopHbs = FALSE;

    CnTrace(HBEAT_EVENT, HbTraceTimerStarted,
        "[HB] Heartbeat timer started. Period = %u ms.",
        period // LOGULONG
        );            
    
    MEMLOG( MemLogHBStarted, HEART_BEAT_PERIOD, 0 );

    CnReleaseLock( &HeartBeatLock, OldIrql );

    return(STATUS_SUCCESS);

} // CnpStartHeartBeats

VOID
CnpStopHeartBeats(
    VOID
    )

/*++

Routine Description:

    Stop heart beating with other nodes in the cluster.

    If heartbeats are enabled, clears HeartBeatEnabled, cancels the
    periodic timer, tries to dequeue the DPC and, if the DPC is
    currently running, waits for it to signal HeartBeatDpcFinished.
    In all cases, frees the pre-allocated clussvc termination work
    item if it is still present.

Arguments:

    None

Return Value:

    None

Notes:

    HeartBeatLock is dropped around the wait and re-acquired afterwards,
    so the routine may block.

--*/

{
    BOOLEAN      TimerCanceled;
    CN_IRQL      OldIrql;
    PIO_WORKITEM FreeWorkItem = NULL;

    CnAcquireLock( &HeartBeatLock, &OldIrql );

    if (HeartBeatEnabled) {
        // tell the DPC to bail out the next time it takes the lock
        HeartBeatEnabled = FALSE;

        //
        // Cancel the periodic timer. Contrary to what the DDK implies,
        // this does not cancel the DPC if it is still queued from the
        // last timer expiration. It only stops the timer from firing
        // again. This is true as of 8/99. See KiTimerListExpire() in
        // ntos\ke\dpcsup.c.
        //
        TimerCanceled = KeCancelTimer( &HeartBeatTimer );

        CnTrace(HBEAT_DETAIL, HbTraceTimerCancelled,
            "[HB] Heartbeat timer cancelled: %!bool!",
            TimerCanceled // LOGBOOLEAN
            );

        MEMLOG( MemLogHBStopped, 0, 0 );

        //
        // Remove the DPC associated with the timer from the system DPC
        // queue, if it is there. This actually does nothing, because a
        // timer DPC is only inserted into the system DPC  queue if it is
        // bound to a specific processor. Unbound DPCs are executed inline
        // on the current processor in the kernel's timer expiration code.
        // Note that the object for a periodic timer is reinserted into the
        // timer queue before the DPC is executed. So, it is possible for the
        // timer and the associated DPC to be queued simultaneously. This is
        // true as of 8/99. See KiTimerListExpire() in ntos\ke\dpcsup.c.
        //
        // The bottom line is that there is no safe way to synchronize with
        // the execution of a timer DPC during driver unload. All we can
        // do is ensure that the DPC handler code recognizes that it should
        // abort execution immediately and hope that it does so before the
        // driver code is unloaded. We do this by setting the HeartBeatEnabled
        // flag to False above. If our DPC code happens to be executing at
        // this point in time on another processor, as denoted by
        // HeartBeatDpcRunning, we wait for it to finish.
        //
        if ( !KeRemoveQueueDpc( &HeartBeatDpc )) {

            CnTrace(HBEAT_DETAIL, HbTraceDpcRunning,
                "[HB] DPC not removed. HeartBeatDpcRunning = %!bool!",
                HeartBeatDpcRunning // LOGBOOLEAN
                );
        
            MEMLOG( MemLogHBDpcRunning, HeartBeatDpcRunning, 0 );

            if ( HeartBeatDpcRunning ) {

                // the wait below must not be made with the lock held
                CnReleaseLock( &HeartBeatLock, OldIrql );

                CnTrace(HBEAT_DETAIL, HbWaitForDpcToFinish,
                    "can't remove DPC; waiting on DPCFinished event"
                    );

                MEMLOG( MemLogWaitForDpcFinish, 0, 0 );

                KeWaitForSingleObject(&HeartBeatDpcFinished,
                                      Executive,
                                      KernelMode,
                                      FALSE,              // not alertable
                                      NULL);              // no timeout

                //
                // NOTE(review): the DPC's early-exit path sets this event
                // whenever it finds heartbeats disabled, whether or not
                // anyone is waiting. Confirm that a signal left over from
                // an earlier stop cannot satisfy this wait prematurely.
                //
                KeClearEvent( &HeartBeatDpcFinished );

                CnAcquireLock( &HeartBeatLock, &OldIrql);
            }
        }

        CnTrace(HBEAT_EVENT, HbTraceTimerStopped,
            "[HB] Heartbeat timer stopped."
            );

    }

    //
    // If the pre-allocated workitem was not used, we need to 
    // free it to remove the reference on the clusnet device object.
    // The pointer is detached under the lock and the work item is freed
    // after the lock has been released.
    //
    FreeWorkItem = ClussvcTerminateWorkItem;
    ClussvcTerminateWorkItem = NULL;

    CnReleaseLock( &HeartBeatLock, OldIrql );

    if (FreeWorkItem != NULL) {
        IoFreeWorkItem(FreeWorkItem);
    }

    return;

} // CnpStopHeartBeats

VOID
CnpSendMcastHBCompletion(
    IN NTSTATUS  Status,
    IN ULONG     BytesSent,
    IN PVOID     Context,
    IN PVOID     Buffer
)
/*++

Routine Description:

    Completion routine for a multicast heartbeat send. Whether the
    send succeeded or failed, the only work is to drop the reference
    on the multicast group passed as the context.

Arguments:

    Status - status of request (not examined)

    BytesSent - not used

    Context - points to multicast group data structure

    Buffer - not used

Return value:

    None.

--*/
{
    PCNP_MULTICAST_GROUP group;

    group = (PCNP_MULTICAST_GROUP) Context;
    CnAssert(group != NULL);

    // release the reference carried by the completed send
    CnpDereferenceMulticastGroup(group);

} // CnpSendMcastHBCompletion

NTSTATUS
CnpSendMcastHB(
    IN  PCNP_INTERFACE   Interface
    )
/*++

Routine Description:

    Records multicast heartbeat data for the target Interface in the
    NetworkHeartBeatInfo array. The first interface seen on a network
    during a pass claims a new array entry (growing the array if it is
    full) and takes a reference on the network's current multicast
    group; later interfaces on the same network reuse that entry.

Arguments:

    Interface - target interface for the heartbeat

Return Value:

    STATUS_INSUFFICIENT_RESOURCES if the array had to grow and the
        allocation failed.
    STATUS_SUCCESS otherwise.

Notes:

    Called from DPC with Network and Node locks held.
    Returns with Network and Node locks held.

--*/
{
    PNETWORK_MCAST_HEARTBEAT_INFO netEntry;
    ULONG                         slot;
    BOOLEAN                       mediaConnected;

    // look for an entry that already describes this network
    slot = 0;
    while (slot < NetworkHBInfoCount &&
           NetworkHeartBeatInfo[slot].NetworkId != Interface->Network->Id) {
        slot++;
    }

    if (slot == NetworkHBInfoCount) {

        // no entry yet: claim the next one, growing the array first
        // when every allocated entry is in use

        if (NetworkHBInfoCount >= NetworkHBInfoCurrentLength) {

            PNETWORK_MCAST_HEARTBEAT_INFO grownInfo;
            PNETWORK_MCAST_HEARTBEAT_INFO staleInfo;
            ULONG                         grownLength;

            grownLength = NetworkHBInfoCurrentLength
                + NetworkHBInfoLengthIncrement;

            grownInfo = CnAllocatePool(
                            grownLength
                            * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                            );

            if (grownInfo == NULL) {

                CnTrace(
                    HBEAT_DETAIL, HbNetInfoArrayAllocFailed,
                    "[HB] Failed to allocate network heartbeat info "
                    "array of length %u. Cannot schedule heartbeat "
                    "for node %u on network %u.",
                    grownLength,
                    Interface->Node->Id,
                    Interface->Network->Id
                    );

                // Give up on this heartbeat. Missing one is survivable
                // if memory recovers quickly; if it does not, this
                // node will be poisoned, which is probably best since
                // it is dangerously low on nonpaged pool.

                return(STATUS_INSUFFICIENT_RESOURCES);
            }

            // start from a zeroed array, carry over the entries that
            // are in use, then swap it in for the old one
            RtlZeroMemory(
                grownInfo,
                grownLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                );

            staleInfo = NetworkHeartBeatInfo;

            if (staleInfo != NULL) {

                if (NetworkHBInfoCount > 0) {
                    RtlCopyMemory(
                        grownInfo,
                        staleInfo,
                        NetworkHBInfoCount
                        * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                        );
                }

                CnFreePool(staleInfo);
            }

            NetworkHeartBeatInfo = grownInfo;
            NetworkHBInfoCurrentLength = grownLength;

            CnTrace(
                HBEAT_DETAIL, HbNetInfoArrayLengthIncreased,
                "[HB] Increased network heartbeat info array "
                "to size %u.",
                NetworkHBInfoCurrentLength
                );
        }

        // the entry at 'slot' is now ours
        NetworkHBInfoCount++;

        netEntry = &NetworkHeartBeatInfo[slot];

        RtlZeroMemory(
            &netEntry->McastTarget,
            sizeof(netEntry->McastTarget)
            );
        netEntry->NetworkId = Interface->Network->Id;
        netEntry->McastGroup = Interface->Network->CurrentMcastGroup;
        CnpReferenceMulticastGroup(netEntry->McastGroup);

    } else {

        netEntry = &NetworkHeartBeatInfo[slot];
    }

    mediaConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));

    CnTrace(HBEAT_DETAIL, HbTraceScheduleMcastHBForInterface,
        "[HB] Scheduling multicast HB for node %u on network %u "
        "(I/F state = %!ifstate!) "
        "(interface media connected = %!bool!).",
        Interface->Node->Id, // LOGULONG
        Interface->Network->Id, // LOGULONG
        Interface->State, // LOGIfState
        mediaConnected
        );

    // record this node's sequence/ack numbers and add the node to the
    // set of targets for this network's multicast heartbeat
    netEntry->NodeInfo[Interface->Node->Id].SeqNumber =
        Interface->SequenceToSend;
    netEntry->NodeInfo[Interface->Node->Id].AckNumber =
        Interface->LastSequenceReceived;
    CnpClusterScreenInsert(
        netEntry->McastTarget.ClusterScreen,
        INT_NODE(Interface->Node->Id)
        );

    return(STATUS_SUCCESS);

} // CnpSendMcastHB

NTSTATUS
CnpSendUcastHB(
    IN  PCNP_INTERFACE   Interface
    )
/*++

Routine Description:

    Appends a unicast heartbeat entry for the target Interface to the
    InterfaceHeartBeatInfo array, growing the array first if it is
    full.

Arguments:

    Interface - target interface for the heartbeat

Return Value:

    STATUS_INSUFFICIENT_RESOURCES if the array had to grow and the
        allocation failed.
    STATUS_SUCCESS otherwise.

Notes:

    Called from DPC with Network and Node locks held.
    Returns with Network and Node locks held.

--*/
{
    PINTERFACE_HEARTBEAT_INFO hbEntry;
    BOOLEAN                   mediaConnected;

    // grow the array when every allocated entry is already in use
    if (InterfaceHBInfoCount >= InterfaceHBInfoCurrentLength) {

        PINTERFACE_HEARTBEAT_INFO grownInfo;
        PINTERFACE_HEARTBEAT_INFO staleInfo;
        ULONG                     grownLength;

        grownLength = InterfaceHBInfoCurrentLength
            + InterfaceHBInfoLengthIncrement;

        grownInfo = CnAllocatePool(
                        grownLength * sizeof(INTERFACE_HEARTBEAT_INFO)
                        );

        if (grownInfo == NULL) {

            CnTrace(
                HBEAT_DETAIL, HbInfoArrayAllocFailed,
                "[HB] Failed to allocate heartbeat info "
                "array of length %u. Cannot schedule heartbeat "
                "for node %u on network %u.",
                grownLength,
                Interface->Node->Id,
                Interface->Network->Id
                );

            // Give up on this heartbeat. Missing one is survivable if
            // memory recovers quickly; if it does not, this node will
            // be poisoned, which is probably best since it is
            // dangerously low on nonpaged pool.

            return(STATUS_INSUFFICIENT_RESOURCES);
        }

        // carry over the entries in use, then swap the new array in
        staleInfo = InterfaceHeartBeatInfo;

        if (staleInfo != NULL) {

            if (InterfaceHBInfoCount > 0) {
                RtlCopyMemory(
                    grownInfo,
                    staleInfo,
                    InterfaceHBInfoCount * sizeof(INTERFACE_HEARTBEAT_INFO)
                    );
            }

            CnFreePool(staleInfo);
        }

        InterfaceHeartBeatInfo = grownInfo;
        InterfaceHBInfoCurrentLength = grownLength;

        CnTrace(
            HBEAT_DETAIL, HbInfoArrayLengthIncreased,
            "[HB] Increased heartbeat info array to size %u.",
            InterfaceHBInfoCurrentLength
            );
    }

    mediaConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));

    CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForInterface,
        "[HB] Scheduling HB for node %u on network %u (I/F state = %!ifstate!) "
        "(interface media connected = %!bool!).",
        Interface->Node->Id, // LOGULONG
        Interface->Network->Id, // LOGULONG
        Interface->State, // LOGIfState
        mediaConnected
        );

    // fill in the next free entry and claim it
    hbEntry = &InterfaceHeartBeatInfo[ InterfaceHBInfoCount ];

    hbEntry->NodeId = Interface->Node->Id;
    hbEntry->NetworkId = Interface->Network->Id;
    hbEntry->SeqNumber = Interface->SequenceToSend;
    hbEntry->AckNumber = Interface->LastSequenceReceived;

    InterfaceHBInfoCount += 1;

    return(STATUS_SUCCESS);

} // CnpSendUcastHB


VOID
CnpSendHBs(
    IN  PCNP_INTERFACE   Interface
    )

/*++

Routine Description:

    If Interface is at least in the Unreachable state, bump its send
    sequence number and record heartbeat data for it: a multicast
    entry when the network is multicast capable and the interface
    qualifies, and a unicast entry unless a multicast entry was
    successfully written for an interface already known to receive
    multicast. Also maintains the interface's multicast discovery and
    rediscovery counters.

Arguments:

    Interface - target interface for heartbeat message

Return Value:

    None

Notes:

    Releases the interface's network object lock before returning.

--*/

{
    // a unicast heartbeat is written unless multicast is known to work
    BOOLEAN needUnicast = TRUE;

    if ( Interface->State >= ClusnetInterfaceStateUnreachable ) {

        // each scheduled heartbeat gets a fresh sequence number
        ++Interface->SequenceToSend;

        //
        // Multicast handling applies only on multicast capable
        // networks. There are three mutually exclusive cases:
        //  - the target has been heard via multicast: multicast
        //    replaces unicast when it can be written
        //  - discovery multicasts remain to be sent: send one in
        //    addition to the unicast heartbeat
        //  - waiting to retry discovery: count down
        //
        if (CnpIsNetworkMulticastCapable(Interface->Network)) {

            if (CnpInterfaceQueryReceivedMulticast(Interface)) {

                // fall back to unicast only if the multicast data
                // could not be written
                if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
                    needUnicast = FALSE;
                }

            } else if (Interface->McastDiscoverCount > 0) {

                // a discovery multicast counts only if it was written
                if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {

                    Interface->McastDiscoverCount -= 1;

                    // discovery exhausted: arm the countdown (in
                    // heartbeat periods) until discovery is retried
                    if (Interface->McastDiscoverCount == 0) {
                        Interface->McastRediscoveryCountdown =
                            CNP_INTERFACE_MCAST_REDISCOVERY;
                    }
                }

            } else if (Interface->McastRediscoveryCountdown > 0) {

                // when the countdown expires, discovery resumes on
                // the next heartbeat to this interface
                Interface->McastRediscoveryCountdown -= 1;

                if (Interface->McastRediscoveryCountdown == 0) {
                    Interface->McastDiscoverCount =
                        CNP_INTERFACE_MCAST_DISCOVERY;
                }
            }
        }

        if (needUnicast) {
            CnpSendUcastHB(Interface);
        }
    }

    CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);

} // CnpSendHBs

VOID
CnpCheckForHBs(
    IN  PCNP_INTERFACE   Interface
    )

/*++

Routine Description:

    Check if heart beats have been received for this interface.

    Applies only to interfaces that are at least in the Unreachable
    state on networks that are not locally disconnected. The
    interface's missed HB count is incremented; a resulting count of
    one means a HB arrived during this period. Otherwise the miss is
    logged and, once the count reaches HBInterfaceLostHBTicks for an
    interface that is OnlinePending or Online, the interface is failed
    and an InterfaceUnreachable event is issued.

Arguments:

    Interface - interface to check

Return Value:

    None

Notes:

    The interface's network object lock is always released before
    returning, either directly or by CnpFailInterface.

--*/

{
    ULONG   MissedHBCount;
    BOOLEAN NetworkLockReleased = FALSE;

    if ( Interface->State >= ClusnetInterfaceStateUnreachable
         && !CnpIsNetworkLocalDisconn(Interface->Network) ) {

        MissedHBCount = InterlockedIncrement( &Interface->MissedHBs );

        if ( MissedHBCount == 1 ) {

            //
            // a HB was received in time for this node. Clear the status
            // info associated with this interface, but also mark the node
            // as having an interface that is ok. Note that we do not
            // use HBs on restricted nets to determine node health.
            //

            if (!CnpIsNetworkRestricted(Interface->Network)) {
                Interface->Node->HBWasMissed = FALSE;
            }
            
            CnTrace(HBEAT_DETAIL, HbTraceHBReceivedForInterface,
                "[HB] A HB was received from node %u on net %u in this "
                "period.",
                Interface->Node->Id, // LOGULONG
                Interface->Network->Id // LOGULONG
                );

        } else {
            CnTrace(HBEAT_EVENT, HbTraceMissedIfHB,
                "[HB] HB MISSED for node %u on net %u, missed count %u.",
                Interface->Node->Id, // LOGULONG
                Interface->Network->Id, // LOGULONG
                MissedHBCount // LOGULONG
                );

            MEMLOG4(
                MemLogMissedIfHB,
                (ULONG_PTR)Interface, MissedHBCount,
                Interface->Node->Id,
                Interface->Network->Id
                );

            if ( MissedHBCount >= HBInterfaceLostHBTicks &&
                 Interface->State >= ClusnetInterfaceStateOnlinePending ) {

                //
                // interface is either online pending or online, so move it
                // to unreachable. CnpFailInterface will also mark the node
                // unreachable if all of the node's interfaces are unreachable.
                // CnpFailInterface releases the network object lock as part
                // of its duties.
                //

                CnTrace(HBEAT_DETAIL, HbTraceFailInterface,
                    "[HB] Moving I/F for node %u on net %u to failed state, "
                    "previous I/F state = %!ifstate!.",
                    Interface->Node->Id, // LOGULONG
                    Interface->Network->Id, // LOGULONG
                    Interface->State // LOGIfState
                    );
                
                //
                // continuation log entries go before the main entry since
                // we scan the log backwards, i.e., we'll hit FailingIf
                // before we hit FailingIf1.
                //
                MEMLOG4(
                    MemLogFailingIf,
                    (ULONG_PTR)Interface,
                    Interface->State,
                    Interface->Node->Id,
                    Interface->Network->Id
                    );

                //
                // Trace the upcoming net interface unreachable event now,
                // while the network lock is still held and Interface->State
                // still holds the state the interface is leaving. Tracing
                // after CnpFailInterface would report the new state as the
                // "previous" one.
                //
                CnTrace(HBEAT_EVENT, HbTraceInterfaceUnreachableEvent,
                    "[HB] Issuing InterfaceUnreachable event for node %u "
                    "on net %u, previous I/F state = %!ifstate!.",
                    Interface->Node->Id, // LOGULONG
                    Interface->Network->Id, // LOGULONG
                    Interface->State // LOGIfState
                    );

                CnpFailInterface( Interface );
                NetworkLockReleased = TRUE;

                //
                // issue a net interface unreachable event to let consumers
                // know what is happening
                //
                CnIssueEvent(ClusnetEventNetInterfaceUnreachable,
                             Interface->Node->Id,
                             Interface->Network->Id);
            }
        }
    }

    if ( !NetworkLockReleased ) {

        CnReleaseLock(&Interface->Network->Lock,
                      Interface->Network->Irql);
    }

    return;

} // CnpCheckForHBs

BOOLEAN
CnpWalkNodesToSendHeartBeats(
    IN  PCNP_NODE   Node,
    IN  PVOID       UpdateContext,
    IN  CN_IRQL     NodeTableIrql
    )

/*++

Routine Description:

    Support routine called for each node in the node table while
    heartbeats are being scheduled. For a node that is alive and is
    not the local node, walks the node's interfaces with CnpSendHBs.

Arguments:

    Node - node being visited

    UpdateContext - not used

    NodeTableIrql - not used

Return Value:

    TRUE always; the node table lock is still held.

Notes:

    Releases the node's lock before returning.

--*/

{
    BOOLEAN remoteAndAlive;

    // only live nodes other than ourselves are sent heartbeats
    remoteAndAlive = (BOOLEAN)( Node != CnpLocalNode &&
                                Node->MMState == ClusnetNodeStateAlive );

    if ( remoteAndAlive ) {

        CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForNode,
            "[HB] Scheduling HBs for node %u (state = %!mmstate!).",
            Node->Id, // LOGULONG
            Node->MMState // LOGMmState
            );

        MEMLOG( MemLogSendHBWalkNode, Node->Id, Node->MMState );

        // CnpSendHBs is applied to each of the node's interfaces
        CnpWalkInterfacesOnNode( Node, (PVOID)CnpSendHBs );
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    // the node table lock is still held
    return TRUE;

} // CnpWalkNodesToSendHeartBeats

BOOLEAN
CnpWalkNodesToCheckForHeartBeats(
    IN  PCNP_NODE   Node,
    IN  PVOID       UpdateContext,
    IN  CN_IRQL     NodeTableIrql
    )

/*++

Routine Description:

    Heart beat checking routine called for each node in the node
    table. For a node that is alive and is not the local node, walks
    the node's interfaces with CnpCheckForHBs. If no interface reports
    a heartbeat, the node's missed HB count is incremented and, once
    it reaches HBNodeLostHBTicks, a node down event is issued (once).

Arguments:

    Node - node being visited

    UpdateContext - not used

    NodeTableIrql - not used

Return Value:

    TRUE always; the node table lock is still held.

Notes:

    Releases the node's lock before returning.

--*/

{
    BOOLEAN wasReachable;
    BOOLEAN memberOrJoining;
    ULONG   nodeMissedCount;

    if ( Node->MMState != ClusnetNodeStateAlive ||
         Node == CnpLocalNode ) {

        // nothing to check for this node; just log the visit
        MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );

    } else {

        //
        // The node is alive, so examine its interfaces. Start out
        // pessimistic: flag the node as having missed its HB and let
        // any interface that received one clear the flag. The current
        // reachability is captured first for logging.
        //

        wasReachable = !CnpIsNodeUnreachable( Node );
        Node->HBWasMissed = TRUE;

        CnTrace(HBEAT_DETAIL, HbTraceCheckNodeForHeartbeats,
            "[HB] Checking for HBs from node %u. WasReachable = %!bool!, "
            "state = %!mmstate!.",
            Node->Id, // LOGULONG
            wasReachable, // LOGBOOLEAN
            Node->MMState // LOGMmState
            );

        MEMLOG( MemLogCheckHBNodeReachable, Node->Id, wasReachable );
        MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );

        CnpWalkInterfacesOnNode( Node, (PVOID)CnpCheckForHBs );

        if ( Node->HBWasMissed ) {

            //
            // None of the node's interfaces saw a HB this period.
            // Count the miss against the node itself.
            //

            nodeMissedCount = InterlockedIncrement( &Node->MissedHBs );

            CnTrace(HBEAT_EVENT, HbTraceNodeMissedHB,
                "[HB] Node %u has missed %u HBs on all interfaces, "
                "current state = %!mmstate!.",
                Node->Id, // LOGULONG
                nodeMissedCount, // LOGULONG
                Node->MMState // LOGMmState
                );

            MEMLOG( MemLogCheckHBMissedHB, nodeMissedCount, Node->MMState );

            //
            // A node down is issued when the node is a member or is
            // joining, it has missed too many HBs, and a node down
            // has not already been issued for it.
            //
            memberOrJoining =
                (BOOLEAN)( Node->MMState == ClusnetNodeStateAlive ||
                           Node->MMState == ClusnetNodeStateJoining );

            if ( memberOrJoining &&
                 nodeMissedCount >= HBNodeLostHBTicks &&
                 !Node->NodeDownIssued ) {

                Node->NodeDownIssued = TRUE;
                CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );

                CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent,
                    "[HB] Issuing NodeDown event for node %u.",
                    Node->Id // LOGULONG
                    );

                MEMLOG( MemLogNodeDownIssued, Node->Id, TRUE );
            }
        }
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    // the node table lock is still held
    return TRUE;

} // CnpWalkNodesToCheckForHeartBeats

VOID
CnpHeartBeatDpc(
    PKDPC DpcObject,
    PVOID DeferredContext,
    PVOID Arg1,
    PVOID Arg2
    )

/*++

Routine Description:

    Heart beat DPC routine. Each invocation performs at most one of the
    following, selected by HeartBeatClockTicks:

      - send tick: walk the node table to collect heart beat targets,
        then send the multicast and unicast heart beats that were
        collected, and advance the tick count
      - check tick: walk the node table looking for missed heart beats
        and reset the tick count to zero
      - idle tick: just advance the tick count

    None of that work is done while ClussvcTerminateStopHbs is set. The
    clussvc hang check runs on every invocation that gets past the
    HeartBeatEnabled test.

    HeartBeatDpcRunning and the HeartBeatDpcFinished event, both
    manipulated under HeartBeatLock, are used to synchronize with the
    shutdown of heart beating.

Arguments:

    DpcObject, DeferredContext, Arg1, Arg2 - standard DPC routine
        arguments; none of them are referenced by this routine.

Return Value:

    None

--*/

{
    PINTERFACE_HEARTBEAT_INFO     pNodeHBInfo;
    PNETWORK_MCAST_HEARTBEAT_INFO pMcastHBInfo;
    CN_IRQL                       OldIrql;
    BOOLEAN                       StopSendRecvHbs;

#ifdef MEMLOGGING
    // system time captured by the previous invocation; zero until the
    // first invocation has completed this section
    static LARGE_INTEGER LastSysTime;
    LARGE_INTEGER CurrentTime;
    LARGE_INTEGER TimeDelta;

    //
    // try to determine the skew between when we asked to be run and
    // the time we actually did run
    //

    KeQuerySystemTime( &CurrentTime );

    if ( LastSysTime.QuadPart != 0 ) {

        //
        // add in HBTime which is negative due to relative sys time
        //

        TimeDelta.QuadPart = ( CurrentTime.QuadPart - LastSysTime.QuadPart ) +
            HBTime.QuadPart;

        //
        // only report skews whose magnitude exceeds MAX_DPC_SKEW
        //
        if ( TimeDelta.QuadPart > MAX_DPC_SKEW ||
             TimeDelta.QuadPart < -MAX_DPC_SKEW 
           ) 
        {
            LONG skew = (LONG)(TimeDelta.QuadPart/10000);  // convert to ms

            MEMLOG( MemLogDpcTimeSkew, TimeDelta.LowPart, 0 );
            

            CnTrace(HBEAT_EVENT, HbTraceLateDpc,
                "[HB] Timer fired %d ms late.", 
                skew // LOGSLONG
                );

        }
    }

    LastSysTime.QuadPart = CurrentTime.QuadPart;

#endif // MEMLOGGING

    CnAcquireLock( &HeartBeatLock, &OldIrql );

    //
    // heart beating has been turned off: signal the finished event and
    // do no work at all on this invocation.
    //
    if ( !HeartBeatEnabled ) {
        CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent,
            "DPC: setting HeartBeatDpcFinished event"
            );
        
        MEMLOG( MemLogSetDpcEvent, 0, 0 );

        KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
        
        CnReleaseLock( &HeartBeatLock, OldIrql );
        
        return;
    }

    // mark the DPC as in progress; cleared again under the lock at the
    // bottom of this routine
    HeartBeatDpcRunning = TRUE;

    //
    // Check if we need to stop sending heartbeats. This
    // occurs when clusnet detects that clussvc is not
    // operating correctly. In case system work queues
    // are blocked up (but not DPCs), we stop sending
    // heartbeats so that other nodes initiate failover.
    //
    // The flag is sampled under HeartBeatLock and the snapshot is used
    // for the rest of this invocation.
    //
    StopSendRecvHbs = ClussvcTerminateStopHbs;

    CnReleaseLock( &HeartBeatLock, OldIrql );

    if (!StopSendRecvHbs) {

        if ( HeartBeatClockTicks == 0 ||
             HeartBeatClockTicks == HeartBeatSendTicks) {

            //
            // time to send HBs. Clear the count of target interfaces 
            // and walk the node table finding the nodes that are
            // marked alive.
            //

            NetworkHBInfoCount = 0;
            InterfaceHBInfoCount = 0;
            CnpWalkNodeTable( CnpWalkNodesToSendHeartBeats, NULL );

            //
            // run down the list of networks and send out any multicast
            // heartbeats.
            //
            // NetworkHBInfoCount is consumed (counted down) by this
            // loop; it is cleared again at the top of the next send
            // tick.
            //

            pMcastHBInfo = NetworkHeartBeatInfo;
            while ( NetworkHBInfoCount-- ) {

                CnTrace(
                    HBEAT_EVENT, HbTraceSendMcastHB,
                    "[HB] Sending multicast HB on net %u.\n",
                    pMcastHBInfo->NetworkId
                    );

                CxSendMcastHeartBeatMessage(
                    pMcastHBInfo->NetworkId,
                    pMcastHBInfo->McastGroup,
                    pMcastHBInfo->McastTarget,
                    CxMulticastEpoch,
                    pMcastHBInfo->NodeInfo,
                    CnpSendMcastHBCompletion,
                    pMcastHBInfo->McastGroup
                    );

                ++pMcastHBInfo;
            }

            //
            // now run down the list of interfaces that we compiled and
            // send any unicast packets
            //
            // InterfaceHBInfoCount is consumed by this loop in the same
            // way as NetworkHBInfoCount above.
            //

            pNodeHBInfo = InterfaceHeartBeatInfo;
            while ( InterfaceHBInfoCount-- ) {

                CnTrace(HBEAT_EVENT, HbTraceSendHB,
                    "[HB] Sending HB to node %u on net %u, seqno %u, ackno %u.",
                    pNodeHBInfo->NodeId, // LOGULONG
                    pNodeHBInfo->NetworkId, // LOGULONG
                    pNodeHBInfo->SeqNumber, // LOGULONG
                    pNodeHBInfo->AckNumber // LOGULONG
                );

                CxSendHeartBeatMessage(pNodeHBInfo->NodeId,
                                       pNodeHBInfo->SeqNumber,
                                       pNodeHBInfo->AckNumber,
                                       pNodeHBInfo->NetworkId);

                MEMLOG(
                    MemLogSendingHB, 
                    pNodeHBInfo->NodeId, 
                    pNodeHBInfo->NetworkId
                    );

                ++pNodeHBInfo;
            }

            //
            // finally, up the tick count, progressing to the next potential
            // work item
            //

            HeartBeatClockTicks++;

        } else if ( HeartBeatClockTicks >= ( HeartBeatSendTicks - 1 )) {

            //
            // walk the node table looking for lack of heart beats on
            // a node's set of interfaces.
            //
            // Resetting the tick count to zero makes the next
            // invocation a send tick.
            //
            CnpWalkNodeTable( CnpWalkNodesToCheckForHeartBeats, NULL );
            HeartBeatClockTicks = 0;

        } else {

            // neither a send tick nor a check tick; just advance
            HeartBeatClockTicks++;
        }
    }

    // Check for clussvc hangs.
    CnpCheckClussvcHang();
    
    //
    // indicate that we're no longer running and if we're shutting down
    // then set the event that the shutdown thread is waiting on
    //

    CnAcquireLock( &HeartBeatLock, &OldIrql );
    HeartBeatDpcRunning = FALSE;

    if ( !HeartBeatEnabled ) {
        KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );

        CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent2,
            "DPC: setting HeartBeatDpcFinished event (2)"
            );
                 
        MEMLOG( MemLogSetDpcEvent, 0, 0 );
    }

    CnReleaseLock( &HeartBeatLock, OldIrql );

} // CnpHeartBeatDpc

PCNP_INTERFACE
CnpFindInterfaceLocked(
    IN  PCNP_NODE Node,
    IN  PCNP_NETWORK Network
    )

/*++

Routine Description:

    Scan a node's interface list for the interface that is attached to
    the given network. This is the pointer-based counterpart of
    CnpFindInterface, which works from IDs.

    The lock mask check below requires the node object lock to be held
    by the caller.

Arguments:

    Node - node whose InterfaceList is searched

    Network - network the wanted interface must refer to

Return Value:

    The matching interface, or NULL if no interface on the node refers
    to Network.

--*/

{
    PLIST_ENTRY ListHead;
    PLIST_ENTRY Link;
    PCNP_INTERFACE Candidate;

    CnVerifyCpuLockMask(CNP_NODE_OBJECT_LOCK,         // Required
                        0,                            // Forbidden
                        CNP_NETWORK_OBJECT_LOCK_MAX   // Maximum
                        );

    ListHead = &(Node->InterfaceList);
    Link = ListHead->Flink;

    //
    // the list is circular; arriving back at the head means every
    // entry has been examined.
    //
    while ( Link != ListHead ) {

        Candidate = CONTAINING_RECORD(Link,
                                      CNP_INTERFACE,
                                      NodeLinkage);

        if ( Candidate->Network == Network ) {
            return Candidate;
        }

        Link = Link->Flink;
    }

    return NULL;

} // CnpFindInterfaceLocked

VOID
CnpReceiveHeartBeatMessage(
    IN  PCNP_NETWORK Network,
    IN  CL_NODE_ID SourceNodeId,
    IN  ULONG SeqNumber,
    IN  ULONG AckNumber,
    IN  BOOLEAN Multicast,
    IN  ULONG MulticastEpoch
    )

/*++

Routine Description:

    We received a heartbeat from a node on a network. Validate it
    (local membership, multicast epoch, known interface, outerscreen
    membership of the sender, sequence and ack numbers) and, if it is
    acceptable, reset the missed HB count on that network's interface,
    bring the interface online if it was pending or unreachable, record
    multicast reachability, and issue a node up event when appropriate.

    Every exit path taken after a successful node lookup either
    releases Node->Lock or hands the node to CcmpSendPoisonPacket.

Arguments:

    Network - pointer to network block on which the packet was received

    SourceNodeId - node number that issued the packet

    SeqNumber - sending nodes' sequence num

    AckNumber - last seq number sent by us that was seen at the sending node
    
    Multicast - indicates whether this heartbeat was received in a multicast

    MulticastEpoch - indicates multicast epoch number from heartbeat packet

Return Value:

    None

--*/

{
    PCNP_NODE Node;
    PCNP_INTERFACE Interface;
    CX_OUTERSCREEN CurrentOuterscreen;


    //
    // Take a snapshot of the current outerscreen so that our
    // information doesn't change between decisions.
    //
    CurrentOuterscreen.UlongScreen = MMOuterscreen.UlongScreen;

    //
    // we ignore all packets until we're part of the cluster
    //
    if ( !CnpClusterScreenMember(
              CurrentOuterscreen.ClusterScreen,
              INT_NODE( CnLocalNodeId )
              )
       )
    {
        return;
    }

    //
    // We ignore multicast packets whose epoch is earlier than ours.
    // This prevents replay attacks, because the multicast key may
    // not have been regenerated since the last time a node joined (and
    // heartbeat sequence numbers were reset to one).
    //
    if (Multicast && MulticastEpoch < CxMulticastEpoch) {
        CnTrace(HBEAT_ERROR, HbTraceHBFromExpiredEpoch,
            "[HB] Discarding HB from old epoch. Source Node %u, "
            "Pkt Epoch %u, Current Epoch %u.",
            SourceNodeId, // LOGULONG
            MulticastEpoch, // LOGULONG
            CxMulticastEpoch // LOGULONG
            );
        return;
    }

    //
    // convert the Node ID into a pointer and find the interface
    // on which the packet was received.
    //
    Node = CnpFindNode( SourceNodeId );
    CnAssert( Node != NULL );

    if ( Node == NULL ) {

        //
        // The assertion above may not be compiled into every build, and
        // the node pointer is dereferenced immediately below, so drop
        // the packet instead. This mirrors the unknown node handling
        // in CnpReceivePoisonPacket: there is no node, hence no node
        // lock to release on this path.
        //
        IF_CNDBG(CN_DEBUG_HBEATS) {
            CNPRINT(("[HB] Discarding heartbeat on network %d from "
                     "unknown source node %d.\n",
                     Network->Id, SourceNodeId
                     ));
        }

        return;
    }

    Interface = CnpFindInterfaceLocked( Node, Network );

    if ( Interface == NULL ) {

        //
        // somehow this network object went away while we were
        // receiving some data on it. Just ignore this msg
        //

        CnTrace(HBEAT_ERROR, HbTraceHBFromUnknownNetwork,
            "[HB] Discarding HB from node %u on an unknown network.",
            Node->Id // LOGULONG
            );

        MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
        goto error_exit;
    }

    //
    // determine if the sender is legit. If it is not in the
    // outerscreen, then send a poison packet and we're done
    //

    if ( !CnpClusterScreenMember(
              CurrentOuterscreen.ClusterScreen,
              INT_NODE( SourceNodeId )
              )
       )
    {
        //
        // Don't bother sending poison packets on restricted networks. They
        // will be ignored.
        //
        if (CnpIsNetworkRestricted(Interface->Network)) {
            goto error_exit;
        }

        CnTrace(HBEAT_ERROR, HbTraceHBFromBanishedNode,
            "[HB] Discarding HB from banished node %u on net %u "
            "due to outerscreen %04X. Sending poison packet back.",
            Node->Id, // LOGULONG
            Interface->Network->Id, // LOGULONG
            CurrentOuterscreen.UlongScreen // LOGULONG
            );

        CcmpSendPoisonPacket( Node, NULL, 0, Network, NULL);
        //
        // The node lock was released.
        //
        return;
    }

    //
    // Check that the incoming seq num is something we expect to
    // guard against replay attacks.
    //
    if ( SeqNumber <= Interface->LastSequenceReceived) {

        CnTrace( 
            HBEAT_ERROR, HbTraceHBOutOfSequence,
            "[HB] Discarding HB from node %u on net %u with stale seqno %u. "
            "Last seqno %u. Multicast: %!bool!.",
            Node->Id, // LOGULONG
            Interface->Network->Id, // LOGULONG
            SeqNumber, // LOGULONG
            Interface->LastSequenceReceived, // LOGULONG
            Multicast
            );

        MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );

        goto error_exit;
    }

    // Update the interface's last received seq number
    // which will be sent back as the ack number.
    Interface->LastSequenceReceived = SeqNumber;

    //
    // Compare our seq number to the ack number in the packet.
    // If more than two off then the source node is not recv'ing
    // our heartbeats, but we're receiving theirs. This network is
    // not usable. We ignore this msg to guarantee that we will
    // declare the network down if the condition persists.
    //
    // In addition, if we are sending multicast heartbeats to this
    // interface, revert to unicasts in case there is a multicast
    // problem.
    //
    if (( Interface->SequenceToSend - AckNumber ) > 2 ) {

        CnTrace(HBEAT_ERROR, HbTraceHBWithStaleAck,
            "[HB] Discarding HB from node %u with stale ackno %u. "
            "My seqno %u. Multicast: %!bool!.",
            Node->Id, // LOGULONG
            AckNumber, // LOGULONG
            Interface->SequenceToSend, // LOGULONG
            Multicast
            );

        MEMLOG( MemLogSeqAckMismatch, (ULONG_PTR)Interface, Interface->State );

        if (CnpInterfaceQueryReceivedMulticast(Interface)) {
            CnpInterfaceClearReceivedMulticast(Interface);
            Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
            CnpMulticastChangeNodeReachability(
                Network,
                Node,
                FALSE,   // not reachable
                TRUE,    // raise event
                NULL     // OUT new mask
                );
        }

        goto error_exit;
    }

    MEMLOG4( MemLogReceivedPacket,
             SeqNumber, AckNumber,
             SourceNodeId, Interface->Network->Id );

    CnTrace(HBEAT_EVENT, HbTraceReceivedHBpacket,
        "[HB] Received HB from node %u on net %u, seqno %u, ackno %u, "
        "multicast: %!bool!.",
        SourceNodeId, // LOGULONG
        Interface->Network->Id, // LOGULONG
        SeqNumber, // LOGULONG
        AckNumber, // LOGULONG
        Multicast
        );

    // Reset the interface's and node's Missed HB count
    // to indicate that things are somewhat normal.
    //
    InterlockedExchange(&Interface->MissedHBs, 0);

    //
    // Don't reset node miss count on restricted nets.
    //
    if (!CnpIsNetworkRestricted(Interface->Network)) {
        InterlockedExchange(&Node->MissedHBs, 0);
    }

    //
    // if local interface was previously disconnected (e.g. received
    // a WMI NDIS status media disconnect event), reconnect it now.
    //
    if (CnpIsNetworkLocalDisconn(Interface->Network)) {
        CxReconnectLocalInterface(Interface->Network->Id);
    }

    //
    // move interface to online if necessary
    //
    if ( Interface->State == ClusnetInterfaceStateOnlinePending ||
         Interface->State == ClusnetInterfaceStateUnreachable ) {

        //
        // The network lock is taken here and not released anywhere in
        // this routine.
        // NOTE(review): presumably CnpOnlineInterface releases it -
        // confirm against its contract.
        //
        CnAcquireLockAtDpc( &Interface->Network->Lock );
        Interface->Network->Irql = DISPATCH_LEVEL;

        CnTrace(HBEAT_DETAIL, HbTraceInterfaceOnline,
            "[HB] Moving interface for node %u on network %u to online "
            "state.",
            Node->Id, // LOGULONG
            Interface->Network->Id // LOGULONG
            );

        //
        // Initiate multicast discovery.
        //
        Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
        Interface->McastRediscoveryCountdown = 0;

        MEMLOG( MemLogOnlineIf, Node->Id, Interface->State );

        CnpOnlineInterface( Interface );
    
        CnTrace(HBEAT_EVENT, HbTraceInterfaceUpEvent,
            "[HB] Issuing InterfaceUp event for node %u on network %u.",
            Node->Id, // LOGULONG
            Interface->Network->Id // LOGULONG
            );                

        CnIssueEvent(ClusnetEventNetInterfaceUp,
                     Node->Id,
                     Interface->Network->Id);
    }

    //
    // Indicate that a multicast has been received from this interface.
    // This allows us to include this interface in our multicasts.
    //
    if (Multicast) {
        IF_CNDBG(CN_DEBUG_HBEATS) {
            CNPRINT(("[HB] Received multicast heartbeat on "
                     "network %d from source node %d, seq %d, "
                     "ack %d.\n",
                     Network->Id, SourceNodeId,
                     SeqNumber, AckNumber
                     ));
        }

        if (!CnpInterfaceQueryReceivedMulticast(Interface)) {
            
            CnpInterfaceSetReceivedMulticast(Interface);
            
            CnpMulticastChangeNodeReachability(
                Network,
                Node,
                TRUE,    // reachable
                TRUE,    // raise event
                NULL     // OUT new mask
                );
        }

        // There is no point in sending discovery packets to this
        // interface.
        Interface->McastDiscoverCount = 0;
        Interface->McastRediscoveryCountdown = 0;

        // If the source node's multicast epoch is greater than
        // ours, update. We can make the initial comparison without
        // acquiring the lock.
        if (MulticastEpoch > CxMulticastEpoch) {
            CnpUpdateMulticastEpoch(MulticastEpoch);
        }
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    //
    // when the first HB is recv'ed, a node may be in either the
    // join or alive state (the sponsor, for instance, moves from
    // dead to alive). We need to clear the Node down issued flag
    // for either case. If the MM State is joining, then a node up
    // event must be issued as well. Note that we ignore HBs for
    // node health purposes on restricted nets.
    //
    // NOTE(review): the node and interface are read and
    // Node->NodeDownIssued is written here after the node lock has
    // been released - confirm that this is intentional.
    //

    if ( ( (Node->MMState == ClusnetNodeStateJoining)
           ||
           (Node->MMState == ClusnetNodeStateAlive)
         )
         &&
         Node->NodeDownIssued
         &&
         !CnpIsNetworkRestricted(Interface->Network)
       )
    {

        Node->NodeDownIssued = FALSE;
        MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );

        if ( Node->MMState == ClusnetNodeStateJoining ) {

            CnTrace(HBEAT_EVENT, HbTraceNodeUpEvent,
                "[HB] Issuing NodeUp event for node %u.",
                Node->Id // LOGULONG
                );   
            
            MEMLOG( MemLogNodeUp, Node->Id, 0 );

            CnIssueEvent( ClusnetEventNodeUp, Node->Id, 0 );
        }
    }

    return;

error_exit:

    //
    // common exit for discarded packets: drop the node lock and leave
    //
    CnReleaseLock( &Node->Lock, Node->Irql );
    return;

} // CnpReceiveHeartBeatMessage

NTSTATUS
CxSetOuterscreen(
    IN  ULONG Outerscreen
    )

/*++

Routine Description:

    Record a new outerscreen value in MMOuterscreen, after asserting
    that no bits beyond those allowed by ClusterDefaultMaxNodes are
    set, and log the change.

Arguments:

    Outerscreen - new outerscreen value

Return Value:

    STATUS_SUCCESS, always.

--*/

{
    //
    // sanity check: given the number of valid nodes, no extraneous
    // bits may be set in the new screen
    //

    CnAssert( ClusterDefaultMaxNodes <= 32 );
    CnAssert(
        0 ==
        ( Outerscreen & ( 0xFFFFFFFE << ( 32 - ClusterDefaultMaxNodes - 1 )))
        );

    //
    // the debug print shows the two low order bytes of the screen in
    // swapped order
    //
    IF_CNDBG( CN_DEBUG_HBEATS ) {
        CNPRINT(("[CCMP] Setting outerscreen to %04X\n",
                 ((Outerscreen & 0xFF)<< 8) | ((Outerscreen >> 8) & 0xFF)));
    }

    MMOuterscreen.UlongScreen = Outerscreen;

    CnTrace(HBEAT_EVENT, HbTraceSetOuterscreen,
        "[HB] Setting outerscreen to %04X",
        Outerscreen // LOGULONG
        );

    MEMLOG( MemLogOuterscreen, Outerscreen, 0 );

    return STATUS_SUCCESS;

} // CxSetOuterscreen

VOID
CnpTerminateClusterService(
    IN PVOID Parameter
    )

/*++

Routine Description:

    Work item routine queued by CnpReceivePoisonPacket. Writes an error
    log entry naming the node that sent the poison packet, terminates
    the cluster service process if a process handle is available, and
    frees the work item allocation.

Arguments:

    Parameter - pool block that begins with the WORK_QUEUE_ITEM and is
        immediately followed by the ID of the node that sent the poison
        packet. Freed by this routine.

Return Value:

    None

--*/

{
    PWORK_QUEUE_ITEM QueuedItem = Parameter;
    PULONG           TrailingNodeId = (PULONG)(QueuedItem + 1);
    ULONG            PoisoningNodeId = *TrailingNodeId;
    WCHAR            NodeIdText[ 16 ];

    //
    // the error log wants the node ID as an insertion string
    //
    swprintf(NodeIdText, L"%u", PoisoningNodeId );

    //
    // only way we can get here right now is if a poison packet was received.
    //
    CnWriteErrorLogEntry(CLNET_NODE_POISONED,
                         STATUS_SUCCESS,
                         NULL,
                         0,
                         1,
                         NodeIdText );

    //
    // there is still a race condition between the cluster service shutting
    // down and closing this handle and it being used here. This really
    // isn't a problem since the user mode portion is going away anyway.
    // Besides, there isn't a lot we can do if this call doesn't work anyway.
    //
    if ( ClussvcProcessHandle ) {
        ZwTerminateProcess( ClussvcProcessHandle, STATUS_CLUSTER_POISONED );
    }

    CnFreePool( Parameter );

} // CnpTerminateClusterService

VOID
CnpReceivePoisonPacket(
    IN  PCNP_NETWORK   Network,
    IN  CL_NODE_ID SourceNodeId,
    IN  ULONG SeqNumber
    )

/*++

Routine Description:

    Handle a poison packet received from another node. The packet is
    discarded if the source node is unknown, if the node has no
    interface on the receiving network, if the sequence number is
    stale, or if the network is restricted. Otherwise a poison packet
    event is issued, cluster network processing is halted, and a
    critical work item is queued to terminate the cluster service.

Arguments:

    Network - network on which the packet was received

    SourceNodeId - ID of the node that sent the packet

    SeqNumber - sequence number carried by the packet

Return Value:

    None

--*/

{
    PCNP_NODE        SourceNode;
    PCNP_INTERFACE   RecvInterface;
    PWORK_QUEUE_ITEM TerminateItem;


    //
    // translate the node ID into a node pointer. Nothing has to be
    // released if the lookup fails.
    //

    SourceNode = CnpFindNode( SourceNodeId );

    if ( SourceNode == NULL ) {
        CnTrace(HBEAT_ERROR, HbTraceNoPoisonFromUnknownNode,
            "[HB] Discarding poison packet from unknown node %u.",
            SourceNodeId // LOGULONG
        );
        return;
    }

    //
    // from here on every discard path must drop the node lock, which
    // is done at the discard label.
    //

    RecvInterface = CnpFindInterfaceLocked( SourceNode, Network );

    if ( RecvInterface == NULL ) {

        //
        // somehow this network object went away while we were
        // receiving some data on it. Just ignore this msg
        //
        CnTrace(HBEAT_ERROR, HbTracePoisonFromUnknownNetwork,
            "[HB] Discarding poison packet from node %u on unknown network.",
            SourceNode->Id // LOGULONG
            );

        MEMLOG( MemLogNoNetID, SourceNode->Id, (ULONG_PTR)Network );

        goto discard;
    }

    //
    // Check that the incoming seq num is something we expect to
    // guard against replay attacks.
    //

    if ( SeqNumber <= RecvInterface->LastSequenceReceived ) {

        CnTrace(HBEAT_ERROR , HbTracePoisonOutOfSeq,
            "[HB] Discarding poison packet from node %u with stale seqno %u. "
            "Current seqno %u.",
            SourceNodeId, // LOGULONG
            SeqNumber, // LOGULONG
            RecvInterface->LastSequenceReceived // LOGULONG
            );

        MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );

        goto discard;
    }

    //
    // Ignore poison packets from restricted networks
    //
    if ( CnpIsNetworkRestricted(Network) ) {

        CnTrace(HBEAT_ERROR , HbTracePoisonFromRestrictedNet,
            "[HB] Discarding poison packet from node %u on restricted "
            "network %u.",
            SourceNodeId, // LOGULONG
            Network->Id // LOGULONG
            );

        goto discard;
    }

    //
    // The packet passed all checks; a recv'ed poison packet is always
    // honored. The node lock is not needed past this point.
    //

    CnReleaseLock( &SourceNode->Lock, SourceNode->Irql );

    CnTrace(HBEAT_EVENT, HbTracePoisonPktReceived,
        "[HB] Received poison packet from node %u. Halting this node.",
        SourceNodeId // LOGULONG
        );

    MEMLOG( MemLogPoisonPktReceived, SourceNodeId, 0 );

    CnIssueEvent( ClusnetEventPoisonPacketReceived, SourceNodeId, 0 );

    //
    // Shutdown all cluster network processing.
    //
    CnHaltOperation(NULL);

    //
    // allocate a work queue item so we can terminate the cluster
    // service process. Extra space is allocated past the end of the
    // work item to carry the source node ID to
    // CnpTerminateClusterService. If the allocation fails, no work
    // item is queued.
    //

    TerminateItem = CnAllocatePool(
                        sizeof( WORK_QUEUE_ITEM ) + sizeof( CL_NODE_ID )
                        );

    if ( TerminateItem == NULL ) {
        return;
    }

    *((PULONG)(TerminateItem + 1)) = SourceNodeId;
    ExInitializeWorkItem(
        TerminateItem,
        CnpTerminateClusterService,
        TerminateItem
        );
    ExQueueWorkItem( TerminateItem, CriticalWorkQueue );

    return;

discard:

    CnReleaseLock( &SourceNode->Lock, SourceNode->Irql );
    return;

} // CnpReceivePoisonPacket

VOID
CnpLogClussvcHangAndTerminate(    
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID          Context
    )
/*++

Routine Description:

    Work item routine. Writes a CLNET_CLUSSVC_HUNG_TERMINATE entry to
    the system event log, terminates the clussvc process if a process
    handle is available, and frees the work item.

Arguments:

    DeviceObject - not referenced

    Context - the IO work item that is running this routine; freed
        before returning

Return Value:

    None

--*/
    
{
    WCHAR TimeoutText[40];

    //
    // the insertion string is the timeout: ticks times the heart beat
    // period, divided by 1000
    //
    swprintf(
        TimeoutText,
        L"%u",
        ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000)
        );

    CnWriteErrorLogEntry(CLNET_CLUSSVC_HUNG_TERMINATE,
                         STATUS_SUCCESS,
                         NULL,
                         0,
                         1,
                         TimeoutText);

    if (ClussvcProcessHandle) {
        ZwTerminateProcess(ClussvcProcessHandle, STATUS_CLUSTER_NODE_DOWN);
    }

    IoFreeWorkItem((PIO_WORKITEM)Context);

}//CnpLogClussvcHangAndTerminate

VOID
CnpLogClussvcHang(
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID          Context
    )
/*++

Routine Description:

    Work item routine. Writes a CLNET_CLUSSVC_HUNG entry to the system
    event log and frees the work item.

Arguments:

    DeviceObject - not referenced

    Context - the IO work item that is running this routine; freed
        before returning

Return Value:

    None

--*/
    
{
    WCHAR TimeoutText[40];

    //
    // the insertion string is the timeout: ticks times the heart beat
    // period, divided by 1000
    //
    swprintf(
        TimeoutText,
        L"%u",
        ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000)
        );

    CnWriteErrorLogEntry(CLNET_CLUSSVC_HUNG,
                         STATUS_SUCCESS,
                         NULL,
                         0,
                         1,
                         TimeoutText);

    IoFreeWorkItem((PIO_WORKITEM)Context);

}//CnpLogClussvcHang
    
VOID
CnpCheckClussvcHang(
    VOID
    )
/*++

Routine Description:

    Check for HB ticks from Clussvc, if not disabled, and Tick count has reached max
    then take appropriate action depending on the configured value.

Arguments:

    None

Return Value:

    None

--*/
    
{

    ULONG newValue;
    
    // Check if heartbeating is disabled, then return.
    if((ClussvcClusnetHbTickCount == 0) || 
        (ClussvcClusnetHbTimeoutAction == ClussvcHangActionDisable)) {
        return;
    }

    // Decrement the counter by 1.
    newValue = InterlockedDecrement(&ClussvcClusnetHbTickCount);

    // If this is 1->0 transition we need to do something.
    if(newValue != 0)
        return;

    CnTrace(HBEAT_ERROR , HbTraceClussvcHang,
            "[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u.",
            ClussvcClusnetHbTimeoutTicks,
            ClussvcClusnetHbTimeoutAction
            );

    IF_CNDBG( CN_DEBUG_HBEATS ) {
        CNPRINT((
            "[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u\n",
            ClussvcClusnetHbTimeoutTicks,
            (ULONG)ClussvcClusnetHbTimeoutAction
            ));
    }

    CnAssert(ClussvcClusnetHbTimeoutAction< ClussvcHangActionMax);
    
    switch(ClussvcClusnetHbTimeoutAction) {

        case ClussvcHangActionLog:
            // Just log a message and reset ClussvcClusnetHbTickCount to ClussvcClusnetHbTimeoutTicks
            // Use DelayedWorkQueue
            {
                PIO_WORKITEM WorkItem;    

                WorkItem = IoAllocateWorkItem(CnDeviceObject);
                if ( WorkItem != NULL ) {
                    IoQueueWorkItem(
                        WorkItem,
                        CnpLogClussvcHang,
                        DelayedWorkQueue,
                        (PVOID)WorkItem
                        );
                }
                InterlockedExchange(&ClussvcClusnetHbTickCount, ClussvcClusnetHbTimeoutTicks);
            }
            break;

        case ClussvcHangActionBugCheckMachine:
            // Bugcheck the machine.
            {
                KeBugCheckEx(
                    USER_MODE_HEALTH_MONITOR,
                    (ULONG_PTR)((ClussvcProcessHandle != NULL) ? ClussvcProcessObject : NULL),
                    (ULONG_PTR)(ClussvcClusnetHbTimeoutSeconds),
                    0,
                    0
                    );
            }
            break;

        case ClussvcHangActionTerminateService:
        default:    
            // Terminate Cluster Service. Handling is similar to the case as if clusnet has
            // received a poison packet. Using Critical work queue.
            {
                KIRQL        irql;

                // If we have already run through this terminate path,
                // then we do not do it again. The workitem will already
                // be on the critical work queue (even if it has not yet
                // executed).
                CnAcquireLock(&HeartBeatLock, &irql);

                if (ClussvcTerminateWorkItem != NULL) {

                    PIO_WORKITEM WorkItem;

                    // Swap out the workitem.
                    WorkItem = ClussvcTerminateWorkItem;
                    ClussvcTerminateWorkItem = NULL;

                    // Stop outgoing heartbeats.
                    ClussvcTerminateStopHbs = TRUE;
                    
                    CnReleaseLock(&HeartBeatLock, irql);

                    // Issue halt event so clusdisk stops reservations.
                    CnIssueEvent(ClusnetEventHalt, 0, 0);

                    // Stop normal clusnet activity.
                    CnHaltOperation(NULL);

                    // Queue the critical workitem to terminate the
                    // service process.
                    IoQueueWorkItem(
                        WorkItem,
                        CnpLogClussvcHangAndTerminate,
                        CriticalWorkQueue,
                        (PVOID)WorkItem
                        );

                 } else {
                    CnReleaseLock(&HeartBeatLock, irql);
                }
            }
            break;
    }    
}//CnpCheckClussvcHang

VOID
CnpWalkInterfacesAfterRegroup(
    IN  PCNP_INTERFACE   Interface
    )

/*++

Routine Description:

    Per-interface routine used by CnpWalkNodesAfterRegroup. Clears the
    interface's missed heart beat count after a regroup and releases
    the lock of the interface's network.

Arguments:

    Interface - interface whose counter is reset

Return Value:

    None

--*/

{
    PCNP_NETWORK OwningNetwork = Interface->Network;

    InterlockedExchange(&Interface->MissedHBs, 0);

    CnReleaseLock(&OwningNetwork->Lock, OwningNetwork->Irql);

} // CnpWalkInterfacesAfterRegroup

BOOLEAN
CnpWalkNodesAfterRegroup(
    IN  PCNP_NODE   Node,
    IN  PVOID       UpdateContext,
    IN  CN_IRQL     NodeTableIrql
    )

/*++

Routine Description:

    Node table walk routine run once regroup has finished. Issues a
    node down event for a node that is alive or joining but whose comm
    state is unreachable, resets the missed heart beat counts of the
    node and of each of its interfaces, clears the node down issued
    flag for nodes in the alive state, and releases the node lock.

Arguments:

    Node - node being visited

    UpdateContext - not referenced

    NodeTableIrql - not referenced

Return Value:

    TRUE, always; the node table lock is still held.

--*/

{
    BOOLEAN IsMember;
    BOOLEAN IsUnreachable;

    //
    // an unreachable comm state is inconsistent with an MM state of
    // alive or joining; report such a node as down.
    //
    IsUnreachable =
        (BOOLEAN)( Node->CommState == ClusnetNodeCommStateUnreachable );

    IsMember =
        (BOOLEAN)( Node->MMState == ClusnetNodeStateAlive
                   ||
                   Node->MMState == ClusnetNodeStateJoining );

    if ( IsMember && IsUnreachable ) {

        CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent2,
            "[HB] Issuing NodeDown event for node %u.",
            Node->Id // LOGULONG
            );

        MEMLOG( MemLogInconsistentStates, Node->Id, Node->MMState );
        CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
    }

    //
    // reset the missed heart beat counters, interfaces first
    //
    CnpWalkInterfacesOnNode( Node, (PVOID)CnpWalkInterfacesAfterRegroup );

    InterlockedExchange(&Node->MissedHBs, 0);

    //
    // the node down issued flag is cleared only for nodes in the alive
    // state. Once a node is marked dead, the flag is re-init'ed to
    // true (this is used during a join to issue only one node up
    // event).
    //

    if ( Node->MMState == ClusnetNodeStateAlive ) {

        Node->NodeDownIssued = FALSE;
        MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    return TRUE;       // the node table lock is still held

} // CnpWalkNodesAfterRegroup


VOID
CxRegroupFinished(
    ULONG NewEventEpoch,
    ULONG NewRegroupEpoch
    )

/*++

Routine Description:

    Called when regroup has finished. Records the new event epoch,
    advances the multicast epoch if the regroup epoch is newer, and
    walks the node table so that the walk routine can perform the
    per-node cleanup.

Arguments:

    NewEventEpoch - new value for EventEpoch; asserted not to be less
        than the current value

    NewRegroupEpoch - regroup epoch; becomes the multicast epoch if it
        is greater than the current multicast epoch

Return Value:

    None

--*/

{
    MEMLOG( MemLogRegroupFinished, NewEventEpoch, 0 );

    CnTrace(HBEAT_EVENT, HbTraceRegroupFinished,
        "[HB] Regroup finished, new event epoch = %u, "
        "new regroup epoch = %u.",
        NewEventEpoch, // LOGULONG
        NewRegroupEpoch // LOGULONG
        );

    //
    // the event epoch is not expected to move backwards
    //
    CnAssert( EventEpoch <= NewEventEpoch );
    EventEpoch = NewEventEpoch;

    //
    // unlocked pre-check; CnpUpdateMulticastEpoch repeats the
    // comparison under HeartBeatLock.
    //
    if ( CxMulticastEpoch < NewRegroupEpoch ) {
        CnpUpdateMulticastEpoch( NewRegroupEpoch );
    }

    CnpWalkNodeTable( CnpWalkNodesAfterRegroup, NULL );

} // CxRegroupFinished


VOID
CnpUpdateMulticastEpoch(
    ULONG NewEpoch
    )
/*++

Routine Description:

    Advance CxMulticastEpoch to NewEpoch, under HeartBeatLock, if and
    only if NewEpoch is greater than the current value.

    The Multicast Epoch must be monotonically increasing
    and agreed upon by all nodes. It is based on the
    regroup epoch (not to be confused with the ClusNet
    event epoch, which is local to each node). A stale
    regroup epoch update is conceivable, which is why
    updates that would not increase the value are ignored.

Arguments:

    NewEpoch - new epoch number

Return value:

    None

--*/
{
    KIRQL SavedIrql;

    CnAcquireLock(&HeartBeatLock, &SavedIrql);

    //
    // never move the epoch backwards
    //
    if (CxMulticastEpoch < NewEpoch) {

        CnTrace(HBEAT_EVENT, HbTraceUpdateMulticastEpoch,
            "[HB] Updating multicast epoch from %u to %u.",
            CxMulticastEpoch, NewEpoch
            );

        CxMulticastEpoch = NewEpoch;
    }

    CnReleaseLock(&HeartBeatLock, SavedIrql);

} // CnpUpdateMulticastEpoch

/* end chbeat.c */