Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2455 lines
68 KiB

/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
chbeat.c
Abstract:
membership state heart beat code. Tracks node availability through
exchanging heart beat messages with nodes that are marked as alive.
Author:
Charlie Wickham (charlwi) 05-Mar-1997
Environment:
Kernel Mode
Revision History:
--*/
#include "precomp.h"
#pragma hdrstop
#include "chbeat.tmh"
#include "clusvmsg.h"
#include "stdio.h"
/* External */
/* Static */
//
// heart beat structures - heart beats are driven by a timer and DPC
// routine. In order to synchronize the shutdown of the DPC, we also need two
// flags, an event and a spin lock.
//
// Periodic timer armed in CnpStartHeartBeats and cancelled in
// CnpStopHeartBeats.
KTIMER HeartBeatTimer;
// DPC object bound to CnpHeartBeatDpc; queued by HeartBeatTimer.
KDPC HeartBeatDpc;
// Synchronization event set by CnpHeartBeatDpc when it observes that
// heartbeats are disabled; CnpStopHeartBeats waits on it when a DPC is
// still running.
KEVENT HeartBeatDpcFinished;
// TRUE between CnpStartHeartBeats and CnpStopHeartBeats. Read and written
// under HeartBeatLock.
BOOLEAN HeartBeatEnabled = FALSE;
// TRUE while the body of CnpHeartBeatDpc is executing. Read and written
// under HeartBeatLock.
BOOLEAN HeartBeatDpcRunning = FALSE;
// Guards the two flags above, and ClussvcTerminateStopHbs and
// ClussvcTerminateWorkItem where this file touches them.
CN_LOCK HeartBeatLock;
#if 0
Heart Beating Explained
ClockTicks are incremented every HEART_BEAT_PERIOD millisecs. SendTicks are the
number of ticks that go by before sending HBs.
The check for received HB msgs is done in the tick just before HB msgs are
sent. Interface Lost HB ticks are in terms of heart beat check periods and
therefore are incremented only during the check period. An interface is failed
when the number of Interface Lost HB ticks have passed and no HB message has
been received on that interface.
Likewise, Node Lost HB Ticks are in terms of heart beat check periods and are
incremented during the check period. After all interfaces have failed on a
node, Node Lost HB ticks must pass without an interface going back online
before a node down event is issued. Note that a node's comm state is set to
offline when all interfaces have failed.
#endif
// Default tick settings. The wall-clock figures in the trailing comments
// depend on HEART_BEAT_PERIOD, which is defined outside this file --
// NOTE(review): confirm they still match the current period.
#define CLUSNET_HEART_BEAT_SEND_TICKS 2 // every 1.2 secs
#define CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS 3 // after 3 secs
#define CLUSNET_NODE_LOST_HEART_BEAT_TICKS 6 // after 6.6 secs
// Tick counter advanced by CnpHeartBeatDpc; it selects between the send
// pass and the check pass and is reset to zero after each check pass.
ULONG HeartBeatClockTicks;
// Tick value at which CnpHeartBeatDpc sends heartbeats.
ULONG HeartBeatSendTicks = CLUSNET_HEART_BEAT_SEND_TICKS;
// Missed-heartbeat count at which CnpCheckForHBs fails an interface.
ULONG HBInterfaceLostHBTicks = CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS;
// Missed-heartbeat count at which CnpWalkNodesToCheckForHeartBeats issues
// a node down event.
ULONG HBNodeLostHBTicks = CLUSNET_NODE_LOST_HEART_BEAT_TICKS;
//
// Unicast Heartbeat Data
//
// Even with multicast heartbeats, unicast heartbeats must be supported
// for backwards compatibility.
//
//
// This array records, for each interface that is due a unicast HB, the
// target node and network together with the sequence and ack numbers to
// send. The array is not protected by a lock since it is only filled in
// and consumed from the heartbeat DPC routine.
//
// One pending unicast heartbeat: where to send it and which sequence/ack
// numbers to carry. Filled in by CnpSendUcastHB and consumed by
// CnpHeartBeatDpc.
typedef struct _INTERFACE_HEARTBEAT_INFO {
CL_NODE_ID NodeId; // target node
CL_NETWORK_ID NetworkId; // network to send on
ULONG SeqNumber; // copied from Interface->SequenceToSend
ULONG AckNumber; // copied from Interface->LastSequenceReceived
} INTERFACE_HEARTBEAT_INFO, *PINTERFACE_HEARTBEAT_INFO;
// The array starts at the initial length and is grown by the increment
// whenever CnpSendUcastHB finds it full.
#define InterfaceHBInfoInitialLength 16
#define InterfaceHBInfoLengthIncrement 4
PINTERFACE_HEARTBEAT_INFO InterfaceHeartBeatInfo = NULL;
ULONG InterfaceHBInfoCount; // running count while sending HBs
ULONG InterfaceHBInfoCurrentLength; // current length of HB info array
// Set in CnpStartHeartBeats to HEART_BEAT_PERIOD expressed as a negative
// (relative) time in 100ns units.
LARGE_INTEGER HBTime; // HB time in relative sys time
// Half of one heartbeat period, as a positive 100ns count. Used by the
// MEMLOGGING skew check in CnpHeartBeatDpc.
#define MAX_DPC_SKEW ( -HBTime.QuadPart / 2 )
//
// Outerscreen mask. This is set by clussvc's membership manager in user
// mode. As it changes, MM drops down the set outerscreen Ioctl to update
// clusnet's notion of this mask. Clusnet uses this mask to determine the
// validity of a received heart beat. If the sending node is not part
// of the mask, then it is sent a poison packet and the received event
// is not passed on to other consumers. If it is a legitimate PP (poison packet), then
// we generate the proper event.
//
// Note: MM type definitions and macros have been moved to cnpdef.h for
// general usage.
//
typedef CX_CLUSTERSCREEN CX_OUTERSCREEN;
// Current outerscreen mask (see the comment block above).
CX_OUTERSCREEN MMOuterscreen;
//
// Multicast Heartbeat Data
//
// One pending multicast heartbeat per network. CnpSendMcastHB fills in an
// entry per network and adds each target node to it; CnpHeartBeatDpc sends
// one multicast message per entry.
//
typedef struct _NETWORK_MCAST_HEARTBEAT_INFO {
CL_NETWORK_ID NetworkId; // network to send on
PCNP_MULTICAST_GROUP McastGroup; // referenced in CnpSendMcastHB, dereferenced in CnpSendMcastHBCompletion
CX_HB_NODE_INFO NodeInfo[ClusterDefaultMaxNodes+ClusterMinNodeId]; // per-node seq/ack numbers, indexed by node id
CX_CLUSTERSCREEN McastTarget; // set of nodes this heartbeat is addressed to
} NETWORK_MCAST_HEARTBEAT_INFO, *PNETWORK_MCAST_HEARTBEAT_INFO;
// The array starts at the initial length and is grown by the increment
// whenever CnpSendMcastHB needs a new entry and finds it full.
#define NetworkHBInfoInitialLength 4
#define NetworkHBInfoLengthIncrement 4
PNETWORK_MCAST_HEARTBEAT_INFO NetworkHeartBeatInfo = NULL;
ULONG NetworkHBInfoCount; // running count while sending HBs
ULONG NetworkHBInfoCurrentLength; // current length of HB info array
// Not referenced in the portion of the file visible here.
CL_NETWORK_ID MulticastBestNetwork = ClusterAnyNetworkId;
// Epoch value passed to CxSendMcastHeartBeatMessage with every multicast
// heartbeat.
ULONG CxMulticastEpoch = 0;
//
// Declarations for Clussvc to Clusnet Heartbeating.
//
// Timeout, action and running tick count for the clussvc hang check.
// These are not referenced by the code visible in this portion of the
// file -- presumably they are used by CnpCheckClussvcHang; confirm there.
ULONG ClussvcClusnetHbTimeoutTicks = 0;
ClussvcHangAction ClussvcClusnetHbTimeoutAction = ClussvcHangActionDisable;
ULONG ClussvcClusnetHbTickCount = 0;
// When TRUE, CnpHeartBeatDpc skips both the send pass and the check pass.
// Cleared in CnpStartHeartBeats; read under HeartBeatLock.
BOOLEAN ClussvcTerminateStopHbs = FALSE;
// Work item pre-allocated in CnpStartHeartBeats and, if still present,
// freed in CnpStopHeartBeats.
PIO_WORKITEM ClussvcTerminateWorkItem = NULL;
// Parameters for the Clussvc to Clusnet Heartbeating bugcheck. These are
// for informational purposes only and should not otherwise be used. For
// instance, the process object is dereferenced immediately after the
// pointer is determined.
PEPROCESS ClussvcProcessObject = NULL;
ULONG ClussvcClusnetHbTimeoutSeconds = 0;
/* Forward */
// Allocates the heartbeat info arrays and initializes the timer, DPC,
// event and lock.
NTSTATUS
CxInitializeHeartBeat(
void
);
// Frees the heartbeat info arrays at driver unload.
VOID
CxUnloadHeartBeat(
VOID
);
// Timer DPC routine that sends heartbeats and checks for missed ones.
VOID
CnpHeartBeatDpc(
PKDPC DpcObject,
PVOID DeferredContext,
PVOID Arg1,
PVOID Arg2
);
// Node table walk callback used during the send pass.
BOOLEAN
CnpWalkNodesToSendHeartBeats(
IN PCNP_NODE UpdateNode,
IN PVOID UpdateContext,
IN CN_IRQL NodeTableIrql
);
// Node table walk callback used during the check pass.
BOOLEAN
CnpWalkNodesToCheckForHeartBeats(
IN PCNP_NODE UpdateNode,
IN PVOID UpdateContext,
IN CN_IRQL NodeTableIrql
);
// Per-interface callback that schedules a heartbeat for one interface.
VOID
CnpSendHBs(
IN PCNP_INTERFACE UpdateInterface
);
// The five routines below are defined outside the portion of the file
// visible here.
NTSTATUS
CxSetOuterscreen(
IN ULONG Outerscreen
);
VOID
CnpReceivePoisonPacket(
IN PCNP_NETWORK Network,
IN CL_NODE_ID SourceNodeId,
IN ULONG SeqNumber
);
VOID
CnpUpdateMulticastEpoch(
ULONG NewEpoch
);
// Called by CnpHeartBeatDpc on every timer tick.
VOID
CnpCheckClussvcHang(
VOID
);
VOID
CnpLogClussvcHangAndTerminate(
IN PDEVICE_OBJECT DeviceObject,
IN PVOID Context
);
VOID
CnpLogClussvcHang(
IN PDEVICE_OBJECT DeviceObject,
IN PVOID Context
);
/* End Forward */
// Initialization code goes in the INIT section and unload code in a
// pageable section.
#ifdef ALLOC_PRAGMA
#pragma alloc_text(INIT, CxInitializeHeartBeat)
#pragma alloc_text(PAGE, CxUnloadHeartBeat)
#endif // ALLOC_PRAGMA
NTSTATUS
CxInitializeHeartBeat(
    void
    )
/*++
Routine Description:
    Init the mechanisms used to send and monitor heart beats: allocate
    the unicast (interface) and multicast (network) heartbeat info
    arrays and initialize the heartbeat timer, DPC, event and lock.

    If any allocation fails, everything allocated so far is released
    and the array pointers and lengths are reset, so a failed
    initialization leaves no pool behind and no length counter that
    describes a buffer that does not exist.
Arguments:
    None
Return Value:
    STATUS_INSUFFICIENT_RESOURCES if allocation fails.
    STATUS_SUCCESS otherwise.
--*/
{
    // allocate the interface info array
    InterfaceHBInfoCount = 0;
    InterfaceHBInfoCurrentLength = InterfaceHBInfoInitialLength;

    if (InterfaceHBInfoCurrentLength > 0) {
        InterfaceHeartBeatInfo = CnAllocatePool(
                                     InterfaceHBInfoCurrentLength
                                     * sizeof(INTERFACE_HEARTBEAT_INFO)
                                     );
        if (InterfaceHeartBeatInfo == NULL) {
            InterfaceHBInfoCurrentLength = 0;
            return(STATUS_INSUFFICIENT_RESOURCES);
        }
    }

    // allocate the network info array
    NetworkHBInfoCount = 0;
    NetworkHBInfoCurrentLength = NetworkHBInfoInitialLength;

    if (NetworkHBInfoCurrentLength > 0) {
        NetworkHeartBeatInfo = CnAllocatePool(
                                   NetworkHBInfoCurrentLength
                                   * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                                   );
        if (NetworkHeartBeatInfo == NULL) {

            // undo the interface array allocation so that a failed
            // init does not leak nonpaged pool. clearing the pointer
            // keeps a later CxUnloadHeartBeat call harmless.
            NetworkHBInfoCurrentLength = 0;

            if (InterfaceHeartBeatInfo != NULL) {
                CnFreePool(InterfaceHeartBeatInfo);
                InterfaceHeartBeatInfo = NULL;
            }
            InterfaceHBInfoCurrentLength = 0;

            return(STATUS_INSUFFICIENT_RESOURCES);
        }

        RtlZeroMemory(
            NetworkHeartBeatInfo,
            NetworkHBInfoCurrentLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
            );
    }

    KeInitializeTimer( &HeartBeatTimer );
    KeInitializeDpc( &HeartBeatDpc, CnpHeartBeatDpc, NULL );
    KeInitializeEvent( &HeartBeatDpcFinished, SynchronizationEvent, FALSE );
    CnInitializeLock( &HeartBeatLock, CNP_HBEAT_LOCK );

    MEMLOG( MemLogInitHB, 0, 0 );

    return(STATUS_SUCCESS);

} // CxInitializeHeartBeat
VOID
CxUnloadHeartBeat(
VOID
)
/*++
Routine Description:
Called during clusnet driver unload. Free any data structures
allocated to send and monitor heartbeats.
Arguments:
None
Return Value:
None
--*/
{
PAGED_CODE();
if (InterfaceHeartBeatInfo != NULL) {
CnFreePool(InterfaceHeartBeatInfo);
InterfaceHeartBeatInfo = NULL;
}
if (NetworkHeartBeatInfo != NULL) {
CnFreePool(NetworkHeartBeatInfo);
NetworkHeartBeatInfo = NULL;
}
return;
} // CxUnloadHeartBeat
NTSTATUS
CnpStartHeartBeats(
    VOID
    )
/*++
Routine Description:
    Pre-allocates the clussvc termination workitem, then arms the
    periodic heartbeat timer and marks heartbeats as enabled.
Arguments:
    None
Return Value:
    STATUS_INSUFFICIENT_RESOURCES if the workitem allocation fails.
    STATUS_SUCCESS otherwise.
--*/
{
    CN_IRQL  savedIrql;
    ULONG    periodMs = HEART_BEAT_PERIOD;

    //
    // The workitem is set aside now so that an emergency termination
    // of the cluster service (user-mode hang) never has to allocate.
    // The lock is not needed around the allocation and assignment:
    // the acquire further down is the first one taken as the service
    // starts.
    //
    CnAssert(ClussvcTerminateWorkItem == NULL);

    ClussvcTerminateWorkItem = IoAllocateWorkItem(CnDeviceObject);

    if (ClussvcTerminateWorkItem == NULL) {
        CnTrace(HBEAT_EVENT, HbTraceTerminateWorkItemAlloc,
            "[HB] Failed to pre-allocate clussvc termination "
            "workitem.\n"
            );
        return(STATUS_INSUFFICIENT_RESOURCES);
    }

    CnAcquireLock( &HeartBeatLock, &savedIrql );

    // relative due time: the period in ms scaled to negative 100ns units
    HBTime.QuadPart = Int32x32To64( HEART_BEAT_PERIOD, -10000 );

    // the "already queued" result of the timer call is not needed
    KeSetTimerEx(&HeartBeatTimer,
                 HBTime,
                 HEART_BEAT_PERIOD,
                 &HeartBeatDpc);

    HeartBeatEnabled = TRUE;
    ClussvcTerminateStopHbs = FALSE;

    CnTrace(HBEAT_EVENT, HbTraceTimerStarted,
        "[HB] Heartbeat timer started. Period = %u ms.",
        periodMs // LOGULONG
        );

    MEMLOG( MemLogHBStarted, HEART_BEAT_PERIOD, 0 );

    CnReleaseLock( &HeartBeatLock, savedIrql );

    return(STATUS_SUCCESS);

} // CnpStartHeartBeats
VOID
CnpStopHeartBeats(
VOID
)
/*++
Routine Description:
Stop heart beating with other nodes in the cluster. If heartbeats are
enabled: clears HeartBeatEnabled, cancels the periodic timer, tries to
remove the DPC from the DPC queue and, if the DPC routine is currently
running, waits for it to signal HeartBeatDpcFinished. In all cases the
pre-allocated clussvc termination workitem, if still present, is freed.
Arguments:
None
Return Value:
None
--*/
{
BOOLEAN TimerCanceled;
CN_IRQL OldIrql;
PIO_WORKITEM FreeWorkItem = NULL;
CnAcquireLock( &HeartBeatLock, &OldIrql );
if (HeartBeatEnabled) {
// From here on any DPC invocation that reaches its lock-protected
// checks will see the flag cleared and bail out.
HeartBeatEnabled = FALSE;
//
// Cancel the periodic timer. Contrary to what the DDK implies,
// this does not cancel the DPC if it is still queued from the
// last timer expiration. It only stops the timer from firing
// again. This is true as of 8/99. See KiTimerListExpire() in
// ntos\ke\dpcsup.c.
//
TimerCanceled = KeCancelTimer( &HeartBeatTimer );
CnTrace(HBEAT_DETAIL, HbTraceTimerCancelled,
"[HB] Heartbeat timer cancelled: %!bool!",
TimerCanceled // LOGBOOLEAN
);
MEMLOG( MemLogHBStopped, 0, 0 );
//
// Remove the DPC associated with the timer from the system DPC
// queue, if it is there. This actually does nothing, because a
// timer DPC is only inserted into the system DPC queue if it is
// bound to a specific processor. Unbound DPCs are executed inline
// on the current processor in the kernel's timer expiration code.
// Note that the object for a periodic timer is reinserted into the
// timer queue before the DPC is executed. So, it is possible for the
// timer and the associated DPC to be queued simultaneously. This is
// true as of 8/99. See KiTimerListExpire() in ntos\ke\dpcsup.c.
//
// The bottom line is that there is no safe way to synchronize with
// the execution of a timer DPC during driver unload. All we can
// do is ensure that the DPC handler code recognizes that it should
// abort execution immediately and hope that it does so before the
// driver code is unloaded. We do this by setting the HeartBeatEnabled
// flag to False above. If our DPC code happens to be executing at
// this point in time on another processor, as denoted by
// HeartBeatDpcRunning, we wait for it to finish.
//
if ( !KeRemoveQueueDpc( &HeartBeatDpc )) {
CnTrace(HBEAT_DETAIL, HbTraceDpcRunning,
"[HB] DPC not removed. HeartBeatDpcRunning = %!bool!",
HeartBeatDpcRunning // LOGBOOLEAN
);
MEMLOG( MemLogHBDpcRunning, HeartBeatDpcRunning, 0 );
if ( HeartBeatDpcRunning ) {
// The lock is dropped before blocking; the DPC needs it in order
// to clear HeartBeatDpcRunning and set the event.
//
// NOTE(review): CnpHeartBeatDpc also sets HeartBeatDpcFinished on
// its early-exit path even when nobody is waiting, so the event
// could already be signaled from an earlier stop. If so, this wait
// would return before the running DPC has finished -- confirm
// whether the event should be cleared before releasing the lock.
CnReleaseLock( &HeartBeatLock, OldIrql );
CnTrace(HBEAT_DETAIL, HbWaitForDpcToFinish,
"can't remove DPC; waiting on DPCFinished event"
);
MEMLOG( MemLogWaitForDpcFinish, 0, 0 );
KeWaitForSingleObject(&HeartBeatDpcFinished,
Executive,
KernelMode,
FALSE, // not alertable
NULL); // no timeout
KeClearEvent( &HeartBeatDpcFinished );
CnAcquireLock( &HeartBeatLock, &OldIrql);
}
}
CnTrace(HBEAT_EVENT, HbTraceTimerStopped,
"[HB] Heartbeat timer stopped."
);
}
//
// If the pre-allocated workitem was not used, we need to
// free it to remove the reference on the clusnet device object.
// The pointer is claimed under the lock; the free itself happens
// after the lock has been released.
//
FreeWorkItem = ClussvcTerminateWorkItem;
ClussvcTerminateWorkItem = NULL;
CnReleaseLock( &HeartBeatLock, OldIrql );
if (FreeWorkItem != NULL) {
IoFreeWorkItem(FreeWorkItem);
}
return;
} // CnpStopHeartBeats
VOID
CnpSendMcastHBCompletion(
    IN NTSTATUS  Status,
    IN ULONG     BytesSent,
    IN PVOID     Context,
    IN PVOID     Buffer
    )
/*++
Routine Description:
    Completion routine for a multicast heartbeat send. Whatever the
    outcome of the send, drops the reference on the multicast group
    that was handed in as the completion context.
Arguments:
    Status - status of request (not examined)
    BytesSent - not used
    Context - points to multicast group data structure
    Buffer - not used
Return value:
    None.
--*/
{
    PCNP_MULTICAST_GROUP mcastGroup;

    mcastGroup = (PCNP_MULTICAST_GROUP) Context;

    CnAssert(mcastGroup != NULL);

    CnpDereferenceMulticastGroup(mcastGroup);

} // CnpSendMcastHBCompletion
NTSTATUS
CnpSendMcastHB(
    IN PCNP_INTERFACE Interface
    )
/*++
Routine Description:
    Records the target Interface in the multicast heartbeat entry
    for its network in the NetworkHeartBeatInfo array. A new entry
    is claimed (growing the array if it is full) the first time a
    network is seen in the current send pass.
Arguments:
    Interface - target interface for the multicast heartbeat
Return Value:
    STATUS_INSUFFICIENT_RESOURCES if the array had to grow and the
    allocation failed.
    STATUS_SUCCESS otherwise.
Notes:
    Called from DPC with Network and Node locks held.
    Returns with Network and Node locks held.
--*/
{
    ULONG                          slot;
    BOOLEAN                        networkConnected;
    PNETWORK_MCAST_HEARTBEAT_INFO  netInfo;

    // look for an entry already claimed by this network
    slot = 0;
    while (slot < NetworkHBInfoCount &&
           NetworkHeartBeatInfo[slot].NetworkId != Interface->Network->Id) {
        slot++;
    }

    if (slot == NetworkHBInfoCount) {

        // no entry yet. make room for one if the array is full.
        if (NetworkHBInfoCount >= NetworkHBInfoCurrentLength) {

            PNETWORK_MCAST_HEARTBEAT_INFO  grownInfo;
            PNETWORK_MCAST_HEARTBEAT_INFO  oldInfo;
            ULONG                          grownLength;

            grownLength = NetworkHBInfoCurrentLength
                          + NetworkHBInfoLengthIncrement;

            grownInfo = CnAllocatePool(
                            grownLength
                            * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                            );

            if (grownInfo == NULL) {

                CnTrace(
                    HBEAT_DETAIL, HbNetInfoArrayAllocFailed,
                    "[HB] Failed to allocate network heartbeat info "
                    "array of length %u. Cannot schedule heartbeat "
                    "for node %u on network %u.",
                    grownLength,
                    Interface->Node->Id,
                    Interface->Network->Id
                    );

                // give up on this heartbeat. missing one is not
                // fatal provided we recover quickly; if we do not,
                // this node gets poisoned, which is probably best
                // given how low it is on nonpaged pool.
                return(STATUS_INSUFFICIENT_RESOURCES);
            }

            // start from a zeroed array, carry over the entries
            // already claimed in this pass, then swap it in.
            RtlZeroMemory(
                grownInfo,
                grownLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                );

            oldInfo = NetworkHeartBeatInfo;

            if (oldInfo != NULL && NetworkHBInfoCount > 0) {
                RtlCopyMemory(
                    grownInfo,
                    oldInfo,
                    NetworkHBInfoCount
                    * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
                    );
            }

            NetworkHeartBeatInfo = grownInfo;
            NetworkHBInfoCurrentLength = grownLength;

            if (oldInfo != NULL) {
                CnFreePool(oldInfo);
            }

            CnTrace(
                HBEAT_DETAIL, HbNetInfoArrayLengthIncreased,
                "[HB] Increased network heartbeat info array "
                "to size %u.",
                NetworkHBInfoCurrentLength
                );
        }

        // claim the entry and set up its per-network fields
        NetworkHBInfoCount++;

        netInfo = &NetworkHeartBeatInfo[slot];

        RtlZeroMemory(
            &netInfo->McastTarget,
            sizeof(netInfo->McastTarget)
            );
        netInfo->NetworkId = Interface->Network->Id;
        netInfo->McastGroup = Interface->Network->CurrentMcastGroup;
        CnpReferenceMulticastGroup(netInfo->McastGroup);

    } else {
        netInfo = &NetworkHeartBeatInfo[slot];
    }

    networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));

    CnTrace(HBEAT_DETAIL, HbTraceScheduleMcastHBForInterface,
        "[HB] Scheduling multicast HB for node %u on network %u "
        "(I/F state = %!ifstate!) "
        "(interface media connected = %!bool!).",
        Interface->Node->Id, // LOGULONG
        Interface->Network->Id, // LOGULONG
        Interface->State, // LOGIfState
        networkConnected
        );

    // record this node's seq/ack numbers and add it to the target set
    netInfo->NodeInfo[Interface->Node->Id].SeqNumber =
        Interface->SequenceToSend;
    netInfo->NodeInfo[Interface->Node->Id].AckNumber =
        Interface->LastSequenceReceived;

    CnpClusterScreenInsert(
        netInfo->McastTarget.ClusterScreen,
        INT_NODE(Interface->Node->Id)
        );

    return(STATUS_SUCCESS);

} // CnpSendMcastHB
NTSTATUS
CnpSendUcastHB(
    IN PCNP_INTERFACE Interface
    )
/*++
Routine Description:
    Appends a unicast heartbeat entry for the target Interface to
    the InterfaceHeartBeatInfo array, growing the array first if it
    is full.
Arguments:
    Interface - target interface for the unicast heartbeat
Return Value:
    STATUS_INSUFFICIENT_RESOURCES if the array had to grow and the
    allocation failed.
    STATUS_SUCCESS otherwise.
Notes:
    Called from DPC with Network and Node locks held.
    Returns with Network and Node locks held.
--*/
{
    BOOLEAN                    networkConnected;
    PINTERFACE_HEARTBEAT_INFO  hbEntry;

    // make room for one more entry if the array is full
    if (InterfaceHBInfoCount >= InterfaceHBInfoCurrentLength) {

        PINTERFACE_HEARTBEAT_INFO  grownInfo;
        PINTERFACE_HEARTBEAT_INFO  oldInfo;
        ULONG                      grownLength;

        grownLength = InterfaceHBInfoCurrentLength
                      + InterfaceHBInfoLengthIncrement;

        grownInfo = CnAllocatePool(
                        grownLength * sizeof(INTERFACE_HEARTBEAT_INFO)
                        );

        if (grownInfo == NULL) {

            CnTrace(
                HBEAT_DETAIL, HbInfoArrayAllocFailed,
                "[HB] Failed to allocate heartbeat info "
                "array of length %u. Cannot schedule heartbeat "
                "for node %u on network %u.",
                grownLength,
                Interface->Node->Id,
                Interface->Network->Id
                );

            // give up on this heartbeat. missing one is not fatal
            // provided we recover quickly; if we do not, this node
            // gets poisoned, which is probably best given how low
            // it is on nonpaged pool.
            return(STATUS_INSUFFICIENT_RESOURCES);
        }

        // carry over the entries collected so far, then swap the
        // larger array in and release the old one.
        oldInfo = InterfaceHeartBeatInfo;

        if (oldInfo != NULL && InterfaceHBInfoCount > 0) {
            RtlCopyMemory(
                grownInfo,
                oldInfo,
                InterfaceHBInfoCount * sizeof(INTERFACE_HEARTBEAT_INFO)
                );
        }

        InterfaceHeartBeatInfo = grownInfo;
        InterfaceHBInfoCurrentLength = grownLength;

        if (oldInfo != NULL) {
            CnFreePool(oldInfo);
        }

        CnTrace(
            HBEAT_DETAIL, HbInfoArrayLengthIncreased,
            "[HB] Increased heartbeat info array to size %u.",
            InterfaceHBInfoCurrentLength
            );
    }

    networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));

    CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForInterface,
        "[HB] Scheduling HB for node %u on network %u (I/F state = %!ifstate!) "
        "(interface media connected = %!bool!).",
        Interface->Node->Id, // LOGULONG
        Interface->Network->Id, // LOGULONG
        Interface->State, // LOGIfState
        networkConnected
        );

    // fill in the next free entry and account for it
    hbEntry = &InterfaceHeartBeatInfo[ InterfaceHBInfoCount ];

    hbEntry->NodeId = Interface->Node->Id;
    hbEntry->SeqNumber = Interface->SequenceToSend;
    hbEntry->AckNumber = Interface->LastSequenceReceived;
    hbEntry->NetworkId = Interface->Network->Id;

    InterfaceHBInfoCount += 1;

    return(STATUS_SUCCESS);

} // CnpSendUcastHB
VOID
CnpSendHBs(
    IN PCNP_INTERFACE Interface
    )
/*++
Routine Description:
    Per-interface step of the send pass. If Interface is at least
    in the unreachable state, bump its send sequence number and
    schedule a multicast and/or unicast heartbeat for it. In every
    case the network object lock is released before returning.
Arguments:
    Interface - target interface for heartbeat message
Return Value:
    None
--*/
{
    BOOLEAN needUnicast = TRUE;

    // interfaces below the unreachable state get no heartbeats
    if ( Interface->State < ClusnetInterfaceStateUnreachable ) {
        CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);
        return;
    }

    // a heartbeat will be scheduled; advance the sequence number
    ++Interface->SequenceToSend;

    //
    // On a multicast capable network the interface is included in
    // a multicast heartbeat when either
    //   - a multicast heartbeat has been received from it, or
    //   - discovery multicasts remain to be sent to it
    //     (McastDiscoverCount > 0).
    //
    if (CnpIsNetworkMulticastCapable(Interface->Network)) {

        if (CnpInterfaceQueryReceivedMulticast(Interface)) {

            // multicast replaces unicast only if it was scheduled
            // successfully
            if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
                needUnicast = FALSE;
            }

        } else if (Interface->McastDiscoverCount > 0) {

            // discovery multicast, sent in addition to the unicast
            if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {

                Interface->McastDiscoverCount -= 1;

                // discovery exhausted: start counting heartbeat
                // periods until discovery is tried again
                if (Interface->McastDiscoverCount == 0) {
                    Interface->McastRediscoveryCountdown =
                        CNP_INTERFACE_MCAST_REDISCOVERY;
                }
            }

        } else if (Interface->McastRediscoveryCountdown > 0) {

            // when the countdown runs out, discovery restarts on
            // the next heartbeat to this interface
            Interface->McastRediscoveryCountdown -= 1;

            if (Interface->McastRediscoveryCountdown == 0) {
                Interface->McastDiscoverCount =
                    CNP_INTERFACE_MCAST_DISCOVERY;
            }
        }
    }

    if (needUnicast) {
        CnpSendUcastHB(Interface);
    }

    CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);

} // CnpSendHBs
VOID
CnpCheckForHBs(
    IN PCNP_INTERFACE Interface
    )
/*++
Routine Description:
    Check if heart beats have been received for this interface.

    For an interface that is at least in the unreachable state on a
    network that is not locally disconnected, the missed heartbeat
    count is incremented. A resulting count of one means a heartbeat
    arrived during the period, and (on non-restricted networks) the
    node's HBWasMissed flag is cleared. Otherwise the miss is logged,
    and once the count reaches HBInterfaceLostHBTicks on an interface
    that is online pending or online, the interface is failed and a
    net interface unreachable event is issued.

    The network object lock is released before returning, either
    here or by CnpFailInterface.
Arguments:
    Interface - interface to check for received heartbeats
Return Value:
    None
--*/
{
    ULONG   MissedHBCount;
    ULONG   PreviousState;
    BOOLEAN NetworkLockReleased = FALSE;

    if ( Interface->State >= ClusnetInterfaceStateUnreachable
         && !CnpIsNetworkLocalDisconn(Interface->Network) ) {

        MissedHBCount = InterlockedIncrement( &Interface->MissedHBs );

        if ( MissedHBCount == 1 ) {

            //
            // a HB was received in time for this node. Clear the status
            // info associated with this interface, but also mark the node
            // as having an interface that is ok. Note that we do not
            // use HBs on restricted nets to determine node health.
            //

            if (!CnpIsNetworkRestricted(Interface->Network)) {
                Interface->Node->HBWasMissed = FALSE;
            }

            CnTrace(HBEAT_DETAIL, HbTraceHBReceivedForInterface,
                "[HB] A HB was received from node %u on net %u in this "
                "period.",
                Interface->Node->Id, // LOGULONG
                Interface->Network->Id // LOGULONG
                );

        } else {

            CnTrace(HBEAT_EVENT, HbTraceMissedIfHB,
                "[HB] HB MISSED for node %u on net %u, missed count %u.",
                Interface->Node->Id, // LOGULONG
                Interface->Network->Id, // LOGULONG
                MissedHBCount // LOGULONG
                );

            MEMLOG4(
                MemLogMissedIfHB,
                (ULONG_PTR)Interface, MissedHBCount,
                Interface->Node->Id,
                Interface->Network->Id
                );

            if ( MissedHBCount >= HBInterfaceLostHBTicks &&
                 Interface->State >= ClusnetInterfaceStateOnlinePending ) {

                //
                // interface is either online pending or online, so move it
                // to unreachable. CnpFailInterface will also mark the node
                // unreachable if all of the node's interfaces are unreachable.
                // CnpFailInterface releases the network object lock as part
                // of its duties.
                //

                //
                // remember the state the interface is in before it is
                // failed, so that the event trace further down reports
                // the previous state rather than the state read after
                // CnpFailInterface has run.
                //
                PreviousState = Interface->State;

                CnTrace(HBEAT_DETAIL, HbTraceFailInterface,
                    "[HB] Moving I/F for node %u on net %u to failed state, "
                    "previous I/F state = %!ifstate!.",
                    Interface->Node->Id, // LOGULONG
                    Interface->Network->Id, // LOGULONG
                    Interface->State // LOGIfState
                    );

                //
                // continuation log entries go before the main entry since
                // we scan the log backwards, i.e., we'll hit FailingIf
                // before we hit FailingIf1.
                //
                MEMLOG4(
                    MemLogFailingIf,
                    (ULONG_PTR)Interface,
                    Interface->State,
                    Interface->Node->Id,
                    Interface->Network->Id
                    );

                CnpFailInterface( Interface );
                NetworkLockReleased = TRUE;

                //
                // issue a net interface unreachable event to let consumers
                // know what is happening
                //
                CnTrace(HBEAT_EVENT, HbTraceInterfaceUnreachableEvent,
                    "[HB] Issuing InterfaceUnreachable event for node %u "
                    "on net %u, previous I/F state = %!ifstate!.",
                    Interface->Node->Id, // LOGULONG
                    Interface->Network->Id, // LOGULONG
                    PreviousState // LOGIfState
                    );

                CnIssueEvent(ClusnetEventNetInterfaceUnreachable,
                             Interface->Node->Id,
                             Interface->Network->Id);
            }
        }
    }

    // CnpFailInterface has already dropped the lock on the failure path
    if ( !NetworkLockReleased ) {
        CnReleaseLock(&Interface->Network->Lock,
                      Interface->Network->Irql);
    }

    return;

} // CnpCheckForHBs
BOOLEAN
CnpWalkNodesToSendHeartBeats(
    IN PCNP_NODE  Node,
    IN PVOID      UpdateContext,
    IN CN_IRQL    NodeTableIrql
    )
/*++
Routine Description:
    Node table walk callback for the send pass. For a remote node
    in the alive state, walks the node's interfaces with CnpSendHBs
    to schedule heartbeats. The node lock is released before
    returning.
Arguments:
    Node - node being visited
    UpdateContext - not used
    NodeTableIrql - not used
Return Value:
    TRUE always, indicating the node table lock is still held
--*/
{
    // nothing to schedule for the local node or a node that is not alive
    if ( Node == CnpLocalNode ||
         Node->MMState != ClusnetNodeStateAlive ) {

        CnReleaseLock( &Node->Lock, Node->Irql );
        return TRUE; // the node table lock is still held
    }

    CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForNode,
        "[HB] Scheduling HBs for node %u (state = %!mmstate!).",
        Node->Id, // LOGULONG
        Node->MMState // LOGMmState
        );

    MEMLOG( MemLogSendHBWalkNode, Node->Id, Node->MMState );

    CnpWalkInterfacesOnNode( Node, (PVOID)CnpSendHBs );

    CnReleaseLock( &Node->Lock, Node->Irql );

    return TRUE; // the node table lock is still held

} // CnpWalkNodesToSendHeartBeats
BOOLEAN
CnpWalkNodesToCheckForHeartBeats(
    IN PCNP_NODE  Node,
    IN PVOID      UpdateContext,
    IN CN_IRQL    NodeTableIrql
    )
/*++
Routine Description:
    Node table walk callback for the check pass. For a remote node
    in the alive state, walks the node's interfaces with
    CnpCheckForHBs. If no interface clears the node's HBWasMissed
    flag, the node's missed heartbeat count is incremented and,
    once it reaches HBNodeLostHBTicks, a node down event is issued
    (at most once per node). The node lock is released before
    returning.
Arguments:
    Node - node being visited
    UpdateContext - not used
    NodeTableIrql - not used
Return Value:
    TRUE always, indicating the node table lock is still held
--*/
{
    BOOLEAN  wasReachable;
    ULONG    nodeMissedCount;

    // the local node and nodes that are not alive are only logged
    if ( Node == CnpLocalNode ||
         Node->MMState != ClusnetNodeStateAlive ) {

        MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );

        CnReleaseLock( &Node->Lock, Node->Irql );
        return TRUE; // the node table lock is still held
    }

    //
    // Start out pessimistic: flag the node as having missed its
    // heartbeats and let the interface walk prove otherwise. The
    // current reachability is captured for the log entries.
    //
    wasReachable = !CnpIsNodeUnreachable( Node );
    Node->HBWasMissed = TRUE;

    CnTrace(HBEAT_DETAIL, HbTraceCheckNodeForHeartbeats,
        "[HB] Checking for HBs from node %u. WasReachable = %!bool!, "
        "state = %!mmstate!.",
        Node->Id, // LOGULONG
        wasReachable, // LOGBOOLEAN
        Node->MMState // LOGMmState
        );

    MEMLOG( MemLogCheckHBNodeReachable, Node->Id, wasReachable );
    MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );

    CnpWalkInterfacesOnNode( Node, (PVOID)CnpCheckForHBs );

    if ( Node->HBWasMissed ) {

        //
        // no interface reported a heartbeat. count the miss against
        // the node itself.
        //
        nodeMissedCount = InterlockedIncrement( &Node->MissedHBs );

        CnTrace(HBEAT_EVENT, HbTraceNodeMissedHB,
            "[HB] Node %u has missed %u HBs on all interfaces, "
            "current state = %!mmstate!.",
            Node->Id, // LOGULONG
            nodeMissedCount, // LOGULONG
            Node->MMState // LOGMmState
            );

        MEMLOG( MemLogCheckHBMissedHB, nodeMissedCount, Node->MMState );

        //
        // issue a node down only once, only after too many misses,
        // and only for a node that is a member or is joining.
        //
        if ( !Node->NodeDownIssued
             && nodeMissedCount >= HBNodeLostHBTicks
             && ( Node->MMState == ClusnetNodeStateAlive
                  ||
                  Node->MMState == ClusnetNodeStateJoining
                )
           )
        {
            Node->NodeDownIssued = TRUE;
            CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );

            CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent,
                "[HB] Issuing NodeDown event for node %u.",
                Node->Id // LOGULONG
                );

            MEMLOG( MemLogNodeDownIssued, Node->Id, TRUE );
        }
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    return TRUE; // the node table lock is still held

} // CnpWalkNodesToCheckForHeartBeats
VOID
CnpHeartBeatDpc(
PKDPC DpcObject,
PVOID DeferredContext,
PVOID Arg1,
PVOID Arg2
)
/*++
Routine Description:
DPC routine driven by the periodic heartbeat timer. Depending on the
value of HeartBeatClockTicks, each invocation either
- runs a send pass: walks the node table to collect heartbeat targets,
then sends the multicast and unicast heartbeats that were collected,
- runs a check pass: walks the node table looking for interfaces and
nodes that have missed heartbeats, or
- just advances the tick count.
Both passes are skipped while ClussvcTerminateStopHbs is set. Every
invocation that gets past the enabled check also calls
CnpCheckClussvcHang.
If heartbeats have been disabled, the routine sets HeartBeatDpcFinished
so that CnpStopHeartBeats can stop waiting.
Arguments:
DpcObject - not used
DeferredContext - not used
Arg1 - not used
Arg2 - not used
Return Value:
None
--*/
{
PINTERFACE_HEARTBEAT_INFO pNodeHBInfo;
PNETWORK_MCAST_HEARTBEAT_INFO pMcastHBInfo;
CN_IRQL OldIrql;
BOOLEAN StopSendRecvHbs;
#ifdef MEMLOGGING
static LARGE_INTEGER LastSysTime;
LARGE_INTEGER CurrentTime;
LARGE_INTEGER TimeDelta;
//
// try to determine the skew between when we asked to be run and
// the time we actually did run
//
KeQuerySystemTime( &CurrentTime );
if ( LastSysTime.QuadPart != 0 ) {
//
// add in HBTime which is negative due to relative sys time
//
TimeDelta.QuadPart = ( CurrentTime.QuadPart - LastSysTime.QuadPart ) +
HBTime.QuadPart;
// log when the DPC ran more than half a period early or late
if ( TimeDelta.QuadPart > MAX_DPC_SKEW ||
TimeDelta.QuadPart < -MAX_DPC_SKEW
)
{
LONG skew = (LONG)(TimeDelta.QuadPart/10000); // convert to ms
MEMLOG( MemLogDpcTimeSkew, TimeDelta.LowPart, 0 );
CnTrace(HBEAT_EVENT, HbTraceLateDpc,
"[HB] Timer fired %d ms late.",
skew // LOGSLONG
);
}
}
LastSysTime.QuadPart = CurrentTime.QuadPart;
#endif // MEMLOGGING
CnAcquireLock( &HeartBeatLock, &OldIrql );
// Heartbeats were stopped before this invocation got going: signal
// the event and leave without touching anything else.
if ( !HeartBeatEnabled ) {
CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent,
"DPC: setting HeartBeatDpcFinished event"
);
MEMLOG( MemLogSetDpcEvent, 0, 0 );
KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
CnReleaseLock( &HeartBeatLock, OldIrql );
return;
}
// From here until the flag is cleared below, CnpStopHeartBeats will
// wait for this invocation to finish.
HeartBeatDpcRunning = TRUE;
//
// Check if we need to stop sending heartbeats. This
// occurs when clusnet detects that clussvc is not
// operating correctly. In case system work queues
// are blocked up (but not DPCs), we stop sending
// heartbeats so that other nodes initiate failover.
//
StopSendRecvHbs = ClussvcTerminateStopHbs;
CnReleaseLock( &HeartBeatLock, OldIrql );
if (!StopSendRecvHbs) {
if ( HeartBeatClockTicks == 0 ||
HeartBeatClockTicks == HeartBeatSendTicks) {
//
// time to send HBs. Clear the count of target interfaces
// and walk the node table finding the nodes that are
// marked alive.
//
NetworkHBInfoCount = 0;
InterfaceHBInfoCount = 0;
CnpWalkNodeTable( CnpWalkNodesToSendHeartBeats, NULL );
//
// run down the list of networks and send out any multicast
// heartbeats. The multicast group is passed as the completion
// context so that CnpSendMcastHBCompletion can drop the reference
// taken in CnpSendMcastHB. Note that the loop counts
// NetworkHBInfoCount down as it goes.
//
pMcastHBInfo = NetworkHeartBeatInfo;
while ( NetworkHBInfoCount-- ) {
CnTrace(
HBEAT_EVENT, HbTraceSendMcastHB,
"[HB] Sending multicast HB on net %u.\n",
pMcastHBInfo->NetworkId
);
CxSendMcastHeartBeatMessage(
pMcastHBInfo->NetworkId,
pMcastHBInfo->McastGroup,
pMcastHBInfo->McastTarget,
CxMulticastEpoch,
pMcastHBInfo->NodeInfo,
CnpSendMcastHBCompletion,
pMcastHBInfo->McastGroup
);
++pMcastHBInfo;
}
//
// now run down the list of interfaces that we compiled and
// send any unicast packets. This loop likewise counts
// InterfaceHBInfoCount down as it goes.
//
pNodeHBInfo = InterfaceHeartBeatInfo;
while ( InterfaceHBInfoCount-- ) {
CnTrace(HBEAT_EVENT, HbTraceSendHB,
"[HB] Sending HB to node %u on net %u, seqno %u, ackno %u.",
pNodeHBInfo->NodeId, // LOGULONG
pNodeHBInfo->NetworkId, // LOGULONG
pNodeHBInfo->SeqNumber, // LOGULONG
pNodeHBInfo->AckNumber // LOGULONG
);
CxSendHeartBeatMessage(pNodeHBInfo->NodeId,
pNodeHBInfo->SeqNumber,
pNodeHBInfo->AckNumber,
pNodeHBInfo->NetworkId);
MEMLOG(
MemLogSendingHB,
pNodeHBInfo->NodeId,
pNodeHBInfo->NetworkId
);
++pNodeHBInfo;
}
//
// finally, up the tick count, progressing to the next potential
// work item
//
HeartBeatClockTicks++;
} else if ( HeartBeatClockTicks >= ( HeartBeatSendTicks - 1 )) {
//
// walk the node table looking for lack of heart beats on
// a node's set of interfaces. The tick count then restarts at
// zero, so the next invocation is a send pass.
//
CnpWalkNodeTable( CnpWalkNodesToCheckForHeartBeats, NULL );
HeartBeatClockTicks = 0;
} else {
// neither a send tick nor the check tick: just advance
HeartBeatClockTicks++;
}
}
// Check for clussvc hangs.
CnpCheckClussvcHang();
//
// indicate that we're no longer running and if we're shutting down
// then set the event that the shutdown thread is waiting on
//
CnAcquireLock( &HeartBeatLock, &OldIrql );
HeartBeatDpcRunning = FALSE;
if ( !HeartBeatEnabled ) {
KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent2,
"DPC: setting HeartBeatDpcFinished event (2)"
);
MEMLOG( MemLogSetDpcEvent, 0, 0 );
}
CnReleaseLock( &HeartBeatLock, OldIrql );
} // CnpHeartBeatDpc
PCNP_INTERFACE
CnpFindInterfaceLocked(
    IN PCNP_NODE Node,
    IN PCNP_NETWORK Network
    )
/*++

Routine Description:

    Walk a node's interface list and return the interface that is
    attached to the supplied network. This is the pointer-based
    counterpart of CnpFindInterface, which works from IDs.

Arguments:

    Node - node whose InterfaceList is searched. The lock mask check
           below requires the node object lock to be held by the caller.

    Network - network that the wanted interface must reference.

Return Value:

    Pointer to the matching interface, or NULL if none of the node's
    interfaces references Network.

--*/
{
    PLIST_ENTRY     listHead = &(Node->InterfaceList);
    PLIST_ENTRY     link;
    PCNP_INTERFACE  candidate;

    CnVerifyCpuLockMask(CNP_NODE_OBJECT_LOCK,          // Required
                        0,                             // Forbidden
                        CNP_NETWORK_OBJECT_LOCK_MAX    // Maximum
                        );

    //
    // The list is circular; we are done when we arrive back at the head.
    //
    link = listHead->Flink;
    while ( link != listHead ) {

        candidate = CONTAINING_RECORD(link, CNP_INTERFACE, NodeLinkage);

        if ( candidate->Network == Network ) {
            return candidate;
        }

        link = link->Flink;
    }

    return NULL;

} // CnpFindInterfaceLocked
VOID
CnpReceiveHeartBeatMessage(
    IN PCNP_NETWORK Network,
    IN CL_NODE_ID SourceNodeId,
    IN ULONG SeqNumber,
    IN ULONG AckNumber,
    IN BOOLEAN Multicast,
    IN ULONG MulticastEpoch
    )
/*++

Routine Description:

    We received a heartbeat from a node on a network. Validate it
    (local membership, multicast epoch, outerscreen membership of the
    sender, sequence and ack numbers) and, if it is acceptable, reset
    the missed HB counts, bring the interface online if it was pending
    or unreachable, update multicast reachability, and clear the node's
    NodeDownIssued flag (issuing a NodeUp event for a joining node).

    Locking: CnpFindNode evidently returns the node with its lock held
    when it succeeds; every exit path after a successful lookup releases
    Node->Lock, either here or inside CcmpSendPoisonPacket.

Arguments:

    Network - pointer to network block on which the packet was received

    SourceNodeId - node number that issued the packet

    SeqNumber - sending node's sequence number

    AckNumber - last seq number sent by us that was seen at the sending node

    Multicast - indicates whether this heartbeat was received in a multicast

    MulticastEpoch - indicates multicast epoch number from heartbeat packet

Return Value:

    None

--*/
{
    PCNP_NODE       Node;
    PCNP_INTERFACE  Interface;
    CX_OUTERSCREEN  CurrentOuterscreen;

    //
    // Take a snapshot of the current outerscreen so that our
    // information doesn't change between decisions.
    //
    CurrentOuterscreen.UlongScreen = MMOuterscreen.UlongScreen;

    //
    // we ignore all packets until we're part of the cluster
    //
    if ( !CnpClusterScreenMember(
             CurrentOuterscreen.ClusterScreen,
             INT_NODE( CnLocalNodeId )
             )
       )
    {
        return;
    }

    //
    // We ignore multicast packets whose epoch is earlier than ours.
    // This prevents replay attacks, because the multicast key may
    // not have been regenerated since the last time a node joined (and
    // heartbeat sequence numbers were reset to one).
    //
    if (Multicast && MulticastEpoch < CxMulticastEpoch) {
        CnTrace(HBEAT_ERROR, HbTraceHBFromExpiredEpoch,
            "[HB] Discarding HB from old epoch. Source Node %u, "
            "Pkt Epoch %u, Current Epoch %u.",
            SourceNodeId, // LOGULONG
            MulticastEpoch, // LOGULONG
            CxMulticastEpoch // LOGULONG
            );
        return;
    }

    //
    // convert the Node ID into a pointer and find the interface
    // on which the packet was received.
    //
    Node = CnpFindNode( SourceNodeId );
    CnAssert( Node != NULL );

    if ( Node == NULL ) {
        //
        // The source node ID comes off the wire, and the poison packet
        // receive path already treats a failed lookup as a normal
        // condition. The assert above is presumably compiled out of
        // free builds, so without this check we would dereference a
        // NULL pointer below. No node lock is held when the lookup
        // fails (the poison packet path returns without releasing one),
        // so just drop the packet.
        //
        return;
    }

    Interface = CnpFindInterfaceLocked( Node, Network );

    if ( Interface == NULL ) {
        //
        // somehow this network object went away while we were
        // receiving some data on it. Just ignore this msg
        //
        CnTrace(HBEAT_ERROR, HbTraceHBFromUnknownNetwork,
            "[HB] Discarding HB from node %u on an unknown network.",
            Node->Id // LOGULONG
            );
        MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
        goto error_exit;
    }

    //
    // determine if this guy is legit. If not in the outerscreen,
    // then send a poison packet and we're done
    //
    if ( !CnpClusterScreenMember(
             CurrentOuterscreen.ClusterScreen,
             INT_NODE( SourceNodeId )
             )
       )
    {
        //
        // Don't bother sending poison packets on restricted networks. They
        // will be ignored.
        //
        if (CnpIsNetworkRestricted(Interface->Network)) {
            goto error_exit;
        }

        CnTrace(HBEAT_ERROR, HbTraceHBFromBanishedNode,
            "[HB] Discarding HB from banished node %u on net %u "
            "due to outerscreen %04X. Sending poison packet back.",
            Node->Id, // LOGULONG
            Interface->Network->Id, // LOGULONG
            CurrentOuterscreen.UlongScreen // LOGULONG
            );

        CcmpSendPoisonPacket( Node, NULL, 0, Network, NULL);

        //
        // The node lock was released.
        //
        return;
    }

    //
    // Check that the incoming seq num is something we expect to
    // guard against replay attacks.
    //
    if ( SeqNumber <= Interface->LastSequenceReceived) {
        CnTrace(
            HBEAT_ERROR, HbTraceHBOutOfSequence,
            "[HB] Discarding HB from node %u on net %u with stale seqno %u. "
            "Last seqno %u. Multicast: %!bool!.",
            Node->Id, // LOGULONG
            Interface->Network->Id, // LOGULONG
            SeqNumber, // LOGULONG
            Interface->LastSequenceReceived, // LOGULONG
            Multicast
            );
        MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
        goto error_exit;
    }

    // Update the interface's last received seq number
    // which will be sent back as the ack number.
    Interface->LastSequenceReceived = SeqNumber;

    //
    // Compare our seq number to the ack number in the packet.
    // If more than two off then the source node is not recv'ing
    // our heartbeats, but we're receiving theirs. This network is
    // not usable. We ignore this msg to guarantee that we will
    // declare the network down if the condition persists.
    //
    // In addition, if we are sending multicast heartbeats to this
    // interface, revert to unicasts in case there is a multicast
    // problem.
    //
    if (( Interface->SequenceToSend - AckNumber ) > 2 ) {
        CnTrace(HBEAT_ERROR, HbTraceHBWithStaleAck,
            "[HB] Discarding HB from node %u with stale ackno %u. "
            "My seqno %u. Multicast: %!bool!.",
            Node->Id, // LOGULONG
            AckNumber, // LOGULONG
            Interface->SequenceToSend, // LOGULONG
            Multicast
            );
        MEMLOG( MemLogSeqAckMismatch, (ULONG_PTR)Interface, Interface->State );

        if (CnpInterfaceQueryReceivedMulticast(Interface)) {
            CnpInterfaceClearReceivedMulticast(Interface);
            Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
            CnpMulticastChangeNodeReachability(
                Network,
                Node,
                FALSE, // not reachable
                TRUE, // raise event
                NULL // OUT new mask
                );
        }
        goto error_exit;
    }

    MEMLOG4( MemLogReceivedPacket,
             SeqNumber, AckNumber,
             SourceNodeId, Interface->Network->Id );

    CnTrace(HBEAT_EVENT, HbTraceReceivedHBpacket,
        "[HB] Received HB from node %u on net %u, seqno %u, ackno %u, "
        "multicast: %!bool!.",
        SourceNodeId, // LOGULONG
        Interface->Network->Id, // LOGULONG
        SeqNumber, // LOGULONG
        AckNumber, // LOGULONG
        Multicast
        );

    // Reset the interface's and node's Missed HB count
    // to indicate that things are somewhat normal.
    //
    InterlockedExchange(&Interface->MissedHBs, 0);

    //
    // Don't reset node miss count on restricted nets.
    //
    if (!CnpIsNetworkRestricted(Interface->Network)) {
        InterlockedExchange(&Node->MissedHBs, 0);
    }

    //
    // if local interface was previously disconnected (e.g. received
    // a WMI NDIS status media disconnect event), reconnect it now.
    //
    if (CnpIsNetworkLocalDisconn(Interface->Network)) {
        CxReconnectLocalInterface(Interface->Network->Id);
    }

    //
    // move interface to online if necessary
    //
    if ( Interface->State == ClusnetInterfaceStateOnlinePending ||
         Interface->State == ClusnetInterfaceStateUnreachable ) {

        CnAcquireLockAtDpc( &Interface->Network->Lock );
        Interface->Network->Irql = DISPATCH_LEVEL;

        CnTrace(HBEAT_DETAIL, HbTraceInterfaceOnline,
            "[HB] Moving interface for node %u on network %u to online "
            "state.",
            Node->Id, // LOGULONG
            Interface->Network->Id // LOGULONG
            );

        //
        // Initiate multicast discovery.
        //
        Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
        Interface->McastRediscoveryCountdown = 0;

        MEMLOG( MemLogOnlineIf, Node->Id, Interface->State );

        //
        // NOTE(review): the network lock taken above is not released in
        // this routine; presumably CnpOnlineInterface releases it.
        //
        CnpOnlineInterface( Interface );

        CnTrace(HBEAT_EVENT, HbTraceInterfaceUpEvent,
            "[HB] Issuing InterfaceUp event for node %u on network %u.",
            Node->Id, // LOGULONG
            Interface->Network->Id // LOGULONG
            );

        CnIssueEvent(ClusnetEventNetInterfaceUp,
                     Node->Id,
                     Interface->Network->Id);
    }

    //
    // Indicate that a multicast has been received from this interface.
    // This allows us to include this interface in our multicasts.
    //
    if (Multicast) {
        IF_CNDBG(CN_DEBUG_HBEATS) {
            CNPRINT(("[HB] Received multicast heartbeat on "
                     "network %d from source node %d, seq %d, "
                     "ack %d.\n",
                     Network->Id, SourceNodeId,
                     SeqNumber, AckNumber
                     ));
        }

        if (!CnpInterfaceQueryReceivedMulticast(Interface)) {
            CnpInterfaceSetReceivedMulticast(Interface);
            CnpMulticastChangeNodeReachability(
                Network,
                Node,
                TRUE, // reachable
                TRUE, // raise event
                NULL // OUT new mask
                );
        }

        // There is no point in sending discovery packets to this
        // interface.
        Interface->McastDiscoverCount = 0;
        Interface->McastRediscoveryCountdown = 0;

        // If the source node's multicast epoch is greater than
        // ours, update. We can make the initial comparison without
        // acquiring the lock.
        if (MulticastEpoch > CxMulticastEpoch) {
            CnpUpdateMulticastEpoch(MulticastEpoch);
        }
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    //
    // when the first HB is recv'ed, a node may be in either the
    // join or alive state (the sponsor, for instance, moves from
    // dead to alive). We need to clear the Node down issued flag
    // for either case. If the MM State is joining, then a node up
    // event must be issued as well. Note that we ignore HBs for
    // node health purposes on restricted nets.
    //
    // NOTE(review): Node and Interface are still read and written
    // below even though the node lock has just been released.
    //
    if ( ( (Node->MMState == ClusnetNodeStateJoining)
           ||
           (Node->MMState == ClusnetNodeStateAlive)
         )
         &&
         Node->NodeDownIssued
         &&
         !CnpIsNetworkRestricted(Interface->Network)
       )
    {
        Node->NodeDownIssued = FALSE;
        MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );

        if ( Node->MMState == ClusnetNodeStateJoining ) {
            CnTrace(HBEAT_EVENT, HbTraceNodeUpEvent,
                "[HB] Issuing NodeUp event for node %u.",
                Node->Id // LOGULONG
                );
            MEMLOG( MemLogNodeUp, Node->Id, 0 );
            CnIssueEvent( ClusnetEventNodeUp, Node->Id, 0 );
        }
    }

    return;

error_exit:

    //
    // Common discard path: the node lock is still held, drop it.
    //
    CnReleaseLock( &Node->Lock, Node->Irql );
    return;

} // CnpReceiveHeartBeatMessage
NTSTATUS
CxSetOuterscreen(
    IN ULONG Outerscreen
    )
/*++

Routine Description:

    Install a new outerscreen. The value is stored in
    MMOuterscreen.UlongScreen, which the heartbeat receive path
    snapshots when screening senders.

Arguments:

    Outerscreen - new outerscreen value.

Return Value:

    STATUS_SUCCESS, always.

--*/
{
    //
    // based on the number of valid nodes, make sure any extraneous
    // bits are not set.
    //
    // The previous mask, 0xFFFFFFFE << (32 - ClusterDefaultMaxNodes - 1),
    // only covers "the bits above the node range" when the limit is
    // exactly 16, and it shifts by a negative count (undefined behaviour)
    // when the limit is 32, which the first assert allows.
    //
    // The check below works on whole bytes: every byte of the screen
    // beyond the last one that can hold a node bit must be zero. Whole
    // bytes are used because the bit order inside a screen byte is not
    // visible from here. Like the mask it replaces, this assumes screen
    // byte N maps to bits 8N..8N+7 of the ULONG -- TODO confirm. For a
    // limit of 16 the mask is 0xFFFF0000, same as before. When the node
    // bits fill all four bytes there is nothing to check and the shift
    // is skipped.
    //
    CnAssert( ClusterDefaultMaxNodes <= 32 );
    CnAssert(
        ((( ClusterDefaultMaxNodes + 7 ) / 8 ) >= 4 )
        ||
        (( Outerscreen &
           ( 0xFFFFFFFF << ((( ClusterDefaultMaxNodes + 7 ) / 8 ) * 8 )))
         == 0 )
        );

    //
    // The debug print shows the low two bytes with byte 0 first.
    //
    IF_CNDBG( CN_DEBUG_HBEATS ) {
        CNPRINT(("[CCMP] Setting outerscreen to %04X\n",
                 ((Outerscreen & 0xFF)<< 8) | ((Outerscreen >> 8) & 0xFF)));
    }

    MMOuterscreen.UlongScreen = Outerscreen;

    CnTrace(HBEAT_EVENT, HbTraceSetOuterscreen,
        "[HB] Setting outerscreen to %04X",
        Outerscreen // LOGULONG
        );

    MEMLOG( MemLogOuterscreen, Outerscreen, 0 );

    return STATUS_SUCCESS;

} // CxSetOuterscreen
VOID
CnpTerminateClusterService(
    IN PVOID Parameter
    )
/*++

Routine Description:

    Work queue routine that writes a CLNET_NODE_POISONED error log
    entry and terminates the cluster service process, if we have a
    handle to it.

Arguments:

    Parameter - the WORK_QUEUE_ITEM that was queued. A ULONG holding
                the ID of the node that poisoned us is stored
                immediately after the work item in the same pool
                allocation. The allocation is freed here.

Return Value:

    None

--*/
{
    PWORK_QUEUE_ITEM    item = Parameter;
    PULONG              trailer = (PULONG)(item + 1);
    ULONG               poisonerId = *trailer;
    WCHAR               poisonerIdText[ 16 ];

    swprintf(poisonerIdText, L"%u", poisonerId );

    //
    // only way we can get here right now is if a poison packet was received.
    //
    CnWriteErrorLogEntry(
        CLNET_NODE_POISONED,
        STATUS_SUCCESS,
        NULL,
        0,
        1,
        poisonerIdText
        );

    if ( ClussvcProcessHandle != NULL ) {
        //
        // there is still a race condition between the cluster service
        // shutting down and closing this handle and it being used here.
        // This really isn't a problem since the user mode portion is going
        // away anyway. Besides, there isn't a lot we can do if this call
        // doesn't work anyway.
        //
        ZwTerminateProcess( ClussvcProcessHandle, STATUS_CLUSTER_POISONED );
    }

    CnFreePool( Parameter );

} // CnpTerminateClusterService
VOID
CnpReceivePoisonPacket(
    IN PCNP_NETWORK Network,
    IN CL_NODE_ID SourceNodeId,
    IN ULONG SeqNumber
    )
/*++

Routine Description:

    Handle a poison packet. The packet is discarded if the sending node
    is unknown, if the node has no interface on the receiving network,
    if the sequence number is not newer than the last one received on
    that interface, or if the network is restricted. Otherwise a
    PoisonPacketReceived event is issued, cluster network processing is
    halted, and a work item is queued to terminate the cluster service.

Arguments:

    Network - network on which the packet was received

    SourceNodeId - ID of the node that sent the packet

    SeqNumber - sequence number carried in the packet

Return Value:

    None

--*/
{
    PCNP_NODE           sender;
    PCNP_INTERFACE      recvInterface;
    PWORK_QUEUE_ITEM    terminateItem;

    //
    // given the node and the network pointers, find the interface on which
    // this packet was received
    //
    sender = CnpFindNode( SourceNodeId );

    if ( sender == NULL ) {
        CnTrace(HBEAT_ERROR, HbTraceNoPoisonFromUnknownNode,
            "[HB] Discarding poison packet from unknown node %u.",
            SourceNodeId // LOGULONG
            );
        return;
    }

    //
    // From here on the node lock is held; every discard goes through
    // the common unlock at the bottom.
    //
    recvInterface = CnpFindInterfaceLocked( sender, Network );

    if ( recvInterface == NULL ) {
        //
        // somehow this network object went away while we were
        // receiving some data on it. Just ignore this msg
        //
        CnTrace(HBEAT_ERROR, HbTracePoisonFromUnknownNetwork,
            "[HB] Discarding poison packet from node %u on unknown network.",
            sender->Id // LOGULONG
            );
        MEMLOG( MemLogNoNetID, sender->Id, (ULONG_PTR)Network );
        goto discard;
    }

    //
    // Check that the incoming seq num is something we expect to
    // guard against replay attacks.
    //
    if ( SeqNumber <= recvInterface->LastSequenceReceived ) {
        CnTrace(HBEAT_ERROR , HbTracePoisonOutOfSeq,
            "[HB] Discarding poison packet from node %u with stale seqno %u. "
            "Current seqno %u.",
            SourceNodeId, // LOGULONG
            SeqNumber, // LOGULONG
            recvInterface->LastSequenceReceived // LOGULONG
            );
        MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
        goto discard;
    }

    //
    // Ignore poison packets from restricted networks
    //
    if ( CnpIsNetworkRestricted(Network) ) {
        CnTrace(HBEAT_ERROR , HbTracePoisonFromRestrictedNet,
            "[HB] Discarding poison packet from node %u on restricted "
            "network %u.",
            SourceNodeId, // LOGULONG
            Network->Id // LOGULONG
            );
        goto discard;
    }

    //
    // We always honor a recv'ed poison packet. Drop the node lock
    // before raising the event and halting.
    //
    CnReleaseLock( &sender->Lock, sender->Irql );

    CnTrace(HBEAT_EVENT, HbTracePoisonPktReceived,
        "[HB] Received poison packet from node %u. Halting this node.",
        SourceNodeId // LOGULONG
        );
    MEMLOG( MemLogPoisonPktReceived, SourceNodeId, 0 );

    CnIssueEvent( ClusnetEventPoisonPacketReceived, SourceNodeId, 0 );

    //
    // Shutdown all cluster network processing.
    //
    CnHaltOperation(NULL);

    //
    // allocate a work queue item so we can terminate the cluster service
    // process. Extra space is allocated at the end of the item to carry
    // the source node ID to the worker routine, which frees the block.
    // If the allocation fails the service is simply not terminated here.
    //
    terminateItem = CnAllocatePool(
                        sizeof( WORK_QUEUE_ITEM ) + sizeof( CL_NODE_ID )
                        );

    if ( terminateItem != NULL ) {
        *((PULONG)(terminateItem + 1)) = SourceNodeId;
        ExInitializeWorkItem(
            terminateItem,
            CnpTerminateClusterService,
            terminateItem
            );
        ExQueueWorkItem( terminateItem, CriticalWorkQueue );
    }

    return;

discard:

    CnReleaseLock( &sender->Lock, sender->Irql );
    return;

} // CnpReceivePoisonPacket
VOID
CnpLogClussvcHangAndTerminate(
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID Context
    )
/*++

Routine Description:

    IO work item routine. Writes a CLNET_CLUSSVC_HUNG_TERMINATE entry to
    the system event log, carrying the clussvc heartbeat timeout
    converted from ticks to seconds, and then terminates the clussvc
    process if a process handle is available.

Arguments:

    DeviceObject - not used by this routine

    Context - the IO work item that is running this routine; it is
              freed before returning

Return Value:

    None

--*/
{
    WCHAR timeoutSecondsText[40];

    swprintf(
        timeoutSecondsText,
        L"%u",
        ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000)
        );

    CnWriteErrorLogEntry(CLNET_CLUSSVC_HUNG_TERMINATE,
                         STATUS_SUCCESS,
                         NULL,
                         0,
                         1,
                         timeoutSecondsText);

    if (ClussvcProcessHandle != NULL) {
        ZwTerminateProcess(ClussvcProcessHandle, STATUS_CLUSTER_NODE_DOWN);
    }

    IoFreeWorkItem((PIO_WORKITEM)Context);

} // CnpLogClussvcHangAndTerminate
VOID
CnpLogClussvcHang(
    IN PDEVICE_OBJECT DeviceObject,
    IN PVOID Context
    )
/*++

Routine Description:

    IO work item routine. Writes a CLNET_CLUSSVC_HUNG entry to the
    system event log, carrying the clussvc heartbeat timeout converted
    from ticks to seconds. Unlike CnpLogClussvcHangAndTerminate, the
    clussvc process is left alone.

Arguments:

    DeviceObject - not used by this routine

    Context - the IO work item that is running this routine; it is
              freed before returning

Return Value:

    None

--*/
{
    WCHAR timeoutSecondsText[40];

    swprintf(
        timeoutSecondsText,
        L"%u",
        ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000)
        );

    CnWriteErrorLogEntry(CLNET_CLUSSVC_HUNG,
                         STATUS_SUCCESS,
                         NULL,
                         0,
                         1,
                         timeoutSecondsText);

    IoFreeWorkItem((PIO_WORKITEM)Context);

} // CnpLogClussvcHang
VOID
CnpCheckClussvcHang(
VOID
)
/*++
Routine Description:
Check for HB ticks from Clussvc, if not disabled, and Tick count has reached max
then take appropriate action depending on the configured value.
Arguments:
None
Return Value:
None
--*/
{
ULONG newValue;
// Check if heartbeating is disabled, then return.
if((ClussvcClusnetHbTickCount == 0) ||
(ClussvcClusnetHbTimeoutAction == ClussvcHangActionDisable)) {
return;
}
// Decrement the counter by 1.
newValue = InterlockedDecrement(&ClussvcClusnetHbTickCount);
// If this is 1->0 transition we need to do something.
if(newValue != 0)
return;
CnTrace(HBEAT_ERROR , HbTraceClussvcHang,
"[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u.",
ClussvcClusnetHbTimeoutTicks,
ClussvcClusnetHbTimeoutAction
);
IF_CNDBG( CN_DEBUG_HBEATS ) {
CNPRINT((
"[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u\n",
ClussvcClusnetHbTimeoutTicks,
(ULONG)ClussvcClusnetHbTimeoutAction
));
}
CnAssert(ClussvcClusnetHbTimeoutAction< ClussvcHangActionMax);
switch(ClussvcClusnetHbTimeoutAction) {
case ClussvcHangActionLog:
// Just log a message and reset ClussvcClusnetHbTickCount to ClussvcClusnetHbTimeoutTicks
// Use DelayedWorkQueue
{
PIO_WORKITEM WorkItem;
WorkItem = IoAllocateWorkItem(CnDeviceObject);
if ( WorkItem != NULL ) {
IoQueueWorkItem(
WorkItem,
CnpLogClussvcHang,
DelayedWorkQueue,
(PVOID)WorkItem
);
}
InterlockedExchange(&ClussvcClusnetHbTickCount, ClussvcClusnetHbTimeoutTicks);
}
break;
case ClussvcHangActionBugCheckMachine:
// Bugcheck the machine.
{
KeBugCheckEx(
USER_MODE_HEALTH_MONITOR,
(ULONG_PTR)((ClussvcProcessHandle != NULL) ? ClussvcProcessObject : NULL),
(ULONG_PTR)(ClussvcClusnetHbTimeoutSeconds),
0,
0
);
}
break;
case ClussvcHangActionTerminateService:
default:
// Terminate Cluster Service. Handling is similar to the case as if clusnet has
// received a poison packet. Using Critical work queue.
{
KIRQL irql;
// If we have already run through this terminate path,
// then we do not do it again. The workitem will already
// be on the critical work queue (even if it has not yet
// executed).
CnAcquireLock(&HeartBeatLock, &irql);
if (ClussvcTerminateWorkItem != NULL) {
PIO_WORKITEM WorkItem;
// Swap out the workitem.
WorkItem = ClussvcTerminateWorkItem;
ClussvcTerminateWorkItem = NULL;
// Stop outgoing heartbeats.
ClussvcTerminateStopHbs = TRUE;
CnReleaseLock(&HeartBeatLock, irql);
// Issue halt event so clusdisk stops reservations.
CnIssueEvent(ClusnetEventHalt, 0, 0);
// Stop normal clusnet activity.
CnHaltOperation(NULL);
// Queue the critical workitem to terminate the
// service process.
IoQueueWorkItem(
WorkItem,
CnpLogClussvcHangAndTerminate,
CriticalWorkQueue,
(PVOID)WorkItem
);
} else {
CnReleaseLock(&HeartBeatLock, irql);
}
}
break;
}
}//CnpCheckClussvcHang
VOID
CnpWalkInterfacesAfterRegroup(
    IN PCNP_INTERFACE Interface
    )
/*++

Routine Description:

    Per-interface callback used after a regroup (passed to
    CnpWalkInterfacesOnNode by CnpWalkNodesAfterRegroup). Zeroes the
    interface's missed heartbeat count and releases the lock of the
    interface's network.

Arguments:

    Interface - interface being visited

Return Value:

    None

--*/
{
    PCNP_NETWORK network = Interface->Network;

    InterlockedExchange(&Interface->MissedHBs, 0);

    //
    // NOTE(review): the network lock is released here but not acquired
    // in this routine; presumably the interface walker hands it to us
    // held -- confirm against CnpWalkInterfacesOnNode.
    //
    CnReleaseLock(&network->Lock, network->Irql);

} // CnpWalkInterfacesAfterRegroup
BOOLEAN
CnpWalkNodesAfterRegroup(
    IN PCNP_NODE Node,
    IN PVOID UpdateContext,
    IN CN_IRQL NodeTableIrql
    )
/*++

Routine Description:

    Node table walk callback run once regroup has finished. Clears the
    missed heartbeat counts of the node and of each of its interfaces
    and, for nodes in the alive state, the node down issued flag. No
    alive or joining node should be unreachable at this point; if one
    is, a NodeDown event is issued for it.

Arguments:

    Node - node being visited; its lock is released before returning

    UpdateContext - not used by this routine

    NodeTableIrql - not used by this routine

Return Value:

    TRUE, always (the node table lock is still held).

--*/
{
    //
    // check for inconsistent settings of Comm and MM state
    //
    switch ( Node->MMState ) {

    case ClusnetNodeStateAlive:
    case ClusnetNodeStateJoining:

        if ( Node->CommState == ClusnetNodeCommStateUnreachable ) {
            CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent2,
                "[HB] Issuing NodeDown event for node %u.",
                Node->Id // LOGULONG
                );
            MEMLOG( MemLogInconsistentStates, Node->Id, Node->MMState );
            CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
        }
        break;

    default:
        break;
    }

    //
    // Reset the per-interface counters first, then the node's own.
    //
    CnpWalkInterfacesOnNode( Node, (PVOID)CnpWalkInterfacesAfterRegroup );

    InterlockedExchange(&Node->MissedHBs, 0);

    //
    // clear this only for nodes in the alive state. Once a node is marked
    // dead, the flag is re-init'ed to true (this is used during a join to
    // issue only one node up event).
    //
    if ( Node->MMState == ClusnetNodeStateAlive ) {
        Node->NodeDownIssued = FALSE;
        MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
    }

    CnReleaseLock( &Node->Lock, Node->Irql );

    return TRUE; // the node table lock is still held

} // CnpWalkNodesAfterRegroup
VOID
CxRegroupFinished(
    ULONG NewEventEpoch,
    ULONG NewRegroupEpoch
    )
/*++

Routine Description:

    Called when regroup has finished. Records the new event epoch,
    advances the multicast epoch if the new regroup epoch is larger,
    and walks the node table so that CnpWalkNodesAfterRegroup can do
    the per-node cleanup.

Arguments:

    NewEventEpoch - event epoch to install; asserted to be no smaller
                    than the current EventEpoch

    NewRegroupEpoch - regroup epoch; becomes the multicast epoch only
                      if it is greater than the current one

Return Value:

    None

--*/
{
    MEMLOG( MemLogRegroupFinished, NewEventEpoch, 0 );

    CnTrace(HBEAT_EVENT, HbTraceRegroupFinished,
        "[HB] Regroup finished, new event epoch = %u, "
        "new regroup epoch = %u.",
        NewEventEpoch, // LOGULONG
        NewRegroupEpoch // LOGULONG
        );

    CnAssert( EventEpoch <= NewEventEpoch );
    EventEpoch = NewEventEpoch;

    //
    // Cheap unlocked test first; CnpUpdateMulticastEpoch repeats the
    // comparison under the heartbeat lock.
    //
    if ( CxMulticastEpoch < NewRegroupEpoch ) {
        CnpUpdateMulticastEpoch( NewRegroupEpoch );
    }

    CnpWalkNodeTable( CnpWalkNodesAfterRegroup, NULL );

} // CxRegroupFinished
VOID
CnpUpdateMulticastEpoch(
    ULONG NewEpoch
    )
/*++

Routine Description:

    Advance the multicast epoch. The epoch must be monotonically
    increasing and agreed upon by all nodes; it is based on the regroup
    epoch (not to be confused with the ClusNet event epoch, which is
    local to each node). A stale update is conceivable, so the stored
    value is replaced only by a strictly larger one. The comparison and
    the store are done under the heartbeat lock.

Arguments:

    NewEpoch - proposed epoch number

Return value:

    None

--*/
{
    KIRQL savedIrql;

    CnAcquireLock(&HeartBeatLock, &savedIrql);

    if ( CxMulticastEpoch < NewEpoch ) {

        CnTrace(HBEAT_EVENT, HbTraceUpdateMulticastEpoch,
            "[HB] Updating multicast epoch from %u to %u.",
            CxMulticastEpoch, NewEpoch
            );

        CxMulticastEpoch = NewEpoch;
    }

    CnReleaseLock(&HeartBeatLock, savedIrql);

} // CnpUpdateMulticastEpoch
/* end chbeat.c */