You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1645 lines
56 KiB
1645 lines
56 KiB
#ifdef __TANDEM
|
|
#pragma columns 79
|
|
#pragma page "srgpos.c - T9050 - OS-dependent routines for Regroup Module"
|
|
#endif
|
|
|
|
/* @@@ START COPYRIGHT @@@
|
|
** Tandem Confidential: Need to Know only
|
|
** Copyright (c) 1995, Tandem Computers Incorporated
|
|
** Protected as an unpublished work.
|
|
** All Rights Reserved.
|
|
**
|
|
** The computer program listings, specifications, and documentation
|
|
** herein are the property of Tandem Computers Incorporated and shall
|
|
** not be reproduced, copied, disclosed, or used in whole or in part
|
|
** for any reason without the prior express written permission of
|
|
** Tandem Computers Incorporated.
|
|
**
|
|
** @@@ END COPYRIGHT @@@
|
|
**/
|
|
|
|
/*---------------------------------------------------------------------------
|
|
* This file (srgpos.c) contains OS-specific code used by Regroup.
|
|
*---------------------------------------------------------------------------*/
|
|
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
#include <wrgp.h>
|
|
|
|
#ifdef NSK
|
|
#include <pmsgrgp.h>
|
|
#endif /* NSK */
|
|
|
|
#if defined(NT)

/* Prototypes for NT-only routines. */

DWORD MmSetThreadPriority(VOID);

/* Passed to CreateThread() as the timer thread start routine. */
void NT_timer_thread(void);

PWCHAR RgpGetNodeNameFromId(node_t);

#endif // NT
|
|
|
|
/* The global pointer to regroup's internal data structure. */
|
|
|
|
#ifdef NSK
|
|
/* The global regroup pointer is #defined to a pointer in the message
|
|
* system root structure.
|
|
*/
|
|
#endif
|
|
|
|
#if defined(LCU) || defined(UNIX) || defined(NT)

/* Regroup control structure pointer; null until it is set up. */
rgp_control_t *rgp = (rgp_control_t *) RGP_NULL_PTR;

/* Quorum owner can be set by the forming node before rgp is initialized. */
DWORD QuorumOwner = MM_INVALID_NODE;

/* Clussvc to Clusnet heartbeating: this BOOL would enable it. */
BOOL MmStartClussvcToClusnetHeartbeat = FALSE;

LONG MmCheckSystemHealthTick = 0;

#endif /* LCU || UNIX || NT */
|
|
|
|
|
|
#ifdef LCU
|
|
|
|
/************************************************************************
|
|
* rgp_lcu_serv_listen
|
|
* ===================
|
|
*
|
|
* Description:
|
|
*
|
|
* This is an LCU-specific routine that gets called in IPC interrupt
|
|
* context when a datagram addressed to the Regroup Module is received.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* void *listen_callarg - required param, unused by regroup
|
|
* lcumsg_t *lcumsgp - pointer to message
|
|
* uint moredata - required param, unused by regroup
|
|
*
|
|
* Returns:
|
|
*
|
|
* int - Always returns ELCU_OK
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* The routine simply picks apart the arguments and calls
|
|
* rgp_received_packet().
|
|
*
|
|
*
|
|
************************************************************************/
|
|
_priv _resident int
|
|
rgp_lcu_serv_listen(void *listen_callarg, lcumsg_t *lcumsgp, uint moredata)
|
|
{
|
|
/* Ignore if the packet is not from the local system. */
|
|
if (lcumsgp->lcu_sysnum == rgp->OS_specific_control.my_sysnum)
|
|
rgp_received_packet(lcumsgp->lcu_node,
|
|
lcumsgp->lcu_reqmbuf.lcu_ctrlbuf,
|
|
lcumsgp->lcu_reqmbuf.lcu_ctrllen);
|
|
return(ELCU_OK);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_lcu_event_callback
|
|
* ======================
|
|
*
|
|
* Description:
|
|
*
|
|
* This is an LCU-specific routine that gets called in IPC interrupt
|
|
* context when the LCUEV_NODE_UNREACHABLE event is generated.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* ulong event - event # (= LCUEV_NODE_UNREACHABLE)
|
|
* sysnum_t sysnum - system # (= local system #)
|
|
* nodenum_t node - # of node that is unreachable
|
|
* int event_info - required parameter, unused by regroup
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* The routine simply transforms the LCU event into the regroup event
|
|
* RGP_EVT_NODE_UNREACHABLE and calls rgp_event_handler().
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
|
|
rgp_lcu_event_callback(
|
|
ulong event,
|
|
sysnum_t sysnum,
|
|
nodenum_t node,
|
|
int event_info)
|
|
{
|
|
/* Sanity checks:
|
|
* (1) The event must be LCUEV_NODE_UNREACHABLE, the only event
|
|
* we asked for.
|
|
* (1) The event must be for the local system, the only system
|
|
* we asked for.
|
|
*/
|
|
if ((event != LCUEV_NODE_UNREACHABLE) ||
|
|
(sysnum != rgp->OS_specific_control.my_sysnum))
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
|
|
rgp_event_handler(RGP_EVT_NODE_UNREACHABLE, node);
|
|
}
|
|
|
|
#endif /* LCU */
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_init_OS
|
|
* ===========
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine does OS-dependent regroup initialization such as
|
|
* initializing the regroup data structure lock, requesting a
|
|
* periodic timer to be installed and registering the callback
|
|
* routine for receiving regroup's unacknowledged packets.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* OS-dependent initializations.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
rgp_init_OS(void)
{
   /* Locals that exist only in particular platform builds. */
#ifdef UNIX
   struct sigaction alarm_action;     /* describes the SIGALRM handler */
#endif
#ifdef LCU
   sysnum_t  local_sysnum;
   lcumsg_t *msg;
#endif
#ifdef NT
   HANDLE    handle;
   DWORD     timer_thread_id = 0;
#endif

#if defined(NSK) || defined(UNIX) || defined(NT)
   /*
    * In NSK, the regroup caller ensures that timer and IPC interrupts
    * are disabled before the regroup routines are called, so no regroup
    * lock is initialized here. Also, instead of registering callback
    * routines, the appropriate routine names are hard coded into the
    * routines that must call them: the timer routine is called from
    * POLLINGCHECK, the periodic message system routine, and the packet
    * reception routine is called from the IPC interrupt handler.
    */

   /* Point the rgp_msgsys struct at the three send buffers and record
    * their lengths; these fields do not change afterwards.
    */
   rgp->rgp_msgsys_p->regroup_data     = (void *) &(rgp->rgppkt_to_send);
   rgp->rgp_msgsys_p->regroup_datalen  = RGPPKTLEN;

   rgp->rgp_msgsys_p->iamalive_data    = (void *) &(rgp->iamalive_pkt);
   rgp->rgp_msgsys_p->iamalive_datalen = IAMALIVEPKTLEN;

   rgp->rgp_msgsys_p->poison_data      = (void *) &(rgp->poison_pkt);
   rgp->rgp_msgsys_p->poison_datalen   = POISONPKTLEN;

#endif /* NSK || UNIX || NT */

#ifdef LCU

   /* Install rgp_periodic_check as a periodic timeout routine. */
   if (0 == itimeout(rgp_periodic_check,
                     NULL, /* parameter pointer */
                     ((RGP_CLOCK_PERIOD * HZ) / 100) | TO_PERIODIC,
                     plstr /* interrupt priority level */
                    ))
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }

   /* Register the listener for datagrams addressed to regroup's port. */
   if (ELCU_OK != lcuxprt_listen(LCU_RGP_PORT,
                                 rgp_lcu_serv_listen,
                                 NULL /* no call arg */,
                                 NULL /* no options */
                                ))
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }

   /* Look up and remember the local system number. */
   if (ELCU_OK != lcuxprt_config(LCU_GET_MYSYSNUM, &local_sysnum))
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }
   rgp->OS_specific_control.my_sysnum = local_sysnum;

   /* Allocate 3 message buffers: one each for regroup packets,
    * iamalive packets and poison packets.
    */

   /* Regroup packet message; the only one flagged LCUMSG_CRITICAL. */
   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
   if (msg == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);  /* no memory */
   }
   rgp->OS_specific_control.lcumsg_regroup_p = msg;
   msg->lcu_tag                 = NULL;
   msg->lcu_sysnum              = local_sysnum;
   msg->lcu_port                = LCU_RGP_PORT;
   msg->lcu_flags               = LCUMSG_CRITICAL;
   msg->lcu_reqmbuf.lcu_ctrllen = RGPPKTLEN;
   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->rgppkt_to_send);

   /* IamAlive packet message. */
   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
   if (msg == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);  /* no memory */
   }
   rgp->OS_specific_control.lcumsg_iamalive_p = msg;
   msg->lcu_tag                 = NULL;
   msg->lcu_sysnum              = local_sysnum;
   msg->lcu_port                = LCU_RGP_PORT;
   msg->lcu_reqmbuf.lcu_ctrllen = IAMALIVEPKTLEN;
   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->iamalive_pkt);

   /* Poison packet message. */
   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
   if (msg == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);  /* no memory */
   }
   rgp->OS_specific_control.lcumsg_poison_p = msg;
   msg->lcu_tag                 = NULL;
   msg->lcu_sysnum              = local_sysnum;
   msg->lcu_port                = LCU_RGP_PORT;
   msg->lcu_reqmbuf.lcu_ctrllen = POISONPKTLEN;
   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->poison_pkt);

   /* Register to get the LCUEV_NODE_UNREACHABLE event. */
   if (ELCU_OK != lcuxprt_events(LCU_CATCH_EVENTS, local_sysnum,
                                 LCUEV_NODE_UNREACHABLE,
                                 rgp_lcu_event_callback))
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }

#endif /* LCU */

#ifdef UNIX
   /* For testing on UNIX at user level, alarm() simulates timer ticks. */

   /* Install the alarm handler; SIGPOLL is added to the handler's mask
    * so that messages are blocked while a timer pop is being handled.
    */
   alarm_action.sa_flags   = 0;
   alarm_action.sa_handler = alarm_handler;
   sigemptyset(&(alarm_action.sa_mask));
   sigaddset(&(alarm_action.sa_mask), SIGPOLL);
   sigaction(SIGALRM, &alarm_action, NULL);

   alarm_callback = rgp_periodic_check;

   /* Round up the alarm period to the next higher second. */
   alarm_period = (RGP_CLOCK_PERIOD + 99) / 100;

   /* Ask for the first tick as soon as possible; subsequent ones will
    * be at alarm_period.
    */
   alarm(1);
#endif /* UNIX */

#ifdef NT
   /* On NT a separate thread acts as our timer. The timer thread waits
    * on the TimerSignal event to learn of an RGP rate change; an RGP
    * rate of 0 is the signal for the timer thread to exit.
    */

   /* TimerSignal: auto-reset event, created signalled. */
   handle = CreateEvent(NULL,    /* no security */
                        FALSE,   /* Autoreset */
                        TRUE,    /* Initial State is Signalled */
                        NULL);   /* No name */
   if (handle == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }
   rgp->OS_specific_control.TimerSignal = handle;

   /* Stabilized: manual-reset event, created signalled. */
   handle = CreateEvent(NULL,    /* no security */
                        TRUE,    /* Manual reset */
                        TRUE,    /* Initial State is Signalled */
                        NULL);   /* No name */
   if (handle == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);
   }
   rgp->OS_specific_control.Stabilized = handle;

   /* Arbitration and shutdown state start out cleared. */
   rgp->OS_specific_control.ArbitrationInProgress   = FALSE;
   rgp->OS_specific_control.ArbitratingNode         = MM_INVALID_NODE;
   rgp->OS_specific_control.ApproxArbitrationWinner = MM_INVALID_NODE;
   rgp->OS_specific_control.ShuttingDown            = FALSE;

   /* Start the timer thread. */
   handle = CreateThread(0,                 /* security */
                         0,                 /* stack size - use same as primary thread */
                         (LPTHREAD_START_ROUTINE)NT_timer_thread, /* starting point */
                         (VOID *) NULL,     /* no parameter */
                         0,                 /* create flags - start immediately */
                         &timer_thread_id); /* thread ID returned here */
   if (handle == NULL)
   {
      RGP_ERROR(RGP_INTERNAL_ERROR);  /* at least for now */
   }
   rgp->OS_specific_control.TimerThread   = handle;
   rgp->OS_specific_control.TimerThreadId = timer_thread_id;

   /* No callbacks registered yet; event epoch starts at zero. */
   rgp->OS_specific_control.UpDownCallback    = RGP_NULL_PTR;
   rgp->OS_specific_control.NodesDownCallback = RGP_NULL_PTR;
   rgp->OS_specific_control.EventEpoch        = 0;

#if defined TDM_DEBUG
   /* Clear all debug state. */
   rgp->OS_specific_control.debug.frozen                     = 0;
   rgp->OS_specific_control.debug.reload_in_progress         = 0;
   rgp->OS_specific_control.debug.timer_frozen               = 0;
   rgp->OS_specific_control.debug.doing_tracing              = 0;
   rgp->OS_specific_control.debug.MyTestPoints.TestPointWord = 0;

   // seed the random number function used in testing
   srand((unsigned) time( NULL ) );
#endif

#endif /* NT */

}
|
|
|
|
/************************************************************************
|
|
* rgp_cleanup_OS
|
|
* ===========
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine does OS-dependent cleanup of regroup structures
|
|
* and timer thread activity to ready for a new JOIN attempt.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* OS-dependent cleanup. On NT, signals the timer thread's TimerSignal event.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
rgp_cleanup_OS(void)
{
#ifdef NT
   /* a_tick might have changed, so signal the timer thread to
    * restart the RGP timer.
    */
   SetEvent(rgp->OS_specific_control.TimerSignal);
#endif /* NT */
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_update_regroup_packet
|
|
* =========================
|
|
*
|
|
* Description:
|
|
*
|
|
* Macro to copy the current regroup status into the regroup packet
|
|
* sending buffer.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* Copies the status (which is already in the form of a regroup status
|
|
* packet) into the packet buffer. Then, if we should let others (and
|
|
* ourselves) know of our stage, the current knownstage field is
|
|
* updated to include the local node number.
|
|
*
|
|
************************************************************************/
|
|
#define rgp_update_regroup_packet                                           \
   do                                                                       \
   {                                                                        \
      /* Begin with a verbatim copy of the current regroup status. */       \
      rgp->rgppkt_to_send = rgp->rgppkt;                                    \
                                                                            \
      /* When our stage is to be made known, add the local node to the      \
       * knownstage mask that matches the current stage. Stages with no     \
       * case below leave the copy unmodified.                              \
       */                                                                   \
      if (rgp->sendstage)                                                   \
      {                                                                     \
         switch (rgp->rgppkt.stage)                                         \
         {                                                                  \
            case RGP_ACTIVATED:                                             \
               ClusterInsert(rgp->rgppkt_to_send.knownstage1, rgp->mynode); \
               break;                                                       \
                                                                            \
            case RGP_CLOSING:                                               \
               ClusterInsert(rgp->rgppkt_to_send.knownstage2, rgp->mynode); \
               break;                                                       \
                                                                            \
            case RGP_PRUNING:                                               \
               ClusterInsert(rgp->rgppkt_to_send.knownstage3, rgp->mynode); \
               break;                                                       \
                                                                            \
            case RGP_PHASE1_CLEANUP:                                        \
               ClusterInsert(rgp->rgppkt_to_send.knownstage4, rgp->mynode); \
               break;                                                       \
                                                                            \
            case RGP_PHASE2_CLEANUP:                                        \
               ClusterInsert(rgp->rgppkt_to_send.knownstage5, rgp->mynode); \
               break;                                                       \
                                                                            \
            default:                                                        \
               break;                                                       \
         }                                                                  \
      }                                                                     \
   } while(0)
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_update_poison_packet
|
|
* ========================
|
|
*
|
|
* Description:
|
|
*
|
|
* Macro to copy the current regroup status into the poison packet
|
|
* sending buffer.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* Copies the appropriate regroup status fields into the poison
|
|
* packet buffer to help debugging when a dump of a poisoned
|
|
* node is examined.
|
|
*
|
|
************************************************************************/
|
|
#define rgp_update_poison_packet                                        \
   do                                                                   \
   {                                                                    \
      /* Scalar fields taken from the current regroup status packet. */ \
      rgp->poison_pkt.seqno          = rgp->rgppkt.seqno;               \
      rgp->poison_pkt.reason         = rgp->rgppkt.reason;              \
      rgp->poison_pkt.activatingnode = rgp->rgppkt.activatingnode;      \
      rgp->poison_pkt.causingnode    = rgp->rgppkt.causingnode;         \
                                                                        \
      /* Node sets taken from the regroup control structure. */         \
      ClusterCopy(rgp->poison_pkt.initnodes, rgp->initnodes);           \
      ClusterCopy(rgp->poison_pkt.endnodes, rgp->endnodes);             \
   } while(0)
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_broadcast
|
|
* =============
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine asks the message system to broadcast an unacknowledged
|
|
* packet of subtype "packet_subtype" to a set of nodes indicated in
|
|
* an appropriate field in the rgp control struct. How the broadcast
|
|
* is implemented depends on the OS.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* uint8 packet_subtype - type of unsequenced packet to send
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* The same data packet is to be sent to the set of nodes indicated
|
|
* in the rgp control struct field. The sending can be done by queueing
|
|
* the packets directly to the send engine or the send can be deferred
|
|
* to a lower priority interrupt level. The former approach reduces
|
|
* the latency for sending these urgent packets while the latter
|
|
* approach may reduce the number of sends if several requests to
|
|
* send the same type of packets (this is true only of regroup
|
|
* packets) are made in quick succession. In this case, previous
|
|
* requests are overwritten by later requests. This is OK since the
|
|
* regroup algorithm has enough redundancy in packet sending.
|
|
*
|
|
* In NSK, the message system provides a broadcast facility for
|
|
* unacknowledged packets. It copies regroup's packet into its own
|
|
* buffer and issues multiple requests to the SNet services layer.
|
|
* When it copies the buffer, it disables the timer and IPC
|
|
* interrupts ensuring that there will be no contention with Regroup.
|
|
* Therefore, this routine can safely update the packet area here
|
|
* without checking if the sending apparatus has completed sending
|
|
* the previous packet.
|
|
*
|
|
* This is not true of LCU where the message system does not
|
|
* provide a broadcast facility. In LCU, the updating of the packet
|
|
* buffer can be done only when the send engine has completed
|
|
* sending. This is assured only in the send completion interrupt
|
|
* handler (rgp_msgsys_work).
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
|
|
rgp_broadcast(uint8 packet_subtype)
|
|
{
|
|
cluster_t temp_cluster;
|
|
|
|
//[Raj Das] Copy the ignorescreen before sending.....
|
|
PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);
|
|
|
|
switch (packet_subtype)
|
|
{
|
|
case RGP_UNACK_REGROUP :
|
|
|
|
/* Trace the queueing of regroup status packets. */
|
|
RGP_TRACE( "RGP Send packets",
|
|
rgp->rgppkt.stage, /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->status_targets, /* TRACE */
|
|
rgp->rgppkt.knownstage1 ), /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->rgppkt.knownstage2, /* TRACE */
|
|
rgp->rgppkt.knownstage3 ), /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->rgppkt.knownstage4, /* TRACE */
|
|
rgp->rgppkt.knownstage5 ) ); /* TRACE */
|
|
|
|
#if defined(NSK) || defined(UNIX) || defined(NT)
|
|
/* In NSK, the packet buffer can be updated even if the send
|
|
* engine is working on the previous send. See algorithm
|
|
* description above.
|
|
*/
|
|
|
|
if ((rgp->rgppkt.reason == MM_EVT_LEAVE) &&
|
|
(rgp->rgppkt.causingnode == rgp->mynode))
|
|
// If a LEAVE event is in progress exclude our node from knownstage mask
|
|
rgp->rgppkt_to_send = rgp->rgppkt;
|
|
else
|
|
// copy regroup packet and insert our node number into knownstage mask
|
|
rgp_update_regroup_packet;
|
|
#endif /* NSK || UNIX || NT */
|
|
|
|
ClusterUnion(rgp->rgp_msgsys_p->regroup_nodes,
|
|
rgp->status_targets,
|
|
rgp->rgp_msgsys_p->regroup_nodes);
|
|
|
|
/* Clear the targets field in the rgp_control struct after
|
|
* copying this info. The message system must clear the target
|
|
* bits in the common regroup/msgsys struct after sending the
|
|
* packets.
|
|
*/
|
|
ClusterInit(rgp->status_targets);
|
|
|
|
rgp->rgp_msgsys_p->sendrgppkts = 1;
|
|
|
|
break;
|
|
|
|
case RGP_UNACK_IAMALIVE :
|
|
|
|
/* Count number of IamAlive requests queued. */
|
|
RGP_INCREMENT_COUNTER( QueuedIAmAlive );
|
|
|
|
ClusterUnion(rgp->rgp_msgsys_p->iamalive_nodes,
|
|
rgp->rgpinfo.cluster,
|
|
rgp->rgp_msgsys_p->iamalive_nodes);
|
|
rgp->rgp_msgsys_p->sendiamalives = 1;
|
|
|
|
/* No targets field to clear in the rgp_control struct.
|
|
* The message system must clear the target bits in the common
|
|
* regroup/msgsys struct after sending the packets.
|
|
*/
|
|
break;
|
|
|
|
case RGP_UNACK_POISON :
|
|
|
|
/* Trace the sending of poison packets. */
|
|
RGP_TRACE( "RGP Send poison ",
|
|
rgp->rgppkt.stage, /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->poison_targets, /* TRACE */
|
|
rgp->rgppkt.knownstage1 ), /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->rgppkt.knownstage2, /* TRACE */
|
|
rgp->rgppkt.knownstage3 ), /* TRACE */
|
|
RGP_MERGE_TO_32( rgp->rgppkt.knownstage4, /* TRACE */
|
|
rgp->rgppkt.knownstage5 ) ); /* TRACE */
|
|
|
|
/* The poison packet targets must NOT be considered alive. */
|
|
|
|
ClusterIntersection(temp_cluster, rgp->rgpinfo.cluster,
|
|
rgp->poison_targets);
|
|
|
|
ClusterDifference(temp_cluster,
|
|
temp_cluster,
|
|
rgp->OS_specific_control.Banished);
|
|
|
|
if (ClusterNumMembers(temp_cluster) != 0)
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
|
|
#if defined(NSK) || defined(NT)
|
|
/* In NSK, the packet buffer can be updated even if the send
|
|
* engine is working on the previous send. See algorithm
|
|
* description above.
|
|
*/
|
|
rgp_update_poison_packet;
|
|
#endif /* NSK || NT */
|
|
|
|
ClusterUnion(rgp->rgp_msgsys_p->poison_nodes,
|
|
rgp->poison_targets,
|
|
rgp->rgp_msgsys_p->poison_nodes);
|
|
|
|
/* Clear the targets field in the rgp_control struct after
|
|
* copying this info. The message system must clear the target
|
|
* bits in the common regroup/msgsys struct after sending the
|
|
* packets.
|
|
*/
|
|
ClusterInit(rgp->poison_targets);
|
|
|
|
rgp->rgp_msgsys_p->sendpoisons = 1;
|
|
|
|
break;
|
|
|
|
default :
|
|
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
break;
|
|
}
|
|
|
|
QUEUESEND; /* invoke OS-specific sending function/macro */
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_had_power_failure
|
|
* =====================
|
|
*
|
|
* Description:
|
|
*
|
|
* Tells the OS at the end of a regroup incident if a surviving node
|
|
* had a power failure. The message system can use this to clear all
|
|
* bus errors collected so far to node because node seems to have
|
|
* had a power failure and has now recovered from it. Perhaps, the
|
|
* bus errors were due to the power failure.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* node_t node - the node that had a power failure
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* Calls a message system routine to perform any error clearing.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
|
|
rgp_had_power_failure(node_t node)
|
|
{
|
|
/* Currently, there is nothing to do. */
|
|
RGP_TRACE( "RGP Power fail ", node, 0, 0, 0);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_status_of_node
|
|
* ==================
|
|
*
|
|
* Description:
|
|
*
|
|
* Ask the SP to return the status of a node. The SP must return the
|
|
* current status and not return a stale status. This routine is
|
|
* called by the split-brain avoidance algorithm in the two-node
|
|
* case, for the non-tie-breaker to get the status of the tie-breaker
|
|
* node.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* node_t node
|
|
* the node whose status is to be obtained.
|
|
*
|
|
* Returns:
|
|
*
|
|
* int - the status code of the node returned by the SP, appropriately
|
|
* encoded into one of the values known to regroup.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* Calls a millicode routine to ask the SP for the status of the node.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident int
|
|
rgp_status_of_node(node_t node)
|
|
{
|
|
#if defined(NT)
|
|
/* noone home */
|
|
return RGP_NODE_UNREACHABLE;
|
|
#else
|
|
return _get_remote_cpu_state_( node ); /*F40:MB06452.1*/
|
|
#endif
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_newnode_online
|
|
* ==================
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine is called if the first IamAlive is received from a
|
|
* newly booted node before the cluster manager gets a chance to
|
|
* call rgp_monitor_node(). The OS can use this routine to mark the
|
|
* node as up if it does not have any other means to detect that
|
|
* a node has come up.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* node_t newnode -
|
|
* the new node that has just been detected to be up
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* This routine marks the state of the node as up as seen by the
|
|
* native OS.
|
|
*
|
|
* In NSK, on the reloader node, the marking of the reloadee as up
|
|
* is done by the message system when the initial address handshake
|
|
* packet is received from the reloadee. NSK does not require the
|
|
* regroup module to report the fact that the reloadee is online.
|
|
*
|
|
* The above is probably true for LCU as well. However, the details
|
|
* are not yet worked out. For now, this routine is a no-op for LCU.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
|
|
rgp_newnode_online(node_t newnode)
|
|
{
|
|
RGP_TRACE( "RGP New node up ", newnode, 0, 0, 0);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* rgp_select_cluster_ex
|
|
* =====================
|
|
*
|
|
* Description:
|
|
*
|
|
* Given an array of cluster choices, this routine picks the best
|
|
* cluster to keep alive. cluster_choices[] is the array of choices
|
|
* and num_clusters is the number of entries in the array.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* cluster_t cluster_choices[]
|
|
* array of cluster choices
|
|
*
|
|
* int num_clusters
|
|
* number of entries (choices) in the array
|
|
*
|
|
* node_t key_node
|
|
* internal node number of the key node or RGP_NULL_NODE
|
|
*
|
|
* Returns:
|
|
*
|
|
* int - the index of the selected cluster; if no cluster
|
|
* is viable, -1 is returned.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* By default, the best cluster is defined as the largest cluster.
|
|
* Optionally, a node called key_node can be required to be present
|
|
* for a cluster to be viable. key_node can be set to RGP_NULL_NODE
|
|
* to imply that no specific node is required to be present. The
|
|
* routine returns the index of the best cluster and -1 if none of
|
|
* the clusters is viable (that is, does not include the key node).
|
|
*
|
|
************************************************************************/
|
|
_priv _resident int
|
|
rgp_select_cluster_ex(cluster_t cluster_choices[], int num_clusters, node_t key_node)
|
|
{
|
|
|
|
int max_members = 0, num_members;
|
|
int cluster_selected = -1;
|
|
int i;
|
|
|
|
#if defined(UNIX)
|
|
printf("rgp_select_cluster() called with %d choices:", num_clusters);
|
|
for (i = 0; i < num_clusters; i++)
|
|
{
|
|
node_t j;
|
|
printf("(");
|
|
for (j = 0; j < (node_t) rgp->num_nodes; j++)
|
|
{
|
|
if (ClusterMember(cluster_choices[i], j))
|
|
printf("%d,", EXT_NODE(j));
|
|
}
|
|
printf(")");
|
|
}
|
|
printf("\n");
|
|
fflush(stdout);
|
|
#endif /* UNIX */
|
|
|
|
for (i = 0; i < num_clusters; i++)
|
|
{
|
|
/* Skip the current cluster if a key node is defined and is not
|
|
* in the cluster.
|
|
*/
|
|
if ((key_node != RGP_NULL_NODE) &&
|
|
!ClusterMember(cluster_choices[i], key_node))
|
|
continue;
|
|
|
|
if ((num_members = ClusterNumMembers(cluster_choices[i])) > max_members)
|
|
{
|
|
cluster_selected = i;
|
|
max_members = num_members;
|
|
}
|
|
}
|
|
|
|
#if defined(UNIX)
|
|
printf("Node %d: rgp_select_cluster() returned %d.\n",
|
|
EXT_NODE(rgp->mynode), cluster_selected);
|
|
fflush(stdout);
|
|
#endif /* UNIX */
|
|
|
|
return (cluster_selected);
|
|
}
|
|
|
|
/************************************************************************
|
|
* rgp_select_cluster
|
|
* ==================
|
|
*
|
|
* Description:
|
|
*
|
|
* Given an array of cluster choices, this routine picks the best
|
|
* cluster to keep alive. cluster_choices[] is the array of choices
|
|
* and num_clusters is the number of entries in the array.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* cluster_t cluster_choices[]
|
|
* array of cluster choices
|
|
*
|
|
* int num_clusters
|
|
* number of entries (choices) in the array
|
|
*
|
|
* Returns:
|
|
*
|
|
* int - the index of the selected cluster; if no cluster
|
|
* is viable, -1 is returned.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* By default, the best cluster is defined as the largest cluster.
|
|
* Optionally, a node called RGP_KEY_NODE can be required to be present
|
|
* for a cluster to be viable. RGP_KEY_NODE can be set to RGP_NULL_NODE
|
|
* to imply that no specific node is required to be present. The
|
|
* routine returns the index of the best cluster and -1 if none of
|
|
* the clusters is viable (that is, does not include the key node).
|
|
*
|
|
************************************************************************/
|
|
_priv _resident int
|
|
rgp_select_cluster(cluster_t cluster_choices[], int num_clusters)
|
|
{
|
|
node_t key_node;
|
|
if (RGP_KEY_NODE == RGP_NULL_NODE) {
|
|
key_node = RGP_NULL_NODE;
|
|
} else {
|
|
key_node = INT_NODE(RGP_KEY_NODE);
|
|
}
|
|
return rgp_select_cluster_ex(cluster_choices , num_clusters, key_node);
|
|
}
|
|
|
|
|
|
#ifdef LCU
|
|
/************************************************************************
|
|
* rgp_msgsys_work
|
|
* ===============
|
|
*
|
|
* Description:
|
|
*
|
|
* LCU-specific routine that implements broadcasting of packets by
|
|
* sending them serially.
|
|
*
|
|
* This routine is called from rgp_broadcast() to initiate new sends.
|
|
* It is also the packet send completion interrupt handler (callback
|
|
* routine), invoked by the LCU message system when the packet buffer
|
|
* can be reused.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* lcumsg_t *lcumsgp -
|
|
* pointer to lcu message if called from the transport's send
|
|
* completion interrupt handler; NULL if called from
|
|
* rgp_broadcast() to send a new packet.
|
|
*
|
|
* int status -
|
|
* the message completion status if called from the transport's
|
|
* send completion interrupt handler; 0 if called from
|
|
* rgp_broadcast() to send a new packet.
|
|
*
|
|
* Returns:
|
|
*
|
|
* void - no return value
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* If called from the send completion interrupt, the routine checks
|
|
* to see if the packet buffer needs to be refreshed. This is true
|
|
* if the appropriate bit in the rgp_msgsys struct is set. If so,
|
|
* the buffer is updated with the current info (using an update
|
|
* macro). This update is relevant to regroup status packets and
|
|
* poison packets, but not to IamAlives packets whose contents are
|
|
* always the same. The bit is cleared after the packet is updated.
|
|
*
|
|
* Next, the routine checks if there are more destinations to send
|
|
* the packet to. If so, it finds the next higher numbered node to
|
|
* send to, issues a send and returns.
|
|
*
|
|
* If invoked from rgp_broadcast() to start a new broadcast, the
|
|
* routine first checks to see if the previous broadcast of the
|
|
* same packet is complete. This is indicated by the tag field in
|
|
* the message struct. The tag is NULL if the broadcast has
|
|
* completed or has not been initiated. In this case, the tag is
|
|
* set to a non-NULL value and a new broadcast initiated, with
|
|
* this routine specified as the callback routine.
|
|
*
|
|
* If the previous broadcast has not completed, nothing needs to
|
|
* be done. The completion interrupt will cause the buffer to be
|
|
* refreshed and the broadcast to be continued. The broadcast
|
|
* will then include new targets that may be included in this
|
|
* new request.
|
|
*
|
|
************************************************************************/
|
|
_priv _resident void
|
|
rgp_msgsys_work(lcumsg_t *lcumsgp, int status)
|
|
{
|
|
rgp_unseq_pkt_t *packet;
|
|
cluster_t *sending_cluster;
|
|
node_t node;
|
|
|
|
if (lcumsgp == NULL)
|
|
{
|
|
/* New work requested. Only one type of work is requested at
|
|
* a time.
|
|
*/
|
|
|
|
if (rgp->rgp_msgsys_p->sendrgppkts)
|
|
{
|
|
|
|
/* Have new regroup status packets to send. First check
|
|
* if the last regroup status send completed. If so,
|
|
* we can update the packet and initiate a new send.
|
|
* If not, we must defer to the completion interrupt
|
|
* (invocation of this routine with a non-NULL lcumsgp).
|
|
*/
|
|
|
|
lcumsgp = rgp->OS_specific_control.lcumsg_regroup_p;
|
|
if (lcumsgp->lcu_tag == NULL)
|
|
{
|
|
/* Last send completed. Initiate new send. */
|
|
|
|
rgp_update_regroup_packet;
|
|
rgp->rgp_msgsys_p->sendrgppkts = 0;
|
|
|
|
for (node = 0; node < rgp->num_nodes; node++)
|
|
{
|
|
if (ClusterMember(rgp->rgp_msgsys_p->regroup_nodes, node))
|
|
{
|
|
ClusterDelete(rgp->rgp_msgsys_p->regroup_nodes, node);
|
|
lcumsgp->lcu_node = node;
|
|
lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->regroup_nodes);
|
|
if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
|
|
ELCU_OK)
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
break; /* can send only to one node at a time */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
else if (rgp->rgp_msgsys_p->sendiamalives)
|
|
{
|
|
/* Need to send IamAlives again. First check if the last
|
|
* IamAlive send completed. If so, we can initiate a new send.
|
|
* If not, we must defer to the completion interrupt
|
|
* (invocation of this routine with a non-NULL lcumsgp).
|
|
*/
|
|
|
|
lcumsgp = rgp->OS_specific_control.lcumsg_iamalive_p;
|
|
if (lcumsgp->lcu_tag == NULL)
|
|
{
|
|
/* Last send completed. Initiate new send. */
|
|
|
|
rgp->rgp_msgsys_p->sendiamalives = 0;
|
|
|
|
for (node = 0; node < rgp->num_nodes; node++)
|
|
{
|
|
if (ClusterMember(rgp->rgp_msgsys_p->iamalive_nodes, node))
|
|
{
|
|
ClusterDelete(rgp->rgp_msgsys_p->iamalive_nodes, node);
|
|
lcumsgp->lcu_node = node;
|
|
lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->iamalive_nodes);
|
|
if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
|
|
ELCU_OK)
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
break; /* can send only to one node at a time */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
else if (rgp->rgp_msgsys_p->sendpoisons)
|
|
{
|
|
/* Have new poison packets to send. First check
|
|
* if the last poison packet send completed. If so,
|
|
* we can update the packet and initiate a new send.
|
|
* If not, we must defer to the completion interrupt
|
|
* (invocation of this routine with a non-NULL lcumsgp).
|
|
*/
|
|
|
|
lcumsgp = rgp->OS_specific_control.lcumsg_poison_p;
|
|
if (lcumsgp->lcu_tag == NULL)
|
|
{
|
|
/* Last send completed. Initiate new send. */
|
|
|
|
rgp_update_poison_packet;
|
|
rgp->rgp_msgsys_p->sendpoisons = 0;
|
|
|
|
for (node = 0; node < rgp->num_nodes; node++)
|
|
{
|
|
if (ClusterMember(rgp->rgp_msgsys_p->poison_nodes, node))
|
|
{
|
|
ClusterDelete(rgp->rgp_msgsys_p->poison_nodes, node);
|
|
lcumsgp->lcu_node = node;
|
|
lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->poison_nodes);
|
|
if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
|
|
ELCU_OK)
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
break; /* can send only to one node at a time */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} /* new work */
|
|
|
|
else
|
|
{
|
|
/* Send completion interrupt; continue the broadcast if
|
|
* there are targets remaining.
|
|
*/
|
|
|
|
RGP_LOCK;
|
|
|
|
/* Find what type of packet completed; send the same type. */
|
|
|
|
packet = (rgp_unseq_pkt_t *) lcumsgp->lcu_reqmbuf.lcu_ctrlbuf;
|
|
|
|
switch (packet->pktsubtype)
|
|
{
|
|
case RGP_UNACK_REGROUP :
|
|
|
|
/* Check if packet needs to be updated. */
|
|
if (rgp->rgp_msgsys_p->sendrgppkts)
|
|
{
|
|
rgp_update_regroup_packet;
|
|
rgp->rgp_msgsys_p->sendrgppkts = 0;
|
|
}
|
|
break;
|
|
|
|
case RGP_UNACK_IAMALIVE :
|
|
break;
|
|
|
|
case RGP_UNACK_POISON :
|
|
|
|
/* Check if packet needs to be updated. */
|
|
if (rgp->rgp_msgsys_p->sendpoisons)
|
|
{
|
|
rgp_update_poison_packet;
|
|
rgp->rgp_msgsys_p->sendpoisons = 0;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* Check if there is any more node to send the same packet
|
|
* type to. If not, set the tag to NULL and return.
|
|
*/
|
|
sending_cluster = (cluster_t *) (lcumsgp->lcu_tag);
|
|
if (ClusterNumMembers(*sending_cluster) == 0)
|
|
{
|
|
lcumsgp->lcu_tag = NULL; /* indicate that broadcast is complete. */
|
|
return;
|
|
}
|
|
|
|
/* There is at least one more node to send to. Start with
|
|
* the node with the next higher number than the node we
|
|
* just finished sending to.
|
|
*
|
|
* The loop terminates after posting a send to the next
|
|
* node to send to. We know there is at least one such node.
|
|
*/
|
|
for (node = lcumsgp->lcu_node + 1; node < rgp->num_nodes + 1; node++)
|
|
{
|
|
if (node == rgp->num_nodes)
|
|
node = 0; /* continue the search starting at node 0 */
|
|
if (ClusterMember(*sending_cluster, node))
|
|
{
|
|
ClusterDelete(*sending_cluster, node);
|
|
lcumsgp->lcu_node = node;
|
|
if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
|
|
ELCU_OK)
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
break; /* can send only to one node at a time */
|
|
}
|
|
}
|
|
|
|
RGP_UNLOCK;
|
|
}
|
|
}
|
|
#endif /* LCU */
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
#if defined(LCU) || defined(UNIX) || defined(NT)
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
void
rgp_hold_all_io(void)
/* Stand-in for the TNet services routine that pauses IO. On NT the
 * registered HoldIOCallback is invoked; every build records a trace
 * entry.
 */
{
#ifdef NT
   rgp->OS_specific_control.HoldIOCallback();
#endif /* NT */

   RGP_TRACE( "RGP Hold all IO ", 0, 0, 0, 0);
}
|
|
/*---------------------------------------------------------------------------*/
|
|
void
rgp_resume_all_io(void)
/* Stand-in for the TNet services routine that resumes IO. On NT the
 * registered ResumeIOCallback is invoked; every build records a trace
 * entry.
 */
{
#ifdef NT
   rgp->OS_specific_control.ResumeIOCallback();
#endif /* NT */

   RGP_TRACE( "RGP Resume IO ", 0, 0, 0, 0);
}
|
|
/*---------------------------------------------------------------------------*/
|
|
void
|
|
RGP_ERROR_EX (uint16 halt_code, char* fname, DWORD lineno)
|
|
/* Halt node with error code. */
|
|
{
|
|
char *halt_string;
|
|
node_t node = RGP_NULL_NODE;
|
|
#if defined( NT )
|
|
char halt_buffer[ 256 ];
|
|
DWORD eventMsgId;
|
|
BOOL skipFormatting = FALSE;
|
|
|
|
//
|
|
// If a user initiated a shutdown, (s)he wants to see the node
|
|
// to go down and wait for an explicit start command.
|
|
//
|
|
// We map RGP_RELOADFAILED to SHUTDOWN_DURING_REGROUP_ERROR since
|
|
// HaltCallback does a graceful stop for the latter one.
|
|
// SCM won't restart the node after a graceful stop unless
|
|
// it is explicitly told to do so
|
|
//
|
|
if (halt_code == RGP_RELOADFAILED &&
|
|
rgp->OS_specific_control.ShuttingDown)
|
|
{
|
|
halt_code = RGP_SHUTDOWN_DURING_RGP;
|
|
}
|
|
#endif
|
|
|
|
if (halt_code == RGP_RELOADFAILED) {
|
|
halt_string = "[RGP] Node %d: REGROUP WARNING: reload failed.";
|
|
eventMsgId = MM_EVENT_RELOAD_FAILED;
|
|
}
|
|
else if (halt_code == RGP_INTERNAL_ERROR) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: consistency check failed in file %s, line %u.";
|
|
eventMsgId = MM_EVENT_INTERNAL_ERROR;
|
|
skipFormatting = TRUE;
|
|
|
|
_snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
|
|
halt_string,
|
|
EXT_NODE(rgp->mynode),
|
|
fname,
|
|
lineno);
|
|
}
|
|
else if (halt_code == RGP_MISSED_POLL_TO_SELF) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: cannot talk to self.";
|
|
eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
|
|
}
|
|
#if !defined(NT)
|
|
else if (halt_code == RGP_AVOID_SPLIT_BRAIN) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: commiting suicide to avoid split brain.";
|
|
}
|
|
#endif
|
|
else if (halt_code == RGP_PRUNED_OUT) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: pruned out due to communication failure.";
|
|
eventMsgId = MM_EVENT_PRUNED_OUT;
|
|
}
|
|
else if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: poison packet received from node %d.";
|
|
eventMsgId = MM_EVENT_PARIAH;
|
|
node = (node_t)(halt_code - RGP_PARIAH);
|
|
}
|
|
else if (halt_code == RGP_ARBITRATION_FAILED) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration failed.";
|
|
eventMsgId = MM_EVENT_ARBITRATION_FAILED;
|
|
}
|
|
else if (halt_code == RGP_ARBITRATION_STALLED) {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration stalled.";
|
|
eventMsgId = MM_EVENT_ARBITRATION_STALLED;
|
|
}
|
|
else if (halt_code == RGP_SHUTDOWN_DURING_RGP) {
|
|
halt_string = "[RGP] Node %d: REGROUP INFO: regroup engine requested immediate shutdown.";
|
|
eventMsgId = MM_EVENT_SHUTDOWN_DURING_RGP;
|
|
}
|
|
else {
|
|
halt_string = "[RGP] Node %d: REGROUP ERROR: unknown halt code (%d).";
|
|
eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
|
|
node = halt_code; // get it printed out by borrowing node
|
|
}
|
|
|
|
#if defined(UNIX)
|
|
printf(halt_string, EXT_NODE(rgp->mynode), node);
|
|
fflush(stdout);
|
|
/* Simulate a halt by dumping core and exiting the process. */
|
|
abort();
|
|
|
|
#elif defined(NT)
|
|
|
|
if ( !skipFormatting ) {
|
|
_snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
|
|
halt_string,
|
|
EXT_NODE(rgp->mynode),
|
|
node);
|
|
}
|
|
|
|
#if CLUSTER_BETA
|
|
ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\t%2!hs!:%3!d!\n", halt_buffer, fname, lineno);
|
|
#else
|
|
ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\n", halt_buffer );
|
|
#endif
|
|
|
|
if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
|
|
WCHAR nodeString[ 16 ];
|
|
PWCHAR nodeName;
|
|
|
|
_snwprintf( nodeString, sizeof( nodeString ) / sizeof ( WCHAR ), L"%d", node );
|
|
nodeName = RgpGetNodeNameFromId( node );
|
|
CsLogEvent2( LOG_CRITICAL, eventMsgId, nodeString, nodeName );
|
|
if ( nodeName != NULL ) {
|
|
LocalFree( nodeName );
|
|
}
|
|
}
|
|
else if ( eventMsgId == NM_EVENT_MEMBERSHIP_HALT ) {
|
|
WCHAR haltString[ 16 ];
|
|
|
|
_snwprintf( haltString, sizeof( haltString ) / sizeof ( WCHAR ), L"%d", halt_code );
|
|
CsLogEvent1( LOG_CRITICAL, eventMsgId, haltString );
|
|
}
|
|
else {
|
|
CsLogEvent( LOG_CRITICAL, eventMsgId );
|
|
}
|
|
|
|
/* we rely on RGP_ERROR_EX to kill the node immediately
|
|
|
|
rgp_cleanup() can potentially slow us down.
|
|
435977 showed that it can take upto 25 seconds, if we
|
|
have a lot IP addr activity.
|
|
|
|
since in the end of the function we execute HaltCallback which kills the cluster,
|
|
we can safely omit doing rgp_cleanup and rgp_cleanup_OS
|
|
|
|
If JoinFailedCallback will be ever enabled, the fate of rgp_cleanup and rgp_cleanup_OS
|
|
should be reevaluated.
|
|
*/
|
|
|
|
#if 0
|
|
rgp_cleanup();
|
|
rgp_cleanup_OS();
|
|
if (halt_code == RGP_RELOADFAILED)
|
|
(*(rgp->OS_specific_control.JoinFailedCallback))();
|
|
else
|
|
#endif
|
|
(*(rgp->OS_specific_control.HaltCallback))(halt_code); // does not return */
|
|
|
|
#else
|
|
cmn_err(CE_PANIC, halt_string, EXT_NODE(rgp->mynode), node);
|
|
#endif /* UNIX */
|
|
}
|
|
/*---------------------------------------------------------------------------*/
|
|
void
|
|
rgp_start_phase1_cleanup(void)
|
|
/* Tells the OS to start cleanup actions for all failed nodes. */
|
|
{
|
|
#if defined (NT)
|
|
node_t i;
|
|
//
|
|
// On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
|
|
//
|
|
for ( i=0; i < (node_t) rgp->num_nodes; i++)
|
|
{
|
|
if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
|
|
{
|
|
(*(rgp->OS_specific_control.MsgCleanup1Callback))(EXT_NODE(i));
|
|
}
|
|
}
|
|
#endif
|
|
RGP_TRACE( "RGP Ph1 cleanup ", 0, 0, 0, 0);
|
|
rgp_event_handler(RGP_EVT_PHASE1_CLEANUP_DONE, RGP_NULL_NODE);
|
|
}
|
|
/*---------------------------------------------------------------------------*/
|
|
void
|
|
rgp_start_phase2_cleanup(void)
|
|
/* The equivalent of NSK's regroupstage4action(). */
|
|
{
|
|
#if defined (NT)
|
|
BITSET bitset;
|
|
node_t i;
|
|
//
|
|
// On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
|
|
//
|
|
BitsetInit(bitset);
|
|
for ( i=0; i < (node_t) rgp->num_nodes; i++)
|
|
{
|
|
if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
|
|
{
|
|
BitsetAdd(bitset, EXT_NODE(i));
|
|
}
|
|
}
|
|
|
|
(*(rgp->OS_specific_control.MsgCleanup2Callback))(bitset);
|
|
#endif
|
|
RGP_TRACE( "RGP Ph2 cleanup ", 0, 0, 0, 0);
|
|
rgp_event_handler(RGP_EVT_PHASE2_CLEANUP_DONE, RGP_NULL_NODE);
|
|
}
|
|
/*---------------------------------------------------------------------------*/
|
|
void
rgp_cleanup_complete(void)
/* The equivalent of NSK's regroupstage5action(). No platform needs any
 * work here; the routine only records a trace entry.
 */
{
   RGP_TRACE( "RGP completed ", 0, 0, 0, 0);
}
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
#endif /* LCU || UNIX || NT */
|
|
|
|
#if defined(NT)
|
|
|
|
/************************************************************************
|
|
* NT_timer_callback
|
|
* =================
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine is the callback function that gets invoked whenever a
|
|
* timer pops. The routine will call rgp_periodic_check. This function
|
|
* is defined by the Win32 TimerProc procedure.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None. The routine takes no arguments.
|
|
*
|
|
* Returns:
|
|
*
|
|
* none.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* This routine just calls rgp_periodic_check. The existence of this
|
|
* routine is solely due to a fixed format callback defined by
|
|
* Microsoft.
|
|
*
|
|
************************************************************************/
|
|
VOID CALLBACK NT_timer_callback(
    VOID
    )
{
    /* Run the periodic regroup check. In TDM_DEBUG builds the check is
     * skipped while either debug freeze flag is set.
     */
#if defined(TDM_DEBUG)
    if ( !(rgp->OS_specific_control.debug.timer_frozen ||
           rgp->OS_specific_control.debug.frozen) )
#endif
        rgp_periodic_check( );

    // Do the Clussvc to clusnet heartbeating stuff here iff enabled.
    if ( !MmStartClussvcToClusnetHeartbeat || (NmClusnetHandle == NULL) ) {
        return;
    }

    // Still counting down to the next heartbeat.
    if (MmCheckSystemHealthTick > 0) {
        MmCheckSystemHealthTick--;
        return;
    }

    // Reseed the tick count.
    // Mimic hardware watchdog timers and use one quarter of the timeout.
    MmCheckSystemHealthTick = ((NmClusSvcHeartbeatTimeout * 1000)/RGP_CLOCK_PERIOD)/4;

    // Send the heartbeat ioctl.
    ClusnetIamalive(NmClusnetHandle);
}
|
|
|
|
/************************************************************************
|
|
* NT_timer_thread
|
|
* ===============
|
|
*
|
|
* Description:
|
|
*
|
|
* This routine is executed as a separate thread in the Windows NT
|
|
* implementation. This thread generates periodic regroup
|
|
* clock ticks. It is signalled via an event whenever the rate changes
|
|
* or to cause termination.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* None.
|
|
*
|
|
* Returns:
|
|
*
|
|
* This thread should not go away.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* This routine is run as a separate thread. It sets up a timer to pop
|
|
* every <time_interval> * 10 milliseconds.
|
|
*
|
|
************************************************************************/
|
|
void NT_timer_thread( void )
|
|
{
|
|
BOOL Success;
|
|
LARGE_INTEGER DueTime;
|
|
DWORD Error, MyHandleIndex;
|
|
HANDLE MyHandles[2]; /* for use by WaitForMultiple */
|
|
DWORD status;
|
|
DWORD msDueTime;
|
|
|
|
#define MyHandleSignalIx 0
|
|
#define MyHandleTimerIx 1
|
|
|
|
MyHandles[MyHandleSignalIx] = rgp->OS_specific_control.TimerSignal; /* Event signals HB rate change */
|
|
|
|
rgp->OS_specific_control.RGPTimer = CreateWaitableTimer(
|
|
NULL, // no security
|
|
FALSE, // Initial State FALSE
|
|
NULL
|
|
); // No name
|
|
|
|
if (rgp->OS_specific_control.RGPTimer == NULL) {
|
|
Error = GetLastError();
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
}
|
|
|
|
status = MmSetThreadPriority();
|
|
|
|
if ( status != ERROR_SUCCESS ) {
|
|
ClRtlLogPrint(LOG_CRITICAL,
|
|
"[MM] Unable to set timer thread priority, status %1!u!\n",
|
|
status
|
|
);
|
|
|
|
RGP_ERROR((uint16) status);
|
|
ExitThread(status);
|
|
}
|
|
|
|
MyHandles[MyHandleTimerIx] = rgp->OS_specific_control.RGPTimer;
|
|
|
|
while (TRUE)
|
|
{
|
|
MyHandleIndex = WaitForMultipleObjects (
|
|
2, /* Number of Events */
|
|
MyHandles, /* Handle Array */
|
|
FALSE, /* Wait for ANY event */
|
|
INFINITE ); /* Wait forever */
|
|
|
|
if (MyHandleIndex == MyHandleSignalIx) // Timer Change Signal Event
|
|
{
|
|
// RGP rate has changed
|
|
CancelWaitableTimer ( rgp->OS_specific_control.RGPTimer );
|
|
if ( rgp->rgpinfo.a_tick == 0 ) // Time to quit
|
|
{
|
|
CloseHandle ( rgp->OS_specific_control.RGPTimer );
|
|
rgp->OS_specific_control.RGPTimer = 0;
|
|
ExitThread ( 0 );
|
|
}
|
|
|
|
// a_tick has new RGP rate in milliseconds.
|
|
msDueTime = rgp->rgpinfo.a_tick;
|
|
DueTime.QuadPart = Int32x32To64(-10000, msDueTime);
|
|
Success = SetWaitableTimer(
|
|
rgp->OS_specific_control.RGPTimer,
|
|
&DueTime,
|
|
rgp->rgpinfo.a_tick,
|
|
NULL,
|
|
NULL,
|
|
FALSE);
|
|
|
|
if (!Success) {
|
|
Error = GetLastError();
|
|
RGP_ERROR(RGP_INTERNAL_ERROR);
|
|
}
|
|
|
|
} // Timer Change Signal
|
|
else
|
|
{ // RGP Timer Tick
|
|
NT_timer_callback();
|
|
|
|
// Removed - bug 742297. NM now has its own timer thread.
|
|
// NmTimerTick(msDueTime);
|
|
}
|
|
} // while
|
|
}
|
|
|
|
|
|
PWCHAR
|
|
RgpGetNodeNameFromId(
|
|
node_t NodeID
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
given a node ID, issue a get name node control to get the computer name of
|
|
the node. Returned buffer to be freed by caller.
|
|
|
|
Arguments:
|
|
|
|
NodeID - ID ( 1, 2, 3, ..) of the node
|
|
|
|
Return Value:
|
|
|
|
pointer to buffer containing name
|
|
|
|
--*/
|
|
|
|
{
|
|
PWCHAR buffer;
|
|
DWORD bufferSize = MAX_COMPUTERNAME_LENGTH * sizeof( WCHAR );
|
|
DWORD bytesReturned;
|
|
DWORD bytesRequired;
|
|
PNM_NODE node;
|
|
|
|
buffer = LocalAlloc( LMEM_FIXED, bufferSize );
|
|
if ( buffer != NULL ) {
|
|
node = NmReferenceNodeById( NodeID );
|
|
if ( node != NULL ) {
|
|
NmNodeControl(node,
|
|
NULL, // HostNode OPTIONAL,
|
|
CLUSCTL_NODE_GET_NAME,
|
|
NULL, // InBuffer,
|
|
0, // InBufferSize,
|
|
(PUCHAR)buffer,
|
|
bufferSize,
|
|
&bytesReturned,
|
|
&bytesRequired);
|
|
|
|
OmDereferenceObject( node );
|
|
}
|
|
}
|
|
|
|
return buffer;
|
|
}
|
|
|
|
#endif /* NT */
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
#if 0
|
|
|
|
History of changes to this file:
|
|
-------------------------------------------------------------------------
|
|
1995, December 13 F40:KSK0610 /*F40:KSK06102.2*/
|
|
|
|
This file is part of the portable Regroup Module used in the NonStop
|
|
Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
|
|
are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
|
|
srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
|
|
The last two are simulation files to test the Regroup Module on a
|
|
UNIX workstation in user mode with processes simulating processor nodes
|
|
and UDP datagrams used to send unacknowledged datagrams.
|
|
|
|
This file was first submitted for release into NSK on 12/13/95.
|
|
------------------------------------------------------------------------------
|
|
This change occurred on 19 Jan 1996 /*F40:MB06458.1*/
|
|
Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.2*/
|
|
- Some cleanup of the code /*F40:MB06458.3*/
|
|
- Increment KCCB counters to count the number of setup messages and /*F40:MB06458.4*/
|
|
unsequenced messages sent. /*F40:MB06458.5*/
|
|
- Fixed some bugs /*F40:MB06458.6*/
|
|
- Disable interrupts before allocating broadcast sibs. /*F40:MB06458.7*/
|
|
- Change per-packet-timeout to 5ms /*F40:MB06458.8*/
|
|
- Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.9*/
|
|
tnet services queue. /*F40:MB06458.10*/
|
|
- Call the millicode backdoor to get the processor status from SP /*F40:MB06458.11*/
|
|
- Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.12*/
|
|
- Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.13*/
|
|
to be unstoppable before calling this routine. /*F40:MB06458.14*/
|
|
- Added new steps in the build file called /*F40:MB06458.15*/
|
|
MSGSYS_C - compiles all the message system C files /*F40:MB06458.16*/
|
|
MSDRIVER - compiles all the MSDriver files /*F40:MB06458.17*/
|
|
REGROUP - compiles all the regroup files /*F40:MB06458.18*/
|
|
- remove #pragma env libspace because we set it as a command line /*F40:MB06458.19*/
|
|
parameter. /*F40:MB06458.20*/
|
|
----------------------------------------------------------------------- /*F40:MB06458.21*/
|
|
|
|
#endif /* 0 - change descriptions */
|