windows-server-2003/base/cluster/service/mm/srgpos.c


								#ifdef __TANDEM

								#pragma columns 79

								#pragma page "srgpos.c - T9050 - OS-dependent routines for Regroup Module"

								#endif


								/* @@@ START COPYRIGHT @@@

								**  Tandem Confidential:  Need to Know only

								**  Copyright (c) 1995, Tandem Computers Incorporated

								**  Protected as an unpublished work.

								**  All Rights Reserved.

								**

								**  The computer program listings, specifications, and documentation

								**  herein are the property of Tandem Computers Incorporated and shall

								**  not be reproduced, copied, disclosed, or used in whole or in part

								**  for any reason without the prior express written permission of

								**  Tandem Computers Incorporated.

								**

								** @@@ END COPYRIGHT @@@

								**/


								/*---------------------------------------------------------------------------

								 * This file (srgpos.c) contains OS-specific code used by Regroup.

								 *---------------------------------------------------------------------------*/


								#ifdef __cplusplus

								   extern "C" {

								#endif /* __cplusplus */


								#include <wrgp.h>


								#ifdef NSK

								#include <pmsgrgp.h>

								#endif /* NSK */


								#if defined(NT)

								/*
								 * Forward declarations of NT-only routines. Their definitions are
								 * not in the part of the file shown here.
								 */

								/* NOTE(review): presumably sets the priority of the calling thread
								 * and returns a Win32 status code - confirm at the definition.
								 */

								DWORD

								MmSetThreadPriority(

								    VOID

								    );


								/* Start routine of the regroup timer thread; rgp_init_OS() passes
								 * it to CreateThread().
								 */

								void

								NT_timer_thread(

								    void

								    );


								/* NOTE(review): presumably maps a node id to that node's name
								 * string - confirm at the definition (ownership of the returned
								 * string is not visible from here).
								 */

								PWCHAR

								RgpGetNodeNameFromId(

								    node_t

								    );


								#endif // NT


								/* The global pointer to regroup's internal data structure. */


								#ifdef NSK

								/* The global regroup pointer is #defined to a pointer in the message

								 * system root structure.

								 */

								#endif


								#if defined(LCU) || defined(UNIX) || defined(NT)

								/* Global pointer to regroup's control structure. It is initialized
								 * to RGP_NULL_PTR here; the code that points it at a real
								 * rgp_control_t is not in this part of the file.
								 */

								rgp_control_t *rgp = (rgp_control_t *) RGP_NULL_PTR;

								/* Quorum owner, kept outside the rgp structure; starts out as
								 * MM_INVALID_NODE.
								 */

								DWORD  QuorumOwner = MM_INVALID_NODE;

								  /* quorum owner can be set by the forming node before rgp is initialized */

								  /* Clussvc to Clusnet Heartbeating stuff. This bool would enable it. */

								BOOL MmStartClussvcToClusnetHeartbeat=FALSE;

								/* NOTE(review): presumably a tick counter used by the system health
								 * check - its readers/writers are not visible here; confirm.
								 */

								LONG MmCheckSystemHealthTick=0;

								#endif /* LCU || UNIX || NT */


								#ifdef LCU


								/************************************************************************

								 * rgp_lcu_serv_listen

								 * ===================

								 *

								 * Description:

								 *

								 *    This is an LCU-specific routine that gets called in IPC interrupt

								 *    context when a datagram addressed to the Regroup Module is received.

								 *

								 * Parameters:

								 *

								 *    void     *listen_callarg  - required param, unused by regroup

								 *    lcumsg_t *lcumsgp         - pointer to message

								 *    uint     moredata         - required param, unused by regroup

								 *

								 * Returns:

								 *

								 *    int - Always returns ELCU_OK

								 *

								 * Algorithm:

								 *

								 *    The routine simply picks apart the arguments and calls

								 *    rgp_received_packet().

								 *

								 *

								 ************************************************************************/

								_priv _resident int
								rgp_lcu_serv_listen(void *listen_callarg, lcumsg_t *lcumsgp, uint moredata)
								{
								   /* listen_callarg and moredata belong to the required callback
								    * signature; neither is read here.
								    */

								   /* Drop any datagram that did not originate on the local system. */
								   if (lcumsgp->lcu_sysnum != rgp->OS_specific_control.my_sysnum)
								   {
								      return(ELCU_OK);
								   }

								   /* Pass the sending node and the control buffer on to regroup. */
								   rgp_received_packet(lcumsgp->lcu_node,
								                       lcumsgp->lcu_reqmbuf.lcu_ctrlbuf,
								                       lcumsgp->lcu_reqmbuf.lcu_ctrllen);

								   return(ELCU_OK);
								}


								/************************************************************************

								 * rgp_lcu_event_callback

								 * ======================

								 *

								 * Description:

								 *

								 *    This is an LCU-specific routine that gets called in IPC interrupt

								 *    context when the LCUEV_NODE_UNREACHABLE event is generated.

								 *

								 * Parameters:

								 *

								 *    ulong      event        -  event # (= LCUEV_NODE_UNREACHABLE)

								 *    sysnum_t   sysnum       -  system # (= local system #)

								 *    nodenum_t  node         -  # of node that is unreachable

								 *    int        event_info   -  required parameter, unused by regroup

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    The routine simply transforms the LCU event into the regroup event

								 *    RGP_EVT_NODE_UNREACHABLE and calls rgp_event_handler().

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_lcu_event_callback(ulong event, sysnum_t sysnum, nodenum_t node,
								                       int event_info)
								{
								   /* Sanity checks before the event is forwarded:
								    * (1) The event must be LCUEV_NODE_UNREACHABLE, the only event
								    *     that was registered for.
								    * (2) The event must concern the local system, the only system
								    *     that was registered for.
								    * event_info is part of the required signature and is not used.
								    */
								   if (!((event == LCUEV_NODE_UNREACHABLE) &&
								         (sysnum == rgp->OS_specific_control.my_sysnum)))
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);
								   }

								   /* Translate the LCU event into the corresponding regroup event. */
								   rgp_event_handler(RGP_EVT_NODE_UNREACHABLE, node);
								}


								#endif /* LCU */


								/************************************************************************

								 * rgp_init_OS

								 * ===========

								 *

								 * Description:

								 *

								 *    This routine does OS-dependent regroup initialization such as

								 *    initializing the regroup data structure lock, requesting a

								 *    periodic timer to be installed and registering the callback

								 *    routine for receiving regroup's unacknowledged packets.

								 *

								 * Parameters:

								 *

								 *    None

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    OS-dependent initializations.

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_init_OS(void)
								{
								#ifdef UNIX
								   struct sigaction alarm_action;   /* SIGALRM disposition to install */
								#endif
								#ifdef LCU
								   sysnum_t  local_sysnum;          /* filled in by lcuxprt_config() */
								   lcumsg_t *msg;                   /* message buffer being prepared */
								#endif
								#ifdef NT
								   HANDLE    handle;                /* latest event/thread handle */
								   DWORD     timerThreadId = 0;     /* filled in by CreateThread() */
								#endif

								#if defined(NSK) || defined(UNIX) || defined(NT)
								   /*
								    * NSK callers invoke regroup with timer and IPC interrupts already
								    * disabled, so no regroup lock is initialized here. NSK also does
								    * not register callbacks: the timer routine is called directly from
								    * POLLINGCHECK (the periodic message system routine) and the packet
								    * reception routine directly from the IPC interrupt handler.
								    */

								   /* Fill in the rgp_msgsys fields that never change: the address and
								    * length of each of the three packet buffers.
								    */
								   rgp->rgp_msgsys_p->regroup_data     = (void *) &(rgp->rgppkt_to_send);
								   rgp->rgp_msgsys_p->regroup_datalen  = RGPPKTLEN;
								   rgp->rgp_msgsys_p->iamalive_data    = (void *) &(rgp->iamalive_pkt);
								   rgp->rgp_msgsys_p->iamalive_datalen = IAMALIVEPKTLEN;
								   rgp->rgp_msgsys_p->poison_data      = (void *) &(rgp->poison_pkt);
								   rgp->rgp_msgsys_p->poison_datalen   = POISONPKTLEN;
								#endif /* NSK || UNIX || NT */

								#ifdef LCU
								   /* Ask for rgp_periodic_check to be run as a periodic timeout. */
								   if (itimeout(rgp_periodic_check,
								                NULL,                  /* parameter pointer */
								                ((RGP_CLOCK_PERIOD * HZ) / 100) | TO_PERIODIC,
								                plstr                  /* interrupt priority level */
								               ) == 0)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);
								   }

								   /* Register the receive routine for datagrams sent to regroup. */
								   if (lcuxprt_listen(LCU_RGP_PORT,
								                      rgp_lcu_serv_listen,
								                      NULL,            /* no call arg */
								                      NULL             /* no options */
								                     ) != ELCU_OK)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);
								   }

								   /* Find out and remember the local system number. */
								   if (lcuxprt_config(LCU_GET_MYSYSNUM, &local_sysnum) != ELCU_OK)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);
								   }
								   rgp->OS_specific_control.my_sysnum = local_sysnum;

								   /* Three message buffers are allocated, one each for regroup,
								    * iamalive and poison packets.
								    */

								   /* Regroup packet buffer (the only one flagged LCUMSG_CRITICAL). */
								   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
								   if (msg == NULL)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);   /* no memory */
								   }
								   rgp->OS_specific_control.lcumsg_regroup_p = msg;
								   msg->lcu_tag                 = NULL;
								   msg->lcu_sysnum              = local_sysnum;
								   msg->lcu_port                = LCU_RGP_PORT;
								   msg->lcu_flags               = LCUMSG_CRITICAL;
								   msg->lcu_reqmbuf.lcu_ctrllen = RGPPKTLEN;
								   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->rgppkt_to_send);

								   /* IamAlive packet buffer. */
								   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
								   if (msg == NULL)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);   /* no memory */
								   }
								   rgp->OS_specific_control.lcumsg_iamalive_p = msg;
								   msg->lcu_tag                 = NULL;
								   msg->lcu_sysnum              = local_sysnum;
								   msg->lcu_port                = LCU_RGP_PORT;
								   msg->lcu_reqmbuf.lcu_ctrllen = IAMALIVEPKTLEN;
								   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->iamalive_pkt);

								   /* Poison packet buffer. */
								   msg = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS);
								   if (msg == NULL)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);   /* no memory */
								   }
								   rgp->OS_specific_control.lcumsg_poison_p = msg;
								   msg->lcu_tag                 = NULL;
								   msg->lcu_sysnum              = local_sysnum;
								   msg->lcu_port                = LCU_RGP_PORT;
								   msg->lcu_reqmbuf.lcu_ctrllen = POISONPKTLEN;
								   msg->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->poison_pkt);

								   /* Subscribe to LCUEV_NODE_UNREACHABLE for the local system. */
								   if (lcuxprt_events(LCU_CATCH_EVENTS, local_sysnum,
								                      LCUEV_NODE_UNREACHABLE,
								                      rgp_lcu_event_callback) != ELCU_OK)
								   {
								      RGP_ERROR(RGP_INTERNAL_ERROR);
								   }
								#endif /* LCU */

								#ifdef UNIX
								   /* User-level UNIX testing simulates timer ticks with alarm(). */

								   /* Install the SIGALRM handler; SIGPOLL is added to its mask so
								    * that messages are blocked while a timer pop is being handled.
								    */
								   alarm_action.sa_flags   = 0;
								   alarm_action.sa_handler = alarm_handler;
								   sigemptyset(&(alarm_action.sa_mask));
								   sigaddset(&(alarm_action.sa_mask), SIGPOLL);
								   sigaction(SIGALRM, &alarm_action, NULL);

								   alarm_callback = rgp_periodic_check;

								   /* The alarm period is RGP_CLOCK_PERIOD / 100, rounded up to the
								    * next whole second.
								    */
								   alarm_period = (RGP_CLOCK_PERIOD + 99) / 100;

								   /* Request the first tick as early as possible; later ticks come
								    * at alarm_period.
								    */
								   alarm(1);
								#endif /* UNIX */

								#ifdef NT
								   /* NT uses a dedicated thread as the regroup timer. The thread
								    * waits on the TimerSignal event, which announces an RGP rate
								    * change; a rate of 0 tells the thread to exit.
								    */

								   /* TimerSignal: auto-reset event, created signalled. */
								   handle = CreateEvent( NULL,     /* no security */
								                         FALSE,    /* Autoreset */
								                         TRUE,     /* Initial State is Signalled */
								                         NULL );   /* No name */
								   if ( handle == NULL )
								   {
								      RGP_ERROR( RGP_INTERNAL_ERROR );
								   }
								   rgp->OS_specific_control.TimerSignal = handle;

								   /* Stabilized: manual-reset event, created signalled. */
								   handle = CreateEvent( NULL,     /* no security */
								                         TRUE,     /* Manual reset */
								                         TRUE,     /* Initial State is Signalled */
								                         NULL );   /* No name */
								   if ( handle == NULL )
								   {
								      RGP_ERROR( RGP_INTERNAL_ERROR );
								   }
								   rgp->OS_specific_control.Stabilized = handle;

								   /* Arbitration and shutdown state start out cleared. */
								   rgp->OS_specific_control.ArbitrationInProgress   = FALSE;
								   rgp->OS_specific_control.ArbitratingNode         = MM_INVALID_NODE;
								   rgp->OS_specific_control.ApproxArbitrationWinner = MM_INVALID_NODE;
								   rgp->OS_specific_control.ShuttingDown            = FALSE;

								   /* Start the timer thread.
								    * NOTE(review): the thread runs immediately, before TimerThread,
								    * TimerThreadId and the callback/epoch fields below are stored -
								    * confirm NT_timer_thread does not read them at startup.
								    */
								   handle = CreateThread( 0,                /* security */
								                          0,                /* stack size - use same as primary thread */
								                          (LPTHREAD_START_ROUTINE)NT_timer_thread,  /* starting point */
								                          (VOID *) NULL,    /* no parameter */
								                          0,                /* create flags - start immediately */
								                          &timerThreadId ); /* thread ID returned here */
								   if ( handle == NULL )
								   {
								      RGP_ERROR( RGP_INTERNAL_ERROR );      /* at least for now */
								   }
								   rgp->OS_specific_control.TimerThread   = handle;
								   rgp->OS_specific_control.TimerThreadId = timerThreadId;

								   /* No callbacks registered yet; event epoch starts at zero. */
								   rgp->OS_specific_control.UpDownCallback    = RGP_NULL_PTR;
								   rgp->OS_specific_control.NodesDownCallback = RGP_NULL_PTR;
								   rgp->OS_specific_control.EventEpoch        = 0;

								#if defined TDM_DEBUG
								   /* Clear all debug/test state. */
								   rgp->OS_specific_control.debug.frozen                     = 0;
								   rgp->OS_specific_control.debug.reload_in_progress         = 0;
								   rgp->OS_specific_control.debug.timer_frozen               = 0;
								   rgp->OS_specific_control.debug.doing_tracing              = 0;
								   rgp->OS_specific_control.debug.MyTestPoints.TestPointWord = 0;

								   /* Seed the random number generator used in testing. */
								   srand((unsigned) time( NULL ) );
								#endif

								#endif /* NT */
								}


								/************************************************************************

								 * rgp_cleanup_OS

								 * ==============

								 *

								 * Description:

								 *

								 *    This routine does OS-dependent cleanup of regroup structures

								 *    and timer thread activity to ready for a new JOIN attempt.

								 *

								 * Parameters:

								 *

								 *    None

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

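								 *    OS-dependent cleanup. On NT, signals the TimerSignal event so
								 *    that the timer thread restarts the regroup timer; on other
								 *    platforms this routine does nothing.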

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_cleanup_OS(void)
								{
								#ifdef NT
								   /* Signal the timer thread so that it restarts the RGP timer;
								    * a_tick might have changed.
								    */
								   HANDLE timerSignal = rgp->OS_specific_control.TimerSignal;

								   SetEvent( timerSignal );
								#endif /* NT */
								}


								/************************************************************************

								 * rgp_update_regroup_packet

								 * =========================

								 *

								 * Description:

								 *

								 *    Macro to copy the current regroup status into the regroup packet

								 *    sending buffer.

								 *

								 * Parameters:

								 *

								 *    None

								 *

								 * Algorithm:

								 *

								 *    Copies the status (which is already in the form of a regroup status

								 *    packet) into the packet buffer. Then, if we should let others (and

								 *    ourselves) know of our stage, the current knownstage field is

								 *    updated to include the local node number.

								 *

								 ************************************************************************/

								/*
								 * rgp_update_regroup_packet
								 *
								 * Statement-like macro (use as "rgp_update_regroup_packet;"). It copies
								 * rgp->rgppkt into rgp->rgppkt_to_send and then, if rgp->sendstage is
								 * nonzero, inserts rgp->mynode into the knownstage mask of the copy
								 * that corresponds to the current stage (stages 1-5 only; any other
								 * stage leaves the copy untouched). rgp->rgppkt itself is not modified.
								 *
								 * Every line of the definition except the last must end in a backslash
								 * and be followed directly by the next line: a blank line after a
								 * backslash terminates the macro and leaves the rest of the body as
								 * stray code at file scope.
								 */
								#define rgp_update_regroup_packet                                        \
								do                                                                       \
								{                                                                        \
								   /* Copy the regroup status to the sending packet area. */             \
								   rgp->rgppkt_to_send = rgp->rgppkt;                                    \
								                                                                         \
								   /* If we should let others know of our stage, we must modify the      \
								    * current stage mask to include ourselves.                           \
								    */                                                                   \
								   if (rgp->sendstage)                                                   \
								   {                                                                     \
								      switch (rgp->rgppkt.stage)                                         \
								      {                                                                  \
								         case RGP_ACTIVATED:                                             \
								            ClusterInsert(rgp->rgppkt_to_send.knownstage1, rgp->mynode); \
								            break;                                                       \
								         case RGP_CLOSING:                                               \
								            ClusterInsert(rgp->rgppkt_to_send.knownstage2, rgp->mynode); \
								            break;                                                       \
								         case RGP_PRUNING:                                               \
								            ClusterInsert(rgp->rgppkt_to_send.knownstage3, rgp->mynode); \
								            break;                                                       \
								         case RGP_PHASE1_CLEANUP:                                        \
								            ClusterInsert(rgp->rgppkt_to_send.knownstage4, rgp->mynode); \
								            break;                                                       \
								         case RGP_PHASE2_CLEANUP:                                        \
								            ClusterInsert(rgp->rgppkt_to_send.knownstage5, rgp->mynode); \
								            break;                                                       \
								         default:                                                        \
								            break;                                                       \
								      }                                                                  \
								   }                                                                     \
								} while(0)


								/************************************************************************

								 * rgp_update_poison_packet

								 * ========================

								 *

								 * Description:

								 *

								 *    Macro to copy the current regroup status into the poison packet

								 *    sending buffer.

								 *

								 * Parameters:

								 *

								 *    None

								 *

								 * Algorithm:

								 *

								 *    Copies the appropriate regroup status fields into the poison

								 *    packet buffer to help debugging when a dump of a poisoned

								 *    node is examined.

								 *

								 ************************************************************************/

								/*
								 * rgp_update_poison_packet
								 *
								 * Statement-like macro (use as "rgp_update_poison_packet;"). It copies
								 * seqno, reason, activatingnode and causingnode from rgp->rgppkt, and
								 * the initnodes/endnodes masks from the rgp control structure, into
								 * rgp->poison_pkt.
								 *
								 * Every line of the definition except the last must end in a backslash
								 * and be followed directly by the next line: a blank line after a
								 * backslash terminates the macro and leaves the rest of the body as
								 * stray code at file scope.
								 */
								#define rgp_update_poison_packet                                         \
								do                                                                       \
								{                                                                        \
								   rgp->poison_pkt.seqno = rgp->rgppkt.seqno;                            \
								   rgp->poison_pkt.reason = rgp->rgppkt.reason;                          \
								   rgp->poison_pkt.activatingnode = rgp->rgppkt.activatingnode;          \
								   rgp->poison_pkt.causingnode = rgp->rgppkt.causingnode;                \
								   ClusterCopy(rgp->poison_pkt.initnodes, rgp->initnodes);               \
								   ClusterCopy(rgp->poison_pkt.endnodes, rgp->endnodes);                 \
								} while(0)


								/************************************************************************

								 * rgp_broadcast

								 * =============

								 *

								 * Description:

								 *

								 *    This routine asks the message system to broadcast an unacknowledged

								 *    packet of subtype "packet_subtype" to a set of nodes indicated in

								 *    an appropriate field in the rgp control struct. How the broadcast

								 *    is implemented depends on the OS.

								 *

								 * Parameters:

								 *

								 *    uint8 packet_subtype - type of unsequenced packet to send

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    The same data packet is to be sent to the set of nodes indicated

								 *    in the rgp control struct field. The sending can be done by queueing

								 *    the packets directly to the send engine or the send can be deferred

								 *    to a lower priority interrupt level. The former approach reduces

								 *    the latency for sending these urgent packets while the latter

								 *    approach may reduce the number of sends if several requests to

								 *    send the same type of packets (this is true only of regroup

								 *    packets) are made in quick succession. In this case, previous

								 *    requests are overwritten by later requests. This is OK since the

								 *    regroup algorithm has enough redundancy in packet sending.

								 *

								 *    In NSK, the message system provides a broadcast facility for

								 *    unacknowledged packets. It copies regroup's packet into its own

								 *    buffer and issues multiple requests to the SNet services layer.

								 *    When it copies the buffer, it disables the timer and IPC

								 *    interrupts ensuring that there will be no contention with Regroup.

								 *    Therefore, this routine can safely update the packet area here

								 *    without checking if the sending apparatus has completed sending

								 *    the previous packet.

								 *

								 *    This is not true of LCU where the message system does not

								 *    provide a broadcast facility. In LCU, the updating of the packet

								 *    buffer can be done only when the send engine has completed

								 *    sending. This is assured only in the send completion interrupt

								 *    handler (rgp_msgsys_work).

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_broadcast(uint8 packet_subtype)
								{
								   /* [Raj Das] Pack the ignore screen into the regroup packet before
								    * anything is queued for sending.
								    */
								   PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);

								   switch (packet_subtype)
								   {
								      case RGP_UNACK_REGROUP :
								      {
								         /* Trace the queueing of regroup status packets. */
								         RGP_TRACE( "RGP Send packets",
								                    rgp->rgppkt.stage,                             /* TRACE */
								                    RGP_MERGE_TO_32( rgp->status_targets,          /* TRACE */
								                                     rgp->rgppkt.knownstage1 ),    /* TRACE */
								                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage2,      /* TRACE */
								                                     rgp->rgppkt.knownstage3 ),    /* TRACE */
								                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage4,      /* TRACE */
								                                     rgp->rgppkt.knownstage5 ) );  /* TRACE */

								#if defined(NSK) || defined(UNIX) || defined(NT)
								         /* On these platforms the send buffer may be refreshed even
								          * while the send engine is still busy with the previous
								          * send (see the algorithm description in the routine
								          * header).
								          */
								         if ((rgp->rgppkt.reason != MM_EVT_LEAVE) ||
								             (rgp->rgppkt.causingnode != rgp->mynode))
								         {
								            /* Normal case: copy the regroup packet and insert our
								             * node number into the knownstage mask.
								             */
								            rgp_update_regroup_packet;
								         }
								         else
								         {
								            /* We are the node leaving: copy the packet as is so
								             * that our node stays out of the knownstage mask.
								             */
								            rgp->rgppkt_to_send = rgp->rgppkt;
								         }
								#endif /* NSK || UNIX || NT */

								         /* Add the status targets to the set the message system
								          * still has to send regroup packets to.
								          */
								         ClusterUnion(rgp->rgp_msgsys_p->regroup_nodes,
								                      rgp->status_targets,
								                      rgp->rgp_msgsys_p->regroup_nodes);

								         /* The targets have been handed over, so clear them in the
								          * rgp_control struct. Clearing the bits in the shared
								          * regroup/msgsys struct after the send is the message
								          * system's job.
								          */
								         ClusterInit(rgp->status_targets);

								         rgp->rgp_msgsys_p->sendrgppkts = 1;
								         break;
								      }

								      case RGP_UNACK_IAMALIVE :
								      {
								         /* Count number of IamAlive requests queued. */
								         RGP_INCREMENT_COUNTER( QueuedIAmAlive );

								         /* IamAlives go to every node in the current cluster. */
								         ClusterUnion(rgp->rgp_msgsys_p->iamalive_nodes,
								                      rgp->rgpinfo.cluster,
								                      rgp->rgp_msgsys_p->iamalive_nodes);
								         rgp->rgp_msgsys_p->sendiamalives = 1;

								         /* There is no targets field to clear in the rgp_control
								          * struct for this subtype; the message system clears the
								          * bits in the shared struct once the packets are sent.
								          */
								         break;
								      }

								      case RGP_UNACK_POISON :
								      {
								         cluster_t temp_cluster;   /* poison targets still in cluster */

								         /* Trace the sending of poison packets. */
								         RGP_TRACE( "RGP Send poison ",
								                    rgp->rgppkt.stage,                             /* TRACE */
								                    RGP_MERGE_TO_32( rgp->poison_targets,          /* TRACE */
								                                     rgp->rgppkt.knownstage1 ),    /* TRACE */
								                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage2,      /* TRACE */
								                                     rgp->rgppkt.knownstage3 ),    /* TRACE */
								                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage4,      /* TRACE */
								                                     rgp->rgppkt.knownstage5 ) );  /* TRACE */

								         /* A poison target must NOT be considered alive: apart from
								          * banished nodes, no target may still be a member of the
								          * current cluster.
								          */
								         ClusterIntersection(temp_cluster,
								                             rgp->rgpinfo.cluster,
								                             rgp->poison_targets);
								         ClusterDifference(temp_cluster,
								                           temp_cluster,
								                           rgp->OS_specific_control.Banished);
								         if (ClusterNumMembers(temp_cluster) != 0)
								         {
								            RGP_ERROR(RGP_INTERNAL_ERROR);
								         }

								#if defined(NSK) || defined(NT)
								         /* On these platforms the poison buffer may be refreshed
								          * even while the send engine is still busy with the
								          * previous send (see the algorithm description in the
								          * routine header).
								          */
								         rgp_update_poison_packet;
								#endif /* NSK || NT */

								         /* Add the poison targets to the set the message system
								          * still has to send poison packets to.
								          */
								         ClusterUnion(rgp->rgp_msgsys_p->poison_nodes,
								                      rgp->poison_targets,
								                      rgp->rgp_msgsys_p->poison_nodes);

								         /* The targets have been handed over, so clear them in the
								          * rgp_control struct. Clearing the bits in the shared
								          * regroup/msgsys struct after the send is the message
								          * system's job.
								          */
								         ClusterInit(rgp->poison_targets);

								         rgp->rgp_msgsys_p->sendpoisons = 1;
								         break;
								      }

								      default :
								      {
								         /* Unknown packet subtype. */
								         RGP_ERROR(RGP_INTERNAL_ERROR);
								         break;
								      }
								   }

								   QUEUESEND; /* invoke OS-specific sending function/macro */
								}


								/************************************************************************

								 * rgp_had_power_failure

								 * =====================

								 *

								 * Description:

								 *

								 *    Tells the OS at the end of a regroup incident if a surviving node

								 *    had a power failure. The message system can use this to clear all

								 *    bus errors collected so far to node because node seems to have

								 *    had a power failure and has now recovered from it.  Perhaps, the

								 *    bus errors were due to the power failure.

								 *

								 * Parameters:

								 *

								 *    node_t node -
								 *       the surviving node that had a power failure

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    Calls a message system routine to perform any error clearing.

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_had_power_failure(node_t node)
								{
								   /* No OS action is taken at present; the report is only traced. */
								   RGP_TRACE( "RGP Power fail  ",
								              node,
								              0,
								              0,
								              0 );
								}


								/************************************************************************

								 * rgp_status_of_node

								 * ==================

								 *

								 * Description:

								 *

								 *    Ask the SP to return the status of a node. The SP must return the

								 *    current status and not return a stale status. This routine is

								 *    called by the split-brain avoidance algorithm in the two-node

								 *    case, for the non-tie-breaker to get the status of the tie-breaker

								 *    node.

								 *

								 * Parameters:

								 *

								 *    node_t node

								 *       the node whose status is to be obtained.

								 *

								 * Returns:

								 *

								 *    int - the status code of the node returned by the SP, appropriately

								 *    encoded into one of the values known to regroup.

								 *

								 * Algorithm:

								 *

								 *    Calls a millicode routine to ask the SP for the status of the node.

								 *

								 ************************************************************************/

								_priv _resident int
								rgp_status_of_node(node_t node)
								{
								#if !defined(NT)
								   /* Return the remote CPU state reported for the node. */
								   return _get_remote_cpu_state_( node );                                        /*F40:MB06452.1*/
								#else
								   /* Nobody to ask on NT: the node is always reported as
								    * unreachable and the node argument is not consulted.
								    */
								   return RGP_NODE_UNREACHABLE;
								#endif
								}


								/************************************************************************

								 * rgp_newnode_online

								 * ==================

								 *

								 * Description:

								 *

								 *    This routine is called if the first IamAlive is received from a

								 *    newly booted node before the cluster manager gets a chance to

								 *    call rgp_monitor_node(). The OS can use this routine to mark the

								 *    node as up if it does not have any other means to detect that

								 *    a node has come up.

								 *

								 * Parameters:

								 *

								 *    node_t newnode -

								 *       the new node that has just been detected to be up

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    This routine marks the state of the node as up as seen by the

								 *    native OS.

								 *

								 *    In NSK, on the reloader node, the marking of the reloadee as up

								 *    is done by the message system when the initial address handshake

								 *    packet is received from the reloadee. NSK does not require the

								 *    regroup module to report the fact that the reloadee is online.

								 *

								 *    The above is probably true for LCU as well. However, the details

								 *    are not yet worked out. For now, this routine is a no-op for LCU.

								 *

								 ************************************************************************/

								_priv _resident void rgp_newnode_online(node_t newnode)
								{
								   /* The only action taken here is to record the event in the
								    * regroup trace.
								    */
								   RGP_TRACE(
								      "RGP New node up ",
								      newnode,
								      0,
								      0,
								      0);
								}


								/************************************************************************

								 * rgp_select_cluster_ex

								 * =====================

								 *

								 * Description:

								 *

								 *    Given an array of cluster choices, this routine picks the best

								 *    cluster to keep alive. cluster_choices[] is the array of choices

								 *    and num_clusters is the number of entries in the array.

								 *

								 * Parameters:

								 *

								 *    cluster_t cluster_choices[]

								 *       array of cluster choices

								 *

								 *    int num_clusters

								 *       number of entries (choices) in the array

								 *

								 *    node_t key_node

								 *       internal node number of the key node or RGP_NULL_NODE

								 *

								 * Returns:

								 *

								 *    int - the index of the selected cluster; if no cluster

								 *    is viable, -1 is returned.

								 *

								 * Algorithm:

								 *

								 *    By default, the best cluster is defined as the largest cluster.

								 *    Optionally, a node called key_node can be required to be present

								 *    for a cluster to be viable. key_node can be set to RGP_NULL_NODE

								 *    to imply that no specific node is required to be present.  The

								 *    routine returns the index of the best cluster and -1 if none of

								 *    the clusters is viable (that is, does not include the key node).

								 *

								 ************************************************************************/

								_priv _resident int
								rgp_select_cluster_ex(cluster_t cluster_choices[], int num_clusters, node_t key_node)
								{
								   int best_index = -1;   /* index of the largest viable cluster so far */
								   int best_size  = 0;    /* member count of that cluster */
								   int size;
								   int idx;

								#if defined(UNIX)
								   /* Debug aid: dump each candidate as a list of external node numbers. */
								   printf("rgp_select_cluster() called with %d choices:", num_clusters);
								   for (idx = 0; idx < num_clusters; idx++)
								   {
								      node_t member;
								      printf("(");
								      for (member = 0; member < (node_t) rgp->num_nodes; member++)
								      {
								         if (ClusterMember(cluster_choices[idx], member))
								            printf("%d,", EXT_NODE(member));
								      }
								      printf(")");
								   }
								   printf("\n");
								   fflush(stdout);
								#endif /* UNIX */

								   for (idx = 0; idx < num_clusters; idx++)
								   {
								      /* A candidate is viable when no key node is required, or when
								       * the key node is one of its members.
								       */
								      if ((key_node == RGP_NULL_NODE) ||
								          ClusterMember(cluster_choices[idx], key_node))
								      {
								         size = ClusterNumMembers(cluster_choices[idx]);

								         /* Strict comparison: the first of several equally large
								          * clusters wins, and a cluster with no members is never
								          * selected.
								          */
								         if (size > best_size)
								         {
								            best_index = idx;
								            best_size = size;
								         }
								      }
								   }

								#if defined(UNIX)
								   printf("Node %d: rgp_select_cluster() returned %d.\n",
								          EXT_NODE(rgp->mynode), best_index);
								   fflush(stdout);
								#endif /* UNIX */

								   return (best_index);
								}


								/************************************************************************

								 * rgp_select_cluster

								 * ==================

								 *

								 * Description:

								 *

								 *    Given an array of cluster choices, this routine picks the best

								 *    cluster to keep alive. cluster_choices[] is the array of choices

								 *    and num_clusters is the number of entries in the array.

								 *

								 * Parameters:

								 *

								 *    cluster_t cluster_choices[]

								 *       array of cluster choices

								 *

								 *    int num_clusters

								 *       number of entries (choices) in the array

								 *

								 * Returns:

								 *

								 *    int - the index of the selected cluster; if no cluster

								 *    is viable, -1 is returned.

								 *

								 * Algorithm:

								 *

								 *    By default, the best cluster is defined as the largest cluster.

								 *    Optionally, a node called RGP_KEY_NODE can be required to be present

								 *    for a cluster to be viable. RGP_KEY_NODE can be set to RGP_NULL_NODE

								 *    to imply that no specific node is required to be present.  The

								 *    routine returns the index of the best cluster and -1 if none of

								 *    the clusters is viable (that is, does not include the key node).

								 *

								 ************************************************************************/

								_priv _resident int
								rgp_select_cluster(cluster_t cluster_choices[], int num_clusters)
								{
								    /* RGP_KEY_NODE is handed on unchanged when it equals RGP_NULL_NODE;
								     * any other value is passed through INT_NODE() first.
								     */
								    node_t key_node = (RGP_KEY_NODE == RGP_NULL_NODE)
								                         ? RGP_NULL_NODE
								                         : INT_NODE(RGP_KEY_NODE);

								    return rgp_select_cluster_ex(cluster_choices, num_clusters, key_node);
								}


								#ifdef LCU

								/************************************************************************

								 * rgp_msgsys_work

								 * ===============

								 *

								 * Description:

								 *

								 *    LCU-specific routine that implements broadcasting of packets by

								 *    sending them serially.

								 *

								 *    This routine is called from rgp_broadcast() to initiate new sends.

								 *    It is also the packet send completion interrupt handler (callback

								 *    routine), invoked by the LCU message system when the packet buffer

								 *    can be reused.

								 *

								 * Parameters:

								 *

								 *    lcumsg_t *lcumsgp -

								 *       pointer to lcu message if called from the transport's send

								 *       completion interrupt handler; NULL if called from

								 *       rgp_broadcast() to send a new packet.

								 *

								 *    int status -

								 *       the message completion status if called from the transport's

								 *       send completion interrupt handler; 0 if called from

								 *       rgp_broadcast() to send a new packet.

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    If called from the send completion interrupt, the routine checks

								 *    to see if the packet buffer needs to be refreshed. This is true

								 *    if the appropriate bit in the rgp_msgsys struct is set. If so,

								 *    the buffer is updated with the current info (using an update

								 *    macro). This update is relevant to regroup status packets and

								 *    poison packets, but not to IamAlives packets whose contents are

								 *    always the same. The bit is cleared after the packet is updated.

								 *

								 *    Next, the routine checks if there are more destinations to send

								 *    the packet to. If so, it finds the next higher numbered node to

								 *    send to, issues a send and returns.

								 *

								 *    If invoked from rgp_broadcast() to start a new broadcast, the

								 *    routine first checks to see if the previous broadcast of the

								 *    same packet is complete. This is indicated by the tag field in

								 *    the message struct. The tag is NULL if the broadcast has

								 *    completed or has not been initiated. In this case, the tag is

								 *    set to a non-NULL value and a new broadcast initiated, with

								 *    this routine specified as the callback routine.

								 *

								 *    If the previous broadcast has not completed, nothing needs to

								 *    be done. The completion interrupt will cause the buffer to be

								 *    refreshed and the broadcast to be continued. The broadcast

								 *    will then include new targets that may be included in this

								 *    new request.

								 *

								 ************************************************************************/

								_priv _resident void
								rgp_msgsys_work(lcumsg_t *lcumsgp, int status)
								{
								   rgp_unseq_pkt_t   *packet;
								   cluster_t         *sending_cluster;
								   node_t            node;

								   if (lcumsgp == NULL)
								   {
								      /* New work requested. Only one type of work is requested at
								       * a time.
								       */

								      if (rgp->rgp_msgsys_p->sendrgppkts)
								      {
								         /* Have new regroup status packets to send. A NULL tag means
								          * the last regroup status send completed (or none was ever
								          * started), so the packet can be updated and a new send
								          * initiated. Otherwise the work is left to the completion
								          * interrupt (invocation of this routine with a non-NULL
								          * lcumsgp).
								          */
								         lcumsgp = rgp->OS_specific_control.lcumsg_regroup_p;
								         if (lcumsgp->lcu_tag == NULL)
								         {
								            /* Last send completed. Initiate new send. */
								            rgp_update_regroup_packet;
								            rgp->rgp_msgsys_p->sendrgppkts = 0;

								            for (node = 0; node < rgp->num_nodes; node++)
								            {
								               if (ClusterMember(rgp->rgp_msgsys_p->regroup_nodes, node))
								               {
								                  ClusterDelete(rgp->rgp_msgsys_p->regroup_nodes, node);
								                  lcumsgp->lcu_node = node;
								                  lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->regroup_nodes);
								                  if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
								                     ELCU_OK)
								                     RGP_ERROR(RGP_INTERNAL_ERROR);
								                  break; /* can send only to one node at a time */
								               }
								            }
								         }
								      }

								      else if (rgp->rgp_msgsys_p->sendiamalives)
								      {
								         /* Need to send IamAlives again. Same protocol as above,
								          * except that the IamAlive packet is not refreshed before
								          * it is sent.
								          */
								         lcumsgp = rgp->OS_specific_control.lcumsg_iamalive_p;
								         if (lcumsgp->lcu_tag == NULL)
								         {
								            /* Last send completed. Initiate new send. */
								            rgp->rgp_msgsys_p->sendiamalives = 0;

								            for (node = 0; node < rgp->num_nodes; node++)
								            {
								               if (ClusterMember(rgp->rgp_msgsys_p->iamalive_nodes, node))
								               {
								                  ClusterDelete(rgp->rgp_msgsys_p->iamalive_nodes, node);
								                  lcumsgp->lcu_node = node;
								                  lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->iamalive_nodes);
								                  if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
								                     ELCU_OK)
								                     RGP_ERROR(RGP_INTERNAL_ERROR);
								                  break; /* can send only to one node at a time */
								               }
								            }
								         }
								      }

								      else if (rgp->rgp_msgsys_p->sendpoisons)
								      {
								         /* Have new poison packets to send. Same protocol as for
								          * regroup status packets: refresh the packet and start a
								          * send only if the previous poison broadcast is complete.
								          */
								         lcumsgp = rgp->OS_specific_control.lcumsg_poison_p;
								         if (lcumsgp->lcu_tag == NULL)
								         {
								            /* Last send completed. Initiate new send. */
								            rgp_update_poison_packet;
								            rgp->rgp_msgsys_p->sendpoisons = 0;

								            for (node = 0; node < rgp->num_nodes; node++)
								            {
								               if (ClusterMember(rgp->rgp_msgsys_p->poison_nodes, node))
								               {
								                  ClusterDelete(rgp->rgp_msgsys_p->poison_nodes, node);
								                  lcumsgp->lcu_node = node;
								                  lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->poison_nodes);
								                  if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
								                     ELCU_OK)
								                     RGP_ERROR(RGP_INTERNAL_ERROR);
								                  break; /* can send only to one node at a time */
								               }
								            }
								         }
								      }

								   } /* new work */

								   else
								   {
								      /* Send completion interrupt; continue the broadcast if
								       * there are targets remaining.
								       */

								      RGP_LOCK;

								      /* Find what type of packet completed; send the same type. */
								      packet = (rgp_unseq_pkt_t *) lcumsgp->lcu_reqmbuf.lcu_ctrlbuf;

								      switch (packet->pktsubtype)
								      {
								         case RGP_UNACK_REGROUP :

								            /* Check if packet needs to be updated. */
								            if (rgp->rgp_msgsys_p->sendrgppkts)
								            {
								               rgp_update_regroup_packet;
								               rgp->rgp_msgsys_p->sendrgppkts = 0;
								            }
								            break;

								         case RGP_UNACK_IAMALIVE :
								            /* IamAlive packets are never refreshed. */
								            break;

								         case RGP_UNACK_POISON :

								            /* Check if packet needs to be updated. */
								            if (rgp->rgp_msgsys_p->sendpoisons)
								            {
								               rgp_update_poison_packet;
								               rgp->rgp_msgsys_p->sendpoisons = 0;
								            }
								            break;

								         default :
								            /* Any other subtype has nothing to refresh. */
								            break;
								      }

								      /* Check if there is any more node to send the same packet
								       * type to. If not, set the tag to NULL and return.
								       */
								      sending_cluster = (cluster_t *) (lcumsgp->lcu_tag);
								      if (ClusterNumMembers(*sending_cluster) == 0)
								      {
								         lcumsgp->lcu_tag = NULL; /* indicate that broadcast is complete. */

								         /* The lock taken above must be released on this early
								          * exit as well; returning with it held would leave it
								          * locked after the last send of every broadcast.
								          */
								         RGP_UNLOCK;
								         return;
								      }

								      /* There is at least one more node to send to. Start with
								       * the node with the next higher number than the node we
								       * just finished sending to, wrapping around to node 0.
								       *
								       * The loop terminates after posting a send to the next
								       * node to send to. We know there is at least one such node.
								       */
								      for (node = lcumsgp->lcu_node + 1; node < rgp->num_nodes + 1; node++)
								      {
								         if (node == rgp->num_nodes)
								            node = 0;  /* continue the search starting at node 0 */
								         if (ClusterMember(*sending_cluster, node))
								         {
								            ClusterDelete(*sending_cluster, node);
								            lcumsgp->lcu_node = node;
								            if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
								               ELCU_OK)
								               RGP_ERROR(RGP_INTERNAL_ERROR);
								            break; /* can send only to one node at a time */
								         }
								      }

								      RGP_UNLOCK;
								   }
								}

								#endif /* LCU */


								/*---------------------------------------------------------------------------*/


								#if defined(LCU) || defined(UNIX) || defined(NT)


								/*---------------------------------------------------------------------------*/

								void rgp_hold_all_io(void)
								{
								   /* Stand-in for the TNet services routine that pauses IO: the NT
								    * build invokes the registered HoldIOCallback, and every build
								    * traces the request.
								    */
								#if defined (NT)
								   rgp->OS_specific_control.HoldIOCallback();
								#endif
								   RGP_TRACE( "RGP Hold all IO ", 0, 0, 0, 0);
								}

								/*---------------------------------------------------------------------------*/

								void rgp_resume_all_io(void)
								{
								   /* Stand-in for the TNet services routine that resumes IO: the NT
								    * build invokes the registered ResumeIOCallback, and every build
								    * traces the request.
								    */
								#if defined (NT)
								   rgp->OS_specific_control.ResumeIOCallback();
								#endif
								   RGP_TRACE( "RGP Resume IO   ", 0, 0, 0, 0);
								}

								/*---------------------------------------------------------------------------*/

								void
								RGP_ERROR_EX (uint16 halt_code, char* fname, DWORD lineno)
								/* Halt node with error code.
								 *
								 *    halt_code - regroup halt code; selects the message text and event id
								 *    fname     - source file name, reported for RGP_INTERNAL_ERROR (and
								 *                in the CLUSTER_BETA log line)
								 *    lineno    - source line number reported together with fname
								 *
								 * The NT build logs the message, writes an event and then invokes
								 * HaltCallback; the UNIX build prints the message and aborts; any other
								 * build panics through cmn_err().
								 */
								{
								   char *halt_string;
								   node_t node = RGP_NULL_NODE;
								#if defined( NT )
								   char halt_buffer[ 256 ];
								   DWORD eventMsgId;
								   BOOL skipFormatting = FALSE;

								   //
								   // If a user initiated a shutdown, (s)he wants to see the node
								   // to go down and wait for an explicit start command.
								   //
								   // We map RGP_RELOADFAILED to SHUTDOWN_DURING_REGROUP_ERROR since
								   // HaltCallback does a graceful stop for the latter one.
								   // SCM won't restart the node after a graceful stop unless
								   // it is explicitly told to do so
								   //
								   if (halt_code == RGP_RELOADFAILED &&
								       rgp->OS_specific_control.ShuttingDown)
								   {
								      halt_code = RGP_SHUTDOWN_DURING_RGP;
								   }
								#endif

								   if (halt_code == RGP_RELOADFAILED) {
								      halt_string = "[RGP] Node %d: REGROUP WARNING: reload failed.";
								      eventMsgId = MM_EVENT_RELOAD_FAILED;
								   }
								   else if (halt_code ==  RGP_INTERNAL_ERROR) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: consistency check failed in file %s, line %u.";
								      eventMsgId = MM_EVENT_INTERNAL_ERROR;

								      // This message takes different arguments than the others, so it
								      // is formatted here and the common formatting step is skipped.
								      skipFormatting = TRUE;

								      _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
								                halt_string,
								                EXT_NODE(rgp->mynode),
								                fname,
								                lineno);

								      // _snprintf does not append a terminator when the output fills
								      // the given count, and the last byte is never written by it.
								      halt_buffer[ sizeof( halt_buffer ) - 1 ] = '\0';
								   }
								   else if (halt_code ==  RGP_MISSED_POLL_TO_SELF) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: cannot talk to self.";
								      eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
								   }
								#if !defined(NT)
								   else if (halt_code ==  RGP_AVOID_SPLIT_BRAIN) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: commiting suicide to avoid split brain.";
								   }
								#endif
								   else if (halt_code ==  RGP_PRUNED_OUT) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: pruned out due to communication failure.";
								      eventMsgId = MM_EVENT_PRUNED_OUT;
								   }
								   else if ((halt_code >=  RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: poison packet received from node %d.";
								      eventMsgId = MM_EVENT_PARIAH;
								      node = (node_t)(halt_code - RGP_PARIAH);
								   }
								   else if (halt_code ==  RGP_ARBITRATION_FAILED) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration failed.";
								      eventMsgId = MM_EVENT_ARBITRATION_FAILED;
								   }
								   else if (halt_code ==  RGP_ARBITRATION_STALLED) {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration stalled.";
								      eventMsgId = MM_EVENT_ARBITRATION_STALLED;
								   }
								   else if (halt_code ==  RGP_SHUTDOWN_DURING_RGP) {
								      halt_string = "[RGP] Node %d: REGROUP INFO: regroup engine requested immediate shutdown.";
								      eventMsgId = MM_EVENT_SHUTDOWN_DURING_RGP;
								   }
								   else {
								      halt_string = "[RGP] Node %d: REGROUP ERROR: unknown halt code (%d).";
								      eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
								      node = halt_code;  // get it printed out by borrowing node
								   }

								#if defined(UNIX)
								   printf(halt_string, EXT_NODE(rgp->mynode), node);
								   fflush(stdout);
								   /* Simulate a halt by dumping core and exiting the process. */
								   abort();

								#elif defined(NT)

								   if ( !skipFormatting ) {
								      _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
								                halt_string,
								                EXT_NODE(rgp->mynode),
								                node);

								      // Force termination; see the note on the first _snprintf call.
								      halt_buffer[ sizeof( halt_buffer ) - 1 ] = '\0';
								   }

								#if CLUSTER_BETA
								   ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\t%2!hs!:%3!d!\n", halt_buffer, fname, lineno);
								#else
								   ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\n", halt_buffer );
								#endif

								   if ((halt_code >=  RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
								      // Poison packet: the event carries the sender's id and name.
								      WCHAR  nodeString[ 16 ];
								      PWCHAR nodeName;

								      _snwprintf( nodeString, sizeof( nodeString ) / sizeof ( WCHAR ), L"%d", node );
								      nodeString[ ( sizeof( nodeString ) / sizeof ( WCHAR ) ) - 1 ] = L'\0';
								      nodeName = RgpGetNodeNameFromId( node );
								      CsLogEvent2( LOG_CRITICAL, eventMsgId, nodeString, nodeName );
								      if ( nodeName != NULL ) {
								         LocalFree( nodeName );
								      }
								   }
								   else if ( eventMsgId == NM_EVENT_MEMBERSHIP_HALT ) {
								      // Generic membership halt: the event carries the halt code.
								      WCHAR  haltString[ 16 ];

								      _snwprintf( haltString, sizeof( haltString ) / sizeof ( WCHAR ), L"%d", halt_code );
								      haltString[ ( sizeof( haltString ) / sizeof ( WCHAR ) ) - 1 ] = L'\0';
								      CsLogEvent1( LOG_CRITICAL, eventMsgId, haltString );
								   }
								   else {
								      CsLogEvent( LOG_CRITICAL, eventMsgId );
								   }

								   /* We rely on RGP_ERROR_EX to kill the node immediately.

								      rgp_cleanup() can potentially slow us down.
								      435977 showed that it can take up to 25 seconds if we
								      have a lot of IP addr activity.

								      Since at the end of the function we execute HaltCallback, which kills
								      the cluster, we can safely omit doing rgp_cleanup and rgp_cleanup_OS.

								      If JoinFailedCallback is ever enabled, the fate of rgp_cleanup and
								      rgp_cleanup_OS should be reevaluated.
								   */

								#if 0
								   rgp_cleanup();
								   rgp_cleanup_OS();
								   if (halt_code == RGP_RELOADFAILED)
								           (*(rgp->OS_specific_control.JoinFailedCallback))();
								   else
								#endif
								           (*(rgp->OS_specific_control.HaltCallback))(halt_code); // does not return

								#else
								   cmn_err(CE_PANIC, halt_string, EXT_NODE(rgp->mynode), node);
								#endif /* UNIX */
								}

								/*---------------------------------------------------------------------------*/

								void rgp_start_phase1_cleanup(void)
								{
								   /* Tells the OS to start cleanup actions for all failed nodes, then
								    * reports phase 1 cleanup as done to the regroup event handler.
								    */
								#if defined (NT)
								    node_t idx;

								    //
								    // NeedsNodeDownCallback holds the bitmask of nodes to be downed;
								    // MsgCleanup1Callback is invoked once per member, using the
								    // external node number.
								    //
								    for ( idx = 0; idx < (node_t) rgp->num_nodes; idx++ )
								    {
								        if ( !ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, idx ) )
								        {
								            continue;
								        }

								        rgp->OS_specific_control.MsgCleanup1Callback( EXT_NODE(idx) );
								    }
								#endif
								   RGP_TRACE( "RGP Ph1 cleanup ", 0, 0, 0, 0);
								   rgp_event_handler(RGP_EVT_PHASE1_CLEANUP_DONE, RGP_NULL_NODE);
								}

								/*---------------------------------------------------------------------------*/

								void rgp_start_phase2_cleanup(void)
								{
								   /* The equivalent of NSK's regroupstage4action(). After the NT
								    * callback (if any), phase 2 cleanup is reported as done to the
								    * regroup event handler.
								    */
								#if defined (NT)
								    BITSET down_nodes;
								    node_t idx;

								    //
								    // Translate the NeedsNodeDownCallback mask into a BITSET of
								    // external node numbers and hand it to MsgCleanup2Callback in
								    // a single call.
								    //
								    BitsetInit(down_nodes);
								    for ( idx = 0; idx < (node_t) rgp->num_nodes; idx++ )
								    {
								        if ( !ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, idx ) )
								        {
								            continue;
								        }

								        BitsetAdd(down_nodes, EXT_NODE(idx));
								    }

								    rgp->OS_specific_control.MsgCleanup2Callback(down_nodes);
								#endif
								   RGP_TRACE( "RGP Ph2 cleanup ", 0, 0, 0, 0);
								   rgp_event_handler(RGP_EVT_PHASE2_CLEANUP_DONE, RGP_NULL_NODE);
								}

								/*---------------------------------------------------------------------------*/

								void rgp_cleanup_complete(void)
								{
								   /* The equivalent of NSK's regroupstage5action(). No platform has
								    * any work to do here; completion is only traced.
								    */
								   RGP_TRACE( "RGP completed   ", 0, 0, 0, 0);
								}

								/*---------------------------------------------------------------------------*/


								#endif /* LCU || UNIX || NT */


								#if defined(NT)


								/************************************************************************

								 * NT_timer_callback

								 * =================

								 *

								 * Description:

								 *

								 *    This routine is the callback function that gets invoked whenever a

								 *        timer pops.  The routine will call rgp_periodic_check.  This function

								 *        is defined by the Win32 TimerProc procedure.

								 *

								 * Parameters:

								 *

								 *        See below.  We don't use any of them.

								 *

								 * Returns:

								 *

								 *    none.

								 *

								 * Algorithm:

								 *

								 *    This routine just calls rgp_periodic_check.  The existence of this

								 *        routine is solely due to a fixed format callback defined by

								 *        Microsoft.

								 *

								 ************************************************************************/

								VOID CALLBACK NT_timer_callback(
								        VOID
								        )
								{
								    // Regroup tick. In TDM_DEBUG builds the tick is suppressed while
								    // either debug freeze flag is set.
								#if defined(TDM_DEBUG)
								    if ( !( rgp->OS_specific_control.debug.timer_frozen ||
								            rgp->OS_specific_control.debug.frozen ) )
								#endif
								        rgp_periodic_check( );

								    // The Clussvc to clusnet heartbeat is sent only when it is enabled
								    // and a clusnet handle is available.
								    if ( !MmStartClussvcToClusnetHeartbeat || (NmClusnetHandle == NULL) ) {
								        return;
								    }

								    // Still counting down towards the next heartbeat.
								    if (MmCheckSystemHealthTick > 0) {
								        MmCheckSystemHealthTick--;
								        return;
								    }

								    // Countdown expired: reseed the tick count, mimicking hardware
								    // watchdog timers by using one quarter of the timeout, and send
								    // the heartbeat ioctl.
								    MmCheckSystemHealthTick = ((NmClusSvcHeartbeatTimeout * 1000)/RGP_CLOCK_PERIOD)/4;
								    ClusnetIamalive(NmClusnetHandle);
								}


								 /************************************************************************

								 * NT_timer_thread

								 * ===============

								 *

								 * Description:

								 *

								 *    This routine is executed as a separate thread in the Windows NT

								 *    implementation.  This thread generates periodic regroup

								 *    clock ticks. It is signalled via an event whenever the rate changes

								 *    or to cause termination.

								 *

								 * Parameters:

								 *

								 *    None.

								 *

								 * Returns:

								 *

								 *    This thread should not go away.

								 *

								 * Algorithm:

								 *

								 *    This routine is run as a separate thread.  It sets up a timer to pop

								 *        every <time_interval> * 10 milliseconds.

								 *

								 ************************************************************************/

								void NT_timer_thread( void  )

								{

								    BOOL Success;

								    LARGE_INTEGER DueTime;

								    DWORD Error, MyHandleIndex;

								    HANDLE MyHandles[2]; /* for use by WaitForMultiple */

								    DWORD status;

								    DWORD msDueTime;


								#define MyHandleSignalIx 0

								#define MyHandleTimerIx  1


								    MyHandles[MyHandleSignalIx] = rgp->OS_specific_control.TimerSignal; /* Event signals HB rate change */


								    rgp->OS_specific_control.RGPTimer = CreateWaitableTimer(

								                                            NULL,      // no security

								                                            FALSE,     // Initial State FALSE

								                                            NULL

								                                            );     // No name


								    if (rgp->OS_specific_control.RGPTimer == NULL) {

								        Error = GetLastError();

								        RGP_ERROR(RGP_INTERNAL_ERROR);

								    }


								    status = MmSetThreadPriority();


								    if ( status != ERROR_SUCCESS ) {

								        ClRtlLogPrint(LOG_CRITICAL,

								            "[MM] Unable to set timer thread priority, status %1!u!\n",

								            status

								            );


								        RGP_ERROR((uint16) status);

								        ExitThread(status);

								    }


								    MyHandles[MyHandleTimerIx] = rgp->OS_specific_control.RGPTimer;


								    while (TRUE)

								    {

								        MyHandleIndex = WaitForMultipleObjects (

								                            2,                /* Number of Events */

								                            MyHandles,        /* Handle Array */

								                            FALSE,            /* Wait for ANY event */

								                            INFINITE );       /* Wait forever */


								        if (MyHandleIndex == MyHandleSignalIx)  // Timer Change Signal Event

								        {

								            // RGP rate has changed

								            CancelWaitableTimer ( rgp->OS_specific_control.RGPTimer );

								            if ( rgp->rgpinfo.a_tick == 0 ) // Time to quit

								            {

								                CloseHandle ( rgp->OS_specific_control.RGPTimer );

								                rgp->OS_specific_control.RGPTimer = 0;

								                ExitThread ( 0 );

								            }


								            // a_tick has new RGP rate in milliseconds.

								            msDueTime = rgp->rgpinfo.a_tick;

								            DueTime.QuadPart = Int32x32To64(-10000, msDueTime);

								            Success = SetWaitableTimer(

								                          rgp->OS_specific_control.RGPTimer,

								                          &DueTime,

								                          rgp->rgpinfo.a_tick,

								                          NULL,

								                          NULL,

								                          FALSE);


								            if (!Success) {

								                Error = GetLastError();

								                RGP_ERROR(RGP_INTERNAL_ERROR);

								            }


								        } // Timer Change Signal

								        else

								        {   // RGP Timer Tick

								            NT_timer_callback();


								            // Removed - bug 742297. NM now has its own timer thread.

								            // NmTimerTick(msDueTime);

								        }

								    } // while

								}


								PWCHAR
								RgpGetNodeNameFromId(
								    node_t NodeID
								    )

								/*++

								Routine Description:

								    Given a node ID, issue a CLUSCTL_NODE_GET_NAME node control to get the
								    computer name of the node.

								    The returned buffer is allocated with LocalAlloc and must be freed by
								    the caller (LocalFree). Whenever a buffer is returned it holds a
								    NUL-terminated string: the node name on success, or an empty string if
								    the node could not be referenced or the control request failed.

								Arguments:

								    NodeID - ID ( 1, 2, 3, ..) of the node

								Return Value:

								    Pointer to a buffer containing the name (possibly empty), or NULL if
								    the buffer could not be allocated.

								--*/

								{
								    PWCHAR      buffer;
								    //
								    // MAX_COMPUTERNAME_LENGTH does not count the terminating NUL, so one
								    // extra character is needed for a maximum-length name to fit.
								    //
								    DWORD       bufferSize = ( MAX_COMPUTERNAME_LENGTH + 1 ) * sizeof( WCHAR );
								    DWORD       bytesReturned = 0;
								    DWORD       bytesRequired = 0;
								    DWORD       status;
								    PNM_NODE    node;

								    buffer = LocalAlloc( LMEM_FIXED, bufferSize );
								    if ( buffer == NULL ) {
								        return NULL;
								    }

								    //
								    // LMEM_FIXED memory is uninitialized; start with an empty string so
								    // that every exit path below hands back a valid string.
								    //
								    buffer[0] = L'\0';

								    node = NmReferenceNodeById( NodeID );
								    if ( node == NULL ) {
								        return buffer;
								    }

								    status = NmNodeControl(node,
								                           NULL,                     // HostNode OPTIONAL,
								                           CLUSCTL_NODE_GET_NAME,
								                           NULL,                     // InBuffer,
								                           0,                        // InBufferSize,
								                           (PUCHAR)buffer,
								                           bufferSize,
								                           &bytesReturned,
								                           &bytesRequired);

								    OmDereferenceObject( node );

								    if ( status != ERROR_SUCCESS ) {
								        //
								        // Do not expose whatever a failed request may have left behind.
								        //
								        buffer[0] = L'\0';
								    }

								    //
								    // Guarantee termination regardless of what the control call wrote.
								    //
								    buffer[MAX_COMPUTERNAME_LENGTH] = L'\0';

								    return buffer;
								}


								#endif /* NT */


								#ifdef __cplusplus

								}

								#endif /* __cplusplus */


								#if 0


								History of changes to this file:

								-------------------------------------------------------------------------

								1995, December 13                                           F40:KSK0610          /*F40:KSK06102.2*/


								This file is part of the portable Regroup Module used in the NonStop

								Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There

								are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,

								srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.

								The last two are simulation files to test the Regroup Module on a

								UNIX workstation in user mode with processes simulating processor nodes

								and UDP datagrams used to send unacknowledged datagrams.


								This file was first submitted for release into NSK on 12/13/95.

								------------------------------------------------------------------------------

								This change occurred on 19 Jan 1996                                              /*F40:MB06458.1*/

								Changes for phase IV Sierra message system release. Includes:                    /*F40:MB06458.2*/

								 - Some cleanup of the code                                                      /*F40:MB06458.3*/

								 - Increment KCCB counters to count the number of setup messages and             /*F40:MB06458.4*/

								   unsequenced messages sent.                                                    /*F40:MB06458.5*/

								 - Fixed some bugs                                                               /*F40:MB06458.6*/

								 - Disable interrupts before allocating broadcast sibs.                          /*F40:MB06458.7*/

								 - Change per-packet-timeout to 5ms                                              /*F40:MB06458.8*/

								 - Make the regroup and powerfail broadcast use highest priority                 /*F40:MB06458.9*/

								   tnet services queue.                                                          /*F40:MB06458.10*/

								 - Call the millicode backdoor to get the processor status from SP               /*F40:MB06458.11*/

								 - Fixed expand bug in msg_listen_ and msg_readctrl_                             /*F40:MB06458.12*/

								 - Added enhancement to msngr_sendmsg_ so that clients do not need               /*F40:MB06458.13*/

								   to be unstoppable before calling this routine.                                /*F40:MB06458.14*/

								 - Added new steps in the build file called                                      /*F40:MB06458.15*/

								   MSGSYS_C - compiles all the message system C files                            /*F40:MB06458.16*/

								   MSDRIVER - compiles all the MSDriver files                                    /*F40:MB06458.17*/

								   REGROUP  - compiles all the regroup files                                     /*F40:MB06458.18*/

								 - remove #pragma env libspace because we set it as a command line               /*F40:MB06458.19*/

								   parameter.                                                                    /*F40:MB06458.20*/

								-----------------------------------------------------------------------          /*F40:MB06458.21*/


								#endif    /* 0 - change descriptions */