windows-xp/Source/XPSP1/NT/base/cluster/service/mm/srgpif.c


								#ifdef __TANDEM

								#pragma columns 79

								#pragma page "srgpif.c - T9050 - interface routines for Regroup Module"

								#endif


								/* @@@ START COPYRIGHT @@@

								**  Tandem Confidential:  Need to Know only

								**  Copyright (c) 1995, Tandem Computers Incorporated

								**  Protected as an unpublished work.

								**  All Rights Reserved.

								**

								**  The computer program listings, specifications, and documentation

								**  herein are the property of Tandem Computers Incorporated and shall

								**  not be reproduced, copied, disclosed, or used in whole or in part

								**  for any reason without the prior express written permission of

								**  Tandem Computers Incorporated.

								**

								** @@@ END COPYRIGHT @@@

								**/


								/*---------------------------------------------------------------------------

								 * This file (srgpif.c) contains all the external interface routines

								 * of Regroup.

								 *---------------------------------------------------------------------------*/


								#ifdef __cplusplus

								   extern "C" {

								#endif /* __cplusplus */


								#include <wrgp.h>


								/************************************************************************
								 * rgp_estimate_memory
								 * ===================
								 *
								 * Description:
								 *
								 *    Reports how many bytes of global memory Regroup needs. The only
								 *    global memory Regroup uses is its rgp_control_t structure. The
								 *    caller is expected to allocate and zero a buffer of at least this
								 *    size and then pass it to rgp_init().
								 *
								 * Parameters:
								 *
								 *    None
								 *
								 * Returns:
								 *
								 *    int - number of bytes of locked down, zero-initialized memory
								 *          needed by Regroup. The memory must be 4-byte aligned.
								 *
								 * Algorithm:
								 *
								 *    The answer is simply sizeof(rgp_control_t).
								 *
								 ************************************************************************/
								_priv _resident int
								RGP_ESTIMATE_MEMORY(void)
								{
								   int bytes_needed = (int) sizeof(rgp_control_t);

								   return bytes_needed;
								}


								/************************************************************************
								 * rgp_init
								 * ========
								 *
								 * Description:
								 *
								 *    Routine to initialize the global Regroup data structures.
								 *
								 * Parameters:
								 *
								 *    node_t this_node -
								 *       node number of local node; regroup uses bit masks to represent
								 *       nodes in the cluster and starts numbering nodes from 0. The OS
								 *       starts numbering at LOWEST_NODENUM. This transformation is
								 *       maintained in all the regroup interfaces to the OS.
								 *
								 *    unsigned int num_nodes -
								 *       number of nodes in the configured node number space =
								 *       (largest configured node number - LOWEST_NODENUM + 1).
								 *
								 *    void *rgp_buffer -
								 *       pointer to a block of locked down memory initialized to 0; this is
								 *       for use by Regroup as its global memory; must be non-null (except
								 *       on NSK, where the caller sets up the global pointer itself) and
								 *       4-byte aligned
								 *
								 *    int rgp_buflen -
								 *       length in bytes of the locked down buffer *rgp_buffer; must be equal
								 *       to or greater than the number returned by rgp_estimate_memory()
								 *
								 *    rgp_msgsys_p rgp_msgsys_p -
								 *       pointer to a common struct used by the message system and
								 *       Regroup to co-ordinate regroup related work
								 *
								 * Returns:
								 *
								 *    void - no return value. Invalid arguments are reported through
								 *    RGP_ERROR(RGP_INTERNAL_ERROR).
								 *
								 * Algorithm:
								 *
								 *    Validates the arguments, records them in the Regroup global data
								 *    structure, resets the structure to its default state through
								 *    RGP_CLEANUP(), fills in the fixed IamAlive test pattern and then
								 *    runs the OS-specific init and cleanup hooks.
								 *
								 ************************************************************************/
								_priv _resident void
								RGP_INIT(node_t this_node, unsigned int num_nodes,
								         void *rgp_buffer, int rgp_buflen,
								         rgp_msgsys_p rgp_msgsys_p)
								{
								   this_node = INT_NODE(this_node); /* adjust the node number by the offset */

								   /* Reject inconsistent arguments before touching any global state. */
								   if ((num_nodes > MAX_CLUSTER_SIZE) ||
								       (this_node >= (node_t) num_nodes) ||
								       (rgp_buflen < rgp_estimate_memory()) /* buffer too small */ ||
								       ((ULONG_PTR)rgp_buffer % 4) /* buffer not 4-byte aligned */
								      )
								      RGP_ERROR(RGP_INTERNAL_ERROR);

								#ifdef NSK
								   /* In NSK, the caller must set up the global rgp pointer. */
								#else
								   /* A null buffer passes the alignment test above (0 % 4 == 0), so it
								    * has to be rejected explicitly before it becomes the global pointer
								    * that is dereferenced below.
								    */
								   if (rgp_buffer == RGP_NULL_PTR)
								      RGP_ERROR(RGP_INTERNAL_ERROR);

								   rgp = (rgp_control_t *) rgp_buffer;
								#endif /* NSK */

								   rgp->num_nodes = num_nodes; /* # of nodes configured */

								   rgp->rgp_msgsys_p = rgp_msgsys_p; /* ptr to struct shared with Msgsys */

								   rgp->mynode = this_node;

								#if defined (NT)
								   /* Initialize the CRITICAL_SECTION object behind RGP_LOCK. This must
								    * happen before RGP_CLEANUP(), which takes the lock.
								    */
								   InitializeCriticalSection( &rgp->OS_specific_control.RgpCriticalSection );
								#endif

								   RGP_CLEANUP();

								   /* We place a bit pattern in the IamAlive packet. This bit
								    * pattern toggles all the bits.
								    */
								   rgp->iamalive_pkt.testpattern.words[0]  = 0x0055FF6D;
								   rgp->iamalive_pkt.testpattern.words[1]  = 0x92CC33E3;
								   rgp->iamalive_pkt.testpattern.words[2]  = 0x718E49F0;
								   rgp->iamalive_pkt.testpattern.words[3]  = 0x92CC33E3;
								   rgp->iamalive_pkt.testpattern.words[4]  = 0x0055FF6D;
								   rgp->iamalive_pkt.testpattern.words[5]  = 0x0055FF6D;
								   rgp->iamalive_pkt.testpattern.words[6]  = 0x92CC33E3;
								   rgp->iamalive_pkt.testpattern.words[7]  = 0x718E49F0;
								   rgp->iamalive_pkt.testpattern.words[8]  = 0x92CC33E3;
								   rgp->iamalive_pkt.testpattern.words[9]  = 0x0055FF6D;
								   rgp->iamalive_pkt.testpattern.words[10] = 0x55AA55AA;
								   rgp->iamalive_pkt.testpattern.words[11] = 0x55AA55AA;
								   rgp->iamalive_pkt.testpattern.words[12] = 0x55AA55AA;

								   rgp->poison_pkt.pktsubtype = RGP_UNACK_POISON;

								   rgp_init_OS();  /* OS-specific initializations */

								   rgp_cleanup_OS(); /* OS-specific cleanup */

								   /* Trace the call after the data structures have been initialized. */
								   RGP_TRACE( "RGP Init called ", EXT_NODE(this_node), num_nodes,
								              PtrToUlong(rgp_buffer), PtrToUlong(rgp_msgsys_p) ); /* TRACE */
								}


								/**************************************************************************

								 * rgp_cleanup

								 * ===========

								 * Description:

								 *

								 *    This function cleans up the RGP structure such that this node is

								 *    virtually returned to the state following RGP_INIT and ready to be

								 *    "join"ed into the cluster.

								 *

								 * Parameters:

								 *

								 *      None

								 *

								 * Returns:

								 *

								 *      None

								 **************************************************************************/

								 _priv _resident void

								RGP_CLEANUP(void)

								{

								   node_t i;


								   RGP_LOCK;


								/* Initialize the state of all possible nodes in the cluster. */

								   for (i = 0; i < (node_t) rgp->num_nodes; i++)

								   {

								      rgp->node_states[i].status = RGP_NODE_DEAD;

								      rgp->node_states[i].pollstate = AWAITING_IAMALIVE;

								      rgp->node_states[i].lostHBs = 0;


								#if defined( NT )

								      ClusnetSetNodeMembershipState(NmClusnetHandle,

								                                    EXT_NODE( i ),

								                                    ClusnetNodeStateDead);

								#endif // NT

								   }

								   for (i = (node_t)rgp->num_nodes; i < MAX_CLUSTER_SIZE; i++)

								   {

								      rgp->node_states[i].status = RGP_NODE_NOT_CONFIGURED;

								      rgp->node_states[i].pollstate = AWAITING_IAMALIVE;

								      rgp->node_states[i].lostHBs = 0;


								#if defined( NT )

								      ClusnetSetNodeMembershipState(NmClusnetHandle,

								                                    EXT_NODE( i ),

								                                    ClusnetNodeStateNotConfigured);

								#endif // NT

								   }


								   rgp->rgpinfo.version = RGP_VERSION;

								   rgp->rgpinfo.seqnum = RGP_INITSEQNUM;

								   rgp->rgpinfo.iamalive_ticks = RGP_IAMALIVE_TICKS;

								   rgp->rgpinfo.check_ticks = RGP_CHECK_TICKS;

								   rgp->rgpinfo.Min_Stage1_ticks = RGP_MIN_STAGE1_TICKS;

								   rgp->rgpinfo.a_tick = RGP_INACTIVE_PERIOD;


								   ClusterInit(rgp->rgpinfo.cluster);


								   rgp->rgppkt.stage = RGP_COLDLOADED;

								   rgp->rgpcounter = 0;

								   rgp->restartcount = 0;


								   rgp->tiebreaker = rgp->mynode;


								   /* Initialize the unacknowledged packet buffers */


								   rgp->rgppkt.pktsubtype = RGP_UNACK_REGROUP;

								   rgp->rgppkt.seqno = rgp->rgpinfo.seqnum;

								   rgp->last_stable_seqno = rgp->rgpinfo.seqnum;


								   ClusterCopy(rgp->OS_specific_control.CPUUPMASK, rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->outerscreen,           rgp->rgpinfo.cluster);

								#if defined( NT )

								   ClusnetSetOuterscreen( NmClusnetHandle, (ULONG)*((PUSHORT)rgp->outerscreen) );

								#endif

								   ClusterCopy(rgp->innerscreen,           rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.knownstage1,    rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.knownstage2,    rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.knownstage3,    rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.knownstage4,    rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.knownstage5,    rgp->rgpinfo.cluster);

								   ClusterCopy(rgp->rgppkt.pruning_result, rgp->rgpinfo.cluster);

								   MatrixInit(rgp->rgppkt.connectivity_matrix);


								   rgp->rgppkt_to_send.pktsubtype = RGP_UNACK_REGROUP;


								   rgp->iamalive_pkt.pktsubtype = RGP_UNACK_IAMALIVE;


								   RGP_UNLOCK;

								}


								/***************************************************************************
								 * rgp_sequence_number
								 * ===================
								 * Description:
								 *
								 *    Returns the regroup sequence number.
								 *
								 *    This provides only a subset of the functionality provided by
								 *    rgp_getrgpinfo(), but is a simpler function and has no structure
								 *    parameters, making it easier to call from PTAL.
								 *
								 *    A regroup incident could be in progress when this routine is
								 *    called; the value is read without taking RGP_LOCK.
								 *
								 * Parameters:
								 *
								 *      None
								 *
								 * Returns:
								 *
								 *     uint32 - the current regroup sequence number; this reflects
								 *              how many regroup incidents have happened since
								 *              the system came up. Since one incident can result in
								 *              up to RGP_RESTART_MAX restarts, each resulting in the
								 *              sequence # being bumped, this number does not always
								 *              equal the number of regroup incidents.
								 *
								 ***************************************************************************/
								_priv _resident uint32
								RGP_SEQUENCE_NUMBER(void)
								{
								    uint32 current_seqnum = rgp->rgpinfo.seqnum;

								    return current_seqnum;
								}


								/************************************************************************
								 * rgp_getrgpinfo
								 * ==============
								 *
								 * Description:
								 *
								 *    Routine to get Regroup parameters.
								 *
								 * Parameters:
								 *
								 *    rgpinfo_t *rgpinfo - pointer to struct to be filled with Regroup
								 *                         parameters.
								 *
								 * Returns:
								 *
								 *    int - 0 if successful; -1 if Regroup is perturbed (in which case
								 *    *rgpinfo is left untouched).
								 *
								 * Algorithm:
								 *
								 *    Under RGP_LOCK, copies the rgpinfo struct from the Regroup global
								 *    memory into the struct passed in by the caller.
								 *
								 ************************************************************************/
								_priv _resident int
								RGP_GETRGPINFO(rgpinfo_t *rgpinfo)
								{
								   int error;
								   int perturbed;

								   /* Halt if rgp_init() has not been called earlier OR the caller
								    * did not supply a structure to fill in.
								    */
								   if ((rgp == RGP_NULL_PTR) || (rgpinfo == RGP_NULL_PTR))
								      RGP_ERROR( RGP_INTERNAL_ERROR );

								   RGP_LOCK;

								   perturbed = rgp_is_perturbed();
								   if (!perturbed)
								   {
								      /* Stable: hand out a snapshot of regroup's internal struct. */
								      *rgpinfo = rgp->rgpinfo;
								      error = 0;
								   }
								   else
								   {
								      error = -1;
								   }

								   RGP_UNLOCK;

								   return error;
								}


								/************************************************************************
								 * rgp_setrgpinfo
								 * ==============
								 *
								 * Description:
								 *
								 *    Routine to set Regroup parameters. This routine is to be called on
								 *    newly booting nodes to set the Regroup parameters to the values
								 *    in the master or reloading node. The parameters to be updated
								 *    include Regroup timing parameters and the cluster membership;
								 *    that is, the current set of nodes in the system.
								 *
								 *    This routine can also be called on the first node to boot to
								 *    modify the Regroup timing parameters which are set to the default
								 *    values when rgp_init() is called. Such modification has to be done
								 *    before other nodes are added to the system.
								 *
								 * Parameters:
								 *
								 *    rgpinfo_t *rgpinfo - pointer to struct with Regroup parameters to
								 *                         be modified. A value of 0 in iamalive_ticks,
								 *                         check_ticks, Min_Stage1_ticks or a_tick
								 *                         selects the default for that field.
								 *
								 * Returns:
								 *
								 *    int - 0 if successful; -1 if Regroup is perturbed, or if regroup
								 *    is in the RGP_STABILIZED stage and either there is more than one
								 *    node in the cluster or the cluster passed in differs from the
								 *    current one. This is to prevent modification of timing parameters
								 *    after the second node is added to the system.
								 *
								 * Algorithm:
								 *
								 *    Copies the contents of the user-passed struct into the one in the
								 *    Regroup global memory and updates related parameters.
								 *
								 ************************************************************************/
								_priv _resident int
								RGP_SETRGPINFO(rgpinfo_t *rgpinfo)
								{
								   int error = 0;
								   node_t i;

								   /* If no rgpinfo structure is passed OR the version # of the
								    * structure is not understood OR rgp_init() has not been called,
								    * halt.
								    */
								   if ((rgpinfo == RGP_NULL_PTR) ||
								       (rgpinfo->version != RGP_VERSION) ||
								       (rgp == RGP_NULL_PTR))
								      RGP_ERROR( RGP_INTERNAL_ERROR );

								   RGP_LOCK;

								   RGP_TRACE( "RGP SetRGPInfo  ",
								              rgpinfo->version,                /* TRACE */
								              rgpinfo->seqnum,                 /* TRACE */
								              rgpinfo->iamalive_ticks,         /* TRACE */
								              GetCluster( rgpinfo->cluster ) );/* TRACE */

								   /* The following checks must be made before proceeding:
								    *
								    * 1. Regroup must not be perturbed.
								    *
								    * 2. If rgp_start() has been called (regroup is in the
								    *    RGP_STABILIZED state), only the local node must be in the
								    *    cluster when this routine is called.
								    *
								    * 3. If rgp_start() has been called, this routine can be used
								    *    only to modify the timing parameters and not to specify the
								    *    cluster.
								    *
								    * If these restrictions are not followed, return -1.
								    */
								   if (  rgp_is_perturbed() ||
								         (  (rgp->rgppkt.stage == RGP_STABILIZED) &&
								            (  (ClusterNumMembers(rgp->rgpinfo.cluster) > 1) ||
								               !ClusterCompare(rgp->rgpinfo.cluster,rgpinfo->cluster)
								            )
								         )
								      )
								   {
								      error = -1;
								   }
								   else
								   {
								      /* Copy the rgpinfo structure into regroup's internal struct. */
								      rgp->rgpinfo = *rgpinfo;

								      /* Any timing parameter passed as 0 falls back to its default.
								       * Each test below is independent of the others.
								       */
								      if (rgpinfo->iamalive_ticks == 0)
								      {
								         rgp->rgpinfo.iamalive_ticks = RGP_IAMALIVE_TICKS;
								      }

								      if (rgpinfo->check_ticks == 0)
								      {
								         rgp->rgpinfo.check_ticks = RGP_CHECK_TICKS;
								      }

								      /* The Stage 1 default is derived from the (possibly just
								       * defaulted) iamalive_ticks and check_ticks values.
								       */
								      if (rgpinfo->Min_Stage1_ticks == 0)
								      {
								         rgp->rgpinfo.Min_Stage1_ticks =
								            (rgp->rgpinfo.iamalive_ticks * rgp->rgpinfo.check_ticks);
								      }

								      if (rgpinfo->a_tick == 0)
								      {
								         rgp->rgpinfo.a_tick = RGP_CLOCK_PERIOD;
								      }

								#if defined( NT )
								      /* Tell Timer thread to restart RGP timer. SetEvent and the
								       * TimerSignal handle are NT-specific, so this is guarded like
								       * the other OS-specific calls in this file.
								       */
								      SetEvent (rgp->OS_specific_control.TimerSignal);
								#endif

								      /* The cluster should include the local node even if the cluster
								       * field in the rgpinfo structure does not include it.
								       */
								      ClusterInsert(rgp->rgpinfo.cluster, rgp->mynode);

								      /* Copy the sequence number into the regroup packet area. */
								      rgp->rgppkt.seqno = rgp->rgpinfo.seqnum;

								      /* If nodes have been added in the cluster field, they must be
								       * added to all the screens and their status must be set to
								       * alive.
								       */
								      ClusterCopy(rgp->OS_specific_control.CPUUPMASK, rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->outerscreen,           rgp->rgpinfo.cluster);
								#if defined( NT )
								      ClusnetSetOuterscreen( NmClusnetHandle, (ULONG)*((PUSHORT)rgp->outerscreen) );
								      ClusterComplement(rgp->ignorescreen, rgp->outerscreen);
								#endif
								      ClusterCopy(rgp->innerscreen,           rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.knownstage1,    rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.knownstage2,    rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.knownstage3,    rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.knownstage4,    rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.knownstage5,    rgp->rgpinfo.cluster);
								      ClusterCopy(rgp->rgppkt.pruning_result, rgp->rgpinfo.cluster);
								      rgp->tiebreaker = rgp_select_tiebreaker(rgp->rgpinfo.cluster);

								      for (i = 0; i < (node_t) rgp->num_nodes; i++)
								      {
								         if (ClusterMember(rgp->rgpinfo.cluster, i))
								         {
								            rgp->node_states[i].pollstate = IAMALIVE_RECEIVED;
								            rgp->node_states[i].status = RGP_NODE_ALIVE;

								#if defined( NT )
								            ClusnetSetNodeMembershipState(NmClusnetHandle,
								                                          EXT_NODE( i ),
								                                          ClusnetNodeStateAlive);
								#endif // NT
								         }
								      }

								      /* Reset the clock counter so that IamAlives are sent when
								       * the next timer tick arrives.
								       */
								      rgp->clock_ticks = 0;
								   }

								   RGP_UNLOCK;

								   return(error);
								}


								/************************************************************************
								 * rgp_start
								 * =========
								 *
								 * Description:
								 *
								 *    This routine signals the end of node integration into the cluster.
								 *    The node can now start participating in the Regroup algorithm.
								 *
								 * Parameters:
								 *
								 *    void (*nodedown_callback)(cluster_t failed_nodes)
								 *       pointer to a routine to be called when a node failure is
								 *       detected. Required, except on NSK where RGP_NULL_PTR selects
								 *       the message-system routine rgp_node_failed().
								 *
								 *    int (*select_cluster)(cluster_t cluster_choices[], int num_clusters)
								 *       pointer to an optional routine to be called when link failures
								 *       cause multiple alternative clusters to be formed. This routine
								 *       should select one from a list of suggested clusters. It is
								 *       stored as given, RGP_NULL_PTR included.
								 *
								 * Returns:
								 *
								 *    void - no return value
								 *
								 * Algorithm:
								 *
								 *    Installs the callback routines in the global data structure. On
								 *    NT it also inserts the local node into the cluster mask, refreshes
								 *    CPUUPMASK and reports the local node as up.
								 *    NOTE(review): earlier documentation says this routine changes the
								 *    Regroup state to RGP_STABILIZED, but no assignment to
								 *    rgp->rgppkt.stage is visible in this body - confirm where that
								 *    transition happens.
								 *
								 ************************************************************************/
								_priv _resident void
								RGP_START(void (*nodedown_callback)(cluster_t failed_nodes),
								          int (*select_cluster)(cluster_t cluster_choices[], int num_clusters)
								         )
								{
								   if (rgp == RGP_NULL_PTR)
								      RGP_ERROR( RGP_INTERNAL_ERROR );

								   RGP_LOCK;

								   RGP_TRACE( "RGP Start called",
								              rgp->rgppkt.stage,
								              PtrToUlong(nodedown_callback),
								              PtrToUlong(select_cluster),
								              0 );

								   /* Install the node failure notification routine. */
								   if (nodedown_callback != RGP_NULL_PTR)
								   {
								      rgp->nodedown_callback = nodedown_callback;
								   }
								   else
								   {
								#ifdef NSK
								      /* In NSK, rgp_start() is called from pTAL code and passing routine
								       * addresses is cumbersome. So, RGP_NULL_PTR is passed and we
								       * call the routine rgp_node_failed() which must be supplied by
								       * the message system.
								       */
								      rgp->nodedown_callback = rgp_node_failed; /* hardcoded name */
								#else
								      /* A node down callback routine must be supplied. */
								      RGP_ERROR( RGP_INTERNAL_ERROR );
								#endif /* NSK */
								   }

								   /* The select cluster routine is optional and is stored exactly as
								    * passed. Substituting rgp_select_cluster here is not a good idea
								    * since it doesn't take the quorum owner node into consideration.
								    * If rgp->select_cluster == RGP_NULL_PTR, srgpsm.c uses
								    * rgp_select_cluster_ex, which will try to select the group that
								    * contains the current quorum owner node.
								    */
								   rgp->select_cluster = select_cluster;

								#if defined(NT)
								   /* Call the node up callback.  This is where the local node gets
								    * the node up callback for itself coming up.  Other nodes call
								    * the callback, for this node coming up, in rgp_monitor_node.
								    */
								   ClusterInsert(rgp->rgpinfo.cluster, rgp->mynode);
								   ClusterCopy(rgp->OS_specific_control.CPUUPMASK, rgp->rgpinfo.cluster);

								   if ( rgp->OS_specific_control.UpDownCallback != RGP_NULL_PTR )
								   {
								      (*(rgp->OS_specific_control.UpDownCallback))( EXT_NODE(rgp->mynode),
								                                                    NODE_UP );
								   }
								#endif  /* NT */

								   RGP_UNLOCK;
								}


								/************************************************************************
								 * rgp_add_node
								 * ============
								 *
								 * Description:
								 *
								 *    Called to add a newly booting node to the regroup masks. This prevents
								 *    Regroup from sending poison packets to the new node when it tries to
								 *    contact our node by sending IamAlive messages.
								 *
								 * Parameters:
								 *
								 *    node_t node - node to be added to the recognition masks
								 *                  (in external format).
								 *
								 * Returns:
								 *
								 *    int - 0 on success and -1 on failure. The routine fails if a
								 *    regroup incident is in progress, or if the node number, once
								 *    converted to internal format, does not lie in the range
								 *    0 .. MAX_CLUSTER_SIZE - 1.
								 *
								 * Algorithm:
								 *
								 *    The node is added to all the recognition masks and its state is
								 *    changed to RGP_NODE_COMING_UP.
								 *
								 ************************************************************************/
								_priv _resident int
								RGP_ADD_NODE(node_t node)
								{
								   int error = 0;

								   RGP_LOCK;

								   /* The trace records the node number as passed in (external format). */
								   RGP_TRACE( "RGP Add node    ", node, rgp->rgppkt.stage,
								              GetCluster(rgp->outerscreen),                 /* TRACE */
								              GetCluster(rgp->rgpinfo.cluster) );           /* TRACE */

								   /* Cannot add a node while regroup is perturbed. Return -1 in that case.
								    * The new node booting should fail due to the regroup incident anyway.
								    */
								   if (rgp_is_perturbed())
								      error = -1;
								   else
								   {
								      node = INT_NODE(node); /* adjust the node number by the offset */

								      /* The node number indexes node_states[] and the cluster masks,
								       * so refuse anything outside the table. Comparing as unsigned
								       * also catches a value that went negative (or wrapped) in the
								       * INT_NODE() conversion.
								       */
								      if ((unsigned int) node >= (unsigned int) MAX_CLUSTER_SIZE)
								         error = -1;
								      else
								      {
								         ClusterInsert(rgp->outerscreen,           node);
								#if defined( NT )
								         ClusnetSetOuterscreen( NmClusnetHandle, (ULONG)*((PUSHORT)rgp->outerscreen) );
								#endif
								         ClusterInsert(rgp->innerscreen,           node);
								         ClusterInsert(rgp->rgppkt.knownstage1,    node);
								         ClusterInsert(rgp->rgppkt.knownstage2,    node);
								         ClusterInsert(rgp->rgppkt.knownstage3,    node);
								         ClusterInsert(rgp->rgppkt.knownstage4,    node);
								         ClusterInsert(rgp->rgppkt.knownstage5,    node);
								         ClusterInsert(rgp->rgppkt.pruning_result, node);
								         rgp->node_states[node].pollstate = AWAITING_IAMALIVE;
								         rgp->node_states[node].status = RGP_NODE_COMING_UP;
								         rgp->node_states[node].lostHBs = 0;

								#if defined( NT )
								         ClusterDelete( rgp->OS_specific_control.Banished, node );

								         //
								         // Remove joining node from ignore screen
								         //
								         ClusterDelete( rgp->ignorescreen,                 node );
								         PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);

								         ClusnetSetNodeMembershipState(NmClusnetHandle,
								                                       EXT_NODE( node ),
								                                       ClusnetNodeStateJoining);
								#endif // NT
								      }
								   }

								   RGP_UNLOCK;

								   return(error);
								}


								/************************************************************************
								 * rgp_monitor_node
								 * ================
								 *
								 * Description:
								 *
								 *    Called by all running nodes to change the status of a newly booted node
								 *    to UP. Can be called by the new node also; it is a no-op in this case.
								 *
								 * Parameters:
								 *
								 *    node_t node - number of node being declared up (in external format)
								 *
								 * Returns:
								 *
								 *    int - 0 on success and -1 on failure. The routine fails if the
								 *    state of the node is neither RGP_NODE_COMING_UP nor RGP_NODE_ALIVE,
								 *    or if the node number, once converted to internal format, does
								 *    not lie in the range 0 .. MAX_CLUSTER_SIZE - 1.
								 *
								 * Algorithm:
								 *
								 *    If the node is marked coming up, its state is changed to
								 *    RGP_NODE_ALIVE. If the node has already been marked up,
								 *    nothing is done.
								 *
								 ************************************************************************/
								_priv _resident int
								RGP_MONITOR_NODE(node_t node)
								{
								   int error = 0;

								   RGP_LOCK;

								   /* The trace records the node number as passed in (external format). */
								   RGP_TRACE( "RGP Monitor node", node, rgp->rgppkt.stage,
								              GetCluster(rgp->outerscreen),                 /* TRACE */
								              GetCluster(rgp->rgpinfo.cluster) );           /* TRACE */

								   node = INT_NODE(node); /* adjust the node number by the offset */

								   /* The node number indexes node_states[], so refuse anything outside
								    * the table. Comparing as unsigned also catches a value that went
								    * negative (or wrapped) in the INT_NODE() conversion.
								    */
								   if ((unsigned int) node >= (unsigned int) MAX_CLUSTER_SIZE)
								   {
								      error = -1;
								   }
								   /* Accept the request only if the state of the node is COMING_UP or UP. */
								   else if (rgp->node_states[node].status == RGP_NODE_COMING_UP)
								   {
								      ClusterInsert(rgp->rgpinfo.cluster, node);
								      rgp->tiebreaker = rgp_select_tiebreaker(rgp->rgpinfo.cluster);
								      rgp->node_states[node].pollstate = IAMALIVE_RECEIVED;
								      rgp->node_states[node].status = RGP_NODE_ALIVE;
								#if defined(NT)
								      ClusterCopy(rgp->OS_specific_control.CPUUPMASK, rgp->rgpinfo.cluster);

								      ClusnetSetNodeMembershipState(NmClusnetHandle,
								                                    EXT_NODE( node ),
								                                    ClusnetNodeStateAlive);

								      /* A node came up.  Call the node up callback. */
								      if ( rgp->OS_specific_control.UpDownCallback != RGP_NULL_PTR )
								      {
								         (*(rgp->OS_specific_control.UpDownCallback))(
								             EXT_NODE(node),
								             NODE_UP
								             );
								      }
								#endif  /* NT */
								   }
								   else if (rgp->node_states[node].status != RGP_NODE_ALIVE)
								   {
								      /* Perhaps the booting node failed and regroup has already marked
								       * it down. The cluster manager may have invoked a global update
								       * resulting in this call before regroup reported the failure
								       * of the node.
								       */
								      error = -1;
								   }

								   RGP_UNLOCK;

								   return(error);
								}


								/************************************************************************

								 * rgp_remove_node

								 * ===============

								 *

								 * Description:

								 *

								 *    Called by the cluster manager to force out a booting node if booting

								 *    fails. Regroup may or may not have already removed the booting node

								 *    from the masks and declared it down, depending on what stage the

								 *    booting is in and when the booting node failed.

								 *

								 *    Regroup can remove the node from the masks of all nodes in the cluster

								 *    by simply starting a new incident of regroup with any event code. This

								 *    will force all nodes to come to an agreement on cluster membership

								 *    that excludes the booting node. If the booting node is alive, it will

								 *    commit suicide since it will be in the incompetent (RGP_COLDLOADED)

								 *    state.

								 *

								 *    Removing the new node from our masks is not necessary since regroup

								 *    will detect the node failure and adjust the masks. If we do remove it

								 *    from our masks BEFORE initiating regroup, regroup may complete quicker

								 *    since we will not wait in stage 1 for the node to check in. Also, this

								 *    could allow a node to be removed even after it is fully integrated.

								 *    This is because our node will send a poison packet to the removed node

								 *    if it tries to contact us.

								 *

								 *    But this "enhancement" is not implemented because it requires a new

								 *    regroup event code which is examined by all nodes and processed

								 *    specially. Currently, the regroup event code is used only for

								 *    debugging info. Also, there is no guarantee that all nodes see the

								 *    same regroup reason code. For instance, some may see a missing

								 *    IamAlive while others may see a power failure.

								 *

								 * Parameters:

								 *

								 *    node_t node - node to be removed from the recognition masks

								 *                  (in external format).

								 *

								 * Returns:

								 *

								 *    int - 0 on success and -1 on failure. The routine fails if a

								 *    regroup incident is in progress or rgp_start() has not been

								 *    called (as in a new node where the booting is not complete).

								 *

								 * Algorithm:

								 *

								 *    If the node is still in the recognition masks, a new regroup incident

								 *    is started. This incident will result in all nodes declaring the node

								 *    dead and removing it from the recognition masks.

								 *

								 ************************************************************************/

								_priv _resident int
								RGP_REMOVE_NODE(node_t node)
								{
								   int    error    = 0;
								   node_t int_node = INT_NODE(node);  /* node in internal format */

								   RGP_LOCK;

								   RGP_TRACE( "RGP Remove node ", node, rgp->rgppkt.stage,
								              GetCluster(rgp->outerscreen),                 /* TRACE */
								              GetCluster(rgp->rgpinfo.cluster) );           /* TRACE */

								   if (rgp->rgppkt.stage != RGP_STABILIZED)
								   {
								      /* Any stage other than RGP_STABILIZED is reported to the caller
								       * as a failure; nothing is started in that case.
								       */
								      error = -1;
								   }
								   else if ( (int_node < (node_t) rgp->num_nodes) &&
								             ClusterMember(rgp->outerscreen, int_node) )
								   {
								      /* Node is currently in our screen. The node may have never come up
								       * after rgp_add_node() was called OR regroup may not have figured
								       * out yet that the node is down. In either case, the node must
								       * be forced out and all nodes in the cluster notified (by a regroup
								       * incident). If the node is still running, it will commit suicide
								       * when this regroup incident starts.
								       *
								       * The range test in the condition mirrors the one done in
								       * RGP_RECEIVED_PACKET: a node number outside [0, num_nodes) is
								       * never looked up in the outerscreen mask.
								       *
								       * The event handler is given the node in external format, as
								       * received from the caller.
								       */
								      rgp_event_handler(RGP_EVT_LATEPOLLPACKET, node);
								   }
								   /* Otherwise: either the node was not added to the cluster (this
								    * includes node numbers that are out of range) OR regroup has
								    * already figured out that the node is dead and reported this.
								    * In either case, there is nothing more to do and 0 is returned.
								    */

								   RGP_UNLOCK;

								   return(error);
								}


								/************************************************************************

								 * rgp_is_perturbed

								 * ================

								 *

								 * Description:

								 *

								 *    Function to check if a regroup incident is in progress.

								 *

								 * Parameters:

								 *

								 *    None.

								 *

								 * Returns:

								 *

								 *    int - 0 if regroup is quiescent; non-zero if a regroup incident

								 *    is in progress.

								 *

								 * Algorithm:

								 *

								 *    Looks at the current state of the Regroup algorithm.

								 *

								 ************************************************************************/

								_priv _resident int
								RGP_IS_PERTURBED(void)
								{
								   /* Take a single snapshot of the stage so both tests below see the
								    * same value.
								    */
								   uint8 current_stage = rgp->rgppkt.stage;

								   /* RGP_STABILIZED and RGP_COLDLOADED are the two stages reported
								    * as "not perturbed".
								    */
								   if ((current_stage == RGP_STABILIZED) ||
								       (current_stage == RGP_COLDLOADED))
								      return(0);

								   /* Every other stage is reported as a regroup incident in progress. */
								   return(1);
								}


								/************************************************************************

								 * rgp_periodic_check

								 * ==================

								 *

								 * Description:

								 *

								 *    This routine is invoked every RGP_CLOCK_PERIOD by the timer interrupt

								 *    handler of the native OS. It performs Regroup's periodic operations.

								 *

								 * Parameters:

								 *

								 *    None

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    This routine requests IamAlive packets to be sent, checks if

								 *    IamAlives have been received (and calls rgp_event_handler() if

								 *    not) and sends a clock tick to the regroup algorithm if it is in

								 *    progress.

								 *

								 *    IamAlives are checked at twice the IamAlive period. The regroup

								 *    global variable clock_ticks is incremented in each call. After

								 *    the IamAlives are checked, clock_ticks is reset to 0. Thus, the

								 *    ticker counts time modulo twice the IamAlive ticks.

								 *

								 ************************************************************************/

								_priv _resident void
								RGP_PERIODIC_CHECK(void)
								{
								   RGP_LOCK;

								   /* If regroup is active (stage is neither RGP_STABILIZED nor
								    * RGP_COLDLOADED), give it a shot at each regroup clock tick.
								    */
								   if ((rgp->rgppkt.stage != RGP_STABILIZED) &&
								       (rgp->rgppkt.stage != RGP_COLDLOADED))
								      rgp_event_handler(RGP_EVT_CLOCK_TICK, RGP_NULL_NODE);

								#if !defined( NT )

								   /* Everything up to the matching #endif is compiled only when NT is
								    * not defined: IamAlive sending, IamAlive checking and the power
								    * failure grace counter.
								    */

								   /* Send IamAlive messages at appropriate intervals. */
								   if ( (rgp->clock_ticks == 0) ||
								        (rgp->clock_ticks == rgp->rgpinfo.iamalive_ticks) )
								   {
								      rgp_broadcast(RGP_UNACK_IAMALIVE);
								      rgp->clock_ticks++;
								   }

								   /* Check for missing IamAlives at IamAlive sending period,
								    * but flag an error (LATE_POLL) only after repeated misses.
								    * The checking is offset from the sending by one clock tick.
								    *
								    * NOTE(review): the routine's header comment describes checking at
								    * twice the IamAlive period; as written, the counter is reset as
								    * soon as it reaches (iamalive_ticks - 1). Confirm which is intended.
								    */
								   else if ( rgp->clock_ticks >= (rgp->rgpinfo.iamalive_ticks - 1) )
								   { /* check all nodes for IamAlives received */

								      /* Loop index; declared here so that builds which define NT do
								       * not carry an unreferenced local.
								       */
								      node_t  node;

								      for (node = 0; node < (node_t) rgp->num_nodes; node++)
								      {
								         /* Only nodes currently marked alive are checked. */
								         if (rgp->node_states[node].status == RGP_NODE_ALIVE)
								         {
								            if ( rgp->node_states[node].pollstate == IAMALIVE_RECEIVED )
								            {  /* checked in in time */
								#if defined(TDM_DEBUG)
								               if ( rgp->OS_specific_control.debug.doing_tracing )
								               {
								                  printf ("Node %d: Node %d is alive. My rgp state=%d\n",
								                     EXT_NODE(rgp->mynode), EXT_NODE(node), rgp->rgppkt.stage );
								               }
								#endif
								               /* Re-arm the poll state for the next period and clear
								                * the count of consecutive misses.
								                */
								               rgp->node_states[node].pollstate = AWAITING_IAMALIVE;
								               rgp->node_states[node].lostHBs = 0;
								            }
								            /* The miss counter is bumped on every check that finds no
								             * IamAlive; the value BEFORE the increment is compared.
								             *
								             * NOTE(review): with the post-increment, the failure path
								             * is first taken on the (check_ticks + 1)-th consecutive
								             * miss, whereas the original comment said up to
								             * (check_ticks-1) IamAlives may be lost. Confirm the
								             * intended threshold before changing it.
								             */
								            else if ( rgp->node_states[node].lostHBs++ >= rgp->rgpinfo.check_ticks )
								            {
								               /* missing IamAlives */
								               if (node == rgp->mynode) /* missed my own packets */
								               {
								                  /* We should be lenient if we just had a power failure.
								                   */
								                  if (rgp->pfail_state == 0) /* no recent power failure */
								                     RGP_ERROR( RGP_MISSED_POLL_TO_SELF );
								               }
								               else
								                  rgp_event_handler(RGP_EVT_LATEPOLLPACKET, EXT_NODE(node));
								            }
								         }
								      }

								      /* Reset the regroup tick counter after checking for IamAlives. */
								      rgp->clock_ticks = 0;

								   } /* check all nodes for IamAlives received */

								   else
								      rgp->clock_ticks++;

								   /* rgp->pfail_state is set to a non-zero value when a pfail event
								    * is reported to regroup. It is decremented at every regroup clock
								    * tick till it reaches zero. While this number is non-zero, missing
								    * self IamAlives are ignored and do not cause the node to halt.
								    * This gives the sending hardware some time to recover from power
								    * failures before self IamAlives are checked.
								    */
								   if (rgp->pfail_state)
								      rgp->pfail_state--;

								#endif // NT

								   RGP_UNLOCK;

								}  /* rgp_periodic_check */


								/************************************************************************

								 * rgp_received_packet

								 * ===================

								 *

								 * Description:

								 *

								 *    Routine to be called by the message system when an unacknowledged

								 *    packet sent by the Regroup module is received from any node. These

								 *    packets include IamAlive packets, regroup status packets and poison

								 *    packets.

								 *

								 * Parameters:

								 *

								 *    node_t node      - node from which a packet has been received

								 *

								 *    void   *packet   - address of the received packet data

								 *

								 *    int    packetlen - length in bytes of the received packet data

								 *

								 * Returns:

								 *

								 *    void - no return value

								 *

								 * Algorithm:

								 *

								 *    Does different things based on the packet subtype.

								 *

								 ************************************************************************/

								_priv _resident void
								RGP_RECEIVED_PACKET(node_t node, void *packet, int packetlen)
								{
								   /* View the raw buffer as an unsequenced packet; only its pktsubtype
								    * field is read directly here. packetlen is not examined by this
								    * routine.
								    */
								   rgp_unseq_pkt_t *pkt = (rgp_unseq_pkt_t *) packet;

								   /* From here on, "node" holds the sender in internal format. */
								   node = INT_NODE(node);

								   /* A sender that cannot be in our cluster is silently ignored. */
								   if (node >= (node_t) rgp->num_nodes)
								      return;

								   /* Screening of the sender.
								    *
								    * A sender excluded by the outer screen is not part of the current
								    * (most recently known) configuration, so its packet is not honored.
								    * Normally such a renegade is answered with a poison packet. The
								    * exception is when it is itself sending US a poison packet:
								    * replying in kind would loop forever, so we halt instead, since
								    * this implies a split brain situation that the split brain
								    * avoidance algorithm failed to prevent.
								    *
								    * NT notes: even with poison packets being sent and received in the
								    * kernel, these checks are still wanted because clusnet does not
								    * have the regroup stage info and regroup packets themselves find
								    * their way in here.
								    */
								   if (!ClusterMember(rgp->outerscreen, node)
								#if defined( NT )
								       ||
								       ClusterMember(rgp->OS_specific_control.Banished, node)
								#endif
								      )
								   {
								      /* In the RGP_COLDLOADED stage nothing is done here; per the
								       * original note, that check (RGP_RELOADFAILED) is performed in
								       * srgpsm.c.
								       */
								      if (rgp->rgppkt.stage != RGP_COLDLOADED)
								      {
								         if (pkt->pktsubtype == RGP_UNACK_POISON)
								         {
								            RGP_ERROR((uint16) (RGP_PARIAH + EXT_NODE(node)));
								         }
								         else
								         {
								            /* Must send a poison packet to the sender. */
								            ClusterInsert(rgp->poison_targets, node);
								            rgp_broadcast(RGP_UNACK_POISON);
								         }
								      }
								      return;
								   }

								   /* The sender passed screening: act on the packet subtype. */
								   switch (pkt->pktsubtype)
								   {
								      case RGP_UNACK_IAMALIVE :

								         /* Count the number of IamAlives received */
								         if ( node == rgp->mynode )
								             RGP_INCREMENT_COUNTER( RcvdLocalIAmAlive );
								         else
								             RGP_INCREMENT_COUNTER( RcvdRemoteIAmAlive );

								         if (rgp->node_states[node].status == RGP_NODE_ALIVE)
								         {
								            /* Record the check-in for an already-alive node. */
								            rgp->node_states[node].pollstate = IAMALIVE_RECEIVED;
								         }
								         else if (rgp->node_states[node].status == RGP_NODE_COMING_UP)
								         {
								            /* The node has not yet been marked fully up; it is time
								             * to do so.
								             */
								            rgp_monitor_node(EXT_NODE(node));

								            /* Tell the OS that the new node is up in case the OS
								             * needs the IamAlives to figure that out.
								             */
								            rgp_newnode_online(EXT_NODE(node));
								         }
								         else
								            /* A node that is neither alive nor coming up must not be
								             * in our outerscreen, yet the screening above passed.
								             * We should not get here.
								             */
								            RGP_ERROR(RGP_INTERNAL_ERROR);

								         break;

								      case RGP_UNACK_REGROUP  :

								         /* Count the number of regroup status packets received. */
								         RGP_INCREMENT_COUNTER( RcvdRegroup );

								         /* Any good packet can be treated as an IamAlive packet. */
								         rgp->node_states[node].pollstate = IAMALIVE_RECEIVED;

								         /* Hand the packet to the regroup event handler, identifying
								          * the sender in external format.
								          */
								         RGP_EVENT_HANDLER_EX (RGP_EVT_RECEIVED_PACKET, EXT_NODE(node), (void*)pkt);
								         break;

								      case RGP_UNACK_POISON   :

								         /* A poison packet always leads to a halt; only the halt code
								          * differs:
								          *  - in the RGP_PRUNING stage with our node pruned out, the
								          *    sender has probably moved to the next stage and declared
								          *    us down, so RGP_PRUNED_OUT is the more appropriate code;
								          *  - in the RGP_COLDLOADED stage, RGP_RELOADFAILED is used;
								          *  - otherwise the poison packet halt code names the sender.
								          */
								         if ( (rgp->rgppkt.stage == RGP_PRUNING) &&
								              !ClusterMember(rgp->rgppkt.pruning_result, rgp->mynode) )
								            RGP_ERROR(RGP_PRUNED_OUT);
								         else if (rgp->rgppkt.stage == RGP_COLDLOADED)
								         {
								            RGP_ERROR(RGP_RELOADFAILED);
								            return;
								         }
								         else
								            RGP_ERROR((uint16) (RGP_PARIAH + EXT_NODE(node)));

								         break;

								      default                   :

								         /* Ignore the unknown packet type. */
								         break;
								   }
								}

								/*---------------------------------------------------------------------------*/


								#ifdef __cplusplus

								}

								#endif /* __cplusplus */


								#if 0


								History of changes to this file:

								-------------------------------------------------------------------------

								1995, December 13                                           F40:KSK0610          /*F40:KSK06102.6*/


								This file is part of the portable Regroup Module used in the NonStop

								Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There

								are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,

								srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.

								The last two are simulation files to test the Regroup Module on a

								UNIX workstation in user mode with processes simulating processor nodes

								and UDP datagrams used to send unacknowledged datagrams.


								This file was first submitted for release into NSK on 12/13/95.

								------------------------------------------------------------------------------

								This change occurred on 19 Jan 1996                                              /*F40:MB06458.1*/

								Changes for phase IV Sierra message system release. Includes:                    /*F40:MB06458.2*/

								 - Some cleanup of the code                                                      /*F40:MB06458.3*/

								 - Increment KCCB counters to count the number of setup messages and             /*F40:MB06458.4*/

								   unsequenced messages sent.                                                    /*F40:MB06458.5*/

								 - Fixed some bugs                                                               /*F40:MB06458.6*/

								 - Disable interrupts before allocating broadcast sibs.                          /*F40:MB06458.7*/

								 - Change per-packet-timeout to 5ms                                              /*F40:MB06458.8*/

								 - Make the regroup and powerfail broadcast use highest priority                 /*F40:MB06458.9*/

								   tnet services queue.                                                          /*F40:MB06458.10*/

								 - Call the millicode backdoor to get the processor status from SP               /*F40:MB06458.11*/

								 - Fixed expand bug in msg_listen_ and msg_readctrl_                             /*F40:MB06458.12*/

								 - Added enhancement to msngr_sendmsg_ so that clients do not need               /*F40:MB06458.13*/

								   to be unstoppable before calling this routine.                                /*F40:MB06458.14*/

								 - Added new steps in the build file called                                      /*F40:MB06458.15*/

								   MSGSYS_C - compiles all the message system C files                            /*F40:MB06458.16*/

								   MSDRIVER - compiles all the MSDriver files                                    /*F40:MB06458.17*/

								   REGROUP  - compiles all the regroup files                                     /*F40:MB06458.18*/

								 - remove #pragma env libspace because we set it as a command line               /*F40:MB06458.19*/

								   parameter.                                                                    /*F40:MB06458.20*/

								-----------------------------------------------------------------------          /*F40:MB06458.21*/


								#endif    /* 0 - change descriptions */