#ifndef  _JRGPOS_H_
#define  _JRGPOS_H_

#ifdef __TANDEM
#pragma columns 79
#pragma page "jrgpos.h - T9050 - OS-specific declarations for Regroup Module"
#endif

/* @@@@@@ START COPYRIGHT @@@@@@
**  Tandem Confidential:  Need to Know only
**  Copyright (c) 1995, Tandem Computers Incorporated
**  Protected as an unpublished work.
**  All Rights Reserved.
**
**  The computer program listings, specifications, and documentation
**  herein are the property of Tandem Computers Incorporated and shall
**  not be reproduced, copied, disclosed, or used in whole or in part
**  for any reason without the prior express written permission of
**  Tandem Computers Incorporated.
**
** @@@@@@ END COPYRIGHT @@@@@@
**/

/*---------------------------------------------------------------------------
 * This file (jrgpos.h) contains OS-specific declarations used by Regroup.
 * Use appropriate #includes to pull in declarations from other native
 * OS files.
 *
 * Exactly which declarations are visible depends on which of the build
 * macros NSK, LCU, UNIX or NT is defined by the build environment.
 *---------------------------------------------------------------------------*/

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */


/*---------------------------------------------------------------
 * Basic types, null pointer and node-numbering constants.
 *---------------------------------------------------------------*/

#ifdef NSK

/* NOTE(review): the header names of the four #include directives below are
 * missing from the text of this file as received; restore them from the NSK
 * source tree before building with NSK defined.
 */
#include                  /* to get "uint8", "uint16" and "uint32" */
#include
#include
#include

#define RGP_NULL_PTR      NIL_             /* Null pointer for use by RGP */

#define MAX_CLUSTER_SIZE  MAX_CPUS         /* max # of nodes supported in system */
#define LOWEST_NODENUM    ((node_t)0)      /* starting node number */
#define RGP_NULL_NODE     ((node_t)-1)     /* special node# for defaults */
#define RGP_KEY_NODE      RGP_NULL_NODE    /* No node is special */

#endif /* NSK */

#if defined(LCU) || defined(UNIX) || defined(NT)

typedef unsigned char  uint8;
typedef unsigned short uint16;
typedef unsigned int   uint32;

#ifndef NULL
#define NULL ((void *)0)
#endif /* NULL */

#define RGP_NULL_PTR      NULL             /* Null pointer for use by RGP */

#if defined(LCU) || defined(UNIX)
#define MAX_CLUSTER_SIZE  16               /* max # of nodes supported in system */
#define LOWEST_NODENUM    ((node_t)1)      /* starting node number */
#endif

#if defined(NT)
#include "service.h"
#define MAX_CLUSTER_SIZE  ClusterDefaultMaxNodes
                                           /* max # of nodes supported in system */
#define LOWEST_NODENUM    ((node_t)ClusterMinNodeId)
                                           /* starting node number */
#endif

#define RGP_NULL_NODE     ((node_t)-1)     /* special node# for defaults */
#define RGP_KEY_NODE      RGP_NULL_NODE    /* No node is special */

#define _priv             /* used only by NSK compilers */
#define _resident         /* used only by NSK compilers */

#endif /* LCU || UNIX || NT */


/*---------------------------------------------------------------
 * Unacknowledged packet subtypes used by regroup.
 * These are made visible to the OS for reporting and counting
 * purposes only.
 *---------------------------------------------------------------*/

/* Maximum payload of packets sent by Regroup is 56 bytes.
 * This allows a maximum transport overhead of 8 bytes in the
 * ServerNet interrupt packet which has a size of 64 bytes.
 */
#define RGP_UNACK_PKTLEN  56 /*bytes*/

/* Generic view of a regroup packet: a one-byte subtype followed by the
 * subtype-specific payload, RGP_UNACK_PKTLEN bytes in total.
 */
typedef struct
{
   uint8 pktsubtype;
   uint8 subtype_specific[RGP_UNACK_PKTLEN - sizeof(uint8)];
} rgp_unseq_pkt_t;

/* Regroup unacknowledged packet subtypes.
 * Each replacement list is fully parenthesized so that the cast cannot
 * combine with operators at the point of use.
 */
#define RGP_UNACK_IAMALIVE   ((uint8) 1)   /* I am alive packet      */
#define RGP_UNACK_REGROUP    ((uint8) 2)   /* regroup status packet  */
#define RGP_UNACK_POISON     ((uint8) 3)   /* poison packet          */


/*---------------------------------------------------------------*/
/* Processor status codes returned by the Service Processor (SP) */
/*---------------------------------------------------------------*/
#define RGP_NODE_UNREACHABLE   0
#define RGP_NODE_TIMED_OUT     1
#define RGP_NODE_FROZEN        2
#define RGP_NODE_HALTED        3
#define RGP_NODE_OPERATIONAL   4


/*----------------------*/
/* Processor halt codes */
/*----------------------*/

#ifdef NSK
/* NOTE(review): header name missing from the text as received. */
#include
#endif /* NSK */

#if defined(LCU) || defined(UNIX) || defined(NT)

#define RGP_RELOADFAILED        1
#define RGP_INTERNAL_ERROR      2
#define RGP_MISSED_POLL_TO_SELF 3
#define RGP_AVOID_SPLIT_BRAIN   4
#define RGP_PRUNED_OUT          5

/* RGP_PARIAH is the base of a range of halt codes, one per node number:
 * RGP_PARIAH_FIRST corresponds to LOWEST_NODENUM and RGP_PARIAH_LAST to the
 * highest node number. The replacement lists are parenthesized so the
 * values stay correct inside larger expressions (e.g. comparisons combined
 * with arithmetic).
 */
#define RGP_PARIAH              6
#define RGP_PARIAH_FIRST        (RGP_PARIAH + LOWEST_NODENUM)
#define RGP_PARIAH_LAST         (RGP_PARIAH_FIRST + MAX_CLUSTER_SIZE - 1)

#define RGP_ARBITRATION_FAILED  1000
#define RGP_ARBITRATION_STALLED 1001
#define RGP_SHUTDOWN_DURING_RGP 1002  /* Alias of MM_STOP_REQUESTED in mmapi.h */

#endif /* LCU || UNIX || NT */


/*-------------------------------------------------------------------------
 * Timing parameters for Regroup. Of these, RGP_IAMALIVE_TICKS can be
 * overridden at run time using the rgp_getrgpinfo(), rgp_setrgpinfo()
 * routine pair. This is useful to slow node failure detection during
 * kernel debug sessions.
 *
 * Note that RGP_CLOCK_PERIOD is expressed in units of 10 milliseconds on
 * NSK, LCU and UNIX but in units of 1 millisecond on NT.
 *-------------------------------------------------------------------------*/

#ifdef NSK
#define RGP_CLOCK_PERIOD      30     /* period between regroup ticks,
                                      * in units of 10 milliseconds */
#define RGP_PFAIL_TICKS       16     /* # of regroup ticks after a
                                      * power on event to forgive
                                      * missing self IamAlives */
#endif /* NSK */

#ifdef LCU
#define RGP_CLOCK_PERIOD      30     /* period between regroup ticks,
                                      * in units of 10 milliseconds */
#define RGP_PFAIL_TICKS       16     /* # of regroup ticks after a
                                      * power on event to forgive
                                      * missing self IamAlives */
#endif /* LCU */

#ifdef UNIX
#define RGP_CLOCK_PERIOD      100    /* period between regroup ticks,
                                      * in units of 10 milliseconds */
#define RGP_PFAIL_TICKS       16     /* # of regroup ticks after a
                                      * power on event to forgive
                                      * missing self IamAlives */
#endif /* UNIX */

#ifdef NT
#define RGP_INACTIVE_PERIOD   60000  /* period between regroup ticks in ms when
                                      * node is inactive. 1 minute period */
#define RGP_CLOCK_PERIOD      300    /* period between regroup ticks,
                                      * in units of milliseconds */
#define RGP_PFAIL_TICKS       16     /* # of regroup ticks after a
                                      * power on event to forgive
                                      * missing self IamAlives */
#endif /* NT */

/* The following timing parameters can be overridden at run time by using
 * the rgp_getrgpinfo(), rgp_setrgpinfo() routine pair.
 */

/* Bug#328641
 *
 * Extend min_stage1 to approx 4 seconds to match NM values (check_ticks 2=>3)
 * Extend connectivity ticks to 3 to 9 ticks
 * adjust rgp_must_restart accordingly (20=>23) half conn_tick increase
 */

#define RGP_IAMALIVE_TICKS       4   /* rgp clock ticks between IamAlives */
#define RGP_CHECK_TICKS          3   /* rgp clock ticks before at least
                                        1 IamAlive received */
#define RGP_MIN_STAGE1_TICKS     (RGP_IAMALIVE_TICKS * RGP_CHECK_TICKS)

/* The following parameters can be made to be OS-dependent if needed. */

#define RGP_MUST_ENTER_STAGE2   32   /* must enter stage2 after this many
                                        ticks, regardless of conditions */
#define RGP_CONNECTIVITY_TICKS   9   /* max # of ticks to wait in stage 2
                                        to collect connectivity info */
#define RGP_MUST_RESTART        23   /* stall detector tick count; if no
                                        progress after this many ticks,
                                        abort and restart regroup. */
#define RGP_RESTART_MAX          3   /* maximum number of restarts allowed per
                                        regroup incident; if this is exceeded,
                                        the node halts. */


/*--------------------------------------*/
/* Definition of node and cluster types */
/*--------------------------------------*/

typedef short node_t;

/* The cluster_t data type is a bit array with MAX_CLUSTER_SIZE
 * bits. It is implemented as an array of MAX_CLUSTER_SIZE/8
 * (rounded up) uint8s.
 */
#define BYTEL 8 /* number of bits in a uint8 */
#define BYTES_IN_CLUSTER ((MAX_CLUSTER_SIZE + BYTEL - 1) / BYTEL)

typedef uint8 cluster_t [BYTES_IN_CLUSTER];


/************************************************************************
 * rgp_msgsys_t (shared by regroup and message system)
 * ---------------------------------------------------
 * This structure is used by Regroup and the Message System to co-ordinate
 * actions that are to be done by the Message System on behalf of Regroup.
 * Regroup posts work requests in timer or IPC interrupt context and the
 * message system performs these at appropriate times (from the
 * dispatcher in NSK).
 *
 *      ___________________________________________________________
 * wd0 |      flags (bitfields)      |        regroup_nodes        |
 *     |_____________________________|_____________________________|
 * wd1 |       iamalive_nodes        |        poison_nodes         |
 *     |_____________________________|_____________________________|
 * wd2 |                      *regroup_data                        |
 *     |___________________________________________________________|
 * wd3 |                      *iamalive_data                       |
 *     |___________________________________________________________|
 * wd4 |                      *poison_data                         |
 *     |___________________________________________________________|
 * wd5 |                      regroup_datalen                      |
 *     |___________________________________________________________|
 * wd6 |                      iamalive_datalen                     |
 *     |___________________________________________________________|
 * wd7 |                      poison_datalen                       |
 *     |___________________________________________________________|
 *
 *
 * flags:
 *
 *    sendrgppkts    - have regroup status packets to send
 *    sendiamalives  - have iamalive status packets to send
 *    sendpoisons    - have poison packets to send
 *    phase1_cleanup - need to start phase1 cleanup due to node death
 *    phase2_cleanup - need to start phase2 cleanup due to node death
 *
 * regroup_nodes     - mask of nodes to send regroup pkts to
 * iamalive_nodes    - mask of nodes to send iamalives to
 * poison_nodes      - mask of nodes to send poison pkts to
 *
 * The following fields are compiled in only when NSK, UNIX or NT is
 * defined (they are absent from an LCU-only build).
 *
 * regroup_data      - address of regroup pkt data to send
 * iamalive_data     - address of iamalive data to send
 * poison_data       - address of poison pkt data to send
 *
 * regroup_datalen   - length of regroup pkt data to send
 * iamalive_datalen  - length of iamalive data to send
 * poison_datalen    - length of poison pkt data to send
 *
 */

#ifdef __TANDEM
#pragma fieldalign shared8 rgp_msgsys
#endif /* __TANDEM */

typedef struct rgp_msgsys
{
   uint16 sendrgppkts    : 1;
   uint16 sendiamalives  : 1;
   uint16 sendpoisons    : 1;
   uint16 phase1_cleanup : 1;
   uint16 phase2_cleanup : 1;
   uint16 filler         : 11;   /* pads the flag word to 16 bits */
   cluster_t regroup_nodes;
   cluster_t iamalive_nodes;
   cluster_t poison_nodes;
#if defined(NSK) || defined(UNIX) || defined(NT)
   void *regroup_data;
   void *iamalive_data;
   void *poison_data;
   uint32 regroup_datalen;
   uint32 iamalive_datalen;
   uint32 poison_datalen;
#endif /* NSK || UNIX || NT */
} rgp_msgsys_t;

typedef struct rgp_msgsys *rgp_msgsys_p;


/*----------------------------------------------------------------------
 * OS-dependent routines used by Regroup.
 *
 * These are defined either in the regroup file srgpos.c or in other
 * modules in the OS.
 *----------------------------------------------------------------------*/
_priv _resident extern void rgp_init_OS(void);
_priv _resident extern void rgp_broadcast(uint8 packet_subtype);
_priv _resident extern void rgp_node_failed(node_t node);
_priv _resident extern void rgp_start_phase1_cleanup(void);
_priv _resident extern void rgp_start_phase2_cleanup(void);
_priv _resident extern void rgp_cleanup_complete(void);
_priv _resident extern void rgp_had_power_failure(node_t node);
_priv _resident extern int  rgp_status_of_node(node_t node);
_priv _resident extern void rgp_newnode_online(node_t newnode);
_priv _resident extern int  rgp_select_cluster(cluster_t cluster_choices[],
                                               int num_clusters);
_priv _resident extern int  rgp_select_cluster_ex(cluster_t cluster_choices[],
                                                  int num_clusters,
                                                  node_t keynode);
_priv _resident extern void rgp_cleanup_OS(void);

#ifdef NSK
/* NOTE(review): the header names of the four #include directives below are
 * missing from the text of this file as received.
 */
#include                                                 /*F40:MB06452.1*/
#include                                                 /*F40:MB06452.2*/
#include                                                 /*F40:MB06452.3*/
#include                                                 /*F40:MB06452.4*/
                                                         /*F40:MB06452.5*/
#define rgp_hold_all_io    TSER_TRANSFER_PAUSE_          /*F40:MB064514.1*/
#define rgp_resume_all_io  TSER_TRANSFER_CONTINUE_       /*F40:MB064514.2*/
                                                         /*F40:MB06452.8*/
#else
_priv _resident extern void rgp_hold_all_io(void);
_priv _resident extern void rgp_resume_all_io(void);
#endif /* NSK */


/*
 * Macros to set and get the members of a cluster using a mask of
 * appropriate size.
 *
 * Both macros touch exactly cluster[0] and cluster[1], i.e. they treat the
 * cluster as a 16-bit big-endian mask; cluster_t must therefore hold at
 * least two bytes (MAX_CLUSTER_SIZE > 8).
 */

/* SetCluster(cluster_t cluster, uint16 mask)
 *
 * Stores the high byte of mask in cluster[0] and the low byte in
 * cluster[1]. Both arguments are parenthesized in the expansion so that
 * an expression such as "a | b" can be passed as the mask. Note that mask
 * is evaluated twice, so it must not have side effects.
 *
 * The expansion is deliberately left as a bare compound statement (rather
 * than do { } while (0)) so that existing call sites, with or without a
 * trailing semicolon, keep compiling unchanged.
 */
#define SetCluster(cluster, mask)                        \
   {                                                     \
      (cluster)[0] = (uint8)((mask) >> 8);               \
      (cluster)[1] = (uint8)((mask) & 0xFF);             \
   }

/* GetCluster(cluster_t cluster)
 *
 * Returns the 16-bit mask stored in cluster[0] (high byte) and
 * cluster[1] (low byte).
 */
#define GetCluster(cluster)                              \
   (((uint16)(cluster)[0] << 8) | (uint16)(cluster)[1])

/* Macro to combine two cluster masks into a uint32.
 * This is used in tracing regroup events.
 *
 * c1 supplies the upper 16 bits and c2 the lower 16 bits. Each half is
 * converted to uint32 before shifting: GetCluster() yields an int-typed
 * value, and shifting that left by 16 is undefined when bit 15 of the
 * first mask is set.
 */
#define RGP_MERGE_TO_32( c1, c2 )                        \
   ( ( (uint32)GetCluster( c1 ) << 16 ) | (uint32)GetCluster( c2 ) )


/*----------------------------------------------------------------------
 * OS-dependent routines used by the Regroup module.
 *
 * These are defined in srgpos.c.
 *----------------------------------------------------------------------*/

/* Routines to halt the node upon catastrophic errors.
 *
 * RGP_ERROR(halt_code) is the form used by callers. On NSK it expands to
 * SYSTEM_FREEZE_(halt_code); elsewhere it calls RGP_ERROR_EX() with the
 * source file and line of the call site.
 */
#ifdef NSK
/* NOTE(review): header name missing from the text as received. */
#include                                                 /*F40:MB06458.3*/
#define RGP_ERROR(/* uint16 */ halt_code) SYSTEM_FREEZE_(halt_code)
#else
/* NOTE(review): DWORD is not declared in this header; it is presumably
 * supplied by a platform header included before this one - confirm for
 * LCU and UNIX builds.
 */
_priv _resident extern void RGP_ERROR_EX (uint16 halt_code,
                                          char* fname,
                                          DWORD lineno);
#define RGP_ERROR(halt_code) RGP_ERROR_EX(halt_code, __FILE__, __LINE__)
#endif /* NSK */

#ifdef UNIX
/* Declared without parameter information, exactly as in the original;
 * the argument lists are not specified by this header.
 */
_priv _resident extern void PrintRegroupStart();
_priv _resident extern void PrintPruningResult();
_priv _resident extern void PrintStage();
_priv _resident extern void PrintMatrix();
#endif /* UNIX */

#ifdef __cplusplus
}
#endif /* __cplusplus */


#if 0

History of changes to this file:
-------------------------------------------------------------------------
1995, December 13                                           F40:KSK0610          /*F40:KSK06102.1*/

This file is part of the portable Regroup Module used in the NonStop
Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
The last two are simulation files to test the Regroup Module on a
UNIX workstation in user mode with processes simulating processor nodes
and UDP datagrams used to send unacknowledged datagrams.

This file was first submitted for release into NSK on 12/13/95.
------------------------------------------------------------------------------
This change occurred on 19 Jan 1996                                              /*F40:MB06458.4*/
Changes for phase IV Sierra message system release. Includes:                    /*F40:MB06458.5*/
 - Some cleanup of the code                                                      /*F40:MB06458.6*/
 - Increment KCCB counters to count the number of setup messages and             /*F40:MB06458.7*/
   unsequenced messages sent.                                                    /*F40:MB06458.8*/
 - Fixed some bugs                                                               /*F40:MB06458.9*/
 - Disable interrupts before allocating broadcast sibs.                          /*F40:MB06458.10*/
 - Change per-packet-timeout to 5ms                                              /*F40:MB06458.11*/
 - Make the regroup and powerfail broadcast use highest priority                 /*F40:MB06458.12*/
   tnet services queue.                                                          /*F40:MB06458.13*/
 - Call the millicode backdoor to get the processor status from SP               /*F40:MB06458.14*/
 - Fixed expand bug in msg_listen_ and msg_readctrl_                             /*F40:MB06458.15*/
 - Added enhancement to msngr_sendmsg_ so that clients do not need               /*F40:MB06458.16*/
   to be unstoppable before calling this routine.                                /*F40:MB06458.17*/
 - Added new steps in the build file called                                      /*F40:MB06458.18*/
   MSGSYS_C - compiles all the message system C files                            /*F40:MB06458.19*/
   MSDRIVER - compiles all the MSDriver files                                    /*F40:MB06458.20*/
   REGROUP  - compiles all the regroup files                                     /*F40:MB06458.21*/
-----------------------------------------------------------------------          /*F40:MB06458.22*/

#endif    /* 0 - change descriptions */


#endif /* _JRGPOS_H_ defined */