Source code of Windows XP (NT5)


  1. #ifdef __TANDEM
  2. #pragma columns 79
  3. #pragma page "srgpsm.c - T9050 - Regroup Module state machine routines"
  4. #endif
  5. /* @@@ START COPYRIGHT @@@
  6. ** Tandem Confidential: Need to Know only
  7. ** Copyright (c) 1995, Tandem Computers Incorporated
  8. ** Protected as an unpublished work.
  9. ** All Rights Reserved.
  10. **
  11. ** The computer program listings, specifications, and documentation
  12. ** herein are the property of Tandem Computers Incorporated and shall
  13. ** not be reproduced, copied, disclosed, or used in whole or in part
  14. ** for any reason without the prior express written permission of
  15. ** Tandem Computers Incorporated.
  16. **
  17. ** @@@ END COPYRIGHT @@@
  18. **/
  19. /*---------------------------------------------------------------------------
  20. * This file (srgpsm.c) contains regroup state machine routines.
  21. *---------------------------------------------------------------------------*/
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif /* __cplusplus */
  25. #include <wrgp.h>
  26. /*---------- arbitration algorithm ------------ */
  27. DWORD MmQuorumArbitrationTimeout = 60; // seconds
  28. DWORD MmQuorumArbitrationEqualizer = 7; // seconds
  29. #define RGP_ARBITRATION_TIMEOUT ((MmQuorumArbitrationTimeout * 100)/30) // tick == 300ms
  30. #define AVERAGE_ARBITRATION_TIME_IN_SECONDS (MmQuorumArbitrationEqualizer)
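//
// Worked example with the defaults above: RGP_ARBITRATION_TIMEOUT converts
// seconds into 300ms regroup ticks, i.e. seconds * 1000/300 == seconds * 100/30.
// With MmQuorumArbitrationTimeout == 60 this yields (60 * 100)/30 == 200 ticks,
// and 200 ticks * 0.3s == 60 seconds of arbitration budget.
//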
  31. void enter_first_cleanup_stage();
  32. void regroup_restart();
  33. int ClusterEmpty(cluster_t c);
  34. DWORD
  35. DiskArbitrationThread(
  36. IN LPVOID param
  37. ) ;
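/*
 * regroup_test_arbitrate_advance (summary of the logic below):
 * returns nonzero once the arbitration wait is over, i.e. either no node
 * was pruned from the original cluster (so no arbitration was needed), or
 * at least one member of the surviving group (pruning_result) has already
 * reached stage 4, which means that group owns the quorum.
 */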
  38. _priv _resident static int
  39. regroup_test_arbitrate_advance()
  40. {
  41. cluster_t temp;
  42. int orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  43. int current_numnodes = ClusterNumMembers(rgp->rgppkt.pruning_result);
  44. if( orig_numnodes == current_numnodes ) {
  45. return 1;
  46. }
  47. //
  48. // If somebody entered stage4 then our group owns the quorum
  49. //
  50. ClusterIntersection(
  51. temp,
  52. rgp->rgppkt.knownstage4,
  53. rgp->rgppkt.pruning_result
  54. );
  55. return ClusterNumMembers(temp) != 0;
  56. }
  57. _priv _resident static int
  58. regroup_start_arbitrate()
  59. {
  60. int orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  61. int current_numnodes = ClusterNumMembers(rgp->rgppkt.pruning_result);
  62. if( orig_numnodes == current_numnodes ) {
  63. enter_first_cleanup_stage();
  64. return 0; // No Arbitration needed. Proceed to clean up stage //
  65. }
  66. else {
  67. cluster_t arbitrators;
  68. int n_arbitrators;
  69. node_t arbitrator;
  70. HANDLE thread;
  71. DWORD threadId;
  72. ULONG epoch;
  73. RGP_LOCK;
  74. epoch = rgp->OS_specific_control.EventEpoch;
  75. if(rgp->arbitration_started) {
  76. RGP_UNLOCK;
  77. return 1; // stay in this stage for awhile
  78. }
  79. rgp->arbitration_ticks = 0;
  80. rgp->arbitration_started = 1;
  81. RGP_UNLOCK;
  82. ClusterIntersection(
  83. arbitrators,
  84. rgp->rgppkt.pruning_result,
  85. rgp->rgppkt.quorumowner
  86. );
  87. n_arbitrators = ClusterNumMembers(arbitrators);
  88. if(n_arbitrators == 0) {
  89. //
  90. // If there are no quorum owners in this group //
  91. // Let's take the guy with the lowest id //
  92. //
  93. arbitrator = rgp_select_tiebreaker(rgp->rgppkt.pruning_result);
  94. } else {
  95. //
  96. // Otherwise we will take the quorum owner guy
  97. // with the lowest id
  98. //
  99. arbitrator = rgp_select_tiebreaker(arbitrators);
  100. if(n_arbitrators > 1) {
  101. RGP_TRACE( "RGP !!! More than one quorum owner",
  102. EXT_NODE(arbitrator), /* TRACE */
  103. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  104. GetCluster( rgp->rgppkt.pruning_result ),/* TRACE */
  105. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  106. // Do we need to kill all other arbitrators?
  107. // No.
  108. // ClusterDelete(arbitrators, arbitrator);
  109. // ClusterUnion(
  110. // rgp->poison_targets,
  111. // rgp->poison_targets,
  112. // arbitrators
  113. // );
  114. // rgp_broadcast(RGP_UNACK_POISON);
  115. }
  116. }
  117. rgp->tiebreaker = arbitrator;
  118. //
  119. // Now we have an arbitrating node
120. // We will run a thread that will run the arbitration algorithm
  121. //
  122. RGP_TRACE( "RGP Arbitration Delegated to",
  123. EXT_NODE(arbitrator), /* TRACE */
  124. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  125. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  126. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  127. rgp->OS_specific_control.ArbitratingNode = (DWORD)EXT_NODE(arbitrator);
  128. if(arbitrator != rgp->mynode) {
  129. return 1;
  130. }
  131. thread = CreateThread( NULL, // security attributes
  132. 0, // stack_size = default
  133. DiskArbitrationThread,
  134. ULongToPtr(epoch),
  135. 0, // runs immediately
  136. &threadId );
  137. if(thread == NULL) {
  138. //
  139. // Force Others to regroup //
  140. //
  141. RGP_LOCK;
  142. rgp_event_handler( RGP_EVT_BANISH_NODE, EXT_NODE(rgp->mynode) );
  143. RGP_UNLOCK;
  144. //
  145. // Kill this node
  146. //
  147. RGP_ERROR(RGP_ARBITRATION_FAILED);
  148. return FALSE;
  149. }
  150. CloseHandle(thread);
  151. }
  152. return TRUE;
  153. }
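/*
 * DiskArbitrationThread (summary of the logic below): runs the quorum
 * arbitration callback on the node that arbitration was delegated to.
 * The thread parameter is the EventEpoch captured when the thread was
 * created; the thread bails out whenever the epoch has changed (stale
 * event). It staggers its start so that smaller groups wait longer, and
 * pads unusually fast arbitration up to AVERAGE_ARBITRATION_TIME_IN_SECONDS.
 * On success it advances regroup to the first cleanup stage; on failure it
 * banishes this node and halts with RGP_ARBITRATION_FAILED.
 */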
  154. DWORD
  155. DiskArbitrationThread(
  156. IN LPVOID param
  157. )
  158. {
  159. cluster_t current_participants;
  160. DWORD status;
  161. int participant_count;
  162. int delay;
  163. ULONG_PTR startingEpoch = (ULONG_PTR) param;
  164. BOOL EpochsEqual;
  165. int orig_numnodes;
  166. int current_numnodes;
  167. LONGLONG Time1, Time2;
  168. ClusterCopy(current_participants, rgp->rgppkt.pruning_result);
  169. orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  170. current_numnodes = ClusterNumMembers(current_participants);
  171. RGP_LOCK;
  172. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  173. RGP_UNLOCK;
  174. if(!EpochsEqual)
  175. return 0;
  176. delay = (orig_numnodes+1)/2 - current_numnodes;
  177. if(delay < 0) delay = 0;
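//
// Stagger the start of arbitration so that smaller groups wait longer.
// Illustrative example: with 4 original nodes, a lone survivor gets
// delay = (4+1)/2 - 1 = 1 and sleeps 6 seconds, while a 2-node group
// gets delay = 0 and arbitrates immediately.
//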
  178. Sleep(delay * 6000);
  179. RGP_LOCK;
  180. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  181. if (EpochsEqual) {
  182. rgp->OS_specific_control.ArbitrationInProgress += 1;
  183. }
  184. RGP_UNLOCK;
  185. if(!EpochsEqual)
  186. return 0;
  187. GetSystemTimeAsFileTime((LPFILETIME)&Time1);
  188. status = (*(rgp->OS_specific_control.QuorumCallback))();
  189. GetSystemTimeAsFileTime((LPFILETIME)&Time2);
  190. if (status != 0
  191. && startingEpoch == rgp->OS_specific_control.EventEpoch)
  192. {
  193. // If we won the arbitration and we are in the same epoch (approx check)
  194. // we need to figure out whether we need to slow down a little
  195. Time2 -= Time1;
  196. // Convert to seconds
  197. Time2 = Time2 / 10 / 1000 / 1000;
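// (FILETIME values are in 100-nanosecond units, so /10 gives microseconds,
// /1000 gives milliseconds, and /1000 again gives seconds.)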
  198. //
  199. // [HACKHACK] GorN Oct/30/1999
200. // We had a weird time jump in the middle of the arbitration:
201. // arbitration was completed before it started, we slept for
202. // too long, and regroup timed us out. Let's guard against it.
  203. //
  204. if ( (Time2 >= 0)
  205. && (Time2 < AVERAGE_ARBITRATION_TIME_IN_SECONDS) )
  206. {
  207. //
  208. // Don't need to be better than the average
  209. // If we are so fast, let's slow down
  210. //
  211. Time2 = AVERAGE_ARBITRATION_TIME_IN_SECONDS - Time2;
  212. RGP_TRACE( "RGP sleeping",
  213. (ULONG)Time2, /* TRACE */
  214. 0, /* TRACE */
  215. 0, /* TRACE */
  216. 0 ); /* TRACE */
  217. Sleep( (ULONG)(Time2 * 1000) );
  218. }
  219. }
  220. RGP_LOCK;
  221. rgp->OS_specific_control.ArbitrationInProgress -= 1;
  222. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  223. if(!EpochsEqual) {
  224. RGP_UNLOCK;
  225. return 0;
  226. }
  227. if(status) {
  228. //
  229. // We own the quorum device
  230. // Let's proceed to the next stage
  231. //
  232. enter_first_cleanup_stage();
  233. RGP_UNLOCK;
  234. //
  235. // All the rest will see that we are in cleanup stage and
  236. // will proceed to it too
  237. //
  238. } else {
  239. //
  240. // Force Others to regroup //
  241. //
  242. rgp_event_handler( RGP_EVT_BANISH_NODE, EXT_NODE(rgp->mynode) );
  243. RGP_UNLOCK;
  244. //
  245. // Kill this node
  246. //
  247. RGP_ERROR(RGP_ARBITRATION_FAILED);
  248. }
  249. return 0;
  250. }
  251. /************************************************************************
  252. * rgp_check_packet
  253. * rgp_print_packet
  254. * =================
  255. *
  256. * Description:
  257. *
  258. * Forward declarations of functions used in rgp_sanity_check macro
  259. *
  260. ************************************************************************/
  261. void rgp_print_packet(rgp_pkt_t* pkt, char* label, int code);
  262. int rgp_check_packet(rgp_pkt_t* pkt);
  263. /************************************************************************
  264. * rgp_sanity_check
  265. * =================
  266. *
  267. * Description:
  268. *
  269. * This macro prints RGP packet if it has unreasonable values in
  270. * powerfail, knownstages, pruning_result, and connectivity_matrix fields.
  271. *
  272. * Parameters:
  273. *
  274. * rgp_pkt_t* pkt -
  275. * packet to be checked
  276. * char* label -
  277. * label that will be printed together with a packet
  278. *
  279. * Returns:
  280. *
  281. * VOID
  282. *
  283. ************************************************************************/
  284. #define rgp_sanity_check(__pkt,__label) \
  285. do { \
  286. int __code; __code = rgp_check_packet(__pkt); \
  287. if( __code ) {rgp_print_packet(__pkt, __label, __code);} \
  288. } while ( 0 )
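/* Illustrative usage (the label string here is just an example):
 *     rgp_sanity_check(&rgp->rgppkt, "recv pkt");
 * The do/while(0) wrapper makes the macro expand to a single statement, so
 * it can safely be used in an unbraced if/else.
 */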
  289. /*---------------------------------------------------------------------------*/
  290. /************************************************************************
  291. * split_brain_avoidance_algorithm
  292. * ===============================
  293. *
  294. * Description:
  295. *
  296. * This algorithm ensures that, after a regroup incident completes,
  297. * at most one group of nodes will survive regardless of connectivity
  298. * failures.
  299. *
  300. * Parameters:
  301. *
  302. * None
  303. *
  304. * Returns:
  305. *
  306. * void - no return value; The algorithm results in either this node
  307. * halting (with the RGP_AVOID_SPLIT_BRAIN halt code) or this group
  308. * being the only group that survives.
  309. *
  310. * Algorithm:
  311. *
  312. * The algorithm is described in detail in the Sierra Tech Memo S.84,
  313. * "Modifications in Regroup Algorithm for Sierra".
  314. *
  315. * The algorithm looks at the set of nodes currently visible from the
  316. * local cluster and compares it to the set of nodes alive before
  317. * the regroup incident started (outerscreen). The decision to survive
  318. * or halt depends on the number of nodes in the current group compared
  319. * to the number of nodes in the original group.
  320. *
  321. * Case 1:
  322. * If the current group contains > half the original number, this
  323. * group survives.
  324. *
  325. * Case 2:
  326. * If the current group contains < half the original number, this
  327. * node (and group) halts.
  328. *
  329. * Case 3:
  330. * If the current group contains exactly half the original number AND
  331. * the current group has at least two members, then this group
  332. * survives if and only if it contains the tie-breaker node (selected
  333. * when the cluster is formed and after each regroup incident).
  334. *
  335. * Case 4:
  336. * If the current group contains exactly half the original number AND
  337. * the current group has exactly one member, then we will call the
338. * QuorumSelect procedure to check if the Quorum Disk is accessible
339. * from this node. If the procedure returns TRUE we survive;
  340. * else we halt.
  341. *
  342. *
  343. ************************************************************************/
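/* Illustrative examples of the cases above: in a 6-node cluster that splits
 * 3/3, Case 3 applies on both sides and only the half containing the
 * tie-breaker node survives. If the same cluster splits 4/2, the 4-node side
 * survives by Case 1 and the 2-node side halts by Case 2.
 */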
  344. _priv _resident static void
  345. split_brain_avoidance_algorithm()
  346. {
  347. int orig_numnodes, current_numnodes;
  348. RGP_TRACE( "RGP SpltBrainAlg",
  349. EXT_NODE(rgp->tiebreaker), /* TRACE */
  350. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  351. GetCluster( rgp->outerscreen ), /* TRACE */
  352. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  353. /* Sanity checks:
  354. * 1. The current set of nodes must be a subset of the original set
  355. * of nodes.
  356. * 2. My node must be in the current set. This was checked
  357. * when stage2 was entered. No need to check again.
  358. */
  359. if (!ClusterSubsetOf(rgp->rgpinfo.cluster, rgp->rgppkt.knownstage2))
  360. RGP_ERROR(RGP_INTERNAL_ERROR);
  361. orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  362. current_numnodes = ClusterNumMembers(rgp->rgppkt.knownstage2);
  363. if (orig_numnodes == current_numnodes)
  364. /* All nodes are alive. No split brain possibility. */
  365. return;
  366. else if (orig_numnodes == 2) /* Special 2-node case */
  367. {
  368. if ((*(rgp->OS_specific_control.QuorumCallback))())
  369. return; /* we have access to Quorum disk. We survive. */
  370. else {
  371. #if defined( NT )
  372. ClusnetHalt( NmClusnetHandle );
  373. #endif
  374. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  375. }
  376. } /* Special 2-node case */
  377. else /* Multi (>2) node case */
  378. {
  379. if ((current_numnodes << 1) > orig_numnodes)
  380. /* Our group has more than half the nodes => we are the majority.
  381. * We can survive. Other group(s) will kill themselves.
  382. */
  383. return;
  384. else if ((current_numnodes << 1) < orig_numnodes)
  385. /* Our group has less than half the nodes => there may be a
  386. * larger group alive. We must halt and allow that group to
  387. * survive.
  388. */
  389. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  390. else
  391. {
  392. /* Our group has exactly half the number of processors;
  393. * We survive if we contain the tie-breaker node and halt otherwise.
  394. */
  395. if (ClusterMember(rgp->rgppkt.knownstage2, rgp->tiebreaker))
  396. return;
  397. else
  398. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  399. }
  400. } /* Multi (>2) node case */
  401. }
  402. /************************************************************************
  403. * regroup_restart
  404. * ===============
  405. *
  406. * Description:
  407. *
  408. * Starts a new regroup incident.
  409. *
  410. * Parameters:
  411. *
  412. * None
  413. *
  414. * Returns:
  415. *
  416. * void - no return value
  417. *
  418. * Algorithm:
  419. *
  420. * Sets the regroup state to RGP_ACTIVATED, pauses all IO and
  421. * initializes the stage masks and connectivity matrix.
  422. *
  423. ************************************************************************/
  424. _priv _resident static void
  425. regroup_restart()
  426. {
  427. cluster_t old_ignorescreen;
  428. UnpackIgnoreScreen(&rgp->rgppkt, old_ignorescreen);
  429. RGP_TRACE( "RGP (re)starting",
  430. rgp->rgppkt.seqno, /* TRACE */
  431. rgp->rgppkt.reason, /* TRACE */
  432. rgp->rgppkt.activatingnode, /* TRACE */
  433. rgp->rgppkt.causingnode ); /* TRACE */
  434. RGP_TRACE( "RGP masks ",
  435. RGP_MERGE_TO_32( rgp->outerscreen, /* TRACE */
  436. rgp->innerscreen ), /* TRACE */
  437. RGP_MERGE_TO_32( rgp->rgppkt.knownstage1, /* TRACE */
  438. rgp->rgppkt.knownstage2 ), /* TRACE */
  439. RGP_MERGE_TO_32( rgp->rgppkt.knownstage3, /* TRACE */
  440. rgp->rgppkt.knownstage4 ), /* TRACE */
  441. RGP_MERGE_TO_32( rgp->rgppkt.knownstage5, /* TRACE */
  442. rgp->rgppkt.pruning_result ) ); /* TRACE */
  443. /* We are about to start a new pass of the regroup algorithm.
  444. * This does not necessarily mean we have finished the previous
  445. * pass; i.e., in an abort situation we may be starting over.
  446. * This may occur when some other node fails during the current
  447. * pass through the algorithm leaving us hung up at one of the
  448. * intermediate stages.
  449. */
  450. //
451. // GN. When we do MM_LEAVE, our state is COLDLOADED.
452. // Bailing out of regroup_restart here would prevent us from
453. // forming a regroup packet that would initiate a banishing regroup incident.
  454. //
  455. /* To avoid split brained nodes from corrupting data in storage
  456. * devices, we request the transport subsystem to hold all IO requests
  457. * in a queue and not transfer them over SNet. We will allow IO to
  458. * be resumed when regroup can guarantee that there can no longer be
  459. * split brains. This will be done when the final group is determined
  460. * and regroup enters the RGP_PHASE1_CLEANUP stage.
  461. */
  462. rgp_hold_all_io();
  463. /* The following is a bit of history from the NSK regroup algorithm from
  464. * pre-Sierra systems based on the InterProcessor Bus (IPB). Some of
  465. * the particulars mentioned here have changed, but the principle remains.
  466. *
  467. * Previously, we used to mark all the known stages as zero, except for
  468. * stage1. We used to mark only ourselves as in stage1. So, even if our
  469. * bus reception logic is screwed up, and we are not receiving packets
  470. * from anybody including ourselves, we would mark ourselves as being in
  471. * stage1. And after (what used to be) six ticks, we would proceed into
  472. * stage2 and mark ourselves as being in stage2. This would cause stage1
  473. * and stage2 to be equal, and our world would constitute just
  474. * ourselves. Thus we would go through regroup eliminating everybody
  475. * else. However, since we are not receiving packets from anybody else,
  476. * we would miss our own iamalive packets, and we too will soon die of
  477. * %4032. Thus the symptoms would constitute everybody else dying of
  478. * (%4040 + some node number), and that node dying with a %4032 halt.
  479. * See TPR S 88070112309628 for more details.
  480. *
  481. * To avoid this situation, we now do not mark ourselves as in a
  482. * particular stage until we get our own regroup packets indicating we
  483. * are in that stage. Thus, in regroup_restart, all the stages are
  484. * cleared. Previously, regroupbroadcaststatus in sendqueuedmessages
  485. * used to send directly from the regroup_control structures.
  486. * regroupbroadcaststatus has been modified to construct the unsequenced
  487. * packets on its stack. It would first copy the state from the
  488. * regroup_control structure, and then would LOR in our node into a known
  489. * stage, if requested to do so. When we receive that packet, we would
  490. * merge that information into our state, and thus we would be
  491. * guaranteed that our bus sending and reception logic is working, and
  492. * that we can legitimately mark ourselves as being in that stage. This
  493. * whole change avoids problems where bus sending logic works, but bus
  494. * reception logic is screwed up for both buses in a node.
  495. */
  496. rgp->sendstage = 0; /* Don't let anyone know I am in stage 1 until
  497. * I have seen a regroup clock tick; this is to
  498. * cause this node to halt if it is not getting
  499. * clock ticks. I will halt when the other nodes
  500. * advance without me and send me a status packet
  501. * indicating this or send me a poison packet
  502. * after declaring me down.
  503. */
  504. rgp->rgpcounter = 0;
  505. ClusterInit(rgp->rgppkt.knownstage1);
  506. ClusterInit(rgp->rgppkt.knownstage2);
  507. ClusterInit(rgp->rgppkt.knownstage3);
  508. ClusterInit(rgp->rgppkt.knownstage4);
  509. ClusterInit(rgp->rgppkt.knownstage5);
  510. ClusterInit(rgp->rgppkt.pruning_result);
  511. MatrixInit(rgp->rgppkt.connectivity_matrix);
  512. MatrixInit(rgp->internal_connectivity_matrix);
  513. /* Just for ease of debugging, to send in our poison packets, we keep
  514. * the known nodes mask at the start of regroup. poison packets contain
  515. * known nodes at the beginning of regroup and at the end of it.
  516. */
  517. ClusterCopy(rgp->initnodes, rgp->rgpinfo.cluster);
  518. ClusterInit(rgp->endnodes);
  519. #if defined( NT )
  520. //
  521. // increment the event epoch so we can detect stale events
  522. // from clusnet
  523. //
  524. ++rgp->OS_specific_control.EventEpoch;
  525. #endif
  526. if ( (rgp->rgppkt.stage >= RGP_CLOSING) &&
  527. (rgp->rgppkt.stage <= RGP_PHASE2_CLEANUP) &&
  528. ClusterCompare(rgp->rgppkt.knownstage1,
  529. rgp->rgppkt.knownstage2) )
  530. {
  531. //
  532. // If we were interrupted by this restart after we closed
533. // the 1st stage regroup window, then no nodes can be added to the group w/o joining.
  534. //
  535. // Thus we will add missing nodes into our ignorescreen.
  536. // This will force the regroup not to wait for them in stage1
  537. cluster_t tmp;
  538. ClusterDifference(tmp, rgp->rgpinfo.cluster, rgp->innerscreen);
  539. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, tmp);
  540. }
  541. if ( ClusterMember(rgp->ignorescreen, rgp->mynode) ) {
542. // We shouldn't have gotten here, but since we are here,
543. // let's shield ourselves from the outside world.
  544. RGP_TRACE( "Self Isolation", 0, 0, 0, 0 );
  545. ClusterCopy(rgp->ignorescreen, rgp->rgpinfo.cluster);
  546. ClusterDelete(rgp->ignorescreen, rgp->mynode);
  547. }
  548. if ( !ClusterEmpty(rgp->ignorescreen) ) {
  549. // if we are ignoring somebody we have
  550. // to be cautious. I.e. we will stay longer in the
  551. // first stage to give a chance to everybody to learn about
  552. // our ignorescreen
  553. rgp->cautiousmode = 1;
  554. }
  555. if ( !ClusterCompare(old_ignorescreen, rgp->ignorescreen) ) {
556. // Ignore screen has changed; reset restart counter //
  557. RGP_TRACE( "Ignorescreen->", GetCluster(old_ignorescreen), GetCluster(rgp->ignorescreen), 0, 0 );
  558. rgp->restartcount = 0;
  559. }
  560. PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);
  561. rgp->arbitration_started = 0;
  562. rgp->OS_specific_control.ArbitrationInProgress = 1;
  563. rgp->OS_specific_control.ArbitratingNode = MM_INVALID_NODE;
  564. if ( !rgp_is_perturbed() ) {
  565. ResetEvent( rgp->OS_specific_control.Stabilized );
  566. }
  567. ClusterInit(rgp->rgppkt.quorumowner);
  568. if( QuorumOwner == (DWORD)EXT_NODE(rgp->mynode) ) {
  569. ClusterInsert(rgp->rgppkt.quorumowner, rgp->mynode);
  570. }
  571. if (rgp->rgppkt.stage == RGP_COLDLOADED)
  572. {
  573. if (!rgp->OS_specific_control.ShuttingDown) {
  574. //
  575. // Currently, RGP_RELOADFAILED calls ExitProcess
  576. // During clean shutdown we would like to send the regroup packet
  577. // out triggering a regroup. So we don't want to die.
  578. //
  579. // Since we are not resetting state to RGP_ACTIVATED, this
  580. // node will not be able to participate in the regroup.
  581. //
  582. RGP_ERROR(RGP_RELOADFAILED);
  583. }
  584. } else {
  585. rgp->rgppkt.stage = RGP_ACTIVATED;
  586. }
  587. }
  588. /************************************************************************
  589. * regroup_test_stage2_advance
  590. * ===========================
  591. *
  592. * Description:
  593. *
  594. * Checks to see if we can advance to regroup stage 2.
  595. *
  596. * Parameters:
  597. *
  598. * None
  599. *
  600. * Returns:
  601. *
  602. * int - 1 if stage 2 can be entered and 0 if not.
  603. *
  604. * Algorithm:
  605. *
  606. * Stage 2 can be entered if one of the following conditions is true.
  607. *
  608. * (a) all nodes are present and accounted for and at least one
  609. * regroup clock tick has occurred
  610. * (b) we are not in cautious mode, all but one node are present
  611. * and accounted for, AND a minimum number of ticks
  612. * (rgp_quickdecisionlegit) have elapsed.
  613. * (c) if RGP_MUST_ENTER_STAGE2 ticks have elapsed.
  614. *
  615. ************************************************************************/
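/* Illustrative example: in a 4-node cluster with cautious mode off, once 3
 * of the 4 nodes have checked into stage 1 (one straggler), condition (b)
 * lets us enter stage 2 as soon as rgpcounter exceeds Min_Stage1_ticks;
 * failing that, condition (c) forces the advance once RGP_MUST_ENTER_STAGE2
 * ticks have elapsed.
 */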
  616. _priv _resident static int
  617. regroup_test_stage2_advance()
  618. {
619. cluster_t stragglers; /* set of nodes not yet checked in */
620. int num_stragglers; /* # of nodes not yet checked in */
  621. /* Stage 2 must be entered after some interval regardless of any
  622. * other conditions.
  623. */
  624. if (rgp->rgpcounter == 0)
  625. return(0);
  626. if (rgp->rgpcounter >= RGP_MUST_ENTER_STAGE2)
  627. {
  628. RGP_TRACE( "RGP S->2cautious",
  629. rgp->rgpcounter, /* TRACE */
  630. rgp->cautiousmode, /* TRACE */
  631. GetCluster( rgp->outerscreen ), /* TRACE */
  632. GetCluster( rgp->rgppkt.knownstage1 ) ); /* TRACE */
  633. return(1);
  634. }
  635. /* The number of ticks is between 1 and RGP_MUST_ENTER_STAGE2.
  636. * We need to examine the stage1 mask to decide if we can
  637. * advance.
  638. *
  639. * If every node in the old configuration has checked in, I can
  640. * advance at once. This is either a false alarm or caused by
  641. * power failure or connectivity failures.
  642. */
  643. /* Compute the set of nodes from the original configuration not yet
  644. * recognized.
  645. */
  646. ClusterDifference(stragglers, rgp->outerscreen,
  647. rgp->rgppkt.knownstage1);
  648. //
  649. // We shouldn't wait for the nodes we are ignoring,
  650. // since we cannot get a packet from them anyway
  651. //
  652. ClusterDifference(stragglers, stragglers,
  653. rgp->ignorescreen);
  654. if ((num_stragglers = ClusterNumMembers(stragglers)) == 0)
  655. {
  656. RGP_TRACE( "RGP S->2 all in ",
  657. rgp->rgpcounter, /* TRACE */
  658. GetCluster( rgp->outerscreen ), 0, 0 ); /* TRACE */
  659. return(1); /* all present and accounted for */
  660. }
  661. /* If stragglers is non-empty, perhaps I can still advance to stage 2
  662. * if I am not in cautious mode (no recent power fail and not
  663. * aborting and rerunning the regroup algorithm) AND all nodes but
  664. * one have checked in AND some minimum number of ticks have elapsed.
  665. *
666. * The minimum number of ticks is selected to be 1 greater than
667. * the LATEPOLL initiation period (allowed consecutive missed IamAlive time)
  668. * since that should guarantee that, if the
  669. * cluster has broken off into multiple disconnected clusters,
  670. * the other clusters would have detected the missing IamAlives,
  671. * started regroup and paused IO, thus preventing the possibility
  672. * of data corruption caused by a split brain situation.
  673. */
  674. if (!(rgp->cautiousmode) &&
  675. (num_stragglers == 1) &&
  676. (rgp->rgpcounter > rgp->rgpinfo.Min_Stage1_ticks))
  677. {
  678. RGP_TRACE( "RGP S->2 1 miss ",
  679. rgp->rgpcounter, /* TRACE */
  680. GetCluster( rgp->outerscreen ), /* TRACE */
  681. GetCluster( rgp->rgppkt.knownstage1 ), 0 ); /* TRACE */
  682. return(1); /* advance - all but one checked in */
  683. }
  684. return(0); /* sorry cannot advance yet */
  685. }
  686. /************************************************************************
  687. * regroup_stage3_advance
  688. * ===========================
  689. *
  690. * Description:
  691. *
  692. * This function is called after the split brain avoidance algorithm
  693. * is run and the tie-breaker is selected in stage 2. It checks if
  694. * we can proceed to stage 3 (RGP_PRUNING) and advances to stage 3
  695. * if possible.
  696. *
  697. * Parameters:
  698. *
  699. * None
  700. *
  701. * Returns:
  702. *
  703. * int - 1 if the regroup stage has been advanced to RGP_PRUNING;
  704. * 0 if the stage cannot be advanced yet.
  705. *
  706. * Algorithm:
  707. *
  708. * The algorithm depends on whether we are the tie-breaker or not.
  709. *
  710. * On the tie-breaker node, we first check if there are any
  711. * disconnects in the cluster. If there aren't any, there is no need
  712. * for pruning. We can then set pruning_result to knownstage2,
  713. * advance to the RGP_PRUNING stage and return 1. If there are
  714. * disconnects, we must wait a certain number of ticks to collect
  715. * connectivity info from all nodes. If the number of ticks have not
  716. * passed, return 0. If the required number of ticks have elapsed,
  717. * we must call the pruning algorithm to get the list of potential
  718. * groups. After that, the select_cluster() routine is called to
  719. * pick one from the set of possible clusters. After this is done,
  720. * pruning_result is set to the selected cluster and we return 1.
  721. *
  722. * On a non-tiebreaker node, nothing is done till a stage3 packet is
  723. * received from the tie-breaker node or another node which got a
  724. * stage 3 packet. If a stage 3 packet has not been received, we
  725. * simply return 0. If a stage 3 packet is received, RGP_PRUNING
  726. * stage is entered and we return 1.
  727. *
  728. ************************************************************************/
  729. _priv _resident int
  730. regroup_stage3_advance()
  731. {
  732. int stage_advanced = 0, numgroups, groupnum;
  733. if (rgp->tiebreaker == rgp->mynode)
  734. {
  735. if (connectivity_complete(rgp->rgppkt.connectivity_matrix))
  736. {
  737. /* No disconnects. All nodes in knownstage2 survive. */
  738. rgp->rgppkt.stage = RGP_PRUNING;
  739. ClusterCopy(rgp->rgppkt.pruning_result,
  740. rgp->rgppkt.knownstage2);
  741. stage_advanced = 1;
  742. RGP_TRACE( "RGP S->3 NoPrune", rgp->rgpcounter, 0, 0, 0 );
  743. }
  744. /* There are disconnects; must wait for connectivity
  745. * information to be complete. The info is deemed
  746. * complete after a fixed number of ticks have
  747. * elapsed.
  748. */
  749. else if (rgp->pruning_ticks >= RGP_CONNECTIVITY_TICKS)
  750. { /* connectivity info collection complete; enter stage 3 */
  751. RGP_TRACE( "RGP Con. matrix1",
  752. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[0], /*TRACE*/
  753. rgp->rgppkt.connectivity_matrix[1] ), /*TRACE*/
  754. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[2], /*TRACE*/
  755. rgp->rgppkt.connectivity_matrix[3] ), /*TRACE*/
  756. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[4], /*TRACE*/
  757. rgp->rgppkt.connectivity_matrix[5] ), /*TRACE*/
  758. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[6], /*TRACE*/
  759. rgp->rgppkt.connectivity_matrix[7])); /*TRACE*/
  760. RGP_TRACE( "RGP Con. matrix2",
  761. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[8], /*TRACE*/
  762. rgp->rgppkt.connectivity_matrix[9] ), /*TRACE*/
  763. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[10], /*TRACE*/
  764. rgp->rgppkt.connectivity_matrix[11]), /*TRACE*/
  765. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[12], /*TRACE*/
  766. rgp->rgppkt.connectivity_matrix[13]), /*TRACE*/
  767. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[14], /*TRACE*/
  768. rgp->rgppkt.connectivity_matrix[15]));/*TRACE*/
  769. numgroups = find_all_fully_connected_groups(
  770. rgp->rgppkt.connectivity_matrix,
  771. rgp->tiebreaker,
  772. rgp->potential_groups);
  773. if ((void *)rgp->select_cluster == RGP_NULL_PTR)
  774. {
  775. node_t keynode;
  776. cluster_t temp;
  777. ClusterIntersection(
  778. temp,
  779. rgp->rgppkt.knownstage2,
  780. rgp->rgppkt.quorumowner
  781. );
  782. if ( ClusterEmpty(temp) ) {
  783. keynode = RGP_NULL_NODE;
  784. } else {
  785. keynode = rgp_select_tiebreaker(temp);
  786. }
  787. RGP_TRACE( "RGP keynode ng ", keynode, numgroups, 0, 0); /*TRACE*/
  788. /* No callback specified; use regroup's own routine. */
  789. groupnum = rgp_select_cluster_ex(
  790. rgp->potential_groups, numgroups, keynode);
  791. }
  792. else
  793. {
  794. /* Call routine specified at rgp_start() time. */
  795. groupnum = (*(rgp->select_cluster))(
  796. rgp->potential_groups, numgroups);
  797. }
  798. if (groupnum >= 0)
  799. ClusterCopy(rgp->rgppkt.pruning_result,
  800. rgp->potential_groups[groupnum]);
  801. else
  802. /* No group can survive. Can't halt yet.
  803. * Need to tell everyone else.
  804. */
  805. ClusterInit(rgp->rgppkt.pruning_result);
  806. rgp->rgppkt.stage = RGP_PRUNING;
  807. stage_advanced = 1;
  808. RGP_TRACE( "RGP S->3 Pruned ",
  809. rgp->rgpcounter, /* TRACE */
  810. GetCluster( rgp->rgppkt.knownstage2 ), /* TRACE */
  811. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  812. numgroups ); /* TRACE */
  813. } /* connectivity info collection complete; enter stage 3 */
  814. } /* tie-breaker node */
  815. else
  816. { /* not tie-breaker node */
  817. if (ClusterNumMembers(rgp->rgppkt.knownstage3) != 0)
  818. {
  819. /* We got a stage 3 packet from someone. Enter stage 3. */
  820. rgp->rgppkt.stage = RGP_PRUNING;
  821. stage_advanced = 1;
  822. RGP_TRACE( "RGP Got S3 pkt ",
  823. rgp->rgpcounter, /* TRACE */
  824. GetCluster( rgp->rgppkt.knownstage2 ), /* TRACE */
  825. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  826. GetCluster( rgp->rgppkt.knownstage3 ) ); /* TRACE */
  827. }
  828. } /* not tie-breaker node */
  829. return(stage_advanced);
  830. }
  831. /************************************************************************
  832. * enter_first_cleanup_stage
  833. * =========================
  834. *
  835. * Description:
  836. *
  837. * This function performs the actions required when entering the
  838. * first of the message clean up stages.
  839. *
  840. * Parameters:
  841. *
  842. * None
  843. *
  844. * Returns:
  845. *
  846. * void - no return value
  847. *
  848. * Algorithm:
  849. *
  850. * There are many actions to be performed after the final cluster
  851. * is selected. The actions are described in comments throughout
  852. * this routine.
  853. *
  854. ************************************************************************/
  855. _priv _resident void
  856. enter_first_cleanup_stage()
  857. {
  858. cluster_t banishees;
  859. node_t failer;
  860. rgp->rgppkt.stage = RGP_PHASE1_CLEANUP;
  861. RGP_TRACE( "RGP S->4 ", rgp->rgpcounter, 0, 0, 0 );
  862. /* The packets we send now will not indicate we are in the phase 1
  863. * cleanup stage yet. We indicate we are in this stage only after
  864. * we have completed the clean up action associated with the stage.
  865. * This is done in rgp_event_handler, under the
  866. * RGP_EVT_PHASE1_CLEANUP_DONE event.
  867. */
  868. rgp->sendstage = 0;
  869. /* Now, we can resume IO since we have passed the split brain danger.
  870. * New split brain situations will result in regroup restarting and
  871. * pausing IO again.
  872. */
  873. rgp_resume_all_io();
  874. /* Compute in banishees the set of nodes being lost from the old
  875. * configuration.
  876. */
  877. ClusterDifference(banishees, rgp->rgpinfo.cluster,
  878. rgp->rgppkt.pruning_result);
  879. /* Install the new configuration into the masks. */
  880. ClusterCopy(rgp->outerscreen, rgp->rgppkt.pruning_result);
  881. #if defined( NT )
  882. ClusnetSetOuterscreen(
  883. NmClusnetHandle,
  884. (ULONG)*((PUSHORT)rgp->outerscreen)
  885. );
  886. #endif
  887. ClusterCopy(rgp->innerscreen, rgp->rgppkt.pruning_result);
  888. ClusterCopy(rgp->endnodes, rgp->rgppkt.pruning_result);
  889. ClusterCopy(rgp->rgpinfo.cluster, rgp->rgppkt.pruning_result);
  890. /* Select a new tiebreaker because the previous one may have been */
  891. /* pruned out. Note: tiebreaker_selected has already been set in S2. */
  892. rgp->tiebreaker =
  893. rgp_select_tiebreaker(rgp->rgppkt.pruning_result);
  894. /* F40 Bug FixID KCY0833 */
  895. /* Mark the state of the banishees as dead and invoke the
  896. * node down callback routine.
  897. */
  898. for (failer = 0; failer < (node_t) rgp->num_nodes; failer++)
  899. if (ClusterMember(banishees, failer)
  900. || rgp->node_states[failer].status == RGP_NODE_COMING_UP // fix bug#265069
  901. )
  902. {
  903. rgp->node_states[failer].status = RGP_NODE_DEAD;
  904. rgp->node_states[failer].pollstate = AWAITING_IAMALIVE;
  905. rgp->node_states[failer].lostHBs = 0;
  906. #if !defined(NT)
  907. (*(rgp->nodedown_callback))(EXT_NODE(failer));
  908. #else
  909. ClusnetSetNodeMembershipState(NmClusnetHandle,
  910. EXT_NODE( failer ),
  911. ClusnetNodeStateDead);
  912. //
  913. // On NT we do the nodedown callback at the end of stage 5.
  914. // This allows the cleanup phases to complete before we let
  915. // the "upper" layers know that a node went down.
  916. //
  917. if ( ClusterMember(rgp->OS_specific_control.CPUUPMASK,failer) )
  918. ClusterInsert(
  919. rgp->OS_specific_control.NeedsNodeDownCallback,
  920. failer
  921. );
  922. #endif // !defined(NT)
  923. }
  924. /* If some nodes have been lost from the configuration, then I will
  925. * queue regroup status packets to them. This is a best efforts
  926. * attempt to ensure that they get quickly taken out if they
  927. * do in fact continue to run.
  928. */
  929. ClusterUnion(rgp->status_targets, banishees, rgp->status_targets);
  930. //
  931. // In NT, we are using rgp->rgppkt.hadpowerfail to transmit
  932. // quorum ownership information
  933. //
  934. #if !defined(NT)
  935. /* I should inform the message system of any node that experienced a
  936. * power on recovery. The message system can use this to clear error
  937. * counters so that a link will not be declared down due to errors
  938. * which may have been caused by the power failure.
  939. */
  940. for (failer = 0; failer < (node_t) rgp->num_nodes; failer++)
  941. if ((ClusterMember(rgp->rgppkt.hadpowerfail, failer)) &&
  942. !(ClusterMember(banishees, failer)))
  943. /* This survivor had a power failure. */
  944. rgp_had_power_failure( EXT_NODE(failer) );
  945. #endif // NT
  946. /* Tell the OS to start clean up operations for the failed nodes. */
  947. rgp_start_phase1_cleanup();
  948. }
  949. /************************************************************************
  950. * evaluatestageadvance
  951. * ====================
  952. *
  953. * Description:
  954. *
  955. * This function evaluates whether additional state transitions are
  956. * possible as a result of the info just received.
  957. *
  958. * Parameters:
  959. *
  960. * None
  961. *
  962. * Returns:
  963. *
  964. * void - no return value
  965. *
  966. * Algorithm:
  967. *
  968. * To evaluate whether we can advance through the stages, a loop is
  969. * used with a case entry for each stage. If an entry decides not to
  970. * advance to the next stage, it must return from the function. If
  971. * it does advance, it should not return but remain in the loop
  972. * since it is possible to have cascaded stage transitions
  973. * especially in a two node system. Thus, the loop is exited when no
  974. * more stage transitions are possible.
  975. *
  976. ************************************************************************/
  977. _priv _resident static void
  978. evaluatestageadvance()
  979. {
  980. cluster_t temp_cluster;
  981. node_t node;
  982. node_t i;
  983. for (;;) /* loop until someone exits by returning */
  984. {
  985. switch (rgp->rgppkt.stage)
  986. {
  987. case RGP_COLDLOADED :
  988. {
  989. if (!rgp->OS_specific_control.ShuttingDown) {
  990. RGP_ERROR(RGP_RELOADFAILED);
  991. }
  992. return;
  993. }
  994. case RGP_ACTIVATED :
  995. { /* evaluate whether to go to stage RGP_CLOSING */
  996. if (!regroup_test_stage2_advance())
  997. return;
  998. if (!ClusterMember(rgp->rgppkt.knownstage1, rgp->mynode))
  999. RGP_ERROR(RGP_MISSED_POLL_TO_SELF);
  1000. rgp->rgppkt.stage = RGP_CLOSING;
  1001. rgp->rgpcounter = 0;
  1002. rgp->tiebreaker_selected = 0;
  1003. /* If we abort the regroup, and there's somebody that everybody
  1004. * banished on this regroup, the following line keeps him from
  1005. * joining up on the next regroup.
  1006. */
  1007. ClusterCopy(rgp->innerscreen, rgp->rgppkt.knownstage1);
  1008. break;
  1009. } /* evaluate whether to go to stage RGP_CLOSING */
  1010. case RGP_CLOSING :
  1011. { /* evaluate whether to go to stage RGP_PRUNING */
  1012. if (rgp->tiebreaker_selected)
  1013. {
  1014. if (regroup_stage3_advance())
  1015. break; /* try to advance further */
  1016. else
  1017. return; /* cannot advance any more */
  1018. }
  1019. if (!ClusterCompare(rgp->rgppkt.knownstage1,
  1020. rgp->rgppkt.knownstage2))
  1021. return;
  1022. //
  1023. // In NT, we no longer use the split-brain avoidance algorithm.
  1024. // We use a cluster-wide arbitration algorithm instead.
  1025. //
  1026. #if !defined(NT)
  1027. /* When the known stage 1 and known stage 2 sets are the
  1028. * same, we have the complete set of nodes that are
  1029. * connected to us. It is time to execute the split-
  1030. * brain avoidance algorithm. If we are a splinter group
  1031. * cut off from the main group, we will not survive this
  1032. * algorithm.
  1033. */
  1034. split_brain_avoidance_algorithm();
  1035. #endif // NT
  1036. /* We are the lucky survivors of the split brain avoidance
  1037. * algorithm. Now, we must proceed to elect a new tie-breaker
  1038. * since the current tie-breaker may no longer be with us.
  1039. */
  1040. rgp->tiebreaker =
  1041. rgp_select_tiebreaker(rgp->rgppkt.knownstage2);
  1042. rgp->tiebreaker_selected = 1;
  1043. RGP_TRACE( "RGP S2 tiebr sel",
  1044. rgp->rgpcounter, /* TRACE */
  1045. EXT_NODE(rgp->tiebreaker), /* TRACE */
  1046. 0, 0 ); /* TRACE */
  1047. rgp->pruning_ticks = 0;
  1048. break;
  1049. } /* evaluate whether to go to stage 3 */
  1050. case RGP_PRUNING :
  1051. { /* evaluate whether to go to RGP_PHASE1_CLEANUP stage */
  1052. if (rgp->arbitration_started) {
  1053. if (regroup_test_arbitrate_advance()) {
  1054. enter_first_cleanup_stage();
  1055. break;
  1056. } else {
  1057. return; // Stay in this stage //
  1058. }
  1059. }
  1060. if (rgp->has_unreachable_nodes)
  1061. {
  1062. RGP_TRACE( "RGP Unreach Node",
  1063. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  1064. GetCluster( rgp->unreachable_nodes ), 0, 0 ); /* TRACE */
  1065. /* Must check if the unreachable nodes are in the
  1066. * selected final group. If so, we must restart
  1067. * regroup.
  1068. */
  1069. ClusterIntersection(temp_cluster, rgp->unreachable_nodes,
  1070. rgp->rgppkt.pruning_result);
  1071. /* Clear the unreachable node mask and flag after examining
  1072. * them. If we restart, we will start with a clean slate.
  1073. */
  1074. rgp->has_unreachable_nodes = 0;
  1075. ClusterInit(rgp->unreachable_nodes);
  1076. if (ClusterNumMembers(temp_cluster) != 0)
  1077. {
  1078. /* We have a node unreachable event to a node
  1079. * selected to survive. We must regenerate
  1080. * the connectivity matrix and re-run the node
  1081. * pruning algorithm. Start a new regroup incident.
  1082. * All restarts are in cautious mode.
  1083. */
  1084. rgp->cautiousmode = 1;
  1085. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1086. rgp->rgppkt.reason = RGP_EVT_NODE_UNREACHABLE;
  1087. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1088. /* For causingnode, pick the first unreachable node
  1089. * in temp_cluster.
  1090. */
  1091. for (node = 0; node < (node_t) rgp->num_nodes; node++)
  1092. {
  1093. if (ClusterMember(temp_cluster, node))
  1094. {
  1095. rgp->rgppkt.causingnode = (uint8) EXT_NODE(node);
  1096. break;
  1097. }
  1098. }
  1099. regroup_restart();
  1100. return;
  1101. }
  1102. }
  1103. if (!ClusterCompare(rgp->rgppkt.knownstage2,
  1104. rgp->rgppkt.knownstage3))
  1105. return;
  1106. /* All nodes in the connected cluster have been notified
  1107. * of the pruning decision (entered stage 3). If we are
  1108. * selected to survive, we can now enter stage 4. If we are
  1109. * not in the selected group (pruning_result), we must halt.
  1110. * Wait for at least one node in PRUNING_RESULT to get into
  1111. * stage 4 before halting. This ensures that the algorithm
  1112. * does not stall in stage 3 with all pruned out nodes
  1113. * halting before ANY of the survivors finds that all nodes
  1114. * entered stage 3.
  1115. */
  1116. if (!ClusterMember(rgp->rgppkt.pruning_result, rgp->mynode))
  1117. {
  1118. /* Wait for at least one node in PRUNING_RESULT
  1119. * to get into stage 4 before halting. Since only
  1120. * nodes in PRUNING_RESULT get into stage 4, it is
  1121. * sufficient to check if knownstage4 has any members.
  1122. */
  1123. if (ClusterNumMembers(rgp->rgppkt.knownstage4) != 0)
  1124. RGP_ERROR(RGP_PRUNED_OUT);
  1125. return;
  1126. }
  1127. // proceed to second stage of pruning - arbitration
  1128. if( regroup_start_arbitrate() ) {
  1129. return; // stay in this stage
  1130. } else {
  1131. break; // either proceed to the next, or restart
  1132. }
  1133. break;
  1134. } /* evaluate whether to go to RGP_PHASE1_CLEANUP stage */
  1135. case RGP_PHASE1_CLEANUP :
  1136. { /* evaluate whether to go to RGP_PHASE2_CLEANUP stage */
  1137. if (!ClusterCompare(rgp->rgppkt.pruning_result,
  1138. rgp->rgppkt.knownstage4))
  1139. return;
  1140. rgp->rgppkt.stage = RGP_PHASE2_CLEANUP;
  1141. RGP_TRACE( "RGP S->5 ", rgp->rgpcounter, 0, 0, 0 );
  1142. /* The packets we send now will not indicate we are in the phase 2
  1143. * cleanup stage yet. We indicate we are in this stage only after
  1144. * we have completed the clean up action associated with the stage.
  1145. * This is done in rgp_event_handler, under the
  1146. * RGP_EVT_PHASE2_CLEANUP_DONE event.
  1147. */
  1148. rgp->sendstage = 0;
  1149. rgp_start_phase2_cleanup();
  1150. break;
  1151. } /* evaluate whether to go to RGP_PHASE2_CLEANUP stage */
  1152. case RGP_PHASE2_CLEANUP :
  1153. { /* evaluate whether to go to RGP_STABILIZED stage */
  1154. if (!ClusterCompare(rgp->rgppkt.knownstage4,
  1155. rgp->rgppkt.knownstage5))
  1156. return;
  1157. RGP_LOCK;
  1158. //
  1159. // [HACKHACK] This is not necessary anymore, since we
  1160. // are holding the lock in message.c when delivering
  1161. // regroup packet received event
  1162. //
  1163. if (RGP_PHASE2_CLEANUP != rgp->rgppkt.stage) {
  1164. RGP_TRACE( "RGP S->6 (race) ", rgp->rgpcounter, rgp->rgppkt.stage, 0, 0 );
  1165. break;
  1166. }
  1167. rgp->rgppkt.stage = RGP_STABILIZED;
  1168. RGP_TRACE( "RGP S->6 ", rgp->rgpcounter, 0, 0, 0 );
  1169. rgp->rgpcounter = 0;
  1170. rgp->restartcount = 0;
  1171. /* Reset the regroup flags which have not yet been cleared. */
  1172. rgp->cautiousmode = 0;
  1173. /* Clear the mask indicating nodes which own the quorum resrc. */
  1174. ClusterInit(rgp->rgppkt.quorumowner);
  1175. /* Copy the sequence number into the rgpinfo area. */
  1176. rgp->rgpinfo.seqnum = rgp->rgppkt.seqno;
  1177. SetEvent( rgp->OS_specific_control.Stabilized );
  1178. if (rgp->OS_specific_control.ArbitratingNode != MM_INVALID_NODE) {
  1179. // Somebody was arbitrating //
  1180. rgp->OS_specific_control.ApproxArbitrationWinner =
  1181. rgp->OS_specific_control.ArbitratingNode;
  1182. if (rgp->OS_specific_control.ArbitratingNode == (DWORD)EXT_NODE(rgp->mynode)) {
  1183. //
  1184. // [HackHack] To close 422405
  1185. // when 421828 is fixed, please uncomment the following line
  1186. //
  1187. // QuorumOwner = rgp->OS_specific_control.ArbitratingNode;
  1188. } else {
  1189. if (QuorumOwner != MM_INVALID_NODE) {
  1190. ClRtlLogPrint(LOG_UNUSUAL,
  1191. "[MM] : clearing quorum owner var (winner is %1!u!), %.\n",
  1192. rgp->OS_specific_control.ArbitratingNode
  1193. );
  1194. }
  1195. QuorumOwner = MM_INVALID_NODE;
  1196. }
  1197. }
  1198. rgp_cleanup_complete();
  1199. #if defined(NT)
  1200. //
  1201. // On NT we deferred doing the node down callback until all the
  1202. // cleanup phases have been complete.
  1203. //
  1204. ClusterCopy(
  1205. rgp->OS_specific_control.CPUUPMASK,
  1206. rgp->rgpinfo.cluster
  1207. );
  1208. (*(rgp->nodedown_callback))(
  1209. rgp->OS_specific_control.NeedsNodeDownCallback
  1210. );
  1211. //
  1212. // Clear the down node mask
  1213. //
  1214. ClusterInit(rgp->OS_specific_control.NeedsNodeDownCallback);
  1215. //
  1216. // finally, tell clusnet that regroup has finished
  1217. //
  1218. ClusnetRegroupFinished(NmClusnetHandle,
  1219. rgp->OS_specific_control.EventEpoch);
  1220. rgp->last_stable_seqno = rgp->rgppkt.seqno;
  1221. RGP_UNLOCK;
  1222. #endif
  1223. return;
  1224. } /* evaluate whether to go to RGP_STABILIZED stage */
  1225. case RGP_STABILIZED :
  1226. return; /* stabilized, so I am all done */
  1227. default :
  1228. RGP_ERROR(RGP_INTERNAL_ERROR); /* unknown stage */
  1229. } /* switch (rgp->rgppkt.stage) */
  1230. } /* loop until someone exits by returning */
  1231. }
  1232. /************************************************************************
  1233. * rgp_event_handler
  1234. * =================
  1235. *
  1236. * Description:
  1237. *
  1238. * The state machine and the heart of the regroup algorithm.
  1239. *
  1240. * Parameters:
  1241. *
  1242. * int event -
  1243. * which event happened
  1244. *
  1245. * node_t causingnode -
  1246. * node causing the event: node which sent a regroup status
  1247. * packet or whose IamAlives are missed; if the causing node is
  1248. * not relevant information, RGP_NULL_NODE can be passed and
  1249. * is ignored. *This node ID is in external format.*
  1250. *
  1251. * Returns:
  1252. *
  1253. * void - no return value
  1254. *
  1255. * Algorithm:
  1256. *
  1257. * The state machine is the heart of the regroup algorithm.
  1258. * It is organized as a switch statement with the regroup stage as
  1259. * the case label and the regroup event as the switch variable.
  1260. * Events could cause regroup to start a new incident, to advance
  1261. * through stages or to update information without advancing to
  1262. * another stage. This routine also arranges for regroup status
  1263. * packets to be sent to all relevant nodes including our own
  1264. * node.
  1265. *
  1266. ************************************************************************/
  1267. _priv _resident void
  1268. RGP_EVENT_HANDLER_EX(int event, node_t causingnode, void *arg)
  1269. {
  1270. rgp_pkt_t *rcvd_pkt_p;
  1271. cluster_t ignorescreen_rcvd;
  1272. uint8 oldstage;
  1273. int send_status_pkts = 0;
1274. /* Note: arg is only used when event == RGP_EVT_RECEIVED_PACKET. It is the ptr to the packet. */
  1275. /* Trace unusual invocations of this routine. */
  1276. if (event != RGP_EVT_RECEIVED_PACKET && event != RGP_EVT_CLOCK_TICK)
  1277. RGP_TRACE( "RGP Event ", event, causingnode, rgp->rgppkt.stage, rgp->rgpcounter ); /* TRACE */
  1278. switch (event)
  1279. {
  1280. case RGP_EVT_NODE_UNREACHABLE :
  1281. { /* All paths to a node are unreachable */
  1282. /* Ignore the event if the unreachable node has been eliminated
  1283. * from our outerscreen. The message system probably doesn't
  1284. * know it yet.
  1285. */
  1286. if (ClusterMember(rgp->outerscreen, INT_NODE(causingnode)))
  1287. {
  1288. /* Store this event and check after node pruning (when
  1289. * entering the RGP_PRUNING stage). If a regroup incident
  1290. * is in progress and we haven't entered the RGP_PRUNING
  1291. * stage yet, this will happen in the current incident.
  1292. * If not, it will happen in the next regroup incident
  1293. * which will surely start soon due to this disconnect.
  1294. *
  1295. * We do not start a regroup incident for this event. We will
  1296. * wait for IamAlives to be missed for starting a new regroup
  1297. * incident. This is due to the requirement that, in case
  1298. * of a total disconnect resulting in multiple groups, we must
  1299. * stay in stage 1 till we can guarantee that the other group(s)
  1300. * has started regroup and paused IO. We assume that the
  1301. * regroup incident started at the IamAlive check tick and
  1302. * use the periodic nature of the IamAlive sends and
  1303. * IamAlive checks to limit the stage1 pause to the period
  1304. * of IamAlive sends (+ 1 tick to drain IO). If we started
  1305. * a regroup incident due to the node unreachable event, we
  1306. * have to stay in stage1 longer.
  1307. */
  1308. rgp->has_unreachable_nodes = 1;
  1309. ClusterInsert(rgp->unreachable_nodes, INT_NODE(causingnode));
  1310. break;
  1311. }
  1312. } /* All paths to a node are unreachable */
  1313. case RGP_EVT_PHASE1_CLEANUP_DONE :
  1314. {
  1315. /* The following checks are needed in case we restarted
  1316. * regroup and asked for phase1 cleanup multiple times.
  1317. * We must make sure that all such requests have been
  1318. * completed.
  1319. */
  1320. if ( (rgp->rgppkt.stage == RGP_PHASE1_CLEANUP) &&
  1321. (rgp->rgp_msgsys_p->phase1_cleanup == 0) )
  1322. { /* all caught up */
  1323. /* Let others and ourselves get packets indicating we are in
  1324. * this stage. When we get that packet, we will update our
  1325. * knownstage field. If our sending or receiving apparatus
  1326. * failed meanwhile and we don't get our own packet, it
  1327. * will cause regroup to be restarted.
  1328. */
  1329. rgp->sendstage = 1;
  1330. send_status_pkts = 1;
  1331. evaluatestageadvance();
  1332. } /* all caught up */
  1333. break;
  1334. }
  1335. case RGP_EVT_PHASE2_CLEANUP_DONE :
  1336. {
  1337. /* The following checks are needed in case we restarted
  1338. * regroup and asked for phase2 cleanup multiple times.
  1339. * We must make sure that all such requests have been
  1340. * completed.
  1341. */
  1342. if ( (rgp->rgppkt.stage == RGP_PHASE2_CLEANUP) &&
  1343. (rgp->rgp_msgsys_p->phase2_cleanup == 0) )
  1344. { /* all caught up */
  1345. /* Let others and ourselves get packets indicating we are
  1346. * in this stage.
  1347. */
  1348. rgp->sendstage = 1;
  1349. send_status_pkts = 1;
  1350. evaluatestageadvance();
  1351. } /* all caught up */
  1352. break;
  1353. }
  1354. case RGP_EVT_LATEPOLLPACKET :
  1355. { /* some node is late with IamAlives */
  1356. RGP_LOCK; // to ensure that the packet receive does not initiate
  1357. // regroup asynchronously.
  1358. /* Start a new regroup incident if not already active. */
  1359. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1360. {
  1361. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1362. rgp->rgppkt.reason = RGP_EVT_LATEPOLLPACKET;
  1363. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1364. rgp->rgppkt.causingnode = (uint8) causingnode;
  1365. regroup_restart();
  1366. send_status_pkts = 1;
  1367. } else if (rgp->rgppkt.stage == RGP_COLDLOADED)
  1368. {
  1369. RGP_ERROR(RGP_RELOADFAILED);
  1370. }
  1371. RGP_UNLOCK;
  1372. break;
  1373. } /* some node is late with IamAlives */
  1374. case MM_EVT_LEAVE:
  1375. rgp->OS_specific_control.ShuttingDown = TRUE;
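// (intentional fall-through: a graceful leave reuses the RGP_EVT_BANISH_NODE path below)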
  1376. case RGP_EVT_BANISH_NODE :
  1377. { /* assumes that the lock is held */
  1378. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1379. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1380. // Pack Ignore Screen in the regroup_restart will
  1381. // fill reason and causingnode fields of the packet
  1382. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1383. regroup_restart();
  1384. send_status_pkts = 1;
  1385. break;
  1386. }
  1387. #if 0
  1388. case MM_EVT_LEAVE: // this node needs to leave the cluster gracefully
  1389. {
  1390. // Initiate a Regroup Event amongst remaining members if any
  1391. // Start a new regroup incident if not already active.
  1392. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1393. {
  1394. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1395. rgp->rgppkt.reason = MM_EVT_LEAVE;
  1396. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1397. rgp->rgppkt.causingnode = (uint8) EXT_NODE(rgp->mynode);
  1398. regroup_restart();
  1399. send_status_pkts = 1;
  1400. }
  1401. break;
  1402. }
  1403. #endif
  1404. case RGP_EVT_CLOCK_TICK :
  1405. { /* called on regroup clock tick when regroup is active */
  1406. if( (rgp->rgppkt.stage == RGP_PRUNING) &&
  1407. (rgp->arbitration_started)
  1408. )
  1409. {
  1410. rgp->arbitration_ticks++;
  1411. if (rgp->arbitration_ticks >= RGP_ARBITRATION_TIMEOUT) {
  1412. //
  1413. // Kill timed-out arbitrator
  1414. //
  1415. if(rgp->tiebreaker == rgp->mynode) {
  1416. //
  1417. // If this node was arbitrating, then die
  1418. //
  1419. if ( IsDebuggerPresent() ) {
  1420. DebugBreak();
  1421. }
  1422. RGP_ERROR(RGP_ARBITRATION_STALLED);
  1423. }
  1424. else {
  1425. //
  1426. // Kill the arbitrator and initiate another regroup
  1427. //
  1428. RGP_TRACE(
  1429. "RGP arbitration stalled ",
  1430. rgp->rgppkt.stage, 0, 0, 0
  1431. );
  1432. rgp_event_handler(
  1433. RGP_EVT_BANISH_NODE,
  1434. EXT_NODE(rgp->tiebreaker)
  1435. );
  1436. break;
  1437. }
  1438. }
  1439. evaluatestageadvance();
  1440. //
  1441. // No need to send packets while we are waiting for
  1442. // the arbitrator to win
  1443. //
  1444. // send_status_pkts = rgp->rgppkt.stage != RGP_PRUNING;
  1445. //
  1446. // [GN] Wrong. We do have to send status packets.
  1447. // If we have partial connectivity, we need to
1448. // continue exchanging packets, so that the pruner
  1449. // can learn indirectly that all nodes got the pruning results.
  1450. //
  1451. send_status_pkts = 1;
  1452. break;
  1453. }
  1454. else {
  1455. rgp->rgpcounter++; /* increment the counter */
  1456. }
  1457. if ( (rgp->rgppkt.stage == RGP_ACTIVATED) && (rgp->sendstage == 0) )
  1458. {
  1459. /* To detect the potential failure of my timer pop mechanism
  1460. * (such as by the corruption of the time list), I wait for
  1461. * at least one regroup clock tick before I let myself and
  1462. * others know I am in stage 1.
  1463. */
  1464. // [GorN Jan14/2000]
1465. // We don't send our connectivity information
1466. // before we get the first clock tick.
1467. // However, we collect this information in
  1468. // rgp->internal_connectivity_matrix.
  1469. // Let's put it in the outgoing packet
  1470. // so that everybody will see what we think about them.
  1471. MatrixOr(rgp->rgppkt.connectivity_matrix,
  1472. rgp->internal_connectivity_matrix);
  1473. rgp->sendstage = 1; /* let everyone know we are in stage 1 */
  1474. }
  1475. else if ( (rgp->rgppkt.stage >= RGP_CLOSING) &&
  1476. (rgp->rgppkt.stage <= RGP_PHASE2_CLEANUP) )
  1477. { /* check for possible abort and restart */
  1478. if (rgp->rgpcounter >= RGP_MUST_RESTART)
  1479. {
  1480. /* Stalled out. Probably someone died after starting
  1481. * or another node is still in stage 1 cautious mode
  1482. */
  1483. if ( ++(rgp->restartcount) > RGP_RESTART_MAX ) {
1484. // It is not a good idea to die just because somebody
1485. // is stalling. Let's add the stalled nodes to the ignore mask and restart.
  1486. //
  1487. // RGP_ERROR(RGP_INTERNAL_ERROR); // [Fixed]
  1488. cluster_t tmp, *stage;
  1489. switch (rgp->rgppkt.stage) {
  1490. case RGP_CLOSING: stage = &rgp->rgppkt.knownstage2; break;
  1491. case RGP_PRUNING: stage = &rgp->rgppkt.knownstage3; break;
  1492. case RGP_PHASE1_CLEANUP: stage = &rgp->rgppkt.knownstage4; break;
  1493. case RGP_PHASE2_CLEANUP: stage = &rgp->rgppkt.knownstage5; break;
  1494. }
  1495. ClusterDifference(tmp, rgp->rgpinfo.cluster, *stage);
  1496. //
1497. // If we stalled during closing because the tiebreaker running
1498. // the pruning algorithm went haywire, we can have tmp = 0.
  1499. // In this case, we need to ignore somebody to guarantee that
  1500. // the algorithm completes.
  1501. //
  1502. if ( ClusterEmpty(tmp) && rgp->tiebreaker_selected) {
  1503. ClusterInsert(tmp, rgp->tiebreaker);
  1504. }
  1505. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, tmp);
  1506. }
  1507. /* If we are stalling in stage 3 and we have been pruned out,
  1508. * it is possible that we are stalling because we have been
  1509. * isolated from all other nodes. We must halt in this case.
  1510. */
  1511. if ( (rgp->rgppkt.stage == RGP_PRUNING) &&
  1512. !ClusterMember(rgp->rgppkt.pruning_result, rgp->mynode) )
  1513. RGP_ERROR(RGP_PRUNED_OUT);
  1514. rgp->cautiousmode = 1;
  1515. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1516. RGP_TRACE( "RGP stalled ", rgp->rgppkt.stage, 0, 0, 0 );
  1517. regroup_restart();
  1518. } /* Stalled out ... */
  1519. } /* check for possible abort and restart */
  1520. if ((rgp->rgppkt.stage == RGP_CLOSING) && rgp->tiebreaker_selected)
  1521. rgp->pruning_ticks++;
  1522. evaluatestageadvance();
  1523. send_status_pkts = 1; /* send rgp packets regardless of progress */
  1524. break;
  1525. } /* called on regroup clock tick when regroup is active */
  1526. case RGP_EVT_RECEIVED_PACKET :
  1527. { /* received an rgp packet */
  1528. /* If the sending node is excluded by the outer screen, then it is
  1529. * not even part of the current (most recently known) configuration.
  1530. * Therefore the packet should not be honored, and a poison message
  1531. * should be sent to try to kill this renegade processor.
  1532. * That is done in the calling routine that processes all incoming
  1533. * regroup module packets (IamAlive, regroup and poison packets).
  1534. */
  1535. /* If the sending node was accepted by the outer screen but then
  1536. * excluded by the inner screen, then the packet will be disregarded
  1537. * but no poison message sent. This phenomenon may occur when this
  1538. * node has entered stage 2 without having heard from (recognized)
  1539. * the sending node and then a message arrives late from that
  1540. * sending node. In this case the fate of the sending node, i.e.
  1541. * whether it gets ruled out of the global configuration or not is
  1542. * unknown at this point. If the sender can get itself recognized
  1543. * by some node before that node enters stage 2, then it will be
  1544. * saved. Otherwise it will be declared down and subsequently shot
  1545. * with poison packets if it ever tries to assert itself.
  1546. */
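/* Illustrative sketch (not part of the original module): a minimal version
 * of the screening policy described above, assuming a hypothetical helper
 * send_poison_packet() for the outer-screen case. The real checks live in
 * the calling routine that dispatches incoming regroup module packets.
 */
#if 0
static int                              /* returns 1 if the packet may be processed */
rgp_screening_sketch(node_t sender)     /* external node number of the sender */
{
    if ( !ClusterMember(rgp->outerscreen, INT_NODE(sender)) ) {
        /* Sender is not part of the current configuration:
         * drop the packet and try to kill the renegade node.
         */
        send_poison_packet(sender);     /* hypothetical helper */
        return 0;
    }
    if ( !ClusterMember(rgp->innerscreen, INT_NODE(sender)) ) {
        /* Accepted by the outer screen but excluded by the inner screen:
         * drop silently, no poison message.
         */
        return 0;
    }
    return 1;                           /* honor the packet */
}
#endif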
  1547. /* Remember the arg to this routine is the packet pointer */
  1548. rcvd_pkt_p = (rgp_pkt_t *)arg; /* address of pkt just received */
  1549. if ( rgp->rgppkt.seqno != rcvd_pkt_p->seqno)
  1550. RGP_TRACE( "RGP Event ", event, causingnode, rgp->rgppkt.stage, rgp->rgpcounter ); /* TRACE */
  1551. UnpackIgnoreScreen(rcvd_pkt_p, ignorescreen_rcvd);
  1552. if ( !ClusterEmpty(ignorescreen_rcvd) ) {
  1553. RGP_TRACE( "RGP Incoming pkt", GetCluster(ignorescreen_rcvd),
  1554. rcvd_pkt_p->seqno, rgp->rgppkt.stage, causingnode);
  1555. }
  1556. if ( !ClusterMember(rgp->innerscreen, INT_NODE(causingnode))) {
  1557. RGP_TRACE( "RGP Ignoring !inner", causingnode, rgp->rgppkt.stage,
  1558. GetCluster(rgp->innerscreen), GetCluster(ignorescreen_rcvd) );
  1559. return;
  1560. }
  1561. RGP_LOCK; // To ensure that the timer thread does not initiate
  1562. // regroup asynchronously at this time.
  1563. //////////////////////////// New Ignore Screen Stuff /////////////////////////////////
  1564. if (ClusterMember(rgp->ignorescreen, INT_NODE(causingnode) )) {
  1565. RGP_UNLOCK;
  1566. RGP_TRACE( "RGP Ignoring", causingnode, rgp->rgppkt.stage,
  1567. GetCluster(rgp->ignorescreen), GetCluster(ignorescreen_rcvd) );
  1568. return;
  1569. }
  1570. if (rcvd_pkt_p->seqno < rgp->last_stable_seqno ) {
  1571. RGP_UNLOCK;
  1572. RGP_TRACE( "RGP old packet", causingnode, rcvd_pkt_p->seqno, rgp->last_stable_seqno, 0);
  1573. // This is a late packet from the previous regroup incident
  1574. // from the node that is currently in my outerscreen.
1575. // This node could not have sent it now; this is probably a packet
1576. // that got stuck somewhere and was delivered eons later.
  1577. // Simply ignore it.
  1578. return;
  1579. }
  1580. if ( ClusterMember(ignorescreen_rcvd, rgp->mynode ) ) {
  1581. //
  1582. // Sender ignores me. We will do the same to him.
  1583. //
  1584. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1585. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1586. regroup_restart();
  1587. send_status_pkts = 1;
  1588. RGP_UNLOCK;
  1589. break;
  1590. }
  1591. if ( ClusterCompare(ignorescreen_rcvd, rgp->ignorescreen) ) {
  1592. // We have the same ignore screen.
  1593. // No work needs to be done
  1594. } else if ( ClusterSubsetOf(rgp->ignorescreen, ignorescreen_rcvd) ) {
1595. // Incoming packet has a smaller ignore screen.
1596. // Ignore this packet, but reply to its sender with
1597. // our current regroup packet to force it to upgrade to
  1598. // our view of the world.
  1599. // do so only if we are properly initialized
  1600. if (rgp->rgppkt.stage == RGP_COLDLOADED && !rgp->OS_specific_control.ShuttingDown) {
  1601. RGP_ERROR(RGP_RELOADFAILED);
  1602. }
  1603. RGP_TRACE( "RGP smaller ignore mask ",
  1604. rgp->rgppkt.seqno, rcvd_pkt_p->seqno, /* TRACE */
  1605. rgp->rgppkt.stage, rcvd_pkt_p->stage ); /* TRACE */
  1606. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1607. rgp_broadcast(RGP_UNACK_REGROUP);
  1608. RGP_UNLOCK;
  1609. return;
  1610. } else if ( ClusterSubsetOf(ignorescreen_rcvd, rgp->ignorescreen) ) {
  1611. RGP_TRACE( "RGP bigger ignore mask ",
  1612. GetCluster(ignorescreen_rcvd), GetCluster(rgp->ignorescreen), /* TRACE */
  1613. rgp->rgppkt.stage, causingnode ); /* TRACE */
  1614. // Incoming packet has bigger ignore screen.
  1615. // Upgrade to this information and process the packet
  1616. rgp->rgppkt.seqno = rcvd_pkt_p->seqno;
  1617. /* Somebody else activated regroup. So, let's just copy */
  1618. /* the sender's reason code and reason nodes. */
  1619. //
  1620. // Ignore mask parts are in the reason and activatingnode fields
  1621. //
  1622. ClusterCopy(rgp->ignorescreen, ignorescreen_rcvd); // fix bug #328216
  1623. rgp->rgppkt.reason = rcvd_pkt_p->reason;
  1624. rgp->rgppkt.activatingnode = rcvd_pkt_p->activatingnode;
  1625. rgp->rgppkt.causingnode = rcvd_pkt_p->causingnode;
  1626. regroup_restart();
  1627. send_status_pkts = 1;
  1628. } else {
  1629. RGP_TRACE( "RGP different ignore masks ",
  1630. GetCluster(ignorescreen_rcvd), GetCluster(rgp->ignorescreen), /* TRACE */
  1631. rgp->rgppkt.stage, causingnode ); /* TRACE */
  1632. // Ignore masks are different and neither of them is
1633. // a subset of the other.
  1634. //
  1635. // We need to merge information out of these masks
  1636. // and restart the regroup.
  1637. //
  1638. // Packet that we just received will be ignored
  1639. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, ignorescreen_rcvd);
  1640. rgp->rgppkt.seqno = max(rgp->rgppkt.seqno, rcvd_pkt_p->seqno) + 1;
  1641. regroup_restart();
  1642. send_status_pkts = 1;
  1643. RGP_UNLOCK;
  1644. break;
  1645. }
  1646. //////////////////////////// End of new Ignore Screen Stuff /////////////////////////////////
1647. // Now the ignore screens of this node's packet and the incoming packet are the same //
1648. // proceed with regular regroup processing //
  1649. /* Since the packet is acceptable, the regroup sequence number
  1650. * must be compared to that of this node. If the incoming message
  1651. * has a higher sequence number, then a new pass of the regroup
  1652. * algorithm has started. This node must accept the new sequence
1653. * number, reinitialize its data, and start participating in
  1654. * the new pass. Also, the incoming message must be processed
  1655. * since, once the algorithm reinitializes, the sequence numbers
  1656. * now match.
  1657. *
  1658. * If the incoming packet has a matching sequence number, then it
  1659. * should be accepted. The knowledge of the global state of the
  1660. * algorithm it reflects must be merged with that already present
  1661. * in this node. Then this node must evaluate whether further
  1662. * state transitions are possible.
  1663. *
  1664. * Finally, if the incoming packet has a lower sequence number, then
  1665. * it comes from a node unaware of the current level of the global
  1666. * algorithm. The data in it should be ignored, but a packet should
  1667. * be sent to it so that it will reinitialize its algorithm.
  1668. *
  1669. * The sequence number is a 32 bit algebraic value - hopefully it
  1670. * will never wrap around.
  1671. */
  1672. if (rcvd_pkt_p->seqno < rgp->rgppkt.seqno)
  1673. { /* sender below current level - ignore but let him know it*/
  1674. RGP_TRACE( "RGP lower seqno ",
  1675. rgp->rgppkt.seqno, rcvd_pkt_p->seqno, /* TRACE */
  1676. rgp->rgppkt.stage, rcvd_pkt_p->stage ); /* TRACE */
  1677. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1678. rgp_broadcast(RGP_UNACK_REGROUP);
  1679. RGP_UNLOCK;
  1680. return;
  1681. }
  1682. if (rcvd_pkt_p->seqno > rgp->rgppkt.seqno)
  1683. { /* sender above current level - I must upgrade to it*/
1684. // The node that forces a restart is responsible for keeping
1685. // track of restarts and for deciding who will die or be ignored.
  1686. // if ( ++(rgp->restartcount) > RGP_RESTART_MAX )
  1687. // RGP_ERROR(RGP_INTERNAL_ERROR);
  1688. if ( (rgp->rgppkt.stage != RGP_STABILIZED) ||
  1689. ((rcvd_pkt_p->seqno - rgp->rgppkt.seqno) > 1) )
  1690. {
  1691. RGP_TRACE( "RGP higher seqno",
  1692. rgp->rgppkt.seqno, rcvd_pkt_p->seqno, /* TRACE */
  1693. rgp->rgppkt.stage, rcvd_pkt_p->stage );/* TRACE */
  1694. rgp->cautiousmode = 1;
  1695. }
  1696. rgp->rgppkt.seqno = rcvd_pkt_p->seqno;
  1697. /* Somebody else activated regroup. So, let's just copy */
  1698. /* the sender's reason code and reason nodes. */
  1699. rgp->rgppkt.reason = rcvd_pkt_p->reason;
  1700. rgp->rgppkt.activatingnode = rcvd_pkt_p->activatingnode;
  1701. rgp->rgppkt.causingnode = rcvd_pkt_p->causingnode;
  1702. regroup_restart();
  1703. send_status_pkts = 1;
  1704. } /* sender above current level - I must upgrade to it*/
  1705. /* Now we are at the same level - even if we weren't at first.
  1706. *
1707. * If the sender has already committed to a view of the world
  1708. * that excludes me, I must halt in order to keep the system in
  1709. * a consistent state.
  1710. *
  1711. * This is true even with the split brain avoidance algorithm.
  1712. * The fact that stage1 = stage2 in the packet implies that the
  1713. * sender has already run the split brain avoidance algorithm
  1714. * and decided that he should survive.
  1715. */
  1716. if ( (rcvd_pkt_p->stage > RGP_ACTIVATED) &&
  1717. ClusterCompare(rcvd_pkt_p->knownstage1,
  1718. rcvd_pkt_p->knownstage2) &&
  1719. !ClusterMember(rcvd_pkt_p->knownstage1, rgp->mynode) )
  1720. {
  1721. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1722. rgp->rgppkt.seqno ++;
  1723. regroup_restart();
  1724. send_status_pkts = 1;
  1725. RGP_UNLOCK;
  1726. // /* I must die for overall consistency. */
  1727. // RGP_ERROR((uint16) (RGP_PARIAH + causingnode)); // [Fixed]
  1728. break;
  1729. }
  1730. RGP_UNLOCK;
  1731. /* If I have terminated the active part of the algorithm, I
  1732. * am in stage 6 and am not routinely broadcasting my status
  1733. * anymore. If I get a packet from someone else who has not
  1734. * yet terminated, then I must send him the word. But if he
  1735. * has terminated, I must not send any packet or else there
  1736. * will be an infinite loop of packets bouncing back and forth.
  1737. */
  1738. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1739. { /* I have terminated so can't learn anything more. */
  1740. if (!ClusterCompare(rcvd_pkt_p->knownstage5,
  1741. rgp->rgppkt.knownstage5))
  1742. { /* but sender has not so I must notify him */
  1743. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1744. rgp_broadcast(RGP_UNACK_REGROUP);
  1745. }
  1746. return;
  1747. }
  1748. /* At this point, the packet is from a legal node within the
  1749. * current round of the algorithm and I have not terminated
  1750. * at stage RGP_STABILIZED so I need to absorb whatever new
  1751. * info is in this packet.
  1752. *
  1753. * The way to merge what this packet says with what I already
  1754. * know is to just logically OR the known stage x fields
  1755. * together.
  1756. */
  1757. {
  1758. int seqno = rcvd_pkt_p->seqno&0xffff;
  1759. int stage = rcvd_pkt_p->stage&0xffff;
  1760. int trgs = *(int*)rgp->status_targets & 0xffff;
  1761. int node = INT_NODE(causingnode)&0xffff;
  1762. RGP_TRACE( "RGP recv pkt ",
  1763. ((seqno << 16) | stage),
  1764. RGP_MERGE_TO_32(
  1765. rcvd_pkt_p->knownstage1,
  1766. rcvd_pkt_p->knownstage2
  1767. ),
  1768. RGP_MERGE_TO_32(
  1769. rcvd_pkt_p->knownstage3,
  1770. rcvd_pkt_p->knownstage4
  1771. ),
  1772. (trgs << 16) | node
  1773. );
  1774. }
  1775. rgp_sanity_check(rcvd_pkt_p, "RGP Received packet");
  1776. rgp_sanity_check(&(rgp->rgppkt), "RGP Internal packet");
  1777. ClusterUnion(rgp->rgppkt.quorumowner, rcvd_pkt_p->quorumowner,
  1778. rgp->rgppkt.quorumowner);
  1779. ClusterUnion(rgp->rgppkt.knownstage1, rcvd_pkt_p->knownstage1,
  1780. rgp->rgppkt.knownstage1);
  1781. ClusterUnion(rgp->rgppkt.knownstage2, rcvd_pkt_p->knownstage2,
  1782. rgp->rgppkt.knownstage2);
  1783. ClusterUnion(rgp->rgppkt.knownstage3, rcvd_pkt_p->knownstage3,
  1784. rgp->rgppkt.knownstage3);
  1785. ClusterUnion(rgp->rgppkt.knownstage4, rcvd_pkt_p->knownstage4,
  1786. rgp->rgppkt.knownstage4);
  1787. ClusterUnion(rgp->rgppkt.knownstage5, rcvd_pkt_p->knownstage5,
  1788. rgp->rgppkt.knownstage5);
  1789. ClusterUnion(rgp->rgppkt.pruning_result, rcvd_pkt_p->pruning_result,
  1790. rgp->rgppkt.pruning_result);
  1791. /* But when I am in stage 2, it is possible that I can learn to
  1792. * recognize some node I have not previously recognized by hearing
  1793. * of it indirectly from some other node that I have recognized.
  1794. * To handle this case, I always merge knownstage1 info into
  1795. * the inner screen so that subsequent messages from the newly
  1796. * recognized node will be accepted and processed.
  1797. */
  1798. if ((rgp->rgppkt.stage == RGP_CLOSING) &&
  1799. !(rgp->tiebreaker_selected))
  1800. ClusterUnion(rgp->innerscreen, rgp->rgppkt.knownstage1,
  1801. rgp->innerscreen);
  1802. /* In the first two stages of regroup, the inter-node connectivity
  1803. * information is collected and propagated. When we get a regroup
  1804. * packet, we turn ON the bit corresponding to the [our-node,
  1805. * sender-node] entry in the connectivity matrix. We also OR in
  1806. * the matrix sent by the sender node in the regroup packet.
  1807. *
  1808. * The matrix is not updated if we are in stage 1 and haven't
  1809. * received the first clock tick. This is to prevent the
  1810. * node pruning algorithm from considering us alive if our
  1811. * timer mechanism is disrupted, but the IPC mechanism is OK.
  1812. */
1813. /* [GorN 01/07/2000] If we do not collect connectivity information
1814. * until we receive the first tick, we can run into problems if the node is
1815. * killed right after it sends out its first timer-driven packet
1816. * (which doesn't have any connectivity info yet). This can cause
1817. * confusion. See bug 451792.
1818. *
1819. * What we will do is collect connectivity information on
1820. * the side even when rgp->sendstage is FALSE, and move it into the regroup
1821. * packet if we ever get a clock tick.
  1822. */
  1823. if (rgp->rgppkt.stage < RGP_PRUNING && !rgp->sendstage)
  1824. {
  1825. MatrixSet(rgp->internal_connectivity_matrix,
  1826. rgp->mynode, INT_NODE(causingnode));
  1827. if (causingnode != EXT_NODE(rgp->mynode))
  1828. MatrixOr(rgp->internal_connectivity_matrix,
  1829. rcvd_pkt_p->connectivity_matrix);
  1830. }
  1831. if ((rgp->rgppkt.stage < RGP_PRUNING) && rgp->sendstage)
  1832. {
  1833. MatrixSet(rgp->rgppkt.connectivity_matrix,
  1834. rgp->mynode, INT_NODE(causingnode));
  1835. if (causingnode != EXT_NODE(rgp->mynode))
  1836. MatrixOr(rgp->rgppkt.connectivity_matrix,
  1837. rcvd_pkt_p->connectivity_matrix);
  1838. }
  1839. /* Now, I can evaluate whether additional state transitions are
  1840. * possible as a result of the info just received.
  1841. */
  1842. oldstage = rgp->rgppkt.stage;
  1843. // QuorumCheck now runs in a separate thread
  1844. // if (oldstage != RGP_CLOSING) // Cannot run Quorumcheck from here.
  1845. evaluatestageadvance();
  1846. /* To speed things up, let us broadcast our status if our
  1847. * stage has changed and we are willing to let others and
  1848. * ourselves see it.
  1849. */
  1850. if ( (oldstage != rgp->rgppkt.stage) && rgp->sendstage )
  1851. send_status_pkts = 1; /* broadcast at once to speed things up */
  1852. break;
  1853. } /* received an rgp packet */
  1854. //
  1855. // We do not support power failure notifications in NT
  1856. //
  1857. #if defined(NT)
  1858. CL_ASSERT(event != RGP_EVT_POWERFAIL);
  1859. //
  1860. // Fall thru to default case
  1861. //
  1862. #else // NT
  1863. case RGP_EVT_POWERFAIL :
  1864. { /* Our node got a power up interrupt or an indication of power
  1865. * failure from another node. */
  1866. /* Note that this code will unconditionally abort and restart
  1867. * the algorithm even if it was active before the power failure.
  1868. * The new incident must be in cautious mode.
  1869. */
  1870. rgp->cautiousmode = 1;
  1871. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1872. rgp->rgppkt.reason = RGP_EVT_POWERFAIL;
  1873. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1874. rgp->rgppkt.causingnode = (uint8) causingnode;
  1875. /* rgp->pfail_state is set to a non-zero value when a pfail event
  1876. * is reported to regroup. It is decremented at every regroup clock
  1877. * tick till it reaches zero. While this number is non-zero, missing
  1878. * self IamAlives are ignored and do not cause the node to halt.
  1879. * This gives the sending hardware some time to recover from power
  1880. * failures before self IamAlives are checked.
  1881. */
  1882. if (causingnode == EXT_NODE(rgp->mynode))
  1883. rgp->pfail_state = RGP_PFAIL_TICKS;
  1884. /* Store the fact that causingnode experienced a PFAIL,
  1885. * for reporting to the message system when regroup stabilizes.
  1886. */
  1887. ClusterInsert(rgp->rgppkt.hadpowerfail, INT_NODE(causingnode));
  1888. regroup_restart();
  1889. send_status_pkts = 1;
  1890. break;
  1891. } /* power failure */
  1892. #endif // NT
  1893. default :
  1894. {
  1895. RGP_ERROR(RGP_INTERNAL_ERROR);
  1896. }
  1897. }
  1898. if (send_status_pkts) /* significant change - send status at once */
  1899. {
  1900. ClusterUnion(rgp->status_targets,
  1901. rgp->outerscreen, rgp->status_targets);
  1902. rgp_broadcast(RGP_UNACK_REGROUP);
  1903. }
  1904. }
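/* Illustrative sketch (not part of the original module): how a caller might
 * banish a node once it is deemed dead, mirroring the call made for a stalled
 * arbitrator in the clock-tick case above. RGP_EVT_BANISH_NODE assumes the
 * regroup lock is already held, so the sketch takes it first.
 */
#if 0
static void
rgp_banish_sketch(node_t dead_node)     /* external node number */
{
    RGP_LOCK;                           /* banish case assumes the lock is held */
    rgp_event_handler(RGP_EVT_BANISH_NODE, dead_node);
    RGP_UNLOCK;
}
#endif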
  1905. /************************************************************************
  1906. * rgp_check_packet
  1907. * =================
  1908. *
  1909. * Description:
  1910. *
  1911. * verifies that RGP packet has reasonable values in
  1912. * powerfail, knownstages, pruning_result, and connectivity_matrix fields
  1913. *
  1914. * Parameters:
  1915. *
  1916. * rgp_pkt_t* pkt -
  1917. * packet to be checked
  1918. *
  1919. * Returns:
  1920. *
  1921. * 0 - packet looks good
  1922. * 1,2,3... - strange looking packet
  1923. *
  1924. ************************************************************************/
  1925. int rgp_check_packet(rgp_pkt_t* pkt) {
  1926. node_t i;
  1927. //
  1928. // Verify that
  1929. // knownstage5 \subset knownstage4 \subset knownstage3 \subset
  1930. // knownstage2 \subset knownstage1 \subset rgp->rgpinfo.cluster
  1931. //
  1932. // int ClusterSubsetOf(cluster_t big, cluster_t small)
  1933. // Returns 1 if set small = set big or small is a subset of big.
  1934. //
  1935. if( !ClusterSubsetOf(pkt->knownstage4, pkt->knownstage5) ) {
  1936. return 5;
  1937. }
  1938. if( !ClusterSubsetOf(pkt->knownstage3, pkt->knownstage4) ) {
  1939. return 4;
  1940. }
  1941. if( !ClusterSubsetOf(pkt->knownstage2, pkt->knownstage3) ) {
  1942. return 3;
  1943. }
  1944. if( !ClusterSubsetOf(pkt->knownstage1, pkt->knownstage2) ) {
  1945. return 2;
  1946. }
  1947. if( !ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->knownstage1) ) {
  1948. return 1;
  1949. }
  1950. //
  1951. // pruning_result has to be a subset of knownstage2
  1952. //
  1953. if( !ClusterSubsetOf(pkt->knownstage2, pkt->pruning_result) ) {
  1954. return 9;
  1955. }
  1956. //
  1957. // quorumowner has to be a subset of original cluster
  1958. //
  1959. if(!ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->quorumowner)) {
  1960. return 8;
  1961. }
  1962. //
  1963. // Check connectivity matrix
  1964. //
  1965. for(i = 0; i < MAX_CLUSTER_SIZE; ++i) {
  1966. if( ClusterMember( rgp->rgpinfo.cluster, i ) ) {
  1967. //
  1968. // Node i is a member of a cluster
  1969. // Its connectivity bitmap has to be a subset of rgp->rgpinfo.cluster
  1970. //
  1971. if(!ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->connectivity_matrix[i])) {
  1972. return 10;
  1973. }
  1974. } else {
  1975. //
  1976. // Node i is not a member of a cluster
  1977. // Its connectivity bitmap has to be 0
  1978. //
  1979. if(!ClusterEmpty(pkt->connectivity_matrix[i]))
  1980. return 11;
  1981. }
  1982. }
  1983. return 0;
  1984. }
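/* Illustrative sketch (not part of the original module): one plausible way a
 * caller could combine rgp_check_packet() and rgp_print_packet(). The real
 * rgp_sanity_check() used earlier in this file may differ; this only shows
 * the intended usage of the return codes.
 */
#if 0
static void
rgp_sanity_check_sketch(rgp_pkt_t* pkt, char* label)
{
    int code = rgp_check_packet(pkt);   /* 0 means the packet looks good */
    if (code != 0) {
        /* dump the suspicious packet together with the failing check number */
        rgp_print_packet(pkt, label, code);
    }
}
#endif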
  1985. /************************************************************************
  1986. * rgp_print_packet
  1987. * =================
  1988. *
  1989. * Description:
  1990. *
  1991. * Prints RGP packet fields
  1992. *
  1993. * Parameters:
  1994. *
  1995. * rgp_pkt_t* pkt -
  1996. * packet to be printed
  1997. * char* label -
  1998. * label to be printed together with a packet
  1999. * int code -
  2000. * a number to be printed together with a packet
  2001. *
  2002. * Returns:
  2003. *
  2004. * VOID
  2005. *
  2006. ************************************************************************/
  2007. void rgp_print_packet(rgp_pkt_t* pkt, char* label, int code)
  2008. {
  2009. uint8 pktsubtype;
  2010. uint8 stage;
  2011. uint16 reason;
  2012. uint32 seqno;
  2013. uint8 activatingnode;
  2014. uint8 causingnode;
  2015. cluster_t quorumowner;
  2016. RGP_TRACE( label,
  2017. pkt->seqno, /* TRACE */
  2018. code,
  2019. (pkt->stage << 16) |
  2020. (pkt->activatingnode << 8) |
  2021. (pkt->causingnode), /* TRACE */
  2022. RGP_MERGE_TO_32( rgp->outerscreen,
  2023. rgp->innerscreen )
  2024. );
  2025. RGP_TRACE( "RGP CHK masks ",
  2026. RGP_MERGE_TO_32( rgp->rgpinfo.cluster, /* TRACE */
  2027. pkt->quorumowner ), /* TRACE */
  2028. RGP_MERGE_TO_32( pkt->knownstage1, /* TRACE */
  2029. pkt->knownstage2 ), /* TRACE */
  2030. RGP_MERGE_TO_32( pkt->knownstage3, /* TRACE */
  2031. pkt->knownstage4 ), /* TRACE */
  2032. RGP_MERGE_TO_32( pkt->knownstage5, /* TRACE */
  2033. pkt->pruning_result ) ); /* TRACE */
  2034. RGP_TRACE( "RGP CHK Con. matrix1",
  2035. RGP_MERGE_TO_32( pkt->connectivity_matrix[0], /*TRACE*/
  2036. pkt->connectivity_matrix[1] ), /*TRACE*/
  2037. RGP_MERGE_TO_32( pkt->connectivity_matrix[2], /*TRACE*/
  2038. pkt->connectivity_matrix[3] ), /*TRACE*/
  2039. RGP_MERGE_TO_32( pkt->connectivity_matrix[4], /*TRACE*/
  2040. pkt->connectivity_matrix[5] ), /*TRACE*/
  2041. RGP_MERGE_TO_32( pkt->connectivity_matrix[6], /*TRACE*/
  2042. pkt->connectivity_matrix[7])); /*TRACE*/
  2043. RGP_TRACE( "RGP CHK Con. matrix2",
  2044. RGP_MERGE_TO_32( pkt->connectivity_matrix[8], /*TRACE*/
  2045. pkt->connectivity_matrix[9] ), /*TRACE*/
  2046. RGP_MERGE_TO_32( pkt->connectivity_matrix[10], /*TRACE*/
  2047. pkt->connectivity_matrix[11]), /*TRACE*/
  2048. RGP_MERGE_TO_32( pkt->connectivity_matrix[12], /*TRACE*/
  2049. pkt->connectivity_matrix[13]), /*TRACE*/
  2050. RGP_MERGE_TO_32( pkt->connectivity_matrix[14], /*TRACE*/
  2051. pkt->connectivity_matrix[15]));/*TRACE*/
  2052. }
  2053. /************************************************************************
  2054. * UnpackIgnoreScreen
  2055. * =================
  2056. *
  2057. * Description:
  2058. *
  2059. * Extracts ignorescreen out of regroup packet
  2060. *
  2061. * Parameters:
  2062. *
  2063. * rgp_pkt_t* from -
  2064. * source packet
  2065. * cluster_t to -
  2066. * target node set
  2067. *
  2068. * Returns:
  2069. *
  2070. * VOID
  2071. *
  2072. * Comments:
  2073. *
2074. * If the packet is received from an NT4 node, the unpacked ignorescreen
2075. * will always be 0.
  2076. *
  2077. ************************************************************************/
  2078. void UnpackIgnoreScreen(rgp_pkt_t* from, cluster_t to)
  2079. {
  2080. #pragma warning( push )
  2081. #pragma warning( disable : 4244 )
  2082. if (from->reason < RGP_EVT_IGNORE_MASK) {
  2083. ClusterInit(to);
  2084. } else {
  2085. to[0] = ((uint16)from->reason) >> 8;
  2086. to[1] = (uint8)from->causingnode;
  2087. }
  2088. #pragma warning( pop )
  2089. }
  2090. /************************************************************************
2091. * PackIgnoreScreen
  2092. * =================
  2093. *
  2094. * Description:
  2095. *
  2096. * Put an ignorescreen back into a regroup packet
  2097. *
  2098. * Parameters:
  2099. *
  2100. * rgp_pkt_t* to -
  2101. * packet to be updated
  2102. * cluster_t from -
  2103. * source node set
  2104. *
  2105. * Returns:
  2106. *
  2107. * VOID
  2108. *
  2109. ************************************************************************/
  2110. void PackIgnoreScreen(rgp_pkt_t* to, cluster_t from)
  2111. {
  2112. if ( ClusterEmpty(from) ) {
  2113. to->reason &= 255;
  2114. to->causingnode = 0;
  2115. } else {
  2116. to->reason = (uint8)RGP_EVT_IGNORE_MASK | (from[0] << 8);
  2117. to->causingnode = from[1];
  2118. }
  2119. }
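/* Illustrative sketch (not part of the original module): the pack/unpack
 * round trip. PackIgnoreScreen() stores byte 0 of the ignore screen in the
 * high byte of the 16-bit reason field and byte 1 in causingnode;
 * UnpackIgnoreScreen() recovers the same two bytes. Assumes cluster_t is a
 * two-byte node mask, as the code above implies.
 */
#if 0
static void
ignorescreen_roundtrip_sketch(rgp_pkt_t* pkt, cluster_t screen)
{
    cluster_t unpacked;

    PackIgnoreScreen(pkt, screen);      /* reason/causingnode now carry the mask */
    UnpackIgnoreScreen(pkt, unpacked);  /* unpacked[0] == screen[0], unpacked[1] == screen[1] */
}
#endif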
  2120. /*---------------------------------------------------------------------------*/
  2121. #ifdef __cplusplus
  2122. }
  2123. #endif /* __cplusplus */
  2124. #if 0
  2125. History of changes to this file:
  2126. -------------------------------------------------------------------------
  2127. 1995, December 13 F40:KSK0610 /*F40:KSK06102.2*/
  2128. This file is part of the portable Regroup Module used in the NonStop
  2129. Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
  2130. are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
  2131. srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
  2132. The last two are simulation files to test the Regroup Module on a
  2133. UNIX workstation in user mode with processes simulating processor nodes
  2134. and UDP datagrams used to send unacknowledged datagrams.
  2135. This file was first submitted for release into NSK on 12/13/95.
  2136. ------------------------------------------------------------------------------
  2137. This change occurred on 19 Jan 1996 /*F40:MB06458.1*/
  2138. Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.2*/
  2139. - Some cleanup of the code /*F40:MB06458.3*/
  2140. - Increment KCCB counters to count the number of setup messages and /*F40:MB06458.4*/
  2141. unsequenced messages sent. /*F40:MB06458.5*/
  2142. - Fixed some bugs /*F40:MB06458.6*/
  2143. - Disable interrupts before allocating broadcast sibs. /*F40:MB06458.7*/
  2144. - Change per-packet-timeout to 5ms /*F40:MB06458.8*/
  2145. - Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.9*/
  2146. tnet services queue. /*F40:MB06458.10*/
  2147. - Call the millicode backdoor to get the processor status from SP /*F40:MB06458.11*/
  2148. - Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.12*/
  2149. - Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.13*/
  2150. to be unstoppable before calling this routine. /*F40:MB06458.14*/
  2151. - Added new steps in the build file called /*F40:MB06458.15*/
  2152. MSGSYS_C - compiles all the message system C files /*F40:MB06458.16*/
  2153. MSDRIVER - compiles all the MSDriver files /*F40:MB06458.17*/
  2154. REGROUP - compiles all the regroup files /*F40:MB06458.18*/
  2155. - remove #pragma env libspace because we set it as a command line /*F40:MB06458.19*/
  2156. parameter. /*F40:MB06458.20*/
  2157. ----------------------------------------------------------------------- /*F40:MB06458.21*/
  2158. #endif /* 0 - change descriptions */