Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2543 lines
94 KiB

  1. #ifdef __TANDEM
  2. #pragma columns 79
  3. #pragma page "srgpsm.c - T9050 - Regroup Module state machine routines"
  4. #endif
  5. /* @@@ START COPYRIGHT @@@
  6. ** Tandem Confidential: Need to Know only
  7. ** Copyright (c) 1995, Tandem Computers Incorporated
  8. ** Protected as an unpublished work.
  9. ** All Rights Reserved.
  10. **
  11. ** The computer program listings, specifications, and documentation
  12. ** herein are the property of Tandem Computers Incorporated and shall
  13. ** not be reproduced, copied, disclosed, or used in whole or in part
  14. ** for any reason without the prior express written permission of
  15. ** Tandem Computers Incorporated.
  16. **
  17. ** @@@ END COPYRIGHT @@@
  18. **/
  19. /*---------------------------------------------------------------------------
  20. * This file (srgpsm.c) contains regroup state machine routines.
  21. *---------------------------------------------------------------------------*/
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif /* __cplusplus */
  25. #include <wrgp.h>
  26. /*---------- arbitration algorithm ------------ */
  27. DWORD MmQuorumArbitrationTimeout = CLUSTER_QUORUM_DEFAULT_ARBITRATION_TIMEOUT; // seconds
  28. DWORD MmQuorumArbitrationEqualizer = 7; // seconds
  29. #define RGP_ARBITRATION_TIMEOUT ((MmQuorumArbitrationTimeout * 100)/30) // tick == 300ms
  30. #define AVERAGE_ARBITRATION_TIME_IN_SECONDS (MmQuorumArbitrationEqualizer)
  31. void enter_first_cleanup_stage();
  32. void regroup_restart();
  33. int ClusterEmpty(cluster_t c);
  34. DWORD
  35. DiskArbitrationThread(
  36. IN LPVOID param
  37. ) ;
  38. _priv _resident static int
  39. regroup_test_arbitrate_advance()
  40. {
  41. cluster_t temp;
  42. int orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  43. int current_numnodes = ClusterNumMembers(rgp->rgppkt.pruning_result);
  44. if( orig_numnodes == current_numnodes ) {
  45. return 1;
  46. }
  47. //
  48. // If somebody entered stage4 then our group owns the quorum
  49. //
  50. ClusterIntersection(
  51. temp,
  52. rgp->rgppkt.knownstage4,
  53. rgp->rgppkt.pruning_result
  54. );
  55. return ClusterNumMembers(temp) != 0;
  56. }
  57. _priv _resident static int
  58. regroup_start_arbitrate()
  59. {
  60. int orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  61. int current_numnodes = ClusterNumMembers(rgp->rgppkt.pruning_result);
  62. if( orig_numnodes == current_numnodes ) {
  63. enter_first_cleanup_stage();
  64. return 0; // No Arbitration needed. Proceed to clean up stage //
  65. }
  66. else {
  67. cluster_t arbitrators;
  68. int n_arbitrators;
  69. node_t arbitrator;
  70. HANDLE thread;
  71. DWORD threadId;
  72. ULONG epoch;
  73. RGP_LOCK;
  74. epoch = rgp->OS_specific_control.EventEpoch;
  75. if(rgp->arbitration_started) {
  76. RGP_UNLOCK;
  77. return 1; // stay in this stage for awhile
  78. }
  79. rgp->arbitration_ticks = 0;
  80. rgp->arbitration_started = 1;
  81. RGP_UNLOCK;
  82. ClusterIntersection(
  83. arbitrators,
  84. rgp->rgppkt.pruning_result,
  85. rgp->rgppkt.quorumowner
  86. );
  87. n_arbitrators = ClusterNumMembers(arbitrators);
  88. if(n_arbitrators == 0) {
  89. //
  90. // If there are no quorum owners in this group //
  91. // Let's take the guy with the lowest id //
  92. //
  93. arbitrator = rgp_select_tiebreaker(rgp->rgppkt.pruning_result);
  94. } else {
  95. //
  96. // Otherwise we will take the quorum owner guy
  97. // with the lowest id
  98. //
  99. arbitrator = rgp_select_tiebreaker(arbitrators);
  100. if(n_arbitrators > 1) {
  101. RGP_TRACE( "RGP !!! More than one quorum owner",
  102. EXT_NODE(arbitrator), /* TRACE */
  103. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  104. GetCluster( rgp->rgppkt.pruning_result ),/* TRACE */
  105. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  106. // Do we need to kill all other arbitrators?
  107. // No.
  108. // ClusterDelete(arbitrators, arbitrator);
  109. // ClusterUnion(
  110. // rgp->poison_targets,
  111. // rgp->poison_targets,
  112. // arbitrators
  113. // );
  114. // rgp_broadcast(RGP_UNACK_POISON);
  115. }
  116. }
  117. rgp->tiebreaker = arbitrator;
  118. //
  119. // Now we have an arbitrating node
  120. // We will run a thread that will run arbitration algorithm
  121. //
  122. RGP_TRACE( "RGP Arbitration Delegated to",
  123. EXT_NODE(arbitrator), /* TRACE */
  124. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  125. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  126. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  127. // Fix Bug #460991
  128. // regroup_restart on stage 4 or later will reset ArbitratingNode
  129. // and if all the nodes are present after restart ApproxArbitrationWinner
  130. // will be not set properly. Assign it here.
  131. rgp->OS_specific_control.ApproxArbitrationWinner =
  132. rgp->OS_specific_control.ArbitratingNode = (DWORD)EXT_NODE(arbitrator);
  133. if(arbitrator != rgp->mynode) {
  134. return 1;
  135. }
  136. thread = CreateThread( NULL, // security attributes
  137. 0, // stack_size = default
  138. DiskArbitrationThread,
  139. ULongToPtr(epoch),
  140. 0, // runs immediately
  141. &threadId );
  142. if(thread == NULL) {
  143. //
  144. // Force Others to regroup //
  145. //
  146. RGP_LOCK;
  147. rgp_event_handler( RGP_EVT_BANISH_NODE, EXT_NODE(rgp->mynode) );
  148. RGP_UNLOCK;
  149. //
  150. // Kill this node
  151. //
  152. RGP_ERROR(RGP_ARBITRATION_FAILED);
  153. return FALSE;
  154. }
  155. CloseHandle(thread);
  156. }
  157. return TRUE;
  158. }
  159. DWORD
  160. DiskArbitrationThread(
  161. IN LPVOID param
  162. )
  163. {
  164. cluster_t current_participants;
  165. DWORD status;
  166. int participant_count;
  167. int delay;
  168. ULONG_PTR startingEpoch = (ULONG_PTR) param;
  169. BOOL EpochsEqual;
  170. int orig_numnodes;
  171. int current_numnodes;
  172. LONGLONG Time1, Time2;
  173. ClusterCopy(current_participants, rgp->rgppkt.pruning_result);
  174. orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  175. current_numnodes = ClusterNumMembers(current_participants);
  176. RGP_LOCK;
  177. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  178. RGP_UNLOCK;
  179. if(!EpochsEqual)
  180. return 0;
  181. delay = (orig_numnodes+1)/2 - current_numnodes;
  182. if(delay < 0) delay = 0;
  183. Sleep(delay * 6000);
  184. RGP_LOCK;
  185. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  186. if (EpochsEqual) {
  187. rgp->OS_specific_control.ArbitrationInProgress += 1;
  188. }
  189. RGP_UNLOCK;
  190. if(!EpochsEqual)
  191. return 0;
  192. GetSystemTimeAsFileTime((LPFILETIME)&Time1);
  193. status = (*(rgp->OS_specific_control.QuorumCallback))();
  194. GetSystemTimeAsFileTime((LPFILETIME)&Time2);
  195. if (status != 0
  196. && startingEpoch == rgp->OS_specific_control.EventEpoch)
  197. {
  198. // If we won the arbitration and we are in the same epoch (approx check)
  199. // we need to figure out whether we need to slow down a little
  200. Time2 -= Time1;
  201. // Convert to seconds
  202. Time2 = Time2 / 10 / 1000 / 1000;
  203. //
  204. // [HACKHACK] GorN Oct/30/1999
  205. // We had a weird timejump in the middle of the arbitration
  206. // Arbitration was completed before it started, we slept for
  207. // too long and regroup timed us out. Let's guard against it.
  208. //
  209. if ( (Time2 >= 0)
  210. && (Time2 < AVERAGE_ARBITRATION_TIME_IN_SECONDS) )
  211. {
  212. //
  213. // Don't need to be better than the average
  214. // If we are so fast, let's slow down
  215. //
  216. Time2 = AVERAGE_ARBITRATION_TIME_IN_SECONDS - Time2;
  217. RGP_TRACE( "RGP sleeping",
  218. (ULONG)Time2, /* TRACE */
  219. 0, /* TRACE */
  220. 0, /* TRACE */
  221. 0 ); /* TRACE */
  222. Sleep( (ULONG)(Time2 * 1000) );
  223. }
  224. }
  225. RGP_LOCK;
  226. rgp->OS_specific_control.ArbitrationInProgress -= 1;
  227. EpochsEqual = ( startingEpoch == rgp->OS_specific_control.EventEpoch );
  228. if(!EpochsEqual) {
  229. RGP_UNLOCK;
  230. return 0;
  231. }
  232. if(status) {
  233. //
  234. // We own the quorum device
  235. // Let's proceed to the next stage
  236. //
  237. enter_first_cleanup_stage();
  238. RGP_UNLOCK;
  239. //
  240. // All the rest will see that we are in cleanup stage and
  241. // will proceed to it too
  242. //
  243. } else {
  244. //
  245. // Force Others to regroup //
  246. //
  247. rgp_event_handler( RGP_EVT_BANISH_NODE, EXT_NODE(rgp->mynode) );
  248. RGP_UNLOCK;
  249. //
  250. // Kill this node
  251. //
  252. RGP_ERROR(RGP_ARBITRATION_FAILED);
  253. }
  254. return 0;
  255. }
  256. /************************************************************************
  257. * rgp_check_packet
  258. * rgp_print_packet
  259. * =================
  260. *
  261. * Description:
  262. *
  263. * Forward declarations of functions used in rgp_sanity_check macro
  264. *
  265. ************************************************************************/
  266. void rgp_print_packet(rgp_pkt_t* pkt, char* label, int code);
  267. int rgp_check_packet(rgp_pkt_t* pkt);
  268. /************************************************************************
  269. * rgp_sanity_check
  270. * =================
  271. *
  272. * Description:
  273. *
  274. * This macro prints RGP packet if it has unreasonable values in
  275. * powerfail, knownstages, pruning_result, and connectivity_matrix fields.
  276. *
  277. * Parameters:
  278. *
  279. * rgp_pkt_t* pkt -
  280. * packet to be checked
  281. * char* label -
  282. * label that will be printed together with a packet
  283. *
  284. * Returns:
  285. *
  286. * VOID
  287. *
  288. ************************************************************************/
  289. #define rgp_sanity_check(__pkt,__label) \
  290. do { \
  291. int __code; __code = rgp_check_packet(__pkt); \
  292. if( __code ) {rgp_print_packet(__pkt, __label, __code);} \
  293. } while ( 0 )
  294. /*---------------------------------------------------------------------------*/
  295. /************************************************************************
  296. * split_brain_avoidance_algorithm
  297. * ===============================
  298. *
  299. * Description:
  300. *
  301. * This algorithm ensures that, after a regroup incident completes,
  302. * at most one group of nodes will survive regardless of connectivity
  303. * failures.
  304. *
  305. * Parameters:
  306. *
  307. * None
  308. *
  309. * Returns:
  310. *
  311. * void - no return value; The algorithm results in either this node
  312. * halting (with the RGP_AVOID_SPLIT_BRAIN halt code) or this group
  313. * being the only group that survives.
  314. *
  315. * Algorithm:
  316. *
  317. * The algorithm is described in detail in the Sierra Tech Memo S.84,
  318. * "Modifications in Regroup Algorithm for Sierra".
  319. *
  320. * The algorithm looks at the set of nodes currently visible from the
  321. * local cluster and compares it to the set of nodes alive before
  322. * the regroup incident started (outerscreen). The decision to survive
  323. * or halt depends on the number of nodes in the current group compared
  324. * to the number of nodes in the original group.
  325. *
  326. * Case 1:
  327. * If the current group contains > half the original number, this
  328. * group survives.
  329. *
  330. * Case 2:
  331. * If the current group contains < half the original number, this
  332. * node (and group) halts.
  333. *
  334. * Case 3:
  335. * If the current group contains exactly half the original number AND
  336. * the current group has at least two members, then this group
  337. * survives if and only if it contains the tie-breaker node (selected
  338. * when the cluster is formed and after each regroup incident).
  339. *
  340. * Case 4:
  341. * If the current group contains exactly half the original number AND
  342. * the current group has exactly one member, then we will call the
  343. * QuromSelect procedure to check if the Quorum Disk is accessible
  344. * from this node. If the procedure returns value TRUE we survive;
  345. * else we halt.
  346. *
  347. *
  348. ************************************************************************/
  349. _priv _resident static void
  350. split_brain_avoidance_algorithm()
  351. {
  352. int orig_numnodes, current_numnodes;
  353. RGP_TRACE( "RGP SpltBrainAlg",
  354. EXT_NODE(rgp->tiebreaker), /* TRACE */
  355. GetCluster( rgp->rgpinfo.cluster ), /* TRACE */
  356. GetCluster( rgp->outerscreen ), /* TRACE */
  357. GetCluster( rgp->rgppkt.knownstage2 ) ); /* TRACE */
  358. /* Sanity checks:
  359. * 1. The current set of nodes must be a subset of the original set
  360. * of nodes.
  361. * 2. My node must be in the current set. This was checked
  362. * when stage2 was entered. No need to check again.
  363. */
  364. if (!ClusterSubsetOf(rgp->rgpinfo.cluster, rgp->rgppkt.knownstage2))
  365. RGP_ERROR(RGP_INTERNAL_ERROR);
  366. orig_numnodes = ClusterNumMembers(rgp->rgpinfo.cluster);
  367. current_numnodes = ClusterNumMembers(rgp->rgppkt.knownstage2);
  368. if (orig_numnodes == current_numnodes)
  369. /* All nodes are alive. No split brain possibility. */
  370. return;
  371. else if (orig_numnodes == 2) /* Special 2-node case */
  372. {
  373. if ((*(rgp->OS_specific_control.QuorumCallback))())
  374. return; /* we have access to Quorum disk. We survive. */
  375. else {
  376. #if defined( NT )
  377. ClusnetHalt( NmClusnetHandle );
  378. #endif
  379. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  380. }
  381. } /* Special 2-node case */
  382. else /* Multi (>2) node case */
  383. {
  384. if ((current_numnodes << 1) > orig_numnodes)
  385. /* Our group has more than half the nodes => we are the majority.
  386. * We can survive. Other group(s) will kill themselves.
  387. */
  388. return;
  389. else if ((current_numnodes << 1) < orig_numnodes)
  390. /* Our group has less than half the nodes => there may be a
  391. * larger group alive. We must halt and allow that group to
  392. * survive.
  393. */
  394. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  395. else
  396. {
  397. /* Our group has exactly half the number of processors;
  398. * We survive if we contain the tie-breaker node and halt otherwise.
  399. */
  400. if (ClusterMember(rgp->rgppkt.knownstage2, rgp->tiebreaker))
  401. return;
  402. else
  403. RGP_ERROR(RGP_AVOID_SPLIT_BRAIN);
  404. }
  405. } /* Multi (>2) node case */
  406. }
  407. /************************************************************************
  408. * regroup_restart
  409. * ===============
  410. *
  411. * Description:
  412. *
  413. * Starts a new regroup incident.
  414. *
  415. * Parameters:
  416. *
  417. * None
  418. *
  419. * Returns:
  420. *
  421. * void - no return value
  422. *
  423. * Algorithm:
  424. *
  425. * Sets the regroup state to RGP_ACTIVATED, pauses all IO and
  426. * initializes the stage masks and connectivity matrix.
  427. *
  428. ************************************************************************/
  429. _priv _resident static void
  430. regroup_restart()
  431. {
  432. cluster_t old_ignorescreen;
  433. UnpackIgnoreScreen(&rgp->rgppkt, old_ignorescreen);
  434. RGP_TRACE( "RGP (re)starting",
  435. rgp->rgppkt.seqno, /* TRACE */
  436. rgp->rgppkt.reason, /* TRACE */
  437. rgp->rgppkt.activatingnode, /* TRACE */
  438. rgp->rgppkt.causingnode ); /* TRACE */
  439. RGP_TRACE( "RGP masks ",
  440. RGP_MERGE_TO_32( rgp->outerscreen, /* TRACE */
  441. rgp->innerscreen ), /* TRACE */
  442. RGP_MERGE_TO_32( rgp->rgppkt.knownstage1, /* TRACE */
  443. rgp->rgppkt.knownstage2 ), /* TRACE */
  444. RGP_MERGE_TO_32( rgp->rgppkt.knownstage3, /* TRACE */
  445. rgp->rgppkt.knownstage4 ), /* TRACE */
  446. RGP_MERGE_TO_32( rgp->rgppkt.knownstage5, /* TRACE */
  447. rgp->rgppkt.pruning_result ) ); /* TRACE */
  448. /* We are about to start a new pass of the regroup algorithm.
  449. * This does not necessarily mean we have finished the previous
  450. * pass; i.e., in an abort situation we may be starting over.
  451. * This may occur when some other node fails during the current
  452. * pass through the algorithm leaving us hung up at one of the
  453. * intermediate stages.
  454. */
  455. //
  456. // GN. When we do MM_LEAVE. Our state is COLDLOADED.
  457. // Bailing out of regroup_restart here would prevent us from
  458. // forming a regroup packet that would initate a banishing regroup incident
  459. //
  460. /* To avoid split brained nodes from corrupting data in storage
  461. * devices, we request the transport subsystem to hold all IO requests
  462. * in a queue and not transfer them over SNet. We will allow IO to
  463. * be resumed when regroup can guarantee that there can no longer be
  464. * split brains. This will be done when the final group is determined
  465. * and regroup enters the RGP_PHASE1_CLEANUP stage.
  466. */
  467. rgp_hold_all_io();
  468. /* The following is a bit of history from the NSK regroup algorithm from
  469. * pre-Sierra systems based on the InterProcessor Bus (IPB). Some of
  470. * the particulars mentioned here have changed, but the principle remains.
  471. *
  472. * Previously, we used to mark all the known stages as zero, except for
  473. * stage1. We used to mark only ourselves as in stage1. So, even if our
  474. * bus reception logic is screwed up, and we are not receiving packets
  475. * from anybody including ourselves, we would mark ourselves as being in
  476. * stage1. And after (what used to be) six ticks, we would proceed into
  477. * stage2 and mark ourselves as being in stage2. This would cause stage1
  478. * and stage2 to be equal, and our world would constitute just
  479. * ourselves. Thus we would go through regroup eliminating everybody
  480. * else. However, since we are not receiving packets from anybody else,
  481. * we would miss our own iamalive packets, and we too will soon die of
  482. * %4032. Thus the symptoms would constitute everybody else dying of
  483. * (%4040 + some node number), and that node dying with a %4032 halt.
  484. * See TPR S 88070112309628 for more details.
  485. *
  486. * To avoid this situation, we now do not mark ourselves as in a
  487. * particular stage until we get our own regroup packets indicating we
  488. * are in that stage. Thus, in regroup_restart, all the stages are
  489. * cleared. Previously, regroupbroadcaststatus in sendqueuedmessages
  490. * used to send directly from the regroup_control structures.
  491. * regroupbroadcaststatus has been modified to construct the unsequenced
  492. * packets on its stack. It would first copy the state from the
  493. * regroup_control structure, and then would LOR in our node into a known
  494. * stage, if requested to do so. When we receive that packet, we would
  495. * merge that information into our state, and thus we would be
  496. * guaranteed that our bus sending and reception logic is working, and
  497. * that we can legitimately mark ourselves as being in that stage. This
  498. * whole change avoids problems where bus sending logic works, but bus
  499. * reception logic is screwed up for both buses in a node.
  500. */
  501. rgp->sendstage = 0; /* Don't let anyone know I am in stage 1 until
  502. * I have seen a regroup clock tick; this is to
  503. * cause this node to halt if it is not getting
  504. * clock ticks. I will halt when the other nodes
  505. * advance without me and send me a status packet
  506. * indicating this or send me a poison packet
  507. * after declaring me down.
  508. */
  509. rgp->rgpcounter = 0;
  510. ClusterInit(rgp->rgppkt.knownstage1);
  511. ClusterInit(rgp->rgppkt.knownstage2);
  512. ClusterInit(rgp->rgppkt.knownstage3);
  513. ClusterInit(rgp->rgppkt.knownstage4);
  514. ClusterInit(rgp->rgppkt.knownstage5);
  515. ClusterInit(rgp->rgppkt.pruning_result);
  516. MatrixInit(rgp->rgppkt.connectivity_matrix);
  517. MatrixInit(rgp->internal_connectivity_matrix);
  518. /* Just for ease of debugging, to send in our poison packets, we keep
  519. * the known nodes mask at the start of regroup. poison packets contain
  520. * known nodes at the beginning of regroup and at the end of it.
  521. */
  522. ClusterCopy(rgp->initnodes, rgp->rgpinfo.cluster);
  523. ClusterInit(rgp->endnodes);
  524. #if defined( NT )
  525. //
  526. // increment the event epoch so we can detect stale events
  527. // from clusnet
  528. //
  529. ++rgp->OS_specific_control.EventEpoch;
  530. #endif
  531. if ( (rgp->rgppkt.stage >= RGP_CLOSING) &&
  532. (rgp->rgppkt.stage <= RGP_PHASE2_CLEANUP) &&
  533. ClusterCompare(rgp->rgppkt.knownstage1,
  534. rgp->rgppkt.knownstage2) )
  535. {
  536. //
  537. // If we were interrupted by this restart after we closed
  538. // 1st stage regroup window, then no nodes can be added to group w/o joining.
  539. //
  540. // Thus we will add missing nodes into our ignorescreen.
  541. // This will force the regroup not to wait for them in stage1
  542. cluster_t tmp;
  543. ClusterDifference(tmp, rgp->rgpinfo.cluster, rgp->innerscreen);
  544. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, tmp);
  545. }
  546. if ( ClusterMember(rgp->ignorescreen, rgp->mynode) ) {
  547. // We shouldn't have get here, but since we are here
  548. // Let's shield us from the outside world
  549. RGP_TRACE( "Self Isolation", 0, 0, 0, 0 );
  550. ClusterCopy(rgp->ignorescreen, rgp->rgpinfo.cluster);
  551. ClusterDelete(rgp->ignorescreen, rgp->mynode);
  552. }
  553. if ( !ClusterEmpty(rgp->ignorescreen) ) {
  554. // if we are ignoring somebody we have
  555. // to be cautious. I.e. we will stay longer in the
  556. // first stage to give a chance to everybody to learn about
  557. // our ignorescreen
  558. rgp->cautiousmode = 1;
  559. }
  560. if ( !ClusterCompare(old_ignorescreen, rgp->ignorescreen) ) {
  561. // Ignore screen is changed, reset restart counter //
  562. RGP_TRACE( "Ignorescreen->", GetCluster(old_ignorescreen), GetCluster(rgp->ignorescreen), 0, 0 );
  563. rgp->restartcount = 0;
  564. }
  565. PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);
  566. rgp->arbitration_started = 0;
  567. rgp->OS_specific_control.ArbitratingNode = MM_INVALID_NODE;
  568. if ( !rgp_is_perturbed() ) {
  569. ResetEvent( rgp->OS_specific_control.Stabilized );
  570. }
  571. ClusterInit(rgp->rgppkt.quorumowner);
  572. if( QuorumOwner == (DWORD)EXT_NODE(rgp->mynode) ) {
  573. ClusterInsert(rgp->rgppkt.quorumowner, rgp->mynode);
  574. }
  575. if (rgp->rgppkt.stage == RGP_COLDLOADED)
  576. {
  577. if (!rgp->OS_specific_control.ShuttingDown) {
  578. //
  579. // Currently, RGP_RELOADFAILED calls ExitProcess
  580. // During clean shutdown we would like to send the regroup packet
  581. // out triggering a regroup. So we don't want to die.
  582. //
  583. // Since we are not resetting state to RGP_ACTIVATED, this
  584. // node will not be able to participate in the regroup.
  585. //
  586. RGP_ERROR(RGP_RELOADFAILED);
  587. }
  588. } else {
  589. rgp->rgppkt.stage = RGP_ACTIVATED;
  590. }
  591. }
  592. /************************************************************************
  593. * regroup_test_stage2_advance
  594. * ===========================
  595. *
  596. * Description:
  597. *
  598. * Checks to see if we can advance to regroup stage 2.
  599. *
  600. * Parameters:
  601. *
  602. * None
  603. *
  604. * Returns:
  605. *
  606. * int - 1 if stage 2 can be entered and 0 if not.
  607. *
  608. * Algorithm:
  609. *
  610. * Stage 2 can be entered if one of the following conditions is true.
  611. *
  612. * (a) all nodes are present and accounted for and at least one
  613. * regroup clock tick has occurred
  614. * (b) we are not in cautious mode, all but one node are present
  615. * and accounted for, AND a minimum number of ticks
  616. * (rgp_quickdecisionlegit) have elapsed.
  617. * (c) if RGP_MUST_ENTER_STAGE2 ticks have elapsed.
  618. *
  619. ************************************************************************/
  620. _priv _resident static int
  621. regroup_test_stage2_advance()
  622. {
  623. cluster_t stragglers; /* set of nodes not yet checkd in */
  624. int num_stragglers; /* # of nodes not yet checkd in */
  625. /* Stage 2 must be entered after some interval regardless of any
  626. * other conditions.
  627. */
  628. if (rgp->rgpcounter == 0)
  629. return(0);
  630. if (rgp->rgpcounter >= RGP_MUST_ENTER_STAGE2)
  631. {
  632. RGP_TRACE( "RGP S->2cautious",
  633. rgp->rgpcounter, /* TRACE */
  634. rgp->cautiousmode, /* TRACE */
  635. GetCluster( rgp->outerscreen ), /* TRACE */
  636. GetCluster( rgp->rgppkt.knownstage1 ) ); /* TRACE */
  637. return(1);
  638. }
  639. /* The number of ticks is between 1 and RGP_MUST_ENTER_STAGE2.
  640. * We need to examine the stage1 mask to decide if we can
  641. * advance.
  642. *
  643. * If every node in the old configuration has checked in, I can
  644. * advance at once. This is either a false alarm or caused by
  645. * power failure or connectivity failures.
  646. */
  647. /* Compute the set of nodes from the original configuration not yet
  648. * recognized.
  649. */
  650. ClusterDifference(stragglers, rgp->outerscreen,
  651. rgp->rgppkt.knownstage1);
  652. //
  653. // We shouldn't wait for the nodes we are ignoring,
  654. // since we cannot get a packet from them anyway
  655. //
  656. ClusterDifference(stragglers, stragglers,
  657. rgp->ignorescreen);
  658. if ((num_stragglers = ClusterNumMembers(stragglers)) == 0)
  659. {
  660. RGP_TRACE( "RGP S->2 all in ",
  661. rgp->rgpcounter, /* TRACE */
  662. GetCluster( rgp->outerscreen ), 0, 0 ); /* TRACE */
  663. return(1); /* all present and accounted for */
  664. }
  665. /* If stragglers is non-empty, perhaps I can still advance to stage 2
  666. * if I am not in cautious mode (no recent power fail and not
  667. * aborting and rerunning the regroup algorithm) AND all nodes but
  668. * one have checked in AND some minimum number of ticks have elapsed.
  669. *
  670. * The minimum number of ticks is selected to be 1 greater than the
  671. * the LATEPOLL inititiation period (allowed consecutive missed IamAlive time)
  672. * since that should guarantee that, if the
  673. * cluster has broken off into multiple disconnected clusters,
  674. * the other clusters would have detected the missing IamAlives,
  675. * started regroup and paused IO, thus preventing the possibility
  676. * of data corruption caused by a split brain situation.
  677. */
  678. if (!(rgp->cautiousmode) &&
  679. (num_stragglers == 1) &&
  680. (rgp->rgpcounter > rgp->rgpinfo.Min_Stage1_ticks))
  681. {
  682. RGP_TRACE( "RGP S->2 1 miss ",
  683. rgp->rgpcounter, /* TRACE */
  684. GetCluster( rgp->outerscreen ), /* TRACE */
  685. GetCluster( rgp->rgppkt.knownstage1 ), 0 ); /* TRACE */
  686. return(1); /* advance - all but one checked in */
  687. }
  688. return(0); /* sorry cannot advance yet */
  689. }
  690. /************************************************************************
  691. * regroup_stage3_advance
  692. * ===========================
  693. *
  694. * Description:
  695. *
  696. * This function is called after the split brain avoidance algorithm
  697. * is run and the tie-breaker is selected in stage 2. It checks if
  698. * we can proceed to stage 3 (RGP_PRUNING) and advances to stage 3
  699. * if possible.
  700. *
  701. * Parameters:
  702. *
  703. * None
  704. *
  705. * Returns:
  706. *
  707. * int - 1 if the regroup stage has been advanced to RGP_PRUNING;
  708. * 0 if the stage cannot be advanced yet.
  709. *
  710. * Algorithm:
  711. *
  712. * The algorithm depends on whether we are the tie-breaker or not.
  713. *
  714. * On the tie-breaker node, we first check if there are any
  715. * disconnects in the cluster. If there aren't any, there is no need
  716. * for pruning. We can then set pruning_result to knownstage2,
  717. * advance to the RGP_PRUNING stage and return 1. If there are
  718. * disconnects, we must wait a certain number of ticks to collect
  719. * connectivity info from all nodes. If the number of ticks have not
  720. * passed, return 0. If the required number of ticks have elapsed,
  721. * we must call the pruning algorithm to get the list of potential
  722. * groups. After that, the select_cluster() routine is called to
  723. * pick one from the set of possible clusters. After this is done,
  724. * pruning_result is set to the selected cluster and we return 1.
  725. *
  726. * On a non-tiebreaker node, nothing is done till a stage3 packet is
  727. * received from the tie-breaker node or another node which got a
  728. * stage 3 packet. If a stage 3 packet has not been received, we
  729. * simply return 0. If a stage 3 packet is received, RGP_PRUNING
  730. * stage is entered and we return 1.
  731. *
  732. ************************************************************************/
  733. _priv _resident int
  734. regroup_stage3_advance()
  735. {
  736. int stage_advanced = 0, numgroups, groupnum;
  737. if (rgp->tiebreaker == rgp->mynode)
  738. {
  739. if (connectivity_complete(rgp->rgppkt.connectivity_matrix))
  740. {
  741. /* No disconnects. All nodes in knownstage2 survive. */
  742. rgp->rgppkt.stage = RGP_PRUNING;
  743. ClusterCopy(rgp->rgppkt.pruning_result,
  744. rgp->rgppkt.knownstage2);
  745. stage_advanced = 1;
  746. RGP_TRACE( "RGP S->3 NoPrune", rgp->rgpcounter, 0, 0, 0 );
  747. }
  748. /* There are disconnects; must wait for connectivity
  749. * information to be complete. The info is deemed
  750. * complete after a fixed number of ticks have
  751. * elapsed.
  752. */
  753. else if (rgp->pruning_ticks >= RGP_CONNECTIVITY_TICKS)
  754. { /* connectivity info collection complete; enter stage 3 */
  755. RGP_TRACE( "RGP Con. matrix1",
  756. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[0], /*TRACE*/
  757. rgp->rgppkt.connectivity_matrix[1] ), /*TRACE*/
  758. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[2], /*TRACE*/
  759. rgp->rgppkt.connectivity_matrix[3] ), /*TRACE*/
  760. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[4], /*TRACE*/
  761. rgp->rgppkt.connectivity_matrix[5] ), /*TRACE*/
  762. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[6], /*TRACE*/
  763. rgp->rgppkt.connectivity_matrix[7])); /*TRACE*/
  764. RGP_TRACE( "RGP Con. matrix2",
  765. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[8], /*TRACE*/
  766. rgp->rgppkt.connectivity_matrix[9] ), /*TRACE*/
  767. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[10], /*TRACE*/
  768. rgp->rgppkt.connectivity_matrix[11]), /*TRACE*/
  769. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[12], /*TRACE*/
  770. rgp->rgppkt.connectivity_matrix[13]), /*TRACE*/
  771. RGP_MERGE_TO_32( rgp->rgppkt.connectivity_matrix[14], /*TRACE*/
  772. rgp->rgppkt.connectivity_matrix[15]));/*TRACE*/
  773. numgroups = find_all_fully_connected_groups(
  774. rgp->rgppkt.connectivity_matrix,
  775. rgp->tiebreaker,
  776. rgp->potential_groups);
  777. if ((void *)rgp->select_cluster == RGP_NULL_PTR)
  778. {
  779. node_t keynode;
  780. cluster_t temp;
  781. ClusterIntersection(
  782. temp,
  783. rgp->rgppkt.knownstage2,
  784. rgp->rgppkt.quorumowner
  785. );
  786. if ( ClusterEmpty(temp) ) {
  787. keynode = RGP_NULL_NODE;
  788. } else {
  789. keynode = rgp_select_tiebreaker(temp);
  790. }
  791. RGP_TRACE( "RGP keynode ng ", keynode, numgroups, 0, 0); /*TRACE*/
  792. /* No callback specified; use regroup's own routine. */
  793. groupnum = rgp_select_cluster_ex(
  794. rgp->potential_groups, numgroups, keynode);
  795. }
  796. else
  797. {
  798. /* Call routine specified at rgp_start() time. */
  799. groupnum = (*(rgp->select_cluster))(
  800. rgp->potential_groups, numgroups);
  801. }
  802. if (groupnum >= 0)
  803. ClusterCopy(rgp->rgppkt.pruning_result,
  804. rgp->potential_groups[groupnum]);
  805. else
  806. /* No group can survive. Can't halt yet.
  807. * Need to tell everyone else.
  808. */
  809. ClusterInit(rgp->rgppkt.pruning_result);
  810. rgp->rgppkt.stage = RGP_PRUNING;
  811. stage_advanced = 1;
  812. RGP_TRACE( "RGP S->3 Pruned ",
  813. rgp->rgpcounter, /* TRACE */
  814. GetCluster( rgp->rgppkt.knownstage2 ), /* TRACE */
  815. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  816. numgroups ); /* TRACE */
  817. } /* connectivity info collection complete; enter stage 3 */
  818. } /* tie-breaker node */
  819. else
  820. { /* not tie-breaker node */
  821. if (ClusterNumMembers(rgp->rgppkt.knownstage3) != 0)
  822. {
  823. /* We got a stage 3 packet from someone. Enter stage 3. */
  824. rgp->rgppkt.stage = RGP_PRUNING;
  825. stage_advanced = 1;
  826. RGP_TRACE( "RGP Got S3 pkt ",
  827. rgp->rgpcounter, /* TRACE */
  828. GetCluster( rgp->rgppkt.knownstage2 ), /* TRACE */
  829. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  830. GetCluster( rgp->rgppkt.knownstage3 ) ); /* TRACE */
  831. }
  832. } /* not tie-breaker node */
  833. return(stage_advanced);
  834. }
  835. /************************************************************************
  836. * enter_first_cleanup_stage
  837. * =========================
  838. *
  839. * Description:
  840. *
  841. * This function performs the actions required when entering the
  842. * first of the message clean up stages.
  843. *
  844. * Parameters:
  845. *
  846. * None
  847. *
  848. * Returns:
  849. *
  850. * void - no return value
  851. *
  852. * Algorithm:
  853. *
  854. * There are many actions to be performed after the final cluster
  855. * is selected. The actions are described in comments throughout
  856. * this routine.
  857. *
  858. ************************************************************************/
  859. _priv _resident void
  860. enter_first_cleanup_stage()
  861. {
  862. cluster_t banishees;
  863. node_t failer;
  864. rgp->rgppkt.stage = RGP_PHASE1_CLEANUP;
  865. RGP_TRACE( "RGP S->4 ", rgp->rgpcounter, 0, 0, 0 );
  866. /* The packets we send now will not indicate we are in the phase 1
  867. * cleanup stage yet. We indicate we are in this stage only after
  868. * we have completed the clean up action associated with the stage.
  869. * This is done in rgp_event_handler, under the
  870. * RGP_EVT_PHASE1_CLEANUP_DONE event.
  871. */
  872. rgp->sendstage = 0;
  873. /* Now, we can resume IO since we have passed the split brain danger.
  874. * New split brain situations will result in regroup restarting and
  875. * pausing IO again.
  876. */
  877. rgp_resume_all_io();
  878. /* Compute in banishees the set of nodes being lost from the old
  879. * configuration.
  880. */
  881. ClusterDifference(banishees, rgp->rgpinfo.cluster,
  882. rgp->rgppkt.pruning_result);
  883. /* Install the new configuration into the masks. */
  884. ClusterCopy(rgp->outerscreen, rgp->rgppkt.pruning_result);
  885. #if defined( NT )
  886. ClusnetSetOuterscreen(
  887. NmClusnetHandle,
  888. (ULONG)*((PUSHORT)rgp->outerscreen)
  889. );
  890. #endif
  891. ClusterCopy(rgp->innerscreen, rgp->rgppkt.pruning_result);
  892. ClusterCopy(rgp->endnodes, rgp->rgppkt.pruning_result);
  893. ClusterCopy(rgp->rgpinfo.cluster, rgp->rgppkt.pruning_result);
  894. /* Select a new tiebreaker because the previous one may have been */
  895. /* pruned out. Note: tiebreaker_selected has already been set in S2. */
  896. rgp->tiebreaker =
  897. rgp_select_tiebreaker(rgp->rgppkt.pruning_result);
  898. /* F40 Bug FixID KCY0833 */
  899. /* Mark the state of the banishees as dead and invoke the
  900. * node down callback routine.
  901. */
  902. for (failer = 0; failer < (node_t) rgp->num_nodes; failer++)
  903. if (ClusterMember(banishees, failer)
  904. || rgp->node_states[failer].status == RGP_NODE_COMING_UP // fix bug#265069
  905. )
  906. {
  907. rgp->node_states[failer].status = RGP_NODE_DEAD;
  908. rgp->node_states[failer].pollstate = AWAITING_IAMALIVE;
  909. rgp->node_states[failer].lostHBs = 0;
  910. #if !defined(NT)
  911. (*(rgp->nodedown_callback))(EXT_NODE(failer));
  912. #else
  913. ClusnetSetNodeMembershipState(NmClusnetHandle,
  914. EXT_NODE( failer ),
  915. ClusnetNodeStateDead);
  916. //
  917. // On NT we do the nodedown callback at the end of stage 5.
  918. // This allows the cleanup phases to complete before we let
  919. // the "upper" layers know that a node went down.
  920. //
  921. if ( ClusterMember(rgp->OS_specific_control.CPUUPMASK,failer) )
  922. ClusterInsert(
  923. rgp->OS_specific_control.NeedsNodeDownCallback,
  924. failer
  925. );
  926. #endif // !defined(NT)
  927. }
  928. /* If some nodes have been lost from the configuration, then I will
  929. * queue regroup status packets to them. This is a best efforts
  930. * attempt to ensure that they get quickly taken out if they
  931. * do in fact continue to run.
  932. */
  933. ClusterUnion(rgp->status_targets, banishees, rgp->status_targets);
  934. //
  935. // In NT, we are using rgp->rgppkt.hadpowerfail to transmit
  936. // quorum ownership information
  937. //
  938. #if !defined(NT)
  939. /* I should inform the message system of any node that experienced a
  940. * power on recovery. The message system can use this to clear error
  941. * counters so that a link will not be declared down due to errors
  942. * which may have been caused by the power failure.
  943. */
  944. for (failer = 0; failer < (node_t) rgp->num_nodes; failer++)
  945. if ((ClusterMember(rgp->rgppkt.hadpowerfail, failer)) &&
  946. !(ClusterMember(banishees, failer)))
  947. /* This survivor had a power failure. */
  948. rgp_had_power_failure( EXT_NODE(failer) );
  949. #endif // NT
  950. /* Tell the OS to start clean up operations for the failed nodes. */
  951. rgp_start_phase1_cleanup();
  952. }
  953. /************************************************************************
  954. * evaluatestageadvance
  955. * ====================
  956. *
  957. * Description:
  958. *
  959. * This function evaluates whether additional state transitions are
  960. * possible as a result of the info just received.
  961. *
  962. * Parameters:
  963. *
  964. * None
  965. *
  966. * Returns:
  967. *
  968. * void - no return value
  969. *
  970. * Algorithm:
  971. *
  972. * To evaluate whether we can advance through the stages, a loop is
  973. * used with a case entry for each stage. If an entry decides not to
  974. * advance to the next stage, it must return from the function. If
  975. * it does advance, it should not return but remain in the loop
  976. * since it is possible to have cascaded stage transitions
  977. * especially in a two node system. Thus, the loop is exited when no
  978. * more stage transitions are possible.
  979. *
  980. ************************************************************************/
  981. _priv _resident static void
  982. evaluatestageadvance()
  983. {
  984. cluster_t temp_cluster;
  985. node_t node;
  986. node_t i;
  987. for (;;) /* loop until someone exits by returning */
  988. {
  989. switch (rgp->rgppkt.stage)
  990. {
  991. case RGP_COLDLOADED :
  992. {
  993. if (!rgp->OS_specific_control.ShuttingDown) {
  994. RGP_ERROR(RGP_RELOADFAILED);
  995. }
  996. return;
  997. }
  998. case RGP_ACTIVATED :
  999. { /* evaluate whether to go to stage RGP_CLOSING */
  1000. if (!regroup_test_stage2_advance())
  1001. return;
  1002. if (!ClusterMember(rgp->rgppkt.knownstage1, rgp->mynode))
  1003. RGP_ERROR(RGP_MISSED_POLL_TO_SELF);
  1004. rgp->rgppkt.stage = RGP_CLOSING;
  1005. rgp->rgpcounter = 0;
  1006. rgp->tiebreaker_selected = 0;
  1007. /* If we abort the regroup, and there's somebody that everybody
  1008. * banished on this regroup, the following line keeps him from
  1009. * joining up on the next regroup.
  1010. */
  1011. ClusterCopy(rgp->innerscreen, rgp->rgppkt.knownstage1);
  1012. break;
  1013. } /* evaluate whether to go to stage RGP_CLOSING */
  1014. case RGP_CLOSING :
  1015. { /* evaluate whether to go to stage RGP_PRUNING */
  1016. if (rgp->tiebreaker_selected)
  1017. {
  1018. if (regroup_stage3_advance())
  1019. break; /* try to advance further */
  1020. else
  1021. return; /* cannot advance any more */
  1022. }
  1023. if (!ClusterCompare(rgp->rgppkt.knownstage1,
  1024. rgp->rgppkt.knownstage2))
  1025. return;
  1026. //
  1027. // In NT, we no longer use the split-brain avoidance algorithm.
  1028. // We use a cluster-wide arbitration algorithm instead.
  1029. //
  1030. #if !defined(NT)
  1031. /* When the known stage 1 and known stage 2 sets are the
  1032. * same, we have the complete set of nodes that are
  1033. * connected to us. It is time to execute the split-
  1034. * brain avoidance algorithm. If we are a splinter group
  1035. * cut off from the main group, we will not survive this
  1036. * algorithm.
  1037. */
  1038. split_brain_avoidance_algorithm();
  1039. #endif // NT
  1040. /* We are the lucky survivors of the split brain avoidance
  1041. * algorithm. Now, we must proceed to elect a new tie-breaker
  1042. * since the current tie-breaker may no longer be with us.
  1043. */
  1044. rgp->tiebreaker =
  1045. rgp_select_tiebreaker(rgp->rgppkt.knownstage2);
  1046. rgp->tiebreaker_selected = 1;
  1047. RGP_TRACE( "RGP S2 tiebr sel",
  1048. rgp->rgpcounter, /* TRACE */
  1049. EXT_NODE(rgp->tiebreaker), /* TRACE */
  1050. 0, 0 ); /* TRACE */
  1051. rgp->pruning_ticks = 0;
  1052. break;
  1053. } /* evaluate whether to go to stage 3 */
  1054. case RGP_PRUNING :
  1055. { /* evaluate whether to go to RGP_PHASE1_CLEANUP stage */
  1056. if (rgp->arbitration_started) {
  1057. if (regroup_test_arbitrate_advance()) {
  1058. enter_first_cleanup_stage();
  1059. break;
  1060. } else {
  1061. return; // Stay in this stage //
  1062. }
  1063. }
  1064. if (rgp->has_unreachable_nodes)
  1065. {
  1066. RGP_TRACE( "RGP Unreach Node",
  1067. GetCluster( rgp->rgppkt.pruning_result ), /* TRACE */
  1068. GetCluster( rgp->unreachable_nodes ), 0, 0 ); /* TRACE */
  1069. /* Must check if the unreachable nodes are in the
  1070. * selected final group. If so, we must restart
  1071. * regroup.
  1072. */
  1073. ClusterIntersection(temp_cluster, rgp->unreachable_nodes,
  1074. rgp->rgppkt.pruning_result);
  1075. /* Clear the unreachable node mask and flag after examining
  1076. * them. If we restart, we will start with a clean slate.
  1077. */
  1078. rgp->has_unreachable_nodes = 0;
  1079. ClusterInit(rgp->unreachable_nodes);
  1080. if (ClusterNumMembers(temp_cluster) != 0)
  1081. {
  1082. /* We have a node unreachable event to a node
  1083. * selected to survive. We must regenerate
  1084. * the connectivity matrix and re-run the node
  1085. * pruning algorithm. Start a new regroup incident.
  1086. * All restarts are in cautious mode.
  1087. */
  1088. rgp->cautiousmode = 1;
  1089. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1090. rgp->rgppkt.reason = RGP_EVT_NODE_UNREACHABLE;
  1091. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1092. /* For causingnode, pick the first unreachable node
  1093. * in temp_cluster.
  1094. */
  1095. for (node = 0; node < (node_t) rgp->num_nodes; node++)
  1096. {
  1097. if (ClusterMember(temp_cluster, node))
  1098. {
  1099. rgp->rgppkt.causingnode = (uint8) EXT_NODE(node);
  1100. break;
  1101. }
  1102. }
  1103. regroup_restart();
  1104. return;
  1105. }
  1106. }
  1107. if (!ClusterCompare(rgp->rgppkt.knownstage2,
  1108. rgp->rgppkt.knownstage3))
  1109. return;
  1110. /* All nodes in the connected cluster have been notified
  1111. * of the pruning decision (entered stage 3). If we are
  1112. * selected to survive, we can now enter stage 4. If we are
  1113. * not in the selected group (pruning_result), we must halt.
  1114. * Wait for at least one node in PRUNING_RESULT to get into
  1115. * stage 4 before halting. This ensures that the algorithm
  1116. * does not stall in stage 3 with all pruned out nodes
  1117. * halting before ANY of the survivors finds that all nodes
  1118. * entered stage 3.
  1119. */
  1120. if (!ClusterMember(rgp->rgppkt.pruning_result, rgp->mynode))
  1121. {
  1122. /* Wait for at least one node in PRUNING_RESULT
  1123. * to get into stage 4 before halting. Since only
  1124. * nodes in PRUNING_RESULT get into stage 4, it is
  1125. * sufficient to check if knownstage4 has any members.
  1126. */
  1127. if (ClusterNumMembers(rgp->rgppkt.knownstage4) != 0)
  1128. RGP_ERROR(RGP_PRUNED_OUT);
  1129. return;
  1130. }
  1131. // proceed to second stage of pruning - arbitration
  1132. if( regroup_start_arbitrate() ) {
  1133. return; // stay in this stage
  1134. } else {
  1135. break; // either proceed to the next, or restart
  1136. }
  1137. break;
  1138. } /* evaluate whether to go to RGP_PHASE1_CLEANUP stage */
  1139. case RGP_PHASE1_CLEANUP :
  1140. { /* evaluate whether to go to RGP_PHASE2_CLEANUP stage */
  1141. if (!ClusterCompare(rgp->rgppkt.pruning_result,
  1142. rgp->rgppkt.knownstage4))
  1143. return;
  1144. rgp->rgppkt.stage = RGP_PHASE2_CLEANUP;
  1145. RGP_TRACE( "RGP S->5 ", rgp->rgpcounter, 0, 0, 0 );
  1146. /* The packets we send now will not indicate we are in the phase 2
  1147. * cleanup stage yet. We indicate we are in this stage only after
  1148. * we have completed the clean up action associated with the stage.
  1149. * This is done in rgp_event_handler, under the
  1150. * RGP_EVT_PHASE2_CLEANUP_DONE event.
  1151. */
  1152. rgp->sendstage = 0;
  1153. rgp_start_phase2_cleanup();
  1154. break;
  1155. } /* evaluate whether to go to RGP_PHASE2_CLEANUP stage */
  1156. case RGP_PHASE2_CLEANUP :
  1157. { /* evaluate whether to go to RGP_STABILIZED stage */
  1158. if (!ClusterCompare(rgp->rgppkt.knownstage4,
  1159. rgp->rgppkt.knownstage5))
  1160. return;
  1161. RGP_LOCK;
  1162. //
  1163. // [HACKHACK] This is not necessary anymore, since we
  1164. // are holding the lock in message.c when delivering
  1165. // regroup packet received event
  1166. //
  1167. if (RGP_PHASE2_CLEANUP != rgp->rgppkt.stage) {
  1168. RGP_TRACE( "RGP S->6 (race) ", rgp->rgpcounter, rgp->rgppkt.stage, 0, 0 );
  1169. break;
  1170. }
  1171. rgp->rgppkt.stage = RGP_STABILIZED;
  1172. RGP_TRACE( "RGP S->6 ", rgp->rgpcounter, 0, 0, 0 );
  1173. rgp->rgpcounter = 0;
  1174. rgp->restartcount = 0;
  1175. /* Reset the regroup flags which have not yet been cleared. */
  1176. rgp->cautiousmode = 0;
  1177. /* Clear the mask indicating nodes which own the quorum resrc. */
  1178. ClusterInit(rgp->rgppkt.quorumowner);
  1179. /* Copy the sequence number into the rgpinfo area. */
  1180. rgp->rgpinfo.seqnum = rgp->rgppkt.seqno;
  1181. SetEvent( rgp->OS_specific_control.Stabilized );
  1182. if (rgp->OS_specific_control.ArbitratingNode != MM_INVALID_NODE) {
  1183. // Somebody was arbitrating //
  1184. rgp->OS_specific_control.ApproxArbitrationWinner =
  1185. rgp->OS_specific_control.ArbitratingNode;
  1186. if (rgp->OS_specific_control.ArbitratingNode == (DWORD)EXT_NODE(rgp->mynode)) {
  1187. //
  1188. // [HackHack] To close 422405
  1189. // when 421828 is fixed, please uncomment the following line
  1190. //
  1191. // QuorumOwner = rgp->OS_specific_control.ArbitratingNode;
  1192. } else {
  1193. if (QuorumOwner != MM_INVALID_NODE) {
  1194. ClRtlLogPrint(LOG_UNUSUAL,
  1195. "[MM] : clearing quorum owner var (winner is %1!u!), %.\n",
  1196. rgp->OS_specific_control.ArbitratingNode
  1197. );
  1198. }
  1199. QuorumOwner = MM_INVALID_NODE;
  1200. }
  1201. }
  1202. rgp_cleanup_complete();
  1203. #if defined(NT)
  1204. //
  1205. // On NT we deferred doing the node down callback until all the
  1206. // cleanup phases have been complete.
  1207. //
  1208. ClusterCopy(
  1209. rgp->OS_specific_control.CPUUPMASK,
  1210. rgp->rgpinfo.cluster
  1211. );
  1212. (*(rgp->nodedown_callback))(
  1213. rgp->OS_specific_control.NeedsNodeDownCallback
  1214. );
  1215. //
  1216. // Clear the down node mask
  1217. //
  1218. ClusterInit(rgp->OS_specific_control.NeedsNodeDownCallback);
  1219. //
  1220. // finally, tell clusnet that regroup has finished
  1221. //
  1222. ClusnetRegroupFinished(NmClusnetHandle,
  1223. rgp->OS_specific_control.EventEpoch,
  1224. rgp->rgppkt.seqno);
  1225. rgp->last_stable_seqno = rgp->rgppkt.seqno;
  1226. RGP_UNLOCK;
  1227. #endif
  1228. return;
  1229. } /* evaluate whether to go to RGP_STABILIZED stage */
  1230. case RGP_STABILIZED :
  1231. return; /* stabilized, so I am all done */
  1232. default :
  1233. RGP_ERROR(RGP_INTERNAL_ERROR); /* unknown stage */
  1234. } /* switch (rgp->rgppkt.stage) */
  1235. } /* loop until someone exits by returning */
  1236. }
  1237. /************************************************************************
  1238. * rgp_event_handler
  1239. * =================
  1240. *
  1241. * Description:
  1242. *
  1243. * The state machine and the heart of the regroup algorithm.
  1244. *
  1245. * Parameters:
  1246. *
  1247. * int event -
  1248. * which event happened
  1249. *
  1250. * node_t causingnode -
  1251. * node causing the event: node which sent a regroup status
  1252. * packet or whose IamAlives are missed; if the causing node is
  1253. * not relevant information, RGP_NULL_NODE can be passed and
  1254. * is ignored. *This node ID is in external format.*
  1255. *
  1256. * Returns:
  1257. *
  1258. * void - no return value
  1259. *
  1260. * Algorithm:
  1261. *
  1262. * The state machine is the heart of the regroup algorithm.
  1263. * It is organized as a switch statement with the regroup stage as
  1264. * the case label and the regroup event as the switch variable.
  1265. * Events could cause regroup to start a new incident, to advance
  1266. * through stages or to update information without advancing to
  1267. * another stage. This routine also arranges for regroup status
  1268. * packets to be sent to all relevant nodes including our own
  1269. * node.
  1270. *
  1271. ************************************************************************/
  1272. _priv _resident void
  1273. RGP_EVENT_HANDLER_EX(int event, node_t causingnode, void *arg)
  1274. {
  1275. rgp_pkt_t *rcvd_pkt_p;
  1276. cluster_t ignorescreen_rcvd;
  1277. uint8 oldstage;
  1278. int send_status_pkts = 0;
  1279. /* Note: arg is only used when event == RGP_EVENT_RECEIVED_PACKET. It is the ptr to the packet */
  1280. /* Trace unusual invocations of this routine. */
  1281. if (event != RGP_EVT_RECEIVED_PACKET && event != RGP_EVT_CLOCK_TICK)
  1282. RGP_TRACE( "RGP Event ", event, causingnode, rgp->rgppkt.stage, rgp->rgpcounter ); /* TRACE */
  1283. switch (event)
  1284. {
  1285. case RGP_EVT_NODE_UNREACHABLE :
  1286. { /* All paths to a node are unreachable */
  1287. /* Ignore the event if the unreachable node has been eliminated
  1288. * from our outerscreen. The message system probably doesn't
  1289. * know it yet.
  1290. */
  1291. if (ClusterMember(rgp->outerscreen, INT_NODE(causingnode)))
  1292. {
  1293. /* Store this event and check after node pruning (when
  1294. * entering the RGP_PRUNING stage). If a regroup incident
  1295. * is in progress and we haven't entered the RGP_PRUNING
  1296. * stage yet, this will happen in the current incident.
  1297. * If not, it will happen in the next regroup incident
  1298. * which will surely start soon due to this disconnect.
  1299. *
  1300. * We do not start a regroup incident for this event. We will
  1301. * wait for IamAlives to be missed for starting a new regroup
  1302. * incident. This is due to the requirement that, in case
  1303. * of a total disconnect resulting in multiple groups, we must
  1304. * stay in stage 1 till we can guarantee that the other group(s)
  1305. * has started regroup and paused IO. We assume that the
  1306. * regroup incident started at the IamAlive check tick and
  1307. * use the periodic nature of the IamAlive sends and
  1308. * IamAlive checks to limit the stage1 pause to the period
  1309. * of IamAlive sends (+ 1 tick to drain IO). If we started
  1310. * a regroup incident due to the node unreachable event, we
  1311. * have to stay in stage1 longer.
  1312. */
  1313. rgp->has_unreachable_nodes = 1;
  1314. ClusterInsert(rgp->unreachable_nodes, INT_NODE(causingnode));
  1315. break;
  1316. }
  1317. } /* All paths to a node are unreachable */
  1318. case RGP_EVT_PHASE1_CLEANUP_DONE :
  1319. {
  1320. /* The following checks are needed in case we restarted
  1321. * regroup and asked for phase1 cleanup multiple times.
  1322. * We must make sure that all such requests have been
  1323. * completed.
  1324. */
  1325. if ( (rgp->rgppkt.stage == RGP_PHASE1_CLEANUP) &&
  1326. (rgp->rgp_msgsys_p->phase1_cleanup == 0) )
  1327. { /* all caught up */
  1328. /* Let others and ourselves get packets indicating we are in
  1329. * this stage. When we get that packet, we will update our
  1330. * knownstage field. If our sending or receiving apparatus
  1331. * failed meanwhile and we don't get our own packet, it
  1332. * will cause regroup to be restarted.
  1333. */
  1334. rgp->sendstage = 1;
  1335. send_status_pkts = 1;
  1336. evaluatestageadvance();
  1337. } /* all caught up */
  1338. break;
  1339. }
  1340. case RGP_EVT_PHASE2_CLEANUP_DONE :
  1341. {
  1342. /* The following checks are needed in case we restarted
  1343. * regroup and asked for phase2 cleanup multiple times.
  1344. * We must make sure that all such requests have been
  1345. * completed.
  1346. */
  1347. if ( (rgp->rgppkt.stage == RGP_PHASE2_CLEANUP) &&
  1348. (rgp->rgp_msgsys_p->phase2_cleanup == 0) )
  1349. { /* all caught up */
  1350. /* Let others and ourselves get packets indicating we are
  1351. * in this stage.
  1352. */
  1353. rgp->sendstage = 1;
  1354. send_status_pkts = 1;
  1355. evaluatestageadvance();
  1356. } /* all caught up */
  1357. break;
  1358. }
  1359. case RGP_EVT_LATEPOLLPACKET :
  1360. { /* some node is late with IamAlives */
  1361. RGP_LOCK; // to ensure that the packet receive does not initiate
  1362. // regroup asynchronously.
  1363. /* Start a new regroup incident if not already active. */
  1364. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1365. {
  1366. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1367. rgp->rgppkt.reason = RGP_EVT_LATEPOLLPACKET;
  1368. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1369. rgp->rgppkt.causingnode = (uint8) causingnode;
  1370. regroup_restart();
  1371. send_status_pkts = 1;
  1372. } else if (rgp->rgppkt.stage == RGP_COLDLOADED)
  1373. {
  1374. RGP_ERROR(RGP_RELOADFAILED);
  1375. }
  1376. RGP_UNLOCK;
  1377. break;
  1378. } /* some node is late with IamAlives */
  1379. case MM_EVT_LEAVE:
  1380. rgp->OS_specific_control.ShuttingDown = TRUE;
  1381. case RGP_EVT_BANISH_NODE :
  1382. { /* assumes that the lock is held */
  1383. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1384. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1385. // Pack Ignore Screen in the regroup_restart will
  1386. // fill reason and causingnode fields of the packet
  1387. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1388. regroup_restart();
  1389. send_status_pkts = 1;
  1390. break;
  1391. }
  1392. #if 0
  1393. case MM_EVT_LEAVE: // this node needs to leave the cluster gracefully
  1394. {
  1395. // Initiate a Regroup Event amongst remaining members if any
  1396. // Start a new regroup incident if not already active.
  1397. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1398. {
  1399. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1400. rgp->rgppkt.reason = MM_EVT_LEAVE;
  1401. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1402. rgp->rgppkt.causingnode = (uint8) EXT_NODE(rgp->mynode);
  1403. regroup_restart();
  1404. send_status_pkts = 1;
  1405. }
  1406. break;
  1407. }
  1408. #endif
  1409. case RGP_EVT_CLOCK_TICK :
  1410. { /* called on regroup clock tick when regroup is active */
  1411. if( (rgp->rgppkt.stage == RGP_PRUNING) &&
  1412. (rgp->arbitration_started)
  1413. )
  1414. {
  1415. rgp->arbitration_ticks++;
  1416. if (rgp->arbitration_ticks >= RGP_ARBITRATION_TIMEOUT) {
  1417. //
  1418. // Kill timed-out arbitrator
  1419. //
  1420. if(rgp->tiebreaker == rgp->mynode) {
  1421. //
  1422. // If this node was arbitrating, then die
  1423. //
  1424. if ( IsDebuggerPresent() ) {
  1425. DebugBreak();
  1426. }
  1427. RGP_ERROR(RGP_ARBITRATION_STALLED);
  1428. }
  1429. else {
  1430. //
  1431. // Kill the arbitrator and initiate another regroup
  1432. //
  1433. RGP_TRACE(
  1434. "RGP arbitration stalled ",
  1435. rgp->rgppkt.stage, 0, 0, 0
  1436. );
  1437. rgp_event_handler(
  1438. RGP_EVT_BANISH_NODE,
  1439. EXT_NODE(rgp->tiebreaker)
  1440. );
  1441. break;
  1442. }
  1443. }
  1444. evaluatestageadvance();
  1445. //
  1446. // No need to send packets while we are waiting for
  1447. // the arbitrator to win
  1448. //
  1449. // send_status_pkts = rgp->rgppkt.stage != RGP_PRUNING;
  1450. //
  1451. // [GN] Wrong. We do have to send status packets.
  1452. // If we have partial connectivity, we need to
  1453. // continue exchanging packets, so that the pruner,
  1454. // can learn indirectly that all nodes got the pruning results.
  1455. //
  1456. send_status_pkts = 1;
  1457. break;
  1458. }
  1459. else {
  1460. rgp->rgpcounter++; /* increment the counter */
  1461. }
  1462. if ( (rgp->rgppkt.stage == RGP_ACTIVATED) && (rgp->sendstage == 0) )
  1463. {
  1464. /* To detect the potential failure of my timer pop mechanism
  1465. * (such as by the corruption of the time list), I wait for
  1466. * at least one regroup clock tick before I let myself and
  1467. * others know I am in stage 1.
  1468. */
  1469. // [GorN Jan14/2000]
  1470. // We don't send our connectivity information,
  1471. // before we get the first clock tick.
  1472. // However we collect this information in
  1473. // rgp->internal_connectivity_matrix.
  1474. // Let's put it in the outgoing packet
  1475. // so that everybody will see what we think about them.
  1476. MatrixOr(rgp->rgppkt.connectivity_matrix,
  1477. rgp->internal_connectivity_matrix);
  1478. rgp->sendstage = 1; /* let everyone know we are in stage 1 */
  1479. }
  1480. else if ( (rgp->rgppkt.stage >= RGP_CLOSING) &&
  1481. (rgp->rgppkt.stage <= RGP_PHASE2_CLEANUP) )
  1482. { /* check for possible abort and restart */
  1483. if (rgp->rgpcounter >= RGP_MUST_RESTART)
  1484. {
  1485. /* Stalled out. Probably someone died after starting
  1486. * or another node is still in stage 1 cautious mode
  1487. */
  1488. if ( ++(rgp->restartcount) > RGP_RESTART_MAX ) {
  1489. // It is not a good idea to die, because somebody
  1490. // is stalling. Let's add stallees into ignore mask and restart
  1491. //
  1492. // RGP_ERROR(RGP_INTERNAL_ERROR); // [Fixed]
  1493. cluster_t tmp, *stage;
  1494. switch (rgp->rgppkt.stage) {
  1495. case RGP_CLOSING: stage = &rgp->rgppkt.knownstage2; break;
  1496. case RGP_PRUNING: stage = &rgp->rgppkt.knownstage3; break;
  1497. case RGP_PHASE1_CLEANUP: stage = &rgp->rgppkt.knownstage4; break;
  1498. case RGP_PHASE2_CLEANUP: stage = &rgp->rgppkt.knownstage5; break;
  1499. }
  1500. ClusterDifference(tmp, rgp->rgpinfo.cluster, *stage);
  1501. //
  1502. // If we stalled during closing, due to tiebraker running
  1503. // the pruning algorithn going bunkers, we can have tmp = 0
  1504. // In this case, we need to ignore somebody to guarantee that
  1505. // the algorithm completes.
  1506. //
  1507. if ( ClusterEmpty(tmp) && rgp->tiebreaker_selected) {
  1508. ClusterInsert(tmp, rgp->tiebreaker);
  1509. }
  1510. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, tmp);
  1511. }
  1512. /* If we are stalling in stage 3 and we have been pruned out,
  1513. * it is possible that we are stalling because we have been
  1514. * isolated from all other nodes. We must halt in this case.
  1515. */
  1516. if ( (rgp->rgppkt.stage == RGP_PRUNING) &&
  1517. !ClusterMember(rgp->rgppkt.pruning_result, rgp->mynode) )
  1518. RGP_ERROR(RGP_PRUNED_OUT);
  1519. rgp->cautiousmode = 1;
  1520. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1521. RGP_TRACE( "RGP stalled ", rgp->rgppkt.stage, 0, 0, 0 );
  1522. regroup_restart();
  1523. } /* Stalled out ... */
  1524. } /* check for possible abort and restart */
  1525. if ((rgp->rgppkt.stage == RGP_CLOSING) && rgp->tiebreaker_selected)
  1526. rgp->pruning_ticks++;
  1527. evaluatestageadvance();
  1528. send_status_pkts = 1; /* send rgp packets regardless of progress */
  1529. break;
  1530. } /* called on regroup clock tick when regroup is active */
  1531. case RGP_EVT_RECEIVED_PACKET :
  1532. { /* received an rgp packet */
  1533. /* If the sending node is excluded by the outer screen, then it is
  1534. * not even part of the current (most recently known) configuration.
  1535. * Therefore the packet should not be honored, and a poison message
  1536. * should be sent to try to kill this renegade processor.
  1537. * That is done in the calling routine that processes all incoming
  1538. * regroup module packets (IamAlive, regroup and poison packets).
  1539. */
  1540. /* If the sending node was accepted by the outer screen but then
  1541. * excluded by the inner screen, then the packet will be disregarded
  1542. * but no poison message sent. This phenomenon may occur when this
  1543. * node has entered stage 2 without having heard from (recognized)
  1544. * the sending node and then a message arrives late from that
  1545. * sending node. In this case the fate of the sending node, i.e.
  1546. * whether it gets ruled out of the global configuration or not is
  1547. * unknown at this point. If the sender can get itself recognized
  1548. * by some node before that node enters stage 2, then it will be
  1549. * saved. Otherwise it will be declared down and subsequently shot
  1550. * with poison packets if it ever tries to assert itself.
  1551. */
  1552. /* Remember the arg to this routine is the packet pointer */
  1553. rcvd_pkt_p = (rgp_pkt_t *)arg; /* address of pkt just received */
  1554. if ( rgp->rgppkt.seqno != rcvd_pkt_p->seqno)
  1555. RGP_TRACE( "RGP Event ", event, causingnode, rgp->rgppkt.stage, rgp->rgpcounter ); /* TRACE */
  1556. UnpackIgnoreScreen(rcvd_pkt_p, ignorescreen_rcvd);
  1557. if ( !ClusterEmpty(ignorescreen_rcvd) ) {
  1558. RGP_TRACE( "RGP Incoming pkt", GetCluster(ignorescreen_rcvd),
  1559. rcvd_pkt_p->seqno, rgp->rgppkt.stage, causingnode);
  1560. }
  1561. if ( !ClusterMember(rgp->innerscreen, INT_NODE(causingnode))) {
  1562. RGP_TRACE( "RGP Ignoring !inner", causingnode, rgp->rgppkt.stage,
  1563. GetCluster(rgp->innerscreen), GetCluster(ignorescreen_rcvd) );
  1564. return;
  1565. }
  1566. RGP_LOCK; // To ensure that the timer thread does not initiate
  1567. // regroup asynchronously at this time.
  1568. //////////////////////////// New Ignore Screen Stuff /////////////////////////////////
  1569. if (ClusterMember(rgp->ignorescreen, INT_NODE(causingnode) )) {
  1570. RGP_UNLOCK;
  1571. RGP_TRACE( "RGP Ignoring", causingnode, rgp->rgppkt.stage,
  1572. GetCluster(rgp->ignorescreen), GetCluster(ignorescreen_rcvd) );
  1573. return;
  1574. }
  1575. if (rcvd_pkt_p->seqno < rgp->last_stable_seqno ) {
  1576. RGP_UNLOCK;
  1577. RGP_TRACE( "RGP old packet", causingnode, rcvd_pkt_p->seqno, rgp->last_stable_seqno, 0);
  1578. // This is a late packet from the previous regroup incident
  1579. // from the node that is currently in my outerscreen.
  1580. // This node could not have sent it now, this is probably a packet
// that stuck somewhere and was delivered eons later.
  1582. // Simply ignore it.
  1583. return;
  1584. }
  1585. if ( ClusterMember(ignorescreen_rcvd, rgp->mynode ) ) {
  1586. //
  1587. // Sender ignores me. We will do the same to him.
  1588. //
  1589. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1590. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1591. regroup_restart();
  1592. send_status_pkts = 1;
  1593. RGP_UNLOCK;
  1594. break;
  1595. }
  1596. if ( ClusterCompare(ignorescreen_rcvd, rgp->ignorescreen) ) {
  1597. // We have the same ignore screen.
  1598. // No work needs to be done
  1599. } else if ( ClusterSubsetOf(rgp->ignorescreen, ignorescreen_rcvd) ) {
  1600. // Incoming packet has smaller ignore screen
  1601. // Ignore this packet, but reply to its sender with
  1602. // our current regroup packet to force to upgrade to
  1603. // our view of the world.
  1604. // do so only if we are properly initialized
  1605. if (rgp->rgppkt.stage == RGP_COLDLOADED && !rgp->OS_specific_control.ShuttingDown) {
  1606. RGP_ERROR(RGP_RELOADFAILED);
  1607. }
  1608. RGP_TRACE( "RGP smaller ignore mask ",
  1609. GetCluster(ignorescreen_rcvd), GetCluster(rgp->ignorescreen), /* TRACE */
  1610. rgp->rgppkt.stage, rcvd_pkt_p->stage ); /* TRACE */
  1611. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1612. rgp_broadcast(RGP_UNACK_REGROUP);
  1613. RGP_UNLOCK;
  1614. return;
  1615. } else if ( ClusterSubsetOf(ignorescreen_rcvd, rgp->ignorescreen) ) {
  1616. RGP_TRACE( "RGP bigger ignore mask ",
  1617. GetCluster(ignorescreen_rcvd), GetCluster(rgp->ignorescreen), /* TRACE */
  1618. rgp->rgppkt.stage, causingnode ); /* TRACE */
  1619. // Incoming packet has bigger ignore screen.
  1620. // Upgrade to this information and process the packet
  1621. rgp->rgppkt.seqno = rcvd_pkt_p->seqno;
  1622. /* Somebody else activated regroup. So, let's just copy */
  1623. /* the sender's reason code and reason nodes. */
  1624. //
  1625. // Ignore mask parts are in the reason and activatingnode fields
  1626. //
  1627. ClusterCopy(rgp->ignorescreen, ignorescreen_rcvd); // fix bug #328216
  1628. rgp->rgppkt.reason = rcvd_pkt_p->reason;
  1629. rgp->rgppkt.activatingnode = rcvd_pkt_p->activatingnode;
  1630. rgp->rgppkt.causingnode = rcvd_pkt_p->causingnode;
  1631. regroup_restart();
  1632. send_status_pkts = 1;
  1633. } else {
  1634. RGP_TRACE( "RGP different ignore masks ",
  1635. GetCluster(ignorescreen_rcvd), GetCluster(rgp->ignorescreen), /* TRACE */
  1636. rgp->rgppkt.stage, causingnode ); /* TRACE */
  1637. // Ignore masks are different and neither of them is
  1638. // a subset of another.
  1639. //
  1640. // We need to merge information out of these masks
  1641. // and restart the regroup.
  1642. //
  1643. // Packet that we just received will be ignored
  1644. ClusterUnion(rgp->ignorescreen, rgp->ignorescreen, ignorescreen_rcvd);
  1645. rgp->rgppkt.seqno = max(rgp->rgppkt.seqno, rcvd_pkt_p->seqno) + 1;
  1646. regroup_restart();
  1647. send_status_pkts = 1;
  1648. RGP_UNLOCK;
  1649. break;
  1650. }
  1651. //////////////////////////// End of new Ignore Screen Stuff /////////////////////////////////
  1652. // Now ignorescreens of this node packet and incoming packet are the same //
  1653. // proceed with regular regroup processing //
  1654. /* Since the packet is acceptable, the regroup sequence number
  1655. * must be compared to that of this node. If the incoming message
  1656. * has a higher sequence number, then a new pass of the regroup
  1657. * algorithm has started. This node must accept the new sequence
* number, reinitialize its data, and start participating in
  1659. * the new pass. Also, the incoming message must be processed
  1660. * since, once the algorithm reinitializes, the sequence numbers
  1661. * now match.
  1662. *
  1663. * If the incoming packet has a matching sequence number, then it
  1664. * should be accepted. The knowledge of the global state of the
  1665. * algorithm it reflects must be merged with that already present
  1666. * in this node. Then this node must evaluate whether further
  1667. * state transitions are possible.
  1668. *
  1669. * Finally, if the incoming packet has a lower sequence number, then
  1670. * it comes from a node unaware of the current level of the global
  1671. * algorithm. The data in it should be ignored, but a packet should
  1672. * be sent to it so that it will reinitialize its algorithm.
  1673. *
  1674. * The sequence number is a 32 bit algebraic value - hopefully it
  1675. * will never wrap around.
  1676. */
  1677. if (rcvd_pkt_p->seqno < rgp->rgppkt.seqno)
  1678. { /* sender below current level - ignore but let him know it*/
  1679. RGP_TRACE( "RGP lower seqno ",
  1680. rgp->rgppkt.seqno, rcvd_pkt_p->seqno, /* TRACE */
  1681. rgp->rgppkt.stage, rcvd_pkt_p->stage ); /* TRACE */
  1682. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1683. rgp_broadcast(RGP_UNACK_REGROUP);
  1684. RGP_UNLOCK;
  1685. return;
  1686. }
  1687. if (rcvd_pkt_p->seqno > rgp->rgppkt.seqno)
  1688. { /* sender above current level - I must upgrade to it*/
  1689. // The node that forces a restart responsible for keeping
  1690. // track of restarts and making a decision who will die/be ignored
  1691. // if ( ++(rgp->restartcount) > RGP_RESTART_MAX )
  1692. // RGP_ERROR(RGP_INTERNAL_ERROR);
  1693. if ( (rgp->rgppkt.stage != RGP_STABILIZED) ||
  1694. ((rcvd_pkt_p->seqno - rgp->rgppkt.seqno) > 1) )
  1695. {
  1696. RGP_TRACE( "RGP higher seqno",
  1697. rgp->rgppkt.seqno, rcvd_pkt_p->seqno, /* TRACE */
  1698. rgp->rgppkt.stage, rcvd_pkt_p->stage );/* TRACE */
  1699. rgp->cautiousmode = 1;
  1700. }
  1701. rgp->rgppkt.seqno = rcvd_pkt_p->seqno;
  1702. /* Somebody else activated regroup. So, let's just copy */
  1703. /* the sender's reason code and reason nodes. */
  1704. rgp->rgppkt.reason = rcvd_pkt_p->reason;
  1705. rgp->rgppkt.activatingnode = rcvd_pkt_p->activatingnode;
  1706. rgp->rgppkt.causingnode = rcvd_pkt_p->causingnode;
  1707. regroup_restart();
  1708. send_status_pkts = 1;
  1709. } /* sender above current level - I must upgrade to it*/
  1710. /* Now we are at the same level - even if we weren't at first.
  1711. *
  1712. * If the sender has already commited to a view of the world
  1713. * that excludes me, I must halt in order to keep the system in
  1714. * a consistent state.
  1715. *
  1716. * This is true even with the split brain avoidance algorithm.
  1717. * The fact that stage1 = stage2 in the packet implies that the
  1718. * sender has already run the split brain avoidance algorithm
  1719. * and decided that he should survive.
  1720. */
  1721. if ( (rcvd_pkt_p->stage > RGP_ACTIVATED) &&
  1722. ClusterCompare(rcvd_pkt_p->knownstage1,
  1723. rcvd_pkt_p->knownstage2) &&
  1724. !ClusterMember(rcvd_pkt_p->knownstage1, rgp->mynode) )
  1725. {
  1726. ClusterInsert(rgp->ignorescreen, INT_NODE(causingnode) );
  1727. rgp->rgppkt.seqno ++;
  1728. regroup_restart();
  1729. send_status_pkts = 1;
  1730. RGP_UNLOCK;
  1731. // /* I must die for overall consistency. */
  1732. // RGP_ERROR((uint16) (RGP_PARIAH + causingnode)); // [Fixed]
  1733. break;
  1734. }
  1735. RGP_UNLOCK;
  1736. /* If I have terminated the active part of the algorithm, I
  1737. * am in stage 6 and am not routinely broadcasting my status
  1738. * anymore. If I get a packet from someone else who has not
  1739. * yet terminated, then I must send him the word. But if he
  1740. * has terminated, I must not send any packet or else there
  1741. * will be an infinite loop of packets bouncing back and forth.
  1742. */
  1743. if (rgp->rgppkt.stage == RGP_STABILIZED)
  1744. { /* I have terminated so can't learn anything more. */
  1745. if (!ClusterCompare(rcvd_pkt_p->knownstage5,
  1746. rgp->rgppkt.knownstage5))
  1747. { /* but sender has not so I must notify him */
  1748. ClusterInsert(rgp->status_targets, INT_NODE(causingnode));
  1749. rgp_broadcast(RGP_UNACK_REGROUP);
  1750. }
  1751. return;
  1752. }
  1753. /* At this point, the packet is from a legal node within the
  1754. * current round of the algorithm and I have not terminated
  1755. * at stage RGP_STABILIZED so I need to absorb whatever new
  1756. * info is in this packet.
  1757. *
  1758. * The way to merge what this packet says with what I already
  1759. * know is to just logically OR the known stage x fields
  1760. * together.
  1761. */
  1762. {
  1763. int seqno = rcvd_pkt_p->seqno&0xffff;
  1764. int stage = rcvd_pkt_p->stage&0xffff;
  1765. int trgs = *(int*)rgp->status_targets & 0xffff;
  1766. int node = INT_NODE(causingnode)&0xffff;
  1767. RGP_TRACE( "RGP recv pkt ",
  1768. ((seqno << 16) | stage),
  1769. RGP_MERGE_TO_32(
  1770. rcvd_pkt_p->knownstage1,
  1771. rcvd_pkt_p->knownstage2
  1772. ),
  1773. RGP_MERGE_TO_32(
  1774. rcvd_pkt_p->knownstage3,
  1775. rcvd_pkt_p->knownstage4
  1776. ),
  1777. (trgs << 16) | node
  1778. );
  1779. }
  1780. rgp_sanity_check(rcvd_pkt_p, "RGP Received packet");
  1781. rgp_sanity_check(&(rgp->rgppkt), "RGP Internal packet");
  1782. ClusterUnion(rgp->rgppkt.quorumowner, rcvd_pkt_p->quorumowner,
  1783. rgp->rgppkt.quorumowner);
  1784. ClusterUnion(rgp->rgppkt.knownstage1, rcvd_pkt_p->knownstage1,
  1785. rgp->rgppkt.knownstage1);
  1786. ClusterUnion(rgp->rgppkt.knownstage2, rcvd_pkt_p->knownstage2,
  1787. rgp->rgppkt.knownstage2);
  1788. ClusterUnion(rgp->rgppkt.knownstage3, rcvd_pkt_p->knownstage3,
  1789. rgp->rgppkt.knownstage3);
  1790. ClusterUnion(rgp->rgppkt.knownstage4, rcvd_pkt_p->knownstage4,
  1791. rgp->rgppkt.knownstage4);
  1792. ClusterUnion(rgp->rgppkt.knownstage5, rcvd_pkt_p->knownstage5,
  1793. rgp->rgppkt.knownstage5);
  1794. ClusterUnion(rgp->rgppkt.pruning_result, rcvd_pkt_p->pruning_result,
  1795. rgp->rgppkt.pruning_result);
  1796. /* But when I am in stage 2, it is possible that I can learn to
  1797. * recognize some node I have not previously recognized by hearing
  1798. * of it indirectly from some other node that I have recognized.
  1799. * To handle this case, I always merge knownstage1 info into
  1800. * the inner screen so that subsequent messages from the newly
  1801. * recognized node will be accepted and processed.
  1802. */
  1803. if ((rgp->rgppkt.stage == RGP_CLOSING) &&
  1804. !(rgp->tiebreaker_selected))
  1805. ClusterUnion(rgp->innerscreen, rgp->rgppkt.knownstage1,
  1806. rgp->innerscreen);
  1807. /* In the first two stages of regroup, the inter-node connectivity
  1808. * information is collected and propagated. When we get a regroup
  1809. * packet, we turn ON the bit corresponding to the [our-node,
  1810. * sender-node] entry in the connectivity matrix. We also OR in
  1811. * the matrix sent by the sender node in the regroup packet.
  1812. *
  1813. * The matrix is not updated if we are in stage 1 and haven't
  1814. * received the first clock tick. This is to prevent the
  1815. * node pruning algorithm from considering us alive if our
  1816. * timer mechanism is disrupted, but the IPC mechanism is OK.
  1817. */
/* [GorN 01/07/2000] If we are not collecting connectivity information
* until we receive a first tick, we can run into problems if the node is
  1820. * killed right after it send out its first timer driven packet
  1821. * (which doesn't have any connectivity info yet). This can cause a
  1822. * confusion. See bug 451792.
  1823. *
  1824. * What we will do is we will collect connectivity information on
  1825. * the side even when rgp->sendstage is FALSE and move it into the regroup
  1826. * packet if we ever get a clock tick
  1827. */
  1828. if (rgp->rgppkt.stage < RGP_PRUNING && !rgp->sendstage)
  1829. {
  1830. MatrixSet(rgp->internal_connectivity_matrix,
  1831. rgp->mynode, INT_NODE(causingnode));
  1832. if (causingnode != EXT_NODE(rgp->mynode))
  1833. MatrixOr(rgp->internal_connectivity_matrix,
  1834. rcvd_pkt_p->connectivity_matrix);
  1835. }
  1836. if ((rgp->rgppkt.stage < RGP_PRUNING) && rgp->sendstage)
  1837. {
  1838. MatrixSet(rgp->rgppkt.connectivity_matrix,
  1839. rgp->mynode, INT_NODE(causingnode));
  1840. if (causingnode != EXT_NODE(rgp->mynode))
  1841. MatrixOr(rgp->rgppkt.connectivity_matrix,
  1842. rcvd_pkt_p->connectivity_matrix);
  1843. }
  1844. /* Now, I can evaluate whether additional state transitions are
  1845. * possible as a result of the info just received.
  1846. */
  1847. oldstage = rgp->rgppkt.stage;
  1848. // QuorumCheck now runs in a separate thread
  1849. // if (oldstage != RGP_CLOSING) // Cannot run Quorumcheck from here.
  1850. evaluatestageadvance();
  1851. /* To speed things up, let us broadcast our status if our
  1852. * stage has changed and we are willing to let others and
  1853. * ourselves see it.
  1854. */
  1855. if ( (oldstage != rgp->rgppkt.stage) && rgp->sendstage )
  1856. send_status_pkts = 1; /* broadcast at once to speed things up */
  1857. break;
  1858. } /* received an rgp packet */
  1859. //
  1860. // We do not support power failure notifications in NT
  1861. //
  1862. #if defined(NT)
  1863. CL_ASSERT(event != RGP_EVT_POWERFAIL);
  1864. //
  1865. // Fall thru to default case
  1866. //
  1867. #else // NT
  1868. case RGP_EVT_POWERFAIL :
  1869. { /* Our node got a power up interrupt or an indication of power
  1870. * failure from another node. */
  1871. /* Note that this code will unconditionally abort and restart
  1872. * the algorithm even if it was active before the power failure.
  1873. * The new incident must be in cautious mode.
  1874. */
  1875. rgp->cautiousmode = 1;
  1876. rgp->rgppkt.seqno = rgp->rgppkt.seqno + 1;
  1877. rgp->rgppkt.reason = RGP_EVT_POWERFAIL;
  1878. rgp->rgppkt.activatingnode = (uint8) EXT_NODE(rgp->mynode);
  1879. rgp->rgppkt.causingnode = (uint8) causingnode;
  1880. /* rgp->pfail_state is set to a non-zero value when a pfail event
  1881. * is reported to regroup. It is decremented at every regroup clock
  1882. * tick till it reaches zero. While this number is non-zero, missing
  1883. * self IamAlives are ignored and do not cause the node to halt.
  1884. * This gives the sending hardware some time to recover from power
  1885. * failures before self IamAlives are checked.
  1886. */
  1887. if (causingnode == EXT_NODE(rgp->mynode))
  1888. rgp->pfail_state = RGP_PFAIL_TICKS;
  1889. /* Store the fact that causingnode experienced a PFAIL,
  1890. * for reporting to the message system when regroup stabilizes.
  1891. */
  1892. ClusterInsert(rgp->rgppkt.hadpowerfail, INT_NODE(causingnode));
  1893. regroup_restart();
  1894. send_status_pkts = 1;
  1895. break;
  1896. } /* power failure */
  1897. #endif // NT
  1898. default :
  1899. {
  1900. RGP_ERROR(RGP_INTERNAL_ERROR);
  1901. }
  1902. }
  1903. if (send_status_pkts) /* significant change - send status at once */
  1904. {
  1905. ClusterUnion(rgp->status_targets,
  1906. rgp->outerscreen, rgp->status_targets);
  1907. rgp_broadcast(RGP_UNACK_REGROUP);
  1908. }
  1909. }
  1910. /************************************************************************
  1911. * rgp_check_packet
  1912. * =================
  1913. *
  1914. * Description:
  1915. *
  1916. * verifies that RGP packet has reasonable values in
  1917. * powerfail, knownstages, pruning_result, and connectivity_matrix fields
  1918. *
  1919. * Parameters:
  1920. *
  1921. * rgp_pkt_t* pkt -
  1922. * packet to be checked
  1923. *
  1924. * Returns:
  1925. *
  1926. * 0 - packet looks good
  1927. * 1,2,3... - strange looking packet
  1928. *
  1929. ************************************************************************/
  1930. int rgp_check_packet(rgp_pkt_t* pkt) {
  1931. node_t i;
  1932. //
  1933. // Verify that
  1934. // knownstage5 \subset knownstage4 \subset knownstage3 \subset
  1935. // knownstage2 \subset knownstage1 \subset rgp->rgpinfo.cluster
  1936. //
  1937. // int ClusterSubsetOf(cluster_t big, cluster_t small)
  1938. // Returns 1 if set small = set big or small is a subset of big.
  1939. //
  1940. if( !ClusterSubsetOf(pkt->knownstage4, pkt->knownstage5) ) {
  1941. return 5;
  1942. }
  1943. if( !ClusterSubsetOf(pkt->knownstage3, pkt->knownstage4) ) {
  1944. return 4;
  1945. }
  1946. if( !ClusterSubsetOf(pkt->knownstage2, pkt->knownstage3) ) {
  1947. return 3;
  1948. }
  1949. if( !ClusterSubsetOf(pkt->knownstage1, pkt->knownstage2) ) {
  1950. return 2;
  1951. }
  1952. if( !ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->knownstage1) ) {
  1953. return 1;
  1954. }
  1955. //
  1956. // pruning_result has to be a subset of knownstage2
  1957. //
  1958. if( !ClusterSubsetOf(pkt->knownstage2, pkt->pruning_result) ) {
  1959. return 9;
  1960. }
  1961. //
  1962. // quorumowner has to be a subset of original cluster
  1963. //
  1964. if(!ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->quorumowner)) {
  1965. return 8;
  1966. }
  1967. //
  1968. // Check connectivity matrix
  1969. //
  1970. for(i = 0; i < MAX_CLUSTER_SIZE; ++i) {
  1971. if( ClusterMember( rgp->rgpinfo.cluster, i ) ) {
  1972. //
  1973. // Node i is a member of a cluster
  1974. // Its connectivity bitmap has to be a subset of rgp->rgpinfo.cluster
  1975. //
  1976. if(!ClusterSubsetOf(rgp->rgpinfo.cluster, pkt->connectivity_matrix[i])) {
  1977. return 10;
  1978. }
  1979. } else {
  1980. //
  1981. // Node i is not a member of a cluster
  1982. // Its connectivity bitmap has to be 0
  1983. //
  1984. if(!ClusterEmpty(pkt->connectivity_matrix[i]))
  1985. return 11;
  1986. }
  1987. }
  1988. return 0;
  1989. }
  1990. /************************************************************************
  1991. * rgp_print_packet
  1992. * =================
  1993. *
  1994. * Description:
  1995. *
  1996. * Prints RGP packet fields
  1997. *
  1998. * Parameters:
  1999. *
  2000. * rgp_pkt_t* pkt -
  2001. * packet to be printed
  2002. * char* label -
  2003. * label to be printed together with a packet
  2004. * int code -
  2005. * a number to be printed together with a packet
  2006. *
  2007. * Returns:
  2008. *
  2009. * VOID
  2010. *
  2011. ************************************************************************/
  2012. void rgp_print_packet(rgp_pkt_t* pkt, char* label, int code)
  2013. {
  2014. uint8 pktsubtype;
  2015. uint8 stage;
  2016. uint16 reason;
  2017. uint32 seqno;
  2018. uint8 activatingnode;
  2019. uint8 causingnode;
  2020. cluster_t quorumowner;
  2021. RGP_TRACE( label,
  2022. pkt->seqno, /* TRACE */
  2023. code,
  2024. (pkt->stage << 16) |
  2025. (pkt->activatingnode << 8) |
  2026. (pkt->causingnode), /* TRACE */
  2027. RGP_MERGE_TO_32( rgp->outerscreen,
  2028. rgp->innerscreen )
  2029. );
  2030. RGP_TRACE( "RGP CHK masks ",
  2031. RGP_MERGE_TO_32( rgp->rgpinfo.cluster, /* TRACE */
  2032. pkt->quorumowner ), /* TRACE */
  2033. RGP_MERGE_TO_32( pkt->knownstage1, /* TRACE */
  2034. pkt->knownstage2 ), /* TRACE */
  2035. RGP_MERGE_TO_32( pkt->knownstage3, /* TRACE */
  2036. pkt->knownstage4 ), /* TRACE */
  2037. RGP_MERGE_TO_32( pkt->knownstage5, /* TRACE */
  2038. pkt->pruning_result ) ); /* TRACE */
  2039. RGP_TRACE( "RGP CHK Con. matrix1",
  2040. RGP_MERGE_TO_32( pkt->connectivity_matrix[0], /*TRACE*/
  2041. pkt->connectivity_matrix[1] ), /*TRACE*/
  2042. RGP_MERGE_TO_32( pkt->connectivity_matrix[2], /*TRACE*/
  2043. pkt->connectivity_matrix[3] ), /*TRACE*/
  2044. RGP_MERGE_TO_32( pkt->connectivity_matrix[4], /*TRACE*/
  2045. pkt->connectivity_matrix[5] ), /*TRACE*/
  2046. RGP_MERGE_TO_32( pkt->connectivity_matrix[6], /*TRACE*/
  2047. pkt->connectivity_matrix[7])); /*TRACE*/
  2048. RGP_TRACE( "RGP CHK Con. matrix2",
  2049. RGP_MERGE_TO_32( pkt->connectivity_matrix[8], /*TRACE*/
  2050. pkt->connectivity_matrix[9] ), /*TRACE*/
  2051. RGP_MERGE_TO_32( pkt->connectivity_matrix[10], /*TRACE*/
  2052. pkt->connectivity_matrix[11]), /*TRACE*/
  2053. RGP_MERGE_TO_32( pkt->connectivity_matrix[12], /*TRACE*/
  2054. pkt->connectivity_matrix[13]), /*TRACE*/
  2055. RGP_MERGE_TO_32( pkt->connectivity_matrix[14], /*TRACE*/
  2056. pkt->connectivity_matrix[15]));/*TRACE*/
  2057. }
  2058. /************************************************************************
  2059. * UnpackIgnoreScreen
  2060. * =================
  2061. *
  2062. * Description:
  2063. *
  2064. * Extracts ignorescreen out of regroup packet
  2065. *
  2066. * Parameters:
  2067. *
  2068. * rgp_pkt_t* from -
  2069. * source packet
  2070. * cluster_t to -
  2071. * target node set
  2072. *
  2073. * Returns:
  2074. *
  2075. * VOID
  2076. *
  2077. * Comments:
  2078. *
  2079. * If the packet is received from NT4 node, unpacked ignorescreen
* will always be 0.
  2081. *
  2082. ************************************************************************/
  2083. void UnpackIgnoreScreen(rgp_pkt_t* from, cluster_t to)
  2084. {
  2085. #pragma warning( push )
  2086. #pragma warning( disable : 4244 )
  2087. if (from->reason < RGP_EVT_IGNORE_MASK) {
  2088. ClusterInit(to);
  2089. } else {
  2090. to[0] = ((uint16)from->reason) >> 8;
  2091. to[1] = (uint8)from->causingnode;
  2092. }
  2093. #pragma warning( pop )
  2094. }
  2095. /************************************************************************
* PackIgnoreScreen
  2097. * =================
  2098. *
  2099. * Description:
  2100. *
  2101. * Put an ignorescreen back into a regroup packet
  2102. *
  2103. * Parameters:
  2104. *
  2105. * rgp_pkt_t* to -
  2106. * packet to be updated
  2107. * cluster_t from -
  2108. * source node set
  2109. *
  2110. * Returns:
  2111. *
  2112. * VOID
  2113. *
  2114. ************************************************************************/
  2115. void PackIgnoreScreen(rgp_pkt_t* to, cluster_t from)
  2116. {
  2117. if ( ClusterEmpty(from) ) {
  2118. to->reason &= 255;
  2119. to->causingnode = 0;
  2120. } else {
  2121. to->reason = (uint8)RGP_EVT_IGNORE_MASK | (from[0] << 8);
  2122. to->causingnode = from[1];
  2123. }
  2124. }
  2125. /*---------------------------------------------------------------------------*/
  2126. #ifdef __cplusplus
  2127. }
  2128. #endif /* __cplusplus */
  2129. #if 0
  2130. History of changes to this file:
  2131. -------------------------------------------------------------------------
  2132. 1995, December 13 F40:KSK0610 /*F40:KSK06102.2*/
  2133. This file is part of the portable Regroup Module used in the NonStop
  2134. Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
  2135. are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
  2136. srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
  2137. The last two are simulation files to test the Regroup Module on a
  2138. UNIX workstation in user mode with processes simulating processor nodes
  2139. and UDP datagrams used to send unacknowledged datagrams.
  2140. This file was first submitted for release into NSK on 12/13/95.
  2141. ------------------------------------------------------------------------------
  2142. This change occurred on 19 Jan 1996 /*F40:MB06458.1*/
  2143. Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.2*/
  2144. - Some cleanup of the code /*F40:MB06458.3*/
  2145. - Increment KCCB counters to count the number of setup messages and /*F40:MB06458.4*/
  2146. unsequenced messages sent. /*F40:MB06458.5*/
  2147. - Fixed some bugs /*F40:MB06458.6*/
  2148. - Disable interrupts before allocating broadcast sibs. /*F40:MB06458.7*/
  2149. - Change per-packet-timeout to 5ms /*F40:MB06458.8*/
  2150. - Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.9*/
  2151. tnet services queue. /*F40:MB06458.10*/
  2152. - Call the millicode backdoor to get the processor status from SP /*F40:MB06458.11*/
  2153. - Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.12*/
  2154. - Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.13*/
  2155. to be unstoppable before calling this routine. /*F40:MB06458.14*/
  2156. - Added new steps in the build file called /*F40:MB06458.15*/
  2157. MSGSYS_C - compiles all the message system C files /*F40:MB06458.16*/
  2158. MSDRIVER - compiles all the MSDriver files /*F40:MB06458.17*/
  2159. REGROUP - compiles all the regroup files /*F40:MB06458.18*/
  2160. - remove #pragma env libspace because we set it as a command line /*F40:MB06458.19*/
  2161. parameter. /*F40:MB06458.20*/
  2162. ----------------------------------------------------------------------- /*F40:MB06458.21*/
  2163. #endif /* 0 - change descriptions */