Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1621 lines
54 KiB

  1. #ifdef __TANDEM
  2. #pragma columns 79
  3. #pragma page "srgpos.c - T9050 - OS-dependent routines for Regroup Module"
  4. #endif
  5. /* @@@ START COPYRIGHT @@@
  6. ** Tandem Confidential: Need to Know only
  7. ** Copyright (c) 1995, Tandem Computers Incorporated
  8. ** Protected as an unpublished work.
  9. ** All Rights Reserved.
  10. **
  11. ** The computer program listings, specifications, and documentation
  12. ** herein are the property of Tandem Computers Incorporated and shall
  13. ** not be reproduced, copied, disclosed, or used in whole or in part
  14. ** for any reason without the prior express written permission of
  15. ** Tandem Computers Incorporated.
  16. **
  17. ** @@@ END COPYRIGHT @@@
  18. **/
  19. /*---------------------------------------------------------------------------
  20. * This file (srgpos.c) contains OS-specific code used by Regroup.
  21. *---------------------------------------------------------------------------*/
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif /* __cplusplus */
  25. #include <wrgp.h>
  26. #ifdef NSK
  27. #include <pmsgrgp.h>
  28. #endif /* NSK */
  29. #if defined(NT)
  30. DWORD
  31. MmSetThreadPriority(
  32. VOID
  33. );
  34. void
  35. NT_timer_thread(
  36. void
  37. );
  38. PWCHAR
  39. RgpGetNodeNameFromId(
  40. node_t
  41. );
  42. #endif // NT
/* The global pointer to regroup's internal data structure. */
#ifdef NSK
/* The global regroup pointer is #defined to a pointer in the message
 * system root structure.
 */
#endif
#if defined(LCU) || defined(UNIX) || defined(NT)
/* Null until OS-specific initialization allocates the control block. */
rgp_control_t *rgp = (rgp_control_t *) RGP_NULL_PTR;
/* quorum owner can be set by the forming node before rgp is initialized */
DWORD QuorumOwner = MM_INVALID_NODE;
#endif /* LCU || UNIX || NT */
  54. #ifdef LCU
  55. /************************************************************************
  56. * rgp_lcu_serv_listen
  57. * ===================
  58. *
  59. * Description:
  60. *
  61. * This is an LCU-specific routine that gets called in IPC interrupt
  62. * context when a datagram addressed to the Regroup Module is received.
  63. *
  64. * Parameters:
  65. *
  66. * void *listen_callarg - required param, unused by regroup
  67. * lcumsg_t *lcumsgp - pointer to message
  68. * uint moredata - required param, unused by regroup
  69. *
  70. * Returns:
  71. *
  72. * int - Always returns ELCU_OK
  73. *
  74. * Algorithm:
  75. *
  76. * The routine simply picks apart the arguments and calls
  77. * rgp_received_packet().
  78. *
  79. *
  80. ************************************************************************/
  81. _priv _resident int
  82. rgp_lcu_serv_listen(void *listen_callarg, lcumsg_t *lcumsgp, uint moredata)
  83. {
  84. /* Ignore if the packet is not from the local system. */
  85. if (lcumsgp->lcu_sysnum == rgp->OS_specific_control.my_sysnum)
  86. rgp_received_packet(lcumsgp->lcu_node,
  87. lcumsgp->lcu_reqmbuf.lcu_ctrlbuf,
  88. lcumsgp->lcu_reqmbuf.lcu_ctrllen);
  89. return(ELCU_OK);
  90. }
  91. /************************************************************************
  92. * rgp_lcu_event_callback
  93. * ======================
  94. *
  95. * Description:
  96. *
  97. * This is an LCU-specific routine that gets called in IPC interrupt
  98. * context when the LCUEV_NODE_UNREACHABLE event is generated.
  99. *
  100. * Parameters:
  101. *
  102. * ulong event - event # (= LCUEV_NODE_UNREACHABLE)
  103. * sysnum_t sysnum - system # (= local system #)
  104. * nodenum_t node - # of node that is unreachable
  105. * int event_info - required parameter, unused by regroup
  106. *
  107. * Returns:
  108. *
  109. * void - no return value
  110. *
  111. * Algorithm:
  112. *
  113. * The routine simply transforms the LCU event into the regroup event
  114. * RGP_EVT_NODE_UNREACHABLE and calls rgp_event_handler().
  115. *
  116. ************************************************************************/
  117. _priv _resident void
  118. rgp_lcu_event_callback(
  119. ulong event,
  120. sysnum_t sysnum,
  121. nodenum_t node,
  122. int event_info)
  123. {
  124. /* Sanity checks:
  125. * (1) The event must be LCUEV_NODE_UNREACHABLE, the only event
  126. * we asked for.
  127. * (1) The event must be for the local system, the only system
  128. * we asked for.
  129. */
  130. if ((event != LCUEV_NODE_UNREACHABLE) ||
  131. (sysnum != rgp->OS_specific_control.my_sysnum))
  132. RGP_ERROR(RGP_INTERNAL_ERROR);
  133. rgp_event_handler(RGP_EVT_NODE_UNREACHABLE, node);
  134. }
  135. #endif /* LCU */
  136. /************************************************************************
  137. * rgp_init_OS
  138. * ===========
  139. *
  140. * Description:
  141. *
  142. * This routine does OS-dependent regroup initialization such as
  143. * initializing the regroup data structure lock, requesting a
  144. * periodic timer to be installed and registering the callback
  145. * routine for receiving regroup's unacknowledged packets.
  146. *
  147. * Parameters:
  148. *
  149. * None
  150. *
  151. * Returns:
  152. *
  153. * void - no return value
  154. *
  155. * Algorithm:
  156. *
  157. * OS-dependent initializations.
  158. *
  159. ************************************************************************/
_priv _resident void
rgp_init_OS(void)
{
#ifdef UNIX
   struct sigaction sig_action; /* to install signals */
#endif
#ifdef LCU
   sysnum_t sysnum;
   lcumsg_t *lcumsgp;
#endif
#ifdef NT
   HANDLE tempHandle;
   DWORD threadID = 0;
#endif

#if defined(NSK) || defined(UNIX) || defined(NT)
   /*
    * In NSK, the regroup caller ensures that timer and IPC interrupts
    * are disabled before the regroup routines are called. Therefore,
    * there is no regroup lock initialization. Also, rather than using
    * registration of callback routines, the appropriate routine names
    * are hard coded into routines that must call them. Thus, the timer
    * routine is called from POLLINGCHECK, the periodic message system
    * routine, and the packet reception routine is called from the
    * IPC interrupt handler.
    */

   /* Initialize the unchanging fields in the rgp_msgsys struct:
    * the three send buffers (regroup status, iamalive, poison) point
    * directly into the rgp control structure.
    */
   rgp->rgp_msgsys_p->regroup_data = (void *) &(rgp->rgppkt_to_send);
   rgp->rgp_msgsys_p->regroup_datalen = RGPPKTLEN;
   rgp->rgp_msgsys_p->iamalive_data = (void *) &(rgp->iamalive_pkt);
   rgp->rgp_msgsys_p->iamalive_datalen = IAMALIVEPKTLEN;
   rgp->rgp_msgsys_p->poison_data = (void *) &(rgp->poison_pkt);
   rgp->rgp_msgsys_p->poison_datalen = POISONPKTLEN;
#endif /* NSK || UNIX || NT */

#ifdef LCU
   /* Install the periodic regroup timer tick. */
   if (itimeout(rgp_periodic_check,
                NULL, /* parameter pointer */
                ((RGP_CLOCK_PERIOD * HZ) / 100) | TO_PERIODIC,
                plstr /* interrupt priority level */
               ) == 0)
      RGP_ERROR(RGP_INTERNAL_ERROR);

   /* Register the callback for regroup's unacknowledged datagrams. */
   if (lcuxprt_listen(LCU_RGP_PORT,
                      rgp_lcu_serv_listen,
                      NULL /* no call arg */,
                      NULL /* no options */
                     ) != ELCU_OK)
      RGP_ERROR(RGP_INTERNAL_ERROR);

   if (lcuxprt_config(LCU_GET_MYSYSNUM, &sysnum) != ELCU_OK)
      RGP_ERROR(RGP_INTERNAL_ERROR);
   rgp->OS_specific_control.my_sysnum = sysnum;

   /* Allocate 3 message buffers to send regroup packets, iamalive packets
    * and poison packets.
    */
   if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
      RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
   rgp->OS_specific_control.lcumsg_regroup_p = lcumsgp;
   lcumsgp->lcu_tag = NULL;
   lcumsgp->lcu_sysnum = sysnum;
   lcumsgp->lcu_port = LCU_RGP_PORT;
   lcumsgp->lcu_flags = LCUMSG_CRITICAL;
   lcumsgp->lcu_reqmbuf.lcu_ctrllen = RGPPKTLEN;
   lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->rgppkt_to_send);

   if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
      RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
   rgp->OS_specific_control.lcumsg_iamalive_p = lcumsgp;
   lcumsgp->lcu_tag = NULL;
   lcumsgp->lcu_sysnum = sysnum;
   lcumsgp->lcu_port = LCU_RGP_PORT;
   lcumsgp->lcu_reqmbuf.lcu_ctrllen = IAMALIVEPKTLEN;
   lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->iamalive_pkt);

   if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
      RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
   rgp->OS_specific_control.lcumsg_poison_p = lcumsgp;
   lcumsgp->lcu_tag = NULL;
   lcumsgp->lcu_sysnum = sysnum;
   lcumsgp->lcu_port = LCU_RGP_PORT;
   lcumsgp->lcu_reqmbuf.lcu_ctrllen = POISONPKTLEN;
   lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->poison_pkt);

   /* Register to get the LCUEV_NODE_UNREACHABLE event. */
   if (lcuxprt_events(LCU_CATCH_EVENTS, sysnum, LCUEV_NODE_UNREACHABLE,
                      rgp_lcu_event_callback) != ELCU_OK)
      RGP_ERROR(RGP_INTERNAL_ERROR);
#endif /* LCU */

#ifdef UNIX
   /* For testing on UNIX at user level, we use alarm() to simulate timer
    * ticks. */

   /* Install the alarm handler. */
   sig_action.sa_flags = 0;
   sig_action.sa_handler = alarm_handler;
   sigemptyset(&(sig_action.sa_mask));

   /* Block messages when handling timer pops. */
   sigaddset(&(sig_action.sa_mask), SIGPOLL);
   sigaction(SIGALRM, &sig_action, NULL);
   alarm_callback = rgp_periodic_check;

   /* Round up the alarm period to the next higher second. */
   alarm_period = (RGP_CLOCK_PERIOD + 99) / 100;

   /* Get first timer tick as soon as possible; subsequent ones will be
    * at alarm_period.
    */
   alarm(1);
#endif /* UNIX */

#ifdef NT
   /* On NT we create a separate thread that will be our timer. */
   /* The Timer Thread waits on TimerSignal Event to indicate an RGP rate change. */
   /* An RGP rate of 0 is a signal for the Timer Thread to exit */
   tempHandle = CreateEvent ( NULL,  /* no security */
                              FALSE, /* Autoreset */
                              TRUE,  /* Initial State is Signalled */
                              NULL); /* No name */
   if ( !tempHandle )
   {
      RGP_ERROR (RGP_INTERNAL_ERROR);
   }
   rgp->OS_specific_control.TimerSignal = tempHandle;

   /* Manual-reset event: signalled while regroup is stable. */
   tempHandle = CreateEvent ( NULL,  /* no security */
                              TRUE,  /* Manual reset */
                              TRUE,  /* Initial State is Signalled */
                              NULL); /* No name */
   if ( !tempHandle )
   {
      RGP_ERROR (RGP_INTERNAL_ERROR);
   }
   rgp->OS_specific_control.Stabilized = tempHandle;

   /* Reset arbitration/shutdown bookkeeping to a clean state. */
   rgp->OS_specific_control.ArbitrationInProgress = FALSE;
   rgp->OS_specific_control.ArbitratingNode = MM_INVALID_NODE;
   rgp->OS_specific_control.ApproxArbitrationWinner = MM_INVALID_NODE;
   rgp->OS_specific_control.ShuttingDown = FALSE;

   tempHandle = CreateThread( 0, /* security */
                              0, /* stack size - use same as primary thread */
                              (LPTHREAD_START_ROUTINE)NT_timer_thread, /* starting point */
                              (VOID *) NULL, /* no parameter */
                              0, /* create flags - start immediately */
                              &threadID ); /* thread ID returned here */
   if ( !tempHandle )
   {
      RGP_ERROR( RGP_INTERNAL_ERROR ); /* at least for now */
   }
   rgp->OS_specific_control.TimerThread = tempHandle;
   rgp->OS_specific_control.TimerThreadId = threadID;
   rgp->OS_specific_control.UpDownCallback = RGP_NULL_PTR;
   rgp->OS_specific_control.NodesDownCallback = RGP_NULL_PTR;
   rgp->OS_specific_control.EventEpoch = 0;

#if defined TDM_DEBUG
   rgp->OS_specific_control.debug.frozen = 0;
   rgp->OS_specific_control.debug.reload_in_progress = 0;
   rgp->OS_specific_control.debug.timer_frozen = 0;
   rgp->OS_specific_control.debug.doing_tracing = 0;
   rgp->OS_specific_control.debug.MyTestPoints.TestPointWord = 0;
   // seed the random number function used in testing
   srand((unsigned) time( NULL ) );
#endif
#endif /* NT */
}
/************************************************************************
 * rgp_cleanup_OS
 * ==============
 *
 * Description:
 *
 *    This routine does OS-dependent cleanup of regroup structures
 *    and timer thread activity to ready for a new JOIN attempt.
 *
 * Parameters:
 *
 *    None
 *
 * Returns:
 *
 *    void - no return value
 *
 * Algorithm:
 *
 *    OS-dependent cleanup.
 *
 ************************************************************************/
_priv _resident void
rgp_cleanup_OS(void)
{
#if defined (NT)
   // Tell Timer Thread to restart RGP Timer
   // a_tick might have changed.
   SetEvent( rgp->OS_specific_control.TimerSignal);
#endif // NT
}
/************************************************************************
 * rgp_update_regroup_packet
 * =========================
 *
 * Description:
 *
 *    Macro to copy the current regroup status into the regroup packet
 *    sending buffer.
 *
 * Parameters:
 *
 *    None
 *
 * Algorithm:
 *
 *    Copies the status (which is already in the form of a regroup status
 *    packet) into the packet buffer. Then, if we should let others (and
 *    ourselves) know of our stage, the current knownstage field is
 *    updated to include the local node number.
 *
 ************************************************************************/
#define rgp_update_regroup_packet                                           \
   do                                                                       \
   {                                                                        \
      /* Copy the regroup status to the sending packet area. */             \
      rgp->rgppkt_to_send = rgp->rgppkt;                                    \
                                                                            \
      /* If we should let others know of our stage, we must modify the     \
       * current stage mask to include ourselves.                          \
       */                                                                   \
      if (rgp->sendstage)                                                   \
         switch (rgp->rgppkt.stage)                                         \
         {                                                                  \
            case RGP_ACTIVATED:                                             \
               ClusterInsert(rgp->rgppkt_to_send.knownstage1, rgp->mynode); \
               break;                                                       \
            case RGP_CLOSING:                                               \
               ClusterInsert(rgp->rgppkt_to_send.knownstage2, rgp->mynode); \
               break;                                                       \
            case RGP_PRUNING:                                               \
               ClusterInsert(rgp->rgppkt_to_send.knownstage3, rgp->mynode); \
               break;                                                       \
            case RGP_PHASE1_CLEANUP:                                        \
               ClusterInsert(rgp->rgppkt_to_send.knownstage4, rgp->mynode); \
               break;                                                       \
            case RGP_PHASE2_CLEANUP:                                        \
               ClusterInsert(rgp->rgppkt_to_send.knownstage5, rgp->mynode); \
               break;                                                       \
            default:                                                        \
               break;                                                       \
         }                                                                  \
   } while(0)
/************************************************************************
 * rgp_update_poison_packet
 * ========================
 *
 * Description:
 *
 *    Macro to copy the current regroup status into the poison packet
 *    sending buffer.
 *
 * Parameters:
 *
 *    None
 *
 * Algorithm:
 *
 *    Copies the appropriate regroup status fields into the poison
 *    packet buffer to help debugging when a dump of a poisoned
 *    node is examined.
 *
 ************************************************************************/
#define rgp_update_poison_packet                                     \
   do                                                                \
   {                                                                 \
      /* Snapshot identifying fields of the current incident. */     \
      rgp->poison_pkt.seqno = rgp->rgppkt.seqno;                     \
      rgp->poison_pkt.reason = rgp->rgppkt.reason;                   \
      rgp->poison_pkt.activatingnode = rgp->rgppkt.activatingnode;   \
      rgp->poison_pkt.causingnode = rgp->rgppkt.causingnode;         \
      ClusterCopy(rgp->poison_pkt.initnodes, rgp->initnodes);        \
      ClusterCopy(rgp->poison_pkt.endnodes, rgp->endnodes);          \
   } while(0)
  425. /************************************************************************
  426. * rgp_broadcast
  427. * =============
  428. *
  429. * Description:
  430. *
  431. * This routine asks the message system to broadcast an unacknowledged
  432. * packet of subtype "packet_subtype" to a set of nodes indicated in
  433. * an appropriate field in the rgp control struct. How the broadcast
  434. * is implemented depends on the OS.
  435. *
  436. * Parameters:
  437. *
  438. * uint8 packet_subtype - type of unsequenced packet to send
  439. *
  440. * Returns:
  441. *
  442. * void - no return value
  443. *
  444. * Algorithm:
  445. *
  446. * The same data packet is to be sent to the set of nodes indicated
  447. * in the rgp control struct field. The sending can be done by queueing
  448. * the packets directly to the send engine or the send can be deferred
  449. * to a lower priority interrupt level. The former approach reduces
  450. * the latency for sending these urgent packets while the latter
  451. * approach may reduce the number of sends if several requests to
  452. * send the same type of packets (this is true only of regroup
  453. * packets) are made in quick succession. In this case, previous
  454. * requests are overwritten by later requests. This is OK since the
  455. * regroup algorithm has enough redundancy in packet sending.
  456. *
  457. * In NSK, the message system provides a broadcast facility for
  458. * unacknowledged packets. It copies regroup's packet into its own
  459. * buffer and issues multiple requests to the SNet services layer.
  460. * When it copies the buffer, it disables the timer and IPC
  461. * interrupts ensuring that there will be no contention with Regroup.
  462. * Therefore, this routine can safely update the packet area here
  463. * without checking if the sending apparatus has completed sending
  464. * the previous packet.
  465. *
  466. * This is not true of LCU where the message system does not
  467. * provide a broadcast facility. In LCU, the updating of the packet
  468. * buffer can be done only when the send engine has completed
  469. * sending. This is assured only in the send completion interrupt
  470. * handler (rgp_msgsys_work).
  471. *
  472. ************************************************************************/
_priv _resident void
rgp_broadcast(uint8 packet_subtype)
{
   cluster_t temp_cluster;

   switch (packet_subtype)
   {
      case RGP_UNACK_REGROUP :
         /* Trace the queueing of regroup status packets. */
         RGP_TRACE( "RGP Send packets",
                    rgp->rgppkt.stage,                           /* TRACE */
                    RGP_MERGE_TO_32( rgp->status_targets,        /* TRACE */
                                     rgp->rgppkt.knownstage1 ),  /* TRACE */
                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage2,    /* TRACE */
                                     rgp->rgppkt.knownstage3 ),  /* TRACE */
                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage4,    /* TRACE */
                                     rgp->rgppkt.knownstage5 ) );/* TRACE */

#if defined(NSK) || defined(UNIX) || defined(NT)
         /* In NSK, the packet buffer can be updated even if the send
          * engine is working on the previous send. See algorithm
          * description above.
          */
         if ((rgp->rgppkt.reason == MM_EVT_LEAVE) &&
             (rgp->rgppkt.causingnode == rgp->mynode))
            // If a LEAVE event is in progress exclude our node from knownstage mask
            rgp->rgppkt_to_send = rgp->rgppkt;
         else
            // copy regroup packet and insert our node number into knownstage mask
            rgp_update_regroup_packet;
#endif /* NSK || UNIX || NT */

         /* Merge the requested targets into the shared regroup/msgsys
          * destination set.
          */
         ClusterUnion(rgp->rgp_msgsys_p->regroup_nodes,
                      rgp->status_targets,
                      rgp->rgp_msgsys_p->regroup_nodes);

         /* Clear the targets field in the rgp_control struct after
          * copying this info. The message system must clear the target
          * bits in the common regroup/msgsys struct after sending the
          * packets.
          */
         ClusterInit(rgp->status_targets);
         rgp->rgp_msgsys_p->sendrgppkts = 1;
         break;

      case RGP_UNACK_IAMALIVE :
         /* Count number of IamAlive requests queued. */
         RGP_INCREMENT_COUNTER( QueuedIAmAlive );

         /* IamAlives go to every node in the current cluster. */
         ClusterUnion(rgp->rgp_msgsys_p->iamalive_nodes,
                      rgp->rgpinfo.cluster,
                      rgp->rgp_msgsys_p->iamalive_nodes);
         rgp->rgp_msgsys_p->sendiamalives = 1;

         /* No targets field to clear in the rgp_control struct.
          * The message system must clear the target bits in the common
          * regroup/msgsys struct after sending the packets.
          */
         break;

      case RGP_UNACK_POISON :
         /* Trace the sending of poison packets. */
         RGP_TRACE( "RGP Send poison ",
                    rgp->rgppkt.stage,                           /* TRACE */
                    RGP_MERGE_TO_32( rgp->poison_targets,        /* TRACE */
                                     rgp->rgppkt.knownstage1 ),  /* TRACE */
                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage2,    /* TRACE */
                                     rgp->rgppkt.knownstage3 ),  /* TRACE */
                    RGP_MERGE_TO_32( rgp->rgppkt.knownstage4,    /* TRACE */
                                     rgp->rgppkt.knownstage5 ) );/* TRACE */

         /* The poison packet targets must NOT be considered alive.
          * Any target that is still in the live cluster and not
          * already banished indicates an internal inconsistency.
          */
         ClusterIntersection(temp_cluster, rgp->rgpinfo.cluster,
                             rgp->poison_targets);
         ClusterDifference(temp_cluster,
                           temp_cluster,
                           rgp->OS_specific_control.Banished);
         if (ClusterNumMembers(temp_cluster) != 0)
            RGP_ERROR(RGP_INTERNAL_ERROR);

#if defined(NSK) || defined(NT)
         /* In NSK, the packet buffer can be updated even if the send
          * engine is working on the previous send. See algorithm
          * description above.
          */
         rgp_update_poison_packet;
#endif /* NSK || NT */

         ClusterUnion(rgp->rgp_msgsys_p->poison_nodes,
                      rgp->poison_targets,
                      rgp->rgp_msgsys_p->poison_nodes);

         /* Clear the targets field in the rgp_control struct after
          * copying this info. The message system must clear the target
          * bits in the common regroup/msgsys struct after sending the
          * packets.
          */
         ClusterInit(rgp->poison_targets);
         rgp->rgp_msgsys_p->sendpoisons = 1;
         break;

      default :
         RGP_ERROR(RGP_INTERNAL_ERROR);
         break;
   }

   QUEUESEND; /* invoke OS-specific sending function/macro */
}
  567. /************************************************************************
  568. * rgp_had_power_failure
  569. * =====================
  570. *
  571. * Description:
  572. *
  573. * Tells the OS at the end of a regroup incident if a surviving node
  574. * had a power failure. The message system can use this to clear all
  575. * bus errors collected so far to node because node seems to have
  576. * had a power failure and has now recovered from it. Perhaps, the
  577. * bus errors were due to the power failure.
  578. *
  579. * Parameters:
  580. *
  581. * None
  582. *
  583. * Returns:
  584. *
  585. * void - no return value
  586. *
  587. * Algorithm:
  588. *
  589. * Calls a message system routine to perform any error clearing.
  590. *
  591. ************************************************************************/
_priv _resident void
rgp_had_power_failure(node_t node)
{
   /* Currently, there is nothing to do beyond recording the event
    * in the trace buffer.
    */
   RGP_TRACE( "RGP Power fail  ", node, 0, 0, 0);
  598. /************************************************************************
  599. * rgp_status_of_node
  600. * ==================
  601. *
  602. * Description:
  603. *
  604. * Ask the SP to return the status of a node. The SP must return the
  605. * current status and not return a stale status. This routine is
  606. * called by the split-brain avoidance algorithm in the two-node
  607. * case, for the non-tie-breaker to get the status of the tie-breaker
  608. * node.
  609. *
  610. * Parameters:
  611. *
  612. * node_t node
  613. * the node whose status is to be obtained.
  614. *
  615. * Returns:
  616. *
  617. * int - the status code of the node returned by the SP, appropriately
  618. * encoded into one of the values known to regroup.
  619. *
  620. * Algorithm:
  621. *
  622. * Calls a millicode routine to ask the SP for the status of the node.
  623. *
  624. ************************************************************************/
_priv _resident int
rgp_status_of_node(node_t node)
{
#if defined(NT)
   /* noone home - NT has no SP to query, so the node is always
    * reported unreachable.
    */
   return RGP_NODE_UNREACHABLE;
#else
   /* Millicode call to ask the SP for the node's current state. */
   return _get_remote_cpu_state_( node ); /*F40:MB06452.1*/
#endif
}
  635. /************************************************************************
  636. * rgp_newnode_online
  637. * ==================
  638. *
  639. * Description:
  640. *
  641. * This routine is called if the first IamAlive is received from a
  642. * newly booted node before the cluster manager gets a chance to
  643. * call rgp_monitor_node(). The OS can use this routine to mark the
  644. * node as up if it does not have any other means to detect that
  645. * a node has come up.
  646. *
  647. * Parameters:
  648. *
  649. * node_t node -
  650. * the new node that has just been detected to be up
  651. *
  652. * Returns:
  653. *
  654. * void - no return value
  655. *
  656. * Algorithm:
  657. *
  658. * This routine marks the state of the node as up as seen by the
  659. * native OS.
  660. *
  661. * In NSK, on the reloader node, the marking of the reloadee as up
  662. * is done by the message system when the initial address handshake
  663. * packet is received from the reloadee. NSK does not require the
  664. * regroup module to report the fact that the reloadee is online.
  665. *
  666. * The above is probably true for LCU as well. However, the details
  667. * are not yet worked out. For now, this routine is a no-op for LCU.
  668. *
  669. ************************************************************************/
_priv _resident void
rgp_newnode_online(node_t newnode)
{
   /* No OS marking needed here; just record the detection in the
    * trace buffer.
    */
   RGP_TRACE( "RGP New node up ", newnode, 0, 0, 0);
}
  675. /************************************************************************
  676. * rgp_select_cluster_ex
  677. * =====================
  678. *
  679. * Description:
  680. *
  681. * Given an array of cluster choices, this routine picks the best
  682. * cluster to keep alive. cluster_choices[] is the array of choices
  683. * and num_clusters is the number of entries in the array.
  684. *
  685. * Parameters:
  686. *
  687. * cluster_t cluster_choices[]
  688. * array of cluster choices
  689. *
  690. * int num_clusters
  691. * number of entries (choices) in the array
  692. *
  693. * node_t key_node
  694. * internal node number of the key node or RGP_NULL_NODE
  695. *
  696. * Returns:
  697. *
  698. * int - the index of the selected cluster; if no cluster
  699. * is viable, -1 is returned.
  700. *
  701. * Algorithm:
  702. *
  703. * By default, the best cluster is defined as the largest cluster.
  704. * Optionally, a node called key_node can be required to be present
  705. * for a cluster to be viable. key_node can be set to RGP_NULL_NODE
  706. * to imply that no specific node is required to be present. The
  707. * routine returns the index of the best cluster and -1 if none of
  708. * the clusters is viable (that is, does not include the key node).
  709. *
  710. ************************************************************************/
  711. _priv _resident int
  712. rgp_select_cluster_ex(cluster_t cluster_choices[], int num_clusters, node_t key_node)
  713. {
  714. int max_members = 0, num_members;
  715. int cluster_selected = -1;
  716. int i;
  717. #if defined(UNIX)
  718. printf("rgp_select_cluster() called with %d choices:", num_clusters);
  719. for (i = 0; i < num_clusters; i++)
  720. {
  721. node_t j;
  722. printf("(");
  723. for (j = 0; j < (node_t) rgp->num_nodes; j++)
  724. {
  725. if (ClusterMember(cluster_choices[i], j))
  726. printf("%d,", EXT_NODE(j));
  727. }
  728. printf(")");
  729. }
  730. printf("\n");
  731. fflush(stdout);
  732. #endif /* UNIX */
  733. for (i = 0; i < num_clusters; i++)
  734. {
  735. /* Skip the current cluster if a key node is defined and is not
  736. * in the cluster.
  737. */
  738. if ((key_node != RGP_NULL_NODE) &&
  739. !ClusterMember(cluster_choices[i], key_node))
  740. continue;
  741. if ((num_members = ClusterNumMembers(cluster_choices[i])) > max_members)
  742. {
  743. cluster_selected = i;
  744. max_members = num_members;
  745. }
  746. }
  747. #if defined(UNIX)
  748. printf("Node %d: rgp_select_cluster() returned %d.\n",
  749. EXT_NODE(rgp->mynode), cluster_selected);
  750. fflush(stdout);
  751. #endif /* UNIX */
  752. return (cluster_selected);
  753. }
  754. /************************************************************************
  755. * rgp_select_cluster
  756. * ==================
  757. *
  758. * Description:
  759. *
  760. * Given an array of cluster choices, this routine picks the best
  761. * cluster to keep alive. cluster_choices[] is the array of choices
  762. * and num_clusters is the number of entries in the array.
  763. *
  764. * Parameters:
  765. *
  766. * cluster_t cluster_choices[]
  767. * array of cluster choices
  768. *
  769. * int num_clusters
  770. * number of entries (choices) in the array
  771. *
  772. * Returns:
  773. *
  774. * int - the index of the selected cluster; if no cluster
  775. * is viable, -1 is returned.
  776. *
  777. * Algorithm:
  778. *
  779. * By default, the best cluster is defined as the largest cluster.
  780. * Optionally, a node called RGP_KEY_NODE can be required to be present
  781. * for a cluster to be viable. RGP_KEY_NODE can be set to RGP_NULL_NODE
  782. * to imply that no specific node is required to be present. The
  783. * routine returns the index of the best cluster and -1 if none of
  784. * the clusters is viable (that is, does not include the key node).
  785. *
  786. ************************************************************************/
  787. _priv _resident int
  788. rgp_select_cluster(cluster_t cluster_choices[], int num_clusters)
  789. {
  790. node_t key_node;
  791. if (RGP_KEY_NODE == RGP_NULL_NODE) {
  792. key_node = RGP_NULL_NODE;
  793. } else {
  794. key_node = INT_NODE(RGP_KEY_NODE);
  795. }
  796. return rgp_select_cluster_ex(cluster_choices , num_clusters, key_node);
  797. }
  798. #ifdef LCU
  799. /************************************************************************
  800. * rgp_msgsys_work
  801. * ===============
  802. *
  803. * Description:
  804. *
  805. * LCU-specific routine that implements broadcasting of packets by
  806. * sending them serially.
  807. *
  808. * This routine is called from rgp_broadcast() to initiate new sends.
  809. * It is also the packet send completion interrupt handler (callback
  810. * routine), invoked by the LCU message system when the packet buffer
  811. * can be reused.
  812. *
  813. * Parameters:
  814. *
  815. * lcumsg_t *lcumsgp -
  816. * pointer to lcu message if called from the transport's send
  817. * completion interrupt handler; NULL if called from
  818. * rgp_broadcast() to send a new packet.
  819. *
  820. * int status -
  821. * the message completion status if called from the transport's
  822. * send completion interrupt handler; 0 if called from
  823. * rgp_broadcast() to send a new packet.
  824. *
  825. * Returns:
  826. *
  827. * void - no return value
  828. *
  829. * Algorithm:
  830. *
  831. * If called from the send completion interrupt, the routine checks
  832. * to see if the packet buffer needs to be refreshed. This is true
  833. * if the appropriate bit in the rgp_msgsys struct is set. If so,
  834. * the buffer is updated with the current info (using an update
  835. * macro). This update is relevant to regroup status packets and
  836. * poison packets, but not to IamAlives packets whose contents are
  837. * always the same. The bit is cleared after the packet is updated.
  838. *
  839. * Next, the routine checks if there are more destinations to send
  840. * the packet to. If so, it finds the next higher numbered node to
  841. * send to, issues a send and returns.
  842. *
  843. * If invoked from rgp_broadcast() to start a new broadcast, the
  844. * routine first checks to see if the previous broadcast of the
  845. * same packet is complete. This is indicated by the tag field in
  846. * the message struct. The tag is NULL if the broadcast has
  847. * completed or has not been initiated. In this case, the tag is
  848. * set to a non-NULL value and a new broadcast initiated, with
  849. * this routine specified as the callback routine.
  850. *
  851. * If the previous broadcast has not completed, nothing needs to
  852. * be done. The completion interrupt will cause the buffer to be
  853. * refreshed and the broadcast to be continued. The broadcast
  854. * will then include new targets that may be included in this
  855. * new request.
  856. *
  857. ************************************************************************/
  858. _priv _resident void
  859. rgp_msgsys_work(lcumsg_t *lcumsgp, int status)
  860. {
  861. rgp_unseq_pkt_t *packet;
  862. cluster_t *sending_cluster;
  863. node_t node;
  864. if (lcumsgp == NULL)
  865. {
  866. /* New work requested. Only one type of work is requested at
  867. * a time.
  868. */
  869. if (rgp->rgp_msgsys_p->sendrgppkts)
  870. {
  871. /* Have new regroup status packets to send. First check
  872. * if the last regroup status send completed. If so,
  873. * we can update the packet and initiate a new send.
  874. * If not, we must defer to the completion interrupt
  875. * (invocation of this routine with a non-NULL lcumsgp).
  876. */
  877. lcumsgp = rgp->OS_specific_control.lcumsg_regroup_p;
  878. if (lcumsgp->lcu_tag == NULL)
  879. {
  880. /* Last send completed. Initiate new send. */
  881. rgp_update_regroup_packet;
  882. rgp->rgp_msgsys_p->sendrgppkts = 0;
  883. for (node = 0; node < rgp->num_nodes; node++)
  884. {
  885. if (ClusterMember(rgp->rgp_msgsys_p->regroup_nodes, node))
  886. {
  887. ClusterDelete(rgp->rgp_msgsys_p->regroup_nodes, node);
  888. lcumsgp->lcu_node = node;
  889. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->regroup_nodes);
  890. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  891. ELCU_OK)
  892. RGP_ERROR(RGP_INTERNAL_ERROR);
  893. break; /* can send only to one node at a time */
  894. }
  895. }
  896. }
  897. }
  898. else if (rgp->rgp_msgsys_p->sendiamalives)
  899. {
  900. /* Need to send IamAlives again. First check if the last
  901. * IamAlive send completed. If so, we can initiate a new send.
  902. * If not, we must defer to the completion interrupt
  903. * (invocation of this routine with a non-NULL lcumsgp).
  904. */
  905. lcumsgp = rgp->OS_specific_control.lcumsg_iamalive_p;
  906. if (lcumsgp->lcu_tag == NULL)
  907. {
  908. /* Last send completed. Initiate new send. */
  909. rgp->rgp_msgsys_p->sendiamalives = 0;
  910. for (node = 0; node < rgp->num_nodes; node++)
  911. {
  912. if (ClusterMember(rgp->rgp_msgsys_p->iamalive_nodes, node))
  913. {
  914. ClusterDelete(rgp->rgp_msgsys_p->iamalive_nodes, node);
  915. lcumsgp->lcu_node = node;
  916. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->iamalive_nodes);
  917. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  918. ELCU_OK)
  919. RGP_ERROR(RGP_INTERNAL_ERROR);
  920. break; /* can send only to one node at a time */
  921. }
  922. }
  923. }
  924. }
  925. else if (rgp->rgp_msgsys_p->sendpoisons)
  926. {
  927. /* Have new poison packets to send. First check
  928. * if the last poison packet send completed. If so,
  929. * we can update the packet and initiate a new send.
  930. * If not, we must defer to the completion interrupt
  931. * (invocation of this routine with a non-NULL lcumsgp).
  932. */
  933. lcumsgp = rgp->OS_specific_control.lcumsg_poison_p;
  934. if (lcumsgp->lcu_tag == NULL)
  935. {
  936. /* Last send completed. Initiate new send. */
  937. rgp_update_poison_packet;
  938. rgp->rgp_msgsys_p->sendpoisons = 0;
  939. for (node = 0; node < rgp->num_nodes; node++)
  940. {
  941. if (ClusterMember(rgp->rgp_msgsys_p->poison_nodes, node))
  942. {
  943. ClusterDelete(rgp->rgp_msgsys_p->poison_nodes, node);
  944. lcumsgp->lcu_node = node;
  945. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->poison_nodes);
  946. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  947. ELCU_OK)
  948. RGP_ERROR(RGP_INTERNAL_ERROR);
  949. break; /* can send only to one node at a time */
  950. }
  951. }
  952. }
  953. }
  954. } /* new work */
  955. else
  956. {
  957. /* Send completion interrupt; continue the broadcast if
  958. * there are targets remaining.
  959. */
  960. RGP_LOCK;
  961. /* Find what type of packet completed; send the same type. */
  962. packet = (rgp_unseq_pkt_t *) lcumsgp->lcu_reqmbuf.lcu_ctrlbuf;
  963. switch (packet->pktsubtype)
  964. {
  965. case RGP_UNACK_REGROUP :
  966. /* Check if packet needs to be updated. */
  967. if (rgp->rgp_msgsys_p->sendrgppkts)
  968. {
  969. rgp_update_regroup_packet;
  970. rgp->rgp_msgsys_p->sendrgppkts = 0;
  971. }
  972. break;
  973. case RGP_UNACK_IAMALIVE :
  974. break;
  975. case RGP_UNACK_POISON :
  976. /* Check if packet needs to be updated. */
  977. if (rgp->rgp_msgsys_p->sendpoisons)
  978. {
  979. rgp_update_poison_packet;
  980. rgp->rgp_msgsys_p->sendpoisons = 0;
  981. }
  982. break;
  983. }
  984. /* Check if there is any more node to send the same packet
  985. * type to. If not, set the tag to NULL and return.
  986. */
  987. sending_cluster = (cluster_t *) (lcumsgp->lcu_tag);
  988. if (ClusterNumMembers(*sending_cluster) == 0)
  989. {
  990. lcumsgp->lcu_tag = NULL; /* indicate that broadcast is complete. */
  991. return;
  992. }
  993. /* There is at least one more node to send to. Start with
  994. * the node with the next higher number than the node we
  995. * just finished sending to.
  996. *
  997. * The loop terminates after posting a send to the next
  998. * node to send to. We know there is at least one such node.
  999. */
  1000. for (node = lcumsgp->lcu_node + 1; node < rgp->num_nodes + 1; node++)
  1001. {
  1002. if (node == rgp->num_nodes)
  1003. node = 0; /* continue the search starting at node 0 */
  1004. if (ClusterMember(*sending_cluster, node))
  1005. {
  1006. ClusterDelete(*sending_cluster, node);
  1007. lcumsgp->lcu_node = node;
  1008. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  1009. ELCU_OK)
  1010. RGP_ERROR(RGP_INTERNAL_ERROR);
  1011. break; /* can send only to one node at a time */
  1012. }
  1013. }
  1014. RGP_UNLOCK;
  1015. }
  1016. }
  1017. #endif /* LCU */
  1018. /*---------------------------------------------------------------------------*/
  1019. #if defined(LCU) || defined(UNIX) || defined(NT)
  1020. /*---------------------------------------------------------------------------*/
  1021. void
  1022. rgp_hold_all_io(void)
  1023. /* Simulates the TNet services routine to pause IO. */
  1024. {
  1025. #if defined (NT)
  1026. (*(rgp->OS_specific_control.HoldIOCallback))();
  1027. #endif
  1028. RGP_TRACE( "RGP Hold all IO ", 0, 0, 0, 0);
  1029. }
  1030. /*---------------------------------------------------------------------------*/
  1031. void
  1032. rgp_resume_all_io(void)
  1033. /* Simulates the TNet services routine to resume IO. */
  1034. {
  1035. #if defined (NT)
  1036. (*(rgp->OS_specific_control.ResumeIOCallback))();
  1037. #endif
  1038. RGP_TRACE( "RGP Resume IO ", 0, 0, 0, 0);
  1039. }
  1040. /*---------------------------------------------------------------------------*/
  1041. void
  1042. RGP_ERROR_EX (uint16 halt_code, char* fname, DWORD lineno)
  1043. /* Halt node with error code. */
  1044. {
  1045. char *halt_string;
  1046. node_t node = RGP_NULL_NODE;
  1047. #if defined( NT )
  1048. char halt_buffer[ 256 ];
  1049. DWORD eventMsgId;
  1050. BOOL skipFormatting = FALSE;
  1051. //
  1052. // If a user initiated a shutdown, (s)he wants to see the node
  1053. // to go down and wait for an explicit start command.
  1054. //
  1055. // We map RGP_RELOADFAILED to SHUTDOWN_DURING_REGROUP_ERROR since
  1056. // HaltCallback does a graceful stop for the latter one.
  1057. // SCM won't restart the node after a graceful stop unless
  1058. // it is explicitly told to do so
  1059. //
  1060. if (halt_code == RGP_RELOADFAILED &&
  1061. rgp->OS_specific_control.ShuttingDown)
  1062. {
  1063. halt_code = RGP_SHUTDOWN_DURING_RGP;
  1064. }
  1065. #endif
  1066. if (halt_code == RGP_RELOADFAILED) {
  1067. halt_string = "[RGP] Node %d: REGROUP WARNING: reload failed.";
  1068. eventMsgId = MM_EVENT_RELOAD_FAILED;
  1069. }
  1070. else if (halt_code == RGP_INTERNAL_ERROR) {
  1071. halt_string = "[RGP] Node %d: REGROUP ERROR: consistency check failed in file %s, line %u.";
  1072. eventMsgId = MM_EVENT_INTERNAL_ERROR;
  1073. skipFormatting = TRUE;
  1074. _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
  1075. halt_string,
  1076. EXT_NODE(rgp->mynode),
  1077. fname,
  1078. lineno);
  1079. }
  1080. else if (halt_code == RGP_MISSED_POLL_TO_SELF) {
  1081. halt_string = "[RGP] Node %d: REGROUP ERROR: cannot talk to self.";
  1082. eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
  1083. }
  1084. #if !defined(NT)
  1085. else if (halt_code == RGP_AVOID_SPLIT_BRAIN) {
  1086. halt_string = "[RGP] Node %d: REGROUP ERROR: commiting suicide to avoid split brain.";
  1087. }
  1088. #endif
  1089. else if (halt_code == RGP_PRUNED_OUT) {
  1090. halt_string = "[RGP] Node %d: REGROUP ERROR: pruned out due to communication failure.";
  1091. eventMsgId = MM_EVENT_PRUNED_OUT;
  1092. }
  1093. else if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
  1094. halt_string = "[RGP] Node %d: REGROUP ERROR: poison packet received from node %d.";
  1095. eventMsgId = MM_EVENT_PARIAH;
  1096. node = (node_t)(halt_code - RGP_PARIAH);
  1097. }
  1098. else if (halt_code == RGP_ARBITRATION_FAILED) {
  1099. halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration failed.";
  1100. eventMsgId = MM_EVENT_ARBITRATION_FAILED;
  1101. }
  1102. else if (halt_code == RGP_ARBITRATION_STALLED) {
  1103. halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration stalled.";
  1104. eventMsgId = MM_EVENT_ARBITRATION_STALLED;
  1105. }
  1106. else if (halt_code == RGP_SHUTDOWN_DURING_RGP) {
  1107. halt_string = "[RGP] Node %d: REGROUP INFO: regroup engine requested immediate shutdown.";
  1108. eventMsgId = MM_EVENT_SHUTDOWN_DURING_RGP;
  1109. }
  1110. else {
  1111. halt_string = "[RGP] Node %d: REGROUP ERROR: unknown halt code (%d).";
  1112. eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
  1113. node = halt_code; // get it printed out by borrowing node
  1114. }
  1115. #if defined(UNIX)
  1116. printf(halt_string, EXT_NODE(rgp->mynode), node);
  1117. fflush(stdout);
  1118. /* Simulate a halt by dumping core and exiting the process. */
  1119. abort();
  1120. #elif defined(NT)
  1121. if ( !skipFormatting ) {
  1122. _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
  1123. halt_string,
  1124. EXT_NODE(rgp->mynode),
  1125. node);
  1126. }
  1127. #if CLUSTER_BETA
  1128. ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\t%2!hs!:%3!d!\n", halt_buffer, fname, lineno);
  1129. #else
  1130. ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\n", halt_buffer );
  1131. #endif
  1132. if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
  1133. WCHAR nodeString[ 16 ];
  1134. PWCHAR nodeName;
  1135. _snwprintf( nodeString, sizeof( nodeString ) / sizeof ( WCHAR ), L"%d", node );
  1136. nodeName = RgpGetNodeNameFromId( node );
  1137. CsLogEvent2( LOG_CRITICAL, eventMsgId, nodeString, nodeName );
  1138. if ( nodeName != NULL ) {
  1139. LocalFree( nodeName );
  1140. }
  1141. }
  1142. else if ( eventMsgId == NM_EVENT_MEMBERSHIP_HALT ) {
  1143. WCHAR haltString[ 16 ];
  1144. _snwprintf( haltString, sizeof( haltString ) / sizeof ( WCHAR ), L"%d", halt_code );
  1145. CsLogEvent1( LOG_CRITICAL, eventMsgId, haltString );
  1146. }
  1147. else {
  1148. CsLogEvent( LOG_CRITICAL, eventMsgId );
  1149. }
  1150. /* we rely on RGP_ERROR_EX to kill the node immediately
  1151. rgp_cleanup() can potentially slow us down.
  1152. 435977 showed that it can take upto 25 seconds, if we
  1153. have a lot IP addr activity.
  1154. since in the end of the function we execute HaltCallback which kills the cluster,
  1155. we can safely omit doing rgp_cleanup and rgp_cleanup_OS
  1156. If JoinFailedCallback will be ever enabled, the fate of rgp_cleanup and rgp_cleanup_OS
  1157. should be reevaluated.
  1158. */
  1159. #if 0
  1160. rgp_cleanup();
  1161. rgp_cleanup_OS();
  1162. if (halt_code == RGP_RELOADFAILED)
  1163. (*(rgp->OS_specific_control.JoinFailedCallback))();
  1164. else
  1165. #endif
  1166. (*(rgp->OS_specific_control.HaltCallback))(halt_code); // does not return */
  1167. #else
  1168. cmn_err(CE_PANIC, halt_string, EXT_NODE(rgp->mynode), node);
  1169. #endif /* UNIX */
  1170. }
  1171. /*---------------------------------------------------------------------------*/
  1172. void
  1173. rgp_start_phase1_cleanup(void)
  1174. /* Tells the OS to start cleanup actions for all failed nodes. */
  1175. {
  1176. #if defined (NT)
  1177. node_t i;
  1178. //
  1179. // On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
  1180. //
  1181. for ( i=0; i < (node_t) rgp->num_nodes; i++)
  1182. {
  1183. if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
  1184. {
  1185. (*(rgp->OS_specific_control.MsgCleanup1Callback))(EXT_NODE(i));
  1186. }
  1187. }
  1188. #endif
  1189. RGP_TRACE( "RGP Ph1 cleanup ", 0, 0, 0, 0);
  1190. rgp_event_handler(RGP_EVT_PHASE1_CLEANUP_DONE, RGP_NULL_NODE);
  1191. }
  1192. /*---------------------------------------------------------------------------*/
  1193. void
  1194. rgp_start_phase2_cleanup(void)
  1195. /* The equivalent of NSK's regroupstage4action(). */
  1196. {
  1197. #if defined (NT)
  1198. BITSET bitset;
  1199. node_t i;
  1200. //
  1201. // On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
  1202. //
  1203. BitsetInit(bitset);
  1204. for ( i=0; i < (node_t) rgp->num_nodes; i++)
  1205. {
  1206. if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
  1207. {
  1208. BitsetAdd(bitset, EXT_NODE(i));
  1209. }
  1210. }
  1211. (*(rgp->OS_specific_control.MsgCleanup2Callback))(bitset);
  1212. #endif
  1213. RGP_TRACE( "RGP Ph2 cleanup ", 0, 0, 0, 0);
  1214. rgp_event_handler(RGP_EVT_PHASE2_CLEANUP_DONE, RGP_NULL_NODE);
  1215. }
  1216. /*---------------------------------------------------------------------------*/
  1217. void
  1218. rgp_cleanup_complete(void)
  1219. /* The equivalent of NSK's regroupstage5action(). */
  1220. {
  1221. #if defined(NT)
  1222. #endif
  1223. RGP_TRACE( "RGP completed ", 0, 0, 0, 0);
  1224. }
  1225. /*---------------------------------------------------------------------------*/
  1226. #endif /* LCU || UNIX || NT */
  1227. #if defined(NT)
  1228. /************************************************************************
  1229. * NT_timer_callback
  1230. * =================
  1231. *
  1232. * Description:
  1233. *
  1234. * This routine is the callback function that gets invoked whenever a
  1235. * timer pops. The routine will call rgp_periodic_check. This function
  1236. * is defined by the Win32 TimerProc procedure.
  1237. *
  1238. * Parameters:
  1239. *
  1240. * See below. We don't use any of them.
  1241. *
  1242. * Returns:
  1243. *
  1244. * none.
  1245. *
  1246. * Algorithm:
  1247. *
  1248. * This routine just calls rgp_periodic_check. The existense of this
  1249. * routine is solely due to a fixed format callback defined by
  1250. * Microsoft.
  1251. *
  1252. ************************************************************************/
  1253. VOID CALLBACK NT_timer_callback(
  1254. VOID
  1255. )
  1256. {
  1257. #if defined(TDM_DEBUG)
  1258. if ( !(rgp->OS_specific_control.debug.timer_frozen) &&
  1259. !(rgp->OS_specific_control.debug.frozen) )
  1260. #endif
  1261. rgp_periodic_check( );
  1262. }
  1263. /************************************************************************
  1264. * NT_timer_thread
  1265. * ===============
  1266. *
  1267. * Description:
  1268. *
  1269. * This routine is executed as a separate thread in the Windows NT
  1270. * implementation. This thread controls generates periodic regroup
  1271. * clock ticks. It is signalled via an event whenever the rate changes
  1272. * or to cause termination.
  1273. *
  1274. * Parameters:
  1275. *
  1276. * None.
  1277. *
  1278. * Returns:
  1279. *
  1280. * This thread should not go away.
  1281. *
  1282. * Algorithm:
  1283. *
  1284. * This routine is run as a separate thread. It sets up a timer to pop
  1285. * every <time_interval> * 10 milliseconds.
  1286. *
  1287. ************************************************************************/
  1288. void NT_timer_thread( void )
  1289. {
  1290. BOOL Success;
  1291. LARGE_INTEGER DueTime;
  1292. DWORD Error, MyHandleIndex;
  1293. HANDLE MyHandles[2]; /* for use by WaitForMultiple */
  1294. DWORD status;
  1295. DWORD msDueTime;
  1296. #define MyHandleSignalIx 0
  1297. #define MyHandleTimerIx 1
  1298. MyHandles[MyHandleSignalIx] = rgp->OS_specific_control.TimerSignal; /* Event signals HB rate change */
  1299. rgp->OS_specific_control.RGPTimer = CreateWaitableTimer(
  1300. NULL, // no security
  1301. FALSE, // Initial State FALSE
  1302. NULL
  1303. ); // No name
  1304. if (rgp->OS_specific_control.RGPTimer == NULL) {
  1305. Error = GetLastError();
  1306. RGP_ERROR(RGP_INTERNAL_ERROR);
  1307. }
  1308. status = MmSetThreadPriority();
  1309. if ( status != ERROR_SUCCESS ) {
  1310. ClRtlLogPrint(LOG_CRITICAL,
  1311. "[MM] Unable to set timer thread priority, status %1!u!\n",
  1312. status
  1313. );
  1314. RGP_ERROR((uint16) status);
  1315. ExitThread(status);
  1316. }
  1317. MyHandles[MyHandleTimerIx] = rgp->OS_specific_control.RGPTimer;
  1318. while (TRUE)
  1319. {
  1320. MyHandleIndex = WaitForMultipleObjects (
  1321. 2, /* Number of Events */
  1322. MyHandles, /* Handle Array */
  1323. FALSE, /* Wait for ANY event */
  1324. INFINITE ); /* Wait forever */
  1325. if (MyHandleIndex == MyHandleSignalIx) // Timer Change Signal Event
  1326. {
  1327. // RGP rate has changed
  1328. CancelWaitableTimer ( rgp->OS_specific_control.RGPTimer );
  1329. if ( rgp->rgpinfo.a_tick == 0 ) // Time to quit
  1330. {
  1331. CloseHandle ( rgp->OS_specific_control.RGPTimer );
  1332. rgp->OS_specific_control.RGPTimer = 0;
  1333. ExitThread ( 0 );
  1334. }
  1335. // a_tick has new RGP rate in milliseconds.
  1336. msDueTime = rgp->rgpinfo.a_tick;
  1337. DueTime.QuadPart = -10 * 1000 * msDueTime;
  1338. Success = SetWaitableTimer(
  1339. rgp->OS_specific_control.RGPTimer,
  1340. &DueTime,
  1341. rgp->rgpinfo.a_tick,
  1342. NULL,
  1343. NULL,
  1344. FALSE);
  1345. if (!Success) {
  1346. Error = GetLastError();
  1347. RGP_ERROR(RGP_INTERNAL_ERROR);
  1348. }
  1349. } // Timer Change Signal
  1350. else
  1351. { // RGP Timer Tick
  1352. NT_timer_callback();
  1353. NmTimerTick(msDueTime);
  1354. }
  1355. } // while
  1356. }
  1357. PWCHAR
  1358. RgpGetNodeNameFromId(
  1359. node_t NodeID
  1360. )
  1361. /*++
  1362. Routine Description:
  1363. given a node ID, issue a get name node control to get the computer name of
  1364. the node. Returned buffer to be freed by caller.
  1365. Arguments:
  1366. NodeID - ID ( 1, 2, 3, ..) of the node
  1367. Return Value:
  1368. pointer to buffer containing name
  1369. --*/
  1370. {
  1371. PWCHAR buffer;
  1372. DWORD bufferSize = MAX_COMPUTERNAME_LENGTH * sizeof( WCHAR );
  1373. DWORD bytesReturned;
  1374. DWORD bytesRequired;
  1375. PNM_NODE node;
  1376. buffer = LocalAlloc( LMEM_FIXED, bufferSize );
  1377. if ( buffer != NULL ) {
  1378. node = NmReferenceNodeById( NodeID );
  1379. if ( node != NULL ) {
  1380. NmNodeControl(node,
  1381. NULL, // HostNode OPTIONAL,
  1382. CLUSCTL_NODE_GET_NAME,
  1383. NULL, // InBuffer,
  1384. 0, // InBufferSize,
  1385. (PUCHAR)buffer,
  1386. bufferSize,
  1387. &bytesReturned,
  1388. &bytesRequired);
  1389. OmDereferenceObject( node );
  1390. }
  1391. }
  1392. return buffer;
  1393. }
  1394. #endif /* NT */
  1395. #ifdef __cplusplus
  1396. }
  1397. #endif /* __cplusplus */
  1398. #if 0
  1399. History of changes to this file:
  1400. -------------------------------------------------------------------------
  1401. 1995, December 13 F40:KSK0610 /*F40:KSK06102.2*/
  1402. This file is part of the portable Regroup Module used in the NonStop
  1403. Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
  1404. are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
  1405. srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
  1406. The last two are simulation files to test the Regroup Module on a
  1407. UNIX workstation in user mode with processes simulating processor nodes
  1408. and UDP datagrams used to send unacknowledged datagrams.
  1409. This file was first submitted for release into NSK on 12/13/95.
  1410. ------------------------------------------------------------------------------
  1411. This change occurred on 19 Jan 1996 /*F40:MB06458.1*/
  1412. Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.2*/
  1413. - Some cleanup of the code /*F40:MB06458.3*/
  1414. - Increment KCCB counters to count the number of setup messages and /*F40:MB06458.4*/
  1415. unsequenced messages sent. /*F40:MB06458.5*/
  1416. - Fixed some bugs /*F40:MB06458.6*/
  1417. - Disable interrupts before allocating broadcast sibs. /*F40:MB06458.7*/
  1418. - Change per-packet-timeout to 5ms /*F40:MB06458.8*/
  1419. - Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.9*/
  1420. tnet services queue. /*F40:MB06458.10*/
  1421. - Call the millicode backdoor to get the processor status from SP /*F40:MB06458.11*/
  1422. - Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.12*/
  1423. - Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.13*/
  1424. to be unstoppable before calling this routine. /*F40:MB06458.14*/
  1425. - Added new steps in the build file called /*F40:MB06458.15*/
  1426. MSGSYS_C - compiles all the message system C files /*F40:MB06458.16*/
  1427. MSDRIVER - compiles all the MSDriver files /*F40:MB06458.17*/
  1428. REGROUP - compiles all the regroup files /*F40:MB06458.18*/
  1429. - remove #pragma env libspace because we set it as a command line /*F40:MB06458.19*/
  1430. parameter. /*F40:MB06458.20*/
  1431. ----------------------------------------------------------------------- /*F40:MB06458.21*/
  1432. #endif /* 0 - change descriptions */