Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1645 lines
56 KiB

  1. #ifdef __TANDEM
  2. #pragma columns 79
  3. #pragma page "srgpos.c - T9050 - OS-dependent routines for Regroup Module"
  4. #endif
  5. /* @@@ START COPYRIGHT @@@
  6. ** Tandem Confidential: Need to Know only
  7. ** Copyright (c) 1995, Tandem Computers Incorporated
  8. ** Protected as an unpublished work.
  9. ** All Rights Reserved.
  10. **
  11. ** The computer program listings, specifications, and documentation
  12. ** herein are the property of Tandem Computers Incorporated and shall
  13. ** not be reproduced, copied, disclosed, or used in whole or in part
  14. ** for any reason without the prior express written permission of
  15. ** Tandem Computers Incorporated.
  16. **
  17. ** @@@ END COPYRIGHT @@@
  18. **/
  19. /*---------------------------------------------------------------------------
  20. * This file (srgpos.c) contains OS-specific code used by Regroup.
  21. *---------------------------------------------------------------------------*/
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif /* __cplusplus */
  25. #include <wrgp.h>
  26. #ifdef NSK
  27. #include <pmsgrgp.h>
  28. #endif /* NSK */
  #if defined(NT)

  /* Forward declarations for NT-only routines referenced by this file.
   * NT_timer_thread is the start routine handed to CreateThread() in
   * rgp_init_OS().
   */
  DWORD  MmSetThreadPriority(VOID);
  void   NT_timer_thread(void);
  PWCHAR RgpGetNodeNameFromId(node_t node);

  #endif // NT
  /* The global pointer to regroup's internal data structure. */
  #ifdef NSK
  /* On NSK the global regroup pointer is #defined to a pointer in the
   * message system root structure, so nothing is defined here.
   */
  #endif

  #if defined(LCU) || defined(UNIX) || defined(NT)

  /* Starts out as the null pointer value. */
  rgp_control_t *rgp = (rgp_control_t *) RGP_NULL_PTR;

  /* The quorum owner can be set by the forming node before rgp is
   * initialized.
   */
  DWORD QuorumOwner = MM_INVALID_NODE;

  /* Clussvc to Clusnet heartbeating; this BOOL would enable it. */
  BOOL MmStartClussvcToClusnetHeartbeat = FALSE;
  LONG MmCheckSystemHealthTick = 0;

  #endif /* LCU || UNIX || NT */
  57. #ifdef LCU
  58. /************************************************************************
  59. * rgp_lcu_serv_listen
  60. * ===================
  61. *
  62. * Description:
  63. *
  64. * This is an LCU-specific routine that gets called in IPC interrupt
  65. * context when a datagram addressed to the Regroup Module is received.
  66. *
  67. * Parameters:
  68. *
  69. * void *listen_callarg - required param, unused by regroup
  70. * lcumsg_t *lcumsgp - pointer to message
  71. * uint moredata - required param, unused by regroup
  72. *
  73. * Returns:
  74. *
  75. * int - Always returns ELCU_OK
  76. *
  77. * Algorithm:
  78. *
  79. * The routine simply picks apart the arguments and calls
  80. * rgp_received_packet().
  81. *
  82. *
  83. ************************************************************************/
  84. _priv _resident int
  85. rgp_lcu_serv_listen(void *listen_callarg, lcumsg_t *lcumsgp, uint moredata)
  86. {
  87. /* Ignore if the packet is not from the local system. */
  88. if (lcumsgp->lcu_sysnum == rgp->OS_specific_control.my_sysnum)
  89. rgp_received_packet(lcumsgp->lcu_node,
  90. lcumsgp->lcu_reqmbuf.lcu_ctrlbuf,
  91. lcumsgp->lcu_reqmbuf.lcu_ctrllen);
  92. return(ELCU_OK);
  93. }
  94. /************************************************************************
  95. * rgp_lcu_event_callback
  96. * ======================
  97. *
  98. * Description:
  99. *
  100. * This is an LCU-specific routine that gets called in IPC interrupt
  101. * context when the LCUEV_NODE_UNREACHABLE event is generated.
  102. *
  103. * Parameters:
  104. *
  105. * ulong event - event # (= LCUEV_NODE_UNREACHABLE)
  106. * sysnum_t sysnum - system # (= local system #)
  107. * nodenum_t node - # of node that is unreachable
  108. * int event_info - required parameter, unused by regroup
  109. *
  110. * Returns:
  111. *
  112. * void - no return value
  113. *
  114. * Algorithm:
  115. *
  116. * The routine simply transforms the LCU event into the regroup event
  117. * RGP_EVT_NODE_UNREACHABLE and calls rgp_event_handler().
  118. *
  119. ************************************************************************/
  120. _priv _resident void
  121. rgp_lcu_event_callback(
  122. ulong event,
  123. sysnum_t sysnum,
  124. nodenum_t node,
  125. int event_info)
  126. {
  127. /* Sanity checks:
  128. * (1) The event must be LCUEV_NODE_UNREACHABLE, the only event
  129. * we asked for.
  130. * (1) The event must be for the local system, the only system
  131. * we asked for.
  132. */
  133. if ((event != LCUEV_NODE_UNREACHABLE) ||
  134. (sysnum != rgp->OS_specific_control.my_sysnum))
  135. RGP_ERROR(RGP_INTERNAL_ERROR);
  136. rgp_event_handler(RGP_EVT_NODE_UNREACHABLE, node);
  137. }
  138. #endif /* LCU */
  139. /************************************************************************
  140. * rgp_init_OS
  141. * ===========
  142. *
  143. * Description:
  144. *
  145. * This routine does OS-dependent regroup initialization such as
  146. * initializing the regroup data structure lock, requesting a
  147. * periodic timer to be installed and registering the callback
  148. * routine for receiving regroup's unacknowledged packets.
  149. *
  150. * Parameters:
  151. *
  152. * None
  153. *
  154. * Returns:
  155. *
  156. * void - no return value
  157. *
  158. * Algorithm:
  159. *
  160. * OS-dependent initializations.
  161. *
  162. ************************************************************************/
  163. _priv _resident void
  164. rgp_init_OS(void)
  165. {
  166. #ifdef UNIX
  167. struct sigaction sig_action; /* to install signals */
  168. #endif
  169. #ifdef LCU
  170. sysnum_t sysnum;
  171. lcumsg_t *lcumsgp;
  172. #endif
  173. #ifdef NT
  174. HANDLE tempHandle;
  175. DWORD threadID = 0;
  176. #endif
  177. #if defined(NSK) || defined(UNIX) || defined(NT)
  178. /*
  179. * In NSK, the regroup caller ensures that timer and IPC interrupts
  180. * are disabled before the regroup routines are called. Therefore,
  181. * there is no regroup lock initialization. Also, rather than using
  182. * registration of callback routines, the appropriate routine names
  183. * are hard coded into routines that must call them. Thus, the timer
  184. * routine is called from POLLINGCHECK, the periodic message system
  185. * routine, and the packet reception routine is called from the
  186. * IPC interrupt handler.
  187. */
  188. /* Initialize the unchanging fields in the rgp_msgsys struct. */
  189. rgp->rgp_msgsys_p->regroup_data = (void *) &(rgp->rgppkt_to_send);
  190. rgp->rgp_msgsys_p->regroup_datalen = RGPPKTLEN;
  191. rgp->rgp_msgsys_p->iamalive_data = (void *) &(rgp->iamalive_pkt);
  192. rgp->rgp_msgsys_p->iamalive_datalen = IAMALIVEPKTLEN;
  193. rgp->rgp_msgsys_p->poison_data = (void *) &(rgp->poison_pkt);
  194. rgp->rgp_msgsys_p->poison_datalen = POISONPKTLEN;
  195. #endif /* NSK || UNIX || NT */
  196. #ifdef LCU
  197. if (itimeout(rgp_periodic_check,
  198. NULL, /* parameter pointer */
  199. ((RGP_CLOCK_PERIOD * HZ) / 100) | TO_PERIODIC,
  200. plstr /* interrupt priority level */
  201. ) == 0)
  202. RGP_ERROR(RGP_INTERNAL_ERROR);
  203. if (lcuxprt_listen(LCU_RGP_PORT,
  204. rgp_lcu_serv_listen,
  205. NULL /* no call arg */,
  206. NULL /* no options */
  207. ) != ELCU_OK)
  208. RGP_ERROR(RGP_INTERNAL_ERROR);
  209. if (lcuxprt_config(LCU_GET_MYSYSNUM, &sysnum) != ELCU_OK)
  210. RGP_ERROR(RGP_INTERNAL_ERROR);
  211. rgp->OS_specific_control.my_sysnum = sysnum;
  212. /* Allocate 3 message buffers to send regroup packets, iamalive packets
  213. * and poison packets.
  214. */
  215. if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
  216. RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
  217. rgp->OS_specific_control.lcumsg_regroup_p = lcumsgp;
  218. lcumsgp->lcu_tag = NULL;
  219. lcumsgp->lcu_sysnum = sysnum;
  220. lcumsgp->lcu_port = LCU_RGP_PORT;
  221. lcumsgp->lcu_flags = LCUMSG_CRITICAL;
  222. lcumsgp->lcu_reqmbuf.lcu_ctrllen = RGPPKTLEN;
  223. lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->rgppkt_to_send);
  224. if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
  225. RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
  226. rgp->OS_specific_control.lcumsg_iamalive_p = lcumsgp;
  227. lcumsgp->lcu_tag = NULL;
  228. lcumsgp->lcu_sysnum = sysnum;
  229. lcumsgp->lcu_port = LCU_RGP_PORT;
  230. lcumsgp->lcu_reqmbuf.lcu_ctrllen = IAMALIVEPKTLEN;
  231. lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->iamalive_pkt);
  232. if ((lcumsgp = lcuxprt_msg_alloc(LCU_UNACKMSG, LCU_RGP_FLAGS)) == NULL)
  233. RGP_ERROR(RGP_INTERNAL_ERROR); /* no memory */
  234. rgp->OS_specific_control.lcumsg_poison_p = lcumsgp;
  235. lcumsgp->lcu_tag = NULL;
  236. lcumsgp->lcu_sysnum = sysnum;
  237. lcumsgp->lcu_port = LCU_RGP_PORT;
  238. lcumsgp->lcu_reqmbuf.lcu_ctrllen = POISONPKTLEN;
  239. lcumsgp->lcu_reqmbuf.lcu_ctrlbuf = (char *)&(rgp->poison_pkt);
  240. /* Register to get the LCUEV_NODE_UNREACHABLE event. */
  241. if (lcuxprt_events(LCU_CATCH_EVENTS, sysnum, LCUEV_NODE_UNREACHABLE,
  242. rgp_lcu_event_callback) != ELCU_OK)
  243. RGP_ERROR(RGP_INTERNAL_ERROR);
  244. #endif /* LCU */
  245. #ifdef UNIX
  246. /* For testing on UNIX at user level, we use alarm() to simulate timer
  247. * ticks. */
  248. /* Install the alarm handler. */
  249. sig_action.sa_flags = 0;
  250. sig_action.sa_handler = alarm_handler;
  251. sigemptyset(&(sig_action.sa_mask));
  252. /* Block messages when handling timer pops. */
  253. sigaddset(&(sig_action.sa_mask), SIGPOLL);
  254. sigaction(SIGALRM, &sig_action, NULL);
  255. alarm_callback = rgp_periodic_check;
  256. /* Round up the alarm period to the next higher second. */
  257. alarm_period = (RGP_CLOCK_PERIOD + 99) / 100;
  258. /* Get first timer tick as soon as possible; subsequent ones will be
  259. * at alarm_period.
  260. */
  261. alarm(1);
  262. #endif /* UNIX */
  263. #ifdef NT
  264. /* On NT we create a separate thread that will be our timer. */
  265. /* The Timer Thread waits on TimerSignal Event to indicate an RGP rate change. */
  266. /* An RGP rate of 0 is a signal for the Timer Thread to exit */
  267. tempHandle = CreateEvent ( NULL, /* no security */
  268. FALSE, /* Autoreset */
  269. TRUE, /* Initial State is Signalled */
  270. NULL); /* No name */
  271. if ( !tempHandle )
  272. {
  273. RGP_ERROR (RGP_INTERNAL_ERROR);
  274. }
  275. rgp->OS_specific_control.TimerSignal = tempHandle;
  276. tempHandle = CreateEvent ( NULL, /* no security */
  277. TRUE, /* Manual reset */
  278. TRUE, /* Initial State is Signalled */
  279. NULL); /* No name */
  280. if ( !tempHandle )
  281. {
  282. RGP_ERROR (RGP_INTERNAL_ERROR);
  283. }
  284. rgp->OS_specific_control.Stabilized = tempHandle;
  285. rgp->OS_specific_control.ArbitrationInProgress = FALSE;
  286. rgp->OS_specific_control.ArbitratingNode = MM_INVALID_NODE;
  287. rgp->OS_specific_control.ApproxArbitrationWinner = MM_INVALID_NODE;
  288. rgp->OS_specific_control.ShuttingDown = FALSE;
  289. tempHandle = CreateThread( 0, /* security */
  290. 0, /* stack size - use same as primary thread */
  291. (LPTHREAD_START_ROUTINE)NT_timer_thread, /* starting point */
  292. (VOID *) NULL, /* no parameter */
  293. 0, /* create flags - start immediately */
  294. &threadID ); /* thread ID returned here */
  295. if ( !tempHandle )
  296. {
  297. RGP_ERROR( RGP_INTERNAL_ERROR ); /* at least for now */
  298. }
  299. rgp->OS_specific_control.TimerThread = tempHandle;
  300. rgp->OS_specific_control.TimerThreadId = threadID;
  301. rgp->OS_specific_control.UpDownCallback = RGP_NULL_PTR;
  302. rgp->OS_specific_control.NodesDownCallback = RGP_NULL_PTR;
  303. rgp->OS_specific_control.EventEpoch = 0;
  304. #if defined TDM_DEBUG
  305. rgp->OS_specific_control.debug.frozen = 0;
  306. rgp->OS_specific_control.debug.reload_in_progress = 0;
  307. rgp->OS_specific_control.debug.timer_frozen = 0;
  308. rgp->OS_specific_control.debug.doing_tracing = 0;
  309. rgp->OS_specific_control.debug.MyTestPoints.TestPointWord = 0;
  310. // seed the random number function used in testing
  311. srand((unsigned) time( NULL ) );
  312. #endif
  313. #endif /* NT */
  314. }
  315. /************************************************************************
  316. * rgp_cleanup_OS
  317. * ===========
  318. *
  319. * Description:
  320. *
  321. * This routine does OS-dependent cleanup of regroup structures
  322. * and timer thread activity to ready for a new JOIN attempt.
  323. *
  324. * Parameters:
  325. *
  326. * None
  327. *
  328. * Returns:
  329. *
  330. * void - no return value
  331. *
  332. * Algorithm:
  333. *
  334. * OS-dependent initializations.
  335. *
  336. ************************************************************************/
  337. _priv _resident void
  338. rgp_cleanup_OS(void)
  339. {
  340. #if defined (NT)
  341. // Tell Timer Thread to restart RGP Timer
  342. // a_tick might have changed.
  343. SetEvent( rgp->OS_specific_control.TimerSignal);
  344. #endif // NT
  345. }
  /************************************************************************
   * rgp_update_regroup_packet
   * =========================
   *
   * Description:
   *
   *    Statement macro that refreshes the regroup packet sending buffer
   *    from the current regroup status.
   *
   * Parameters:
   *
   *    None (operates on the global rgp control structure)
   *
   * Algorithm:
   *
   *    The status, which already has the layout of a regroup status
   *    packet, is copied into rgppkt_to_send. If rgp->sendstage is set,
   *    the local node is then inserted into the knownstage mask of the
   *    outgoing copy that corresponds to the current stage. Stages
   *    without a corresponding mask leave the copy as is.
   *
   ************************************************************************/
  #define rgp_update_regroup_packet \
      do \
      { \
          /* Start from a verbatim copy of the current status packet. */ \
          rgp->rgppkt_to_send = rgp->rgppkt; \
          \
          /* Advertise our own stage only when sendstage says so. */ \
          if (rgp->sendstage) \
          { \
              switch (rgp->rgppkt.stage) \
              { \
              case RGP_ACTIVATED: \
                  ClusterInsert(rgp->rgppkt_to_send.knownstage1, rgp->mynode); \
                  break; \
              case RGP_CLOSING: \
                  ClusterInsert(rgp->rgppkt_to_send.knownstage2, rgp->mynode); \
                  break; \
              case RGP_PRUNING: \
                  ClusterInsert(rgp->rgppkt_to_send.knownstage3, rgp->mynode); \
                  break; \
              case RGP_PHASE1_CLEANUP: \
                  ClusterInsert(rgp->rgppkt_to_send.knownstage4, rgp->mynode); \
                  break; \
              case RGP_PHASE2_CLEANUP: \
                  ClusterInsert(rgp->rgppkt_to_send.knownstage5, rgp->mynode); \
                  break; \
              default: \
                  break; \
              } \
          } \
      } while(0)
  /************************************************************************
   * rgp_update_poison_packet
   * ========================
   *
   * Description:
   *
   *    Statement macro that refreshes the poison packet sending buffer
   *    from the current regroup status.
   *
   * Parameters:
   *
   *    None (operates on the global rgp control structure)
   *
   * Algorithm:
   *
   *    Copies seqno, reason, activatingnode and causingnode from the
   *    current regroup packet, and the initnodes and endnodes masks from
   *    the control structure, into the poison packet. The information
   *    helps debugging when the dump of a poisoned node is examined.
   *
   ************************************************************************/
  #define rgp_update_poison_packet \
      do \
      { \
          /* Scalar fields come from the current regroup packet. */ \
          rgp->poison_pkt.seqno          = rgp->rgppkt.seqno; \
          rgp->poison_pkt.reason         = rgp->rgppkt.reason; \
          rgp->poison_pkt.activatingnode = rgp->rgppkt.activatingnode; \
          rgp->poison_pkt.causingnode    = rgp->rgppkt.causingnode; \
          \
          /* Node masks come from the control structure itself. */ \
          ClusterCopy(rgp->poison_pkt.initnodes, rgp->initnodes); \
          ClusterCopy(rgp->poison_pkt.endnodes, rgp->endnodes); \
      } while(0)
  428. /************************************************************************
  429. * rgp_broadcast
  430. * =============
  431. *
  432. * Description:
  433. *
  434. * This routine asks the message system to broadcast an unacknowledged
  435. * packet of subtype "packet_subtype" to a set of nodes indicated in
  436. * an appropriate field in the rgp control struct. How the broadcast
  437. * is implemented depends on the OS.
  438. *
  439. * Parameters:
  440. *
  441. * uint8 packet_subtype - type of unsequenced packet to send
  442. *
  443. * Returns:
  444. *
  445. * void - no return value
  446. *
  447. * Algorithm:
  448. *
  449. * The same data packet is to be sent to the set of nodes indicated
  450. * in the rgp control struct field. The sending can be done by queueing
  451. * the packets directly to the send engine or the send can be deferred
  452. * to a lower priority interrupt level. The former approach reduces
  453. * the latency for sending these urgent packets while the latter
  454. * approach may reduce the number of sends if several requests to
  455. * send the same type of packets (this is true only of regroup
  456. * packets) are made in quick succession. In this case, previous
  457. * requests are overwritten by later requests. This is OK since the
  458. * regroup algorithm has enough redundancy in packet sending.
  459. *
  460. * In NSK, the message system provides a broadcast facility for
  461. * unacknowledged packets. It copies regroup's packet into its own
  462. * buffer and issues multiple requests to the SNet services layer.
  463. * When it copies the buffer, it disables the timer and IPC
  464. * interrupts ensuring that there will be no contention with Regroup.
  465. * Therefore, this routine can safely update the packet area here
  466. * without checking if the sending apparatus has completed sending
  467. * the previous packet.
  468. *
  469. * This is not true of LCU where the message system does not
  470. * provide a broadcast facility. In LCU, the updating of the packet
  471. * buffer can be done only when the send engine has completed
  472. * sending. This is assured only in the send completion interrupt
  473. * handler (rgp_msgsys_work).
  474. *
  475. ************************************************************************/
  476. _priv _resident void
  477. rgp_broadcast(uint8 packet_subtype)
  478. {
  479. cluster_t temp_cluster;
  480. //[Raj Das] Copy the ignorescreen before sending.....
  481. PackIgnoreScreen(&rgp->rgppkt, rgp->ignorescreen);
  482. switch (packet_subtype)
  483. {
  484. case RGP_UNACK_REGROUP :
  485. /* Trace the queueing of regroup status packets. */
  486. RGP_TRACE( "RGP Send packets",
  487. rgp->rgppkt.stage, /* TRACE */
  488. RGP_MERGE_TO_32( rgp->status_targets, /* TRACE */
  489. rgp->rgppkt.knownstage1 ), /* TRACE */
  490. RGP_MERGE_TO_32( rgp->rgppkt.knownstage2, /* TRACE */
  491. rgp->rgppkt.knownstage3 ), /* TRACE */
  492. RGP_MERGE_TO_32( rgp->rgppkt.knownstage4, /* TRACE */
  493. rgp->rgppkt.knownstage5 ) ); /* TRACE */
  494. #if defined(NSK) || defined(UNIX) || defined(NT)
  495. /* In NSK, the packet buffer can be updated even if the send
  496. * engine is working on the previous send. See algorithm
  497. * description above.
  498. */
  499. if ((rgp->rgppkt.reason == MM_EVT_LEAVE) &&
  500. (rgp->rgppkt.causingnode == rgp->mynode))
  501. // If a LEAVE event is in progress exclude our node from knownstage mask
  502. rgp->rgppkt_to_send = rgp->rgppkt;
  503. else
  504. // copy regroup packet and insert our node number into knownstage mask
  505. rgp_update_regroup_packet;
  506. #endif /* NSK || UNIX || NT */
  507. ClusterUnion(rgp->rgp_msgsys_p->regroup_nodes,
  508. rgp->status_targets,
  509. rgp->rgp_msgsys_p->regroup_nodes);
  510. /* Clear the targets field in the rgp_control struct after
  511. * copying this info. The message system must clear the target
  512. * bits in the common regroup/msgsys struct after sending the
  513. * packets.
  514. */
  515. ClusterInit(rgp->status_targets);
  516. rgp->rgp_msgsys_p->sendrgppkts = 1;
  517. break;
  518. case RGP_UNACK_IAMALIVE :
  519. /* Count number of IamAlive requests queued. */
  520. RGP_INCREMENT_COUNTER( QueuedIAmAlive );
  521. ClusterUnion(rgp->rgp_msgsys_p->iamalive_nodes,
  522. rgp->rgpinfo.cluster,
  523. rgp->rgp_msgsys_p->iamalive_nodes);
  524. rgp->rgp_msgsys_p->sendiamalives = 1;
  525. /* No targets field to clear in the rgp_control struct.
  526. * The message system must clear the target bits in the common
  527. * regroup/msgsys struct after sending the packets.
  528. */
  529. break;
  530. case RGP_UNACK_POISON :
  531. /* Trace the sending of poison packets. */
  532. RGP_TRACE( "RGP Send poison ",
  533. rgp->rgppkt.stage, /* TRACE */
  534. RGP_MERGE_TO_32( rgp->poison_targets, /* TRACE */
  535. rgp->rgppkt.knownstage1 ), /* TRACE */
  536. RGP_MERGE_TO_32( rgp->rgppkt.knownstage2, /* TRACE */
  537. rgp->rgppkt.knownstage3 ), /* TRACE */
  538. RGP_MERGE_TO_32( rgp->rgppkt.knownstage4, /* TRACE */
  539. rgp->rgppkt.knownstage5 ) ); /* TRACE */
  540. /* The poison packet targets must NOT be considered alive. */
  541. ClusterIntersection(temp_cluster, rgp->rgpinfo.cluster,
  542. rgp->poison_targets);
  543. ClusterDifference(temp_cluster,
  544. temp_cluster,
  545. rgp->OS_specific_control.Banished);
  546. if (ClusterNumMembers(temp_cluster) != 0)
  547. RGP_ERROR(RGP_INTERNAL_ERROR);
  548. #if defined(NSK) || defined(NT)
  549. /* In NSK, the packet buffer can be updated even if the send
  550. * engine is working on the previous send. See algorithm
  551. * description above.
  552. */
  553. rgp_update_poison_packet;
  554. #endif /* NSK || NT */
  555. ClusterUnion(rgp->rgp_msgsys_p->poison_nodes,
  556. rgp->poison_targets,
  557. rgp->rgp_msgsys_p->poison_nodes);
  558. /* Clear the targets field in the rgp_control struct after
  559. * copying this info. The message system must clear the target
  560. * bits in the common regroup/msgsys struct after sending the
  561. * packets.
  562. */
  563. ClusterInit(rgp->poison_targets);
  564. rgp->rgp_msgsys_p->sendpoisons = 1;
  565. break;
  566. default :
  567. RGP_ERROR(RGP_INTERNAL_ERROR);
  568. break;
  569. }
  570. QUEUESEND; /* invoke OS-specific sending function/macro */
  571. }
  572. /************************************************************************
  573. * rgp_had_power_failure
  574. * =====================
  575. *
  576. * Description:
  577. *
  578. * Tells the OS at the end of a regroup incident if a surviving node
  579. * had a power failure. The message system can use this to clear all
  580. * bus errors collected so far to node because node seems to have
  581. * had a power failure and has now recovered from it. Perhaps, the
  582. * bus errors were due to the power failure.
  583. *
  584. * Parameters:
  585. *
  586. * None
  587. *
  588. * Returns:
  589. *
  590. * void - no return value
  591. *
  592. * Algorithm:
  593. *
  594. * Calls a message system routine to perform any error clearing.
  595. *
  596. ************************************************************************/
  597. _priv _resident void
  598. rgp_had_power_failure(node_t node)
  599. {
  600. /* Currently, there is nothing to do. */
  601. RGP_TRACE( "RGP Power fail ", node, 0, 0, 0);
  602. }
  603. /************************************************************************
  604. * rgp_status_of_node
  605. * ==================
  606. *
  607. * Description:
  608. *
  609. * Ask the SP to return the status of a node. The SP must return the
  610. * current status and not return a stale status. This routine is
  611. * called by the split-brain avoidance algorithm in the two-node
  612. * case, for the non-tie-breaker to get the status of the tie-breaker
  613. * node.
  614. *
  615. * Parameters:
  616. *
  617. * node_t node
  618. * the node whose status is to be obtained.
  619. *
  620. * Returns:
  621. *
  622. * int - the status code of the node returned by the SP, appropriately
  623. * encoded into one of the values known to regroup.
  624. *
  625. * Algorithm:
  626. *
  627. * Calls a millicode routine to ask the SP for the status of the node.
  628. *
  629. ************************************************************************/
  630. _priv _resident int
  631. rgp_status_of_node(node_t node)
  632. {
  633. #if defined(NT)
  634. /* noone home */
  635. return RGP_NODE_UNREACHABLE;
  636. #else
  637. return _get_remote_cpu_state_( node ); /*F40:MB06452.1*/
  638. #endif
  639. }
  640. /************************************************************************
  641. * rgp_newnode_online
  642. * ==================
  643. *
  644. * Description:
  645. *
  646. * This routine is called if the first IamAlive is received from a
  647. * newly booted node before the cluster manager gets a chance to
  648. * call rgp_monitor_node(). The OS can use this routine to mark the
  649. * node as up if it does not have any other means to detect that
  650. * a node has come up.
  651. *
  652. * Parameters:
  653. *
  654. * node_t node -
  655. * the new node that has just been detected to be up
  656. *
  657. * Returns:
  658. *
  659. * void - no return value
  660. *
  661. * Algorithm:
  662. *
  663. * This routine marks the state of the node as up as seen by the
  664. * native OS.
  665. *
  666. * In NSK, on the reloader node, the marking of the reloadee as up
  667. * is done by the message system when the initial address handshake
  668. * packet is received from the reloadee. NSK does not require the
  669. * regroup module to report the fact that the reloadee is online.
  670. *
  671. * The above is probably true for LCU as well. However, the details
  672. * are not yet worked out. For now, this routine is a no-op for LCU.
  673. *
  674. ************************************************************************/
  675. _priv _resident void
  676. rgp_newnode_online(node_t newnode)
  677. {
  678. RGP_TRACE( "RGP New node up ", newnode, 0, 0, 0);
  679. }
  680. /************************************************************************
  681. * rgp_select_cluster_ex
  682. * =====================
  683. *
  684. * Description:
  685. *
  686. * Given an array of cluster choices, this routine picks the best
  687. * cluster to keep alive. cluster_choices[] is the array of choices
  688. * and num_clusters is the number of entries in the array.
  689. *
  690. * Parameters:
  691. *
  692. * cluster_t cluster_choices[]
  693. * array of cluster choices
  694. *
  695. * int num_clusters
  696. * number of entries (choices) in the array
  697. *
  698. * node_t key_node
  699. * internal node number of the key node or RGP_NULL_NODE
  700. *
  701. * Returns:
  702. *
  703. * int - the index of the selected cluster; if no cluster
  704. * is viable, -1 is returned.
  705. *
  706. * Algorithm:
  707. *
  708. * By default, the best cluster is defined as the largest cluster.
  709. * Optionally, a node called key_node can be required to be present
  710. * for a cluster to be viable. key_node can be set to RGP_NULL_NODE
  711. * to imply that no specific node is required to be present. The
  712. * routine returns the index of the best cluster and -1 if none of
  713. * the clusters is viable (that is, does not include the key node).
  714. *
  715. ************************************************************************/
  716. _priv _resident int
  717. rgp_select_cluster_ex(cluster_t cluster_choices[], int num_clusters, node_t key_node)
  718. {
  719. int max_members = 0, num_members;
  720. int cluster_selected = -1;
  721. int i;
  722. #if defined(UNIX)
  723. printf("rgp_select_cluster() called with %d choices:", num_clusters);
  724. for (i = 0; i < num_clusters; i++)
  725. {
  726. node_t j;
  727. printf("(");
  728. for (j = 0; j < (node_t) rgp->num_nodes; j++)
  729. {
  730. if (ClusterMember(cluster_choices[i], j))
  731. printf("%d,", EXT_NODE(j));
  732. }
  733. printf(")");
  734. }
  735. printf("\n");
  736. fflush(stdout);
  737. #endif /* UNIX */
  738. for (i = 0; i < num_clusters; i++)
  739. {
  740. /* Skip the current cluster if a key node is defined and is not
  741. * in the cluster.
  742. */
  743. if ((key_node != RGP_NULL_NODE) &&
  744. !ClusterMember(cluster_choices[i], key_node))
  745. continue;
  746. if ((num_members = ClusterNumMembers(cluster_choices[i])) > max_members)
  747. {
  748. cluster_selected = i;
  749. max_members = num_members;
  750. }
  751. }
  752. #if defined(UNIX)
  753. printf("Node %d: rgp_select_cluster() returned %d.\n",
  754. EXT_NODE(rgp->mynode), cluster_selected);
  755. fflush(stdout);
  756. #endif /* UNIX */
  757. return (cluster_selected);
  758. }
  759. /************************************************************************
  760. * rgp_select_cluster
  761. * ==================
  762. *
  763. * Description:
  764. *
  765. * Given an array of cluster choices, this routine picks the best
  766. * cluster to keep alive. cluster_choices[] is the array of choices
  767. * and num_clusters is the number of entries in the array.
  768. *
  769. * Parameters:
  770. *
  771. * cluster_t cluster_choices[]
  772. * array of cluster choices
  773. *
  774. * int num_clusters
  775. * number of entries (choices) in the array
  776. *
  777. * Returns:
  778. *
  779. * int - the index of the selected cluster; if no cluster
  780. * is viable, -1 is returned.
  781. *
  782. * Algorithm:
  783. *
  784. * By default, the best cluster is defined as the largest cluster.
  785. * Optionally, a node called RGP_KEY_NODE can be required to be present
  786. * for a cluster to be viable. RGP_KEY_NODE can be set to RGP_NULL_NODE
  787. * to imply that no specific node is required to be present. The
  788. * routine returns the index of the best cluster and -1 if none of
  789. * the clusters is viable (that is, does not include the key node).
  790. *
  791. ************************************************************************/
  792. _priv _resident int
  793. rgp_select_cluster(cluster_t cluster_choices[], int num_clusters)
  794. {
  795. node_t key_node;
  796. if (RGP_KEY_NODE == RGP_NULL_NODE) {
  797. key_node = RGP_NULL_NODE;
  798. } else {
  799. key_node = INT_NODE(RGP_KEY_NODE);
  800. }
  801. return rgp_select_cluster_ex(cluster_choices , num_clusters, key_node);
  802. }
  803. #ifdef LCU
  804. /************************************************************************
  805. * rgp_msgsys_work
  806. * ===============
  807. *
  808. * Description:
  809. *
  810. * LCU-specific routine that implements broadcasting of packets by
  811. * sending them serially.
  812. *
  813. * This routine is called from rgp_broadcast() to initiate new sends.
  814. * It is also the packet send completion interrupt handler (callback
  815. * routine), invoked by the LCU message system when the packet buffer
  816. * can be reused.
  817. *
  818. * Parameters:
  819. *
  820. * lcumsg_t *lcumsgp -
  821. * pointer to lcu message if called from the transport's send
  822. * completion interrupt handler; NULL if called from
  823. * rgp_broadcast() to send a new packet.
  824. *
  825. * int status -
  826. * the message completion status if called from the transport's
  827. * send completion interrupt handler; 0 if called from
  828. * rgp_broadcast() to send a new packet.
  829. *
  830. * Returns:
  831. *
  832. * void - no return value
  833. *
  834. * Algorithm:
  835. *
  836. * If called from the send completion interrupt, the routine checks
  837. * to see if the packet buffer needs to be refreshed. This is true
  838. * if the appropriate bit in the rgp_msgsys struct is set. If so,
  839. * the buffer is updated with the current info (using an update
  840. * macro). This update is relevant to regroup status packets and
  841. * poison packets, but not to IamAlives packets whose contents are
  842. * always the same. The bit is cleared after the packet is updated.
  843. *
  844. * Next, the routine checks if there are more destinations to send
  845. * the packet to. If so, it finds the next higher numbered node to
  846. * send to, issues a send and returns.
  847. *
  848. * If invoked from rgp_broadcast() to start a new broadcast, the
  849. * routine first checks to see if the previous broadcast of the
  850. * same packet is complete. This is indicated by the tag field in
  851. * the message struct. The tag is NULL if the broadcast has
  852. * completed or has not been initiated. In this case, the tag is
  853. * set to a non-NULL value and a new broadcast initiated, with
  854. * this routine specified as the callback routine.
  855. *
  856. * If the previous broadcast has not completed, nothing needs to
  857. * be done. The completion interrupt will cause the buffer to be
  858. * refreshed and the broadcast to be continued. The broadcast
  859. * will then include new targets that may be included in this
  860. * new request.
  861. *
  862. ************************************************************************/
  863. _priv _resident void
  864. rgp_msgsys_work(lcumsg_t *lcumsgp, int status)
  865. {
  866. rgp_unseq_pkt_t *packet;
  867. cluster_t *sending_cluster;
  868. node_t node;
  869. if (lcumsgp == NULL)
  870. {
  871. /* New work requested. Only one type of work is requested at
  872. * a time.
  873. */
  874. if (rgp->rgp_msgsys_p->sendrgppkts)
  875. {
  876. /* Have new regroup status packets to send. First check
  877. * if the last regroup status send completed. If so,
  878. * we can update the packet and initiate a new send.
  879. * If not, we must defer to the completion interrupt
  880. * (invocation of this routine with a non-NULL lcumsgp).
  881. */
  882. lcumsgp = rgp->OS_specific_control.lcumsg_regroup_p;
  883. if (lcumsgp->lcu_tag == NULL)
  884. {
  885. /* Last send completed. Initiate new send. */
  886. rgp_update_regroup_packet;
  887. rgp->rgp_msgsys_p->sendrgppkts = 0;
  888. for (node = 0; node < rgp->num_nodes; node++)
  889. {
  890. if (ClusterMember(rgp->rgp_msgsys_p->regroup_nodes, node))
  891. {
  892. ClusterDelete(rgp->rgp_msgsys_p->regroup_nodes, node);
  893. lcumsgp->lcu_node = node;
  894. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->regroup_nodes);
  895. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  896. ELCU_OK)
  897. RGP_ERROR(RGP_INTERNAL_ERROR);
  898. break; /* can send only to one node at a time */
  899. }
  900. }
  901. }
  902. }
  903. else if (rgp->rgp_msgsys_p->sendiamalives)
  904. {
  905. /* Need to send IamAlives again. First check if the last
  906. * IamAlive send completed. If so, we can initiate a new send.
  907. * If not, we must defer to the completion interrupt
  908. * (invocation of this routine with a non-NULL lcumsgp).
  909. */
  910. lcumsgp = rgp->OS_specific_control.lcumsg_iamalive_p;
  911. if (lcumsgp->lcu_tag == NULL)
  912. {
  913. /* Last send completed. Initiate new send. */
  914. rgp->rgp_msgsys_p->sendiamalives = 0;
  915. for (node = 0; node < rgp->num_nodes; node++)
  916. {
  917. if (ClusterMember(rgp->rgp_msgsys_p->iamalive_nodes, node))
  918. {
  919. ClusterDelete(rgp->rgp_msgsys_p->iamalive_nodes, node);
  920. lcumsgp->lcu_node = node;
  921. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->iamalive_nodes);
  922. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  923. ELCU_OK)
  924. RGP_ERROR(RGP_INTERNAL_ERROR);
  925. break; /* can send only to one node at a time */
  926. }
  927. }
  928. }
  929. }
  930. else if (rgp->rgp_msgsys_p->sendpoisons)
  931. {
  932. /* Have new poison packets to send. First check
  933. * if the last poison packet send completed. If so,
  934. * we can update the packet and initiate a new send.
  935. * If not, we must defer to the completion interrupt
  936. * (invocation of this routine with a non-NULL lcumsgp).
  937. */
  938. lcumsgp = rgp->OS_specific_control.lcumsg_poison_p;
  939. if (lcumsgp->lcu_tag == NULL)
  940. {
  941. /* Last send completed. Initiate new send. */
  942. rgp_update_poison_packet;
  943. rgp->rgp_msgsys_p->sendpoisons = 0;
  944. for (node = 0; node < rgp->num_nodes; node++)
  945. {
  946. if (ClusterMember(rgp->rgp_msgsys_p->poison_nodes, node))
  947. {
  948. ClusterDelete(rgp->rgp_msgsys_p->poison_nodes, node);
  949. lcumsgp->lcu_node = node;
  950. lcumsgp->lcu_tag = &(rgp->rgp_msgsys_p->poison_nodes);
  951. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  952. ELCU_OK)
  953. RGP_ERROR(RGP_INTERNAL_ERROR);
  954. break; /* can send only to one node at a time */
  955. }
  956. }
  957. }
  958. }
  959. } /* new work */
  960. else
  961. {
  962. /* Send completion interrupt; continue the broadcast if
  963. * there are targets remaining.
  964. */
  965. RGP_LOCK;
  966. /* Find what type of packet completed; send the same type. */
  967. packet = (rgp_unseq_pkt_t *) lcumsgp->lcu_reqmbuf.lcu_ctrlbuf;
  968. switch (packet->pktsubtype)
  969. {
  970. case RGP_UNACK_REGROUP :
  971. /* Check if packet needs to be updated. */
  972. if (rgp->rgp_msgsys_p->sendrgppkts)
  973. {
  974. rgp_update_regroup_packet;
  975. rgp->rgp_msgsys_p->sendrgppkts = 0;
  976. }
  977. break;
  978. case RGP_UNACK_IAMALIVE :
  979. break;
  980. case RGP_UNACK_POISON :
  981. /* Check if packet needs to be updated. */
  982. if (rgp->rgp_msgsys_p->sendpoisons)
  983. {
  984. rgp_update_poison_packet;
  985. rgp->rgp_msgsys_p->sendpoisons = 0;
  986. }
  987. break;
  988. }
  989. /* Check if there is any more node to send the same packet
  990. * type to. If not, set the tag to NULL and return.
  991. */
  992. sending_cluster = (cluster_t *) (lcumsgp->lcu_tag);
  993. if (ClusterNumMembers(*sending_cluster) == 0)
  994. {
  995. lcumsgp->lcu_tag = NULL; /* indicate that broadcast is complete. */
  996. return;
  997. }
  998. /* There is at least one more node to send to. Start with
  999. * the node with the next higher number than the node we
  1000. * just finished sending to.
  1001. *
  1002. * The loop terminates after posting a send to the next
  1003. * node to send to. We know there is at least one such node.
  1004. */
  1005. for (node = lcumsgp->lcu_node + 1; node < rgp->num_nodes + 1; node++)
  1006. {
  1007. if (node == rgp->num_nodes)
  1008. node = 0; /* continue the search starting at node 0 */
  1009. if (ClusterMember(*sending_cluster, node))
  1010. {
  1011. ClusterDelete(*sending_cluster, node);
  1012. lcumsgp->lcu_node = node;
  1013. if (lcuxprt_msg_send(lcumsgp, NULL, rgp_msgsys_work, 0) !=
  1014. ELCU_OK)
  1015. RGP_ERROR(RGP_INTERNAL_ERROR);
  1016. break; /* can send only to one node at a time */
  1017. }
  1018. }
  1019. RGP_UNLOCK;
  1020. }
  1021. }
  1022. #endif /* LCU */
  1023. /*---------------------------------------------------------------------------*/
  1024. #if defined(LCU) || defined(UNIX) || defined(NT)
  1025. /*---------------------------------------------------------------------------*/
  1026. void
  1027. rgp_hold_all_io(void)
  1028. /* Simulates the TNet services routine to pause IO. */
  1029. {
  1030. #if defined (NT)
  1031. (*(rgp->OS_specific_control.HoldIOCallback))();
  1032. #endif
  1033. RGP_TRACE( "RGP Hold all IO ", 0, 0, 0, 0);
  1034. }
  1035. /*---------------------------------------------------------------------------*/
  1036. void
  1037. rgp_resume_all_io(void)
  1038. /* Simulates the TNet services routine to resume IO. */
  1039. {
  1040. #if defined (NT)
  1041. (*(rgp->OS_specific_control.ResumeIOCallback))();
  1042. #endif
  1043. RGP_TRACE( "RGP Resume IO ", 0, 0, 0, 0);
  1044. }
  1045. /*---------------------------------------------------------------------------*/
  1046. void
  1047. RGP_ERROR_EX (uint16 halt_code, char* fname, DWORD lineno)
  1048. /* Halt node with error code. */
  1049. {
  1050. char *halt_string;
  1051. node_t node = RGP_NULL_NODE;
  1052. #if defined( NT )
  1053. char halt_buffer[ 256 ];
  1054. DWORD eventMsgId;
  1055. BOOL skipFormatting = FALSE;
  1056. //
  1057. // If a user initiated a shutdown, (s)he wants to see the node
  1058. // to go down and wait for an explicit start command.
  1059. //
  1060. // We map RGP_RELOADFAILED to SHUTDOWN_DURING_REGROUP_ERROR since
  1061. // HaltCallback does a graceful stop for the latter one.
  1062. // SCM won't restart the node after a graceful stop unless
  1063. // it is explicitly told to do so
  1064. //
  1065. if (halt_code == RGP_RELOADFAILED &&
  1066. rgp->OS_specific_control.ShuttingDown)
  1067. {
  1068. halt_code = RGP_SHUTDOWN_DURING_RGP;
  1069. }
  1070. #endif
  1071. if (halt_code == RGP_RELOADFAILED) {
  1072. halt_string = "[RGP] Node %d: REGROUP WARNING: reload failed.";
  1073. eventMsgId = MM_EVENT_RELOAD_FAILED;
  1074. }
  1075. else if (halt_code == RGP_INTERNAL_ERROR) {
  1076. halt_string = "[RGP] Node %d: REGROUP ERROR: consistency check failed in file %s, line %u.";
  1077. eventMsgId = MM_EVENT_INTERNAL_ERROR;
  1078. skipFormatting = TRUE;
  1079. _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
  1080. halt_string,
  1081. EXT_NODE(rgp->mynode),
  1082. fname,
  1083. lineno);
  1084. }
  1085. else if (halt_code == RGP_MISSED_POLL_TO_SELF) {
  1086. halt_string = "[RGP] Node %d: REGROUP ERROR: cannot talk to self.";
  1087. eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
  1088. }
  1089. #if !defined(NT)
  1090. else if (halt_code == RGP_AVOID_SPLIT_BRAIN) {
  1091. halt_string = "[RGP] Node %d: REGROUP ERROR: commiting suicide to avoid split brain.";
  1092. }
  1093. #endif
  1094. else if (halt_code == RGP_PRUNED_OUT) {
  1095. halt_string = "[RGP] Node %d: REGROUP ERROR: pruned out due to communication failure.";
  1096. eventMsgId = MM_EVENT_PRUNED_OUT;
  1097. }
  1098. else if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
  1099. halt_string = "[RGP] Node %d: REGROUP ERROR: poison packet received from node %d.";
  1100. eventMsgId = MM_EVENT_PARIAH;
  1101. node = (node_t)(halt_code - RGP_PARIAH);
  1102. }
  1103. else if (halt_code == RGP_ARBITRATION_FAILED) {
  1104. halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration failed.";
  1105. eventMsgId = MM_EVENT_ARBITRATION_FAILED;
  1106. }
  1107. else if (halt_code == RGP_ARBITRATION_STALLED) {
  1108. halt_string = "[RGP] Node %d: REGROUP ERROR: arbitration stalled.";
  1109. eventMsgId = MM_EVENT_ARBITRATION_STALLED;
  1110. }
  1111. else if (halt_code == RGP_SHUTDOWN_DURING_RGP) {
  1112. halt_string = "[RGP] Node %d: REGROUP INFO: regroup engine requested immediate shutdown.";
  1113. eventMsgId = MM_EVENT_SHUTDOWN_DURING_RGP;
  1114. }
  1115. else {
  1116. halt_string = "[RGP] Node %d: REGROUP ERROR: unknown halt code (%d).";
  1117. eventMsgId = NM_EVENT_MEMBERSHIP_HALT;
  1118. node = halt_code; // get it printed out by borrowing node
  1119. }
  1120. #if defined(UNIX)
  1121. printf(halt_string, EXT_NODE(rgp->mynode), node);
  1122. fflush(stdout);
  1123. /* Simulate a halt by dumping core and exiting the process. */
  1124. abort();
  1125. #elif defined(NT)
  1126. if ( !skipFormatting ) {
  1127. _snprintf(halt_buffer, sizeof( halt_buffer ) - 1,
  1128. halt_string,
  1129. EXT_NODE(rgp->mynode),
  1130. node);
  1131. }
  1132. #if CLUSTER_BETA
  1133. ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\t%2!hs!:%3!d!\n", halt_buffer, fname, lineno);
  1134. #else
  1135. ClRtlLogPrint(LOG_CRITICAL, "%1!hs!\n", halt_buffer );
  1136. #endif
  1137. if ((halt_code >= RGP_PARIAH_FIRST) && (halt_code <= RGP_PARIAH_LAST)) {
  1138. WCHAR nodeString[ 16 ];
  1139. PWCHAR nodeName;
  1140. _snwprintf( nodeString, sizeof( nodeString ) / sizeof ( WCHAR ), L"%d", node );
  1141. nodeName = RgpGetNodeNameFromId( node );
  1142. CsLogEvent2( LOG_CRITICAL, eventMsgId, nodeString, nodeName );
  1143. if ( nodeName != NULL ) {
  1144. LocalFree( nodeName );
  1145. }
  1146. }
  1147. else if ( eventMsgId == NM_EVENT_MEMBERSHIP_HALT ) {
  1148. WCHAR haltString[ 16 ];
  1149. _snwprintf( haltString, sizeof( haltString ) / sizeof ( WCHAR ), L"%d", halt_code );
  1150. CsLogEvent1( LOG_CRITICAL, eventMsgId, haltString );
  1151. }
  1152. else {
  1153. CsLogEvent( LOG_CRITICAL, eventMsgId );
  1154. }
  1155. /* we rely on RGP_ERROR_EX to kill the node immediately
  1156. rgp_cleanup() can potentially slow us down.
  1157. 435977 showed that it can take upto 25 seconds, if we
  1158. have a lot IP addr activity.
  1159. since in the end of the function we execute HaltCallback which kills the cluster,
  1160. we can safely omit doing rgp_cleanup and rgp_cleanup_OS
  1161. If JoinFailedCallback will be ever enabled, the fate of rgp_cleanup and rgp_cleanup_OS
  1162. should be reevaluated.
  1163. */
  1164. #if 0
  1165. rgp_cleanup();
  1166. rgp_cleanup_OS();
  1167. if (halt_code == RGP_RELOADFAILED)
  1168. (*(rgp->OS_specific_control.JoinFailedCallback))();
  1169. else
  1170. #endif
  1171. (*(rgp->OS_specific_control.HaltCallback))(halt_code); // does not return */
  1172. #else
  1173. cmn_err(CE_PANIC, halt_string, EXT_NODE(rgp->mynode), node);
  1174. #endif /* UNIX */
  1175. }
  1176. /*---------------------------------------------------------------------------*/
  1177. void
  1178. rgp_start_phase1_cleanup(void)
  1179. /* Tells the OS to start cleanup actions for all failed nodes. */
  1180. {
  1181. #if defined (NT)
  1182. node_t i;
  1183. //
  1184. // On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
  1185. //
  1186. for ( i=0; i < (node_t) rgp->num_nodes; i++)
  1187. {
  1188. if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
  1189. {
  1190. (*(rgp->OS_specific_control.MsgCleanup1Callback))(EXT_NODE(i));
  1191. }
  1192. }
  1193. #endif
  1194. RGP_TRACE( "RGP Ph1 cleanup ", 0, 0, 0, 0);
  1195. rgp_event_handler(RGP_EVT_PHASE1_CLEANUP_DONE, RGP_NULL_NODE);
  1196. }
  1197. /*---------------------------------------------------------------------------*/
  1198. void
  1199. rgp_start_phase2_cleanup(void)
  1200. /* The equivalent of NSK's regroupstage4action(). */
  1201. {
  1202. #if defined (NT)
  1203. BITSET bitset;
  1204. node_t i;
  1205. //
  1206. // On NT we saved the nodes to be downed bitmask in NeedsNodeDownCallback.
  1207. //
  1208. BitsetInit(bitset);
  1209. for ( i=0; i < (node_t) rgp->num_nodes; i++)
  1210. {
  1211. if ( ClusterMember( rgp->OS_specific_control.NeedsNodeDownCallback, i ) )
  1212. {
  1213. BitsetAdd(bitset, EXT_NODE(i));
  1214. }
  1215. }
  1216. (*(rgp->OS_specific_control.MsgCleanup2Callback))(bitset);
  1217. #endif
  1218. RGP_TRACE( "RGP Ph2 cleanup ", 0, 0, 0, 0);
  1219. rgp_event_handler(RGP_EVT_PHASE2_CLEANUP_DONE, RGP_NULL_NODE);
  1220. }
  1221. /*---------------------------------------------------------------------------*/
  1222. void
  1223. rgp_cleanup_complete(void)
  1224. /* The equivalent of NSK's regroupstage5action(). */
  1225. {
  1226. #if defined(NT)
  1227. #endif
  1228. RGP_TRACE( "RGP completed ", 0, 0, 0, 0);
  1229. }
  1230. /*---------------------------------------------------------------------------*/
  1231. #endif /* LCU || UNIX || NT */
  1232. #if defined(NT)
  1233. /************************************************************************
  1234. * NT_timer_callback
  1235. * =================
  1236. *
  1237. * Description:
  1238. *
  1239. * This routine is the callback function that gets invoked whenever a
  1240. * timer pops. The routine will call rgp_periodic_check. This function
  1241. * is defined by the Win32 TimerProc procedure.
  1242. *
  1243. * Parameters:
  1244. *
  1245. * See below. We don't use any of them.
  1246. *
  1247. * Returns:
  1248. *
  1249. * none.
  1250. *
  1251. * Algorithm:
  1252. *
 *    This routine just calls rgp_periodic_check. The existence of this
 *    routine is solely due to a fixed format callback defined by
 *    Microsoft.
  1256. *
  1257. ************************************************************************/
  1258. VOID CALLBACK NT_timer_callback(
  1259. VOID
  1260. )
  1261. {
  1262. #if defined(TDM_DEBUG)
  1263. if ( !(rgp->OS_specific_control.debug.timer_frozen) &&
  1264. !(rgp->OS_specific_control.debug.frozen) )
  1265. #endif
  1266. rgp_periodic_check( );
  1267. // Do the Clussvc to clusnet heartbeating stuff here iff enabled.
  1268. if(MmStartClussvcToClusnetHeartbeat && (NmClusnetHandle != NULL)) {
  1269. if (MmCheckSystemHealthTick <= 0) {
  1270. // Reseed the tick count.
  1271. // Mimic hardware watchdog timers and use one quarter of the timeout.
  1272. MmCheckSystemHealthTick = ((NmClusSvcHeartbeatTimeout * 1000)/RGP_CLOCK_PERIOD)/4;
  1273. // Send the heartbeat ioctl.
  1274. ClusnetIamalive(NmClusnetHandle);
  1275. }
  1276. else {
  1277. MmCheckSystemHealthTick--;
  1278. }
  1279. }
  1280. }
  1281. /************************************************************************
  1282. * NT_timer_thread
  1283. * ===============
  1284. *
  1285. * Description:
  1286. *
 *    This routine is executed as a separate thread in the Windows NT
 *    implementation. This thread generates the periodic regroup
 *    clock ticks. It is signalled via an event whenever the rate changes
 *    or to cause termination.
  1291. *
  1292. * Parameters:
  1293. *
  1294. * None.
  1295. *
  1296. * Returns:
  1297. *
  1298. * This thread should not go away.
  1299. *
  1300. * Algorithm:
  1301. *
  1302. * This routine is run as a separate thread. It sets up a timer to pop
  1303. * every <time_interval> * 10 milliseconds.
  1304. *
  1305. ************************************************************************/
  1306. void NT_timer_thread( void )
  1307. {
  1308. BOOL Success;
  1309. LARGE_INTEGER DueTime;
  1310. DWORD Error, MyHandleIndex;
  1311. HANDLE MyHandles[2]; /* for use by WaitForMultiple */
  1312. DWORD status;
  1313. DWORD msDueTime;
  1314. #define MyHandleSignalIx 0
  1315. #define MyHandleTimerIx 1
  1316. MyHandles[MyHandleSignalIx] = rgp->OS_specific_control.TimerSignal; /* Event signals HB rate change */
  1317. rgp->OS_specific_control.RGPTimer = CreateWaitableTimer(
  1318. NULL, // no security
  1319. FALSE, // Initial State FALSE
  1320. NULL
  1321. ); // No name
  1322. if (rgp->OS_specific_control.RGPTimer == NULL) {
  1323. Error = GetLastError();
  1324. RGP_ERROR(RGP_INTERNAL_ERROR);
  1325. }
  1326. status = MmSetThreadPriority();
  1327. if ( status != ERROR_SUCCESS ) {
  1328. ClRtlLogPrint(LOG_CRITICAL,
  1329. "[MM] Unable to set timer thread priority, status %1!u!\n",
  1330. status
  1331. );
  1332. RGP_ERROR((uint16) status);
  1333. ExitThread(status);
  1334. }
  1335. MyHandles[MyHandleTimerIx] = rgp->OS_specific_control.RGPTimer;
  1336. while (TRUE)
  1337. {
  1338. MyHandleIndex = WaitForMultipleObjects (
  1339. 2, /* Number of Events */
  1340. MyHandles, /* Handle Array */
  1341. FALSE, /* Wait for ANY event */
  1342. INFINITE ); /* Wait forever */
  1343. if (MyHandleIndex == MyHandleSignalIx) // Timer Change Signal Event
  1344. {
  1345. // RGP rate has changed
  1346. CancelWaitableTimer ( rgp->OS_specific_control.RGPTimer );
  1347. if ( rgp->rgpinfo.a_tick == 0 ) // Time to quit
  1348. {
  1349. CloseHandle ( rgp->OS_specific_control.RGPTimer );
  1350. rgp->OS_specific_control.RGPTimer = 0;
  1351. ExitThread ( 0 );
  1352. }
  1353. // a_tick has new RGP rate in milliseconds.
  1354. msDueTime = rgp->rgpinfo.a_tick;
  1355. DueTime.QuadPart = Int32x32To64(-10000, msDueTime);
  1356. Success = SetWaitableTimer(
  1357. rgp->OS_specific_control.RGPTimer,
  1358. &DueTime,
  1359. rgp->rgpinfo.a_tick,
  1360. NULL,
  1361. NULL,
  1362. FALSE);
  1363. if (!Success) {
  1364. Error = GetLastError();
  1365. RGP_ERROR(RGP_INTERNAL_ERROR);
  1366. }
  1367. } // Timer Change Signal
  1368. else
  1369. { // RGP Timer Tick
  1370. NT_timer_callback();
  1371. // Removed - bug 742297. NM now has its own timer thread.
  1372. // NmTimerTick(msDueTime);
  1373. }
  1374. } // while
  1375. }
  1376. PWCHAR
  1377. RgpGetNodeNameFromId(
  1378. node_t NodeID
  1379. )
  1380. /*++
  1381. Routine Description:
  1382. given a node ID, issue a get name node control to get the computer name of
  1383. the node. Returned buffer to be freed by caller.
  1384. Arguments:
  1385. NodeID - ID ( 1, 2, 3, ..) of the node
  1386. Return Value:
  1387. pointer to buffer containing name
  1388. --*/
  1389. {
  1390. PWCHAR buffer;
  1391. DWORD bufferSize = MAX_COMPUTERNAME_LENGTH * sizeof( WCHAR );
  1392. DWORD bytesReturned;
  1393. DWORD bytesRequired;
  1394. PNM_NODE node;
  1395. buffer = LocalAlloc( LMEM_FIXED, bufferSize );
  1396. if ( buffer != NULL ) {
  1397. node = NmReferenceNodeById( NodeID );
  1398. if ( node != NULL ) {
  1399. NmNodeControl(node,
  1400. NULL, // HostNode OPTIONAL,
  1401. CLUSCTL_NODE_GET_NAME,
  1402. NULL, // InBuffer,
  1403. 0, // InBufferSize,
  1404. (PUCHAR)buffer,
  1405. bufferSize,
  1406. &bytesReturned,
  1407. &bytesRequired);
  1408. OmDereferenceObject( node );
  1409. }
  1410. }
  1411. return buffer;
  1412. }
  1413. #endif /* NT */
  1414. #ifdef __cplusplus
  1415. }
  1416. #endif /* __cplusplus */
  1417. #if 0
  1418. History of changes to this file:
  1419. -------------------------------------------------------------------------
  1420. 1995, December 13 F40:KSK0610 /*F40:KSK06102.2*/
  1421. This file is part of the portable Regroup Module used in the NonStop
  1422. Kernel (NSK) and Loosely Coupled UNIX (LCU) operating systems. There
  1423. are 10 files in the module - jrgp.h, jrgpos.h, wrgp.h, wrgpos.h,
  1424. srgpif.c, srgpos.c, srgpsm.c, srgputl.c, srgpcli.c and srgpsvr.c.
  1425. The last two are simulation files to test the Regroup Module on a
  1426. UNIX workstation in user mode with processes simulating processor nodes
  1427. and UDP datagrams used to send unacknowledged datagrams.
  1428. This file was first submitted for release into NSK on 12/13/95.
  1429. ------------------------------------------------------------------------------
  1430. This change occurred on 19 Jan 1996 /*F40:MB06458.1*/
  1431. Changes for phase IV Sierra message system release. Includes: /*F40:MB06458.2*/
  1432. - Some cleanup of the code /*F40:MB06458.3*/
  1433. - Increment KCCB counters to count the number of setup messages and /*F40:MB06458.4*/
  1434. unsequenced messages sent. /*F40:MB06458.5*/
  1435. - Fixed some bugs /*F40:MB06458.6*/
  1436. - Disable interrupts before allocating broadcast sibs. /*F40:MB06458.7*/
  1437. - Change per-packet-timeout to 5ms /*F40:MB06458.8*/
  1438. - Make the regroup and powerfail broadcast use highest priority /*F40:MB06458.9*/
  1439. tnet services queue. /*F40:MB06458.10*/
  1440. - Call the millicode backdoor to get the processor status from SP /*F40:MB06458.11*/
  1441. - Fixed expand bug in msg_listen_ and msg_readctrl_ /*F40:MB06458.12*/
  1442. - Added enhancement to msngr_sendmsg_ so that clients do not need /*F40:MB06458.13*/
  1443. to be unstoppable before calling this routine. /*F40:MB06458.14*/
  1444. - Added new steps in the build file called /*F40:MB06458.15*/
  1445. MSGSYS_C - compiles all the message system C files /*F40:MB06458.16*/
  1446. MSDRIVER - compiles all the MSDriver files /*F40:MB06458.17*/
  1447. REGROUP - compiles all the regroup files /*F40:MB06458.18*/
  1448. - remove #pragma env libspace because we set it as a command line /*F40:MB06458.19*/
  1449. parameter. /*F40:MB06458.20*/
  1450. ----------------------------------------------------------------------- /*F40:MB06458.21*/
  1451. #endif /* 0 - change descriptions */