Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2455 lines
68 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. chbeat.c
  5. Abstract:
  6. membership state heart beat code. Tracks node availability through
  7. exchanging heart beat messages with nodes that are marked as alive.
  8. Author:
  9. Charlie Wickham (charlwi) 05-Mar-1997
  10. Environment:
  11. Kernel Mode
  12. Revision History:
  13. --*/
  14. #include "precomp.h"
  15. #pragma hdrstop
  16. #include "chbeat.tmh"
  17. #include "clusvmsg.h"
  18. #include "stdio.h"
  19. /* External */
  20. /* Static */
  21. //
  22. // heart beat structures - heart beats are driven by a timer and DPC
  23. // routine. In order to synchronize the shutdown of the DPC, we also need two
  24. // flags, an event and a spin lock.
  25. //
  26. KTIMER HeartBeatTimer;
  27. KDPC HeartBeatDpc;
  28. KEVENT HeartBeatDpcFinished;
  29. BOOLEAN HeartBeatEnabled = FALSE;
  30. BOOLEAN HeartBeatDpcRunning = FALSE;
  31. CN_LOCK HeartBeatLock;
  32. #if 0
  33. Heart Beating Explained
  34. ClockTicks are incremented every HEART_BEAT_PERIOD millisecs. SendTicks are the
  35. number of ticks that go by before sending HBs.
  36. The check for received HB msgs is done in the tick just before HB msgs are
  37. sent. Interface Lost HB ticks are in terms of heart beat check periods and
  38. therefore are incremented only during the check period. An interface is failed
  39. when the number of Interface Lost HB ticks have passed and no HB message has
  40. been received on that interface.
  41. Likewise, Node Lost HB Ticks are in terms of heart beat check periods and are
  42. incremented during the check period. After all interfaces have failed on a
  43. node, Node Lost HB ticks must pass without an interface going back online
  44. before a node down event is issued. Note that a node's comm state is set to
  45. offline when all interfaces have failed.
  46. #endif
  47. #define CLUSNET_HEART_BEAT_SEND_TICKS 2 // every 1.2 secs
  48. #define CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS 3 // after 3 secs
  49. #define CLUSNET_NODE_LOST_HEART_BEAT_TICKS 6 // after 6.6 secs
  50. ULONG HeartBeatClockTicks;
  51. ULONG HeartBeatSendTicks = CLUSNET_HEART_BEAT_SEND_TICKS;
  52. ULONG HBInterfaceLostHBTicks = CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS;
  53. ULONG HBNodeLostHBTicks = CLUSNET_NODE_LOST_HEART_BEAT_TICKS;
  54. //
  55. // Unicast Heartbeat Data
  56. //
  57. // Even with multicast heartbeats, unicast heartbeats must be supported
  58. // for backwards compatibility.
  59. //
  60. //
  61. // This array records all the nodes that need to have a HB sent to another
  62. // node. This array is not protected by a lock since it is only used with the
  63. // heartbeat DPC routine.
  64. //
  65. typedef struct _INTERFACE_HEARTBEAT_INFO {
  66. CL_NODE_ID NodeId;
  67. CL_NETWORK_ID NetworkId;
  68. ULONG SeqNumber;
  69. ULONG AckNumber;
  70. } INTERFACE_HEARTBEAT_INFO, *PINTERFACE_HEARTBEAT_INFO;
  71. #define InterfaceHBInfoInitialLength 16
  72. #define InterfaceHBInfoLengthIncrement 4
  73. PINTERFACE_HEARTBEAT_INFO InterfaceHeartBeatInfo = NULL;
  74. ULONG InterfaceHBInfoCount; // running count while sending HBs
  75. ULONG InterfaceHBInfoCurrentLength; // current length of HB info array
  76. LARGE_INTEGER HBTime; // HB time in relative sys time
  77. #define MAX_DPC_SKEW ( -HBTime.QuadPart / 2 )
  78. //
  79. // Outerscreen mask. This is set by clussvc's membership manager in user
  80. // mode. As it changes, MM drops down the set outerscreen Ioctl to update
  81. // clusnet's notion of this mask. Clusnet uses this mask to determine the
  82. // validity of a received heart beat. If the sending node is not part
  83. // of the mask, then it is sent a poison packet and the received event
  85. // is not passed on to other consumers. If it is a legitimate PP, then
  85. // we generate the proper event.
  86. //
  87. // Note: MM type definitions and macros have been moved to cnpdef.h for
  88. // general usage.
  89. //
  90. typedef CX_CLUSTERSCREEN CX_OUTERSCREEN;
  91. CX_OUTERSCREEN MMOuterscreen;
  92. // Multicast Heartbeat Data
  93. //
  94. typedef struct _NETWORK_MCAST_HEARTBEAT_INFO {
  95. CL_NETWORK_ID NetworkId;
  96. PCNP_MULTICAST_GROUP McastGroup;
  97. CX_HB_NODE_INFO NodeInfo[ClusterDefaultMaxNodes+ClusterMinNodeId];
  98. CX_CLUSTERSCREEN McastTarget;
  99. } NETWORK_MCAST_HEARTBEAT_INFO, *PNETWORK_MCAST_HEARTBEAT_INFO;
  100. #define NetworkHBInfoInitialLength 4
  101. #define NetworkHBInfoLengthIncrement 4
  102. PNETWORK_MCAST_HEARTBEAT_INFO NetworkHeartBeatInfo = NULL;
  103. ULONG NetworkHBInfoCount; // running count while sending HBs
  104. ULONG NetworkHBInfoCurrentLength; // current length of HB info array
  105. CL_NETWORK_ID MulticastBestNetwork = ClusterAnyNetworkId;
  106. ULONG CxMulticastEpoch = 0;
  107. //
  108. // Declarations for Clussvc to Clusnet Heartbeating.
  109. //
  110. ULONG ClussvcClusnetHbTimeoutTicks = 0;
  111. ClussvcHangAction ClussvcClusnetHbTimeoutAction = ClussvcHangActionDisable;
  112. ULONG ClussvcClusnetHbTickCount = 0;
  113. BOOLEAN ClussvcTerminateStopHbs = FALSE;
  114. PIO_WORKITEM ClussvcTerminateWorkItem = NULL;
  115. // Parameters for the Clussvc to Clusnet Heartbeating bugcheck. These are
  116. // for informational purposes only and should not otherwise be used. For
  117. // instance, the process object is dereferenced immediately after the
  118. // pointer is determined.
  119. PEPROCESS ClussvcProcessObject = NULL;
  120. ULONG ClussvcClusnetHbTimeoutSeconds = 0;
  121. /* Forward */
  122. NTSTATUS
  123. CxInitializeHeartBeat(
  124. void
  125. );
  126. VOID
  127. CxUnloadHeartBeat(
  128. VOID
  129. );
  130. VOID
  131. CnpHeartBeatDpc(
  132. PKDPC DpcObject,
  133. PVOID DeferredContext,
  134. PVOID Arg1,
  135. PVOID Arg2
  136. );
  137. BOOLEAN
  138. CnpWalkNodesToSendHeartBeats(
  139. IN PCNP_NODE UpdateNode,
  140. IN PVOID UpdateContext,
  141. IN CN_IRQL NodeTableIrql
  142. );
  143. BOOLEAN
  144. CnpWalkNodesToCheckForHeartBeats(
  145. IN PCNP_NODE UpdateNode,
  146. IN PVOID UpdateContext,
  147. IN CN_IRQL NodeTableIrql
  148. );
  149. VOID
  150. CnpSendHBs(
  151. IN PCNP_INTERFACE UpdateInterface
  152. );
  153. NTSTATUS
  154. CxSetOuterscreen(
  155. IN ULONG Outerscreen
  156. );
  157. VOID
  158. CnpReceivePoisonPacket(
  159. IN PCNP_NETWORK Network,
  160. IN CL_NODE_ID SourceNodeId,
  161. IN ULONG SeqNumber
  162. );
  163. VOID
  164. CnpUpdateMulticastEpoch(
  165. ULONG NewEpoch
  166. );
  167. VOID
  168. CnpCheckClussvcHang(
  169. VOID
  170. );
  171. VOID
  172. CnpLogClussvcHangAndTerminate(
  173. IN PDEVICE_OBJECT DeviceObject,
  174. IN PVOID Context
  175. );
  176. VOID
  177. CnpLogClussvcHang(
  178. IN PDEVICE_OBJECT DeviceObject,
  179. IN PVOID Context
  180. );
  181. /* End Forward */
  182. #ifdef ALLOC_PRAGMA
  183. #pragma alloc_text(INIT, CxInitializeHeartBeat)
  184. #pragma alloc_text(PAGE, CxUnloadHeartBeat)
  185. #endif // ALLOC_PRAGMA
  186. NTSTATUS
  187. CxInitializeHeartBeat(
  188. void
  189. )
  190. /*++
  191. Routine Description:
  192. Init the mechanisms used to send and monitor heart beats
  193. Arguments:
  194. None
  195. Return Value:
  196. STATUS_INSUFFICIENT_RESOURCES if allocation fails.
  197. STATUS_SUCCESS otherwise.
  198. --*/
  199. {
  200. // allocate the interface info array
  201. InterfaceHBInfoCount = 0;
  202. InterfaceHBInfoCurrentLength = InterfaceHBInfoInitialLength;
  203. if (InterfaceHBInfoCurrentLength > 0) {
  204. InterfaceHeartBeatInfo = CnAllocatePool(
  205. InterfaceHBInfoCurrentLength
  206. * sizeof(INTERFACE_HEARTBEAT_INFO)
  207. );
  208. if (InterfaceHeartBeatInfo == NULL) {
  209. return(STATUS_INSUFFICIENT_RESOURCES);
  210. }
  211. }
  212. // allocate the network info array
  213. NetworkHBInfoCount = 0;
  214. NetworkHBInfoCurrentLength = NetworkHBInfoInitialLength;
  215. if (NetworkHBInfoCurrentLength > 0) {
  216. NetworkHeartBeatInfo = CnAllocatePool(
  217. NetworkHBInfoCurrentLength
  218. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  219. );
  220. if (NetworkHeartBeatInfo == NULL) {
  221. return(STATUS_INSUFFICIENT_RESOURCES);
  222. }
  223. RtlZeroMemory(
  224. NetworkHeartBeatInfo,
  225. NetworkHBInfoCurrentLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  226. );
  227. }
  228. KeInitializeTimer( &HeartBeatTimer );
  229. KeInitializeDpc( &HeartBeatDpc, CnpHeartBeatDpc, NULL );
  230. KeInitializeEvent( &HeartBeatDpcFinished, SynchronizationEvent, FALSE );
  231. CnInitializeLock( &HeartBeatLock, CNP_HBEAT_LOCK );
  232. MEMLOG( MemLogInitHB, 0, 0 );
  233. return(STATUS_SUCCESS);
  234. } // CxInitializeHeartBeat
  235. VOID
  236. CxUnloadHeartBeat(
  237. VOID
  238. )
  239. /*++
  240. Routine Description:
  241. Called during clusnet driver unload. Free any data structures
  242. allocated to send and monitor heartbeats.
  243. Arguments:
  244. None
  245. Return Value:
  246. None
  247. --*/
  248. {
  249. PAGED_CODE();
  250. if (InterfaceHeartBeatInfo != NULL) {
  251. CnFreePool(InterfaceHeartBeatInfo);
  252. InterfaceHeartBeatInfo = NULL;
  253. }
  254. if (NetworkHeartBeatInfo != NULL) {
  255. CnFreePool(NetworkHeartBeatInfo);
  256. NetworkHeartBeatInfo = NULL;
  257. }
  258. return;
  259. } // CxUnloadHeartBeat
  260. NTSTATUS
  261. CnpStartHeartBeats(
  262. VOID
  263. )
  264. /*++
  265. Routine Description:
  266. Start heart beating with the nodes that are marked alive and have
  267. an interface marked either OnlinePending or Online.
  268. Arguments:
  269. None
  270. Return Value:
  271. STATUS_INSUFFICIENT_RESOURCES if the workitem allocation fails
  272. --*/
  273. {
  274. BOOLEAN TimerInserted;
  275. CN_IRQL OldIrql;
  276. ULONG period = HEART_BEAT_PERIOD;
  277. //
  278. // Pre-allocate a workitem in case we need an emergency
  279. // termination of the cluster service due to a user-mode
  280. // hang.
  281. // No need to take the lock before the allocation and
  282. // assignment, since below is the first place the lock
  283. // is acquired as the service starts.
  284. CnAssert(ClussvcTerminateWorkItem == NULL);
  285. ClussvcTerminateWorkItem = IoAllocateWorkItem(CnDeviceObject);
  286. if (ClussvcTerminateWorkItem == NULL) {
  287. CnTrace(HBEAT_EVENT, HbTraceTerminateWorkItemAlloc,
  288. "[HB] Failed to pre-allocate clussvc termination "
  289. "workitem.\n"
  290. );
  291. return(STATUS_INSUFFICIENT_RESOURCES);
  292. }
  293. CnAcquireLock( &HeartBeatLock, &OldIrql );
  294. HBTime.QuadPart = Int32x32To64( HEART_BEAT_PERIOD, -10000 );
  295. TimerInserted = KeSetTimerEx(&HeartBeatTimer,
  296. HBTime,
  297. HEART_BEAT_PERIOD,
  298. &HeartBeatDpc);
  299. HeartBeatEnabled = TRUE;
  300. ClussvcTerminateStopHbs = FALSE;
  301. CnTrace(HBEAT_EVENT, HbTraceTimerStarted,
  302. "[HB] Heartbeat timer started. Period = %u ms.",
  303. period // LOGULONG
  304. );
  305. MEMLOG( MemLogHBStarted, HEART_BEAT_PERIOD, 0 );
  306. CnReleaseLock( &HeartBeatLock, OldIrql );
  307. return(STATUS_SUCCESS);
  308. } // CnpStartHeartBeats
  309. VOID
  310. CnpStopHeartBeats(
  311. VOID
  312. )
  313. /*++
  314. Routine Description:
  315. Stop heart beating with other nodes in the cluster.
  316. Arguments:
  317. None
  318. Return Value:
  319. None
  320. --*/
  321. {
  322. BOOLEAN TimerCanceled;
  323. CN_IRQL OldIrql;
  324. PIO_WORKITEM FreeWorkItem = NULL;
  325. CnAcquireLock( &HeartBeatLock, &OldIrql );
  326. if (HeartBeatEnabled) {
  327. HeartBeatEnabled = FALSE;
  328. //
  329. // Cancel the periodic timer. Contrary to what the DDK implies,
  330. // this does not cancel the DPC if it is still queued from the
  331. // last timer expiration. It only stops the timer from firing
  332. // again. This is true as of 8/99. See KiTimerListExpire() in
  333. // ntos\ke\dpcsup.c.
  334. //
  335. TimerCanceled = KeCancelTimer( &HeartBeatTimer );
  336. CnTrace(HBEAT_DETAIL, HbTraceTimerCancelled,
  337. "[HB] Heartbeat timer cancelled: %!bool!",
  338. TimerCanceled // LOGBOOLEAN
  339. );
  340. MEMLOG( MemLogHBStopped, 0, 0 );
  341. //
  342. // Remove the DPC associated with the timer from the system DPC
  343. // queue, if it is there. This actually does nothing, because a
  344. // timer DPC is only inserted into the system DPC queue if it is
  345. // bound to a specific processor. Unbound DPCs are executed inline
  346. // on the current processor in the kernel's timer expiration code.
  347. // Note that the object for a periodic timer is reinserted into the
  348. // timer queue before the DPC is excuted. So, it is possible for the
  349. // timer and the associated DPC to be queued simultaneously. This is
  350. // true as of 8/99. See KiTimerListExpire() in ntos\ke\dpcsup.c.
  351. //
  352. // The bottom line is that there is no safe way to synchronize with
  353. // the execution of a timer DPC during driver unload. All we can
  354. // do is ensure that the DPC handler code recognizes that it should
  355. // abort execution immediately and hope that it does so before the
  356. // driver code is unloaded. We do this by setting the HeartBeatEnabled
  357. // flag to False above. If our DPC code happens to be executing at
  358. // this point in time on another processor, as denoted by
  359. // HeartBeatDpcRunning, we wait for it to finish.
  360. //
  361. if ( !KeRemoveQueueDpc( &HeartBeatDpc )) {
  362. CnTrace(HBEAT_DETAIL, HbTraceDpcRunning,
  363. "[HB] DPC not removed. HeartBeatDpcRunning = %!bool!",
  364. HeartBeatDpcRunning // LOGBOOLEAN
  365. );
  366. MEMLOG( MemLogHBDpcRunning, HeartBeatDpcRunning, 0 );
  367. if ( HeartBeatDpcRunning ) {
  368. CnReleaseLock( &HeartBeatLock, OldIrql );
  369. CnTrace(HBEAT_DETAIL, HbWaitForDpcToFinish,
  370. "can't remove DPC; waiting on DPCFinished event"
  371. );
  372. MEMLOG( MemLogWaitForDpcFinish, 0, 0 );
  373. KeWaitForSingleObject(&HeartBeatDpcFinished,
  374. Executive,
  375. KernelMode,
  376. FALSE, // not alertable
  377. NULL); // no timeout
  378. KeClearEvent( &HeartBeatDpcFinished );
  379. CnAcquireLock( &HeartBeatLock, &OldIrql);
  380. }
  381. }
  382. CnTrace(HBEAT_EVENT, HbTraceTimerStopped,
  383. "[HB] Heartbeat timer stopped."
  384. );
  385. }
  386. //
  387. // If the pre-allocated workitem was not used, we need to
  388. // free it to remove the reference on the clusnet device object.
  389. //
  390. FreeWorkItem = ClussvcTerminateWorkItem;
  391. ClussvcTerminateWorkItem = NULL;
  392. CnReleaseLock( &HeartBeatLock, OldIrql );
  393. if (FreeWorkItem != NULL) {
  394. IoFreeWorkItem(FreeWorkItem);
  395. }
  396. return;
  397. } // CnpStopHeartBeats
  398. VOID
  399. CnpSendMcastHBCompletion(
  400. IN NTSTATUS Status,
  401. IN ULONG BytesSent,
  402. IN PVOID Context,
  403. IN PVOID Buffer
  404. )
  405. /*++
  406. Routine Description:
  407. Called when a mcast heartbeat send request completes
  408. successfully or unsuccessfully. Dereferences the
  409. McastGroup data structure.
  410. Arguments:
  411. Status - status of request
  412. BytesSent - not used
  413. Context - points to multicast group data structure
  414. Buffer - not used
  415. Return value:
  416. None.
  417. --*/
  418. {
  419. PCNP_MULTICAST_GROUP mcastGroup = (PCNP_MULTICAST_GROUP) Context;
  420. CnAssert(mcastGroup != NULL);
  421. CnpDereferenceMulticastGroup(mcastGroup);
  422. return;
  423. } // CnpSendMcastHBCompletion
  424. NTSTATUS
  425. CnpSendMcastHB(
  426. IN PCNP_INTERFACE Interface
  427. )
  428. /*++
  429. Routine Description:
  430. Writes multicast heartbeat data into the NetworkHeartBeatInfo
  431. array for target Interface.
  432. Notes:
  433. Called from DPC with Network and Node locks held.
  434. Returns with Network and Node locks held.
  435. --*/
  436. {
  437. ULONG i;
  438. BOOLEAN networkConnected;
  439. // find the network info structure for this network
  440. for (i = 0; i < NetworkHBInfoCount; i++) {
  441. if (NetworkHeartBeatInfo[i].NetworkId
  442. == Interface->Network->Id) {
  443. break;
  444. }
  445. }
  446. // start a new network info structure, if necessary
  447. if (i == NetworkHBInfoCount) {
  448. // before claiming an entry in the network info array,
  449. // make sure the array is large enough
  450. if (NetworkHBInfoCount >= NetworkHBInfoCurrentLength) {
  451. // need to allocate a new network info array
  452. PNETWORK_MCAST_HEARTBEAT_INFO tempInfo = NULL;
  453. PNETWORK_MCAST_HEARTBEAT_INFO freeInfo = NULL;
  454. ULONG tempLength;
  455. tempLength = NetworkHBInfoCurrentLength
  456. + NetworkHBInfoLengthIncrement;
  457. tempInfo = CnAllocatePool(
  458. tempLength
  459. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  460. );
  461. if (tempInfo == NULL) {
  462. CnTrace(
  463. HBEAT_DETAIL, HbNetInfoArrayAllocFailed,
  464. "[HB] Failed to allocate network heartbeat info "
  465. "array of length %u. Cannot schedule heartbeat "
  466. "for node %u on network %u.",
  467. tempLength,
  468. Interface->Node->Id,
  469. Interface->Network->Id
  470. );
  471. // cannot continue. the failure to send this
  472. // heartbeat will not be fatal if we recover
  473. // quickly. if we do not recover, this node
  474. // will be poisoned, which is probably best
  475. // since it is dangerously low on nonpaged pool.
  476. return(STATUS_INSUFFICIENT_RESOURCES);
  477. } else {
  478. // the allocation was successful. establish
  479. // the new array as the heartbeat info
  480. // array.
  481. RtlZeroMemory(
  482. tempInfo,
  483. tempLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  484. );
  485. freeInfo = NetworkHeartBeatInfo;
  486. NetworkHeartBeatInfo = tempInfo;
  487. NetworkHBInfoCurrentLength = tempLength;
  488. if (freeInfo != NULL) {
  489. if (NetworkHBInfoCount > 0) {
  490. RtlCopyMemory(
  491. NetworkHeartBeatInfo,
  492. freeInfo,
  493. NetworkHBInfoCount
  494. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  495. );
  496. }
  497. CnFreePool(freeInfo);
  498. }
  499. CnTrace(
  500. HBEAT_DETAIL, HbNetInfoArrayLengthIncreased,
  501. "[HB] Increased network heartbeat info array "
  502. "to size %u.",
  503. NetworkHBInfoCurrentLength
  504. );
  505. }
  506. }
  507. // increment the current counter
  508. NetworkHBInfoCount++;
  509. // initialize the information for this structure
  510. RtlZeroMemory(
  511. &NetworkHeartBeatInfo[i].McastTarget,
  512. sizeof(NetworkHeartBeatInfo[i].McastTarget)
  513. );
  514. NetworkHeartBeatInfo[i].NetworkId = Interface->Network->Id;
  515. NetworkHeartBeatInfo[i].McastGroup =
  516. Interface->Network->CurrentMcastGroup;
  517. CnpReferenceMulticastGroup(NetworkHeartBeatInfo[i].McastGroup);
  518. }
  519. networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));
  520. CnTrace(HBEAT_DETAIL, HbTraceScheduleMcastHBForInterface,
  521. "[HB] Scheduling multicast HB for node %u on network %u "
  522. "(I/F state = %!ifstate!) "
  523. "(interface media connected = %!bool!).",
  524. Interface->Node->Id, // LOGULONG
  525. Interface->Network->Id, // LOGULONG
  526. Interface->State, // LOGIfState
  527. networkConnected
  528. );
  529. // fill in the network info for this node/interface
  530. NetworkHeartBeatInfo[i].NodeInfo[Interface->Node->Id].SeqNumber =
  531. Interface->SequenceToSend;
  532. NetworkHeartBeatInfo[i].NodeInfo[Interface->Node->Id].AckNumber =
  533. Interface->LastSequenceReceived;
  534. CnpClusterScreenInsert(
  535. NetworkHeartBeatInfo[i].McastTarget.ClusterScreen,
  536. INT_NODE(Interface->Node->Id)
  537. );
  538. return(STATUS_SUCCESS);
  539. } // CnpSendMcastHB
  540. NTSTATUS
  541. CnpSendUcastHB(
  542. IN PCNP_INTERFACE Interface
  543. )
  544. /*++
  545. Routine Description:
  546. Writes unicast heartbeat data into the InterfaceHeartBeatInfo
  547. array for target Interface.
  548. Notes:
  549. Called from DPC with Network and Node locks held.
  550. Returns with Network and Node locks held.
  551. --*/
  552. {
  553. BOOLEAN networkConnected;
  554. // before filling an entry in the heartbeat info array,
  555. // make sure the array is large enough.
  556. if (InterfaceHBInfoCount >= InterfaceHBInfoCurrentLength) {
  557. // need to allocate a new heartbeat info array
  558. PINTERFACE_HEARTBEAT_INFO tempInfo = NULL;
  559. PINTERFACE_HEARTBEAT_INFO freeInfo = NULL;
  560. ULONG tempLength;
  561. tempLength = InterfaceHBInfoCurrentLength
  562. + InterfaceHBInfoLengthIncrement;
  563. tempInfo = CnAllocatePool(
  564. tempLength * sizeof(INTERFACE_HEARTBEAT_INFO)
  565. );
  566. if (tempInfo == NULL) {
  567. CnTrace(
  568. HBEAT_DETAIL, HbInfoArrayAllocFailed,
  569. "[HB] Failed to allocate heartbeat info "
  570. "array of length %u. Cannot schedule heartbeat "
  571. "for node %u on network %u.",
  572. tempLength,
  573. Interface->Node->Id,
  574. Interface->Network->Id
  575. );
  576. // cannot continue. the failure to send this
  577. // heartbeat will not be fatal if we recover
  578. // quickly. if we do not recover, this node
  579. // will be poisoned, which is probably best
  580. // since it is dangerously low on nonpaged pool.
  581. return(STATUS_INSUFFICIENT_RESOURCES);
  582. } else {
  583. // the allocation was successful. establish
  584. // the new array as the heartbeat info
  585. // array.
  586. freeInfo = InterfaceHeartBeatInfo;
  587. InterfaceHeartBeatInfo = tempInfo;
  588. InterfaceHBInfoCurrentLength = tempLength;
  589. if (freeInfo != NULL) {
  590. if (InterfaceHBInfoCount > 0) {
  591. RtlCopyMemory(
  592. InterfaceHeartBeatInfo,
  593. freeInfo,
  594. InterfaceHBInfoCount * sizeof(INTERFACE_HEARTBEAT_INFO)
  595. );
  596. }
  597. CnFreePool(freeInfo);
  598. }
  599. CnTrace(
  600. HBEAT_DETAIL, HbInfoArrayLengthIncreased,
  601. "[HB] Increased heartbeat info array to size %u.",
  602. InterfaceHBInfoCurrentLength
  603. );
  604. }
  605. }
  606. networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));
  607. CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForInterface,
  608. "[HB] Scheduling HB for node %u on network %u (I/F state = %!ifstate!) "
  609. "(interface media connected = %!bool!).",
  610. Interface->Node->Id, // LOGULONG
  611. Interface->Network->Id, // LOGULONG
  612. Interface->State, // LOGIfState
  613. networkConnected
  614. );
  615. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].NodeId = Interface->Node->Id;
  616. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].SeqNumber =
  617. Interface->SequenceToSend;
  618. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].AckNumber =
  619. Interface->LastSequenceReceived;
  620. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].NetworkId = Interface->Network->Id;
  621. ++InterfaceHBInfoCount;
  622. return(STATUS_SUCCESS);
  623. } // CnpSendUcastHB
  624. VOID
  625. CnpSendHBs(
  626. IN PCNP_INTERFACE Interface
  627. )
  628. /*++
  629. Routine Description:
  630. If Interface is in the correct state then stuff an entry in
  631. the heartbeat info array. Expand the heartbeat info
  632. array if necessary.
  633. Arguments:
  634. Interface - target interface for heartbeat message
  635. Return Value:
  636. None
  637. --*/
  638. {
  639. BOOLEAN mcastOnly = FALSE;
  640. if ( Interface->State >= ClusnetInterfaceStateUnreachable ) {
  641. // increment the sequence number
  642. (Interface->SequenceToSend)++;
  643. // check if we should include this interface in a
  644. // multicast heartbeat. first we verify that the
  645. // network is multicast capable. then, we include it
  646. // if either of the following conditions are true:
  647. // - we have received a multicast heartbeat from the
  648. // target interface
  649. // - the discovery count (the number of discovery mcasts
  650. // left to send to the target interface) is greater
  651. // than zero
  652. if (CnpIsNetworkMulticastCapable(Interface->Network)) {
  653. if (CnpInterfaceQueryReceivedMulticast(Interface)) {
  654. // write the mcast heartbeat data. if not
  655. // successful, attempt a unicast heartbeat.
  656. if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
  657. mcastOnly = TRUE;
  658. }
  659. } else if (Interface->McastDiscoverCount > 0) {
  660. // write the mcast heartbeat data for a
  661. // discovery. if successful, decrement the
  662. // discovery count.
  663. if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
  664. --Interface->McastDiscoverCount;
  665. // if the discovery count has reached zero,
  666. // set the rediscovery countdown. this is
  667. // the number of heartbeat periods until we
  668. // try discovery again.
  669. if (Interface->McastDiscoverCount == 0) {
  670. Interface->McastRediscoveryCountdown =
  671. CNP_INTERFACE_MCAST_REDISCOVERY;
  672. }
  673. }
  674. } else if (Interface->McastRediscoveryCountdown > 0) {
  675. // decrement the rediscovery countdown. if we
  676. // reach zero, we will start multicast discovery
  677. // on the next heartbeat to this interface.
  678. if (--Interface->McastRediscoveryCountdown == 0) {
  679. Interface->McastDiscoverCount =
  680. CNP_INTERFACE_MCAST_DISCOVERY;
  681. }
  682. }
  683. }
  684. // write unicast heartbeat data
  685. if (!mcastOnly) {
  686. CnpSendUcastHB(Interface);
  687. }
  688. }
  689. CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);
  690. return;
  691. } // CnpSendHBs
  692. VOID
  693. CnpCheckForHBs(
  694. IN PCNP_INTERFACE Interface
  695. )
  696. /*++
  697. Routine Description:
  698. Check if heart beats have been received for this interface
  699. Arguments:
  700. None
  701. Return Value:
  702. None
  703. --*/
  704. {
  705. ULONG MissedHBCount;
  706. BOOLEAN NetworkLockReleased = FALSE;
  707. if ( Interface->State >= ClusnetInterfaceStateUnreachable
  708. && !CnpIsNetworkLocalDisconn(Interface->Network) ) {
  709. MissedHBCount = InterlockedIncrement( &Interface->MissedHBs );
  710. if ( MissedHBCount == 1 ) {
  711. //
  712. // a HB was received in time for this node. Clear the status
  713. // info associated with this interface, but also mark the node
  714. // as having an interface that is ok. Note that we do not
  715. // use HBs on restricted nets to determine node health.
  716. //
  717. if (!CnpIsNetworkRestricted(Interface->Network)) {
  718. Interface->Node->HBWasMissed = FALSE;
  719. }
  720. CnTrace(HBEAT_DETAIL, HbTraceHBReceivedForInterface,
  721. "[HB] A HB was received from node %u on net %u in this "
  722. "period.",
  723. Interface->Node->Id, // LOGULONG
  724. Interface->Network->Id // LOGULONG
  725. );
  726. } else {
  727. CnTrace(HBEAT_EVENT, HbTraceMissedIfHB,
  728. "[HB] HB MISSED for node %u on net %u, missed count %u.",
  729. Interface->Node->Id, // LOGULONG
  730. Interface->Network->Id, // LOGULONG
  731. MissedHBCount // LOGULONG
  732. );
  733. MEMLOG4(
  734. MemLogMissedIfHB,
  735. (ULONG_PTR)Interface, MissedHBCount,
  736. Interface->Node->Id,
  737. Interface->Network->Id
  738. );
  739. if ( MissedHBCount >= HBInterfaceLostHBTicks &&
  740. Interface->State >= ClusnetInterfaceStateOnlinePending ) {
  741. //
  742. // interface is either online pending or online, so move it
  743. // to unreachable. CnpFailInterface will also mark the node
  744. // unreachable if all of the node's interfaces are unreachable.
  745. // CnpFailInterface releases the network object lock as part
  746. // of its duties.
  747. //
  748. CnTrace(HBEAT_DETAIL, HbTraceFailInterface,
  749. "[HB] Moving I/F for node %u on net %u to failed state, "
  750. "previous I/F state = %!ifstate!.",
  751. Interface->Node->Id, // LOGULONG
  752. Interface->Network->Id, // LOGULONG
  753. Interface->State // LOGIfState
  754. );
  755. //
  756. // continuation log entries go before the main entry since
  757. // we scan the log backwards, i.e., we'll hit FailingIf
  758. // before we hit FailingIf1.
  759. //
  760. MEMLOG4(
  761. MemLogFailingIf,
  762. (ULONG_PTR)Interface,
  763. Interface->State,
  764. Interface->Node->Id,
  765. Interface->Network->Id
  766. );
  767. CnpFailInterface( Interface );
  768. NetworkLockReleased = TRUE;
  769. //
  770. // issue a net interface unreachable event to let consumers
  771. // know what is happening
  772. //
  773. CnTrace(HBEAT_EVENT, HbTraceInterfaceUnreachableEvent,
  774. "[HB] Issuing InterfaceUnreachable event for node %u "
  775. "on net %u, previous I/F state = %!ifstate!.",
  776. Interface->Node->Id, // LOGULONG
  777. Interface->Network->Id, // LOGULONG
  778. Interface->State // LOGIfState
  779. );
  780. CnIssueEvent(ClusnetEventNetInterfaceUnreachable,
  781. Interface->Node->Id,
  782. Interface->Network->Id);
  783. }
  784. }
  785. }
  786. if ( !NetworkLockReleased ) {
  787. CnReleaseLock(&Interface->Network->Lock,
  788. Interface->Network->Irql);
  789. }
  790. return;
  791. } // CnpCheckForHBs
  792. BOOLEAN
  793. CnpWalkNodesToSendHeartBeats(
  794. IN PCNP_NODE Node,
  795. IN PVOID UpdateContext,
  796. IN CN_IRQL NodeTableIrql
  797. )
  798. /*++
  799. Routine Description:
  800. Support routine called for each node in the node table. If node is
  801. alive, then we walk its interfaces, performing the appropriate
  802. action.
  803. Arguments:
  804. None
  805. Return Value:
  806. None
  807. --*/
  808. {
  809. //
  810. // If this node is alive and not the local node, then walk its
  811. // interfaces, supplying the appropriate routine to use at this time
  812. //
  813. if ( Node->MMState == ClusnetNodeStateAlive &&
  814. Node != CnpLocalNode ) {
  815. CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForNode,
  816. "[HB] Scheduling HBs for node %u (state = %!mmstate!).",
  817. Node->Id, // LOGULONG
  818. Node->MMState // LOGMmState
  819. );
  820. MEMLOG( MemLogSendHBWalkNode, Node->Id, Node->MMState );
  821. CnpWalkInterfacesOnNode( Node, (PVOID)CnpSendHBs );
  822. }
  823. CnReleaseLock( &Node->Lock, Node->Irql );
  824. return TRUE; // the node table lock is still held
  825. } // CnpWalkNodesToSendHeartBeats
  826. BOOLEAN
  827. CnpWalkNodesToCheckForHeartBeats(
  828. IN PCNP_NODE Node,
  829. IN PVOID UpdateContext,
  830. IN CN_IRQL NodeTableIrql
  831. )
  832. /*++
  833. Routine Description:
  834. heart beat checking routine called for each node in the node table
  835. (except for the local node). If node is alive, then we walk its
  836. interfaces, performing the appropriate action.
  837. Arguments:
  838. None
  839. Return Value:
  840. None
  841. --*/
  842. {
  843. BOOLEAN NodeWasReachable;
  844. ULONG MissedHBCount;
  845. if ( Node->MMState == ClusnetNodeStateAlive &&
  846. Node != CnpLocalNode ) {
  847. //
  848. // this node is alive, so walk its interfaces. Assume the
  849. // worst by setting the HB Missed flag to true and
  850. // have the interfaces prove that this is wrong. Also make
  851. // note of the current unreachable flag setting. If it changes
  852. // this time
  853. //
  854. NodeWasReachable = !CnpIsNodeUnreachable( Node );
  855. Node->HBWasMissed = TRUE;
  856. CnTrace(HBEAT_DETAIL, HbTraceCheckNodeForHeartbeats,
  857. "[HB] Checking for HBs from node %u. WasReachable = %!bool!, "
  858. "state = %!mmstate!.",
  859. Node->Id, // LOGULONG
  860. NodeWasReachable, // LOGBOOLEAN
  861. Node->MMState // LOGMmState
  862. );
  863. MEMLOG( MemLogCheckHBNodeReachable, Node->Id, NodeWasReachable );
  864. MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );
  865. CnpWalkInterfacesOnNode( Node, (PVOID)CnpCheckForHBs );
  866. if ( Node->HBWasMissed ) {
  867. //
  868. // no HBs received on any of this node's IFs. if membership
  869. // still thinks this node is alive and the node has been
  870. // unreachable, then note that this node is toast in HB
  871. // info array. This will cause a node down event to be
  872. // generated for this node.
  873. //
  874. MissedHBCount = InterlockedIncrement( &Node->MissedHBs );
  875. CnTrace(HBEAT_EVENT, HbTraceNodeMissedHB,
  876. "[HB] Node %u has missed %u HBs on all interfaces, "
  877. "current state = %!mmstate!.",
  878. Node->Id, // LOGULONG
  879. MissedHBCount, // LOGULONG
  880. Node->MMState // LOGMmState
  881. );
  882. MEMLOG( MemLogCheckHBMissedHB, MissedHBCount, Node->MMState );
  883. //
  884. // if the this node is a either a member or in the process of
  885. // joining AND it's missed too many HBs AND we haven't issued a
  886. // node down, then issue a node down.
  887. //
  888. if ( ( Node->MMState == ClusnetNodeStateAlive
  889. ||
  890. Node->MMState == ClusnetNodeStateJoining
  891. )
  892. && MissedHBCount >= HBNodeLostHBTicks
  893. && !Node->NodeDownIssued
  894. )
  895. {
  896. Node->NodeDownIssued = TRUE;
  897. CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
  898. CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent,
  899. "[HB] Issuing NodeDown event for node %u.",
  900. Node->Id // LOGULONG
  901. );
  902. MEMLOG( MemLogNodeDownIssued, Node->Id, TRUE );
  903. }
  904. }
  905. } else {
  906. MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );
  907. }
  908. CnReleaseLock( &Node->Lock, Node->Irql );
  909. return TRUE; // the node table lock is still held
  910. } // CnpWalkNodesToCheckForHeartBeats
  911. VOID
  912. CnpHeartBeatDpc(
  913. PKDPC DpcObject,
  914. PVOID DeferredContext,
  915. PVOID Arg1,
  916. PVOID Arg2
  917. )
  918. /*++
  919. Routine Description:
  920. Start heart beating with the nodes that are marked alive and have
  921. an interface marked either OnlinePending or Online.
  922. Arguments:
  923. None
  924. Return Value:
  925. None
  926. --*/
  927. {
  928. PINTERFACE_HEARTBEAT_INFO pNodeHBInfo;
  929. PNETWORK_MCAST_HEARTBEAT_INFO pMcastHBInfo;
  930. CN_IRQL OldIrql;
  931. BOOLEAN StopSendRecvHbs;
  932. #ifdef MEMLOGGING
  933. static LARGE_INTEGER LastSysTime;
  934. LARGE_INTEGER CurrentTime;
  935. LARGE_INTEGER TimeDelta;
  936. //
  937. // try to determine the skew between when we asked to be run and
  938. // the time we actually did run
  939. //
  940. KeQuerySystemTime( &CurrentTime );
  941. if ( LastSysTime.QuadPart != 0 ) {
  942. //
  943. // add in HBTime which is negative due to relative sys time
  944. //
  945. TimeDelta.QuadPart = ( CurrentTime.QuadPart - LastSysTime.QuadPart ) +
  946. HBTime.QuadPart;
  947. if ( TimeDelta.QuadPart > MAX_DPC_SKEW ||
  948. TimeDelta.QuadPart < -MAX_DPC_SKEW
  949. )
  950. {
  951. LONG skew = (LONG)(TimeDelta.QuadPart/10000); // convert to ms
  952. MEMLOG( MemLogDpcTimeSkew, TimeDelta.LowPart, 0 );
  953. CnTrace(HBEAT_EVENT, HbTraceLateDpc,
  954. "[HB] Timer fired %d ms late.",
  955. skew // LOGSLONG
  956. );
  957. }
  958. }
  959. LastSysTime.QuadPart = CurrentTime.QuadPart;
  960. #endif // MEMLOGGING
  961. CnAcquireLock( &HeartBeatLock, &OldIrql );
  962. if ( !HeartBeatEnabled ) {
  963. CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent,
  964. "DPC: setting HeartBeatDpcFinished event"
  965. );
  966. MEMLOG( MemLogSetDpcEvent, 0, 0 );
  967. KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
  968. CnReleaseLock( &HeartBeatLock, OldIrql );
  969. return;
  970. }
  971. HeartBeatDpcRunning = TRUE;
  972. //
  973. // Check if we need to stop sending heartbeats. This
  974. // occurs when clusnet detects that clussvc is not
  975. // operating correctly. In case system work queues
  976. // are blocked up (but not DPCs), we stop sending
  977. // heartbeats so that other nodes initiate failover.
  978. //
  979. StopSendRecvHbs = ClussvcTerminateStopHbs;
  980. CnReleaseLock( &HeartBeatLock, OldIrql );
  981. if (!StopSendRecvHbs) {
  982. if ( HeartBeatClockTicks == 0 ||
  983. HeartBeatClockTicks == HeartBeatSendTicks) {
  984. //
  985. // time to send HBs. Clear the count of target interfaces
  986. // and walk the node table finding the nodes that are
  987. // marked alive.
  988. //
  989. NetworkHBInfoCount = 0;
  990. InterfaceHBInfoCount = 0;
  991. CnpWalkNodeTable( CnpWalkNodesToSendHeartBeats, NULL );
  992. //
  993. // run down the list of networks and send out any multicast
  994. // heartbeats.
  995. //
  996. pMcastHBInfo = NetworkHeartBeatInfo;
  997. while ( NetworkHBInfoCount-- ) {
  998. CnTrace(
  999. HBEAT_EVENT, HbTraceSendMcastHB,
  1000. "[HB] Sending multicast HB on net %u.\n",
  1001. pMcastHBInfo->NetworkId
  1002. );
  1003. CxSendMcastHeartBeatMessage(
  1004. pMcastHBInfo->NetworkId,
  1005. pMcastHBInfo->McastGroup,
  1006. pMcastHBInfo->McastTarget,
  1007. CxMulticastEpoch,
  1008. pMcastHBInfo->NodeInfo,
  1009. CnpSendMcastHBCompletion,
  1010. pMcastHBInfo->McastGroup
  1011. );
  1012. ++pMcastHBInfo;
  1013. }
  1014. //
  1015. // now run down the list of interfaces that we compiled and
  1016. // send any unicast packets
  1017. //
  1018. pNodeHBInfo = InterfaceHeartBeatInfo;
  1019. while ( InterfaceHBInfoCount-- ) {
  1020. CnTrace(HBEAT_EVENT, HbTraceSendHB,
  1021. "[HB] Sending HB to node %u on net %u, seqno %u, ackno %u.",
  1022. pNodeHBInfo->NodeId, // LOGULONG
  1023. pNodeHBInfo->NetworkId, // LOGULONG
  1024. pNodeHBInfo->SeqNumber, // LOGULONG
  1025. pNodeHBInfo->AckNumber // LOGULONG
  1026. );
  1027. CxSendHeartBeatMessage(pNodeHBInfo->NodeId,
  1028. pNodeHBInfo->SeqNumber,
  1029. pNodeHBInfo->AckNumber,
  1030. pNodeHBInfo->NetworkId);
  1031. MEMLOG(
  1032. MemLogSendingHB,
  1033. pNodeHBInfo->NodeId,
  1034. pNodeHBInfo->NetworkId
  1035. );
  1036. ++pNodeHBInfo;
  1037. }
  1038. //
  1039. // finally, up the tick count, progressing to the next potential
  1040. // work item
  1041. //
  1042. HeartBeatClockTicks++;
  1043. } else if ( HeartBeatClockTicks >= ( HeartBeatSendTicks - 1 )) {
  1044. //
  1045. // walk the node table looking for lack of heart beats on
  1046. // a node's set of interfaces.
  1047. //
  1048. CnpWalkNodeTable( CnpWalkNodesToCheckForHeartBeats, NULL );
  1049. HeartBeatClockTicks = 0;
  1050. } else {
  1051. HeartBeatClockTicks++;
  1052. }
  1053. }
  1054. // Check for clussvc hangs.
  1055. CnpCheckClussvcHang();
  1056. //
  1057. // indicate that we're no longer running and if we're shutting down
  1058. // then set the event that the shutdown thread is waiting on
  1059. //
  1060. CnAcquireLock( &HeartBeatLock, &OldIrql );
  1061. HeartBeatDpcRunning = FALSE;
  1062. if ( !HeartBeatEnabled ) {
  1063. KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
  1064. CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent2,
  1065. "DPC: setting HeartBeatDpcFinished event (2)"
  1066. );
  1067. MEMLOG( MemLogSetDpcEvent, 0, 0 );
  1068. }
  1069. CnReleaseLock( &HeartBeatLock, OldIrql );
  1070. } // CnpHeartBeatDpc
  1071. PCNP_INTERFACE
  1072. CnpFindInterfaceLocked(
  1073. IN PCNP_NODE Node,
  1074. IN PCNP_NETWORK Network
  1075. )
  1076. /*++
  1077. Routine Description:
  1078. Given node and network structure pointers, find the interface
  1079. structure. Similar to CnpFindInterface except that we're passing
  1080. in pointers instead of IDs.
  1081. Arguments:
  1082. Node - pointer to node struct that sent the packet
  1083. Network - pointer to Network struct on which packet was received
  1084. Return Value:
  1085. Pointer to Interface on which packet was recv'd, otherwise NULL
  1086. --*/
  1087. {
  1088. PLIST_ENTRY IfEntry;
  1089. PCNP_INTERFACE Interface;
  1090. CnVerifyCpuLockMask(CNP_NODE_OBJECT_LOCK, // Required
  1091. 0, // Forbidden
  1092. CNP_NETWORK_OBJECT_LOCK_MAX // Maximum
  1093. );
  1094. for (IfEntry = Node->InterfaceList.Flink;
  1095. IfEntry != &(Node->InterfaceList);
  1096. IfEntry = IfEntry->Flink
  1097. )
  1098. {
  1099. Interface = CONTAINING_RECORD(IfEntry,
  1100. CNP_INTERFACE,
  1101. NodeLinkage);
  1102. if ( Interface->Network == Network ) {
  1103. break;
  1104. }
  1105. }
  1106. if ( IfEntry == &Node->InterfaceList ) {
  1107. return NULL;
  1108. } else {
  1109. return Interface;
  1110. }
  1111. } // CnpFindInterfaceLocked
  1112. VOID
  1113. CnpReceiveHeartBeatMessage(
  1114. IN PCNP_NETWORK Network,
  1115. IN CL_NODE_ID SourceNodeId,
  1116. IN ULONG SeqNumber,
  1117. IN ULONG AckNumber,
  1118. IN BOOLEAN Multicast,
  1119. IN ULONG MulticastEpoch
  1120. )
  1121. /*++
  1122. Routine Description:
  1123. We received a heartbeat from a node on a network. Reset
  1124. the missed HB count on that network's interface.
  1125. Arguments:
  1126. Network - pointer to network block on which the packet was received
  1127. SourceNodeId - node number that issued the packet
  1128. SeqNumber - sending nodes' sequence num
  1129. AckNumber - last seq number sent by us that was seen at the sending node
  1130. Multicast - indicates whether this heartbeat was received in a multicast
  1131. MulticastEpoch - indicates multicast epoch number from heartbeat packet
  1132. Return Value:
  1133. None
  1134. --*/
  1135. {
  1136. PCNP_NODE Node;
  1137. PCNP_INTERFACE Interface;
  1138. CX_OUTERSCREEN CurrentOuterscreen;
  1139. //
  1140. // Take a snapshot of the current outerscreen so that our
  1141. // information doesn't change between decisions.
  1142. //
  1143. CurrentOuterscreen.UlongScreen = MMOuterscreen.UlongScreen;
  1144. //
  1145. // we ignore all packets until we're part of the cluster
  1146. //
  1147. if ( !CnpClusterScreenMember(
  1148. CurrentOuterscreen.ClusterScreen,
  1149. INT_NODE( CnLocalNodeId )
  1150. )
  1151. )
  1152. {
  1153. return;
  1154. }
  1155. //
  1156. // We ignore multicast packets whose epoch is earlier than ours.
  1157. // This prevents replay attacks, because the multicast key may
  1158. // not have been regenerated since the last time a node joined (and
  1159. // heartbeat sequence numbers were reset to one).
  1160. //
  1161. if (Multicast && MulticastEpoch < CxMulticastEpoch) {
  1162. CnTrace(HBEAT_ERROR, HbTraceHBFromExpiredEpoch,
  1163. "[HB] Discarding HB from old epoch. Source Node %u, "
  1164. "Pkt Epoch %u, Current Epoch %u.",
  1165. SourceNodeId, // LOGULONG
  1166. MulticastEpoch, // LOGULONG
  1167. CxMulticastEpoch // LOGULONG
  1168. );
  1169. return;
  1170. }
  1171. //
  1172. // convert the Node ID into a pointer and find the interface
  1173. // on which the packet was received.
  1174. //
  1175. Node = CnpFindNode( SourceNodeId );
  1176. CnAssert( Node != NULL );
  1177. Interface = CnpFindInterfaceLocked( Node, Network );
  1178. if ( Interface == NULL ) {
  1179. //
  1180. // somehow this network object went away while we were
  1181. // receiving some data on it. Just ignore this msg
  1182. //
  1183. CnTrace(HBEAT_ERROR, HbTraceHBFromUnknownNetwork,
  1184. "[HB] Discarding HB from node %u on an unknown network.",
  1185. Node->Id // LOGULONG
  1186. );
  1187. MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
  1188. goto error_exit;
  1189. }
  1190. //
  1191. // determine if this is guy is legit. If not in the outerscreen,
  1192. // then send a poison packet and we're done
  1193. //
  1194. if ( !CnpClusterScreenMember(
  1195. CurrentOuterscreen.ClusterScreen,
  1196. INT_NODE( SourceNodeId )
  1197. )
  1198. )
  1199. {
  1200. //
  1201. // Don't bother sending poison packets on restricted networks. They
  1202. // will be ignored.
  1203. //
  1204. if (CnpIsNetworkRestricted(Interface->Network)) {
  1205. goto error_exit;
  1206. }
  1207. CnTrace(HBEAT_ERROR, HbTraceHBFromBanishedNode,
  1208. "[HB] Discarding HB from banished node %u on net %u "
  1209. "due to outerscreen %04X. Sending poison packet back.",
  1210. Node->Id, // LOGULONG
  1211. Interface->Network->Id, // LOGULONG
  1212. CurrentOuterscreen.UlongScreen // LOGULONG
  1213. );
  1214. CcmpSendPoisonPacket( Node, NULL, 0, Network, NULL);
  1215. //
  1216. // The node lock was released.
  1217. //
  1218. return;
  1219. }
  1220. //
  1221. // Check that the incoming seq num is something we expect to
  1222. // guard against replay attacks.
  1223. //
  1224. if ( SeqNumber <= Interface->LastSequenceReceived) {
  1225. CnTrace(
  1226. HBEAT_ERROR, HbTraceHBOutOfSequence,
  1227. "[HB] Discarding HB from node %u on net %u with stale seqno %u. "
  1228. "Last seqno %u. Multicast: %!bool!.",
  1229. Node->Id, // LOGULONG
  1230. Interface->Network->Id, // LOGULONG
  1231. SeqNumber, // LOGULONG
  1232. Interface->LastSequenceReceived, // LOGULONG
  1233. Multicast
  1234. );
  1235. MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
  1236. goto error_exit;
  1237. }
  1238. // Update the interface's last received seq number
  1239. // which will be sent back as the ack number.
  1240. Interface->LastSequenceReceived = SeqNumber;
  1241. //
  1242. // Compare our seq number to the ack number in the packet.
  1243. // If more than two off then the source node is not recv'ing
  1244. // our heartbeats, but we're receiving theirs. This network is
  1245. // not usable. We ignore this msg to guarantee that we will
  1246. // declare the network down if the condition persists.
  1247. //
  1248. // In addition, if we are sending multicast heartbeats to this
  1249. // interface, revert to unicasts in case there is a multicast
  1250. // problem.
  1251. //
  1252. if (( Interface->SequenceToSend - AckNumber ) > 2 ) {
  1253. CnTrace(HBEAT_ERROR, HbTraceHBWithStaleAck,
  1254. "[HB] Discarding HB from node %u with stale ackno %u. "
  1255. "My seqno %u. Multicast: %!bool!.",
  1256. Node->Id, // LOGULONG
  1257. AckNumber, // LOGULONG
  1258. Interface->SequenceToSend, // LOGULONG
  1259. Multicast
  1260. );
  1261. MEMLOG( MemLogSeqAckMismatch, (ULONG_PTR)Interface, Interface->State );
  1262. if (CnpInterfaceQueryReceivedMulticast(Interface)) {
  1263. CnpInterfaceClearReceivedMulticast(Interface);
  1264. Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
  1265. CnpMulticastChangeNodeReachability(
  1266. Network,
  1267. Node,
  1268. FALSE, // not reachable
  1269. TRUE, // raise event
  1270. NULL // OUT new mask
  1271. );
  1272. }
  1273. goto error_exit;
  1274. }
  1275. MEMLOG4( MemLogReceivedPacket,
  1276. SeqNumber, AckNumber,
  1277. SourceNodeId, Interface->Network->Id );
  1278. CnTrace(HBEAT_EVENT, HbTraceReceivedHBpacket,
  1279. "[HB] Received HB from node %u on net %u, seqno %u, ackno %u, "
  1280. "multicast: %!bool!.",
  1281. SourceNodeId, // LOGULONG
  1282. Interface->Network->Id, // LOGULONG
  1283. SeqNumber, // LOGULONG
  1284. AckNumber, // LOGULONG
  1285. Multicast
  1286. );
  1287. // Reset the interface's and node's Missed HB count
  1288. // to indicate that things are somewhat normal.
  1289. //
  1290. InterlockedExchange(&Interface->MissedHBs, 0);
  1291. //
  1292. // Don't reset node miss count on restricted nets.
  1293. //
  1294. if (!CnpIsNetworkRestricted(Interface->Network)) {
  1295. InterlockedExchange(&Node->MissedHBs, 0);
  1296. }
  1297. //
  1298. // if local interface was previously disconnected (e.g. received
  1299. // a WMI NDIS status media disconnect event), reconnect it now.
  1300. //
  1301. if (CnpIsNetworkLocalDisconn(Interface->Network)) {
  1302. CxReconnectLocalInterface(Interface->Network->Id);
  1303. }
  1304. //
  1305. // move interface to online if necessary
  1306. //
  1307. if ( Interface->State == ClusnetInterfaceStateOnlinePending ||
  1308. Interface->State == ClusnetInterfaceStateUnreachable ) {
  1309. CnAcquireLockAtDpc( &Interface->Network->Lock );
  1310. Interface->Network->Irql = DISPATCH_LEVEL;
  1311. CnTrace(HBEAT_DETAIL, HbTraceInterfaceOnline,
  1312. "[HB] Moving interface for node %u on network %u to online "
  1313. "state.",
  1314. Node->Id, // LOGULONG
  1315. Interface->Network->Id // LOGULONG
  1316. );
  1317. //
  1318. // Initiate multicast discovery.
  1319. //
  1320. Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
  1321. Interface->McastRediscoveryCountdown = 0;
  1322. MEMLOG( MemLogOnlineIf, Node->Id, Interface->State );
  1323. CnpOnlineInterface( Interface );
  1324. CnTrace(HBEAT_EVENT, HbTraceInterfaceUpEvent,
  1325. "[HB] Issuing InterfaceUp event for node %u on network %u.",
  1326. Node->Id, // LOGULONG
  1327. Interface->Network->Id // LOGULONG
  1328. );
  1329. CnIssueEvent(ClusnetEventNetInterfaceUp,
  1330. Node->Id,
  1331. Interface->Network->Id);
  1332. }
  1333. //
  1334. // Indicate that a multicast has been received from this interface.
  1335. // This allows us to include this interface in our multicasts.
  1336. //
  1337. if (Multicast) {
  1338. IF_CNDBG(CN_DEBUG_HBEATS) {
  1339. CNPRINT(("[HB] Received multicast heartbeat on "
  1340. "network %d from source node %d, seq %d, "
  1341. "ack %d.\n",
  1342. Network->Id, SourceNodeId,
  1343. SeqNumber, AckNumber
  1344. ));
  1345. }
  1346. if (!CnpInterfaceQueryReceivedMulticast(Interface)) {
  1347. CnpInterfaceSetReceivedMulticast(Interface);
  1348. CnpMulticastChangeNodeReachability(
  1349. Network,
  1350. Node,
  1351. TRUE, // reachable
  1352. TRUE, // raise event
  1353. NULL // OUT new mask
  1354. );
  1355. }
  1356. // There is no point in sending discovery packets to this
  1357. // interface.
  1358. Interface->McastDiscoverCount = 0;
  1359. Interface->McastRediscoveryCountdown = 0;
  1360. // If the source node's multicast epoch is greater than
  1361. // ours, update. We can make the initial comparison without
  1362. // acquiring the lock.
  1363. if (MulticastEpoch > CxMulticastEpoch) {
  1364. CnpUpdateMulticastEpoch(MulticastEpoch);
  1365. }
  1366. }
  1367. CnReleaseLock( &Node->Lock, Node->Irql );
  1368. //
  1369. // when the first HB is recv'ed, a node may be in either the
  1370. // join or alive state (the sponser, for instance, moves from
  1371. // dead to alive). We need to clear the Node down issued flag
  1372. // for either case. If the MM State is joining, then a node up
  1373. // event must be issued as well. Note that we ignore HBs for
  1374. // node health purposes on restricted nets.
  1375. //
  1376. if ( ( (Node->MMState == ClusnetNodeStateJoining)
  1377. ||
  1378. (Node->MMState == ClusnetNodeStateAlive)
  1379. )
  1380. &&
  1381. Node->NodeDownIssued
  1382. &&
  1383. !CnpIsNetworkRestricted(Interface->Network)
  1384. )
  1385. {
  1386. Node->NodeDownIssued = FALSE;
  1387. MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
  1388. if ( Node->MMState == ClusnetNodeStateJoining ) {
  1389. CnTrace(HBEAT_EVENT, HbTraceNodeUpEvent,
  1390. "[HB] Issuing NodeUp event for node %u.",
  1391. Node->Id // LOGULONG
  1392. );
  1393. MEMLOG( MemLogNodeUp, Node->Id, 0 );
  1394. CnIssueEvent( ClusnetEventNodeUp, Node->Id, 0 );
  1395. }
  1396. }
  1397. return;
  1398. error_exit:
  1399. CnReleaseLock( &Node->Lock, Node->Irql );
  1400. return;
  1401. } // CnpReceiveHeartBeatMessage
  1402. NTSTATUS
  1403. CxSetOuterscreen(
  1404. IN ULONG Outerscreen
  1405. )
  1406. {
  1407. //
  1408. // based on the number of valid nodes, make sure any extranious
  1409. // bits are not set
  1410. //
  1411. CnAssert( ClusterDefaultMaxNodes <= 32 );
  1412. CnAssert(
  1413. ( Outerscreen & ( 0xFFFFFFFE << ( 32 - ClusterDefaultMaxNodes - 1 )))
  1414. == 0);
  1415. IF_CNDBG( CN_DEBUG_HBEATS )
  1416. CNPRINT(("[CCMP] Setting outerscreen to %04X\n",
  1417. ((Outerscreen & 0xFF)<< 8) | ((Outerscreen >> 8) & 0xFF)));
  1418. MMOuterscreen.UlongScreen = Outerscreen;
  1419. CnTrace(HBEAT_EVENT, HbTraceSetOuterscreen,
  1420. "[HB] Setting outerscreen to %04X",
  1421. Outerscreen // LOGULONG
  1422. );
  1423. MEMLOG( MemLogOuterscreen, Outerscreen, 0 );
  1424. return STATUS_SUCCESS;
  1425. } // CxSetOuterscreen
  1426. VOID
  1427. CnpTerminateClusterService(
  1428. IN PVOID Parameter
  1429. )
  1430. {
  1431. PWORK_QUEUE_ITEM workQueueItem = Parameter;
  1432. ULONG sourceNodeId = *((PULONG)(workQueueItem + 1));
  1433. WCHAR sourceNodeStringId[ 16 ];
  1434. swprintf(sourceNodeStringId, L"%u", sourceNodeId );
  1435. //
  1436. // only way we can get here right now is if a poison packet was received.
  1437. //
  1438. CnWriteErrorLogEntry(CLNET_NODE_POISONED,
  1439. STATUS_SUCCESS,
  1440. NULL,
  1441. 0,
  1442. 1,
  1443. sourceNodeStringId );
  1444. if ( ClussvcProcessHandle ) {
  1445. //
  1446. // there is still a race condition between the cluster service shutting
  1447. // down and closing this handle and it being used here. This really
  1448. // isn't a problem since the user mode portion is going away anyway.
  1449. // Besides, there isn't alot we can do if this call doesn't work anyway.
  1450. //
  1451. ZwTerminateProcess( ClussvcProcessHandle, STATUS_CLUSTER_POISONED );
  1452. }
  1453. CnFreePool( Parameter );
  1454. } // CnpTerminateClusterService
  1455. VOID
  1456. CnpReceivePoisonPacket(
  1457. IN PCNP_NETWORK Network,
  1458. IN CL_NODE_ID SourceNodeId,
  1459. IN ULONG SeqNumber
  1460. )
  1461. {
  1462. PCNP_NODE Node;
  1463. PCNP_INTERFACE Interface;
  1464. PWORK_QUEUE_ITEM WorkItem;
  1465. //
  1466. // give the node and the network pointers, find the interface on which
  1467. // this packet was received
  1468. //
  1469. Node = CnpFindNode( SourceNodeId );
  1470. if ( Node == NULL ) {
  1471. CnTrace(HBEAT_ERROR, HbTraceNoPoisonFromUnknownNode,
  1472. "[HB] Discarding poison packet from unknown node %u.",
  1473. SourceNodeId // LOGULONG
  1474. );
  1475. return;
  1476. }
  1477. Interface = CnpFindInterfaceLocked( Node, Network );
  1478. if ( Interface == NULL ) {
  1479. //
  1480. // somehow this network object went away while we were
  1481. // receiving some data on it. Just ignore this msg
  1482. //
  1483. CnTrace(HBEAT_ERROR, HbTracePoisonFromUnknownNetwork,
  1484. "[HB] Discarding poison packet from node %u on unknown network.",
  1485. Node->Id // LOGULONG
  1486. );
  1487. MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
  1488. CnReleaseLock( &Node->Lock, Node->Irql );
  1489. return;
  1490. }
  1491. //
  1492. // Check that the incoming seq num is something we expect to
  1493. // guard against replay attacks.
  1494. //
  1495. if ( SeqNumber <= Interface->LastSequenceReceived) {
  1496. CnTrace(HBEAT_ERROR , HbTracePoisonOutOfSeq,
  1497. "[HB] Discarding poison packet from node %u with stale seqno %u. "
  1498. "Current seqno %u.",
  1499. SourceNodeId, // LOGULONG
  1500. SeqNumber, // LOGULONG
  1501. Interface->LastSequenceReceived // LOGULONG
  1502. );
  1503. MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
  1504. CnReleaseLock( &Node->Lock, Node->Irql );
  1505. return;
  1506. }
  1507. //
  1508. // Ignore poison packets from restricted networks
  1509. //
  1510. if (CnpIsNetworkRestricted(Network)) {
  1511. CnTrace(HBEAT_ERROR , HbTracePoisonFromRestrictedNet,
  1512. "[HB] Discarding poison packet from node %u on restricted "
  1513. "network %u.",
  1514. SourceNodeId, // LOGULONG
  1515. Network->Id // LOGULONG
  1516. );
  1517. CnReleaseLock( &Node->Lock, Node->Irql );
  1518. return;
  1519. }
  1520. //
  1521. // We always honor a recv'ed poison packet.
  1522. //
  1523. CnReleaseLock( &Node->Lock, Node->Irql );
  1524. CnTrace(HBEAT_EVENT, HbTracePoisonPktReceived,
  1525. "[HB] Received poison packet from node %u. Halting this node.",
  1526. SourceNodeId // LOGULONG
  1527. );
  1528. MEMLOG( MemLogPoisonPktReceived, SourceNodeId, 0 );
  1529. CnIssueEvent( ClusnetEventPoisonPacketReceived, SourceNodeId, 0 );
  1530. //
  1531. // Shutdown all cluster network processing.
  1532. //
  1533. CnHaltOperation(NULL);
  1534. //
  1535. // allocate a work queue item so we can whack the cluster service
  1536. // process. allocate extra space at the end and stuff the source node ID
  1537. // out there. Yes, I know it is groady...
  1538. //
  1539. WorkItem = CnAllocatePool( sizeof( WORK_QUEUE_ITEM ) + sizeof( CL_NODE_ID ));
  1540. if ( WorkItem != NULL ) {
  1541. *((PULONG)(WorkItem + 1)) = SourceNodeId;
  1542. ExInitializeWorkItem( WorkItem, CnpTerminateClusterService, WorkItem );
  1543. ExQueueWorkItem( WorkItem, CriticalWorkQueue );
  1544. }
  1545. return;
  1546. } // CnpReceivePoisonPacket
  1547. VOID
  1548. CnpLogClussvcHangAndTerminate(
  1549. IN PDEVICE_OBJECT DeviceObject,
  1550. IN PVOID Context
  1551. )
  1552. /*++
  1553. Routine Description:
  1554. This routine logs an entry into system event log about clussvc hang, and terminates the
  1555. clussvc process.
  1556. Arguments:
  1557. None
  1558. Return Value:
  1559. None
  1560. --*/
  1561. {
  1562. WCHAR myStr[40];
  1563. swprintf(myStr, L"%u", ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000));
  1564. CnWriteErrorLogEntry(
  1565. CLNET_CLUSSVC_HUNG_TERMINATE,
  1566. STATUS_SUCCESS,
  1567. NULL,
  1568. 0,
  1569. 1,
  1570. myStr
  1571. );
  1572. if (ClussvcProcessHandle) {
  1573. ZwTerminateProcess(ClussvcProcessHandle, STATUS_CLUSTER_NODE_DOWN);
  1574. }
  1575. IoFreeWorkItem((PIO_WORKITEM)Context);
  1576. }//CnpLogClussvcHangAndTerminate
  1577. VOID
  1578. CnpLogClussvcHang(
  1579. IN PDEVICE_OBJECT DeviceObject,
  1580. IN PVOID Context
  1581. )
  1582. /*++
  1583. Routine Description:
  1584. This routine logs an entry into system event log about clussvc hang.
  1585. Arguments:
  1586. None
  1587. Return Value:
  1588. None
  1589. --*/
  1590. {
  1591. WCHAR myStr[40];
  1592. swprintf(myStr, L"%u", ((ClussvcClusnetHbTimeoutTicks * HEART_BEAT_PERIOD)/1000));
  1593. CnWriteErrorLogEntry(
  1594. CLNET_CLUSSVC_HUNG,
  1595. STATUS_SUCCESS,
  1596. NULL,
  1597. 0,
  1598. 1,
  1599. myStr
  1600. );
  1601. IoFreeWorkItem((PIO_WORKITEM)Context);
  1602. }//CnpLogClussvcHang
  1603. VOID
  1604. CnpCheckClussvcHang(
  1605. VOID
  1606. )
  1607. /*++
  1608. Routine Description:
  1609. Check for HB ticks from Clussvc, if not disabled, and Tick count has reached max
  1610. then take appropriate action depending on the configured value.
  1611. Arguments:
  1612. None
  1613. Return Value:
  1614. None
  1615. --*/
  1616. {
  1617. ULONG newValue;
  1618. // Check if heartbeating is disabled, then return.
  1619. if((ClussvcClusnetHbTickCount == 0) ||
  1620. (ClussvcClusnetHbTimeoutAction == ClussvcHangActionDisable)) {
  1621. return;
  1622. }
  1623. // Decrement the counter by 1.
  1624. newValue = InterlockedDecrement(&ClussvcClusnetHbTickCount);
  1625. // If this is 1->0 transition we need to do something.
  1626. if(newValue != 0)
  1627. return;
  1628. CnTrace(HBEAT_ERROR , HbTraceClussvcHang,
  1629. "[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u.",
  1630. ClussvcClusnetHbTimeoutTicks,
  1631. ClussvcClusnetHbTimeoutAction
  1632. );
  1633. IF_CNDBG( CN_DEBUG_HBEATS ) {
  1634. CNPRINT((
  1635. "[HB] Clussvc to Clusnet HB Timeout, Timeout=%u DPC ticks, Action=%u\n",
  1636. ClussvcClusnetHbTimeoutTicks,
  1637. (ULONG)ClussvcClusnetHbTimeoutAction
  1638. ));
  1639. }
  1640. CnAssert(ClussvcClusnetHbTimeoutAction< ClussvcHangActionMax);
  1641. switch(ClussvcClusnetHbTimeoutAction) {
  1642. case ClussvcHangActionLog:
  1643. // Just log a message and reset ClussvcClusnetHbTickCount to ClussvcClusnetHbTimeoutTicks
  1644. // Use DelayedWorkQueue
  1645. {
  1646. PIO_WORKITEM WorkItem;
  1647. WorkItem = IoAllocateWorkItem(CnDeviceObject);
  1648. if ( WorkItem != NULL ) {
  1649. IoQueueWorkItem(
  1650. WorkItem,
  1651. CnpLogClussvcHang,
  1652. DelayedWorkQueue,
  1653. (PVOID)WorkItem
  1654. );
  1655. }
  1656. InterlockedExchange(&ClussvcClusnetHbTickCount, ClussvcClusnetHbTimeoutTicks);
  1657. }
  1658. break;
  1659. case ClussvcHangActionBugCheckMachine:
  1660. // Bugcheck the machine.
  1661. {
  1662. KeBugCheckEx(
  1663. USER_MODE_HEALTH_MONITOR,
  1664. (ULONG_PTR)((ClussvcProcessHandle != NULL) ? ClussvcProcessObject : NULL),
  1665. (ULONG_PTR)(ClussvcClusnetHbTimeoutSeconds),
  1666. 0,
  1667. 0
  1668. );
  1669. }
  1670. break;
  1671. case ClussvcHangActionTerminateService:
  1672. default:
  1673. // Terminate Cluster Service. Handling is similar to the case as if clusnet has
  1674. // received a poison packet. Using Critical work queue.
  1675. {
  1676. KIRQL irql;
  1677. // If we have already run through this terminate path,
  1678. // then we do not do it again. The workitem will already
  1679. // be on the critical work queue (even if it has not yet
  1680. // executed).
  1681. CnAcquireLock(&HeartBeatLock, &irql);
  1682. if (ClussvcTerminateWorkItem != NULL) {
  1683. PIO_WORKITEM WorkItem;
  1684. // Swap out the workitem.
  1685. WorkItem = ClussvcTerminateWorkItem;
  1686. ClussvcTerminateWorkItem = NULL;
  1687. // Stop outgoing heartbeats.
  1688. ClussvcTerminateStopHbs = TRUE;
  1689. CnReleaseLock(&HeartBeatLock, irql);
  1690. // Issue halt event so clusdisk stops reservations.
  1691. CnIssueEvent(ClusnetEventHalt, 0, 0);
  1692. // Stop normal clusnet activity.
  1693. CnHaltOperation(NULL);
  1694. // Queue the critical workitem to terminate the
  1695. // service process.
  1696. IoQueueWorkItem(
  1697. WorkItem,
  1698. CnpLogClussvcHangAndTerminate,
  1699. CriticalWorkQueue,
  1700. (PVOID)WorkItem
  1701. );
  1702. } else {
  1703. CnReleaseLock(&HeartBeatLock, irql);
  1704. }
  1705. }
  1706. break;
  1707. }
  1708. }//CnpCheckClussvcHang
  1709. VOID
  1710. CnpWalkInterfacesAfterRegroup(
  1711. IN PCNP_INTERFACE Interface
  1712. )
  1713. /*++
  1714. Routine Description:
  1715. Reset counters for each interface after a regroup
  1716. Arguments:
  1717. None
  1718. Return Value:
  1719. None
  1720. --*/
  1721. {
  1722. InterlockedExchange(&Interface->MissedHBs, 0);
  1723. CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);
  1724. } // CnpWalkInterfacesAfterRegroup
  1725. BOOLEAN
  1726. CnpWalkNodesAfterRegroup(
  1727. IN PCNP_NODE Node,
  1728. IN PVOID UpdateContext,
  1729. IN CN_IRQL NodeTableIrql
  1730. )
  1731. /*++
  1732. Routine Description:
  1733. Called for each node in the node table. Regroup has finished
  1734. so we clear the node's missed Heart beat count and its node down
  1735. issued flag. No node should be unreachable at this point. If we
  1736. find one, kick off another regroup.
  1737. Arguments:
  1738. standard...
  1739. Return Value:
  1740. None
  1741. --*/
  1742. {
  1743. //
  1744. // check for inconsistent settings of Comm and MM state
  1745. //
  1746. if ( ( Node->MMState == ClusnetNodeStateAlive
  1747. ||
  1748. Node->MMState == ClusnetNodeStateJoining
  1749. )
  1750. &&
  1751. Node->CommState == ClusnetNodeCommStateUnreachable
  1752. )
  1753. {
  1754. CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent2,
  1755. "[HB] Issuing NodeDown event for node %u.",
  1756. Node->Id // LOGULONG
  1757. );
  1758. MEMLOG( MemLogInconsistentStates, Node->Id, Node->MMState );
  1759. CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
  1760. }
  1761. CnpWalkInterfacesOnNode( Node, (PVOID)CnpWalkInterfacesAfterRegroup );
  1762. InterlockedExchange(&Node->MissedHBs, 0);
  1763. //
  1764. // clear this only for nodes in the alive state. Once a node is marked
  1765. // dead, the flag is re-init'ed to true (this is used during a join to
  1766. // issue only one node up event).
  1767. //
  1768. if ( Node->MMState == ClusnetNodeStateAlive ) {
  1769. Node->NodeDownIssued = FALSE;
  1770. MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
  1771. }
  1772. CnReleaseLock( &Node->Lock, Node->Irql );
  1773. return TRUE; // the node table lock is still held
  1774. } // CnpWalkNodesAfterRegroup
  1775. VOID
  1776. CxRegroupFinished(
  1777. ULONG NewEventEpoch,
  1778. ULONG NewRegroupEpoch
  1779. )
  1780. /*++
  1781. Routine Description:
  1782. called when regroup has finished. Walk the node list and
  1783. perform the cleanup in the walk routine.
  1784. Arguments:
  1785. None
  1786. Return Value:
  1787. None
  1788. --*/
  1789. {
  1790. MEMLOG( MemLogRegroupFinished, NewEventEpoch, 0 );
  1791. CnTrace(HBEAT_EVENT, HbTraceRegroupFinished,
  1792. "[HB] Regroup finished, new event epoch = %u, "
  1793. "new regroup epoch = %u.",
  1794. NewEventEpoch, // LOGULONG
  1795. NewRegroupEpoch // LOGULONG
  1796. );
  1797. CnAssert( NewEventEpoch >= EventEpoch );
  1798. EventEpoch = NewEventEpoch;
  1799. if (NewRegroupEpoch > CxMulticastEpoch) {
  1800. CnpUpdateMulticastEpoch(NewRegroupEpoch);
  1801. }
  1802. CnpWalkNodeTable( CnpWalkNodesAfterRegroup, NULL );
  1803. } // CxRegroupFinished
  1804. VOID
  1805. CnpUpdateMulticastEpoch(
  1806. ULONG NewEpoch
  1807. )
  1808. /*++
  1809. Routine Description:
  1810. The Multicast Epoch must be monotonically increasing
  1811. and agreed upon by all nodes. It is based on the
  1812. regroup epoch (not to be confused with the ClusNet
  1813. event epoch, which is local to each node).
  1814. It is conceivable for a stale regroup epoch update
  1815. to occur; thus, only update if the new value is
  1816. greater than the current value.
  1817. Arguments:
  1818. NewEpoch - new epoch number
  1819. Return value:
  1820. None
  1821. --*/
  1822. {
  1823. KIRQL irql;
  1824. CnAcquireLock(&HeartBeatLock, &irql);
  1825. if (NewEpoch > CxMulticastEpoch) {
  1826. CnTrace(HBEAT_EVENT, HbTraceUpdateMulticastEpoch,
  1827. "[HB] Updating multicast epoch from %u to %u.",
  1828. CxMulticastEpoch, NewEpoch
  1829. );
  1830. CxMulticastEpoch = NewEpoch;
  1831. }
  1832. CnReleaseLock(&HeartBeatLock, irql);
  1833. } // CnpUpdateMulticastEpoch
  1834. /* end chbeat.c */