Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2082 lines
56 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. chbeat.c
  5. Abstract:
  6. membership state heart beat code. Tracks node availability through
  7. exchanging heart beat messages with nodes that are marked as alive.
  8. Author:
  9. Charlie Wickham (charlwi) 05-Mar-1997
  10. Environment:
  11. Kernel Mode
  12. Revision History:
  13. --*/
  14. #include "precomp.h"
  15. #pragma hdrstop
  16. #include "chbeat.tmh"
  17. #include "clusvmsg.h"
  18. #include "stdio.h"
  19. /* External */
  20. /* Static */
  21. //
  22. // heart beat structures - heart beats are driven by a timer and DPC
  23. // routine. In order to synchronize the shutdown of the DPC, we also need two
  24. // flags, an event and a spin lock.
  25. //
  26. KTIMER HeartBeatTimer; // periodic timer armed in CnpStartHeartBeats and cancelled in CnpStopHeartBeats
  27. KDPC HeartBeatDpc; // DPC object bound to CnpHeartBeatDpc in CxInitializeHeartBeat
  28. KEVENT HeartBeatDpcFinished; // set by the DPC when it finds heart beats disabled; CnpStopHeartBeats waits on it
  29. BOOLEAN HeartBeatEnabled = FALSE; // TRUE between start and stop; read and written under HeartBeatLock
  30. BOOLEAN HeartBeatDpcRunning = FALSE; // set TRUE by the DPC (under HeartBeatLock) once it decides to run
  31. CN_LOCK HeartBeatLock; // guards HeartBeatEnabled and HeartBeatDpcRunning
  32. //
  33. // heart beat period in millisecs
  34. //
  35. #define HEART_BEAT_PERIOD 600
  36. #if 0
  37. Heart Beating Explained
  38. ClockTicks are incremented every HEART_BEAT_PERIOD millisecs. SendTicks are the
  39. number of ticks that go by before sending HBs.
  40. The check for received HB msgs is done in the tick just before HB msgs are
  41. sent. Interface Lost HB ticks are in terms of heart beat check periods and
  42. therefore are incremented only during the check period. An interface is failed
  43. when the number of Interface Lost HB ticks have passed and no HB message has
  44. been received on that interface.
  45. Likewise, Node Lost HB Ticks are in terms of heart beat check periods and are
  46. incremented during the check period. After all interfaces have failed on a
  47. node, Node Lost HB ticks must pass without an interface going back online
  48. before a node down event is issued. Note that a node's comm state is set to
  49. offline when all interfaces have failed.
  50. #endif
  51. #define CLUSNET_HEART_BEAT_SEND_TICKS 2 // every 1.2 secs
  52. #define CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS 3 // after 3 secs
  53. #define CLUSNET_NODE_LOST_HEART_BEAT_TICKS 6 // after 6.6 secs
  54. ULONG HeartBeatClockTicks; // compared by the DPC against 0 and HeartBeatSendTicks to decide when to send
  55. ULONG HeartBeatSendTicks = CLUSNET_HEART_BEAT_SEND_TICKS; // tick value at which the DPC sends HBs
  56. ULONG HBInterfaceLostHBTicks = CLUSNET_INTERFACE_LOST_HEART_BEAT_TICKS; // missed-HB threshold used by CnpCheckForHBs to fail an interface
  57. ULONG HBNodeLostHBTicks = CLUSNET_NODE_LOST_HEART_BEAT_TICKS; // missed-HB threshold used by CnpWalkNodesToCheckForHeartBeats to issue node down
  58. //
  59. // Unicast Heartbeat Data
  60. //
  61. // Even with multicast heartbeats, unicast heartbeats must be supported
  62. // for backwards compatibility.
  63. //
  64. //
  65. // This array records all the nodes that need to have a HB sent to another
  66. // node. This array is not protected by a lock since it is only used with the
  67. // heartbeat DPC routine.
  68. //
  69. typedef struct _INTERFACE_HEARTBEAT_INFO { // one pending unicast heartbeat, filled in by CnpSendUcastHB
  70. CL_NODE_ID NodeId; // Interface->Node->Id of the target interface
  71. CL_NETWORK_ID NetworkId; // Interface->Network->Id of the target interface
  72. ULONG SeqNumber; // copied from Interface->SequenceToSend
  73. ULONG AckNumber; // copied from Interface->LastSequenceReceived
  74. } INTERFACE_HEARTBEAT_INFO, *PINTERFACE_HEARTBEAT_INFO;
  75. #define InterfaceHBInfoInitialLength 16
  76. #define InterfaceHBInfoLengthIncrement 4
  77. PINTERFACE_HEARTBEAT_INFO InterfaceHeartBeatInfo = NULL;
  78. ULONG InterfaceHBInfoCount; // running count while sending HBs
  79. ULONG InterfaceHBInfoCurrentLength; // current length of HB info array
  80. LARGE_INTEGER HBTime; // HB time in relative sys time
  81. #define MAX_DPC_SKEW ( -HBTime.QuadPart / 2 )
  82. //
  83. // Outerscreen mask. This is set by clussvc's membership manager in user
  84. // mode. As it changes, MM drops down the set outerscreen Ioctl to update
  85. // clusnet's notion of this mask. Clusnet uses this mask to determine the
  86. // validity of a received heart beat. If the sending node is not part
  87. // of the mask, then it is sent a poison packet and the received event
  88. // is not passed on to other consumers. If it is a legitimate PP, then
  89. // we generate the proper event.
  90. //
  91. // Note: MM type definitions and macros have been moved to cnpdef.h for
  92. // general usage.
  93. //
  94. typedef CX_CLUSTERSCREEN CX_OUTERSCREEN;
  95. CX_OUTERSCREEN MMOuterscreen;
  96. // Multicast Heartbeat Data
  97. //
  98. typedef struct _NETWORK_MCAST_HEARTBEAT_INFO { // one pending multicast heartbeat per network, filled in by CnpSendMcastHB
  99. CL_NETWORK_ID NetworkId; // Interface->Network->Id; used as the lookup key for this entry
  100. PCNP_MULTICAST_GROUP McastGroup; // Network->CurrentMcastGroup, referenced when the entry is claimed
  101. CX_HB_NODE_INFO NodeInfo[ClusterDefaultMaxNodes+ClusterMinNodeId]; // per-target seq/ack numbers, indexed directly by node id
  102. CX_CLUSTERSCREEN McastTarget; // screen of target nodes; cleared when the entry is claimed, then one node inserted per interface
  103. } NETWORK_MCAST_HEARTBEAT_INFO, *PNETWORK_MCAST_HEARTBEAT_INFO;
  104. #define NetworkHBInfoInitialLength 4
  105. #define NetworkHBInfoLengthIncrement 4
  106. PNETWORK_MCAST_HEARTBEAT_INFO NetworkHeartBeatInfo = NULL;
  107. ULONG NetworkHBInfoCount; // running count while sending HBs
  108. ULONG NetworkHBInfoCurrentLength; // current length of HB info array
  109. CL_NETWORK_ID MulticastBestNetwork = ClusterAnyNetworkId;
  110. /* Forward */
  111. NTSTATUS
  112. CxInitializeHeartBeat(
  113. void
  114. );
  115. VOID
  116. CxUnloadHeartBeat(
  117. VOID
  118. );
  119. VOID
  120. CnpHeartBeatDpc(
  121. PKDPC DpcObject,
  122. PVOID DeferredContext,
  123. PVOID Arg1,
  124. PVOID Arg2
  125. );
  126. BOOLEAN
  127. CnpWalkNodesToSendHeartBeats(
  128. IN PCNP_NODE UpdateNode,
  129. IN PVOID UpdateContext,
  130. IN CN_IRQL NodeTableIrql
  131. );
  132. BOOLEAN
  133. CnpWalkNodesToCheckForHeartBeats(
  134. IN PCNP_NODE UpdateNode,
  135. IN PVOID UpdateContext,
  136. IN CN_IRQL NodeTableIrql
  137. );
  138. VOID
  139. CnpSendHBs(
  140. IN PCNP_INTERFACE UpdateInterface
  141. );
  142. NTSTATUS
  143. CxSetOuterscreen(
  144. IN ULONG Outerscreen
  145. );
  146. VOID
  147. CnpReceivePoisonPacket(
  148. IN PCNP_NETWORK Network,
  149. IN CL_NODE_ID SourceNodeId,
  150. IN ULONG SeqNumber
  151. );
  152. /* End Forward */
  153. #ifdef ALLOC_PRAGMA
  154. #pragma alloc_text(INIT, CxInitializeHeartBeat)
  155. #pragma alloc_text(PAGE, CxUnloadHeartBeat)
  156. #endif // ALLOC_PRAGMA
  157. NTSTATUS
  158. CxInitializeHeartBeat(
  159. void
  160. )
  161. /*++
  162. Routine Description:
  163. Init the mechanisms used to send and monitor heart beats
  164. Arguments:
  165. None
  166. Return Value:
  167. STATUS_INSUFFICIENT_RESOURCES if allocation fails.
  168. STATUS_SUCCESS otherwise.
  169. --*/
  170. {
  171. // allocate the interface info array
  172. InterfaceHBInfoCount = 0;
  173. InterfaceHBInfoCurrentLength = InterfaceHBInfoInitialLength;
  174. if (InterfaceHBInfoCurrentLength > 0) {
  175. InterfaceHeartBeatInfo = CnAllocatePool(
  176. InterfaceHBInfoCurrentLength
  177. * sizeof(INTERFACE_HEARTBEAT_INFO)
  178. );
  179. if (InterfaceHeartBeatInfo == NULL) {
  180. return(STATUS_INSUFFICIENT_RESOURCES);
  181. }
  182. }
  183. // allocate the network info array
  184. NetworkHBInfoCount = 0;
  185. NetworkHBInfoCurrentLength = NetworkHBInfoInitialLength;
  186. if (NetworkHBInfoCurrentLength > 0) {
  187. NetworkHeartBeatInfo = CnAllocatePool(
  188. NetworkHBInfoCurrentLength
  189. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  190. );
  191. if (NetworkHeartBeatInfo == NULL) {
  192. return(STATUS_INSUFFICIENT_RESOURCES);
  193. }
  194. RtlZeroMemory(
  195. NetworkHeartBeatInfo,
  196. NetworkHBInfoCurrentLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  197. );
  198. }
  199. KeInitializeTimer( &HeartBeatTimer );
  200. KeInitializeDpc( &HeartBeatDpc, CnpHeartBeatDpc, NULL );
  201. KeInitializeEvent( &HeartBeatDpcFinished, SynchronizationEvent, FALSE );
  202. CnInitializeLock( &HeartBeatLock, CNP_HBEAT_LOCK );
  203. MEMLOG( MemLogInitHB, 0, 0 );
  204. return(STATUS_SUCCESS);
  205. } // CxInitializeHeartBeat
  206. VOID
  207. CxUnloadHeartBeat(
  208. VOID
  209. )
  210. /*++
  211. Routine Description:
  212. Called during clusnet driver unload. Free any data structures
  213. allocated to send and monitor heartbeats.
  214. Arguments:
  215. None
  216. Return Value:
  217. None
  218. --*/
  219. {
  220. PAGED_CODE();
  221. if (InterfaceHeartBeatInfo != NULL) {
  222. CnFreePool(InterfaceHeartBeatInfo);
  223. InterfaceHeartBeatInfo = NULL;
  224. }
  225. if (NetworkHeartBeatInfo != NULL) {
  226. CnFreePool(NetworkHeartBeatInfo);
  227. NetworkHeartBeatInfo = NULL;
  228. }
  229. return;
  230. } // CxUnloadHeartBeat
  231. VOID
  232. CnpStartHeartBeats(
  233. VOID
  234. )
  235. /*++
  236. Routine Description:
  237. Start heart beating with the nodes that are marked alive and have
  238. an interface marked either OnlinePending or Online.
  239. Arguments:
  240. None
  241. Return Value:
  242. None
  243. --*/
  244. {
  245. BOOLEAN TimerInserted;
  246. CN_IRQL OldIrql;
  247. ULONG period = HEART_BEAT_PERIOD;
  248. CnAcquireLock( &HeartBeatLock, &OldIrql );
  249. HBTime.QuadPart = Int32x32To64( HEART_BEAT_PERIOD, -10000 );
  250. TimerInserted = KeSetTimerEx(&HeartBeatTimer,
  251. HBTime,
  252. HEART_BEAT_PERIOD,
  253. &HeartBeatDpc);
  254. HeartBeatEnabled = TRUE;
  255. CnTrace(HBEAT_EVENT, HbTraceTimerStarted,
  256. "[HB] Heartbeat timer started. Period = %u ms.",
  257. period // LOGULONG
  258. );
  259. MEMLOG( MemLogHBStarted, HEART_BEAT_PERIOD, 0 );
  260. CnReleaseLock( &HeartBeatLock, OldIrql );
  261. } // CnpStartHeartBeats
  262. VOID
  263. CnpStopHeartBeats(
  264. VOID
  265. )
  266. /*++
  267. Routine Description:
  268. Stop heart beating with other nodes in the cluster.
  269. Arguments:
  270. None
  271. Return Value:
  272. None
  273. --*/
  274. {
  275. BOOLEAN TimerCanceled;
  276. CN_IRQL OldIrql;
  277. CnAcquireLock( &HeartBeatLock, &OldIrql );
  278. if (HeartBeatEnabled) {
  279. HeartBeatEnabled = FALSE;
  280. //
  281. // Cancel the periodic timer. Contrary to what the DDK implies,
  282. // this does not cancel the DPC if it is still queued from the
  283. // last timer expiration. It only stops the timer from firing
  284. // again. This is true as of 8/99. See KiTimerListExpire() in
  285. // ntos\ke\dpcsup.c.
  286. //
  287. TimerCanceled = KeCancelTimer( &HeartBeatTimer );
  288. CnTrace(HBEAT_DETAIL, HbTraceTimerCancelled,
  289. "[HB] Heartbeat timer cancelled: %!bool!",
  290. TimerCanceled // LOGBOOLEAN
  291. );
  292. MEMLOG( MemLogHBStopped, 0, 0 );
  293. //
  294. // Remove the DPC associated with the timer from the system DPC
  295. // queue, if it is there. This actually does nothing, because a
  296. // timer DPC is only inserted into the system DPC queue if it is
  297. // bound to a specific processor. Unbound DPCs are executed inline
  298. // on the current processor in the kernel's timer expiration code.
  299. // Note that the object for a periodic timer is reinserted into the
  300. // timer queue before the DPC is excuted. So, it is possible for the
  301. // timer and the associated DPC to be queued simultaneously. This is
  302. // true as of 8/99. See KiTimerListExpire() in ntos\ke\dpcsup.c.
  303. //
  304. // The bottom line is that there is no safe way to synchronize with
  305. // the execution of a timer DPC during driver unload. All we can
  306. // do is ensure that the DPC handler code recognizes that it should
  307. // abort execution immediately and hope that it does so before the
  308. // driver code is unloaded. We do this by setting the HeartBeatEnabled
  309. // flag to False above. If our DPC code happens to be executing at
  310. // this point in time on another processor, as denoted by
  311. // HeartBeatDpcRunning, we wait for it to finish.
  312. //
  313. if ( !KeRemoveQueueDpc( &HeartBeatDpc )) {
  314. CnTrace(HBEAT_DETAIL, HbTraceDpcRunning,
  315. "[HB] DPC not removed. HeartBeatDpcRunning = %!bool!",
  316. HeartBeatDpcRunning // LOGBOOLEAN
  317. );
  318. MEMLOG( MemLogHBDpcRunning, HeartBeatDpcRunning, 0 );
  319. if ( HeartBeatDpcRunning ) {
  320. CnReleaseLock( &HeartBeatLock, OldIrql );
  321. CnTrace(HBEAT_DETAIL, HbWaitForDpcToFinish,
  322. "can't remove DPC; waiting on DPCFinished event"
  323. );
  324. MEMLOG( MemLogWaitForDpcFinish, 0, 0 );
  325. KeWaitForSingleObject(&HeartBeatDpcFinished,
  326. Executive,
  327. KernelMode,
  328. FALSE, // not alertable
  329. NULL); // no timeout
  330. KeClearEvent( &HeartBeatDpcFinished );
  331. return;
  332. }
  333. }
  334. CnTrace(HBEAT_EVENT, HbTraceTimerStopped,
  335. "[HB] Heartbeat timer stopped."
  336. );
  337. }
  338. CnReleaseLock( &HeartBeatLock, OldIrql );
  339. return;
  340. } // CnpStopHeartBeats
  341. VOID
  342. CnpSendMcastHBCompletion(
  343. IN NTSTATUS Status,
  344. IN ULONG BytesSent,
  345. IN PVOID Context,
  346. IN PVOID Buffer
  347. )
  348. /*++
  349. Routine Description:
  350. Called when a mcast heartbeat send request completes
  351. successfully or unsuccessfully. Dereferences the
  352. McastGroup data structure.
  353. Arguments:
  354. Status - status of request
  355. BytesSent - not used
  356. Context - points to multicast group data structure
  357. Buffer - not used
  358. Return value:
  359. None.
  360. --*/
  361. {
  362. PCNP_MULTICAST_GROUP mcastGroup = (PCNP_MULTICAST_GROUP) Context;
  363. CnAssert(mcastGroup != NULL);
  364. CnpDereferenceMulticastGroup(mcastGroup);
  365. return;
  366. } // CnpSendMcastHBCompletion
  367. NTSTATUS
  368. CnpSendMcastHB(
  369. IN PCNP_INTERFACE Interface
  370. )
  371. /*++
  372. Routine Description:
  373. Writes multicast heartbeat data into the NetworkHeartBeatInfo
  374. array for target Interface.
  375. Notes:
  376. Called from DPC with Network and Node locks held.
  377. Returns with Network and Node locks held.
  378. --*/
  379. {
  380. ULONG i;
  381. BOOLEAN networkConnected;
  382. // find the network info structure for this network
  383. for (i = 0; i < NetworkHBInfoCount; i++) {
  384. if (NetworkHeartBeatInfo[i].NetworkId
  385. == Interface->Network->Id) {
  386. break;
  387. }
  388. }
  389. // start a new network info structure, if necessary
  390. if (i == NetworkHBInfoCount) {
  391. // before claiming an entry in the network info array,
  392. // make sure the array is large enough
  393. if (NetworkHBInfoCount >= NetworkHBInfoCurrentLength) {
  394. // need to allocate a new network info array
  395. PNETWORK_MCAST_HEARTBEAT_INFO tempInfo = NULL;
  396. PNETWORK_MCAST_HEARTBEAT_INFO freeInfo = NULL;
  397. ULONG tempLength;
  398. tempLength = NetworkHBInfoCurrentLength
  399. + NetworkHBInfoLengthIncrement;
  400. tempInfo = CnAllocatePool(
  401. tempLength
  402. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  403. );
  404. if (tempInfo == NULL) {
  405. CnTrace(
  406. HBEAT_DETAIL, HbNetInfoArrayAllocFailed,
  407. "[HB] Failed to allocate network heartbeat info "
  408. "array of length %u. Cannot schedule heartbeat "
  409. "for node %u on network %u.",
  410. tempLength,
  411. Interface->Node->Id,
  412. Interface->Network->Id
  413. );
  414. // cannot continue. the failure to send this
  415. // heartbeat will not be fatal if we recover
  416. // quickly. if we do not recover, this node
  417. // will be poisoned, which is probably best
  418. // since it is dangerously low on nonpaged pool.
  419. return(STATUS_INSUFFICIENT_RESOURCES);
  420. } else {
  421. // the allocation was successful. establish
  422. // the new array as the heartbeat info
  423. // array.
  424. RtlZeroMemory(
  425. tempInfo,
  426. tempLength * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  427. );
  428. freeInfo = NetworkHeartBeatInfo;
  429. NetworkHeartBeatInfo = tempInfo;
  430. NetworkHBInfoCurrentLength = tempLength;
  431. if (freeInfo != NULL) {
  432. if (NetworkHBInfoCount > 0) {
  433. RtlCopyMemory(
  434. NetworkHeartBeatInfo,
  435. freeInfo,
  436. NetworkHBInfoCount
  437. * sizeof(NETWORK_MCAST_HEARTBEAT_INFO)
  438. );
  439. }
  440. CnFreePool(freeInfo);
  441. }
  442. CnTrace(
  443. HBEAT_DETAIL, HbNetInfoArrayLengthIncreased,
  444. "[HB] Increased network heartbeat info array "
  445. "to size %u.",
  446. NetworkHBInfoCurrentLength
  447. );
  448. }
  449. }
  450. // increment the current counter
  451. NetworkHBInfoCount++;
  452. // initialize the information for this structure
  453. RtlZeroMemory(
  454. &NetworkHeartBeatInfo[i].McastTarget,
  455. sizeof(NetworkHeartBeatInfo[i].McastTarget)
  456. );
  457. NetworkHeartBeatInfo[i].NetworkId = Interface->Network->Id;
  458. NetworkHeartBeatInfo[i].McastGroup =
  459. Interface->Network->CurrentMcastGroup;
  460. CnpReferenceMulticastGroup(NetworkHeartBeatInfo[i].McastGroup);
  461. }
  462. networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));
  463. CnTrace(HBEAT_DETAIL, HbTraceScheduleMcastHBForInterface,
  464. "[HB] Scheduling multicast HB for node %u on network %u "
  465. "(I/F state = %!ifstate!) "
  466. "(interface media connected = %!bool!).",
  467. Interface->Node->Id, // LOGULONG
  468. Interface->Network->Id, // LOGULONG
  469. Interface->State, // LOGIfState
  470. networkConnected
  471. );
  472. // fill in the network info for this node/interface
  473. NetworkHeartBeatInfo[i].NodeInfo[Interface->Node->Id].SeqNumber =
  474. Interface->SequenceToSend;
  475. NetworkHeartBeatInfo[i].NodeInfo[Interface->Node->Id].AckNumber =
  476. Interface->LastSequenceReceived;
  477. CnpClusterScreenInsert(
  478. NetworkHeartBeatInfo[i].McastTarget.ClusterScreen,
  479. INT_NODE(Interface->Node->Id)
  480. );
  481. return(STATUS_SUCCESS);
  482. } // CnpSendMcastHB
  483. NTSTATUS
  484. CnpSendUcastHB(
  485. IN PCNP_INTERFACE Interface
  486. )
  487. /*++
  488. Routine Description:
  489. Writes unicast heartbeat data into the InterfaceHeartBeatInfo
  490. array for target Interface.
  491. Notes:
  492. Called from DPC with Network and Node locks held.
  493. Returns with Network and Node locks held.
  494. --*/
  495. {
  496. BOOLEAN networkConnected;
  497. // before filling an entry in the heartbeat info array,
  498. // make sure the array is large enough.
  499. if (InterfaceHBInfoCount >= InterfaceHBInfoCurrentLength) {
  500. // need to allocate a new heartbeat info array
  501. PINTERFACE_HEARTBEAT_INFO tempInfo = NULL;
  502. PINTERFACE_HEARTBEAT_INFO freeInfo = NULL;
  503. ULONG tempLength;
  504. tempLength = InterfaceHBInfoCurrentLength
  505. + InterfaceHBInfoLengthIncrement;
  506. tempInfo = CnAllocatePool(
  507. tempLength * sizeof(INTERFACE_HEARTBEAT_INFO)
  508. );
  509. if (tempInfo == NULL) {
  510. CnTrace(
  511. HBEAT_DETAIL, HbInfoArrayAllocFailed,
  512. "[HB] Failed to allocate heartbeat info "
  513. "array of length %u. Cannot schedule heartbeat "
  514. "for node %u on network %u.",
  515. tempLength,
  516. Interface->Node->Id,
  517. Interface->Network->Id
  518. );
  519. // cannot continue. the failure to send this
  520. // heartbeat will not be fatal if we recover
  521. // quickly. if we do not recover, this node
  522. // will be poisoned, which is probably best
  523. // since it is dangerously low on nonpaged pool.
  524. return(STATUS_INSUFFICIENT_RESOURCES);
  525. } else {
  526. // the allocation was successful. establish
  527. // the new array as the heartbeat info
  528. // array.
  529. freeInfo = InterfaceHeartBeatInfo;
  530. InterfaceHeartBeatInfo = tempInfo;
  531. InterfaceHBInfoCurrentLength = tempLength;
  532. if (freeInfo != NULL) {
  533. if (InterfaceHBInfoCount > 0) {
  534. RtlCopyMemory(
  535. InterfaceHeartBeatInfo,
  536. freeInfo,
  537. InterfaceHBInfoCount * sizeof(INTERFACE_HEARTBEAT_INFO)
  538. );
  539. }
  540. CnFreePool(freeInfo);
  541. }
  542. CnTrace(
  543. HBEAT_DETAIL, HbInfoArrayLengthIncreased,
  544. "[HB] Increased heartbeat info array to size %u.",
  545. InterfaceHBInfoCurrentLength
  546. );
  547. }
  548. }
  549. networkConnected = (BOOLEAN)(!CnpIsNetworkLocalDisconn(Interface->Network));
  550. CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForInterface,
  551. "[HB] Scheduling HB for node %u on network %u (I/F state = %!ifstate!) "
  552. "(interface media connected = %!bool!).",
  553. Interface->Node->Id, // LOGULONG
  554. Interface->Network->Id, // LOGULONG
  555. Interface->State, // LOGIfState
  556. networkConnected
  557. );
  558. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].NodeId = Interface->Node->Id;
  559. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].SeqNumber =
  560. Interface->SequenceToSend;
  561. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].AckNumber =
  562. Interface->LastSequenceReceived;
  563. InterfaceHeartBeatInfo[ InterfaceHBInfoCount ].NetworkId = Interface->Network->Id;
  564. ++InterfaceHBInfoCount;
  565. return(STATUS_SUCCESS);
  566. } // CnpSendUcastHB
  567. VOID
  568. CnpSendHBs(
  569. IN PCNP_INTERFACE Interface
  570. )
  571. /*++
  572. Routine Description:
  573. If Interface is in the correct state then stuff an entry in
  574. the heartbeat info array. Expand the heartbeat info
  575. array if necessary.
  576. Arguments:
  577. Interface - target interface for heartbeat message
  578. Return Value:
  579. None
  580. --*/
  581. {
  582. BOOLEAN mcastOnly = FALSE;
  583. if ( Interface->State >= ClusnetInterfaceStateUnreachable ) {
  584. // increment the sequence number
  585. (Interface->SequenceToSend)++;
  586. // check if we should include this interface in a
  587. // multicast heartbeat. first we verify that the
  588. // network is multicast capable. then, we include it
  589. // if either of the following conditions are true:
  590. // - we have received a multicast heartbeat from the
  591. // target interface
  592. // - the discovery count (the number of discovery mcasts
  593. // left to send to the target interface) is greater
  594. // than zero
  595. if (CnpIsNetworkMulticastCapable(Interface->Network)) {
  596. if (CnpInterfaceQueryReceivedMulticast(Interface)) {
  597. // write the mcast heartbeat data. if not
  598. // successful, attempt a unicast heartbeat.
  599. if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
  600. mcastOnly = TRUE;
  601. }
  602. } else if (Interface->McastDiscoverCount > 0) {
  603. // write the mcast heartbeat data for a
  604. // discovery. if successful, decrement the
  605. // discovery count.
  606. if (CnpSendMcastHB(Interface) == STATUS_SUCCESS) {
  607. --Interface->McastDiscoverCount;
  608. // if the discovery count has reached zero,
  609. // set the rediscovery countdown. this is
  610. // the number of heartbeat periods until we
  611. // try discovery again.
  612. if (Interface->McastDiscoverCount == 0) {
  613. Interface->McastRediscoveryCountdown =
  614. CNP_INTERFACE_MCAST_REDISCOVERY;
  615. }
  616. }
  617. } else if (Interface->McastRediscoveryCountdown > 0) {
  618. // decrement the rediscovery countdown. if we
  619. // reach zero, we will start multicast discovery
  620. // on the next heartbeat to this interface.
  621. if (--Interface->McastRediscoveryCountdown == 0) {
  622. Interface->McastDiscoverCount =
  623. CNP_INTERFACE_MCAST_DISCOVERY;
  624. }
  625. }
  626. }
  627. // write unicast heartbeat data
  628. if (!mcastOnly) {
  629. CnpSendUcastHB(Interface);
  630. }
  631. }
  632. CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);
  633. return;
  634. } // CnpSendHBs
  635. VOID
  636. CnpCheckForHBs(
  637. IN PCNP_INTERFACE Interface
  638. )
  639. /*++
  640. Routine Description:
  641. Check if heart beats have been received for this interface
  642. Arguments:
  643. None
  644. Return Value:
  645. None
  646. --*/
  647. {
  648. ULONG MissedHBCount;
  649. BOOLEAN NetworkLockReleased = FALSE;
  650. if ( Interface->State >= ClusnetInterfaceStateUnreachable
  651. && !CnpIsNetworkLocalDisconn(Interface->Network) ) {
  652. MissedHBCount = InterlockedIncrement( &Interface->MissedHBs );
  653. if ( MissedHBCount == 1 ) {
  654. //
  655. // a HB was received in time for this node. Clear the status
  656. // info associated with this interface, but also mark the node
  657. // as having an interface that is ok. Note that we do not
  658. // use HBs on restricted nets to determine node health.
  659. //
  660. if (!CnpIsNetworkRestricted(Interface->Network)) {
  661. Interface->Node->HBWasMissed = FALSE;
  662. }
  663. CnTrace(HBEAT_DETAIL, HbTraceHBReceivedForInterface,
  664. "[HB] A HB was received from node %u on net %u in this "
  665. "period.",
  666. Interface->Node->Id, // LOGULONG
  667. Interface->Network->Id // LOGULONG
  668. );
  669. } else {
  670. CnTrace(HBEAT_EVENT, HbTraceMissedIfHB,
  671. "[HB] HB MISSED for node %u on net %u, missed count %u.",
  672. Interface->Node->Id, // LOGULONG
  673. Interface->Network->Id, // LOGULONG
  674. MissedHBCount // LOGULONG
  675. );
  676. MEMLOG4(
  677. MemLogMissedIfHB,
  678. (ULONG_PTR)Interface, MissedHBCount,
  679. Interface->Node->Id,
  680. Interface->Network->Id
  681. );
  682. if ( MissedHBCount >= HBInterfaceLostHBTicks &&
  683. Interface->State >= ClusnetInterfaceStateOnlinePending ) {
  684. //
  685. // interface is either online pending or online, so move it
  686. // to unreachable. CnpFailInterface will also mark the node
  687. // unreachable if all of the node's interfaces are unreachable.
  688. // CnpFailInterface releases the network object lock as part
  689. // of its duties.
  690. //
  691. CnTrace(HBEAT_DETAIL, HbTraceFailInterface,
  692. "[HB] Moving I/F for node %u on net %u to failed state, "
  693. "previous I/F state = %!ifstate!.",
  694. Interface->Node->Id, // LOGULONG
  695. Interface->Network->Id, // LOGULONG
  696. Interface->State // LOGIfState
  697. );
  698. //
  699. // continuation log entries go before the main entry since
  700. // we scan the log backwards, i.e., we'll hit FailingIf
  701. // before we hit FailingIf1.
  702. //
  703. MEMLOG4(
  704. MemLogFailingIf,
  705. (ULONG_PTR)Interface,
  706. Interface->State,
  707. Interface->Node->Id,
  708. Interface->Network->Id
  709. );
  710. CnpFailInterface( Interface );
  711. NetworkLockReleased = TRUE;
  712. //
  713. // issue a net interface unreachable event to let consumers
  714. // know what is happening
  715. //
  716. CnTrace(HBEAT_EVENT, HbTraceInterfaceUnreachableEvent,
  717. "[HB] Issuing InterfaceUnreachable event for node %u "
  718. "on net %u, previous I/F state = %!ifstate!.",
  719. Interface->Node->Id, // LOGULONG
  720. Interface->Network->Id, // LOGULONG
  721. Interface->State // LOGIfState
  722. );
  723. CnIssueEvent(ClusnetEventNetInterfaceUnreachable,
  724. Interface->Node->Id,
  725. Interface->Network->Id);
  726. }
  727. }
  728. }
  729. if ( !NetworkLockReleased ) {
  730. CnReleaseLock(&Interface->Network->Lock,
  731. Interface->Network->Irql);
  732. }
  733. return;
  734. } // CnpCheckForHBs
  735. BOOLEAN
  736. CnpWalkNodesToSendHeartBeats(
  737. IN PCNP_NODE Node,
  738. IN PVOID UpdateContext,
  739. IN CN_IRQL NodeTableIrql
  740. )
  741. /*++
  742. Routine Description:
  743. Support routine called for each node in the node table. If node is
  744. alive, then we walk its interfaces, performing the appropriate
  745. action.
  746. Arguments:
  747. None
  748. Return Value:
  749. None
  750. --*/
  751. {
  752. //
  753. // If this node is alive and not the local node, then walk its
  754. // interfaces, supplying the appropriate routine to use at this time
  755. //
  756. if ( Node->MMState == ClusnetNodeStateAlive &&
  757. Node != CnpLocalNode ) {
  758. CnTrace(HBEAT_DETAIL, HbTraceScheduleHBForNode,
  759. "[HB] Scheduling HBs for node %u (state = %!mmstate!).",
  760. Node->Id, // LOGULONG
  761. Node->MMState // LOGMmState
  762. );
  763. MEMLOG( MemLogSendHBWalkNode, Node->Id, Node->MMState );
  764. CnpWalkInterfacesOnNode( Node, (PVOID)CnpSendHBs );
  765. }
  766. CnReleaseLock( &Node->Lock, Node->Irql );
  767. return TRUE; // the node table lock is still held
  768. } // CnpWalkNodesToSendHeartBeats
  769. BOOLEAN
  770. CnpWalkNodesToCheckForHeartBeats(
  771. IN PCNP_NODE Node,
  772. IN PVOID UpdateContext,
  773. IN CN_IRQL NodeTableIrql
  774. )
  775. /*++
  776. Routine Description:
  777. heart beat checking routine called for each node in the node table
  778. (except for the local node). If node is alive, then we walk its
  779. interfaces, performing the appropriate action.
  780. Arguments:
  781. None
  782. Return Value:
  783. None
  784. --*/
  785. {
  786. BOOLEAN NodeWasReachable;
  787. ULONG MissedHBCount;
  788. if ( Node->MMState == ClusnetNodeStateAlive &&
  789. Node != CnpLocalNode ) {
  790. //
  791. // this node is alive, so walk its interfaces. Assume the
  792. // worst by setting the HB Missed flag to true and
  793. // have the interfaces prove that this is wrong. Also make
  794. // note of the current unreachable flag setting. If it changes
  795. // this time
  796. //
  797. NodeWasReachable = !CnpIsNodeUnreachable( Node );
  798. Node->HBWasMissed = TRUE;
  799. CnTrace(HBEAT_DETAIL, HbTraceCheckNodeForHeartbeats,
  800. "[HB] Checking for HBs from node %u. WasReachable = %!bool!, "
  801. "state = %!mmstate!.",
  802. Node->Id, // LOGULONG
  803. NodeWasReachable, // LOGBOOLEAN
  804. Node->MMState // LOGMmState
  805. );
  806. MEMLOG( MemLogCheckHBNodeReachable, Node->Id, NodeWasReachable );
  807. MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );
  808. CnpWalkInterfacesOnNode( Node, (PVOID)CnpCheckForHBs );
  809. if ( Node->HBWasMissed ) {
  810. //
  811. // no HBs received on any of this node's IFs. if membership
  812. // still thinks this node is alive and the node has been
  813. // unreachable, then note that this node is toast in HB
  814. // info array. This will cause a node down event to be
  815. // generated for this node.
  816. //
  817. MissedHBCount = InterlockedIncrement( &Node->MissedHBs );
  818. CnTrace(HBEAT_EVENT, HbTraceNodeMissedHB,
  819. "[HB] Node %u has missed %u HBs on all interfaces, "
  820. "current state = %!mmstate!.",
  821. Node->Id, // LOGULONG
  822. MissedHBCount, // LOGULONG
  823. Node->MMState // LOGMmState
  824. );
  825. MEMLOG( MemLogCheckHBMissedHB, MissedHBCount, Node->MMState );
  826. //
  827. // if the this node is a either a member or in the process of
  828. // joining AND it's missed too many HBs AND we haven't issued a
  829. // node down, then issue a node down.
  830. //
  831. if ( ( Node->MMState == ClusnetNodeStateAlive
  832. ||
  833. Node->MMState == ClusnetNodeStateJoining
  834. )
  835. && MissedHBCount >= HBNodeLostHBTicks
  836. && !Node->NodeDownIssued
  837. )
  838. {
  839. Node->NodeDownIssued = TRUE;
  840. CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
  841. CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent,
  842. "[HB] Issuing NodeDown event for node %u.",
  843. Node->Id // LOGULONG
  844. );
  845. MEMLOG( MemLogNodeDownIssued, Node->Id, TRUE );
  846. }
  847. }
  848. } else {
  849. MEMLOG( MemLogCheckHBWalkNode, Node->Id, Node->MMState );
  850. }
  851. CnReleaseLock( &Node->Lock, Node->Irql );
  852. return TRUE; // the node table lock is still held
  853. } // CnpWalkNodesToCheckForHeartBeats
  854. VOID
  855. CnpHeartBeatDpc(
  856. PKDPC DpcObject,
  857. PVOID DeferredContext,
  858. PVOID Arg1,
  859. PVOID Arg2
  860. )
  861. /*++
  862. Routine Description:
  863. Start heart beating with the nodes that are marked alive and have
  864. an interface marked either OnlinePending or Online.
  865. Arguments:
  866. None
  867. Return Value:
  868. None
  869. --*/
  870. {
  871. PINTERFACE_HEARTBEAT_INFO pNodeHBInfo;
  872. PNETWORK_MCAST_HEARTBEAT_INFO pMcastHBInfo;
  873. CN_IRQL OldIrql;
  874. #ifdef MEMLOGGING
  875. static LARGE_INTEGER LastSysTime;
  876. LARGE_INTEGER CurrentTime;
  877. LARGE_INTEGER TimeDelta;
  878. //
  879. // try to determine the skew between when we asked to be run and
  880. // the time we actually did run
  881. //
  882. KeQuerySystemTime( &CurrentTime );
  883. if ( LastSysTime.QuadPart != 0 ) {
  884. //
  885. // add in HBTime which is negative due to relative sys time
  886. //
  887. TimeDelta.QuadPart = ( CurrentTime.QuadPart - LastSysTime.QuadPart ) +
  888. HBTime.QuadPart;
  889. if ( TimeDelta.QuadPart > MAX_DPC_SKEW ||
  890. TimeDelta.QuadPart < -MAX_DPC_SKEW
  891. )
  892. {
  893. LONG skew = (LONG)(TimeDelta.QuadPart/10000); // convert to ms
  894. MEMLOG( MemLogDpcTimeSkew, TimeDelta.LowPart, 0 );
  895. CnTrace(HBEAT_EVENT, HbTraceLateDpc,
  896. "[HB] Timer fired %d ms late.",
  897. skew // LOGSLONG
  898. );
  899. }
  900. }
  901. LastSysTime.QuadPart = CurrentTime.QuadPart;
  902. #endif // MEMLOGGING
  903. CnAcquireLock( &HeartBeatLock, &OldIrql );
  904. if ( !HeartBeatEnabled ) {
  905. CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent,
  906. "DPC: setting HeartBeatDpcFinished event"
  907. );
  908. MEMLOG( MemLogSetDpcEvent, 0, 0 );
  909. KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
  910. CnReleaseLock( &HeartBeatLock, OldIrql );
  911. return;
  912. }
  913. HeartBeatDpcRunning = TRUE;
  914. CnReleaseLock( &HeartBeatLock, OldIrql );
  915. if ( HeartBeatClockTicks == 0 ||
  916. HeartBeatClockTicks == HeartBeatSendTicks) {
  917. //
  918. // time to send HBs. Clear the count of target interfaces
  919. // and walk the node table finding the nodes that are
  920. // marked alive.
  921. //
  922. NetworkHBInfoCount = 0;
  923. InterfaceHBInfoCount = 0;
  924. CnpWalkNodeTable( CnpWalkNodesToSendHeartBeats, NULL );
  925. //
  926. // run down the list of networks and send out any multicast
  927. // heartbeats.
  928. //
  929. pMcastHBInfo = NetworkHeartBeatInfo;
  930. while ( NetworkHBInfoCount-- ) {
  931. CnTrace(
  932. HBEAT_EVENT, HbTraceSendMcastHB,
  933. "[HB] Sending multicast HB on net %u.\n",
  934. pMcastHBInfo->NetworkId
  935. );
  936. CxSendMcastHeartBeatMessage(
  937. pMcastHBInfo->NetworkId,
  938. pMcastHBInfo->McastGroup,
  939. pMcastHBInfo->McastTarget,
  940. pMcastHBInfo->NodeInfo,
  941. CnpSendMcastHBCompletion,
  942. pMcastHBInfo->McastGroup
  943. );
  944. ++pMcastHBInfo;
  945. }
  946. //
  947. // now run down the list of interfaces that we compiled and
  948. // send any unicast packets
  949. //
  950. pNodeHBInfo = InterfaceHeartBeatInfo;
  951. while ( InterfaceHBInfoCount-- ) {
  952. CnTrace(HBEAT_EVENT, HbTraceSendHB,
  953. "[HB] Sending HB to node %u on net %u, seqno %u, ackno %u.",
  954. pNodeHBInfo->NodeId, // LOGULONG
  955. pNodeHBInfo->NetworkId, // LOGULONG
  956. pNodeHBInfo->SeqNumber, // LOGULONG
  957. pNodeHBInfo->AckNumber // LOGULONG
  958. );
  959. CxSendHeartBeatMessage(pNodeHBInfo->NodeId,
  960. pNodeHBInfo->SeqNumber,
  961. pNodeHBInfo->AckNumber,
  962. pNodeHBInfo->NetworkId);
  963. MEMLOG(
  964. MemLogSendingHB,
  965. pNodeHBInfo->NodeId,
  966. pNodeHBInfo->NetworkId
  967. );
  968. ++pNodeHBInfo;
  969. }
  970. //
  971. // finally, up the tick count, progressing to the next potential
  972. // work item
  973. //
  974. HeartBeatClockTicks++;
  975. } else if ( HeartBeatClockTicks >= ( HeartBeatSendTicks - 1 )) {
  976. //
  977. // walk the node table looking for lack of heart beats on
  978. // a node's set of interfaces.
  979. //
  980. CnpWalkNodeTable( CnpWalkNodesToCheckForHeartBeats, NULL );
  981. HeartBeatClockTicks = 0;
  982. } else {
  983. HeartBeatClockTicks++;
  984. }
  985. //
  986. // indicate that we're no longer running and if we're shutting down
  987. // then set the event that the shutdown thread is waiting on
  988. //
  989. CnAcquireLock( &HeartBeatLock, &OldIrql );
  990. HeartBeatDpcRunning = FALSE;
  991. if ( !HeartBeatEnabled ) {
  992. KeSetEvent( &HeartBeatDpcFinished, 0, FALSE );
  993. CnTrace(HBEAT_DETAIL, HbTraceSetDpcEvent2,
  994. "DPC: setting HeartBeatDpcFinished event (2)"
  995. );
  996. MEMLOG( MemLogSetDpcEvent, 0, 0 );
  997. }
  998. CnReleaseLock( &HeartBeatLock, OldIrql );
  999. } // CnpHeartBeatDpc
  1000. PCNP_INTERFACE
  1001. CnpFindInterfaceLocked(
  1002. IN PCNP_NODE Node,
  1003. IN PCNP_NETWORK Network
  1004. )
  1005. /*++
  1006. Routine Description:
  1007. Given node and network structure pointers, find the interface
  1008. structure. Similar to CnpFindInterface except that we're passing
  1009. in pointers instead of IDs.
  1010. Arguments:
  1011. Node - pointer to node struct that sent the packet
  1012. Network - pointer to Network struct on which packet was received
  1013. Return Value:
  1014. Pointer to Interface on which packet was recv'd, otherwise NULL
  1015. --*/
  1016. {
  1017. PLIST_ENTRY IfEntry;
  1018. PCNP_INTERFACE Interface;
  1019. CnVerifyCpuLockMask(CNP_NODE_OBJECT_LOCK, // Required
  1020. 0, // Forbidden
  1021. CNP_NETWORK_OBJECT_LOCK_MAX // Maximum
  1022. );
  1023. for (IfEntry = Node->InterfaceList.Flink;
  1024. IfEntry != &(Node->InterfaceList);
  1025. IfEntry = IfEntry->Flink
  1026. )
  1027. {
  1028. Interface = CONTAINING_RECORD(IfEntry,
  1029. CNP_INTERFACE,
  1030. NodeLinkage);
  1031. if ( Interface->Network == Network ) {
  1032. break;
  1033. }
  1034. }
  1035. if ( IfEntry == &Node->InterfaceList ) {
  1036. return NULL;
  1037. } else {
  1038. return Interface;
  1039. }
  1040. } // CnpFindInterfaceLocked
  1041. VOID
  1042. CnpReceiveHeartBeatMessage(
  1043. IN PCNP_NETWORK Network,
  1044. IN CL_NODE_ID SourceNodeId,
  1045. IN ULONG SeqNumber,
  1046. IN ULONG AckNumber,
  1047. IN BOOLEAN Multicast
  1048. )
  1049. /*++
  1050. Routine Description:
  1051. We received a heartbeat from a node on a network. Reset
  1052. the missed HB count on that network's interface.
  1053. Arguments:
  1054. Network - pointer to network block on which the packet was received
  1055. SourceNodeId - node number that issued the packet
  1056. SeqNumber - sending nodes' sequence num
  1057. AckNumber - last seq number sent by us that was seen at the sending node
  1058. Multicast - indicates whether this heartbeat was received in a multicast
  1059. Return Value:
  1060. None
  1061. --*/
  1062. {
  1063. PCNP_NODE Node;
  1064. PCNP_INTERFACE Interface;
  1065. CX_OUTERSCREEN CurrentOuterscreen;
  1066. BOOLEAN IssueNetInterfaceUpEvent = FALSE;
  1067. //
  1068. // we ignore all packets until we're part of the cluster
  1069. //
  1070. CurrentOuterscreen.UlongScreen = InterlockedExchange(
  1071. &MMOuterscreen.UlongScreen,
  1072. MMOuterscreen.UlongScreen);
  1073. if ( !CnpClusterScreenMember(
  1074. CurrentOuterscreen.ClusterScreen,
  1075. INT_NODE( CnLocalNodeId )
  1076. )
  1077. )
  1078. {
  1079. return;
  1080. }
  1081. //
  1082. // convert the Node ID into a pointer and find the interface
  1083. // on which the packet was received.
  1084. //
  1085. Node = CnpFindNode( SourceNodeId );
  1086. CnAssert( Node != NULL );
  1087. Interface = CnpFindInterfaceLocked( Node, Network );
  1088. if ( Interface == NULL ) {
  1089. //
  1090. // somehow this network object went away while we were
  1091. // receiving some data on it. Just ignore this msg
  1092. //
  1093. CnTrace(HBEAT_ERROR, HbTraceHBFromUnknownNetwork,
  1094. "[HB] Discarding HB from node %u on an unknown network.",
  1095. Node->Id // LOGULONG
  1096. );
  1097. MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
  1098. goto error_exit;
  1099. }
  1100. //
  1101. // determine if this is guy is legit. If not in the outerscreen,
  1102. // then send a poison packet and we're done
  1103. //
  1104. if ( !CnpClusterScreenMember(
  1105. CurrentOuterscreen.ClusterScreen,
  1106. INT_NODE( SourceNodeId )
  1107. )
  1108. )
  1109. {
  1110. //
  1111. // Don't bother sending poison packets on restricted networks. They
  1112. // will be ignored.
  1113. //
  1114. if (CnpIsNetworkRestricted(Interface->Network)) {
  1115. goto error_exit;
  1116. }
  1117. CnTrace(HBEAT_ERROR, HbTraceHBFromBanishedNode,
  1118. "[HB] Discarding HB from banished node %u on net %u "
  1119. "due to outerscreen %04X. Sending poison packet back.",
  1120. Node->Id, // LOGULONG
  1121. Interface->Network->Id, // LOGULONG
  1122. CurrentOuterscreen.UlongScreen // LOGULONG
  1123. );
  1124. CcmpSendPoisonPacket( Node, NULL, 0, Network, NULL);
  1125. //
  1126. // The node lock was released.
  1127. //
  1128. return;
  1129. }
  1130. //
  1131. // Indicate that a multicast has been received from this interface.
  1132. // This allows us to include this interface in our multicasts.
  1133. //
  1134. if (Multicast) {
  1135. IF_CNDBG(CN_DEBUG_HBEATS) {
  1136. CNPRINT(("[HB] Received multicast heartbeat on "
  1137. "network %d from source node %d, seq %d, "
  1138. "ack %d.\n",
  1139. Network->Id, SourceNodeId,
  1140. SeqNumber, AckNumber
  1141. ));
  1142. }
  1143. if (!CnpInterfaceQueryReceivedMulticast(Interface)) {
  1144. CnpInterfaceSetReceivedMulticast(Interface);
  1145. CnpMulticastChangeNodeReachability(
  1146. Network,
  1147. Node,
  1148. TRUE, // reachable
  1149. TRUE, // raise event
  1150. NULL // OUT new mask
  1151. );
  1152. }
  1153. // There is no point in sending discovery packets to this
  1154. // interface.
  1155. Interface->McastDiscoverCount = 0;
  1156. Interface->McastRediscoveryCountdown = 0;
  1157. }
  1158. //
  1159. // Check that the incoming seq num is something we expect to
  1160. // guard against replay attacks.
  1161. //
  1162. if ( SeqNumber <= Interface->LastSequenceReceived) {
  1163. CnTrace(
  1164. HBEAT_ERROR, HbTraceHBOutOfSequence,
  1165. "[HB] Discarding HB from node %u on net %u with stale seqno %u. "
  1166. "Last seqno %u. Multicast: %!bool!.",
  1167. Node->Id, // LOGULONG
  1168. Interface->Network->Id, // LOGULONG
  1169. SeqNumber, // LOGULONG
  1170. Interface->LastSequenceReceived, // LOGULONG
  1171. Multicast
  1172. );
  1173. MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
  1174. goto error_exit;
  1175. }
  1176. // Update the interface's last received seq number
  1177. // which will be sent back as the ack number.
  1178. Interface->LastSequenceReceived = SeqNumber;
  1179. //
  1180. // Compare our seq number to the ack number in the packet.
  1181. // If more than two off then the source node is not recv'ing
  1182. // our heartbeats, but we're receiving theirs. This network is
  1183. // not usable. We ignore this msg to guarantee that we will
  1184. // declare the network down if the condition persists.
  1185. //
  1186. // In addition, if we are sending multicast heartbeats to this
  1187. // interface, revert to unicasts in case there is a multicast
  1188. // problem.
  1189. //
  1190. if (( Interface->SequenceToSend - AckNumber ) > 2 ) {
  1191. CnTrace(HBEAT_ERROR, HbTraceHBWithStaleAck,
  1192. "[HB] Discarding HB from node %u with stale ackno %u. "
  1193. "My seqno %u. Multicast: %!bool!.",
  1194. Node->Id, // LOGULONG
  1195. AckNumber, // LOGULONG
  1196. Interface->SequenceToSend, // LOGULONG
  1197. Multicast
  1198. );
  1199. MEMLOG( MemLogSeqAckMismatch, (ULONG_PTR)Interface, Interface->State );
  1200. if (CnpInterfaceQueryReceivedMulticast(Interface)) {
  1201. CnpInterfaceClearReceivedMulticast(Interface);
  1202. Interface->McastDiscoverCount = CNP_INTERFACE_MCAST_DISCOVERY;
  1203. CnpMulticastChangeNodeReachability(
  1204. Network,
  1205. Node,
  1206. FALSE, // not reachable
  1207. TRUE, // raise event
  1208. NULL // OUT new mask
  1209. );
  1210. }
  1211. goto error_exit;
  1212. }
  1213. MEMLOG4( MemLogReceivedPacket,
  1214. SeqNumber, AckNumber,
  1215. SourceNodeId, Interface->Network->Id );
  1216. CnTrace(HBEAT_EVENT, HbTraceReceivedHBpacket,
  1217. "[HB] Received HB from node %u on net %u, seqno %u, ackno %u, "
  1218. "multicast: %!bool!.",
  1219. SourceNodeId, // LOGULONG
  1220. Interface->Network->Id, // LOGULONG
  1221. SeqNumber, // LOGULONG
  1222. AckNumber, // LOGULONG
  1223. Multicast
  1224. );
  1225. // Reset the interface's and node's Missed HB count
  1226. // to indicate that things are somewhat normal.
  1227. //
  1228. InterlockedExchange( &Interface->MissedHBs, 0 );
  1229. //
  1230. // Don't reset node miss count on restricted nets.
  1231. //
  1232. if (!CnpIsNetworkRestricted(Interface->Network)) {
  1233. InterlockedExchange( &Node->MissedHBs, 0 );
  1234. }
  1235. //
  1236. // if local interface was previously disconnected (e.g. received
  1237. // a WMI NDIS status media disconnect event), reconnect it now.
  1238. //
  1239. if (CnpIsNetworkLocalDisconn(Interface->Network)) {
  1240. CxReconnectLocalInterface(Interface->Network->Id);
  1241. }
  1242. //
  1243. // move interface to online if necessary
  1244. //
  1245. if ( Interface->State == ClusnetInterfaceStateOnlinePending ||
  1246. Interface->State == ClusnetInterfaceStateUnreachable ) {
  1247. CnAcquireLockAtDpc( &Interface->Network->Lock );
  1248. Interface->Network->Irql = DISPATCH_LEVEL;
  1249. CnTrace(HBEAT_DETAIL, HbTraceInterfaceOnline,
  1250. "[HB] Moving interface for node %u on network %u to online "
  1251. "state.",
  1252. Node->Id, // LOGULONG
  1253. Interface->Network->Id // LOGULONG
  1254. );
  1255. MEMLOG( MemLogOnlineIf, Node->Id, Interface->State );
  1256. //
  1257. // Events acquire the IO cancel spin lock so we do this after
  1258. // node and network locks have been released and only if we're
  1259. // moving from unreachable.
  1260. //
  1261. IssueNetInterfaceUpEvent = TRUE;
  1262. CnpOnlineInterface( Interface );
  1263. }
  1264. CnReleaseLock( &Node->Lock, Node->Irql );
  1265. if ( IssueNetInterfaceUpEvent ) {
  1266. CnTrace(HBEAT_EVENT, HbTraceInterfaceUpEvent,
  1267. "[HB] Issuing InterfaceUp event for node %u on network %u.",
  1268. Node->Id, // LOGULONG
  1269. Interface->Network->Id // LOGULONG
  1270. );
  1271. CnIssueEvent(ClusnetEventNetInterfaceUp,
  1272. Node->Id,
  1273. Interface->Network->Id);
  1274. }
  1275. //
  1276. // when the first HB is recv'ed, a node may be in either the
  1277. // join or alive state (the sponser, for instance, moves from
  1278. // dead to alive). We need to clear the Node down issued flag
  1279. // for either case. If the MM State is joining, then a node up
  1280. // event must be issued as well. Note that we ignore HBs for
  1281. // node health purposes on restricted nets.
  1282. //
  1283. if ( ( (Node->MMState == ClusnetNodeStateJoining)
  1284. ||
  1285. (Node->MMState == ClusnetNodeStateAlive)
  1286. )
  1287. &&
  1288. Node->NodeDownIssued
  1289. &&
  1290. !CnpIsNetworkRestricted(Interface->Network)
  1291. )
  1292. {
  1293. Node->NodeDownIssued = FALSE;
  1294. MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
  1295. if ( Node->MMState == ClusnetNodeStateJoining ) {
  1296. CnTrace(HBEAT_EVENT, HbTraceNodeUpEvent,
  1297. "[HB] Issuing NodeUp event for node %u.",
  1298. Node->Id // LOGULONG
  1299. );
  1300. MEMLOG( MemLogNodeUp, Node->Id, 0 );
  1301. CnIssueEvent( ClusnetEventNodeUp, Node->Id, 0 );
  1302. }
  1303. }
  1304. return;
  1305. error_exit:
  1306. CnReleaseLock( &Node->Lock, Node->Irql );
  1307. return;
  1308. } // CnpReceiveHeartBeatMessage
  1309. NTSTATUS
  1310. CxSetOuterscreen(
  1311. IN ULONG Outerscreen
  1312. )
  1313. {
  1314. //
  1315. // based on the number of valid nodes, make sure any extranious
  1316. // bits are not set
  1317. //
  1318. CnAssert( ClusterDefaultMaxNodes <= 32 );
  1319. CnAssert(
  1320. ( Outerscreen & ( 0xFFFFFFFE << ( 32 - ClusterDefaultMaxNodes - 1 )))
  1321. == 0);
  1322. IF_CNDBG( CN_DEBUG_HBEATS )
  1323. CNPRINT(("[CCMP] Setting outerscreen to %04X\n",
  1324. ((Outerscreen & 0xFF)<< 8) | ((Outerscreen >> 8) & 0xFF)));
  1325. InterlockedExchange( &MMOuterscreen.UlongScreen, Outerscreen );
  1326. CnTrace(HBEAT_EVENT, HbTraceSetOuterscreen,
  1327. "[HB] Setting outerscreen to %04X",
  1328. Outerscreen // LOGULONG
  1329. );
  1330. MEMLOG( MemLogOuterscreen, Outerscreen, 0 );
  1331. return STATUS_SUCCESS;
  1332. } // CxSetOuterscreen
  1333. VOID
  1334. CnpTerminateClusterService(
  1335. IN PVOID Parameter
  1336. )
  1337. {
  1338. PWORK_QUEUE_ITEM workQueueItem = Parameter;
  1339. ULONG sourceNodeId = *((PULONG)(workQueueItem + 1));
  1340. WCHAR sourceNodeStringId[ 16 ];
  1341. swprintf(sourceNodeStringId, L"%u", sourceNodeId );
  1342. //
  1343. // only way we can get here right now is if a poison packet was received.
  1344. //
  1345. CnWriteErrorLogEntry(CLNET_NODE_POISONED,
  1346. STATUS_SUCCESS,
  1347. NULL,
  1348. 0,
  1349. 1,
  1350. sourceNodeStringId );
  1351. if ( ClussvcProcessHandle ) {
  1352. //
  1353. // there is still a race condition between the cluster service shutting
  1354. // down and closing this handle and it being used here. This really
  1355. // isn't a problem since the user mode portion is going away anyway.
  1356. // Besides, there isn't alot we can do if this call doesn't work anyway.
  1357. //
  1358. ZwTerminateProcess( ClussvcProcessHandle, STATUS_CLUSTER_POISONED );
  1359. }
  1360. CnFreePool( Parameter );
  1361. } // CnpTerminateClusterService
  1362. VOID
  1363. CnpReceivePoisonPacket(
  1364. IN PCNP_NETWORK Network,
  1365. IN CL_NODE_ID SourceNodeId,
  1366. IN ULONG SeqNumber
  1367. )
  1368. {
  1369. PCNP_NODE Node;
  1370. PCNP_INTERFACE Interface;
  1371. PWORK_QUEUE_ITEM WorkItem;
  1372. //
  1373. // give the node and the network pointers, find the interface on which
  1374. // this packet was received
  1375. //
  1376. Node = CnpFindNode( SourceNodeId );
  1377. if ( Node == NULL ) {
  1378. CnTrace(HBEAT_ERROR, HbTraceNoPoisonFromUnknownNode,
  1379. "[HB] Discarding poison packet from unknown node %u.",
  1380. Node->Id // LOGULONG
  1381. );
  1382. return;
  1383. }
  1384. Interface = CnpFindInterfaceLocked( Node, Network );
  1385. if ( Interface == NULL ) {
  1386. //
  1387. // somehow this network object went away while we were
  1388. // receiving some data on it. Just ignore this msg
  1389. //
  1390. CnTrace(HBEAT_ERROR, HbTracePoisonFromUnknownNetwork,
  1391. "[HB] Discarding poison packet from node %u on unknown network.",
  1392. Node->Id // LOGULONG
  1393. );
  1394. MEMLOG( MemLogNoNetID, Node->Id, (ULONG_PTR)Network );
  1395. CnReleaseLock( &Node->Lock, Node->Irql );
  1396. return;
  1397. }
  1398. //
  1399. // Check that the incoming seq num is something we expect to
  1400. // guard against replay attacks.
  1401. //
  1402. if ( SeqNumber <= Interface->LastSequenceReceived) {
  1403. CnTrace(HBEAT_ERROR , HbTracePoisonOutOfSeq,
  1404. "[HB] Discarding poison packet from node %u with stale seqno %u. "
  1405. "Current seqno %u.",
  1406. SourceNodeId, // LOGULONG
  1407. SeqNumber, // LOGULONG
  1408. Interface->LastSequenceReceived // LOGULONG
  1409. );
  1410. MEMLOG( MemLogOutOfSequence, SourceNodeId, SeqNumber );
  1411. CnReleaseLock( &Node->Lock, Node->Irql );
  1412. return;
  1413. }
  1414. //
  1415. // Ignore poison packets from restricted networks
  1416. //
  1417. if (CnpIsNetworkRestricted(Network)) {
  1418. CnTrace(HBEAT_ERROR , HbTracePoisonFromRestrictedNet,
  1419. "[HB] Discarding poison packet from node %u on restricted "
  1420. "network %u.",
  1421. SourceNodeId, // LOGULONG
  1422. Network->Id // LOGULONG
  1423. );
  1424. CnReleaseLock( &Node->Lock, Node->Irql );
  1425. return;
  1426. }
  1427. //
  1428. // We always honor a recv'ed poison packet.
  1429. //
  1430. CnReleaseLock( &Node->Lock, Node->Irql );
  1431. CnTrace(HBEAT_EVENT, HbTracePoisonPktReceived,
  1432. "[HB] Received poison packet from node %u. Halting this node.",
  1433. SourceNodeId // LOGULONG
  1434. );
  1435. MEMLOG( MemLogPoisonPktReceived, SourceNodeId, 0 );
  1436. CnIssueEvent( ClusnetEventPoisonPacketReceived, SourceNodeId, 0 );
  1437. //
  1438. // Shutdown all cluster network processing.
  1439. //
  1440. CnHaltOperation(NULL);
  1441. //
  1442. // allocate a work queue item so we can whack the cluster service
  1443. // process. allocate extra space at the end and stuff the source node ID
  1444. // out there. Yes, I know it is groady...
  1445. //
  1446. WorkItem = CnAllocatePool( sizeof( WORK_QUEUE_ITEM ) + sizeof( CL_NODE_ID ));
  1447. if ( WorkItem != NULL ) {
  1448. *((PULONG)(WorkItem + 1)) = SourceNodeId;
  1449. ExInitializeWorkItem( WorkItem, CnpTerminateClusterService, WorkItem );
  1450. ExQueueWorkItem( WorkItem, CriticalWorkQueue );
  1451. }
  1452. return;
  1453. } // CnpReceivePoisonPacket
  1454. VOID
  1455. CnpWalkInterfacesAfterRegroup(
  1456. IN PCNP_INTERFACE Interface
  1457. )
  1458. /*++
  1459. Routine Description:
  1460. Reset counters for each interface after a regroup
  1461. Arguments:
  1462. None
  1463. Return Value:
  1464. None
  1465. --*/
  1466. {
  1467. InterlockedExchange( &Interface->MissedHBs, 0 );
  1468. CnReleaseLock(&Interface->Network->Lock, Interface->Network->Irql);
  1469. } // CnpWalkInterfacesAfterRegroup
  1470. BOOLEAN
  1471. CnpWalkNodesAfterRegroup(
  1472. IN PCNP_NODE Node,
  1473. IN PVOID UpdateContext,
  1474. IN CN_IRQL NodeTableIrql
  1475. )
  1476. /*++
  1477. Routine Description:
  1478. Called for each node in the node table. Regroup has finished
  1479. so we clear the node's missed Heart beat count and its node down
  1480. issued flag. No node should be unreachable at this point. If we
  1481. find one, kick off another regroup.
  1482. Arguments:
  1483. standard...
  1484. Return Value:
  1485. None
  1486. --*/
  1487. {
  1488. //
  1489. // check for inconsistent settings of Comm and MM state
  1490. //
  1491. if ( ( Node->MMState == ClusnetNodeStateAlive
  1492. ||
  1493. Node->MMState == ClusnetNodeStateJoining
  1494. )
  1495. &&
  1496. Node->CommState == ClusnetNodeCommStateUnreachable
  1497. )
  1498. {
  1499. CnTrace(HBEAT_EVENT, HbTraceNodeDownEvent2,
  1500. "[HB] Issuing NodeDown event for node %u.",
  1501. Node->Id // LOGULONG
  1502. );
  1503. MEMLOG( MemLogInconsistentStates, Node->Id, Node->MMState );
  1504. CnIssueEvent( ClusnetEventNodeDown, Node->Id, 0 );
  1505. }
  1506. CnpWalkInterfacesOnNode( Node, (PVOID)CnpWalkInterfacesAfterRegroup );
  1507. InterlockedExchange( &Node->MissedHBs, 0 );
  1508. //
  1509. // clear this only for nodes in the alive state. Once a node is marked
  1510. // dead, the flag is re-init'ed to true (this is used during a join to
  1511. // issue only one node up event).
  1512. //
  1513. if ( Node->MMState == ClusnetNodeStateAlive ) {
  1514. Node->NodeDownIssued = FALSE;
  1515. MEMLOG( MemLogNodeDownIssued, Node->Id, FALSE );
  1516. }
  1517. CnReleaseLock( &Node->Lock, Node->Irql );
  1518. return TRUE; // the node table lock is still held
  1519. } // CnpWalkNodesAfterRegroup
  1520. VOID
  1521. CxRegroupFinished(
  1522. ULONG NewEpoch
  1523. )
  1524. /*++
  1525. Routine Description:
  1526. called when regroup has finished. Walk the node list and
  1527. perform the cleanup in the walk routine.
  1528. Arguments:
  1529. None
  1530. Return Value:
  1531. None
  1532. --*/
  1533. {
  1534. MEMLOG( MemLogRegroupFinished, NewEpoch, 0 );
  1535. CnTrace(HBEAT_EVENT, HbTraceRegroupFinished,
  1536. "[HB] Regroup finished, new epoch = %u.",
  1537. NewEpoch // LOGULONG
  1538. );
  1539. CnAssert( NewEpoch > EventEpoch );
  1540. InterlockedExchange( &EventEpoch, NewEpoch );
  1541. CnpWalkNodeTable( CnpWalkNodesAfterRegroup, NULL );
  1542. } // CxRegroupFinished
  1543. /* end chbeat.c */