Leaked source code of windows server 2003


/*++

Copyright (c) 1996  Microsoft Corporation

Module Name:

    gumevent.c

Abstract:

    Cluster event handling routines for the Global Update Manager

Author:

    John Vert (jvert) 22-Apr-1996

Revision History:

--*/
#include "gump.h"
#include <bitset.h>

//
// Event handling is divided into two parts, sync and async. The sync
// part is executed by all nodes during phase 4 cleanup of the regroup.
// The sync handler must be very fast, since we run in the context of
// the regroup thread.
//
// The async part is executed in a worker thread, which finishes
// handling node down events.
//

//
// Flag to denote whether we need to replay the last update in the async
// event handler.
//
static BOOL GumReplay = FALSE;

//
// Flag to denote whether we are in the middle of a dispatch
//
static BOOL GumUpdatePending = FALSE;
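//
// Illustrative flow (a sketch inferred from this module, not original
// text): on a node down regroup, every surviving node first runs
//
//     GumpSyncEventHandler()   // regroup phase 4: bump generation
//                              // numbers, fix up locker/locking node,
//                              // set GumReplay, wake waiters
//
// and later, from a worker thread,
//
//     GumpEventHandler()       // replays the last update via
//                              // GumpReUpdate() if GumReplay is set
//                              // and no dispatch is pending
//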
DWORD
GumpGetNodeGenNum(PGUM_INFO GumInfo, DWORD NodeId)
/*++

Routine Description:

    Return the current generation number for the specified node. If the
    node is already dead, we return the previous generation number so that
    future calls to GumpWaitNodeDown, GumpDispatchStart, and GumpDispatchEnd
    fail without checking whether the node is alive or dead.

Arguments:

    NodeId - Node number

Return Value:

    Node's current generation number

--*/
{
    DWORD dwCur;

    CL_ASSERT(NodeId < NmMaxNodeId);

    EnterCriticalSection(&GumpLock);
    dwCur = GumNodeGeneration[NodeId];
    if (GumInfo->ActiveNode[NodeId] == FALSE) {
        //
        // Node is already dead, return previous generation number
        //
        dwCur--;
    }
    LeaveCriticalSection(&GumpLock);
    return (dwCur);
}
void
GumpWaitNodeDown(DWORD NodeId, DWORD Gennum)
/*++

Routine Description:

    Wait until the specified node has transitioned to the down state.

Arguments:

    NodeId - node id

    Gennum - node's generation number before the down event

Return Value:

    None

--*/
{
    CL_ASSERT(NodeId < NmMaxNodeId);

    EnterCriticalSection(&GumpLock);
    if (Gennum != GumNodeGeneration[NodeId]) {
        LeaveCriticalSection(&GumpLock);
        return;
    }
    //
    // Increment the waiter count, then go wait on the semaphore.
    //
    ++GumNodeWait[NodeId].WaiterCount;
    LeaveCriticalSection(&GumpLock);
    WaitForSingleObject(GumNodeWait[NodeId].hSemaphore, INFINITE);
}
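//
// Illustrative sketch, not part of the original module: the intended
// caller pattern is to capture the node's generation number first and
// then wait, so a down transition between the two calls is not missed.
// The function name and the update-type index (0) are hypothetical.
//
#if 0
static void
GumpExampleWaitForNodeDown(DWORD NodeId)
{
    DWORD Gennum = GumpGetNodeGenNum(&GumTable[0], NodeId);

    //
    // If the node went down between the two calls, the generation
    // numbers no longer match and this returns immediately; otherwise
    // GumpSyncEventHandler releases the per-node semaphore on the down
    // transition.
    //
    GumpWaitNodeDown(NodeId, Gennum);
}
#endif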
BOOL
GumpDispatchStart(DWORD NodeId, DWORD Gennum)
/*++

Routine Description:

    Mark the start of a dispatch. If the generation number supplied is
    old, we fail the dispatch since the node has transitioned.

Arguments:

    NodeId - node id

    Gennum - node's generation number before the down event

Return Value:

    TRUE - node state is fine, go ahead with dispatch

    FALSE - node has transitioned, abort dispatch

--*/
{
    //
    // If the generation number has changed return FALSE, else
    // return TRUE.
    //
    CL_ASSERT(NodeId < NmMaxNodeId);

    EnterCriticalSection(&GumpLock);
    if (Gennum != GumNodeGeneration[NodeId]) {
        LeaveCriticalSection(&GumpLock);
        return (FALSE);
    }
    //
    // Signal that we are in the middle of an update
    //
    GumUpdatePending = TRUE;
    LeaveCriticalSection(&GumpLock);
    return (TRUE);
}
void
GumpDispatchAbort()
/*++

Routine Description:

    Abort and mark the end of the current dispatch by resetting the
    pending flag. This is used when the dispatch routine fails and we
    don't need to replay it for other nodes.

Arguments:

    none

Return Value:

    none

--*/
{
    EnterCriticalSection(&GumpLock);
    GumUpdatePending = FALSE;
    LeaveCriticalSection(&GumpLock);
}
void
GumpDispatchEnd(DWORD NodeId, DWORD Gennum)
/*++

Routine Description:

    Mark the end of a dispatch. If the generation number supplied is
    old and the update needs to be reapplied, we replay the update for
    the other nodes.

Arguments:

    NodeId - node id

    Gennum - node's generation number before the down event

Return Value:

    none

--*/
{
    //
    // If the generation number changed while the update was
    // happening, we need to replay it.
    //
    CL_ASSERT(NodeId < NmMaxNodeId);

    EnterCriticalSection(&GumpLock);
    GumUpdatePending = FALSE;
    if (Gennum != GumNodeGeneration[NodeId] && GumReplay) {
        GumReplay = FALSE;
        LeaveCriticalSection(&GumpLock);
        GumpReUpdate();
    } else {
        LeaveCriticalSection(&GumpLock);
    }
}
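//
// Illustrative sketch, not part of the original module: a dispatch is
// bracketed by GumpDispatchStart/GumpDispatchEnd against the generation
// number captured before the work began, with GumpDispatchAbort as the
// failure path. The function name, update-type index, and update logic
// are hypothetical.
//
#if 0
static DWORD
GumpExampleDispatch(DWORD NodeId)
{
    DWORD Gennum = GumpGetNodeGenNum(&GumTable[0], NodeId);
    DWORD Status;

    if (!GumpDispatchStart(NodeId, Gennum)) {
        //
        // The node transitioned; do not dispatch.
        //
        return (ERROR_CLUSTER_NODE_DOWN);
    }

    Status = ERROR_SUCCESS;     // hypothetical: perform the update here

    if (Status != ERROR_SUCCESS) {
        GumpDispatchAbort();             // failed; no replay needed
    } else {
        GumpDispatchEnd(NodeId, Gennum); // may replay via GumpReUpdate()
    }
    return (Status);
}
#endif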
DWORD
WINAPI
GumpEventHandler(
    IN CLUSTER_EVENT Event,
    IN PVOID Context
    )
/*++

Routine Description:

    Processes node down cluster events. Replays the last update and
    wakes up any pending threads.

Arguments:

    Event - Supplies the type of cluster event.

    Context - Supplies the event-specific context

Return Value:

    ERROR_SUCCESS

--*/
{
    BITSET DownedNodes = (BITSET)((ULONG_PTR)Context);
    DWORD NodeId;

    if (Event != CLUSTER_EVENT_NODE_DOWN_EX) {
        return(ERROR_SUCCESS);
    }
    CL_ASSERT(BitsetIsNotMember(NmLocalNodeId, DownedNodes));

    EnterCriticalSection(&GumpLock);
    ClRtlLogPrint(LOG_NOISE,
        "[GUM] Nodes down: %1!04X!. Locker=%2!u!, Locking=%3!d!\n",
        DownedNodes,
        GumpLockerNode,
        GumpLockingNode
        );
    //
    // Since all gum updates are synchronized and the last buffer
    // and last update type are shared across all updates, we don't have
    // to reissue the update for all types, only for the last update type.
    // SS: note we use the last GumInfo structure for now since GumInfo
    // structures are still maintained for every gum update type.
    //
    if ( GumReplay && GumUpdatePending == FALSE)
    {
        // XXX: These should be if statements and panic this node instead.
        CL_ASSERT(GumpLockerNode == NmLocalNodeId);
        CL_ASSERT(GumpLockingNode == NmLocalNodeId);
        GumReplay = FALSE;
        LeaveCriticalSection(&GumpLock);
        GumpReUpdate();
    } else {
        LeaveCriticalSection(&GumpLock);
    }
    ClRtlLogPrint(LOG_NOISE,
        "[GUM] Node down processing completed: %1!04X!.\n",
        DownedNodes
        );
    return(ERROR_SUCCESS);
}
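//
// Illustrative sketch, not part of the original module: DownedNodes is a
// BITSET (see bitset.h), assumed here to be a bitmask indexed by node ID,
// which is why it prints as %1!04X! in the log messages above. Scanning
// it looks like the loops in GumpSyncEventHandler below:
//
#if 0
    for (NodeId = ClusterMinNodeId; NodeId <= NmMaxNodeId; ++NodeId) {
        if (BitsetIsMember(NodeId, DownedNodes)) {
            // NodeId went down in this regroup
        }
    }
#endif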
DWORD
WINAPI
GumpSyncEventHandler(
    IN CLUSTER_EVENT Event,
    IN PVOID Context
    )
/*++

Routine Description:

    Processes node down cluster events. Updates the locker/locking node
    state and decides whether we need to replay the last update in the
    async handler.

Arguments:

    Event - Supplies the type of cluster event.

    Context - Supplies the event-specific context

Return Value:

    ERROR_SUCCESS

--*/
{
    BITSET DownedNodes = (BITSET)((ULONG_PTR)Context);
    DWORD NodeId;

    if (Event != CLUSTER_EVENT_NODE_DOWN_EX) {
        return(ERROR_SUCCESS);
    }
    CL_ASSERT(BitsetIsNotMember(NmLocalNodeId, DownedNodes));

    EnterCriticalSection(&GumpLock);
    ClRtlLogPrint(LOG_NOISE,
        "[GUM] Sync Nodes down: %1!04X!. Locker=%2!u!, Locking=%3!d!\n",
        DownedNodes,
        GumpLockerNode,
        GumpLockingNode
        );
    //
    // Remove downed nodes from any further GUM updates.
    //
    for (NodeId = ClusterMinNodeId; NodeId <= NmMaxNodeId; ++NodeId) {
        if (BitsetIsMember(NodeId, DownedNodes))
        {
            GUM_UPDATE_TYPE UpdateType;

            for (UpdateType = 0; UpdateType < GumUpdateMaximum; UpdateType++)
            {
                GumTable[UpdateType].ActiveNode[NodeId] = FALSE;
            }
            //
            // Advance node generation number
            //
            GumNodeGeneration[NodeId]++;
        }
    }
    //
    // Update LockerNode/LockingNode if necessary.
    //
    // Since all gum updates are synchronized and the last buffer
    // and last update type are shared across all updates, we don't have
    // to reissue the update for all types, only for the last update type.
    // SS: note we use the last GumInfo structure for now since GumInfo
    // structures are still maintained for every gum update type.
    // SS: Should we be inspecting GumpLockingNode after acquiring the lock?
    // Else, s_GumUnlockUpdate can hand over the lock to a node on the
    // waiter list while the gum sync handler hands it to itself (i.e.,
    // selects itself as the LockingNode). Now that we have added the
    // generation number for lock acquisition, and also acquire GumpLock
    // when obtaining a lock, that should be prevented.
    // For now, we will leave this as is.
    //
    if ( (GumpLockerNode == NmLocalNodeId) &&
         (BitsetIsMember(GumpLockingNode, DownedNodes)) )
    {
        EnterCriticalSection(&GumpUpdateLock);
        //
        // This node is the locker and the lock is currently held
        // by one of the failed nodes. Take ownership of the lock and
        // reissue the update to all remaining nodes.
        //
        ClRtlLogPrint(LOG_NOISE,
            "[GUM] GumpSyncEventHandler taking ownership of the lock from node %1!d!.\n",
            GumpLockingNode
            );
        GumpLockingNode = NmLocalNodeId;
        LeaveCriticalSection(&GumpUpdateLock);
        //
        // Reissue the update in the async phase.
        //
        GumReplay = TRUE;
    }
    else if ( BitsetIsMember(GumpLockerNode, DownedNodes) )
    {
        //
        // One of the failed nodes was the locker node, so select a new
        // locker node now.
        //
        // Find the active node with the next ID after the previous locker
        // node, wrapping around at NmMaxNodeId.
        //
        DWORD j;

        for (j=GumpLockerNode+1; j != GumpLockerNode; j++) {
            if (j==(NmMaxNodeId+1)) {
                j = ClusterMinNodeId;
                CL_ASSERT(j != GumpLockerNode);
            }
            if (GumTable[0].ActiveNode[j]) {
                ClRtlLogPrint(LOG_NOISE,
                    "[GUM] GumpSyncEventHandler: new locker node is node %1!d!\n",
                    j);
                GumpLockerNode = j;
                break;
            }
        }
        //
        // If this node has been promoted to be the new locker node,
        // reissue the last update we saw.
        //
        if (GumpLockerNode == NmLocalNodeId)
        {
            //
            // Manually acquire the lock here. The update has already
            // been issued on this node.
            //
            EnterCriticalSection(&GumpUpdateLock);
            CL_ASSERT(GumpLockingNode == (DWORD)-1);
            GumpLockingNode = NmLocalNodeId;
            LeaveCriticalSection(&GumpUpdateLock);
            //
            // Reissue the update in the async phase.
            //
            GumReplay = TRUE;
        }
    }
    //
    // Wake any threads waiting for the nodes to transition to down.
    //
    for (NodeId = ClusterMinNodeId; NodeId <= NmMaxNodeId; ++NodeId) {
        if (BitsetIsMember(NodeId, DownedNodes))
        {
            if (GumNodeWait[NodeId].WaiterCount != 0) {
                ReleaseSemaphore(GumNodeWait[NodeId].hSemaphore,
                                 GumNodeWait[NodeId].WaiterCount,
                                 NULL);
                GumNodeWait[NodeId].WaiterCount = 0;
            }
        }
    }
    ClRtlLogPrint(LOG_NOISE,
        "[GUM] Sync Nodes down processing completed: %1!04X!.\n",
        DownedNodes
        );
    LeaveCriticalSection(&GumpLock);
    return(ERROR_SUCCESS);
}
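//
// Illustrative sketch, not part of the original module: the locker
// selection above is a circular scan over node IDs starting just past
// the failed locker. Factored out, it would look roughly like this
// (helper name is hypothetical; GumTable[0] stands in for any update
// type, since ActiveNode[] is kept consistent across all of them):
//
#if 0
static DWORD
GumpExampleNextActiveNode(DWORD StartNodeId)
{
    DWORD j;

    for (j = StartNodeId + 1; j != StartNodeId; j++) {
        if (j == (NmMaxNodeId + 1)) {
            j = ClusterMinNodeId;   // wrap around past the highest ID
        }
        if (GumTable[0].ActiveNode[j]) {
            return (j);             // first active node past StartNodeId
        }
    }
    return (StartNodeId);           // only StartNodeId itself remains
}
#endif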
VOID
GumpReUpdate(
    VOID
    )
/*++

Routine Description:

    Reissues a GUM update to all nodes. This is used in the event of
    a failure.

Arguments:

    None

Return Value:

    None

--*/
{
    DWORD MyId = NmGetNodeId(NmLocalNode);
    DWORD i, seq;
    DWORD Status;
    RPC_ASYNC_STATE AsyncState;

    //
    // This node must be the locker, and the lock must be held by this node.
    //
    CL_ASSERT(GumpLockerNode == MyId);
    CL_ASSERT(GumpLockingNode == MyId);

    ZeroMemory((PVOID) &AsyncState, sizeof(RPC_ASYNC_STATE));
    AsyncState.u.hEvent = CreateEvent(
                              NULL,    // no attributes
                              TRUE,    // manual reset
                              FALSE,   // initial state unsignalled
                              NULL     // no object name
                              );
    if (AsyncState.u.hEvent == NULL) {
        Status = GetLastError();
        ClRtlLogPrint(LOG_CRITICAL,
            "[GUM] GumpReUpdate: Failed to allocate event object for async "
            "RPC call, status %1!u!\n",
            Status
            );
        //
        // The gum lock still needs to be freed since it is always acquired
        // before this function is called.
        //
        goto ReleaseLock;
    }
    //
    // Grab the send-update lock to serialize with a concurrent update on
    // this node. Note also that it is SAFEST to read all the GumpXXX global
    // variables only after getting the send-update lock, else you run into
    // the danger of s_GumUpdateNode changing the values of the variables
    // after you read them.
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    seq = GumpSequence - 1;
    LeaveCriticalSection(&GumpSendUpdateLock);
    //
    // If there is no valid update to be propagated, skip the replay. The
    // gum lock still needs to be freed since it is always acquired before
    // this function is called.
    //
    if (GumpLastUpdateType == GumUpdateMaximum) goto ReleaseLock;
again:
    ClRtlLogPrint(LOG_UNUSUAL,
        "[GUM] GumpReUpdate reissuing last update for send type %1!d!\n",
        GumpLastUpdateType);

    for (i=MyId+1; i != NmLocalNodeId; i++) {
        if (i == (NmMaxNodeId +1)) {
            i=ClusterMinNodeId;
            if (i == NmLocalNodeId) {
                break;
            }
        }
        if (GumTable[GumpLastUpdateType].ActiveNode[i]) {
            //
            // Dispatch the update to the specified node.
            //
            ClRtlLogPrint(LOG_NOISE,
                "[GUM] GumpReUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                seq,
                GumpLastUpdateType,
                GumpLastContext,
                i);
            if (GumpLastBufferValid != FALSE) {
                Status = GumpUpdateRemoteNode(
                             &AsyncState,
                             i,
                             GumpLastUpdateType,
                             GumpLastContext,
                             seq,
                             GumpLastBufferLength,
                             GumpLastBuffer
                             );
            }
            else {
                //
                // Replay an end-join. Since we also ignore other updates,
                // we should be calling GumUpdateNode for those; however,
                // calling GumJoinUpdateNode seems to do the job of
                // signalling the other nodes to bump up their sequence
                // number without processing the update.
                //
                try {
                    NmStartRpc(i);
                    Status = GumJoinUpdateNode(GumpReplayRpcBindings[i],
                                               -1, // signal replay
                                               GumpLastUpdateType,
                                               GumpLastContext,
                                               seq,
                                               GumpLastBufferLength,
                                               GumpLastBuffer);
                    NmEndRpc(i);
                } except (I_RpcExceptionFilter(RpcExceptionCode())) {
                    NmEndRpc(i);
                    Status = GetExceptionCode();
                }
            }
            //
            // If the update on the other node failed, then the
            // other node must now be out of the cluster since the
            // update has already completed on the locker node.
            //
            if (Status != ERROR_SUCCESS && Status != ERROR_CLUSTER_DATABASE_SEQMISMATCH) {
                ClRtlLogPrint(LOG_CRITICAL,
                    "[GUM] GumpReUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
                    i,
                    Status);
                NmDumpRpcExtErrorInfo(Status);
                GumpCommFailure(&GumTable[GumpLastUpdateType],
                                i,
                                Status,
                                TRUE);
            }
        }
    }
    //
    // At this point we know that all nodes have received our replay and no
    // outstanding sends are in progress. However, a send could have arrived
    // at this node (via s_GumUpdateNode) and the sender died after that. In
    // that case we are the only node that has it. Since we are the locker
    // and the locking node, we have to replay again if that happened.
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    if (seq != (GumpSequence - 1)) {
        seq = GumpSequence - 1;
        LeaveCriticalSection(&GumpSendUpdateLock);
        goto again;
    }
    LeaveCriticalSection(&GumpSendUpdateLock);

ReleaseLock:
    //
    // Close the async RPC event (if it was created) on all paths so the
    // early-out paths above do not leak the handle.
    //
    if (AsyncState.u.hEvent != NULL) {
        CloseHandle(AsyncState.u.hEvent);
    }
    //
    // The update has been delivered to all nodes. Unlock now.
    //
    GumpDoUnlockingUpdate(GumpLastUpdateType, GumpSequence-1, NmLocalNodeId,
                          GumNodeGeneration[NmLocalNodeId]);
}
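//
// Illustrative sketch, not part of the original module: the manual-reset
// event stored in AsyncState.u.hEvent above is the standard way to drive
// an event-based async RPC call, roughly as GumpUpdateRemoteNode (defined
// elsewhere in GUM) would. The stub name GumExampleAsyncStub and its
// arguments (Binding, Buffer, BufferLength) are hypothetical; the
// RpcAsync* routines are the documented Win32 async RPC APIs.
//
#if 0
    RPC_STATUS RpcStatus;

    RpcStatus = RpcAsyncInitializeHandle(&AsyncState, sizeof(RPC_ASYNC_STATE));
    AsyncState.NotificationType = RpcNotificationTypeEvent;
    AsyncState.u.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);

    GumExampleAsyncStub(&AsyncState, Binding, BufferLength, Buffer); // returns at once
    WaitForSingleObject(AsyncState.u.hEvent, INFINITE);   // wait for completion
    RpcStatus = RpcAsyncCompleteCall(&AsyncState, &Status); // fetch return value
    ResetEvent(AsyncState.u.hEvent);   // manual-reset event: rearm before reuse
#endif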