Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1467 lines
44 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. send.c
  5. Abstract:
  6. Routines for sending global updates to the cluster
  7. Author:
  8. John Vert (jvert) 17-Apr-1996
  9. Revision History:
  10. --*/
  11. #include "gump.h"
DWORD
GumSendUpdate(
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD Context,
    IN DWORD BufferLength,
    IN PVOID Buffer
    )

/*++

Routine Description:

    Sends an update to all active nodes in the cluster. All
    registered update handlers for the specified UpdateType
    are called on each node. Any registered update handlers
    for the current node will be called on the same thread.
    This is useful for correct synchronization of the data
    structures to be updated.

Arguments:

    UpdateType - Supplies the type of update. This determines
        which update handlers will be called and the sequence
        number to be used.

    Context - Supplies a DWORD of context to be passed to the
        GUM update handlers

    BufferLength - Supplies the length of the update buffer to
        be passed to the update handlers

    Buffer - Supplies a pointer to the update buffer to be passed
        to the update handlers.

Return Value:

    ERROR_SUCCESS if the request is successful.

    Win32 error code on failure.

--*/

{
    DWORD Sequence;
    DWORD Status=RPC_S_OK;      // RPC_S_OK == ERROR_SUCCESS == 0
    DWORD i;
    PGUM_INFO GumInfo;
    DWORD MyNodeId;
    DWORD LockerNode;

    CL_ASSERT(UpdateType < GumUpdateMaximum);

    GumInfo = &GumTable[UpdateType];
    MyNodeId = NmGetNodeId(NmLocalNode);

    // Grab an RPC handle
    GumpStartRpc(MyNodeId);

retryLock:
    //
    // Snapshot the current locker node. The locker can change underneath
    // us if it dies; every failure path below re-checks GumpLockerNode
    // and jumps back here to retry against the new locker.
    //
    LockerNode = GumpLockerNode;

    //
    // Send locking update to the locker node.
    //
    if (LockerNode == MyNodeId) {
        //
        // This node is the locker.
        //
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumSendUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        // Acquire the GUM lock locally; on success, Sequence receives the
        // sequence number assigned to this update.
        Status = GumpDoLockingUpdate(UpdateType, MyNodeId, &Sequence);
        if (Status == ERROR_SUCCESS) {
            ClRtlLogPrint(LOG_NOISE,
                          "[GUM] GumSendUpdate: Locker dispatching seq %1!u!\ttype %2!u! context %3!u!\n",
                          Sequence,
                          UpdateType,
                          Context);
            // Dispatch locally first: the update is committed on the locker
            // before being propagated to any other node.
            Status = GumpDispatchUpdate(UpdateType,
                                        Context,
                                        TRUE,
                                        TRUE,
                                        BufferLength,
                                        Buffer);
            if (Status != ERROR_SUCCESS) {
                //
                // Note we have to use Sequence-1 for the unlock because GumpDispatchUpdate
                // failed and did not increment the sequence number.
                //
                GumpDoUnlockingUpdate(UpdateType, Sequence-1);
            }
        }
    } else {
        // CL_ASSERT(GumpRpcBindings[i] != NULL);
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumSendUpdate: queuing update\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        try {
            // Ask the remote locker to take the lock, dispatch the update
            // locally on itself, and hand back the assigned sequence number.
            NmStartRpc(LockerNode);
            Status = GumQueueLockingUpdate(GumpRpcBindings[LockerNode],
                                           MyNodeId,
                                           UpdateType,
                                           Context,
                                           &Sequence,
                                           BufferLength,
                                           Buffer);
            NmEndRpc(LockerNode);
        } except (I_RpcExceptionFilter(RpcExceptionCode())) {
            //
            // An exception from RPC indicates that the other node is either dead
            // or insane. Kill it and retry with a new locker.
            //
            NmEndRpc(LockerNode);
            GumpCommFailure(GumInfo,
                            LockerNode,
                            GetExceptionCode(),
                            TRUE);      // Wait==TRUE: block until node-down processing completes
            //
            // The GUM update handler must have been called to select a new locker
            // node.
            //
            CL_ASSERT(LockerNode != GumpLockerNode);
            //
            // Retry the locking update with the new locker node.
            //
            goto retryLock;
        }
        if (Status == ERROR_SUCCESS) {
            CL_ASSERT(Sequence == GumpSequence);
        }
        if(Status != RPC_S_OK) {
            // Non-zero status: capture RPC extended error info for diagnosis.
            NmDumpRpcExtErrorInfo(Status);
        }
        //because there is no synchronization between join and regroups/gumprocessing
        //the old locker node may die and may come up again and not be the locker
        //anymore. We have to take care of this case.
        if (Status == ERROR_CLUSTER_GUM_NOT_LOCKER)
        {
            goto retryLock;
        }
    }

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[GUM] Queued lock attempt for send type %1!d! failed %2!d!\n",
                      UpdateType,
                      Status);
        // signal end of RPC handle
        GumpEndRpc(MyNodeId);
        return(Status);
    }

    //
    // Grap the sendupdate lock to serialize with any replays
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    if (LockerNode != GumpLockerNode) {
        //
        // Locker node changed, we need to restart again.
        //
        LeaveCriticalSection(&GumpSendUpdateLock);
        goto retryLock;
    }
    //
    // The update is now committed on the locker node. All remaining nodes
    // must be updated successfully, or they will be killed.
    //
    // The loop walks node IDs starting just past the locker and wraps
    // around at NmMaxNodeId back to ClusterMinNodeId, so every node is
    // visited exactly once in a deterministic order.
    //
    for (i=LockerNode+1; i != LockerNode; i++) {
        if (i == (NmMaxNodeId + 1)) {
            i=ClusterMinNodeId;
            if (i==LockerNode) {
                break;
            }
        }
        if (GumInfo->ActiveNode[i]) {
            //
            // Dispatch the update to the specified node.
            //
            ClRtlLogPrint(LOG_NOISE,
                          "[GUM] GumSendUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                          Sequence,
                          UpdateType,
                          Context,
                          i);
            if (i == MyNodeId) {
                Status = GumpDispatchUpdate(UpdateType,
                                            Context,
                                            FALSE,
                                            TRUE,
                                            BufferLength,
                                            Buffer);
                if (Status != ERROR_SUCCESS){
                    ClRtlLogPrint(LOG_CRITICAL,
                                  "[GUM] GumSendUpdate: Update on non-locker node(self) failed with %1!d! when it must succeed\n",
                                  Status);
                    //Commit Suicide
                    CsInconsistencyHalt(Status);
                }
            } else {
                // NOTE(review): this inner Status deliberately shadows the
                // outer one — a remote per-node failure is handled right here
                // (the node is killed) and must not leak into the outer
                // Status checked after the unlock phase.
                DWORD Status;
                ClRtlLogPrint(LOG_NOISE,
                              "[GUM] GumSendUpdate: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
                              Sequence,
                              UpdateType,
                              Context);
                try {
                    NmStartRpc(i);
                    Status = GumUpdateNode(GumpRpcBindings[i],
                                           UpdateType,
                                           Context,
                                           Sequence,
                                           BufferLength,
                                           Buffer);
                    NmEndRpc(i);
                } except (I_RpcExceptionFilter(RpcExceptionCode())) {
                    NmEndRpc(i);
                    Status = GetExceptionCode();
                }
                //
                // If the update on the other node failed, then the
                // other node must now be out of the cluster since the
                // update has already completed on the locker node.
                //
                if (Status != ERROR_SUCCESS) {
                    ClRtlLogPrint(LOG_CRITICAL,
                                  "[GUM] GumSendUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
                                  i,
                                  Status);
                    NmDumpRpcExtErrorInfo(Status);
                    GumpCommFailure(GumInfo,
                                    i,
                                    Status,
                                    TRUE);
                }
            }
        }
    }
    //
    // Our update is over
    //
    LeaveCriticalSection(&GumpSendUpdateLock);
    //
    // All nodes have been updated. Send unlocking update.
    //
    if (LockerNode == MyNodeId) {
        GumpDoUnlockingUpdate(UpdateType, Sequence);
    } else {
        try {
            NmStartRpc(LockerNode);
            GumUnlockUpdate(
                GumpRpcBindings[LockerNode],
                UpdateType,
                Sequence
                );
            NmEndRpc(LockerNode);
        } except (I_RpcExceptionFilter(RpcExceptionCode())) {
            //
            // The locker node has crashed. Notify the NM, it will call our
            // notification routine to select a new locker node. Then retry
            // the unlock on the new locker node.
            // SS: changed to not retry unlocks..the new locker node will
            // unlock after propagating this change in any case.
            //
            NmEndRpc(LockerNode);
            Status = GetExceptionCode();
            ClRtlLogPrint(LOG_CRITICAL,
                          "[GUM] GumSendUpdate: Unlocking update to node %1!d! failed with %2!d!\n",
                          LockerNode,
                          Status);
            GumpCommFailure(GumInfo,
                            LockerNode,
                            Status,
                            TRUE);
            CL_ASSERT(LockerNode != GumpLockerNode);
        }
        if(Status != RPC_S_OK) {
            NmDumpRpcExtErrorInfo(Status);
        }
    }
    ClRtlLogPrint(LOG_NOISE,
                  "[GUM] GumSendUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
                  Sequence,
                  UpdateType,
                  Context);
    // signal end of RPC handle
    GumpEndRpc(MyNodeId);
    return(ERROR_SUCCESS);
}
#ifdef GUM_POST_SUPPORT

John Vert (jvert) 11/18/1996
POST is disabled for now since nobody uses it.
N.B. The below code does not handle locker node failures

DWORD
WINAPI
GumPostUpdate(
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD Context,
    IN DWORD BufferLength,
    IN PVOID Buffer // THIS WILL BE FREED
    )

/*++

Routine Description:

    Posts an update to all active nodes in the cluster. All
    registered update handlers for the specified UpdateType
    are called on each node. The update will not be reported
    on the current node. The update will not necessarily have
    completed when this function returns, but will complete
    eventually if the current node does not fail.

Arguments:

    UpdateType - Supplies the type of update. This determines
        which update handlers will be called and the sequence
        number to be used.

    Context - Supplies a DWORD of context to be passed to the
        GUM update handlers

    BufferLength - Supplies the length of the update buffer to
        be passed to the update handlers

    Buffer - Supplies a pointer to the update buffer to be passed
        to the update handlers. THIS BUFFER WILL BE FREED ONCE THE
        POST HAS COMPLETED.

Return Value:

    ERROR_SUCCESS if the request is successful.

    Win32 error code on failure.

--*/

{
    DWORD Sequence;
    DWORD Status;
    DWORD i;
    BOOL IsLocker = TRUE;
    PGUM_INFO GumInfo;
    DWORD MyNodeId;
    DWORD LockerNode=(DWORD)-1;

    CL_ASSERT(UpdateType < GumUpdateMaximum);

    GumInfo = &GumTable[UpdateType];
    MyNodeId = NmGetNodeId(NmLocalNode);

    //
    // Find the lowest active node in the cluster. This is the
    // locker.
    //
    for (i=ClusterMinNodeId; i <= NmMaxNodeId; i++) {
        if (GumInfo->ActiveNode[i]) {
            LockerNode = i;
            break;
        }
    }
    CL_ASSERT(i <= NmMaxNodeId);

    //
    // Post a locking update to the locker node. If this succeeds
    // immediately, we can go do the work directly. If it pends,
    // the locker node will call us back when it is our turn to
    // make the updates.
    //
    if (i == MyNodeId) {
        //
        // This node is the locker.
        //
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumPostUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        // NOTE(review): (DWORD)Buffer truncates the pointer on 64-bit
        // builds; presumably it serves as a correlation cookie here —
        // confirm before ever re-enabling this code.
        Status = GumpDoLockingPost(UpdateType,
                                   MyNodeId,
                                   &Sequence,
                                   Context,
                                   BufferLength,
                                   (DWORD)Buffer,
                                   Buffer);
        if (Status == ERROR_SUCCESS) {
            //
            // Update our sequence number so we stay in sync, even though
            // we aren't dispatching the update.
            //
            GumpSequence += 1;
        }
    } else {
        CL_ASSERT(GumpRpcBindings[i] != NULL);
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumPostUpdate: queuing update\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        Status = GumQueueLockingPost(GumpRpcBindings[i],
                                     MyNodeId,
                                     UpdateType,
                                     Context,
                                     &Sequence,
                                     BufferLength,
                                     Buffer,
                                     (DWORD)Buffer);
        if (Status == ERROR_SUCCESS) {
            CL_ASSERT(Sequence == GumpSequence);
        }
    }

    if (Status == ERROR_SUCCESS) {
        //
        // The lock was immediately acquired, go ahead and post directly
        // here.
        //
        GumpDeliverPosts(LockerNode+1,
                         UpdateType,
                         Sequence,
                         Context,
                         BufferLength,
                         Buffer);

        //
        // All nodes have been updated. Send unlocking update.
        //
        if (LockerNode == MyNodeId) {
            GumpDoUnlockingUpdate(UpdateType, Sequence);
        } else {
            GumUnlockUpdate(
                GumpRpcBindings[LockerNode],
                UpdateType,
                Sequence
                );
        }
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumPostUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
                      Sequence,
                      UpdateType,
                      Context);
        return(ERROR_SUCCESS);
    } else {
        //
        // The lock is currently held. We will get called back when it is released
        //
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumPostUpdate: pending update type %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        return(ERROR_IO_PENDING);
    }
}


VOID
GumpDeliverPosts(
    IN DWORD FirstNodeId,
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD Sequence,
    IN DWORD Context,
    IN DWORD BufferLength,
    IN PVOID Buffer // THIS WILL BE FREED
    )

/*++

Routine Description:

    Actually delivers the update post to the specified nodes.
    The GUM lock is assumed to be held.

Arguments:

    FirstNodeId - Supplies the node ID where the posts should start.
        This is generally the LockerNode+1.

    UpdateType - Supplies the type of update. This determines
        which update handlers will be called and the sequence
        number to be used.

    Context - Supplies a DWORD of context to be passed to the
        GUM update handlers

    BufferLength - Supplies the length of the update buffer to
        be passed to the update handlers

    Buffer - Supplies a pointer to the update buffer to be passed
        to the update handlers. THIS BUFFER WILL BE FREED ONCE THE
        POST HAS COMPLETED.

Return Value:

    None.

--*/

{
    DWORD i;
    PGUM_INFO GumInfo;
    DWORD MyNodeId;

    GumInfo = &GumTable[UpdateType];
    MyNodeId = NmGetNodeId(NmLocalNode);

    // N.B. Unlike GumSendUpdate's wrap-around loop, this walks only from
    // FirstNodeId up to NmMaxNodeId — it does not wrap back to
    // ClusterMinNodeId.
    for (i=FirstNodeId; i<=NmMaxNodeId; i++) {
        if (GumInfo->ActiveNode[i]) {
            //
            // Dispatch the update to the specified node.
            //
            ClRtlLogPrint(LOG_NOISE,
                          "[GUM] GumpDeliverPosts: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                          Sequence,
                          UpdateType,
                          Context,
                          i);
            if (i == MyNodeId) {
                //
                // Update our sequence number so we stay in sync, even though
                // we aren't dispatching the update.
                //
                GumpSequence += 1;
            } else {
                CL_ASSERT(GumpRpcBindings[i] != NULL);
                ClRtlLogPrint(LOG_NOISE,
                              "[GUM] GumpDeliverPosts: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
                              Sequence,
                              UpdateType,
                              Context);
                // Return value deliberately ignored; posts are best-effort
                // and this disabled code does not handle node failures.
                GumUpdateNode(GumpRpcBindings[i],
                              UpdateType,
                              Context,
                              Sequence,
                              BufferLength,
                              Buffer);
            }
        }
    }
    // The post owns Buffer and frees it once delivery is complete.
    LocalFree(Buffer);
}

#endif
DWORD
WINAPI
GumAttemptUpdate(
    IN DWORD Sequence,
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD Context,
    IN DWORD BufferLength,
    IN PVOID Buffer
    )

/*++

Routine Description:

    Conditionally sends an update to all active nodes in the
    cluster. If the clusterwise sequence number matches the supplied
    sequence number, all registered update handlers for the specified
    UpdateType are called on each node. Any registered update handlers
    for the current node will be called on the same thread. This is
    useful for correct synchronization of the data structures to be updated.

    The normal usage of this routine is as follows:
        obtain current sequence number from GumGetCurrentSequence
        make modification to cluster state
        conditionally update cluster state with GumAttemptUpdate
        If update fails, undo modification, release any locks, try again later

Arguments:

    Sequence - Supplies the sequence number obtained from GumGetCurrentSequence.

    UpdateType - Supplies the type of update. This determines which update handlers
        will be called

    Context - Supplies a DWORD of context to be passed to the
        GUM update handlers

    BufferLength - Supplies the length of the update buffer to be passed to the
        update handlers

    Buffer - Supplies a pointer to the update buffer to be passed to the update
        handlers.

Return Value:

    ERROR_SUCCESS if the request is successful.

    Win32 error code on failure.

--*/

{
    DWORD Status=RPC_S_OK;      // RPC_S_OK == ERROR_SUCCESS == 0
    DWORD i;
    PGUM_INFO GumInfo;
    DWORD MyNodeId;
    DWORD LockerNode=(DWORD)-1;

    CL_ASSERT(UpdateType < GumUpdateMaximum);

    GumInfo = &GumTable[UpdateType];
    MyNodeId = NmGetNodeId(NmLocalNode);

retryLock:
    // Snapshot the locker; failure paths below jump back here after a new
    // locker has been chosen.
    LockerNode = GumpLockerNode;

    //
    // Send locking update to the locker node.
    //
    if (LockerNode == MyNodeId)
    {
        //
        // This node is the locker.
        //
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumAttemptUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        // Conditional acquire: only succeeds if the cluster-wide sequence
        // still matches the caller's Sequence.
        if (GumpTryLockingUpdate(UpdateType, MyNodeId, Sequence))
        {
            ClRtlLogPrint(LOG_NOISE,
                          "[GUM] GumAttemptUpdate: Locker dispatching seq %1!u!\ttype %2!u! context %3!u!\n",
                          Sequence,
                          UpdateType,
                          Context);
            Status = GumpDispatchUpdate(UpdateType,
                                        Context,
                                        TRUE,
                                        TRUE,
                                        BufferLength,
                                        Buffer);
            if (Status != ERROR_SUCCESS) {
                //
                // Note we have to use Sequence-1 for the unlock because GumpDispatchUpdate
                // failed and did not increment the sequence number.
                //
                GumpDoUnlockingUpdate(UpdateType, Sequence-1);
            }
        }
        else
        {
            // The sequence moved on underneath the caller; report the
            // mismatch so the caller can undo and retry later.
            Status = ERROR_CLUSTER_DATABASE_SEQMISMATCH;
        }
    }
    else
    {
        //
        //send the locking update to the locker node
        //
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumAttemptUpdate: queuing update\ttype %1!u! context %2!u!\n",
                      UpdateType,
                      Context);
        try {
            NmStartRpc(LockerNode);
            Status = GumAttemptLockingUpdate(GumpRpcBindings[LockerNode],
                                             MyNodeId,
                                             UpdateType,
                                             Context,
                                             Sequence,
                                             BufferLength,
                                             Buffer);
            NmEndRpc(LockerNode);
        } except (I_RpcExceptionFilter(RpcExceptionCode())) {
            //
            // An exception from RPC indicates that the other node is either dead
            // or insane. Kill it and retry with a new locker.
            //
            NmEndRpc(LockerNode);
            GumpCommFailure(GumInfo,
                            LockerNode,
                            GetExceptionCode(),
                            TRUE);
            //
            // The GUM update handler must have been called to select a new locker
            // node.
            //
            CL_ASSERT(LockerNode != GumpLockerNode);
            //
            // Retry the locking update with the new locker node.
            //
            goto retryLock;
        }
        if (Status == ERROR_SUCCESS)
        {
            CL_ASSERT(Sequence == GumpSequence);
        }
        if(Status != RPC_S_OK) {
            NmDumpRpcExtErrorInfo(Status);
        }
    }

    if (Status != ERROR_SUCCESS) {
        ClRtlLogPrint(LOG_UNUSUAL,
                      "[GUM] GumAttemptUpdate: Queued lock attempt for send type %1!d! failed %2!d!\n",
                      UpdateType,
                      Status);
        return(Status);
    }

    //
    // Grap the sendupdate lock to serialize with any replays
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    if (LockerNode != GumpLockerNode) {
        //
        // Locker node changed, we need to restart again.
        //
        LeaveCriticalSection(&GumpSendUpdateLock);
        goto retryLock;
    }
    //
    // The update is now committed on the locker node. All remaining nodes
    // must be updated successfully, or they will be killed.
    //
    // Wrap-around walk: start just past the locker, wrap at NmMaxNodeId
    // back to ClusterMinNodeId, stop when we come back to the locker.
    //
    for (i=LockerNode+1; i != LockerNode; i++)
    {
        if (i == (NmMaxNodeId + 1))
        {
            i=ClusterMinNodeId;
            if (i==LockerNode)
            {
                break;
            }
        }
        if (GumInfo->ActiveNode[i])
        {
            //
            // Dispatch the update to the specified node.
            //
            ClRtlLogPrint(LOG_NOISE,
                          "[GUM] GumAttemptUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                          Sequence,
                          UpdateType,
                          Context,
                          i);
            if (i == MyNodeId) {
                Status = GumpDispatchUpdate(UpdateType,
                                            Context,
                                            FALSE,
                                            TRUE,
                                            BufferLength,
                                            Buffer);
                if (Status != ERROR_SUCCESS){
                    ClRtlLogPrint(LOG_CRITICAL,
                                  "[GUM] GumAttemptUpdate: Update on non-locker node(self) failed with %1!d! when it must succeed\n",
                                  Status);
                    //Commit Suicide
                    CsInconsistencyHalt(Status);
                }
            } else {
                // NOTE(review): inner Status deliberately shadows the outer
                // one; a remote failure is fully handled here by killing
                // the node and must not affect the outer Status.
                DWORD Status;
                ClRtlLogPrint(LOG_NOISE,
                              "[GUM] GumAttemptUpdate: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
                              Sequence,
                              UpdateType,
                              Context);
                try {
                    NmStartRpc(i);
                    Status = GumUpdateNode(GumpRpcBindings[i],
                                           UpdateType,
                                           Context,
                                           Sequence,
                                           BufferLength,
                                           Buffer);
                    NmEndRpc(i);
                } except (I_RpcExceptionFilter(RpcExceptionCode())) {
                    NmEndRpc(i);
                    Status = GetExceptionCode();
                }
                //
                // If the update on the other node failed, then the
                // other node must now be out of the cluster since the
                // update has already completed on the locker node.
                //
                if (Status != ERROR_SUCCESS) {
                    ClRtlLogPrint(LOG_CRITICAL,
                                  "[GUM] GumAttemptUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
                                  i,
                                  Status);
                    NmDumpRpcExtErrorInfo(Status);
                    GumpCommFailure(GumInfo,
                                    i,
                                    Status,
                                    TRUE);
                }
            }
        }
    }
    //
    // Our update is over
    //
    LeaveCriticalSection(&GumpSendUpdateLock);
    //
    // All nodes have been updated. Send unlocking update.
    //
    if (LockerNode == MyNodeId) {
        GumpDoUnlockingUpdate(UpdateType, Sequence);
    } else {
        try {
            NmStartRpc(LockerNode);
            Status = GumUnlockUpdate(
                GumpRpcBindings[LockerNode],
                UpdateType,
                Sequence
                );
            NmEndRpc(LockerNode);
        } except (I_RpcExceptionFilter(RpcExceptionCode())) {
            //
            // The locker node has crashed. Notify the NM, it will call our
            // notification routine to select a new locker node. The new
            // locker node will release the gum lock after propagating
            // the current update.
            //
            NmEndRpc(LockerNode);
            Status = GetExceptionCode();
            ClRtlLogPrint(LOG_CRITICAL,
                          "[GUM] GumAttemptUpdate: Unlocking update to node %1!d! failed with %2!d!\n",
                          LockerNode,
                          Status);
            GumpCommFailure(GumInfo,
                            LockerNode,
                            Status,
                            TRUE);
            CL_ASSERT(LockerNode != GumpLockerNode);
        }
        if(Status != RPC_S_OK) {
            NmDumpRpcExtErrorInfo(Status);
        }
    }

    ClRtlLogPrint(LOG_NOISE,
                  "[GUM] GumAttemptUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
                  Sequence,
                  UpdateType,
                  Context);
    return(ERROR_SUCCESS);
}
DWORD
WINAPI
GumGetCurrentSequence(
    IN GUM_UPDATE_TYPE UpdateType
    )

/*++

Routine Description:

    Obtains the current clusterwise global update sequence number

Arguments:

    UpdateType - Supplies the type of update. Each update type may
        have an independent sequence number.

Return Value:

    Current global update sequence number for the specified update type.

--*/

{
    CL_ASSERT(UpdateType < GumUpdateMaximum);

    // N.B. All update types currently share the single global
    // GumpSequence counter; UpdateType is validated but otherwise unused.
    return(GumpSequence);
}
VOID
GumSetCurrentSequence(
    IN GUM_UPDATE_TYPE UpdateType,
    DWORD Sequence
    )

/*++

Routine Description:

    Sets the current sequence for the specified global update.

Arguments:

    UpdateType - Supplies the update type whose sequence is to be updated.

    Sequence - Supplies the new sequence number.

Return Value:

    None.

--*/

{
    CL_ASSERT(UpdateType < GumUpdateMaximum);

    // N.B. All update types currently share the single global
    // GumpSequence counter, so this overwrites the sequence for
    // every type, not just UpdateType.
    GumpSequence = Sequence;
}
  805. VOID
  806. GumCommFailure(
  807. IN GUM_UPDATE_TYPE UpdateType,
  808. IN DWORD NodeId,
  809. IN DWORD ErrorCode,
  810. IN BOOL Wait
  811. )
  812. /*++
  813. Routine Description:
  814. Informs the NM that a fatal communication error has occurred trying
  815. to talk to another node.
  816. Arguments:
  817. GumInfo - Supplies the update type where the communication failure occurred.
  818. NodeId - Supplies the node id of the other node.
  819. ErrorCode - Supplies the error that was returned from RPC
  820. Wait - if TRUE, this function blocks until the GUM event handler has
  821. processed the NodeDown notification for the specified node.
  822. if FALSE, this function returns immediately after notifying NM
  823. Return Value:
  824. None.
  825. --*/
  826. {
  827. PGUM_INFO GumInfo = &GumTable[UpdateType];
  828. ClRtlLogPrint(LOG_CRITICAL,
  829. "[GUM] GumCommFailure %1!d! communicating with node %2!d!\n",
  830. ErrorCode,
  831. NodeId);
  832. GumpCommFailure(GumInfo, NodeId, ErrorCode, Wait);
  833. }
DWORD
WINAPI
GumEndJoinUpdate(
    IN DWORD Sequence,
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD Context,
    IN DWORD BufferLength,
    IN PVOID Buffer
    )

/*++

Routine Description:

    Conditionally sends a join update to all active nodes in the
    cluster. If the clusterwise sequence number matches the supplied
    sequence number, all registered update handlers for the specified
    UpdateType are called on each node. Any registered update handlers
    for the current node will be called on the same thread. This is
    useful for correct synchronization of the data structures to be updated.

    As each node receives the join update, the sending node will be
    added to the list of nodes registered to receive any future updates
    of this type.

    The normal usage of this routine is as follows:
        joining node gets current sequence number from GumBeginJoinUpdate
        joining node gets current cluster state from another cluster node
        joining node issues GumEndJoinUpdate to add itself to every node's
            update list.
        If GumEndJoinUpdate fails, try again

Arguments:

    Sequence - Supplies the sequence number obtained from GumGetCurrentSequence.

    UpdateType - Supplies the type of update. This determines which update handlers
        will be called

    Context - Supplies a DWORD of context to be passed to the
        GUM update handlers

    BufferLength - Supplies the length of the update buffer to be passed to the
        update handlers

    Buffer - Supplies a pointer to the update buffer to be passed to the update
        handlers.

Return Value:

    ERROR_SUCCESS if the request is successful.

    Win32 error code on failure.

--*/

{
    DWORD Status=RPC_S_OK;      // RPC_S_OK == ERROR_SUCCESS == 0
    DWORD i;
    PGUM_INFO GumInfo;
    DWORD MyNodeId;
    DWORD LockerNode=(DWORD)-1;

    CL_ASSERT(UpdateType < GumUpdateMaximum);

    GumInfo = &GumTable[UpdateType];
    MyNodeId = NmGetNodeId(NmLocalNode);

    LockerNode = GumpLockerNode;

    //SS: bug can we be the locker node at this point in time?
    //CL_ASSERT(LockerNode != MyNodeId);

    //
    // Verify that the locker node allows us to finish the join update
    //
    if (LockerNode != MyNodeId)
    {
        ClRtlLogPrint(LOG_NOISE,
                      "[GUM] GumEndJoinUpdate: attempting update\ttype %1!u! context %2!u! sequence %3!u!\n",
                      UpdateType,
                      Context,
                      Sequence);
        //SS: what if the joiner node acquires the lock but dies after
        //will the remaining cluster continue to function ??
        //We need to make sure that node down events are generated
        //for this node as soon as the first gumbeginjoinupdate call
        //is made
        NmStartRpc(LockerNode);
        Status = GumAttemptJoinUpdate(GumpRpcBindings[LockerNode],
                                      NmGetNodeId(NmLocalNode),
                                      UpdateType,
                                      Context,
                                      Sequence,
                                      BufferLength,
                                      Buffer);
        NmEndRpc(LockerNode);
        if (Status != ERROR_SUCCESS)
        {
            ClRtlLogPrint(LOG_UNUSUAL,
                          "[GUM] Join attempt for type %1!d! failed %2!d!\n",
                          UpdateType,
                          Status);
            NmDumpRpcExtErrorInfo(Status);
            return(Status);
        }
        //if the locker node dies, should we retry with the locker node?
        //In this case, the locker node may be different
        //now from when GumBeginJoinUpdate() is called.
        //SS: we fail the join instead and just retry the whole process
        //instead of calling GumpCommFailure() to kill the locker here.
        // This way the existing cluster continues and the joining node
        // takes a hit which is probably a good thing
    }
    else
    {
        //SS: can we select ourselves as the locker while
        //we havent finished the join completely
        //SS: can others?
        //Is that valid
        // The joining node must not be its own locker; abort the join
        // and let the caller retry the whole process.
        Status = ERROR_REQUEST_ABORTED;
        return(Status);
    }

    // If the joining node dies after having acquired the lock,
    // then a node down event MUST be generated so that the GUM
    // lock can be released and the rest of the cluster can continue.

    //
    // Now Dispatch the update to all other nodes, except ourself.
    //
    // Wrap-around walk: start just past the locker, wrap at NmMaxNodeId
    // back to ClusterMinNodeId, stop when we come back to the locker.
    //
    for (i=LockerNode+1; i != LockerNode; i++)
    {
        if (i == (NmMaxNodeId + 1))
        {
            i=ClusterMinNodeId;
            if (i==LockerNode)
            {
                break;
            }
        }
        if (GumInfo->ActiveNode[i])
        {
            //skip yourself
            if (i != MyNodeId)
            {
                CL_ASSERT(GumpRpcBindings[i] != NULL);
                ClRtlLogPrint(LOG_NOISE,
                              "[GUM] GumEndJoinUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                              Sequence,
                              UpdateType,
                              Context,
                              i);
                NmStartRpc(i);
                Status = GumJoinUpdateNode(GumpRpcBindings[i],
                                           NmGetNodeId(NmLocalNode),
                                           UpdateType,
                                           Context,
                                           Sequence,
                                           BufferLength,
                                           Buffer);
                NmEndRpc(i);
                if (Status != ERROR_SUCCESS)
                {
                    //we dont shoot that node, since we are the ones who is joining
                    //However now its tables differ from the locker node's tables
                    //Instead we will release the gum lock and abort
                    // the join process. This joining node should then
                    // be removed from the locker node's tables for update.
                    //
                    ClRtlLogPrint(LOG_NOISE,
                                  "[GUM] GumEndJoinUpdate: GumJoinUpdateNode failed \ttype %1!u! context %2!u! sequence %3!u!\n",
                                  UpdateType,
                                  Context,
                                  Sequence);
                    NmDumpRpcExtErrorInfo(Status);
                    break;
                }
            }
        }
    }

    CL_ASSERT(LockerNode != (DWORD)-1);

    if (Status != ERROR_SUCCESS)
    {
        // A mid-join failure: skip the local bookkeeping and go straight
        // to releasing the lock so the cluster can continue.
        goto EndJoinUnlock;
    }
    //
    // All nodes have been updated. Update our sequence and send the unlocking update.
    //
    GumTable[UpdateType].Joined = TRUE;
    GumpSequence = Sequence+1;

EndJoinUnlock:
    //SS what if the locker node has died since then
    //we should make sure somebody unlocks and keeps the cluster going
    try {
        NmStartRpc(LockerNode);
        GumUnlockUpdate(GumpRpcBindings[LockerNode], UpdateType, Sequence);
        NmEndRpc(LockerNode);
    } except (I_RpcExceptionFilter(RpcExceptionCode())) {
        //
        // The locker node has crashed. Notify the NM, it will call our
        // notification routine to select a new locker node. Then retry
        // the unlock on the new locker node.
        // SS: changed to not retry unlocks..the new locker node will
        // unlock after propagating this change in any case.
        //
        NmEndRpc(LockerNode);
        Status = GetExceptionCode();
        ClRtlLogPrint(LOG_CRITICAL,
                      "[GUM] GumEndJoinUpdate: Unlocking update to node %1!d! failed with %2!d!\n",
                      LockerNode,
                      Status);
        //instead of killing the locker node in the existing cluster which
        //we are trying to join, return a failure code which will abort the join
        //process. Since this is the locking node, when this node goes down the
        //new locker node should release the lock
        NmDumpRpcExtErrorInfo(Status);
    }

    ClRtlLogPrint(LOG_NOISE,
                  "[GUM] GumEndJoinUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
                  Sequence,
                  UpdateType,
                  Context);
    return(Status);
}
VOID
GumpReUpdate(
    IN GUM_UPDATE_TYPE UpdateType,
    IN DWORD EndId
    )
/*++

Routine Description:

    Reissues the last GUM update of the specified type to all nodes.
    This is used in the event of a failure: this node must be the new
    locker node, and it replays the most recent update so that every
    surviving node converges to the same sequence number and state.

    Note: this routine always releases the GUM lock before returning
    (via GumpDoUnlockingUpdate), even when there is no update to replay.

Arguments:

    UpdateType - Supplies the update type that should be reissued.
        GumUpdateMaximum means "no valid update to propagate"; in that
        case only the unlock is performed.

    EndId - Supplies the last node ID to be updated. This is usually the node
        ID of the failed node.

Return Value:

    None

--*/
{
    PGUM_INFO GumInfo = &GumTable[UpdateType];
    DWORD MyId = NmGetNodeId(NmLocalNode);
    DWORD i, seq;
    DWORD Status;

    //
    // This node must be the locker.
    // The lock must be held, and it must be held by this node
    //
    CL_ASSERT(GumpLockerNode == MyId);
    CL_ASSERT(GumpLockingNode == MyId);

    //if there is no valid update to be propagated
    //SS: The gum lock still needs to be freed since it is always acquired
    //before this function is called
    if (UpdateType == GumUpdateMaximum)
        goto ReleaseLock;

    //
    // Grap the sendupdate lock to serialize with a concurrent update on
    // on this node
    //
    // The sequence to replay is the one that was last completed locally
    // (GumpSequence has already been advanced past it, hence the -1).
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    seq = GumpSequence - 1;
    LeaveCriticalSection(&GumpSendUpdateLock);

again:
    ClRtlLogPrint(LOG_UNUSUAL,
                  "[GUM] GumpReUpdate reissuing last update for send type %1!d!\n",
                  UpdateType);

    //
    // Walk the node ids circularly starting just past this node, wrapping
    // from NmMaxNodeId back to ClusterMinNodeId, and stopping at EndId.
    //
    for (i=MyId+1; i != EndId; i++) {

        if (i == (NmMaxNodeId +1)) {
            i=ClusterMinNodeId;
            if (i==EndId) {
                break;
            }
        }

        if (GumInfo->ActiveNode[i]) {

            //
            // Dispatch the update to the specified node.
            //
            ClRtlLogPrint(LOG_NOISE,
                       "[GUM] GumpReUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
                        seq,
                        UpdateType,
                        GumpLastContext,
                        i);

            //
            // RPC failures surface as SEH exceptions; convert them to a
            // status code so the node can be evicted below.
            //
            try {
                NmStartRpc(i);
                if (GumpLastBufferValid != FALSE) {
                    //
                    // Replay the saved buffer of the last real update.
                    //
                    Status = GumUpdateNode(GumpReplayRpcBindings[i],
                                           UpdateType,
                                           GumpLastContext,
                                           seq,
                                           GumpLastBufferLength,
                                           GumpLastBuffer);
                } else {
                    // replay end join
                    // since we also ignore other updates, we should
                    // be calling gumupdatenode for those..however
                    // calling gumjoinupdatenode seems to do the job
                    // for signalling the other nodes to bump up
                    // their sequence number without processing the update
                    Status = GumJoinUpdateNode(GumpReplayRpcBindings[i],
                                               -1, // signal replay
                                               UpdateType,
                                               GumpLastContext,
                                               seq,
                                               GumpLastBufferLength,
                                               GumpLastBuffer);
                }
                NmEndRpc(i);
            } except (I_RpcExceptionFilter(RpcExceptionCode())) {
                NmEndRpc(i);
                Status = GetExceptionCode();
            }

            //
            // If the update on the other node failed, then the
            // other node must now be out of the cluster since the
            // update has already completed on the locker node.
            //
            // A sequence mismatch is tolerated: the target already has
            // this update. Any other failure evicts the node (Wait=TRUE
            // blocks until GUM has processed the node-down event).
            //
            if (Status != ERROR_SUCCESS && Status != ERROR_CLUSTER_DATABASE_SEQMISMATCH) {
                ClRtlLogPrint(LOG_CRITICAL,
                           "[GUM] GumpReUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
                            i,
                            Status);

                NmDumpRpcExtErrorInfo(Status);

                GumpCommFailure(GumInfo,
                                i,
                                Status,
                                TRUE);
            }
        }
    }

    //
    // At this point all nodes have received our replay and no outstanding
    // sends are in progress. However, a send could have arrived at this
    // node and the sender died after that, so we are the only node that
    // has it. Since we are the locker and locking node, we have to replay
    // again if that happened (detected by GumpSequence having moved).
    //
    EnterCriticalSection(&GumpSendUpdateLock);
    if (seq != (GumpSequence - 1)) {
        seq = GumpSequence - 1;
        LeaveCriticalSection(&GumpSendUpdateLock);
        goto again;
    }
    LeaveCriticalSection(&GumpSendUpdateLock);

ReleaseLock:
    //
    // The update has been delivered to all nodes. Unlock now.
    //
    GumpDoUnlockingUpdate(UpdateType, GumpSequence-1);
}
  1161. VOID
  1162. GumpCommFailure(
  1163. IN PGUM_INFO GumInfo,
  1164. IN DWORD NodeId,
  1165. IN DWORD ErrorCode,
  1166. IN BOOL Wait
  1167. )
  1168. /*++
  1169. Routine Description:
  1170. Informs the NM that a fatal communication error has occurred trying
  1171. to talk to another node.
  1172. Arguments:
  1173. GumInfo - Supplies the update type where the communication failure occurred.
  1174. NodeId - Supplies the node id of the other node.
  1175. ErrorCode - Supplies the error that was returned from RPC
  1176. Wait - if TRUE, this function blocks until the GUM event handler has
  1177. processed the NodeDown notification for the specified node.
  1178. if FALSE, this function returns immediately after notifying NM
  1179. Return Value:
  1180. None.
  1181. --*/
  1182. {
  1183. DWORD dwCur;
  1184. ClRtlLogPrint(LOG_CRITICAL,
  1185. "[GUM] GumpCommFailure %1!d! communicating with node %2!d!\n",
  1186. ErrorCode,
  1187. NodeId);
  1188. // This is a hack to check if we are shutting down. See bug 88411
  1189. if (ErrorCode == ERROR_SHUTDOWN_IN_PROGRESS) {
  1190. // if we are shutting down, just kill self
  1191. // set to our node id
  1192. NodeId = NmGetNodeId(NmLocalNode);
  1193. }
  1194. //
  1195. // Get current generation number
  1196. //
  1197. if (Wait) {
  1198. dwCur = GumpGetNodeGenNum(GumInfo, NodeId);
  1199. }
  1200. NmAdviseNodeFailure(NodeId, ErrorCode);
  1201. if (Wait) {
  1202. //
  1203. // Wait for this node to be declared down and
  1204. // GumpEventHandler to mark it as inactive.
  1205. //
  1206. GumpWaitNodeDown(NodeId, dwCur);
  1207. }
  1208. }
  1209.