Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1525 lines
47 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. send.c
  5. Abstract:
  6. Routines for sending global updates to the cluster
  7. Author:
  8. John Vert (jvert) 17-Apr-1996
  9. Revision History:
  10. --*/
  11. #include "gump.h"
  12. DWORD
  13. WINAPI
  14. GumSendUpdate(
  15. IN GUM_UPDATE_TYPE UpdateType,
  16. IN DWORD Context,
  17. IN DWORD BufferLength,
  18. IN PVOID Buffer
  19. )
  20. /*++
  21. Routine Description:
  22. Sends an update to all active nodes in the cluster. All
  23. registered update handlers for the specified UpdateType
  24. are called on each node. Any registered update handlers
  25. for the current node will be called on the same thread.
  26. This is useful for correct synchronization of the data
  27. structures to be updated.
  28. Arguments:
  29. UpdateType - Supplies the type of update. This determines
  30. which update handlers will be called and the sequence
  31. number to be used.
  32. Context - Supplies a DWORD of context to be passed to the
  33. GUM update handlers
  34. BufferLength - Supplies the length of the update buffer to
  35. be passed to the update handlers
  36. Buffer - Supplies a pointer to the update buffer to be passed
  37. to the update handlers.
  38. Return Value:
  39. ERROR_SUCCESS if the request is successful.
  40. Win32 error code on failure.
  41. --*/
  42. {
  43. return(
  44. GumSendUpdateReturnInfo(
  45. UpdateType,
  46. Context,
  47. NULL,
  48. BufferLength,
  49. Buffer
  50. )
  51. );
  52. } // GumSendUpdate
  53. DWORD
  54. GumpUpdateRemoteNode(
  55. IN PRPC_ASYNC_STATE AsyncState,
  56. IN DWORD RemoteNodeId,
  57. IN DWORD UpdateType,
  58. IN DWORD Context,
  59. IN DWORD Sequence,
  60. IN DWORD BufferLength,
  61. IN UCHAR Buffer[]
  62. )
  63. /*++
  64. Routine Description:
  65. Issues an update request to a remote node using async RPC.
  66. Arguments:
  67. AsyncState - A pointer to an RPC async state block. The u.hEvent
  68. member field must contain a valid event object handle.
  69. RemoteNodeId - Target of the update.
  70. Type - Supplies the type of update. This determines
  71. which update handlers will be called and the sequence
  72. number to be used.
  73. Context - Supplies a DWORD of context to be passed to the
  74. GUM update handlers
  75. ReturnStatusArray - Pointer to an array of structures to be filled in
  76. with the return value from the update handler on each node. The
  77. array is indexed by node ID.
  78. BufferLength - Supplies the length of the update buffer to
  79. be passed to the update handlers
  80. Buffer - Supplies a pointer to the update buffer to be passed
  81. to the update handlers.
  82. Return Value:
  83. ERROR_SUCCESS if the request is successful.
  84. Win32 error code on failure.
  85. --*/
  86. {
  87. DWORD Status;
  88. HANDLE hEventHandle;
  89. BOOL result;
  90. PNM_NODE Node;
  91. HANDLE handleArr[2];
  92. CL_ASSERT(AsyncState->u.hEvent != NULL);
  93. //
  94. // Initialize the async RPC tracking information
  95. //
  96. hEventHandle = AsyncState->u.hEvent;
  97. AsyncState->u.hEvent = NULL;
  98. Status = RpcAsyncInitializeHandle(AsyncState, sizeof(RPC_ASYNC_STATE));
  99. AsyncState->u.hEvent = hEventHandle;
  100. if (Status != RPC_S_OK) {
  101. ClRtlLogPrint(LOG_CRITICAL,
  102. "[GUM] UpdateRemoteNode: Failed to initialize async RPC status "
  103. "block, status %1!u!\n",
  104. Status
  105. );
  106. return (Status);
  107. }
  108. AsyncState->UserInfo = NULL;
  109. AsyncState->NotificationType = RpcNotificationTypeEvent;
  110. result = ResetEvent(AsyncState->u.hEvent);
  111. CL_ASSERT(result != 0);
  112. // Now hook onto NM node state down event mechanism to detect node downs,
  113. // instead of NmStartRpc()/NmEndRpc().
  114. Node = NmReferenceNodeById(RemoteNodeId);
  115. CL_ASSERT(Node != NULL);
  116. handleArr[0] = AsyncState->u.hEvent;
  117. handleArr[1] = NmGetNodeStateDownEvent(Node);
  118. try {
  119. Status = GumUpdateNode(
  120. AsyncState,
  121. GumpRpcBindings[RemoteNodeId],
  122. UpdateType,
  123. Context,
  124. Sequence,
  125. BufferLength,
  126. Buffer
  127. );
  128. if (Status == RPC_S_OK) {
  129. DWORD RpcStatus;
  130. DWORD WaitStatus;
  131. //
  132. // The call is pending. Wait for completion.
  133. //
  134. WaitStatus = WaitForMultipleObjects(
  135. 2,
  136. handleArr,
  137. FALSE,
  138. INFINITE
  139. );
  140. if (WaitStatus != WAIT_OBJECT_0) {
  141. //
  142. // Something went wrong.
  143. // Either this is a rpc failure or, the target node went down. In either case
  144. // the error path is the same, complete the call and evict the target node
  145. // (eviction is done by the caller of this function).
  146. //
  147. ClRtlLogPrint(LOG_CRITICAL,
  148. "[GUM] GumUpdateRemoteNode: WaitforMultipleObjects returned %1!u!\n",
  149. WaitStatus
  150. );
  151. if (WaitStatus == WAIT_FAILED) {
  152. Status = GetLastError();
  153. ClRtlLogPrint(LOG_CRITICAL,
  154. "[GUM] GumUpdateRemoteNode: WaitforMultipleObjects returned WAIT_FAILED, status %1!u!\n",
  155. Status);
  156. //SS: unexpected error - kill yourself
  157. CsInconsistencyHalt(Status);
  158. }
  159. else if (WaitStatus != (WAIT_OBJECT_0 + 1)) {
  160. Status = GetLastError();
  161. //wait objects abandoned - can that happen with events?
  162. ClRtlLogPrint(LOG_CRITICAL,
  163. "[GUM] GumUpdateRemoteNode: WaitforMultipleObjects failed, status %1!u!\n",
  164. Status);
  165. //SS: unexpected error - kill yourself
  166. CsInconsistencyHalt(Status);
  167. }
  168. // SS: we only come here if the remote node is signalled to be down
  169. // make sure that a non-zero status is returned to the caller
  170. // so that the gum eviction occurs as desirable
  171. //
  172. // Cancel the call, just to be safe.
  173. //
  174. RpcStatus = RpcAsyncCancelCall(
  175. AsyncState,
  176. TRUE // Abortive cancel
  177. );
  178. if (RpcStatus != RPC_S_OK) {
  179. ClRtlLogPrint(LOG_CRITICAL,
  180. "[GUM] GumUpdateRemoteNode: RpcAsyncCancelCall()= %1!u!\n",
  181. RpcStatus
  182. );
  183. Status = RpcStatus;
  184. }
  185. else {
  186. CL_ASSERT(RpcStatus == RPC_S_OK);
  187. //
  188. // Wait for the call to complete.
  189. //
  190. WaitStatus = WaitForSingleObject(
  191. AsyncState->u.hEvent,
  192. INFINITE
  193. );
  194. if (WaitStatus != WAIT_OBJECT_0) {
  195. ClRtlLogPrint(LOG_CRITICAL,
  196. "[GUM] GumUpdateRemoteNode: WaitForSingleObject() returns= %1!u!\n",
  197. WaitStatus);
  198. ClRtlLogPrint(LOG_CRITICAL,
  199. "[GUM] GumUpdateRemoteNode: Mapping Status to WAIT_FAILED\n");
  200. //SS: if this call doesnt complete, there is something
  201. //strange with RPC - should we kill ourselves or kill the other
  202. //node
  203. //SS: for now we asssume that the problem is not local
  204. Status = WAIT_FAILED;
  205. }
  206. }
  207. }
  208. //
  209. // The call should now be complete. Get the
  210. // completion status. Any RPC error will be
  211. // returned in 'RpcStatus'. If there was no
  212. // RPC error, then any application error will
  213. // be returned in 'Status'.
  214. //
  215. RpcStatus = RpcAsyncCompleteCall(
  216. AsyncState,
  217. &Status
  218. );
  219. if (RpcStatus != RPC_S_OK) {
  220. ClRtlLogPrint(LOG_CRITICAL,
  221. "[GUM] GumUpdateRemoteNode: Failed to get "
  222. "completion status for async RPC call,"
  223. "status %1!u!\n",
  224. RpcStatus
  225. );
  226. Status = RpcStatus;
  227. }
  228. }
  229. else {
  230. // An error was returned synchronously.
  231. ClRtlLogPrint(LOG_CRITICAL,
  232. "[GUM] GumUpdateRemoteNode: GumUpdateNode() failed synchronously, status %1!u!\n",
  233. Status
  234. );
  235. }
  236. OmDereferenceObject(Node);
  237. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  238. OmDereferenceObject(Node);
  239. Status = GetExceptionCode();
  240. }
  241. return(Status);
  242. } // GumpUpdateRemoteNode
  243. DWORD
  244. WINAPI
  245. GumSendUpdateReturnInfo(
  246. IN GUM_UPDATE_TYPE UpdateType,
  247. IN DWORD Context,
  248. OUT PGUM_NODE_UPDATE_HANDLER_STATUS ReturnStatusArray,
  249. IN DWORD BufferLength,
  250. IN PVOID Buffer
  251. )
  252. /*++
  253. Routine Description:
  254. Sends an update to all active nodes in the cluster. All
  255. registered update handlers for the specified UpdateType
  256. are called on each node. Any registered update handlers
  257. for the current node will be called on the same thread.
  258. This is useful for correct synchronization of the data
  259. structures to be updated.
  260. Arguments:
  261. UpdateType - Supplies the type of update. This determines
  262. which update handlers will be called and the sequence
  263. number to be used.
  264. Context - Supplies a DWORD of context to be passed to the
  265. GUM update handlers
  266. ReturnStatusArray - Pointer to an array of structures to be filled in
  267. with the return value from the update handler on each node. The
  268. array is indexed by node ID. The array must be at least
  269. (NmMaxNodeId + 1) entries in length.
  270. BufferLength - Supplies the length of the update buffer to
  271. be passed to the update handlers
  272. Buffer - Supplies a pointer to the update buffer to be passed
  273. to the update handlers.
  274. Return Value:
  275. ERROR_SUCCESS if the request is successful.
  276. Win32 error code on failure.
  277. --*/
  278. {
  279. DWORD Sequence;
  280. DWORD Status=RPC_S_OK;
  281. DWORD i;
  282. PGUM_INFO GumInfo;
  283. DWORD MyNodeId;
  284. DWORD LockerNode;
  285. RPC_ASYNC_STATE AsyncState;
  286. DWORD GenerationNum; //the generation number wrt to the locker at which the lock is obtained
  287. BOOL AssumeLockerWhistler = TRUE;
  288. CL_ASSERT(UpdateType < GumUpdateMaximum);
  289. //
  290. // Prepare for async RPC. We do this here to avoid hitting a failure
  291. // after the update is already in progress.
  292. //
  293. ZeroMemory((PVOID) &AsyncState, sizeof(RPC_ASYNC_STATE));
  294. AsyncState.u.hEvent = CreateEvent(
  295. NULL, // no attributes
  296. TRUE, // manual reset
  297. FALSE, // initial state unsignalled
  298. NULL // no object name
  299. );
  300. if (AsyncState.u.hEvent == NULL) {
  301. Status = GetLastError();
  302. ClRtlLogPrint(LOG_CRITICAL,
  303. "[GUM] GumSendUpdate: Failed to allocate event object for async "
  304. "RPC call, status %1!u!\n",
  305. Status
  306. );
  307. return (Status);
  308. }
  309. //
  310. // Initialize the return status array
  311. //
  312. if (ReturnStatusArray != NULL) {
  313. for (i=ClusterMinNodeId; i<=NmMaxNodeId; i++) {
  314. ReturnStatusArray[i].UpdateAttempted = FALSE;
  315. ReturnStatusArray[i].ReturnStatus = ERROR_NODE_NOT_AVAILABLE;
  316. }
  317. }
  318. GumInfo = &GumTable[UpdateType];
  319. MyNodeId = NmGetNodeId(NmLocalNode);
  320. // Grab an RPC handle
  321. GumpStartRpc(MyNodeId);
  322. retryLock:
  323. LockerNode = GumpLockerNode;
  324. //
  325. // Send locking update to the locker node.
  326. //
  327. if (LockerNode == MyNodeId) {
  328. //
  329. // This node is the locker.
  330. //
  331. ClRtlLogPrint(LOG_NOISE,
  332. "[GUM] GumSendUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
  333. UpdateType,
  334. Context);
  335. Status = GumpDoLockingUpdate(UpdateType, MyNodeId, &Sequence, &GenerationNum);
  336. if (Status == ERROR_SUCCESS) {
  337. ClRtlLogPrint(LOG_NOISE,
  338. "[GUM] GumSendUpdate: Locker dispatching seq %1!u!\ttype %2!u! context %3!u!\n",
  339. Sequence,
  340. UpdateType,
  341. Context);
  342. Status = GumpDispatchUpdate(UpdateType,
  343. Context,
  344. TRUE,
  345. TRUE,
  346. BufferLength,
  347. Buffer);
  348. if (ReturnStatusArray != NULL) {
  349. ReturnStatusArray[MyNodeId].UpdateAttempted = TRUE;
  350. ReturnStatusArray[MyNodeId].ReturnStatus = Status;
  351. }
  352. if (Status != ERROR_SUCCESS) {
  353. //
  354. // Note we have to use Sequence-1 for the unlock because GumpDispatchUpdate
  355. // failed and did not increment the sequence number.
  356. //
  357. GumpDoUnlockingUpdate(UpdateType, Sequence-1, MyNodeId, GenerationNum);
  358. }
  359. }
  360. } else {
  361. // CL_ASSERT(GumpRpcBindings[i] != NULL);
  362. ClRtlLogPrint(LOG_NOISE,
  363. "[GUM] GumSendUpdate: queuing update\ttype %1!u! context %2!u!\n",
  364. UpdateType,
  365. Context);
  366. AssumeLockerWhistler = TRUE;
  367. RetryLockForRollingUpgrade:
  368. try {
  369. NmStartRpc(LockerNode);
  370. if (AssumeLockerWhistler)
  371. {
  372. Status = GumQueueLockingUpdate2(GumpRpcBindings[LockerNode],
  373. MyNodeId,
  374. UpdateType,
  375. Context,
  376. &Sequence,
  377. BufferLength,
  378. Buffer,
  379. &GenerationNum);
  380. }
  381. else
  382. {
  383. //call the win2K version
  384. Status = GumQueueLockingUpdate(GumpRpcBindings[LockerNode],
  385. MyNodeId,
  386. UpdateType,
  387. Context,
  388. &Sequence,
  389. BufferLength,
  390. Buffer);
  391. }
  392. NmEndRpc(LockerNode);
  393. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  394. //
  395. // An exception from RPC indicates that the other node is either dead
  396. // or insane. Kill it and retry with a new locker.
  397. //
  398. NmEndRpc(LockerNode);
  399. Status = GetExceptionCode();
  400. ClRtlLogPrint(LOG_CRITICAL,
  401. "[GUM] GumSendUpdate: GumQueueLocking update to node %1!d! failed with %2!d!\n",
  402. LockerNode,
  403. Status);
  404. if (Status == RPC_S_PROCNUM_OUT_OF_RANGE)
  405. {
  406. //the locker node is win2K, try the old interface
  407. AssumeLockerWhistler = FALSE;
  408. goto RetryLockForRollingUpgrade;
  409. }
  410. else
  411. {
  412. GumpCommFailure(GumInfo,
  413. LockerNode,
  414. GetExceptionCode(),
  415. TRUE);
  416. //
  417. // The GUM update handler must have been called to select a new locker
  418. // node.
  419. //
  420. CL_ASSERT(LockerNode != GumpLockerNode);
  421. //
  422. // Retry the locking update with the new locker node.
  423. //
  424. goto retryLock;
  425. }
  426. }
  427. if (ReturnStatusArray != NULL) {
  428. ReturnStatusArray[LockerNode].UpdateAttempted = TRUE;
  429. ReturnStatusArray[LockerNode].ReturnStatus = Status;
  430. }
  431. if (Status == ERROR_SUCCESS) {
  432. CL_ASSERT(Sequence == GumpSequence);
  433. }
  434. if (Status != RPC_S_OK) {
  435. NmDumpRpcExtErrorInfo(Status);
  436. }
  437. //because there is no synchronization between join and regroups/gumprocessing
  438. //the old locker node may die and may come up again and not be the locker
  439. //anymore. We have to take care of this case.
  440. if (Status == ERROR_CLUSTER_GUM_NOT_LOCKER)
  441. {
  442. goto retryLock;
  443. }
  444. }
  445. if (Status != ERROR_SUCCESS) {
  446. ClRtlLogPrint(LOG_UNUSUAL,
  447. "[GUM] Queued lock attempt for send type %1!d! failed %2!d!\n",
  448. UpdateType,
  449. Status);
  450. // signal end of RPC handle
  451. GumpEndRpc(MyNodeId);
  452. if (AsyncState.u.hEvent != NULL) {
  453. CloseHandle(AsyncState.u.hEvent);
  454. }
  455. return(Status);
  456. }
  457. //
  458. // Grap the sendupdate lock to serialize with any replays
  459. //
  460. EnterCriticalSection(&GumpSendUpdateLock);
  461. if (LockerNode != GumpLockerNode) {
  462. //
  463. // Locker node changed, we need to restart again.
  464. //
  465. LeaveCriticalSection(&GumpSendUpdateLock);
  466. goto retryLock;
  467. }
  468. //
  469. // The update is now committed on the locker node. All remaining nodes
  470. // must be updated successfully, or they will be killed.
  471. //
  472. for (i=LockerNode+1; i != LockerNode; i++) {
  473. if (i == (NmMaxNodeId + 1)) {
  474. i=ClusterMinNodeId;
  475. if (i==LockerNode) {
  476. break;
  477. }
  478. }
  479. if (GumInfo->ActiveNode[i]) {
  480. //
  481. // Dispatch the update to the specified node.
  482. //
  483. ClRtlLogPrint(LOG_NOISE,
  484. "[GUM] GumSendUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
  485. Sequence,
  486. UpdateType,
  487. Context,
  488. i);
  489. if (i == MyNodeId) {
  490. Status = GumpDispatchUpdate(UpdateType,
  491. Context,
  492. FALSE,
  493. TRUE,
  494. BufferLength,
  495. Buffer);
  496. if (ReturnStatusArray != NULL) {
  497. ReturnStatusArray[i].UpdateAttempted = TRUE;
  498. ReturnStatusArray[i].ReturnStatus = Status;
  499. }
  500. if (Status != ERROR_SUCCESS){
  501. ClRtlLogPrint(LOG_CRITICAL,
  502. "[GUM] GumSendUpdate: Update on non-locker node(self) failed with %1!d! when it must succeed\n",
  503. Status);
  504. //Commit Suicide
  505. CsInconsistencyHalt(Status);
  506. }
  507. } else {
  508. DWORD dwStatus;
  509. ClRtlLogPrint(LOG_NOISE,
  510. "[GUM] GumSendUpdate: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
  511. Sequence,
  512. UpdateType,
  513. Context);
  514. dwStatus = GumpUpdateRemoteNode(
  515. &AsyncState,
  516. i,
  517. UpdateType,
  518. Context,
  519. Sequence,
  520. BufferLength,
  521. Buffer
  522. );
  523. if (ReturnStatusArray != NULL) {
  524. ReturnStatusArray[i].UpdateAttempted = TRUE;
  525. ReturnStatusArray[i].ReturnStatus = dwStatus;
  526. }
  527. //
  528. // If the update on the other node failed, then the
  529. // other node must now be out of the cluster since the
  530. // update has already completed on the locker node.
  531. //
  532. if (dwStatus != ERROR_SUCCESS) {
  533. ClRtlLogPrint(LOG_CRITICAL,
  534. "[GUM] GumSendUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
  535. i,
  536. dwStatus);
  537. NmDumpRpcExtErrorInfo(dwStatus);
  538. GumpCommFailure(GumInfo,
  539. i,
  540. dwStatus,
  541. TRUE);
  542. }
  543. }
  544. }
  545. }
  546. //
  547. // Our update is over
  548. //
  549. LeaveCriticalSection(&GumpSendUpdateLock);
  550. //
  551. // All nodes have been updated. Send unlocking update.
  552. //
  553. if (LockerNode == MyNodeId) {
  554. GumpDoUnlockingUpdate(UpdateType, Sequence, MyNodeId, GenerationNum);
  555. } else {
  556. //SS: We will assume that AssumeLockerWhistler is set appropriately when the lock was acquired
  557. try {
  558. NmStartRpc(LockerNode);
  559. if (AssumeLockerWhistler)
  560. {
  561. //SS: the sequence number will protect if the locker has gone down
  562. //and come back up since we got the lock and tried to release it
  563. Status = GumUnlockUpdate2(
  564. GumpRpcBindings[LockerNode],
  565. UpdateType,
  566. Sequence,
  567. MyNodeId,
  568. GenerationNum
  569. );
  570. }
  571. else
  572. {
  573. Status = GumUnlockUpdate(
  574. GumpRpcBindings[LockerNode],
  575. UpdateType,
  576. Sequence);
  577. }
  578. NmEndRpc(LockerNode);
  579. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  580. //
  581. // The locker node has crashed. Notify the NM, it will call our
  582. // notification routine to select a new locker node. Then retry
  583. // the unlock on the new locker node.
  584. // SS: changed to not retry unlocks..the new locker node will
  585. // unlock after propagating this change in any case.
  586. //
  587. NmEndRpc(LockerNode);
  588. Status = GetExceptionCode();
  589. ClRtlLogPrint(LOG_CRITICAL,
  590. "[GUM] GumSendUpdate: Unlocking update to node %1!d! failed with %2!d!\n",
  591. LockerNode,
  592. Status);
  593. GumpCommFailure(GumInfo,
  594. LockerNode,
  595. Status,
  596. TRUE);
  597. CL_ASSERT(LockerNode != GumpLockerNode);
  598. }
  599. if(Status != RPC_S_OK) {
  600. NmDumpRpcExtErrorInfo(Status);
  601. }
  602. }
  603. ClRtlLogPrint(LOG_NOISE,
  604. "[GUM] GumSendUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
  605. Sequence,
  606. UpdateType,
  607. Context);
  608. // signal end of RPC handle
  609. GumpEndRpc(MyNodeId);
  610. if (AsyncState.u.hEvent != NULL) {
  611. CloseHandle(AsyncState.u.hEvent);
  612. }
  613. return(ERROR_SUCCESS);
  614. } // GumSendUpdateReturnInfo
  615. #ifdef GUM_POST_SUPPORT
  616. John Vert (jvert) 11/18/1996
  617. POST is disabled for now since nobody uses it.
  618. N.B. The below code does not handle locker node failures
  619. DWORD
  620. WINAPI
  621. GumPostUpdate(
  622. IN GUM_UPDATE_TYPE UpdateType,
  623. IN DWORD Context,
  624. IN DWORD BufferLength,
  625. IN PVOID Buffer // THIS WILL BE FREED
  626. )
  627. /*++
  628. Routine Description:
  629. Posts an update to all active nodes in the cluster. All
  630. registered update handlers for the specified UpdateType
  631. are called on each node. The update will not be reported
  632. on the current node. The update will not necessarily have
  633. completed when this function returns, but will complete
  634. eventually if the current node does not fail.
  635. Arguments:
  636. UpdateType - Supplies the type of update. This determines
  637. which update handlers will be called and the sequence
  638. number to be used.
  639. Context - Supplies a DWORD of context to be passed to the
  640. GUM update handlers
  641. BufferLength - Supplies the length of the update buffer to
  642. be passed to the update handlers
  643. Buffer - Supplies a pointer to the update buffer to be passed
  644. to the update handlers. THIS BUFFER WILL BE FREED ONCE THE
  645. POST HAS COMPLETED.
  646. Return Value:
  647. ERROR_SUCCESS if the request is successful.
  648. Win32 error code on failure.
  649. --*/
  650. {
  651. DWORD Sequence;
  652. DWORD Status;
  653. DWORD i;
  654. BOOL IsLocker = TRUE;
  655. PGUM_INFO GumInfo;
  656. DWORD MyNodeId;
  657. DWORD LockerNode=(DWORD)-1;
  658. CL_ASSERT(UpdateType < GumUpdateMaximum);
  659. GumInfo = &GumTable[UpdateType];
  660. MyNodeId = NmGetNodeId(NmLocalNode);
  661. //
  662. // Find the lowest active node in the cluster. This is the
  663. // locker.
  664. for (i=ClusterMinNodeId; i <= NmMaxNodeId; i++) {
  665. if (GumInfo->ActiveNode[i]) {
  666. LockerNode = i;
  667. break;
  668. }
  669. }
  670. CL_ASSERT(i <= NmMaxNodeId);
  671. //
  672. // Post a locking update to the locker node. If this succeeds
  673. // immediately, we can go do the work directly. If it pends,
  674. // the locker node will call us back when it is our turn to
  675. // make the updates.
  676. //
  677. if (i == MyNodeId) {
  678. //
  679. // This node is the locker.
  680. //
  681. ClRtlLogPrint(LOG_NOISE,
  682. "[GUM] GumPostUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
  683. UpdateType,
  684. Context);
  685. Status = GumpDoLockingPost(UpdateType,
  686. MyNodeId,
  687. &Sequence,
  688. Context,
  689. BufferLength,
  690. (DWORD)Buffer,
  691. Buffer);
  692. if (Status == ERROR_SUCCESS) {
  693. //
  694. // Update our sequence number so we stay in sync, even though
  695. // we aren't dispatching the update.
  696. //
  697. GumpSequence += 1;
  698. }
  699. } else {
  700. CL_ASSERT(GumpRpcBindings[i] != NULL);
  701. ClRtlLogPrint(LOG_NOISE,
  702. "[GUM] GumPostUpdate: queuing update\ttype %1!u! context %2!u!\n",
  703. UpdateType,
  704. Context);
  705. Status = GumQueueLockingPost(GumpRpcBindings[i],
  706. MyNodeId,
  707. UpdateType,
  708. Context,
  709. &Sequence,
  710. BufferLength,
  711. Buffer,
  712. (DWORD)Buffer);
  713. if (Status == ERROR_SUCCESS) {
  714. CL_ASSERT(Sequence == GumpSequence);
  715. }
  716. }
  717. if (Status == ERROR_SUCCESS) {
  718. //
  719. // The lock was immediately acquired, go ahead and post directly
  720. // here.
  721. //
  722. GumpDeliverPosts(LockerNode+1,
  723. UpdateType,
  724. Sequence,
  725. Context,
  726. BufferLength,
  727. Buffer);
  728. //
  729. // All nodes have been updated. Send unlocking update.
  730. //
  731. if (LockerNode == MyNodeId) {
  732. GumpDoUnlockingUpdate(UpdateType, Sequence);
  733. } else {
  734. GumUnlockUpdate(
  735. GumpRpcBindings[LockerNode],
  736. UpdateType,
  737. Sequence
  738. );
  739. }
  740. ClRtlLogPrint(LOG_NOISE,
  741. "[GUM] GumPostUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
  742. Sequence,
  743. UpdateType,
  744. Context);
  745. return(ERROR_SUCCESS);
  746. } else {
  747. //
  748. // The lock is currently held. We will get called back when it is released
  749. //
  750. ClRtlLogPrint(LOG_NOISE,
  751. "[GUM] GumPostUpdate: pending update type %1!u! context %2!u!\n",
  752. UpdateType,
  753. Context);
  754. return(ERROR_IO_PENDING);
  755. }
  756. }
  757. VOID
  758. GumpDeliverPosts(
  759. IN DWORD FirstNodeId,
  760. IN GUM_UPDATE_TYPE UpdateType,
  761. IN DWORD Sequence,
  762. IN DWORD Context,
  763. IN DWORD BufferLength,
  764. IN PVOID Buffer // THIS WILL BE FREED
  765. )
  766. /*++
  767. Routine Description:
  768. Actually delivers the update post to the specified nodes.
  769. The GUM lock is assumed to be held.
  770. Arguments:
  771. FirstNodeId - Supplies the node ID where the posts should start.
  772. This is generally the LockerNode+1.
  773. UpdateType - Supplies the type of update. This determines
  774. which update handlers will be called and the sequence
  775. number to be used.
  776. Context - Supplies a DWORD of context to be passed to the
  777. GUM update handlers
  778. BufferLength - Supplies the length of the update buffer to
  779. be passed to the update handlers
  780. Buffer - Supplies a pointer to the update buffer to be passed
  781. to the update handlers. THIS BUFFER WILL BE FREED ONCE THE
  782. POST HAS COMPLETED.
  783. Return Value:
  784. None.
  785. --*/
  786. {
  787. DWORD i;
  788. PGUM_INFO GumInfo;
  789. DWORD MyNodeId;
  790. GumInfo = &GumTable[UpdateType];
  791. MyNodeId = NmGetNodeId(NmLocalNode);
  792. for (i=FirstNodeId; i<=NmMaxNodeId; i++) {
  793. if (GumInfo->ActiveNode[i]) {
  794. //
  795. // Dispatch the update to the specified node.
  796. //
  797. ClRtlLogPrint(LOG_NOISE,
  798. "[GUM] GumpDeliverPosts: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
  799. Sequence,
  800. UpdateType,
  801. Context,
  802. i);
  803. if (i == MyNodeId) {
  804. //
  805. // Update our sequence number so we stay in sync, even though
  806. // we aren't dispatching the update.
  807. //
  808. GumpSequence += 1;
  809. } else {
  810. CL_ASSERT(GumpRpcBindings[i] != NULL);
  811. ClRtlLogPrint(LOG_NOISE,
  812. "[GUM] GumpDeliverPosts: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
  813. Sequence,
  814. UpdateType,
  815. Context);
  816. GumUpdateNode(GumpRpcBindings[i],
  817. UpdateType,
  818. Context,
  819. Sequence,
  820. BufferLength,
  821. Buffer);
  822. }
  823. }
  824. }
  825. LocalFree(Buffer);
  826. }
  827. #endif
  828. DWORD
  829. WINAPI
  830. GumAttemptUpdate(
  831. IN DWORD Sequence,
  832. IN GUM_UPDATE_TYPE UpdateType,
  833. IN DWORD Context,
  834. IN DWORD BufferLength,
  835. IN PVOID Buffer
  836. )
  837. /*++
  838. Routine Description:
  839. Conditionally sends an update to all active nodes in the
  840. cluster. If the clusterwise sequence number matches the supplied
  841. sequence number, all registered update handlers for the specified
  842. UpdateType are called on each node. Any registered update handlers
  843. for the current node will be called on the same thread. This is
  844. useful for correct synchronization of the data structures to be updated.
  845. The normal usage of this routine is as follows:
  846. obtain current sequence number from GumGetCurrentSequence
  847. make modification to cluster state
  848. conditionally update cluster state with GumAttemptUpdate
  849. If update fails, undo modification, release any locks, try again later
  850. Arguments:
  851. Sequence - Supplies the sequence number obtained from GumGetCurrentSequence.
  852. UpdateType - Supplies the type of update. This determines which update handlers
  853. will be called
  854. Context - Supplies a DWORD of context to be passed to the
  855. GUM update handlers
  856. BufferLength - Supplies the length of the update buffer to be passed to the
  857. update handlers
  858. Buffer - Supplies a pointer to the update buffer to be passed to the update
  859. handlers.
  860. Return Value:
  861. ERROR_SUCCESS if the request is successful.
  862. Win32 error code on failure.
  863. --*/
  864. {
  865. DWORD Status=RPC_S_OK;
  866. DWORD i;
  867. PGUM_INFO GumInfo;
  868. DWORD MyNodeId;
  869. DWORD LockerNode=(DWORD)-1;
  870. RPC_ASYNC_STATE AsyncState;
  871. DWORD dwGenerationNum; //the generation id of the node at which the lock is acquired
  872. CL_ASSERT(UpdateType < GumUpdateMaximum);
  873. ZeroMemory((PVOID) &AsyncState, sizeof(RPC_ASYNC_STATE));
  874. AsyncState.u.hEvent = CreateEvent(
  875. NULL, // no attributes
  876. TRUE, // manual reset
  877. FALSE, // initial state unsignalled
  878. NULL // no object name
  879. );
  880. if (AsyncState.u.hEvent == NULL) {
  881. Status = GetLastError();
  882. ClRtlLogPrint(LOG_CRITICAL,
  883. "[GUM] GumAttemptUpdate: Failed to allocate event object for "
  884. "async RPC call, status %1!u!\n",
  885. Status
  886. );
  887. return (Status);
  888. }
  889. GumInfo = &GumTable[UpdateType];
  890. MyNodeId = NmGetNodeId(NmLocalNode);
  891. retryLock:
  892. LockerNode = GumpLockerNode;
  893. //
  894. // Send locking update to the locker node.
  895. //
  896. if (LockerNode == MyNodeId)
  897. {
  898. //
  899. // This node is the locker.
  900. //
  901. ClRtlLogPrint(LOG_NOISE,
  902. "[GUM] GumAttemptUpdate: Locker waiting\t\ttype %1!u! context %2!u!\n",
  903. UpdateType,
  904. Context);
  905. if (GumpTryLockingUpdate(UpdateType, MyNodeId, Sequence, &dwGenerationNum))
  906. {
  907. ClRtlLogPrint(LOG_NOISE,
  908. "[GUM] GumAttemptUpdate: Locker dispatching seq %1!u!\ttype %2!u! context %3!u!\n",
  909. Sequence,
  910. UpdateType,
  911. Context);
  912. Status = GumpDispatchUpdate(UpdateType,
  913. Context,
  914. TRUE,
  915. TRUE,
  916. BufferLength,
  917. Buffer);
  918. if (Status != ERROR_SUCCESS) {
  919. //
  920. // Note we have to use Sequence-1 for the unlock because GumpDispatchUpdate
  921. // failed and did not increment the sequence number.
  922. //
  923. GumpDoUnlockingUpdate(UpdateType, Sequence-1, MyNodeId, dwGenerationNum);
  924. }
  925. }
  926. else
  927. {
  928. Status = ERROR_CLUSTER_DATABASE_SEQMISMATCH;
  929. }
  930. }
  931. else
  932. {
  933. //
  934. //send the locking update to the locker node
  935. ClRtlLogPrint(LOG_NOISE,
  936. "[GUM] GumAttemptUpdate: queuing update\ttype %1!u! context %2!u!\n",
  937. UpdateType,
  938. Context);
  939. try {
  940. NmStartRpc(LockerNode);
  941. Status = GumAttemptLockingUpdate(GumpRpcBindings[LockerNode],
  942. MyNodeId,
  943. UpdateType,
  944. Context,
  945. Sequence,
  946. BufferLength,
  947. Buffer);
  948. NmEndRpc(LockerNode);
  949. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  950. //
  951. // An exception from RPC indicates that the other node is either dead
  952. // or insane. Kill it and retry with a new locker.
  953. //
  954. NmEndRpc(LockerNode);
  955. GumpCommFailure(GumInfo,
  956. LockerNode,
  957. GetExceptionCode(),
  958. TRUE);
  959. //
  960. // The GUM update handler must have been called to select a new locker
  961. // node.
  962. //
  963. CL_ASSERT(LockerNode != GumpLockerNode);
  964. //
  965. // Retry the locking update with the new locker node.
  966. //
  967. goto retryLock;
  968. }
  969. if (Status == ERROR_SUCCESS)
  970. {
  971. CL_ASSERT(Sequence == GumpSequence);
  972. }
  973. if(Status != RPC_S_OK) {
  974. NmDumpRpcExtErrorInfo(Status);
  975. }
  976. }
  977. if (Status != ERROR_SUCCESS) {
  978. ClRtlLogPrint(LOG_UNUSUAL,
  979. "[GUM] GumAttemptUpdate: Queued lock attempt for send type %1!d! failed %2!d!\n",
  980. UpdateType,
  981. Status);
  982. return(Status);
  983. }
  984. //
  985. // Grap the sendupdate lock to serialize with any replays
  986. //
  987. EnterCriticalSection(&GumpSendUpdateLock);
  988. if (LockerNode != GumpLockerNode) {
  989. //
  990. // Locker node changed, we need to restart again.
  991. //
  992. LeaveCriticalSection(&GumpSendUpdateLock);
  993. goto retryLock;
  994. }
  995. // The update is now committed on the locker node. All remaining nodes
  996. // must be updated successfully, or they will be killed.
  997. //
  998. for (i=LockerNode+1; i != LockerNode; i++)
  999. {
  1000. if (i == (NmMaxNodeId + 1))
  1001. {
  1002. i=ClusterMinNodeId;
  1003. if (i==LockerNode)
  1004. {
  1005. break;
  1006. }
  1007. }
  1008. if (GumInfo->ActiveNode[i])
  1009. {
  1010. //
  1011. // Dispatch the update to the specified node.
  1012. //
  1013. ClRtlLogPrint(LOG_NOISE,
  1014. "[GUM] GumAttemptUpdate: Dispatching seq %1!u!\ttype %2!u! context %3!u! to node %4!d!\n",
  1015. Sequence,
  1016. UpdateType,
  1017. Context,
  1018. i);
  1019. if (i == MyNodeId) {
  1020. Status = GumpDispatchUpdate(UpdateType,
  1021. Context,
  1022. FALSE,
  1023. TRUE,
  1024. BufferLength,
  1025. Buffer);
  1026. if (Status != ERROR_SUCCESS){
  1027. ClRtlLogPrint(LOG_CRITICAL,
  1028. "[GUM] GumAttemptUpdate: Update on non-locker node(self) failed with %1!d! when it must succeed\n",
  1029. Status);
  1030. //Commit Suicide
  1031. CsInconsistencyHalt(Status);
  1032. }
  1033. } else {
  1034. DWORD dwStatus;
  1035. ClRtlLogPrint(LOG_NOISE,
  1036. "[GUM] GumAttemptUpdate: Locker updating seq %1!u!\ttype %2!u! context %3!u!\n",
  1037. Sequence,
  1038. UpdateType,
  1039. Context);
  1040. dwStatus = GumpUpdateRemoteNode(
  1041. &AsyncState,
  1042. i,
  1043. UpdateType,
  1044. Context,
  1045. Sequence,
  1046. BufferLength,
  1047. Buffer
  1048. );
  1049. //
  1050. // If the update on the other node failed, then the
  1051. // other node must now be out of the cluster since the
  1052. // update has already completed on the locker node.
  1053. //
  1054. if (dwStatus != ERROR_SUCCESS) {
  1055. ClRtlLogPrint(LOG_CRITICAL,
  1056. "[GUM] GumAttemptUpdate: Update on node %1!d! failed with %2!d! when it must succeed\n",
  1057. i,
  1058. dwStatus);
  1059. NmDumpRpcExtErrorInfo(dwStatus);
  1060. GumpCommFailure(GumInfo,
  1061. i,
  1062. dwStatus,
  1063. TRUE);
  1064. }
  1065. }
  1066. }
  1067. }
  1068. //
  1069. // Our update is over
  1070. //
  1071. LeaveCriticalSection(&GumpSendUpdateLock);
  1072. //
  1073. // All nodes have been updated. Send unlocking update.
  1074. //
  1075. if (LockerNode == MyNodeId) {
  1076. GumpDoUnlockingUpdate(UpdateType, Sequence, MyNodeId, dwGenerationNum);
  1077. } else {
  1078. try {
  1079. NmStartRpc(LockerNode);
  1080. Status = GumUnlockUpdate(
  1081. GumpRpcBindings[LockerNode],
  1082. UpdateType,
  1083. Sequence
  1084. );
  1085. NmEndRpc(LockerNode);
  1086. } except (I_RpcExceptionFilter(RpcExceptionCode())) {
  1087. //
  1088. // The locker node has crashed. Notify the NM, it will call our
  1089. // notification routine to select a new locker node. The new
  1090. // locker node will release the gum lock after propagating
  1091. // the current update.
  1092. //
  1093. NmEndRpc(LockerNode);
  1094. Status = GetExceptionCode();
  1095. ClRtlLogPrint(LOG_CRITICAL,
  1096. "[GUM] GumAttemptUpdate: Unlocking update to node %1!d! failed with %2!d!\n",
  1097. LockerNode,
  1098. Status);
  1099. GumpCommFailure(GumInfo,
  1100. LockerNode,
  1101. Status,
  1102. TRUE);
  1103. CL_ASSERT(LockerNode != GumpLockerNode);
  1104. }
  1105. if(Status != RPC_S_OK) {
  1106. NmDumpRpcExtErrorInfo(Status);
  1107. }
  1108. }
  1109. ClRtlLogPrint(LOG_NOISE,
  1110. "[GUM] GumAttemptUpdate: completed update seq %1!u!\ttype %2!u! context %3!u!\n",
  1111. Sequence,
  1112. UpdateType,
  1113. Context);
  1114. if (AsyncState.u.hEvent != NULL) {
  1115. CloseHandle(AsyncState.u.hEvent);
  1116. }
  1117. return(ERROR_SUCCESS);
  1118. }
  1119. DWORD
  1120. WINAPI
  1121. GumGetCurrentSequence(
  1122. IN GUM_UPDATE_TYPE UpdateType
  1123. )
  1124. /*++
  1125. Routine Description:
  1126. Obtains the current clusterwise global update sequence number
  1127. Arguments:
  1128. UpdateType - Supplies the type of update. Each update type may
  1129. have an independent sequence number.
  1130. Return Value:
  1131. Current global update sequence number for the specified update type.
  1132. --*/
  1133. {
  1134. CL_ASSERT(UpdateType < GumUpdateMaximum);
  1135. return(GumpSequence);
  1136. }
  1137. VOID
  1138. GumSetCurrentSequence(
  1139. IN GUM_UPDATE_TYPE UpdateType,
  1140. DWORD Sequence
  1141. )
  1142. /*++
  1143. Routine Description:
  1144. Sets the current sequence for the specified global update.
  1145. Arguments:
  1146. UpdateType - Supplies the update type whose sequence is to be updated.
  1147. Sequence - Supplies the new sequence number.
  1148. Return Value:
  1149. None.
  1150. --*/
  1151. {
  1152. CL_ASSERT(UpdateType < GumUpdateMaximum);
  1153. GumpSequence = Sequence;
  1154. }
  1155. VOID
  1156. GumCommFailure(
  1157. IN GUM_UPDATE_TYPE UpdateType,
  1158. IN DWORD NodeId,
  1159. IN DWORD ErrorCode,
  1160. IN BOOL Wait
  1161. )
  1162. /*++
  1163. Routine Description:
  1164. Informs the NM that a fatal communication error has occurred trying
  1165. to talk to another node.
  1166. Arguments:
  1167. GumInfo - Supplies the update type where the communication failure occurred.
  1168. NodeId - Supplies the node id of the other node.
  1169. ErrorCode - Supplies the error that was returned from RPC
  1170. Wait - if TRUE, this function blocks until the GUM event handler has
  1171. processed the NodeDown notification for the specified node.
  1172. if FALSE, this function returns immediately after notifying NM
  1173. Return Value:
  1174. None.
  1175. --*/
  1176. {
  1177. PGUM_INFO GumInfo = &GumTable[UpdateType];
  1178. ClRtlLogPrint(LOG_CRITICAL,
  1179. "[GUM] GumCommFailure %1!d! communicating with node %2!d!\n",
  1180. ErrorCode,
  1181. NodeId);
  1182. GumpCommFailure(GumInfo, NodeId, ErrorCode, Wait);
  1183. }
VOID
GumpCommFailure(
    IN PGUM_INFO GumInfo,
    IN DWORD NodeId,
    IN DWORD ErrorCode,
    IN BOOL Wait
    )
/*++

Routine Description:

    Informs the NM that a fatal communication error has occurred trying
    to talk to another node.

Arguments:

    GumInfo - Supplies the GUM_INFO for the update type where the
        communication failure occurred.

    NodeId - Supplies the node id of the other node.

    ErrorCode - Supplies the error that was returned from RPC

    Wait - if TRUE, this function blocks until the GUM event handler has
        processed the NodeDown notification for the specified node.
        if FALSE, this function returns immediately after notifying NM

Return Value:

    None.

--*/
{
    DWORD dwCur;  // node generation number; only assigned/used when Wait is TRUE

    ClRtlLogPrint(LOG_CRITICAL,
                  "[GUM] GumpCommFailure %1!d! communicating with node %2!d!\n",
                  ErrorCode,
                  NodeId);

    // This is the general GUM RPC failure path, let's dump the extended error info.
    // NOTE: The dumping routine is benign, so calling this from a non RPC failure path would just return.
    NmDumpRpcExtErrorInfo(ErrorCode);

    // This is a hack to check if we are shutting down. See bug 88411
    if (ErrorCode == ERROR_SHUTDOWN_IN_PROGRESS) {
        // if we are shutting down, just kill self
        // set to our node id, so the failure below is advised against
        // this node rather than the remote one
        NodeId = NmGetNodeId(NmLocalNode);
    }

    //
    // Get current generation number.
    //
    // NOTE(review): this is captured BEFORE NmAdviseNodeFailure so that
    // GumpWaitNodeDown can presumably detect the generation change when
    // the node is declared down — ordering appears significant; confirm
    // against GumpGetNodeGenNum/GumpWaitNodeDown before reordering.
    //
    if (Wait) {
        dwCur = GumpGetNodeGenNum(GumInfo, NodeId);
    }

    // Report the failed node to the Node Manager.
    NmAdviseNodeFailure(NodeId, ErrorCode);

    if (Wait) {
        //
        // Wait for this node to be declared down and
        // GumpEventHandler to mark it as inactive.
        //
        GumpWaitNodeDown(NodeId, dwCur);
    }
}