Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1161 lines
31 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. reconnect.c
  5. Abstract:
  6. Implements the support to enable the cluster API to transparently reconnect
  7. to a cluster when the node that the connection was made to fails.
  8. This module contains wrappers for all the cluster RPC interfaces defined in
  9. api_rpc.idl. These wrappers filter out communication errors and attempt to
  10. reconnect to the cluster when a communication error occurs. This allows the
  11. caller to be completely ignorant of any node failures.
  12. Author:
  13. John Vert (jvert) 9/24/1996
  14. Revision History:
  15. --*/
  16. #include "clusapip.h"
  17. //
  18. // Local function prototypes
  19. //
  20. DWORD
  21. ReconnectKeys(
  22. IN PCLUSTER Cluster
  23. );
  24. DWORD
  25. ReopenKeyWorker(
  26. IN PCKEY Key
  27. );
  28. DWORD
  29. ReconnectResources(
  30. IN PCLUSTER Cluster
  31. );
  32. DWORD
  33. ReconnectGroups(
  34. IN PCLUSTER Cluster
  35. );
  36. DWORD
  37. ReconnectNodes(
  38. IN PCLUSTER Cluster
  39. );
  40. DWORD
  41. ReconnectNetworks(
  42. IN PCLUSTER Cluster
  43. );
  44. DWORD
  45. ReconnectNetInterfaces(
  46. IN PCLUSTER Cluster
  47. );
  48. DWORD
  49. ReconnectNotifySessions(
  50. IN PCLUSTER Cluster
  51. );
  52. DWORD
  53. ReconnectCandidate(
  54. IN PCLUSTER Cluster,
  55. IN DWORD dwIndex,
  56. OUT PBOOL pIsContinue
  57. );
  58. DWORD
  59. ReconnectCluster(
  60. IN PCLUSTER Cluster,
  61. IN DWORD Error,
  62. IN DWORD Generation
  63. )
  64. /*++
  65. Routine Description:
  66. Attempts to reconnect to the specified cluster. The supplied
  67. error code is checked against RPC errors that indicate the
  68. server on the other end is unavailable. If it matches, a
  69. reconnect is attempted.
  70. Arguments:
  71. Cluster - Supplies the cluster.
  72. Error - Supplies the error returned from RPC.
  73. Generation - Supplies the cluster connection generation that
  74. was in effect when the error occurred.
  75. Return Value:
  76. ERROR_SUCCESS if the reconnect was successful and the RPC should
  77. be retried
  78. Win32 error code otherwise.
  79. --*/
  80. {
  81. //
  82. // filter out all RPC errors that might indicate the connection
  83. // has dropped.
  84. //
  85. switch (Error) {
  86. case RPC_S_CALL_FAILED:
  87. case ERROR_INVALID_HANDLE:
  88. case RPC_S_INVALID_BINDING:
  89. case RPC_S_SERVER_UNAVAILABLE:
  90. case RPC_S_SERVER_TOO_BUSY:
  91. case RPC_S_UNKNOWN_IF:
  92. case RPC_S_CALL_FAILED_DNE:
  93. case RPC_X_SS_IN_NULL_CONTEXT:
  94. case ERROR_CLUSTER_NODE_SHUTTING_DOWN:
  95. case EPT_S_NOT_REGISTERED:
  96. case ERROR_CLUSTER_NODE_NOT_READY:
  97. case RPC_S_UNKNOWN_AUTHN_SERVICE:
  98. TIME_PRINT(("Reconnect Cluster - reconnecting on Error %d\n",Error));
  99. break;
  100. default:
  101. //
  102. // Anything else we don't know how to deal with, so return
  103. // the error directly.
  104. //
  105. return(Error);
  106. }
  107. //
  108. // Attempt to reconnect the cluster.
  109. //
  110. if ((Cluster->Flags & CLUS_DEAD) ||
  111. (Cluster->Flags & CLUS_LOCALCONNECT)) {
  112. //
  113. // Don't bother trying to reconnect. Either we've already
  114. // declared the cluster dead, or the connection was over
  115. // LPC (to the local machine) and we do not necessarily want
  116. // to try to reconnect.
  117. //
  118. if (Cluster->Flags & CLUS_LOCALCONNECT)
  119. Cluster->Flags |= CLUS_DEAD;
  120. TIME_PRINT(("ReconnectCluster - Cluster dead or local, giving up - error %d\n",Error));
  121. return(Error);
  122. }
  123. if (Generation < Cluster->Generation) {
  124. //
  125. // We have already successfully reconnected since the error occurred,
  126. // so retry immediately.
  127. //
  128. TIME_PRINT(("ReconnectCluster - Generation %d < Current %d, retrying\n",
  129. Generation,
  130. Cluster->Generation));
  131. return(ERROR_SUCCESS);
  132. }
  133. EnterCriticalSection(&Cluster->Lock);
  134. //
  135. // Check again for cluster death, in case the previous owner
  136. // of the lock declared the cluster dead.
  137. //
  138. if (Cluster->Flags & CLUS_DEAD) {
  139. TIME_PRINT(("ReconnectCluster - Cluster dead or local, giving up - error %d\n",Error));
  140. LeaveCriticalSection(&Cluster->Lock);
  141. return(Error);
  142. }
  143. if (Generation < Cluster->Generation) {
  144. //
  145. // We have already reconnected since the error occurred,
  146. // so retry immediately.
  147. //
  148. Error = ERROR_SUCCESS;
  149. TIME_PRINT(("ReconnectCluster - Generation %d < Current %d, retrying\n",
  150. Generation,
  151. Cluster->Generation));
  152. } else {
  153. DWORD i, CurrentConnectionIndex = -1;
  154. BOOL IsContinue = TRUE;
  155. for (i=0; i<Cluster->ReconnectCount; i++) {
  156. if (Cluster->Reconnect[i].IsCurrent) {
  157. //
  158. // This is something we've already connected to and
  159. // it's obviously gone, so skip this node.
  160. //
  161. TIME_PRINT(("ReconnectCluster - skipping current %ws\n",
  162. Cluster->Reconnect[i].Name));
  163. CurrentConnectionIndex = i;
  164. continue;
  165. }
  166. if (!Cluster->Reconnect[i].IsUp) {
  167. //
  168. // skip this candidate, it is not up.
  169. //
  170. // BUGBUG John Vert (jvert) 11/14/1996
  171. // We could do another pass through the list if all
  172. // the nodes that we think are up fail.
  173. //
  174. TIME_PRINT(("ReconnectCluster - skipping down node %ws\n",
  175. Cluster->Reconnect[i].Name));
  176. continue;
  177. }
  178. //
  179. // Chittur Subbaraman (chitturs) - 08/29/1998
  180. //
  181. // Try to reconnect to the cluster using a candidate
  182. //
  183. Error = ReconnectCandidate ( Cluster, i, &IsContinue );
  184. if (Error == ERROR_SUCCESS) {
  185. //
  186. // Chittur Subbaraman (chitturs) - 08/29/1998
  187. //
  188. // Break out of the loop and return if you
  189. // succeed in reconnecting
  190. //
  191. break;
  192. }
  193. if (IsContinue == FALSE) {
  194. //
  195. // Chittur Subbaraman (chitturs) - 08/29/1998
  196. //
  197. // Exit immediately if you encounter an error
  198. // that will not let you proceed any further
  199. //
  200. TIME_PRINT(("ReconnectCluster unable to continue - Exiting with code %d\n", Error));
  201. goto error_exit;
  202. }
  203. }
  204. if (Error != ERROR_SUCCESS) {
  205. //
  206. // Chittur Subbaraman (chitturs) - 08/29/98
  207. //
  208. // Try reconnecting with the current candidate (which
  209. // you skipped before), if the CurrentConnectionIndex
  210. // is valid and the party is up. This is required
  211. // in the case of a 1 node cluster in which the
  212. // client takes the cluster group offline. In this
  213. // case, the current candidate (i.e., the node) is
  214. // valid and the client should be able to retry and
  215. // reconnect to the node.
  216. //
  217. if ((CurrentConnectionIndex != -1) &&
  218. (Cluster->Reconnect[CurrentConnectionIndex].IsUp)) {
  219. Error = ReconnectCandidate (Cluster,
  220. CurrentConnectionIndex,
  221. &IsContinue);
  222. if ((Error != ERROR_SUCCESS) &&
  223. (IsContinue == FALSE)) {
  224. //
  225. // Chittur Subbaraman (chitturs) - 08/29/1998
  226. //
  227. // Exit immediately if you encounter an error
  228. // that will not let you proceed any further
  229. //
  230. TIME_PRINT(("ReconnectCluster - unable to continue for current party %ws - Exiting with code %d\n",
  231. Cluster->Reconnect[CurrentConnectionIndex].Name, Error));
  232. goto error_exit;
  233. }
  234. } else {
  235. TIME_PRINT(("ReconnectCluster - unable to retry for current party %ws - Error %d\n",
  236. Cluster->Reconnect[CurrentConnectionIndex].Name, Error));
  237. }
  238. if (Error != ERROR_SUCCESS) {
  239. TIME_PRINT(("ReconnectCluster - all reconnects failed, giving up - error %d\n", Error));
  240. Cluster->Flags |= CLUS_DEAD;
  241. }
  242. }
  243. }
  244. error_exit:
  245. LeaveCriticalSection(&Cluster->Lock);
  246. return(Error);
  247. }
  248. DWORD
  249. ReconnectKeys(
  250. IN PCLUSTER Cluster
  251. )
  252. /*++
  253. Routine Description:
  254. Reopens all cluster registry keys after a reconnect
  255. Arguments:
  256. Cluster - Supplies the cluster to be reconnected.
  257. Return Value:
  258. ERROR_SUCCESS if successful
  259. Win32 error code otherwise
  260. --*/
  261. {
  262. PLIST_ENTRY ListEntry;
  263. PCKEY Key;
  264. DWORD Status;
  265. ListEntry = Cluster->KeyList.Flink;
  266. while (ListEntry != &Cluster->KeyList) {
  267. //
  268. // Each key in the cluster's list represents the
  269. // root of a registry tree.
  270. //
  271. Key = CONTAINING_RECORD(ListEntry,
  272. CKEY,
  273. ParentList);
  274. ListEntry = ListEntry->Flink;
  275. Status = ReopenKeyWorker(Key);
  276. if (Status != ERROR_SUCCESS) {
  277. return(Status);
  278. }
  279. }
  280. return(ERROR_SUCCESS);
  281. }
  282. DWORD
  283. ReopenKeyWorker(
  284. IN PCKEY Key
  285. )
  286. /*++
  287. Routine Description:
  288. Recursive worker routine for opening a key and all its children.
  289. Arguments:
  290. Key - Supplies the root key to reopen.
  291. Return Value:
  292. ERROR_SUCCESS if successful.
  293. Win32 error code otherwise
  294. --*/
  295. {
  296. PLIST_ENTRY ListEntry;
  297. PCKEY Child;
  298. DWORD Status = ERROR_GEN_FAILURE;
  299. BOOL CloseAfterOpen;
  300. if (Key->RemoteKey != NULL) {
  301. //
  302. // Destroy the old context
  303. //
  304. Status = MyRpcSmDestroyClientContext(Key->Cluster, &Key->RemoteKey);
  305. if (Status != ERROR_SUCCESS) {
  306. TIME_PRINT(("ReopenKeyWorker - RpcSmDestroyClientContext failed Error %d\n",Status));
  307. }
  308. CloseAfterOpen = FALSE;
  309. } else {
  310. CloseAfterOpen = TRUE;
  311. }
  312. //
  313. // Next, reopen this key.
  314. //
  315. if (Key->Parent == NULL) {
  316. Key->RemoteKey = ApiGetRootKey(Key->Cluster->RpcBinding,
  317. Key->SamDesired,
  318. &Status);
  319. } else {
  320. Key->RemoteKey = ApiOpenKey(Key->Parent->RemoteKey,
  321. Key->RelativeName,
  322. Key->SamDesired,
  323. &Status);
  324. }
  325. if (Key->RemoteKey == NULL) {
  326. return(Status);
  327. }
  328. //
  329. // Now open all this keys children recursively.
  330. //
  331. ListEntry = Key->ChildList.Flink;
  332. while (ListEntry != &Key->ChildList) {
  333. Child = CONTAINING_RECORD(ListEntry,
  334. CKEY,
  335. ParentList);
  336. ListEntry = ListEntry->Flink;
  337. Status = ReopenKeyWorker(Child);
  338. if (Status != ERROR_SUCCESS) {
  339. return(Status);
  340. }
  341. }
  342. //
  343. // If the key had been closed and was just kept around to do the reopens, close it
  344. // now as the reopens are done.
  345. //
  346. if (CloseAfterOpen) {
  347. ApiCloseKey(&Key->RemoteKey);
  348. }
  349. return(ERROR_SUCCESS);
  350. }
  351. DWORD
  352. ReconnectResources(
  353. IN PCLUSTER Cluster
  354. )
  355. /*++
  356. Routine Description:
  357. Reopens all cluster resources after a reconnect
  358. Arguments:
  359. Cluster - Supplies the cluster to be reconnected.
  360. Return Value:
  361. ERROR_SUCCESS if successful
  362. Win32 error code otherwise
  363. --*/
  364. {
  365. PLIST_ENTRY ListEntry;
  366. PCRESOURCE Resource;
  367. DWORD Status;
  368. ListEntry = Cluster->ResourceList.Flink;
  369. while (ListEntry != &Cluster->ResourceList) {
  370. Resource = CONTAINING_RECORD(ListEntry,
  371. CRESOURCE,
  372. ListEntry);
  373. ListEntry = ListEntry->Flink;
  374. //
  375. // Close the current RPC handle.
  376. //
  377. TIME_PRINT(("ReconnectResources - destroying context %08lx\n",Resource->hResource));
  378. Status = MyRpcSmDestroyClientContext(Cluster, &Resource->hResource);
  379. if (Status != ERROR_SUCCESS) {
  380. TIME_PRINT(("ReconnectResources - RpcSmDestroyClientContext failed Error %d\n",Status));
  381. }
  382. //
  383. // Open a new RPC handle.
  384. //
  385. Resource->hResource = ApiOpenResource(Cluster->RpcBinding,
  386. Resource->Name,
  387. &Status);
  388. if (Resource->hResource == NULL) {
  389. TIME_PRINT(("ReconnectResources: failed to reopen resource %ws\n",Resource->Name));
  390. return(Status);
  391. }
  392. }
  393. return(ERROR_SUCCESS);
  394. }
  395. DWORD
  396. ReconnectGroups(
  397. IN PCLUSTER Cluster
  398. )
  399. /*++
  400. Routine Description:
  401. Reopens all cluster groups after a reconnect
  402. Arguments:
  403. Cluster - Supplies the cluster to be reconnected.
  404. Return Value:
  405. ERROR_SUCCESS if successful
  406. Win32 error code otherwise
  407. --*/
  408. {
  409. PLIST_ENTRY ListEntry;
  410. PCGROUP Group;
  411. DWORD Status;
  412. ListEntry = Cluster->GroupList.Flink;
  413. while (ListEntry != &Cluster->GroupList) {
  414. Group = CONTAINING_RECORD(ListEntry,
  415. CGROUP,
  416. ListEntry);
  417. ListEntry = ListEntry->Flink;
  418. //
  419. // Close the old RPC handle
  420. //
  421. TIME_PRINT(("ReconnectGroups - destroying context %08lx\n",Group->hGroup));
  422. Status = MyRpcSmDestroyClientContext(Cluster, &Group->hGroup);
  423. if (Status != ERROR_SUCCESS) {
  424. TIME_PRINT(("ReconnectGroups - RpcSmDestroyClientContext failed Error %d\n",Status));
  425. }
  426. //
  427. // Open a new RPC handle.
  428. //
  429. Group->hGroup = ApiOpenGroup(Cluster->RpcBinding,
  430. Group->Name,
  431. &Status);
  432. if (Group->hGroup == NULL) {
  433. return(Status);
  434. }
  435. }
  436. return(ERROR_SUCCESS);
  437. }
  438. DWORD
  439. ReconnectNodes(
  440. IN PCLUSTER Cluster
  441. )
  442. /*++
  443. Routine Description:
  444. Reopens all cluster nodes after a reconnect
  445. Arguments:
  446. Cluster - Supplies the cluster to be reconnected.
  447. Return Value:
  448. ERROR_SUCCESS if successful
  449. Win32 error code otherwise
  450. --*/
  451. {
  452. PLIST_ENTRY ListEntry;
  453. PCNODE Node;
  454. DWORD Status;
  455. ListEntry = Cluster->NodeList.Flink;
  456. while (ListEntry != &Cluster->NodeList) {
  457. Node = CONTAINING_RECORD(ListEntry,
  458. CNODE,
  459. ListEntry);
  460. ListEntry = ListEntry->Flink;
  461. //
  462. // Close the old RPC handle.
  463. //
  464. TIME_PRINT(("ReconnectNodes - destroying context %08lx\n",Node->hNode));
  465. Status = MyRpcSmDestroyClientContext(Cluster, &Node->hNode);
  466. if (Status != ERROR_SUCCESS) {
  467. TIME_PRINT(("ReconnectNodes - RpcSmDestroyClientContext failed Error %d\n",Status));
  468. }
  469. //
  470. // Open a new RPC handle.
  471. //
  472. Node->hNode = ApiOpenNode(Cluster->RpcBinding,
  473. Node->Name,
  474. &Status);
  475. if (Node->hNode == NULL) {
  476. return(Status);
  477. }
  478. }
  479. return(ERROR_SUCCESS);
  480. }
  481. DWORD
  482. ReconnectNetworks(
  483. IN PCLUSTER Cluster
  484. )
  485. /*++
  486. Routine Description:
  487. Reopens all cluster networks after a reconnect
  488. Arguments:
  489. Cluster - Supplies the cluster to be reconnected.
  490. Return Value:
  491. ERROR_SUCCESS if successful
  492. Win32 error code otherwise
  493. --*/
  494. {
  495. PLIST_ENTRY ListEntry;
  496. PCNETWORK Network;
  497. DWORD Status;
  498. ListEntry = Cluster->NetworkList.Flink;
  499. while (ListEntry != &Cluster->NetworkList) {
  500. Network = CONTAINING_RECORD(ListEntry,
  501. CNETWORK,
  502. ListEntry);
  503. ListEntry = ListEntry->Flink;
  504. //
  505. // Close the old RPC handle.
  506. //
  507. TIME_PRINT(("ReconnectNetworks - destroying context %08lx\n",Network->hNetwork));
  508. Status = MyRpcSmDestroyClientContext(Cluster, &Network->hNetwork);
  509. if (Status != ERROR_SUCCESS) {
  510. TIME_PRINT(("ReconnectNetworks - RpcSmDestroyClientContext failed Error %d\n",Status));
  511. }
  512. //
  513. // Open a new RPC handle.
  514. //
  515. Network->hNetwork = ApiOpenNetwork(Cluster->RpcBinding,
  516. Network->Name,
  517. &Status);
  518. if (Network->hNetwork == NULL) {
  519. return(Status);
  520. }
  521. }
  522. return(ERROR_SUCCESS);
  523. }
  524. DWORD
  525. ReconnectNetInterfaces(
  526. IN PCLUSTER Cluster
  527. )
  528. /*++
  529. Routine Description:
  530. Reopens all cluster network interfaces after a reconnect
  531. Arguments:
  532. Cluster - Supplies the cluster to be reconnected.
  533. Return Value:
  534. ERROR_SUCCESS if successful
  535. Win32 error code otherwise
  536. --*/
  537. {
  538. PLIST_ENTRY ListEntry;
  539. PCNETINTERFACE NetInterface;
  540. DWORD Status;
  541. ListEntry = Cluster->NetInterfaceList.Flink;
  542. while (ListEntry != &Cluster->NetInterfaceList) {
  543. NetInterface = CONTAINING_RECORD(ListEntry,
  544. CNETINTERFACE,
  545. ListEntry);
  546. ListEntry = ListEntry->Flink;
  547. //
  548. // Close the old RPC handle.
  549. //
  550. TIME_PRINT(("ReconnectNetInterfaces - destroying context %08lx\n",NetInterface->hNetInterface));
  551. Status = MyRpcSmDestroyClientContext(Cluster, &NetInterface->hNetInterface);
  552. if (Status != ERROR_SUCCESS) {
  553. TIME_PRINT(("ReconnectNetInterfaces - RpcSmDestroyClientContext failed Error %d\n",Status));
  554. }
  555. //
  556. // Open a new RPC handle.
  557. //
  558. NetInterface->hNetInterface = ApiOpenNetInterface(Cluster->RpcBinding,
  559. NetInterface->Name,
  560. &Status);
  561. if (NetInterface->hNetInterface == NULL) {
  562. return(Status);
  563. }
  564. }
  565. return(ERROR_SUCCESS);
  566. }
  567. DWORD
  568. ReconnectNotifySessions(
  569. IN PCLUSTER Cluster
  570. )
  571. /*++
  572. Routine Description:
  573. Reopens all cluster notify sessions after a reconnect
  574. Arguments:
  575. Cluster - Supplies the cluster to be reconnected.
  576. Return Value:
  577. ERROR_SUCCESS if successful
  578. Win32 error code otherwise
  579. --*/
  580. {
  581. PLIST_ENTRY ListEntry, NotifyListEntry;
  582. PCNOTIFY_SESSION Session;
  583. DWORD Status;
  584. PCNOTIFY_PACKET Packet = NULL;
  585. PLIST_ENTRY EventEntry;
  586. PCNOTIFY_EVENT NotifyEvent;
  587. LPCWSTR Name;
  588. ListEntry = Cluster->SessionList.Flink;
  589. while (ListEntry != &Cluster->SessionList) {
  590. Session = CONTAINING_RECORD(ListEntry,
  591. CNOTIFY_SESSION,
  592. ClusterList);
  593. ListEntry = ListEntry->Flink;
  594. //
  595. // Close the old RPC handle.
  596. //
  597. TIME_PRINT(("ReconnectNotifySessions - destroying context 0x%08lx\n",Session->hNotify));
  598. //close the old port, since the reconnect may connect to the same
  599. //node again
  600. Status = ApiCloseNotify(&Session->hNotify);
  601. if (Status != ERROR_SUCCESS)
  602. {
  603. TIME_PRINT(("ReconnectNotifySessions - ApiCloseNotify failed %d\n",
  604. Status));
  605. Status = MyRpcSmDestroyClientContext(Cluster, &Session->hNotify);
  606. if (Status != ERROR_SUCCESS) {
  607. TIME_PRINT(("ReconnectNotifySessions - RpcSmDestroyClientContext failed Error %d\n",Status));
  608. }
  609. }
  610. //
  611. // Open a new RPC handle.
  612. //
  613. TIME_PRINT(("ReconnectNotifySessions - Calling ApiCreateNotify\n"));
  614. Session->hNotify = ApiCreateNotify(Cluster->RpcBinding,
  615. &Status);
  616. if (Session->hNotify == NULL) {
  617. return(Status);
  618. }
  619. TIME_PRINT(("ReconnectNotifySessions - Session=0x%08lx Notify=0x%08x\n",
  620. Session, Session->hNotify));
  621. //
  622. // Now repost all the notifications
  623. //
  624. EventEntry = Session->EventList.Flink;
  625. while (EventEntry != &Session->EventList) {
  626. NotifyEvent = CONTAINING_RECORD(EventEntry,
  627. CNOTIFY_EVENT,
  628. ListEntry);
  629. EventEntry = EventEntry->Flink;
  630. TIME_PRINT(("ReconnectNotifySession: registering event type %lx\n",NotifyEvent->dwFilter));
  631. Status = ReRegisterNotifyEvent(Session,
  632. NotifyEvent,
  633. NULL);
  634. if (Status != ERROR_SUCCESS) {
  635. return(Status);
  636. }
  637. }
  638. // Run down the notify list for this cluster and post a packet for
  639. // each registered notify event for CLUSTER_CHANGE_RECONNECT_EVENT
  640. //
  641. Name = Cluster->ClusterName;
  642. NotifyListEntry = Cluster->NotifyList.Flink;
  643. while (NotifyListEntry != &Cluster->NotifyList) {
  644. NotifyEvent = CONTAINING_RECORD(NotifyListEntry,
  645. CNOTIFY_EVENT,
  646. ObjectList);
  647. if (NotifyEvent->dwFilter & CLUSTER_CHANGE_CLUSTER_RECONNECT) {
  648. if (Packet == NULL) {
  649. Packet = LocalAlloc(LMEM_FIXED, sizeof(CNOTIFY_PACKET));
  650. if (Packet == NULL) {
  651. return(ERROR_NOT_ENOUGH_MEMORY);
  652. }
  653. }
  654. //SS: Dont know what the Status was meant for
  655. //It looks like it is not being used
  656. Packet->Status = ERROR_SUCCESS;
  657. Packet->Filter = CLUSTER_CHANGE_CLUSTER_RECONNECT;
  658. Packet->KeyId = NotifyEvent->EventId;
  659. Packet->Name = MIDL_user_allocate((lstrlenW(Name)+1)*sizeof(WCHAR));
  660. if (Packet->Name != NULL) {
  661. lstrcpyW(Packet->Name, Name);
  662. }
  663. TIME_PRINT(("NotifyThread - posting CLUSTER_CHANGE_CLUSTER_RECONNECT to notify queue\n"));
  664. ClRtlInsertTailQueue(&Session->ParentNotify->Queue,
  665. &Packet->ListEntry);
  666. Packet = NULL;
  667. }
  668. NotifyListEntry = NotifyListEntry->Flink;
  669. }
  670. }
  671. return(ERROR_SUCCESS);
  672. }
  673. DWORD
  674. GetReconnectCandidates(
  675. IN PCLUSTER Cluster
  676. )
  677. /*++
  678. Routine Description:
  679. Computes the list of reconnect candidates that will be used
  680. in case of a connection failure.
  681. Arguments:
  682. Cluster - supplies the cluster
  683. Return Value:
  684. ERROR_SUCCESS if successful
  685. Win32 error code otherwise
  686. --*/
  687. {
  688. DWORD Status;
  689. PENUM_LIST EnumList = NULL;
  690. DWORD i;
  691. //
  692. // Real bad algorithm here, just get a list of all the nodes
  693. //
  694. Status = ApiCreateEnum(Cluster->RpcBinding,
  695. CLUSTER_ENUM_NODE,
  696. &EnumList);
  697. if (Status != ERROR_SUCCESS) {
  698. return(Status);
  699. }
  700. Cluster->ReconnectCount = EnumList->EntryCount + 1;
  701. Cluster->Reconnect = LocalAlloc(LMEM_FIXED, sizeof(RECONNECT_CANDIDATE)*Cluster->ReconnectCount);
  702. if (Cluster->Reconnect == NULL) {
  703. MIDL_user_free(EnumList);
  704. return(ERROR_NOT_ENOUGH_MEMORY);
  705. }
  706. for (i=0; i<Cluster->ReconnectCount-1; i++) {
  707. Cluster->Reconnect[i].IsUp = TRUE;
  708. Cluster->Reconnect[i].Name = EnumList->Entry[i].Name;
  709. if (lstrcmpiW(Cluster->Reconnect[i].Name, Cluster->NodeName) == 0) {
  710. Cluster->Reconnect[i].IsCurrent = TRUE;
  711. } else {
  712. Cluster->Reconnect[i].IsCurrent = FALSE;
  713. }
  714. }
  715. MIDL_user_free(EnumList);
  716. //
  717. // Now add the cluster name.
  718. //
  719. Cluster->Reconnect[i].IsUp = TRUE;
  720. Cluster->Reconnect[i].Name = MIDL_user_allocate((lstrlenW(Cluster->ClusterName)+1)*sizeof(WCHAR));
  721. if (Cluster->Reconnect[i].Name == NULL) {
  722. //
  723. // Just forget about the cluster name.
  724. //
  725. --Cluster->ReconnectCount;
  726. } else {
  727. lstrcpyW(Cluster->Reconnect[i].Name, Cluster->ClusterName);
  728. Cluster->Reconnect[i].IsCurrent = FALSE;
  729. }
  730. return(ERROR_SUCCESS);
  731. }
  732. VOID
  733. FreeReconnectCandidates(
  734. IN PCLUSTER Cluster
  735. )
  736. /*++
  737. Routine Description:
  738. Frees and cleans up any reconnect candidates
  739. Arguments:
  740. Cluster - Supplies the cluster
  741. Return Value:
  742. None.
  743. --*/
  744. {
  745. DWORD i;
  746. for (i=0; i<Cluster->ReconnectCount; i++) {
  747. MIDL_user_free(Cluster->Reconnect[i].Name);
  748. }
  749. LocalFree(Cluster->Reconnect);
  750. Cluster->Reconnect = NULL;
  751. Cluster->ReconnectCount = 0;
  752. }
  753. DWORD
  754. ReconnectCandidate(
  755. IN PCLUSTER Cluster,
  756. IN DWORD dwIndex,
  757. OUT PBOOL pIsContinue
  758. )
  759. /*++
  760. Routine Description:
  761. Try to reconnect to the cluster using a reconnection candidate.
  762. Called with lock held.
  763. Arguments:
  764. Cluster - Supplies the cluster
  765. dwIndex - Supplies the index of the reconnection candidate in the
  766. Cluster->Reconnect[] array
  767. pIsContinue - Helps decide whether to continue trying reconnection
  768. with other candidates in case this try with the
  769. current candidate fails
  770. Return Value:
  771. ERROR_SUCCESS if successful
  772. Win32 error code otherwise
  773. --*/
  774. {
  775. LPWSTR NewClusterName;
  776. LPWSTR NewNodeName;
  777. WCHAR *Binding = NULL;
  778. RPC_BINDING_HANDLE NewBinding;
  779. RPC_BINDING_HANDLE OldBinding;
  780. DWORD Status, j;
  781. //
  782. // Go ahead and try the reconnect.
  783. //
  784. TIME_PRINT(("ReconnectCandidate - Binding to %ws\n",Cluster->Reconnect[dwIndex].Name));
  785. Status = RpcStringBindingComposeW(L"b97db8b2-4c63-11cf-bff6-08002be23f2f",
  786. L"ncadg_ip_udp",
  787. Cluster->Reconnect[dwIndex].Name,
  788. NULL,
  789. NULL,
  790. &Binding);
  791. if (Status != RPC_S_OK) {
  792. TIME_PRINT(("ReconnectCandidate - RpcStringBindingComposeW failed %d\n", Status));
  793. *pIsContinue = FALSE;
  794. return(Status);
  795. }
  796. Status = RpcBindingFromStringBindingW(Binding, &NewBinding);
  797. RpcStringFreeW(&Binding);
  798. if (Status != RPC_S_OK) {
  799. TIME_PRINT(("ReconnectCandidate - RpcBindingFromStringBindingW failed %d\n", Status));
  800. *pIsContinue = FALSE;
  801. return(Status);
  802. }
  803. //
  804. // Resolve the binding handle endpoint
  805. //
  806. TIME_PRINT(("ReconnectCluster - resolving binding endpoint\n"));
  807. Status = RpcEpResolveBinding(NewBinding,
  808. clusapi_v2_0_c_ifspec);
  809. if (Status != RPC_S_OK) {
  810. TIME_PRINT(("ReconnectCandidate - RpcEpResolveBinding failed %d\n", Status));
  811. *pIsContinue = TRUE;
  812. return(Status);
  813. }
  814. TIME_PRINT(("ReconnectCandidate - binding endpoint resolved\n"));
  815. //
  816. // Set authentication information
  817. //
  818. Status = RpcBindingSetAuthInfoW(NewBinding,
  819. NULL,
  820. Cluster->AuthnLevel,
  821. RPC_C_AUTHN_WINNT,
  822. NULL,
  823. RPC_C_AUTHZ_NAME);
  824. if (Status != RPC_S_OK) {
  825. TIME_PRINT(("ReconnectCandidate - RpcBindingSetAuthInfoW failed %d\n", Status));
  826. *pIsContinue = FALSE;
  827. return(Status);
  828. }
  829. OldBinding = Cluster->RpcBinding;
  830. Cluster->RpcBinding = NewBinding;
  831. MyRpcBindingFree(Cluster, &OldBinding);
  832. //
  833. // Now that we have a binding, get the cluster name and node name.
  834. //
  835. NewClusterName = NewNodeName = NULL;
  836. Status = ApiGetClusterName(Cluster->RpcBinding,
  837. &NewClusterName,
  838. &NewNodeName);
  839. if (Status != RPC_S_OK) {
  840. //
  841. // Try the next candidate in our list.
  842. //
  843. TIME_PRINT(("ReconnectCandidate - ApiGetClusterName failed %d\n",Status));
  844. *pIsContinue = TRUE;
  845. return(Status);
  846. }
  847. TIME_PRINT(("ReconnectCandidate - ApiGetClusterName succeeded, reopening handles\n",Status));
  848. MIDL_user_free(Cluster->ClusterName);
  849. MIDL_user_free(Cluster->NodeName);
  850. Cluster->ClusterName = NewClusterName;
  851. Cluster->NodeName = NewNodeName;
  852. if (Cluster->hCluster != NULL) {
  853. MyRpcSmDestroyClientContext(Cluster, &Cluster->hCluster);
  854. }
  855. Cluster->hCluster = ApiOpenCluster(Cluster->RpcBinding, &Status);
  856. if (Cluster->hCluster == NULL) {
  857. TIME_PRINT(("ReconnectCandidate - ApiOpenCluster failed %d\n", Status));
  858. *pIsContinue = TRUE;
  859. return(Status);
  860. }
  861. //
  862. // We got this far, so assume we have a valid connection to a new server.
  863. // Reopen the cluster objects.
  864. //
  865. Status = ReconnectKeys(Cluster);
  866. if (Status != ERROR_SUCCESS) {
  867. TIME_PRINT(("ReconnectCandidate - ReconnectKeys failed %d\n", Status));
  868. *pIsContinue = TRUE;
  869. return(Status);
  870. }
  871. Status = ReconnectResources(Cluster);
  872. if (Status != ERROR_SUCCESS) {
  873. TIME_PRINT(("ReconnectCandidate - ReconnectResources failed %d\n", Status));
  874. *pIsContinue = TRUE;
  875. return(Status);
  876. }
  877. Status = ReconnectGroups(Cluster);
  878. if (Status != ERROR_SUCCESS) {
  879. TIME_PRINT(("ReconnectCandidate - ReconnectGroups failed %d\n", Status));
  880. *pIsContinue = TRUE;
  881. return(Status);
  882. }
  883. Status = ReconnectNodes(Cluster);
  884. if (Status != ERROR_SUCCESS) {
  885. TIME_PRINT(("ReconnectCandidate - ReconnectNodes failed %d\n", Status));
  886. *pIsContinue = TRUE;
  887. return(Status);
  888. }
  889. Status = ReconnectNetworks(Cluster);
  890. if (Status != ERROR_SUCCESS) {
  891. TIME_PRINT(("ReconnectCandidate - ReconnectNetworks failed %d\n", Status));
  892. *pIsContinue = TRUE;
  893. return(Status);
  894. }
  895. Status = ReconnectNetInterfaces(Cluster);
  896. if (Status != ERROR_SUCCESS) {
  897. TIME_PRINT(("ReconnectCandidate - ReconnectNetInterfaces failed %d\n", Status));
  898. *pIsContinue = TRUE;
  899. return(Status);
  900. }
  901. //
  902. // Finally, reissue clusterwide notification events.
  903. //
  904. Status = ReconnectNotifySessions(Cluster);
  905. if (Status != ERROR_SUCCESS) {
  906. TIME_PRINT(("ReconnectCandidate - ReconnectNotifySessions failed %d\n", Status));
  907. *pIsContinue = TRUE;
  908. return(Status);
  909. }
  910. //
  911. // We have successfully reconnected!
  912. //
  913. ++Cluster->Generation;
  914. //
  915. // Mark all the other reconnect candidates as not the current.
  916. // Mark the successful reconnect candidate as current.
  917. //
  918. for (j=0; j<Cluster->ReconnectCount; j++) {
  919. if (j != dwIndex) {
  920. Cluster->Reconnect[j].IsCurrent = FALSE;
  921. } else {
  922. Cluster->Reconnect[dwIndex].IsCurrent = TRUE;
  923. }
  924. }
  925. TIME_PRINT(("ReconnectCandidate - successful!\n", Status));
  926. return (ERROR_SUCCESS);
  927. }