Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4574 lines
110 KiB

  1. /*++
  2. Copyright (c) 1996-1999 Microsoft Corporation
  3. Module Name:
  4. node.c
  5. Abstract:
  6. Private Node Manager routines.
  7. Author:
  8. Mike Massa (mikemas) 12-Mar-1996
  9. Revision History:
  10. --*/
  11. #define UNICODE 1
  12. #include "nmp.h"
  13. /////////////////////////////////////////////////////////////////////////////
  14. //
  15. // Data
  16. //
  17. /////////////////////////////////////////////////////////////////////////////
  18. ULONG NmMaxNodes = ClusterInvalidNodeId;
  19. CL_NODE_ID NmMaxNodeId = ClusterInvalidNodeId;
  20. CL_NODE_ID NmLocalNodeId = ClusterInvalidNodeId;
  21. PNM_NODE NmLocalNode = NULL;
  22. WCHAR NmLocalNodeIdString[CS_MAX_NODE_ID_LENGTH+1];
  23. WCHAR NmLocalNodeName[CS_MAX_NODE_NAME_LENGTH+1];
  24. LIST_ENTRY NmpNodeList = {NULL, NULL};
  25. PNM_NODE * NmpIdArray = NULL;
  26. DWORD NmpNodeCount = 0;
  27. BOOL NmpLastNodeEvicted = FALSE;
  28. BOOL NmLocalNodeVersionChanged = FALSE;
  29. LIST_ENTRY * NmpIntraClusterRpcArr=NULL;
  30. CRITICAL_SECTION NmpRPCLock;
  31. #if DBG
  32. DWORD NmpRpcTimer=0;
  33. #endif // DBG
  34. ///////////////////////////////////////////////////////////////////////////
  35. //
  36. // Initialization/Cleanup Routines
  37. //
  38. ///////////////////////////////////////////////////////////////////////////
  39. VOID
  40. NmpCleanupNodes(
  41. VOID
  42. )
  43. {
  44. PNM_NODE node;
  45. PLIST_ENTRY entry, nextEntry;
  46. DWORD status;
  47. ClRtlLogPrint(LOG_NOISE,"[NM] Node cleanup starting...\n");
  48. NmpAcquireLock();
  49. while (!IsListEmpty(&NmpNodeList)) {
  50. entry = NmpNodeList.Flink;
  51. node = CONTAINING_RECORD(entry, NM_NODE, Linkage);
  52. if (node == NmLocalNode) {
  53. entry = node->Linkage.Flink;
  54. if (entry == &NmpNodeList) {
  55. break;
  56. }
  57. node = CONTAINING_RECORD(entry, NM_NODE, Linkage);
  58. }
  59. CL_ASSERT(NM_OM_INSERTED(node));
  60. CL_ASSERT(!NM_DELETE_PENDING(node));
  61. NmpDeleteNodeObject(node, FALSE);
  62. }
  63. NmpReleaseLock();
  64. ClRtlLogPrint(LOG_NOISE,"[NM] Node cleanup complete\n");
  65. return;
  66. } // NmpCleanupNodes
  67. /////////////////////////////////////////////////////////////////////////////
  68. //
  69. // Remote procedures called by joining nodes or on behalf of joining nodes.
  70. //
  71. /////////////////////////////////////////////////////////////////////////////
  72. error_status_t
  73. s_NmRpcEnumNodeDefinitions(
  74. IN handle_t IDL_handle,
  75. IN DWORD JoinSequence, OPTIONAL
  76. IN LPWSTR JoinerNodeId, OPTIONAL
  77. OUT PNM_NODE_ENUM * NodeEnum1
  78. )
  79. {
  80. DWORD status = ERROR_CLUSTER_INCOMPATIBLE_VERSIONS;
  81. ClRtlLogPrint(LOG_UNUSUAL,
  82. "[NMJOIN] Refusing node info to joining node nodeid=%1!ws!. Aborting join, obsolete interface.\n",
  83. JoinerNodeId
  84. );
  85. return(status);
  86. } // s_NmRpcEnumNodeDefinitions
  87. error_status_t
  88. s_NmRpcEnumNodeDefinitions2(
  89. IN handle_t IDL_handle,
  90. IN DWORD JoinSequence, OPTIONAL
  91. IN LPWSTR JoinerNodeId, OPTIONAL
  92. OUT PNM_NODE_ENUM2 * NodeEnum
  93. )
  94. {
  95. DWORD status = ERROR_SUCCESS;
  96. PNM_NODE joinerNode = NULL;
  97. NmpAcquireLock();
  98. if (NmpLockedEnterApi(NmStateOnline)) {
  99. ClRtlLogPrint(LOG_NOISE,
  100. "[NMJOIN] Supplying node information to joining node.\n"
  101. );
  102. if (lstrcmpW(JoinerNodeId, NmpInvalidJoinerIdString) != 0) {
  103. joinerNode = OmReferenceObjectById(
  104. ObjectTypeNode,
  105. JoinerNodeId
  106. );
  107. if (joinerNode != NULL) {
  108. if ( (JoinSequence == NmpJoinSequence) &&
  109. (NmpJoinerNodeId == joinerNode->NodeId) &&
  110. (NmpSponsorNodeId == NmLocalNodeId) &&
  111. !NmpJoinAbortPending
  112. )
  113. {
  114. CL_ASSERT(joinerNode->State == ClusterNodeJoining);
  115. CL_ASSERT(NmpJoinerUp == FALSE);
  116. CL_ASSERT(NmpJoinTimer != 0);
  117. //
  118. // Suspend the join timer while we are working on
  119. // behalf of the joiner. This precludes an abort
  120. // from occuring as well.
  121. //
  122. NmpJoinTimer = 0;
  123. }
  124. else {
  125. status = ERROR_CLUSTER_JOIN_ABORTED;
  126. ClRtlLogPrint(LOG_UNUSUAL,
  127. "[NMJOIN] EnumNodeDefinitions call for joining node %1!ws! failed because the join was aborted.\n",
  128. JoinerNodeId
  129. );
  130. }
  131. }
  132. else {
  133. status = ERROR_CLUSTER_NODE_NOT_MEMBER;
  134. ClRtlLogPrint(LOG_UNUSUAL,
  135. "[NMJOIN] EnumNodeDefinitions call for joining node %1!ws! failed because the node is not a member of the cluster.\n",
  136. JoinerNodeId
  137. );
  138. }
  139. }
  140. if (status == ERROR_SUCCESS) {
  141. status = NmpEnumNodeObjects(NodeEnum);
  142. if (joinerNode != NULL) {
  143. if (status == ERROR_SUCCESS) {
  144. //
  145. // Restart the join timer.
  146. //
  147. NmpJoinTimer = NM_JOIN_TIMEOUT;
  148. }
  149. else {
  150. ClRtlLogPrint(LOG_CRITICAL,
  151. "[NMJOIN] EnumNodeDefinitions failed, status %1!u!.\n",
  152. status
  153. );
  154. //
  155. // Abort the join
  156. //
  157. NmpJoinAbort(status, joinerNode);
  158. }
  159. }
  160. }
  161. if (joinerNode != NULL) {
  162. OmDereferenceObject(joinerNode);
  163. }
  164. NmpLockedLeaveApi();
  165. }
  166. else {
  167. status = ERROR_NODE_NOT_AVAILABLE;
  168. ClRtlLogPrint(LOG_NOISE,
  169. "[NM] Not in valid state to process EnumNodeDefinitions request.\n"
  170. );
  171. }
  172. NmpReleaseLock();
  173. return(status);
  174. } // s_NmRpcEnumNodeDefinitions2
  175. error_status_t
  176. s_NmRpcAddNode(
  177. IN handle_t IDL_handle,
  178. IN LPCWSTR NewNodeName,
  179. IN DWORD NewNodeHighestVersion,
  180. IN DWORD NewNodeLowestVersion,
  181. IN DWORD NewNodeProductSuite
  182. )
  183. /*++
  184. Routine Description:
  185. Adds a new node to the cluster by selecting an ID and
  186. issuing a global update.
  187. Arguments:
  188. IDL_handle - RPC client interface handle.
  189. NewNodeName - A pointer to a string containing the name of the
  190. new node.
  191. NewNodeHighestVersion - The highest cluster version number that the
  192. new node can support.
  193. NewNodeLowestVersion - The lowest cluster version number that the
  194. new node can support.
  195. NewNodeProductSuite - The product suite identifier for the new node.
  196. Return Value:
  197. A Win32 status code.
  198. Notes:
  199. Called with NmpLock held.
  200. --*/
  201. {
  202. DWORD status;
  203. DWORD registryNodeLimit;
  204. ClRtlLogPrint(LOG_UNUSUAL,
  205. "[NMJOIN] Received forwarded request to add node '%1!ws!' to the "
  206. "cluster.\n",
  207. NewNodeName
  208. );
  209. //
  210. // Read the registry override before acquiring the NM lock.
  211. //
  212. status = DmQueryDword(
  213. DmClusterParametersKey,
  214. CLUSREG_NAME_MAX_NODES,
  215. &registryNodeLimit,
  216. NULL
  217. );
  218. if (status != ERROR_SUCCESS) {
  219. registryNodeLimit = 0;
  220. }
  221. NmpAcquireLock();
  222. if (!NmpLockedEnterApi(NmStateOnline)) {
  223. ClRtlLogPrint(LOG_UNUSUAL,
  224. "[NMJOIN] This node is not in a valid state to process a "
  225. "request to add node '%1!ws!' to the cluster.\n",
  226. NewNodeName
  227. );
  228. NmpReleaseLock();
  229. return(ERROR_NODE_NOT_AVAILABLE);
  230. }
  231. if (NmpLeaderNodeId == NmLocalNodeId) {\
  232. //
  233. // Call the internal handler.
  234. //
  235. status = NmpAddNode(
  236. NewNodeName,
  237. NewNodeHighestVersion,
  238. NewNodeLowestVersion,
  239. NewNodeProductSuite,
  240. registryNodeLimit
  241. );
  242. }
  243. else {
  244. //
  245. // This node is not the leader.
  246. // Fail the request.
  247. //
  248. status = ERROR_NODE_NOT_AVAILABLE;
  249. ClRtlLogPrint(LOG_UNUSUAL,
  250. "[NMJOIN] Cannot process request to add node '%1!ws!' to the "
  251. "cluster because this node is not the leader.\n",
  252. NewNodeName
  253. );
  254. }
  255. NmpLockedLeaveApi();
  256. NmpReleaseLock();
  257. return(status);
  258. } // s_NmRpcAddNode
  259. /////////////////////////////////////////////////////////////////////////////
  260. //
  261. // Routines called by other cluster service components
  262. //
  263. /////////////////////////////////////////////////////////////////////////////
  264. /////////////////////////////////////////////////////////////////////////////
  265. //
  266. // Rpc Extended error tracking.
  267. //
  268. /////////////////////////////////////////////////////////////////////////////
  269. VOID NmDumpRpcExtErrorInfo(RPC_STATUS status)
  270. {
  271. RPC_STATUS status2;
  272. RPC_ERROR_ENUM_HANDLE enumHandle;
  273. status2 = RpcErrorStartEnumeration(&enumHandle);
  274. if(status2 == RPC_S_ENTRY_NOT_FOUND) {
  275. ClRtlLogPrint(LOG_UNUSUAL,
  276. "[NM] RpcExtErrorInfo: Error info not found.\n"
  277. );
  278. }
  279. else if(status2 != RPC_S_OK) {
  280. ClRtlLogPrint(LOG_UNUSUAL,
  281. "[NM] RpcExtErrorInfo: Couldn't get error info, status %1!u!\n",
  282. status2
  283. );
  284. }
  285. else {
  286. RPC_EXTENDED_ERROR_INFO errorInfo;
  287. int records;
  288. BOOL result;
  289. BOOL copyStrings=TRUE;
  290. BOOL fUseFileTime=TRUE;
  291. SYSTEMTIME *systemTimeToUse;
  292. SYSTEMTIME systemTimeBuffer;
  293. while(status2 == RPC_S_OK) {
  294. errorInfo.Version = RPC_EEINFO_VERSION;
  295. errorInfo.Flags = 0;
  296. errorInfo.NumberOfParameters = 4;
  297. if(fUseFileTime) {
  298. errorInfo.Flags |= EEInfoUseFileTime;
  299. }
  300. status2 = RpcErrorGetNextRecord(&enumHandle, copyStrings, &errorInfo);
  301. if(status2 == RPC_S_ENTRY_NOT_FOUND) {
  302. break;
  303. }
  304. else if(status2 != RPC_S_OK) {
  305. ClRtlLogPrint(LOG_UNUSUAL,
  306. "[NM] RpcExtErrorInfo: Couldn't complete enumeration, status %1!u!\n",
  307. status2
  308. );
  309. break;
  310. }
  311. else {
  312. int i;
  313. if(errorInfo.ComputerName) {
  314. ClRtlLogPrint(LOG_NOISE,
  315. "[NM] RpcExtErrorInfo: ComputerName= %1!ws!\n",
  316. errorInfo.ComputerName
  317. );
  318. }
  319. if(copyStrings) {
  320. result = HeapFree(GetProcessHeap(), 0, errorInfo.ComputerName);
  321. CL_ASSERT(result);
  322. }
  323. ClRtlLogPrint(LOG_NOISE,
  324. "[NM] RpcExtErrorInfo: ProcessId= %1!u!\n",
  325. errorInfo.ProcessID
  326. );
  327. if(fUseFileTime) {
  328. result = FileTimeToSystemTime(&errorInfo.u.FileTime, &systemTimeBuffer);
  329. CL_ASSERT(result);
  330. systemTimeToUse = &systemTimeBuffer;
  331. }
  332. else {
  333. systemTimeToUse = &errorInfo.u.SystemTime;
  334. }
  335. ClRtlLogPrint(LOG_NOISE,
  336. "[NM] RpcExtErrorInfo: SystemTime= %1!u!/%2!u!/%3!u! %4!u!:%5!u!:%6!u!:%7!u!\n",
  337. systemTimeToUse->wMonth,
  338. systemTimeToUse->wDay,
  339. systemTimeToUse->wYear,
  340. systemTimeToUse->wHour,
  341. systemTimeToUse->wMinute,
  342. systemTimeToUse->wSecond,
  343. systemTimeToUse->wMilliseconds
  344. );
  345. ClRtlLogPrint(LOG_NOISE,
  346. "[NM] RpcExtErrorInfo: GeneratingComponent= %1!u!\n",
  347. errorInfo.GeneratingComponent
  348. );
  349. ClRtlLogPrint(LOG_NOISE,
  350. "[NM] RpcExtErrorInfo: Status= 0x%1!x!\n",
  351. errorInfo.Status
  352. );
  353. ClRtlLogPrint(LOG_NOISE,
  354. "[NM] RpcExtErrorInfo: Detection Location= %1!u!\n",
  355. (DWORD)errorInfo.DetectionLocation
  356. );
  357. ClRtlLogPrint(LOG_NOISE,
  358. "[NM] RpcExtErrorInfo: Flags= 0x%1!x!\n",
  359. errorInfo.Flags
  360. );
  361. ClRtlLogPrint(LOG_NOISE,
  362. "[NM] RpcExtErrorInfo: Number of Parameters= %1!u!\n",
  363. errorInfo.NumberOfParameters
  364. );
  365. for(i=0;i<errorInfo.NumberOfParameters;i++) {
  366. switch(errorInfo.Parameters[i].ParameterType) {
  367. case eeptAnsiString:
  368. ClRtlLogPrint(LOG_NOISE,
  369. "[NM] RpcExtErrorInfo: Ansi String= %1!s!\n",
  370. errorInfo.Parameters[i].u.AnsiString
  371. );
  372. if(copyStrings) {
  373. result = HeapFree(GetProcessHeap(), 0, errorInfo.Parameters[i].u.AnsiString);
  374. CL_ASSERT(result);
  375. }
  376. break;
  377. case eeptUnicodeString:
  378. ClRtlLogPrint(LOG_NOISE,
  379. "[NM] RpcExtErrorInfo: Unicode String= %1!S!\n",
  380. errorInfo.Parameters[i].u.UnicodeString
  381. );
  382. if(copyStrings) {
  383. result = HeapFree(GetProcessHeap(), 0, errorInfo.Parameters[i].u.UnicodeString);
  384. CL_ASSERT(result);
  385. }
  386. break;
  387. case eeptLongVal:
  388. ClRtlLogPrint(LOG_NOISE,
  389. "[NM] RpcExtErrorInfo: Long Val= %1!u!\n",
  390. errorInfo.Parameters[i].u.LVal
  391. );
  392. break;
  393. case eeptShortVal:
  394. ClRtlLogPrint(LOG_NOISE,
  395. "[NM] RpcExtErrorInfo: Short Val= %1!u!\n",
  396. (DWORD)errorInfo.Parameters[i].u.SVal
  397. );
  398. break;
  399. case eeptPointerVal:
  400. ClRtlLogPrint(LOG_NOISE,
  401. "[NM] RpcExtErrorInfo: Pointer Val= 0x%1!u!\n",
  402. errorInfo.Parameters[i].u.PVal
  403. );
  404. break;
  405. case eeptNone:
  406. ClRtlLogPrint(LOG_NOISE,
  407. "[NM] RpcExtErrorInfo: Truncated\n"
  408. );
  409. break;
  410. default:
  411. ClRtlLogPrint(LOG_NOISE,
  412. "[NM] RpcExtErrorInfo: Invalid Type %1!u!\n",
  413. errorInfo.Parameters[i].ParameterType
  414. );
  415. }
  416. }
  417. }
  418. }
  419. RpcErrorEndEnumeration(&enumHandle);
  420. }
  421. } //NmDumpRpcExtErrorInfo
  422. ///////////////////////////////////////////////////////////////////////////
  423. //
  424. // RPC Monitoring Routines
  425. //
  426. ///////////////////////////////////////////////////////////////////////////
  427. VOID
  428. NmStartRpc(
  429. DWORD NodeId
  430. )
  431. /*++
  432. Routine Description:
  433. Registers the fact that an RPC is about to be made to the specified
  434. node by the current thread. This allows the call to be cancelled if
  435. the target node dies.
  436. Arguments:
  437. NodeId - The ID of the node about to be called.
  438. Return Value:
  439. None
  440. Notes:
  441. This routine must not be called by a thread that makes concurrent
  442. asynch RPC calls.
  443. --*/
  444. {
  445. HANDLE thHandle;
  446. PNM_INTRACLUSTER_RPC_THREAD entry;
  447. CL_ASSERT((NodeId >= ClusterMinNodeId) && (NodeId <= NmMaxNodeId));
  448. CL_ASSERT(NmpIntraClusterRpcArr != NULL);
  449. thHandle = OpenThread(
  450. THREAD_ALL_ACCESS,
  451. FALSE,
  452. GetCurrentThreadId()
  453. );
  454. if(thHandle == NULL) {
  455. ClRtlLogPrint(LOG_UNUSUAL,
  456. "[NM] NmStartRpc: Failed to open handle to current thread.\n"
  457. );
  458. return;
  459. }
  460. entry = LocalAlloc(LMEM_FIXED, sizeof(NM_INTRACLUSTER_RPC_THREAD));
  461. if(entry == NULL) {
  462. ClRtlLogPrint(LOG_UNUSUAL,
  463. "[NM] NmStartRpc: Failed to allocate memory.\n"
  464. );
  465. CloseHandle(thHandle);
  466. return;
  467. }
  468. entry->ThreadId = GetCurrentThreadId();
  469. entry->Thread = thHandle;
  470. entry->Cancelled = FALSE;
  471. NmpAcquireRPCLock();
  472. #if DBG
  473. ClRtlLogPrint(LOG_NOISE,
  474. "[NM] Starting RPC to node %1!u!\n",
  475. NodeId
  476. );
  477. #endif
  478. InsertHeadList(&NmpIntraClusterRpcArr[NodeId], &entry->Linkage);
  479. NmpReleaseRPCLock();
  480. return;
  481. } // NmStartRpc
  482. VOID
  483. NmEndRpc(
  484. DWORD NodeId
  485. )
  486. /*++
  487. Routine Description:
  488. Cancels registration of an RPC to the specified node by the current
  489. thread.
  490. Arguments:
  491. NodeId - The ID of the node that was called.
  492. Return Value:
  493. None
  494. Notes:
  495. This routine must be invoked even if the RPC was cancelled.
  496. --*/
  497. {
  498. DWORD threadId;
  499. LIST_ENTRY *pEntry;
  500. PNM_INTRACLUSTER_RPC_THREAD pRpcTh;
  501. CL_ASSERT((NodeId >= ClusterMinNodeId) && (NodeId <= NmMaxNodeId));
  502. CL_ASSERT(NmpIntraClusterRpcArr != NULL);
  503. threadId = GetCurrentThreadId();
  504. NmpAcquireRPCLock();
  505. pEntry = NmpIntraClusterRpcArr[NodeId].Flink;
  506. while(pEntry != &NmpIntraClusterRpcArr[NodeId]) {
  507. pRpcTh = CONTAINING_RECORD(pEntry, NM_INTRACLUSTER_RPC_THREAD, Linkage);
  508. if(pRpcTh->ThreadId == threadId) {
  509. #if DBG
  510. ClRtlLogPrint(LOG_NOISE,
  511. "[NM] Finished RPC to node %1!u!\n",
  512. NodeId
  513. );
  514. #endif
  515. if (pRpcTh->Cancelled) {
  516. ClRtlLogPrint(LOG_NOISE,
  517. "[NM] RPC by this thread to node %1!u! is cancelled\n",
  518. NodeId
  519. );
  520. }
  521. RemoveEntryList(pEntry);
  522. CloseHandle(pRpcTh->Thread);
  523. LocalFree(pRpcTh);
  524. NmpReleaseRPCLock();
  525. return;
  526. }
  527. pEntry = pEntry->Flink;
  528. }
  529. ClRtlLogPrint(LOG_UNUSUAL,
  530. "[NM] No record of RPC by this thread to node %1!u!.\n",
  531. NodeId
  532. );
  533. #if DBG
  534. CL_ASSERT(pEntry != &NmpIntraClusterRpcArr[NodeId]);
  535. #endif
  536. NmpReleaseRPCLock();
  537. return;
  538. } // NmEndRpc
  539. DWORD
  540. NmPauseNode(
  541. IN PNM_NODE Node
  542. )
  543. /*++
  544. Routine Description:
  545. Arguments:
  546. Return Value:
  547. --*/
  548. {
  549. LPCWSTR nodeId = OmObjectId(Node);
  550. DWORD status;
  551. ClRtlLogPrint(LOG_NOISE,
  552. "[NM] Received request to pause node %1!ws!.\n",
  553. nodeId
  554. );
  555. if (NmpEnterApi(NmStateOnline)) {
  556. status = GumSendUpdateEx(
  557. GumUpdateMembership,
  558. NmUpdatePauseNode,
  559. 1,
  560. (lstrlenW(nodeId)+1)*sizeof(WCHAR),
  561. nodeId
  562. );
  563. if (status != ERROR_SUCCESS) {
  564. ClRtlLogPrint(LOG_CRITICAL,
  565. "[NM] Global update to pause node %1!ws! failed, status %2!u!\n",
  566. nodeId,
  567. status
  568. );
  569. }
  570. NmpLeaveApi();
  571. }
  572. else {
  573. status = ERROR_NODE_NOT_AVAILABLE;
  574. ClRtlLogPrint(LOG_NOISE,
  575. "[NM] Not in valid state to process PauseNode request.\n"
  576. );
  577. }
  578. return(status);
  579. } // NmPauseNode
  580. DWORD
  581. NmResumeNode(
  582. IN PNM_NODE Node
  583. )
  584. /*++
  585. Routine Description:
  586. Arguments:
  587. Return Value:
  588. --*/
  589. {
  590. LPCWSTR nodeId = OmObjectId(Node);
  591. DWORD status;
  592. ClRtlLogPrint(LOG_NOISE,
  593. "[NM] Received request to resume node %1!ws!.\n",
  594. nodeId
  595. );
  596. if (NmpEnterApi(NmStateOnline)) {
  597. status = GumSendUpdateEx(
  598. GumUpdateMembership,
  599. NmUpdateResumeNode,
  600. 1,
  601. (lstrlenW(nodeId)+1)*sizeof(WCHAR),
  602. nodeId
  603. );
  604. if (status != ERROR_SUCCESS) {
  605. ClRtlLogPrint(LOG_CRITICAL,
  606. "[NM] Global update to resume node %1!ws! failed, status %2!u!\n",
  607. nodeId,
  608. status
  609. );
  610. }
  611. NmpLeaveApi();
  612. }
  613. else {
  614. status = ERROR_NODE_NOT_AVAILABLE;
  615. ClRtlLogPrint(LOG_NOISE,
  616. "[NM] Not in valid state to process ResumeNode request.\n"
  617. );
  618. }
  619. return(status);
  620. } // NmResumeNode
  621. DWORD
  622. NmEvictNode(
  623. IN PNM_NODE Node
  624. )
  625. /*++
  626. Routine Description:
  627. Arguments:
  628. Return Value:
  629. Notes:
  630. The caller must be holding a reference on the node object.
  631. --*/
  632. {
  633. LPCWSTR nodeId = OmObjectId(Node);
  634. DWORD status = ERROR_SUCCESS;
  635. LPCWSTR pcszNodeName = NULL;
  636. ClRtlLogPrint(LOG_NOISE,
  637. "[NM] Received request to evict node %1!ws!.\n",
  638. nodeId
  639. );
  640. if (NmpEnterApi(NmStateOnline)) {
  641. // Acquire NM lock (to ensure that the number of nodes does not change)
  642. NmpAcquireLock();
  643. if (NmpNodeCount != 1 ) {
  644. NmpReleaseLock();
  645. // We are not evicting the last node.
  646. status = GumSendUpdateEx(
  647. GumUpdateMembership,
  648. NmUpdateEvictNode,
  649. 1,
  650. (lstrlenW(nodeId)+1)*sizeof(WCHAR),
  651. nodeId
  652. );
  653. if ( status != ERROR_SUCCESS ) {
  654. ClRtlLogPrint(LOG_CRITICAL,
  655. "[NM] Global update to evict node %1!ws! failed, status %2!u!\n",
  656. nodeId,
  657. status
  658. );
  659. }
  660. pcszNodeName = OmObjectName(Node);
  661. }
  662. else {
  663. // We are evicting the last node. Set a flag to indicate this fact.
  664. if ( NmpLastNodeEvicted == FALSE ) {
  665. NmpLastNodeEvicted = TRUE;
  666. }
  667. else {
  668. // We have already evicted this node. This is an error.
  669. status = ERROR_NODE_NOT_AVAILABLE;
  670. ClRtlLogPrint(LOG_NOISE,
  671. "[NM] Not in valid state to process EvictNode request.\n"
  672. );
  673. }
  674. NmpReleaseLock();
  675. }
  676. if (status == ERROR_SUCCESS) {
  677. HRESULT cleanupStatus;
  678. // The node was successfully evicted. Now initiate cleanup on that node.
  679. // However, specify that cleanup is to be started only after 60000 ms (1 minute).
  680. cleanupStatus =
  681. ClRtlCleanupNode(
  682. pcszNodeName, // Name of the node to be cleaned up
  683. 60000, // Amount of time (in milliseconds) to wait before starting cleanup
  684. 0 // timeout interval in milliseconds
  685. );
  686. if ( FAILED( cleanupStatus ) && ( cleanupStatus != RPC_S_CALLPENDING ) ){
  687. ClRtlLogPrint(LOG_UNUSUAL,
  688. "[NM] Failed to initiate cleanup of evicted node %1!ws!, status 0x%2!x!\n",
  689. nodeId,
  690. cleanupStatus
  691. );
  692. status = cleanupStatus;
  693. }
  694. else {
  695. ClRtlLogPrint(LOG_NOISE,
  696. "[NM] Cleanup of evicted node %1!ws! successfully initiated.\n",
  697. nodeId
  698. );
  699. CsLogEvent1(LOG_UNUSUAL, NM_NODE_EVICTED, OmObjectName(Node));
  700. }
  701. }
  702. NmpLeaveApi();
  703. }
  704. else {
  705. status = ERROR_NODE_NOT_AVAILABLE;
  706. ClRtlLogPrint(LOG_NOISE,
  707. "[NM] Not in valid state to process EvictNode request.\n"
  708. );
  709. }
  710. return(status);
  711. } // NmEvictNode
  712. PNM_NODE
  713. NmReferenceNodeById(
  714. IN DWORD NodeId
  715. )
  716. /*++
  717. Routine Description:
  718. Given a node id, returns a referenced pointer to the node object.
  719. The caller is responsible for calling OmDereferenceObject.
  720. Arguments:
  721. NodeId - Supplies the node id
  722. Return Value:
  723. A pointer to the node object if it exists
  724. NULL if there is no such node.
  725. --*/
  726. {
  727. PNM_NODE Node = NULL;
  728. NmpAcquireLock();
  729. if (NmpLockedEnterApi(NmStateOnlinePending)) {
  730. CL_ASSERT(NmIsValidNodeId(NodeId));
  731. CL_ASSERT(NmpIdArray != NULL);
  732. Node = NmpIdArray[NodeId];
  733. if (NmpIdArray[NodeId] != NULL) {
  734. OmReferenceObject(Node);
  735. }
  736. else {
  737. SetLastError(ERROR_CLUSTER_NODE_NOT_FOUND);
  738. }
  739. NmpLockedLeaveApi();
  740. }
  741. else {
  742. SetLastError(ERROR_NODE_NOT_AVAILABLE);
  743. ClRtlLogPrint(LOG_NOISE,
  744. "[NM] Not in valid state to process ReferenceNodeById request.\n"
  745. );
  746. }
  747. NmpReleaseLock();
  748. return(Node);
  749. } // NmReferenceNodeById
  750. PNM_NODE
  751. NmReferenceJoinerNode(
  752. IN DWORD JoinSequence,
  753. IN CL_NODE_ID JoinerNodeId
  754. )
  755. /*++
  756. Routine Description:
  757. Given a node id, returns a referenced pointer to the node object.
  758. The caller is responsible for calling OmDereferenceObject.
  759. Also validates the joiner's information
  760. Arguments:
  761. NodeId - Supplies the node id
  762. Return Value:
  763. A pointer to the node object if it exists
  764. NULL if there is no such node.
  765. Notes:
  766. If the routine is successful, the caller must dereference the
  767. node object by calling NmDereferenceJoiningNode.
  768. --*/
  769. {
  770. PNM_NODE joinerNode = NULL;
  771. DWORD status;
  772. NmpAcquireLock();
  773. if (NmpLockedEnterApi(NmStateOnline)) {
  774. joinerNode = NmpIdArray[JoinerNodeId];
  775. if (joinerNode != NULL) {
  776. if ( (JoinSequence == NmpJoinSequence) &&
  777. (NmpJoinerNodeId == JoinerNodeId)
  778. )
  779. {
  780. OmReferenceObject(joinerNode);
  781. NmpReleaseLock();
  782. //
  783. // Return holding an active thread reference.
  784. //
  785. return(joinerNode);
  786. }
  787. else {
  788. status = ERROR_CLUSTER_JOIN_ABORTED;
  789. }
  790. }
  791. else {
  792. status = ERROR_CLUSTER_NODE_NOT_MEMBER;
  793. }
  794. NmpLockedLeaveApi();
  795. }
  796. else {
  797. status = ERROR_NODE_NOT_AVAILABLE;
  798. }
  799. NmpReleaseLock();
  800. if (status != ERROR_SUCCESS) {
  801. SetLastError(status);
  802. }
  803. return(joinerNode);
  804. } // NmReferenceJoinerNode
  805. VOID
  806. NmDereferenceJoinerNode(
  807. PNM_NODE JoinerNode
  808. )
  809. {
  810. OmDereferenceObject(JoinerNode);
  811. NmpLeaveApi();
  812. return;
  813. } // NmDereferenceJoinerNode
  814. CLUSTER_NODE_STATE
  815. NmGetNodeState(
  816. IN PNM_NODE Node
  817. )
  818. /*++
  819. Routine Description:
  820. Arguments:
  821. Return Value:
  822. Notes:
  823. Because the caller must have a reference on the node object and the
  824. call is so simple, there is no reason to put the call through the
  825. EnterApi/LeaveApi dance.
  826. --*/
  827. {
  828. CLUSTER_NODE_STATE state;
  829. NmpAcquireLock();
  830. state = Node->State;
  831. NmpReleaseLock();
  832. return(state);
  833. } // NmGetNodeState
  834. CLUSTER_NODE_STATE
  835. NmGetExtendedNodeState(
  836. IN PNM_NODE Node
  837. )
  838. /*++
  839. Routine Description:
  840. Arguments:
  841. Return Value:
  842. Notes:
  843. Because the caller must have a reference on the node object and the
  844. call is so simple, there is no reason to put the call through the
  845. EnterApi/LeaveApi dance.
  846. --*/
  847. {
  848. CLUSTER_NODE_STATE state;
  849. NmpAcquireLock();
  850. state = Node->State;
  851. if(NM_NODE_UP(Node) ) {
  852. //
  853. // We need to check whether the node is really up
  854. //
  855. switch( Node->ExtendedState ) {
  856. case ClusterNodeUp:
  857. //
  858. // The node explicitly set its extended state to UP immediately after
  859. // ClusterJoin / ClusterForm was complete.
  860. // We need to return either Up or Paused, depending on the node state
  861. //
  862. state = Node->State;
  863. break;
  864. case ClusterNodeDown:
  865. //
  866. // The node explicitly set its extended state to DOWN in the beginning of
  867. // the shutdown process. We will report the node state as down.
  868. //
  869. // It is better to have ClusterNodeShuttindDown state for this situation.
  870. //
  871. // state = ClusterNodeDown;
  872. // We do not want to return NodeDown, we really want NodeShuttingDown.
  873. //
  874. // Return UP or Paused
  875. //
  876. state = Node->State;
  877. break;
  878. default:
  879. //
  880. // Node is up from NM standpoint, but other components are not up yet.
  881. //
  882. state = ClusterNodeJoining;
  883. }
  884. }
  885. NmpReleaseLock();
  886. return(state);
  887. } // NmGetExtendedNodeState
  888. DWORD NmpUpdateExtendedNodeState(
  889. IN BOOL SourceNode,
  890. IN LPWSTR NodeId,
  891. IN CLUSTER_NODE_STATE* ExtendedState
  892. )
  893. {
  894. DWORD status = ERROR_SUCCESS;
  895. NmpAcquireLock();
  896. ClRtlLogPrint(LOG_NOISE,
  897. "[NM] Received update to set extended state for node %1!ws! "
  898. "to %2!d!\n",
  899. NodeId,
  900. *ExtendedState
  901. );
  902. if (NmpLockedEnterApi(NmStateOnline)) {
  903. PNM_NODE node = OmReferenceObjectById(ObjectTypeNode, NodeId);
  904. if (node != NULL) {
  905. //
  906. // Extended State is valid only when the node is online.
  907. // Ignore the update otherwise.
  908. //
  909. if ( NM_NODE_UP(node) ) {
  910. CLUSTER_EVENT event;
  911. node->ExtendedState = *ExtendedState;
  912. if (*ExtendedState == ClusterNodeUp) {
  913. event = CLUSTER_EVENT_API_NODE_UP;
  914. } else {
  915. event = CLUSTER_EVENT_API_NODE_SHUTTINGDOWN;
  916. }
  917. ClRtlLogPrint(LOG_NOISE,
  918. "[NM] Issuing event %1!x!.\n",
  919. event
  920. );
  921. ClusterEvent(event, node);
  922. }
  923. OmDereferenceObject(node);
  924. }
  925. else {
  926. ClRtlLogPrint(LOG_NOISE,
  927. "[NM] Node %1!ws! is not a cluster member. Rejecting request "
  928. "to set the node's extended state.\n",
  929. NodeId
  930. );
  931. status = ERROR_NODE_NOT_AVAILABLE;
  932. }
  933. NmpLockedLeaveApi();
  934. } else {
  935. ClRtlLogPrint(LOG_NOISE,
  936. "[NM] Not in a valid state to process request to set extended "
  937. "state for node %1!ws!\n",
  938. NodeId
  939. );
  940. status = ERROR_CLUSTER_NODE_NOT_READY;
  941. }
  942. NmpReleaseLock();
  943. return status;
  944. } // NmpUpdateExtendedNodeState
  945. DWORD
  946. NmSetExtendedNodeState(
  947. IN CLUSTER_NODE_STATE State
  948. )
  949. {
  950. DWORD Status;
  951. Status = GumSendUpdateEx(
  952. GumUpdateMembership,
  953. NmUpdateExtendedNodeState,
  954. 2,
  955. sizeof(NmLocalNodeIdString),
  956. &NmLocalNodeIdString,
  957. sizeof(CLUSTER_NODE_STATE),
  958. &State
  959. );
  960. if (Status != ERROR_SUCCESS) {
  961. ClRtlLogPrint(LOG_UNUSUAL,
  962. "[INIT] NmUpdateExtendedNodeState node failed, status %1!d!.\n", Status);
  963. }
  964. return Status;
  965. } // NmSetExtendedNodeState
  966. DWORD
  967. NmGetNodeId(
  968. IN PNM_NODE Node
  969. )
  970. /*++
  971. Routine Description:
  972. Returns the given node's node ID.
  973. Arguments:
  974. Node - Supplies a pointer to a node object.
  975. Return Value:
  976. The node's node id.
  977. Notes:
  978. Because the caller must have a reference on the node object and the
  979. call is so simple, there is no reason to put the call through the
  980. EnterApi/LeaveApi dance.
  981. --*/
  982. {
  983. DWORD nodeId;
  984. //
  985. // Since the caller has a reference on the object, and the node ID can't
  986. // be changed, it is safe to do this without taking a lock. It is also
  987. // necessary to prevent some deadlocks.
  988. //
  989. nodeId = Node->NodeId;
  990. return(nodeId);
  991. } // NmGetNodeId
  992. DWORD
  993. NmGetCurrentNumberOfNodes()
  994. {
  995. DWORD dwCnt = 0;
  996. PLIST_ENTRY pListEntry;
  997. NmpAcquireLock();
  998. for ( pListEntry = NmpNodeList.Flink;
  999. pListEntry != &NmpNodeList;
  1000. pListEntry = pListEntry->Flink )
  1001. {
  1002. dwCnt++;
  1003. }
  1004. NmpReleaseLock();
  1005. return(dwCnt);
  1006. }
  1007. DWORD
  1008. NmGetMaxNodeId(
  1009. )
  1010. /*++
  1011. Routine Description:
  1012. Returns the max node's node ID.
  1013. Arguments:
  1014. Node - Supplies a pointer to a node object.
  1015. Return Value:
  1016. The node's node id.
  1017. Notes:
  1018. Because the caller must have a reference on the node object and the
  1019. call is so simple, there is no reason to put the call through the
  1020. EnterApi/LeaveApi dance.
  1021. --*/
  1022. {
  1023. return(NmMaxNodeId);
  1024. } // NmGetMaxNodeId
  1025. VOID
  1026. NmpAdviseNodeFailure(
  1027. IN PNM_NODE Node,
  1028. IN DWORD ErrorCode
  1029. )
  1030. /*++
  1031. Routine Description:
  1032. Reports that a communication failure to the specified node has occurred.
  1033. A poison packet will be sent to the failed node and regroup initiated.
  1034. Arguments:
  1035. Node - Supplies a pointer to the node object for the failed node.
  1036. ErrorCode - Supplies the error code that was returned from RPC
  1037. Return Value:
  1038. None
  1039. Notes:
  1040. Called with NM lock held.
  1041. --*/
  1042. {
  1043. ClRtlLogPrint(LOG_NOISE,
  1044. "[NM] Received advice that node %1!u! has failed with "
  1045. "error %2!u!.\n",
  1046. Node->NodeId,
  1047. ErrorCode
  1048. );
  1049. if (Node->State != ClusterNodeDown) {
  1050. LPCWSTR nodeName = OmObjectName(Node);
  1051. DWORD status;
  1052. ClRtlLogPrint(LOG_CRITICAL,
  1053. "[NM] Banishing node %1!u! from active cluster membership.\n",
  1054. Node->NodeId
  1055. );
  1056. OmReferenceObject(Node);
  1057. NmpReleaseLock();
  1058. status = MMEject(Node->NodeId);
  1059. if (status == MM_OK) {
  1060. CsLogEvent1(
  1061. LOG_UNUSUAL,
  1062. NM_EVENT_NODE_BANISHED,
  1063. nodeName
  1064. );
  1065. }
  1066. OmDereferenceObject(Node);
  1067. NmpAcquireLock();
  1068. }
  1069. return;
  1070. } // NmpAdviseNodeFailure
  1071. VOID
  1072. NmAdviseNodeFailure(
  1073. IN DWORD NodeId,
  1074. IN DWORD ErrorCode
  1075. )
  1076. /*++
  1077. Routine Description:
  1078. Reports that a communication failure to the specified node has occurred.
  1079. A poison packet will be sent to the failed node and regroup initiated.
  1080. Arguments:
  1081. NodeId - Supplies the node id of the failed node.
  1082. ErrorCode - Supplies the error code that was returned from RPC
  1083. Return Value:
  1084. None
  1085. --*/
  1086. {
  1087. NmpAcquireLock();
  1088. ClRtlLogPrint(LOG_NOISE,
  1089. "[NM] Received advice that node %1!u! has failed with error %2!u!.\n",
  1090. NodeId,
  1091. ErrorCode
  1092. );
  1093. if (NmpLockedEnterApi(NmStateOnline)) {
  1094. PNM_NODE node;
  1095. CL_ASSERT(NodeId != NmLocalNodeId);
  1096. CL_ASSERT(NmpIdArray != NULL);
  1097. node = NmpIdArray[NodeId];
  1098. NmpAdviseNodeFailure(node, ErrorCode);
  1099. NmpLockedLeaveApi();
  1100. }
  1101. else {
  1102. ClRtlLogPrint(LOG_NOISE,
  1103. "[NM] Not in valid state to process AdviseNodeFailure request.\n"
  1104. );
  1105. }
  1106. NmpReleaseLock();
  1107. return;
  1108. } // NmAdviseNodeFailure
  1109. DWORD
  1110. NmEnumNodeInterfaces(
  1111. IN PNM_NODE Node,
  1112. OUT LPDWORD InterfaceCount,
  1113. OUT PNM_INTERFACE * InterfaceList[]
  1114. )
  1115. /*++
  1116. Routine Description:
  1117. Returns the list of interfaces associated with a specified node.
  1118. Arguments:
  1119. Node - A pointer to the node object for which to enumerate interfaces.
  1120. InterfaceCount - On output, contains the number of items in InterfaceList.
  1121. InterfaceList - On output, points to an array of pointers to interface
  1122. objects. Each pointer in the array must be dereferenced
  1123. by the caller. The storage for the array must be
  1124. deallocated by the caller.
  1125. Return Value:
  1126. ERROR_SUCCESS if the routine is successful.
  1127. A Win32 error code othewise.
  1128. --*/
  1129. {
  1130. DWORD status = ERROR_SUCCESS;
  1131. NmpAcquireLock();
  1132. if (NmpLockedEnterApi(NmStateOnline)) {
  1133. if (Node->InterfaceCount > 0) {
  1134. PNM_INTERFACE * interfaceList = LocalAlloc(
  1135. LMEM_FIXED,
  1136. sizeof(PNM_INTERFACE) *
  1137. Node->InterfaceCount
  1138. );
  1139. if (interfaceList != NULL) {
  1140. PNM_INTERFACE netInterface;
  1141. PLIST_ENTRY entry;
  1142. DWORD i;
  1143. for (entry = Node->InterfaceList.Flink, i=0;
  1144. entry != &(Node->InterfaceList);
  1145. entry = entry->Flink, i++
  1146. )
  1147. {
  1148. netInterface = CONTAINING_RECORD(entry, NM_INTERFACE, NodeLinkage);
  1149. OmReferenceObject(netInterface);
  1150. interfaceList[i] = netInterface;
  1151. }
  1152. *InterfaceCount = Node->InterfaceCount;
  1153. *InterfaceList = interfaceList;
  1154. }
  1155. else {
  1156. status = ERROR_NOT_ENOUGH_MEMORY;
  1157. }
  1158. }
  1159. else {
  1160. *InterfaceCount = 0;
  1161. *InterfaceList = NULL;
  1162. }
  1163. NmpLockedLeaveApi();
  1164. }
  1165. else {
  1166. status = ERROR_NODE_NOT_AVAILABLE;
  1167. ClRtlLogPrint(LOG_NOISE,
  1168. "[NM] Not in valid state to process EnumNodeInterfaces request.\n"
  1169. );
  1170. }
  1171. NmpReleaseLock();
  1172. return(status);
  1173. } // NmEnumNodeInterfaces
  1174. DWORD
  1175. NmGetNodeHighestVersion(
  1176. IN PNM_NODE Node
  1177. )
  1178. {
  1179. return Node->HighestVersion;
  1180. }
  1181. /////////////////////////////////////////////////////////////////////////////
  1182. //
  1183. // Handlers for global updates
  1184. //
  1185. /////////////////////////////////////////////////////////////////////////////
  1186. DWORD
  1187. NmpUpdateAddNode(
  1188. IN BOOL SourceNode,
  1189. IN LPDWORD NewNodeId,
  1190. IN LPCWSTR NewNodeName,
  1191. IN LPDWORD NewNodeHighestVersion,
  1192. IN LPDWORD NewNodeLowestVersion,
  1193. IN LPDWORD NewNodeProductSuite
  1194. )
  1195. /*++
  1196. Routine Description:
  1197. GUM update handler for adding a new node to a cluster.
  1198. Arguments:
  1199. SourceNode - Specifies whether or not this is the source node for the update
  1200. NodeId - Specifies the ID of the node.
  1201. NewNodeName - A pointer to a string containing the name of the
  1202. new node.
  1203. NewNodeHighestVersion - A pointer to the highest cluster version number
  1204. that the new node can support.
  1205. NewNodeLowestVersion - A pointer to the lowest cluster version number
  1206. that the new node can support.
  1207. NewNodeProductSuite - A pointer to the product suite identifier for
  1208. the new node.
  1209. Return Value:
  1210. ERROR_SUCCESS if successful
  1211. Win32 error code otherwise.
  1212. Notes:
  1213. This routine is used to add an NT5 (or later) node to an NT5 (or
  1214. later) cluster. It will never be invoked in a mixed NT4/NT5
  1215. cluster.
  1216. --*/
  1217. {
  1218. PNM_NODE node = NULL;
  1219. NM_NODE_INFO2 nodeInfo;
  1220. HDMKEY nodeKey = NULL;
  1221. DWORD disposition;
  1222. DWORD status;
  1223. DWORD registryNodeLimit;
  1224. HLOCALXSACTION xaction = NULL;
  1225. BOOLEAN lockAcquired = FALSE;
  1226. if (!NmpEnterApi(NmStateOnline)) {
  1227. ClRtlLogPrint(LOG_UNUSUAL,
  1228. "[NM] This node is not in a valid state to process a request "
  1229. "to add node %1!ws! to the cluster.\n",
  1230. NewNodeName
  1231. );
  1232. return(ERROR_NODE_NOT_AVAILABLE);
  1233. }
  1234. ClRtlLogPrint(LOG_NOISE,
  1235. "[NMJOIN] Received an update to add node '%1!ws!' to "
  1236. "the cluster with node ID %2!u!.\n",
  1237. NewNodeName,
  1238. *NewNodeId
  1239. );
  1240. if (*NewNodeId > NmMaxNodeId) {
  1241. ClRtlLogPrint(LOG_UNUSUAL,
  1242. "[NMJOIN] Failed to add node %1!ws! to the cluster because the "
  1243. "specified node ID, '%2!u!' , is not valid.\n",
  1244. NewNodeName,
  1245. *NewNodeId
  1246. );
  1247. status = ERROR_INVALID_PARAMETER;
  1248. goto error_exit;
  1249. }
  1250. //
  1251. // Read the registry override before acquiring the NM lock.
  1252. //
  1253. status = DmQueryDword(
  1254. DmClusterParametersKey,
  1255. CLUSREG_NAME_MAX_NODES,
  1256. &registryNodeLimit,
  1257. NULL
  1258. );
  1259. if (status != ERROR_SUCCESS) {
  1260. registryNodeLimit = 0;
  1261. }
  1262. //
  1263. // Begin a transaction - This must be done before acquiring the
  1264. // NM lock.
  1265. //
  1266. xaction = DmBeginLocalUpdate();
  1267. if (xaction == NULL) {
  1268. status = GetLastError();
  1269. ClRtlLogPrint(LOG_UNUSUAL,
  1270. "[NM] Failed to begin a transaction to add node %1!ws! "
  1271. "to the cluster, status %2!u!.\n",
  1272. NewNodeName,
  1273. status
  1274. );
  1275. goto error_exit;
  1276. }
  1277. NmpAcquireLock(); lockAcquired = TRUE;
  1278. //
  1279. // Verify that we do not already have the maximum number of nodes
  1280. // allowed in this cluster.
  1281. //
  1282. if (!NmpIsAddNodeAllowed(*NewNodeProductSuite, registryNodeLimit, NULL)) {
  1283. ClRtlLogPrint(LOG_UNUSUAL,
  1284. "[NMJOIN] Cannot add node '%1!ws!' to the cluster. "
  1285. "The cluster already contains the maximum number of nodes "
  1286. "allowed by the product licenses of the current member nodes "
  1287. "and the proposed new node. \n",
  1288. NewNodeName
  1289. );
  1290. status = ERROR_LICENSE_QUOTA_EXCEEDED;
  1291. goto error_exit;
  1292. }
  1293. //
  1294. // Verify that the specified node ID is available.
  1295. //
  1296. if (NmpIdArray[*NewNodeId] != NULL) {
  1297. status = ERROR_CLUSTER_NODE_EXISTS;
  1298. ClRtlLogPrint(LOG_UNUSUAL,
  1299. "[NMJOIN] Cannot add node '%1!ws!' to the cluster because "
  1300. "node ID '%2!u!' is already in use.\n",
  1301. NewNodeName,
  1302. *NewNodeId
  1303. );
  1304. goto error_exit;
  1305. }
  1306. //
  1307. // Try to create a key for the node in the cluster registry.
  1308. //
  1309. wsprintfW(&(nodeInfo.NodeId[0]), L"%u", *NewNodeId);
  1310. nodeKey = DmLocalCreateKey(
  1311. xaction,
  1312. DmNodesKey,
  1313. nodeInfo.NodeId,
  1314. 0,
  1315. KEY_READ | KEY_WRITE,
  1316. NULL,
  1317. &disposition
  1318. );
  1319. if (nodeKey == NULL) {
  1320. status = GetLastError();
  1321. ClRtlLogPrint(LOG_UNUSUAL,
  1322. "[NMJOIN] Failed to create registry key for new "
  1323. "node '%1!ws!' using node ID '%2!u!', status %3!u!\n",
  1324. NewNodeName,
  1325. *NewNodeId,
  1326. status
  1327. );
  1328. goto error_exit;
  1329. }
  1330. if (disposition != REG_CREATED_NEW_KEY) {
  1331. //
  1332. // The key already exists. This must be
  1333. // garbage leftover from a failed evict or oldstyle add.
  1334. // We'll just overwrite the key.
  1335. //
  1336. ClRtlLogPrint(LOG_UNUSUAL,
  1337. "[NMJOIN] A partial definition exists for node ID '%1!u!'. "
  1338. "A node addition or eviction operation may have failed.\n",
  1339. *NewNodeId
  1340. );
  1341. }
  1342. //
  1343. // Add the rest of the node's parameters to the registry.
  1344. //
  1345. status = DmLocalSetValue(
  1346. xaction,
  1347. nodeKey,
  1348. CLUSREG_NAME_NODE_NAME,
  1349. REG_SZ,
  1350. (CONST BYTE *)NewNodeName,
  1351. NM_WCSLEN(NewNodeName)
  1352. );
  1353. if (status != ERROR_SUCCESS) {
  1354. ClRtlLogPrint(LOG_UNUSUAL,
  1355. "[NMJOIN] Failed to set registry value '%1!ws!', status %2!u!. "
  1356. "Cannot add node '%3!ws!' to the cluster.\n",
  1357. CLUSREG_NAME_NODE_NAME,
  1358. status,
  1359. NewNodeName
  1360. );
  1361. goto error_exit;
  1362. }
  1363. status = DmLocalSetValue(
  1364. xaction,
  1365. nodeKey,
  1366. CLUSREG_NAME_NODE_HIGHEST_VERSION,
  1367. REG_DWORD,
  1368. (CONST BYTE *)NewNodeHighestVersion,
  1369. sizeof(DWORD)
  1370. );
  1371. if (status != ERROR_SUCCESS) {
  1372. ClRtlLogPrint(LOG_UNUSUAL,
  1373. "[NMJOIN] Failed to set registry value '%1!ws!', status %2!u!. "
  1374. "Cannot add node '%3!ws!' to the cluster.\n",
  1375. CLUSREG_NAME_NODE_HIGHEST_VERSION,
  1376. status,
  1377. NewNodeName
  1378. );
  1379. goto error_exit;
  1380. }
  1381. status = DmLocalSetValue(
  1382. xaction,
  1383. nodeKey,
  1384. CLUSREG_NAME_NODE_LOWEST_VERSION,
  1385. REG_DWORD,
  1386. (CONST BYTE *)NewNodeLowestVersion,
  1387. sizeof(DWORD)
  1388. );
  1389. if (status != ERROR_SUCCESS) {
  1390. ClRtlLogPrint(LOG_UNUSUAL,
  1391. "[NMJOIN] Failed to set registry value %1!ws!, status %2!u!. "
  1392. "Cannot add node '%3!ws!' to the cluster.\n",
  1393. CLUSREG_NAME_NODE_LOWEST_VERSION,
  1394. status,
  1395. NewNodeName
  1396. );
  1397. goto error_exit;
  1398. }
  1399. status = DmLocalSetValue(
  1400. xaction,
  1401. nodeKey,
  1402. CLUSREG_NAME_NODE_PRODUCT_SUITE,
  1403. REG_DWORD,
  1404. (CONST BYTE *)NewNodeProductSuite,
  1405. sizeof(DWORD)
  1406. );
  1407. if (status != ERROR_SUCCESS) {
  1408. ClRtlLogPrint(LOG_UNUSUAL,
  1409. "[NMJOIN] Failed to set registry value %1!ws!, status %2!u!. "
  1410. "Cannot add node '%3!ws!' to the cluster.\n",
  1411. CLUSREG_NAME_NODE_PRODUCT_SUITE,
  1412. status,
  1413. NewNodeName
  1414. );
  1415. goto error_exit;
  1416. }
  1417. DmCloseKey(nodeKey); nodeKey = NULL;
  1418. status = NmpGetNodeDefinition(&nodeInfo);
  1419. if (status != ERROR_SUCCESS) {
  1420. ClRtlLogPrint(LOG_UNUSUAL,
  1421. "[NMJOIN] Failed to read definition for node %1!ws! from the "
  1422. "cluster database, status %2!u!.\n",
  1423. NewNodeName,
  1424. status
  1425. );
  1426. goto error_exit;
  1427. }
  1428. //
  1429. // If a node happens to be joining right now, flag the fact that
  1430. // it is now out of synch with the cluster config.
  1431. //
  1432. if (NmpJoinerNodeId != ClusterInvalidNodeId) {
  1433. ClRtlLogPrint(LOG_NOISE,
  1434. "[NMJOIN] Joiner (ID %1!u!) is now out of sync due to add of "
  1435. "node %2!ws!.\n",
  1436. NmpJoinerNodeId,
  1437. NewNodeName
  1438. );
  1439. NmpJoinerOutOfSynch = TRUE;
  1440. }
  1441. //
  1442. // Create the node object
  1443. //
  1444. NmpReleaseLock();
  1445. node = NmpCreateNodeObject(&nodeInfo);
  1446. ClNetFreeNodeInfo(&nodeInfo);
  1447. NmpAcquireLock();
  1448. if (node == NULL) {
  1449. status = GetLastError();
  1450. ClRtlLogPrint(LOG_UNUSUAL,
  1451. "[NMJOIN] Failed to create object for node %1!ws!, "
  1452. "status %2!u!.\n",
  1453. NewNodeName,
  1454. status
  1455. );
  1456. goto error_exit;
  1457. }
  1458. ClusterEvent(CLUSTER_EVENT_NODE_ADDED, node);
  1459. CsLogEvent1(LOG_NOISE, NM_EVENT_NEW_NODE, NewNodeName);
  1460. //
  1461. // Remove the reference that NmpCreateNodeObject left on the node.
  1462. //
  1463. OmDereferenceObject(node);
  1464. //
  1465. // Reset the cluster version and node limit
  1466. //
  1467. NmpResetClusterVersion(FALSE);
  1468. NmpResetClusterNodeLimit();
  1469. ClRtlLogPrint(LOG_NOISE,
  1470. "[NMJOIN] Successfully added node %1!ws! to the cluster.\n",
  1471. NewNodeName
  1472. );
  1473. error_exit:
  1474. if (lockAcquired) {
  1475. NmpLockedLeaveApi();
  1476. NmpReleaseLock();
  1477. }
  1478. else {
  1479. NmpLeaveApi();
  1480. }
  1481. if (xaction != NULL) {
  1482. if (status == ERROR_SUCCESS) {
  1483. DmCommitLocalUpdate(xaction); xaction = NULL;
  1484. }
  1485. else {
  1486. DmAbortLocalUpdate(xaction);
  1487. }
  1488. }
  1489. if (nodeKey != NULL) {
  1490. DmCloseKey(nodeKey);
  1491. }
  1492. return(status);
  1493. } // NmpUpdateAddNode
  1494. DWORD
  1495. NmpUpdateCreateNode(
  1496. IN BOOL SourceNode,
  1497. IN LPDWORD NodeId
  1498. )
  1499. /*++
  1500. Routine Description:
  1501. GUM update handler for dynamically creating a new node
  1502. Arguments:
  1503. SourceNode - Specifies whether or not this is the source node for the update
  1504. NodeId - Specifies the ID of the node.
  1505. Return Value:
  1506. ERROR_SUCCESS if successful
  1507. Win32 error code otherwise.
  1508. Notes:
  1509. This handler was used by NT4 nodes. Since it is not possible to add
  1510. an NT4 node to a cluster containing an NT5 node, this handler should
  1511. never be called in an NT5 system.
  1512. --*/
  1513. {
  1514. CL_ASSERT(FALSE);
  1515. return(ERROR_CLUSTER_INCOMPATIBLE_VERSIONS);
  1516. } // NmpUpdateCreateNode
  1517. DWORD
  1518. NmpUpdatePauseNode(
  1519. IN BOOL SourceNode,
  1520. IN LPWSTR NodeId
  1521. )
  1522. /*++
  1523. Routine Description:
  1524. GUM update handler for pausing a node
  1525. Arguments:
  1526. SourceNode - Specifies whether or not this is the source node for the update
  1527. NodeId - Specifies the name of the node.
  1528. Return Value:
  1529. ERROR_SUCCESS if successful
  1530. Win32 error code otherwise.
  1531. --*/
  1532. {
  1533. DWORD status = ERROR_SUCCESS;
  1534. HLOCALXSACTION xaction = NULL;
  1535. PNM_NODE node = NULL;
  1536. BOOLEAN lockAcquired = FALSE;
  1537. if (!NmpEnterApi(NmStateOnline)) {
  1538. ClRtlLogPrint(LOG_NOISE,
  1539. "[NM] Not in valid state to process PauseNode update.\n"
  1540. );
  1541. return(ERROR_NODE_NOT_AVAILABLE);
  1542. }
  1543. ClRtlLogPrint(LOG_NOISE,
  1544. "[NM] Received update to pause node %1!ws!\n",
  1545. NodeId
  1546. );
  1547. xaction = DmBeginLocalUpdate();
  1548. if (xaction == NULL) {
  1549. status = GetLastError();
  1550. ClRtlLogPrint(LOG_CRITICAL,
  1551. "[NM] Failed to start a transaction, status %1!u!\n",
  1552. status
  1553. );
  1554. goto error_exit;
  1555. }
  1556. node = OmReferenceObjectById(ObjectTypeNode, NodeId);
  1557. if (node == NULL) {
  1558. status = ERROR_CLUSTER_NODE_NOT_FOUND;
  1559. ClRtlLogPrint(LOG_UNUSUAL,
  1560. "[NM] Node %1!ws! does not exist\n",
  1561. NodeId
  1562. );
  1563. goto error_exit;
  1564. }
  1565. NmpAcquireLock(); lockAcquired = TRUE;
  1566. if (node->NodeId == NmpJoinerNodeId) {
  1567. status = ERROR_CLUSTER_NODE_DOWN;
  1568. ClRtlLogPrint(LOG_UNUSUAL,
  1569. "[NM] Cannot pause node %1!ws! because it is in the process "
  1570. "of joining the cluster.\n",
  1571. NodeId
  1572. );
  1573. goto error_exit;
  1574. }
  1575. if (node->State == ClusterNodeUp) {
  1576. //
  1577. // Update the registry to reflect the new state.
  1578. //
  1579. HDMKEY nodeKey = DmOpenKey(DmNodesKey, NodeId, KEY_WRITE);
  1580. if (nodeKey != NULL) {
  1581. DWORD isPaused = 1;
  1582. status = DmLocalSetValue(
  1583. xaction,
  1584. nodeKey,
  1585. CLUSREG_NAME_NODE_PAUSED,
  1586. REG_DWORD,
  1587. (CONST BYTE *)&isPaused,
  1588. sizeof(isPaused)
  1589. );
  1590. #ifdef CLUSTER_TESTPOINT
  1591. TESTPT(TpFailNmPauseNode) {
  1592. status = 999999;
  1593. }
  1594. #endif
  1595. if (status == ERROR_SUCCESS) {
  1596. node->State = ClusterNodePaused;
  1597. ClusterEvent(CLUSTER_EVENT_NODE_CHANGE, node);
  1598. //
  1599. // If a node happens to be joining right now, flag the
  1600. // fact that it is now out of synch with the cluster config.
  1601. //
  1602. if (NmpJoinerNodeId != ClusterInvalidNodeId) {
  1603. ClRtlLogPrint(LOG_NOISE,
  1604. "[NMJOIN] Joiner (ID %1!u!) is now out of sync due "
  1605. "to pause operation on node %2!ws!.\n",
  1606. NmpJoinerNodeId,
  1607. NodeId
  1608. );
  1609. NmpJoinerOutOfSynch = TRUE;
  1610. }
  1611. }
  1612. else {
  1613. ClRtlLogPrint(LOG_UNUSUAL,
  1614. "[NM] Failed to set Paused value for node %1!ws!, "
  1615. "status %2!u!.\n",
  1616. NodeId,
  1617. status
  1618. );
  1619. }
  1620. DmCloseKey(nodeKey);
  1621. }
  1622. else {
  1623. status = GetLastError();
  1624. ClRtlLogPrint(LOG_UNUSUAL,
  1625. "[NM] Failed to open key for node %1!ws!, status %2!u!.\n",
  1626. NodeId,
  1627. status
  1628. );
  1629. }
  1630. }
  1631. else if (node->State != ClusterNodePaused) {
  1632. status = ERROR_CLUSTER_NODE_DOWN;
  1633. }
  1634. error_exit:
  1635. if (lockAcquired) {
  1636. NmpLockedLeaveApi();
  1637. NmpReleaseLock();
  1638. }
  1639. else {
  1640. NmpLeaveApi();
  1641. }
  1642. if (xaction != NULL) {
  1643. if (status == ERROR_SUCCESS) {
  1644. DmCommitLocalUpdate(xaction);
  1645. }
  1646. else {
  1647. DmAbortLocalUpdate(xaction);
  1648. }
  1649. }
  1650. if (node != NULL) {
  1651. OmDereferenceObject(node);
  1652. }
  1653. return(status);
  1654. } // NmpUpdatePauseNode
  1655. DWORD
  1656. NmpUpdateResumeNode(
  1657. IN BOOL SourceNode,
  1658. IN LPWSTR NodeId
  1659. )
  1660. /*++
  1661. Routine Description:
  1662. GUM update handler for resuming a node
  1663. Arguments:
  1664. SourceNode - Specifies whether or not this is the source node for the update
  1665. NodeId - Specifies the name of the node.
  1666. Return Value:
  1667. ERROR_SUCCESS if successful
  1668. Win32 error code otherwise.
  1669. --*/
  1670. {
  1671. DWORD status = ERROR_SUCCESS;
  1672. HLOCALXSACTION xaction = NULL;
  1673. PNM_NODE node = NULL;
  1674. BOOLEAN lockAcquired = FALSE;
  1675. if (!NmpEnterApi(NmStateOnline)) {
  1676. ClRtlLogPrint(LOG_NOISE,
  1677. "[NM] Not in valid state to process ResumeNode update.\n"
  1678. );
  1679. return(ERROR_NODE_NOT_AVAILABLE);
  1680. }
  1681. ClRtlLogPrint(LOG_NOISE,
  1682. "[NM] Received update to resume node %1!ws!\n",
  1683. NodeId
  1684. );
  1685. xaction = DmBeginLocalUpdate();
  1686. if (xaction == NULL) {
  1687. status = GetLastError();
  1688. ClRtlLogPrint(LOG_CRITICAL,
  1689. "[NM] Failed to start a transaction, status %1!u!\n",
  1690. status
  1691. );
  1692. goto error_exit;
  1693. }
  1694. node = OmReferenceObjectById(ObjectTypeNode, NodeId);
  1695. if (node == NULL) {
  1696. status = ERROR_CLUSTER_NODE_NOT_FOUND;
  1697. ClRtlLogPrint(LOG_UNUSUAL,
  1698. "[NM] Node %1!ws! does not exist\n",
  1699. NodeId
  1700. );
  1701. goto error_exit;
  1702. }
  1703. NmpAcquireLock(); lockAcquired = TRUE;
  1704. if (node->NodeId == NmpJoinerNodeId) {
  1705. status = ERROR_CLUSTER_NODE_DOWN;
  1706. ClRtlLogPrint(LOG_UNUSUAL,
  1707. "[NM] Cannot resume node %1!ws! because it is in the process "
  1708. "of joining the cluster.\n",
  1709. NodeId
  1710. );
  1711. goto error_exit;
  1712. }
  1713. if (node->State == ClusterNodePaused) {
  1714. //
  1715. // Update the registry to reflect the new state.
  1716. //
  1717. HDMKEY nodeKey = DmOpenKey(DmNodesKey, NodeId, KEY_WRITE);
  1718. if (nodeKey != NULL) {
  1719. status = DmLocalDeleteValue(
  1720. xaction,
  1721. nodeKey,
  1722. CLUSREG_NAME_NODE_PAUSED
  1723. );
  1724. #ifdef CLUSTER_TESTPOINT
  1725. TESTPT(TpFailNmResumeNode) {
  1726. status = 999999;
  1727. }
  1728. #endif
  1729. if (status == ERROR_SUCCESS) {
  1730. node->State = ClusterNodeUp;
  1731. ClusterEvent(CLUSTER_EVENT_NODE_CHANGE, node);
  1732. //
  1733. // If a node happens to be joining right now, flag the
  1734. // fact that it is now out of synch with the cluster config.
  1735. //
  1736. if (NmpJoinerNodeId != ClusterInvalidNodeId) {
  1737. ClRtlLogPrint(LOG_NOISE,
  1738. "[NMJOIN] Joiner (ID %1!u!) is now out of sync due "
  1739. "to resume operation on node %2!ws!.\n",
  1740. NmpJoinerNodeId,
  1741. NodeId
  1742. );
  1743. NmpJoinerOutOfSynch = TRUE;
  1744. }
  1745. }
  1746. else {
  1747. ClRtlLogPrint(LOG_UNUSUAL,
  1748. "[NM] Failed to delete Paused value for node %1!ws!, "
  1749. "status %2!u!.\n",
  1750. NodeId,
  1751. status
  1752. );
  1753. }
  1754. DmCloseKey(nodeKey);
  1755. }
  1756. else {
  1757. status = GetLastError();
  1758. ClRtlLogPrint(LOG_UNUSUAL,
  1759. "[NM] Failed to open key for node %1!ws!, status %2!u!.\n",
  1760. NodeId,
  1761. status
  1762. );
  1763. }
  1764. }
  1765. else {
  1766. status = ERROR_CLUSTER_NODE_NOT_PAUSED;
  1767. }
  1768. error_exit:
  1769. if (lockAcquired) {
  1770. NmpLockedLeaveApi();
  1771. NmpReleaseLock();
  1772. }
  1773. else {
  1774. NmpLeaveApi();
  1775. }
  1776. if (xaction != NULL) {
  1777. if (status == ERROR_SUCCESS) {
  1778. DmCommitLocalUpdate(xaction);
  1779. }
  1780. else {
  1781. DmAbortLocalUpdate(xaction);
  1782. }
  1783. }
  1784. if (node != NULL) {
  1785. OmDereferenceObject(node);
  1786. }
  1787. return(status);
  1788. } // NmpUpdateResumeNode
  1789. DWORD
  1790. NmpUpdateEvictNode(
  1791. IN BOOL SourceNode,
  1792. IN LPWSTR NodeId
  1793. )
  1794. /*++
  1795. Routine Description:
  1796. GUM update handler for evicting a node.
  1797. The specified node is deleted from the OM.
  1798. If the specified node is online, it is paused to prevent any other groups
  1799. from moving there.
  1800. If the specified node is the current node, it attempts to failover any
  1801. owned groups.
  1802. Arguments:
  1803. SourceNode - Specifies whether or not this is the source node for the update
  1804. NodeId - Specifies the name of the node.
  1805. Return Value:
  1806. ERROR_SUCCESS if successful
  1807. Win32 error code otherwise.
  1808. Notes:
  1809. It is very hard to make this operation abortable, so it isn't. If anything
  1810. goes wrong past a certain point, the node will halt.
  1811. Assumption: Since global updates are serialized, and local transactions
  1812. guarantee exclusive access to the registry, no other updates can be made in
  1813. parallel by the FM.
  1814. --*/
  1815. {
  1816. DWORD status = ERROR_SUCCESS;
  1817. PNM_NODE node = NULL;
  1818. HLOCALXSACTION xaction = NULL;
  1819. PNM_NETWORK network;
  1820. LPCWSTR networkId;
  1821. PNM_INTERFACE netInterface;
  1822. LPCWSTR interfaceId;
  1823. PLIST_ENTRY entry;
  1824. BOOLEAN lockAcquired = FALSE;
  1825. if (!NmpEnterApi(NmStateOnline)) {
  1826. ClRtlLogPrint(LOG_NOISE,
  1827. "[NM] Not in valid state to process EvictNode update.\n"
  1828. );
  1829. return(ERROR_NODE_NOT_AVAILABLE);
  1830. }
  1831. ClRtlLogPrint(LOG_NOISE,
  1832. "[NM] Received update to evict node %1!ws!\n",
  1833. NodeId
  1834. );
  1835. node = OmReferenceObjectById(ObjectTypeNode, NodeId);
  1836. if (node == NULL) {
  1837. ClRtlLogPrint(LOG_UNUSUAL,
  1838. "[NM] Node %1!ws! does not exist\n",
  1839. NodeId
  1840. );
  1841. status = ERROR_CLUSTER_NODE_NOT_FOUND;
  1842. goto error_exit;
  1843. }
  1844. //
  1845. // Begin a transaction
  1846. //
  1847. xaction = DmBeginLocalUpdate();
  1848. if (xaction == NULL) {
  1849. status = GetLastError();
  1850. ClRtlLogPrint(LOG_CRITICAL,
  1851. "[NM] Failed to start a transaction, status %1!u!\n",
  1852. status
  1853. );
  1854. goto error_exit;
  1855. }
  1856. NmpAcquireLock(); lockAcquired = TRUE;
  1857. if (NmpJoinerNodeId != ClusterInvalidNodeId) {
  1858. status = ERROR_CLUSTER_JOIN_IN_PROGRESS;
  1859. ClRtlLogPrint(LOG_NOISE,
  1860. "[NM] Cannot evict node because a join is in progress.\n"
  1861. );
  1862. goto error_exit;
  1863. }
  1864. //
  1865. // Only continue if the node is down. Evicting a node while it
  1866. // is actively participating in the cluster is way too tricky.
  1867. //
  1868. if (node->State != ClusterNodeDown) {
  1869. status = ERROR_CANT_EVICT_ACTIVE_NODE;
  1870. ClRtlLogPrint(LOG_CRITICAL,
  1871. "[NM] Node %1!ws! cannot be evicted because it is not offline.\n",
  1872. NodeId
  1873. );
  1874. goto error_exit;
  1875. }
  1876. //
  1877. // Scrub the FM's portion of the registry of all references to this node.
  1878. //
  1879. status = NmpCleanseRegistry(NodeId, xaction);
  1880. if (status != ERROR_SUCCESS) {
  1881. ClRtlLogPrint(LOG_CRITICAL,
  1882. "[NM] Failed to remove all resource database references to "
  1883. "evicted node %1!ws!, status %2!u!\n",
  1884. NodeId,
  1885. status
  1886. );
  1887. goto error_exit;
  1888. }
  1889. //
  1890. // Delete the node's interfaces from the database.
  1891. //
  1892. for (entry = node->InterfaceList.Flink;
  1893. entry != &(node->InterfaceList);
  1894. entry = entry->Flink
  1895. )
  1896. {
  1897. netInterface = CONTAINING_RECORD(
  1898. entry,
  1899. NM_INTERFACE,
  1900. NodeLinkage
  1901. );
  1902. interfaceId = OmObjectId(netInterface);
  1903. network = netInterface->Network;
  1904. networkId = OmObjectId(network);
  1905. //
  1906. // Delete the interface definition from the database.
  1907. //
  1908. status = DmLocalDeleteTree(xaction, DmNetInterfacesKey, interfaceId);
  1909. if (status != ERROR_SUCCESS) {
  1910. ClRtlLogPrint(LOG_CRITICAL,
  1911. "[NM] Failed to delete definition for interface %1!ws!, "
  1912. "status %2!u!.\n",
  1913. interfaceId,
  1914. status
  1915. );
  1916. goto error_exit;
  1917. }
  1918. if (network->InterfaceCount == 1) {
  1919. //
  1920. // This is the last interface on the network.
  1921. // Delete the network too.
  1922. //
  1923. status = DmLocalDeleteTree(xaction, DmNetworksKey, networkId);
  1924. if (status != ERROR_SUCCESS) {
  1925. ClRtlLogPrint(LOG_CRITICAL,
  1926. "[NM] Failed to delete definition for network %1!ws!, "
  1927. "status %2!u!.\n",
  1928. networkId,
  1929. status
  1930. );
  1931. goto error_exit;
  1932. }
  1933. }
  1934. }
  1935. //
  1936. // Delete the node's database entry
  1937. //
  1938. status = DmLocalDeleteTree(xaction, DmNodesKey, NodeId);
  1939. #ifdef CLUSTER_TESTPOINT
  1940. TESTPT(TpFailNmEvictNodeAbort) {
  1941. status = 999999;
  1942. }
  1943. #endif
  1944. if (status != ERROR_SUCCESS) {
  1945. ClRtlLogPrint(LOG_CRITICAL,
  1946. "[NM] Failed to delete node's database key, status %1!u!\n",
  1947. status
  1948. );
  1949. goto error_exit;
  1950. }
  1951. //
  1952. // WARNING: From here on, operations cannot be reversed.
  1953. // If any one of them fails, this node must halt to avoid being
  1954. // inconsistent.
  1955. //
  1956. //
  1957. // Delete the interface objects associated with this node.
  1958. //
  1959. while (!IsListEmpty(&(node->InterfaceList))) {
  1960. entry = node->InterfaceList.Flink;
  1961. netInterface = CONTAINING_RECORD(
  1962. entry,
  1963. NM_INTERFACE,
  1964. NodeLinkage
  1965. );
  1966. network = netInterface->Network;
  1967. networkId = OmObjectId(network);
  1968. NmpDeleteInterfaceObject(netInterface, TRUE);
  1969. if (network->InterfaceCount == 0) {
  1970. //
  1971. // This is the last interface on the network.
  1972. // Delete the network too.
  1973. //
  1974. NmpDeleteNetworkObject(network, TRUE);
  1975. }
  1976. }
  1977. //
  1978. // Delete the node's object.
  1979. //
  1980. NmpDeleteNodeObject(node, TRUE);
  1981. //after the node is deleted, recalculate the operational version of
  1982. //the cluster
  1983. NmpResetClusterVersion(TRUE);
  1984. //calculate the operational limit on the number of nodes that
  1985. //can be a part of this cluster
  1986. NmpResetClusterNodeLimit();
  1987. NmpReleaseLock(); lockAcquired = FALSE;
  1988. //
  1989. // Call the FM so it can clean up any outstanding references to this
  1990. // node from its structures.
  1991. //
  1992. status = FmEvictNode(node);
  1993. #ifdef CLUSTER_TESTPOINT
  1994. TESTPT(TpFailNmEvictNodeHalt) {
  1995. status = 999999;
  1996. }
  1997. #endif
  1998. if (status != ERROR_SUCCESS ) {
  1999. WCHAR string[16];
  2000. wsprintfW(&(string[0]), L"%u", status);
  2001. ClRtlLogPrint(LOG_CRITICAL,
  2002. "[NM] FATAL ERROR: Failed to remove all resource references to evicted node %1!ws!, status %2!u!\n",
  2003. NodeId,
  2004. status
  2005. );
  2006. CsLogEvent3(
  2007. LOG_CRITICAL,
  2008. NM_EVENT_EVICTION_ERROR,
  2009. NmLocalNodeName,
  2010. OmObjectName(node),
  2011. string
  2012. );
  2013. CsInconsistencyHalt(status);
  2014. }
  2015. CL_ASSERT(status == ERROR_SUCCESS);
  2016. error_exit:
  2017. if (lockAcquired) {
  2018. NmpLockedLeaveApi();
  2019. NmpReleaseLock();
  2020. }
  2021. else {
  2022. NmpLeaveApi();
  2023. }
  2024. if (xaction != NULL) {
  2025. if (status == ERROR_SUCCESS) {
  2026. DmCommitLocalUpdate(xaction);
  2027. }
  2028. else {
  2029. DmAbortLocalUpdate(xaction);
  2030. }
  2031. }
  2032. if (node != NULL) {
  2033. OmDereferenceObject(node);
  2034. }
  2035. if (status != ERROR_SUCCESS) {
  2036. ClRtlLogPrint(LOG_CRITICAL,
  2037. "[NM] Failed to evict node %1!ws!.\n",
  2038. NodeId
  2039. );
  2040. }
  2041. return(status);
  2042. } // NmpUpdateEvictNode
  2043. /////////////////////////////////////////////////////////////////////////////
  2044. //
  2045. // Database management routines
  2046. //
  2047. /////////////////////////////////////////////////////////////////////////////
  2048. DWORD
  2049. NmpGetNodeDefinition(
  2050. IN OUT PNM_NODE_INFO2 NodeInfo
  2051. )
  2052. /*++
  2053. Routine Description:
  2054. Reads information about a defined cluster node from the cluster database
  2055. and stores the information in a supplied structure.
  2056. Arguments:
  2057. NodeInfo - A pointer to the structure into which to store the node
  2058. information. The NodeId field of the structure contains
  2059. the ID of the node for which to read information.
  2060. Return Value:
  2061. ERROR_SUCCESS if the routine succeeds.
  2062. A Win32 error code otherwise.
  2063. --*/
  2064. {
  2065. DWORD status;
  2066. HDMKEY nodeKey = NULL;
  2067. DWORD valueLength;
  2068. DWORD valueType;
  2069. LPWSTR string;
  2070. WCHAR errorString[12];
  2071. nodeKey = DmOpenKey(DmNodesKey, NodeInfo->NodeId, KEY_READ);
  2072. if (nodeKey == NULL) {
  2073. status = GetLastError();
  2074. wsprintfW(&(errorString[0]), L"%u", status);
  2075. CsLogEvent2(
  2076. LOG_CRITICAL,
  2077. CS_EVENT_REG_OPEN_FAILED,
  2078. NodeInfo->NodeId,
  2079. errorString
  2080. );
  2081. ClRtlLogPrint(LOG_CRITICAL,
  2082. "[NM] Failed to open node key, status %1!u!\n",
  2083. status
  2084. );
  2085. status = ERROR_CLUSTER_NODE_NOT_MEMBER;
  2086. goto error_exit;
  2087. }
  2088. valueLength = sizeof(NodeInfo->NodeName);
  2089. string = CLUSREG_NAME_NODE_NAME;
  2090. status = DmQueryValue(
  2091. nodeKey,
  2092. string,
  2093. &valueType,
  2094. (LPBYTE) &(NodeInfo->NodeName[0]),
  2095. &valueLength
  2096. );
  2097. if (status != ERROR_SUCCESS) {
  2098. wsprintfW(&(errorString[0]), L"%u", status);
  2099. CsLogEvent2(
  2100. LOG_CRITICAL,
  2101. CS_EVENT_REG_QUERY_FAILED,
  2102. string,
  2103. errorString
  2104. );
  2105. ClRtlLogPrint(LOG_CRITICAL,
  2106. "[NM] Failed to read node name, status %1!u!\n",
  2107. status
  2108. );
  2109. goto error_exit;
  2110. }
  2111. if (valueType != REG_SZ) {
  2112. status = ERROR_INVALID_PARAMETER;
  2113. wsprintfW(&(errorString[0]), L"%u", status);
  2114. CsLogEvent2(
  2115. LOG_CRITICAL,
  2116. CS_EVENT_REG_QUERY_FAILED,
  2117. string,
  2118. errorString
  2119. );
  2120. goto error_exit;
  2121. }
  2122. //read the node's highest version
  2123. string = CLUSREG_NAME_NODE_HIGHEST_VERSION;
  2124. status = DmQueryDword(nodeKey, string, &NodeInfo->NodeHighestVersion,
  2125. NULL);
  2126. if (status != ERROR_SUCCESS)
  2127. {
  2128. wsprintfW(&(errorString[0]), L"%u", status);
  2129. CsLogEvent2(
  2130. LOG_CRITICAL,
  2131. CS_EVENT_REG_QUERY_FAILED,
  2132. string,
  2133. errorString
  2134. );
  2135. //this can happen on an upgrade from sp3 to nt5
  2136. //assume the node highest version is that of sp3
  2137. //the fixup function will get this fixed
  2138. NodeInfo->NodeHighestVersion = CLUSTER_MAKE_VERSION(1, 224);
  2139. }
  2140. //read the node's lowest version
  2141. string = CLUSREG_NAME_NODE_LOWEST_VERSION;
  2142. status = DmQueryDword(nodeKey, string, &NodeInfo->NodeLowestVersion,
  2143. NULL);
  2144. if (status != ERROR_SUCCESS)
  2145. {
  2146. wsprintfW(&(errorString[0]), L"%u", status);
  2147. CsLogEvent2(
  2148. LOG_CRITICAL,
  2149. CS_EVENT_REG_QUERY_FAILED,
  2150. string,
  2151. errorString
  2152. );
  2153. //this can happen on upgrade from sp3 to nt5
  2154. //if the nodelowestversion is not present assume it
  2155. //was an sp3 node(lowest version is 1.224)
  2156. NodeInfo->NodeLowestVersion = CLUSTER_MAKE_VERSION( 1, 224);
  2157. }
  2158. NodeInfo->State = ClusterNodeDown;
  2159. DmCloseKey(nodeKey);
  2160. return(ERROR_SUCCESS);
  2161. error_exit:
  2162. ClNetFreeNodeInfo(NodeInfo);
  2163. if (nodeKey != NULL) {
  2164. DmCloseKey(nodeKey);
  2165. }
  2166. return(status);
  2167. } // NmpGetNodeDefinition
  2168. DWORD
  2169. NmpGetNodeAuxInfo(
  2170. IN LPCWSTR NodeId,
  2171. IN OUT PNM_NODE_AUX_INFO pNodeAuxInfo
  2172. )
  2173. /*++
  2174. Routine Description:
  2175. Reads information about a defined cluster node from the cluster database
  2176. and stores the information in a supplied structure.
  2177. Arguments:
  2178. pNodeAuxInfo - A pointer to the structure into which to store the node
  2179. information. The NodeId field of the structure contains
  2180. the ID of the node for which to read information.
  2181. Return Value:
  2182. ERROR_SUCCESS if the routine succeeds.
  2183. A Win32 error code otherwise.
  2184. --*/
  2185. {
  2186. DWORD status;
  2187. HDMKEY nodeKey = NULL;
  2188. DWORD valueLength;
  2189. DWORD valueType;
  2190. LPWSTR string;
  2191. WCHAR errorString[12];
  2192. nodeKey = DmOpenKey(DmNodesKey, NodeId, KEY_READ);
  2193. if (nodeKey == NULL)
  2194. {
  2195. status = GetLastError();
  2196. wsprintfW(&(errorString[0]), L"%u", status);
  2197. CsLogEvent2(
  2198. LOG_CRITICAL,
  2199. CS_EVENT_REG_OPEN_FAILED,
  2200. NodeId,
  2201. errorString
  2202. );
  2203. ClRtlLogPrint(LOG_CRITICAL,
  2204. "[NM] NmpGetNodeAuxInfo : Failed to open node key, "
  2205. "status %1!u!\n",
  2206. status);
  2207. status = ERROR_CLUSTER_NODE_NOT_MEMBER;
  2208. goto error_exit;
  2209. }
  2210. //read the node's product suite
  2211. string = CLUSREG_NAME_NODE_PRODUCT_SUITE;
  2212. status = DmQueryDword(
  2213. nodeKey,
  2214. string,
  2215. (LPDWORD)&(pNodeAuxInfo->ProductSuite),
  2216. NULL
  2217. );
  2218. if (status != ERROR_SUCCESS)
  2219. {
  2220. wsprintfW(&(errorString[0]), L"%u", status);
  2221. CsLogEvent2(
  2222. LOG_NOISE,
  2223. CS_EVENT_REG_QUERY_FAILED,
  2224. string,
  2225. errorString
  2226. );
  2227. //assume it is enterprise
  2228. pNodeAuxInfo->ProductSuite = Enterprise;
  2229. }
  2230. DmCloseKey(nodeKey);
  2231. return(ERROR_SUCCESS);
  2232. error_exit:
  2233. if (nodeKey != NULL)
  2234. {
  2235. DmCloseKey(nodeKey);
  2236. }
  2237. return(status);
  2238. } // NmpGetNodeAuxInfo
  2239. DWORD
  2240. NmpEnumNodeDefinitions(
  2241. PNM_NODE_ENUM2 * NodeEnum
  2242. )
  2243. /*++
  2244. Routine Description:
  2245. Reads information about all defined cluster nodes from the cluster
  2246. database and builds an enumeration structure containing the information.
  2247. Arguments:
  2248. NodeEnum - A pointer to the variable into which to place a pointer to
  2249. the allocated node enumeration.
  2250. Return Value:
  2251. ERROR_SUCCESS if the routine succeeds.
  2252. A Win32 error code otherwise.
  2253. Notes:
  2254. This routine MUST NOT be called with the NM lock held.
  2255. --*/
  2256. {
  2257. DWORD status;
  2258. PNM_NODE_ENUM2 nodeEnum = NULL;
  2259. WCHAR nodeId[CS_MAX_NODE_ID_LENGTH];
  2260. DWORD i;
  2261. DWORD valueLength;
  2262. DWORD numNodes;
  2263. DWORD ignored;
  2264. FILETIME fileTime;
  2265. WCHAR errorString[12];
  2266. HLOCALXSACTION xaction;
  2267. BOOLEAN commitXaction = FALSE;
  2268. *NodeEnum = NULL;
  2269. //
  2270. // Begin a transaction - this must not be done while holding
  2271. // the NM lock.
  2272. //
  2273. xaction = DmBeginLocalUpdate();
  2274. if (xaction == NULL) {
  2275. status = GetLastError();
  2276. ClRtlLogPrint(LOG_UNUSUAL,
  2277. "[NM] Failed to begin a transaction, status %1!u!.\n",
  2278. status
  2279. );
  2280. }
  2281. NmpAcquireLock();
  2282. //
  2283. // First count the number of nodes.
  2284. //
  2285. status = DmQueryInfoKey(
  2286. DmNodesKey,
  2287. &numNodes,
  2288. &ignored, // MaxSubKeyLen
  2289. &ignored, // Values
  2290. &ignored, // MaxValueNameLen
  2291. &ignored, // MaxValueLen
  2292. &ignored, // lpcbSecurityDescriptor
  2293. &fileTime
  2294. );
  2295. if (status != ERROR_SUCCESS) {
  2296. wsprintfW(&(errorString[0]), L"%u", status);
  2297. CsLogEvent1(LOG_CRITICAL, CS_EVENT_REG_OPERATION_FAILED, errorString);
  2298. ClRtlLogPrint(LOG_CRITICAL,
  2299. "[NM] Failed to query Nodes key information, status %1!u!\n",
  2300. status
  2301. );
  2302. goto error_exit;
  2303. }
  2304. valueLength = sizeof(NM_NODE_ENUM2) +
  2305. (sizeof(NM_NODE_INFO2) * (numNodes - 1));
  2306. nodeEnum = MIDL_user_allocate(valueLength);
  2307. if (nodeEnum == NULL) {
  2308. status = ERROR_NOT_ENOUGH_MEMORY;
  2309. wsprintfW(&(errorString[0]), L"%u", status);
  2310. CsLogEvent1(LOG_CRITICAL, CS_EVENT_ALLOCATION_FAILURE, errorString);
  2311. ClRtlLogPrint(LOG_CRITICAL, "[NM] Failed to allocate memory.\n");
  2312. goto error_exit;
  2313. }
  2314. ZeroMemory(nodeEnum, valueLength);
  2315. for (i=0; i < numNodes; i++) {
  2316. valueLength = sizeof(nodeEnum->NodeList[nodeEnum->NodeCount].NodeId);
  2317. status = DmEnumKey(
  2318. DmNodesKey,
  2319. i,
  2320. &(nodeEnum->NodeList[nodeEnum->NodeCount].NodeId[0]),
  2321. &valueLength,
  2322. NULL
  2323. );
  2324. if (status != ERROR_SUCCESS) {
  2325. wsprintfW(&(errorString[0]), L"%u", status);
  2326. CsLogEvent1(
  2327. LOG_CRITICAL,
  2328. CS_EVENT_REG_OPERATION_FAILED,
  2329. errorString
  2330. );
  2331. ClRtlLogPrint(LOG_CRITICAL,
  2332. "[NM] Failed to enumerate node key, status %1!u!\n",
  2333. status
  2334. );
  2335. goto error_exit;
  2336. }
  2337. status = NmpGetNodeDefinition(
  2338. &(nodeEnum->NodeList[nodeEnum->NodeCount])
  2339. );
  2340. if (status != ERROR_SUCCESS) {
  2341. if (status == ERROR_FILE_NOT_FOUND) {
  2342. //
  2343. // Partial node definition in the database.
  2344. // Probably from a failed AddNode operation.
  2345. //
  2346. LPWSTR nodeIdString =
  2347. nodeEnum->NodeList[nodeEnum->NodeCount].NodeId;
  2348. DWORD nodeId = wcstoul(
  2349. nodeIdString,
  2350. NULL,
  2351. 10
  2352. );
  2353. //
  2354. // Delete the key and ignore it in the enum struct if it
  2355. // is safe to do so.
  2356. //
  2357. if ( (NmpIdArray[nodeId] == NULL) &&
  2358. (nodeId != NmLocalNodeId)
  2359. )
  2360. {
  2361. if (xaction != NULL) {
  2362. DWORD status2;
  2363. ClRtlLogPrint(LOG_CRITICAL,
  2364. "[NM] Deleting partial definition for node "
  2365. "ID %1!ws!\n",
  2366. nodeIdString
  2367. );
  2368. status2 = DmLocalDeleteKey(
  2369. xaction,
  2370. DmNodesKey,
  2371. nodeIdString
  2372. );
  2373. if (status2 == ERROR_SUCCESS) {
  2374. commitXaction = TRUE;
  2375. }
  2376. }
  2377. }
  2378. continue;
  2379. }
  2380. goto error_exit;
  2381. }
  2382. nodeEnum->NodeCount++;
  2383. }
  2384. *NodeEnum = nodeEnum;
  2385. CL_ASSERT(status == ERROR_SUCCESS);
  2386. error_exit:
  2387. NmpReleaseLock();
  2388. if (xaction != NULL) {
  2389. if ((status == ERROR_SUCCESS) && commitXaction) {
  2390. DmCommitLocalUpdate(xaction);
  2391. }
  2392. else {
  2393. DmAbortLocalUpdate(xaction);
  2394. }
  2395. }
  2396. if ((status != ERROR_SUCCESS) && (nodeEnum != NULL)) {
  2397. ClNetFreeNodeEnum(nodeEnum);
  2398. }
  2399. return(status);
  2400. } // NmpEnumNodeDefinitions
  2401. /////////////////////////////////////////////////////////////////////////////
  2402. //
  2403. // Object management routines
  2404. //
  2405. /////////////////////////////////////////////////////////////////////////////
  2406. DWORD
  2407. NmpCreateNodeObjects(
  2408. IN PNM_NODE_ENUM2 NodeEnum
  2409. )
  2410. /*++
  2411. Routine Description:
  2412. Processes a node information enumeration and creates node objects.
  2413. Arguments:
  2414. NodeEnum - A pointer to a node information enumeration structure.
  2415. Return Value:
  2416. ERROR_SUCCESS if the routine completes successfully.
  2417. A Win32 error code otherwise.
  2418. --*/
  2419. {
  2420. DWORD status = ERROR_SUCCESS;
  2421. PNM_NODE_INFO2 nodeInfo;
  2422. DWORD i;
  2423. PNM_NODE node;
  2424. BOOLEAN foundLocalNode = FALSE;
  2425. for (i=0; i < NodeEnum->NodeCount; i++) {
  2426. nodeInfo = &(NodeEnum->NodeList[i]);
  2427. //
  2428. // The local node object was created during initialization.
  2429. // Skip it.
  2430. //
  2431. if (wcscmp(NmLocalNodeIdString, nodeInfo->NodeId) != 0) {
  2432. node = NmpCreateNodeObject(nodeInfo);
  2433. if (node == NULL) {
  2434. status = GetLastError();
  2435. break;
  2436. }
  2437. else {
  2438. OmDereferenceObject(node);
  2439. }
  2440. }
  2441. else {
  2442. foundLocalNode = TRUE;
  2443. }
  2444. }
  2445. if ( !foundLocalNode ) {
  2446. status = ERROR_CLUSTER_NODE_NOT_MEMBER;
  2447. }
  2448. return(status);
  2449. } // NmpCreateNodeObjects
  2450. DWORD
  2451. NmpCreateLocalNodeObject(
  2452. IN PNM_NODE_INFO2 NodeInfo
  2453. )
  2454. /*++
  2455. Routine Description:
  2456. Creates a node object for the local node given information about the node.
  2457. Arguments:
  2458. NodeInfo - A pointer to a structure containing a description of the node
  2459. to create.
  2460. Return Value:
  2461. ERROR_SUCCESS if the routine completes successfully.
  2462. A Win32 error code otherwise.
  2463. --*/
  2464. {
  2465. DWORD status;
  2466. LPWSTR string;
  2467. CL_ASSERT(NmLocalNode == NULL);
  2468. //
  2469. // Verify that the node name matches the local computername.
  2470. //
  2471. if (wcscmp(NodeInfo->NodeName, NmLocalNodeName) != 0) {
  2472. string = L"";
  2473. CsLogEvent2(
  2474. LOG_CRITICAL,
  2475. NM_EVENT_NODE_NOT_MEMBER,
  2476. NmLocalNodeName,
  2477. string
  2478. );
  2479. ClRtlLogPrint(LOG_CRITICAL,
  2480. "[NM] Computername does not match node name in database.\n"
  2481. );
  2482. return(ERROR_INVALID_PARAMETER);
  2483. }
  2484. NmLocalNode = NmpCreateNodeObject(NodeInfo);
  2485. if (NmLocalNode == NULL) {
  2486. status = GetLastError();
  2487. ClRtlLogPrint(LOG_CRITICAL,
  2488. "[NM] Failed to create local node (%1!ws!), status %2!u!.\n",
  2489. NodeInfo->NodeId,
  2490. status
  2491. );
  2492. return(status);
  2493. }
  2494. else {
  2495. NmLocalNode->ExtendedState = ClusterNodeJoining;
  2496. OmDereferenceObject(NmLocalNode);
  2497. }
  2498. return(ERROR_SUCCESS);
  2499. }
  2500. PNM_NODE
  2501. NmpCreateNodeObject(
  2502. IN PNM_NODE_INFO2 NodeInfo
  2503. )
  2504. /*++
  2505. Routine Description:
  2506. Creates a node object given information about the node.
  2507. Arguments:
  2508. NodeInfo - A pointer to a structure containing a description of the node
  2509. to create.
  2510. Return Value:
  2511. A pointer to the created node object if successful.
  2512. NULL if not successful. Extended error information is available
  2513. from GetLastError().
  2514. --*/
  2515. {
  2516. PNM_NODE node = NULL;
  2517. DWORD status = ERROR_SUCCESS;
  2518. BOOL created = FALSE;
  2519. DWORD eventCode = 0;
  2520. WCHAR errorString[12];
  2521. ClRtlLogPrint(LOG_NOISE,
  2522. "[NM] Creating object for node %1!ws! (%2!ws!)\n",
  2523. NodeInfo->NodeId,
  2524. NodeInfo->NodeName
  2525. );
  2526. //
  2527. // Make sure that the node doesn't already exist.
  2528. //
  2529. node = OmReferenceObjectById(ObjectTypeNode, NodeInfo->NodeId);
  2530. if (node == NULL) {
  2531. //
  2532. // Make sure that the node doesn't already exist, this time by name.
  2533. //
  2534. node = OmReferenceObjectByName(ObjectTypeNode, NodeInfo->NodeName);
  2535. }
  2536. if (node != NULL) {
  2537. OmDereferenceObject(node);
  2538. ClRtlLogPrint(LOG_CRITICAL,
  2539. "[NM] Object already exists for node %1!ws!\n",
  2540. NodeInfo->NodeId
  2541. );
  2542. SetLastError(ERROR_OBJECT_ALREADY_EXISTS);
  2543. return(NULL);
  2544. }
  2545. node = OmCreateObject(
  2546. ObjectTypeNode,
  2547. NodeInfo->NodeId,
  2548. NodeInfo->NodeName,
  2549. &created
  2550. );
  2551. if (node == NULL) {
  2552. status = GetLastError();
  2553. wsprintfW(&(errorString[0]), L"%u", status);
  2554. CsLogEvent1(LOG_CRITICAL, CS_EVENT_ALLOCATION_FAILURE, errorString);
  2555. ClRtlLogPrint(LOG_CRITICAL,
  2556. "[NM] Failed to create object for node %1!ws! (%2!ws!), status %3!u!\n",
  2557. NodeInfo->NodeId,
  2558. NodeInfo->NodeName,
  2559. status
  2560. );
  2561. SetLastError(status);
  2562. return(NULL);
  2563. }
  2564. CL_ASSERT(created == TRUE);
  2565. ZeroMemory(node, sizeof(NM_NODE));
  2566. node->NodeId = wcstoul(NodeInfo->NodeId, NULL, 10);
  2567. node->State = NodeInfo->State;
  2568. // A join cannot proceed if any of the current node's ExtendedState is not up. But the State might be paused.
  2569. // So don't copy the State field into ExtendedState field. (#379170)
  2570. node->ExtendedState = ClusterNodeUp;
  2571. node->HighestVersion = NodeInfo->NodeHighestVersion;
  2572. node->LowestVersion = NodeInfo->NodeLowestVersion;
  2573. //for now assume enterprise
  2574. //NmpRefresh will fixup this information later..
  2575. node->ProductSuite = Enterprise;
  2576. InitializeListHead(&(node->InterfaceList));
  2577. CL_ASSERT(NmIsValidNodeId(node->NodeId));
  2578. if (node->NodeId != NmLocalNodeId) {
  2579. status = ClusnetRegisterNode(NmClusnetHandle, node->NodeId);
  2580. if (status != ERROR_SUCCESS) {
  2581. wsprintfW(&(errorString[0]), L"%u", status);
  2582. CsLogEvent2(
  2583. LOG_CRITICAL,
  2584. NM_EVENT_CLUSNET_REGISTER_NODE_FAILED,
  2585. NodeInfo->NodeId,
  2586. errorString
  2587. );
  2588. ClRtlLogPrint(LOG_CRITICAL,
  2589. "[NM] Failed to register node %1!ws! (%2!ws!) with the Cluster Network, status %3!u!\n",
  2590. NodeInfo->NodeId,
  2591. NodeInfo->NodeName,
  2592. status
  2593. );
  2594. goto error_exit;
  2595. }
  2596. }
  2597. //
  2598. // Put a reference on the object for the caller.
  2599. //
  2600. OmReferenceObject(node);
  2601. NmpAcquireLock();
  2602. if (NM_NODE_UP(node)) {
  2603. //
  2604. // Add this node to the up nodes set
  2605. //
  2606. BitsetAdd(NmpUpNodeSet, node->NodeId);
  2607. //
  2608. // Enable communication with this node during the
  2609. // join process.
  2610. //
  2611. ClRtlLogPrint(LOG_NOISE,
  2612. "[NM] Enabling communication for node %1!ws!\n",
  2613. NodeInfo->NodeId
  2614. );
  2615. status = ClusnetOnlineNodeComm(NmClusnetHandle, node->NodeId);
  2616. if (status != ERROR_SUCCESS) {
  2617. NmpReleaseLock();
  2618. OmDereferenceObject(node);
  2619. wsprintfW(&(errorString[0]), L"%u", status);
  2620. CsLogEvent2(
  2621. LOG_CRITICAL,
  2622. NM_EVENT_CLUSNET_ONLINE_COMM_FAILED,
  2623. NodeInfo->NodeId,
  2624. errorString
  2625. );
  2626. ClRtlLogPrint(LOG_CRITICAL,
  2627. "[NM] Failed to enable node %1!ws! (%2!ws!) for communication, status %3!u!\n",
  2628. NodeInfo->NodeId,
  2629. NodeInfo->NodeName,
  2630. status
  2631. );
  2632. goto error_exit;
  2633. }
  2634. }
  2635. CL_ASSERT(NmpIdArray != NULL);
  2636. CL_ASSERT(NmpIdArray[node->NodeId] == NULL);
  2637. NmpIdArray[node->NodeId] = node;
  2638. InsertTailList(&NmpNodeList, &(node->Linkage));
  2639. node->Flags |= NM_FLAG_OM_INSERTED;
  2640. OmInsertObject(node);
  2641. NmpNodeCount++;
  2642. NmpReleaseLock();
  2643. return(node);
  2644. error_exit:
  2645. ClRtlLogPrint(LOG_CRITICAL,
  2646. "[NM] Failed to create object for node %1!ws!, status %2!u!.\n",
  2647. NodeInfo->NodeId,
  2648. status
  2649. );
  2650. if (eventCode != 0) {
  2651. wsprintfW(&(errorString[0]), L"%u", status);
  2652. CsLogEvent1(LOG_CRITICAL, eventCode, errorString);
  2653. }
  2654. if (node != NULL) {
  2655. NmpAcquireLock();
  2656. NmpDeleteNodeObject(node, FALSE);
  2657. NmpReleaseLock();
  2658. }
  2659. SetLastError(status);
  2660. return(NULL);
  2661. } // NmpCreateNodeObject
  2662. DWORD
  2663. NmpGetNodeObjectInfo(
  2664. IN PNM_NODE Node,
  2665. IN OUT PNM_NODE_INFO2 NodeInfo
  2666. )
  2667. /*++
  2668. Routine Description:
  2669. Reads information about a defined cluster node from the its cluster
  2670. object and stores the information in a supplied structure.
  2671. Arguments:
  2672. Node - A pointer to the node object to query.
  2673. NodeInfo - A pointer to the structure into which to store the node
  2674. information.
  2675. Return Value:
  2676. ERROR_SUCCESS if the routine succeeds.
  2677. A Win32 error code otherwise.
  2678. Notes:
  2679. Called with the NmpLock held.
  2680. --*/
  2681. {
  2682. DWORD status;
  2683. lstrcpyW(&(NodeInfo->NodeId[0]), OmObjectId(Node));
  2684. lstrcpyW(&(NodeInfo->NodeName[0]), OmObjectName(Node));
  2685. NodeInfo->State = Node->State;
  2686. NodeInfo->NodeHighestVersion = Node->HighestVersion;
  2687. NodeInfo->NodeLowestVersion = Node->LowestVersion;
  2688. return(ERROR_SUCCESS);
  2689. } // NmpGetNodeObjectInfo
  2690. VOID
  2691. NmpDeleteNodeObject(
  2692. IN PNM_NODE Node,
  2693. IN BOOLEAN IssueEvent
  2694. )
  2695. /*++
  2696. Notes:
  2697. Called with NM lock held.
  2698. --*/
  2699. {
  2700. DWORD status;
  2701. PNM_INTERFACE netInterface;
  2702. PLIST_ENTRY entry;
  2703. LPWSTR nodeId = (LPWSTR) OmObjectId(Node);
  2704. if (NM_DELETE_PENDING(Node)) {
  2705. CL_ASSERT(!NM_OM_INSERTED(Node));
  2706. return;
  2707. }
  2708. ClRtlLogPrint(LOG_NOISE,
  2709. "[NM] Deleting object for node %1!ws!.\n",
  2710. nodeId
  2711. );
  2712. Node->Flags |= NM_FLAG_DELETE_PENDING;
  2713. //
  2714. // Remove from the various object lists.
  2715. //
  2716. if (NM_OM_INSERTED(Node)) {
  2717. status = OmRemoveObject(Node);
  2718. CL_ASSERT(status == ERROR_SUCCESS);
  2719. Node->Flags &= ~NM_FLAG_OM_INSERTED;
  2720. RemoveEntryList(&(Node->Linkage));
  2721. NmpIdArray[Node->NodeId] = NULL;
  2722. CL_ASSERT(NmpNodeCount > 0);
  2723. NmpNodeCount--;
  2724. }
  2725. //
  2726. // Delete all of the interfaces on this node
  2727. //
  2728. while (!IsListEmpty(&(Node->InterfaceList))) {
  2729. entry = Node->InterfaceList.Flink;
  2730. netInterface = CONTAINING_RECORD(entry, NM_INTERFACE, NodeLinkage);
  2731. NmpDeleteInterfaceObject(netInterface, IssueEvent);
  2732. }
  2733. status = ClusnetDeregisterNode(NmClusnetHandle, Node->NodeId);
  2734. CL_ASSERT( (status == ERROR_SUCCESS) ||
  2735. (status == ERROR_NOT_READY) ||
  2736. (status == ERROR_CLUSTER_NODE_NOT_FOUND)
  2737. );
  2738. if (IssueEvent) {
  2739. ClRtlLogPrint(LOG_NOISE,
  2740. "[NM] Issuing delete event for node %1!ws!.\n",
  2741. nodeId
  2742. );
  2743. ClusterEvent(CLUSTER_EVENT_NODE_DELETED, Node);
  2744. }
  2745. OmDereferenceObject(Node);
  2746. return;
  2747. } // NmpDeleteNodeObject
  2748. BOOL
  2749. NmpDestroyNodeObject(
  2750. PNM_NODE Node
  2751. )
  2752. {
  2753. DWORD status;
  2754. ClRtlLogPrint(LOG_NOISE,
  2755. "[NM] destroying node %1!ws!\n",
  2756. OmObjectId(Node)
  2757. );
  2758. CL_ASSERT(NM_DELETE_PENDING(Node));
  2759. CL_ASSERT(!NM_OM_INSERTED(Node));
  2760. ClMsgDeleteDefaultRpcBinding(Node, Node->DefaultRpcBindingGeneration);
  2761. ClMsgDeleteRpcBinding(Node->ReportRpcBinding);
  2762. ClMsgDeleteRpcBinding(Node->IsolateRpcBinding);
  2763. return(TRUE);
  2764. } // NmpDestroyNodeObject
  2765. DWORD
  2766. NmpEnumNodeObjects(
  2767. PNM_NODE_ENUM2 * NodeEnum
  2768. )
  2769. /*++
  2770. Routine Description:
  2771. Reads information about all defined cluster nodes from the cluster
  2772. object manager and builds an enumeration structure containing
  2773. the information.
  2774. Arguments:
  2775. NodeEnum - A pointer to the variable into which to place a pointer to
  2776. the allocated node enumeration.
  2777. Return Value:
  2778. ERROR_SUCCESS if the routine succeeds.
  2779. A Win32 error code otherwise.
  2780. Notes:
  2781. Called with the NmpLock held.
  2782. --*/
  2783. {
  2784. DWORD status = ERROR_SUCCESS;
  2785. PNM_NODE_ENUM2 nodeEnum = NULL;
  2786. DWORD i;
  2787. DWORD valueLength;
  2788. PLIST_ENTRY entry;
  2789. PNM_NODE node;
  2790. *NodeEnum = NULL;
  2791. if (NmpNodeCount == 0) {
  2792. valueLength = sizeof(NM_NODE_ENUM2);
  2793. }
  2794. else {
  2795. valueLength = sizeof(NM_NODE_ENUM2) +
  2796. (sizeof(NM_NODE_INFO2) * (NmpNodeCount - 1));
  2797. }
  2798. nodeEnum = MIDL_user_allocate(valueLength);
  2799. if (nodeEnum == NULL) {
  2800. return(ERROR_NOT_ENOUGH_MEMORY);
  2801. }
  2802. ZeroMemory(nodeEnum, valueLength);
  2803. for (entry = NmpNodeList.Flink, i=0;
  2804. entry != &NmpNodeList;
  2805. entry = entry->Flink, i++
  2806. )
  2807. {
  2808. node = CONTAINING_RECORD(entry, NM_NODE, Linkage);
  2809. status = NmpGetNodeObjectInfo(
  2810. node,
  2811. &(nodeEnum->NodeList[i])
  2812. );
  2813. if (status != ERROR_SUCCESS) {
  2814. ClNetFreeNodeEnum(nodeEnum);
  2815. return(status);
  2816. }
  2817. }
  2818. nodeEnum->NodeCount = NmpNodeCount;
  2819. *NodeEnum = nodeEnum;
  2820. nodeEnum = NULL;
  2821. return(ERROR_SUCCESS);
  2822. } // NmpEnumNodeObjects
  2823. DWORD
  2824. NmpSetNodeInterfacePriority(
  2825. IN PNM_NODE Node,
  2826. IN DWORD Priority,
  2827. IN PNM_INTERFACE TargetInterface OPTIONAL,
  2828. IN DWORD TargetInterfacePriority OPTIONAL
  2829. )
  2830. /*++
  2831. Called with the NmpLock held.
  2832. --*/
  2833. {
  2834. PNM_INTERFACE netInterface;
  2835. PNM_NETWORK network;
  2836. DWORD status = ERROR_SUCCESS;
  2837. PLIST_ENTRY entry;
  2838. for (entry = Node->InterfaceList.Flink;
  2839. entry != &Node->InterfaceList;
  2840. entry = entry->Flink
  2841. )
  2842. {
  2843. netInterface = CONTAINING_RECORD( entry, NM_INTERFACE, NodeLinkage );
  2844. network = netInterface->Network;
  2845. if ( NmpIsNetworkForInternalUse(network) &&
  2846. NmpIsInterfaceRegistered(netInterface)
  2847. )
  2848. {
  2849. if ( netInterface == TargetInterface ) {
  2850. status = ClusnetSetInterfacePriority(
  2851. NmClusnetHandle,
  2852. netInterface->Node->NodeId,
  2853. netInterface->Network->ShortId,
  2854. TargetInterfacePriority
  2855. );
  2856. } else {
  2857. status = ClusnetSetInterfacePriority(
  2858. NmClusnetHandle,
  2859. netInterface->Node->NodeId,
  2860. netInterface->Network->ShortId,
  2861. Priority
  2862. );
  2863. }
  2864. }
  2865. if ( status != ERROR_SUCCESS ) {
  2866. break;
  2867. }
  2868. }
  2869. return(status);
  2870. } // NmpSetNodeInterfacePriority
  2871. /////////////////////////////////////////////////////////////////////////////
  2872. //
  2873. // Node eviction utilities
  2874. //
  2875. /////////////////////////////////////////////////////////////////////////////
  2876. DWORD
  2877. NmpCleanseRegistry(
  2878. IN LPCWSTR NodeId,
  2879. IN HLOCALXSACTION Xaction
  2880. )
  2881. /*++
  2882. Routine Description:
  2883. Removes all references to the specified node from the cluster
  2884. registry.
  2885. Arguments:
  2886. Node - Supplies the node that is being evicted.
  2887. Return Value:
  2888. ERROR_SUCCESS if successful
  2889. Win32 error code otherwise
  2890. --*/
  2891. {
  2892. NM_EVICTION_CONTEXT context;
  2893. context.NodeId = NodeId;
  2894. context.Xaction = Xaction;
  2895. context.Status = ERROR_SUCCESS;
  2896. //
  2897. // Remove this node from the possible owner list of
  2898. // each resource type.
  2899. //
  2900. OmEnumObjects(
  2901. ObjectTypeResType,
  2902. NmpCleanseResTypeCallback,
  2903. &context,
  2904. NULL
  2905. );
  2906. if (context.Status == ERROR_SUCCESS) {
  2907. //
  2908. // Remove this node from the preferred owner list of
  2909. // each group.
  2910. //
  2911. OmEnumObjects(
  2912. ObjectTypeGroup,
  2913. NmpCleanseGroupCallback,
  2914. &context,
  2915. NULL
  2916. );
  2917. }
  2918. if (context.Status == ERROR_SUCCESS) {
  2919. //
  2920. // Remove this node from the possible owner list of
  2921. // each resource.
  2922. //
  2923. OmEnumObjects(
  2924. ObjectTypeResource,
  2925. NmpCleanseResourceCallback,
  2926. &context,
  2927. NULL
  2928. );
  2929. }
  2930. return(context.Status);
  2931. } // NmpCleanseRegistry
  2932. BOOL
  2933. NmpCleanseGroupCallback(
  2934. IN PNM_EVICTION_CONTEXT Context,
  2935. IN PVOID Context2,
  2936. IN PFM_GROUP Group,
  2937. IN LPCWSTR GroupName
  2938. )
  2939. /*++
  2940. Routine Description:
  2941. Group enumeration callback for removing an evicted node from the
  2942. group's preferred owners list.
  2943. Arguments:
  2944. Context - Supplies the node ID of the evicted node and other context info.
  2945. Context2 - Not used
  2946. Group - Supplies the group.
  2947. GroupName - Supplies the group's name.
  2948. Return Value:
  2949. TRUE - to indicate that the enumeration should continue.
  2950. --*/
  2951. {
  2952. HDMKEY groupKey;
  2953. DWORD status;
  2954. //
  2955. // Open the group's key.
  2956. //
  2957. groupKey = DmOpenKey(DmGroupsKey, GroupName, KEY_READ | KEY_WRITE);
  2958. if (groupKey != NULL) {
  2959. status = DmLocalRemoveFromMultiSz(
  2960. Context->Xaction,
  2961. groupKey,
  2962. CLUSREG_NAME_GRP_PREFERRED_OWNERS,
  2963. Context->NodeId
  2964. );
  2965. if (status == ERROR_FILE_NOT_FOUND) {
  2966. status = ERROR_SUCCESS;
  2967. }
  2968. DmCloseKey(groupKey);
  2969. }
  2970. else {
  2971. status = GetLastError();
  2972. }
  2973. Context->Status = status;
  2974. if (status != ERROR_SUCCESS) {
  2975. return(FALSE);
  2976. }
  2977. else {
  2978. return(TRUE);
  2979. }
  2980. } // NmpCleanseGroupCallback
  2981. BOOL
  2982. NmpCleanseResourceCallback(
  2983. IN PNM_EVICTION_CONTEXT Context,
  2984. IN PVOID Context2,
  2985. IN PFM_RESOURCE Resource,
  2986. IN LPCWSTR ResourceName
  2987. )
  2988. /*++
  2989. Routine Description:
  2990. Group enumeration callback for removing an evicted node from the
  2991. resource's possible owner's list.
  2992. Also deletes any node-specific parameters from the resource's registry
  2993. key.
  2994. Arguments:
  2995. Context - Supplies the node ID of the evicted node and other context info.
  2996. Context2 - Not used
  2997. Resource - Supplies the resource.
  2998. ResourceName - Supplies the resource's name.
  2999. Return Value:
  3000. TRUE - to indicate that the enumeration should continue.
  3001. --*/
  3002. {
  3003. HDMKEY resourceKey;
  3004. HDMKEY paramKey;
  3005. HDMKEY subKey;
  3006. DWORD status;
  3007. //
  3008. // Open the resource's key.
  3009. //
  3010. resourceKey = DmOpenKey(
  3011. DmResourcesKey,
  3012. ResourceName,
  3013. KEY_READ | KEY_WRITE
  3014. );
  3015. if (resourceKey != NULL) {
  3016. status = DmLocalRemoveFromMultiSz(
  3017. Context->Xaction,
  3018. resourceKey,
  3019. CLUSREG_NAME_RES_POSSIBLE_OWNERS,
  3020. Context->NodeId
  3021. );
  3022. if ((status == ERROR_SUCCESS) || (status == ERROR_FILE_NOT_FOUND)) {
  3023. paramKey = DmOpenKey(
  3024. resourceKey,
  3025. CLUSREG_KEYNAME_PARAMETERS,
  3026. KEY_READ | KEY_WRITE
  3027. );
  3028. if (paramKey != NULL) {
  3029. status = DmLocalDeleteTree(
  3030. Context->Xaction,
  3031. paramKey,
  3032. Context->NodeId
  3033. );
  3034. DmCloseKey(paramKey);
  3035. }
  3036. else {
  3037. status = GetLastError();
  3038. }
  3039. }
  3040. DmCloseKey(resourceKey);
  3041. }
  3042. else {
  3043. status = GetLastError();
  3044. }
  3045. if (status == ERROR_FILE_NOT_FOUND) {
  3046. status = ERROR_SUCCESS;
  3047. }
  3048. Context->Status = status;
  3049. if (status != ERROR_SUCCESS) {
  3050. return(FALSE);
  3051. }
  3052. else {
  3053. return(TRUE);
  3054. }
  3055. } // NmpCleanseResourceCallback
  3056. BOOL
  3057. NmpCleanseResTypeCallback(
  3058. IN PNM_EVICTION_CONTEXT Context,
  3059. IN PVOID Context2,
  3060. IN PFM_RESTYPE pResType,
  3061. IN LPCWSTR pszResTypeName
  3062. )
  3063. /*++
  3064. Routine Description:
  3065. Group enumeration callback for removing an evicted node from the
  3066. resource type's possible owner's list.
  3067. Also deletes any node-specific parameters from the resource types's registry
  3068. key.
  3069. Arguments:
  3070. Context - Supplies the node ID of the evicted node and other context info.
  3071. Context2 - Not used
  3072. pResType - Supplies the resource type.
  3073. pszResTypeeName - Supplies the resource type's name.
  3074. Return Value:
  3075. TRUE - to indicate that the enumeration should continue.
  3076. --*/
  3077. {
  3078. HDMKEY hResTypeKey;
  3079. HDMKEY paramKey;
  3080. HDMKEY subKey;
  3081. DWORD status;
  3082. //
  3083. // Open the resource's key.
  3084. //
  3085. hResTypeKey = DmOpenKey(
  3086. DmResourceTypesKey,
  3087. pszResTypeName,
  3088. KEY_READ | KEY_WRITE
  3089. );
  3090. if (hResTypeKey != NULL) {
  3091. status = DmLocalRemoveFromMultiSz(
  3092. Context->Xaction,
  3093. hResTypeKey,
  3094. CLUSREG_NAME_RESTYPE_POSSIBLE_NODES,
  3095. Context->NodeId
  3096. );
  3097. if ((status == ERROR_SUCCESS) || (status == ERROR_FILE_NOT_FOUND)) {
  3098. paramKey = DmOpenKey(
  3099. hResTypeKey,
  3100. CLUSREG_KEYNAME_PARAMETERS,
  3101. KEY_READ | KEY_WRITE
  3102. );
  3103. if (paramKey != NULL) {
  3104. status = DmLocalDeleteTree(
  3105. Context->Xaction,
  3106. paramKey,
  3107. Context->NodeId
  3108. );
  3109. DmCloseKey(paramKey);
  3110. }
  3111. else {
  3112. status = GetLastError();
  3113. }
  3114. }
  3115. DmCloseKey(hResTypeKey);
  3116. }
  3117. else {
  3118. status = GetLastError();
  3119. }
  3120. if (status == ERROR_FILE_NOT_FOUND) {
  3121. status = ERROR_SUCCESS;
  3122. }
  3123. Context->Status = status;
  3124. if (status != ERROR_SUCCESS) {
  3125. return(FALSE);
  3126. }
  3127. else {
  3128. return(TRUE);
  3129. }
  3130. } // NmpCleanseResTypeCallback
  3131. /////////////////////////////////////////////////////////////////////////////
  3132. //
  3133. // Node failure handler
  3134. //
  3135. /////////////////////////////////////////////////////////////////////////////
  3136. VOID
  3137. NmpNodeFailureHandler(
  3138. CL_NODE_ID NodeId,
  3139. LPVOID NodeFailureContext
  3140. )
  3141. {
  3142. return;
  3143. }
  3144. /////////////////////////////////////////////////////////////////////////////
  3145. //
  3146. // Miscellaneous routines
  3147. //
  3148. /////////////////////////////////////////////////////////////////////////////
  3149. //SS: when the node objects are created, their product suite is
  3150. //assumed to be Enterprise(aka Advanced Server) - This is because
  3151. //the joining interface doesnt allow the joiner to provide the node
  3152. //suite type and we didnt want to muck with it at a late state in
  3153. //shipping because it affects mixed mode clusters.
  3154. //SO, we fixup the structures after NmPerformFixups is called
  3155. //and calculate the cluster node limit
  3156. DWORD NmpRefreshNodeObjects(
  3157. )
  3158. {
  3159. NM_NODE_AUX_INFO NodeAuxInfo;
  3160. PLIST_ENTRY pListEntry;
  3161. PNM_NODE pNmNode;
  3162. WCHAR szNodeId[6];
  3163. DWORD dwStatus = ERROR_SUCCESS;
  3164. NmpAcquireLock();
  3165. for ( pListEntry = NmpNodeList.Flink;
  3166. pListEntry != &NmpNodeList;
  3167. pListEntry = pListEntry->Flink )
  3168. {
  3169. pNmNode = CONTAINING_RECORD(pListEntry, NM_NODE, Linkage);
  3170. wsprintf(szNodeId, L"%u", pNmNode->NodeId);
  3171. //read the information from the registry
  3172. NmpGetNodeAuxInfo(szNodeId, &NodeAuxInfo);
  3173. //update the node structure
  3174. pNmNode->ProductSuite = NodeAuxInfo.ProductSuite;
  3175. //SS: This is ugly---we should pass in the product suits early on.
  3176. //we dont know that the versions have changed, so should we generate
  3177. //a cluster_change_node_property event?
  3178. //Also the fixup interface needs to to be richer so that the postcallback
  3179. //function knows whether it is a form fixup or a join fixup and if it
  3180. //is a join fixup, which node is joining. This could certainly optimize
  3181. //some of the fixup processing
  3182. ClusterEvent(CLUSTER_EVENT_NODE_PROPERTY_CHANGE, pNmNode);
  3183. }
  3184. NmpReleaseLock();
  3185. return(dwStatus);
  3186. }
  3187. BOOLEAN
  3188. NmpIsAddNodeAllowed(
  3189. IN DWORD NewNodeProductSuite,
  3190. IN DWORD RegistryNodeLimit,
  3191. OUT LPDWORD EffectiveNodeLimit OPTIONAL
  3192. )
  3193. /*++
  3194. Routine Description:
  3195. Determines whether a new node can be added to the cluste membership.
  3196. The membership size limit decision is based on the product suites
  3197. of the cluster and the new node. If the registry override exists,
  3198. we will use that limit instead.
  3199. Arguments:
  3200. NewNodeProductSuite - The product suite identifier for the proposed
  3201. new member node.
  3202. RegistryNodeLimit - The membership size override value stored in the
  3203. cluster database.
  3204. EffectiveNodeLimit - On output, contains the membership size limit
  3205. that was calculated for this cluster.
  3206. Return Value:
  3207. TRUE if the new node may be added to the cluster. FALSE otherwise.
  3208. Notes:
  3209. Called with NmpLock held.
  3210. --*/
  3211. {
  3212. DWORD nodeLimit;
  3213. DWORD newNodeProductLimit;
  3214. DWORD currentNodeCount;
  3215. //
  3216. // Check if we already have the maximum number of nodes allowed in
  3217. // this cluster, based on the the product suites of the cluster and
  3218. // the joiner. If the registry override exists, we will use that
  3219. // limit instead.
  3220. //
  3221. newNodeProductLimit = ClRtlGetDefaultNodeLimit(NewNodeProductSuite);
  3222. currentNodeCount = NmGetCurrentNumberOfNodes();
  3223. nodeLimit = RegistryNodeLimit;
  3224. if (nodeLimit == 0) {
  3225. //
  3226. // No override in the registry.
  3227. // Limit is minimum of cluster's limit and new node's limit
  3228. //
  3229. nodeLimit = min(CsClusterNodeLimit, newNodeProductLimit);
  3230. }
  3231. //
  3232. // The runtime limit cannot exceed the compile time limit.
  3233. //
  3234. if (nodeLimit > NmMaxNodeId) {
  3235. nodeLimit = NmMaxNodeId;
  3236. }
  3237. if (currentNodeCount >= nodeLimit) {
  3238. return(FALSE);
  3239. }
  3240. if (EffectiveNodeLimit != NULL) {
  3241. *EffectiveNodeLimit = nodeLimit;
  3242. }
  3243. return(TRUE);
  3244. } // NmpIsAddNodeAllowed
  3245. DWORD
  3246. NmpAddNode(
  3247. IN LPCWSTR NewNodeName,
  3248. IN DWORD NewNodeHighestVersion,
  3249. IN DWORD NewNodeLowestVersion,
  3250. IN DWORD NewNodeProductSuite,
  3251. IN DWORD RegistryNodeLimit
  3252. )
  3253. /*++
  3254. Routine Description:
  3255. Adds a new node to the cluster by selecting an ID and
  3256. issuing a global update.
  3257. Arguments:
  3258. NewNodeName - A pointer to a string containing the name of the
  3259. new node.
  3260. NewNodeHighestVersion - The highest cluster version number that the
  3261. new node can support.
  3262. NewNodeLowestVersion - The lowest cluster version number that the
  3263. new node can support.
  3264. NewNodeProductSuite - The product suite identifier for the new node.
  3265. Return Value:
  3266. A Win32 status code.
  3267. Notes:
  3268. Called with NmpLock held.
  3269. --*/
  3270. {
  3271. DWORD status;
  3272. DWORD nodeId;
  3273. DWORD nodeLimit;
  3274. ClRtlLogPrint(LOG_NOISE,
  3275. "[NMJOIN] Processing request to add node '%1!ws!' to "
  3276. "the cluster.\n",
  3277. NewNodeName
  3278. );
  3279. if (NmpAddNodeId != ClusterInvalidNodeId) {
  3280. //
  3281. // An add is already in progress. Return an error.
  3282. //
  3283. ClRtlLogPrint(LOG_UNUSUAL,
  3284. "[NMJOIN] Cannot add node '%1!ws!' to the cluster because "
  3285. "another add node operation is in progress. Retry later.\n",
  3286. NewNodeName
  3287. );
  3288. return(ERROR_CLUSTER_JOIN_IN_PROGRESS);
  3289. }
  3290. if (!NmpIsAddNodeAllowed(
  3291. NewNodeProductSuite,
  3292. RegistryNodeLimit,
  3293. &nodeLimit
  3294. )
  3295. )
  3296. {
  3297. ClRtlLogPrint(LOG_UNUSUAL,
  3298. "[NMJOIN] Cannot add node '%1!ws!' to the cluster. "
  3299. "The cluster already contains the maximum number of nodes "
  3300. "allowed by the product licenses of the current member nodes "
  3301. "and the proposed new node.\n",
  3302. NewNodeName
  3303. );
  3304. return(ERROR_LICENSE_QUOTA_EXCEEDED);
  3305. }
  3306. //
  3307. // Find a free node ID.
  3308. //
  3309. for (nodeId=ClusterMinNodeId; nodeId<=nodeLimit; nodeId++) {
  3310. if (NmpIdArray[nodeId] == NULL) {
  3311. //
  3312. // Found an available node ID.
  3313. //
  3314. NmpAddNodeId = nodeId;
  3315. ClRtlLogPrint(LOG_NOISE,
  3316. "[NMJOIN] Allocated node ID '%1!u!' for new node '%2!ws!'\n",
  3317. NmpAddNodeId,
  3318. NewNodeName
  3319. );
  3320. break;
  3321. }
  3322. }
  3323. //
  3324. // Since the license test passed, it should be impossible for us to
  3325. // find no free slots in the node table.
  3326. //
  3327. CL_ASSERT(NmpAddNodeId != ClusterInvalidNodeId);
  3328. if (NmpAddNodeId == ClusterInvalidNodeId) {
  3329. ClRtlLogPrint(LOG_UNUSUAL,
  3330. "[NMJOIN] Cannot add node '%1!ws!' to the cluster because "
  3331. "no slots are available in the node table.\n"
  3332. );
  3333. return(ERROR_LICENSE_QUOTA_EXCEEDED);
  3334. }
  3335. NmpReleaseLock();
  3336. status = GumSendUpdateEx(
  3337. GumUpdateMembership,
  3338. NmUpdateAddNode,
  3339. 5,
  3340. sizeof(NmpAddNodeId),
  3341. &NmpAddNodeId,
  3342. NM_WCSLEN(NewNodeName),
  3343. NewNodeName,
  3344. sizeof(NewNodeHighestVersion),
  3345. &NewNodeHighestVersion,
  3346. sizeof(NewNodeLowestVersion),
  3347. &NewNodeLowestVersion,
  3348. sizeof(NewNodeProductSuite),
  3349. &NewNodeProductSuite
  3350. );
  3351. NmpAcquireLock();
  3352. //
  3353. // Reset the global serialization variable.
  3354. //
  3355. CL_ASSERT(NmpAddNodeId == nodeId);
  3356. NmpAddNodeId = ClusterInvalidNodeId;
  3357. return(status);
  3358. } // NmpAddNode
  3359. VOID
  3360. NmpTerminateRpcsToNode(
  3361. DWORD NodeId
  3362. )
  3363. /*++
  3364. Routine Description:
  3365. Cancels all outstanding RPCs to the specified node.
  3366. Arguments:
  3367. NodeId - The ID of the node for which calls should be cancelled.
  3368. Return Value:
  3369. None
  3370. --*/
  3371. {
  3372. LIST_ENTRY *pEntry, *pStart;
  3373. PNM_INTRACLUSTER_RPC_THREAD pRpcTh;
  3374. RPC_STATUS status;
  3375. #if DBG
  3376. BOOLEAN startTimer = FALSE;
  3377. #endif // DBG
  3378. CL_ASSERT((NodeId >= ClusterMinNodeId) && (NodeId <= NmMaxNodeId));
  3379. CL_ASSERT(NmpIntraClusterRpcArr != NULL);
  3380. NmpAcquireRPCLock();
  3381. pEntry = pStart = &NmpIntraClusterRpcArr[NodeId];
  3382. pEntry = pEntry->Flink;
  3383. while(pEntry != pStart) {
  3384. pRpcTh = CONTAINING_RECORD(pEntry, NM_INTRACLUSTER_RPC_THREAD, Linkage);
  3385. status = RpcCancelThreadEx(pRpcTh->Thread, 0);
  3386. pRpcTh->Cancelled = TRUE;
  3387. if(status != RPC_S_OK) {
  3388. ClRtlLogPrint(LOG_UNUSUAL,
  3389. "[NM] Failed to cancel RPC to node %1!u! by thread "
  3390. "x%2!x!, status %3!u!.\n",
  3391. NodeId,
  3392. pRpcTh->ThreadId,
  3393. status
  3394. );
  3395. }
  3396. else {
  3397. ClRtlLogPrint(LOG_NOISE,
  3398. "[NM] Cancelled RPC to node %1!u! by thread x%2!x!.\n",
  3399. NodeId,
  3400. pRpcTh->ThreadId
  3401. );
  3402. #if DBG
  3403. startTimer = TRUE;
  3404. #endif // DBG
  3405. }
  3406. pEntry = pEntry->Flink;
  3407. }
  3408. #if DBG
  3409. //
  3410. // Now start a timer to make sure that all cancelled RPCs return to
  3411. // their callers within a reasonable amount of time.
  3412. //
  3413. if (startTimer) {
  3414. NmpRpcTimer = NM_RPC_TIMEOUT;
  3415. }
  3416. #endif // DBG
  3417. NmpReleaseRPCLock();
  3418. return;
  3419. } // NmTerminateRpcsToNode
  #if DBG

  VOID
  NmpRpcTimerTick(
      DWORD MsTickInterval
      )
  /*++

  Routine Description:

      Decrements a timer used to ensure that all cancelled RPCs to a dead
      node return to their callers within a reasonable amount of time.

      When the timer expires, every per-node list in NmpIntraClusterRpcArr
      is scanned. Each RPC thread record still marked as cancelled is
      logged, and if any were found the routine breaks into the debugger.

  Arguments:

      MsTickInterval - The time, in milliseconds, that has elapsed since this
                       routine was last invoked.

  Return Value:

      None

  Notes:

      Compiled only in DBG builds. Nothing is done if the timer is not
      running (NmpRpcTimer is zero).

  --*/
  {
      DWORD                        nodeIndex;
      PLIST_ENTRY                  listHead;
      PLIST_ENTRY                  link;
      PNM_INTRACLUSTER_RPC_THREAD  rpcThread;
      BOOLEAN                      lingering = FALSE;


      if (NmpRpcTimer == 0) {
          return;
      }

      NmpAcquireRPCLock();

      if (NmpRpcTimer > MsTickInterval) {
          //
          // Not expired yet. Just account for the elapsed time.
          //
          NmpRpcTimer -= MsTickInterval;

          NmpReleaseRPCLock();

          return;
      }

      //
      // The timer has expired. Stop it and look for cancelled RPCs that
      // have not yet returned.
      //
      NmpRpcTimer = 0;

      for (nodeIndex = 0; nodeIndex <= NmMaxNodeId; nodeIndex++) {

          listHead = &NmpIntraClusterRpcArr[nodeIndex];

          for (link = listHead->Flink; link != listHead; link = link->Flink) {

              rpcThread = CONTAINING_RECORD(
                              link,
                              NM_INTRACLUSTER_RPC_THREAD,
                              Linkage
                              );

              if (rpcThread->Cancelled == TRUE) {
                  ClRtlLogPrint( LOG_CRITICAL,
                      "[NM] Cancelled RPC to node %1!u! by thread x%2!x! "
                      "is still lingering after %3!u! seconds.\n",
                      nodeIndex,
                      rpcThread->ThreadId,
                      (NM_RPC_TIMEOUT/1000)
                      );

                  lingering = TRUE;
              }
          }
      }

      if (lingering) {
          DebugBreak();
      }

      NmpReleaseRPCLock();

      return;

  } // NmpRpcTimerTick

  #endif // DBG