Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

419 lines
8.7 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. cm.c
  5. Abstract:
  6. Connection Manager
  7. Author:
  8. Ahmed Mohamed (ahmedm) 12, 01, 2000
  9. Revision History:
  10. --*/
  11. #include "gs.h"
  12. #include "gsp.h"
  13. #include "msg.h"
  14. extern BOOLEAN QuormAcquire();
  15. extern void QuormInit();
  16. extern void QuormRelease();
  17. #include <stdio.h>
  18. #define GS_MAX_NODEID 16
  19. #define GS_REGROUP_PHASES 3
  20. #define CmStateJoin 0
  21. #define CmStateNormal 1
  22. #define CmStateUp 2
  23. #define CmStateDown 3
  24. gs_nid_t GsLocalNodeId;
  25. gs_nid_t QuormOwnerId;
  26. int GsMaxNodeId = GS_MAX_NODEID;
  27. int GsMinNodeId = 1;
  28. long Regroup; // number of down nodes
  29. ULONG Node_Mask; // current active node mask
  30. ULONG JoinNode_Mask; // current joining node mask
  31. ULONG Sync_Valid; // which barrier points are valid
  32. ULONG Sync_Mask[GS_REGROUP_PHASES];
  33. // Cluster connectivity matrix
  34. ULONG ClusterNode_Mask[GS_MAX_NODEID+1];
  35. gs_lock_t MmLock;
  36. gs_event_t Start_Event, Regroup_Event;
  37. extern void NsSetOwner(gs_nid_t);
  38. void
  39. cm_node_up()
  40. {
  41. ULONG mask;
  42. if (Node_Mask == JoinNode_Mask) {
  43. return;
  44. }
  45. // get the difference
  46. mask = Node_Mask ^ JoinNode_Mask;
  47. Node_Mask = JoinNode_Mask;
  48. cm_log(("Node UPUPUP mask %x: upset %x\n", Node_Mask, mask));
  49. // inform new node of resources that it we own
  50. // If we have a registered node up event, call it now
  51. }
  52. void
  53. cm_node_down()
  54. {
  55. ULONG mask;
  56. if (Node_Mask == JoinNode_Mask) {
  57. return;
  58. }
  59. // get the difference
  60. mask = Node_Mask ^ JoinNode_Mask;
  61. Node_Mask = JoinNode_Mask;
  62. cm_log(("Node DNDNDN mask %x: dnset %x\n", Node_Mask, mask));
  63. NsSetOwner(QuormOwnerId);
  64. GspPhase2NodeDown(mask);
  65. }
  66. static int
  67. cm_full_connectivity()
  68. {
  69. int i, j;
  70. for (i = 1; i < GS_MAX_NODEID; i++) {
  71. // if node is not up, ignore it
  72. if ((JoinNode_Mask & (1 << i)) == 0)
  73. continue;
  74. // check node's i mask with others
  75. for (j = i+1; j <= GS_MAX_NODEID; j++) {
  76. // if node is not up, ignore it
  77. if ((JoinNode_Mask & (1 << j)) == 0)
  78. continue;
  79. if (ClusterNode_Mask[i] ^ ClusterNode_Mask[j]) {
  80. cm_log(("FC: node %d mask 0x%x node %d mask 0x%x\n",
  81. i,
  82. ClusterNode_Mask[i],
  83. j,
  84. ClusterNode_Mask[j]));
  85. return 0;
  86. }
  87. }
  88. }
  89. return 1;
  90. }
  91. void
  92. GspMmMsgHandler(gs_msg_t *msg)
  93. {
  94. int nodeid = msg->m_hdr.h_sid;
  95. ULONG old;
  96. // Update node's up mask
  97. GsLockEnter(MmLock);
  98. old = ClusterNode_Mask[GsLocalNodeId];
  99. ClusterNode_Mask[nodeid] |= msg->m_hdr.h_bnum;
  100. ClusterNode_Mask[GsLocalNodeId] |= (1 << nodeid);
  101. if (msg->m_hdr.h_flags != 0) {
  102. QuormOwnerId = msg->m_hdr.h_flags;
  103. cm_log(("Learn new quorm owner %d\n", QuormOwnerId));
  104. }
  105. cm_log(("MM qowner %d mask %x node %d, j %x n %x\n",QuormOwnerId,
  106. msg->m_hdr.h_bnum, nodeid,
  107. JoinNode_Mask, Node_Mask));
  108. if (old != ClusterNode_Mask[GsLocalNodeId]) {
  109. msg->m_hdr.h_type = GS_MSG_TYPE_MM;
  110. msg->m_hdr.h_len = 0;
  111. msg->m_hdr.h_flags = QuormOwnerId;
  112. msg->m_hdr.h_sid = GsLocalNodeId;
  113. msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
  114. msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);
  115. }
  116. // If the matrix is full connected, we are done
  117. if (cm_full_connectivity() != 0) {
  118. switch(Regroup) {
  119. case CmStateJoin:
  120. cm_node_up();
  121. GsEventSignal(Start_Event);
  122. break;
  123. case CmStateUp:
  124. cm_node_up();
  125. break;
  126. case CmStateDown:
  127. cm_node_down();
  128. break;
  129. default:
  130. err_log(("Invalid cm state %d\n", Regroup));
  131. exit(1);
  132. }
  133. Regroup = CmStateUp;
  134. #if 0
  135. cm_node_up();
  136. if (Regroup < 0) {
  137. GsEventSignal(Start_Event);
  138. }
  139. #endif
  140. }
  141. GsLockExit(MmLock);
  142. msg_free(msg);
  143. }
  144. void
  145. GspInfoMsgHandler(gs_msg_t *msg)
  146. {
  147. int nodeid = msg->m_hdr.h_sid;
  148. // make sure we send our info to the sender
  149. // cm_node_join(nodeid);
  150. // lock membership state
  151. GsLockEnter(MmLock);
  152. if (msg->m_hdr.h_flags != 0) {
  153. QuormOwnerId = msg->m_hdr.h_flags;
  154. NsSetOwner(QuormOwnerId);
  155. }
  156. cm_log(("Info Node %d mask %x quorm %d\n", nodeid, msg->m_hdr.h_bnum,
  157. QuormOwnerId));
  158. // Foward message to all other members
  159. cm_log(("Info Mcast %x node %d mask %x\n",
  160. ClusterNode_Mask[GsLocalNodeId], nodeid, JoinNode_Mask));
  161. msg->m_hdr.h_type = GS_MSG_TYPE_MM;
  162. msg->m_hdr.h_len = 0;
  163. msg->m_hdr.h_sid = GsLocalNodeId;
  164. msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
  165. msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);
  166. GsLockExit(MmLock);
  167. msg_free(msg);
  168. }
  169. void
  170. gs_nodeup_handler(int nodeid)
  171. {
  172. gs_msg_hdr_t hdr;
  173. cm_log(("Node up %d\n", nodeid));
  174. GsLockEnter(MmLock);
  175. if (JoinNode_Mask & (1 << nodeid)) {
  176. printf("Node is already up %d 0x%x\n", nodeid, JoinNode_Mask);
  177. GsLockExit(MmLock);
  178. return;
  179. }
  180. JoinNode_Mask |= (1 << nodeid);
  181. if (1 || Regroup != CmStateJoin) {
  182. cm_log(("Node %d is alive, j %x n %x, sending info\n", nodeid,
  183. JoinNode_Mask, Node_Mask));
  184. hdr.h_type = GS_MSG_TYPE_INFO;
  185. hdr.h_sid = GsLocalNodeId;
  186. hdr.h_flags = QuormOwnerId;
  187. hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
  188. hdr.h_len = 0;
  189. msg_send((gs_memberid_t) nodeid, &hdr, NULL, 0);
  190. }
  191. GsLockExit(MmLock);
  192. }
  193. void
  194. gs_nodedown_handler(int nodeid)
  195. {
  196. int i;
  197. gs_msg_hdr_t hdr;
  198. GsLockEnter(MmLock);
  199. if (!(JoinNode_Mask & (1 << nodeid))) {
  200. err_log(("Node %d is already down\n", nodeid));
  201. GsLockExit(MmLock);
  202. return;
  203. }
  204. if (Regroup == CmStateJoin) {
  205. err_log(("Node down during join, aborting...\n"));
  206. GsLockExit(MmLock);
  207. exit(1);
  208. }
  209. Regroup = CmStateDown;
  210. // Assume all nodes see this event and no messaging is required
  211. for (i = 0; i <= GS_MAX_NODEID; i++) {
  212. ClusterNode_Mask[i] = (1 << GsLocalNodeId);
  213. }
  214. JoinNode_Mask &= ~(1 << nodeid);
  215. if (!(JoinNode_Mask & (1 << QuormOwnerId))) {
  216. cm_log(("Lost quorm owner %d\n", QuormOwnerId));
  217. QuormOwnerId = 0;
  218. }
  219. // Acquire Quorum file
  220. if (QuormOwnerId != GsLocalNodeId && QuormAcquire() == TRUE) {
  221. cm_log(("I own quorm now\n"));
  222. QuormOwnerId = GsLocalNodeId;
  223. }
  224. cm_log(("Node %d down upset %x -> %x mask %x\n", nodeid,
  225. Node_Mask, JoinNode_Mask, Node_Mask ^ JoinNode_Mask));
  226. // Generate phase 1 node down
  227. GspPhase1NodeDown(Node_Mask ^ JoinNode_Mask);
  228. // handle case when I am only node in cluster, otherwise enter regroup again
  229. if (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId)) { //cm_full_connectivity() != 0) {
  230. while (QuormOwnerId != GsLocalNodeId) {
  231. if (QuormAcquire() == TRUE) {
  232. QuormOwnerId = GsLocalNodeId;
  233. break;
  234. }
  235. Sleep(100);
  236. }
  237. cm_node_down();
  238. Regroup = CmStateUp;
  239. } else {
  240. hdr.h_type = GS_MSG_TYPE_MM;
  241. hdr.h_sid = GsLocalNodeId;
  242. hdr.h_flags = QuormOwnerId;
  243. hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
  244. hdr.h_len = 0;
  245. msg_smcast(JoinNode_Mask, &hdr, NULL, 0);
  246. }
  247. GsLockExit(MmLock);
  248. }
  // gs_nodejoin_handler
  //
  // Node-join notification: only logs the id of the node that is alive.
  void
  gs_nodejoin_handler(int nodeid)
  {
      cm_log(("Node is alive %d\n", nodeid));
  }
  254. void
  255. gs_nodeid_handler(int nodeid)
  256. {
  257. GsLocalNodeId = (gs_nid_t) nodeid;
  258. // cm_log(("Node id %d\n", nodeid));
  259. }
  260. gs_node_handler_t gs_node_handler[] = {
  261. gs_nodeid_handler,
  262. gs_nodejoin_handler,
  263. gs_nodeup_handler,
  264. gs_nodedown_handler
  265. };
  266. void
  267. cm_init()
  268. {
  269. GsLocalNodeId = 0;
  270. QuormOwnerId = 0;
  271. Regroup = CmStateJoin;
  272. Node_Mask = 0;
  273. JoinNode_Mask = 0;
  274. Sync_Valid = 0;
  275. memset(Sync_Mask, 0, sizeof(Sync_Mask));
  276. memset(ClusterNode_Mask, 0, sizeof(ClusterNode_Mask));
  277. GsLockInit(MmLock);
  278. GsEventInit(Start_Event);
  279. GsEventInit(Regroup_Event);
  280. QuormInit();
  281. msg_init();
  282. }
  283. cm_start()
  284. {
  285. int i;
  286. static int started = 0;
  287. i = InterlockedIncrement(&started);
  288. if (i != 1)
  289. return 0;
  290. for (i = 0; i <= GS_MAX_NODEID; i++) {
  291. ClusterNode_Mask[i] = (1 << GsLocalNodeId);
  292. }
  293. Node_Mask = 1 << GsLocalNodeId;
  294. JoinNode_Mask = 1 << GsLocalNodeId;
  295. // wait for join,
  296. do {
  297. LARGE_INTEGER delta;
  298. GsLockEnter(MmLock);
  299. if (QuormAcquire() == TRUE) {
  300. QuormOwnerId = GsLocalNodeId;
  301. NsSetOwner(QuormOwnerId);
  302. Regroup = CmStateUp;
  303. GsLockExit(MmLock);
  304. break;
  305. }
  306. GsLockExit(MmLock);
  307. msg_start(JoinNode_Mask);
  308. cm_log(("Waiting to join %x %x\n", JoinNode_Mask, Node_Mask));
  309. delta.QuadPart = 0;
  310. delta.LowPart = 5 * 1000; // retry every 5 second
  311. if (GsEventWaitTimeout(Start_Event, &delta)) {
  312. cm_log(("j %x n %x\n", JoinNode_Mask, Node_Mask));
  313. }
  314. } while (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId) || JoinNode_Mask != Node_Mask);
  315. // InterlockedIncrement(&Regroup);
  316. return 0;
  317. }