|
|
/*++
Copyright (c) 2000 Microsoft Corporation
Module Name:
cm.c
Abstract:
Connection Manager
Author:
Ahmed Mohamed (ahmedm) 12, 01, 2000
Revision History:
--*/
#include "gs.h"
#include "gsp.h"
#include "msg.h"
extern BOOLEAN QuormAcquire(); extern void QuormInit(); extern void QuormRelease();
#include <stdio.h>
// Highest node id handled here; ClusterNode_Mask has GS_MAX_NODEID+1 slots (indices 0..GS_MAX_NODEID)
#define GS_MAX_NODEID 16
// Number of entries in Sync_Mask
#define GS_REGROUP_PHASES 3
// Values stored in Regroup. CmStateNormal is not referenced in the visible code.
#define CmStateJoin 0
#define CmStateNormal 1
#define CmStateUp 2
#define CmStateDown 3
// Id of the local node; assigned by gs_nodeid_handler
gs_nid_t GsLocalNodeId;
// Id of the node currently believed to own the quorum; reset to 0 when the owner is lost
gs_nid_t QuormOwnerId;
// Node id limits; not referenced elsewhere in the visible code
int GsMaxNodeId = GS_MAX_NODEID; int GsMinNodeId = 1;
long Regroup; // current membership state, one of the CmState* values
ULONG Node_Mask; // current active node mask
ULONG JoinNode_Mask; // current joining node mask
ULONG Sync_Valid; // which barrier points are valid
ULONG Sync_Mask[GS_REGROUP_PHASES]; // one mask per regroup phase; only cleared by cm_init in the visible code
// Cluster connectivity matrix: entry n holds the node mask last reported by node n
ULONG ClusterNode_Mask[GS_MAX_NODEID+1];
// MmLock is held by the handlers below while they read or update the membership state.
// Start_Event is signalled when a join completes and waited on by cm_start.
// Regroup_Event is only initialized in the visible code.
gs_lock_t MmLock; gs_event_t Start_Event, Regroup_Event;
extern void NsSetOwner(gs_nid_t);
/*
 * cm_node_up
 *
 * Promotes the pending membership (JoinNode_Mask) to the active membership
 * (Node_Mask) and logs which bits changed. Nothing happens when the two
 * masks already agree. Called from GspMmMsgHandler with MmLock held.
 */
void cm_node_up()
{
    ULONG changed;

    if (Node_Mask != JoinNode_Mask) {
        // bits that differ between the old and the new membership
        changed = Node_Mask ^ JoinNode_Mask;
        Node_Mask = JoinNode_Mask;
        cm_log(("Node UPUPUP mask %x: upset %x\n", Node_Mask, changed));
        // Not implemented yet: inform the new node of the resources we own,
        // and invoke a registered node-up event if there is one.
    }
}
/*
 * cm_node_down
 *
 * Promotes the pending membership (JoinNode_Mask) to the active membership
 * (Node_Mask) after a node loss, republishes the quorum owner through
 * NsSetOwner and runs phase 2 of node-down processing on the set of bits
 * that changed. Nothing happens when the two masks already agree.
 * Callers in this file hold MmLock.
 */
void cm_node_down()
{
    ULONG down_set;

    if (Node_Mask != JoinNode_Mask) {
        // bits that differ between the old and the new membership
        down_set = Node_Mask ^ JoinNode_Mask;
        Node_Mask = JoinNode_Mask;
        cm_log(("Node DNDNDN mask %x: dnset %x\n", Node_Mask, down_set));
        NsSetOwner(QuormOwnerId);
        GspPhase2NodeDown(down_set);
    }
}
/*
 * cm_full_connectivity
 *
 * Checks whether every node present in JoinNode_Mask has reported the same
 * connectivity mask. Node ids 1..GS_MAX_NODEID are compared pairwise; nodes
 * that are not in JoinNode_Mask are skipped.
 *
 * Returns 1 when all participating rows of ClusterNode_Mask are equal,
 * 0 (after logging the first mismatching pair) otherwise.
 */
static int cm_full_connectivity()
{
    int a, b;

    for (a = 1; a < GS_MAX_NODEID; a++) {
        if (JoinNode_Mask & (1 << a)) {
            // node a is up: compare its row against every higher-numbered up node
            for (b = a + 1; b <= GS_MAX_NODEID; b++) {
                if ((JoinNode_Mask & (1 << b)) &&
                    ClusterNode_Mask[a] != ClusterNode_Mask[b]) {
                    cm_log(("FC: node %d mask 0x%x node %d mask 0x%x\n", a, ClusterNode_Mask[a], b, ClusterNode_Mask[b]));
                    return 0;
                }
            }
        }
    }
    return 1;
}
/*
 * GspMmMsgHandler
 *
 * Handles a membership (GS_MSG_TYPE_MM) message.
 *
 * The sender's reported mask (h_bnum) is merged into the sender's row of
 * ClusterNode_Mask and the sender is added to the local row. A non-zero
 * h_flags is taken as the current quorum owner. If the local row changed,
 * the message buffer is reused to multicast the updated local row to
 * JoinNode_Mask. Once all participating rows agree, the pending membership
 * is committed according to the current Regroup state and the state becomes
 * CmStateUp.
 *
 * msg: received message; it is always released with msg_free before return.
 */
void GspMmMsgHandler(gs_msg_t *msg)
{
    int nodeid = msg->m_hdr.h_sid;
    ULONG old;

    // The sender id is taken from the message header and is used below as an
    // array index and as a shift count, so reject ids outside the table.
    if (nodeid < 0 || nodeid > GS_MAX_NODEID) {
        err_log(("MM message from invalid node %d\n", nodeid));
        msg_free(msg);
        return;
    }

    // Update node's up mask
    GsLockEnter(MmLock);
    old = ClusterNode_Mask[GsLocalNodeId];
    ClusterNode_Mask[nodeid] |= msg->m_hdr.h_bnum;
    ClusterNode_Mask[GsLocalNodeId] |= ((ULONG)1 << nodeid);
    if (msg->m_hdr.h_flags != 0) {
        QuormOwnerId = msg->m_hdr.h_flags;
        cm_log(("Learn new quorm owner %d\n", QuormOwnerId));
    }
    cm_log(("MM qowner %d mask %x node %d, j %x n %x\n",QuormOwnerId, msg->m_hdr.h_bnum, nodeid, JoinNode_Mask, Node_Mask));

    // Our own view changed: tell everybody, reusing the received buffer
    if (old != ClusterNode_Mask[GsLocalNodeId]) {
        msg->m_hdr.h_type = GS_MSG_TYPE_MM;
        msg->m_hdr.h_len = 0;
        msg->m_hdr.h_flags = QuormOwnerId;
        msg->m_hdr.h_sid = GsLocalNodeId;
        msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
        msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);
    }

    // If the matrix is fully connected, we are done
    if (cm_full_connectivity() != 0) {
        switch (Regroup) {
        case CmStateJoin:
            cm_node_up();
            // wake up cm_start, which waits on Start_Event
            GsEventSignal(Start_Event);
            break;
        case CmStateUp:
            cm_node_up();
            break;
        case CmStateDown:
            cm_node_down();
            break;
        default:
            // Regroup is a long; cast so the argument matches %d
            err_log(("Invalid cm state %d\n", (int)Regroup));
            exit(1);
        }
        Regroup = CmStateUp;
    }
    GsLockExit(MmLock);
    msg_free(msg);
}
/*
 * GspInfoMsgHandler
 *
 * Handles an info (GS_MSG_TYPE_INFO) message. A non-zero h_flags is adopted
 * as the quorum owner and published through NsSetOwner. The received buffer
 * is then rewritten into a GS_MSG_TYPE_MM message carrying the local row of
 * ClusterNode_Mask and multicast to JoinNode_Mask. h_flags is forwarded as
 * received.
 *
 * msg: received message; released with msg_free before return.
 */
void GspInfoMsgHandler(gs_msg_t *msg)
{
    // remember the sender before the header is overwritten below
    int sender = msg->m_hdr.h_sid;

    // lock membership state
    GsLockEnter(MmLock);

    if (msg->m_hdr.h_flags != 0) {
        QuormOwnerId = msg->m_hdr.h_flags;
        NsSetOwner(QuormOwnerId);
    }
    cm_log(("Info Node %d mask %x quorm %d\n", sender, msg->m_hdr.h_bnum, QuormOwnerId));

    // Forward our view to all other members
    cm_log(("Info Mcast %x node %d mask %x\n", ClusterNode_Mask[GsLocalNodeId], sender, JoinNode_Mask));
    msg->m_hdr.h_sid = GsLocalNodeId;
    msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
    msg->m_hdr.h_type = GS_MSG_TYPE_MM;
    msg->m_hdr.h_len = 0;
    msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);

    GsLockExit(MmLock);
    msg_free(msg);
}
/*
 * gs_nodeup_handler
 *
 * Called when node `nodeid` becomes reachable. If the node is already in
 * JoinNode_Mask the event is reported and ignored. Otherwise the node is
 * added to JoinNode_Mask and a GS_MSG_TYPE_INFO message carrying the known
 * quorum owner and the local row of ClusterNode_Mask is sent to it.
 * The info message is sent in every Regroup state.
 */
void gs_nodeup_handler(int nodeid)
{
    gs_msg_hdr_t info;

    cm_log(("Node up %d\n", nodeid));

    GsLockEnter(MmLock);

    if (JoinNode_Mask & (1 << nodeid)) {
        printf("Node is already up %d 0x%x\n", nodeid, JoinNode_Mask);
        GsLockExit(MmLock);
        return;
    }

    JoinNode_Mask |= (1 << nodeid);

    cm_log(("Node %d is alive, j %x n %x, sending info\n", nodeid, JoinNode_Mask, Node_Mask));

    info.h_type = GS_MSG_TYPE_INFO;
    info.h_sid = GsLocalNodeId;
    info.h_flags = QuormOwnerId;
    info.h_bnum = ClusterNode_Mask[GsLocalNodeId];
    info.h_len = 0;
    msg_send((gs_memberid_t) nodeid, &info, NULL, 0);

    GsLockExit(MmLock);
}
void gs_nodedown_handler(int nodeid) { int i; gs_msg_hdr_t hdr;
GsLockEnter(MmLock);
if (!(JoinNode_Mask & (1 << nodeid))) { err_log(("Node %d is already down\n", nodeid)); GsLockExit(MmLock); return; }
if (Regroup == CmStateJoin) { err_log(("Node down during join, aborting...\n")); GsLockExit(MmLock); exit(1); }
Regroup = CmStateDown;
// Assume all nodes see this event and no messaging is required
for (i = 0; i <= GS_MAX_NODEID; i++) { ClusterNode_Mask[i] = (1 << GsLocalNodeId); }
JoinNode_Mask &= ~(1 << nodeid);
if (!(JoinNode_Mask & (1 << QuormOwnerId))) { cm_log(("Lost quorm owner %d\n", QuormOwnerId)); QuormOwnerId = 0; }
// Acquire Quorum file
if (QuormOwnerId != GsLocalNodeId && QuormAcquire() == TRUE) { cm_log(("I own quorm now\n")); QuormOwnerId = GsLocalNodeId; } cm_log(("Node %d down upset %x -> %x mask %x\n", nodeid, Node_Mask, JoinNode_Mask, Node_Mask ^ JoinNode_Mask));
// Generate phase 1 node down
GspPhase1NodeDown(Node_Mask ^ JoinNode_Mask);
// handle case when I am only node in cluster, otherwise enter regroup again
if (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId)) { //cm_full_connectivity() != 0) {
while (QuormOwnerId != GsLocalNodeId) { if (QuormAcquire() == TRUE) { QuormOwnerId = GsLocalNodeId; break; } Sleep(100); } cm_node_down(); Regroup = CmStateUp; } else { hdr.h_type = GS_MSG_TYPE_MM; hdr.h_sid = GsLocalNodeId; hdr.h_flags = QuormOwnerId; hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId]; hdr.h_len = 0;
msg_smcast(JoinNode_Mask, &hdr, NULL, 0); }
GsLockExit(MmLock); }
/*
 * gs_nodejoin_handler
 *
 * Called when node `nodeid` reports that it is alive. Only logs the event.
 */
void gs_nodejoin_handler(int nodeid)
{
    cm_log(("Node is alive %d\n", nodeid));
}
/*
 * gs_nodeid_handler
 *
 * Records `nodeid` as the id of the local node.
 */
void gs_nodeid_handler(int nodeid)
{
    GsLocalNodeId = (gs_nid_t) nodeid;
}

/*
 * Node event handlers, in table order: local node id assignment,
 * node join, node up, node down.
 */
gs_node_handler_t gs_node_handler[] = {
    gs_nodeid_handler,
    gs_nodejoin_handler,
    gs_nodeup_handler,
    gs_nodedown_handler
};
/*
 * cm_init
 *
 * Resets all connection manager state to its initial values (no local id,
 * no quorum owner, CmStateJoin, empty masks), creates the membership lock
 * and events, and then initializes the quorum and messaging layers.
 */
void cm_init()
{
    // identity and state
    GsLocalNodeId = 0;
    QuormOwnerId = 0;
    Regroup = CmStateJoin;

    // membership masks
    Node_Mask = 0;
    JoinNode_Mask = 0;
    Sync_Valid = 0;
    memset(Sync_Mask, 0, sizeof(Sync_Mask));
    memset(ClusterNode_Mask, 0, sizeof(ClusterNode_Mask));

    // synchronization objects
    GsLockInit(MmLock);
    GsEventInit(Start_Event);
    GsEventInit(Regroup_Event);

    // lower layers
    QuormInit();
    msg_init();
}
/*
 * cm_start
 *
 * Starts the connection manager. Only the first call does any work; later
 * calls return immediately.
 *
 * Every row of ClusterNode_Mask and both membership masks are seeded with
 * the local node only. The function then loops:
 *   - if the quorum can be acquired, the local node becomes quorum owner,
 *     the state becomes CmStateUp and the loop ends;
 *   - otherwise messaging is (re)started and the function waits on
 *     Start_Event with a timeout before re-evaluating.
 * The loop also ends once another node is present in JoinNode_Mask and the
 * pending membership has been committed (JoinNode_Mask == Node_Mask).
 *
 * Returns 0 in all cases.
 */
int cm_start()
{
    // long matches the LONG operand expected by InterlockedIncrement
    static long started = 0;
    int i;

    // only the first caller proceeds
    if (InterlockedIncrement(&started) != 1) {
        return 0;
    }

    for (i = 0; i <= GS_MAX_NODEID; i++) {
        ClusterNode_Mask[i] = (1 << GsLocalNodeId);
    }
    Node_Mask = 1 << GsLocalNodeId;
    JoinNode_Mask = 1 << GsLocalNodeId;

    // wait for join
    do {
        LARGE_INTEGER delta;

        GsLockEnter(MmLock);
        if (QuormAcquire() == TRUE) {
            // we own the quorum: no need to wait for anybody else
            QuormOwnerId = GsLocalNodeId;
            NsSetOwner(QuormOwnerId);
            Regroup = CmStateUp;
            GsLockExit(MmLock);
            break;
        }
        GsLockExit(MmLock);

        msg_start(JoinNode_Mask);
        cm_log(("Waiting to join %x %x\n", JoinNode_Mask, Node_Mask));

        // retry interval; presumably 5 seconds expressed in milliseconds,
        // depending on how GsEventWaitTimeout interprets the value
        delta.QuadPart = 0;
        delta.LowPart = 5 * 1000;
        if (GsEventWaitTimeout(Start_Event, &delta)) {
            cm_log(("j %x n %x\n", JoinNode_Mask, Node_Mask));
        }
    } while (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId) || JoinNode_Mask != Node_Mask);

    return 0;
}
|