You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
419 lines
8.7 KiB
419 lines
8.7 KiB
/*++

Copyright (c) 2000 Microsoft Corporation

Module Name:

    cm.c

Abstract:

    Connection Manager

Author:

    Ahmed Mohamed (ahmedm) 12, 01, 2000

Revision History:

--*/
|
|
|
|
#include "gs.h"
|
|
#include "gsp.h"
|
|
#include "msg.h"
|
|
|
|
extern BOOLEAN QuormAcquire();
|
|
extern void QuormInit();
|
|
extern void QuormRelease();
|
|
|
|
#include <stdio.h>
|
|
|
|
#define GS_MAX_NODEID 16
|
|
#define GS_REGROUP_PHASES 3
|
|
|
|
#define CmStateJoin 0
|
|
#define CmStateNormal 1
|
|
#define CmStateUp 2
|
|
#define CmStateDown 3
|
|
|
|
gs_nid_t GsLocalNodeId;
|
|
|
|
gs_nid_t QuormOwnerId;
|
|
|
|
int GsMaxNodeId = GS_MAX_NODEID;
|
|
int GsMinNodeId = 1;
|
|
|
|
long Regroup; // number of down nodes
|
|
|
|
ULONG Node_Mask; // current active node mask
|
|
ULONG JoinNode_Mask; // current joining node mask
|
|
ULONG Sync_Valid; // which barrier points are valid
|
|
|
|
ULONG Sync_Mask[GS_REGROUP_PHASES];
|
|
// Cluster connectivity matrix
|
|
ULONG ClusterNode_Mask[GS_MAX_NODEID+1];
|
|
|
|
gs_lock_t MmLock;
|
|
gs_event_t Start_Event, Regroup_Event;
|
|
|
|
extern void NsSetOwner(gs_nid_t);
|
|
|
|
void
|
|
cm_node_up()
|
|
{
|
|
ULONG mask;
|
|
|
|
if (Node_Mask == JoinNode_Mask) {
|
|
return;
|
|
}
|
|
|
|
// get the difference
|
|
mask = Node_Mask ^ JoinNode_Mask;
|
|
|
|
Node_Mask = JoinNode_Mask;
|
|
|
|
cm_log(("Node UPUPUP mask %x: upset %x\n", Node_Mask, mask));
|
|
|
|
// inform new node of resources that it we own
|
|
|
|
// If we have a registered node up event, call it now
|
|
}
|
|
|
|
void
|
|
cm_node_down()
|
|
{
|
|
ULONG mask;
|
|
|
|
if (Node_Mask == JoinNode_Mask) {
|
|
return;
|
|
}
|
|
|
|
// get the difference
|
|
mask = Node_Mask ^ JoinNode_Mask;
|
|
|
|
Node_Mask = JoinNode_Mask;
|
|
|
|
cm_log(("Node DNDNDN mask %x: dnset %x\n", Node_Mask, mask));
|
|
|
|
NsSetOwner(QuormOwnerId);
|
|
|
|
GspPhase2NodeDown(mask);
|
|
}
|
|
|
|
static int
|
|
cm_full_connectivity()
|
|
{
|
|
int i, j;
|
|
|
|
for (i = 1; i < GS_MAX_NODEID; i++) {
|
|
|
|
// if node is not up, ignore it
|
|
if ((JoinNode_Mask & (1 << i)) == 0)
|
|
continue;
|
|
|
|
// check node's i mask with others
|
|
for (j = i+1; j <= GS_MAX_NODEID; j++) {
|
|
|
|
// if node is not up, ignore it
|
|
if ((JoinNode_Mask & (1 << j)) == 0)
|
|
continue;
|
|
|
|
if (ClusterNode_Mask[i] ^ ClusterNode_Mask[j]) {
|
|
cm_log(("FC: node %d mask 0x%x node %d mask 0x%x\n",
|
|
i,
|
|
ClusterNode_Mask[i],
|
|
j,
|
|
ClusterNode_Mask[j]));
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
void
|
|
GspMmMsgHandler(gs_msg_t *msg)
|
|
{
|
|
int nodeid = msg->m_hdr.h_sid;
|
|
ULONG old;
|
|
|
|
// Update node's up mask
|
|
GsLockEnter(MmLock);
|
|
|
|
old = ClusterNode_Mask[GsLocalNodeId];
|
|
|
|
ClusterNode_Mask[nodeid] |= msg->m_hdr.h_bnum;
|
|
ClusterNode_Mask[GsLocalNodeId] |= (1 << nodeid);
|
|
|
|
if (msg->m_hdr.h_flags != 0) {
|
|
QuormOwnerId = msg->m_hdr.h_flags;
|
|
cm_log(("Learn new quorm owner %d\n", QuormOwnerId));
|
|
}
|
|
|
|
cm_log(("MM qowner %d mask %x node %d, j %x n %x\n",QuormOwnerId,
|
|
msg->m_hdr.h_bnum, nodeid,
|
|
JoinNode_Mask, Node_Mask));
|
|
|
|
if (old != ClusterNode_Mask[GsLocalNodeId]) {
|
|
|
|
msg->m_hdr.h_type = GS_MSG_TYPE_MM;
|
|
msg->m_hdr.h_len = 0;
|
|
msg->m_hdr.h_flags = QuormOwnerId;
|
|
msg->m_hdr.h_sid = GsLocalNodeId;
|
|
msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
|
|
|
|
msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);
|
|
}
|
|
|
|
// If the matrix is full connected, we are done
|
|
if (cm_full_connectivity() != 0) {
|
|
switch(Regroup) {
|
|
case CmStateJoin:
|
|
cm_node_up();
|
|
GsEventSignal(Start_Event);
|
|
break;
|
|
case CmStateUp:
|
|
cm_node_up();
|
|
break;
|
|
case CmStateDown:
|
|
cm_node_down();
|
|
break;
|
|
default:
|
|
err_log(("Invalid cm state %d\n", Regroup));
|
|
exit(1);
|
|
}
|
|
Regroup = CmStateUp;
|
|
#if 0
|
|
cm_node_up();
|
|
if (Regroup < 0) {
|
|
GsEventSignal(Start_Event);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
GsLockExit(MmLock);
|
|
|
|
msg_free(msg);
|
|
}
|
|
|
|
void
|
|
GspInfoMsgHandler(gs_msg_t *msg)
|
|
{
|
|
int nodeid = msg->m_hdr.h_sid;
|
|
|
|
// make sure we send our info to the sender
|
|
// cm_node_join(nodeid);
|
|
|
|
// lock membership state
|
|
GsLockEnter(MmLock);
|
|
|
|
if (msg->m_hdr.h_flags != 0) {
|
|
QuormOwnerId = msg->m_hdr.h_flags;
|
|
NsSetOwner(QuormOwnerId);
|
|
}
|
|
|
|
cm_log(("Info Node %d mask %x quorm %d\n", nodeid, msg->m_hdr.h_bnum,
|
|
QuormOwnerId));
|
|
|
|
// Foward message to all other members
|
|
cm_log(("Info Mcast %x node %d mask %x\n",
|
|
ClusterNode_Mask[GsLocalNodeId], nodeid, JoinNode_Mask));
|
|
|
|
msg->m_hdr.h_type = GS_MSG_TYPE_MM;
|
|
msg->m_hdr.h_len = 0;
|
|
msg->m_hdr.h_sid = GsLocalNodeId;
|
|
msg->m_hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
|
|
|
|
msg_smcast(JoinNode_Mask, &msg->m_hdr, NULL, 0);
|
|
|
|
GsLockExit(MmLock);
|
|
|
|
msg_free(msg);
|
|
}
|
|
|
|
void
|
|
gs_nodeup_handler(int nodeid)
|
|
{
|
|
gs_msg_hdr_t hdr;
|
|
|
|
cm_log(("Node up %d\n", nodeid));
|
|
GsLockEnter(MmLock);
|
|
if (JoinNode_Mask & (1 << nodeid)) {
|
|
printf("Node is already up %d 0x%x\n", nodeid, JoinNode_Mask);
|
|
GsLockExit(MmLock);
|
|
return;
|
|
}
|
|
|
|
JoinNode_Mask |= (1 << nodeid);
|
|
|
|
if (1 || Regroup != CmStateJoin) {
|
|
cm_log(("Node %d is alive, j %x n %x, sending info\n", nodeid,
|
|
JoinNode_Mask, Node_Mask));
|
|
|
|
|
|
hdr.h_type = GS_MSG_TYPE_INFO;
|
|
hdr.h_sid = GsLocalNodeId;
|
|
hdr.h_flags = QuormOwnerId;
|
|
hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
|
|
hdr.h_len = 0;
|
|
|
|
msg_send((gs_memberid_t) nodeid, &hdr, NULL, 0);
|
|
}
|
|
GsLockExit(MmLock);
|
|
}
|
|
|
|
void
|
|
gs_nodedown_handler(int nodeid)
|
|
{
|
|
int i;
|
|
gs_msg_hdr_t hdr;
|
|
|
|
GsLockEnter(MmLock);
|
|
|
|
if (!(JoinNode_Mask & (1 << nodeid))) {
|
|
err_log(("Node %d is already down\n", nodeid));
|
|
GsLockExit(MmLock);
|
|
return;
|
|
}
|
|
|
|
if (Regroup == CmStateJoin) {
|
|
err_log(("Node down during join, aborting...\n"));
|
|
GsLockExit(MmLock);
|
|
exit(1);
|
|
}
|
|
|
|
Regroup = CmStateDown;
|
|
|
|
// Assume all nodes see this event and no messaging is required
|
|
for (i = 0; i <= GS_MAX_NODEID; i++) {
|
|
ClusterNode_Mask[i] = (1 << GsLocalNodeId);
|
|
}
|
|
|
|
JoinNode_Mask &= ~(1 << nodeid);
|
|
|
|
if (!(JoinNode_Mask & (1 << QuormOwnerId))) {
|
|
cm_log(("Lost quorm owner %d\n", QuormOwnerId));
|
|
QuormOwnerId = 0;
|
|
}
|
|
|
|
// Acquire Quorum file
|
|
if (QuormOwnerId != GsLocalNodeId && QuormAcquire() == TRUE) {
|
|
cm_log(("I own quorm now\n"));
|
|
QuormOwnerId = GsLocalNodeId;
|
|
}
|
|
cm_log(("Node %d down upset %x -> %x mask %x\n", nodeid,
|
|
Node_Mask, JoinNode_Mask, Node_Mask ^ JoinNode_Mask));
|
|
|
|
// Generate phase 1 node down
|
|
GspPhase1NodeDown(Node_Mask ^ JoinNode_Mask);
|
|
|
|
// handle case when I am only node in cluster, otherwise enter regroup again
|
|
if (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId)) { //cm_full_connectivity() != 0) {
|
|
while (QuormOwnerId != GsLocalNodeId) {
|
|
if (QuormAcquire() == TRUE) {
|
|
QuormOwnerId = GsLocalNodeId;
|
|
break;
|
|
}
|
|
Sleep(100);
|
|
}
|
|
cm_node_down();
|
|
Regroup = CmStateUp;
|
|
} else {
|
|
hdr.h_type = GS_MSG_TYPE_MM;
|
|
hdr.h_sid = GsLocalNodeId;
|
|
hdr.h_flags = QuormOwnerId;
|
|
hdr.h_bnum = ClusterNode_Mask[GsLocalNodeId];
|
|
hdr.h_len = 0;
|
|
|
|
msg_smcast(JoinNode_Mask, &hdr, NULL, 0);
|
|
}
|
|
|
|
GsLockExit(MmLock);
|
|
}
|
|
|
|
|
|
//
// gs_nodejoin_handler
//
// Node-join notification. Only logs the id; no membership state is changed.
//
void
gs_nodejoin_handler(int nodeid)
{
    cm_log(("Node is alive %d\n", nodeid));
}
|
|
|
|
void
|
|
gs_nodeid_handler(int nodeid)
|
|
{
|
|
GsLocalNodeId = (gs_nid_t) nodeid;
|
|
// cm_log(("Node id %d\n", nodeid));
|
|
}
|
|
|
|
gs_node_handler_t gs_node_handler[] = {
|
|
gs_nodeid_handler,
|
|
gs_nodejoin_handler,
|
|
gs_nodeup_handler,
|
|
gs_nodedown_handler
|
|
};
|
|
|
|
void
|
|
cm_init()
|
|
{
|
|
GsLocalNodeId = 0;
|
|
QuormOwnerId = 0;
|
|
Regroup = CmStateJoin;
|
|
Node_Mask = 0;
|
|
JoinNode_Mask = 0;
|
|
Sync_Valid = 0;
|
|
memset(Sync_Mask, 0, sizeof(Sync_Mask));
|
|
memset(ClusterNode_Mask, 0, sizeof(ClusterNode_Mask));
|
|
|
|
GsLockInit(MmLock);
|
|
GsEventInit(Start_Event);
|
|
GsEventInit(Regroup_Event);
|
|
|
|
QuormInit();
|
|
msg_init();
|
|
}
|
|
|
|
cm_start()
|
|
{
|
|
int i;
|
|
static int started = 0;
|
|
|
|
i = InterlockedIncrement(&started);
|
|
if (i != 1)
|
|
return 0;
|
|
|
|
for (i = 0; i <= GS_MAX_NODEID; i++) {
|
|
ClusterNode_Mask[i] = (1 << GsLocalNodeId);
|
|
}
|
|
Node_Mask = 1 << GsLocalNodeId;
|
|
JoinNode_Mask = 1 << GsLocalNodeId;
|
|
|
|
// wait for join,
|
|
do {
|
|
LARGE_INTEGER delta;
|
|
|
|
GsLockEnter(MmLock);
|
|
|
|
if (QuormAcquire() == TRUE) {
|
|
QuormOwnerId = GsLocalNodeId;
|
|
NsSetOwner(QuormOwnerId);
|
|
Regroup = CmStateUp;
|
|
GsLockExit(MmLock);
|
|
break;
|
|
}
|
|
|
|
GsLockExit(MmLock);
|
|
|
|
msg_start(JoinNode_Mask);
|
|
cm_log(("Waiting to join %x %x\n", JoinNode_Mask, Node_Mask));
|
|
|
|
delta.QuadPart = 0;
|
|
delta.LowPart = 5 * 1000; // retry every 5 second
|
|
if (GsEventWaitTimeout(Start_Event, &delta)) {
|
|
cm_log(("j %x n %x\n", JoinNode_Mask, Node_Mask));
|
|
}
|
|
} while (JoinNode_Mask == (ULONG)(1 << GsLocalNodeId) || JoinNode_Mask != Node_Mask);
|
|
|
|
// InterlockedIncrement(&Regroup);
|
|
|
|
return 0;
|
|
}
|