/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
init.c
Abstract:
This module provides the main cluster initialization.
Author:
John Vert (jvert) 6/5/1996
Revision History:
--*/
extern "C"
{
#include "initp.h"
#include <objbase.h>
RPC_STATUS ApipConnectCallback(
IN RPC_IF_ID * Interface,
IN void * Context
);
}
#define CLUSTER_PRIORITY_CLASS HIGH_PRIORITY_CLASS
#include "CVssCluster.h"
//
// Global Data
//
RPC_BINDING_VECTOR *CsRpcBindingVector = NULL;
LPTOP_LEVEL_EXCEPTION_FILTER lpfnOriginalExceptionFilter = NULL;
BOOLEAN bFormCluster = TRUE;
//
// LocalData
//
BOOLEAN CspIntraclusterRpcServerStarted = FALSE;
HANDLE CspMutex = NULL;
PCLRTL_WORK_QUEUE CspEventReportingWorkQueue = NULL;
//
// Prototypes
//
LONG
CspExceptionFilter(
IN PEXCEPTION_POINTERS ExceptionInfo
);
//
// Routines.
//
VOID CspLogStartEvent(
IN BOOL bJoin)
{
LPWSTR pszClusterName = NULL;
LPWSTR pszName = NULL;
DWORD dwClusterNameSize;
DWORD dwSize;
DWORD dwStatus;
WCHAR szUnknownClusterName[]=L"Unknown";
pszClusterName = NULL;
dwClusterNameSize = 0;
dwStatus = DmQueryString(DmClusterParametersKey,
CLUSREG_NAME_CLUS_NAME,
REG_SZ,
&pszClusterName,
&dwClusterNameSize,
&dwSize);
if (dwStatus != ERROR_SUCCESS)
{
//we don't treat this error as fatal, since
//the cluster did start, but we really shouldn't get this
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Couldn't get the cluster name, status=%1!u!\n",
dwStatus);
pszName = szUnknownClusterName;
}
else
pszName = pszClusterName;
//log events in the cluster log to mark the start of the cluster server
if (bJoin)
CsLogEvent1(LOG_NOISE, SERVICE_SUCCESSFUL_JOIN, pszName);
else
CsLogEvent1(LOG_NOISE, SERVICE_SUCCESSFUL_FORM, pszName);
if (pszClusterName)
LocalFree(pszClusterName);
}
DWORD
ClusterInitialize(
VOID
)
/*++
Routine Description:
This is the main cluster initialization path. It calls the
initialization routines of all the other components. It then
attempts to join an existing cluster. If the existing cluster
cannot be found, it forms a new cluster.
Arguments:
None.
Return Value:
ERROR_SUCCESS if successful
Win32 error code otherwise.
--*/
{
DWORD Status;
DWORD JoinStatus;
DWORD StringBufferSize = 0, StringSize = 0;
SIZE_T minWorkingSetSize;
SIZE_T maxWorkingSetSize;
BOOL bJoin;
BOOL bEvicted;
PNM_NODE_ENUM2 pNodeEnum = NULL;
HRESULT hr = S_OK;
ClRtlLogPrint(LOG_NOISE, "[INIT] ClusterInitialize called to start cluster.\n");
//
// give us a fighting chance on loaded server
//
#if CLUSTER_PRIORITY_CLASS
if ( !SetPriorityClass( GetCurrentProcess(), CLUSTER_PRIORITY_CLASS ) ) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to set cluster service priority class, Status %1!lx!.\n",
GetLastError() );
}
#endif
// initialize our product suite
CsMyProductSuite = (SUITE_TYPE)ClRtlGetSuiteType();
CL_ASSERT(CsMyProductSuite != 0);
//
// First check our OS to make sure it is ok to run.
//
if (!ClRtlIsOSValid() ||
!ClRtlIsOSTypeValid()) {
//
// Bail out, machine is running something odd.
//
CsLogEvent(LOG_CRITICAL, SERVICE_FAILED_INVALID_OS);
return(ERROR_REVISION_MISMATCH);
}
Status = ClRtlHasNodeBeenEvicted( &bEvicted );
if ( Status != ERROR_SUCCESS )
{
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to determine if this node was previously evicted or not, status %1!u!\n",
Status);
return Status;
}
if ( bEvicted != FALSE )
{
// This node has been evicted previously, but cleanup could not complete.
ClRtlLogPrint(LOG_UNUSUAL,
"[CS] This node has been evicted from the cluster, but cleanup was not completed. Restarting cleanup\n"
);
// Reinitiate cleanup
hr = ClRtlCleanupNode(
NULL, // Name of the node to be cleaned up (NULL means this node)
60000, // Amount of time (in milliseconds) to wait before starting cleanup
0 // timeout interval in milliseconds
);
if ( FAILED( hr ) && ( hr != RPC_S_CALLPENDING ) )
{
Status = HRESULT_CODE( hr );
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to reinitiate cleanup, status 0x%1!x!\n",
hr);
}
else
{
Status = ERROR_SUCCESS;
}
return Status;
}
//
// Acquire our named mutex in order to prevent multiple copies
// of the cluster service from accidentally getting started.
//
CspMutex = CreateMutexW(
NULL,
FALSE,
L"ClusterServer_Running"
);
if (CspMutex==NULL) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to create cluster mutex, status %1!u!\n",
Status);
return Status;
}
if (WaitForSingleObject(CspMutex, 30000) == WAIT_TIMEOUT) {
//
// Somebody already has this mutex, exit immediately.
//
ClRtlLogPrint(LOG_CRITICAL,
"[CS] The Cluster Service is already running.\n");
return(ERROR_SERVICE_ALREADY_RUNNING);
}
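//
// For reference, a minimal sketch of the same single-instance pattern using
// only documented Win32 mutex APIs; the mutex name, variable name, and zero
// timeout below are illustrative, not what the service itself uses.
//
#if 0
HANDLE hExampleMutex = CreateMutexW(NULL, FALSE, L"MyService_Running");
if (hExampleMutex == NULL) {
return GetLastError(); // the mutex could not be created at all
}
if (WaitForSingleObject(hExampleMutex, 0) == WAIT_TIMEOUT) {
return ERROR_SERVICE_ALREADY_RUNNING; // another process owns the mutex
}
#endif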
//
// Set our unhandled exception filter so that if anything horrible
// goes wrong, we can exit immediately.
//
lpfnOriginalExceptionFilter = SetUnhandledExceptionFilter(CspExceptionFilter);
//
// Next initialize the testpoint code
//
TestpointInit();
g_pCVssWriterCluster = new CVssWriterCluster;
if ( g_pCVssWriterCluster == NULL ) {
Status = ERROR_NOT_ENOUGH_MEMORY;
ClRtlLogPrint(LOG_CRITICAL,
"[CS] VSS: Unable to allocate VssWriter, %1!u!\n", Status);
return(Status);
}
//
// Create the global work queues.
//
CsDelayedWorkQueue = ClRtlCreateWorkQueue(CS_MAX_DELAYED_WORK_THREADS,
THREAD_PRIORITY_NORMAL);
if (CsDelayedWorkQueue == NULL) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to create delayed work queue, %1!u!\n",
Status);
return(Status);
}
CsCriticalWorkQueue = ClRtlCreateWorkQueue(CS_MAX_CRITICAL_WORK_THREADS,
THREAD_PRIORITY_ABOVE_NORMAL);
if (CsCriticalWorkQueue == NULL) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to create critical work queue, %1!u!\n",
Status);
return(Status);
}
#if 0
CspEventReportingWorkQueue = ClRtlCreateWorkQueue(1, THREAD_PRIORITY_NORMAL);
if (CspEventReportingWorkQueue == NULL) {
Status = GetLastError();
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to create event reporting work queue, %1!u!\n",
Status);
return(Status);
}
ClRtlEventLogSetWorkQueue( CspEventReportingWorkQueue );
#endif
//
// Init COM
//
Status = CoInitializeEx( NULL, COINIT_DISABLE_OLE1DDE | COINIT_MULTITHREADED );
if ( !SUCCEEDED( Status )) {
ClRtlLogPrint(LOG_CRITICAL, "[CS] Couldn't init COM %1!08X!\n", Status );
return Status;
}
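//
// Note that CoInitializeEx returns an HRESULT, so on failure the value
// propagated here is an HRESULT rather than a Win32 error code; the
// SUCCEEDED() test is still valid because it only examines the sign bit.
//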
//
// Initialize Object Manager
//
Status = OmInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailOmInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize Event Processor
//
Status = EpInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailEpInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Chittur Subbaraman (chitturs) - 12/4/99
//
// Initialize the restore database manager. This function is a NOOP
// if restore database is not being done. This function MUST be called
// before the DM is initialized.
//
Status = RdbInitialize();
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize Database Manager
//
Status = DmInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailDmInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize Node Manager
//
Status = NmInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailNmInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize Global Update Manager
//
Status = GumInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailGumInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize the cluster wide event logging
//
if (!CsNoRepEvtLogging) {
Status = EvInitialize();
//if this fails, we still start the cluster service
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Error calling EvInitialize, Status = %1!u!\n",
Status
);
}
}
//
// Initialize Failover Manager component
//
Status = FmInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailFmInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize API
//
Status = ApiInitialize();
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize Log Manager component
//
Status = LmInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailLmInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Initialize the Checkpoint Manager component
//
Status = CpInitialize();
#ifdef CLUSTER_TESTPOINT
TESTPT( TpFailCpInit ) {
Status = 99999;
}
#endif
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// find out what domain account we're running under. This is needed by
// some packages
//
Status = ClRtlGetRunningAccountInfo( &CsServiceDomainAccount );
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL, "[CS] Couldn't determine Service Domain Account. status %1!u!\n",
Status);
return Status;
}
ClRtlLogPrint(LOG_NOISE, "[CS] Service Domain Account = %1!ws!\n",
CsServiceDomainAccount);
//
// Prepare the RPC server. This does not enable us to receive any calls.
//
Status = ClusterInitializeRpcServer();
if (Status != ERROR_SUCCESS) {
return(Status);
}
//
// Read the cluster name from the database.
//
Status = DmQuerySz(
DmClusterParametersKey,
CLUSREG_NAME_CLUS_NAME,
&CsClusterName,
&StringBufferSize,
&StringSize
);
if (Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_UNUSUAL,
"[CS] Unable to read cluster name from database. Service initialization failed.\n"
);
return(Status);
}
//
// First, attempt to join the cluster.
//
ClRtlLogPrint(LOG_NOISE,
"[INIT] Attempting to join cluster %1!ws!\n",
CsClusterName
);
bFormCluster = TRUE;
JoinStatus = ClusterJoin();
//
// If this node was evicted when it was down, this error code is returned by the
// sponsor when it tries to rejoin the cluster. In this case, initiate a cleanup
// of this node and exit.
//
if ( (JoinStatus == ERROR_CLUSTER_NODE_NOT_MEMBER) ||
(JoinStatus == ERROR_CLUSTER_INSTANCE_ID_MISMATCH))
{
DWORD CleanupStatus;
//SS: If the instance mismatch occurs the first time the service runs after
//configuration, then it implies there is some sort of confusion(duplicate ip
//addresses or name) during the cluster configuration process, in that case,
//we would like the setup to make the cleanup decision
// If this is not the first run after a clean install, the service will
// initiate cleanup itself
if (!CsFirstRun || CsUpgrade)
{
WCHAR wStatus[32];
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] This node has been evicted from the cluster when it was unavailable. Initiating cleanup.\n"
);
// Initiate cleanup of this node.
hr = ClRtlCleanupNode(
NULL, // Name of the node to be cleaned up (NULL means this node)
60000, // Amount of time (in milliseconds) to wait before starting cleanup
0 // timeout interval in milliseconds
);
if ( FAILED( hr ) && ( hr != RPC_S_CALLPENDING ) )
{
CleanupStatus = HRESULT_CODE( hr );
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Failed to initiate cleanup of this node, status 0x%1!x!\n",
hr
);
}
else
{
CleanupStatus = ERROR_SUCCESS;
}
wsprintfW(&(wStatus[0]), L"%u", CleanupStatus);
CsLogEvent1(
LOG_NOISE,
CS_EVENT_CLEANUP_ON_EVICTION,
wStatus
);
}
return(JoinStatus);
}
//
// Chittur Subbaraman (chitturs) - 10/27/98
//
// If a database restore operation is requested, check whether
// you succeeded in establishing a connection. If so, check
// whether you are forced to restore the DB. If not, abort the
// whole operation and return. If you are forced to restore,
// you will first stop the service in other nodes and then
// try to form a cluster.
//
if ( CsDatabaseRestore == TRUE ) {
if ( JoinStatus == ERROR_CLUSTER_NODE_UP ) {
if ( CsForceDatabaseRestore == FALSE ) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Cannot restore DB while the cluster is up, service init failed\n"
);
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] You may try to restart the service with the forcerestore option\n"
);
RpcBindingFree(&CsJoinSponsorBinding);
return(JoinStatus);
}
//
// At this point, a restore database operation is forced by
// the user. So, enumerate the cluster nodes with the help
// of the sponsor and then stop the services on all the
// cluster nodes.
//
Status = NmRpcEnumNodeDefinitions2(
CsJoinSponsorBinding,
0,
L"0",
&pNodeEnum
);
RpcBindingFree(&CsJoinSponsorBinding);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Cannot force a restore DB: Unable to enumerate cluster nodes\n"
);
LocalFree( pNodeEnum );
return (Status);
}
//
// Attempt to stop the clussvc on all nodes, except of course
// this node
//
Status = RdbStopSvcOnNodes (
pNodeEnum,
L"clussvc"
);
LocalFree( pNodeEnum );
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Cannot force a restore DB: Unable to stop cluster nodes\n"
);
return(Status);
} else {
CL_LOGCLUSWARNING( CS_STOPPING_SVC_ON_REMOTE_NODES );
}
}
}
if (JoinStatus != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to join cluster, status %1!u!\n",
JoinStatus
);
//
// Forming a cluster will also attempt to arbitrate the quorum
// resource.
//
bJoin = FALSE;
//
// If the join failed but a sponsor was found, skip forming a cluster
//
if (bFormCluster == FALSE) {
return (JoinStatus);
}
ClRtlLogPrint(LOG_NOISE,
"[INIT] Attempting to form cluster %1!ws!\n",
CsClusterName
);
Status = ClusterForm();
if (Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Failed to form cluster, status %1!u!.\n",
Status
);
if (Status == ERROR_BUSY) {
//
// Couldn't arbitrate for the quorum disk. Return
// the join status, since that is the real failure.
//
Status = JoinStatus;
}
CsLogEventData(
LOG_CRITICAL,
SERVICE_FAILED_JOIN_OR_FORM,
sizeof(Status),
&Status
);
return(Status);
}
}
else {
bJoin = TRUE;
}
//
// We are now a full cluster member.
//
//
// Register the ExtroCluster (join) RPC interface so we can sponsor a
// joining node.
//
Status = ClusterRegisterExtroclusterRpcInterface();
if (Status != RPC_S_OK) {
return(Status);
}
//
// Register the Join Version RPC interface so we can determine
// the version of a joining node.
//
Status = ClusterRegisterJoinVersionRpcInterface();
if (Status != RPC_S_OK) {
return(Status);
}
//
// Enable this node to participate in regroups.
//
MmSetRegroupAllowed(TRUE);
//
// Now enable Clussvc to Clusnet Heartbeating.
//
if ((Status = NmInitializeClussvcClusnetHb()) != ERROR_SUCCESS) {
return Status;
}
//
// Advertise that the node is fully up now
//
Status = NmSetExtendedNodeState( ClusterNodeUp );
if (Status != ERROR_SUCCESS) {
// NmSetExtendedNodeState logs an error
return(Status);
}
//
// Chittur Subbaraman (chitturs) - 10/28/99
//
// Process FM join events that must be done AFTER this cluster
// node is declared as fully UP.
//
if ( bJoin ) {
FmJoinPhase3();
}
//
// We are now going to attempt to increase our working set size. This,
// plus the priority class boost, should allow the cluster service
// to run a little better and be more responsive to cluster events.
//
if ( GetProcessWorkingSetSize( GetCurrentProcess(),
&minWorkingSetSize,
&maxWorkingSetSize ) )
{
if ( minWorkingSetSize < MIN_WORKING_SET_SIZE ) {
minWorkingSetSize = MIN_WORKING_SET_SIZE;
}
if ( maxWorkingSetSize < MAX_WORKING_SET_SIZE ) {
maxWorkingSetSize = MAX_WORKING_SET_SIZE;
}
if ( SetProcessWorkingSetSize( GetCurrentProcess(),
minWorkingSetSize,
maxWorkingSetSize ) )
{
//
// now report what we set it to
//
if ( GetProcessWorkingSetSize( GetCurrentProcess(),
&minWorkingSetSize,
&maxWorkingSetSize ) )
{
ClRtlLogPrint(LOG_NOISE,
"[INIT] Working Set changed to [%1!u!, %2!u!].\n",
minWorkingSetSize,
maxWorkingSetSize);
} else {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to re-read our working set size, Status %1!u!.\n",
GetLastError());
}
} else {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to set our Min WS to %1!u!, Max WS to %2!u!, Status %3!u!.\n",
minWorkingSetSize,
maxWorkingSetSize,
GetLastError());
}
} else {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to get our working set size, Status %1!u!.\n",
GetLastError()
);
}
CspLogStartEvent(bJoin);
#if 0
//
// Chittur Subbaraman (chitturs) - 11/4/98
//
if ( CsForceDatabaseRestore == TRUE )
{
//
// If you stopped the service on any nodes for database restoration
// purposes, then start them now
//
RdbStartSvcOnNodes ( L"clussvc" );
}
#endif
hr = ClRtlInitiatePeriodicCleanupThread();
if ( FAILED( hr ) ) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Error 0x%1!08lx! occurred trying to initiate periodic cleanup thread. This is not fatal and will not prevent the service from starting.\n",
hr);
}
ClRtlLogPrint(LOG_NOISE, "[INIT] Cluster started.\n");
return(ERROR_SUCCESS);
} // ClusterInitialize (aka ClusterStartup)
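//
// A hedged sketch of a caller driving the two routines above; the real
// service entry point lives elsewhere, and this function, including its
// name and error handling, is an illustration only.
//
#if 0
static VOID
ExampleRunClusterService(
VOID
)
{
DWORD status = ClusterInitialize();
if (status != ERROR_SUCCESS) {
// ClusterShutdown reports the failure and calls ExitProcess.
ClusterShutdown(status);
return;
}
// ... run until the SCM asks the service to stop, then:
ClusterShutdown(ERROR_SUCCESS);
}
#endif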
VOID
ClusterShutdown(
DWORD ExitCode
)
/*++
Routine Description:
Shuts down the cluster in the reverse order from which it was brought up.
Arguments:
ExitCode - Supplies the exit code to report to the service controller.
Return Value:
None.
--*/
{
HRESULT hr = S_OK;
//
// Shutdown all components of the Cluster Service in approximately
// the reverse order in which they were brought up.
//
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] The cluster service is shutting down.\n");
//
// Enable this when we support ClusterShuttingDown state
//
// NmSetExtendedNodeState( ClusterNodeDown );
#ifdef CLUSTER_TESTPOINT
TESTPT(TpFailClusterShutdown) {
return;
}
#endif
MmSetRegroupAllowed(FALSE);
// if replicated event logging was initialized, shut it down
if (!CsNoRepEvtLogging)
{
//
// Shutdown the cluster eventlog manager - this deregisters from the
// eventlog server.
EvShutdown();
}
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
#if 0
//
// Chittur Subbaraman (chitturs) - 5/8/2000
//
// Don't shutdown DM updates for now so as to avoid spurious node shoot downs due to the locker
// node shutting down and hence the DM update succeeding when in fact it should fail.
//
DmShutdownUpdates();
#endif
//
// Move or offline all groups owned by this node. This will destroy
// the resource monitors and the in-memory resource and group objects.
//
FmShutdownGroups();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
// Shutdown the DM - this flushes the log file and releases the DM hooks.
DmShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
// Unsubscribe from Vss
//
if ( g_bCVssWriterClusterSubscribed ) {
ClRtlLogPrint( LOG_NOISE, "[INIT] VSS: Unsubscribing\n" );
hr = g_pCVssWriterCluster->Unsubscribe( );
if ( FAILED( hr ) ) {
ClRtlLogPrint( LOG_CRITICAL, "[INIT] VSS: Failed to Unsubscribe from VSS, status 0x%1!x!\n", hr );
} else {
g_bCVssWriterClusterSubscribed = FALSE;
}
}
// Delete our Vss instance if we have one (and we are no longer subscribed).
//
if (g_pCVssWriterCluster && (g_bCVssWriterClusterSubscribed == FALSE) ) {
delete g_pCVssWriterCluster;
}
TestpointDeInit();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
NmCloseConnectoidAdviseSink();
CoUninitialize();
//
// Trigger a banishing regroup incident, prompting the
// other nodes in the cluster to regroup this node out
//
MMLeave();
//
// Exit the process now... there are a number of circular dependencies
// that have been built up during the 'life of the cluster'. There
// is no easy way to unwind from here... so just exit out.
//
//
// Announce that we are stopped only if we were successful in
// initializing. The SC will not restart the service if we report that
// we've stopped. Make sure the service status announcement is the last
// thing done since there is a race between this thread and the main
// thread that will prevent code after the announcement from being
// executed.
//
ClRtlLogPrint(( ExitCode == ERROR_SUCCESS ) ? LOG_NOISE : LOG_CRITICAL,
"[CS] Service Stopped. exit code = %1!u!\n\n", ExitCode);
if ( ExitCode == ERROR_SUCCESS ) {
CsLogEvent(LOG_NOISE, SERVICE_SUCCESSFUL_TERMINATION);
CsServiceStatus.dwCurrentState = SERVICE_STOPPED;
CsServiceStatus.dwControlsAccepted = 0;
CsServiceStatus.dwCheckPoint = 0;
CsServiceStatus.dwWaitHint = 0;
CspSetErrorCode( ExitCode, &CsServiceStatus );
CsAnnounceServiceStatus();
} else {
ExitCode = CspSetErrorCode( ExitCode, &CsServiceStatus );
}
//release the mutex so that the next instance can acquire it immediately
ReleaseMutex(CspMutex);
ExitProcess(ExitCode);
#if 0
//
// Everything after this point is what should happen in a clean shutdown.
//
// Shutdown the Failover Manager.
FmShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// Shutdown the Cluster Api.
//
ApiShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// Stop the RPC server and deregister our endpoints & interfaces.
//
ClusterShutdownRpcServer();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// At this point, all calls on the Intracluster and Extrocluster
// RPC interfaces are complete and no more will be received.
//
// Note - Calls on the Clusapi interface are still possible.
//
//
// Shutdown the Node Manager.
//
NmShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
// Shutdown the Event Processor.
EpShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
LmShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
CpShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//shutdown gum
GumShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
// Shutdown the Object Manager.
OmShutdown();
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// Destroy the global work queues
//
if (CsDelayedWorkQueue != NULL) {
IF_DEBUG(CLEANUP) {
ClRtlLogPrint(LOG_NOISE,"[CS] Destroying delayed work queue...\n");
}
ClRtlDestroyWorkQueue(CsDelayedWorkQueue);
CsDelayedWorkQueue = NULL;
}
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
if (CsCriticalWorkQueue != NULL) {
IF_DEBUG(CLEANUP) {
ClRtlLogPrint(LOG_NOISE,"[CS] Destroying critical work queue...\n");
}
ClRtlDestroyWorkQueue(CsCriticalWorkQueue);
CsCriticalWorkQueue = NULL;
}
ClRtlEventLogSetWorkQueue( NULL );
if (CspEventReportingWorkQueue != NULL) {
IF_DEBUG(CLEANUP) {
ClRtlLogPrint(LOG_NOISE,"[CS] Destroying event reporing work queue...\n");
}
ClRtlDestroyWorkQueue(CspEventReportingWorkQueue);
CspEventReportingWorkQueue = NULL;
}
//
// Free global data
//
LocalFree(CsClusterName);
if (CspMutex != NULL) {
CloseHandle(CspMutex);
CspMutex = NULL;
}
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
CsLogEvent(LOG_NOISE, SERVICE_SUCCESSFUL_TERMINATION);
#endif // 0
return;
}
DWORD
ClusterForm(
VOID
)
/*++
Routine Description:
Code path for initializing a new instance of the cluster. This
is taken when there are no nodes active in the cluster.
Arguments:
None
Return Value:
ERROR_SUCCESS if successful.
Win32 error code otherwise.
--*/
{
DWORD Status;
PFM_GROUP pQuoGroup;
DWORD dwError;
DWORD dwQuorumDiskSignature = 0;
//
// Initialize the event handler.
//
Status = EpInitPhase1();
if ( Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] EpInitPhase1 failed, Status = %1!u!\n",
Status);
return(Status);
}
//
// The API server is required by FM, since it starts the resource monitor.
//
Status = ApiOnlineReadOnly();
if ( Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] ApiInitPhase1 failed, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
//
// Arbitrate for the quorum resource.
//
Status = FmGetQuorumResource(&pQuoGroup, &dwQuorumDiskSignature);
if ( Status != ERROR_SUCCESS ) {
if ( ( Status == ERROR_FILE_NOT_FOUND ) &&
( CsForceDatabaseRestore == TRUE ) ) {
//
// Chittur Subbaraman (chitturs) - 10/30/98
//
// Try to fix up the quorum disk signature and if successful
// try to get the quorum resource again. Note that the following
// function will attempt a fix up only if the CsForceDatabaseRestore
// flag is set.
//
if ( RdbFixupQuorumDiskSignature( dwQuorumDiskSignature ) ) {
Status = FmGetQuorumResource( &pQuoGroup, NULL );
if ( Status != ERROR_SUCCESS ) {
Status = ERROR_QUORUM_DISK_NOT_FOUND;
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Could not get quorum resource even after fix up, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
} else {
Status = ERROR_QUORUM_DISK_NOT_FOUND;
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] ClusterForm: Could not get quorum resource, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
} else {
Status = ERROR_QUORUM_DISK_NOT_FOUND;
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] ClusterForm: Could not get quorum resource. No fixup attempted. Status = %1!u!\n",
Status);
goto partial_form_exit;
}
}
//arbitration for some quorum resources (MNS) takes a while, and since we call
//arbitrate from online as well, we should inform the SCM that we are making
//progress
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// Call the Database Manager to update the cluster registry.
//
Status = DmFormNewCluster();
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling DmUpdateFormNewCluster, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
if (FmDoesQuorumAllowLogging(CLUS_CHAR_UNKNOWN) != ERROR_SUCCESS)
CsNoQuorumLogging = TRUE;
if (!CsNoQuorum)
{
// Bring the quorum resource online
dwError = FmBringQuorumOnline();
if ((dwError == ERROR_IO_PENDING) || (dwError == ERROR_SUCCESS))
{
//checkpoint with the SCM once again before waiting for log recovery;
//if the log mount takes a long time then DmWaitQuorumResOnline()
//should also increment the checkpoints
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//this waits on an event for the quorum resource to come online;
//when the quorum resource comes online, the log file is opened
//if the noquorumlogging flag is not specified
if ((dwError = DmWaitQuorumResOnline()) != ERROR_SUCCESS)
{
ClRtlLogPrint(LOG_NOISE,
"[CS] Wait for quorum resource to come online failed, error=%1!u!\r\n",
dwError);
Status = ERROR_QUORUM_RESOURCE_ONLINE_FAILED;
goto partial_form_exit;
}
}
else
{
ClRtlLogPrint(LOG_UNUSUAL,
"[CS] couldnt bring quorum resource online, Error =%1!u!\n",
dwError);
CL_LOGFAILURE(dwError);
Status = ERROR_QUORUM_RESOURCE_ONLINE_FAILED;
goto partial_form_exit;
}
}
//update status with scm, the quorum resource may take a while to come online
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
if (!CsNoQuorumLogging)
{
//roll the Cluster Log File
if ((Status = DmRollChanges()) != ERROR_SUCCESS)
{
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling DmRollChanges, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
}
//
// Close the groups/resources created by the FM except for the quorum
// resource. The in-memory database needs to be created again with
// the newly rolled changes.
//
Status = FmFormNewClusterPhase1(pQuoGroup);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling FmOnline, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
#ifdef CLUSTER_TESTPOINT
TESTPT(TpFailFormNewCluster) {
Status = 999999;
goto partial_form_exit;
}
#endif
//
// Start up the Node Manager. This will form a cluster at the membership
// level.
//
Status = NmFormNewCluster();
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling NmOnline, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
//
//call any registry fixup callbacks, if they are registered.
//This is useful for upgrades/uninstalls if you want to clean up
//the registry
Status = NmPerformFixups(NM_FORM_FIXUP);
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling NmPerformFixups, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
//
// The API server can now be brought fully online. This enables us
// to receive calls.
//
Status = ApiOnline();
if ( Status != ERROR_SUCCESS) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] ApiInitPhase2 failed, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
//update status for scm
CsServiceStatus.dwCheckPoint++;
CsAnnounceServiceStatus();
//
// Call the Failover Manager Phase 2 routine next.
// Create the groups and resources.
//
Status = FmFormNewClusterPhase2();
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling FmOnline, Status = %1!u!\n",
Status);
goto partial_form_exit;
}
//
// Fire up the intracluster RPC server so we can receive calls.
//
Status = ClusterRegisterIntraclusterRpcInterface();
if ( Status != ERROR_SUCCESS ) {
goto partial_form_exit;
}
//
// Finish initializing the cluster wide event logging
//
// ASSUMPTION: this is called after the NM has established cluster
// membership.
//
if (!CsNoRepEvtLogging)
{
//if replicated logging is not disabled
Status = EvOnline();
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling EvOnline, Status = %1!u!\n",
Status);
}
}
if (!CsNoQuorumLogging)
{
//check if all nodes are up, if not take a checkpoint and
//turn quorum logging on
Status = DmUpdateFormNewCluster();
if ( Status != ERROR_SUCCESS ) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Error calling DmCompleteFormNewCluster, Status = %1!u!\n",
Status);
}
}
ClRtlLogPrint(LOG_NOISE, "[INIT] Successfully formed a cluster.\n");
return(ERROR_SUCCESS);
partial_form_exit:
ClRtlLogPrint(LOG_NOISE, "[INIT] Cleaning up failed form attempt.\n");
return(Status);
}
VOID
ClusterLeave(
VOID
)
/*++
Routine Description:
Removes the local node from an active cluster or cleans up after
a failed attempt to join or form a cluster.
Arguments:
None
Return Value:
None.
--*/
{
ClRtlLogPrint(LOG_NOISE, "[INIT] Leaving cluster\n");
//
// Turn off the cluster API
//
ApiOffline();
//
// If we are a cluster member, leave now.
//
NmLeaveCluster();
ClusterDeregisterRpcInterfaces();
return;
} // Cluster Leave
//
// RPC Server Control routines
//
RPC_STATUS
ClusterInitializeRpcServer(
VOID
)
/*++
Routine Description:
Initializes the RPC server for the cluster service.
Arguments:
None.
Return Value:
RPC_S_OK if the routine succeeds. An RPC error code if it fails.
--*/
{
RPC_STATUS Status;
DWORD i;
DWORD retry;
DWORD packagesRegistered = 0;
ClRtlLogPrint(LOG_NOISE, "[CS] Initializing RPC server.\n");
//
// Enable authentication of calls to our RPC interfaces. For NTLM,
// the principal name is ignored, but we'll need to supply one if we
// switch authentication services later on. Note that it is not
// necessary to specify an authentication service for each interface.
//
for ( i = 0; i < CsNumberOfRPCSecurityPackages; ++i ) {
Status = RpcServerRegisterAuthInfo(NULL,
CsRPCSecurityPackage[ i ],
NULL,
NULL);
if (Status == RPC_S_OK) {
++packagesRegistered;
} else {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to register %1!ws! authentication for RPC, status %2!u!.\n",
CsRPCSecurityPackageName[ i ],
Status);
}
}
if ( packagesRegistered == 0 ) {
return ERROR_CLUSTER_NO_RPC_PACKAGES_REGISTERED;
}
//
// Bind to UDP. This transport will be used by remote clients to
// access the clusapi interface and by cluster nodes to
// access the extrocluster (join) interface. This uses a dynamic
// endpoint.
//
Status = RpcServerUseProtseq(
TEXT("ncadg_ip_udp"),
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
NULL);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to bind RPC to UDP, status %1!u!.\n",
Status);
return(Status);
}
//
// Figure out which UDP endpoint we got so we can register it with
// the endpoint mapper later. We must do this before we register any
// other protocol sequences, or they will show up in the vector.
// Groveling the binding vector for a specific transport is no fun.
//
CL_ASSERT( CsRpcBindingVector == NULL);
Status = RpcServerInqBindings(&CsRpcBindingVector);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to obtain RPC binding vector, status %1!u!.\n",
Status);
return(Status);
}
//
// Bind to LRPC. This transport will be used by clients running on this
// system to access the clusapi interface. This also uses a dynamic endpoint.
//
Status = RpcServerUseProtseq(
TEXT("ncalrpc"),
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
NULL); // No SD. Let the object inherit from its "\RPC Control" parent object which has
// an IO ACE specifying R, W, E, for the World.
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to bind RPC to LPC, status %1!u!.\n",
Status);
return(Status);
}
//
// Register the dynamic LRPC endpoint with the local endpoint mapper database
//
Status = CspRegisterDynamicLRPCEndpoint ();
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register dynamic LRPC endpoint, status %1!u!.\n",
Status);
return(Status);
}
//
// Bind to CDP (Cluster Datagram Protocol). This transport will be used
// for the intracluster interface. This uses a well-known endpoint.
//
// GN: Sometimes it takes a couple of seconds for resrcmon to go away after
// a clean shutdown. When the SCM tries to restart the service, the following call will fail.
// To overcome this, we give up only after failing to bind RPC to CDP
// 10 times, with 1 second between attempts.
//
retry = 10;
for (;;) {
Status = RpcServerUseProtseqEp(
CLUSTER_RPC_PROTSEQ,
1, // Max calls
CLUSTER_RPC_PORT,
NULL);
if (Status != RPC_S_DUPLICATE_ENDPOINT || retry == 0) {
break;
}
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Unable to bind RPC to CDP, status %1!u!. Retrying...\n",
Status);
Sleep(1000);
--retry;
}
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to bind RPC to CDP, status %1!u!.\n",
Status);
return(Status);
}
//
// Start our RPC server. Note that we will not get any calls until
// we register our interfaces.
//
Status = RpcServerListen(
CS_CONCURRENT_RPC_CALLS,
RPC_C_LISTEN_MAX_CALLS_DEFAULT,
TRUE);
if ((Status != RPC_S_OK) && (Status != RPC_S_ALREADY_LISTENING)) {
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Unable to start RPC server, status %1!u!.\n",
Status
);
return(Status);
}
RpcSsDontSerializeContext();
return(RPC_S_OK);
}
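//
// A minimal bring-up sequence for a generic RPC server, assuming only the
// documented RPC runtime APIs. The interface handle example_v1_0_s_ifspec,
// the TCP endpoint "4000", and the function name are hypothetical placeholders.
//
#if 0
RPC_STATUS
ExampleStartRpcServer(
VOID
)
{
RPC_STATUS status;
// Bind to TCP on a well-known endpoint.
status = RpcServerUseProtseqEp(TEXT("ncacn_ip_tcp"),
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
TEXT("4000"),
NULL); // no security descriptor
if (status != RPC_S_OK) {
return status;
}
// Expose the interface; no security callback in this sketch.
status = RpcServerRegisterIf(example_v1_0_s_ifspec, NULL, NULL);
if (status != RPC_S_OK) {
return status;
}
// DontWait == TRUE returns immediately, as the cluster service does above.
return RpcServerListen(1, RPC_C_LISTEN_MAX_CALLS_DEFAULT, TRUE);
}
#endif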
DWORD
ClusterRegisterIntraclusterRpcInterface(
VOID
)
{
DWORD Status;
Status = RpcServerRegisterIfEx(
s_IntraCluster_v2_0_s_ifspec,
NULL,
NULL,
0, // No need to set RPC_IF_ALLOW_SECURE_ONLY if security callback
// is specified. If security callback is specified, RPC
// will reject unauthenticated requests without invoking
// callback. This is the info obtained from RpcDev. See
// Windows Bug 572035.
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
reinterpret_cast<RPC_IF_CALLBACK_FN(__stdcall *)>( ApipConnectCallback )
);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register the IntraCluster interface, Status %1!u!.\n",
Status
);
return(Status);
}
CspIntraclusterRpcServerStarted = TRUE;
return(ERROR_SUCCESS);
} // ClusterRegisterIntraclusterRpcInterface
DWORD
ClusterRegisterExtroclusterRpcInterface(
VOID
)
{
DWORD Status;
Status = RpcServerRegisterIfEx(
s_ExtroCluster_v2_0_s_ifspec,
NULL,
NULL,
0, // No need to set RPC_IF_ALLOW_SECURE_ONLY if security callback
// is specified. If security callback is specified, RPC
// will reject unauthenticated requests without invoking
// callback. This is the info obtained from RpcDev. See
// Windows Bug 572035.
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
reinterpret_cast<RPC_IF_CALLBACK_FN( __stdcall *)>( ApipConnectCallback )
);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register the ExtroCluster interface, status %1!u!.\n",
Status
);
return(Status);
}
CL_ASSERT( CsRpcBindingVector != NULL);
Status = RpcEpRegister(
s_ExtroCluster_v2_0_s_ifspec,
CsRpcBindingVector,
NULL,
L"Microsoft Extrocluster Interface"
);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register the ExtroCluster interface endpoint, status %1!u!.\n",
Status
);
NmDumpRpcExtErrorInfo(Status);
return(Status);
}
return(ERROR_SUCCESS);
} // ClusterRegisterExtroclusterRpcInterface
DWORD
ClusterRegisterJoinVersionRpcInterface(
VOID
)
{
DWORD Status;
Status = RpcServerRegisterIfEx(
s_JoinVersion_v2_0_s_ifspec,
NULL,
NULL,
0, // No need to set RPC_IF_ALLOW_SECURE_ONLY if security callback
// is specified. If security callback is specified, RPC
// will reject unauthenticated requests without invoking
// callback. This is the info obtained from RpcDev. See
// Windows Bug 572035.
RPC_C_PROTSEQ_MAX_REQS_DEFAULT,
reinterpret_cast<RPC_IF_CALLBACK_FN *>( ApipConnectCallback )
);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register the JoinVersion interface, status %1!u!.\n",
Status
);
return(Status);
}
CL_ASSERT( CsRpcBindingVector != NULL);
Status = RpcEpRegister(
s_JoinVersion_v2_0_s_ifspec,
CsRpcBindingVector,
NULL,
L"Microsoft JoinVersion Interface"
);
if (Status != RPC_S_OK) {
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] Unable to register the JoinVersion interface endpoint, status %1!u!.\n",
Status
);
NmDumpRpcExtErrorInfo(Status);
return(Status);
}
return(ERROR_SUCCESS);
} // ClusterRegisterJoinVersionRpcInterface
VOID
ClusterDeregisterRpcInterfaces(
VOID
)
{
RPC_STATUS Status;
ClRtlLogPrint(LOG_NOISE,
"[INIT] Deregistering RPC endpoints & interfaces.\n"
);
//
// Deregister the Extrocluster and JoinVersion interface endpoints.
// There is no endpoint for the Intracluster interface.
//
if (CsRpcBindingVector != NULL) {
Status = RpcEpUnregister(
s_ExtroCluster_v2_0_s_ifspec,
CsRpcBindingVector,
NULL
);
if ((Status != RPC_S_OK) && (Status != EPT_S_NOT_REGISTERED)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to deregister endpoint for ExtroCluster interface, status %1!u!.\n",
Status
);
}
Status = RpcEpUnregister(
s_JoinVersion_v2_0_s_ifspec,
CsRpcBindingVector,
NULL
);
if ((Status != RPC_S_OK) && (Status != EPT_S_NOT_REGISTERED)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to deregister endpoint for JoinVersion interface, status %1!u!.\n",
Status
);
}
}
//
// Deregister the interfaces
//
Status = RpcServerUnregisterIf(
s_ExtroCluster_v2_0_s_ifspec,
NULL,
1 // Wait for outstanding calls to complete
);
if ((Status != RPC_S_OK) && (Status != RPC_S_UNKNOWN_IF)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Unable to deregister the ExtroCluster interface, Status %1!u!.\n",
Status
);
}
Status = RpcServerUnregisterIf(
s_JoinVersion_v2_0_s_ifspec,
NULL,
1 // Wait for outstanding calls to complete
);
if ((Status != RPC_S_OK) && (Status != RPC_S_UNKNOWN_IF)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Unable to deregister the JoinVersion interface, Status %1!u!.\n",
Status
);
}
Status = RpcServerUnregisterIf(
s_IntraCluster_v2_0_s_ifspec,
NULL,
1 // Wait for outstanding calls to complete
);
if ((Status != RPC_S_OK) && (Status != RPC_S_UNKNOWN_IF)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Unable to deregister the IntraCluster interface, Status %1!u!.\n",
Status
);
}
return;
} // ClusterDeregisterRpcInterfaces
VOID
ClusterShutdownRpcServer(
VOID
)
{
RPC_STATUS Status;
ClRtlLogPrint(LOG_NOISE, "[INIT] Shutting down RPC server.\n");
ClusterDeregisterRpcInterfaces();
Status = RpcMgmtStopServerListening(NULL);
if ((Status != RPC_S_OK) && (Status != RPC_S_NOT_LISTENING)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to shutdown RPC server, status %1!u!.\n",
Status
);
}
#if 0
//
// Note - We really should wait for all outstanding calls to complete,
// but we can't because there is no way to shutdown any
// pending API GetNotify calls.
//
Status = RpcMgmtWaitServerListen();
if ((Status != RPC_S_OK) && (Status != RPC_S_NOT_LISTENING)) {
ClRtlLogPrint(LOG_UNUSUAL,
"[INIT] Failed to wait for all RPC calls to complete, status %1!u!.\n",
Status
);
}
#endif // 0
if (CsRpcBindingVector != NULL) {
RpcBindingVectorFree(&CsRpcBindingVector);
CsRpcBindingVector = NULL;
}
return;
} // ClusterShutdownRpcServer
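//
// For reference, the fully clean shutdown pairing that the #if 0 comment
// above alludes to, assuming no calls (such as the pending GetNotify case)
// that can block forever. The function name is illustrative only.
//
#if 0
static VOID
ExampleStopRpcServerCleanly(
VOID
)
{
RpcMgmtStopServerListening(NULL); // NULL binding handle = this application
RpcMgmtWaitServerListen(); // returns once all active calls drain
}
#endif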
LONG
CspExceptionFilter(
IN PEXCEPTION_POINTERS ExceptionInfo
)
/*++
Routine Description:
Top level exception handler for the cluster service process.
Currently this just exits immediately and assumes that the
cluster proxy will notice and restart us as appropriate.
Arguments:
ExceptionInfo - Supplies the exception information
Return Value:
EXCEPTION_CONTINUE_SEARCH, so that the system-level handler runs if the
process was not already terminated.
--*/
{
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Exception. Code = 0x%1!lx!, Address = 0x%2!p!\n",
ExceptionInfo->ExceptionRecord->ExceptionCode,
ExceptionInfo->ExceptionRecord->ExceptionAddress);
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Exception parameters: %1!lx!, %2!lx!, %3!lx!, %4!lx!\n",
ExceptionInfo->ExceptionRecord->ExceptionInformation[0],
ExceptionInfo->ExceptionRecord->ExceptionInformation[1],
ExceptionInfo->ExceptionRecord->ExceptionInformation[2],
ExceptionInfo->ExceptionRecord->ExceptionInformation[3]);
GenerateExceptionReport(ExceptionInfo);
if (lpfnOriginalExceptionFilter)
lpfnOriginalExceptionFilter(ExceptionInfo);
// the system level handler will be invoked if we return
// EXCEPTION_CONTINUE_SEARCH - for debug, don't terminate the process
if ( IsDebuggerPresent()) {
return(EXCEPTION_CONTINUE_SEARCH);
} else {
#if !CLUSTER_BETA
TerminateProcess( GetCurrentProcess(),
ExceptionInfo->ExceptionRecord->ExceptionCode );
#endif
return(EXCEPTION_CONTINUE_SEARCH);
}
}
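//
// A minimal sketch of installing a process-wide filter in the style of
// CspExceptionFilter, assuming only the documented SetUnhandledExceptionFilter
// API. The ExampleFilter and ExampleInstallFilter names are hypothetical.
//
#if 0
static LONG WINAPI
ExampleFilter(
PEXCEPTION_POINTERS Info
)
{
// A real filter would log Info->ExceptionRecord->ExceptionCode here.
// EXCEPTION_CONTINUE_SEARCH hands the exception to the next handler.
return EXCEPTION_CONTINUE_SEARCH;
}
static VOID
ExampleInstallFilter(
VOID
)
{
// The previous filter is returned and can be chained to, exactly as
// CspExceptionFilter chains to lpfnOriginalExceptionFilter.
LPTOP_LEVEL_EXCEPTION_FILTER prev = SetUnhandledExceptionFilter(ExampleFilter);
(void)prev;
}
#endif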
VOID
CsInconsistencyHalt(
IN DWORD Status
)
{
WCHAR string[16];
DWORD status;
//
// Chittur Subbaraman (chitturs) - 12/17/99
//
// Announce your status to the SCM as SERVICE_STOP_PENDING so that
// it does not affect restart. Also, it could let clients learn
// of the error status.
//
CsServiceStatus.dwCurrentState = SERVICE_STOP_PENDING;
CsServiceStatus.dwControlsAccepted = 0;
CsServiceStatus.dwCheckPoint = 0;
CsServiceStatus.dwWaitHint = 0;
status = CspSetErrorCode( Status, &CsServiceStatus );
CsAnnounceServiceStatus();
wsprintfW(&(string[0]), L"%u", Status);
ClRtlLogPrint(LOG_CRITICAL,
"[CS] Halting this node to prevent an inconsistency within the cluster. Error status = %1!u!\n",
Status
);
CsLogEvent1(
LOG_CRITICAL,
CS_EVENT_INCONSISTENCY_HALT,
string
);
//release the mutex so that the service can acquire it
//without a delay when it restarts
ReleaseMutex(CspMutex);
ExitProcess(status); // return the fake error code
}
PVOID
CsAlloc(
DWORD Size
)
{
PVOID p;
p = LocalAlloc(LMEM_FIXED, Size);
if (p == NULL) {
CsInconsistencyHalt( ERROR_NOT_ENOUGH_MEMORY );
}
return(p);
}
LPWSTR
CsStrDup(
LPCWSTR String
)
{
LPWSTR p;
DWORD Len;
Len = (lstrlenW(String)+1)*sizeof(WCHAR);
p=static_cast<LPWSTR>(LocalAlloc(LMEM_FIXED, Len));
if (p==NULL) {
CsInconsistencyHalt( ERROR_NOT_ENOUGH_MEMORY );
}
CopyMemory(p,String,Len);
return(p);
}
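//
// Typical use of the fail-fast allocators above: callers need not check for
// NULL because CsAlloc and CsStrDup halt the service on allocation failure.
// The function name, variable names, and sizes here are illustrative only.
//
#if 0
static VOID
ExampleUseAllocators(
VOID
)
{
LPWSTR copy = CsStrDup(L"ClusterName"); // halts instead of returning NULL
PVOID blob = CsAlloc(128); // same fail-fast guarantee
LocalFree(copy);
LocalFree(blob);
}
#endif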
DWORD
VssWriterInit(
VOID
)
/*++
Routine Description:
Start subscribing for volume snapshot events as a writer.
Arguments:
None.
Return Value:
ERROR_SUCCESS - Subscription succeeded.
An error status if the subscription fails.
Comments:
Should never be called from a ServiceMain() since this function could result
in demand-starting the EventSystem service. During autostart, any call from
ServiceMain() that would demand-start a service will cause the calling service
to hang.
--*/
{
DWORD dwStatus = ERROR_SUCCESS;
HRESULT hr;
//
// When this function is called, it is possible this global is not initialized
// by ClusterInitialize since it can return with success in an evict cleanup
// case. In that case, bail.
//
if ( !g_pCVssWriterCluster ) goto FnExit;
ClRtlLogPrint( LOG_NOISE, "[INIT] VSS Initializing\n" );
hr = g_pCVssWriterCluster->Initialize( g_VssIdCluster, // VSS_ID WriterId;
L"Cluster Service Writer", // LPCWSTR WriterName;
VSS_UT_SYSTEMSERVICE, // VSS_USAGE_TYPE UsageType;
VSS_ST_OTHER // VSS_SOURCE_TYPE SourceType;
// <default> VSS_APPLICATION_LEVEL AppLevel;
// <default> DWORD dwTimeoutFreeze
);
if ( FAILED( hr )) {
ClRtlLogPrint( LOG_CRITICAL, "[INIT] VSS Failed to initialize VSS, status 0x%1!x!\n", hr );
dwStatus = HRESULT_CODE( hr );
goto FnExit;
}
// Now we need to subscribe so that we get the events for backup.
//
ClRtlLogPrint( LOG_NOISE, "[INIT] VSS Calling subscribe to register for backup events.\n" );
hr = g_pCVssWriterCluster->Subscribe( );
if ( FAILED( hr )) {
ClRtlLogPrint( LOG_CRITICAL, "[INIT] VSS Failed to subscribe to VSS, status 0x%1!x!\n", hr );
dwStatus = HRESULT_CODE( hr );
goto FnExit;
} else {
g_bCVssWriterClusterSubscribed = TRUE;
}
FnExit:
return ( dwStatus );
}// VssWriterInit
RPC_STATUS
CspRegisterDynamicLRPCEndpoint(
VOID
)
/*++
Routine Description:
Inquire the server bindings, look for the LRPC protocol and register the clusapi interface
with the dynamic endpoint obtained for the LRPC protocol.
Arguments:
None.
Return Value:
RPC_S_OK if successful.
RPC error code otherwise.
--*/
{
RPC_STATUS rpcStatus;
RPC_BINDING_VECTOR *pServerBindingVector = NULL;
DWORD i;
WCHAR *pszProtSeq = NULL, *pServerStringBinding = NULL;
//
// Get the server binding vector. This includes all the protocols and EP's registered
// so far.
//
rpcStatus = RpcServerInqBindings( &pServerBindingVector );
if ( rpcStatus != RPC_S_OK )
{
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] CspRegisterDynamicLRPCEndpoint: Unable to inquire server bindings, status %1!u!.\n",
rpcStatus);
NmDumpRpcExtErrorInfo( rpcStatus );
goto FnExit;
}
//
// Grovel the binding vector looking for the LRPC protocol information.
//
for( i = 0; i < pServerBindingVector->Count; i++ )
{
rpcStatus = RpcBindingToStringBinding( pServerBindingVector->BindingH[i],
&pServerStringBinding );
if ( rpcStatus != RPC_S_OK )
{
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] CspRegisterDynamicLRPCEndpoint: Unable to convert binding to string, status %1!u!.\n",
rpcStatus);
NmDumpRpcExtErrorInfo( rpcStatus );
goto FnExit;
}
rpcStatus = RpcStringBindingParse( pServerStringBinding,
NULL,
&pszProtSeq,
NULL,
NULL,
NULL );
if ( rpcStatus != RPC_S_OK )
{
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] CspRegisterDynamicLRPCEndpoint: Unable to parse server string binding, status %1!u!.\n",
rpcStatus);
NmDumpRpcExtErrorInfo( rpcStatus );
goto FnExit;
}
if ( lstrcmp ( pszProtSeq, TEXT("ncalrpc") ) == 0 )
{
//
// Found the LRPC protocol information
//
RPC_BINDING_VECTOR LrpcBindingVector;
LrpcBindingVector.Count = 1;
LrpcBindingVector.BindingH[0] = pServerBindingVector->BindingH[i];
//
// Register the dynamic endpoint obtained for the clusapi interface to field
// local calls.
//
rpcStatus = RpcEpRegister( s_clusapi_v2_0_s_ifspec,
&LrpcBindingVector,
NULL,
TEXT("Microsoft Cluster Server Local API") );
if ( rpcStatus != RPC_S_OK )
{
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] CspRegisterDynamicLRPCEndpoint: Unable to register the clusapi interface lrpc endpoint, status %1!u!.\n",
rpcStatus);
NmDumpRpcExtErrorInfo( rpcStatus );
goto FnExit;
}
ClRtlLogPrint(LOG_NOISE,
"[INIT] CspRegisterDynamicLRPCEndpoint: Successfully registered LRPC endpoint with EP mapper\n");
goto FnExit;
}
RpcStringFree( &pszProtSeq );
pszProtSeq = NULL;
RpcStringFree( &pServerStringBinding );
pServerStringBinding = NULL;
} // for
//
// If you didn't find the LRPC information, return an error.
//
if ( i == pServerBindingVector->Count )
{
rpcStatus = RPC_S_NO_BINDINGS;
ClRtlLogPrint(LOG_CRITICAL,
"[INIT] CspRegisterDynamicLRPCEndpoint: Unable to get info on the LRPC binding, status %1!u!.\n",
rpcStatus);
goto FnExit;
}
FnExit:
//
// Free the strings and the binding vector if they haven't already been freed
//
if ( pszProtSeq != NULL ) RpcStringFree ( &pszProtSeq );
if ( pServerStringBinding != NULL ) RpcStringFree( &pServerStringBinding );
if ( pServerBindingVector != NULL ) RpcBindingVectorFree( &pServerBindingVector );
return ( rpcStatus );
}// CspRegisterDynamicLRPCEndpoint