/* (removed: repository web-page chrome and line-count metadata accidentally captured above the file header) */
/*++
|
|
|
|
Copyright(c) 1998,99 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
load.c
|
|
|
|
Abstract:
|
|
Windows Load Balancing Service (WLBS)
|
|
Driver - load balancing algorithm
|
|
|
|
Author:
|
|
|
|
bbain
|
|
|
|
ToDo:
|
|
Kernel mode queue mgt
|
|
Fail safe mode (single server for everything)
|
|
--*/
|
|
|
|
#ifdef KERNEL_MODE
|
|
|
|
#include <ntddk.h>
|
|
|
|
#include "log.h"
|
|
#include "univ.h"
|
|
#include "main.h" // added for multiple nic
|
|
|
|
static ULONG log_module_id = LOG_MODULE_LOAD;
|
|
|
|
#else
|
|
|
|
#include <stdlib.h>
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include "wlbsparm.h"
|
|
#include "params.h"
|
|
#include "wlbsiocl.h"
|
|
#include "wlbsip.h"
|
|
#include "load.h"
|
|
#include "nlbwmi.h"
|
|
|
|
//
|
|
// For WPP Event Tracing
|
|
//
|
|
#include "trace.h" // for event tracing
|
|
#include "load.tmh" // for event tracing
|
|
#ifndef KERNEL_MODE
|
|
|
|
#define UNIV_PRINT_INFO(msg) { \
|
|
printf ("NLB (Information) [%s:%d] ", __FILE__, __LINE__); \
|
|
printf msg; \
|
|
printf ("\n"); \
|
|
}
|
|
|
|
#define UNIV_PRINT_CRIT(msg) { \
|
|
printf ("NLB (Error) [%s:%d] ", __FILE__, __LINE__); \
|
|
printf msg; \
|
|
printf ("\n"); \
|
|
}
|
|
|
|
#if 0
|
|
|
|
#define UNIV_PRINT_VERB(msg) { \
|
|
printf ("NLB (Verbose) [%s:%d] ", __FILE__, __LINE__); \
|
|
printf msg; \
|
|
printf ("\n"); \
|
|
}
|
|
|
|
#else
|
|
|
|
#define UNIV_PRINT_VERB(msg)
|
|
|
|
#endif
|
|
|
|
#define Univ_ulong_to_str(x, y, z) (y)
|
|
|
|
#define LOG_MSG(c,s)
|
|
#define LOG_MSG1(c,s,d1)
|
|
#define LOG_MSG2(c,s,d1,d2)
|
|
|
|
#else
|
|
|
|
#endif
|
|
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
GLOBAL_CONN_QUEUE g_conn_estabq[CVY_MAX_CHASH]; /* Global queue of all established connections across all NLB instances. */
|
|
GLOBAL_CONN_QUEUE g_conn_pendingq[CVY_MAX_CHASH]; /* Global queue of pending connections that may or may not end up being
|
|
established on a NIC to which NLB is bound. */
|
|
HANDLE g_pending_conn_pool = NULL; /* Global fixed-size block pool of PENDING_ENTRYs. */
|
|
#endif
|
|
|
|
void Bin_state_print(PBIN_STATE bp, ULONG my_host_id);
|
|
void Load_conn_kill(PLOAD_CTXT lp, PBIN_STATE bp);
|
|
PBIN_STATE Load_pg_lookup(PLOAD_CTXT lp, ULONG svr_ipaddr, ULONG svr_port, BOOLEAN is_tcp);
|
|
|
|
VOID Load_init_fsb(PLOAD_CTXT lp, PCONN_DESCR dp);
|
|
VOID Load_init_dscr(PLOAD_CTXT lp, PCONN_ENTRY ep, BOOLEAN alloc);
|
|
VOID Load_put_dscr(PLOAD_CTXT lp, PBIN_STATE bp, PCONN_ENTRY ep);
|
|
|
|
#if 0 /* v2.06 */
|
|
#define BIN_ALL_ONES ((MAP_T)-1) /* bin map state for 64 ones (v2.04) */
|
|
#endif
|
|
#define BIN_ALL_ONES ((MAP_T)(0xFFFFFFFFFFFFFFF)) /* bin map state for 60 ones (v2.04) */
|
|
|
|
/* Byte offset of a field in a structure of the specified type: */
|
|
|
|
#define CVY_FIELD_OFFSET(type, field) ((LONG_PTR)&(((type *)0)->field))
|
|
|
|
/*
|
|
* Address of the base of the structure given its type, field name, and the
|
|
* address of a field or field offset within the structure:
|
|
*/
|
|
|
|
#define STRUCT_PTR(address, type, field) ((type *)( \
|
|
(PCHAR)(address) - \
|
|
(PCHAR)CVY_FIELD_OFFSET(type, field)))
|
|
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
/* Mark code that is used only during initialization. */
|
|
#pragma alloc_text (INIT, LoadEntry)
|
|
|
|
/*
|
|
* Function: LoadEntry
|
|
* Description: This function is called from DriverEntry to allow the load module to perform
|
|
* any one-time intialization of global data.
|
|
* Parameters: None.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 4.21.02
|
|
* Notes:
|
|
*/
|
|
VOID LoadEntry ()
|
|
{
|
|
INT index;
|
|
|
|
/* Initialize the global connection queues. */
|
|
for (index = 0; index < CVY_MAX_CHASH; index++)
|
|
{
|
|
/* Allocate the spin lock to protect the queue. */
|
|
NdisAllocateSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* Initialize the queue head. */
|
|
Queue_init(&g_conn_pendingq[index].queue);
|
|
|
|
/* Allocate the spin lock to protect the queue. */
|
|
NdisAllocateSpinLock(&g_conn_estabq[index].lock);
|
|
|
|
/* Initialize the queue head. */
|
|
Queue_init(&g_conn_estabq[index].queue);
|
|
}
|
|
|
|
/* Allocate a fixed-size block pool for pending connection entries. */
|
|
g_pending_conn_pool = NdisCreateBlockPool(sizeof(PENDING_ENTRY), 0, 'pBLN', NULL);
|
|
|
|
if (g_pending_conn_pool == NULL)
|
|
{
|
|
UNIV_PRINT_CRIT(("LoadEntry: Error creating fixed-size block pool"));
|
|
TRACE_CRIT("%!FUNC! Error creating fixed-size block pool");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Function: LoadUnload
|
|
* Description: This function is called from Init_unload to allow the load module to perform
|
|
* any last minute tear-down of global data.
|
|
* Parameters: None.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 4.21.02
|
|
* Notes: By the time this function is called, we are guaranteed to have de-registered
|
|
* our TCP callback function, if it was indeed registered. Because ExUnregisterCallback
|
|
* guarantees that it will not return until all pending ExNotifyCallback routines
|
|
* have completed, we can be sure that by the time we get here, there will certainly
|
|
* not be anybody accessing any of the global connection queues or FSB pools.
|
|
*/
|
|
VOID LoadUnload ()
|
|
{
|
|
INT index;
|
|
|
|
/* Destroy the fixed-size block pool and all descriptors therein.
|
|
Note that NdisDestroyBlockPool expects all allocated blocks
|
|
have been returned to the pool (freed) before it is called. */
|
|
if (g_pending_conn_pool != NULL)
|
|
{
|
|
/* Loop through all of the connection descriptor queues and
|
|
free all of the descriptors we've allocated. */
|
|
for (index = 0; index < CVY_MAX_CHASH; index++)
|
|
{
|
|
PPENDING_ENTRY pp = NULL;
|
|
|
|
NdisAcquireSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* Dequeue the head of the queue. */
|
|
pp = (PPENDING_ENTRY)Queue_deq(&g_conn_pendingq[index].queue);
|
|
|
|
while (pp != NULL)
|
|
{
|
|
UNIV_ASSERT(pp->code == CVY_PENDINGCODE);
|
|
|
|
/* Free the descriptor back to the fixed-size block pool. */
|
|
NdisFreeToBlockPool((PUCHAR)pp);
|
|
|
|
/* Get the next descriptor in the queue. */
|
|
pp = (PPENDING_ENTRY)Queue_deq(&g_conn_pendingq[index].queue);
|
|
}
|
|
|
|
NdisReleaseSpinLock(&g_conn_pendingq[index].lock);
|
|
}
|
|
|
|
/* Destroy the fixed-size block pool. */
|
|
NdisDestroyBlockPool(g_pending_conn_pool);
|
|
}
|
|
|
|
/* De-initialize the global connection queues. */
|
|
for (index = 0; index < CVY_MAX_CHASH; index++)
|
|
{
|
|
/* Free the spin locks. */
|
|
NdisFreeSpinLock(&g_conn_estabq[index].lock);
|
|
NdisFreeSpinLock(&g_conn_pendingq[index].lock);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Function: Load_teaming_consistency_notify
|
|
* Description: This function is called to notify a team in which this adapter
|
|
* might be participating whether the teaming configuration in the
|
|
* heartbeats is consistent or not. Inconsistent configuration
|
|
* results in the entire team being marked inactive - meaning that
|
|
* no adapter in the team will handle any traffic, except to the DIP.
|
|
* Parameters: member - a pointer to the team membership information for this adapter.
|
|
* consistent - a boolean indicating the polarity of teaming consistency.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 3.29.01
|
|
* Notes: In order to check to see whether or not this adapter is part of a team,
|
|
* we need to look into the team member information for this adapter. This
|
|
* access should be locked, but for performance reasons, we will only lock
|
|
* and check for sure if we "think" we're part of a team. Worst case is that
|
|
* we are in the process of joining a team and we missed this check - no
|
|
* matter, we'll notify them when/if we see this again.
|
|
*/
|
|
VOID Load_teaming_consistency_notify (IN PBDA_MEMBER member, IN BOOL consistent) {
|
|
|
|
/* Make sure that the membership information points to something. */
|
|
UNIV_ASSERT(member);
|
|
|
|
/* We can check without locking to keep the common case minimally expensive. If we do think
|
|
we're part of a team, then we'll grab the lock and make sure. If our first indication is
|
|
that we're not part of a team, then just bail out and if we actually are part of a team,
|
|
we'll be through here again later to notify our team if necessary. */
|
|
if (!member->active)
|
|
return;
|
|
|
|
NdisAcquireSpinLock(&univ_bda_teaming_lock);
|
|
|
|
/* If we are an active member of a BDA team, then notify our team of our state. */
|
|
if (member->active) {
|
|
/* Assert that the team actually points to something. */
|
|
UNIV_ASSERT(member->bda_team);
|
|
|
|
/* Assert that the member ID is valid. */
|
|
UNIV_ASSERT(member->member_id <= CVY_BDA_MAXIMUM_MEMBER_ID);
|
|
|
|
if (consistent) {
|
|
UNIV_PRINT_VERB(("Load_teaming_consistency_notify: Consistent configuration detected."));
|
|
TRACE_VERB("%!FUNC! we are a consistent active member of a BDA team");
|
|
|
|
/* Mark this member as consistent. */
|
|
member->bda_team->consistency_map |= (1 << member->member_id);
|
|
} else {
|
|
UNIV_PRINT_VERB(("Load_teaming_consistency_notify: Inconsistent configuration detected."));
|
|
TRACE_VERB("%!FUNC! we are an inconsistent active member of a BDA team");
|
|
|
|
/* Mark this member as inconsistent. */
|
|
member->bda_team->consistency_map &= ~(1 << member->member_id);
|
|
|
|
/* Inactivate the team. */
|
|
member->bda_team->active = FALSE;
|
|
}
|
|
}
|
|
|
|
NdisReleaseSpinLock(&univ_bda_teaming_lock);
|
|
}
|
|
|
|
/*
 * Function: Load_teaming_consistency_check
 * Description: This function is used to check our teaming configuration against the
 *              teaming configuration received in a remote heartbeat. It does little
 *              more than check the equality of two DWORDS, however, if this is our
 *              first notification of bad configuration, it prints a few debug state-
 *              ments as well.
 * Parameters: bAlreadyKnown - a boolean indication of whether or not we have already detected bad configuration.
 *             If the misconfiguration is already known, no additional logging is done.
 *             member - a pointer to the team member structure for this adapter.
 *             myConfig - a DWORD containing the teaming "code" for me.
 *             theirConfig - a DWORD containing the teaming "code" received in the heartbeat from them.
 *             version - the NLB version of the remote heartbeat (pre-CVY_VERSION_FULL
 *             heartbeats carry no valid teaming code).
 * Returns: BOOLEAN (as ULONG) - TRUE means the configuration is consistent, FALSE indicates that it is not.
 * Author: shouse, 3.29.01
 * Notes: In order to check to see whether or not this adapter is part of a team,
 *        we need to look into the team member information for this adapter. This
 *        access should be locked, but for performance reasons, we will only lock
 *        and check for sure if we "think" we're part of a team. Worst case is that
 *        we are in the process of joining a team and we missed this check - no
 *        matter, we'll check again on the next heartbeat.
 */
ULONG Load_teaming_consistency_check (IN BOOLEAN bAlreadyKnown, IN PBDA_MEMBER member, IN ULONG myConfig, IN ULONG theirConfig, IN ULONG version) {

    /* We can check without locking to keep the common case minimally expensive. If we do think
       we're part of a team, then we'll grab the lock and make sure. If our first indication is
       that we're not part of a team, then just bail out and if we actually are part of a team,
       we'll be through here again later to check the consistency. */
    if (!member->active)
        return TRUE;

    NdisAcquireSpinLock(&univ_bda_teaming_lock);

    /* If we are part of a BDA team, check the BDA teaming configuration consistency.
       Note that the lock is released as soon as membership is confirmed: the rest of
       this path only reads the caller-supplied values, not shared team state. */
    if (member->active) {

        NdisReleaseSpinLock(&univ_bda_teaming_lock);

        /* If the heartbeat is an NT4.0 or Win2k heartbeat, then we can't trust the teaming
           ULONG in the heartbeat, which would contain some random garbage. In this case,
           we know that we're teaming but the peer does not support it, so we bail out and
           report an error. */
        if (version < CVY_VERSION_FULL) {
            if (!bAlreadyKnown) {
                UNIV_PRINT_CRIT(("Load_teaming_consistency_check: Bad teaming configuration detected: NT4.0/Win2k host in a teaming cluster"));
                TRACE_CRIT("%!FUNC! Bad teaming configuration detected: NT4.0/Win2k host in a teaming cluster");
            }

            return FALSE;
        }

        /* If the bi-directional affinity teaming configurations don't match, do something about it.
           Each field of the packed code word is compared separately below purely to produce a
           precise diagnostic; the inequality itself is what fails the check. */
        if (myConfig != theirConfig) {
            if (!bAlreadyKnown) {
                UNIV_PRINT_CRIT(("Load_teaming_consistency_check: Bad teaming configuration detected: Mine=0x%08x, Theirs=0x%08x", myConfig, theirConfig));
                TRACE_CRIT("%!FUNC! Bad teaming configuration detected: Mine=0x%08x, Theirs=0x%08x", myConfig, theirConfig);

                /* Report whether or not the teaming active flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK)) {
                    UNIV_PRINT_VERB(("Load_teaming_consistency_check: Teaming active flags do not match: Mine=%d, Theirs=%d",
                                     (myConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET,
                                     (theirConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET));
                    TRACE_VERB("%!FUNC! Teaming active flags do not match: Mine=%d, Theirs=%d",
                               (myConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET,
                               (theirConfig & CVY_BDA_TEAMING_CODE_ACTIVE_MASK) >> CVY_BDA_TEAMING_CODE_ACTIVE_OFFSET);
                }

                /* Report whether or not the master flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK)) {
                    UNIV_PRINT_VERB(("Load_teaming_consistency_check: Master/slave settings do not match: Mine=%d, Theirs=%d",
                                     (myConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET,
                                     (theirConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET));
                    TRACE_VERB("%!FUNC! Master/slave settings do not match: Mine=%d, Theirs=%d",
                               (myConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET,
                               (theirConfig & CVY_BDA_TEAMING_CODE_MASTER_MASK) >> CVY_BDA_TEAMING_CODE_MASTER_OFFSET);
                }

                /* Report whether or not the reverse hashing flags are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK)) {
                    UNIV_PRINT_VERB(("Load_teaming_consistency_check: Reverse hashing flags do not match: Mine=%d, Theirs=%d",
                                     (myConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET,
                                     (theirConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET));
                    TRACE_VERB("%!FUNC! Reverse hashing flags do not match: Mine=%d, Theirs=%d",
                               (myConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET,
                               (theirConfig & CVY_BDA_TEAMING_CODE_HASHING_MASK) >> CVY_BDA_TEAMING_CODE_HASHING_OFFSET);
                }

                /* Report whether or not the number of team members is consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK)) {
                    UNIV_PRINT_VERB(("Load_teaming_consistency_check: Numbers of team members do not match: Mine=%d, Theirs=%d",
                                     (myConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET,
                                     (theirConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET));
                    TRACE_VERB("%!FUNC! Numbers of team members do not match: Mine=%d, Theirs=%d",
                               (myConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET,
                               (theirConfig & CVY_BDA_TEAMING_CODE_NUM_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_NUM_MEMBERS_OFFSET);
                }

                /* Report whether or not the team membership lists are consistent. */
                if ((myConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) != (theirConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK)) {
                    UNIV_PRINT_VERB(("Load_teaming_consistency_check: Participating members lists do not match: Mine=0x%04x, Theirs=0x%04x",
                                     (myConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET,
                                     (theirConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET));
                    TRACE_VERB("%!FUNC! Participating members lists do not match: Mine=0x%04x, Theirs=0x%04x",
                               (myConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET,
                               (theirConfig & CVY_BDA_TEAMING_CODE_MEMBERS_MASK) >> CVY_BDA_TEAMING_CODE_MEMBERS_OFFSET);
                }
            }

            return FALSE;
        }

        /* Codes match - the teaming configurations are consistent. */
        return TRUE;
    }

    /* Not actually teaming (lost the race with a team leave): nothing to check. */
    NdisReleaseSpinLock(&univ_bda_teaming_lock);

    return TRUE;
}
|
|
|
|
/*
|
|
* Function: Load_teaming_code_create
|
|
* Description: This function pieces together the ULONG code that represents the configuration
|
|
* of bi-directional affinity teaming on this adapter. If the adapter is not part
|
|
* of a team, then the code is zero.
|
|
* Parameters: code - a pointer to a ULONG that will receive the 32-bit code word.
|
|
* member - a pointer to the team member structure for this adapter.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 3.29.01
|
|
* Notes: In order to check to see whether or not this adapter is part of a team,
|
|
* we need to look into the team member information for this adapter. This
|
|
* access should be locked, but for performance reasons, we will only lock
|
|
* and check for sure if we "think" we're part of a team. Worst case is that
|
|
* we are in the process of joining a team and we missed this check - no
|
|
* matter, we'll be through here the next time er send a heartbeat anyway.
|
|
*/
|
|
VOID Load_teaming_code_create (OUT PULONG code, IN PBDA_MEMBER member) {
|
|
|
|
/* Assert that the code actually points to something. */
|
|
UNIV_ASSERT(code);
|
|
|
|
/* Assert that the membership information actually points to something. */
|
|
UNIV_ASSERT(member);
|
|
|
|
/* Reset the code. */
|
|
*code = 0;
|
|
|
|
/* We can check without locking to keep the common case minimally expensive. If we do think
|
|
we're part of a team, then we'll grab the lock and make sure. If our first indication is
|
|
that we're not part of a team, then just bail out and if we actually are part of a team,
|
|
we'll be through here again later to generate the code next time we send a heartbeat. */
|
|
if (!member->active)
|
|
return;
|
|
|
|
NdisAcquireSpinLock(&univ_bda_teaming_lock);
|
|
|
|
/* If we are in a team, fill in the team configuration information. */
|
|
if (member->active) {
|
|
/* Assert that the team actually points to something. */
|
|
UNIV_ASSERT(member->bda_team);
|
|
|
|
/* Add configuration information for teaming at each timeout. */
|
|
CVY_BDA_TEAMING_CODE_CREATE(*code,
|
|
member->active,
|
|
member->master,
|
|
member->reverse_hash,
|
|
member->bda_team->membership_count,
|
|
member->bda_team->membership_fingerprint);
|
|
}
|
|
|
|
NdisReleaseSpinLock(&univ_bda_teaming_lock);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_add_reference
|
|
* Description: This function adds a reference to the load module of a given adapter.
|
|
* Parameters: pLoad - a pointer to the load module to reference.
|
|
* Returns: ULONG - The incremented value.
|
|
* Author: shouse, 3.29.01
|
|
* Notes:
|
|
*/
|
|
ULONG Load_add_reference (IN PLOAD_CTXT pLoad) {
|
|
|
|
/* Assert that the load pointer actually points to something. */
|
|
UNIV_ASSERT(pLoad);
|
|
|
|
/* Increment the reference count. */
|
|
return NdisInterlockedIncrement(&pLoad->ref_count);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_release_reference
|
|
* Description: This function releases a reference on the load module of a given adapter.
|
|
* Parameters: pLoad - a pointer to the load module to dereference.
|
|
* Returns: ULONG - The decremented value.
|
|
* Author: shouse, 3.29.01
|
|
* Notes:
|
|
*/
|
|
ULONG Load_release_reference (IN PLOAD_CTXT pLoad) {
|
|
|
|
/* Assert that the load pointer actually points to something. */
|
|
UNIV_ASSERT(pLoad);
|
|
|
|
/* Decrement the reference count. */
|
|
return NdisInterlockedDecrement(&pLoad->ref_count);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_get_reference_count
|
|
* Description: This function returns the current load module reference count on a given adapter.
|
|
* Parameters: pLoad - a pointer to the load module to check.
|
|
* Returns: ULONG - The current reference count.
|
|
* Author: shouse, 3.29.01
|
|
* Notes:
|
|
*/
|
|
ULONG Load_get_reference_count (IN PLOAD_CTXT pLoad) {
|
|
|
|
/* Assert that the load pointer actually points to something. */
|
|
UNIV_ASSERT(pLoad);
|
|
|
|
/* Return the reference count. */
|
|
return pLoad->ref_count;
|
|
}
|
|
|
|
/* Hash routine is based on a public-domain Tiny Encryption Algorithm (TEA) by
|
|
David Wheeler and Roger Needham at the Computer Laboratory of Cambridge
|
|
University. For reference, please consult
|
|
http://vader.brad.ac.uk/tea/tea.shtml */
|
|
|
|
ULONG Map (
|
|
ULONG v1,
|
|
ULONG v2) /* v2.06: removed range parameter */
|
|
{
|
|
ULONG y = v1,
|
|
z = v2,
|
|
sum = 0;
|
|
|
|
const ULONG a = 0x67; //key [0];
|
|
const ULONG b = 0xdf; //key [1];
|
|
const ULONG c = 0x40; //key [2];
|
|
const ULONG d = 0xd3; //key [3];
|
|
|
|
const ULONG delta = 0x9E3779B9;
|
|
|
|
//
|
|
// Unroll the loop to improve performance
|
|
//
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
sum += delta;
|
|
y += (z << 4) + a ^ z + sum ^ (z >> 5) + b;
|
|
z += (y << 4) + c ^ y + sum ^ (y >> 5) + d;
|
|
|
|
return y ^ z;
|
|
} /* end Map */
|
|
|
|
/*
|
|
* Function: Load_simple_hash
|
|
* Description: This function is a simple hash based on the IP 4-tuple used to locate
|
|
* state for the connection. That is, this hash is used to determine the
|
|
* queue index in which this connection should store, and can later find,
|
|
* its state.
|
|
* Parameters: svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* Returns: ULONG - the result of the hash.
|
|
* Author: shouse, 4.15.02
|
|
* Notes:
|
|
*/
|
|
ULONG Load_simple_hash (
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port)
|
|
{
|
|
return (ULONG)(svr_ipaddr + client_ipaddr + (svr_port << 16) + (client_port << 0));
|
|
}
|
|
|
|
/*
|
|
* Function: Load_complex_hash
|
|
* Description: This is the conventional NLB hashing algorithm, which ends up invoking a
|
|
* light-weight encryption algorithm to calculate a hash that is ultimately
|
|
* used to map this connection to a bin, or "bucket". If reverse hashing
|
|
* is set, then server side parameters are used instead of client side. If
|
|
* limiting is set, then client and server side paramters should NOT be mixed
|
|
* when hashing; i.e. use ONLY server OR client, depending on reverse hashing.
|
|
* Parameters: svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* affinity - the client affinity (None, Single or Class C)
|
|
* reverse_hash - whether or not to reverse client and server during hashing
|
|
* limit_map_fn - whether or not to include server-side parameters in hashing
|
|
* Returns: ULONG - the result of the hash.
|
|
* Author: shouse, 4.15.02
|
|
* Notes:
|
|
*/
|
|
ULONG Load_complex_hash (
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
ULONG affinity,
|
|
ULONG reverse_hash,
|
|
ULONG limit_map_fn)
|
|
{
|
|
/* If we're not reverse-hashing, this is our conventional hash using primarily
|
|
the client information. If the map limit flag is set, then we are sure NOT
|
|
to use ANY server-side information in the hash. This is most common in BDA. */
|
|
if (!reverse_hash)
|
|
{
|
|
if (!limit_map_fn)
|
|
{
|
|
if (affinity == CVY_AFFINITY_NONE)
|
|
return Map(client_ipaddr, ((svr_port << 16) + client_port));
|
|
else if (affinity == CVY_AFFINITY_SINGLE)
|
|
return Map(client_ipaddr, svr_ipaddr);
|
|
else
|
|
return Map(client_ipaddr & TCPIP_CLASSC_MASK, svr_ipaddr);
|
|
}
|
|
else
|
|
{
|
|
if (affinity == CVY_AFFINITY_NONE)
|
|
return Map(client_ipaddr, client_port);
|
|
else if (affinity == CVY_AFFINITY_SINGLE)
|
|
return Map(client_ipaddr, MAP_FN_PARAMETER);
|
|
else
|
|
return Map(client_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
|
|
}
|
|
}
|
|
/* Otherwise, reverse the client and server information as we hash. Again, if
|
|
the map limit flag is set, use NO client-side information in the hash. */
|
|
else
|
|
{
|
|
if (!limit_map_fn)
|
|
{
|
|
if (affinity == CVY_AFFINITY_NONE)
|
|
return Map(svr_ipaddr, ((client_port << 16) + svr_port));
|
|
else if (affinity == CVY_AFFINITY_SINGLE)
|
|
return Map(svr_ipaddr, client_ipaddr);
|
|
else
|
|
return Map(svr_ipaddr & TCPIP_CLASSC_MASK, client_ipaddr);
|
|
}
|
|
else
|
|
{
|
|
if (affinity == CVY_AFFINITY_NONE)
|
|
return Map(svr_ipaddr, svr_port);
|
|
else if (affinity == CVY_AFFINITY_SINGLE)
|
|
return Map(svr_ipaddr, MAP_FN_PARAMETER);
|
|
else
|
|
return Map(svr_ipaddr & TCPIP_CLASSC_MASK, MAP_FN_PARAMETER);
|
|
}
|
|
}
|
|
}
|
|
|
|
BOOLEAN Bin_targ_map_get(
    PLOAD_CTXT      lp,
    PBIN_STATE      binp,           /* ptr. to bin state */
    ULONG           my_host_id,
    PMAP_T          pmap)           /* ptr. to target map */
/*
  Get target map for this host

  In CVY_SINGLE mode, the highest-priority live host (lowest priority value)
  receives all bins. Otherwise, bins are partitioned among hosts in proportion
  to their load percentages, moving as few bins as possible away from their
  current owners.

  returns BOOLEAN:
      TRUE  => valid target map is returned via pmap
      FALSE => error occurred; no target map returned
*/
{
    ULONG       remsz,          /* remainder size */
                loadsz,         /* size of a load partition */
                first_bit;      /* first bit position of load partition */
    MAP_T       targ_map;       /* bit map of load bins for this host */
    ULONG       tot_load = 0;   /* total of load perecentages */
    ULONG *     pload_list;     /* ptr. to list of load balance perecntages */
    WCHAR       num [20];
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);


    pload_list = binp->load_amt;

    if (binp->mode == CVY_SINGLE)
    {
        ULONG       max_pri;    /* highest priority */
        ULONG       i;

        first_bit = 0;

        /* compute max priority; in single mode, load_amt[] holds per-host
           priority values where a LOWER number means HIGHER priority */

        max_pri = CVY_MAX_HOSTS + 1;

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            tot_load += pload_list[i];      /* v2.1 */

            if (pload_list[i] != 0)
            {
                //
                // If another host has the same priority as this host, do not converge
                //
                if (i!= my_host_id && pload_list[i] == pload_list[my_host_id])
                {
                    /* log the duplicate only on the first detection; dup_sspri
                       latches so subsequent calls stay quiet */
                    if (!(lp->dup_sspri))
                    {
                        UNIV_PRINT_CRIT(("Bin_targ_map_get: Host %d: Duplicate single svr priorities detected", my_host_id));
                        TRACE_CRIT("%!FUNC! Host %d: Duplicate single svr priorities detected", my_host_id);
                        Univ_ulong_to_str (pload_list[my_host_id], num, 10);
                        LOG_MSG(MSG_ERROR_SINGLE_DUP, num);

                        lp->dup_sspri = TRUE;
                    }

                    /* 1.03: return error, which inhibits convergence; note that
                       rule will be automatically reinstated when duplicate server
                       priorities are eliminated */

                    return FALSE;
                }

                if ( pload_list[i] <= max_pri )
                {
                    max_pri = pload_list[i];
                }
            }
        }

        binp->tot_load = tot_load;      /* v2.1 */

        /* now determine if we are the highest priority host; if so, we own
           every bin, otherwise none */

        if (pload_list[my_host_id] == max_pri)
        {
            loadsz   = CVY_MAXBINS;
            targ_map = BIN_ALL_ONES;    /* v2.05 */
        }
        else
        {
            loadsz   = 0;
            targ_map = 0;               /* v2.05 */
        }
    }

    else    /* load balanced */
    {
        ULONG       i, j;
        ULONG       partsz[CVY_MAX_HOSTS+1];
                                /* new partition size per host */
        ULONG       cur_partsz[CVY_MAX_HOSTS+1];
                                /* current partition size per host (v2.05) */
        ULONG       cur_host[CVY_MAXBINS];
                                /* current host for each bin (v2.05) */
        ULONG       tot_partsz; /* sum of partition sizes */
        ULONG       donor;      /* current donor host (v2.05) */
        ULONG       cur_nbins;  /* current # bins (v2.05) */

        /* setup current partition sizes and bin to host mapping from current map (v2.05) */

        cur_nbins = 0;

        for (j=0; j<CVY_MAXBINS; j++)
            cur_host[j] = CVY_MAX_HOSTS;        /* all bins are initially orphans;
                                                   CVY_MAX_HOSTS is the pseudo-host
                                                   index used for unowned bins */

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            ULONG       count = 0L;
            MAP_T       cmap  = binp->cur_map[i];

            tot_load += pload_list[i];          /* folded into this loop v2.1 */

            /* walk cur_map[i] bit by bit; first owner wins if two hosts
               claim the same bin */
            for (j=0; j<CVY_MAXBINS && cmap != ((MAP_T)0); j++)
            {
                /* if host i has bin j and it's not a duplicate, set up the mapping */

                if ((cmap & ((MAP_T)0x1)) != ((MAP_T)0) && cur_host[j] == CVY_MAX_HOSTS)
                {
                    count++;
                    cur_host[j] = i;
                }
                cmap >>= 1;
            }

            cur_partsz[i] = count;
            cur_nbins += count;
        }

        if (cur_nbins > CVY_MAXBINS)
        {
            UNIV_PRINT_CRIT(("Bin_targ_map_get: Error - too many bins found"));
            TRACE_CRIT("%!FUNC! Error - too many bins found");
            LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);

            cur_nbins = CVY_MAXBINS;
        }

        /* if there are orphan bins, give them to pseudo-host CVY_MAX_HOSTS for now (v2.05) */

        if (cur_nbins < CVY_MAXBINS)
            cur_partsz[CVY_MAX_HOSTS] = CVY_MAXBINS - cur_nbins;
        else
            cur_partsz[CVY_MAX_HOSTS] = 0;

        /* compute total load */

        binp->tot_load = tot_load;      /* v2.06 */

        /* now compute tentative partition sizes and remainder after initially
           dividing up partitions among hosts; integer division truncates, so
           remsz bins are left over to dole out below */

        tot_partsz = 0;
        first_bit  = 0;

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            if (tot_load > 0)
                partsz[i] = CVY_MAXBINS * pload_list[i] / tot_load;
            else
                partsz[i] = 0;

            tot_partsz += partsz[i];
        }

        remsz = CVY_MAXBINS - tot_partsz;

        /* check for zero total load */

        if (tot_partsz == 0)
        {
            * pmap = 0;
            return TRUE;
        }

        /* first dole out remainder bits to hosts that currently have bins (this
           minimizes the number of bins that have to move) v2.05 */

        if (remsz > 0)
        {
            for (i=0; i<CVY_MAX_HOSTS && remsz > 0; i++)
                if (cur_partsz[i] > 0 && pload_list[i] > 0)
                {
                    partsz[i]++;
                    remsz--;
                }
        }

        /* now dole out remainder bits to hosts that currently have no bins (to maintain
           the target load balance) v2.05 */

        if (remsz > 0)
        {
            for (i=0; i<CVY_MAX_HOSTS && remsz > 0; i++)
                if (cur_partsz[i] == 0 && pload_list[i] > 0)
                {
                    partsz[i]++;
                    remsz--;
                }
        }

        /* We MUST be out of bins by now. */
        UNIV_ASSERT(remsz == 0);

        if (remsz != 0)
        {
            UNIV_PRINT_CRIT(("Bin_targ_map_get: Bins left over (%u) after handing out to all hosts with and without bins!", remsz));
            TRACE_CRIT("%!FUNC! Bins left over (%u) after handing out to all hosts with and without bins!", remsz);
        }

        /* reallocate bins to target hosts to match new partition sizes (v2.05);
           hosts holding more bins than their new partition size donate the
           excess to hosts holding fewer */

        donor = 0;
        partsz[CVY_MAX_HOSTS] = 0;      /* pseudo-host needs no bins */

        for (i=0; i<CVY_MAX_HOSTS; i++)
        {
            ULONG       rcvrsz;         /* current receiver's target partition */
            ULONG       donorsz;        /* current donor's target partition size */

            /* find and give this host some bins */

            rcvrsz = partsz[i];

            while (rcvrsz > cur_partsz[i])
            {
                /* find a host with too many bins; donor only advances, so each
                   donor is drained at most once across all receivers */

                for (; donor < CVY_MAX_HOSTS; donor++)
                    if (partsz[donor] < cur_partsz[donor])
                        break;

                /* if donor is pseudo-host and it's out of bins, give it more bins
                   to keep algorithm from looping; this should never happen */

                if (donor >= CVY_MAX_HOSTS && cur_partsz[donor] == 0)
                {
                    UNIV_PRINT_CRIT(("Bin_targ_map_get: Error - no donor bins"));
                    TRACE_CRIT("%!FUNC! Error - no donor bins");
                    LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                    cur_partsz[donor] = CVY_MAXBINS;
                }

                /* now find the donor's bins and give them to the target host */

                donorsz = partsz[donor];        /* donor's target bin count */

                for (j=0; j<CVY_MAXBINS; j++)
                {
                    if (cur_host[j] == donor)
                    {
                        cur_host[j] = i;
                        cur_partsz[donor]--;
                        cur_partsz[i]++;

                        /* if this donor has no more to give, go find the next donor;
                           if this receiver needs no more, go on to next receiver */

                        if (donorsz == cur_partsz[donor] || rcvrsz == cur_partsz[i])
                            break;
                    }
                }

                /* if no bin was found, log a fatal error and exit */

                if (j == CVY_MAXBINS)
                {
                    UNIV_PRINT_CRIT(("Bin_targ_map_get: Error - no bin found"));
                    TRACE_CRIT("%!FUNC! Error - no bin found");
                    LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                    break;
                }
            }
        }

        /* finally, compute bit mask for this host (v2.05) */

        targ_map = 0;

        for (j=0; j<CVY_MAXBINS; j++)
        {
            /* any bin still owned by the pseudo-host indicates a mapping bug;
               recover by assigning it to host 0 */
            if (cur_host[j] == CVY_MAX_HOSTS)
            {
                UNIV_PRINT_CRIT(("Bin_targ_map_get: Error - incomplete mapping"));
                TRACE_CRIT("%!FUNC! Error - incomplete mapping");
                LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
                cur_host[j] = 0;
            }

            if (cur_host[j] == my_host_id)
                targ_map |= ((MAP_T)1) << j;
        }
    }

    * pmap = targ_map;

    return TRUE;

} /* end Bin_targ_map_get */
|
|
|
|
|
|
BOOLEAN Bin_map_check(
|
|
ULONG tot_load, /* total load percentage (v2.06) */
|
|
PMAP_T pbin_map) /* bin map for all hosts */
|
|
{
|
|
MAP_T tot_map, /* total map for all hosts */
|
|
ovr_map, /* overlap map between hosts */
|
|
exp_tot_map; /* expected total map */
|
|
ULONG i;
|
|
|
|
|
|
/* compute expected total map (2.04) */
|
|
|
|
if (tot_load == 0) /* v2.06 */
|
|
{
|
|
return TRUE;
|
|
}
|
|
else
|
|
{
|
|
exp_tot_map = BIN_ALL_ONES;
|
|
}
|
|
|
|
/* compute total map and overlap map */
|
|
|
|
tot_map = ovr_map = 0;
|
|
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
{
|
|
ovr_map |= (pbin_map[i] & tot_map);
|
|
tot_map |= pbin_map[i];
|
|
}
|
|
|
|
if (tot_map == exp_tot_map && ovr_map == 0)
|
|
{
|
|
return TRUE;
|
|
}
|
|
else
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
} /* end Bin_map_check */
|
|
|
|
|
|
BOOLEAN Bin_map_covering(
|
|
ULONG tot_load, /* total load percentage (v2.06) */
|
|
PMAP_T pbin_map) /* bin map for all hosts */
|
|
{
|
|
MAP_T tot_map, /* total map for all hosts */
|
|
exp_tot_map; /* expected total map */
|
|
ULONG i;
|
|
|
|
|
|
/* compute expected total map (v2.04) */
|
|
|
|
if (tot_load == 0) /* v2.06 */
|
|
{
|
|
return TRUE;
|
|
}
|
|
else
|
|
{
|
|
exp_tot_map = BIN_ALL_ONES;
|
|
}
|
|
|
|
/* compute total map and overlap map */
|
|
|
|
tot_map = 0;
|
|
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
{
|
|
tot_map |= pbin_map[i];
|
|
}
|
|
|
|
if (tot_map == exp_tot_map)
|
|
{
|
|
return TRUE;
|
|
}
|
|
else
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
} /* end Bin_map_covering */
|
|
|
|
|
|
void Bin_state_init(
|
|
PLOAD_CTXT lp,
|
|
PBIN_STATE binp, /* ptr. to bin state */
|
|
ULONG index, /* index of bin state */
|
|
ULONG my_host_id,
|
|
ULONG mode,
|
|
ULONG prot,
|
|
BOOLEAN equal_bal, /* TRUE => balance equally across hosts */
|
|
USHORT affinity,
|
|
ULONG load_amt) /* this host's load percentage if unequal */
|
|
/*
|
|
Initialize bin state for a port group
|
|
*/
|
|
{
|
|
ULONG i; /* loop variable */
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
|
|
|
|
if ((equal_bal && mode == CVY_SINGLE) ||
|
|
(mode == CVY_SINGLE && load_amt > CVY_MAX_HOSTS) ||
|
|
index >= CVY_MAXBINS)
|
|
{
|
|
UNIV_ASSERT(FALSE); // This should never happen
|
|
}
|
|
|
|
binp->code = CVY_BINCODE; /* (bbain 8/19/99) */
|
|
binp->equal_bal = equal_bal;
|
|
binp->affinity = affinity;
|
|
binp->index = index;
|
|
binp->compatible = TRUE;
|
|
binp->mode = mode;
|
|
binp->prot = prot;
|
|
|
|
/* initialize target and new load maps */
|
|
|
|
binp->targ_map = 0;
|
|
binp->all_idle_map = BIN_ALL_ONES;
|
|
binp->cmap = 0; /* v2.1 */
|
|
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
{
|
|
binp->new_map[i] = 0;
|
|
binp->cur_map[i] = 0;
|
|
binp->chk_map[i] = 0;
|
|
binp->idle_map[i] = BIN_ALL_ONES;
|
|
}
|
|
|
|
/* initialize load percentages for all hosts */
|
|
|
|
if (equal_bal)
|
|
{
|
|
load_amt = CVY_EQUAL_LOAD;
|
|
}
|
|
|
|
binp->tot_load = load_amt;
|
|
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
{
|
|
if (i == my_host_id)
|
|
{
|
|
binp->orig_load_amt =
|
|
binp->load_amt[i] = load_amt;
|
|
}
|
|
else
|
|
binp->load_amt[i] = 0;
|
|
}
|
|
|
|
/* initialize requesting state to no requests active and all bins local or none */
|
|
|
|
binp->snd_bins = 0;
|
|
binp->rcv_bins = 0;
|
|
binp->rdy_bins = 0;
|
|
binp->idle_bins = BIN_ALL_ONES; /* we are initially idle */
|
|
|
|
/* perform first initialization only once (v2.06) */
|
|
|
|
if (!(binp->initialized))
|
|
{
|
|
binp->tconn = 0;
|
|
|
|
for (i=0; i<CVY_MAXBINS; i++)
|
|
{
|
|
binp->nconn[i] = 0;
|
|
}
|
|
|
|
Queue_init(&(binp->connq));
|
|
binp->initialized = TRUE;
|
|
}
|
|
|
|
/* Initialize the performance counters. */
|
|
binp->packets_accepted = 0;
|
|
binp->packets_dropped = 0;
|
|
binp->bytes_accepted = 0;
|
|
binp->bytes_dropped = 0;
|
|
|
|
} /* end Bin_state_init */
|
|
|
|
|
|
BOOLEAN Bin_converge(
|
|
PLOAD_CTXT lp,
|
|
PBIN_STATE binp, /* ptr. to bin state */
|
|
ULONG my_host_id)
|
|
/*
|
|
Explicitly attempt to converge new port group state
|
|
|
|
returns BOOL:
|
|
TRUE => all hosts have consistent new state for converging
|
|
FALSE => parameter error or inconsistent convergence state
|
|
*/
|
|
{
|
|
MAP_T orphan_map; /* map of orphans that this host will now own */
|
|
ULONG i;
|
|
BOOLEAN fCheckMap = FALSE;
|
|
|
|
|
|
/* determine new target load map; 1.03: return in error if no map generated */
|
|
|
|
if (!Bin_targ_map_get(lp, binp, my_host_id, &(binp->targ_map)))
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
/* compute map of all currently orphan bins; note that all duplicates are
|
|
considered to be orphans */
|
|
|
|
orphan_map = 0;
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
orphan_map |= binp->cur_map[i];
|
|
|
|
orphan_map = ~orphan_map;
|
|
|
|
/* update our new map to include all current bins and orphans that are in the
|
|
target set */
|
|
|
|
binp->new_map[my_host_id] = binp->cmap | /* v2.1 */
|
|
(binp->targ_map & orphan_map); /* 1.03 */
|
|
|
|
/* check that new load maps are consistent and covering */
|
|
|
|
fCheckMap = Bin_map_check(binp->tot_load, binp->new_map); /* v2.06 */
|
|
return fCheckMap;
|
|
|
|
} /* end Bin_converge */
|
|
|
|
|
|
void Bin_converge_commit(
    PLOAD_CTXT lp,
    PBIN_STATE binp,       /* ptr. to bin state */
    ULONG      my_host_id)
/*
  Commit to new port group state

  Adopts the negotiated new_map[] as the current ownership maps and derives
  which bins this host must hand off (rdy_bins) and which it must still
  receive (rcv_bins) to reach its target map.
*/
{
    ULONG      i;
    PMAIN_CTXT ctxtp    = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    MAP_T      old_cmap = binp->cmap;  /* remembered to detect ownership changes below */

    /* check that new load maps are consistent and covering; log at most once
       per load module lifetime (lp->bad_map latches) to avoid log flooding */
    if (!(Bin_map_check(binp->tot_load, binp->new_map)))  /* v2.06 */
    {
        if (!(lp->bad_map))
        {
            UNIV_PRINT_CRIT(("Bin_converge_commit: Bad new map"));
            TRACE_CRIT("%!FUNC! Bad new map");
            LOG_MSG1(MSG_ERROR_INTERNAL, MSG_NONE, (ULONG_PTR)binp->new_map);

            lp->bad_map = TRUE;
        }
    }

    /* commit to new current maps */
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        binp->chk_map[i] =
        binp->cur_map[i] = binp->new_map[i];
    }

    /* setup new send/rcv bins, and new ready to ship bins; note that ready to
       ship bins are cleared from the current map */

    /* bins we own that are not in our target map must be shipped away (1.03) */
    binp->rdy_bins = binp->cur_map[my_host_id] & ~(binp->targ_map);

    binp->cur_map[my_host_id] &= ~(binp->rdy_bins);

    /* bins in our target map that we do not own yet must be received */
    binp->rcv_bins = binp->targ_map & ~(binp->cur_map[my_host_id]);

    binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */

    /* If the port rule map has changed, reset the performance counters. */
    if (binp->cmap != old_cmap) {
        binp->packets_accepted = 0;
        binp->packets_dropped = 0;
        binp->bytes_accepted = 0;
        binp->bytes_dropped = 0;
    }

#if 0
    /* simulation output generator (2.05) */
    {
        ULONG lcount = 0L;
        ULONG ncount = 0L;
        MAP_T bins = binp->rdy_bins;

        for (i=0; i<CVY_MAXBINS && bins != 0; i++, bins >>= 1)
            if ((bins & ((MAP_T)0x1)) != ((MAP_T)0))
                lcount++;

        bins = binp->targ_map;

        for (i=0; i<CVY_MAXBINS && bins != 0; i++, bins >>= 1)
            if ((bins & ((MAP_T)0x1)) != ((MAP_T)0))
                ncount++;

        UNIV_PRINT_VERB(("Converge at host %d pg %d: losing %d, will have %d bins\n", my_host_id, binp->index, lcount, ncount));
    }
#endif

} /* end Bin_converge_commit */
|
|
|
|
|
|
BOOLEAN Bin_host_update(
    PLOAD_CTXT lp,
    PBIN_STATE binp,           /* ptr. to bin state */
    ULONG      my_host_id,     /* my host's id MINUS one */
    BOOLEAN    converging,     /* TRUE => we are converging now */
    BOOLEAN    rem_converging, /* TRUE => remote host is converging */
    ULONG      rem_host,       /* remote host's id MINUS one */
    MAP_T      cur_map,        /* remote host's current map or 0 if host died */
    MAP_T      new_map,        /* remote host's new map if converging */
    MAP_T      idle_map,       /* remote host's idle map */
    MAP_T      rdy_bins,       /* bins that host is ready to send; ignored
                                  if converging to prevent bin transfers */
    ULONG      pkt_count,      /* remote host's packet count */
    ULONG      load_amt)       /* remote host's load percentage */
/*
  Update hosts's state for a port group

  returns BOOL:
      TRUE  => if not converging, normal return
               otherwise, all hosts have consistent state for converging
      FALSE => parameter error or inconsistent convergence state

  function:
      Updates hosts's state for a port group and attempts to converge new states if
      in convergence mode.  Called when a ping message is received or when a host
      is considered to have died.  Handles case of newly discovered hosts.  Can be
      called multiple times with the same information.
*/
{
    ULONG      i;
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    /* reject out-of-range remote host ids and self-updates */
    if (rem_host >= CVY_MAX_HOSTS || rem_host == my_host_id)
    {
        UNIV_PRINT_CRIT(("Bin_host_update: Parameter error"));
        TRACE_CRIT("%!FUNC! Parameter error");
        LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, rem_host+1, my_host_id+1);
        return FALSE;
    }

    UNIV_ASSERT(binp->code == CVY_BINCODE);  /* (bbain 8/19/99) */

    /* change load percentage if load changed */
    if (load_amt != binp->load_amt[rem_host])
    {
        binp->load_amt[rem_host] = load_amt;
    }

    /* check for non-overlapping maps */
    if ((binp->cmap & cur_map) != 0)  /* v2.1 */
    {
        /* if we have received fewer packets than the other host or have a higher host id,
           remove duplicates from current map; this uses a heuristic that a newly joining
           host that was subnetted probably did not receive packets; we are trying to avoid
           having two hosts answer to the same client while minimizing disruption of service
           (v1.32B) */

        if (lp->send_msg.pkt_count < pkt_count ||
            (lp->send_msg.pkt_count == pkt_count && rem_host < my_host_id))
        {
            MAP_T dup_map;

            dup_map = binp->cmap & cur_map;  /* v2.1 */

            /* yield the disputed bins to the remote host */
            binp->cur_map[my_host_id] &= ~dup_map;
            binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */

            /* If there has been a collision, reset the performance counters. */
            binp->packets_accepted = 0;
            binp->packets_dropped = 0;
            binp->bytes_accepted = 0;
            binp->bytes_dropped = 0;

            /* drop all connection state for this port group, since ownership
               of its bins is now in dispute */
            Load_conn_kill(lp, binp);
        }

        if (!converging && !rem_converging)
        {
            /* log the overlap at most once (lp->overlap_maps latches) */
            if (!(lp->overlap_maps))
            {
                UNIV_PRINT_CRIT(("Bin_host_update: Host %d: Two hosts with overlapping maps detected %d.", my_host_id, binp->index));
                TRACE_CRIT("%!FUNC! Host %d: Two hosts with overlapping maps detected %d.", my_host_id, binp->index);
                LOG_MSG2(MSG_WARN_OVERLAP, MSG_NONE, my_host_id+1, binp->index);

                lp->overlap_maps = TRUE;
            }

            /* force convergence if in normal operations */
            return FALSE;
        }
    }

    /* now update remote host's current map */

    binp->cur_map[rem_host] = cur_map;

    /* update idle map and calculate new global idle map if it's changed */

    if (binp->idle_map[rem_host] != idle_map)
    {
        MAP_T saved_map = binp->all_idle_map;
        MAP_T new_idle_map = BIN_ALL_ONES;
        MAP_T tmp_map;

        binp->idle_map[rem_host] = idle_map;

        /* compute new idle map for all other hosts */

        for (i=0; i<CVY_MAX_HOSTS; i++)
            if (i != my_host_id)
                new_idle_map &= binp->idle_map[i];

        binp->all_idle_map = new_idle_map;

        /* see which locally owned bins have gone idle in all other hosts */

        tmp_map = new_idle_map & (~saved_map) & binp->cmap;  /* v2.1 */

        if (tmp_map != 0)
        {
            UNIV_PRINT_VERB(("Bin_host_update: Host %d pg %d: detected new all idle %08x for local bins",
                             my_host_id, binp->index, tmp_map));
            TRACE_VERB("%!FUNC! Host %d pg %d: detected new all idle 0x%08x for local bins",
                       my_host_id, binp->index, (ULONG)tmp_map);
        }

        /* conversely, report locally owned bins that are no longer idle everywhere */

        tmp_map = saved_map & (~new_idle_map) & binp->cmap;  /* v2.1 */

        if (tmp_map != 0)
        {
            UNIV_PRINT_VERB(("Bin_host_update: Host %d pg %d: detected new non-idle %08x for local bins",
                             my_host_id, binp->index, tmp_map));
            TRACE_VERB("%!FUNC! Host %d pg %d: detected new non-idle 0x%08x for local bins",
                       my_host_id, binp->index, (ULONG)tmp_map);
        }
    }
    /* 1.03: eliminated else clause */

    /* if we are not converging AND other host not converging, exchange bins;
       convergence must now be complete for both hosts */

    if (!converging)
    {
        if (!rem_converging) {  /* 1.03: reorganized code to exchange bins only when both
                                   hosts are not converging to avoid using stale bins */

            MAP_T new_bins;  /* incoming bins from the remote host */
            MAP_T old_cmap = binp->cmap;

            /* check to see if remote host has received some bins from us */

            binp->rdy_bins &= (~cur_map);

            /* check to see if we can receive some bins */

            new_bins = binp->rcv_bins & rdy_bins;

            if (new_bins != 0)
            {
                /* receiving a bin we already own indicates an internal
                   inconsistency; log it at most once */
                if ((binp->cmap & new_bins) != 0)  /* v2.1 */
                {
                    if (!(lp->err_rcving_bins))
                    {
                        UNIV_PRINT_CRIT(("Bin_host_update: Receiving bins already own"));
                        TRACE_CRIT("%!FUNC! Receiving bins already own");
                        LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, binp->cur_map[my_host_id], new_bins);

                        lp->err_rcving_bins = TRUE;
                    }
                }

                binp->cur_map[my_host_id] |= new_bins;
                binp->rcv_bins &= ~new_bins;

                binp->cmap = binp->cur_map[my_host_id];  /* v2.1 */

                /* If the port rule map has changed, reset the performance counters. */
                if (binp->cmap != old_cmap) {
                    binp->packets_accepted = 0;
                    binp->packets_dropped = 0;
                    binp->bytes_accepted = 0;
                    binp->bytes_dropped = 0;
                }

                UNIV_PRINT_VERB(("Bin_host_update: Host %d pg %d: received %08x ; cur now %08x",
                                 my_host_id, binp->index, new_bins, binp->cur_map[my_host_id]));
                TRACE_VERB("%!FUNC! host %d pg %d: received 0x%08x ; cur now 0x%08x",
                           my_host_id, binp->index, (ULONG)new_bins, (ULONG)binp->cur_map[my_host_id]);
            }

            /* do consistency check that all bins are covered */

            binp->chk_map[rem_host] = cur_map | rdy_bins;
            binp->chk_map[my_host_id] = binp->cmap | binp->rdy_bins;  /* v2.1 */

            if (!Bin_map_covering(binp->tot_load, binp->chk_map))  /* v2.06 */
            {
                /* orphan bins detected; latch the error flag (logging disabled) */
                if (!(lp->err_orphans))
                {
#if 0
                    UNIV_PRINT_CRIT(("Bin_host_update: Host %d: Orphan bins detected", my_host_id));
                    TRACE_CRIT("%!FUNC! Host %d: Orphan bins detected", my_host_id);
                    LOG_MSG1(MSG_ERROR_INTERNAL, MSG_NONE, my_host_id+1);
#endif
                    lp->err_orphans = TRUE;
                }
            }
        }

        return TRUE;
    }

    /* otherwise, store proposed new load map and try to converge current host data */

    else
    {
        BOOLEAN fRet;
        binp->chk_map[rem_host] =
        binp->new_map[rem_host] = new_map;

        fRet = Bin_converge(lp, binp, my_host_id);
        return fRet;
    }

} /* end Bin_host_update */
|
|
|
|
|
|
void Bin_state_print(
    PBIN_STATE binp,       /* ptr. to bin state */
    ULONG      my_host_id)
/*
  Dump the state of one port group's bin maps to the verbose trace log.
  This is a diagnostic aid only; it has no side effects on the load state.
  The per-host and per-bin dumps are compiled out (#if 0).
*/
{
#if 0
    ULONG i;
#endif

    UNIV_PRINT_VERB(("Bin_state_print: Hst %d binp %x: maps: targ %x cur %x new %x; eq %d mode %d amt %d tot %d; bins: snd %x rcv %x rdy %x",
                     my_host_id, binp, binp->targ_map, binp->cur_map[my_host_id], binp->new_map[my_host_id],
                     binp->equal_bal, binp->mode, binp->load_amt[my_host_id],
                     binp->tot_load, binp->snd_bins, binp->rcv_bins, binp->rdy_bins));
    TRACE_VERB("%!FUNC! Hst 0x%x binp 0x%p: maps: targ 0x%x cur 0x%x new 0x%x; eq %d mode %d amt %d tot %d; bins: snd 0x%x rcv 0x%x rdy 0x%x",
               my_host_id, binp, (ULONG)binp->targ_map, (ULONG)binp->cur_map[my_host_id], (ULONG)binp->new_map[my_host_id],
               binp->equal_bal, binp->mode, binp->load_amt[my_host_id],
               binp->tot_load, (ULONG)binp->snd_bins, (ULONG)binp->rcv_bins, (ULONG)binp->rdy_bins);

#if 0
    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        UNIV_PRINT_VERB(("Bin_state_print: Host %d: cur map %x new %x load_amt %d", i+1, binp->cur_map[i],
                         binp->new_map[i], binp->load_amt[i]));
        TRACE_VERB("%!FUNC! Host %d: cur map 0x%x new 0x%x load_amt %d", i+1, binp->cur_map[i],
                   binp->new_map[i], binp->load_amt[i]);
    }

    for (i=0; i<CVY_MAXBINS; i++)
    {
        UNIV_PRINT_VERB(("Bin_state_print: Bin %d: req_host %d bin_state %d nconn %d", i, binp->req_host[i],
                         binp->bin_state[i], binp->nconn[i]));
        TRACE_VERB("%!FUNC! Bin %d: req_host %d bin_state %d nconn %d", i, binp->req_host[i],
                   binp->bin_state[i], binp->nconn[i]);
    }
#endif

} /* end Bin_state_print */
|
|
|
|
/*
 * Function: Load_soil_dscr
 * Description: This function marks a given connection dirty and either destroys
 *              it or moves it to the dirty descriptor queue for subsequent cleanup.
 *              Which path is taken depends on the connection's protocol and on
 *              whether the descriptor is already timing out.
 * Parameters: lp - a pointer to the load module.
 *             bp - a pointer to the appropriate port rule.
 *             ep - a pointer to the descriptor to soil.
 * Returns: Nothing.
 * Author: shouse, 7.23.02
 * Notes:
 */
void Load_soil_dscr (PLOAD_CTXT lp, PBIN_STATE bp, PCONN_ENTRY ep)
{
    /* Mark the connection dirty.  We mark the connection dirty here to
       ensure that Load_put_dscr does not update the connection counters
       when this descriptor is eventually destroyed. */
    ep->flags |= NLB_CONN_ENTRY_FLAGS_DIRTY;

    /* Increment the dirty connection counters.  We do this unconditionally
       because we've already marked the descriptor dirty.  Load_put_dscr
       will decrement these counters when it sees that the descriptor has
       been marked dirty. */
    lp->dirty_bin[ep->bin]++;
    lp->num_dirty++;

    /* Make connection and bin dirty if we don't have a zero timeout period so that they
       will not be handled by TCP/IP anymore; this avoids allowing TCP/IP's now stale
       connection state from handling packets for newer connections should traffic be
       directed to this host in the future.

       Only mark descriptors and bins dirty, however, if the descriptor is NOT on the
       timeout queue. */
    if (!ep->timeout)
    {
        /* The disposition of the descriptor depends on its protocol. */
        switch (ep->protocol)
        {
        case TCPIP_PROTOCOL_TCP:
        case TCPIP_PROTOCOL_PPTP:
        case TCPIP_PROTOCOL_GRE:

#if defined (NLB_TCP_NOTIFICATION)
            /* If TCP notifications are turned on, we will mark these descriptors dirty
               and remove them when TCP notifies us that it has removed the state for
               the TCP connection.  GRE descriptors always correspond to a PPTP/TCP
               tunnel and are cleaned up when their "parent" descriptor is cleaned up. */
            if (NLB_NOTIFICATIONS_ON() || (lp->cln_timeout > 0))
#else
            /* If there is a non-zero cleanup timeout, place these descriptors on the
               dirty queue and clean them up when the timeout expires. */
            if (lp->cln_timeout > 0)
#endif
            {
                /* Unlink the descriptor from the bin queue and link it to the dirty queue. */
                Link_unlink(&(ep->blink));
                Queue_enq(&(lp->conn_dirtyq), &(ep->blink));

                /* Note that a cleanup is now pending. */
                lp->cln_waiting = TRUE;
            }

            /* Otherwise, clean the descriptors up now. */
            else
            {
                /* Clear the descriptor. */
                CVY_CONN_CLEAR(ep);

                /* Release the descriptor. */
                Load_put_dscr(lp, bp, ep);
            }

            break;
        case TCPIP_PROTOCOL_IPSEC1:
        case TCPIP_PROTOCOL_IPSEC_UDP:

            /* IPSec descriptors always go to the dirty queue for deferred cleanup. */

            /* Unlink the descriptor from the bin queue and link it to the dirty queue. */
            Link_unlink(&(ep->blink));
            Queue_enq(&(lp->conn_dirtyq), &(ep->blink));

            /* Note that a cleanup is now pending. */
            lp->cln_waiting = TRUE;

            break;
        default:

            /* All other protocols are destroyed immediately. */

            /* Clear the descriptor. */
            CVY_CONN_CLEAR(ep);

            /* Release the descriptor. */
            Load_put_dscr(lp, bp, ep);

            break;
        }
    }

    /* Otherwise, if the descriptor is already timing-out (timeout != 0), TCP/IP should
       not have any stale state for this connection, as it has already terminated, so
       just destroy the descriptor now. */
    else
    {
        /* Clear the descriptor. */
        CVY_CONN_CLEAR(ep);

        /* Release the descriptor. */
        Load_put_dscr(lp, bp, ep);
    }
}
|
|
|
|
void Load_conn_kill(
    PLOAD_CTXT lp,
    PBIN_STATE bp)
/*
  Kill all connections in a port group (v1.32B)

  Walks the port group's connection queue, soiling every descriptor (which
  either frees it or moves it to the dirty queue), then zeroes the per-bin
  and total connection counts and marks all bins idle.  Cross-checks the
  walked counts against the recorded per-bin counts and logs discrepancies.
*/
{
    PCONN_ENTRY ep;   /* ptr. to connection entry */
    QUEUE *     qp;   /* ptr. to bin's connection queue */
    QUEUE *     dqp;  /* ptr. to dirty queue */
    LONG        count[CVY_MAXBINS];
                      /* count of cleaned up connections per bin for checking */
    ULONG       i;
    BOOLEAN     err_bin;    /* bin id error detected */
    BOOLEAN     err_count;  /* connection count error detected */
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    err_bin = err_count = FALSE;

    qp  = &(bp->connq);
    dqp = &(lp->conn_dirtyq);

    for (i=0; i<CVY_MAXBINS; i++)
        count[i] = 0;

    /* remove connections from bin queue and either make dirty or cleanup */

    ep = (PCONN_ENTRY)Queue_front(qp);

    while (ep != NULL)
    {
        UNIV_ASSERT(ep->code == CVY_ENTRCODE);

        /* validate the bin id before using it to index count[]; log only once */
        if (ep->bin >= CVY_MAXBINS)
        {
            if (!err_bin)
            {
                UNIV_PRINT_CRIT(("Load_conn_kill: Bad bin id"));
                TRACE_CRIT("%!FUNC! Bad bin id");
                LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, ep->bin, CVY_MAXBINS);

                err_bin = TRUE;
            }
        }
        else
        {
            count[ep->bin]++;
        }

        /* Mark the descriptor dirty and either free it or move it to
           the dirty descriptor queue for subsequent cleanup. */
        Load_soil_dscr(lp, bp, ep);

        /* Load_soil_dscr removed ep from this queue, so the front has advanced. */
        ep = (PCONN_ENTRY)Queue_front(qp);
    }

    /* now make bins idle */

    for (i=0; i<CVY_MAXBINS; i++)
    {
        /* the recorded per-bin count should match what we just walked; log
           a mismatch only once */
        if (bp->nconn[i] != count[i])
        {
            if (!err_count)
            {
                UNIV_PRINT_CRIT(("Load_conn_kill: Bad connection count %d %d bin %d", bp->nconn[i], (LONG)count[i], i));
                TRACE_CRIT("%!FUNC! Bad connection count %d %d bin %d", bp->nconn[i], (LONG)count[i], i);

                /* KXF 2.1.1 - removed after tripped up at MSFT a few times */
#if 0
                LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, bp->nconn[i], (LONG)count[i]);
#endif

                err_count = TRUE;
            }
        }

        bp->nconn[i] = 0;
    }

    /* deduct this port group's connections from the module total, clamping at zero */
    lp->nconn -= bp->tconn;

    if (lp->nconn < 0)
        lp->nconn = 0;

    bp->tconn = 0;

    bp->idle_bins = BIN_ALL_ONES;

    /* if a cleanup is pending, restart the cleanup clock so dirty descriptors
       age from now */
    if (lp->cln_waiting)
    {
        lp->cur_time = 0;
    }
}
|
|
|
|
void Load_conn_cleanup(
    PLOAD_CTXT lp)
/*
  Clean up all dirty connections (v1.32B)

  Walks the dirty descriptor queue and destroys each descriptor whose
  protocol permits immediate cleanup.  IPSec descriptors are left on the
  queue here, and (when TCP notification is enabled) TCP-family descriptors
  are left for the notification path to reclaim.
*/
{
    PCONN_ENTRY ep;       /* ptr. to connection entry */
    PCONN_ENTRY next;     /* ptr. to next connection entry */
    QUEUE *     dqp;      /* ptr. to dirty queue */
    BOOLEAN     err_bin;  /* bin id error detected */
    ULONG       i;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    err_bin = FALSE;

    dqp = &(lp->conn_dirtyq);

    /* dequeue and clean up all connections on dirty connection queue */

    ep = (PCONN_ENTRY)Queue_front(dqp);

    while (ep != NULL)
    {
        PBIN_STATE bp;

        UNIV_ASSERT(ep->code == CVY_ENTRCODE);

        /* validate the bin id; log at most once */
        if (ep->bin >= CVY_MAXBINS)
        {
            if (!err_bin)
            {
                UNIV_PRINT_CRIT(("Load_conn_cleanup: Bad bin id"));
                TRACE_CRIT("%!FUNC! Bad bin id");
                LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, ep->bin, CVY_MAXBINS);

                err_bin = TRUE;
            }
        }

        /* If we're about to clean up this descriptor, it had better be dirty. */
        UNIV_ASSERT(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY);

        /* Find the NEXT descriptor in the queue before we possibly destroy this one. */
        next = (PCONN_ENTRY)Queue_next(dqp, &(ep->blink));

        switch (ep->protocol)
        {
        case TCPIP_PROTOCOL_IPSEC1:
        case TCPIP_PROTOCOL_IPSEC_UDP:
            /* IPSec descriptors are not cleaned up here. */
            break;
        case TCPIP_PROTOCOL_TCP:
        case TCPIP_PROTOCOL_PPTP:
        case TCPIP_PROTOCOL_GRE:
#if defined (NLB_TCP_NOTIFICATION)
            /* With notification enabled, TCP reclaims these via the
               notification path instead. */
            if (!NLB_NOTIFICATIONS_ON())
            {
#endif
                /* Lookup the port rule, so we can update the port rule info. */
                bp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, IS_TCP_PKT(ep->protocol));

                /* Clear the descriptor. */
                CVY_CONN_CLEAR(ep);

                /* Release the descriptor. */
                Load_put_dscr(lp, bp, ep);
#if defined (NLB_TCP_NOTIFICATION)
            }
#endif

            break;
        default:

            /* Lookup the port rule, so we can update the port rule info. */
            bp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, IS_TCP_PKT(ep->protocol));

            /* Clear the descriptor. */
            CVY_CONN_CLEAR(ep);

            /* Release the descriptor. */
            Load_put_dscr(lp, bp, ep);

            break;
        }

        /* Set the current descriptor to the next descriptor. */
        ep = next;
    }
}
|
|
|
|
void Load_stop(
    PLOAD_CTXT lp)
/*
  Deactivate the load module: under the load lock, kill all connection state
  for every port rule, zero the advertised maps in the heartbeat message so
  peers see this host carrying no load, force convergence, and mark the
  module inactive until Load_start is called again.  No-op if the module is
  not active.
*/
{
    ULONG      i;
    IRQLEVEL   irql;
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    if (!(lp->active))
    {
        return;
    }

    LOCK_ENTER(&(lp->lock), &irql);

    /* make connections for all rules dirty so they will not be handled */

    for (i=0; i<lp->send_msg.nrules; i++)
    {
        PBIN_STATE bp;  /* ptr. to bin state */

        bp = &(lp->pg_state[i]);
        UNIV_ASSERT(bp->code == CVY_BINCODE);  /* (bbain 8/21/99) */

        Load_conn_kill(lp, bp);  /* (v1.32B) */

        /* advertise that we are not handling any load in case a ping is sent out */

        lp->send_msg.cur_map[i]  = 0;
        lp->send_msg.new_map[i]  = 0;
        lp->send_msg.idle_map[i] = BIN_ALL_ONES;
        lp->send_msg.rdy_bins[i] = 0;
        lp->send_msg.load_amt[i] = 0;
    }

    lp->send_msg.state = HST_CVG;  /* force convergence (v2.1) */

    /* go inactive until restarted */

    lp->active = FALSE;
    lp->nconn  = 0;  /* v2.1 */

    LOCK_EXIT(&(lp->lock), irql);

} /* end Load_stop */
|
|
|
|
|
|
BOOLEAN Load_start(  /* (v1.32B) */
    PLOAD_CTXT lp)
/*
  Activate the load module: (re)load parameters, initialize bin state for
  every configured port rule plus a trailing default single-host rule,
  populate the heartbeat send message from that state, and enter
  convergence as a newly joining cluster member.

  returns BOOLEAN:
      TRUE  => module activated
      FALSE => module was already active (no changes made)
*/
{
    ULONG      i;
    BOOLEAN    ret;
    PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    WCHAR      me[20];  /* this host's 1-based id rendered as a string for event logging */

    /* lazily perform one-time initialization if nobody has yet */
    if (!(lp->initialized))
        Load_init(lp, & ctxtp -> params);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);  /* (bbain 8/19/99) */

    if (lp->active)
    {
        return FALSE;
    }

    /* host_priority is 1-based in the parameters; host ids are 0-based internally */
    lp->my_host_id =(* (lp->params)).host_priority - 1;

    lp->ping_map =
    lp->host_map = 1 << lp->my_host_id;

    lp->last_hmap = 0;  /* bbain RTM RC1 6/23/99 */

    for (i=0; i<CVY_MAX_HOSTS; i++)
    {
        lp->nmissed_pings[i] = 0;
    }

    lp->min_missed_pings = (* (lp->params)).alive_tolerance;
    lp->cln_timeout      = (* (lp->params)).cleanup_delay;
    lp->def_timeout      = (* (lp->params)).alive_period;
    lp->stable_map       = 0;
    lp->consistent       = TRUE;

    /* Intiialize the bad teaming configuration detected flag. */
    lp->bad_team_config = FALSE;

    /* Host map of legacy (win2k/NT4.0) hosts detected. */
    lp->legacy_hosts = 0;

    /* reset the one-shot error-logging latches */
    lp->dup_hosts       = FALSE;
    lp->dup_sspri       = FALSE;
    lp->bad_map         = FALSE;
    lp->overlap_maps    = FALSE;
    lp->err_rcving_bins = FALSE;
    lp->err_orphans     = FALSE;
    lp->bad_num_rules   = FALSE;
    lp->alloc_inhibited = FALSE;
    lp->alloc_failed    = FALSE;
    lp->bad_defrule     = FALSE;

    lp->scale_client  = (BOOLEAN)(* (lp->params)).scale_client;
    lp->my_stable_ct  = 0;
    lp->all_stable_ct = 0;
    lp->min_stable_ct = lp->min_missed_pings;

    lp->dscr_per_alloc  = (* (lp->params)).dscr_per_alloc;
    lp->max_dscr_allocs = (* (lp->params)).max_dscr_allocs;

    /* Calculate the maximum number of outstanding descriptors (in use) allowed. */
    lp->max_dscr_out = lp->max_dscr_allocs * lp->dscr_per_alloc;

    lp->tcp_timeout   = (* (lp->params)).tcp_dscr_timeout;
    lp->ipsec_timeout = (* (lp->params)).ipsec_dscr_timeout;

    lp->pkt_count = 0;

    /* initialize port group bin states; add a default rule at the end */

    if ((* (lp->params)).num_rules >= (CVY_MAX_RULES - 1))
    {
        UNIV_PRINT_CRIT(("Load_start: Too many rules; using max possible."));
        TRACE_CRIT("%!FUNC! Too many rules; using max possible.");
        lp->send_msg.nrules = (USHORT)CVY_MAX_RULES;
    }
    else
        lp->send_msg.nrules = (USHORT)((* (lp->params)).num_rules) + 1;

    for (i=0; i<lp->send_msg.nrules; i++)
    {
        PBIN_STATE bp;  /* ptr. to bin state */
        PCVY_RULE  rp;  /* ptr. to rules array */

        bp = &(lp->pg_state[i]);
        rp = &((* (lp->params)).port_rules[i]);

        if (i == (((ULONG)lp->send_msg.nrules) - 1))

            /* initialize bin state for default rule to single server with
               host priority */

            Bin_state_init(lp, bp, i, lp->my_host_id, CVY_SINGLE, CVY_TCP_UDP,
                           FALSE, (USHORT)0, (* (lp->params)).host_priority);

        else if (rp->mode == CVY_SINGLE)
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           FALSE, (USHORT)0, rp->mode_data.single.priority);
        else if (rp->mode == CVY_MULTI)
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           (BOOLEAN)(rp->mode_data.multi.equal_load),
                           rp->mode_data.multi.affinity,
                           (rp->mode_data.multi.equal_load ?
                            CVY_EQUAL_LOAD : rp->mode_data.multi.load));

        /* handle CVY_NEVER mode as multi-server. the check for
           those modes is done before attempting to hash to the bin in
           Load_packet_check and Load_conn_advise so bin distribution plays
           no role in the behavior, but simply allows the rule to be valid
           across all of the operational servers */

        else
            Bin_state_init(lp, bp, i, lp->my_host_id, rp->mode, rp->protocol,
                           TRUE, (USHORT)0, CVY_EQUAL_LOAD);

        ret = Bin_converge(lp, bp, lp->my_host_id);
        if (!ret)
        {
            UNIV_PRINT_CRIT(("Load_start: Initial convergence inconsistent"));
            TRACE_CRIT("%!FUNC! Initial convergence inconsistent");
            LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
        }

        /* export current port group state to send msg */

        if (i == (((ULONG)(lp->send_msg.nrules)) - 1))
            lp->send_msg.rcode[i]= 0;
        else
            lp->send_msg.rcode[i]= rp->code;

        lp->send_msg.cur_map[i]  = bp->cmap;  /* v2.1 */
        lp->send_msg.new_map[i]  = bp->new_map[lp->my_host_id];
        lp->send_msg.idle_map[i] = bp->idle_bins;
        lp->send_msg.rdy_bins[i] = bp->rdy_bins;
        lp->send_msg.load_amt[i] = bp->load_amt[lp->my_host_id];

        // NOTE: The following line of code was removed when it was discovered that it
        // routinely produces a Wake On LAN pattern in the heartbeat that causes BroadCom
        // NICs to panic.  Although this is NOT an NLB issue, but rather a firmware issue
        // in BroadCom NICs, it was decided to remove the information from the heartbeat
        // to alleviate the problem for customers with BroadCom NICs upgrading to .NET.
        // This array is UNUSED by NLB, so there is no harm in not filling it in; it was
        // added a long time ago for debugging purposes as part of the now-defunct FIN-
        // counting fix that was part of Win2k SP1.
        //
        // For future reference, should we need to use this space in the heartbeat at some
        // future point in time, it appears that we will need to be careful to avoid potential
        // WOL patterns in our heartbeats where we can avoid it.  A WOL pattern is:
        //
        // 6 bytes of 0xFF, followed by 16 idential instances of a "MAC address" that can
        // appear ANYWHERE in ANY frame type, including our very own NLB heartbeats.  E.g.:
        //
        // FF FF FF FF FF FF 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
        // 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
        // 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
        // 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
        // 01 02 03 04 05 06
        //
        // The MAC address need not be valid, however.  In NLB heartbeats, the "MAC address"
        // in the mistaken WOL pattern is "00 00 00 00 00 00".  NLB routinely fills heartbeats
        // with FF and 00 bytes, but it seems that by "luck" no other place in the heartbeat
        // seems this vulnerable.  For instance, in the load_amt array, each entry has a
        // maximum value of 100 (decimal), so there is no possibility of generating the initial
        // 6 bytes of FF to start the WOL pattern.  All of the "map" arrays seem to be saved
        // by two strokes of fortune; (i) little endian and (ii) the bin distribution algorithm.
        //
        // (i) Since we don't use the 4 most significant bits of the ULONGLONGs used to store
        // each map, the most significant bit is NEVER FF.  Because Intel is little endian, the
        // most significant byte appears last.  For example:
        //
        // 0F FF FF FF FF FF FF FF appears in the packet as FF FF FF FF FF FF 0F
        //
        // This breaks the FF sequence in many scenarios.
        //
        // (ii) The way the bin distribution algorithm distributes buckets to hosts seems to
        // discourage other possibilities.  For instance, a current map of:
        //
        // 00 FF FF FF FF FF FF 00
        //
        // just isn't likely.  However, it IS STILL POSSIBLE!  So, it is important to note that:
        //
        // REMOVING THIS LINE OF CODE DOES NOT, IN ANY WAY, GUARANTEE THAT AN NLB HEARTBEAT
        // CANNOT STILL CONTAIN A VALID WAKE ON LAN PATTERN SOMEWHERE ELSE IN THE FRAME!!!

        // lp->send_msg.pg_rsvd1[i] = (ULONG)bp->all_idle_map;
    }

    /* initialize send msg */

    lp->send_msg.host_id   = (USHORT)(lp->my_host_id);
    lp->send_msg.master_id = (USHORT)(lp->my_host_id);
    lp->send_msg.hcode     = lp->params->install_date;
    lp->send_msg.pkt_count = lp->pkt_count;  /* 1.32B */

    Univ_ulong_to_str (lp->my_host_id+1, me, 10);

    /* Tracking convergence - Starting convergence because this host is joining the cluster. */
    LOG_MSGS(MSG_INFO_CONVERGING_NEW_MEMBER, me, me);
    TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d.  Reason: Host %d is joining the cluster.", lp->my_host_id+1, lp->my_host_id+1);

    /* Tracking convergence - Starting convergence. */
    lp->send_msg.state = HST_CVG;

    /* Reset the convergence statistics. */
    lp->num_convergences = 1;
    lp->last_convergence = 0;

    /* activate module */

    lp->active = TRUE;

    return TRUE;

} /* end Load_start */
|
|
|
|
|
|
void Load_init(
|
|
PLOAD_CTXT lp,
|
|
PCVY_PARAMS params)
|
|
{
|
|
ULONG i;
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
|
|
LOCK_INIT(&(lp->lock));
|
|
|
|
if (!(lp->initialized))
|
|
{
|
|
lp->code = CVY_LOADCODE; /* (bbain 8/19/99) */
|
|
|
|
/* initialize hashed connection descriptors and queues */
|
|
|
|
for (i=0; i<CVY_MAX_CHASH; i++)
|
|
{
|
|
PCONN_ENTRY ep;
|
|
|
|
ep = &(lp->hashed_conn[i]);
|
|
|
|
/* Initialize the descriptor at this hash location. */
|
|
Load_init_dscr(lp, ep, FALSE);
|
|
|
|
/* Initialize the connection queue at this hash location. */
|
|
Queue_init(&(lp->connq[i]));
|
|
}
|
|
|
|
/* Initialize connection free and dirty queues. */
|
|
Queue_init(&(lp->conn_dirtyq));
|
|
Queue_init(&(lp->conn_rcvryq));
|
|
|
|
/* Initialize the queues for timing out connection descriptors. */
|
|
Queue_init(&(lp->tcp_expiredq));
|
|
Queue_init(&(lp->ipsec_expiredq));
|
|
|
|
/* Reset the number of dirty connections. */
|
|
lp->num_dirty = 0;
|
|
|
|
for (i=0; i<CVY_MAXBINS; i++)
|
|
{
|
|
/* Reset the dirty connection bin counters. */
|
|
lp->dirty_bin[i] = 0;
|
|
}
|
|
|
|
lp->cln_waiting = FALSE;
|
|
lp->def_timeout =
|
|
lp->cur_timeout = params -> alive_period;
|
|
lp->nconn = 0;
|
|
lp->active = FALSE;
|
|
lp->initialized = TRUE;
|
|
|
|
/* Initially, there are no outstanding connection descriptors. */
|
|
lp->num_dscr_out = 0;
|
|
lp->max_dscr_out = 0;
|
|
|
|
/* Allocate a fixed-size block pool for connection descriptors. */
|
|
lp->free_dscr_pool = NdisCreateBlockPool(sizeof(CONN_DESCR), 0, 'dBLN', NULL);
|
|
|
|
if (lp->free_dscr_pool == NULL)
|
|
{
|
|
UNIV_PRINT_CRIT(("Load_init: Error creating fixed-size block pool"));
|
|
TRACE_CRIT("%!FUNC! Error creating fixed-size block pool");
|
|
}
|
|
|
|
/* Store a pointer to the NLB parameters. */
|
|
lp->params = params;
|
|
|
|
/* Initialize the reference count on this load module. */
|
|
lp->ref_count = 0;
|
|
|
|
/* Reset the internally maintained clock used for connection descriptor timeout. */
|
|
lp->clock_sec = 0;
|
|
lp->clock_msec = 0;
|
|
}
|
|
else
|
|
{
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
}
|
|
|
|
/* Don't start module. */
|
|
|
|
} /* end Load_init */
|
|
|
|
/* DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD! */
/*
 * Function: Load_cleanup
 * Description: Tears down a load module instance. In NLB_TCP_NOTIFICATION
 *              builds, first unlinks every remaining dirty (non-virtual)
 *              connection entry from the global established-connection
 *              queues; then frees every connection descriptor still queued
 *              in the hash buckets and destroys the descriptor block pool.
 *              The module must already be inactive (asserted below).
 * Parameters: lp - pointer to the load module context to clean up.
 * Returns: Nothing.
 */
void Load_cleanup(
    PLOAD_CTXT      lp)
{
    ULONG       i;
    PCONN_ENTRY ep = NULL;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    UNIV_ASSERT(!lp->active);

#if defined (NLB_TCP_NOTIFICATION)
    /* If notification is on, we need to unlink any connections that we have
       from the global established connection queues. */
    if (NLB_NOTIFICATIONS_ON())
    {
        /* Loop through all of the dirty descriptors and unlink them all
           from the global connection queue.  There is no need to actually
           clean them up or update any counters as this load module is
           about to disappear. */
        ep = (PCONN_ENTRY)Queue_deq(&lp->conn_dirtyq);

        while (ep != NULL)
        {
            UNIV_ASSERT(ep->code == CVY_ENTRCODE);

            /* If we're about to clean up this descriptor, it had better be dirty. */
            UNIV_ASSERT(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY);

            /* Note: virtual descriptors are NOT placed in the global connection
               queues, so dirty virtual descriptors do not need to be unlinked. */
            if (!(ep->flags & NLB_CONN_ENTRY_FLAGS_VIRTUAL))
            {
                /* Note: The rule for locking the global queues is that you MUST
                   lock the queues BEFORE locking the load module itself.  For
                   most load APIs, the main module locks the load module BEFORE
                   calling the relevant load module API.  Load_cleanup, however,
                   is a case where the load lock is not acquired AT ALL.  Therefore,
                   it is permissible for us to grab the global queue locks here,
                   knowing that the load module lock has NOT BEEN LOCKED.  DO NOT
                   ACQUIRE THE LOAD MODULE LOCK BEFORE CALLING THIS FUNCTION. */
                NdisAcquireSpinLock(&g_conn_estabq[ep->index].lock);

                /* Unlink from the global connection queue and keep the
                   queue's length counter in sync. */
                g_conn_estabq[ep->index].length--;
                Link_unlink(&ep->glink);

                NdisReleaseSpinLock(&g_conn_estabq[ep->index].lock);
            }

            /* Get the next descriptor in the queue. */
            ep = (PCONN_ENTRY)Queue_deq(&lp->conn_dirtyq);
        }
    }
#endif

    /* Destroy the fixed-size block pool and all descriptors therein.
       Note that NdisDestroyBlockPool expects all allocated blocks
       have been returned to the pool (freed) before it is called. */
    if (lp->free_dscr_pool != NULL)
    {
        /* Loop through all of the connection descriptor queues and
           free all of the descriptors we've allocated. */
        for (i = 0; i < CVY_MAX_CHASH; i++)
        {
            /* Dequeue the head of the queue. */
            PCONN_DESCR dp = (PCONN_DESCR)Queue_deq(&lp->connq[i]);

            while (dp != NULL)
            {
                UNIV_ASSERT(dp->code == CVY_DESCCODE);

                /* If we're about to free this descriptor, it had better be allocated. */
                UNIV_ASSERT(dp->entry.flags & NLB_CONN_ENTRY_FLAGS_ALLOCATED);

                /* Free the descriptor back to the fixed-size block pool. */
                NdisFreeToBlockPool((PUCHAR)dp);

                /* Get the next descriptor in the queue. */
                dp = (PCONN_DESCR)Queue_deq(&lp->connq[i]);
            }
        }

        /* Destroy the fixed-size block pool. */
        NdisDestroyBlockPool(lp->free_dscr_pool);
    }

} /* end Load_cleanup */
|
|
|
|
/*
 * Function: Load_convergence_start
 * Description: Moves this host into the converging (HST_CVG) state and resets
 *              all convergence bookkeeping: the map of hosts known stable,
 *              the local and cluster-wide stable tick counters, and the
 *              advertised master ID, which reverts to this host's own ID.
 *              A new convergence is counted only when leaving HST_NORMAL.
 * Parameters: lp - pointer to the load module context.
 * Returns: Nothing.
 */
void Load_convergence_start (PLOAD_CTXT lp)
{
    PMAIN_CTXT ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);

    lp->consistent = TRUE;

    /* Count a new convergence only when dropping out of normal operation;
       re-entering here while already converging is not a fresh convergence. */
    if (lp->send_msg.state == HST_NORMAL)
        lp->num_convergences++;

    /* Enter the converging state. */
    lp->send_msg.state = HST_CVG;

    /* Clear all stability tracking accumulated so far. */
    lp->my_stable_ct  = 0;
    lp->all_stable_ct = 0;
    lp->stable_map    = 0;

    /* Until heartbeats say otherwise, advertise ourselves as the master. */
    lp->send_msg.master_id = (USHORT)(lp->my_host_id);
}
|
|
|
|
/*
 * Function: Load_msg_rcv
 * Description: Processes a heartbeat (ping) message received from a cluster
 *              host.  Detects duplicate host IDs, port-rule count/code
 *              mismatches, BDA teaming inconsistencies and master-ID
 *              disagreement; updates the ping/host membership maps and the
 *              per-port-rule bin state; and drives the convergence state
 *              machine (initiating convergence, recording remote stability,
 *              and completing convergence as a non-master when the DEFAULT
 *              host returns to normal operation).
 * Parameters: lp   - pointer to the load module context.
 *             phdr - pointer to the frame header (PMAIN_FRAME_HDR) of the
 *                    received heartbeat; supplies sender version and DIP.
 *             pmsg - ptr. to ping message
 * Returns: TRUE if this host is currently converging (send state is not
 *          HST_NORMAL); FALSE if converged, inactive, or the sender's host
 *          ID is out of range.
 * Notes: Acquires lp->lock for the duration of processing (released on
 *        every exit path past the early returns).
 */
BOOLEAN Load_msg_rcv(
    PLOAD_CTXT      lp,
    PVOID           phdr,
    PPING_MSG       pmsg)           /* ptr. to ping message */
{
    ULONG           i;
    BOOLEAN         consistent;
    ULONG           my_host;
    ULONG           rem_host;
    ULONG           saved_map;      /* saved host map */
    PPING_MSG       sendp;          /* ptr. to my send message */
    IRQLEVEL        irql;
    WCHAR           me[20];
    WCHAR           them[20];
    ULONG           map;
    PMAIN_CTXT      ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    PMAIN_FRAME_HDR ping_hdrp = (PMAIN_FRAME_HDR)phdr;

    /* Used for tracking convergence and event logging. */
    BOOLEAN bInconsistentMaster = FALSE;
    BOOLEAN bInconsistentTeaming = FALSE;
    BOOLEAN bInconsistentPortRules = FALSE;

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    TRACE_HB("%!FUNC! Recv HB from host %d", (ULONG) pmsg->host_id + 1);

    /* Ignore heartbeats entirely while the load module is not active. */
    if (!(lp->active))
    {
        return FALSE;
    }

    my_host = lp->my_host_id;
    rem_host = (ULONG) pmsg->host_id;

    /* Pre-format 1-based host IDs as strings for event-log messages. */
    Univ_ulong_to_str (my_host+1, me, 10);
    Univ_ulong_to_str (rem_host+1, them, 10);

    sendp = &(lp->send_msg);

    /* Reject heartbeats with an out-of-range host ID. */
    if (rem_host >= CVY_MAX_HOSTS)
    {
        return FALSE;
    }

    LOCK_ENTER(&(lp->lock), &irql);

    /* If this heartbeat is from a win2k host, add it to the legacy host map. */
    if (ping_hdrp->version < CVY_VERSION_FULL)
        lp->legacy_hosts |= (1 << rem_host);

    /* filter out packets broadcast by this host */
    if (rem_host == my_host)
    {
        /* if this packet was really from another host, we have duplicate host ids
           (hcode is this host's install stamp, so a mismatch means a different
           machine is using our host ID) */
        if (sendp->hcode != pmsg->hcode)
        {
            /* Log the duplicate-host-ID error only once per episode. */
            if (!(lp->dup_hosts))
            {
                UNIV_PRINT_CRIT(("Load_msg_rcv: Duplicate host ids detected."));
                TRACE_CRIT("%!FUNC! Duplicate host ids detected.");

                LOG_MSG(MSG_ERROR_HOST_ID, me);

                lp->dup_hosts = TRUE;
            }

            /* Tracking convergence - Starting convergence because duplicate host IDs were detected in the cluster. */
            if (sendp->state == HST_NORMAL) {

                LOG_MSGS(MSG_INFO_CONVERGING_DUPLICATE_HOST_ID, me, them);
                TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d is configured with the same host ID.", my_host+1, rem_host+1);

                // If enabled, fire wmi event indicating start of convergence
                if (NlbWmiEvents[ConvergingEvent].Enable)
                {
                    WCHAR wsDip[CVY_MAX_DED_IP_ADDR + 1];

                    Univ_ip_addr_ulong_to_str (ping_hdrp->ded_ip_addr, wsDip);

                    NlbWmi_Fire_ConvergingEvent(ctxtp,
                                                NLB_EVENT_CONVERGING_DUPLICATE_HOST_ID,
                                                wsDip,
                                                rem_host+1);
                }
                else
                {
                    TRACE_VERB("%!FUNC! NOT Generating NLB_EVENT_CONVERGING_DUPLICATE_HOST_ID 'cos ConvergingEvent generation disabled");
                }
            }

            /* Tracking convergence - Starting convergence. */
            Load_convergence_start(lp);
        }

        /* just update ping and host maps for us */
        lp->ping_map |= (1 << my_host);
        lp->host_map |= (1 << my_host);

        LOCK_EXIT(&(lp->lock), irql);

        return (sendp->state != HST_NORMAL);
    }

    /* A differing port-rule count is a hard configuration mismatch: log it
       (once), force convergence, and skip all per-rule processing. */
    if (sendp->nrules != pmsg->nrules)
    {
        if (!(lp->bad_num_rules))
        {
            UNIV_PRINT_CRIT(("Load_msg_rcv: Host %d: Hosts have diff # rules.", my_host));
            TRACE_CRIT("%!FUNC! Host %d: Hosts have diff # rules.", my_host);

            LOG_MSG2(MSG_ERROR_RULES_MISMATCH, them, sendp->nrules, pmsg->nrules);

            lp->bad_num_rules = TRUE;
        }

        /* Tracking convergence - Starting convergence because the number of port rules on this host and the remote host do not match. */
        if (sendp->state == HST_NORMAL) {

            LOG_MSGS(MSG_INFO_CONVERGING_NUM_RULES, me, them);
            TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d is configured with a conflicting number of port rules.", my_host+1, rem_host+1);

            // If enabled, fire wmi event indicating start of convergence
            if (NlbWmiEvents[ConvergingEvent].Enable)
            {
                WCHAR wsDip[CVY_MAX_DED_IP_ADDR + 1];

                Univ_ip_addr_ulong_to_str (ping_hdrp->ded_ip_addr, wsDip);

                NlbWmi_Fire_ConvergingEvent(ctxtp,
                                            NLB_EVENT_CONVERGING_NUM_RULES,
                                            wsDip,
                                            rem_host+1);
            }
            else
            {
                TRACE_VERB("%!FUNC! NOT Generating NLB_EVENT_CONVERGING_NUM_RULES 'cos ConvergingEvent generation disabled");
            }
        }

        /* Tracking convergence - Starting convergence. */
        Load_convergence_start(lp);

        /* just update ping and host maps for remote host (bbain 2/17/99) */
        lp->ping_map |= (1 << rem_host);
        lp->host_map |= (1 << rem_host);

        LOCK_EXIT(&(lp->lock), irql);

        return (sendp->state != HST_NORMAL);
    }

    /* update mastership and see if consistent: the master is the lowest
       known host ID, and both hosts must agree on who that is */
    if (rem_host < sendp->master_id)
        sendp->master_id = (USHORT)rem_host;

    consistent = sendp->master_id == pmsg->master_id;   /* 1.03 */

    /* For the purposes of logging the reason for convergence, note this inconsistency. */
    if (!consistent) bInconsistentMaster = TRUE;

    /* update ping and host maps to include remote host */
    lp->ping_map |= (1 << rem_host);

    /* Remember the previous membership so we can tell below whether this
       heartbeat represents a host joining the cluster. */
    saved_map = lp->host_map;
    lp->host_map |= (1 << rem_host);

    /* handle host convergence */
    if (sendp->state != HST_NORMAL)
    {
        /* if master, update stable map for remote host */
        if (sendp->master_id == my_host)
        {
            if (pmsg->state == HST_STABLE)
            {
                lp->stable_map |= (1 << rem_host);
            }
            else
            {
                /* Remote host fell out of stability: clear its bit and
                   restart the all-stable countdown. */
                lp->stable_map &= ~(1 << rem_host);
                lp->all_stable_ct = 0;
            }
        }

        /* otherwise, update state if have global stable convergence and the current
           master has signalled completion by returning to the normal state; note
           that we must do this prior to updating port group states */
        else if (rem_host == sendp->master_id && pmsg->state == HST_NORMAL)
        {
            if (sendp->state == HST_STABLE)
            {
                /* Convergence completed: return to normal operation as a
                   non-master and commit the newly converged bin maps. */
                sendp->state = HST_NORMAL;

                /* Note the time of the last completed convergence. */
                lp->last_convergence = lp->clock_sec;

                /* Notify our BDA team that this cluster is consistently configured.
                   If we are not part of a BDA team, this call is essentially a no-op. */
                Load_teaming_consistency_notify(&ctxtp->bda_teaming, TRUE);

                /* Reset the bad teaming configuration detected flag if we are converged. */
                lp->bad_team_config = FALSE;

                /* Clear all sticky error-latch flags now that the cluster
                   has reached a consistent state. */
                lp->dup_hosts = FALSE;
                lp->dup_sspri = FALSE;
                lp->bad_map = FALSE;
                lp->overlap_maps = FALSE;
                lp->err_rcving_bins = FALSE;
                lp->err_orphans = FALSE;
                lp->bad_num_rules = FALSE;
                lp->pkt_count = 0;  /* v1.32B */

                /* Commit the converged bin ownership for every port rule. */
                for (i=0; i<sendp->nrules; i++)
                {
                    PBIN_STATE bp;

                    bp = &(lp->pg_state[i]);

                    bp->compatible = TRUE;  /* 1.03 */

                    Bin_converge_commit(lp, bp, my_host);

                    UNIV_PRINT_VERB(("Load_msg_rcv: Host %d pg %d: new cur map %x idle %x all %x",
                                     my_host, i, bp->cur_map[my_host], bp->idle_bins,
                                     bp->all_idle_map));
                    TRACE_CONVERGENCE("%!FUNC! Host %d pg %d: new cur map 0x%x idle 0x%x all 0x%x",
                                      my_host, i, (ULONG)bp->cur_map[my_host], (ULONG)bp->idle_bins,
                                      (ULONG)bp->all_idle_map);
                }

                UNIV_PRINT_VERB(("Load_msg_rcv: Host %d: converged as slave", my_host));
                TRACE_VERB("%!FUNC! Host %d: converged as slave", my_host);

                /* log convergence completion if host map changed (bbain RTM RC1 6/23/99) */
                /* Ignoring return value is OK since the return values are all non-errors */
                Load_hosts_query (lp, TRUE, & map);
                lp->last_hmap = lp->host_map;

                if (lp->legacy_hosts) {
                    /* If a Win2k or NT4.0 host is attempting to join the cluster, warn the user that there are potential
                       limitations of mixed clusters, such as no virtual cluster support, no IGMP, no BDA, no VPN session
                       support and others.  For some of these, the cluster will not be allowed to converge, while for some
                       it will, so we'll just warn the user that they should check the documentation for limitations. */
                    UNIV_PRINT_INFO(("Load_msg_rcv: NT4.0/Win2k host(s) detected: Be aware of the limitations of operating a mixed cluster."));
                    TRACE_INFO("%!FUNC! NT4.0/Win2k host(s) detected: Be aware of the limitations of operating a mixed cluster.");

                    LOG_MSG(MSG_WARN_MIXED_CLUSTER, MSG_NONE);
                }
            }
            else
            {
                /* Tracking convergence - Starting convergence because the DEFAULT host prematurely ended convergence.  In this case, we
                   are guaranteed to already be in the HST_CVG state, and because this message can be misleading in some circumstances,
                   we do not log an event.  For instance, due to timing issues, when a host joins a cluster he can receive a HST_NORMAL
                   heartbeat from the DEFAULT host while it is still in the HST_CVG state simply because that heartbeat left the DEFAULT
                   host before it received our first heartbeat, which initiated convergence. */
                TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d, the DEFAULT host, prematurely terminated convergence.", my_host+1, rem_host+1);

                /* Tracking convergence - Starting convergence. */
                Load_convergence_start(lp);
            }
        }
    }

    /* Compare the teaming configuration of this host with the remote host.  If the
       two are inconsistent and we are part of a team, we will initiate convergence. */
    if (!Load_teaming_consistency_check(lp->bad_team_config, &ctxtp->bda_teaming, sendp->teaming, pmsg->teaming, ping_hdrp->version)) {
        /* Only log an event if the teaming configuration was, but is now not, consistent. */
        if (!lp->bad_team_config) {
            /* Note that we saw this. */
            lp->bad_team_config = TRUE;

            /* Log the event. */
            LOG_MSG(MSG_ERROR_BDA_BAD_TEAM_CONFIG, them);
        }

        /* Notify the team that this cluster is NOT consistently configured. */
        Load_teaming_consistency_notify(&ctxtp->bda_teaming, FALSE);

        /* Mark the heartbeats inconsistent to force and retain convergence. */
        consistent = FALSE;

        /* For the purposes of logging the reason for convergence, note this inconsistency. */
        bInconsistentTeaming = TRUE;
    }

    /* update port group state */
    for (i=0; i<sendp->nrules; i++)
    {
        BOOLEAN ret;
        PBIN_STATE bp;

        bp = &lp->pg_state[i];

        /* if rule codes don't match, print message and handle incompatibility (1.03: note
           that we previously marked rule invalid, which would stop processing) */
        if (sendp->rcode[i] != pmsg->rcode[i])
        {
            /* 1.03: if rule was previously compatible, print message */
            if (bp->compatible)
            {
                PCVY_RULE rp;

                UNIV_PRINT_CRIT(("Load_msg_rcv: Host %d pg %d: Rule codes do not match.", lp->my_host_id, i));
                TRACE_CRIT("%!FUNC! Host %d pg %d: Rule codes do not match.", lp->my_host_id, i);

                /* bbain 8/27/99 */
                LOG_MSG2(MSG_ERROR_RULES_MISMATCH, them, sendp->rcode[i], pmsg->rcode[i]);

                /* Get the port rule information for this rule. */
                rp = &lp->params->port_rules[i];

                /* Check to see if this is an issue with a win2k host in a cluster utilizing virtual clusters. */
                if ((rp->virtual_ip_addr != CVY_ALL_VIP_NUMERIC_VALUE) && ((sendp->rcode[i] ^ ~rp->virtual_ip_addr) == pmsg->rcode[i])) {
                    UNIV_PRINT_CRIT(("Load_msg_rcv: ** A Windows 2000 or NT4 host MAY be participating in a cluster utilizing virtual cluster support."));
                    TRACE_CRIT("%!FUNC! ** A Windows 2000 or NT4 host MAY be participating in a cluster utilizing virtual cluster support.");
                    LOG_MSG(MSG_WARN_VIRTUAL_CLUSTERS, them);
                }

                bp->compatible = FALSE;
            }

            /* 1.03: mark rule inconsistent to force and continue convergence */
            consistent = FALSE;

            /* For the purposes of logging the reason for convergence, note this inconsistency. */
            bInconsistentPortRules = TRUE;

            /* don't update bin state */
            continue;
        }

        /* Merge the remote host's view of this port rule into our bin state;
           an inconsistent merge result forces/extends convergence. */
        ret = Bin_host_update(lp, bp, my_host, (BOOLEAN)(sendp->state != HST_NORMAL),
                              (BOOLEAN)(pmsg->state != HST_NORMAL),
                              rem_host, pmsg->cur_map[i], pmsg->new_map[i],
                              pmsg->idle_map[i], pmsg->rdy_bins[i],
                              pmsg->pkt_count, pmsg->load_amt[i]);

        if (!ret)
            consistent = FALSE;
    }

    /* update our consistency state */
    lp->consistent = consistent;

    /* if we are in normal operation and we discover a new host or a host goes into
       convergence or we discover an inconsistency, go into convergence */
    if (sendp->state == HST_NORMAL)
    {
        if (lp->host_map != saved_map || pmsg->state == HST_CVG || !consistent)
        {
            ConvergingEventId Cause = NLB_EVENT_CONVERGING_UNKNOWN;

            /* If a host has joined the cluster, or if inconsistent teaming configuration or port
               rules were detected, then we need to log an event.  However, we segregate the
               inconsistent master host flag because it is set by the initiating host in MANY
               occasions, so we want to log the most specific reason(s) for convergence if
               possible and only report the inconsistent master detection only if nothing more
               specific can be deduced. */
            if (lp->host_map != saved_map || bInconsistentTeaming || bInconsistentPortRules) {

                /* If the host maps are different, then we know that the host from which we received
                   this packet is joining the cluster because the ONLY operation on the host map in
                   this function is to ADD a remote host to our map.  Otherwise, if the map has not
                   changed, then an inconsistent configuration got us into the branch. */
                if (lp->host_map != saved_map) {
                    /* Tracking convergence - Starting convergence because another host is joining the cluster. */
                    LOG_MSGS(MSG_INFO_CONVERGING_NEW_MEMBER, me, them);
                    TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d is joining the cluster.", my_host+1, rem_host+1);

                    Cause = NLB_EVENT_CONVERGING_NEW_MEMBER;
                } else if (bInconsistentTeaming || bInconsistentPortRules) {
                    /* Tracking convergence - Starting convergence because inconsistent configuration was detected. */
                    LOG_MSGS(MSG_INFO_CONVERGING_BAD_CONFIG, me, them);
                    TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d has conflicting configuration.", my_host+1, rem_host+1);

                    Cause = NLB_EVENT_CONVERGING_BAD_CONFIG;
                }

            /* If we have nothing better to report, report convergence for an unspecific reason. */
            } else if (bInconsistentMaster || pmsg->state == HST_CVG) {
                /* Tracking convergence - Starting convergence for unknown reasons. */
                LOG_MSGS(MSG_INFO_CONVERGING_UNKNOWN, me, them);
                TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d is converging for an unknown reason.", my_host+1, rem_host+1);
            }

            // If enabled, fire wmi event indicating start of convergence
            if (NlbWmiEvents[ConvergingEvent].Enable)
            {
                WCHAR wsDip[CVY_MAX_DED_IP_ADDR + 1];

                Univ_ip_addr_ulong_to_str (ping_hdrp->ded_ip_addr, wsDip);

                NlbWmi_Fire_ConvergingEvent(ctxtp,
                                            Cause,
                                            wsDip,
                                            rem_host+1);
            }
            else
            {
                TRACE_VERB("%!FUNC! NOT Generating ConvergingEvent(New Member/Bad Config/Unknown) 'cos ConvergingEvent generation disabled");
            }

            /* Tracking convergence - Starting convergence. */
            Load_convergence_start(lp);
        }
    }

    /* otherwise, if we are in convergence and we see an inconsistency, just restart
       our local convergence */
    else
    {
        /* update our consistency state; if we didn't see consistent information,
           restart this host's convergence */
        if (!consistent)
        {
            /* Tracking convergence - Starting convergence because inconsistent configuration was detected.
               This keeps hosts in a state of convergence when hosts are inconsistently configured.  However,
               since the cluster is already in a state of convergence (HST_CVG or HST_STABLE), don't log an
               event, which may confuse a user. */
            TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d has conflicting configuration.", my_host+1, rem_host+1);

            /* Tracking convergence - Starting convergence (inline reset rather
               than Load_convergence_start, so master_id is left untouched). */
            sendp->state = HST_CVG;
            lp->my_stable_ct = 0;
            lp->stable_map &= ~(1 << my_host);
            lp->all_stable_ct = 0;
        }
    }

    LOCK_EXIT(&(lp->lock), irql);

    return (sendp->state != HST_NORMAL);

} /* end Load_msg_rcv */
|
|
|
|
|
|
PPING_MSG Load_snd_msg_get(
|
|
PLOAD_CTXT lp)
|
|
{
|
|
return &(lp->send_msg);
|
|
|
|
} /* end Load_snd_msg_get */
|
|
|
|
/*
|
|
* Function: Load_age_descriptors
|
|
* Description: This function searches a list of connection descriptors and
|
|
* removes those whose timeouts have expired. The queues are
|
|
* sorted timeout queues, so it is only ever necessary to look
|
|
* at the head of the queue to find expired descriptors. This
|
|
* function loops until all expired descriptors are removed.
|
|
* Parameters: lp - a pointer to the load module.
|
|
* eqp - pointer to the expired descriptor queue to service.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 9.9.01
|
|
* Notes:
|
|
*/
|
|
void Load_age_descriptors (PLOAD_CTXT lp, QUEUE * eqp)
|
|
{
|
|
PCONN_ENTRY ep; /* Pointer to connection entry. */
|
|
PBIN_STATE bp; /* Pointer to port rule state. */
|
|
LINK * linkp; /* Pointer to the queue link. */
|
|
BOOLEAN err_bin = FALSE; /* Bin ID error detected. */
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);
|
|
|
|
/* Get a pointer to (but do not dequeue) the head of the queue. */
|
|
linkp = (LINK *)Queue_front(eqp);
|
|
|
|
/* As long as there are descriptors to check, keep looking - when
|
|
we find the first descriptor that is NOT ready to be dequeued,
|
|
we stop looking and break out of the loop. */
|
|
while (linkp != NULL) {
|
|
/* Get a pointer to the descriptor (linkp is a pointer to
|
|
the LIST_ENTRY in the descriptor, not the descriptor). */
|
|
ep = STRUCT_PTR(linkp, CONN_ENTRY, rlink);
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* Do some sanity checking on the bin number. */
|
|
if (ep->bin >= CVY_MAXBINS) {
|
|
if (!err_bin) {
|
|
TRACE_CRIT("%!FUNC! Bad bin number");
|
|
LOG_MSG2(MSG_ERROR_INTERNAL, MSG_NONE, ep->bin, CVY_MAXBINS);
|
|
|
|
err_bin = TRUE;
|
|
}
|
|
}
|
|
|
|
#if defined (TRACE_DSCR)
|
|
DbgPrint("Load_age_descriptors: Descriptor %p: clock=%u, timeout=%u", ep, lp->clock_sec, ep->timeout);
|
|
#endif
|
|
|
|
/* If the current clock time is greater than or equal to the
|
|
scheduled timeout for this descriptor, then pull it off
|
|
and recycle it. */
|
|
if (lp->clock_sec >= ep->timeout) {
|
|
|
|
#if defined (TRACE_DSCR)
|
|
DbgPrint("Load_age_descriptors: Removing descriptor %p", ep);
|
|
#endif
|
|
|
|
/* Lookup the port rule, so we can update the port rule info. */
|
|
bp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, IS_TCP_PKT(ep->protocol));
|
|
|
|
/* Clear the descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
|
|
/* Break if this descriptor was not ready to expire yet. */
|
|
} else break;
|
|
|
|
/* Grab the next descriptor in the queue. */
|
|
linkp = (LINK *)Queue_front(eqp);
|
|
}
|
|
}
|
|
|
|
BOOLEAN Load_timeout(
|
|
PLOAD_CTXT lp,
|
|
PULONG new_timeout,
|
|
PULONG pnconn)
|
|
/*
|
|
Note: we only update ping message in this function since we know that upper level code
|
|
sends out ping messages after calling this routine. We cannot be sure that Load_msg_rcv
|
|
is sequentialized with sending a message, (1.03)
|
|
|
|
Upper level code locks this routine wrt Load_msg_rcv, Load_packet_check, and
|
|
Load_conn_advise. (1.03)
|
|
*/
|
|
{
|
|
ULONG missed_pings;
|
|
ULONG my_host;
|
|
ULONG i;
|
|
PPING_MSG sendp; /* ptr. to my send message */
|
|
IRQLEVEL irql;
|
|
ULONG map; /* returned host map from query */
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
BOOLEAN fRet = FALSE;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
LOCK_ENTER(&(lp->lock), &irql);
|
|
|
|
if ((lp->cln_waiting) && (lp->cur_time < lp->cln_timeout))
|
|
{
|
|
lp->cur_time += lp->cur_timeout;
|
|
|
|
if (lp->cur_time >= lp->cln_timeout)
|
|
{
|
|
TRACE_INFO("%!FUNC! Cleaning out dirty connection descriptors");
|
|
|
|
Load_conn_cleanup(lp);
|
|
}
|
|
}
|
|
|
|
/* Update the internal clock. We add the time since the last timeout
|
|
(in ms) to our msec count. We then add any whole number of seconds
|
|
that have accumulated in msec to the sec count. The remainder is
|
|
left in msec to accumulate. */
|
|
lp->clock_msec += lp->cur_timeout;
|
|
lp->clock_sec += (lp->clock_msec / 1000);
|
|
lp->clock_msec = (lp->clock_msec % 1000);
|
|
|
|
/* Age all connection descriptors. */
|
|
Load_age_descriptors(lp, &(lp->tcp_expiredq));
|
|
Load_age_descriptors(lp, &(lp->ipsec_expiredq));
|
|
|
|
/* Return if not active. */
|
|
if (!(lp->active))
|
|
{
|
|
if (new_timeout != NULL)
|
|
* new_timeout = lp->cur_timeout = lp->def_timeout;
|
|
if (pnconn != NULL) /* v2.1 */
|
|
* pnconn = lp->nconn;
|
|
|
|
LOCK_EXIT(&(lp->lock), irql);
|
|
return FALSE;
|
|
}
|
|
|
|
my_host = lp->my_host_id;
|
|
sendp = &(lp->send_msg);
|
|
|
|
/* compute which hosts missed pings and reset ping map */
|
|
|
|
missed_pings = lp->host_map & (~lp->ping_map);
|
|
|
|
#ifdef NO_CLEANUP
|
|
lp->ping_map = 1 << my_host;
|
|
#else
|
|
lp->ping_map = 0;
|
|
#endif
|
|
|
|
/* check whether any host is dead, including ourselves */
|
|
|
|
for (i=0; i<CVY_MAX_HOSTS; i++)
|
|
{
|
|
/* if we have a missed ping for this host, increment count */
|
|
|
|
if ((missed_pings & 0x1) == 1)
|
|
{
|
|
lp->nmissed_pings[i]++;
|
|
|
|
/* if we missed too many pings, declare host dead and force convergence */
|
|
|
|
if (lp->nmissed_pings[i] == lp->min_missed_pings)
|
|
{
|
|
ULONG j;
|
|
BOOLEAN ret;
|
|
WCHAR me[20];
|
|
WCHAR them[20];
|
|
|
|
if (i == my_host)
|
|
{
|
|
UNIV_PRINT_VERB(("Load_timeout: Host %d: Missed too many pings; this host declared offline", i));
|
|
TRACE_VERB("%!FUNC! Host %d: Missed too many pings; this host declared offline", i);
|
|
|
|
/* reset our packet count since we are likely not to be receiving
|
|
packets from others now; this will make us less favored to
|
|
handle duplicate bins later (v1.32B) */
|
|
|
|
lp->pkt_count = 0;
|
|
}
|
|
|
|
lp->host_map &= ~(1<<i);
|
|
|
|
/* Reset the legacy host bit if the host has gone off-line. */
|
|
lp->legacy_hosts &= ~(1<<i);
|
|
|
|
for (j=0; j<sendp->nrules; j++)
|
|
{
|
|
PBIN_STATE bp;
|
|
|
|
bp = &(lp->pg_state[j]);
|
|
UNIV_ASSERT(bp->code == CVY_BINCODE); /* (bbain 8/19/99) */
|
|
|
|
if (i == my_host)
|
|
{
|
|
ULONG k;
|
|
|
|
/* cleanup connections and restore maps to clean state */
|
|
|
|
Load_conn_kill(lp, bp);
|
|
|
|
bp->targ_map = 0;
|
|
bp->all_idle_map = BIN_ALL_ONES;
|
|
bp->cmap = 0; /* v2.1 */
|
|
bp->compatible = TRUE; /* v1.03 */
|
|
|
|
for (k=0; k<CVY_MAX_HOSTS; k++)
|
|
{
|
|
bp->new_map[k] = 0;
|
|
bp->cur_map[k] = 0;
|
|
bp->chk_map[k] = 0;
|
|
bp->idle_map[k] = BIN_ALL_ONES;
|
|
|
|
if (k != i)
|
|
bp->load_amt[k] = 0;
|
|
}
|
|
|
|
bp->snd_bins =
|
|
bp->rcv_bins =
|
|
bp->rdy_bins = 0;
|
|
bp->idle_bins = BIN_ALL_ONES;
|
|
|
|
/* Re-initialize the performance counters. */
|
|
bp->packets_accepted = 0;
|
|
bp->packets_dropped = 0;
|
|
bp->bytes_accepted = 0;
|
|
bp->bytes_dropped = 0;
|
|
|
|
/* compute initial new map for convergence as only host in cluster
|
|
(v 1.3.2B) */
|
|
|
|
ret = Bin_converge(lp, bp, lp->my_host_id);
|
|
if (!ret)
|
|
{
|
|
UNIV_PRINT_CRIT(("Load_timeout: Initial convergence inconsistent"));
|
|
TRACE_CRIT("%!FUNC! Initial convergence inconsistent");
|
|
LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ret = Bin_host_update(lp, bp, my_host, TRUE, TRUE,
|
|
i, 0, 0, BIN_ALL_ONES, 0, 0, 0);
|
|
}
|
|
}
|
|
|
|
lp->nmissed_pings[i] = 0;
|
|
|
|
/* If a host has dropped out of the cluster, then log an event. However, we don't
|
|
log an event when we drop out because the only way for us to drop out of our own
|
|
cluster is if we are stopping anyway, or if we have lost network connectivity.
|
|
Logging such events may be misleading, so we won't bother. */
|
|
if (i != my_host) {
|
|
Univ_ulong_to_str (my_host+1, me, 10);
|
|
Univ_ulong_to_str (i+1, them, 10);
|
|
|
|
/* Tracking convergence - Starting convergence because a member has fallen out of the cluster. */
|
|
LOG_MSGS(MSG_INFO_CONVERGING_MEMBER_LOST, me, them);
|
|
TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d is leaving the cluster.", my_host+1, i+1);
|
|
|
|
// If enabled, fire wmi event indicating start of convergence
|
|
if (NlbWmiEvents[ConvergingEvent].Enable)
|
|
{
|
|
NlbWmi_Fire_ConvergingEvent(ctxtp,
|
|
NLB_EVENT_CONVERGING_MEMBER_LOST,
|
|
NLB_EVENT_NO_DIP_STRING,
|
|
i+1);
|
|
}
|
|
else
|
|
{
|
|
TRACE_VERB("%!FUNC! NOT Generating NLB_EVENT_CONVERGING_MEMBER_LOST 'cos ConvergingEvent generation disabled");
|
|
}
|
|
}
|
|
|
|
/* Tracking convergence - Starting convergence. */
|
|
Load_convergence_start(lp);
|
|
}
|
|
}
|
|
|
|
/* otherwise reset missed ping count */
|
|
|
|
else
|
|
lp->nmissed_pings[i] = 0;
|
|
|
|
missed_pings >>= 1;
|
|
}
|
|
|
|
/* handle convergence */
|
|
|
|
if (sendp->state != HST_NORMAL)
|
|
{
|
|
/* check whether we have been consistent and have received our own pings
|
|
for a sufficient period to move to a stable state and announce it to
|
|
other hosts */
|
|
|
|
if (sendp->state == HST_CVG)
|
|
{
|
|
if (lp->consistent && ((lp->host_map & (1 << my_host)) != 0))
|
|
{
|
|
lp->my_stable_ct++;
|
|
if (lp->my_stable_ct >= lp->min_stable_ct)
|
|
{
|
|
sendp->state = HST_STABLE;
|
|
lp->stable_map |= (1 << my_host);
|
|
}
|
|
}
|
|
else
|
|
lp->my_stable_ct = lp->all_stable_ct = 0; /* wlb B3RC1 */
|
|
}
|
|
|
|
/* otherwise, see if we are the master and everybody's been stable for
|
|
a sufficient period for us to terminate convergence */
|
|
|
|
else if (sendp->state == HST_STABLE &&
|
|
my_host == sendp->master_id &&
|
|
lp->stable_map == lp->host_map)
|
|
{
|
|
lp->all_stable_ct++;
|
|
if (lp->all_stable_ct >= lp->min_stable_ct)
|
|
{
|
|
sendp->state = HST_NORMAL;
|
|
|
|
/* Note the time of the last completed convergence. */
|
|
lp->last_convergence = lp->clock_sec;
|
|
|
|
/* Notify our BDA team that this cluster is consistently configured.
|
|
If we are not part of BDA team, this call is essentially a no-op. */
|
|
Load_teaming_consistency_notify(&ctxtp->bda_teaming, TRUE);
|
|
|
|
/* Reset the bad teaming configuration detected flag if we are converged. */
|
|
lp->bad_team_config = FALSE;
|
|
|
|
lp->dup_hosts = FALSE;
|
|
lp->dup_sspri = FALSE;
|
|
lp->bad_map = FALSE;
|
|
lp->overlap_maps = FALSE;
|
|
lp->err_rcving_bins = FALSE;
|
|
lp->err_orphans = FALSE;
|
|
lp->bad_num_rules = FALSE;
|
|
lp->pkt_count = 0; /* v1.32B */
|
|
|
|
for (i=0; i<sendp->nrules; i++)
|
|
{
|
|
PBIN_STATE bp;
|
|
BOOLEAN ret;
|
|
|
|
bp = &(lp->pg_state[i]);
|
|
|
|
bp->compatible = TRUE; /* 1.03 */
|
|
|
|
/* explicitly converge to new map in case we're the only host (v2.06) */
|
|
|
|
ret = Bin_converge(lp, bp, lp->my_host_id);
|
|
if (!ret)
|
|
{
|
|
UNIV_PRINT_CRIT(("Load_timeout: Final convergence inconsistent"));
|
|
TRACE_CRIT("%!FUNC! Final convergence inconsistent");
|
|
LOG_MSG(MSG_ERROR_INTERNAL, MSG_NONE);
|
|
}
|
|
|
|
Bin_converge_commit(lp, bp, my_host);
|
|
|
|
UNIV_PRINT_VERB(("Load_timeout: Host %d pg %d: new cur map %x idle %x all %x",
|
|
my_host, i, bp->cur_map[my_host], bp->idle_bins,
|
|
bp->all_idle_map));
|
|
}
|
|
|
|
UNIV_PRINT_VERB(("Load_timeout: Host %d: converged as master", my_host));
|
|
TRACE_CONVERGENCE("%!FUNC! Host %d: converged as master", my_host);
|
|
/* log convergence completion if host map changed (bbain RTM RC1 6/23/99) */
|
|
Load_hosts_query (lp, TRUE, & map);
|
|
lp->last_hmap = lp->host_map;
|
|
|
|
if (lp->legacy_hosts) {
|
|
/* If a Win2k or NT4.0 host is attempting to join the cluster, warn the user that there are potential
|
|
limitations of mixed clusters, such as no virtual cluster support, no IGMP, no BDA, no VPN session
|
|
support and others. For some of these, the cluster will not be allowed to converge, while for some
|
|
it will, so we'll just warn the user that they should check the documentation for limitations. */
|
|
UNIV_PRINT_INFO(("Load_timeout: NT4.0/Win2k host(s) detected: Be aware of the limitations of operating a mixed cluster."));
|
|
TRACE_INFO("%!FUNC! NT4.0/Win2k host(s) detected: Be aware of the limitations of operating a mixed cluster.");
|
|
|
|
LOG_MSG(MSG_WARN_MIXED_CLUSTER, MSG_NONE);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* 1.03: update ping message */
|
|
|
|
for (i=0; i<sendp->nrules; i++)
|
|
{
|
|
PBIN_STATE bp;
|
|
|
|
bp = &(lp->pg_state[i]);
|
|
|
|
/* export current port group state to ping message */
|
|
|
|
sendp->cur_map[i] = bp->cmap; /* v2.1 */
|
|
sendp->new_map[i] = bp->new_map[my_host];
|
|
sendp->idle_map[i] = bp->idle_bins;
|
|
sendp->rdy_bins[i] = bp->rdy_bins;
|
|
sendp->load_amt[i] = bp->load_amt[my_host];
|
|
|
|
// NOTE: The following line of code was removed when it was discovered that it
|
|
// routinely produces a Wake On LAN pattern in the heartbeat that causes BroadCom
|
|
// NICs to panic. Although this is NOT an NLB issue, but rather a firmware issue
|
|
// in BroadCom NICs, it was decided to remove the information from the heartbeat
|
|
// to alleviate the problem for customers with BroadCom NICs upgrading to .NET.
|
|
// This array is UNUSED by NLB, so there is no harm in not filling it in; it was
|
|
// added a long time ago for debugging purposes as part of the now-defunct FIN-
|
|
// counting fix that was part of Win2k SP1.
|
|
//
|
|
// For future reference, should we need to use this space in the heartbeat at some
|
|
// future point in time, it appears that we will need to be careful to avoid potential
|
|
// WOL patterns in our heartbeats where we can avoid it. A WOL pattern is:
|
|
//
|
|
// 6 bytes of 0xFF, followed by 16 idential instances of a "MAC address" that can
|
|
// appear ANYWHERE in ANY frame type, including our very own NLB heartbeats. E.g.:
|
|
//
|
|
// FF FF FF FF FF FF 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
|
|
// 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
|
|
// 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
|
|
// 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06 01 02 03 04 05 06
|
|
// 01 02 03 04 05 06
|
|
//
|
|
// The MAC address need not be valid, however. In NLB heartbeats, the "MAC address"
|
|
// in the mistaken WOL pattern is "00 00 00 00 00 00". NLB routinely fills heartbeats
|
|
// with FF and 00 bytes, but it seems that by "luck" no other place in the heartbeat
|
|
// seems this vulnerable. For instance, in the load_amt array, each entry has a
|
|
// maximum value of 100 (decimal), so there is no possibility of generating the initial
|
|
// 6 bytes of FF to start the WOL pattern. All of the "map" arrays seem to be saved
|
|
// by two strokes of fortune; (i) little endian and (ii) the bin distribution algorithm.
|
|
//
|
|
// (i) Since we don't use the 4 most significant bits of the ULONGLONGs used to store
|
|
// each map, the most significant bit is NEVER FF. Because Intel is little endian, the
|
|
// most significant byte appears last. For example:
|
|
//
|
|
// 0F FF FF FF FF FF FF FF appears in the packet as FF FF FF FF FF FF 0F
|
|
//
|
|
// This breaks the FF sequence in many scenarios.
|
|
//
|
|
// (ii) The way the bin distribution algorithm distributes buckets to hosts seems to
|
|
// discourage other possibilities. For instance, a current map of:
|
|
//
|
|
// 00 FF FF FF FF FF FF 00
|
|
//
|
|
// just isn't likely. However, it IS STILL POSSIBLE! So, it is important to note that:
|
|
//
|
|
// REMOVING THIS LINE OF CODE DOES NOT, IN ANY WAY, GUARANTEE THAT AN NLB HEARTBEAT
|
|
// CANNOT STILL CONTAIN A VALID WAKE ON LAN PATTERN SOMEWHERE ELSE IN THE FRAME!!!
|
|
|
|
// sendp->pg_rsvd1[i] = (ULONG)bp->all_idle_map;
|
|
}
|
|
|
|
sendp->pkt_count = lp->pkt_count; /* 1.32B */
|
|
|
|
/* Add configuration information for teaming at each timeout. */
|
|
Load_teaming_code_create(&lp->send_msg.teaming, &ctxtp->bda_teaming);
|
|
|
|
/* request fast timeout if converging */
|
|
|
|
if (new_timeout != NULL) /* 1.03 */
|
|
{
|
|
if (sendp->state != HST_NORMAL)
|
|
* new_timeout = lp->cur_timeout = lp->def_timeout / 2;
|
|
else
|
|
* new_timeout = lp->cur_timeout = lp->def_timeout;
|
|
}
|
|
|
|
if (pnconn != NULL) /* v2.1 */
|
|
* pnconn = lp->nconn;
|
|
|
|
fRet = (sendp->state != HST_NORMAL);
|
|
|
|
LOCK_EXIT(&(lp->lock), irql);
|
|
|
|
return fRet;
|
|
} /* end Load_timeout */
|
|
|
|
|
|
PBIN_STATE Load_pg_lookup(
|
|
PLOAD_CTXT lp,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
BOOLEAN is_tcp)
|
|
{
|
|
PCVY_RULE rp; /* ptr. to rules array */
|
|
PBIN_STATE bp; /* ptr. to bin state */
|
|
ULONG i;
|
|
ULONG nurules; /* # user defined rules */
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE); /* (bbain 8/19/99) */
|
|
|
|
rp = (* (lp->params)).port_rules;
|
|
nurules = (* (lp->params)).num_rules;
|
|
|
|
/* check for invalid port value (bbain RC1 6/14/99) */
|
|
|
|
UNIV_ASSERT(svr_port <= CVY_MAX_PORT);
|
|
|
|
/* find server port rule */
|
|
|
|
for (i=0; i<nurules; i++)
|
|
{
|
|
/* For virtual clusters: If the server IP address matches the VIP for the port rule,
|
|
or if the VIP for the port rule is "ALL VIPs", and if the port lies in the range
|
|
for this rule, and if the protocol matches, this is the rule. Notice that this
|
|
give priority to rules for specific VIPs over those for "ALL VIPs", which means
|
|
that this code RELIES on the port rules being sorted by VIP/port where the "ALL
|
|
VIP" ports rules are at the end of the port rule list. */
|
|
if ((svr_ipaddr == rp->virtual_ip_addr || CVY_ALL_VIP_NUMERIC_VALUE == rp->virtual_ip_addr) &&
|
|
(svr_port >= rp->start_port && svr_port <= rp->end_port) &&
|
|
((is_tcp && rp->protocol != CVY_UDP) || (!is_tcp && rp->protocol != CVY_TCP)))
|
|
break;
|
|
else
|
|
rp++;
|
|
}
|
|
|
|
/* use default rule if port not found or rule is invalid */
|
|
|
|
bp = &(lp->pg_state[i]);
|
|
UNIV_ASSERT(bp->code == CVY_BINCODE); /* (bbain 8/19/99) */
|
|
|
|
return bp;
|
|
} /* end Load_pg_lookup */
|
|
|
|
/*
|
|
* Function: Load_find_dscr
|
|
* Description: This function takes a load pointer, hash value and connection
|
|
* parameters and searches all possible locations looking for a
|
|
* matching connection descriptor. If it finds ones, it returns
|
|
* a pointer to the descriptor (CONN_ENTRY); otherwise, it returns
|
|
* NULL to indicate that no matching descriptor was found.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* index - the connection queue index for this packet
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port number in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port number in host byte order
|
|
* protocol - the connection protocol
|
|
* Returns: PCONN_ENTRY - a pointer to the descriptor, or NULL if not found
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
PCONN_ENTRY Load_find_dscr (
|
|
PLOAD_CTXT lp,
|
|
ULONG index,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol)
|
|
{
|
|
BOOLEAN match = FALSE; /* TRUE => we have a record of this connection. */
|
|
PBIN_STATE bp; /* Pointer to bin state. */
|
|
PCONN_ENTRY ep; /* Pointer to connection entry. */
|
|
PCONN_DESCR dp; /* Pointer to connection descriptor. */
|
|
QUEUE * qp; /* Pointer to connection queue. */
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* Get a pointer to the connection entry for this hash ID. */
|
|
ep = &(lp->hashed_conn[index]);
|
|
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* Get a pointer to the conneciton queue. */
|
|
qp = &(lp->connq[index]);
|
|
|
|
/* Look in the hashed connection table first. */
|
|
if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
|
|
{
|
|
/* Note that we found a match for this tuple. */
|
|
match = TRUE;
|
|
}
|
|
else
|
|
{
|
|
/* Look through the descriptor queue. */
|
|
for (dp = (PCONN_DESCR)Queue_front(qp); dp != NULL; dp = (PCONN_DESCR)Queue_next(qp, &(dp->link)))
|
|
{
|
|
if (CVY_CONN_MATCH(&(dp->entry), svr_ipaddr, svr_port, client_ipaddr, client_port, protocol))
|
|
{
|
|
/* Note that we found a match for this tuple. */
|
|
match = TRUE;
|
|
|
|
UNIV_ASSERT (dp->code == CVY_DESCCODE);
|
|
|
|
/* Get a pointer to the connection entry. */
|
|
ep = &(dp->entry);
|
|
|
|
UNIV_ASSERT (ep->code == CVY_ENTRCODE);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If we found a match, return it, otherwise return NULL. */
|
|
if (match)
|
|
return ep;
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_note_conn_up
|
|
* Description: This function adjusts the appropriate connection counters
|
|
* for an up-coming connection.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which the connection was established
|
|
* bin - the bin to which the connection maps (Map % 60)
|
|
* Returns: Nothing.
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
VOID Load_note_conn_up (PLOAD_CTXT lp, PBIN_STATE bp, ULONG bin)
|
|
{
|
|
/* Increment the number of connections. */
|
|
lp->nconn++;
|
|
bp->tconn++;
|
|
bp->nconn[bin]++;
|
|
|
|
/* Mark bin not idle if necessary. */
|
|
if (bp->nconn[bin] == 1) bp->idle_bins &= ~(((MAP_T) 1) << bin);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_note_conn_down
|
|
* Description: This function adjusts the appropriate connection counters
|
|
* for an down-going connection.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which the connection resides
|
|
* bin - the bin to which the connection maps (Map % 60)
|
|
* Returns: Nothing.
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
VOID Load_note_conn_down (PLOAD_CTXT lp, PBIN_STATE bp, ULONG bin)
|
|
{
|
|
UNIV_ASSERT(bp->nconn[bin] > 0 && bp->tconn > 0 && lp->nconn > 0);
|
|
|
|
/* Update the number of connections on the entire load module. */
|
|
if (lp->nconn <= 0)
|
|
lp->nconn = 0;
|
|
else
|
|
lp->nconn--;
|
|
|
|
/* Update the number of connections on this bin and port rule. */
|
|
if (bp->nconn[bin] <= 0)
|
|
bp->nconn[bin] = 0;
|
|
else
|
|
bp->nconn[bin]--;
|
|
|
|
/* Update the total number of connections on this port rule. */
|
|
if (bp->tconn <= 0)
|
|
bp->tconn = 0;
|
|
else
|
|
bp->tconn--;
|
|
|
|
/* If this was the last connection on this bin, update the idle map. */
|
|
if (bp->nconn[bin] == 0) bp->idle_bins |= (((MAP_T) 1) << bin);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_init_dscr
|
|
* Description: This function initializes a NEWLY ALLOCATED descriptor.
|
|
* It is only necessary to perform this initialization ONCE.
|
|
* As descriptors are freed for re-use, use Load_reset_dscr
|
|
* to "re-initialize" them.
|
|
* Parameters: lp - a pointer to the load context on which this descriptor lives
|
|
* ep - a pointer to a connection descriptor
|
|
* alloc - whether or not this descriptor was dynamically allocated
|
|
* Returns: Nothing.
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
VOID Load_init_dscr (PLOAD_CTXT lp, PCONN_ENTRY ep, BOOLEAN alloc)
|
|
{
|
|
/* Set the "magic number". */
|
|
ep->code = CVY_ENTRCODE;
|
|
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
/* Save a pointer to this load module. */
|
|
ep->load = lp;
|
|
#endif
|
|
|
|
/* Initialize the hashing results. */
|
|
ep->index = 0;
|
|
ep->bin = 0;
|
|
|
|
/* Re-set the flags register. */
|
|
ep->flags = 0;
|
|
|
|
/* Is this descriptor in the static hash array, or allocated? */
|
|
if (alloc)
|
|
ep->flags |= NLB_CONN_ENTRY_FLAGS_ALLOCATED;
|
|
|
|
/* Initialize some other descriptor state. */
|
|
ep->timeout = 0;
|
|
ep->ref_count = 0;
|
|
|
|
/* Clear the descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Initilize the links. */
|
|
Link_init(&(ep->blink));
|
|
Link_init(&(ep->rlink));
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
Link_init(&(ep->glink));
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Function: Load_init_fsb
|
|
* Description: This function initializes a fixed-size block allocated from the
|
|
* fixed-size block pool.
|
|
* Parameters: lp - a pointer to the load context on which the descriptor lives
|
|
* dp - a pointer to a block (connection descriptor)
|
|
* Returns: Nothing.
|
|
* Author: shouse, 4.1.02
|
|
* Notes:
|
|
*/
|
|
VOID Load_init_fsb (PLOAD_CTXT lp, PCONN_DESCR dp)
|
|
{
|
|
/* Set the "magic number". */
|
|
dp->code = CVY_DESCCODE;
|
|
|
|
/* Initialize the connection queue link. */
|
|
Link_init(&(dp->link));
|
|
|
|
/* Initialize the connection entry. */
|
|
Load_init_dscr(lp, &dp->entry, TRUE);
|
|
}
|
|
|
|
/*
|
|
* Function: Load_reset_dscr
|
|
* Description: This function resets a descriptor for re-use. This includes
|
|
* re-initializing the state, setting the bin and queueing the
|
|
* descriptor onto the recovery and port rule queues.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which the connection is established
|
|
* ep - a pointer to the descriptor to be reset
|
|
* index - the connection queue index
|
|
* bin - the bin to which the connection maps
|
|
* references - the number of references to place on the descriptor initially
|
|
* Returns: Nothing.
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
VOID Load_reset_dscr (PLOAD_CTXT lp, PBIN_STATE bp, PCONN_ENTRY ep, ULONG index, ULONG bin, SHORT references)
|
|
{
|
|
/* Reset some of the descriptor state to its defaults. */
|
|
ep->ref_count = references;
|
|
ep->timeout = 0;
|
|
|
|
/* Clear all descriptor flags except ALLOCATED. */
|
|
ep->flags &= NLB_CONN_ENTRY_FLAGS_ALLOCATED;
|
|
|
|
/* Store the hashing results in the descriptor. */
|
|
ep->index = (USHORT)index;
|
|
ep->bin = (UCHAR)bin;
|
|
|
|
/* Queue entry into the recovery queue. */
|
|
Queue_enq(&(lp->conn_rcvryq), &(ep->rlink));
|
|
|
|
/* Queue entry into port group queue. */
|
|
Queue_enq(&(bp->connq), &(ep->blink));
|
|
|
|
/* Update the connection counters, etc. */
|
|
Load_note_conn_up(lp, bp, bin);
|
|
}
|
|
|
|
/*
 * Function: Load_put_dscr
 * Description: This function completely releases a descriptor for later
 *              use. This includes unlinking from all appropriate queues,
 *              decrementing appropriate counters and re-setting some
 *              descriptor state. Callers of this function should call
 *              CVY_CONN_CLEAR to mark the descriptor as unused.
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             bp - a pointer to the port rule on which the connection was active
 *             ep - a pointer to the connection descriptor to release
 * Returns: Nothing.
 * Author: shouse, 10.4.01
 * Notes: Callers MUST call CVY_CONN_CLEAR to mark the descriptor unused!
 *        Do NOT access ep after calling this function (it may have been freed)!
 */
VOID Load_put_dscr (PLOAD_CTXT lp, PBIN_STATE bp, PCONN_ENTRY ep)
{
    PCONN_DESCR dp;

    /* Unlink from the bin/dirty and recovery/timeout queues. */
    Link_unlink(&(ep->rlink));
    Link_unlink(&(ep->blink));

    /* If the connection is NOT dirty, then we have to update
       the connection counts, etc.  If it is dirty then the
       relevant counters have already been reset (when it was
       marked dirty). */
    if (!(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY))
    {
        Load_note_conn_down(lp, bp, (ULONG)ep->bin);
    }
    else
    {
        /* If we're destroying a dirty connection, update the dirty counters. */
        lp->dirty_bin[ep->bin]--;
        lp->num_dirty--;

        /* If this was the last dirty connection, turn off the cleanup waiting flag. */
        if (lp->num_dirty == 0)
            lp->cln_waiting = FALSE;
    }

    /* If this is an allocated (and therefore queued) descriptor,
       there is some additional cleanup to do. */
    if (ep->flags & NLB_CONN_ENTRY_FLAGS_ALLOCATED)
    {
        /* Get a pointer to the parent structure. */
        dp = STRUCT_PTR(ep, CONN_DESCR, entry);

        UNIV_ASSERT(dp->code == CVY_DESCCODE);

        /* Unlink from the connection queue and put the descriptor back on the free
           queue.  We MUST do this before calling NdisFreeToBlockPool, as the pool
           implementation will stomp on link because we allow it to re-use that piece
           of our memory to link free blocks.  Since this operation may also result
           the memory being freed (actually, pages will NEVER be freed immediately,
           but don't tempt fate), do NOT touch the descriptor once we've freed it
           back to the pool.  CALLERS OF THIS FUNCTION SHOULD TAKE THE SAME PRECAUTION
           AND NOT TOUCH THE DESCRIPTOR AFTER CALLING THIS FUNCTION. */
        Link_unlink(&(dp->link));

        /* Free the descriptor back to the fixed-size block pool. */
        NdisFreeToBlockPool((PUCHAR)dp);

        /* Decrement the number of outstanding descriptors from the pool. */
        lp->num_dscr_out--;
    }
}
|
|
|
|
/*
 * Function: Load_get_dscr
 * Description: This function finds a descriptor to be used for a new connection
 *              by any available means; this includes an available free descriptor,
 *              allocating new descriptors if necessary, or as a last resort,
 *              cannibalizing an existing, in-use descriptor. If it succeeds, it
 *              returns a pointer to the descriptor; otherwise, it returns NULL to
 *              indicate the failure to locate an available descriptor. Callers of
 *              this function should call CVY_CONN_SET upon success to mark the
 *              descriptor as used and fill in the connection parameters.
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             bp - a pointer to the port rule on which the connection is being established
 *             index - the connection queue index
 *             bin - the bin to which the connection belongs
 * Returns: PCONN_ENTRY - a pointer to the new descriptor, or NULL if failed
 * Author: shouse, 10.4.01
 * Notes: Callers of this function MUST call CVY_CONN_SET to mark the descriptor
 *        active and to set the connection parameters (IPs, ports, protocol).
 */
PCONN_ENTRY Load_get_dscr (PLOAD_CTXT lp, PBIN_STATE bp, ULONG index, ULONG bin)
{
    PCONN_DESCR dp = NULL;              /* Pool-allocated descriptor, if any. */
    PCONN_ENTRY ep = NULL;              /* The connection entry we will return. */
    QUEUE *     qp;                     /* Overflow descriptor queue for this hash ID. */
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);

    /* Get a pointer to the connection entry for this hash ID. */
    ep = &(lp->hashed_conn[index]);

    /* Get a pointer to the connection queue. */
    qp = &(lp->connq[index]);

    /* If hash entry table is not available, setup and enqueue a new entry.
       (If the static slot is free, we simply reuse it below.) */
    if (CVY_CONN_IN_USE(ep)) {
        /* Get a pointer to a free descriptor, but only while we are under the
           configured cap on outstanding pool allocations. */
        if ((lp->free_dscr_pool != NULL) && (lp->num_dscr_out < lp->max_dscr_out))
        {
            /* Allocate a descriptor from the fixed-size block pool. */
            dp = (PCONN_DESCR)NdisAllocateFromBlockPool(lp->free_dscr_pool);

            if (dp == NULL) {
                /* Allocation failed, log a message (once - the alloc_failed
                   flag suppresses repeats) and bail out. */
                if (!(lp->alloc_failed)) {
                    TRACE_CRIT("%!FUNC! Error allocating connection descriptors");
                    LOG_MSG(MSG_ERROR_MEMORY, MSG_NONE);
                    lp->alloc_failed = TRUE;
                }

                return NULL;
            }

            /* Initialize the fixed-size block (connection descriptor). */
            Load_init_fsb(lp, dp);

            UNIV_ASSERT(dp->code == CVY_DESCCODE);

            /* Increment the count of outstading descriptors from the fixed-size block pool. */
            lp->num_dscr_out++;

            /* There was a free descriptor, so setup the connection entry pointer. */
            ep = &(dp->entry);

            UNIV_ASSERT(ep->code == CVY_ENTRCODE);
        }
#if defined (NLB_TCP_NOTIFICATION)
        /* If notification is turned ON, we do NOT cannibalize descriptors. */
        else if (!NLB_NOTIFICATIONS_ON())
#else
        else
#endif
        {
            /* If we have reached the allocation limit, start taking connection descriptors from
               the timeout or recovery queues since they are likely to be stale and very old. */
            PBIN_STATE rbp;     /* Port rule of the descriptor being cannibalized. */
            LINK *     rlp;     /* Recovery/timeout queue link of that descriptor. */

            /* We were unable to allocation more connection descriptors and we will
               be forced to cannibalize a connection descriptor already in use.  Warn
               the administrator (once) that they should consider allowing NLB to
               allocate more connection descriptors. */
            if (!(lp->alloc_inhibited)) {
                TRACE_CRIT("%!FUNC! All descriptors have been allocated and are in use");
                LOG_MSG(MSG_WARN_DESCRIPTORS, CVY_NAME_MAX_DSCR_ALLOCS);
                lp->alloc_inhibited = TRUE;
            }

            TRACE_INFO("%!FUNC! Attempting to take a connection descriptor from the TCP timeout queue");

            /* Dequeue a descriptor from the TCP timeout queue.  Cannibalize this queue
               first because (i) its the most likely to have an available descriptor,
               (ii) it should be the least disruptive because the connection has been
               terminated AND the timeout for TCP is very short. */
            rlp = (LINK *)Queue_deq(&(lp->tcp_expiredq));

            if (rlp == NULL) {

                TRACE_INFO("%!FUNC! Attempting to take a connection descriptor from the IPSec timeout queue");

                /* Dequeue a descriptor from the IPSec timeout queue.  While it is
                   true that descriptors on this queue are theoretically closed,
                   since IPSec cannot be sure that not upper-level protocols still
                   have state at the time a Main Mode SA expires and NLB is notified,
                   these connections are non-trivially likely to regenerate, so it
                   is necessary to keep the state around for a long time (24 hours
                   by default).  Therefore, we cannibalize this timeout queue last
                   as it is the most likely to be disruptive, aside from the revovery
                   queue. */
                rlp = (LINK *)Queue_deq(&(lp->ipsec_expiredq));

                if (rlp == NULL) {

                    TRACE_INFO("%!FUNC! Attempting to take a connection descriptor from the recovery queue");

                    /* Dequeue a descriptor from the recovery queue.  Since these are
                       "live" connections, we take descriptors from this queues as a
                       last resort. */
                    rlp = (LINK *)Queue_deq(&(lp->conn_rcvryq));

                    /* No descriptors are available anywhere - this should NEVER happen, but. */
                    if (rlp == NULL) return NULL;
                }
            }

            TRACE_INFO("%!FUNC! Successfull cannibalized a connection descriptor");

            /* Grab a pointer to the connection entry. */
            ep = STRUCT_PTR(rlp, CONN_ENTRY, rlink);

            UNIV_ASSERT(ep->code == CVY_ENTRCODE);

            if (ep->flags & NLB_CONN_ENTRY_FLAGS_ALLOCATED) {
                /* Unlink allocated descriptors from the hash table queue if necessary
                   and set dp so that code below will put it back in the right hash queue. */
                dp = STRUCT_PTR(ep, CONN_DESCR, entry);

                UNIV_ASSERT(dp->code == CVY_DESCCODE);

                Link_unlink(&(dp->link));
            } else {
                /* A static hash-table entry stays where it is; nothing to re-queue. */
                dp = NULL;
            }

            /* Dirty connections are not counted, so we don't need to update these counters. */
            if (!(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY))
            {
                /* Find out which port group we are on so we can clean up its counters. */
                rbp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, IS_TCP_PKT(ep->protocol));

                /* Update the connection counters, etc. to remove all knowledge of this
                   "old" connection that we're cannibalizing. */
                Load_note_conn_down(lp, rbp, (ULONG)ep->bin);
            }
            else
            {
                /* If we're cannibalizing a dirty connection, update the dirty counters. */
                lp->dirty_bin[ep->bin]--;
                lp->num_dirty--;

                /* If this was the last dirty connection, turn off the cleanup waiting flag. */
                if (lp->num_dirty == 0)
                    lp->cln_waiting = FALSE;
            }

            /* Remove the old connection from its port rule (or dirty) queue. */
            Link_unlink(&(ep->blink));

            /* Mark the descriptor as unused. */
            CVY_CONN_CLEAR(ep);
        }
#if defined (NLB_TCP_NOTIFICATION)
        /* There are no free descriptors, and we refuse to cannibalize. */
        else
        {
            /* We were unable to allocation more connection descriptors and we will
               be forced to cannibalize a connection descriptor already in use.  Warn
               the administrator that they should consider allowing NLB to allocate
               more connection descriptors. */
            if (!(lp->alloc_inhibited)) {
                TRACE_CRIT("%!FUNC! All descriptors have been allocated and are in use");
                LOG_MSG(MSG_WARN_DESCRIPTORS, CVY_NAME_MAX_DSCR_ALLOCS);
                lp->alloc_inhibited = TRUE;
            }

            return NULL;
        }

        /* If notification is ON, then we're sure that descriptors here are dynamic,
           and therefore will ALWAYS have to be re-queued.  If notification is OFF,
           that depends on whether a potentially cannibalized descriptor was dynamically
           allocated or not. */
        if (NLB_NOTIFICATIONS_ON())
        {
            UNIV_ASSERT(dp != NULL);

            /* Enqueue descriptor in hash table unless it's already a hash table entry (a recovered
               connection might be in hash table, so make sure we do not end up queueing it). */
            UNIV_ASSERT(dp->code == CVY_DESCCODE);

            Queue_enq(qp, &(dp->link));
        }
        else
        {
#endif
            /* Enqueue descriptor in hash table unless it's already a hash table entry (a recovered
               connection might be in hash table, so make sure we do not end up queueing it). */
            if (dp != NULL) {
                UNIV_ASSERT(dp->code == CVY_DESCCODE);

                Queue_enq(qp, &(dp->link));
            }
#if defined (NLB_TCP_NOTIFICATION)
        }
#endif

    }

    UNIV_ASSERT(ep->code == CVY_ENTRCODE);

    /* Reset the descriptor information. */
    Load_reset_dscr(lp, bp, ep, index, bin, 1);

    return ep;
}
|
|
|
|
/*
|
|
* Function: Load_timeout_dscr
|
|
* Description: This function moves an active connection descriptor to
|
|
* the timeout state by dequeueing it from the recovery
|
|
* queue, setting the appropriate timeout and moving it to
|
|
* the appropriate timeout queue, where it will remain active
|
|
* for some amount of time (configurable via the registry).
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which this connection is active
|
|
* ep - a pointer to the connection descriptor to timeout
|
|
* Returns: Nothing.
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
VOID Load_timeout_dscr (PLOAD_CTXT lp, PBIN_STATE bp, PCONN_ENTRY ep)
|
|
{
|
|
/* Virtual descriptors should NEVER get in this function. */
|
|
UNIV_ASSERT(!(ep->flags & NLB_CONN_ENTRY_FLAGS_VIRTUAL));
|
|
|
|
/* Take the descriptor off of the recovery queue and move it to the appropriate
|
|
timeout queue, based on protocol. Each protocol has its own queue to avoid
|
|
the need for a sorted insert function, which is expensive. */
|
|
Link_unlink(&(ep->rlink));
|
|
|
|
/* Set the timeout based on the protocol and add it to the appropriate timeout queue. */
|
|
switch (ep->protocol) {
|
|
case TCPIP_PROTOCOL_TCP:
|
|
case TCPIP_PROTOCOL_PPTP:
|
|
/* If the user has specified a zero timeout, then simply destroy the descriptor. */
|
|
if (!lp->tcp_timeout)
|
|
{
|
|
/* Clear the connection descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
|
|
break;
|
|
}
|
|
|
|
/* The timeout is the current time, plus the timeout for this particular protocol. */
|
|
ep->timeout = lp->clock_sec + lp->tcp_timeout;
|
|
|
|
Queue_enq(&(lp->tcp_expiredq), &(ep->rlink));
|
|
|
|
#if defined (TRACE_DSCR)
|
|
DbgPrint("Load_timeout_dscr: Moving TCP descriptor %p to the TCP timeout queue: clock=%u, timeout=%d", ep, lp->clock_sec, ep->timeout);
|
|
#endif
|
|
|
|
break;
|
|
case TCPIP_PROTOCOL_IPSEC1:
|
|
/* If the user has specified a zero timeout, then simply destroy the descriptor. */
|
|
if (!lp->ipsec_timeout)
|
|
{
|
|
/* Clear the connection descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
|
|
break;
|
|
}
|
|
|
|
/* The timeout is the current time, plus the timeout for this particular protocol. */
|
|
ep->timeout = lp->clock_sec + lp->ipsec_timeout;
|
|
|
|
Queue_enq(&(lp->ipsec_expiredq), &(ep->rlink));
|
|
|
|
#if defined (TRACE_DSCR)
|
|
DbgPring("Load_timeout_dscr: Moving IPSec descriptor %p to the IPSec timeout queue: clock=%u, timeout=%u", ep, lp->clock_sec, ep->timeout);
|
|
#endif
|
|
|
|
break;
|
|
default:
|
|
|
|
#if defined (TRACE_DSCR)
|
|
DbgPrint("Load_timeout_dscr: Invalid descriptor protocol (%u). Removing descriptor %p immediately.", ep->protocol, ep);
|
|
#endif
|
|
|
|
/* Although this should never happen, clean up immediately
|
|
if the protocol in the descriptor is invalid. Note that
|
|
virtual descriptors, such as GRE, should NEVER be timed
|
|
out, and therefore should not enter this function. */
|
|
UNIV_ASSERT(0);
|
|
|
|
/* Clear the connection descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Function: Load_flush_dscr
|
|
* Description: This function will flush out any descriptor that may be lying around
|
|
* for the given IP tuple. This may happen as a result of a RST being
|
|
* sent on another adapter, which NLB did not see and therefore did not
|
|
* properly destroy the state for. This function is called on all incoming
|
|
* SYN packets to remove this stale state. For PPTP/IPSec connections, it is
|
|
* also necessary to update any matching virtual descriptor found.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which this connection is active
|
|
* index - the connection queue index
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* Returns: Nothing.
|
|
* Author: shouse, 1.7.02
|
|
* Notes:
|
|
*/
|
|
VOID Load_flush_dscr (
|
|
PLOAD_CTXT lp,
|
|
PBIN_STATE bp,
|
|
ULONG index,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol)
|
|
{
|
|
PCONN_ENTRY ep; /* Pointer to connection entry. */
|
|
ULONG vindex;
|
|
ULONG hash;
|
|
SHORT references = 0;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
/* If not match was found, or the descriptor is already dirty, there's nothing to do. */
|
|
if ((ep != NULL) && !(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY)) {
|
|
|
|
UNIV_ASSERT(ep->ref_count >= 0);
|
|
|
|
/* Note the number of references on this descriptor. */
|
|
references = ep->ref_count;
|
|
|
|
/* Mark the descriptor dirty and either free it or move it to
|
|
the dirty descriptor queue for subsequent cleanup. */
|
|
Load_soil_dscr(lp, bp, ep);
|
|
|
|
/* Update the connection counters on the port rule and load module.
|
|
Dirty descriptors update the connection counts when marked dirty,
|
|
not when they are ultimately destroyed. */
|
|
Load_note_conn_down(lp, bp, (ULONG)ep->bin);
|
|
|
|
if (protocol == TCPIP_PROTOCOL_PPTP) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);
|
|
|
|
/* If not match was found, or the descriptor is already dirty, there's nothing to do. */
|
|
if ((ep != NULL) && !(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY)) {
|
|
|
|
UNIV_ASSERT(ep->flags & NLB_CONN_ENTRY_FLAGS_VIRTUAL);
|
|
UNIV_ASSERT(ep->ref_count > 0);
|
|
|
|
/* If the descriptor has more references than the "parent"
|
|
descriptor, then we don't want to mark it dirty, or we'll
|
|
affect the traffic of other connections sharing this
|
|
descriptor. Otherwise, if we account for all references
|
|
on the virtual descriptor, mark it dirty. */
|
|
if (ep->ref_count <= references) {
|
|
/* Mark the descriptor dirty and either free it or move it to
|
|
the dirty descriptor queue for subsequent cleanup. */
|
|
Load_soil_dscr(lp, bp, ep);
|
|
|
|
/* Update the connection counters on the port rule and load module.
|
|
Dirty descriptors update the connection counts when marked dirty,
|
|
not when they are ultimately destroyed. */
|
|
Load_note_conn_down(lp, bp, (ULONG)ep->bin);
|
|
}
|
|
}
|
|
}
|
|
else if (protocol == TCPIP_PROTOCOL_IPSEC1) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);
|
|
|
|
/* If not match was found, or the descriptor is already dirty, there's nothing to do. */
|
|
if ((ep != NULL) && !(ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY)) {
|
|
|
|
UNIV_ASSERT(ep->flags & NLB_CONN_ENTRY_FLAGS_VIRTUAL);
|
|
UNIV_ASSERT(ep->ref_count > 0);
|
|
|
|
/* If the descriptor has more references than the "parent"
|
|
descriptor, then we don't want to mark it dirty, or we'll
|
|
affect the traffic of other connections sharing this
|
|
descriptor. Otherwise, if we account for all references
|
|
on the virtual descriptor, mark it dirty. */
|
|
if (ep->ref_count <= references) {
|
|
/* Mark the descriptor dirty and either free it or move it to
|
|
the dirty descriptor queue for subsequent cleanup. */
|
|
Load_soil_dscr(lp, bp, ep);
|
|
|
|
/* Update the connection counters on the port rule and load module.
|
|
Dirty descriptors update the connection counts when marked dirty,
|
|
not when they are ultimately destroyed. */
|
|
Load_note_conn_down(lp, bp, (ULONG)ep->bin);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If at least one descriptor has been marked dirty, restart the cleanup timer. */
|
|
if (lp->cln_waiting)
|
|
lp->cur_time = 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Function: Load_create_dscr
 * Description: This function creates and sets up a new descriptor for a given connection.
 *              The input connection entry pointer is the "existing" descriptor found by
 *              the caller, which can be (probably will be) NULL; in that case, a new
 *              descriptor needs to be acquired and initialized.  If a descriptor already
 *              exists, it is updated or cleansed, depending on its state.
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             bp - a pointer to the port rule on which this connection is active
 *             ep - a pointer to the connection descriptor, if one was already found
 *             index - the connection queue index
 *             bin - the bin to which the connection maps (Map % 60)
 * Returns: PCONN_ENTRY - a pointer to the connection entry, or NULL if no
 *             descriptor could be acquired.
 * Author:
 * Notes: Caller is expected to hold the load module lock; this routine
 *        manipulates the shared recovery/dirty queues and dirty counters.
 */
PCONN_ENTRY Load_create_dscr (
    PLOAD_CTXT  lp,
    PBIN_STATE  bp,
    PCONN_ENTRY ep,
    ULONG       index,
    ULONG       bin)
{
    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* If we don't have a connection match, setup a new connection entry. */
    if (ep == NULL) {

        /* Get a new descriptor. */
        ep = Load_get_dscr(lp, bp, index, bin);

        /* If we can't find a descriptor, something is severely wrong - bail out. */
        if (ep == NULL) return NULL;

        UNIV_ASSERT(ep->code == CVY_ENTRCODE);

    /* Otherwise, we have a match; clean up conn entry if dirty since we have a
       new connection, although TCP/IP will likely reject it if it has stale state
       from another connection. */
    } else {

        UNIV_ASSERT(ep->code == CVY_ENTRCODE);

        if (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) {

            /* If we're re-using a connection descriptor already
               in use, then we need to pull it off the recovery/
               timeout queue because it might have been previously
               added to the timeout queue and we don't want it
               spontaneously expiring on us. */
            Link_unlink(&(ep->rlink));

            /* Unlink the descriptor from the dirty queue. */
            Link_unlink(&(ep->blink));

            /* If we're cleansing a dirty connection, update the dirty counters. */
            lp->dirty_bin[ep->bin]--;

            lp->num_dirty--;

            /* If this was the last dirty connection, turn off the cleanup waiting flag. */
            if (lp->num_dirty == 0)

                lp->cln_waiting = FALSE;

            /* Reset the dirty descriptor and re-use it for this connection.
               NOTE(review): the post-increment passes the PRE-increment ref_count
               to Load_reset_dscr and then bumps ep->ref_count as a side effect —
               confirm Load_reset_dscr expects the old count and does not itself
               overwrite ref_count, since a side effect inside an argument list is
               easy to misread. */
            Load_reset_dscr(lp, bp, ep, index, bin, ep->ref_count++);

        } else {

            /* Live descriptor being re-used: cancel any pending expiration. */
            ep->timeout = 0;

            /* If we're re-using a connection descriptor already
               in use, then we need to pull it off the recovery/
               timeout queue and re-enqueue it on the recovery
               queue because it might have been previously added
               to the timeout queue and we don't want it spon-
               taneously expiring on us. */
            Link_unlink(&(ep->rlink));

            Queue_enq(&(lp->conn_rcvryq), &(ep->rlink));

            /* Another connection now shares this descriptor. */
            ep->ref_count++;

        }
    }

    return ep;
}
|
|
|
|
/*
|
|
* Function: Load_destroy_dscr
|
|
* Description: This function "destroys" an existing descriptor. If the operation is
|
|
* a RST, it is immediately destroyed; if it is a FIN, the reference count
|
|
* is decremented and depending on the new count, the descriptor is either
|
|
* moved to a timeout queue or left alone.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* bp - a pointer to the port rule on which this connection is active
|
|
* ep - a pointer to the connection descriptor if one was already found
|
|
* conn_status - whether this is a RST or a FIN
|
|
* Returns: ULONG - the number of remaining references on the descriptor.
|
|
* Author: shouse, 1.7.02
|
|
* Notes:
|
|
*/
|
|
ULONG Load_destroy_dscr (
|
|
PLOAD_CTXT lp,
|
|
PBIN_STATE bp,
|
|
PCONN_ENTRY ep,
|
|
ULONG conn_status)
|
|
{
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* If no descriptor was provided, bail out. This should NOT be called
|
|
with a NULL descriptor, but we have to handle it anyway. */
|
|
if (ep == NULL) return 0;
|
|
|
|
UNIV_ASSERT(ep->ref_count >= 0);
|
|
|
|
/* This descriptor was already moved to the expired queue - must be
|
|
that we received a retransmitted FIN on this connection, or the
|
|
reference count of a virtual descriptor was skewed. */
|
|
if (!ep->ref_count) {
|
|
|
|
UNIV_ASSERT(ep->timeout != 0);
|
|
|
|
/* If this is a RST notification, then destroy the state now.
|
|
If its a FIN, just ignore it. Either way, return zero. */
|
|
if (conn_status == CVY_CONN_RESET) {
|
|
|
|
/* Clear the connection descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
}
|
|
|
|
/* Return - the descriptor already has zero references (no update needed). */
|
|
return 0;
|
|
}
|
|
|
|
UNIV_ASSERT(ep->ref_count > 0);
|
|
|
|
/* Decrement the reference count by one. */
|
|
ep->ref_count--;
|
|
|
|
UNIV_ASSERT(ep->ref_count >= 0);
|
|
|
|
/* If there are still references on this descriptor,
|
|
then its not ready to be destroyed yet, so we'll
|
|
keep it around and exit here. */
|
|
if (ep->ref_count > 0) return (ep->ref_count);
|
|
|
|
/* If this is a RST, or if the descriptor is virtual or dirty, destroy the descriptor
|
|
now. There is no need to timeout virtual GRE or IPSec/UDP descriptors; they can be
|
|
immediate destroyed. Of course, if the descriptor has already been marked dirty,
|
|
then we can destroy it now that the reference count has reached zero. */
|
|
if ((conn_status == CVY_CONN_RESET) || (ep->flags & NLB_CONN_ENTRY_FLAGS_VIRTUAL) || (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY)) {
|
|
|
|
/* Clear the connection descriptor. */
|
|
CVY_CONN_CLEAR(ep);
|
|
|
|
/* Release the descriptor. */
|
|
Load_put_dscr(lp, bp, ep);
|
|
|
|
/* However, conventional descriptors, such as TCP or IPSec, should be timed-out gracefully. */
|
|
} else {
|
|
|
|
/* Otherwise, we're destroying it. Take the descriptor
|
|
off of the recovery queue and move it to the appropriate
|
|
timeout queue, based on protocol. Each protocol has
|
|
its own queue to avoid the need for a sorted insert
|
|
function, which is expensive. */
|
|
Load_timeout_dscr(lp, bp, ep);
|
|
|
|
}
|
|
|
|
/* No references left on the descriptor; it was destroyed or timed-out. */
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Function: Load_packet_check
 * Description: This function determines whether or not to take a data packet
 *              in the IP stream identified by the IP tuple in question.
 *              Protocols that are session-less depend only on the hashing
 *              result and the ownership map.  Session-ful protocols may need
 *              to perform a descriptor look-up if ambiguity exists.
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             svr_ipaddr - the server IP address in network byte order
 *             svr_port - the server port in host byte order
 *             client_ipaddr - the client IP address in network byte order
 *             client_port - the client port in host byte order
 *             protocol - the protocol of this connection
 *             limit_map_fn - whether or not to include server-side parameters in hashing
 *             reverse_hash - whether or not to reverse client and server during hashing
 * Returns: BOOLEAN - do we accept the packet? (TRUE = yes)
 * Author: bbain, shouse, 10.4.01
 * Notes: Acquires the load module lock around the map/descriptor checks; the
 *        goto labels "unlock" and "exit" are the single lock-release and
 *        trace-and-return points, respectively.
 */
BOOLEAN Load_packet_check(
    PLOAD_CTXT lp,
    ULONG      svr_ipaddr,
    ULONG      svr_port,
    ULONG      client_ipaddr,
    ULONG      client_port,
    USHORT     protocol,
    BOOLEAN    limit_map_fn,
    BOOLEAN    reverse_hash)
{
    PBIN_STATE  bp;             /* Port rule state for this tuple. */
    ULONG       hash;
    ULONG       index;          /* Connection hash-table index (simple hash). */
    ULONG       bin;            /* Ownership bin (complex hash % CVY_MAXBINS). */
    IRQLEVEL    irql;
    PMAIN_CTXT  ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    BOOLEAN     is_tcp_pkt = IS_TCP_PKT(protocol);
    BOOLEAN     is_session_pkt = IS_SESSION_PKT(protocol);
    BOOLEAN     acpt = FALSE;   /* Default to dropping the packet. */

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, limit map = %u, reverse hash = %u",
                 lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
                 IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port,
                 protocol, limit_map_fn, reverse_hash);

    /* If the load module is inactive, drop the packet and return here. */
    if (!lp->active) {

        TRACE_FILTER("%!FUNC! Drop packet - load module is inactive");

        acpt = FALSE;
        goto exit;
    }

    /* Increment count of pkts handled. */
    lp->pkt_count++;

    /* Find the port rule for this connection. */
    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    /* Make sure that Load_pg_lookup properly handled protocol specific rules. */
    UNIV_ASSERT((is_tcp_pkt && bp->prot != CVY_UDP) || (!is_tcp_pkt && bp->prot != CVY_TCP));

    /* Handle CVY_NEVER mode immediately: this port rule filters everything out. */
    if (bp->mode == CVY_NEVER) {

        /* Increment the dropped packet count. */
        bp->packets_dropped++;

        TRACE_FILTER("%!FUNC! Drop packet - port rule %u is disabled\n", bp->index);

        acpt = FALSE;
        goto exit;
    }

    /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
    hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);

    index = hash % CVY_MAX_CHASH;

    /* Compute the hash.  The complex hash determines bin ownership and honors
       the rule's affinity plus the reverse/limit hashing options. */
    hash = Load_complex_hash(svr_ipaddr, svr_port, client_ipaddr, client_port, bp->affinity, reverse_hash, limit_map_fn);

    bin = hash % CVY_MAXBINS;

    LOCK_ENTER(&(lp->lock), &irql);

    /* Check bin for residency and all other hosts now idle on their bins; in this case
       and if we do not have dirty connections, we must be able to handle the packet.
       (Session-less packets need only bin residency; session packets additionally
       require that every other host is idle on the bin and no cleanup is pending,
       so no descriptor lookup is needed.) */

    if (((bp->cmap & (((MAP_T) 1) << bin)) != 0) && (!is_session_pkt || (((bp->all_idle_map & (((MAP_T) 1) << bin)) != 0) && (!(lp->cln_waiting))))) {

        /* Note that we may have missed a connection, but it could also be a stale
           packet so we can't start tracking the connection now. */

        TRACE_FILTER("%!FUNC! Accept packet - packet owned unconditionally: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                     "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                     bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

        /* Increment the accepted packet count. */
        bp->packets_accepted++;

        acpt = TRUE;
        goto unlock;

    /* Important note: Virtual descriptors that are not session-based and return
       FALSE for IS_SESSION_PKT() use this case to check for a connection descriptor
       match.  (Example: UDP subsequent fragments within IPSec tunnels of protocol
       type TCPIP_PROTOCOL_IPSEC_UDP)  Do not disable this code for non-session
       protocols. */

    /* Otherwise, if we have an active connection for this bin or if we have dirty
       connections for this bin and the bin is resident, check for a match. */

    } else if (bp->nconn[bin] > 0 || (lp->cln_waiting && lp->dirty_bin[bin] && ((bp->cmap & (((MAP_T) 1) << bin)) != 0))) {

        PCONN_ENTRY ep;

        /* Look for an existing matching connection descriptor. */
        ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

        /* If we can't find one, we don't own the connection. */
        if (ep == NULL) {

            TRACE_FILTER("%!FUNC! Drop packet - packet not owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                         "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                         bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

            /* Increment the dropped packet count. */
            bp->packets_dropped++;

            acpt = FALSE;
            goto unlock;
        }

        UNIV_ASSERT(ep->code == CVY_ENTRCODE);

        /* If connection was dirty, just block the packet since TCP/IP may have stale
           connection state for a previous connection from another host. */
        if (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) {

            TRACE_FILTER("%!FUNC! Drop packet - block dirty connections (%p): Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                         "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                         ep, bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

            /* Increment the dropped packet count. */
            bp->packets_dropped++;

            acpt = FALSE;
            goto unlock;
        }

        TRACE_FILTER("%!FUNC! Accept packet - matching descriptor found (%p): Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                     "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                     ep, bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

        /* Increment the accepted packet count. */
        bp->packets_accepted++;

        acpt = TRUE;
        goto unlock;
    }

    /* Neither unconditional ownership nor a descriptor match: not our packet. */
    TRACE_FILTER("%!FUNC! Drop packet - packet not owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                 "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                 bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

    /* Increment the dropped packet count. */
    bp->packets_dropped++;

    acpt = FALSE;

 unlock:

    LOCK_EXIT(&(lp->lock), irql);

 exit:

    TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);

    return acpt;
}
|
|
|
|
/*
|
|
* Function: Load_conn_advise
|
|
* Description: This function determines whether or not to accept this packet,
|
|
* which represents the beginning or end of a session-ful connection.
|
|
#if !defined (NLB_TCP_NOTIFICATION)
|
|
* If the connection is going up, and is successful, this function
|
|
* creates state to track the connection. If the connection is
|
|
* going down, this function removes the state for tracking the
|
|
* connection.
|
|
#endif
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* conn_status - whether the connection is going UP, DOWN, or being RESET
|
|
* limit_map_fn - whether or not to include server-side parameters in hashing
|
|
* reverse_hash - whether or not to reverse client and server during hashing
|
|
* Returns: BOOLEAN - do we accept the packet (TRUE = yes)
|
|
* Author: bbain, shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
BOOLEAN Load_conn_advise(
|
|
PLOAD_CTXT lp,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol,
|
|
ULONG conn_status,
|
|
BOOLEAN limit_map_fn,
|
|
BOOLEAN reverse_hash)
|
|
{
|
|
ULONG hash;
|
|
ULONG vindex;
|
|
ULONG index;
|
|
ULONG bin;
|
|
PBIN_STATE bp;
|
|
PCONN_ENTRY ep;
|
|
IRQLEVEL irql;
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
BOOLEAN is_tcp_pkt = IS_TCP_PKT(protocol);
|
|
BOOLEAN acpt = TRUE;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, status = %u, limit map = %u, reverse hash = %u",
|
|
lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
|
|
IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port,
|
|
protocol, conn_status, limit_map_fn, reverse_hash);
|
|
|
|
/* If the load module is inactive, drop the packet and return here. */
|
|
if (!lp->active) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - load module is inactive");
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
/* Increment count of pkts handled. */
|
|
lp->pkt_count++;
|
|
|
|
/* Find the port rule for this connection. */
|
|
bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);
|
|
|
|
/* Handle CVY_NEVER immediately. */
|
|
if (bp->mode == CVY_NEVER) {
|
|
/* Increment the dropped packet count. */
|
|
bp->packets_dropped++;
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - port rule %u is disabled\n", bp->index);
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
/* Compute the hash. */
|
|
hash = Load_complex_hash(svr_ipaddr, svr_port, client_ipaddr, client_port, bp->affinity, reverse_hash, limit_map_fn);
|
|
|
|
bin = hash % CVY_MAXBINS;
|
|
|
|
/* If this is a connection up notification, first clean out any old state that may exist for this
|
|
connection BEFORE we load-balance IFF we do NOT own the bucket to which the connection maps.
|
|
If we are not the bucket owner, the somebody else probably is; since we know that they will
|
|
accept the new connection, we need to flush out any state that we may have lying around.
|
|
This cleans out stale state that may have been left around by falling out of sync with TCP/IP.
|
|
Note that re-transmitted SYNs can wreak havoc here if the bucket map has shifted since the
|
|
first SYN, however, since the other host has no way of knowing that the second SYN is a
|
|
re-transmit, there's nothing we can do about it anyway. */
|
|
if ((conn_status == CVY_CONN_UP) && ((bp->cmap & (((MAP_T) 1) << bin)) == 0)) {
|
|
LOCK_ENTER(&(lp->lock), &irql);
|
|
|
|
/* If this is a SYN, flush out any old descriptor that may be lying around for this connection. */
|
|
Load_flush_dscr(lp, bp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
LOCK_EXIT(&(lp->lock), &irql);
|
|
}
|
|
|
|
/* If this connection is not in our current map and it is not a connection
|
|
down notification for a non-idle bin, just filter it out. */
|
|
if ((bp->cmap & (((MAP_T) 1) << bin)) == 0 && (!((conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET) && bp->nconn[bin] > 0))) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - packet not owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
/* Increment the dropped packet count. */
|
|
bp->packets_dropped++;
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
/* DO NOT create a descriptor until TCP or IPSec tells us to via a connection notification. If TCP
|
|
notification is OFF, then only exit early if its an IPSec SYN. */
|
|
if ((conn_status == CVY_CONN_UP) && (NLB_NOTIFICATIONS_ON() || (protocol == TCPIP_PROTOCOL_IPSEC1))) {
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - SYN owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
#else
|
|
/* DO NOT create a descriptor until IPSec tells us to via a connection notification IOCTL. */
|
|
if ((conn_status == CVY_CONN_UP) && (protocol == TCPIP_PROTOCOL_IPSEC1)) {
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - IPSec SYN owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
#endif
|
|
|
|
/* Increment the accepted packet count. */
|
|
bp->packets_accepted++;
|
|
|
|
acpt = TRUE;
|
|
goto exit;
|
|
}
|
|
|
|
LOCK_ENTER(&(lp->lock), &irql);
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
/* If we see a new connection, handle it. */
|
|
|
|
if (conn_status == CVY_CONN_UP) {
|
|
|
|
/* Create a new connection descriptor to track this connection. */
|
|
ep = Load_create_dscr(lp, bp, ep, index, bin);
|
|
|
|
/* If, for some reason, we were unable to create state for this connection, bail out here. */
|
|
if (ep == NULL) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - no available descriptors: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
/* Increment the dropped packet count. */
|
|
bp->packets_dropped++;
|
|
|
|
acpt = FALSE;
|
|
goto unlock;
|
|
}
|
|
|
|
/* Set the connection information in the descriptor. */
|
|
CVY_CONN_SET(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
/* If this is a new PPTP tunnel, create or update a virtual descriptor to track the GRE data packets. */
|
|
if (protocol == TCPIP_PROTOCOL_PPTP) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching virtual connection descriptor. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);
|
|
|
|
/* Create or update a virtual descriptor for the GRE traffic. */
|
|
ep = Load_create_dscr(lp, bp, ep, vindex, bin);
|
|
|
|
/* If we can't allocate the virtual descriptor, bail out, but don't fail. */
|
|
if (ep == NULL) goto unlock;
|
|
|
|
/* Set the connection information in the descriptor. */
|
|
CVY_CONN_SET(ep, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);
|
|
|
|
/* Set the virtual descriptor flag. */
|
|
ep->flags |= NLB_CONN_ENTRY_FLAGS_VIRTUAL;
|
|
}
|
|
|
|
/* Otherwise, if a known connection is going down, remove our connection entry. */
|
|
|
|
} else if ((conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET) && (ep != NULL)) {
|
|
|
|
/* If we found state for this connection, the bin is the bin from the descriptor,
|
|
not the calculated bin, which may not even been accurate if the port rules have
|
|
been modified since this connection was established. */
|
|
bin = ep->bin;
|
|
|
|
/* If connection was dirty, just block the packet since TCP/IP may have stale
|
|
connection state for a previous connection from another host. */
|
|
if (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - block dirty connections: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
/* Increment the dropped packet count. */
|
|
bp->packets_dropped++;
|
|
|
|
goto unlock;
|
|
}
|
|
|
|
/* Update the descriptor by destroying it or moving it to the appropriate timeout queue if no references remain. */
|
|
(VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
|
|
|
|
/* If this is a PPTP tunnel going down, update the virtual GRE descriptor. Virtual descriptors
|
|
are ALWAYS de-referenced, not destroyed, even if the notification is a RST because these
|
|
descriptors are potentially shared by multiple PPTP tunnels. */
|
|
if (protocol == TCPIP_PROTOCOL_PPTP) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching virtual connection descriptor. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);
|
|
|
|
/* Dereference the virtual GRE descriptor. */
|
|
(VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
|
|
}
|
|
|
|
/* Otherwise, we found no match for a FIN/RST packet - drop it. */
|
|
|
|
} else {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - no matching descriptor found: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
/* Increment the dropped packet count. */
|
|
bp->packets_dropped++;
|
|
|
|
acpt = FALSE;
|
|
goto unlock;
|
|
}
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - packet owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
/* Increment the accepted packet count. */
|
|
bp->packets_accepted++;
|
|
|
|
// Exit here under one of these conditions:
|
|
// (i) got a syn and added a descriptor
|
|
// (ii) got a fin or a reset and destroyed the descriptor
|
|
|
|
acpt = TRUE;
|
|
|
|
unlock:
|
|
|
|
LOCK_EXIT(&(lp->lock), irql);
|
|
|
|
exit:
|
|
|
|
TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);
|
|
|
|
return acpt;
|
|
}
|
|
|
|
/*
 * Function: Load_conn_notify
 * Description: This function is nearly identical to Load_conn_advise, except
 *              for two important distinctions; (1) this function is a notification,
 *              not a request, so load-balancing decisions are not made here, and
 *              (2) packet handling statistics are not incremented here, as calls
 *              to this function rarely stem from processing a real packet.  For
 *              example, when a TCP SYN packet is received, main.c calls Load_conn_advise
 *              essentially asking, "hey, should I accept this new connection I just
 *              saw?"  While, when IPSec notifies NLB that a new Main Mode SA has just
 *              been established, main.c calls Load_conn_notify essentially dictating,
 *              "hey, a new connection just went up, so whether you like it or not,
 *              create state to track this connection."
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             svr_ipaddr - the server IP address in network byte order
 *             svr_port - the server port in host byte order
 *             client_ipaddr - the client IP address in network byte order
 *             client_port - the client port in host byte order
 *             protocol - the protocol of this connection
 *             conn_status - whether the connection is going UP, DOWN, or being RESET
 *             limit_map_fn - whether or not to include server-side parameters in hashing
 *             reverse_hash - whether or not to reverse client and server during hashing
 * Returns: BOOLEAN - was I able to successfully update my state (TRUE = yes)
 * Author: shouse, 10.4.01
 * Notes: Acquires and releases the load module lock internally; callers must not
 *        already hold it.  For PPTP and IPSec tunnels, this function also creates
 *        or dereferences the associated "virtual" descriptor (GRE or IPSec/UDP)
 *        that tracks the tunnel's data traffic.
 */
BOOLEAN Load_conn_notify (
    PLOAD_CTXT      lp,
    ULONG           svr_ipaddr,
    ULONG           svr_port,
    ULONG           client_ipaddr,
    ULONG           client_port,
    USHORT          protocol,
    ULONG           conn_status,
    BOOLEAN         limit_map_fn,
    BOOLEAN         reverse_hash)
{
    ULONG           hash;
    ULONG           vindex;
    ULONG           index;
    ULONG           bin;
    PBIN_STATE      bp;
    PCONN_ENTRY     ep;
    IRQLEVEL        irql;
    PMAIN_CTXT      ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
    /* Port rules only distinguish TCP from non-TCP; any non-TCP protocol is
       treated like UDP for the purpose of port rule lookup. */
    BOOLEAN         is_tcp_pkt = IS_TCP_PKT(protocol);
    BOOLEAN         acpt = TRUE;

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, status = %u, limit map = %u, reverse hash = %u",
                 lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
                 IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port,
                 protocol, conn_status, limit_map_fn, reverse_hash);

    /* If the load module is inactive and this is a CONN_UP, drop the packet and return here.
       If this is a notification for a CONN_DOWN or CONN_RESET, process it. */
    if ((!lp->active) && (conn_status == CVY_CONN_UP)) {

        TRACE_FILTER("%!FUNC! Drop packet - load module is inactive");

        acpt = FALSE;
        goto exit;
    }

    /* Find the port rule for this connection. */
    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    /* Handle CVY_NEVER (rule configured to drop all matching traffic) immediately. */
    if (bp->mode == CVY_NEVER) {

        TRACE_FILTER("%!FUNC! Drop packet - port rule %u is disabled\n", bp->index);

        acpt = FALSE;
        goto exit;
    }

    /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
    hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);

    /* Our index in all connection arrays is this hash, modulo the array size. */
    index = hash % CVY_MAX_CHASH;

    /* Compute the (affinity-sensitive) hash used for bin/bucket mapping. */
    hash = Load_complex_hash(svr_ipaddr, svr_port, client_ipaddr, client_port, bp->affinity, reverse_hash, limit_map_fn);

    bin = hash % CVY_MAXBINS;

    LOCK_ENTER(&(lp->lock), &irql);

    /* Look for an existing matching connection descriptor. */
    ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

    /* If we see a new connection, handle it. */

    if (conn_status == CVY_CONN_UP) {

        /* Create a new connection descriptor to track this connection. */
        ep = Load_create_dscr(lp, bp, ep, index, bin);

        /* If, for some reason, we were unable to create state for this connection, bail out here. */
        if (ep == NULL) {

            TRACE_FILTER("%!FUNC! Drop packet - no available descriptors: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                         "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                         bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

            acpt = FALSE;
            goto unlock;
        }

        /* Set the connection information in the descriptor. */
        CVY_CONN_SET(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

        /* If this is a new PPTP tunnel, create or update a virtual descriptor to track the GRE data packets. */
        if (protocol == TCPIP_PROTOCOL_PPTP) {

            /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol.
               GRE carries no ports, so the well-known PPTP control port is used on both sides. */
            hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);

            /* Our index in all connection arrays is this hash, modulo the array size. */
            vindex = hash % CVY_MAX_CHASH;

            /* Look for an existing matching virtual connection descriptor. */
            ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);

            /* Create or update (add a reference to) a virtual descriptor for the GRE traffic. */
            ep = Load_create_dscr(lp, bp, ep, vindex, bin);

            /* If we can't allocate the virtual descriptor, bail out, but don't fail -
               the primary (PPTP) descriptor was already created above. */
            if (ep == NULL) goto unlock;

            /* Set the connection information in the descriptor. */
            CVY_CONN_SET(ep, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);

            /* Set the virtual descriptor flag. */
            ep->flags |= NLB_CONN_ENTRY_FLAGS_VIRTUAL;
        }
        /* If this is a new IPSEC tunnel, create or update a virtual descriptor to track the UDP subsequent data fragments. */
        else if (protocol == TCPIP_PROTOCOL_IPSEC1) {

            /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
            hash = Load_simple_hash(svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT);

            /* Our index in all connection arrays is this hash, modulo the array size. */
            vindex = hash % CVY_MAX_CHASH;

            /* Look for an existing matching virtual connection descriptor. */
            ep = Load_find_dscr(lp, vindex, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);

            /* Create or update a virtual descriptor for the UDP subsequent fragment traffic. */
            ep = Load_create_dscr(lp, bp, ep, vindex, bin);

            /* If we can't allocate the virtual descriptor, bail out, but don't fail. */
            if (ep == NULL) goto unlock;

            /* Set the connection information in the descriptor. */
            CVY_CONN_SET(ep, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);

            /* Set the virtual descriptor flag. */
            ep->flags |= NLB_CONN_ENTRY_FLAGS_VIRTUAL;
        }

    /* Otherwise, if a known connection is going down, remove our connection entry. */

    } else if ((conn_status == CVY_CONN_DOWN || conn_status == CVY_CONN_RESET) && (ep != NULL)) {

        /* If we found state for this connection, the bin is the bin from the descriptor,
           not the calculated bin, which may not even been accurate if the port rules have
           been modified since this connection was established. */
        bin = ep->bin;

        /* Update the descriptor by destroying it or moving it to the appropriate timeout queue if no references remain. */
        (VOID)Load_destroy_dscr(lp, bp, ep, conn_status);

        /* If this is a PPTP tunnel going down, update the virtual GRE descriptor.  Virtual descriptors
           are ALWAYS de-referenced, not destroyed, even if the notification is a RST because these
           descriptors are potentially shared by multiple PPTP tunnels. */
        if (protocol == TCPIP_PROTOCOL_PPTP) {

            /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
            hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);

            /* Our index in all connection arrays is this hash, modulo the array size. */
            vindex = hash % CVY_MAX_CHASH;

            /* Look for an existing matching connection descriptor. */
            ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);

            /* Dereference the virtual GRE descriptor. */
            (VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
        }

        /* If this is an IPSEC tunnel going down, update the virtual IPSEC_UDP descriptor.  Virtual descriptors
           are ALWAYS de-referenced, not destroyed, even if the notification is a RST because these
           descriptors are potentially shared by multiple IPSEC tunnels. */

        else if (protocol == TCPIP_PROTOCOL_IPSEC1) {

            /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
            hash = Load_simple_hash(svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT);

            /* Our index in all connection arrays is this hash, modulo the array size. */
            vindex = hash % CVY_MAX_CHASH;

            /* Look for an existing matching virtual connection descriptor. */
            ep = Load_find_dscr(lp, vindex, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);

            /* Dereference the virtual IPSec/UDP descriptor. */
            (VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
        }

    /* Otherwise, we found no match for a FIN/RST packet - drop it. */

    } else {

        TRACE_FILTER("%!FUNC! Drop packet - no matching descriptor for RST/FIN: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                     "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                     bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

        acpt = FALSE;
        goto unlock;
    }

    TRACE_FILTER("%!FUNC! Accept packet - packet owned by this host: Port rule = %u, Bin = %u, Current map = 0x%015I64x, "
                 "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                 bp->index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

    // Exit here under one of these conditions:
    // (i)  got a syn and added a descriptor
    // (ii) got a fin or a reset and destroyed the descriptor

    acpt = TRUE;

 unlock:

    LOCK_EXIT(&(lp->lock), irql);

 exit:

    TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);

    return acpt;
}
|
|
|
|
/*
|
|
* Function: Load_conn_get
|
|
* Description: This function returns the connection parameters for the descriptor
|
|
* at the head of the recovery queue, if one exists. The recovery
|
|
* queue holds all "active" connections, some of which may be stale.
|
|
* If an active descriptor exists, it fills in the connection info
|
|
* and returns TRUE to indicate success; otherwise it returns FALSE
|
|
* to indicate that no connection was found.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* OUT svr_ipaddr - the server IP address in network byte order
|
|
* OUT svr_port - the server port in host byte order
|
|
* OUT client_ipaddr - the client IP address in network byte order
|
|
* OUT client_port - the client port in host byte order
|
|
* OUT protocol - the protocol of this connection
|
|
* Returns: BOOLEAN -
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
BOOLEAN Load_conn_get (PLOAD_CTXT lp, PULONG svr_ipaddr, PULONG svr_port, PULONG client_ipaddr, PULONG client_port, PUSHORT protocol)
|
|
{
|
|
LINK * rlp;
|
|
PCONN_ENTRY ep;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* Get the descriptor off of the front of the recovery queue - DO NOT dequeue
|
|
it, just get a pointer to the descriptor and LEAVE IT ON THE QUEUE. */
|
|
rlp = (LINK *)Queue_front(&(lp->conn_rcvryq));
|
|
|
|
/* If there are no descriptors, return failure. */
|
|
if (rlp == NULL)
|
|
return FALSE;
|
|
|
|
/* Get a pointer to the connection entry. */
|
|
ep = STRUCT_PTR(rlp, CONN_ENTRY, rlink);
|
|
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* Grab the IP tuple information out the descriptor and return it to the caller. */
|
|
*svr_ipaddr = ep->svr_ipaddr;
|
|
*svr_port = ep->svr_port;
|
|
*client_ipaddr = ep->client_ipaddr;
|
|
*client_port = ep->client_port;
|
|
*protocol = ep->protocol;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_conn_sanction
|
|
* Description: This function is called to "sanction" an active connection descriptor.
|
|
* Sanction means that NLB has verified that this connection is indeed
|
|
* still active by querying other system entities (such as TCP/IP). To
|
|
* sanction a descriptor simply involves moving it from its place in the
|
|
* recovery queue (should be the head in most cases) to the tail of the
|
|
* recovery queue, where it has the least chance of being cannibalized.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* Returns: BOOLEAN - was i successful in approbating the descriptor? (TRUE = yes)
|
|
* Author: shouse, 10.4.01
|
|
* Notes:
|
|
*/
|
|
BOOLEAN Load_conn_sanction (
|
|
PLOAD_CTXT lp,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol)
|
|
{
|
|
ULONG hash;
|
|
ULONG index;
|
|
PCONN_ENTRY ep;
|
|
IRQLEVEL irql;
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);
|
|
BOOLEAN acpt = FALSE;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u",
|
|
lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
|
|
IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port, protocol);
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
LOCK_ENTER(&(lp->lock), &irql);
|
|
|
|
/* Try to find a matching descriptor for this connection. */
|
|
ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
/* If there is no matching descriptor, then it must have been destroyed - return failure. */
|
|
if (ep == NULL) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - no matching descriptor found");
|
|
|
|
acpt = FALSE;
|
|
goto unlock;
|
|
}
|
|
|
|
/* If this descriptor is being timed-out, do nothing - the connection has terminated
|
|
gracefully and the descriptor will be destroyed when it expires. */
|
|
if (ep->timeout) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - matching descriptor found, already expired");
|
|
|
|
acpt = FALSE;
|
|
goto unlock;
|
|
}
|
|
|
|
/* To approbate the descriptor, we remove it from its place in the recovery queue
|
|
and move it to the tail; active descriptors are moved to the end of the queue
|
|
to attempt to prevent them from being recycled when we run out of free descriptors. */
|
|
Link_unlink(&(ep->rlink));
|
|
Queue_enq(&(lp->conn_rcvryq), &(ep->rlink));
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - descriptor approbated");
|
|
|
|
acpt = TRUE;
|
|
|
|
unlock:
|
|
|
|
LOCK_EXIT(&(lp->lock), &irql);
|
|
|
|
return acpt;
|
|
}
|
|
|
|
/*
 * Function: Load_port_change
 * Description: Applies a port-rule control operation (enable, disable, drain,
 *              set load weight) or a cluster-wide operation (plug, drain) to
 *              every configured port rule that matches the given VIP/port pair,
 *              adjusting this host's load weight for each matching rule.  If
 *              any rule's weight actually changed, the appropriate WMI event is
 *              fired and convergence is initiated (if not already underway).
 * Parameters: lp - a pointer to the load module context (LOAD_CTXT)
 *             ipaddr - the virtual IP address to match, or IOCTL_ALL_VIPS (0)
 *             port - the port to match, or IOCTL_ALL_PORTS
 *             cmd - one of IOCTL_CVY_PORT_ON/OFF/DRAIN/SET or
 *                   IOCTL_CVY_CLUSTER_PLUG/DRAIN
 *             value - the new load weight (used only for IOCTL_CVY_PORT_SET)
 * Returns: ULONG - IOCTL_CVY_OK if at least one rule was changed,
 *                  IOCTL_CVY_ALREADY if matching rules were already in the
 *                  requested state, IOCTL_CVY_NOT_FOUND if no rule matched or
 *                  the load module is inactive.
 * Notes: No lock is taken here; callers are assumed to provide any needed
 *        serialization (TODO confirm against call sites outside this view).
 */
ULONG Load_port_change(
    PLOAD_CTXT      lp,
    ULONG           ipaddr,
    ULONG           port,
    ULONG           cmd,
    ULONG           value)
{
    PCVY_RULE       rp;        /* Pointer to configured port rules. */
    PBIN_STATE      bp;        /* Pointer to load module port rule state. */
    ULONG           nrules;    /* Number of rules. */
    ULONG           i;
    ULONG           ret = IOCTL_CVY_NOT_FOUND;
    PMAIN_CTXT      ctxtp = CONTAINING_RECORD(lp, MAIN_CTXT, load);
    BOOLEAN         bPortControlCmd;  /* TRUE = per-port-rule command, FALSE = whole-node (plug/drain) command. */

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    if (! lp->active)
    {
        return IOCTL_CVY_NOT_FOUND;
    }

    /* Assume a per-port command; cluster-wide PLUG/DRAIN flips this below. */
    bPortControlCmd = TRUE;

    rp = (* (lp->params)).port_rules;

    /* If we are draining whole cluster, include DEFAULT rule; Otherwise, just
       include the user-defined rules (the DEFAULT rule is the last rule). */
    if (cmd == IOCTL_CVY_CLUSTER_DRAIN || cmd == IOCTL_CVY_CLUSTER_PLUG)
        nrules = (* (lp->params)).num_rules + 1;
    else
        nrules = (* (lp->params)).num_rules;

    for (i=0; i<nrules; i++, rp++)
    {
        /* If the virtual IP address is IOCTL_ALL_VIPS (0x00000000), then we are applying this
           change to all port rules for port X, regardless of VIP.  If the virtual IP address is
           to be applied to a particular VIP, then we apply only to port rules whose VIP matches.
           Similarly, if the change is to apply to an "ALL VIP" rule, then we also apply when the
           VIP matches because the caller uses CVY_ALL_VIP_NUMERIC_VALUE (0xffffffff) as the
           virtual IP address, which is the same value stored in the port rule state. */
        if ((ipaddr == IOCTL_ALL_VIPS || ipaddr == rp->virtual_ip_addr) &&
            (port == IOCTL_ALL_PORTS || (port >= rp->start_port && port <= rp->end_port)))
        {
            bp = &(lp->pg_state[i]);

            UNIV_ASSERT(bp->code == CVY_BINCODE); /* (bbain 8/19/99) */

            /* If enabling a port rule, set the load amount to original value;
               If disabling a port rule, set the load amount to zero;
               Otherwise, set the load amount it to the specified amount. */
            if (cmd == IOCTL_CVY_PORT_ON || cmd == IOCTL_CVY_CLUSTER_PLUG)
            {
                if (cmd == IOCTL_CVY_CLUSTER_PLUG)
                {
                    bPortControlCmd = FALSE;
                }

                if (bp->load_amt[lp->my_host_id] == bp->orig_load_amt)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Restore the original load amount. */
                bp->load_amt[lp->my_host_id] = bp->orig_load_amt;
                ret = IOCTL_CVY_OK;
            }
            else if (cmd == IOCTL_CVY_PORT_OFF)
            {

                if (bp->load_amt[lp->my_host_id] == 0)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                bp->load_amt[lp->my_host_id] = 0;

                /* Immediately stop handling all traffic on the port group. */
                bp->cmap = 0;
                bp->cur_map[lp->my_host_id] = 0;

                /* Re-initialize the performance counters. */
                bp->packets_accepted = 0;
                bp->packets_dropped = 0;
                bp->bytes_accepted = 0;
                bp->bytes_dropped = 0;

                /* Tear down all existing connections on this port group. */
                Load_conn_kill(lp, bp);

                ret = IOCTL_CVY_OK;
            }
            else if (cmd == IOCTL_CVY_PORT_DRAIN || cmd == IOCTL_CVY_CLUSTER_DRAIN)
            {
                if (cmd == IOCTL_CVY_CLUSTER_DRAIN)
                {
                    bPortControlCmd = FALSE;
                }

                if (bp->load_amt[lp->my_host_id] == 0)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Set load weight to zero, but continue to handle existing connections. */
                bp->load_amt[lp->my_host_id] = 0;
                ret = IOCTL_CVY_OK;
            }
            else
            {
                UNIV_ASSERT(cmd == IOCTL_CVY_PORT_SET);

                if (bp->load_amt[lp->my_host_id] == value)
                {
                    /* If we are the first port rule to match, then set the
                       return value to "Already"; Otherwise, we don't want to
                       overwrite some other port rule's return value of "OK"
                       in the case of ALL_VIPS or ALL_PORTS. */
                    if (ret == IOCTL_CVY_NOT_FOUND) ret = IOCTL_CVY_ALREADY;

                    continue;
                }

                /* Set the load weight for this port rule. */
                bp->orig_load_amt = value;
                bp->load_amt[lp->my_host_id] = value;
                ret = IOCTL_CVY_OK;
            }

            /* A fully-specified (single VIP, single port) command matches at
               most one rule, so stop scanning after the first match. */
            if (port != IOCTL_ALL_PORTS && ipaddr != IOCTL_ALL_VIPS) break;
        }
    }

    /* If the cluster isn't already converging, then initiate convergence if the load weight of a port rule has been modified. */
    if (ret == IOCTL_CVY_OK) {

        if (bPortControlCmd)
        {
            // If enabled, fire wmi event indicating enable/disable/drain of ports on this node
            if (NlbWmiEvents[PortRuleControlEvent].Enable)
            {
                WCHAR wsVip[CVY_MAX_VIRTUAL_IP_ADDR + 1];

                Univ_ip_addr_ulong_to_str (ipaddr, wsVip);

                // Form the VIP & Port number in case of All VIPs & All Ports
                switch(cmd)
                {
                case IOCTL_CVY_PORT_ON:
                     NlbWmi_Fire_PortControlEvent(ctxtp, NLB_EVENT_PORT_ENABLED, wsVip, port);
                     break;

                case IOCTL_CVY_PORT_OFF:
                     NlbWmi_Fire_PortControlEvent(ctxtp, NLB_EVENT_PORT_DISABLED, wsVip, port);
                     break;

                case IOCTL_CVY_PORT_DRAIN:
                     NlbWmi_Fire_PortControlEvent(ctxtp, NLB_EVENT_PORT_DRAINING, wsVip, port);
                     break;

                // For Port Set, do NOT fire event from here. This is 'cos it is only called in the
                // reload case and the event is fired from the caller i.e. Main_apply_without_restart().
                // The event is fired from the caller 'cos this function could be called more than
                // one time (if there are multiple port rules) and we want to fire the event only once
                case IOCTL_CVY_PORT_SET:
                     break;

                default:
                     TRACE_CRIT("%!FUNC! Unexpected command code : 0x%x, NOT firing PortControl event", cmd);
                     break;
                }
            }
            else
            {
                TRACE_VERB("%!FUNC! NOT generating event 'cos PortControlEvent event generation is disabled");
            }
        }
        else // Node Control event
        {
            // If enabled, fire wmi event indicating starting/draining of nlb on this node
            if (NlbWmiEvents[NodeControlEvent].Enable)
            {
                switch(cmd)
                {
                case IOCTL_CVY_CLUSTER_PLUG:
                     NlbWmi_Fire_NodeControlEvent(ctxtp, NLB_EVENT_NODE_STARTED);
                     break;

                case IOCTL_CVY_CLUSTER_DRAIN:
                     NlbWmi_Fire_NodeControlEvent(ctxtp, NLB_EVENT_NODE_DRAINING);
                     break;

                default:
                     TRACE_CRIT("%!FUNC! Unexpected command code : 0x%x, NOT firing NodeControl event", cmd);
                     break;
                }
            }
            else
            {
                TRACE_VERB("%!FUNC! NOT generating event 'cos NodeControlEvent event generation is disabled");
            }
        }

        if (lp->send_msg.state != HST_CVG) {
            WCHAR me[20];

            Univ_ulong_to_str (lp->my_host_id+1, me, 10);

            /* Tracking convergence - Starting convergence because our port rule configuration has changed. */
            LOG_MSGS(MSG_INFO_CONVERGING_NEW_RULES, me, me);
            TRACE_CONVERGENCE("%!FUNC! Initiating convergence on host %d. Reason: Host %d has changed its port rule configuration.", lp->my_host_id+1, lp->my_host_id+1);

            /* Tracking convergence. */
            Load_convergence_start(lp);

            // If enabled, fire wmi event indicating start of convergence
            if (NlbWmiEvents[ConvergingEvent].Enable)
            {
                NlbWmi_Fire_ConvergingEvent(ctxtp,
                                            NLB_EVENT_CONVERGING_MODIFIED_RULES,
                                            ctxtp->params.ded_ip_addr,
                                            ctxtp->params.host_priority);
            }
            else
            {
                TRACE_VERB("%!FUNC! NOT Generating NLB_EVENT_CONVERGING_MODIFIED_RULES 'cos ConvergingEvent generation disabled");
            }
        }
    }

    return ret;

} /* end Load_port_change */
|
|
|
|
|
|
ULONG Load_hosts_query(
|
|
PLOAD_CTXT lp,
|
|
BOOLEAN internal,
|
|
PULONG host_map)
|
|
{
|
|
WCHAR members[256] = L"";
|
|
WCHAR num[20] = L"";
|
|
WCHAR me[20] = L"";
|
|
PWCHAR ptr = members;
|
|
ULONG index = 0;
|
|
ULONG count = 0;
|
|
PMAIN_CTXT ctxtp = CONTAINING_RECORD (lp, MAIN_CTXT, load);
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
for (index = 0; index < CVY_MAX_HOSTS; index++) {
|
|
if (lp->host_map & (1 << index)) {
|
|
ptr = Univ_ulong_to_str(index + 1, ptr, 10);
|
|
*ptr = L',';
|
|
ptr++;
|
|
count++;
|
|
}
|
|
}
|
|
|
|
if (count) ptr--;
|
|
|
|
*ptr = 0;
|
|
|
|
*host_map = lp->host_map;
|
|
|
|
Univ_ulong_to_str((*(lp->params)).host_priority, me, 10);
|
|
Univ_ulong_to_str(count, num, 10);
|
|
|
|
if (lp->send_msg.state != HST_NORMAL)
|
|
{
|
|
UNIV_PRINT_VERB(("Load_hosts_query: Current host map is %08x and converging", lp->host_map));
|
|
TRACE_VERB("%!FUNC! Current host map is 0x%08x and converging", lp->host_map);
|
|
|
|
if (internal)
|
|
{
|
|
/* If there are 9 or less members in the cluster, we can be sure that there
|
|
is enough room in an event log to list the members out. If not, it might
|
|
get truncated, so we might as well log a different event instead and tell
|
|
the user to perform a "wlbs query" to see the list. */
|
|
if (count < 10) {
|
|
LOG_MSGS(MSG_INFO_CONVERGING_LIST, me, members);
|
|
} else {
|
|
LOG_MSGS1(MSG_INFO_CONVERGING_MAP, me, num, *host_map);
|
|
}
|
|
}
|
|
|
|
return IOCTL_CVY_CONVERGING;
|
|
}
|
|
else if (lp->pg_state[(*(lp->params)).num_rules].cmap != 0)
|
|
{
|
|
UNIV_PRINT_VERB(("Load_hosts_query: Current host map is %08x and converged as DEFAULT", lp->host_map));
|
|
TRACE_VERB("%!FUNC! Current host map is 0x%08x and converged as DEFAULT", lp->host_map);
|
|
|
|
if (internal)
|
|
{
|
|
/* If there are 9 or less members in the cluster, we can be sure that there
|
|
is enough room in an event log to list the members out. If not, it might
|
|
get truncated, so we might as well log a different event instead and tell
|
|
the user to perform a "wlbs query" to see the list. */
|
|
if (count < 10) {
|
|
LOG_MSGS(MSG_INFO_MASTER_LIST, me, members);
|
|
} else {
|
|
LOG_MSGS1(MSG_INFO_MASTER_MAP, me, num, *host_map);
|
|
}
|
|
|
|
// If enabled, fire wmi event indicating cluster is converged
|
|
if (NlbWmiEvents[ConvergedEvent].Enable)
|
|
{
|
|
NlbWmi_Fire_ConvergedEvent(ctxtp, *host_map);
|
|
}
|
|
else
|
|
{
|
|
TRACE_VERB("%!FUNC! ConvergedEvent generation disabled");
|
|
}
|
|
}
|
|
|
|
return IOCTL_CVY_MASTER;
|
|
}
|
|
else
|
|
{
|
|
UNIV_PRINT_VERB(("Load_hosts_query: Current host map is %08x and converged (NON-DEFAULT)", lp->host_map));
|
|
TRACE_VERB("%!FUNC! Current host map is 0x%08x and converged (NON-DEFAULT)", lp->host_map);
|
|
|
|
if (internal)
|
|
{
|
|
/* If there are 9 or less members in the cluster, we can be sure that there
|
|
is enough room in an event log to list the members out. If not, it might
|
|
get truncated, so we might as well log a different event instead and tell
|
|
the user to perform a "wlbs query" to see the list. */
|
|
if (count < 10) {
|
|
LOG_MSGS(MSG_INFO_SLAVE_LIST, me, members);
|
|
} else {
|
|
LOG_MSGS1(MSG_INFO_SLAVE_MAP, me, num, *host_map);
|
|
}
|
|
|
|
// If enabled, fire wmi event indicating cluster is converged
|
|
if (NlbWmiEvents[ConvergedEvent].Enable)
|
|
{
|
|
NlbWmi_Fire_ConvergedEvent(ctxtp, *host_map);
|
|
}
|
|
else
|
|
{
|
|
TRACE_VERB("%!FUNC! ConvergedEvent generation disabled");
|
|
}
|
|
}
|
|
return IOCTL_CVY_SLAVE;
|
|
}
|
|
} /* end Load_hosts_query */
|
|
|
|
/*
|
|
* Function: Load_query_packet_filter
|
|
* Description: This function takes a IP tuple and protocol and consults the load-
|
|
* balancing state to determine whether or not this packet would be
|
|
* accepted by the load module. In either case, the reason for the
|
|
* decision is also provided, plus, in most cases, some of the load
|
|
* module state is also returned to provide some context to justify
|
|
* the decision. This function is COMPLETELY unobtrusive and makes
|
|
* NO changes to the actual state of the load module.
|
|
* Parameters: lp - a pointer to the load module.
|
|
* pQuery - a pointer to a buffer into which the results are placed.
|
|
* svr_ipaddr - the server side IP address of this virtual packet.
|
|
* svr_port - the server side port of this virtual packet.
|
|
* client_ipaddr - the client side IP address of this virtual packet.
|
|
* client_port - the client side port of this virtual packet.
|
|
* protocol - the protocol of this virtual packet (UDP, TCP or IPSec1).
|
|
* limit_map_fn - a boolean indication of whether or not to use server
|
|
* side parameters in the Map function. This is controlled
|
|
* by BDA teaming.
|
|
* reverse_hash - whether or not to reverse client and server during hashing
|
|
* Returns: Nothing.
|
|
* Author: shouse, 5.18.01
|
|
* Notes: This function is only observatory and makes NO changes to the state of
|
|
* the load module.
|
|
*/
|
|
VOID Load_query_packet_filter (
|
|
PLOAD_CTXT lp,
|
|
PNLB_OPTIONS_PACKET_FILTER pQuery,
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol,
|
|
UCHAR flags,
|
|
BOOLEAN limit_map_fn,
|
|
BOOLEAN reverse_hash)
|
|
{
|
|
PBIN_STATE bp;
|
|
ULONG hash;
|
|
ULONG index;
|
|
ULONG bin;
|
|
QUEUE * qp;
|
|
|
|
/* This variable is used for port rule lookup and since the port rules only cover
|
|
UDP and TCP, we categorize as TCP and non-TCP, meaning that any protocol that's
|
|
not TCP will be treated like UDP for the sake of port rule lookup. */
|
|
BOOLEAN is_tcp_pkt = IS_TCP_PKT(protocol);
|
|
|
|
/* Further, some protocols are treated with "session" semantics, while others are
|
|
not. For TCP, this "session" is currently a single TCP connection, which is
|
|
tracked from SYN to FIN using a connection descriptor. IPSec "sessions" are
|
|
also tracked using descriptors, so even though its treated like UDP for port
|
|
rule lookup, its treated with the session semantics resembling TCP. Therefore,
|
|
by default the determination of a session packet is initially the same as the
|
|
determination of a TCP packet. */
|
|
BOOLEAN is_session_pkt = IS_SESSION_PKT(protocol);
|
|
|
|
UNIV_ASSERT(lp);
|
|
UNIV_ASSERT(pQuery);
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* If the load module has been "turned off", then we drop the packet. */
|
|
if (!lp->active) {
|
|
pQuery->Accept = NLB_REJECT_LOAD_MODULE_INACTIVE;
|
|
return;
|
|
}
|
|
|
|
/* Find the port rule for this server IP address / port pair. */
|
|
bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);
|
|
|
|
UNIV_ASSERT ((is_tcp_pkt && bp->prot != CVY_UDP) || (!is_tcp_pkt && bp->prot != CVY_TCP));
|
|
|
|
/* If the matching port rule is configured as "disabled", which means to drop any
|
|
packets that match the rule, then we drop the packet. */
|
|
if (bp->mode == CVY_NEVER) {
|
|
pQuery->Accept = NLB_REJECT_PORT_RULE_DISABLED;
|
|
return;
|
|
}
|
|
|
|
/* If the applicable port rule is configured in "No" affinity mode, make sure enough
|
|
information has been specified in the query to faithfully determine packet ownership. */
|
|
if (bp->affinity == CVY_AFFINITY_NONE) {
|
|
/* VPN protocols REQUIRE either "Single" or "Class C" affinity; reject the request. */
|
|
if ((protocol == TCPIP_PROTOCOL_GRE) || (protocol == TCPIP_PROTOCOL_PPTP) || (protocol == TCPIP_PROTOCOL_IPSEC1)) {
|
|
pQuery->Accept = NLB_UNKNOWN_NO_AFFINITY;
|
|
return;
|
|
/* Hasing in "No" affinity requires the client port; if it wasn't specified, reject
|
|
the request. We check for a non-zero server port to special case ICMP filtering,
|
|
which sets BOTH ports to zero legally. */
|
|
} else if ((client_port == 0) && (svr_port != 0)) {
|
|
pQuery->Accept = NLB_UNKNOWN_NO_AFFINITY;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
/* Compute the hash. */
|
|
hash = Load_complex_hash(svr_ipaddr, svr_port, client_ipaddr, client_port, bp->affinity, reverse_hash, limit_map_fn);
|
|
|
|
bin = hash % CVY_MAXBINS;
|
|
|
|
/* At this point, we can begin providing the requestee some actual information about
|
|
the state of the load module to better inform them as to why the decision we return
|
|
them was actually made. Here will provide some appropriate information about the
|
|
port rule we are operating on, including the "bucket" ID, the current "bucket"
|
|
ownership map and the number of connections active on this "bucket". */
|
|
pQuery->HashInfo.Valid = TRUE;
|
|
pQuery->HashInfo.Bin = bin;
|
|
pQuery->HashInfo.CurrentMap = bp->cmap;
|
|
pQuery->HashInfo.AllIdleMap = bp->all_idle_map;
|
|
pQuery->HashInfo.ActiveConnections = bp->nconn[bin];
|
|
|
|
/* If the packet is a connection control packet (TCP SYN/FIN/RST or IPSec MMSA, etc),
|
|
then we treat it differently than normal connection data. Mimics Load_conn_advise(). */
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
/* If notifications are turned ON, then we only want to traverse this path if its a session-ful SYN.
|
|
FINs and RSTs should fall into the Load_packet_check path. If notification is NOT ON, then fall
|
|
through here for all SYNs, FINs and RSTs for session-ful protocols. */
|
|
if (is_session_pkt && ((flags & NLB_FILTER_FLAGS_CONN_UP) || (((flags & NLB_FILTER_FLAGS_CONN_DOWN) || (flags & NLB_FILTER_FLAGS_CONN_RESET)) && !NLB_NOTIFICATIONS_ON())))
|
|
#else
|
|
if (is_session_pkt && ((flags & NLB_FILTER_FLAGS_CONN_UP) || (flags & NLB_FILTER_FLAGS_CONN_DOWN) || (flags & NLB_FILTER_FLAGS_CONN_RESET)))
|
|
#endif
|
|
{
|
|
PCONN_ENTRY ep;
|
|
|
|
/* If this host does not own the bucket and the packet is not a connection
|
|
down or connection reset for a non-idle bin, then we don't own the packet. */
|
|
if (((bp->cmap & (((MAP_T) 1) << bin)) == 0) && (!(((flags & NLB_FILTER_FLAGS_CONN_DOWN) || (flags & NLB_FILTER_FLAGS_CONN_RESET)) && (bp->nconn[bin] > 0)))) {
|
|
pQuery->Accept = NLB_REJECT_OWNED_ELSEWHERE;
|
|
return;
|
|
}
|
|
|
|
/* At this point, we _might_ own the packet - if its a connection up, then
|
|
we definately do, because we own the bucket it maps to. */
|
|
if (flags & NLB_FILTER_FLAGS_CONN_UP) {
|
|
pQuery->Accept = NLB_ACCEPT_UNCONDITIONAL_OWNERSHIP;
|
|
return;
|
|
}
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, index, pQuery->ServerIPAddress, pQuery->ServerPort, pQuery->ClientIPAddress, pQuery->ClientPort, pQuery->Protocol);
|
|
|
|
/* If we haven't found a matching connection descriptor, then this host
|
|
certainly does not own this packet. */
|
|
if (ep == NULL) {
|
|
pQuery->Accept = NLB_REJECT_OWNED_ELSEWHERE;
|
|
return;
|
|
}
|
|
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* If we find a match in the static hash table, fill in some descriptor
|
|
information for the user, including whether or not the descriptor was
|
|
allocated or static (static is this case) and the observed FIN count. */
|
|
pQuery->DescriptorInfo.Valid = TRUE;
|
|
pQuery->DescriptorInfo.Alloc = (ep->flags & NLB_CONN_ENTRY_FLAGS_ALLOCATED) ? TRUE : FALSE;
|
|
pQuery->DescriptorInfo.Dirty = (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) ? TRUE : FALSE;
|
|
pQuery->DescriptorInfo.RefCount = ep->ref_count;
|
|
|
|
/* If the connection is dirty, we do not take the packet because TCP may
|
|
have stale information for this descriptor. */
|
|
if (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) {
|
|
pQuery->Accept = NLB_REJECT_CONNECTION_DIRTY;
|
|
return;
|
|
}
|
|
|
|
/* If the connection is not dirty, we'll take the packet, as it belongs
|
|
to an existing connection that we are servicing on this host. */
|
|
pQuery->Accept = NLB_ACCEPT_FOUND_MATCHING_DESCRIPTOR;
|
|
return;
|
|
|
|
/* Otherwise, if its not a control packet, then its just a data packet, which
|
|
requires that either we unconditionally own this connection (if all other
|
|
hosts are idle on the bucket this packet maps to), or that we have an active
|
|
connection descriptor for this connection. Mimics load_packet_check(). */
|
|
} else {
|
|
/* If we currently own the "bucket" to which this connection maps and either NLB provides
|
|
no session support for this protocol, or all other hosts have no existing connections
|
|
on this "bucket" and we have no dirty connections, then we can safely take the packet
|
|
with no regard to the connection (session) descriptors. */
|
|
if (((bp->cmap & (((MAP_T) 1) << bin)) != 0) && (!is_session_pkt || (((bp->all_idle_map & (((MAP_T) 1) << bin)) != 0) && (!(lp->cln_waiting))))) {
|
|
pQuery->Accept = NLB_ACCEPT_UNCONDITIONAL_OWNERSHIP;
|
|
return;
|
|
|
|
/* Otherwise, if there are active connections on this "bucket" or if we own the
|
|
"bucket" and there are dirty connections on it, then we'll walk our descriptor
|
|
lists to determine whether or not we should take the packet or not. */
|
|
} else if (bp->nconn[bin] > 0 || (lp->cln_waiting && lp->dirty_bin[bin] && ((bp->cmap & (((MAP_T) 1) << bin)) != 0))) {
|
|
PCONN_ENTRY ep;
|
|
|
|
/* Look for an existing matching connection descriptor. */
|
|
ep = Load_find_dscr(lp, index, pQuery->ServerIPAddress, pQuery->ServerPort, pQuery->ClientIPAddress, pQuery->ClientPort, pQuery->Protocol);
|
|
|
|
/* If we haven't found a matching connection descriptor, then this host
|
|
certainly does not own this packet. */
|
|
if (ep == NULL) {
|
|
pQuery->Accept = NLB_REJECT_OWNED_ELSEWHERE;
|
|
return;
|
|
}
|
|
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* If we find a match in the static hash table, fill in some descriptor
|
|
information for the user, including whether or not the descriptor was
|
|
allocated or static (static is this case) and the observed FIN count. */
|
|
pQuery->DescriptorInfo.Valid = TRUE;
|
|
pQuery->DescriptorInfo.Alloc = (ep->flags & NLB_CONN_ENTRY_FLAGS_ALLOCATED) ? TRUE : FALSE;
|
|
pQuery->DescriptorInfo.Dirty = (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) ? TRUE : FALSE;
|
|
pQuery->DescriptorInfo.RefCount = ep->ref_count;
|
|
|
|
/* If the connection is dirty, we do not take the packet because TCP may
|
|
have stale information for this descriptor. */
|
|
if (ep->flags & NLB_CONN_ENTRY_FLAGS_DIRTY) {
|
|
pQuery->Accept = NLB_REJECT_CONNECTION_DIRTY;
|
|
return;
|
|
}
|
|
|
|
/* If the connection is not dirty, we'll take the packet, as it belongs
|
|
to an existing connection that we are servicing on this host. */
|
|
pQuery->Accept = NLB_ACCEPT_FOUND_MATCHING_DESCRIPTOR;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* If we get all the way down here, then we aren't going to accept the packet
|
|
because we do not own the "bucket" to which the packet maps and we have no
|
|
existing connection (session) state to allow us to service the packet. */
|
|
pQuery->Accept = NLB_REJECT_OWNED_ELSEWHERE;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_query_port_state
|
|
* Description: This function returns the state (enabled, disabled, draining) of a particular
|
|
* port rule and, if found, returns some packet handling statistics for the port
|
|
* rule, such as the number of packets and bytes accepted and dropped. These
|
|
* counters are reset whenever a load weight change is made on the port rule, or
|
|
* whenever the load module is stopped/started. This function is just a query
|
|
* and therefore makes NO changes to the actual state of any port rule.
|
|
* Parameters: lp - a pointer to the load module.
|
|
* pQuery - a pointer to a buffer into which the results are placed.
|
|
* ipaddr - the VIP for the port rule that we are looking for. When per-VIP rules
|
|
* are not used, this IP address is 255.255.255.255 (0xffffffff).
|
|
* port - the port we are looking for. This function (and all other port rule
|
|
* operation functions, for that matter) identify a port rule by a port
|
|
* number within the range of a rule. Therefore, 80 identifies the port
|
|
* rule whose start port is 0 and whose end port is 1024, for instance.
|
|
* Returns: Nothing.
|
|
* Author: shouse, 5.18.01
|
|
* Notes: It is very important that this function operates completely unobtrusively.
|
|
*/
|
|
VOID Load_query_port_state (
|
|
PLOAD_CTXT lp,
|
|
PNLB_OPTIONS_PORT_RULE_STATE pQuery,
|
|
ULONG ipaddr,
|
|
USHORT port)
|
|
{
|
|
PCVY_RULE rp; /* Pointer to configured port rules. */
|
|
PBIN_STATE bp; /* Pointer to load module port rule state. */
|
|
ULONG nrules; /* Number of configured port rules. */
|
|
ULONG i;
|
|
|
|
UNIV_ASSERT(lp);
|
|
UNIV_ASSERT(pQuery);
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* If the load module is inactive, all rules are in a default state, so
|
|
since there is nothing interesting to report, bail out and report that
|
|
the port rule could not be found. */
|
|
if (!lp->active) {
|
|
pQuery->Status = NLB_PORT_RULE_NOT_FOUND;
|
|
return;
|
|
}
|
|
|
|
/* Begin by assuming that we won't find a corresponding rule. */
|
|
pQuery->Status = NLB_PORT_RULE_NOT_FOUND;
|
|
|
|
/* Grab a pointer to the beginning of the port rules array. These are the port
|
|
rules are read from the registry, so no state is associated with them. */
|
|
rp = (*(lp->params)).port_rules;
|
|
|
|
/* Find out how many port rules to loop through. */
|
|
nrules = (*(lp->params)).num_rules;
|
|
|
|
/* Loop through all port rules looking for a match. */
|
|
for (i = 0; i < nrules; i++, rp++) {
|
|
/* If the VIP matches (this check includes the check for ALL VIP, which is coded as
|
|
0xffffffff by both the user-level software and the load module) and the port number
|
|
is within the range of this port rule, we have a winner. */
|
|
if ((ipaddr == rp->virtual_ip_addr) && ((port >= rp->start_port) && (port <= rp->end_port))) {
|
|
/* Get a pointer to the load module port rule state for this rule. The load
|
|
module stores the port rules in the same order as they are read from the
|
|
registry and stored in the NLB params, so we can use the index of the loop
|
|
to directly index into the corresponding load module state for this rule. */
|
|
bp = &(lp->pg_state[i]);
|
|
|
|
UNIV_ASSERT(bp->code == CVY_BINCODE);
|
|
|
|
/* If the load weight is zero, this could be because either the rule is
|
|
disabled or because it is in the process of draining. */
|
|
if (bp->load_amt[lp->my_host_id] == 0) {
|
|
/* If the current number of connections being served on this port
|
|
rule is non-zero, then this port rule is being drained - the
|
|
count is decremented by every completed connection and goes to
|
|
zero when the rule is finished draining. */
|
|
if (bp->tconn) {
|
|
pQuery->Status = NLB_PORT_RULE_DRAINING;
|
|
} else {
|
|
pQuery->Status = NLB_PORT_RULE_DISABLED;
|
|
}
|
|
/* If the port rule has a non-zero load weight, then it is enabled. */
|
|
} else {
|
|
pQuery->Status = NLB_PORT_RULE_ENABLED;
|
|
}
|
|
|
|
/* Fill in some statistics for this port rule, including the number
|
|
of packets and bytes accepted and dropped, which can be used to
|
|
create an estimate of actual load balancing performance. */
|
|
pQuery->Statistics.Packets.Accepted = bp->packets_accepted;
|
|
pQuery->Statistics.Packets.Dropped = bp->packets_dropped;
|
|
pQuery->Statistics.Bytes.Accepted = bp->bytes_accepted;
|
|
pQuery->Statistics.Bytes.Dropped = bp->bytes_dropped;
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Function: Load_query_convergence_info
|
|
* Description: Queries the load module for the convergence statistics
|
|
* Parameters: lp - a pointer to the load module context.
|
|
* OUT num_cvgs - a pointer to a ULONG to hold the total number of convergences on this host.
|
|
* OUT last_cvg - a pointer to a ULONG to hold the time since the last convergence completed.
|
|
* Returns: BOOLEAN - whether or not the load module is active. If TRUE, then the OUT params were filled in.
|
|
* Author: shouse, 10.30.01
|
|
* Notes:
|
|
*/
|
|
BOOLEAN Load_query_convergence_info (PLOAD_CTXT lp, PULONG num_cvgs, PULONG last_cvg)
{
    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* An inactive load module has no convergence information to report;
       the OUT parameters are left untouched in that case. */
    if (!lp->active)
        return FALSE;

    /* Total number of convergences since this host joined the cluster. */
    *num_cvgs = lp->num_convergences;

    /* If this host's heartbeat is in the NORMAL state, the last convergence
       has completed and its age is the current time minus its timestamp.
       Otherwise a convergence is still in progress, so report the invalid-time
       sentinel instead. */
    *last_cvg = (lp->send_msg.state == HST_NORMAL)
                    ? (lp->clock_sec - lp->last_convergence)
                    : NLB_QUERY_TIME_INVALID;

    return TRUE;
}
|
|
|
|
/*
|
|
* Function: Load_query_statistics
|
|
* Description: Queries the load module for some relevant statistics
|
|
* Parameters: lp - a pointer to the load module context.
|
|
* OUT num_conn - a pointer to a ULONG to hold the current number of active connections
|
|
* OUT num_dscr - a pointer to a ULONG to hold the total number of descriptors allocated thus far
|
|
* Returns: BOOLEAN - whether or not the load module is active. If TRUE, then the OUT params were filled in.
|
|
* Author: shouse, 4.19.02
|
|
* Notes:
|
|
*/
|
|
BOOLEAN Load_query_statistics (PLOAD_CTXT lp, PULONG num_conn, PULONG num_dscr)
{
    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    /* Only an active load module has statistics worth reporting; the OUT
       parameters are filled in only on success. */
    if (lp->active) {

        /* Total number of ACTIVE connections across all port rules. */
        *num_conn = lp->nconn;

        /* Number of connection descriptors allocated so far. */
        *num_dscr = lp->num_dscr_out;

        return TRUE;
    }

    return FALSE;
}
|
|
|
|
#if defined (NLB_TCP_NOTIFICATION)
|
|
/*
|
|
* Function: Load_conn_up
|
|
* Description: This function is called to create state to track a connection (usually TCP
|
|
* or IPSec/L2TP). This is not a function to ask the load module whether or
|
|
* not to accept a packet, rather it is a request to create state to track a
|
|
* connection that is being established.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* limit_map_fn - whether or not to include server-side parameters in hashing
|
|
* reverse_hash - whether or not to reverse client and server during hashing
|
|
* Returns: BOOLEAN - whether or not state was successfully created to track this connection.
|
|
* Author: shouse, 4.15.02
|
|
* Notes: DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD.
|
|
*/
|
|
BOOLEAN Load_conn_up (
    PLOAD_CTXT lp,
    ULONG svr_ipaddr,
    ULONG svr_port,
    ULONG client_ipaddr,
    ULONG client_port,
    USHORT protocol,
    BOOLEAN limit_map_fn,
    BOOLEAN reverse_hash)
{
    ULONG hash;                                    /* Scratch hash value (simple, then complex). */
    ULONG vindex;                                  /* Hash-table index of the virtual (GRE / IPSec-UDP) descriptor. */
    ULONG index;                                   /* Hash-table index of the primary connection descriptor. */
    ULONG bin;                                     /* The "bucket" this connection maps to. */
    PBIN_STATE bp;                                 /* Port rule state for this connection. */
    PCONN_ENTRY ep;                                /* Connection descriptor being created/updated. */
    IRQLEVEL irql;                                 /* Saved IRQL for LOCK_ENTER/LOCK_EXIT. */
    PNDIS_SPIN_LOCK lockp = GET_LOAD_LOCK(lp);     /* Per-instance load module lock. */
    BOOLEAN is_tcp_pkt = IS_TCP_PKT(protocol);
    BOOLEAN acpt = TRUE;                           /* Return value: was tracking state created? */

    UNIV_ASSERT(lp->code == CVY_LOADCODE);

    TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, limit map = %u, reverse hash = %u",
                 lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
                 IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port,
                 protocol, limit_map_fn, reverse_hash);

    /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
    hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);

    /* Our index in all connection arrays is this hash, modulo the array size. */
    index = hash % CVY_MAX_CHASH;

    /* Lock ordering discipline: ALWAYS lock the global queues BEFORE locking
       the load module itself.  Load_conn_down follows the same order. */
    NdisAcquireSpinLock(&g_conn_estabq[index].lock);

    /* Lock the particular load module instance. */
    NdisAcquireSpinLock(lockp);

    /* If the load module is inactive, drop the packet and return here. */
    if (!lp->active) {

        TRACE_FILTER("%!FUNC! Drop packet - load module is inactive");

        acpt = FALSE;
        goto exit;
    }

    /* Find the port rule for this connection. */
    bp = Load_pg_lookup(lp, svr_ipaddr, svr_port, is_tcp_pkt);

    /* Handle CVY_NEVER (disabled port rule) immediately. */
    if (bp->mode == CVY_NEVER) {

        TRACE_FILTER("%!FUNC! Drop packet - port rule %u is disabled\n", bp->index);

        acpt = FALSE;
        goto exit;
    }

    /* Compute the hash. */
    hash = Load_complex_hash(svr_ipaddr, svr_port, client_ipaddr, client_port, bp->affinity, reverse_hash, limit_map_fn);

    /* Now hash client address to bin id. */
    bin = hash % CVY_MAXBINS;

    LOCK_ENTER(&(lp->lock), &irql);

    /* Look for an existing matching connection descriptor. */
    ep = Load_find_dscr(lp, index, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

    /* If there is no existing descriptor using this tuple, or if there is one, but its reference
       count is zero, then the descriptor is NOT on the global connection queue; otherwise it is. */
    if ((ep != NULL) && (ep->ref_count != 0)) {

        /* Temporarily pull this descriptor off of the global connection queue.  We'll end up putting
           it back on later, but this way we can UNCONDITIONALLY link to the queue when the time comes. */
        g_conn_estabq[index].length--;
        Link_unlink(&ep->glink);
    }

    /* Create a new connection descriptor to track this connection (or re-use
       and update the one just found). */
    ep = Load_create_dscr(lp, bp, ep, index, bin);

    /* If, for some reason, we were unable to create state for this connection, bail out here. */
    if (ep == NULL) {

        TRACE_FILTER("%!FUNC! Drop packet - no available descriptors: Port rule = %u, Index = %u, Bin = %u, Current map = 0x%015I64x, "
                     "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                     bp->index, index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

        acpt = FALSE;
        goto unlock;
    }

    /* Set the connection identification (5-tuple) in the descriptor. */
    CVY_CONN_SET(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);

    /* Insert the descriptor into the global connection queue. */
    g_conn_estabq[index].length++;
    Queue_enq(&g_conn_estabq[index].queue, &ep->glink);

    /* If this is a new PPTP tunnel, create or update a virtual descriptor to track the GRE data
       packets.  The virtual descriptor is keyed on the PPTP control port on BOTH sides, so it may
       be shared by multiple PPTP tunnels between the same pair of hosts. */
    if (protocol == TCPIP_PROTOCOL_PPTP) {

        /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
        hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);

        /* Our index in all connection arrays is this hash, modulo the array size. */
        vindex = hash % CVY_MAX_CHASH;

        /* Look for an existing matching virtual connection descriptor. */
        ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);

        /* Create or update a virtual descriptor for the GRE traffic. */
        ep = Load_create_dscr(lp, bp, ep, vindex, bin);

        /* If we can't allocate the virtual descriptor, bail out, but don't fail -
           the primary descriptor was created successfully above. */
        if (ep == NULL) goto unlock;

        /* Set the connection identification in the descriptor. */
        CVY_CONN_SET(ep, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);

        /* Set the virtual descriptor flag. */
        ep->flags |= NLB_CONN_ENTRY_FLAGS_VIRTUAL;
    }

    /* If this is a new IPSEC tunnel, create or update a virtual descriptor to track the UDP
       subsequent data fragments, analogously to the PPTP/GRE case above. */
    else if (protocol == TCPIP_PROTOCOL_IPSEC1) {

        /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
        hash = Load_simple_hash(svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT);

        /* Our index in all connection arrays is this hash, modulo the array size. */
        vindex = hash % CVY_MAX_CHASH;

        /* Look for an existing matching virtual connection descriptor. */
        ep = Load_find_dscr(lp, vindex, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);

        /* Create or update a virtual descriptor for the UDP subsequent fragment traffic. */
        ep = Load_create_dscr(lp, bp, ep, vindex, bin);

        /* If we can't allocate the virtual descriptor, bail out, but don't fail -
           the primary descriptor was created successfully above. */
        if (ep == NULL) goto unlock;

        /* Set the connection identification in the descriptor. */
        CVY_CONN_SET(ep, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);

        /* Set the virtual descriptor flag. */
        ep->flags |= NLB_CONN_ENTRY_FLAGS_VIRTUAL;
    }

    TRACE_FILTER("%!FUNC! Accept packet - connection state created: Port rule = %u, Index = %u, Bin = %u, Current map = 0x%015I64x, "
                 "All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
                 bp->index, index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);

    acpt = TRUE;

 unlock:

    LOCK_EXIT(&(lp->lock), irql);

 exit:

    /* Unlock the load module. */
    NdisReleaseSpinLock(lockp);

    /* Unlock the global established connection queue (reverse of acquisition order). */
    NdisReleaseSpinLock(&g_conn_estabq[index].lock);

    TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);

    return acpt;
}
|
|
|
|
/*
|
|
* Function: Load_conn_down
|
|
* Description: This function is called to destroy the state being used to track an existing
|
|
* connection (usually TCP or IPSec/L2TP). If state for the given 5-tuple is
|
|
* found, it is de-referenced and destroyed if appropriate (based partially on
|
|
* the conn_status). If state is not found, FALSE is returned, but it not
|
|
* considered a catastrophic error. In the case of TCP notifications, perhaps
|
|
* the connection was not even established across a NLB NIC.
|
|
* Parameters: svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* conn_status - whether the connection is going DOWN or being RESET
|
|
* Returns: BOOLEAN - whether or not the connection state was found and updated.
|
|
* Author: shouse, 4.15.02
|
|
* Notes: DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD.
|
|
*/
|
|
BOOLEAN Load_conn_down (
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol,
|
|
ULONG conn_status)
|
|
{
|
|
PLOAD_CTXT lp;
|
|
ULONG hash;
|
|
ULONG vindex;
|
|
ULONG index;
|
|
ULONG bin;
|
|
LINK * linkp;
|
|
PBIN_STATE bp;
|
|
PCONN_ENTRY ep;
|
|
PPENDING_ENTRY pp;
|
|
PNDIS_SPIN_LOCK lockp;
|
|
BOOLEAN match = FALSE;
|
|
BOOLEAN acpt = TRUE;
|
|
PMAIN_CTXT ctxtp;
|
|
|
|
TRACE_FILTER("%!FUNC! Enter: server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, status = %u",
|
|
IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
|
|
IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port, protocol, conn_status);
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
/* ALWAYS lock the global queues BEFORE locking the load module itself. */
|
|
NdisAcquireSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* Grab the entry at the front of this pending connection queue. */
|
|
pp = (PPENDING_ENTRY)Queue_front(&g_conn_pendingq[index].queue);
|
|
|
|
while (pp != NULL) {
|
|
|
|
UNIV_ASSERT(pp->code == CVY_PENDINGCODE);
|
|
|
|
/* Look for a matching descriptor. */
|
|
if (CVY_PENDING_MATCH(pp, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
|
|
match = TRUE;
|
|
break;
|
|
}
|
|
|
|
/* Get the next item in the queue. */
|
|
pp = (PPENDING_ENTRY)Queue_next(&g_conn_pendingq[index].queue, &(pp->link));
|
|
}
|
|
|
|
/* If we found this connection in the pending connection queue, remove it from
|
|
the queue, destroy the pending connection state and exit. Otherwise, fall
|
|
through and continue looking in the established connection queue. */
|
|
if (match) {
|
|
|
|
UNIV_ASSERT(pp);
|
|
|
|
/* Remove the pending connection entry from the pending queue. */
|
|
g_conn_pendingq[index].length--;
|
|
Link_unlink(&pp->link);
|
|
|
|
/* Free the descriptor back to the fixed-size block pool. */
|
|
NdisFreeToBlockPool((PUCHAR)pp);
|
|
|
|
/* Unlock the global pending connection queue. */
|
|
NdisReleaseSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
acpt = TRUE;
|
|
goto exit;
|
|
}
|
|
|
|
/* Unlock the global established connection queue. */
|
|
NdisReleaseSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* ALWAYS lock the global queues BEFORE locking the load module itself. */
|
|
NdisAcquireSpinLock(&g_conn_estabq[index].lock);
|
|
|
|
/* Grab the entry at the front of this established connection queue. */
|
|
linkp = (LINK *)Queue_front(&g_conn_estabq[index].queue);
|
|
|
|
while (linkp != NULL) {
|
|
/* Get the CONN_ENTRY pointer from the link pointer. */
|
|
ep = STRUCT_PTR(linkp, CONN_ENTRY, glink);
|
|
|
|
UNIV_ASSERT(ep->code == CVY_ENTRCODE);
|
|
|
|
/* Look for a matching descriptor. */
|
|
if (CVY_CONN_MATCH(ep, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
|
|
match = TRUE;
|
|
break;
|
|
}
|
|
|
|
/* Get the next item in the queue. */
|
|
linkp = (LINK *)Queue_next(&g_conn_estabq[index].queue, &(ep->glink));
|
|
}
|
|
|
|
/* If no matching descriptor was found, bail out. */
|
|
if (!match) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - no matching descriptor for RST/FIN: Index = %u", index);
|
|
|
|
acpt = FALSE;
|
|
goto unlock;
|
|
}
|
|
|
|
UNIV_ASSERT(ep);
|
|
|
|
/* Unlink this descriptor here. We have to do this here because if Load_destroy_dscr does in fact
|
|
destroy the descriptor, we can't touch it once the function call returns. So, we'll pull it off
|
|
here unconditionally and if it turns out that there are still references on the descriptor, we'll
|
|
put it back on when Load_destroy_dscr returns. */
|
|
g_conn_estabq[index].length--;
|
|
Link_unlink(&ep->glink);
|
|
|
|
/* Grab a pointer to the load module on which the descriptor resides. */
|
|
lp = ep->load;
|
|
|
|
UNIV_ASSERT(lp->code == CVY_LOADCODE);
|
|
|
|
/* Get a pointer to the load lock from the load context. */
|
|
lockp = GET_LOAD_LOCK(lp);
|
|
|
|
/* Lock the load module on which the connection resides. */
|
|
NdisAcquireSpinLock(lockp);
|
|
|
|
LOCK_ENTER(&(lp->lock), &irql);
|
|
|
|
/* If we found state for this connection, the bin is the bin from the descriptor,
|
|
not the calculated bin, which may not even been accurate if the port rules have
|
|
been modified since this connection was established. */
|
|
bin = ep->bin;
|
|
|
|
/* Lookup the port rule so we can update the port rule info. */
|
|
bp = Load_pg_lookup(lp, ep->svr_ipaddr, ep->svr_port, IS_TCP_PKT(ep->protocol));
|
|
|
|
/* If references still remain on the descriptor, then put it back on the global connection queue. */
|
|
if (Load_destroy_dscr(lp, bp, ep, conn_status)) {
|
|
/* Insert the descriptor into the global connection queue. */
|
|
g_conn_estabq[index].length++;
|
|
Queue_enq(&g_conn_estabq[index].queue, &ep->glink);
|
|
}
|
|
|
|
/* If this is a PPTP tunnel going down, update the virtual GRE descriptor. Virtual descriptors
|
|
are ALWAYS de-referenced, not destroyed, even if the notification is a RST because these
|
|
descriptors are potentially shared by multiple PPTP tunnels. */
|
|
if (protocol == TCPIP_PROTOCOL_PPTP) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching connection descriptor. Now that we have the load module pointer
|
|
from finding the first descriptor, we can narrow our search and look only for virtual descriptors
|
|
that reside on our load module. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, PPTP_CTRL_PORT, client_ipaddr, PPTP_CTRL_PORT, TCPIP_PROTOCOL_GRE);
|
|
|
|
/* Dereference the virtual GRE descriptor. */
|
|
(VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
|
|
}
|
|
/* If this is an IPSEC tunnel going down, update the virtual ISPEC_UDP descriptor. Virtual descriptors
|
|
are ALWAYS de-referenced, not destroyed, even if the notification is a RST because these
|
|
descriptors are potentially shared by multiple IPSEC tunnels. */
|
|
else if (protocol == TCPIP_PROTOCOL_IPSEC1) {
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
vindex = hash % CVY_MAX_CHASH;
|
|
|
|
/* Look for an existing matching virtual connection descriptor. Now that we have the load module pointer
|
|
from finding the first descriptor, we can narrow our search and look only for virtual descriptors
|
|
that reside on our load module. */
|
|
ep = Load_find_dscr(lp, vindex, svr_ipaddr, IPSEC_CTRL_PORT, client_ipaddr, IPSEC_CTRL_PORT, TCPIP_PROTOCOL_IPSEC_UDP);
|
|
|
|
/* Dereference the virtual IPSec/UDP descriptor. */
|
|
(VOID)Load_destroy_dscr(lp, bp, ep, conn_status);
|
|
}
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - state found: Port rule = %u, Index = %u, Bin = %u, Current map = 0x%015I64x, "
|
|
"All idle map = 0x%015I64x, Connections = %u, Cleanup waiting = %u, Dirty %u",
|
|
bp->index, index, bin, bp->cmap, bp->all_idle_map, bp->nconn[bin], lp->cln_waiting, lp->dirty_bin[bin]);
|
|
|
|
acpt = TRUE;
|
|
|
|
LOCK_EXIT(&(lp->lock), irql);
|
|
|
|
/* Unlock the load module. */
|
|
NdisReleaseSpinLock(lockp);
|
|
|
|
unlock:
|
|
|
|
/* Unlock the global established connection queue. */
|
|
NdisReleaseSpinLock(&g_conn_estabq[index].lock);
|
|
|
|
exit:
|
|
|
|
TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);
|
|
|
|
return acpt;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_conn_pending
|
|
* Description: This function is called to create state for a pending OUTGOING connection on
|
|
* the server. Because at this time, it is unknown on what interface the connection
|
|
* will ultimately be established, NLB creates global state to track the connection
|
|
* only until it is established. For TCP, when the SYN+ACK arrives from the peer,
|
|
* we only accept it if we find a match in our pending connection queues. When the
|
|
* connection is established, this state is destroyed and new state is created to
|
|
 * track the connection as appropriate.
|
|
* Parameters: svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* Returns: BOOLEAN - whether or not state was successfully created to track this pending connection.
|
|
* Author: shouse, 4.15.02
|
|
* Notes: DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD.
|
|
*/
|
|
BOOLEAN Load_conn_pending (
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol)
|
|
{
|
|
ULONG hash;
|
|
ULONG index;
|
|
PPENDING_ENTRY pp = NULL;
|
|
BOOLEAN acpt = TRUE;
|
|
|
|
TRACE_FILTER("%!FUNC! Enter: server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u",
|
|
IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
|
|
IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port, protocol);
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
/* If we falied to allocate the pending connection descriptor pool, bail out. */
|
|
if (g_pending_conn_pool == NULL)
|
|
{
|
|
/* Creation of the global pending connection state pool failed. */
|
|
TRACE_FILTER("%!FUNC! Drop packet - no global connection pending pool: Index = %u", index);
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
/* Allocate a descriptor from the fixed-size block pool. */
|
|
pp = (PPENDING_ENTRY)NdisAllocateFromBlockPool(g_pending_conn_pool);
|
|
|
|
if (pp == NULL) {
|
|
/* Allocation failed, bail out. */
|
|
TRACE_FILTER("%!FUNC! Drop packet - unable to allocate a pending connection entry: Index = %u", index);
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
/* Initialize the link. */
|
|
Link_init(&pp->link);
|
|
|
|
/* Fill in the "magic number". */
|
|
pp->code = CVY_PENDINGCODE;
|
|
|
|
/* Fill in the IP tuple. */
|
|
CVY_PENDING_SET(pp, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol);
|
|
|
|
/* ALWAYS lock the global queues BEFORE locking the load module itself. */
|
|
NdisAcquireSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* Insert the descriptor into the global connection queue. */
|
|
g_conn_pendingq[index].length++;
|
|
Queue_enq(&g_conn_pendingq[index].queue, &pp->link);
|
|
|
|
/* Unlock the global pending connection queue. */
|
|
NdisReleaseSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - pending connection state created: Index = %u", index);
|
|
|
|
acpt = TRUE;
|
|
|
|
exit:
|
|
|
|
TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);
|
|
|
|
return acpt;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_pending_check
|
|
* Description: This function is called to determine whether or not state exists in the pending
|
|
* connection queues for this connection. If it does, the packet should be accepted.
|
|
* If no state exists, the packet should be dropped.
|
|
* Parameters: svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* Returns: BOOLEAN - whether or not to accept the packet.
|
|
* Author: shouse, 4.15.02
|
|
* Notes: DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD.
|
|
*/
|
|
BOOLEAN Load_pending_check (
|
|
ULONG svr_ipaddr,
|
|
ULONG svr_port,
|
|
ULONG client_ipaddr,
|
|
ULONG client_port,
|
|
USHORT protocol)
|
|
{
|
|
ULONG hash;
|
|
ULONG index;
|
|
PPENDING_ENTRY pp = NULL;
|
|
BOOLEAN match = FALSE;
|
|
BOOLEAN acpt = TRUE;
|
|
|
|
TRACE_FILTER("%!FUNC! Enter: server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u",
|
|
IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
|
|
IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port, protocol);
|
|
|
|
/* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
|
|
hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);
|
|
|
|
/* Our index in all connection arrays is this hash, modulo the array size. */
|
|
index = hash % CVY_MAX_CHASH;
|
|
|
|
/* ALWAYS lock the global queues BEFORE locking the load module itself. */
|
|
NdisAcquireSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
/* Grab the entry at the front of this pending connection queue. */
|
|
pp = (PPENDING_ENTRY)Queue_front(&g_conn_pendingq[index].queue);
|
|
|
|
while (pp != NULL) {
|
|
|
|
UNIV_ASSERT(pp->code == CVY_PENDINGCODE);
|
|
|
|
/* Look for a matching descriptor. */
|
|
if (CVY_PENDING_MATCH(pp, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
|
|
match = TRUE;
|
|
break;
|
|
}
|
|
|
|
/* Get the next item in the queue. */
|
|
pp = (PPENDING_ENTRY)Queue_next(&g_conn_pendingq[index].queue, &(pp->link));
|
|
}
|
|
|
|
/* If no matching descriptor was found, bail out. */
|
|
if (!match) {
|
|
|
|
TRACE_FILTER("%!FUNC! Drop packet - no matching pending connection state for SYN+ACK: Index = %u", index);
|
|
|
|
acpt = FALSE;
|
|
goto exit;
|
|
}
|
|
|
|
TRACE_FILTER("%!FUNC! Accept packet - pending connection state found: Index = %u", index);
|
|
|
|
acpt = TRUE;
|
|
|
|
exit:
|
|
|
|
/* Unlock the global pending connection queue. */
|
|
NdisReleaseSpinLock(&g_conn_pendingq[index].lock);
|
|
|
|
TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);
|
|
|
|
return acpt;
|
|
}
|
|
|
|
/*
|
|
* Function: Load_conn_establish
|
|
* Description: This function is invoked when a pending connection has become established.
|
|
* When the pending connection is established, its state in the pending
|
|
* connection queues is destroyed. If the connection was ultimately established
|
|
* on an NLB adapter (if lp != NULL), then state will be created to track this
|
|
* new connection. Otherwise, the operation consists only of destroying the
|
|
* pending connection state.
|
|
* Parameters: lp - a pointer to the load module context (LOAD_CTXT)
|
|
* svr_ipaddr - the server IP address in network byte order
|
|
* svr_port - the server port in host byte order
|
|
* client_ipaddr - the client IP address in network byte order
|
|
* client_port - the client port in host byte order
|
|
* protocol - the protocol of this connection
|
|
* limit_map_fn - whether or not to include server-side parameters in hashing
|
|
* reverse_hash - whether or not to reverse client and server during hashing
|
|
* Returns: BOOLEAN - whether or not the operation was successfully completed.
|
|
* Author: shouse, 4.15.02
|
|
* Notes: DO NOT CALL THIS FUNCTION WITH THE LOAD LOCK HELD.
|
|
*/
|
|
BOOLEAN Load_conn_establish (
    PLOAD_CTXT lp,
    ULONG svr_ipaddr,
    ULONG svr_port,
    ULONG client_ipaddr,
    ULONG client_port,
    USHORT protocol,
    BOOLEAN limit_map_fn,
    BOOLEAN reverse_hash)
{
    ULONG hash;
    ULONG index;
    PPENDING_ENTRY pp = NULL;
    BOOLEAN match = FALSE;
    BOOLEAN acpt = TRUE;

    TRACE_FILTER("%!FUNC! Enter: lp = %p, server IP = %u.%u.%u.%u, server port = %u, client IP = %u.%u.%u.%u, client port = %u, protocol = %u, limit map = %u, reverse hash = %u",
                 lp, IP_GET_OCTET(svr_ipaddr, 0), IP_GET_OCTET(svr_ipaddr, 1), IP_GET_OCTET(svr_ipaddr, 2), IP_GET_OCTET(svr_ipaddr, 3), svr_port,
                 IP_GET_OCTET(client_ipaddr, 0), IP_GET_OCTET(client_ipaddr, 1), IP_GET_OCTET(client_ipaddr, 2), IP_GET_OCTET(client_ipaddr, 3), client_port,
                 protocol, limit_map_fn, reverse_hash);

    /* Compute a simple and inexpensive hash on all parts of the IP tuple except the protocol. */
    hash = Load_simple_hash(svr_ipaddr, svr_port, client_ipaddr, client_port);

    /* Our index in all connection arrays is this hash, modulo the array size. */
    index = hash % CVY_MAX_CHASH;

    /* ALWAYS lock the global queues BEFORE locking the load module itself.
       (This ordering must match Load_conn_pending/Load_pending_check.) */
    NdisAcquireSpinLock(&g_conn_pendingq[index].lock);

    /* Grab the entry at the front of this pending connection queue. */
    pp = (PPENDING_ENTRY)Queue_front(&g_conn_pendingq[index].queue);

    /* Walk the queue looking for a descriptor whose IP tuple matches this connection. */
    while (pp != NULL) {

        UNIV_ASSERT(pp->code == CVY_PENDINGCODE);

        /* Look for a matching descriptor. */
        if (CVY_PENDING_MATCH(pp, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol)) {
            match = TRUE;
            break;
        }

        /* Get the next item in the queue. */
        pp = (PPENDING_ENTRY)Queue_next(&g_conn_pendingq[index].queue, &(pp->link));
    }

    /* If no matching descriptor was found, bail out.  Note that the lock must
       be released here because the common path below releases it before the
       descriptor is freed. */
    if (!match) {

        TRACE_FILTER("%!FUNC! Drop packet - no matching pending connection state: Index = %u", index);

        /* Unlock the global pending connection queue. */
        NdisReleaseSpinLock(&g_conn_pendingq[index].lock);

        acpt = FALSE;
        goto exit;
    }

    UNIV_ASSERT(pp);

    /* Remove the pending connection entry from the pending queue. */
    g_conn_pendingq[index].length--;

    Link_unlink(&pp->link);

    /* Unlock the global pending connection queue.  The descriptor has already
       been unlinked, so it is safe to free it outside the lock. */
    NdisReleaseSpinLock(&g_conn_pendingq[index].lock);

    /* Free the descriptor back to the fixed-size block pool. */
    NdisFreeToBlockPool((PUCHAR)pp);

    /* If the load module pointer is non-NULL, then this connection is being established on
       an NLB adapter. If so, call Load_conn_up to create state to track the connection.
       A NULL lp means the connection ended up on a non-NLB adapter; in that case only
       the pending state teardown above was required. */
    if (lp != NULL) {

        UNIV_ASSERT(lp->code == CVY_LOADCODE);

        /* Create state for the connection; the return value of Load_conn_up
           becomes this function's result. */
        acpt = Load_conn_up(lp, svr_ipaddr, svr_port, client_ipaddr, client_port, protocol, limit_map_fn, reverse_hash);
    }

 exit:

    TRACE_FILTER("%!FUNC! Exit: acpt = %u", acpt);

    return acpt;
}
|
|
#endif
|
|
|