Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1088 lines
37 KiB

/*==========================================================================
*
* Copyright (C) 1998-2002 Microsoft Corporation. All Rights Reserved.
*
* File: EndPt.cpp
* Content: This file contains EndPoint management routines.
* An End Point is a DirectNet instance that we know about and may communicate
* with. An End Point Descriptor (EPD) tracks each known End Point and was mapped
* onto an hEndPoint by a hash table. Now, the SP maintains the mapping and hands
* us our EPD address as a context with each indication ReceiveEvent.
*
* In addition to EndPoint creation and destruction, this file contains routines
* which handle link tuning. This is described in detailed comments below.
*
* History:
* Date By Reason
* ==== == ======
* 11/06/98 ejs Created
* 07/01/2000 masonb Assumed Ownership
* 13/06/2002 simonpow MANBUG #56703 Capped burst gap growth
*
****************************************************************************/
#include "dnproti.h"
// Forward declarations. Both routines are defined further down in this file but
// are called before their definitions: RunAdaptiveAlg from UpdateEndPoint and
// ThrottleBack from EndPointDroppedFrame.
VOID RunAdaptiveAlg(PEPD, DWORD);
VOID ThrottleBack(PEPD, DWORD);
/*
** Crack EndPoint Descriptor
**
** Asks the Service Provider for the address information of an endpoint.
**
**  hProtocolData - protocol instance (ProtocolData*); only sanity-checked here
**  hEndPoint     - the End Point Descriptor to query
**  pSPData       - SP request block; its hEndpoint member is filled in from the
**                  EPD before the block is passed to the SP
**
** Returns DPNERR_INVALIDENDPOINT if the endpoint is not in the CONNECTED state,
** otherwise the result of IDP8ServiceProvider_GetAddressInfo.
*/
#undef DPF_MODNAME
#define DPF_MODNAME "DNPCrackEndPointDescriptor"
HRESULT
DNPCrackEndPointDescriptor(HANDLE hProtocolData, HANDLE hEndPoint, PSPGETADDRESSINFODATA pSPData)
{
    DPFX(DPFPREP,DPF_CALLIN_LVL, "Parameters: hProtocolData[%p], hEndPoint[%p], pSPData[%p]", hProtocolData, hEndPoint, pSPData);

    ProtocolData* pPData = (ProtocolData*)hProtocolData;
    ASSERT_PPD(pPData);

    PEPD pEPD = (PEPD) hEndPoint;
    ASSERT_EPD(pEPD);

    // Result reported when the endpoint turns out not to be connected.
    HRESULT hr = DPNERR_INVALIDENDPOINT;

    // Hold a reference on the EPD for the duration of the call, then take its lock.
    LOCK_EPD(pEPD, "LOCK (Crack EPD)");
    Lock(&pEPD->EPLock);

    if(pEPD->ulEPFlags & EPFLAGS_STATE_CONNECTED)
    {
        PSPD pSPD = pEPD->pSPD;
        ASSERT_SPD(pSPD);

        pSPData->hEndpoint = pEPD->hEndPt;

        // The SP is called with no protocol critical sections held, so EPLock
        // is dropped around the call.
        Unlock(&pEPD->EPLock);
        AssertNoCriticalSectionsFromGroupTakenByThisThread(&g_blProtocolCritSecsHeld);

        DPFX(DPFPREP,DPF_CALLOUT_LVL, "(%p) Calling SP->GetAddressInfo, pSPD[%p]", pEPD, pSPD);
        hr = IDP8ServiceProvider_GetAddressInfo(pSPD->IISPIntf, pSPData);

        // Re-take EPLock: RELEASE_EPD below is entered with it held.
        Lock(&pEPD->EPLock);
    }

    RELEASE_EPD(pEPD, "UNLOCK (Crack EPD)"); // releases EPLock

    DPFX(DPFPREP,DPF_CALLIN_LVL, "Returning hr[%x], pEPD[%p]", hr, pEPD);
    AssertNoCriticalSectionsFromGroupTakenByThisThread(&g_blProtocolCritSecsHeld);
    return hr;
}
#ifndef DPNBUILD_NOMULTICAST
/*
** Get an EndPoint context from an Address
**
** Looks an endpoint up in the Service Provider by host/device address and
** hands back the Context value stored in the matching EPD.
**
**  hProtocolData     - protocol instance (ProtocolData*); only sanity-checked here
**  hSPHandle         - the Service Provider descriptor (PSPD) to query
**  paEndpointAddress - host address passed to the SP
**  paDeviceAddress   - device address passed to the SP
**  ppvContext        - receives pEPD->Context; written only when the SP call
**                      returns DPN_OK
**
** Returns the HRESULT of IDP8ServiceProvider_GetEndpointByAddress.
*/
#undef DPF_MODNAME
#define DPF_MODNAME "DNPGetEndPointContextFromAddress"
HRESULT
DNPGetEndPointContextFromAddress(HANDLE hProtocolData, HANDLE hSPHandle, IDirectPlay8Address* paEndpointAddress, IDirectPlay8Address* paDeviceAddress, VOID** ppvContext)
{
    DPFX(DPFPREP,DPF_CALLIN_LVL, "Parameters: hProtocolData[%p], hSPHandle[%p], paEndpointAddress[%p], paDeviceAddress[%p], ppvContext[%p]", hProtocolData, hSPHandle, paEndpointAddress, paDeviceAddress, ppvContext);

    ProtocolData* pPData = (ProtocolData*)hProtocolData;
    ASSERT_PPD(pPData);

    PSPD pSPD = (PSPD)hSPHandle;
    ASSERT_SPD(pSPD);

    //
    // Build the lookup request: everything zero except the two addresses
    //
    SPGETENDPOINTBYADDRESSDATA spdata;
    memset(&spdata, 0, sizeof(spdata));
    spdata.pAddressHost = paEndpointAddress;
    spdata.pAddressDeviceInfo = paDeviceAddress;

    //
    // Ask the SP for the endpoint (no protocol locks may be held across the call)
    //
    AssertNoCriticalSectionsFromGroupTakenByThisThread(&g_blProtocolCritSecsHeld);
    DPFX(DPFPREP,DPF_CALLOUT_LVL, "Calling SP->GetEndpointByAddress, pSPD[%p]", pSPD);
    HRESULT hr = IDP8ServiceProvider_GetEndpointByAddress(pSPD->IISPIntf, &spdata);

    //
    // On success the SP returns our EPD as its endpoint context; read the
    // user context out of it under the endpoint lock
    //
    if (hr == DPN_OK)
    {
        PEPD pEPD = (PEPD)spdata.pvEndpointContext;
        ASSERT_EPD(pEPD);

        Lock(&pEPD->EPLock);
        *ppvContext = pEPD->Context;
        Unlock(&pEPD->EPLock);
    }

    DPFX(DPFPREP,DPF_CALLIN_LVL, "Returning hr[%x]", hr);
    AssertNoCriticalSectionsFromGroupTakenByThisThread(&g_blProtocolCritSecsHeld);
    return hr;
}
#endif // ! DPNBUILD_NOMULTICAST
/*
** INTERNAL - EndPoint management functions
*/
/*
** New End Point
**
** Every time a packet is indicated with an address that we don't recognize we
** allocate an EPD for it and add it to our tables. It is a higher layer's
** responsibility to tell us when it no longer wants to talk to the EP so that
** we can clear it out of our (and the SP's) table.
**
**  pSPD - Service Provider descriptor that owns the new endpoint
**  hEP  - the SP's handle for the endpoint (must not be INVALID_HANDLE_VALUE)
**
** Returns the initialized EPD, already linked into pSPD->blEPDActiveList and
** carrying one reference on behalf of the SP, or NULL if the pool allocation fails.
*/
#undef DPF_MODNAME
#define DPF_MODNAME "NewEndPoint"
PEPD NewEndPoint(PSPD pSPD, HANDLE hEP)
{
    PEPD pEPD = (PEPD)POOLALLOC(MEMID_EPD, &EPDPool);
    if(pEPD == NULL)
    {
        DPFX(DPFPREP,0, "Failed to allocate new EPD");
        return NULL;
    }

    ASSERT(hEP != INVALID_HANDLE_VALUE);

    // --- Identity ---
    pEPD->hEndPt = hEP; // Record ID in structure
    pEPD->pSPD = pSPD;
    pEPD->dwSessID = 0;

    // --- Sequencing and measurement counters ---
    pEPD->bNextMsgID = 0;
    pEPD->uiRTT = 0;
    pEPD->uiBytesAcked = 0;
    pEPD->uiQueuedMessageCount = 0;
    pEPD->uiCompleteMsgCount = 0;
#ifdef DBG
    pEPD->bLastDataSeq = 0xFF;
#endif // DBG

    // --- Secrets and secret modifiers all start out cleared ---
    pEPD->ullCurrentLocalSecret=0;
    pEPD->ullCurrentRemoteSecret=0;
    pEPD->ullOldLocalSecret=0;
    pEPD->ullOldRemoteSecret=0;
    pEPD->ullLocalSecretModifier=0;
    pEPD->ullRemoteSecretModifier=0;
    pEPD->byLocalSecretModifierSeqNum=0;
    pEPD->byRemoteSecretModifierSeqNum=0;

    // --- Send windows ---
    // A frame window and a byte window are tracked separately. The byte window
    // starts at half the initial frame window, measured in full-size frames.
    pEPD->uiWindowF = pSPD->pPData->dwInitialFrameWindowSize;
    pEPD->uiWindowBIndex = pSPD->pPData->dwInitialFrameWindowSize/2;
    pEPD->uiWindowB = pEPD->uiWindowBIndex*pSPD->uiFrameLength;
    pEPD->uiUnackedFrames = 0; // outstanding frame count
    pEPD->uiUnackedBytes = 0; // outstanding byte count
    pEPD->uiBurstGap = 0; // For now assume we don't need a burst gap

    // --- State ---
    // The IN_RECEIVE_COMPLETE flag keeps received data from being indicated to
    // the core until after the new connection has been indicated.
    pEPD->ulEPFlags = EPFLAGS_END_POINT_IN_USE | EPFLAGS_STATE_DORMANT | EPFLAGS_IN_RECEIVE_COMPLETE; // Initialize state
    pEPD->ulEPFlags2 = 0;

    ASSERT(pEPD->lRefCnt == 0); // WE NOW HAVE A -1 BASED REFCNT INSTEAD OF ZERO BASED (FOR EPDs)

    // --- Timers: none are running yet ---
    pEPD->SendTimer = 0; // Timer for next send-burst opportunity
    pEPD->RetryTimer = 0; // window to receive Ack
    pEPD->LinkTimer = 0;
    pEPD->DelayedAckTimer = 0; // wait for piggyback opportunity before sending Ack
    pEPD->DelayedMaskTimer = 0; // wait for piggyback opportunity before sending Mask frame
    pEPD->BGTimer = 0; // Periodic background timer

    // This reference stays in place until the SP tells us to go away.
    LOCK_EPD(pEPD, "SP reference");

    // Publish the endpoint on the SP's active list.
    Lock(&pSPD->SPLock);
    pEPD->blActiveLinkage.InsertAfter( &pSPD->blEPDActiveList);
    Unlock(&pSPD->SPLock);

    return pEPD;
}
/*
** Initial Link Parameters
**
** We have kept a checkpoint structure matching every frame we sent in the
** Connect handshake so that we can match a response to a specific frame or
** retry. This gives us a single-sample Round Trip Time (RTT), which is used
** here to seed the link-state variables.
**
**  pEPD  - endpoint being initialized; caller must hold pEPD->EPLock
**  uiRTT - measured round trip time (a value of 0 is treated as 1)
**  tNow  - current time, used to stamp the tuning/throughput baselines
**
** Also starts the endpoint's background timer (taking an EPD reference for it)
** if one is not already running.
*/
#undef DPF_MODNAME
#define DPF_MODNAME "InitLinkParameters"
VOID InitLinkParameters(PEPD pEPD, UINT uiRTT, DWORD tNow)
{
    PSPD pSPD = pEPD->pSPD;
    DWORD dwTimerInterval;

    AssertCriticalSectionIsTakenByThisThread(&pEPD->EPLock, TRUE);

    // Never store a zero RTT
    if(uiRTT == 0)
    {
        uiRTT = 1;
    }

    // --- Round trip time ---
    pEPD->uiRTT = uiRTT; // we know the base RTT
    pEPD->fpRTT = TO_FP(uiRTT); // 16.16 fixed point version

    // --- Drop / throttle tracking ---
    pEPD->uiDropCount = 0;
    pEPD->dwDropBitMask = 0;
    pEPD->uiThrottleEvents = 0; // Count times we throttle-back for all reasons
#ifdef DBG
    pEPD->uiTotalThrottleEvents = 0;
#endif // DBG
    pEPD->uiBurstGap = 0; // For now assume we don't need a burst gap

    // --- Message statistics ---
    pEPD->uiMsgSentHigh = 0;
    pEPD->uiMsgSentNorm = 0;
    pEPD->uiMsgSentLow = 0;
    pEPD->uiMsgTOHigh = 0;
    pEPD->uiMsgTONorm = 0;
    pEPD->uiMsgTOLow = 0;
    pEPD->uiMessagesReceived = 0;

    // --- Frame / byte statistics ---
    pEPD->uiGuaranteedFramesSent = 0;
    pEPD->uiGuaranteedBytesSent = 0;
    pEPD->uiDatagramFramesSent = 0;
    pEPD->uiDatagramBytesSent = 0;
    pEPD->uiGuaranteedFramesReceived = 0;
    pEPD->uiGuaranteedBytesReceived = 0;
    pEPD->uiDatagramFramesReceived = 0;
    pEPD->uiDatagramBytesReceived = 0;
    pEPD->uiGuaranteedFramesDropped = 0;
    pEPD->uiGuaranteedBytesDropped = 0;
    pEPD->uiDatagramFramesDropped = 0;
    pEPD->uiDatagramBytesDropped = 0;

    // --- "Known good" fallback parameters ---
    pEPD->uiGoodBurstGap = 0; // No Known Good Gap!
    pEPD->uiGoodRTT = 60000; // We need this to initially be artificially high
    pEPD->uiGoodWindowF = (pSPD->pPData->dwInitialFrameWindowSize*3)/4;
    pEPD->uiGoodWindowBI = pEPD->uiGoodWindowF;

    // --- Throughput sampling baselines ---
    pEPD->iBurstCredit = 0;
    pEPD->tLastDelta = tNow;
    pEPD->uiWindowFilled = 0;
    pEPD->tLastThruPutSample = tNow;
    pEPD->uiLastBytesAcked = 0;
    pEPD->uiPeriodAcksBytes = 0;
    pEPD->uiPeriodXmitTime = 0;
    pEPD->uiPeriodRateB = 0;
    pEPD->uiPeakRateB = 0;
    pEPD->uiLastRateB = 0;

    // --- Selective-ack masks ---
    pEPD->ulReceiveMask = 0;
    pEPD->ulReceiveMask2 = 0;
    pEPD->tReceiveMaskDelta = 0;
    pEPD->ulSendMask = 0;
    pEPD->ulSendMask2 = 0;

    pEPD->Context = NULL;

    DPFX(DPFPREP,7, "CONNECTION ESTABLISHED pEPD = 0x%p RTT = %dms, BurstGap=%dms", pEPD, pEPD->uiRTT, pEPD->uiBurstGap);

    // We set the IdleThreshhold very low to generate a little bit of traffic for
    // initial link tuning in case the application doesn't do any right away
    // pEPD->ulEPFlags |= EPFLAGS_USE_POLL_DELAY; // always assume balanced traffic at start-up
    pEPD->uiAdaptAlgCount = 4; // start running adpt alg fairly often

    // Retry timeout derived from the measured RTT: (2.5 * RTT) + MAX_DELAY
    pEPD->uiRetryTimeout = ((pEPD->uiRTT + (pEPD->uiRTT >> 2)) * 2) + DELAYED_ACK_TIMEOUT;

    // Never retry faster than the burst gap. uiBurstGap was zeroed above, so
    // this clamp cannot trigger here; it mirrors the same step in UpdateEndPoint.
    if(pEPD->uiRetryTimeout < pEPD->uiBurstGap)
    {
        pEPD->uiRetryTimeout = pEPD->uiBurstGap;
    }

    pEPD->uiUserFrameLength = pSPD->uiUserFrameLength;

    // Start the periodic background timer unless one is already scheduled.
    if(pEPD->BGTimer == 0)
    {
        // Use the idle threshold as the interval, capped at ENDPOINT_BACKGROUND_INTERVAL
        dwTimerInterval = (pSPD->pPData->tIdleThreshhold > ENDPOINT_BACKGROUND_INTERVAL)
                              ? ENDPOINT_BACKGROUND_INTERVAL
                              : pSPD->pPData->tIdleThreshhold;

        DPFX(DPFPREP,7, "(%p) Setting Endpoint Background Timer for %u ms", pEPD, dwTimerInterval);
        ScheduleProtocolTimer(pSPD, dwTimerInterval, 1000, EndPointBackgroundProcess,
                              (PVOID) pEPD, &pEPD->BGTimer, &pEPD->BGTimerUnique);
        LOCK_EPD(pEPD, "LOCK (BG Timer)"); // create reference for this timer
    }
}
/****************
*
* Link Tuning
*
* Here are current ideas about link tuning. Idea is to track Round Trip Time of key-frames and throttle
* based upon changes in this measured RTT when possible. This would benefit us in determining link saturation
* before packet loss occurs, instead of waiting for the inevitable packet loss before throttling back.
*
* On high-speed media, the average RTT is small compared to the standard deviations making it hard to
* predict anything useful from them. In these cases, we must look at packet drops. Except for one exception:
* We will look for large spikes in RTT and we will respond to these with an immediate, temporary throttle back.
* This will allow a bottle-neck to clear hopefully without packet-loss. So far, I have not been able to verify
* any benefit from this behavior on reliable links. It is more likely to be beneficial with datagram traffic
* where send windows do not limit write-ahead.
*
* I would like to take a measurement of the through-put achieved compared to the transmission rate, but I
* haven't yet come up with a good way to measure this. What I do calculate is packet acknowledgement rate, which
* can be calculated without any additional input from the remote side. We will store AckRates achieved at the
* previous transmission rate, so we can look for improvements in Acks as we increase Transmissions. When we
* no longer detect AckRate improvements then we assume we have plateaued and we stop trying to increase the rate.
*
* TRANSMISSION RATE
*
* Transmission rate is controlled by two distinct parameters: Insertion Rate and Window Size. Where a
* conventional protocol would dump a window full of packets onto the wire in one burst, we would like to
* spread the packet insertions out over the full RTT so that the window never completely fills and hence
* blocks the link from transmitting. This has a wide array of potential benefits: Causes less congestion
* throughout the network path; Allows more balanced access to the wire to all Endpoints (especially on
* slower media); Allows MUCH more accurate measurements to be made of transmission times when packets
* spend less time enqueued locally; Allows retry timers to be set much lower giving us quicker error
* recovery (because there is less queue time fudged into the timer); Allows recovery to be made more
* quickly when we don't have a lot of data enqueued in SP (both our own data and other Endpoint's data).
* ...And I am sure there are more.
*
* So, we would like to trickle out packets just fast enough to fill the window as the next ACK is received.
* We will grow the window fairly liberally and let the burst rate increase more cautiously.
*
* On high-speed media the insertion time becomes fairly small (near zero) and we are less likely to queue
* up large quantities of data. Therefore we may allow insertion rate to go max and use the window alone to
* control flow. I will experiment with this more.
*
******************/
// Divisor applied in UpdateEndPoint to the difference between a new RTT sample
// and the running average, i.e. the average moves 1/8 of the way toward each sample.
#define RTT_SLOW_WEIGHT 8 // fpRTT gain = 1/8
// ThrottleBack lowers the restore rates and marks the link STABLE once
// pEPD->uiThrottleEvents exceeds this value.
#define THROTTLE_EVENT_THRESHOLD 20
/*
** Update Endpoint
**
** Folds a new RTT sample into the endpoint's running average, recomputes the
** retry timeout, releases a temporary throttle once it has been in place long
** enough, and periodically runs the adaptive algorithm.
**
** We let the sliding window control the flow and increase the window as long as
** through-put continues to increase and frames continue to get delivered
** without excessive droppage. RTT is still calculated for the purpose of
** determining RetryTimer values.
**
**  pEPD  - endpoint to update; caller must hold pEPD->EPLock (asserted)
**  uiRTT - new round trip sample (0 is treated as 1; samples larger than
**          128x the current average are discarded)
**  tNow  - current time
*/
#undef DPF_MODNAME
#define DPF_MODNAME "UpdateEndPoint"
VOID UpdateEndPoint(PEPD pEPD, UINT uiRTT, DWORD tNow)
{
    AssertCriticalSectionIsTakenByThisThread(&pEPD->EPLock, TRUE);

    if(uiRTT == 0)
    {
        // Don't allow zero RTTs
        uiRTT = 1;
    }
    else if(uiRTT > (pEPD->uiRTT * 128))
    {
        // Filter out HUGE samples, they often pop up during debug sessions
        DPFX(DPFPREP,7, "Tossing huge sample (%dms, base %dms)", uiRTT, pEPD->uiRTT);
        return;
    }

    // Weighted running average in 16.16 fixed point: the average moves
    // 1/RTT_SLOW_WEIGHT (1/8 = 0.125) of the way toward the new sample.
    UINT fpSample = TO_FP(uiRTT);          // Fixed point sample
    INT fpError = fpSample - pEPD->fpRTT;  // Current delta (signed)
    pEPD->fpRTT += fpError / RTT_SLOW_WEIGHT;
    pEPD->uiRTT = FP_INT(pEPD->fpRTT);     // Store integer portion

    // Retry timeout derived from the averaged RTT: (2.5 * RTT) + MAX_DELAY
    pEPD->uiRetryTimeout = ((pEPD->uiRTT + (pEPD->uiRTT >> 2)) * 2) + DELAYED_ACK_TIMEOUT;

    // ...but never shorter than the burst gap: we don't want to get more
    // aggressive because we drop a frame.
    if(pEPD->uiRetryTimeout < pEPD->uiBurstGap)
    {
        pEPD->uiRetryTimeout = pEPD->uiBurstGap;
    }

    DPFX(DPFPREP,7, "(%p) RTT SAMPLE: RTT = %d, Avg = %d <<<<", pEPD, uiRTT, FP_INT(pEPD->fpRTT));

    if(pEPD->ulEPFlags & EPFLAGS_THROTTLED_BACK)
    {
        // Throttle is engaged: release it once it has been held for more than
        // 8 RTTs, restoring the parameters saved by ThrottleBack.
        if((tNow - pEPD->tThrottleTime) > (pEPD->uiRTT * 8))
        {
            pEPD->ulEPFlags &= ~(EPFLAGS_THROTTLED_BACK);

            pEPD->uiDropCount = 0;
            pEPD->dwDropBitMask = 0;

            pEPD->uiBurstGap = pEPD->uiRestoreBurstGap;
            pEPD->uiWindowF = pEPD->uiRestoreWindowF;
            pEPD->uiWindowBIndex = pEPD->uiRestoreWindowBI;
            pEPD->uiWindowB = pEPD->uiWindowBIndex * pEPD->pSPD->uiFrameLength;

            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "** (%p) RECOVER FROM THROTTLE EVENT: Window(F:%d,B:%d); Gap=%d", pEPD, pEPD->uiWindowF, pEPD->uiWindowBIndex, pEPD->uiBurstGap);

            pEPD->tLastDelta = tNow; // Enforce waiting period after back-off before tuning up again
        }
    }
    else if(pEPD->uiThrottleEvents)
    {
        // uiThrottleEvents tracks how often a packet drop has made us throttle
        // back. It decays by one per sample while no throttle is engaged; if
        // throttle events arrive faster than this decay the value keeps growing,
        // which is what eventually causes a reduction in send window/transmit
        // rate that persists beyond the throttle event itself.
        pEPD->uiThrottleEvents -= 1;
    }

    // Run the adaptive algorithm when its countdown expires; it re-arms the count.
    if(--pEPD->uiAdaptAlgCount == 0)
    {
        RunAdaptiveAlg(pEPD, tNow);
    }
}
/*
** Grow Send Window
**
** The two parallel send windows, frame-based and byte-based, can grow and
** shrink independently. Before anything is changed the current parameters are
** saved as the "good" set so they can be restored later.
**
** If a burst gap is in effect, only the gap is reduced. Otherwise each window
** that was filled during the last period (and is still below
** MAX_RECEIVE_RANGE) is grown by one step.
**
**  pEPD - endpoint to tune
**  tNow - current time, recorded as the time of the last parameter change
**
** Returns TRUE if any transmit parameter was loosened. Returns FALSE if there
** was nothing to grow, in which case the link has been marked STABLE.
*/
#undef DPF_MODNAME
#define DPF_MODNAME "GrowSendWindow"
BOOL
GrowSendWindow(PEPD pEPD, DWORD tNow)
{
    pEPD->tLastDelta = tNow;

    // Snapshot the current parameters as the known-good fallback
    pEPD->uiGoodWindowF = pEPD->uiWindowF;
    pEPD->uiGoodWindowBI = pEPD->uiWindowBIndex;
    pEPD->uiGoodRTT = pEPD->uiRTT;
    pEPD->uiGoodBurstGap = pEPD->uiBurstGap;

    if(pEPD->uiBurstGap)
    {
        // A burst gap is active: shrink it by 25%, or drop it entirely once it
        // is 3 ms or less. The windows are left alone on this path.
        pEPD->uiBurstGap = (pEPD->uiBurstGap > 3)
                               ? (pEPD->uiBurstGap - (pEPD->uiBurstGap >> 2))
                               : 0;

        // Start a fresh measurement period against the rate just achieved
        pEPD->uiLastRateB = pEPD->uiPeriodRateB;
        pEPD->uiPeriodAcksBytes = 0;
        pEPD->uiPeriodXmitTime = 0;

        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p), burst gap set to %d ms", pEPD, pEPD->uiBurstGap);
        return TRUE;
    }

    // No burst gap: try to open whichever window(s) were filled
    BOOL fGrew = FALSE;

    if((pEPD->ulEPFlags & EPFLAGS_FILLED_WINDOW_FRAME) && (pEPD->uiWindowF < MAX_RECEIVE_RANGE))
    {
        pEPD->uiWindowF++;
        fGrew = TRUE;
    }
    if((pEPD->ulEPFlags & EPFLAGS_FILLED_WINDOW_BYTE) && (pEPD->uiWindowBIndex < MAX_RECEIVE_RANGE))
    {
        pEPD->uiWindowBIndex++;
        pEPD->uiWindowB += pEPD->pSPD->uiFrameLength;
        fGrew = TRUE;
    }

    // The fill indications have been consumed either way
    pEPD->ulEPFlags &= ~(EPFLAGS_FILLED_WINDOW_FRAME | EPFLAGS_FILLED_WINDOW_BYTE);
    pEPD->uiWindowFilled = 0;

    if(!fGrew)
    {
        // We get here if we have already max'd out the window
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) GROW SEND WINDOW -- Nothing to grow. Transition to Stable!", pEPD);
        pEPD->ulEPFlags |= EPFLAGS_LINK_STABLE;
        return FALSE;
    }

    // Start a fresh measurement period against the rate just achieved
    pEPD->uiLastRateB = pEPD->uiPeriodRateB;
    pEPD->uiPeriodAcksBytes = 0;
    pEPD->uiPeriodXmitTime = 0;

    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) ** GROW SEND WINDOW to %d frames and %d (%d) bytes", pEPD, pEPD->uiWindowF, pEPD->uiWindowB, pEPD->uiWindowBIndex);
    return TRUE;
}
/*
** Run Adaptive Algorithm
**
** Called from UpdateEndPoint when pEPD->uiAdaptAlgCount counts down to zero.
** Updates the throughput figures for the current measurement period and then,
** depending on link state:
**
**  STABLE  - after INITIAL_STATIC_PERIOD has elapsed since the last parameter
**            change, returns the link to DYNAMIC mode if a send window has
**            been filled.
**  DYNAMIC - if the window has been filled often enough and no throttle events
**            are outstanding, either grows the send parameters (entering the
**            TESTING_GROWTH phase) or judges the result of the previous growth:
**            keep the new parameters if throughput rose without an RTT rise,
**            otherwise back off and mark the link STABLE.
**
** Every path re-arms pEPD->uiAdaptAlgCount.
**
**  pEPD - endpoint to tune
**  tNow - current time
*/
#undef DPF_MODNAME
#define DPF_MODNAME "RunAdaptiveAlg"
VOID
RunAdaptiveAlg(PEPD pEPD, DWORD tNow)
{
    LONG tDelta; // Time the link was transmitting since last run of AdaptAlg
    UINT uiBytesAcked;
    UINT uiNewSum;

    // Calculate the time during which this link was actually transmitting to make sure we have enough
    // data to run the Adaptive Alg. This is easy unless we are currently idle...
    tDelta = tNow - pEPD->tLastThruPutSample;
    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) Adaptive Alg tDelta = %d", pEPD, tDelta);

    // THIS PROBABLY IS UNNECESSARY NOW...
    // (It also guarantees uiPeriodXmitTime grows by at least 1 below, ahead of the division.)
    if(tDelta <= 0)
    {
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "DELAYING Adaptive Alg");
        pEPD->uiAdaptAlgCount = 4;
        return;
    }

    // Calculate current throughput achieved
    //
    // We will determine the amount of time the link was not idle and then number of bytes (& frames) which
    // were acknowledged by our partner.
    //
    // tDelta = Time since last calculation minus the time the link was idle.
    uiBytesAcked = pEPD->uiBytesAcked - pEPD->uiLastBytesAcked;

    // Acked bytes are accumulated scaled by 256. If adding this sample would wrap the
    // accumulator, halve both the byte and time totals first so their ratio is kept.
    uiNewSum = pEPD->uiPeriodAcksBytes + (uiBytesAcked * 256);
    if(uiNewSum < pEPD->uiPeriodAcksBytes)
    {
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "THRUPUT is about to wrap. Correcting...");
        pEPD->uiPeriodAcksBytes /= 2;
        pEPD->uiPeriodXmitTime /= 2;
        pEPD->uiPeriodAcksBytes += (uiBytesAcked * 256);
    }
    else
    {
        pEPD->uiPeriodAcksBytes = uiNewSum;
    }

    pEPD->uiPeriodXmitTime += tDelta; // Track complete values for this period
    pEPD->tLastThruPutSample = tNow;
    pEPD->uiLastBytesAcked = pEPD->uiBytesAcked;

    pEPD->uiPeriodRateB = pEPD->uiPeriodAcksBytes / pEPD->uiPeriodXmitTime;
    if(pEPD->uiPeriodRateB > pEPD->uiPeakRateB)
    {
        pEPD->uiPeakRateB = pEPD->uiPeriodRateB; // Track the largest value we ever measure
    }

    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) PERIOD COUNT BYTES = %u, XmitTime = %u, Thruput=(%u bytes/s), RTT=%u, Window=(%u,%u)", pEPD, pEPD->uiPeriodAcksBytes, pEPD->uiPeriodXmitTime, pEPD->uiPeriodRateB * 4, pEPD->uiRTT, pEPD->uiWindowF, pEPD->uiWindowB);

#ifndef DPNBUILD_NOPROTOCOLTESTITF
    if (pEPD->ulEPFlags & EPFLAGS_LINK_FROZEN)
    {
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) Test App requests that dynamic algorithm not be run, skipping", pEPD);
        pEPD->uiAdaptAlgCount = 32; // Make sure the throughput numbers get updated from time to time
        return;
    }
#endif // !DPNBUILD_NOPROTOCOLTESTITF

    if(pEPD->ulEPFlags & EPFLAGS_LINK_STABLE)
    {
        /* We are in a STABLE state, meaning we think we are transmitting at an optimal
        ** rate for the current network conditions. Conditions may change. If things slow down
        ** or grow congested a Backoff will trigger normally. Since conditions might also change
        ** for the better, we will still want to periodically probe higher rates, but much less
        ** often than when we are in DYNAMIC mode, which means we are searching for an optimal rate.
        */
        pEPD->uiAdaptAlgCount = 32; // tNow + (pEPD->uiRTT * 32) + 32;

        if((tNow - pEPD->tLastDelta) > INITIAL_STATIC_PERIOD)
        {
            if(pEPD->ulEPFlags & (EPFLAGS_FILLED_WINDOW_FRAME | EPFLAGS_FILLED_WINDOW_BYTE))
            {
                // A window has been filled since we went stable: start probing again
                // with a clean measurement period.
                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) RETURNING LINK TO DYNAMIC MODE", pEPD);
                pEPD->ulEPFlags &= ~(EPFLAGS_LINK_STABLE);
                pEPD->uiPeriodAcksBytes = 0;
                pEPD->uiPeriodXmitTime = 0;
                pEPD->uiWindowFilled = 0;
                pEPD->ulEPFlags &= ~(EPFLAGS_FILLED_WINDOW_FRAME | EPFLAGS_FILLED_WINDOW_BYTE);
                pEPD->uiAdaptAlgCount = 12;
            }
            else
            {
                // Nothing is pushing against the window; restart the static period.
                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) NO WINDOWS FILLED, Not returning to Dynamic Mode", pEPD);
                pEPD->tLastDelta = tNow;
            }
        }
        else
        {
            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) STILL IN STATIC PERIOD, time=%u, period=%u", pEPD, tNow - pEPD->tLastDelta, INITIAL_STATIC_PERIOD);
        }
    }
    // DYNAMIC STATE LINK
    else
    {
        pEPD->uiAdaptAlgCount = 8;

        // Possibly increase transmission rates. We will not do this if we have had a ThrottleEvent
        // in recent memory, or if we have not been actually transmitting for enough of the interval
        // to have collected worthwhile data
        //
        // Also, we don't want to even consider growing the send window unless we are consistently
        // filling it. Since one job of the window is to prevent us from flooding the net during a backup,
        // we don't want to grow the window following each backup. The best way to distinguish between a
        // backup and too small of a window is that the small window should fill up regularly while the
        // backups should only occur intermittently. The hard part is coming up with the actual test.
        // Truth is, we can be fairly lax about allowing growth because it will also have to meet the increased
        // bandwidth test before the larger window is accepted. So a crude rule would be to fix a number like 3.
        // Yes, crude but probably effective. Perhaps a more reasonable figure would be a ratio of the total
        // number of packets sent divided by the window size. I.e., if your window size is 10 frames then one
        // packet in ten should fill the window. Of course, this would have to be calculated in bytes...
        if((pEPD->uiWindowFilled > 12)&&(pEPD->uiThrottleEvents == 0))
        {
            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) DYNAMIC ALG: Window Fills: %d; B-Ack = (%x vs %x)", pEPD, pEPD->uiWindowFilled, pEPD->uiPeriodRateB, pEPD->uiLastRateB);
            pEPD->uiWindowFilled = 0;

            if (!(pEPD->ulEPFlags & EPFLAGS_TESTING_GROWTH))
            {
                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) GROWING WINDOW", pEPD);

                // In the case that GrowSendWindow doesn't grow anything because we are already max'd out
                // it will return FALSE, and it should have transitioned us to STABLE.
                if (GrowSendWindow(pEPD, tNow))
                {
                    pEPD->ulEPFlags |= EPFLAGS_TESTING_GROWTH;
                }
                else
                {
                    ASSERT(pEPD->ulEPFlags & EPFLAGS_LINK_STABLE);
                }
                return;
            }

            // GETTING HERE means that we have used our current transmit parameters long enough
            // to have an idea of their performance. We will now compare this to the performance
            // of the previous transmit parameters and we will either Revert to the previous set if
            // the perf is not improved, or else we will advance to faster parameters if we did see
            // a jump.

            // In order to keep higher transmit parameters we need to see an increase in throughput
            // with no corresponding rise in RTT. We will want to see this twice just to be sure
            // since the cost of incorrect growth is so high on a modem.
            if( (pEPD->uiPeriodRateB > pEPD->uiLastRateB) &&
                (pEPD->uiRTT <= (pEPD->uiGoodRTT + 10))
                )
            {
                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) Throughput increased after window growth, keeping new parameters", pEPD);
                pEPD->ulEPFlags &= ~(EPFLAGS_TESTING_GROWTH);
                pEPD->uiPeriodAcksBytes = 0;
                pEPD->uiPeriodXmitTime = 0;
            }
            else
            {
                // We did not see a thru-put improvement so we will back off the previous value
                // and transition the link to STABLE state.
                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) INSUFFICENT INCREASE IN THRUPUT, BACK OFF AND TRANSITION TO STABLE", pEPD);

                // Because we have over-transmitted for at least one period, we may have put excess data
                // on the link in a buffer. This will have the effect of gradually growing our RTT if we
                // don't bleed that data off which we will do here by backing off two steps where we
                // previously grew one step.
                if (pEPD->uiBurstGap != pEPD->uiGoodBurstGap)
                {
                    // increase the burst gap by 25%, clipping it to the max retry interval/2
                    pEPD->uiBurstGap = pEPD->uiGoodBurstGap + (pEPD->uiGoodBurstGap >> 2);
                    DWORD dwMaxBurstGap=pEPD->pSPD->pPData->dwSendRetryIntervalLimit/2;
                    if (pEPD->uiBurstGap>dwMaxBurstGap)
                    {
                        pEPD->uiBurstGap=dwMaxBurstGap;
                        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) Clipped burst gap to value %u", pEPD, pEPD->uiBurstGap);
                    }
                }
                if (pEPD->uiWindowF != pEPD->uiGoodWindowF)
                {
                    if (pEPD->uiGoodWindowF > 2)
                    {
                        pEPD->uiWindowF = _MAX(pEPD->uiGoodWindowF - 1, 1);
                    }
                    else
                    {
                        pEPD->uiWindowF = pEPD->uiGoodWindowF;
                    }
                }
                if (pEPD->uiWindowBIndex != pEPD->uiGoodWindowBI)
                {
                    // Step one below the good index, but never below 1. This is done with an
                    // explicit comparison instead of _MAX(good - 1, 1): uiGoodWindowBI is seeded
                    // from (dwInitialFrameWindowSize*3)/4 and so can be 0, and subtracting 1 from
                    // an unsigned 0 would wrap to a huge window instead of clamping.
                    if (pEPD->uiGoodWindowBI > 1)
                    {
                        pEPD->uiWindowBIndex = pEPD->uiGoodWindowBI - 1;
                    }
                    else
                    {
                        pEPD->uiWindowBIndex = 1;
                    }
                    pEPD->uiWindowB = pEPD->uiWindowBIndex * pEPD->pSPD->uiFrameLength;
                }

                pEPD->ulEPFlags |= EPFLAGS_LINK_STABLE; // TRANSITION TO STABLE STATE
                pEPD->ulEPFlags &= ~(EPFLAGS_TESTING_GROWTH);
                pEPD->uiPeriodAcksBytes = 0;
                pEPD->uiPeriodXmitTime = 0;

                DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) ** TUNING LINK: BurstGap=%d; FWindow=%d, BWindow=%d (%d)",pEPD, pEPD->uiBurstGap, pEPD->uiWindowF, pEPD->uiWindowB, pEPD->uiWindowBIndex);
            }
        }
        else
        {
            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) DYN ALG -- Not trying to increase: WindowFills = %d, ThrottleCount = %d", pEPD, pEPD->uiWindowFilled, pEPD->uiThrottleEvents);
        }
    } // END IF DYNAMIC STATE LINK
}
/*
** End Point Dropped Frame
**
** We have two levels of Backoff. We have an immediate BackOff implemented
** upon first detection of a drop-event in order to relieve the congestion which
** caused the drop. An immediate backoff will resume transmitting at the original
** rate without going through slow-start again after the congestion event has passed.
** If we have multiple immediate-backoffs in a certain interval we will have a
** hard backoff which will not restore.
**
** This routine records one drop in the endpoint's drop history and calls
** ThrottleBack once the drop count exceeds the configured dwDropThreshold.
**
**  pEPD - endpoint on which a frame was dropped
**  tNow - current time, forwarded to ThrottleBack
**
** CALLED WITH EPD->SPLock held (and sometimes with StateLock held too)
*/
#undef DPF_MODNAME
#define DPF_MODNAME "EndPointDroppedFrame"
VOID
EndPointDroppedFrame(PEPD pEPD, DWORD tNow)
{
    // The top bit of the mask is about to be shifted out. If it is set, an old
    // drop "expires" as this one is recorded, so the count stays as it is;
    // otherwise this drop adds to the count.
    if ((pEPD->dwDropBitMask & 0x80000000) == 0)
    {
        pEPD->uiDropCount++;
    }

    // Shift the history along and record this drop in the low bit.
    pEPD->dwDropBitMask = (pEPD->dwDropBitMask << 1) | 1;

    DPFX(DPFPREP,7, "(%p) Drop Count %d, Drop Bit Mask 0x%lx", pEPD,pEPD->uiDropCount,pEPD->dwDropBitMask);

    // Still at or under the threshold: nothing more to do.
    if (pEPD->uiDropCount <= pEPD->pSPD->pPData->dwDropThreshold)
    {
        return;
    }

    DPFX(DPFPREP,7, "(%p) THROTTLING BACK", pEPD);
    ThrottleBack(pEPD, tNow);

    // Start the drop history over after throttling.
    pEPD->dwDropBitMask = 0;
    pEPD->uiDropCount = 0;
}
/*
** Throttle Back
**
** We suspect network congestion due to dropped frames ((or a spike in latency)). We want
** to quickly scale back our transmit rate to relieve the congestion and avoid further packet drops.
** This is a temporary backoff and we will resume our current transmit rate when the congestion
** clears.
**
** If we find that we are throttling back frequently then we may conclude that our current xmit
** rate is higher than optimal and we will BackOff to a lower rate, and transition to a STABLE link
** state (if not already there) to indicate that we have plateaued.
**
** A note on convergence. The ThrottleEvents variable is incremented 10 points each time a throttle
** event is triggered. This variable also decays slowly when the link is running without events. So if
** the variable grows faster than it decays we will eventually trigger a switch to STABLE state.
**
**  pEPD - endpoint to throttle
**  tNow - current time, recorded as the moment the throttle was engaged
**
** Does nothing when the test interface has frozen the link (EPFLAGS_LINK_FROZEN).
*/
#undef DPF_MODNAME
#define DPF_MODNAME "ThrottleBack"
VOID
ThrottleBack(PEPD pEPD, DWORD tNow)
{
#ifndef DPNBUILD_NOPROTOCOLTESTITF
    if (pEPD->ulEPFlags & EPFLAGS_LINK_FROZEN)
    {
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) Test App requests that throttle code not be run, skipping", pEPD);
        return;
    }
#endif // !DPNBUILD_NOPROTOCOLTESTITF

    pEPD->ulEPFlags |= EPFLAGS_THROTTLED_BACK; // Set link to THROTTLED state
    pEPD->uiThrottleEvents += 10; // Count times we throttle-back for all reasons
    pEPD->tThrottleTime = tNow; // Remember time that throttle was engaged
#ifdef DBG
    pEPD->uiTotalThrottleEvents++; // Count times we throttle-back for all reasons
#endif // DBG

    // Remember the current parameters; UpdateEndPoint restores them when the throttle is released.
    pEPD->uiRestoreBurstGap = pEPD->uiBurstGap;
    pEPD->uiRestoreWindowF = pEPD->uiWindowF;
    pEPD->uiRestoreWindowBI = pEPD->uiWindowBIndex;

    // With the frame window already at its minimum the only remaining control is the
    // burst gap: introduce one (half the RTT, at least 1 ms) or double the existing one,
    // capped at half the send retry interval limit.
    if(pEPD->uiWindowF == 1)
    {
        if(pEPD->uiBurstGap == 0)
        {
            pEPD->uiBurstGap = _MAX(1,pEPD->uiRTT/2);
            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p), first burst gap, set to %d ms", pEPD, pEPD->uiBurstGap);
        }
        else
        {
            pEPD->uiBurstGap = pEPD->uiBurstGap*2;
            DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p), burst gap doubled to %d ms", pEPD, pEPD->uiBurstGap);
        }
        pEPD->uiBurstGap = _MIN(pEPD->uiBurstGap, pEPD->pSPD->pPData->dwSendRetryIntervalLimit/2);
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p), burst gap is now %d ms", pEPD, pEPD->uiBurstGap);
    }

    // Scale both windows by the configured throttle rate, never letting either reach zero.
    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) THROTTLE WINDOW from %d frames", pEPD, pEPD->uiWindowF);
    pEPD->uiWindowF = _MAX((UINT)(pEPD->uiWindowF * pEPD->pSPD->pPData->fThrottleRate), 1); // be sure window remains > 0.
    pEPD->uiWindowBIndex = _MAX((UINT)(pEPD->uiWindowBIndex * pEPD->pSPD->pPData->fThrottleRate), 1);
    pEPD->uiWindowB = pEPD->uiWindowBIndex * pEPD->pSPD->uiFrameLength;
    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) THROTTLE WINDOW to %d frames", pEPD, pEPD->uiWindowF);

    DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) THROTTLE ENGAGED (%d): Backoff to Window=%d; Gap=%d", pEPD, pEPD->uiThrottleEvents, pEPD->uiWindowF, pEPD->uiBurstGap);

    if(pEPD->uiThrottleEvents > THROTTLE_EVENT_THRESHOLD)
    {
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) ** DETECT TRANSMIT CEILING ** Reducing 'good' speed and marking link STABLE", pEPD);

        // We have already reduced our current transmit rates. Here we will reduce the "good" rates that
        // we will restore to when we clear the throttled state.
        pEPD->uiThrottleEvents = 0;

        // Step each restore window down by one, with a floor of 1. An explicit comparison is
        // used instead of _MAX(value - 1, 1): the byte-window index starts at
        // dwInitialFrameWindowSize/2 and so can be 0, and subtracting 1 from an unsigned 0
        // would wrap to a huge restore window instead of clamping.
        if (pEPD->uiRestoreWindowF > 1)
        {
            pEPD->uiRestoreWindowF--;
        }
        else
        {
            pEPD->uiRestoreWindowF = 1;
        }
        if (pEPD->uiRestoreWindowBI > 1)
        {
            pEPD->uiRestoreWindowBI--;
        }
        else
        {
            pEPD->uiRestoreWindowBI = 1;
        }

        if (pEPD->uiRestoreBurstGap)
        {
            UINT t;
            t=pEPD->uiRestoreBurstGap;
            pEPD->uiRestoreBurstGap = (t+1) + (t >> 2); // 1.25*pEPD->uiRestoreBurstGap
        }
        DPFX(DPFPREP,DPF_ADAPTIVE_LVL, "(%p) New Restore Values: Window=%d; Gap=%d", pEPD, pEPD->uiRestoreWindowF, pEPD->uiRestoreBurstGap);

        pEPD->ulEPFlags |= EPFLAGS_LINK_STABLE;
        pEPD->ulEPFlags &= ~(EPFLAGS_TESTING_GROWTH);
    }
}
/*
** EPD Pool Support Routines
**
** These are the functions called by Fixed Pool Manager as it handles EPDs.
*/
// Shorthand used by the EPD pool callbacks to view their untyped element as an EPD.
#define pELEMENT ((PEPD) pElement)
#undef DPF_MODNAME
#define DPF_MODNAME "EPD_Allocate"
// EPD_Allocate - pool callback run when a new EPD is first created.
//
// Performs the one-time setup of the element: list heads, the endpoint
// critical section, the signature and a handful of fields that must start
// out cleared.
//
//  pElement  - storage for the new EPD
//  pvContext - not used by this routine
//
// Returns FALSE if the critical section cannot be initialized, TRUE otherwise.
BOOL EPD_Allocate(PVOID pElement, PVOID pvContext)
{
    PEPD pEPD = pELEMENT;

    DPFX(DPFPREP,7, "(%p) Allocating new EPD", pEPD);

    // Send-side lists (there really are six send queues per endpoint; it beats
    // sorting the sends into the queues upon submission)
    pEPD->blHighPriSendQ.Initialize();
    pEPD->blNormPriSendQ.Initialize();
    pEPD->blLowPriSendQ.Initialize();
    pEPD->blCompleteSendList.Initialize();
    pEPD->blSendWindow.Initialize();
    pEPD->blRetryQueue.Initialize();

    // Receive-side and bookkeeping lists
    pEPD->blCompleteList.Initialize();
    pEPD->blOddFrameList.Initialize();
    pEPD->blChkPtQueue.Initialize();

    // Linkage into the owners' lists
    pEPD->blSPLinkage.Initialize();
    pEPD->blActiveLinkage.Initialize();

    if (!DNInitializeCriticalSection(&pEPD->EPLock))
    {
        DPFX(DPFPREP, 0, "Failed to initialize endpoint CS");
        return FALSE;
    }
    DebugSetCriticalSectionRecursionCount(&pEPD->EPLock, 0);
    DebugSetCriticalSectionGroup(&pEPD->EPLock, &g_blProtocolCritSecsHeld);

    pEPD->Sign = EPD_SIGN;

    pEPD->pCurrentSend = NULL;
    pEPD->pCurrentFrame = NULL;
    pEPD->pCommand = NULL;

    pEPD->RetryTimer = 0;
    pEPD->LinkTimer = 0;
    pEPD->DelayedAckTimer = 0;

    pEPD->ulEPFlags = 0; // EPFLAGS_STATE_CLEAR - make this line show up in state searches
    pEPD->ulEPFlags2 = 0;

    return TRUE;
}
// EPD_Get - pool callback run each time an EPD is handed out for use.
//
// Resets the per-use fields of the element.
//
//  pElement  - the EPD being handed out
//  pvContext - not used by this routine
#undef DPF_MODNAME
#define DPF_MODNAME "EPD_Get"
VOID EPD_Get(PVOID pElement, PVOID pvContext)
{
    DPFX(DPFPREP,DPF_EP_REFCNT_FINAL_LVL, "CREATING EPD %p", pELEMENT);

    // NOTE: The first sizeof(PVOID) bytes of the element will have been
    // overwritten by the pool code, so they must be set back to an acceptable value.
    pELEMENT->hEndPt = INVALID_HANDLE_VALUE;

    // No message is being assembled yet
    pELEMENT->pNewMessage = NULL;
    pELEMENT->pNewTail = NULL;

    // We are -1 based, so place the first reference on the endpoint
    pELEMENT->lRefCnt = 0;

    ASSERT_EPD(pELEMENT);
}
#undef DPF_MODNAME
#define DPF_MODNAME "EPD_Release"
// EPD_Release - pool callback run when an EPD is returned to the pool.
//
// Frees any checkpoints still queued on the endpoint, verifies (in checked
// builds) that every other list and in-progress pointer is already empty, and
// clears the fields that must not carry over to the next user.
//
//  pElement - the EPD being returned
VOID EPD_Release(PVOID pElement)
{
    ASSERT_EPD(pELEMENT);
    DPFX(DPFPREP,DPF_EP_REFCNT_FINAL_LVL, "RELEASING EPD %p", pELEMENT);
    ASSERT((pELEMENT->ulEPFlags & EPFLAGS_LINKED_TO_LISTEN)==0);

    // Drain the checkpoint queue, handing each entry back to its pool
    while(!pELEMENT->blChkPtQueue.IsEmpty())
    {
        PCHKPT pCP = CONTAINING_OBJECT(pELEMENT->blChkPtQueue.GetNext(), CHKPT, blLinkage);
        pCP->blLinkage.RemoveFromList();
        ChkPtPool.Release(pCP);
    }

    // Everything else should already be empty by the time an End Point is released...
    ASSERT(pELEMENT->blOddFrameList.IsEmpty());
    ASSERT(pELEMENT->blCompleteList.IsEmpty());
    ASSERT(pELEMENT->blHighPriSendQ.IsEmpty());
    ASSERT(pELEMENT->blNormPriSendQ.IsEmpty());
    ASSERT(pELEMENT->blLowPriSendQ.IsEmpty());
    ASSERT(pELEMENT->blCompleteSendList.IsEmpty());
    ASSERT(pELEMENT->blSendWindow.IsEmpty());
    ASSERT(pELEMENT->blRetryQueue.IsEmpty());
    ASSERT(pELEMENT->blActiveLinkage.IsEmpty());
    ASSERT(pELEMENT->blSPLinkage.IsEmpty());
    ASSERT(pELEMENT->blChkPtQueue.IsEmpty());
    ASSERT(pELEMENT->pCurrentSend == NULL);
    ASSERT(pELEMENT->pCurrentFrame == NULL);

    // Scrub state so nothing leaks into the next use of this element
    pELEMENT->ulEPFlags = 0; // EPFLAGS_STATE_CLEAR - make this line show up in state searches
    pELEMENT->ulEPFlags2 = 0;
    pELEMENT->pCommand = NULL;
    pELEMENT->Context = NULL;
    pELEMENT->hEndPt = INVALID_HANDLE_VALUE;
}
#undef DPF_MODNAME
#define DPF_MODNAME "EPD_Free"
// EPD_Free - pool callback for destroying an EPD element.
// Deletes the endpoint critical section that EPD_Allocate initialized.
//
//  pElement - the EPD being destroyed
VOID EPD_Free(PVOID pElement)
{
DNDeleteCriticalSection(&pELEMENT->EPLock);
}
// The helper macro defined above the pool routines is pELEMENT; the previous
// "#undef ELEMENT" named a macro that was never defined and left pELEMENT live.
#undef pELEMENT