You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3914 lines
137 KiB
3914 lines
137 KiB
/********************************************************************/
|
|
/** Microsoft LAN Manager **/
|
|
/** Copyright(c) Microsoft Corp., 1990-2000 **/
|
|
/********************************************************************/
|
|
/* :ts=4 */
|
|
|
|
//** TCPSEND.C - TCP send protocol code.
|
|
//
|
|
// This file contains the code for sending Data and Control segments.
|
|
//
|
|
|
|
#include "precomp.h"
|
|
#include "addr.h"
|
|
#include "tcp.h"
|
|
#include "tcb.h"
|
|
#include "tcpconn.h"
|
|
#include "tcpsend.h"
|
|
#include "tcprcv.h"
|
|
#include "tlcommon.h"
|
|
#include "info.h"
|
|
#include "tcpcfg.h"
|
|
#include "secfltr.h"
|
|
#include "tcpipbuf.h"
|
|
#include "mdlpool.h"
|
|
#include "pplasl.h"
|
|
|
|
#if GPC
|
|
#include "qos.h"
|
|
#include "traffic.h"
|
|
#include "gpcifc.h"
|
|
#include "ntddtc.h"
|
|
extern GPC_HANDLE hGpcClient[GPC_CF_MAX];
|
|
extern ULONG GpcCfCounts[GPC_CF_MAX];
|
|
extern GPC_EXPORTED_CALLS GpcEntries;
|
|
extern ULONG GPCcfInfo;
|
|
#endif
|
|
|
|
NTSTATUS
|
|
GetIFAndLink(void *Rce, ULONG * IFIndex, IPAddr * NextHop);
|
|
|
|
extern ulong DisableUserTOSSetting;
|
|
|
|
uint MaxSendSegments = 64;
|
|
#if MILLEN
|
|
uint DisableLargeSendOffload = 1;
|
|
#else // MILLEN
|
|
uint DisableLargeSendOffload = 0;
|
|
#endif // !MILLEN
|
|
|
|
#if DBG
|
|
ulong DbgDcProb = 0;
|
|
ulong DbgTcpSendHwChksumCount = 0;
|
|
#endif
|
|
|
|
extern HANDLE TcpRequestPool;
|
|
extern CTELock *pTWTCBTableLock;
|
|
extern CACHE_LINE_KSPIN_LOCK RequestCompleteListLock;
|
|
extern uint TcpHostOpts;
|
|
extern uint TcpHostSendOpts;
|
|
// Maximum SACK option size in bytes: 4 SACK blocks of 2 longwords (8 bytes)
// each, plus 4 bytes for the SACK option header itself. The expansion is
// parenthesized so the macro is a single operand when used inside a larger
// expression (for example, when multiplied or subtracted).
#define ALIGNED_SACK_OPT_SIZE (4 + 8 * 4)
|
|
|
|
|
|
void
|
|
ClassifyPacket(TCB *SendTCB);
|
|
|
|
void
|
|
TCPFastSend(TCB * SendTCB,
|
|
PNDIS_BUFFER in_SendBuf,
|
|
uint in_SendOfs,
|
|
TCPSendReq * in_SendReq,
|
|
uint in_SendSize,
|
|
SeqNum NextSeq,
|
|
int in_ToBeSent);
|
|
|
|
|
|
|
|
void *TCPProtInfo; // TCP protocol info for IP.
|
|
|
|
|
|
NDIS_HANDLE TCPSendBufferPool;
|
|
|
|
USHORT TcpHeaderBufferSize;
|
|
HANDLE TcpHeaderPool;
|
|
|
|
extern IPInfo LocalNetInfo;
|
|
|
|
|
|
//
|
|
// All of the init code can be discarded.
|
|
//
|
|
|
|
int InitTCPSend(void);
|
|
|
|
|
|
|
|
void UnInitTCPSend(void);
|
|
|
|
|
|
#ifdef ALLOC_PRAGMA
|
|
#pragma alloc_text(INIT, InitTCPSend)
|
|
#pragma alloc_text(INIT, UnInitTCPSend)
|
|
#endif
|
|
|
|
extern void ResetSendNext(TCB * SeqTCB, SeqNum NewSeq);
|
|
|
|
extern NTSTATUS
|
|
TCPPnPPowerRequest(void *ipContext, IPAddr ipAddr, NDIS_HANDLE handle,
|
|
PNET_PNP_EVENT netPnPEvent);
|
|
extern void TCPElistChangeHandler(void);
|
|
|
|
//* GetTCPHeader - Get a TCP header buffer.
|
|
//
|
|
// Called when we need to get a TCP header buffer. This routine is
|
|
// specific to the particular environment (VxD or NT). All we
|
|
// need to do is pop the buffer from the free list.
|
|
//
|
|
// Input: Nothing.
|
|
//
|
|
// Returns: Pointer to an NDIS buffer, or NULL is none.
|
|
//
|
|
PNDIS_BUFFER
|
|
GetTCPHeaderAtDpcLevel(TCPHeader **Header)
|
|
{
|
|
PNDIS_BUFFER Buffer;
|
|
|
|
#if DBG
|
|
*Header = NULL;
|
|
#endif
|
|
|
|
Buffer = MdpAllocateAtDpcLevel(TcpHeaderPool, Header);
|
|
|
|
if (Buffer) {
|
|
|
|
ASSERT(*Header);
|
|
|
|
NdisAdjustBufferLength(Buffer, sizeof(TCPHeader));
|
|
|
|
#if BACK_FILL
|
|
ASSERT(Buffer->ByteOffset >= 40);
|
|
|
|
*Header = (TCPHeader*)((ULONG_PTR)(*Header) + MAX_BACKFILL_HDR_SIZE);
|
|
Buffer->MappedSystemVa = (PVOID)((ULONG_PTR)Buffer->MappedSystemVa
|
|
+ MAX_BACKFILL_HDR_SIZE);
|
|
Buffer->ByteOffset += MAX_BACKFILL_HDR_SIZE;
|
|
|
|
Buffer->MdlFlags |= MDL_NETWORK_HEADER;
|
|
#endif
|
|
}
|
|
return Buffer;
|
|
}
|
|
|
|
#if MILLEN
|
|
#define GetTCPHeader GetTCPHeaderAtDpcLevel
|
|
#else
|
|
__inline
|
|
PNDIS_BUFFER
|
|
GetTCPHeader(TCPHeader **Header)
|
|
{
|
|
KIRQL OldIrql;
|
|
PNDIS_BUFFER Buffer;
|
|
|
|
OldIrql = KeRaiseIrqlToDpcLevel();
|
|
|
|
Buffer = GetTCPHeaderAtDpcLevel(Header);
|
|
|
|
KeLowerIrql(OldIrql);
|
|
|
|
return Buffer;
|
|
}
|
|
#endif
|
|
|
|
//* FreeTCPHeader - Free a TCP header buffer.
|
|
//
|
|
// Called to free a TCP header buffer.
|
|
//
|
|
// Input: Buffer to be freed.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
__inline
|
|
VOID
|
|
FreeTCPHeader(PNDIS_BUFFER Buffer)
|
|
{
|
|
NdisAdjustBufferLength(Buffer, TcpHeaderBufferSize);
|
|
#if BACK_FILL
|
|
Buffer->MappedSystemVa = (PVOID)((ULONG_PTR)Buffer->MappedSystemVa
|
|
- MAX_BACKFILL_HDR_SIZE);
|
|
Buffer->ByteOffset -= MAX_BACKFILL_HDR_SIZE;
|
|
#endif
|
|
MdpFree(Buffer);
|
|
}
|
|
|
|
//* FreeSendReq - Free a send request structure.
|
|
//
|
|
// Called to free a send request structure.
|
|
//
|
|
// Input: FreedReq - Connection request structure to be freed.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
__inline
|
|
void
|
|
FreeSendReq(TCPSendReq *Request)
|
|
{
|
|
PplFree(TcpRequestPool, Request);
|
|
}
|
|
|
|
//* GetSendReq - Get a send request structure.
|
|
//
|
|
// Called to get a send request structure.
|
|
//
|
|
// Input: Nothing.
|
|
//
|
|
// Returns: Pointer to SendReq structure, or NULL if none.
|
|
//
|
|
__inline
|
|
TCPSendReq *
|
|
GetSendReq(VOID)
|
|
{
|
|
TCPSendReq *Request;
|
|
LOGICAL FromList;
|
|
|
|
Request = PplAllocate(TcpRequestPool, &FromList);
|
|
if (Request) {
|
|
#if DBG
|
|
Request->tsr_req.tr_sig = tr_signature;
|
|
Request->tsr_sig = tsr_signature;
|
|
#endif
|
|
}
|
|
|
|
return Request;
|
|
}
|
|
|
|
//* TCPSendComplete - Complete a TCP send.
|
|
//
|
|
// Called by IP when a send we've made is complete. We free the buffer,
|
|
// and possibly complete some sends. Each send queued on a TCB has a ref.
|
|
// count with it, which is the number of times a pointer to a buffer
|
|
// associated with the send has been passed to the underlying IP layer. We
|
|
// can't complete a send until that count it 0. If this send was actually
|
|
// from a send of data, we'll go down the chain of send and decrement the
|
|
// refcount on each one. If we have one going to 0 and the send has already
|
|
// been acked we'll complete the send. If it hasn't been acked we'll leave
|
|
// it until the ack comes in.
|
|
//
|
|
// NOTE: We aren't protecting any of this with locks. When we port this to
|
|
// NT we'll need to fix this, probably with a global lock. See the comments
|
|
// in ACKSend() in TCPRCV.C for more details.
|
|
//
|
|
// Input: Context - Context we gave to IP.
|
|
// BufferChain - BufferChain for send.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
TCPSendComplete(void *Context, PNDIS_BUFFER BufferChain, IP_STATUS SendStatus)
|
|
{
|
|
BOOLEAN DoRcvComplete = FALSE;
|
|
PNDIS_BUFFER CurrentBuffer;
|
|
|
|
if (Context != NULL) {
|
|
SendCmpltContext *SCContext = (SendCmpltContext *) Context;
|
|
TCPSendReq *CurrentSend;
|
|
uint i;
|
|
|
|
CTEStructAssert(SCContext, scc);
|
|
if (SCContext->scc_LargeSend) {
|
|
TCB *LargeSendTCB = SCContext->scc_LargeSend;
|
|
CTELockHandle TCBHandle;
|
|
CTEGetLock(&LargeSendTCB->tcb_lock, &TCBHandle);
|
|
|
|
IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: tcb %x sent %d of %d una %u "
|
|
"next %u unacked %u\n", LargeSendTCB,
|
|
SCContext->scc_ByteSent, SCContext->scc_SendSize,
|
|
LargeSendTCB->tcb_senduna, LargeSendTCB->tcb_sendnext,
|
|
LargeSendTCB->tcb_unacked));
|
|
}
|
|
|
|
if (SCContext->scc_ByteSent < SCContext->scc_SendSize) {
|
|
uint BytesNotSent = SCContext->scc_SendSize -
|
|
SCContext->scc_ByteSent;
|
|
SeqNum Next = LargeSendTCB->tcb_sendnext;
|
|
|
|
IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: unsent %d\n",
|
|
SCContext->scc_SendSize-SCContext->scc_ByteSent));
|
|
}
|
|
|
|
if (SEQ_GTE((Next - BytesNotSent), LargeSendTCB->tcb_senduna) &&
|
|
SEQ_LT((Next - BytesNotSent), LargeSendTCB->tcb_sendnext)) {
|
|
ResetSendNext(LargeSendTCB, (Next - BytesNotSent));
|
|
}
|
|
}
|
|
#if DBG
|
|
LargeSendTCB->tcb_LargeSend--;
|
|
#endif
|
|
|
|
if (LargeSendTCB->tcb_unacked)
|
|
DelayAction(LargeSendTCB, NEED_OUTPUT);
|
|
|
|
DerefTCB(LargeSendTCB, TCBHandle);
|
|
}
|
|
// First, loop through and free any NDIS buffers here that need to be.
|
|
// freed. We'll skip any 'user' buffers, and then free our buffers. We
|
|
// need to do this before decrementing the reference count to avoid
|
|
// destroying the buffer chain if we have to zap tsr_lastbuf->Next to
|
|
// NULL.
|
|
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
|
|
for (i = 0; i < (uint) SCContext->scc_ubufcount; i++) {
|
|
ASSERT(CurrentBuffer != NULL);
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
}
|
|
|
|
for (i = 0; i < (uint) SCContext->scc_tbufcount; i++) {
|
|
PNDIS_BUFFER TempBuffer;
|
|
|
|
ASSERT(CurrentBuffer != NULL);
|
|
|
|
TempBuffer = CurrentBuffer;
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
NdisFreeBuffer(TempBuffer);
|
|
}
|
|
|
|
CurrentSend = SCContext->scc_firstsend;
|
|
|
|
i = 0;
|
|
while (i < SCContext->scc_count) {
|
|
Queue *TempQ;
|
|
long Result;
|
|
uint SendReqFlags;
|
|
|
|
TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
|
|
SendReqFlags = CurrentSend->tsr_flags;
|
|
|
|
CTEStructAssert(CurrentSend, tsr);
|
|
|
|
Result = CTEInterlockedDecrementLong(&(CurrentSend->tsr_refcnt));
|
|
|
|
ASSERT(Result >= 0);
|
|
|
|
if ((Result <= 0) ||
|
|
((SendReqFlags & TSR_FLAG_SEND_AND_DISC) && (Result == 1))) {
|
|
TCPReq *Req;
|
|
|
|
// Reference count has gone to 0 which means the send has
|
|
// been ACK'd or cancelled. Complete it now.
|
|
|
|
// If we've sent directly from this send, NULL out the next
|
|
// pointer for the last buffer in the chain.
|
|
if (CurrentSend->tsr_lastbuf != NULL) {
|
|
NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
|
|
CurrentSend->tsr_lastbuf = NULL;
|
|
}
|
|
|
|
Req = &CurrentSend->tsr_req;
|
|
(*Req->tr_rtn)(Req->tr_context, Req->tr_status,
|
|
Req->tr_status == TDI_SUCCESS
|
|
? CurrentSend->tsr_size : 0);
|
|
FreeSendReq(CurrentSend);
|
|
|
|
DoRcvComplete = TRUE;
|
|
}
|
|
CurrentSend = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
|
|
tsr_req);
|
|
|
|
i++;
|
|
}
|
|
|
|
}
|
|
FreeTCPHeader(BufferChain);
|
|
|
|
if (DoRcvComplete && !PartitionedDelayQ) {
|
|
KIRQL Irql = KeRaiseIrqlToDpcLevel();
|
|
TCPRcvComplete();
|
|
KeLowerIrql(Irql);
|
|
}
|
|
}
|
|
|
|
//* RcvWin - Figure out the receive window to offer in an ack.
|
|
//
|
|
// A routine to figure out what window to offer on a connection. We
|
|
// take into account SWS avoidance, what the default connection window is,
|
|
// and what the last window we offered is.
|
|
//
|
|
// Input: WinTCB - TCB on which to perform calculations.
|
|
//
|
|
// Returns: Window to be offered.
|
|
//
|
|
uint
|
|
RcvWin(TCB * WinTCB)
|
|
{
|
|
int CouldOffer; // The window size we could offer.
|
|
|
|
CTEStructAssert(WinTCB, tcb);
|
|
|
|
CheckRBList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
|
|
|
|
ASSERT(WinTCB->tcb_rcvwin >= 0);
|
|
|
|
CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
|
|
|
|
ASSERT(CouldOffer >= 0);
|
|
ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
|
|
|
|
if ((CouldOffer - WinTCB->tcb_rcvwin) >=
|
|
(int)MIN(WinTCB->tcb_defaultwin / 2, WinTCB->tcb_mss))
|
|
WinTCB->tcb_rcvwin = CouldOffer;
|
|
|
|
return WinTCB->tcb_rcvwin;
|
|
}
|
|
|
|
|
|
|
|
//* SendSYNOnSynTCB - Send a SYN segment for syntcb
|
|
//
|
|
// This is called during connection establishment time to send a SYN
|
|
// segment to the peer. We get a buffer if we can, and then fill
|
|
// it in. There's a tricky part here where we have to build the MSS
|
|
// option in the header - we find the MSS by finding the MSS offered
|
|
// by the net for the local address. After that, we send it.
|
|
//
|
|
// Input: SYNTcb - TCB from which SYN is to be sent.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendSYNOnSynTCB(SYNTCB * SYNTcb, CTELockHandle TCBHandle)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *SYNHeader;
|
|
uchar *OptPtr;
|
|
IP_STATUS SendStatus;
|
|
ushort OptSize = 0, HdrSize = 0;
|
|
BOOLEAN SackOpt = FALSE;
|
|
IPOptInfo OptInfo;
|
|
|
|
CTEStructAssert(SYNTcb, syntcb);
|
|
|
|
HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
|
|
|
|
// Go ahead and set the retransmission timer now, in case we didn't get a
|
|
// buffer. In the future we might want to queue the connection for
|
|
// when we free a buffer.
|
|
|
|
START_TCB_TIMER(SYNTcb->syntcb_rexmittimer, SYNTcb->syntcb_rexmit);
|
|
|
|
// The Rexmit interval has to be doubled here
|
|
|
|
SYNTcb->syntcb_rexmit = MIN(SYNTcb->syntcb_rexmit << 1, MAX_REXMIT_TO);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
ushort TempWin;
|
|
ushort MSS;
|
|
uchar FoundMSS;
|
|
|
|
SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
|
|
OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
|
|
|
|
}
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
|
|
OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
|
|
}
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_SACK){
|
|
SackOpt = TRUE;
|
|
OptSize += 4; // 2 NOPS, SACK kind and length field
|
|
}
|
|
NdisAdjustBufferLength(HeaderBuffer,
|
|
sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
|
|
|
|
SYNHeader->tcp_src = SYNTcb->syntcb_sport;
|
|
SYNHeader->tcp_dest = SYNTcb->syntcb_dport;
|
|
SYNHeader->tcp_seq = net_long(SYNTcb->syntcb_sendnext);
|
|
SYNTcb->syntcb_sendnext++;
|
|
|
|
if (SYNTcb->syntcb_rexmitcnt == 0) {
|
|
TCPSIncrementOutSegCount();
|
|
} else
|
|
TStats.ts_retranssegs++;
|
|
|
|
SYNHeader->tcp_ack = net_long(SYNTcb->syntcb_rcvnext);
|
|
|
|
// Reuse OPt size for header size determination
|
|
// default is MSS amd tcp header size
|
|
|
|
HdrSize = 6;
|
|
|
|
// set size field to reflect TS and WND scale option
|
|
// tcp header + windowscale + Timestamp + pad
|
|
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
|
|
// WS: Add one more long word
|
|
HdrSize += 1;
|
|
}
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
|
|
// TS: Add 3 more long words
|
|
HdrSize += 3;
|
|
}
|
|
if (SackOpt) {
|
|
// SACK: Add 1 more long word
|
|
HdrSize += 1;
|
|
}
|
|
|
|
SYNHeader->tcp_flags =
|
|
MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
|
|
|
|
if (SYNTcb->syntcb_defaultwin <= TCP_MAXWIN) {
|
|
TempWin = (ushort)SYNTcb->syntcb_defaultwin;
|
|
} else {
|
|
// Don't apply the scale-factor in a SYN segment.
|
|
// Instead, advertise the largest window possible.
|
|
TempWin = TCP_MAXWIN;
|
|
}
|
|
|
|
SYNHeader->tcp_window = net_short(TempWin);
|
|
SYNHeader->tcp_urgent = 0;
|
|
SYNHeader->tcp_xsum = 0;
|
|
OptPtr = (uchar *) (SYNHeader + 1);
|
|
|
|
FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->syntcb_saddr, &MSS);
|
|
|
|
if (!FoundMSS) {
|
|
CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
|
|
FreeTCPHeader(HeaderBuffer);
|
|
return;
|
|
}
|
|
MSS -= sizeof(TCPHeader);
|
|
|
|
SYNTcb->syntcb_mss = MSS;
|
|
|
|
*OptPtr++ = TCP_OPT_MSS;
|
|
*OptPtr++ = MSS_OPT_SIZE;
|
|
**(ushort **) & OptPtr = net_short(MSS);
|
|
|
|
OptPtr++;
|
|
OptPtr++;
|
|
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
|
|
|
|
// Fill in the WS option headers and value
|
|
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_WS;
|
|
*OptPtr++ = WS_OPT_SIZE;
|
|
|
|
//Initial window scale factor
|
|
*OptPtr++ = (uchar) SYNTcb->syntcb_rcvwinscale;
|
|
}
|
|
if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
//Start loading time stamp option header and value
|
|
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_TS;
|
|
*OptPtr++ = TS_OPT_SIZE;
|
|
|
|
// Initialize TS value TSval
|
|
|
|
*(long *)OptPtr = 0;
|
|
OptPtr += 4;
|
|
|
|
//Initialize TS Echo Reply TSecr
|
|
|
|
*(long *)OptPtr = 0;
|
|
OptPtr += 4;
|
|
}
|
|
if (SackOpt) {
|
|
|
|
// Initialize with SACK_PERMITTED option
|
|
|
|
*(long *)OptPtr = net_long(0x01010402);
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
|
|
}
|
|
|
|
}
|
|
|
|
SYNTcb->syntcb_refcnt++;
|
|
|
|
// Account for Options.
|
|
(*LocalNetInfo.ipi_initopts) (&OptInfo);
|
|
OptInfo.ioi_ttl = SYNTcb->syntcb_ttl;
|
|
|
|
SYNHeader->tcp_xsum =
|
|
~XsumSendChain(PHXSUM(SYNTcb->syntcb_saddr, SYNTcb->syntcb_daddr,
|
|
PROTOCOL_TCP, 0) +
|
|
(uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE +
|
|
OptSize), HeaderBuffer);
|
|
|
|
CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
|
|
sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
|
|
SYNTcb->syntcb_daddr, SYNTcb->syntcb_saddr,
|
|
&OptInfo, NULL, PROTOCOL_TCP, NULL);
|
|
|
|
if (SendStatus != IP_PENDING) {
|
|
FreeTCPHeader(HeaderBuffer);
|
|
}
|
|
CTEGetLock(&SYNTcb->syntcb_lock, &TCBHandle);
|
|
DerefSynTCB(SYNTcb, TCBHandle);
|
|
|
|
} else {
|
|
SYNTcb->syntcb_sendnext++;
|
|
CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//* SendSYN - Send a SYN segment.
|
|
//
|
|
// This is called during connection establishment time to send a SYN
|
|
// segment to the peer. We get a buffer if we can, and then fill
|
|
// it in. There's a tricky part here where we have to build the MSS
|
|
// option in the header - we find the MSS by finding the MSS offered
|
|
// by the net for the local address. After that, we send it.
|
|
//
|
|
// Input: SYNTcb - TCB from which SYN is to be sent.
|
|
// TCBHandle - Handle for lock on TCB.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendSYN(TCB * SYNTcb, CTELockHandle TCBHandle)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *SYNHeader;
|
|
uchar *OptPtr;
|
|
IP_STATUS SendStatus;
|
|
ushort OptSize = 0, HdrSize = 0, rfc1323opts = 0;
|
|
BOOLEAN SackOpt = FALSE;
|
|
|
|
CTEStructAssert(SYNTcb, tcb);
|
|
|
|
HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
|
|
|
|
// Go ahead and set the retransmission timer now, in case we didn't get a
|
|
// buffer. In the future we might want to queue the connection for
|
|
// when we free a buffer.
|
|
|
|
START_TCB_TIMER_R(SYNTcb, RXMIT_TIMER, SYNTcb->tcb_rexmit);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
ushort TempWin;
|
|
ushort MSS;
|
|
uchar FoundMSS;
|
|
|
|
SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
|
|
|
|
// If we are doing active open, check if we are configured to do
|
|
// window scaling and time stamp options
|
|
|
|
if ((((TcpHostSendOpts & TCP_FLAG_WS) || SYNTcb->tcb_rcvwinscale) &&
|
|
SYNTcb->tcb_state == TCB_SYN_SENT) ||
|
|
(SYNTcb->tcb_tcpopts & TCP_FLAG_WS)) {
|
|
|
|
rfc1323opts |= TCP_FLAG_WS;
|
|
|
|
IF_TCPDBG(TCP_DEBUG_1323) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected WS option TCB %x\n", SYNTcb));
|
|
}
|
|
}
|
|
if (((TcpHostSendOpts & TCP_FLAG_TS) &&
|
|
(SYNTcb->tcb_state == TCB_SYN_SENT)) ||
|
|
(SYNTcb->tcb_tcpopts & TCP_FLAG_TS)) {
|
|
|
|
IF_TCPDBG(TCP_DEBUG_1323) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected TS option TCB %x\n", SYNTcb));
|
|
}
|
|
rfc1323opts |= TCP_FLAG_TS;
|
|
}
|
|
|
|
if (rfc1323opts & TCP_FLAG_WS) {
|
|
OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
|
|
}
|
|
if (rfc1323opts & TCP_FLAG_TS) {
|
|
OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
|
|
}
|
|
if ((SYNTcb->tcb_tcpopts & TCP_FLAG_SACK) ||
|
|
((SYNTcb->tcb_state == TCB_SYN_SENT) &&
|
|
(TcpHostOpts & TCP_FLAG_SACK))) {
|
|
SackOpt = TRUE;
|
|
|
|
OptSize += 4; // 2 NOPS, SACK kind and length field
|
|
}
|
|
NdisAdjustBufferLength(HeaderBuffer,
|
|
sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
|
|
|
|
SYNHeader->tcp_src = SYNTcb->tcb_sport;
|
|
SYNHeader->tcp_dest = SYNTcb->tcb_dport;
|
|
SYNHeader->tcp_seq = net_long(SYNTcb->tcb_sendnext);
|
|
SYNTcb->tcb_sendnext++;
|
|
|
|
if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
|
|
TCPSIncrementOutSegCount();
|
|
SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
|
|
} else
|
|
TStats.ts_retranssegs++;
|
|
|
|
SYNHeader->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
|
|
|
|
// Reuse OPt size for header size determination
|
|
// default is MSS amd tcp header size
|
|
|
|
HdrSize = 6;
|
|
|
|
// set size field to reflect TS and WND scale option
|
|
// tcp header + windowscale + Timestamp + pad
|
|
|
|
if (rfc1323opts & TCP_FLAG_WS) {
|
|
|
|
// WS: Add one more long word
|
|
HdrSize += 1;
|
|
|
|
}
|
|
if (rfc1323opts & TCP_FLAG_TS) {
|
|
|
|
// TS: Add 3 more long words
|
|
HdrSize += 3;
|
|
}
|
|
if (SackOpt) {
|
|
// SACK: Add 1 more long word
|
|
HdrSize += 1;
|
|
|
|
}
|
|
if (SYNTcb->tcb_state == TCB_SYN_RCVD) {
|
|
SYNHeader->tcp_flags =
|
|
MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
|
|
} else {
|
|
SYNHeader->tcp_flags = MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN);
|
|
}
|
|
|
|
SYNTcb->tcb_lastack = SYNTcb->tcb_rcvnext;
|
|
|
|
if (SYNTcb->tcb_rcvwin <= TCP_MAXWIN) {
|
|
TempWin = (ushort)SYNTcb->tcb_rcvwin;
|
|
} else {
|
|
// Don't apply the scale-factor in a SYN segment.
|
|
// Instead, advertise the largest window possible.
|
|
TempWin = TCP_MAXWIN;
|
|
}
|
|
|
|
SYNHeader->tcp_window = net_short(TempWin);
|
|
SYNHeader->tcp_urgent = 0;
|
|
SYNHeader->tcp_xsum = 0;
|
|
OptPtr = (uchar *) (SYNHeader + 1);
|
|
|
|
FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->tcb_saddr, &MSS);
|
|
|
|
if (!FoundMSS) {
|
|
CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
|
|
FreeTCPHeader(HeaderBuffer);
|
|
return;
|
|
}
|
|
|
|
MSS -= sizeof(TCPHeader);
|
|
|
|
*OptPtr++ = TCP_OPT_MSS;
|
|
*OptPtr++ = MSS_OPT_SIZE;
|
|
**(ushort **) & OptPtr = net_short(MSS);
|
|
|
|
OptPtr++;
|
|
OptPtr++;
|
|
|
|
if (rfc1323opts & TCP_FLAG_WS) {
|
|
|
|
// Fill in the WS option headers and value
|
|
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_WS;
|
|
*OptPtr++ = WS_OPT_SIZE;
|
|
|
|
// Initial window scale factor
|
|
*OptPtr++ = (uchar) SYNTcb->tcb_rcvwinscale;
|
|
}
|
|
if (rfc1323opts & TCP_FLAG_TS) {
|
|
|
|
// Start loading time stamp option header and value
|
|
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_NOP;
|
|
*OptPtr++ = TCP_OPT_TS;
|
|
*OptPtr++ = TS_OPT_SIZE;
|
|
|
|
// Initialize TS value TSval
|
|
|
|
*(long *)OptPtr = 0;
|
|
OptPtr += 4;
|
|
|
|
// Initialize TS Echo Reply TSecr
|
|
|
|
*(long *)OptPtr = 0;
|
|
OptPtr += 4;
|
|
}
|
|
if (SackOpt) {
|
|
|
|
// Initialize with SACK_PERMITTED option
|
|
|
|
*(long *)OptPtr = net_long(0x01010402);
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
|
|
}
|
|
|
|
}
|
|
|
|
REFERENCE_TCB(SYNTcb);
|
|
|
|
// Account for Options.
|
|
|
|
SYNTcb->tcb_opt.ioi_TcpChksum = 0;
|
|
|
|
SYNHeader->tcp_xsum =
|
|
~XsumSendChain(SYNTcb->tcb_phxsum +
|
|
(uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize),
|
|
HeaderBuffer);
|
|
|
|
|
|
ClassifyPacket(SYNTcb);
|
|
|
|
CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
|
|
sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
|
|
SYNTcb->tcb_daddr, SYNTcb->tcb_saddr,
|
|
&SYNTcb->tcb_opt, SYNTcb->tcb_rce,
|
|
PROTOCOL_TCP, NULL);
|
|
|
|
SYNTcb->tcb_error = SendStatus;
|
|
if (SendStatus != IP_PENDING) {
|
|
FreeTCPHeader(HeaderBuffer);
|
|
}
|
|
CTEGetLock(&SYNTcb->tcb_lock, &TCBHandle);
|
|
DerefTCB(SYNTcb, TCBHandle);
|
|
|
|
} else {
|
|
SYNTcb->tcb_sendnext++;
|
|
if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax))
|
|
SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
|
|
CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
//* SendKA - Send a keep alive segment.
|
|
//
|
|
// This is called when we want to send a keep alive.
|
|
//
|
|
// Input: KATcb - TCB from which keep alive is to be sent.
|
|
// Handle - Handle for lock on TCB.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendKA(TCB * KATcb, CTELockHandle Handle)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *Header;
|
|
IP_STATUS SendStatus;
|
|
|
|
CTEStructAssert(KATcb, tcb);
|
|
|
|
HeaderBuffer = GetTCPHeaderAtDpcLevel(&Header);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
ushort TempWin;
|
|
SeqNum TempSeq;
|
|
|
|
Header = (TCPHeader *) ((PUCHAR) Header + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
NdisAdjustBufferLength(HeaderBuffer, sizeof(TCPHeader) + 1);
|
|
Header->tcp_src = KATcb->tcb_sport;
|
|
Header->tcp_dest = KATcb->tcb_dport;
|
|
TempSeq = KATcb->tcb_senduna - 1;
|
|
Header->tcp_seq = net_long(TempSeq);
|
|
|
|
TStats.ts_retranssegs++;
|
|
|
|
Header->tcp_ack = net_long(KATcb->tcb_rcvnext);
|
|
Header->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
|
|
|
|
// Initialize the single byte that we're sending.
|
|
*(uchar*)(Header + 1) = 0;
|
|
|
|
// We need to scale the rcv window
|
|
// Use temprary variable to workaround truncation
|
|
// caused by net_short
|
|
|
|
TempWin = (ushort) (RcvWin(KATcb) >> KATcb->tcb_rcvwinscale);
|
|
Header->tcp_window = net_short(TempWin);
|
|
Header->tcp_urgent = 0;
|
|
|
|
KATcb->tcb_lastack = KATcb->tcb_rcvnext;
|
|
|
|
Header->tcp_xsum = 0;
|
|
|
|
KATcb->tcb_opt.ioi_TcpChksum = 0;
|
|
Header->tcp_xsum =
|
|
~XsumSendChain(KATcb->tcb_phxsum +
|
|
(uint)net_short(sizeof(TCPHeader) + 1),
|
|
HeaderBuffer);
|
|
|
|
KATcb->tcb_kacount++;
|
|
ClassifyPacket(KATcb);
|
|
REFERENCE_TCB(KATcb);
|
|
CTEFreeLock(&KATcb->tcb_lock, Handle);
|
|
|
|
SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
HeaderBuffer,
|
|
sizeof(TCPHeader) + 1,
|
|
KATcb->tcb_daddr,
|
|
KATcb->tcb_saddr,
|
|
&KATcb->tcb_opt,
|
|
KATcb->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
|
|
if (SendStatus != IP_PENDING) {
|
|
FreeTCPHeader(HeaderBuffer);
|
|
}
|
|
|
|
CTEGetLock(&KATcb->tcb_lock, &Handle);
|
|
DerefTCB(KATcb, Handle);
|
|
} else {
|
|
CTEFreeLock(&KATcb->tcb_lock, Handle);
|
|
}
|
|
|
|
}
|
|
|
|
//* SendACK - Send an ACK segment.
|
|
//
|
|
// This is called whenever we need to send an ACK for some reason. Nothing
|
|
// fancy, we just do it.
|
|
//
|
|
// Input: ACKTcb - TCB from which ACK is to be sent.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendACK(TCB * ACKTcb)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *ACKHeader;
|
|
IP_STATUS SendStatus;
|
|
CTELockHandle TCBHandle;
|
|
SeqNum SendNext;
|
|
ushort SackLength = 0, i, hdrlen = 5;
|
|
ulong *ts_opt;
|
|
BOOLEAN HWChksum = FALSE;
|
|
|
|
CTEStructAssert(ACKTcb, tcb);
|
|
|
|
HeaderBuffer = GetTCPHeader(&ACKHeader);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
ushort TempWin;
|
|
ushort Size;
|
|
|
|
ACKHeader = (TCPHeader *) ((PUCHAR) ACKHeader + LocalNetInfo.ipi_hsize);
|
|
|
|
CTEGetLock(&ACKTcb->tcb_lock, &TCBHandle);
|
|
|
|
|
|
// Allow room for filling time stamp option.
|
|
// Note that it is 12 bytes and will never ever change
|
|
|
|
if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
|
|
NdisAdjustBufferLength(HeaderBuffer,
|
|
sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
|
|
|
|
// Header length is multiple of 32bits
|
|
|
|
hdrlen = 5 + 3; // standard header size +
|
|
// header size requirement for TS option
|
|
|
|
ACKTcb->tcb_lastack = ACKTcb->tcb_rcvnext;
|
|
|
|
}
|
|
if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
|
|
ACKTcb->tcb_SackBlock &&
|
|
(ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
|
|
|
|
SackLength++;
|
|
for (i = 1; i < 3; i++) {
|
|
if (ACKTcb->tcb_SackBlock->Mask[i] == 1)
|
|
SackLength++;
|
|
}
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACKs!! %x %x\n", ACKTcb, SackLength));
|
|
}
|
|
|
|
NdisAdjustBufferLength(HeaderBuffer,
|
|
NdisBufferLength(HeaderBuffer) + SackLength * 8 + 4);
|
|
|
|
// Sack block is of 2 long words (8 bytes) and 4 bytes
|
|
// is for Sack option header.
|
|
|
|
hdrlen += ((SackLength * 8 + 4) >> 2);
|
|
}
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
|
|
ACKHeader->tcp_src = ACKTcb->tcb_sport;
|
|
ACKHeader->tcp_dest = ACKTcb->tcb_dport;
|
|
ACKHeader->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
|
|
|
|
// If the remote peer is advertising a window of zero, we need to
|
|
// send this ack with a seq. number of his rcv_next (which in that case
|
|
// should be our senduna). We have code here ifdef'd out that makes
|
|
// sure that we don't send outside the RWE, but this doesn't work. We
|
|
// need to be able to send a pure ACK exactly at the RWE.
|
|
|
|
if (ACKTcb->tcb_sendwin != 0) {
|
|
|
|
SendNext = ACKTcb->tcb_sendnext;
|
|
} else
|
|
SendNext = ACKTcb->tcb_senduna;
|
|
|
|
if ((ACKTcb->tcb_flags & FIN_SENT) &&
|
|
SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
|
|
ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen,
|
|
TCP_FLAG_FIN | TCP_FLAG_ACK);
|
|
} else
|
|
ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
|
|
|
|
ACKHeader->tcp_seq = net_long(SendNext);
|
|
|
|
TempWin = (ushort) (RcvWin(ACKTcb) >> ACKTcb->tcb_rcvwinscale);
|
|
ACKHeader->tcp_window = net_short(TempWin);
|
|
ACKHeader->tcp_urgent = 0;
|
|
ACKHeader->tcp_xsum = 0;
|
|
|
|
Size = sizeof(TCPHeader);
|
|
|
|
// Point to a place beyond tcp header
|
|
|
|
ts_opt = (ulong *)((uchar *) ACKHeader + 20);
|
|
|
|
if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
// Form time stamp header with 2 NOPs for alignment
|
|
*ts_opt++ = net_long(0x0101080A);
|
|
*ts_opt++ = net_long(TCPTime);
|
|
*ts_opt++ = net_long(ACKTcb->tcb_tsrecent);
|
|
|
|
// Add 12 more bytes to the size to account for TS
|
|
|
|
Size += ALIGNED_TS_OPT_SIZE;
|
|
|
|
}
|
|
if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
|
|
ACKTcb->tcb_SackBlock &&
|
|
(ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
|
|
|
|
ushort* UshortPtr;
|
|
uchar* UcharPtr;
|
|
UshortPtr = (ushort *)ts_opt;
|
|
*UshortPtr = 0x0101;
|
|
ts_opt = (ulong *)((uchar *)ts_opt + 2);
|
|
UcharPtr = (uchar *)ts_opt;
|
|
*UcharPtr = (uchar)0x05;
|
|
ts_opt = (ulong *)((uchar *)ts_opt + 1);
|
|
UcharPtr = (uchar *)ts_opt;
|
|
*UcharPtr = (uchar) SackLength * 8 + 2;
|
|
ts_opt = (ulong *)((uchar *)ts_opt + 1);
|
|
|
|
// Sack option header + the block times times sack length!
|
|
Size += 4 + SackLength * 8;
|
|
|
|
for (i = 0; i < 3; i++) {
|
|
if (ACKTcb->tcb_SackBlock->Mask[i] != 0) {
|
|
*ts_opt++ =
|
|
net_long(ACKTcb->tcb_SackBlock->Block[i].begin);
|
|
*ts_opt++ =
|
|
net_long(ACKTcb->tcb_SackBlock->Block[i].end);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (ACKTcb->tcb_rce &&
|
|
(ACKTcb->tcb_rce->rce_OffloadFlags &
|
|
TCP_XMT_CHECKSUM_OFFLOAD)) {
|
|
HWChksum = TRUE;
|
|
|
|
if ((Size > sizeof(TCPHeader)) &&
|
|
!(ACKTcb->tcb_rce->rce_OffloadFlags &
|
|
TCP_CHECKSUM_OPT_OFFLOAD)) {
|
|
HWChksum = FALSE;
|
|
}
|
|
}
|
|
|
|
if (HWChksum) {
|
|
uint PHXsum = ACKTcb->tcb_phxsum + (uint) net_short(Size);
|
|
|
|
PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
|
|
ACKHeader->tcp_xsum = (ushort) PHXsum;
|
|
ACKTcb->tcb_opt.ioi_TcpChksum = 1;
|
|
#if DBG
|
|
DbgTcpSendHwChksumCount++;
|
|
#endif
|
|
} else {
|
|
|
|
ACKHeader->tcp_xsum =
|
|
~XsumSendChain(ACKTcb->tcb_phxsum +
|
|
(uint)net_short(Size), HeaderBuffer);
|
|
ACKTcb->tcb_opt.ioi_TcpChksum = 0;
|
|
}
|
|
|
|
STOP_TCB_TIMER_R(ACKTcb, DELACK_TIMER);
|
|
ACKTcb->tcb_rcvdsegs = 0;
|
|
ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
|
|
|
|
ClassifyPacket(ACKTcb);
|
|
|
|
CTEFreeLock(&ACKTcb->tcb_lock, TCBHandle);
|
|
|
|
TCPSIncrementOutSegCount();
|
|
|
|
if (ACKTcb->tcb_tcpopts) {
|
|
SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
HeaderBuffer,
|
|
Size,
|
|
ACKTcb->tcb_daddr,
|
|
ACKTcb->tcb_saddr,
|
|
&ACKTcb->tcb_opt,
|
|
ACKTcb->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
} else {
|
|
SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
HeaderBuffer,
|
|
sizeof(TCPHeader),
|
|
ACKTcb->tcb_daddr,
|
|
ACKTcb->tcb_saddr,
|
|
&ACKTcb->tcb_opt,
|
|
ACKTcb->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
|
|
}
|
|
|
|
ACKTcb->tcb_error = SendStatus;
|
|
if (SendStatus != IP_PENDING)
|
|
FreeTCPHeader(HeaderBuffer);
|
|
}
|
|
return;
|
|
|
|
}
|
|
|
|
//* SendTWtcbACK- Send an ACK segment for a twtcb
|
|
//
|
|
//
|
|
// Input: ACKTcb - TCB from which ACK is to be sent.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendTWtcbACK(TWTCB *ACKTcb, uint Partition, CTELockHandle TCBHandle)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *ACKHeader;
|
|
IP_STATUS SendStatus;
|
|
SeqNum SendNext;
|
|
ushort hdrlen = 5;
|
|
uint phxsum;
|
|
|
|
CTEStructAssert(ACKTcb, twtcb);
|
|
|
|
HeaderBuffer = GetTCPHeaderAtDpcLevel(&ACKHeader);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
ushort Size;
|
|
IPOptInfo NewInfo;
|
|
|
|
ACKHeader = (TCPHeader *)((PUCHAR)ACKHeader + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
|
|
ACKHeader->tcp_src = ACKTcb->twtcb_sport;
|
|
ACKHeader->tcp_dest = ACKTcb->twtcb_dport;
|
|
ACKHeader->tcp_ack = net_long(ACKTcb->twtcb_rcvnext);
|
|
|
|
SendNext = ACKTcb->twtcb_sendnext;
|
|
|
|
ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
|
|
|
|
ACKHeader->tcp_seq = net_long(SendNext);
|
|
|
|
// Window needs to be zero since we can not rcv anyway.
|
|
ACKHeader->tcp_window = 0;
|
|
ACKHeader->tcp_urgent = 0;
|
|
|
|
Size = sizeof(TCPHeader);
|
|
|
|
phxsum = PHXSUM(ACKTcb->twtcb_saddr, ACKTcb->twtcb_daddr,
|
|
PROTOCOL_TCP, 0);
|
|
|
|
ACKHeader->tcp_xsum = 0;
|
|
ACKHeader->tcp_xsum =
|
|
~XsumSendChain(phxsum +
|
|
(uint)net_short(Size), HeaderBuffer);
|
|
//ACKTcb->tcb_opt.ioi_TcpChksum=0;
|
|
|
|
CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
|
|
|
|
TCPSIncrementOutSegCount();
|
|
|
|
(*LocalNetInfo.ipi_initopts) (&NewInfo);
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
HeaderBuffer,
|
|
sizeof(TCPHeader),
|
|
ACKTcb->twtcb_daddr,
|
|
ACKTcb->twtcb_saddr,
|
|
&NewInfo,
|
|
NULL,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
|
|
if (SendStatus != IP_PENDING)
|
|
FreeTCPHeader(HeaderBuffer);
|
|
|
|
(*LocalNetInfo.ipi_freeopts) (&NewInfo);
|
|
|
|
} else {
|
|
CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
|
|
}
|
|
}
|
|
|
|
//* SendRSTFromTCB - Send a RST from a TCB.
|
|
//
|
|
// This is called during close when we need to send a RST.
|
|
//
|
|
// Input: RSTTcb - TCB from which RST is to be sent.
|
|
// RCE - Optional RCE to be used in sending.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendRSTFromTCB(TCB * RSTTcb, RouteCacheEntry* RCE)
|
|
{
|
|
PNDIS_BUFFER HeaderBuffer;
|
|
TCPHeader *RSTHeader;
|
|
IP_STATUS SendStatus;
|
|
|
|
CTEStructAssert(RSTTcb, tcb);
|
|
ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
|
|
|
|
HeaderBuffer = GetTCPHeader(&RSTHeader);
|
|
|
|
if (HeaderBuffer != NULL) {
|
|
SeqNum RSTSeq;
|
|
|
|
RSTHeader = (TCPHeader *) ((PUCHAR)RSTHeader + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
|
|
|
|
RSTHeader->tcp_src = RSTTcb->tcb_sport;
|
|
RSTHeader->tcp_dest = RSTTcb->tcb_dport;
|
|
|
|
// If the remote peer has a window of 0, send with a seq. # equal
|
|
// to senduna so he'll accept it. Otherwise send with send max.
|
|
if (RSTTcb->tcb_sendwin != 0)
|
|
RSTSeq = RSTTcb->tcb_sendmax;
|
|
else
|
|
RSTSeq = RSTTcb->tcb_senduna;
|
|
|
|
RSTHeader->tcp_seq = net_long(RSTSeq);
|
|
RSTHeader->tcp_ack = net_long(RSTTcb->tcb_rcvnext);
|
|
RSTHeader->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
|
|
TCP_FLAG_RST | TCP_FLAG_ACK);
|
|
|
|
RSTHeader->tcp_window = 0;
|
|
RSTHeader->tcp_urgent = 0;
|
|
RSTHeader->tcp_xsum = 0;
|
|
|
|
// Recompute pseudo checksum as this will
|
|
// not be valid when connection is disconnected
|
|
// in pre-accept case.
|
|
|
|
RSTHeader->tcp_xsum =
|
|
~XsumSendChain(PHXSUM(RSTTcb->tcb_saddr,
|
|
RSTTcb->tcb_daddr,
|
|
PROTOCOL_TCP,
|
|
sizeof(TCPHeader)),
|
|
HeaderBuffer);
|
|
|
|
|
|
RSTTcb->tcb_opt.ioi_TcpChksum = 0;
|
|
|
|
TCPSIncrementOutSegCount();
|
|
TStats.ts_outrsts++;
|
|
SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
HeaderBuffer,
|
|
sizeof(TCPHeader),
|
|
RSTTcb->tcb_daddr,
|
|
RSTTcb->tcb_saddr,
|
|
&RSTTcb->tcb_opt,
|
|
RCE,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
|
|
if (SendStatus != IP_PENDING)
|
|
FreeTCPHeader(HeaderBuffer);
|
|
}
|
|
return;
|
|
|
|
}
|
|
//* SendRSTFromHeader - Send a RST back, based on a header.
|
|
//
|
|
// Called when we need to send a RST, but don't necessarily have a TCB.
|
|
//
|
|
// Input: TCPH - TCP header to be RST.
|
|
// Length - Length of the incoming segment.
|
|
// Dest - Destination IP address for RST.
|
|
// Src - Source IP address for RST.
|
|
// OptInfo - IP Options to use on RST.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
SendRSTFromHeader(TCPHeader UNALIGNED * TCPH, uint Length, IPAddr Dest,
|
|
IPAddr Src, IPOptInfo * OptInfo)
|
|
{
|
|
PNDIS_BUFFER Buffer;
|
|
TCPHeader *RSTHdr;
|
|
IPOptInfo NewInfo;
|
|
IP_STATUS SendStatus;
|
|
|
|
if (TCPH->tcp_flags & TCP_FLAG_RST)
|
|
return;
|
|
|
|
Buffer = GetTCPHeader(&RSTHdr);
|
|
|
|
if (Buffer != NULL) {
|
|
// Got a buffer. Fill in the header so as to make it believable to
|
|
// the remote guy, and send it.
|
|
|
|
RSTHdr = (TCPHeader *) ((PUCHAR)RSTHdr + LocalNetInfo.ipi_hsize);
|
|
|
|
NDIS_BUFFER_LINKAGE(Buffer) = NULL;
|
|
|
|
if (TCPH->tcp_flags & TCP_FLAG_SYN)
|
|
Length++;
|
|
|
|
if (TCPH->tcp_flags & TCP_FLAG_FIN)
|
|
Length++;
|
|
|
|
if (TCPH->tcp_flags & TCP_FLAG_ACK) {
|
|
RSTHdr->tcp_seq = TCPH->tcp_ack;
|
|
RSTHdr->tcp_ack = TCPH->tcp_ack;
|
|
RSTHdr->tcp_flags =
|
|
MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong), TCP_FLAG_RST);
|
|
} else {
|
|
SeqNum TempSeq;
|
|
|
|
RSTHdr->tcp_seq = 0;
|
|
TempSeq = net_long(TCPH->tcp_seq);
|
|
TempSeq += Length;
|
|
RSTHdr->tcp_ack = net_long(TempSeq);
|
|
RSTHdr->tcp_flags =
|
|
MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
|
|
TCP_FLAG_RST | TCP_FLAG_ACK);
|
|
}
|
|
|
|
RSTHdr->tcp_window = 0;
|
|
RSTHdr->tcp_urgent = 0;
|
|
RSTHdr->tcp_dest = TCPH->tcp_src;
|
|
RSTHdr->tcp_src = TCPH->tcp_dest;
|
|
RSTHdr->tcp_xsum = 0;
|
|
|
|
RSTHdr->tcp_xsum =
|
|
~XsumSendChain(PHXSUM(Src, Dest, PROTOCOL_TCP, sizeof(TCPHeader)),
|
|
Buffer);
|
|
|
|
(*LocalNetInfo.ipi_initopts) (&NewInfo);
|
|
|
|
if (OptInfo->ioi_options != NULL)
|
|
(*LocalNetInfo.ipi_updateopts)(OptInfo, &NewInfo, Dest,
|
|
NULL_IP_ADDR);
|
|
|
|
TCPSIncrementOutSegCount();
|
|
TStats.ts_outrsts++;
|
|
SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
NULL,
|
|
Buffer,
|
|
sizeof(TCPHeader),
|
|
Dest,
|
|
Src,
|
|
&NewInfo,
|
|
NULL,
|
|
PROTOCOL_TCP,
|
|
NULL);
|
|
|
|
if (SendStatus != IP_PENDING)
|
|
FreeTCPHeader(Buffer);
|
|
|
|
(*LocalNetInfo.ipi_freeopts) (&NewInfo);
|
|
}
|
|
}
|
|
|
|
//* GoToEstab - Transition to the established state.
|
|
//
|
|
// Called when we are going to the established state and need to finish up
|
|
// initializing things that couldn't be done until now. We assume the TCB
|
|
// lock is held by the caller on the TCB we're called with.
|
|
//
|
|
// Input: EstabTCB - TCB to transition.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
GoToEstab(TCB * EstabTCB)
|
|
{
|
|
uchar DType;
|
|
ushort MSS;
|
|
|
|
// Initialize our slow start and congestion control variables.
|
|
EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
|
|
EstabTCB->tcb_ssthresh = 0xffffffff;
|
|
|
|
EstabTCB->tcb_state = TCB_ESTAB;
|
|
|
|
if (SynAttackProtect && EstabTCB->tcb_rce == NULL) {
|
|
(*LocalNetInfo.ipi_openrce)(EstabTCB->tcb_daddr, EstabTCB->tcb_saddr,
|
|
&EstabTCB->tcb_rce, &DType, &MSS,
|
|
&EstabTCB->tcb_opt);
|
|
}
|
|
|
|
|
|
// We're in established. We'll subtract one from slow count for this fact,
|
|
// and if the slowcount goes to 0 we'll move onto the fast path.
|
|
|
|
if (--(EstabTCB->tcb_slowcount) == 0)
|
|
EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
|
|
|
|
InterlockedIncrement((PLONG)&TStats.ts_currestab);
|
|
|
|
EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
|
|
|
|
// Start the Keep-Alive timer if necessary.
|
|
if ((EstabTCB->tcb_flags & KEEPALIVE) && EstabTCB->tcb_conn) {
|
|
START_TCB_TIMER_R(EstabTCB, KA_TIMER,
|
|
EstabTCB->tcb_conn->tc_tcbkatime);
|
|
EstabTCB->tcb_kacount = 0;
|
|
}
|
|
}
|
|
|
|
//* InitSendState - Initialize the send state of a connection.
|
|
//
|
|
// Called during connection establishment to initialize our send state.
|
|
// (In this case, this refers to all information we'll put on the wire as
|
|
// well as pure send state). We pick an ISS, set up a rexmit timer value,
|
|
// etc. We assume the tcb_lock is held on the TCB when we are called.
|
|
//
|
|
// Input: NewTCB - TCB to be set up.
|
|
//
|
|
// Returns: Nothing.
|
|
void
|
|
InitSendState(TCB * NewTCB)
|
|
{
|
|
CTEStructAssert(NewTCB, tcb);
|
|
|
|
NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
|
|
NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
|
|
NewTCB->tcb_error = IP_SUCCESS;
|
|
|
|
// Initialize pseudo-header xsum.
|
|
NewTCB->tcb_phxsum = PHXSUM(NewTCB->tcb_saddr, NewTCB->tcb_daddr,
|
|
PROTOCOL_TCP, 0);
|
|
|
|
// Initialize retransmit and delayed ack stuff.
|
|
NewTCB->tcb_rexmitcnt = 0;
|
|
NewTCB->tcb_rtt = 0;
|
|
NewTCB->tcb_smrtt = 0;
|
|
|
|
|
|
NewTCB->tcb_delta = MS_TO_TICKS(6000);
|
|
NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
|
|
|
|
if (NewTCB->tcb_rce) {
|
|
|
|
//
|
|
// InitialRtt can be as low as 300msec to enable
|
|
// certain scenarios to work correctly.
|
|
//
|
|
if (NewTCB->tcb_rce->rce_TcpInitialRTT &&
|
|
NewTCB->tcb_rce->rce_TcpInitialRTT > 3) {
|
|
|
|
NewTCB->tcb_delta =
|
|
MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT * 2);
|
|
NewTCB->tcb_rexmit =
|
|
MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT);
|
|
}
|
|
}
|
|
|
|
STOP_TCB_TIMER_R(NewTCB, RXMIT_TIMER);
|
|
STOP_TCB_TIMER_R(NewTCB, DELACK_TIMER);
|
|
|
|
|
|
}
|
|
|
|
//* TCPStatus - Handle a status indication.
|
|
//
|
|
// This is the TCP status handler, called by IP when a status event
|
|
// occurs. For most of these we do nothing. For certain severe status
|
|
// events we will mark the local address as invalid.
|
|
//
|
|
// Entry: StatusType - Type of status (NET or HW). NET status
|
|
// is usually caused by a received ICMP
|
|
// message. HW status indicate a HW
|
|
// problem.
|
|
// StatusCode - Code identifying IP_STATUS.
|
|
// OrigDest - If this is NET status, the original dest. of
|
|
// DG that triggered it.
|
|
// OrigSrc - " " " " " , the original src.
|
|
// Src - IP address of status originator (could be local
|
|
// or remote).
|
|
// Param - Additional information for status - i.e. the
|
|
// param field of an ICMP message.
|
|
// Data - Data pertaining to status - for NET status, this
|
|
// is the first 8 bytes of the original DG.
|
|
//
|
|
// Returns: Nothing
|
|
//
|
|
void
|
|
TCPStatus(uchar StatusType, IP_STATUS StatusCode, IPAddr OrigDest,
|
|
IPAddr OrigSrc, IPAddr Src, ulong Param, void *Data)
|
|
{
|
|
CTELockHandle TCBHandle;
|
|
TCB *StatusTCB;
|
|
TCPHeader UNALIGNED *Header = (TCPHeader UNALIGNED *) Data;
|
|
SeqNum DropSeq;
|
|
uint index;
|
|
|
|
// Handle NET status codes differently from HW status codes.
|
|
if (StatusType == IP_NET_STATUS) {
|
|
// It's a NET code. Find a matching TCB.
|
|
StatusTCB = FindTCB(OrigSrc, OrigDest, Header->tcp_dest,
|
|
Header->tcp_src, &TCBHandle, FALSE, &index);
|
|
if (StatusTCB != NULL) {
|
|
// Found one. Get the lock on it, and continue.
|
|
CTEStructAssert(StatusTCB, tcb);
|
|
// Make sure the TCB is in a state that is interesting.
|
|
if (StatusTCB->tcb_state == TCB_CLOSED ||
|
|
StatusTCB->tcb_state == TCB_TIME_WAIT ||
|
|
CLOSING(StatusTCB)) {
|
|
CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
|
|
return;
|
|
}
|
|
switch (StatusCode) {
|
|
// Hard errors - Destination protocol unreachable. We treat
|
|
// these as fatal errors. Close the connection now.
|
|
case IP_DEST_PROT_UNREACHABLE:
|
|
StatusTCB->tcb_error = StatusCode;
|
|
REFERENCE_TCB(StatusTCB);
|
|
TryToCloseTCB(StatusTCB, TCB_CLOSE_UNREACH, TCBHandle);
|
|
|
|
RemoveTCBFromConn(StatusTCB);
|
|
NotifyOfDisc(StatusTCB, NULL,
|
|
MapIPError(StatusCode, TDI_DEST_UNREACHABLE),
|
|
NULL);
|
|
CTEGetLock(&StatusTCB->tcb_lock, &TCBHandle);
|
|
DerefTCB(StatusTCB, TCBHandle);
|
|
return;
|
|
break;
|
|
|
|
// Soft errors. Save the error in case it time out.
|
|
case IP_DEST_NET_UNREACHABLE:
|
|
case IP_DEST_HOST_UNREACHABLE:
|
|
case IP_DEST_PORT_UNREACHABLE:
|
|
|
|
case IP_BAD_ROUTE:
|
|
case IP_TTL_EXPIRED_TRANSIT:
|
|
case IP_TTL_EXPIRED_REASSEM:
|
|
case IP_PARAM_PROBLEM:
|
|
StatusTCB->tcb_error = StatusCode;
|
|
break;
|
|
|
|
case IP_PACKET_TOO_BIG:
|
|
|
|
// icmp new MTU is in ich_param=1
|
|
Param = net_short(Param >> 16);
|
|
StatusTCB->tcb_error = StatusCode;
|
|
// Fall through mtu change code
|
|
|
|
|
|
case IP_SPEC_MTU_CHANGE:
|
|
// A TCP datagram has triggered an MTU change. Figure out
|
|
// which connection it is, and update him to retransmit the
|
|
// segment. The Param value is the new MTU. We'll need to
|
|
// retransmit if the new MTU is less than our existing MTU
|
|
// and the sequence of the dropped packet is less than our
|
|
// current send next.
|
|
|
|
|
|
Param = Param - (sizeof(TCPHeader) +
|
|
StatusTCB->tcb_opt.ioi_optlength + sizeof(IPHeader));
|
|
|
|
DropSeq = net_long(Header->tcp_seq);
|
|
|
|
if (*(ushort *) & Param <= StatusTCB->tcb_mss &&
|
|
(SEQ_GTE(DropSeq, StatusTCB->tcb_senduna) &&
|
|
SEQ_LT(DropSeq, StatusTCB->tcb_sendnext))) {
|
|
|
|
// Need to initiate a retranmsit.
|
|
ResetSendNext(StatusTCB, DropSeq);
|
|
// Set the congestion window to allow only one packet.
|
|
// This may prevent us from sending anything if we
|
|
// didn't just set sendnext to senduna. This is OK,
|
|
// we'll retransmit later, or send when we get an ack.
|
|
StatusTCB->tcb_cwin = Param;
|
|
DelayAction(StatusTCB, NEED_OUTPUT);
|
|
PartitionDelayQProcessing(FALSE);
|
|
}
|
|
StatusTCB->tcb_mss =
|
|
(ushort) MIN(Param, (ulong) StatusTCB->tcb_remmss);
|
|
|
|
|
|
ASSERT(StatusTCB->tcb_mss > 0);
|
|
ValidateMSS(StatusTCB);
|
|
|
|
//
|
|
// Reset the Congestion Window if necessary
|
|
//
|
|
if (StatusTCB->tcb_cwin < StatusTCB->tcb_mss) {
|
|
StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
|
|
|
|
//
|
|
// Make sure the slow start threshold is at least
|
|
// 2 segments
|
|
//
|
|
if (StatusTCB->tcb_ssthresh <
|
|
((uint) StatusTCB->tcb_mss * 2)
|
|
) {
|
|
StatusTCB->tcb_ssthresh = StatusTCB->tcb_mss * 2;
|
|
}
|
|
}
|
|
break;
|
|
|
|
// Source quench. This will cause us to reinitiate our
|
|
// slow start by resetting our congestion window and
|
|
// adjusting our slow start threshold.
|
|
case IP_SOURCE_QUENCH:
|
|
|
|
//
|
|
// Code is removed, since source quench messages can be
|
|
// misused to cause DoS attack.
|
|
//
|
|
break;
|
|
|
|
default:
|
|
ASSERT(0);
|
|
break;
|
|
}
|
|
|
|
CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
|
|
|
|
} else {
|
|
// Couldn't find a matching TCB. Just free the lock and return.
|
|
}
|
|
|
|
} else if (StatusType == IP_RECONFIG_STATUS) {
|
|
|
|
if (StatusCode == IP_RECONFIG_SECFLTR) {
|
|
ControlSecurityFiltering(Param);
|
|
}
|
|
} else {
|
|
uint NewMTU;
|
|
|
|
// 'Hardware' or 'global' status. Figure out what to do.
|
|
switch (StatusCode) {
|
|
case IP_ADDR_DELETED:
|
|
// Local address has gone away. OrigDest is the IPAddr which is
|
|
// gone.
|
|
|
|
//
|
|
// Delete any security filters associated with this address
|
|
//
|
|
DeleteProtocolSecurityFilter(OrigDest, PROTOCOL_TCP);
|
|
|
|
break;
|
|
|
|
case IP_ADDR_ADDED:
|
|
|
|
//
|
|
// An address has materialized. OrigDest identifies the address.
|
|
// Data is a handle to the IP configuration information for the
|
|
// interface on which the address is instantiated.
|
|
//
|
|
AddProtocolSecurityFilter(OrigDest, PROTOCOL_TCP,
|
|
(NDIS_HANDLE) Data);
|
|
|
|
break;
|
|
|
|
case IP_MTU_CHANGE:
|
|
NewMTU = Param - sizeof(TCPHeader);
|
|
TCBWalk(SetTCBMTU, &OrigDest, &OrigSrc, &NewMTU);
|
|
break;
|
|
default:
|
|
ASSERT(0);
|
|
break;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
//* FillTCPHeader - Fill the TCP header in.
|
|
//
|
|
// A utility routine to fill in the TCP header.
|
|
//
|
|
// Input: SendTCB - TCB to fill from.
|
|
// Header - Header to fill into.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
FillTCPHeader(TCB * SendTCB, TCPHeader * Header)
|
|
{
|
|
ushort S;
|
|
ulong L;
|
|
|
|
Header->tcp_src = SendTCB->tcb_sport;
|
|
Header->tcp_dest = SendTCB->tcb_dport;
|
|
L = SendTCB->tcb_sendnext;
|
|
Header->tcp_seq = net_long(L);
|
|
L = SendTCB->tcb_rcvnext;
|
|
Header->tcp_ack = net_long(L);
|
|
Header->tcp_flags = 0x1050;
|
|
Header->tcp_xsum = 0;
|
|
Header->tcp_urgent = 0;
|
|
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
ulong *ts_opt;
|
|
|
|
ts_opt = (ulong *)((uchar *) Header + 20);
|
|
//ts_opt = ts_opt + sizeof(TCPHeader);
|
|
|
|
*ts_opt++ = net_long(0x0101080A);
|
|
*ts_opt++ = net_long(TCPTime);
|
|
*ts_opt = net_long(SendTCB->tcb_tsrecent);
|
|
|
|
// Now the header is 32 bytes!!
|
|
Header->tcp_flags = 0x1080;
|
|
|
|
}
|
|
S = (ushort) (RcvWin(SendTCB) >> SendTCB->tcb_rcvwinscale);
|
|
Header->tcp_window = net_short(S);
|
|
|
|
}
|
|
|
|
//* ClassifyPacket - Classifies packets for GPC flow.
|
|
//
|
|
//
|
|
// Input: SendTCB - TCB of data/control packet to classify.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
ClassifyPacket(
|
|
TCB *SendTCB
|
|
)
|
|
{
|
|
#if GPC
|
|
|
|
//
|
|
// clear the precedence bits and get ready to be set
|
|
// according to the service type
|
|
//
|
|
|
|
if (DisableUserTOSSetting)
|
|
SendTCB->tcb_opt.ioi_tos &= TOS_MASK;
|
|
|
|
if (SendTCB->tcb_rce && GPCcfInfo) {
|
|
|
|
struct QosCfTransportInfo TransportInfo = {0, 0};
|
|
GPC_STATUS status = STATUS_SUCCESS;
|
|
GPC_IP_PATTERN Pattern;
|
|
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: Classifying packet TCP %x\n", SendTCB));
|
|
|
|
Pattern.SrcAddr = SendTCB->tcb_saddr;
|
|
Pattern.DstAddr = SendTCB->tcb_daddr;
|
|
Pattern.ProtocolId = PROTOCOL_TCP;
|
|
Pattern.gpcSrcPort = SendTCB->tcb_sport;
|
|
Pattern.gpcDstPort = SendTCB->tcb_dport;
|
|
if (SendTCB->tcb_GPCCachedRTE != (void *)SendTCB->tcb_rce->rce_rte) {
|
|
|
|
//
|
|
// first time we use this RTE, or it has been changed
|
|
// since the last send
|
|
//
|
|
|
|
if (GetIFAndLink(SendTCB->tcb_rce, &SendTCB->tcb_GPCCachedIF,
|
|
(IPAddr *) & SendTCB->tcb_GPCCachedLink) ==
|
|
STATUS_SUCCESS) {
|
|
|
|
SendTCB->tcb_GPCCachedRTE = (void *)SendTCB->tcb_rce->rce_rte;
|
|
}
|
|
//
|
|
// invaludate the classification handle
|
|
//
|
|
|
|
SendTCB->tcb_opt.ioi_GPCHandle = 0;
|
|
}
|
|
Pattern.InterfaceId.InterfaceId = SendTCB->tcb_GPCCachedIF;
|
|
Pattern.InterfaceId.LinkId = SendTCB->tcb_GPCCachedLink;
|
|
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: IF=%x Link=%x\n",
|
|
Pattern.InterfaceId.InterfaceId,
|
|
Pattern.InterfaceId.LinkId));
|
|
|
|
if (!SendTCB->tcb_opt.ioi_GPCHandle) {
|
|
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, getting one now.\n"));
|
|
|
|
status =
|
|
GpcEntries.GpcClassifyPatternHandler(
|
|
(GPC_HANDLE)hGpcClient[GPC_CF_QOS],
|
|
GPC_PROTOCOL_TEMPLATE_IP,
|
|
&Pattern,
|
|
NULL, // context
|
|
(PCLASSIFICATION_HANDLE)&SendTCB->tcb_opt.ioi_GPCHandle,
|
|
0,
|
|
NULL,
|
|
FALSE);
|
|
|
|
}
|
|
|
|
// Only if QOS patterns exist, we get the TOS bits out.
|
|
if (NT_SUCCESS(status) && GpcCfCounts[GPC_CF_QOS]) {
|
|
|
|
status =
|
|
GpcEntries.GpcGetUlongFromCfInfoHandler(
|
|
(GPC_HANDLE) hGpcClient[GPC_CF_QOS],
|
|
SendTCB->tcb_opt.ioi_GPCHandle,
|
|
FIELD_OFFSET(CF_INFO_QOS, TransportInformation),
|
|
(PULONG)&TransportInfo);
|
|
|
|
// It is likely that the pattern has gone by now
|
|
// and the handle that we are caching is INVALID.
|
|
// We need to pull up a new handle and get the
|
|
// TOS bit again.
|
|
if (STATUS_INVALID_HANDLE == status) {
|
|
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, "
|
|
"getting one now.\n"));
|
|
|
|
SendTCB->tcb_opt.ioi_GPCHandle = 0;
|
|
|
|
status =
|
|
GpcEntries.GpcClassifyPatternHandler(
|
|
(GPC_HANDLE) hGpcClient[GPC_CF_QOS],
|
|
GPC_PROTOCOL_TEMPLATE_IP,
|
|
&Pattern,
|
|
NULL, // context
|
|
(PCLASSIFICATION_HANDLE)&SendTCB->tcb_opt.ioi_GPCHandle,
|
|
0,
|
|
NULL,
|
|
FALSE);
|
|
|
|
//
|
|
// Only if QOS patterns exist, we get the TOS bits out.
|
|
//
|
|
if (NT_SUCCESS(status)) {
|
|
|
|
status =
|
|
GpcEntries.GpcGetUlongFromCfInfoHandler(
|
|
(GPC_HANDLE) hGpcClient[GPC_CF_QOS],
|
|
SendTCB->tcb_opt.ioi_GPCHandle,
|
|
FIELD_OFFSET(CF_INFO_QOS, TransportInformation),
|
|
(PULONG)&TransportInfo);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Perhaps something needs to be done if GPC_CF_IPSEC has non-zero patterns.
|
|
//
|
|
|
|
//
|
|
// Set the TOS bit now.
|
|
//
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: ServiceType(%d)=%d\n",
|
|
FIELD_OFFSET(CF_INFO_QOS, TransportInformation)));
|
|
|
|
if (status == STATUS_SUCCESS) {
|
|
|
|
//
|
|
// Get the TOS value and the types of allowed offloads.
|
|
//
|
|
SendTCB->tcb_opt.ioi_tos |= TransportInfo.ToSValue;
|
|
SendTCB->tcb_allowedoffloads = (USHORT)TransportInfo.AllowedOffloads;
|
|
|
|
//
|
|
// We are guaranteed for now that the other kind of offloads are
|
|
// never disabled, and hence, we won't check them on a per
|
|
// connection basis.
|
|
//
|
|
ASSERT((TransportInfo.AllowedOffloads | TCP_LARGE_SEND_OFFLOAD |
|
|
TCP_LARGE_SEND_TCPOPT_OFFLOAD |
|
|
TCP_LARGE_SEND_IPOPT_OFFLOAD) == TCP_IP_OFFLOAD_TYPES);
|
|
}
|
|
|
|
IF_TCPDBG(TCP_DEBUG_GPC)
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: TOS set to 0x%x\n",
|
|
SendTCB->tcb_opt.ioi_tos));
|
|
}
|
|
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
BOOLEAN
|
|
ProcessSend(TCB *SendTCB, SendCmpltContext *SCC, uint *pSendLength, uint AmtUnsent,
|
|
TCPHeader *Header, int SendWin, PNDIS_BUFFER CurrentBuffer)
|
|
{
|
|
TCPSendReq *CurSend = SCC->scc_firstsend;
|
|
long Result;
|
|
uint AmountLeft = *pSendLength;
|
|
ulong PrevFlags;
|
|
Queue *Next;
|
|
SeqNum OldSeq;
|
|
|
|
if (*pSendLength != 0) {
|
|
|
|
do {
|
|
|
|
BOOLEAN DirectSend = FALSE;
|
|
ASSERT(CurSend->tsr_refcnt > 0);
|
|
|
|
Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
|
|
|
|
ASSERT(Result > 0);
|
|
SCC->scc_count++;
|
|
|
|
if (SendTCB->tcb_sendofs == 0 &&
|
|
(SendTCB->tcb_sendsize <= AmountLeft) &&
|
|
(SCC->scc_tbufcount == 0) &&
|
|
(CurSend->tsr_lastbuf == NULL)) {
|
|
|
|
ulong length = 0;
|
|
PNDIS_BUFFER tmp = SendTCB->tcb_sendbuf;
|
|
|
|
while (tmp) {
|
|
length += NdisBufferLength(tmp);
|
|
tmp = NDIS_BUFFER_LINKAGE(tmp);
|
|
}
|
|
|
|
// If the requested length is
|
|
// more than in this mdl chain
|
|
// we can use fast path
|
|
|
|
if (AmountLeft >= length) {
|
|
DirectSend = TRUE;
|
|
}
|
|
}
|
|
|
|
if (DirectSend) {
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendTCB->tcb_sendbuf;
|
|
|
|
do {
|
|
SCC->scc_ubufcount++;
|
|
CurrentBuffer =
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
} while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
|
|
|
|
|
|
CurSend->tsr_lastbuf = CurrentBuffer;
|
|
AmountLeft -= SendTCB->tcb_sendsize;
|
|
SendTCB->tcb_sendsize = 0;
|
|
} else {
|
|
|
|
uint AmountToDup;
|
|
PNDIS_BUFFER NewBuf, Buf;
|
|
uint Offset;
|
|
NDIS_STATUS NStatus;
|
|
uint Length;
|
|
|
|
// Either the current send has more data than
|
|
// or the offset is not zero.
|
|
// In either case we'll need to loop
|
|
// through the current send, allocating buffers.
|
|
|
|
Buf = SendTCB->tcb_sendbuf;
|
|
Offset = SendTCB->tcb_sendofs;
|
|
|
|
do {
|
|
ASSERT(Buf != NULL);
|
|
|
|
Length = NdisBufferLength(Buf);
|
|
|
|
ASSERT((Offset < Length) ||
|
|
(Offset == 0 && Length == 0));
|
|
|
|
// Adjust the length for the offset into
|
|
// this buffer.
|
|
|
|
Length -= Offset;
|
|
|
|
AmountToDup = MIN(AmountLeft, Length);
|
|
|
|
NdisCopyBuffer(&NStatus, &NewBuf, TCPSendBufferPool, Buf,
|
|
Offset, AmountToDup);
|
|
|
|
if (NStatus == NDIS_STATUS_SUCCESS) {
|
|
|
|
SCC->scc_tbufcount++;
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
|
|
|
|
CurrentBuffer = NewBuf;
|
|
|
|
if (AmountToDup >= Length) {
|
|
// Exhausted this buffer.
|
|
Buf = NDIS_BUFFER_LINKAGE(Buf);
|
|
Offset = 0;
|
|
} else {
|
|
Offset += AmountToDup;
|
|
ASSERT(Offset < NdisBufferLength(Buf));
|
|
|
|
}
|
|
|
|
SendTCB->tcb_sendsize -= AmountToDup;
|
|
AmountLeft -= AmountToDup;
|
|
} else {
|
|
// Couldn't allocate a buffer. If
|
|
// the packet is already partly built,
|
|
// send what we've got, otherwise
|
|
// bail out.
|
|
if (SCC->scc_tbufcount == 0 &&
|
|
SCC->scc_ubufcount == 0) {
|
|
return FALSE;
|
|
}
|
|
*pSendLength -= AmountLeft;
|
|
AmountLeft = 0;
|
|
}
|
|
|
|
} while (AmountLeft && SendTCB->tcb_sendsize);
|
|
|
|
SendTCB->tcb_sendbuf = Buf;
|
|
SendTCB->tcb_sendofs = Offset;
|
|
}
|
|
|
|
if (CurSend->tsr_flags & TSR_FLAG_URG) {
|
|
ushort UP;
|
|
// This send is urgent data. We need to figure
|
|
// out what the urgent data pointer should be.
|
|
// We know sendnext is the starting sequence
|
|
// number of the frame, and that at the top of
|
|
// this do loop sendnext identified a byte in
|
|
// the CurSend at that time. We advanced CurSend
|
|
// at the same rate we've decremented
|
|
// AmountLeft (AmountToSend - AmountLeft ==
|
|
// AmountBuilt), so sendnext +
|
|
// (AmountToSend - AmountLeft) identifies a byte
|
|
// in the current value of CurSend, and that
|
|
// quantity plus tcb_sendsize is the sequence
|
|
// number one beyond the current send.
|
|
UP =
|
|
(ushort) (*pSendLength - AmountLeft) +
|
|
(ushort) SendTCB->tcb_sendsize -
|
|
((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
|
|
Header->tcp_urgent = net_short(UP);
|
|
Header->tcp_flags |= TCP_FLAG_URG;
|
|
}
|
|
|
|
if (SendTCB->tcb_sendsize == 0) {
|
|
|
|
// We've exhausted this send. Set the PUSH bit.
|
|
|
|
Header->tcp_flags |= TCP_FLAG_PUSH;
|
|
PrevFlags = CurSend->tsr_flags;
|
|
Next = QNEXT(&CurSend->tsr_req.tr_q);
|
|
if (Next != QEND(&SendTCB->tcb_sendq)) {
|
|
CurSend = STRUCT_OF(TCPSendReq,
|
|
QSTRUCT(TCPReq, Next,
|
|
tr_q), tsr_req);
|
|
CTEStructAssert(CurSend, tsr);
|
|
SendTCB->tcb_sendsize =
|
|
CurSend->tsr_unasize;
|
|
SendTCB->tcb_sendofs = CurSend->tsr_offset;
|
|
SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
|
|
SendTCB->tcb_cursend = CurSend;
|
|
|
|
// Check the urgent flags. We can't combine
|
|
// new urgent data on to the end of old
|
|
// non-urgent data.
|
|
if ((PrevFlags & TSR_FLAG_URG) && !
|
|
(CurSend->tsr_flags & TSR_FLAG_URG))
|
|
break;
|
|
} else {
|
|
ASSERT(AmountLeft == 0);
|
|
SendTCB->tcb_cursend = NULL;
|
|
SendTCB->tcb_sendbuf = NULL;
|
|
}
|
|
|
|
}
|
|
} while (AmountLeft != 0);
|
|
|
|
}
|
|
|
|
|
|
// Update the sequence numbers, and start a RTT
|
|
// measurement if needed.
|
|
|
|
// Adjust for what we're really going to send.
|
|
*pSendLength -= AmountLeft;
|
|
|
|
OldSeq = SendTCB->tcb_sendnext;
|
|
SendTCB->tcb_sendnext += *pSendLength;
|
|
|
|
if (SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
|
|
|
|
// We're sending entirely new data.
|
|
// We can't advance sendmax once FIN_SENT is set.
|
|
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
|
|
|
|
SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
|
|
|
|
// We've advanced sendmax, so we must be sending
|
|
// some new data, so bump the outsegs counter.
|
|
|
|
TCPSIncrementOutSegCount();
|
|
|
|
if (SendTCB->tcb_rtt == 0) {
|
|
// No RTT running, so start one.
|
|
SendTCB->tcb_rtt = TCPTime;
|
|
SendTCB->tcb_rttseq = OldSeq;
|
|
}
|
|
} else {
|
|
|
|
// We have at least some retransmission.
|
|
|
|
if ((SendTCB->tcb_sendmax - OldSeq) > 1) {
|
|
TStats.ts_retranssegs++;
|
|
}
|
|
if (SEQ_GT(SendTCB->tcb_sendnext,
|
|
SendTCB->tcb_sendmax)) {
|
|
// But we also have some new data, so check the rtt stuff.
|
|
TCPSIncrementOutSegCount();
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
|
|
SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
|
|
|
|
if (SendTCB->tcb_rtt == 0) {
|
|
// No RTT running, so start one.
|
|
SendTCB->tcb_rtt = TCPTime;
|
|
SendTCB->tcb_rttseq = OldSeq;
|
|
}
|
|
}
|
|
}
|
|
|
|
// We've built the frame entirely. If we've send
|
|
// everything we have and there is a FIN pending,
|
|
// OR it in.
|
|
|
|
if (AmtUnsent == *pSendLength) {
|
|
if (SendTCB->tcb_flags & FIN_NEEDED) {
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
|
|
(SendTCB->tcb_sendnext ==
|
|
(SendTCB->tcb_sendmax - 1)));
|
|
// See if we still have room in the window for a FIN.
|
|
if (SendWin > (int)*pSendLength) {
|
|
Header->tcp_flags |= TCP_FLAG_FIN;
|
|
SendTCB->tcb_sendnext++;
|
|
SendTCB->tcb_sendmax =
|
|
SendTCB->tcb_sendnext;
|
|
SendTCB->tcb_flags |=
|
|
(FIN_SENT | FIN_OUTSTANDING);
|
|
SendTCB->tcb_flags &= ~FIN_NEEDED;
|
|
}
|
|
}
|
|
}
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
//* TCPSend - Send data from a TCP connection.
|
|
//
|
|
// This is the main 'send data' routine. We go into a loop, trying
|
|
// to send data until we can't for some reason. First we compute
|
|
// the useable window, use it to figure the amount we could send. If
|
|
// the amount we could send meets certain criteria we'll build a frame
|
|
// and send it, after setting any appropriate control bits. We assume
|
|
// the caller has put a reference on the TCB.
|
|
//
|
|
// Input: SendTCB - TCB to be sent from.
|
|
// TCBHandle - Lock handle for TCB.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
TCPSend(TCB * SendTCB, CTELockHandle TCBHandle)
|
|
{
|
|
int SendWin; // Useable send window.
|
|
uint AmountToSend; // Amount to send this time.
|
|
uint AmountLeft;
|
|
TCPHeader *Header; // TCP header for a send.
|
|
PNDIS_BUFFER FirstBuffer, CurrentBuffer;
|
|
TCPSendReq *CurSend;
|
|
SendCmpltContext *SCC;
|
|
SeqNum OldSeq;
|
|
IP_STATUS SendStatus;
|
|
uint AmtOutstanding, AmtUnsent;
|
|
int ForceWin; // Window we're force to use.
|
|
BOOLEAN FullSegment;
|
|
BOOLEAN MoreToSend = FALSE;
|
|
uint SegmentsSent = 0;
|
|
BOOLEAN LargeSendOffload = FALSE;
|
|
BOOLEAN LargeSendFailed = FALSE;
|
|
uint MSS;
|
|
|
|
uint LargeSend, SentBytes;
|
|
void *Irp;
|
|
|
|
|
|
CTEStructAssert(SendTCB, tcb);
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
|
|
ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
|
|
ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
|
|
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
|
|
(SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
|
|
|
|
if (!(SendTCB->tcb_flags & IN_TCP_SEND) &&
|
|
!(SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
|
|
SendTCB->tcb_flags |= IN_TCP_SEND;
|
|
|
|
// We'll continue this loop until we send a FIN, or we break out
|
|
// internally for some other reason.
|
|
|
|
while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
|
|
|
|
CheckTCBSends(SendTCB);
|
|
SegmentsSent++;
|
|
|
|
if (SegmentsSent > MaxSendSegments) {
|
|
|
|
// We are throttled by max segments that can be sent in
|
|
// this loop. Comeback later
|
|
|
|
MoreToSend = TRUE;
|
|
break;
|
|
}
|
|
AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
|
|
SendTCB->tcb_senduna);
|
|
AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
|
|
|
|
ASSERT(*(int *)&AmtUnsent >= 0);
|
|
|
|
SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
|
|
AmtOutstanding);
|
|
|
|
// if this send is after the fast recovery
|
|
// and sendwin is zero because of amt outstanding
|
|
// then, at least force 1 segment to prevent delayed
|
|
// ack timeouts from the remote
|
|
|
|
if (SendTCB->tcb_force) {
|
|
SendTCB->tcb_force = 0;
|
|
if (SendWin < SendTCB->tcb_mss) {
|
|
|
|
SendWin = SendTCB->tcb_mss;
|
|
}
|
|
}
|
|
// Since the window could have shrank, need to get it to zero at
|
|
// least.
|
|
ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
|
|
FORCE_OUT_SHIFT);
|
|
SendWin = MAX(SendWin, ForceWin);
|
|
|
|
LargeSend = MIN((uint) SendWin, AmtUnsent);
|
|
LargeSend = MIN(LargeSend, SendTCB->tcb_mss * MaxSendSegments);
|
|
|
|
AmountToSend =
|
|
MIN(MIN((uint) SendWin, AmtUnsent), SendTCB->tcb_mss);
|
|
|
|
ASSERT(SendTCB->tcb_mss > 0);
|
|
|
|
// Time stamp option addition might force us to cut the data
|
|
// to be sent by 12 bytes.
|
|
|
|
FullSegment = FALSE;
|
|
|
|
if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
|
|
(AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
|
|
AmountToSend = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
|
|
FullSegment = TRUE;
|
|
} else {
|
|
|
|
if (AmountToSend == SendTCB->tcb_mss)
|
|
FullSegment = TRUE;
|
|
}
|
|
|
|
|
|
// We will send a segment if
|
|
//
|
|
// 1. The segment size == mss
|
|
// 2. This is the only segment to be sent
|
|
// 3. FIN is set and this is the last segment
|
|
// 4. FORCE_OUTPUT is set
|
|
// 5. Amount to be sent is >= MSS/2
|
|
|
|
if (FullSegment ||
|
|
|
|
|
|
(AmountToSend != 0 && AmountToSend == AmtUnsent) ||
|
|
(SendWin != 0 &&
|
|
(((SendTCB->tcb_flags & FIN_NEEDED) &&
|
|
(AmtUnsent <= SendTCB->tcb_mss)) ||
|
|
(SendTCB->tcb_flags & FORCE_OUTPUT) ||
|
|
AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
|
|
|
|
//
|
|
// Set MSS first.
|
|
//
|
|
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
MSS = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
|
|
} else {
|
|
MSS = SendTCB->tcb_mss;
|
|
}
|
|
// It's OK to send something. Try to get a header buffer now.
|
|
FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
|
|
if (FirstBuffer != NULL) {
|
|
|
|
// Got a header buffer. Loop through the sends on the TCB,
|
|
// building a frame.
|
|
CurrentBuffer = FirstBuffer;
|
|
CurSend = SendTCB->tcb_cursend;
|
|
|
|
Header =
|
|
(TCPHeader *)((PUCHAR)Header + LocalNetInfo.ipi_hsize);
|
|
|
|
|
|
// allow room for filling time stamp options (12 bytes)
|
|
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
NdisAdjustBufferLength(FirstBuffer,
|
|
sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
|
|
|
|
SCC = (SendCmpltContext *) (Header + 1);
|
|
SCC = (SendCmpltContext *) ((uchar *) SCC + ALIGNED_TS_OPT_SIZE);
|
|
|
|
} else {
|
|
|
|
SCC = (SendCmpltContext *) (Header + 1);
|
|
}
|
|
|
|
SCC = ALIGN_UP_POINTER(SCC, PVOID);
|
|
#if DBG
|
|
SCC->scc_sig = scc_signature;
|
|
#endif
|
|
|
|
FillTCPHeader(SendTCB, Header);
|
|
|
|
SCC->scc_ubufcount = 0;
|
|
SCC->scc_tbufcount = 0;
|
|
SCC->scc_count = 0;
|
|
|
|
SCC->scc_LargeSend = 0;
|
|
|
|
// Check if RCE has large send capability and, if so,
|
|
// attempt to offload segmentation to the hardware.
|
|
// * only offload if there is more than 1 segment's worth
|
|
// of data.
|
|
// * only offload if the number of segments is greater than
|
|
// the minimum number of segments the adapter is willing
|
|
// to offload.
|
|
// * only offload if it is allowed by all the entities of
|
|
// known classification families.
|
|
// * ( i.e. if TCP or IP options need to be
|
|
// offloaded, we only offload if the adapter supports it)
|
|
//
|
|
|
|
if (!DisableLargeSendOffload &&
|
|
SendTCB->tcb_rce &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_LARGE_SEND_OFFLOAD) &&
|
|
(SendTCB->tcb_allowedoffloads &
|
|
TCP_LARGE_SEND_OFFLOAD) &&
|
|
(!(SendTCB->tcb_tcpopts & TCP_FLAG_TS) ||
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_LARGE_SEND_TCPOPT_OFFLOAD)) &&
|
|
(!SendTCB->tcb_opt.ioi_options ||
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_LARGE_SEND_IPOPT_OFFLOAD)) &&
|
|
!LargeSendFailed &&
|
|
(MSS < LargeSend) &&
|
|
(CurSend && (CurSend->tsr_lastbuf == NULL)) && !(CurSend->tsr_flags & TSR_FLAG_URG)) {
|
|
|
|
uint PartialSegment;
|
|
LargeSendOffload = TRUE;
|
|
LargeSend =
|
|
MIN(SendTCB->tcb_rce->rce_TcpLargeSend.MaxOffLoadSize,
|
|
LargeSend);
|
|
//
|
|
// Adjust LargeSend to make LSO path
|
|
// conform sender side silly window avoidance:
|
|
// 1) it is multiple of MSS
|
|
// 2) We are sending out everything we have
|
|
// 3) FORCE_OUTPUT is set
|
|
// 4) Amount to be sent is >= maximum window size /2
|
|
//
|
|
|
|
PartialSegment = LargeSend % MSS;
|
|
if ((PartialSegment != 0) &&
|
|
(LargeSend != AmtUnsent) &&
|
|
(!(SendTCB->tcb_flags & FORCE_OUTPUT)) &&
|
|
(PartialSegment < (SendTCB->tcb_maxwin / 2))) {
|
|
LargeSend -= PartialSegment;
|
|
}
|
|
|
|
//
|
|
// Offload only if the segments we have is greater than
|
|
// the minimum segment requirement of the NIC.
|
|
//
|
|
|
|
if (SendTCB->tcb_rce->rce_TcpLargeSend.MinSegmentCount >
|
|
(LargeSend + MSS - 1) / MSS ) {
|
|
LargeSendOffload = FALSE;
|
|
}
|
|
|
|
//
|
|
// LargeSend can not be zero.
|
|
//
|
|
|
|
if (LargeSend == 0) {
|
|
LargeSendOffload = FALSE;
|
|
}
|
|
} else {
|
|
LargeSendOffload = FALSE;
|
|
}
|
|
|
|
if (LargeSendOffload) {
|
|
|
|
IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x offload %d bytes at "
|
|
"seq %u ack %u win %u\n",
|
|
SendTCB, LargeSend, SendTCB->tcb_sendnext,
|
|
SendTCB->tcb_rcvnext, SendWin));
|
|
}
|
|
|
|
|
|
OldSeq = SendTCB->tcb_sendnext;
|
|
|
|
CTEStructAssert(CurSend, tsr);
|
|
SCC->scc_firstsend = CurSend;
|
|
|
|
if (!ProcessSend(SendTCB, SCC, &LargeSend, AmtUnsent, Header,
|
|
SendWin, CurrentBuffer)) {
|
|
goto error_oor1;
|
|
}
|
|
|
|
{
|
|
uint PHXsum = SendTCB->tcb_phxsum;
|
|
PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
|
|
PHXsum) >> 16;
|
|
Header->tcp_xsum = (ushort) PHXsum;
|
|
}
|
|
|
|
|
|
SCC->scc_SendSize = LargeSend;
|
|
SCC->scc_ByteSent = 0;
|
|
SCC->scc_LargeSend = SendTCB;
|
|
REFERENCE_TCB(SendTCB);
|
|
#if DBG
|
|
SendTCB->tcb_LargeSend++;
|
|
#endif
|
|
SendTCB->tcb_rcvdsegs = 0;
|
|
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
LargeSend +=
|
|
sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE;
|
|
} else {
|
|
LargeSend += sizeof(TCPHeader);
|
|
}
|
|
|
|
IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x large-send %d seq %u\n",
|
|
SendTCB, LargeSend, OldSeq));
|
|
}
|
|
|
|
ClassifyPacket(SendTCB);
|
|
|
|
CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_largexmit)(TCPProtInfo, SCC,
|
|
FirstBuffer,
|
|
LargeSend,
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&SendTCB->tcb_opt,
|
|
SendTCB->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
&SentBytes,
|
|
MSS);
|
|
|
|
SendTCB->tcb_error = SendStatus;
|
|
|
|
if (SendStatus != IP_PENDING) {
|
|
|
|
// Let TCPSendComplete handle partial sends
|
|
|
|
SCC->scc_ByteSent = SentBytes;
|
|
TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
|
|
|
|
}
|
|
|
|
CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
|
|
|
|
if (SendStatus == IP_GENERAL_FAILURE) {
|
|
|
|
if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
|
|
SEQ_LT(OldSeq, SendTCB->tcb_sendnext)) {
|
|
|
|
ResetSendNext(SendTCB, OldSeq);
|
|
}
|
|
LargeSendFailed = TRUE;
|
|
continue;
|
|
}
|
|
|
|
if (SendStatus == IP_PACKET_TOO_BIG) {
|
|
SeqNum NewSeq = OldSeq + SentBytes;
|
|
//Not everything got sent.
|
|
//Adjust for what is sent
|
|
if (SEQ_GTE(NewSeq, SendTCB->tcb_senduna) &&
|
|
SEQ_LT(NewSeq, SendTCB->tcb_sendnext)) {
|
|
ResetSendNext(SendTCB, NewSeq);
|
|
|
|
}
|
|
}
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
}
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT |
|
|
FORCE_OUTPUT | SEND_AFTER_RCV);
|
|
|
|
DerefTCB(SendTCB, TCBHandle);
|
|
return;
|
|
|
|
}
|
|
|
|
// Normal path
|
|
|
|
AmountLeft = AmountToSend;
|
|
|
|
if (AmountToSend != 0) {
|
|
CTEStructAssert(CurSend, tsr);
|
|
SCC->scc_firstsend = CurSend;
|
|
} else {
|
|
|
|
// We're in the loop, but AmountToSend is 0. This
|
|
// should happen only when we're sending a FIN. Check
|
|
// this, and return if it's not true.
|
|
ASSERT(AmtUnsent == 0);
|
|
if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
|
|
FreeTCPHeader(FirstBuffer);
|
|
break;
|
|
}
|
|
SCC->scc_firstsend = NULL;
|
|
NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
|
|
}
|
|
|
|
OldSeq = SendTCB->tcb_sendnext;
|
|
|
|
if (!ProcessSend(SendTCB, SCC, &AmountToSend, AmtUnsent, Header,
|
|
SendWin, CurrentBuffer)) {
|
|
goto error_oor1;
|
|
}
|
|
|
|
AmountToSend += sizeof(TCPHeader);
|
|
|
|
|
|
SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
|
|
FORCE_OUTPUT);
|
|
STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
|
|
STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
|
|
SendTCB->tcb_rcvdsegs = 0;
|
|
|
|
if ( (SendTCB->tcb_flags & KEEPALIVE) && ( SendTCB->tcb_conn != NULL) )
|
|
START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
|
|
SendTCB->tcb_kacount = 0;
|
|
|
|
// We're all set. Xsum it and send it.
|
|
ClassifyPacket(SendTCB);
|
|
|
|
// Account for time stamp options
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
if (SendTCB->tcb_rce &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_XMT_CHECKSUM_OFFLOAD) &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_CHECKSUM_OPT_OFFLOAD)) {
|
|
uint PHXsum =
|
|
SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE);
|
|
|
|
PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
|
|
PHXsum) >> 16;
|
|
Header->tcp_xsum = (ushort) PHXsum;
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 1;
|
|
#if DBG
|
|
DbgTcpSendHwChksumCount++;
|
|
#endif
|
|
} else {
|
|
|
|
Header->tcp_xsum =
|
|
~XsumSendChain(
|
|
SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE),
|
|
FirstBuffer);
|
|
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 0;
|
|
}
|
|
|
|
|
|
|
|
CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
|
|
|
|
Irp = NULL;
|
|
if (SCC->scc_firstsend) {
|
|
Irp = SCC->scc_firstsend->tsr_req.tr_context;
|
|
}
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo, SCC,
|
|
FirstBuffer,
|
|
AmountToSend +
|
|
ALIGNED_TS_OPT_SIZE,
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&SendTCB->tcb_opt,
|
|
SendTCB->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
Irp );
|
|
|
|
} else {
|
|
|
|
if (SendTCB->tcb_rce &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_XMT_CHECKSUM_OFFLOAD)) {
|
|
uint PHXsum = SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend);
|
|
|
|
PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
|
|
PHXsum) >> 16;
|
|
|
|
Header->tcp_xsum = (ushort) PHXsum;
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 1;
|
|
#if DBG
|
|
DbgTcpSendHwChksumCount++;
|
|
#endif
|
|
} else {
|
|
Header->tcp_xsum =
|
|
~XsumSendChain(SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend),
|
|
FirstBuffer);
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 0;
|
|
}
|
|
|
|
CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
|
|
|
|
Irp = NULL;
|
|
if(SCC->scc_firstsend) {
|
|
Irp = SCC->scc_firstsend->tsr_req.tr_context;
|
|
}
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
SCC,
|
|
FirstBuffer,
|
|
AmountToSend,
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&SendTCB->tcb_opt,
|
|
SendTCB->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
Irp );
|
|
|
|
}
|
|
|
|
SendTCB->tcb_error = SendStatus;
|
|
|
|
if (SendStatus != IP_PENDING) {
|
|
|
|
TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
|
|
if (SendStatus != IP_SUCCESS) {
|
|
CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
|
|
// This packet didn't get sent. If nothing's
|
|
// changed in the TCB, put sendnext back to
|
|
// what we just tried to send. Depending on
|
|
// the error, we may try again.
|
|
if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
|
|
SEQ_LT(OldSeq, SendTCB->tcb_sendnext))
|
|
ResetSendNext(SendTCB, OldSeq);
|
|
|
|
// We know this packet didn't get sent. Start
|
|
// the retransmit timer now, if it's not already
|
|
// running, in case someone came in while we
|
|
// were in IP and stopped it.
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
}
|
|
// If it failed because of an MTU problem, get
|
|
// the new MTU and try again.
|
|
if (SendStatus == IP_PACKET_TOO_BIG) {
|
|
uint NewMTU;
|
|
|
|
// The MTU has changed. Update it, and try
|
|
// again.
|
|
// if ipsec is adjusting the mtu, rce_newmtu
|
|
// will contain the newmtu.
|
|
if (SendTCB->tcb_rce) {
|
|
|
|
if (!SendTCB->tcb_rce->rce_newmtu) {
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_getpinfo)(
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&NewMTU,
|
|
NULL,
|
|
SendTCB->tcb_rce);
|
|
} else {
|
|
NewMTU = SendTCB->tcb_rce->rce_newmtu;
|
|
SendStatus = IP_SUCCESS;
|
|
}
|
|
|
|
} else {
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_getpinfo)(
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&NewMTU,
|
|
NULL,
|
|
SendTCB->tcb_rce);
|
|
|
|
}
|
|
|
|
if (SendStatus != IP_SUCCESS)
|
|
break;
|
|
|
|
// We have a new MTU. Make sure it's big enough
|
|
// to use. If not, correct this and turn off
|
|
// MTU discovery on this TCB. Otherwise use the
|
|
// new MTU.
|
|
if (NewMTU <=
|
|
(sizeof(TCPHeader) +
|
|
SendTCB->tcb_opt.ioi_optlength)) {
|
|
|
|
// The new MTU is too small to use. Turn off
|
|
// PMTU discovery on this TCB, and drop to
|
|
// our off net MTU size.
|
|
SendTCB->tcb_opt.ioi_flags &= ~IP_FLAG_DF;
|
|
SendTCB->tcb_mss =
|
|
MIN((ushort)MAX_REMOTE_MSS,
|
|
SendTCB->tcb_remmss);
|
|
} else {
|
|
|
|
// The new MTU is adequate. Adjust it for
|
|
// the header size and options length, and
|
|
// use it.
|
|
NewMTU -= sizeof(TCPHeader) -
|
|
SendTCB->tcb_opt.ioi_optlength;
|
|
SendTCB->tcb_mss =
|
|
MIN((ushort) NewMTU,
|
|
SendTCB->tcb_remmss);
|
|
}
|
|
|
|
ASSERT(SendTCB->tcb_mss > 0);
|
|
ValidateMSS(SendTCB);
|
|
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
//Start it now, since we know that mac driver accepted it.
|
|
|
|
CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
}
|
|
continue;
|
|
} else // FirstBuffer != NULL.
|
|
|
|
goto error_oor;
|
|
} else {
|
|
// We've decided we can't send anything now. Figure out why, and
|
|
// see if we need to set a timer.
|
|
if (SendTCB->tcb_sendwin == 0) {
|
|
if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
|
|
ushort tmp;
|
|
|
|
SendTCB->tcb_flags |= FLOW_CNTLD;
|
|
|
|
SendTCB->tcb_rexmitcnt = 0;
|
|
|
|
tmp = MIN(MAX(REXMIT_TO(SendTCB),
|
|
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
|
|
SendTCB->tcb_slowcount++;
|
|
SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
|
|
} else if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER))
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
} else if (AmountToSend != 0)
|
|
// We have something to send, but we're not sending
|
|
// it, presumably due to SWS avoidance.
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, SWS_TIMER))
|
|
START_TCB_TIMER_R(SendTCB, SWS_TIMER, SWS_TO);
|
|
|
|
break;
|
|
}
|
|
|
|
} // while (!FIN_OUTSTANDING)
|
|
|
|
// We're done sending, so we don't need the output flags set.
|
|
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
|
|
SEND_AFTER_RCV);
|
|
|
|
if (MoreToSend) {
|
|
//just indicate that we need to send more
|
|
DelayAction(SendTCB, NEED_OUTPUT);
|
|
PartitionDelayQProcessing(FALSE);
|
|
}
|
|
// This is for TS algo
|
|
SendTCB->tcb_lastack = SendTCB->tcb_rcvnext;
|
|
|
|
} else
|
|
SendTCB->tcb_flags |= SEND_AFTER_RCV;
|
|
|
|
DerefTCB(SendTCB, TCBHandle);
|
|
return;
|
|
|
|
// Common case error handling code for out of resource conditions. Start the
|
|
// retransmit timer if it's not already running (so that we try this again
|
|
// later), clean up and return.
|
|
error_oor:
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
ushort tmp;
|
|
|
|
tmp = MIN(MAX(REXMIT_TO(SendTCB),
|
|
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
|
|
}
|
|
// We had an out of resource problem, so clear the OUTPUT flags.
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
|
|
DerefTCB(SendTCB, TCBHandle);
|
|
return;
|
|
|
|
error_oor1:
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
ushort tmp;
|
|
|
|
tmp = MIN(MAX(REXMIT_TO(SendTCB),
|
|
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
|
|
}
|
|
// We had an out of resource problem, so clear the OUTPUT flags.
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
|
|
DerefTCB(SendTCB, TCBHandle);
|
|
TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
//* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
|
|
//
|
|
// Called to handle fast retransmit of the segment which the reveiver
|
|
// is asking for.
|
|
// We assume the caller has put a reference on the TCB, and the TCB is locked
|
|
// on entry. The reference is dropped and the lock released before returning.
|
|
//
|
|
// Input: SeqTCB - Pointer to TCB to be updated.
|
|
// NewSeq - Sequence number to set.
|
|
// NewCWin - new value for congestion window.
|
|
//
|
|
// Returns: Nothing.
|
|
//
|
|
void
|
|
ResetAndFastSend(TCB * SeqTCB, SeqNum NewSeq, uint NewCWin)
|
|
{
|
|
TCPSendReq *SendReq;
|
|
uint AmtForward;
|
|
Queue *CurQ;
|
|
PNDIS_BUFFER Buffer;
|
|
uint Offset;
|
|
uint SendSize;
|
|
CTELockHandle TCBHandle;
|
|
int ToBeSent;
|
|
|
|
CTEStructAssert(SeqTCB, tcb);
|
|
ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
|
|
|
|
// The new seq must be less than send max, or NewSeq, senduna, sendnext,
|
|
// and sendmax must all be equal. (The latter case happens when we're
|
|
// called exiting TIME_WAIT, or possibly when we're retransmitting
|
|
// during a flow controlled situation).
|
|
|
|
ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
|
|
(SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
|
|
|
|
if (SYNC_STATE(SeqTCB->tcb_state) && SeqTCB->tcb_state != TCB_TIME_WAIT) {
|
|
// In these states we need to update the send queue.
|
|
|
|
if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
|
|
|
|
// Stop the retransmit timer only if we are sure there are going
|
|
// to be retransmissions.
|
|
STOP_TCB_TIMER_R(SeqTCB, RXMIT_TIMER);
|
|
SeqTCB->tcb_rtt = 0;
|
|
|
|
CurQ = QHEAD(&SeqTCB->tcb_sendq);
|
|
|
|
SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
|
|
|
|
// SendReq points to the first send request on the send queue.
|
|
// We're pointing at the proper send req now. We need to go down
|
|
|
|
// SendReq points to the cursend
|
|
// SendSize point to sendsize in the cursend
|
|
|
|
SendSize = SendReq->tsr_unasize;
|
|
|
|
Buffer = SendReq->tsr_buffer;
|
|
Offset = SendReq->tsr_offset;
|
|
|
|
// Call the fast retransmit send now
|
|
|
|
if ((SeqTCB->tcb_tcpopts & TCP_FLAG_SACK)) {
|
|
SackListEntry *Prev, *Current;
|
|
SeqNum CurBegin = 0, CurEnd;
|
|
BOOLEAN UseSackList = TRUE;
|
|
|
|
Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
|
|
Current = Prev->next;
|
|
|
|
// There is a hole from Newseq to Currentbeg
|
|
// try to retransmit whole hole size!!
|
|
|
|
if (Current && SEQ_LT(NewSeq, Current->begin)) {
|
|
ToBeSent = Current->begin - NewSeq;
|
|
CurBegin = Current->begin;
|
|
CurEnd = Current->end;
|
|
} else {
|
|
UseSackList = FALSE;
|
|
ToBeSent = SeqTCB->tcb_mss;
|
|
}
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
|
|
"In Sack Reset and send rexmiting %d %d\n",
|
|
NewSeq, SendSize));
|
|
}
|
|
|
|
TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
|
|
ToBeSent);
|
|
|
|
// If we have not been already acked for the missing segments
|
|
// and if we know where to start retransmitting do so now.
|
|
// Also, re-validate SackListentry
|
|
|
|
Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
|
|
Current = Prev->next;
|
|
|
|
if (!UseSackList || (Current && Current->begin != CurBegin)) {
|
|
// The SACK list changed while we were in a transmission.
|
|
// Just bail out, and wait for the next ACK to continue
|
|
// if necessary.
|
|
Current = NULL;
|
|
}
|
|
|
|
while (Current && Current->next &&
|
|
(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna)) &&
|
|
(SEQ_LT(SeqTCB->tcb_senduna, Current->next->end))) {
|
|
|
|
SeqNum NextSeq;
|
|
|
|
ASSERT(SEQ_LTE(Current->begin, Current->end));
|
|
|
|
// There can be multiple dropped packets till
|
|
// Current->begin.
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
|
|
"Scanning after Current %d %d\n",
|
|
Current->begin, Current->end));
|
|
}
|
|
|
|
NextSeq = Current->end;
|
|
CurBegin = Current->begin;
|
|
|
|
ASSERT(SEQ_LT(NextSeq, SeqTCB->tcb_sendmax));
|
|
|
|
// If we have not yet sent the segment keep quiet now.
|
|
|
|
if (SEQ_GTE(NextSeq, SeqTCB->tcb_sendnext) ||
|
|
(SEQ_LTE(NextSeq, SeqTCB->tcb_senduna))) {
|
|
|
|
break;
|
|
}
|
|
|
|
// Position cursend by following number of bytes
|
|
|
|
AmtForward = NextSeq - NewSeq;
|
|
|
|
if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
|
|
CurQ = QHEAD(&SeqTCB->tcb_sendq);
|
|
SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
|
|
while (AmtForward) {
|
|
if (AmtForward >= SendReq->tsr_unasize) {
|
|
AmtForward -= SendReq->tsr_unasize;
|
|
CurQ = QNEXT(CurQ);
|
|
SendReq =
|
|
(TCPSendReq *)STRUCT_OF(TCPReq, CurQ, tr_q);
|
|
ASSERT(CurQ != QEND(&SeqTCB->tcb_sendq));
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
SendSize = SendReq->tsr_unasize - AmtForward;
|
|
Buffer = SendReq->tsr_buffer;
|
|
Offset = SendReq->tsr_offset;
|
|
while (AmtForward) {
|
|
uint Length;
|
|
ASSERT((Offset < NdisBufferLength(Buffer)) ||
|
|
((Offset == 0) &&
|
|
(NdisBufferLength(Buffer) == 0)));
|
|
|
|
Length = NdisBufferLength(Buffer) - Offset;
|
|
if (AmtForward >= Length) {
|
|
// We're moving past this one. Skip over him,
|
|
// and 0 the Offset we're keeping.
|
|
|
|
AmtForward -= Length;
|
|
Offset = 0;
|
|
Buffer = NDIS_BUFFER_LINKAGE(Buffer);
|
|
ASSERT(Buffer != NULL);
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
Offset = Offset + AmtForward;
|
|
|
|
// Okay. Now retransmit this seq too.
|
|
|
|
if (Current->next) {
|
|
ToBeSent = Current->next->begin - Current->end;
|
|
|
|
} else {
|
|
ToBeSent = SeqTCB->tcb_mss;
|
|
}
|
|
|
|
IF_TCPDBG(TCP_DEBUG_SACK) {
|
|
KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
|
|
"SACK inner loop rexmiting %d %d %d\n",
|
|
Current->end, SendSize, ToBeSent));
|
|
}
|
|
|
|
TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize,
|
|
NextSeq, ToBeSent);
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
|
|
// Also, re-validate Current Sack list in SackListentry
|
|
|
|
Prev =
|
|
STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
|
|
Current = Prev->next;
|
|
|
|
while (Current && Current->begin != CurBegin) {
|
|
// The SACK list changed while in TCPFastSend.
|
|
// Just bail out.
|
|
Current = Current->next;
|
|
}
|
|
|
|
if (Current) {
|
|
Current = Current->next;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
ToBeSent = SeqTCB->tcb_mss;
|
|
|
|
TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
|
|
ToBeSent);
|
|
}
|
|
} else {
|
|
ASSERT(SeqTCB->tcb_cursend == NULL);
|
|
}
|
|
}
|
|
SeqTCB->tcb_cwin = NewCWin;
|
|
|
|
// Make sure there is nothing outstanding or the retransmit timer is
|
|
// running or we are in the process of sending a segment (and yet to
|
|
// start the timer).
|
|
ASSERT((SeqTCB->tcb_sendnext == SeqTCB->tcb_senduna) ||
|
|
TCB_TIMER_RUNNING_R(SeqTCB, RXMIT_TIMER) ||
|
|
(SeqTCB->tcb_flags & IN_TCP_SEND));
|
|
|
|
TCBHandle = DISPATCH_LEVEL;
|
|
DerefTCB(SeqTCB, TCBHandle);
|
|
return;
|
|
}
|
|
|
|
//* TCPFastSend - To send a segment without changing TCB state
//
//  Called to handle fast retransmit of the segment
//  tcb_lock will be held while entering (called by TCPRcv)
//
//  Input:  SendTCB     - Pointer to TCB
//          in_SendBuf  - Pointer to ndis_buffer
//          in_SendOfs  - Send Offset
//          in_SendReq  - current send request
//          in_SendSize - size of this send
//          NextSeq     - sequence number of the first byte to send
//          in_ToBeSent - upper bound on the number of bytes to send
//
//  Returns: Nothing.
//
|
|
|
|
void
|
|
TCPFastSend(TCB * SendTCB, PNDIS_BUFFER in_SendBuf, uint in_SendOfs,
|
|
TCPSendReq * in_SendReq, uint in_SendSize, SeqNum NextSeq,
|
|
int in_ToBeSent)
|
|
{
|
|
uint AmountToSend; // Amount to send this time.
|
|
uint AmountLeft;
|
|
TCPHeader *Header; // TCP header for a send.
|
|
PNDIS_BUFFER FirstBuffer, CurrentBuffer;
|
|
TCPSendReq *CurSend;
|
|
SendCmpltContext *SCC;
|
|
SeqNum OldSeq;
|
|
SeqNum SendNext;
|
|
IP_STATUS SendStatus;
|
|
uint AmtOutstanding, AmtUnsent;
|
|
CTELockHandle TCBHandle;
|
|
void *Irp;
|
|
uint TSLen=0;
|
|
|
|
uint SendOfs = in_SendOfs;
|
|
uint SendSize = in_SendSize;
|
|
|
|
PNDIS_BUFFER SendBuf = in_SendBuf;
|
|
|
|
SendNext = NextSeq;
|
|
CurSend = in_SendReq;
|
|
|
|
TCBHandle = DISPATCH_LEVEL;
|
|
|
|
CTEStructAssert(SendTCB, tcb);
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
|
|
ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
|
|
ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
|
|
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
|
|
(SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
|
|
|
|
AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
|
|
SendTCB->tcb_senduna);
|
|
|
|
|
|
AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize), (int)SendTCB->tcb_sendwin);
|
|
|
|
while (AmtUnsent > 0) {
|
|
|
|
if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
|
|
|
|
// Since tcb_lock is released in this loop
|
|
// it is possible that delayed ack acked
|
|
// what we are trying to retransmit.
|
|
|
|
goto error_oor;
|
|
}
|
|
//This was minimum of sendwin and amtunsent
|
|
|
|
AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
|
|
|
|
// Time stamp option addition might force us to cut the data
|
|
// to be sent by 12 bytes.
|
|
|
|
if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
|
|
(AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
|
|
AmountToSend -= ALIGNED_TS_OPT_SIZE;
|
|
}
|
|
|
|
// See if we have enough to send. We'll send if we have at least a
|
|
// segment, or if we really have some data to send and we can send
|
|
// all that we have, or the send window is > 0 and we need to force
|
|
// output or send a FIN (note that if we need to force output
|
|
// SendWin will be at least 1 from the check above), or if we can
|
|
// send an amount == to at least half the maximum send window
|
|
// we've seen.
|
|
|
|
ASSERT((int)AmtUnsent >= 0);
|
|
|
|
// It's OK to send something. Try to get a header buffer now.
|
|
// Mark the TCB for debugging.
|
|
// This should be removed for shipping version.
|
|
|
|
FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
|
|
|
|
if (FirstBuffer != NULL) {
|
|
|
|
// Got a header buffer. Loop through the sends on the TCB,
|
|
// building a frame.
|
|
|
|
CurrentBuffer = FirstBuffer;
|
|
|
|
Header = (TCPHeader *) ((PUCHAR)Header + LocalNetInfo.ipi_hsize);
|
|
|
|
// allow room for filling time stamp options.
|
|
|
|
if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
|
|
|
|
// Account for time stamp options
|
|
|
|
TSLen = ALIGNED_TS_OPT_SIZE;
|
|
|
|
NdisAdjustBufferLength(FirstBuffer,
|
|
sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
|
|
|
|
SCC = ALIGN_UP_POINTER((SendCmpltContext *) (Header + 1),PVOID);
|
|
SCC = (SendCmpltContext *)((uchar *) SCC + ALIGNED_TS_OPT_SIZE);
|
|
|
|
} else {
|
|
|
|
SCC = (SendCmpltContext *) (Header + 1);
|
|
|
|
}
|
|
|
|
SCC = ALIGN_UP_POINTER(SCC, PVOID);
|
|
#if DBG
|
|
SCC->scc_sig = scc_signature;
|
|
#endif
|
|
|
|
FillTCPHeader(SendTCB, Header);
|
|
{
|
|
ulong L = SendNext;
|
|
Header->tcp_seq = net_long(L);
|
|
|
|
}
|
|
|
|
SCC->scc_ubufcount = 0;
|
|
SCC->scc_tbufcount = 0;
|
|
SCC->scc_count = 0;
|
|
SCC->scc_LargeSend = 0;
|
|
|
|
AmountLeft = AmountToSend;
|
|
|
|
if (AmountToSend != 0) {
|
|
|
|
long Result;
|
|
|
|
CTEStructAssert(CurSend, tsr);
|
|
SCC->scc_firstsend = CurSend;
|
|
|
|
do {
|
|
|
|
BOOLEAN DirectSend = FALSE;
|
|
|
|
ASSERT(CurSend->tsr_refcnt > 0);
|
|
|
|
Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
|
|
|
|
ASSERT(Result > 0);
|
|
|
|
SCC->scc_count++;
|
|
|
|
// If the current send offset is 0 and the current
|
|
// send is less than or equal to what we have left
|
|
// to send, we haven't already put a transport
|
|
// buffer on this send, and nobody else is using
|
|
// the buffer chain directly, just use the input
|
|
// buffers. We check for other people using them
|
|
// by looking at tsr_lastbuf. If it's NULL,
|
|
// nobody else is using the buffers. If it's not
|
|
// NULL, somebody is.
|
|
|
|
if (SendOfs == 0 &&
|
|
(SendSize <= AmountLeft) &&
|
|
(SCC->scc_tbufcount == 0) &&
|
|
CurSend->tsr_lastbuf == NULL) {
|
|
|
|
ulong length = 0;
|
|
PNDIS_BUFFER tmp = SendBuf;
|
|
|
|
while (tmp) {
|
|
length += NdisBufferLength(tmp);
|
|
tmp = NDIS_BUFFER_LINKAGE(tmp);
|
|
}
|
|
|
|
// If sum of mdl lengths is > request length
|
|
// use slow path.
|
|
|
|
if (AmountLeft >= length) {
|
|
DirectSend = TRUE;
|
|
}
|
|
}
|
|
|
|
if (DirectSend) {
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendBuf;
|
|
|
|
do {
|
|
SCC->scc_ubufcount++;
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
} while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
|
|
|
|
CurSend->tsr_lastbuf = CurrentBuffer;
|
|
AmountLeft -= SendSize;
|
|
SendSize = 0;
|
|
} else {
|
|
uint AmountToDup;
|
|
PNDIS_BUFFER NewBuf, Buf;
|
|
uint Offset;
|
|
NDIS_STATUS NStatus;
|
|
uchar *VirtualAddress;
|
|
uint Length;
|
|
|
|
// Either the current send has more data than
|
|
// we want to send, or the starting offset is
|
|
// not 0. In either case we'll need to loop
|
|
// through the current send, allocating buffers.
|
|
|
|
Buf = SendBuf;
|
|
|
|
Offset = SendOfs;
|
|
|
|
do {
|
|
ASSERT(Buf != NULL);
|
|
|
|
TcpipQueryBuffer(Buf, &VirtualAddress, &Length,
|
|
NormalPagePriority);
|
|
|
|
if (VirtualAddress == NULL) {
|
|
|
|
if (SCC->scc_tbufcount == 0 &&
|
|
SCC->scc_ubufcount == 0) {
|
|
//TCPSendComplete(SCC, FirstBuffer,IP_SUCCESS);
|
|
goto error_oor1;
|
|
|
|
}
|
|
AmountToSend -= AmountLeft;
|
|
AmountLeft = 0;
|
|
break;
|
|
|
|
}
|
|
ASSERT((Offset < Length) ||
|
|
(Offset == 0 && Length == 0));
|
|
|
|
// Adjust the length for the offset into
|
|
// this buffer.
|
|
|
|
Length -= Offset;
|
|
|
|
AmountToDup = MIN(AmountLeft, Length);
|
|
|
|
NdisAllocateBuffer(&NStatus, &NewBuf,
|
|
TCPSendBufferPool,
|
|
VirtualAddress + Offset,
|
|
AmountToDup);
|
|
|
|
if (NStatus == NDIS_STATUS_SUCCESS) {
|
|
|
|
SCC->scc_tbufcount++;
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
|
|
|
|
CurrentBuffer = NewBuf;
|
|
|
|
if (AmountToDup >= Length) {
|
|
|
|
// Exhausted this buffer.
|
|
|
|
Buf = NDIS_BUFFER_LINKAGE(Buf);
|
|
Offset = 0;
|
|
|
|
} else {
|
|
|
|
Offset += AmountToDup;
|
|
ASSERT(Offset < NdisBufferLength(Buf));
|
|
}
|
|
|
|
SendSize -= AmountToDup;
|
|
AmountLeft -= AmountToDup;
|
|
|
|
} else {
|
|
|
|
// Couldn't allocate a buffer. If
|
|
// the packet is already partly built,
|
|
// send what we've got, otherwise
|
|
// bail out.
|
|
|
|
if (SCC->scc_tbufcount == 0 &&
|
|
SCC->scc_ubufcount == 0) {
|
|
goto error_oor1;
|
|
}
|
|
AmountToSend -= AmountLeft;
|
|
AmountLeft = 0;
|
|
|
|
}
|
|
} while (AmountLeft && SendSize);
|
|
|
|
SendBuf = Buf;
|
|
SendOfs = Offset;
|
|
}
|
|
|
|
if (CurSend->tsr_flags & TSR_FLAG_URG) {
|
|
ushort UP;
|
|
|
|
// This send is urgent data. We need to figure
|
|
// out what the urgent data pointer should be.
|
|
// We know sendnext is the starting sequence
|
|
// number of the frame, and that at the top of
|
|
// this do loop sendnext identified a byte in
|
|
// the CurSend at that time. We advanced CurSend
|
|
// at the same rate we've decremented
|
|
// AmountLeft (AmountToSend - AmountLeft ==
|
|
// AmountBuilt), so sendnext +
|
|
// (AmountToSend - AmountLeft) identifies a byte
|
|
// in the current value of CurSend, and that
|
|
// quantity plus tcb_sendsize is the sequence
|
|
// number one beyond the current send.
|
|
|
|
UP =
|
|
(ushort) (AmountToSend - AmountLeft) +
|
|
(ushort)SendSize -
|
|
((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
|
|
|
|
Header->tcp_urgent = net_short(UP);
|
|
|
|
Header->tcp_flags |= TCP_FLAG_URG;
|
|
}
|
|
// See if we've exhausted this send. If we have,
|
|
// set the PUSH bit in this frame and move on to
|
|
// the next send. We also need to check the
|
|
// urgent data bit.
|
|
|
|
if (SendSize == 0) {
|
|
Queue *Next;
|
|
ulong PrevFlags;
|
|
|
|
// We've exhausted this send. Set the PUSH bit.
|
|
Header->tcp_flags |= TCP_FLAG_PUSH;
|
|
PrevFlags = CurSend->tsr_flags;
|
|
Next = QNEXT(&CurSend->tsr_req.tr_q);
|
|
if (Next != QEND(&SendTCB->tcb_sendq)) {
|
|
CurSend = STRUCT_OF(TCPSendReq,
|
|
QSTRUCT(TCPReq, Next, tr_q),
|
|
tsr_req);
|
|
CTEStructAssert(CurSend, tsr);
|
|
SendSize = CurSend->tsr_unasize;
|
|
SendOfs = CurSend->tsr_offset;
|
|
SendBuf = CurSend->tsr_buffer;
|
|
|
|
// Check the urgent flags. We can't combine
|
|
// new urgent data on to the end of old
|
|
// non-urgent data.
|
|
if ((PrevFlags & TSR_FLAG_URG) && !
|
|
(CurSend->tsr_flags & TSR_FLAG_URG))
|
|
break;
|
|
} else {
|
|
ASSERT(AmountLeft == 0);
|
|
CurSend = NULL;
|
|
SendBuf = NULL;
|
|
}
|
|
}
|
|
} while (AmountLeft != 0);
|
|
|
|
} else {
|
|
|
|
// Amt to send is 0.
|
|
// Just bail out and start the timer.
|
|
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
}
|
|
FreeTCPHeader(FirstBuffer);
|
|
return;
|
|
|
|
}
|
|
|
|
// Adjust for what we're really going to send.
|
|
|
|
AmountToSend -= AmountLeft;
|
|
|
|
OldSeq = SendNext;
|
|
SendNext += AmountToSend;
|
|
AmtUnsent -= AmountToSend;
|
|
|
|
TStats.ts_retranssegs++;
|
|
|
|
// We've built the frame entirely. If we've sent everything
|
|
// we have and there's a FIN pending, OR it in.
|
|
|
|
AmountToSend += sizeof(TCPHeader);
|
|
|
|
SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
|
|
FORCE_OUTPUT);
|
|
|
|
STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
|
|
STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
|
|
SendTCB->tcb_rcvdsegs = 0;
|
|
|
|
if ( (SendTCB->tcb_flags & KEEPALIVE) && (SendTCB->tcb_conn != NULL) )
|
|
START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
|
|
SendTCB->tcb_kacount = 0;
|
|
|
|
CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
|
|
|
|
Irp = NULL;
|
|
|
|
if (SCC->scc_firstsend) {
|
|
Irp = SCC->scc_firstsend->tsr_req.tr_context;
|
|
}
|
|
|
|
// We're all set. Xsum it and send it.
|
|
|
|
|
|
if (SendTCB->tcb_rce &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_XMT_CHECKSUM_OFFLOAD) &&
|
|
(SendTCB->tcb_rce->rce_OffloadFlags &
|
|
TCP_CHECKSUM_OPT_OFFLOAD) ){
|
|
|
|
uint PHXsum =
|
|
SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend + TSLen);
|
|
|
|
PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
|
|
Header->tcp_xsum = (ushort) PHXsum;
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 1;
|
|
|
|
} else {
|
|
|
|
Header->tcp_xsum =
|
|
~XsumSendChain(
|
|
SendTCB->tcb_phxsum +
|
|
(uint)net_short(AmountToSend + TSLen),
|
|
FirstBuffer);
|
|
SendTCB->tcb_opt.ioi_TcpChksum = 0;
|
|
}
|
|
|
|
SendStatus =
|
|
(*LocalNetInfo.ipi_xmit)(TCPProtInfo,
|
|
SCC,
|
|
FirstBuffer,
|
|
AmountToSend + TSLen,
|
|
SendTCB->tcb_daddr,
|
|
SendTCB->tcb_saddr,
|
|
&SendTCB->tcb_opt,
|
|
SendTCB->tcb_rce,
|
|
PROTOCOL_TCP,
|
|
Irp);
|
|
|
|
|
|
//Reacquire Lock to keep DerefTCB happy
|
|
//Bug #63904
|
|
|
|
if (SendStatus != IP_PENDING) {
|
|
TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
|
|
}
|
|
CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
|
|
|
|
SendTCB->tcb_error = SendStatus;
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
|
|
}
|
|
} else { // FirstBuffer != NULL.
|
|
|
|
goto error_oor;
|
|
}
|
|
} //while AmtUnsent > 0
|
|
|
|
return;
|
|
|
|
// Common case error handling code for out of resource conditions. Start the
|
|
// retransmit timer if it's not already running (so that we try this again
|
|
// later), clean up and return.
|
|
|
|
error_oor:
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
ushort tmp;
|
|
|
|
tmp = MIN(MAX(REXMIT_TO(SendTCB),
|
|
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
|
|
}
|
|
|
|
return;
|
|
|
|
error_oor1:
|
|
if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
|
|
ushort tmp;
|
|
|
|
tmp = MIN(MAX(REXMIT_TO(SendTCB),
|
|
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
|
|
|
|
START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
|
|
}
|
|
|
|
TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
//* TDISend - Send data on a connection.
|
|
//
|
|
// The main TDI send entry point. We take the input parameters, validate them,
|
|
// allocate a send request, etc. We then put the send request on the queue.
|
|
// If we have no other sends on the queue or Nagling is disabled we'll
|
|
// call TCPSend to send the data.
|
|
//
|
|
// Input: Request - The TDI request for the call.
|
|
// Flags - Flags for this send.
|
|
// SendLength - Length in bytes of send.
|
|
// SendBuffer - Pointer to buffer chain to be sent.
|
|
//
|
|
// Returns: Status of attempt to send.
|
|
//
|
|
TDI_STATUS
|
|
TdiSend(PTDI_REQUEST Request, ushort Flags, uint SendLength,
|
|
PNDIS_BUFFER SendBuffer)
|
|
{
|
|
TCPConn *Conn;
|
|
TCB *SendTCB;
|
|
TCPSendReq *SendReq;
|
|
CTELockHandle ConnTableHandle;
|
|
TDI_STATUS Error;
|
|
uint EmptyQ;
|
|
|
|
#if DBG_VALIDITY_CHECK
|
|
|
|
// Check for Mdl sanity in send requests
|
|
// Should be removed for RTM
|
|
|
|
uint RealSendSize;
|
|
PNDIS_BUFFER Temp;
|
|
|
|
// Loop through the buffer chain, and make sure that the length matches
|
|
// up with SendLength.
|
|
|
|
Temp = SendBuffer;
|
|
RealSendSize = 0;
|
|
if (Temp != NULL) {
|
|
|
|
do {
|
|
RealSendSize += NdisBufferLength(Temp);
|
|
Temp = NDIS_BUFFER_LINKAGE(Temp);
|
|
} while (Temp != NULL);
|
|
|
|
if (RealSendSize < SendLength) {
|
|
PIRP Irp = (PIRP)Request->RequestContext;
|
|
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
|
|
DbgPrint("Invalid TDI_SEND request issued to \\\\Device\\\\Tcp.\n");
|
|
DbgPrint("Irp: %p Mdl: %p CompletionRoutine: %p\n",
|
|
Irp, Irp->MdlAddress, IrpSp->CompletionRoutine);
|
|
DbgPrint("This is not a bug in tcpip.sys.\n");
|
|
DbgPrint("Please notify the originator of this IRP.\n");
|
|
DbgBreakPoint();
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
//CTEGetLock(&ConnTableLock, &ConnTableHandle);
|
|
|
|
Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext), &ConnTableHandle);
|
|
|
|
if (Conn != NULL) {
|
|
CTEStructAssert(Conn, tc);
|
|
|
|
SendTCB = Conn->tc_tcb;
|
|
if (SendTCB != NULL) {
|
|
CTEStructAssert(SendTCB, tcb);
|
|
CTEGetLockAtDPC(&SendTCB->tcb_lock);
|
|
CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), DISPATCH_LEVEL);
|
|
if (DATA_SEND_STATE(SendTCB->tcb_state) && !CLOSING(SendTCB)) {
|
|
// We have a TCB, and it's valid. Get a send request now.
|
|
|
|
CheckTCBSends(SendTCB);
|
|
|
|
if (SendLength == 0) {
|
|
Error = TDI_SUCCESS;
|
|
} else if (((ULONG64)SendTCB->tcb_unacked + SendLength)
|
|
>= MAXULONG) {
|
|
Error = TDI_INVALID_PARAMETER;
|
|
} else {
|
|
|
|
SendReq = GetSendReq();
|
|
if (SendReq != NULL) {
|
|
SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
|
|
SendReq->tsr_req.tr_context = Request->RequestContext;
|
|
SendReq->tsr_buffer = SendBuffer;
|
|
SendReq->tsr_size = SendLength;
|
|
SendReq->tsr_unasize = SendLength;
|
|
SendReq->tsr_refcnt = 1; // ACK will decrement this ref
|
|
|
|
SendReq->tsr_offset = 0;
|
|
SendReq->tsr_lastbuf = NULL;
|
|
SendReq->tsr_time = TCPTime;
|
|
SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ?
|
|
TSR_FLAG_URG : 0;
|
|
SendTCB->tcb_unacked += SendLength;
|
|
|
|
if (Flags & TDI_SEND_AND_DISCONNECT) {
|
|
|
|
//move the state to fin_wait and
|
|
//mark the tcb for send and disconnect
|
|
|
|
if (SendTCB->tcb_state == TCB_ESTAB) {
|
|
SendTCB->tcb_state = TCB_FIN_WAIT1;
|
|
} else {
|
|
ASSERT(SendTCB->tcb_state == TCB_CLOSE_WAIT);
|
|
SendTCB->tcb_state = TCB_LAST_ACK;
|
|
}
|
|
SendTCB->tcb_slowcount++;
|
|
SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
|
|
SendTCB->tcb_fastchk |= TCP_FLAG_SEND_AND_DISC;
|
|
SendTCB->tcb_flags |= FIN_NEEDED;
|
|
SendReq->tsr_flags |= TSR_FLAG_SEND_AND_DISC;
|
|
|
|
//extrac reference to make sure that
|
|
//this request will not be completed until the
|
|
//connection is closed
|
|
|
|
SendReq->tsr_refcnt++;
|
|
InterlockedDecrement((PLONG)&TStats.ts_currestab);
|
|
|
|
}
|
|
EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
|
|
ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
|
|
if (SendTCB->tcb_cursend == NULL) {
|
|
SendTCB->tcb_cursend = SendReq;
|
|
SendTCB->tcb_sendbuf = SendBuffer;
|
|
SendTCB->tcb_sendofs = 0;
|
|
SendTCB->tcb_sendsize = SendLength;
|
|
}
|
|
if (EmptyQ) {
|
|
REFERENCE_TCB(SendTCB);
|
|
TCPSend(SendTCB, ConnTableHandle);
|
|
} else if (!(SendTCB->tcb_flags & NAGLING) ||
|
|
(SendTCB->tcb_unacked -
|
|
(SendTCB->tcb_sendmax -
|
|
SendTCB->tcb_senduna)) >=
|
|
SendTCB->tcb_mss) {
|
|
REFERENCE_TCB(SendTCB);
|
|
TCPSend(SendTCB, ConnTableHandle);
|
|
} else
|
|
CTEFreeLock(&SendTCB->tcb_lock,
|
|
ConnTableHandle);
|
|
|
|
return TDI_PENDING;
|
|
} else
|
|
Error = TDI_NO_RESOURCES;
|
|
}
|
|
} else
|
|
Error = TDI_INVALID_STATE;
|
|
|
|
CTEFreeLock(&SendTCB->tcb_lock, ConnTableHandle);
|
|
return Error;
|
|
} else {
|
|
CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), ConnTableHandle);
|
|
Error = TDI_INVALID_STATE;
|
|
}
|
|
} else
|
|
Error = TDI_INVALID_CONNECTION;
|
|
|
|
//CTEFreeLock(&ConnTableLock, ConnTableHandle);
|
|
return Error;
|
|
|
|
}
|
|
|
|
#pragma BEGIN_INIT
|
|
|
|
extern void *TLRegisterProtocol(uchar Protocol, void *RcvHandler,
|
|
void *XmitHandler, void *StatusHandler,
|
|
void *RcvCmpltHandler, void *PnPHandler,
|
|
void *ElistHandler);
|
|
|
|
extern IP_STATUS TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src,
|
|
IPAddr LocalAddr, IPAddr SrcAddr,
|
|
IPHeader UNALIGNED * IPH, uint IPHLength,
|
|
IPRcvBuf * RcvBuf, uint Size, uchar IsBCast,
|
|
uchar Protocol, IPOptInfo * OptInfo);
|
|
extern void TCPRcvComplete(void);
|
|
|
|
uchar SendInited = FALSE;
|
|
|
|
//* InitTCPSend - Initialize our send side.
|
|
//
|
|
// Called during init time to initialize our TCP send state.
|
|
//
|
|
// Input: Nothing.
|
|
//
|
|
// Returns: TRUE if we inited, false if we didn't.
|
|
//
|
|
int
|
|
InitTCPSend(void)
|
|
{
|
|
NDIS_STATUS Status;
|
|
|
|
|
|
TcpHeaderBufferSize =
|
|
(USHORT)(ALIGN_UP(LocalNetInfo.ipi_hsize,PVOID) +
|
|
ALIGN_UP((sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE + ALIGNED_SACK_OPT_SIZE),PVOID) +
|
|
ALIGN_UP(MAX(MSS_OPT_SIZE, sizeof(SendCmpltContext)),PVOID));
|
|
|
|
#if BACK_FILL
|
|
TcpHeaderBufferSize += MAX_BACKFILL_HDR_SIZE;
|
|
#endif
|
|
|
|
TcpHeaderPool = MdpCreatePool (TcpHeaderBufferSize, 'thCT');
|
|
if (!TcpHeaderPool)
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
NdisAllocateBufferPool(&Status, &TCPSendBufferPool, NUM_TCP_BUFFERS);
|
|
if (Status != NDIS_STATUS_SUCCESS) {
|
|
MdpDestroyPool(TcpHeaderPool);
|
|
return FALSE;
|
|
}
|
|
TCPProtInfo = TLRegisterProtocol(PROTOCOL_TCP, TCPRcv, TCPSendComplete,
|
|
TCPStatus, TCPRcvComplete,
|
|
TCPPnPPowerRequest, TCPElistChangeHandler);
|
|
if (TCPProtInfo == NULL) {
|
|
MdpDestroyPool(TcpHeaderPool);
|
|
NdisFreeBufferPool(TCPSendBufferPool);
|
|
return FALSE;
|
|
}
|
|
SendInited = TRUE;
|
|
return TRUE;
|
|
}
|
|
|
|
//* UnInitTCPSend - UnInitialize our send side.
|
|
//
|
|
// Called during init time if we're going to fail to initialize.
|
|
//
|
|
// Input: Nothing.
|
|
//
|
|
// Returns: TRUE if we inited, false if we didn't.
|
|
//
|
|
void
|
|
UnInitTCPSend(void)
|
|
{
|
|
if (!SendInited)
|
|
return;
|
|
|
|
TLRegisterProtocol(PROTOCOL_TCP, NULL, NULL, NULL, NULL, NULL, NULL);
|
|
|
|
MdpDestroyPool(TcpHeaderPool);
|
|
NdisFreeBufferPool(TCPSendBufferPool);
|
|
}
|
|
#pragma END_INIT
|
|
|