You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2816 lines
99 KiB
2816 lines
99 KiB
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
//
// Copyright (c) 1985-2000 Microsoft Corporation
//
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
// You should have received a copy of the Microsoft End-User License Agreement
// for this software along with this release; see the file "license.txt".
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
//
// Abstract:
//
// TCP send code.
//
// This file contains the code for sending Data and Control segments.
//
|
|
|
|
|
|
#include "oscfg.h"
|
|
#include "ndis.h"
|
|
#include "ip6imp.h"
|
|
#include "ip6def.h"
|
|
#include "tdi.h"
|
|
#include "tdint.h"
|
|
#include "tdistat.h"
|
|
#include "queue.h"
|
|
#include "transprt.h"
|
|
#include "addr.h"
|
|
#include "tcp.h"
|
|
#include "tcb.h"
|
|
#include "tcpconn.h"
|
|
#include "tcpsend.h"
|
|
#include "tcprcv.h"
|
|
#include "info.h"
|
|
#include "tcpcfg.h"
|
|
#include "route.h"
|
|
#include "security.h"
|
|
|
|
void *TCPProtInfo; // TCP protocol info for IP.
|
|
|
|
SLIST_HEADER TCPSendReqFree; // Send req. free list.
|
|
|
|
KSPIN_LOCK TCPSendReqFreeLock;
|
|
KSPIN_LOCK TCPSendReqCompleteLock;
|
|
|
|
uint NumTCPSendReq; // Current number of SendReqs in system.
|
|
uint MaxSendReq = 0xffffffff; // Maximum allowed number of SendReqs.
|
|
|
|
extern KSPIN_LOCK TCBTableLock;
|
|
|
|
//
|
|
// All of the init code can be discarded.
|
|
//
|
|
#ifdef ALLOC_PRAGMA
|
|
|
|
#pragma alloc_text(INIT, InitTCPSend)
|
|
|
|
#endif // ALLOC_PRAGMA
|
|
|
|
extern void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
|
|
|
|
#define MIN_INITIAL_RTT 3 // In msec.
|
|
|
|
|
|
//* FreeSendReq - Free a send request structure.
|
|
//
|
|
// Called to free a send request structure.
|
|
//
|
|
void // Returns: Nothing.
|
|
FreeSendReq(
|
|
TCPSendReq *FreedReq) // Connection request structure to be freed.
|
|
{
|
|
PSLIST_ENTRY BufferLink;
|
|
|
|
CHECK_STRUCT(FreedReq, tsr);
|
|
|
|
BufferLink = CONTAINING_RECORD(&(FreedReq->tsr_req.tr_q.q_next),
|
|
SLIST_ENTRY, Next);
|
|
|
|
ExInterlockedPushEntrySList(&TCPSendReqFree, BufferLink,
|
|
&TCPSendReqFreeLock);
|
|
}
|
|
|
|
|
|
//* GetSendReq - Get a send request structure.
|
|
//
|
|
// Called to get a send request structure.
|
|
//
|
|
TCPSendReq * // Returns: Pointer to SendReq structure, or NULL if none.
|
|
GetSendReq(
|
|
void) // Nothing.
|
|
{
|
|
TCPSendReq *Temp;
|
|
PSLIST_ENTRY BufferLink;
|
|
Queue *QueuePtr;
|
|
TCPReq *ReqPtr;
|
|
|
|
BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
|
|
&TCPSendReqFreeLock);
|
|
|
|
if (BufferLink != NULL) {
|
|
QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
|
|
ReqPtr = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
|
|
Temp = CONTAINING_RECORD(ReqPtr, TCPSendReq, tsr_req);
|
|
CHECK_STRUCT(Temp, tsr);
|
|
} else {
|
|
if (NumTCPSendReq < MaxSendReq)
|
|
Temp = ExAllocatePool(NonPagedPool, sizeof(TCPSendReq));
|
|
else
|
|
Temp = NULL;
|
|
|
|
if (Temp != NULL) {
|
|
ExInterlockedAddUlong((PULONG)&NumTCPSendReq, 1, &TCPSendReqFreeLock);
|
|
#if DBG
|
|
Temp->tsr_req.tr_sig = tr_signature;
|
|
Temp->tsr_sig = tsr_signature;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
return Temp;
|
|
}
|
|
|
|
|
|
//* TCPHopLimit
|
|
//
|
|
// Given a TCB, returns the Hop Limit to use in a sent packet.
|
|
// Assumes the caller holds a lock on the TCB.
|
|
//
|
|
uchar
|
|
TCPHopLimit(TCB *Tcb)
|
|
{
|
|
if (Tcb->tcb_hops != -1)
|
|
return (uchar) Tcb->tcb_hops;
|
|
else
|
|
return (uchar) Tcb->tcb_rce->NCE->IF->CurHopLimit;
|
|
}
|
|
|
|
|
|
//* TCPSendComplete - Complete a TCP send.
|
|
//
|
|
// Called by IP when a send we've made is complete. We free the buffer,
|
|
// and possibly complete some sends. Each send queued on a TCB has a ref.
|
|
// count with it, which is the number of times a pointer to a buffer
|
|
// associated with the send has been passed to the underlying IP layer. We
|
|
// can't complete a send until that count it 0. If this send was actually
|
|
// from a send of data, we'll go down the chain of send and decrement the
|
|
// refcount on each one. If we have one going to 0 and the send has already
|
|
// been acked we'll complete the send. If it hasn't been acked we'll leave
|
|
// it until the ack comes in.
|
|
//
|
|
// NOTE: We aren't protecting any of this with locks. When we port this to
|
|
// NT we'll need to fix this, probably with a global lock. See the comments
|
|
// in ACKSend() in TCPRCV.C for more details.
|
|
//
|
|
void // Returns: Nothing.
|
|
TCPSendComplete(
|
|
PNDIS_PACKET Packet, // Packet that was sent.
|
|
IP_STATUS Status)
|
|
{
|
|
PNDIS_BUFFER BufferChain;
|
|
SendCmpltContext *SCContext;
|
|
PVOID Memory;
|
|
UINT Unused;
|
|
|
|
UNREFERENCED_PARAMETER(Status);
|
|
|
|
//
|
|
// Pull values we care about out of the packet structure.
|
|
//
|
|
SCContext = (SendCmpltContext *) PC(Packet)->CompletionData;
|
|
BufferChain = NdisFirstBuffer(Packet);
|
|
NdisQueryBufferSafe(BufferChain, &Memory, &Unused, LowPagePriority);
|
|
ASSERT(Memory != NULL);
|
|
|
|
//
|
|
// See if we have a send complete context. It will be present for data
|
|
// packets and means we have extra work to do. For non-data packets, we
|
|
// can just skip all this as there is only the header buffer to deal with.
|
|
//
|
|
if (SCContext != NULL) {
|
|
KIRQL OldIrql;
|
|
PNDIS_BUFFER CurrentBuffer;
|
|
TCPSendReq *CurrentSend;
|
|
uint i;
|
|
|
|
CHECK_STRUCT(SCContext, scc);
|
|
|
|
//
|
|
// First buffer in chain is the TCP header buffer.
|
|
// Skip over it for now.
|
|
//
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
|
|
|
|
//
|
|
// Also skip over any 'user' buffers (those loaned out to us
|
|
// instead of copied) as we don't need to free them.
|
|
//
|
|
for (i = 0; i < (uint)SCContext->scc_ubufcount; i++) {
|
|
ASSERT(CurrentBuffer != NULL);
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
}
|
|
|
|
//
|
|
// Now loop through and free our (aka 'transport') buffers.
|
|
// We need to do this before decrementing the reference count to avoid
|
|
// destroying the buffer chain if we have to zap tsr_lastbuf->Next to
|
|
// NULL.
|
|
//
|
|
for (i = 0; i < (uint)SCContext->scc_tbufcount; i++) {
|
|
PNDIS_BUFFER TempBuffer;
|
|
|
|
ASSERT(CurrentBuffer != NULL);
|
|
|
|
TempBuffer = CurrentBuffer;
|
|
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
|
|
NdisFreeBuffer(TempBuffer);
|
|
}
|
|
|
|
//
|
|
// Loop through the send requests attached to this packet,
|
|
// reducing the reference count on each and enqueing them for
|
|
// completion where appropriate.
|
|
//
|
|
CurrentSend = SCContext->scc_firstsend;
|
|
for (i = 0; i< SCContext->scc_count; i++) {
|
|
Queue *TempQ;
|
|
long Result;
|
|
|
|
TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
|
|
CHECK_STRUCT(CurrentSend, tsr);
|
|
|
|
Result = InterlockedDecrement(&(CurrentSend->tsr_refcnt));
|
|
ASSERT(Result >= 0);
|
|
|
|
if (Result <= 0) {
|
|
//
|
|
// Reference count has gone to 0 which means the send has
|
|
// been ACK'd or cancelled. Complete it now.
|
|
//
|
|
// If we've sent directly from this send, NULL out the next
|
|
// pointer for the last buffer in the chain.
|
|
//
|
|
if (CurrentSend->tsr_lastbuf != NULL) {
|
|
NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
|
|
CurrentSend->tsr_lastbuf = NULL;
|
|
}
|
|
|
|
KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
|
|
ENQUEUE(&SendCompleteQ, &CurrentSend->tsr_req.tr_q);
|
|
RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
|
|
KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
|
|
}
|
|
|
|
CurrentSend = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
|
|
TCPSendReq, tsr_req);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Free the TCP header buffer and our packet structure proper.
|
|
//
|
|
NdisFreeBuffer(BufferChain);
|
|
ExFreePool(Memory);
|
|
NdisFreePacket(Packet);
|
|
|
|
//
|
|
// If there are any TCP send requests to complete, do so now.
|
|
//
|
|
if (RequestCompleteFlags & SEND_REQUEST_COMPLETE)
|
|
TCPRcvComplete();
|
|
}
|
|
|
|
|
|
//* RcvWin - Figure out the receive window to offer in an ack.
|
|
//
|
|
// A routine to figure out what window to offer on a connection. We
|
|
// take into account SWS avoidance, what the default connection window is,
|
|
// and what the last window we offered is.
|
|
//
|
|
uint // Returns: Window to be offered.
|
|
RcvWin(
|
|
TCB *WinTCB) // TCB on which to perform calculations.
|
|
{
|
|
int CouldOffer; // The window size we could offer.
|
|
|
|
CHECK_STRUCT(WinTCB, tcb);
|
|
|
|
CheckPacketList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
|
|
|
|
ASSERT(WinTCB->tcb_rcvwin >= 0);
|
|
|
|
CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
|
|
|
|
ASSERT(CouldOffer >= 0);
|
|
ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
|
|
|
|
if ((CouldOffer - WinTCB->tcb_rcvwin) >=
|
|
(int) MIN(WinTCB->tcb_defaultwin/2, WinTCB->tcb_mss)) {
|
|
WinTCB->tcb_rcvwin = CouldOffer;
|
|
}
|
|
|
|
return WinTCB->tcb_rcvwin;
|
|
}
|
|
|
|
|
|
//* ValidateSourceAndRoute - Validate the NTE and RCE.
|
|
//
|
|
// Checks that the NTE and RCE referenced by this TCB are still ok to use.
|
|
//
|
|
BOOLEAN
|
|
ValidateSourceAndRoute(
|
|
TCB *Tcb) // TCB being validated.
|
|
{
|
|
KIRQL Irql0;
|
|
|
|
//
|
|
// Update our copy of the validation counter.
|
|
// We need to do this before making the validation checks below
|
|
// (to avoid missing any additional changes while we're in here).
|
|
//
|
|
Tcb->tcb_routing = RouteCacheValidationCounter;
|
|
|
|
//
|
|
// Check that our NTE hasn't gone away.
|
|
//
|
|
KeAcquireSpinLock(&Tcb->tcb_nte->IF->Lock, &Irql0);
|
|
if (!IsValidNTE(Tcb->tcb_nte)) {
|
|
|
|
//
|
|
// Can't use this one anymore.
|
|
//
|
|
KeReleaseSpinLock(&Tcb->tcb_nte->IF->Lock, Irql0);
|
|
ReleaseNTE(Tcb->tcb_nte);
|
|
|
|
//
|
|
// See if this address lives on as a different NTE.
|
|
//
|
|
Tcb->tcb_nte = FindNetworkWithAddress(&Tcb->tcb_saddr,
|
|
Tcb->tcb_sscope_id);
|
|
if (Tcb->tcb_nte == NULL) {
|
|
|
|
//
|
|
// The address is gone.
|
|
//
|
|
return FALSE;
|
|
}
|
|
} else {
|
|
KeReleaseSpinLock(&Tcb->tcb_nte->IF->Lock, Irql0);
|
|
}
|
|
|
|
//
|
|
// Also check that the RCE is still around.
|
|
//
|
|
Tcb->tcb_rce = ValidateRCE(Tcb->tcb_rce, Tcb->tcb_nte);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
//* SendSYN - Send a SYN segment.
|
|
//
|
|
// This is called during connection establishment time to send a SYN
|
|
// segment to the peer. We get a buffer if we can, and then fill
|
|
// it in. There's a tricky part here where we have to build the MSS
|
|
// option in the header - we find the MSS by finding the MSS offered
|
|
// by the net for the local address. After that, we send it.
|
|
//
|
|
void // Returns: Nothing.
|
|
SendSYN(
|
|
TCB *SYNTcb, // TCB from which SYN is to be sent.
|
|
KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
|
|
{
|
|
PNDIS_PACKET Packet;
|
|
void *Memory;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
uchar *OptPtr;
|
|
NDIS_STATUS NdisStatus;
|
|
uint Offset;
|
|
uint Length;
|
|
uint PayloadLength;
|
|
ushort TempWin;
|
|
ushort MSS;
|
|
RouteCacheEntry *RCE;
|
|
|
|
|
|
CHECK_STRUCT(SYNTcb, tcb);
|
|
|
|
//
|
|
// Go ahead and set the retransmission timer now, in case we can't get a
|
|
// packet or a buffer. In the future we might want to queue the
|
|
// connection for when we get resources.
|
|
//
|
|
START_TCB_TIMER(SYNTcb->tcb_rexmittimer, SYNTcb->tcb_rexmit);
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (SYNTcb->tcb_rce == NULL) {
|
|
InitRCE(SYNTcb);
|
|
if (SYNTcb->tcb_rce == NULL) {
|
|
goto ErrorReturn;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
if (SYNTcb->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(SYNTcb)) {
|
|
//
|
|
// Even though we're about to close this TCB,
|
|
// we should leave it in a consistent state.
|
|
//
|
|
SYNTcb->tcb_sendnext++;
|
|
if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
|
|
SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
|
|
}
|
|
|
|
TryToCloseTCB(SYNTcb, TCB_CLOSE_ABORTED, PreLockIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Allocate a packet header/buffer/data region for this SYN.
|
|
//
|
|
// Our buffer has space at the beginning which will be filled in
|
|
// later by the link level. At this level we add the IPv6Header,
|
|
// TCPHeader, and TCP Maximum Segment Size option which follow.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool and
|
|
// REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
|
|
//
|
|
Offset = SYNTcb->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
Length = Offset + sizeof(*IP) + sizeof(*TCP) + MSS_OPT_SIZE;
|
|
NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
//
|
|
// Upon failure, advance tcb_sendnext anyway.
|
|
// We need to do this because TCBTimeout will *retreat* tcb_sendnext
|
|
// if this SYN is later retransmitted, and if that retreat occurs
|
|
// without this advance, we end up with a hole in the sequence-space.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCP SendSYN: Couldn't allocate IPv6 packet header!?!\n"));
|
|
ErrorReturn:
|
|
SYNTcb->tcb_sendnext++;
|
|
if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
|
|
SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
|
|
}
|
|
KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
|
|
return;
|
|
}
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
PC(Packet)->CompletionData = NULL;
|
|
|
|
//
|
|
// Since this is a SYN-only packet (maybe someday we'll send data with
|
|
// the SYN?) we only have the one buffer and nothing to link on after.
|
|
//
|
|
|
|
//
|
|
// We now have all the resources we need to send.
|
|
// Prepare the actual packet.
|
|
//
|
|
|
|
//
|
|
// Our header buffer has extra space for other headers to be
|
|
// prepended to ours without requiring further allocation calls.
|
|
// Put the actual TCP/IP header at the end of the buffer.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
|
|
IP->HopLimit = TCPHopLimit(SYNTcb);
|
|
IP->Source = SYNTcb->tcb_saddr;
|
|
IP->Dest = SYNTcb->tcb_daddr;
|
|
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
TCP->tcp_src = SYNTcb->tcb_sport;
|
|
TCP->tcp_dest = SYNTcb->tcb_dport;
|
|
TCP->tcp_seq = net_long(SYNTcb->tcb_sendnext);
|
|
|
|
//
|
|
// The SYN flag takes up one element in sequence number space.
|
|
// Record that we've sent it here (if we need to retransmit the SYN
|
|
// segment, TCBTimeout will reset sendnext before calling us again).
|
|
//
|
|
SYNTcb->tcb_sendnext++;
|
|
if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
|
|
TStats.ts_outsegs++;
|
|
SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
|
|
} else
|
|
TStats.ts_retranssegs++;
|
|
|
|
TCP->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
|
|
|
|
//
|
|
// REVIEW: TCP flags are entirely based upon our state, so this could
|
|
// REVIEW: be replaced by a (quicker) array lookup.
|
|
//
|
|
if (SYNTcb->tcb_state == TCB_SYN_RCVD)
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN | TCP_FLAG_ACK);
|
|
else
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN);
|
|
|
|
TempWin = (ushort)SYNTcb->tcb_rcvwin;
|
|
TCP->tcp_window = net_short(TempWin);
|
|
TCP->tcp_urgent = 0;
|
|
TCP->tcp_xsum = 0;
|
|
OptPtr = (uchar *)(TCP + 1);
|
|
|
|
//
|
|
// Compose the Maximum Segment Size option.
|
|
//
|
|
// TBD: If we add IPv6 Jumbogram support, we should also add LFN
|
|
// TBD: support to TCP and change this to handle a larger MSS.
|
|
//
|
|
MSS = SYNTcb->tcb_rce->NTE->IF->LinkMTU
|
|
- sizeof(IPv6Header) - sizeof(TCPHeader);
|
|
IF_TCPDBG(TCP_DEBUG_MSS) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
|
|
"SendSYN: Sending MSS option value of %d\n", MSS));
|
|
}
|
|
*OptPtr++ = TCP_OPT_MSS;
|
|
*OptPtr++ = MSS_OPT_SIZE;
|
|
*(ushort UNALIGNED *)OptPtr = net_short(MSS);
|
|
|
|
PayloadLength = sizeof(TCPHeader) + MSS_OPT_SIZE;
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
// REVIEW: The IPv4 implementation kept the IPv4 psuedo-header around
|
|
// REVIEW: in the TCB rather than recalculate it every time. Do this?
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, Offset + sizeof *IP, NULL, PayloadLength,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
ASSERT(TCP->tcp_xsum != 0);
|
|
|
|
//
|
|
// Capture and reference the RCE while we still hold the TCB lock.
|
|
// The TCB's reference on this particular RCE might go away at any point
|
|
// after we release the lock (or because we drop it ourselves below).
|
|
//
|
|
RCE = SYNTcb->tcb_rce;
|
|
AddRefRCE(RCE);
|
|
|
|
//
|
|
// If connection-acceptance has been delayed, release the TCB's RCE.
|
|
// This prevents TCBs in pre-established states from consuming
|
|
// an unbounded number of RCEs.
|
|
//
|
|
if (SYNTcb->tcb_flags & ACCEPT_PENDING) {
|
|
SYNTcb->tcb_rce = NULL;
|
|
ReleaseRCE(RCE);
|
|
}
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
|
|
|
|
IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
|
|
//
|
|
// Release the extra reference we took on the RCE above.
|
|
//
|
|
ReleaseRCE(RCE);
|
|
}
|
|
|
|
|
|
//* SendKA - Send a keep alive segment.
|
|
//
|
|
// This is called when we want to send a keep-alive. The idea is to provoke
|
|
// a response from our peer on an otherwise idle connection. We send a
|
|
// garbage byte of data in our keep-alives in order to cooperate with broken
|
|
// TCP implementations that don't respond to segments outside the window
|
|
// unless they contain data.
|
|
//
|
|
void // Returns: Nothing.
|
|
SendKA(
|
|
TCB *KATcb, // TCB from which keep alive is to be sent.
|
|
KIRQL PreLockIrql) // IRQL prior to acquiring lock on TCB.
|
|
{
|
|
PNDIS_PACKET Packet;
|
|
void *Memory;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
NDIS_STATUS NdisStatus;
|
|
int Offset;
|
|
uint Length;
|
|
uint PayloadLength;
|
|
ushort TempWin;
|
|
SeqNum TempSeq;
|
|
RouteCacheEntry *RCE;
|
|
|
|
CHECK_STRUCT(KATcb, tcb);
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (KATcb->tcb_rce == NULL) {
|
|
InitRCE(KATcb);
|
|
if (KATcb->tcb_rce == NULL) {
|
|
KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
if (KATcb->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(KATcb)) {
|
|
TryToCloseTCB(KATcb, TCB_CLOSE_ABORTED, PreLockIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Allocate a packet header/buffer/data region for this keepalive packet.
|
|
//
|
|
// Our buffer has space at the beginning which will be filled in
|
|
// later by the link level. At this level we add the IPv6Header,
|
|
// TCPHeader, and a single byte of data which follow.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool and
|
|
// REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
|
|
//
|
|
Offset = KATcb->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
Length = Offset + sizeof(*IP) + sizeof(*TCP) + 1;
|
|
NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
//
|
|
// REVIEW: What to do if this fails.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCP SendKA: Couldn't allocate IPv6 packet header!?!\n"));
|
|
KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
|
|
return;
|
|
}
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
PC(Packet)->CompletionData = NULL;
|
|
|
|
//
|
|
// Since this is a keepalive packet we only have the one buffer and
|
|
// nothing to link on after.
|
|
//
|
|
|
|
//
|
|
// Our header buffer has extra space for other headers to be
|
|
// prepended to ours without requiring further allocation calls.
|
|
// Put the actual TCP/IP header at the end of the buffer.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = TCPHopLimit(KATcb);
|
|
IP->Source = KATcb->tcb_saddr;
|
|
IP->Dest = KATcb->tcb_daddr;
|
|
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
TCP->tcp_src = KATcb->tcb_sport;
|
|
TCP->tcp_dest = KATcb->tcb_dport;
|
|
TempSeq = KATcb->tcb_senduna - 1;
|
|
TCP->tcp_seq = net_long(TempSeq);
|
|
TCP->tcp_ack = net_long(KATcb->tcb_rcvnext);
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
|
|
TempWin = (ushort)RcvWin(KATcb);
|
|
TCP->tcp_window = net_short(TempWin);
|
|
TCP->tcp_urgent = 0;
|
|
|
|
//
|
|
// Initialize the single byte that we're resending.
|
|
// N.B. Adequate space for this byte was allocated above.
|
|
//
|
|
*(uchar *)(TCP + 1) = 0;
|
|
|
|
TStats.ts_retranssegs++;
|
|
|
|
PayloadLength = sizeof(TCPHeader) + 1;
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, Offset + sizeof *IP, NULL, PayloadLength,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
ASSERT(TCP->tcp_xsum != 0);
|
|
|
|
//
|
|
// Capture and reference the RCE while we still hold the TCB lock.
|
|
// The TCB's reference on this particular RCE might go away at any
|
|
// point after we release the lock.
|
|
//
|
|
RCE = KATcb->tcb_rce;
|
|
AddRefRCE(RCE);
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
KATcb->tcb_kacount++;
|
|
KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
|
|
|
|
IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
|
|
//
|
|
// Release the extra reference we took on the RCE above.
|
|
//
|
|
ReleaseRCE(RCE);
|
|
}
|
|
|
|
|
|
//* SendACK - Send an ACK segment.
|
|
//
|
|
// This is called whenever we need to send an ACK for some reason. Nothing
|
|
// fancy, we just do it.
|
|
//
|
|
void // Returns: Nothing.
|
|
SendACK(
|
|
TCB *ACKTcb) // TCB from which ACK is to be sent.
|
|
{
|
|
PNDIS_PACKET Packet;
|
|
void *Memory;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
NDIS_STATUS NdisStatus;
|
|
KIRQL OldIrql;
|
|
int Offset;
|
|
uint Length;
|
|
uint PayloadLength;
|
|
SeqNum SendNext;
|
|
ushort TempWin;
|
|
RouteCacheEntry *RCE;
|
|
|
|
CHECK_STRUCT(ACKTcb, tcb);
|
|
|
|
KeAcquireSpinLock(&ACKTcb->tcb_lock, &OldIrql);
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (ACKTcb->tcb_rce == NULL) {
|
|
InitRCE(ACKTcb);
|
|
if (ACKTcb->tcb_rce == NULL) {
|
|
KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
if (ACKTcb->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(ACKTcb)) {
|
|
TryToCloseTCB(ACKTcb, TCB_CLOSE_ABORTED, OldIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Allocate a packet header/buffer/data region for this ACK packet.
|
|
//
|
|
// Our buffer has space at the beginning which will be filled in
|
|
// later by the link level. At this level we add the IPv6Header
|
|
// and the TCPHeader.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool and
|
|
// REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
|
|
//
|
|
Offset = ACKTcb->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
Length = Offset + sizeof(*IP) + sizeof(*TCP);
|
|
NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
|
|
KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
|
|
|
|
//
|
|
// REVIEW: What to do if this fails.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCP SendACK: Couldn't allocate IPv6 packet header!?!\n"));
|
|
return;
|
|
}
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
PC(Packet)->CompletionData = NULL;
|
|
|
|
|
|
//
|
|
// Our header buffer has extra space for other headers to be
|
|
// prepended to ours without requiring further allocation calls.
|
|
// Put the actual TCP/IP header at the end of the buffer.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = TCPHopLimit(ACKTcb);
|
|
IP->Source = ACKTcb->tcb_saddr;
|
|
IP->Dest = ACKTcb->tcb_daddr;
|
|
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
TCP->tcp_src = ACKTcb->tcb_sport;
|
|
TCP->tcp_dest = ACKTcb->tcb_dport;
|
|
TCP->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
|
|
|
|
//
|
|
// If the remote peer is advertising a window of zero, we need to send
|
|
// this ack with a sequence number of his rcv_next (which in that case
|
|
// should be our senduna). We have code here ifdef'd out that makes
|
|
// sure that we don't send outside the RWE, but this doesn't work. We
|
|
// need to be able to send a pure ACK exactly at the RWE.
|
|
//
|
|
if (ACKTcb->tcb_sendwin != 0) {
|
|
SendNext = ACKTcb->tcb_sendnext;
|
|
#if 0
|
|
SeqNum MaxValidSeq;
|
|
|
|
MaxValidSeq = ACKTcb->tcb_senduna + ACKTcb->tcb_sendwin - 1;
|
|
|
|
SendNext = (SEQ_LT(SendNext, MaxValidSeq) ? SendNext : MaxValidSeq);
|
|
#endif
|
|
|
|
} else
|
|
SendNext = ACKTcb->tcb_senduna;
|
|
|
|
if ((ACKTcb->tcb_flags & FIN_SENT) &&
|
|
SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_FIN | TCP_FLAG_ACK);
|
|
} else
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
|
|
|
|
TCP->tcp_seq = net_long(SendNext);
|
|
TempWin = (ushort)RcvWin(ACKTcb);
|
|
TCP->tcp_window = net_short(TempWin);
|
|
TCP->tcp_urgent = 0;
|
|
|
|
PayloadLength = sizeof(*TCP);
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, Offset + sizeof *IP, NULL, PayloadLength,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
ASSERT(TCP->tcp_xsum != 0);
|
|
|
|
STOP_TCB_TIMER(ACKTcb->tcb_delacktimer);
|
|
ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
|
|
TStats.ts_outsegs++;
|
|
|
|
//
|
|
// Capture and reference the RCE while we still hold the TCB lock.
|
|
// The TCB's reference on this particular RCE might go away at any point
|
|
// after we release the lock (or because we drop it ourselves below).
|
|
//
|
|
RCE = ACKTcb->tcb_rce;
|
|
AddRefRCE(RCE);
|
|
|
|
//
|
|
// If connection-acceptance has been delayed, release the TCB's RCE.
|
|
// This prevents TCBs in pre-established states from consuming
|
|
// an unbounded number of RCEs.
|
|
//
|
|
if (ACKTcb->tcb_flags & ACCEPT_PENDING) {
|
|
ACKTcb->tcb_rce = NULL;
|
|
ReleaseRCE(RCE);
|
|
}
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
|
|
|
|
IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
|
|
//
|
|
// Release the extra reference we took on the RCE above.
|
|
//
|
|
ReleaseRCE(RCE);
|
|
}
|
|
|
|
|
|
//* SendRSTFromTCB - Send a RST from a TCB.
|
|
//
|
|
// This is called during close when we need to send a RST.
|
|
//
|
|
// Called only when TCB is going away, so we have exclusive access.
|
|
//
|
|
void // Returns: Nothing.
|
|
SendRSTFromTCB(
|
|
TCB *RSTTcb) // TCB from which RST is to be sent.
|
|
{
|
|
PNDIS_PACKET Packet;
|
|
void *Memory;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
NDIS_STATUS NdisStatus;
|
|
int Offset;
|
|
uint Length;
|
|
uint PayloadLength;
|
|
SeqNum RSTSeq;
|
|
|
|
CHECK_STRUCT(RSTTcb, tcb);
|
|
|
|
ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (RSTTcb->tcb_rce == NULL) {
|
|
InitRCE(RSTTcb);
|
|
if (RSTTcb->tcb_rce == NULL) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
if (RSTTcb->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(RSTTcb)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Allocate a packet header/buffer/data region for this RST packet.
|
|
//
|
|
// Our buffer has space at the beginning which will be filled in
|
|
// later by the link level. At this level we add the IPv6Header
|
|
// and the TCPHeader.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool and
|
|
// REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
|
|
//
|
|
Offset = RSTTcb->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
Length = Offset + sizeof(*IP) + sizeof(*TCP);
|
|
NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
//
|
|
// REVIEW: What to do if this fails.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCP SendRSTFromTCB: "
|
|
"Couldn't alloc IPv6 packet header!\n"));
|
|
return;
|
|
}
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
PC(Packet)->CompletionData = NULL;
|
|
|
|
//
|
|
// Since this is an RST-only packet we only have the one buffer and
|
|
// nothing to link on after.
|
|
//
|
|
|
|
//
|
|
// Our header buffer has extra space for other headers to be
|
|
// prepended to ours without requiring further allocation calls.
|
|
// Put the actual TCP/IP header at the end of the buffer.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = TCPHopLimit(RSTTcb);
|
|
IP->Source = RSTTcb->tcb_saddr;
|
|
IP->Dest = RSTTcb->tcb_daddr;
|
|
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
TCP->tcp_src = RSTTcb->tcb_sport;
|
|
TCP->tcp_dest = RSTTcb->tcb_dport;
|
|
|
|
//
|
|
// If the remote peer has a window of 0, send with a seq. # equal
|
|
// to senduna so he'll accept it. Otherwise send with send max.
|
|
//
|
|
if (RSTTcb->tcb_sendwin != 0)
|
|
RSTSeq = RSTTcb->tcb_sendmax;
|
|
else
|
|
RSTSeq = RSTTcb->tcb_senduna;
|
|
|
|
TCP->tcp_seq = net_long(RSTSeq);
|
|
TCP->tcp_ack = net_long(RSTTcb->tcb_rcvnext);
|
|
TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_RST | TCP_FLAG_ACK);
|
|
TCP->tcp_window = 0;
|
|
TCP->tcp_urgent = 0;
|
|
|
|
PayloadLength = sizeof(*TCP);
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, Offset + sizeof *IP, NULL, PayloadLength,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
ASSERT(TCP->tcp_xsum != 0);
|
|
|
|
TStats.ts_outsegs++;
|
|
TStats.ts_outrsts++;
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
IPv6Send(Packet, Offset, IP, PayloadLength, RSTTcb->tcb_rce, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
}
|
|
|
|
|
|
//* SendRSTFromHeader - Send a RST back, based on a header.
|
|
//
|
|
// Called when we need to send a RST, but don't necessarily have a TCB.
|
|
//
|
|
void // Returns: Nothing.
|
|
SendRSTFromHeader(
|
|
TCPHeader UNALIGNED *RecvTCP, // TCP header to be RST.
|
|
uint Length, // Length of the incoming segment.
|
|
IPv6Addr *Dest, // Destination IP address for RST.
|
|
uint DestScopeId, // Scope id for destination address.
|
|
IPv6Addr *Src, // Source IP address for RST.
|
|
uint SrcScopeId) // Scope id for source address.
|
|
{
|
|
PNDIS_PACKET Packet;
|
|
void *Memory;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *SendTCP;
|
|
NetTableEntry *NTE;
|
|
RouteCacheEntry *RCE;
|
|
IP_STATUS Status;
|
|
NDIS_STATUS NdisStatus;
|
|
uint Offset;
|
|
uint SendLength;
|
|
uint PayloadLength;
|
|
|
|
//
|
|
// Never send a RST in response to a RST.
|
|
//
|
|
if (RecvTCP->tcp_flags & TCP_FLAG_RST)
|
|
return;
|
|
|
|
//
|
|
// Determine NTE to send on based on incoming packet's destination.
|
|
// REVIEW: Alternatively, we could/should just pass the NTE in.
|
|
//
|
|
NTE = FindNetworkWithAddress(Src, SrcScopeId);
|
|
if (NTE == NULL) {
|
|
//
|
|
// This should only happen if the NTE became invalid
|
|
// between accepting the packet and getting here. It
|
|
// cannot completely go away since the packet's Packet
|
|
// structure holds a reference to it.
|
|
//
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Get the route to the destination (incoming packet's source).
|
|
//
|
|
Status = RouteToDestination(Dest, DestScopeId, CastFromNTE(NTE),
|
|
RTD_FLAG_NORMAL, &RCE);
|
|
if (Status != IP_SUCCESS) {
|
|
//
|
|
// Failed to get a route to the destination. Error out.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
|
|
"TCP SendRSTFromHeader: Can't get a route?!?\n"));
|
|
ReleaseNTE(NTE);
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Allocate a packet header/buffer/data region for this RST packet.
|
|
//
|
|
// Our buffer has space at the beginning which will be filled in
|
|
// later by the link level. At this level we add the IPv6Header
|
|
// and the TCPHeader.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool and
|
|
// REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
|
|
//
|
|
Offset = RCE->NCE->IF->LinkHeaderSize;
|
|
SendLength = Offset + sizeof(*IP) + sizeof(*SendTCP);
|
|
NdisStatus = IPv6AllocatePacket(SendLength, &Packet, &Memory);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
//
|
|
// Failed to allocate a packet header/buffer/data region. Error out.
|
|
//
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCP SendRSTFromHeader: Couldn't alloc IPv6 pkt header!\n"));
|
|
ReleaseRCE(RCE);
|
|
ReleaseNTE(NTE);
|
|
return;
|
|
}
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
PC(Packet)->CompletionData = NULL;
|
|
|
|
//
|
|
// We now have all the resources we need to send. Since this is a
|
|
// RST-only packet we only have the one header buffer and nothing
|
|
// to link on after.
|
|
//
|
|
|
|
//
|
|
// Our header buffer has extra space for other headers to be
|
|
// prepended to ours without requiring further allocation calls.
|
|
// Put the actual TCP/IP header at the end of the buffer.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = (uchar)RCE->NCE->IF->CurHopLimit;
|
|
IP->Source = *Src;
|
|
IP->Dest = *Dest;
|
|
|
|
//
|
|
// Fill in the header so as to make it believable to our peer, and send it.
|
|
//
|
|
SendTCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
if (RecvTCP->tcp_flags & TCP_FLAG_SYN)
|
|
Length++;
|
|
|
|
if (RecvTCP->tcp_flags & TCP_FLAG_FIN)
|
|
Length++;
|
|
|
|
if (RecvTCP->tcp_flags & TCP_FLAG_ACK) {
|
|
SendTCP->tcp_seq = RecvTCP->tcp_ack;
|
|
SendTCP->tcp_ack = 0;
|
|
SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
|
|
TCP_FLAG_RST);
|
|
} else {
|
|
SeqNum TempSeq;
|
|
|
|
SendTCP->tcp_seq = 0;
|
|
TempSeq = net_long(RecvTCP->tcp_seq);
|
|
TempSeq += Length;
|
|
SendTCP->tcp_ack = net_long(TempSeq);
|
|
SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
|
|
TCP_FLAG_RST | TCP_FLAG_ACK);
|
|
}
|
|
|
|
SendTCP->tcp_window = 0;
|
|
SendTCP->tcp_urgent = 0;
|
|
SendTCP->tcp_dest = RecvTCP->tcp_src;
|
|
SendTCP->tcp_src = RecvTCP->tcp_dest;
|
|
|
|
PayloadLength = sizeof(*SendTCP);
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
SendTCP->tcp_xsum = 0;
|
|
SendTCP->tcp_xsum = ChecksumPacket(
|
|
Packet, Offset + sizeof *IP, NULL, PayloadLength,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
ASSERT(SendTCP->tcp_xsum != 0);
|
|
|
|
TStats.ts_outsegs++;
|
|
TStats.ts_outrsts++;
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(SendTCP->tcp_src),
|
|
net_short(SendTCP->tcp_dest));
|
|
|
|
//
|
|
// Release the Route and the NTE.
|
|
//
|
|
ReleaseRCE(RCE);
|
|
ReleaseNTE(NTE);
|
|
} // end of SendRSTFromHeader()
|
|
|
|
|
|
//* GoToEstab - Transition to the established state.
|
|
//
|
|
// Called when we are going to the established state and need to finish up
|
|
// initializing things that couldn't be done until now. We assume the TCB
|
|
// lock is held by the caller on the TCB we're called with.
|
|
//
|
|
void // Returns: Nothing.
|
|
GoToEstab(
|
|
TCB *EstabTCB) // TCB to transition.
|
|
{
|
|
|
|
//
|
|
// Initialize our slow start and congestion control variables.
|
|
//
|
|
EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
|
|
EstabTCB->tcb_ssthresh = 0xffffffff;
|
|
|
|
EstabTCB->tcb_state = TCB_ESTAB;
|
|
|
|
//
|
|
// We're in established. We'll subtract one from slow count for this fact,
|
|
// and if the slowcount goes to 0 we'll move onto the fast path.
|
|
//
|
|
if (--(EstabTCB->tcb_slowcount) == 0)
|
|
EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
|
|
|
|
InterlockedIncrement((PLONG)&TStats.ts_currestab);
|
|
|
|
EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
|
|
}
|
|
|
|
|
|
//* InitSendState - Initialize the send state of a connection.
|
|
//
|
|
// Called during connection establishment to initialize our send state.
|
|
// (In this case, this refers to all information we'll put on the wire as
|
|
// well as pure send state). We pick an ISS, set up a rexmit timer value,
|
|
// etc. We assume the tcb_lock is held on the TCB when we are called.
|
|
//
|
|
void // Returns: Nothing.
|
|
InitSendState(
|
|
TCB *NewTCB) // TCB to be set up.
|
|
{
|
|
uint InitialRTT;
|
|
CHECK_STRUCT(NewTCB, tcb);
|
|
|
|
if (NewTCB->tcb_flags & ACTIVE_OPEN) {
|
|
GetRandomISN(&NewTCB->tcb_sendnext, (uchar*)&NewTCB->tcb_md5data);
|
|
}
|
|
NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
|
|
NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
|
|
NewTCB->tcb_error = IP_SUCCESS;
|
|
|
|
//
|
|
// Initialize retransmit and delayed ack stuff.
|
|
//
|
|
NewTCB->tcb_rexmitcnt = 0;
|
|
NewTCB->tcb_rtt = 0;
|
|
NewTCB->tcb_smrtt = 0;
|
|
|
|
//
|
|
// Check for interface specific initial RTT.
|
|
// This can be as low as 3ms.
|
|
//
|
|
if ((NewTCB->tcb_rce != NULL) &&
|
|
((InitialRTT = GetInitialRTTFromRCE(NewTCB->tcb_rce)) >
|
|
MIN_INITIAL_RTT)) {
|
|
NewTCB->tcb_delta = MS_TO_TICKS(InitialRTT * 2);
|
|
NewTCB->tcb_rexmit = MS_TO_TICKS(InitialRTT);
|
|
} else {
|
|
NewTCB->tcb_delta = MS_TO_TICKS(6000);
|
|
NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
|
|
}
|
|
|
|
STOP_TCB_TIMER(NewTCB->tcb_rexmittimer);
|
|
STOP_TCB_TIMER(NewTCB->tcb_delacktimer);
|
|
}
|
|
|
|
|
|
//* FillTCPHeader - Fill the TCP header in.
|
|
//
|
|
// A utility routine to fill in the TCP header.
|
|
//
|
|
void // Returns: Nothing.
|
|
FillTCPHeader(
|
|
TCB *SendTCB, // TCB to fill from.
|
|
TCPHeader UNALIGNED *Header) // Header to fill into.
|
|
{
|
|
ushort S;
|
|
ulong L;
|
|
|
|
Header->tcp_src = SendTCB->tcb_sport;
|
|
Header->tcp_dest = SendTCB->tcb_dport;
|
|
L = SendTCB->tcb_sendnext;
|
|
Header->tcp_seq = net_long(L);
|
|
L = SendTCB->tcb_rcvnext;
|
|
Header->tcp_ack = net_long(L);
|
|
Header->tcp_flags = 0x1050;
|
|
*(ulong UNALIGNED *)&Header->tcp_xsum = 0;
|
|
S = (ushort)RcvWin(SendTCB);
|
|
Header->tcp_window = net_short(S);
|
|
Header->tcp_urgent = 0;
|
|
}
|
|
|
|
|
|
//* TCPSend - Send data from a TCP connection.
|
|
//
|
|
// This is the main 'send data' routine. We go into a loop, trying
|
|
// to send data until we can't for some reason. First we compute
|
|
// the useable window, use it to figure the amount we could send. If
|
|
// the amount we could send meets certain criteria we'll build a frame
|
|
// and send it, after setting any appropriate control bits. We assume
|
|
// the caller has put a reference on the TCB.
|
|
//
|
|
void // Returns: Nothing.
|
|
TCPSend(
|
|
TCB *SendTCB, // TCB to be sent from.
|
|
KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
|
|
{
|
|
int SendWin; // Useable send window.
|
|
uint AmountToSend; // Amount to send this time.
|
|
uint AmountLeft;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
PNDIS_PACKET Packet;
|
|
PNDIS_BUFFER FirstBuffer, CurrentBuffer;
|
|
void *Memory;
|
|
TCPSendReq *CurSend;
|
|
SendCmpltContext *SCC;
|
|
SeqNum OldSeq;
|
|
NDIS_STATUS NdisStatus;
|
|
uint AmtOutstanding, AmtUnsent;
|
|
int ForceWin; // Window we're forced to use.
|
|
uint HeaderLength;
|
|
uint LinkOffset;
|
|
uint PMTU;
|
|
RouteCacheEntry *RCE;
|
|
|
|
CHECK_STRUCT(SendTCB, tcb);
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
|
|
ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
|
|
ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
|
|
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
|
|
(SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
|
|
|
|
//
|
|
// See if we should even be here. If another instance of ourselves is
|
|
// already in this code, or is about to enter it after completing a
|
|
// receive, then just skip on out.
|
|
//
|
|
if ((SendTCB->tcb_flags & IN_TCP_SEND) ||
|
|
(SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
|
|
SendTCB->tcb_flags |= SEND_AFTER_RCV;
|
|
goto bail;
|
|
}
|
|
SendTCB->tcb_flags |= IN_TCP_SEND;
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
InitRCE(SendTCB);
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
SendTCB->tcb_flags &= ~IN_TCP_SEND;
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
// We fail existing send requests for TCBs with a disconnected
|
|
// outgoing interface, except when a loopback route is used.
|
|
//
|
|
if (SendTCB->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(SendTCB) ||
|
|
IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
|
|
|
|
SendTCB->tcb_flags &= ~IN_TCP_SEND;
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
|
|
KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Verify that our cached Path MTU is still valid.
|
|
// Watch for changes to IPsec policies since they can also effect our MSS.
|
|
// REVIEW: This the best spot to do this?
|
|
//
|
|
PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
|
|
if (PMTU != SendTCB->tcb_pmtu ||
|
|
SecurityStateValidationCounter != SendTCB->tcb_security) {
|
|
//
|
|
// Either our Path MTU or the global security state has changed.
|
|
// Cache current values and then calculate a new MSS.
|
|
//
|
|
SendTCB->tcb_pmtu = PMTU;
|
|
SendTCB->tcb_security = SecurityStateValidationCounter;
|
|
CalculateMSSForTCB(SendTCB);
|
|
}
|
|
|
|
//
|
|
// We'll continue this loop until we send a FIN, or we break out
|
|
// internally for some other reason.
|
|
//
|
|
while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
|
|
|
|
CheckTCBSends(SendTCB);
|
|
|
|
AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
|
|
AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
|
|
|
|
ASSERT(*(int *)&AmtUnsent >= 0);
|
|
|
|
SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
|
|
AmtOutstanding);
|
|
|
|
//
|
|
// If this send is after a fast recovery and sendwin is zero because
|
|
// of amount outstanding, then at least force 1 segment to prevent
|
|
// delayed ack timeouts from peer.
|
|
//
|
|
if (SendTCB->tcb_force) {
|
|
SendTCB->tcb_force = 0;
|
|
if (SendWin < SendTCB->tcb_mss) {
|
|
SendWin = SendTCB->tcb_mss;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Since the window could have shrank, need to get it to zero at
|
|
// least.
|
|
//
|
|
ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
|
|
FORCE_OUT_SHIFT);
|
|
SendWin = MAX(SendWin, ForceWin);
|
|
|
|
AmountToSend = MIN(MIN((uint)SendWin, AmtUnsent), SendTCB->tcb_mss);
|
|
|
|
ASSERT(SendTCB->tcb_mss > 0);
|
|
|
|
//
|
|
// See if we have enough to send. We'll send if we have at least a
|
|
// segment, or if we really have some data to send and we can send
|
|
// all that we have, or the send window is > 0 and we need to force
|
|
// output or send a FIN (note that if we need to force output
|
|
// SendWin will be at least 1 from the check above), or if we can
|
|
// send an amount == to at least half the maximum send window
|
|
// we've seen.
|
|
//
|
|
if (AmountToSend == SendTCB->tcb_mss ||
|
|
(AmountToSend != 0 && AmountToSend == AmtUnsent) ||
|
|
(SendWin != 0 &&
|
|
(((SendTCB->tcb_flags & FIN_NEEDED) &&
|
|
AmtUnsent <= SendTCB->tcb_mss) ||
|
|
(SendTCB->tcb_flags & FORCE_OUTPUT) ||
|
|
AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
|
|
|
|
//
|
|
// It's OK to send something. Allocate a packet header.
|
|
//
|
|
// REVIEW: It was easier to code all these allocations directly
|
|
// REVIEW: rather than use IPv6AllocatePacket.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool
|
|
// REVIEW: and the IPv6BufferPool respectively. Should we instead
|
|
// REVIEW: have separate pools for TCP?
|
|
//
|
|
NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate packet header!?!\n"));
|
|
goto error_oor;
|
|
}
|
|
|
|
// We'll fill in the CompletionData below.
|
|
InitializeNdisPacket(Packet);
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
|
|
//
|
|
// Our header buffer has extra space at the beginning for other
|
|
// headers to be prepended to ours without requiring further
|
|
// allocation calls. It also has extra space at the end to hold
|
|
// the send completion data.
|
|
//
|
|
LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
HeaderLength =
|
|
(LinkOffset + sizeof(*IP) + sizeof(*TCP) +
|
|
sizeof(SendCmpltContext) +
|
|
__builtin_alignof(SendCmpltContext) - 1) &~
|
|
(UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
|
|
Memory = ExAllocatePool(NonPagedPool, HeaderLength);
|
|
if (Memory == NULL) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate header memory!?!\n"));
|
|
NdisFreePacket(Packet);
|
|
goto error_oor;
|
|
}
|
|
|
|
//
|
|
// When allocating the NDIS buffer describing this memory region,
|
|
// we don't tell it about the extra space on the end that we
|
|
// allocated for the send completion data.
|
|
//
|
|
NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
|
|
Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate buffer!?!\n"));
|
|
ExFreePool(Memory);
|
|
NdisFreePacket(Packet);
|
|
goto error_oor;
|
|
}
|
|
|
|
//
|
|
// Skip over the extra space that will be filled in later by the
|
|
// link level. At this level we add the IPv6Header, the
|
|
// TCPHeader, and the data.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = TCPHopLimit(SendTCB);
|
|
IP->Source = SendTCB->tcb_saddr;
|
|
IP->Dest = SendTCB->tcb_daddr;
|
|
|
|
//
|
|
// Begin preparing the TCP header.
|
|
//
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
FillTCPHeader(SendTCB, TCP);
|
|
|
|
//
|
|
// Store the send completion data in the same buffer as the TCP
|
|
// header, right after the TCP header. This saves allocation
|
|
// overhead and works because we don't consider this area to be
|
|
// part of the packet data (we set this buffer's length to
|
|
// indicate that the data ends with the TCP header above).
|
|
//
|
|
// Note that this code relies on the fact that we don't include
|
|
// any TCP options (and thus don't have a variable length TCP
|
|
// header) in our data packets.
|
|
//
|
|
SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
|
|
sizeof(*SCC));
|
|
PC(Packet)->CompletionData = SCC;
|
|
#if DBG
|
|
SCC->scc_sig = scc_signature;
|
|
#endif
|
|
SCC->scc_ubufcount = 0;
|
|
SCC->scc_tbufcount = 0;
|
|
SCC->scc_count = 0;
|
|
|
|
AmountLeft = AmountToSend;
|
|
|
|
if (AmountToSend != 0) {
|
|
long Result;
|
|
|
|
//
|
|
// Loop through the sends on the TCB, building a frame.
|
|
//
|
|
CurrentBuffer = FirstBuffer;
|
|
CurSend = SendTCB->tcb_cursend;
|
|
CHECK_STRUCT(CurSend, tsr);
|
|
SCC->scc_firstsend = CurSend;
|
|
|
|
do {
|
|
ASSERT(CurSend->tsr_refcnt > 0);
|
|
Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
|
|
|
|
ASSERT(Result > 0);
|
|
|
|
SCC->scc_count++;
|
|
//
|
|
// If the current send offset is 0 and the current
|
|
// send is less than or equal to what we have left
|
|
// to send, we haven't already put a transport
|
|
// buffer on this send, and nobody else is using
|
|
// the buffer chain directly, just use the input
|
|
// buffers. We check for other people using them
|
|
// by looking at tsr_lastbuf. If it's NULL,
|
|
// nobody else is using the buffers. If it's not
|
|
// NULL, somebody is.
|
|
//
|
|
if (SendTCB->tcb_sendofs == 0 &&
|
|
(SendTCB->tcb_sendsize <= AmountLeft) &&
|
|
(SCC->scc_tbufcount == 0) &&
|
|
CurSend->tsr_lastbuf == NULL) {
|
|
|
|
PNDIS_BUFFER LastBuf = SendTCB->tcb_sendbuf;
|
|
uint UBufLength = NdisBufferLength(LastBuf);
|
|
ushort UBufCount = 1;
|
|
|
|
while (NDIS_BUFFER_LINKAGE(LastBuf) != NULL) {
|
|
LastBuf = NDIS_BUFFER_LINKAGE(LastBuf);
|
|
UBufLength += NdisBufferLength(LastBuf);
|
|
UBufCount++;
|
|
}
|
|
|
|
if (SendTCB->tcb_sendsize == UBufLength) {
|
|
SCC->scc_ubufcount += UBufCount;
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) =
|
|
SendTCB->tcb_sendbuf;
|
|
CurSend->tsr_lastbuf = CurrentBuffer = LastBuf;
|
|
AmountLeft -= SendTCB->tcb_sendsize;
|
|
SendTCB->tcb_sendsize = 0;
|
|
} else {
|
|
//
|
|
// Fall through with a non-zero tcb_sendsize.
|
|
//
|
|
ASSERT(SendTCB->tcb_sendsize != 0);
|
|
}
|
|
}
|
|
|
|
if (SendTCB->tcb_sendsize != 0) {
|
|
uint AmountToDup;
|
|
PNDIS_BUFFER NewBuf, Buf;
|
|
uint Offset;
|
|
NDIS_STATUS NStatus;
|
|
uchar *VirtualAddress;
|
|
uint Length;
|
|
|
|
//
|
|
// Either the current send has more data than
|
|
// we want to send, or the starting offset is
|
|
// not 0. In either case we'll need to loop
|
|
// through the current send, allocating
|
|
// buffers.
|
|
//
|
|
Buf = SendTCB->tcb_sendbuf;
|
|
Offset = SendTCB->tcb_sendofs;
|
|
|
|
do {
|
|
ASSERT(Buf != NULL);
|
|
|
|
NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
|
|
LowPagePriority);
|
|
if (VirtualAddress == NULL) {
|
|
//
|
|
// Couldn't map into kernel address space.
|
|
// If the packet is already partly built,
|
|
// send what we've got, otherwise error out.
|
|
//
|
|
goto error_oor2;
|
|
}
|
|
|
|
ASSERT((Offset < Length) ||
|
|
(Offset == 0 && Length == 0));
|
|
|
|
//
|
|
// Adjust the length for the offset into
|
|
// this buffer.
|
|
//
|
|
Length -= Offset;
|
|
|
|
AmountToDup = MIN(AmountLeft, Length);
|
|
|
|
NdisAllocateBuffer(&NStatus, &NewBuf,
|
|
IPv6BufferPool,
|
|
VirtualAddress + Offset,
|
|
AmountToDup);
|
|
if (NStatus == NDIS_STATUS_SUCCESS) {
|
|
SCC->scc_tbufcount++;
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
|
|
|
|
CurrentBuffer = NewBuf;
|
|
if (AmountToDup >= Length) {
|
|
// Exhausted this buffer.
|
|
Buf = NDIS_BUFFER_LINKAGE(Buf);
|
|
Offset = 0;
|
|
} else {
|
|
Offset += AmountToDup;
|
|
ASSERT(Offset < NdisBufferLength(Buf));
|
|
}
|
|
|
|
SendTCB->tcb_sendsize -= AmountToDup;
|
|
AmountLeft -= AmountToDup;
|
|
} else {
|
|
//
|
|
// Couldn't allocate a buffer. If
|
|
// the packet is already partly built,
|
|
// send what we've got, otherwise
|
|
// error out.
|
|
//
|
|
error_oor2:
|
|
if (SCC->scc_tbufcount == 0 &&
|
|
SCC->scc_ubufcount == 0) {
|
|
NdisChainBufferAtFront(Packet, FirstBuffer);
|
|
TCPSendComplete(Packet, IP_GENERAL_FAILURE);
|
|
goto error_oor;
|
|
}
|
|
AmountToSend -= AmountLeft;
|
|
AmountLeft = 0;
|
|
break;
|
|
}
|
|
} while (AmountLeft && SendTCB->tcb_sendsize);
|
|
|
|
SendTCB->tcb_sendbuf = Buf;
|
|
SendTCB->tcb_sendofs = Offset;
|
|
}
|
|
|
|
if (CurSend->tsr_flags & TSR_FLAG_URG) {
|
|
ushort UP;
|
|
//
|
|
// This send is urgent data. We need to figure
|
|
// out what the urgent data pointer should be.
|
|
// We know sendnext is the starting sequence
|
|
// number of the frame, and that at the top of
|
|
// this do loop sendnext identified a byte in
|
|
// the CurSend at that time. We advanced CurSend
|
|
// at the same rate we've decremented
|
|
// AmountLeft (AmountToSend - AmountLeft ==
|
|
// AmountBuilt), so sendnext +
|
|
// (AmountToSend - AmountLeft) identifies a byte
|
|
// in the current value of CurSend, and that
|
|
// quantity plus tcb_sendsize is the sequence
|
|
// number one beyond the current send.
|
|
//
|
|
UP = (ushort)(AmountToSend - AmountLeft) +
|
|
(ushort)SendTCB->tcb_sendsize -
|
|
((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
|
|
|
|
TCP->tcp_urgent = net_short(UP);
|
|
TCP->tcp_flags |= TCP_FLAG_URG;
|
|
}
|
|
|
|
//
|
|
// See if we've exhausted this send. If we have,
|
|
// set the PUSH bit in this frame and move on to
|
|
// the next send. We also need to check the
|
|
// urgent data bit.
|
|
//
|
|
if (SendTCB->tcb_sendsize == 0) {
|
|
Queue *Next;
|
|
uchar PrevFlags;
|
|
|
|
//
|
|
// We've exhausted this send. Set the PUSH bit.
|
|
//
|
|
TCP->tcp_flags |= TCP_FLAG_PUSH;
|
|
PrevFlags = CurSend->tsr_flags;
|
|
Next = QNEXT(&CurSend->tsr_req.tr_q);
|
|
if (Next != QEND(&SendTCB->tcb_sendq)) {
|
|
CurSend = CONTAINING_RECORD(
|
|
QSTRUCT(TCPReq, Next, tr_q),
|
|
TCPSendReq, tsr_req);
|
|
CHECK_STRUCT(CurSend, tsr);
|
|
SendTCB->tcb_sendsize = CurSend->tsr_unasize;
|
|
SendTCB->tcb_sendofs = CurSend->tsr_offset;
|
|
SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
|
|
SendTCB->tcb_cursend = CurSend;
|
|
|
|
//
|
|
// Check the urgent flags. We can't combine new
|
|
// urgent data on to the end of old non-urgent
|
|
// data.
|
|
//
|
|
if ((PrevFlags & TSR_FLAG_URG) &&
|
|
!(CurSend->tsr_flags & TSR_FLAG_URG))
|
|
break;
|
|
} else {
|
|
ASSERT(AmountLeft == 0);
|
|
SendTCB->tcb_cursend = NULL;
|
|
SendTCB->tcb_sendbuf = NULL;
|
|
}
|
|
}
|
|
} while (AmountLeft != 0);
|
|
|
|
} else {
|
|
//
|
|
// We're in the loop, but AmountToSend is 0. This
|
|
// should happen only when we're sending a FIN. Check
|
|
// this, and return if it's not true.
|
|
//
|
|
ASSERT(AmtUnsent == 0);
|
|
if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
|
|
// KdBreakPoint();
|
|
ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
|
|
NdisFreeBuffer(FirstBuffer);
|
|
NdisFreePacket(Packet);
|
|
break;
|
|
}
|
|
|
|
SCC->scc_firstsend = NULL; // REVIEW: looks unneccessary.
|
|
NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
|
|
}
|
|
|
|
// Adjust for what we're really going to send.
|
|
AmountToSend -= AmountLeft;
|
|
|
|
//
|
|
// Update the sequence numbers, and start a RTT measurement
|
|
// if needed.
|
|
//
|
|
OldSeq = SendTCB->tcb_sendnext;
|
|
SendTCB->tcb_sendnext += AmountToSend;
|
|
|
|
if (!SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
|
|
//
|
|
// We have at least some retransmission. Bump the stat.
|
|
//
|
|
TStats.ts_retranssegs++;
|
|
}
|
|
|
|
if (SEQ_GT(SendTCB->tcb_sendnext, SendTCB->tcb_sendmax)) {
|
|
//
|
|
// We're sending at least some new data.
|
|
// We can't advance sendmax once FIN_SENT is set.
|
|
//
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
|
|
SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
|
|
TStats.ts_outsegs++;
|
|
|
|
//
|
|
// Check the Round-Trip Timer.
|
|
//
|
|
if (SendTCB->tcb_rtt == 0) {
|
|
// No RTT running, so start one.
|
|
SendTCB->tcb_rtt = TCPTime;
|
|
SendTCB->tcb_rttseq = OldSeq;
|
|
}
|
|
}
|
|
|
|
//
|
|
// We've built the frame entirely. If we've sent everything
|
|
// we have and there's a FIN pending, OR it in.
|
|
//
|
|
if (AmtUnsent == AmountToSend) {
|
|
if (SendTCB->tcb_flags & FIN_NEEDED) {
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
|
|
(SendTCB->tcb_sendnext ==
|
|
(SendTCB->tcb_sendmax - 1)));
|
|
//
|
|
// See if we still have room in the window for a FIN.
|
|
//
|
|
if (SendWin > (int) AmountToSend) {
|
|
TCP->tcp_flags |= TCP_FLAG_FIN;
|
|
SendTCB->tcb_sendnext++;
|
|
SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
|
|
SendTCB->tcb_flags |= (FIN_SENT | FIN_OUTSTANDING);
|
|
SendTCB->tcb_flags &= ~FIN_NEEDED;
|
|
}
|
|
}
|
|
}
|
|
|
|
AmountToSend += sizeof(TCPHeader);
|
|
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
|
|
|
|
SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
|
|
STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
|
|
STOP_TCB_TIMER(SendTCB->tcb_swstimer);
|
|
SendTCB->tcb_alive = TCPTime;
|
|
|
|
// Add the buffers to the packet.
|
|
NdisChainBufferAtFront(Packet, FirstBuffer);
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
|
|
//
|
|
// Capture and reference the RCE while we still hold the TCB lock.
|
|
// The TCB's reference on this particular RCE might go away at any
|
|
// point after we release the lock.
|
|
//
|
|
RCE = SendTCB->tcb_rce;
|
|
AddRefRCE(RCE);
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
|
|
|
|
if (TCP->tcp_xsum == 0) {
|
|
//
|
|
// ChecksumPacket failed, so abort the transmission.
|
|
//
|
|
IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
|
|
|
|
} else {
|
|
IPv6Send(Packet, LinkOffset, IP,
|
|
AmountToSend, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
}
|
|
|
|
ReleaseRCE(RCE);
|
|
KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
|
|
continue;
|
|
} else {
|
|
//
|
|
// We've decided we can't send anything now. Figure out why, and
|
|
// see if we need to set a timer.
|
|
//
|
|
if (SendTCB->tcb_sendwin == 0) {
|
|
if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
|
|
SendTCB->tcb_flags |= FLOW_CNTLD;
|
|
SendTCB->tcb_rexmitcnt = 0;
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer,
|
|
SendTCB->tcb_rexmit);
|
|
SendTCB->tcb_slowcount++;
|
|
SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
|
|
} else
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer,
|
|
SendTCB->tcb_rexmit);
|
|
} else
|
|
if (AmountToSend != 0)
|
|
// We have something to send, but we're not sending
|
|
// it, presumably due to SWS avoidance.
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_swstimer))
|
|
START_TCB_TIMER(SendTCB->tcb_swstimer, SWS_TO);
|
|
|
|
break;
|
|
}
|
|
} // while (!FIN_OUTSTANDING)
|
|
|
|
//
|
|
// We're done sending, so we don't need the output flags set.
|
|
//
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
|
|
SEND_AFTER_RCV);
|
|
bail:
|
|
DerefTCB(SendTCB, PreLockIrql);
|
|
return;
|
|
|
|
//
|
|
// Common case error handling code for out of resource conditions. Start the
|
|
// retransmit timer if it's not already running (so that we try this again
|
|
// later), clean up and return.
|
|
//
|
|
error_oor:
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
|
|
|
|
// We had an out of resource problem, so clear the OUTPUT flags.
|
|
SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
|
|
DerefTCB(SendTCB, PreLockIrql);
|
|
return;
|
|
} // end of TCPSend()
|
|
|
|
|
|
//* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
|
|
//
|
|
// Called to fast retransmit the dropped segment.
|
|
//
|
|
// We assume the caller has put a reference on the TCB, and the TCB is locked
|
|
// on entry. The reference is dropped and the lock released before returning.
|
|
//
|
|
void // Returns: Nothing.
|
|
ResetAndFastSend(
|
|
TCB *SeqTCB, // TCB for this connection.
|
|
SeqNum NewSeq, // Sequence number to set.
|
|
uint NewCWin) // New value for congestion window.
|
|
{
|
|
TCPSendReq *SendReq;
|
|
Queue *CurQ;
|
|
PNDIS_BUFFER Buffer;
|
|
uint Offset;
|
|
uint SendSize;
|
|
|
|
CHECK_STRUCT(SeqTCB, tcb);
|
|
ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
|
|
|
|
//
|
|
// The new seq must be less than send max, or NewSeq, senduna, sendnext,
|
|
// and sendmax must all be equal. (The latter case happens when we're
|
|
// called exiting TIME_WAIT, or possibly when we're retransmitting
|
|
// during a flow controlled situation).
|
|
//
|
|
ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
|
|
(SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
|
|
SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
|
|
|
|
if (SYNC_STATE(SeqTCB->tcb_state) &&
|
|
(SeqTCB->tcb_state != TCB_TIME_WAIT)) {
|
|
|
|
if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
|
|
|
|
CurQ = QHEAD(&SeqTCB->tcb_sendq);
|
|
|
|
SendReq = (TCPSendReq *) CONTAINING_RECORD(CurQ, TCPReq, tr_q);
|
|
|
|
//
|
|
// SendReq points to the first send request on the send queue.
|
|
// We're pointing at the proper send req now. We need to go down.
|
|
//
|
|
// SendReq points to the cursend.
|
|
// SendSize point to sendsize in the cursend.
|
|
//
|
|
SendSize = SendReq->tsr_unasize;
|
|
|
|
Buffer = SendReq->tsr_buffer;
|
|
Offset = SendReq->tsr_offset;
|
|
|
|
// Call the fast retransmit send now.
|
|
TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
|
|
SeqTCB->tcb_mss);
|
|
} else {
|
|
ASSERT(SeqTCB->tcb_cursend == NULL);
|
|
}
|
|
}
|
|
SeqTCB->tcb_cwin = NewCWin;
|
|
DerefTCB(SeqTCB, DISPATCH_LEVEL);
|
|
return;
|
|
}
|
|
|
|
|
|
//* TCPFastSend - To send a segment without changing TCB state.
|
|
//
|
|
// Called to handle fast retransmit of the lost segment.
|
|
// tcb_lock will be held while entering (called by TCPRcv).
|
|
//
|
|
void // Returns: Nothing.
|
|
TCPFastSend(
|
|
TCB *SendTCB, // TCB for this connection.
|
|
PNDIS_BUFFER in_SendBuf, // NDIS buffer.
|
|
uint SendOfs, // Send offset.
|
|
TCPSendReq *CurSend, // Current send request.
|
|
uint SendSize, // Size of this send.
|
|
SeqNum SendNext, // Sequence number to use for this send.
|
|
int in_ToBeSent) // Cap on SendSize (REVIEW: Callee should cap).
|
|
{
|
|
uint AmountToSend; // Amount to send this time.
|
|
uint AmountLeft;
|
|
IPv6Header UNALIGNED *IP;
|
|
TCPHeader UNALIGNED *TCP;
|
|
PNDIS_PACKET Packet;
|
|
PNDIS_BUFFER FirstBuffer, CurrentBuffer;
|
|
void *Memory;
|
|
SendCmpltContext *SCC;
|
|
NDIS_STATUS NdisStatus;
|
|
uint AmtOutstanding, AmtUnsent;
|
|
uint HeaderLength;
|
|
uint LinkOffset;
|
|
uint PMTU;
|
|
KIRQL PreLockIrql;
|
|
PNDIS_BUFFER SendBuf = in_SendBuf;
|
|
RouteCacheEntry *RCE;
|
|
|
|
PreLockIrql = DISPATCH_LEVEL;
|
|
|
|
CHECK_STRUCT(SendTCB, tcb);
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
|
|
ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
|
|
ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
|
|
|
|
ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
|
|
(SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
|
|
|
|
//
|
|
// In most cases, we will already have a route at this point.
|
|
// However, if we failed to get one earlier in the passive receive
|
|
// path, we may need to retry here.
|
|
//
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
InitRCE(SendTCB);
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
DerefTCB(SendTCB, PreLockIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Validate that the address we're sourcing from and the route we're
|
|
// sending upon are still okay to use.
|
|
//
|
|
// We fail existing send requests for TCBs with a disconnected
|
|
// outgoing interface, except when a loopback route is used.
|
|
//
|
|
if (SendTCB->tcb_routing != RouteCacheValidationCounter) {
|
|
if (!ValidateSourceAndRoute(SendTCB) ||
|
|
IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
|
|
|
|
ASSERT(SendTCB->tcb_refcnt != 0);
|
|
TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
|
|
KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
|
|
DerefTCB(SendTCB, PreLockIrql);
|
|
return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Verify that our cached Path MTU is still valid.
|
|
// Watch for changes to IPsec policies since they can also effect our MSS.
|
|
// REVIEW: This the best spot to do this?
|
|
//
|
|
PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
|
|
if (PMTU != SendTCB->tcb_pmtu ||
|
|
SecurityStateValidationCounter != SendTCB->tcb_security) {
|
|
//
|
|
// Either our Path MTU or the global security state has changed.
|
|
// Cache current values and then calculate a new MSS.
|
|
//
|
|
SendTCB->tcb_pmtu = PMTU;
|
|
SendTCB->tcb_security = SecurityStateValidationCounter;
|
|
CalculateMSSForTCB(SendTCB);
|
|
}
|
|
|
|
AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
|
|
AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize),
|
|
(int)SendTCB->tcb_sendwin);
|
|
|
|
while (AmtUnsent > 0) {
|
|
|
|
if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
|
|
//
|
|
// Since tcb_lock is released in this loop
|
|
// it is possible that delayed ack acked
|
|
// what we are trying to retransmit.
|
|
//
|
|
goto error_oor;
|
|
}
|
|
|
|
// AmtUnsent below was minimum of sendwin and amtunsent
|
|
AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
|
|
|
|
ASSERT((int)AmtUnsent >= 0);
|
|
|
|
//
|
|
// We're going to send something. Allocate a packet header.
|
|
//
|
|
// REVIEW: It was easier to code all these allocations directly
|
|
// REVIEW: rather than use IPv6AllocatePacket.
|
|
//
|
|
// REVIEW: This grabs packets and buffers from the IPv6PacketPool
|
|
// REVIEW: and the IPv6BufferPool respectively. Should we instead
|
|
// REVIEW: have separate pools for TCP?
|
|
//
|
|
NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate packet header!?!\n"));
|
|
goto error_oor;
|
|
}
|
|
|
|
// We'll fill in the CompletionData below.
|
|
InitializeNdisPacket(Packet);
|
|
PC(Packet)->CompletionHandler = TCPSendComplete;
|
|
|
|
//
|
|
// Our header buffer has extra space at the beginning for other
|
|
// headers to be prepended to ours without requiring further
|
|
// allocation calls. It also has extra space at the end to hold
|
|
// the send completion data.
|
|
//
|
|
LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
|
|
HeaderLength = (LinkOffset + sizeof(*IP) + sizeof(*TCP) +
|
|
sizeof(SendCmpltContext) +
|
|
__builtin_alignof(SendCmpltContext) - 1) &~
|
|
(UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
|
|
Memory = ExAllocatePool(NonPagedPool, HeaderLength);
|
|
if (Memory == NULL) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate header memory!?!\n"));
|
|
NdisFreePacket(Packet);
|
|
goto error_oor;
|
|
}
|
|
|
|
//
|
|
// When allocating the NDIS buffer describing this memory region,
|
|
// we don't tell it about the extra space on the end that we
|
|
// allocated for the send completion data.
|
|
//
|
|
NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
|
|
Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
|
|
if (NdisStatus != NDIS_STATUS_SUCCESS) {
|
|
KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
|
|
"TCPSend: couldn't allocate buffer!?!\n"));
|
|
ExFreePool(Memory);
|
|
NdisFreePacket(Packet);
|
|
goto error_oor;
|
|
}
|
|
|
|
//
|
|
// Skip over the extra space that will be filled in later by the
|
|
// link level. At this level we add the IPv6Header, the
|
|
// TCPHeader, and the data.
|
|
//
|
|
IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
|
|
IP->VersClassFlow = IP_VERSION;
|
|
IP->NextHeader = IP_PROTOCOL_TCP;
|
|
IP->HopLimit = TCPHopLimit(SendTCB);
|
|
IP->Source = SendTCB->tcb_saddr;
|
|
IP->Dest = SendTCB->tcb_daddr;
|
|
|
|
//
|
|
// Begin preparing the TCP header.
|
|
//
|
|
TCP = (TCPHeader UNALIGNED *)(IP + 1);
|
|
FillTCPHeader(SendTCB, TCP);
|
|
TCP->tcp_seq = net_long(SendNext);
|
|
|
|
//
|
|
// Store the send completion data in the same buffer as the TCP
|
|
// header, right after the TCP header. This saves allocation
|
|
// overhead and works because we don't consider this area to be
|
|
// part of the packet data (we set this buffer's length to
|
|
// indicate that the data ends with the TCP header above).
|
|
//
|
|
// Note that this code relies on the fact that we don't include
|
|
// any TCP options (and thus don't have a variable length TCP
|
|
// header) in our data packets.
|
|
//
|
|
SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
|
|
sizeof(*SCC));
|
|
PC(Packet)->CompletionData = SCC;
|
|
#if DBG
|
|
SCC->scc_sig = scc_signature;
|
|
#endif
|
|
SCC->scc_ubufcount = 0;
|
|
SCC->scc_tbufcount = 0;
|
|
SCC->scc_count = 0;
|
|
|
|
AmountLeft = AmountToSend;
|
|
|
|
if (AmountToSend != 0) {
|
|
long Result;
|
|
|
|
//
|
|
// Loop through the sends on the TCB, building a frame.
|
|
//
|
|
CurrentBuffer = FirstBuffer;
|
|
CHECK_STRUCT(CurSend, tsr);
|
|
SCC->scc_firstsend = CurSend;
|
|
|
|
do {
|
|
ASSERT(CurSend->tsr_refcnt > 0);
|
|
Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
|
|
|
|
ASSERT(Result > 0);
|
|
|
|
SCC->scc_count++;
|
|
|
|
//
|
|
// If the current send offset is 0 and the current
|
|
// send is less than or equal to what we have left
|
|
// to send, we haven't already put a transport
|
|
// buffer on this send, and nobody else is using
|
|
// the buffer chain directly, just use the input
|
|
// buffers. We check for other people using them
|
|
// by looking at tsr_lastbuf. If it's NULL,
|
|
// nobody else is using the buffers. If it's not
|
|
// NULL, somebody is.
|
|
//
|
|
if (SendOfs == 0 &&
|
|
(SendSize <= AmountLeft) &&
|
|
(SCC->scc_tbufcount == 0) &&
|
|
CurSend->tsr_lastbuf == NULL) {
|
|
|
|
PNDIS_BUFFER LastBuf = SendBuf;
|
|
uint UBufLength = NdisBufferLength(LastBuf);
|
|
ushort UBufCount = 1;
|
|
|
|
while (NDIS_BUFFER_LINKAGE(LastBuf) != NULL) {
|
|
LastBuf = NDIS_BUFFER_LINKAGE(LastBuf);
|
|
UBufLength += NdisBufferLength(LastBuf);
|
|
UBufCount++;
|
|
}
|
|
|
|
if (SendSize == UBufLength) {
|
|
SCC->scc_ubufcount += UBufCount;
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendBuf;
|
|
CurSend->tsr_lastbuf = CurrentBuffer = LastBuf;
|
|
AmountLeft -= SendSize;
|
|
SendSize = 0;
|
|
} else {
|
|
//
|
|
// Fall through with a non-zero SendSize.
|
|
//
|
|
ASSERT(SendSize != 0);
|
|
}
|
|
}
|
|
|
|
if (SendSize != 0) {
|
|
uint AmountToDup;
|
|
PNDIS_BUFFER NewBuf, Buf;
|
|
uint Offset;
|
|
NDIS_STATUS NStatus;
|
|
uchar *VirtualAddress;
|
|
uint Length;
|
|
|
|
//
|
|
// Either the current send has more data than
|
|
// we want to send, or the starting offset is
|
|
// not 0. In either case we'll need to loop
|
|
// through the current send, allocating buffers.
|
|
//
|
|
Buf = SendBuf;
|
|
Offset = SendOfs;
|
|
|
|
do {
|
|
ASSERT(Buf != NULL);
|
|
|
|
NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
|
|
LowPagePriority);
|
|
|
|
if (VirtualAddress == NULL) {
|
|
goto error_oor2;
|
|
}
|
|
|
|
ASSERT((Offset < Length) ||
|
|
(Offset == 0 && Length == 0));
|
|
|
|
//
|
|
// Adjust the length for the offset into
|
|
// this buffer.
|
|
//
|
|
Length -= Offset;
|
|
|
|
AmountToDup = MIN(AmountLeft, Length);
|
|
|
|
NdisAllocateBuffer(&NStatus, &NewBuf,
|
|
IPv6BufferPool,
|
|
VirtualAddress + Offset,
|
|
AmountToDup);
|
|
|
|
if (NStatus == NDIS_STATUS_SUCCESS) {
|
|
SCC->scc_tbufcount++;
|
|
|
|
NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
|
|
|
|
CurrentBuffer = NewBuf;
|
|
if (AmountToDup >= Length) {
|
|
// Exhausted this buffer.
|
|
Buf = NDIS_BUFFER_LINKAGE(Buf);
|
|
Offset = 0;
|
|
} else {
|
|
Offset += AmountToDup;
|
|
ASSERT(Offset < NdisBufferLength(Buf));
|
|
}
|
|
|
|
SendSize -= AmountToDup;
|
|
AmountLeft -= AmountToDup;
|
|
} else {
|
|
//
|
|
// Couldn't allocate a buffer. If
|
|
// the packet is already partly built,
|
|
// send what we've got, otherwise
|
|
// error out.
|
|
//
|
|
error_oor2:
|
|
if (SCC->scc_tbufcount == 0 &&
|
|
SCC->scc_ubufcount == 0) {
|
|
KeReleaseSpinLockFromDpcLevel(
|
|
&SendTCB->tcb_lock);
|
|
NdisChainBufferAtFront(Packet, FirstBuffer);
|
|
TCPSendComplete(Packet, IP_GENERAL_FAILURE);
|
|
KeAcquireSpinLockAtDpcLevel(&SendTCB->tcb_lock);
|
|
goto error_oor;
|
|
}
|
|
AmountToSend -= AmountLeft;
|
|
AmountLeft = 0;
|
|
break;
|
|
}
|
|
} while (AmountLeft && SendSize);
|
|
|
|
SendBuf = Buf;
|
|
SendOfs = Offset;
|
|
}
|
|
|
|
if (CurSend->tsr_flags & TSR_FLAG_URG) {
|
|
ushort UP;
|
|
//
|
|
// This send is urgent data. We need to figure
|
|
// out what the urgent data pointer should be.
|
|
// We know sendnext is the starting sequence
|
|
// number of the frame, and that at the top of
|
|
// this do loop sendnext identified a byte in
|
|
// the CurSend at that time. We advanced CurSend
|
|
// at the same rate we've decremented
|
|
// AmountLeft (AmountToSend - AmountLeft ==
|
|
// AmountBuilt), so sendnext +
|
|
// (AmountToSend - AmountLeft) identifies a byte
|
|
// in the current value of CurSend, and that
|
|
// quantity plus tcb_sendsize is the sequence
|
|
// number one beyond the current send.
|
|
//
|
|
UP = (ushort) (AmountToSend - AmountLeft) +
|
|
(ushort) SendSize -
|
|
((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
|
|
|
|
TCP->tcp_urgent = net_short(UP);
|
|
TCP->tcp_flags |= TCP_FLAG_URG;
|
|
}
|
|
|
|
//
|
|
// See if we've exhausted this send. If we have,
|
|
// set the PUSH bit in this frame and move on to
|
|
// the next send. We also need to check the
|
|
// urgent data bit.
|
|
//
|
|
if (SendSize == 0) {
|
|
Queue *Next;
|
|
ulong PrevFlags;
|
|
|
|
//
|
|
// We've exhausted this send. Set the PUSH bit.
|
|
//
|
|
TCP->tcp_flags |= TCP_FLAG_PUSH;
|
|
PrevFlags = CurSend->tsr_flags;
|
|
Next = QNEXT(&CurSend->tsr_req.tr_q);
|
|
if (Next != QEND(&SendTCB->tcb_sendq)) {
|
|
CurSend = CONTAINING_RECORD(
|
|
QSTRUCT(TCPReq, Next, tr_q),
|
|
TCPSendReq, tsr_req);
|
|
CHECK_STRUCT(CurSend, tsr);
|
|
SendSize = CurSend->tsr_unasize;
|
|
SendOfs = CurSend->tsr_offset;
|
|
SendBuf = CurSend->tsr_buffer;
|
|
|
|
//
|
|
// Check the urgent flags. We can't combine new
|
|
// urgent data on to the end of old non-urgent
|
|
// data.
|
|
//
|
|
if ((PrevFlags & TSR_FLAG_URG) &&
|
|
!(CurSend->tsr_flags & TSR_FLAG_URG)) {
|
|
break;
|
|
}
|
|
} else {
|
|
ASSERT(AmountLeft == 0);
|
|
CurSend = NULL;
|
|
SendBuf = NULL;
|
|
}
|
|
}
|
|
} while (AmountLeft != 0);
|
|
|
|
} else {
|
|
//
|
|
// Amt to send is 0.
|
|
// Just bail out and start timer.
|
|
//
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer,
|
|
SendTCB->tcb_rexmit);
|
|
}
|
|
|
|
ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
|
|
NdisFreeBuffer(FirstBuffer);
|
|
NdisFreePacket(Packet);
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Adjust for what we're really going to send.
|
|
//
|
|
AmountToSend -= AmountLeft;
|
|
|
|
SendNext += AmountToSend;
|
|
AmtUnsent -= AmountToSend;
|
|
|
|
TStats.ts_retranssegs++;
|
|
|
|
AmountToSend += sizeof(TCPHeader);
|
|
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
|
|
}
|
|
|
|
SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
|
|
STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
|
|
STOP_TCB_TIMER(SendTCB->tcb_swstimer);
|
|
|
|
//
|
|
// Add the buffers to the packet.
|
|
//
|
|
NdisChainBufferAtFront(Packet, FirstBuffer);
|
|
|
|
//
|
|
// Compute the TCP checksum. It covers the entire TCP segment
|
|
// starting with the TCP header, plus the IPv6 pseudo-header.
|
|
//
|
|
TCP->tcp_xsum = 0;
|
|
TCP->tcp_xsum = ChecksumPacket(
|
|
Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
|
|
AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
|
|
|
|
//
|
|
// Capture and reference the RCE while we still hold the TCB lock.
|
|
// The TCB's reference on this particular RCE might go away at any
|
|
// point after we release the lock.
|
|
//
|
|
RCE = SendTCB->tcb_rce;
|
|
AddRefRCE(RCE);
|
|
|
|
//
|
|
// Everything's ready. Now send the packet.
|
|
//
|
|
// Note that IPv6Send does not return a status code.
|
|
// Instead it *always* completes the packet
|
|
// with an appropriate status code.
|
|
//
|
|
KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
|
|
|
|
if (TCP->tcp_xsum == 0) {
|
|
//
|
|
// ChecksumPacket failed, so abort the transmission.
|
|
//
|
|
IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
|
|
|
|
} else {
|
|
IPv6Send(Packet, LinkOffset, IP,
|
|
AmountToSend, RCE, 0,
|
|
IP_PROTOCOL_TCP,
|
|
net_short(TCP->tcp_src),
|
|
net_short(TCP->tcp_dest));
|
|
}
|
|
|
|
//
|
|
// Release reference and reacquire lock we dropped before sending.
|
|
//
|
|
ReleaseRCE(RCE);
|
|
KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
|
|
}
|
|
|
|
return;
|
|
|
|
//
|
|
// Common case error handling code for out of resource conditions.
|
|
// Start the retransmit timer if it's not already running
|
|
// (so that we try this again later), clean up and return.
|
|
//
|
|
error_oor:
|
|
if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
|
|
START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
//* TDISend - Send data on a connection.
|
|
//
|
|
// The main TDI send entry point. We take the input parameters, validate
|
|
// them, allocate a send request, etc. We then put the send request on the
|
|
// queue. If we have no other sends on the queue or Nagling is disabled we'll
|
|
// call TCPSend to send the data.
|
|
//
|
|
TDI_STATUS // Returns: Status of attempt to send.
|
|
TdiSend(
|
|
PTDI_REQUEST Request, // TDI request for the call.
|
|
ushort Flags, // Flags for this send.
|
|
uint SendLength, // Length in bytes of send.
|
|
PNDIS_BUFFER SendBuffer) // Buffer chain to be sent.
|
|
{
|
|
TCPConn *Conn;
|
|
TCB *SendTCB;
|
|
TCPSendReq *SendReq;
|
|
KIRQL OldIrql;
|
|
TDI_STATUS Error;
|
|
uint EmptyQ;
|
|
|
|
#if DBG
|
|
uint RealSendSize;
|
|
PNDIS_BUFFER Temp;
|
|
|
|
//
|
|
// Loop through the buffer chain, and make sure that the length matches
|
|
// up with SendLength.
|
|
//
|
|
Temp = SendBuffer;
|
|
RealSendSize = 0;
|
|
do {
|
|
ASSERT(Temp != NULL);
|
|
|
|
RealSendSize += NdisBufferLength(Temp);
|
|
Temp = NDIS_BUFFER_LINKAGE(Temp);
|
|
} while (Temp != NULL);
|
|
|
|
ASSERT(RealSendSize == SendLength);
|
|
#endif
|
|
|
|
//
|
|
// Grab lock on Connection Table. Then get our connection info from
|
|
// the TDI request, and our TCP control block from that.
|
|
//
|
|
Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext),
|
|
&OldIrql);
|
|
if (Conn == NULL) {
|
|
Error = TDI_INVALID_CONNECTION;
|
|
goto abort;
|
|
}
|
|
CHECK_STRUCT(Conn, tc);
|
|
|
|
SendTCB = Conn->tc_tcb;
|
|
if (SendTCB == NULL) {
|
|
Error = TDI_INVALID_STATE;
|
|
KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, OldIrql);
|
|
abort:
|
|
return Error;
|
|
}
|
|
CHECK_STRUCT(SendTCB, tcb);
|
|
|
|
//
|
|
// Switch to a finer-grained lock:
|
|
// Drop lock on the Connection Table in favor of one on our TCB.
|
|
//
|
|
KeAcquireSpinLockAtDpcLevel(&SendTCB->tcb_lock);
|
|
KeReleaseSpinLockFromDpcLevel(&Conn->tc_ConnBlock->cb_lock);
|
|
|
|
//
|
|
// Make sure our TCB is in a send-able state.
|
|
//
|
|
if (!DATA_SEND_STATE(SendTCB->tcb_state) || CLOSING(SendTCB)) {
|
|
Error = TDI_INVALID_STATE;
|
|
goto abort2;
|
|
}
|
|
|
|
CheckTCBSends(SendTCB); // Just a debug check.
|
|
|
|
//
|
|
// If we've released our RCE for some reason, reacquire one.
|
|
//
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
InitRCE(SendTCB);
|
|
if (SendTCB->tcb_rce == NULL) {
|
|
Error = TDI_DEST_NET_UNREACH;
|
|
goto abort2;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Verify that the cached RCE is still valid.
|
|
//
|
|
SendTCB->tcb_rce = ValidateRCE(SendTCB->tcb_rce, SendTCB->tcb_nte);
|
|
ASSERT(SendTCB->tcb_rce != NULL);
|
|
if (IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
|
|
//
|
|
// Fail new send requests for TCBs with a disconnected
|
|
// outgoing interface, except when the loopback route is used.
|
|
//
|
|
Error = TDI_DEST_NET_UNREACH;
|
|
goto abort2;
|
|
}
|
|
|
|
if (SendLength == 0) {
|
|
//
|
|
// Wow, nothing to do!
|
|
//
|
|
// REVIEW: Can't we do this check earlier (like before we even grab the
|
|
// REVIEW: Connection Table lock? The only reason I can think not to
|
|
// REVIEW: would be if something cared about the return code if a bad
|
|
// REVIEW: Tdi Request was given to us.
|
|
//
|
|
Error = TDI_SUCCESS;
|
|
goto abort2;
|
|
}
|
|
|
|
//
|
|
// We have a TCB, and it's valid. Allocate a send request now.
|
|
//
|
|
SendReq = GetSendReq();
|
|
if (SendReq == NULL) {
|
|
Error = TDI_NO_RESOURCES;
|
|
abort2:
|
|
KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
|
|
return Error;
|
|
}
|
|
|
|
//
|
|
// Prepare a TCP send request based on the TDI request and the
|
|
// passed in buffer chain.
|
|
//
|
|
SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
|
|
SendReq->tsr_req.tr_context = Request->RequestContext;
|
|
SendReq->tsr_buffer = SendBuffer;
|
|
SendReq->tsr_size = SendLength;
|
|
SendReq->tsr_unasize = SendLength;
|
|
SendReq->tsr_refcnt = 1; // ACK will decrement this ref
|
|
SendReq->tsr_offset = 0;
|
|
SendReq->tsr_lastbuf = NULL;
|
|
SendReq->tsr_time = TCPTime;
|
|
SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ? TSR_FLAG_URG : 0;
|
|
|
|
//
|
|
// Check current status of our send queue.
|
|
//
|
|
EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
|
|
|
|
//
|
|
// Add this send request to our send queue.
|
|
//
|
|
SendTCB->tcb_unacked += SendLength;
|
|
ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
|
|
if (SendTCB->tcb_cursend == NULL) {
|
|
//
|
|
// No existing current send request, so make this new one
|
|
// the current send.
|
|
//
|
|
// REVIEW: Is this always equivalent to EMPTYQ test above?
|
|
// REVIEW: If so, why not just set EmptyQ flag here and save a test?
|
|
//
|
|
SendTCB->tcb_cursend = SendReq;
|
|
SendTCB->tcb_sendbuf = SendBuffer;
|
|
SendTCB->tcb_sendofs = 0;
|
|
SendTCB->tcb_sendsize = SendLength;
|
|
}
|
|
|
|
//
|
|
// See if we should try to send now. We attempt to do so if we weren't
|
|
// already blocked, or if we were and either the Nagle Algorithm is turned
|
|
// off or we now have at least one max segment worth of data to send.
|
|
//
|
|
if (EmptyQ || (!(SendTCB->tcb_flags & NAGLING) ||
|
|
(SendTCB->tcb_unacked -
|
|
(SendTCB->tcb_sendmax - SendTCB->tcb_senduna))
|
|
>= SendTCB->tcb_mss)) {
|
|
SendTCB->tcb_refcnt++;
|
|
TCPSend(SendTCB, OldIrql);
|
|
} else
|
|
KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
|
|
|
|
//
|
|
// When TCPSend returns, we may or may not have already sent the data
|
|
// associated with this particular request.
|
|
//
|
|
return TDI_PENDING;
|
|
}
|
|
|
|
|
|
#pragma BEGIN_INIT
|
|
|
|
//* InitTCPSend - Initialize our send side.
|
|
//
|
|
// Called during init time to initialize our TCP send state.
|
|
//
|
|
int // Returns: TRUE if we inited, false if we didn't.
|
|
InitTCPSend(
|
|
void) // Nothing.
|
|
{
|
|
ExInitializeSListHead(&TCPSendReqFree);
|
|
KeInitializeSpinLock(&TCPSendReqFreeLock);
|
|
|
|
IPv6RegisterULProtocol(IP_PROTOCOL_TCP, TCPReceive, TCPControlReceive);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
#pragma END_INIT
|
|
|
|
//* UnloadTCPSend
|
|
//
|
|
// Cleanup and prepare for stack unload.
|
|
//
|
|
void
|
|
UnloadTCPSend(void)
|
|
{
|
|
PSLIST_ENTRY BufferLink;
|
|
|
|
while ((BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
|
|
&TCPSendReqFreeLock))
|
|
!= NULL) {
|
|
Queue *QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
|
|
TCPReq *Req = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
|
|
TCPSendReq *SendReq = CONTAINING_RECORD(Req, TCPSendReq, tsr_req);
|
|
|
|
CHECK_STRUCT(SendReq, tsr);
|
|
ExFreePool(SendReq);
|
|
}
|
|
|
|
IPv6RegisterULProtocol(IP_PROTOCOL_TCP, NULL, NULL);
|
|
}
|