|
|
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
//
// Copyright (c) 1985-2000 Microsoft Corporation
//
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
// You should have received a copy of the Microsoft End-User License Agreement
// for this software along with this release; see the file "license.txt".
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
//
// Abstract:
//
// Code for TCP connection management.
//
// This file contains the code handling TCP connection related requests,
// such as connecting and disconnecting.
//
#include "oscfg.h"
#include "ndis.h"
#include "ip6imp.h"
#include "ip6def.h"
#include "tdi.h"
#include "tdint.h"
#include "tdistat.h"
#include "queue.h"
#include "transprt.h"
#include "addr.h"
#include "tcp.h"
#include "tcb.h"
#include "tcpconn.h"
#include "tcpsend.h"
#include "tcprcv.h"
#include "tcpdeliv.h"
#include "info.h"
#include "tcpcfg.h"
#include "route.h"
#include "security.h"
#include "tcpmd5.h"
#include "md5.h"
#include "crypto\rc4.h"
SLIST_HEADER ConnReqFree;  // Free list of recycled connection requests
                           // (pushed by FreeConnReq, popped by GetConnReq).

//
// ISN (Initial Sequence Number) generator globals.
//
#define ISN_KEY_SIZE 256  // Size in bytes of the RC4 key material (2048 bits).
#define ISN_DEF_RAND_STORE_SIZE 256  // Default entries per random store.
#define ISN_MIN_RAND_STORE_SIZE 1  // Smallest store size accepted at init.
#define ISN_MAX_RAND_STORE_SIZE 16384  // Largest store size accepted at init.

//
// Per-processor ISN state.  InitISNGenerator allocates one of these for
// each processor, and GetRandomISN/GetRandBits index the array by the
// current processor number.
//
typedef struct _ISN_RAND_STORE {
    MD5_CONTEXT Md5Context;  // MD5 state used to hash connection invariants.
    ulong iBuf;              // Running index into pBuf (masked on use).
    ushort* pBuf;            // Array of ISNStoreSize RC4-generated values.
} ISN_RAND_STORE, *PISN_RAND_STORE;

RC4_KEYSTRUCT ISNRC4Key;  // RC4 key schedule built by InitISNGenerator.
PISN_RAND_STORE ISNStore;  // Array of per-processor stores.
uint ISNStoreSize = ISN_DEF_RAND_STORE_SIZE;  // Entries per store; rounded
                                              // down to a power of two at
                                              // init.
uint ISNStoreMask;  // ISNStoreSize - 1; used to wrap the store index.
SeqNum ISNMonotonicPortion = 0;  // Shared counter advanced on every ISN.
int ISNCredits;  // Decremented by GetRandomISN each time the larger
                 // random increment is used.
int ISNLastIsnUpdateTime;  // Interrupt time (in ms) of the last
                           // GetDeltaTime call.
int ISNMaxCredits;  // Initial credit count recorded at init.
extern PDRIVER_OBJECT TCPDriverObject;

KSPIN_LOCK ConnReqFreeLock;  // Lock passed to the interlocked operations on
                             // the ConnReq free list and on NumConnReq.
uint NumConnReq;  // Number of ConnReqs allocated from pool so far.
uint MaxConnReq = 0xffffffff;  // Maximum allowed number of ConnReqs.
uint ConnPerBlock = MAX_CONN_PER_BLOCK;  // Connection slots per block.
uint NextConnBlock = 0;  // Cached hint: block index at which GetConnID
                         // starts searching for a free slot.
uint MaxAllocatedConnBlocks = 0;  // Current number of blocks in the
                                  // ConnTable.
TCPConnBlock **ConnTable = NULL;  // The current connection table.

KSPIN_LOCK ConnTableLock;  // Held by GetConnID while adding a new block.
extern KSPIN_LOCK AddrObjTableLock;
extern KSPIN_LOCK TCBTableLock;

// Defined later in this file; declared here because CloseDone and
// DisassocDone call it before its definition.
extern void RemoveConnFromAO(AddrObj *AO, TCPConn *Conn);
//
// All of the init code can be discarded.
//
// InitTCPConn and InitISNGenerator are placed in the INIT section and
// UnloadISNGenerator in the PAGE section.
//
// NOTE(review): the GetRandBits/GetDeltaTime prototypes below are only
// visible when ALLOC_PRAGMA is defined, yet GetRandomISN calls both before
// their definitions.  Presumably they are also declared in a header, or
// ALLOC_PRAGMA is always defined - confirm.
//
#ifdef ALLOC_PRAGMA
int InitTCPConn(void);
int InitISNGenerator(void);
void UnloadISNGenerator(void);
int GetRandBits();
uint GetDeltaTime();
#pragma alloc_text(INIT, InitTCPConn)
#pragma alloc_text(INIT, InitISNGenerator)
#pragma alloc_text(PAGE, UnloadISNGenerator)
#endif // ALLOC_PRAGMA

// Forward declaration; the definition is not in this part of the file.
void CompleteConnReq(TCB *CmpltTCB, TDI_STATUS Status);
//* UnloadISNGenerator - Unload the support for the ISN generator.
//
// Called when we are unloading the driver.
//
void // Returns: Nothing.
UnloadISNGenerator(void) { CCHAR i; ASSERT(ISNStore);
for (i = 0; i < KeNumberProcessors; i++) { if (ISNStore[i].pBuf != NULL) { ExFreePool(ISNStore[i].pBuf); ISNStore[i].pBuf = NULL; } } ExFreePool(ISNStore); ISNStore = NULL; }
//* InitISNGenerator - Initialize the support for the ISN generator.
//
// Called when the driver is loaded. Get 2048 bits of randomness and
// use them to create an RC4 key.
//
int //Returns: TRUE if successful.
InitISNGenerator(void) { ULONG cBits = 0; ULONG i; ULONG cProcs = KeNumberProcessors; ULONG ISNRandomValue; unsigned char pBuf[ISN_KEY_SIZE];
//
// Start with the credits that would last for 1 tick.
//
ISNMaxCredits = ISNCredits = MAX_ISN_INCREMENTABLE_CONNECTIONS_PER_100MS; ISNLastIsnUpdateTime = (int)X100NSTOMS(KeQueryInterruptTime());
if (!GetSystemRandomBits(pBuf, ISN_KEY_SIZE)) { return FALSE; }
//
// Generate the key control structure.
//
rc4_key(&ISNRC4Key, ISN_KEY_SIZE, pBuf);
//
// Initalialize the current sequence number to a random value.
//
rc4(&ISNRC4Key, sizeof(SeqNum), (uchar*)&ISNMonotonicPortion);
//
// Obtain a random value to be used along with the invariants to compute
// the MD5 hash.
//
rc4(&ISNRC4Key, sizeof(ISNRandomValue), (uchar*)&ISNRandomValue);
//
// Round down the store size to power of 2. Verify in range.
//
while ((ISNStoreSize = ISNStoreSize >> 1) != 0) { cBits++; }
ISNStoreSize = 1 << cBits;
if (ISNStoreSize < ISN_MIN_RAND_STORE_SIZE || ISNStoreSize > ISN_MAX_RAND_STORE_SIZE) { ISNStoreSize = ISN_DEF_RAND_STORE_SIZE; }
//
// The mask is store size - 1.
//
ISNStoreMask = ISNStoreSize - 1;
//
// Initialize the random ISN store. One array/index per processor.
//
ISNStore = ExAllocatePool(NonPagedPool, cProcs * sizeof(ISN_RAND_STORE));
if (ISNStore == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_RARE, "Tcpip: failed to allocate ISN rand store\n")); return FALSE; } RtlZeroMemory(ISNStore, sizeof(ISN_RAND_STORE) * cProcs);
for (i = 0; i < cProcs; i++) { ISNStore[i].pBuf = ExAllocatePool(NonPagedPool, sizeof(ushort) * ISNStoreSize);
if (ISNStore[i].pBuf == NULL) { goto error1; } rc4(&ISNRC4Key, sizeof(ushort) * ISNStoreSize, (uchar*)ISNStore[i].pBuf);
//
// Initialize structures required to call the MD5 transform.
//
MD5InitializeData(&ISNStore[i].Md5Context, ISNRandomValue); }
return TRUE;
error1:
UnloadISNGenerator(); return FALSE; }
//* GetRandomISN - Produce a random Initial Sequence Number.
//
//  Called when an Initial Sequence Number (ISN) is needed.  The result is
//  the sum of a shared counter (advanced by a random amount on each call)
//  and the first word of an MD5 transform over the connection invariants.
//
//  This routine does not raise IRQL itself; it asserts that the caller is
//  already at DISPATCH_LEVEL or above, since it uses the current
//  processor's entry in ISNStore.
//
void  // Returns: Nothing.
GetRandomISN(
    SeqNum *Seq,           // Returned sequence number.
    uchar *TcbInvariants)  // Connection invariants.
{
    ulong Increment = 0;
    ulong ThisProc;
    PMD5_CONTEXT HashCtx;

    ASSERT(KeGetCurrentIrql() >= DISPATCH_LEVEL);

    ThisProc = KeGetCurrentProcessorNumber();

    //
    // Use the large random increment only while credits remain for this
    // time period.  [Note: the decrement can take ISNCredits below zero;
    // that is harmless.]
    //
    if ((ISNCredits > 0) &&
        (InterlockedDecrement((PLONG)&ISNCredits) > 0)) {
        //
        // Keep 15 random bits and force bit 14 on, giving an increment
        // in the range 16K .. (32K - 1).
        //
        Increment = ((ulong)GetRandBits() & 0x7FFF) | 0x4000;
    } else {
        int ElapsedMs = GetDeltaTime();

        if (ElapsedMs > 0) {
            //
            // Advance by 256..511 per elapsed millisecond.
            //
            Increment = ((ulong)GetRandBits() & 0x1FF) | 0x100;
            Increment *= ElapsedMs;
        }
    }

    //
    // Advance the shared counter.  InterlockedExchangeAdd returns the
    // value from before the addition.
    //
    *Seq = InterlockedExchangeAdd((PLONG)&ISNMonotonicPortion, Increment);

    //
    // Hash the connection invariants with this processor's MD5 context.
    //
    HashCtx = &ISNStore[ThisProc].Md5Context;
    MD5InitializeScratch(HashCtx);
    RtlCopyMemory(HashCtx->Data, TcbInvariants, TCP_MD5_DATA_LENGTH);
    TransformMD5(HashCtx->Scratch, HashCtx->Data);

    //
    // Fold the invariant hash into the sequence number.
    //
    *Seq += (ULONG)(HashCtx->Scratch[0]);
}
//* GetRandBits
//
// Returns 16 random bits from the random number array generated using RC4.
// When the store is exhausted, it will be replenished.
//
int // Returns: 16 bits of random data.
GetRandBits() { ulong iStore; int randbits; ulong iProc = KeGetCurrentProcessorNumber();
//
// Get index into the random store. Mask performs mod operation.
//
iStore = ++ISNStore[iProc].iBuf & ISNStoreMask; ASSERT(iStore < ISNStoreSize);
randbits = ISNStore[iProc].pBuf[iStore];
if (iStore == 0) { rc4(&ISNRC4Key, sizeof(ushort) * ISNStoreSize, (uchar*) ISNStore[iProc].pBuf); }
return randbits; }
//* GetDeltaTime - Time elapsed since the previous call.
//
//  Tracks the time-based updates of the ISN.  Atomically records the
//  current interrupt time (in ms) as the last update time and returns how
//  much time has passed since the previously recorded value, so the caller
//  can advance the ISN by a proportional amount.  The returned value never
//  exceeds 200 ms.
//
uint  // Returns: Delta time in milliseconds (0 .. 200).
GetDeltaTime()
{
    int NowMs = (int)X100NSTOMS(KeQueryInterruptTime());
    int LastMs;
    int ElapsedMs;

    //
    // Publish the new timestamp and fetch the old one in a single step.
    //
    LastMs = InterlockedExchange((PLONG)&ISNLastIsnUpdateTime, NowMs);
    ElapsedMs = NowMs - LastMs;

    if (ElapsedMs <= 0) {
        return 0;
    }

    return MIN(ElapsedMs, 200);
}
//
// Routines for handling conn refcount going to 0.
//
//* DummyDone - Zero-refcount handler used when there is nothing to do.
//
//  Called with TCPConnBlock.cb_lock held; simply drops that lock.
//
void  // Returns: Nothing.
DummyDone(
    TCPConn *Conn,      // Connection whose reference count reached 0.
    KIRQL PreLockIrql)  // IRQL prior to TCPConnBlock.cb_lock acquisition.
{
    TCPConnBlock *Block = Conn->tc_ConnBlock;

    KeReleaseSpinLock(&Block->cb_lock, PreLockIrql);
}
//* DummyCmplt - Do-nothing close completion routine.
//
//  Has the three-argument shape used for completion routines in this file
//  and ignores every argument.
//
void  // Returns: Nothing.
DummyCmplt(
    PVOID Dummy1,  // Ignored.
    uint Dummy2,   // Ignored.
    uint Dummy3)   // Ignored.
{
    UNREFERENCED_PARAMETER(Dummy1);
    UNREFERENCED_PARAMETER(Dummy2);
    UNREFERENCED_PARAMETER(Dummy3);
}
//* CloseDone - Finish closing a connection whose refcount reached 0.
//
//  Called with TCPConnBlock.cb_lock held.  The lock is dropped and then
//  re-taken after AddrObjTableLock; the connection is detached from its
//  AddrObj (if any), freed, and the saved completion routine is invoked
//  with TDI_SUCCESS.
//
void  // Returns: Nothing.
CloseDone(
    TCPConn *Conn,  // Connection going to 0.
    KIRQL Irql0)    // IRQL prior to TCPConnBlock.cb_lock acquisition.
{
    RequestCompleteRoutine CloseRtn;  // Completion routine to call.
    PVOID CloseContext;               // Context for the completion routine.
    AddrObj *OwningAO;
    KIRQL BlockIrql, AOIrql;

    ASSERT(Conn->tc_flags & CONN_CLOSING);

    //
    // Capture the completion information before the connection is freed.
    //
    CloseRtn = Conn->tc_rtn;
    CloseContext = Conn->tc_rtncontext;
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);

    //
    // Take the locks again, table lock first.
    //
    KeAcquireSpinLock(&AddrObjTableLock, &Irql0);
    KeAcquireSpinLock(&Conn->tc_ConnBlock->cb_lock, &BlockIrql);

    OwningAO = Conn->tc_ao;
    if (OwningAO != NULL) {
        CHECK_STRUCT(OwningAO, ao);

        //
        // Still associated: unlink from the AddrObj under its lock.
        //
        KeAcquireSpinLock(&OwningAO->ao_lock, &AOIrql);
        RemoveConnFromAO(OwningAO, Conn);
        KeReleaseSpinLock(&OwningAO->ao_lock, AOIrql);
    }

    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, BlockIrql);
    KeReleaseSpinLock(&AddrObjTableLock, Irql0);

    ExFreePool(Conn);

    (*CloseRtn)(CloseContext, TDI_SUCCESS, 0);
}
//* DisassocDone - Called when we need to complete a disassociate.
//
//  Called with TCPConnBlock.cb_lock held.  Drops that lock, re-acquires it
//  after AddrObjTableLock, and then either removes the connection from its
//  AddrObj and completes the disassociate request, or - if a close was
//  started in the meantime - hands the connection to CloseDone.
//
void  // Returns: Nothing.
DisassocDone(TCPConn *Conn,  // Connection going to 0.
             KIRQL Irql0)    // IRQL prior to TCPConnBlock.cb_lock
                             // acquisition.
{
    RequestCompleteRoutine Rtn;  // Completion routine.
    PVOID Context;               // User context for completion routine.
    AddrObj *AO;
    uint NeedClose = FALSE;      // Set if CONN_CLOSING appeared while the
                                 // lock was dropped.
    KIRQL Irql1, Irql2;

    ASSERT(Conn->tc_flags & CONN_DISACC);
    ASSERT(!(Conn->tc_flags & CONN_CLOSING));
    ASSERT(Conn->tc_refcnt == 0);

    Rtn = Conn->tc_rtn;
    Context = Conn->tc_rtncontext;

    // The count is set to 1 for the window in which cb_lock is not held
    // and reset to 0 further down.  Presumably this keeps other paths
    // from treating the connection as unreferenced meanwhile - confirm.
    Conn->tc_refcnt = 1;
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);

    // Re-acquire, table lock first.  Irql0 now holds the IRQL from before
    // AddrObjTableLock; Irql1 the IRQL from before cb_lock.
    KeAcquireSpinLock(&AddrObjTableLock, &Irql0);
    KeAcquireSpinLock(&Conn->tc_ConnBlock->cb_lock, &Irql1);
    if (!(Conn->tc_flags & CONN_CLOSING)) {

        AO = Conn->tc_ao;
        if (AO != NULL) {
            KeAcquireSpinLock(&AO->ao_lock, &Irql2);
            RemoveConnFromAO(AO, Conn);
            KeReleaseSpinLock(&AO->ao_lock, Irql2);
        }

        ASSERT(Conn->tc_refcnt == 1);
        Conn->tc_flags &= ~CONN_DISACC;
    } else
        NeedClose = TRUE;

    Conn->tc_refcnt = 0;

    // The locks are released in the same order they were taken, so the
    // saved IRQLs are deliberately swapped: the table lock is released
    // with Irql1 here, and cb_lock (still held) is later released with
    // Irql0, either below or inside CloseDone.
    KeReleaseSpinLock(&AddrObjTableLock, Irql1);

    if (NeedClose) {
        // CloseDone expects cb_lock held; it releases the lock and frees
        // Conn.
        CloseDone(Conn, Irql0);
    } else {
        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
        (*Rtn)(Context, TDI_SUCCESS, 0);
    }
}
//* FreeConnReq - Return a connection request structure to the free list.
//
//  The request is not handed back to pool; it is pushed onto the
//  ConnReqFree list, from which GetConnReq can reuse it.
//
void  // Returns: Nothing.
FreeConnReq(
    TCPConnReq *FreedReq)  // Connection request structure to be freed.
{
    PSLIST_ENTRY ListEntry;

    CHECK_STRUCT(FreedReq, tcr);

    //
    // The request's embedded queue link doubles as the SLIST entry.
    //
    ListEntry = CONTAINING_RECORD(&(FreedReq->tcr_req.tr_q.q_next),
                                  SLIST_ENTRY, Next);

    ExInterlockedPushEntrySList(&ConnReqFree, ListEntry, &ConnReqFreeLock);
}
//* GetConnReq - Get a connection request structure.
//
// Called to get a connection request structure.
//
TCPConnReq * // Returns: Pointer to ConnReq structure, or NULL if none.
GetConnReq(void) // Nothing.
{ TCPConnReq *Temp; PSLIST_ENTRY BufferLink; Queue *QueuePtr; TCPReq *ReqPtr;
BufferLink = ExInterlockedPopEntrySList(&ConnReqFree, &ConnReqFreeLock);
if (BufferLink != NULL) { QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next); ReqPtr = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q); Temp = CONTAINING_RECORD(ReqPtr, TCPConnReq, tcr_req); CHECK_STRUCT(Temp, tcr); } else { if (NumConnReq < MaxConnReq) Temp = ExAllocatePool(NonPagedPool, sizeof(TCPConnReq)); else Temp = NULL;
if (Temp != NULL) { ExInterlockedAddUlong((PULONG)&NumConnReq, 1, &ConnReqFreeLock); #if DBG
Temp->tcr_req.tr_sig = tr_signature; Temp->tcr_sig = tcr_signature; #endif
} }
return Temp; }
//* GetConnFromConnID - Look up a connection by its connection ID.
//
//  Bounds-checks the block and slot indices encoded in the ConnID, then
//  confirms under the block lock that the slot is still occupied and that
//  the connection's instance number matches the one in the ConnID.
//  If successful, returns with TCPConnBlock.cb_lock held; on failure no
//  lock is held.
//
TCPConn *  // Returns: Pointer to the TCPConn, or NULL.
GetConnFromConnID(
    uint ConnID,  // Connection ID to find a pointer for.
    KIRQL* Irql)  // Receives IRQL prior to TCPConnBlock.cb_lock acquisition.
{
    uint SlotIndex = CONN_INDEX(ConnID);
    uint BlockIndex = CONN_BLOCKID(ConnID);
    TCPConnBlock *Block;
    TCPConn *Found;

    if (SlotIndex >= MAX_CONN_PER_BLOCK ||
        BlockIndex >= MaxAllocatedConnBlocks) {
        return NULL;
    }

    Block = ConnTable[BlockIndex];
    if (!Block) {
        return NULL;
    }

    //
    // Unlocked peek first, so the lock is not taken for an empty slot.
    //
    if (Block->cb_conn[SlotIndex] == NULL) {
        return NULL;
    }

    KeAcquireSpinLock(&Block->cb_lock, Irql);

    //
    // Revalidate under lock that the conn is still in the conn table.
    //
    Found = Block->cb_conn[SlotIndex];
    if (Found != NULL) {
        CHECK_STRUCT(Found, tc);
        if (Found->tc_inst == CONN_INST(ConnID)) {
            // Match: return with the block lock held.
            return Found;
        }
    }

    KeReleaseSpinLock(&Block->cb_lock, *Irql);
    return NULL;
}
//* GetConnID - Get a ConnTable slot.
//
//  Called during OpenConnection to find a free slot in the ConnTable and
//  set it up with a connection.  Three strategies are tried in order:
//    1. Reuse the slot NewConn previously occupied, if it is still free.
//    2. Search the allocated blocks, starting at the NextConnBlock hint.
//    3. Allocate a new block, if the table has room to grow.
//  If successful, returns with TCPConnBlock.cb_lock held and *Irql0 set to
//  the IRQL from before any lock was taken.  On failure no lock is held.
//
uint  // Returns: A ConnId to use, or INVALID_CONN_ID.
GetConnID(
    TCPConn *NewConn,  // Connection to enter into slot.
    KIRQL *Irql0)      // Receives IRQL prior to TCPConnBlock.cb_lock
                       // acquisition.
{
    uint CurrConnID = NewConn->tc_connid;
    uint i, j, BlockID, ConnIndex;

    //
    // If NewConn contains a valid ConnID and that location is unoccupied,
    // reuse it.
    //
    if (CurrConnID != INVALID_CONN_ID &&
        !NewConn->tc_ConnBlock->cb_conn[CONN_INDEX(CurrConnID)]) {

        KeAcquireSpinLock(&NewConn->tc_ConnBlock->cb_lock, Irql0);

        //
        // Reconfirm under lock that the location is unoccupied and, if so,
        // claim it.
        //
        if (!NewConn->tc_ConnBlock->cb_conn[CONN_INDEX(CurrConnID)]) {
            NewConn->tc_ConnBlock->cb_conn[CONN_INDEX(CurrConnID)] = NewConn;
            NewConn->tc_ConnBlock->cb_freecons--;
            NewConn->tc_inst = NewConn->tc_ConnBlock->cb_conninst++;
            NewConn->tc_connid =
                MAKE_CONN_ID(CONN_INDEX(CurrConnID),
                             NewConn->tc_ConnBlock->cb_blockid,
                             NewConn->tc_inst);
            return NewConn->tc_connid;
        }
        KeReleaseSpinLock(&NewConn->tc_ConnBlock->cb_lock, *Irql0);
    }

    //
    // NewConn's last spot is taken; search from the block from which
    // a ConnID was claimed most recently.
    //
    if (MaxAllocatedConnBlocks) {
        //
        // Capture the global counters without acquiring the lock.
        //
        uint TempMaxAllocatedConnBlocks = MaxAllocatedConnBlocks;
        uint TempNextConnBlock = NextConnBlock;

        for (i = 0; i < TempMaxAllocatedConnBlocks; i++) {
            BlockID = (TempNextConnBlock + i) % TempMaxAllocatedConnBlocks;

            if (!ConnTable[BlockID] || !ConnTable[BlockID]->cb_freecons) {
                continue;
            }

            //
            // Reconfirm under lock that the TCPConnBlock has free slots.
            //
            KeAcquireSpinLock(&ConnTable[BlockID]->cb_lock, Irql0);
            if (!ConnTable[BlockID]->cb_freecons) {
                KeReleaseSpinLock(&ConnTable[BlockID]->cb_lock, *Irql0);
                continue;
            }

            for (j = 0; j < MAX_CONN_PER_BLOCK; j++) {
                ConnIndex = (ConnTable[BlockID]->cb_nextfree + j) %
                            MAX_CONN_PER_BLOCK;
                if (ConnTable[BlockID]->cb_conn[ConnIndex]) {
                    continue;
                }

                //
                // Found the free slot; fill it in.
                //
                ConnTable[BlockID]->cb_conn[ConnIndex] = NewConn;
                ConnTable[BlockID]->cb_nextfree = ConnIndex + 1;
                ConnTable[BlockID]->cb_freecons--;
                if (!ConnTable[BlockID]->cb_freecons) {
                    //
                    // This block just filled up, so advance the search
                    // hint, but only if nobody else has moved it since we
                    // captured it.  The argument order is (Destination,
                    // Exchange, Comparand): the new value comes second
                    // and the expected old value third.  The hint may
                    // reach or pass the block count; it is always reduced
                    // modulo the block count before use.
                    //
                    InterlockedCompareExchange((PLONG)&NextConnBlock,
                                               TempNextConnBlock + 1,
                                               TempNextConnBlock);
                }
                NewConn->tc_ConnBlock = ConnTable[BlockID];
                NewConn->tc_inst = ConnTable[BlockID]->cb_conninst++;
                NewConn->tc_connid = MAKE_CONN_ID(ConnIndex, BlockID,
                                                  NewConn->tc_inst);
                return NewConn->tc_connid;
            }

            //
            // No free slot found in this block after all; try the next.
            //
            KeReleaseSpinLock(&ConnTable[BlockID]->cb_lock, *Irql0);
        }
    }

    //
    // The entire table is occupied; if we have room to grow,
    // allocate a new block.
    //
    KeAcquireSpinLock(&ConnTableLock, Irql0);
    if (MaxAllocatedConnBlocks < MaxConnBlocks) {
        TCPConnBlock* ConnBlock;

        BlockID = MaxAllocatedConnBlocks;
        ConnBlock = ExAllocatePool(NonPagedPool, sizeof(TCPConnBlock));
        if (ConnBlock) {
            RtlZeroMemory(ConnBlock, sizeof(TCPConnBlock));
            KeInitializeSpinLock(&ConnBlock->cb_lock);

            //
            // Take the new block's lock before publishing it; the caller
            // expects to receive it held.
            //
            KeAcquireSpinLockAtDpcLevel(&ConnBlock->cb_lock);

            //
            // Slot 0 goes to NewConn with instance 1.
            //
            ConnBlock->cb_blockid = BlockID;
            ConnBlock->cb_freecons = MAX_CONN_PER_BLOCK - 1;
            ConnBlock->cb_nextfree = 1;
            ConnBlock->cb_conninst = 2;
            ConnBlock->cb_conn[0] = NewConn;

            NewConn->tc_ConnBlock = ConnBlock;
            NewConn->tc_inst = 1;
            NewConn->tc_connid = MAKE_CONN_ID(0, BlockID, NewConn->tc_inst);

            ConnTable[BlockID] = ConnBlock;
            InterlockedIncrement((PLONG)&MaxAllocatedConnBlocks);

            //
            // Drop only the table lock and leave the IRQL raised; the
            // block lock remains held and *Irql0 still carries the
            // original IRQL for the caller's eventual release.
            //
            KeReleaseSpinLockFromDpcLevel(&ConnTableLock);

            return NewConn->tc_connid;
        }
    }

    KeReleaseSpinLock(&ConnTableLock, *Irql0);
    return INVALID_CONN_ID;
}
//* FreeConnID - Release a ConnTable slot.
//
//  Called when we're done with a ConnID.  Clears the connection's slot in
//  its block, bumps the block's free count, and records the slot as the
//  block's next-free hint.  The caller is assumed to hold the lock on the
//  TCPConnBlock.
//
void  // Returns: Nothing.
FreeConnID(
    TCPConn *Conn)  // Conn to be freed.
{
    uint Slot = CONN_INDEX(Conn->tc_connid);       // Index within the block.
    uint BlockIndex = CONN_BLOCKID(Conn->tc_connid);
    TCPConnBlock* Block = Conn->tc_ConnBlock;

    ASSERT(Slot < MAX_CONN_PER_BLOCK);
    ASSERT(BlockIndex < MaxAllocatedConnBlocks);
    ASSERT(Block->cb_conn[Slot] != NULL);

    if (!Block->cb_conn[Slot]) {
        //
        // The slot was already empty.
        //
        ABORT();
        return;
    }

    Block->cb_conn[Slot] = NULL;
    Block->cb_freecons++;
    Block->cb_nextfree = Slot;
    ASSERT(Block->cb_freecons <= MAX_CONN_PER_BLOCK);
}
//* MapIPError - Map an IP error to a TDI error.
//
// Called to map an input IP error code to a TDI error code. If we can't,
// we return the provided default.
//
TDI_STATUS // Returns: Mapped TDI error.
MapIPError( IP_STATUS IPError, // Error code to be mapped.
TDI_STATUS Default) // Default error code to return.
{ switch (IPError) {
case IP_DEST_NO_ROUTE: return TDI_DEST_NET_UNREACH; case IP_DEST_ADDR_UNREACHABLE: return TDI_DEST_HOST_UNREACH; case IP_UNRECOGNIZED_NEXT_HEADER: return TDI_DEST_PROT_UNREACH; case IP_DEST_PORT_UNREACHABLE: return TDI_DEST_PORT_UNREACH; default: return Default; } }
//* FinishRemoveTCBFromConn - Finish removing a TCB from a conn structure.
//
//  Clears the connection's pointer to the given TCB.  Does nothing unless
//  the TCB points at a connection that in turn points back at the TCB.
//  Locks are taken in the order: TCPConnBlock.cb_lock, AddrObj.ao_lock
//  (if the connection has an AddrObj), then the TCB lock.  If the AddrObj
//  is valid and the TCB still refers to the same connection, the
//  connection is moved onto the AddrObj's idle queue.
//
void  // Returns: Nothing.
FinishRemoveTCBFromConn(
    TCB *RemovedTCB)  // TCB to be removed.
{
    TCPConn *Conn;
    AddrObj *AO;
    KIRQL Irql;
    TCPConnBlock *ConnBlock = NULL;

    // Unlocked check: proceed only if TCB and connection reference each
    // other.
    if (((Conn = RemovedTCB->tcb_conn) != NULL) &&
        (Conn->tc_tcb == RemovedTCB)) {
        CHECK_STRUCT(Conn, tc);
        ConnBlock = Conn->tc_ConnBlock;

        KeAcquireSpinLock(&ConnBlock->cb_lock, &Irql);

        AO = Conn->tc_ao;

        if (AO != NULL) {
            KeAcquireSpinLockAtDpcLevel(&AO->ao_lock);
            if (AO_VALID(AO)) {
                KeAcquireSpinLockAtDpcLevel(&RemovedTCB->tcb_lock);

                // Need to double check this is still correct.
                if (Conn == RemovedTCB->tcb_conn) {
                    // Everything still looks good: move the connection
                    // from its current queue to the AddrObj's idle queue.
                    REMOVEQ(&Conn->tc_q);
                    PUSHQ(&AO->ao_idleq, &Conn->tc_q);
                } else
                    // The TCB's connection changed while we were getting
                    // the locks; continue with whatever it points at now.
                    Conn = RemovedTCB->tcb_conn;
            } else {
                // AddrObj is not valid: skip the queue move, but still
                // pick up the TCB's current connection under its lock.
                KeAcquireSpinLockAtDpcLevel(&RemovedTCB->tcb_lock);
                Conn = RemovedTCB->tcb_conn;
            }

            KeReleaseSpinLockFromDpcLevel(&AO->ao_lock);
        } else {
            // Not associated with an AddrObj.
            KeAcquireSpinLockAtDpcLevel(&RemovedTCB->tcb_lock);
            Conn = RemovedTCB->tcb_conn;
        }

        // Every path above leaves the TCB lock held.  Break the conn->TCB
        // link if it still points at this TCB.
        if (Conn != NULL) {
            if (Conn->tc_tcb == RemovedTCB)
                Conn->tc_tcb = NULL;
            else
                ASSERT(Conn->tc_tcb == NULL);
        }

        KeReleaseSpinLockFromDpcLevel(&RemovedTCB->tcb_lock);
        KeReleaseSpinLock(&ConnBlock->cb_lock, Irql);
    }
}
//* RemoveTCBFromConn - Detach a TCB from its Conn structure.
//
//  Called when we need to disassociate a TCB from a connection structure.
//  This is a thin wrapper: it sanity-checks the TCB and defers to
//  FinishRemoveTCBFromConn, which acquires the locks it needs itself.
//
void  // Returns: Nothing.
RemoveTCBFromConn(
    TCB *RemovedTCB)  // TCB to be removed.
{
    CHECK_STRUCT(RemovedTCB, tcb);

    FinishRemoveTCBFromConn(RemovedTCB);
}
//* RemoveConnFromTCB - Break the final TCB -> connection link.
//
//  Drops the TCB's reference on its connection and clears the TCB's
//  connection pointer.  If that was the last reference and the connection
//  has a zero-refcount handler, the handler is invoked with the block lock
//  still held (the handlers in this file release it).  Otherwise the block
//  lock is released here.
//
void  // Returns: Nothing.
RemoveConnFromTCB(
    TCB *RemoveTCB)  // TCB to be removed.
{
    TCPConn *Conn = RemoveTCB->tcb_conn;
    ConnDoneRtn ZeroRefRtn = NULL;
    KIRQL OldIrql = 0;

    if (Conn == NULL) {
        // Nothing is attached.
        return;
    }

    KeAcquireSpinLock(&Conn->tc_ConnBlock->cb_lock, &OldIrql);
    KeAcquireSpinLockAtDpcLevel(&RemoveTCB->tcb_lock);

    CHECK_STRUCT(Conn, tc);

    Conn->tc_refcnt--;
    if (Conn->tc_refcnt == 0) {
        ZeroRefRtn = Conn->tc_donertn;
    }

    RemoveTCB->tcb_conn = NULL;
    KeReleaseSpinLockFromDpcLevel(&RemoveTCB->tcb_lock);

    if (ZeroRefRtn != NULL) {
        //
        // The handler takes over responsibility for the block lock.
        //
        (*ZeroRefRtn)(Conn, OldIrql);
        return;
    }

    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, OldIrql);
}
//* CloseTCB - Close a TCB.
//
//  Called when we are done with a TCB, and want to free it.  We'll remove
//  him from any tables that he's in, and destroy any outstanding requests.
//
//  Entered with the TCB lock held (it is released on every path), a
//  reference count of zero, and DEL_PENDING set.  The sequence is:
//    1. Update statistics and mark the TCB closed.
//    2. Detach the TCB from its connection and from the TCB table.
//    3. Send a reset if the TCB is flagged as needing one.
//    4. Release the NTE and RCE references.
//    5. Complete every outstanding request with a status derived from
//       the close reason.
//    6. Free queued receive data, break the last connection link, and
//       free the TCB (immediately, or once its walk count drops to zero).
//
void  // Returns: Nothing.
CloseTCB(
    TCB *ClosedTCB,  // TCB to be closed.
    KIRQL OldIrql)   // IRQL prior to acquiring TCB lock.
{
    uchar OrigState = ClosedTCB->tcb_state;  // State on entry; tcb_state is
                                             // overwritten below.
    TDI_STATUS Status;  // Status used to complete pending requests.
    uint OKToFree;      // Result of RemoveTCB; non-zero means the TCB can
                        // be freed right away.

    CHECK_STRUCT(ClosedTCB, tcb);
    ASSERT(ClosedTCB->tcb_refcnt == 0);
    ASSERT(ClosedTCB->tcb_state != TCB_CLOSED);
    ASSERT(ClosedTCB->tcb_pending & DEL_PENDING);

    //
    // We'll check to make sure that our state isn't CLOSED.  This should
    // never happen, since nobody should call TryToCloseTCB when the state
    // is closed, or take the reference count if we're closing.
    // Nevertheless, we'll double check as a safety measure.
    //
    if (ClosedTCB->tcb_state == TCB_CLOSED) {
        KeReleaseSpinLock(&ClosedTCB->tcb_lock, OldIrql);
        return;
    }

    //
    // Update SNMP counters.  If we're in SYN-SENT or SYN-RCVD, this is a
    // failed connection attempt.  If we're in ESTABLISHED or CLOSE-WAIT,
    // treat this as an 'Established Reset' event.
    //
    if (ClosedTCB->tcb_state == TCB_SYN_SENT ||
        ClosedTCB->tcb_state == TCB_SYN_RCVD)
        TStats.ts_attemptfails++;
    else if (ClosedTCB->tcb_state == TCB_ESTAB ||
             ClosedTCB->tcb_state == TCB_CLOSE_WAIT) {
        TStats.ts_estabresets++;
        InterlockedDecrement((PLONG)&TStats.ts_currestab);
        ASSERT(*(int *)&TStats.ts_currestab >= 0);
    }

    // Mark the TCB closed, then drop its lock without lowering IRQL; the
    // IRQL is only restored to OldIrql when TCBTableLock is released below.
    ClosedTCB->tcb_state = TCB_CLOSED;
    KeReleaseSpinLockFromDpcLevel(&ClosedTCB->tcb_lock);

    //
    // Remove the TCB from its associated TCPConn structure, if it has one.
    //
    FinishRemoveTCBFromConn(ClosedTCB);

    // Pull the TCB out of the TCB table: table lock first, then TCB lock.
    KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
    KeAcquireSpinLockAtDpcLevel(&ClosedTCB->tcb_lock);

    OKToFree = RemoveTCB(ClosedTCB);

    //
    // He's been pulled from the appropriate places so nobody can find him.
    // Free the locks, and proceed to destroy any requests, etc.
    //
    KeReleaseSpinLockFromDpcLevel(&ClosedTCB->tcb_lock);
    KeReleaseSpinLock(&TCBTableLock, OldIrql);

    // Send a reset only if the original state qualifies (a SYNC_STATE or
    // SYN-RCVD, and not a gracefully closed state) and the TCB is flagged
    // NEED_RST.
    if ((SYNC_STATE(OrigState) || OrigState == TCB_SYN_RCVD) &&
        !GRACEFUL_CLOSED_STATE(OrigState)) {
        if (ClosedTCB->tcb_flags & NEED_RST)
            SendRSTFromTCB(ClosedTCB);
    }

    //
    // Release our references on our NTE and RCE.
    // We won't be sending anymore on this TCB.
    //
    if (ClosedTCB->tcb_nte != NULL)
        ReleaseNTE(ClosedTCB->tcb_nte);
    if (ClosedTCB->tcb_rce != NULL)
        ReleaseRCE(ClosedTCB->tcb_rce);

    // Derive the completion status from the close reason.  The order of
    // the tests fixes the precedence when several reason bits are set:
    // reset, aborted, timeout, refused, unreachable.
    if (ClosedTCB->tcb_closereason & TCB_CLOSE_RST)
        Status = TDI_CONNECTION_RESET;
    else if (ClosedTCB->tcb_closereason & TCB_CLOSE_ABORTED)
        Status = TDI_CONNECTION_ABORTED;
    else if (ClosedTCB->tcb_closereason & TCB_CLOSE_TIMEOUT)
        Status = MapIPError(ClosedTCB->tcb_error, TDI_TIMED_OUT);
    else if (ClosedTCB->tcb_closereason & TCB_CLOSE_REFUSED)
        Status = TDI_CONN_REFUSED;
    else if (ClosedTCB->tcb_closereason & TCB_CLOSE_UNREACH)
        Status = MapIPError(ClosedTCB->tcb_error, TDI_DEST_UNREACHABLE);
    else
        Status = TDI_SUCCESS;

    //
    // Now complete any outstanding requests on the TCB.
    //

    // A pending abort request always completes successfully.
    if (ClosedTCB->tcb_abortreq != NULL) {
        TCPAbortReq* AbortReq = ClosedTCB->tcb_abortreq;

        (*AbortReq->tar_rtn)(AbortReq->tar_context, TDI_SUCCESS, 0);
    }

    // Pending connect request.
    if (ClosedTCB->tcb_connreq != NULL) {
        TCPConnReq *ConnReq = ClosedTCB->tcb_connreq;

        CHECK_STRUCT(ConnReq, tcr);

        (*ConnReq->tcr_req.tr_rtn)(ConnReq->tcr_req.tr_context, Status, 0);
        FreeConnReq(ConnReq);
    }

    // Pending disconnect-wait request.
    if (ClosedTCB->tcb_discwait != NULL) {
        TCPConnReq *ConnReq = ClosedTCB->tcb_discwait;

        CHECK_STRUCT(ConnReq, tcr);

        (*ConnReq->tcr_req.tr_rtn)(ConnReq->tcr_req.tr_context, Status, 0);
        FreeConnReq(ConnReq);
    }

    // Drain the send queue.
    while (!EMPTYQ(&ClosedTCB->tcb_sendq)) {
        TCPReq *Req;
        TCPSendReq *SendReq;
        long Result;

        DEQUEUE(&ClosedTCB->tcb_sendq, Req, TCPReq, tr_q);

        CHECK_STRUCT(Req, tr);
        SendReq = (TCPSendReq *)Req;
        CHECK_STRUCT(SendReq, tsr);

        //
        // Set the status before dropping the ref count.
        //
        SendReq->tsr_req.tr_status = Status;

        //
        // Decrement the initial reference put on the buffer when it was
        // allocated.  This reference would have been decremented if the
        // send had been acknowledged, but then the send would not still
        // be on the tcb_sendq.
        //
        Result = InterlockedDecrement(&(SendReq->tsr_refcnt));

        ASSERT(Result >= 0);

        // Complete and free the request only when no references remain;
        // otherwise it is left for whoever still holds one.
        if (Result <= 0) {
            // If we've sent directly from this send, NULL out the next
            // pointer for the last buffer in the chain.
            if (SendReq->tsr_lastbuf != NULL) {
                NDIS_BUFFER_LINKAGE(SendReq->tsr_lastbuf) = NULL;
                SendReq->tsr_lastbuf = NULL;
            }

            (*Req->tr_rtn)(Req->tr_context, Status, 0);
            FreeSendReq(SendReq);
        }
    }

    // Complete and free every request on the tcb_rcvhead list.
    while (ClosedTCB->tcb_rcvhead != NULL) {
        TCPRcvReq *RcvReq;

        RcvReq = ClosedTCB->tcb_rcvhead;
        CHECK_STRUCT(RcvReq, trr);
        ClosedTCB->tcb_rcvhead = RcvReq->trr_next;
        (*RcvReq->trr_rtn)(RcvReq->trr_context, Status, 0);
        FreeRcvReq(RcvReq);
    }

    // Same for the tcb_exprcv list.
    while (ClosedTCB->tcb_exprcv != NULL) {
        TCPRcvReq *RcvReq;

        RcvReq = ClosedTCB->tcb_exprcv;
        CHECK_STRUCT(RcvReq, trr);
        ClosedTCB->tcb_exprcv = RcvReq->trr_next;
        (*RcvReq->trr_rtn)(RcvReq->trr_context, Status, 0);
        FreeRcvReq(RcvReq);
    }

    // Free the packet chains hanging off tcb_pendhead and tcb_urgpending.
    if (ClosedTCB->tcb_pendhead != NULL)
        FreePacketChain(ClosedTCB->tcb_pendhead);

    if (ClosedTCB->tcb_urgpending != NULL)
        FreePacketChain(ClosedTCB->tcb_urgpending);

    // Free each header on the tcb_raq list along with its buffer chain.
    while (ClosedTCB->tcb_raq != NULL) {
        TCPRAHdr *Hdr;

        Hdr = ClosedTCB->tcb_raq;
        CHECK_STRUCT(Hdr, trh);
        ClosedTCB->tcb_raq = Hdr->trh_next;
        if (Hdr->trh_buffer != NULL)
            FreePacketChain(Hdr->trh_buffer);

        ExFreePool(Hdr);
    }

    // Break the last TCB -> connection link.
    RemoveConnFromTCB(ClosedTCB);

    if (OKToFree) {
        FreeTCB(ClosedTCB);
    } else {
        // RemoveTCB reported the TCB could not be freed yet.  Drop one
        // from its walk count under the table lock and free it if that
        // count reaches zero.
        KeAcquireSpinLock(&TCBTableLock, &OldIrql);
        ClosedTCB->tcb_walkcount--;
        if (ClosedTCB->tcb_walkcount == 0) {
            FreeTCB(ClosedTCB);
        }
        KeReleaseSpinLock(&TCBTableLock, OldIrql);
    }
}
//* TryToCloseTCB - Close a TCB now, or arrange for it to close later.
//
//  Records the close reason and marks the TCB delete-pending.  If nobody
//  holds a reference, CloseTCB is called right away.  Otherwise the close
//  happens when the reference count drops to zero (see DerefTCB).  If a
//  delete is already pending, only the reason is recorded.  The TCB lock
//  must be held on entry and is released on every path.
//
void  // Returns: Nothing.
TryToCloseTCB (
    TCB *ClosedTCB,     // TCB to be closed.
    uchar Reason,       // Reason we're closing.
    KIRQL PreLockIrql)  // IRQL prior to acquiring the TCB lock.
{
    CHECK_STRUCT(ClosedTCB, tcb);
    ASSERT(ClosedTCB->tcb_state != TCB_CLOSED);

    ClosedTCB->tcb_closereason |= Reason;

    if (!(ClosedTCB->tcb_pending & DEL_PENDING)) {
        //
        // First close request: flag the delete and force the slow path.
        //
        ClosedTCB->tcb_pending |= DEL_PENDING;
        ClosedTCB->tcb_slowcount++;
        ClosedTCB->tcb_fastchk |= TCP_FLAG_SLOW;

        if (ClosedTCB->tcb_refcnt == 0) {
            //
            // CloseTCB releases the TCB lock for us.
            //
            CloseTCB(ClosedTCB, PreLockIrql);
            return;
        }
    }

    KeReleaseSpinLock(&ClosedTCB->tcb_lock, PreLockIrql);
}
//* DerefTCB - Drop a reference on a TCB.
//
//  Decrements the reference count.  If the count reaches zero and actions
//  are pending, one of them is performed: a pending reset notification
//  takes priority, then a pending delete.  Called with the TCB lock held.
//
//  NOTE(review): if the count reaches zero with pending bits other than
//  RST_PENDING/DEL_PENDING set, the routine breaks into the debugger and
//  returns without releasing the TCB lock.
//
void  // Returns: Nothing.
DerefTCB(
    TCB *DoneTCB,       // TCB to be dereffed.
    KIRQL PreLockIrql)  // IRQL prior to acquiring the TCB lock.
{
    ASSERT(DoneTCB->tcb_refcnt != 0);

    DoneTCB->tcb_refcnt--;

    //
    // Common case: references remain, or nothing is waiting on us.
    //
    if (DoneTCB->tcb_refcnt != 0 || DoneTCB->tcb_pending == 0) {
        KeReleaseSpinLock(&DoneTCB->tcb_lock, PreLockIrql);
        return;
    }

    if (DoneTCB->tcb_pending & RST_PENDING) {
        //
        // Hold a reference across the notification, then re-take the
        // lock and drop that reference again through this routine.
        //
        DoneTCB->tcb_refcnt++;
        NotifyOfDisc(DoneTCB, TDI_CONNECTION_RESET, &PreLockIrql);
        KeAcquireSpinLock(&DoneTCB->tcb_lock, &PreLockIrql);
        DerefTCB(DoneTCB, PreLockIrql);
        return;
    }

    if (DoneTCB->tcb_pending & DEL_PENDING) {
        CloseTCB(DoneTCB, PreLockIrql);
    } else {
        DbgBreakPoint();  // Fatal condition.
    }
}
//* CalculateMSSForTCB - Recompute MSS and related values after a PMTU change.
//
//  Derives the connection's MSS from its path MTU, the IPv6 and TCP header
//  sizes, any IPSec overhead, and the MSS advertised by the remote side.
//  Expected to be called whenever the cached PMTU has been updated.  Also
//  raises the congestion window (and, in that case, the slow start
//  threshold) if the new MSS makes them too small.
//
void  // Returns: Nothing.
CalculateMSSForTCB(
    TCB *ThisTCB)  // The TCB we're running our calculations on.
{
    uint Budget;             // Bytes available, starting from the PMTU.
    IPSecProc *Policy;
    uint TrailerBytes = 0;
    uint IPSecOverhead = 0;
    uint Unused;

    ASSERT(ThisTCB->tcb_pmtu != 0);  // Should be set before entering.
    ASSERT(ThisTCB->tcb_rce != NULL);

    //
    // Sanity-limit the PMTU.  IP won't let it fall below the minimum, but
    // TCP imposes its own maximum since the MSS must fit in 16 bits.
    // TBD: If we add IPv6 Jumbogram support, we should also add LFN
    // TBD: support to TCP and change this to handle a larger MSS.
    //
    Budget = ThisTCB->tcb_pmtu;
    if (Budget > 65535) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_RARE,
                   "TCPSend: PMTU update value too large %u\n", Budget));
        Budget = 65535;
    }

    //
    // Look up the outbound IPSec policy, if any, to size its headers.
    //
    Policy = OutboundSPLookup(&ThisTCB->tcb_saddr, &ThisTCB->tcb_daddr,
                              IP_PROTOCOL_TCP,
                              net_short(ThisTCB->tcb_sport),
                              net_short(ThisTCB->tcb_dport),
                              ThisTCB->tcb_rce->NTE->IF, &Unused);
    if (Policy != NULL) {
        //
        // Total IPSec cost is the inserted headers plus the trailer.
        //
        IPSecOverhead = IPSecBytesToInsert(Policy, &Unused, &TrailerBytes);
        FreeIPSecToDo(Policy, Policy->BundleSize);
        IPSecOverhead += TrailerBytes;
    }

    IF_TCPDBG(TCP_DEBUG_MSS) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                   "CalculateMSSForTCB: IPSecBytes is %u\n", IPSecOverhead));
    }

    //
    // Remove the header sizes to get the TCP MSS.  When there is an ESP
    // trailer, round the MSS down to a multiple of 4 so that the trailer
    // ends on a 4-byte boundary.
    //
    Budget -= sizeof(IPv6Header) + sizeof(TCPHeader) + IPSecOverhead;
    if (TrailerBytes != 0) {
        Budget &= ~(uint)3;
    }

    //
    // The MSS never exceeds what the peer advertised, however large the
    // Path MTU is.
    //
    IF_TCPDBG(TCP_DEBUG_MSS) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                   "CalculateMSSForTCB: Old MSS is %u ", ThisTCB->tcb_mss));
    }

    ThisTCB->tcb_mss = (ushort)MIN(Budget, ThisTCB->tcb_remmss);

    IF_TCPDBG(TCP_DEBUG_MSS) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                   "New MSS is %u\n", ThisTCB->tcb_mss));
    }

    ASSERT(ThisTCB->tcb_mss != 0);

    //
    // The congestion window must hold at least one maximum segment, so it
    // may need to grow along with the MSS.
    //
    if (ThisTCB->tcb_cwin < ThisTCB->tcb_mss) {
        ThisTCB->tcb_cwin = ThisTCB->tcb_mss;

        //
        // Keep the slow start threshold at two segments or more.
        //
        if (ThisTCB->tcb_ssthresh < ((uint) ThisTCB->tcb_mss * 2)) {
            ThisTCB->tcb_ssthresh = ThisTCB->tcb_mss * 2;
        }
    }
}
//** TdiOpenConnection - Open a connection.
//
//  This is the TDI Open Connection entry point.  A TCPConn structure is
//  allocated, entered into the connection table, and initialized with
//  default settings; the caller's connection context is saved in it.  The
//  resulting ConnID is handed back through the request's connection
//  context.  No TCB is attached here (tc_tcb starts out NULL).
//
TDI_STATUS  // Returns: TDI_SUCCESS, or TDI_NO_RESOURCES on failure.
TdiOpenConnection(
    PTDI_REQUEST Request,  // This TDI request.
    PVOID Context)         // Connection context to be saved for connection.
{
    TCPConn *Conn;    // The newly opened connection.
    KIRQL SavedIrql;  // Irql prior to acquiring TCPConnBlock lock.
    uint NewID;       // ConnID assigned to the connection.

    Conn = ExAllocatePool(NonPagedPool, sizeof(TCPConn));
    if (Conn == NULL) {
        //
        // Couldn't get a connection.
        //
        return TDI_NO_RESOURCES;
    }

    RtlZeroMemory(Conn, sizeof(TCPConn));
#if DBG
    Conn->tc_sig = tc_signature;
#endif
    Conn->tc_tcb = NULL;
    Conn->tc_ao = NULL;
    Conn->tc_context = Context;
    Conn->tc_connid = INVALID_CONN_ID;

    //
    // On success GetConnID returns with the block lock held.
    //
    NewID = GetConnID(Conn, &SavedIrql);
    if (NewID == INVALID_CONN_ID) {
        ExFreePool(Conn);
        return TDI_NO_RESOURCES;
    }

    //
    // We have a slot; finish initializing the connection under the lock.
    //
    Request->Handle.ConnectionContext =
        (CONNECTION_CONTEXT)UIntToPtr(NewID);
    Conn->tc_refcnt = 0;
    Conn->tc_flags = 0;
    Conn->tc_tcbflags = NAGLING | (BSDUrgent ? BSD_URGENT : 0);

    if (DefaultRcvWin == 0) {
        Conn->tc_window = DEFAULT_RCV_WIN;
    } else {
        //
        // A receive window was configured; note that it is explicitly set.
        //
        Conn->tc_window = DefaultRcvWin;
        Conn->tc_flags |= CONN_WINSET;
    }

    Conn->tc_donertn = DummyDone;
    Conn->tc_owningpid = HandleToUlong(PsGetCurrentProcessId());

    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, SavedIrql);

    return TDI_SUCCESS;
}
//* RemoveConnFromAO - Remove a connection from an AddrObj.
//
//  Small helper that detaches a connection from its AddrObj: the
//  connection's back pointer to the AddrObj is cleared and its queue
//  linkage (tc_q) is spliced out of whatever queue it is on.  The caller
//  is assumed to hold the AddrObj lock and the TCPConnBlock lock.
//
void  // Returns: Nothing.
RemoveConnFromAO(
    AddrObj *AO,    // AddrObj to remove from.
    TCPConn *Conn)  // Conn to remove.
{
    CHECK_STRUCT(AO, ao);
    CHECK_STRUCT(Conn, tc);

    // Forget the owning AddrObj, then unlink from its queue.
    Conn->tc_ao = NULL;
    REMOVEQ(&Conn->tc_q);
}
//* TdiCloseConnection - Close a connection.
//
//  Called when the user is done with a connection and wants to close it.
//  We look the connection up by its ID; if found, the ID is freed and the
//  connection is marked CONN_CLOSING.
//
//  If nothing references the connection (tc_refcnt == 0) the close is
//  finished right here via CloseDone and we return TDI_SUCCESS.
//
//  Otherwise the close has to be completed later: the caller's completion
//  routine and context are stored in the connection, CloseDone is installed
//  as the done routine, and TDI_PENDING is returned.  Two wrinkles:
//
//    - If a disassociate is in progress (CONN_DISACC), the previously saved
//      completion routine is called with TDI_REQ_ABORTED.  The conn block
//      lock is dropped around that call.
//    - If a TCB is still attached and not yet in TCB_CLOSED, it is flagged
//      NEED_RST and, unless it is already closing, closed abortively.
//
//  IRQL hand-off: when the conn block lock is released while the TCB lock is
//  still held, it is released with the IRQL saved by the TCB lock acquire,
//  and the TCB lock is later released with the IRQL saved by the conn block
//  lookup.
//
TDI_STATUS  // Returns: Status of attempt to close.
TdiCloseConnection(
    PTDI_REQUEST Request)  // Request identifying connection to be closed.
{
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    KIRQL ConnIrql;                  // Saved by the connection lookup.
    KIRQL TcbIrql;                   // Saved when taking the TCB lock.
    TCPConn *Conn;
    TCB *ConnTCB;
    RequestCompleteRoutine PrevRtn;  // Completion routine already on conn.
    PVOID PrevContext;               // Context that goes with PrevRtn.

    //
    // Try to find the connection.
    //
    Conn = GetConnFromConnID(ConnID, &ConnIrql);
    if (Conn == NULL)
        return TDI_INVALID_CONNECTION;

    //
    // We found the connection.  Free the ConnID and mark the connection
    // as closing.
    //
    CHECK_STRUCT(Conn, tc);
    FreeConnID(Conn);
    Conn->tc_flags |= CONN_CLOSING;

    if (Conn->tc_refcnt == 0) {
        //
        // Nobody references this connection, so we can finish the close
        // right now.
        //
        Conn->tc_rtn = DummyCmplt;
        CloseDone(Conn, ConnIrql);
        return TDI_SUCCESS;
    }

    //
    // Something still references the connection.  Remember the completion
    // info currently stored on it (a disassociate may be in flight), then
    // install the caller's callback and our done routine.
    //
    PrevRtn = Conn->tc_rtn;
    PrevContext = Conn->tc_rtncontext;

    Conn->tc_rtn = Request->RequestNotifyObject;
    Conn->tc_rtncontext = Request->RequestContext;
    Conn->tc_donertn = CloseDone;

    if (Conn->tc_flags & CONN_DISACC) {
        //
        // A disassociate is in progress.  Drop the conn block lock and
        // fail that request.  The reference count may reach zero while
        // the lock is dropped; the completion info needed for the close
        // is already stored in the connection, and from here on we return
        // pending, so a callback is acceptable.
        //
        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
        (*PrevRtn)(PrevContext, (uint) TDI_REQ_ABORTED, 0);
        KeAcquireSpinLock(&Conn->tc_ConnBlock->cb_lock, &ConnIrql);
    }

    ConnTCB = Conn->tc_tcb;
    if (ConnTCB != NULL) {
        CHECK_STRUCT(ConnTCB, tcb);

        //
        // We have a TCB.  Take the lock on him and get ready to close him.
        //
        KeAcquireSpinLock(&ConnTCB->tcb_lock, &TcbIrql);
        if (ConnTCB->tcb_state == TCB_CLOSED) {
            //
            // He's already closed.  This should be harmless; just let go
            // of the TCB and fall through to the common pending exit.
            //
            KeReleaseSpinLock(&ConnTCB->tcb_lock, TcbIrql);
        } else {
            ConnTCB->tcb_flags |= NEED_RST;
            KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, TcbIrql);
            if (CLOSING(ConnTCB))
                KeReleaseSpinLock(&ConnTCB->tcb_lock, ConnIrql);
            else
                TryToCloseTCB(ConnTCB, TCB_CLOSE_ABORTED, ConnIrql);
            return TDI_PENDING;
        }
    }

    //
    // The close will complete once the outstanding references go away.
    //
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
    return TDI_PENDING;
}
//* TdiAssociateAddress - Associate an address with a connection.
//
//  Called to associate an address object with a connection.  After some
//  minimal sanity checking (the AddrObj must be valid, the connection must
//  exist and must not already be associated) the connection is pushed onto
//  the AddrObj's idle queue.
//
TDI_STATUS  // Returns: Status of attempt to associate.
TdiAssociateAddress(
    PTDI_REQUEST Request,  // Structure for this request.
    HANDLE AddrHandle)     // Address handle to associate connection with.
{
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    KIRQL ConnIrql;  // Saved by the connection lookup.
    KIRQL AOIrql;    // Saved when taking the AddrObj lock.
    AddrObj *AO;
    TCPConn *Conn;
    TDI_STATUS Status;

    AO = (AddrObj *)AddrHandle;
    CHECK_STRUCT(AO, ao);

    Conn = GetConnFromConnID(ConnID, &ConnIrql);
    KeAcquireSpinLock(&AO->ao_lock, &AOIrql);

    if (!AO_VALID(AO)) {
        //
        // The address object is no good.  Unwind whatever we hold.
        //
        KeReleaseSpinLock(&AO->ao_lock, AOIrql);
        if (Conn != NULL)
            KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
        return TDI_INVALID_PARAMETER;
    }

    if (Conn == NULL) {
        //
        // No such connection.  Only the AddrObj lock is held.
        //
        KeReleaseSpinLock(&AO->ao_lock, AOIrql);
        return TDI_INVALID_CONNECTION;
    }

    CHECK_STRUCT(Conn, tc);

    if (Conn->tc_ao == NULL) {
        //
        // Not yet associated.  Hook the connection to the AddrObj and
        // park it on the idle queue.
        //
        Conn->tc_ao = AO;
        ASSERT(Conn->tc_tcb == NULL);
        PUSHQ(&AO->ao_idleq, &Conn->tc_q);
        Status = TDI_SUCCESS;
    } else {
        //
        // It's already associated.  Error out.
        //
        KdBreakPoint();
        Status = TDI_ALREADY_ASSOCIATED;
    }

    KeReleaseSpinLock(&AO->ao_lock, AOIrql);
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
    return Status;
}
//* TdiDisAssociateAddress - Disassociate a connection from an address.
//
//  The TDI entry point to disassociate a connection from an address.  The
//  connection must exist, be associated with an AddrObj, and have no TCB
//  attached.  If the connection is still referenced the request is left
//  pending: the caller's completion info is stored on the connection,
//  DisassocDone is installed as the done routine, and CONN_DISACC is set.
//
//  Locks are taken in the order AddrObjTableLock, conn block lock, AddrObj
//  lock, and released in the reverse order.
//
TDI_STATUS  // Returns: Status of request.
TdiDisAssociateAddress(
    PTDI_REQUEST Request)  // Structure for this request.
{
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    KIRQL TableIrql;  // Saved when taking AddrObjTableLock.
    KIRQL ConnIrql;   // Saved by the connection lookup.
    KIRQL AOIrql;     // Saved when taking the AddrObj lock.
    TCPConn *Conn;
    AddrObj *AO;
    TDI_STATUS Status;

    KeAcquireSpinLock(&AddrObjTableLock, &TableIrql);

    Conn = GetConnFromConnID(ConnID, &ConnIrql);
    if (Conn == NULL) {
        Status = TDI_INVALID_CONNECTION;
    } else {
        //
        // The connection actually exists!
        //
        CHECK_STRUCT(Conn, tc);

        AO = Conn->tc_ao;
        if (AO == NULL) {
            Status = TDI_NOT_ASSOCIATED;
        } else {
            //
            // And it's associated.
            //
            CHECK_STRUCT(AO, ao);
            KeAcquireSpinLock(&AO->ao_lock, &AOIrql);

            if (Conn->tc_tcb != NULL) {
                //
                // A connection is active; error the request out.
                //
                Status = TDI_CONNECTION_ACTIVE;
            } else if (Conn->tc_refcnt == 0) {
                //
                // Nothing active and nothing referencing him, so remove
                // him from the AddrObj right away.
                //
                RemoveConnFromAO(AO, Conn);
                Status = TDI_SUCCESS;
            } else {
                //
                // Still referenced.  He shouldn't be closing, or we
                // couldn't have found him.  Arrange for the disassociate
                // to complete later.
                //
                ASSERT(!(Conn->tc_flags & CONN_CLOSING));

                Conn->tc_rtn = Request->RequestNotifyObject;
                Conn->tc_rtncontext = Request->RequestContext;
                Conn->tc_donertn = DisassocDone;
                Conn->tc_flags |= CONN_DISACC;
                Status = TDI_PENDING;
            }

            KeReleaseSpinLock(&AO->ao_lock, AOIrql);
        }

        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
    }

    KeReleaseSpinLock(&AddrObjTableLock, TableIrql);

    return Status;
}
//* InitTCBFromConn - Initialize a TCB from information in a Connection.
//
//  Called from Connect and Listen processing to initialize a new TCB from
//  information in the connection and its associated AddrObj.  We assume the
//  AddrObjTableLock and TCPConnBlock locks are held when we are called, or
//  that the caller has some other way of making sure that the referenced AO
//  doesn't go away in the middle of the operation.
//
//  The AddrObj lock is taken (and released) here only when the caller says
//  it does not already hold it.
//
//  Returns TDI_INVALID_CONNECTION if the connection is flagged CONN_INVALID,
//  TDI_CONNECTION_ACTIVE if it already has a TCB, TDI_NOT_ASSOCIATED if it
//  has no AddrObj, and TDI_SUCCESS otherwise.
//
TDI_STATUS  // Returns: TDI_STATUS of init attempt.
InitTCBFromConn(
    TCPConn *Conn,                     // Connection to initialize from.
    TCB *NewTCB,                       // TCB to be initialized.
    PTDI_CONNECTION_INFORMATION Addr,  // Remote addr info, etc. for NewTCB.
    uint AOLocked)                     // True if caller has addr object lock.
{
    KIRQL SavedIrql;  // Only meaningful when we take the AO lock ourselves.
    AddrObj *ConnAO;

    CHECK_STRUCT(Conn, tc);

    //
    // Make sure the connection is usable, doesn't already have a TCB
    // attached, and is associated with an address.
    //
    if (Conn->tc_flags & CONN_INVALID)
        return TDI_INVALID_CONNECTION;

    if (Conn->tc_tcb != NULL)
        return TDI_CONNECTION_ACTIVE;

    ConnAO = Conn->tc_ao;
    if (ConnAO == NULL)
        return TDI_NOT_ASSOCIATED;

    CHECK_STRUCT(ConnAO, ao);

    if (!AOLocked) {
        KeAcquireSpinLock(&ConnAO->ao_lock, &SavedIrql);
    }

    if (!(NewTCB->tcb_flags & ACCEPT_PENDING)) {
        //
        // These fields are already initialized when ACCEPT_PENDING is on,
        // so only fill them in otherwise.
        //
        NewTCB->tcb_saddr = ConnAO->ao_addr;
        NewTCB->tcb_sscope_id = ConnAO->ao_scope_id;
        NewTCB->tcb_sport = ConnAO->ao_port;
        NewTCB->tcb_defaultwin = Conn->tc_window;
        NewTCB->tcb_rcvwin = Conn->tc_window;
    }

    //
    // Pick the receive handler: indicate data if the AddrObj supplied a
    // receive routine, otherwise pend it.
    //
    NewTCB->tcb_rcvind = ConnAO->ao_rcv;
    NewTCB->tcb_ricontext = ConnAO->ao_rcvcontext;
    if (NewTCB->tcb_rcvind != NULL)
        NewTCB->tcb_rcvhndlr = IndicateData;
    else
        NewTCB->tcb_rcvhndlr = PendData;

    NewTCB->tcb_conncontext = Conn->tc_context;
    NewTCB->tcb_flags |= Conn->tc_tcbflags;

    if (Conn->tc_flags & CONN_WINSET)
        NewTCB->tcb_flags |= WINDOW_SET;

    if (NewTCB->tcb_flags & KEEPALIVE) {
        NewTCB->tcb_alive = TCPTime;
        NewTCB->tcb_kacount = 0;
    }

    NewTCB->tcb_hops = ConnAO->ao_ucast_hops;

    if (!AOLocked) {
        KeReleaseSpinLock(&ConnAO->ao_lock, SavedIrql);
    }

    return TDI_SUCCESS;
}
//* TdiConnect - Establish a connection.
//
//  The TDI connection establishment routine.  Called when the client wants
//  to establish a connection, we validate his incoming parameters and kick
//  things off by sending a SYN.
//
//  Outline:
//    1. Validate the remote address (a destination port of 0 is rejected).
//    2. Get a ConnReq and a TCB up front and fill in the timeout and
//       destination.
//    3. Look up the connection and its AddrObj, initialize the TCB from
//       them, and pick the source NTE and a route (RCE).
//    4. Attach the TCB to the connection, compute the MSS, initialize the
//       send state, insert the TCB and send the SYN.
//
//  Every failure before the TCB is attached to the connection leaves through
//  the common "error" exit, which drops the NTE reference (if one was
//  taken) and frees the TCB and the ConnReq.  NTE therefore starts out NULL
//  so that exits taken before an NTE lookup do not release garbage.
//
//  Locking: locks are taken in the order AddrObjTableLock (Irql0), conn
//  block lock (Irql1), AddrObj lock (Irql2), and later the TCB lock.  They
//  are not released in reverse order, so the saved IRQLs are handed from
//  one lock to the next; the Irql argument on each release below is
//  deliberate.
//
//  Note: The format of the timeout (TO) parameter is system specific -
//  we use a macro to convert to ticks.
//
TDI_STATUS  // Returns: Status of attempt to connect.
TdiConnect(
    PTDI_REQUEST Request,                     // This command request.
    void *TO,                                 // How long to wait for request.
    PTDI_CONNECTION_INFORMATION RequestAddr,  // Describes the destination.
    PTDI_CONNECTION_INFORMATION ReturnAddr)   // Where to return information.
{
    TCPConnReq *ConnReq;  // Connection request to use.
    IPv6Addr DestAddr;
    ulong DestScopeId;
    ushort DestPort;
    TCPConn *Conn;
    TCB *NewTCB;
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    KIRQL Irql0, Irql1, Irql2;  // One per lock nesting level.
    AddrObj *AO;
    TDI_STATUS Status;
    IP_STATUS IPStatus;
    TCP_TIME *Timeout;
    NetTableEntry *NTE = NULL;  // Must be NULL for the early error exits.
    NetTableEntryOrInterface *NTEorIF;

    //
    // First, get and validate the remote address.
    //
    if (RequestAddr == NULL || RequestAddr->RemoteAddress == NULL ||
        !GetAddress((PTRANSPORT_ADDRESS)RequestAddr->RemoteAddress, &DestAddr,
                    &DestScopeId, &DestPort))
        return TDI_BAD_ADDR;

    //
    // REVIEW: IPv4 performed other remote address sanity checks here.
    // REVIEW: E.g., should we check that remote addr isn't multicast?
    //

    //
    // REVIEW: I can't find an RFC which states 0 is not a valid port number.
    //
    if (DestPort == 0)
        return TDI_BAD_ADDR;

    //
    // Get a connection request.  If we can't, bail out now.
    //
    ConnReq = GetConnReq();
    if (ConnReq == NULL)
        return TDI_NO_RESOURCES;

    //
    // Get a TCB, assuming we'll need one.
    //
    NewTCB = AllocTCB();
    if (NewTCB == NULL) {
        // Couldn't get a TCB.
        FreeConnReq(ConnReq);
        return TDI_NO_RESOURCES;
    }

    //
    // Convert the timeout to ticks.  A finite timeout is clamped to
    // MAX_CONN_TO_TICKS, otherwise bumped by one tick.  No timeout (or an
    // infinite one) is stored as 0.
    //
    Timeout = (TCP_TIME *)TO;

    if (Timeout != NULL && !INFINITE_CONN_TO(*Timeout)) {
        ulong Ticks = TCP_TIME_TO_TICKS(*Timeout);

        if (Ticks > MAX_CONN_TO_TICKS)
            Ticks = MAX_CONN_TO_TICKS;
        else
            Ticks++;
        ConnReq->tcr_timeout = (ushort)Ticks;
    } else
        ConnReq->tcr_timeout = 0;

    ConnReq->tcr_flags = 0;
    ConnReq->tcr_conninfo = ReturnAddr;
    ConnReq->tcr_addrinfo = NULL;
    ConnReq->tcr_req.tr_rtn = Request->RequestNotifyObject;
    ConnReq->tcr_req.tr_context = Request->RequestContext;
    NewTCB->tcb_daddr = DestAddr;
    NewTCB->tcb_dscope_id = DestScopeId;
    NewTCB->tcb_dport = DestPort;

    //
    // Now find the real connection.
    //
    KeAcquireSpinLock(&AddrObjTableLock, &Irql0);
    Conn = GetConnFromConnID(ConnID, &Irql1);
    if (Conn != NULL) {
        uint Inserted;

        CHECK_STRUCT(Conn, tc);

        //
        // We found the connection.  Check for an associated address object.
        //
        AO = Conn->tc_ao;
        if (AO != NULL) {
            KeAcquireSpinLock(&AO->ao_lock, &Irql2);

            CHECK_STRUCT(AO, ao);

            Status = InitTCBFromConn(Conn, NewTCB, RequestAddr, TRUE);
            if (Status == TDI_SUCCESS) {
                //
                // We've initialized our TCB.  Mark it that we initiated this
                // connection (i.e. active open).  Also, we're done with the
                // AddrObjTable, so we can free its lock.
                //
                NewTCB->tcb_flags |= ACTIVE_OPEN;
                KeReleaseSpinLock(&AddrObjTableLock, Irql2);

                //
                // Initialize our routing state validation counter.
                // We need to do this before acquiring an NTE or an RCE
                // (to avoid missing any changes which may occur while
                // we're in the process of acquiring them).
                //
                NewTCB->tcb_routing = RouteCacheValidationCounter;

                //
                // Determine NTE to send on (if user cares).
                //
                if (IsUnspecified(&NewTCB->tcb_saddr)) {
                    //
                    // Caller didn't specify a source address.
                    // Let the routing code pick one.
                    //
                    NTE = NULL;
                    NTEorIF = NULL;

                } else {
                    //
                    // Our TCB has a specific source address.  Determine
                    // which NTE corresponds to it and the scope id.
                    //
                    NTE = FindNetworkWithAddress(&NewTCB->tcb_saddr,
                                                 NewTCB->tcb_sscope_id);
                    if (NTE == NULL) {
                        //
                        // Bad source address.  We don't have a network with
                        // the requested address.  Error out.
                        //
                        // REVIEW: Will the AddrObj code even let this happen?
                        //
                        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_USER_ERROR,
                                   "TdiConnect: Bad source address\n"));
                        KeReleaseSpinLock(&AO->ao_lock, Irql1);
                        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
                        Status = TDI_BAD_ADDR;
                        goto error;
                    }

                    NTEorIF = CastFromNTE(NTE);
                }

                //
                // Get the route.
                //
                ASSERT(NewTCB->tcb_rce == NULL);
                IPStatus = RouteToDestination(&DestAddr, DestScopeId, NTEorIF,
                                              RTD_FLAG_NORMAL,
                                              &NewTCB->tcb_rce);
                if (IPStatus != IP_SUCCESS) {
                    //
                    // Failed to get a route to the destination.  Error out,
                    // mapping the IP status onto a TDI status.
                    //
                    KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
                               "TdiConnect: Failed to get route to dest.\n"));
                    KeReleaseSpinLock(&AO->ao_lock, Irql1);
                    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
                    if ((IPStatus == IP_PARAMETER_PROBLEM) ||
                        (IPStatus == IP_BAD_ROUTE))
                        Status = TDI_BAD_ADDR;
                    else if (IPStatus == IP_NO_RESOURCES)
                        Status = TDI_NO_RESOURCES;
                    else
                        Status = TDI_DEST_UNREACHABLE;
                    goto error;
                }

                ASSERT(NewTCB->tcb_rce != NULL);
                if (IsDisconnectedAndNotLoopbackRCE(NewTCB->tcb_rce)) {
                    //
                    // Fail new connection requests for TCBs with a
                    // disconnected outgoing interface, except when a
                    // loopback route is used.
                    //
                    KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
                               "TdiConnect: Interface disconnected.\n"));
                    KeReleaseSpinLock(&AO->ao_lock, Irql1);
                    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);

                    //
                    // Drop the reference on the route we obtained.
                    //
                    ReleaseRCE(NewTCB->tcb_rce);

                    Status = TDI_DEST_NET_UNREACH;
                    goto error;
                }

                //
                // OK, we got a route.  Enter the TCB into the connection
                // and send a SYN.  The connection moves from its current
                // queue to the AddrObj's active queue.
                //
                KeAcquireSpinLock(&NewTCB->tcb_lock, &Irql2);
                Conn->tc_tcb = NewTCB;
                Conn->tc_refcnt++;
                NewTCB->tcb_conn = Conn;
                NewTCB->tcb_connid = Conn->tc_connid;
                REMOVEQ(&Conn->tc_q);
                ENQUEUE(&AO->ao_activeq, &Conn->tc_q);
                KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql2);
                KeReleaseSpinLock(&AO->ao_lock, Irql1);

                //
                // Initialize path-specific TCB settings, based on the RCE:
                //
                // If packets on the path will be looped back in software,
                // don't use the Nagle algorithm for this TCB.
                //
                if (IsLoopbackRCE(NewTCB->tcb_rce)) {
                    NewTCB->tcb_flags &= ~NAGLING;
                }

                //
                // Keep a reference for the NTE we're using.
                // This prevents the NTE from going away should we release
                // our RCE, and also makes for easy comparisons.
                //
                if (NTE == NULL) {
                    //
                    // We let the routing code pick the source NTE above.
                    // Remember this NTE and address for later use.
                    //
                    NewTCB->tcb_nte = NewTCB->tcb_rce->NTE;
                    AddRefNTE(NewTCB->tcb_nte);
                    NewTCB->tcb_saddr = NewTCB->tcb_nte->Address;
                    NewTCB->tcb_sscope_id =
                        DetermineScopeId(&NewTCB->tcb_saddr,
                                         NewTCB->tcb_nte->IF);
                } else {
                    //
                    // Remember the NTE we found above.
                    // We already hold a reference on it.
                    //
                    NewTCB->tcb_nte = NTE;
                }

                //
                // Similarly, the routing code may have picked
                // the destination scope id if it was left unspecified.
                // REVIEW - getpeername will not return the new DestScopeId.
                //
                DestScopeId = DetermineScopeId(&NewTCB->tcb_daddr,
                                               NewTCB->tcb_rce->NTE->IF);
                ASSERT((NewTCB->tcb_dscope_id == DestScopeId) ||
                       (NewTCB->tcb_dscope_id == 0));
                NewTCB->tcb_dscope_id = DestScopeId;

                //
                // Initialize our Maximum Segment Size (MSS).
                // Cache our current Path Maximum Transmission Unit (PMTU)
                // so that we'll know if it changes.
                //
                NewTCB->tcb_pmtu = GetEffectivePathMTUFromRCE(NewTCB->tcb_rce);
                IF_TCPDBG(TCP_DEBUG_MSS) {
                    KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                               "TCP TdiConnect: PMTU from RCE is %d\n",
                               NewTCB->tcb_pmtu));
                }
                NewTCB->tcb_remmss = MAXUSHORT;
                NewTCB->tcb_security = SecurityStateValidationCounter;
                CalculateMSSForTCB(NewTCB);

                // Now initialize our send state.
                InitSendState(NewTCB);
                NewTCB->tcb_refcnt = 1;
                NewTCB->tcb_state = TCB_SYN_SENT;
                TStats.ts_activeopens++;

                // Need to put the ConnReq on the TCB now, in case the timer
                // fires after we've inserted.
                NewTCB->tcb_connreq = ConnReq;
                KeReleaseSpinLock(&NewTCB->tcb_lock, Irql0);

                Inserted = InsertTCB(NewTCB);
                KeAcquireSpinLock(&NewTCB->tcb_lock, &Irql0);

                if (!Inserted) {
                    // Insert failed.  We must already have a connection.  Pull
                    // the connreq from the TCB first, so we can return the
                    // correct error code for it.
                    NewTCB->tcb_connreq = NULL;
                    TryToCloseTCB(NewTCB, TCB_CLOSE_ABORTED, Irql0);
                    KeAcquireSpinLock(&NewTCB->tcb_lock, &Irql0);
                    DerefTCB(NewTCB, Irql0);
                    FreeConnReq(ConnReq);
                    return TDI_ADDR_IN_USE;
                }

                // If it's closing somehow, stop now.  It can't have gone to
                // closed, as we hold a reference on it.  It could have gone
                // to some other state (for example SYN-RCVD) so we need to
                // check that now too.
                if (!CLOSING(NewTCB) && NewTCB->tcb_state == TCB_SYN_SENT) {
                    SendSYN(NewTCB, Irql0);
                    KeAcquireSpinLock(&NewTCB->tcb_lock, &Irql0);
                }
                DerefTCB(NewTCB, Irql0);

                return TDI_PENDING;
            } else
                KeReleaseSpinLock(&AO->ao_lock, Irql2);
        } else
            Status = TDI_NOT_ASSOCIATED;

        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql1);
    } else
        Status = TDI_INVALID_CONNECTION;

    KeReleaseSpinLock(&AddrObjTableLock, Irql0);

error:
    //
    // Common failure exit.  NTE is non-NULL only if FindNetworkWithAddress
    // gave us one above.
    //
    if (NTE != NULL)
        ReleaseNTE(NTE);
    FreeTCB(NewTCB);
    FreeConnReq(ConnReq);
    return Status;
}
//* TdiListen - Listen for a connection.
//
//  The TDI listen handling routine.  Called when the client wants to post
//  a listen.  We validate the optional remote-address filter, allocate a
//  ConnReq and a TCB (placed in TCB_LISTEN), and if the connection exists,
//  is associated with a valid AddrObj and can be initialized, the TCB is
//  attached to the connection and the connection is moved onto the
//  AddrObj's listen queue.  In that case the request stays pending;
//  otherwise the TCB and ConnReq are freed and an error is returned.
//
TDI_STATUS  // Returns: Status of attempt to listen.
TdiListen(
    PTDI_REQUEST Request,                        // Structure for this request.
    ushort Flags,                                // Listen flags for listen.
    PTDI_CONNECTION_INFORMATION AcceptableAddr,  // Acceptable remote addrs.
    PTDI_CONNECTION_INFORMATION ConnectedAddr)   // Where to return conn addr.
{
    TCPConnReq *ConnReq;  // Connection request to use.
    IPv6Addr RemoteAddr;  // Remote address to take conn. from.
    ulong RemoteScopeId;  // Scope identifier for remote addr (0 is none).
    ushort RemotePort;    // Acceptable remote port.
    TCPConn *Conn;        // Pointer to the Connection being listened upon.
    AddrObj *ConnAO;      // AddrObj the connection is associated with.
    TCB *NewTCB;          // Pointer to the new TCB we'll use.
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    KIRQL OldIrql;        // Save IRQL value prior to taking lock.
    TDI_STATUS Status;

    //
    // If we've been given remote addressing criteria, check it out.
    // Without any, accept from anywhere.
    //
    if (AcceptableAddr == NULL || AcceptableAddr->RemoteAddress == NULL) {
        RemoteAddr = UnspecifiedAddr;
        RemoteScopeId = 0;
        RemotePort = 0;
    } else {
        if (!GetAddress((PTRANSPORT_ADDRESS)AcceptableAddr->RemoteAddress,
                        &RemoteAddr, &RemoteScopeId, &RemotePort))
            return TDI_BAD_ADDR;

        //
        // REVIEW: IPv4 version did some other address sanity checks here.
        // REVIEW: E.g., should we check that remote addr isn't multicast?
        //
    }

    //
    // The remote address is valid.  Get a ConnReq, and then a TCB.
    //
    ConnReq = GetConnReq();
    if (ConnReq == NULL)
        return TDI_NO_RESOURCES;  // Couldn't get one.

    NewTCB = AllocTCB();
    if (NewTCB == NULL) {
        //
        // Couldn't get a TCB.  Return an error.
        //
        FreeConnReq(ConnReq);
        return TDI_NO_RESOURCES;
    }

    //
    // We have the resources we need.  Initialize them, and then check the
    // state of the connection.
    //
    ConnReq->tcr_flags = Flags;
    ConnReq->tcr_conninfo = ConnectedAddr;
    ConnReq->tcr_addrinfo = NULL;
    ConnReq->tcr_req.tr_rtn = Request->RequestNotifyObject;
    ConnReq->tcr_req.tr_context = Request->RequestContext;
    NewTCB->tcb_connreq = ConnReq;
    NewTCB->tcb_daddr = RemoteAddr;
    NewTCB->tcb_dscope_id = RemoteScopeId;
    NewTCB->tcb_dport = RemotePort;
    NewTCB->tcb_state = TCB_LISTEN;

    //
    // Now find the real connection.  If we find it, we'll make sure it's
    // associated.
    //
    Conn = GetConnFromConnID(ConnID, &OldIrql);
    if (Conn == NULL) {
        FreeTCB(NewTCB);
        Status = TDI_INVALID_CONNECTION;
    } else {
        CHECK_STRUCT(Conn, tc);

        ConnAO = Conn->tc_ao;
        if (ConnAO == NULL) {
            FreeTCB(NewTCB);
            Status = TDI_NOT_ASSOCIATED;
        } else {
            CHECK_STRUCT(ConnAO, ao);
            KeAcquireSpinLockAtDpcLevel(&ConnAO->ao_lock);

            //
            // Only initialize from a valid AddrObj.  InitTCBFromConn
            // verifies that no TCB is attached yet.
            //
            if (!AO_VALID(ConnAO)) {
                Status = TDI_ADDR_INVALID;
            } else {
                Status = InitTCBFromConn(Conn, NewTCB, AcceptableAddr, TRUE);
            }

            if (Status != TDI_SUCCESS) {
                FreeTCB(NewTCB);
                KeReleaseSpinLockFromDpcLevel(&ConnAO->ao_lock);
            } else {
                //
                // The initialization worked.  Move the connection to the
                // listen queue and assign the new TCB to it.
                //
                REMOVEQ(&Conn->tc_q);
                PUSHQ(&ConnAO->ao_listenq, &Conn->tc_q);

                Conn->tc_tcb = NewTCB;
                NewTCB->tcb_conn = Conn;
                NewTCB->tcb_connid = Conn->tc_connid;
                Conn->tc_refcnt++;

                ConnAO->ao_listencnt++;
                KeReleaseSpinLockFromDpcLevel(&ConnAO->ao_lock);

                Status = TDI_PENDING;
            }
        }

        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, OldIrql);
    }

    //
    // We're all done.  The ConnReq is only kept if the listen is pending.
    //
    if (Status != TDI_PENDING) {
        FreeConnReq(ConnReq);
    }
    return Status;
}
//* InitRCE - Initialize an RCE.
//
//  A utility routine to open an RCE and determine the maximum segment size
//  for a connection.  This function is called with the TCB lock held
//  when transitioning out of the SYN_SENT or LISTEN states.
//
//  If either the NTE for the source address or a route to the destination
//  cannot be obtained, the TCB is left with conservative values: a PMTU of
//  IPv6_MINIMUM_MTU and an MSS of MIN(DEFAULT_MSS, tcb_remmss).
//
void  // Returns: Nothing.
InitRCE(
    TCB *NewTCB)  // TCB for which an RCE is to be opened.
{
    IP_STATUS RouteStatus;

    //
    // We are called when receiving an incoming connection attempt,
    // so tcb_saddr will always be initialized.
    //
    ASSERT(! IsUnspecified(&NewTCB->tcb_saddr));

    //
    // If we don't already have an NTE for this connection, get one now.
    //
    if (NewTCB->tcb_nte == NULL) {
        //
        // Initialize our routing state validation counter.
        // We need to do this before acquiring an NTE or an RCE
        // (to avoid missing any changes which may occur while
        // we're in the process of acquiring them).
        //
        NewTCB->tcb_routing = RouteCacheValidationCounter;

        NewTCB->tcb_nte = FindNetworkWithAddress(&NewTCB->tcb_saddr,
                                                 NewTCB->tcb_sscope_id);
        if (NewTCB->tcb_nte == NULL) {
            //
            // Failed to get an NTE corresponding to this source address.
            //
            KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
                       "TCP InitRCE: Can't find the NTE for address?!?\n"));
            goto NoRoute;
        }
    }

    //
    // Get the route.
    //
    ASSERT(NewTCB->tcb_rce == NULL);
    RouteStatus = RouteToDestination(&NewTCB->tcb_daddr,
                                     NewTCB->tcb_dscope_id,
                                     CastFromNTE(NewTCB->tcb_nte),
                                     RTD_FLAG_NORMAL, &NewTCB->tcb_rce);
    if (RouteStatus != IP_SUCCESS) {
        //
        // Failed to get a route to the destination.
        //
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
                   "TCP InitRCE: Can't get a route?!?\n"));
        goto NoRoute;
    }

    //
    // Initialize path-specific TCB settings, based on the RCE:
    //
    // If packets on the path will be looped back in software,
    // don't use the Nagle algorithm for this TCB.
    //
    if (IsLoopbackRCE(NewTCB->tcb_rce)) {
        NewTCB->tcb_flags &= ~NAGLING;
    }

    //
    // Initialize the maximum segment size (MSS) for this connection.
    // Cache our current Path Maximum Transmission Unit (PMTU)
    // so that we'll know if it changes.
    //
    NewTCB->tcb_pmtu = GetEffectivePathMTUFromRCE(NewTCB->tcb_rce);
    IF_TCPDBG(TCP_DEBUG_MSS) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                   "TCP InitRCE: PMTU from RCE is %d\n", NewTCB->tcb_pmtu));
    }
    NewTCB->tcb_security = SecurityStateValidationCounter;
    CalculateMSSForTCB(NewTCB);
    return;

NoRoute:
    //
    // Until we have a real route, use conservative values.
    //
    NewTCB->tcb_pmtu = IPv6_MINIMUM_MTU;
    NewTCB->tcb_mss = (ushort)MIN(DEFAULT_MSS, NewTCB->tcb_remmss);
}
//* AcceptConn - Accept a connection on a TCB.
//
//  Called to accept a connection on a TCB, either from an incoming
//  receive segment or via a user's accept.  We initialize the RCE and the
//  send state, adjust the receive window, and send out a SYN.  We assume
//  the TCB is locked and referenced when we get it; after the SYN has been
//  handed off we retake the TCB lock and dereference the TCB.
//
void  // Returns: Nothing.
AcceptConn(
    TCB *AcceptTCB,     // TCB to accept on.
    KIRQL PreLockIrql)  // IRQL prior to acquiring TCB lock.
{
    KIRQL DerefIrql;  // IRQL saved when retaking the TCB lock.

    CHECK_STRUCT(AcceptTCB, tcb);
    ASSERT(AcceptTCB->tcb_refcnt != 0);

    // Route/MSS first, then the send state that is built on top of it.
    InitRCE(AcceptTCB);
    InitSendState(AcceptTCB);

    AdjustRcvWin(AcceptTCB);
    SendSYN(AcceptTCB, PreLockIrql);

    // Retake the lock so the reference can be dropped.
    KeAcquireSpinLock(&AcceptTCB->tcb_lock, &DerefIrql);
    DerefTCB(AcceptTCB, DerefIrql);
}
//* TdiAccept - Accept a connection.
//
//  The TDI accept routine.  Called when the client wants to accept a
//  connection for which a listen had previously completed.  The connection
//  must have a TCB that is not closing, is in SYN-RCVD, has not already
//  been accepted, and has no pending connect request.  If all of that
//  holds the ConnReq is attached, the TCB is referenced and AcceptConn is
//  called; the request then completes later (TDI_PENDING).  Every other
//  outcome after the ConnReq was obtained yields TDI_INVALID_CONNECTION.
//
TDI_STATUS  // Returns: Status of attempt to accept.
TdiAccept(
    PTDI_REQUEST Request,                       // Structure for this request.
    PTDI_CONNECTION_INFORMATION AcceptInfo,     // Info for this accept.
    PTDI_CONNECTION_INFORMATION ConnectedInfo)  // Where to return conn addr.
{
    TCPConnReq *ConnReq;  // ConnReq we'll use for this connection.
    uint ConnID = PtrToUlong(Request->Handle.ConnectionContext);
    TCPConn *Conn;        // Connection being accepted upon.
    TCB *AcceptTCB;       // TCB for Conn.
    KIRQL ConnIrql;       // Saved by the connection lookup.
    KIRQL TcbIrql;        // Saved when taking the TCB lock.

    //
    // First, get the ConnReq we'll need.
    //
    ConnReq = GetConnReq();
    if (ConnReq == NULL)
        return TDI_NO_RESOURCES;

    ConnReq->tcr_conninfo = ConnectedInfo;
    ConnReq->tcr_addrinfo = NULL;
    ConnReq->tcr_req.tr_rtn = Request->RequestNotifyObject;
    ConnReq->tcr_req.tr_context = Request->RequestContext;

    //
    // Now look up the connection.
    //
    Conn = GetConnFromConnID(ConnID, &ConnIrql);
    if (Conn != NULL) {
        CHECK_STRUCT(Conn, tc);

        AcceptTCB = Conn->tc_tcb;
        if (AcceptTCB == NULL) {
            //
            // No TCB on this connection; nothing to accept.
            //
            KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
        } else {
            CHECK_STRUCT(AcceptTCB, tcb);

            //
            // Switch from the conn block lock to the TCB lock.  The saved
            // IRQLs are swapped accordingly.
            //
            KeAcquireSpinLock(&AcceptTCB->tcb_lock, &TcbIrql);
            KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, TcbIrql);

            //
            // The state must be valid, the TCB must have had a delayed
            // accept on it, and there must be no connect request pending.
            //
            if (!CLOSING(AcceptTCB) &&
                AcceptTCB->tcb_state == TCB_SYN_RCVD &&
                !(AcceptTCB->tcb_flags & CONN_ACCEPTED) &&
                AcceptTCB->tcb_connreq == NULL) {

                AcceptTCB->tcb_connreq = ConnReq;
                AcceptTCB->tcb_flags |= CONN_ACCEPTED;
                AcceptTCB->tcb_refcnt++;

                //
                // Everything's set.  Accept the connection now.
                //
                AcceptConn(AcceptTCB, ConnIrql);
                return TDI_PENDING;
            }

            KeReleaseSpinLock(&AcceptTCB->tcb_lock, ConnIrql);
        }
    }

    //
    // No connection, no TCB, or a TCB in the wrong state.
    //
    FreeConnReq(ConnReq);
    return TDI_INVALID_CONNECTION;
}
//* TdiDisconnect - Disconnect a connection.
//
//  The TDI disconnection routine.  Called when the client wants to
//  disconnect a connection.  Three kinds of request are handled:
//
//    - Abortive (TDI_DISCONNECT_ABORT): unless the TCB is already in
//      TCB_CLOSED, it is flagged NEED_RST and closed abortively (if not
//      already closing).  When the caller supplied an AbortReq and none is
//      outstanding on the TCB, the request is left pending on it.
//    - Disconnect-wait (TDI_DISCONNECT_WAIT): a ConnReq is parked in
//      tcb_discwait, provided none is there yet.
//    - Graceful (neither flag): the TCB moves ESTAB -> FIN_WAIT1 or
//      CLOSE_WAIT -> LAST_ACK, FIN_NEEDED is set and TCPSend is called;
//      the request stays pending on the ConnReq.
//
//  Once the TCB lock is held, the conn block lock is released using the
//  IRQL saved by the TCB lock acquire, and the TCB lock is in turn released
//  with the IRQL saved by the connection lookup.
//
//  Note: The format of the Timeout (TO) is system specific - we use
//  a macro to convert to ticks.
//
TDI_STATUS  // Returns: Status of attempt to disconnect.
TdiDisconnect(
    PTDI_REQUEST Request,                      // Structure for this request.
    void *TO,                                  // How long to wait.
    ushort Flags,                              // Type of disconnect.
    PTDI_CONNECTION_INFORMATION DiscConnInfo,  // Ignored.
    PTDI_CONNECTION_INFORMATION ReturnInfo,    // Ignored.
    TCPAbortReq *AbortReq)                     // Space for pending abort.
{
    TCPConnReq *ConnReq;  // Connection request to use.
    TCPConn *Conn;
    TCB *DiscTCB;
    KIRQL ConnIrql;       // Saved by the connection lookup.
    KIRQL TcbIrql;        // Saved when taking the TCB lock.
    TDI_STATUS Status;
    TCP_TIME *Timeout;

    UNREFERENCED_PARAMETER(DiscConnInfo);
    UNREFERENCED_PARAMETER(ReturnInfo);

    Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext),
                             &ConnIrql);
    if (Conn == NULL) {
        // No Conn.  Return an error.
        return TDI_INVALID_CONNECTION;
    }

    CHECK_STRUCT(Conn, tc);

    DiscTCB = Conn->tc_tcb;
    if (DiscTCB == NULL) {
        // No TCB on conn.  Return an error.
        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, ConnIrql);
        return TDI_INVALID_CONNECTION;
    }

    //
    // We have the TCB.  Lock it, and let go of the conn block; from here
    // on only the TCB lock is held.
    //
    CHECK_STRUCT(DiscTCB, tcb);
    KeAcquireSpinLock(&DiscTCB->tcb_lock, &TcbIrql);
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, TcbIrql);

    if (Flags & TDI_DISCONNECT_ABORT) {
        //
        // This is an abortive disconnect.  If we're not already
        // closed or closing, blow the connection away.
        //
        if (DiscTCB->tcb_state == TCB_CLOSED) {
            //
            // The TCB isn't connected.
            //
            KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
            return TDI_INVALID_STATE;
        }

        //
        // The request only pends if the caller gave us an abort request
        // block and the TCB doesn't already have one outstanding.
        //
        Status = TDI_SUCCESS;
        if (AbortReq != NULL && DiscTCB->tcb_abortreq == NULL) {
            AbortReq->tar_rtn = Request->RequestNotifyObject;
            AbortReq->tar_context = Request->RequestContext;
            DiscTCB->tcb_abortreq = AbortReq;
            Status = TDI_PENDING;
        }

        if (CLOSING(DiscTCB)) {
            KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
        } else {
            DiscTCB->tcb_flags |= NEED_RST;
            TryToCloseTCB(DiscTCB, TCB_CLOSE_ABORTED, ConnIrql);
        }

        return Status;
    }

    //
    // This is not an abortive close.  Make sure we aren't in the middle
    // of an abortive close.
    //
    if (CLOSING(DiscTCB)) {
        KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
        return TDI_INVALID_CONNECTION;
    }

    //
    // For graceful close (and disconnect-wait) we'll need a ConnReq.
    //
    ConnReq = GetConnReq();
    if (ConnReq == NULL) {
        //
        // Couldn't get a ConnReq.
        //
        KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
        return TDI_NO_RESOURCES;
    }

    ConnReq->tcr_flags = 0;
    ConnReq->tcr_conninfo = NULL;
    ConnReq->tcr_addrinfo = NULL;
    ConnReq->tcr_req.tr_rtn = Request->RequestNotifyObject;
    ConnReq->tcr_req.tr_context = Request->RequestContext;

    if (Flags & TDI_DISCONNECT_WAIT) {
        //
        // This is a DISC_WAIT request.  Only one may be outstanding.
        //
        ConnReq->tcr_timeout = 0;
        if (DiscTCB->tcb_discwait != NULL) {
            FreeConnReq(ConnReq);
            Status = TDI_INVALID_STATE;
        } else {
            DiscTCB->tcb_discwait = ConnReq;
            Status = TDI_PENDING;
        }

        KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
        return Status;
    }

    //
    // Plain graceful disconnect.  Work out the timeout in ticks: a finite
    // value is clamped to MAX_CONN_TO_TICKS, otherwise bumped by one.
    //
    Timeout = (TCP_TIME *)TO;

    if (Timeout == NULL || INFINITE_CONN_TO(*Timeout)) {
        ConnReq->tcr_timeout = 0;
    } else {
        ulong Ticks = TCP_TIME_TO_TICKS(*Timeout);

        if (Ticks > MAX_CONN_TO_TICKS)
            Ticks = MAX_CONN_TO_TICKS;
        else
            Ticks++;
        ConnReq->tcr_timeout = (ushort)Ticks;
    }

    //
    // OK, we're just about set.  We need to update
    // the TCB state, and send the FIN.
    //
    if (DiscTCB->tcb_state == TCB_ESTAB) {
        DiscTCB->tcb_state = TCB_FIN_WAIT1;

        //
        // Since we left established, we're off the fast
        // receive path.
        //
        DiscTCB->tcb_slowcount++;
        DiscTCB->tcb_fastchk |= TCP_FLAG_SLOW;
    } else if (DiscTCB->tcb_state == TCB_CLOSE_WAIT) {
        DiscTCB->tcb_state = TCB_LAST_ACK;
    } else {
        //
        // Not in a state we can gracefully disconnect from.
        //
        KeReleaseSpinLock(&DiscTCB->tcb_lock, ConnIrql);
        FreeConnReq(ConnReq);
        return TDI_INVALID_STATE;
    }

    // Update SNMP info.
    InterlockedDecrement((PLONG)&TStats.ts_currestab);
    ASSERT(*(int *)&TStats.ts_currestab >= 0);

    ASSERT(DiscTCB->tcb_connreq == NULL);
    DiscTCB->tcb_connreq = ConnReq;
    DiscTCB->tcb_flags |= FIN_NEEDED;
    DiscTCB->tcb_refcnt++;
    TCPSend(DiscTCB, ConnIrql);

    return TDI_PENDING;
}
//* OKToNotify - Check whether a disconnect may be indicated right now.
//
//  Small helper used to decide whether the client can be told about an
//  incoming FIN.  The answer is yes only when tcb_pendingcnt and tcb_urgcnt
//  are both zero and tcb_rcvhead and tcb_exprcv are both NULL.
//
uint  // Returns: TRUE if it's OK to notify, FALSE otherwise.
OKToNotify(
    TCB *NotifyTCB)  // TCB to check.
{
    CHECK_STRUCT(NotifyTCB, tcb);

    //
    // Any one of these being non-empty blocks the notification.
    //
    if (NotifyTCB->tcb_pendingcnt != 0)
        return FALSE;

    if (NotifyTCB->tcb_urgcnt != 0)
        return FALSE;

    if (NotifyTCB->tcb_rcvhead != NULL)
        return FALSE;

    if (NotifyTCB->tcb_exprcv != NULL)
        return FALSE;

    return TRUE;
}
//* NotifyOfDisc - Tell the client that a TCB is being disconnected.
//
//  Invoked when a TCB is going away because the remote peer sent a FIN or
//  RST, or because we are aborting the connection for some reason.  A
//  pending DISCONNECT_WAIT request is completed if there is one; otherwise
//  the disconnect handler registered on the connection's AddrObj (if any)
//  is called.  Nothing is indicated unless SYNC_STATE holds for the TCB's
//  state and DISC_NOTIFIED is not already set.
//
//  The caller may or may not hold the TCB lock (see IrqlPtr).  Every path
//  through this routine releases the TCB lock before returning.
//
void  // Returns: Nothing.
NotifyOfDisc(
    TCB *DiscTCB,       // TCB we're notifying.
    TDI_STATUS Status,  // Status code for notification.
    PKIRQL IrqlPtr)     // If non-NULL, the TCB lock is already held and this
                        // points at the IRQL to use when releasing it.
{
    KIRQL OuterIrql, MiddleIrql, InnerIrql;
    TCPConnReq *WaitReq;
    TCPConn *OwnerConn;
    AddrObj *OwnerAO;
    PVOID ClientConnContext;
    PDisconnectEvent Handler;
    PVOID HandlerContext;

    CHECK_STRUCT(DiscTCB, tcb);
    ASSERT(DiscTCB->tcb_refcnt != 0);

    //
    // Take the TCB lock unless the caller already did.
    //
    if (IrqlPtr == NULL) {
        KeAcquireSpinLock(&DiscTCB->tcb_lock, &OuterIrql);
    } else {
        OuterIrql = *IrqlPtr;
    }

    //
    // Nothing to do unless we're synchronized and haven't notified yet.
    //
    if (!(SYNC_STATE(DiscTCB->tcb_state)) ||
        (DiscTCB->tcb_flags & DISC_NOTIFIED)) {
        KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);
        return;
    }

    if (Status != TDI_GRACEFUL_DISC) {
        //
        // Not a graceful disconnect.  While IN_RCV_IND or IN_DELIV_URG is
        // set we only record RST_PENDING and leave.
        //
        if (DiscTCB->tcb_flags & (IN_RCV_IND | IN_DELIV_URG)) {
            DiscTCB->tcb_pending |= RST_PENDING;
            KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);
            return;
        }
        DiscTCB->tcb_pending &= ~RST_PENDING;
    } else {
        //
        // Graceful disconnect.  We can't notify the client while there is
        // still data for it to take; mark the disconnect pending instead.
        //
        if (!OKToNotify(DiscTCB)) {
            DiscTCB->tcb_flags |= DISC_PENDING;
            KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);
            return;
        }

        //
        // An RST notification is already pending; don't indicate the
        // graceful one.
        //
        if (DiscTCB->tcb_pending & RST_PENDING) {
            KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);
            return;
        }
    }

    //
    // From here on the client counts as notified.
    //
    DiscTCB->tcb_flags |= DISC_NOTIFIED;
    DiscTCB->tcb_flags &= ~DISC_PENDING;

    //
    // A disconnect is meaningful and hasn't been reported.  If the client
    // posted a DISC-WAIT request, completing it is all we need to do.
    //
    WaitReq = DiscTCB->tcb_discwait;
    if (WaitReq != NULL) {
        DiscTCB->tcb_discwait = NULL;
        KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);
        (*WaitReq->tcr_req.tr_rtn)(WaitReq->tcr_req.tr_context, Status, 0);
        FreeConnReq(WaitReq);
        return;
    }

    //
    // No DISC-WAIT.  Remember the connection context, drop the TCB lock,
    // and go looking for the AddrObj and its disconnect handler.
    //
    ClientConnContext = DiscTCB->tcb_conncontext;
    KeReleaseSpinLock(&DiscTCB->tcb_lock, OuterIrql);

    KeAcquireSpinLock(&AddrObjTableLock, &OuterIrql);

    OwnerConn = DiscTCB->tcb_conn;
    if (OwnerConn == NULL) {
        KeReleaseSpinLock(&AddrObjTableLock, OuterIrql);
        return;
    }

    CHECK_STRUCT(OwnerConn, tc);
    KeAcquireSpinLock(&OwnerConn->tc_ConnBlock->cb_lock, &MiddleIrql);

    OwnerAO = OwnerConn->tc_ao;
    if (OwnerAO == NULL) {
        KeReleaseSpinLock(&OwnerConn->tc_ConnBlock->cb_lock, MiddleIrql);
        KeReleaseSpinLock(&AddrObjTableLock, OuterIrql);
        return;
    }

    //
    // Take the AddrObj lock, then let go of the two outer locks.  The locks
    // are released in acquisition order rather than in reverse, so each
    // release uses the IRQL saved by the acquire that came after it; the
    // IRQL saved by the outermost acquire is kept for the final release of
    // the AddrObj lock.
    //
    CHECK_STRUCT(OwnerAO, ao);
    KeAcquireSpinLock(&OwnerAO->ao_lock, &InnerIrql);
    KeReleaseSpinLock(&OwnerConn->tc_ConnBlock->cb_lock, InnerIrql);
    KeReleaseSpinLock(&AddrObjTableLock, MiddleIrql);

    Handler = OwnerAO->ao_disconnect;
    HandlerContext = OwnerAO->ao_disconncontext;

    if (Handler == NULL) {
        //
        // Nobody registered a disconnect handler.
        //
        KeReleaseSpinLock(&OwnerAO->ao_lock, OuterIrql);
        return;
    }

    //
    // Reference the AddrObj across the upcall, which is made with no locks
    // held.
    //
    REF_AO(OwnerAO);
    KeReleaseSpinLock(&OwnerAO->ao_lock, OuterIrql);

    IF_TCPDBG(TCP_DEBUG_CLOSE) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
                   "TCP: indicating %s disconnect\n",
                   (Status == TDI_GRACEFUL_DISC) ? "graceful" : "abortive"));
    }

    (*Handler)(HandlerContext, ClientConnContext, 0, NULL, 0, NULL,
               (Status == TDI_GRACEFUL_DISC) ? TDI_DISCONNECT_RELEASE :
                                               TDI_DISCONNECT_ABORT);

    DELAY_DEREF_AO(OwnerAO);
}
//* GracefulClose - Finish moving a TCB to a gracefully closed state.
//
//  Completes the transition to either TIME_WAIT or CLOSED.  That involves
//  detaching the TCB from its associated connection (if it has one),
//  telling the upper layer client, either by completing its disconnect
//  request or by going through NotifyOfDisc, and then making the state
//  change itself.
//
//  The awkward case is when the client has to be notified rather than
//  having a request completed.  If OKToNotify says the notification can't
//  happen yet, GC_PENDING (and TW_PENDING when headed for TIME_WAIT) is set
//  on the TCB and the routine returns without finishing the close.
//
//  The TCB lock is expected to be held on entry; PreLockIrql is the IRQL
//  from before it was taken.
//
void  // Returns: Nothing.
GracefulClose(
    TCB *CloseTCB,      // TCB to transition.
    uint ToTimeWait,    // TRUE if we're going to TIME_WAIT, FALSE if
                        // we're going to close the TCB.
    uint Notify,        // TRUE if via notification, FALSE if via completing
                        // a disconnect request.
    KIRQL PreLockIrql)  // IRQL prior to acquiring TCB lock.
{
    CHECK_STRUCT(CloseTCB, tcb);
    ASSERT(CloseTCB->tcb_refcnt != 0);

    if (Notify) {
        //
        // The client needs a FIN notification.  If it can't be given one
        // right now, record what is pending and bail out.
        //
        if (!OKToNotify(CloseTCB)) {
            CloseTCB->tcb_flags |=
                ToTimeWait ? (GC_PENDING | TW_PENDING) : GC_PENDING;
            DerefTCB(CloseTCB, PreLockIrql);
            return;
        }

        //
        // Notification is possible: change state, detach from the
        // connection, then notify.
        //
        if (!ToTimeWait) {
            //
            // Closing.  TryToCloseTCB marks the TCB as closing (it won't
            // really go away, since we hold a reference) so that anyone who
            // touches it after the lock is dropped will fail.
            //
            TryToCloseTCB(CloseTCB, TDI_SUCCESS, PreLockIrql);
        } else {
            //
            // Record when TIME_WAIT was entered, in case the TCB has to be
            // scavenged later.
            //
            CloseTCB->tcb_alive = SystemUpTime();
            CloseTCB->tcb_state = TCB_TIME_WAIT;
            KeReleaseSpinLock(&CloseTCB->tcb_lock, PreLockIrql);
        }

        RemoveTCBFromConn(CloseTCB);
        NotifyOfDisc(CloseTCB, TDI_GRACEFUL_DISC, NULL);
    } else {
        //
        // No notification wanted here, only completion of a connection
        // request.  If the client hasn't been notified of the disconnect
        // yet, complete the request now and leave the rest pending.
        //
        if (!(CloseTCB->tcb_flags & DISC_NOTIFIED)) {
            //
            // Not notified, so a disconnect should already be pending.
            //
            ASSERT(CloseTCB->tcb_flags & DISC_PENDING);
            CloseTCB->tcb_flags |=
                ToTimeWait ? (GC_PENDING | TW_PENDING) : GC_PENDING;

            CompleteConnReq(CloseTCB, TDI_SUCCESS);

            DerefTCB(CloseTCB, PreLockIrql);
            return;
        }

        //
        // Already notified.  Same state change as in the notify case.
        //
        if (!ToTimeWait) {
            //
            // Mark the TCB as closing; see the matching comment above.
            //
            TryToCloseTCB(CloseTCB, TDI_SUCCESS, PreLockIrql);
        } else {
            //
            // Record when TIME_WAIT was entered, in case the TCB has to be
            // scavenged later.
            //
            CloseTCB->tcb_alive = SystemUpTime();
            CloseTCB->tcb_state = TCB_TIME_WAIT;
            KeReleaseSpinLock(&CloseTCB->tcb_lock, PreLockIrql);
        }

        RemoveTCBFromConn(CloseTCB);

        //
        // The request is completed with the TCB lock held.
        //
        KeAcquireSpinLock(&CloseTCB->tcb_lock, &PreLockIrql);
        CompleteConnReq(CloseTCB, TDI_SUCCESS);
        KeReleaseSpinLock(&CloseTCB->tcb_lock, PreLockIrql);
    }

    //
    // When headed for TIME_WAIT (and not already closing), start the
    // TIME_WAIT timer now.  Either way, drop our reference at the end.
    //
    KeAcquireSpinLock(&CloseTCB->tcb_lock, &PreLockIrql);
    if (!(CLOSING(CloseTCB)) && ToTimeWait) {
        START_TCB_TIMER(CloseTCB->tcb_rexmittimer, MAX_REXMIT_TO);

        //
        // RemoveConnFromTCB is called without the TCB lock.
        //
        KeReleaseSpinLock(&CloseTCB->tcb_lock, PreLockIrql);
        RemoveConnFromTCB(CloseTCB);
        KeAcquireSpinLock(&CloseTCB->tcb_lock, &PreLockIrql);
    }

    DerefTCB(CloseTCB, PreLockIrql);
}
#if 0  // REVIEW: Unused function?
//* ConnCheckPassed - Check to see if we have exceeded the connect limit.
//
//  Called when a SYN is received to determine whether we will accept
//  the incoming connection.  The intent was to accept when there is an
//  empty slot or the IP address is already in the table; the code as it
//  stands ignores both arguments and accepts unconditionally.
//
//  Note: the whole routine is compiled out.  The closing #endif has to
//  start its own line, otherwise the preprocessor does not treat it as a
//  directive and the #if 0 region runs on into the code that follows.
//
int  // Returns: TRUE if connect is accepted, FALSE if rejected.
ConnCheckPassed(
    IPv6Addr *Src,  // Source address of incoming connection.
    ulong Prt)      // Destination port of incoming connection.
{
    UNREFERENCED_PARAMETER(Src);
    UNREFERENCED_PARAMETER(Prt);

    return TRUE;
}
#endif
//* InitAddrChecks - Placeholder for address check initialization.
//
//  Does nothing.
//
void
InitAddrChecks()
{
}
//* EnumerateConnectionList - Enumerate the connection list database.
//
//  Nominally reports the contents of the connection limit database.
//
//  Note: The comments that came with this routine at the time of the IPv6
//  port suggest it once did real work.  In its present form it never
//  touches the buffer and always reports zero entries returned and zero
//  entries available.
//
void  // Returns: Nothing.
EnumerateConnectionList(
    uchar *Buffer,            // Buffer to fill with connection list entries.
    ulong BufferSize,         // Size of Buffer in bytes.
    ulong *EntriesReturned,   // Where to put the number of entries returned.
    ulong *EntriesAvailable)  // Where to return number of avail conn. entries.
{
    //
    // The output buffer is not used.
    //
    UNREFERENCED_PARAMETER(BufferSize);
    UNREFERENCED_PARAMETER(Buffer);

    //
    // There is never anything to report.
    //
    *EntriesReturned = 0;
    *EntriesAvailable = 0;
}
#pragma BEGIN_INIT

//* InitTCPConn - Set up TCP connection management state.
//
//  Called during init time.  Initializes the ConnReq free list and its
//  lock, initializes the connection table lock, resets the count of
//  allocated connection blocks, and allocates the connection table
//  (MaxConnBlocks pointer-sized slots) from non-paged pool.
//
int  // Returns: TRUE on success, FALSE if the table couldn't be allocated.
InitTCPConn(
    void)  // Input: Nothing.
{
    //
    // Connection request free list and its lock.
    //
    ExInitializeSListHead(&ConnReqFree);
    KeInitializeSpinLock(&ConnReqFreeLock);

    //
    // Connection table: lock, block count, and the table itself.
    //
    KeInitializeSpinLock(&ConnTableLock);
    MaxAllocatedConnBlocks = 0;
    ConnTable = ExAllocatePool(NonPagedPool,
                               MaxConnBlocks * sizeof(TCPConnBlock *));

    return (ConnTable != NULL) ? TRUE : FALSE;
}

#pragma END_INIT
//* UnloadTCPConn
//
// Cleanup and prepare for stack unload.
//
void UnloadTCPConn(void) { PSLIST_ENTRY BufferLink; KIRQL OldIrql; TCPConnBlock **OldTable;
while ((BufferLink = ExInterlockedPopEntrySList(&ConnReqFree, &ConnReqFreeLock)) != NULL) { Queue *QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next); TCPReq *Req = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q); TCPConnReq *ConnReq = CONTAINING_RECORD(Req, TCPConnReq, tcr_req);
CHECK_STRUCT(ConnReq, tcr); ExFreePool(ConnReq); }
KeAcquireSpinLock(&ConnTableLock, &OldIrql); OldTable = ConnTable; ConnTable = NULL; KeReleaseSpinLock(&ConnTableLock, OldIrql);
if (OldTable != NULL) { uint i; for (i = 0; i < MaxAllocatedConnBlocks; i++) { ExFreePool(OldTable[i]); } ExFreePool(OldTable); } }
|