windows-xp/Source/XPSP1/NT/net/tcpip/tpipv6/tcpip6/tcp/tcb.c

// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
//
// Copyright (c) 1985-2000 Microsoft Corporation
//
// This file is part of the Microsoft Research IPv6 Network Protocol Stack.
// You should have received a copy of the Microsoft End-User License Agreement
// for this software along with this release; see the file "license.txt".
// If not, please see http://www.research.microsoft.com/msripv6/license.htm,
// or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
//
// Abstract:
//
// Code for TCP Control Block management.
//


#include "oscfg.h"
#include "ndis.h"
#include "ip6imp.h"
#include "ip6def.h"
#include "tdi.h"
#include "tdint.h"
#include "tdistat.h"
#include "queue.h"
#include "transprt.h"
#include "tcp.h"
#include "tcb.h"
#include "tcpconn.h"
#include "tcpsend.h"
#include "tcprcv.h"
#include "info.h"
#include "tcpcfg.h"
#include "tcpdeliv.h"
#include "route.h"

// Spin lock taken by TCBTimeout while it walks PendingFreeList and
// adjusts TCBWalkCount.
KSPIN_LOCK TCBTableLock;

// Free running tick counter; TCBTimeout increments it once per invocation.
uint TCPTime;
// Incremented on entry to TCBTimeout and decremented (under TCBTableLock)
// once its pending-free-list pass is done.
uint TCBWalkCount;

// Array of bucket heads, indexed 0..TcbTableSize-1.  Each bucket is a
// singly linked chain of TCBs connected through tcb_next.
TCB **TCBTable;

// NOTE(review): not referenced in the visible part of this file; presumably
// a cache of the most recently found TCB -- confirm against the lookup code.
TCB *LastTCB;

// Head of a list of TCBs linked through tcb_delayq.q_next.  TCBTimeout
// decrements each entry's tcb_walkcount and unlinks/frees entries from here.
TCB *PendingFreeList;

// NOTE(review): presumably the list of recycled TCB structures; its users
// are outside the visible part of this file.
SLIST_HEADER FreeTCBList;

KSPIN_LOCK FreeTCBListLock;  // Lock to protect TCB free list.

// Taken by TCBTimeout around its AddrCheckTable cleanup pass.
extern KSPIN_LOCK AddrObjTableLock;

// Initial sequence number state maintained from TCBTimeout: ISNCredits is
// reset to ISNMaxCredits every tick, and ISNMonotonicPortion is advanced by
// 0x100 times the value returned by GetDeltaTime() when that is positive.
extern SeqNum ISNMonotonicPortion;
extern int ISNCredits;
extern int ISNMaxCredits;
extern uint GetDeltaTime();


// NOTE(review): TCB population counters and limit; they are only defined
// here, the code that maintains them is outside the visible part of the file.
uint CurrentTCBs = 0;
uint FreeTCBs = 0;

uint MaxTCBs = 0xffffffff;

// Initial value for MaxFreeTCBs.
#define MAX_FREE_TCBS 1000

// Amount by which TCBTimeout advances DeadmanTicks each time it matches
// TCPTime (1000 ms expressed in ticks).
#define NUM_DEADMAN_TICKS MS_TO_TICKS(1000)

uint MaxFreeTCBs = MAX_FREE_TCBS;
// Value of TCPTime at which TCBTimeout will next call TCPRcvComplete.
uint DeadmanTicks;

// NOTE(review): presumably the kernel timer and DPC object that drive
// TCBTimeout; they are initialized outside the visible part of this file.
KTIMER TCBTimer;
KDPC TCBTimeoutDpc;

//
// All of the init code can be discarded.
//
// When the build defines ALLOC_PRAGMA, InitTCB is forward declared here so
// that the alloc_text pragma below can place it in the INIT section.
//
#ifdef ALLOC_PRAGMA

int InitTCB(void);

#pragma alloc_text(INIT, InitTCB)

#endif // ALLOC_PRAGMA


//* ReadNextTCB - Read the next TCB in the table.
//
//  Called to read the next TCB in the table.  The needed information
//  is derived from the incoming context, which is assumed to be valid.
//  We'll copy the information, and then update the context value with
//  the next TCB to be read.
//
uint  // Returns: TRUE if more data is available to be read, FALSE is not.
ReadNextTCB(
    void *Context,  // Pointer to a TCPConnContext.
    void *Buffer)   // Pointer to a TCPConnTableEntry structure.
{
    TCPConnContext *TCContext = (TCPConnContext *)Context;
    TCP6ConnTableEntry *TCEntry = (TCP6ConnTableEntry *)Buffer;
    KIRQL OldIrql;
    TCB *CurrentTCB;
    uint i;

    CurrentTCB = TCContext->tcc_tcb;
    CHECK_STRUCT(CurrentTCB, tcb);

    KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);
    if (CLOSING(CurrentTCB))
        TCEntry->tct_state = TCP_CONN_CLOSED;
    else
        TCEntry->tct_state = (uint)CurrentTCB->tcb_state + TCB_STATE_DELTA;
    TCEntry->tct_localaddr = CurrentTCB->tcb_saddr;
    TCEntry->tct_localscopeid = CurrentTCB->tcb_sscope_id;
    TCEntry->tct_localport = CurrentTCB->tcb_sport;
    TCEntry->tct_remoteaddr = CurrentTCB->tcb_daddr;
    TCEntry->tct_remotescopeid = CurrentTCB->tcb_dscope_id;
    TCEntry->tct_remoteport = CurrentTCB->tcb_dport;
    TCEntry->tct_owningpid = (CurrentTCB->tcb_conn) ?
            CurrentTCB->tcb_conn->tc_owningpid : 0;
    KeReleaseSpinLock(&CurrentTCB->tcb_lock, OldIrql);

    // We've filled it in. Now update the context.
    if (CurrentTCB->tcb_next != NULL) {
        TCContext->tcc_tcb = CurrentTCB->tcb_next;
        return TRUE;
    } else {
        // NextTCB is NULL. Loop through the TCBTable looking for a new one.
        i = TCContext->tcc_index + 1;
        while (i < TcbTableSize) {
            if (TCBTable[i] != NULL) {
                TCContext->tcc_tcb = TCBTable[i];
                TCContext->tcc_index = i;
                return TRUE;
                break;
            } else
                i++;
        }

        TCContext->tcc_index = 0;
        TCContext->tcc_tcb = NULL;
        return FALSE;
    }
}


//* ValidateTCBContext - Check (or initialize) a TCB table read cursor.
//
//  A context of (index 0, NULL TCB) means "start at the beginning": the
//  context is positioned on the first TCB found in the table, if any.
//  Any other context is checked by looking for its TCB on the chain of the
//  bucket it names.
//  We assume the caller holds the TCB table lock.
//
//  Upon return, *Valid is set to true if the context is valid.
//
uint                // Returns: TRUE if data in table, FALSE if not.
ValidateTCBContext(
    void *Context,  // Pointer to a TCPConnContext.
    uint *Valid)    // Where to return information about context being valid.
{
    TCPConnContext *Cursor = (TCPConnContext *)Context;
    uint Bucket = Cursor->tcc_index;
    TCB *Wanted = Cursor->tcc_tcb;
    TCB *Scan;

    if (Bucket == 0 && Wanted == NULL) {
        //
        // Fresh context.  It is valid by definition; find the first TCB.
        // Bucket 0 is always examined before the bound is tested.
        //
        *Valid = TRUE;

        for (;;) {
            Scan = TCBTable[Bucket];
            if (Scan != NULL) {
                CHECK_STRUCT(Scan, tcb);
                break;
            }
            if (++Bucket >= TcbTableSize)
                break;
        }

        if (Scan == NULL)
            return FALSE;  // The table is empty.

        Cursor->tcc_index = Bucket;
        Cursor->tcc_tcb = Scan;
        return TRUE;
    }

    //
    // Caller-supplied context.  It is valid only if the bucket index is in
    // range and the TCB is still linked on that bucket's chain.
    //
    if (Bucket < TcbTableSize) {
        for (Scan = TCBTable[Bucket]; Scan != NULL; Scan = Scan->tcb_next) {
            if (Scan == Wanted) {
                *Valid = TRUE;
                return TRUE;
            }
        }
    }

    // The TCB named by the context was not found.
    *Valid = FALSE;
    return FALSE;
}


//* FindNextTCB - Locate the successor of a TCB on a bucket chain.
//
//  Chains are kept in ascending address order, so the successor of a TCB
//  is the first element of the chain whose address is greater than it.
//  Because only the address of Current is compared (it is never
//  dereferenced), this works even when Current is no longer on the chain.
//
//  This routine is mostly used when someone is walking the table and needs
//  to free the various locks to perform some action.
//
TCB *              // Returns: Pointer to the next TCB, or NULL.
FindNextTCB(
    uint Index,    // Index into TCBTable.
    TCB *Current)  // Current TCB - we find the one after this one.
{
    TCB *Candidate;

    ASSERT(Index < TcbTableSize);

    // Skip every element at or below Current.
    for (Candidate = TCBTable[Index];
         Candidate != NULL && (Candidate <= Current);
         Candidate = Candidate->tcb_next) {
    }

    return Candidate;
}


//* ResetSendNext - Reposition the send-next sequence number of a TCB.
//
//  Sets tcb_sendnext to NewSeq.  For synchronized states other than
//  TIME_WAIT with a non-empty send queue it also recomputes the send
//  cursor (tcb_cursend, tcb_sendsize, tcb_sendbuf, tcb_sendofs) so that it
//  designates the byte at NewSeq.  We assume the caller holds the lock
//  on the TCB.
//
void  // Returns: Nothing.
ResetSendNext(
    TCB *SeqTCB,    // TCB to be updated.
    SeqNum NewSeq)  // Sequence number to set.
{
    TCPSendReq *Req;
    Queue *Link;
    PNDIS_BUFFER Buf;
    uint BufOffset;
    uint Remaining;   // Bytes still to skip from senduna to reach NewSeq.

    CHECK_STRUCT(SeqTCB, tcb);
    ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));

    //
    // The new seq must be less than send max, or NewSeq, senduna, sendnext,
    // and sendmax must all be equal (the latter case happens when we're
    // called exiting TIME_WAIT, or possibly when we're retransmitting
    // during a flow controlled situation).
    //
    ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
           (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
            SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
            SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));

    Remaining = NewSeq - SeqTCB->tcb_senduna;

    //
    // Two shortcuts apply when a FIN has been sent and sendnext is already
    // at sendmax.  In both, only sendnext and the FIN_OUTSTANDING flag are
    // touched and the send queue cursor is left alone.
    //
    if ((SeqTCB->tcb_flags & FIN_SENT) &&
        (SEQ_EQ(SeqTCB->tcb_sendnext, SeqTCB->tcb_sendmax))) {

        if ((Remaining == 1) &&
            ((SeqTCB->tcb_sendnext - SeqTCB->tcb_senduna) <= 1)) {
            //
            // NewSeq is one past senduna and at most one sequence number
            // is outstanding.
            //
            KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_RARE,
                       "tcpip6: trying to set sendnext for FIN_SENT\n"));
            SeqTCB->tcb_sendnext = NewSeq;
            SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
            return;
        }

        if ((SeqTCB->tcb_sendnext - NewSeq) == 1) {
            //
            // There is only FIN that is left beyond sendnext.
            //
            SeqTCB->tcb_sendnext = NewSeq;
            SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
            return;
        }
    }

    SeqTCB->tcb_sendnext = NewSeq;

    //
    // If we're backing off send next, turn off the FIN_OUTSTANDING flag to
    // maintain a consistent state.
    //
    if (!SEQ_EQ(NewSeq, SeqTCB->tcb_sendmax))
        SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;

    //
    // Only synchronized states other than TIME_WAIT have a send queue
    // cursor that needs to be brought in line.
    //
    if (!SYNC_STATE(SeqTCB->tcb_state) ||
        SeqTCB->tcb_state == TCB_TIME_WAIT) {
        CheckTCBSends(SeqTCB);
        return;
    }

    if (EMPTYQ(&SeqTCB->tcb_sendq)) {
        // Nothing queued: there must be no cursor and nothing to skip.
        ASSERT(SeqTCB->tcb_cursend == NULL);
        ASSERT(Remaining == 0);
        CheckTCBSends(SeqTCB);
        return;
    }

    //
    // Step 1: starting at the head of the send queue, skip whole send
    // requests until the one containing the target byte is reached.
    //
    Link = QHEAD(&SeqTCB->tcb_sendq);
    Req = (TCPSendReq *)CONTAINING_RECORD(Link, TCPReq, tr_q);

    while (Remaining != 0) {

        CHECK_STRUCT(Req, tsr);

        if (Remaining < Req->tsr_unasize) {
            // The target lies inside this request.
            break;
        }

        // Consume this request entirely and move to the next one.
        Remaining -= Req->tsr_unasize;
        Link = QNEXT(Link);
        ASSERT(Link != QEND(&SeqTCB->tcb_sendq));
        Req = (TCPSendReq *)CONTAINING_RECORD(Link, TCPReq, tr_q);
    }

    //
    // Record the request found and how much of it remains to be sent.
    //
    SeqTCB->tcb_cursend = Req;
    SeqTCB->tcb_sendsize = Req->tsr_unasize - Remaining;

    //
    // Step 2: within that request, walk the buffer chain to find the
    // buffer and offset of the target byte.  The request's own offset
    // applies to its first buffer only.
    //
    Buf = Req->tsr_buffer;
    BufOffset = Req->tsr_offset;

    while (Remaining != 0) {
        uint Avail;

        ASSERT((BufOffset < NdisBufferLength(Buf)) ||
               ((BufOffset == 0) && (NdisBufferLength(Buf) == 0)));

        // Bytes usable in this buffer from the current offset onward.
        Avail = NdisBufferLength(Buf) - BufOffset;

        if (Remaining < Avail) {
            // The target lies inside this buffer.
            break;
        }

        // Skip the rest of this buffer; later buffers start at offset 0.
        Remaining -= Avail;
        BufOffset = 0;
        Buf = NDIS_BUFFER_LINKAGE(Buf);
        ASSERT(Buf != NULL);
    }

    //
    // Save the buffer we found, and the offset into that buffer.
    //
    SeqTCB->tcb_sendbuf = Buf;
    SeqTCB->tcb_sendofs = BufOffset + Remaining;

    CheckTCBSends(SeqTCB);
}


//* TCPAbortAndIndicateDisconnect
//
//  Abortively closes a TCB and issues a disconnect indication up to the
//  transport user.  This function is used to support cancellation of
//  TDI send and receive requests.
//
//  Nothing is done if the connection ID cannot be resolved, if the
//  connection has no TCB, or if the TCB is already closed or closing.
//
void  // Returns: Nothing.
TCPAbortAndIndicateDisconnect(
    CONNECTION_CONTEXT ConnectionContext  // Connection ID to find a TCB for.
    )
{
    TCPConn *Conn;
    TCB *AbortTCB;
    KIRQL Irql0, Irql1;  // One per lock nesting level.

    //
    // Every path below that follows a successful lookup releases the
    // connection block lock, so the lookup evidently returns with that
    // lock held and the previous IRQL stored in Irql0.
    //
    Conn = GetConnFromConnID(PtrToUlong(ConnectionContext), &Irql0);
    if (Conn == NULL)
        return;

    CHECK_STRUCT(Conn, tc);

    AbortTCB = Conn->tc_tcb;

    //
    // No TCB, or one that is CLOSING or CLOSED: nothing to abort.
    //
    if (AbortTCB == NULL ||
        AbortTCB->tcb_state == TCB_CLOSED || CLOSING(AbortTCB)) {
        KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
        return;
    }

    CHECK_STRUCT(AbortTCB, tcb);

    //
    // Hand over from the connection block lock to the TCB lock.  The
    // inner IRQL (Irql1) goes with the release of the outer lock; the
    // original IRQL (Irql0) is kept for the eventual TCB lock release.
    //
    KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql1);
    KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql1);

    // Check again now that the TCB lock is held.
    if (AbortTCB->tcb_state == TCB_CLOSED || CLOSING(AbortTCB)) {
        KeReleaseSpinLock(&AbortTCB->tcb_lock, Irql0);
        return;
    }

    // Hold a reference across the close and the notification.
    AbortTCB->tcb_refcnt++;
    AbortTCB->tcb_flags |= NEED_RST;  // send a reset if connected
    TryToCloseTCB(AbortTCB, TCB_CLOSE_ABORTED, Irql0);

    RemoveTCBFromConn(AbortTCB);

    IF_TCPDBG(TCP_DEBUG_IRP) {
        KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
            "TCPAbortAndIndicateDisconnect, indicating discon\n"));
    }

    NotifyOfDisc(AbortTCB, TDI_CONNECTION_ABORTED);

    // Drop the reference taken above.  TCB lock freed by DerefTCB.
    KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql0);
    DerefTCB(AbortTCB, Irql0);
}


//* TCBTimeout - Do timeout events on TCBs.
//
//  Called every MS_PER_TICKS milliseconds to do timeout processing on TCBs.
//  We run through the TCB table, decrementing timers.  If one goes to zero
//  we look at its state to decide what to do.
//
//  In order, each invocation:
//    - advances TCPTime and the initial sequence number state,
//    - walks every TCB, servicing its retransmit, SWS, push, delayed ack,
//      keepalive and connect timers,
//    - ages the pending free list,
//    - ages the AddrCheckTable entries,
//    - runs the delay queue and, when the deadman tick is reached, calls
//      TCPRcvComplete.
//
//  The routine uses the AtDpcLevel spin lock variants throughout and passes
//  DISPATCH_LEVEL to the routines it calls.
//
void  // Returns: Nothing.
TCBTimeout(
    PKDPC MyDpcObject,  // The DPC object describing this routine.
    void *Context,      // The argument we asked to be called with.
    void *Unused1,
    void *Unused2)
{
    uint i;
    TCB *CurrentTCB;
    uint Delayed = FALSE;   // Set when ProcessTCBDelayQ must run at the end.
    uint CallRcvComplete;
    int Delta;

    UNREFERENCED_PARAMETER(MyDpcObject);
    UNREFERENCED_PARAMETER(Context);
    UNREFERENCED_PARAMETER(Unused1);
    UNREFERENCED_PARAMETER(Unused2);

    //
    // Update our free running counter.
    //
    TCPTime++;

    ExInterlockedAddUlong(&TCBWalkCount, 1, &TCBTableLock);

    //
    // Set credits so that some more connections can increment the
    // Initial Sequence Number, during the next 100 ms.
    //
    InterlockedExchange(&ISNCredits, ISNMaxCredits);

    Delta = GetDeltaTime();

    //
    // The increment made is (256)*(Time in milliseconds). This is really close
    // to 25000 increment made originally every 100 ms.
    //
    if (Delta > 0) {
        Delta *= 0x100;
        InterlockedExchangeAdd(&ISNMonotonicPortion, Delta);
    }

    //
    // Loop through each bucket in the table, going down the chain of
    // TCBs on the bucket.
    //
    // Each TCB is locked while it is examined.  Paths that simply fall
    // through release the lock and step to tcb_next.  Paths that call
    // TryToCloseTCB, DerefTCB, SendSYN or SendKA do not release the lock
    // here and re-find their position with FindNextTCB afterwards.
    //
    for (i = 0; i < TcbTableSize; i++) {
        TCB *TempTCB;
        uint maxRexmitCnt;

        CurrentTCB = TCBTable[i];

        while (CurrentTCB != NULL) {
            CHECK_STRUCT(CurrentTCB, tcb);
            KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);

            //
            // If it's CLOSING or CLOSED, skip it.
            //
            if (CurrentTCB->tcb_state == TCB_CLOSED || CLOSING(CurrentTCB)) {
                TempTCB = CurrentTCB->tcb_next;
                KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
                CurrentTCB = TempTCB;
                continue;
            }

            CheckTCBSends(CurrentTCB);
            CheckTCBRcv(CurrentTCB);

            //
            // First check the rexmit timer.
            //
            if (TCB_TIMER_RUNNING(CurrentTCB->tcb_rexmittimer)) {
                //
                // The timer is running.
                //
                if (--(CurrentTCB->tcb_rexmittimer) == 0) {
                    //
                    // And it's fired. Figure out what to do now.
                    //

                    if (CurrentTCB->tcb_state == TCB_SYN_SENT) {
                        maxRexmitCnt = MaxConnectRexmitCount;
                    } else {
                        maxRexmitCnt = MaxDataRexmitCount;
                    }

                    //
                    // If we've run out of retransmits, time out.
                    //
                    CurrentTCB->tcb_rexmitcnt++;
                    if (CurrentTCB->tcb_rexmitcnt > maxRexmitCnt) {

                        ASSERT(CurrentTCB->tcb_state > TCB_LISTEN);

                        //
                        // This connection has timed out.  Abort it.  First
                        // reference him, then mark as closed, notify the
                        // user, and finally dereference and close him.
                        //
                        // The keepalive check further down also jumps here
                        // once its probe count is exhausted.
                        //
TimeoutTCB:
                        CurrentTCB->tcb_refcnt++;
                        TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
                                      DISPATCH_LEVEL);

                        RemoveTCBFromConn(CurrentTCB);
                        NotifyOfDisc(CurrentTCB, TDI_TIMED_OUT);

                        KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
                        DerefTCB(CurrentTCB, DISPATCH_LEVEL);

                        CurrentTCB = FindNextTCB(i, CurrentTCB);
                        continue;
                    }

                    //
                    // Stop round trip time measurement.
                    //
                    CurrentTCB->tcb_rtt = 0;

                    //
                    // Figure out what our new retransmit timeout should be.
                    // We double it each time we get a retransmit, and reset it
                    // back when we get an ack for new data.
                    //
                    CurrentTCB->tcb_rexmit = MIN(CurrentTCB->tcb_rexmit << 1,
                                                 MAX_REXMIT_TO);

                    //
                    // Reset the sequence number, and reset the congestion
                    // window.
                    //
                    ResetSendNext(CurrentTCB, CurrentTCB->tcb_senduna);

                    if (!(CurrentTCB->tcb_flags & FLOW_CNTLD)) {
                        //
                        // Don't let the slow start threshold go below 2
                        // segments.
                        //
                        CurrentTCB->tcb_ssthresh =
                            MAX(MIN(CurrentTCB->tcb_cwin,
                                    CurrentTCB->tcb_sendwin) / 2,
                                (uint) CurrentTCB->tcb_mss * 2);
                        CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
                    } else {
                        //
                        // We're probing, and the probe timer has fired.  We
                        // need to set the FORCE_OUTPUT bit here.
                        //
                        CurrentTCB->tcb_flags |= FORCE_OUTPUT;
                    }

                    //
                    // See if we need to probe for a PMTU black hole.
                    //
                    if (PMTUBHDetect &&
                        CurrentTCB->tcb_rexmitcnt == ((maxRexmitCnt+1)/2)) {
                        //
                        // We may need to probe for a black hole.  If we're
                        // doing MTU discovery on this connection and we
                        // are retransmitting more than a minimum segment
                        // size, or we are probing for a PMTU BH already, turn
                        // off the DF flag and bump the probe count.  If the
                        // probe count gets too big we'll assume it's not
                        // a PMTU black hole, and we'll try to switch the
                        // router.
                        //
                        if ((CurrentTCB->tcb_flags & PMTU_BH_PROBE) ||
                            (CurrentTCB->tcb_sendmax - CurrentTCB->tcb_senduna
                             > 8)) {
                            //
                            // May need to probe.  If we haven't exceeded our
                            // probe count, do so, otherwise restore those
                            // values.
                            //
                            if (CurrentTCB->tcb_bhprobecnt++ < 2) {
                                //
                                // We're going to probe.  Turn on the flag,
                                // drop the MSS, and turn off the don't
                                // fragment bit.
                                //
                                if (!(CurrentTCB->tcb_flags & PMTU_BH_PROBE)) {
                                    CurrentTCB->tcb_flags |= PMTU_BH_PROBE;
                                    CurrentTCB->tcb_slowcount++;
                                    CurrentTCB->tcb_fastchk |= TCP_FLAG_SLOW;
                                    //
                                    // Drop the MSS to the minimum.
                                    //
                                    CurrentTCB->tcb_mss =
                                        MIN(DEFAULT_MSS,
                                            CurrentTCB->tcb_remmss);

                                    ASSERT(CurrentTCB->tcb_mss > 0);

                                    CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
                                }

                                //
                                // Drop the rexmit count so we come here again,
                                // and don't retrigger DeadGWDetect.
                                //
                                CurrentTCB->tcb_rexmitcnt--;
                            } else {
                                //
                                // Too many probes.  Stop probing, and allow
                                // fallover to the next gateway.
                                //
                                // Currently this code won't do BH probing on
                                // the 2nd gateway.  The MSS will stay at the
                                // minimum size.  This might be a little
                                // suboptimal, but it's easy to implement for
                                // the Sept. 95 service pack and will keep
                                // connections alive if possible.
                                //
                                // In the future we should investigate doing
                                // dead g/w detect on a per-connection basis,
                                // and then doing PMTU probing for each
                                // connection.
                                //
                                if (CurrentTCB->tcb_flags & PMTU_BH_PROBE) {
                                    CurrentTCB->tcb_flags &= ~PMTU_BH_PROBE;
                                    if (--(CurrentTCB->tcb_slowcount) == 0)
                                        CurrentTCB->tcb_fastchk &=
                                            ~TCP_FLAG_SLOW;
                                }
                                CurrentTCB->tcb_bhprobecnt = 0;
                            }
                        }
                    }

                    //
                    // Since we're retransmitting, our first-hop router
                    // may be down.  Tell IP we're suspicious if this
                    // is the first retransmit.
                    //
                    if (CurrentTCB->tcb_rexmitcnt == 1 &&
                        CurrentTCB->tcb_rce != NULL) {
                        ForwardReachabilityInDoubt(CurrentTCB->tcb_rce);
                    }

                    //
                    // Now handle the various cases.
                    //
                    switch (CurrentTCB->tcb_state) {

                    case TCB_SYN_SENT:
                    case TCB_SYN_RCVD:
                        //
                        // In SYN-SENT or SYN-RCVD we'll need to retransmit
                        // the SYN.
                        //
                        SendSYN(CurrentTCB, DISPATCH_LEVEL);
                        CurrentTCB = FindNextTCB(i, CurrentTCB);
                        continue;

                    case TCB_FIN_WAIT1:
                    case TCB_CLOSING:
                    case TCB_LAST_ACK:
                        //
                        // The call to ResetSendNext (above) will have
                        // turned off the FIN_OUTSTANDING flag.
                        //
                        CurrentTCB->tcb_flags |= FIN_NEEDED;

                        // Deliberate fall through: these states also
                        // retransmit like CLOSE_WAIT / ESTAB.

                    case TCB_CLOSE_WAIT:
                    case TCB_ESTAB:
                        //
                        // In this state we have data to retransmit, unless
                        // the window is zero (in which case we need to
                        // probe), or we're just sending a FIN.
                        //
                        CheckTCBSends(CurrentTCB);

                        Delayed = TRUE;
                        DelayAction(CurrentTCB, NEED_OUTPUT);
                        break;

                    case TCB_TIME_WAIT:
                        //
                        // If it's fired in TIME-WAIT, we're all done and
                        // can clean up.  We'll call TryToCloseTCB even
                        // though he's already sort of closed.  TryToCloseTCB
                        // will figure this out and do the right thing.
                        //
                        TryToCloseTCB(CurrentTCB, TCB_CLOSE_SUCCESS,
                                      DISPATCH_LEVEL);
                        CurrentTCB = FindNextTCB(i, CurrentTCB);
                        continue;

                    default:
                        break;
                    }
                }
            }

            //
            // Now check the SWS deadlock timer..
            //
            if (TCB_TIMER_RUNNING(CurrentTCB->tcb_swstimer)) {
                //
                // The timer is running.
                //
                if (--(CurrentTCB->tcb_swstimer) == 0) {
                    //
                    // And it's fired. Force output now.
                    //
                    CurrentTCB->tcb_flags |= FORCE_OUTPUT;
                    Delayed = TRUE;
                    DelayAction(CurrentTCB, NEED_OUTPUT);
                }
            }

            //
            // Check the push data timer.
            //
            if (TCB_TIMER_RUNNING(CurrentTCB->tcb_pushtimer)) {
                //
                // The timer is running. Decrement it.
                //
                if (--(CurrentTCB->tcb_pushtimer) == 0) {
                    //
                    // It's fired.
                    //
                    PushData(CurrentTCB);
                    Delayed = TRUE;
                }
            }

            //
            // Check the delayed ack timer.
            //
            if (TCB_TIMER_RUNNING(CurrentTCB->tcb_delacktimer)) {
                //
                // The timer is running.
                //
                if (--(CurrentTCB->tcb_delacktimer) == 0) {
                    //
                    // And it's fired.  Set up to send an ACK.
                    //
                    Delayed = TRUE;
                    DelayAction(CurrentTCB, NEED_ACK);
                }
            }

            //
            // Finally check the keepalive timer.
            //
            if (CurrentTCB->tcb_state == TCB_ESTAB) {
                if ((CurrentTCB->tcb_flags & KEEPALIVE) &&
                    (CurrentTCB->tcb_conn != NULL)) {
                    //
                    // Ticks since tcb_alive was last stamped.  (Named
                    // distinctly so it does not hide the function-scope
                    // Delta used for the ISN update above.)
                    //
                    uint KaElapsed;

                    KaElapsed = TCPTime - CurrentTCB->tcb_alive;
                    if (KaElapsed > CurrentTCB->tcb_conn->tc_tcbkatime) {
                        //
                        // Idle longer than the keepalive time.  See whether
                        // the next probe interval has also gone by.
                        //
                        KaElapsed -= CurrentTCB->tcb_conn->tc_tcbkatime;
                        if (KaElapsed > (CurrentTCB->tcb_kacount *
                                CurrentTCB->tcb_conn->tc_tcbkainterval)) {
                            if (CurrentTCB->tcb_kacount < MaxDataRexmitCount) {
                                SendKA(CurrentTCB, DISPATCH_LEVEL);
                                CurrentTCB = FindNextTCB(i, CurrentTCB);
                                continue;
                            } else
                                goto TimeoutTCB;
                        }
                    } else
                        CurrentTCB->tcb_kacount = 0;
                }
            }

            //
            // If this is an active open connection in SYN-SENT or SYN-RCVD,
            // or we have a FIN pending, check the connect timer.
            //
            if (CurrentTCB->tcb_flags &
                (ACTIVE_OPEN | FIN_NEEDED | FIN_SENT)) {
                TCPConnReq *ConnReq = CurrentTCB->tcb_connreq;

                ASSERT(ConnReq != NULL);
                if (TCB_TIMER_RUNNING(ConnReq->tcr_timeout)) {
                    // Timer is running.
                    if (--(ConnReq->tcr_timeout) == 0) {
                        // The connection timer has timed out.
                        TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
                                      DISPATCH_LEVEL);
                        CurrentTCB = FindNextTCB(i, CurrentTCB);
                        continue;
                    }
                }
            }

            //
            // Timer isn't running, or didn't fire.
            //
            TempTCB = CurrentTCB->tcb_next;
            KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
            CurrentTCB = TempTCB;
        }
    }

    //
    // See if we need to call receive complete as part of deadman processing.
    // We do this now because we want to restart the timer before calling
    // receive complete, in case that takes a while.  If we make this check
    // while the timer is running we'd have to lock, so we'll check and save
    // the result now before we start the timer.
    //
    if (DeadmanTicks == TCPTime) {
        CallRcvComplete = TRUE;
        DeadmanTicks += NUM_DEADMAN_TICKS;
    } else
        CallRcvComplete = FALSE;

    //
    // Now check the pending free list.  If it's not null, walk down the
    // list and decrement the walk count.  If the count goes below 2, pull it
    // from the list.  If the count goes to 0, free the TCB.  If the count is
    // at 1 it'll be freed by whoever called RemoveTCB.
    //
    KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
    if (PendingFreeList != NULL) {
        TCB *PrevTCB;

        //
        // Treat the list head itself as the q_next field of a pseudo TCB
        // so that unlinking the first element needs no special case.
        //
        PrevTCB = CONTAINING_RECORD(&PendingFreeList, TCB, tcb_delayq.q_next);

        do {
            CurrentTCB = (TCB *)PrevTCB->tcb_delayq.q_next;

            CHECK_STRUCT(CurrentTCB, tcb);

            CurrentTCB->tcb_walkcount--;
            if (CurrentTCB->tcb_walkcount <= 1) {
                // Unlink it; PrevTCB stays where it is.
                *(TCB **)&PrevTCB->tcb_delayq.q_next =
                    (TCB *)CurrentTCB->tcb_delayq.q_next;

                if (CurrentTCB->tcb_walkcount == 0) {
                    FreeTCB(CurrentTCB);
                }
            } else {
                PrevTCB = CurrentTCB;
            }
        } while (PrevTCB->tcb_delayq.q_next != NULL);
    }

    TCBWalkCount--;
    KeReleaseSpinLockFromDpcLevel(&TCBTableLock);

    //
    // Do AddrCheckTable cleanup.  Each entry with a running tick count is
    // aged by one tick; when the count reaches zero its source address is
    // reset to the unspecified address.
    //
    if (AddrCheckTable) {

        TCPAddrCheckElement *Temp;

        KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);

        for (Temp = AddrCheckTable;Temp < AddrCheckTable + NTWMaxConnectCount;
             Temp++) {
            if (Temp->TickCount > 0) {
                if ((--(Temp->TickCount)) == 0) {
                    Temp->SourceAddress = UnspecifiedAddr;
                }
            }
        }

        KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
    }

    if (Delayed)
        ProcessTCBDelayQ();

    if (CallRcvComplete)
        TCPRcvComplete();
}


#if 0  // We update PMTU lazily to avoid exactly this.
//* SetTCBMTU - Set TCB MTU values.
//
//  TCBWalk callback that applies a new MTU to every TCB whose destination
//  and source addresses match the given path.  For a matching TCB the MSS
//  becomes the smaller of the new MTU and tcb_remmss, and the congestion
//  window and slow start threshold are raised if they are now too small.
//
uint  // Returns: TRUE.
SetTCBMTU(
    TCB *CheckTCB,  // TCB to be checked.
    void *DestPtr,  // Destination address.
    void *SrcPtr,   // Source address.
    void *MTUPtr)   // New MTU.
{
    IPv6Addr *PathDest = (IPv6Addr *)DestPtr;
    IPv6Addr *PathSrc = (IPv6Addr *)SrcPtr;
    KIRQL SavedIrql;

    CHECK_STRUCT(CheckTCB, tcb);

    KeAcquireSpinLock(&CheckTCB->tcb_lock, &SavedIrql);

    if (IP6_ADDR_EQUAL(&CheckTCB->tcb_daddr, PathDest) &&
        IP6_ADDR_EQUAL(&CheckTCB->tcb_saddr, PathSrc)) {
        uint NewMTU = *(uint *)MTUPtr;
        uint RemoteMSS = (uint)CheckTCB->tcb_remmss;

        //
        // Clamp the MSS to the smaller of the new MTU and tcb_remmss.
        //
        CheckTCB->tcb_mss = (ushort)MIN(NewMTU, RemoteMSS);

        ASSERT(CheckTCB->tcb_mss > 0);

        //
        // The congestion window must hold at least one segment.
        //
        if (CheckTCB->tcb_cwin < CheckTCB->tcb_mss) {
            CheckTCB->tcb_cwin = CheckTCB->tcb_mss;

            //
            // And the slow start threshold at least two.
            //
            if (CheckTCB->tcb_ssthresh < ((uint) CheckTCB->tcb_mss*2)) {
                CheckTCB->tcb_ssthresh = CheckTCB->tcb_mss * 2;
            }
        }
    }

    KeReleaseSpinLock(&CheckTCB->tcb_lock, SavedIrql);

    return TRUE;
}
#endif

//* DeleteTCBWithSrc - Delete tcbs with a particular src address.
//
//  TCBWalk callback.  Asks the walker to delete every TCB whose source
//  address equals the address supplied as the first context.
//
uint  // Returns: FALSE if CheckTCB is to be deleted, TRUE otherwise.
DeleteTCBWithSrc(
    TCB *CheckTCB,  // TCB to be checked.
    void *AddrPtr,  // Pointer to address.
    void *Unused1,  // Not used.
    void *Unused3)  // Not used.
{
    IPv6Addr *DoomedSrc = (IPv6Addr *)AddrPtr;
    uint KeepTCB;

    CHECK_STRUCT(CheckTCB, tcb);

    //
    // A TCB bound to the given source address has to go.
    //
    KeepTCB = IP6_ADDR_EQUAL(&CheckTCB->tcb_saddr, DoomedSrc) ? FALSE : TRUE;

    return KeepTCB;
}

//* TCBWalk - Walk the TCBs in the table, and call a function for each of them.
//
//  Called when we need to repetitively do something to each TCB in the table.
//  We call the specified function with a pointer to the TCB and the three
//  input contexts for each TCB in the table.  If the function returns FALSE,
//  we delete the TCB.
//
//  The TCB table lock is acquired here and is held while CallRtn runs.  It
//  is dropped only while a rejected TCB is being closed, and is reacquired
//  before the walk continues.
//
void  // Returns: Nothing.
TCBWalk(
    uint (*CallRtn)(struct TCB *, void *, void *, void *),  // Routine to call.
    void *Context1,  // Context to pass to CallRtn.
    void *Context2,  // Second context to pass to call routine.
    void *Context3)  // Third context to pass to call routine.
{
    uint i;
    TCB *CurTCB;
    KIRQL Irql0, Irql1;

    //
    // Loop through each bucket in the table, going down the chain of
    // TCBs on the bucket.  For each one call CallRtn.
    //
    KeAcquireSpinLock(&TCBTableLock, &Irql0);

    for (i = 0; i < TcbTableSize; i++) {

        CurTCB = TCBTable[i];

        //
        // Walk down the chain on this bucket.
        //
        while (CurTCB != NULL) {
            if (!(*CallRtn)(CurTCB, Context1, Context2, Context3)) {
                //
                // Call failed on this one.
                // Notify the client and close the TCB.
                //
                KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql1);
                if (!CLOSING(CurTCB)) {
                    //
                    // Take a reference so the TCB stays around while we
                    // work on it without the table lock.
                    //
                    CurTCB->tcb_refcnt++;

                    //
                    // Note the IRQL swap: the table lock is released with
                    // the IRQL saved when the TCB lock was taken, and the
                    // IRQL saved when the table lock was taken is handed to
                    // TryToCloseTCB (presumably so it can release the TCB
                    // lock back to the original level - confirm against
                    // TryToCloseTCB).
                    //
                    KeReleaseSpinLock(&TCBTableLock, Irql1);
                    TryToCloseTCB(CurTCB, TCB_CLOSE_ABORTED, Irql0);

                    RemoveTCBFromConn(CurTCB);
                    if (CurTCB->tcb_state != TCB_TIME_WAIT)
                        NotifyOfDisc(CurTCB, TDI_CONNECTION_ABORTED);

                    //
                    // Drop the reference taken above.  DerefTCB is given
                    // the IRQL saved by the acquire just before it
                    // (presumably it releases the TCB lock - confirm).
                    // Then get the table lock back to resume the walk.
                    //
                    KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql0);
                    DerefTCB(CurTCB, Irql0);
                    KeAcquireSpinLock(&TCBTableLock, &Irql0);
                } else
                    KeReleaseSpinLock(&CurTCB->tcb_lock, Irql1);

                //
                // The chain may have changed while the table lock was
                // dropped, so ask FindNextTCB for our successor in this
                // bucket rather than following tcb_next directly.
                //
                CurTCB = FindNextTCB(i, CurTCB);
            } else {
                CurTCB = CurTCB->tcb_next;
            }
        }
    }

    KeReleaseSpinLock(&TCBTableLock, Irql0);
}

//* FindTCB - Find a TCB in the tcb table.
//
//  Looks up the TCB matching the given addresses, scope identifiers and
//  ports.  The one-element cache (LastTCB) is consulted first; on a miss
//  we hash into the TCB table and search that bucket's chain, refreshing
//  the cache when a match turns up.  We assume the TCB table lock is held
//  when we are called.
//
TCB *  // Returns: Pointer to TCB found, or NULL if none.
FindTCB(
    IPv6Addr *Src,     // Source IP address of TCB to be found.
    IPv6Addr *Dest,    // Destination IP address of TCB to be found.
    uint SrcScopeId,   // Source address scope identifier.
    uint DestScopeId,  // Destination address scope identifier.
    ushort SrcPort,    // Source port of TCB to be found.
    ushort DestPort)   // Destination port of TCB to be found.
{
    TCB *Candidate;

    //
    // Fast path: is it the same TCB we found last time?
    //
    Candidate = LastTCB;
    if (Candidate != NULL) {
        CHECK_STRUCT(Candidate, tcb);
        if (IP6_ADDR_EQUAL(&Candidate->tcb_daddr, Dest) &&
            Candidate->tcb_dscope_id == DestScopeId &&
            Candidate->tcb_dport == DestPort &&
            IP6_ADDR_EQUAL(&Candidate->tcb_saddr, Src) &&
            Candidate->tcb_sscope_id == SrcScopeId &&
            Candidate->tcb_sport == SrcPort) {
            return Candidate;
        }
    }

    //
    // Cache miss.  Search the chain on this connection's hash bucket.
    //
    for (Candidate = TCBTable[TCB_HASH(*Dest, *Src, DestPort, SrcPort)];
         Candidate != NULL;
         Candidate = Candidate->tcb_next) {

        CHECK_STRUCT(Candidate, tcb);
        if (IP6_ADDR_EQUAL(&Candidate->tcb_daddr, Dest) &&
            Candidate->tcb_dscope_id == DestScopeId &&
            Candidate->tcb_dport == DestPort &&
            IP6_ADDR_EQUAL(&Candidate->tcb_saddr, Src) &&
            Candidate->tcb_sscope_id == SrcScopeId &&
            Candidate->tcb_sport == SrcPort) {

            //
            // Got it.  Remember it for the next lookup.
            //
            LastTCB = Candidate;
            return Candidate;
        }
    }

    //
    // Ran off the end of the chain without a match.
    //
    return NULL;
}


//* InsertTCB - Insert a TCB in the tcb table.
//
//  Adds a TCB to its hash bucket.  The caller should hold neither the TCB
//  table lock nor the TCB's own lock; both are taken here.  Bucket chains
//  are kept in ascending order of TCB address.  The whole chain is scanned
//  so that a TCB with the same addresses and ports is detected, in which
//  case nothing is inserted.
//
//  NOTE(review): unlike FindTCB, the duplicate test here does not compare
//  scope identifiers - confirm this is intended.
//
uint              // Returns: TRUE if we inserted, false if we didn't.
InsertTCB(
    TCB *NewTCB)  // TCB to be inserted.
{
    uint Bucket;
    uint Inserted = TRUE;
    KIRQL SavedIrql;
    TCB **Link;      // Link field currently being examined.
    TCB **InsertAt;  // Link field the new TCB will be hung from.
    TCB *Existing;

    ASSERT(NewTCB != NULL);
    CHECK_STRUCT(NewTCB, tcb);
    Bucket = TCB_HASH(NewTCB->tcb_daddr, NewTCB->tcb_saddr,
                      NewTCB->tcb_dport, NewTCB->tcb_sport);

    KeAcquireSpinLock(&TCBTableLock, &SavedIrql);
    KeAcquireSpinLockAtDpcLevel(&NewTCB->tcb_lock);

    //
    // Walk the chain by link field.  The first link that points at a TCB
    // with a higher address than ours is where we belong, but we keep
    // going to the end of the chain in case a duplicate lies beyond it.
    //
    InsertAt = NULL;
    for (Link = &TCBTable[Bucket];
         (Existing = *Link) != NULL;
         Link = &Existing->tcb_next) {

        if (IP6_ADDR_EQUAL(&Existing->tcb_daddr, &NewTCB->tcb_daddr) &&
            IP6_ADDR_EQUAL(&Existing->tcb_saddr, &NewTCB->tcb_saddr) &&
            (Existing->tcb_sport == NewTCB->tcb_sport) &&
            (Existing->tcb_dport == NewTCB->tcb_dport)) {
            //
            // Already have one of these.
            //
            Inserted = FALSE;
            break;
        }

        if (InsertAt == NULL && Existing > NewTCB) {
            InsertAt = Link;
        }

        CHECK_STRUCT(Existing, tcb);
    }

    if (Inserted) {
        //
        // No higher-addressed TCB on the chain means we go on the tail,
        // which is where Link was left by the loop.
        //
        if (InsertAt == NULL) {
            InsertAt = Link;
        }

        NewTCB->tcb_next = *InsertAt;
        *InsertAt = NewTCB;
        NewTCB->tcb_flags |= IN_TCB_TABLE;
        TStats.ts_numconns++;
    }

    KeReleaseSpinLockFromDpcLevel(&NewTCB->tcb_lock);
    KeReleaseSpinLock(&TCBTableLock, SavedIrql);
    return Inserted;
}


//* RemoveTCB - Remove a TCB from the tcb table.
//
//  Unlinks a TCB from its hash bucket (if it is flagged as being in the
//  table) and clears it out of the one-element lookup cache.  We assume
//  the TCB table lock and the TCB lock are held when we are called.
//
//  If TCBWalkCount is non-zero the TCB is stamped with a walk count and
//  put on the pending free list instead of being released to the caller.
//
uint  // Returns: TRUE if it's OK to free it, FALSE otherwise.
RemoveTCB(
    TCB *RemovedTCB)  // TCB to be removed.
{
    TCB **Link;  // Link field currently being examined.
    TCB *Node;
#if DBG
    uint Unlinked = FALSE;
#endif

    CHECK_STRUCT(RemovedTCB, tcb);

    if (RemovedTCB->tcb_flags & IN_TCB_TABLE) {
        Link = &TCBTable[TCB_HASH(RemovedTCB->tcb_daddr,
                                  RemovedTCB->tcb_saddr,
                                  RemovedTCB->tcb_dport,
                                  RemovedTCB->tcb_sport)];

        //
        // Chase the chain until some link points at our TCB.
        //
        for (;;) {
            Node = *Link;

            if (Node == RemovedTCB) {
                // This is the link to splice out.
                *Link = RemovedTCB->tcb_next;
                RemovedTCB->tcb_flags &= ~IN_TCB_TABLE;
                TStats.ts_numconns--;
#if DBG
                Unlinked = TRUE;
#endif
                break;
            }

            if (Node == NULL) {
                // End of chain; it wasn't here.
                break;
            }

#if DBG
            CHECK_STRUCT(Node, tcb);
#endif
            Link = &Node->tcb_next;
        }

        ASSERT(Unlinked);
    }

    //
    // Don't leave a stale pointer in the lookup cache.
    //
    if (LastTCB == RemovedTCB)
        LastTCB = NULL;

    if (TCBWalkCount != 0) {
        //
        // Defer the free: park the TCB on the pending free list.
        //
        RemovedTCB->tcb_walkcount = TCBWalkCount + 1;
        *(TCB **)&RemovedTCB->tcb_delayq.q_next = PendingFreeList;
        PendingFreeList = RemovedTCB;
        return FALSE;
    }

    return TRUE;
}


//* ScavengeTCB - Scavenge a TCB that's in the TIME_WAIT state.
//
//  Called when we're running low on TCBs, and need to scavenge one from
//  TIME_WAIT state.  We'll walk through the TCB table, looking for the oldest
//  TCB in TIME_WAIT.  We'll remove and return a pointer to that TCB.  If we
//  don't find any TCBs in TIME_WAIT, we'll return NULL.
//
//  Only TCBs with no references that are not already closing are eligible.
//  Nothing is scavenged (NULL is returned) while TCBWalkCount is non-zero.
//
//  NOTE(review): ages are computed against SystemUpTime(), while AllocTCB
//  initializes tcb_alive from TCPTime - confirm the two share a time base.
//
TCB *  // Returns: Pointer to a reusable TCB, or NULL.
ScavengeTCB(
    void)
{
    KIRQL Irql0, Irql1, IrqlSave;
    uint Now = SystemUpTime();
    uint Delta = 0;  // Age of the best candidate found so far.
    uint i;
    TCB *FoundTCB = NULL, *PrevFound;
    TCB *CurrentTCB, *PrevTCB;

    KeAcquireSpinLock(&TCBTableLock, &Irql0);

    if (TCBWalkCount != 0) {
        KeReleaseSpinLock(&TCBTableLock, Irql0);
        return NULL;
    }

    for (i = 0; i < TcbTableSize; i++) {

        // Treat the bucket head as the tcb_next field of a pseudo-TCB so
        // that PrevTCB->tcb_next works uniformly for the first element.
        PrevTCB = CONTAINING_RECORD(&TCBTable[i], TCB, tcb_next);
        CurrentTCB = PrevTCB->tcb_next;

        while (CurrentTCB != NULL) {
            CHECK_STRUCT(CurrentTCB, tcb);

            KeAcquireSpinLock(&CurrentTCB->tcb_lock, &Irql1);
            if (CurrentTCB->tcb_state == TCB_TIME_WAIT &&
                (CurrentTCB->tcb_refcnt == 0) && !CLOSING(CurrentTCB)){
                if (FoundTCB == NULL ||
                    ((Now - CurrentTCB->tcb_alive) > Delta)) {
                    //
                    // Found a new 'older' TCB.  If we already have one, free
                    // the lock on him and get the lock on the new one.
                    //
                    // The new candidate's lock (taken above) stays held.
                    // IrqlSave remembers the IRQL from the first candidate's
                    // acquire, and is used for the final release below.
                    //
                    if (FoundTCB != NULL)
                        KeReleaseSpinLock(&FoundTCB->tcb_lock, Irql1);
                    else
                        IrqlSave = Irql1;

                    // Remember the predecessor so we can unlink later.
                    PrevFound = PrevTCB;
                    FoundTCB = CurrentTCB;
                    Delta = Now - FoundTCB->tcb_alive;
                } else
                    KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);
            } else
                KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);

            //
            // Look at the next one.
            //
            PrevTCB = CurrentTCB;
            CurrentTCB = PrevTCB->tcb_next;
        }
    }

    //
    // If we have one, pull him from the list.
    //
    if (FoundTCB != NULL) {
        PrevFound->tcb_next = FoundTCB->tcb_next;
        FoundTCB->tcb_flags &= ~IN_TCB_TABLE;

        //
        // REVIEW: Is the right place to drop the reference on our RCE?
        // REVIEW: IPv4 called down to IP to close the RCE here.
        //
        if (FoundTCB->tcb_rce != NULL)
            ReleaseRCE(FoundTCB->tcb_rce);

        TStats.ts_numconns--;

        // Don't leave a stale pointer in the one-element lookup cache.
        if (LastTCB == FoundTCB) {
            LastTCB = NULL;
        }
        KeReleaseSpinLock(&FoundTCB->tcb_lock, IrqlSave);
    }

    KeReleaseSpinLock(&TCBTableLock, Irql0);
    return FoundTCB;
}


//* AllocTCB - Allocate a TCB.
//
//  Produces a zeroed, minimally initialized TCB.  Sources are tried in
//  this order: the free list; a TCB scavenged from TIME_WAIT (only once
//  CurrentTCBs has reached MaxFreeTCBs); and finally non-paged pool, as
//  long as CurrentTCBs is below MaxTCBs.
//
TCB *  // Returns: Pointer to the new TCB, or NULL if we couldn't get one.
AllocTCB(
    void)
{
    TCB *FreshTCB;
    PSLIST_ENTRY FreeLink;

    //
    // The free list is the cheapest source, so try it first.
    //
    FreeLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock);

    if (FreeLink != NULL) {
        FreshTCB = CONTAINING_RECORD(FreeLink, TCB, tcb_next);
        CHECK_STRUCT(FreshTCB, tcb);
        ExInterlockedAddUlong(&FreeTCBs, -1, &FreeTCBListLock);
    } else {
        FreshTCB = NULL;

        //
        // Nothing on the free list.  Once the number of TCBs outstanding
        // reaches what we like to keep on the free list, try to scavenge
        // one from time wait before growing any further.
        //
        if (CurrentTCBs >= MaxFreeTCBs) {
            FreshTCB = ScavengeTCB();
        }

        if (FreshTCB == NULL) {
            //
            // Have to go to the pool, if we're still allowed to.
            //
            if (CurrentTCBs >= MaxTCBs) {
                return NULL;
            }

            FreshTCB = ExAllocatePool(NonPagedPool, sizeof(TCB));
            if (FreshTCB == NULL) {
                return NULL;
            }

            ExInterlockedAddUlong(&CurrentTCBs, 1, &FreeTCBListLock);
        }
    }

    ASSERT(FreshTCB != NULL);

    //
    // Whatever its origin, the TCB starts out wiped clean.
    //
    RtlZeroMemory(FreshTCB, sizeof(TCB));
#if DBG
    FreshTCB->tcb_sig = tcb_signature;
#endif
    KeInitializeSpinLock(&FreshTCB->tcb_lock);
    INITQ(&FreshTCB->tcb_sendq);
    FreshTCB->tcb_cursend = NULL;
    FreshTCB->tcb_alive = TCPTime;
    FreshTCB->tcb_hops = -1;

    //
    // A new TCB isn't established, so it must stay off the fast path:
    // start the slowcount at one and set up the fastchk fields to match.
    //
    FreshTCB->tcb_slowcount = 1;
    FreshTCB->tcb_fastchk = TCP_FLAG_ACK | TCP_FLAG_SLOW;

    return FreshTCB;
}


//* FreeTCB - Free a TCB.
//
//  Disposes of a TCB that is no longer needed.  Normally the TCB is pushed
//  on the free list for reuse; if too many TCBs are outstanding (or, on
//  non-Win64 builds, the free list has grown too deep) it is returned to
//  the pool instead.
//
//  Note: This routine may be called with the TCBTableLock held.
//
void  // Returns: Nothing.
FreeTCB(
    TCB *FreedTCB)  // TCB to be freed.
{
    PSLIST_ENTRY FreeLink;
    uint ReturnToPool;

    CHECK_STRUCT(FreedTCB, tcb);

    //
    // Decide whether this TCB is surplus to requirements.
    //
    ReturnToPool = (CurrentTCBs > 2 * MaxFreeTCBs);
#if !defined(_WIN64)
    //
    // Presumably guards the width of the SLIST depth field on these
    // builds - confirm.
    //
    if (!ReturnToPool && (FreeTCBList.Depth > 65000)) {
        ReturnToPool = TRUE;
    }
#endif

    if (ReturnToPool) {
        ExInterlockedAddUlong(&CurrentTCBs, (ulong) - 1, &FreeTCBListLock);
        ExFreePool(FreedTCB);
    } else {
        //
        // Keep it for later; the tcb_next field doubles as the list entry.
        //
        FreeLink = CONTAINING_RECORD(&(FreedTCB->tcb_next),
                                     SLIST_ENTRY, Next);
        ExInterlockedPushEntrySList(&FreeTCBList, FreeLink,
                                    &FreeTCBListLock);
        ExInterlockedAddUlong(&FreeTCBs, 1, &FreeTCBListLock);
    }
}


#pragma BEGIN_INIT

//* InitTCB - Initialize our TCB code.
//
//  Called during init time.  Allocates and clears the TCB hash table,
//  initializes the free list, the locks and the time-keeping globals,
//  and starts the periodic timer that drives TCBTimeout.
//
int  // Returns: TRUE if we did initialize, false if we didn't.
InitTCB(
    void)
{
    LARGE_INTEGER FirstDue;
    uint Bucket;

    TCBTable = ExAllocatePool(NonPagedPool, TcbTableSize * sizeof(TCB*));
    if (TCBTable == NULL) {
        //
        // Can't do anything without a table.
        //
        return FALSE;
    }

    //
    // Every bucket starts out empty, and so does the lookup cache.
    //
    for (Bucket = 0; Bucket < TcbTableSize; Bucket++) {
        TCBTable[Bucket] = NULL;
    }

    LastTCB = NULL;

    ExInitializeSListHead(&FreeTCBList);

    KeInitializeSpinLock(&TCBTableLock);
    KeInitializeSpinLock(&FreeTCBListLock);

    TCPTime = 0;
    TCBWalkCount = 0;
    DeadmanTicks = NUM_DEADMAN_TICKS;

    //
    // Arrange for TCBTimeout to run once every MS_PER_TICK milliseconds.
    // The first expiration is given as a negative due time in units of
    // 100 nanoseconds.
    //
    // REVIEW: Switch this to be driven off the IPv6Timeout routine instead
    // REVIEW: of having two independent timers?
    //
    KeInitializeDpc(&TCBTimeoutDpc, TCBTimeout, NULL);
    KeInitializeTimer(&TCBTimer);
    FirstDue.QuadPart = -(LONGLONG) MS_PER_TICK * 10000;
    KeSetTimerEx(&TCBTimer, FirstDue, MS_PER_TICK, &TCBTimeoutDpc);

    return TRUE;
}

#pragma END_INIT


//* UnloadTCB
//
//  Called during shutdown to uninitialize
//  in preparation for unloading the stack.
//
//  Cancels the TCB timer, aborts every TCB still in the table, releases
//  the TCBs held on the free list, and frees the table itself.
//
//  There are no open sockets (or else we wouldn't be unloading).
//  Because UnloadTCPSend has already been called,
//  we are no longer receiving packets from the IPv6 layer.
//
void
UnloadTCB(void)
{
    PSLIST_ENTRY BufferLink;
    TCB *CurrentTCB;
    uint i;
    KIRQL OldIrql;

    //
    // First stop TCBTimeout from being called.
    //
    KeCancelTimer(&TCBTimer);

    //
    // Traverse the buckets looking for TCBs.
    // REVIEW - Can we have TCBs in states other than time-wait?
    //
    for (i = 0; i < TcbTableSize; i++) {

        //
        // The bucket head is re-read on every pass, so this loop only
        // terminates if closing a TCB takes it out of the bucket
        // (presumably TryToCloseTCB does so - confirm).
        //
        while ((CurrentTCB = TCBTable[i]) != NULL) {

            KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);

            KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_STATE,
                       "UnloadTCB(%p): state %x flags %x refs %x "
                       "reason %x pend %x walk %x\n",
                       CurrentTCB,
                       CurrentTCB->tcb_state,
                       CurrentTCB->tcb_flags,
                       CurrentTCB->tcb_refcnt,
                       CurrentTCB->tcb_closereason,
                       CurrentTCB->tcb_pending,
                       CurrentTCB->tcb_walkcount));

            //
            // Abort the connection.  The TCB lock taken above is not
            // released in this routine; TryToCloseTCB is handed the saved
            // IRQL (presumably it releases the lock - confirm).
            //
            CurrentTCB->tcb_flags |= NEED_RST;
            TryToCloseTCB(CurrentTCB, TCB_CLOSE_ABORTED, OldIrql);
        }
    }

    //
    // Now pull TCBs off the free list and really free them.
    // The FreeTCBs and CurrentTCBs counters are not adjusted here.
    //
    while ((BufferLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock)) != NULL) {
        CurrentTCB = CONTAINING_RECORD(BufferLink, TCB, tcb_next);
        CHECK_STRUCT(CurrentTCB, tcb);

        ExFreePool(CurrentTCB);
    }

    //
    // Finally release the hash table itself.
    //
    ExFreePool(TCBTable);
    TCBTable = NULL;
}

//* CleanupTCBWithIF
//
//  Helper function for TCBWalk, to remove
//  TCBs that reference the specified interface.
//
//  A TCB is considered to reference the interface when its route cache
//  entry (tcb_rce) is on that interface.  A TCB with no RCE is kept.
//
//  Returns FALSE if CheckTCB should be deleted, TRUE otherwise.
//
uint
CleanupTCBWithIF(
    TCB *CheckTCB,   // TCB to be checked.
    void *Context1,  // Interface being removed (an Interface *).
    void *Context2,  // Unused.
    void *Context3)  // Unused.
{
    Interface *IF = (Interface *) Context1;
    RouteCacheEntry *RCE;

    CHECK_STRUCT(CheckTCB, tcb);

    RCE = CheckTCB->tcb_rce;
    if (RCE != NULL) {
        //
        // The RCE's NTE and NCE are expected to be on the same interface,
        // so checking the NTE's interface is sufficient.
        //
        ASSERT(RCE->NTE->IF == RCE->NCE->IF);

        if (RCE->NTE->IF == IF)
            return FALSE; // Delete this TCB.
    }

    return TRUE; // Do not delete this TCB.
}

//* TCPRemoveIF
//
//  Drops everything TCP holds that refers to the given interface, by
//  walking the TCB table and deleting each TCB that CleanupTCBWithIF
//  rejects.
//
void
TCPRemoveIF(Interface *IF)
{
    void *WalkContext = IF;  // Handed to CleanupTCBWithIF as Context1.

    //
    // TCBs are presently the only TCP objects that keep such references.
    //
    TCBWalk(CleanupTCBWithIF, WalkContext, NULL, NULL);
}