Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1564 lines
52 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1985-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // Code for TCP Control Block management.
  14. //
  15. #include "oscfg.h"
  16. #include "ndis.h"
  17. #include "ip6imp.h"
  18. #include "ip6def.h"
  19. #include "tdi.h"
  20. #include "tdint.h"
  21. #include "tdistat.h"
  22. #include "queue.h"
  23. #include "transprt.h"
  24. #include "tcp.h"
  25. #include "tcb.h"
  26. #include "tcpconn.h"
  27. #include "tcpsend.h"
  28. #include "tcprcv.h"
  29. #include "info.h"
  30. #include "tcpcfg.h"
  31. #include "tcpdeliv.h"
  32. #include "route.h"
  33. KSPIN_LOCK TCBTableLock;
  34. uint TCPTime;
  35. uint TCBWalkCount;
  36. TCB **TCBTable;
  37. TCB *LastTCB;
  38. TCB *PendingFreeList;
  39. SLIST_HEADER FreeTCBList;
  40. KSPIN_LOCK FreeTCBListLock; // Lock to protect TCB free list.
  41. extern KSPIN_LOCK AddrObjTableLock;
  42. extern SeqNum ISNMonotonicPortion;
  43. extern int ISNCredits;
  44. extern int ISNMaxCredits;
  45. extern uint GetDeltaTime();
  46. uint CurrentTCBs = 0;
  47. uint FreeTCBs = 0;
  48. uint MaxTCBs = 0xffffffff;
  49. #define MAX_FREE_TCBS 1000
  50. #define NUM_DEADMAN_TICKS MS_TO_TICKS(1000)
  51. uint MaxFreeTCBs = MAX_FREE_TCBS;
  52. uint DeadmanTicks;
  53. KTIMER TCBTimer;
  54. KDPC TCBTimeoutDpc;
  55. //
  56. // All of the init code can be discarded.
  57. //
  58. #ifdef ALLOC_PRAGMA
  59. int InitTCB(void);
  60. #pragma alloc_text(INIT, InitTCB)
  61. #endif // ALLOC_PRAGMA
  62. //* ReadNextTCB - Read the next TCB in the table.
  63. //
  64. // Called to read the next TCB in the table. The needed information
  65. // is derived from the incoming context, which is assumed to be valid.
  66. // We'll copy the information, and then update the context value with
  67. // the next TCB to be read.
  68. //
  69. uint // Returns: TRUE if more data is available to be read, FALSE is not.
  70. ReadNextTCB(
  71. void *Context, // Pointer to a TCPConnContext.
  72. void *Buffer) // Pointer to a TCPConnTableEntry structure.
  73. {
  74. TCPConnContext *TCContext = (TCPConnContext *)Context;
  75. TCP6ConnTableEntry *TCEntry = (TCP6ConnTableEntry *)Buffer;
  76. KIRQL OldIrql;
  77. TCB *CurrentTCB;
  78. uint i;
  79. CurrentTCB = TCContext->tcc_tcb;
  80. CHECK_STRUCT(CurrentTCB, tcb);
  81. KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);
  82. if (CLOSING(CurrentTCB))
  83. TCEntry->tct_state = TCP_CONN_CLOSED;
  84. else
  85. TCEntry->tct_state = (uint)CurrentTCB->tcb_state + TCB_STATE_DELTA;
  86. TCEntry->tct_localaddr = CurrentTCB->tcb_saddr;
  87. TCEntry->tct_localscopeid = CurrentTCB->tcb_sscope_id;
  88. TCEntry->tct_localport = CurrentTCB->tcb_sport;
  89. TCEntry->tct_remoteaddr = CurrentTCB->tcb_daddr;
  90. TCEntry->tct_remotescopeid = CurrentTCB->tcb_dscope_id;
  91. TCEntry->tct_remoteport = CurrentTCB->tcb_dport;
  92. TCEntry->tct_owningpid = (CurrentTCB->tcb_conn) ?
  93. CurrentTCB->tcb_conn->tc_owningpid : 0;
  94. KeReleaseSpinLock(&CurrentTCB->tcb_lock, OldIrql);
  95. // We've filled it in. Now update the context.
  96. if (CurrentTCB->tcb_next != NULL) {
  97. TCContext->tcc_tcb = CurrentTCB->tcb_next;
  98. return TRUE;
  99. } else {
  100. // NextTCB is NULL. Loop through the TCBTable looking for a new one.
  101. i = TCContext->tcc_index + 1;
  102. while (i < TcbTableSize) {
  103. if (TCBTable[i] != NULL) {
  104. TCContext->tcc_tcb = TCBTable[i];
  105. TCContext->tcc_index = i;
  106. return TRUE;
  107. break;
  108. } else
  109. i++;
  110. }
  111. TCContext->tcc_index = 0;
  112. TCContext->tcc_tcb = NULL;
  113. return FALSE;
  114. }
  115. }
  116. //* ValidateTCBContext - Validate the context for reading a TCB table.
  117. //
  118. // Called to start reading the TCB table sequentially. We take in
  119. // a context, and if the values are 0 we return information about the
  120. // first TCB in the table. Otherwise we make sure that the context value
  121. // is valid, and if it is we return TRUE.
  122. // We assume the caller holds the TCB table lock.
  123. //
  124. // Upon return, *Valid is set to true if the context is valid.
  125. //
  126. uint // Returns: TRUE if data in table, FALSE if not.
  127. ValidateTCBContext(
  128. void *Context, // Pointer to a TCPConnContext.
  129. uint *Valid) // Where to return infoformation about context being valid.
  130. {
  131. TCPConnContext *TCContext = (TCPConnContext *)Context;
  132. uint i;
  133. TCB *TargetTCB;
  134. TCB *CurrentTCB;
  135. i = TCContext->tcc_index;
  136. TargetTCB = TCContext->tcc_tcb;
  137. //
  138. // If the context values are 0 and NULL, we're starting from the beginning.
  139. //
  140. if (i == 0 && TargetTCB == NULL) {
  141. *Valid = TRUE;
  142. do {
  143. if ((CurrentTCB = TCBTable[i]) != NULL) {
  144. CHECK_STRUCT(CurrentTCB, tcb);
  145. break;
  146. }
  147. i++;
  148. } while (i < TcbTableSize);
  149. if (CurrentTCB != NULL) {
  150. TCContext->tcc_index = i;
  151. TCContext->tcc_tcb = CurrentTCB;
  152. return TRUE;
  153. } else
  154. return FALSE;
  155. } else {
  156. //
  157. // We've been given a context. We just need to make sure that it's
  158. // valid.
  159. //
  160. if (i < TcbTableSize) {
  161. CurrentTCB = TCBTable[i];
  162. while (CurrentTCB != NULL) {
  163. if (CurrentTCB == TargetTCB) {
  164. *Valid = TRUE;
  165. return TRUE;
  166. break;
  167. } else {
  168. CurrentTCB = CurrentTCB->tcb_next;
  169. }
  170. }
  171. }
  172. // If we get here, we didn't find the matching TCB.
  173. *Valid = FALSE;
  174. return FALSE;
  175. }
  176. }
  177. //* FindNextTCB - Find the next TCB in a particular chain.
  178. //
  179. // This routine is used to find the 'next' TCB in a chain. Since we keep
  180. // the chain in ascending order, we look for a TCB which is greater than
  181. // the input TCB. When we find one, we return it.
  182. //
  183. // This routine is mostly used when someone is walking the table and needs
  184. // to free the various locks to perform some action.
  185. //
  186. TCB * // Returns: Pointer to the next TCB, or NULL.
  187. FindNextTCB(
  188. uint Index, // Index into TCBTable.
  189. TCB *Current) // Current TCB - we find the one after this one.
  190. {
  191. TCB *Next;
  192. ASSERT(Index < TcbTableSize);
  193. Next = TCBTable[Index];
  194. while (Next != NULL && (Next <= Current))
  195. Next = Next->tcb_next;
  196. return Next;
  197. }
  198. //* ResetSendNext - Set the sendnext value of a TCB.
  199. //
  200. // Called to set the send next value of a TCB. We do that, and adjust all
  201. // pointers to the appropriate places. We assume the caller holds the lock
  202. // on the TCB.
  203. //
  204. void // Returns: Nothing.
  205. ResetSendNext(
  206. TCB *SeqTCB, // TCB to be updated.
  207. SeqNum NewSeq) // Sequence number to set.
  208. {
  209. TCPSendReq *SendReq;
  210. uint AmtForward;
  211. Queue *CurQ;
  212. PNDIS_BUFFER Buffer;
  213. uint Offset;
  214. CHECK_STRUCT(SeqTCB, tcb);
  215. ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
  216. //
  217. // The new seq must be less than send max, or NewSeq, senduna, sendnext,
  218. // and sendmax must all be equal (the latter case happens when we're
  219. // called exiting TIME_WAIT, or possibly when we're retransmitting
  220. // during a flow controlled situation).
  221. //
  222. ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
  223. (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
  224. SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
  225. SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
  226. AmtForward = NewSeq - SeqTCB->tcb_senduna;
  227. if ((AmtForward == 1) && (SeqTCB->tcb_flags & FIN_SENT) &&
  228. !((SeqTCB->tcb_sendnext - SeqTCB->tcb_senduna) > 1) &&
  229. (SEQ_EQ(SeqTCB->tcb_sendnext,SeqTCB->tcb_sendmax))) {
  230. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_RARE,
  231. "tcpip6: trying to set sendnext for FIN_SENT\n"));
  232. SeqTCB->tcb_sendnext = NewSeq;
  233. SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
  234. return;
  235. }
  236. if((SeqTCB->tcb_flags & FIN_SENT) &&
  237. (SEQ_EQ(SeqTCB->tcb_sendnext,SeqTCB->tcb_sendmax)) &&
  238. ((SeqTCB->tcb_sendnext - NewSeq) == 1) ){
  239. //
  240. // There is only FIN that is left beyond sendnext.
  241. //
  242. SeqTCB->tcb_sendnext = NewSeq;
  243. SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
  244. return;
  245. }
  246. SeqTCB->tcb_sendnext = NewSeq;
  247. //
  248. // If we're backing off send next, turn off the FIN_OUTSTANDING flag to
  249. // maintain a consistent state.
  250. //
  251. if (!SEQ_EQ(NewSeq, SeqTCB->tcb_sendmax))
  252. SeqTCB->tcb_flags &= ~FIN_OUTSTANDING;
  253. if (SYNC_STATE(SeqTCB->tcb_state) && SeqTCB->tcb_state != TCB_TIME_WAIT) {
  254. //
  255. // In these states we need to update the send queue.
  256. //
  257. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  258. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  259. SendReq = (TCPSendReq *)CONTAINING_RECORD(CurQ, TCPReq, tr_q);
  260. //
  261. // SendReq points to the first send request on the send queue.
  262. // Move forward AmtForward bytes on the send queue, and set the
  263. // TCB pointers to the resultant SendReq, buffer, offset, size.
  264. //
  265. while (AmtForward) {
  266. CHECK_STRUCT(SendReq, tsr);
  267. if (AmtForward >= SendReq->tsr_unasize) {
  268. //
  269. // We're going to move completely past this one. Subtract
  270. // his size from AmtForward and get the next one.
  271. //
  272. AmtForward -= SendReq->tsr_unasize;
  273. CurQ = QNEXT(CurQ);
  274. ASSERT(CurQ != QEND(&SeqTCB->tcb_sendq));
  275. SendReq = (TCPSendReq *)CONTAINING_RECORD(CurQ, TCPReq,
  276. tr_q);
  277. } else {
  278. //
  279. // We're pointing at the proper send req now. Break out
  280. // of this loop and save the information. Further down
  281. // we'll need to walk down the buffer chain to find
  282. // the proper buffer and offset.
  283. //
  284. break;
  285. }
  286. }
  287. //
  288. // We're pointing at the proper send req now. We need to go down
  289. // the buffer chain here to find the proper buffer and offset.
  290. //
  291. SeqTCB->tcb_cursend = SendReq;
  292. SeqTCB->tcb_sendsize = SendReq->tsr_unasize - AmtForward;
  293. Buffer = SendReq->tsr_buffer;
  294. Offset = SendReq->tsr_offset;
  295. while (AmtForward) {
  296. // Walk the buffer chain.
  297. uint Length;
  298. //
  299. // We'll need the length of this buffer. Use the portable
  300. // macro to get it. We have to adjust the length by the offset
  301. // into it, also.
  302. //
  303. ASSERT((Offset < NdisBufferLength(Buffer)) ||
  304. ((Offset == 0) && (NdisBufferLength(Buffer) == 0)));
  305. Length = NdisBufferLength(Buffer) - Offset;
  306. if (AmtForward >= Length) {
  307. //
  308. // We're moving past this one. Skip over him, and 0 the
  309. // Offset we're keeping.
  310. //
  311. AmtForward -= Length;
  312. Offset = 0;
  313. Buffer = NDIS_BUFFER_LINKAGE(Buffer);
  314. ASSERT(Buffer != NULL);
  315. } else
  316. break;
  317. }
  318. //
  319. // Save the buffer we found, and the offset into that buffer.
  320. //
  321. SeqTCB->tcb_sendbuf = Buffer;
  322. SeqTCB->tcb_sendofs = Offset + AmtForward;
  323. } else {
  324. ASSERT(SeqTCB->tcb_cursend == NULL);
  325. ASSERT(AmtForward == 0);
  326. }
  327. }
  328. CheckTCBSends(SeqTCB);
  329. }
  330. //* TCPAbortAndIndicateDisconnect
  331. //
  332. // Abortively closes a TCB and issues a disconnect indication up to the
  333. // transport user. This function is used to support cancellation of
  334. // TDI send and receive requests.
  335. //
  336. void // Returns: Nothing.
  337. TCPAbortAndIndicateDisconnect(
  338. CONNECTION_CONTEXT ConnectionContext // Connection ID to find a TCB for.
  339. )
  340. {
  341. TCB *AbortTCB;
  342. KIRQL Irql0, Irql1; // One per lock nesting level.
  343. TCPConn *Conn;
  344. Conn = GetConnFromConnID(PtrToUlong(ConnectionContext), &Irql0);
  345. if (Conn != NULL) {
  346. CHECK_STRUCT(Conn, tc);
  347. AbortTCB = Conn->tc_tcb;
  348. if (AbortTCB != NULL) {
  349. //
  350. // If it's CLOSING or CLOSED, skip it.
  351. //
  352. if ((AbortTCB->tcb_state != TCB_CLOSED) && !CLOSING(AbortTCB)) {
  353. CHECK_STRUCT(AbortTCB, tcb);
  354. KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql1);
  355. KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql1);
  356. if (AbortTCB->tcb_state == TCB_CLOSED || CLOSING(AbortTCB)) {
  357. KeReleaseSpinLock(&AbortTCB->tcb_lock, Irql0);
  358. return;
  359. }
  360. AbortTCB->tcb_refcnt++;
  361. AbortTCB->tcb_flags |= NEED_RST; // send a reset if connected
  362. TryToCloseTCB(AbortTCB, TCB_CLOSE_ABORTED, Irql0);
  363. RemoveTCBFromConn(AbortTCB);
  364. IF_TCPDBG(TCP_DEBUG_IRP) {
  365. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  366. "TCPAbortAndIndicateDisconnect, indicating discon\n"));
  367. }
  368. NotifyOfDisc(AbortTCB, TDI_CONNECTION_ABORTED, NULL);
  369. KeAcquireSpinLock(&AbortTCB->tcb_lock, &Irql0);
  370. DerefTCB(AbortTCB, Irql0);
  371. // TCB lock freed by DerefTCB.
  372. return;
  373. } else
  374. KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
  375. } else
  376. KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, Irql0);
  377. }
  378. }
  379. //* TCBTimeout - Do timeout events on TCBs.
  380. //
  381. // Called every MS_PER_TICKS milliseconds to do timeout processing on TCBs.
  382. // We run throught the TCB table, decrementing timers. If one goes to zero
  383. // we look at its state to decide what to do.
  384. //
  385. void // Returns: Nothing.
  386. TCBTimeout(
  387. PKDPC MyDpcObject, // The DPC object describing this routine.
  388. void *Context, // The argument we asked to be called with.
  389. void *Unused1,
  390. void *Unused2)
  391. {
  392. uint i;
  393. TCB *CurrentTCB;
  394. uint Delayed = FALSE;
  395. uint CallRcvComplete;
  396. int Delta;
  397. UNREFERENCED_PARAMETER(MyDpcObject);
  398. UNREFERENCED_PARAMETER(Context);
  399. UNREFERENCED_PARAMETER(Unused1);
  400. UNREFERENCED_PARAMETER(Unused2);
  401. //
  402. // Update our free running counter.
  403. //
  404. TCPTime++;
  405. ExInterlockedAddUlong((PULONG)&TCBWalkCount, 1, &TCBTableLock);
  406. //
  407. // Set credits so that some more connections can increment the
  408. // Initial Sequence Number, during the next 100 ms.
  409. //
  410. InterlockedExchange((PLONG)&ISNCredits, ISNMaxCredits);
  411. Delta = GetDeltaTime();
  412. //
  413. // The increment made is (256)*(Time in milliseconds). This is really close
  414. // to 25000 increment made originally every 100 ms.
  415. //
  416. if (Delta > 0) {
  417. Delta *= 0x100;
  418. InterlockedExchangeAdd((PLONG)&ISNMonotonicPortion, Delta);
  419. }
  420. //
  421. // Loop through each bucket in the table, going down the chain of
  422. // TCBs on the bucket.
  423. //
  424. for (i = 0; i < TcbTableSize; i++) {
  425. TCB *TempTCB;
  426. uint maxRexmitCnt;
  427. CurrentTCB = TCBTable[i];
  428. while (CurrentTCB != NULL) {
  429. CHECK_STRUCT(CurrentTCB, tcb);
  430. KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
  431. //
  432. // If it's CLOSING or CLOSED, skip it.
  433. //
  434. if (CurrentTCB->tcb_state == TCB_CLOSED || CLOSING(CurrentTCB)) {
  435. TempTCB = CurrentTCB->tcb_next;
  436. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  437. CurrentTCB = TempTCB;
  438. continue;
  439. }
  440. CheckTCBSends(CurrentTCB);
  441. CheckTCBRcv(CurrentTCB);
  442. //
  443. // First check the rexmit timer.
  444. //
  445. if (TCB_TIMER_RUNNING(CurrentTCB->tcb_rexmittimer)) {
  446. //
  447. // The timer is running.
  448. //
  449. if (--(CurrentTCB->tcb_rexmittimer) == 0) {
  450. //
  451. // And it's fired. Figure out what to do now.
  452. //
  453. if ((CurrentTCB->tcb_state == TCB_SYN_SENT) ||
  454. (CurrentTCB->tcb_state == TCB_SYN_RCVD)) {
  455. maxRexmitCnt = MaxConnectRexmitCount;
  456. } else {
  457. maxRexmitCnt = MaxDataRexmitCount;
  458. }
  459. //
  460. // If we've run out of retransmits or we're in FIN_WAIT2,
  461. // time out.
  462. //
  463. CurrentTCB->tcb_rexmitcnt++;
  464. if (CurrentTCB->tcb_rexmitcnt > maxRexmitCnt) {
  465. ASSERT(CurrentTCB->tcb_state > TCB_LISTEN);
  466. //
  467. // This connection has timed out. Abort it. First
  468. // reference him, then mark as closed, notify the
  469. // user, and finally dereference and close him.
  470. //
  471. TimeoutTCB:
  472. CurrentTCB->tcb_refcnt++;
  473. TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
  474. DISPATCH_LEVEL);
  475. RemoveTCBFromConn(CurrentTCB);
  476. NotifyOfDisc(CurrentTCB, TDI_TIMED_OUT, NULL);
  477. KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
  478. DerefTCB(CurrentTCB, DISPATCH_LEVEL);
  479. CurrentTCB = FindNextTCB(i, CurrentTCB);
  480. continue;
  481. }
  482. //
  483. // Stop round trip time measurement.
  484. //
  485. CurrentTCB->tcb_rtt = 0;
  486. //
  487. // Figure out what our new retransmit timeout should be.
  488. // We double it each time we get a retransmit, and reset it
  489. // back when we get an ack for new data.
  490. //
  491. CurrentTCB->tcb_rexmit = MIN(CurrentTCB->tcb_rexmit << 1,
  492. MAX_REXMIT_TO);
  493. //
  494. // Reset the sequence number, and reset the congestion
  495. // window.
  496. //
  497. ResetSendNext(CurrentTCB, CurrentTCB->tcb_senduna);
  498. if (!(CurrentTCB->tcb_flags & FLOW_CNTLD)) {
  499. //
  500. // Don't let the slow start threshold go below 2
  501. // segments.
  502. //
  503. CurrentTCB->tcb_ssthresh =
  504. MAX(MIN(CurrentTCB->tcb_cwin,
  505. CurrentTCB->tcb_sendwin) / 2,
  506. (uint) CurrentTCB->tcb_mss * 2);
  507. CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
  508. } else {
  509. //
  510. // We're probing, and the probe timer has fired. We
  511. // need to set the FORCE_OUTPUT bit here.
  512. //
  513. CurrentTCB->tcb_flags |= FORCE_OUTPUT;
  514. }
  515. //
  516. // See if we need to probe for a PMTU black hole.
  517. //
  518. if (PMTUBHDetect &&
  519. CurrentTCB->tcb_rexmitcnt == ((maxRexmitCnt+1)/2)) {
  520. //
  521. // We may need to probe for a black hole. If we're
  522. // doing MTU discovery on this connection and we
  523. // are retransmitting more than a minimum segment
  524. // size, or we are probing for a PMTU BH already,
  525. // bump the probe count. If the probe count gets
  526. // too big we'll assume it's not a PMTU black hole,
  527. // and we'll try to switch the router.
  528. //
  529. if ((CurrentTCB->tcb_flags & PMTU_BH_PROBE) ||
  530. (CurrentTCB->tcb_sendmax - CurrentTCB->tcb_senduna
  531. > 8)) {
  532. //
  533. // May need to probe. If we haven't exceeded our
  534. // probe count, do so, otherwise restore those
  535. // values.
  536. //
  537. if (CurrentTCB->tcb_bhprobecnt++ < 2) {
  538. //
  539. // We're going to probe. Turn on the flag,
  540. // drop the MSS, and turn off the don't
  541. // fragment bit.
  542. //
  543. if (!(CurrentTCB->tcb_flags & PMTU_BH_PROBE)) {
  544. CurrentTCB->tcb_flags |= PMTU_BH_PROBE;
  545. CurrentTCB->tcb_slowcount++;
  546. CurrentTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  547. //
  548. // Drop the MSS to the minimum.
  549. //
  550. CurrentTCB->tcb_mss =
  551. MIN(DEFAULT_MSS,
  552. CurrentTCB->tcb_remmss);
  553. ASSERT(CurrentTCB->tcb_mss > 0);
  554. CurrentTCB->tcb_cwin = CurrentTCB->tcb_mss;
  555. }
  556. //
  557. // Drop the rexmit count so we come here again.
  558. //
  559. CurrentTCB->tcb_rexmitcnt--;
  560. } else {
  561. //
  562. // Too many probes. Stop probing, and allow
  563. // fallover to the next gateway.
  564. //
  565. // Currently this code won't do BH probing on
  566. // the 2nd gateway. The MSS will stay at the
  567. // minimum size. This might be a little
  568. // suboptimal, but it's easy to implement for
  569. // the Sept. 95 service pack and will keep
  570. // connections alive if possible.
  571. //
  572. // In the future we should investigate doing
  573. // dead g/w detect on a per-connection basis,
  574. // and then doing PMTU probing for each
  575. // connection.
  576. //
  577. if (CurrentTCB->tcb_flags & PMTU_BH_PROBE) {
  578. CurrentTCB->tcb_flags &= ~PMTU_BH_PROBE;
  579. if (--(CurrentTCB->tcb_slowcount) == 0)
  580. CurrentTCB->tcb_fastchk &=
  581. ~TCP_FLAG_SLOW;
  582. }
  583. CurrentTCB->tcb_bhprobecnt = 0;
  584. }
  585. }
  586. }
  587. //
  588. // Now handle the various cases.
  589. //
  590. switch (CurrentTCB->tcb_state) {
  591. case TCB_SYN_SENT:
  592. case TCB_SYN_RCVD:
  593. //
  594. // In SYN-SENT or SYN-RCVD we'll need to retransmit
  595. // the SYN.
  596. //
  597. SendSYN(CurrentTCB, DISPATCH_LEVEL);
  598. CurrentTCB = FindNextTCB(i, CurrentTCB);
  599. continue;
  600. case TCB_FIN_WAIT1:
  601. case TCB_CLOSING:
  602. case TCB_LAST_ACK:
  603. //
  604. // The call to ResetSendNext (above) will have
  605. // turned off the FIN_OUTSTANDING flag.
  606. //
  607. CurrentTCB->tcb_flags |= FIN_NEEDED;
  608. case TCB_CLOSE_WAIT:
  609. case TCB_ESTAB:
  610. //
  611. // In this state we have data to retransmit, unless
  612. // the window is zero (in which case we need to
  613. // probe), or we're just sending a FIN.
  614. //
  615. CheckTCBSends(CurrentTCB);
  616. //
  617. // Since we're retransmitting, our first-hop router
  618. // may be down. Tell IP we're suspicious if this
  619. // is the first retransmit.
  620. //
  621. if (CurrentTCB->tcb_rexmitcnt == 1 &&
  622. CurrentTCB->tcb_rce != NULL) {
  623. ForwardReachabilityInDoubt(CurrentTCB->tcb_rce);
  624. }
  625. Delayed = TRUE;
  626. DelayAction(CurrentTCB, NEED_OUTPUT);
  627. break;
  628. case TCB_TIME_WAIT:
  629. //
  630. // If it's fired in TIME-WAIT, we're all done and
  631. // can clean up. We'll call TryToCloseTCB even
  632. // though he's already sort of closed. TryToCloseTCB
  633. // will figure this out and do the right thing.
  634. //
  635. TryToCloseTCB(CurrentTCB, TCB_CLOSE_SUCCESS,
  636. DISPATCH_LEVEL);
  637. CurrentTCB = FindNextTCB(i, CurrentTCB);
  638. continue;
  639. default:
  640. break;
  641. }
  642. }
  643. }
  644. //
  645. // Now check the SWS deadlock timer..
  646. //
  647. if (TCB_TIMER_RUNNING(CurrentTCB->tcb_swstimer)) {
  648. //
  649. // The timer is running.
  650. //
  651. if (--(CurrentTCB->tcb_swstimer) == 0) {
  652. //
  653. // And it's fired. Force output now.
  654. //
  655. CurrentTCB->tcb_flags |= FORCE_OUTPUT;
  656. Delayed = TRUE;
  657. DelayAction(CurrentTCB, NEED_OUTPUT);
  658. }
  659. }
  660. //
  661. // Check the push data timer.
  662. //
  663. if (TCB_TIMER_RUNNING(CurrentTCB->tcb_pushtimer)) {
  664. //
  665. // The timer is running. Decrement it.
  666. //
  667. if (--(CurrentTCB->tcb_pushtimer) == 0) {
  668. //
  669. // It's fired.
  670. //
  671. PushData(CurrentTCB);
  672. Delayed = TRUE;
  673. }
  674. }
  675. //
  676. // Check the delayed ack timer.
  677. //
  678. if (TCB_TIMER_RUNNING(CurrentTCB->tcb_delacktimer)) {
  679. //
  680. // The timer is running.
  681. //
  682. if (--(CurrentTCB->tcb_delacktimer) == 0) {
  683. //
  684. // And it's fired. Set up to send an ACK.
  685. //
  686. Delayed = TRUE;
  687. DelayAction(CurrentTCB, NEED_ACK);
  688. }
  689. }
  690. //
  691. // Finally check the keepalive timer.
  692. //
  693. if (CurrentTCB->tcb_state == TCB_ESTAB) {
  694. if ((CurrentTCB->tcb_flags & KEEPALIVE) &&
  695. (CurrentTCB->tcb_conn != NULL)) {
  696. uint Delta;
  697. Delta = TCPTime - CurrentTCB->tcb_alive;
  698. if (Delta > CurrentTCB->tcb_conn->tc_tcbkatime) {
  699. Delta -= CurrentTCB->tcb_conn->tc_tcbkatime;
  700. if (Delta > (CurrentTCB->tcb_kacount * CurrentTCB->tcb_conn->tc_tcbkainterval)) {
  701. if (CurrentTCB->tcb_kacount < MaxDataRexmitCount) {
  702. SendKA(CurrentTCB, DISPATCH_LEVEL);
  703. CurrentTCB = FindNextTCB(i, CurrentTCB);
  704. continue;
  705. } else
  706. goto TimeoutTCB;
  707. }
  708. } else
  709. CurrentTCB->tcb_kacount = 0;
  710. }
  711. }
  712. //
  713. // If this is an active open connection in SYN-SENT or SYN-RCVD,
  714. // or we have a FIN pending, check the connect timer.
  715. //
  716. if (CurrentTCB->tcb_flags &
  717. (ACTIVE_OPEN | FIN_NEEDED | FIN_SENT)) {
  718. TCPConnReq *ConnReq = CurrentTCB->tcb_connreq;
  719. ASSERT(ConnReq != NULL);
  720. if (TCB_TIMER_RUNNING(ConnReq->tcr_timeout)) {
  721. // Timer is running.
  722. if (--(ConnReq->tcr_timeout) == 0) {
  723. // The connection timer has timed out.
  724. TryToCloseTCB(CurrentTCB, TCB_CLOSE_TIMEOUT,
  725. DISPATCH_LEVEL);
  726. CurrentTCB = FindNextTCB(i, CurrentTCB);
  727. continue;
  728. }
  729. }
  730. }
  731. //
  732. // Timer isn't running, or didn't fire.
  733. //
  734. TempTCB = CurrentTCB->tcb_next;
  735. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  736. CurrentTCB = TempTCB;
  737. }
  738. }
  739. //
  740. // See if we need to call receive complete as part of deadman processing.
  741. // We do this now because we want to restart the timer before calling
  742. // receive complete, in case that takes a while. If we make this check
  743. // while the timer is running we'd have to lock, so we'll check and save
  744. // the result now before we start the timer.
  745. //
  746. if (DeadmanTicks == TCPTime) {
  747. CallRcvComplete = TRUE;
  748. DeadmanTicks += NUM_DEADMAN_TICKS;
  749. } else
  750. CallRcvComplete = FALSE;
  751. //
  752. // Now check the pending free list. If it's not null, walk down the
  753. // list and decrement the walk count. If the count goes below 2, pull it
  754. // from the list. If the count goes to 0, free the TCB. If the count is
  755. // at 1 it'll be freed by whoever called RemoveTCB.
  756. //
  757. KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
  758. if (PendingFreeList != NULL) {
  759. TCB *PrevTCB;
  760. PrevTCB = CONTAINING_RECORD(&PendingFreeList, TCB, tcb_delayq.q_next);
  761. do {
  762. CurrentTCB = (TCB *)PrevTCB->tcb_delayq.q_next;
  763. CHECK_STRUCT(CurrentTCB, tcb);
  764. CurrentTCB->tcb_walkcount--;
  765. if (CurrentTCB->tcb_walkcount <= 1) {
  766. *(TCB **)&PrevTCB->tcb_delayq.q_next =
  767. (TCB *)CurrentTCB->tcb_delayq.q_next;
  768. if (CurrentTCB->tcb_walkcount == 0) {
  769. FreeTCB(CurrentTCB);
  770. }
  771. } else {
  772. PrevTCB = CurrentTCB;
  773. }
  774. } while (PrevTCB->tcb_delayq.q_next != NULL);
  775. }
  776. TCBWalkCount--;
  777. KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
  778. if (Delayed)
  779. ProcessTCBDelayQ();
  780. if (CallRcvComplete)
  781. TCPRcvComplete();
  782. }
  783. //* TCBWalk - Walk the TCBs in the table, and call a function for each of them.
  784. //
  785. // Called when we need to repetively do something to each TCB in the table.
  786. // We call the specified function with a pointer to the TCB and the input
  787. // context for each TCB in the table. If the function returns FALSE, we
  788. // delete the TCB.
  789. //
  790. void // Returns: Nothing.
  791. TCBWalk(
  792. uint (*CallRtn)(struct TCB *, void *, void *, void *), // Routine to call.
  793. void *Context1, // Context to pass to CallRtn.
  794. void *Context2, // Second context to pass to call routine.
  795. void *Context3) // Third context to pass to call routine.
  796. {
  797. uint i;
  798. TCB *CurTCB;
  799. KIRQL Irql0, Irql1;
  800. //
  801. // Loop through each bucket in the table, going down the chain of
  802. // TCBs on the bucket. For each one call CallRtn.
  803. //
  804. KeAcquireSpinLock(&TCBTableLock, &Irql0);
  805. for (i = 0; i < TcbTableSize; i++) {
  806. CurTCB = TCBTable[i];
  807. //
  808. // Walk down the chain on this bucket.
  809. //
  810. while (CurTCB != NULL) {
  811. if (!(*CallRtn)(CurTCB, Context1, Context2, Context3)) {
  812. //
  813. // Call failed on this one.
  814. // Notify the client and close the TCB.
  815. //
  816. KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql1);
  817. if (!CLOSING(CurTCB)) {
  818. CurTCB->tcb_refcnt++;
  819. KeReleaseSpinLock(&TCBTableLock, Irql1);
  820. TryToCloseTCB(CurTCB, TCB_CLOSE_ABORTED, Irql0);
  821. RemoveTCBFromConn(CurTCB);
  822. if (CurTCB->tcb_state != TCB_TIME_WAIT)
  823. NotifyOfDisc(CurTCB, TDI_CONNECTION_ABORTED, NULL);
  824. KeAcquireSpinLock(&CurTCB->tcb_lock, &Irql0);
  825. DerefTCB(CurTCB, Irql0);
  826. KeAcquireSpinLock(&TCBTableLock, &Irql0);
  827. } else
  828. KeReleaseSpinLock(&CurTCB->tcb_lock, Irql1);
  829. CurTCB = FindNextTCB(i, CurTCB);
  830. } else {
  831. CurTCB = CurTCB->tcb_next;
  832. }
  833. }
  834. }
  835. KeReleaseSpinLock(&TCBTableLock, Irql0);
  836. }
  837. //* FindTCB - Find a TCB in the tcb table.
  838. //
  839. // Called when we need to find a TCB in the TCB table. We take a quick
  840. // look at the last TCB we found, and if it matches we return it. Otherwise
  841. // we hash into the TCB table and look for it. We assume the TCB table lock
  842. // is held when we are called.
  843. //
  844. TCB * // Returns: Pointer to TCB found, or NULL if none.
  845. FindTCB(
  846. IPv6Addr *Src, // Source IP address of TCB to be found.
  847. IPv6Addr *Dest, // Destination IP address of TCB to be found.
  848. uint SrcScopeId, // Source address scope identifier.
  849. uint DestScopeId, // Destination address scope identifier.
  850. ushort SrcPort, // Source port of TCB to be found.
  851. ushort DestPort) // Destination port of TCB to be found.
  852. {
  853. TCB *FoundTCB;
  854. if (LastTCB != NULL) {
  855. CHECK_STRUCT(LastTCB, tcb);
  856. if (IP6_ADDR_EQUAL(&LastTCB->tcb_daddr, Dest) &&
  857. LastTCB->tcb_dscope_id == DestScopeId &&
  858. LastTCB->tcb_dport == DestPort &&
  859. IP6_ADDR_EQUAL(&LastTCB->tcb_saddr, Src) &&
  860. LastTCB->tcb_sscope_id == SrcScopeId &&
  861. LastTCB->tcb_sport == SrcPort)
  862. return LastTCB;
  863. }
  864. //
  865. // Didn't find it in our 1 element cache.
  866. //
  867. FoundTCB = TCBTable[TCB_HASH(*Dest, *Src, DestPort, SrcPort)];
  868. while (FoundTCB != NULL) {
  869. CHECK_STRUCT(FoundTCB, tcb);
  870. if (IP6_ADDR_EQUAL(&FoundTCB->tcb_daddr, Dest) &&
  871. FoundTCB->tcb_dscope_id == DestScopeId &&
  872. FoundTCB->tcb_dport == DestPort &&
  873. IP6_ADDR_EQUAL(&FoundTCB->tcb_saddr, Src) &&
  874. FoundTCB->tcb_sscope_id == SrcScopeId &&
  875. FoundTCB->tcb_sport == SrcPort) {
  876. //
  877. // Found it. Update the cache for next time, and return.
  878. //
  879. LastTCB = FoundTCB;
  880. return FoundTCB;
  881. } else
  882. FoundTCB = FoundTCB->tcb_next;
  883. }
  884. return FoundTCB;
  885. }
  886. //* InsertTCB - Insert a TCB in the tcb table.
  887. //
  888. // This routine inserts a TCB in the TCB table. No locks need to be held
  889. // when this routine is called. We insert TCBs in ascending address order.
  890. // Before inserting we make sure that the TCB isn't already in the table.
  891. //
  892. uint // Returns: TRUE if we inserted, false if we didn't.
  893. InsertTCB(
  894. TCB *NewTCB) // TCB to be inserted.
  895. {
  896. uint TCBIndex;
  897. KIRQL OldIrql;
  898. TCB *PrevTCB, *CurrentTCB;
  899. TCB *WhereToInsert;
  900. ASSERT(NewTCB != NULL);
  901. CHECK_STRUCT(NewTCB, tcb);
  902. TCBIndex = TCB_HASH(NewTCB->tcb_daddr, NewTCB->tcb_saddr,
  903. NewTCB->tcb_dport, NewTCB->tcb_sport);
  904. KeAcquireSpinLock(&TCBTableLock, &OldIrql);
  905. KeAcquireSpinLockAtDpcLevel(&NewTCB->tcb_lock);
  906. //
  907. // Find the proper place in the table to insert him. While
  908. // we're walking we'll check to see if a dupe already exists.
  909. // When we find the right place to insert, we'll remember it, and
  910. // keep walking looking for a duplicate.
  911. //
  912. PrevTCB = CONTAINING_RECORD(&TCBTable[TCBIndex], TCB, tcb_next);
  913. WhereToInsert = NULL;
  914. while (PrevTCB->tcb_next != NULL) {
  915. CurrentTCB = PrevTCB->tcb_next;
  916. if (IP6_ADDR_EQUAL(&CurrentTCB->tcb_daddr, &NewTCB->tcb_daddr) &&
  917. IP6_ADDR_EQUAL(&CurrentTCB->tcb_saddr, &NewTCB->tcb_saddr) &&
  918. (CurrentTCB->tcb_dscope_id == NewTCB->tcb_dscope_id) &&
  919. (CurrentTCB->tcb_sscope_id == NewTCB->tcb_sscope_id) &&
  920. (CurrentTCB->tcb_sport == NewTCB->tcb_sport) &&
  921. (CurrentTCB->tcb_dport == NewTCB->tcb_dport)) {
  922. KeReleaseSpinLockFromDpcLevel(&NewTCB->tcb_lock);
  923. KeReleaseSpinLock(&TCBTableLock, OldIrql);
  924. return FALSE;
  925. } else {
  926. if (WhereToInsert == NULL && CurrentTCB > NewTCB) {
  927. WhereToInsert = PrevTCB;
  928. }
  929. CHECK_STRUCT(PrevTCB->tcb_next, tcb);
  930. PrevTCB = PrevTCB->tcb_next;
  931. }
  932. }
  933. if (WhereToInsert == NULL) {
  934. WhereToInsert = PrevTCB;
  935. }
  936. NewTCB->tcb_next = WhereToInsert->tcb_next;
  937. WhereToInsert->tcb_next = NewTCB;
  938. NewTCB->tcb_flags |= IN_TCB_TABLE;
  939. TStats.ts_numconns++;
  940. KeReleaseSpinLockFromDpcLevel(&NewTCB->tcb_lock);
  941. KeReleaseSpinLock(&TCBTableLock, OldIrql);
  942. return TRUE;
  943. }
  944. //* RemoveTCB - Remove a TCB from the tcb table.
  945. //
  946. // Called when we need to remove a TCB from the TCB table. We assume the
  947. // TCB table lock and the TCB lock are held when we are called. If the
  948. // TCB isn't in the table we won't try to remove him.
  949. //
  950. uint // Returns: TRUE if it's OK to free it, FALSE otherwise.
  951. RemoveTCB(
  952. TCB *RemovedTCB) // TCB to be removed.
  953. {
  954. uint TCBIndex;
  955. TCB *PrevTCB;
  956. #if DBG
  957. uint Found = FALSE;
  958. #endif
  959. CHECK_STRUCT(RemovedTCB, tcb);
  960. if (RemovedTCB->tcb_flags & IN_TCB_TABLE) {
  961. TCBIndex = TCB_HASH(RemovedTCB->tcb_daddr, RemovedTCB->tcb_saddr,
  962. RemovedTCB->tcb_dport, RemovedTCB->tcb_sport);
  963. PrevTCB = CONTAINING_RECORD(&TCBTable[TCBIndex], TCB, tcb_next);
  964. do {
  965. if (PrevTCB->tcb_next == RemovedTCB) {
  966. // Found him.
  967. PrevTCB->tcb_next = RemovedTCB->tcb_next;
  968. RemovedTCB->tcb_flags &= ~IN_TCB_TABLE;
  969. TStats.ts_numconns--;
  970. #if DBG
  971. Found = TRUE;
  972. #endif
  973. break;
  974. }
  975. PrevTCB = PrevTCB->tcb_next;
  976. #if DBG
  977. if (PrevTCB != NULL)
  978. CHECK_STRUCT(PrevTCB, tcb);
  979. #endif
  980. } while (PrevTCB != NULL);
  981. ASSERT(Found);
  982. }
  983. if (LastTCB == RemovedTCB)
  984. LastTCB = NULL;
  985. if (TCBWalkCount == 0) {
  986. return TRUE;
  987. } else {
  988. RemovedTCB->tcb_walkcount = TCBWalkCount + 1;
  989. *(TCB **)&RemovedTCB->tcb_delayq.q_next = PendingFreeList;
  990. PendingFreeList = RemovedTCB;
  991. return FALSE;
  992. }
  993. }
  994. //* ScavengeTCB - Scavenge a TCB that's in the TIME_WAIT state.
  995. //
  996. // Called when we're running low on TCBs, and need to scavenge one from
  997. // TIME_WAIT state. We'll walk through the TCB table, looking for the oldest
  998. // TCB in TIME_WAIT. We'll remove and return a pointer to that TCB. If we
  999. // don't find any TCBs in TIME_WAIT, we'll return NULL.
  1000. //
  1001. TCB * // Returns: Pointer to a reusable TCB, or NULL.
  1002. ScavengeTCB(
  1003. void)
  1004. {
  1005. KIRQL Irql0, Irql1, IrqlSave = 0;
  1006. uint Now = SystemUpTime();
  1007. uint Delta = 0;
  1008. uint i;
  1009. TCB *FoundTCB = NULL, *PrevFound = NULL;
  1010. TCB *CurrentTCB, *PrevTCB;
  1011. KeAcquireSpinLock(&TCBTableLock, &Irql0);
  1012. if (TCBWalkCount != 0) {
  1013. KeReleaseSpinLock(&TCBTableLock, Irql0);
  1014. return NULL;
  1015. }
  1016. for (i = 0; i < TcbTableSize; i++) {
  1017. PrevTCB = CONTAINING_RECORD(&TCBTable[i], TCB, tcb_next);
  1018. CurrentTCB = PrevTCB->tcb_next;
  1019. while (CurrentTCB != NULL) {
  1020. CHECK_STRUCT(CurrentTCB, tcb);
  1021. KeAcquireSpinLock(&CurrentTCB->tcb_lock, &Irql1);
  1022. if (CurrentTCB->tcb_state == TCB_TIME_WAIT &&
  1023. (CurrentTCB->tcb_refcnt == 0) && !CLOSING(CurrentTCB)){
  1024. if (FoundTCB == NULL ||
  1025. ((Now - CurrentTCB->tcb_alive) > Delta)) {
  1026. //
  1027. // Found a new 'older' TCB. If we already have one, free
  1028. // the lock on him and get the lock on the new one.
  1029. //
  1030. if (FoundTCB != NULL)
  1031. KeReleaseSpinLock(&FoundTCB->tcb_lock, Irql1);
  1032. else
  1033. IrqlSave = Irql1;
  1034. PrevFound = PrevTCB;
  1035. FoundTCB = CurrentTCB;
  1036. Delta = Now - FoundTCB->tcb_alive;
  1037. } else
  1038. KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);
  1039. } else
  1040. KeReleaseSpinLock(&CurrentTCB->tcb_lock, Irql1);
  1041. //
  1042. // Look at the next one.
  1043. //
  1044. PrevTCB = CurrentTCB;
  1045. CurrentTCB = PrevTCB->tcb_next;
  1046. }
  1047. }
  1048. //
  1049. // If we have one, pull him from the list.
  1050. //
  1051. if (FoundTCB != NULL) {
  1052. PrevFound->tcb_next = FoundTCB->tcb_next;
  1053. FoundTCB->tcb_flags &= ~IN_TCB_TABLE;
  1054. //
  1055. // Release our references on the NTE and RCE. We won't
  1056. // be sending anymore using the old incarnation of this TCB.
  1057. //
  1058. if (FoundTCB->tcb_nte != NULL)
  1059. ReleaseNTE(FoundTCB->tcb_nte);
  1060. if (FoundTCB->tcb_rce != NULL)
  1061. ReleaseRCE(FoundTCB->tcb_rce);
  1062. TStats.ts_numconns--;
  1063. if (LastTCB == FoundTCB) {
  1064. LastTCB = NULL;
  1065. }
  1066. KeReleaseSpinLock(&FoundTCB->tcb_lock, IrqlSave);
  1067. }
  1068. KeReleaseSpinLock(&TCBTableLock, Irql0);
  1069. return FoundTCB;
  1070. }
  1071. //* AllocTCB - Allocate a TCB.
  1072. //
  1073. // Called whenever we need to allocate a TCB. We try to pull one off the
  1074. // free list, or allocate one if we need one. We then initialize it, etc.
  1075. //
  1076. TCB * // Returns: Pointer to the new TCB, or NULL if we couldn't get one.
  1077. AllocTCB(
  1078. void)
  1079. {
  1080. TCB *NewTCB;
  1081. //
  1082. // First, see if we have one on the free list.
  1083. //
  1084. PSLIST_ENTRY BufferLink;
  1085. BufferLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock);
  1086. if (BufferLink != NULL) {
  1087. NewTCB = CONTAINING_RECORD(BufferLink, TCB, tcb_next);
  1088. CHECK_STRUCT(NewTCB, tcb);
  1089. ExInterlockedAddUlong((PULONG)&FreeTCBs, (ULONG)-1, &FreeTCBListLock);
  1090. } else {
  1091. //
  1092. // We have none on the free list. If the total number of TCBs
  1093. // outstanding is more than we like to keep on the free list, try
  1094. // to scavenge a TCB from time wait.
  1095. //
  1096. if (CurrentTCBs < MaxFreeTCBs || ((NewTCB = ScavengeTCB()) == NULL)) {
  1097. if (CurrentTCBs < MaxTCBs) {
  1098. NewTCB = ExAllocatePool(NonPagedPool, sizeof(TCB));
  1099. if (NewTCB == NULL) {
  1100. return NewTCB;
  1101. } else {
  1102. ExInterlockedAddUlong((PULONG)&CurrentTCBs, 1,
  1103. &FreeTCBListLock);
  1104. }
  1105. } else
  1106. return NULL;
  1107. }
  1108. }
  1109. ASSERT(NewTCB != NULL);
  1110. RtlZeroMemory(NewTCB, sizeof(TCB));
  1111. #if DBG
  1112. NewTCB->tcb_sig = tcb_signature;
  1113. #endif
  1114. INITQ(&NewTCB->tcb_sendq);
  1115. NewTCB->tcb_cursend = NULL;
  1116. NewTCB->tcb_alive = TCPTime;
  1117. NewTCB->tcb_hops = -1;
  1118. //
  1119. // Initially we're not on the fast path because we're not established. Set
  1120. // the slowcount to one and set up the fastchk fields so we don't take the
  1121. // fast path.
  1122. //
  1123. NewTCB->tcb_slowcount = 1;
  1124. NewTCB->tcb_fastchk = TCP_FLAG_ACK | TCP_FLAG_SLOW;
  1125. KeInitializeSpinLock(&NewTCB->tcb_lock);
  1126. return NewTCB;
  1127. }
  1128. //* FreeTCB - Free a TCB.
  1129. //
  1130. // Called whenever we need to free a TCB.
  1131. //
  1132. // Note: This routine may be called with the TCBTableLock held.
  1133. //
  1134. void // Returns: Nothing.
  1135. FreeTCB(
  1136. TCB *FreedTCB) // TCB to be freed.
  1137. {
  1138. PSLIST_ENTRY BufferLink;
  1139. KIRQL OldIrql;
  1140. CHECK_STRUCT(FreedTCB, tcb);
  1141. #if defined(_WIN64)
  1142. if (CurrentTCBs > 2 * MaxFreeTCBs) {
  1143. #else
  1144. //
  1145. // Acquire FreeTCBListLock before accessing Depth field.
  1146. //
  1147. KeAcquireSpinLock(&FreeTCBListLock, &OldIrql);
  1148. if ((CurrentTCBs > 2 * MaxFreeTCBs) || (FreeTCBList.Depth > 65000)) {
  1149. KeReleaseSpinLock(&FreeTCBListLock, OldIrql);
  1150. #endif
  1151. ExInterlockedAddUlong((PULONG)&CurrentTCBs, (ulong) - 1, &FreeTCBListLock);
  1152. ExFreePool(FreedTCB);
  1153. return;
  1154. }
  1155. #if !defined(_WIN64)
  1156. KeReleaseSpinLock(&FreeTCBListLock, OldIrql);
  1157. #endif
  1158. BufferLink = CONTAINING_RECORD(&(FreedTCB->tcb_next),
  1159. SLIST_ENTRY, Next);
  1160. ExInterlockedPushEntrySList(&FreeTCBList, BufferLink, &FreeTCBListLock);
  1161. ExInterlockedAddUlong((PULONG)&FreeTCBs, 1, &FreeTCBListLock);
  1162. }
  1163. #pragma BEGIN_INIT
  1164. //* InitTCB - Initialize our TCB code.
  1165. //
  1166. // Called during init time to initialize our TCB code. We initialize
  1167. // the TCB table, etc, then return.
  1168. //
  1169. int // Returns: TRUE if we did initialize, false if we didn't.
  1170. InitTCB(
  1171. void)
  1172. {
  1173. LARGE_INTEGER InitialWakeUp;
  1174. uint i;
  1175. TCBTable = ExAllocatePool(NonPagedPool, TcbTableSize * sizeof(TCB*));
  1176. if (TCBTable == NULL) {
  1177. return FALSE;
  1178. }
  1179. for (i = 0; i < TcbTableSize; i++)
  1180. TCBTable[i] = NULL;
  1181. LastTCB = NULL;
  1182. ExInitializeSListHead(&FreeTCBList);
  1183. KeInitializeSpinLock(&TCBTableLock);
  1184. KeInitializeSpinLock(&FreeTCBListLock);
  1185. TCPTime = 0;
  1186. TCBWalkCount = 0;
  1187. DeadmanTicks = NUM_DEADMAN_TICKS;
  1188. //
  1189. // Set up our timer to call TCBTimeout once every MS_PER_TICK milliseconds.
  1190. //
  1191. // REVIEW: Switch this to be driven off the IPv6Timeout routine instead
  1192. // REVIEW: of having two independent timers?
  1193. //
  1194. KeInitializeDpc(&TCBTimeoutDpc, TCBTimeout, NULL);
  1195. KeInitializeTimer(&TCBTimer);
  1196. InitialWakeUp.QuadPart = -(LONGLONG) MS_PER_TICK * 10000;
  1197. KeSetTimerEx(&TCBTimer, InitialWakeUp, MS_PER_TICK, &TCBTimeoutDpc);
  1198. return TRUE;
  1199. }
  1200. #pragma END_INIT
  1201. //* UnloadTCB
  1202. //
  1203. // Called during shutdown to uninitialize
  1204. // in preparation for unloading the stack.
  1205. //
  1206. // There are no open sockets (or else we wouldn't be unloading).
  1207. // Because UnloadTCPSend has already been called,
  1208. // we are no longer receiving packets from the IPv6 layer.
  1209. //
  1210. void
  1211. UnloadTCB(void)
  1212. {
  1213. PSLIST_ENTRY BufferLink;
  1214. TCB *CurrentTCB;
  1215. uint i;
  1216. KIRQL OldIrql;
  1217. //
  1218. // First stop TCBTimeout from being called.
  1219. //
  1220. KeCancelTimer(&TCBTimer);
  1221. //
  1222. // Wait until all the DPC routines have finished.
  1223. //
  1224. KeFlushQueuedDpcs();
  1225. //
  1226. // Traverse the buckets looking for TCBs.
  1227. // REVIEW - Can we have TCBs in states other than time-wait?
  1228. //
  1229. for (i = 0; i < TcbTableSize; i++) {
  1230. while ((CurrentTCB = TCBTable[i]) != NULL) {
  1231. KeAcquireSpinLock(&CurrentTCB->tcb_lock, &OldIrql);
  1232. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_STATE,
  1233. "UnloadTCB(%p): state %x flags %x refs %x "
  1234. "reason %x pend %x walk %x\n",
  1235. CurrentTCB,
  1236. CurrentTCB->tcb_state,
  1237. CurrentTCB->tcb_flags,
  1238. CurrentTCB->tcb_refcnt,
  1239. CurrentTCB->tcb_closereason,
  1240. CurrentTCB->tcb_pending,
  1241. CurrentTCB->tcb_walkcount));
  1242. CurrentTCB->tcb_flags |= NEED_RST;
  1243. TryToCloseTCB(CurrentTCB, TCB_CLOSE_ABORTED, OldIrql);
  1244. }
  1245. }
  1246. //
  1247. // Now pull TCBs off the free list and really free them.
  1248. //
  1249. while ((BufferLink = ExInterlockedPopEntrySList(&FreeTCBList, &FreeTCBListLock)) != NULL) {
  1250. CurrentTCB = CONTAINING_RECORD(BufferLink, TCB, tcb_next);
  1251. CHECK_STRUCT(CurrentTCB, tcb);
  1252. ExFreePool(CurrentTCB);
  1253. }
  1254. ExFreePool(TCBTable);
  1255. TCBTable = NULL;
  1256. }
  1257. //* CleanupTCBWithIF
  1258. //
  1259. // Helper function for TCBWalk, to remove
  1260. // TCBs and RCEs that reference the specified interface.
  1261. //
  1262. // Lock: Called in TCBWalk with TCB table lock held.
  1263. // Returns FALSE if CheckTCB should be deleted, TRUE otherwise.
  1264. //
  1265. uint
  1266. CleanupTCBWithIF(
  1267. TCB *CheckTCB,
  1268. void *Context1,
  1269. void *Context2,
  1270. void *Context3)
  1271. {
  1272. Interface *IF = (Interface *) Context1;
  1273. UNREFERENCED_PARAMETER(Context2);
  1274. UNREFERENCED_PARAMETER(Context3);
  1275. CHECK_STRUCT(CheckTCB, tcb);
  1276. //
  1277. // Take the lock of this TCB before accessing its NTE and RCE.
  1278. //
  1279. KeAcquireSpinLockAtDpcLevel(&CheckTCB->tcb_lock);
  1280. if ((CheckTCB->tcb_nte != NULL) && (CheckTCB->tcb_nte->IF == IF)) {
  1281. //
  1282. // Any NTE on this IF is guaranteed to be invalid by the time
  1283. // this routine gets called. So we need to quit using it.
  1284. //
  1285. ReleaseNTE(CheckTCB->tcb_nte);
  1286. //
  1287. // See if this address lives on as a different NTE.
  1288. //
  1289. CheckTCB->tcb_nte = FindNetworkWithAddress(&CheckTCB->tcb_saddr,
  1290. CheckTCB->tcb_sscope_id);
  1291. if (CheckTCB->tcb_nte == NULL) {
  1292. //
  1293. // Game over man, game over.
  1294. //
  1295. KeReleaseSpinLockFromDpcLevel(&CheckTCB->tcb_lock);
  1296. return FALSE; // Delete this TCB.
  1297. }
  1298. }
  1299. if ((CheckTCB->tcb_rce != NULL) && (CheckTCB->tcb_rce->NTE->IF == IF)) {
  1300. //
  1301. // Free up this RCE. TCP will attempt to get a new one
  1302. // the next time it wants to send something.
  1303. //
  1304. ReleaseRCE(CheckTCB->tcb_rce);
  1305. CheckTCB->tcb_rce = NULL;
  1306. }
  1307. KeReleaseSpinLockFromDpcLevel(&CheckTCB->tcb_lock);
  1308. return TRUE; // Do not delete this TCB.
  1309. }
  1310. //* TCPRemoveIF
  1311. //
  1312. // Remove TCP's references to the specified interface.
  1313. //
  1314. void
  1315. TCPRemoveIF(Interface *IF)
  1316. {
  1317. //
  1318. // Currently, only TCBs hold onto references.
  1319. // The TCBTable might have already been freed if we're being
  1320. // unloaded at this point.
  1321. //
  1322. if (TCBTable != NULL) {
  1323. TCBWalk(CleanupTCBWithIF, IF, NULL, NULL);
  1324. }
  1325. }