Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4100 lines
152 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1985-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // TCP receive code.
  14. //
  15. // This file contains the code for handling incoming TCP packets.
  16. //
  17. #include "oscfg.h"
  18. #include "ndis.h"
  19. #include "ip6imp.h"
  20. #include "ip6def.h"
  21. #include "icmp.h"
  22. #include "tdi.h"
  23. #include "tdint.h"
  24. #include "tdistat.h"
  25. #include "queue.h"
  26. #include "transprt.h"
  27. #include "addr.h"
  28. #include "tcp.h"
  29. #include "tcb.h"
  30. #include "tcpconn.h"
  31. #include "tcpsend.h"
  32. #include "tcprcv.h"
  33. #include "tcpdeliv.h"
  34. #include "info.h"
  35. #include "tcpcfg.h"
  36. #include "route.h"
  37. #include "security.h"
  38. uint RequestCompleteFlags;
  39. Queue ConnRequestCompleteQ;
  40. Queue SendCompleteQ;
  41. Queue TCBDelayQ;
  42. KSPIN_LOCK RequestCompleteLock;
  43. KSPIN_LOCK TCBDelayLock;
  44. ulong TCBDelayRtnCount;
  45. ulong TCBDelayRtnLimit;
  46. #define TCB_DELAY_RTN_LIMIT 4
  47. uint MaxDupAcks = 2;
  48. extern KSPIN_LOCK TCBTableLock;
  49. extern KSPIN_LOCK AddrObjTableLock;
  50. #define PERSIST_TIMEOUT MS_TO_TICKS(500)
  51. void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  52. NTSTATUS TCPPrepareIrpForCancel(PTCP_CONTEXT TcpContext, PIRP Irp,
  53. PDRIVER_CANCEL CancelRoutine);
  54. extern void TCPRequestComplete(void *Context, unsigned int Status,
  55. unsigned int UnUsed);
  56. VOID TCPCancelRequest(PDEVICE_OBJECT Device, PIRP Irp);
  57. //
  58. // All of the init code can be discarded.
  59. //
  60. #ifdef ALLOC_PRAGMA
  61. int InitTCPRcv(void);
  62. #pragma alloc_text(INIT, InitTCPRcv)
  63. #endif // ALLOC_PRAGMA
  64. //* AdjustRcvWin - Adjust the receive window on a TCB.
  65. //
  66. // A utility routine that adjusts the receive window to an even multiple of
  67. // the local segment size. We round it up to the next closest multiple, or
  68. // leave it alone if it's already an event multiple. We assume we have
  69. // exclusive access to the input TCB.
  70. //
  71. void // Returns: Nothing.
  72. AdjustRcvWin(
  73. TCB *WinTCB) // TCB to be adjusted.
  74. {
  75. ushort LocalMSS;
  76. uchar FoundMSS;
  77. ulong SegmentsInWindow;
  78. ASSERT(WinTCB->tcb_defaultwin != 0);
  79. ASSERT(WinTCB->tcb_rcvwin != 0);
  80. ASSERT(WinTCB->tcb_remmss != 0);
  81. if (WinTCB->tcb_flags & WINDOW_SET)
  82. return;
  83. #if 0
  84. //
  85. // First, get the local MSS by calling IP.
  86. //
  87. // REVIEW: IPv4 had code here to call down to IP to get the local MTU
  88. // REVIEW: corresponding to this source address. Result in "LocalMSS",
  89. // REVIEW: status of call in "FoundMSS".
  90. //
  91. // REVIEW: Why did they do this? tcb_mss is already set by this point!
  92. //
  93. if (!FoundMSS) {
  94. //
  95. // Didn't find it, error out.
  96. //
  97. ASSERT(FALSE);
  98. return;
  99. }
  100. LocalMSS -= sizeof(TCPHeader);
  101. LocalMSS = MIN(LocalMSS, WinTCB->tcb_remmss);
  102. #else
  103. LocalMSS = WinTCB->tcb_mss;
  104. #endif
  105. SegmentsInWindow = WinTCB->tcb_defaultwin / (ulong)LocalMSS;
  106. //
  107. // Make sure we have at least 4 segments in window, if that wouldn't make
  108. // the window too big.
  109. //
  110. if (SegmentsInWindow < 4) {
  111. //
  112. // We have fewer than four segments in the window. Round up to 4
  113. // if we can do so without exceeding the maximum window size; otherwise
  114. // use the maximum multiple that we can fit in 64K. The exception is
  115. // if we can only fit one integral multiple in the window - in that
  116. // case we'll use a window of 0xffff.
  117. //
  118. if (LocalMSS <= (0xffff/4)) {
  119. WinTCB->tcb_defaultwin = (uint)(4 * LocalMSS);
  120. } else {
  121. ulong SegmentsInMaxWindow;
  122. //
  123. // Figure out the maximum number of segments we could possibly
  124. // fit in a window. If this is > 1, use that as the basis for
  125. // our window size. Otherwise use a maximum size window.
  126. //
  127. SegmentsInMaxWindow = 0xffff/(ulong)LocalMSS;
  128. if (SegmentsInMaxWindow != 1)
  129. WinTCB->tcb_defaultwin = SegmentsInMaxWindow * (ulong)LocalMSS;
  130. else
  131. WinTCB->tcb_defaultwin = 0xffff;
  132. }
  133. WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
  134. } else {
  135. //
  136. // If it's not already an even multiple, bump the default and current
  137. // windows to the nearest multiple.
  138. //
  139. if ((SegmentsInWindow * (ulong)LocalMSS) != WinTCB->tcb_defaultwin) {
  140. ulong NewWindow;
  141. NewWindow = (SegmentsInWindow + 1) * (ulong)LocalMSS;
  142. // Don't let the new window be > 64K.
  143. if (NewWindow <= 0xffff) {
  144. WinTCB->tcb_defaultwin = (uint)NewWindow;
  145. WinTCB->tcb_rcvwin = (uint)NewWindow;
  146. }
  147. }
  148. }
  149. }
  150. //* CompleteRcvs - Complete receives on a TCB.
  151. //
  152. // Called when we need to complete receives on a TCB. We'll pull things from
  153. // the TCB's receive queue, as long as there are receives that have the PUSH
  154. // bit set.
  155. //
  156. void // Returns: Nothing.
  157. CompleteRcvs(
  158. TCB *CmpltTCB) // TCB to complete on.
  159. {
  160. KIRQL OldIrql;
  161. TCPRcvReq *CurrReq, *NextReq, *IndReq;
  162. CHECK_STRUCT(CmpltTCB, tcb);
  163. ASSERT(CmpltTCB->tcb_refcnt != 0);
  164. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  165. if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
  166. && (CmpltTCB->tcb_rcvhead != NULL)) {
  167. CmpltTCB->tcb_flags |= RCV_CMPLTING;
  168. for (;;) {
  169. CurrReq = CmpltTCB->tcb_rcvhead;
  170. IndReq = NULL;
  171. do {
  172. CHECK_STRUCT(CurrReq, trr);
  173. if (CurrReq->trr_flags & TRR_PUSHED) {
  174. //
  175. // Need to complete this one. If this is the current
  176. // receive then advance the current receive to the next
  177. // one in the list. Then set the list head to the next
  178. // one in the list.
  179. //
  180. ASSERT(CurrReq->trr_amt != 0 ||
  181. !DATA_RCV_STATE(CmpltTCB->tcb_state));
  182. NextReq = CurrReq->trr_next;
  183. if (CmpltTCB->tcb_currcv == CurrReq)
  184. CmpltTCB->tcb_currcv = NextReq;
  185. CmpltTCB->tcb_rcvhead = NextReq;
  186. if (NextReq == NULL) {
  187. //
  188. // We've just removed the last buffer. Set the
  189. // rcvhandler to PendData, in case something
  190. // comes in during the callback.
  191. //
  192. ASSERT(CmpltTCB->tcb_rcvhndlr != IndicateData);
  193. CmpltTCB->tcb_rcvhndlr = PendData;
  194. }
  195. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  196. if (CurrReq->trr_uflags != NULL)
  197. *(CurrReq->trr_uflags) =
  198. TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
  199. (*CurrReq->trr_rtn)(CurrReq->trr_context, TDI_SUCCESS,
  200. CurrReq->trr_amt);
  201. if (IndReq != NULL)
  202. FreeRcvReq(CurrReq);
  203. else
  204. IndReq = CurrReq;
  205. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  206. CurrReq = CmpltTCB->tcb_rcvhead;
  207. } else
  208. // This one isn't to be completed, so bail out.
  209. break;
  210. } while (CurrReq != NULL);
  211. //
  212. // Now see if we've completed all of the requests. If we have,
  213. // we may need to deal with pending data and/or reset the receive
  214. // handler.
  215. //
  216. if (CurrReq == NULL) {
  217. //
  218. // We've completed everything that can be, so stop the push
  219. // timer. We don't stop it if CurrReq isn't NULL because we
  220. // want to make sure later data is eventually pushed.
  221. //
  222. STOP_TCB_TIMER(CmpltTCB->tcb_pushtimer);
  223. ASSERT(IndReq != NULL);
  224. //
  225. // No more receive requests.
  226. //
  227. if (CmpltTCB->tcb_pendhead == NULL) {
  228. FreeRcvReq(IndReq);
  229. //
  230. // No pending data. Set the receive handler to either
  231. // PendData or IndicateData.
  232. //
  233. if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
  234. if (CmpltTCB->tcb_rcvind != NULL &&
  235. CmpltTCB->tcb_indicated == 0)
  236. CmpltTCB->tcb_rcvhndlr = IndicateData;
  237. else
  238. CmpltTCB->tcb_rcvhndlr = PendData;
  239. } else {
  240. goto Complete_Notify;
  241. }
  242. } else {
  243. //
  244. // We have pending data to deal with.
  245. //
  246. if (CmpltTCB->tcb_rcvind != NULL &&
  247. CmpltTCB->tcb_indicated == 0) {
  248. //
  249. // There's a receive indicate handler on this TCB.
  250. // Call the indicate handler with the pending data.
  251. //
  252. IndicatePendingData(CmpltTCB, IndReq, OldIrql);
  253. SendACK(CmpltTCB);
  254. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  255. //
  256. // See if a buffer has been posted. If so, we'll need
  257. // to check and see if it needs to be completed.
  258. //
  259. if (CmpltTCB->tcb_rcvhead != NULL)
  260. continue;
  261. else {
  262. //
  263. // If the pending head is now NULL, we've used up
  264. // all the data.
  265. //
  266. if (CmpltTCB->tcb_pendhead == NULL &&
  267. (CmpltTCB->tcb_flags &
  268. (DISC_PENDING | GC_PENDING)))
  269. goto Complete_Notify;
  270. }
  271. } else {
  272. //
  273. // No indicate handler, so nothing to do. The receive
  274. // handler should already be set to PendData.
  275. //
  276. FreeRcvReq(IndReq);
  277. ASSERT(CmpltTCB->tcb_rcvhndlr == PendData);
  278. }
  279. }
  280. } else {
  281. if (IndReq != NULL)
  282. FreeRcvReq(IndReq);
  283. ASSERT(CmpltTCB->tcb_rcvhndlr == BufferData);
  284. }
  285. break;
  286. }
  287. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  288. }
  289. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  290. return;
  291. Complete_Notify:
  292. //
  293. // Something is pending. Figure out what it is, and do it.
  294. //
  295. if (CmpltTCB->tcb_flags & GC_PENDING) {
  296. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  297. //
  298. // Bump the refcnt, because GracefulClose will deref the TCB
  299. // and we're not really done with it yet.
  300. //
  301. CmpltTCB->tcb_refcnt++;
  302. GracefulClose(CmpltTCB, CmpltTCB->tcb_flags & TW_PENDING, TRUE,
  303. OldIrql);
  304. } else
  305. if (CmpltTCB->tcb_flags & DISC_PENDING) {
  306. CmpltTCB->tcb_flags &= ~DISC_PENDING;
  307. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  308. NotifyOfDisc(CmpltTCB, TDI_GRACEFUL_DISC);
  309. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  310. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  311. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  312. } else {
  313. ASSERT(FALSE);
  314. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  315. }
  316. return;
  317. }
  318. //* ProcessTCBDelayQ - Process TCBs on the delayed Q.
  319. //
  320. // Called at various times to process TCBs on the delayed Q.
  321. //
  322. void // Returns: Nothing.
  323. ProcessTCBDelayQ(
  324. void) // Nothing.
  325. {
  326. KIRQL OldIrql;
  327. TCB *DelayTCB;
  328. KeAcquireSpinLock(&TCBDelayLock, &OldIrql);
  329. //
  330. // Check for recursion. We do not stop recursion completely, only
  331. // limit it. This is done to allow multiple threads to process the
  332. // TCBDelayQ simultaneously.
  333. //
  334. TCBDelayRtnCount++;
  335. if (TCBDelayRtnCount > TCBDelayRtnLimit) {
  336. TCBDelayRtnCount--;
  337. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  338. return;
  339. }
  340. while (!EMPTYQ(&TCBDelayQ)) {
  341. DEQUEUE(&TCBDelayQ, DelayTCB, TCB, tcb_delayq);
  342. CHECK_STRUCT(DelayTCB, tcb);
  343. ASSERT(DelayTCB->tcb_refcnt != 0);
  344. ASSERT(DelayTCB->tcb_flags & IN_DELAY_Q);
  345. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  346. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  347. while (!CLOSING(DelayTCB) && (DelayTCB->tcb_flags & DELAYED_FLAGS)) {
  348. if (DelayTCB->tcb_flags & NEED_RCV_CMPLT) {
  349. DelayTCB->tcb_flags &= ~NEED_RCV_CMPLT;
  350. KeReleaseSpinLock(&DelayTCB->tcb_lock, OldIrql);
  351. CompleteRcvs(DelayTCB);
  352. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  353. }
  354. if (DelayTCB->tcb_flags & NEED_OUTPUT) {
  355. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  356. DelayTCB->tcb_refcnt++;
  357. TCPSend(DelayTCB, OldIrql);
  358. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  359. }
  360. if (DelayTCB->tcb_flags & NEED_ACK) {
  361. DelayTCB->tcb_flags &= ~NEED_ACK;
  362. KeReleaseSpinLock(&DelayTCB->tcb_lock, OldIrql);
  363. SendACK(DelayTCB);
  364. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  365. }
  366. }
  367. DelayTCB->tcb_flags &= ~IN_DELAY_Q;
  368. DerefTCB(DelayTCB, OldIrql);
  369. KeAcquireSpinLock(&TCBDelayLock, &OldIrql);
  370. }
  371. TCBDelayRtnCount--;
  372. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  373. }
  374. //* DelayAction - Put a TCB on the queue for a delayed action.
  375. //
  376. // Called when we want to put a TCB on the DelayQ for a delayed action at
  377. // receive complete or some other time. The lock on the TCB must be held
  378. // when this is called.
  379. //
  380. void // Returns: Nothing.
  381. DelayAction(
  382. TCB *DelayTCB, // TCP which we're going to schedule.
  383. uint Action) // Action we're scheduling.
  384. {
  385. //
  386. // Schedule the completion.
  387. //
  388. KeAcquireSpinLockAtDpcLevel(&TCBDelayLock);
  389. DelayTCB->tcb_flags |= Action;
  390. if (!(DelayTCB->tcb_flags & IN_DELAY_Q)) {
  391. DelayTCB->tcb_flags |= IN_DELAY_Q;
  392. DelayTCB->tcb_refcnt++; // Reference this for later.
  393. ENQUEUE(&TCBDelayQ, &DelayTCB->tcb_delayq);
  394. }
  395. KeReleaseSpinLockFromDpcLevel(&TCBDelayLock);
  396. }
  397. //* TCPRcvComplete - Handle a receive complete.
  398. //
  399. // Called by the lower layers when we're done receiving. We look to see
  400. // if we have and pending requests to complete. If we do, we complete them.
  401. // Then we look to see if we have any TCBs pending for output. If we do,
  402. // we get them going.
  403. //
  404. void // Returns: Nothing.
  405. TCPRcvComplete(
  406. void) // Nothing.
  407. {
  408. KIRQL OldIrql;
  409. TCPReq *Req;
  410. if (RequestCompleteFlags & ANY_REQUEST_COMPLETE) {
  411. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  412. if (!(RequestCompleteFlags & IN_RCV_COMPLETE)) {
  413. RequestCompleteFlags |= IN_RCV_COMPLETE;
  414. do {
  415. if (RequestCompleteFlags & CONN_REQUEST_COMPLETE) {
  416. if (!EMPTYQ(&ConnRequestCompleteQ)) {
  417. DEQUEUE(&ConnRequestCompleteQ, Req, TCPReq, tr_q);
  418. CHECK_STRUCT(Req, tr);
  419. CHECK_STRUCT(*(TCPConnReq **)&Req, tcr);
  420. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  421. (*Req->tr_rtn)(Req->tr_context, Req->tr_status, 0);
  422. FreeConnReq((TCPConnReq *)Req);
  423. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  424. } else
  425. RequestCompleteFlags &= ~CONN_REQUEST_COMPLETE;
  426. }
  427. if (RequestCompleteFlags & SEND_REQUEST_COMPLETE) {
  428. if (!EMPTYQ(&SendCompleteQ)) {
  429. TCPSendReq *SendReq;
  430. DEQUEUE(&SendCompleteQ, Req, TCPReq, tr_q);
  431. CHECK_STRUCT(Req, tr);
  432. SendReq = (TCPSendReq *)Req;
  433. CHECK_STRUCT(SendReq, tsr);
  434. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  435. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  436. Req->tr_status == TDI_SUCCESS ? SendReq->tsr_size
  437. : 0);
  438. FreeSendReq((TCPSendReq *)Req);
  439. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  440. } else
  441. RequestCompleteFlags &= ~SEND_REQUEST_COMPLETE;
  442. }
  443. } while (RequestCompleteFlags & ANY_REQUEST_COMPLETE);
  444. RequestCompleteFlags &= ~IN_RCV_COMPLETE;
  445. }
  446. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  447. }
  448. ProcessTCBDelayQ();
  449. }
  450. //* CompleteConnReq - Complete a connection request on a TCB.
  451. //
  452. // A utility function to complete a connection request on a TCB. We remove
  453. // the connreq, and put it on the ConnReqCmpltQ where it will be picked
  454. // off later during RcvCmplt processing. We assume the TCB lock is held when
  455. // we're called.
  456. //
  457. void // Returns: Nothing.
  458. CompleteConnReq(
  459. TCB *CmpltTCB, // TCB from which to complete.
  460. TDI_STATUS Status) // Status to complete with.
  461. {
  462. TCPConnReq *ConnReq;
  463. CHECK_STRUCT(CmpltTCB, tcb);
  464. ConnReq = CmpltTCB->tcb_connreq;
  465. if (ConnReq != NULL) {
  466. //
  467. // There's a connreq on this TCB. Fill in the connection information
  468. // before returning it.
  469. //
  470. CmpltTCB->tcb_connreq = NULL;
  471. UpdateConnInfo(ConnReq->tcr_conninfo, &CmpltTCB->tcb_daddr,
  472. CmpltTCB->tcb_dscope_id, CmpltTCB->tcb_dport);
  473. if (ConnReq->tcr_addrinfo) {
  474. UpdateConnInfo(ConnReq->tcr_addrinfo, &CmpltTCB->tcb_saddr,
  475. CmpltTCB->tcb_sscope_id, CmpltTCB->tcb_sport);
  476. }
  477. ConnReq->tcr_req.tr_status = Status;
  478. KeAcquireSpinLockAtDpcLevel(&RequestCompleteLock);
  479. RequestCompleteFlags |= CONN_REQUEST_COMPLETE;
  480. ENQUEUE(&ConnRequestCompleteQ, &ConnReq->tcr_req.tr_q);
  481. KeReleaseSpinLockFromDpcLevel(&RequestCompleteLock);
  482. } else if (!((CmpltTCB->tcb_state == TCB_SYN_RCVD) &&
  483. (CmpltTCB->tcb_flags & ACCEPT_PENDING))) {
  484. //
  485. // This should not happen except
  486. // in the case of SynAttackProtect.
  487. //
  488. ASSERT(FALSE);
  489. }
  490. }
  491. //* DelayedAcceptConn - Process delayed-connect request.
  492. //
  493. // Called by TCPRcv when SynAttackProtection is turned on, when a final
  494. // ACK arrives in response to our SYN-ACK. Indicate the connect request to
  495. // ULP and if it is accepted init TCB and move con to appropriate queue on AO.
  496. // The caller must hold the AddrObjTableLock before calling this routine,
  497. // and that lock must have been taken at DPC level. This routine will free
  498. // that lock back to DPC level.
  499. // Returns TRUE if the request is accepted.
  500. //
  501. BOOLEAN
  502. DelayedAcceptConn(
  503. AddrObj *ListenAO, // AddrObj for local address.
  504. IPv6Addr *Src, // Source IP address of SYN.
  505. ulong SrcScopeId, // Scope id of source address (0 for non-scope addr).
  506. ushort SrcPort, // Source port of SYN.
  507. TCB *AcceptTCB) // Pre-accepted TCB
  508. {
  509. TCPConn *CurrentConn = NULL;
  510. Queue *Temp;
  511. TCPConnReq *ConnReq = NULL;
  512. BOOLEAN FoundConn = FALSE;
  513. CHECK_STRUCT(ListenAO, ao);
  514. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  515. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  516. if (AO_VALID(ListenAO)) {
  517. if (ListenAO->ao_connect != NULL) {
  518. uchar TAddress[TCP_TA_SIZE];
  519. PVOID ConnContext;
  520. PConnectEvent Event;
  521. PVOID EventContext;
  522. TDI_STATUS Status;
  523. PTCP_CONTEXT TcpContext = NULL;
  524. ConnectEventInfo *EventInfo;
  525. // He has a connect handler. Put the transport address together,
  526. // and call him. We also need to get the necessary resources
  527. // first.
  528. Event = ListenAO->ao_connect;
  529. EventContext = ListenAO->ao_conncontext;
  530. REF_AO(ListenAO);
  531. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  532. ConnReq = GetConnReq();
  533. if (AcceptTCB != NULL && ConnReq != NULL) {
  534. BuildTDIAddress(TAddress, Src, SrcScopeId, SrcPort);
  535. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  536. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  537. "indicating connect request\n"));
  538. }
  539. Status = (*Event) (EventContext, TCP_TA_SIZE,
  540. (PTRANSPORT_ADDRESS) TAddress, 0, NULL,
  541. 0, NULL,
  542. &ConnContext, &EventInfo);
  543. if (Status == TDI_MORE_PROCESSING) {
  544. PIO_STACK_LOCATION IrpSp;
  545. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  546. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  547. Status = TCPPrepareIrpForCancel(
  548. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  549. EventInfo,
  550. TCPCancelRequest
  551. );
  552. if (!NT_SUCCESS(Status)) {
  553. Status = TDI_NOT_ACCEPTED;
  554. EventInfo = NULL;
  555. goto AcceptIrpCancelled;
  556. }
  557. //
  558. // He accepted it. Find the connection on the AddrObj.
  559. //
  560. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  561. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  562. "connect indication accepted, queueing request\n"
  563. ));
  564. }
  565. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  566. & (IrpSp->Parameters);
  567. ConnReq->tcr_conninfo =
  568. AcceptRequest->ReturnConnectionInformation;
  569. if (AcceptRequest->RequestConnectionInformation &&
  570. AcceptRequest->RequestConnectionInformation->
  571. RemoteAddress) {
  572. ConnReq->tcr_addrinfo =
  573. AcceptRequest->RequestConnectionInformation;
  574. } else {
  575. ConnReq->tcr_addrinfo = NULL;
  576. }
  577. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  578. ConnReq->tcr_req.tr_context = EventInfo;
  579. AcceptTCB->tcb_connreq = ConnReq;
  580. SearchAO:
  581. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  582. Temp = QHEAD(&ListenAO->ao_idleq);;
  583. Status = TDI_INVALID_CONNECTION;
  584. while (Temp != QEND(&ListenAO->ao_idleq)) {
  585. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  586. CHECK_STRUCT(CurrentConn, tc);
  587. if ((CurrentConn->tc_context == ConnContext) &&
  588. !(CurrentConn->tc_flags & CONN_INVALID)) {
  589. //
  590. // We need to lock its TCPConnBlock, with care.
  591. // We'll ref the TCPConn so it can't go away,
  592. // then unlock the AO (which is already ref'd),
  593. // then relock. Note that tc_refcnt is updated
  594. // under ao_lock for any associated TCPConn.
  595. // If things have changed, go back and try again.
  596. //
  597. ++CurrentConn->tc_refcnt;
  598. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  599. KeAcquireSpinLockAtDpcLevel(
  600. &CurrentConn->tc_ConnBlock->cb_lock);
  601. //
  602. // Now that we've got the lock, we need to consider
  603. // the following possibilities:
  604. //
  605. // * a disassociate was initiated
  606. // * a close was initiated
  607. // * accept completed
  608. // * listen completed
  609. // * connect completed
  610. //
  611. // The first two require that we clean up,
  612. // by calling the tc_donertn. For the last three,
  613. // we have nothing to do, but tc_donertn points at
  614. // DummyDone, so go ahead and call it anyway;
  615. // it'll release the TCPConnBlock lock for us.
  616. //
  617. if (--CurrentConn->tc_refcnt == 0 &&
  618. ((CurrentConn->tc_flags & CONN_INVALID) ||
  619. (CurrentConn->tc_tcb != NULL))) {
  620. ConnDoneRtn DoneRtn = CurrentConn->tc_donertn;
  621. DoneRtn(CurrentConn, DISPATCH_LEVEL);
  622. goto SearchAO;
  623. }
  624. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  625. // We think we have a match. The connection
  626. // shouldn't have a TCB associated with it. If it
  627. // does, it's an error. InitTCBFromConn will
  628. // handle all this.
  629. Status = InitTCBFromConn(CurrentConn,
  630. AcceptTCB,
  631. AcceptRequest->RequestConnectionInformation,
  632. TRUE);
  633. if (Status == TDI_SUCCESS) {
  634. FoundConn = TRUE;
  635. KeAcquireSpinLockAtDpcLevel(&AcceptTCB->tcb_lock);
  636. AcceptTCB->tcb_conn = CurrentConn;
  637. CurrentConn->tc_tcb = AcceptTCB;
  638. KeReleaseSpinLockFromDpcLevel(&AcceptTCB->tcb_lock);
  639. CurrentConn->tc_refcnt++;
  640. // Move him from the idle q to the active
  641. // queue.
  642. REMOVEQ(&CurrentConn->tc_q);
  643. ENQUEUE(&ListenAO->ao_activeq,
  644. &CurrentConn->tc_q);
  645. } else
  646. KeReleaseSpinLockFromDpcLevel(
  647. &CurrentConn->tc_ConnBlock->cb_lock);
  648. // In any case, we're done now.
  649. break;
  650. }
  651. Temp = QNEXT(Temp);
  652. }
  653. if (!FoundConn) {
  654. CompleteConnReq(AcceptTCB, Status);
  655. }
  656. LOCKED_DELAY_DEREF_AO(ListenAO);
  657. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  658. if (FoundConn) {
  659. KeReleaseSpinLockFromDpcLevel(
  660. &CurrentConn->tc_ConnBlock->cb_lock);
  661. }
  662. return FoundConn;
  663. }
  664. //
  665. // The event handler didn't take it. Dereference it, free
  666. // the resources, and return NULL.
  667. //
  668. }
  669. AcceptIrpCancelled:
  670. //
  671. // We couldn't get a valid tcb or getconnreq
  672. //
  673. if (ConnReq) {
  674. FreeConnReq(ConnReq);
  675. }
  676. DELAY_DEREF_AO(ListenAO);
  677. return FALSE;
  678. } // ao_connect != null
  679. } // AO not valid
  680. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  681. return FALSE;
  682. }
  683. //* FindListenConn - Find (or fabricate) a listening connection.
  684. //
  685. // Called by our Receive handler to decide what to do about an incoming
  686. // SYN. We walk down the list of connections associated with the destination
  687. // address, and if we find any in the listening state that can be used for
  688. // the incoming request we'll take them, possibly returning a listen in the
  689. // process. If we don't find any appropriate listening connections, we'll
  690. // call the Connect Event handler if one is registered. If all else fails,
  691. // we'll return NULL and the SYN will be RST.
  692. //
  693. // The caller must hold the AddrObjTableLock before calling this routine,
  694. // and that lock must have been taken at DPC level. This routine will free
  695. // that lock back to DPC level.
  696. //
  697. TCB * // Returns: Pointer to found TCB, or NULL if we can't find one.
  698. FindListenConn(
  699. AddrObj *ListenAO, // AddrObj for local address.
  700. IPv6Addr *Src, // Source IP address of SYN.
  701. ulong SrcScopeId, // Scope id of source address (0 for non-scope addr).
  702. ushort SrcPort) // Source port of SYN.
  703. {
  704. TCB *CurrentTCB = NULL;
  705. TCPConn *CurrentConn = NULL;
  706. TCPConnReq *ConnReq = NULL;
  707. Queue *Temp;
  708. uint FoundConn = FALSE;
  709. CHECK_STRUCT(ListenAO, ao);
  710. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  711. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  712. //
  713. // We have the lock on the AddrObj. Walk down its list, looking
  714. // for connections in the listening state.
  715. //
  716. if (AO_VALID(ListenAO)) {
  717. if (ListenAO->ao_listencnt != 0) {
  718. Temp = QHEAD(&ListenAO->ao_listenq);
  719. while (Temp != QEND(&ListenAO->ao_listenq)) {
  720. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  721. CHECK_STRUCT(CurrentConn, tc);
  722. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  723. KeAcquireSpinLockAtDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  724. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  725. //
  726. // If this TCB is in the listening state, with no delete
  727. // pending, it's a candidate. Look at the pending listen
  728. // information to see if we should take it.
  729. //
  730. if ((CurrentTCB = CurrentConn->tc_tcb) != NULL &&
  731. CurrentTCB->tcb_state == TCB_LISTEN) {
  732. CHECK_STRUCT(CurrentTCB, tcb);
  733. KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
  734. if (CurrentTCB->tcb_state == TCB_LISTEN &&
  735. !PENDING_ACTION(CurrentTCB)) {
  736. //
  737. // Need to see if we can take it.
  738. // See if the addresses specifed in the ConnReq match.
  739. //
  740. if ((IsUnspecified(&CurrentTCB->tcb_daddr) ||
  741. (IP6_ADDR_EQUAL(&CurrentTCB->tcb_daddr, Src) &&
  742. (CurrentTCB->tcb_dscope_id == SrcScopeId))) &&
  743. (CurrentTCB->tcb_dport == 0 ||
  744. CurrentTCB->tcb_dport == SrcPort)) {
  745. FoundConn = TRUE;
  746. break;
  747. }
  748. //
  749. // Otherwise, this didn't match, so we'll check the
  750. // next one.
  751. //
  752. }
  753. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  754. }
  755. KeReleaseSpinLockFromDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  756. Temp = QNEXT(Temp);;
  757. }
  758. //
  759. // See why we've exited the loop.
  760. //
  761. if (FoundConn) {
  762. CHECK_STRUCT(CurrentTCB, tcb);
  763. //
  764. // We exited because we found a TCB. If it's pre-accepted,
  765. // we're done.
  766. //
  767. CurrentTCB->tcb_refcnt++;
  768. ASSERT(CurrentTCB->tcb_connreq != NULL);
  769. ConnReq = CurrentTCB->tcb_connreq;
  770. //
  771. // If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
  772. //
  773. if (!(ConnReq->tcr_flags & TDI_QUERY_ACCEPT))
  774. CurrentTCB->tcb_flags |= CONN_ACCEPTED;
  775. CurrentTCB->tcb_state = TCB_SYN_RCVD;
  776. CurrentTCB->tcb_hops = ListenAO->ao_ucast_hops;
  777. ListenAO->ao_listencnt--;
  778. //
  779. // Since he's no longer listening, remove him from the listen
  780. // queue and put him on the active queue.
  781. //
  782. REMOVEQ(&CurrentConn->tc_q);
  783. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  784. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  785. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  786. KeReleaseSpinLockFromDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  787. return CurrentTCB;
  788. }
  789. }
  790. //
  791. // We didn't find a matching TCB.
  792. //
  793. ASSERT(FoundConn == FALSE);
  794. if (SynAttackProtect) {
  795. TCB *AcceptTCB = NULL;
  796. //
  797. // No need to hold ao_lock any more.
  798. //
  799. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  800. //
  801. // SynAttack protection is on. Just initialize
  802. // this TCB and send SYN-ACK. When final
  803. // ACK is seen we will indicate about this
  804. // connection arrival to upper layer.
  805. //
  806. AcceptTCB = AllocTCB();
  807. if (AcceptTCB) {
  808. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  809. AcceptTCB->tcb_connreq = NULL;
  810. AcceptTCB->tcb_flags |= CONN_ACCEPTED;
  811. AcceptTCB->tcb_flags |= ACCEPT_PENDING;
  812. AcceptTCB->tcb_refcnt = 1;
  813. AcceptTCB->tcb_defaultwin = DEFAULT_RCV_WIN;
  814. AcceptTCB->tcb_rcvwin = DEFAULT_RCV_WIN;
  815. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  816. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  817. "Allocated SP TCB %x\n", (PCHAR)AcceptTCB));
  818. }
  819. }
  820. return AcceptTCB;
  821. }
  822. //
  823. // If there's a connect indication
  824. // handler, call it now to find a connection to accept on.
  825. //
  826. if (ListenAO->ao_connect != NULL) {
  827. uchar TAddress[TCP_TA_SIZE];
  828. PVOID ConnContext;
  829. PConnectEvent Event;
  830. PVOID EventContext;
  831. TDI_STATUS Status;
  832. TCB *AcceptTCB;
  833. ConnectEventInfo *EventInfo;
  834. //
  835. // He has a connect handler. Put the transport address together,
  836. // and call him. We also need to get the necessary resources
  837. // first.
  838. //
  839. Event = ListenAO->ao_connect;
  840. EventContext = ListenAO->ao_conncontext;
  841. REF_AO(ListenAO);
  842. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  843. AcceptTCB = AllocTCB();
  844. ConnReq = GetConnReq();
  845. if (AcceptTCB != NULL && ConnReq != NULL) {
  846. BuildTDIAddress(TAddress, Src, SrcScopeId, SrcPort);
  847. AcceptTCB->tcb_state = TCB_LISTEN;
  848. AcceptTCB->tcb_connreq = ConnReq;
  849. AcceptTCB->tcb_flags |= CONN_ACCEPTED;
  850. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  851. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  852. "indicating connect request\n"));
  853. }
  854. Status = (*Event)(EventContext, TCP_TA_SIZE,
  855. (PTRANSPORT_ADDRESS)TAddress, 0, NULL,
  856. 0, NULL,
  857. &ConnContext, &EventInfo);
  858. if (Status == TDI_MORE_PROCESSING) {
  859. PIO_STACK_LOCATION IrpSp;
  860. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  861. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  862. Status = TCPPrepareIrpForCancel(
  863. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  864. EventInfo, TCPCancelRequest);
  865. if (!NT_SUCCESS(Status)) {
  866. Status = TDI_NOT_ACCEPTED;
  867. EventInfo = NULL;
  868. goto AcceptIrpCancelled;
  869. }
  870. //
  871. // He accepted it. Find the connection on the AddrObj.
  872. //
  873. {
  874. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  875. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  876. "connect indication accepted,"
  877. " queueing request\n"));
  878. }
  879. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  880. &(IrpSp->Parameters);
  881. ConnReq->tcr_conninfo =
  882. AcceptRequest->ReturnConnectionInformation;
  883. if (AcceptRequest->RequestConnectionInformation &&
  884. AcceptRequest->RequestConnectionInformation->
  885. RemoteAddress) {
  886. ConnReq->tcr_addrinfo =
  887. AcceptRequest->RequestConnectionInformation;
  888. } else {
  889. ConnReq->tcr_addrinfo = NULL;
  890. }
  891. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  892. ConnReq->tcr_req.tr_context = EventInfo;
  893. }
  894. SearchAO:
  895. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  896. Temp = QHEAD(&ListenAO->ao_idleq);
  897. CurrentTCB = NULL;
  898. Status = TDI_INVALID_CONNECTION;
  899. while (Temp != QEND(&ListenAO->ao_idleq)) {
  900. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  901. CHECK_STRUCT(CurrentConn, tc);
  902. if ((CurrentConn->tc_context == ConnContext) &&
  903. !(CurrentConn->tc_flags & CONN_INVALID)) {
  904. //
  905. // We need to lock its TCPConnBlock, with care.
  906. // We'll ref the TCPConn so it can't go away,
  907. // then unlock the AO (which is already ref'd),
  908. // then relock. Note that tc_refcnt is updated
  909. // under ao_lock for any associated TCPConn.
  910. // If things have changed, go back and try again.
  911. //
  912. ++CurrentConn->tc_refcnt;
  913. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  914. KeAcquireSpinLockAtDpcLevel(
  915. &CurrentConn->tc_ConnBlock->cb_lock);
  916. //
  917. // Now that we've got the lock, we need to consider
  918. // the following possibilities:
  919. //
  920. // * a disassociate was initiated
  921. // * a close was initiated
  922. // * accept completed
  923. // * listen completed
  924. // * connect completed
  925. //
  926. // The first two require that we clean up,
  927. // by calling the tc_donertn. For the last three,
  928. // we have nothing to do, but tc_donertn points at
  929. // DummyDone, so go ahead and call it anyway;
  930. // it'll release the TCPConnBlock lock for us.
  931. //
  932. if (--CurrentConn->tc_refcnt == 0 &&
  933. ((CurrentConn->tc_flags & CONN_INVALID) ||
  934. (CurrentConn->tc_tcb != NULL))) {
  935. ConnDoneRtn DoneRtn = CurrentConn->tc_donertn;
  936. DoneRtn(CurrentConn, DISPATCH_LEVEL);
  937. goto SearchAO;
  938. }
  939. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  940. //
  941. // We think we have a match. The connection
  942. // shouldn't have a TCB associated with it. If it
  943. // does, it's an error. InitTCBFromConn will
  944. // handle all this.
  945. //
  946. AcceptTCB->tcb_refcnt = 1;
  947. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  948. AcceptRequest->RequestConnectionInformation,
  949. TRUE);
  950. if (Status == TDI_SUCCESS) {
  951. FoundConn = TRUE;
  952. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  953. AcceptTCB->tcb_conn = CurrentConn;
  954. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  955. CurrentConn->tc_tcb = AcceptTCB;
  956. CurrentConn->tc_refcnt++;
  957. //
  958. // Move him from the idle queue to the
  959. // active queue.
  960. //
  961. REMOVEQ(&CurrentConn->tc_q);
  962. ENQUEUE(&ListenAO->ao_activeq,
  963. &CurrentConn->tc_q);
  964. } else
  965. KeReleaseSpinLockFromDpcLevel(
  966. &CurrentConn->tc_ConnBlock->cb_lock);
  967. // In any case, we're done now.
  968. break;
  969. }
  970. Temp = QNEXT(Temp);
  971. }
  972. if (!FoundConn) {
  973. //
  974. // Didn't find a match, or had an error.
  975. // Status code is set.
  976. // Complete the ConnReq and free the resources.
  977. //
  978. CompleteConnReq(AcceptTCB, Status);
  979. FreeTCB(AcceptTCB);
  980. AcceptTCB = NULL;
  981. }
  982. LOCKED_DELAY_DEREF_AO(ListenAO);
  983. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  984. if (FoundConn) {
  985. KeReleaseSpinLockFromDpcLevel(
  986. &CurrentConn->tc_ConnBlock->cb_lock);
  987. }
  988. return AcceptTCB;
  989. }
  990. }
  991. AcceptIrpCancelled:
  992. //
  993. // We couldn't get a needed resource or event handler
  994. // did not take this. Free any that we
  995. // did get, and fall through to the 'return NULL' code.
  996. //
  997. if (ConnReq != NULL)
  998. FreeConnReq(ConnReq);
  999. if (AcceptTCB != NULL)
  1000. FreeTCB(AcceptTCB);
  1001. DELAY_DEREF_AO(ListenAO);
  1002. } else {
  1003. //
  1004. // No event handler, or no resource.
  1005. // Free the locks, and return NULL.
  1006. //
  1007. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  1008. }
  1009. return NULL;
  1010. }
  1011. //
  1012. // If we get here, the address object wasn't valid.
  1013. //
  1014. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  1015. return NULL;
  1016. }
  1017. //* FindMSS - Find the MSS option in a segment.
  1018. //
  1019. // Called when a SYN is received to find the MSS option in a segment.
  1020. // If we don't find one, we assume the worst and return one based on
  1021. // the minimum MTU.
  1022. //
  1023. ushort // Returns: MSS to be used.
  1024. FindMSS(
  1025. TCPHeader UNALIGNED *TCP) // TCP header to be searched.
  1026. {
  1027. uint OptSize;
  1028. uchar *OptPtr;
  1029. OptSize = TCP_HDR_SIZE(TCP) - sizeof(TCPHeader);
  1030. OptPtr = (uchar *)(TCP + 1);
  1031. while (OptSize) {
  1032. if (*OptPtr == TCP_OPT_EOL)
  1033. break;
  1034. if (*OptPtr == TCP_OPT_NOP) {
  1035. OptPtr++;
  1036. OptSize--;
  1037. continue;
  1038. }
  1039. if (*OptPtr == TCP_OPT_MSS) {
  1040. if (OptPtr[1] == MSS_OPT_SIZE) {
  1041. ushort TempMss = *(ushort UNALIGNED *)(OptPtr + 2);
  1042. if (TempMss != 0)
  1043. return net_short(TempMss);
  1044. else
  1045. break; // MSS size of 0, use default.
  1046. } else
  1047. break; // Bad option size, use default.
  1048. } else {
  1049. //
  1050. // Unknown option. Skip over it.
  1051. //
  1052. if (OptPtr[1] == 0 || OptPtr[1] > OptSize)
  1053. break; // Bad option length, bail out.
  1054. OptSize -= OptPtr[1];
  1055. OptPtr += OptPtr[1];
  1056. }
  1057. }
  1058. return DEFAULT_MSS;
  1059. }
  1060. //* ACKAndDrop - Acknowledge a segment, and drop it.
  1061. //
  1062. // Called from within the receive code when we need to drop a segment that's
  1063. // outside the receive window.
  1064. //
  1065. void // Returns: Nothing.
  1066. ACKAndDrop(
  1067. TCPRcvInfo *RI, // Receive info for incoming segment.
  1068. TCB *RcvTCB) // TCB for incoming segment.
  1069. {
  1070. if (!(RI->tri_flags & TCP_FLAG_RST)) {
  1071. if (RcvTCB->tcb_state == TCB_TIME_WAIT) {
  1072. //
  1073. // In TIME_WAIT, we only ACK duplicates/retransmissions
  1074. // of our peer's FIN segment.
  1075. //
  1076. // REVIEW: We're currently fairly loose on the sequence
  1077. // number check here.
  1078. //
  1079. if ((RI->tri_flags & TCP_FLAG_FIN) &&
  1080. SEQ_LTE(RI->tri_seq, RcvTCB->tcb_rcvnext)) {
  1081. // Restart 2MSL timer and proceed with sending the ACK.
  1082. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, MAX_REXMIT_TO);
  1083. } else {
  1084. // Drop this segment without an ACK.
  1085. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  1086. return;
  1087. }
  1088. }
  1089. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  1090. SendACK(RcvTCB);
  1091. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  1092. }
  1093. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  1094. }
  1095. //* ACKData - Acknowledge data.
  1096. //
  1097. // Called from the receive handler to acknowledge data. We're given the
  1098. // TCB and the new value of senduna. We walk down the send queue pulling
  1099. // off sends and putting them on the complete queue until we hit the end
  1100. // or we acknowledge the specified number of bytes of data.
  1101. //
  1102. // NOTE: We manipulate the send refcnt and acked flag without taking a lock.
  1103. // This is OK in the VxD version where locks don't mean anything anyway, but
  1104. // in the port to NT we'll need to add locking. The lock will have to be
  1105. // taken in the transmit complete routine. We can't use a lock in the TCB,
  1106. // since the TCB could go away before the transmit complete happens, and a
  1107. // lock in the TSR would be overkill, so it's probably best to use a global
  1108. // lock for this. If that causes too much contention, we could use a set of
  1109. // locks and pass a pointer to the appropriate lock back as part of the
  1110. // transmit confirm context. This lock pointer would also need to be stored
  1111. // in the TCB.
  1112. //
  1113. void // Returns: Nothing.
  1114. ACKData(
  1115. TCB *ACKTcb, // TCB from which to pull data.
  1116. SeqNum SendUNA) // New value of send una.
  1117. {
  1118. Queue *End, *Current; // End and current elements.
  1119. Queue *TempQ, *EndQ;
  1120. Queue *LastCmplt; // Last one we completed.
  1121. TCPSendReq *CurrentTSR; // Current send req we're looking at.
  1122. PNDIS_BUFFER CurrentBuffer; // Current NDIS_BUFFER.
  1123. uint Updated = FALSE;
  1124. uint BufLength;
  1125. int Amount, OrigAmount;
  1126. long Result;
  1127. KIRQL OldIrql;
  1128. uint Temp;
  1129. CHECK_STRUCT(ACKTcb, tcb);
  1130. CheckTCBSends(ACKTcb);
  1131. Amount = SendUNA - ACKTcb->tcb_senduna;
  1132. ASSERT(Amount > 0);
  1133. //
  1134. // Since this is an acknowledgement of receipt by our peer for previously
  1135. // unacknowledged data, it implies forward reachablility.
  1136. //
  1137. if (ACKTcb->tcb_rce != NULL)
  1138. ConfirmForwardReachability(ACKTcb->tcb_rce);
  1139. //
  1140. // Do a quick check to see if this acks everything that we have. If it
  1141. // does, handle it right away. We can only do this in the ESTABLISHED
  1142. // state, because we blindly update sendnext, and that can only work if we
  1143. // haven't sent a FIN.
  1144. //
  1145. if ((Amount == (int) ACKTcb->tcb_unacked) &&
  1146. ACKTcb->tcb_state == TCB_ESTAB) {
  1147. //
  1148. // Everything is acked.
  1149. //
  1150. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1151. TempQ = ACKTcb->tcb_sendq.q_next;
  1152. INITQ(&ACKTcb->tcb_sendq);
  1153. ACKTcb->tcb_sendnext = SendUNA;
  1154. ACKTcb->tcb_senduna = SendUNA;
  1155. ASSERT(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
  1156. ACKTcb->tcb_cursend = NULL;
  1157. ACKTcb->tcb_sendbuf = NULL;
  1158. ACKTcb->tcb_sendofs = 0;
  1159. ACKTcb->tcb_sendsize = 0;
  1160. ACKTcb->tcb_unacked = 0;
  1161. //
  1162. // Now walk down the list of send requests. If the reference count
  1163. // has gone to 0, put it on the send complete queue.
  1164. //
  1165. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  1166. EndQ = &ACKTcb->tcb_sendq;
  1167. do {
  1168. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  1169. TCPSendReq, tsr_req);
  1170. CHECK_STRUCT(CurrentTSR, tsr);
  1171. TempQ = CurrentTSR->tsr_req.tr_q.q_next;
  1172. CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
  1173. Result = InterlockedDecrement(&CurrentTSR->tsr_refcnt);
  1174. ASSERT(Result >= 0);
  1175. if (Result <= 0) {
  1176. // No more references are outstanding, the send can be
  1177. // completed.
  1178. // If we've sent directly from this send, NULL out the next
  1179. // pointer for the last buffer in the chain.
  1180. if (CurrentTSR->tsr_lastbuf != NULL) {
  1181. NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
  1182. CurrentTSR->tsr_lastbuf = NULL;
  1183. }
  1184. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1185. Temp = ACKTcb->tcb_bcountlow;
  1186. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1187. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1188. ENQUEUE(&SendCompleteQ, &CurrentTSR->tsr_req.tr_q);
  1189. }
  1190. } while (TempQ != EndQ);
  1191. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  1192. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  1193. CheckTCBSends(ACKTcb);
  1194. return;
  1195. }
  1196. OrigAmount = Amount;
  1197. End = QEND(&ACKTcb->tcb_sendq);
  1198. Current = QHEAD(&ACKTcb->tcb_sendq);
  1199. LastCmplt = NULL;
  1200. while (Amount > 0 && Current != End) {
  1201. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1202. TCPSendReq, tsr_req);
  1203. CHECK_STRUCT(CurrentTSR, tsr);
  1204. if (Amount >= (int) CurrentTSR->tsr_unasize) {
  1205. // This is completely acked. Just advance to the next one.
  1206. Amount -= CurrentTSR->tsr_unasize;
  1207. LastCmplt = Current;
  1208. Current = QNEXT(Current);
  1209. continue;
  1210. }
  1211. //
  1212. // This one is only partially acked. Update his offset and NDIS buffer
  1213. // pointer, and break out. We know that Amount is < the unacked size
  1214. // in this buffer, we we can walk the NDIS buffer chain without fear
  1215. // of falling off the end.
  1216. //
  1217. CurrentBuffer = CurrentTSR->tsr_buffer;
  1218. ASSERT(CurrentBuffer != NULL);
  1219. ASSERT(Amount < (int) CurrentTSR->tsr_unasize);
  1220. CurrentTSR->tsr_unasize -= Amount;
  1221. BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
  1222. if (Amount >= (int) BufLength) {
  1223. do {
  1224. Amount -= BufLength;
  1225. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1226. ASSERT(CurrentBuffer != NULL);
  1227. BufLength = NdisBufferLength(CurrentBuffer);
  1228. } while (Amount >= (int) BufLength);
  1229. CurrentTSR->tsr_offset = Amount;
  1230. CurrentTSR->tsr_buffer = CurrentBuffer;
  1231. } else
  1232. CurrentTSR->tsr_offset += Amount;
  1233. Amount = 0;
  1234. break;
  1235. }
  1236. #if DBG
  1237. //
  1238. // We should always be able to remove at least Amount bytes, except in
  1239. // the case where a FIN has been sent. In that case we should be off
  1240. // by exactly one. In the debug builds we'll check this.
  1241. //
  1242. if (Amount != 0 && (!(ACKTcb->tcb_flags & FIN_SENT) || Amount != 1))
  1243. DbgBreakPoint();
  1244. #endif
  1245. if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
  1246. if (Current != End) {
  1247. //
  1248. // Need to reevaluate CurrentTSR, in case we bailed out of the
  1249. // above loop after updating Current but before updating
  1250. // CurrentTSR.
  1251. //
  1252. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1253. TCPSendReq, tsr_req);
  1254. CHECK_STRUCT(CurrentTSR, tsr);
  1255. ACKTcb->tcb_cursend = CurrentTSR;
  1256. ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
  1257. ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
  1258. ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
  1259. } else {
  1260. ACKTcb->tcb_cursend = NULL;
  1261. ACKTcb->tcb_sendbuf = NULL;
  1262. ACKTcb->tcb_sendofs = 0;
  1263. ACKTcb->tcb_sendsize = 0;
  1264. }
  1265. ACKTcb->tcb_sendnext = SendUNA;
  1266. }
  1267. //
  1268. // Now update tcb_unacked with the amount we tried to ack minus the
  1269. // amount we didn't ack (Amount should be 0 or 1 here).
  1270. //
  1271. ASSERT(Amount == 0 || Amount == 1);
  1272. ACKTcb->tcb_unacked -= OrigAmount - Amount;
  1273. ASSERT(*(int *)&ACKTcb->tcb_unacked >= 0);
  1274. ACKTcb->tcb_senduna = SendUNA;
  1275. //
  1276. // If we've acked any here, LastCmplt will be non-null, and Current will
  1277. // point to the send that should be at the start of the queue. Splice
  1278. // out the completed ones and put them on the end of the send completed
  1279. // queue, and update the TCB send queue.
  1280. //
  1281. if (LastCmplt != NULL) {
  1282. Queue *FirstCmplt;
  1283. TCPSendReq *FirstTSR, *EndTSR;
  1284. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1285. FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
  1286. //
  1287. // If we've acked everything, just reinit the queue.
  1288. //
  1289. if (Current == End) {
  1290. INITQ(&ACKTcb->tcb_sendq);
  1291. } else {
  1292. //
  1293. // There's still something on the queue. Just update it.
  1294. //
  1295. ACKTcb->tcb_sendq.q_next = Current;
  1296. Current->q_prev = &ACKTcb->tcb_sendq;
  1297. }
  1298. CheckTCBSends(ACKTcb);
  1299. //
  1300. // Now walk down the lists of things acked. If the refcnt on the send
  1301. // is 0, go ahead and put him on the send complete Q. Otherwise set
  1302. // the ACKed bit in the send, and he'll be completed when the count
  1303. // goes to 0 in the transmit confirm.
  1304. //
  1305. // Note that we haven't done any locking here. This will probably
  1306. // need to change in the port to NT.
  1307. //
  1308. // Set FirstTSR to the first TSR we'll complete, and EndTSR to be
  1309. // the first TSR that isn't completed.
  1310. //
  1311. FirstTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, FirstCmplt, tr_q),
  1312. TCPSendReq, tsr_req);
  1313. EndTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1314. TCPSendReq, tsr_req);
  1315. CHECK_STRUCT(FirstTSR, tsr);
  1316. ASSERT(FirstTSR != EndTSR);
  1317. //
  1318. // Now walk the list of ACKed TSRs. If we can complete one, put him
  1319. // on the complete queue.
  1320. //
  1321. KeAcquireSpinLockAtDpcLevel(&RequestCompleteLock);
  1322. while (FirstTSR != EndTSR) {
  1323. TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
  1324. CHECK_STRUCT(FirstTSR, tsr);
  1325. FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
  1326. //
  1327. // The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
  1328. // goes to 0, so we don't need to do it here.
  1329. //
  1330. // Decrement the reference put on the send buffer when it was
  1331. // initialized indicating the send has been acknowledged.
  1332. //
  1333. Result = InterlockedDecrement(&(FirstTSR->tsr_refcnt));
  1334. ASSERT(Result >= 0);
  1335. if (Result <= 0) {
  1336. //
  1337. // No more references are outstanding, the send can be
  1338. // completed.
  1339. //
  1340. // If we've sent directly from this send, NULL out the next
  1341. // pointer for the last buffer in the chain.
  1342. //
  1343. if (FirstTSR->tsr_lastbuf != NULL) {
  1344. NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
  1345. FirstTSR->tsr_lastbuf = NULL;
  1346. }
  1347. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1348. Temp = ACKTcb->tcb_bcountlow;
  1349. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1350. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1351. ENQUEUE(&SendCompleteQ, &FirstTSR->tsr_req.tr_q);
  1352. }
  1353. FirstTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  1354. TCPSendReq, tsr_req);
  1355. }
  1356. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  1357. KeReleaseSpinLockFromDpcLevel(&RequestCompleteLock);
  1358. }
  1359. }
  1360. //* TrimPacket - Trim the leading edge of a Packet.
  1361. //
  1362. // A utility routine to trim the front of a Packet. We take in an amount
  1363. // to trim off (which may be 0) and adjust the pointer in the first buffer
  1364. // in the chain forward by that much. If there isn't that much in the first
  1365. // buffer, we move onto the next one. If we run out of buffers we'll return
  1366. // a pointer to the last buffer in the chain, with a size of 0. It's the
  1367. // caller's responsibility to catch this.
  1368. // REVIEW - Move this to subr.c?
  1369. //
  1370. IPv6Packet * // Returns: A pointer to the new start, or NULL.
  1371. TrimPacket(
  1372. IPv6Packet *Packet, // Packet to be trimmed.
  1373. uint TrimAmount) // Amount to be trimmed.
  1374. {
  1375. uint TrimThisTime;
  1376. ASSERT(Packet != NULL);
  1377. while (TrimAmount) {
  1378. ASSERT(Packet != NULL);
  1379. TrimThisTime = MIN(TrimAmount, Packet->ContigSize);
  1380. TrimAmount -= TrimThisTime;
  1381. Packet->Position += TrimThisTime;
  1382. (uchar *)Packet->Data += TrimThisTime;
  1383. Packet->TotalSize -= TrimThisTime;
  1384. if ((Packet->ContigSize -= TrimThisTime) == 0) {
  1385. //
  1386. // Ran out of space in current buffer.
  1387. // Check for possibility of more data buffers in current packet.
  1388. //
  1389. if (Packet->TotalSize != 0) {
  1390. //
  1391. // Get more contiguous data.
  1392. //
  1393. PacketPullupSubr(Packet, 0, 1, 0);
  1394. continue;
  1395. }
  1396. //
  1397. // Couldn't do a pullup, so see if there's another packet
  1398. // hanging on this chain.
  1399. //
  1400. if (Packet->Next != NULL) {
  1401. IPv6Packet *Temp;
  1402. //
  1403. // There's another packet following. Toss this one.
  1404. //
  1405. Temp = Packet;
  1406. Packet = Packet->Next;
  1407. Temp->Next = NULL;
  1408. FreePacketChain(Temp);
  1409. } else {
  1410. //
  1411. // Ran out of Packets. Just return this one.
  1412. //
  1413. break;
  1414. }
  1415. }
  1416. }
  1417. return Packet;
  1418. }
  1419. //* FreePacketChain - Free a Packet chain.
  1420. //
  1421. // Called to free a chain of IPv6Packets. Only want to free that which
  1422. // we (the TCP/IPv6 stack) have allocated. Don't try to free anything
  1423. // passed up to us from lower layers.
  1424. //
  1425. void // Returns: Nothing.
  1426. FreePacketChain(
  1427. IPv6Packet *Packet) // First Packet in chain to be freed.
  1428. {
  1429. void *Aux;
  1430. while (Packet != NULL) {
  1431. PacketPullupCleanup(Packet);
  1432. if (Packet->Flags & PACKET_OURS) {
  1433. IPv6Packet *Temp;
  1434. Temp = Packet;
  1435. Packet = Packet->Next;
  1436. ExFreePool(Temp);
  1437. } else
  1438. Packet = Packet->Next;
  1439. }
  1440. }
  1441. IPv6Packet DummyPacket;
  1442. //* PullFromRAQ - Pull segments from the reassembly queue.
  1443. //
  1444. // Called when we've received frames out of order, and have some segments
  1445. // on the reassembly queue. We'll walk down the reassembly list, segments
  1446. // that are overlapped by the current receive next variable. When we get
  1447. // to one that doesn't completely overlap we'll trim it to fit the next
  1448. // receive sequence number, and pull it from the queue.
  1449. //
  1450. IPv6Packet *
  1451. PullFromRAQ(
  1452. TCB *RcvTCB, // TCB to pull from.
  1453. TCPRcvInfo *RcvInfo, // TCPRcvInfo structure for current segment.
  1454. uint *Size) // Where to update the size of the current segment.
  1455. {
  1456. TCPRAHdr *CurrentTRH; // Current TCP RA Header being examined.
  1457. TCPRAHdr *TempTRH; // Temporary variable.
  1458. SeqNum NextSeq; // Next sequence number we want.
  1459. IPv6Packet *NewPacket; // Packet after trimming.
  1460. SeqNum NextTRHSeq; // Sequence number immediately after current TRH.
  1461. int Overlap; // Overlap between current TRH and NextSeq.
  1462. CHECK_STRUCT(RcvTCB, tcb);
  1463. CurrentTRH = RcvTCB->tcb_raq;
  1464. NextSeq = RcvTCB->tcb_rcvnext;
  1465. while (CurrentTRH != NULL) {
  1466. CHECK_STRUCT(CurrentTRH, trh);
  1467. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  1468. if (SEQ_LT(NextSeq, CurrentTRH->trh_start)) {
  1469. #if DBG
  1470. *Size = 0;
  1471. #endif
  1472. return NULL; // The next TRH starts too far down.
  1473. }
  1474. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  1475. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  1476. if (SEQ_GTE(NextSeq, NextTRHSeq)) {
  1477. //
  1478. // The current TRH is overlapped completely. Free it and continue.
  1479. //
  1480. FreePacketChain(CurrentTRH->trh_buffer);
  1481. TempTRH = CurrentTRH->trh_next;
  1482. ExFreePool(CurrentTRH);
  1483. CurrentTRH = TempTRH;
  1484. RcvTCB->tcb_raq = TempTRH;
  1485. if (TempTRH == NULL) {
  1486. //
  1487. // We've just cleaned off the RAQ. We can go back on the
  1488. // fast path now.
  1489. //
  1490. if (--(RcvTCB->tcb_slowcount) == 0) {
  1491. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1492. CheckTCBRcv(RcvTCB);
  1493. }
  1494. break;
  1495. }
  1496. } else {
  1497. Overlap = NextSeq - CurrentTRH->trh_start;
  1498. RcvInfo->tri_seq = NextSeq;
  1499. RcvInfo->tri_flags = CurrentTRH->trh_flags;
  1500. RcvInfo->tri_urgent = CurrentTRH->trh_urg;
  1501. if (Overlap != (int) CurrentTRH->trh_size) {
  1502. NewPacket = TrimPacket(CurrentTRH->trh_buffer, Overlap);
  1503. *Size = CurrentTRH->trh_size - Overlap;
  1504. } else {
  1505. //
  1506. // This completely overlaps the data in this segment, but the
  1507. // sequence number doesn't overlap completely. There must
  1508. // be a FIN in the TRH. We'll just return some bogus value
  1509. // that nobody will look at with a size of 0.
  1510. //
  1511. FreePacketChain(CurrentTRH->trh_buffer);
  1512. ASSERT(CurrentTRH->trh_flags & TCP_FLAG_FIN);
  1513. NewPacket =&DummyPacket;
  1514. *Size = 0;
  1515. }
  1516. RcvTCB->tcb_raq = CurrentTRH->trh_next;
  1517. if (RcvTCB->tcb_raq == NULL) {
  1518. //
  1519. // We've just cleaned off the RAQ. We can go back on the
  1520. // fast path now.
  1521. //
  1522. if (--(RcvTCB->tcb_slowcount) == 0) {
  1523. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1524. CheckTCBRcv(RcvTCB);
  1525. }
  1526. }
  1527. ExFreePool(CurrentTRH);
  1528. return NewPacket;
  1529. }
  1530. }
  1531. #if DBG
  1532. *Size = 0;
  1533. #endif
  1534. return NULL;
  1535. }
  1536. //* CreateTRH - Create a TCP reassembly header.
  1537. //
  1538. // This function tries to create a TCP reassembly header. We take as input
  1539. // a pointer to the previous TRH in the chain, the IPv6Packet to put on,
  1540. // etc. and try to create and link in a TRH. The caller must hold the lock
  1541. // on the TCB when this is called.
  1542. //
  1543. uint // Returns: TRUE if we created it, FALSE otherwise.
  1544. CreateTRH(
  1545. TCPRAHdr *PrevTRH, // TRH to insert after.
  1546. IPv6Packet *Packet, // IP Packet chain.
  1547. TCPRcvInfo *RcvInfo, // RcvInfo for this TRH.
  1548. int Size) // Size in bytes of data.
  1549. {
  1550. TCPRAHdr *NewTRH;
  1551. IPv6Packet *NewPacket;
  1552. ASSERT((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));
  1553. NewTRH = ExAllocatePoolWithTagPriority(NonPagedPool, sizeof(TCPRAHdr),
  1554. TCP6_TAG, LowPoolPriority);
  1555. if (NewTRH == NULL)
  1556. return FALSE;
  1557. NewPacket = ExAllocatePoolWithTagPriority(NonPagedPool,
  1558. sizeof(IPv6Packet) + Size,
  1559. TCP6_TAG, LowPoolPriority);
  1560. if (NewPacket == NULL) {
  1561. ExFreePool(NewTRH);
  1562. return FALSE;
  1563. }
  1564. #if DBG
  1565. NewTRH->trh_sig = trh_signature;
  1566. #endif
  1567. NewPacket->Next = NULL;
  1568. NewPacket->Position = 0;
  1569. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1570. NewPacket->Data = NewPacket->FlatData;
  1571. NewPacket->ContigSize = (uint)Size;
  1572. NewPacket->TotalSize = (uint)Size;
  1573. NewPacket->NdisPacket = NULL;
  1574. NewPacket->AuxList = NULL;
  1575. NewPacket->Flags = PACKET_OURS;
  1576. if (Size != 0)
  1577. CopyPacketToBuffer(NewPacket->Data, Packet, Size, Packet->Position);
  1578. NewTRH->trh_start = RcvInfo->tri_seq;
  1579. NewTRH->trh_flags = RcvInfo->tri_flags;
  1580. NewTRH->trh_size = Size;
  1581. NewTRH->trh_urg = RcvInfo->tri_urgent;
  1582. NewTRH->trh_buffer = NewPacket;
  1583. NewTRH->trh_end = NewPacket;
  1584. NewTRH->trh_next = PrevTRH->trh_next;
  1585. PrevTRH->trh_next = NewTRH;
  1586. return TRUE;
  1587. }
  1588. //* PutOnRAQ - Put a segment on the reassembly queue.
  1589. //
  1590. // Called during segment reception to put a segment on the reassembly
  1591. // queue. We try to use as few reassembly headers as possible, so if this
  1592. // segment has some overlap with an existing entry in the queue we'll just
  1593. // update the existing entry. If there is no overlap we'll create a new
  1594. // reassembly header. Combining URGENT data with non-URGENT data is tricky.
  1595. // If we get a segment that has urgent data that overlaps the front of a
  1596. // reassembly header we'll always mark the whole chunk as urgent - the value
  1597. // of the urgent pointer will mark the end of urgent data, so this is OK.
  1598. // If it only overlaps at the end, however, we won't combine, since we would
  1599. // have to mark previously non-urgent data as urgent. We'll trim the
  1600. // front of the incoming segment and create a new reassembly header. Also,
  1601. // if we have non-urgent data that overlaps at the front of a reassembly
  1602. // header containing urgent data we can't combine these two, since again we
  1603. // would mark non-urgent data as urgent.
  1604. // Our search will stop if we find an entry with a FIN.
  1605. // We assume that the TCB lock is held by the caller.
  1606. //
  1607. uint // Returns: TRUE if successful, FALSE otherwise.
  1608. PutOnRAQ(
  1609. TCB *RcvTCB, // TCB on which to reassemble.
  1610. TCPRcvInfo *RcvInfo, // RcvInfo for new segment.
  1611. IPv6Packet *Packet, // Packet chain for this segment.
  1612. uint Size) // Size in bytes of data in this segment.
  1613. {
  1614. TCPRAHdr *PrevTRH; // Previous reassembly header.
  1615. TCPRAHdr *CurrentTRH; // Current reassembly header.
  1616. SeqNum NextSeq; // Seq num of 1st byte after seg being reassembled.
  1617. SeqNum NextTRHSeq; // Sequence number of 1st byte after current TRH.
  1618. uint Created;
  1619. CHECK_STRUCT(RcvTCB, tcb);
  1620. ASSERT(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
  1621. ASSERT(!(RcvInfo->tri_flags & TCP_FLAG_SYN));
  1622. NextSeq = RcvInfo->tri_seq + Size +
  1623. ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  1624. PrevTRH = CONTAINING_RECORD(&RcvTCB->tcb_raq, TCPRAHdr, trh_next);
  1625. CurrentTRH = PrevTRH->trh_next;
  1626. //
  1627. // Walk down the reassembly queue, looking for the correct place to
  1628. // insert this, until we hit the end.
  1629. //
  1630. while (CurrentTRH != NULL) {
  1631. CHECK_STRUCT(CurrentTRH, trh);
  1632. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  1633. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  1634. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  1635. //
  1636. // First, see if it starts beyond the end of the current TRH.
  1637. //
  1638. if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
  1639. //
  1640. // We know the incoming segment doesn't start beyond the end
  1641. // of this TRH, so we'll either create a new TRH in front of
  1642. // this one or we'll merge the new segment onto this TRH.
  1643. // If the end of the current segment is in front of the start
  1644. // of the current TRH, we'll need to create a new TRH. Otherwise
  1645. // we'll merge these two.
  1646. //
  1647. if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
  1648. break;
  1649. else {
  1650. //
  1651. // There's some overlap. If there's actually data in the
  1652. // incoming segment we'll merge it.
  1653. //
  1654. if (Size != 0) {
  1655. int FrontOverlap, BackOverlap;
  1656. IPv6Packet *NewPacket;
  1657. //
  1658. // We need to merge. If there's a FIN on the incoming
  1659. // segment that would fall inside this current TRH, we
  1660. // have a protocol violation from the remote peer. In
  1661. // this case just return, discarding the incoming segment.
  1662. //
  1663. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  1664. SEQ_LTE(NextSeq, NextTRHSeq))
  1665. return TRUE;
  1666. //
  1667. // We have some overlap. Figure out how much.
  1668. //
  1669. FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
  1670. if (FrontOverlap > 0) {
  1671. //
  1672. // Have overlap in front. Allocate an IPv6Packet to
  1673. // to hold it, and copy it, unless we would have to
  1674. // combine non-urgent with urgent.
  1675. //
  1676. if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
  1677. (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
  1678. if (CreateTRH(PrevTRH, Packet, RcvInfo,
  1679. CurrentTRH->trh_start - RcvInfo->tri_seq)) {
  1680. PrevTRH = PrevTRH->trh_next;
  1681. CurrentTRH = PrevTRH->trh_next;
  1682. }
  1683. FrontOverlap = 0;
  1684. } else {
  1685. NewPacket = ExAllocatePoolWithTagPriority(
  1686. NonPagedPool,
  1687. sizeof(IPv6Packet) + FrontOverlap,
  1688. TCP6_TAG, LowPoolPriority);
  1689. if (NewPacket == NULL) {
  1690. // Couldn't allocate memory.
  1691. return TRUE;
  1692. }
  1693. NewPacket->Position = 0;
  1694. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1695. NewPacket->Data = NewPacket->FlatData;
  1696. NewPacket->ContigSize = FrontOverlap;
  1697. NewPacket->TotalSize = FrontOverlap;
  1698. NewPacket->NdisPacket = NULL;
  1699. NewPacket->AuxList = NULL;
  1700. NewPacket->Flags = PACKET_OURS;
  1701. CopyPacketToBuffer(NewPacket->Data, Packet,
  1702. FrontOverlap, Packet->Position);
  1703. CurrentTRH->trh_size += FrontOverlap;
  1704. //
  1705. // Put our new packet on the front of this
  1706. // reassembly header's packet list.
  1707. //
  1708. NewPacket->Next = CurrentTRH->trh_buffer;
  1709. CurrentTRH->trh_buffer = NewPacket;
  1710. CurrentTRH->trh_start = RcvInfo->tri_seq;
  1711. }
  1712. }
  1713. //
  1714. // We've updated the starting sequence number of this TRH
  1715. // if we needed to. Now look for back overlap. There
  1716. // can't be any back overlap if the current TRH has a FIN.
  1717. // Also we'll need to check for urgent data if there is
  1718. // back overlap.
  1719. //
  1720. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  1721. BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
  1722. if ((BackOverlap > 0) &&
  1723. (RcvInfo->tri_flags & TCP_FLAG_URG) &&
  1724. !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
  1725. (FrontOverlap <= 0)) {
  1726. int AmountToTrim;
  1727. //
  1728. // The incoming segment has urgent data and
  1729. // overlaps on the back but not the front, and the
  1730. // current TRH has no urgent data. We can't
  1731. // combine into this TRH, so trim the front of the
  1732. // incoming segment to NextTRHSeq and move to the
  1733. // next TRH.
  1734. AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
  1735. ASSERT(AmountToTrim >= 0);
  1736. ASSERT(AmountToTrim < (int) Size);
  1737. Packet = TrimPacket(Packet, (uint)AmountToTrim);
  1738. RcvInfo->tri_seq += AmountToTrim;
  1739. RcvInfo->tri_urgent -= AmountToTrim;
  1740. PrevTRH = CurrentTRH;
  1741. CurrentTRH = PrevTRH->trh_next;
  1742. Size -= AmountToTrim;
  1743. continue;
  1744. }
  1745. } else
  1746. BackOverlap = 0;
  1747. //
  1748. // Now if we have back overlap, copy it.
  1749. //
  1750. if (BackOverlap > 0) {
  1751. //
  1752. // We have back overlap. Get a buffer to copy it into.
  1753. // If we can't get one, we won't just return, because
  1754. // we may have updated the front and may need to
  1755. // update the urgent info.
  1756. //
  1757. NewPacket = ExAllocatePoolWithTagPriority(
  1758. NonPagedPool,
  1759. sizeof(IPv6Packet) + BackOverlap,
  1760. TCP6_TAG, LowPoolPriority);
  1761. if (NewPacket != NULL) {
  1762. // Allocation succeeded.
  1763. NewPacket->Position = 0;
  1764. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1765. NewPacket->Data = NewPacket->FlatData;
  1766. NewPacket->ContigSize = BackOverlap;
  1767. NewPacket->TotalSize = BackOverlap;
  1768. NewPacket->NdisPacket = NULL;
  1769. NewPacket->AuxList = NULL;
  1770. NewPacket->Flags = PACKET_OURS;
  1771. CopyPacketToBuffer(NewPacket->Data, Packet,
  1772. BackOverlap, Packet->Position +
  1773. NextTRHSeq - RcvInfo->tri_seq);
  1774. CurrentTRH->trh_size += BackOverlap;
  1775. NewPacket->Next = CurrentTRH->trh_end->Next;
  1776. CurrentTRH->trh_end->Next = NewPacket;
  1777. CurrentTRH->trh_end = NewPacket;
  1778. //
  1779. // This segment could also have FIN set.
  1780. // If it does, set the TRH flag.
  1781. //
  1782. // N.B. If there's another reassembly header after
  1783. // the current one, the data that we're about to
  1784. // put on the current header might already be
  1785. // on that subsequent header which, in that event,
  1786. // will already have the FIN flag set.
  1787. // Check for that case before recording the FIN.
  1788. //
  1789. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  1790. !CurrentTRH->trh_next) {
  1791. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  1792. }
  1793. }
  1794. }
  1795. //
  1796. // Everything should be consistent now. If there's an
  1797. // urgent data pointer in the incoming segment, update the
  1798. // one in the TRH now.
  1799. //
  1800. if (RcvInfo->tri_flags & TCP_FLAG_URG) {
  1801. SeqNum UrgSeq;
  1802. //
  1803. // Have an urgent pointer. If the current TRH already
  1804. // has an urgent pointer, see which is bigger.
  1805. // Otherwise just use this one.
  1806. //
  1807. UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
  1808. if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
  1809. SeqNum TRHUrgSeq;
  1810. TRHUrgSeq = CurrentTRH->trh_start +
  1811. CurrentTRH->trh_urg;
  1812. if (SEQ_LT(UrgSeq, TRHUrgSeq))
  1813. UrgSeq = TRHUrgSeq;
  1814. } else
  1815. CurrentTRH->trh_flags |= TCP_FLAG_URG;
  1816. CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
  1817. }
  1818. } else {
  1819. //
  1820. // We have a 0 length segment. The only interesting thing
  1821. // here is if there's a FIN on the segment. If there is,
  1822. // and the seq. # of the incoming segment is exactly after
  1823. // the current TRH, OR matches the FIN in the current TRH,
  1824. // we note it.
  1825. if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
  1826. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  1827. if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
  1828. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  1829. else
  1830. KdBreakPoint();
  1831. }
  1832. else {
  1833. if (!(SEQ_EQ((NextTRHSeq-1), RcvInfo->tri_seq))) {
  1834. KdBreakPoint();
  1835. }
  1836. }
  1837. }
  1838. }
  1839. return TRUE;
  1840. }
  1841. } else {
  1842. //
  1843. // Look at the next TRH, unless the current TRH has a FIN. If he
  1844. // has a FIN, we won't save any data beyond that anyway.
  1845. //
  1846. if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
  1847. return TRUE;
  1848. PrevTRH = CurrentTRH;
  1849. CurrentTRH = PrevTRH->trh_next;
  1850. }
  1851. }
  1852. //
  1853. // When we get here, we need to create a new TRH. If we create one and
  1854. // there was previously nothing on the reassembly queue, we'll have to
  1855. // move off the fast receive path.
  1856. //
  1857. CurrentTRH = RcvTCB->tcb_raq;
  1858. Created = CreateTRH(PrevTRH, Packet, RcvInfo, (int)Size);
  1859. if (Created && CurrentTRH == NULL) {
  1860. RcvTCB->tcb_slowcount++;
  1861. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  1862. CheckTCBRcv(RcvTCB);
  1863. } else if (!Created) {
  1864. return FALSE;
  1865. }
  1866. return TRUE;
  1867. }
  1868. //* HandleFastXmit - Handles fast retransmit algorithm. See RFC 2581.
  1869. //
  1870. // Called by TCPReceive to determine if we should retransmit a segment
  1871. // without waiting for retransmit timeout to fire.
  1872. //
  1873. BOOLEAN // Returns: TRUE if the segment got retransmitted, FALSE otherwise.
  1874. HandleFastXmit(
  1875. TCB *RcvTCB, // Connection context for this receive.
  1876. TCPRcvInfo *RcvInfo) // Pointer to rcvd TCP Header information.
  1877. {
  1878. uint CWin;
  1879. RcvTCB->tcb_dupacks++;
  1880. if (RcvTCB->tcb_dupacks == MaxDupAcks) {
  1881. //
  1882. // We're going to do a fast retransmit.
  1883. // Stop the retransmit timer and any round-trip time
  1884. // calculations we might have been running.
  1885. //
  1886. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  1887. RcvTCB->tcb_rtt = 0;
  1888. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  1889. //
  1890. // Don't let the slow start threshold go
  1891. // below 2 segments.
  1892. //
  1893. RcvTCB->tcb_ssthresh =
  1894. MAX(MIN(RcvTCB->tcb_cwin, RcvTCB->tcb_sendwin) / 2,
  1895. (uint) RcvTCB->tcb_mss * 2);
  1896. }
  1897. //
  1898. // Inflate the congestion window by the number of segments
  1899. // which have presumably left the network.
  1900. //
  1901. CWin = RcvTCB->tcb_ssthresh + (MaxDupAcks * RcvTCB->tcb_mss);
  1902. //
  1903. // Recall the segment in question and send it out.
  1904. // Note that tcb_lock will be dereferenced by the caller.
  1905. //
  1906. ResetAndFastSend(RcvTCB, RcvTCB->tcb_senduna, CWin);
  1907. return TRUE;
  1908. } else {
  1909. int SendWin;
  1910. uint AmtOutstanding;
  1911. //
  1912. // REVIEW: At least the first part of this check is redundant.
  1913. //
  1914. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  1915. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  1916. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  1917. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo->tri_ack)))) {
  1918. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  1919. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  1920. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  1921. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  1922. }
  1923. if (RcvTCB->tcb_dupacks > MaxDupAcks) {
  1924. //
  1925. // Update the congestion window to reflect the fact that the
  1926. // duplicate ack presumably indicates that the previous frame
  1927. // was received by our peer and has thus left the network.
  1928. //
  1929. RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
  1930. }
  1931. //
  1932. // Check if we need to set tcb_force.
  1933. //
  1934. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  1935. AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
  1936. RcvTCB->tcb_senduna);
  1937. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  1938. AmtOutstanding);
  1939. if (SendWin < RcvTCB->tcb_mss) {
  1940. RcvTCB->tcb_force = 1;
  1941. }
  1942. }
  1943. }
  1944. return FALSE;
  1945. }
  1946. //* TCPReceive - Receive an incoming TCP segment.
  1947. //
  1948. // This is the routine called by IPv6 when we need to receive a TCP segment.
  1949. // In general, we follow the RFC 793 event processing section pretty closely,
  1950. // but there is a 'fast path' where we make some quick checks on the incoming
  1951. // segment, and if it matches we deliver it immediately.
  1952. //
  1953. uchar // Returns: next header value (always IP_PROTOCOL_NONE for TCP).
  1954. TCPReceive(
  1955. IPv6Packet *Packet) // Packet IP handed up to us.
  1956. {
  1957. NetTableEntry *NTE;
  1958. TCPHeader UNALIGNED *TCP; // The TCP header.
  1959. uint DataOffset; // Offset from start of TCP header to data.
  1960. ushort Checksum;
  1961. TCPRcvInfo RcvInfo; // Local swapped copy of receive info.
  1962. uint SrcScopeId; // Scope id of remote address, if applicable.
  1963. uint DestScopeId; // Scope id of local address, if applicable.
  1964. TCB *RcvTCB; // TCB on which to receive the packet.
  1965. uint Inserted;
  1966. uint Actions; // Flags for future actions to be performed.
  1967. uint BytesTaken;
  1968. uint NewSize;
  1969. BOOLEAN UseIsn = FALSE;
  1970. SeqNum Isn = 0;
  1971. //
  1972. // REVIEW: Expediency hacks to get something working.
  1973. //
  1974. uint Size; // Probably safe to just change name to PayloadLength below.
  1975. //
  1976. // TCP only works with unicast addresses. If this packet was
  1977. // received on a unicast address, then Packet->NTEorIF will be an
  1978. // NTE. So drop packets if we don't have an NTE.
  1979. // (IPv6HeaderReceive checks validity.) But the converse isn't
  1980. // true, we could have an NTE here that is associated with the
  1981. // anycast/multicast address we received the packet on. So to
  1982. // guard against that, we verify that our NTE's address is the
  1983. // destination given in the packet.
  1984. //
  1985. if (!IsNTE(Packet->NTEorIF) ||
  1986. !IP6_ADDR_EQUAL(AlignAddr(&Packet->IP->Dest),
  1987. &(NTE = CastToNTE(Packet->NTEorIF))->Address)) {
  1988. // Packet's destination was not a valid unicast address of ours.
  1989. return IP_PROTOCOL_NONE; // Drop packet.
  1990. }
  1991. TStats.ts_insegs++;
  1992. //
  1993. // Verify that we have enough contiguous data to overlay a TCPHeader
  1994. // structure on the incoming packet. Then do so.
  1995. //
  1996. if (! PacketPullup(Packet, sizeof(TCPHeader), 1, 0)) {
  1997. // Pullup failed.
  1998. TStats.ts_inerrs++;
  1999. if (Packet->TotalSize < sizeof(TCPHeader)) {
  2000. BadPayloadLength:
  2001. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_BAD_PACKET,
  2002. "TCPv6: data buffer too small to contain TCP header\n"));
  2003. ICMPv6SendError(Packet,
  2004. ICMPv6_PARAMETER_PROBLEM,
  2005. ICMPv6_ERRONEOUS_HEADER_FIELD,
  2006. FIELD_OFFSET(IPv6Header, PayloadLength),
  2007. IP_PROTOCOL_NONE, FALSE);
  2008. }
  2009. return IP_PROTOCOL_NONE; // Drop packet.
  2010. }
  2011. TCP = (TCPHeader UNALIGNED *)Packet->Data;
  2012. //
  2013. // Verify checksum.
  2014. //
  2015. Checksum = ChecksumPacket(Packet->NdisPacket, Packet->Position,
  2016. Packet->FlatData, Packet->TotalSize,
  2017. Packet->SrcAddr, AlignAddr(&Packet->IP->Dest),
  2018. IP_PROTOCOL_TCP);
  2019. if (Checksum != 0xffff) {
  2020. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR,
  2021. "TCPv6: Checksum failed %0x\n", Checksum));
  2022. TStats.ts_inerrs++;
  2023. return IP_PROTOCOL_NONE; // Drop packet.
  2024. }
  2025. //
  2026. // Now that we can read the header, pull out the header length field.
  2027. // Verify that we have enough contiguous data to hold any TCP options
  2028. // that may be present in the header, and skip over the entire header.
  2029. //
  2030. DataOffset = TCP_HDR_SIZE(TCP);
  2031. if (! PacketPullup(Packet, DataOffset, 1, 0)) {
  2032. TStats.ts_inerrs++;
  2033. if (Packet->TotalSize < DataOffset)
  2034. goto BadPayloadLength;
  2035. return IP_PROTOCOL_NONE; // Drop packet.
  2036. }
  2037. TCP = (TCPHeader UNALIGNED *)Packet->Data;
  2038. AdjustPacketParams(Packet, DataOffset);
  2039. Size = Packet->TotalSize;
  2040. //
  2041. // Verify IPSec was performed.
  2042. //
  2043. if (InboundSecurityCheck(Packet, IP_PROTOCOL_TCP, net_short(TCP->tcp_src),
  2044. net_short(TCP->tcp_dest), NTE->IF) != TRUE) {
  2045. //
  2046. // No policy was found or the policy indicated to drop the packet.
  2047. //
  2048. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR,
  2049. "TCPReceive: IPSec Policy caused packet to be dropped\n"));
  2050. return IP_PROTOCOL_NONE; // Drop packet.
  2051. }
  2052. //
  2053. // The packet is valid.
  2054. // Get the info we need and byte swap it.
  2055. //
  2056. RcvInfo.tri_seq = net_long(TCP->tcp_seq);
  2057. RcvInfo.tri_ack = net_long(TCP->tcp_ack);
  2058. RcvInfo.tri_window = (uint)net_short(TCP->tcp_window);
  2059. RcvInfo.tri_urgent = (uint)net_short(TCP->tcp_urgent);
  2060. RcvInfo.tri_flags = (uint)TCP->tcp_flags;
  2061. //
  2062. // Determine the appropriate scope id for our packet's addresses.
  2063. // Note that multicast addresses were forbidden above.
  2064. // We use DetermineScopeId instead of just indexing into ZoneIndices
  2065. // because we need the "user-level" scope id here.
  2066. //
  2067. SrcScopeId = DetermineScopeId(Packet->SrcAddr, NTE->IF);
  2068. DestScopeId = DetermineScopeId(&NTE->Address, NTE->IF);
  2069. //
  2070. // See if we have a TCP Control Block for this connection.
  2071. //
  2072. KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
  2073. RcvTCB = FindTCB(AlignAddr(&Packet->IP->Dest), Packet->SrcAddr,
  2074. DestScopeId, SrcScopeId, TCP->tcp_dest, TCP->tcp_src);
  2075. if (RcvTCB == NULL) {
  2076. uchar DType;
  2077. //
  2078. // Didn't find a matching TCB, which means incoming segment doesn't
  2079. // belong to an existing connection.
  2080. //
  2081. KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
  2082. //
  2083. // Make sure that the source address is reasonable
  2084. // before proceeding.
  2085. //
  2086. ASSERT(!IsInvalidSourceAddress(Packet->SrcAddr));
  2087. if (IsUnspecified(Packet->SrcAddr)) {
  2088. return IP_PROTOCOL_NONE;
  2089. }
  2090. //
  2091. // If this segment carries a SYN (and only a SYN), it's a
  2092. // connection initiation request.
  2093. //
  2094. if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK |
  2095. TCP_FLAG_RST)) == TCP_FLAG_SYN) {
  2096. AddrObj *AO;
  2097. ValidNewConnectionRequest:
  2098. //
  2099. // We need to look for a matching address object.
  2100. // Want match for local address (+ scope id for scoped addresses),
  2101. // port and protocol.
  2102. //
  2103. KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);
  2104. AO = GetBestAddrObj(AlignAddr(&Packet->IP->Dest),
  2105. DestScopeId, TCP->tcp_dest,
  2106. IP_PROTOCOL_TCP, NTE->IF);
  2107. if (AO == NULL) {
  2108. //
  2109. // No address object. Free the lock, and send a RST.
  2110. //
  2111. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  2112. goto SendReset;
  2113. }
  2114. //
  2115. // Found an AO. See if it has a listen indication.
  2116. // FindListenConn will free the lock on the AddrObjTable.
  2117. //
  2118. RcvTCB = FindListenConn(AO, Packet->SrcAddr, SrcScopeId,
  2119. TCP->tcp_src);
  2120. if (RcvTCB == NULL) {
  2121. //
  2122. // No listening connection. AddrObjTableLock was
  2123. // released by FindListenConn. Just send a RST.
  2124. //
  2125. goto SendReset;
  2126. }
  2127. CHECK_STRUCT(RcvTCB, tcb);
  2128. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2129. //
  2130. // We found a listening connection. Initialize
  2131. // it now, and if it is actually to be accepted
  2132. // we'll send a SYN-ACK also.
  2133. //
  2134. ASSERT(RcvTCB->tcb_state == TCB_SYN_RCVD);
  2135. RcvTCB->tcb_daddr = *Packet->SrcAddr;
  2136. RcvTCB->tcb_saddr = Packet->IP->Dest;
  2137. RcvTCB->tcb_dscope_id = SrcScopeId;
  2138. RcvTCB->tcb_sscope_id = DestScopeId;
  2139. RcvTCB->tcb_dport = TCP->tcp_src;
  2140. RcvTCB->tcb_sport = TCP->tcp_dest;
  2141. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2142. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext;
  2143. if (UseIsn) {
  2144. RcvTCB->tcb_sendnext = Isn;
  2145. } else {
  2146. GetRandomISN(&RcvTCB->tcb_sendnext, (uchar*)&RcvTCB->tcb_md5data);
  2147. }
  2148. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2149. RcvTCB->tcb_remmss = FindMSS(TCP);
  2150. TStats.ts_passiveopens++;
  2151. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2152. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2153. Inserted = InsertTCB(RcvTCB);
  2154. //
  2155. // Get the lock on it, and see if it's been accepted.
  2156. //
  2157. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2158. if (!Inserted) {
  2159. // Couldn't insert it!.
  2160. CompleteConnReq(RcvTCB, TDI_CONNECTION_ABORTED);
  2161. RcvTCB->tcb_refcnt--;
  2162. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2163. return IP_PROTOCOL_NONE;
  2164. }
  2165. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2166. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  2167. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2168. DelayAction(RcvTCB, NEED_OUTPUT);
  2169. }
  2170. if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
  2171. //
  2172. // The connection was accepted. Finish the
  2173. // initialization, and send the SYN ack.
  2174. //
  2175. AcceptConn(RcvTCB, DISPATCH_LEVEL);
  2176. return IP_PROTOCOL_NONE;
  2177. } else {
  2178. //
  2179. // We don't know what to do about the
  2180. // connection yet. Return the pending listen,
  2181. // dereference the connection, and return.
  2182. //
  2183. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2184. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2185. return IP_PROTOCOL_NONE;
  2186. }
  2187. }
  2188. SendReset:
  2189. //
  2190. // Not a SYN, no AddrObj available, or port filtered.
  2191. // Send a RST back to the sender.
  2192. //
  2193. SendRSTFromHeader(TCP, Packet->TotalSize, Packet->SrcAddr, SrcScopeId,
  2194. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2195. return IP_PROTOCOL_NONE;
  2196. }
  2197. //
  2198. // We found a matching TCB. Get the lock on it, and continue.
  2199. //
  2200. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2201. KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
  2202. //
  2203. // Do the fast path check. We can hit the fast path if the incoming
  2204. // sequence number matches our receive next and the masked flags
  2205. // match our 'predicted' flags.
  2206. //
  2207. CheckTCBRcv(RcvTCB);
  2208. RcvTCB->tcb_alive = TCPTime;
  2209. if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
  2210. (RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk) {
  2211. Actions = 0;
  2212. RcvTCB->tcb_refcnt++;
  2213. //
  2214. // The fast path. We know all we have to do here is ack sends and
  2215. // deliver data. First try and ack data.
  2216. //
  2217. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2218. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2219. uint CWin;
  2220. uint MSS;
  2221. //
  2222. // The ack acknowledges something. Pull the
  2223. // appropriate amount off the send q.
  2224. //
  2225. ACKData(RcvTCB, RcvInfo.tri_ack);
  2226. //
  2227. // If this acknowledges something we were running a RTT on,
  2228. // update that stuff now.
  2229. //
  2230. if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
  2231. RcvTCB->tcb_rttseq)) {
  2232. short RTT;
  2233. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  2234. RcvTCB->tcb_rtt = 0;
  2235. RTT -= (RcvTCB->tcb_smrtt >> 3);
  2236. RcvTCB->tcb_smrtt += RTT;
  2237. RTT = (RTT >= 0 ? RTT : -RTT);
  2238. RTT -= (RcvTCB->tcb_delta >> 3);
  2239. RcvTCB->tcb_delta += RTT + RTT;
  2240. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  2241. MIN_RETRAN_TICKS),
  2242. MAX_REXMIT_TO);
  2243. }
  2244. if ((RcvTCB->tcb_dupacks >= MaxDupAcks) &&
  2245. ((int)RcvTCB->tcb_ssthresh > 0)) {
  2246. //
  2247. // We were in fast retransmit mode, so this ACK is for
  2248. // our fast retransmitted frame. Set cwin to ssthresh
  2249. // so that cwin grows linearly from here.
  2250. //
  2251. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  2252. } else {
  2253. //
  2254. // Update the congestion window now.
  2255. //
  2256. CWin = RcvTCB->tcb_cwin;
  2257. MSS = RcvTCB->tcb_mss;
  2258. if (CWin < RcvTCB->tcb_maxwin) {
  2259. if (CWin < RcvTCB->tcb_ssthresh)
  2260. CWin += MSS;
  2261. else
  2262. CWin += (MSS * MSS)/CWin;
  2263. RcvTCB->tcb_cwin = CWin;
  2264. }
  2265. }
  2266. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  2267. //
  2268. // Since this isn't a duplicate ACK, reset the counter.
  2269. //
  2270. RcvTCB->tcb_dupacks = 0;
  2271. //
  2272. // We've acknowledged something, so reset the rexmit count.
  2273. // If there's still stuff outstanding, restart the rexmit
  2274. // timer.
  2275. //
  2276. RcvTCB->tcb_rexmitcnt = 0;
  2277. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  2278. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2279. else
  2280. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, RcvTCB->tcb_rexmit);
  2281. //
  2282. // Since we've acknowledged data, we need to update the window.
  2283. //
  2284. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2285. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
  2286. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2287. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2288. //
  2289. // We've updated the window, remember to send some more.
  2290. //
  2291. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2292. } else {
  2293. //
  2294. // It doesn't ack anything. If it's an ack for something
  2295. // larger than we've sent then ACKAndDrop it.
  2296. //
  2297. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2298. ACKAndDrop(&RcvInfo, RcvTCB);
  2299. return IP_PROTOCOL_NONE;
  2300. }
  2301. //
  2302. // If it is a pure duplicate ack, check if we should
  2303. // do a fast retransmit.
  2304. //
  2305. if ((Size == 0) && SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2306. SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax) &&
  2307. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  2308. RcvInfo.tri_window) {
  2309. //
  2310. // See if fast rexmit can be done.
  2311. //
  2312. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  2313. return IP_PROTOCOL_NONE;
  2314. }
  2315. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2316. } else {
  2317. //
  2318. // Not a pure duplicate ack (Size != 0 or peer is
  2319. // advertising a new windows). Reset counter.
  2320. //
  2321. RcvTCB->tcb_dupacks = 0;
  2322. //
  2323. // If the ack matches our existing UNA, we need to see if
  2324. // we can update the window.
  2325. //
  2326. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2327. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  2328. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  2329. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  2330. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2331. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  2332. RcvInfo.tri_window);
  2333. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2334. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2335. //
  2336. // Since we've updated the window, remember to send
  2337. // some more.
  2338. //
  2339. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2340. }
  2341. }
  2342. }
  2343. //
  2344. // Check to see if this packet contains any useable data.
  2345. //
  2346. NewSize = MIN((int) Size, RcvTCB->tcb_rcvwin);
  2347. if (NewSize != 0) {
  2348. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2349. BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
  2350. Packet, NewSize);
  2351. RcvTCB->tcb_rcvnext += BytesTaken;
  2352. RcvTCB->tcb_rcvwin -= BytesTaken;
  2353. CheckTCBRcv(RcvTCB);
  2354. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2355. //
  2356. // If our peer is sending into an expanded window, then our
  2357. // peer must have received our ACK advertising said window.
  2358. // Take this as proof of forward reachability.
  2359. //
  2360. if (SEQ_GTE(RcvInfo.tri_seq + (int)NewSize,
  2361. RcvTCB->tcb_rcvwinwatch)) {
  2362. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext +
  2363. RcvTCB->tcb_rcvwin;
  2364. if (RcvTCB->tcb_rce != NULL)
  2365. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2366. }
  2367. Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ? NEED_OUTPUT : 0);
  2368. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2369. if ((RcvTCB->tcb_flags & ACK_DELAYED) ||
  2370. (BytesTaken != NewSize)) {
  2371. Actions |= NEED_ACK;
  2372. } else {
  2373. RcvTCB->tcb_flags |= ACK_DELAYED;
  2374. START_TCB_TIMER(RcvTCB->tcb_delacktimer, DEL_ACK_TICKS);
  2375. }
  2376. } else {
  2377. //
  2378. // The new size is 0. If the original size was not 0, we must
  2379. // have a 0 receive win and hence need to send an ACK to this
  2380. // probe.
  2381. //
  2382. Actions |= (Size ? NEED_ACK : 0);
  2383. }
  2384. if (Actions)
  2385. DelayAction(RcvTCB, Actions);
  2386. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2387. return IP_PROTOCOL_NONE;
  2388. }
  2389. //
  2390. // This is the non-fast path.
  2391. //
  2392. //
  2393. // If we found a matching TCB in TIME_WAIT, and the received segment
  2394. // carries a SYN (and only a SYN), and the received segment has a sequence
  2395. // greater than the last received, kill the TIME_WAIT TCB and use its
  2396. // next sequence number to generate the initial sequence number of a
  2397. // new incarnation.
  2398. //
  2399. if ((RcvTCB->tcb_state == TCB_TIME_WAIT) &&
  2400. ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST))
  2401. == TCP_FLAG_SYN) &&
  2402. SEQ_GT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  2403. Isn = RcvTCB->tcb_sendnext + 128000;
  2404. UseIsn = TRUE;
  2405. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2406. TryToCloseTCB(RcvTCB, TCB_CLOSE_SUCCESS, DISPATCH_LEVEL);
  2407. RcvTCB = NULL;
  2408. goto ValidNewConnectionRequest;
  2409. }
  2410. //
  2411. // Make sure we can handle this frame. We can't handle it if we're
  2412. // in SYN_RCVD and the accept is still pending, or we're in a
  2413. // non-established state and already in the receive handler.
  2414. //
  2415. if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
  2416. !(RcvTCB->tcb_flags & CONN_ACCEPTED)) ||
  2417. (RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
  2418. TCP_FLAG_IN_RCV))) {
  2419. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2420. return IP_PROTOCOL_NONE;
  2421. }
  2422. //
  2423. // If it's closed, it's a temporary zombie TCB. Reset the sender.
  2424. //
  2425. if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
  2426. ((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
  2427. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2428. SendRSTFromHeader(TCP, Packet->TotalSize, Packet->SrcAddr, SrcScopeId,
  2429. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2430. return IP_PROTOCOL_NONE;
  2431. }
  2432. //
  2433. // At this point, we have a connection, and it's locked. Following
  2434. // the 'Segment Arrives' section of 793, the next thing to check is
  2435. // if this connection is in SynSent state.
  2436. //
  2437. if (RcvTCB->tcb_state == TCB_SYN_SENT) {
  2438. ASSERT(RcvTCB->tcb_flags & ACTIVE_OPEN);
  2439. //
  2440. // Check the ACK bit. Since we don't send data with our SYNs, the
  2441. // check we make is for the ack to exactly match our SND.NXT.
  2442. //
  2443. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  2444. // ACK is set.
  2445. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
  2446. // Bad ACK value.
  2447. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2448. // Send a RST back at him.
  2449. SendRSTFromHeader(TCP, Packet->TotalSize,
  2450. Packet->SrcAddr, SrcScopeId,
  2451. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2452. return IP_PROTOCOL_NONE;
  2453. }
  2454. }
  2455. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  2456. //
  2457. // There's an acceptable RST. We'll persist here, sending
  2458. // another SYN in PERSIST_TIMEOUT ms, until we fail from too
  2459. // many retries.
  2460. //
  2461. if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
  2462. //
  2463. // We've had a positive refusal, and one more rexmit
  2464. // would time us out, so close the connection now.
  2465. //
  2466. CompleteConnReq(RcvTCB, TDI_CONN_REFUSED);
  2467. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  2468. } else {
  2469. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, PERSIST_TIMEOUT);
  2470. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2471. }
  2472. return IP_PROTOCOL_NONE;
  2473. }
  2474. //
  2475. // See if we have a SYN. If we do, we're going to change state
  2476. // somehow (either to ESTABLISHED or SYN_RCVD).
  2477. //
  2478. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2479. RcvTCB->tcb_refcnt++;
  2480. //
  2481. // We have a SYN. Go ahead and record the sequence number and
  2482. // window info.
  2483. //
  2484. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2485. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext;
  2486. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  2487. // Urgent data. Update the pointer.
  2488. if (RcvInfo.tri_urgent != 0)
  2489. RcvInfo.tri_urgent--;
  2490. else
  2491. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  2492. }
  2493. RcvTCB->tcb_remmss = FindMSS(TCP);
  2494. RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
  2495. ASSERT(RcvTCB->tcb_mss > 0);
  2496. RcvTCB->tcb_rexmitcnt = 0;
  2497. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2498. AdjustRcvWin(RcvTCB);
  2499. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  2500. //
  2501. // Our SYN has been acked. Update SND.UNA and stop the
  2502. // retrans timer.
  2503. //
  2504. RcvTCB->tcb_senduna = RcvInfo.tri_ack;
  2505. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2506. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  2507. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2508. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2509. GoToEstab(RcvTCB);
  2510. //
  2511. // We know our peer received our SYN.
  2512. //
  2513. if (RcvTCB->tcb_rce != NULL)
  2514. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2515. //
  2516. // Remove whatever command exists on this connection.
  2517. //
  2518. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2519. //
  2520. // If data has been queued already, send the first data segment
  2521. // with the ACK. Otherwise, send a pure ACK.
  2522. //
  2523. if (RcvTCB->tcb_unacked) {
  2524. RcvTCB->tcb_refcnt++;
  2525. TCPSend(RcvTCB, DISPATCH_LEVEL);
  2526. } else {
  2527. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2528. SendACK(RcvTCB);
  2529. }
  2530. //
  2531. // Now handle other data and controls. To do this we need
  2532. // to reacquire the lock, and make sure we haven't started
  2533. // closing it.
  2534. //
  2535. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2536. if (!CLOSING(RcvTCB)) {
  2537. //
  2538. // We haven't started closing it. Turn off the
  2539. // SYN flag and continue processing.
  2540. //
  2541. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  2542. if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) !=
  2543. TCP_FLAG_ACK || Size != 0)
  2544. goto NotSYNSent;
  2545. }
  2546. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2547. return IP_PROTOCOL_NONE;
  2548. } else {
  2549. //
  2550. // A SYN, but not an ACK. Go to SYN_RCVD.
  2551. //
  2552. RcvTCB->tcb_state = TCB_SYN_RCVD;
  2553. RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
  2554. SendSYN(RcvTCB, DISPATCH_LEVEL);
  2555. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2556. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2557. return IP_PROTOCOL_NONE;
  2558. }
  2559. } else {
  2560. //
  2561. // No SYN, just toss the frame.
  2562. //
  2563. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2564. return IP_PROTOCOL_NONE;
  2565. }
  2566. }
  2567. RcvTCB->tcb_refcnt++;
  2568. NotSYNSent:
  2569. //
  2570. // Not in the SYN-SENT state. Check the sequence number. If my window
  2571. // is 0, I'll truncate all incoming frames but look at some of the
  2572. // control fields. Otherwise I'll try and make this segment fit into
  2573. // the window.
  2574. //
  2575. if (RcvTCB->tcb_rcvwin != 0) {
  2576. int StateSize; // Size, including state info.
  2577. SeqNum LastValidSeq; // Sequence number of last valid byte at RWE.
  2578. //
  2579. // We are offering a window. If this segment starts in front of my
  2580. // receive window, clip off the front part.
  2581. //
  2582. if (SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  2583. int AmountToClip, FinByte;
  2584. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2585. //
  2586. // Had a SYN. Clip it off and update the sequence number.
  2587. //
  2588. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  2589. RcvInfo.tri_seq++;
  2590. RcvInfo.tri_urgent--;
  2591. }
  2592. //
  2593. // Advance the receive buffer to point at the new data.
  2594. //
  2595. AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
  2596. ASSERT(AmountToClip >= 0);
  2597. //
  2598. // If there's a FIN on this segment, account for it.
  2599. //
  2600. FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
  2601. if (AmountToClip >= (((int) Size) + FinByte)) {
  2602. //
  2603. // Falls entirely before the window. We have more special
  2604. // case code here - if the ack number acks something,
  2605. // we'll go ahead and take it, faking the sequence number
  2606. // to be rcvnext. This prevents problems on full duplex
  2607. // connections, where data has been received but not acked,
  2608. // and retransmission timers reset the seq number to
  2609. // below our rcvnext.
  2610. //
  2611. if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
  2612. SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2613. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2614. //
  2615. // This contains valid ACK info. Fudge the information
  2616. // to get through the rest of this.
  2617. //
  2618. Size = 0;
  2619. AmountToClip = 0;
  2620. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  2621. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
  2622. TCP_FLAG_RST | TCP_FLAG_URG);
  2623. #if DBG
  2624. FinByte = 1; // Fake out assert below.
  2625. #endif
  2626. } else {
  2627. ACKAndDrop(&RcvInfo, RcvTCB);
  2628. return IP_PROTOCOL_NONE;
  2629. }
  2630. }
  2631. //
  2632. // Trim what we have to. If we can't trim enough, the frame
  2633. // is too short. This shouldn't happen, but if it does we'll
  2634. // drop the frame.
  2635. //
  2636. Size -= AmountToClip;
  2637. RcvInfo.tri_seq += AmountToClip;
  2638. RcvInfo.tri_urgent -= AmountToClip;
  2639. Packet = TrimPacket(Packet, AmountToClip);
  2640. if (*(int *)&RcvInfo.tri_urgent < 0) {
  2641. RcvInfo.tri_urgent = 0;
  2642. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  2643. }
  2644. }
  2645. //
  2646. // We've made sure the front is OK. Now make sure part of it
  2647. // doesn't fall after the window. If it does, we'll truncate the
  2648. // frame (removing the FIN, if any). If we truncate the whole
  2649. // frame we'll ACKAndDrop it.
  2650. //
  2651. StateSize = Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1: 0) +
  2652. ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
  2653. if (StateSize)
  2654. StateSize--;
  2655. //
  2656. // Now the incoming sequence number (RcvInfo.tri_seq) + StateSize
  2657. // is the last sequence number in the segment. If this is greater
  2658. // than the last valid byte in the window, we have some overlap
  2659. // to chop off.
  2660. //
  2661. ASSERT(StateSize >= 0);
  2662. LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
  2663. if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
  2664. int AmountToChop;
  2665. //
  2666. // At least some part of the frame is outside of our window.
  2667. // See if it starts outside our window.
  2668. //
  2669. if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
  2670. //
  2671. // Falls entirely outside the window. We have special
  2672. // case code to deal with a pure ack that falls exactly at
  2673. // our right window edge. Otherwise we ack and drop it.
  2674. //
  2675. if (!SEQ_EQ(RcvInfo.tri_seq, LastValidSeq+1) || Size != 0
  2676. || (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  2677. ACKAndDrop(&RcvInfo, RcvTCB);
  2678. return IP_PROTOCOL_NONE;
  2679. }
  2680. } else {
  2681. //
  2682. // At least some part of it is in the window. If there's a
  2683. // FIN, chop that off and see if that moves us inside.
  2684. //
  2685. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  2686. RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
  2687. StateSize--;
  2688. }
  2689. //
  2690. // Now figure out how much to chop off.
  2691. //
  2692. AmountToChop = (RcvInfo.tri_seq + StateSize) - LastValidSeq;
  2693. ASSERT(AmountToChop >= 0);
  2694. Size -= AmountToChop;
  2695. }
  2696. }
  2697. } else {
  2698. if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
  2699. //
  2700. // If there's a RST on this segment, and he's only off by 1,
  2701. // take it anyway. This can happen if the remote peer is
  2702. // probing and sends with the seq number after the probe.
  2703. //
  2704. if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
  2705. !(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
  2706. ACKAndDrop(&RcvInfo, RcvTCB);
  2707. return IP_PROTOCOL_NONE;
  2708. } else
  2709. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  2710. }
  2711. //
  2712. // He's in sequence, but we have a window of 0. Truncate the
  2713. // size, and clear any sequence consuming bits.
  2714. //
  2715. if (Size != 0 || (RcvInfo.tri_flags &
  2716. (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  2717. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
  2718. Size = 0;
  2719. if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
  2720. DelayAction(RcvTCB, NEED_ACK);
  2721. }
  2722. }
  2723. //
  2724. // At this point, the segment is in our window and does not overlap
  2725. // on either end. If it's the next sequence number we expect, we can
  2726. // handle the data now. Otherwise we'll queue it for later. In either
  2727. // case we'll handle RST and ACK information right now.
  2728. //
  2729. ASSERT((*(int *)&Size) >= 0);
  2730. //
  2731. // Now, following 793, we check the RST bit.
  2732. //
  2733. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  2734. uchar Reason;
  2735. //
  2736. // We can't go back into the LISTEN state from SYN-RCVD here,
  2737. // because we may have notified the client via a listen completing
  2738. // or a connect indication. So, if we came from an active open we'll
  2739. // give back a 'connection refused' notice. For all other cases
  2740. // we'll just destroy the connection.
  2741. //
  2742. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  2743. if (RcvTCB->tcb_flags & ACTIVE_OPEN)
  2744. Reason = TCB_CLOSE_REFUSED;
  2745. else
  2746. Reason = TCB_CLOSE_RST;
  2747. } else
  2748. Reason = TCB_CLOSE_RST;
  2749. TryToCloseTCB(RcvTCB, Reason, DISPATCH_LEVEL);
  2750. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2751. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  2752. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2753. RemoveTCBFromConn(RcvTCB);
  2754. NotifyOfDisc(RcvTCB, TDI_CONNECTION_RESET);
  2755. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2756. }
  2757. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2758. return IP_PROTOCOL_NONE;
  2759. }
  2760. //
  2761. // Next check the SYN bit.
  2762. //
  2763. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2764. //
  2765. // Again, we can't quietly go back into the LISTEN state here, even
  2766. // if we came from a passive open.
  2767. //
  2768. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2769. SendRSTFromHeader(TCP, Size, Packet->SrcAddr, SrcScopeId,
  2770. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2771. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2772. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  2773. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2774. RemoveTCBFromConn(RcvTCB);
  2775. NotifyOfDisc(RcvTCB, TDI_CONNECTION_RESET);
  2776. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2777. }
  2778. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2779. return IP_PROTOCOL_NONE;
  2780. }
  2781. //
  2782. // Check the ACK field. If it's not on drop the segment.
  2783. //
  2784. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  2785. uint UpdateWindow;
  2786. //
  2787. // If we're in SYN-RCVD, go to ESTABLISHED.
  2788. //
  2789. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  2790. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2791. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2792. //
  2793. // The ack is valid.
  2794. //
  2795. if (SynAttackProtect) {
  2796. //
  2797. // If we have not yet indicated this
  2798. // Connection to upper layer, do it now.
  2799. //
  2800. if (RcvTCB->tcb_flags & ACCEPT_PENDING) {
  2801. AddrObj *AO;
  2802. BOOLEAN Status=FALSE;
  2803. //
  2804. // We already have a refcnt on this TCB.
  2805. //
  2806. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2807. //
  2808. // Check if we still have the listening endpoint.
  2809. //
  2810. KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);
  2811. AO = GetBestAddrObj(AlignAddr(&Packet->IP->Dest),
  2812. DestScopeId, TCP->tcp_dest,
  2813. IP_PROTOCOL_TCP, NTE->IF);
  2814. if (AO != NULL) {
  2815. Status = DelayedAcceptConn(AO,Packet->SrcAddr,
  2816. SrcScopeId, TCP->tcp_src,RcvTCB);
  2817. } else {
  2818. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  2819. }
  2820. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2821. if (!Status) {
  2822. //
  2823. // Delayed Acceptance failed. Send RST.
  2824. //
  2825. RcvTCB->tcb_refcnt--;
  2826. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  2827. SendRSTFromHeader(TCP, Packet->TotalSize,
  2828. Packet->SrcAddr, SrcScopeId,
  2829. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2830. return IP_SUCCESS;
  2831. } else {
  2832. RcvTCB->tcb_flags &= ~ACCEPT_PENDING;
  2833. }
  2834. }
  2835. }
  2836. RcvTCB->tcb_rexmitcnt = 0;
  2837. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2838. RcvTCB->tcb_senduna++;
  2839. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2840. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  2841. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2842. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2843. GoToEstab(RcvTCB);
  2844. //
  2845. // We know our peer received our SYN.
  2846. //
  2847. if (RcvTCB->tcb_rce != NULL)
  2848. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2849. //
  2850. // Now complete whatever we can here.
  2851. //
  2852. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2853. } else {
  2854. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2855. SendRSTFromHeader(TCP, Size, Packet->SrcAddr, SrcScopeId,
  2856. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2857. return IP_PROTOCOL_NONE;
  2858. }
  2859. } else {
  2860. //
  2861. // We're not in SYN-RCVD. See if this acknowledges anything.
  2862. //
  2863. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2864. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2865. uint CWin;
  2866. //
  2867. // The ack acknowledges something. Pull the
  2868. // appropriate amount off the send q.
  2869. //
  2870. ACKData(RcvTCB, RcvInfo.tri_ack);
  2871. //
  2872. // If this acknowledges something we were running a RTT on,
  2873. // update that stuff now.
  2874. //
  2875. if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
  2876. RcvTCB->tcb_rttseq)) {
  2877. short RTT;
  2878. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  2879. RcvTCB->tcb_rtt = 0;
  2880. RTT -= (RcvTCB->tcb_smrtt >> 3);
  2881. RcvTCB->tcb_smrtt += RTT;
  2882. RTT = (RTT >= 0 ? RTT : -RTT);
  2883. RTT -= (RcvTCB->tcb_delta >> 3);
  2884. RcvTCB->tcb_delta += RTT + RTT;
  2885. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  2886. MIN_RETRAN_TICKS),
  2887. MAX_REXMIT_TO);
  2888. }
  2889. //
  2890. // If we're probing for a PMTU black hole then we've found
  2891. // one, so turn off the detection. The size is already
  2892. // down, so leave it there.
  2893. //
  2894. if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
  2895. RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
  2896. RcvTCB->tcb_bhprobecnt = 0;
  2897. if (--(RcvTCB->tcb_slowcount) == 0) {
  2898. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  2899. CheckTCBRcv(RcvTCB);
  2900. }
  2901. }
  2902. if ((RcvTCB->tcb_dupacks >= MaxDupAcks) &&
  2903. ((int)RcvTCB->tcb_ssthresh > 0)) {
  2904. //
  2905. // We were in fast retransmit mode, so this ACK is for
  2906. // our fast retransmitted frame. Set cwin to ssthresh
  2907. // so that cwin grows linearly from here.
  2908. //
  2909. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  2910. } else {
  2911. //
  2912. // Update the congestion window now.
  2913. //
  2914. CWin = RcvTCB->tcb_cwin;
  2915. if (CWin < RcvTCB->tcb_maxwin) {
  2916. if (CWin < RcvTCB->tcb_ssthresh)
  2917. CWin += RcvTCB->tcb_mss;
  2918. else
  2919. CWin += (RcvTCB->tcb_mss * RcvTCB->tcb_mss)/CWin;
  2920. RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
  2921. }
  2922. }
  2923. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  2924. //
  2925. // Since this isn't a duplicate ACK, reset the counter.
  2926. //
  2927. RcvTCB->tcb_dupacks = 0;
  2928. //
  2929. // We've acknowledged something, so reset the rexmit count.
  2930. // If there's still stuff outstanding, restart the rexmit
  2931. // timer.
  2932. //
  2933. RcvTCB->tcb_rexmitcnt = 0;
  2934. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  2935. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  2936. RcvTCB->tcb_rexmit);
  2937. else
  2938. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2939. //
  2940. // If we've sent a FIN, and this acknowledges it, we
  2941. // need to complete the client's close request and
  2942. // possibly transition our state.
  2943. //
  2944. if (RcvTCB->tcb_flags & FIN_SENT) {
  2945. //
  2946. // We have sent a FIN. See if it's been acknowledged.
  2947. // Once we've sent a FIN, tcb_sendmax can't advance,
  2948. // so our FIN must have sequence num tcb_sendmax - 1.
  2949. // Thus our FIN is acknowledged if the incoming ack is
  2950. // equal to tcb_sendmax.
  2951. //
  2952. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2953. //
  2954. // He's acked our FIN. Turn off the flags,
  2955. // and complete the request. We'll leave the
  2956. // FIN_OUTSTANDING flag alone, to force early
  2957. // outs in the send code.
  2958. //
  2959. RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
  2960. ASSERT(RcvTCB->tcb_unacked == 0);
  2961. ASSERT(RcvTCB->tcb_sendnext == RcvTCB->tcb_sendmax);
  2962. //
  2963. // Now figure out what we need to do. In FIN_WAIT1
  2964. // or FIN_WAIT, just complete the disconnect
  2965. // request and continue. Otherwise, it's a bit
  2966. // trickier, since we can't complete the connreq
  2967. // until we remove the TCB from its connection.
  2968. //
  2969. switch (RcvTCB->tcb_state) {
  2970. case TCB_FIN_WAIT1:
  2971. RcvTCB->tcb_state = TCB_FIN_WAIT2;
  2972. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2973. //
  2974. // Start a timer in case we never get
  2975. // out of FIN_WAIT2. Set the retransmit
  2976. // count high to force a timeout the
  2977. // first time the timer fires.
  2978. //
  2979. RcvTCB->tcb_rexmitcnt = (uchar)MaxDataRexmitCount;
  2980. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  2981. (ushort)FinWait2TO);
  2982. // Fall through to FIN-WAIT-2 processing.
  2983. case TCB_FIN_WAIT2:
  2984. break;
  2985. case TCB_CLOSING:
  2986. GracefulClose(RcvTCB, TRUE, FALSE, DISPATCH_LEVEL);
  2987. return IP_PROTOCOL_NONE;
  2988. break;
  2989. case TCB_LAST_ACK:
  2990. GracefulClose(RcvTCB, FALSE, FALSE,
  2991. DISPATCH_LEVEL);
  2992. return IP_PROTOCOL_NONE;
  2993. break;
  2994. default:
  2995. KdBreakPoint();
  2996. break;
  2997. }
  2998. }
  2999. }
  3000. UpdateWindow = TRUE;
  3001. } else {
  3002. //
  3003. // It doesn't ack anything. If we're in FIN_WAIT2,
  3004. // we'll restart the timer. We don't make this check
  3005. // above because we know no data can be acked when we're
  3006. // in FIN_WAIT2.
  3007. //
  3008. if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
  3009. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, (ushort)FinWait2TO);
  3010. //
  3011. // If it's an ack for something larger than
  3012. // we've sent then ACKAndDrop it.
  3013. //
  3014. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3015. ACKAndDrop(&RcvInfo, RcvTCB);
  3016. return IP_PROTOCOL_NONE;
  3017. }
  3018. //
  3019. // If it is a pure duplicate ack, check if we should
  3020. // do a fast retransmit.
  3021. //
  3022. if ((Size == 0) &&
  3023. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3024. SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax) &&
  3025. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  3026. RcvInfo.tri_window) {
  3027. //
  3028. // See if fast rexmit can be done.
  3029. //
  3030. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  3031. return IP_PROTOCOL_NONE;
  3032. }
  3033. } else {
  3034. //
  3035. // Not a pure duplicate ack (Size != 0 or peer is
  3036. // advertising a new window). Reset counter.
  3037. //
  3038. RcvTCB->tcb_dupacks = 0;
  3039. //
  3040. // See if we should update the window.
  3041. //
  3042. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3043. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  3044. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  3045. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))){
  3046. UpdateWindow = TRUE;
  3047. } else
  3048. UpdateWindow = FALSE;
  3049. }
  3050. }
  3051. if (UpdateWindow) {
  3052. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3053. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  3054. RcvInfo.tri_window);
  3055. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3056. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3057. if (RcvInfo.tri_window == 0) {
  3058. //
  3059. // We've got a zero window.
  3060. //
  3061. if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
  3062. RcvTCB->tcb_flags &= ~NEED_OUTPUT;
  3063. RcvTCB->tcb_rexmitcnt = 0;
  3064. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  3065. RcvTCB->tcb_rexmit);
  3066. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  3067. RcvTCB->tcb_flags |= FLOW_CNTLD;
  3068. RcvTCB->tcb_slowcount++;
  3069. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  3070. CheckTCBRcv(RcvTCB);
  3071. }
  3072. }
  3073. } else {
  3074. if (RcvTCB->tcb_flags & FLOW_CNTLD) {
  3075. RcvTCB->tcb_rexmitcnt = 0;
  3076. RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
  3077. //
  3078. // Reset send next to the left edge of the window,
  3079. // because it might be at senduna+1 if we've been
  3080. // probing.
  3081. //
  3082. ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
  3083. if (--(RcvTCB->tcb_slowcount) == 0) {
  3084. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  3085. CheckTCBRcv(RcvTCB);
  3086. }
  3087. }
  3088. //
  3089. // Since we've updated the window, see if we can send
  3090. // some more.
  3091. //
  3092. if (RcvTCB->tcb_unacked != 0 ||
  3093. (RcvTCB->tcb_flags & FIN_NEEDED))
  3094. DelayAction(RcvTCB, NEED_OUTPUT);
  3095. }
  3096. }
  3097. }
  3098. //
  3099. // We've handled all the acknowledgment stuff. If the size
  3100. // is greater than 0 or important bits are set process it further,
  3101. // otherwise it's a pure ack and we're done with it.
  3102. //
  3103. if (Size > 0 || (RcvInfo.tri_flags & TCP_FLAG_FIN)) {
  3104. //
  3105. // If we're not in a state where we can process incoming data
  3106. // or FINs, there's no point in going further. Just send an
  3107. // ack and drop this segment.
  3108. //
  3109. if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
  3110. (RcvTCB->tcb_flags & GC_PENDING)) {
  3111. ACKAndDrop(&RcvInfo, RcvTCB);
  3112. return IP_PROTOCOL_NONE;
  3113. }
  3114. //
  3115. // If our peer is sending into an expanded window, then our
  3116. // peer must have received our ACK advertising said window.
  3117. // Take this as proof of forward reachability.
  3118. // Note: we have no guarantee this is timely.
  3119. //
  3120. if (SEQ_GTE(RcvInfo.tri_seq + (int)Size,
  3121. RcvTCB->tcb_rcvwinwatch)) {
  3122. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext +
  3123. RcvTCB->tcb_rcvwin;
  3124. if (RcvTCB->tcb_rce != NULL)
  3125. ConfirmForwardReachability(RcvTCB->tcb_rce);
  3126. }
  3127. //
  3128. // If it's in sequence process it now, otherwise reassemble it.
  3129. //
  3130. if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  3131. //
  3132. // If we're already in the receive handler, this is a
  3133. // duplicate. We'll just toss it.
  3134. //
  3135. if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
  3136. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3137. return IP_PROTOCOL_NONE;
  3138. }
  3139. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  3140. //
  3141. // Now loop, pulling things from the reassembly queue,
  3142. // until the queue is empty, or we can't take all of the
  3143. // data, or we hit a FIN.
  3144. //
  3145. do {
  3146. //
  3147. // Handle urgent data, if any.
  3148. //
  3149. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  3150. HandleUrgent(RcvTCB, &RcvInfo, Packet, &Size);
  3151. //
  3152. // Since we may have freed the lock, we need to
  3153. // recheck and see if we're closing here.
  3154. //
  3155. if (CLOSING(RcvTCB))
  3156. break;
  3157. }
  3158. //
  3159. // OK, the data is in sequence, we've updated the
  3160. // reassembly queue and handled any urgent data. If we
  3161. // have any data go ahead and process it now.
  3162. //
  3163. if (Size > 0) {
  3164. BytesTaken = (*RcvTCB->tcb_rcvhndlr)
  3165. (RcvTCB, RcvInfo.tri_flags, Packet, Size);
  3166. RcvTCB->tcb_rcvnext += BytesTaken;
  3167. RcvTCB->tcb_rcvwin -= BytesTaken;
  3168. CheckTCBRcv(RcvTCB);
  3169. if (RcvTCB->tcb_flags & ACK_DELAYED)
  3170. DelayAction(RcvTCB, NEED_ACK);
  3171. else {
  3172. RcvTCB->tcb_flags |= ACK_DELAYED;
  3173. START_TCB_TIMER(RcvTCB->tcb_delacktimer,
  3174. DEL_ACK_TICKS);
  3175. }
  3176. if (BytesTaken != Size) {
  3177. //
  3178. // We didn't take everything we could. No
  3179. // use in further processing, just bail
  3180. // out.
  3181. //
  3182. DelayAction(RcvTCB, NEED_ACK);
  3183. break;
  3184. }
  3185. //
  3186. // If we're closing now, we're done, so get out.
  3187. //
  3188. if (CLOSING(RcvTCB))
  3189. break;
  3190. }
  3191. //
  3192. // See if we need to advance over some urgent data.
  3193. //
  3194. if (RcvTCB->tcb_flags & URG_VALID) {
  3195. uint AdvanceNeeded;
  3196. //
  3197. // We only need to advance if we're not doing
  3198. // urgent inline. Urgent inline also has some
  3199. // implications for when we can clear the URG_VALID
  3200. // flag. If we're not doing urgent inline, we can
  3201. // clear it when rcvnext advances beyond urgent
  3202. // end. If we are doing urgent inline, we clear it
  3203. // when rcvnext advances one receive window beyond
  3204. // urgend.
  3205. //
  3206. if (!(RcvTCB->tcb_flags & URG_INLINE)) {
  3207. if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart) {
  3208. RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend + 1;
  3209. } else {
  3210. ASSERT(SEQ_LT(RcvTCB->tcb_rcvnext,
  3211. RcvTCB->tcb_urgstart) ||
  3212. SEQ_GT(RcvTCB->tcb_rcvnext,
  3213. RcvTCB->tcb_urgend));
  3214. }
  3215. AdvanceNeeded = 0;
  3216. } else
  3217. AdvanceNeeded = RcvTCB->tcb_defaultwin;
  3218. //
  3219. // See if we can clear the URG_VALID flag.
  3220. //
  3221. if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
  3222. RcvTCB->tcb_urgend)) {
  3223. RcvTCB->tcb_flags &= ~URG_VALID;
  3224. if (--(RcvTCB->tcb_slowcount) == 0) {
  3225. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  3226. CheckTCBRcv(RcvTCB);
  3227. }
  3228. }
  3229. }
  3230. //
  3231. // We've handled the data. If the FIN bit is set, we
  3232. // have more processing.
  3233. //
  3234. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  3235. uint Notify = FALSE;
  3236. RcvTCB->tcb_rcvnext++;
  3237. DelayAction(RcvTCB, NEED_ACK);
  3238. PushData(RcvTCB);
  3239. switch (RcvTCB->tcb_state) {
  3240. case TCB_SYN_RCVD:
  3241. //
  3242. // I don't think we can get here - we
  3243. // should have discarded the frame if it
  3244. // had no ACK, or gone to established if
  3245. // it did.
  3246. //
  3247. KdBreakPoint();
  3248. case TCB_ESTAB:
  3249. RcvTCB->tcb_state = TCB_CLOSE_WAIT;
  3250. //
  3251. // We left established, we're off the
  3252. // fast path.
  3253. //
  3254. RcvTCB->tcb_slowcount++;
  3255. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  3256. CheckTCBRcv(RcvTCB);
  3257. Notify = TRUE;
  3258. break;
  3259. case TCB_FIN_WAIT1:
  3260. RcvTCB->tcb_state = TCB_CLOSING;
  3261. Notify = TRUE;
  3262. break;
  3263. case TCB_FIN_WAIT2:
  3264. //
  3265. // Stop the FIN_WAIT2 timer.
  3266. //
  3267. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  3268. RcvTCB->tcb_refcnt++;
  3269. GracefulClose(RcvTCB, TRUE, TRUE, DISPATCH_LEVEL);
  3270. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3271. break;
  3272. default:
  3273. KdBreakPoint();
  3274. break;
  3275. }
  3276. if (Notify) {
  3277. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  3278. NotifyOfDisc(RcvTCB, TDI_GRACEFUL_DISC);
  3279. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3280. }
  3281. break; // Exit out of WHILE loop.
  3282. }
  3283. //
  3284. // If the reassembly queue isn't empty, get what we
  3285. // can now.
  3286. //
  3287. Packet = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
  3288. CheckPacketList(Packet, Size);
  3289. } while (Packet != NULL);
  3290. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  3291. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  3292. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  3293. DelayAction(RcvTCB, NEED_OUTPUT);
  3294. }
  3295. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3296. return IP_PROTOCOL_NONE;
  3297. } else {
  3298. //
  3299. // It's not in sequence. Since it needs further
  3300. // processing, put it on the reassembly queue.
  3301. //
  3302. if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
  3303. !(RcvTCB->tcb_flags & GC_PENDING)) {
  3304. PutOnRAQ(RcvTCB, &RcvInfo, Packet, Size);
  3305. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  3306. SendACK(RcvTCB);
  3307. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3308. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3309. } else
  3310. ACKAndDrop(&RcvInfo, RcvTCB);
  3311. return IP_PROTOCOL_NONE;
  3312. }
  3313. }
  3314. } else {
  3315. //
  3316. // No ACK. Just drop the segment and return.
  3317. //
  3318. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3319. return IP_PROTOCOL_NONE;
  3320. }
  3321. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3322. return IP_PROTOCOL_NONE;
  3323. }
  3324. //* TCPControlReceive - handler for TCP control messages.
  3325. //
  3326. // This routine is called if we receive an ICMPv6 error message that
  3327. // was generated by some remote site as a result of receiving a TCP
  3328. // packet from us.
  3329. //
  3330. uchar
  3331. TCPControlReceive(
  3332. IPv6Packet *Packet, // Packet handed to us by ICMPv6ErrorReceive.
  3333. StatusArg *StatArg) // Error Code, Argument, and invoking IP header.
  3334. {
  3335. KIRQL Irql0, Irql1; // One per lock nesting level.
  3336. TCB *StatusTCB;
  3337. SeqNum DropSeq;
  3338. TCPHeader UNALIGNED *InvokingTCP;
  3339. Interface *IF = Packet->NTEorIF->IF;
  3340. uint SrcScopeId, DestScopeId;
  3341. //
  3342. // The next thing in the packet should be the TCP header of the
  3343. // original packet which invoked this error.
  3344. //
  3345. if (! PacketPullup(Packet, sizeof(TCPHeader), 1, 0)) {
  3346. // Pullup failed.
  3347. if (Packet->TotalSize < sizeof(TCPHeader))
  3348. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_BAD_PACKET,
  3349. "TCPv6: Packet too small to contain TCP header "
  3350. "from invoking packet\n"));
  3351. return IP_PROTOCOL_NONE; // Drop packet.
  3352. }
  3353. InvokingTCP = (TCPHeader UNALIGNED *)Packet->Data;
  3354. //
  3355. // Determining the scope identifiers for the addresses in the
  3356. // invoking packet is potentially problematic, since we have
  3357. // no way to be certain which interface we sent the packet on.
  3358. // Use the interface the icmp error arrived on to determine
  3359. // the scope ids for both the local and remote addresses.
  3360. //
  3361. SrcScopeId = DetermineScopeId(AlignAddr(&StatArg->IP->Source), IF);
  3362. DestScopeId = DetermineScopeId(AlignAddr(&StatArg->IP->Dest), IF);
  3363. //
  3364. // Find the TCB for the connection this packet was sent on.
  3365. //
  3366. KeAcquireSpinLock(&TCBTableLock, &Irql0);
  3367. StatusTCB = FindTCB(AlignAddr(&StatArg->IP->Source),
  3368. AlignAddr(&StatArg->IP->Dest),
  3369. SrcScopeId, DestScopeId,
  3370. InvokingTCP->tcp_src, InvokingTCP->tcp_dest);
  3371. if (StatusTCB != NULL) {
  3372. //
  3373. // Found one. Get the lock on it, and continue.
  3374. //
  3375. CHECK_STRUCT(StatusTCB, tcb);
  3376. KeAcquireSpinLock(&StatusTCB->tcb_lock, &Irql1);
  3377. KeReleaseSpinLock(&TCBTableLock, Irql1);
  3378. //
  3379. // Make sure the TCB is in a state that is interesting.
  3380. //
  3381. // We also drop packets for TCBs where we don't already have
  3382. // an RCE, since any ICMP errors we get for packets we haven't
  3383. // sent are likely to be spoofed.
  3384. //
  3385. if (StatusTCB->tcb_state == TCB_CLOSED ||
  3386. StatusTCB->tcb_state == TCB_TIME_WAIT ||
  3387. CLOSING(StatusTCB) ||
  3388. StatusTCB->tcb_rce == NULL) {
  3389. //
  3390. // Connection is already closing, or too new to have sent
  3391. // anything yet. Leave it be.
  3392. //
  3393. KeReleaseSpinLock(&StatusTCB->tcb_lock, Irql0);
  3394. return IP_PROTOCOL_NONE; // Discard error packet.
  3395. }
  3396. switch (StatArg->Status) {
  3397. case IP_UNRECOGNIZED_NEXT_HEADER:
  3398. //
  3399. // Destination protocol unreachable.
  3400. // We treat this as a fatal error. Close the connection.
  3401. //
  3402. StatusTCB->tcb_error = StatArg->Status;
  3403. StatusTCB->tcb_refcnt++;
  3404. TryToCloseTCB(StatusTCB, TCB_CLOSE_UNREACH, Irql0);
  3405. RemoveTCBFromConn(StatusTCB);
  3406. NotifyOfDisc(StatusTCB,
  3407. MapIPError(StatArg->Status, TDI_DEST_UNREACHABLE));
  3408. KeAcquireSpinLock(&StatusTCB->tcb_lock, &Irql1);
  3409. DerefTCB(StatusTCB, Irql1);
  3410. return IP_PROTOCOL_NONE; // Done with packet.
  3411. break;
  3412. case IP_DEST_NO_ROUTE:
  3413. case IP_DEST_ADDR_UNREACHABLE:
  3414. case IP_DEST_PORT_UNREACHABLE:
  3415. case IP_DEST_PROHIBITED:
  3416. case IP_BAD_ROUTE:
  3417. case IP_HOP_LIMIT_EXCEEDED:
  3418. case IP_REASSEMBLY_TIME_EXCEEDED:
  3419. case IP_PARAMETER_PROBLEM:
  3420. //
  3421. // Soft errors. Save the error in case it times out.
  3422. //
  3423. StatusTCB->tcb_error = StatArg->Status;
  3424. break;
  3425. case IP_PACKET_TOO_BIG: {
  3426. uint PMTU;
  3427. IF_TCPDBG(TCP_DEBUG_MSS) {
  3428. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  3429. "TCPControlReceive: Got Packet Too Big\n"));
  3430. }
  3431. //
  3432. // We sent a TCP datagram which was too big for the path to
  3433. // our destination. That packet was dropped by the router
  3434. // which sent us this error message. The Arg value is TRUE
  3435. // if this Packet Too Big reduced our PMTU, FALSE otherwise.
  3436. //
  3437. if (!StatArg->Arg)
  3438. break;
  3439. //
  3440. // Our PMTU was reduced. Find out what it is now.
  3441. //
  3442. PMTU = GetEffectivePathMTUFromRCE(StatusTCB->tcb_rce);
  3443. //
  3444. // Update fields based on new PMTU.
  3445. //
  3446. StatusTCB->tcb_pmtu = PMTU;
  3447. StatusTCB->tcb_security = SecurityStateValidationCounter;
  3448. CalculateMSSForTCB(StatusTCB);
  3449. //
  3450. // Since our PMTU was reduced, we know that this is the first
  3451. // Packet Too Big we've received about this bottleneck.
  3452. // We should retransmit so long as this is for a legitimate
  3453. // outstanding packet (i.e. sequence number is greater than
  3454. // the last acked and less than our current send next).
  3455. //
  3456. DropSeq = net_long(InvokingTCP->tcp_seq);
  3457. if ((SEQ_GTE(DropSeq, StatusTCB->tcb_senduna) &&
  3458. SEQ_LT(DropSeq, StatusTCB->tcb_sendnext))) {
  3459. //
  3460. // Need to initiate a retransmit.
  3461. //
  3462. ResetSendNext(StatusTCB, DropSeq);
  3463. //
  3464. // WINBUG #242757 11-27-2000 richdr TCP resp. to Packet Too Big
  3465. // RFC 1981 states that "a retransmission caused by a Packet
  3466. // Too Big message should not change the congestion window.
  3467. // It should, however, trigger the slow-start mechanism."
  3468. // The code below would appear to be broken. However, the
  3469. // IPv4 stack works this way.
  3470. //
  3471. //
  3472. // Set the congestion window to allow only one packet.
  3473. // This may prevent us from sending anything if we
  3474. // didn't just set sendnext to senduna. This is OK,
  3475. // we'll retransmit later, or send when we get an ack.
  3476. //
  3477. StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
  3478. DelayAction(StatusTCB, NEED_OUTPUT);
  3479. }
  3480. }
  3481. break;
  3482. default:
  3483. // Should never happen.
  3484. KdBreakPoint();
  3485. break;
  3486. }
  3487. KeReleaseSpinLock(&StatusTCB->tcb_lock, Irql0);
  3488. } else {
  3489. //
  3490. // Couldn't find a matching TCB. Connection probably went away since
  3491. // we sent the offending packet. Just free the lock and return.
  3492. //
  3493. KeReleaseSpinLock(&TCBTableLock, Irql0);
  3494. }
  3495. return IP_PROTOCOL_NONE; // Done with packet.
  3496. }
  3497. #pragma BEGIN_INIT
  3498. //* InitTCPRcv - Initialize TCP receive side.
  3499. //
  3500. // Called during init time to initialize our TCP receive side.
  3501. //
  3502. int // Returns: TRUE.
  3503. InitTCPRcv(
  3504. void) // Nothing.
  3505. {
  3506. ExInitializeSListHead(&TCPRcvReqFree);
  3507. KeInitializeSpinLock(&RequestCompleteLock);
  3508. KeInitializeSpinLock(&TCBDelayLock);
  3509. KeInitializeSpinLock(&TCPRcvReqFreeLock);
  3510. INITQ(&ConnRequestCompleteQ);
  3511. INITQ(&SendCompleteQ);
  3512. INITQ(&TCBDelayQ);
  3513. RequestCompleteFlags = 0;
  3514. TCBDelayRtnCount = 0;
  3515. TCBDelayRtnLimit = (uint) KeNumberProcessors;
  3516. if (TCBDelayRtnLimit > TCB_DELAY_RTN_LIMIT)
  3517. TCBDelayRtnLimit = TCB_DELAY_RTN_LIMIT;
  3518. RtlZeroMemory(&DummyPacket, sizeof DummyPacket);
  3519. DummyPacket.Flags = PACKET_OURS;
  3520. return TRUE;
  3521. }
  3522. #pragma END_INIT
  3523. //* UnloadTCPRcv
  3524. //
  3525. // Cleanup and prepare for stack unload.
  3526. //
  3527. void
  3528. UnloadTCPRcv(void)
  3529. {
  3530. PSLIST_ENTRY BufferLink;
  3531. while ((BufferLink = ExInterlockedPopEntrySList(&TCPRcvReqFree,
  3532. &TCPRcvReqFreeLock))
  3533. != NULL) {
  3534. TCPRcvReq *RcvReq = CONTAINING_RECORD(BufferLink, TCPRcvReq, trr_next);
  3535. CHECK_STRUCT(RcvReq, trr);
  3536. ExFreePool(RcvReq);
  3537. }
  3538. }