Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4122 lines
155 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1985-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // TCP receive code.
  14. //
  15. // This file contains the code for handling incoming TCP packets.
  16. //
  17. #include "oscfg.h"
  18. #include "ndis.h"
  19. #include "ip6imp.h"
  20. #include "ip6def.h"
  21. #include "icmp.h"
  22. #include "tdi.h"
  23. #include "tdint.h"
  24. #include "tdistat.h"
  25. #include "queue.h"
  26. #include "transprt.h"
  27. #include "addr.h"
  28. #include "tcp.h"
  29. #include "tcb.h"
  30. #include "tcpconn.h"
  31. #include "tcpsend.h"
  32. #include "tcprcv.h"
  33. #include "tcpdeliv.h"
  34. #include "info.h"
  35. #include "tcpcfg.h"
  36. #include "route.h"
  37. #include "security.h"
  38. uint RequestCompleteFlags;
  39. Queue ConnRequestCompleteQ;
  40. Queue SendCompleteQ;
  41. Queue TCBDelayQ;
  42. KSPIN_LOCK RequestCompleteLock;
  43. KSPIN_LOCK TCBDelayLock;
  44. ulong TCBDelayRtnCount;
  45. ulong TCBDelayRtnLimit;
  46. #define TCB_DELAY_RTN_LIMIT 4
  47. uint MaxDupAcks = 2;
  48. extern KSPIN_LOCK TCBTableLock;
  49. extern KSPIN_LOCK AddrObjTableLock;
  50. #define PERSIST_TIMEOUT MS_TO_TICKS(500)
  51. void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  52. NTSTATUS TCPPrepareIrpForCancel(PTCP_CONTEXT TcpContext, PIRP Irp,
  53. PDRIVER_CANCEL CancelRoutine);
  54. extern void TCPRequestComplete(void *Context, unsigned int Status,
  55. unsigned int UnUsed);
  56. VOID TCPCancelRequest(PDEVICE_OBJECT Device, PIRP Irp);
  57. //
  58. // All of the init code can be discarded.
  59. //
  60. #ifdef ALLOC_PRAGMA
  61. int InitTCPRcv(void);
  62. #pragma alloc_text(INIT, InitTCPRcv)
  63. #endif // ALLOC_PRAGMA
  64. //* AdjustRcvWin - Adjust the receive window on a TCB.
  65. //
  66. // A utility routine that adjusts the receive window to an even multiple of
  67. // the local segment size. We round it up to the next closest multiple, or
  68. // leave it alone if it's already an event multiple. We assume we have
  69. // exclusive access to the input TCB.
  70. //
  71. void // Returns: Nothing.
  72. AdjustRcvWin(
  73. TCB *WinTCB) // TCB to be adjusted.
  74. {
  75. ushort LocalMSS;
  76. uchar FoundMSS;
  77. ulong SegmentsInWindow;
  78. ASSERT(WinTCB->tcb_defaultwin != 0);
  79. ASSERT(WinTCB->tcb_rcvwin != 0);
  80. ASSERT(WinTCB->tcb_remmss != 0);
  81. if (WinTCB->tcb_flags & WINDOW_SET)
  82. return;
  83. #if 0
  84. //
  85. // First, get the local MSS by calling IP.
  86. //
  87. // REVIEW: IPv4 had code here to call down to IP to get the local MTU
  88. // REVIEW: corresponding to this source address. Result in "LocalMSS",
  89. // REVIEW: status of call in "FoundMSS".
  90. //
  91. // REVIEW: Why did they do this? tcb_mss is already set by this point!
  92. //
  93. if (!FoundMSS) {
  94. //
  95. // Didn't find it, error out.
  96. //
  97. ASSERT(FALSE);
  98. return;
  99. }
  100. LocalMSS -= sizeof(TCPHeader);
  101. LocalMSS = MIN(LocalMSS, WinTCB->tcb_remmss);
  102. #else
  103. LocalMSS = WinTCB->tcb_mss;
  104. #endif
  105. SegmentsInWindow = WinTCB->tcb_defaultwin / (ulong)LocalMSS;
  106. //
  107. // Make sure we have at least 4 segments in window, if that wouldn't make
  108. // the window too big.
  109. //
  110. if (SegmentsInWindow < 4) {
  111. //
  112. // We have fewer than four segments in the window. Round up to 4
  113. // if we can do so without exceeding the maximum window size; otherwise
  114. // use the maximum multiple that we can fit in 64K. The exception is
  115. // if we can only fit one integral multiple in the window - in that
  116. // case we'll use a window of 0xffff.
  117. //
  118. if (LocalMSS <= (0xffff/4)) {
  119. WinTCB->tcb_defaultwin = (uint)(4 * LocalMSS);
  120. } else {
  121. ulong SegmentsInMaxWindow;
  122. //
  123. // Figure out the maximum number of segments we could possibly
  124. // fit in a window. If this is > 1, use that as the basis for
  125. // our window size. Otherwise use a maximum size window.
  126. //
  127. SegmentsInMaxWindow = 0xffff/(ulong)LocalMSS;
  128. if (SegmentsInMaxWindow != 1)
  129. WinTCB->tcb_defaultwin = SegmentsInMaxWindow * (ulong)LocalMSS;
  130. else
  131. WinTCB->tcb_defaultwin = 0xffff;
  132. }
  133. WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
  134. } else {
  135. //
  136. // If it's not already an even multiple, bump the default and current
  137. // windows to the nearest multiple.
  138. //
  139. if ((SegmentsInWindow * (ulong)LocalMSS) != WinTCB->tcb_defaultwin) {
  140. ulong NewWindow;
  141. NewWindow = (SegmentsInWindow + 1) * (ulong)LocalMSS;
  142. // Don't let the new window be > 64K.
  143. if (NewWindow <= 0xffff) {
  144. WinTCB->tcb_defaultwin = (uint)NewWindow;
  145. WinTCB->tcb_rcvwin = (uint)NewWindow;
  146. }
  147. }
  148. }
  149. }
  150. //* CompleteRcvs - Complete receives on a TCB.
  151. //
  152. // Called when we need to complete receives on a TCB. We pull requests off
  153. // the head of the TCB's receive queue for as long as the head request has
  154. // TRR_PUSHED set, and call each request's completion routine.
       //
       // The TCB lock is taken here and dropped around every completion
       // callback. RCV_CMPLTING stays set in tcb_flags while we work, so a
       // second caller that arrives while the lock is dropped does nothing.
       // Once the receive queue has been emptied we reset the receive handler,
       // indicate any pending data to the client, or carry out a deferred
       // graceful close / disconnect notification (GC_PENDING / DISC_PENDING).
  155. //
  156. void // Returns: Nothing.
  157. CompleteRcvs(
  158. TCB *CmpltTCB) // TCB to complete on.
  159. {
  160. KIRQL OldIrql;
  161. TCPRcvReq *CurrReq, *NextReq, *IndReq;
  162. CHECK_STRUCT(CmpltTCB, tcb);
  163. ASSERT(CmpltTCB->tcb_refcnt != 0);
  164. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
       // Do nothing if the TCB is closing, if a completion pass is already
       // running, or if no receive requests are queued.
  165. if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
  166. && (CmpltTCB->tcb_rcvhead != NULL)) {
  167. CmpltTCB->tcb_flags |= RCV_CMPLTING;
  168. for (;;) {
  169. CurrReq = CmpltTCB->tcb_rcvhead;
  170. IndReq = NULL; // First request completed on this pass; kept rather than freed.
  171. do {
  172. CHECK_STRUCT(CurrReq, trr);
  173. if (CurrReq->trr_flags & TRR_PUSHED) {
  174. //
  175. // Need to complete this one. If this is the current
  176. // receive then advance the current receive to the next
  177. // one in the list. Then set the list head to the next
  178. // one in the list.
  179. //
  180. ASSERT(CurrReq->trr_amt != 0 ||
  181. !DATA_RCV_STATE(CmpltTCB->tcb_state));
  182. NextReq = CurrReq->trr_next;
  183. if (CmpltTCB->tcb_currcv == CurrReq)
  184. CmpltTCB->tcb_currcv = NextReq;
  185. CmpltTCB->tcb_rcvhead = NextReq;
  186. if (NextReq == NULL) {
  187. //
  188. // We've just removed the last buffer. Set the
  189. // rcvhandler to PendData, in case something
  190. // comes in during the callback.
  191. //
  192. ASSERT(CmpltTCB->tcb_rcvhndlr != IndicateData);
  193. CmpltTCB->tcb_rcvhndlr = PendData;
  194. }
  195. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql); // Drop the lock across the client callback.
  196. if (CurrReq->trr_uflags != NULL)
  197. *(CurrReq->trr_uflags) =
  198. TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
  199. (*CurrReq->trr_rtn)(CurrReq->trr_context, TDI_SUCCESS,
  200. CurrReq->trr_amt);
       // Hold on to the first request completed on this pass; it is handed
       // to IndicatePendingData or freed further down. Any later request is
       // freed right away.
  201. if (IndReq != NULL)
  202. FreeRcvReq(CurrReq);
  203. else
  204. IndReq = CurrReq;
  205. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  206. CurrReq = CmpltTCB->tcb_rcvhead; // Re-read the head; the queue may have changed while unlocked.
  207. } else
  208. // This one hasn't been pushed, so it can't be completed yet. Bail out.
  209. break;
  210. } while (CurrReq != NULL);
  211. //
  212. // Now see if we've completed all of the requests. If we have,
  213. // we may need to deal with pending data and/or reset the receive
  214. // handler.
  215. //
  216. if (CurrReq == NULL) {
  217. //
  218. // We've completed everything that can be, so stop the push
  219. // timer. We don't stop it if CurrReq isn't NULL because we
  220. // want to make sure later data is eventually pushed.
  221. //
  222. STOP_TCB_TIMER(CmpltTCB->tcb_pushtimer);
  223. ASSERT(IndReq != NULL);
  224. //
  225. // No more receive requests.
  226. //
  227. if (CmpltTCB->tcb_pendhead == NULL) {
  228. FreeRcvReq(IndReq);
  229. //
  230. // No pending data. Set the receive handler to either
  231. // PendData or IndicateData, unless a close or disconnect
  232. // notification was deferred, in which case do that now.
  233. //
  233. if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
  234. if (CmpltTCB->tcb_rcvind != NULL &&
  235. CmpltTCB->tcb_indicated == 0)
  236. CmpltTCB->tcb_rcvhndlr = IndicateData;
  237. else
  238. CmpltTCB->tcb_rcvhndlr = PendData;
  239. } else {
  240. goto Complete_Notify; // Leaves with the TCB lock held and RCV_CMPLTING set.
  241. }
  242. } else {
  243. //
  244. // We have pending data to deal with.
  245. //
  246. if (CmpltTCB->tcb_rcvind != NULL &&
  247. CmpltTCB->tcb_indicated == 0) {
  248. //
  249. // There's a receive indicate handler on this TCB.
  250. // Call the indicate handler with the pending data.
  251. //
       // NOTE(review): IndicatePendingData is handed OldIrql and the
       // lock is re-acquired below, so it presumably releases the TCB
       // lock and takes over IndReq - confirm against its definition.
  252. IndicatePendingData(CmpltTCB, IndReq, OldIrql);
  253. SendACK(CmpltTCB);
  254. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  255. //
  256. // See if a buffer has been posted. If so, we'll need
  257. // to check and see if it needs to be completed.
  258. //
  259. if (CmpltTCB->tcb_rcvhead != NULL)
  260. continue; // Restart the outer loop from the new queue head.
  261. else {
  262. //
  263. // If the pending head is now NULL, we've used up
  264. // all the data, so a deferred close or disconnect can go ahead.
  265. //
  266. if (CmpltTCB->tcb_pendhead == NULL &&
  267. (CmpltTCB->tcb_flags &
  268. (DISC_PENDING | GC_PENDING)))
  269. goto Complete_Notify; // Leaves with the TCB lock held and RCV_CMPLTING set.
  270. }
  271. } else {
  272. //
  273. // No indicate handler, so nothing to do. The receive
  274. // handler should already be set to PendData.
  275. //
  276. FreeRcvReq(IndReq);
  277. ASSERT(CmpltTCB->tcb_rcvhndlr == PendData);
  278. }
  279. }
  280. } else {
       // The head request hasn't been pushed yet. Free the request we
       // were holding, if any, and leave the rest queued.
  281. if (IndReq != NULL)
  282. FreeRcvReq(IndReq);
  283. ASSERT(CmpltTCB->tcb_rcvhndlr == BufferData);
  284. }
  285. break;
  286. }
  287. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  288. }
  289. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  290. return;
  291. Complete_Notify:
  292. //
  293. // Something is pending. Figure out what it is, and do it. We get here
  294. // with the TCB lock held and RCV_CMPLTING still set.
  295. //
  295. if (CmpltTCB->tcb_flags & GC_PENDING) {
  296. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  297. //
  298. // Bump the refcnt, because GracefulClose will deref the TCB
  299. // and we're not really done with it yet.
  300. //
       // NOTE(review): GracefulClose is passed OldIrql and we return without
       // releasing the lock, so it presumably releases the TCB lock - confirm.
  301. CmpltTCB->tcb_refcnt++;
  302. GracefulClose(CmpltTCB, CmpltTCB->tcb_flags & TW_PENDING, TRUE,
  303. OldIrql);
  304. } else
  305. if (CmpltTCB->tcb_flags & DISC_PENDING) {
       // NOTE(review): NotifyOfDisc gets &OldIrql and the lock is re-acquired
       // right after, so it presumably returns with the TCB lock released.
  306. NotifyOfDisc(CmpltTCB, TDI_GRACEFUL_DISC, &OldIrql);
  307. KeAcquireSpinLock(&CmpltTCB->tcb_lock, &OldIrql);
  308. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  309. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  310. } else {
  311. ASSERT(FALSE); // Both callers checked for one of the two flags under the lock.
  312. KeReleaseSpinLock(&CmpltTCB->tcb_lock, OldIrql);
  313. }
  314. return;
  315. }
  316. //* ProcessTCBDelayQ - Process TCBs on the delayed Q.
  317. //
  318. // Called at various times to process TCBs on the delayed Q.
  319. //
  320. void // Returns: Nothing.
  321. ProcessTCBDelayQ(
  322. void) // Nothing.
  323. {
  324. KIRQL OldIrql;
  325. TCB *DelayTCB;
  326. KeAcquireSpinLock(&TCBDelayLock, &OldIrql);
  327. //
  328. // Check for recursion. We do not stop recursion completely, only
  329. // limit it. This is done to allow multiple threads to process the
  330. // TCBDelayQ simultaneously.
  331. //
  332. TCBDelayRtnCount++;
  333. if (TCBDelayRtnCount > TCBDelayRtnLimit) {
  334. TCBDelayRtnCount--;
  335. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  336. return;
  337. }
  338. while (!EMPTYQ(&TCBDelayQ)) {
  339. DEQUEUE(&TCBDelayQ, DelayTCB, TCB, tcb_delayq);
  340. CHECK_STRUCT(DelayTCB, tcb);
  341. ASSERT(DelayTCB->tcb_refcnt != 0);
  342. ASSERT(DelayTCB->tcb_flags & IN_DELAY_Q);
  343. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  344. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  345. while (!CLOSING(DelayTCB) && (DelayTCB->tcb_flags & DELAYED_FLAGS)) {
  346. if (DelayTCB->tcb_flags & NEED_RCV_CMPLT) {
  347. DelayTCB->tcb_flags &= ~NEED_RCV_CMPLT;
  348. KeReleaseSpinLock(&DelayTCB->tcb_lock, OldIrql);
  349. CompleteRcvs(DelayTCB);
  350. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  351. }
  352. if (DelayTCB->tcb_flags & NEED_OUTPUT) {
  353. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  354. DelayTCB->tcb_refcnt++;
  355. TCPSend(DelayTCB, OldIrql);
  356. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  357. }
  358. if (DelayTCB->tcb_flags & NEED_ACK) {
  359. DelayTCB->tcb_flags &= ~NEED_ACK;
  360. KeReleaseSpinLock(&DelayTCB->tcb_lock, OldIrql);
  361. SendACK(DelayTCB);
  362. KeAcquireSpinLock(&DelayTCB->tcb_lock, &OldIrql);
  363. }
  364. }
  365. DelayTCB->tcb_flags &= ~IN_DELAY_Q;
  366. DerefTCB(DelayTCB, OldIrql);
  367. KeAcquireSpinLock(&TCBDelayLock, &OldIrql);
  368. }
  369. TCBDelayRtnCount--;
  370. KeReleaseSpinLock(&TCBDelayLock, OldIrql);
  371. }
  372. //* DelayAction - Put a TCB on the queue for a delayed action.
  373. //
  374. // Called when we want to put a TCB on the DelayQ for a delayed action at
  375. // receive complete or some other time. The lock on the TCB must be held
  376. // when this is called.
  377. //
  378. void // Returns: Nothing.
  379. DelayAction(
  380. TCB *DelayTCB, // TCP which we're going to schedule.
  381. uint Action) // Action we're scheduling.
  382. {
  383. //
  384. // Schedule the completion.
  385. //
  386. KeAcquireSpinLockAtDpcLevel(&TCBDelayLock);
  387. DelayTCB->tcb_flags |= Action;
  388. if (!(DelayTCB->tcb_flags & IN_DELAY_Q)) {
  389. DelayTCB->tcb_flags |= IN_DELAY_Q;
  390. DelayTCB->tcb_refcnt++; // Reference this for later.
  391. ENQUEUE(&TCBDelayQ, &DelayTCB->tcb_delayq);
  392. }
  393. KeReleaseSpinLockFromDpcLevel(&TCBDelayLock);
  394. }
  395. //* TCPRcvComplete - Handle a receive complete.
  396. //
  397. // Called by the lower layers when we're done receiving. We look to see
  398. // if we have and pending requests to complete. If we do, we complete them.
  399. // Then we look to see if we have any TCBs pending for output. If we do,
  400. // we get them going.
  401. //
  402. void // Returns: Nothing.
  403. TCPRcvComplete(
  404. void) // Nothing.
  405. {
  406. KIRQL OldIrql;
  407. TCPReq *Req;
  408. if (RequestCompleteFlags & ANY_REQUEST_COMPLETE) {
  409. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  410. if (!(RequestCompleteFlags & IN_RCV_COMPLETE)) {
  411. RequestCompleteFlags |= IN_RCV_COMPLETE;
  412. do {
  413. if (RequestCompleteFlags & CONN_REQUEST_COMPLETE) {
  414. if (!EMPTYQ(&ConnRequestCompleteQ)) {
  415. DEQUEUE(&ConnRequestCompleteQ, Req, TCPReq, tr_q);
  416. CHECK_STRUCT(Req, tr);
  417. CHECK_STRUCT(*(TCPConnReq **)&Req, tcr);
  418. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  419. (*Req->tr_rtn)(Req->tr_context, Req->tr_status, 0);
  420. FreeConnReq((TCPConnReq *)Req);
  421. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  422. } else
  423. RequestCompleteFlags &= ~CONN_REQUEST_COMPLETE;
  424. }
  425. if (RequestCompleteFlags & SEND_REQUEST_COMPLETE) {
  426. if (!EMPTYQ(&SendCompleteQ)) {
  427. TCPSendReq *SendReq;
  428. DEQUEUE(&SendCompleteQ, Req, TCPReq, tr_q);
  429. CHECK_STRUCT(Req, tr);
  430. SendReq = (TCPSendReq *)Req;
  431. CHECK_STRUCT(SendReq, tsr);
  432. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  433. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  434. Req->tr_status == TDI_SUCCESS ? SendReq->tsr_size
  435. : 0);
  436. FreeSendReq((TCPSendReq *)Req);
  437. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  438. } else
  439. RequestCompleteFlags &= ~SEND_REQUEST_COMPLETE;
  440. }
  441. } while (RequestCompleteFlags & ANY_REQUEST_COMPLETE);
  442. RequestCompleteFlags &= ~IN_RCV_COMPLETE;
  443. }
  444. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  445. }
  446. ProcessTCBDelayQ();
  447. }
  448. //* ReleaseConnReq - Complete a connection request.
  449. //
  450. // A utility function to release a connection request which may or may not
  451. // have come from a TCB. We assume the TCB is readable when we're called
  452. // (i.e. the caller holds a lock or a reference).
  453. //
  454. void // Returns: Nothing.
  455. ReleaseConnReq(
  456. TCB *CmpltTCB, // TCB from which to get addressing information.
  457. TCPConnReq *ConnReq, // ConnReq to do away with.
  458. TDI_STATUS Status) // Status with which to complete.
  459. {
  460. //
  461. // Fill in the connection information, then enqueue the ConnReq
  462. // for completion.
  463. //
  464. UpdateConnInfo(ConnReq->tcr_conninfo, &CmpltTCB->tcb_daddr,
  465. CmpltTCB->tcb_dscope_id, CmpltTCB->tcb_dport);
  466. if (ConnReq->tcr_addrinfo) {
  467. UpdateConnInfo(ConnReq->tcr_addrinfo, &CmpltTCB->tcb_saddr,
  468. CmpltTCB->tcb_sscope_id, CmpltTCB->tcb_sport);
  469. }
  470. ConnReq->tcr_req.tr_status = Status;
  471. KeAcquireSpinLockAtDpcLevel(&RequestCompleteLock);
  472. RequestCompleteFlags |= CONN_REQUEST_COMPLETE;
  473. ENQUEUE(&ConnRequestCompleteQ, &ConnReq->tcr_req.tr_q);
  474. KeReleaseSpinLockFromDpcLevel(&RequestCompleteLock);
  475. }
  476. //* CompleteConnReq - Complete a connection request on a TCB.
  477. //
  478. // A utility function to complete a connection request on a TCB. We remove
  479. // the connreq, and put it on the ConnReqCmpltQ where it will be picked
  480. // off later during RcvCmplt processing. We assume the TCB lock is held when
  481. // we're called.
  482. //
  483. void // Returns: Nothing.
  484. CompleteConnReq(
  485. TCB *CmpltTCB, // TCB from which to complete.
  486. TDI_STATUS Status) // Status to complete with.
  487. {
  488. TCPConnReq *ConnReq;
  489. CHECK_STRUCT(CmpltTCB, tcb);
  490. ConnReq = CmpltTCB->tcb_connreq;
  491. if (ConnReq != NULL) {
  492. //
  493. // There's a connreq on this TCB. Fill in the connection information
  494. // before returning it.
  495. //
  496. CmpltTCB->tcb_connreq = NULL;
  497. ReleaseConnReq(CmpltTCB, ConnReq, Status);
  498. } else if (!((CmpltTCB->tcb_state == TCB_SYN_RCVD) &&
  499. (CmpltTCB->tcb_flags & ACCEPT_PENDING))) {
  500. //
  501. // This should not happen except
  502. // in the case of SynAttackProtect.
  503. //
  504. ASSERT(FALSE);
  505. }
  506. }
  507. //* DelayedAcceptConn - Process delayed-connect request.
  508. //
  509. // Called by TCPRcv when SynAttackProtection is turned on, when a final
  510. // ACK arrives in response to our SYN-ACK. Indicate the connect request to
  511. // the ULP and, if it is accepted, init the TCB and move the connection from
  512. // the AO's idle queue to its active queue.
       //
       // The caller must hold the AddrObjTableLock before calling this routine,
  513. // and that lock must have been taken at DPC level. This routine will free
  514. // that lock back to DPC level.
       //
       // On every path that gets as far as referencing the AddrObj, that
       // reference is dropped again before returning. The ConnReq allocated here
       // ends up in one of three places: attached to AcceptTCB (success), queued
       // for completion with an error status (accept IRP supplied but no usable
       // connection), or freed (indication refused or IRP setup failed).
       //
  515. // Returns TRUE if the request is accepted and AcceptTCB was bound to a
  516. // connection, FALSE otherwise.
  517. BOOLEAN
  518. DelayedAcceptConn(
  519. AddrObj *ListenAO, // AddrObj for local address.
  520. IPv6Addr *Src, // Source IP address of SYN.
  521. ulong SrcScopeId, // Scope id of source address (0 for non-scope addr).
  522. ushort SrcPort, // Source port of SYN.
  523. TCB *AcceptTCB) // Pre-accepted TCB
  524. {
  525. TCPConn *CurrentConn = NULL;
  526. Queue *Temp;
  527. TCPConnReq *ConnReq = NULL;
  528. BOOLEAN FoundConn = FALSE;
  529. uchar TAddress[TCP_TA_SIZE];
  530. PVOID ConnContext;
  531. PConnectEvent Event;
  532. PVOID EventContext;
  533. TDI_STATUS Status;
  534. PTCP_CONTEXT TcpContext = NULL; // NOTE(review): never referenced in this routine.
  535. ConnectEventInfo *EventInfo;
  536. CHECK_STRUCT(ListenAO, ao);
       // Take the AO's own lock before letting go of the table lock.
  537. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  538. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  539. if (!AO_VALID(ListenAO) || ListenAO->ao_connect == NULL) {
  540. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  541. return FALSE;
  542. }
  543. // He has a connect handler. Put the transport address together,
  544. // and call him. We also need to get the necessary resources
  545. // first. The AO is referenced so that its lock can be dropped for the call.
  546. Event = ListenAO->ao_connect;
  547. EventContext = ListenAO->ao_conncontext;
  548. REF_AO(ListenAO);
  549. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  550. ConnReq = GetConnReq();
  551. if (ConnReq != NULL) {
  552. BuildTDIAddress(TAddress, Src, SrcScopeId, SrcPort);
  553. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  554. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  555. "indicating connect request\n"));
  556. }
  557. Status = (*Event) (EventContext, TCP_TA_SIZE,
  558. (PTRANSPORT_ADDRESS) TAddress, 0, NULL, 0, NULL,
  559. &ConnContext, &EventInfo);
       // TDI_MORE_PROCESSING means the client supplied an accept IRP, which
       // comes back through EventInfo.
  560. if (Status == TDI_MORE_PROCESSING) {
  561. PIO_STACK_LOCATION IrpSp;
  562. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  563. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  564. Status = TCPPrepareIrpForCancel((PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  565. EventInfo, TCPCancelRequest);
  566. if (!NT_SUCCESS(Status)) {
       // NOTE(review): the IRP is not touched again on this path, so
       // TCPPrepareIrpForCancel presumably completed it - confirm.
  567. Status = TDI_NOT_ACCEPTED;
  568. EventInfo = NULL;
  569. goto AcceptIrpCancelled;
  570. }
  571. //
  572. // He accepted it. Find the connection on the AddrObj.
  573. //
  574. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  575. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  576. "connect indication accepted, queueing request\n"
  577. ));
  578. }
  579. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  580. & (IrpSp->Parameters);
  581. ConnReq->tcr_conninfo =
  582. AcceptRequest->ReturnConnectionInformation;
       // tcr_addrinfo is only set when the request block carries a
       // RemoteAddress pointer.
  583. if (AcceptRequest->RequestConnectionInformation &&
  584. AcceptRequest->RequestConnectionInformation->
  585. RemoteAddress) {
  586. ConnReq->tcr_addrinfo =
  587. AcceptRequest->RequestConnectionInformation;
  588. } else {
  589. ConnReq->tcr_addrinfo = NULL;
  590. }
       // Completing the ConnReq completes the accept IRP.
  591. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  592. ConnReq->tcr_req.tr_context = EventInfo;
       // Walk the AO's idle queue for the connection whose context the client
       // handed back. We come back here to rescan if a candidate went away.
  593. SearchAO:
  594. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  595. Temp = QHEAD(&ListenAO->ao_idleq);;
  596. Status = TDI_INVALID_CONNECTION; // Reported if no matching connection is found.
  597. while (Temp != QEND(&ListenAO->ao_idleq)) {
  598. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  599. CHECK_STRUCT(CurrentConn, tc);
  600. if ((CurrentConn->tc_context == ConnContext) &&
  601. !(CurrentConn->tc_flags & CONN_INVALID)) {
  602. //
  603. // We need to lock its TCPConnBlock, with care.
  604. // We'll ref the TCPConn so it can't go away,
  605. // then unlock the AO (which is already ref'd),
  606. // then relock. Note that tc_refcnt is updated
  607. // under ao_lock for any associated TCPConn.
  608. // If things have changed, go back and try again.
  609. //
  610. ++CurrentConn->tc_refcnt;
  611. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  612. KeAcquireSpinLockAtDpcLevel(
  613. &CurrentConn->tc_ConnBlock->cb_lock);
  614. //
  615. // Now that we've got the lock, we need to consider
  616. // the following possibilities:
  617. //
  618. // * a disassociate was initiated
  619. // * a close was initiated
  620. // * accept completed
  621. // * listen completed
  622. // * connect completed
  623. //
  624. // The first two require that we clean up,
  625. // by calling the tc_donertn. For the last three,
  626. // we have nothing to do, but tc_donertn points at
  627. // DummyDone, so go ahead and call it anyway;
  628. // it'll release the TCPConnBlock lock for us.
  629. //
  630. if (--CurrentConn->tc_refcnt == 0 &&
  631. ((CurrentConn->tc_flags & CONN_INVALID) ||
  632. (CurrentConn->tc_tcb != NULL))) {
  633. ConnDoneRtn DoneRtn = CurrentConn->tc_donertn;
  634. DoneRtn(CurrentConn, DISPATCH_LEVEL);
  635. goto SearchAO; // No locks are held here; rescan from the head.
  636. }
       // Still usable. We now hold the TCPConnBlock lock; take the AO lock
       // and then the TCB lock on top of it.
  637. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  638. KeAcquireSpinLockAtDpcLevel(&AcceptTCB->tcb_lock);
  639. // We think we have a match. The connection
  640. // shouldn't have a TCB associated with it. If it
  641. // does, it's an error. InitTCBFromConn will
  642. // handle all this, but first confirm that
  643. // ACCEPT_PENDING is still set. If it isn't,
  644. // someone accepted this before we did.
  645. if (AcceptTCB->tcb_flags & ACCEPT_PENDING) {
  646. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  647. AcceptRequest->RequestConnectionInformation,
  648. TRUE);
  649. } else {
  650. Status = TDI_INVALID_STATE;
  651. }
  652. if (Status == TDI_SUCCESS) {
       // Bind the TCB and the connection to each other and hand the
       // ConnReq to the TCB.
  653. FoundConn = TRUE;
  654. AcceptTCB->tcb_flags &= ~ACCEPT_PENDING;
  655. AcceptTCB->tcb_connreq = ConnReq;
  656. AcceptTCB->tcb_conn = CurrentConn;
  657. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  658. CurrentConn->tc_tcb = AcceptTCB;
  659. CurrentConn->tc_refcnt++;
  660. KeReleaseSpinLockFromDpcLevel(&AcceptTCB->tcb_lock);
  661. // Move him from the idle q to the active
  662. // queue. The TCPConnBlock lock stays held until after the loop.
  663. REMOVEQ(&CurrentConn->tc_q);
  664. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  665. } else {
       // Couldn't use it. Status carries the reason and is reported
       // through the ConnReq below.
  666. KeReleaseSpinLockFromDpcLevel(&AcceptTCB->tcb_lock);
  667. KeReleaseSpinLockFromDpcLevel(
  668. &CurrentConn->tc_ConnBlock->cb_lock);
  669. }
  670. // In any case, we're done now.
  671. break;
  672. }
  673. Temp = QNEXT(Temp);
  674. }
       // The AO lock is held here whether or not a connection was found.
       // Drop our AO reference, then the lock.
  675. LOCKED_DELAY_DEREF_AO(ListenAO);
  676. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  677. if (FoundConn) {
  678. KeReleaseSpinLockFromDpcLevel(
  679. &CurrentConn->tc_ConnBlock->cb_lock);
  680. } else {
  681. //
  682. // We have to complete the abandoned ConnReq,
  683. // then we're done. This queues it with the failure status.
  684. //
  685. ReleaseConnReq(AcceptTCB, ConnReq, Status);
  686. }
  687. return FoundConn;
  688. }
  689. //
  690. // The event handler didn't take it. Fall through to dereference the
  691. // AO, free the resources, and return FALSE.
  692. //
  693. }
  694. AcceptIrpCancelled:
  695. //
  696. // We couldn't get a new TCPConnReq, or the client didn't want it,
  697. // or the accept IRP could not be prepared for cancellation.
  697. //
  698. if (ConnReq != NULL) {
  699. FreeConnReq(ConnReq);
  700. }
  701. DELAY_DEREF_AO(ListenAO);
  702. return FALSE;
  703. }
  704. //* FindListenConn - Find (or fabricate) a listening connection.
  705. //
  706. // Called by our Receive handler to decide what to do about an incoming
  707. // SYN. We walk down the list of connections associated with the destination
  708. // address, and if we find any in the listening state that can be used for
  709. // the incoming request we'll take them, possibly returning a listen in the
  710. // process. If we don't find any appropriate listening connections, we'll
  711. // call the Connect Event handler if one is registered. If all else fails,
  712. // we'll return NULL and the SYN will be RST.
  713. //
  714. // The caller must hold the AddrObjTableLock before calling this routine,
  715. // and that lock must have been taken at DPC level. This routine will free
  716. // that lock back to DPC level.
  717. //
  718. TCB * // Returns: Pointer to found TCB, or NULL if we can't find one.
  719. FindListenConn(
  720. AddrObj *ListenAO, // AddrObj for local address.
  721. IPv6Addr *Src, // Source IP address of SYN.
  722. ulong SrcScopeId, // Scope id of source address (0 for non-scope addr).
  723. ushort SrcPort) // Source port of SYN.
  724. {
  725. TCB *CurrentTCB = NULL;
  726. TCPConn *CurrentConn = NULL;
  727. TCPConnReq *ConnReq = NULL;
  728. Queue *Temp;
  729. uint FoundConn = FALSE;
  730. CHECK_STRUCT(ListenAO, ao);
  731. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  732. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  733. //
  734. // We have the lock on the AddrObj. Walk down its list, looking
  735. // for connections in the listening state.
  736. //
  737. if (AO_VALID(ListenAO)) {
  738. if (ListenAO->ao_listencnt != 0) {
  739. Temp = QHEAD(&ListenAO->ao_listenq);
  740. while (Temp != QEND(&ListenAO->ao_listenq)) {
  741. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  742. CHECK_STRUCT(CurrentConn, tc);
  743. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  744. KeAcquireSpinLockAtDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  745. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  746. //
  747. // If this TCB is in the listening state, with no delete
  748. // pending, it's a candidate. Look at the pending listen
  749. // information to see if we should take it.
  750. //
  751. if ((CurrentTCB = CurrentConn->tc_tcb) != NULL &&
  752. CurrentTCB->tcb_state == TCB_LISTEN) {
  753. CHECK_STRUCT(CurrentTCB, tcb);
  754. KeAcquireSpinLockAtDpcLevel(&CurrentTCB->tcb_lock);
  755. if (CurrentTCB->tcb_state == TCB_LISTEN &&
  756. !PENDING_ACTION(CurrentTCB)) {
  757. //
  758. // Need to see if we can take it.
  759. // See if the addresses specifed in the ConnReq match.
  760. //
  761. if ((IsUnspecified(&CurrentTCB->tcb_daddr) ||
  762. (IP6_ADDR_EQUAL(&CurrentTCB->tcb_daddr, Src) &&
  763. (CurrentTCB->tcb_dscope_id == SrcScopeId))) &&
  764. (CurrentTCB->tcb_dport == 0 ||
  765. CurrentTCB->tcb_dport == SrcPort)) {
  766. FoundConn = TRUE;
  767. break;
  768. }
  769. //
  770. // Otherwise, this didn't match, so we'll check the
  771. // next one.
  772. //
  773. }
  774. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  775. }
  776. KeReleaseSpinLockFromDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  777. Temp = QNEXT(Temp);;
  778. }
  779. //
  780. // See why we've exited the loop.
  781. //
  782. if (FoundConn) {
  783. CHECK_STRUCT(CurrentTCB, tcb);
  784. //
  785. // We exited because we found a TCB. If it's pre-accepted,
  786. // we're done.
  787. //
  788. CurrentTCB->tcb_refcnt++;
  789. ASSERT(CurrentTCB->tcb_connreq != NULL);
  790. ConnReq = CurrentTCB->tcb_connreq;
  791. //
  792. // If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
  793. //
  794. if (!(ConnReq->tcr_flags & TDI_QUERY_ACCEPT))
  795. CurrentTCB->tcb_flags |= CONN_ACCEPTED;
  796. CurrentTCB->tcb_state = TCB_SYN_RCVD;
  797. ListenAO->ao_listencnt--;
  798. //
  799. // Since he's no longer listening, remove him from the listen
  800. // queue and put him on the active queue.
  801. //
  802. REMOVEQ(&CurrentConn->tc_q);
  803. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  804. KeReleaseSpinLockFromDpcLevel(&CurrentTCB->tcb_lock);
  805. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  806. KeReleaseSpinLockFromDpcLevel(&CurrentConn->tc_ConnBlock->cb_lock);
  807. return CurrentTCB;
  808. }
  809. }
  810. //
  811. // We didn't find a matching TCB.
  812. //
  813. ASSERT(FoundConn == FALSE);
  814. //
  815. // If there's no connect indication handler, we're done.
  816. //
  817. if (ListenAO->ao_connect == NULL) {
  818. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  819. return NULL;
  820. }
  821. if (SynAttackProtect) {
  822. TCB *AcceptTCB;
  823. //
  824. // SynAttack protection is on. Just initialize
  825. // this TCB and send SYN-ACK. When final
  826. // ACK is seen we will indicate about this
  827. // connection arrival to upper layer.
  828. //
  829. AcceptTCB = AllocTCB();
  830. if (AcceptTCB != NULL) {
  831. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  832. AcceptTCB->tcb_connreq = NULL;
  833. AcceptTCB->tcb_flags |= (CONN_ACCEPTED | ACCEPT_PENDING);
  834. AcceptTCB->tcb_refcnt = 1;
  835. AcceptTCB->tcb_defaultwin = DEFAULT_RCV_WIN;
  836. AcceptTCB->tcb_rcvwin = DEFAULT_RCV_WIN;
  837. //
  838. // This TCB isn't going through the InitTCBFromConn logic (yet)
  839. // so perform minimal initialization on it now. In particular,
  840. // inherit any AddrObj settings that we care about during
  841. // connection-acceptance.
  842. //
  843. AcceptTCB->tcb_hops = ListenAO->ao_ucast_hops;
  844. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  845. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  846. "Allocated SP TCB %x\n", (PCHAR)AcceptTCB));
  847. }
  848. }
  849. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  850. return AcceptTCB;
  851. } else {
  852. uchar TAddress[TCP_TA_SIZE];
  853. PVOID ConnContext;
  854. PConnectEvent Event;
  855. PVOID EventContext;
  856. TDI_STATUS Status;
  857. TCB *AcceptTCB;
  858. ConnectEventInfo *EventInfo;
  859. //
  860. // He has a connect handler. Put the transport address together,
  861. // and call him. We also need to get the necessary resources
  862. // first.
  863. //
  864. Event = ListenAO->ao_connect;
  865. EventContext = ListenAO->ao_conncontext;
  866. REF_AO(ListenAO);
  867. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  868. AcceptTCB = AllocTCB();
  869. ConnReq = GetConnReq();
  870. if (AcceptTCB != NULL && ConnReq != NULL) {
  871. BuildTDIAddress(TAddress, Src, SrcScopeId, SrcPort);
  872. AcceptTCB->tcb_state = TCB_LISTEN;
  873. AcceptTCB->tcb_connreq = ConnReq;
  874. AcceptTCB->tcb_flags |= CONN_ACCEPTED;
  875. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  876. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  877. "indicating connect request\n"));
  878. }
  879. Status = (*Event)(EventContext, TCP_TA_SIZE,
  880. (PTRANSPORT_ADDRESS)TAddress, 0, NULL,
  881. 0, NULL,
  882. &ConnContext, &EventInfo);
  883. if (Status == TDI_MORE_PROCESSING) {
  884. PIO_STACK_LOCATION IrpSp;
  885. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  886. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  887. Status = TCPPrepareIrpForCancel(
  888. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  889. EventInfo, TCPCancelRequest);
  890. if (!NT_SUCCESS(Status)) {
  891. Status = TDI_NOT_ACCEPTED;
  892. EventInfo = NULL;
  893. goto AcceptIrpCancelled;
  894. }
  895. //
  896. // He accepted it. Find the connection on the AddrObj.
  897. //
  898. {
  899. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  900. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  901. "connect indication accepted,"
  902. " queueing request\n"));
  903. }
  904. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  905. &(IrpSp->Parameters);
  906. ConnReq->tcr_conninfo =
  907. AcceptRequest->ReturnConnectionInformation;
  908. if (AcceptRequest->RequestConnectionInformation &&
  909. AcceptRequest->RequestConnectionInformation->
  910. RemoteAddress) {
  911. ConnReq->tcr_addrinfo =
  912. AcceptRequest->RequestConnectionInformation;
  913. } else {
  914. ConnReq->tcr_addrinfo = NULL;
  915. }
  916. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  917. ConnReq->tcr_req.tr_context = EventInfo;
  918. }
  919. SearchAO:
  920. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  921. Temp = QHEAD(&ListenAO->ao_idleq);
  922. CurrentTCB = NULL;
  923. Status = TDI_INVALID_CONNECTION;
  924. while (Temp != QEND(&ListenAO->ao_idleq)) {
  925. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  926. CHECK_STRUCT(CurrentConn, tc);
  927. if ((CurrentConn->tc_context == ConnContext) &&
  928. !(CurrentConn->tc_flags & CONN_INVALID)) {
  929. //
  930. // We need to lock its TCPConnBlock, with care.
  931. // We'll ref the TCPConn so it can't go away,
  932. // then unlock the AO (which is already ref'd),
  933. // then relock. Note that tc_refcnt is updated
  934. // under ao_lock for any associated TCPConn.
  935. // If things have changed, go back and try again.
  936. //
  937. ++CurrentConn->tc_refcnt;
  938. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  939. KeAcquireSpinLockAtDpcLevel(
  940. &CurrentConn->tc_ConnBlock->cb_lock);
  941. //
  942. // Now that we've got the lock, we need to consider
  943. // the following possibilities:
  944. //
  945. // * a disassociate was initiated
  946. // * a close was initiated
  947. // * accept completed
  948. // * listen completed
  949. // * connect completed
  950. //
  951. // The first two require that we clean up,
  952. // by calling the tc_donertn. For the last three,
  953. // we have nothing to do, but tc_donertn points at
  954. // DummyDone, so go ahead and call it anyway;
  955. // it'll release the TCPConnBlock lock for us.
  956. //
  957. if (--CurrentConn->tc_refcnt == 0 &&
  958. ((CurrentConn->tc_flags & CONN_INVALID) ||
  959. (CurrentConn->tc_tcb != NULL))) {
  960. ConnDoneRtn DoneRtn = CurrentConn->tc_donertn;
  961. DoneRtn(CurrentConn, DISPATCH_LEVEL);
  962. goto SearchAO;
  963. }
  964. KeAcquireSpinLockAtDpcLevel(&ListenAO->ao_lock);
  965. //
  966. // We think we have a match. The connection
  967. // shouldn't have a TCB associated with it. If it
  968. // does, it's an error. InitTCBFromConn will
  969. // handle all this.
  970. //
  971. AcceptTCB->tcb_refcnt = 1;
  972. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  973. AcceptRequest->RequestConnectionInformation,
  974. TRUE);
  975. if (Status == TDI_SUCCESS) {
  976. FoundConn = TRUE;
  977. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  978. AcceptTCB->tcb_conn = CurrentConn;
  979. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  980. CurrentConn->tc_tcb = AcceptTCB;
  981. CurrentConn->tc_refcnt++;
  982. //
  983. // Move him from the idle queue to the
  984. // active queue.
  985. //
  986. REMOVEQ(&CurrentConn->tc_q);
  987. ENQUEUE(&ListenAO->ao_activeq,
  988. &CurrentConn->tc_q);
  989. } else
  990. KeReleaseSpinLockFromDpcLevel(
  991. &CurrentConn->tc_ConnBlock->cb_lock);
  992. // In any case, we're done now.
  993. break;
  994. }
  995. Temp = QNEXT(Temp);
  996. }
  997. if (!FoundConn) {
  998. //
  999. // Didn't find a match, or had an error.
  1000. // Status code is set.
  1001. // Complete the ConnReq and free the resources.
  1002. //
  1003. CompleteConnReq(AcceptTCB, Status);
  1004. FreeTCB(AcceptTCB);
  1005. AcceptTCB = NULL;
  1006. }
  1007. LOCKED_DELAY_DEREF_AO(ListenAO);
  1008. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  1009. if (FoundConn) {
  1010. KeReleaseSpinLockFromDpcLevel(
  1011. &CurrentConn->tc_ConnBlock->cb_lock);
  1012. }
  1013. return AcceptTCB;
  1014. }
  1015. }
  1016. AcceptIrpCancelled:
  1017. //
  1018. // We couldn't get a needed resource or event handler
  1019. // did not take this. Free any that we
  1020. // did get, and fall through to the 'return NULL' code.
  1021. //
  1022. if (ConnReq != NULL)
  1023. FreeConnReq(ConnReq);
  1024. if (AcceptTCB != NULL)
  1025. FreeTCB(AcceptTCB);
  1026. DELAY_DEREF_AO(ListenAO);
  1027. }
  1028. return NULL;
  1029. }
  1030. //
  1031. // If we get here, the address object wasn't valid.
  1032. //
  1033. KeReleaseSpinLockFromDpcLevel(&ListenAO->ao_lock);
  1034. return NULL;
  1035. }
  1036. //* FindMSS - Find the MSS option in a segment.
  1037. //
  1038. // Called when a SYN is received to find the MSS option in a segment.
  1039. // If we don't find one, we assume the worst and return one based on
  1040. // the minimum MTU.
  1041. //
  1042. ushort // Returns: MSS to be used.
  1043. FindMSS(
  1044. TCPHeader UNALIGNED *TCP) // TCP header to be searched.
  1045. {
  1046. uint OptSize;
  1047. uchar *OptPtr;
  1048. OptSize = TCP_HDR_SIZE(TCP) - sizeof(TCPHeader);
  1049. OptPtr = (uchar *)(TCP + 1);
  1050. while (OptSize) {
  1051. if (*OptPtr == TCP_OPT_EOL)
  1052. break;
  1053. if (*OptPtr == TCP_OPT_NOP) {
  1054. OptPtr++;
  1055. OptSize--;
  1056. continue;
  1057. }
  1058. if (*OptPtr == TCP_OPT_MSS) {
  1059. if (OptSize >= MSS_OPT_SIZE && OptPtr[1] == MSS_OPT_SIZE) {
  1060. ushort TempMss = *(ushort UNALIGNED *)(OptPtr + 2);
  1061. if (TempMss != 0)
  1062. return net_short(TempMss);
  1063. else
  1064. break; // MSS size of 0, use default.
  1065. } else
  1066. break; // Bad option size, use default.
  1067. } else {
  1068. //
  1069. // Unknown option. Skip over it.
  1070. //
  1071. if (OptSize < 2 || OptPtr[1] == 0 || OptPtr[1] > OptSize)
  1072. break; // Bad option length, bail out.
  1073. OptSize -= OptPtr[1];
  1074. OptPtr += OptPtr[1];
  1075. }
  1076. }
  1077. return DEFAULT_MSS;
  1078. }
  1079. //* ACKAndDrop - Acknowledge a segment, and drop it.
  1080. //
  1081. // Called from within the receive code when we need to drop a segment that's
  1082. // outside the receive window.
  1083. //
  1084. void // Returns: Nothing.
  1085. ACKAndDrop(
  1086. TCPRcvInfo *RI, // Receive info for incoming segment.
  1087. TCB *RcvTCB) // TCB for incoming segment.
  1088. {
  1089. if (!(RI->tri_flags & TCP_FLAG_RST)) {
  1090. if (RcvTCB->tcb_state == TCB_TIME_WAIT) {
  1091. //
  1092. // In TIME_WAIT, we only ACK duplicates/retransmissions
  1093. // of our peer's FIN segment.
  1094. //
  1095. // REVIEW: We're currently fairly loose on the sequence
  1096. // number check here.
  1097. //
  1098. if ((RI->tri_flags & TCP_FLAG_FIN) &&
  1099. SEQ_LTE(RI->tri_seq, RcvTCB->tcb_rcvnext)) {
  1100. // Restart 2MSL timer and proceed with sending the ACK.
  1101. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, MAX_REXMIT_TO);
  1102. } else {
  1103. // Drop this segment without an ACK.
  1104. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  1105. return;
  1106. }
  1107. }
  1108. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  1109. SendACK(RcvTCB);
  1110. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  1111. }
  1112. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  1113. }
  1114. //* ACKData - Acknowledge data.
  1115. //
  1116. // Called from the receive handler to acknowledge data. We're given the
  1117. // TCB and the new value of senduna. We walk down the send queue pulling
  1118. // off sends and putting them on the complete queue until we hit the end
  1119. // or we acknowledge the specified number of bytes of data.
  1120. //
  1121. // NOTE: We manipulate the send refcnt and acked flag without taking a lock.
  1122. // This is OK in the VxD version where locks don't mean anything anyway, but
  1123. // in the port to NT we'll need to add locking. The lock will have to be
  1124. // taken in the transmit complete routine. We can't use a lock in the TCB,
  1125. // since the TCB could go away before the transmit complete happens, and a
  1126. // lock in the TSR would be overkill, so it's probably best to use a global
  1127. // lock for this. If that causes too much contention, we could use a set of
  1128. // locks and pass a pointer to the appropriate lock back as part of the
  1129. // transmit confirm context. This lock pointer would also need to be stored
  1130. // in the TCB.
  1131. //
  1132. void // Returns: Nothing.
  1133. ACKData(
  1134. TCB *ACKTcb, // TCB from which to pull data.
  1135. SeqNum SendUNA) // New value of send una.
  1136. {
  1137. Queue *End, *Current; // End and current elements.
  1138. Queue *TempQ, *EndQ;
  1139. Queue *LastCmplt; // Last one we completed.
  1140. TCPSendReq *CurrentTSR; // Current send req we're looking at.
  1141. PNDIS_BUFFER CurrentBuffer; // Current NDIS_BUFFER.
  1142. uint Updated = FALSE;
  1143. uint BufLength;
  1144. int Amount, OrigAmount;
  1145. long Result;
  1146. KIRQL OldIrql;
  1147. uint Temp;
  1148. CHECK_STRUCT(ACKTcb, tcb);
  1149. CheckTCBSends(ACKTcb);
  1150. Amount = SendUNA - ACKTcb->tcb_senduna;
  1151. ASSERT(Amount > 0);
  1152. //
  1153. // Since this is an acknowledgement of receipt by our peer for previously
  1154. // unacknowledged data, it implies forward reachablility.
  1155. //
  1156. if (ACKTcb->tcb_rce != NULL)
  1157. ConfirmForwardReachability(ACKTcb->tcb_rce);
  1158. //
  1159. // Do a quick check to see if this acks everything that we have. If it
  1160. // does, handle it right away. We can only do this in the ESTABLISHED
  1161. // state, because we blindly update sendnext, and that can only work if we
  1162. // haven't sent a FIN.
  1163. //
  1164. if ((Amount == (int) ACKTcb->tcb_unacked) &&
  1165. ACKTcb->tcb_state == TCB_ESTAB) {
  1166. //
  1167. // Everything is acked.
  1168. //
  1169. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1170. TempQ = ACKTcb->tcb_sendq.q_next;
  1171. INITQ(&ACKTcb->tcb_sendq);
  1172. ACKTcb->tcb_sendnext = SendUNA;
  1173. ACKTcb->tcb_senduna = SendUNA;
  1174. ASSERT(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
  1175. ACKTcb->tcb_cursend = NULL;
  1176. ACKTcb->tcb_sendbuf = NULL;
  1177. ACKTcb->tcb_sendofs = 0;
  1178. ACKTcb->tcb_sendsize = 0;
  1179. ACKTcb->tcb_unacked = 0;
  1180. //
  1181. // Now walk down the list of send requests. If the reference count
  1182. // has gone to 0, put it on the send complete queue.
  1183. //
  1184. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  1185. EndQ = &ACKTcb->tcb_sendq;
  1186. do {
  1187. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  1188. TCPSendReq, tsr_req);
  1189. CHECK_STRUCT(CurrentTSR, tsr);
  1190. TempQ = CurrentTSR->tsr_req.tr_q.q_next;
  1191. CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
  1192. Result = InterlockedDecrement(&CurrentTSR->tsr_refcnt);
  1193. ASSERT(Result >= 0);
  1194. if (Result <= 0) {
  1195. // No more references are outstanding, the send can be
  1196. // completed.
  1197. // If we've sent directly from this send, NULL out the next
  1198. // pointer for the last buffer in the chain.
  1199. if (CurrentTSR->tsr_lastbuf != NULL) {
  1200. NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
  1201. CurrentTSR->tsr_lastbuf = NULL;
  1202. }
  1203. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1204. Temp = ACKTcb->tcb_bcountlow;
  1205. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1206. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1207. ENQUEUE(&SendCompleteQ, &CurrentTSR->tsr_req.tr_q);
  1208. }
  1209. } while (TempQ != EndQ);
  1210. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  1211. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  1212. CheckTCBSends(ACKTcb);
  1213. return;
  1214. }
  1215. OrigAmount = Amount;
  1216. End = QEND(&ACKTcb->tcb_sendq);
  1217. Current = QHEAD(&ACKTcb->tcb_sendq);
  1218. LastCmplt = NULL;
  1219. while (Amount > 0 && Current != End) {
  1220. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1221. TCPSendReq, tsr_req);
  1222. CHECK_STRUCT(CurrentTSR, tsr);
  1223. if (Amount >= (int) CurrentTSR->tsr_unasize) {
  1224. // This is completely acked. Just advance to the next one.
  1225. Amount -= CurrentTSR->tsr_unasize;
  1226. LastCmplt = Current;
  1227. Current = QNEXT(Current);
  1228. continue;
  1229. }
  1230. //
  1231. // This one is only partially acked. Update his offset and NDIS buffer
  1232. // pointer, and break out. We know that Amount is < the unacked size
  1233. // in this buffer, we we can walk the NDIS buffer chain without fear
  1234. // of falling off the end.
  1235. //
  1236. CurrentBuffer = CurrentTSR->tsr_buffer;
  1237. ASSERT(CurrentBuffer != NULL);
  1238. ASSERT(Amount < (int) CurrentTSR->tsr_unasize);
  1239. CurrentTSR->tsr_unasize -= Amount;
  1240. BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
  1241. if (Amount >= (int) BufLength) {
  1242. do {
  1243. Amount -= BufLength;
  1244. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1245. ASSERT(CurrentBuffer != NULL);
  1246. BufLength = NdisBufferLength(CurrentBuffer);
  1247. } while (Amount >= (int) BufLength);
  1248. CurrentTSR->tsr_offset = Amount;
  1249. CurrentTSR->tsr_buffer = CurrentBuffer;
  1250. } else
  1251. CurrentTSR->tsr_offset += Amount;
  1252. Amount = 0;
  1253. break;
  1254. }
  1255. #if DBG
  1256. //
  1257. // We should always be able to remove at least Amount bytes, except in
  1258. // the case where a FIN has been sent. In that case we should be off
  1259. // by exactly one. In the debug builds we'll check this.
  1260. //
  1261. if (Amount != 0 && (!(ACKTcb->tcb_flags & FIN_SENT) || Amount != 1))
  1262. DbgBreakPoint();
  1263. #endif
  1264. if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
  1265. if (Current != End) {
  1266. //
  1267. // Need to reevaluate CurrentTSR, in case we bailed out of the
  1268. // above loop after updating Current but before updating
  1269. // CurrentTSR.
  1270. //
  1271. CurrentTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1272. TCPSendReq, tsr_req);
  1273. CHECK_STRUCT(CurrentTSR, tsr);
  1274. ACKTcb->tcb_cursend = CurrentTSR;
  1275. ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
  1276. ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
  1277. ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
  1278. } else {
  1279. ACKTcb->tcb_cursend = NULL;
  1280. ACKTcb->tcb_sendbuf = NULL;
  1281. ACKTcb->tcb_sendofs = 0;
  1282. ACKTcb->tcb_sendsize = 0;
  1283. }
  1284. ACKTcb->tcb_sendnext = SendUNA;
  1285. }
  1286. //
  1287. // Now update tcb_unacked with the amount we tried to ack minus the
  1288. // amount we didn't ack (Amount should be 0 or 1 here).
  1289. //
  1290. ASSERT(Amount == 0 || Amount == 1);
  1291. ACKTcb->tcb_unacked -= OrigAmount - Amount;
  1292. ASSERT(*(int *)&ACKTcb->tcb_unacked >= 0);
  1293. ACKTcb->tcb_senduna = SendUNA;
  1294. //
  1295. // If we've acked any here, LastCmplt will be non-null, and Current will
  1296. // point to the send that should be at the start of the queue. Splice
  1297. // out the completed ones and put them on the end of the send completed
  1298. // queue, and update the TCB send queue.
  1299. //
  1300. if (LastCmplt != NULL) {
  1301. Queue *FirstCmplt;
  1302. TCPSendReq *FirstTSR, *EndTSR;
  1303. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1304. FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
  1305. //
  1306. // If we've acked everything, just reinit the queue.
  1307. //
  1308. if (Current == End) {
  1309. INITQ(&ACKTcb->tcb_sendq);
  1310. } else {
  1311. //
  1312. // There's still something on the queue. Just update it.
  1313. //
  1314. ACKTcb->tcb_sendq.q_next = Current;
  1315. Current->q_prev = &ACKTcb->tcb_sendq;
  1316. }
  1317. CheckTCBSends(ACKTcb);
  1318. //
  1319. // Now walk down the lists of things acked. If the refcnt on the send
  1320. // is 0, go ahead and put him on the send complete Q. Otherwise set
  1321. // the ACKed bit in the send, and he'll be completed when the count
  1322. // goes to 0 in the transmit confirm.
  1323. //
  1324. // Note that we haven't done any locking here. This will probably
  1325. // need to change in the port to NT.
  1326. //
  1327. // Set FirstTSR to the first TSR we'll complete, and EndTSR to be
  1328. // the first TSR that isn't completed.
  1329. //
  1330. FirstTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, FirstCmplt, tr_q),
  1331. TCPSendReq, tsr_req);
  1332. EndTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, Current, tr_q),
  1333. TCPSendReq, tsr_req);
  1334. CHECK_STRUCT(FirstTSR, tsr);
  1335. ASSERT(FirstTSR != EndTSR);
  1336. //
  1337. // Now walk the list of ACKed TSRs. If we can complete one, put him
  1338. // on the complete queue.
  1339. //
  1340. KeAcquireSpinLockAtDpcLevel(&RequestCompleteLock);
  1341. while (FirstTSR != EndTSR) {
  1342. TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
  1343. CHECK_STRUCT(FirstTSR, tsr);
  1344. FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
  1345. //
  1346. // The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
  1347. // goes to 0, so we don't need to do it here.
  1348. //
  1349. // Decrement the reference put on the send buffer when it was
  1350. // initialized indicating the send has been acknowledged.
  1351. //
  1352. Result = InterlockedDecrement(&(FirstTSR->tsr_refcnt));
  1353. ASSERT(Result >= 0);
  1354. if (Result <= 0) {
  1355. //
  1356. // No more references are outstanding, the send can be
  1357. // completed.
  1358. //
  1359. // If we've sent directly from this send, NULL out the next
  1360. // pointer for the last buffer in the chain.
  1361. //
  1362. if (FirstTSR->tsr_lastbuf != NULL) {
  1363. NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
  1364. FirstTSR->tsr_lastbuf = NULL;
  1365. }
  1366. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1367. Temp = ACKTcb->tcb_bcountlow;
  1368. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1369. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1370. ENQUEUE(&SendCompleteQ, &FirstTSR->tsr_req.tr_q);
  1371. }
  1372. FirstTSR = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  1373. TCPSendReq, tsr_req);
  1374. }
  1375. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  1376. KeReleaseSpinLockFromDpcLevel(&RequestCompleteLock);
  1377. }
  1378. }
  1379. //* TrimPacket - Trim the leading edge of a Packet.
  1380. //
  1381. // A utility routine to trim the front of a Packet. We take in an amount
  1382. // to trim off (which may be 0) and adjust the pointer in the first buffer
  1383. // in the chain forward by that much. If there isn't that much in the first
  1384. // buffer, we move onto the next one. If we run out of buffers we'll return
  1385. // a pointer to the last buffer in the chain, with a size of 0. It's the
  1386. // caller's responsibility to catch this.
  1387. // REVIEW - Move this to subr.c?
  1388. //
  1389. IPv6Packet * // Returns: A pointer to the new start, or NULL.
  1390. TrimPacket(
  1391. IPv6Packet *Packet, // Packet to be trimmed.
  1392. uint TrimAmount) // Amount to be trimmed.
  1393. {
  1394. uint TrimThisTime;
  1395. ASSERT(Packet != NULL);
  1396. while (TrimAmount) {
  1397. ASSERT(Packet != NULL);
  1398. TrimThisTime = MIN(TrimAmount, Packet->ContigSize);
  1399. TrimAmount -= TrimThisTime;
  1400. Packet->Position += TrimThisTime;
  1401. (uchar *)Packet->Data += TrimThisTime;
  1402. Packet->TotalSize -= TrimThisTime;
  1403. if ((Packet->ContigSize -= TrimThisTime) == 0) {
  1404. //
  1405. // Ran out of space in current buffer.
  1406. // Check for possibility of more data buffers in current packet.
  1407. //
  1408. if (Packet->TotalSize != 0) {
  1409. //
  1410. // Get more contiguous data.
  1411. //
  1412. PacketPullupSubr(Packet, 0, 1, 0);
  1413. continue;
  1414. }
  1415. //
  1416. // Couldn't do a pullup, so see if there's another packet
  1417. // hanging on this chain.
  1418. //
  1419. if (Packet->Next != NULL) {
  1420. IPv6Packet *Temp;
  1421. //
  1422. // There's another packet following. Toss this one.
  1423. //
  1424. Temp = Packet;
  1425. Packet = Packet->Next;
  1426. Temp->Next = NULL;
  1427. FreePacketChain(Temp);
  1428. } else {
  1429. //
  1430. // Ran out of Packets. Just return this one.
  1431. //
  1432. break;
  1433. }
  1434. }
  1435. }
  1436. return Packet;
  1437. }
  1438. //* FreePacketChain - Free a Packet chain.
  1439. //
  1440. // Called to free a chain of IPv6Packets. Only want to free that which
  1441. // we (the TCP/IPv6 stack) have allocated. Don't try to free anything
  1442. // passed up to us from lower layers.
  1443. //
  1444. void // Returns: Nothing.
  1445. FreePacketChain(
  1446. IPv6Packet *Packet) // First Packet in chain to be freed.
  1447. {
  1448. void *Aux;
  1449. while (Packet != NULL) {
  1450. PacketPullupCleanup(Packet);
  1451. if (Packet->Flags & PACKET_OURS) {
  1452. IPv6Packet *Temp;
  1453. Temp = Packet;
  1454. Packet = Packet->Next;
  1455. ExFreePool(Temp);
  1456. } else
  1457. Packet = Packet->Next;
  1458. }
  1459. }
  1460. IPv6Packet DummyPacket;
  1461. //* PullFromRAQ - Pull segments from the reassembly queue.
  1462. //
  1463. // Called when we've received frames out of order, and have some segments
  1464. // on the reassembly queue. We'll walk down the reassembly list, segments
  1465. // that are overlapped by the current receive next variable. When we get
  1466. // to one that doesn't completely overlap we'll trim it to fit the next
  1467. // receive sequence number, and pull it from the queue.
  1468. //
  1469. IPv6Packet *
  1470. PullFromRAQ(
  1471. TCB *RcvTCB, // TCB to pull from.
  1472. TCPRcvInfo *RcvInfo, // TCPRcvInfo structure for current segment.
  1473. uint *Size) // Where to update the size of the current segment.
  1474. {
  1475. TCPRAHdr *CurrentTRH; // Current TCP RA Header being examined.
  1476. TCPRAHdr *TempTRH; // Temporary variable.
  1477. SeqNum NextSeq; // Next sequence number we want.
  1478. IPv6Packet *NewPacket; // Packet after trimming.
  1479. SeqNum NextTRHSeq; // Sequence number immediately after current TRH.
  1480. int Overlap; // Overlap between current TRH and NextSeq.
  1481. CHECK_STRUCT(RcvTCB, tcb);
  1482. CurrentTRH = RcvTCB->tcb_raq;
  1483. NextSeq = RcvTCB->tcb_rcvnext;
  1484. while (CurrentTRH != NULL) {
  1485. CHECK_STRUCT(CurrentTRH, trh);
  1486. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  1487. if (SEQ_LT(NextSeq, CurrentTRH->trh_start)) {
  1488. #if DBG
  1489. *Size = 0;
  1490. #endif
  1491. return NULL; // The next TRH starts too far down.
  1492. }
  1493. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  1494. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  1495. if (SEQ_GTE(NextSeq, NextTRHSeq)) {
  1496. //
  1497. // The current TRH is overlapped completely. Free it and continue.
  1498. //
  1499. FreePacketChain(CurrentTRH->trh_buffer);
  1500. TempTRH = CurrentTRH->trh_next;
  1501. ExFreePool(CurrentTRH);
  1502. CurrentTRH = TempTRH;
  1503. RcvTCB->tcb_raq = TempTRH;
  1504. if (TempTRH == NULL) {
  1505. //
  1506. // We've just cleaned off the RAQ. We can go back on the
  1507. // fast path now.
  1508. //
  1509. if (--(RcvTCB->tcb_slowcount) == 0) {
  1510. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1511. CheckTCBRcv(RcvTCB);
  1512. }
  1513. break;
  1514. }
  1515. } else {
  1516. Overlap = NextSeq - CurrentTRH->trh_start;
  1517. RcvInfo->tri_seq = NextSeq;
  1518. RcvInfo->tri_flags = CurrentTRH->trh_flags;
  1519. RcvInfo->tri_urgent = CurrentTRH->trh_urg;
  1520. if (Overlap != (int) CurrentTRH->trh_size) {
  1521. NewPacket = TrimPacket(CurrentTRH->trh_buffer, Overlap);
  1522. *Size = CurrentTRH->trh_size - Overlap;
  1523. } else {
  1524. //
  1525. // This completely overlaps the data in this segment, but the
  1526. // sequence number doesn't overlap completely. There must
  1527. // be a FIN in the TRH. We'll just return some bogus value
  1528. // that nobody will look at with a size of 0.
  1529. //
  1530. FreePacketChain(CurrentTRH->trh_buffer);
  1531. ASSERT(CurrentTRH->trh_flags & TCP_FLAG_FIN);
  1532. NewPacket =&DummyPacket;
  1533. *Size = 0;
  1534. }
  1535. RcvTCB->tcb_raq = CurrentTRH->trh_next;
  1536. if (RcvTCB->tcb_raq == NULL) {
  1537. //
  1538. // We've just cleaned off the RAQ. We can go back on the
  1539. // fast path now.
  1540. //
  1541. if (--(RcvTCB->tcb_slowcount) == 0) {
  1542. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1543. CheckTCBRcv(RcvTCB);
  1544. }
  1545. }
  1546. ExFreePool(CurrentTRH);
  1547. return NewPacket;
  1548. }
  1549. }
  1550. #if DBG
  1551. *Size = 0;
  1552. #endif
  1553. return NULL;
  1554. }
  1555. //* CreateTRH - Create a TCP reassembly header.
  1556. //
  1557. // This function tries to create a TCP reassembly header. We take as input
  1558. // a pointer to the previous TRH in the chain, the IPv6Packet to put on,
  1559. // etc. and try to create and link in a TRH. The caller must hold the lock
  1560. // on the TCB when this is called.
  1561. //
  1562. uint // Returns: TRUE if we created it, FALSE otherwise.
  1563. CreateTRH(
  1564. TCPRAHdr *PrevTRH, // TRH to insert after.
  1565. IPv6Packet *Packet, // IP Packet chain.
  1566. TCPRcvInfo *RcvInfo, // RcvInfo for this TRH.
  1567. int Size) // Size in bytes of data.
  1568. {
  1569. TCPRAHdr *NewTRH;
  1570. IPv6Packet *NewPacket;
  1571. ASSERT((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));
  1572. NewTRH = ExAllocatePoolWithTagPriority(NonPagedPool, sizeof(TCPRAHdr),
  1573. TCP6_TAG, LowPoolPriority);
  1574. if (NewTRH == NULL)
  1575. return FALSE;
  1576. NewPacket = ExAllocatePoolWithTagPriority(NonPagedPool,
  1577. sizeof(IPv6Packet) + Size,
  1578. TCP6_TAG, LowPoolPriority);
  1579. if (NewPacket == NULL) {
  1580. ExFreePool(NewTRH);
  1581. return FALSE;
  1582. }
  1583. #if DBG
  1584. NewTRH->trh_sig = trh_signature;
  1585. #endif
  1586. NewPacket->Next = NULL;
  1587. NewPacket->Position = 0;
  1588. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1589. NewPacket->Data = NewPacket->FlatData;
  1590. NewPacket->ContigSize = (uint)Size;
  1591. NewPacket->TotalSize = (uint)Size;
  1592. NewPacket->NdisPacket = NULL;
  1593. NewPacket->AuxList = NULL;
  1594. NewPacket->Flags = PACKET_OURS;
  1595. if (Size != 0)
  1596. CopyPacketToBuffer(NewPacket->Data, Packet, Size, Packet->Position);
  1597. NewTRH->trh_start = RcvInfo->tri_seq;
  1598. NewTRH->trh_flags = RcvInfo->tri_flags;
  1599. NewTRH->trh_size = Size;
  1600. NewTRH->trh_urg = RcvInfo->tri_urgent;
  1601. NewTRH->trh_buffer = NewPacket;
  1602. NewTRH->trh_end = NewPacket;
  1603. NewTRH->trh_next = PrevTRH->trh_next;
  1604. PrevTRH->trh_next = NewTRH;
  1605. return TRUE;
  1606. }
  1607. //* PutOnRAQ - Put a segment on the reassembly queue.
  1608. //
  1609. // Called during segment reception to put a segment on the reassembly
  1610. // queue. We try to use as few reassembly headers as possible, so if this
  1611. // segment has some overlap with an existing entry in the queue we'll just
  1612. // update the existing entry. If there is no overlap we'll create a new
  1613. // reassembly header. Combining URGENT data with non-URGENT data is tricky.
  1614. // If we get a segment that has urgent data that overlaps the front of a
  1615. // reassembly header we'll always mark the whole chunk as urgent - the value
  1616. // of the urgent pointer will mark the end of urgent data, so this is OK.
  1617. // If it only overlaps at the end, however, we won't combine, since we would
  1618. // have to mark previously non-urgent data as urgent. We'll trim the
  1619. // front of the incoming segment and create a new reassembly header. Also,
  1620. // if we have non-urgent data that overlaps at the front of a reassembly
  1621. // header containing urgent data we can't combine these two, since again we
  1622. // would mark non-urgent data as urgent.
  1623. // Our search will stop if we find an entry with a FIN.
  1624. // We assume that the TCB lock is held by the caller.
  1625. //
  1626. uint // Returns: TRUE if successful, FALSE otherwise.
  1627. PutOnRAQ(
  1628. TCB *RcvTCB, // TCB on which to reassemble.
  1629. TCPRcvInfo *RcvInfo, // RcvInfo for new segment.
  1630. IPv6Packet *Packet, // Packet chain for this segment.
  1631. uint Size) // Size in bytes of data in this segment.
  1632. {
  1633. TCPRAHdr *PrevTRH; // Previous reassembly header.
  1634. TCPRAHdr *CurrentTRH; // Current reassembly header.
  1635. SeqNum NextSeq; // Seq num of 1st byte after seg being reassembled.
  1636. SeqNum NextTRHSeq; // Sequence number of 1st byte after current TRH.
  1637. uint Created;
  1638. CHECK_STRUCT(RcvTCB, tcb);
  1639. ASSERT(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
  1640. ASSERT(!(RcvInfo->tri_flags & TCP_FLAG_SYN));
  1641. NextSeq = RcvInfo->tri_seq + Size +
  1642. ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  1643. PrevTRH = CONTAINING_RECORD(&RcvTCB->tcb_raq, TCPRAHdr, trh_next);
  1644. CurrentTRH = PrevTRH->trh_next;
  1645. //
  1646. // Walk down the reassembly queue, looking for the correct place to
  1647. // insert this, until we hit the end.
  1648. //
  1649. while (CurrentTRH != NULL) {
  1650. CHECK_STRUCT(CurrentTRH, trh);
  1651. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  1652. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  1653. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  1654. //
  1655. // First, see if it starts beyond the end of the current TRH.
  1656. //
  1657. if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
  1658. //
  1659. // We know the incoming segment doesn't start beyond the end
  1660. // of this TRH, so we'll either create a new TRH in front of
  1661. // this one or we'll merge the new segment onto this TRH.
  1662. // If the end of the current segment is in front of the start
  1663. // of the current TRH, we'll need to create a new TRH. Otherwise
  1664. // we'll merge these two.
  1665. //
  1666. if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
  1667. break;
  1668. else {
  1669. //
  1670. // There's some overlap. If there's actually data in the
  1671. // incoming segment we'll merge it.
  1672. //
  1673. if (Size != 0) {
  1674. int FrontOverlap, BackOverlap;
  1675. IPv6Packet *NewPacket;
  1676. //
  1677. // We need to merge. If there's a FIN on the incoming
  1678. // segment that would fall inside this current TRH, we
  1679. // have a protocol violation from the remote peer. In
  1680. // this case just return, discarding the incoming segment.
  1681. //
  1682. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  1683. SEQ_LTE(NextSeq, NextTRHSeq))
  1684. return TRUE;
  1685. //
  1686. // We have some overlap. Figure out how much.
  1687. //
  1688. FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
  1689. if (FrontOverlap > 0) {
  1690. //
  1691. // Have overlap in front. Allocate an IPv6Packet to
  1692. // to hold it, and copy it, unless we would have to
  1693. // combine non-urgent with urgent.
  1694. //
  1695. if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
  1696. (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
  1697. if (CreateTRH(PrevTRH, Packet, RcvInfo,
  1698. CurrentTRH->trh_start - RcvInfo->tri_seq)) {
  1699. PrevTRH = PrevTRH->trh_next;
  1700. CurrentTRH = PrevTRH->trh_next;
  1701. }
  1702. FrontOverlap = 0;
  1703. } else {
  1704. NewPacket = ExAllocatePoolWithTagPriority(
  1705. NonPagedPool,
  1706. sizeof(IPv6Packet) + FrontOverlap,
  1707. TCP6_TAG, LowPoolPriority);
  1708. if (NewPacket == NULL) {
  1709. // Couldn't allocate memory.
  1710. return TRUE;
  1711. }
  1712. NewPacket->Position = 0;
  1713. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1714. NewPacket->Data = NewPacket->FlatData;
  1715. NewPacket->ContigSize = FrontOverlap;
  1716. NewPacket->TotalSize = FrontOverlap;
  1717. NewPacket->NdisPacket = NULL;
  1718. NewPacket->AuxList = NULL;
  1719. NewPacket->Flags = PACKET_OURS;
  1720. CopyPacketToBuffer(NewPacket->Data, Packet,
  1721. FrontOverlap, Packet->Position);
  1722. CurrentTRH->trh_size += FrontOverlap;
  1723. //
  1724. // Put our new packet on the front of this
  1725. // reassembly header's packet list.
  1726. //
  1727. NewPacket->Next = CurrentTRH->trh_buffer;
  1728. CurrentTRH->trh_buffer = NewPacket;
  1729. CurrentTRH->trh_start = RcvInfo->tri_seq;
  1730. }
  1731. }
  1732. //
  1733. // We've updated the starting sequence number of this TRH
  1734. // if we needed to. Now look for back overlap. There
  1735. // can't be any back overlap if the current TRH has a FIN.
  1736. // Also we'll need to check for urgent data if there is
  1737. // back overlap.
  1738. //
  1739. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  1740. BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
  1741. if ((BackOverlap > 0) &&
  1742. (RcvInfo->tri_flags & TCP_FLAG_URG) &&
  1743. !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
  1744. (FrontOverlap <= 0)) {
  1745. int AmountToTrim;
  1746. //
  1747. // The incoming segment has urgent data and
  1748. // overlaps on the back but not the front, and the
  1749. // current TRH has no urgent data. We can't
  1750. // combine into this TRH, so trim the front of the
  1751. // incoming segment to NextTRHSeq and move to the
  1752. // next TRH.
  1753. AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
  1754. ASSERT(AmountToTrim >= 0);
  1755. ASSERT(AmountToTrim < (int) Size);
  1756. Packet = TrimPacket(Packet, (uint)AmountToTrim);
  1757. RcvInfo->tri_seq += AmountToTrim;
  1758. RcvInfo->tri_urgent -= AmountToTrim;
  1759. PrevTRH = CurrentTRH;
  1760. CurrentTRH = PrevTRH->trh_next;
  1761. Size -= AmountToTrim;
  1762. continue;
  1763. }
  1764. } else
  1765. BackOverlap = 0;
  1766. //
  1767. // Now if we have back overlap, copy it.
  1768. //
  1769. if (BackOverlap > 0) {
  1770. //
  1771. // We have back overlap. Get a buffer to copy it into.
  1772. // If we can't get one, we won't just return, because
  1773. // we may have updated the front and may need to
  1774. // update the urgent info.
  1775. //
  1776. NewPacket = ExAllocatePoolWithTagPriority(
  1777. NonPagedPool,
  1778. sizeof(IPv6Packet) + BackOverlap,
  1779. TCP6_TAG, LowPoolPriority);
  1780. if (NewPacket != NULL) {
  1781. // Allocation succeeded.
  1782. NewPacket->Position = 0;
  1783. NewPacket->FlatData = (uchar *)(NewPacket + 1);
  1784. NewPacket->Data = NewPacket->FlatData;
  1785. NewPacket->ContigSize = BackOverlap;
  1786. NewPacket->TotalSize = BackOverlap;
  1787. NewPacket->NdisPacket = NULL;
  1788. NewPacket->AuxList = NULL;
  1789. NewPacket->Flags = PACKET_OURS;
  1790. CopyPacketToBuffer(NewPacket->Data, Packet,
  1791. BackOverlap, Packet->Position +
  1792. NextTRHSeq - RcvInfo->tri_seq);
  1793. CurrentTRH->trh_size += BackOverlap;
  1794. NewPacket->Next = CurrentTRH->trh_end->Next;
  1795. CurrentTRH->trh_end->Next = NewPacket;
  1796. CurrentTRH->trh_end = NewPacket;
  1797. //
  1798. // This segment could also have FIN set.
  1799. // If it does, set the TRH flag.
  1800. //
  1801. // N.B. If there's another reassembly header after
  1802. // the current one, the data that we're about to
  1803. // put on the current header might already be
  1804. // on that subsequent header which, in that event,
  1805. // will already have the FIN flag set.
  1806. // Check for that case before recording the FIN.
  1807. //
  1808. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  1809. !CurrentTRH->trh_next) {
  1810. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  1811. }
  1812. }
  1813. }
  1814. //
  1815. // Everything should be consistent now. If there's an
  1816. // urgent data pointer in the incoming segment, update the
  1817. // one in the TRH now.
  1818. //
  1819. if (RcvInfo->tri_flags & TCP_FLAG_URG) {
  1820. SeqNum UrgSeq;
  1821. //
  1822. // Have an urgent pointer. If the current TRH already
  1823. // has an urgent pointer, see which is bigger.
  1824. // Otherwise just use this one.
  1825. //
  1826. UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
  1827. if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
  1828. SeqNum TRHUrgSeq;
  1829. TRHUrgSeq = CurrentTRH->trh_start +
  1830. CurrentTRH->trh_urg;
  1831. if (SEQ_LT(UrgSeq, TRHUrgSeq))
  1832. UrgSeq = TRHUrgSeq;
  1833. } else
  1834. CurrentTRH->trh_flags |= TCP_FLAG_URG;
  1835. CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
  1836. }
  1837. } else {
  1838. //
  1839. // We have a 0 length segment. The only interesting thing
  1840. // here is if there's a FIN on the segment. If there is,
  1841. // and the seq. # of the incoming segment is exactly after
  1842. // the current TRH, OR matches the FIN in the current TRH,
  1843. // we note it.
  1844. if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
  1845. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  1846. if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
  1847. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  1848. else
  1849. KdBreakPoint();
  1850. }
  1851. else {
  1852. if (!(SEQ_EQ((NextTRHSeq-1), RcvInfo->tri_seq))) {
  1853. KdBreakPoint();
  1854. }
  1855. }
  1856. }
  1857. }
  1858. return TRUE;
  1859. }
  1860. } else {
  1861. //
  1862. // Look at the next TRH, unless the current TRH has a FIN. If he
  1863. // has a FIN, we won't save any data beyond that anyway.
  1864. //
  1865. if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
  1866. return TRUE;
  1867. PrevTRH = CurrentTRH;
  1868. CurrentTRH = PrevTRH->trh_next;
  1869. }
  1870. }
  1871. //
  1872. // When we get here, we need to create a new TRH. If we create one and
  1873. // there was previously nothing on the reassembly queue, we'll have to
  1874. // move off the fast receive path.
  1875. //
  1876. CurrentTRH = RcvTCB->tcb_raq;
  1877. Created = CreateTRH(PrevTRH, Packet, RcvInfo, (int)Size);
  1878. if (Created && CurrentTRH == NULL) {
  1879. RcvTCB->tcb_slowcount++;
  1880. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  1881. CheckTCBRcv(RcvTCB);
  1882. } else if (!Created) {
  1883. return FALSE;
  1884. }
  1885. return TRUE;
  1886. }
  1887. //* HandleFastXmit - Handles fast retransmit algorithm. See RFC 2581.
  1888. //
  1889. // Called by TCPReceive to determine if we should retransmit a segment
  1890. // without waiting for retransmit timeout to fire.
  1891. //
  1892. BOOLEAN // Returns: TRUE if the segment got retransmitted, FALSE otherwise.
  1893. HandleFastXmit(
  1894. TCB *RcvTCB, // Connection context for this receive.
  1895. TCPRcvInfo *RcvInfo) // Pointer to rcvd TCP Header information.
  1896. {
  1897. uint CWin;
  1898. RcvTCB->tcb_dupacks++;
  1899. if (RcvTCB->tcb_dupacks == MaxDupAcks) {
  1900. //
  1901. // We're going to do a fast retransmit.
  1902. // Stop the retransmit timer and any round-trip time
  1903. // calculations we might have been running.
  1904. //
  1905. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  1906. RcvTCB->tcb_rtt = 0;
  1907. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  1908. //
  1909. // Don't let the slow start threshold go
  1910. // below 2 segments.
  1911. //
  1912. RcvTCB->tcb_ssthresh =
  1913. MAX(MIN(RcvTCB->tcb_cwin, RcvTCB->tcb_sendwin) / 2,
  1914. (uint) RcvTCB->tcb_mss * 2);
  1915. }
  1916. //
  1917. // Inflate the congestion window by the number of segments
  1918. // which have presumably left the network.
  1919. //
  1920. CWin = RcvTCB->tcb_ssthresh + (MaxDupAcks * RcvTCB->tcb_mss);
  1921. //
  1922. // Recall the segment in question and send it out.
  1923. // Note that tcb_lock will be dereferenced by the caller.
  1924. //
  1925. ResetAndFastSend(RcvTCB, RcvTCB->tcb_senduna, CWin);
  1926. return TRUE;
  1927. } else {
  1928. int SendWin;
  1929. uint AmtOutstanding;
  1930. //
  1931. // REVIEW: At least the first part of this check is redundant.
  1932. //
  1933. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  1934. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  1935. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  1936. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo->tri_ack)))) {
  1937. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  1938. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  1939. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  1940. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  1941. }
  1942. if (RcvTCB->tcb_dupacks > MaxDupAcks) {
  1943. //
  1944. // Update the congestion window to reflect the fact that the
  1945. // duplicate ack presumably indicates that the previous frame
  1946. // was received by our peer and has thus left the network.
  1947. //
  1948. RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
  1949. }
  1950. //
  1951. // Check if we need to set tcb_force.
  1952. //
  1953. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  1954. AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
  1955. RcvTCB->tcb_senduna);
  1956. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  1957. AmtOutstanding);
  1958. if (SendWin < RcvTCB->tcb_mss) {
  1959. RcvTCB->tcb_force = 1;
  1960. }
  1961. }
  1962. }
  1963. return FALSE;
  1964. }
  1965. //* TCPReceive - Receive an incoming TCP segment.
  1966. //
  1967. // This is the routine called by IPv6 when we need to receive a TCP segment.
  1968. // In general, we follow the RFC 793 event processing section pretty closely,
  1969. // but there is a 'fast path' where we make some quick checks on the incoming
  1970. // segment, and if it matches we deliver it immediately.
  1971. //
  1972. uchar // Returns: next header value (always IP_PROTOCOL_NONE for TCP).
  1973. TCPReceive(
  1974. IPv6Packet *Packet) // Packet IP handed up to us.
  1975. {
  1976. NetTableEntry *NTE;
  1977. TCPHeader UNALIGNED *TCP; // The TCP header.
  1978. uint DataOffset; // Offset from start of TCP header to data.
  1979. ushort Checksum;
  1980. TCPRcvInfo RcvInfo; // Local swapped copy of receive info.
  1981. uint SrcScopeId; // Scope id of remote address, if applicable.
  1982. uint DestScopeId; // Scope id of local address, if applicable.
  1983. TCB *RcvTCB; // TCB on which to receive the packet.
  1984. uint Inserted;
  1985. uint Actions; // Flags for future actions to be performed.
  1986. uint BytesTaken;
  1987. uint NewSize;
  1988. BOOLEAN UseIsn = FALSE;
  1989. SeqNum Isn = 0;
  1990. uint UpdateWindow;
  1991. //
  1992. // REVIEW: Expediency hacks to get something working.
  1993. //
  1994. uint Size; // Probably safe to just change name to PayloadLength below.
  1995. //
  1996. // TCP only works with unicast addresses. If this packet was
  1997. // received on a unicast address, then Packet->NTEorIF will be an
  1998. // NTE. So drop packets if we don't have an NTE.
  1999. // (IPv6HeaderReceive checks validity.) But the converse isn't
  2000. // true, we could have an NTE here that is associated with the
  2001. // anycast/multicast address we received the packet on. So to
  2002. // guard against that, we verify that our NTE's address is the
  2003. // destination given in the packet.
  2004. //
  2005. if (!IsNTE(Packet->NTEorIF) ||
  2006. !IP6_ADDR_EQUAL(AlignAddr(&Packet->IP->Dest),
  2007. &(NTE = CastToNTE(Packet->NTEorIF))->Address)) {
  2008. // Packet's destination was not a valid unicast address of ours.
  2009. return IP_PROTOCOL_NONE; // Drop packet.
  2010. }
  2011. TStats.ts_insegs++;
  2012. //
  2013. // Verify that we have enough contiguous data to overlay a TCPHeader
  2014. // structure on the incoming packet. Then do so.
  2015. //
  2016. if (! PacketPullup(Packet, sizeof(TCPHeader), 1, 0)) {
  2017. // Pullup failed.
  2018. TStats.ts_inerrs++;
  2019. if (Packet->TotalSize < sizeof(TCPHeader)) {
  2020. BadPayloadLength:
  2021. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_BAD_PACKET,
  2022. "TCPv6: data buffer too small to contain TCP header\n"));
  2023. ICMPv6SendError(Packet,
  2024. ICMPv6_PARAMETER_PROBLEM,
  2025. ICMPv6_ERRONEOUS_HEADER_FIELD,
  2026. FIELD_OFFSET(IPv6Header, PayloadLength),
  2027. IP_PROTOCOL_NONE, FALSE);
  2028. }
  2029. return IP_PROTOCOL_NONE; // Drop packet.
  2030. }
  2031. TCP = (TCPHeader UNALIGNED *)Packet->Data;
  2032. //
  2033. // Verify checksum.
  2034. //
  2035. Checksum = ChecksumPacket(Packet->NdisPacket, Packet->Position,
  2036. Packet->FlatData, Packet->TotalSize,
  2037. Packet->SrcAddr, AlignAddr(&Packet->IP->Dest),
  2038. IP_PROTOCOL_TCP);
  2039. if (Checksum != 0xffff) {
  2040. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR,
  2041. "TCPv6: Checksum failed %0x\n", Checksum));
  2042. TStats.ts_inerrs++;
  2043. return IP_PROTOCOL_NONE; // Drop packet.
  2044. }
  2045. //
  2046. // Now that we can read the header, pull out the header length field.
  2047. // Verify that we have enough contiguous data to hold any TCP options
  2048. // that may be present in the header, and skip over the entire header.
  2049. //
  2050. DataOffset = TCP_HDR_SIZE(TCP);
  2051. if (! PacketPullup(Packet, DataOffset, 1, 0)) {
  2052. TStats.ts_inerrs++;
  2053. if (Packet->TotalSize < DataOffset)
  2054. goto BadPayloadLength;
  2055. return IP_PROTOCOL_NONE; // Drop packet.
  2056. }
  2057. TCP = (TCPHeader UNALIGNED *)Packet->Data;
  2058. AdjustPacketParams(Packet, DataOffset);
  2059. Size = Packet->TotalSize;
  2060. //
  2061. // Verify IPSec was performed.
  2062. //
  2063. if (InboundSecurityCheck(Packet, IP_PROTOCOL_TCP, net_short(TCP->tcp_src),
  2064. net_short(TCP->tcp_dest), NTE->IF) != TRUE) {
  2065. //
  2066. // No policy was found or the policy indicated to drop the packet.
  2067. //
  2068. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR,
  2069. "TCPReceive: IPSec Policy caused packet to be dropped\n"));
  2070. return IP_PROTOCOL_NONE; // Drop packet.
  2071. }
  2072. //
  2073. // The packet is valid.
  2074. // Get the info we need and byte swap it.
  2075. //
  2076. RcvInfo.tri_seq = net_long(TCP->tcp_seq);
  2077. RcvInfo.tri_ack = net_long(TCP->tcp_ack);
  2078. RcvInfo.tri_window = (uint)net_short(TCP->tcp_window);
  2079. RcvInfo.tri_urgent = (uint)net_short(TCP->tcp_urgent);
  2080. RcvInfo.tri_flags = (uint)TCP->tcp_flags;
  2081. //
  2082. // Determine the appropriate scope id for our packet's addresses.
  2083. // Note that multicast addresses were forbidden above.
  2084. // We use DetermineScopeId instead of just indexing into ZoneIndices
  2085. // because we need the "user-level" scope id here.
  2086. //
  2087. SrcScopeId = DetermineScopeId(Packet->SrcAddr, NTE->IF);
  2088. DestScopeId = DetermineScopeId(&NTE->Address, NTE->IF);
  2089. //
  2090. // See if we have a TCP Control Block for this connection.
  2091. //
  2092. KeAcquireSpinLockAtDpcLevel(&TCBTableLock);
  2093. RcvTCB = FindTCB(AlignAddr(&Packet->IP->Dest), Packet->SrcAddr,
  2094. DestScopeId, SrcScopeId, TCP->tcp_dest, TCP->tcp_src);
  2095. if (RcvTCB == NULL) {
  2096. //
  2097. // Didn't find a matching TCB, which means incoming segment doesn't
  2098. // belong to an existing connection.
  2099. //
  2100. KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
  2101. //
  2102. // Make sure that the source address is reasonable
  2103. // before proceeding.
  2104. //
  2105. ASSERT(!IsInvalidSourceAddress(Packet->SrcAddr));
  2106. if (IsUnspecified(Packet->SrcAddr)) {
  2107. return IP_PROTOCOL_NONE;
  2108. }
  2109. //
  2110. // If this segment carries a SYN (and only a SYN), it's a
  2111. // connection initiation request.
  2112. //
  2113. if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK |
  2114. TCP_FLAG_RST)) == TCP_FLAG_SYN) {
  2115. AddrObj *AO;
  2116. ValidNewConnectionRequest:
  2117. //
  2118. // If the firewall is enabled on the arrival interface,
  2119. // drop the SYN without sending a RST.
  2120. //
  2121. // TODO: This is a very simplistic heuristic that
  2122. // should eventually be replaced with a port map to filter
  2123. // against.
  2124. //
  2125. if (NTE->IF->Flags & IF_FLAG_FIREWALL_ENABLED) {
  2126. return IP_PROTOCOL_NONE;
  2127. }
  2128. //
  2129. // We need to look for a matching address object.
  2130. // Want match for local address (+ scope id for scoped addresses),
  2131. // port and protocol.
  2132. //
  2133. KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);
  2134. AO = GetBestAddrObj(AlignAddr(&Packet->IP->Dest), Packet->SrcAddr,
  2135. DestScopeId, TCP->tcp_dest,
  2136. IP_PROTOCOL_TCP, NTE->IF);
  2137. if (AO == NULL) {
  2138. //
  2139. // No address object. Free the lock, and send a RST.
  2140. //
  2141. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  2142. goto SendReset;
  2143. }
  2144. //
  2145. // Found an AO. See if it has a listen indication.
  2146. // FindListenConn will free the lock on the AddrObjTable.
  2147. //
  2148. RcvTCB = FindListenConn(AO, Packet->SrcAddr, SrcScopeId,
  2149. TCP->tcp_src);
  2150. if (RcvTCB == NULL) {
  2151. //
  2152. // No listening connection. AddrObjTableLock was
  2153. // released by FindListenConn. Just send a RST.
  2154. //
  2155. goto SendReset;
  2156. }
  2157. CHECK_STRUCT(RcvTCB, tcb);
  2158. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2159. //
  2160. // We found a listening connection. Initialize
  2161. // it now, and if it is actually to be accepted
  2162. // we'll send a SYN-ACK also.
  2163. //
  2164. ASSERT(RcvTCB->tcb_state == TCB_SYN_RCVD);
  2165. RcvTCB->tcb_daddr = *Packet->SrcAddr;
  2166. RcvTCB->tcb_saddr = Packet->IP->Dest;
  2167. RcvTCB->tcb_dscope_id = SrcScopeId;
  2168. RcvTCB->tcb_sscope_id = DestScopeId;
  2169. RcvTCB->tcb_dport = TCP->tcp_src;
  2170. RcvTCB->tcb_sport = TCP->tcp_dest;
  2171. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2172. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext;
  2173. if (UseIsn) {
  2174. RcvTCB->tcb_sendnext = Isn;
  2175. } else {
  2176. GetRandomISN(&RcvTCB->tcb_sendnext,
  2177. (uchar*)&RcvTCB->tcb_md5data);
  2178. }
  2179. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2180. RcvTCB->tcb_remmss = FindMSS(TCP);
  2181. TStats.ts_passiveopens++;
  2182. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2183. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2184. Inserted = InsertTCB(RcvTCB);
  2185. //
  2186. // Get the lock on it, and see if it's been accepted.
  2187. //
  2188. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2189. if (!Inserted) {
  2190. // Couldn't insert it!.
  2191. CompleteConnReq(RcvTCB, TDI_CONNECTION_ABORTED);
  2192. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2193. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2194. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2195. return IP_PROTOCOL_NONE;
  2196. }
  2197. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2198. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  2199. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2200. DelayAction(RcvTCB, NEED_OUTPUT);
  2201. }
  2202. if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
  2203. //
  2204. // The connection was accepted. Finish the
  2205. // initialization, and send the SYN ack.
  2206. //
  2207. AcceptConn(RcvTCB, DISPATCH_LEVEL);
  2208. return IP_PROTOCOL_NONE;
  2209. } else {
  2210. //
  2211. // We don't know what to do about the
  2212. // connection yet. Return the pending listen,
  2213. // dereference the connection, and return.
  2214. //
  2215. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2216. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2217. return IP_PROTOCOL_NONE;
  2218. }
  2219. }
  2220. SendReset:
  2221. //
  2222. // Not a SYN, no AddrObj available, or port filtered.
  2223. // Send a RST back to the sender.
  2224. //
  2225. SendRSTFromHeader(TCP, Packet->TotalSize, Packet->SrcAddr, SrcScopeId,
  2226. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2227. return IP_PROTOCOL_NONE;
  2228. }
  2229. //
  2230. // We found a matching TCB. Get the lock on it, and continue.
  2231. //
  2232. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2233. KeReleaseSpinLockFromDpcLevel(&TCBTableLock);
  2234. //
  2235. // Do the fast path check. We can hit the fast path if the incoming
  2236. // sequence number matches our receive next and the masked flags
  2237. // match our 'predicted' flags.
  2238. //
  2239. CheckTCBRcv(RcvTCB);
  2240. RcvTCB->tcb_alive = TCPTime;
  2241. if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
  2242. (RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk) {
  2243. Actions = 0;
  2244. RcvTCB->tcb_refcnt++;
  2245. //
  2246. // The fast path. We know all we have to do here is ack sends and
  2247. // deliver data. First try and ack data.
  2248. //
  2249. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2250. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2251. uint CWin;
  2252. uint MSS;
  2253. //
  2254. // The ack acknowledges something. Pull the
  2255. // appropriate amount off the send q.
  2256. //
  2257. ACKData(RcvTCB, RcvInfo.tri_ack);
  2258. //
  2259. // If this acknowledges something we were running a RTT on,
  2260. // update that stuff now.
  2261. //
  2262. if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
  2263. RcvTCB->tcb_rttseq)) {
  2264. short RTT;
  2265. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  2266. RcvTCB->tcb_rtt = 0;
  2267. RTT -= (RcvTCB->tcb_smrtt >> 3);
  2268. RcvTCB->tcb_smrtt += RTT;
  2269. RTT = (RTT >= 0 ? RTT : -RTT);
  2270. RTT -= (RcvTCB->tcb_delta >> 3);
  2271. RcvTCB->tcb_delta += RTT + RTT;
  2272. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  2273. MIN_RETRAN_TICKS),
  2274. MAX_REXMIT_TO);
  2275. }
  2276. if ((RcvTCB->tcb_dupacks >= MaxDupAcks) &&
  2277. ((int)RcvTCB->tcb_ssthresh > 0)) {
  2278. //
  2279. // We were in fast retransmit mode, so this ACK is for
  2280. // our fast retransmitted frame. Set cwin to ssthresh
  2281. // so that cwin grows linearly from here.
  2282. //
  2283. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  2284. } else {
  2285. //
  2286. // Update the congestion window now.
  2287. //
  2288. CWin = RcvTCB->tcb_cwin;
  2289. MSS = RcvTCB->tcb_mss;
  2290. if (CWin < RcvTCB->tcb_maxwin) {
  2291. if (CWin < RcvTCB->tcb_ssthresh)
  2292. CWin += MSS;
  2293. else
  2294. CWin += (MSS * MSS)/CWin;
  2295. RcvTCB->tcb_cwin = CWin;
  2296. }
  2297. }
  2298. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  2299. //
  2300. // Since this isn't a duplicate ACK, reset the counter.
  2301. //
  2302. RcvTCB->tcb_dupacks = 0;
  2303. //
  2304. // We've acknowledged something, so reset the rexmit count.
  2305. // If there's still stuff outstanding, restart the rexmit
  2306. // timer.
  2307. //
  2308. RcvTCB->tcb_rexmitcnt = 0;
  2309. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  2310. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2311. else
  2312. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, RcvTCB->tcb_rexmit);
  2313. //
  2314. // Since we've acknowledged data, we need to update the window.
  2315. //
  2316. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2317. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
  2318. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2319. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2320. //
  2321. // We've updated the window, remember to send some more.
  2322. //
  2323. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2324. } else {
  2325. //
  2326. // It doesn't ack anything. If it's an ack for something
  2327. // larger than we've sent then ACKAndDrop it.
  2328. //
  2329. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2330. ACKAndDrop(&RcvInfo, RcvTCB);
  2331. return IP_PROTOCOL_NONE;
  2332. }
  2333. //
  2334. // If it is a pure duplicate ack, check if we should
  2335. // do a fast retransmit.
  2336. //
  2337. if ((Size == 0) && SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2338. SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax) &&
  2339. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  2340. RcvInfo.tri_window) {
  2341. //
  2342. // See if fast rexmit can be done.
  2343. //
  2344. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  2345. return IP_PROTOCOL_NONE;
  2346. }
  2347. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2348. } else {
  2349. //
  2350. // Not a pure duplicate ack (Size != 0 or peer is
  2351. // advertising a new windows). Reset counter.
  2352. //
  2353. RcvTCB->tcb_dupacks = 0;
  2354. //
  2355. // If the ack matches our existing UNA, we need to see if
  2356. // we can update the window.
  2357. //
  2358. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2359. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  2360. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  2361. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  2362. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2363. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  2364. RcvInfo.tri_window);
  2365. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2366. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2367. //
  2368. // Since we've updated the window, remember to send
  2369. // some more.
  2370. //
  2371. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  2372. }
  2373. }
  2374. }
  2375. //
  2376. // Check to see if this packet contains any useable data.
  2377. //
  2378. NewSize = MIN((int) Size, RcvTCB->tcb_rcvwin);
  2379. if (NewSize != 0) {
  2380. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2381. BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
  2382. Packet, NewSize);
  2383. RcvTCB->tcb_rcvnext += BytesTaken;
  2384. RcvTCB->tcb_rcvwin -= BytesTaken;
  2385. CheckTCBRcv(RcvTCB);
  2386. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2387. //
  2388. // If our peer is sending into an expanded window, then our
  2389. // peer must have received our ACK advertising said window.
  2390. // Take this as proof of forward reachability.
  2391. //
  2392. if (SEQ_GTE(RcvInfo.tri_seq + (int)NewSize,
  2393. RcvTCB->tcb_rcvwinwatch)) {
  2394. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext +
  2395. RcvTCB->tcb_rcvwin;
  2396. if (RcvTCB->tcb_rce != NULL)
  2397. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2398. }
  2399. Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ? NEED_OUTPUT : 0);
  2400. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2401. if ((RcvTCB->tcb_flags & ACK_DELAYED) ||
  2402. (BytesTaken != NewSize)) {
  2403. Actions |= NEED_ACK;
  2404. } else {
  2405. RcvTCB->tcb_flags |= ACK_DELAYED;
  2406. START_TCB_TIMER(RcvTCB->tcb_delacktimer, DEL_ACK_TICKS);
  2407. }
  2408. } else {
  2409. //
  2410. // The new size is 0. If the original size was not 0, we must
  2411. // have a 0 receive win and hence need to send an ACK to this
  2412. // probe.
  2413. //
  2414. Actions |= (Size ? NEED_ACK : 0);
  2415. }
  2416. if (Actions)
  2417. DelayAction(RcvTCB, Actions);
  2418. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2419. return IP_PROTOCOL_NONE;
  2420. }
  2421. //
  2422. // This is the non-fast path.
  2423. //
  2424. //
  2425. // If we found a matching TCB in TIME_WAIT, and the received segment
  2426. // carries a SYN (and only a SYN), and the received segment has a sequence
  2427. // greater than the last received, kill the TIME_WAIT TCB and use its
  2428. // next sequence number to generate the initial sequence number of a
  2429. // new incarnation.
  2430. //
  2431. if ((RcvTCB->tcb_state == TCB_TIME_WAIT) &&
  2432. ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST))
  2433. == TCP_FLAG_SYN) &&
  2434. SEQ_GT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  2435. Isn = RcvTCB->tcb_sendnext + 128000;
  2436. UseIsn = TRUE;
  2437. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2438. TryToCloseTCB(RcvTCB, TCB_CLOSE_SUCCESS, DISPATCH_LEVEL);
  2439. RcvTCB = NULL;
  2440. goto ValidNewConnectionRequest;
  2441. }
  2442. //
  2443. // Make sure we can handle this frame. We can't handle it if we're
  2444. // in SYN_RCVD and the accept is still pending, or we're in a
  2445. // non-established state and already in the receive handler.
  2446. //
  2447. if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
  2448. !(RcvTCB->tcb_flags & CONN_ACCEPTED) &&
  2449. !(RcvTCB->tcb_flags & ACTIVE_OPEN)) ||
  2450. (RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
  2451. TCP_FLAG_IN_RCV))) {
  2452. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2453. return IP_PROTOCOL_NONE;
  2454. }
  2455. //
  2456. // If it's closed, it's a temporary zombie TCB. Reset the sender.
  2457. //
  2458. if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
  2459. ((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
  2460. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2461. SendRSTFromHeader(TCP, Packet->TotalSize, Packet->SrcAddr, SrcScopeId,
  2462. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2463. return IP_PROTOCOL_NONE;
  2464. }
  2465. //
  2466. // At this point, we have a connection, and it's locked. Following
  2467. // the 'Segment Arrives' section of 793, the next thing to check is
  2468. // if this connection is in SynSent state.
  2469. //
  2470. if (RcvTCB->tcb_state == TCB_SYN_SENT) {
  2471. ASSERT(RcvTCB->tcb_flags & ACTIVE_OPEN);
  2472. //
  2473. // Check the ACK bit. Since we don't send data with our SYNs, the
  2474. // check we make is for the ack to exactly match our SND.NXT.
  2475. //
  2476. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  2477. // ACK is set.
  2478. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
  2479. // Bad ACK value.
  2480. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2481. // Send a RST back at him.
  2482. SendRSTFromHeader(TCP, Packet->TotalSize,
  2483. Packet->SrcAddr, SrcScopeId,
  2484. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2485. return IP_PROTOCOL_NONE;
  2486. }
  2487. }
  2488. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  2489. //
  2490. // This might be an acceptable RST. We'll persist here, sending
  2491. // another SYN in PERSIST_TIMEOUT ms, until we fail from too
  2492. // many retries.
  2493. //
  2494. if (!(RcvInfo.tri_flags & TCP_FLAG_ACK)) {
  2495. //
  2496. // The RST isn't acceptable, so ignore it.
  2497. //
  2498. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2499. } else if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
  2500. //
  2501. // We've had a positive refusal, and one more rexmit
  2502. // would time us out, so close the connection now.
  2503. //
  2504. CompleteConnReq(RcvTCB, TDI_CONN_REFUSED);
  2505. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  2506. } else {
  2507. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, PERSIST_TIMEOUT);
  2508. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2509. }
  2510. return IP_PROTOCOL_NONE;
  2511. }
  2512. //
  2513. // See if we have a SYN. If we do, we're going to change state
  2514. // somehow (either to ESTABLISHED or SYN_RCVD).
  2515. //
  2516. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2517. RcvTCB->tcb_refcnt++;
  2518. //
  2519. // We have a SYN. Go ahead and record the sequence number and
  2520. // window info.
  2521. //
  2522. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2523. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext;
  2524. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  2525. // Urgent data. Update the pointer.
  2526. if (RcvInfo.tri_urgent != 0)
  2527. RcvInfo.tri_urgent--;
  2528. else
  2529. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  2530. }
  2531. RcvTCB->tcb_remmss = FindMSS(TCP);
  2532. RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
  2533. ASSERT(RcvTCB->tcb_mss > 0);
  2534. RcvTCB->tcb_rexmitcnt = 0;
  2535. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2536. AdjustRcvWin(RcvTCB);
  2537. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  2538. //
  2539. // Our SYN has been acked. Update SND.UNA and stop the
  2540. // retrans timer.
  2541. //
  2542. RcvTCB->tcb_senduna = RcvInfo.tri_ack;
  2543. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2544. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  2545. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2546. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2547. GoToEstab(RcvTCB);
  2548. //
  2549. // We know our peer received our SYN.
  2550. //
  2551. if (RcvTCB->tcb_rce != NULL)
  2552. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2553. //
  2554. // Remove whatever command exists on this connection.
  2555. //
  2556. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2557. //
  2558. // If data has been queued already, send the first data
  2559. // segment with the ACK. Otherwise, send a pure ACK.
  2560. //
  2561. if (RcvTCB->tcb_unacked) {
  2562. RcvTCB->tcb_refcnt++;
  2563. TCPSend(RcvTCB, DISPATCH_LEVEL);
  2564. } else {
  2565. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2566. SendACK(RcvTCB);
  2567. }
  2568. //
  2569. // Now handle other data and controls. To do this we need
  2570. // to reacquire the lock, and make sure we haven't started
  2571. // closing it.
  2572. //
  2573. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2574. if (!CLOSING(RcvTCB)) {
  2575. //
  2576. // We haven't started closing it. Turn off the
  2577. // SYN flag and continue processing.
  2578. //
  2579. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  2580. if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) !=
  2581. TCP_FLAG_ACK || Size != 0)
  2582. goto NotSYNSent;
  2583. }
  2584. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2585. return IP_PROTOCOL_NONE;
  2586. } else {
  2587. //
  2588. // A SYN, but not an ACK. Go to SYN_RCVD.
  2589. //
  2590. RcvTCB->tcb_state = TCB_SYN_RCVD;
  2591. RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
  2592. SendSYN(RcvTCB, DISPATCH_LEVEL);
  2593. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2594. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2595. return IP_PROTOCOL_NONE;
  2596. }
  2597. } else {
  2598. //
  2599. // No SYN, just toss the frame.
  2600. //
  2601. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2602. return IP_PROTOCOL_NONE;
  2603. }
  2604. }
  2605. RcvTCB->tcb_refcnt++;
  2606. NotSYNSent:
  2607. //
  2608. // Not in the SYN-SENT state. Check the sequence number. If my window
  2609. // is 0, I'll truncate all incoming frames but look at some of the
  2610. // control fields. Otherwise I'll try and make this segment fit into
  2611. // the window.
  2612. //
  2613. if (RcvTCB->tcb_rcvwin != 0) {
  2614. int StateSize; // Size, including state info.
  2615. SeqNum LastValidSeq; // Sequence number of last valid byte at RWE.
  2616. //
  2617. // We are offering a window. If this segment starts in front of my
  2618. // receive window, clip off the front part.
  2619. //
  2620. if (SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  2621. int AmountToClip, FinByte;
  2622. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2623. //
  2624. // Had a SYN. Clip it off and update the sequence number.
  2625. //
  2626. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  2627. RcvInfo.tri_seq++;
  2628. RcvInfo.tri_urgent--;
  2629. }
  2630. //
  2631. // Advance the receive buffer to point at the new data.
  2632. //
  2633. AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
  2634. ASSERT(AmountToClip >= 0);
  2635. //
  2636. // If there's a FIN on this segment, account for it.
  2637. //
  2638. FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
  2639. if (AmountToClip >= (((int) Size) + FinByte)) {
  2640. //
  2641. // Falls entirely before the window. We have more special
  2642. // case code here - if the ack number acks something,
  2643. // we'll go ahead and take it, faking the sequence number
  2644. // to be rcvnext. This prevents problems on full duplex
  2645. // connections, where data has been received but not acked,
  2646. // and retransmission timers reset the seq number to
  2647. // below our rcvnext.
  2648. //
  2649. if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
  2650. SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2651. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2652. //
  2653. // This contains valid ACK info. Fudge the information
  2654. // to get through the rest of this.
  2655. //
  2656. Size = 0;
  2657. AmountToClip = 0;
  2658. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  2659. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
  2660. TCP_FLAG_RST | TCP_FLAG_URG);
  2661. #if DBG
  2662. FinByte = 1; // Fake out assert below.
  2663. #endif
  2664. } else {
  2665. ACKAndDrop(&RcvInfo, RcvTCB);
  2666. return IP_PROTOCOL_NONE;
  2667. }
  2668. }
  2669. //
  2670. // Trim what we have to. If we can't trim enough, the frame
  2671. // is too short. This shouldn't happen, but if it does we'll
  2672. // drop the frame.
  2673. //
  2674. Size -= AmountToClip;
  2675. RcvInfo.tri_seq += AmountToClip;
  2676. RcvInfo.tri_urgent -= AmountToClip;
  2677. Packet = TrimPacket(Packet, AmountToClip);
  2678. if (*(int *)&RcvInfo.tri_urgent < 0) {
  2679. RcvInfo.tri_urgent = 0;
  2680. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  2681. }
  2682. }
  2683. //
  2684. // We've made sure the front is OK. Now make sure part of it
  2685. // doesn't fall after the window. If it does, we'll truncate the
  2686. // frame (removing the FIN, if any). If we truncate the whole
  2687. // frame we'll ACKAndDrop it.
  2688. //
  2689. StateSize = Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1: 0) +
  2690. ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
  2691. if (StateSize)
  2692. StateSize--;
  2693. //
  2694. // Now the incoming sequence number (RcvInfo.tri_seq) + StateSize
  2695. // is the last sequence number in the segment. If this is greater
  2696. // than the last valid byte in the window, we have some overlap
  2697. // to chop off.
  2698. //
  2699. ASSERT(StateSize >= 0);
  2700. LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
  2701. if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
  2702. int AmountToChop;
  2703. //
  2704. // At least some part of the frame is outside of our window.
  2705. // See if it starts outside our window.
  2706. //
  2707. if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
  2708. //
  2709. // Falls entirely outside the window. We have special
  2710. // case code to deal with a pure ack that falls exactly at
  2711. // our right window edge. Otherwise we ack and drop it.
  2712. //
  2713. if (!SEQ_EQ(RcvInfo.tri_seq, LastValidSeq+1) || Size != 0
  2714. || (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  2715. ACKAndDrop(&RcvInfo, RcvTCB);
  2716. return IP_PROTOCOL_NONE;
  2717. }
  2718. } else {
  2719. //
  2720. // At least some part of it is in the window. If there's a
  2721. // FIN, chop that off and see if that moves us inside.
  2722. //
  2723. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  2724. RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
  2725. StateSize--;
  2726. }
  2727. //
  2728. // Now figure out how much to chop off.
  2729. //
  2730. AmountToChop = (RcvInfo.tri_seq + StateSize) - LastValidSeq;
  2731. ASSERT(AmountToChop >= 0);
  2732. Size -= AmountToChop;
  2733. }
  2734. }
  2735. } else {
  2736. if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
  2737. //
  2738. // If there's a RST on this segment, and he's only off by 1,
  2739. // take it anyway. This can happen if the remote peer is
  2740. // probing and sends with the seq number after the probe.
  2741. //
  2742. if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
  2743. !(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
  2744. ACKAndDrop(&RcvInfo, RcvTCB);
  2745. return IP_PROTOCOL_NONE;
  2746. } else
  2747. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  2748. }
  2749. //
  2750. // He's in sequence, but we have a window of 0. Truncate the
  2751. // size, and clear any sequence consuming bits.
  2752. //
  2753. if (Size != 0 || (RcvInfo.tri_flags &
  2754. (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  2755. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
  2756. Size = 0;
  2757. if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
  2758. DelayAction(RcvTCB, NEED_ACK);
  2759. }
  2760. }
  2761. //
  2762. // At this point, the segment is in our window and does not overlap
  2763. // on either end. If it's the next sequence number we expect, we can
  2764. // handle the data now. Otherwise we'll queue it for later. In either
  2765. // case we'll handle RST and ACK information right now.
  2766. //
  2767. ASSERT((*(int *)&Size) >= 0);
  2768. //
  2769. // Now, following 793, we check the RST bit.
  2770. //
  2771. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  2772. uchar Reason;
  2773. //
  2774. // We can't go back into the LISTEN state from SYN-RCVD here,
  2775. // because we may have notified the client via a listen completing
  2776. // or a connect indication. So, if it came from an active open we'll
  2777. // give back a 'connection refused' notice. For all other cases
  2778. // we'll just destroy the connection.
  2779. //
  2780. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  2781. if (RcvTCB->tcb_flags & ACTIVE_OPEN)
  2782. Reason = TCB_CLOSE_REFUSED;
  2783. else
  2784. Reason = TCB_CLOSE_RST;
  2785. } else
  2786. Reason = TCB_CLOSE_RST;
  2787. TryToCloseTCB(RcvTCB, Reason, DISPATCH_LEVEL);
  2788. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2789. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  2790. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2791. RemoveTCBFromConn(RcvTCB);
  2792. NotifyOfDisc(RcvTCB, TDI_CONNECTION_RESET, NULL);
  2793. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2794. }
  2795. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2796. return IP_PROTOCOL_NONE;
  2797. }
  2798. //
  2799. // Next check the SYN bit.
  2800. //
  2801. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  2802. //
  2803. // Again, we can't quietly go back into the LISTEN state here, even
  2804. // if we came from a passive open.
  2805. //
  2806. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2807. SendRSTFromHeader(TCP, Size, Packet->SrcAddr, SrcScopeId,
  2808. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2809. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2810. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  2811. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2812. RemoveTCBFromConn(RcvTCB);
  2813. NotifyOfDisc(RcvTCB, TDI_CONNECTION_RESET, NULL);
  2814. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2815. }
  2816. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2817. return IP_PROTOCOL_NONE;
  2818. }
  2819. //
  2820. // Check the ACK field. If it's not on drop the segment.
  2821. //
  2822. if (!(RcvInfo.tri_flags & TCP_FLAG_ACK)) {
  2823. //
  2824. // No ACK. Just drop the segment and return.
  2825. //
  2826. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2827. return IP_PROTOCOL_NONE;
  2828. }
  2829. //
  2830. // If we're in SYN-RCVD, go to ESTABLISHED.
  2831. //
  2832. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  2833. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2834. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2835. //
  2836. // The ack is valid.
  2837. //
  2838. if (RcvTCB->tcb_flags & ACCEPT_PENDING) {
  2839. AddrObj *AO;
  2840. BOOLEAN Accepted;
  2841. //
  2842. // We have not yet indicated this connection to the client,
  2843. // so do it now.
  2844. //
  2845. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  2846. //
  2847. // Check if we still have the listening endpoint.
  2848. //
  2849. KeAcquireSpinLockAtDpcLevel(&AddrObjTableLock);
  2850. AO = GetBestAddrObj(AlignAddr(&Packet->IP->Dest),
  2851. Packet->SrcAddr, DestScopeId,
  2852. TCP->tcp_dest, IP_PROTOCOL_TCP,
  2853. NTE->IF);
  2854. if (AO != NULL) {
  2855. Accepted = DelayedAcceptConn(AO, Packet->SrcAddr,
  2856. SrcScopeId, TCP->tcp_src,
  2857. RcvTCB);
  2858. } else {
  2859. KeReleaseSpinLockFromDpcLevel(&AddrObjTableLock);
  2860. Accepted = FALSE;
  2861. }
  2862. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2863. if (!Accepted) {
  2864. //
  2865. // Delayed acceptance failed. Send RST.
  2866. //
  2867. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  2868. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  2869. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2870. SendRSTFromHeader(TCP, Packet->TotalSize,
  2871. Packet->SrcAddr, SrcScopeId,
  2872. AlignAddr(&Packet->IP->Dest),
  2873. DestScopeId);
  2874. return IP_SUCCESS;
  2875. }
  2876. }
  2877. RcvTCB->tcb_rexmitcnt = 0;
  2878. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2879. RcvTCB->tcb_senduna++;
  2880. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2881. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  2882. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  2883. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  2884. GoToEstab(RcvTCB);
  2885. //
  2886. // We know our peer received our SYN.
  2887. //
  2888. if (RcvTCB->tcb_rce != NULL)
  2889. ConfirmForwardReachability(RcvTCB->tcb_rce);
  2890. //
  2891. // Now complete whatever we can here.
  2892. //
  2893. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  2894. } else {
  2895. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2896. SendRSTFromHeader(TCP, Size, Packet->SrcAddr, SrcScopeId,
  2897. AlignAddr(&Packet->IP->Dest), DestScopeId);
  2898. return IP_PROTOCOL_NONE;
  2899. }
  2900. } else {
  2901. //
  2902. // We're not in SYN-RCVD. See if this acknowledges anything.
  2903. //
  2904. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  2905. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2906. uint CWin;
  2907. //
  2908. // The ack acknowledges something. Pull the
  2909. // appropriate amount off the send q.
  2910. //
  2911. ACKData(RcvTCB, RcvInfo.tri_ack);
  2912. //
  2913. // If this acknowledges something we were running a RTT on,
  2914. // update that stuff now.
  2915. //
  2916. if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
  2917. RcvTCB->tcb_rttseq)) {
  2918. short RTT;
  2919. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  2920. RcvTCB->tcb_rtt = 0;
  2921. RTT -= (RcvTCB->tcb_smrtt >> 3);
  2922. RcvTCB->tcb_smrtt += RTT;
  2923. RTT = (RTT >= 0 ? RTT : -RTT);
  2924. RTT -= (RcvTCB->tcb_delta >> 3);
  2925. RcvTCB->tcb_delta += RTT + RTT;
  2926. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  2927. MIN_RETRAN_TICKS),
  2928. MAX_REXMIT_TO);
  2929. }
  2930. //
  2931. // If we're probing for a PMTU black hole then we've found
  2932. // one, so turn off the detection. The size is already
  2933. // down, so leave it there.
  2934. //
  2935. if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
  2936. RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
  2937. RcvTCB->tcb_bhprobecnt = 0;
  2938. if (--(RcvTCB->tcb_slowcount) == 0) {
  2939. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  2940. CheckTCBRcv(RcvTCB);
  2941. }
  2942. }
  2943. if ((RcvTCB->tcb_dupacks >= MaxDupAcks) &&
  2944. ((int)RcvTCB->tcb_ssthresh > 0)) {
  2945. //
  2946. // We were in fast retransmit mode, so this ACK is for
  2947. // our fast retransmitted frame. Set cwin to ssthresh
  2948. // so that cwin grows linearly from here.
  2949. //
  2950. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  2951. } else {
  2952. //
  2953. // Update the congestion window now.
  2954. //
  2955. CWin = RcvTCB->tcb_cwin;
  2956. if (CWin < RcvTCB->tcb_maxwin) {
  2957. if (CWin < RcvTCB->tcb_ssthresh)
  2958. CWin += RcvTCB->tcb_mss;
  2959. else
  2960. CWin += (RcvTCB->tcb_mss * RcvTCB->tcb_mss) / CWin;
  2961. RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
  2962. }
  2963. }
  2964. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  2965. //
  2966. // Since this isn't a duplicate ACK, reset the counter.
  2967. //
  2968. RcvTCB->tcb_dupacks = 0;
  2969. //
  2970. // We've acknowledged something, so reset the rexmit count.
  2971. // If there's still stuff outstanding, restart the rexmit
  2972. // timer.
  2973. //
  2974. RcvTCB->tcb_rexmitcnt = 0;
  2975. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  2976. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  2977. RcvTCB->tcb_rexmit);
  2978. else
  2979. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  2980. //
  2981. // If we've sent a FIN, and this acknowledges it, we
  2982. // need to complete the client's close request and
  2983. // possibly transition our state.
  2984. //
  2985. if (RcvTCB->tcb_flags & FIN_SENT) {
  2986. //
  2987. // We have sent a FIN. See if it's been acknowledged.
  2988. // Once we've sent a FIN, tcb_sendmax can't advance,
  2989. // so our FIN must have sequence num tcb_sendmax - 1.
  2990. // Thus our FIN is acknowledged if the incoming ack is
  2991. // equal to tcb_sendmax.
  2992. //
  2993. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  2994. //
  2995. // He's acked our FIN. Turn off the flags,
  2996. // and complete the request. We'll leave the
  2997. // FIN_OUTSTANDING flag alone, to force early
  2998. // outs in the send code.
  2999. //
  3000. RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
  3001. ASSERT(RcvTCB->tcb_unacked == 0);
  3002. ASSERT(RcvTCB->tcb_sendnext == RcvTCB->tcb_sendmax);
  3003. //
  3004. // Now figure out what we need to do. In FIN_WAIT1
  3005. // or FIN_WAIT2, just complete the disconnect
  3006. // request and continue. Otherwise, it's a bit
  3007. // trickier, since we can't complete the connreq
  3008. // until we remove the TCB from its connection.
  3009. //
  3010. switch (RcvTCB->tcb_state) {
  3011. case TCB_FIN_WAIT1:
  3012. RcvTCB->tcb_state = TCB_FIN_WAIT2;
  3013. CompleteConnReq(RcvTCB, TDI_SUCCESS);
  3014. //
  3015. // Start a timer in case we never get
  3016. // out of FIN_WAIT2. Set the retransmit
  3017. // count high to force a timeout the
  3018. // first time the timer fires.
  3019. //
  3020. RcvTCB->tcb_rexmitcnt = (uchar)MaxDataRexmitCount;
  3021. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  3022. (ushort)FinWait2TO);
  3023. // Fall through to FIN-WAIT-2 processing.
  3024. case TCB_FIN_WAIT2:
  3025. break;
  3026. case TCB_CLOSING:
  3027. GracefulClose(RcvTCB, TRUE, FALSE, DISPATCH_LEVEL);
  3028. return IP_PROTOCOL_NONE;
  3029. break;
  3030. case TCB_LAST_ACK:
  3031. GracefulClose(RcvTCB, FALSE, FALSE, DISPATCH_LEVEL);
  3032. return IP_PROTOCOL_NONE;
  3033. break;
  3034. default:
  3035. KdBreakPoint();
  3036. break;
  3037. }
  3038. }
  3039. }
  3040. UpdateWindow = TRUE;
  3041. } else {
  3042. //
  3043. // It doesn't ack anything. If we're in FIN_WAIT2,
  3044. // we'll restart the timer. We don't make this check
  3045. // above because we know no data can be acked when we're
  3046. // in FIN_WAIT2.
  3047. //
  3048. if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
  3049. START_TCB_TIMER(RcvTCB->tcb_rexmittimer, (ushort)FinWait2TO);
  3050. //
  3051. // If it's an ack for something larger than
  3052. // we've sent then ACKAndDrop it.
  3053. //
  3054. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3055. ACKAndDrop(&RcvInfo, RcvTCB);
  3056. return IP_PROTOCOL_NONE;
  3057. }
  3058. //
  3059. // If it is a pure duplicate ack, check if we should
  3060. // do a fast retransmit.
  3061. //
  3062. if ((Size == 0) &&
  3063. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3064. SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax) &&
  3065. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  3066. RcvInfo.tri_window) {
  3067. //
  3068. // See if fast rexmit can be done.
  3069. //
  3070. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  3071. return IP_PROTOCOL_NONE;
  3072. }
  3073. } else {
  3074. //
  3075. // Not a pure duplicate ack (Size != 0 or peer is
  3076. // advertising a new window). Reset counter.
  3077. //
  3078. RcvTCB->tcb_dupacks = 0;
  3079. //
  3080. // See if we should update the window.
  3081. //
  3082. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3083. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  3084. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  3085. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))){
  3086. UpdateWindow = TRUE;
  3087. } else
  3088. UpdateWindow = FALSE;
  3089. }
  3090. }
  3091. if (UpdateWindow) {
  3092. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3093. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
  3094. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3095. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3096. if (RcvInfo.tri_window == 0) {
  3097. //
  3098. // We've got a zero window.
  3099. //
  3100. if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
  3101. RcvTCB->tcb_flags &= ~NEED_OUTPUT;
  3102. RcvTCB->tcb_rexmitcnt = 0;
  3103. START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
  3104. RcvTCB->tcb_rexmit);
  3105. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  3106. RcvTCB->tcb_flags |= FLOW_CNTLD;
  3107. RcvTCB->tcb_slowcount++;
  3108. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  3109. CheckTCBRcv(RcvTCB);
  3110. }
  3111. }
  3112. } else {
  3113. if (RcvTCB->tcb_flags & FLOW_CNTLD) {
  3114. RcvTCB->tcb_rexmitcnt = 0;
  3115. RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
  3116. //
  3117. // Reset send next to the left edge of the window,
  3118. // because it might be at senduna+1 if we've been
  3119. // probing.
  3120. //
  3121. ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
  3122. if (--(RcvTCB->tcb_slowcount) == 0) {
  3123. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  3124. CheckTCBRcv(RcvTCB);
  3125. }
  3126. }
  3127. //
  3128. // Since we've updated the window, see if we can send
  3129. // some more.
  3130. //
  3131. if (RcvTCB->tcb_unacked != 0 ||
  3132. (RcvTCB->tcb_flags & FIN_NEEDED))
  3133. DelayAction(RcvTCB, NEED_OUTPUT);
  3134. }
  3135. }
  3136. }
  3137. //
  3138. // We've handled all the acknowledgment stuff. If the size
  3139. // is greater than 0 or important bits are set process it further,
  3140. // otherwise it's a pure ack and we're done with it.
  3141. //
  3142. if (Size > 0 || (RcvInfo.tri_flags & TCP_FLAG_FIN)) {
  3143. //
  3144. // If we're not in a state where we can process incoming data
  3145. // or FINs, there's no point in going further. Just send an
  3146. // ack and drop this segment.
  3147. //
  3148. if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
  3149. (RcvTCB->tcb_flags & GC_PENDING)) {
  3150. ACKAndDrop(&RcvInfo, RcvTCB);
  3151. return IP_PROTOCOL_NONE;
  3152. }
  3153. //
  3154. // If our peer is sending into an expanded window, then our
  3155. // peer must have received our ACK advertising said window.
  3156. // Take this as proof of forward reachability.
  3157. // Note: we have no guarantee this is timely.
  3158. //
  3159. if (SEQ_GTE(RcvInfo.tri_seq + (int)Size,
  3160. RcvTCB->tcb_rcvwinwatch)) {
  3161. RcvTCB->tcb_rcvwinwatch = RcvTCB->tcb_rcvnext +
  3162. RcvTCB->tcb_rcvwin;
  3163. if (RcvTCB->tcb_rce != NULL)
  3164. ConfirmForwardReachability(RcvTCB->tcb_rce);
  3165. }
  3166. //
  3167. // If it's in sequence process it now, otherwise reassemble it.
  3168. //
  3169. if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  3170. //
  3171. // If we're already in the receive handler, this is a
  3172. // duplicate. We'll just toss it.
  3173. //
  3174. if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
  3175. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3176. return IP_PROTOCOL_NONE;
  3177. }
  3178. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  3179. //
  3180. // Now loop, pulling things from the reassembly queue,
  3181. // until the queue is empty, or we can't take all of the
  3182. // data, or we hit a FIN.
  3183. //
  3184. do {
  3185. //
  3186. // Handle urgent data, if any.
  3187. //
  3188. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  3189. HandleUrgent(RcvTCB, &RcvInfo, Packet, &Size);
  3190. //
  3191. // Since we may have freed the lock, we need to
  3192. // recheck and see if we're closing here.
  3193. //
  3194. if (CLOSING(RcvTCB))
  3195. break;
  3196. }
  3197. //
  3198. // OK, the data is in sequence, we've updated the
  3199. // reassembly queue and handled any urgent data. If we
  3200. // have any data go ahead and process it now.
  3201. //
  3202. if (Size > 0) {
  3203. BytesTaken = (*RcvTCB->tcb_rcvhndlr)
  3204. (RcvTCB, RcvInfo.tri_flags, Packet, Size);
  3205. RcvTCB->tcb_rcvnext += BytesTaken;
  3206. RcvTCB->tcb_rcvwin -= BytesTaken;
  3207. CheckTCBRcv(RcvTCB);
  3208. if (RcvTCB->tcb_flags & ACK_DELAYED)
  3209. DelayAction(RcvTCB, NEED_ACK);
  3210. else {
  3211. RcvTCB->tcb_flags |= ACK_DELAYED;
  3212. START_TCB_TIMER(RcvTCB->tcb_delacktimer,
  3213. DEL_ACK_TICKS);
  3214. }
  3215. if (BytesTaken != Size) {
  3216. //
  3217. // We didn't take everything we could. No
  3218. // use in further processing, just bail
  3219. // out.
  3220. //
  3221. DelayAction(RcvTCB, NEED_ACK);
  3222. break;
  3223. }
  3224. //
  3225. // If we're closing now, we're done, so get out.
  3226. //
  3227. if (CLOSING(RcvTCB))
  3228. break;
  3229. }
  3230. //
  3231. // See if we need to advance over some urgent data.
  3232. //
  3233. if (RcvTCB->tcb_flags & URG_VALID) {
  3234. uint AdvanceNeeded;
  3235. //
  3236. // We only need to advance if we're not doing
  3237. // urgent inline. Urgent inline also has some
  3238. // implications for when we can clear the URG_VALID
  3239. // flag. If we're not doing urgent inline, we can
  3240. // clear it when rcvnext advances beyond urgent
  3241. // end. If we are doing urgent inline, we clear it
  3242. // when rcvnext advances one receive window beyond
  3243. // urgend.
  3244. //
  3245. if (!(RcvTCB->tcb_flags & URG_INLINE)) {
  3246. if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart) {
  3247. RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend + 1;
  3248. } else {
  3249. ASSERT(SEQ_LT(RcvTCB->tcb_rcvnext,
  3250. RcvTCB->tcb_urgstart) ||
  3251. SEQ_GT(RcvTCB->tcb_rcvnext,
  3252. RcvTCB->tcb_urgend));
  3253. }
  3254. AdvanceNeeded = 0;
  3255. } else
  3256. AdvanceNeeded = RcvTCB->tcb_defaultwin;
  3257. //
  3258. // See if we can clear the URG_VALID flag.
  3259. //
  3260. if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
  3261. RcvTCB->tcb_urgend)) {
  3262. RcvTCB->tcb_flags &= ~URG_VALID;
  3263. if (--(RcvTCB->tcb_slowcount) == 0) {
  3264. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  3265. CheckTCBRcv(RcvTCB);
  3266. }
  3267. }
  3268. }
  3269. //
  3270. // We've handled the data. If the FIN bit is set, we
  3271. // have more processing.
  3272. //
  3273. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  3274. uint Notify = FALSE;
  3275. RcvTCB->tcb_rcvnext++;
  3276. DelayAction(RcvTCB, NEED_ACK);
  3277. PushData(RcvTCB);
  3278. switch (RcvTCB->tcb_state) {
  3279. case TCB_SYN_RCVD:
  3280. //
  3281. // I don't think we can get here - we
  3282. // should have discarded the frame if it
  3283. // had no ACK, or gone to established if
  3284. // it did.
  3285. //
  3286. KdBreakPoint();
  3287. case TCB_ESTAB:
  3288. RcvTCB->tcb_state = TCB_CLOSE_WAIT;
  3289. //
  3290. // We left established, we're off the
  3291. // fast path.
  3292. //
  3293. RcvTCB->tcb_slowcount++;
  3294. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  3295. CheckTCBRcv(RcvTCB);
  3296. Notify = TRUE;
  3297. break;
  3298. case TCB_FIN_WAIT1:
  3299. RcvTCB->tcb_state = TCB_CLOSING;
  3300. Notify = TRUE;
  3301. break;
  3302. case TCB_FIN_WAIT2:
  3303. //
  3304. // Stop the FIN_WAIT2 timer.
  3305. //
  3306. STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
  3307. RcvTCB->tcb_refcnt++;
  3308. GracefulClose(RcvTCB, TRUE, TRUE, DISPATCH_LEVEL);
  3309. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3310. break;
  3311. default:
  3312. KdBreakPoint();
  3313. break;
  3314. }
  3315. if (Notify) {
  3316. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  3317. NotifyOfDisc(RcvTCB, TDI_GRACEFUL_DISC, NULL);
  3318. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3319. }
  3320. break; // Exit out of WHILE loop.
  3321. }
  3322. //
  3323. // If the reassembly queue isn't empty, get what we
  3324. // can now.
  3325. //
  3326. Packet = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
  3327. CheckPacketList(Packet, Size);
  3328. } while (Packet != NULL);
  3329. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  3330. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  3331. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  3332. DelayAction(RcvTCB, NEED_OUTPUT);
  3333. }
  3334. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3335. return IP_PROTOCOL_NONE;
  3336. } else {
  3337. //
  3338. // It's not in sequence. Since it needs further
  3339. // processing, put it on the reassembly queue.
  3340. //
  3341. if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
  3342. !(RcvTCB->tcb_flags & GC_PENDING)) {
  3343. PutOnRAQ(RcvTCB, &RcvInfo, Packet, Size);
  3344. KeReleaseSpinLockFromDpcLevel(&RcvTCB->tcb_lock);
  3345. SendACK(RcvTCB);
  3346. KeAcquireSpinLockAtDpcLevel(&RcvTCB->tcb_lock);
  3347. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3348. } else
  3349. ACKAndDrop(&RcvInfo, RcvTCB);
  3350. return IP_PROTOCOL_NONE;
  3351. }
  3352. }
  3353. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3354. return IP_PROTOCOL_NONE;
  3355. }
  3356. //* TCPControlReceive - handler for TCP control messages.
  3357. //
  3358. // This routine is called if we receive an ICMPv6 error message that
  3359. // was generated by some remote site as a result of receiving a TCP
  3360. // packet from us.
  3361. //
  3362. uchar
  3363. TCPControlReceive(
  3364. IPv6Packet *Packet, // Packet handed to us by ICMPv6ErrorReceive.
  3365. StatusArg *StatArg) // Error Code, Argument, and invoking IP header.
  3366. {
  3367. KIRQL Irql0, Irql1; // One per lock nesting level.
  3368. TCB *StatusTCB;
  3369. SeqNum DropSeq;
  3370. TCPHeader UNALIGNED *InvokingTCP;
  3371. Interface *IF = Packet->NTEorIF->IF;
  3372. uint SrcScopeId, DestScopeId;
  3373. //
  3374. // The next thing in the packet should be the TCP header of the
  3375. // original packet which invoked this error.
  3376. //
  3377. if (! PacketPullup(Packet, sizeof(TCPHeader), 1, 0)) {
  3378. // Pullup failed.
  3379. if (Packet->TotalSize < sizeof(TCPHeader))
  3380. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_BAD_PACKET,
  3381. "TCPv6: Packet too small to contain TCP header "
  3382. "from invoking packet\n"));
  3383. return IP_PROTOCOL_NONE; // Drop packet.
  3384. }
  3385. InvokingTCP = (TCPHeader UNALIGNED *)Packet->Data;
  3386. //
  3387. // Determining the scope identifiers for the addresses in the
  3388. // invoking packet is potentially problematic, since we have
  3389. // no way to be certain which interface we sent the packet on.
  3390. // Use the interface the icmp error arrived on to determine
  3391. // the scope ids for both the local and remote addresses.
  3392. //
  3393. SrcScopeId = DetermineScopeId(AlignAddr(&StatArg->IP->Source), IF);
  3394. DestScopeId = DetermineScopeId(AlignAddr(&StatArg->IP->Dest), IF);
  3395. //
  3396. // Find the TCB for the connection this packet was sent on.
  3397. //
  3398. KeAcquireSpinLock(&TCBTableLock, &Irql0);
  3399. StatusTCB = FindTCB(AlignAddr(&StatArg->IP->Source),
  3400. AlignAddr(&StatArg->IP->Dest),
  3401. SrcScopeId, DestScopeId,
  3402. InvokingTCP->tcp_src, InvokingTCP->tcp_dest);
  3403. if (StatusTCB != NULL) {
  3404. //
  3405. // Found one. Get the lock on it, and continue.
  3406. //
  3407. CHECK_STRUCT(StatusTCB, tcb);
  3408. KeAcquireSpinLock(&StatusTCB->tcb_lock, &Irql1);
  3409. KeReleaseSpinLock(&TCBTableLock, Irql1);
  3410. //
  3411. // Make sure the TCB is in a state that is interesting.
  3412. //
  3413. if (StatusTCB->tcb_state == TCB_CLOSED ||
  3414. StatusTCB->tcb_state == TCB_TIME_WAIT ||
  3415. CLOSING(StatusTCB)) {
  3416. //
  3417. // Connection is already closing, or too new to have sent
  3418. // anything yet. Leave it be.
  3419. //
  3420. KeReleaseSpinLock(&StatusTCB->tcb_lock, Irql0);
  3421. return IP_PROTOCOL_NONE; // Discard error packet.
  3422. }
  3423. switch (StatArg->Status) {
  3424. case IP_UNRECOGNIZED_NEXT_HEADER:
  3425. //
  3426. // Destination protocol unreachable.
  3427. // We treat this as a fatal errors. Close the connection.
  3428. //
  3429. StatusTCB->tcb_error = StatArg->Status;
  3430. StatusTCB->tcb_refcnt++;
  3431. TryToCloseTCB(StatusTCB, TCB_CLOSE_UNREACH, Irql0);
  3432. RemoveTCBFromConn(StatusTCB);
  3433. NotifyOfDisc(StatusTCB,
  3434. MapIPError(StatArg->Status, TDI_DEST_UNREACHABLE),
  3435. NULL);
  3436. KeAcquireSpinLock(&StatusTCB->tcb_lock, &Irql1);
  3437. DerefTCB(StatusTCB, Irql1);
  3438. return IP_PROTOCOL_NONE; // Done with packet.
  3439. break;
  3440. case IP_DEST_NO_ROUTE:
  3441. case IP_DEST_ADDR_UNREACHABLE:
  3442. case IP_DEST_PORT_UNREACHABLE:
  3443. case IP_DEST_PROHIBITED:
  3444. case IP_BAD_ROUTE:
  3445. case IP_HOP_LIMIT_EXCEEDED:
  3446. case IP_REASSEMBLY_TIME_EXCEEDED:
  3447. case IP_PARAMETER_PROBLEM:
  3448. //
  3449. // Soft errors. Save the error in case it times out.
  3450. //
  3451. StatusTCB->tcb_error = StatArg->Status;
  3452. break;
  3453. case IP_PACKET_TOO_BIG: {
  3454. uint PMTU;
  3455. IF_TCPDBG(TCP_DEBUG_MSS) {
  3456. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  3457. "TCPControlReceive: Got Packet Too Big\n"));
  3458. }
  3459. //
  3460. // We sent a TCP datagram which was too big for the path to
  3461. // our destination. That packet was dropped by the router
  3462. // which sent us this error message. The Arg value is TRUE
  3463. // if this Packet Too Big reduced our PMTU, FALSE otherwise.
  3464. //
  3465. if (!StatArg->Arg)
  3466. break;
  3467. //
  3468. // Our PMTU was reduced. Find out what it is now.
  3469. //
  3470. if (StatusTCB->tcb_rce == NULL) {
  3471. //
  3472. // We've released our RCE due to SYN attack protection
  3473. // or because our outgoing interface went away. In
  3474. // either case it is unlikely that we would be receiving
  3475. // a legitimate and useful Packet Too Big for this TCB.
  3476. // So we ignore it.
  3477. //
  3478. break;
  3479. }
  3480. PMTU = GetEffectivePathMTUFromRCE(StatusTCB->tcb_rce);
  3481. //
  3482. // Update fields based on new PMTU.
  3483. //
  3484. StatusTCB->tcb_pmtu = PMTU;
  3485. StatusTCB->tcb_security = SecurityStateValidationCounter;
  3486. CalculateMSSForTCB(StatusTCB);
  3487. //
  3488. // Since our PMTU was reduced, we know that this is the first
  3489. // Packet Too Big we've received about this bottleneck.
  3490. // We should retransmit so long as this is for a legitimate
  3491. // outstanding packet (i.e. sequence number is is greater than
  3492. // the last acked and less than our current send next).
  3493. //
  3494. DropSeq = net_long(InvokingTCP->tcp_seq);
  3495. if ((SEQ_GTE(DropSeq, StatusTCB->tcb_senduna) &&
  3496. SEQ_LT(DropSeq, StatusTCB->tcb_sendnext))) {
  3497. //
  3498. // Need to initiate a retransmit.
  3499. //
  3500. ResetSendNext(StatusTCB, DropSeq);
  3501. //
  3502. // WINBUG #242757 11-27-2000 richdr TCP resp. to Packet Too Big
  3503. // RFC 1981 states that "a retransmission caused by a Packet
  3504. // Too Big message should not change the congestion window.
  3505. // It should, however, trigger the slow-start mechanism."
  3506. // The code below would appear to be broken. However, the
  3507. // IPv4 stack works this way.
  3508. //
  3509. //
  3510. // Set the congestion window to allow only one packet.
  3511. // This may prevent us from sending anything if we
  3512. // didn't just set sendnext to senduna. This is OK,
  3513. // we'll retransmit later, or send when we get an ack.
  3514. //
  3515. StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
  3516. DelayAction(StatusTCB, NEED_OUTPUT);
  3517. }
  3518. }
  3519. break;
  3520. default:
  3521. // Should never happen.
  3522. KdBreakPoint();
  3523. break;
  3524. }
  3525. KeReleaseSpinLock(&StatusTCB->tcb_lock, Irql0);
  3526. } else {
  3527. //
  3528. // Couldn't find a matching TCB. Connection probably went away since
  3529. // we sent the offending packet. Just free the lock and return.
  3530. //
  3531. KeReleaseSpinLock(&TCBTableLock, Irql0);
  3532. }
  3533. return IP_PROTOCOL_NONE; // Done with packet.
  3534. }
  3535. #pragma BEGIN_INIT
  3536. //* InitTCPRcv - Initialize TCP receive side.
  3537. //
  3538. // Called during init time to initialize our TCP receive side.
  3539. //
  3540. int // Returns: TRUE.
  3541. InitTCPRcv(
  3542. void) // Nothing.
  3543. {
  3544. ExInitializeSListHead(&TCPRcvReqFree);
  3545. KeInitializeSpinLock(&RequestCompleteLock);
  3546. KeInitializeSpinLock(&TCBDelayLock);
  3547. KeInitializeSpinLock(&TCPRcvReqFreeLock);
  3548. INITQ(&ConnRequestCompleteQ);
  3549. INITQ(&SendCompleteQ);
  3550. INITQ(&TCBDelayQ);
  3551. RequestCompleteFlags = 0;
  3552. TCBDelayRtnCount = 0;
  3553. TCBDelayRtnLimit = (uint) KeNumberProcessors;
  3554. if (TCBDelayRtnLimit > TCB_DELAY_RTN_LIMIT)
  3555. TCBDelayRtnLimit = TCB_DELAY_RTN_LIMIT;
  3556. RtlZeroMemory(&DummyPacket, sizeof DummyPacket);
  3557. DummyPacket.Flags = PACKET_OURS;
  3558. return TRUE;
  3559. }
  3560. #pragma END_INIT
  3561. //* UnloadTCPRcv
  3562. //
  3563. // Cleanup and prepare for stack unload.
  3564. //
  3565. void
  3566. UnloadTCPRcv(void)
  3567. {
  3568. PSLIST_ENTRY BufferLink;
  3569. while ((BufferLink = ExInterlockedPopEntrySList(&TCPRcvReqFree,
  3570. &TCPRcvReqFreeLock))
  3571. != NULL) {
  3572. TCPRcvReq *RcvReq = CONTAINING_RECORD(BufferLink, TCPRcvReq, trr_next);
  3573. CHECK_STRUCT(RcvReq, trr);
  3574. ExFreePool(RcvReq);
  3575. }
  3576. }