Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

5531 lines
222 KiB

  1. /*++
  2. Copyright (c) 1990-2000 Microsoft Corporation
  3. Module Name:
  4. TCPRCV.C - TCP receive protocol code.
  5. Abstract:
  6. This file contains the code for handling incoming TCP packets.
  7. Author:
  8. [Environment:]
  9. kernel mode only
  10. [Notes:]
  11. optional-notes
  12. Revision History:
  13. --*/
  14. #include "precomp.h"
  15. #include "addr.h"
  16. #include "tcp.h"
  17. #include "tcb.h"
  18. #include "tcpconn.h"
  19. #include "tcpsend.h"
  20. #include "tcprcv.h"
  21. #include "pplasl.h"
  22. #include "tcpdeliv.h"
  23. #include "tlcommon.h"
  24. #include "info.h"
  25. #include "tcpcfg.h"
  26. #include "secfltr.h"
  27. CACHE_LINE_KSPIN_LOCK SynAttLock;
  28. CACHE_LINE_ULONG TCBDelayRtnLimit;
  29. typedef struct CACHE_ALIGN CPUDelayQ {
  30. DEFINE_LOCK_STRUCTURE(TCBDelayLock)
  31. ulong TCBDelayRtnCount;
  32. Queue TCBDelayQ;
  33. } CPUDelayQ;
  34. C_ASSERT(sizeof(CPUDelayQ) % MAX_CACHE_LINE_SIZE == 0);
  35. C_ASSERT(__alignof(CPUDelayQ) == MAX_CACHE_LINE_SIZE);
  36. CPUDelayQ *PerCPUDelayQ;
  37. BOOLEAN PartitionedDelayQ = TRUE;
  38. uint MaxDupAcks;
  39. #define TCB_DELAY_RTN_LIMIT 4
  40. #if DBG
  41. ulong DbgTcpHwChkSumOk = 0;
  42. ulong DbgTcpHwChkSumErr = 0;
  43. ulong DbgDnsProb = 0;
  44. #endif
  45. extern uint Time_Proc;
  46. extern CTELock *pTWTCBTableLock;
  47. extern CTELock *pTCBTableLock;
  48. #if IRPFIX
  49. extern PDEVICE_OBJECT TCPDeviceObject;
  50. #endif
  51. extern Queue TWQueue;
  52. extern ulong CurrentTCBs;
  53. extern ulong MaxFreeTcbs;
  54. extern IPInfo LocalNetInfo;
  55. #define PERSIST_TIMEOUT MS_TO_TICKS(500)
  //
  // TimeWaitAction - what the caller of HandleTWTCB must do next with a
  // segment that matched a time-wait TCB.
  //
  typedef enum {
      TwaDoneProcessing,      // Nothing further to do for this segment.
      TwaSendReset,           // Caller should answer with a reset.
      TwaAcceptConnection,    // Caller should treat the SYN as a new connection.
      TwaMaxActions           // Count of actions; not a real action.
  } TimeWaitAction;
  62. TimeWaitAction
  63. HandleTWTCB(TWTCB * RcvTCB, uint flags, SeqNum seq, uint Partition);
  64. void
  65. SendTWtcbACK(TWTCB *ACKTcb, uint Partition, CTELockHandle TCBHandle);
  66. void
  67. ReInsert2MSL(TWTCB *RemovedTCB);
  68. void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  69. void ResetAndFastSend(TCB *SeqTCB, SeqNum NewSeq, uint NewCWin);
  70. void GetRandomISN(PULONG SeqNum, TCPAddrInfo *TcpAddr);
  71. extern uint TcpHostOpts;
  72. extern BOOLEAN fAcdLoadedG;
  73. extern NTSTATUS TCPPrepareIrpForCancel(PTCP_CONTEXT TcpContext, PIRP Irp,
  74. PDRIVER_CANCEL CancelRoutine);
  75. extern void TCPRequestComplete(void *Context, uint Status,
  76. uint UnUsed);
  77. void TCPCancelRequest(PDEVICE_OBJECT Device, PIRP Irp);
  78. #ifdef DBG
  79. extern ULONG SListCredits;
  80. #endif
  81. //
  82. // All of the init code can be discarded.
  83. //
  84. int InitTCPRcv(void);
  85. void UnInitTCPRcv(void);
  86. #ifdef ALLOC_PRAGMA
  87. #pragma alloc_text(INIT, InitTCPRcv)
  88. #pragma alloc_text(INIT, UnInitTCPRcv)
  89. #endif
  90. //* AdjustRcvWin - Adjust the receive window on a TCB.
  91. //
  92. // A utility routine that adjusts the receive window to an even multiple of
  93. // the local segment size. We round it up to the next closest multiple, or
  94. // leave it alone if it's already an event multiple. We assume we have
  95. // exclusive access to the input TCB.
  96. //
  97. // Input: WinTCB - TCB to be adjusted.
  98. //
  99. // Returns: Nothing.
  100. //
  101. void
  102. AdjustRcvWin(TCB *WinTCB)
  103. {
  104. ushort LocalMSS;
  105. uchar FoundMSS;
  106. ulong SegmentsInWindow;
  107. uint ScaledMaxRcvWin;
  108. ASSERT(WinTCB->tcb_defaultwin != 0);
  109. ASSERT(WinTCB->tcb_rcvwin != 0);
  110. ASSERT(WinTCB->tcb_remmss != 0);
  111. if (WinTCB->tcb_flags & WINDOW_SET)
  112. return;
  113. // First, get the local MSS by calling IP.
  114. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu)(WinTCB->tcb_saddr, &LocalMSS);
  115. // If we didn't find it, error out.
  116. if (!FoundMSS) {
  117. //ASSERT(FALSE);
  118. return;
  119. }
  120. LocalMSS -= sizeof(TCPHeader);
  121. LocalMSS = MIN(LocalMSS, WinTCB->tcb_remmss);
  122. // Compute the actual maximum receive window, accounting for the presence
  123. // of window scaling on this particular connection. This value is used
  124. // in the computations below, rather than the cross-connection maximum.
  125. ScaledMaxRcvWin = TCP_MAXWIN << WinTCB->tcb_rcvwinscale;
  126. // Make sure we have at least 4 segments in window, if that wouldn't make
  127. // the window too big.
  128. SegmentsInWindow = WinTCB->tcb_defaultwin / (ulong)LocalMSS;
  129. if (SegmentsInWindow < 4) {
  130. // We have fewer than four segments in the window. Round up to 4
  131. // if we can do so without exceeding the maximum window size; otherwise
  132. // use the maximum multiple that we can fit in 64K. The exception is if
  133. // we can only fit one integral multiple in the window - in that case
  134. // we'll use a window equal to the scaled maximum.
  135. if (LocalMSS <= (ScaledMaxRcvWin / 4)) {
  136. WinTCB->tcb_defaultwin = (uint)(4 * LocalMSS);
  137. } else {
  138. ulong SegmentsInMaxWindow;
  139. // Figure out the maximum number of segments we could possibly
  140. // fit in a window. If this is > 1, use that as the basis for
  141. // our window size. Otherwise use a maximum size window.
  142. SegmentsInMaxWindow = ScaledMaxRcvWin / (ulong)LocalMSS;
  143. if (SegmentsInMaxWindow != 1)
  144. WinTCB->tcb_defaultwin = SegmentsInMaxWindow * (ulong)LocalMSS;
  145. else
  146. WinTCB->tcb_defaultwin = ScaledMaxRcvWin;
  147. }
  148. WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
  149. } else {
  150. // If it's not already an even multiple, bump the default and current
  151. // windows to the nearest multiple.
  152. if ((SegmentsInWindow * (ulong)LocalMSS) != WinTCB->tcb_defaultwin) {
  153. ulong NewWindow;
  154. NewWindow = (SegmentsInWindow + 1) * (ulong)LocalMSS;
  155. // Don't let the new window be > 64K
  156. // or what ever is set (if window scaling is enabled)
  157. if (NewWindow <= ScaledMaxRcvWin) {
  158. WinTCB->tcb_defaultwin = (uint)NewWindow;
  159. WinTCB->tcb_rcvwin = (uint)NewWindow;
  160. }
  161. }
  162. }
  163. }
  164. //* CompleteRcvs - Complete rcvs on a TCB.
  165. //
  166. // Called when we need to complete rcvs on a TCB. We'll pull things from
  167. // the TCB's rcv queue, as long as there are rcvs that have the PUSH bit
  168. // set.
  169. //
  170. // Input: CmpltTCB - TCB to complete on.
  171. //
  172. // Returns: Nothing.
  173. //
  174. void
  175. CompleteRcvs(TCB * CmpltTCB)
  176. {
  177. CTELockHandle TCBHandle;
  178. TCPRcvReq *CurrReq, *NextReq, *IndReq;
  179. #if TRACE_EVENT
  180. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  181. WMIData WMIInfo;
  182. #endif
  183. CTEStructAssert(CmpltTCB, tcb);
  184. ASSERT(CmpltTCB->tcb_refcnt != 0);
  185. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  186. if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
  187. && (CmpltTCB->tcb_rcvhead != NULL)) {
  188. CmpltTCB->tcb_flags |= RCV_CMPLTING;
  189. for (;;) {
  190. CurrReq = CmpltTCB->tcb_rcvhead;
  191. IndReq = NULL;
  192. do {
  193. CTEStructAssert(CurrReq, trr);
  194. if (CurrReq->trr_flags & TRR_PUSHED) {
  195. // Need to complete this one. If this is the current rcv
  196. // advance the current rcv to the next one in the list.
  197. // Then set the list head to the next one in the list.
  198. NextReq = CurrReq->trr_next;
  199. if (CmpltTCB->tcb_currcv == CurrReq)
  200. CmpltTCB->tcb_currcv = NextReq;
  201. CmpltTCB->tcb_rcvhead = NextReq;
  202. if (NextReq == NULL) {
  203. // We've just removed the last buffer. Set the
  204. // rcvhandler to PendData, in case something
  205. // comes in during the callback.
  206. ASSERT(CmpltTCB->tcb_rcvhndlr != IndicateData);
  207. CmpltTCB->tcb_rcvhndlr = PendData;
  208. }
  209. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  210. if (CurrReq->trr_uflags != NULL)
  211. *(CurrReq->trr_uflags) =
  212. TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
  213. #if TRACE_EVENT
  214. CPCallBack = TCPCPHandlerRoutine;
  215. if (CPCallBack != NULL) {
  216. ulong GroupType;
  217. WMIInfo.wmi_destaddr = CmpltTCB->tcb_daddr;
  218. WMIInfo.wmi_destport = CmpltTCB->tcb_dport;
  219. WMIInfo.wmi_srcaddr = CmpltTCB->tcb_saddr;
  220. WMIInfo.wmi_srcport = CmpltTCB->tcb_sport;
  221. WMIInfo.wmi_size = CurrReq->trr_amt;
  222. WMIInfo.wmi_context = CmpltTCB->tcb_cpcontext;
  223. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_RECEIVE;
  224. (*CPCallBack) (GroupType, (PVOID) &WMIInfo, sizeof(WMIInfo), NULL);
  225. }
  226. #endif
  227. (*CurrReq->trr_rtn) (CurrReq->trr_context,
  228. CurrReq->trr_status, CurrReq->trr_amt);
  229. if (IndReq != NULL)
  230. FreeRcvReq(CurrReq);
  231. else {
  232. IndReq = CurrReq;
  233. IndReq->trr_status = TDI_SUCCESS;
  234. }
  235. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  236. CurrReq = CmpltTCB->tcb_rcvhead;
  237. } else
  238. // This one isn't to be completed, so bail out.
  239. break;
  240. } while (CurrReq != NULL);
  241. // Now see if we've completed all of the requests. If we have, we
  242. // may need to deal with pending data and/or reset the rcv. handler.
  243. if (CurrReq == NULL) {
  244. // We've completed everything that can be, so stop the push
  245. // timer. We don't stop it if CurrReq isn't NULL because we
  246. // want to make sure later data is eventually pushed.
  247. STOP_TCB_TIMER_R(CmpltTCB, PUSH_TIMER);
  248. ASSERT(IndReq != NULL);
  249. // No more recv. requests.
  250. if (CmpltTCB->tcb_pendhead == NULL) {
  251. FreeRcvReq(IndReq);
  252. // No pending data. Set the rcv. handler to either PendData
  253. // or IndicateData.
  254. if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
  255. if (CmpltTCB->tcb_rcvind != NULL &&
  256. CmpltTCB->tcb_indicated == 0)
  257. CmpltTCB->tcb_rcvhndlr = IndicateData;
  258. else
  259. CmpltTCB->tcb_rcvhndlr = PendData;
  260. } else {
  261. goto Complete_Notify;
  262. }
  263. } else {
  264. // We have pending data to deal with.
  265. if (CmpltTCB->tcb_rcvind != NULL &&
  266. ((CmpltTCB->tcb_indicated == 0) || (CmpltTCB->tcb_moreflag == 4))) {
  267. // There's a rcv. indicate handler on this TCB. Call
  268. // the indicate handler with the pending data.
  269. IndicatePendingData(CmpltTCB, IndReq, TCBHandle);
  270. SendACK(CmpltTCB);
  271. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  272. // See if a buffer has been posted. If so, we'll need
  273. // to check and see if it needs to be completed.
  274. if (CmpltTCB->tcb_rcvhead != NULL)
  275. continue;
  276. else {
  277. // If the pending head is now NULL, we've used up
  278. // all the data.
  279. if (CmpltTCB->tcb_pendhead == NULL &&
  280. (CmpltTCB->tcb_flags &
  281. (DISC_PENDING | GC_PENDING)))
  282. goto Complete_Notify;
  283. }
  284. } else {
  285. // No indicate handler, so nothing to do. The rcv.
  286. // handler should already be set to PendData.
  287. FreeRcvReq(IndReq);
  288. ASSERT(CmpltTCB->tcb_rcvhndlr == PendData);
  289. }
  290. }
  291. } else {
  292. if (IndReq != NULL)
  293. FreeRcvReq(IndReq);
  294. }
  295. break;
  296. }
  297. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  298. }
  299. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  300. return;
  301. Complete_Notify:
  302. // Something is pending. Figure out what it is, and do
  303. // it.
  304. if (CmpltTCB->tcb_flags & GC_PENDING) {
  305. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  306. // Bump the refcnt, because GracefulClose will
  307. // deref the TCB and we're not really done with
  308. // it yet.
  309. REFERENCE_TCB(CmpltTCB);
  310. //it is okay to ignore the tw state since we are returning frome here
  311. //anyway, without touching the tcb.
  312. GracefulClose(CmpltTCB, CmpltTCB->tcb_flags & TW_PENDING,
  313. (CmpltTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC) ?
  314. FALSE : TRUE, TCBHandle);
  315. } else if (CmpltTCB->tcb_flags & DISC_PENDING) {
  316. NotifyOfDisc(CmpltTCB, NULL, TDI_GRACEFUL_DISC, &TCBHandle);
  317. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  318. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  319. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  320. } else {
  321. ASSERT(FALSE);
  322. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  323. }
  324. return;
  325. }
  326. //* CompleteSends - Complete TCP send requests.
  327. //
  328. // Called when we need to complete a chain of send-requests pulled off a TCB
  329. // during our ACK processing. If the SendQ is non-empty, requests are dequeued
  330. // and completed.
  331. //
  332. // Input: SendQ - A chain of TCPSendReq structures.
  333. //
  334. // Returns: nothing.
  335. //
  336. void
  337. CompleteSends(Queue* SendQ)
  338. {
  339. Queue* CurrentQ = QHEAD(SendQ);
  340. TCPReq* Req;
  341. if (EMPTYQ(SendQ)) {
  342. return;
  343. }
  344. do {
  345. Req = QSTRUCT(TCPReq, CurrentQ, tr_q);
  346. CurrentQ = QNEXT(CurrentQ);
  347. CTEStructAssert(Req, tr);
  348. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  349. Req->tr_status == TDI_SUCCESS
  350. ? ((TCPSendReq*)Req)->tsr_size : 0);
  351. FreeSendReq((TCPSendReq*)Req);
  352. } while (CurrentQ != QEND(SendQ));
  353. }
  354. //* ProcessPerCpuTCBDelayQ - Process TCBs on the delayed Q on this cpu.
  355. //
  356. // Called at various times to process TCBs on the delayed Q.
  357. //
  358. // Input: Proc - Index into the per-processor delay queues.
  359. // OrigIrql - The callers IRQL.
  360. // StopTicks - Optional pointer to KeQueryTickCount value after
  361. // which processing should stop. This is used to
  362. // limit the time spent at DISPATCH_LEVEL.
  363. // ItemsProcessed - Optional output pointer where the number of items
  364. // processed is stored. (Caller takes responsibility
  365. // for initializing this counter if used.)
  366. //
  367. // Returns: TRUE if processing was stopped due to time constraint. FALSE
  368. // otherwise, or if no time constraint was given.
  369. //
  370. LOGICAL
  371. ProcessPerCpuTCBDelayQ(int Proc, KIRQL OrigIrql,
  372. const LARGE_INTEGER* StopTicks, ulong *ItemsProcessed)
  373. {
  374. CPUDelayQ* CpuQ;
  375. Queue* Item;
  376. TCB *DelayTCB;
  377. CTELockHandle TCBHandle;
  378. LARGE_INTEGER Ticks;
  379. LOGICAL TimeConstrained = FALSE;
  380. CpuQ = &PerCPUDelayQ[Proc];
  381. while ((Item = InterlockedDequeueIfNotEmptyAtIrql(&CpuQ->TCBDelayQ,
  382. &CpuQ->TCBDelayLock,
  383. OrigIrql)) != NULL) {
  384. DelayTCB = STRUCT_OF(TCB, Item, tcb_delayq);
  385. CTEStructAssert(DelayTCB, tcb);
  386. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  387. ASSERT(DelayTCB->tcb_refcnt != 0);
  388. ASSERT(DelayTCB->tcb_flags & IN_DELAY_Q);
  389. while (!CLOSING(DelayTCB) && (DelayTCB->tcb_flags & DELAYED_FLAGS)) {
  390. if (DelayTCB->tcb_flags & NEED_RCV_CMPLT) {
  391. DelayTCB->tcb_flags &= ~NEED_RCV_CMPLT;
  392. CTEFreeLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, TCBHandle);
  393. CompleteRcvs(DelayTCB);
  394. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  395. }
  396. if (DelayTCB->tcb_flags & NEED_OUTPUT) {
  397. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  398. REFERENCE_TCB(DelayTCB);
  399. TCPSend(DelayTCB, TCBHandle);
  400. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  401. }
  402. if (DelayTCB->tcb_flags & NEED_ACK) {
  403. DelayTCB->tcb_flags &= ~NEED_ACK;
  404. CTEFreeLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, TCBHandle);
  405. SendACK(DelayTCB);
  406. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  407. }
  408. }
  409. if (CLOSING(DelayTCB) &&
  410. (DelayTCB->tcb_flags & NEED_OUTPUT) &&
  411. DATA_RCV_STATE(DelayTCB->tcb_state) && (DelayTCB->tcb_closereason & TCB_CLOSE_RST)) {
  412. #if DBG
  413. DbgDnsProb++;
  414. #endif
  415. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  416. REFERENCE_TCB(DelayTCB);
  417. TCPSend(DelayTCB, TCBHandle);
  418. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  419. }
  420. DelayTCB->tcb_flags &= ~IN_DELAY_Q;
  421. DerefTCB(DelayTCB, TCBHandle);
  422. if (ItemsProcessed) {
  423. (*ItemsProcessed)++;
  424. }
  425. // If a time constraint was given, bail out if we've past it.
  426. //
  427. if (StopTicks) {
  428. KeQueryTickCount(&Ticks);
  429. if (Ticks.QuadPart > StopTicks->QuadPart) {
  430. TimeConstrained = TRUE;
  431. break;
  432. }
  433. }
  434. }
  435. return TimeConstrained;
  436. }
  437. //* ProcessTCBDelayQ - Process TCBs on the delayed Q.
  438. //
  439. // Called at various times to process TCBs on the delayed Q.
  440. //
  441. // Input: OrigIrql - Current IRQL.
  442. // ProcessAllQueues - Process all queues if TRUE; otherwise, current
  443. // processor queue only.
  444. //
  445. // Returns: Nothing.
  446. //
  447. void
  448. ProcessTCBDelayQ(KIRQL OrigIrql, BOOLEAN ProcessAllQueues)
  449. {
  450. uint i;
  451. uint Index;
  452. LOGICAL TimeConstrained;
  453. ulong ItemsProcessed;
  454. LARGE_INTEGER TicksDelta;
  455. LARGE_INTEGER StopTicks;
  456. ulong DelayRtnCount;
  457. ulong Proc;
  458. //
  459. // Get the current processor#
  460. //
  461. Proc = KeGetCurrentProcessorNumber();
  462. // Check for recursion. We do not stop recursion completely, only
  463. // limit it. This is done to allow multiple threads to process the
  464. // TCBDelayQ simultaneously.
  465. DelayRtnCount = CTEInterlockedIncrementLong((PLONG)&(PerCPUDelayQ[Proc].TCBDelayRtnCount));
  466. if (DelayRtnCount > TCBDelayRtnLimit.Value) {
  467. CTEInterlockedDecrementLong((PLONG)&(PerCPUDelayQ[Proc].TCBDelayRtnCount));
  468. return;
  469. }
  470. //
  471. // Constrain ProcessPerCpuTCBDelayQ to run only for 100 ms maximum.
  472. //
  473. ItemsProcessed = 0;
  474. TicksDelta.HighPart = 0;
  475. TicksDelta.LowPart = (100 * 10 * 1000) / KeQueryTimeIncrement();
  476. KeQueryTickCount(&StopTicks);
  477. StopTicks.QuadPart = StopTicks.QuadPart + TicksDelta.QuadPart;
  478. for (i = 0; i < Time_Proc; i++) {
  479. //
  480. // Delayed items on the current processor is processed first.
  481. // This improves the chances of L1 cache hit for the TCBs.
  482. //
  483. Index = (i + Proc) % Time_Proc;
  484. // We are just peeking at the queue to prevent taking it's
  485. // lock uneccessarily.
  486. //
  487. if (!EMPTYQ(&PerCPUDelayQ[Index].TCBDelayQ)) {
  488. TimeConstrained = ProcessPerCpuTCBDelayQ(Index,
  489. OrigIrql,
  490. &StopTicks,
  491. &ItemsProcessed);
  492. if (TimeConstrained) {
  493. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  494. "ProcessTCBDelayQ: Processed %u TCBs before "
  495. "time expired.\n",
  496. ItemsProcessed));
  497. break;
  498. }
  499. }
  500. //
  501. // If there is no need to process all the delay quues, break out after
  502. // processing the current one.
  503. //
  504. if (!ProcessAllQueues) {
  505. break;
  506. }
  507. }
  508. CTEInterlockedDecrementLong((PLONG)&(PerCPUDelayQ[Proc].TCBDelayRtnCount));
  509. }
  510. //* DelayAction - Put a TCB on the queue for a delayed action.
  511. //
  512. // Called when we want to put a TCB on the DelayQ for a delayed action at
  513. // rcv. complete or some other time. The lock on the TCB must be held when
  514. // this is called.
  515. //
  516. // Input: DelayTCB - TCB which we're going to sched.
  517. // Action - Action we're scheduling.
  518. //
  519. // Returns: Nothing.
  520. //
  521. void
  522. DelayAction(TCB * DelayTCB, uint Action)
  523. {
  524. // Schedule the completion.
  525. //
  526. DelayTCB->tcb_flags |= Action;
  527. if (!(DelayTCB->tcb_flags & IN_DELAY_Q)) {
  528. uint Proc;
  529. #if MILLEN
  530. Proc = 0;
  531. #else // MILLEN
  532. Proc = KeGetCurrentProcessorNumber();
  533. #endif // !MILLEN
  534. DelayTCB->tcb_flags |= IN_DELAY_Q;
  535. REFERENCE_TCB(DelayTCB); // Reference this for later.
  536. //We may not be running timer dpcs on all the processors
  537. if (!(Proc < Time_Proc)) {
  538. Proc = 0;
  539. }
  540. InterlockedEnqueueAtDpcLevel(&PerCPUDelayQ[Proc].TCBDelayQ,
  541. &DelayTCB->tcb_delayq,
  542. &PerCPUDelayQ[Proc].TCBDelayLock);
  543. }
  544. }
  545. //* HandleTWTCB - Process a segment matching a time wait TCB.
  546. //
  547. // This function operates on a TCB in time wait state. The action taken is
  548. // based on RFC 793 with modifications done to handle all the actions on a
  549. // time wait TCB upfront and moving a time-wait TCB to SYN-RCVD state (the
  550. // conditions have been rearranged as well).
  551. //
  552. // Input: RcvTCB - TCB which matching the segment.
  553. // flags - Flags on the segment.
  554. // seq - Sequence number of the segment.
  555. // Partition - Partition to which the TCB belongs.
  556. //
  557. // Returns: The action to be taken by the caller.
  558. //
  559. TimeWaitAction
  560. HandleTWTCB(TWTCB * RcvTCB, uint flags, SeqNum seq, uint Partition)
  561. {
  562. if (flags & TCP_FLAG_RST) {
  563. if (SEQ_EQ(seq, RcvTCB->twtcb_rcvnext)) {
  564. RemoveTWTCB(RcvTCB, Partition);
  565. FreeTWTCB(RcvTCB);
  566. }
  567. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  568. return TwaDoneProcessing;
  569. } else if (flags & TCP_FLAG_ACK) {
  570. if (SEQ_EQ(seq, RcvTCB->twtcb_rcvnext) && (flags & TCP_FLAG_SYN)) {
  571. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  572. return TwaSendReset;
  573. } else if (SEQ_EQ(seq, RcvTCB->twtcb_rcvnext - 1) &&
  574. ((flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) == TCP_FLAG_FIN)) {
  575. ReInsert2MSL(RcvTCB);
  576. } else if (SEQ_EQ(seq, RcvTCB->twtcb_rcvnext)) {
  577. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  578. return TwaDoneProcessing;
  579. }
  580. SendTWtcbACK(RcvTCB, Partition, DISPATCH_LEVEL);
  581. return TwaDoneProcessing;
  582. } else if (SEQ_GTE(seq, RcvTCB->twtcb_rcvnext) &&
  583. ((flags & TCP_FLAGS_ALL) == TCP_FLAG_SYN)) {
  584. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  585. return TwaAcceptConnection;
  586. } else {
  587. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  588. return TwaDoneProcessing;
  589. }
  590. }
  591. //* TCPRcvComplete - Handle a receive complete.
  592. //
  593. // Called by the lower layers when we're done receiving. If we have any work
  594. // to do, we use this time to do it.
  595. //
  596. // Input: Nothing.
  597. //
  598. // Returns: Nothing.
  599. //
  600. void
  601. TCPRcvComplete(void)
  602. {
  603. ProcessTCBDelayQ(DISPATCH_LEVEL, !PartitionedDelayQ);
  604. }
  605. //* CompleteConnReq - Complete a connection request on a TCB.
  606. //
  607. // A utility function to complete a connection request on a TCB. We remove
  608. // the connreq, and put it on the ConnReqCmpltQ where it will be picked
  609. // off later during RcvCmplt processing. We assume the TCB lock is held when
  610. // we're called.
  611. //
  612. // Input: CmpltTCB - TCB from which to complete.
  613. // OptInfo - IP OptInfo for completeion.
  614. // Status - Status to complete with.
  615. //
  616. // Returns: Nothing.
  617. //
  618. void
  619. CompleteConnReq(TCB * CmpltTCB, IPOptInfo * OptInfo, TDI_STATUS Status)
  620. {
  621. TCPConnReq *ConnReq;
  622. CTEStructAssert(CmpltTCB, tcb);
  623. ConnReq = CmpltTCB->tcb_connreq;
  624. if (ConnReq != NULL) {
  625. uint FastChk;
  626. // There's a connreq on this TCB. Fill in the connection information
  627. // before returning it.
  628. if (TCB_TIMER_RUNNING_R(CmpltTCB, CONN_TIMER))
  629. STOP_TCB_TIMER_R(CmpltTCB, CONN_TIMER);
  630. CmpltTCB->tcb_connreq = NULL;
  631. UpdateConnInfo(ConnReq->tcr_conninfo, OptInfo, CmpltTCB->tcb_daddr,
  632. CmpltTCB->tcb_dport);
  633. if (ConnReq->tcr_addrinfo) {
  634. UpdateConnInfo(ConnReq->tcr_addrinfo, OptInfo, CmpltTCB->tcb_saddr,
  635. CmpltTCB->tcb_sport);
  636. }
  637. ConnReq->tcr_req.tr_status = Status;
  638. // In order to complete this request directly, we must block further
  639. // receive-processing until this connect-indication is complete.
  640. // We require that any caller of this routine must already hold
  641. // a reference to the TCB so that the dereference below does not drop
  642. // the reference-count to zero.
  643. FastChk = (CmpltTCB->tcb_fastchk & TCP_FLAG_IN_RCV) ^ TCP_FLAG_IN_RCV;
  644. CmpltTCB->tcb_fastchk |= FastChk;
  645. CTEFreeLockFromDPC(&CmpltTCB->tcb_lock);
  646. (ConnReq->tcr_req.tr_rtn)(ConnReq->tcr_req.tr_context,
  647. ConnReq->tcr_req.tr_status, 0);
  648. FreeConnReq(ConnReq);
  649. CTEGetLockAtDPC(&CmpltTCB->tcb_lock);
  650. CmpltTCB->tcb_fastchk &= ~FastChk;
  651. if (CmpltTCB->tcb_flags & SEND_AFTER_RCV) {
  652. CmpltTCB->tcb_flags &= ~SEND_AFTER_RCV;
  653. DelayAction(CmpltTCB, NEED_OUTPUT);
  654. }
  655. }
  656. #if DBG
  657. else {
  658. ASSERT((CmpltTCB->tcb_state == TCB_SYN_RCVD) &&
  659. (CmpltTCB->tcb_fastchk & TCP_FLAG_ACCEPT_PENDING));
  660. }
  661. #endif
  662. }
  663. BOOLEAN
  664. DelayedAcceptConn(AddrObj *ListenAO, IPAddr Src, ushort SrcPort,
  665. IPOptInfo *OptInfo, TCB *AcceptTCB)
  666. {
  667. TCPConn *CurrentConn = NULL;
  668. CTELockHandle ConnHandle;
  669. Queue *Temp;
  670. TCPConnReq *ConnReq = NULL;
  671. BOOLEAN FoundConn = FALSE;
  672. uchar TAddress[TCP_TA_SIZE];
  673. PVOID ConnContext;
  674. PConnectEvent Event;
  675. PVOID EventContext;
  676. TDI_STATUS Status;
  677. PTCP_CONTEXT TcpContext = NULL;
  678. ConnectEventInfo *EventInfo;
  679. CTEStructAssert(ListenAO, ao);
  680. CTEGetLockAtDPC(&ListenAO->ao_lock);
  681. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  682. if (!AO_VALID(ListenAO) || ListenAO->ao_connect == NULL) {
  683. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  684. return FALSE;
  685. }
  686. // He has a connect handler. Put the transport address together,
  687. // and call him. We also need to get the necessary resources
  688. // first.
  689. Event = ListenAO->ao_connect;
  690. EventContext = ListenAO->ao_conncontext;
  691. REF_AO(ListenAO);
  692. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  693. ConnReq = GetConnReq();
  694. if (ConnReq == NULL) {
  695. DELAY_DEREF_AO(ListenAO);
  696. return FALSE;
  697. }
  698. BuildTDIAddress(TAddress, Src, SrcPort);
  699. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  700. TCPTRACE(("indicating connect request\n"));
  701. }
  702. Status = (*Event) (EventContext, TCP_TA_SIZE,
  703. (PTRANSPORT_ADDRESS) TAddress, 0, NULL,
  704. OptInfo->ioi_optlength, OptInfo->ioi_options,
  705. &ConnContext, &EventInfo);
  706. if (Status == TDI_MORE_PROCESSING) {
  707. #if !MILLEN
  708. PIO_STACK_LOCATION IrpSp;
  709. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  710. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  711. Status = TCPPrepareIrpForCancel(
  712. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  713. EventInfo,
  714. TCPCancelRequest
  715. );
  716. if (!NT_SUCCESS(Status)) {
  717. Status = TDI_NOT_ACCEPTED;
  718. EventInfo = NULL;
  719. goto AcceptIrpCancelled;
  720. }
  721. // He accepted it. Find the connection on the AddrObj.
  722. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  723. TCPTRACE((
  724. "connect indication accepted, queueing request\n"
  725. ));
  726. }
  727. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  728. & (IrpSp->Parameters);
  729. ConnReq->tcr_conninfo =
  730. AcceptRequest->ReturnConnectionInformation;
  731. if (AcceptRequest->RequestConnectionInformation &&
  732. AcceptRequest->RequestConnectionInformation->RemoteAddress) {
  733. ConnReq->tcr_addrinfo =
  734. AcceptRequest->RequestConnectionInformation;
  735. } else {
  736. ConnReq->tcr_addrinfo = NULL;
  737. }
  738. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  739. ConnReq->tcr_req.tr_context = EventInfo;
  740. #else // !MILLEN
  741. ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
  742. ConnReq->tcr_req.tr_context = EventInfo.cei_context;
  743. ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
  744. ConnReq->tcr_addrinfo = NULL;
  745. #endif // MILLEN
  746. CurrentConn = NULL;
  747. #if !MILLEN
  748. if ((IrpSp->FileObject->DeviceObject == TCPDeviceObject) &&
  749. (PtrToUlong(IrpSp->FileObject->FsContext2) == TDI_CONNECTION_FILE) &&
  750. ((TcpContext = IrpSp->FileObject->FsContext) != NULL) &&
  751. ((CurrentConn = GetConnFromConnID(
  752. PtrToUlong(TcpContext->Handle.ConnectionContext), &ConnHandle)) != NULL) &&
  753. (CurrentConn->tc_context == ConnContext) &&
  754. !(CurrentConn->tc_flags & CONN_INVALID)) {
  755. // Found the Conn structure!!
  756. // Don't have to loop below.
  757. CTEStructAssert(CurrentConn, tc);
  758. CTEGetLockAtDPC(&ListenAO->ao_lock);
  759. CTEGetLockAtDPC(&AcceptTCB->tcb_lock);
  760. if (AcceptTCB->tcb_fastchk & TCP_FLAG_ACCEPT_PENDING) {
  761. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  762. AcceptRequest->RequestConnectionInformation,
  763. TRUE);
  764. } else {
  765. Status = TDI_INVALID_STATE;
  766. }
  767. if (Status == TDI_SUCCESS) {
  768. FoundConn = TRUE;
  769. ASSERT(AcceptTCB->tcb_state == TCB_SYN_RCVD);
  770. AcceptTCB->tcb_fastchk &= ~TCP_FLAG_ACCEPT_PENDING;
  771. AcceptTCB->tcb_connreq = ConnReq;
  772. AcceptTCB->tcb_conn = CurrentConn;
  773. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  774. CurrentConn->tc_tcb = AcceptTCB;
  775. CurrentConn->tc_refcnt++;
  776. // Move him from the idle q to the active
  777. // queue.
  778. REMOVEQ(&CurrentConn->tc_q);
  779. PUSHQ(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  780. } else {
  781. CTEFreeLockFromDPC(&AcceptTCB->tcb_lock);
  782. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  783. }
  784. } else {
  785. #endif // !MILLEN
  786. if (CurrentConn) {
  787. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  788. }
  789. SearchAO:
  790. CTEGetLockAtDPC(&ListenAO->ao_lock);
  791. Temp = QHEAD(&ListenAO->ao_idleq);;
  792. Status = TDI_INVALID_CONNECTION;
  793. while (Temp != QEND(&ListenAO->ao_idleq)) {
  794. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  795. CTEStructAssert(CurrentConn, tc);
  796. if ((CurrentConn->tc_context == ConnContext) &&
  797. !(CurrentConn->tc_flags & CONN_INVALID)) {
  798. //
  799. // We need to lock its TCPConnBlock, with care.
  800. // We'll ref the TCPConn so it can't go away,
  801. // then unlock the AO (which is already ref'd),
  802. // then relock. Note that tc_refcnt is updated
  803. // under ao_lock for any associated TCPConn.
  804. // If things have changed, go back and try again.
  805. //
  806. ++CurrentConn->tc_refcnt;
  807. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  808. CTEGetLockAtDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  809. if (--CurrentConn->tc_refcnt == 0 &&
  810. ((CurrentConn->tc_flags & CONN_INVALID) ||
  811. (CurrentConn->tc_tcb != NULL))) {
  812. ConnDoneRtn DoneRtn = CurrentConn->tc_donertn;
  813. DoneRtn(CurrentConn, DISPATCH_LEVEL);
  814. goto SearchAO;
  815. }
  816. CTEGetLockAtDPC(&ListenAO->ao_lock);
  817. CTEGetLockAtDPC(&AcceptTCB->tcb_lock);
  818. // We think we have a match. The connection
  819. // shouldn't have a TCB associated with it. If it
  820. // does, it's an error. InitTCBFromConn will
  821. // handle all this, but first confirm that
  822. // TCP_FLAG_ACCEPT_PENDING is still set. If not,
  823. // someone took this before we did.
  824. if (AcceptTCB->tcb_fastchk &
  825. TCP_FLAG_ACCEPT_PENDING) {
  826. Status =
  827. InitTCBFromConn(CurrentConn, AcceptTCB,
  828. #if !MILLEN
  829. AcceptRequest->RequestConnectionInformation,
  830. #else // !MILLEN
  831. EventInfo.cei_acceptinfo,
  832. #endif // MILLEN
  833. TRUE);
  834. } else {
  835. Status = TDI_INVALID_STATE;
  836. }
  837. if (Status == TDI_SUCCESS) {
  838. FoundConn = TRUE;
  839. AcceptTCB->tcb_fastchk &=
  840. ~TCP_FLAG_ACCEPT_PENDING;
  841. AcceptTCB->tcb_connreq = ConnReq;
  842. AcceptTCB->tcb_conn = CurrentConn;
  843. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  844. CurrentConn->tc_tcb = AcceptTCB;
  845. CurrentConn->tc_refcnt++;
  846. // Move him from the idle q to the active
  847. // queue.
  848. REMOVEQ(&CurrentConn->tc_q);
  849. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  850. } else {
  851. CTEFreeLockFromDPC(&AcceptTCB->tcb_lock);
  852. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  853. }
  854. // In any case, we're done now.
  855. break;
  856. }
  857. Temp = QNEXT(Temp);
  858. }
  859. #if !MILLEN
  860. }
  861. #endif // !MILLEN
  862. LOCKED_DELAY_DEREF_AO(ListenAO);
  863. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  864. if (FoundConn) {
  865. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock));
  866. } else {
  867. // Either we couldn't find a TCPConn for this TCB,
  868. // or someone accepted it before us. We just complete
  869. // the unnecessary ConnReq, then we're done.
  870. UpdateConnInfo(ConnReq->tcr_conninfo, OptInfo,
  871. AcceptTCB->tcb_daddr, AcceptTCB->tcb_dport);
  872. if (ConnReq->tcr_addrinfo) {
  873. UpdateConnInfo(ConnReq->tcr_addrinfo, OptInfo,
  874. AcceptTCB->tcb_saddr,
  875. AcceptTCB->tcb_sport);
  876. }
  877. ConnReq->tcr_req.tr_status = Status;
  878. (ConnReq->tcr_req.tr_rtn)(ConnReq->tcr_req.tr_context,
  879. ConnReq->tcr_req.tr_status, 0);
  880. FreeConnReq(ConnReq);
  881. }
  882. return FoundConn;
  883. }
  884. // The event handler didn't take it. Dereference it, free
  885. // the resources, and return NULL.
  886. #if !MILLEN
  887. AcceptIrpCancelled:
  888. #endif // !MILLEN
  889. FreeConnReq(ConnReq);
  890. DELAY_DEREF_AO(ListenAO);
  891. return FALSE;
  892. }
  893. BOOLEAN
  894. InitSynTCB(SYNTCB *SynTcb, AddrObj* AO, IPAddr Src, IPAddr Dest,
  895. TCPHeader UNALIGNED *TCPH, TCPRcvInfo *RcvInfo, uint IFIndex)
  896. {
  897. CTELockHandle Handle;
  898. SynTcb->syntcb_state = TCB_SYN_RCVD;
  899. SynTcb->syntcb_flags |= CONN_ACCEPTED;
  900. SynTcb->syntcb_refcnt = 1;
  901. SynTcb->syntcb_rcvnext = ++(RcvInfo->tri_seq);
  902. SynTcb->syntcb_sendwin = RcvInfo->tri_window;
  903. SynTcb->syntcb_ttl = AO->ao_opt.ioi_ttl;
  904. if (AO_WINSET(AO)) {
  905. SynTcb->syntcb_defaultwin = AO->ao_window;
  906. SynTcb->syntcb_flags |= WINDOW_SET;
  907. } else if (DefaultRcvWin) {
  908. SynTcb->syntcb_defaultwin = DefaultRcvWin;
  909. } else {
  910. SynTcb->syntcb_defaultwin = DEFAULT_RCV_WIN;
  911. }
  912. CTEFreeLockFromDPC(&AO->ao_lock);
  913. SynTcb->syntcb_rcvwinscale = 0;
  914. while ((SynTcb->syntcb_rcvwinscale < TCP_MAX_WINSHIFT) &&
  915. ((TCP_MAXWIN << SynTcb->syntcb_rcvwinscale) <
  916. (int)SynTcb->syntcb_defaultwin)) {
  917. SynTcb->syntcb_rcvwinscale++;
  918. }
  919. // Find Remote MSS and also if WS, TS or
  920. // sack options are negotiated.
  921. SynTcb->syntcb_sndwinscale = 0;
  922. SynTcb->syntcb_remmss = FindMSSAndOptions(TCPH, (TCB *)SynTcb, TRUE);
  923. if (SynTcb->syntcb_remmss <= ALIGNED_TS_OPT_SIZE) {
  924. // turn off TS if mss is not sufficient to
  925. // hold TS fields.
  926. SynTcb->syntcb_tcpopts &= ~TCP_FLAG_TS;
  927. }
  928. if (!InsertSynTCB(SynTcb, &Handle)){
  929. FreeSynTCB(SynTcb);
  930. return FALSE;
  931. }
  932. TcpInvokeCcb(TCP_CONN_CLOSED, TCP_CONN_SYN_RCVD, &SynTcb->syntcb_addrbytes,
  933. IFIndex);
  934. AddHalfOpenTCB();
  935. SynTcb->syntcb_rexmitcnt = 0;
  936. SynTcb->syntcb_rexmit = MS_TO_TICKS(3000);
  937. SendSYNOnSynTCB(SynTcb, Handle);
  938. TStats.ts_passiveopens++;
  939. return TRUE;
  940. }
  941. //* FindListenConn - Find (or fabricate) a listening connection.
  942. //
  943. // Called by our Receive handler to decide what to do about an incoming
  944. // SYN. We walk down the list of connections associated with the destination
  945. // address, and if we find any in the listening state that can be used for
  946. // the incoming request we'll take them, possibly returning a listen in the
  947. // process. If we don't find any appropriate listening connections, we'll
  948. // call the Connect Event handler if one is registerd. If all else fails,
  949. // we'll return NULL and the SYN will be RST.
  950. //
  951. // The caller must hold the AddrObjTableLock before calling this routine,
  952. // and that lock must have been taken at DPC level. This routine will free
  953. // that lock back to DPC level.
  954. //
  955. // Input: ListenAO - Pointer to AddrObj for local address.
  956. // Src - Source IP address of SYN.
  957. // Dest - Destination IP address of SYN.
  958. // SrcPort - Source port of SYN.
  959. // OptInfo - IP options info from SYN.
  960. // TCPH - TCP Header of SYN.
  961. // RcvInfo - Information about the SYN segment
  962. // IFIndex - Interface index on which the SYN came in.
  963. // syn - [OUT] will be set if a SYN TCB was created.
  964. //
  965. // Returns: Pointer to found TCB, or NULL if we can't find one.
  966. //
  967. TCB *
  968. FindListenConn(AddrObj *ListenAO, IPAddr Src, IPAddr Dest, ushort SrcPort,
  969. IPOptInfo *OptInfo, TCPHeader UNALIGNED *TCPH,
  970. TCPRcvInfo *RcvInfo, ULONG IFIndex, BOOLEAN *syn)
  971. {
  972. TCB *CurrentTCB = NULL;
  973. TCPConn *CurrentConn = NULL;
  974. TCPConnReq *ConnReq = NULL;
  975. CTELockHandle ConnHandle;
  976. Queue *CurrentQ, *MarkerQ, Marker;
  977. uint FoundConn = FALSE;
  978. BOOLEAN SecondTry = FALSE;
  979. funcstart:
  980. CTEStructAssert(ListenAO, ao);
  981. CTEGetLockAtDPC(&ListenAO->ao_lock);
  982. // We have the lock on the AddrObj. Walk down it's list, looking
  983. // for connections in the listening state.
  984. if (!AO_VALID(ListenAO)) {
  985. AddrObj * NextAddrObj;
  986. if (SecondTry) {
  987. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  988. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  989. return NULL;
  990. }
  991. // We will find the next best AO for another try
  992. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  993. NextAddrObj = GetNextBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP,
  994. ListenAO, GAO_FLAG_CHECK_IF_LIST);
  995. if (NextAddrObj == NULL) {
  996. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  997. return NULL;
  998. }
  999. ListenAO = NextAddrObj;
  1000. SecondTry = TRUE;
  1001. goto funcstart;
  1002. }
  1003. if (ListenAO->ao_listencnt != 0) {
  1004. REF_AO(ListenAO);
  1005. MarkerQ = &Marker;
  1006. CurrentQ = QHEAD(&ListenAO->ao_listenq);
  1007. while (CurrentQ != QEND(&ListenAO->ao_listenq)) {
  1008. CurrentConn = QSTRUCT(TCPConn, CurrentQ, tc_q);
  1009. INITQ(MarkerQ);
  1010. PUSHQ(CurrentQ, MarkerQ);
  1011. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1012. CTEGetLockAtDPC(&(CurrentConn->tc_ConnBlock->cb_lock));
  1013. #if DBG
  1014. CurrentConn->tc_ConnBlock->line = (uint) __LINE__;
  1015. CurrentConn->tc_ConnBlock->module = (uchar *) __FILE__;
  1016. #endif
  1017. CTEStructAssert(CurrentConn, tc);
  1018. CTEGetLockAtDPC(&ListenAO->ao_lock);
  1019. // If this TCB is in the listening state, with no delete
  1020. // pending, it's a candidate. Look at the pending listen
  1021. // info. to see if we should take it. Also ensure that
  1022. // the Conn we found has not been removed from the listen queue.
  1023. if (QPREV(MarkerQ) == CurrentQ &&
  1024. (CurrentConn->tc_flags & CONN_INVALID) == 0 &&
  1025. (CurrentTCB = CurrentConn->tc_tcb) != NULL &&
  1026. CurrentTCB->tcb_state == TCB_LISTEN) {
  1027. CTEStructAssert(CurrentTCB, tcb);
  1028. ASSERT(CurrentTCB->tcb_state == TCB_LISTEN);
  1029. CTEGetLockAtDPC(&CurrentTCB->tcb_lock);
  1030. if (CurrentTCB->tcb_state == TCB_LISTEN &&
  1031. !PENDING_ACTION(CurrentTCB)) {
  1032. // Need to see if we can take it.
  1033. // See if the addresses specifed in the ConnReq
  1034. // match.
  1035. if ((IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
  1036. NULL_IP_ADDR) ||
  1037. IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
  1038. Src)) &&
  1039. (CurrentTCB->tcb_dport == 0 ||
  1040. CurrentTCB->tcb_dport == SrcPort)) {
  1041. FoundConn = TRUE;
  1042. REMOVEQ(MarkerQ);
  1043. break;
  1044. }
  1045. // Otherwise, this didn't match, so we'll check the
  1046. // next one.
  1047. }
  1048. CTEFreeLockFromDPC(&CurrentTCB->tcb_lock);
  1049. }
  1050. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1051. CurrentQ = QNEXT(MarkerQ);
  1052. REMOVEQ(MarkerQ);
  1053. }
  1054. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  1055. LOCKED_DELAY_DEREF_AO(ListenAO);
  1056. // See why we've exited the loop.
  1057. if (FoundConn) {
  1058. CTEStructAssert(CurrentTCB, tcb);
  1059. // We exited because we found a TCB. If it's pre-accepted,
  1060. // we're done.
  1061. REFERENCE_TCB(CurrentTCB);
  1062. ASSERT(CurrentTCB->tcb_connreq != NULL);
  1063. ConnReq = CurrentTCB->tcb_connreq;
  1064. CurrentTCB->tcb_daddr = Src;
  1065. CurrentTCB->tcb_saddr = Dest;
  1066. CurrentTCB->tcb_dport = TCPH->tcp_src;
  1067. CurrentTCB->tcb_sport = TCPH->tcp_dest;
  1068. // If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
  1069. if (!(ConnReq->tcr_flags & TCR_FLAG_QUERY_ACCEPT)) {
  1070. CurrentTCB->tcb_flags |= CONN_ACCEPTED;
  1071. #if MILLEN
  1072. //just use tcb_sendnext to hold hash value
  1073. //for randisn
  1074. CurrentTCB->tcb_sendnext = TCB_HASH(CurrentTCB->tcb_daddr,
  1075. CurrentTCB->tcb_dport,
  1076. CurrentTCB->tcb_saddr,
  1077. CurrentTCB->tcb_sport);
  1078. #endif
  1079. // If CONN_ACCEPTED, TdiAccept is not called
  1080. // again. So, get ISN when we are with in conn table lock
  1081. GetRandomISN((PULONG)&CurrentTCB->tcb_sendnext,
  1082. &CurrentTCB->tcb_addrbytes);
  1083. }
  1084. CurrentTCB->tcb_state = TCB_SYN_RCVD;
  1085. ListenAO->ao_listencnt--;
  1086. // Since he's no longer listening, remove him from the listen
  1087. // queue and put him on the active queue.
  1088. REMOVEQ(&CurrentConn->tc_q);
  1089. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1090. CTEFreeLockFromDPC(&CurrentTCB->tcb_lock);
  1091. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1092. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock));
  1093. return CurrentTCB;
  1094. } else {
  1095. // Since we have a listening count, this should never happen
  1096. // if that count was non-zero initially.
  1097. // We currently don't keep a good count on ao_listencnt when
  1098. // the IRPs are cancelled.
  1099. // ASSERT(FALSE);
  1100. }
  1101. } else {
  1102. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  1103. }
  1104. // We didn't find a matching TCB. If there is no connect indicate handler,
  1105. // we should not be creating any state.
  1106. if (ListenAO->ao_connect == NULL) {
  1107. AddrObj * NextAddrObj;
  1108. // Try with the next AO if we can
  1109. if (SecondTry) {
  1110. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1111. return NULL;
  1112. }
  1113. REF_AO(ListenAO);
  1114. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1115. CTEGetLockAtDPC(&AddrObjTableLock.Lock);
  1116. NextAddrObj = GetNextBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP,
  1117. ListenAO, GAO_FLAG_CHECK_IF_LIST);
  1118. DELAY_DEREF_AO(ListenAO);
  1119. if (NextAddrObj == NULL) {
  1120. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  1121. return NULL;
  1122. }
  1123. ListenAO = NextAddrObj;
  1124. SecondTry = TRUE;
  1125. goto funcstart;
  1126. }
  1127. ASSERT(FoundConn == FALSE);
  1128. if (SynAttackProtect){
  1129. SYNTCB *AcceptTCB;
  1130. AcceptTCB = AllocSynTCB();
  1131. if (AcceptTCB) {
  1132. AcceptTCB->syntcb_daddr = Src;
  1133. AcceptTCB->syntcb_saddr= Dest;
  1134. AcceptTCB->syntcb_dport= TCPH->tcp_src;
  1135. AcceptTCB->syntcb_sport= TCPH->tcp_dest;
  1136. GetRandomISN((PULONG)&AcceptTCB->syntcb_sendnext,
  1137. &AcceptTCB->syntcb_addrbytes);
  1138. if (InitSynTCB(AcceptTCB, ListenAO, Src, Dest, TCPH, RcvInfo,
  1139. IFIndex)) {
  1140. *syn = TRUE;
  1141. }
  1142. // Fall through. (ListenAO->ao_lock was freed by InitSynTCB.)
  1143. } else {
  1144. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1145. }
  1146. } else {
  1147. uchar TAddress[TCP_TA_SIZE];
  1148. PVOID ConnContext;
  1149. PConnectEvent Event;
  1150. PVOID EventContext;
  1151. TDI_STATUS Status;
  1152. TCB *AcceptTCB;
  1153. TCPConnReq *ConnReq;
  1154. PTCP_CONTEXT TcpContext = NULL;
  1155. #if !MILLEN
  1156. ConnectEventInfo *EventInfo;
  1157. #else // !MILLEN
  1158. ConnectEventInfo EventInfo;
  1159. #endif // MILLEN
  1160. // He has a connect handler. Put the transport address together,
  1161. // and call him. We also need to get the necessary resources
  1162. // first.
  1163. Event = ListenAO->ao_connect;
  1164. EventContext = ListenAO->ao_conncontext;
  1165. REF_AO(ListenAO);
  1166. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1167. AcceptTCB = AllocTCB();
  1168. ConnReq = GetConnReq();
  1169. if (AcceptTCB != NULL && ConnReq != NULL) {
  1170. BuildTDIAddress(TAddress, Src, SrcPort);
  1171. AcceptTCB->tcb_state = TCB_LISTEN;
  1172. AcceptTCB->tcb_connreq = ConnReq;
  1173. AcceptTCB->tcb_flags |= CONN_ACCEPTED;
  1174. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  1175. TCPTRACE(("indicating connect request\n"));
  1176. }
  1177. Status = (*Event) (EventContext, TCP_TA_SIZE,
  1178. (PTRANSPORT_ADDRESS) TAddress, 0, NULL,
  1179. OptInfo->ioi_optlength, OptInfo->ioi_options,
  1180. &ConnContext, &EventInfo);
  1181. if (Status == TDI_MORE_PROCESSING) {
  1182. #if !MILLEN
  1183. PIO_STACK_LOCATION IrpSp;
  1184. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  1185. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  1186. Status = TCPPrepareIrpForCancel(
  1187. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  1188. EventInfo,
  1189. TCPCancelRequest
  1190. );
  1191. if (!NT_SUCCESS(Status)) {
  1192. Status = TDI_NOT_ACCEPTED;
  1193. EventInfo = NULL;
  1194. goto AcceptIrpCancelled;
  1195. }
  1196. // He accepted it. Find the connection on the AddrObj.
  1197. {
  1198. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  1199. TCPTRACE((
  1200. "connect indication accepted, queueing request\n"
  1201. ));
  1202. }
  1203. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  1204. & (IrpSp->Parameters);
  1205. ConnReq->tcr_conninfo =
  1206. AcceptRequest->ReturnConnectionInformation;
  1207. if (AcceptRequest->RequestConnectionInformation &&
  1208. AcceptRequest->RequestConnectionInformation->RemoteAddress) {
  1209. ConnReq->tcr_addrinfo =
  1210. AcceptRequest->RequestConnectionInformation;
  1211. } else {
  1212. ConnReq->tcr_addrinfo = NULL;
  1213. }
  1214. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  1215. ConnReq->tcr_req.tr_context = EventInfo;
  1216. ConnReq->tcr_flags = 0;
  1217. }
  1218. #else // !MILLEN
  1219. ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
  1220. ConnReq->tcr_req.tr_context = EventInfo.cei_context;
  1221. ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
  1222. ConnReq->tcr_addrinfo = NULL;
  1223. #endif // MILLEN
  1224. CurrentConn = NULL;
  1225. #if !MILLEN
  1226. if ((IrpSp->FileObject->DeviceObject == TCPDeviceObject) &&
  1227. (PtrToUlong(IrpSp->FileObject->FsContext2) == TDI_CONNECTION_FILE) &&
  1228. ((TcpContext = IrpSp->FileObject->FsContext) != NULL) &&
  1229. ((CurrentConn =
  1230. GetConnFromConnID(
  1231. PtrToUlong(TcpContext->Handle.ConnectionContext),
  1232. &ConnHandle)) != NULL) &&
  1233. (CurrentConn->tc_context == ConnContext) &&
  1234. !(CurrentConn->tc_flags & CONN_INVALID)) {
  1235. CTEGetLockAtDPC(&ListenAO->ao_lock);
  1236. // Found the Conn structure!!
  1237. // Don't have to loop below.
  1238. CTEStructAssert(CurrentConn, tc);
  1239. AcceptTCB->tcb_refcnt = 0;
  1240. REFERENCE_TCB(AcceptTCB);
  1241. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  1242. AcceptRequest->RequestConnectionInformation,
  1243. TRUE);
  1244. // Let's store the connection invariants upfront.
  1245. AcceptTCB->tcb_daddr = Src;
  1246. AcceptTCB->tcb_saddr= Dest;
  1247. AcceptTCB->tcb_dport= TCPH->tcp_src;
  1248. AcceptTCB->tcb_sport= TCPH->tcp_dest;
  1249. if (Status == TDI_SUCCESS) {
  1250. FoundConn = TRUE;
  1251. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  1252. AcceptTCB->tcb_conn = CurrentConn;
  1253. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  1254. CurrentConn->tc_tcb = AcceptTCB;
  1255. CurrentConn->tc_refcnt++;
  1256. GetRandomISN((PULONG)&AcceptTCB->tcb_sendnext,
  1257. &AcceptTCB->tcb_addrbytes);
  1258. // Move him from the idle q to the active
  1259. // queue.
  1260. REMOVEQ(&CurrentConn->tc_q);
  1261. PUSHQ(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1262. } else {
  1263. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1264. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1265. }
  1266. } else {
  1267. #endif // !MILLEN
  1268. if (CurrentConn) {
  1269. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1270. }
  1271. CTEGetLockAtDPC(&AddrObjTableLock.Lock);
  1272. CTEGetLockAtDPC(&ListenAO->ao_lock);
  1273. MarkerQ = &Marker;
  1274. CurrentQ = QHEAD(&ListenAO->ao_idleq);;
  1275. CurrentTCB = NULL;
  1276. Status = TDI_INVALID_CONNECTION;
  1277. while (CurrentQ != QEND(&ListenAO->ao_idleq)) {
  1278. CurrentConn = QSTRUCT(TCPConn, CurrentQ, tc_q);
  1279. INITQ(MarkerQ);
  1280. PUSHQ(CurrentQ, MarkerQ);
  1281. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1282. CTEGetLockAtDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1283. #if DBG
  1284. CurrentConn->tc_ConnBlock->line = (uint) __LINE__;
  1285. CurrentConn->tc_ConnBlock->module = (uchar *) __FILE__;
  1286. #endif
  1287. CTEGetLockAtDPC(&ListenAO->ao_lock);
  1288. CTEStructAssert(CurrentConn, tc);
  1289. if (QPREV(MarkerQ) == CurrentQ &&
  1290. CurrentConn->tc_context == ConnContext &&
  1291. !(CurrentConn->tc_flags & CONN_INVALID)) {
  1292. // We think we have a match. The connection
  1293. // shouldn't have a TCB associated with it. If it
  1294. // does, it's an error. InitTCBFromConn will
  1295. // handle all this.
  1296. AcceptTCB->tcb_refcnt = 0;
  1297. REFERENCE_TCB(AcceptTCB);
  1298. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  1299. AcceptRequest->RequestConnectionInformation,
  1300. TRUE);
  1301. // Let's store the connection invariants upfront.
  1302. AcceptTCB->tcb_daddr = Src;
  1303. AcceptTCB->tcb_saddr= Dest;
  1304. AcceptTCB->tcb_dport= TCPH->tcp_src;
  1305. AcceptTCB->tcb_sport= TCPH->tcp_dest;
  1306. if (Status == TDI_SUCCESS) {
  1307. FoundConn = TRUE;
  1308. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  1309. AcceptTCB->tcb_conn = CurrentConn;
  1310. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  1311. CurrentConn->tc_tcb = AcceptTCB;
  1312. CurrentConn->tc_refcnt++;
  1313. GetRandomISN((PULONG)&AcceptTCB->tcb_sendnext,
  1314. &AcceptTCB->tcb_addrbytes);
  1315. // Move him from the idle q to the active
  1316. // queue.
  1317. REMOVEQ(&CurrentConn->tc_q);
  1318. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1319. } else {
  1320. CTEFreeLockFromDPC(
  1321. &CurrentConn->tc_ConnBlock->cb_lock);
  1322. }
  1323. // In any case, we're done now.
  1324. REMOVEQ(MarkerQ);
  1325. break;
  1326. }
  1327. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1328. CurrentQ = QNEXT(MarkerQ);
  1329. REMOVEQ(MarkerQ);
  1330. }
  1331. if (!FoundConn) {
  1332. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1333. }
  1334. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  1335. #if !MILLEN
  1336. }
  1337. #endif // !MILLEN
  1338. if (!FoundConn) {
  1339. // Didn't find a match, or had an error. Status
  1340. // code is set.
  1341. // Complete the ConnReq and free the resources.
  1342. CTEGetLockAtDPC(&AcceptTCB->tcb_lock);
  1343. CompleteConnReq(AcceptTCB, OptInfo, Status);
  1344. CTEFreeLockFromDPC(&AcceptTCB->tcb_lock);
  1345. FreeTCB(AcceptTCB);
  1346. AcceptTCB = NULL;
  1347. }
  1348. if (FoundConn) {
  1349. LOCKED_DELAY_DEREF_AO(ListenAO);
  1350. CTEFreeLockFromDPC(&ListenAO->ao_lock);
  1351. CTEFreeLockFromDPC(&CurrentConn->tc_ConnBlock->cb_lock);
  1352. } else {
  1353. DELAY_DEREF_AO(ListenAO);
  1354. }
  1355. return AcceptTCB;
  1356. } //tdi_more_processing
  1357. #if !MILLEN
  1358. AcceptIrpCancelled:
  1359. #endif // !MILLEN
  1360. // The event handler didn't take it. Dereference it, free
  1361. // the resources, and return NULL.
  1362. FreeConnReq(ConnReq);
  1363. FreeTCB(AcceptTCB);
  1364. // Try again if we can with the next best AO
  1365. if (!SecondTry && (Status == TDI_CONN_REFUSED)) {
  1366. AddrObj * NextAddrObj;
  1367. CTEGetLockAtDPC(&AddrObjTableLock.Lock);
  1368. NextAddrObj = GetNextBestAddrObj(Dest, TCPH->tcp_dest,
  1369. PROTOCOL_TCP, ListenAO,
  1370. GAO_FLAG_CHECK_IF_LIST);
  1371. if (NextAddrObj == NULL) {
  1372. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  1373. } else {
  1374. DELAY_DEREF_AO(ListenAO);
  1375. ListenAO = NextAddrObj;
  1376. SecondTry = TRUE;
  1377. goto funcstart;
  1378. }
  1379. }
  1380. DELAY_DEREF_AO(ListenAO);
  1381. return NULL;
  1382. } else {
  1383. // We couldn't get a needed resources. Free any that we
  1384. // did get, and fall through to the 'return NULL' code.
  1385. DELAY_DEREF_AO(ListenAO);
  1386. if (ConnReq != NULL)
  1387. FreeConnReq(ConnReq);
  1388. if (AcceptTCB != NULL)
  1389. FreeTCB(AcceptTCB);
  1390. }
  1391. }
  1392. return NULL;
  1393. }
  1394. // FindMSSAndOptions
  1395. //
  1396. // Called when a SYN is received to find the MSS option in a segment. If we
  1397. // don't find one, we assume the worst and return 536.
  1398. //
  1399. // Also, parses incoming header for window scaling, timestamp and SACK
  1400. // options. Note that we will enable these options for the connection
  1401. // only if they are enabled on this host.
  1402. //
  1403. //
  1404. // Input: TCPH - TCP header to be searched.
  1405. // SynTCB - the TCB or SYNTCB to be updated.
  1406. // IsSYNTCB - if TRUE, 'SynTCB' is of type 'SYNTCB'.
  1407. //
  1408. // Returns: MSS to be used.
  1409. //
  1410. ushort
  1411. FindMSSAndOptions(TCPHeader UNALIGNED * TCPH, TCB * SynTCB, BOOLEAN IsSYNTCB)
  1412. {
  1413. uint OptSize;
  1414. uchar *OptPtr;
  1415. ushort TempMss = 0;
  1416. BOOLEAN WinScale = FALSE;
  1417. ushort SYN = 0;
  1418. ushort tcboptions;
  1419. short rcvwinscale=0,sndwinscale=0;
  1420. int tsupdate=0,tsrecent=0;
  1421. OptSize = TCP_HDR_SIZE(TCPH) - sizeof(TCPHeader);
  1422. OptPtr = (uchar *) (TCPH + 1);
  1423. SYN = (TCPH->tcp_flags & TCP_FLAG_SYN);
  1424. if (IsSYNTCB) {
  1425. tcboptions = ((SYNTCB *)SynTCB)->syntcb_tcpopts;
  1426. rcvwinscale = ((SYNTCB *)SynTCB)->syntcb_rcvwinscale;
  1427. } else {
  1428. tcboptions = SynTCB->tcb_tcpopts;
  1429. rcvwinscale = SynTCB->tcb_rcvwinscale;
  1430. }
  1431. while ((int)OptSize > 0) {
  1432. if (*OptPtr == TCP_OPT_EOL)
  1433. break;
  1434. if (*OptPtr == TCP_OPT_NOP) {
  1435. OptPtr++;
  1436. OptSize--;
  1437. continue;
  1438. }
  1439. if ((*OptPtr == TCP_OPT_MSS) && (OptSize >= MSS_OPT_SIZE)) {
  1440. if (SYN && (OptPtr[1] == MSS_OPT_SIZE)) {
  1441. TempMss = *(ushort UNALIGNED *) (OptPtr + 2);
  1442. TempMss = net_short(TempMss);
  1443. }
  1444. OptSize -= MSS_OPT_SIZE;
  1445. OptPtr += MSS_OPT_SIZE;
  1446. } else if ((*OptPtr == TCP_OPT_WS) && (OptSize >= WS_OPT_SIZE)) {
  1447. if (SYN && (OptPtr[1] == WS_OPT_SIZE)) {
  1448. sndwinscale = (uint)OptPtr[2];
  1449. IF_TCPDBG(TCP_DEBUG_1323) {
  1450. TCPTRACE(("WS option %x", sndwinscale));
  1451. }
  1452. tcboptions |= TCP_FLAG_WS;
  1453. WinScale = TRUE;
  1454. }
  1455. OptSize -= WS_OPT_SIZE;
  1456. OptPtr += WS_OPT_SIZE;
  1457. } else if ((*OptPtr == TCP_OPT_TS) && (OptSize >= TS_OPT_SIZE)) {
  1458. // Time stamp options
  1459. if ((OptPtr[1] == TS_OPT_SIZE) && (TcpHostOpts & TCP_FLAG_TS)) {
  1460. int tsval = *(int UNALIGNED *)&OptPtr[2];
  1461. tcboptions |= TCP_FLAG_TS;
  1462. if (SYN) {
  1463. tsupdate = TCPTime;
  1464. tsrecent = net_long(tsval);
  1465. }
  1466. IF_TCPDBG(TCP_DEBUG_1323) {
  1467. TCPTRACE(("TS option %x", SynTCB));
  1468. }
  1469. }
  1470. OptSize -= TS_OPT_SIZE;
  1471. OptPtr += TS_OPT_SIZE;
  1472. } else if ((*OptPtr == TCP_SACK_PERMITTED_OPT)
  1473. && (OptSize >= SACK_PERMITTED_OPT_SIZE)) {
  1474. // SACK OPtions
  1475. if ((OptPtr[1] == SACK_PERMITTED_OPT_SIZE)
  1476. && (TcpHostOpts & TCP_FLAG_SACK)) {
  1477. tcboptions |= TCP_FLAG_SACK;
  1478. IF_TCPDBG(TCP_DEBUG_SACK) {
  1479. TCPTRACE(("Rcvd SACK_OPT %x\n", SynTCB));
  1480. }
  1481. }
  1482. OptSize -= SACK_PERMITTED_OPT_SIZE;
  1483. OptPtr += SACK_PERMITTED_OPT_SIZE;
  1484. } else { // Unknown option.
  1485. if (OptSize > 1) {
  1486. if (OptPtr[1] == 0 || OptPtr[1] > OptSize) {
  1487. break; // Bad option length, bail out.
  1488. }
  1489. OptSize -= OptPtr[1];
  1490. OptPtr += OptPtr[1];
  1491. } else {
  1492. break;
  1493. }
  1494. }
  1495. }
  1496. if (WinScale) {
  1497. if (sndwinscale > TCP_MAX_WINSHIFT) {
  1498. sndwinscale = TCP_MAX_WINSHIFT;
  1499. }
  1500. }
  1501. if (IsSYNTCB) {
  1502. ((SYNTCB *)SynTCB)->syntcb_tcpopts = (uchar)tcboptions;
  1503. ((SYNTCB *)SynTCB)->syntcb_tsupdatetime = tsupdate;
  1504. ((SYNTCB *)SynTCB)->syntcb_tsrecent = tsrecent;
  1505. if (!WinScale && rcvwinscale) {
  1506. ((SYNTCB *)SynTCB)->syntcb_defaultwin = TCP_MAXWIN;
  1507. ((SYNTCB *)SynTCB)->syntcb_rcvwinscale = 0;
  1508. }
  1509. ((SYNTCB *)SynTCB)->syntcb_sndwinscale = sndwinscale;
  1510. } else {
  1511. SynTCB->tcb_tcpopts = tcboptions;
  1512. SynTCB->tcb_tsupdatetime = tsupdate;
  1513. SynTCB->tcb_tsrecent = tsrecent;
  1514. if (!WinScale && rcvwinscale) {
  1515. SynTCB->tcb_defaultwin = TCP_MAXWIN;
  1516. SynTCB->tcb_rcvwin = TCP_MAXWIN;
  1517. SynTCB->tcb_rcvwinscale = 0;
  1518. }
  1519. SynTCB->tcb_sndwinscale = sndwinscale;
  1520. }
  1521. if (TempMss) {
  1522. return (TempMss);
  1523. } else {
  1524. return MAX_REMOTE_MSS;
  1525. }
  1526. }
  1527. //* ACKAndDrop - Acknowledge a segment, and drop it.
  1528. //
  1529. // Called from within the receive code when we need to drop a segment that's
  1530. // outside the receive window.
  1531. //
  1532. // Input: RI - Receive info for incoming segment.
  1533. // RcvTCB - TCB for incoming segment.
  1534. //
  1535. // Returns: Nothing.
  1536. //
  1537. void
  1538. ACKAndDrop(TCPRcvInfo * RI, TCB * RcvTCB)
  1539. {
  1540. if (!(RI->tri_flags & TCP_FLAG_RST)) {
  1541. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  1542. SendACK(RcvTCB);
  1543. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  1544. }
  1545. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  1546. }
  1547. //* ACKData - Acknowledge data.
  1548. //
  1549. // Called from the receive handler to acknowledge data. We're given the
  1550. // TCB and the new value of senduna. We walk down the send queue, pulling
  1551. // off sends and putting them on the complete queue until we hit the end
  1552. // or we acknowledge the specified number of bytes of data.
  1553. //
  1554. // NOTE: We manipulate the send refcnt and acked flag without taking a lock.
  1555. // This is OK in the VxD version where locks don't mean anything anyway, but
  1556. // in the port to NT we'll need to add locking. The lock will have to be
  1557. // taken in the transmit complete routine. We can't use a lock in the TCB,
  1558. // since the TCB could go away before the transmit complete happens, and a lock
  1559. // in the TSR would be overkill, so it's probably best to use a global lock
  1560. // for this. If that causes too much contention, we could use a set of locks
  1561. // and pass a pointer to the appropriate lock back as part of the transmit
  1562. // confirm context. This lock pointer would also need to be stored in the
  1563. // TCB.
  1564. //
  1565. // Input: ACKTcb - TCB from which to pull data.
  1566. // SendUNA - New value of send una.
  1567. // SendQ - Queue to be filled with ACK'd requests.
  1568. //
  1569. // Returns: Nothing.
  1570. //
  1571. void
  1572. ACKData(TCB * ACKTcb, SeqNum SendUNA, Queue* SendQ)
  1573. {
  1574. Queue *End, *Current; // End and current elements.
  1575. Queue *TempQ, *EndQ;
  1576. Queue *LastCmplt; // Last one we completed.
  1577. TCPSendReq *CurrentTSR; // Current send req we're
  1578. // looking at.
  1579. PNDIS_BUFFER CurrentBuffer; // Current NDIS_BUFFER.
  1580. uint BufLength;
  1581. int Amount, OrigAmount;
  1582. long Result;
  1583. uint Temp;
  1584. #if TRACE_EVENT
  1585. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  1586. WMIData WMIInfo;
  1587. #endif
  1588. CTEStructAssert(ACKTcb, tcb);
  1589. CheckTCBSends(ACKTcb);
  1590. Amount = SendUNA - ACKTcb->tcb_senduna;
  1591. ASSERT(Amount > 0);
  1592. // if the receiver is acking something for which we have
  1593. // a sack entry, remove it.
  1594. if (ACKTcb->tcb_SackRcvd) {
  1595. SackListEntry *Prev, *Current;
  1596. Prev = STRUCT_OF(SackListEntry, &ACKTcb->tcb_SackRcvd, next);
  1597. Current = ACKTcb->tcb_SackRcvd;
  1598. // Scan the list for old sack entries and purge them
  1599. while ((Current != NULL) && SEQ_GT(SendUNA, Current->begin)) {
  1600. Prev->next = Current->next;
  1601. IF_TCPDBG(TCP_DEBUG_SACK) {
  1602. TCPTRACE(("ACKData:Purging old entries %x %d %d\n", Current, Current->begin, Current->end));
  1603. }
  1604. CTEFreeMem(Current);
  1605. Current = Prev->next;
  1606. }
  1607. }
  1608. // Do a quick check to see if this acks everything that we have. If it does,
  1609. // handle it right away. We can only do this in the ESTABLISHED state,
  1610. // because we blindly update sendnext, and that can only work if we
  1611. // haven't sent a FIN.
  1612. if ((Amount == (int)ACKTcb->tcb_unacked) && ACKTcb->tcb_state == TCB_ESTAB) {
  1613. // Everything is acked.
  1614. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1615. TempQ = ACKTcb->tcb_sendq.q_next;
  1616. INITQ(&ACKTcb->tcb_sendq);
  1617. ACKTcb->tcb_sendnext = SendUNA;
  1618. ACKTcb->tcb_senduna = SendUNA;
  1619. ASSERT(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
  1620. ACKTcb->tcb_cursend = NULL;
  1621. ACKTcb->tcb_sendbuf = NULL;
  1622. ACKTcb->tcb_sendofs = 0;
  1623. ACKTcb->tcb_sendsize = 0;
  1624. ACKTcb->tcb_unacked = 0;
  1625. // Now walk down the list of send requests. If the reference count
  1626. // has gone to 0, put it on the send complete queue.
  1627. EndQ = &ACKTcb->tcb_sendq;
  1628. do {
  1629. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q), tsr_req);
  1630. CTEStructAssert(CurrentTSR, tsr);
  1631. TempQ = CurrentTSR->tsr_req.tr_q.q_next;
  1632. CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
  1633. Result = CTEInterlockedDecrementLong(&CurrentTSR->tsr_refcnt);
  1634. ASSERT(Result >= 0);
  1635. #if TRACE_EVENT
  1636. CPCallBack = TCPCPHandlerRoutine;
  1637. if (CPCallBack != NULL) {
  1638. ulong GroupType;
  1639. WMIInfo.wmi_destaddr = ACKTcb->tcb_daddr;
  1640. WMIInfo.wmi_destport = ACKTcb->tcb_dport;
  1641. WMIInfo.wmi_srcaddr = ACKTcb->tcb_saddr;
  1642. WMIInfo.wmi_srcport = ACKTcb->tcb_sport;
  1643. WMIInfo.wmi_size = CurrentTSR->tsr_size;
  1644. WMIInfo.wmi_context = ACKTcb->tcb_cpcontext;
  1645. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_SEND;
  1646. (*CPCallBack)(GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo),
  1647. NULL);
  1648. }
  1649. #endif
  1650. if ((Result <= 0) &&
  1651. !(CurrentTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1652. // No more references are outstanding, the send can be
  1653. // completed.
  1654. // If we've sent directly from this send, NULL out the next
  1655. // pointer for the last buffer in the chain.
  1656. if (CurrentTSR->tsr_lastbuf != NULL) {
  1657. NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
  1658. CurrentTSR->tsr_lastbuf = NULL;
  1659. }
  1660. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1661. Temp = ACKTcb->tcb_bcountlow;
  1662. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1663. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1664. ENQUEUE(SendQ, &CurrentTSR->tsr_req.tr_q);
  1665. }
  1666. } while (TempQ != EndQ);
  1667. CheckTCBSends(ACKTcb);
  1668. return;
  1669. }
  1670. OrigAmount = Amount;
  1671. End = QEND(&ACKTcb->tcb_sendq);
  1672. Current = QHEAD(&ACKTcb->tcb_sendq);
  1673. LastCmplt = NULL;
  1674. while (Amount > 0 && Current != End) {
  1675. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
  1676. tsr_req);
  1677. CTEStructAssert(CurrentTSR, tsr);
  1678. if (Amount >= (int)CurrentTSR->tsr_unasize) {
  1679. // This is completely acked. Just advance to the next one.
  1680. Amount -= CurrentTSR->tsr_unasize;
  1681. LastCmplt = Current;
  1682. Current = QNEXT(Current);
  1683. continue;
  1684. }
  1685. // This one is only partially acked. Update his offset and NDIS buffer
  1686. // pointer, and break out. We know that Amount is < the unacked size
  1687. // in this buffer, so we can walk the NDIS buffer chain without fear
  1688. // of falling off the end.
  1689. CurrentBuffer = CurrentTSR->tsr_buffer;
  1690. ASSERT(CurrentBuffer != NULL);
  1691. ASSERT(Amount < (int)CurrentTSR->tsr_unasize);
  1692. CurrentTSR->tsr_unasize -= Amount;
  1693. BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
  1694. if (Amount >= (int)BufLength) {
  1695. do {
  1696. Amount -= BufLength;
  1697. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1698. ASSERT(CurrentBuffer != NULL);
  1699. BufLength = NdisBufferLength(CurrentBuffer);
  1700. } while (Amount >= (int)BufLength);
  1701. CurrentTSR->tsr_offset = Amount;
  1702. CurrentTSR->tsr_buffer = CurrentBuffer;
  1703. } else
  1704. CurrentTSR->tsr_offset += Amount;
  1705. Amount = 0;
  1706. break;
  1707. }
  1708. // We should always be able to remove at least Amount bytes, except in
  1709. // the case where a FIN has been sent. In that case we should be off
  1710. // by exactly one. In the debug builds we'll check this.
  1711. ASSERT(0 == Amount || ((ACKTcb->tcb_flags & FIN_SENT) && (1 == Amount)));
  1712. if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
  1713. if (Current != End) {
  1714. // Need to reevaluate CurrentTSR, in case we bailed out of the
  1715. // above loop after updating Current but before updating
  1716. // CurrentTSR.
  1717. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
  1718. tsr_req);
  1719. CTEStructAssert(CurrentTSR, tsr);
  1720. ACKTcb->tcb_cursend = CurrentTSR;
  1721. ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
  1722. ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
  1723. ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
  1724. } else {
  1725. ACKTcb->tcb_cursend = NULL;
  1726. ACKTcb->tcb_sendbuf = NULL;
  1727. ACKTcb->tcb_sendofs = 0;
  1728. ACKTcb->tcb_sendsize = 0;
  1729. }
  1730. ACKTcb->tcb_sendnext = SendUNA;
  1731. }
  1732. // Now update tcb_unacked with the amount we tried to ack minus the
  1733. // amount we didn't ack (Amount should be 0 or 1 here).
  1734. ASSERT(Amount == 0 || Amount == 1);
  1735. if (ACKTcb->tcb_unacked) {
  1736. ASSERT(ACKTcb->tcb_unacked >= (uint)OrigAmount - Amount);
  1737. ACKTcb->tcb_unacked -= OrigAmount - Amount;
  1738. }
  1739. ASSERT(*(int *)&ACKTcb->tcb_unacked >= 0);
  1740. ACKTcb->tcb_senduna = SendUNA;
  1741. // If we've acked any here, LastCmplt will be non-null, and Current will
  1742. // point to the send that should be at the start of the queue. Splice
  1743. // out the completed ones and put them on the end of the send completed
  1744. // queue, and update the TCB send q.
  1745. if (LastCmplt != NULL) {
  1746. Queue *FirstCmplt;
  1747. TCPSendReq *FirstTSR, *EndTSR;
  1748. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1749. FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
  1750. // If we've acked everything, just reinit the queue.
  1751. if (Current == End) {
  1752. INITQ(&ACKTcb->tcb_sendq);
  1753. } else {
  1754. // There's still something on the queue. Just update it.
  1755. ACKTcb->tcb_sendq.q_next = Current;
  1756. Current->q_prev = &ACKTcb->tcb_sendq;
  1757. }
  1758. CheckTCBSends(ACKTcb);
  1759. // Now walk down the lists of things acked. If the refcnt on the send
  1760. // is 0, go ahead and put him on the send complete Q. Otherwise set
  1761. // the ACKed bit in the send, and he'll be completed when the count
  1762. // goes to 0 in the transmit confirm.
  1763. //
  1764. // Note that we haven't done any locking here. This will probably
  1765. // need to change in the port to NT.
  1766. // Set FirstTSR to the first TSR we'll complete, and EndTSR to be
  1767. // the first TSR that isn't completed.
  1768. FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, FirstCmplt, tr_q), tsr_req);
  1769. EndTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q), tsr_req);
  1770. CTEStructAssert(FirstTSR, tsr);
  1771. ASSERT(FirstTSR != EndTSR);
  1772. // Now walk the list of ACKed TSRs. If we can complete one, put him
  1773. // on the complete queue.
  1774. while (FirstTSR != EndTSR) {
  1775. TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
  1776. CTEStructAssert(FirstTSR, tsr);
  1777. FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
  1778. // The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
  1779. // goes to 0, so we don't need to do it here.
  1780. #if TRACE_EVENT
  1781. CPCallBack = TCPCPHandlerRoutine;
  1782. if (CPCallBack != NULL) {
  1783. ulong GroupType;
  1784. WMIInfo.wmi_destaddr = ACKTcb->tcb_daddr;
  1785. WMIInfo.wmi_destport = ACKTcb->tcb_dport;
  1786. WMIInfo.wmi_srcaddr = ACKTcb->tcb_saddr;
  1787. WMIInfo.wmi_srcport = ACKTcb->tcb_sport;
  1788. WMIInfo.wmi_size = FirstTSR->tsr_size;
  1789. WMIInfo.wmi_context = ACKTcb->tcb_cpcontext;
  1790. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_SEND;
  1791. (*CPCallBack)(GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo),
  1792. NULL);
  1793. }
  1794. #endif
  1795. // Decrement the reference put on the send buffer when it was
  1796. // initialized indicating the send has been acknowledged.
  1797. if (!(FirstTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1798. Result = CTEInterlockedDecrementLong(&FirstTSR->tsr_refcnt);
  1799. ASSERT(Result >= 0);
  1800. if (Result <= 0) {
  1801. // No more references are outstanding, the send can be
  1802. // completed.
  1803. // If we've sent directly from this send, NULL out the next
  1804. // pointer for the last buffer in the chain.
  1805. if (FirstTSR->tsr_lastbuf != NULL) {
  1806. NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
  1807. FirstTSR->tsr_lastbuf = NULL;
  1808. }
  1809. ACKTcb->tcb_totaltime += (TCPTime - FirstTSR->tsr_time);
  1810. Temp = ACKTcb->tcb_bcountlow;
  1811. ACKTcb->tcb_bcountlow += FirstTSR->tsr_size;
  1812. ACKTcb->tcb_bcounthi +=
  1813. (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1814. ENQUEUE(SendQ, &FirstTSR->tsr_req.tr_q);
  1815. }
  1816. } else {
  1817. if (EMPTYQ(&ACKTcb->tcb_sendq) &&
  1818. (FirstTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1819. ENQUEUE(&ACKTcb->tcb_sendq, &FirstTSR->tsr_req.tr_q);
  1820. ACKTcb->tcb_fastchk |= TCP_FLAG_REQUEUE_FROM_SEND_AND_DISC;
  1821. //this will be deleted when CloseTCB will be called on this.
  1822. CheckTCBSends(ACKTcb);
  1823. break;
  1824. }
  1825. }
  1826. FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q), tsr_req);
  1827. }
  1828. }
  1829. }
  1830. //* TrimRcvBuf - Trim the front edge of a receive buffer.
  1831. //
  1832. // A utility routine to trim the front of a receive buffer. We take in a
  1833. // a count (which may be 0) and adjust the pointer in the first buffer in
  1834. // the chain by that much. If there isn't that much in the first buffer,
  1835. // we move onto the next one. If we run out of buffers we'll return a pointer
  1836. // to the last buffer in the chain, with a size of 0. It's the caller's
  1837. // responsibility to catch this.
  1838. //
  1839. // Input: RcvBuf - Buffer to be trimmed.
  1840. // Count - Amount to be trimmed.
  1841. //
  1842. // Returns: A pointer to the new start, or NULL.
  1843. //
  1844. IPRcvBuf *
  1845. TrimRcvBuf(IPRcvBuf * RcvBuf, uint Count)
  1846. {
  1847. uint TrimThisTime;
  1848. ASSERT(RcvBuf != NULL);
  1849. while (Count) {
  1850. ASSERT(RcvBuf != NULL);
  1851. TrimThisTime = MIN(Count, RcvBuf->ipr_size);
  1852. Count -= TrimThisTime;
  1853. RcvBuf->ipr_buffer += TrimThisTime;
  1854. if ((RcvBuf->ipr_size -= TrimThisTime) == 0) {
  1855. if (RcvBuf->ipr_next != NULL)
  1856. RcvBuf = RcvBuf->ipr_next;
  1857. else {
  1858. // Ran out of buffers. Just return this one.
  1859. break;
  1860. }
  1861. }
  1862. }
  1863. return RcvBuf;
  1864. }
  1865. IPRcvBuf DummyBuf;
  1866. //* PullFromRAQ - Pull segments from the reassembly queue.
  1867. //
  1868. // Called when we've received frames out of order, and have some segments
  1869. // on the reassembly queue. We'll walk down the reassembly list, segments that
  1870. // are overlapped by the current rcv. next variable. When we get
  1871. // to one that doesn't completely overlap we'll trim it to fit the next
  1872. // rcv. seq. number, and pull it from the queue.
  1873. //
  1874. // Input: RcvTCB - TCB to pull from.
  1875. // RcvInfo - Pointer to TCPRcvInfo structure for current seg.
  1876. // Size - Pointer to size for current segment. We'll update
  1877. // this when we're done.
  1878. //
  1879. // Returns: Nothing.
  1880. //
  1881. IPRcvBuf *
  1882. PullFromRAQ(TCB * RcvTCB, TCPRcvInfo * RcvInfo, uint * Size)
  1883. {
  1884. TCPRAHdr *CurrentTRH; // Current TCP RA Header being examined.
  1885. TCPRAHdr *TempTRH; // Temporary variable.
  1886. SeqNum NextSeq; // Next sequence number we want.
  1887. IPRcvBuf *NewBuf;
  1888. SeqNum NextTRHSeq; // Seq. number immediately after
  1889. // current TRH.
  1890. int Overlap; // Overlap between current TRH and
  1891. // NextSeq.
  1892. CTEStructAssert(RcvTCB, tcb);
  1893. CurrentTRH = RcvTCB->tcb_raq;
  1894. NextSeq = RcvTCB->tcb_rcvnext;
  1895. while (CurrentTRH != NULL) {
  1896. CTEStructAssert(CurrentTRH, trh);
  1897. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  1898. if (SEQ_LT(NextSeq, CurrentTRH->trh_start)) {
  1899. #if DBG
  1900. *Size = 0;
  1901. #endif
  1902. //invalidate Sack Block
  1903. if ((RcvTCB->tcb_tcpopts & TCP_FLAG_SACK) && RcvTCB->tcb_SackBlock) {
  1904. int i;
  1905. for (i = 0; i < 3; i++) {
  1906. if ((RcvTCB->tcb_SackBlock->Mask[i] != 0) &&
  1907. (SEQ_LT(RcvTCB->tcb_SackBlock->Block[i].end, CurrentTRH->trh_start))) {
  1908. RcvTCB->tcb_SackBlock->Mask[i] = 0;
  1909. }
  1910. }
  1911. }
  1912. return NULL; // The next TRH starts too far down.
  1913. }
  1914. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  1915. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  1916. if (SEQ_GTE(NextSeq, NextTRHSeq)) {
  1917. // The current TRH is overlapped completely. Free it and continue.
  1918. FreeRBChain(CurrentTRH->trh_buffer);
  1919. TempTRH = CurrentTRH->trh_next;
  1920. CTEFreeMem(CurrentTRH);
  1921. CurrentTRH = TempTRH;
  1922. RcvTCB->tcb_raq = TempTRH;
  1923. if (TempTRH == NULL) {
  1924. // We've just cleaned off the RAQ. We can go back on the
  1925. // fast path now.
  1926. if (--(RcvTCB->tcb_slowcount) == 0) {
  1927. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1928. CheckTCBRcv(RcvTCB);
  1929. }
  1930. break;
  1931. }
  1932. } else {
  1933. Overlap = NextSeq - CurrentTRH->trh_start;
  1934. RcvInfo->tri_seq = NextSeq;
  1935. RcvInfo->tri_flags = CurrentTRH->trh_flags;
  1936. RcvInfo->tri_urgent = CurrentTRH->trh_urg;
  1937. if (Overlap != (int)CurrentTRH->trh_size) {
  1938. NewBuf = FreePartialRB(CurrentTRH->trh_buffer, Overlap);
  1939. *Size = CurrentTRH->trh_size - Overlap;
  1940. } else {
  1941. // This completely overlaps the data in this segment, but the
  1942. // sequence number doesn't overlap completely. There must
  1943. // be a FIN in the TRH. If we called FreePartialRB with this
  1944. // we'd end up returning NULL, which is the signal for failure.
  1945. // Instead we'll just return some bogus value that nobody
  1946. // will look at with a size of 0.
  1947. FreeRBChain(CurrentTRH->trh_buffer);
  1948. ASSERT(CurrentTRH->trh_flags & TCP_FLAG_FIN);
  1949. NewBuf = &DummyBuf;
  1950. *Size = 0;
  1951. }
  1952. RcvTCB->tcb_raq = CurrentTRH->trh_next;
  1953. if (RcvTCB->tcb_raq == NULL) {
  1954. // We've just cleaned off the RAQ. We can go back on the
  1955. // fast path now.
  1956. if (--(RcvTCB->tcb_slowcount) == 0) {
  1957. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1958. CheckTCBRcv(RcvTCB);
  1959. }
  1960. }
  1961. CTEFreeMem(CurrentTRH);
  1962. return NewBuf;
  1963. }
  1964. }
  1965. #if DBG
  1966. *Size = 0;
  1967. #endif
  1968. //invalidate Sack Block
  1969. if (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK && RcvTCB->tcb_SackBlock) {
  1970. RcvTCB->tcb_SackBlock->Mask[0] = 0;
  1971. RcvTCB->tcb_SackBlock->Mask[1] = 0;
  1972. RcvTCB->tcb_SackBlock->Mask[2] = 0;
  1973. RcvTCB->tcb_SackBlock->Mask[3] = 0;
  1974. }
  1975. return NULL;
  1976. }
  1977. //* CreateTRH - Create a TCP reassembly header.
  1978. //
  1979. // This function tries to create a TCP reassembly header. We take as input
  1980. // a pointer to the previous TRH in the chain, the RcvBuffer to put on,
  1981. // etc. and try to create and link in a TRH. The caller must hold the lock
  1982. // on the TCB when this is called.
  1983. //
  1984. // Input: PrevTRH - Pointer to TRH to insert after.
  1985. // RcvBuf - Pointer to IP RcvBuf chain.
  1986. // RcvInfo - Pointer to RcvInfo for this TRH.
  1987. // Size - Size in bytes of data.
  1988. //
  1989. // Returns: TRUE if we created it, FALSE otherwise.
  1990. //
  1991. uint
  1992. CreateTRH(TCPRAHdr * PrevTRH, IPRcvBuf * RcvBuf, TCPRcvInfo * RcvInfo, int Size)
  1993. {
  1994. TCPRAHdr *NewTRH;
  1995. IPRcvBuf *NewRcvBuf;
  1996. ASSERT((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));
  1997. NewTRH = CTEAllocMemLow(sizeof(TCPRAHdr), 'SPCT');
  1998. if (NewTRH == NULL) {
  1999. return FALSE;
  2000. }
  2001. #if DBG
  2002. NewTRH->trh_sig = trh_signature;
  2003. #endif
  2004. NewRcvBuf = AllocTcpIpr(Size, 'SPCT');
  2005. if (NewRcvBuf == NULL) {
  2006. CTEFreeMem(NewTRH);
  2007. return FALSE;
  2008. }
  2009. if (Size != 0)
  2010. CopyRcvToBuffer(NewRcvBuf->ipr_buffer, RcvBuf, Size, 0);
  2011. NewTRH->trh_start = RcvInfo->tri_seq;
  2012. NewTRH->trh_flags = RcvInfo->tri_flags;
  2013. NewTRH->trh_size = Size;
  2014. NewTRH->trh_urg = RcvInfo->tri_urgent;
  2015. NewTRH->trh_buffer = NewRcvBuf;
  2016. NewTRH->trh_end = NewRcvBuf;
  2017. NewTRH->trh_next = PrevTRH->trh_next;
  2018. PrevTRH->trh_next = NewTRH;
  2019. return TRUE;
  2020. }
  2021. // SendSackInACK - SEnd SACK block in acknowledgement
  2022. //
  2023. // Called if incoming data is in the window but left edge
  2024. // is not advanced because incoming seq > rcvnext.
  2025. // This routine scans the queued up data, constructs SACK block
  2026. // points the block in tcb for SendACK.
  2027. //
  2028. // Entry RcvTCB
  2029. // IncomingSeq Seq num of Data coming in
  2030. //
  2031. // Returns Nothing
  2032. void
  2033. SendSackInACK(TCB * RcvTCB, SeqNum IncomingSeq)
  2034. {
  2035. TCPRAHdr *PrevTRH, *CurrentTRH; // Prev. and current TRH
  2036. // pointers.
  2037. SeqNum NextTRHSeq; // Seq. number of first byte
  2038. SACKSendBlock *SackBlock;
  2039. int i, j;
  2040. CTEStructAssert(RcvTCB, tcb);
  2041. // If we have a SACK block use it else create one.
  2042. // Note that we use max of 4 sack blocks
  2043. // Sack block structure:
  2044. // First long word holds index of the
  2045. // 4 sack blocks, starting from 1. zero
  2046. // in index field means no sack block
  2047. //
  2048. // !--------!--------!--------!--------!
  2049. // | 1 | 2 | 3 | 4 |
  2050. // -------------------------------------
  2051. // | |
  2052. // -------------------------------------
  2053. // | |
  2054. // -------------------------------------
  2055. // Allocate a block if it is not already there
  2056. if (RcvTCB->tcb_SackBlock == NULL) {
  2057. SackBlock = CTEAllocMemN((sizeof(SACKSendBlock)), 'sPCT');
  2058. if (SackBlock == NULL) {
  2059. // Resources failure. Just try to send ack
  2060. // and leave the resource handling to some one else
  2061. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  2062. SendACK(RcvTCB);
  2063. return;
  2064. } else {
  2065. RcvTCB->tcb_SackBlock = SackBlock;
  2066. //Initialize the first entry to indicate that this is the new one
  2067. NdisZeroMemory(SackBlock, sizeof(SACKSendBlock));
  2068. }
  2069. } else
  2070. SackBlock = RcvTCB->tcb_SackBlock;
  2071. IF_TCPDBG(TCP_DEBUG_SACK) {
  2072. TCPTRACE(("SendSackInACK %x %x %d\n", SackBlock, RcvTCB, IncomingSeq));
  2073. }
  2074. PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
  2075. CurrentTRH = PrevTRH->trh_next;
  2076. while (CurrentTRH != NULL) {
  2077. CTEStructAssert(CurrentTRH, trh);
  2078. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  2079. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  2080. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  2081. if ((SackBlock->Mask[0] != (uchar) - 1) && (SEQ_LTE(CurrentTRH->trh_start, IncomingSeq) &&
  2082. SEQ_LTE(IncomingSeq, NextTRHSeq))) {
  2083. if (SackBlock->Mask[0] == 0) {
  2084. //This is the only sack block
  2085. SackBlock->Block[0].begin = CurrentTRH->trh_start;
  2086. SackBlock->Block[0].end = NextTRHSeq;
  2087. SackBlock->Mask[0] = (uchar) - 1; //Make it valid
  2088. } else {
  2089. if (!((SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].begin) &&
  2090. SEQ_GTE(NextTRHSeq, SackBlock->Block[0].end)) ||
  2091. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].begin) &&
  2092. SEQ_LTE(SackBlock->Block[0].begin, NextTRHSeq)) ||
  2093. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].end) &&
  2094. SEQ_LTE(SackBlock->Block[0].end, NextTRHSeq)))) {
  2095. // Push the blocks down and fill the top
  2096. for (i = 2; i >= 0; i--) {
  2097. SackBlock->Block[i + 1].begin = SackBlock->Block[i].begin;
  2098. SackBlock->Block[i + 1].end = SackBlock->Block[i].end;
  2099. SackBlock->Mask[i + 1] = -SackBlock->Mask[i];
  2100. }
  2101. }
  2102. SackBlock->Block[0].begin = CurrentTRH->trh_start;
  2103. SackBlock->Block[0].end = NextTRHSeq;
  2104. SackBlock->Mask[0] = (uchar) - 1;
  2105. IF_TCPDBG(TCP_DEBUG_SACK) {
  2106. TCPTRACE(("Sack 0 %d %d \n", CurrentTRH->trh_start, NextTRHSeq));
  2107. }
  2108. }
  2109. } else {
  2110. // process all the sack blocks to see if the currentTRH is
  2111. // valid for those blocks
  2112. for (i = 1; i <= 3; i++) {
  2113. if ((SackBlock->Mask[i] != 0) &&
  2114. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[i].begin) &&
  2115. SEQ_LTE(SackBlock->Block[i].begin, NextTRHSeq))) {
  2116. SackBlock->Block[i].begin = CurrentTRH->trh_start;
  2117. SackBlock->Block[i].end = NextTRHSeq;
  2118. SackBlock->Mask[i] = (uchar) - 1;
  2119. }
  2120. }
  2121. }
  2122. PrevTRH = CurrentTRH;
  2123. CurrentTRH = CurrentTRH->trh_next;
  2124. } //while
  2125. //Check and set the blocks traversed for validity
  2126. for (i = 0; i <= 3; i++) {
  2127. if (SackBlock->Mask[i] != (uchar) - 1) {
  2128. SackBlock->Mask[i] = 0;
  2129. } else {
  2130. SackBlock->Mask[i] = 1;
  2131. IF_TCPDBG(TCP_DEBUG_SACK) {
  2132. TCPTRACE(("Sack in ack %x %d %d\n", i, SackBlock->Block[i].begin, SackBlock->Block[i].end));
  2133. }
  2134. }
  2135. }
  2136. // Make sure that there are no duplicates
  2137. for (i = 0; i < 3; i++) {
  2138. if (SackBlock->Mask[i]) {
  2139. for (j = i + 1; j < 4; j++) {
  2140. if (SackBlock->Mask[j] && (SackBlock->Block[i].begin == SackBlock->Block[j].begin))
  2141. IF_TCPDBG(TCP_DEBUG_SACK) {
  2142. TCPTRACE(("Duplicates!!\n"));
  2143. }
  2144. }
  2145. }
  2146. }
  2147. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  2148. SendACK(RcvTCB);
  2149. }
  2150. //* PutOnRAQ - Put a segment on the reassembly queue.
  2151. //
  2152. // Called during segment reception to put a segment on the reassembly
  2153. // queue. We try to use as few reassembly headers as possible, so if this
  2154. // segment has some overlap with an existing entry in the queue we'll just
  2155. // update the existing entry. If there is no overlap we'll create a new
  2156. // reassembly header. Combining URGENT data with non-URGENT data is tricky.
  2157. // If we get a segment that has urgent data that overlaps the front of a
  2158. // reassembly header we'll always mark the whole chunk as urgent - the value
  2159. // of the urgent pointer will mark the end of urgent data, so this is OK. If it
  2160. // only overlaps at the end, however, we won't combine, since we would have to
  2161. // mark previously non-urgent data as urgent. We'll trim the
  2162. // front of the incoming segment and create a new reassembly header. Also,
  2163. // if we have non-urgent data that overlaps at the front of a reassembly
  2164. // header containing urgent data we can't combine these two, since again we
  2165. // would mark non-urgent data as urgent.
  2166. // Our search will stop if we find an entry with a FIN.
  2167. // We assume that the TCB lock is held by the caller.
  2168. //
  2169. // Entry: RcvTCB - TCB on which to reassemble.
  2170. // RcvInfo - Pointer to RcvInfo for new segment.
  2171. // RcvBuf - IP RcvBuf chain for this segment.
  2172. // Size - Size in bytes of data in this segment.
  2173. //
  2174. // Returns: TRUE or FALSE if it could not put RcvBuf on Queue
  2175. //
  2176. BOOLEAN
  2177. PutOnRAQ(TCB * RcvTCB, TCPRcvInfo * RcvInfo, IPRcvBuf * RcvBuf, uint Size)
  2178. {
  2179. TCPRAHdr *PrevTRH, *CurrentTRH; // Prev. and current TRH
  2180. // pointers.
  2181. SeqNum NextSeq; // Seq. number of first byte
  2182. // after segment being
  2183. // reassembled.
  2184. SeqNum NextTRHSeq; // Seq. number of first byte
  2185. // after current TRH.
  2186. uint Created;
  2187. CTEStructAssert(RcvTCB, tcb);
  2188. ASSERT(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
  2189. ASSERT(!(RcvInfo->tri_flags & TCP_FLAG_SYN));
  2190. NextSeq = RcvInfo->tri_seq + Size +
  2191. ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  2192. PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
  2193. CurrentTRH = PrevTRH->trh_next;
  2194. // Walk down the reassembly queue, looking for the correct place to
  2195. // insert this, until we hit the end.
  2196. while (CurrentTRH != NULL) {
  2197. CTEStructAssert(CurrentTRH, trh);
  2198. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  2199. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  2200. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  2201. // First, see if it starts beyond the end of the current TRH.
  2202. if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
  2203. // We know the incoming segment doesn't start beyond the end
  2204. // of this TRH, so we'll either create a new TRH in front of
  2205. // this one or we'll merge the new segment onto this TRH.
  2206. // If the end of the current segment is in front of the start
  2207. // of the current TRH, we'll need to create a new TRH. Otherwise
  2208. // we'll merge these two.
  2209. if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
  2210. break;
  2211. else {
  2212. // There's some overlap. If there's actually data in the
  2213. // incoming segment we'll merge it.
  2214. if (Size != 0) {
  2215. int FrontOverlap, BackOverlap;
  2216. IPRcvBuf *NewRB;
  2217. // We need to merge. If there's a FIN on the incoming
  2218. // segment that would fall inside this current TRH, we
  2219. // have a protocol violation from the remote peer. In this
  2220. // case just return, discarding the incoming segment.
  2221. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  2222. SEQ_LTE(NextSeq, NextTRHSeq))
  2223. return TRUE;
  2224. // We have some overlap. Figure out how much.
  2225. FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
  2226. if (FrontOverlap > 0) {
  2227. // Have overlap in front. Allocate an IPRcvBuf to
  2228. // to hold it, and copy it, unless we would have to
  2229. // combine non-urgent with urgent.
  2230. if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
  2231. (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
  2232. if (CreateTRH(PrevTRH, RcvBuf, RcvInfo,
  2233. CurrentTRH->trh_start - RcvInfo->tri_seq)) {
  2234. PrevTRH = PrevTRH->trh_next;
  2235. CurrentTRH = PrevTRH->trh_next;
  2236. }
  2237. FrontOverlap = 0;
  2238. } else {
  2239. NewRB = AllocTcpIpr(FrontOverlap, 'BPCT');
  2240. if (NewRB == NULL) {
  2241. return TRUE; // Couldn't get the buffer.
  2242. }
  2243. CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
  2244. FrontOverlap, 0);
  2245. CurrentTRH->trh_size += FrontOverlap;
  2246. NewRB->ipr_next = CurrentTRH->trh_buffer;
  2247. CurrentTRH->trh_buffer = NewRB;
  2248. CurrentTRH->trh_start = RcvInfo->tri_seq;
  2249. }
  2250. }
  2251. // We've updated the starting sequence number of this TRH
  2252. // if we needed to. Now look for back overlap. There can't
  2253. // be any back overlap if the current TRH has a FIN. Also
  2254. // we'll need to check for urgent data if there is back
  2255. // overlap.
  2256. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  2257. BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
  2258. if ((BackOverlap > 0) &&
  2259. (RcvInfo->tri_flags & TCP_FLAG_URG) &&
  2260. !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
  2261. (FrontOverlap <= 0)) {
  2262. int AmountToTrim;
  2263. // The incoming segment has urgent data and overlaps
  2264. // on the back but not the front, and the current
  2265. // TRH has no urgent data. We can't combine into
  2266. // this TRH, so trim the front of the incoming
  2267. // segment to NextTRHSeq and move to the next
  2268. // TRH.
  2269. AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
  2270. ASSERT(AmountToTrim >= 0);
  2271. ASSERT(AmountToTrim < (int)Size);
  2272. RcvBuf = FreePartialRB(RcvBuf, (uint) AmountToTrim);
  2273. RcvInfo->tri_seq += AmountToTrim;
  2274. RcvInfo->tri_urgent -= AmountToTrim;
  2275. PrevTRH = CurrentTRH;
  2276. CurrentTRH = PrevTRH->trh_next;
  2277. //Adjust the incoming size too...
  2278. Size -= AmountToTrim;
  2279. continue;
  2280. }
  2281. } else
  2282. BackOverlap = 0;
  2283. // Now if we have back overlap, copy it.
  2284. if (BackOverlap > 0) {
  2285. // We have back overlap. Get a buffer to copy it into.
  2286. // If we can't get one, we won't just return, because
  2287. // we may have updated the front and may need to
  2288. // update the urgent info.
  2289. NewRB = AllocTcpIpr(BackOverlap, 'BPCT');
  2290. if (NewRB != NULL) {
  2291. // Got the buffer.
  2292. CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
  2293. BackOverlap, NextTRHSeq - RcvInfo->tri_seq);
  2294. CurrentTRH->trh_size += BackOverlap;
  2295. NewRB->ipr_next = CurrentTRH->trh_end->ipr_next;
  2296. CurrentTRH->trh_end->ipr_next = NewRB;
  2297. CurrentTRH->trh_end = NewRB;
  2298. // This data segment could also contain a FIN. If
  2299. // so, just set the TRH flag.
  2300. //
  2301. // N.B. If there's another reassembly header after
  2302. // the current one, the data that we're about
  2303. // to put on the current header might already be
  2304. // on that subsequent header which, in that event,
  2305. // will already have the FIN flag set.
  2306. // Check for that case before recording the FIN.
  2307. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  2308. !CurrentTRH->trh_next) {
  2309. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  2310. }
  2311. }
  2312. }
  2313. // Everything should be consistent now. If there's an
  2314. // urgent data pointer in the incoming segment, update the
  2315. // one in the TRH now.
  2316. if (RcvInfo->tri_flags & TCP_FLAG_URG) {
  2317. SeqNum UrgSeq;
  2318. // Have an urgent pointer. If the current TRH already
  2319. // has an urgent pointer, see which is bigger. Otherwise
  2320. // just use this one.
  2321. UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
  2322. if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
  2323. SeqNum TRHUrgSeq;
  2324. TRHUrgSeq = CurrentTRH->trh_start +
  2325. CurrentTRH->trh_urg;
  2326. if (SEQ_LT(UrgSeq, TRHUrgSeq))
  2327. UrgSeq = TRHUrgSeq;
  2328. } else
  2329. CurrentTRH->trh_flags |= TCP_FLAG_URG;
  2330. CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
  2331. }
  2332. } else {
  2333. // We have a 0 length segment. The only interesting thing
  2334. // here is if there's a FIN on the segment. If there is,
  2335. // and the seq. # of the incoming segment is exactly after
  2336. // the current TRH, OR matches the FIN in the current TRH,
  2337. // we note it.
  2338. if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
  2339. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  2340. if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
  2341. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  2342. else
  2343. ASSERT(0);
  2344. } else {
  2345. ASSERT(SEQ_EQ((NextTRHSeq - 1), RcvInfo->tri_seq));
  2346. }
  2347. }
  2348. }
  2349. return TRUE;
  2350. }
  2351. } else {
  2352. // Look at the next TRH, unless the current TRH has a FIN. If he
  2353. // has a FIN, we won't save any data beyond that anyway.
  2354. if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
  2355. return TRUE;
  2356. PrevTRH = CurrentTRH;
  2357. CurrentTRH = PrevTRH->trh_next;
  2358. }
  2359. }
  2360. // When we get here, we need to create a new TRH. If we create one and
  2361. // there was previously nothing on the reassembly queue, we'll have to
  2362. // move off the fast receive path.
  2363. CurrentTRH = RcvTCB->tcb_raq;
  2364. Created = CreateTRH(PrevTRH, RcvBuf, RcvInfo, (int)Size);
  2365. if (Created && CurrentTRH == NULL) {
  2366. RcvTCB->tcb_slowcount++;
  2367. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2368. CheckTCBRcv(RcvTCB);
  2369. } else if (!Created) {
  2370. // Caller needs to know about this failure
  2371. // to free resources
  2372. return FALSE;
  2373. }
  2374. return TRUE;
  2375. }
  2376. //* HandleFastXmit - Handles fast retransmit
  2377. //
  2378. // Called by TCPRcv to transmit a segment
  2379. // without waiting for re-transmit timeout to fire.
  2380. //
  2381. // Entry: RcvTCB - Connection context for this Rcv
  2382. // RcvInfo - Pointer to rcvd TCP Header information
  2383. //
  2384. // Returns: TRUE if the segment got retransmitted, FALSE
  2385. // in all other cases.
  2386. //
  2387. BOOLEAN
  2388. HandleFastXmit(TCB *RcvTCB, TCPRcvInfo *RcvInfo)
  2389. {
  2390. uint CWin;
  2391. RcvTCB->tcb_dup++;
  2392. if ((RcvTCB->tcb_dup == MaxDupAcks)) {
  2393. //
  2394. // Okay. Time to retransmit the segment the
  2395. // receiver is asking for
  2396. //
  2397. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  2398. //
  2399. // Don't let the slow start threshold go
  2400. // below 2 segments
  2401. //
  2402. RcvTCB->tcb_ssthresh = MAX(
  2403. MIN(RcvTCB->tcb_cwin, RcvTCB->tcb_sendwin) / 2,
  2404. (uint) RcvTCB->tcb_mss * 2);
  2405. }
  2406. //
  2407. // Recall the segment in question and send it
  2408. // out. Note that tcb_lock will be
  2409. // dereferenced by the caller
  2410. //
  2411. CWin = RcvTCB->tcb_ssthresh + (MaxDupAcks + 1) * RcvTCB->tcb_mss;
  2412. ResetAndFastSend(RcvTCB, RcvTCB->tcb_senduna, CWin);
  2413. return TRUE;
  2414. } else if ((RcvTCB->tcb_dup > MaxDupAcks)) {
  2415. int SendWin;
  2416. uint AmtOutstanding, AmtUnsent;
  2417. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  2418. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  2419. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  2420. SEQ_LTE(RcvTCB->tcb_sendwl2,RcvInfo->tri_ack)))) {
  2421. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  2422. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  2423. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  2424. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  2425. }
  2426. //
  2427. // Update the cwin to reflect the fact that
  2428. // the dup ack indicates the previous frame
  2429. // was received by the receiver
  2430. //
  2431. RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
  2432. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  2433. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext -
  2434. RcvTCB->tcb_senduna);
  2435. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  2436. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  2437. AmtOutstanding);
  2438. if (SendWin < RcvTCB->tcb_mss) {
  2439. RcvTCB->tcb_force = 1;
  2440. }
  2441. }
  2442. } else if ((RcvTCB->tcb_dup < MaxDupAcks)) {
  2443. int SendWin;
  2444. uint AmtOutstanding, AmtUnsent;
  2445. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  2446. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  2447. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  2448. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo->tri_ack)))) {
  2449. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  2450. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  2451. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  2452. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  2453. //
  2454. // Since we've updated the window,
  2455. // remember to send some more.
  2456. //
  2457. }
  2458. //
  2459. // Check if we need to set tcb_force.
  2460. //
  2461. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  2462. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext - RcvTCB->tcb_senduna);
  2463. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  2464. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  2465. AmtOutstanding);
  2466. if (SendWin < RcvTCB->tcb_mss) {
  2467. RcvTCB->tcb_force = 1;
  2468. }
  2469. }
  2470. } // End of all MaxDupAck cases
  2471. return FALSE;
  2472. }
  2473. //* TCPRcv - Receive a TCP segment.
  2474. //
  2475. // This is the routine called by IP when we need to receive a TCP segment.
  2476. // In general, we follow the RFC 793 event processing section pretty closely,
  2477. // but there is a 'fast path' where we make some quick checks on the incoming
  2478. // segment, and if it matches we deliver it immediately.
  2479. //
  2480. // Entry: IPContext - IPContext identifying physical i/f that
  2481. // received the data.
  2482. // Dest - IPAddr of destination.
  2483. // Src - IPAddr of source.
  2484. // LocalAddr - Local address of network which caused this to be
  2485. // received.
  2486. // SrcAddr - Address of local interface which received the packet
  2487. // IPH - IP Header.
  2488. // IPHLength - Bytes in IPH.
  2489. // RcvBuf - Pointer to receive buffer chain containing data.
  2490. // Size - Size in bytes of data received.
  2491. // Flags - One flag indicates whether this is a bcast or not,
  2492. // and the other indicates if IP detected unbound adapters
  2493. // on this indication
  2494. // Protocol - Protocol this came in on - should be TCP.
  2495. // OptInfo - Pointer to info structure for received options.
  2496. //
  2497. // Returns: Status of reception. Anything other than IP_SUCCESS will cause
  2498. // IP to send a 'port unreachable' message.
  2499. //
  2500. IP_STATUS
  2501. TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src, IPAddr LocalAddr,
  2502. IPAddr SrcAddr, IPHeader UNALIGNED * IPH, uint IPHLength, IPRcvBuf * RcvBuf,
  2503. uint Size, uchar Flags, uchar Protocol, IPOptInfo * OptInfo)
  2504. {
  2505. TCPHeader UNALIGNED *TCPH; // The TCP header.
  2506. TCB *RcvTCB; // TCB on which to receive the packet.
  2507. TWTCB *RcvTWTCB;
  2508. TCPRcvInfo RcvInfo; // Local swapped copy of rcv info.
  2509. uint DataOffset; // Offset from start of header to data.
  2510. uint Actions;
  2511. uint BytesTaken;
  2512. uint NewSize;
  2513. uint index;
  2514. uint Partition;
  2515. PNDIS_PACKET OffLoadPkt;
  2516. int tsval = 0; //Timestamp value
  2517. int tsecr = 0; //Timestamp to be echoed
  2518. BOOLEAN time_stamp = FALSE;
  2519. BOOLEAN ChkSumOk = FALSE;
  2520. Queue SendQ;
  2521. uint UpdateWindow = FALSE;
  2522. #if TRACE_EVENT
  2523. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  2524. WMIData WMIInfo;
  2525. #endif
  2526. if ((Flags & IS_BOUND) == 0) {
  2527. PartitionDelayQProcessing(FALSE);
  2528. }
  2529. CheckRBList(RcvBuf, Size);
  2530. TCPSIncrementInSegCount();
  2531. // Checksum it, to make sure it's valid.
  2532. TCPH = (TCPHeader *) RcvBuf->ipr_buffer;
  2533. if ((Flags & IS_BROADCAST) == 0) {
  2534. if (RcvBuf->ipr_pClientCnt) {
  2535. PNDIS_PACKET_EXTENSION PktExt;
  2536. NDIS_TCP_IP_CHECKSUM_PACKET_INFO ChksumPktInfo;
  2537. if (RcvBuf->ipr_pMdl) {
  2538. OffLoadPkt = NDIS_GET_ORIGINAL_PACKET((PNDIS_PACKET) RcvBuf->ipr_RcvContext);
  2539. if (!OffLoadPkt) {
  2540. OffLoadPkt = (PNDIS_PACKET) RcvBuf->ipr_RcvContext;
  2541. }
  2542. } else {
  2543. OffLoadPkt = (PNDIS_PACKET) RcvBuf->ipr_pClientCnt;
  2544. }
  2545. PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(OffLoadPkt);
  2546. ChksumPktInfo.Value = PtrToUlong(PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo]);
  2547. if (ChksumPktInfo.Receive.NdisPacketTcpChecksumSucceeded) {
  2548. ChkSumOk = TRUE;
  2549. #if DBG
  2550. DbgTcpHwChkSumOk++;
  2551. #endif
  2552. } else if (ChksumPktInfo.Receive.NdisPacketTcpChecksumFailed) {
  2553. #if DBG
  2554. DbgTcpHwChkSumErr++;
  2555. #endif
  2556. TStats.ts_inerrs++;
  2557. return IP_SUCCESS;
  2558. }
  2559. }
  2560. if (!ChkSumOk) {
  2561. if (XsumRcvBuf(PHXSUM(Src, Dest, PROTOCOL_TCP, Size), RcvBuf) == 0xffff){
  2562. ChkSumOk = TRUE;
  2563. }
  2564. } else {
  2565. // Pretch the rcv buffer in to cache
  2566. // to improve copy performance
  2567. #if !MILLEN
  2568. PrefetchRcvBuf(RcvBuf);
  2569. #endif
  2570. }
  2571. if ((Size >= sizeof(TCPHeader)) && ChkSumOk) {
  2572. // The packet is valid. Get the info we need and byte swap it,
  2573. // and then try to find a matching TCB.
  2574. RcvInfo.tri_seq = net_long(TCPH->tcp_seq);
  2575. RcvInfo.tri_ack = net_long(TCPH->tcp_ack);
  2576. RcvInfo.tri_window = (uint) net_short(TCPH->tcp_window);
  2577. RcvInfo.tri_urgent = (uint) net_short(TCPH->tcp_urgent);
  2578. RcvInfo.tri_flags = (uint) TCPH->tcp_flags;
  2579. DataOffset = TCP_HDR_SIZE(TCPH);
  2580. if (DataOffset <= Size) {
  2581. Size -= DataOffset;
  2582. ASSERT(DataOffset <= RcvBuf->ipr_size);
  2583. RcvBuf->ipr_size -= DataOffset;
  2584. RcvBuf->ipr_buffer += DataOffset;
  2585. RcvBuf->ipr_RcvOffset += DataOffset;
  2586. // FindTCB will lock tcbtablelock, returns with tcb_lock
  2587. // held, if found.
  2588. RcvTCB = FindTCB(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest,
  2589. NULL, TRUE, &index);
  2590. Partition = GET_PARTITION(index);
  2591. if (RcvTCB == NULL) {
  2592. CTEGetLockAtDPC(&pTWTCBTableLock[Partition]);
  2593. RcvTWTCB = FindTCBTW(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest, index);
  2594. if (RcvTWTCB != NULL) {
  2595. // A matching time wait TCB is found for this segment.
  2596. // It's table is already locked, and the lock will be
  2597. // released in the following routine, after processing
  2598. // the segment.
  2599. TimeWaitAction Action = HandleTWTCB(RcvTWTCB,
  2600. RcvInfo.tri_flags,
  2601. RcvInfo.tri_seq,
  2602. Partition);
  2603. if (Action == TwaDoneProcessing) {
  2604. return IP_SUCCESS;
  2605. } else if (Action == TwaSendReset) {
  2606. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  2607. return IP_SUCCESS;
  2608. } else {
  2609. ASSERT(Action == TwaAcceptConnection);
  2610. }
  2611. } else {
  2612. UCHAR Action = 0;
  2613. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  2614. if (SynAttackProtect) {
  2615. RcvTCB = FindSynTCB(Dest, Src, TCPH->tcp_src,
  2616. TCPH->tcp_dest, RcvInfo, Size,
  2617. index, &Action);
  2618. // If there is any action which needs
  2619. // to be taken, NULL is returned.
  2620. // Otherwise we either have a TCB and the
  2621. // lock on the tcb or NULL if no match
  2622. // is found.
  2623. if (Action) {
  2624. if (Action == SYN_PKT_SEND_RST) {
  2625. SendRSTFromHeader(TCPH, Size, Src, Dest,
  2626. OptInfo);
  2627. }
  2628. return IP_SUCCESS;
  2629. }
  2630. // Update options
  2631. if (RcvTCB && (OptInfo->ioi_options != NULL)) {
  2632. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  2633. (*LocalNetInfo.ipi_updateopts) (
  2634. OptInfo,
  2635. &RcvTCB->tcb_opt,
  2636. Src,
  2637. NULL_IP_ADDR);
  2638. }
  2639. }
  2640. }
  2641. }
  2642. }
  2643. if (RcvTCB == NULL) {
  2644. // Didn't find a matching TCB. If this segment carries a SYN,
  2645. // find a matching address object and see it it has a listen
  2646. // indication. If it does, call it. Otherwise send a RST
  2647. // back to the sender.
  2648. // Make sure that the source address isn't a broadcast
  2649. // before proceeding.
  2650. if ((*LocalNetInfo.ipi_invalidsrc) (Src)) {
  2651. return IP_SUCCESS;
  2652. }
  2653. // If it doesn't have a SYN (and only a SYN), we'll send a
  2654. // reset.
  2655. if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST)) ==
  2656. TCP_FLAG_SYN) {
  2657. AddrObj *AO;
  2658. //
  2659. // This segment had a SYN.
  2660. //
  2661. //
  2662. CTEGetLockAtDPC(&AddrObjTableLock.Lock);
  2663. // See if we are filtering the
  2664. // destination interface/port.
  2665. //
  2666. if ((!SecurityFilteringEnabled ||
  2667. IsPermittedSecurityFilter(
  2668. LocalAddr,
  2669. IPContext,
  2670. PROTOCOL_TCP,
  2671. (ulong) net_short(TCPH->tcp_dest))))
  2672. {
  2673. //
  2674. // Find a matching address object, and then try
  2675. // and find a listening connection on that AO.
  2676. //
  2677. AO = GetBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP, GAO_FLAG_CHECK_IF_LIST);
  2678. if (AO != NULL) {
  2679. BOOLEAN syntcb = FALSE;
  2680. uint IFIndex;
  2681. //
  2682. // Found an AO. Try and find a listening
  2683. // connection. FindListenConn will free the
  2684. // lock on the AddrObjTable.
  2685. //
  2686. RcvTCB = NULL;
  2687. IFIndex = (*LocalNetInfo.ipi_getifindexfromindicatecontext)(IPContext);
  2688. RcvTCB = FindListenConn(AO, Src, Dest,
  2689. TCPH->tcp_src, OptInfo, TCPH,
  2690. &RcvInfo, IFIndex, &syntcb);
  2691. if (RcvTCB != NULL) {
  2692. uint Inserted;
  2693. CTEStructAssert(RcvTCB, tcb);
  2694. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  2695. //
  2696. // We found a listening connection.
  2697. // Initialize it now, and if it is
  2698. // actually to be accepted we'll
  2699. // send a SYN-ACK also.
  2700. //
  2701. ASSERT(RcvTCB->tcb_state == TCB_SYN_RCVD);
  2702. if (SynAttackProtect) {
  2703. AddHalfOpenTCB();
  2704. }
  2705. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2706. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2707. //
  2708. // Find Remote MSS and also if WS, TS or
  2709. // sack options are negotiated.
  2710. //
  2711. RcvTCB->tcb_sndwinscale = 0;
  2712. RcvTCB->tcb_remmss =
  2713. FindMSSAndOptions(TCPH, RcvTCB, FALSE);
  2714. if (RcvTCB->tcb_remmss <= ALIGNED_TS_OPT_SIZE) {
  2715. // Turn off TS if MSS is not sufficient
  2716. // to hold TS fields.
  2717. RcvTCB->tcb_tcpopts &= ~TCP_FLAG_TS;
  2718. }
  2719. TStats.ts_passiveopens++;
  2720. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2721. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  2722. Inserted = InsertTCB(RcvTCB, TRUE);
  2723. //
  2724. // Get the lock on it, and see if it's been
  2725. // accepted.
  2726. //
  2727. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  2728. if (!Inserted) {
  2729. // Couldn't insert it!.
  2730. CompleteConnReq(RcvTCB, OptInfo,
  2731. TDI_CONNECTION_ABORTED);
  2732. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2733. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  2734. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2735. return IP_SUCCESS;
  2736. }
  2737. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2738. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  2739. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2740. DelayAction(RcvTCB, NEED_OUTPUT);
  2741. }
  2742. //
  2743. // We'll need to update the options, in any case.
  2744. //
  2745. if (OptInfo->ioi_options != NULL) {
  2746. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  2747. (*LocalNetInfo.ipi_updateopts) (
  2748. OptInfo,
  2749. &RcvTCB->tcb_opt,
  2750. Src,
  2751. NULL_IP_ADDR);
  2752. }
  2753. }
  2754. //
  2755. // Notify the callback clients.
  2756. //
  2757. TcpInvokeCcb(TCP_CONN_CLOSED,
  2758. TCP_CONN_SYN_RCVD,
  2759. &RcvTCB->tcb_addrbytes,
  2760. IFIndex);
  2761. if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
  2762. //
  2763. // The connection was accepted. Finish
  2764. // the initialization, and send the
  2765. // SYN ack.
  2766. //
  2767. AcceptConn(RcvTCB, FALSE,
  2768. DISPATCH_LEVEL);
  2769. return IP_SUCCESS;
  2770. } else {
  2771. //
  2772. // We don't know what to do about the
  2773. // connection yet. Return the pending
  2774. // listen, dereference the connection,
  2775. // and return.
  2776. //
  2777. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  2778. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2779. return IP_SUCCESS;
  2780. }
  2781. }
  2782. if (syntcb) {
  2783. return IP_SUCCESS;
  2784. }
  2785. //
  2786. // No listening connection. AddrObjTableLock
  2787. // was released by FindListenConn. Fall
  2788. // through to send RST code.
  2789. //
  2790. } else {
  2791. //
  2792. // No address object. Free the lock, and fall
  2793. // through to the send RST code.
  2794. //
  2795. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  2796. }
  2797. } else {
  2798. //
  2799. // Operation not permitted. Free the lock, and
  2800. // fall through to the send RST code.
  2801. //
  2802. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  2803. }
  2804. }
  2805. // Toss out any segments containing RST.
  2806. if (RcvInfo.tri_flags & TCP_FLAG_RST)
  2807. return IP_SUCCESS;
  2808. //
  2809. // Not a SYN, no AddrObj available, or port filtered.
  2810. // Send a RST back.
  2811. //
  2812. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  2813. return IP_SUCCESS;
  2814. }
  2815. //
  2816. //TCB is already locked
  2817. //
  2818. CheckTCBRcv(RcvTCB);
  2819. RcvTCB->tcb_kacount = 0;
  2820. //scale the incoming window
  2821. if (!(RcvInfo.tri_flags & TCP_FLAG_SYN)) {
  2822. RcvInfo.tri_window = ((uint) net_short(TCPH->tcp_window) << RcvTCB->tcb_sndwinscale);
  2823. }
  2824. //
  2825. // We need to check if Time stamp or Sack options are present.
  2826. //
  2827. if (RcvTCB->tcb_tcpopts) {
  2828. int OptSize;
  2829. uchar *OptPtr;
  2830. OptSize = TCP_HDR_SIZE(TCPH) - sizeof(TCPHeader);
  2831. OptPtr = (uchar *) (TCPH + 1);
  2832. while (OptSize > 0) {
  2833. if (*OptPtr == TCP_OPT_EOL)
  2834. break;
  2835. if (*OptPtr == TCP_OPT_NOP) {
  2836. OptPtr++;
  2837. OptSize--;
  2838. continue;
  2839. }
  2840. if ((*OptPtr == TCP_OPT_TS) && (OptSize >= TS_OPT_SIZE) &&
  2841. (OptPtr[1] == TS_OPT_SIZE)) {
  2842. if (RcvTCB->tcb_tcpopts & TCP_FLAG_TS) {
  2843. // remember timestamp and the the echoed time stamp
  2844. time_stamp = TRUE;
  2845. tsval = *(int UNALIGNED *)&OptPtr[2];
  2846. tsval = net_long(tsval);
  2847. tsecr = *(int UNALIGNED *)&OptPtr[6];
  2848. tsecr = net_long(tsecr);
  2849. }
  2850. } else if ((*OptPtr == TCP_OPT_SACK) && (OptSize > 1)
  2851. && (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK)
  2852. && (OptSize >= OptPtr[1])) {
  2853. SackSeg UNALIGNED *SackPtr;
  2854. SackListEntry *SackList, *Prev, *Current;
  2855. ushort SackOptionLength;
  2856. int i;
  2857. // Sack blocks should not exist until we have
  2858. // actually sent some data. If we see Sack blocks
  2859. // before we are in a state where we can send data,
  2860. // just ignore them. Sack blocks should also be
  2861. // ignored if there is no ACK on the packet we
  2862. // received.
  2863. if ((RcvTCB->tcb_state < TCB_ESTAB) ||
  2864. (!(RcvInfo.tri_flags & TCP_FLAG_ACK))){
  2865. goto no_mem;
  2866. }
  2867. //SACK Option processing
  2868. SackPtr = (SackSeg *)(OptPtr + 2);
  2869. SackOptionLength = OptPtr[1];
  2870. // There can be at most 40 bytes for options
  2871. // which means at most 4 SACK blocks will fit
  2872. // check for this and dicard if too long.
  2873. if (SackOptionLength > (4*sizeof(SackSeg) + 2)) {
  2874. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  2875. return IP_SUCCESS;
  2876. }
  2877. //
  2878. // If the incoming sack blocks are with in this
  2879. // send window Just chain them.
  2880. // When there are some retransmissions, this list
  2881. // will be checked to see if retransmission can be
  2882. // skipped.
  2883. // Note that when the send window is slided, the
  2884. // sack list must be cleandup.
  2885. //
  2886. Prev = STRUCT_OF(SackListEntry, &RcvTCB->tcb_SackRcvd, next);
  2887. Current = RcvTCB->tcb_SackRcvd;
  2888. // Scan the list for old sack entries and purge them
  2889. while ((Current != NULL) && SEQ_GTE(RcvInfo.tri_ack, Current->begin)) {
  2890. Prev->next = Current->next;
  2891. IF_TCPDBG(TCP_DEBUG_SACK) {
  2892. TCPTRACE(("Purging old entries %x %d %d\n", Current, Current->begin, Current->end));
  2893. }
  2894. CTEFreeMem(Current);
  2895. Current = Prev->next;
  2896. }
  2897. //
  2898. //Process each sack block in the incoming segment
  2899. // 8 bytes per block!
  2900. //
  2901. for (i = 0; i < (SackOptionLength >> 3); i++) {
  2902. SeqNum SakBegin, SakEnd;
  2903. // Get the rcvd bytes begin and end offset
  2904. SakBegin = net_long(SackPtr->begin);
  2905. SakEnd = net_long(SackPtr->end);
  2906. // Sanity check this Sack Block and against our
  2907. // send variables
  2908. if (!(SEQ_GT(SakEnd, SakBegin) &&
  2909. SEQ_GTE(SakBegin, RcvTCB->tcb_senduna) &&
  2910. SEQ_LT(SakBegin, RcvTCB->tcb_sendmax) &&
  2911. SEQ_GT(SakEnd, RcvTCB->tcb_senduna) &&
  2912. SEQ_LTE(SakEnd, RcvTCB->tcb_sendmax))) {
  2913. SackPtr++;
  2914. continue;
  2915. }
  2916. IF_TCPDBG(TCP_DEBUG_SACK) {
  2917. TCPTRACE(("In sack entry opt %d %d\n", i, RcvTCB->tcb_senduna));
  2918. }
  2919. Prev = STRUCT_OF(SackListEntry, &RcvTCB->tcb_SackRcvd, next);
  2920. Current = RcvTCB->tcb_SackRcvd;
  2921. //
  2922. // scan the list and insert the incoming sack
  2923. // block in the right place, taking care of
  2924. // overlaps, if any.
  2925. //
  2926. while (Current != NULL) {
  2927. if (SEQ_GT(Current->begin, SakBegin)) {
  2928. //
  2929. // Check if this sack block fills the
  2930. // hole from previous entry. If so,
  2931. // just update the end seq number.
  2932. //
  2933. if ((Prev != RcvTCB->tcb_SackRcvd) && SEQ_EQ(Prev->end, SakBegin)) {
  2934. Prev->end = SakEnd;
  2935. IF_TCPDBG(TCP_DEBUG_SACK) {
  2936. TCPTRACE(("updating prev %x %d %d %x\n", Prev, Prev->begin, Prev->end, RcvTCB));
  2937. }
  2938. //
  2939. //Make sure that next entry is not
  2940. //an overlap.
  2941. //
  2942. if (SEQ_LTE(Current->begin, SakEnd)) {
  2943. ASSERT(SEQ_GT(Current->begin, Prev->begin));
  2944. Prev->end = Current->end;
  2945. Prev->next = Current->next;
  2946. CTEFreeMem(Current);
  2947. Current = Prev;
  2948. //
  2949. // Now we need to scan forward
  2950. // and check if sackend
  2951. // spans several entries
  2952. //
  2953. {
  2954. SackListEntry *tmpcurrent = Current->next;
  2955. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  2956. Current->next = tmpcurrent->next;
  2957. CTEFreeMem(tmpcurrent);
  2958. tmpcurrent = Current->next;
  2959. }
  2960. //
  2961. // above check pointed
  2962. // tmpcurrent whose end is
  2963. // > sakend
  2964. // Check if the tmpcurrent
  2965. // entry begin is overlapped
  2966. //
  2967. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  2968. Current->end = tmpcurrent->end;
  2969. Current->next = tmpcurrent->next;
  2970. CTEFreeMem(tmpcurrent);
  2971. }
  2972. }
  2973. }
  2974. break;
  2975. } else if (SEQ_LTE(Current->begin, SakEnd)) {
  2976. //
  2977. // Current is continuation(may be
  2978. // with overlap) of incoming
  2979. // sack pair. Update current
  2980. //
  2981. IF_TCPDBG(TCP_DEBUG_SACK) {
  2982. TCPTRACE(("updating in back overlap %x %d %d %d %d\n", Current, Current->begin, Current->end, SakBegin, SakEnd));
  2983. }
  2984. Current->begin = SakBegin;
  2985. //
  2986. // If the end shoots out of the
  2987. // current end new end will be the
  2988. // current end
  2989. // (overlaps at the tail too)
  2990. // may overlap several entries.
  2991. // So, check them all.
  2992. //
  2993. if (SEQ_GT(SakEnd, Current->end)) {
  2994. SackListEntry *tmpcurrent = Current->next;
  2995. Current->end = SakEnd;
  2996. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  2997. Current->next = tmpcurrent->next;
  2998. CTEFreeMem(tmpcurrent);
  2999. tmpcurrent = Current->next;
  3000. }
  3001. //
  3002. // above check pointed
  3003. // tmpcurrent whose end is >
  3004. // sakend. Check if the
  3005. // tmpcurrent entry begin is
  3006. // overlapped
  3007. //
  3008. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3009. Current->end = tmpcurrent->end;
  3010. Current->next = tmpcurrent->next;
  3011. CTEFreeMem(tmpcurrent);
  3012. }
  3013. }
  3014. break;
  3015. } else {
  3016. //
  3017. //This is the place where we
  3018. //insert the new entry
  3019. //
  3020. SackList = CTEAllocMemN(sizeof(SackListEntry), 'sPCT');
  3021. if (SackList == NULL) {
  3022. TCPTRACE(("No mem for sack List \n"));
  3023. goto no_mem;
  3024. }
  3025. IF_TCPDBG(TCP_DEBUG_SACK) {
  3026. TCPTRACE(("Inserting Sackentry %x %d %d %x\n", SackList, SakBegin, SakEnd, RcvTCB));
  3027. }
  3028. SackList->begin = SakBegin;
  3029. SackList->end = SakEnd;
  3030. Prev->next = SackList;
  3031. SackList->next = Current;
  3032. break;
  3033. }
  3034. } else if (SEQ_EQ(Current->begin, SakBegin)) {
  3035. SackListEntry *tmpcurrent = Current->next;
  3036. //
  3037. // Make sure that the new SakEnd is
  3038. // not overlapping any other sak
  3039. // entries.
  3040. //
  3041. if (tmpcurrent && SEQ_GTE(SakEnd, tmpcurrent->begin)) {
  3042. Current->end = SakEnd;
  3043. //
  3044. //Sure, this sack overlaps next
  3045. //entry.
  3046. //
  3047. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3048. Current->next = tmpcurrent->next;
  3049. CTEFreeMem(tmpcurrent);
  3050. tmpcurrent = Current->next;
  3051. }
  3052. //
  3053. // above check pointed tmpcurrent
  3054. // whose end is > sakend
  3055. // Check if the tmpcurrent entry
  3056. // begin is overlapped
  3057. //
  3058. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3059. Current->end = tmpcurrent->end;
  3060. Current->next = tmpcurrent->next;
  3061. CTEFreeMem(tmpcurrent);
  3062. }
  3063. break;
  3064. } else {
  3065. //
  3066. // This can still be a duplicate
  3067. // Make sure that SakEnd is really
  3068. // greater than Current->end
  3069. //
  3070. if (SEQ_GT(SakEnd, Current->end)) {
  3071. IF_TCPDBG(TCP_DEBUG_SACK) {
  3072. TCPTRACE(("updating current %x %d %d %d\n", Current, Current->begin, Current->end, SakEnd));
  3073. }
  3074. Current->end = SakEnd;
  3075. }
  3076. break;
  3077. }
  3078. //SakBegin > Current->begin
  3079. } else if (SEQ_LTE(SakEnd, Current->end)) {
  3080. //
  3081. //The incoming sack end is within the
  3082. //current end so, this overlaps the
  3083. //existing sack entry ignore this.
  3084. //
  3085. break;
  3086. //
  3087. // incoming seq begin overlaps the
  3088. // current end update the current end.
  3089. //
  3090. } else if (SEQ_LTE(SakBegin, Current->end)) {
  3091. //
  3092. //Sakend might well ovelap next
  3093. //several entries. Scan for it.
  3094. //
  3095. SackListEntry *tmpcurrent = Current->next;
  3096. Current->end = SakEnd;
  3097. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3098. Current->next = tmpcurrent->next;
  3099. CTEFreeMem(tmpcurrent);
  3100. tmpcurrent = Current->next;
  3101. }
  3102. //
  3103. // above check pointed tmpcurrent
  3104. // whose end is > sakend
  3105. // Check if the tmpcurrent entry begin
  3106. // is overlapped
  3107. //
  3108. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3109. Current->end = tmpcurrent->end;
  3110. Current->next = tmpcurrent->next;
  3111. CTEFreeMem(tmpcurrent);
  3112. }
  3113. break;
  3114. }
  3115. Prev = Current;
  3116. Current = Current->next;
  3117. } //while
  3118. if (Current == NULL) {
  3119. // this is the new sack entry
  3120. // create the entry and hang it on tcb.
  3121. SackList = CTEAllocMemN(sizeof(SackListEntry), 'sPCT');
  3122. if (SackList == NULL) {
  3123. TCPTRACE(("No mem for sack List \n"));
  3124. goto no_mem;
  3125. }
  3126. Prev->next = SackList;
  3127. SackList->next = NULL;
  3128. SackList->begin = SakBegin;
  3129. SackList->end = SakEnd;
  3130. IF_TCPDBG(TCP_DEBUG_SACK) {
  3131. TCPTRACE(("Inserting new Sackentry %x %d %d %x\n", SackList, SackList->begin, SackList->end, RcvTCB->tcb_SackRcvd));
  3132. }
  3133. }
  3134. //advance sack ptr to the next sack block
  3135. // check for consistency????
  3136. SackPtr++;
  3137. } //for
  3138. }
  3139. no_mem:;
  3140. //unknown options
  3141. if (OptSize > 1) {
  3142. if (OptPtr[1] == 0 || OptPtr[1] > OptSize)
  3143. break; // Bad option length, bail out.
  3144. OptSize -= OptPtr[1];
  3145. OptPtr += OptPtr[1];
  3146. } else
  3147. break;
  3148. } //while
  3149. }
  3150. // if ack is with in the sequence space,that is
  3151. // this seq number is next expected or repeat of previous
  3152. // segment but the right edge is new for us,
  3153. // record the time stamp val of the remote, which will be echoed
  3154. if (time_stamp &&
  3155. TS_GTE(tsval, RcvTCB->tcb_tsrecent) &&
  3156. SEQ_LTE(RcvInfo.tri_seq, RcvTCB->tcb_lastack)) {
  3157. RcvTCB->tcb_tsupdatetime = TCPTime;
  3158. RcvTCB->tcb_tsrecent = tsval;
  3159. }
  3160. //
  3161. // Do the fast path check. We can hit the fast path if the
  3162. // incoming sequence number matches our receive next and the
  3163. // masked flags match our 'predicted' flags.
  3164. // Also, include PAWS check
  3165. //
  3166. if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
  3167. (!time_stamp || TS_GTE(tsval, RcvTCB->tcb_tsrecent)) &&
  3168. (RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk)
  3169. {
  3170. uint CWin;
  3171. INITQ(&SendQ);
  3172. Actions = 0;
  3173. REFERENCE_TCB(RcvTCB);
  3174. // Since we are accepting the packet, start the
  3175. // keepalive timer.
  3176. if ((RcvTCB->tcb_flags & KEEPALIVE) &&
  3177. (RcvTCB->tcb_conn != NULL)) {
  3178. START_TCB_TIMER_R(RcvTCB, KA_TIMER,
  3179. RcvTCB->tcb_conn->tc_tcbkatime);
  3180. }
  3181. //
  3182. // The fast path. We know all we have to do here is ack
  3183. // sends and deliver data. First try and ack data.
  3184. //
  3185. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3186. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3187. uint MSS;
  3188. uint Amount = RcvInfo.tri_ack - RcvTCB->tcb_senduna;
  3189. //
  3190. // The ack acknowledes something. Pull the
  3191. // appropriate amount off the send q.
  3192. //
  3193. ACKData(RcvTCB, RcvInfo.tri_ack, &SendQ);
  3194. //
  3195. // If this acknowledges something we were running an
  3196. // RTT on, update that stuff now.
  3197. //
  3198. {
  3199. short RTT = 0;
  3200. BOOLEAN fUpdateRtt = FALSE;
  3201. //
  3202. //if timestamp is true, get the RTT using the echoed
  3203. //timestamp.
  3204. //
  3205. if (time_stamp && tsecr) {
  3206. RTT = TCPTime - tsecr;
  3207. fUpdateRtt = TRUE;
  3208. } else {
  3209. if (RcvTCB->tcb_rtt != 0 &&
  3210. SEQ_GT(RcvInfo.tri_ack,
  3211. RcvTCB->tcb_rttseq)) {
  3212. fUpdateRtt = TRUE;
  3213. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  3214. }
  3215. }
  3216. if (fUpdateRtt) {
  3217. RcvTCB->tcb_rtt = 0;
  3218. RTT -= (RcvTCB->tcb_smrtt >> 3); //alpha = 1/8
  3219. RcvTCB->tcb_smrtt += RTT;
  3220. RTT = (RTT >= 0 ? RTT : -RTT);
  3221. RTT -= (RcvTCB->tcb_delta >> 3);
  3222. RcvTCB->tcb_delta += RTT + RTT; //Beta of
  3223. //1/4 instead
  3224. // of 1/8
  3225. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  3226. MIN_RETRAN_TICKS)+1, MAX_REXMIT_TO);
  3227. }
  3228. }
  3229. // Update the congestion window now.
  3230. CWin = RcvTCB->tcb_cwin;
  3231. MSS = RcvTCB->tcb_mss;
  3232. if (CWin < RcvTCB->tcb_maxwin) {
  3233. if (CWin < RcvTCB->tcb_ssthresh)
  3234. CWin += (RcvTCB->tcb_flags & SCALE_CWIN)
  3235. ? Amount : MSS;
  3236. else
  3237. CWin += MAX((MSS * MSS) / CWin, 1);
  3238. RcvTCB->tcb_cwin = CWin;
  3239. }
  3240. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  3241. //
  3242. // We've acknowledged something, so reset the rexmit
  3243. // count. If there's still stuff outstanding, restart
  3244. // the rexmit timer.
  3245. //
  3246. RcvTCB->tcb_rexmitcnt = 0;
  3247. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  3248. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3249. else
  3250. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  3251. //
  3252. // Since we've acknowledged data, we need to update
  3253. // the window.
  3254. //
  3255. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3256. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
  3257. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3258. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3259. // We've updated the window, remember to send some more.
  3260. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3261. {
  3262. //
  3263. // If the receiver has already sent dup acks, but
  3264. // we are not sending because the SendWin is less
  3265. // than a segment, then to avoid time outs on the
  3266. // previous send (receiver is waiting for
  3267. // retransmitted data but we are not sending the
  3268. // segment..) prematurely
  3269. // timeout (set rexmittimer to 1 tick)
  3270. //
  3271. int SendWin;
  3272. uint AmtOutstanding, AmtUnsent;
  3273. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext -
  3274. RcvTCB->tcb_senduna);
  3275. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  3276. SendWin = (int)(MIN(RcvTCB->tcb_sendwin,
  3277. RcvTCB->tcb_cwin) - AmtOutstanding);
  3278. if ((RcvTCB->tcb_dup >= MaxDupAcks) && ((int)RcvTCB->tcb_ssthresh > 0)) {
  3279. //
  3280. // Fast retransmitted frame is acked
  3281. // Set cwin to ssthresh so that cwin grows
  3282. // linearly from here
  3283. //
  3284. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  3285. }
  3286. }
  3287. RcvTCB->tcb_dup = 0;
  3288. } else {
  3289. //
  3290. // It doesn't ack anything. If it's an ack for something
  3291. // larger than we've sent then ACKAndDrop it, otherwise
  3292. // ignore it.
  3293. //
  3294. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3295. ACKAndDrop(&RcvInfo, RcvTCB);
  3296. return IP_SUCCESS;
  3297. }
  3298. //
  3299. // If it is a pure duplicate ack, check if it is
  3300. // time to retransmit immediately
  3301. //
  3302. else if ((Size == 0) &&
  3303. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3304. (SEQ_LT(RcvTCB->tcb_senduna,
  3305. RcvTCB->tcb_sendmax)) &&
  3306. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  3307. RcvInfo.tri_window
  3308. ) {
  3309. // See of fast rexmit can be done
  3310. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  3311. return IP_SUCCESS;
  3312. }
  3313. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3314. } else { // not a pure duplicate ack (size == 0 )
  3315. // Size !=0 or recvr is advertizing new window.
  3316. // update the window and check if
  3317. // anything needs to be sent
  3318. RcvTCB->tcb_dup = 0;
  3319. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3320. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  3321. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  3322. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  3323. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3324. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  3325. RcvInfo.tri_window);
  3326. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3327. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3328. //
  3329. // Since we've updated the window, remember to
  3330. // send some more.
  3331. //
  3332. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3333. }
  3334. } // for SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)
  3335. // case
  3336. }
  3337. NewSize = MIN((int)Size, RcvTCB->tcb_rcvwin);
  3338. if (NewSize != 0) {
  3339. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  3340. BytesTaken = (*RcvTCB->tcb_rcvhndlr) (RcvTCB, RcvInfo.tri_flags,
  3341. RcvBuf, NewSize);
  3342. RcvTCB->tcb_rcvnext += BytesTaken;
  3343. RcvTCB->tcb_rcvwin -= BytesTaken;
  3344. CheckTCBRcv(RcvTCB);
  3345. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  3346. Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ?
  3347. NEED_OUTPUT : 0);
  3348. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  3349. if (BytesTaken != NewSize) {
  3350. Actions |= NEED_ACK;
  3351. RcvTCB->tcb_rcvdsegs = 0;
  3352. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  3353. } else {
  3354. if (RcvTCB->tcb_rcvdsegs != RcvTCB->tcb_numdelacks) {
  3355. RcvTCB->tcb_rcvdsegs++;
  3356. RcvTCB->tcb_flags |= ACK_DELAYED;
  3357. ASSERT(RcvTCB->tcb_delackticks);
  3358. START_TCB_TIMER_R(RcvTCB, DELACK_TIMER, RcvTCB->tcb_delackticks);
  3359. } else {
  3360. Actions |= NEED_ACK;
  3361. RcvTCB->tcb_rcvdsegs = 0;
  3362. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  3363. }
  3364. }
  3365. } else {
  3366. //
  3367. // The new size is 0. If the original size was not 0,
  3368. // we must have a 0 rcv. win and hence need to send an
  3369. // ACK to this probe.
  3370. //
  3371. Actions |= (Size ? NEED_ACK : 0);
  3372. }
  3373. if (Actions)
  3374. DelayAction(RcvTCB, Actions);
  3375. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3376. CompleteSends(&SendQ);
  3377. return IP_SUCCESS;
  3378. }
  3379. //
  3380. // Make sure we can handle this frame. We can't handle it if
  3381. // we're in SYN_RCVD and the accept is still pending, or we're
  3382. // in a non-established state and already in the receive
  3383. // handler.
  3384. //
  3385. if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
  3386. !(RcvTCB->tcb_flags & CONN_ACCEPTED) &&
  3387. !(RcvTCB->tcb_flags & ACTIVE_OPEN)) ||
  3388. (RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
  3389. TCP_FLAG_IN_RCV))) {
  3390. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3391. return IP_SUCCESS;
  3392. }
  3393. //
  3394. // If it's closed, it's a temporary zombie TCB. Reset the
  3395. // sender.
  3396. //
  3397. if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
  3398. ((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
  3399. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3400. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3401. return IP_SUCCESS;
  3402. }
  3403. //
  3404. // At this point, we have a connection, and it's locked.
  3405. // Following the 'Segment Arrives' section of 793, the next
  3406. // thing to check is if this connection is in SynSent state.
  3407. //
  3408. if (RcvTCB->tcb_state == TCB_SYN_SENT) {
  3409. ASSERT(RcvTCB->tcb_flags & ACTIVE_OPEN);
  3410. //
  3411. // Check the ACK bit. Since we don't send data with our
  3412. // SYNs, the check we make is for the ack to exactly match
  3413. // our SND.NXT.
  3414. //
  3415. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  3416. // ACK is set.
  3417. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
  3418. // Bad ACK value.
  3419. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3420. // Send a RST back at him.
  3421. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3422. return IP_SUCCESS;
  3423. }
  3424. }
  3425. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  3426. //
  3427. // There's an acceptable RST. We'll persist here,
  3428. // sending another SYN in PERSIST_TIMEOUT ms, until we
  3429. // fail from too many retries.
  3430. //
  3431. if (!(RcvTCB->tcb_fastchk & TCP_FLAG_RST_WHILE_SYN)) {
  3432. RcvTCB->tcb_fastchk |= TCP_FLAG_RST_WHILE_SYN;
  3433. RcvTCB->tcb_slowcount++;
  3434. }
  3435. if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
  3436. //
  3437. // We've had a positive refusal, and one more rexmit
  3438. // would time us out, so close the connection now.
  3439. //
  3440. REFERENCE_TCB(RcvTCB);
  3441. CompleteConnReq(RcvTCB, OptInfo, TDI_CONN_REFUSED);
  3442. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  3443. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3444. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3445. } else {
  3446. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, PERSIST_TIMEOUT);
  3447. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3448. }
  3449. return IP_SUCCESS;
  3450. }
  3451. // See if we have a SYN. If we do, we're going to change state
  3452. // somehow (either to ESTABLISHED or SYN_RCVD).
  3453. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3454. uint RexmitCnt = RcvTCB->tcb_rexmitcnt;
  3455. REFERENCE_TCB(RcvTCB);
  3456. // We have a SYN. Go ahead and record the sequence number and
  3457. // window info.
  3458. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  3459. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  3460. // Urgent data. Update the pointer.
  3461. if (RcvInfo.tri_urgent != 0)
  3462. RcvInfo.tri_urgent--;
  3463. else
  3464. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  3465. }
  3466. //
  3467. // get remote mss and also enable ws, ts or sack options
  3468. // if they are negotiated and if the host supports them.
  3469. //
  3470. RcvTCB->tcb_sndwinscale = 0;
  3471. RcvTCB->tcb_remmss = FindMSSAndOptions(TCPH, RcvTCB,
  3472. FALSE);
  3473. //
  3474. // If there are options, update them now. We already
  3475. // have an RCE open, so if we have new options we'll
  3476. // have to close it and open a new one.
  3477. //
  3478. if (OptInfo->ioi_options != NULL) {
  3479. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  3480. (*LocalNetInfo.ipi_updateopts) (OptInfo,
  3481. &RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
  3482. (*LocalNetInfo.ipi_closerce) (RcvTCB->tcb_rce);
  3483. InitRCE(RcvTCB);
  3484. }
  3485. } else {
  3486. RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
  3487. ASSERT(RcvTCB->tcb_mss > 0);
  3488. ValidateMSS(RcvTCB);
  3489. }
  3490. RcvTCB->tcb_rexmitcnt = 0;
  3491. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3492. AdjustRcvWin(RcvTCB);
  3493. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  3494. // Our SYN has been acked. Update SND.UNA and stop the
  3495. // retrans timer.
  3496. RcvTCB->tcb_senduna = RcvInfo.tri_ack;
  3497. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3498. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  3499. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3500. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3501. #if TRACE_EVENT
  3502. CPCallBack = TCPCPHandlerRoutine;
  3503. if (CPCallBack != NULL) {
  3504. ulong GroupType;
  3505. WMIInfo.wmi_destaddr = RcvTCB->tcb_daddr;
  3506. WMIInfo.wmi_destport = RcvTCB->tcb_dport;
  3507. WMIInfo.wmi_srcaddr = RcvTCB->tcb_saddr;
  3508. WMIInfo.wmi_srcport = RcvTCB->tcb_sport;
  3509. WMIInfo.wmi_size = 0;
  3510. WMIInfo.wmi_context = RcvTCB->tcb_cpcontext;
  3511. GroupType = EVENT_TRACE_GROUP_TCPIP +
  3512. EVENT_TRACE_TYPE_CONNECT;
  3513. (*CPCallBack)(GroupType, (PVOID)&WMIInfo,
  3514. sizeof(WMIInfo), NULL);
  3515. }
  3516. #endif
  3517. GoToEstab(RcvTCB);
  3518. //
  3519. // Indicate callback clients about this connection
  3520. // going to established state.
  3521. //
  3522. TcpInvokeCcb(TCP_CONN_SYN_SENT, TCP_CONN_ESTAB,
  3523. &RcvTCB->tcb_addrbytes,
  3524. (*LocalNetInfo.ipi_getifindexfromindicatecontext)(IPContext));
  3525. //
  3526. // Set a bit that informs TCBTimeout to notify
  3527. // the automatic connection driver of this new
  3528. // connection. Only set this flag if we
  3529. // have bound successfully with the automatic
  3530. // connection driver.
  3531. //
  3532. if (fAcdLoadedG)
  3533. START_TCB_TIMER_R(RcvTCB, ACD_TIMER, 2);
  3534. //
  3535. // Remove whatever command exists on this
  3536. // connection.
  3537. //
  3538. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  3539. //
  3540. // If data has been queued, send the first data
  3541. // segment with an ACK. Otherwise, send a pure ACK.
  3542. //
  3543. if (RcvTCB->tcb_unacked) {
  3544. REFERENCE_TCB(RcvTCB);
  3545. TCPSend(RcvTCB, DISPATCH_LEVEL);
  3546. } else {
  3547. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3548. SendACK(RcvTCB);
  3549. }
  3550. //
  3551. // Now handle other data and controls. To do this
  3552. // we need to reacquire the lock, and make sure we
  3553. // haven't started closing it.
  3554. //
  3555. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3556. if (!CLOSING(RcvTCB)) {
  3557. //
  3558. // We haven't started closing it. Turn off the
  3559. // SYN flag and continue processing.
  3560. //
  3561. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3562. if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) != TCP_FLAG_ACK ||
  3563. Size != 0)
  3564. goto NotSYNSent;
  3565. }
  3566. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3567. return IP_SUCCESS;
  3568. } else {
  3569. // A SYN, but not an ACK. Go to SYN_RCVD.
  3570. RcvTCB->tcb_state = TCB_SYN_RCVD;
  3571. RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
  3572. if (SynAttackProtect) {
  3573. AddHalfOpenTCB();
  3574. AddHalfOpenRetry(RexmitCnt);
  3575. }
  3576. SendSYN(RcvTCB, DISPATCH_LEVEL);
  3577. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3578. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3579. return IP_SUCCESS;
  3580. }
  3581. } else {
  3582. // No SYN, just toss the frame.
  3583. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3584. return IP_SUCCESS;
  3585. }
  3586. }
  3587. REFERENCE_TCB(RcvTCB);
  3588. NotSYNSent:
  3589. //do not allow buffer ownership via slow path
  3590. if (RcvBuf)
  3591. RcvBuf->ipr_pMdl = NULL;
  3592. // Check for PAWS(RFC 1323)
  3593. // Check for tsrecent and tsval wrap around
  3594. if (time_stamp &&
  3595. !(RcvInfo.tri_flags & TCP_FLAG_RST) &&
  3596. RcvTCB->tcb_tsrecent &&
  3597. TS_LT(tsval, RcvTCB->tcb_tsrecent)) {
  3598. // Time stamp is not valid
  3599. // Check if this is because the last update is
  3600. // 24 days old
  3601. if ((int)(TCPTime - RcvTCB->tcb_tsupdatetime) > PAWS_IDLE) {
  3602. //invalidate the ts
  3603. RcvTCB->tcb_tsrecent = 0;
  3604. } else {
  3605. ACKAndDrop(&RcvInfo, RcvTCB);
  3606. return IP_SUCCESS;
  3607. }
  3608. }
  3609. //
  3610. // Not in the SYN-SENT state. Check the sequence number. If my
  3611. // window is 0, I'll truncate all incoming frames but look at
  3612. // some of the control fields. Otherwise I'll try and make
  3613. // this segment fit into the window.
  3614. //
  3615. if (RcvTCB->tcb_rcvwin != 0) {
  3616. int StateSize; // Size, including state info.
  3617. SeqNum LastValidSeq; // Sequence number of last valid
  3618. // byte at RWE.
  3619. //
  3620. // We are offering a window. If this segment starts in
  3621. // front of my receive window, clip off the front part.
  3622. //Check for the sanity of received sequence.
  3623. //This is to fix the 1 bit error(MSB) case in the rcv seq.
  3624. // Also, check the incoming size.
  3625. //
  3626. if ((SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) &&
  3627. ((int)Size >= 0) &&
  3628. (RcvTCB->tcb_rcvnext - RcvInfo.tri_seq) > 0)
  3629. {
  3630. int AmountToClip, FinByte;
  3631. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3632. //
  3633. // Had a SYN. Clip it off and update the seq number.
  3634. // This will be clipped off in the next if.
  3635. // Allow AckAndDrop routine to see the incoming SYN!
  3636. // RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3637. //
  3638. RcvInfo.tri_seq++;
  3639. RcvInfo.tri_urgent--;
  3640. }
  3641. // Advance the receive buffer to point at the new data.
  3642. AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
  3643. ASSERT(AmountToClip >= 0);
  3644. //
  3645. // If there's a FIN on this segment, we'll need to
  3646. // account for it.
  3647. //
  3648. FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  3649. if (AmountToClip >= (((int)Size) + FinByte)) {
  3650. //
  3651. // Falls entirely before the window. We have more
  3652. // special case code here - if the ack. number
  3653. // acks something, we'll go ahead and take it,
  3654. // faking the sequence number to be rcvnext. This
  3655. // prevents problems on full duplex connections,
  3656. // where data has been received but not acked,
  3657. // and retransmission timers reset the seq. number
  3658. // to below our rcvnext.
  3659. //
  3660. if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
  3661. SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3662. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3663. //
  3664. // This contains valid ACK info. Fudge the info
  3665. // to get through the rest of this.
  3666. //
  3667. Size = 0;
  3668. AmountToClip = 0;
  3669. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  3670. RcvInfo.tri_flags &=
  3671. ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
  3672. TCP_FLAG_RST | TCP_FLAG_URG);
  3673. #if DBG
  3674. FinByte = 1; // Fake out assert below.
  3675. #endif
  3676. } else {
  3677. ACKAndDrop(&RcvInfo, RcvTCB);
  3678. return IP_SUCCESS;
  3679. }
  3680. }
  3681. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3682. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3683. }
  3684. //
  3685. // Trim what we have to. If we can't trim enough, the
  3686. // frame is too short. This shouldn't happen, but if
  3687. // it does we'll drop the frame.
  3688. //
  3689. Size -= AmountToClip;
  3690. RcvInfo.tri_seq += AmountToClip;
  3691. RcvInfo.tri_urgent -= AmountToClip;
  3692. RcvBuf = TrimRcvBuf(RcvBuf, AmountToClip);
  3693. ASSERT(RcvBuf != NULL);
  3694. ASSERT(RcvBuf->ipr_size != 0 ||
  3695. (Size == 0 && FinByte));
  3696. RcvBuf->ipr_pMdl = NULL;
  3697. if (*(int *)&RcvInfo.tri_urgent < 0) {
  3698. RcvInfo.tri_urgent = 0;
  3699. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  3700. }
  3701. }
  3702. //
  3703. // We've made sure the front is OK. Now make sure part of
  3704. // it doesn't fall outside of the right edge of the
  3705. // window. If it does, we'll truncate the frame (removing
  3706. // the FIN, if any). If we truncate the whole frame we'll
  3707. // ACKAndDrop it.
  3708. //
  3709. StateSize =
  3710. Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1 : 0) +
  3711. ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  3712. if (StateSize)
  3713. StateSize--;
  3714. //
  3715. // Now the incoming sequence number (RcvInfo.tri_seq) +
  3716. // StateSize is the last sequence number in the segment.
  3717. // If this is greater than the last valid byte in the
  3718. // window, we have some overlap to chop off.
  3719. //
  3720. ASSERT(StateSize >= 0);
  3721. LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
  3722. if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
  3723. int AmountToChop;
  3724. //
  3725. // At least some part of the frame is outside of our
  3726. // window. See if it starts outside our window.
  3727. //
  3728. if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
  3729. //
  3730. // Falls entirely outside the window. We have
  3731. // special case code to deal with a pure ack that
  3732. // falls exactly at our right window edge.
  3733. // Otherwise we ack and drop it.
  3734. //
  3735. if (
  3736. !SEQ_EQ(RcvInfo.tri_seq, LastValidSeq + 1) ||
  3737. Size != 0 ||
  3738. (RcvInfo.tri_flags & (TCP_FLAG_SYN |
  3739. TCP_FLAG_FIN))
  3740. ) {
  3741. ACKAndDrop(&RcvInfo, RcvTCB);
  3742. return IP_SUCCESS;
  3743. }
  3744. } else {
  3745. //
  3746. // At least some part of it is in the window. If
  3747. // there's a FIN, chop that off and see if that
  3748. // moves us inside.
  3749. //
  3750. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  3751. RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
  3752. StateSize--;
  3753. }
  3754. // Now figure out how much to chop off.
  3755. AmountToChop = (RcvInfo.tri_seq + StateSize) -
  3756. LastValidSeq;
  3757. ASSERT(AmountToChop >= 0);
  3758. Size -= AmountToChop;
  3759. RcvBuf->ipr_pMdl = NULL;
  3760. }
  3761. }
  3762. } else {
  3763. if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
  3764. //
  3765. // If there's a RST on this segment, and he's only off
  3766. // by 1, take it anyway. This can happen if the remote
  3767. // peer is probing and sends with the seq. # after the
  3768. // probe.
  3769. //
  3770. if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
  3771. !(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
  3772. ACKAndDrop(&RcvInfo, RcvTCB);
  3773. return IP_SUCCESS;
  3774. } else
  3775. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  3776. }
  3777. //
  3778. // He's in sequence, but we have a window of 0. Truncate the
  3779. // size, and clear any sequence consuming bits.
  3780. //
  3781. if (Size != 0 ||
  3782. (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  3783. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
  3784. Size = 0;
  3785. if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
  3786. DelayAction(RcvTCB, NEED_ACK);
  3787. }
  3788. }
  3789. //
  3790. // At this point, the segment is in our window and does not
  3791. // overlap on either end. If it's the next seq number we
  3792. // expect, we can handle the data now. Otherwise we'll queue
  3793. // it for later. In either case we'll handle RST and ACK
  3794. // information right now.
  3795. //
  3796. ASSERT((*(int *)&Size) >= 0);
  3797. // Since we are accepting the packet, start the
  3798. // keepalive timer.
  3799. if ((RcvTCB->tcb_flags & KEEPALIVE) &&
  3800. (RcvTCB->tcb_conn != NULL)) {
  3801. START_TCB_TIMER_R(RcvTCB, KA_TIMER,
  3802. RcvTCB->tcb_conn->tc_tcbkatime);
  3803. }
  3804. // Now, following 793, we check the RST bit.
  3805. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  3806. uchar Reason;
  3807. //
  3808. // We can't go back into the LISTEN state from SYN-RCVD
  3809. // here, because we may have notified the client via a
  3810. // listen completing or a connect indication. So, if we came
  3811. // from an active open we'll give back a 'connection
  3812. // refused' notice. For all other cases
  3813. // we'll just destroy the connection.
  3814. //
  3815. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  3816. if (RcvTCB->tcb_flags & ACTIVE_OPEN)
  3817. Reason = TCB_CLOSE_REFUSED;
  3818. else
  3819. Reason = TCB_CLOSE_RST;
  3820. } else
  3821. Reason = TCB_CLOSE_RST;
  3822. TryToCloseTCB(RcvTCB, Reason, DISPATCH_LEVEL);
  3823. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3824. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  3825. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3826. RemoveTCBFromConn(RcvTCB);
  3827. NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET,
  3828. NULL);
  3829. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3830. }
  3831. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3832. return IP_SUCCESS;
  3833. }
  3834. // Next check the SYN bit.
  3835. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3836. //
  3837. // Again, we can't quietly go back into the LISTEN state
  3838. // here, even if we came from a passive open.
  3839. //
  3840. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  3841. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3842. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3843. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  3844. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3845. RemoveTCBFromConn(RcvTCB);
  3846. NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET,
  3847. NULL);
  3848. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3849. }
  3850. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3851. return IP_SUCCESS;
  3852. }
  3853. //
  3854. // Check the ACK field. If it's not on drop the segment.
  3855. //
  3856. if (!(RcvInfo.tri_flags & TCP_FLAG_ACK)) {
  3857. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3858. return IP_SUCCESS;
  3859. }
  3860. INITQ(&SendQ);
  3861. //
  3862. // If we're in SYN-RCVD, go to ESTABLISHED.
  3863. //
  3864. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  3865. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3866. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3867. // The ack is valid.
  3868. if (RcvTCB->tcb_fastchk & TCP_FLAG_ACCEPT_PENDING) {
  3869. AddrObj *AO;
  3870. BOOLEAN Accepted = FALSE;
  3871. //
  3872. // We will be reiniting the tcprexmitcnt to 0.
  3873. // If we are configured for syn-attack
  3874. // protection and the rexmit cnt is >1,
  3875. // decrement the count of connections that are
  3876. // in the half-open-retried state. Check
  3877. // whether we are below a low-watermark. If we
  3878. // are, increase the rexmit count back to
  3879. // configured values
  3880. //
  3881. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  3882. // Check if we still have the listening endpoint
  3883. CTEGetLockAtDPC(&AddrObjTableLock.Lock);
  3884. AO = GetBestAddrObj(Dest, TCPH->tcp_dest,
  3885. PROTOCOL_TCP,
  3886. GAO_FLAG_CHECK_IF_LIST);
  3887. if (AO && AO->ao_connect == NULL) {
  3888. //
  3889. // Lets see if there is one more addr obj
  3890. // matching the incoming request with
  3891. // ao_connect != NULL
  3892. //
  3893. AddrObj *tmpAO;
  3894. tmpAO = GetNextBestAddrObj(Dest, TCPH->tcp_dest,
  3895. PROTOCOL_TCP, AO,
  3896. GAO_FLAG_CHECK_IF_LIST);
  3897. if (tmpAO != NULL) {
  3898. AO = tmpAO;
  3899. }
  3900. }
  3901. if (AO != NULL) {
  3902. Accepted = DelayedAcceptConn(AO, Src,
  3903. TCPH->tcp_src,
  3904. OptInfo, RcvTCB);
  3905. } else {
  3906. CTEFreeLockFromDPC(&AddrObjTableLock.Lock);
  3907. Accepted = FALSE;
  3908. }
  3909. if (Accepted) {
  3910. AcceptConn(RcvTCB, TRUE, DISPATCH_LEVEL);
  3911. } else {
  3912. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3913. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, DISPATCH_LEVEL);
  3914. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3915. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3916. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3917. return IP_SUCCESS;
  3918. }
  3919. }
  3920. if (SynAttackProtect) {
  3921. DropHalfOpenTCB(RcvTCB->tcb_rexmitcnt);
  3922. }
  3923. RcvTCB->tcb_rexmitcnt = 0;
  3924. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3925. RcvTCB->tcb_senduna++;
  3926. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3927. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  3928. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3929. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3930. GoToEstab(RcvTCB);
  3931. TcpInvokeCcb(TCP_CONN_SYN_RCVD, TCP_CONN_ESTAB,
  3932. &RcvTCB->tcb_addrbytes,
  3933. (*LocalNetInfo.ipi_getifindexfromindicatecontext)(IPContext));
  3934. #if TRACE_EVENT
  3935. CPCallBack = TCPCPHandlerRoutine;
  3936. if (CPCallBack != NULL) {
  3937. ulong GroupType;
  3938. WMIInfo.wmi_destaddr = RcvTCB->tcb_daddr;
  3939. WMIInfo.wmi_destport = RcvTCB->tcb_dport;
  3940. WMIInfo.wmi_srcaddr = RcvTCB->tcb_saddr;
  3941. WMIInfo.wmi_srcport = RcvTCB->tcb_sport;
  3942. WMIInfo.wmi_size = 0;
  3943. WMIInfo.wmi_context = RcvTCB->tcb_cpcontext;
  3944. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_ACCEPT;
  3945. (*CPCallBack) (GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo), NULL);
  3946. }
  3947. #endif
  3948. // Now complete whatever we can here.
  3949. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  3950. } else {
  3951. if (SynAttackProtect) {
  3952. //
  3953. // We are going to be more aggressive in closing
  3954. // half-open connections when SYN attack protection
  3955. // is enabled. By closing the connection here, we
  3956. // are minimizing ISN prediction attacks.
  3957. //
  3958. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED,
  3959. DISPATCH_LEVEL);
  3960. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  3961. }
  3962. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  3963. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3964. return IP_SUCCESS;
  3965. }
  3966. } else {
  3967. // We're not in SYN-RCVD. See if this acknowledges anything.
  3968. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3969. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3970. uint CWin;
  3971. uint Amount = RcvInfo.tri_ack - RcvTCB->tcb_senduna;
  3972. //
  3973. // The ack acknowledges something. Pull the
  3974. // appropriate amount off the send q.
  3975. //
  3976. ACKData(RcvTCB, RcvInfo.tri_ack, &SendQ);
  3977. //
  3978. // If this acknowledges something we were running
  3979. // an RTT on, update that stuff now.
  3980. //
  3981. {
  3982. short RTT = 0;
  3983. BOOLEAN fUpdateRtt = FALSE;
  3984. //
  3985. // if timestamp is true, get the RTT using the
  3986. // echoed timestamp.
  3987. //
  3988. if (time_stamp && tsecr) {
  3989. RTT = TCPTime - tsecr;
  3990. fUpdateRtt = TRUE;
  3991. } else {
  3992. if (RcvTCB->tcb_rtt != 0 &&
  3993. SEQ_GT(RcvInfo.tri_ack,
  3994. RcvTCB->tcb_rttseq)) {
  3995. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  3996. fUpdateRtt = TRUE;
  3997. }
  3998. }
  3999. if (fUpdateRtt) {
  4000. RcvTCB->tcb_rtt = 0;
  4001. RTT -= (RcvTCB->tcb_smrtt >> 3);
  4002. RcvTCB->tcb_smrtt += RTT;
  4003. RTT = (RTT >= 0 ? RTT : -RTT);
  4004. RTT -= (RcvTCB->tcb_delta >> 3);
  4005. RcvTCB->tcb_delta += RTT + RTT;
  4006. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  4007. MIN_RETRAN_TICKS)+1, MAX_REXMIT_TO);
  4008. }
  4009. }
  4010. //
  4011. // If we're probing for a PMTU black hole we've
  4012. // found one, so turn off
  4013. // the detection. The size is already down, so
  4014. // leave it there.
  4015. //
  4016. if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
  4017. RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
  4018. RcvTCB->tcb_bhprobecnt = 0;
  4019. if (--(RcvTCB->tcb_slowcount) == 0) {
  4020. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4021. CheckTCBRcv(RcvTCB);
  4022. }
  4023. }
  4024. // Update the congestion window now.
  4025. CWin = RcvTCB->tcb_cwin;
  4026. if (CWin < RcvTCB->tcb_maxwin) {
  4027. if (CWin < RcvTCB->tcb_ssthresh)
  4028. CWin += (RcvTCB->tcb_flags & SCALE_CWIN)
  4029. ? Amount : RcvTCB->tcb_mss;
  4030. else
  4031. CWin += MAX((RcvTCB->tcb_mss * RcvTCB->tcb_mss) / CWin, 1);
  4032. RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
  4033. }
  4034. if ((RcvTCB->tcb_dup > 0) && ((int)RcvTCB->tcb_ssthresh > 0)) {
  4035. //
  4036. // Fast retransmitted frame is acked
  4037. // Set cwin to ssthresh so that cwin grows
  4038. // linearly from here
  4039. //
  4040. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  4041. }
  4042. RcvTCB->tcb_dup = 0;
  4043. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  4044. //
  4045. // We've acknowledged something, so reset the
  4046. // rexmit count. If there's still stuff
  4047. // outstanding, restart the rexmit timer.
  4048. //
  4049. RcvTCB->tcb_rexmitcnt = 0;
  4050. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  4051. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  4052. else
  4053. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  4054. //
  4055. // If we've sent a FIN, and this acknowledges it, we
  4056. // need to complete the client's close request and
  4057. // possibly transition our state.
  4058. //
  4059. if (RcvTCB->tcb_flags & FIN_SENT) {
  4060. //
  4061. // We have sent a FIN. See if it's been
  4062. // acknowledged. Once we've sent a FIN,
  4063. // tcb_sendmax can't advance, so our FIN must
  4064. // have seq. number tcb_sendmax - 1. Thus our
  4065. // FIN is acknowledged if the incoming ack is
  4066. // equal to tcb_sendmax.
  4067. //
  4068. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  4069. ushort ConnReqTimeout = 0;
  4070. //
  4071. // He's acked our FIN. Turn off the flags,
  4072. // and complete the request. We'll leave the
  4073. // FIN_OUTSTANDING flag alone, to force
  4074. // early outs in the send code.
  4075. //
  4076. RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
  4077. ASSERT(RcvTCB->tcb_unacked == 0);
  4078. ASSERT(RcvTCB->tcb_sendnext ==
  4079. RcvTCB->tcb_sendmax);
  4080. //
  4081. // Now figure out what we need to do. In
  4082. // FIN_WAIT1 or FIN_WAIT, just complete
  4083. // the disconnect req. and continue.
  4084. // Otherwise, it's a bit trickier,
  4085. // since we can't complete the connreq
  4086. // until we remove the TCB from its
  4087. // connection.
  4088. //
  4089. switch (RcvTCB->tcb_state) {
  4090. case TCB_FIN_WAIT1:
  4091. RcvTCB->tcb_state = TCB_FIN_WAIT2;
  4092. if (RcvTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC) {
  4093. //RcvTCB->tcb_flags |= DISC_NOTIFIED;
  4094. } else {
  4095. if (RcvTCB->tcb_connreq) {
  4096. ConnReqTimeout = RcvTCB->tcb_connreq->tcr_timeout;
  4097. }
  4098. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  4099. }
  4100. //
  4101. // Start a timer in case we never get
  4102. // out of FIN_WAIT2. Set the retransmit
  4103. // count high to force a timeout the
  4104. // first time the timer fires.
  4105. //
  4106. if (ConnReqTimeout) {
  4107. RcvTCB->tcb_rexmitcnt = 1;
  4108. } else {
  4109. RcvTCB->tcb_rexmitcnt = (uchar) MaxDataRexmitCount;
  4110. ConnReqTimeout = (ushort)FinWait2TO;
  4111. }
  4112. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, ConnReqTimeout);
  4113. //Fall through to FIN-WAIT-2 processing.
  4114. case TCB_FIN_WAIT2:
  4115. break;
  4116. case TCB_CLOSING:
  4117. //
  4118. //Note that we do not care about
  4119. //return stat from GracefulClose
  4120. //since we do not touch the tcb
  4121. //anyway, anymore, even if it is in
  4122. //time_wait.
  4123. //
  4124. GracefulClose(RcvTCB, TRUE, FALSE,
  4125. DISPATCH_LEVEL);
  4126. CompleteSends(&SendQ);
  4127. return IP_SUCCESS;
  4128. break;
  4129. case TCB_LAST_ACK:
  4130. GracefulClose(RcvTCB, FALSE, FALSE,
  4131. DISPATCH_LEVEL);
  4132. CompleteSends(&SendQ);
  4133. return IP_SUCCESS;
  4134. break;
  4135. default:
  4136. ASSERT(0);
  4137. break;
  4138. }
  4139. }
  4140. }
  4141. UpdateWindow = TRUE;
  4142. } else {
  4143. //
  4144. // It doesn't ack anything. If it's an ack for
  4145. // something larger than we've sent then
  4146. // ACKAndDrop it, otherwise ignore it. If we're in
  4147. // FIN_WAIT2, we'll restart the timer.
  4148. // We don't make this check above because we know no
  4149. // data can be acked when we're in FIN_WAIT2.
  4150. //
  4151. if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
  4152. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, (ushort) FinWait2TO);
  4153. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  4154. ACKAndDrop(&RcvInfo, RcvTCB);
  4155. return IP_SUCCESS;
  4156. } else if ((Size == 0) &&
  4157. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  4158. (SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax)) &&
  4159. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  4160. RcvInfo.tri_window) {
  4161. // See if fast rexmit can be done
  4162. if (HandleFastXmit(RcvTCB, &RcvInfo)){
  4163. return IP_SUCCESS;
  4164. }
  4165. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  4166. } else {
  4167. // Now update the window if we can.
  4168. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  4169. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  4170. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  4171. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  4172. UpdateWindow = TRUE;
  4173. } else
  4174. UpdateWindow = FALSE;
  4175. }
  4176. }
  4177. if (UpdateWindow) {
  4178. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  4179. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  4180. RcvInfo.tri_window);
  4181. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  4182. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  4183. if (RcvInfo.tri_window == 0) {
  4184. // We've got a zero window.
  4185. if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
  4186. RcvTCB->tcb_flags &= ~NEED_OUTPUT;
  4187. RcvTCB->tcb_rexmitcnt = 0;
  4188. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  4189. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  4190. RcvTCB->tcb_flags |= FLOW_CNTLD;
  4191. RcvTCB->tcb_slowcount++;
  4192. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  4193. CheckTCBRcv(RcvTCB);
  4194. }
  4195. }
  4196. } else {
  4197. if (RcvTCB->tcb_flags & FLOW_CNTLD) {
  4198. RcvTCB->tcb_rexmitcnt = 0;
  4199. RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
  4200. //
  4201. // Reset send next to the left edge of the
  4202. // window, because it might be at
  4203. // senduna+1 if we've been probing.
  4204. //
  4205. ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
  4206. if (--(RcvTCB->tcb_slowcount) == 0) {
  4207. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4208. CheckTCBRcv(RcvTCB);
  4209. }
  4210. }
  4211. //
  4212. // Since we've updated the window, see if we
  4213. // can send some more.
  4214. //
  4215. if (RcvTCB->tcb_unacked != 0 ||
  4216. (RcvTCB->tcb_flags & FIN_NEEDED))
  4217. DelayAction(RcvTCB, NEED_OUTPUT);
  4218. }
  4219. }
  4220. }
  4221. //
  4222. // We've handled all the acknowledgment stuff. If the size
  4223. // is greater than 0 or FIN bit is set process it further,
  4224. // otherwise it's a pure ack and we're done with it.
  4225. //
  4226. if (Size == 0 && !(RcvInfo.tri_flags & TCP_FLAG_FIN))
  4227. {
  4228. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4229. CompleteSends(&SendQ);
  4230. return IP_SUCCESS;
  4231. }
  4232. //
  4233. // If we're not in a state where we can process
  4234. // incoming data or FINs, there's no point in going
  4235. // further. Just drop this segment.
  4236. //
  4237. if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
  4238. (RcvTCB->tcb_flags & GC_PENDING)) {
  4239. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4240. CompleteSends(&SendQ);
  4241. return IP_SUCCESS;
  4242. }
  4243. //
  4244. // If it's in sequence process it now, otherwise
  4245. // reassemble it.
  4246. //
  4247. if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  4248. //
  4249. // If we're already in the recv. handler, this is a
  4250. // duplicate. We'll just toss it.
  4251. //
  4252. if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
  4253. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4254. CompleteSends(&SendQ);
  4255. return IP_SUCCESS;
  4256. }
  4257. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  4258. //
  4259. // Now loop, pulling things from the reassembly
  4260. // queue, until the queue is empty, or we can't
  4261. // take all of the data, or we hit a FIN.
  4262. //
  4263. do {
  4264. // Handle urgent data, if any.
  4265. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  4266. HandleUrgent(RcvTCB, &RcvInfo, RcvBuf, &Size);
  4267. //
  4268. // Since we may have freed the lock, we
  4269. // need to recheck and see if we're
  4270. // closing here.
  4271. //
  4272. if (CLOSING(RcvTCB))
  4273. break;
  4274. }
  4275. //
  4276. // OK, the data is in sequence, we've updated
  4277. // the reassembly queue and handled any urgent
  4278. // data. If we have any data go ahead and
  4279. // process it now.
  4280. //
  4281. if (Size > 0) {
  4282. BytesTaken = (*RcvTCB->tcb_rcvhndlr) (RcvTCB,
  4283. RcvInfo.tri_flags, RcvBuf, Size);
  4284. RcvTCB->tcb_rcvnext += BytesTaken;
  4285. RcvTCB->tcb_rcvwin -= BytesTaken;
  4286. CheckTCBRcv(RcvTCB);
  4287. if (RcvTCB->tcb_rcvdsegs != RcvTCB->tcb_numdelacks){
  4288. RcvTCB->tcb_flags |= ACK_DELAYED;
  4289. RcvTCB->tcb_rcvdsegs++;
  4290. ASSERT(RcvTCB->tcb_delackticks);
  4291. START_TCB_TIMER_R(RcvTCB, DELACK_TIMER,
  4292. RcvTCB->tcb_delackticks);
  4293. } else {
  4294. DelayAction(RcvTCB, NEED_ACK);
  4295. RcvTCB->tcb_rcvdsegs = 0;
  4296. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  4297. }
  4298. if (BytesTaken != Size) {
  4299. //
  4300. // We didn't take everything we could.
  4301. // No use in further processing, just
  4302. // bail out.
  4303. //
  4304. DelayAction(RcvTCB, NEED_ACK);
  4305. break;
  4306. }
  4307. //
  4308. // If we're closing now, we're done, so
  4309. // get out.
  4310. //
  4311. if (CLOSING(RcvTCB))
  4312. break;
  4313. }
  4314. //
  4315. // See if we need to advance over some urgent
  4316. // data.
  4317. //
  4318. if (RcvTCB->tcb_flags & URG_VALID) {
  4319. uint AdvanceNeeded;
  4320. //
  4321. // We only need to adv if we're not doing
  4322. // urgent inline. Urg inline also has some
  4323. // implications for when we can clear the
  4324. // URG_VALID flag. If we're not doing
  4325. // urgent inline, we can clear it when
  4326. // rcvnext advances beyond urgent end.
  4327. // If we are doing inline, we clear it
  4328. // when rcvnext advances one receive
  4329. // window beyond urgend.
  4330. //
  4331. if (!(RcvTCB->tcb_flags & URG_INLINE)) {
  4332. if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart)
  4333. RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend +
  4334. 1;
  4335. else
  4336. ASSERT(SEQ_LT(RcvTCB->tcb_rcvnext,
  4337. RcvTCB->tcb_urgstart) ||
  4338. SEQ_GT(RcvTCB->tcb_rcvnext,
  4339. RcvTCB->tcb_urgend));
  4340. AdvanceNeeded = 0;
  4341. } else
  4342. AdvanceNeeded = RcvTCB->tcb_defaultwin;
  4343. // See if we can clear the URG_VALID flag.
  4344. if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
  4345. RcvTCB->tcb_urgend)) {
  4346. RcvTCB->tcb_flags &= ~URG_VALID;
  4347. if (--(RcvTCB->tcb_slowcount) == 0) {
  4348. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4349. CheckTCBRcv(RcvTCB);
  4350. }
  4351. }
  4352. }
  4353. //
  4354. // We've handled the data. If the FIN bit is
  4355. // set, we have more processing.
  4356. //
  4357. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  4358. uint Notify = FALSE;
  4359. uint DelayAck = TRUE;
  4360. RcvTCB->tcb_rcvnext++;
  4361. PushData(RcvTCB, TRUE);
  4362. switch (RcvTCB->tcb_state) {
  4363. case TCB_SYN_RCVD:
  4364. //
  4365. // I don't think we can get here - we
  4366. // should have discarded the frame if it
  4367. // had no ACK, or gone to established if
  4368. // it did.
  4369. //
  4370. ASSERT(0);
  4371. case TCB_ESTAB:
  4372. RcvTCB->tcb_state = TCB_CLOSE_WAIT;
  4373. //
  4374. // We left established, we're off the
  4375. // fast path.
  4376. //
  4377. RcvTCB->tcb_slowcount++;
  4378. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  4379. CheckTCBRcv(RcvTCB);
  4380. Notify = TRUE;
  4381. break;
  4382. case TCB_FIN_WAIT1:
  4383. RcvTCB->tcb_state = TCB_CLOSING;
  4384. DelayAck = FALSE;
  4385. //RcvTCB->tcb_refcnt++;
  4386. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  4387. SendACK(RcvTCB);
  4388. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  4389. if (0 == (RcvTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC)) {
  4390. Notify = TRUE;
  4391. }
  4392. break;
  4393. case TCB_FIN_WAIT2:
  4394. // Stop the FIN_WAIT2 timer.
  4395. DelayAck = FALSE;
  4396. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  4397. REFERENCE_TCB(RcvTCB);
  4398. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  4399. SendACK(RcvTCB);
  4400. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  4401. if (RcvTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC) {
  4402. GracefulClose(RcvTCB, TRUE, FALSE, DISPATCH_LEVEL);
  4403. } else {
  4404. GracefulClose(RcvTCB, TRUE, TRUE, DISPATCH_LEVEL);
  4405. }
  4406. //
  4407. //graceful close has put this tcb in
  4408. //timewait state should not access
  4409. //small tw tcb at this point
  4410. //
  4411. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  4412. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4413. CompleteSends(&SendQ);
  4414. return IP_SUCCESS;
  4415. break;
  4416. default:
  4417. ASSERT(0);
  4418. break;
  4419. }
  4420. if (DelayAck) {
  4421. DelayAction(RcvTCB, NEED_ACK);
  4422. }
  4423. if (Notify) {
  4424. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  4425. NotifyOfDisc(RcvTCB, OptInfo,
  4426. TDI_GRACEFUL_DISC, NULL);
  4427. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  4428. }
  4429. break; // Exit out of WHILE loop.
  4430. }
  4431. // If the reassembly queue isn't empty, get what we
  4432. // can now.
  4433. RcvBuf = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
  4434. if (RcvBuf)
  4435. RcvBuf->ipr_pMdl = NULL;
  4436. CheckRBList(RcvBuf, Size);
  4437. } while (RcvBuf != NULL);
  4438. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  4439. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  4440. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  4441. DelayAction(RcvTCB, NEED_OUTPUT);
  4442. }
  4443. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4444. CompleteSends(&SendQ);
  4445. return IP_SUCCESS;
  4446. } else {
  4447. // It's not in sequence. Since it needs further processing,
  4448. // put it on the reassembly queue.
  4449. if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
  4450. !(RcvTCB->tcb_flags & GC_PENDING)) {
  4451. PutOnRAQ(RcvTCB, &RcvInfo, RcvBuf, Size);
  4452. //
  4453. //If SACK option is active, we need to construct
  4454. // SACK Blocks in ack
  4455. //
  4456. if (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK) {
  4457. SendSackInACK(RcvTCB, RcvInfo.tri_seq);
  4458. } else {
  4459. CTEFreeLockFromDPC(&RcvTCB->tcb_lock);
  4460. SendACK(RcvTCB);
  4461. }
  4462. CTEGetLockAtDPC(&RcvTCB->tcb_lock);
  4463. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  4464. } else
  4465. ACKAndDrop(&RcvInfo, RcvTCB);
  4466. CompleteSends(&SendQ);
  4467. return IP_SUCCESS;
  4468. }
  4469. } else { // DataOffset <= Size
  4470. TStats.ts_inerrs++;
  4471. }
  4472. } else {
  4473. TStats.ts_inerrs++;
  4474. }
  4475. } else { // IsBCast
  4476. TStats.ts_inerrs++;
  4477. }
  4478. return IP_SUCCESS;
  4479. }
  4480. #pragma BEGIN_INIT
  4481. //* InitTCPRcv - Initialize TCP receive side.
  4482. //
  4483. // Called during init time to initialize our TCP receive side.
  4484. //
  4485. // Input: Nothing.
  4486. //
  4487. // Returns: TRUE.
  4488. //
  4489. int
  4490. InitTCPRcv(void)
  4491. {
  4492. uint i;
  4493. //Allocate Time_Proc number of delayqueues
  4494. PerCPUDelayQ = CTEAllocMemBoot(Time_Proc * sizeof(CPUDelayQ));
  4495. if (PerCPUDelayQ == NULL) {
  4496. return FALSE;
  4497. }
  4498. for (i = 0; i < Time_Proc; i++) {
  4499. CTEInitLock(&PerCPUDelayQ[i].TCBDelayLock);
  4500. INITQ(&PerCPUDelayQ[i].TCBDelayQ);
  4501. PerCPUDelayQ[i].TCBDelayRtnCount = 0;
  4502. }
  4503. #if MILLEN
  4504. TCBDelayRtnLimit.Value = 1;
  4505. #else // MILLEN
  4506. TCBDelayRtnLimit.Value = KeNumberProcessors;
  4507. if (TCBDelayRtnLimit.Value > TCB_DELAY_RTN_LIMIT)
  4508. TCBDelayRtnLimit.Value = TCB_DELAY_RTN_LIMIT;
  4509. #endif // !MILLEN
  4510. DummyBuf.ipr_owner = IPR_OWNER_IP;
  4511. DummyBuf.ipr_size = 0;
  4512. DummyBuf.ipr_next = 0;
  4513. DummyBuf.ipr_buffer = NULL;
  4514. return TRUE;
  4515. }
  4516. //* UnInitTCPRcv - Uninitialize our receive side.
  4517. //
  4518. // Called if initialization fails to uninitialize our receive side.
  4519. //
  4520. //
  4521. // Input: Nothing.
  4522. //
  4523. // Returns: Nothing.
  4524. //
  4525. void
  4526. UnInitTCPRcv(void)
  4527. {
  4528. }
  4529. #pragma END_INIT