Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

5691 lines
224 KiB

  1. /*++
  2. Copyright (c) 1990-2000 Microsoft Corporation
  3. Module Name:
  4. TCPRCV.C - TCP receive protocol code.
  5. Abstract:
  6. This file contains the code for handling incoming TCP packets.
  7. Author:
  8. [Environment:]
  9. kernel mode only
  10. [Notes:]
  11. optional-notes
  12. Revision History:
  13. --*/
  14. #include "precomp.h"
  15. #include "addr.h"
  16. #include "tcp.h"
  17. #include "tcb.h"
  18. #include "tcpconn.h"
  19. #include "tcpsend.h"
  20. #include "tcprcv.h"
  21. #include "tcpdeliv.h"
  22. #include "tlcommon.h"
  23. #include "info.h"
  24. #include "tcpcfg.h"
  25. #include "secfltr.h"
  26. CACHE_LINE_KSPIN_LOCK SynAttLock; // Held by SynAttChk while it updates the half-open / retried counters.
  27. CACHE_LINE_ULONG TCBDelayRtnCount; // Incremented on entry to ProcessTCBDelayQ and decremented on exit.
  28. CACHE_LINE_ULONG TCBDelayRtnLimit; // ProcessTCBDelayQ returns immediately once TCBDelayRtnCount exceeds this.
  29. CACHE_LINE_ULONG TCBDelayQRoundRobinIndex; // Offset of the first per-CPU queue visited; bumped on each ProcessTCBDelayQ pass.
  30. typedef struct CACHE_ALIGN CPUDelayQ { // One delayed-action queue together with its lock.
  31. DEFINE_LOCK_STRUCTURE(TCBDelayLock) // Passed with TCBDelayQ to the interlocked enqueue/dequeue helpers.
  32. Queue TCBDelayQ; // TCBs linked through their tcb_delayq member.
  33. } CPUDelayQ;
  34. C_ASSERT(sizeof(CPUDelayQ) % MAX_CACHE_LINE_SIZE == 0); // Size must be a whole number of cache lines.
  35. C_ASSERT(__alignof(CPUDelayQ) == MAX_CACHE_LINE_SIZE); // Alignment must equal the cache line size.
  36. CPUDelayQ *PerCPUDelayQ; // Array indexed by processor number; NOTE(review): presumably Time_Proc entries - confirm in init code.
  37. // Maximum possible window size. By default it is a 16-bit value;
  38. // if window scaling is enabled this can be set through the registry.
  39. uint MaxRcvWin = 0xffff;
  40. uint MaxDupAcks;
  41. #define TCB_DELAY_RTN_LIMIT 4 // NOTE(review): presumably the initial value of TCBDelayRtnLimit - confirm in init code.
  42. #if DBG
  43. ulong DbgTcpHwChkSumOk = 0;
  44. ulong DbgTcpHwChkSumErr = 0;
  45. ulong DbgDnsProb = 0; // Bumped by ProcessPerCpuTCBDelayQ when it flushes output for a TCB closing with TCB_CLOSE_RST.
  46. #endif
  47. extern uint Time_Proc;
  48. extern CTELock *pTWTCBTableLock;
  49. extern CTELock *pTCBTableLock;
  50. #if IRPFIX
  51. extern PDEVICE_OBJECT TCPDeviceObject;
  52. #endif
  53. extern Queue TWQueue;
  54. extern ulong CurrentTCBs;
  55. extern ulong MaxFreeTcbs;
  56. extern IPInfo LocalNetInfo;
  57. #define PERSIST_TIMEOUT MS_TO_TICKS(500)
  58. void
  59. SendTWtcbACK(TWTCB *ACKTcb, uint Partition, CTELockHandle TCBHandle);
  60. void
  61. ReInsert2MSL(TWTCB *RemovedTCB);
  62. void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  63. void ResetAndFastSend(TCB *SeqTCB, SeqNum NewSeq, uint NewCWin);
  64. void GetRandomISN(PULONG SeqNum);
  65. extern uint TcpHostOpts;
  66. extern BOOLEAN fAcdLoadedG;
  67. extern NTSTATUS TCPPrepareIrpForCancel(PTCP_CONTEXT TcpContext, PIRP Irp,
  68. PDRIVER_CANCEL CancelRoutine);
  69. extern void TCPRequestComplete(void *Context, uint Status,
  70. uint UnUsed);
  71. void TCPCancelRequest(PDEVICE_OBJECT Device, PIRP Irp);
  72. //
  73. // All of the init code can be discarded.
  74. //
  75. int InitTCPRcv(void);
  76. void UnInitTCPRcv(void);
  77. #ifdef ALLOC_PRAGMA
  78. #pragma alloc_text(INIT, InitTCPRcv)
  79. #pragma alloc_text(INIT, UnInitTCPRcv)
  80. #endif
  81. //* AdjustRcvWin - Adjust the receive window on a TCB.
  82. //
  83. // A utility routine that adjusts the receive window to an even multiple of
  84. // the local segment size. We round it up to the next closest multiple, or
  85. // leave it alone if it's already an event multiple. We assume we have
  86. // exclusive access to the input TCB.
  87. //
  88. // Input: WinTCB - TCB to be adjusted.
  89. //
  90. // Returns: Nothing.
  91. //
  92. void
  93. AdjustRcvWin(TCB *WinTCB)
  94. {
  95. ushort LocalMSS;
  96. uchar FoundMSS;
  97. ulong SegmentsInWindow;
  98. uint ScaledMaxRcvWin;
  99. ASSERT(WinTCB->tcb_defaultwin != 0);
  100. ASSERT(WinTCB->tcb_rcvwin != 0);
  101. ASSERT(WinTCB->tcb_remmss != 0);
  102. if (WinTCB->tcb_flags & WINDOW_SET)
  103. return;
  104. // First, get the local MSS by calling IP.
  105. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu)(WinTCB->tcb_saddr, &LocalMSS);
  106. // If we didn't find it, error out.
  107. if (!FoundMSS) {
  108. //ASSERT(FALSE);
  109. return;
  110. }
  111. LocalMSS -= sizeof(TCPHeader);
  112. LocalMSS = MIN(LocalMSS, WinTCB->tcb_remmss);
  113. // Compute the actual maximum receive window, accounting for the presence
  114. // of window scaling on this particular connection. This value is used
  115. // in the computations below, rather than the cross-connection maximum.
  116. ScaledMaxRcvWin = TCP_MAXWIN << WinTCB->tcb_rcvwinscale;
  117. // Make sure we have at least 4 segments in window, if that wouldn't make
  118. // the window too big.
  119. SegmentsInWindow = WinTCB->tcb_defaultwin / (ulong)LocalMSS;
  120. if (SegmentsInWindow < 4) {
  121. // We have fewer than four segments in the window. Round up to 4
  122. // if we can do so without exceeding the maximum window size; otherwise
  123. // use the maximum multiple that we can fit in 64K. The exception is if
  124. // we can only fit one integral multiple in the window - in that case
  125. // we'll use a window equal to the scaled maximum.
  126. if (LocalMSS <= (ScaledMaxRcvWin / 4)) {
  127. WinTCB->tcb_defaultwin = (uint)(4 * LocalMSS);
  128. } else {
  129. ulong SegmentsInMaxWindow;
  130. // Figure out the maximum number of segments we could possibly
  131. // fit in a window. If this is > 1, use that as the basis for
  132. // our window size. Otherwise use a maximum size window.
  133. SegmentsInMaxWindow = ScaledMaxRcvWin / (ulong)LocalMSS;
  134. if (SegmentsInMaxWindow != 1)
  135. WinTCB->tcb_defaultwin = SegmentsInMaxWindow * (ulong)LocalMSS;
  136. else
  137. WinTCB->tcb_defaultwin = ScaledMaxRcvWin;
  138. }
  139. WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
  140. } else {
  141. // If it's not already an even multiple, bump the default and current
  142. // windows to the nearest multiple.
  143. if ((SegmentsInWindow * (ulong)LocalMSS) != WinTCB->tcb_defaultwin) {
  144. ulong NewWindow;
  145. NewWindow = (SegmentsInWindow + 1) * (ulong)LocalMSS;
  146. // Don't let the new window be > 64K
  147. // or what ever is set (if window scaling is enabled)
  148. if (NewWindow <= ScaledMaxRcvWin) {
  149. WinTCB->tcb_defaultwin = (uint)NewWindow;
  150. WinTCB->tcb_rcvwin = (uint)NewWindow;
  151. }
  152. }
  153. }
  154. }
  155. //* CompleteRcvs - Complete rcvs on a TCB.
  156. //
  157. // Called when we need to complete rcvs on a TCB. We'll pull things from
  158. // the TCB's rcv queue, as long as there are rcvs that have the PUSH bit
  159. // set.
  160. //
  161. // Input: CmpltTCB - TCB to complete on.
  162. //
  163. // Returns: Nothing.
  164. //
  165. void
  166. CompleteRcvs(TCB * CmpltTCB)
  167. {
  168. CTELockHandle TCBHandle;
  169. TCPRcvReq *CurrReq, *NextReq, *IndReq;
  170. #if TRACE_EVENT
  171. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  172. WMIData WMIInfo;
  173. #endif
  174. CTEStructAssert(CmpltTCB, tcb);
  175. ASSERT(CmpltTCB->tcb_refcnt != 0);
  176. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  177. if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
  178. && (CmpltTCB->tcb_rcvhead != NULL)) {
  179. CmpltTCB->tcb_flags |= RCV_CMPLTING;
  180. for (;;) {
  181. CurrReq = CmpltTCB->tcb_rcvhead;
  182. IndReq = NULL;
  183. do {
  184. CTEStructAssert(CurrReq, trr);
  185. if (CurrReq->trr_flags & TRR_PUSHED) {
  186. // Need to complete this one. If this is the current rcv
  187. // advance the current rcv to the next one in the list.
  188. // Then set the list head to the next one in the list.
  189. NextReq = CurrReq->trr_next;
  190. if (CmpltTCB->tcb_currcv == CurrReq)
  191. CmpltTCB->tcb_currcv = NextReq;
  192. CmpltTCB->tcb_rcvhead = NextReq;
  193. if (NextReq == NULL) {
  194. // We've just removed the last buffer. Set the
  195. // rcvhandler to PendData, in case something
  196. // comes in during the callback.
  197. ASSERT(CmpltTCB->tcb_rcvhndlr != IndicateData);
  198. CmpltTCB->tcb_rcvhndlr = PendData;
  199. }
  200. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  201. if (CurrReq->trr_uflags != NULL)
  202. *(CurrReq->trr_uflags) =
  203. TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
  204. #if TRACE_EVENT
  205. CPCallBack = TCPCPHandlerRoutine;
  206. if (CPCallBack != NULL) {
  207. ulong GroupType;
  208. WMIInfo.wmi_destaddr = CmpltTCB->tcb_daddr;
  209. WMIInfo.wmi_destport = CmpltTCB->tcb_dport;
  210. WMIInfo.wmi_srcaddr = CmpltTCB->tcb_saddr;
  211. WMIInfo.wmi_srcport = CmpltTCB->tcb_sport;
  212. WMIInfo.wmi_size = CurrReq->trr_size;
  213. WMIInfo.wmi_context = CmpltTCB->tcb_cpcontext;
  214. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_RECEIVE;
  215. (*CPCallBack) (GroupType, (PVOID) &WMIInfo, sizeof(WMIInfo), NULL);
  216. }
  217. #endif
  218. (*CurrReq->trr_rtn) (CurrReq->trr_context, TDI_SUCCESS,
  219. CurrReq->trr_amt);
  220. if (IndReq != NULL)
  221. FreeRcvReq(CurrReq);
  222. else
  223. IndReq = CurrReq;
  224. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  225. CurrReq = CmpltTCB->tcb_rcvhead;
  226. } else
  227. // This one isn't to be completed, so bail out.
  228. break;
  229. } while (CurrReq != NULL);
  230. // Now see if we've completed all of the requests. If we have, we
  231. // may need to deal with pending data and/or reset the rcv. handler.
  232. if (CurrReq == NULL) {
  233. // We've completed everything that can be, so stop the push
  234. // timer. We don't stop it if CurrReq isn't NULL because we
  235. // want to make sure later data is eventually pushed.
  236. STOP_TCB_TIMER_R(CmpltTCB, PUSH_TIMER);
  237. ASSERT(IndReq != NULL);
  238. // No more recv. requests.
  239. if (CmpltTCB->tcb_pendhead == NULL) {
  240. FreeRcvReq(IndReq);
  241. // No pending data. Set the rcv. handler to either PendData
  242. // or IndicateData.
  243. if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
  244. if (CmpltTCB->tcb_rcvind != NULL &&
  245. CmpltTCB->tcb_indicated == 0)
  246. CmpltTCB->tcb_rcvhndlr = IndicateData;
  247. else
  248. CmpltTCB->tcb_rcvhndlr = PendData;
  249. } else {
  250. goto Complete_Notify;
  251. }
  252. } else {
  253. // We have pending data to deal with.
  254. if (CmpltTCB->tcb_rcvind != NULL &&
  255. ((CmpltTCB->tcb_indicated == 0) || (CmpltTCB->tcb_moreflag == 4))) {
  256. // There's a rcv. indicate handler on this TCB. Call
  257. // the indicate handler with the pending data.
  258. IndicatePendingData(CmpltTCB, IndReq, TCBHandle);
  259. SendACK(CmpltTCB);
  260. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  261. // See if a buffer has been posted. If so, we'll need
  262. // to check and see if it needs to be completed.
  263. if (CmpltTCB->tcb_rcvhead != NULL)
  264. continue;
  265. else {
  266. // If the pending head is now NULL, we've used up
  267. // all the data.
  268. if (CmpltTCB->tcb_pendhead == NULL &&
  269. (CmpltTCB->tcb_flags &
  270. (DISC_PENDING | GC_PENDING)))
  271. goto Complete_Notify;
  272. }
  273. } else {
  274. // No indicate handler, so nothing to do. The rcv.
  275. // handler should already be set to PendData.
  276. FreeRcvReq(IndReq);
  277. ASSERT(CmpltTCB->tcb_rcvhndlr == PendData);
  278. }
  279. }
  280. } else {
  281. if (IndReq != NULL)
  282. FreeRcvReq(IndReq);
  283. }
  284. break;
  285. }
  286. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  287. }
  288. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  289. return;
  290. Complete_Notify:
  291. // Something is pending. Figure out what it is, and do
  292. // it.
  293. if (CmpltTCB->tcb_flags & GC_PENDING) {
  294. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  295. // Bump the refcnt, because GracefulClose will
  296. // deref the TCB and we're not really done with
  297. // it yet.
  298. REFERENCE_TCB(CmpltTCB);
  299. //it is okay to ignore the tw state since we are returning frome here
  300. //anyway, without touching the tcb.
  301. GracefulClose(CmpltTCB,
  302. CmpltTCB->tcb_flags & TW_PENDING, TRUE,
  303. TCBHandle);
  304. } else if (CmpltTCB->tcb_flags & DISC_PENDING) {
  305. CmpltTCB->tcb_flags &= ~DISC_PENDING;
  306. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  307. NotifyOfDisc(CmpltTCB, NULL, TDI_GRACEFUL_DISC);
  308. CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
  309. CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
  310. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  311. } else {
  312. ASSERT(FALSE);
  313. CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
  314. }
  315. return;
  316. }
  317. //* CompleteSends - Complete TCP send requests.
  318. //
  319. // Called when we need to complete a chain of send-requests pulled off a TCB
  320. // during our ACK processing.
  321. //
  322. // Input: SendQ - non-empty chain of TCPSendReq structures.
  323. //
  324. // Returns: nothing.
  325. //
  326. void
  327. CompleteSends(Queue* SendQ)
  328. {
  329. Queue* CurrentQ = QHEAD(SendQ);
  330. TCPReq* Req;
  331. ASSERT(!EMPTYQ(SendQ));
  332. do {
  333. Req = QSTRUCT(TCPReq, CurrentQ, tr_q);
  334. CurrentQ = QNEXT(CurrentQ);
  335. CTEStructAssert(Req, tr);
  336. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  337. Req->tr_status == TDI_SUCCESS
  338. ? ((TCPSendReq*)Req)->tsr_size : 0);
  339. FreeSendReq((TCPSendReq*)Req);
  340. } while (CurrentQ != QEND(SendQ));
  341. INITQ(SendQ);
  342. }
  343. //* ProcessPerCpuTCBDelayQ - Process TCBs on the delayed Q on this cpu.
  344. //
  345. // Called at various times to process TCBs on the delayed Q.
  346. //
  347. // Input: Proc - Index into the per-processor delay queues.
  348. // OrigIrql - The callers IRQL.
  349. // StopTicks - Optional pointer to KeQueryTickCount value after
  350. // which processing should stop. This is used to
  351. // limit the time spent at DISPATCH_LEVEL.
  352. // ItemsProcessed - Optional output pointer where the number of items
  353. // processed is stored. (Caller takes responsibility
  354. // for initializing this counter if used.)
  355. //
  356. // Returns: TRUE if processing was stopped due to time constraint. FALSE
  357. // otherwise, or if no time constraint was given.
  358. //
  359. LOGICAL
  360. ProcessPerCpuTCBDelayQ(int Proc, KIRQL OrigIrql,
  361. const LARGE_INTEGER* StopTicks, ulong *ItemsProcessed)
  362. {
  363. CPUDelayQ* CpuQ;
  364. Queue* Item;
  365. TCB *DelayTCB;
  366. CTELockHandle TCBHandle;
  367. LARGE_INTEGER Ticks;
  368. LOGICAL TimeConstrained = FALSE;
  369. CpuQ = &PerCPUDelayQ[Proc];
  370. while ((Item = InterlockedDequeueIfNotEmptyAtIrql(&CpuQ->TCBDelayQ,
  371. &CpuQ->TCBDelayLock,
  372. OrigIrql)) != NULL) {
  373. DelayTCB = STRUCT_OF(TCB, Item, tcb_delayq);
  374. CTEStructAssert(DelayTCB, tcb);
  375. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  376. ASSERT(DelayTCB->tcb_refcnt != 0);
  377. ASSERT(DelayTCB->tcb_flags & IN_DELAY_Q);
  378. while (!CLOSING(DelayTCB) && (DelayTCB->tcb_flags & DELAYED_FLAGS)) {
  379. if (DelayTCB->tcb_flags & NEED_RCV_CMPLT) {
  380. DelayTCB->tcb_flags &= ~NEED_RCV_CMPLT;
  381. CTEFreeLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, TCBHandle);
  382. CompleteRcvs(DelayTCB);
  383. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  384. }
  385. if (DelayTCB->tcb_flags & NEED_OUTPUT) {
  386. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  387. REFERENCE_TCB(DelayTCB);
  388. TCPSend(DelayTCB, TCBHandle);
  389. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  390. }
  391. if (DelayTCB->tcb_flags & NEED_ACK) {
  392. DelayTCB->tcb_flags &= ~NEED_ACK;
  393. CTEFreeLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, TCBHandle);
  394. SendACK(DelayTCB);
  395. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  396. }
  397. }
  398. if (CLOSING(DelayTCB) &&
  399. (DelayTCB->tcb_flags & NEED_OUTPUT) &&
  400. DATA_RCV_STATE(DelayTCB->tcb_state) && (DelayTCB->tcb_closereason & TCB_CLOSE_RST)) {
  401. #if DBG
  402. DbgDnsProb++;
  403. #endif
  404. DelayTCB->tcb_flags &= ~NEED_OUTPUT;
  405. REFERENCE_TCB(DelayTCB);
  406. TCPSend(DelayTCB, TCBHandle);
  407. CTEGetLockAtIrql(&DelayTCB->tcb_lock, OrigIrql, &TCBHandle);
  408. }
  409. DelayTCB->tcb_flags &= ~IN_DELAY_Q;
  410. DerefTCB(DelayTCB, TCBHandle);
  411. if (ItemsProcessed) {
  412. (*ItemsProcessed)++;
  413. }
  414. // If a time constraint was given, bail out if we've past it.
  415. //
  416. if (StopTicks) {
  417. KeQueryTickCount(&Ticks);
  418. if (Ticks.QuadPart > StopTicks->QuadPart) {
  419. TimeConstrained = TRUE;
  420. break;
  421. }
  422. }
  423. }
  424. return TimeConstrained;
  425. }
  426. //* ProcessTCBDelayQ - Process TCBs on the delayed Q.
  427. //
  428. // Called at various times to process TCBs on the delayed Q.
  429. //
  430. // Input: Nothing.
  431. //
  432. // Returns: Nothing.
  433. //
  434. void
  435. ProcessTCBDelayQ(void)
  436. {
  437. uint i;
  438. uint Index;
  439. LOGICAL TimeConstrained;
  440. KIRQL OrigIrql;
  441. ulong ItemsProcessed;
  442. LARGE_INTEGER TicksDelta;
  443. LARGE_INTEGER StopTicks;
  444. // Check for recursion. We do not stop recursion completely, only
  445. // limit it. This is done to allow multiple threads to process the
  446. // TCBDelayQ simultaneously.
  447. CTEInterlockedIncrementLong(&TCBDelayRtnCount.Value);
  448. if (TCBDelayRtnCount.Value > TCBDelayRtnLimit.Value) {
  449. CTEInterlockedDecrementLong(&TCBDelayRtnCount.Value);
  450. return;
  451. }
  452. OrigIrql = KeGetCurrentIrql();
  453. // Constrain ProcessPerCpuTCBDelayQ to run only for 100 ms maximum.
  454. //
  455. ItemsProcessed = 0;
  456. TicksDelta.HighPart = 0;
  457. TicksDelta.LowPart = (100 * 10 * 1000) / KeQueryTimeIncrement();
  458. KeQueryTickCount(&StopTicks);
  459. StopTicks.QuadPart = StopTicks.QuadPart + TicksDelta.QuadPart;
  460. for (i = 0; i < Time_Proc; i++) {
  461. // The order in which we process the delay queues is round-robined
  462. // each time we enter this routine. This gives a bit of fairness
  463. // to TCBs in all queues in the event that we exit this routine
  464. // due to time contraints. Therefore, calculate the Index of
  465. // the delay queue as the following:
  466. //
  467. Index = (i + TCBDelayQRoundRobinIndex.Value) % Time_Proc;
  468. // We are just peeking at the queue to prevent taking it's
  469. // lock uneccessarily.
  470. //
  471. if (!EMPTYQ(&PerCPUDelayQ[Index].TCBDelayQ)) {
  472. TimeConstrained = ProcessPerCpuTCBDelayQ(Index,
  473. OrigIrql,
  474. &StopTicks,
  475. &ItemsProcessed);
  476. if (TimeConstrained) {
  477. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  478. "ProcessTCBDelayQ: Processed %u TCBs before "
  479. "time expired.\n",
  480. ItemsProcessed));
  481. break;
  482. }
  483. }
  484. }
  485. // Update the starting queue index for next time this routine is called.
  486. // Affects of non-synchronized increment here are negligible.
  487. // Ever-incrementing doesn't matter either because of the '% Time_Proc'
  488. // above.
  489. //
  490. TCBDelayQRoundRobinIndex.Value++;
  491. CTEInterlockedDecrementLong(&TCBDelayRtnCount.Value);
  492. }
  493. //* DelayAction - Put a TCB on the queue for a delayed action.
  494. //
  495. // Called when we want to put a TCB on the DelayQ for a delayed action at
  496. // rcv. complete or some other time. The lock on the TCB must be held when
  497. // this is called.
  498. //
  499. // Input: DelayTCB - TCB which we're going to sched.
  500. // Action - Action we're scheduling.
  501. //
  502. // Returns: Nothing.
  503. //
  504. void
  505. DelayAction(TCB * DelayTCB, uint Action)
  506. {
  507. // Schedule the completion.
  508. //
  509. DelayTCB->tcb_flags |= Action;
  510. if (!(DelayTCB->tcb_flags & IN_DELAY_Q)) {
  511. uint Proc;
  512. #if MILLEN
  513. Proc = 0;
  514. #else // MILLEN
  515. Proc = KeGetCurrentProcessorNumber();
  516. #endif // !MILLEN
  517. DelayTCB->tcb_flags |= IN_DELAY_Q;
  518. REFERENCE_TCB(DelayTCB); // Reference this for later.
  519. //We may not be running timer dpcs on all the processors
  520. if (!(Proc < Time_Proc)) {
  521. Proc = 0;
  522. }
  523. InterlockedEnqueueAtDpcLevel(&PerCPUDelayQ[Proc].TCBDelayQ,
  524. &DelayTCB->tcb_delayq,
  525. &PerCPUDelayQ[Proc].TCBDelayLock);
  526. }
  527. }
  528. uint
  529. HandleTWTCB(TWTCB * RcvTCB, uint flags, SeqNum *seq, uint Partition,
  530. CTELockHandle Handle)
  531. {
  532. uint Sendreset = FALSE;
  533. CTELockHandle TcbHandle;
  534. //ASSERT(RcvTCB->twtcb_state == TCB_TIME_WAIT);
  535. //check if this is the duplicate of last Fin segment
  536. //if yes, sendack
  537. //handle duplicate FIN and seq =< rcvnext by droping and sending ack
  538. //and reenter tim_wait state for 2MSL
  539. // send reset if seq > rcvnext, since app has already sent Fin
  540. // if RST, delete this twtcb
  541. // if SYN and if seq > rcvnext, we can do
  542. // 1. delete twtcb and send rst, wait for SYN retry as is done today.
  543. // 2. Tell tcprcv to acept this syn and use ISS+128000
  544. if (SEQ_LTE(*seq, RcvTCB->twtcb_rcvnext) && (flags & TCP_FLAG_FIN)) {
  545. //remove twqueue and reinsert in 2MSL queue
  546. ReInsert2MSL(RcvTCB);
  547. SendTWtcbACK(RcvTCB, Partition, Handle);
  548. return Sendreset;
  549. }
  550. if ((flags & TCP_FLAG_SYN) && SEQ_LTE(*seq, RcvTCB->twtcb_rcvnext)) {
  551. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TcbHandle);
  552. return Sendreset;
  553. }
  554. //if syn is set, we may want to close the tcb here
  555. if ((flags & TCP_FLAG_SYN) || (flags & TCP_FLAG_RST)) {
  556. //delete from delta queue
  557. //and insert it in free twtcb list.
  558. //Note that this requires release of tcblock, acquire twtcblock and then
  559. //re acquire tcb lock.
  560. if (flags & TCP_FLAG_SYN) {
  561. Sendreset = TRUE;
  562. *seq = RcvTCB->twtcb_sendnext+128000;
  563. }
  564. RemoveTWTCB(RcvTCB, Partition);
  565. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TcbHandle);
  566. FreeTWTCB(RcvTCB);
  567. } else {
  568. //just drop silently
  569. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TcbHandle);
  570. }
  571. return Sendreset;
  572. }
  //* TCPRcvComplete - Handle a receive-complete indication.
  //
  //  Called by the lower layers when they have finished delivering received
  //  data. We use the opportunity to run any work that was deferred onto the
  //  TCB delay queues.
  //
  //  Input:  Nothing.
  //
  //  Returns: Nothing.
  //
  void
  TCPRcvComplete(void)
  {
      // All deferred work lives on the delay queues.
      ProcessTCBDelayQ();
  }
  587. //* CompleteConnReq - Complete a connection request on a TCB.
  588. //
  589. // A utility function to complete a connection request on a TCB. We remove
  590. // the connreq, and put it on the ConnReqCmpltQ where it will be picked
  591. // off later during RcvCmplt processing. We assume the TCB lock is held when
  592. // we're called.
  593. //
  594. // Input: CmpltTCB - TCB from which to complete.
  595. // OptInfo - IP OptInfo for completeion.
  596. // Status - Status to complete with.
  597. //
  598. // Returns: Nothing.
  599. //
  600. void
  601. CompleteConnReq(TCB * CmpltTCB, IPOptInfo * OptInfo, TDI_STATUS Status)
  602. {
  603. TCPConnReq *ConnReq;
  604. CTELockHandle QueueHandle;
  605. CTEStructAssert(CmpltTCB, tcb);
  606. ConnReq = CmpltTCB->tcb_connreq;
  607. if (ConnReq != NULL) {
  608. uint FastChk;
  609. // There's a connreq on this TCB. Fill in the connection information
  610. // before returning it.
  611. if (TCB_TIMER_RUNNING_R(CmpltTCB, CONN_TIMER))
  612. STOP_TCB_TIMER_R(CmpltTCB, CONN_TIMER);
  613. CmpltTCB->tcb_connreq = NULL;
  614. UpdateConnInfo(ConnReq->tcr_conninfo, OptInfo, CmpltTCB->tcb_daddr,
  615. CmpltTCB->tcb_dport);
  616. if (ConnReq->tcr_addrinfo) {
  617. UpdateConnInfo(ConnReq->tcr_addrinfo, OptInfo, CmpltTCB->tcb_saddr,
  618. CmpltTCB->tcb_sport);
  619. }
  620. ConnReq->tcr_req.tr_status = Status;
  621. // In order to complete this request directly, we must block further
  622. // receive-processing until this connect-indication is complete.
  623. // We require that any caller of this routine must already hold
  624. // a reference to the TCB so that the dereference below does not drop
  625. // the reference-count to zero.
  626. FastChk = (CmpltTCB->tcb_fastchk & TCP_FLAG_IN_RCV) ^ TCP_FLAG_IN_RCV;
  627. CmpltTCB->tcb_fastchk |= FastChk;
  628. CTEFreeLockFromDPC(&CmpltTCB->tcb_lock,
  629. QueueHandle = KeGetCurrentIrql());
  630. (ConnReq->tcr_req.tr_rtn)(ConnReq->tcr_req.tr_context,
  631. ConnReq->tcr_req.tr_status, 0);
  632. FreeConnReq(ConnReq);
  633. CTEGetLockAtDPC(&CmpltTCB->tcb_lock, &QueueHandle);
  634. CmpltTCB->tcb_fastchk &= ~FastChk;
  635. if (CmpltTCB->tcb_flags & SEND_AFTER_RCV) {
  636. CmpltTCB->tcb_flags &= ~SEND_AFTER_RCV;
  637. DelayAction(CmpltTCB, NEED_OUTPUT);
  638. }
  639. }
  640. #if DBG
  641. else {
  642. ASSERT((CmpltTCB->tcb_state == TCB_SYN_RCVD) &&
  643. (CmpltTCB->tcb_fastchk & TCP_FLAG_ACCEPT_PENDING));
  644. }
  645. #endif
  646. }
  647. void
  648. SynAttChk(AddrObj * ListenAO, TCB * AcceptTCB)
  649. //
  650. // function to check whether certain thresholds relevant to containing a
  651. // SYN attack are being crossed.
  652. //
  653. // This function is called from FindListenConn when a connection has been
  654. // found to handle the SYN request
  655. //
  656. {
  657. BOOLEAN RexmitCntChanged = FALSE;
  658. CTELockHandle Handle;
  659. CTEGetLockAtDPC(&SynAttLock.Lock, &Handle);
  660. if (AcceptTCB) {
  661. uint maxRexmitCnt;
  662. //
  663. // Decrement the # of conn. in half open state
  664. //
  665. ASSERT(TCPHalfOpen != 0);
  666. TCPHalfOpen--;
  667. maxRexmitCnt = MIN(MaxConnectResponseRexmitCountTmp, MaxConnectResponseRexmitCount);
  668. if (AcceptTCB->tcb_rexmitcnt >= maxRexmitCnt) {
  669. BOOLEAN Trigger;
  670. ASSERT(TCPHalfOpenRetried != 0);
  671. Trigger = (TCPHalfOpen < TCPMaxHalfOpen) ||
  672. (--TCPHalfOpenRetried <= TCPMaxHalfOpenRetriedLW);
  673. if (Trigger && (MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  674. MaxConnectResponseRexmitCountTmp = MAX_CONNECT_RESPONSE_REXMIT_CNT;
  675. }
  676. }
  677. } else if (ListenAO) {
  678. //
  679. // We are putting a connection in the syn_rcvd state. Check
  680. // if we have reached the threshold. If we have reduce the
  681. // number of retries to a lower value.
  682. //
  683. if ((++TCPHalfOpen >= TCPMaxHalfOpen) && (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  684. if (TCPHalfOpenRetried >= TCPMaxHalfOpenRetried) {
  685. MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
  686. RexmitCntChanged = TRUE;
  687. }
  688. }
  689. //
  690. // if this connection limit for a port was reached earlier.
  691. // Check if the lower watermark is getting hit now.
  692. //
  693. if (ListenAO->ConnLimitReached) {
  694. ListenAO->ConnLimitReached = FALSE;
  695. if (!RexmitCntChanged && (MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  696. ASSERT(TCPPortsExhausted > 0);
  697. //
  698. // The fact that FindListenConn found a connection on the port
  699. // indicates that we had a connection available. This port
  700. // was therefore not exhausted of connections. Set state
  701. // appropriately. If the port has no more connections now,
  702. // it will get added to the Exhausted count next time a syn for
  703. // the port comes along.
  704. //
  705. ASSERT(TCPPortsExhausted != 0);
  706. if (--TCPPortsExhausted <= TCPMaxPortsExhaustedLW) {
  707. MaxConnectResponseRexmitCountTmp = MAX_CONNECT_RESPONSE_REXMIT_CNT;
  708. }
  709. }
  710. }
  711. } else {
  712. TCPHalfOpen--;
  713. }
  714. CTEFreeLockFromDPC(&SynAttLock.Lock, Handle);
  715. return;
  716. }
  717. BOOLEAN
  718. DelayedAcceptConn(AddrObj * ListenAO, IPAddr Src, ushort SrcPort,TCB *AcceptTCB)
  719. {
  720. CTELockHandle Handle; // Lock handle on AO, TCB.
  721. TCPConn *CurrentConn = NULL;
  722. CTELockHandle ConnHandle;
  723. Queue *Temp;
  724. TCPConnReq *ConnReq = NULL;
  725. BOOLEAN FoundConn = FALSE;
  726. CTEStructAssert(ListenAO, ao);
  727. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  728. CTEFreeLockFromDPC(&AddrObjTableLock.Lock, DISPATCH_LEVEL);
  729. if (AO_VALID(ListenAO)) {
  730. if (ListenAO->ao_connect != NULL) {
  731. uchar TAddress[TCP_TA_SIZE];
  732. PVOID ConnContext;
  733. PConnectEvent Event;
  734. PVOID EventContext;
  735. TDI_STATUS Status;
  736. PTCP_CONTEXT TcpContext = NULL;
  737. #if !MILLEN
  738. ConnectEventInfo *EventInfo;
  739. #else // !MILLEN
  740. ConnectEventInfo EventInfo;
  741. #endif // MILLEN
  742. // He has a connect handler. Put the transport address together,
  743. // and call him. We also need to get the necessary resources
  744. // first.
  745. Event = ListenAO->ao_connect;
  746. EventContext = ListenAO->ao_conncontext;
  747. REF_AO(ListenAO);
  748. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  749. //ao referenced
  750. ConnReq = GetConnReq();
  751. if (AcceptTCB != NULL && ConnReq != NULL) {
  752. BuildTDIAddress(TAddress, Src, SrcPort);
  753. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  754. TCPTRACE(("indicating connect request\n"));
  755. }
  756. Status = (*Event) (EventContext, TCP_TA_SIZE,
  757. (PTRANSPORT_ADDRESS) TAddress, 0, NULL,
  758. AcceptTCB->tcb_opt.ioi_optlength, AcceptTCB->tcb_opt.ioi_options,
  759. &ConnContext, &EventInfo);
  760. if (Status == TDI_MORE_PROCESSING) {
  761. #if !MILLEN
  762. PIO_STACK_LOCATION IrpSp;
  763. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  764. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  765. Status = TCPPrepareIrpForCancel(
  766. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  767. EventInfo,
  768. TCPCancelRequest
  769. );
  770. if (!NT_SUCCESS(Status)) {
  771. Status = TDI_NOT_ACCEPTED;
  772. EventInfo = NULL;
  773. goto AcceptIrpCancelled;
  774. }
  775. // He accepted it. Find the connection on the AddrObj.
  776. //check this out
  777. //KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"EP: Conn accepted for %x\n",AcceptTCB));
  778. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  779. TCPTRACE((
  780. "connect indication accepted, queueing request\n"
  781. ));
  782. }
  783. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  784. & (IrpSp->Parameters);
  785. ConnReq->tcr_conninfo =
  786. AcceptRequest->ReturnConnectionInformation;
  787. if (AcceptRequest->RequestConnectionInformation &&
  788. AcceptRequest->RequestConnectionInformation->RemoteAddress) {
  789. ConnReq->tcr_addrinfo =
  790. AcceptRequest->RequestConnectionInformation;
  791. } else {
  792. ConnReq->tcr_addrinfo = NULL;
  793. }
  794. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  795. ConnReq->tcr_req.tr_context = EventInfo;
  796. AcceptTCB->tcb_connreq = ConnReq;
  797. #else // !MILLEN
  798. ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
  799. ConnReq->tcr_req.tr_context = EventInfo.cei_context;
  800. ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
  801. ConnReq->tcr_addrinfo = NULL;
  802. #endif // MILLEN
  803. CurrentConn = NULL;
  804. #if !MILLEN
  805. if ((IrpSp->FileObject->DeviceObject == TCPDeviceObject) &&
  806. (PtrToUlong(IrpSp->FileObject->FsContext2) == TDI_CONNECTION_FILE) &&
  807. ((TcpContext = IrpSp->FileObject->FsContext) != NULL) &&
  808. ((CurrentConn = GetConnFromConnID(
  809. PtrToUlong(TcpContext->Handle.ConnectionContext), &ConnHandle)) != NULL) &&
  810. (CurrentConn->tc_context == ConnContext) &&
  811. !(CurrentConn->tc_flags & CONN_INVALID)) {
  812. // Found the Conn structure!!
  813. // Don't have to loop below.
  814. CTEStructAssert(CurrentConn, tc);
  815. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  816. AcceptRequest->RequestConnectionInformation,
  817. TRUE
  818. );
  819. if (Status == TDI_SUCCESS) {
  820. FoundConn = TRUE;
  821. ASSERT(AcceptTCB->tcb_state == TCB_SYN_RCVD);
  822. AcceptTCB->tcb_conn = CurrentConn;
  823. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  824. CurrentConn->tc_tcb = AcceptTCB;
  825. CurrentConn->tc_refcnt++;
  826. // Move him from the idle q to the active
  827. // queue.
  828. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  829. REMOVEQ(&CurrentConn->tc_q);
  830. PUSHQ(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  831. } else {
  832. CTEFreeLockFromDPC(&((CurrentConn->tc_ConnBlock)->cb_lock), ConnHandle);
  833. }
  834. } else {
  835. #endif // !MILLEN
  836. if (CurrentConn) {
  837. CTEFreeLockFromDPC(&((CurrentConn->tc_ConnBlock)->cb_lock), ConnHandle);
  838. }
  839. //slow path
  840. //ao is referenced
  841. Temp = QHEAD(&ListenAO->ao_idleq);;
  842. Status = TDI_INVALID_CONNECTION;
  843. while (Temp != QEND(&ListenAO->ao_idleq)) {
  844. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  845. CTEGetLockAtDPC(&(CurrentConn->tc_ConnBlock->cb_lock), &ConnHandle);
  846. #if DBG
  847. CurrentConn->tc_ConnBlock->line = (uint) __LINE__;
  848. CurrentConn->tc_ConnBlock->module = (uchar *) __FILE__;
  849. #endif
  850. CTEStructAssert(CurrentConn, tc);
  851. if ((CurrentConn->tc_context == ConnContext) &&
  852. !(CurrentConn->tc_flags & CONN_INVALID)) {
  853. // We think we have a match. The connection
  854. // shouldn't have a TCB associated with it. If it
  855. // does, it's an error. InitTCBFromConn will
  856. // handle all this.
  857. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  858. #if !MILLEN
  859. AcceptRequest->RequestConnectionInformation,
  860. #else // !MILLEN
  861. EventInfo.cei_acceptinfo,
  862. #endif // MILLEN
  863. TRUE);
  864. if (Status == TDI_SUCCESS) {
  865. FoundConn = TRUE;
  866. AcceptTCB->tcb_conn = CurrentConn;
  867. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  868. CurrentConn->tc_tcb = AcceptTCB;
  869. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  870. // Move him from the idle q to the active
  871. // queue.
  872. REMOVEQ(&CurrentConn->tc_q);
  873. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  874. } else {
  875. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  876. }
  877. // In any case, we're done now.
  878. break;
  879. }
  880. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  881. Temp = QNEXT(Temp);
  882. }
  883. #if !MILLEN
  884. }
  885. #endif // !MILLEN
  886. if (FoundConn) {
  887. LOCKED_DELAY_DEREF_AO(ListenAO);
  888. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  889. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  890. } else {
  891. Handle = DISPATCH_LEVEL;
  892. CTEGetLockAtDPC(&AcceptTCB->tcb_lock, &Handle);
  893. REFERENCE_TCB(AcceptTCB);
  894. CompleteConnReq(AcceptTCB, &AcceptTCB->tcb_opt, Status);
  895. DerefTCB(AcceptTCB, Handle);
  896. DELAY_DEREF_AO(ListenAO);
  897. }
  898. return FoundConn;
  899. }else { //tdi_more_processing
  900. if (ConnReq) {
  901. FreeConnReq(ConnReq);
  902. }
  903. }
  904. // event handler call failed for some reason
  905. // kick in the synattack code
  906. if (SynAttackProtect) {
  907. CTELockHandle Handle;
  908. //
  909. // If we need to Trigger to a lower retry count
  910. //
  911. if (!ListenAO->ConnLimitReached) {
  912. ListenAO->ConnLimitReached = TRUE;
  913. CTEGetLockAtDPC(&SynAttLock.Lock, &Handle);
  914. if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
  915. (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  916. MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
  917. }
  918. CTEFreeLockFromDPC(&SynAttLock.Lock, Handle);
  919. }
  920. }
  921. #if !MILLEN
  922. AcceptIrpCancelled:
  923. #endif // !MILLEN
  924. // The event handler didn't take it. Dereference it, free
  925. // the resources, and return NULL.
  926. DELAY_DEREF_AO(ListenAO);
  927. return FALSE;
  928. } else {
  929. // We couldn't get a valid tcb or getconnreq
  930. if (ConnReq) {
  931. FreeConnReq(ConnReq);
  932. }
  933. DELAY_DEREF_AO(ListenAO);
  934. return FALSE;
  935. }
  936. }else { //ao_connect != null
  937. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  938. }
  939. return FALSE;
  940. } //AO not valid
  941. return FALSE;
  942. }
  943. BOOLEAN
  944. InitSynTCB(SYNTCB *SynTcb,
  945. IPAddr Src,
  946. IPAddr Dest,
  947. TCPHeader UNALIGNED *TCPH,
  948. TCPRcvInfo *RcvInfo)
  949. {
  950. CTELockHandle Handle;
  951. SynTcb->syntcb_state = TCB_SYN_RCVD;
  952. SynTcb->syntcb_flags |= CONN_ACCEPTED;
  953. SynTcb->syntcb_refcnt = 1;
  954. SynTcb->syntcb_defaultwin = DEFAULT_RCV_WIN;
  955. SynTcb->syntcb_rcvwin = DEFAULT_RCV_WIN;
  956. if (DefaultRcvWin) {
  957. SynTcb->syntcb_rcvwinscale = 0;
  958. if (TcpHostOpts & TCP_FLAG_WS) {
  959. while ((SynTcb->syntcb_rcvwinscale < TCP_MAX_WINSHIFT) &&
  960. ((TCP_MAXWIN << SynTcb->syntcb_rcvwinscale) < (int)DefaultRcvWin)) {
  961. SynTcb->syntcb_rcvwinscale++;
  962. }
  963. }else{
  964. if (DefaultRcvWin > 0xFFFF) {
  965. SynTcb->syntcb_defaultwin = 0xFFFF;
  966. SynTcb->syntcb_rcvwin = 0xFFFF;
  967. }
  968. }
  969. }
  970. SynTcb->syntcb_daddr = Src;
  971. SynTcb->syntcb_saddr = Dest;
  972. SynTcb->syntcb_dport = TCPH->tcp_src;
  973. SynTcb->syntcb_sport = TCPH->tcp_dest;
  974. SynTcb->syntcb_rcvnext = ++(RcvInfo->tri_seq);
  975. SynTcb->syntcb_sendwin = RcvInfo->tri_window;
  976. SynTcb->syntcb_sendmax = SynTcb->syntcb_sendnext;
  977. //
  978. // Find Remote MSS and also if WS, TS or
  979. //sack options are negotiated.
  980. //
  981. SynTcb->syntcb_sndwinscale = 0;
  982. SynTcb->syntcb_remmss = FindMSSAndOptions(TCPH, (TCB *)SynTcb,TRUE);
  983. if (SynTcb->syntcb_remmss <= ALIGNED_TS_OPT_SIZE) {
  984. //turn off TS if mss is not sufficient to
  985. //hold TS fileds.
  986. SynTcb->syntcb_tcpopts &= ~TCP_FLAG_TS;
  987. }
  988. if (!InsertSynTCB(SynTcb, &Handle)){
  989. FreeSynTCB(SynTcb);
  990. return FALSE;
  991. }
  992. SynTcb->syntcb_rexmitcnt = 0;
  993. SynTcb->syntcb_rtt = 0;
  994. SynTcb->syntcb_smrtt = 0;
  995. SynTcb->syntcb_delta = MS_TO_TICKS(6000);
  996. SynTcb->syntcb_rexmit = MS_TO_TICKS(3000);
  997. SendSYNOnSynTCB(SynTcb, Handle);
  998. // The Rexmit interval has to be doubled here..
  999. SynTcb->syntcb_rexmit = MIN( SynTcb->syntcb_rexmit << 1, MAX_REXMIT_TO );
  1000. TStats.ts_passiveopens++;
  1001. return TRUE;
  1002. }
  1003. //* FindListenConn - Find (or fabricate) a listening connection.
  1004. //
  1005. // Called by our Receive handler to decide what to do about an incoming
  1006. // SYN. We walk down the list of connections associated with the destination
  1007. // address, and if we find any in the listening state that can be used for
  1008. // the incoming request we'll take them, possibly returning a listen in the
  1009. // process. If we don't find any appropriate listening connections, we'll
  1010. // call the Connect Event handler if one is registerd. If all else fails,
  1011. // we'll return NULL and the SYN will be RST.
  1012. //
  1013. // The caller must hold the AddrObjTableLock before calling this routine,
  1014. // and that lock must have been taken at DPC level. This routine will free
  1015. // that lock back to DPC level.
  1016. //
  1017. // Input: ListenAO - Pointer to AddrObj for local address.
  1018. // Src - Source IP address of SYN.
  1019. // SrcPort - Source port of SYN.
  1020. // OptInfo - IP options info from SYN.
  1021. //
  1022. // Returns: Pointer to found TCB, or NULL if we can't find one.
  1023. //
  1024. TCB *
  1025. FindListenConn(AddrObj *ListenAO,
  1026. IPAddr Src,
  1027. IPAddr Dest,
  1028. ushort SrcPort,
  1029. IPOptInfo *OptInfo,
  1030. TCPHeader UNALIGNED *TCPH,
  1031. TCPRcvInfo *RcvInfo,
  1032. BOOLEAN *syn)
  1033. {
  1034. CTELockHandle Handle; // Lock handle on AO, TCB.
  1035. TCB *CurrentTCB = NULL;
  1036. TCPConn *CurrentConn = NULL;
  1037. TCPConnReq *ConnReq = NULL;
  1038. CTELockHandle ConnHandle;
  1039. Queue *Temp;
  1040. uint FoundConn = FALSE;
  1041. CTEStructAssert(ListenAO, ao);
  1042. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  1043. CTEFreeLockFromDPC(&AddrObjTableLock.Lock, DISPATCH_LEVEL);
  1044. // We have the lock on the AddrObj. Walk down it's list, looking
  1045. // for connections in the listening state.
  1046. if (AO_VALID(ListenAO)) {
  1047. if (ListenAO->ao_listencnt != 0) {
  1048. CTELockHandle TCBHandle;
  1049. Temp = QHEAD(&ListenAO->ao_listenq);
  1050. while (Temp != QEND(&ListenAO->ao_listenq)) {
  1051. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  1052. ListenAO->ao_usecnt++;
  1053. CTEFreeLockFromDPC(&ListenAO->ao_lock, DISPATCH_LEVEL);
  1054. CTEGetLockAtDPC(&(CurrentConn->tc_ConnBlock->cb_lock), &ConnHandle);
  1055. #if DBG
  1056. CurrentConn->tc_ConnBlock->line = (uint) __LINE__;
  1057. CurrentConn->tc_ConnBlock->module = (uchar *) __FILE__;
  1058. #endif
  1059. CTEStructAssert(CurrentConn, tc);
  1060. CTEGetLockAtDPC(&ListenAO->ao_lock, &ConnHandle);
  1061. ListenAO->ao_usecnt--;
  1062. // If this TCB is in the listening state, with no delete
  1063. // pending, it's a candidate. Look at the pending listen
  1064. // info. to see if we should take it.
  1065. if (((CurrentTCB = CurrentConn->tc_tcb) != NULL) && CurrentTCB->tcb_state == TCB_LISTEN) {
  1066. CTEStructAssert(CurrentTCB, tcb);
  1067. ASSERT(CurrentTCB->tcb_state == TCB_LISTEN);
  1068. CTEGetLockAtDPC(&CurrentTCB->tcb_lock, &TCBHandle);
  1069. if (CurrentTCB->tcb_state == TCB_LISTEN &&
  1070. !PENDING_ACTION(CurrentTCB)) {
  1071. // Need to see if we can take it.
  1072. // See if the addresses specifed in the ConnReq
  1073. // match.
  1074. if ((IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
  1075. NULL_IP_ADDR) ||
  1076. IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
  1077. Src)) &&
  1078. (CurrentTCB->tcb_dport == 0 ||
  1079. CurrentTCB->tcb_dport == SrcPort)) {
  1080. FoundConn = TRUE;
  1081. break;
  1082. }
  1083. // Otherwise, this didn't match, so we'll check the
  1084. // next one.
  1085. }
  1086. CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
  1087. }
  1088. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  1089. Temp = QNEXT(Temp);
  1090. }
  1091. //..with ao_lock held
  1092. // See why we've exited the loop.
  1093. if (FoundConn) {
  1094. CTEStructAssert(CurrentTCB, tcb);
  1095. // We exited because we found a TCB. If it's pre-accepted,
  1096. // we're done.
  1097. REFERENCE_TCB(CurrentTCB);
  1098. ASSERT(CurrentTCB->tcb_connreq != NULL);
  1099. ConnReq = CurrentTCB->tcb_connreq;
  1100. // If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
  1101. if (!(ConnReq->tcr_flags & TCR_FLAG_QUERY_ACCEPT)) {
  1102. CurrentTCB->tcb_flags |= CONN_ACCEPTED;
  1103. // If CONN_ACCEPTED, Tdi Accept is not called
  1104. // again. So, get ISN when we are with in conn table lock
  1105. #if MILLEN
  1106. //just use tcb_sendnext to hold hash value
  1107. //for randisn
  1108. CurrentTCB->tcb_sendnext = TCB_HASH(CurrentTCB->tcb_daddr, CurrentTCB->tcb_dport, CurrentTCB->tcb_saddr, CurrentTCB->tcb_sport);
  1109. #endif
  1110. GetRandomISN(&CurrentTCB->tcb_sendnext);
  1111. }
  1112. CurrentTCB->tcb_state = TCB_SYN_RCVD;
  1113. ListenAO->ao_listencnt--;
  1114. // Since he's no longer listening, remove him from the listen
  1115. // queue and put him on the active queue.
  1116. REMOVEQ(&CurrentConn->tc_q);
  1117. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1118. if (SynAttackProtect) {
  1119. SynAttChk(ListenAO,NULL);
  1120. }
  1121. CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
  1122. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1123. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  1124. return CurrentTCB;
  1125. } else {
  1126. // Since we have a listening count, this should never happen
  1127. // if that count was non-zero initially.
  1128. // We currently don't keep a good count on ao_listencnt when
  1129. // the IRPs are cancelled.
  1130. // ASSERT(FALSE);
  1131. }
  1132. }
  1133. // We didn't find a matching TCB. If there's a connect indication
  1134. // handler, call it now to find a connection to accept on.
  1135. //AO_lock is held
  1136. ASSERT(FoundConn == FALSE);
  1137. if (SynAttackProtect){
  1138. SYNTCB *AcceptTCB;
  1139. AcceptTCB = AllocSynTCB();
  1140. if (AcceptTCB) {
  1141. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1142. #if MILLEN
  1143. //just use tcb_sendnext to hold hash value
  1144. //for randisn
  1145. AcceptTCB->syntcb_sendnext = TCB_HASH(AcceptTCB->syntcb_daddr, AcceptTCB->syntcb_dport, AcceptTCB->syntcb_saddr, AcceptTCB->syntcb_sport);
  1146. #endif
  1147. GetRandomISN(&AcceptTCB->syntcb_sendnext);
  1148. //KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"EP:FLC allocated SP TCB %x\n",AcceptTCB));
  1149. if (InitSynTCB(AcceptTCB,Src,Dest,TCPH,RcvInfo)){
  1150. *syn = TRUE;
  1151. SynAttChk(ListenAO, NULL);
  1152. }
  1153. return NULL;
  1154. } else {
  1155. //resource problem bail out
  1156. //KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"EP:FLC Failed to allocate TCB\n"));
  1157. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1158. return NULL;
  1159. }
  1160. }
  1161. if (ListenAO->ao_connect != NULL) {
  1162. uchar TAddress[TCP_TA_SIZE];
  1163. PVOID ConnContext;
  1164. PConnectEvent Event;
  1165. PVOID EventContext;
  1166. TDI_STATUS Status;
  1167. TCB *AcceptTCB;
  1168. TCPConnReq *ConnReq;
  1169. PTCP_CONTEXT TcpContext = NULL;
  1170. #if !MILLEN
  1171. ConnectEventInfo *EventInfo;
  1172. #else // !MILLEN
  1173. ConnectEventInfo EventInfo;
  1174. #endif // MILLEN
  1175. // He has a connect handler. Put the transport address together,
  1176. // and call him. We also need to get the necessary resources
  1177. // first.
  1178. Event = ListenAO->ao_connect;
  1179. EventContext = ListenAO->ao_conncontext;
  1180. REF_AO(ListenAO);
  1181. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1182. //ao referenced
  1183. AcceptTCB = AllocTCB();
  1184. ConnReq = GetConnReq();
  1185. if (AcceptTCB != NULL && ConnReq != NULL) {
  1186. //Event = ListenAO->ao_connect;
  1187. //EventContext = ListenAO->ao_conncontext;
  1188. BuildTDIAddress(TAddress, Src, SrcPort);
  1189. //REF_AO(ListenAO);
  1190. AcceptTCB->tcb_state = TCB_LISTEN;
  1191. AcceptTCB->tcb_connreq = ConnReq;
  1192. AcceptTCB->tcb_flags |= CONN_ACCEPTED;
  1193. //CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1194. //CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
  1195. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  1196. TCPTRACE(("indicating connect request\n"));
  1197. }
  1198. Status = (*Event) (EventContext, TCP_TA_SIZE,
  1199. (PTRANSPORT_ADDRESS) TAddress, 0, NULL,
  1200. OptInfo->ioi_optlength, OptInfo->ioi_options,
  1201. &ConnContext, &EventInfo);
  1202. if (Status == TDI_MORE_PROCESSING) {
  1203. #if !MILLEN
  1204. PIO_STACK_LOCATION IrpSp;
  1205. PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
  1206. IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
  1207. Status = TCPPrepareIrpForCancel(
  1208. (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
  1209. EventInfo,
  1210. TCPCancelRequest
  1211. );
  1212. if (!NT_SUCCESS(Status)) {
  1213. Status = TDI_NOT_ACCEPTED;
  1214. EventInfo = NULL;
  1215. goto AcceptIrpCancelled;
  1216. }
  1217. // He accepted it. Find the connection on the AddrObj.
  1218. {
  1219. IF_TCPDBG(TCP_DEBUG_CONNECT) {
  1220. TCPTRACE((
  1221. "connect indication accepted, queueing request\n"
  1222. ));
  1223. }
  1224. AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
  1225. & (IrpSp->Parameters);
  1226. ConnReq->tcr_conninfo =
  1227. AcceptRequest->ReturnConnectionInformation;
  1228. if (AcceptRequest->RequestConnectionInformation &&
  1229. AcceptRequest->RequestConnectionInformation->RemoteAddress) {
  1230. ConnReq->tcr_addrinfo =
  1231. AcceptRequest->RequestConnectionInformation;
  1232. } else {
  1233. ConnReq->tcr_addrinfo = NULL;
  1234. }
  1235. ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
  1236. ConnReq->tcr_req.tr_context = EventInfo;
  1237. ConnReq->tcr_flags = 0;
  1238. }
  1239. #else // !MILLEN
  1240. ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
  1241. ConnReq->tcr_req.tr_context = EventInfo.cei_context;
  1242. ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
  1243. ConnReq->tcr_addrinfo = NULL;
  1244. #endif // MILLEN
  1245. CurrentConn = NULL;
  1246. #if !MILLEN
  1247. if ((IrpSp->FileObject->DeviceObject == TCPDeviceObject) &&
  1248. (PtrToUlong(IrpSp->FileObject->FsContext2) == TDI_CONNECTION_FILE) &&
  1249. ((TcpContext = IrpSp->FileObject->FsContext) != NULL) &&
  1250. ((CurrentConn = GetConnFromConnID(
  1251. PtrToUlong(TcpContext->Handle.ConnectionContext), &ConnHandle)) != NULL) &&
  1252. (CurrentConn->tc_context == ConnContext) &&
  1253. !(CurrentConn->tc_flags & CONN_INVALID)) {
  1254. // Found the Conn structure!!
  1255. // Don't have to loop below.
  1256. CTEStructAssert(CurrentConn, tc);
  1257. AcceptTCB->tcb_refcnt = 0;
  1258. REFERENCE_TCB(AcceptTCB);
  1259. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  1260. AcceptRequest->RequestConnectionInformation,
  1261. TRUE
  1262. );
  1263. if (Status == TDI_SUCCESS) {
  1264. FoundConn = TRUE;
  1265. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  1266. AcceptTCB->tcb_conn = CurrentConn;
  1267. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  1268. CurrentConn->tc_tcb = AcceptTCB;
  1269. CurrentConn->tc_refcnt++;
  1270. GetRandomISN(&AcceptTCB->tcb_sendnext);
  1271. // Move him from the idle q to the active
  1272. // queue.
  1273. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  1274. REMOVEQ(&CurrentConn->tc_q);
  1275. PUSHQ(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1276. } else {
  1277. CTEFreeLockFromDPC(&((CurrentConn->tc_ConnBlock)->cb_lock), ConnHandle);
  1278. }
  1279. } else {
  1280. #endif // !MILLEN
  1281. if (CurrentConn) {
  1282. CTEFreeLockFromDPC(&((CurrentConn->tc_ConnBlock)->cb_lock), ConnHandle);
  1283. }
  1284. //slow path
  1285. //ao is referenced
  1286. Temp = QHEAD(&ListenAO->ao_idleq);;
  1287. CurrentTCB = NULL;
  1288. Status = TDI_INVALID_CONNECTION;
  1289. while (Temp != QEND(&ListenAO->ao_idleq)) {
  1290. CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
  1291. CTEGetLockAtDPC(&(CurrentConn->tc_ConnBlock->cb_lock), &ConnHandle);
  1292. #if DBG
  1293. CurrentConn->tc_ConnBlock->line = (uint) __LINE__;
  1294. CurrentConn->tc_ConnBlock->module = (uchar *) __FILE__;
  1295. #endif
  1296. CTEStructAssert(CurrentConn, tc);
  1297. if ((CurrentConn->tc_context == ConnContext) &&
  1298. !(CurrentConn->tc_flags & CONN_INVALID)) {
  1299. // We think we have a match. The connection
  1300. // shouldn't have a TCB associated with it. If it
  1301. // does, it's an error. InitTCBFromConn will
  1302. // handle all this.
  1303. AcceptTCB->tcb_refcnt = 0;
  1304. REFERENCE_TCB(AcceptTCB);
  1305. Status = InitTCBFromConn(CurrentConn, AcceptTCB,
  1306. #if !MILLEN
  1307. AcceptRequest->RequestConnectionInformation,
  1308. #else // !MILLEN
  1309. EventInfo.cei_acceptinfo,
  1310. #endif // MILLEN
  1311. TRUE);
  1312. if (Status == TDI_SUCCESS) {
  1313. FoundConn = TRUE;
  1314. AcceptTCB->tcb_state = TCB_SYN_RCVD;
  1315. AcceptTCB->tcb_conn = CurrentConn;
  1316. AcceptTCB->tcb_connid = CurrentConn->tc_connid;
  1317. CurrentConn->tc_tcb = AcceptTCB;
  1318. CurrentConn->tc_refcnt++;
  1319. #if MILLEN
  1320. //just use tcb_sendnext to hold hash value
  1321. //for randisn
  1322. AcceptTCB->tcb_sendnext = TCB_HASH(AcceptTCB->tcb_daddr, AcceptTCB->tcb_dport, AcceptTCB->tcb_saddr, AcceptTCB->tcb_sport);
  1323. #endif
  1324. GetRandomISN(&AcceptTCB->tcb_sendnext);
  1325. CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
  1326. // Move him from the idle q to the active
  1327. // queue.
  1328. REMOVEQ(&CurrentConn->tc_q);
  1329. ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
  1330. } else {
  1331. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  1332. }
  1333. // In any case, we're done now.
  1334. break;
  1335. }
  1336. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  1337. Temp = QNEXT(Temp);
  1338. }
  1339. #if !MILLEN
  1340. }
  1341. #endif // !MILLEN
  1342. if (!FoundConn) {
  1343. // Didn't find a match, or had an error. Status
  1344. // code is set.
  1345. // Complete the ConnReq and free the resources.
  1346. CTEGetLockAtDPC(&AcceptTCB->tcb_lock, &Handle);
  1347. CompleteConnReq(AcceptTCB, OptInfo, Status);
  1348. CTEFreeLockFromDPC(&AcceptTCB->tcb_lock, Handle);
  1349. FreeTCB(AcceptTCB);
  1350. AcceptTCB = NULL;
  1351. }
  1352. else {
  1353. if (SynAttackProtect) {
  1354. SynAttChk(ListenAO, NULL);
  1355. }
  1356. }
  1357. if (FoundConn) {
  1358. LOCKED_DELAY_DEREF_AO(ListenAO);
  1359. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1360. CTEFreeLockFromDPC(&(CurrentConn->tc_ConnBlock->cb_lock), ConnHandle);
  1361. } else {
  1362. DELAY_DEREF_AO(ListenAO);
  1363. }
  1364. return AcceptTCB;
  1365. } //tdi_more_processing
  1366. // event handler call failed for some reason
  1367. // kick in the synattack code
  1368. if (SynAttackProtect) {
  1369. CTELockHandle Handle;
  1370. //
  1371. // If we need to Trigger to a lower retry count
  1372. //
  1373. if (!ListenAO->ConnLimitReached) {
  1374. ListenAO->ConnLimitReached = TRUE;
  1375. CTEGetLockAtDPC(&SynAttLock.Lock, &Handle);
  1376. if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
  1377. (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  1378. MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
  1379. }
  1380. CTEFreeLockFromDPC(&SynAttLock.Lock, Handle);
  1381. }
  1382. }
  1383. #if !MILLEN
  1384. AcceptIrpCancelled:
  1385. #endif // !MILLEN
  1386. // The event handler didn't take it. Dereference it, free
  1387. // the resources, and return NULL.
  1388. FreeConnReq(ConnReq);
  1389. FreeTCB(AcceptTCB);
  1390. DELAY_DEREF_AO(ListenAO);
  1391. return NULL;
  1392. } else {
  1393. // We couldn't get a needed resources. Free any that we
  1394. // did get, and fall through to the 'return NULL' code.
  1395. DELAY_DEREF_AO(ListenAO);
  1396. if (ConnReq != NULL)
  1397. FreeConnReq(ConnReq);
  1398. if (AcceptTCB != NULL)
  1399. FreeTCB(AcceptTCB);
  1400. }
  1401. } else { //ao_connect != null
  1402. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1403. }
  1404. return NULL;
  1405. }
  1406. // If we get here, the address object wasn't valid.
  1407. CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
  1408. return NULL;
  1409. }
  1410. //* FindMSS - Find the MSS option in a segment.
  1411. //
  1412. // Called when a SYN is received to find the MSS option in a segment. If we
  1413. // don't find one, we assume the worst and return 536.
  1414. //
  1415. // Input: TCPH - TCP header to be searched.
  1416. //
  1417. // Returns: MSS to be used.
  1418. //
  1419. ushort
  1420. FindMSS(TCPHeader UNALIGNED * TCPH)
  1421. {
  1422. uint OptSize;
  1423. uchar *OptPtr;
  1424. OptSize = TCP_HDR_SIZE(TCPH) - sizeof(TCPHeader);
  1425. OptPtr = (uchar *) (TCPH + 1);
  1426. while (OptSize) {
  1427. if (*OptPtr == TCP_OPT_EOL)
  1428. break;
  1429. if (*OptPtr == TCP_OPT_NOP) {
  1430. OptPtr++;
  1431. OptSize--;
  1432. continue;
  1433. }
  1434. if (*OptPtr == TCP_OPT_MSS) {
  1435. if (OptPtr[1] == MSS_OPT_SIZE) {
  1436. ushort TempMss = *(ushort UNALIGNED *) (OptPtr + 2);
  1437. if (TempMss != 0)
  1438. return net_short(TempMss);
  1439. else
  1440. break; // MSS size of 0, use default.
  1441. } else
  1442. break; // Bad option size, use default.
  1443. } else {
  1444. // Unknown option.
  1445. if (OptPtr[1] == 0 || OptPtr[1] > OptSize)
  1446. break; // Bad option length, bail out.
  1447. OptSize -= OptPtr[1];
  1448. OptPtr += OptPtr[1];
  1449. }
  1450. }
  1451. return MAX_REMOTE_MSS;
  1452. }
  1453. // FindMSSAndOptions
  1454. //
  1455. // Called when a SYN is received to find the MSS option in a segment. If we
  1456. // don't find one, we assume the worst and return 536.
  1457. //
  1458. // Also, parses incoming header for Window scaling, timestamp and SACK
  1459. // options. Note that we will enable these options for the connection
  1460. // only if they are enabled on this host.
  1461. //
  1462. //
  1463. // Input: TCPH - TCP header to be searched.
  1464. //
  1465. // Returns: MSS to be used.
  1466. //
  1467. ushort
  1468. FindMSSAndOptions(TCPHeader UNALIGNED * TCPH, TCB * SynTCB, BOOLEAN syn)
  1469. {
  1470. uint OptSize;
  1471. uchar *OptPtr;
  1472. ushort TempMss = 0;
  1473. BOOLEAN WinScale = FALSE;
  1474. ushort SYN = 0;
  1475. ushort tcboptions;
  1476. uint tcbdefwin;
  1477. short rcvwinscale=0,sndwinscale=0;
  1478. int tsupdate=0,tsrecent=0;
  1479. OptSize = TCP_HDR_SIZE(TCPH) - sizeof(TCPHeader);
  1480. OptPtr = (uchar *) (TCPH + 1);
  1481. SYN = (TCPH->tcp_flags & TCP_FLAG_SYN);
  1482. if (syn) {
  1483. tcboptions = ((SYNTCB *)SynTCB)->syntcb_tcpopts;
  1484. } else {
  1485. tcboptions = SynTCB->tcb_tcpopts;
  1486. }
  1487. while ((int)OptSize > 0) {
  1488. if (*OptPtr == TCP_OPT_EOL)
  1489. break;
  1490. if (*OptPtr == TCP_OPT_NOP) {
  1491. OptPtr++;
  1492. OptSize--;
  1493. continue;
  1494. }
  1495. if ((*OptPtr == TCP_OPT_MSS) && (OptSize >= MSS_OPT_SIZE)) {
  1496. if (SYN && (OptPtr[1] == MSS_OPT_SIZE)) {
  1497. TempMss = 0;
  1498. TempMss = *(ushort UNALIGNED *) (OptPtr + 2);
  1499. if (TempMss != 0) {
  1500. TempMss = net_short(TempMss);
  1501. }
  1502. }
  1503. OptSize -= MSS_OPT_SIZE;
  1504. OptPtr += MSS_OPT_SIZE;
  1505. } else if ((*OptPtr == TCP_OPT_WS) && (OptSize >= WS_OPT_SIZE)) {
  1506. if (SYN && (OptPtr[1] == WS_OPT_SIZE)
  1507. && (TcpHostOpts & TCP_FLAG_WS)) {
  1508. sndwinscale = (uint)OptPtr[2];
  1509. IF_TCPDBG(TCP_DEBUG_1323) {
  1510. TCPTRACE(("WS option %x", sndwinscale));
  1511. }
  1512. tcboptions |= TCP_FLAG_WS;
  1513. WinScale = TRUE;
  1514. }
  1515. OptSize -= WS_OPT_SIZE;
  1516. OptPtr += WS_OPT_SIZE;
  1517. } else if ((*OptPtr == TCP_OPT_TS) && (OptSize >= TS_OPT_SIZE)) {
  1518. // Time stamp options
  1519. if ((OptPtr[1] == TS_OPT_SIZE) && (TcpHostOpts & TCP_FLAG_TS)) {
  1520. int tsval = *(int UNALIGNED *)&OptPtr[2];
  1521. tcboptions |= TCP_FLAG_TS;
  1522. if (SYN) {
  1523. tsupdate = TCPTime;
  1524. tsrecent = net_long(tsval);
  1525. }
  1526. IF_TCPDBG(TCP_DEBUG_1323) {
  1527. TCPTRACE(("TS option %x", SynTCB));
  1528. }
  1529. }
  1530. OptSize -= TS_OPT_SIZE;
  1531. OptPtr += TS_OPT_SIZE;
  1532. } else if ((*OptPtr == TCP_SACK_PERMITTED_OPT)
  1533. && (OptSize >= SACK_PERMITTED_OPT_SIZE)) {
  1534. // SACK OPtions
  1535. if ((OptPtr[1] == SACK_PERMITTED_OPT_SIZE)
  1536. && (TcpHostOpts & TCP_FLAG_SACK)) {
  1537. tcboptions |= TCP_FLAG_SACK;
  1538. IF_TCPDBG(TCP_DEBUG_SACK) {
  1539. TCPTRACE(("Rcvd SACK_OPT %x\n", SynTCB));
  1540. }
  1541. }
  1542. OptSize -= SACK_PERMITTED_OPT_SIZE;
  1543. OptPtr += SACK_PERMITTED_OPT_SIZE;
  1544. } else { // Unknown option.
  1545. if (OptSize > 1) {
  1546. if (OptPtr[1] == 0 || OptPtr[1] > OptSize) {
  1547. break; // Bad option length, bail out.
  1548. }
  1549. OptSize -= OptPtr[1];
  1550. OptPtr += OptPtr[1];
  1551. } else {
  1552. break;
  1553. }
  1554. }
  1555. }
  1556. if (WinScale) {
  1557. if (sndwinscale > TCP_MAX_WINSHIFT) {
  1558. sndwinscale = TCP_MAX_WINSHIFT;
  1559. }
  1560. }
  1561. if (syn) {
  1562. ((SYNTCB *)SynTCB)->syntcb_tcpopts = tcboptions;
  1563. ((SYNTCB *)SynTCB)->syntcb_tsupdatetime = tsupdate;
  1564. ((SYNTCB *)SynTCB)->syntcb_tsrecent = tsrecent;
  1565. if (!WinScale && (DefaultRcvWin > 0xFFFF)) {
  1566. ((SYNTCB *)SynTCB)->syntcb_defaultwin = 0xFFFF;
  1567. ((SYNTCB *)SynTCB)->syntcb_rcvwin = 0xFFFF;
  1568. ((SYNTCB *)SynTCB)->syntcb_rcvwinscale = 0;
  1569. }
  1570. ((SYNTCB *)SynTCB)->syntcb_sndwinscale = sndwinscale;
  1571. } else {
  1572. SynTCB->tcb_tcpopts = tcboptions;
  1573. SynTCB->tcb_tsupdatetime = tsupdate;
  1574. SynTCB->tcb_tsrecent = tsrecent;
  1575. if (!WinScale && (DefaultRcvWin > 0xFFFF)) {
  1576. SynTCB->tcb_defaultwin = 0xFFFF;
  1577. SynTCB->tcb_rcvwin = 0xFFFF;
  1578. SynTCB->tcb_rcvwinscale = 0;
  1579. }
  1580. SynTCB->tcb_sndwinscale = sndwinscale;
  1581. }
  1582. if (TempMss) {
  1583. return (TempMss);
  1584. } else {
  1585. return MAX_REMOTE_MSS;
  1586. }
  1587. }
  1588. //* ACKAndDrop - Acknowledge a segment, and drop it.
  1589. //
  1590. // Called from within the receive code when we need to drop a segment that's
  1591. // outside the receive window.
  1592. //
  1593. // Input: RI - Receive info for incoming segment.
  1594. // RcvTCB - TCB for incoming segment.
  1595. //
  1596. // Returns: Nothing.
  1597. //
  1598. void
  1599. ACKAndDrop(TCPRcvInfo * RI, TCB * RcvTCB)
  1600. {
  1601. CTELockHandle Handle;
  1602. Handle = DISPATCH_LEVEL;
  1603. if (!(RI->tri_flags & TCP_FLAG_RST)) {
  1604. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, Handle);
  1605. SendACK(RcvTCB);
  1606. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &Handle);
  1607. }
  1608. DerefTCB(RcvTCB, Handle);
  1609. }
  1610. //* ACKData - Acknowledge data.
  1611. //
  1612. // Called from the receive handler to acknowledge data. We're given the
  1613. // TCB and the new value of senduna. We walk down the send q. pulling
  1614. // off sends and putting them on the complete q until we hit the end
  1615. // or we acknowledge the specified number of bytes of data.
  1616. //
  1617. // NOTE: We manipulate the send refcnt and acked flag without taking a lock.
  1618. // This is OK in the VxD version where locks don't mean anything anyway, but
  1619. // in the port to NT we'll need to add locking. The lock will have to be
  1620. // taken in the transmit complete routine. We can't use a lock in the TCB,
  1621. // since the TCB could go away before the transmit complete happens, and a lock
  1622. // in the TSR would be overkill, so it's probably best to use a global lock
  1623. // for this. If that causes too much contention, we could use a set of locks
  1624. // and pass a pointer to the appropriate lock back as part of the transmit
  1625. // confirm context. This lock pointer would also need to be stored in the
  1626. // TCB.
  1627. //
  1628. // Input: ACKTcb - TCB from which to pull data.
  1629. // SendUNA - New value of send una.
  1630. // SendQ - Queue to be filled with ACK'd requests.
  1631. //
  1632. // Returns: Nothing.
  1633. //
  1634. void
  1635. ACKData(TCB * ACKTcb, SeqNum SendUNA, Queue* SendQ)
  1636. {
  1637. Queue *End, *Current; // End and current elements.
  1638. Queue *TempQ, *EndQ;
  1639. Queue *LastCmplt; // Last one we completed.
  1640. TCPSendReq *CurrentTSR; // Current send req we're
  1641. // looking at.
  1642. PNDIS_BUFFER CurrentBuffer; // Current NDIS_BUFFER.
  1643. uint Updated = FALSE;
  1644. uint BufLength;
  1645. int Amount, OrigAmount;
  1646. long Result;
  1647. CTELockHandle Handle;
  1648. uint Temp;
  1649. Queue *DummytmpQ;
  1650. #if TRACE_EVENT
  1651. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  1652. WMIData WMIInfo;
  1653. #endif
  1654. CTEStructAssert(ACKTcb, tcb);
  1655. CheckTCBSends(ACKTcb);
  1656. Amount = SendUNA - ACKTcb->tcb_senduna;
  1657. ASSERT(Amount > 0);
  1658. // if the receiver is acking something for which we have
  1659. // a sack entry, remove it.
  1660. if (ACKTcb->tcb_SackRcvd) {
  1661. SackListEntry *Prev, *Current;
  1662. Prev = STRUCT_OF(SackListEntry, &ACKTcb->tcb_SackRcvd, next);
  1663. Current = ACKTcb->tcb_SackRcvd;
  1664. // Scan the list for old sack entries and purge them
  1665. while ((Current != NULL) && SEQ_GT(SendUNA, Current->begin)) {
  1666. Prev->next = Current->next;
  1667. IF_TCPDBG(TCP_DEBUG_SACK) {
  1668. TCPTRACE(("ACKData:Purging old entries %x %d %d\n", Current, Current->begin, Current->end));
  1669. }
  1670. CTEFreeMem(Current);
  1671. Current = Prev->next;
  1672. }
  1673. }
  1674. // Do a quick check to see if this acks everything that we have. If it does,
  1675. // handle it right away. We can only do this in the ESTABLISHED state,
  1676. // because we blindly update sendnext, and that can only work if we
  1677. // haven't sent a FIN.
  1678. if ((Amount == (int)ACKTcb->tcb_unacked) && ACKTcb->tcb_state == TCB_ESTAB) {
  1679. // Everything is acked.
  1680. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1681. TempQ = ACKTcb->tcb_sendq.q_next;
  1682. INITQ(&ACKTcb->tcb_sendq);
  1683. ACKTcb->tcb_sendnext = SendUNA;
  1684. ACKTcb->tcb_senduna = SendUNA;
  1685. ASSERT(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
  1686. ACKTcb->tcb_cursend = NULL;
  1687. ACKTcb->tcb_sendbuf = NULL;
  1688. ACKTcb->tcb_sendofs = 0;
  1689. ACKTcb->tcb_sendsize = 0;
  1690. ACKTcb->tcb_unacked = 0;
  1691. #if ACK_DEBUG
  1692. ACKTcb->tcb_ack_history[ACKTcb->tcb_history_index].sequence = SendUNA;
  1693. ACKTcb->tcb_ack_history[ACKTcb->tcb_history_index].unacked = ACKTcb->tcb_unacked;
  1694. ACKTcb->tcb_history_index++;
  1695. if (ACKTcb->tcb_history_index >= NUM_ACK_HISTORY_ITEMS) {
  1696. ACKTcb->tcb_history_index = 0;
  1697. }
  1698. #endif // ACK_DEBUG
  1699. // Now walk down the list of send requests. If the reference count
  1700. // has gone to 0, put it on the send complete queue.
  1701. EndQ = &ACKTcb->tcb_sendq;
  1702. do {
  1703. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q), tsr_req);
  1704. CTEStructAssert(CurrentTSR, tsr);
  1705. TempQ = CurrentTSR->tsr_req.tr_q.q_next;
  1706. CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
  1707. Result = CTEInterlockedDecrementLong(&CurrentTSR->tsr_refcnt);
  1708. ASSERT(Result >= 0);
  1709. #if TRACE_EVENT
  1710. CPCallBack = TCPCPHandlerRoutine;
  1711. if (CPCallBack != NULL) {
  1712. ulong GroupType;
  1713. WMIInfo.wmi_destaddr = ACKTcb->tcb_daddr;
  1714. WMIInfo.wmi_destport = ACKTcb->tcb_dport;
  1715. WMIInfo.wmi_srcaddr = ACKTcb->tcb_saddr;
  1716. WMIInfo.wmi_srcport = ACKTcb->tcb_sport;
  1717. WMIInfo.wmi_size = CurrentTSR->tsr_size;
  1718. WMIInfo.wmi_context = ACKTcb->tcb_cpcontext;
  1719. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_SEND;
  1720. (*CPCallBack)(GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo),
  1721. NULL);
  1722. }
  1723. #endif
  1724. if ((Result <= 0) &&
  1725. !(CurrentTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1726. // No more references are outstanding, the send can be
  1727. // completed.
  1728. // If we've sent directly from this send, NULL out the next
  1729. // pointer for the last buffer in the chain.
  1730. if (CurrentTSR->tsr_lastbuf != NULL) {
  1731. NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
  1732. CurrentTSR->tsr_lastbuf = NULL;
  1733. }
  1734. ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
  1735. Temp = ACKTcb->tcb_bcountlow;
  1736. ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
  1737. ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1738. ENQUEUE(SendQ, &CurrentTSR->tsr_req.tr_q);
  1739. }
  1740. } while (TempQ != EndQ);
  1741. CheckTCBSends(ACKTcb);
  1742. return;
  1743. }
  1744. OrigAmount = Amount;
  1745. End = QEND(&ACKTcb->tcb_sendq);
  1746. Current = QHEAD(&ACKTcb->tcb_sendq);
  1747. LastCmplt = NULL;
  1748. while (Amount > 0 && Current != End) {
  1749. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
  1750. tsr_req);
  1751. CTEStructAssert(CurrentTSR, tsr);
  1752. if (Amount >= (int)CurrentTSR->tsr_unasize) {
  1753. // This is completely acked. Just advance to the next one.
  1754. Amount -= CurrentTSR->tsr_unasize;
  1755. LastCmplt = Current;
  1756. Current = QNEXT(Current);
  1757. continue;
  1758. }
  1759. // This one is only partially acked. Update his offset and NDIS buffer
  1760. // pointer, and break out. We know that Amount is < the unacked size
  1761. // in this buffer, we we can walk the NDIS buffer chain without fear
  1762. // of falling off the end.
  1763. CurrentBuffer = CurrentTSR->tsr_buffer;
  1764. ASSERT(CurrentBuffer != NULL);
  1765. ASSERT(Amount < (int)CurrentTSR->tsr_unasize);
  1766. CurrentTSR->tsr_unasize -= Amount;
  1767. BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
  1768. if (Amount >= (int)BufLength) {
  1769. do {
  1770. Amount -= BufLength;
  1771. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1772. ASSERT(CurrentBuffer != NULL);
  1773. BufLength = NdisBufferLength(CurrentBuffer);
  1774. } while (Amount >= (int)BufLength);
  1775. CurrentTSR->tsr_offset = Amount;
  1776. CurrentTSR->tsr_buffer = CurrentBuffer;
  1777. } else
  1778. CurrentTSR->tsr_offset += Amount;
  1779. Amount = 0;
  1780. break;
  1781. }
  1782. // We should always be able to remove at least Amount bytes, except in
  1783. // the case where a FIN has been sent. In that case we should be off
  1784. // by exactly one. In the debug builds we'll check this.
  1785. ASSERT(0 == Amount || ((ACKTcb->tcb_flags & FIN_SENT) && (1 == Amount)));
  1786. if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
  1787. if (Current != End) {
  1788. // Need to reevaluate CurrentTSR, in case we bailed out of the
  1789. // above loop after updating Current but before updating
  1790. // CurrentTSR.
  1791. CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
  1792. tsr_req);
  1793. CTEStructAssert(CurrentTSR, tsr);
  1794. ACKTcb->tcb_cursend = CurrentTSR;
  1795. ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
  1796. ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
  1797. ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
  1798. } else {
  1799. ACKTcb->tcb_cursend = NULL;
  1800. ACKTcb->tcb_sendbuf = NULL;
  1801. ACKTcb->tcb_sendofs = 0;
  1802. ACKTcb->tcb_sendsize = 0;
  1803. }
  1804. ACKTcb->tcb_sendnext = SendUNA;
  1805. }
  1806. // Now update tcb_unacked with the amount we tried to ack minus the
  1807. // amount we didn't ack (Amount should be 0 or 1 here).
  1808. ASSERT(Amount == 0 || Amount == 1);
  1809. if (ACKTcb->tcb_unacked) {
  1810. ASSERT(ACKTcb->tcb_unacked >= (uint)OrigAmount - Amount);
  1811. ACKTcb->tcb_unacked -= OrigAmount - Amount;
  1812. }
  1813. #if ACK_DEBUG
  1814. ACKTcb->tcb_ack_history[ACKTcb->tcb_history_index].sequence = SendUNA;
  1815. ACKTcb->tcb_ack_history[ACKTcb->tcb_history_index].unacked = ACKTcb->tcb_unacked;
  1816. ACKTcb->tcb_history_index++;
  1817. if (ACKTcb->tcb_history_index >= NUM_ACK_HISTORY_ITEMS) {
  1818. ACKTcb->tcb_history_index = 0;
  1819. }
  1820. #endif // ACK_DEBUG
  1821. ASSERT(*(int *)&ACKTcb->tcb_unacked >= 0);
  1822. ACKTcb->tcb_senduna = SendUNA;
  1823. // If we've acked any here, LastCmplt will be non-null, and Current will
  1824. // point to the send that should be at the start of the queue. Splice
  1825. // out the completed ones and put them on the end of the send completed
  1826. // queue, and update the TCB send q.
  1827. if (LastCmplt != NULL) {
  1828. Queue *FirstCmplt;
  1829. TCPSendReq *FirstTSR, *EndTSR;
  1830. ASSERT(!EMPTYQ(&ACKTcb->tcb_sendq));
  1831. FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
  1832. // If we've acked everything, just reinit the queue.
  1833. if (Current == End) {
  1834. INITQ(&ACKTcb->tcb_sendq);
  1835. } else {
  1836. // There's still something on the queue. Just update it.
  1837. ACKTcb->tcb_sendq.q_next = Current;
  1838. Current->q_prev = &ACKTcb->tcb_sendq;
  1839. }
  1840. CheckTCBSends(ACKTcb);
  1841. // Now walk down the lists of things acked. If the refcnt on the send
  1842. // is 0, go ahead and put him on the send complete Q. Otherwise set
  1843. // the ACKed bit in the send, and he'll be completed when the count
  1844. // goes to 0 in the transmit confirm.
  1845. //
  1846. // Note that we haven't done any locking here. This will probably
  1847. // need to change in the port to NT.
  1848. // Set FirstTSR to the first TSR we'll complete, and EndTSR to be
  1849. // the first TSR that isn't completed.
  1850. FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, FirstCmplt, tr_q), tsr_req);
  1851. EndTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q), tsr_req);
  1852. CTEStructAssert(FirstTSR, tsr);
  1853. ASSERT(FirstTSR != EndTSR);
  1854. // Now walk the list of ACKed TSRs. If we can complete one, put him
  1855. // on the complete queue.
  1856. while (FirstTSR != EndTSR) {
  1857. TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
  1858. CTEStructAssert(FirstTSR, tsr);
  1859. FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
  1860. // The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
  1861. // goes to 0, so we don't need to do it here.
  1862. #if TRACE_EVENT
  1863. CPCallBack = TCPCPHandlerRoutine;
  1864. if (CPCallBack != NULL) {
  1865. ulong GroupType;
  1866. WMIInfo.wmi_destaddr = ACKTcb->tcb_daddr;
  1867. WMIInfo.wmi_destport = ACKTcb->tcb_dport;
  1868. WMIInfo.wmi_srcaddr = ACKTcb->tcb_saddr;
  1869. WMIInfo.wmi_srcport = ACKTcb->tcb_sport;
  1870. WMIInfo.wmi_size = FirstTSR->tsr_size;
  1871. WMIInfo.wmi_context = ACKTcb->tcb_cpcontext;
  1872. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_SEND;
  1873. (*CPCallBack)(GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo),
  1874. NULL);
  1875. }
  1876. #endif
  1877. // Decrement the reference put on the send buffer when it was
  1878. // initialized indicating the send has been acknowledged.
  1879. if (!(FirstTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1880. Result = CTEInterlockedDecrementLong(&FirstTSR->tsr_refcnt);
  1881. ASSERT(Result >= 0);
  1882. if (Result <= 0) {
  1883. // No more references are outstanding, the send can be
  1884. // completed.
  1885. // If we've sent directly from this send, NULL out the next
  1886. // pointer for the last buffer in the chain.
  1887. if (FirstTSR->tsr_lastbuf != NULL) {
  1888. NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
  1889. FirstTSR->tsr_lastbuf = NULL;
  1890. }
  1891. ACKTcb->tcb_totaltime += (TCPTime - FirstTSR->tsr_time);
  1892. Temp = ACKTcb->tcb_bcountlow;
  1893. ACKTcb->tcb_bcountlow += FirstTSR->tsr_size;
  1894. ACKTcb->tcb_bcounthi +=
  1895. (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
  1896. ENQUEUE(SendQ, &FirstTSR->tsr_req.tr_q);
  1897. }
  1898. } else {
  1899. if (EMPTYQ(&ACKTcb->tcb_sendq) &&
  1900. (FirstTSR->tsr_flags & TSR_FLAG_SEND_AND_DISC)) {
  1901. ENQUEUE(&ACKTcb->tcb_sendq, &FirstTSR->tsr_req.tr_q);
  1902. ACKTcb->tcb_fastchk |= TCP_FLAG_REQUEUE_FROM_SEND_AND_DISC;
  1903. //this will be deleted when CloseTCB will be called on this.
  1904. CheckTCBSends(ACKTcb);
  1905. break;
  1906. }
  1907. }
  1908. FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q), tsr_req);
  1909. }
  1910. }
  1911. }
  1912. //* TrimRcvBuf - Trim the front edge of a receive buffer.
  1913. //
  1914. // A utility routine to trim the front of a receive buffer. We take in a
  1915. // a count (which may be 0) and adjust the pointer in the first buffer in
  1916. // the chain by that much. If there isn't that much in the first buffer,
  1917. // we move onto the next one. If we run out of buffers we'll return a pointer
  1918. // to the last buffer in the chain, with a size of 0. It's the caller's
  1919. // responsibility to catch this.
  1920. //
  1921. // Input: RcvBuf - Buffer to be trimmed.
  1922. // Count - Amount to be trimmed.
  1923. //
  1924. // Returns: A pointer to the new start, or NULL.
  1925. //
  1926. IPRcvBuf *
  1927. TrimRcvBuf(IPRcvBuf * RcvBuf, uint Count)
  1928. {
  1929. uint TrimThisTime;
  1930. ASSERT(RcvBuf != NULL);
  1931. while (Count) {
  1932. ASSERT(RcvBuf != NULL);
  1933. TrimThisTime = MIN(Count, RcvBuf->ipr_size);
  1934. Count -= TrimThisTime;
  1935. RcvBuf->ipr_buffer += TrimThisTime;
  1936. if ((RcvBuf->ipr_size -= TrimThisTime) == 0) {
  1937. if (RcvBuf->ipr_next != NULL)
  1938. RcvBuf = RcvBuf->ipr_next;
  1939. else {
  1940. // Ran out of buffers. Just return this one.
  1941. break;
  1942. }
  1943. }
  1944. }
  1945. return RcvBuf;
  1946. }
  1947. //* FreeRBChain - Free an RB chain.
  1948. //
  1949. // Called to free a chain of RBs. If we're the owner of each RB, we'll
  1950. // free it.
  1951. //
  1952. // Input: RBChain - RBChain to be freed.
  1953. //
  1954. // Returns: Nothing.
  1955. //
  1956. void
  1957. FreeRBChain(IPRcvBuf * RBChain)
  1958. {
  1959. while (RBChain != NULL) {
  1960. if (RBChain->ipr_owner == IPR_OWNER_TCP) {
  1961. IPRcvBuf *Temp;
  1962. Temp = RBChain->ipr_next;
  1963. FreeTcpIpr(RBChain);
  1964. RBChain = Temp;
  1965. } else
  1966. RBChain = RBChain->ipr_next;
  1967. }
  1968. }
  1969. IPRcvBuf DummyBuf;
  1970. //* PullFromRAQ - Pull segments from the reassembly queue.
  1971. //
  1972. // Called when we've received frames out of order, and have some segments
  1973. // on the reassembly queue. We'll walk down the reassembly list, segments that
  1974. // are overlapped by the current rcv. next variable. When we get
  1975. // to one that doesn't completely overlap we'll trim it to fit the next
  1976. // rcv. seq. number, and pull it from the queue.
  1977. //
  1978. // Input: RcvTCB - TCB to pull from.
  1979. // RcvInfo - Pointer to TCPRcvInfo structure for current seg.
  1980. // Size - Pointer to size for current segment. We'll update
  1981. // this when we're done.
  1982. //
  1983. // Returns: Nothing.
  1984. //
  1985. IPRcvBuf *
  1986. PullFromRAQ(TCB * RcvTCB, TCPRcvInfo * RcvInfo, uint * Size)
  1987. {
  1988. TCPRAHdr *CurrentTRH; // Current TCP RA Header being examined.
  1989. TCPRAHdr *TempTRH; // Temporary variable.
  1990. SeqNum NextSeq; // Next sequence number we want.
  1991. IPRcvBuf *NewBuf;
  1992. SeqNum NextTRHSeq; // Seq. number immediately after
  1993. // current TRH.
  1994. int Overlap; // Overlap between current TRH and
  1995. // NextSeq.
  1996. CTEStructAssert(RcvTCB, tcb);
  1997. CurrentTRH = RcvTCB->tcb_raq;
  1998. NextSeq = RcvTCB->tcb_rcvnext;
  1999. while (CurrentTRH != NULL) {
  2000. CTEStructAssert(CurrentTRH, trh);
  2001. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  2002. if (SEQ_LT(NextSeq, CurrentTRH->trh_start)) {
  2003. #if DBG
  2004. *Size = 0;
  2005. #endif
  2006. //invalidate Sack Block
  2007. if ((RcvTCB->tcb_tcpopts & TCP_FLAG_SACK) && RcvTCB->tcb_SackBlock) {
  2008. int i;
  2009. for (i = 0; i < 3; i++) {
  2010. if ((RcvTCB->tcb_SackBlock->Mask[i] != 0) &&
  2011. (SEQ_LT(RcvTCB->tcb_SackBlock->Block[i].end, CurrentTRH->trh_start))) {
  2012. RcvTCB->tcb_SackBlock->Mask[i] = 0;
  2013. }
  2014. }
  2015. }
  2016. return NULL; // The next TRH starts too far down.
  2017. }
  2018. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  2019. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  2020. if (SEQ_GTE(NextSeq, NextTRHSeq)) {
  2021. // The current TRH is overlapped completely. Free it and continue.
  2022. FreeRBChain(CurrentTRH->trh_buffer);
  2023. TempTRH = CurrentTRH->trh_next;
  2024. CTEFreeMem(CurrentTRH);
  2025. CurrentTRH = TempTRH;
  2026. RcvTCB->tcb_raq = TempTRH;
  2027. if (TempTRH == NULL) {
  2028. // We've just cleaned off the RAQ. We can go back on the
  2029. // fast path now.
  2030. if (--(RcvTCB->tcb_slowcount) == 0) {
  2031. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  2032. CheckTCBRcv(RcvTCB);
  2033. }
  2034. break;
  2035. }
  2036. } else {
  2037. Overlap = NextSeq - CurrentTRH->trh_start;
  2038. RcvInfo->tri_seq = NextSeq;
  2039. RcvInfo->tri_flags = CurrentTRH->trh_flags;
  2040. RcvInfo->tri_urgent = CurrentTRH->trh_urg;
  2041. if (Overlap != (int)CurrentTRH->trh_size) {
  2042. NewBuf = FreePartialRB(CurrentTRH->trh_buffer, Overlap);
  2043. *Size = CurrentTRH->trh_size - Overlap;
  2044. } else {
  2045. // This completely overlaps the data in this segment, but the
  2046. // sequence number doesn't overlap completely. There must
  2047. // be a FIN in the TRH. If we called FreePartialRB with this
  2048. // we'd end up returning NULL, which is the signal for failure.
  2049. // Instead we'll just return some bogus value that nobody
  2050. // will look at with a size of 0.
  2051. FreeRBChain(CurrentTRH->trh_buffer);
  2052. ASSERT(CurrentTRH->trh_flags & TCP_FLAG_FIN);
  2053. NewBuf = &DummyBuf;
  2054. *Size = 0;
  2055. }
  2056. RcvTCB->tcb_raq = CurrentTRH->trh_next;
  2057. if (RcvTCB->tcb_raq == NULL) {
  2058. // We've just cleaned off the RAQ. We can go back on the
  2059. // fast path now.
  2060. if (--(RcvTCB->tcb_slowcount) == 0) {
  2061. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  2062. CheckTCBRcv(RcvTCB);
  2063. }
  2064. }
  2065. CTEFreeMem(CurrentTRH);
  2066. return NewBuf;
  2067. }
  2068. }
  2069. #if DBG
  2070. *Size = 0;
  2071. #endif
  2072. //invalidate Sack Block
  2073. if (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK && RcvTCB->tcb_SackBlock) {
  2074. RcvTCB->tcb_SackBlock->Mask[0] = 0;
  2075. RcvTCB->tcb_SackBlock->Mask[1] = 0;
  2076. RcvTCB->tcb_SackBlock->Mask[2] = 0;
  2077. RcvTCB->tcb_SackBlock->Mask[3] = 0;
  2078. }
  2079. return NULL;
  2080. }
  2081. //* CreateTRH - Create a TCP reassembly header.
  2082. //
  2083. // This function tries to create a TCP reassembly header. We take as input
  2084. // a pointer to the previous TRH in the chain, the RcvBuffer to put on,
  2085. // etc. and try to create and link in a TRH. The caller must hold the lock
  2086. // on the TCB when this is called.
  2087. //
  2088. // Input: PrevTRH - Pointer to TRH to insert after.
  2089. // RcvBuf - Pointer to IP RcvBuf chain.
  2090. // RcvInfo - Pointer to RcvInfo for this TRH.
  2091. // Size - Size in bytes of data.
  2092. //
  2093. // Returns: TRUE if we created it, FALSE otherwise.
  2094. //
  2095. uint
  2096. CreateTRH(TCPRAHdr * PrevTRH, IPRcvBuf * RcvBuf, TCPRcvInfo * RcvInfo, int Size)
  2097. {
  2098. TCPRAHdr *NewTRH;
  2099. IPRcvBuf *NewRcvBuf;
  2100. ASSERT((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));
  2101. NewTRH = CTEAllocMemLow(sizeof(TCPRAHdr), 'SPCT');
  2102. if (NewTRH == NULL) {
  2103. return FALSE;
  2104. }
  2105. #if DBG
  2106. NewTRH->trh_sig = trh_signature;
  2107. #endif
  2108. NewRcvBuf = AllocTcpIpr(Size, 'SPCT');
  2109. if (NewRcvBuf == NULL) {
  2110. CTEFreeMem(NewTRH);
  2111. return FALSE;
  2112. }
  2113. if (Size != 0)
  2114. CopyRcvToBuffer(NewRcvBuf->ipr_buffer, RcvBuf, Size, 0);
  2115. NewTRH->trh_start = RcvInfo->tri_seq;
  2116. NewTRH->trh_flags = RcvInfo->tri_flags;
  2117. NewTRH->trh_size = Size;
  2118. NewTRH->trh_urg = RcvInfo->tri_urgent;
  2119. NewTRH->trh_buffer = NewRcvBuf;
  2120. NewTRH->trh_end = NewRcvBuf;
  2121. NewTRH->trh_next = PrevTRH->trh_next;
  2122. PrevTRH->trh_next = NewTRH;
  2123. return TRUE;
  2124. }
  2125. // SendSackInACK - SEnd SACK block in acknowledgement
  2126. //
  2127. // Called if incoming data is in the window but left edge
  2128. // is not advanced because incoming seq > rcvnext.
  2129. // This routine scans the queued up data, constructs SACK block
  2130. // points the block in tcb for SendACK.
  2131. //
  2132. // Entry RcvTCB
  2133. // IncomingSeq Seq num of Data coming in
  2134. //
  2135. // Returns Nothing
  2136. void
  2137. SendSackInACK(TCB * RcvTCB, SeqNum IncomingSeq)
  2138. {
  2139. TCPRAHdr *PrevTRH, *CurrentTRH; // Prev. and current TRH
  2140. // pointers.
  2141. SeqNum NextSeq, NextTRHSeq; // Seq. number of first byte
  2142. SACKSendBlock *SackBlock;
  2143. CTELockHandle TableHandle = 0;
  2144. int i, j;
  2145. CTEStructAssert(RcvTCB, tcb);
  2146. // If we have a SACK block use it else create one.
  2147. // Note that we use max of 4 sack blocks
  2148. // Sack block structure:
  2149. // First long word holds index of the
  2150. // 4 sack blocks, starting from 1. zero
  2151. // in index field means no sack block
  2152. //
  2153. // !--------!--------!--------!--------!
  2154. // | 1 | 2 | 3 | 4 |
  2155. // -------------------------------------
  2156. // | |
  2157. // -------------------------------------
  2158. // | |
  2159. // -------------------------------------
  2160. // Allocate a block if it is not already there
  2161. if (RcvTCB->tcb_SackBlock == NULL) {
  2162. SackBlock = CTEAllocMemN((sizeof(SACKSendBlock)), 'sPCT');
  2163. if (SackBlock == NULL) {
  2164. TableHandle = DISPATCH_LEVEL;
  2165. // Resources failure. Just try to send ack
  2166. // and leave the resource handling to some one else
  2167. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  2168. SendACK(RcvTCB);
  2169. return;
  2170. } else {
  2171. RcvTCB->tcb_SackBlock = SackBlock;
  2172. //Initialize the first entry to indicate that this is the new one
  2173. NdisZeroMemory(SackBlock, sizeof(SACKSendBlock));
  2174. }
  2175. } else
  2176. SackBlock = RcvTCB->tcb_SackBlock;
  2177. IF_TCPDBG(TCP_DEBUG_SACK) {
  2178. TCPTRACE(("SendSackInACK %x %x %d\n", SackBlock, RcvTCB, IncomingSeq));
  2179. }
  2180. PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
  2181. CurrentTRH = PrevTRH->trh_next;
  2182. while (CurrentTRH != NULL) {
  2183. CTEStructAssert(CurrentTRH, trh);
  2184. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  2185. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  2186. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  2187. if ((SackBlock->Mask[0] != (uchar) - 1) && (SEQ_LTE(CurrentTRH->trh_start, IncomingSeq) &&
  2188. SEQ_LTE(IncomingSeq, NextTRHSeq))) {
  2189. if (SackBlock->Mask[0] == 0) {
  2190. //This is the only sack block
  2191. SackBlock->Block[0].begin = CurrentTRH->trh_start;
  2192. SackBlock->Block[0].end = NextTRHSeq;
  2193. SackBlock->Mask[0] = (uchar) - 1; //Make it valid
  2194. } else {
  2195. if (!((SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].begin) &&
  2196. SEQ_GTE(NextTRHSeq, SackBlock->Block[0].end)) ||
  2197. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].begin) &&
  2198. SEQ_LTE(SackBlock->Block[0].begin, NextTRHSeq)) ||
  2199. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[0].end) &&
  2200. SEQ_LTE(SackBlock->Block[0].end, NextTRHSeq)))) {
  2201. // Push the blocks down and fill the top
  2202. for (i = 2; i >= 0; i--) {
  2203. SackBlock->Block[i + 1].begin = SackBlock->Block[i].begin;
  2204. SackBlock->Block[i + 1].end = SackBlock->Block[i].end;
  2205. SackBlock->Mask[i + 1] = -SackBlock->Mask[i];
  2206. }
  2207. }
  2208. SackBlock->Block[0].begin = CurrentTRH->trh_start;
  2209. SackBlock->Block[0].end = NextTRHSeq;
  2210. SackBlock->Mask[0] = (uchar) - 1;
  2211. IF_TCPDBG(TCP_DEBUG_SACK) {
  2212. TCPTRACE(("Sack 0 %d %d \n", CurrentTRH->trh_start, NextTRHSeq));
  2213. }
  2214. }
  2215. } else {
  2216. // process all the sack blocks to see if the currentTRH is
  2217. // valid for those blocks
  2218. for (i = 1; i <= 3; i++) {
  2219. if ((SackBlock->Mask[i] != 0) &&
  2220. (SEQ_LTE(CurrentTRH->trh_start, SackBlock->Block[i].begin) &&
  2221. SEQ_LTE(SackBlock->Block[i].begin, NextTRHSeq))) {
  2222. SackBlock->Block[i].begin = CurrentTRH->trh_start;
  2223. SackBlock->Block[i].end = NextTRHSeq;
  2224. SackBlock->Mask[i] = (uchar) - 1;
  2225. }
  2226. }
  2227. }
  2228. PrevTRH = CurrentTRH;
  2229. CurrentTRH = CurrentTRH->trh_next;
  2230. } //while
  2231. //Check and set the blocks traversed for validity
  2232. for (i = 0; i <= 3; i++) {
  2233. if (SackBlock->Mask[i] != (uchar) - 1) {
  2234. SackBlock->Mask[i] = 0;
  2235. } else {
  2236. SackBlock->Mask[i] = 1;
  2237. IF_TCPDBG(TCP_DEBUG_SACK) {
  2238. TCPTRACE(("Sack in ack %x %d %d\n", i, SackBlock->Block[i].begin, SackBlock->Block[i].end));
  2239. }
  2240. }
  2241. }
  2242. // Make sure that there are no duplicates
  2243. for (i = 0; i < 3; i++) {
  2244. if (SackBlock->Mask[i]) {
  2245. for (j = i + 1; j < 4; j++) {
  2246. if (SackBlock->Mask[j] && (SackBlock->Block[i].begin == SackBlock->Block[j].begin))
  2247. IF_TCPDBG(TCP_DEBUG_SACK) {
  2248. TCPTRACE(("Duplicates!!\n"));
  2249. }
  2250. }
  2251. }
  2252. }
  2253. TableHandle = DISPATCH_LEVEL;
  2254. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  2255. SendACK(RcvTCB);
  2256. }
  2257. //* PutOnRAQ - Put a segment on the reassembly queue.
  2258. //
  2259. // Called during segment reception to put a segment on the reassembly
  2260. // queue. We try to use as few reassembly headers as possible, so if this
  2261. // segment has some overlap with an existing entry in the queue we'll just
  2262. // update the existing entry. If there is no overlap we'll create a new
  2263. // reassembly header. Combining URGENT data with non-URGENT data is tricky.
  2264. // If we get a segment that has urgent data that overlaps the front of a
  2265. // reassembly header we'll always mark the whole chunk as urgent - the value
  2266. // of the urgent pointer will mark the end of urgent data, so this is OK. If it
  2267. // only overlaps at the end, however, we won't combine, since we would have to
  2268. // mark previously non-urgent data as urgent. We'll trim the
  2269. // front of the incoming segment and create a new reassembly header. Also,
  2270. // if we have non-urgent data that overlaps at the front of a reassembly
  2271. // header containing urgent data we can't combine these two, since again we
  2272. // would mark non-urgent data as urgent.
  2273. // Our search will stop if we find an entry with a FIN.
  2274. // We assume that the TCB lock is held by the caller.
  2275. //
  2276. // Entry: RcvTCB - TCB on which to reassemble.
  2277. // RcvInfo - Pointer to RcvInfo for new segment.
  2278. // RcvBuf - IP RcvBuf chain for this segment.
  2279. // Size - Size in bytes of data in this segment.
  2280. //
  2281. // Returns: TRUE or FALSE if it could not put RcvBuf on Queue
  2282. //
  2283. BOOLEAN
  2284. PutOnRAQ(TCB * RcvTCB, TCPRcvInfo * RcvInfo, IPRcvBuf * RcvBuf, uint Size)
  2285. {
  2286. TCPRAHdr *PrevTRH, *CurrentTRH; // Prev. and current TRH
  2287. // pointers.
  2288. SeqNum NextSeq; // Seq. number of first byte
  2289. // after segment being
  2290. // reassembled.
  2291. SeqNum NextTRHSeq; // Seq. number of first byte
  2292. // after current TRH.
  2293. uint Created;
  2294. CTEStructAssert(RcvTCB, tcb);
  2295. ASSERT(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
  2296. ASSERT(!(RcvInfo->tri_flags & TCP_FLAG_SYN));
  2297. NextSeq = RcvInfo->tri_seq + Size +
  2298. ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  2299. PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
  2300. CurrentTRH = PrevTRH->trh_next;
  2301. // Walk down the reassembly queue, looking for the correct place to
  2302. // insert this, until we hit the end.
  2303. while (CurrentTRH != NULL) {
  2304. CTEStructAssert(CurrentTRH, trh);
  2305. ASSERT(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
  2306. NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
  2307. ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
  2308. // First, see if it starts beyond the end of the current TRH.
  2309. if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
  2310. // We know the incoming segment doesn't start beyond the end
  2311. // of this TRH, so we'll either create a new TRH in front of
  2312. // this one or we'll merge the new segment onto this TRH.
  2313. // If the end of the current segment is in front of the start
  2314. // of the current TRH, we'll need to create a new TRH. Otherwise
  2315. // we'll merge these two.
  2316. if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
  2317. break;
  2318. else {
  2319. // There's some overlap. If there's actually data in the
  2320. // incoming segment we'll merge it.
  2321. if (Size != 0) {
  2322. int FrontOverlap, BackOverlap;
  2323. IPRcvBuf *NewRB;
  2324. // We need to merge. If there's a FIN on the incoming
  2325. // segment that would fall inside this current TRH, we
  2326. // have a protocol violation from the remote peer. In this
  2327. // case just return, discarding the incoming segment.
  2328. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  2329. SEQ_LTE(NextSeq, NextTRHSeq))
  2330. return TRUE;
  2331. // We have some overlap. Figure out how much.
  2332. FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
  2333. if (FrontOverlap > 0) {
  2334. // Have overlap in front. Allocate an IPRcvBuf to
  2335. // to hold it, and copy it, unless we would have to
  2336. // combine non-urgent with urgent.
  2337. if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
  2338. (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
  2339. if (CreateTRH(PrevTRH, RcvBuf, RcvInfo,
  2340. CurrentTRH->trh_start - RcvInfo->tri_seq)) {
  2341. PrevTRH = PrevTRH->trh_next;
  2342. CurrentTRH = PrevTRH->trh_next;
  2343. }
  2344. FrontOverlap = 0;
  2345. } else {
  2346. NewRB = AllocTcpIpr(FrontOverlap, 'BPCT');
  2347. if (NewRB == NULL) {
  2348. return TRUE; // Couldn't get the buffer.
  2349. }
  2350. CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
  2351. FrontOverlap, 0);
  2352. CurrentTRH->trh_size += FrontOverlap;
  2353. NewRB->ipr_next = CurrentTRH->trh_buffer;
  2354. CurrentTRH->trh_buffer = NewRB;
  2355. CurrentTRH->trh_start = RcvInfo->tri_seq;
  2356. }
  2357. }
  2358. // We've updated the starting sequence number of this TRH
  2359. // if we needed to. Now look for back overlap. There can't
  2360. // be any back overlap if the current TRH has a FIN. Also
  2361. // we'll need to check for urgent data if there is back
  2362. // overlap.
  2363. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  2364. BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
  2365. if ((BackOverlap > 0) &&
  2366. (RcvInfo->tri_flags & TCP_FLAG_URG) &&
  2367. !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
  2368. (FrontOverlap <= 0)) {
  2369. int AmountToTrim;
  2370. // The incoming segment has urgent data and overlaps
  2371. // on the back but not the front, and the current
  2372. // TRH has no urgent data. We can't combine into
  2373. // this TRH, so trim the front of the incoming
  2374. // segment to NextTRHSeq and move to the next
  2375. // TRH.
  2376. AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
  2377. ASSERT(AmountToTrim >= 0);
  2378. ASSERT(AmountToTrim < (int)Size);
  2379. RcvBuf = FreePartialRB(RcvBuf, (uint) AmountToTrim);
  2380. RcvInfo->tri_seq += AmountToTrim;
  2381. RcvInfo->tri_urgent -= AmountToTrim;
  2382. PrevTRH = CurrentTRH;
  2383. CurrentTRH = PrevTRH->trh_next;
  2384. //Adjust the incoming size too...
  2385. Size -= AmountToTrim;
  2386. continue;
  2387. }
  2388. } else
  2389. BackOverlap = 0;
  2390. // Now if we have back overlap, copy it.
  2391. if (BackOverlap > 0) {
  2392. // We have back overlap. Get a buffer to copy it into.
  2393. // If we can't get one, we won't just return, because
  2394. // we may have updated the front and may need to
  2395. // update the urgent info.
  2396. NewRB = AllocTcpIpr(BackOverlap, 'BPCT');
  2397. if (NewRB != NULL) {
  2398. // Got the buffer.
  2399. CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
  2400. BackOverlap, NextTRHSeq - RcvInfo->tri_seq);
  2401. CurrentTRH->trh_size += BackOverlap;
  2402. NewRB->ipr_next = CurrentTRH->trh_end->ipr_next;
  2403. CurrentTRH->trh_end->ipr_next = NewRB;
  2404. CurrentTRH->trh_end = NewRB;
  2405. // This data segment could also contain a FIN. If
  2406. // so, just set the TRH flag.
  2407. //
  2408. // N.B. If there's another reassembly header after
  2409. // the current one, the data that we're about
  2410. // to put on the current header might already be
  2411. // on that subsequent header which, in that event,
  2412. // will already have the FIN flag set.
  2413. // Check for that case before recording the FIN.
  2414. if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
  2415. !CurrentTRH->trh_next) {
  2416. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  2417. }
  2418. }
  2419. }
  2420. // Everything should be consistent now. If there's an
  2421. // urgent data pointer in the incoming segment, update the
  2422. // one in the TRH now.
  2423. if (RcvInfo->tri_flags & TCP_FLAG_URG) {
  2424. SeqNum UrgSeq;
  2425. // Have an urgent pointer. If the current TRH already
  2426. // has an urgent pointer, see which is bigger. Otherwise
  2427. // just use this one.
  2428. UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
  2429. if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
  2430. SeqNum TRHUrgSeq;
  2431. TRHUrgSeq = CurrentTRH->trh_start +
  2432. CurrentTRH->trh_urg;
  2433. if (SEQ_LT(UrgSeq, TRHUrgSeq))
  2434. UrgSeq = TRHUrgSeq;
  2435. } else
  2436. CurrentTRH->trh_flags |= TCP_FLAG_URG;
  2437. CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
  2438. }
  2439. } else {
  2440. // We have a 0 length segment. The only interesting thing
  2441. // here is if there's a FIN on the segment. If there is,
  2442. // and the seq. # of the incoming segment is exactly after
  2443. // the current TRH, OR matches the FIN in the current TRH,
  2444. // we note it.
  2445. if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
  2446. if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
  2447. if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
  2448. CurrentTRH->trh_flags |= TCP_FLAG_FIN;
  2449. else
  2450. ASSERT(0);
  2451. } else {
  2452. ASSERT(SEQ_EQ((NextTRHSeq - 1), RcvInfo->tri_seq));
  2453. }
  2454. }
  2455. }
  2456. return TRUE;
  2457. }
  2458. } else {
  2459. // Look at the next TRH, unless the current TRH has a FIN. If he
  2460. // has a FIN, we won't save any data beyond that anyway.
  2461. if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
  2462. return TRUE;
  2463. PrevTRH = CurrentTRH;
  2464. CurrentTRH = PrevTRH->trh_next;
  2465. }
  2466. }
  2467. // When we get here, we need to create a new TRH. If we create one and
  2468. // there was previously nothing on the reassembly queue, we'll have to
  2469. // move off the fast receive path.
  2470. CurrentTRH = RcvTCB->tcb_raq;
  2471. Created = CreateTRH(PrevTRH, RcvBuf, RcvInfo, (int)Size);
  2472. if (Created && CurrentTRH == NULL) {
  2473. RcvTCB->tcb_slowcount++;
  2474. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2475. CheckTCBRcv(RcvTCB);
  2476. } else if (!Created) {
  2477. // Caller needs to know about this failure
  2478. // to free resources
  2479. return FALSE;
  2480. }
  2481. return TRUE;
  2482. }
  2483. //* HandleFastXmit - Handles fast retransmit
  2484. //
  2485. // Called by TCPRcv to transmit a segment
  2486. // without waiting for re-transmit timeout to fire.
  2487. //
  2488. // Entry: RcvTCB - Connection context for this Rcv
  2489. // RcvInfo - Pointer to rcvd TCP Header information
  2490. //
  2491. // Returns: TRUE if the segment got retransmitted, FALSE
  2492. // in all other cases.
  2493. //
  2494. BOOLEAN
  2495. HandleFastXmit(TCB *RcvTCB, TCPRcvInfo *RcvInfo)
  2496. {
  2497. uint CWin;
  2498. RcvTCB->tcb_dup++;
  2499. if ((RcvTCB->tcb_dup == MaxDupAcks)) {
  2500. //
  2501. // Okay. Time to retransmit the segment the
  2502. // receiver is asking for
  2503. //
  2504. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  2505. RcvTCB->tcb_rtt = 0;
  2506. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  2507. //
  2508. // Don't let the slow start threshold go
  2509. // below 2 segments
  2510. //
  2511. RcvTCB->tcb_ssthresh = MAX(
  2512. MIN(RcvTCB->tcb_cwin, RcvTCB->tcb_sendwin) / 2,
  2513. (uint) RcvTCB->tcb_mss * 2);
  2514. }
  2515. //
  2516. // Recall the segment in question and send it
  2517. // out. Note that tcb_lock will be
  2518. // dereferenced by the caller
  2519. //
  2520. CWin = RcvTCB->tcb_ssthresh + (MaxDupAcks + 1) * RcvTCB->tcb_mss;
  2521. ResetAndFastSend(RcvTCB, RcvTCB->tcb_senduna, CWin);
  2522. return TRUE;
  2523. } else if ((RcvTCB->tcb_dup > MaxDupAcks)) {
  2524. int SendWin;
  2525. uint AmtOutstanding, AmtUnsent;
  2526. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  2527. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  2528. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  2529. SEQ_LTE(RcvTCB->tcb_sendwl2,RcvInfo->tri_ack)))) {
  2530. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  2531. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  2532. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  2533. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  2534. }
  2535. //
  2536. // Update the cwin to reflect the fact that
  2537. // the dup ack indicates the previous frame
  2538. // was received by the receiver
  2539. //
  2540. RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
  2541. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  2542. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext -
  2543. RcvTCB->tcb_senduna);
  2544. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  2545. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  2546. AmtOutstanding);
  2547. if (SendWin < RcvTCB->tcb_mss) {
  2548. RcvTCB->tcb_force = 1;
  2549. }
  2550. }
  2551. } else if ((RcvTCB->tcb_dup < MaxDupAcks)) {
  2552. int SendWin;
  2553. uint AmtOutstanding, AmtUnsent;
  2554. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo->tri_ack) &&
  2555. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) ||
  2556. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo->tri_seq) &&
  2557. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo->tri_ack)))) {
  2558. RcvTCB->tcb_sendwin = RcvInfo->tri_window;
  2559. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo->tri_window);
  2560. RcvTCB->tcb_sendwl1 = RcvInfo->tri_seq;
  2561. RcvTCB->tcb_sendwl2 = RcvInfo->tri_ack;
  2562. //
  2563. // Since we've updated the window,
  2564. // remember to send some more.
  2565. //
  2566. }
  2567. //
  2568. // Check if we need to set tcb_force.
  2569. //
  2570. if ((RcvTCB->tcb_cwin + RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin) {
  2571. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext - RcvTCB->tcb_senduna);
  2572. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  2573. SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
  2574. AmtOutstanding);
  2575. if (SendWin < RcvTCB->tcb_mss) {
  2576. RcvTCB->tcb_force = 1;
  2577. }
  2578. }
  2579. } // End of all MaxDupAck cases
  2580. return FALSE;
  2581. }
  2582. //* TCPRcv - Receive a TCP segment.
  2583. //
  2584. // This is the routine called by IP when we need to receive a TCP segment.
  2585. // In general, we follow the RFC 793 event processing section pretty closely,
  2586. // but there is a 'fast path' where we make some quick checks on the incoming
  2587. // segment, and if it matches we deliver it immediately.
  2588. //
  2589. // Entry: IPContext - IPContext identifying physical i/f that
  2590. // received the data.
  2591. // Dest - IPAddr of destination.
  2592. // Src - IPAddr of source.
  2593. // LocalAddr - Local address of network which caused this to be
  2594. // received.
  2595. // SrcAddr - Address of local interface which received the packet
  2596. // IPH - IP Header.
  2597. // IPHLength - Bytes in IPH.
  2598. // RcvBuf - Pointer to receive buffer chain containing data.
  2599. // Size - Size in bytes of data received.
  2600. // IsBCast - Boolean indicator of whether or not this came in as
  2601. // a bcast.
  2602. // Protocol - Protocol this came in on - should be TCP.
  2603. // OptInfo - Pointer to info structure for received options.
  2604. //
  2605. // Returns: Status of reception. Anything other than IP_SUCCESS will cause
  2606. // IP to send a 'port unreachable' message.
  2607. //
  2608. IP_STATUS
  2609. TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src, IPAddr LocalAddr,
  2610. IPAddr SrcAddr, IPHeader UNALIGNED * IPH, uint IPHLength, IPRcvBuf * RcvBuf,
  2611. uint Size, uchar IsBCast, uchar Protocol, IPOptInfo * OptInfo)
  2612. {
  2613. TCPHeader UNALIGNED *TCPH; // The TCP header.
  2614. TCB *RcvTCB; // TCB on which to receive the packet.
  2615. TWTCB *RcvTWTCB;
  2616. CTELockHandle TableHandle = 0, TCBHandle = 0;
  2617. TCPRcvInfo RcvInfo; // Local swapped copy of rcv info.
  2618. uint DataOffset; // Offset from start of header to data.
  2619. uint Actions;
  2620. uint BytesTaken;
  2621. uint NewSize;
  2622. uint index;
  2623. uint Partition;
  2624. PNDIS_PACKET OffLoadPkt;
  2625. CTELockHandle TWTableHandle;
  2626. int tsval; //Timestamp value
  2627. int tsecr; //Timestamp to be echoed
  2628. BOOLEAN time_stamp = FALSE;
  2629. BOOLEAN ChkSumOk = FALSE;
  2630. SeqNum UpdatedSeqNum=0;
  2631. BOOLEAN UseUpdatedSeqNum=FALSE;
  2632. #if TRACE_EVENT
  2633. PTDI_DATA_REQUEST_NOTIFY_ROUTINE CPCallBack;
  2634. WMIData WMIInfo;
  2635. #endif
  2636. CheckRBList(RcvBuf, Size);
  2637. TCPSIncrementInSegCount();
  2638. // Checksum it, to make sure it's valid.
  2639. TCPH = (TCPHeader *) RcvBuf->ipr_buffer;
  2640. if (!IsBCast) {
  2641. if (RcvBuf->ipr_pClientCnt) {
  2642. PNDIS_PACKET_EXTENSION PktExt;
  2643. NDIS_TCP_IP_CHECKSUM_PACKET_INFO ChksumPktInfo;
  2644. if (RcvBuf->ipr_pMdl) {
  2645. OffLoadPkt = NDIS_GET_ORIGINAL_PACKET((PNDIS_PACKET) RcvBuf->ipr_RcvContext);
  2646. if (!OffLoadPkt) {
  2647. OffLoadPkt = (PNDIS_PACKET) RcvBuf->ipr_RcvContext;
  2648. }
  2649. } else {
  2650. OffLoadPkt = (PNDIS_PACKET) RcvBuf->ipr_pClientCnt;
  2651. }
  2652. PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(OffLoadPkt);
  2653. ChksumPktInfo.Value = PtrToUlong(PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo]);
  2654. if (ChksumPktInfo.Receive.NdisPacketTcpChecksumSucceeded) {
  2655. ChkSumOk = TRUE;
  2656. #if DBG
  2657. DbgTcpHwChkSumOk++;
  2658. #endif
  2659. } else if (ChksumPktInfo.Receive.NdisPacketTcpChecksumFailed) {
  2660. #if DBG
  2661. DbgTcpHwChkSumErr++;
  2662. #endif
  2663. TStats.ts_inerrs++;
  2664. return IP_SUCCESS;
  2665. }
  2666. }
  2667. if (!ChkSumOk) {
  2668. if (XsumRcvBuf(PHXSUM(Src, Dest, PROTOCOL_TCP, Size), RcvBuf) == 0xffff){
  2669. ChkSumOk = TRUE;
  2670. }
  2671. } else {
  2672. // Pretch the rcv buffer in to cache
  2673. // to improve copy performance
  2674. #if !MILLEN
  2675. PrefetchRcvBuf(RcvBuf);
  2676. #endif
  2677. }
  2678. if ((Size >= sizeof(TCPHeader)) && ChkSumOk) {
  2679. // The packet is valid. Get the info we need and byte swap it,
  2680. // and then try to find a matching TCB.
  2681. RcvInfo.tri_seq = net_long(TCPH->tcp_seq);
  2682. RcvInfo.tri_ack = net_long(TCPH->tcp_ack);
  2683. RcvInfo.tri_window = (uint) net_short(TCPH->tcp_window);
  2684. RcvInfo.tri_urgent = (uint) net_short(TCPH->tcp_urgent);
  2685. RcvInfo.tri_flags = (uint) TCPH->tcp_flags;
  2686. DataOffset = TCP_HDR_SIZE(TCPH);
  2687. if (DataOffset <= Size) {
  2688. Size -= DataOffset;
  2689. ASSERT(DataOffset <= RcvBuf->ipr_size);
  2690. RcvBuf->ipr_size -= DataOffset;
  2691. RcvBuf->ipr_buffer += DataOffset;
  2692. RcvBuf->ipr_RcvOffset += DataOffset;
  2693. //CTEGetLockAtDPC(&TCBTableLock, &TableHandle);
  2694. // FindTCB will lock tcbtablelock, returns with tcb_lock
  2695. // held, if found.
  2696. RcvTCB = FindTCB(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest, &TCBHandle, TRUE, &index);
  2697. Partition = GET_PARTITION(index);
  2698. if (RcvTCB == NULL) {
  2699. CTEGetLockAtDPC(&pTWTCBTableLock[Partition], &TableHandle);
  2700. RcvTWTCB = FindTCBTW(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest, index);
  2701. if (RcvTWTCB != NULL) {
  2702. // Found one. Already locked.
  2703. // entering twtcbtable and tcb lock held
  2704. // released in the following routine.
  2705. UpdatedSeqNum = RcvInfo.tri_seq;
  2706. if (HandleTWTCB(RcvTWTCB, RcvInfo.tri_flags,
  2707. &UpdatedSeqNum, Partition,
  2708. TCBHandle)) {
  2709. // New syn handling code while in time_wait ..
  2710. UseUpdatedSeqNum=TRUE;
  2711. } else {
  2712. return (IP_SUCCESS);
  2713. }
  2714. } else {
  2715. BOOLEAN reset=FALSE;
  2716. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TableHandle);
  2717. RcvTCB = FindSynTCB(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest, &TCBHandle, TRUE, index,&reset);
  2718. if (RcvTCB == NULL ) {
  2719. if (reset) {
  2720. SynAttChk(NULL, NULL);
  2721. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  2722. return IP_SUCCESS;
  2723. }
  2724. } else {
  2725. if ((RcvInfo.tri_flags & TCP_FLAG_RST) ||
  2726. (RcvInfo.tri_flags & TCP_FLAG_SYN)) {
  2727. //This needs to be closed here instead of
  2728. //handling this all the way down
  2729. SynAttChk(NULL, RcvTCB);
  2730. TryToCloseTCB(RcvTCB,TCB_CLOSE_ABORTED,TCBHandle);
  2731. return IP_SUCCESS;
  2732. }
  2733. //update options
  2734. if (OptInfo->ioi_options != NULL) {
  2735. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  2736. (*LocalNetInfo.ipi_updateopts) (
  2737. OptInfo,
  2738. &RcvTCB->tcb_opt,
  2739. Src,
  2740. NULL_IP_ADDR);
  2741. }
  2742. }
  2743. }
  2744. }
  2745. }
  2746. if (RcvTCB == NULL) {
  2747. uchar DType;
  2748. // Didn't find a matching TCB. If this segment carries a SYN,
  2749. // find a matching address object and see it it has a listen
  2750. // indication. If it does, call it. Otherwise send a RST
  2751. // back to the sender.
  2752. // Make sure that the source address isn't a broadcast
  2753. // before proceeding.
  2754. if ((*LocalNetInfo.ipi_invalidsrc) (Src)) {
  2755. return IP_SUCCESS;
  2756. }
  2757. // If it doesn't have a SYN (and only a SYN), we'll send a
  2758. // reset.
  2759. if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST)) ==
  2760. TCP_FLAG_SYN) {
  2761. AddrObj *AO;
  2762. //
  2763. // This segment had a SYN.
  2764. //
  2765. //
  2766. CTEGetLockAtDPC(&AddrObjTableLock.Lock, &TableHandle);
  2767. // See if we are filtering the
  2768. // destination interface/port.
  2769. //
  2770. if ((!SecurityFilteringEnabled ||
  2771. IsPermittedSecurityFilter(
  2772. LocalAddr,
  2773. IPContext,
  2774. PROTOCOL_TCP,
  2775. (ulong) net_short(TCPH->tcp_dest))))
  2776. {
  2777. //
  2778. // Find a matching address object, and then try
  2779. // and find a listening connection on that AO.
  2780. //
  2781. AO = GetBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP, TRUE);
  2782. //NTQFE 68201
  2783. if (AO && AO->ao_connect == NULL) {
  2784. //
  2785. //Lets see if there is one more addr obj
  2786. //matching the incoming request with
  2787. //ao_connect != NULL
  2788. //
  2789. AddrObj *tmpAO;
  2790. tmpAO = GetNextBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP, AO, TRUE);
  2791. if (tmpAO != NULL) {
  2792. AO = tmpAO;
  2793. }
  2794. }
  2795. if (AO != NULL) {
  2796. BOOLEAN syntcb = FALSE;
  2797. //
  2798. // Found an AO. Try and find a listening
  2799. // connection. FindListenConn will free the
  2800. //lock on the AddrObjTable.
  2801. //
  2802. RcvTCB = NULL;
  2803. RcvTCB = FindListenConn(AO, Src, Dest,
  2804. TCPH->tcp_src, OptInfo,
  2805. TCPH, &RcvInfo, &syntcb);
  2806. if (RcvTCB != NULL) {
  2807. uint Inserted;
  2808. CTEStructAssert(RcvTCB, tcb);
  2809. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  2810. //
  2811. // We found a listening connection.
  2812. // Initialize it now, and if it is
  2813. //actually to be accepted we'll
  2814. // send a SYN-ACK also.
  2815. //
  2816. ASSERT(RcvTCB->tcb_state == TCB_SYN_RCVD);
  2817. if (UseUpdatedSeqNum) {
  2818. RcvTCB->tcb_sendnext = UpdatedSeqNum;
  2819. }
  2820. RcvTCB->tcb_daddr = Src;
  2821. RcvTCB->tcb_saddr = Dest;
  2822. RcvTCB->tcb_dport = TCPH->tcp_src;
  2823. RcvTCB->tcb_sport = TCPH->tcp_dest;
  2824. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  2825. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  2826. //
  2827. // Find Remote MSS and also if WS, TS or
  2828. //sack options are negotiated.
  2829. //
  2830. RcvTCB->tcb_sndwinscale = 0;
  2831. RcvTCB->tcb_remmss = FindMSSAndOptions(TCPH, RcvTCB,FALSE);
  2832. if (RcvTCB->tcb_remmss <= ALIGNED_TS_OPT_SIZE) {
  2833. //turn off TS if mss is not sufficient to
  2834. //hold TS fileds.
  2835. RcvTCB->tcb_tcpopts &= ~TCP_FLAG_TS;
  2836. }
  2837. TStats.ts_passiveopens++;
  2838. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  2839. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  2840. Inserted = InsertTCB(RcvTCB);
  2841. //
  2842. // Get the lock on it, and see if it's been
  2843. // accepted.
  2844. //
  2845. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  2846. if (!Inserted) {
  2847. // Couldn't insert it!.
  2848. CompleteConnReq(RcvTCB, OptInfo,
  2849. TDI_CONNECTION_ABORTED);
  2850. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
  2851. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  2852. DerefTCB(RcvTCB, TableHandle);
  2853. return IP_SUCCESS;
  2854. }
  2855. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  2856. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  2857. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  2858. DelayAction(RcvTCB, NEED_OUTPUT);
  2859. }
  2860. // We'll need to update the options, in any case.
  2861. if (OptInfo->ioi_options != NULL) {
  2862. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  2863. (*LocalNetInfo.ipi_updateopts) (
  2864. OptInfo,
  2865. &RcvTCB->tcb_opt,
  2866. Src,
  2867. NULL_IP_ADDR);
  2868. }
  2869. }
  2870. if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
  2871. //
  2872. // The connection was accepted. Finish
  2873. // the initialization, and send the
  2874. // SYN ack.
  2875. //
  2876. AcceptConn(RcvTCB, DISPATCH_LEVEL);
  2877. return IP_SUCCESS;
  2878. } else {
  2879. //
  2880. // We don't know what to do about the
  2881. // connection yet. Return the pending
  2882. // listen, dereference the connection,
  2883. // and return.
  2884. //
  2885. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  2886. DerefTCB(RcvTCB, DISPATCH_LEVEL);
  2887. return IP_SUCCESS;
  2888. }
  2889. }
  2890. if (syntcb) {
  2891. return IP_SUCCESS;
  2892. }
  2893. //
  2894. // No listening connection. AddrObjTableLock
  2895. // was released by FindListenConn. Fall
  2896. // through to send RST code.
  2897. //
  2898. } else {
  2899. //
  2900. // No address object. Free the lock, and fall
  2901. // through to the send RST code.
  2902. //
  2903. CTEFreeLockFromDPC(&AddrObjTableLock.Lock, TableHandle);
  2904. }
  2905. } else {
  2906. //
  2907. // Operation not permitted. Free the lock, and
  2908. // fall through to the send RST code.
  2909. //
  2910. CTEFreeLockFromDPC(&AddrObjTableLock.Lock, TableHandle);
  2911. }
  2912. }
  2913. // Toss out any segments containing RST.
  2914. if (RcvInfo.tri_flags & TCP_FLAG_RST)
  2915. return IP_SUCCESS;
  2916. //
  2917. // Not a SYN, no AddrObj available, or port filtered.
  2918. // Send a RST back.
  2919. //
  2920. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  2921. return IP_SUCCESS;
  2922. }
  2923. //
  2924. //TCB is already locked
  2925. //
  2926. CheckTCBRcv(RcvTCB);
  2927. if ( (RcvTCB->tcb_flags & KEEPALIVE) && (RcvTCB->tcb_conn != NULL) )
  2928. START_TCB_TIMER_R(RcvTCB, KA_TIMER, RcvTCB->tcb_conn->tc_tcbkatime);
  2929. RcvTCB->tcb_kacount = 0;
  2930. //scale the incoming window
  2931. if (!(RcvInfo.tri_flags & TCP_FLAG_SYN)) {
  2932. RcvInfo.tri_window = ((uint) net_short(TCPH->tcp_window) << RcvTCB->tcb_sndwinscale);
  2933. }
  2934. //
  2935. // We need to check if Time stamp or Sack options are present.
  2936. //
  2937. if (RcvTCB->tcb_tcpopts) {
  2938. int OptSize;
  2939. uchar *OptPtr;
  2940. OptSize = TCP_HDR_SIZE(TCPH) - sizeof(TCPHeader);
  2941. OptPtr = (uchar *) (TCPH + 1);
  2942. while (OptSize > 0) {
  2943. if (*OptPtr == TCP_OPT_EOL)
  2944. break;
  2945. if (*OptPtr == TCP_OPT_NOP) {
  2946. OptPtr++;
  2947. OptSize--;
  2948. continue;
  2949. }
  2950. if ((*OptPtr == TCP_OPT_TS) && (OptSize > 1) && (OptPtr[1] == TS_OPT_SIZE) &&
  2951. (RcvTCB->tcb_tcpopts & TCP_FLAG_TS)) {
  2952. // remember timestamp and the the echoed time stamp
  2953. time_stamp = TRUE;
  2954. tsval = *(int UNALIGNED *)&OptPtr[2];
  2955. tsval = net_long(tsval);
  2956. tsecr = *(int UNALIGNED *)&OptPtr[6];
  2957. tsecr = net_long(tsecr);
  2958. } else if ((*OptPtr == TCP_OPT_SACK) && (OptSize > 1) && (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK)) {
  2959. SackSeg UNALIGNED *SackPtr;
  2960. SackListEntry *SackList, *Prev, *Current;
  2961. ushort SackOptionLength;
  2962. int i;
  2963. //SACK Option processing
  2964. (uchar *) SackPtr = OptPtr + 2;
  2965. SackOptionLength = OptPtr[1];
  2966. ASSERT(SackOptionLength <= 32);
  2967. //
  2968. // If the incoming sack blocks are with in this
  2969. // send window Just chain them.
  2970. // When there are some retransmissions, this list
  2971. // will be checked to see if retransmission can be
  2972. // skipped.
  2973. // Note that when the send window is slided, the
  2974. // sack list must be cleandup.
  2975. //
  2976. Prev = STRUCT_OF(SackListEntry, &RcvTCB->tcb_SackRcvd, next);
  2977. Current = RcvTCB->tcb_SackRcvd;
  2978. // Scan the list for old sack entries and purge them
  2979. while ((Current != NULL) && SEQ_GT(RcvInfo.tri_ack, Current->begin)) {
  2980. Prev->next = Current->next;
  2981. IF_TCPDBG(TCP_DEBUG_SACK) {
  2982. TCPTRACE(("Purging old entries %x %d %d\n", Current, Current->begin, Current->end));
  2983. }
  2984. CTEFreeMem(Current);
  2985. Current = Prev->next;
  2986. }
  2987. //
  2988. //Process each sack block in the incoming segment
  2989. // 8 bytes per block!
  2990. //
  2991. for (i = 0; i < (SackOptionLength >> 3); i++) {
  2992. SeqNum SakBegin, SakEnd;
  2993. // Get the rcvd bytes begin and end offset
  2994. SakBegin = net_long(SackPtr->begin);
  2995. SakEnd = net_long(SackPtr->end);
  2996. ASSERT(SEQ_GT(SakEnd, SakBegin));
  2997. // Sanity check this Sack Block against our
  2998. // send variables
  2999. if (!(SEQ_GTE(SakBegin, RcvTCB->tcb_senduna) &&
  3000. SEQ_LT(SakBegin, RcvTCB->tcb_sendmax) &&
  3001. SEQ_GT(SakEnd, RcvTCB->tcb_senduna) &&
  3002. SEQ_LTE(SakEnd, RcvTCB->tcb_sendmax))) {
  3003. SackPtr++;
  3004. continue;
  3005. }
  3006. IF_TCPDBG(TCP_DEBUG_SACK) {
  3007. TCPTRACE(("In sack entry opt %d %d\n", i, RcvTCB->tcb_senduna));
  3008. }
  3009. Prev = STRUCT_OF(SackListEntry, &RcvTCB->tcb_SackRcvd, next);
  3010. Current = RcvTCB->tcb_SackRcvd;
  3011. //
  3012. // scan the list and insert the incoming sack
  3013. // block in the right place, taking care of
  3014. // overlaps, if any.
  3015. //
  3016. while (Current != NULL) {
  3017. if (SEQ_GT(Current->begin, SakBegin)) {
  3018. //
  3019. // Check if this sack block fills the
  3020. // hole from previous entry. If so,
  3021. // just update the end seq number.
  3022. //
  3023. if ((Prev != RcvTCB->tcb_SackRcvd) && SEQ_EQ(Prev->end, SakBegin)) {
  3024. Prev->end = SakEnd;
  3025. IF_TCPDBG(TCP_DEBUG_SACK) {
  3026. TCPTRACE(("updating prev %x %d %d %x\n", Prev, Prev->begin, Prev->end, RcvTCB));
  3027. }
  3028. //
  3029. //Make sure that next entry is not
  3030. //an overlap.
  3031. //
  3032. if (SEQ_LTE(Current->begin, SakEnd)) {
  3033. ASSERT(SEQ_GT(Current->begin, Prev->begin));
  3034. Prev->end = Current->end;
  3035. Prev->next = Current->next;
  3036. CTEFreeMem(Current);
  3037. Current = Prev;
  3038. //
  3039. // Now we need to scan forward
  3040. // and check if sackend
  3041. // spans several entries
  3042. //
  3043. {
  3044. SackListEntry *tmpcurrent = Current->next;
  3045. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3046. Current->next = tmpcurrent->next;
  3047. CTEFreeMem(tmpcurrent);
  3048. tmpcurrent = Current->next;
  3049. }
  3050. //
  3051. // above check pointed
  3052. // tmpcurrent whose end is
  3053. // > sakend
  3054. // Check if the tmpcurrent
  3055. // entry begin is overlapped
  3056. //
  3057. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3058. Current->end = tmpcurrent->end;
  3059. Current->next = tmpcurrent->next;
  3060. CTEFreeMem(tmpcurrent);
  3061. }
  3062. }
  3063. }
  3064. break;
  3065. } else if (SEQ_LTE(Current->begin, SakEnd)) {
  3066. //
  3067. // Current is continuation(may be
  3068. // with overlap) of incoming
  3069. // sack pair. Update current
  3070. //
  3071. IF_TCPDBG(TCP_DEBUG_SACK) {
  3072. TCPTRACE(("updating in back overlap %x %d %d %d %d\n", Current, Current->begin, Current->end, SakBegin, SakEnd));
  3073. }
  3074. Current->begin = SakBegin;
  3075. //
  3076. // If the end shoots out of the
  3077. // current end new end will be the
  3078. // current end
  3079. // (overlaps at the tail too)
  3080. // may overlap several entries.
  3081. // So, check them all.
  3082. //
  3083. if (SEQ_GT(SakEnd, Current->end)) {
  3084. SackListEntry *tmpcurrent = Current->next;
  3085. Current->end = SakEnd;
  3086. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3087. Current->next = tmpcurrent->next;
  3088. CTEFreeMem(tmpcurrent);
  3089. tmpcurrent = Current->next;
  3090. }
  3091. //
  3092. // above check pointed
  3093. // tmpcurrent whose end is >
  3094. // sakend. Check if the
  3095. // tmpcurrent entry begin is
  3096. // overlapped
  3097. //
  3098. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3099. Current->end = tmpcurrent->end;
  3100. Current->next = tmpcurrent->next;
  3101. CTEFreeMem(tmpcurrent);
  3102. }
  3103. }
  3104. break;
  3105. } else {
  3106. //
  3107. //This is the place where we
  3108. //insert the new entry
  3109. //
  3110. SackList = CTEAllocMemN(sizeof(SackListEntry), 'sPCT');
  3111. if (SackList == NULL) {
  3112. TCPTRACE(("No mem for sack List \n"));
  3113. goto no_mem;
  3114. }
  3115. IF_TCPDBG(TCP_DEBUG_SACK) {
  3116. TCPTRACE(("Inserting Sackentry %x %d %d %x\n", SackList, SakBegin, SakEnd, RcvTCB));
  3117. }
  3118. SackList->begin = SakBegin;
  3119. SackList->end = SakEnd;
  3120. Prev->next = SackList;
  3121. SackList->next = Current;
  3122. break;
  3123. }
  3124. } else if (SEQ_EQ(Current->begin, SakBegin)) {
  3125. SackListEntry *tmpcurrent = Current->next;
  3126. //
  3127. // Make sure that the new SakEnd is
  3128. // not overlapping any other sak
  3129. // entries.
  3130. //
  3131. if (tmpcurrent && SEQ_GTE(SakEnd, tmpcurrent->begin)) {
  3132. Current->end = SakEnd;
  3133. //
  3134. //Sure, this sack overlaps next
  3135. //entry.
  3136. //
  3137. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3138. Current->next = tmpcurrent->next;
  3139. CTEFreeMem(tmpcurrent);
  3140. tmpcurrent = Current->next;
  3141. }
  3142. //
  3143. // above check pointed tmpcurrent
  3144. // whose end is > sakend
  3145. // Check if the tmpcurrent entry
  3146. // begin is overlapped
  3147. //
  3148. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3149. Current->end = tmpcurrent->end;
  3150. Current->next = tmpcurrent->next;
  3151. CTEFreeMem(tmpcurrent);
  3152. }
  3153. break;
  3154. } else {
  3155. //
  3156. // This can still be a duplicate
  3157. // Make sure that SakEnd is really
  3158. // greater than Current->end
  3159. //
  3160. if (SEQ_GT(SakEnd, Current->end)) {
  3161. IF_TCPDBG(TCP_DEBUG_SACK) {
  3162. TCPTRACE(("updating current %x %d %d %d\n", Current, Current->begin, Current->end, SakEnd));
  3163. }
  3164. Current->end = SakEnd;
  3165. }
  3166. break;
  3167. }
  3168. //SakBegin > Current->begin
  3169. } else if (SEQ_LTE(SakEnd, Current->end)) {
  3170. //
  3171. //The incoming sack end is within the
  3172. //current end so, this overlaps the
  3173. //existing sack entry ignore this.
  3174. //
  3175. break;
  3176. //
  3177. // incoming seq begin overlaps the
  3178. // current end update the current end.
  3179. //
  3180. } else if (SEQ_LTE(SakBegin, Current->end)) {
  3181. //
  3182. //Sakend might well ovelap next
  3183. //several entries. Scan for it.
  3184. //
  3185. SackListEntry *tmpcurrent = Current->next;
  3186. Current->end = SakEnd;
  3187. while (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->end)) {
  3188. Current->next = tmpcurrent->next;
  3189. CTEFreeMem(tmpcurrent);
  3190. tmpcurrent = Current->next;
  3191. }
  3192. //
  3193. // above check pointed tmpcurrent
  3194. // whose end is > sakend
  3195. // Check if the tmpcurrent entry begin
  3196. // is overlapped
  3197. //
  3198. if (tmpcurrent && SEQ_GTE(Current->end, tmpcurrent->begin)) {
  3199. Current->end = tmpcurrent->end;
  3200. Current->next = tmpcurrent->next;
  3201. CTEFreeMem(tmpcurrent);
  3202. }
  3203. break;
  3204. }
  3205. Prev = Current;
  3206. Current = Current->next;
  3207. } //while
  3208. if (Current == NULL) {
  3209. // this is the new sack entry
  3210. // create the entry and hang it on tcb.
  3211. SackList = CTEAllocMemN(sizeof(SackListEntry), 'sPCT');
  3212. if (SackList == NULL) {
  3213. TCPTRACE(("No mem for sack List \n"));
  3214. goto no_mem;
  3215. }
  3216. Prev->next = SackList;
  3217. SackList->next = NULL;
  3218. SackList->begin = SakBegin;
  3219. SackList->end = SakEnd;
  3220. IF_TCPDBG(TCP_DEBUG_SACK) {
  3221. TCPTRACE(("Inserting new Sackentry %x %d %d %x\n", SackList, SackList->begin, SackList->end, RcvTCB->tcb_SackRcvd));
  3222. }
  3223. }
  3224. //advance sack ptr to the next sack block
  3225. // check for consistency????
  3226. SackPtr++;
  3227. } //for
  3228. }
  3229. //unknown options
  3230. if (OptSize > 1) {
  3231. if (OptPtr[1] == 0 || OptPtr[1] > OptSize)
  3232. break; // Bad option length, bail out.
  3233. OptSize -= OptPtr[1];
  3234. OptPtr += OptPtr[1];
  3235. } else
  3236. break;
  3237. } //while
  3238. no_mem:;
  3239. }
  3240. // if ack is with in the sequence space,that is
  3241. // this seq number is next expected or repeat of previous
  3242. // segment but the right edge is new for us,
  3243. // record the time stamp val of the remote, which will be echoed
  3244. if (time_stamp &&
  3245. TS_GTE(tsval, RcvTCB->tcb_tsrecent) &&
  3246. SEQ_LTE(RcvInfo.tri_seq, RcvTCB->tcb_lastack)) {
  3247. RcvTCB->tcb_tsupdatetime = TCPTime;
  3248. RcvTCB->tcb_tsrecent = tsval;
  3249. }
  3250. //
  3251. // Do the fast path check. We can hit the fast path if the
  3252. // incoming sequence number matches our receive next and the
  3253. // masked flags match our 'predicted' flags.
  3254. // Also, include PAWS check
  3255. //
  3256. if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
  3257. (!time_stamp || TS_GTE(tsval, RcvTCB->tcb_tsrecent)) &&
  3258. (RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk)
  3259. {
  3260. uint CWin;
  3261. Queue SendQ;
  3262. INITQ(&SendQ);
  3263. Actions = 0;
  3264. REFERENCE_TCB(RcvTCB);
  3265. //
  3266. // The fast path. We know all we have to do here is ack
  3267. // sends and deliver data. First try and ack data.
  3268. //
  3269. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3270. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3271. uint MSS;
  3272. uint Amount = RcvInfo.tri_ack - RcvTCB->tcb_senduna;
  3273. //
  3274. // The ack acknowledes something. Pull the
  3275. // appropriate amount off the send q.
  3276. //
  3277. ACKData(RcvTCB, RcvInfo.tri_ack, &SendQ);
  3278. //
  3279. // If this acknowledges something we were running an
  3280. // RTT on, update that stuff now.
  3281. //
  3282. {
  3283. short RTT;
  3284. BOOLEAN fUpdateRtt = FALSE;
  3285. //
  3286. //if timestamp is true, get the RTT using the echoed
  3287. //timestamp.
  3288. //
  3289. if (time_stamp && tsecr) {
  3290. RTT = TCPTime - tsecr;
  3291. fUpdateRtt = TRUE;
  3292. } else {
  3293. if (RcvTCB->tcb_rtt != 0 &&
  3294. SEQ_GT(RcvInfo.tri_ack,
  3295. RcvTCB->tcb_rttseq)) {
  3296. fUpdateRtt = TRUE;
  3297. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  3298. }
  3299. }
  3300. if (fUpdateRtt) {
  3301. RcvTCB->tcb_rtt = 0;
  3302. RTT -= (RcvTCB->tcb_smrtt >> 3); //alpha = 1/8
  3303. RcvTCB->tcb_smrtt += RTT;
  3304. RTT = (RTT >= 0 ? RTT : -RTT);
  3305. RTT -= (RcvTCB->tcb_delta >> 3);
  3306. RcvTCB->tcb_delta += RTT + RTT; //Beta of
  3307. //1/4 instead
  3308. // of 1/8
  3309. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  3310. MIN_RETRAN_TICKS)+1, MAX_REXMIT_TO);
  3311. }
  3312. }
  3313. // Update the congestion window now.
  3314. CWin = RcvTCB->tcb_cwin;
  3315. MSS = RcvTCB->tcb_mss;
  3316. if (CWin < RcvTCB->tcb_maxwin) {
  3317. if (CWin < RcvTCB->tcb_ssthresh)
  3318. CWin += (RcvTCB->tcb_flags & SCALE_CWIN)
  3319. ? Amount : MSS;
  3320. else
  3321. CWin += MAX((MSS * MSS) / CWin, 1);
  3322. RcvTCB->tcb_cwin = CWin;
  3323. }
  3324. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  3325. //
  3326. // We've acknowledged something, so reset the rexmit
  3327. // count. If there's still stuff outstanding, restart
  3328. // the rexmit timer.
  3329. //
  3330. RcvTCB->tcb_rexmitcnt = 0;
  3331. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  3332. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3333. else
  3334. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  3335. //
  3336. // Since we've acknowledged data, we need to update
  3337. // the window.
  3338. //
  3339. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3340. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
  3341. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3342. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3343. // We've updated the window, remember to send some more.
  3344. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3345. {
  3346. //
  3347. // If the receiver has already sent dup acks, but
  3348. // we are not sending because the SendWin is less
  3349. // than a segment, then to avoid time outs on the
  3350. // previous send (receiver is waiting for
  3351. // retransmitted data but we are not sending the
  3352. // segment..) prematurely
  3353. // timeout (set rexmittimer to 1 tick)
  3354. //
  3355. int SendWin;
  3356. uint AmtOutstanding, AmtUnsent;
  3357. AmtOutstanding = (uint) (RcvTCB->tcb_sendnext -
  3358. RcvTCB->tcb_senduna);
  3359. AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
  3360. SendWin = (int)(MIN(RcvTCB->tcb_sendwin,
  3361. RcvTCB->tcb_cwin) - AmtOutstanding);
  3362. if ((RcvTCB->tcb_dup >= MaxDupAcks) && ((int)RcvTCB->tcb_ssthresh > 0)) {
  3363. //
  3364. // Fast retransmitted frame is acked
  3365. // Set cwin to ssthresh so that cwin grows
  3366. // linearly from here
  3367. //
  3368. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  3369. }
  3370. }
  3371. RcvTCB->tcb_dup = 0;
  3372. } else {
  3373. //
  3374. // It doesn't ack anything. If it's an ack for something
  3375. // larger than we've sent then ACKAndDrop it, otherwise
  3376. // ignore it.
  3377. //
  3378. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3379. ACKAndDrop(&RcvInfo, RcvTCB);
  3380. return IP_SUCCESS;
  3381. }
  3382. //
  3383. // If it is a pure duplicate ack, check if it is
  3384. // time to retransmit immediately
  3385. //
  3386. else if ((Size == 0) &&
  3387. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3388. (SEQ_LT(RcvTCB->tcb_senduna,
  3389. RcvTCB->tcb_sendmax)) &&
  3390. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  3391. RcvInfo.tri_window
  3392. ) {
  3393. // See if fast rexmit can be done
  3394. if (HandleFastXmit(RcvTCB, &RcvInfo)) {
  3395. return IP_SUCCESS;
  3396. }
  3397. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3398. } else { // not a pure duplicate ack (size == 0 )
  3399. // Size !=0 or recvr is advertizing new window.
  3400. // update the window and check if
  3401. // anything needs to be sent
  3402. RcvTCB->tcb_dup = 0;
  3403. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3404. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  3405. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  3406. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  3407. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3408. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  3409. RcvInfo.tri_window);
  3410. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3411. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3412. //
  3413. // Since we've updated the window, remember to
  3414. // send some more.
  3415. //
  3416. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  3417. }
  3418. } // for SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)
  3419. // case
  3420. }
  3421. NewSize = MIN((int)Size, RcvTCB->tcb_rcvwin);
  3422. if (NewSize != 0) {
  3423. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  3424. BytesTaken = (*RcvTCB->tcb_rcvhndlr) (RcvTCB, RcvInfo.tri_flags,
  3425. RcvBuf, NewSize);
  3426. RcvTCB->tcb_rcvnext += BytesTaken;
  3427. RcvTCB->tcb_rcvwin -= BytesTaken;
  3428. CheckTCBRcv(RcvTCB);
  3429. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  3430. Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ?
  3431. NEED_OUTPUT : 0);
  3432. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  3433. if (BytesTaken != NewSize) {
  3434. Actions |= NEED_ACK;
  3435. RcvTCB->tcb_rcvdsegs = 0;
  3436. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  3437. } else {
  3438. if (RcvTCB->tcb_rcvdsegs != RcvTCB->tcb_numdelacks) {
  3439. RcvTCB->tcb_rcvdsegs++;
  3440. RcvTCB->tcb_flags |= ACK_DELAYED;
  3441. ASSERT(RcvTCB->tcb_delackticks);
  3442. START_TCB_TIMER_R(RcvTCB, DELACK_TIMER, RcvTCB->tcb_delackticks);
  3443. } else {
  3444. Actions |= NEED_ACK;
  3445. RcvTCB->tcb_rcvdsegs = 0;
  3446. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  3447. }
  3448. }
  3449. } else {
  3450. //
  3451. // The new size is 0. If the original size was not 0,
  3452. // we must have a 0 rcv. win and hence need to send an
  3453. // ACK to this probe.
  3454. //
  3455. Actions |= (Size ? NEED_ACK : 0);
  3456. }
  3457. if (Actions)
  3458. DelayAction(RcvTCB, Actions);
  3459. TableHandle = DISPATCH_LEVEL;
  3460. DerefTCB(RcvTCB, TableHandle);
  3461. if (!EMPTYQ(&SendQ)) {
  3462. CompleteSends(&SendQ);
  3463. }
  3464. return IP_SUCCESS;
  3465. }
  3466. TableHandle = DISPATCH_LEVEL;
  3467. //
  3468. // Make sure we can handle this frame. We can't handle it if
  3469. // we're in SYN_RCVD and the accept is still pending, or we're
  3470. // in a non-established state and already in the receive
  3471. // handler.
  3472. //
  3473. if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
  3474. !(RcvTCB->tcb_flags & CONN_ACCEPTED)) ||
  3475. (RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
  3476. TCP_FLAG_IN_RCV))) {
  3477. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3478. return IP_SUCCESS;
  3479. }
  3480. if ((RcvTCB->tcb_state == TCB_SYN_RCVD) &&
  3481. (RcvInfo.tri_flags & TCP_FLAG_ACK) &&
  3482. (RcvInfo.tri_flags & TCP_FLAG_SYN)) {
  3483. //
  3484. // This is bogus. SYN..ACK for already accepted SYN.
  3485. // Reset this.
  3486. //
  3487. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3488. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3489. return IP_SUCCESS;
  3490. }
  3491. //
  3492. // If it's closed, it's a temporary zombie TCB. Reset the
  3493. // sender.
  3494. //
  3495. if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
  3496. ((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
  3497. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3498. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3499. return IP_SUCCESS;
  3500. }
  3501. //
  3502. // At this point, we have a connection, and it's locked.
  3503. // Following the 'Segment Arrives' section of 793, the next
  3504. // thing to check is if this connection is in SynSent state.
  3505. //
  3506. if (RcvTCB->tcb_state == TCB_SYN_SENT) {
  3507. ASSERT(RcvTCB->tcb_flags & ACTIVE_OPEN);
  3508. //
  3509. // Check the ACK bit. Since we don't send data with our
  3510. // SYNs, the check we make is for the ack to exactly match
  3511. // our SND.NXT.
  3512. //
  3513. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  3514. // ACK is set.
  3515. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
  3516. // Bad ACK value.
  3517. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3518. // Send a RST back at him.
  3519. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3520. return IP_SUCCESS;
  3521. }
  3522. }
  3523. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  3524. //
  3525. // There's an acceptable RST. We'll persist here,
  3526. // sending another SYN in PERSIST_TIMEOUT ms, until we
  3527. // fail from too many retries.
  3528. //
  3529. if (!(RcvTCB->tcb_fastchk & TCP_FLAG_RST_WHILE_SYN)) {
  3530. RcvTCB->tcb_fastchk |= TCP_FLAG_RST_WHILE_SYN;
  3531. RcvTCB->tcb_slowcount++;
  3532. }
  3533. if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
  3534. //
  3535. // We've had a positive refusal, and one more rexmit
  3536. // would time us out, so close the connection now.
  3537. //
  3538. REFERENCE_TCB(RcvTCB);
  3539. CompleteConnReq(RcvTCB, OptInfo, TDI_CONN_REFUSED);
  3540. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, TableHandle);
  3541. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3542. DerefTCB(RcvTCB, TableHandle);
  3543. } else {
  3544. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, PERSIST_TIMEOUT);
  3545. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3546. }
  3547. return IP_SUCCESS;
  3548. }
  3549. // See if we have a SYN. If we do, we're going to change state
  3550. // somehow (either to ESTABLISHED or SYN_RCVD).
  3551. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3552. REFERENCE_TCB(RcvTCB);
  3553. // We have a SYN. Go ahead and record the sequence number and
  3554. // window info.
  3555. RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
  3556. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  3557. // Urgent data. Update the pointer.
  3558. if (RcvInfo.tri_urgent != 0)
  3559. RcvInfo.tri_urgent--;
  3560. else
  3561. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  3562. }
  3563. //
  3564. // get remote mss and also enable ws, ts or sack options
  3565. // if they are negotiated and if the host supports them.
  3566. //
  3567. RcvTCB->tcb_sndwinscale = 0;
  3568. RcvTCB->tcb_remmss = FindMSSAndOptions(TCPH, RcvTCB,FALSE);
  3569. //
  3570. // If there are options, update them now. We already
  3571. // have an RCE open, so if we have new options we'll
  3572. // have to close it and open a new one.
  3573. //
  3574. if (OptInfo->ioi_options != NULL) {
  3575. if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
  3576. (*LocalNetInfo.ipi_updateopts) (OptInfo,
  3577. &RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
  3578. (*LocalNetInfo.ipi_closerce) (RcvTCB->tcb_rce);
  3579. InitRCE(RcvTCB);
  3580. }
  3581. } else {
  3582. RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
  3583. ASSERT(RcvTCB->tcb_mss > 0);
  3584. ValidateMSS(RcvTCB);
  3585. }
  3586. RcvTCB->tcb_rexmitcnt = 0;
  3587. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3588. AdjustRcvWin(RcvTCB);
  3589. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  3590. // Our SYN has been acked. Update SND.UNA and stop the
  3591. // retrans timer.
  3592. RcvTCB->tcb_senduna = RcvInfo.tri_ack;
  3593. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3594. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  3595. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3596. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3597. #if TRACE_EVENT
  3598. CPCallBack = TCPCPHandlerRoutine;
  3599. if (CPCallBack != NULL) {
  3600. ulong GroupType;
  3601. WMIInfo.wmi_destaddr = RcvTCB->tcb_daddr;
  3602. WMIInfo.wmi_destport = RcvTCB->tcb_dport;
  3603. WMIInfo.wmi_srcaddr = RcvTCB->tcb_saddr;
  3604. WMIInfo.wmi_srcport = RcvTCB->tcb_sport;
  3605. WMIInfo.wmi_size = 0;
  3606. WMIInfo.wmi_context = RcvTCB->tcb_cpcontext;
  3607. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_CONNECT;
  3608. (*CPCallBack) (GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo), NULL);
  3609. }
  3610. #endif
  3611. GoToEstab(RcvTCB);
  3612. //
  3613. // Set a bit that informs TCBTimeout to notify
  3614. // the automatic connection driver of this new
  3615. // connection. Only set this flag if we
  3616. // have bound successfully with the automatic
  3617. // connection driver.
  3618. //
  3619. if (fAcdLoadedG)
  3620. START_TCB_TIMER_R(RcvTCB, ACD_TIMER, 2);
  3621. //
  3622. // Remove whatever command exists on this
  3623. // connection.
  3624. //
  3625. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  3626. //
  3627. // If data has been queued, send the first data
  3628. // segment with an ACK. Otherwise, send a pure ACK.
  3629. //
  3630. if (RcvTCB->tcb_unacked) {
  3631. REFERENCE_TCB(RcvTCB);
  3632. TCPSend(RcvTCB, TableHandle);
  3633. } else {
  3634. CTEFreeLockFromDPC(&RcvTCB->tcb_lock,
  3635. TableHandle);
  3636. SendACK(RcvTCB);
  3637. }
  3638. //
  3639. // Now handle other data and controls. To do this
  3640. // we need to reacquire the lock, and make sure we
  3641. // haven't started closing it.
  3642. //
  3643. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3644. if (!CLOSING(RcvTCB)) {
  3645. //
  3646. // We haven't started closing it. Turn off the
  3647. // SYN flag and continue processing.
  3648. //
  3649. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3650. if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) != TCP_FLAG_ACK ||
  3651. Size != 0)
  3652. goto NotSYNSent;
  3653. }
  3654. DerefTCB(RcvTCB, TableHandle);
  3655. return IP_SUCCESS;
  3656. } else {
  3657. // A SYN, but not an ACK. Go to SYN_RCVD.
  3658. RcvTCB->tcb_state = TCB_SYN_RCVD;
  3659. RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
  3660. SendSYN(RcvTCB, TableHandle);
  3661. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3662. DerefTCB(RcvTCB, TableHandle);
  3663. return IP_SUCCESS;
  3664. }
  3665. } else {
  3666. // No SYN, just toss the frame.
  3667. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3668. return IP_SUCCESS;
  3669. }
  3670. }
  3671. REFERENCE_TCB(RcvTCB);
  3672. NotSYNSent:
  3673. //do not allow buffer ownership via slow path
  3674. if (RcvBuf)
  3675. RcvBuf->ipr_pMdl = NULL;
  3676. // Check for PAWS(RFC 1323)
  3677. // Check for tsrecent and tsval wrap around
  3678. if (time_stamp &&
  3679. !(RcvInfo.tri_flags & TCP_FLAG_RST) &&
  3680. RcvTCB->tcb_tsrecent &&
  3681. TS_LT(tsval, RcvTCB->tcb_tsrecent)) {
  3682. // Time stamp is not valid
  3683. // Check if this is because the last update is
  3684. // 24 days old
  3685. if ((int)(TCPTime - RcvTCB->tcb_tsupdatetime) > PAWS_IDLE) {
  3686. //invalidate the ts
  3687. RcvTCB->tcb_tsrecent = 0;
  3688. } else {
  3689. ACKAndDrop(&RcvInfo, RcvTCB);
  3690. return IP_SUCCESS;
  3691. }
  3692. }
  3693. //
  3694. // Not in the SYN-SENT state. Check the sequence number. If my
  3695. // window is 0, I'll truncate all incoming frames but look at
  3696. // some of the control fields. Otherwise I'll try and make
  3697. // this segment fit into the window.
  3698. //
  3699. if (RcvTCB->tcb_rcvwin != 0) {
  3700. int StateSize; // Size, including state info.
  3701. SeqNum LastValidSeq; // Sequence number of last valid
  3702. // byte at RWE.
  3703. //
  3704. // We are offering a window. If this segment starts in
  3705. // front of my receive window, clip off the front part.
  3706. //Check for the sanity of received sequence.
  3707. //This is to fix the 1 bit error(MSB) case in the rcv seq.
  3708. // Also, check the incoming size.
  3709. //
  3710. if ((SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) &&
  3711. ((int)Size >= 0) &&
  3712. (RcvTCB->tcb_rcvnext - RcvInfo.tri_seq) > 0)
  3713. {
  3714. int AmountToClip, FinByte;
  3715. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3716. //
  3717. // Had a SYN. Clip it off and update the seq number.
  3718. // This will be clipped off in the next if.
  3719. // Allow AckAndDrop routine to see the incoming SYN!
  3720. // RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3721. //
  3722. RcvInfo.tri_seq++;
  3723. RcvInfo.tri_urgent--;
  3724. }
  3725. // Advance the receive buffer to point at the new data.
  3726. AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
  3727. ASSERT(AmountToClip >= 0);
  3728. //
  3729. // If there's a FIN on this segment, we'll need to
  3730. // account for it.
  3731. //
  3732. FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  3733. if (AmountToClip >= (((int)Size) + FinByte)) {
  3734. //
  3735. // Falls entirely before the window. We have more
  3736. // special case code here - if the ack. number
  3737. // acks something, we'll go ahead and take it,
  3738. // faking the sequence number to be rcvnext. This
  3739. // prevents problems on full duplex connections,
  3740. // where data has been received but not acked,
  3741. // and retransmission timers reset the seq. number
  3742. // to below our rcvnext.
  3743. //
  3744. if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
  3745. SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3746. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3747. //
  3748. // This contains valid ACK info. Fudge the info
  3749. // to get through the rest of this.
  3750. //
  3751. Size = 0;
  3752. AmountToClip = 0;
  3753. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  3754. RcvInfo.tri_flags &=
  3755. ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
  3756. TCP_FLAG_RST | TCP_FLAG_URG);
  3757. #if DBG
  3758. FinByte = 1; // Fake out assert below.
  3759. #endif
  3760. } else {
  3761. ACKAndDrop(&RcvInfo, RcvTCB);
  3762. return IP_SUCCESS;
  3763. }
  3764. }
  3765. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3766. RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
  3767. }
  3768. //
  3769. // Trim what we have to. If we can't trim enough, the
  3770. // frame is too short. This shouldn't happen, but if
  3771. // it does we'll drop the frame.
  3772. //
  3773. Size -= AmountToClip;
  3774. RcvInfo.tri_seq += AmountToClip;
  3775. RcvInfo.tri_urgent -= AmountToClip;
  3776. RcvBuf = TrimRcvBuf(RcvBuf, AmountToClip);
  3777. ASSERT(RcvBuf != NULL);
  3778. ASSERT(RcvBuf->ipr_size != 0 ||
  3779. (Size == 0 && FinByte));
  3780. RcvBuf->ipr_pMdl = NULL;
  3781. if (*(int *)&RcvInfo.tri_urgent < 0) {
  3782. RcvInfo.tri_urgent = 0;
  3783. RcvInfo.tri_flags &= ~TCP_FLAG_URG;
  3784. }
  3785. }
  3786. //
  3787. // We've made sure the front is OK. Now make sure part of
  3788. // it doesn't fall outside of the right edge of the
  3789. // window. If it does, we'll truncate the frame (removing
  3790. // the FIN, if any). If we truncate the whole frame we'll
  3791. // ACKAndDrop it.
  3792. //
  3793. StateSize =
  3794. Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1 : 0) +
  3795. ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1 : 0);
  3796. if (StateSize)
  3797. StateSize--;
  3798. //
  3799. // Now the incoming sequence number (RcvInfo.tri_seq) +
  3800. // StateSize is the last sequence number in the segment.
  3801. // If this is greater than the last valid byte in the
  3802. // window, we have some overlap to chop off.
  3803. //
  3804. ASSERT(StateSize >= 0);
  3805. LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
  3806. if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
  3807. int AmountToChop;
  3808. //
  3809. // At least some part of the frame is outside of our
  3810. // window. See if it starts outside our window.
  3811. //
  3812. if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
  3813. //
  3814. // Falls entirely outside the window. We have
  3815. // special case code to deal with a pure ack that
  3816. // falls exactly at our right window edge.
  3817. // Otherwise we ack and drop it.
  3818. //
  3819. if (
  3820. !SEQ_EQ(RcvInfo.tri_seq, LastValidSeq + 1) ||
  3821. Size != 0 ||
  3822. (RcvInfo.tri_flags & (TCP_FLAG_SYN |
  3823. TCP_FLAG_FIN))
  3824. ) {
  3825. ACKAndDrop(&RcvInfo, RcvTCB);
  3826. return IP_SUCCESS;
  3827. }
  3828. } else {
  3829. //
  3830. // At least some part of it is in the window. If
  3831. // there's a FIN, chop that off and see if that
  3832. // moves us inside.
  3833. //
  3834. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  3835. RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
  3836. StateSize--;
  3837. }
  3838. // Now figure out how much to chop off.
  3839. AmountToChop = (RcvInfo.tri_seq + StateSize) -
  3840. LastValidSeq;
  3841. ASSERT(AmountToChop >= 0);
  3842. Size -= AmountToChop;
  3843. RcvBuf->ipr_pMdl = NULL;
  3844. }
  3845. }
  3846. } else {
  3847. if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
  3848. //
  3849. // If there's a RST on this segment, and he's only off
  3850. // by 1, take it anyway. This can happen if the remote
  3851. // peer is probing and sends with the seq. # after the
  3852. // probe.
  3853. //
  3854. if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
  3855. !(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
  3856. ACKAndDrop(&RcvInfo, RcvTCB);
  3857. return IP_SUCCESS;
  3858. } else
  3859. RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
  3860. }
  3861. //
  3862. // He's in sequence, but we have a window of 0. Truncate the
  3863. // size, and clear any sequence consuming bits.
  3864. //
  3865. if (Size != 0 ||
  3866. (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
  3867. RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
  3868. Size = 0;
  3869. if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
  3870. DelayAction(RcvTCB, NEED_ACK);
  3871. }
  3872. }
  3873. //
  3874. // At this point, the segment is in our window and does not
  3875. // overlap on either end. If it's the next seq number we
  3876. // expect, we can handle the data now. Otherwise we'll queue
  3877. // it for later. In either case we'll handle RST and ACK
  3878. // information right now.
  3879. //
  3880. ASSERT((*(int *)&Size) >= 0);
  3881. // Now, following 793, we check the RST bit.
  3882. if (RcvInfo.tri_flags & TCP_FLAG_RST) {
  3883. uchar Reason;
  3884. //
  3885. // We can't go back into the LISTEN state from SYN-RCVD
  3886. // here, because we may have notified the client via a
  3887. // listen completing or a connect indication. So, if came
  3888. // from an active open we'll give back a 'connection
  3889. // refused' notice. For all other cases
  3890. // we'll just destroy the connection.
  3891. //
  3892. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  3893. if (RcvTCB->tcb_flags & ACTIVE_OPEN)
  3894. Reason = TCB_CLOSE_REFUSED;
  3895. else
  3896. Reason = TCB_CLOSE_RST;
  3897. } else
  3898. Reason = TCB_CLOSE_RST;
  3899. TryToCloseTCB(RcvTCB, Reason, TableHandle);
  3900. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3901. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  3902. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3903. RemoveTCBFromConn(RcvTCB);
  3904. NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
  3905. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3906. }
  3907. DerefTCB(RcvTCB, TableHandle);
  3908. return IP_SUCCESS;
  3909. }
  3910. // Next check the SYN bit.
  3911. if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
  3912. //
  3913. // Again, we can't quietly go back into the LISTEN state
  3914. // here, even if we came from a passive open.
  3915. //
  3916. TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, TableHandle);
  3917. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3918. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3919. if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
  3920. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  3921. RemoveTCBFromConn(RcvTCB);
  3922. NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
  3923. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  3924. }
  3925. DerefTCB(RcvTCB, TableHandle);
  3926. return IP_SUCCESS;
  3927. }
  3928. // Check the ACK field. If it's not on drop the segment.
  3929. if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
  3930. uint UpdateWindow;
  3931. // If we're in SYN-RCVD, go to ESTABLISHED.
  3932. if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
  3933. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  3934. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  3935. // The ack is valid.
  3936. if (SynAttackProtect) {
  3937. SynAttChk(NULL, RcvTCB);
  3938. if (RcvTCB->tcb_fastchk & TCP_FLAG_ACCEPT_PENDING) {
  3939. AddrObj *AO;
  3940. BOOLEAN stat=FALSE;
  3941. //KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"EP:relookup %x\n",RcvTCB));
  3942. //
  3943. // We will be reiniting the tcprexmitcnt to 0.
  3944. // If we are configured for syn-attack
  3945. // protection and the rexmit cnt is >1,
  3946. // decrement the count of connections that are
  3947. // in the half-open-retried state. Check
  3948. // whether we are below a low-watermark. If we
  3949. // are, increase the rexmit count back to
  3950. // configured values
  3951. //
  3952. //Check if we still have the listening endpoint
  3953. CTEGetLockAtDPC(&AddrObjTableLock.Lock, &TableHandle);
  3954. AO = GetBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP, TRUE);
  3955. if (AO && AO->ao_connect == NULL) {
  3956. //
  3957. // Lets see if there is one more addr obj
  3958. // matching the incoming request with
  3959. // ao_connect != NULL
  3960. //
  3961. AddrObj *tmpAO;
  3962. tmpAO = GetNextBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP, AO, TRUE);
  3963. if (tmpAO != NULL) {
  3964. AO = tmpAO;
  3965. }
  3966. }
  3967. if (AO != NULL) {
  3968. stat = DelayedAcceptConn(AO,Src,TCPH->tcp_src,RcvTCB);
  3969. } else {
  3970. CTEFreeLockFromDPC(&AddrObjTableLock.Lock, TableHandle);
  3971. }
  3972. if (!stat) {
  3973. //send RST
  3974. //DerefTCB(RcvTCB, TableHandle);
  3975. DEREFERENCE_TCB(RcvTCB);
  3976. TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, TableHandle);
  3977. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  3978. return IP_SUCCESS;
  3979. } else {
  3980. RcvTCB->tcb_fastchk &= ~TCP_FLAG_ACCEPT_PENDING;
  3981. //complete accpt irp immdtly
  3982. //CompleteConnReq(RcvTCB, &RcvTCB->tcb_opt, TDI_SUCCESS);
  3983. }
  3984. }
  3985. }
  3986. RcvTCB->tcb_rexmitcnt = 0;
  3987. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  3988. RcvTCB->tcb_senduna++;
  3989. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  3990. RcvTCB->tcb_maxwin = RcvInfo.tri_window;
  3991. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  3992. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  3993. GoToEstab(RcvTCB);
  3994. #if TRACE_EVENT
  3995. CPCallBack = TCPCPHandlerRoutine;
  3996. if (CPCallBack != NULL) {
  3997. ulong GroupType;
  3998. WMIInfo.wmi_destaddr = RcvTCB->tcb_daddr;
  3999. WMIInfo.wmi_destport = RcvTCB->tcb_dport;
  4000. WMIInfo.wmi_srcaddr = RcvTCB->tcb_saddr;
  4001. WMIInfo.wmi_srcport = RcvTCB->tcb_sport;
  4002. WMIInfo.wmi_size = 0;
  4003. WMIInfo.wmi_context = RcvTCB->tcb_cpcontext;
  4004. GroupType = EVENT_TRACE_GROUP_TCPIP + EVENT_TRACE_TYPE_ACCEPT;
  4005. (*CPCallBack) (GroupType, (PVOID)&WMIInfo, sizeof(WMIInfo), NULL);
  4006. }
  4007. #endif
  4008. // Now complete whatever we can here.
  4009. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  4010. } else {
  4011. if (SynAttackProtect) {
  4012. SynAttChk(NULL, RcvTCB);
  4013. }
  4014. DerefTCB(RcvTCB, TableHandle);
  4015. SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
  4016. return IP_SUCCESS;
  4017. }
  4018. } else {
  4019. Queue SendQ;
  4020. INITQ(&SendQ);
  4021. // We're not in SYN-RCVD. See if this acknowledges anything.
  4022. if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  4023. SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  4024. uint CWin;
  4025. uint Amount = RcvInfo.tri_ack - RcvTCB->tcb_senduna;
  4026. //
  4027. // The ack acknowledges something. Pull the
  4028. // appropriate amount off the send q.
  4029. //
  4030. ACKData(RcvTCB, RcvInfo.tri_ack, &SendQ);
  4031. //
  4032. // If this acknowledges something we were running
  4033. // an RTT on, update that stuff now.
  4034. //
  4035. {
  4036. short RTT;
  4037. BOOLEAN fUpdateRtt = FALSE;
  4038. //
  4039. // if timestamp is true, get the RTT using the
  4040. // echoed timestamp.
  4041. //
  4042. if (time_stamp && tsecr) {
  4043. RTT = TCPTime - tsecr;
  4044. fUpdateRtt = TRUE;
  4045. } else {
  4046. if (RcvTCB->tcb_rtt != 0 &&
  4047. SEQ_GT(RcvInfo.tri_ack,
  4048. RcvTCB->tcb_rttseq)) {
  4049. RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
  4050. fUpdateRtt = TRUE;
  4051. }
  4052. }
  4053. if (fUpdateRtt) {
  4054. RcvTCB->tcb_rtt = 0;
  4055. RTT -= (RcvTCB->tcb_smrtt >> 3);
  4056. RcvTCB->tcb_smrtt += RTT;
  4057. RTT = (RTT >= 0 ? RTT : -RTT);
  4058. RTT -= (RcvTCB->tcb_delta >> 3);
  4059. RcvTCB->tcb_delta += RTT + RTT;
  4060. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  4061. MIN_RETRAN_TICKS)+1, MAX_REXMIT_TO);
  4062. }
  4063. }
  4064. //
  4065. // If we're probing for a PMTU black hole we've
  4066. // found one, so turn off
  4067. // the detection. The size is already down, so
  4068. // leave it there.
  4069. //
  4070. if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
  4071. RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
  4072. RcvTCB->tcb_bhprobecnt = 0;
  4073. if (--(RcvTCB->tcb_slowcount) == 0) {
  4074. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4075. CheckTCBRcv(RcvTCB);
  4076. }
  4077. }
  4078. // Update the congestion window now.
  4079. CWin = RcvTCB->tcb_cwin;
  4080. if (CWin < RcvTCB->tcb_maxwin) {
  4081. if (CWin < RcvTCB->tcb_ssthresh)
  4082. CWin += (RcvTCB->tcb_flags & SCALE_CWIN)
  4083. ? Amount : RcvTCB->tcb_mss;
  4084. else
  4085. CWin += MAX((RcvTCB->tcb_mss * RcvTCB->tcb_mss) / CWin, 1);
  4086. RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
  4087. }
  4088. if ((RcvTCB->tcb_dup > 0) && ((int)RcvTCB->tcb_ssthresh > 0)) {
  4089. //
  4090. // Fast retransmitted frame is acked
  4091. // Set cwin to ssthresh so that cwin grows
  4092. // linearly from here
  4093. //
  4094. RcvTCB->tcb_cwin = RcvTCB->tcb_ssthresh;
  4095. }
  4096. RcvTCB->tcb_dup = 0;
  4097. ASSERT(*(int *)&RcvTCB->tcb_cwin > 0);
  4098. //
  4099. // We've acknowledged something, so reset the
  4100. // rexmit count. If there's still stuff
  4101. // outstanding, restart the rexmit timer.
  4102. //
  4103. RcvTCB->tcb_rexmitcnt = 0;
  4104. if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
  4105. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  4106. else
  4107. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  4108. //
  4109. // If we've sent a FIN, and this acknowledges it, we
  4110. // need to complete the client's close request and
  4111. // possibly transition our state.
  4112. //
  4113. if (RcvTCB->tcb_flags & FIN_SENT) {
  4114. //
  4115. // We have sent a FIN. See if it's been
  4116. // acknowledged. Once we've sent a FIN,
  4117. // tcb_sendmax can't advance, so our FIN must
  4118. // have seq. number tcb_sendmax - 1. Thus our
  4119. // FIN is acknowledged if the incoming ack is
  4120. // equal to tcb_sendmax.
  4121. //
  4122. if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  4123. //
  4124. // He's acked our FIN. Turn off the flags,
  4125. // and complete the request. We'll leave the
  4126. // FIN_OUTSTANDING flag alone, to force
  4127. // early outs in the send code.
  4128. //
  4129. RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
  4130. ASSERT(RcvTCB->tcb_unacked == 0);
  4131. ASSERT(RcvTCB->tcb_sendnext ==
  4132. RcvTCB->tcb_sendmax);
  4133. //
  4134. // Now figure out what we need to do. In
  4135. // FIN_WAIT1 or FIN_WAIT, just complete
  4136. // the disconnect req. and continue.
  4137. // Otherwise, it's a bit trickier,
  4138. // since we can't complete the connreq
  4139. // until we remove the TCB from its
  4140. // connection.
  4141. //
  4142. switch (RcvTCB->tcb_state) {
  4143. ushort ConnReqTimeout = 0;
  4144. case TCB_FIN_WAIT1:
  4145. RcvTCB->tcb_state = TCB_FIN_WAIT2;
  4146. if (RcvTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC) {
  4147. //RcvTCB->tcb_flags |= DISC_NOTIFIED;
  4148. } else {
  4149. if (RcvTCB->tcb_connreq) {
  4150. ConnReqTimeout = RcvTCB->tcb_connreq->tcr_timeout;
  4151. }
  4152. CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
  4153. }
  4154. //
  4155. // Start a timer in case we never get
  4156. // out of FIN_WAIT2. Set the retransmit
  4157. // count high to force a timeout the
  4158. // first time the timer fires.
  4159. //
  4160. if (ConnReqTimeout) {
  4161. RcvTCB->tcb_rexmitcnt = 1;
  4162. } else {
  4163. RcvTCB->tcb_rexmitcnt = (uchar) MaxDataRexmitCount;
  4164. ConnReqTimeout = (ushort)FinWait2TO;
  4165. }
  4166. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, ConnReqTimeout);
  4167. //Fall through to FIN-WAIT-2 processing.
  4168. case TCB_FIN_WAIT2:
  4169. break;
  4170. case TCB_CLOSING:
  4171. //
  4172. //Note that we do not care about
  4173. //return stat from GracefulClose
  4174. //since we do not touch the tcb
  4175. //anyway, anymore, even if it is in
  4176. //time_wait.
  4177. //
  4178. GracefulClose(RcvTCB, TRUE, FALSE,
  4179. TableHandle);
  4180. if (!EMPTYQ(&SendQ)) {
  4181. CompleteSends(&SendQ);
  4182. }
  4183. return IP_SUCCESS;
  4184. break;
  4185. case TCB_LAST_ACK:
  4186. GracefulClose(RcvTCB, FALSE, FALSE,
  4187. TableHandle);
  4188. if (!EMPTYQ(&SendQ)) {
  4189. CompleteSends(&SendQ);
  4190. }
  4191. return IP_SUCCESS;
  4192. break;
  4193. default:
  4194. ASSERT(0);
  4195. break;
  4196. }
  4197. }
  4198. }
  4199. UpdateWindow = TRUE;
  4200. } else {
  4201. //
  4202. // It doesn't ack anything. If it's an ack for
  4203. // something larger than we've sent then
  4204. // ACKAndDrop it, otherwise ignore it. If we're in
  4205. // FIN_WAIT2, we'll restart the timer.
  4206. // We don't make this check above because we know no
  4207. // data can be acked when we're in FIN_WAIT2.
  4208. //
  4209. if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
  4210. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, (ushort) FinWait2TO);
  4211. if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
  4212. ACKAndDrop(&RcvInfo, RcvTCB);
  4213. return IP_SUCCESS;
  4214. } else if ((Size == 0) &&
  4215. SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  4216. (SEQ_LT(RcvTCB->tcb_senduna, RcvTCB->tcb_sendmax)) &&
  4217. (RcvTCB->tcb_sendwin == RcvInfo.tri_window) &&
  4218. RcvInfo.tri_window) {
  4219. // See if fast rexmit can be done
  4220. if(HandleFastXmit(RcvTCB, &RcvInfo)){
  4221. return IP_SUCCESS;
  4222. }
  4223. Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
  4224. } else {
  4225. // Now update the window if we can.
  4226. if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
  4227. (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
  4228. (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
  4229. SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
  4230. UpdateWindow = TRUE;
  4231. } else
  4232. UpdateWindow = FALSE;
  4233. }
  4234. }
  4235. if (UpdateWindow) {
  4236. RcvTCB->tcb_sendwin = RcvInfo.tri_window;
  4237. RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
  4238. RcvInfo.tri_window);
  4239. RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
  4240. RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
  4241. if (RcvInfo.tri_window == 0) {
  4242. // We've got a zero window.
  4243. if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
  4244. RcvTCB->tcb_flags &= ~NEED_OUTPUT;
  4245. RcvTCB->tcb_rexmitcnt = 0;
  4246. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  4247. if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
  4248. RcvTCB->tcb_flags |= FLOW_CNTLD;
  4249. RcvTCB->tcb_slowcount++;
  4250. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  4251. CheckTCBRcv(RcvTCB);
  4252. }
  4253. }
  4254. } else {
  4255. if (RcvTCB->tcb_flags & FLOW_CNTLD) {
  4256. RcvTCB->tcb_rexmitcnt = 0;
  4257. RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
  4258. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  4259. //if (!TCB_TIMER_RUNNING(RcvTCB, RXMIT_TIMER)) {
  4260. START_TCB_TIMER_R(RcvTCB, RXMIT_TIMER, RcvTCB->tcb_rexmit);
  4261. //}
  4262. RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
  4263. //
  4264. // Reset send next to the left edge of the
  4265. // window, because it might be at
  4266. // senduna+1 if we've been probing.
  4267. //
  4268. ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
  4269. if (--(RcvTCB->tcb_slowcount) == 0) {
  4270. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4271. CheckTCBRcv(RcvTCB);
  4272. }
  4273. }
  4274. //
  4275. // Since we've updated the window, see if we
  4276. // can send some more.
  4277. //
  4278. if (RcvTCB->tcb_unacked != 0 ||
  4279. (RcvTCB->tcb_flags & FIN_NEEDED))
  4280. DelayAction(RcvTCB, NEED_OUTPUT);
  4281. }
  4282. }
  4283. if (!EMPTYQ(&SendQ)) {
  4284. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  4285. CompleteSends(&SendQ);
  4286. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  4287. }
  4288. }
  4289. //
  4290. // We've handled all the acknowledgment stuff. If the size
  4291. // is greater than 0 or important bits are set, process it further; otherwise it's a pure ack and we're done with
  4292. // it.
  4293. //
  4294. if (Size > 0 || (RcvInfo.tri_flags & TCP_FLAG_FIN)) {
  4295. //
  4296. // If we're not in a state where we can process
  4297. // incoming data or FINs, there's no point in going
  4298. // further. Just send an ack and drop this segment.
  4299. //
  4300. if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
  4301. (RcvTCB->tcb_flags & GC_PENDING)) {
  4302. ACKAndDrop(&RcvInfo, RcvTCB);
  4303. return IP_SUCCESS;
  4304. }
  4305. //
  4306. // If it's in sequence process it now, otherwise
  4307. // reassemble it.
  4308. //
  4309. if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
  4310. //
  4311. // If we're already in the recv. handler, this is a
  4312. // duplicate. We'll just toss it.
  4313. //
  4314. if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
  4315. DerefTCB(RcvTCB, TableHandle);
  4316. return IP_SUCCESS;
  4317. }
  4318. RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
  4319. //
  4320. // Now loop, pulling things from the reassembly
  4321. // queue, until the queue is empty, or we can't
  4322. // take all of the data, or we hit a FIN.
  4323. //
  4324. do {
  4325. // Handle urgent data, if any.
  4326. if (RcvInfo.tri_flags & TCP_FLAG_URG) {
  4327. HandleUrgent(RcvTCB, &RcvInfo, RcvBuf, &Size);
  4328. //
  4329. // Since we may have freed the lock, we
  4330. // need to recheck and see if we're
  4331. // closing here.
  4332. //
  4333. if (CLOSING(RcvTCB))
  4334. break;
  4335. }
  4336. //
  4337. // OK, the data is in sequence, we've updated
  4338. // the reassembly queue and handled any urgent
  4339. // data. If we have any data go ahead and
  4340. // process it now.
  4341. //
  4342. if (Size > 0) {
  4343. BytesTaken = (*RcvTCB->tcb_rcvhndlr) (RcvTCB,
  4344. RcvInfo.tri_flags, RcvBuf, Size);
  4345. RcvTCB->tcb_rcvnext += BytesTaken;
  4346. RcvTCB->tcb_rcvwin -= BytesTaken;
  4347. CheckTCBRcv(RcvTCB);
  4348. if (RcvTCB->tcb_rcvdsegs != RcvTCB->tcb_numdelacks){
  4349. RcvTCB->tcb_flags |= ACK_DELAYED;
  4350. RcvTCB->tcb_rcvdsegs++;
  4351. ASSERT(RcvTCB->tcb_delackticks);
  4352. START_TCB_TIMER_R(RcvTCB, DELACK_TIMER,
  4353. RcvTCB->tcb_delackticks);
  4354. } else {
  4355. DelayAction(RcvTCB, NEED_ACK);
  4356. RcvTCB->tcb_rcvdsegs = 0;
  4357. STOP_TCB_TIMER_R(RcvTCB, DELACK_TIMER);
  4358. }
  4359. if (BytesTaken != Size) {
  4360. //
  4361. // We didn't take everything we could.
  4362. // No use in further processing, just
  4363. // bail out.
  4364. //
  4365. DelayAction(RcvTCB, NEED_ACK);
  4366. break;
  4367. }
  4368. //
  4369. // If we're closing now, we're done, so
  4370. // get out.
  4371. //
  4372. if (CLOSING(RcvTCB))
  4373. break;
  4374. }
  4375. //
  4376. // See if we need to advance over some urgent
  4377. // data.
  4378. //
  4379. if (RcvTCB->tcb_flags & URG_VALID) {
  4380. uint AdvanceNeeded;
  4381. //
  4382. // We only need to adv if we're not doing
  4383. // urgent inline. Urg inline also has some
  4384. // implications for when we can clear the
  4385. // URG_VALID flag. If we're not doing
  4386. // urgent inline, we can clear it when
  4387. // rcvnext advances beyond urgent end.
  4388. // If we are doing inline, we clear it
  4389. // when rcvnext advances one receive
  4390. // window beyond urgend.
  4391. //
  4392. if (!(RcvTCB->tcb_flags & URG_INLINE)) {
  4393. if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart)
  4394. RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend +
  4395. 1;
  4396. else
  4397. ASSERT(SEQ_LT(RcvTCB->tcb_rcvnext,
  4398. RcvTCB->tcb_urgstart) ||
  4399. SEQ_GT(RcvTCB->tcb_rcvnext,
  4400. RcvTCB->tcb_urgend));
  4401. AdvanceNeeded = 0;
  4402. } else
  4403. AdvanceNeeded = RcvTCB->tcb_defaultwin;
  4404. // See if we can clear the URG_VALID flag.
  4405. if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
  4406. RcvTCB->tcb_urgend)) {
  4407. RcvTCB->tcb_flags &= ~URG_VALID;
  4408. if (--(RcvTCB->tcb_slowcount) == 0) {
  4409. RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  4410. CheckTCBRcv(RcvTCB);
  4411. }
  4412. }
  4413. }
  4414. //
  4415. // We've handled the data. If the FIN bit is
  4416. // set, we have more processing.
  4417. //
  4418. if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
  4419. uint Notify = FALSE;
  4420. uint DelayAck = TRUE;
  4421. RcvTCB->tcb_rcvnext++;
  4422. PushData(RcvTCB);
  4423. switch (RcvTCB->tcb_state) {
  4424. case TCB_SYN_RCVD:
  4425. //
  4426. // I don't think we can get here - we
  4427. // should have discarded the frame if it
  4428. // had no ACK, or gone to established if
  4429. // it did.
  4430. //
  4431. ASSERT(0);
  4432. case TCB_ESTAB:
  4433. RcvTCB->tcb_state = TCB_CLOSE_WAIT;
  4434. //
  4435. // We left established, we're off the
  4436. // fast path.
  4437. //
  4438. RcvTCB->tcb_slowcount++;
  4439. RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  4440. CheckTCBRcv(RcvTCB);
  4441. Notify = TRUE;
  4442. break;
  4443. case TCB_FIN_WAIT1:
  4444. RcvTCB->tcb_state = TCB_CLOSING;
  4445. DelayAck = FALSE;
  4446. //RcvTCB->tcb_refcnt++;
  4447. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  4448. SendACK(RcvTCB);
  4449. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  4450. Notify = TRUE;
  4451. break;
  4452. case TCB_FIN_WAIT2:
  4453. // Stop the FIN_WAIT2 timer.
  4454. DelayAck = FALSE;
  4455. STOP_TCB_TIMER_R(RcvTCB, RXMIT_TIMER);
  4456. REFERENCE_TCB(RcvTCB);
  4457. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  4458. SendACK(RcvTCB);
  4459. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  4460. if (RcvTCB->tcb_fastchk & TCP_FLAG_SEND_AND_DISC) {
  4461. GracefulClose(RcvTCB, TRUE, FALSE, TableHandle);
  4462. } else {
  4463. GracefulClose(RcvTCB, TRUE, TRUE, TableHandle);
  4464. }
  4465. //
  4466. //graceful close has put this tcb in
  4467. //timewait state should not access
  4468. //small tw tcb at this point
  4469. //
  4470. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  4471. DerefTCB(RcvTCB, TableHandle);
  4472. return IP_SUCCESS;
  4473. break;
  4474. default:
  4475. ASSERT(0);
  4476. break;
  4477. }
  4478. if (DelayAck) {
  4479. DelayAction(RcvTCB, NEED_ACK);
  4480. }
  4481. if (Notify) {
  4482. CTEFreeLockFromDPC(&RcvTCB->tcb_lock,
  4483. TableHandle);
  4484. NotifyOfDisc(RcvTCB, OptInfo, TDI_GRACEFUL_DISC);
  4485. CTEGetLockAtDPC(&RcvTCB->tcb_lock,
  4486. &TableHandle);
  4487. }
  4488. break; // Exit out of WHILE loop.
  4489. }
  4490. // If the reassembly queue isn't empty, get what we
  4491. // can now.
  4492. RcvBuf = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
  4493. if (RcvBuf)
  4494. RcvBuf->ipr_pMdl = NULL;
  4495. CheckRBList(RcvBuf, Size);
  4496. } while (RcvBuf != NULL);
  4497. RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
  4498. if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
  4499. RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
  4500. DelayAction(RcvTCB, NEED_OUTPUT);
  4501. }
  4502. DerefTCB(RcvTCB, TableHandle);
  4503. return IP_SUCCESS;
  4504. } else {
  4505. // It's not in sequence. Since it needs further processing,
  4506. // put in on the reassembly queue.
  4507. if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
  4508. !(RcvTCB->tcb_flags & GC_PENDING)) {
  4509. PutOnRAQ(RcvTCB, &RcvInfo, RcvBuf, Size);
  4510. //
  4511. //If SACK option is active, we need to construct
  4512. // SACK Blocks in ack
  4513. //
  4514. if (RcvTCB->tcb_tcpopts & TCP_FLAG_SACK) {
  4515. SendSackInACK(RcvTCB, RcvInfo.tri_seq);
  4516. } else {
  4517. CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
  4518. SendACK(RcvTCB);
  4519. }
  4520. CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
  4521. DerefTCB(RcvTCB, TableHandle);
  4522. } else
  4523. ACKAndDrop(&RcvInfo, RcvTCB);
  4524. return IP_SUCCESS;
  4525. }
  4526. }
  4527. } else {
  4528. // No ACK. Just drop the segment and return.
  4529. DerefTCB(RcvTCB, TableHandle);
  4530. return IP_SUCCESS;
  4531. }
  4532. DerefTCB(RcvTCB, TableHandle);
  4533. } else { // DataOffset <= Size
  4534. TStats.ts_inerrs++;
  4535. }
  4536. } else {
  4537. TStats.ts_inerrs++;
  4538. }
  4539. } else { // IsBCast
  4540. TStats.ts_inerrs++;
  4541. }
  4542. return IP_SUCCESS;
  4543. }
  4544. #pragma BEGIN_INIT
  4545. //* InitTCPRcv - Initialize TCP receive side.
  4546. //
  4547. // Called during init time to initialize our TCP receive side.
  4548. //
  4549. // Input: Nothing.
  4550. //
  4551. // Returns: TRUE.
  4552. //
  4553. int
  4554. InitTCPRcv(void)
  4555. {
  4556. uint i;
  4557. //Allocate Time_Proc number of delayqueues
  4558. PerCPUDelayQ = CTEAllocMemBoot(Time_Proc * sizeof(CPUDelayQ));
  4559. if (PerCPUDelayQ == NULL) {
  4560. return FALSE;
  4561. }
  4562. for (i = 0; i < Time_Proc; i++) {
  4563. CTEInitLock(&PerCPUDelayQ[i].TCBDelayLock);
  4564. INITQ(&PerCPUDelayQ[i].TCBDelayQ);
  4565. }
  4566. TCBDelayRtnCount.Value = 0;
  4567. #if MILLEN
  4568. TCBDelayRtnLimit.Value = 1;
  4569. #else // MILLEN
  4570. TCBDelayRtnLimit.Value = KeNumberProcessors;
  4571. if (TCBDelayRtnLimit.Value > TCB_DELAY_RTN_LIMIT)
  4572. TCBDelayRtnLimit.Value = TCB_DELAY_RTN_LIMIT;
  4573. #endif // !MILLEN
  4574. DummyBuf.ipr_owner = IPR_OWNER_IP;
  4575. DummyBuf.ipr_size = 0;
  4576. DummyBuf.ipr_next = 0;
  4577. DummyBuf.ipr_buffer = NULL;
  4578. return TRUE;
  4579. }
  //* UnInitTCPRcv - Undo receive-side initialization.
  //
  //  Called if initialization fails, to uninitialize our receive side.
  //  The routine currently performs no work.
  //
  //  NOTE(review): the PerCPUDelayQ array allocated by InitTCPRcv is not
  //  released here - confirm whether that is intentional.
  //
  //  Input:   Nothing.
  //
  //  Returns: Nothing.
  //
  void
  UnInitTCPRcv(void)
  {
      return;
  }
  4593. #pragma END_INIT