Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2712 lines
94 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1985-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // TCP send code.
  14. //
  15. // This file contains the code for sending Data and Control segments.
  16. //
  17. #include "oscfg.h"
  18. #include "ndis.h"
  19. #include "ip6imp.h"
  20. #include "ip6def.h"
  21. #include "tdi.h"
  22. #include "tdint.h"
  23. #include "tdistat.h"
  24. #include "queue.h"
  25. #include "transprt.h"
  26. #include "addr.h"
  27. #include "tcp.h"
  28. #include "tcb.h"
  29. #include "tcpconn.h"
  30. #include "tcpsend.h"
  31. #include "tcprcv.h"
  32. #include "info.h"
  33. #include "tcpcfg.h"
  34. #include "route.h"
  35. #include "security.h"
  36. void *TCPProtInfo; // TCP protocol info for IP.
  37. SLIST_HEADER TCPSendReqFree; // Send req. free list.
  38. KSPIN_LOCK TCPSendReqFreeLock;
  39. KSPIN_LOCK TCPSendReqCompleteLock;
  40. uint NumTCPSendReq; // Current number of SendReqs in system.
  41. uint MaxSendReq = 0xffffffff; // Maximum allowed number of SendReqs.
  42. extern KSPIN_LOCK TCBTableLock;
  43. //
  44. // All of the init code can be discarded.
  45. //
  46. #ifdef ALLOC_PRAGMA
  47. #pragma alloc_text(INIT, InitTCPSend)
  48. #endif // ALLOC_PRAGMA
  49. extern void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  50. #define MIN_INITIAL_RTT 3 // In msec.
  51. //* FreeSendReq - Free a send request structure.
  52. //
  53. // Called to free a send request structure.
  54. //
  55. void // Returns: Nothing.
  56. FreeSendReq(
  57. TCPSendReq *FreedReq) // Connection request structure to be freed.
  58. {
  59. PSLIST_ENTRY BufferLink;
  60. CHECK_STRUCT(FreedReq, tsr);
  61. BufferLink = CONTAINING_RECORD(&(FreedReq->tsr_req.tr_q.q_next),
  62. SLIST_ENTRY, Next);
  63. ExInterlockedPushEntrySList(&TCPSendReqFree, BufferLink,
  64. &TCPSendReqFreeLock);
  65. }
  66. //* GetSendReq - Get a send request structure.
  67. //
  68. // Called to get a send request structure.
  69. //
  70. TCPSendReq * // Returns: Pointer to SendReq structure, or NULL if none.
  71. GetSendReq(
  72. void) // Nothing.
  73. {
  74. TCPSendReq *Temp;
  75. PSLIST_ENTRY BufferLink;
  76. Queue *QueuePtr;
  77. TCPReq *ReqPtr;
  78. BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
  79. &TCPSendReqFreeLock);
  80. if (BufferLink != NULL) {
  81. QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
  82. ReqPtr = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
  83. Temp = CONTAINING_RECORD(ReqPtr, TCPSendReq, tsr_req);
  84. CHECK_STRUCT(Temp, tsr);
  85. } else {
  86. if (NumTCPSendReq < MaxSendReq)
  87. Temp = ExAllocatePool(NonPagedPool, sizeof(TCPSendReq));
  88. else
  89. Temp = NULL;
  90. if (Temp != NULL) {
  91. ExInterlockedAddUlong(&NumTCPSendReq, 1, &TCPSendReqFreeLock);
  92. #if DBG
  93. Temp->tsr_req.tr_sig = tr_signature;
  94. Temp->tsr_sig = tsr_signature;
  95. #endif
  96. }
  97. }
  98. return Temp;
  99. }
  100. //* TCPHopLimit
  101. //
  102. // Given a TCB, returns the Hop Limit to use in a sent packet.
  103. //
  104. uchar
  105. TCPHopLimit(TCB *Tcb)
  106. {
  107. TCPConn *tc;
  108. int Hops;
  109. //
  110. // Save a current Hop Limit in the TCB,
  111. // so that we'll have access to it when the connected is closing
  112. // and tcb_conn is unavailable.
  113. //
  114. if ((tc = Tcb->tcb_conn) != NULL)
  115. Tcb->tcb_hops = tc->tc_ao->ao_ucast_hops;
  116. if ((Hops = Tcb->tcb_hops) != -1)
  117. return (uchar) Hops;
  118. else
  119. return (uchar) Tcb->tcb_rce->NCE->IF->CurHopLimit;
  120. }
  121. //* TCPSendComplete - Complete a TCP send.
  122. //
  123. // Called by IP when a send we've made is complete. We free the buffer,
  124. // and possibly complete some sends. Each send queued on a TCB has a ref.
  125. // count with it, which is the number of times a pointer to a buffer
  126. // associated with the send has been passed to the underlying IP layer. We
  127. // can't complete a send until that count it 0. If this send was actually
  128. // from a send of data, we'll go down the chain of send and decrement the
  129. // refcount on each one. If we have one going to 0 and the send has already
  130. // been acked we'll complete the send. If it hasn't been acked we'll leave
  131. // it until the ack comes in.
  132. //
  133. // NOTE: We aren't protecting any of this with locks. When we port this to
  134. // NT we'll need to fix this, probably with a global lock. See the comments
  135. // in ACKSend() in TCPRCV.C for more details.
  136. //
  137. void // Returns: Nothing.
  138. TCPSendComplete(
  139. PNDIS_PACKET Packet, // Packet that was sent.
  140. IP_STATUS Status)
  141. {
  142. PNDIS_BUFFER BufferChain;
  143. SendCmpltContext *SCContext;
  144. PVOID Memory;
  145. UINT Unused;
  146. UNREFERENCED_PARAMETER(Status);
  147. //
  148. // Pull values we care about out of the packet structure.
  149. //
  150. SCContext = (SendCmpltContext *) PC(Packet)->CompletionData;
  151. BufferChain = NdisFirstBuffer(Packet);
  152. NdisQueryBufferSafe(BufferChain, &Memory, &Unused, LowPagePriority);
  153. ASSERT(Memory != NULL);
  154. //
  155. // See if we have a send complete context. It will be present for data
  156. // packets and means we have extra work to do. For non-data packets, we
  157. // can just skip all this as there is only the header buffer to deal with.
  158. //
  159. if (SCContext != NULL) {
  160. KIRQL OldIrql;
  161. PNDIS_BUFFER CurrentBuffer;
  162. TCPSendReq *CurrentSend;
  163. uint i;
  164. CHECK_STRUCT(SCContext, scc);
  165. //
  166. // First buffer in chain is the TCP header buffer.
  167. // Skip over it for now.
  168. //
  169. CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
  170. //
  171. // Also skip over any 'user' buffers (those loaned out to us
  172. // instead of copied) as we don't need to free them.
  173. //
  174. for (i = 0; i < (uint)SCContext->scc_ubufcount; i++) {
  175. ASSERT(CurrentBuffer != NULL);
  176. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  177. }
  178. //
  179. // Now loop through and free our (aka 'transport') buffers.
  180. // We need to do this before decrementing the reference count to avoid
  181. // destroying the buffer chain if we have to zap tsr_lastbuf->Next to
  182. // NULL.
  183. //
  184. for (i = 0; i < (uint)SCContext->scc_tbufcount; i++) {
  185. PNDIS_BUFFER TempBuffer;
  186. ASSERT(CurrentBuffer != NULL);
  187. TempBuffer = CurrentBuffer;
  188. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  189. NdisFreeBuffer(TempBuffer);
  190. }
  191. //
  192. // Loop through the send requests attached to this packet,
  193. // reducing the reference count on each and enqueing them for
  194. // completion where appropriate.
  195. //
  196. CurrentSend = SCContext->scc_firstsend;
  197. for (i = 0; i< SCContext->scc_count; i++) {
  198. Queue *TempQ;
  199. long Result;
  200. TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
  201. CHECK_STRUCT(CurrentSend, tsr);
  202. Result = InterlockedDecrement(&(CurrentSend->tsr_refcnt));
  203. ASSERT(Result >= 0);
  204. if (Result <= 0) {
  205. //
  206. // Reference count has gone to 0 which means the send has
  207. // been ACK'd or cancelled. Complete it now.
  208. //
  209. // If we've sent directly from this send, NULL out the next
  210. // pointer for the last buffer in the chain.
  211. //
  212. if (CurrentSend->tsr_lastbuf != NULL) {
  213. NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
  214. CurrentSend->tsr_lastbuf = NULL;
  215. }
  216. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  217. ENQUEUE(&SendCompleteQ, &CurrentSend->tsr_req.tr_q);
  218. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  219. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  220. }
  221. CurrentSend = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  222. TCPSendReq, tsr_req);
  223. }
  224. }
  225. //
  226. // Free the TCP header buffer and our packet structure proper.
  227. //
  228. NdisFreeBuffer(BufferChain);
  229. ExFreePool(Memory);
  230. NdisFreePacket(Packet);
  231. //
  232. // If there are any TCP send requests to complete, do so now.
  233. //
  234. if (RequestCompleteFlags & SEND_REQUEST_COMPLETE)
  235. TCPRcvComplete();
  236. }
  237. //* RcvWin - Figure out the receive window to offer in an ack.
  238. //
  239. // A routine to figure out what window to offer on a connection. We
  240. // take into account SWS avoidance, what the default connection window is,
  241. // and what the last window we offered is.
  242. //
  243. uint // Returns: Window to be offered.
  244. RcvWin(
  245. TCB *WinTCB) // TCB on which to perform calculations.
  246. {
  247. int CouldOffer; // The window size we could offer.
  248. CHECK_STRUCT(WinTCB, tcb);
  249. CheckPacketList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
  250. ASSERT(WinTCB->tcb_rcvwin >= 0);
  251. CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
  252. ASSERT(CouldOffer >= 0);
  253. ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
  254. if ((CouldOffer - WinTCB->tcb_rcvwin) >=
  255. (int) MIN(WinTCB->tcb_defaultwin/2, WinTCB->tcb_mss)) {
  256. WinTCB->tcb_rcvwin = CouldOffer;
  257. }
  258. return WinTCB->tcb_rcvwin;
  259. }
  260. //* SendSYN - Send a SYN segment.
  261. //
  262. // This is called during connection establishment time to send a SYN
  263. // segment to the peer. We get a buffer if we can, and then fill
  264. // it in. There's a tricky part here where we have to build the MSS
  265. // option in the header - we find the MSS by finding the MSS offered
  266. // by the net for the local address. After that, we send it.
  267. //
  268. void // Returns: Nothing.
  269. SendSYN(
  270. TCB *SYNTcb, // TCB from which SYN is to be sent.
  271. KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
  272. {
  273. PNDIS_PACKET Packet;
  274. void *Memory;
  275. IPv6Header UNALIGNED *IP;
  276. TCPHeader UNALIGNED *TCP;
  277. uchar *OptPtr;
  278. IP_STATUS Status;
  279. NDIS_STATUS NdisStatus;
  280. uint Offset;
  281. uint Length;
  282. uint PayloadLength;
  283. ushort TempWin;
  284. ushort MSS;
  285. RouteCacheEntry *RCE = NULL;
  286. CHECK_STRUCT(SYNTcb, tcb);
  287. //
  288. // Go ahead and set the retransmission timer now, in case we can't get a
  289. // packet or a buffer. In the future we might want to queue the
  290. // connection for when we get resources.
  291. //
  292. START_TCB_TIMER(SYNTcb->tcb_rexmittimer, SYNTcb->tcb_rexmit);
  293. //
  294. // In most cases, we will already have a route at this point.
  295. // However, if we failed to get one earlier in the passive receive
  296. // path, we may need to retry here.
  297. //
  298. if (SYNTcb->tcb_rce == NULL) {
  299. InitRCE(SYNTcb);
  300. if (SYNTcb->tcb_rce == NULL) {
  301. goto ErrorReturn;
  302. }
  303. }
  304. SYNTcb->tcb_rce = ValidateRCE(SYNTcb->tcb_rce);
  305. //
  306. // Allocate a packet header/buffer/data region for this SYN.
  307. //
  308. // Our buffer has space at the beginning which will be filled in
  309. // later by the link level. At this level we add the IPv6Header,
  310. // TCPHeader, and TCP Maximum Segment Size option which follow.
  311. //
  312. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  313. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  314. //
  315. Offset = SYNTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  316. Length = Offset + sizeof(*IP) + sizeof(*TCP) + MSS_OPT_SIZE;
  317. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  318. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  319. //
  320. // Upon failure, advance tcb_sendnext anyway.
  321. // We need to do this because TCBTimeout will *retreat* tcb_sendnext
  322. // if this SYN is later retransmitted, and if that retreat occurs
  323. // without this advance, we end up with a hole in the sequence-space.
  324. //
  325. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  326. "TCP SendSYN: Couldn't allocate IPv6 packet header!?!\n"));
  327. ErrorReturn:
  328. SYNTcb->tcb_sendnext++;
  329. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  330. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  331. }
  332. KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
  333. return;
  334. }
  335. PC(Packet)->CompletionHandler = TCPSendComplete;
  336. PC(Packet)->CompletionData = NULL;
  337. //
  338. // Since this is a SYN-only packet (maybe someday we'll send data with
  339. // the SYN?) we only have the one buffer and nothing to link on after.
  340. //
  341. //
  342. // We now have all the resources we need to send.
  343. // Prepare the actual packet.
  344. //
  345. //
  346. // Our header buffer has extra space for other headers to be
  347. // prepended to ours without requiring further allocation calls.
  348. // Put the actual TCP/IP header at the end of the buffer.
  349. //
  350. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  351. IP->VersClassFlow = IP_VERSION;
  352. IP->NextHeader = IP_PROTOCOL_TCP;
  353. IP->HopLimit = TCPHopLimit(SYNTcb);
  354. IP->Source = SYNTcb->tcb_saddr;
  355. IP->Dest = SYNTcb->tcb_daddr;
  356. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  357. TCP->tcp_src = SYNTcb->tcb_sport;
  358. TCP->tcp_dest = SYNTcb->tcb_dport;
  359. TCP->tcp_seq = net_long(SYNTcb->tcb_sendnext);
  360. //
  361. // The SYN flag takes up one element in sequence number space.
  362. // Record that we've sent it here (if we need to retransmit the SYN
  363. // segment, TCBTimeout will reset sendnext before calling us again).
  364. //
  365. SYNTcb->tcb_sendnext++;
  366. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  367. TStats.ts_outsegs++;
  368. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  369. } else
  370. TStats.ts_retranssegs++;
  371. TCP->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
  372. //
  373. // REVIEW: TCP flags are entirely based upon our state, so this could
  374. // REVIEW: be replaced by a (quicker) array lookup.
  375. //
  376. if (SYNTcb->tcb_state == TCB_SYN_RCVD)
  377. TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN | TCP_FLAG_ACK);
  378. else
  379. TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN);
  380. TempWin = (ushort)SYNTcb->tcb_rcvwin;
  381. TCP->tcp_window = net_short(TempWin);
  382. TCP->tcp_xsum = 0;
  383. OptPtr = (uchar *)(TCP + 1);
  384. //
  385. // Compose the Maximum Segment Size option.
  386. //
  387. // TBD: If we add IPv6 Jumbogram support, we should also add LFN
  388. // TBD: support to TCP and change this to handle a larger MSS.
  389. //
  390. MSS = SYNTcb->tcb_rce->NTE->IF->LinkMTU
  391. - sizeof(IPv6Header) - sizeof(TCPHeader);
  392. IF_TCPDBG(TCP_DEBUG_MSS) {
  393. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  394. "SendSYN: Sending MSS option value of %d\n", MSS));
  395. }
  396. *OptPtr++ = TCP_OPT_MSS;
  397. *OptPtr++ = MSS_OPT_SIZE;
  398. **(ushort **)&OptPtr = net_short(MSS);
  399. PayloadLength = sizeof(TCPHeader) + MSS_OPT_SIZE;
  400. //
  401. // Compute the TCP checksum. It covers the entire TCP segment
  402. // starting with the TCP header, plus the IPv6 pseudo-header.
  403. //
  404. // REVIEW: The IPv4 implementation kept the IPv4 psuedo-header around
  405. // REVIEW: in the TCB rather than recalculate it every time. Do this?
  406. //
  407. TCP->tcp_xsum = 0;
  408. TCP->tcp_xsum = ChecksumPacket(
  409. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  410. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  411. ASSERT(TCP->tcp_xsum != 0);
  412. //
  413. // Everything's ready. Now send the packet.
  414. //
  415. // Note that IPv6Send does not return a status code.
  416. // Instead it *always* completes the packet
  417. // with an appropriate status code.
  418. //
  419. // We free the lock on the TCB across the send call, but hold a
  420. // reference to it so it doesn't vanish out from under us.
  421. //
  422. SYNTcb->tcb_refcnt++;
  423. KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
  424. IPv6Send(Packet, Offset, IP, PayloadLength, SYNTcb->tcb_rce, 0,
  425. IP_PROTOCOL_TCP,
  426. net_short(TCP->tcp_src),
  427. net_short(TCP->tcp_dest));
  428. //
  429. // Release the TCB.
  430. //
  431. KeAcquireSpinLock(&SYNTcb->tcb_lock, &PreLockIrql);
  432. //
  433. // If SynAttackProtect is on, release RCE.
  434. // This prevents RCE list from growing at
  435. // synattack rate.
  436. //
  437. if (SynAttackProtect && (SYNTcb->tcb_flags & ACCEPT_PENDING)) {
  438. RCE = SYNTcb->tcb_rce;
  439. SYNTcb->tcb_rce = NULL;
  440. }
  441. DerefTCB(SYNTcb, PreLockIrql);
  442. if (RCE) {
  443. ReleaseRCE(RCE);
  444. }
  445. }
  446. //* SendKA - Send a keep alive segment.
  447. //
  448. // This is called when we want to send a keep-alive. The idea is to provoke
  449. // a response from our peer on an otherwise idle connection. We send a
  450. // garbage byte of data in our keep-alives in order to cooperate with broken
  451. // TCP implementations that don't respond to segments outside the window
  452. // unless they contain data.
  453. //
  454. void // Returns: Nothing.
  455. SendKA(
  456. TCB *KATcb, // TCB from which keep alive is to be sent.
  457. KIRQL PreLockIrql) // IRQL prior to acquiring lock on TCB.
  458. {
  459. PNDIS_PACKET Packet;
  460. void *Memory;
  461. IPv6Header UNALIGNED *IP;
  462. TCPHeader UNALIGNED *TCP;
  463. NDIS_STATUS NdisStatus;
  464. int Offset;
  465. uint Length;
  466. uint PayloadLength;
  467. ushort TempWin;
  468. SeqNum TempSeq;
  469. CHECK_STRUCT(KATcb, tcb);
  470. //
  471. // In most cases, we will already have a route at this point.
  472. // However, if we failed to get one earlier in the passive receive
  473. // path, we may need to retry here.
  474. //
  475. if (KATcb->tcb_rce == NULL) {
  476. InitRCE(KATcb);
  477. if (KATcb->tcb_rce == NULL) {
  478. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  479. return;
  480. }
  481. }
  482. KATcb->tcb_rce = ValidateRCE(KATcb->tcb_rce);
  483. //
  484. // Allocate a packet header/buffer/data region for this keepalive packet.
  485. //
  486. // Our buffer has space at the beginning which will be filled in
  487. // later by the link level. At this level we add the IPv6Header,
  488. // TCPHeader, and a single byte of data which follow.
  489. //
  490. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  491. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  492. //
  493. Offset = KATcb->tcb_rce->NCE->IF->LinkHeaderSize;
  494. Length = Offset + sizeof(*IP) + sizeof(*TCP) + 1;
  495. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  496. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  497. //
  498. // REVIEW: What to do if this fails.
  499. //
  500. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  501. "TCP SendKA: Couldn't allocate IPv6 packet header!?!\n"));
  502. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  503. return;
  504. }
  505. PC(Packet)->CompletionHandler = TCPSendComplete;
  506. PC(Packet)->CompletionData = NULL;
  507. //
  508. // Since this is a keepalive packet we only have the one buffer and
  509. // nothing to link on after.
  510. //
  511. //
  512. // Our header buffer has extra space for other headers to be
  513. // prepended to ours without requiring further allocation calls.
  514. // Put the actual TCP/IP header at the end of the buffer.
  515. //
  516. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  517. IP->VersClassFlow = IP_VERSION;
  518. IP->NextHeader = IP_PROTOCOL_TCP;
  519. IP->HopLimit = TCPHopLimit(KATcb);
  520. IP->Source = KATcb->tcb_saddr;
  521. IP->Dest = KATcb->tcb_daddr;
  522. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  523. TCP->tcp_src = KATcb->tcb_sport;
  524. TCP->tcp_dest = KATcb->tcb_dport;
  525. TempSeq = KATcb->tcb_senduna - 1;
  526. TCP->tcp_seq = net_long(TempSeq);
  527. TCP->tcp_ack = net_long(KATcb->tcb_rcvnext);
  528. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  529. TempWin = (ushort)RcvWin(KATcb);
  530. TCP->tcp_window = net_short(TempWin);
  531. TStats.ts_retranssegs++;
  532. PayloadLength = sizeof(TCPHeader) + 1;
  533. //
  534. // Compute the TCP checksum. It covers the entire TCP segment
  535. // starting with the TCP header, plus the IPv6 pseudo-header.
  536. //
  537. TCP->tcp_xsum = 0;
  538. TCP->tcp_xsum = ChecksumPacket(
  539. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  540. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  541. ASSERT(TCP->tcp_xsum != 0);
  542. //
  543. // Everything's ready. Now send the packet.
  544. //
  545. // Note that IPv6Send does not return a status code.
  546. // Instead it *always* completes the packet
  547. // with an appropriate status code.
  548. //
  549. KATcb->tcb_kacount++;
  550. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  551. IPv6Send(Packet, Offset, IP, PayloadLength, KATcb->tcb_rce, 0,
  552. IP_PROTOCOL_TCP,
  553. net_short(TCP->tcp_src),
  554. net_short(TCP->tcp_dest));
  555. }
  556. //* SendACK - Send an ACK segment.
  557. //
  558. // This is called whenever we need to send an ACK for some reason. Nothing
  559. // fancy, we just do it.
  560. //
  561. void // Returns: Nothing.
  562. SendACK(
  563. TCB *ACKTcb) // TCB from which ACK is to be sent.
  564. {
  565. PNDIS_PACKET Packet;
  566. void *Memory;
  567. IPv6Header UNALIGNED *IP;
  568. TCPHeader UNALIGNED *TCP;
  569. NDIS_STATUS NdisStatus;
  570. KIRQL OldIrql;
  571. int Offset;
  572. uint Length;
  573. uint PayloadLength;
  574. SeqNum SendNext;
  575. ushort TempWin;
  576. RouteCacheEntry *RCE = NULL;
  577. BOOLEAN Release = FALSE;
  578. CHECK_STRUCT(ACKTcb, tcb);
  579. KeAcquireSpinLock(&ACKTcb->tcb_lock, &OldIrql);
  580. //
  581. // In most cases, we will already have a route at this point.
  582. // However, if we failed to get one earlier in the passive receive
  583. // path, we may need to retry here.
  584. //
  585. if (ACKTcb->tcb_rce == NULL) {
  586. InitRCE(ACKTcb);
  587. if (ACKTcb->tcb_rce == NULL) {
  588. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  589. return;
  590. }
  591. }
  592. ACKTcb->tcb_rce = ValidateRCE(ACKTcb->tcb_rce);
  593. //
  594. // Allocate a packet header/buffer/data region for this ACK packet.
  595. //
  596. // Our buffer has space at the beginning which will be filled in
  597. // later by the link level. At this level we add the IPv6Header
  598. // and the TCPHeader.
  599. //
  600. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  601. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  602. //
  603. Offset = ACKTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  604. Length = Offset + sizeof(*IP) + sizeof(*TCP);
  605. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  606. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  607. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  608. //
  609. // REVIEW: What to do if this fails.
  610. //
  611. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  612. "TCP SendACK: Couldn't allocate IPv6 packet header!?!\n"));
  613. return;
  614. }
  615. PC(Packet)->CompletionHandler = TCPSendComplete;
  616. PC(Packet)->CompletionData = NULL;
  617. //
  618. // Our header buffer has extra space for other headers to be
  619. // prepended to ours without requiring further allocation calls.
  620. // Put the actual TCP/IP header at the end of the buffer.
  621. //
  622. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  623. IP->VersClassFlow = IP_VERSION;
  624. IP->NextHeader = IP_PROTOCOL_TCP;
  625. IP->HopLimit = TCPHopLimit(ACKTcb);
  626. IP->Source = ACKTcb->tcb_saddr;
  627. IP->Dest = ACKTcb->tcb_daddr;
  628. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  629. TCP->tcp_src = ACKTcb->tcb_sport;
  630. TCP->tcp_dest = ACKTcb->tcb_dport;
  631. TCP->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
  632. //
  633. // If the remote peer is advertising a window of zero, we need to send
  634. // this ack with a sequence number of his rcv_next (which in that case
  635. // should be our senduna). We have code here ifdef'd out that makes
  636. // sure that we don't send outside the RWE, but this doesn't work. We
  637. // need to be able to send a pure ACK exactly at the RWE.
  638. //
  639. if (ACKTcb->tcb_sendwin != 0) {
  640. SeqNum MaxValidSeq;
  641. SendNext = ACKTcb->tcb_sendnext;
  642. #if 0
  643. MaxValidSeq = ACKTcb->tcb_senduna + ACKTcb->tcb_sendwin - 1;
  644. SendNext = (SEQ_LT(SendNext, MaxValidSeq) ? SendNext : MaxValidSeq);
  645. #endif
  646. } else
  647. SendNext = ACKTcb->tcb_senduna;
  648. if ((ACKTcb->tcb_flags & FIN_SENT) &&
  649. SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
  650. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_FIN | TCP_FLAG_ACK);
  651. } else
  652. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  653. TCP->tcp_seq = net_long(SendNext);
  654. TempWin = (ushort)RcvWin(ACKTcb);
  655. TCP->tcp_window = net_short(TempWin);
  656. PayloadLength = sizeof(*TCP);
  657. //
  658. // Compute the TCP checksum. It covers the entire TCP segment
  659. // starting with the TCP header, plus the IPv6 pseudo-header.
  660. //
  661. TCP->tcp_xsum = 0;
  662. TCP->tcp_xsum = ChecksumPacket(
  663. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  664. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  665. ASSERT(TCP->tcp_xsum != 0);
  666. STOP_TCB_TIMER(ACKTcb->tcb_delacktimer);
  667. ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
  668. TStats.ts_outsegs++;
  669. //
  670. // If SynAttackProtect is on, release RCE.
  671. // This prevents RCE list from growing at
  672. // synattack rate.
  673. //
  674. RCE = ACKTcb->tcb_rce;
  675. if (SynAttackProtect && (ACKTcb->tcb_flags & ACCEPT_PENDING)) {
  676. ACKTcb->tcb_rce = NULL;
  677. Release = TRUE;
  678. }
  679. //
  680. // Everything's ready. Now send the packet.
  681. //
  682. // Note that IPv6Send does not return a status code.
  683. // Instead it *always* completes the packet
  684. // with an appropriate status code.
  685. //
  686. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  687. IPv6Send(Packet, Offset, IP, PayloadLength, RCE , 0,
  688. IP_PROTOCOL_TCP,
  689. net_short(TCP->tcp_src),
  690. net_short(TCP->tcp_dest));
  691. if (Release) {
  692. ReleaseRCE(RCE);
  693. }
  694. }
  695. //* SendRSTFromTCB - Send a RST from a TCB.
  696. //
  697. // This is called during close when we need to send a RST.
  698. //
  699. void // Returns: Nothing.
  700. SendRSTFromTCB(
  701. TCB *RSTTcb) // TCB from which RST is to be sent.
  702. {
  703. PNDIS_PACKET Packet;
  704. void *Memory;
  705. IPv6Header UNALIGNED *IP;
  706. TCPHeader UNALIGNED *TCP;
  707. NDIS_STATUS NdisStatus;
  708. int Offset;
  709. uint Length;
  710. uint PayloadLength;
  711. SeqNum RSTSeq;
  712. CHECK_STRUCT(RSTTcb, tcb);
  713. ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
  714. //
  715. // In most cases, we will already have a route at this point.
  716. // However, if we failed to get one earlier in the passive receive
  717. // path, we may need to retry here.
  718. //
  719. if (RSTTcb->tcb_rce == NULL) {
  720. InitRCE(RSTTcb);
  721. if (RSTTcb->tcb_rce == NULL) {
  722. return;
  723. }
  724. }
  725. RSTTcb->tcb_rce = ValidateRCE(RSTTcb->tcb_rce);
  726. //
  727. // Allocate a packet header/buffer/data region for this RST packet.
  728. //
  729. // Our buffer has space at the beginning which will be filled in
  730. // later by the link level. At this level we add the IPv6Header
  731. // and the TCPHeader.
  732. //
  733. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  734. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  735. //
  736. Offset = RSTTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  737. Length = Offset + sizeof(*IP) + sizeof(*TCP);
  738. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  739. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  740. //
  741. // REVIEW: What to do if this fails.
  742. //
  743. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  744. "TCP SendRSTFromTCB: Couldn't alloc IPv6 packet header!\n"));
  745. return;
  746. }
  747. PC(Packet)->CompletionHandler = TCPSendComplete;
  748. PC(Packet)->CompletionData = NULL;
  749. //
  750. // Since this is an RST-only packet we only have the one buffer and
  751. // nothing to link on after.
  752. //
  753. //
  754. // Our header buffer has extra space for other headers to be
  755. // prepended to ours without requiring further allocation calls.
  756. // Put the actual TCP/IP header at the end of the buffer.
  757. //
  758. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  759. IP->VersClassFlow = IP_VERSION;
  760. IP->NextHeader = IP_PROTOCOL_TCP;
  761. IP->HopLimit = TCPHopLimit(RSTTcb);
  762. IP->Source = RSTTcb->tcb_saddr;
  763. IP->Dest = RSTTcb->tcb_daddr;
  764. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  765. TCP->tcp_src = RSTTcb->tcb_sport;
  766. TCP->tcp_dest = RSTTcb->tcb_dport;
  767. //
  768. // If the remote peer has a window of 0, send with a seq. # equal
  769. // to senduna so he'll accept it. Otherwise send with send max.
  770. //
  771. if (RSTTcb->tcb_sendwin != 0)
  772. RSTSeq = RSTTcb->tcb_sendmax;
  773. else
  774. RSTSeq = RSTTcb->tcb_senduna;
  775. TCP->tcp_seq = net_long(RSTSeq);
  776. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_RST);
  777. TCP->tcp_window = 0;
  778. PayloadLength = sizeof(*TCP);
  779. //
  780. // Compute the TCP checksum. It covers the entire TCP segment
  781. // starting with the TCP header, plus the IPv6 pseudo-header.
  782. //
  783. TCP->tcp_xsum = 0;
  784. TCP->tcp_xsum = ChecksumPacket(
  785. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  786. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  787. ASSERT(TCP->tcp_xsum != 0);
  788. TStats.ts_outsegs++;
  789. TStats.ts_outrsts++;
  790. //
  791. // Everything's ready. Now send the packet.
  792. //
  793. // Note that IPv6Send does not return a status code.
  794. // Instead it *always* completes the packet
  795. // with an appropriate status code.
  796. //
  797. IPv6Send(Packet, Offset, IP, PayloadLength, RSTTcb->tcb_rce, 0,
  798. IP_PROTOCOL_TCP,
  799. net_short(TCP->tcp_src),
  800. net_short(TCP->tcp_dest));
  801. }
  802. //* SendRSTFromHeader - Send a RST back, based on a header.
  803. //
  804. // Called when we need to send a RST, but don't necessarily have a TCB.
  805. //
  806. void // Returns: Nothing.
  807. SendRSTFromHeader(
  808. TCPHeader UNALIGNED *RecvTCP, // TCP header to be RST.
  809. uint Length, // Length of the incoming segment.
  810. IPv6Addr *Dest, // Destination IP address for RST.
  811. uint DestScopeId, // Scope id for destination address.
  812. IPv6Addr *Src, // Source IP address for RST.
  813. uint SrcScopeId) // Scope id for source address.
  814. {
  815. PNDIS_PACKET Packet;
  816. void *Memory;
  817. IPv6Header UNALIGNED *IP;
  818. TCPHeader UNALIGNED *SendTCP;
  819. NetTableEntry *NTE;
  820. RouteCacheEntry *RCE;
  821. IP_STATUS Status;
  822. NDIS_STATUS NdisStatus;
  823. uint Offset;
  824. uint SendLength;
  825. uint PayloadLength;
  826. //
  827. // Never send a RST in response to a RST.
  828. //
  829. if (RecvTCP->tcp_flags & TCP_FLAG_RST)
  830. return;
  831. //
  832. // Determine NTE to send on based on incoming packet's destination.
  833. // REVIEW: Alternatively, we could/should just pass the NTE in.
  834. //
  835. NTE = FindNetworkWithAddress(Src, SrcScopeId);
  836. if (NTE == NULL) {
  837. //
  838. // This should never happen. The NTE would have to have gone away
  839. // between accepting the packet and getting here, and the incoming
  840. // packet's Packet structure already holds a reference to it.
  841. //
  842. ASSERTMSG("TCP SendRSTFromHeader: Bad source address!?!\n", FALSE);
  843. return;
  844. }
  845. //
  846. // Get the route to the destination (incoming packet's source).
  847. //
  848. Status = RouteToDestination(Dest, DestScopeId, CastFromNTE(NTE),
  849. RTD_FLAG_NORMAL, &RCE);
  850. if (Status != IP_SUCCESS) {
  851. //
  852. // Failed to get a route to the destination. Error out.
  853. //
  854. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
  855. "TCP SendRSTFromHeader: Can't get a route?!?\n"));
  856. ReleaseNTE(NTE);
  857. return;
  858. }
  859. //
  860. // Allocate a packet header/buffer/data region for this RST packet.
  861. //
  862. // Our buffer has space at the beginning which will be filled in
  863. // later by the link level. At this level we add the IPv6Header
  864. // and the TCPHeader.
  865. //
  866. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  867. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  868. //
  869. Offset = RCE->NCE->IF->LinkHeaderSize;
  870. SendLength = Offset + sizeof(*IP) + sizeof(*SendTCP);
  871. NdisStatus = IPv6AllocatePacket(SendLength, &Packet, &Memory);
  872. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  873. //
  874. // Failed to allocate a packet header/buffer/data region. Error out.
  875. //
  876. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  877. "TCP SendRSTFromHeader: Couldn't alloc IPv6 pkt header!\n"));
  878. ReleaseRCE(RCE);
  879. ReleaseNTE(NTE);
  880. return;
  881. }
  882. PC(Packet)->CompletionHandler = TCPSendComplete;
  883. PC(Packet)->CompletionData = NULL;
  884. //
  885. // We now have all the resources we need to send. Since this is a
  886. // RST-only packet we only have the one header buffer and nothing
  887. // to link on after.
  888. //
  889. //
  890. // Our header buffer has extra space for other headers to be
  891. // prepended to ours without requiring further allocation calls.
  892. // Put the actual TCP/IP header at the end of the buffer.
  893. //
  894. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  895. IP->VersClassFlow = IP_VERSION;
  896. IP->NextHeader = IP_PROTOCOL_TCP;
  897. IP->HopLimit = (uchar)RCE->NCE->IF->CurHopLimit;
  898. IP->Source = *Src;
  899. IP->Dest = *Dest;
  900. //
  901. // Fill in the header so as to make it believable to our peer, and send it.
  902. //
  903. SendTCP = (TCPHeader UNALIGNED *)(IP + 1);
  904. if (RecvTCP->tcp_flags & TCP_FLAG_SYN)
  905. Length++;
  906. if (RecvTCP->tcp_flags & TCP_FLAG_FIN)
  907. Length++;
  908. if (RecvTCP->tcp_flags & TCP_FLAG_ACK) {
  909. SendTCP->tcp_seq = RecvTCP->tcp_ack;
  910. SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
  911. TCP_FLAG_RST);
  912. } else {
  913. SeqNum TempSeq;
  914. SendTCP->tcp_seq = 0;
  915. TempSeq = net_long(RecvTCP->tcp_seq);
  916. TempSeq += Length;
  917. SendTCP->tcp_ack = net_long(TempSeq);
  918. SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
  919. TCP_FLAG_RST | TCP_FLAG_ACK);
  920. }
  921. SendTCP->tcp_window = 0;
  922. SendTCP->tcp_dest = RecvTCP->tcp_src;
  923. SendTCP->tcp_src = RecvTCP->tcp_dest;
  924. PayloadLength = sizeof(*SendTCP);
  925. //
  926. // Compute the TCP checksum. It covers the entire TCP segment
  927. // starting with the TCP header, plus the IPv6 pseudo-header.
  928. //
  929. SendTCP->tcp_xsum = 0;
  930. SendTCP->tcp_xsum = ChecksumPacket(
  931. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  932. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  933. ASSERT(SendTCP->tcp_xsum != 0);
  934. TStats.ts_outsegs++;
  935. TStats.ts_outrsts++;
  936. //
  937. // Everything's ready. Now send the packet.
  938. //
  939. // Note that IPv6Send does not return a status code.
  940. // Instead it *always* completes the packet
  941. // with an appropriate status code.
  942. //
  943. IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
  944. IP_PROTOCOL_TCP,
  945. net_short(SendTCP->tcp_src),
  946. net_short(SendTCP->tcp_dest));
  947. //
  948. // Release the Route and the NTE.
  949. //
  950. ReleaseRCE(RCE);
  951. ReleaseNTE(NTE);
  952. } // end of SendRSTFromHeader()
  953. //* GoToEstab - Transition to the established state.
  954. //
  955. // Called when we are going to the established state and need to finish up
  956. // initializing things that couldn't be done until now. We assume the TCB
  957. // lock is held by the caller on the TCB we're called with.
  958. //
  959. void // Returns: Nothing.
  960. GoToEstab(
  961. TCB *EstabTCB) // TCB to transition.
  962. {
  963. //
  964. // Initialize our slow start and congestion control variables.
  965. //
  966. EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
  967. EstabTCB->tcb_ssthresh = 0xffffffff;
  968. EstabTCB->tcb_state = TCB_ESTAB;
  969. //
  970. // We're in established. We'll subtract one from slow count for this fact,
  971. // and if the slowcount goes to 0 we'll move onto the fast path.
  972. //
  973. if (--(EstabTCB->tcb_slowcount) == 0)
  974. EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  975. TStats.ts_currestab++;
  976. EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
  977. }
  978. //* InitSendState - Initialize the send state of a connection.
  979. //
  980. // Called during connection establishment to initialize our send state.
  981. // (In this case, this refers to all information we'll put on the wire as
  982. // well as pure send state). We pick an ISS, set up a rexmit timer value,
  983. // etc. We assume the tcb_lock is held on the TCB when we are called.
  984. //
  985. void // Returns: Nothing.
  986. InitSendState(
  987. TCB *NewTCB) // TCB to be set up.
  988. {
  989. uint InitialRTT;
  990. CHECK_STRUCT(NewTCB, tcb);
  991. if (NewTCB->tcb_sendnext == 0) {
  992. GetRandomISN(&NewTCB->tcb_sendnext, (uchar*)&NewTCB->tcb_md5data);
  993. }
  994. NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
  995. NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
  996. NewTCB->tcb_error = IP_SUCCESS;
  997. //
  998. // Initialize pseudo-header xsum.
  999. //
  1000. NewTCB->tcb_phxsum = PHXSUM(NewTCB->tcb_saddr, NewTCB->tcb_daddr,
  1001. IP_PROTOCOL_TCP, 0);
  1002. //
  1003. // Initialize retransmit and delayed ack stuff.
  1004. //
  1005. NewTCB->tcb_rexmitcnt = 0;
  1006. NewTCB->tcb_rtt = 0;
  1007. NewTCB->tcb_smrtt = 0;
  1008. //
  1009. // Check for interface specific initial RTT.
  1010. // This can be as low as 3ms.
  1011. //
  1012. if ((InitialRTT = GetInitialRTTFromRCE(NewTCB->tcb_rce)) >
  1013. MIN_INITIAL_RTT) {
  1014. NewTCB->tcb_delta = MS_TO_TICKS(InitialRTT * 2);
  1015. NewTCB->tcb_rexmit = MS_TO_TICKS(InitialRTT);
  1016. } else {
  1017. NewTCB->tcb_delta = MS_TO_TICKS(6000);
  1018. NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
  1019. }
  1020. STOP_TCB_TIMER(NewTCB->tcb_rexmittimer);
  1021. STOP_TCB_TIMER(NewTCB->tcb_delacktimer);
  1022. }
  1023. //* FillTCPHeader - Fill the TCP header in.
  1024. //
  1025. // A utility routine to fill in the TCP header.
  1026. //
  1027. void // Returns: Nothing.
  1028. FillTCPHeader(
  1029. TCB *SendTCB, // TCB to fill from.
  1030. TCPHeader UNALIGNED *Header) // Header to fill into.
  1031. {
  1032. ushort S;
  1033. ulong L;
  1034. Header->tcp_src = SendTCB->tcb_sport;
  1035. Header->tcp_dest = SendTCB->tcb_dport;
  1036. L = SendTCB->tcb_sendnext;
  1037. Header->tcp_seq = net_long(L);
  1038. L = SendTCB->tcb_rcvnext;
  1039. Header->tcp_ack = net_long(L);
  1040. Header->tcp_flags = 0x1050;
  1041. *(ulong UNALIGNED *)&Header->tcp_xsum = 0;
  1042. S = (ushort)RcvWin(SendTCB);
  1043. Header->tcp_window = net_short(S);
  1044. }
  1045. //* TCPSend - Send data from a TCP connection.
  1046. //
  1047. // This is the main 'send data' routine. We go into a loop, trying
  1048. // to send data until we can't for some reason. First we compute
  1049. // the useable window, use it to figure the amount we could send. If
  1050. // the amount we could send meets certain criteria we'll build a frame
  1051. // and send it, after setting any appropriate control bits. We assume
  1052. // the caller has put a reference on the TCB.
  //
  // The TCB lock is expected to be held on entry: it is released (back to
  // PreLockIrql) around each hand-off of a packet to IP and re-acquired
  // afterwards. Every exit path gives up the caller's reference, either
  // through DerefTCB or, when the outgoing interface is disconnected, by
  // decrementing tcb_refcnt and calling TryToCloseTCB.
  // NOTE(review): DerefTCB and TryToCloseTCB are presumably also what release
  // the TCB lock, since both are handed PreLockIrql - confirm.
  1053. //
  1054. void // Returns: Nothing.
  1055. TCPSend(
  1056. TCB *SendTCB, // TCB to be sent from.
  1057. KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
  1058. {
  1059. int SendWin; // Useable send window.
  1060. uint AmountToSend; // Amount to send this time.
  1061. uint AmountLeft;
  1062. IPv6Header UNALIGNED *IP;
  1063. TCPHeader UNALIGNED *TCP;
  1064. PNDIS_PACKET Packet;
  1065. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1066. void *Memory;
  1067. TCPSendReq *CurSend;
  1068. SendCmpltContext *SCC;
  1069. SeqNum OldSeq;
  1070. IP_STATUS SendStatus; // Only referenced by the disabled (#if 0) code below.
  1071. NDIS_STATUS NdisStatus;
  1072. uint AmtOutstanding, AmtUnsent;
  1073. int ForceWin; // Window we're forced to use.
  1074. uint HeaderLength;
  1075. uint LinkOffset;
  1076. uint PMTU;
  1077. CHECK_STRUCT(SendTCB, tcb);
  1078. ASSERT(SendTCB->tcb_refcnt != 0);
  1079. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1080. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1081. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1082. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1083. //
  1084. // In most cases, we will already have a route at this point.
  1085. // However, if we failed to get one earlier in the passive receive
  1086. // path, we may need to retry here.
  1087. //
  1088. if (SendTCB->tcb_rce == NULL) {
  1089. InitRCE(SendTCB);
  1090. if (SendTCB->tcb_rce == NULL) {
  1091. goto bail;
  1092. }
  1093. }
  1094. //
  1095. // See if we should even be here. If another instance of ourselves is
  1096. // already in this code, or is about to enter it after completing a
  1097. // receive, then just skip on out.
  1098. //
  1099. if ((SendTCB->tcb_flags & IN_TCP_SEND) ||
  1100. (SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
  1101. SendTCB->tcb_flags |= SEND_AFTER_RCV;
  1102. goto bail;
  1103. }
  1104. SendTCB->tcb_flags |= IN_TCP_SEND;
  1105. //
  1106. // Verify that our cached RCE is still valid.
  1107. //
  1108. SendTCB->tcb_rce = ValidateRCE(SendTCB->tcb_rce);
  1109. if (IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  1110. //
  1111. // Fail existing send requests for TCBs with a disconnected
  1112. // outgoing interface, except when a loopback route is used.
  1113. //
  // This path returns without going through DerefTCB, so the caller's
  // reference is dropped by hand before the TCB is closed.
  // NOTE(review): IN_TCP_SEND is still set when we return from here.
  1114. ASSERT(SendTCB->tcb_refcnt != 0);
  1115. SendTCB->tcb_refcnt--;
  1116. TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
  1117. return;
  1118. }
  1119. //
  1120. // Verify that our cached Path MTU is still valid.
  1121. // Watch for changes to IPsec policies since they can also affect our MSS.
  1122. // REVIEW: Is this the best spot to do this?
  1123. //
  1124. PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
  1125. if (PMTU != SendTCB->tcb_pmtu ||
  1126. SecurityStateValidationCounter != SendTCB->tcb_security) {
  1127. //
  1128. // Either our Path MTU or the global security state has changed.
  1129. // Cache current values and then calculate a new MSS.
  1130. //
  1131. SendTCB->tcb_pmtu = PMTU;
  1132. SendTCB->tcb_security = SecurityStateValidationCounter;
  1133. CalculateMSSForTCB(SendTCB);
  1134. }
  1135. //
  1136. // We'll continue this loop until we send a FIN, or we break out
  1137. // internally for some other reason.
  1138. //
  1139. while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
  1140. CheckTCBSends(SendTCB);
  // AmtOutstanding is what has been sent but not yet acknowledged;
  // AmtUnsent is the remainder of tcb_unacked, i.e. what is still waiting
  // to go out. SendWin is what the smaller of the send window and the
  // congestion window still allows beyond the outstanding data (it can
  // come out negative, hence the signed type).
  1141. AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
  1142. AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
  1143. ASSERT(*(int *)&AmtUnsent >= 0);
  1144. SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
  1145. AmtOutstanding);
  1146. //
  1147. // If this send is after a fast recovery and sendwin is zero because
  1148. // of amount outstanding, then at least force 1 segment to prevent
  1149. // delayed ack timeouts from peer.
  1150. //
  1151. if (SendTCB->tcb_force) {
  1152. SendTCB->tcb_force = 0;
  1153. if (SendWin < SendTCB->tcb_mss) {
  1154. SendWin = SendTCB->tcb_mss;
  1155. }
  1156. }
  1157. //
  1158. // Since the window could have shrunk, need to get it to zero at
  1159. // least.
  1160. //
  1161. ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
  1162. FORCE_OUT_SHIFT);
  1163. SendWin = MAX(SendWin, ForceWin);
  1164. AmountToSend = MIN(MIN((uint)SendWin, AmtUnsent), SendTCB->tcb_mss);
  1165. ASSERT(SendTCB->tcb_mss > 0);
  1166. //
  1167. // See if we have enough to send. We'll send if we have at least a
  1168. // segment, or if we really have some data to send and we can send
  1169. // all that we have, or the send window is > 0 and we need to force
  1170. // output or send a FIN (note that if we need to force output
  1171. // SendWin will be at least 1 from the check above), or if we can
  1172. // send an amount == to at least half the maximum send window
  1173. // we've seen.
  1174. //
  1175. if (AmountToSend == SendTCB->tcb_mss ||
  1176. (AmountToSend != 0 && AmountToSend == AmtUnsent) ||
  1177. (SendWin != 0 &&
  1178. ((SendTCB->tcb_flags & (FORCE_OUTPUT | FIN_NEEDED)) ||
  1179. AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
  1180. //
  1181. // It's OK to send something. Allocate a packet header.
  1182. //
  1183. // REVIEW: It was easier to code all these allocations directly
  1184. // REVIEW: rather than use IPv6AllocatePacket.
  1185. //
  1186. // REVIEW: This grabs packets and buffers from the IPv6PacketPool
  1187. // REVIEW: and the IPv6BufferPool respectively. Should we instead
  1188. // REVIEW: have separate pools for TCP?
  1189. //
  1190. NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
  1191. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1192. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1193. "TCPSend: couldn't allocate packet header!?!\n"));
  1194. goto error_oor;
  1195. }
  1196. // We'll fill in the CompletionData below.
  1197. InitializeNdisPacket(Packet);
  1198. PC(Packet)->CompletionHandler = TCPSendComplete;
  1199. //
  1200. // Our header buffer has extra space at the beginning for other
  1201. // headers to be prepended to ours without requiring further
  1202. // allocation calls. It also has extra space at the end to hold
  1203. // the send completion data.
  1204. //
  // The total is rounded up to a multiple of the alignment of
  // SendCmpltContext, so that the context placed at the very end of the
  // allocation (see SCC below) starts on an aligned offset.
  1205. LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
  1206. HeaderLength =
  1207. (LinkOffset + sizeof(*IP) + sizeof(*TCP) +
  1208. sizeof(SendCmpltContext) +
  1209. __builtin_alignof(SendCmpltContext) - 1) &~
  1210. (UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
  1211. Memory = ExAllocatePool(NonPagedPool, HeaderLength);
  1212. if (Memory == NULL) {
  1213. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1214. "TCPSend: couldn't allocate header memory!?!\n"));
  1215. NdisFreePacket(Packet);
  1216. goto error_oor;
  1217. }
  1218. //
  1219. // When allocating the NDIS buffer describing this memory region,
  1220. // we don't tell it about the extra space on the end that we
  1221. // allocated for the send completion data.
  1222. //
  1223. NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
  1224. Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
  1225. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1226. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1227. "TCPSend: couldn't allocate buffer!?!\n"));
  1228. ExFreePool(Memory);
  1229. NdisFreePacket(Packet);
  1230. goto error_oor;
  1231. }
  1232. //
  1233. // Skip over the extra space that will be filled in later by the
  1234. // link level. At this level we add the IPv6Header, the
  1235. // TCPHeader, and the data.
  1236. //
  1237. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
  1238. IP->VersClassFlow = IP_VERSION;
  1239. IP->NextHeader = IP_PROTOCOL_TCP;
  1240. IP->HopLimit = TCPHopLimit(SendTCB);
  1241. IP->Source = SendTCB->tcb_saddr;
  1242. IP->Dest = SendTCB->tcb_daddr;
  1243. //
  1244. // Begin preparing the TCP header.
  1245. //
  1246. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  1247. FillTCPHeader(SendTCB, TCP);
  1248. //
  1249. // Store the send completion data in the same buffer as the TCP
  1250. // header, right after the TCP header. This saves allocation
  1251. // overhead and works because we don't consider this area to be
  1252. // part of the packet data (we set this buffer's length to
  1253. // indicate that the data ends with the TCP header above).
  1254. //
  1255. // Note that this code relies on the fact that we don't include
  1256. // any TCP options (and thus don't have a variable length TCP
  1257. // header) in our data packets.
  1258. //
  1259. SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
  1260. sizeof(*SCC));
  1261. PC(Packet)->CompletionData = SCC;
  1262. #if DBG
  1263. SCC->scc_sig = scc_signature;
  1264. #endif
  1265. SCC->scc_ubufcount = 0;
  1266. SCC->scc_tbufcount = 0;
  1267. SCC->scc_count = 0;
  1268. AmountLeft = AmountToSend;
  1269. if (AmountToSend != 0) {
  1270. long Result;
  1271. //
  1272. // Loop through the sends on the TCB, building a frame.
  1273. //
  1274. CurrentBuffer = FirstBuffer;
  1275. CurSend = SendTCB->tcb_cursend;
  1276. CHECK_STRUCT(CurSend, tsr);
  1277. SCC->scc_firstsend = CurSend;
  1278. do {
  // Each send request that contributes to this frame gets an extra
  // reference, and scc_count records how many requests were touched.
  1279. ASSERT(CurSend->tsr_refcnt > 0);
  1280. Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
  1281. ASSERT(Result > 0);
  1282. SCC->scc_count++;
  1283. //
  1284. // If the current send offset is 0 and the current
  1285. // send is less than or equal to what we have left
  1286. // to send, we haven't already put a transport
  1287. // buffer on this send, and nobody else is using
  1288. // the buffer chain directly, just use the input
  1289. // buffers. We check for other people using them
  1290. // by looking at tsr_lastbuf. If it's NULL,
  1291. // nobody else is using the buffers. If it's not
  1292. // NULL, somebody is.
  1293. //
  1294. if (SendTCB->tcb_sendofs == 0 &&
  1295. (SendTCB->tcb_sendsize <= AmountLeft) &&
  1296. (SCC->scc_tbufcount == 0) &&
  1297. CurSend->tsr_lastbuf == NULL) {
  1298. NDIS_BUFFER_LINKAGE(CurrentBuffer) =
  1299. SendTCB->tcb_sendbuf;
  1300. do {
  1301. SCC->scc_ubufcount++;
  1302. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1303. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  1304. CurSend->tsr_lastbuf = CurrentBuffer;
  1305. AmountLeft -= SendTCB->tcb_sendsize;
  1306. SendTCB->tcb_sendsize = 0;
  1307. } else {
  1308. uint AmountToDup;
  1309. PNDIS_BUFFER NewBuf, Buf;
  1310. uint Offset;
  1311. NDIS_STATUS NStatus;
  1312. uchar *VirtualAddress;
  1313. uint Length;
  1314. //
  1315. // Either the current send has more data than
  1316. // we want to send, or the starting offset is
  1317. // not 0. In either case we'll need to loop
  1318. // through the current send, allocating
  1319. // buffers.
  1320. //
  1321. Buf = SendTCB->tcb_sendbuf;
  1322. Offset = SendTCB->tcb_sendofs;
  1323. do {
  1324. ASSERT(Buf != NULL);
  1325. NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
  1326. LowPagePriority);
  1327. if (VirtualAddress == NULL) {
  1328. //
  1329. // Couldn't map into kernel address space.
  1330. // If the packet is already partly built,
  1331. // send what we've got, otherwise error out.
  1332. //
  1333. goto error_oor2;
  1334. }
  1335. ASSERT((Offset < Length) ||
  1336. (Offset == 0 && Length == 0));
  1337. //
  1338. // Adjust the length for the offset into
  1339. // this buffer.
  1340. //
  1341. Length -= Offset;
  1342. AmountToDup = MIN(AmountLeft, Length);
  1343. NdisAllocateBuffer(&NStatus, &NewBuf,
  1344. IPv6BufferPool,
  1345. VirtualAddress + Offset,
  1346. AmountToDup);
  1347. if (NStatus == NDIS_STATUS_SUCCESS) {
  1348. SCC->scc_tbufcount++;
  1349. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  1350. CurrentBuffer = NewBuf;
  1351. if (AmountToDup >= Length) {
  1352. // Exhausted this buffer.
  1353. Buf = NDIS_BUFFER_LINKAGE(Buf);
  1354. Offset = 0;
  1355. } else {
  1356. Offset += AmountToDup;
  1357. ASSERT(Offset < NdisBufferLength(Buf));
  1358. }
  1359. SendTCB->tcb_sendsize -= AmountToDup;
  1360. AmountLeft -= AmountToDup;
  1361. } else {
  1362. //
  1363. // Couldn't allocate a buffer. If
  1364. // the packet is already partly built,
  1365. // send what we've got, otherwise
  1366. // error out.
  1367. //
  // The mapping failure above also jumps to this label. With nothing
  // chained yet, the header buffer is attached and the packet is handed
  // to TCPSendComplete with a failure status, presumably so that it
  // releases the packet and the send request references taken so far
  // (confirm against TCPSendComplete). Otherwise the frame is cut short
  // at what has been built.
  1368. error_oor2:
  1369. if (SCC->scc_tbufcount == 0 &&
  1370. SCC->scc_ubufcount == 0) {
  1371. NdisChainBufferAtFront(Packet, FirstBuffer);
  1372. TCPSendComplete(Packet, IP_GENERAL_FAILURE);
  1373. goto error_oor;
  1374. }
  1375. AmountToSend -= AmountLeft;
  1376. AmountLeft = 0;
  1377. break;
  1378. }
  1379. } while (AmountLeft && SendTCB->tcb_sendsize);
  1380. SendTCB->tcb_sendbuf = Buf;
  1381. SendTCB->tcb_sendofs = Offset;
  1382. }
  1383. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  1384. ushort UP;
  1385. //
  1386. // This send is urgent data. We need to figure
  1387. // out what the urgent data pointer should be.
  1388. // We know sendnext is the starting sequence
  1389. // number of the frame, and that at the top of
  1390. // this do loop sendnext identified a byte in
  1391. // the CurSend at that time. We advanced CurSend
  1392. // at the same rate we've decremented
  1393. // AmountLeft (AmountToSend - AmountLeft ==
  1394. // AmountBuilt), so sendnext +
  1395. // (AmountToSend - AmountLeft) identifies a byte
  1396. // in the current value of CurSend, and that
  1397. // quantity plus tcb_sendsize is the sequence
  1398. // number one beyond the current send.
  1399. //
  // Unless BSD_URGENT is set, the pointer is backed up by one.
  1400. UP = (ushort)(AmountToSend - AmountLeft) +
  1401. (ushort)SendTCB->tcb_sendsize -
  1402. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  1403. TCP->tcp_urgent = net_short(UP);
  1404. TCP->tcp_flags |= TCP_FLAG_URG;
  1405. }
  1406. //
  1407. // See if we've exhausted this send. If we have,
  1408. // set the PUSH bit in this frame and move on to
  1409. // the next send. We also need to check the
  1410. // urgent data bit.
  1411. //
  1412. if (SendTCB->tcb_sendsize == 0) {
  1413. Queue *Next;
  1414. uchar PrevFlags;
  1415. //
  1416. // We've exhausted this send. Set the PUSH bit.
  1417. //
  1418. TCP->tcp_flags |= TCP_FLAG_PUSH;
  1419. PrevFlags = CurSend->tsr_flags;
  1420. Next = QNEXT(&CurSend->tsr_req.tr_q);
  1421. if (Next != QEND(&SendTCB->tcb_sendq)) {
  1422. CurSend = CONTAINING_RECORD(
  1423. QSTRUCT(TCPReq, Next, tr_q),
  1424. TCPSendReq, tsr_req);
  1425. CHECK_STRUCT(CurSend, tsr);
  1426. SendTCB->tcb_sendsize = CurSend->tsr_unasize;
  1427. SendTCB->tcb_sendofs = CurSend->tsr_offset;
  1428. SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
  1429. SendTCB->tcb_cursend = CurSend;
  1430. //
  1431. // Check the urgent flags. We can't combine new
  1432. // urgent data on to the end of old non-urgent
  1433. // data.
  1434. //
  // NOTE(review): the test below stops the frame when the previous
  // send was urgent and the new one is not, which is the reverse of
  // the case described above - confirm which is intended.
  1435. if ((PrevFlags & TSR_FLAG_URG) &&
  1436. !(CurSend->tsr_flags & TSR_FLAG_URG))
  1437. break;
  1438. } else {
  1439. ASSERT(AmountLeft == 0);
  1440. SendTCB->tcb_cursend = NULL;
  1441. SendTCB->tcb_sendbuf = NULL;
  1442. }
  1443. }
  1444. } while (AmountLeft != 0);
  1445. } else {
  1446. //
  1447. // We're in the loop, but AmountToSend is 0. This
  1448. // should happen only when we're sending a FIN. Check
  1449. // this, and return if it's not true.
  1450. //
  1451. ASSERT(AmtUnsent == 0);
  1452. if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
  1453. // KdBreakPoint();
  // Nothing to send after all: give back the header memory (found again
  // through the buffer describing it), the buffer and the packet.
  1454. ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
  1455. NdisFreeBuffer(FirstBuffer);
  1456. NdisFreePacket(Packet);
  1457. break;
  1458. }
  1459. SCC->scc_firstsend = NULL; // REVIEW: looks unnecessary.
  1460. NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
  1461. }
  1462. // Adjust for what we're really going to send.
  1463. AmountToSend -= AmountLeft;
  1464. //
  1465. // Update the sequence numbers, and start a RTT measurement
  1466. // if needed.
  1467. //
  1468. OldSeq = SendTCB->tcb_sendnext;
  1469. SendTCB->tcb_sendnext += AmountToSend;
  1470. if (!SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
  1471. //
  1472. // We have at least some retransmission. Bump the stat.
  1473. //
  1474. TStats.ts_retranssegs++;
  1475. }
  1476. if (SEQ_GT(SendTCB->tcb_sendnext, SendTCB->tcb_sendmax)) {
  1477. //
  1478. // We're sending at least some new data.
  1479. // We can't advance sendmax once FIN_SENT is set.
  1480. //
  1481. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1482. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1483. TStats.ts_outsegs++;
  1484. //
  1485. // Check the Round-Trip Timer.
  1486. //
  1487. if (SendTCB->tcb_rtt == 0) {
  1488. // No RTT running, so start one.
  1489. SendTCB->tcb_rtt = TCPTime;
  1490. SendTCB->tcb_rttseq = OldSeq;
  1491. }
  1492. }
  1493. //
  1494. // We've built the frame entirely. If we've sent everything
  1495. // we have and there's a FIN pending, OR it in.
  1496. //
  1497. if (AmtUnsent == AmountToSend) {
  1498. if (SendTCB->tcb_flags & FIN_NEEDED) {
  1499. ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
  1500. (SendTCB->tcb_sendnext ==
  1501. (SendTCB->tcb_sendmax - 1)));
  1502. //
  1503. // See if we still have room in the window for a FIN.
  1504. //
  1505. if (SendWin > (int) AmountToSend) {
  1506. TCP->tcp_flags |= TCP_FLAG_FIN;
  1507. SendTCB->tcb_sendnext++;
  1508. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1509. SendTCB->tcb_flags |= (FIN_SENT | FIN_OUTSTANDING);
  1510. SendTCB->tcb_flags &= ~FIN_NEEDED;
  1511. }
  1512. }
  1513. }
  // From here on AmountToSend is the payload length handed to
  // ChecksumPacket and IPv6Send: the TCP header plus the data.
  1514. AmountToSend += sizeof(TCPHeader);
  1515. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1516. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  1517. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
  1518. STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
  1519. STOP_TCB_TIMER(SendTCB->tcb_swstimer);
  1520. SendTCB->tcb_alive = TCPTime;
  1521. // Add the buffers to the packet.
  1522. NdisChainBufferAtFront(Packet, FirstBuffer);
  1523. //
  1524. // Compute the TCP checksum. It covers the entire TCP segment
  1525. // starting with the TCP header, plus the IPv6 pseudo-header.
  1526. //
  1527. TCP->tcp_xsum = 0;
  1528. TCP->tcp_xsum = ChecksumPacket(
  1529. Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
  1530. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  1531. //
  1532. // Everything's ready. Now send the packet.
  1533. //
  1534. // Note that IPv6Send does not return a status code.
  1535. // Instead it *always* completes the packet
  1536. // with an appropriate status code.
  1537. //
  // The TCB lock is dropped for the hand-off and taken again once the
  // packet has been passed on (or failed).
  1538. KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
  1539. if (TCP->tcp_xsum == 0) {
  1540. //
  1541. // ChecksumPacket failed, so abort the transmission.
  1542. //
  1543. IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
  1544. }
  1545. else {
  1546. IPv6Send(Packet, LinkOffset, IP,
  1547. AmountToSend, SendTCB->tcb_rce, 0,
  1548. IP_PROTOCOL_TCP,
  1549. net_short(TCP->tcp_src),
  1550. net_short(TCP->tcp_dest));
  1551. }
  // The block below is compiled out. It handles a status returned by the
  // send, which IPv6Send (see above) does not provide.
  1552. #if 0
  1553. SendTCB->tcb_error = SendStatus;
  1554. if (SendStatus != IP_PENDING) {
  1555. TCPSendComplete(FirstBuffer);
  1556. if (SendStatus != IP_SUCCESS) {
  1557. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  1558. //
  1559. // This packet didn't get sent. If nothing's
  1560. // changed in the TCB, put sendnext back to
  1561. // what we just tried to send. Depending on
  1562. // the error, we may try again.
  1563. //
  1564. if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
  1565. SEQ_LT(OldSeq, SendTCB->tcb_sendnext))
  1566. ResetSendNext(SendTCB, OldSeq);
  1567. // We know this packet didn't get sent. Start
  1568. // the retransmit timer now, if it's not already
  1569. // running, in case someone came in while we
  1570. // were in IP and stopped it.
  1571. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1572. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  1573. SendTCB->tcb_rexmit);
  1574. //
  1575. // If it failed because of an MTU problem, get
  1576. // the new MTU and try again.
  1577. //
  1578. if (SendStatus == IP_PACKET_TOO_BIG) {
  1579. uint NewMTU;
  1580. //
  1581. // The MTU has changed. Update it, and try again.
  1582. //
  1583. // REVIEW: IPv4 had code here to call down to IP
  1584. // REVIEW: to find out what the new MTU was for
  1585. // REVIEW: this connection. Result in "NewMTU",
  1586. // REVIEW: status of call in "SendStatus".
  1587. if (SendStatus != IP_SUCCESS)
  1588. break;
  1589. //
  1590. // We have a new MTU. Make sure it's big
  1591. // enough to use. If not, correct this and
  1592. // turn off MTU discovery on this TCB.
  1593. // Otherwise use the new MTU.
  1594. //
  1595. if (NewMTU <= (sizeof(TCPHeader) +
  1596. SendTCB->tcb_opt.ioi_optlength)) {
  1597. //
  1598. // The new MTU is too small to use. Turn
  1599. // off PMTU discovery on this TCB, and
  1600. // drop to our off net MTU size.
  1601. //
  1602. SendTCB->tcb_opt.ioi_flags &= ~IP_FLAG_DF;
  1603. SendTCB->tcb_mss = MIN((ushort)DEFAULT_MSS,
  1604. SendTCB->tcb_remmss);
  1605. } else {
  1606. //
  1607. // The new MTU is adequate. Adjust it for
  1608. // the header size and options length, and
  1609. // use it.
  1610. //
  1611. NewMTU -= sizeof(TCPHeader) -
  1612. SendTCB->tcb_opt.ioi_optlength;
  1613. SendTCB->tcb_mss = MIN((ushort)NewMTU,
  1614. SendTCB->tcb_remmss);
  1615. }
  1616. ASSERT(SendTCB->tcb_mss > 0);
  1617. continue;
  1618. }
  1619. break;
  1620. }
  1621. }
  1622. #endif
  1623. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  1624. continue;
  1625. } else {
  1626. //
  1627. // We've decided we can't send anything now. Figure out why, and
  1628. // see if we need to set a timer.
  1629. //
  1630. if (SendTCB->tcb_sendwin == 0) {
  1631. if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
  1632. SendTCB->tcb_flags |= FLOW_CNTLD;
  1633. SendTCB->tcb_rexmitcnt = 0;
  1634. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  1635. SendTCB->tcb_rexmit);
  1636. SendTCB->tcb_slowcount++;
  1637. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  1638. } else
  1639. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1640. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  1641. SendTCB->tcb_rexmit);
  1642. } else
  1643. if (AmountToSend != 0)
  1644. // We have something to send, but we're not sending
  1645. // it, presumably due to SWS avoidance.
  1646. if (!TCB_TIMER_RUNNING(SendTCB->tcb_swstimer))
  1647. START_TCB_TIMER(SendTCB->tcb_swstimer, SWS_TO);
  1648. break;
  1649. }
  1650. } // while (!FIN_OUTSTANDING)
  1651. //
  1652. // We're done sending, so we don't need the output flags set.
  1653. //
  1654. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
  1655. SEND_AFTER_RCV);
  1656. bail:
  1657. DerefTCB(SendTCB, PreLockIrql);
  1658. return;
  1659. //
  1660. // Common case error handling code for out of resource conditions. Start the
  1661. // retransmit timer if it's not already running (so that we try this again
  1662. // later), clean up and return.
  1663. //
  // NOTE(review): unlike the normal exit above, SEND_AFTER_RCV is not
  // cleared on this path - confirm that is intended.
  1664. error_oor:
  1665. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1666. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  1667. // We had an out of resource problem, so clear the OUTPUT flags.
  1668. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  1669. DerefTCB(SendTCB, PreLockIrql);
  1670. return;
  1671. } // end of TCPSend()
  1672. //* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
  1673. //
  1674. // Called to fast retransmit the dropped segment.
  1675. //
  1676. // We assume the caller has put a reference on the TCB, and the TCB is locked
  1677. // on entry. The reference is dropped and the lock released before returning.
  1678. //
  1679. void // Returns: Nothing.
  1680. ResetAndFastSend(
  1681. TCB *SeqTCB, // TCB for this connection.
  1682. SeqNum NewSeq, // Sequence number to set.
  1683. uint NewCWin) // New value for congestion window.
  1684. {
  1685. TCPSendReq *SendReq;
  1686. Queue *CurQ;
  1687. PNDIS_BUFFER Buffer;
  1688. uint Offset;
  1689. uint SendSize;
  1690. CHECK_STRUCT(SeqTCB, tcb);
  1691. ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
  1692. //
  1693. // The new seq must be less than send max, or NewSeq, senduna, sendnext,
  1694. // and sendmax must all be equal. (The latter case happens when we're
  1695. // called exiting TIME_WAIT, or possibly when we're retransmitting
  1696. // during a flow controlled situation).
  1697. //
  1698. ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
  1699. (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
  1700. SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
  1701. SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
  1702. if (SYNC_STATE(SeqTCB->tcb_state) &&
  1703. (SeqTCB->tcb_state != TCB_TIME_WAIT)) {
  1704. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  1705. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  1706. SendReq = (TCPSendReq *) CONTAINING_RECORD(CurQ, TCPReq, tr_q);
  1707. //
  1708. // SendReq points to the first send request on the send queue.
  1709. // We're pointing at the proper send req now. We need to go down.
  1710. //
  1711. // SendReq points to the cursend.
  1712. // SendSize point to sendsize in the cursend.
  1713. //
  1714. SendSize = SendReq->tsr_unasize;
  1715. Buffer = SendReq->tsr_buffer;
  1716. Offset = SendReq->tsr_offset;
  1717. // Call the fast retransmit send now.
  1718. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  1719. SeqTCB->tcb_mss);
  1720. } else {
  1721. ASSERT(SeqTCB->tcb_cursend == NULL);
  1722. }
  1723. }
  1724. SeqTCB->tcb_cwin = NewCWin;
  1725. DerefTCB(SeqTCB, DISPATCH_LEVEL);
  1726. return;
  1727. }
  1728. //* TCPFastSend - To send a segment without changing TCB state.
  1729. //
  1730. // Called to handle fast retransmit of the lost segment.
  1731. // tcb_lock will be held while entering (called by TCPRcv).
  1732. //
  1733. void // Returns: Nothing.
  1734. TCPFastSend(
  1735. TCB *SendTCB, // TCB for this connection.
  1736. PNDIS_BUFFER in_SendBuf, // NDIS buffer.
  1737. uint SendOfs, // Send offset.
  1738. TCPSendReq *CurSend, // Current send request.
  1739. uint SendSize, // Size of this send.
  1740. SeqNum SendNext, // Sequence number to use for this send.
  1741. int in_ToBeSent) // Cap on SendSize (REVIEW: Callee should cap).
  1742. {
  1743. int SendWin; // Useable send window.
  1744. uint AmountToSend; // Amount to send this time.
  1745. uint AmountLeft;
  1746. IPv6Header UNALIGNED *IP;
  1747. TCPHeader UNALIGNED *TCP;
  1748. PNDIS_PACKET Packet;
  1749. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1750. void *Memory;
  1751. SendCmpltContext *SCC;
  1752. IP_STATUS SendStatus;
  1753. NDIS_STATUS NdisStatus;
  1754. uint AmtOutstanding, AmtUnsent;
  1755. int ForceWin; // Window we're forced to use.
  1756. uint HeaderLength;
  1757. uint LinkOffset;
  1758. uint PMTU;
  1759. KIRQL PreLockIrql;
  1760. PNDIS_BUFFER SendBuf = in_SendBuf;
  1761. PreLockIrql = DISPATCH_LEVEL;
  1762. CHECK_STRUCT(SendTCB, tcb);
  1763. ASSERT(SendTCB->tcb_refcnt != 0);
  1764. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1765. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1766. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1767. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1768. //
  1769. // In most cases, we will already have a route at this point.
  1770. // However, if we failed to get one earlier in the passive receive
  1771. // path, we may need to retry here.
  1772. //
  1773. if (SendTCB->tcb_rce == NULL) {
  1774. InitRCE(SendTCB);
  1775. if (SendTCB->tcb_rce == NULL) {
  1776. return;
  1777. }
  1778. }
  1779. //
  1780. // Verify that our cached RCE is still valid.
  1781. //
  1782. SendTCB->tcb_rce = ValidateRCE(SendTCB->tcb_rce);
  1783. if (IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  1784. //
  1785. // Fail existing send requests for TCBs with a disconnected
  1786. // outgoing interface, except when a loopback route is used.
  1787. //
  1788. ASSERT(SendTCB->tcb_refcnt != 0);
  1789. SendTCB->tcb_refcnt--;
  1790. TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
  1791. return;
  1792. }
  1793. //
  1794. // Verify that our cached Path MTU is still valid.
  1795. // Watch for changes to IPsec policies since they can also effect our MSS.
  1796. // REVIEW: This the best spot to do this?
  1797. //
  1798. PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
  1799. if (PMTU != SendTCB->tcb_pmtu ||
  1800. SecurityStateValidationCounter != SendTCB->tcb_security) {
  1801. //
  1802. // Either our Path MTU or the global security state has changed.
  1803. // Cache current values and then calculate a new MSS.
  1804. //
  1805. SendTCB->tcb_pmtu = PMTU;
  1806. SendTCB->tcb_security = SecurityStateValidationCounter;
  1807. CalculateMSSForTCB(SendTCB);
  1808. }
  1809. AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
  1810. AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize), (int)SendTCB->tcb_sendwin);
  1811. while (AmtUnsent > 0) {
  1812. if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
  1813. //
  1814. // Since tcb_lock is released in this loop
  1815. // it is possible that delayed ack acked
  1816. // what we are trying to retransmit.
  1817. //
  1818. goto error_oor;
  1819. }
  1820. // AmtUnsent below was minimum of sendwin and amtunsent
  1821. AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
  1822. ASSERT((int)AmtUnsent >= 0);
  1823. //
  1824. // We're going to send something. Allocate a packet header.
  1825. //
  1826. // REVIEW: It was easier to code all these allocations directly
  1827. // REVIEW: rather than use IPv6AllocatePacket.
  1828. //
  1829. // REVIEW: This grabs packets and buffers from the IPv6PacketPool
  1830. // REVIEW: and the IPv6BufferPool respectively. Should we instead
  1831. // REVIEW: have separate pools for TCP?
  1832. //
  1833. NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
  1834. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1835. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1836. "TCPSend: couldn't allocate packet header!?!\n"));
  1837. goto error_oor;
  1838. }
  1839. // We'll fill in the CompletionData below.
  1840. InitializeNdisPacket(Packet);
  1841. PC(Packet)->CompletionHandler = TCPSendComplete;
  1842. //
  1843. // Our header buffer has extra space at the beginning for other
  1844. // headers to be prepended to ours without requiring further
  1845. // allocation calls. It also has extra space at the end to hold
  1846. // the send completion data.
  1847. //
  1848. LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
  1849. HeaderLength = (LinkOffset + sizeof(*IP) + sizeof(*TCP) +
  1850. sizeof(SendCmpltContext) +
  1851. __builtin_alignof(SendCmpltContext) - 1) &~
  1852. (UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
  1853. Memory = ExAllocatePool(NonPagedPool, HeaderLength);
  1854. if (Memory == NULL) {
  1855. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1856. "TCPSend: couldn't allocate header memory!?!\n"));
  1857. NdisFreePacket(Packet);
  1858. goto error_oor;
  1859. }
  1860. //
  1861. // When allocating the NDIS buffer describing this memory region,
  1862. // we don't tell it about the extra space on the end that we
  1863. // allocated for the send completion data.
  1864. //
  1865. NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
  1866. Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
  1867. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1868. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1869. "TCPSend: couldn't allocate buffer!?!\n"));
  1870. ExFreePool(Memory);
  1871. NdisFreePacket(Packet);
  1872. goto error_oor;
  1873. }
  1874. //
  1875. // Skip over the extra space that will be filled in later by the
  1876. // link level. At this level we add the IPv6Header, the
  1877. // TCPHeader, and the data.
  1878. //
  1879. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
  1880. IP->VersClassFlow = IP_VERSION;
  1881. IP->NextHeader = IP_PROTOCOL_TCP;
  1882. IP->HopLimit = TCPHopLimit(SendTCB);
  1883. IP->Source = SendTCB->tcb_saddr;
  1884. IP->Dest = SendTCB->tcb_daddr;
  1885. //
  1886. // Begin preparing the TCP header.
  1887. //
  1888. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  1889. FillTCPHeader(SendTCB, TCP);
  1890. TCP->tcp_seq = net_long(SendNext);
  1891. //
  1892. // Store the send completion data in the same buffer as the TCP
  1893. // header, right after the TCP header. This saves allocation
  1894. // overhead and works because we don't consider this area to be
  1895. // part of the packet data (we set this buffer's length to
  1896. // indicate that the data ends with the TCP header above).
  1897. //
  1898. // Note that this code relies on the fact that we don't include
  1899. // any TCP options (and thus don't have a variable length TCP
  1900. // header) in our data packets.
  1901. //
  1902. SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
  1903. sizeof(*SCC));
  1904. PC(Packet)->CompletionData = SCC;
  1905. #if DBG
  1906. SCC->scc_sig = scc_signature;
  1907. #endif
  1908. SCC->scc_ubufcount = 0;
  1909. SCC->scc_tbufcount = 0;
  1910. SCC->scc_count = 0;
  1911. AmountLeft = AmountToSend;
  1912. if (AmountToSend != 0) {
  1913. long Result;
  1914. //
  1915. // Loop through the sends on the TCB, building a frame.
  1916. //
  1917. CurrentBuffer = FirstBuffer;
  1918. CHECK_STRUCT(CurSend, tsr);
  1919. SCC->scc_firstsend = CurSend;
  1920. do {
  1921. ASSERT(CurSend->tsr_refcnt > 0);
  1922. Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
  1923. ASSERT(Result > 0);
  1924. SCC->scc_count++;
  1925. //
  1926. // If the current send offset is 0 and the current
  1927. // send is less than or equal to what we have left
  1928. // to send, we haven't already put a transport
  1929. // buffer on this send, and nobody else is using
  1930. // the buffer chain directly, just use the input
  1931. // buffers. We check for other people using them
  1932. // by looking at tsr_lastbuf. If it's NULL,
  1933. // nobody else is using the buffers. If it's not
  1934. // NULL, somebody is.
  1935. //
  1936. if (SendOfs == 0 &&
  1937. (SendSize <= AmountLeft) &&
  1938. (SCC->scc_tbufcount == 0) &&
  1939. CurSend->tsr_lastbuf == NULL) {
  1940. NDIS_BUFFER_LINKAGE(CurrentBuffer) = in_SendBuf;
  1941. do {
  1942. SCC->scc_ubufcount++;
  1943. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1944. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  1945. CurSend->tsr_lastbuf = CurrentBuffer;
  1946. AmountLeft -= SendSize;
  1947. } else {
  1948. uint AmountToDup;
  1949. PNDIS_BUFFER NewBuf, Buf;
  1950. uint Offset;
  1951. NDIS_STATUS NStatus;
  1952. uchar *VirtualAddress;
  1953. uint Length;
  1954. //
  1955. // Either the current send has more data than
  1956. // we want to send, or the starting offset is
  1957. // not 0. In either case we'll need to loop
  1958. // through the current send, allocating buffers.
  1959. //
  1960. Buf = SendBuf;
  1961. Offset = SendOfs;
  1962. do {
  1963. ASSERT(Buf != NULL);
  1964. NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
  1965. LowPagePriority);
  1966. if (VirtualAddress == NULL) {
  1967. goto error_oor2;
  1968. }
  1969. ASSERT((Offset < Length) ||
  1970. (Offset == 0 && Length == 0));
  1971. //
  1972. // Adjust the length for the offset into
  1973. // this buffer.
  1974. //
  1975. Length -= Offset;
  1976. AmountToDup = MIN(AmountLeft, Length);
  1977. NdisAllocateBuffer(&NStatus, &NewBuf,
  1978. IPv6BufferPool,
  1979. VirtualAddress + Offset,
  1980. AmountToDup);
  1981. if (NStatus == NDIS_STATUS_SUCCESS) {
  1982. SCC->scc_tbufcount++;
  1983. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  1984. CurrentBuffer = NewBuf;
  1985. if (AmountToDup >= Length) {
  1986. // Exhausted this buffer.
  1987. Buf = NDIS_BUFFER_LINKAGE(Buf);
  1988. Offset = 0;
  1989. } else {
  1990. Offset += AmountToDup;
  1991. ASSERT(Offset < NdisBufferLength(Buf));
  1992. }
  1993. SendSize -= AmountToDup;
  1994. AmountLeft -= AmountToDup;
  1995. } else {
  1996. //
  1997. // Couldn't allocate a buffer. If
  1998. // the packet is already partly built,
  1999. // send what we've got, otherwise
  2000. // error out.
  2001. //
  2002. error_oor2:
  2003. if (SCC->scc_tbufcount == 0 &&
  2004. SCC->scc_ubufcount == 0) {
  2005. NdisChainBufferAtFront(Packet, FirstBuffer);
  2006. TCPSendComplete(Packet, IP_GENERAL_FAILURE);
  2007. goto error_oor;
  2008. }
  2009. AmountToSend -= AmountLeft;
  2010. AmountLeft = 0;
  2011. break;
  2012. }
  2013. } while (AmountLeft && SendSize);
  2014. SendBuf = Buf;
  2015. SendOfs = Offset;
  2016. }
  2017. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  2018. ushort UP;
  2019. //
  2020. // This send is urgent data. We need to figure
  2021. // out what the urgent data pointer should be.
  2022. // We know sendnext is the starting sequence
  2023. // number of the frame, and that at the top of
  2024. // this do loop sendnext identified a byte in
  2025. // the CurSend at that time. We advanced CurSend
  2026. // at the same rate we've decremented
  2027. // AmountLeft (AmountToSend - AmountLeft ==
  2028. // AmountBuilt), so sendnext +
  2029. // (AmountToSend - AmountLeft) identifies a byte
  2030. // in the current value of CurSend, and that
  2031. // quantity plus tcb_sendsize is the sequence
  2032. // number one beyond the current send.
  2033. //
  2034. UP = (ushort) (AmountToSend - AmountLeft) +
  2035. (ushort) SendTCB->tcb_sendsize -
  2036. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  2037. TCP->tcp_urgent = net_short(UP);
  2038. TCP->tcp_flags |= TCP_FLAG_URG;
  2039. }
  2040. //
  2041. // See if we've exhausted this send. If we have,
  2042. // set the PUSH bit in this frame and move on to
  2043. // the next send. We also need to check the
  2044. // urgent data bit.
  2045. //
  2046. if (SendSize == 0) {
  2047. Queue *Next;
  2048. ulong PrevFlags;
  2049. //
  2050. // We've exhausted this send. Set the PUSH bit.
  2051. //
  2052. TCP->tcp_flags |= TCP_FLAG_PUSH;
  2053. PrevFlags = CurSend->tsr_flags;
  2054. Next = QNEXT(&CurSend->tsr_req.tr_q);
  2055. if (Next != QEND(&SendTCB->tcb_sendq)) {
  2056. CurSend = CONTAINING_RECORD(
  2057. QSTRUCT(TCPReq, Next, tr_q),
  2058. TCPSendReq, tsr_req);
  2059. CHECK_STRUCT(CurSend, tsr);
  2060. SendSize = CurSend->tsr_unasize;
  2061. SendOfs = CurSend->tsr_offset;
  2062. SendBuf = CurSend->tsr_buffer;
  2063. //
  2064. // Check the urgent flags. We can't combine new
  2065. // urgent data on to the end of old non-urgent
  2066. // data.
  2067. //
  2068. if ((PrevFlags & TSR_FLAG_URG) &&
  2069. !(CurSend->tsr_flags & TSR_FLAG_URG)) {
  2070. break;
  2071. }
  2072. } else {
  2073. ASSERT(AmountLeft == 0);
  2074. CurSend = NULL;
  2075. SendBuf = NULL;
  2076. }
  2077. }
  2078. } while (AmountLeft != 0);
  2079. } else {
  2080. //
  2081. // Amt to send is 0.
  2082. // Just bail out and start timer.
  2083. //
  2084. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2085. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  2086. SendTCB->tcb_rexmit);
  2087. }
  2088. ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
  2089. NdisFreeBuffer(FirstBuffer);
  2090. NdisFreePacket(Packet);
  2091. return;
  2092. }
  2093. //
  2094. // Adjust for what we're really going to send.
  2095. //
  2096. AmountToSend -= AmountLeft;
  2097. SendNext += AmountToSend;
  2098. AmtUnsent -= AmountToSend;
  2099. TStats.ts_retranssegs++;
  2100. AmountToSend += sizeof(TCPHeader);
  2101. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2102. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  2103. }
  2104. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
  2105. STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
  2106. STOP_TCB_TIMER(SendTCB->tcb_swstimer);
  2107. //
  2108. // Add the buffers to the packet.
  2109. //
  2110. NdisChainBufferAtFront(Packet, FirstBuffer);
  2111. //
  2112. // Compute the TCP checksum. It covers the entire TCP segment
  2113. // starting with the TCP header, plus the IPv6 pseudo-header.
  2114. //
  2115. TCP->tcp_xsum = 0;
  2116. TCP->tcp_xsum = ChecksumPacket(
  2117. Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
  2118. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  2119. //
  2120. // Everything's ready. Now send the packet.
  2121. //
  2122. // Note that IPv6Send does not return a status code.
  2123. // Instead it *always* completes the packet
  2124. // with an appropriate status code.
  2125. //
  2126. KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
  2127. if (TCP->tcp_xsum == 0) {
  2128. //
  2129. // ChecksumPacket failed, so abort the transmission.
  2130. //
  2131. IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
  2132. } else {
  2133. IPv6Send(Packet, LinkOffset, IP,
  2134. AmountToSend, SendTCB->tcb_rce, 0,
  2135. IP_PROTOCOL_TCP,
  2136. net_short(TCP->tcp_src),
  2137. net_short(TCP->tcp_dest));
  2138. }
  2139. //
  2140. // Reacquire lock we dropped before sending.
  2141. //
  2142. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  2143. }
  2144. return;
  2145. //
  2146. // Common case error handling code for out of resource conditions.
  2147. // Start the retransmit timer if it's not already running
  2148. // (so that we try this again later), clean up and return.
  2149. //
  2150. error_oor:
  2151. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2152. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  2153. }
  2154. return;
  2155. }
  2156. //* TDISend - Send data on a connection.
  2157. //
  2158. // The main TDI send entry point. We take the input parameters, validate
  2159. // them, allocate a send request, etc. We then put the send request on the
  2160. // queue. If we have no other sends on the queue or Nagling is disabled we'll
  2161. // call TCPSend to send the data.
  2162. //
  2163. TDI_STATUS // Returns: Status of attempt to send.
  2164. TdiSend(
  2165. PTDI_REQUEST Request, // TDI request for the call.
  2166. ushort Flags, // Flags for this send.
  2167. uint SendLength, // Length in bytes of send.
  2168. PNDIS_BUFFER SendBuffer) // Buffer chain to be sent.
  2169. {
  2170. TCPConn *Conn;
  2171. TCB *SendTCB;
  2172. TCPSendReq *SendReq;
  2173. KIRQL OldIrql;
  2174. TDI_STATUS Error;
  2175. uint EmptyQ;
  2176. #if DBG
  2177. uint RealSendSize;
  2178. PNDIS_BUFFER Temp;
  2179. //
  2180. // Loop through the buffer chain, and make sure that the length matches
  2181. // up with SendLength.
  2182. //
  2183. Temp = SendBuffer;
  2184. RealSendSize = 0;
  2185. do {
  2186. ASSERT(Temp != NULL);
  2187. RealSendSize += NdisBufferLength(Temp);
  2188. Temp = NDIS_BUFFER_LINKAGE(Temp);
  2189. } while (Temp != NULL);
  2190. ASSERT(RealSendSize == SendLength);
  2191. #endif
  2192. //
  2193. // Grab lock on Connection Table. Then get our connection info from
  2194. // the TDI request, and our TCP control block from that.
  2195. //
  2196. Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext),
  2197. &OldIrql);
  2198. if (Conn == NULL) {
  2199. Error = TDI_INVALID_CONNECTION;
  2200. goto abort;
  2201. }
  2202. CHECK_STRUCT(Conn, tc);
  2203. SendTCB = Conn->tc_tcb;
  2204. if (SendTCB == NULL) {
  2205. Error = TDI_INVALID_STATE;
  2206. KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, OldIrql);
  2207. abort:
  2208. return Error;
  2209. }
  2210. CHECK_STRUCT(SendTCB, tcb);
  2211. //
  2212. // Switch to a finer-grained lock:
  2213. // Drop lock on the Connection Table in favor of one on our TCB.
  2214. //
  2215. KeAcquireSpinLockAtDpcLevel(&SendTCB->tcb_lock);
  2216. KeReleaseSpinLockFromDpcLevel(&Conn->tc_ConnBlock->cb_lock);
  2217. //
  2218. // Make sure our TCB is in a send-able state.
  2219. //
  2220. if (!DATA_SEND_STATE(SendTCB->tcb_state) || CLOSING(SendTCB)) {
  2221. Error = TDI_INVALID_STATE;
  2222. goto abort2;
  2223. }
  2224. CheckTCBSends(SendTCB); // Just a debug check.
  2225. if (SynAttackProtect && (SendTCB->tcb_rce == NULL)) {
  2226. InitRCE(SendTCB);
  2227. }
  2228. //
  2229. // Verify that the cached RCE is still valid.
  2230. //
  2231. SendTCB->tcb_rce = ValidateRCE(SendTCB->tcb_rce);
  2232. ASSERT(SendTCB->tcb_rce != NULL);
  2233. if (IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  2234. //
  2235. // Fail new send requests for TCBs with a disconnected
  2236. // outgoing interface, except when the loopback route is used.
  2237. //
  2238. Error = TDI_INVALID_STATE;
  2239. goto abort2;
  2240. }
  2241. if (SendLength == 0) {
  2242. //
  2243. // Wow, nothing to do!
  2244. //
  2245. // REVIEW: Can't we do this check earlier (like before we even grab the
  2246. // REVIEW: Connection Table lock? The only reason I can think not to
  2247. // REVIEW: would be if something cared about the return code if a bad
  2248. // REVIEW: Tdi Request was given to us.
  2249. //
  2250. Error = TDI_SUCCESS;
  2251. goto abort2;
  2252. }
  2253. //
  2254. // We have a TCB, and it's valid. Allocate a send request now.
  2255. //
  2256. SendReq = GetSendReq();
  2257. if (SendReq == NULL) {
  2258. Error = TDI_NO_RESOURCES;
  2259. abort2:
  2260. KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
  2261. return Error;
  2262. }
  2263. //
  2264. // Prepare a TCP send request based on the TDI request and the
  2265. // passed in buffer chain.
  2266. //
  2267. SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
  2268. SendReq->tsr_req.tr_context = Request->RequestContext;
  2269. SendReq->tsr_buffer = SendBuffer;
  2270. SendReq->tsr_size = SendLength;
  2271. SendReq->tsr_unasize = SendLength;
  2272. SendReq->tsr_refcnt = 1; // ACK will decrement this ref
  2273. SendReq->tsr_offset = 0;
  2274. SendReq->tsr_lastbuf = NULL;
  2275. SendReq->tsr_time = TCPTime;
  2276. SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ? TSR_FLAG_URG : 0;
  2277. //
  2278. // Check current status of our send queue.
  2279. //
  2280. EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
  2281. //
  2282. // Add this send request to our send queue.
  2283. //
  2284. SendTCB->tcb_unacked += SendLength;
  2285. ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
  2286. if (SendTCB->tcb_cursend == NULL) {
  2287. //
  2288. // No existing current send request, so make this new one
  2289. // the current send.
  2290. //
  2291. // REVIEW: Is this always equivalent to EMPTYQ test above?
  2292. // REVIEW: If so, why not just set EmptyQ flag here and save a test?
  2293. //
  2294. SendTCB->tcb_cursend = SendReq;
  2295. SendTCB->tcb_sendbuf = SendBuffer;
  2296. SendTCB->tcb_sendofs = 0;
  2297. SendTCB->tcb_sendsize = SendLength;
  2298. }
  2299. //
  2300. // See if we should try to send now. We attempt to do so if we weren't
  2301. // already blocked, or if we were and either the Nagle Algorithm is turned
  2302. // off or we now have at least one max segment worth of data to send.
  2303. //
  2304. if (EmptyQ || (!(SendTCB->tcb_flags & NAGLING) ||
  2305. (SendTCB->tcb_unacked -
  2306. (SendTCB->tcb_sendmax - SendTCB->tcb_senduna))
  2307. >= SendTCB->tcb_mss)) {
  2308. SendTCB->tcb_refcnt++;
  2309. TCPSend(SendTCB, OldIrql);
  2310. } else
  2311. KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
  2312. //
  2313. // When TCPSend returns, we may or may not have already sent the data
  2314. // associated with this particular request.
  2315. //
  2316. return TDI_PENDING;
  2317. }
  2318. #pragma BEGIN_INIT
  2319. //* InitTCPSend - Initialize our send side.
  2320. //
  2321. // Called during init time to initialize our TCP send state.
  2322. //
  2323. int // Returns: TRUE if we inited, false if we didn't.
  2324. InitTCPSend(
  2325. void) // Nothing.
  2326. {
  2327. PNDIS_BUFFER Buffer;
  2328. NDIS_STATUS Status;
  2329. ExInitializeSListHead(&TCPSendReqFree);
  2330. KeInitializeSpinLock(&TCPSendReqFreeLock);
  2331. IPv6RegisterULProtocol(IP_PROTOCOL_TCP, TCPReceive, TCPControlReceive);
  2332. return TRUE;
  2333. }
  2334. #pragma END_INIT
  2335. //* UnloadTCPSend
  2336. //
  2337. // Cleanup and prepare for stack unload.
  2338. //
  2339. void
  2340. UnloadTCPSend(void)
  2341. {
  2342. PSLIST_ENTRY BufferLink;
  2343. while ((BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
  2344. &TCPSendReqFreeLock))
  2345. != NULL) {
  2346. Queue *QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
  2347. TCPReq *Req = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
  2348. TCPSendReq *SendReq = CONTAINING_RECORD(Req, TCPSendReq, tsr_req);
  2349. CHECK_STRUCT(SendReq, tsr);
  2350. ExFreePool(SendReq);
  2351. }
  2352. IPv6RegisterULProtocol(IP_PROTOCOL_TCP, NULL, NULL);
  2353. }