Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2816 lines
99 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1985-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // TCP send code.
  14. //
  15. // This file contains the code for sending Data and Control segments.
  16. //
  17. #include "oscfg.h"
  18. #include "ndis.h"
  19. #include "ip6imp.h"
  20. #include "ip6def.h"
  21. #include "tdi.h"
  22. #include "tdint.h"
  23. #include "tdistat.h"
  24. #include "queue.h"
  25. #include "transprt.h"
  26. #include "addr.h"
  27. #include "tcp.h"
  28. #include "tcb.h"
  29. #include "tcpconn.h"
  30. #include "tcpsend.h"
  31. #include "tcprcv.h"
  32. #include "info.h"
  33. #include "tcpcfg.h"
  34. #include "route.h"
  35. #include "security.h"
  36. void *TCPProtInfo; // TCP protocol info for IP.
  37. SLIST_HEADER TCPSendReqFree; // Send req. free list.
  38. KSPIN_LOCK TCPSendReqFreeLock;
  39. KSPIN_LOCK TCPSendReqCompleteLock;
  40. uint NumTCPSendReq; // Current number of SendReqs in system.
  41. uint MaxSendReq = 0xffffffff; // Maximum allowed number of SendReqs.
  42. extern KSPIN_LOCK TCBTableLock;
  43. //
  44. // All of the init code can be discarded.
  45. //
  46. #ifdef ALLOC_PRAGMA
  47. #pragma alloc_text(INIT, InitTCPSend)
  48. #endif // ALLOC_PRAGMA
  49. extern void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
  50. #define MIN_INITIAL_RTT 3 // In msec.
  51. //* FreeSendReq - Free a send request structure.
  52. //
  53. // Called to free a send request structure.
  54. //
  55. void // Returns: Nothing.
  56. FreeSendReq(
  57. TCPSendReq *FreedReq) // Connection request structure to be freed.
  58. {
  59. PSLIST_ENTRY BufferLink;
  60. CHECK_STRUCT(FreedReq, tsr);
  61. BufferLink = CONTAINING_RECORD(&(FreedReq->tsr_req.tr_q.q_next),
  62. SLIST_ENTRY, Next);
  63. ExInterlockedPushEntrySList(&TCPSendReqFree, BufferLink,
  64. &TCPSendReqFreeLock);
  65. }
  66. //* GetSendReq - Get a send request structure.
  67. //
  68. // Called to get a send request structure.
  69. //
  70. TCPSendReq * // Returns: Pointer to SendReq structure, or NULL if none.
  71. GetSendReq(
  72. void) // Nothing.
  73. {
  74. TCPSendReq *Temp;
  75. PSLIST_ENTRY BufferLink;
  76. Queue *QueuePtr;
  77. TCPReq *ReqPtr;
  78. BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
  79. &TCPSendReqFreeLock);
  80. if (BufferLink != NULL) {
  81. QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
  82. ReqPtr = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
  83. Temp = CONTAINING_RECORD(ReqPtr, TCPSendReq, tsr_req);
  84. CHECK_STRUCT(Temp, tsr);
  85. } else {
  86. if (NumTCPSendReq < MaxSendReq)
  87. Temp = ExAllocatePool(NonPagedPool, sizeof(TCPSendReq));
  88. else
  89. Temp = NULL;
  90. if (Temp != NULL) {
  91. ExInterlockedAddUlong((PULONG)&NumTCPSendReq, 1, &TCPSendReqFreeLock);
  92. #if DBG
  93. Temp->tsr_req.tr_sig = tr_signature;
  94. Temp->tsr_sig = tsr_signature;
  95. #endif
  96. }
  97. }
  98. return Temp;
  99. }
  100. //* TCPHopLimit
  101. //
  102. // Given a TCB, returns the Hop Limit to use in a sent packet.
  103. // Assumes the caller holds a lock on the TCB.
  104. //
  105. uchar
  106. TCPHopLimit(TCB *Tcb)
  107. {
  108. if (Tcb->tcb_hops != -1)
  109. return (uchar) Tcb->tcb_hops;
  110. else
  111. return (uchar) Tcb->tcb_rce->NCE->IF->CurHopLimit;
  112. }
  113. //* TCPSendComplete - Complete a TCP send.
  114. //
  115. // Called by IP when a send we've made is complete. We free the buffer,
  116. // and possibly complete some sends. Each send queued on a TCB has a ref.
  117. // count with it, which is the number of times a pointer to a buffer
  118. // associated with the send has been passed to the underlying IP layer. We
  119. // can't complete a send until that count is 0. If this send was actually
  120. // from a send of data, we'll go down the chain of send and decrement the
  121. // refcount on each one. If we have one going to 0 and the send has already
  122. // been acked we'll complete the send. If it hasn't been acked we'll leave
  123. // it until the ack comes in.
  124. //
  125. // NOTE: We aren't protecting any of this with locks. When we port this to
  126. // NT we'll need to fix this, probably with a global lock. See the comments
  127. // in ACKSend() in TCPRCV.C for more details.
  128. //
  129. void // Returns: Nothing.
  130. TCPSendComplete(
  131. PNDIS_PACKET Packet, // Packet that was sent.
  132. IP_STATUS Status)
  133. {
  134. PNDIS_BUFFER BufferChain;
  135. SendCmpltContext *SCContext;
  136. PVOID Memory;
  137. UINT Unused;
  138. UNREFERENCED_PARAMETER(Status);
  139. //
  140. // Pull values we care about out of the packet structure.
  141. //
  142. SCContext = (SendCmpltContext *) PC(Packet)->CompletionData;
  143. BufferChain = NdisFirstBuffer(Packet);
  144. NdisQueryBufferSafe(BufferChain, &Memory, &Unused, LowPagePriority);
  145. ASSERT(Memory != NULL);
  146. //
  147. // See if we have a send complete context. It will be present for data
  148. // packets and means we have extra work to do. For non-data packets, we
  149. // can just skip all this as there is only the header buffer to deal with.
  150. //
  151. if (SCContext != NULL) {
  152. KIRQL OldIrql;
  153. PNDIS_BUFFER CurrentBuffer;
  154. TCPSendReq *CurrentSend;
  155. uint i;
  156. CHECK_STRUCT(SCContext, scc);
  157. //
  158. // First buffer in chain is the TCP header buffer.
  159. // Skip over it for now.
  160. //
  161. CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
  162. //
  163. // Also skip over any 'user' buffers (those loaned out to us
  164. // instead of copied) as we don't need to free them.
  165. //
  166. for (i = 0; i < (uint)SCContext->scc_ubufcount; i++) {
  167. ASSERT(CurrentBuffer != NULL);
  168. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  169. }
  170. //
  171. // Now loop through and free our (aka 'transport') buffers.
  172. // We need to do this before decrementing the reference count to avoid
  173. // destroying the buffer chain if we have to zap tsr_lastbuf->Next to
  174. // NULL.
  175. //
  176. for (i = 0; i < (uint)SCContext->scc_tbufcount; i++) {
  177. PNDIS_BUFFER TempBuffer;
  178. ASSERT(CurrentBuffer != NULL);
  179. TempBuffer = CurrentBuffer;
  180. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  181. NdisFreeBuffer(TempBuffer);
  182. }
  183. //
  184. // Loop through the send requests attached to this packet,
  185. // reducing the reference count on each and enqueing them for
  186. // completion where appropriate.
  187. //
  188. CurrentSend = SCContext->scc_firstsend;
  189. for (i = 0; i< SCContext->scc_count; i++) {
  190. Queue *TempQ;
  191. long Result;
  192. TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
  193. CHECK_STRUCT(CurrentSend, tsr);
  194. Result = InterlockedDecrement(&(CurrentSend->tsr_refcnt));
  195. ASSERT(Result >= 0);
  196. if (Result <= 0) {
  197. //
  198. // Reference count has gone to 0 which means the send has
  199. // been ACK'd or cancelled. Complete it now.
  200. //
  201. // If we've sent directly from this send, NULL out the next
  202. // pointer for the last buffer in the chain.
  203. //
  204. if (CurrentSend->tsr_lastbuf != NULL) {
  205. NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
  206. CurrentSend->tsr_lastbuf = NULL;
  207. }
  208. KeAcquireSpinLock(&RequestCompleteLock, &OldIrql);
  209. ENQUEUE(&SendCompleteQ, &CurrentSend->tsr_req.tr_q);
  210. RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
  211. KeReleaseSpinLock(&RequestCompleteLock, OldIrql);
  212. }
  213. CurrentSend = CONTAINING_RECORD(QSTRUCT(TCPReq, TempQ, tr_q),
  214. TCPSendReq, tsr_req);
  215. }
  216. }
  217. //
  218. // Free the TCP header buffer and our packet structure proper.
  219. //
  220. NdisFreeBuffer(BufferChain);
  221. ExFreePool(Memory);
  222. NdisFreePacket(Packet);
  223. //
  224. // If there are any TCP send requests to complete, do so now.
  225. //
  226. if (RequestCompleteFlags & SEND_REQUEST_COMPLETE)
  227. TCPRcvComplete();
  228. }
  229. //* RcvWin - Figure out the receive window to offer in an ack.
  230. //
  231. // A routine to figure out what window to offer on a connection. We
  232. // take into account SWS avoidance, what the default connection window is,
  233. // and what the last window we offered is.
  234. //
  235. uint // Returns: Window to be offered.
  236. RcvWin(
  237. TCB *WinTCB) // TCB on which to perform calculations.
  238. {
  239. int CouldOffer; // The window size we could offer.
  240. CHECK_STRUCT(WinTCB, tcb);
  241. CheckPacketList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
  242. ASSERT(WinTCB->tcb_rcvwin >= 0);
  243. CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
  244. ASSERT(CouldOffer >= 0);
  245. ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
  246. if ((CouldOffer - WinTCB->tcb_rcvwin) >=
  247. (int) MIN(WinTCB->tcb_defaultwin/2, WinTCB->tcb_mss)) {
  248. WinTCB->tcb_rcvwin = CouldOffer;
  249. }
  250. return WinTCB->tcb_rcvwin;
  251. }
  252. //* ValidateSourceAndRoute - Validate the NTE and RCE.
  253. //
  254. // Checks that the NTE and RCE referenced by this TCB are still ok to use.
  255. //
  256. BOOLEAN
  257. ValidateSourceAndRoute(
  258. TCB *Tcb) // TCB being validated.
  259. {
  260. KIRQL Irql0;
  261. //
  262. // Update our copy of the validation counter.
  263. // We need to do this before making the validation checks below
  264. // (to avoid missing any additional changes while we're in here).
  265. //
  266. Tcb->tcb_routing = RouteCacheValidationCounter;
  267. //
  268. // Check that our NTE hasn't gone away.
  269. //
  270. KeAcquireSpinLock(&Tcb->tcb_nte->IF->Lock, &Irql0);
  271. if (!IsValidNTE(Tcb->tcb_nte)) {
  272. //
  273. // Can't use this one anymore.
  274. //
  275. KeReleaseSpinLock(&Tcb->tcb_nte->IF->Lock, Irql0);
  276. ReleaseNTE(Tcb->tcb_nte);
  277. //
  278. // See if this address lives on as a different NTE.
  279. //
  280. Tcb->tcb_nte = FindNetworkWithAddress(&Tcb->tcb_saddr,
  281. Tcb->tcb_sscope_id);
  282. if (Tcb->tcb_nte == NULL) {
  283. //
  284. // The address is gone.
  285. //
  286. return FALSE;
  287. }
  288. } else {
  289. KeReleaseSpinLock(&Tcb->tcb_nte->IF->Lock, Irql0);
  290. }
  291. //
  292. // Also check that the RCE is still around.
  293. //
  294. Tcb->tcb_rce = ValidateRCE(Tcb->tcb_rce, Tcb->tcb_nte);
  295. return TRUE;
  296. }
  297. //* SendSYN - Send a SYN segment.
  298. //
  299. // This is called during connection establishment time to send a SYN
  300. // segment to the peer. We get a buffer if we can, and then fill
  301. // it in. There's a tricky part here where we have to build the MSS
  302. // option in the header - we find the MSS by finding the MSS offered
  303. // by the net for the local address. After that, we send it.
  304. //
  305. void // Returns: Nothing.
  306. SendSYN(
  307. TCB *SYNTcb, // TCB from which SYN is to be sent.
  308. KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
  309. {
  310. PNDIS_PACKET Packet;
  311. void *Memory;
  312. IPv6Header UNALIGNED *IP;
  313. TCPHeader UNALIGNED *TCP;
  314. uchar *OptPtr;
  315. NDIS_STATUS NdisStatus;
  316. uint Offset;
  317. uint Length;
  318. uint PayloadLength;
  319. ushort TempWin;
  320. ushort MSS;
  321. RouteCacheEntry *RCE;
  322. CHECK_STRUCT(SYNTcb, tcb);
  323. //
  324. // Go ahead and set the retransmission timer now, in case we can't get a
  325. // packet or a buffer. In the future we might want to queue the
  326. // connection for when we get resources.
  327. //
  328. START_TCB_TIMER(SYNTcb->tcb_rexmittimer, SYNTcb->tcb_rexmit);
  329. //
  330. // In most cases, we will already have a route at this point.
  331. // However, if we failed to get one earlier in the passive receive
  332. // path, we may need to retry here.
  333. //
  334. if (SYNTcb->tcb_rce == NULL) {
  335. InitRCE(SYNTcb);
  336. if (SYNTcb->tcb_rce == NULL) {
  337. goto ErrorReturn;
  338. }
  339. }
  340. //
  341. // Validate that the address we're sourcing from and the route we're
  342. // sending upon are still okay to use.
  343. //
  344. if (SYNTcb->tcb_routing != RouteCacheValidationCounter) {
  345. if (!ValidateSourceAndRoute(SYNTcb)) {
  346. //
  347. // Even though we're about to close this TCB,
  348. // we should leave it in a consistent state.
  349. //
  350. SYNTcb->tcb_sendnext++;
  351. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  352. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  353. }
  354. TryToCloseTCB(SYNTcb, TCB_CLOSE_ABORTED, PreLockIrql);
  355. return;
  356. }
  357. }
  358. //
  359. // Allocate a packet header/buffer/data region for this SYN.
  360. //
  361. // Our buffer has space at the beginning which will be filled in
  362. // later by the link level. At this level we add the IPv6Header,
  363. // TCPHeader, and TCP Maximum Segment Size option which follow.
  364. //
  365. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  366. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  367. //
  368. Offset = SYNTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  369. Length = Offset + sizeof(*IP) + sizeof(*TCP) + MSS_OPT_SIZE;
  370. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  371. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  372. //
  373. // Upon failure, advance tcb_sendnext anyway.
  374. // We need to do this because TCBTimeout will *retreat* tcb_sendnext
  375. // if this SYN is later retransmitted, and if that retreat occurs
  376. // without this advance, we end up with a hole in the sequence-space.
  377. //
  378. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  379. "TCP SendSYN: Couldn't allocate IPv6 packet header!?!\n"));
  380. ErrorReturn:
  381. SYNTcb->tcb_sendnext++;
  382. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  383. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  384. }
  385. KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
  386. return;
  387. }
  388. PC(Packet)->CompletionHandler = TCPSendComplete;
  389. PC(Packet)->CompletionData = NULL;
  390. //
  391. // Since this is a SYN-only packet (maybe someday we'll send data with
  392. // the SYN?) we only have the one buffer and nothing to link on after.
  393. //
  394. //
  395. // We now have all the resources we need to send.
  396. // Prepare the actual packet.
  397. //
  398. //
  399. // Our header buffer has extra space for other headers to be
  400. // prepended to ours without requiring further allocation calls.
  401. // Put the actual TCP/IP header at the end of the buffer.
  402. //
  403. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  404. IP->VersClassFlow = IP_VERSION;
  405. IP->NextHeader = IP_PROTOCOL_TCP;
  406. IP->HopLimit = TCPHopLimit(SYNTcb);
  407. IP->Source = SYNTcb->tcb_saddr;
  408. IP->Dest = SYNTcb->tcb_daddr;
  409. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  410. TCP->tcp_src = SYNTcb->tcb_sport;
  411. TCP->tcp_dest = SYNTcb->tcb_dport;
  412. TCP->tcp_seq = net_long(SYNTcb->tcb_sendnext);
  413. //
  414. // The SYN flag takes up one element in sequence number space.
  415. // Record that we've sent it here (if we need to retransmit the SYN
  416. // segment, TCBTimeout will reset sendnext before calling us again).
  417. //
  418. SYNTcb->tcb_sendnext++;
  419. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  420. TStats.ts_outsegs++;
  421. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  422. } else
  423. TStats.ts_retranssegs++;
  424. TCP->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
  425. //
  426. // REVIEW: TCP flags are entirely based upon our state, so this could
  427. // REVIEW: be replaced by a (quicker) array lookup.
  428. //
  429. if (SYNTcb->tcb_state == TCB_SYN_RCVD)
  430. TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN | TCP_FLAG_ACK);
  431. else
  432. TCP->tcp_flags = MAKE_TCP_FLAGS(6, TCP_FLAG_SYN);
  433. TempWin = (ushort)SYNTcb->tcb_rcvwin;
  434. TCP->tcp_window = net_short(TempWin);
  435. TCP->tcp_urgent = 0;
  436. TCP->tcp_xsum = 0;
  437. OptPtr = (uchar *)(TCP + 1);
  438. //
  439. // Compose the Maximum Segment Size option.
  440. //
  441. // TBD: If we add IPv6 Jumbogram support, we should also add LFN
  442. // TBD: support to TCP and change this to handle a larger MSS.
  443. //
  444. MSS = SYNTcb->tcb_rce->NTE->IF->LinkMTU
  445. - sizeof(IPv6Header) - sizeof(TCPHeader);
  446. IF_TCPDBG(TCP_DEBUG_MSS) {
  447. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INFO_TCPDBG,
  448. "SendSYN: Sending MSS option value of %d\n", MSS));
  449. }
  450. *OptPtr++ = TCP_OPT_MSS;
  451. *OptPtr++ = MSS_OPT_SIZE;
  452. *(ushort UNALIGNED *)OptPtr = net_short(MSS);
  453. PayloadLength = sizeof(TCPHeader) + MSS_OPT_SIZE;
  454. //
  455. // Compute the TCP checksum. It covers the entire TCP segment
  456. // starting with the TCP header, plus the IPv6 pseudo-header.
  457. //
  458. // REVIEW: The IPv4 implementation kept the IPv4 psuedo-header around
  459. // REVIEW: in the TCB rather than recalculate it every time. Do this?
  460. //
  461. TCP->tcp_xsum = 0;
  462. TCP->tcp_xsum = ChecksumPacket(
  463. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  464. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  465. ASSERT(TCP->tcp_xsum != 0);
  466. //
  467. // Capture and reference the RCE while we still hold the TCB lock.
  468. // The TCB's reference on this particular RCE might go away at any point
  469. // after we release the lock (or because we drop it ourselves below).
  470. //
  471. RCE = SYNTcb->tcb_rce;
  472. AddRefRCE(RCE);
  473. //
  474. // If connection-acceptance has been delayed, release the TCB's RCE.
  475. // This prevents TCBs in pre-established states from consuming
  476. // an unbounded number of RCEs.
  477. //
  478. if (SYNTcb->tcb_flags & ACCEPT_PENDING) {
  479. SYNTcb->tcb_rce = NULL;
  480. ReleaseRCE(RCE);
  481. }
  482. //
  483. // Everything's ready. Now send the packet.
  484. //
  485. // Note that IPv6Send does not return a status code.
  486. // Instead it *always* completes the packet
  487. // with an appropriate status code.
  488. //
  489. KeReleaseSpinLock(&SYNTcb->tcb_lock, PreLockIrql);
  490. IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
  491. IP_PROTOCOL_TCP,
  492. net_short(TCP->tcp_src),
  493. net_short(TCP->tcp_dest));
  494. //
  495. // Release the extra reference we took on the RCE above.
  496. //
  497. ReleaseRCE(RCE);
  498. }
  499. //* SendKA - Send a keep alive segment.
  500. //
  501. // This is called when we want to send a keep-alive. The idea is to provoke
  502. // a response from our peer on an otherwise idle connection. We send a
  503. // garbage byte of data in our keep-alives in order to cooperate with broken
  504. // TCP implementations that don't respond to segments outside the window
  505. // unless they contain data.
  506. //
  507. void // Returns: Nothing.
  508. SendKA(
  509. TCB *KATcb, // TCB from which keep alive is to be sent.
  510. KIRQL PreLockIrql) // IRQL prior to acquiring lock on TCB.
  511. {
  512. PNDIS_PACKET Packet;
  513. void *Memory;
  514. IPv6Header UNALIGNED *IP;
  515. TCPHeader UNALIGNED *TCP;
  516. NDIS_STATUS NdisStatus;
  517. int Offset;
  518. uint Length;
  519. uint PayloadLength;
  520. ushort TempWin;
  521. SeqNum TempSeq;
  522. RouteCacheEntry *RCE;
  523. CHECK_STRUCT(KATcb, tcb);
  524. //
  525. // In most cases, we will already have a route at this point.
  526. // However, if we failed to get one earlier in the passive receive
  527. // path, we may need to retry here.
  528. //
  529. if (KATcb->tcb_rce == NULL) {
  530. InitRCE(KATcb);
  531. if (KATcb->tcb_rce == NULL) {
  532. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  533. return;
  534. }
  535. }
  536. //
  537. // Validate that the address we're sourcing from and the route we're
  538. // sending upon are still okay to use.
  539. //
  540. if (KATcb->tcb_routing != RouteCacheValidationCounter) {
  541. if (!ValidateSourceAndRoute(KATcb)) {
  542. TryToCloseTCB(KATcb, TCB_CLOSE_ABORTED, PreLockIrql);
  543. return;
  544. }
  545. }
  546. //
  547. // Allocate a packet header/buffer/data region for this keepalive packet.
  548. //
  549. // Our buffer has space at the beginning which will be filled in
  550. // later by the link level. At this level we add the IPv6Header,
  551. // TCPHeader, and a single byte of data which follow.
  552. //
  553. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  554. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  555. //
  556. Offset = KATcb->tcb_rce->NCE->IF->LinkHeaderSize;
  557. Length = Offset + sizeof(*IP) + sizeof(*TCP) + 1;
  558. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  559. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  560. //
  561. // REVIEW: What to do if this fails.
  562. //
  563. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  564. "TCP SendKA: Couldn't allocate IPv6 packet header!?!\n"));
  565. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  566. return;
  567. }
  568. PC(Packet)->CompletionHandler = TCPSendComplete;
  569. PC(Packet)->CompletionData = NULL;
  570. //
  571. // Since this is a keepalive packet we only have the one buffer and
  572. // nothing to link on after.
  573. //
  574. //
  575. // Our header buffer has extra space for other headers to be
  576. // prepended to ours without requiring further allocation calls.
  577. // Put the actual TCP/IP header at the end of the buffer.
  578. //
  579. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  580. IP->VersClassFlow = IP_VERSION;
  581. IP->NextHeader = IP_PROTOCOL_TCP;
  582. IP->HopLimit = TCPHopLimit(KATcb);
  583. IP->Source = KATcb->tcb_saddr;
  584. IP->Dest = KATcb->tcb_daddr;
  585. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  586. TCP->tcp_src = KATcb->tcb_sport;
  587. TCP->tcp_dest = KATcb->tcb_dport;
  588. TempSeq = KATcb->tcb_senduna - 1;
  589. TCP->tcp_seq = net_long(TempSeq);
  590. TCP->tcp_ack = net_long(KATcb->tcb_rcvnext);
  591. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  592. TempWin = (ushort)RcvWin(KATcb);
  593. TCP->tcp_window = net_short(TempWin);
  594. TCP->tcp_urgent = 0;
  595. //
  596. // Initialize the single byte that we're resending.
  597. // N.B. Adequate space for this byte was allocated above.
  598. //
  599. *(uchar *)(TCP + 1) = 0;
  600. TStats.ts_retranssegs++;
  601. PayloadLength = sizeof(TCPHeader) + 1;
  602. //
  603. // Compute the TCP checksum. It covers the entire TCP segment
  604. // starting with the TCP header, plus the IPv6 pseudo-header.
  605. //
  606. TCP->tcp_xsum = 0;
  607. TCP->tcp_xsum = ChecksumPacket(
  608. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  609. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  610. ASSERT(TCP->tcp_xsum != 0);
  611. //
  612. // Capture and reference the RCE while we still hold the TCB lock.
  613. // The TCB's reference on this particular RCE might go away at any
  614. // point after we release the lock.
  615. //
  616. RCE = KATcb->tcb_rce;
  617. AddRefRCE(RCE);
  618. //
  619. // Everything's ready. Now send the packet.
  620. //
  621. // Note that IPv6Send does not return a status code.
  622. // Instead it *always* completes the packet
  623. // with an appropriate status code.
  624. //
  625. KATcb->tcb_kacount++;
  626. KeReleaseSpinLock(&KATcb->tcb_lock, PreLockIrql);
  627. IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
  628. IP_PROTOCOL_TCP,
  629. net_short(TCP->tcp_src),
  630. net_short(TCP->tcp_dest));
  631. //
  632. // Release the extra reference we took on the RCE above.
  633. //
  634. ReleaseRCE(RCE);
  635. }
  636. //* SendACK - Send an ACK segment.
  637. //
  638. // This is called whenever we need to send an ACK for some reason. Nothing
  639. // fancy, we just do it.
  640. //
  641. void // Returns: Nothing.
  642. SendACK(
  643. TCB *ACKTcb) // TCB from which ACK is to be sent.
  644. {
  645. PNDIS_PACKET Packet;
  646. void *Memory;
  647. IPv6Header UNALIGNED *IP;
  648. TCPHeader UNALIGNED *TCP;
  649. NDIS_STATUS NdisStatus;
  650. KIRQL OldIrql;
  651. int Offset;
  652. uint Length;
  653. uint PayloadLength;
  654. SeqNum SendNext;
  655. ushort TempWin;
  656. RouteCacheEntry *RCE;
  657. CHECK_STRUCT(ACKTcb, tcb);
  658. KeAcquireSpinLock(&ACKTcb->tcb_lock, &OldIrql);
  659. //
  660. // In most cases, we will already have a route at this point.
  661. // However, if we failed to get one earlier in the passive receive
  662. // path, we may need to retry here.
  663. //
  664. if (ACKTcb->tcb_rce == NULL) {
  665. InitRCE(ACKTcb);
  666. if (ACKTcb->tcb_rce == NULL) {
  667. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  668. return;
  669. }
  670. }
  671. //
  672. // Validate that the address we're sourcing from and the route we're
  673. // sending upon are still okay to use.
  674. //
  675. if (ACKTcb->tcb_routing != RouteCacheValidationCounter) {
  676. if (!ValidateSourceAndRoute(ACKTcb)) {
  677. TryToCloseTCB(ACKTcb, TCB_CLOSE_ABORTED, OldIrql);
  678. return;
  679. }
  680. }
  681. //
  682. // Allocate a packet header/buffer/data region for this ACK packet.
  683. //
  684. // Our buffer has space at the beginning which will be filled in
  685. // later by the link level. At this level we add the IPv6Header
  686. // and the TCPHeader.
  687. //
  688. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  689. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  690. //
  691. Offset = ACKTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  692. Length = Offset + sizeof(*IP) + sizeof(*TCP);
  693. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  694. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  695. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  696. //
  697. // REVIEW: What to do if this fails.
  698. //
  699. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  700. "TCP SendACK: Couldn't allocate IPv6 packet header!?!\n"));
  701. return;
  702. }
  703. PC(Packet)->CompletionHandler = TCPSendComplete;
  704. PC(Packet)->CompletionData = NULL;
  705. //
  706. // Our header buffer has extra space for other headers to be
  707. // prepended to ours without requiring further allocation calls.
  708. // Put the actual TCP/IP header at the end of the buffer.
  709. //
  710. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  711. IP->VersClassFlow = IP_VERSION;
  712. IP->NextHeader = IP_PROTOCOL_TCP;
  713. IP->HopLimit = TCPHopLimit(ACKTcb);
  714. IP->Source = ACKTcb->tcb_saddr;
  715. IP->Dest = ACKTcb->tcb_daddr;
  716. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  717. TCP->tcp_src = ACKTcb->tcb_sport;
  718. TCP->tcp_dest = ACKTcb->tcb_dport;
  719. TCP->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
  720. //
  721. // If the remote peer is advertising a window of zero, we need to send
  722. // this ack with a sequence number of his rcv_next (which in that case
  723. // should be our senduna). We have code here ifdef'd out that makes
  724. // sure that we don't send outside the RWE, but this doesn't work. We
  725. // need to be able to send a pure ACK exactly at the RWE.
  726. //
  727. if (ACKTcb->tcb_sendwin != 0) {
  728. SendNext = ACKTcb->tcb_sendnext;
  729. #if 0
  730. SeqNum MaxValidSeq;
  731. MaxValidSeq = ACKTcb->tcb_senduna + ACKTcb->tcb_sendwin - 1;
  732. SendNext = (SEQ_LT(SendNext, MaxValidSeq) ? SendNext : MaxValidSeq);
  733. #endif
  734. } else
  735. SendNext = ACKTcb->tcb_senduna;
  736. if ((ACKTcb->tcb_flags & FIN_SENT) &&
  737. SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
  738. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_FIN | TCP_FLAG_ACK);
  739. } else
  740. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  741. TCP->tcp_seq = net_long(SendNext);
  742. TempWin = (ushort)RcvWin(ACKTcb);
  743. TCP->tcp_window = net_short(TempWin);
  744. TCP->tcp_urgent = 0;
  745. PayloadLength = sizeof(*TCP);
  746. //
  747. // Compute the TCP checksum. It covers the entire TCP segment
  748. // starting with the TCP header, plus the IPv6 pseudo-header.
  749. //
  750. TCP->tcp_xsum = 0;
  751. TCP->tcp_xsum = ChecksumPacket(
  752. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  753. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  754. ASSERT(TCP->tcp_xsum != 0);
  755. STOP_TCB_TIMER(ACKTcb->tcb_delacktimer);
  756. ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
  757. TStats.ts_outsegs++;
  758. //
  759. // Capture and reference the RCE while we still hold the TCB lock.
  760. // The TCB's reference on this particular RCE might go away at any point
  761. // after we release the lock (or because we drop it ourselves below).
  762. //
  763. RCE = ACKTcb->tcb_rce;
  764. AddRefRCE(RCE);
  765. //
  766. // If connection-acceptance has been delayed, release the TCB's RCE.
  767. // This prevents TCBs in pre-established states from consuming
  768. // an unbounded number of RCEs.
  769. //
  770. if (ACKTcb->tcb_flags & ACCEPT_PENDING) {
  771. ACKTcb->tcb_rce = NULL;
  772. ReleaseRCE(RCE);
  773. }
  774. //
  775. // Everything's ready. Now send the packet.
  776. //
  777. // Note that IPv6Send does not return a status code.
  778. // Instead it *always* completes the packet
  779. // with an appropriate status code.
  780. //
  781. KeReleaseSpinLock(&ACKTcb->tcb_lock, OldIrql);
  782. IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
  783. IP_PROTOCOL_TCP,
  784. net_short(TCP->tcp_src),
  785. net_short(TCP->tcp_dest));
  786. //
  787. // Release the extra reference we took on the RCE above.
  788. //
  789. ReleaseRCE(RCE);
  790. }
  791. //* SendRSTFromTCB - Send a RST from a TCB.
  792. //
  793. // This is called during close when we need to send a RST.
  794. //
  795. // Called only when TCB is going away, so we have exclusive access.
  796. //
  797. void // Returns: Nothing.
  798. SendRSTFromTCB(
  799. TCB *RSTTcb) // TCB from which RST is to be sent.
  800. {
  801. PNDIS_PACKET Packet;
  802. void *Memory;
  803. IPv6Header UNALIGNED *IP;
  804. TCPHeader UNALIGNED *TCP;
  805. NDIS_STATUS NdisStatus;
  806. int Offset;
  807. uint Length;
  808. uint PayloadLength;
  809. SeqNum RSTSeq;
  810. CHECK_STRUCT(RSTTcb, tcb);
  811. ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
  812. //
  813. // In most cases, we will already have a route at this point.
  814. // However, if we failed to get one earlier in the passive receive
  815. // path, we may need to retry here.
  816. //
  817. if (RSTTcb->tcb_rce == NULL) {
  818. InitRCE(RSTTcb);
  819. if (RSTTcb->tcb_rce == NULL) {
  820. return;
  821. }
  822. }
  823. //
  824. // Validate that the address we're sourcing from and the route we're
  825. // sending upon are still okay to use.
  826. //
  827. if (RSTTcb->tcb_routing != RouteCacheValidationCounter) {
  828. if (!ValidateSourceAndRoute(RSTTcb)) {
  829. return;
  830. }
  831. }
  832. //
  833. // Allocate a packet header/buffer/data region for this RST packet.
  834. //
  835. // Our buffer has space at the beginning which will be filled in
  836. // later by the link level. At this level we add the IPv6Header
  837. // and the TCPHeader.
  838. //
  839. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  840. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  841. //
  842. Offset = RSTTcb->tcb_rce->NCE->IF->LinkHeaderSize;
  843. Length = Offset + sizeof(*IP) + sizeof(*TCP);
  844. NdisStatus = IPv6AllocatePacket(Length, &Packet, &Memory);
  845. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  846. //
  847. // REVIEW: What to do if this fails.
  848. //
  849. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  850. "TCP SendRSTFromTCB: "
  851. "Couldn't alloc IPv6 packet header!\n"));
  852. return;
  853. }
  854. PC(Packet)->CompletionHandler = TCPSendComplete;
  855. PC(Packet)->CompletionData = NULL;
  856. //
  857. // Since this is an RST-only packet we only have the one buffer and
  858. // nothing to link on after.
  859. //
  860. //
  861. // Our header buffer has extra space for other headers to be
  862. // prepended to ours without requiring further allocation calls.
  863. // Put the actual TCP/IP header at the end of the buffer.
  864. //
  865. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  866. IP->VersClassFlow = IP_VERSION;
  867. IP->NextHeader = IP_PROTOCOL_TCP;
  868. IP->HopLimit = TCPHopLimit(RSTTcb);
  869. IP->Source = RSTTcb->tcb_saddr;
  870. IP->Dest = RSTTcb->tcb_daddr;
  871. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  872. TCP->tcp_src = RSTTcb->tcb_sport;
  873. TCP->tcp_dest = RSTTcb->tcb_dport;
  874. //
  875. // If the remote peer has a window of 0, send with a seq. # equal
  876. // to senduna so he'll accept it. Otherwise send with send max.
  877. //
  878. if (RSTTcb->tcb_sendwin != 0)
  879. RSTSeq = RSTTcb->tcb_sendmax;
  880. else
  881. RSTSeq = RSTTcb->tcb_senduna;
  882. TCP->tcp_seq = net_long(RSTSeq);
  883. TCP->tcp_ack = net_long(RSTTcb->tcb_rcvnext);
  884. TCP->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_RST | TCP_FLAG_ACK);
  885. TCP->tcp_window = 0;
  886. TCP->tcp_urgent = 0;
  887. PayloadLength = sizeof(*TCP);
  888. //
  889. // Compute the TCP checksum. It covers the entire TCP segment
  890. // starting with the TCP header, plus the IPv6 pseudo-header.
  891. //
  892. TCP->tcp_xsum = 0;
  893. TCP->tcp_xsum = ChecksumPacket(
  894. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  895. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  896. ASSERT(TCP->tcp_xsum != 0);
  897. TStats.ts_outsegs++;
  898. TStats.ts_outrsts++;
  899. //
  900. // Everything's ready. Now send the packet.
  901. //
  902. // Note that IPv6Send does not return a status code.
  903. // Instead it *always* completes the packet
  904. // with an appropriate status code.
  905. //
  906. IPv6Send(Packet, Offset, IP, PayloadLength, RSTTcb->tcb_rce, 0,
  907. IP_PROTOCOL_TCP,
  908. net_short(TCP->tcp_src),
  909. net_short(TCP->tcp_dest));
  910. }
  911. //* SendRSTFromHeader - Send a RST back, based on a header.
  912. //
  913. // Called when we need to send a RST, but don't necessarily have a TCB.
  914. //
  915. void // Returns: Nothing.
  916. SendRSTFromHeader(
  917. TCPHeader UNALIGNED *RecvTCP, // TCP header to be RST.
  918. uint Length, // Length of the incoming segment.
  919. IPv6Addr *Dest, // Destination IP address for RST.
  920. uint DestScopeId, // Scope id for destination address.
  921. IPv6Addr *Src, // Source IP address for RST.
  922. uint SrcScopeId) // Scope id for source address.
  923. {
  924. PNDIS_PACKET Packet;
  925. void *Memory;
  926. IPv6Header UNALIGNED *IP;
  927. TCPHeader UNALIGNED *SendTCP;
  928. NetTableEntry *NTE;
  929. RouteCacheEntry *RCE;
  930. IP_STATUS Status;
  931. NDIS_STATUS NdisStatus;
  932. uint Offset;
  933. uint SendLength;
  934. uint PayloadLength;
  935. //
  936. // Never send a RST in response to a RST.
  937. //
  938. if (RecvTCP->tcp_flags & TCP_FLAG_RST)
  939. return;
  940. //
  941. // Determine NTE to send on based on incoming packet's destination.
  942. // REVIEW: Alternatively, we could/should just pass the NTE in.
  943. //
  944. NTE = FindNetworkWithAddress(Src, SrcScopeId);
  945. if (NTE == NULL) {
  946. //
  947. // This should only happen if the NTE became invalid
  948. // between accepting the packet and getting here. It
  949. // cannot completely go away since the packet's Packet
  950. // structure holds a reference to it.
  951. //
  952. return;
  953. }
  954. //
  955. // Get the route to the destination (incoming packet's source).
  956. //
  957. Status = RouteToDestination(Dest, DestScopeId, CastFromNTE(NTE),
  958. RTD_FLAG_NORMAL, &RCE);
  959. if (Status != IP_SUCCESS) {
  960. //
  961. // Failed to get a route to the destination. Error out.
  962. //
  963. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR,
  964. "TCP SendRSTFromHeader: Can't get a route?!?\n"));
  965. ReleaseNTE(NTE);
  966. return;
  967. }
  968. //
  969. // Allocate a packet header/buffer/data region for this RST packet.
  970. //
  971. // Our buffer has space at the beginning which will be filled in
  972. // later by the link level. At this level we add the IPv6Header
  973. // and the TCPHeader.
  974. //
  975. // REVIEW: This grabs packets and buffers from the IPv6PacketPool and
  976. // REVIEW: the IPv6BufferPool respectively. Have seperate pools for TCP?
  977. //
  978. Offset = RCE->NCE->IF->LinkHeaderSize;
  979. SendLength = Offset + sizeof(*IP) + sizeof(*SendTCP);
  980. NdisStatus = IPv6AllocatePacket(SendLength, &Packet, &Memory);
  981. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  982. //
  983. // Failed to allocate a packet header/buffer/data region. Error out.
  984. //
  985. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  986. "TCP SendRSTFromHeader: Couldn't alloc IPv6 pkt header!\n"));
  987. ReleaseRCE(RCE);
  988. ReleaseNTE(NTE);
  989. return;
  990. }
  991. PC(Packet)->CompletionHandler = TCPSendComplete;
  992. PC(Packet)->CompletionData = NULL;
  993. //
  994. // We now have all the resources we need to send. Since this is a
  995. // RST-only packet we only have the one header buffer and nothing
  996. // to link on after.
  997. //
  998. //
  999. // Our header buffer has extra space for other headers to be
  1000. // prepended to ours without requiring further allocation calls.
  1001. // Put the actual TCP/IP header at the end of the buffer.
  1002. //
  1003. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + Offset);
  1004. IP->VersClassFlow = IP_VERSION;
  1005. IP->NextHeader = IP_PROTOCOL_TCP;
  1006. IP->HopLimit = (uchar)RCE->NCE->IF->CurHopLimit;
  1007. IP->Source = *Src;
  1008. IP->Dest = *Dest;
  1009. //
  1010. // Fill in the header so as to make it believable to our peer, and send it.
  1011. //
  1012. SendTCP = (TCPHeader UNALIGNED *)(IP + 1);
  1013. if (RecvTCP->tcp_flags & TCP_FLAG_SYN)
  1014. Length++;
  1015. if (RecvTCP->tcp_flags & TCP_FLAG_FIN)
  1016. Length++;
  1017. if (RecvTCP->tcp_flags & TCP_FLAG_ACK) {
  1018. SendTCP->tcp_seq = RecvTCP->tcp_ack;
  1019. SendTCP->tcp_ack = 0;
  1020. SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
  1021. TCP_FLAG_RST);
  1022. } else {
  1023. SeqNum TempSeq;
  1024. SendTCP->tcp_seq = 0;
  1025. TempSeq = net_long(RecvTCP->tcp_seq);
  1026. TempSeq += Length;
  1027. SendTCP->tcp_ack = net_long(TempSeq);
  1028. SendTCP->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader)/sizeof(ulong),
  1029. TCP_FLAG_RST | TCP_FLAG_ACK);
  1030. }
  1031. SendTCP->tcp_window = 0;
  1032. SendTCP->tcp_urgent = 0;
  1033. SendTCP->tcp_dest = RecvTCP->tcp_src;
  1034. SendTCP->tcp_src = RecvTCP->tcp_dest;
  1035. PayloadLength = sizeof(*SendTCP);
  1036. //
  1037. // Compute the TCP checksum. It covers the entire TCP segment
  1038. // starting with the TCP header, plus the IPv6 pseudo-header.
  1039. //
  1040. SendTCP->tcp_xsum = 0;
  1041. SendTCP->tcp_xsum = ChecksumPacket(
  1042. Packet, Offset + sizeof *IP, NULL, PayloadLength,
  1043. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  1044. ASSERT(SendTCP->tcp_xsum != 0);
  1045. TStats.ts_outsegs++;
  1046. TStats.ts_outrsts++;
  1047. //
  1048. // Everything's ready. Now send the packet.
  1049. //
  1050. // Note that IPv6Send does not return a status code.
  1051. // Instead it *always* completes the packet
  1052. // with an appropriate status code.
  1053. //
  1054. IPv6Send(Packet, Offset, IP, PayloadLength, RCE, 0,
  1055. IP_PROTOCOL_TCP,
  1056. net_short(SendTCP->tcp_src),
  1057. net_short(SendTCP->tcp_dest));
  1058. //
  1059. // Release the Route and the NTE.
  1060. //
  1061. ReleaseRCE(RCE);
  1062. ReleaseNTE(NTE);
  1063. } // end of SendRSTFromHeader()
  1064. //* GoToEstab - Transition to the established state.
  1065. //
  1066. // Called when we are going to the established state and need to finish up
  1067. // initializing things that couldn't be done until now. We assume the TCB
  1068. // lock is held by the caller on the TCB we're called with.
  1069. //
  1070. void // Returns: Nothing.
  1071. GoToEstab(
  1072. TCB *EstabTCB) // TCB to transition.
  1073. {
  1074. //
  1075. // Initialize our slow start and congestion control variables.
  1076. //
  1077. EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
  1078. EstabTCB->tcb_ssthresh = 0xffffffff;
  1079. EstabTCB->tcb_state = TCB_ESTAB;
  1080. //
  1081. // We're in established. We'll subtract one from slow count for this fact,
  1082. // and if the slowcount goes to 0 we'll move onto the fast path.
  1083. //
  1084. if (--(EstabTCB->tcb_slowcount) == 0)
  1085. EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1086. InterlockedIncrement((PLONG)&TStats.ts_currestab);
  1087. EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
  1088. }
  1089. //* InitSendState - Initialize the send state of a connection.
  1090. //
  1091. // Called during connection establishment to initialize our send state.
  1092. // (In this case, this refers to all information we'll put on the wire as
  1093. // well as pure send state). We pick an ISS, set up a rexmit timer value,
  1094. // etc. We assume the tcb_lock is held on the TCB when we are called.
  1095. //
  1096. void // Returns: Nothing.
  1097. InitSendState(
  1098. TCB *NewTCB) // TCB to be set up.
  1099. {
  1100. uint InitialRTT;
  1101. CHECK_STRUCT(NewTCB, tcb);
  1102. if (NewTCB->tcb_flags & ACTIVE_OPEN) {
  1103. GetRandomISN(&NewTCB->tcb_sendnext, (uchar*)&NewTCB->tcb_md5data);
  1104. }
  1105. NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
  1106. NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
  1107. NewTCB->tcb_error = IP_SUCCESS;
  1108. //
  1109. // Initialize retransmit and delayed ack stuff.
  1110. //
  1111. NewTCB->tcb_rexmitcnt = 0;
  1112. NewTCB->tcb_rtt = 0;
  1113. NewTCB->tcb_smrtt = 0;
  1114. //
  1115. // Check for interface specific initial RTT.
  1116. // This can be as low as 3ms.
  1117. //
  1118. if ((NewTCB->tcb_rce != NULL) &&
  1119. ((InitialRTT = GetInitialRTTFromRCE(NewTCB->tcb_rce)) >
  1120. MIN_INITIAL_RTT)) {
  1121. NewTCB->tcb_delta = MS_TO_TICKS(InitialRTT * 2);
  1122. NewTCB->tcb_rexmit = MS_TO_TICKS(InitialRTT);
  1123. } else {
  1124. NewTCB->tcb_delta = MS_TO_TICKS(6000);
  1125. NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
  1126. }
  1127. STOP_TCB_TIMER(NewTCB->tcb_rexmittimer);
  1128. STOP_TCB_TIMER(NewTCB->tcb_delacktimer);
  1129. }
  1130. //* FillTCPHeader - Fill the TCP header in.
  1131. //
  1132. // A utility routine to fill in the TCP header.
  1133. //
  1134. void // Returns: Nothing.
  1135. FillTCPHeader(
  1136. TCB *SendTCB, // TCB to fill from.
  1137. TCPHeader UNALIGNED *Header) // Header to fill into.
  1138. {
  1139. ushort S;
  1140. ulong L;
  1141. Header->tcp_src = SendTCB->tcb_sport;
  1142. Header->tcp_dest = SendTCB->tcb_dport;
  1143. L = SendTCB->tcb_sendnext;
  1144. Header->tcp_seq = net_long(L);
  1145. L = SendTCB->tcb_rcvnext;
  1146. Header->tcp_ack = net_long(L);
  1147. Header->tcp_flags = 0x1050;
  1148. *(ulong UNALIGNED *)&Header->tcp_xsum = 0;
  1149. S = (ushort)RcvWin(SendTCB);
  1150. Header->tcp_window = net_short(S);
  1151. Header->tcp_urgent = 0;
  1152. }
  1153. //* TCPSend - Send data from a TCP connection.
  1154. //
  1155. // This is the main 'send data' routine. We go into a loop, trying
  1156. // to send data until we can't for some reason. First we compute
  1157. // the useable window, use it to figure the amount we could send. If
  1158. // the amount we could send meets certain criteria we'll build a frame
  1159. // and send it, after setting any appropriate control bits. We assume
  1160. // the caller has put a reference on the TCB.
  1161. //
  1162. void // Returns: Nothing.
  1163. TCPSend(
  1164. TCB *SendTCB, // TCB to be sent from.
  1165. KIRQL PreLockIrql) // IRQL prior to acquiring TCB lock.
  1166. {
  1167. int SendWin; // Useable send window.
  1168. uint AmountToSend; // Amount to send this time.
  1169. uint AmountLeft;
  1170. IPv6Header UNALIGNED *IP;
  1171. TCPHeader UNALIGNED *TCP;
  1172. PNDIS_PACKET Packet;
  1173. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1174. void *Memory;
  1175. TCPSendReq *CurSend;
  1176. SendCmpltContext *SCC;
  1177. SeqNum OldSeq;
  1178. NDIS_STATUS NdisStatus;
  1179. uint AmtOutstanding, AmtUnsent;
  1180. int ForceWin; // Window we're forced to use.
  1181. uint HeaderLength;
  1182. uint LinkOffset;
  1183. uint PMTU;
  1184. RouteCacheEntry *RCE;
  1185. CHECK_STRUCT(SendTCB, tcb);
  1186. ASSERT(SendTCB->tcb_refcnt != 0);
  1187. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1188. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1189. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1190. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1191. //
  1192. // See if we should even be here. If another instance of ourselves is
  1193. // already in this code, or is about to enter it after completing a
  1194. // receive, then just skip on out.
  1195. //
  1196. if ((SendTCB->tcb_flags & IN_TCP_SEND) ||
  1197. (SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
  1198. SendTCB->tcb_flags |= SEND_AFTER_RCV;
  1199. goto bail;
  1200. }
  1201. SendTCB->tcb_flags |= IN_TCP_SEND;
  1202. //
  1203. // In most cases, we will already have a route at this point.
  1204. // However, if we failed to get one earlier in the passive receive
  1205. // path, we may need to retry here.
  1206. //
  1207. if (SendTCB->tcb_rce == NULL) {
  1208. InitRCE(SendTCB);
  1209. if (SendTCB->tcb_rce == NULL) {
  1210. SendTCB->tcb_flags &= ~IN_TCP_SEND;
  1211. goto bail;
  1212. }
  1213. }
  1214. //
  1215. // Validate that the address we're sourcing from and the route we're
  1216. // sending upon are still okay to use.
  1217. //
  1218. // We fail existing send requests for TCBs with a disconnected
  1219. // outgoing interface, except when a loopback route is used.
  1220. //
  1221. if (SendTCB->tcb_routing != RouteCacheValidationCounter) {
  1222. if (!ValidateSourceAndRoute(SendTCB) ||
  1223. IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  1224. SendTCB->tcb_flags &= ~IN_TCP_SEND;
  1225. ASSERT(SendTCB->tcb_refcnt != 0);
  1226. TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
  1227. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  1228. goto bail;
  1229. }
  1230. }
  1231. //
  1232. // Verify that our cached Path MTU is still valid.
  1233. // Watch for changes to IPsec policies since they can also effect our MSS.
  1234. // REVIEW: This the best spot to do this?
  1235. //
  1236. PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
  1237. if (PMTU != SendTCB->tcb_pmtu ||
  1238. SecurityStateValidationCounter != SendTCB->tcb_security) {
  1239. //
  1240. // Either our Path MTU or the global security state has changed.
  1241. // Cache current values and then calculate a new MSS.
  1242. //
  1243. SendTCB->tcb_pmtu = PMTU;
  1244. SendTCB->tcb_security = SecurityStateValidationCounter;
  1245. CalculateMSSForTCB(SendTCB);
  1246. }
  1247. //
  1248. // We'll continue this loop until we send a FIN, or we break out
  1249. // internally for some other reason.
  1250. //
  1251. while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
  1252. CheckTCBSends(SendTCB);
  1253. AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
  1254. AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
  1255. ASSERT(*(int *)&AmtUnsent >= 0);
  1256. SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
  1257. AmtOutstanding);
  1258. //
  1259. // If this send is after a fast recovery and sendwin is zero because
  1260. // of amount outstanding, then at least force 1 segment to prevent
  1261. // delayed ack timeouts from peer.
  1262. //
  1263. if (SendTCB->tcb_force) {
  1264. SendTCB->tcb_force = 0;
  1265. if (SendWin < SendTCB->tcb_mss) {
  1266. SendWin = SendTCB->tcb_mss;
  1267. }
  1268. }
  1269. //
  1270. // Since the window could have shrank, need to get it to zero at
  1271. // least.
  1272. //
  1273. ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
  1274. FORCE_OUT_SHIFT);
  1275. SendWin = MAX(SendWin, ForceWin);
  1276. AmountToSend = MIN(MIN((uint)SendWin, AmtUnsent), SendTCB->tcb_mss);
  1277. ASSERT(SendTCB->tcb_mss > 0);
  1278. //
  1279. // See if we have enough to send. We'll send if we have at least a
  1280. // segment, or if we really have some data to send and we can send
  1281. // all that we have, or the send window is > 0 and we need to force
  1282. // output or send a FIN (note that if we need to force output
  1283. // SendWin will be at least 1 from the check above), or if we can
  1284. // send an amount == to at least half the maximum send window
  1285. // we've seen.
  1286. //
  1287. if (AmountToSend == SendTCB->tcb_mss ||
  1288. (AmountToSend != 0 && AmountToSend == AmtUnsent) ||
  1289. (SendWin != 0 &&
  1290. (((SendTCB->tcb_flags & FIN_NEEDED) &&
  1291. AmtUnsent <= SendTCB->tcb_mss) ||
  1292. (SendTCB->tcb_flags & FORCE_OUTPUT) ||
  1293. AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
  1294. //
  1295. // It's OK to send something. Allocate a packet header.
  1296. //
  1297. // REVIEW: It was easier to code all these allocations directly
  1298. // REVIEW: rather than use IPv6AllocatePacket.
  1299. //
  1300. // REVIEW: This grabs packets and buffers from the IPv6PacketPool
  1301. // REVIEW: and the IPv6BufferPool respectively. Should we instead
  1302. // REVIEW: have separate pools for TCP?
  1303. //
  1304. NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
  1305. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1306. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1307. "TCPSend: couldn't allocate packet header!?!\n"));
  1308. goto error_oor;
  1309. }
  1310. // We'll fill in the CompletionData below.
  1311. InitializeNdisPacket(Packet);
  1312. PC(Packet)->CompletionHandler = TCPSendComplete;
  1313. //
  1314. // Our header buffer has extra space at the beginning for other
  1315. // headers to be prepended to ours without requiring further
  1316. // allocation calls. It also has extra space at the end to hold
  1317. // the send completion data.
  1318. //
  1319. LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
  1320. HeaderLength =
  1321. (LinkOffset + sizeof(*IP) + sizeof(*TCP) +
  1322. sizeof(SendCmpltContext) +
  1323. __builtin_alignof(SendCmpltContext) - 1) &~
  1324. (UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
  1325. Memory = ExAllocatePool(NonPagedPool, HeaderLength);
  1326. if (Memory == NULL) {
  1327. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1328. "TCPSend: couldn't allocate header memory!?!\n"));
  1329. NdisFreePacket(Packet);
  1330. goto error_oor;
  1331. }
  1332. //
  1333. // When allocating the NDIS buffer describing this memory region,
  1334. // we don't tell it about the extra space on the end that we
  1335. // allocated for the send completion data.
  1336. //
  1337. NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
  1338. Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
  1339. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1340. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1341. "TCPSend: couldn't allocate buffer!?!\n"));
  1342. ExFreePool(Memory);
  1343. NdisFreePacket(Packet);
  1344. goto error_oor;
  1345. }
  1346. //
  1347. // Skip over the extra space that will be filled in later by the
  1348. // link level. At this level we add the IPv6Header, the
  1349. // TCPHeader, and the data.
  1350. //
  1351. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
  1352. IP->VersClassFlow = IP_VERSION;
  1353. IP->NextHeader = IP_PROTOCOL_TCP;
  1354. IP->HopLimit = TCPHopLimit(SendTCB);
  1355. IP->Source = SendTCB->tcb_saddr;
  1356. IP->Dest = SendTCB->tcb_daddr;
  1357. //
  1358. // Begin preparing the TCP header.
  1359. //
  1360. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  1361. FillTCPHeader(SendTCB, TCP);
  1362. //
  1363. // Store the send completion data in the same buffer as the TCP
  1364. // header, right after the TCP header. This saves allocation
  1365. // overhead and works because we don't consider this area to be
  1366. // part of the packet data (we set this buffer's length to
  1367. // indicate that the data ends with the TCP header above).
  1368. //
  1369. // Note that this code relies on the fact that we don't include
  1370. // any TCP options (and thus don't have a variable length TCP
  1371. // header) in our data packets.
  1372. //
  1373. SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
  1374. sizeof(*SCC));
  1375. PC(Packet)->CompletionData = SCC;
  1376. #if DBG
  1377. SCC->scc_sig = scc_signature;
  1378. #endif
  1379. SCC->scc_ubufcount = 0;
  1380. SCC->scc_tbufcount = 0;
  1381. SCC->scc_count = 0;
  1382. AmountLeft = AmountToSend;
  1383. if (AmountToSend != 0) {
  1384. long Result;
  1385. //
  1386. // Loop through the sends on the TCB, building a frame.
  1387. //
  1388. CurrentBuffer = FirstBuffer;
  1389. CurSend = SendTCB->tcb_cursend;
  1390. CHECK_STRUCT(CurSend, tsr);
  1391. SCC->scc_firstsend = CurSend;
  1392. do {
  1393. ASSERT(CurSend->tsr_refcnt > 0);
  1394. Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
  1395. ASSERT(Result > 0);
  1396. SCC->scc_count++;
  1397. //
  1398. // If the current send offset is 0 and the current
  1399. // send is less than or equal to what we have left
  1400. // to send, we haven't already put a transport
  1401. // buffer on this send, and nobody else is using
  1402. // the buffer chain directly, just use the input
  1403. // buffers. We check for other people using them
  1404. // by looking at tsr_lastbuf. If it's NULL,
  1405. // nobody else is using the buffers. If it's not
  1406. // NULL, somebody is.
  1407. //
  1408. if (SendTCB->tcb_sendofs == 0 &&
  1409. (SendTCB->tcb_sendsize <= AmountLeft) &&
  1410. (SCC->scc_tbufcount == 0) &&
  1411. CurSend->tsr_lastbuf == NULL) {
  1412. PNDIS_BUFFER LastBuf = SendTCB->tcb_sendbuf;
  1413. uint UBufLength = NdisBufferLength(LastBuf);
  1414. ushort UBufCount = 1;
  1415. while (NDIS_BUFFER_LINKAGE(LastBuf) != NULL) {
  1416. LastBuf = NDIS_BUFFER_LINKAGE(LastBuf);
  1417. UBufLength += NdisBufferLength(LastBuf);
  1418. UBufCount++;
  1419. }
  1420. if (SendTCB->tcb_sendsize == UBufLength) {
  1421. SCC->scc_ubufcount += UBufCount;
  1422. NDIS_BUFFER_LINKAGE(CurrentBuffer) =
  1423. SendTCB->tcb_sendbuf;
  1424. CurSend->tsr_lastbuf = CurrentBuffer = LastBuf;
  1425. AmountLeft -= SendTCB->tcb_sendsize;
  1426. SendTCB->tcb_sendsize = 0;
  1427. } else {
  1428. //
  1429. // Fall through with a non-zero tcb_sendsize.
  1430. //
  1431. ASSERT(SendTCB->tcb_sendsize != 0);
  1432. }
  1433. }
  1434. if (SendTCB->tcb_sendsize != 0) {
  1435. uint AmountToDup;
  1436. PNDIS_BUFFER NewBuf, Buf;
  1437. uint Offset;
  1438. NDIS_STATUS NStatus;
  1439. uchar *VirtualAddress;
  1440. uint Length;
  1441. //
  1442. // Either the current send has more data than
  1443. // we want to send, or the starting offset is
  1444. // not 0. In either case we'll need to loop
  1445. // through the current send, allocating
  1446. // buffers.
  1447. //
  1448. Buf = SendTCB->tcb_sendbuf;
  1449. Offset = SendTCB->tcb_sendofs;
  1450. do {
  1451. ASSERT(Buf != NULL);
  1452. NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
  1453. LowPagePriority);
  1454. if (VirtualAddress == NULL) {
  1455. //
  1456. // Couldn't map into kernel address space.
  1457. // If the packet is already partly built,
  1458. // send what we've got, otherwise error out.
  1459. //
  1460. goto error_oor2;
  1461. }
  1462. ASSERT((Offset < Length) ||
  1463. (Offset == 0 && Length == 0));
  1464. //
  1465. // Adjust the length for the offset into
  1466. // this buffer.
  1467. //
  1468. Length -= Offset;
  1469. AmountToDup = MIN(AmountLeft, Length);
  1470. NdisAllocateBuffer(&NStatus, &NewBuf,
  1471. IPv6BufferPool,
  1472. VirtualAddress + Offset,
  1473. AmountToDup);
  1474. if (NStatus == NDIS_STATUS_SUCCESS) {
  1475. SCC->scc_tbufcount++;
  1476. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  1477. CurrentBuffer = NewBuf;
  1478. if (AmountToDup >= Length) {
  1479. // Exhausted this buffer.
  1480. Buf = NDIS_BUFFER_LINKAGE(Buf);
  1481. Offset = 0;
  1482. } else {
  1483. Offset += AmountToDup;
  1484. ASSERT(Offset < NdisBufferLength(Buf));
  1485. }
  1486. SendTCB->tcb_sendsize -= AmountToDup;
  1487. AmountLeft -= AmountToDup;
  1488. } else {
  1489. //
  1490. // Couldn't allocate a buffer. If
  1491. // the packet is already partly built,
  1492. // send what we've got, otherwise
  1493. // error out.
  1494. //
  1495. error_oor2:
  1496. if (SCC->scc_tbufcount == 0 &&
  1497. SCC->scc_ubufcount == 0) {
  1498. NdisChainBufferAtFront(Packet, FirstBuffer);
  1499. TCPSendComplete(Packet, IP_GENERAL_FAILURE);
  1500. goto error_oor;
  1501. }
  1502. AmountToSend -= AmountLeft;
  1503. AmountLeft = 0;
  1504. break;
  1505. }
  1506. } while (AmountLeft && SendTCB->tcb_sendsize);
  1507. SendTCB->tcb_sendbuf = Buf;
  1508. SendTCB->tcb_sendofs = Offset;
  1509. }
  1510. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  1511. ushort UP;
  1512. //
  1513. // This send is urgent data. We need to figure
  1514. // out what the urgent data pointer should be.
  1515. // We know sendnext is the starting sequence
  1516. // number of the frame, and that at the top of
  1517. // this do loop sendnext identified a byte in
  1518. // the CurSend at that time. We advanced CurSend
  1519. // at the same rate we've decremented
  1520. // AmountLeft (AmountToSend - AmountLeft ==
  1521. // AmountBuilt), so sendnext +
  1522. // (AmountToSend - AmountLeft) identifies a byte
  1523. // in the current value of CurSend, and that
  1524. // quantity plus tcb_sendsize is the sequence
  1525. // number one beyond the current send.
  1526. //
  1527. UP = (ushort)(AmountToSend - AmountLeft) +
  1528. (ushort)SendTCB->tcb_sendsize -
  1529. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  1530. TCP->tcp_urgent = net_short(UP);
  1531. TCP->tcp_flags |= TCP_FLAG_URG;
  1532. }
  1533. //
  1534. // See if we've exhausted this send. If we have,
  1535. // set the PUSH bit in this frame and move on to
  1536. // the next send. We also need to check the
  1537. // urgent data bit.
  1538. //
  1539. if (SendTCB->tcb_sendsize == 0) {
  1540. Queue *Next;
  1541. uchar PrevFlags;
  1542. //
  1543. // We've exhausted this send. Set the PUSH bit.
  1544. //
  1545. TCP->tcp_flags |= TCP_FLAG_PUSH;
  1546. PrevFlags = CurSend->tsr_flags;
  1547. Next = QNEXT(&CurSend->tsr_req.tr_q);
  1548. if (Next != QEND(&SendTCB->tcb_sendq)) {
  1549. CurSend = CONTAINING_RECORD(
  1550. QSTRUCT(TCPReq, Next, tr_q),
  1551. TCPSendReq, tsr_req);
  1552. CHECK_STRUCT(CurSend, tsr);
  1553. SendTCB->tcb_sendsize = CurSend->tsr_unasize;
  1554. SendTCB->tcb_sendofs = CurSend->tsr_offset;
  1555. SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
  1556. SendTCB->tcb_cursend = CurSend;
  1557. //
  1558. // Check the urgent flags. We can't combine new
  1559. // urgent data on to the end of old non-urgent
  1560. // data.
  1561. //
  1562. if ((PrevFlags & TSR_FLAG_URG) &&
  1563. !(CurSend->tsr_flags & TSR_FLAG_URG))
  1564. break;
  1565. } else {
  1566. ASSERT(AmountLeft == 0);
  1567. SendTCB->tcb_cursend = NULL;
  1568. SendTCB->tcb_sendbuf = NULL;
  1569. }
  1570. }
  1571. } while (AmountLeft != 0);
  1572. } else {
  1573. //
  1574. // We're in the loop, but AmountToSend is 0. This
  1575. // should happen only when we're sending a FIN. Check
  1576. // this, and return if it's not true.
  1577. //
  1578. ASSERT(AmtUnsent == 0);
  1579. if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
  1580. // KdBreakPoint();
  1581. ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
  1582. NdisFreeBuffer(FirstBuffer);
  1583. NdisFreePacket(Packet);
  1584. break;
  1585. }
  1586. SCC->scc_firstsend = NULL; // REVIEW: looks unneccessary.
  1587. NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
  1588. }
  1589. // Adjust for what we're really going to send.
  1590. AmountToSend -= AmountLeft;
  1591. //
  1592. // Update the sequence numbers, and start a RTT measurement
  1593. // if needed.
  1594. //
  1595. OldSeq = SendTCB->tcb_sendnext;
  1596. SendTCB->tcb_sendnext += AmountToSend;
  1597. if (!SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
  1598. //
  1599. // We have at least some retransmission. Bump the stat.
  1600. //
  1601. TStats.ts_retranssegs++;
  1602. }
  1603. if (SEQ_GT(SendTCB->tcb_sendnext, SendTCB->tcb_sendmax)) {
  1604. //
  1605. // We're sending at least some new data.
  1606. // We can't advance sendmax once FIN_SENT is set.
  1607. //
  1608. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1609. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1610. TStats.ts_outsegs++;
  1611. //
  1612. // Check the Round-Trip Timer.
  1613. //
  1614. if (SendTCB->tcb_rtt == 0) {
  1615. // No RTT running, so start one.
  1616. SendTCB->tcb_rtt = TCPTime;
  1617. SendTCB->tcb_rttseq = OldSeq;
  1618. }
  1619. }
  1620. //
  1621. // We've built the frame entirely. If we've sent everything
  1622. // we have and there's a FIN pending, OR it in.
  1623. //
  1624. if (AmtUnsent == AmountToSend) {
  1625. if (SendTCB->tcb_flags & FIN_NEEDED) {
  1626. ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
  1627. (SendTCB->tcb_sendnext ==
  1628. (SendTCB->tcb_sendmax - 1)));
  1629. //
  1630. // See if we still have room in the window for a FIN.
  1631. //
  1632. if (SendWin > (int) AmountToSend) {
  1633. TCP->tcp_flags |= TCP_FLAG_FIN;
  1634. SendTCB->tcb_sendnext++;
  1635. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1636. SendTCB->tcb_flags |= (FIN_SENT | FIN_OUTSTANDING);
  1637. SendTCB->tcb_flags &= ~FIN_NEEDED;
  1638. }
  1639. }
  1640. }
  1641. AmountToSend += sizeof(TCPHeader);
  1642. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1643. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  1644. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
  1645. STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
  1646. STOP_TCB_TIMER(SendTCB->tcb_swstimer);
  1647. SendTCB->tcb_alive = TCPTime;
  1648. // Add the buffers to the packet.
  1649. NdisChainBufferAtFront(Packet, FirstBuffer);
  1650. //
  1651. // Compute the TCP checksum. It covers the entire TCP segment
  1652. // starting with the TCP header, plus the IPv6 pseudo-header.
  1653. //
  1654. TCP->tcp_xsum = 0;
  1655. TCP->tcp_xsum = ChecksumPacket(
  1656. Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
  1657. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  1658. //
  1659. // Capture and reference the RCE while we still hold the TCB lock.
  1660. // The TCB's reference on this particular RCE might go away at any
  1661. // point after we release the lock.
  1662. //
  1663. RCE = SendTCB->tcb_rce;
  1664. AddRefRCE(RCE);
  1665. //
  1666. // Everything's ready. Now send the packet.
  1667. //
  1668. // Note that IPv6Send does not return a status code.
  1669. // Instead it *always* completes the packet
  1670. // with an appropriate status code.
  1671. //
  1672. KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
  1673. if (TCP->tcp_xsum == 0) {
  1674. //
  1675. // ChecksumPacket failed, so abort the transmission.
  1676. //
  1677. IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
  1678. } else {
  1679. IPv6Send(Packet, LinkOffset, IP,
  1680. AmountToSend, RCE, 0,
  1681. IP_PROTOCOL_TCP,
  1682. net_short(TCP->tcp_src),
  1683. net_short(TCP->tcp_dest));
  1684. }
  1685. ReleaseRCE(RCE);
  1686. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  1687. continue;
  1688. } else {
  1689. //
  1690. // We've decided we can't send anything now. Figure out why, and
  1691. // see if we need to set a timer.
  1692. //
  1693. if (SendTCB->tcb_sendwin == 0) {
  1694. if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
  1695. SendTCB->tcb_flags |= FLOW_CNTLD;
  1696. SendTCB->tcb_rexmitcnt = 0;
  1697. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  1698. SendTCB->tcb_rexmit);
  1699. SendTCB->tcb_slowcount++;
  1700. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  1701. } else
  1702. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1703. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  1704. SendTCB->tcb_rexmit);
  1705. } else
  1706. if (AmountToSend != 0)
  1707. // We have something to send, but we're not sending
  1708. // it, presumably due to SWS avoidance.
  1709. if (!TCB_TIMER_RUNNING(SendTCB->tcb_swstimer))
  1710. START_TCB_TIMER(SendTCB->tcb_swstimer, SWS_TO);
  1711. break;
  1712. }
  1713. } // while (!FIN_OUTSTANDING)
  1714. //
  1715. // We're done sending, so we don't need the output flags set.
  1716. //
  1717. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
  1718. SEND_AFTER_RCV);
  1719. bail:
  1720. DerefTCB(SendTCB, PreLockIrql);
  1721. return;
  1722. //
  1723. // Common case error handling code for out of resource conditions. Start the
  1724. // retransmit timer if it's not already running (so that we try this again
  1725. // later), clean up and return.
  1726. //
  1727. error_oor:
  1728. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer))
  1729. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  1730. // We had an out of resource problem, so clear the OUTPUT flags.
  1731. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  1732. DerefTCB(SendTCB, PreLockIrql);
  1733. return;
  1734. } // end of TCPSend()
  1735. //* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
  1736. //
  1737. // Called to fast retransmit the dropped segment.
  1738. //
  1739. // We assume the caller has put a reference on the TCB, and the TCB is locked
  1740. // on entry. The reference is dropped and the lock released before returning.
  1741. //
  1742. void // Returns: Nothing.
  1743. ResetAndFastSend(
  1744. TCB *SeqTCB, // TCB for this connection.
  1745. SeqNum NewSeq, // Sequence number to set.
  1746. uint NewCWin) // New value for congestion window.
  1747. {
  1748. TCPSendReq *SendReq;
  1749. Queue *CurQ;
  1750. PNDIS_BUFFER Buffer;
  1751. uint Offset;
  1752. uint SendSize;
  1753. CHECK_STRUCT(SeqTCB, tcb);
  1754. ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
  1755. //
  1756. // The new seq must be less than send max, or NewSeq, senduna, sendnext,
  1757. // and sendmax must all be equal. (The latter case happens when we're
  1758. // called exiting TIME_WAIT, or possibly when we're retransmitting
  1759. // during a flow controlled situation).
  1760. //
  1761. ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
  1762. (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
  1763. SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
  1764. SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
  1765. if (SYNC_STATE(SeqTCB->tcb_state) &&
  1766. (SeqTCB->tcb_state != TCB_TIME_WAIT)) {
  1767. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  1768. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  1769. SendReq = (TCPSendReq *) CONTAINING_RECORD(CurQ, TCPReq, tr_q);
  1770. //
  1771. // SendReq points to the first send request on the send queue.
  1772. // We're pointing at the proper send req now. We need to go down.
  1773. //
  1774. // SendReq points to the cursend.
  1775. // SendSize point to sendsize in the cursend.
  1776. //
  1777. SendSize = SendReq->tsr_unasize;
  1778. Buffer = SendReq->tsr_buffer;
  1779. Offset = SendReq->tsr_offset;
  1780. // Call the fast retransmit send now.
  1781. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  1782. SeqTCB->tcb_mss);
  1783. } else {
  1784. ASSERT(SeqTCB->tcb_cursend == NULL);
  1785. }
  1786. }
  1787. SeqTCB->tcb_cwin = NewCWin;
  1788. DerefTCB(SeqTCB, DISPATCH_LEVEL);
  1789. return;
  1790. }
  1791. //* TCPFastSend - To send a segment without changing TCB state.
  1792. //
  1793. // Called to handle fast retransmit of the lost segment.
  1794. // tcb_lock will be held while entering (called by TCPRcv).
  1795. //
  1796. void // Returns: Nothing.
  1797. TCPFastSend(
  1798. TCB *SendTCB, // TCB for this connection.
  1799. PNDIS_BUFFER in_SendBuf, // NDIS buffer.
  1800. uint SendOfs, // Send offset.
  1801. TCPSendReq *CurSend, // Current send request.
  1802. uint SendSize, // Size of this send.
  1803. SeqNum SendNext, // Sequence number to use for this send.
  1804. int in_ToBeSent) // Cap on SendSize (REVIEW: Callee should cap).
  1805. {
  1806. uint AmountToSend; // Amount to send this time.
  1807. uint AmountLeft;
  1808. IPv6Header UNALIGNED *IP;
  1809. TCPHeader UNALIGNED *TCP;
  1810. PNDIS_PACKET Packet;
  1811. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1812. void *Memory;
  1813. SendCmpltContext *SCC;
  1814. NDIS_STATUS NdisStatus;
  1815. uint AmtOutstanding, AmtUnsent;
  1816. uint HeaderLength;
  1817. uint LinkOffset;
  1818. uint PMTU;
  1819. KIRQL PreLockIrql;
  1820. PNDIS_BUFFER SendBuf = in_SendBuf;
  1821. RouteCacheEntry *RCE;
  1822. PreLockIrql = DISPATCH_LEVEL;
  1823. CHECK_STRUCT(SendTCB, tcb);
  1824. ASSERT(SendTCB->tcb_refcnt != 0);
  1825. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1826. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1827. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1828. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1829. //
  1830. // In most cases, we will already have a route at this point.
  1831. // However, if we failed to get one earlier in the passive receive
  1832. // path, we may need to retry here.
  1833. //
  1834. if (SendTCB->tcb_rce == NULL) {
  1835. InitRCE(SendTCB);
  1836. if (SendTCB->tcb_rce == NULL) {
  1837. DerefTCB(SendTCB, PreLockIrql);
  1838. return;
  1839. }
  1840. }
  1841. //
  1842. // Validate that the address we're sourcing from and the route we're
  1843. // sending upon are still okay to use.
  1844. //
  1845. // We fail existing send requests for TCBs with a disconnected
  1846. // outgoing interface, except when a loopback route is used.
  1847. //
  1848. if (SendTCB->tcb_routing != RouteCacheValidationCounter) {
  1849. if (!ValidateSourceAndRoute(SendTCB) ||
  1850. IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  1851. ASSERT(SendTCB->tcb_refcnt != 0);
  1852. TryToCloseTCB(SendTCB, TCB_CLOSE_ABORTED, PreLockIrql);
  1853. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  1854. DerefTCB(SendTCB, PreLockIrql);
  1855. return;
  1856. }
  1857. }
  1858. //
  1859. // Verify that our cached Path MTU is still valid.
  1860. // Watch for changes to IPsec policies since they can also effect our MSS.
  1861. // REVIEW: This the best spot to do this?
  1862. //
  1863. PMTU = GetEffectivePathMTUFromRCE(SendTCB->tcb_rce);
  1864. if (PMTU != SendTCB->tcb_pmtu ||
  1865. SecurityStateValidationCounter != SendTCB->tcb_security) {
  1866. //
  1867. // Either our Path MTU or the global security state has changed.
  1868. // Cache current values and then calculate a new MSS.
  1869. //
  1870. SendTCB->tcb_pmtu = PMTU;
  1871. SendTCB->tcb_security = SecurityStateValidationCounter;
  1872. CalculateMSSForTCB(SendTCB);
  1873. }
  1874. AmtOutstanding = (uint)(SendTCB->tcb_sendnext - SendTCB->tcb_senduna);
  1875. AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize),
  1876. (int)SendTCB->tcb_sendwin);
  1877. while (AmtUnsent > 0) {
  1878. if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
  1879. //
  1880. // Since tcb_lock is released in this loop
  1881. // it is possible that delayed ack acked
  1882. // what we are trying to retransmit.
  1883. //
  1884. goto error_oor;
  1885. }
  1886. // AmtUnsent below was minimum of sendwin and amtunsent
  1887. AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
  1888. ASSERT((int)AmtUnsent >= 0);
  1889. //
  1890. // We're going to send something. Allocate a packet header.
  1891. //
  1892. // REVIEW: It was easier to code all these allocations directly
  1893. // REVIEW: rather than use IPv6AllocatePacket.
  1894. //
  1895. // REVIEW: This grabs packets and buffers from the IPv6PacketPool
  1896. // REVIEW: and the IPv6BufferPool respectively. Should we instead
  1897. // REVIEW: have separate pools for TCP?
  1898. //
  1899. NdisAllocatePacket(&NdisStatus, &Packet, IPv6PacketPool);
  1900. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1901. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1902. "TCPSend: couldn't allocate packet header!?!\n"));
  1903. goto error_oor;
  1904. }
  1905. // We'll fill in the CompletionData below.
  1906. InitializeNdisPacket(Packet);
  1907. PC(Packet)->CompletionHandler = TCPSendComplete;
  1908. //
  1909. // Our header buffer has extra space at the beginning for other
  1910. // headers to be prepended to ours without requiring further
  1911. // allocation calls. It also has extra space at the end to hold
  1912. // the send completion data.
  1913. //
  1914. LinkOffset = SendTCB->tcb_rce->NCE->IF->LinkHeaderSize;
  1915. HeaderLength = (LinkOffset + sizeof(*IP) + sizeof(*TCP) +
  1916. sizeof(SendCmpltContext) +
  1917. __builtin_alignof(SendCmpltContext) - 1) &~
  1918. (UINT_PTR)(__builtin_alignof(SendCmpltContext) - 1);
  1919. Memory = ExAllocatePool(NonPagedPool, HeaderLength);
  1920. if (Memory == NULL) {
  1921. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1922. "TCPSend: couldn't allocate header memory!?!\n"));
  1923. NdisFreePacket(Packet);
  1924. goto error_oor;
  1925. }
  1926. //
  1927. // When allocating the NDIS buffer describing this memory region,
  1928. // we don't tell it about the extra space on the end that we
  1929. // allocated for the send completion data.
  1930. //
  1931. NdisAllocateBuffer(&NdisStatus, &FirstBuffer, IPv6BufferPool,
  1932. Memory, LinkOffset + sizeof(*IP) + sizeof(*TCP));
  1933. if (NdisStatus != NDIS_STATUS_SUCCESS) {
  1934. KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR,
  1935. "TCPSend: couldn't allocate buffer!?!\n"));
  1936. ExFreePool(Memory);
  1937. NdisFreePacket(Packet);
  1938. goto error_oor;
  1939. }
  1940. //
  1941. // Skip over the extra space that will be filled in later by the
  1942. // link level. At this level we add the IPv6Header, the
  1943. // TCPHeader, and the data.
  1944. //
  1945. IP = (IPv6Header UNALIGNED *)((uchar *)Memory + LinkOffset);
  1946. IP->VersClassFlow = IP_VERSION;
  1947. IP->NextHeader = IP_PROTOCOL_TCP;
  1948. IP->HopLimit = TCPHopLimit(SendTCB);
  1949. IP->Source = SendTCB->tcb_saddr;
  1950. IP->Dest = SendTCB->tcb_daddr;
  1951. //
  1952. // Begin preparing the TCP header.
  1953. //
  1954. TCP = (TCPHeader UNALIGNED *)(IP + 1);
  1955. FillTCPHeader(SendTCB, TCP);
  1956. TCP->tcp_seq = net_long(SendNext);
  1957. //
  1958. // Store the send completion data in the same buffer as the TCP
  1959. // header, right after the TCP header. This saves allocation
  1960. // overhead and works because we don't consider this area to be
  1961. // part of the packet data (we set this buffer's length to
  1962. // indicate that the data ends with the TCP header above).
  1963. //
  1964. // Note that this code relies on the fact that we don't include
  1965. // any TCP options (and thus don't have a variable length TCP
  1966. // header) in our data packets.
  1967. //
  1968. SCC = (SendCmpltContext *)((uchar *)Memory + HeaderLength -
  1969. sizeof(*SCC));
  1970. PC(Packet)->CompletionData = SCC;
  1971. #if DBG
  1972. SCC->scc_sig = scc_signature;
  1973. #endif
  1974. SCC->scc_ubufcount = 0;
  1975. SCC->scc_tbufcount = 0;
  1976. SCC->scc_count = 0;
  1977. AmountLeft = AmountToSend;
  1978. if (AmountToSend != 0) {
  1979. long Result;
  1980. //
  1981. // Loop through the sends on the TCB, building a frame.
  1982. //
  1983. CurrentBuffer = FirstBuffer;
  1984. CHECK_STRUCT(CurSend, tsr);
  1985. SCC->scc_firstsend = CurSend;
  1986. do {
  1987. ASSERT(CurSend->tsr_refcnt > 0);
  1988. Result = InterlockedIncrement(&(CurSend->tsr_refcnt));
  1989. ASSERT(Result > 0);
  1990. SCC->scc_count++;
  1991. //
  1992. // If the current send offset is 0 and the current
  1993. // send is less than or equal to what we have left
  1994. // to send, we haven't already put a transport
  1995. // buffer on this send, and nobody else is using
  1996. // the buffer chain directly, just use the input
  1997. // buffers. We check for other people using them
  1998. // by looking at tsr_lastbuf. If it's NULL,
  1999. // nobody else is using the buffers. If it's not
  2000. // NULL, somebody is.
  2001. //
  2002. if (SendOfs == 0 &&
  2003. (SendSize <= AmountLeft) &&
  2004. (SCC->scc_tbufcount == 0) &&
  2005. CurSend->tsr_lastbuf == NULL) {
  2006. PNDIS_BUFFER LastBuf = SendBuf;
  2007. uint UBufLength = NdisBufferLength(LastBuf);
  2008. ushort UBufCount = 1;
  2009. while (NDIS_BUFFER_LINKAGE(LastBuf) != NULL) {
  2010. LastBuf = NDIS_BUFFER_LINKAGE(LastBuf);
  2011. UBufLength += NdisBufferLength(LastBuf);
  2012. UBufCount++;
  2013. }
  2014. if (SendSize == UBufLength) {
  2015. SCC->scc_ubufcount += UBufCount;
  2016. NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendBuf;
  2017. CurSend->tsr_lastbuf = CurrentBuffer = LastBuf;
  2018. AmountLeft -= SendSize;
  2019. SendSize = 0;
  2020. } else {
  2021. //
  2022. // Fall through with a non-zero SendSize.
  2023. //
  2024. ASSERT(SendSize != 0);
  2025. }
  2026. }
  2027. if (SendSize != 0) {
  2028. uint AmountToDup;
  2029. PNDIS_BUFFER NewBuf, Buf;
  2030. uint Offset;
  2031. NDIS_STATUS NStatus;
  2032. uchar *VirtualAddress;
  2033. uint Length;
  2034. //
  2035. // Either the current send has more data than
  2036. // we want to send, or the starting offset is
  2037. // not 0. In either case we'll need to loop
  2038. // through the current send, allocating buffers.
  2039. //
  2040. Buf = SendBuf;
  2041. Offset = SendOfs;
  2042. do {
  2043. ASSERT(Buf != NULL);
  2044. NdisQueryBufferSafe(Buf, &VirtualAddress, &Length,
  2045. LowPagePriority);
  2046. if (VirtualAddress == NULL) {
  2047. goto error_oor2;
  2048. }
  2049. ASSERT((Offset < Length) ||
  2050. (Offset == 0 && Length == 0));
  2051. //
  2052. // Adjust the length for the offset into
  2053. // this buffer.
  2054. //
  2055. Length -= Offset;
  2056. AmountToDup = MIN(AmountLeft, Length);
  2057. NdisAllocateBuffer(&NStatus, &NewBuf,
  2058. IPv6BufferPool,
  2059. VirtualAddress + Offset,
  2060. AmountToDup);
  2061. if (NStatus == NDIS_STATUS_SUCCESS) {
  2062. SCC->scc_tbufcount++;
  2063. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  2064. CurrentBuffer = NewBuf;
  2065. if (AmountToDup >= Length) {
  2066. // Exhausted this buffer.
  2067. Buf = NDIS_BUFFER_LINKAGE(Buf);
  2068. Offset = 0;
  2069. } else {
  2070. Offset += AmountToDup;
  2071. ASSERT(Offset < NdisBufferLength(Buf));
  2072. }
  2073. SendSize -= AmountToDup;
  2074. AmountLeft -= AmountToDup;
  2075. } else {
  2076. //
  2077. // Couldn't allocate a buffer. If
  2078. // the packet is already partly built,
  2079. // send what we've got, otherwise
  2080. // error out.
  2081. //
  2082. error_oor2:
  2083. if (SCC->scc_tbufcount == 0 &&
  2084. SCC->scc_ubufcount == 0) {
  2085. KeReleaseSpinLockFromDpcLevel(
  2086. &SendTCB->tcb_lock);
  2087. NdisChainBufferAtFront(Packet, FirstBuffer);
  2088. TCPSendComplete(Packet, IP_GENERAL_FAILURE);
  2089. KeAcquireSpinLockAtDpcLevel(&SendTCB->tcb_lock);
  2090. goto error_oor;
  2091. }
  2092. AmountToSend -= AmountLeft;
  2093. AmountLeft = 0;
  2094. break;
  2095. }
  2096. } while (AmountLeft && SendSize);
  2097. SendBuf = Buf;
  2098. SendOfs = Offset;
  2099. }
  2100. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  2101. ushort UP;
  2102. //
  2103. // This send is urgent data. We need to figure
  2104. // out what the urgent data pointer should be.
  2105. // We know sendnext is the starting sequence
  2106. // number of the frame, and that at the top of
  2107. // this do loop sendnext identified a byte in
  2108. // the CurSend at that time. We advanced CurSend
  2109. // at the same rate we've decremented
  2110. // AmountLeft (AmountToSend - AmountLeft ==
  2111. // AmountBuilt), so sendnext +
  2112. // (AmountToSend - AmountLeft) identifies a byte
  2113. // in the current value of CurSend, and that
  2114. // quantity plus tcb_sendsize is the sequence
  2115. // number one beyond the current send.
  2116. //
  2117. UP = (ushort) (AmountToSend - AmountLeft) +
  2118. (ushort) SendSize -
  2119. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  2120. TCP->tcp_urgent = net_short(UP);
  2121. TCP->tcp_flags |= TCP_FLAG_URG;
  2122. }
  2123. //
  2124. // See if we've exhausted this send. If we have,
  2125. // set the PUSH bit in this frame and move on to
  2126. // the next send. We also need to check the
  2127. // urgent data bit.
  2128. //
  2129. if (SendSize == 0) {
  2130. Queue *Next;
  2131. ulong PrevFlags;
  2132. //
  2133. // We've exhausted this send. Set the PUSH bit.
  2134. //
  2135. TCP->tcp_flags |= TCP_FLAG_PUSH;
  2136. PrevFlags = CurSend->tsr_flags;
  2137. Next = QNEXT(&CurSend->tsr_req.tr_q);
  2138. if (Next != QEND(&SendTCB->tcb_sendq)) {
  2139. CurSend = CONTAINING_RECORD(
  2140. QSTRUCT(TCPReq, Next, tr_q),
  2141. TCPSendReq, tsr_req);
  2142. CHECK_STRUCT(CurSend, tsr);
  2143. SendSize = CurSend->tsr_unasize;
  2144. SendOfs = CurSend->tsr_offset;
  2145. SendBuf = CurSend->tsr_buffer;
  2146. //
  2147. // Check the urgent flags. We can't combine new
  2148. // urgent data on to the end of old non-urgent
  2149. // data.
  2150. //
  2151. if ((PrevFlags & TSR_FLAG_URG) &&
  2152. !(CurSend->tsr_flags & TSR_FLAG_URG)) {
  2153. break;
  2154. }
  2155. } else {
  2156. ASSERT(AmountLeft == 0);
  2157. CurSend = NULL;
  2158. SendBuf = NULL;
  2159. }
  2160. }
  2161. } while (AmountLeft != 0);
  2162. } else {
  2163. //
  2164. // Amt to send is 0.
  2165. // Just bail out and start timer.
  2166. //
  2167. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2168. START_TCB_TIMER(SendTCB->tcb_rexmittimer,
  2169. SendTCB->tcb_rexmit);
  2170. }
  2171. ExFreePool(NdisBufferVirtualAddress(FirstBuffer));
  2172. NdisFreeBuffer(FirstBuffer);
  2173. NdisFreePacket(Packet);
  2174. return;
  2175. }
  2176. //
  2177. // Adjust for what we're really going to send.
  2178. //
  2179. AmountToSend -= AmountLeft;
  2180. SendNext += AmountToSend;
  2181. AmtUnsent -= AmountToSend;
  2182. TStats.ts_retranssegs++;
  2183. AmountToSend += sizeof(TCPHeader);
  2184. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2185. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  2186. }
  2187. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED | FORCE_OUTPUT);
  2188. STOP_TCB_TIMER(SendTCB->tcb_delacktimer);
  2189. STOP_TCB_TIMER(SendTCB->tcb_swstimer);
  2190. //
  2191. // Add the buffers to the packet.
  2192. //
  2193. NdisChainBufferAtFront(Packet, FirstBuffer);
  2194. //
  2195. // Compute the TCP checksum. It covers the entire TCP segment
  2196. // starting with the TCP header, plus the IPv6 pseudo-header.
  2197. //
  2198. TCP->tcp_xsum = 0;
  2199. TCP->tcp_xsum = ChecksumPacket(
  2200. Packet, LinkOffset + sizeof *IP, NULL, AmountToSend,
  2201. AlignAddr(&IP->Source), AlignAddr(&IP->Dest), IP_PROTOCOL_TCP);
  2202. //
  2203. // Capture and reference the RCE while we still hold the TCB lock.
  2204. // The TCB's reference on this particular RCE might go away at any
  2205. // point after we release the lock.
  2206. //
  2207. RCE = SendTCB->tcb_rce;
  2208. AddRefRCE(RCE);
  2209. //
  2210. // Everything's ready. Now send the packet.
  2211. //
  2212. // Note that IPv6Send does not return a status code.
  2213. // Instead it *always* completes the packet
  2214. // with an appropriate status code.
  2215. //
  2216. KeReleaseSpinLock(&SendTCB->tcb_lock, PreLockIrql);
  2217. if (TCP->tcp_xsum == 0) {
  2218. //
  2219. // ChecksumPacket failed, so abort the transmission.
  2220. //
  2221. IPv6SendComplete(NULL, Packet, IP_NO_RESOURCES);
  2222. } else {
  2223. IPv6Send(Packet, LinkOffset, IP,
  2224. AmountToSend, RCE, 0,
  2225. IP_PROTOCOL_TCP,
  2226. net_short(TCP->tcp_src),
  2227. net_short(TCP->tcp_dest));
  2228. }
  2229. //
  2230. // Release reference and reacquire lock we dropped before sending.
  2231. //
  2232. ReleaseRCE(RCE);
  2233. KeAcquireSpinLock(&SendTCB->tcb_lock, &PreLockIrql);
  2234. }
  2235. return;
  2236. //
  2237. // Common case error handling code for out of resource conditions.
  2238. // Start the retransmit timer if it's not already running
  2239. // (so that we try this again later), clean up and return.
  2240. //
  2241. error_oor:
  2242. if (!TCB_TIMER_RUNNING(SendTCB->tcb_rexmittimer)) {
  2243. START_TCB_TIMER(SendTCB->tcb_rexmittimer, SendTCB->tcb_rexmit);
  2244. }
  2245. return;
  2246. }
  2247. //* TDISend - Send data on a connection.
  2248. //
  2249. // The main TDI send entry point. We take the input parameters, validate
  2250. // them, allocate a send request, etc. We then put the send request on the
  2251. // queue. If we have no other sends on the queue or Nagling is disabled we'll
  2252. // call TCPSend to send the data.
  2253. //
  2254. TDI_STATUS // Returns: Status of attempt to send.
  2255. TdiSend(
  2256. PTDI_REQUEST Request, // TDI request for the call.
  2257. ushort Flags, // Flags for this send.
  2258. uint SendLength, // Length in bytes of send.
  2259. PNDIS_BUFFER SendBuffer) // Buffer chain to be sent.
  2260. {
  2261. TCPConn *Conn;
  2262. TCB *SendTCB;
  2263. TCPSendReq *SendReq;
  2264. KIRQL OldIrql;
  2265. TDI_STATUS Error;
  2266. uint EmptyQ;
  2267. #if DBG
  2268. uint RealSendSize;
  2269. PNDIS_BUFFER Temp;
  2270. //
  2271. // Loop through the buffer chain, and make sure that the length matches
  2272. // up with SendLength.
  2273. //
  2274. Temp = SendBuffer;
  2275. RealSendSize = 0;
  2276. do {
  2277. ASSERT(Temp != NULL);
  2278. RealSendSize += NdisBufferLength(Temp);
  2279. Temp = NDIS_BUFFER_LINKAGE(Temp);
  2280. } while (Temp != NULL);
  2281. ASSERT(RealSendSize == SendLength);
  2282. #endif
  2283. //
  2284. // Grab lock on Connection Table. Then get our connection info from
  2285. // the TDI request, and our TCP control block from that.
  2286. //
  2287. Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext),
  2288. &OldIrql);
  2289. if (Conn == NULL) {
  2290. Error = TDI_INVALID_CONNECTION;
  2291. goto abort;
  2292. }
  2293. CHECK_STRUCT(Conn, tc);
  2294. SendTCB = Conn->tc_tcb;
  2295. if (SendTCB == NULL) {
  2296. Error = TDI_INVALID_STATE;
  2297. KeReleaseSpinLock(&Conn->tc_ConnBlock->cb_lock, OldIrql);
  2298. abort:
  2299. return Error;
  2300. }
  2301. CHECK_STRUCT(SendTCB, tcb);
  2302. //
  2303. // Switch to a finer-grained lock:
  2304. // Drop lock on the Connection Table in favor of one on our TCB.
  2305. //
  2306. KeAcquireSpinLockAtDpcLevel(&SendTCB->tcb_lock);
  2307. KeReleaseSpinLockFromDpcLevel(&Conn->tc_ConnBlock->cb_lock);
  2308. //
  2309. // Make sure our TCB is in a send-able state.
  2310. //
  2311. if (!DATA_SEND_STATE(SendTCB->tcb_state) || CLOSING(SendTCB)) {
  2312. Error = TDI_INVALID_STATE;
  2313. goto abort2;
  2314. }
  2315. CheckTCBSends(SendTCB); // Just a debug check.
  2316. //
  2317. // If we've released our RCE for some reason, reacquire one.
  2318. //
  2319. if (SendTCB->tcb_rce == NULL) {
  2320. InitRCE(SendTCB);
  2321. if (SendTCB->tcb_rce == NULL) {
  2322. Error = TDI_DEST_NET_UNREACH;
  2323. goto abort2;
  2324. }
  2325. }
  2326. //
  2327. // Verify that the cached RCE is still valid.
  2328. //
  2329. SendTCB->tcb_rce = ValidateRCE(SendTCB->tcb_rce, SendTCB->tcb_nte);
  2330. ASSERT(SendTCB->tcb_rce != NULL);
  2331. if (IsDisconnectedAndNotLoopbackRCE(SendTCB->tcb_rce)) {
  2332. //
  2333. // Fail new send requests for TCBs with a disconnected
  2334. // outgoing interface, except when the loopback route is used.
  2335. //
  2336. Error = TDI_DEST_NET_UNREACH;
  2337. goto abort2;
  2338. }
  2339. if (SendLength == 0) {
  2340. //
  2341. // Wow, nothing to do!
  2342. //
  2343. // REVIEW: Can't we do this check earlier (like before we even grab the
  2344. // REVIEW: Connection Table lock? The only reason I can think not to
  2345. // REVIEW: would be if something cared about the return code if a bad
  2346. // REVIEW: Tdi Request was given to us.
  2347. //
  2348. Error = TDI_SUCCESS;
  2349. goto abort2;
  2350. }
  2351. //
  2352. // We have a TCB, and it's valid. Allocate a send request now.
  2353. //
  2354. SendReq = GetSendReq();
  2355. if (SendReq == NULL) {
  2356. Error = TDI_NO_RESOURCES;
  2357. abort2:
  2358. KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
  2359. return Error;
  2360. }
  2361. //
  2362. // Prepare a TCP send request based on the TDI request and the
  2363. // passed in buffer chain.
  2364. //
  2365. SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
  2366. SendReq->tsr_req.tr_context = Request->RequestContext;
  2367. SendReq->tsr_buffer = SendBuffer;
  2368. SendReq->tsr_size = SendLength;
  2369. SendReq->tsr_unasize = SendLength;
  2370. SendReq->tsr_refcnt = 1; // ACK will decrement this ref
  2371. SendReq->tsr_offset = 0;
  2372. SendReq->tsr_lastbuf = NULL;
  2373. SendReq->tsr_time = TCPTime;
  2374. SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ? TSR_FLAG_URG : 0;
  2375. //
  2376. // Check current status of our send queue.
  2377. //
  2378. EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
  2379. //
  2380. // Add this send request to our send queue.
  2381. //
  2382. SendTCB->tcb_unacked += SendLength;
  2383. ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
  2384. if (SendTCB->tcb_cursend == NULL) {
  2385. //
  2386. // No existing current send request, so make this new one
  2387. // the current send.
  2388. //
  2389. // REVIEW: Is this always equivalent to EMPTYQ test above?
  2390. // REVIEW: If so, why not just set EmptyQ flag here and save a test?
  2391. //
  2392. SendTCB->tcb_cursend = SendReq;
  2393. SendTCB->tcb_sendbuf = SendBuffer;
  2394. SendTCB->tcb_sendofs = 0;
  2395. SendTCB->tcb_sendsize = SendLength;
  2396. }
  2397. //
  2398. // See if we should try to send now. We attempt to do so if we weren't
  2399. // already blocked, or if we were and either the Nagle Algorithm is turned
  2400. // off or we now have at least one max segment worth of data to send.
  2401. //
  2402. if (EmptyQ || (!(SendTCB->tcb_flags & NAGLING) ||
  2403. (SendTCB->tcb_unacked -
  2404. (SendTCB->tcb_sendmax - SendTCB->tcb_senduna))
  2405. >= SendTCB->tcb_mss)) {
  2406. SendTCB->tcb_refcnt++;
  2407. TCPSend(SendTCB, OldIrql);
  2408. } else
  2409. KeReleaseSpinLock(&SendTCB->tcb_lock, OldIrql);
  2410. //
  2411. // When TCPSend returns, we may or may not have already sent the data
  2412. // associated with this particular request.
  2413. //
  2414. return TDI_PENDING;
  2415. }
  2416. #pragma BEGIN_INIT
  2417. //* InitTCPSend - Initialize our send side.
  2418. //
  2419. // Called during init time to initialize our TCP send state.
  2420. //
  2421. int // Returns: TRUE if we inited, false if we didn't.
  2422. InitTCPSend(
  2423. void) // Nothing.
  2424. {
  2425. ExInitializeSListHead(&TCPSendReqFree);
  2426. KeInitializeSpinLock(&TCPSendReqFreeLock);
  2427. IPv6RegisterULProtocol(IP_PROTOCOL_TCP, TCPReceive, TCPControlReceive);
  2428. return TRUE;
  2429. }
  2430. #pragma END_INIT
  2431. //* UnloadTCPSend
  2432. //
  2433. // Cleanup and prepare for stack unload.
  2434. //
  2435. void
  2436. UnloadTCPSend(void)
  2437. {
  2438. PSLIST_ENTRY BufferLink;
  2439. while ((BufferLink = ExInterlockedPopEntrySList(&TCPSendReqFree,
  2440. &TCPSendReqFreeLock))
  2441. != NULL) {
  2442. Queue *QueuePtr = CONTAINING_RECORD(BufferLink, Queue, q_next);
  2443. TCPReq *Req = CONTAINING_RECORD(QueuePtr, TCPReq, tr_q);
  2444. TCPSendReq *SendReq = CONTAINING_RECORD(Req, TCPSendReq, tsr_req);
  2445. CHECK_STRUCT(SendReq, tsr);
  2446. ExFreePool(SendReq);
  2447. }
  2448. IPv6RegisterULProtocol(IP_PROTOCOL_TCP, NULL, NULL);
  2449. }