Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3914 lines
137 KiB

  1. /********************************************************************/
  2. /** Microsoft LAN Manager **/
  3. /** Copyright(c) Microsoft Corp., 1990-2000 **/
  4. /********************************************************************/
  5. /* :ts=4 */
  6. //** TCPSEND.C - TCP send protocol code.
  7. //
  8. // This file contains the code for sending Data and Control segments.
  9. //
  10. #include "precomp.h"
  11. #include "addr.h"
  12. #include "tcp.h"
  13. #include "tcb.h"
  14. #include "tcpconn.h"
  15. #include "tcpsend.h"
  16. #include "tcprcv.h"
  17. #include "tlcommon.h"
  18. #include "info.h"
  19. #include "tcpcfg.h"
  20. #include "secfltr.h"
  21. #include "tcpipbuf.h"
  22. #include "mdlpool.h"
  23. #include "pplasl.h"
  24. #if GPC
  25. #include "qos.h"
  26. #include "traffic.h"
  27. #include "gpcifc.h"
  28. #include "ntddtc.h"
  29. extern GPC_HANDLE hGpcClient[GPC_CF_MAX];
  30. extern ULONG GpcCfCounts[GPC_CF_MAX];
  31. extern GPC_EXPORTED_CALLS GpcEntries;
  32. extern ULONG GPCcfInfo;
  33. #endif
  34. NTSTATUS
  35. GetIFAndLink(void *Rce, ULONG * IFIndex, IPAddr * NextHop);
  36. extern ulong DisableUserTOSSetting;
  37. uint MaxSendSegments = 64;
  38. #if MILLEN
  39. uint DisableLargeSendOffload = 1;
  40. #else // MILLEN
  41. uint DisableLargeSendOffload = 0;
  42. #endif // !MILLEN
  43. #if DBG
  44. ulong DbgDcProb = 0;
  45. ulong DbgTcpSendHwChksumCount = 0;
  46. #endif
  47. extern HANDLE TcpRequestPool;
  48. extern CTELock *pTWTCBTableLock;
  49. extern CACHE_LINE_KSPIN_LOCK RequestCompleteListLock;
  50. extern uint TcpHostOpts;
  51. extern uint TcpHostSendOpts;
  // Worst-case size of an aligned SACK option: the 4-byte option header
  // (two NOPs, kind, length) plus at most 4 SACK blocks of two longwords
  // (8 bytes) each. The expansion is parenthesized so the value is still
  // 36 when the macro is used inside a larger expression.
  #define ALIGNED_SACK_OPT_SIZE (4 + 8 * 4)
  53. void
  54. ClassifyPacket(TCB *SendTCB);
  55. void
  56. TCPFastSend(TCB * SendTCB,
  57. PNDIS_BUFFER in_SendBuf,
  58. uint in_SendOfs,
  59. TCPSendReq * in_SendReq,
  60. uint in_SendSize,
  61. SeqNum NextSeq,
  62. int in_ToBeSent);
  63. void *TCPProtInfo; // TCP protocol info for IP.
  64. NDIS_HANDLE TCPSendBufferPool;
  65. USHORT TcpHeaderBufferSize;
  66. HANDLE TcpHeaderPool;
  67. extern IPInfo LocalNetInfo;
  68. //
  69. // All of the init code can be discarded.
  70. //
  71. int InitTCPSend(void);
  72. void UnInitTCPSend(void);
  73. #ifdef ALLOC_PRAGMA
  74. #pragma alloc_text(INIT, InitTCPSend)
  75. #pragma alloc_text(INIT, UnInitTCPSend)
  76. #endif
  77. extern void ResetSendNext(TCB * SeqTCB, SeqNum NewSeq);
  78. extern NTSTATUS
  79. TCPPnPPowerRequest(void *ipContext, IPAddr ipAddr, NDIS_HANDLE handle,
  80. PNET_PNP_EVENT netPnPEvent);
  81. extern void TCPElistChangeHandler(void);
  82. //* GetTCPHeader - Get a TCP header buffer.
  83. //
  84. // Called when we need to get a TCP header buffer. This routine is
  85. // specific to the particular environment (VxD or NT). All we
  86. // need to do is pop the buffer from the free list.
  87. //
  88. // Input: Nothing.
  89. //
  90. // Returns: Pointer to an NDIS buffer, or NULL is none.
  91. //
  92. PNDIS_BUFFER
  93. GetTCPHeaderAtDpcLevel(TCPHeader **Header)
  94. {
  95. PNDIS_BUFFER Buffer;
  96. #if DBG
  97. *Header = NULL;
  98. #endif
  99. Buffer = MdpAllocateAtDpcLevel(TcpHeaderPool, Header);
  100. if (Buffer) {
  101. ASSERT(*Header);
  102. NdisAdjustBufferLength(Buffer, sizeof(TCPHeader));
  103. #if BACK_FILL
  104. ASSERT(Buffer->ByteOffset >= 40);
  105. *Header = (TCPHeader*)((ULONG_PTR)(*Header) + MAX_BACKFILL_HDR_SIZE);
  106. Buffer->MappedSystemVa = (PVOID)((ULONG_PTR)Buffer->MappedSystemVa
  107. + MAX_BACKFILL_HDR_SIZE);
  108. Buffer->ByteOffset += MAX_BACKFILL_HDR_SIZE;
  109. Buffer->MdlFlags |= MDL_NETWORK_HEADER;
  110. #endif
  111. }
  112. return Buffer;
  113. }
  114. #if MILLEN
  115. #define GetTCPHeader GetTCPHeaderAtDpcLevel
  116. #else
  117. __inline
  118. PNDIS_BUFFER
  119. GetTCPHeader(TCPHeader **Header)
  120. {
  121. KIRQL OldIrql;
  122. PNDIS_BUFFER Buffer;
  123. OldIrql = KeRaiseIrqlToDpcLevel();
  124. Buffer = GetTCPHeaderAtDpcLevel(Header);
  125. KeLowerIrql(OldIrql);
  126. return Buffer;
  127. }
  128. #endif
  129. //* FreeTCPHeader - Free a TCP header buffer.
  130. //
  131. // Called to free a TCP header buffer.
  132. //
  133. // Input: Buffer to be freed.
  134. //
  135. // Returns: Nothing.
  136. //
  137. __inline
  138. VOID
  139. FreeTCPHeader(PNDIS_BUFFER Buffer)
  140. {
  141. NdisAdjustBufferLength(Buffer, TcpHeaderBufferSize);
  142. #if BACK_FILL
  143. Buffer->MappedSystemVa = (PVOID)((ULONG_PTR)Buffer->MappedSystemVa
  144. - MAX_BACKFILL_HDR_SIZE);
  145. Buffer->ByteOffset -= MAX_BACKFILL_HDR_SIZE;
  146. #endif
  147. MdpFree(Buffer);
  148. }
  149. //* FreeSendReq - Free a send request structure.
  150. //
  151. // Called to free a send request structure.
  152. //
  153. // Input: FreedReq - Connection request structure to be freed.
  154. //
  155. // Returns: Nothing.
  156. //
  157. __inline
  158. void
  159. FreeSendReq(TCPSendReq *Request)
  160. {
  161. PplFree(TcpRequestPool, Request);
  162. }
  163. //* GetSendReq - Get a send request structure.
  164. //
  165. // Called to get a send request structure.
  166. //
  167. // Input: Nothing.
  168. //
  169. // Returns: Pointer to SendReq structure, or NULL if none.
  170. //
  171. __inline
  172. TCPSendReq *
  173. GetSendReq(VOID)
  174. {
  175. TCPSendReq *Request;
  176. LOGICAL FromList;
  177. Request = PplAllocate(TcpRequestPool, &FromList);
  178. if (Request) {
  179. #if DBG
  180. Request->tsr_req.tr_sig = tr_signature;
  181. Request->tsr_sig = tsr_signature;
  182. #endif
  183. }
  184. return Request;
  185. }
  186. //* TCPSendComplete - Complete a TCP send.
  187. //
  188. // Called by IP when a send we've made is complete. We free the buffer,
  189. // and possibly complete some sends. Each send queued on a TCB has a ref.
  190. // count with it, which is the number of times a pointer to a buffer
  191. // associated with the send has been passed to the underlying IP layer. We
  192. // can't complete a send until that count it 0. If this send was actually
  193. // from a send of data, we'll go down the chain of send and decrement the
  194. // refcount on each one. If we have one going to 0 and the send has already
  195. // been acked we'll complete the send. If it hasn't been acked we'll leave
  196. // it until the ack comes in.
  197. //
  198. // NOTE: We aren't protecting any of this with locks. When we port this to
  199. // NT we'll need to fix this, probably with a global lock. See the comments
  200. // in ACKSend() in TCPRCV.C for more details.
  201. //
  202. // Input: Context - Context we gave to IP.
  203. // BufferChain - BufferChain for send.
  204. //
  205. // Returns: Nothing.
  206. //
  207. void
  208. TCPSendComplete(void *Context, PNDIS_BUFFER BufferChain, IP_STATUS SendStatus)
  209. {
  210. BOOLEAN DoRcvComplete = FALSE;
  211. PNDIS_BUFFER CurrentBuffer;
  212. if (Context != NULL) {
  213. SendCmpltContext *SCContext = (SendCmpltContext *) Context;
  214. TCPSendReq *CurrentSend;
  215. uint i;
  216. CTEStructAssert(SCContext, scc);
  217. if (SCContext->scc_LargeSend) {
  218. TCB *LargeSendTCB = SCContext->scc_LargeSend;
  219. CTELockHandle TCBHandle;
  220. CTEGetLock(&LargeSendTCB->tcb_lock, &TCBHandle);
  221. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  222. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: tcb %x sent %d of %d una %u "
  223. "next %u unacked %u\n", LargeSendTCB,
  224. SCContext->scc_ByteSent, SCContext->scc_SendSize,
  225. LargeSendTCB->tcb_senduna, LargeSendTCB->tcb_sendnext,
  226. LargeSendTCB->tcb_unacked));
  227. }
  228. if (SCContext->scc_ByteSent < SCContext->scc_SendSize) {
  229. uint BytesNotSent = SCContext->scc_SendSize -
  230. SCContext->scc_ByteSent;
  231. SeqNum Next = LargeSendTCB->tcb_sendnext;
  232. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  233. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: unsent %d\n",
  234. SCContext->scc_SendSize-SCContext->scc_ByteSent));
  235. }
  236. if (SEQ_GTE((Next - BytesNotSent), LargeSendTCB->tcb_senduna) &&
  237. SEQ_LT((Next - BytesNotSent), LargeSendTCB->tcb_sendnext)) {
  238. ResetSendNext(LargeSendTCB, (Next - BytesNotSent));
  239. }
  240. }
  241. #if DBG
  242. LargeSendTCB->tcb_LargeSend--;
  243. #endif
  244. if (LargeSendTCB->tcb_unacked)
  245. DelayAction(LargeSendTCB, NEED_OUTPUT);
  246. DerefTCB(LargeSendTCB, TCBHandle);
  247. }
  248. // First, loop through and free any NDIS buffers here that need to be.
  249. // freed. We'll skip any 'user' buffers, and then free our buffers. We
  250. // need to do this before decrementing the reference count to avoid
  251. // destroying the buffer chain if we have to zap tsr_lastbuf->Next to
  252. // NULL.
  253. CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
  254. for (i = 0; i < (uint) SCContext->scc_ubufcount; i++) {
  255. ASSERT(CurrentBuffer != NULL);
  256. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  257. }
  258. for (i = 0; i < (uint) SCContext->scc_tbufcount; i++) {
  259. PNDIS_BUFFER TempBuffer;
  260. ASSERT(CurrentBuffer != NULL);
  261. TempBuffer = CurrentBuffer;
  262. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  263. NdisFreeBuffer(TempBuffer);
  264. }
  265. CurrentSend = SCContext->scc_firstsend;
  266. i = 0;
  267. while (i < SCContext->scc_count) {
  268. Queue *TempQ;
  269. long Result;
  270. uint SendReqFlags;
  271. TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
  272. SendReqFlags = CurrentSend->tsr_flags;
  273. CTEStructAssert(CurrentSend, tsr);
  274. Result = CTEInterlockedDecrementLong(&(CurrentSend->tsr_refcnt));
  275. ASSERT(Result >= 0);
  276. if ((Result <= 0) ||
  277. ((SendReqFlags & TSR_FLAG_SEND_AND_DISC) && (Result == 1))) {
  278. TCPReq *Req;
  279. // Reference count has gone to 0 which means the send has
  280. // been ACK'd or cancelled. Complete it now.
  281. // If we've sent directly from this send, NULL out the next
  282. // pointer for the last buffer in the chain.
  283. if (CurrentSend->tsr_lastbuf != NULL) {
  284. NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
  285. CurrentSend->tsr_lastbuf = NULL;
  286. }
  287. Req = &CurrentSend->tsr_req;
  288. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  289. Req->tr_status == TDI_SUCCESS
  290. ? CurrentSend->tsr_size : 0);
  291. FreeSendReq(CurrentSend);
  292. DoRcvComplete = TRUE;
  293. }
  294. CurrentSend = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
  295. tsr_req);
  296. i++;
  297. }
  298. }
  299. FreeTCPHeader(BufferChain);
  300. if (DoRcvComplete && !PartitionedDelayQ) {
  301. KIRQL Irql = KeRaiseIrqlToDpcLevel();
  302. TCPRcvComplete();
  303. KeLowerIrql(Irql);
  304. }
  305. }
  306. //* RcvWin - Figure out the receive window to offer in an ack.
  307. //
  308. // A routine to figure out what window to offer on a connection. We
  309. // take into account SWS avoidance, what the default connection window is,
  310. // and what the last window we offered is.
  311. //
  312. // Input: WinTCB - TCB on which to perform calculations.
  313. //
  314. // Returns: Window to be offered.
  315. //
  316. uint
  317. RcvWin(TCB * WinTCB)
  318. {
  319. int CouldOffer; // The window size we could offer.
  320. CTEStructAssert(WinTCB, tcb);
  321. CheckRBList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
  322. ASSERT(WinTCB->tcb_rcvwin >= 0);
  323. CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
  324. ASSERT(CouldOffer >= 0);
  325. ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
  326. if ((CouldOffer - WinTCB->tcb_rcvwin) >=
  327. (int)MIN(WinTCB->tcb_defaultwin / 2, WinTCB->tcb_mss))
  328. WinTCB->tcb_rcvwin = CouldOffer;
  329. return WinTCB->tcb_rcvwin;
  330. }
  331. //* SendSYNOnSynTCB - Send a SYN segment for syntcb
  332. //
  333. // This is called during connection establishment time to send a SYN
  334. // segment to the peer. We get a buffer if we can, and then fill
  335. // it in. There's a tricky part here where we have to build the MSS
  336. // option in the header - we find the MSS by finding the MSS offered
  337. // by the net for the local address. After that, we send it.
  338. //
  339. // Input: SYNTcb - TCB from which SYN is to be sent.
  340. //
  341. // Returns: Nothing.
  342. //
  343. void
  344. SendSYNOnSynTCB(SYNTCB * SYNTcb, CTELockHandle TCBHandle)
  345. {
  346. PNDIS_BUFFER HeaderBuffer;
  347. TCPHeader *SYNHeader;
  348. uchar *OptPtr;
  349. IP_STATUS SendStatus;
  350. ushort OptSize = 0, HdrSize = 0;
  351. BOOLEAN SackOpt = FALSE;
  352. IPOptInfo OptInfo;
  353. CTEStructAssert(SYNTcb, syntcb);
  354. HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
  355. // Go ahead and set the retransmission timer now, in case we didn't get a
  356. // buffer. In the future we might want to queue the connection for
  357. // when we free a buffer.
  358. START_TCB_TIMER(SYNTcb->syntcb_rexmittimer, SYNTcb->syntcb_rexmit);
  359. // The Rexmit interval has to be doubled here
  360. SYNTcb->syntcb_rexmit = MIN(SYNTcb->syntcb_rexmit << 1, MAX_REXMIT_TO);
  361. if (HeaderBuffer != NULL) {
  362. ushort TempWin;
  363. ushort MSS;
  364. uchar FoundMSS;
  365. SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
  366. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  367. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
  368. OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
  369. }
  370. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
  371. OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
  372. }
  373. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_SACK){
  374. SackOpt = TRUE;
  375. OptSize += 4; // 2 NOPS, SACK kind and length field
  376. }
  377. NdisAdjustBufferLength(HeaderBuffer,
  378. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
  379. SYNHeader->tcp_src = SYNTcb->syntcb_sport;
  380. SYNHeader->tcp_dest = SYNTcb->syntcb_dport;
  381. SYNHeader->tcp_seq = net_long(SYNTcb->syntcb_sendnext);
  382. SYNTcb->syntcb_sendnext++;
  383. if (SYNTcb->syntcb_rexmitcnt == 0) {
  384. TCPSIncrementOutSegCount();
  385. } else
  386. TStats.ts_retranssegs++;
  387. SYNHeader->tcp_ack = net_long(SYNTcb->syntcb_rcvnext);
  388. // Reuse OPt size for header size determination
  389. // default is MSS amd tcp header size
  390. HdrSize = 6;
  391. // set size field to reflect TS and WND scale option
  392. // tcp header + windowscale + Timestamp + pad
  393. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
  394. // WS: Add one more long word
  395. HdrSize += 1;
  396. }
  397. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
  398. // TS: Add 3 more long words
  399. HdrSize += 3;
  400. }
  401. if (SackOpt) {
  402. // SACK: Add 1 more long word
  403. HdrSize += 1;
  404. }
  405. SYNHeader->tcp_flags =
  406. MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
  407. if (SYNTcb->syntcb_defaultwin <= TCP_MAXWIN) {
  408. TempWin = (ushort)SYNTcb->syntcb_defaultwin;
  409. } else {
  410. // Don't apply the scale-factor in a SYN segment.
  411. // Instead, advertise the largest window possible.
  412. TempWin = TCP_MAXWIN;
  413. }
  414. SYNHeader->tcp_window = net_short(TempWin);
  415. SYNHeader->tcp_urgent = 0;
  416. SYNHeader->tcp_xsum = 0;
  417. OptPtr = (uchar *) (SYNHeader + 1);
  418. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->syntcb_saddr, &MSS);
  419. if (!FoundMSS) {
  420. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  421. FreeTCPHeader(HeaderBuffer);
  422. return;
  423. }
  424. MSS -= sizeof(TCPHeader);
  425. SYNTcb->syntcb_mss = MSS;
  426. *OptPtr++ = TCP_OPT_MSS;
  427. *OptPtr++ = MSS_OPT_SIZE;
  428. **(ushort **) & OptPtr = net_short(MSS);
  429. OptPtr++;
  430. OptPtr++;
  431. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_WS) {
  432. // Fill in the WS option headers and value
  433. *OptPtr++ = TCP_OPT_NOP;
  434. *OptPtr++ = TCP_OPT_WS;
  435. *OptPtr++ = WS_OPT_SIZE;
  436. //Initial window scale factor
  437. *OptPtr++ = (uchar) SYNTcb->syntcb_rcvwinscale;
  438. }
  439. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_TS) {
  440. //Start loading time stamp option header and value
  441. *OptPtr++ = TCP_OPT_NOP;
  442. *OptPtr++ = TCP_OPT_NOP;
  443. *OptPtr++ = TCP_OPT_TS;
  444. *OptPtr++ = TS_OPT_SIZE;
  445. // Initialize TS value TSval
  446. *(long *)OptPtr = 0;
  447. OptPtr += 4;
  448. //Initialize TS Echo Reply TSecr
  449. *(long *)OptPtr = 0;
  450. OptPtr += 4;
  451. }
  452. if (SackOpt) {
  453. // Initialize with SACK_PERMITTED option
  454. *(long *)OptPtr = net_long(0x01010402);
  455. IF_TCPDBG(TCP_DEBUG_SACK) {
  456. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
  457. }
  458. }
  459. SYNTcb->syntcb_refcnt++;
  460. // Account for Options.
  461. (*LocalNetInfo.ipi_initopts) (&OptInfo);
  462. OptInfo.ioi_ttl = SYNTcb->syntcb_ttl;
  463. SYNHeader->tcp_xsum =
  464. ~XsumSendChain(PHXSUM(SYNTcb->syntcb_saddr, SYNTcb->syntcb_daddr,
  465. PROTOCOL_TCP, 0) +
  466. (uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE +
  467. OptSize), HeaderBuffer);
  468. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  469. SendStatus =
  470. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
  471. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
  472. SYNTcb->syntcb_daddr, SYNTcb->syntcb_saddr,
  473. &OptInfo, NULL, PROTOCOL_TCP, NULL);
  474. if (SendStatus != IP_PENDING) {
  475. FreeTCPHeader(HeaderBuffer);
  476. }
  477. CTEGetLock(&SYNTcb->syntcb_lock, &TCBHandle);
  478. DerefSynTCB(SYNTcb, TCBHandle);
  479. } else {
  480. SYNTcb->syntcb_sendnext++;
  481. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  482. return;
  483. }
  484. }
  485. //* SendSYN - Send a SYN segment.
  486. //
  487. // This is called during connection establishment time to send a SYN
  488. // segment to the peer. We get a buffer if we can, and then fill
  489. // it in. There's a tricky part here where we have to build the MSS
  490. // option in the header - we find the MSS by finding the MSS offered
  491. // by the net for the local address. After that, we send it.
  492. //
  493. // Input: SYNTcb - TCB from which SYN is to be sent.
  494. // TCBHandle - Handle for lock on TCB.
  495. //
  496. // Returns: Nothing.
  497. //
  498. void
  499. SendSYN(TCB * SYNTcb, CTELockHandle TCBHandle)
  500. {
  501. PNDIS_BUFFER HeaderBuffer;
  502. TCPHeader *SYNHeader;
  503. uchar *OptPtr;
  504. IP_STATUS SendStatus;
  505. ushort OptSize = 0, HdrSize = 0, rfc1323opts = 0;
  506. BOOLEAN SackOpt = FALSE;
  507. CTEStructAssert(SYNTcb, tcb);
  508. HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
  509. // Go ahead and set the retransmission timer now, in case we didn't get a
  510. // buffer. In the future we might want to queue the connection for
  511. // when we free a buffer.
  512. START_TCB_TIMER_R(SYNTcb, RXMIT_TIMER, SYNTcb->tcb_rexmit);
  513. if (HeaderBuffer != NULL) {
  514. ushort TempWin;
  515. ushort MSS;
  516. uchar FoundMSS;
  517. SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
  518. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  519. // If we are doing active open, check if we are configured to do
  520. // window scaling and time stamp options
  521. if ((((TcpHostSendOpts & TCP_FLAG_WS) || SYNTcb->tcb_rcvwinscale) &&
  522. SYNTcb->tcb_state == TCB_SYN_SENT) ||
  523. (SYNTcb->tcb_tcpopts & TCP_FLAG_WS)) {
  524. rfc1323opts |= TCP_FLAG_WS;
  525. IF_TCPDBG(TCP_DEBUG_1323) {
  526. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected WS option TCB %x\n", SYNTcb));
  527. }
  528. }
  529. if (((TcpHostSendOpts & TCP_FLAG_TS) &&
  530. (SYNTcb->tcb_state == TCB_SYN_SENT)) ||
  531. (SYNTcb->tcb_tcpopts & TCP_FLAG_TS)) {
  532. IF_TCPDBG(TCP_DEBUG_1323) {
  533. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected TS option TCB %x\n", SYNTcb));
  534. }
  535. rfc1323opts |= TCP_FLAG_TS;
  536. }
  537. if (rfc1323opts & TCP_FLAG_WS) {
  538. OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
  539. }
  540. if (rfc1323opts & TCP_FLAG_TS) {
  541. OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
  542. }
  543. if ((SYNTcb->tcb_tcpopts & TCP_FLAG_SACK) ||
  544. ((SYNTcb->tcb_state == TCB_SYN_SENT) &&
  545. (TcpHostOpts & TCP_FLAG_SACK))) {
  546. SackOpt = TRUE;
  547. OptSize += 4; // 2 NOPS, SACK kind and length field
  548. }
  549. NdisAdjustBufferLength(HeaderBuffer,
  550. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
  551. SYNHeader->tcp_src = SYNTcb->tcb_sport;
  552. SYNHeader->tcp_dest = SYNTcb->tcb_dport;
  553. SYNHeader->tcp_seq = net_long(SYNTcb->tcb_sendnext);
  554. SYNTcb->tcb_sendnext++;
  555. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  556. TCPSIncrementOutSegCount();
  557. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  558. } else
  559. TStats.ts_retranssegs++;
  560. SYNHeader->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
  561. // Reuse OPt size for header size determination
  562. // default is MSS amd tcp header size
  563. HdrSize = 6;
  564. // set size field to reflect TS and WND scale option
  565. // tcp header + windowscale + Timestamp + pad
  566. if (rfc1323opts & TCP_FLAG_WS) {
  567. // WS: Add one more long word
  568. HdrSize += 1;
  569. }
  570. if (rfc1323opts & TCP_FLAG_TS) {
  571. // TS: Add 3 more long words
  572. HdrSize += 3;
  573. }
  574. if (SackOpt) {
  575. // SACK: Add 1 more long word
  576. HdrSize += 1;
  577. }
  578. if (SYNTcb->tcb_state == TCB_SYN_RCVD) {
  579. SYNHeader->tcp_flags =
  580. MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
  581. } else {
  582. SYNHeader->tcp_flags = MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN);
  583. }
  584. SYNTcb->tcb_lastack = SYNTcb->tcb_rcvnext;
  585. if (SYNTcb->tcb_rcvwin <= TCP_MAXWIN) {
  586. TempWin = (ushort)SYNTcb->tcb_rcvwin;
  587. } else {
  588. // Don't apply the scale-factor in a SYN segment.
  589. // Instead, advertise the largest window possible.
  590. TempWin = TCP_MAXWIN;
  591. }
  592. SYNHeader->tcp_window = net_short(TempWin);
  593. SYNHeader->tcp_urgent = 0;
  594. SYNHeader->tcp_xsum = 0;
  595. OptPtr = (uchar *) (SYNHeader + 1);
  596. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->tcb_saddr, &MSS);
  597. if (!FoundMSS) {
  598. CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
  599. FreeTCPHeader(HeaderBuffer);
  600. return;
  601. }
  602. MSS -= sizeof(TCPHeader);
  603. *OptPtr++ = TCP_OPT_MSS;
  604. *OptPtr++ = MSS_OPT_SIZE;
  605. **(ushort **) & OptPtr = net_short(MSS);
  606. OptPtr++;
  607. OptPtr++;
  608. if (rfc1323opts & TCP_FLAG_WS) {
  609. // Fill in the WS option headers and value
  610. *OptPtr++ = TCP_OPT_NOP;
  611. *OptPtr++ = TCP_OPT_WS;
  612. *OptPtr++ = WS_OPT_SIZE;
  613. // Initial window scale factor
  614. *OptPtr++ = (uchar) SYNTcb->tcb_rcvwinscale;
  615. }
  616. if (rfc1323opts & TCP_FLAG_TS) {
  617. // Start loading time stamp option header and value
  618. *OptPtr++ = TCP_OPT_NOP;
  619. *OptPtr++ = TCP_OPT_NOP;
  620. *OptPtr++ = TCP_OPT_TS;
  621. *OptPtr++ = TS_OPT_SIZE;
  622. // Initialize TS value TSval
  623. *(long *)OptPtr = 0;
  624. OptPtr += 4;
  625. // Initialize TS Echo Reply TSecr
  626. *(long *)OptPtr = 0;
  627. OptPtr += 4;
  628. }
  629. if (SackOpt) {
  630. // Initialize with SACK_PERMITTED option
  631. *(long *)OptPtr = net_long(0x01010402);
  632. IF_TCPDBG(TCP_DEBUG_SACK) {
  633. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
  634. }
  635. }
  636. REFERENCE_TCB(SYNTcb);
  637. // Account for Options.
  638. SYNTcb->tcb_opt.ioi_TcpChksum = 0;
  639. SYNHeader->tcp_xsum =
  640. ~XsumSendChain(SYNTcb->tcb_phxsum +
  641. (uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize),
  642. HeaderBuffer);
  643. ClassifyPacket(SYNTcb);
  644. CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
  645. SendStatus =
  646. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
  647. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
  648. SYNTcb->tcb_daddr, SYNTcb->tcb_saddr,
  649. &SYNTcb->tcb_opt, SYNTcb->tcb_rce,
  650. PROTOCOL_TCP, NULL);
  651. SYNTcb->tcb_error = SendStatus;
  652. if (SendStatus != IP_PENDING) {
  653. FreeTCPHeader(HeaderBuffer);
  654. }
  655. CTEGetLock(&SYNTcb->tcb_lock, &TCBHandle);
  656. DerefTCB(SYNTcb, TCBHandle);
  657. } else {
  658. SYNTcb->tcb_sendnext++;
  659. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax))
  660. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  661. CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
  662. return;
  663. }
  664. }
  665. //* SendKA - Send a keep alive segment.
  666. //
  667. // This is called when we want to send a keep alive.
  668. //
  669. // Input: KATcb - TCB from which keep alive is to be sent.
  670. // Handle - Handle for lock on TCB.
  671. //
  672. // Returns: Nothing.
  673. //
  674. void
  675. SendKA(TCB * KATcb, CTELockHandle Handle)
  676. {
  677. PNDIS_BUFFER HeaderBuffer;
  678. TCPHeader *Header;
  679. IP_STATUS SendStatus;
  680. CTEStructAssert(KATcb, tcb);
  681. HeaderBuffer = GetTCPHeaderAtDpcLevel(&Header);
  682. if (HeaderBuffer != NULL) {
  683. ushort TempWin;
  684. SeqNum TempSeq;
  685. Header = (TCPHeader *) ((PUCHAR) Header + LocalNetInfo.ipi_hsize);
  686. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  687. NdisAdjustBufferLength(HeaderBuffer, sizeof(TCPHeader) + 1);
  688. Header->tcp_src = KATcb->tcb_sport;
  689. Header->tcp_dest = KATcb->tcb_dport;
  690. TempSeq = KATcb->tcb_senduna - 1;
  691. Header->tcp_seq = net_long(TempSeq);
  692. TStats.ts_retranssegs++;
  693. Header->tcp_ack = net_long(KATcb->tcb_rcvnext);
  694. Header->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  695. // Initialize the single byte that we're sending.
  696. *(uchar*)(Header + 1) = 0;
  697. // We need to scale the rcv window
  698. // Use temprary variable to workaround truncation
  699. // caused by net_short
  700. TempWin = (ushort) (RcvWin(KATcb) >> KATcb->tcb_rcvwinscale);
  701. Header->tcp_window = net_short(TempWin);
  702. Header->tcp_urgent = 0;
  703. KATcb->tcb_lastack = KATcb->tcb_rcvnext;
  704. Header->tcp_xsum = 0;
  705. KATcb->tcb_opt.ioi_TcpChksum = 0;
  706. Header->tcp_xsum =
  707. ~XsumSendChain(KATcb->tcb_phxsum +
  708. (uint)net_short(sizeof(TCPHeader) + 1),
  709. HeaderBuffer);
  710. KATcb->tcb_kacount++;
  711. ClassifyPacket(KATcb);
  712. REFERENCE_TCB(KATcb);
  713. CTEFreeLock(&KATcb->tcb_lock, Handle);
  714. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  715. NULL,
  716. HeaderBuffer,
  717. sizeof(TCPHeader) + 1,
  718. KATcb->tcb_daddr,
  719. KATcb->tcb_saddr,
  720. &KATcb->tcb_opt,
  721. KATcb->tcb_rce,
  722. PROTOCOL_TCP,
  723. NULL);
  724. if (SendStatus != IP_PENDING) {
  725. FreeTCPHeader(HeaderBuffer);
  726. }
  727. CTEGetLock(&KATcb->tcb_lock, &Handle);
  728. DerefTCB(KATcb, Handle);
  729. } else {
  730. CTEFreeLock(&KATcb->tcb_lock, Handle);
  731. }
  732. }
  733. //* SendACK - Send an ACK segment.
  734. //
  735. // This is called whenever we need to send an ACK for some reason. Nothing
  736. // fancy, we just do it.
  737. //
  738. // Input: ACKTcb - TCB from which ACK is to be sent.
  739. //
  740. // Returns: Nothing.
  741. //
  742. void
  743. SendACK(TCB * ACKTcb)
  744. {
  745. PNDIS_BUFFER HeaderBuffer;
  746. TCPHeader *ACKHeader;
  747. IP_STATUS SendStatus;
  748. CTELockHandle TCBHandle;
  749. SeqNum SendNext;
  750. ushort SackLength = 0, i, hdrlen = 5;
  751. ulong *ts_opt;
  752. BOOLEAN HWChksum = FALSE;
  753. CTEStructAssert(ACKTcb, tcb);
  754. HeaderBuffer = GetTCPHeader(&ACKHeader);
  755. if (HeaderBuffer != NULL) {
  756. ushort TempWin;
  757. ushort Size;
  758. ACKHeader = (TCPHeader *) ((PUCHAR) ACKHeader + LocalNetInfo.ipi_hsize);
  759. CTEGetLock(&ACKTcb->tcb_lock, &TCBHandle);
  760. // Allow room for filling time stamp option.
  761. // Note that it is 12 bytes and will never ever change
  762. if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
  763. NdisAdjustBufferLength(HeaderBuffer,
  764. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  765. // Header length is multiple of 32bits
  766. hdrlen = 5 + 3; // standard header size +
  767. // header size requirement for TS option
  768. ACKTcb->tcb_lastack = ACKTcb->tcb_rcvnext;
  769. }
  770. if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
  771. ACKTcb->tcb_SackBlock &&
  772. (ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
  773. SackLength++;
  774. for (i = 1; i < 3; i++) {
  775. if (ACKTcb->tcb_SackBlock->Mask[i] == 1)
  776. SackLength++;
  777. }
  778. IF_TCPDBG(TCP_DEBUG_SACK) {
  779. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACKs!! %x %x\n", ACKTcb, SackLength));
  780. }
  781. NdisAdjustBufferLength(HeaderBuffer,
  782. NdisBufferLength(HeaderBuffer) + SackLength * 8 + 4);
  783. // Sack block is of 2 long words (8 bytes) and 4 bytes
  784. // is for Sack option header.
  785. hdrlen += ((SackLength * 8 + 4) >> 2);
  786. }
  787. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  788. ACKHeader->tcp_src = ACKTcb->tcb_sport;
  789. ACKHeader->tcp_dest = ACKTcb->tcb_dport;
  790. ACKHeader->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
  791. // If the remote peer is advertising a window of zero, we need to
  792. // send this ack with a seq. number of his rcv_next (which in that case
  793. // should be our senduna). We have code here ifdef'd out that makes
  794. // sure that we don't send outside the RWE, but this doesn't work. We
  795. // need to be able to send a pure ACK exactly at the RWE.
  796. if (ACKTcb->tcb_sendwin != 0) {
  797. SendNext = ACKTcb->tcb_sendnext;
  798. } else
  799. SendNext = ACKTcb->tcb_senduna;
  800. if ((ACKTcb->tcb_flags & FIN_SENT) &&
  801. SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
  802. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen,
  803. TCP_FLAG_FIN | TCP_FLAG_ACK);
  804. } else
  805. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
  806. ACKHeader->tcp_seq = net_long(SendNext);
  807. TempWin = (ushort) (RcvWin(ACKTcb) >> ACKTcb->tcb_rcvwinscale);
  808. ACKHeader->tcp_window = net_short(TempWin);
  809. ACKHeader->tcp_urgent = 0;
  810. ACKHeader->tcp_xsum = 0;
  811. Size = sizeof(TCPHeader);
  812. // Point to a place beyond tcp header
  813. ts_opt = (ulong *)((uchar *) ACKHeader + 20);
  814. if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
  815. // Form time stamp header with 2 NOPs for alignment
  816. *ts_opt++ = net_long(0x0101080A);
  817. *ts_opt++ = net_long(TCPTime);
  818. *ts_opt++ = net_long(ACKTcb->tcb_tsrecent);
  819. // Add 12 more bytes to the size to account for TS
  820. Size += ALIGNED_TS_OPT_SIZE;
  821. }
  822. if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
  823. ACKTcb->tcb_SackBlock &&
  824. (ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
  825. ushort* UshortPtr;
  826. uchar* UcharPtr;
  827. UshortPtr = (ushort *)ts_opt;
  828. *UshortPtr = 0x0101;
  829. ts_opt = (ulong *)((uchar *)ts_opt + 2);
  830. UcharPtr = (uchar *)ts_opt;
  831. *UcharPtr = (uchar)0x05;
  832. ts_opt = (ulong *)((uchar *)ts_opt + 1);
  833. UcharPtr = (uchar *)ts_opt;
  834. *UcharPtr = (uchar) SackLength * 8 + 2;
  835. ts_opt = (ulong *)((uchar *)ts_opt + 1);
  836. // Sack option header + the block times times sack length!
  837. Size += 4 + SackLength * 8;
  838. for (i = 0; i < 3; i++) {
  839. if (ACKTcb->tcb_SackBlock->Mask[i] != 0) {
  840. *ts_opt++ =
  841. net_long(ACKTcb->tcb_SackBlock->Block[i].begin);
  842. *ts_opt++ =
  843. net_long(ACKTcb->tcb_SackBlock->Block[i].end);
  844. }
  845. }
  846. }
  847. if (ACKTcb->tcb_rce &&
  848. (ACKTcb->tcb_rce->rce_OffloadFlags &
  849. TCP_XMT_CHECKSUM_OFFLOAD)) {
  850. HWChksum = TRUE;
  851. if ((Size > sizeof(TCPHeader)) &&
  852. !(ACKTcb->tcb_rce->rce_OffloadFlags &
  853. TCP_CHECKSUM_OPT_OFFLOAD)) {
  854. HWChksum = FALSE;
  855. }
  856. }
  857. if (HWChksum) {
  858. uint PHXsum = ACKTcb->tcb_phxsum + (uint) net_short(Size);
  859. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
  860. ACKHeader->tcp_xsum = (ushort) PHXsum;
  861. ACKTcb->tcb_opt.ioi_TcpChksum = 1;
  862. #if DBG
  863. DbgTcpSendHwChksumCount++;
  864. #endif
  865. } else {
  866. ACKHeader->tcp_xsum =
  867. ~XsumSendChain(ACKTcb->tcb_phxsum +
  868. (uint)net_short(Size), HeaderBuffer);
  869. ACKTcb->tcb_opt.ioi_TcpChksum = 0;
  870. }
  871. STOP_TCB_TIMER_R(ACKTcb, DELACK_TIMER);
  872. ACKTcb->tcb_rcvdsegs = 0;
  873. ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
  874. ClassifyPacket(ACKTcb);
  875. CTEFreeLock(&ACKTcb->tcb_lock, TCBHandle);
  876. TCPSIncrementOutSegCount();
  877. if (ACKTcb->tcb_tcpopts) {
  878. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  879. NULL,
  880. HeaderBuffer,
  881. Size,
  882. ACKTcb->tcb_daddr,
  883. ACKTcb->tcb_saddr,
  884. &ACKTcb->tcb_opt,
  885. ACKTcb->tcb_rce,
  886. PROTOCOL_TCP,
  887. NULL);
  888. } else {
  889. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  890. NULL,
  891. HeaderBuffer,
  892. sizeof(TCPHeader),
  893. ACKTcb->tcb_daddr,
  894. ACKTcb->tcb_saddr,
  895. &ACKTcb->tcb_opt,
  896. ACKTcb->tcb_rce,
  897. PROTOCOL_TCP,
  898. NULL);
  899. }
  900. ACKTcb->tcb_error = SendStatus;
  901. if (SendStatus != IP_PENDING)
  902. FreeTCPHeader(HeaderBuffer);
  903. }
  904. return;
  905. }
  906. //* SendTWtcbACK- Send an ACK segment for a twtcb
  907. //
  908. //
  909. // Input: ACKTcb - TCB from which ACK is to be sent.
  910. //
  911. // Returns: Nothing.
  912. //
  913. void
  914. SendTWtcbACK(TWTCB *ACKTcb, uint Partition, CTELockHandle TCBHandle)
  915. {
  916. PNDIS_BUFFER HeaderBuffer;
  917. TCPHeader *ACKHeader;
  918. IP_STATUS SendStatus;
  919. SeqNum SendNext;
  920. ushort hdrlen = 5;
  921. uint phxsum;
  922. CTEStructAssert(ACKTcb, twtcb);
  923. HeaderBuffer = GetTCPHeaderAtDpcLevel(&ACKHeader);
  924. if (HeaderBuffer != NULL) {
  925. ushort Size;
  926. IPOptInfo NewInfo;
  927. ACKHeader = (TCPHeader *)((PUCHAR)ACKHeader + LocalNetInfo.ipi_hsize);
  928. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  929. ACKHeader->tcp_src = ACKTcb->twtcb_sport;
  930. ACKHeader->tcp_dest = ACKTcb->twtcb_dport;
  931. ACKHeader->tcp_ack = net_long(ACKTcb->twtcb_rcvnext);
  932. SendNext = ACKTcb->twtcb_sendnext;
  933. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
  934. ACKHeader->tcp_seq = net_long(SendNext);
  935. // Window needs to be zero since we can not rcv anyway.
  936. ACKHeader->tcp_window = 0;
  937. ACKHeader->tcp_urgent = 0;
  938. Size = sizeof(TCPHeader);
  939. phxsum = PHXSUM(ACKTcb->twtcb_saddr, ACKTcb->twtcb_daddr,
  940. PROTOCOL_TCP, 0);
  941. ACKHeader->tcp_xsum = 0;
  942. ACKHeader->tcp_xsum =
  943. ~XsumSendChain(phxsum +
  944. (uint)net_short(Size), HeaderBuffer);
  945. //ACKTcb->tcb_opt.ioi_TcpChksum=0;
  946. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  947. TCPSIncrementOutSegCount();
  948. (*LocalNetInfo.ipi_initopts) (&NewInfo);
  949. SendStatus =
  950. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  951. NULL,
  952. HeaderBuffer,
  953. sizeof(TCPHeader),
  954. ACKTcb->twtcb_daddr,
  955. ACKTcb->twtcb_saddr,
  956. &NewInfo,
  957. NULL,
  958. PROTOCOL_TCP,
  959. NULL);
  960. if (SendStatus != IP_PENDING)
  961. FreeTCPHeader(HeaderBuffer);
  962. (*LocalNetInfo.ipi_freeopts) (&NewInfo);
  963. } else {
  964. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition]);
  965. }
  966. }
  967. //* SendRSTFromTCB - Send a RST from a TCB.
  968. //
  969. // This is called during close when we need to send a RST.
  970. //
  971. // Input: RSTTcb - TCB from which RST is to be sent.
  972. // RCE - Optional RCE to be used in sending.
  973. //
  974. // Returns: Nothing.
  975. //
  976. void
  977. SendRSTFromTCB(TCB * RSTTcb, RouteCacheEntry* RCE)
  978. {
  979. PNDIS_BUFFER HeaderBuffer;
  980. TCPHeader *RSTHeader;
  981. IP_STATUS SendStatus;
  982. CTEStructAssert(RSTTcb, tcb);
  983. ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
  984. HeaderBuffer = GetTCPHeader(&RSTHeader);
  985. if (HeaderBuffer != NULL) {
  986. SeqNum RSTSeq;
  987. RSTHeader = (TCPHeader *) ((PUCHAR)RSTHeader + LocalNetInfo.ipi_hsize);
  988. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  989. RSTHeader->tcp_src = RSTTcb->tcb_sport;
  990. RSTHeader->tcp_dest = RSTTcb->tcb_dport;
  991. // If the remote peer has a window of 0, send with a seq. # equal
  992. // to senduna so he'll accept it. Otherwise send with send max.
  993. if (RSTTcb->tcb_sendwin != 0)
  994. RSTSeq = RSTTcb->tcb_sendmax;
  995. else
  996. RSTSeq = RSTTcb->tcb_senduna;
  997. RSTHeader->tcp_seq = net_long(RSTSeq);
  998. RSTHeader->tcp_ack = net_long(RSTTcb->tcb_rcvnext);
  999. RSTHeader->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
  1000. TCP_FLAG_RST | TCP_FLAG_ACK);
  1001. RSTHeader->tcp_window = 0;
  1002. RSTHeader->tcp_urgent = 0;
  1003. RSTHeader->tcp_xsum = 0;
  1004. // Recompute pseudo checksum as this will
  1005. // not be valid when connection is disconnected
  1006. // in pre-accept case.
  1007. RSTHeader->tcp_xsum =
  1008. ~XsumSendChain(PHXSUM(RSTTcb->tcb_saddr,
  1009. RSTTcb->tcb_daddr,
  1010. PROTOCOL_TCP,
  1011. sizeof(TCPHeader)),
  1012. HeaderBuffer);
  1013. RSTTcb->tcb_opt.ioi_TcpChksum = 0;
  1014. TCPSIncrementOutSegCount();
  1015. TStats.ts_outrsts++;
  1016. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  1017. NULL,
  1018. HeaderBuffer,
  1019. sizeof(TCPHeader),
  1020. RSTTcb->tcb_daddr,
  1021. RSTTcb->tcb_saddr,
  1022. &RSTTcb->tcb_opt,
  1023. RCE,
  1024. PROTOCOL_TCP,
  1025. NULL);
  1026. if (SendStatus != IP_PENDING)
  1027. FreeTCPHeader(HeaderBuffer);
  1028. }
  1029. return;
  1030. }
  1031. //* SendRSTFromHeader - Send a RST back, based on a header.
  1032. //
  1033. // Called when we need to send a RST, but don't necessarily have a TCB.
  1034. //
  1035. // Input: TCPH - TCP header to be RST.
  1036. // Length - Length of the incoming segment.
  1037. // Dest - Destination IP address for RST.
  1038. // Src - Source IP address for RST.
  1039. // OptInfo - IP Options to use on RST.
  1040. //
  1041. // Returns: Nothing.
  1042. //
  1043. void
  1044. SendRSTFromHeader(TCPHeader UNALIGNED * TCPH, uint Length, IPAddr Dest,
  1045. IPAddr Src, IPOptInfo * OptInfo)
  1046. {
  1047. PNDIS_BUFFER Buffer;
  1048. TCPHeader *RSTHdr;
  1049. IPOptInfo NewInfo;
  1050. IP_STATUS SendStatus;
  1051. if (TCPH->tcp_flags & TCP_FLAG_RST)
  1052. return;
  1053. Buffer = GetTCPHeader(&RSTHdr);
  1054. if (Buffer != NULL) {
  1055. // Got a buffer. Fill in the header so as to make it believable to
  1056. // the remote guy, and send it.
  1057. RSTHdr = (TCPHeader *) ((PUCHAR)RSTHdr + LocalNetInfo.ipi_hsize);
  1058. NDIS_BUFFER_LINKAGE(Buffer) = NULL;
  1059. if (TCPH->tcp_flags & TCP_FLAG_SYN)
  1060. Length++;
  1061. if (TCPH->tcp_flags & TCP_FLAG_FIN)
  1062. Length++;
  1063. if (TCPH->tcp_flags & TCP_FLAG_ACK) {
  1064. RSTHdr->tcp_seq = TCPH->tcp_ack;
  1065. RSTHdr->tcp_ack = TCPH->tcp_ack;
  1066. RSTHdr->tcp_flags =
  1067. MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong), TCP_FLAG_RST);
  1068. } else {
  1069. SeqNum TempSeq;
  1070. RSTHdr->tcp_seq = 0;
  1071. TempSeq = net_long(TCPH->tcp_seq);
  1072. TempSeq += Length;
  1073. RSTHdr->tcp_ack = net_long(TempSeq);
  1074. RSTHdr->tcp_flags =
  1075. MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
  1076. TCP_FLAG_RST | TCP_FLAG_ACK);
  1077. }
  1078. RSTHdr->tcp_window = 0;
  1079. RSTHdr->tcp_urgent = 0;
  1080. RSTHdr->tcp_dest = TCPH->tcp_src;
  1081. RSTHdr->tcp_src = TCPH->tcp_dest;
  1082. RSTHdr->tcp_xsum = 0;
  1083. RSTHdr->tcp_xsum =
  1084. ~XsumSendChain(PHXSUM(Src, Dest, PROTOCOL_TCP, sizeof(TCPHeader)),
  1085. Buffer);
  1086. (*LocalNetInfo.ipi_initopts) (&NewInfo);
  1087. if (OptInfo->ioi_options != NULL)
  1088. (*LocalNetInfo.ipi_updateopts)(OptInfo, &NewInfo, Dest,
  1089. NULL_IP_ADDR);
  1090. TCPSIncrementOutSegCount();
  1091. TStats.ts_outrsts++;
  1092. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  1093. NULL,
  1094. Buffer,
  1095. sizeof(TCPHeader),
  1096. Dest,
  1097. Src,
  1098. &NewInfo,
  1099. NULL,
  1100. PROTOCOL_TCP,
  1101. NULL);
  1102. if (SendStatus != IP_PENDING)
  1103. FreeTCPHeader(Buffer);
  1104. (*LocalNetInfo.ipi_freeopts) (&NewInfo);
  1105. }
  1106. }
  1107. //* GoToEstab - Transition to the established state.
  1108. //
  1109. // Called when we are going to the established state and need to finish up
  1110. // initializing things that couldn't be done until now. We assume the TCB
  1111. // lock is held by the caller on the TCB we're called with.
  1112. //
  1113. // Input: EstabTCB - TCB to transition.
  1114. //
  1115. // Returns: Nothing.
  1116. //
  1117. void
  1118. GoToEstab(TCB * EstabTCB)
  1119. {
  1120. uchar DType;
  1121. ushort MSS;
  1122. // Initialize our slow start and congestion control variables.
  1123. EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
  1124. EstabTCB->tcb_ssthresh = 0xffffffff;
  1125. EstabTCB->tcb_state = TCB_ESTAB;
  1126. if (SynAttackProtect && EstabTCB->tcb_rce == NULL) {
  1127. (*LocalNetInfo.ipi_openrce)(EstabTCB->tcb_daddr, EstabTCB->tcb_saddr,
  1128. &EstabTCB->tcb_rce, &DType, &MSS,
  1129. &EstabTCB->tcb_opt);
  1130. }
  1131. // We're in established. We'll subtract one from slow count for this fact,
  1132. // and if the slowcount goes to 0 we'll move onto the fast path.
  1133. if (--(EstabTCB->tcb_slowcount) == 0)
  1134. EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1135. InterlockedIncrement((PLONG)&TStats.ts_currestab);
  1136. EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
  1137. // Start the Keep-Alive timer if necessary.
  1138. if ((EstabTCB->tcb_flags & KEEPALIVE) && EstabTCB->tcb_conn) {
  1139. START_TCB_TIMER_R(EstabTCB, KA_TIMER,
  1140. EstabTCB->tcb_conn->tc_tcbkatime);
  1141. EstabTCB->tcb_kacount = 0;
  1142. }
  1143. }
  1144. //* InitSendState - Initialize the send state of a connection.
  1145. //
  1146. // Called during connection establishment to initialize our send state.
  1147. // (In this case, this refers to all information we'll put on the wire as
  1148. // well as pure send state). We pick an ISS, set up a rexmit timer value,
  1149. // etc. We assume the tcb_lock is held on the TCB when we are called.
  1150. //
  1151. // Input: NewTCB - TCB to be set up.
  1152. //
  1153. // Returns: Nothing.
  1154. void
  1155. InitSendState(TCB * NewTCB)
  1156. {
  1157. CTEStructAssert(NewTCB, tcb);
  1158. NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
  1159. NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
  1160. NewTCB->tcb_error = IP_SUCCESS;
  1161. // Initialize pseudo-header xsum.
  1162. NewTCB->tcb_phxsum = PHXSUM(NewTCB->tcb_saddr, NewTCB->tcb_daddr,
  1163. PROTOCOL_TCP, 0);
  1164. // Initialize retransmit and delayed ack stuff.
  1165. NewTCB->tcb_rexmitcnt = 0;
  1166. NewTCB->tcb_rtt = 0;
  1167. NewTCB->tcb_smrtt = 0;
  1168. NewTCB->tcb_delta = MS_TO_TICKS(6000);
  1169. NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
  1170. if (NewTCB->tcb_rce) {
  1171. //
  1172. // InitialRtt can be as low as 300msec to enable
  1173. // certain scenarios to work correctly.
  1174. //
  1175. if (NewTCB->tcb_rce->rce_TcpInitialRTT &&
  1176. NewTCB->tcb_rce->rce_TcpInitialRTT > 3) {
  1177. NewTCB->tcb_delta =
  1178. MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT * 2);
  1179. NewTCB->tcb_rexmit =
  1180. MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT);
  1181. }
  1182. }
  1183. STOP_TCB_TIMER_R(NewTCB, RXMIT_TIMER);
  1184. STOP_TCB_TIMER_R(NewTCB, DELACK_TIMER);
  1185. }
  1186. //* TCPStatus - Handle a status indication.
  1187. //
  1188. // This is the TCP status handler, called by IP when a status event
  1189. // occurs. For most of these we do nothing. For certain severe status
  1190. // events we will mark the local address as invalid.
  1191. //
  1192. // Entry: StatusType - Type of status (NET or HW). NET status
  1193. // is usually caused by a received ICMP
  1194. // message. HW status indicate a HW
  1195. // problem.
  1196. // StatusCode - Code identifying IP_STATUS.
  1197. // OrigDest - If this is NET status, the original dest. of
  1198. // DG that triggered it.
  1199. // OrigSrc - " " " " " , the original src.
  1200. // Src - IP address of status originator (could be local
  1201. // or remote).
  1202. // Param - Additional information for status - i.e. the
  1203. // param field of an ICMP message.
  1204. // Data - Data pertaining to status - for NET status, this
  1205. // is the first 8 bytes of the original DG.
  1206. //
  1207. // Returns: Nothing
  1208. //
  1209. void
  1210. TCPStatus(uchar StatusType, IP_STATUS StatusCode, IPAddr OrigDest,
  1211. IPAddr OrigSrc, IPAddr Src, ulong Param, void *Data)
  1212. {
  1213. CTELockHandle TCBHandle;
  1214. TCB *StatusTCB;
  1215. TCPHeader UNALIGNED *Header = (TCPHeader UNALIGNED *) Data;
  1216. SeqNum DropSeq;
  1217. uint index;
  1218. // Handle NET status codes differently from HW status codes.
  1219. if (StatusType == IP_NET_STATUS) {
  1220. // It's a NET code. Find a matching TCB.
  1221. StatusTCB = FindTCB(OrigSrc, OrigDest, Header->tcp_dest,
  1222. Header->tcp_src, &TCBHandle, FALSE, &index);
  1223. if (StatusTCB != NULL) {
  1224. // Found one. Get the lock on it, and continue.
  1225. CTEStructAssert(StatusTCB, tcb);
  1226. // Make sure the TCB is in a state that is interesting.
  1227. if (StatusTCB->tcb_state == TCB_CLOSED ||
  1228. StatusTCB->tcb_state == TCB_TIME_WAIT ||
  1229. CLOSING(StatusTCB)) {
  1230. CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
  1231. return;
  1232. }
  1233. switch (StatusCode) {
  1234. // Hard errors - Destination protocol unreachable. We treat
  1235. // these as fatal errors. Close the connection now.
  1236. case IP_DEST_PROT_UNREACHABLE:
  1237. StatusTCB->tcb_error = StatusCode;
  1238. REFERENCE_TCB(StatusTCB);
  1239. TryToCloseTCB(StatusTCB, TCB_CLOSE_UNREACH, TCBHandle);
  1240. RemoveTCBFromConn(StatusTCB);
  1241. NotifyOfDisc(StatusTCB, NULL,
  1242. MapIPError(StatusCode, TDI_DEST_UNREACHABLE),
  1243. NULL);
  1244. CTEGetLock(&StatusTCB->tcb_lock, &TCBHandle);
  1245. DerefTCB(StatusTCB, TCBHandle);
  1246. return;
  1247. break;
  1248. // Soft errors. Save the error in case it time out.
  1249. case IP_DEST_NET_UNREACHABLE:
  1250. case IP_DEST_HOST_UNREACHABLE:
  1251. case IP_DEST_PORT_UNREACHABLE:
  1252. case IP_BAD_ROUTE:
  1253. case IP_TTL_EXPIRED_TRANSIT:
  1254. case IP_TTL_EXPIRED_REASSEM:
  1255. case IP_PARAM_PROBLEM:
  1256. StatusTCB->tcb_error = StatusCode;
  1257. break;
  1258. case IP_PACKET_TOO_BIG:
  1259. // icmp new MTU is in ich_param=1
  1260. Param = net_short(Param >> 16);
  1261. StatusTCB->tcb_error = StatusCode;
  1262. // Fall through mtu change code
  1263. case IP_SPEC_MTU_CHANGE:
  1264. // A TCP datagram has triggered an MTU change. Figure out
  1265. // which connection it is, and update him to retransmit the
  1266. // segment. The Param value is the new MTU. We'll need to
  1267. // retransmit if the new MTU is less than our existing MTU
  1268. // and the sequence of the dropped packet is less than our
  1269. // current send next.
  1270. Param = Param - (sizeof(TCPHeader) +
  1271. StatusTCB->tcb_opt.ioi_optlength + sizeof(IPHeader));
  1272. DropSeq = net_long(Header->tcp_seq);
  1273. if (*(ushort *) & Param <= StatusTCB->tcb_mss &&
  1274. (SEQ_GTE(DropSeq, StatusTCB->tcb_senduna) &&
  1275. SEQ_LT(DropSeq, StatusTCB->tcb_sendnext))) {
  1276. // Need to initiate a retranmsit.
  1277. ResetSendNext(StatusTCB, DropSeq);
  1278. // Set the congestion window to allow only one packet.
  1279. // This may prevent us from sending anything if we
  1280. // didn't just set sendnext to senduna. This is OK,
  1281. // we'll retransmit later, or send when we get an ack.
  1282. StatusTCB->tcb_cwin = Param;
  1283. DelayAction(StatusTCB, NEED_OUTPUT);
  1284. PartitionDelayQProcessing(FALSE);
  1285. }
  1286. StatusTCB->tcb_mss =
  1287. (ushort) MIN(Param, (ulong) StatusTCB->tcb_remmss);
  1288. ASSERT(StatusTCB->tcb_mss > 0);
  1289. ValidateMSS(StatusTCB);
  1290. //
  1291. // Reset the Congestion Window if necessary
  1292. //
  1293. if (StatusTCB->tcb_cwin < StatusTCB->tcb_mss) {
  1294. StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
  1295. //
  1296. // Make sure the slow start threshold is at least
  1297. // 2 segments
  1298. //
  1299. if (StatusTCB->tcb_ssthresh <
  1300. ((uint) StatusTCB->tcb_mss * 2)
  1301. ) {
  1302. StatusTCB->tcb_ssthresh = StatusTCB->tcb_mss * 2;
  1303. }
  1304. }
  1305. break;
  1306. // Source quench. This will cause us to reinitiate our
  1307. // slow start by resetting our congestion window and
  1308. // adjusting our slow start threshold.
  1309. case IP_SOURCE_QUENCH:
  1310. //
  1311. // Code is removed, since source quench messages can be
  1312. // misused to cause DoS attack.
  1313. //
  1314. break;
  1315. default:
  1316. ASSERT(0);
  1317. break;
  1318. }
  1319. CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
  1320. } else {
  1321. // Couldn't find a matching TCB. Just free the lock and return.
  1322. }
  1323. } else if (StatusType == IP_RECONFIG_STATUS) {
  1324. if (StatusCode == IP_RECONFIG_SECFLTR) {
  1325. ControlSecurityFiltering(Param);
  1326. }
  1327. } else {
  1328. uint NewMTU;
  1329. // 'Hardware' or 'global' status. Figure out what to do.
  1330. switch (StatusCode) {
  1331. case IP_ADDR_DELETED:
  1332. // Local address has gone away. OrigDest is the IPAddr which is
  1333. // gone.
  1334. //
  1335. // Delete any security filters associated with this address
  1336. //
  1337. DeleteProtocolSecurityFilter(OrigDest, PROTOCOL_TCP);
  1338. break;
  1339. case IP_ADDR_ADDED:
  1340. //
  1341. // An address has materialized. OrigDest identifies the address.
  1342. // Data is a handle to the IP configuration information for the
  1343. // interface on which the address is instantiated.
  1344. //
  1345. AddProtocolSecurityFilter(OrigDest, PROTOCOL_TCP,
  1346. (NDIS_HANDLE) Data);
  1347. break;
  1348. case IP_MTU_CHANGE:
  1349. NewMTU = Param - sizeof(TCPHeader);
  1350. TCBWalk(SetTCBMTU, &OrigDest, &OrigSrc, &NewMTU);
  1351. break;
  1352. default:
  1353. ASSERT(0);
  1354. break;
  1355. }
  1356. }
  1357. }
  1358. //* FillTCPHeader - Fill the TCP header in.
  1359. //
  1360. // A utility routine to fill in the TCP header.
  1361. //
  1362. // Input: SendTCB - TCB to fill from.
  1363. // Header - Header to fill into.
  1364. //
  1365. // Returns: Nothing.
  1366. //
  1367. void
  1368. FillTCPHeader(TCB * SendTCB, TCPHeader * Header)
  1369. {
  1370. ushort S;
  1371. ulong L;
  1372. Header->tcp_src = SendTCB->tcb_sport;
  1373. Header->tcp_dest = SendTCB->tcb_dport;
  1374. L = SendTCB->tcb_sendnext;
  1375. Header->tcp_seq = net_long(L);
  1376. L = SendTCB->tcb_rcvnext;
  1377. Header->tcp_ack = net_long(L);
  1378. Header->tcp_flags = 0x1050;
  1379. Header->tcp_xsum = 0;
  1380. Header->tcp_urgent = 0;
  1381. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1382. ulong *ts_opt;
  1383. ts_opt = (ulong *)((uchar *) Header + 20);
  1384. //ts_opt = ts_opt + sizeof(TCPHeader);
  1385. *ts_opt++ = net_long(0x0101080A);
  1386. *ts_opt++ = net_long(TCPTime);
  1387. *ts_opt = net_long(SendTCB->tcb_tsrecent);
  1388. // Now the header is 32 bytes!!
  1389. Header->tcp_flags = 0x1080;
  1390. }
  1391. S = (ushort) (RcvWin(SendTCB) >> SendTCB->tcb_rcvwinscale);
  1392. Header->tcp_window = net_short(S);
  1393. }
  1394. //* ClassifyPacket - Classifies packets for GPC flow.
  1395. //
  1396. //
  1397. // Input: SendTCB - TCB of data/control packet to classify.
  1398. //
  1399. // Returns: Nothing.
  1400. //
  1401. void
  1402. ClassifyPacket(
  1403. TCB *SendTCB
  1404. )
  1405. {
  1406. #if GPC
  1407. //
  1408. // clear the precedence bits and get ready to be set
  1409. // according to the service type
  1410. //
  1411. if (DisableUserTOSSetting)
  1412. SendTCB->tcb_opt.ioi_tos &= TOS_MASK;
  1413. if (SendTCB->tcb_rce && GPCcfInfo) {
  1414. struct QosCfTransportInfo TransportInfo = {0, 0};
  1415. GPC_STATUS status = STATUS_SUCCESS;
  1416. GPC_IP_PATTERN Pattern;
  1417. IF_TCPDBG(TCP_DEBUG_GPC)
  1418. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: Classifying packet TCP %x\n", SendTCB));
  1419. Pattern.SrcAddr = SendTCB->tcb_saddr;
  1420. Pattern.DstAddr = SendTCB->tcb_daddr;
  1421. Pattern.ProtocolId = PROTOCOL_TCP;
  1422. Pattern.gpcSrcPort = SendTCB->tcb_sport;
  1423. Pattern.gpcDstPort = SendTCB->tcb_dport;
  1424. if (SendTCB->tcb_GPCCachedRTE != (void *)SendTCB->tcb_rce->rce_rte) {
  1425. //
  1426. // first time we use this RTE, or it has been changed
  1427. // since the last send
  1428. //
  1429. if (GetIFAndLink(SendTCB->tcb_rce, &SendTCB->tcb_GPCCachedIF,
  1430. (IPAddr *) & SendTCB->tcb_GPCCachedLink) ==
  1431. STATUS_SUCCESS) {
  1432. SendTCB->tcb_GPCCachedRTE = (void *)SendTCB->tcb_rce->rce_rte;
  1433. }
  1434. //
  1435. // invaludate the classification handle
  1436. //
  1437. SendTCB->tcb_opt.ioi_GPCHandle = 0;
  1438. }
  1439. Pattern.InterfaceId.InterfaceId = SendTCB->tcb_GPCCachedIF;
  1440. Pattern.InterfaceId.LinkId = SendTCB->tcb_GPCCachedLink;
  1441. IF_TCPDBG(TCP_DEBUG_GPC)
  1442. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: IF=%x Link=%x\n",
  1443. Pattern.InterfaceId.InterfaceId,
  1444. Pattern.InterfaceId.LinkId));
  1445. if (!SendTCB->tcb_opt.ioi_GPCHandle) {
  1446. IF_TCPDBG(TCP_DEBUG_GPC)
  1447. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, getting one now.\n"));
  1448. status =
  1449. GpcEntries.GpcClassifyPatternHandler(
  1450. (GPC_HANDLE)hGpcClient[GPC_CF_QOS],
  1451. GPC_PROTOCOL_TEMPLATE_IP,
  1452. &Pattern,
  1453. NULL, // context
  1454. (PCLASSIFICATION_HANDLE)&SendTCB->tcb_opt.ioi_GPCHandle,
  1455. 0,
  1456. NULL,
  1457. FALSE);
  1458. }
  1459. // Only if QOS patterns exist, we get the TOS bits out.
  1460. if (NT_SUCCESS(status) && GpcCfCounts[GPC_CF_QOS]) {
  1461. status =
  1462. GpcEntries.GpcGetUlongFromCfInfoHandler(
  1463. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1464. SendTCB->tcb_opt.ioi_GPCHandle,
  1465. FIELD_OFFSET(CF_INFO_QOS, TransportInformation),
  1466. (PULONG)&TransportInfo);
  1467. // It is likely that the pattern has gone by now
  1468. // and the handle that we are caching is INVALID.
  1469. // We need to pull up a new handle and get the
  1470. // TOS bit again.
  1471. if (STATUS_INVALID_HANDLE == status) {
  1472. IF_TCPDBG(TCP_DEBUG_GPC)
  1473. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, "
  1474. "getting one now.\n"));
  1475. SendTCB->tcb_opt.ioi_GPCHandle = 0;
  1476. status =
  1477. GpcEntries.GpcClassifyPatternHandler(
  1478. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1479. GPC_PROTOCOL_TEMPLATE_IP,
  1480. &Pattern,
  1481. NULL, // context
  1482. (PCLASSIFICATION_HANDLE)&SendTCB->tcb_opt.ioi_GPCHandle,
  1483. 0,
  1484. NULL,
  1485. FALSE);
  1486. //
  1487. // Only if QOS patterns exist, we get the TOS bits out.
  1488. //
  1489. if (NT_SUCCESS(status)) {
  1490. status =
  1491. GpcEntries.GpcGetUlongFromCfInfoHandler(
  1492. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1493. SendTCB->tcb_opt.ioi_GPCHandle,
  1494. FIELD_OFFSET(CF_INFO_QOS, TransportInformation),
  1495. (PULONG)&TransportInfo);
  1496. }
  1497. }
  1498. //
  1499. // Perhaps something needs to be done if GPC_CF_IPSEC has non-zero patterns.
  1500. //
  1501. //
  1502. // Set the TOS bit now.
  1503. //
  1504. IF_TCPDBG(TCP_DEBUG_GPC)
  1505. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: ServiceType(%d)=%d\n",
  1506. FIELD_OFFSET(CF_INFO_QOS, TransportInformation)));
  1507. if (status == STATUS_SUCCESS) {
  1508. //
  1509. // Get the TOS value and the types of allowed offloads.
  1510. //
  1511. SendTCB->tcb_opt.ioi_tos |= TransportInfo.ToSValue;
  1512. SendTCB->tcb_allowedoffloads = (USHORT)TransportInfo.AllowedOffloads;
  1513. //
  1514. // We are guaranteed for now that the other kind of offloads are
  1515. // never disabled, and hence, we won't check them on a per
  1516. // connection basis.
  1517. //
  1518. ASSERT((TransportInfo.AllowedOffloads | TCP_LARGE_SEND_OFFLOAD |
  1519. TCP_LARGE_SEND_TCPOPT_OFFLOAD |
  1520. TCP_LARGE_SEND_IPOPT_OFFLOAD) == TCP_IP_OFFLOAD_TYPES);
  1521. }
  1522. IF_TCPDBG(TCP_DEBUG_GPC)
  1523. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: TOS set to 0x%x\n",
  1524. SendTCB->tcb_opt.ioi_tos));
  1525. }
  1526. }
  1527. #endif
  1528. }
  1529. BOOLEAN
  1530. ProcessSend(TCB *SendTCB, SendCmpltContext *SCC, uint *pSendLength, uint AmtUnsent,
  1531. TCPHeader *Header, int SendWin, PNDIS_BUFFER CurrentBuffer)
  1532. {
  1533. TCPSendReq *CurSend = SCC->scc_firstsend;
  1534. long Result;
  1535. uint AmountLeft = *pSendLength;
  1536. ulong PrevFlags;
  1537. Queue *Next;
  1538. SeqNum OldSeq;
  1539. if (*pSendLength != 0) {
  1540. do {
  1541. BOOLEAN DirectSend = FALSE;
  1542. ASSERT(CurSend->tsr_refcnt > 0);
  1543. Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
  1544. ASSERT(Result > 0);
  1545. SCC->scc_count++;
  1546. if (SendTCB->tcb_sendofs == 0 &&
  1547. (SendTCB->tcb_sendsize <= AmountLeft) &&
  1548. (SCC->scc_tbufcount == 0) &&
  1549. (CurSend->tsr_lastbuf == NULL)) {
  1550. ulong length = 0;
  1551. PNDIS_BUFFER tmp = SendTCB->tcb_sendbuf;
  1552. while (tmp) {
  1553. length += NdisBufferLength(tmp);
  1554. tmp = NDIS_BUFFER_LINKAGE(tmp);
  1555. }
  1556. // If the requested length is
  1557. // more than in this mdl chain
  1558. // we can use fast path
  1559. if (AmountLeft >= length) {
  1560. DirectSend = TRUE;
  1561. }
  1562. }
  1563. if (DirectSend) {
  1564. NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendTCB->tcb_sendbuf;
  1565. do {
  1566. SCC->scc_ubufcount++;
  1567. CurrentBuffer =
  1568. NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1569. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  1570. CurSend->tsr_lastbuf = CurrentBuffer;
  1571. AmountLeft -= SendTCB->tcb_sendsize;
  1572. SendTCB->tcb_sendsize = 0;
  1573. } else {
  1574. uint AmountToDup;
  1575. PNDIS_BUFFER NewBuf, Buf;
  1576. uint Offset;
  1577. NDIS_STATUS NStatus;
  1578. uint Length;
  1579. // Either the current send has more data than
  1580. // or the offset is not zero.
  1581. // In either case we'll need to loop
  1582. // through the current send, allocating buffers.
  1583. Buf = SendTCB->tcb_sendbuf;
  1584. Offset = SendTCB->tcb_sendofs;
  1585. do {
  1586. ASSERT(Buf != NULL);
  1587. Length = NdisBufferLength(Buf);
  1588. ASSERT((Offset < Length) ||
  1589. (Offset == 0 && Length == 0));
  1590. // Adjust the length for the offset into
  1591. // this buffer.
  1592. Length -= Offset;
  1593. AmountToDup = MIN(AmountLeft, Length);
  1594. NdisCopyBuffer(&NStatus, &NewBuf, TCPSendBufferPool, Buf,
  1595. Offset, AmountToDup);
  1596. if (NStatus == NDIS_STATUS_SUCCESS) {
  1597. SCC->scc_tbufcount++;
  1598. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  1599. CurrentBuffer = NewBuf;
  1600. if (AmountToDup >= Length) {
  1601. // Exhausted this buffer.
  1602. Buf = NDIS_BUFFER_LINKAGE(Buf);
  1603. Offset = 0;
  1604. } else {
  1605. Offset += AmountToDup;
  1606. ASSERT(Offset < NdisBufferLength(Buf));
  1607. }
  1608. SendTCB->tcb_sendsize -= AmountToDup;
  1609. AmountLeft -= AmountToDup;
  1610. } else {
  1611. // Couldn't allocate a buffer. If
  1612. // the packet is already partly built,
  1613. // send what we've got, otherwise
  1614. // bail out.
  1615. if (SCC->scc_tbufcount == 0 &&
  1616. SCC->scc_ubufcount == 0) {
  1617. return FALSE;
  1618. }
  1619. *pSendLength -= AmountLeft;
  1620. AmountLeft = 0;
  1621. }
  1622. } while (AmountLeft && SendTCB->tcb_sendsize);
  1623. SendTCB->tcb_sendbuf = Buf;
  1624. SendTCB->tcb_sendofs = Offset;
  1625. }
  1626. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  1627. ushort UP;
  1628. // This send is urgent data. We need to figure
  1629. // out what the urgent data pointer should be.
  1630. // We know sendnext is the starting sequence
  1631. // number of the frame, and that at the top of
  1632. // this do loop sendnext identified a byte in
  1633. // the CurSend at that time. We advanced CurSend
  1634. // at the same rate we've decremented
  1635. // AmountLeft (AmountToSend - AmountLeft ==
  1636. // AmountBuilt), so sendnext +
  1637. // (AmountToSend - AmountLeft) identifies a byte
  1638. // in the current value of CurSend, and that
  1639. // quantity plus tcb_sendsize is the sequence
  1640. // number one beyond the current send.
  1641. UP =
  1642. (ushort) (*pSendLength - AmountLeft) +
  1643. (ushort) SendTCB->tcb_sendsize -
  1644. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  1645. Header->tcp_urgent = net_short(UP);
  1646. Header->tcp_flags |= TCP_FLAG_URG;
  1647. }
  1648. if (SendTCB->tcb_sendsize == 0) {
  1649. // We've exhausted this send. Set the PUSH bit.
  1650. Header->tcp_flags |= TCP_FLAG_PUSH;
  1651. PrevFlags = CurSend->tsr_flags;
  1652. Next = QNEXT(&CurSend->tsr_req.tr_q);
  1653. if (Next != QEND(&SendTCB->tcb_sendq)) {
  1654. CurSend = STRUCT_OF(TCPSendReq,
  1655. QSTRUCT(TCPReq, Next,
  1656. tr_q), tsr_req);
  1657. CTEStructAssert(CurSend, tsr);
  1658. SendTCB->tcb_sendsize =
  1659. CurSend->tsr_unasize;
  1660. SendTCB->tcb_sendofs = CurSend->tsr_offset;
  1661. SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
  1662. SendTCB->tcb_cursend = CurSend;
  1663. // Check the urgent flags. We can't combine
  1664. // new urgent data on to the end of old
  1665. // non-urgent data.
  1666. if ((PrevFlags & TSR_FLAG_URG) && !
  1667. (CurSend->tsr_flags & TSR_FLAG_URG))
  1668. break;
  1669. } else {
  1670. ASSERT(AmountLeft == 0);
  1671. SendTCB->tcb_cursend = NULL;
  1672. SendTCB->tcb_sendbuf = NULL;
  1673. }
  1674. }
  1675. } while (AmountLeft != 0);
  1676. }
  1677. // Update the sequence numbers, and start a RTT
  1678. // measurement if needed.
  1679. // Adjust for what we're really going to send.
  1680. *pSendLength -= AmountLeft;
  1681. OldSeq = SendTCB->tcb_sendnext;
  1682. SendTCB->tcb_sendnext += *pSendLength;
  1683. if (SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
  1684. // We're sending entirely new data.
  1685. // We can't advance sendmax once FIN_SENT is set.
  1686. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1687. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1688. // We've advanced sendmax, so we must be sending
  1689. // some new data, so bump the outsegs counter.
  1690. TCPSIncrementOutSegCount();
  1691. if (SendTCB->tcb_rtt == 0) {
  1692. // No RTT running, so start one.
  1693. SendTCB->tcb_rtt = TCPTime;
  1694. SendTCB->tcb_rttseq = OldSeq;
  1695. }
  1696. } else {
  1697. // We have at least some retransmission.
  1698. if ((SendTCB->tcb_sendmax - OldSeq) > 1) {
  1699. TStats.ts_retranssegs++;
  1700. }
  1701. if (SEQ_GT(SendTCB->tcb_sendnext,
  1702. SendTCB->tcb_sendmax)) {
  1703. // But we also have some new data, so check the rtt stuff.
  1704. TCPSIncrementOutSegCount();
  1705. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1706. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1707. if (SendTCB->tcb_rtt == 0) {
  1708. // No RTT running, so start one.
  1709. SendTCB->tcb_rtt = TCPTime;
  1710. SendTCB->tcb_rttseq = OldSeq;
  1711. }
  1712. }
  1713. }
  1714. // We've built the frame entirely. If we've send
  1715. // everything we have and there is a FIN pending,
  1716. // OR it in.
  1717. if (AmtUnsent == *pSendLength) {
  1718. if (SendTCB->tcb_flags & FIN_NEEDED) {
  1719. ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
  1720. (SendTCB->tcb_sendnext ==
  1721. (SendTCB->tcb_sendmax - 1)));
  1722. // See if we still have room in the window for a FIN.
  1723. if (SendWin > (int)*pSendLength) {
  1724. Header->tcp_flags |= TCP_FLAG_FIN;
  1725. SendTCB->tcb_sendnext++;
  1726. SendTCB->tcb_sendmax =
  1727. SendTCB->tcb_sendnext;
  1728. SendTCB->tcb_flags |=
  1729. (FIN_SENT | FIN_OUTSTANDING);
  1730. SendTCB->tcb_flags &= ~FIN_NEEDED;
  1731. }
  1732. }
  1733. }
  1734. return TRUE;
  1735. }
  1736. //* TCPSend - Send data from a TCP connection.
  1737. //
  1738. // This is the main 'send data' routine. We go into a loop, trying
  1739. // to send data until we can't for some reason. First we compute
  1740. // the useable window, use it to figure the amount we could send. If
  1741. // the amount we could send meets certain criteria we'll build a frame
  1742. // and send it, after setting any appropriate control bits. We assume
  1743. // the caller has put a reference on the TCB.
  1744. //
  1745. // Input: SendTCB - TCB to be sent from.
  1746. // TCBHandle - Lock handle for TCB.
  1747. //
  1748. // Returns: Nothing.
  1749. //
  1750. void
  1751. TCPSend(TCB * SendTCB, CTELockHandle TCBHandle)
  1752. {
  1753. int SendWin; // Useable send window.
  1754. uint AmountToSend; // Amount to send this time.
  1755. uint AmountLeft;
  1756. TCPHeader *Header; // TCP header for a send.
  1757. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1758. TCPSendReq *CurSend;
  1759. SendCmpltContext *SCC;
  1760. SeqNum OldSeq;
  1761. IP_STATUS SendStatus;
  1762. uint AmtOutstanding, AmtUnsent;
  1763. int ForceWin; // Window we're force to use.
  1764. BOOLEAN FullSegment;
  1765. BOOLEAN MoreToSend = FALSE;
  1766. uint SegmentsSent = 0;
  1767. BOOLEAN LargeSendOffload = FALSE;
  1768. BOOLEAN LargeSendFailed = FALSE;
  1769. uint MSS;
  1770. uint LargeSend, SentBytes;
  1771. void *Irp;
  1772. CTEStructAssert(SendTCB, tcb);
  1773. ASSERT(SendTCB->tcb_refcnt != 0);
  1774. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1775. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1776. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1777. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1778. if (!(SendTCB->tcb_flags & IN_TCP_SEND) &&
  1779. !(SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
  1780. SendTCB->tcb_flags |= IN_TCP_SEND;
  1781. // We'll continue this loop until we send a FIN, or we break out
  1782. // internally for some other reason.
  1783. while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
  1784. CheckTCBSends(SendTCB);
  1785. SegmentsSent++;
  1786. if (SegmentsSent > MaxSendSegments) {
  1787. // We are throttled by max segments that can be sent in
  1788. // this loop. Comeback later
  1789. MoreToSend = TRUE;
  1790. break;
  1791. }
  1792. AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
  1793. SendTCB->tcb_senduna);
  1794. AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
  1795. ASSERT(*(int *)&AmtUnsent >= 0);
  1796. SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
  1797. AmtOutstanding);
  1798. // if this send is after the fast recovery
  1799. // and sendwin is zero because of amt outstanding
  1800. // then, at least force 1 segment to prevent delayed
  1801. // ack timeouts from the remote
  1802. if (SendTCB->tcb_force) {
  1803. SendTCB->tcb_force = 0;
  1804. if (SendWin < SendTCB->tcb_mss) {
  1805. SendWin = SendTCB->tcb_mss;
  1806. }
  1807. }
  1808. // Since the window could have shrank, need to get it to zero at
  1809. // least.
  1810. ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
  1811. FORCE_OUT_SHIFT);
  1812. SendWin = MAX(SendWin, ForceWin);
  1813. LargeSend = MIN((uint) SendWin, AmtUnsent);
  1814. LargeSend = MIN(LargeSend, SendTCB->tcb_mss * MaxSendSegments);
  1815. AmountToSend =
  1816. MIN(MIN((uint) SendWin, AmtUnsent), SendTCB->tcb_mss);
  1817. ASSERT(SendTCB->tcb_mss > 0);
  1818. // Time stamp option addition might force us to cut the data
  1819. // to be sent by 12 bytes.
  1820. FullSegment = FALSE;
  1821. if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
  1822. (AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
  1823. AmountToSend = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
  1824. FullSegment = TRUE;
  1825. } else {
  1826. if (AmountToSend == SendTCB->tcb_mss)
  1827. FullSegment = TRUE;
  1828. }
  1829. // We will send a segment if
  1830. //
  1831. // 1. The segment size == mss
  1832. // 2. This is the only segment to be sent
  1833. // 3. FIN is set and this is the last segment
  1834. // 4. FORCE_OUTPUT is set
  1835. // 5. Amount to be sent is >= MSS/2
  1836. if (FullSegment ||
  1837. (AmountToSend != 0 && AmountToSend == AmtUnsent) ||
  1838. (SendWin != 0 &&
  1839. (((SendTCB->tcb_flags & FIN_NEEDED) &&
  1840. (AmtUnsent <= SendTCB->tcb_mss)) ||
  1841. (SendTCB->tcb_flags & FORCE_OUTPUT) ||
  1842. AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
  1843. //
  1844. // Set MSS first.
  1845. //
  1846. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1847. MSS = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
  1848. } else {
  1849. MSS = SendTCB->tcb_mss;
  1850. }
  1851. // It's OK to send something. Try to get a header buffer now.
  1852. FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
  1853. if (FirstBuffer != NULL) {
  1854. // Got a header buffer. Loop through the sends on the TCB,
  1855. // building a frame.
  1856. CurrentBuffer = FirstBuffer;
  1857. CurSend = SendTCB->tcb_cursend;
  1858. Header =
  1859. (TCPHeader *)((PUCHAR)Header + LocalNetInfo.ipi_hsize);
  1860. // allow room for filling time stamp options (12 bytes)
  1861. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1862. NdisAdjustBufferLength(FirstBuffer,
  1863. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  1864. SCC = (SendCmpltContext *) (Header + 1);
  1865. SCC = (SendCmpltContext *) ((uchar *) SCC + ALIGNED_TS_OPT_SIZE);
  1866. } else {
  1867. SCC = (SendCmpltContext *) (Header + 1);
  1868. }
  1869. SCC = ALIGN_UP_POINTER(SCC, PVOID);
  1870. #if DBG
  1871. SCC->scc_sig = scc_signature;
  1872. #endif
  1873. FillTCPHeader(SendTCB, Header);
  1874. SCC->scc_ubufcount = 0;
  1875. SCC->scc_tbufcount = 0;
  1876. SCC->scc_count = 0;
  1877. SCC->scc_LargeSend = 0;
  1878. // Check if RCE has large send capability and, if so,
  1879. // attempt to offload segmentation to the hardware.
  1880. // * only offload if there is more than 1 segment's worth
  1881. // of data.
  1882. // * only offload if the number of segments is greater than
  1883. // the minimum number of segments the adapter is willing
  1884. // to offload.
  1885. // * only offload if it is allowed by all the entities of
  1886. // known classification families.
  1887. // * ( i.e. if TCP or IP options need to be
  1888. // offloaded, we only offload if the adapter supports it)
  1889. //
  1890. if (!DisableLargeSendOffload &&
  1891. SendTCB->tcb_rce &&
  1892. (SendTCB->tcb_rce->rce_OffloadFlags &
  1893. TCP_LARGE_SEND_OFFLOAD) &&
  1894. (SendTCB->tcb_allowedoffloads &
  1895. TCP_LARGE_SEND_OFFLOAD) &&
  1896. (!(SendTCB->tcb_tcpopts & TCP_FLAG_TS) ||
  1897. (SendTCB->tcb_rce->rce_OffloadFlags &
  1898. TCP_LARGE_SEND_TCPOPT_OFFLOAD)) &&
  1899. (!SendTCB->tcb_opt.ioi_options ||
  1900. (SendTCB->tcb_rce->rce_OffloadFlags &
  1901. TCP_LARGE_SEND_IPOPT_OFFLOAD)) &&
  1902. !LargeSendFailed &&
  1903. (MSS < LargeSend) &&
  1904. (CurSend && (CurSend->tsr_lastbuf == NULL)) && !(CurSend->tsr_flags & TSR_FLAG_URG)) {
  1905. uint PartialSegment;
  1906. LargeSendOffload = TRUE;
  1907. LargeSend =
  1908. MIN(SendTCB->tcb_rce->rce_TcpLargeSend.MaxOffLoadSize,
  1909. LargeSend);
  1910. //
  1911. // Adjust LargeSend to make LSO path
  1912. // conform sender side silly window avoidance:
  1913. // 1) it is multiple of MSS
  1914. // 2) We are sending out everything we have
  1915. // 3) FORCE_OUTPUT is set
  1916. // 4) Amount to be sent is >= maximum window size /2
  1917. //
  1918. PartialSegment = LargeSend % MSS;
  1919. if ((PartialSegment != 0) &&
  1920. (LargeSend != AmtUnsent) &&
  1921. (!(SendTCB->tcb_flags & FORCE_OUTPUT)) &&
  1922. (PartialSegment < (SendTCB->tcb_maxwin / 2))) {
  1923. LargeSend -= PartialSegment;
  1924. }
  1925. //
  1926. // Offload only if the segments we have is greater than
  1927. // the minimum segment requirement of the NIC.
  1928. //
  1929. if (SendTCB->tcb_rce->rce_TcpLargeSend.MinSegmentCount >
  1930. (LargeSend + MSS - 1) / MSS ) {
  1931. LargeSendOffload = FALSE;
  1932. }
  1933. //
  1934. // LargeSend can not be zero.
  1935. //
  1936. if (LargeSend == 0) {
  1937. LargeSendOffload = FALSE;
  1938. }
  1939. } else {
  1940. LargeSendOffload = FALSE;
  1941. }
  1942. if (LargeSendOffload) {
  1943. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  1944. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x offload %d bytes at "
  1945. "seq %u ack %u win %u\n",
  1946. SendTCB, LargeSend, SendTCB->tcb_sendnext,
  1947. SendTCB->tcb_rcvnext, SendWin));
  1948. }
  1949. OldSeq = SendTCB->tcb_sendnext;
  1950. CTEStructAssert(CurSend, tsr);
  1951. SCC->scc_firstsend = CurSend;
  1952. if (!ProcessSend(SendTCB, SCC, &LargeSend, AmtUnsent, Header,
  1953. SendWin, CurrentBuffer)) {
  1954. goto error_oor1;
  1955. }
  1956. {
  1957. uint PHXsum = SendTCB->tcb_phxsum;
  1958. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  1959. PHXsum) >> 16;
  1960. Header->tcp_xsum = (ushort) PHXsum;
  1961. }
  1962. SCC->scc_SendSize = LargeSend;
  1963. SCC->scc_ByteSent = 0;
  1964. SCC->scc_LargeSend = SendTCB;
  1965. REFERENCE_TCB(SendTCB);
  1966. #if DBG
  1967. SendTCB->tcb_LargeSend++;
  1968. #endif
  1969. SendTCB->tcb_rcvdsegs = 0;
  1970. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1971. LargeSend +=
  1972. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE;
  1973. } else {
  1974. LargeSend += sizeof(TCPHeader);
  1975. }
  1976. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  1977. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x large-send %d seq %u\n",
  1978. SendTCB, LargeSend, OldSeq));
  1979. }
  1980. ClassifyPacket(SendTCB);
  1981. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  1982. SendStatus =
  1983. (*LocalNetInfo.ipi_largexmit)(TCPProtInfo, SCC,
  1984. FirstBuffer,
  1985. LargeSend,
  1986. SendTCB->tcb_daddr,
  1987. SendTCB->tcb_saddr,
  1988. &SendTCB->tcb_opt,
  1989. SendTCB->tcb_rce,
  1990. PROTOCOL_TCP,
  1991. &SentBytes,
  1992. MSS);
  1993. SendTCB->tcb_error = SendStatus;
  1994. if (SendStatus != IP_PENDING) {
  1995. // Let TCPSendComplete hanlde partial sends
  1996. SCC->scc_ByteSent = SentBytes;
  1997. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  1998. }
  1999. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2000. if (SendStatus == IP_GENERAL_FAILURE) {
  2001. if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
  2002. SEQ_LT(OldSeq, SendTCB->tcb_sendnext)) {
  2003. ResetSendNext(SendTCB, OldSeq);
  2004. }
  2005. LargeSendFailed = TRUE;
  2006. continue;
  2007. }
  2008. if (SendStatus == IP_PACKET_TOO_BIG) {
  2009. SeqNum NewSeq = OldSeq + SentBytes;
  2010. //Not everything got sent.
  2011. //Adjust for what is sent
  2012. if (SEQ_GTE(NewSeq, SendTCB->tcb_senduna) &&
  2013. SEQ_LT(NewSeq, SendTCB->tcb_sendnext)) {
  2014. ResetSendNext(SendTCB, NewSeq);
  2015. }
  2016. }
  2017. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2018. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2019. }
  2020. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT |
  2021. FORCE_OUTPUT | SEND_AFTER_RCV);
  2022. DerefTCB(SendTCB, TCBHandle);
  2023. return;
  2024. }
  2025. // Normal path
  2026. AmountLeft = AmountToSend;
  2027. if (AmountToSend != 0) {
  2028. CTEStructAssert(CurSend, tsr);
  2029. SCC->scc_firstsend = CurSend;
  2030. } else {
  2031. // We're in the loop, but AmountToSend is 0. This
  2032. // should happen only when we're sending a FIN. Check
  2033. // this, and return if it's not true.
  2034. ASSERT(AmtUnsent == 0);
  2035. if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
  2036. FreeTCPHeader(FirstBuffer);
  2037. break;
  2038. }
  2039. SCC->scc_firstsend = NULL;
  2040. NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
  2041. }
  2042. OldSeq = SendTCB->tcb_sendnext;
  2043. if (!ProcessSend(SendTCB, SCC, &AmountToSend, AmtUnsent, Header,
  2044. SendWin, CurrentBuffer)) {
  2045. goto error_oor1;
  2046. }
  2047. AmountToSend += sizeof(TCPHeader);
  2048. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
  2049. FORCE_OUTPUT);
  2050. STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
  2051. STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
  2052. SendTCB->tcb_rcvdsegs = 0;
  2053. if ( (SendTCB->tcb_flags & KEEPALIVE) && ( SendTCB->tcb_conn != NULL) )
  2054. START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
  2055. SendTCB->tcb_kacount = 0;
  2056. // We're all set. Xsum it and send it.
  2057. ClassifyPacket(SendTCB);
  2058. // Account for time stamp options
  2059. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  2060. if (SendTCB->tcb_rce &&
  2061. (SendTCB->tcb_rce->rce_OffloadFlags &
  2062. TCP_XMT_CHECKSUM_OFFLOAD) &&
  2063. (SendTCB->tcb_rce->rce_OffloadFlags &
  2064. TCP_CHECKSUM_OPT_OFFLOAD)) {
  2065. uint PHXsum =
  2066. SendTCB->tcb_phxsum +
  2067. (uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE);
  2068. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  2069. PHXsum) >> 16;
  2070. Header->tcp_xsum = (ushort) PHXsum;
  2071. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2072. #if DBG
  2073. DbgTcpSendHwChksumCount++;
  2074. #endif
  2075. } else {
  2076. Header->tcp_xsum =
  2077. ~XsumSendChain(
  2078. SendTCB->tcb_phxsum +
  2079. (uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE),
  2080. FirstBuffer);
  2081. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2082. }
  2083. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2084. Irp = NULL;
  2085. if (SCC->scc_firstsend) {
  2086. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2087. }
  2088. SendStatus =
  2089. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, SCC,
  2090. FirstBuffer,
  2091. AmountToSend +
  2092. ALIGNED_TS_OPT_SIZE,
  2093. SendTCB->tcb_daddr,
  2094. SendTCB->tcb_saddr,
  2095. &SendTCB->tcb_opt,
  2096. SendTCB->tcb_rce,
  2097. PROTOCOL_TCP,
  2098. Irp );
  2099. } else {
  2100. if (SendTCB->tcb_rce &&
  2101. (SendTCB->tcb_rce->rce_OffloadFlags &
  2102. TCP_XMT_CHECKSUM_OFFLOAD)) {
  2103. uint PHXsum = SendTCB->tcb_phxsum +
  2104. (uint)net_short(AmountToSend);
  2105. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  2106. PHXsum) >> 16;
  2107. Header->tcp_xsum = (ushort) PHXsum;
  2108. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2109. #if DBG
  2110. DbgTcpSendHwChksumCount++;
  2111. #endif
  2112. } else {
  2113. Header->tcp_xsum =
  2114. ~XsumSendChain(SendTCB->tcb_phxsum +
  2115. (uint)net_short(AmountToSend),
  2116. FirstBuffer);
  2117. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2118. }
  2119. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2120. Irp = NULL;
  2121. if(SCC->scc_firstsend) {
  2122. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2123. }
  2124. SendStatus =
  2125. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  2126. SCC,
  2127. FirstBuffer,
  2128. AmountToSend,
  2129. SendTCB->tcb_daddr,
  2130. SendTCB->tcb_saddr,
  2131. &SendTCB->tcb_opt,
  2132. SendTCB->tcb_rce,
  2133. PROTOCOL_TCP,
  2134. Irp );
  2135. }
  2136. SendTCB->tcb_error = SendStatus;
  2137. if (SendStatus != IP_PENDING) {
  2138. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2139. if (SendStatus != IP_SUCCESS) {
  2140. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2141. // This packet didn't get sent. If nothing's
  2142. // changed in the TCB, put sendnext back to
  2143. // what we just tried to send. Depending on
  2144. // the error, we may try again.
  2145. if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
  2146. SEQ_LT(OldSeq, SendTCB->tcb_sendnext))
  2147. ResetSendNext(SendTCB, OldSeq);
  2148. // We know this packet didn't get sent. Start
  2149. // the retransmit timer now, if it's not already
  2150. // runnimg, in case someone came in while we
  2151. // were in IP and stopped it.
  2152. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2153. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2154. }
  2155. // If it failed because of an MTU problem, get
  2156. // the new MTU and try again.
  2157. if (SendStatus == IP_PACKET_TOO_BIG) {
  2158. uint NewMTU;
  2159. // The MTU has changed. Update it, and try
  2160. // again.
  2161. // if ipsec is adjusting the mtu, rce_newmtu
  2162. // will contain the newmtu.
  2163. if (SendTCB->tcb_rce) {
  2164. if (!SendTCB->tcb_rce->rce_newmtu) {
  2165. SendStatus =
  2166. (*LocalNetInfo.ipi_getpinfo)(
  2167. SendTCB->tcb_daddr,
  2168. SendTCB->tcb_saddr,
  2169. &NewMTU,
  2170. NULL,
  2171. SendTCB->tcb_rce);
  2172. } else {
  2173. NewMTU = SendTCB->tcb_rce->rce_newmtu;
  2174. SendStatus = IP_SUCCESS;
  2175. }
  2176. } else {
  2177. SendStatus =
  2178. (*LocalNetInfo.ipi_getpinfo)(
  2179. SendTCB->tcb_daddr,
  2180. SendTCB->tcb_saddr,
  2181. &NewMTU,
  2182. NULL,
  2183. SendTCB->tcb_rce);
  2184. }
  2185. if (SendStatus != IP_SUCCESS)
  2186. break;
  2187. // We have a new MTU. Make sure it's big enough
  2188. // to use. If not, correct this and turn off
  2189. // MTU discovery on this TCB. Otherwise use the
  2190. // new MTU.
  2191. if (NewMTU <=
  2192. (sizeof(TCPHeader) +
  2193. SendTCB->tcb_opt.ioi_optlength)) {
  2194. // The new MTU is too small to use. Turn off
  2195. // PMTU discovery on this TCB, and drop to
  2196. // our off net MTU size.
  2197. SendTCB->tcb_opt.ioi_flags &= ~IP_FLAG_DF;
  2198. SendTCB->tcb_mss =
  2199. MIN((ushort)MAX_REMOTE_MSS,
  2200. SendTCB->tcb_remmss);
  2201. } else {
  2202. // The new MTU is adequate. Adjust it for
  2203. // the header size and options length, and
  2204. // use it.
  2205. NewMTU -= sizeof(TCPHeader) -
  2206. SendTCB->tcb_opt.ioi_optlength;
  2207. SendTCB->tcb_mss =
  2208. MIN((ushort) NewMTU,
  2209. SendTCB->tcb_remmss);
  2210. }
  2211. ASSERT(SendTCB->tcb_mss > 0);
  2212. ValidateMSS(SendTCB);
  2213. continue;
  2214. }
  2215. break;
  2216. }
  2217. }
  2218. //Start it now, since we know that mac driver accepted it.
  2219. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2220. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2221. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2222. }
  2223. continue;
  2224. } else // FirstBuffer != NULL.
  2225. goto error_oor;
  2226. } else {
  2227. // We've decided we can't send anything now. Figure out why, and
  2228. // see if we need to set a timer.
  2229. if (SendTCB->tcb_sendwin == 0) {
  2230. if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
  2231. ushort tmp;
  2232. SendTCB->tcb_flags |= FLOW_CNTLD;
  2233. SendTCB->tcb_rexmitcnt = 0;
  2234. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2235. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2236. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2237. SendTCB->tcb_slowcount++;
  2238. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2239. } else if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER))
  2240. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2241. } else if (AmountToSend != 0)
  2242. // We have something to send, but we're not sending
  2243. // it, presumably due to SWS avoidance.
  2244. if (!TCB_TIMER_RUNNING_R(SendTCB, SWS_TIMER))
  2245. START_TCB_TIMER_R(SendTCB, SWS_TIMER, SWS_TO);
  2246. break;
  2247. }
  2248. } // while (!FIN_OUTSTANDING)
  2249. // We're done sending, so we don't need the output flags set.
  2250. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
  2251. SEND_AFTER_RCV);
  2252. if (MoreToSend) {
  2253. //just indicate that we need to send more
  2254. DelayAction(SendTCB, NEED_OUTPUT);
  2255. PartitionDelayQProcessing(FALSE);
  2256. }
  2257. // This is for TS algo
  2258. SendTCB->tcb_lastack = SendTCB->tcb_rcvnext;
  2259. } else
  2260. SendTCB->tcb_flags |= SEND_AFTER_RCV;
  2261. DerefTCB(SendTCB, TCBHandle);
  2262. return;
  2263. // Common case error handling code for out of resource conditions. Start the
  2264. // retransmit timer if it's not already running (so that we try this again
  2265. // later), clean up and return.
  2266. error_oor:
  2267. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2268. ushort tmp;
  2269. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2270. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2271. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2272. }
  2273. // We had an out of resource problem, so clear the OUTPUT flags.
  2274. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  2275. DerefTCB(SendTCB, TCBHandle);
  2276. return;
  2277. error_oor1:
  2278. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2279. ushort tmp;
  2280. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2281. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2282. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2283. }
  2284. // We had an out of resource problem, so clear the OUTPUT flags.
  2285. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  2286. DerefTCB(SendTCB, TCBHandle);
  2287. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2288. return;
  2289. }
  2290. //* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
  2291. //
  2292. // Called to handle fast retransmit of the segment which the reveiver
  2293. // is asking for.
  2294. // We assume the caller has put a reference on the TCB, and the TCB is locked
  2295. // on entry. The reference is dropped and the lock released before returning.
  2296. //
  2297. // Input: SeqTCB - Pointer to TCB to be updated.
  2298. // NewSeq - Sequence number to set.
  2299. // NewCWin - new value for congestion window.
  2300. //
  2301. // Returns: Nothing.
  2302. //
  2303. void
  2304. ResetAndFastSend(TCB * SeqTCB, SeqNum NewSeq, uint NewCWin)
  2305. {
  2306. TCPSendReq *SendReq;
  2307. uint AmtForward;
  2308. Queue *CurQ;
  2309. PNDIS_BUFFER Buffer;
  2310. uint Offset;
  2311. uint SendSize;
  2312. CTELockHandle TCBHandle;
  2313. int ToBeSent;
  2314. CTEStructAssert(SeqTCB, tcb);
  2315. ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
  2316. // The new seq must be less than send max, or NewSeq, senduna, sendnext,
  2317. // and sendmax must all be equal. (The latter case happens when we're
  2318. // called exiting TIME_WAIT, or possibly when we're retransmitting
  2319. // during a flow controlled situation).
  2320. ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
  2321. (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
  2322. SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
  2323. SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
  2324. if (SYNC_STATE(SeqTCB->tcb_state) && SeqTCB->tcb_state != TCB_TIME_WAIT) {
  2325. // In these states we need to update the send queue.
  2326. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  2327. // Stop the retransmit timer only if we are sure there are going
  2328. // to be retransmissions.
  2329. STOP_TCB_TIMER_R(SeqTCB, RXMIT_TIMER);
  2330. SeqTCB->tcb_rtt = 0;
  2331. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  2332. SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
  2333. // SendReq points to the first send request on the send queue.
  2334. // We're pointing at the proper send req now. We need to go down
  2335. // SendReq points to the cursend
  2336. // SendSize point to sendsize in the cursend
  2337. SendSize = SendReq->tsr_unasize;
  2338. Buffer = SendReq->tsr_buffer;
  2339. Offset = SendReq->tsr_offset;
  2340. // Call the fast retransmit send now
  2341. if ((SeqTCB->tcb_tcpopts & TCP_FLAG_SACK)) {
  2342. SackListEntry *Prev, *Current;
  2343. SeqNum CurBegin = 0, CurEnd;
  2344. BOOLEAN UseSackList = TRUE;
  2345. Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2346. Current = Prev->next;
  2347. // There is a hole from Newseq to Currentbeg
  2348. // try to retransmit whole hole size!!
  2349. if (Current && SEQ_LT(NewSeq, Current->begin)) {
  2350. ToBeSent = Current->begin - NewSeq;
  2351. CurBegin = Current->begin;
  2352. CurEnd = Current->end;
  2353. } else {
  2354. UseSackList = FALSE;
  2355. ToBeSent = SeqTCB->tcb_mss;
  2356. }
  2357. IF_TCPDBG(TCP_DEBUG_SACK) {
  2358. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2359. "In Sack Reset and send rexmiting %d %d\n",
  2360. NewSeq, SendSize));
  2361. }
  2362. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  2363. ToBeSent);
  2364. // If we have not been already acked for the missing segments
  2365. // and if we know where to start retransmitting do so now.
  2366. // Also, re-validate SackListentry
  2367. Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2368. Current = Prev->next;
  2369. if (!UseSackList || (Current && Current->begin != CurBegin)) {
  2370. // The SACK list changed while we were in a transmission.
  2371. // Just bail out, and wait for the next ACK to continue
  2372. // if necessary.
  2373. Current = NULL;
  2374. }
  2375. while (Current && Current->next &&
  2376. (SEQ_GTE(NewSeq, SeqTCB->tcb_senduna)) &&
  2377. (SEQ_LT(SeqTCB->tcb_senduna, Current->next->end))) {
  2378. SeqNum NextSeq;
  2379. ASSERT(SEQ_LTE(Current->begin, Current->end));
  2380. // There can be multiple dropped packets till
  2381. // Current->begin.
  2382. IF_TCPDBG(TCP_DEBUG_SACK) {
  2383. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2384. "Scanning after Current %d %d\n",
  2385. Current->begin, Current->end));
  2386. }
  2387. NextSeq = Current->end;
  2388. CurBegin = Current->begin;
  2389. ASSERT(SEQ_LT(NextSeq, SeqTCB->tcb_sendmax));
  2390. // If we have not yet sent the segment keep quiet now.
  2391. if (SEQ_GTE(NextSeq, SeqTCB->tcb_sendnext) ||
  2392. (SEQ_LTE(NextSeq, SeqTCB->tcb_senduna))) {
  2393. break;
  2394. }
  2395. // Position cursend by following number of bytes
  2396. AmtForward = NextSeq - NewSeq;
  2397. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  2398. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  2399. SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
  2400. while (AmtForward) {
  2401. if (AmtForward >= SendReq->tsr_unasize) {
  2402. AmtForward -= SendReq->tsr_unasize;
  2403. CurQ = QNEXT(CurQ);
  2404. SendReq =
  2405. (TCPSendReq *)STRUCT_OF(TCPReq, CurQ, tr_q);
  2406. ASSERT(CurQ != QEND(&SeqTCB->tcb_sendq));
  2407. } else {
  2408. break;
  2409. }
  2410. }
  2411. SendSize = SendReq->tsr_unasize - AmtForward;
  2412. Buffer = SendReq->tsr_buffer;
  2413. Offset = SendReq->tsr_offset;
  2414. while (AmtForward) {
  2415. uint Length;
  2416. ASSERT((Offset < NdisBufferLength(Buffer)) ||
  2417. ((Offset == 0) &&
  2418. (NdisBufferLength(Buffer) == 0)));
  2419. Length = NdisBufferLength(Buffer) - Offset;
  2420. if (AmtForward >= Length) {
  2421. // We're moving past this one. Skip over him,
  2422. // and 0 the Offset we're keeping.
  2423. AmtForward -= Length;
  2424. Offset = 0;
  2425. Buffer = NDIS_BUFFER_LINKAGE(Buffer);
  2426. ASSERT(Buffer != NULL);
  2427. } else {
  2428. break;
  2429. }
  2430. }
  2431. Offset = Offset + AmtForward;
  2432. // Okay. Now retransmit this seq too.
  2433. if (Current->next) {
  2434. ToBeSent = Current->next->begin - Current->end;
  2435. } else {
  2436. ToBeSent = SeqTCB->tcb_mss;
  2437. }
  2438. IF_TCPDBG(TCP_DEBUG_SACK) {
  2439. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2440. "SACK inner loop rexmiting %d %d %d\n",
  2441. Current->end, SendSize, ToBeSent));
  2442. }
  2443. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize,
  2444. NextSeq, ToBeSent);
  2445. } else {
  2446. break;
  2447. }
  2448. // Also, re-validate Current Sack list in SackListentry
  2449. Prev =
  2450. STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2451. Current = Prev->next;
  2452. while (Current && Current->begin != CurBegin) {
  2453. // The SACK list changed while in TCPFastSend.
  2454. // Just bail out.
  2455. Current = Current->next;
  2456. }
  2457. if (Current) {
  2458. Current = Current->next;
  2459. } else {
  2460. break;
  2461. }
  2462. }
  2463. } else {
  2464. ToBeSent = SeqTCB->tcb_mss;
  2465. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  2466. ToBeSent);
  2467. }
  2468. } else {
  2469. ASSERT(SeqTCB->tcb_cursend == NULL);
  2470. }
  2471. }
  2472. SeqTCB->tcb_cwin = NewCWin;
  2473. // Make sure there is nothing outstanding or the retransmit timer is
  2474. // running or we are in the process of sending a segment (and yet to
  2475. // start the timer).
  2476. ASSERT((SeqTCB->tcb_sendnext == SeqTCB->tcb_senduna) ||
  2477. TCB_TIMER_RUNNING_R(SeqTCB, RXMIT_TIMER) ||
  2478. (SeqTCB->tcb_flags & IN_TCP_SEND));
  2479. TCBHandle = DISPATCH_LEVEL;
  2480. DerefTCB(SeqTCB, TCBHandle);
  2481. return;
  2482. }
//* TCPFastSend - To send a segment without changing TCB state
//
//  Called to handle fast retransmit of the segment.
//  tcb_lock will be held while entering (called by TCPRcv).
//
//  Input:  SendTCB         - Pointer to TCB.
//          in_SendBuf      - Pointer to the NDIS buffer to start sending from.
//          in_SendOfs      - Send offset within in_SendBuf.
//          in_SendReq      - Current send request.
//          in_SendSize     - Size of this send.
//          NextSeq         - Sequence number placed in the TCP header of the
//                            first segment sent.
//          in_ToBeSent     - Upper bound on the number of bytes to send; the
//                            amount is further limited by in_SendSize and the
//                            send window.
//
//  Returns: Nothing.
//
  2496. void
  2497. TCPFastSend(TCB * SendTCB, PNDIS_BUFFER in_SendBuf, uint in_SendOfs,
  2498. TCPSendReq * in_SendReq, uint in_SendSize, SeqNum NextSeq,
  2499. int in_ToBeSent)
  2500. {
  2501. uint AmountToSend; // Amount to send this time.
  2502. uint AmountLeft;
  2503. TCPHeader *Header; // TCP header for a send.
  2504. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  2505. TCPSendReq *CurSend;
  2506. SendCmpltContext *SCC;
  2507. SeqNum OldSeq;
  2508. SeqNum SendNext;
  2509. IP_STATUS SendStatus;
  2510. uint AmtOutstanding, AmtUnsent;
  2511. CTELockHandle TCBHandle;
  2512. void *Irp;
  2513. uint TSLen=0;
  2514. uint SendOfs = in_SendOfs;
  2515. uint SendSize = in_SendSize;
  2516. PNDIS_BUFFER SendBuf = in_SendBuf;
  2517. SendNext = NextSeq;
  2518. CurSend = in_SendReq;
  2519. TCBHandle = DISPATCH_LEVEL;
  2520. CTEStructAssert(SendTCB, tcb);
  2521. ASSERT(SendTCB->tcb_refcnt != 0);
  2522. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  2523. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  2524. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  2525. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  2526. AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
  2527. SendTCB->tcb_senduna);
  2528. AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize), (int)SendTCB->tcb_sendwin);
  2529. while (AmtUnsent > 0) {
  2530. if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
  2531. // Since tcb_lock is releasd in this loop
  2532. // it is possible that delayed ack acked
  2533. // what we are trying to retransmit.
  2534. goto error_oor;
  2535. }
  2536. //This was minimum of sendwin and amtunsent
  2537. AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
  2538. // Time stamp option addition might force us to cut the data
  2539. // to be sent by 12 bytes.
  2540. if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
  2541. (AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
  2542. AmountToSend -= ALIGNED_TS_OPT_SIZE;
  2543. }
  2544. // See if we have enough to send. We'll send if we have at least a
  2545. // segment, or if we really have some data to send and we can send
  2546. // all that we have, or the send window is > 0 and we need to force
  2547. // output or send a FIN (note that if we need to force output
  2548. // SendWin will be at least 1 from the check above), or if we can
  2549. // send an amount == to at least half the maximum send window
  2550. // we've seen.
  2551. ASSERT((int)AmtUnsent >= 0);
  2552. // It's OK to send something. Try to get a header buffer now.
  2553. // Mark the TCB for debugging.
  2554. // This should be removed for shipping version.
  2555. FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
  2556. if (FirstBuffer != NULL) {
  2557. // Got a header buffer. Loop through the sends on the TCB,
  2558. // building a frame.
  2559. CurrentBuffer = FirstBuffer;
  2560. Header = (TCPHeader *) ((PUCHAR)Header + LocalNetInfo.ipi_hsize);
  2561. // allow room for filling time stamp options.
  2562. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  2563. // Account for time stamp options
  2564. TSLen = ALIGNED_TS_OPT_SIZE;
  2565. NdisAdjustBufferLength(FirstBuffer,
  2566. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  2567. SCC = ALIGN_UP_POINTER((SendCmpltContext *) (Header + 1),PVOID);
  2568. SCC = (SendCmpltContext *)((uchar *) SCC + ALIGNED_TS_OPT_SIZE);
  2569. } else {
  2570. SCC = (SendCmpltContext *) (Header + 1);
  2571. }
  2572. SCC = ALIGN_UP_POINTER(SCC, PVOID);
  2573. #if DBG
  2574. SCC->scc_sig = scc_signature;
  2575. #endif
  2576. FillTCPHeader(SendTCB, Header);
  2577. {
  2578. ulong L = SendNext;
  2579. Header->tcp_seq = net_long(L);
  2580. }
  2581. SCC->scc_ubufcount = 0;
  2582. SCC->scc_tbufcount = 0;
  2583. SCC->scc_count = 0;
  2584. SCC->scc_LargeSend = 0;
  2585. AmountLeft = AmountToSend;
  2586. if (AmountToSend != 0) {
  2587. long Result;
  2588. CTEStructAssert(CurSend, tsr);
  2589. SCC->scc_firstsend = CurSend;
  2590. do {
  2591. BOOLEAN DirectSend = FALSE;
  2592. ASSERT(CurSend->tsr_refcnt > 0);
  2593. Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
  2594. ASSERT(Result > 0);
  2595. SCC->scc_count++;
  2596. // If the current send offset is 0 and the current
  2597. // send is less than or equal to what we have left
  2598. // to send, we haven't already put a transport
  2599. // buffer on this send, and nobody else is using
  2600. // the buffer chain directly, just use the input
  2601. // buffers. We check for other people using them
  2602. // by looking at tsr_lastbuf. If it's NULL,
  2603. // nobody else is using the buffers. If it's not
  2604. // NULL, somebody is.
  2605. if (SendOfs == 0 &&
  2606. (SendSize <= AmountLeft) &&
  2607. (SCC->scc_tbufcount == 0) &&
  2608. CurSend->tsr_lastbuf == NULL) {
  2609. ulong length = 0;
  2610. PNDIS_BUFFER tmp = SendBuf;
  2611. while (tmp) {
  2612. length += NdisBufferLength(tmp);
  2613. tmp = NDIS_BUFFER_LINKAGE(tmp);
  2614. }
  2615. // If sum of mdl lengths is > request length
  2616. // use slow path.
  2617. if (AmountLeft >= length) {
  2618. DirectSend = TRUE;
  2619. }
  2620. }
  2621. if (DirectSend) {
  2622. NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendBuf;
  2623. do {
  2624. SCC->scc_ubufcount++;
  2625. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  2626. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  2627. CurSend->tsr_lastbuf = CurrentBuffer;
  2628. AmountLeft -= SendSize;
  2629. SendSize = 0;
  2630. } else {
  2631. uint AmountToDup;
  2632. PNDIS_BUFFER NewBuf, Buf;
  2633. uint Offset;
  2634. NDIS_STATUS NStatus;
  2635. uchar *VirtualAddress;
  2636. uint Length;
  2637. // Either the current send has more data than
  2638. // we want to send, or the starting offset is
  2639. // not 0. In either case we'll need to loop
  2640. // through the current send, allocating buffers.
  2641. Buf = SendBuf;
  2642. Offset = SendOfs;
  2643. do {
  2644. ASSERT(Buf != NULL);
  2645. TcpipQueryBuffer(Buf, &VirtualAddress, &Length,
  2646. NormalPagePriority);
  2647. if (VirtualAddress == NULL) {
  2648. if (SCC->scc_tbufcount == 0 &&
  2649. SCC->scc_ubufcount == 0) {
  2650. //TCPSendComplete(SCC, FirstBuffer,IP_SUCCESS);
  2651. goto error_oor1;
  2652. }
  2653. AmountToSend -= AmountLeft;
  2654. AmountLeft = 0;
  2655. break;
  2656. }
  2657. ASSERT((Offset < Length) ||
  2658. (Offset == 0 && Length == 0));
  2659. // Adjust the length for the offset into
  2660. // this buffer.
  2661. Length -= Offset;
  2662. AmountToDup = MIN(AmountLeft, Length);
  2663. NdisAllocateBuffer(&NStatus, &NewBuf,
  2664. TCPSendBufferPool,
  2665. VirtualAddress + Offset,
  2666. AmountToDup);
  2667. if (NStatus == NDIS_STATUS_SUCCESS) {
  2668. SCC->scc_tbufcount++;
  2669. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  2670. CurrentBuffer = NewBuf;
  2671. if (AmountToDup >= Length) {
  2672. // Exhausted this buffer.
  2673. Buf = NDIS_BUFFER_LINKAGE(Buf);
  2674. Offset = 0;
  2675. } else {
  2676. Offset += AmountToDup;
  2677. ASSERT(Offset < NdisBufferLength(Buf));
  2678. }
  2679. SendSize -= AmountToDup;
  2680. AmountLeft -= AmountToDup;
  2681. } else {
  2682. // Couldn't allocate a buffer. If
  2683. // the packet is already partly built,
  2684. // send what we've got, otherwise
  2685. // bail out.
  2686. if (SCC->scc_tbufcount == 0 &&
  2687. SCC->scc_ubufcount == 0) {
  2688. goto error_oor1;
  2689. }
  2690. AmountToSend -= AmountLeft;
  2691. AmountLeft = 0;
  2692. }
  2693. } while (AmountLeft && SendSize);
  2694. SendBuf = Buf;
  2695. SendOfs = Offset;
  2696. }
  2697. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  2698. ushort UP;
  2699. // This send is urgent data. We need to figure
  2700. // out what the urgent data pointer should be.
  2701. // We know sendnext is the starting sequence
  2702. // number of the frame, and that at the top of
  2703. // this do loop sendnext identified a byte in
  2704. // the CurSend at that time. We advanced CurSend
  2705. // at the same rate we've decremented
  2706. // AmountLeft (AmountToSend - AmountLeft ==
  2707. // AmountBuilt), so sendnext +
  2708. // (AmountToSend - AmountLeft) identifies a byte
  2709. // in the current value of CurSend, and that
  2710. // quantity plus tcb_sendsize is the sequence
  2711. // number one beyond the current send.
  2712. UP =
  2713. (ushort) (AmountToSend - AmountLeft) +
  2714. (ushort)SendSize -
  2715. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  2716. Header->tcp_urgent = net_short(UP);
  2717. Header->tcp_flags |= TCP_FLAG_URG;
  2718. }
  2719. // See if we've exhausted this send. If we have,
  2720. // set the PUSH bit in this frame and move on to
  2721. // the next send. We also need to check the
  2722. // urgent data bit.
  2723. if (SendSize == 0) {
  2724. Queue *Next;
  2725. ulong PrevFlags;
  2726. // We've exhausted this send. Set the PUSH bit.
  2727. Header->tcp_flags |= TCP_FLAG_PUSH;
  2728. PrevFlags = CurSend->tsr_flags;
  2729. Next = QNEXT(&CurSend->tsr_req.tr_q);
  2730. if (Next != QEND(&SendTCB->tcb_sendq)) {
  2731. CurSend = STRUCT_OF(TCPSendReq,
  2732. QSTRUCT(TCPReq, Next, tr_q),
  2733. tsr_req);
  2734. CTEStructAssert(CurSend, tsr);
  2735. SendSize = CurSend->tsr_unasize;
  2736. SendOfs = CurSend->tsr_offset;
  2737. SendBuf = CurSend->tsr_buffer;
  2738. // Check the urgent flags. We can't combine
  2739. // new urgent data on to the end of old
  2740. // non-urgent data.
  2741. if ((PrevFlags & TSR_FLAG_URG) && !
  2742. (CurSend->tsr_flags & TSR_FLAG_URG))
  2743. break;
  2744. } else {
  2745. ASSERT(AmountLeft == 0);
  2746. CurSend = NULL;
  2747. SendBuf = NULL;
  2748. }
  2749. }
  2750. } while (AmountLeft != 0);
  2751. } else {
  2752. // Amt to send is 0.
  2753. // Just bail out and strat timer.
  2754. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2755. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2756. }
  2757. FreeTCPHeader(FirstBuffer);
  2758. return;
  2759. }
  2760. // Adjust for what we're really going to send.
  2761. AmountToSend -= AmountLeft;
  2762. OldSeq = SendNext;
  2763. SendNext += AmountToSend;
  2764. AmtUnsent -= AmountToSend;
  2765. TStats.ts_retranssegs++;
  2766. // We've built the frame entirely. If we've send everything
  2767. // we have and their's a FIN pending, OR it in.
  2768. AmountToSend += sizeof(TCPHeader);
  2769. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
  2770. FORCE_OUTPUT);
  2771. STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
  2772. STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
  2773. SendTCB->tcb_rcvdsegs = 0;
  2774. if ( (SendTCB->tcb_flags & KEEPALIVE) && (SendTCB->tcb_conn != NULL) )
  2775. START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
  2776. SendTCB->tcb_kacount = 0;
  2777. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2778. Irp = NULL;
  2779. if (SCC->scc_firstsend) {
  2780. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2781. }
  2782. // We're all set. Xsum it and send it.
  2783. if (SendTCB->tcb_rce &&
  2784. (SendTCB->tcb_rce->rce_OffloadFlags &
  2785. TCP_XMT_CHECKSUM_OFFLOAD) &&
  2786. (SendTCB->tcb_rce->rce_OffloadFlags &
  2787. TCP_CHECKSUM_OPT_OFFLOAD) ){
  2788. uint PHXsum =
  2789. SendTCB->tcb_phxsum +
  2790. (uint)net_short(AmountToSend + TSLen);
  2791. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
  2792. Header->tcp_xsum = (ushort) PHXsum;
  2793. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2794. } else {
  2795. Header->tcp_xsum =
  2796. ~XsumSendChain(
  2797. SendTCB->tcb_phxsum +
  2798. (uint)net_short(AmountToSend + TSLen),
  2799. FirstBuffer);
  2800. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2801. }
  2802. SendStatus =
  2803. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  2804. SCC,
  2805. FirstBuffer,
  2806. AmountToSend + TSLen,
  2807. SendTCB->tcb_daddr,
  2808. SendTCB->tcb_saddr,
  2809. &SendTCB->tcb_opt,
  2810. SendTCB->tcb_rce,
  2811. PROTOCOL_TCP,
  2812. Irp);
  2813. //Reacquire Lock to keep DerefTCB happy
  2814. //Bug #63904
  2815. if (SendStatus != IP_PENDING) {
  2816. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2817. }
  2818. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2819. SendTCB->tcb_error = SendStatus;
  2820. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2821. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2822. }
  2823. } else { // FirstBuffer != NULL.
  2824. goto error_oor;
  2825. }
  2826. } //while AmtUnsent > 0
  2827. return;
  2828. // Common case error handling code for out of resource conditions. Start the
  2829. // retransmit timer if it's not already running (so that we try this again
  2830. // later), clean up and return.
  2831. error_oor:
  2832. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2833. ushort tmp;
  2834. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2835. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2836. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2837. }
  2838. return;
  2839. error_oor1:
  2840. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2841. ushort tmp;
  2842. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2843. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2844. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2845. }
  2846. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2847. return;
  2848. }
  2849. //* TDISend - Send data on a connection.
  2850. //
  2851. // The main TDI send entry point. We take the input parameters, validate them,
  2852. // allocate a send request, etc. We then put the send request on the queue.
  2853. // If we have no other sends on the queue or Nagling is disabled we'll
  2854. // call TCPSend to send the data.
  2855. //
  2856. // Input: Request - The TDI request for the call.
  2857. // Flags - Flags for this send.
  2858. // SendLength - Length in bytes of send.
  2859. // SendBuffer - Pointer to buffer chain to be sent.
  2860. //
  2861. // Returns: Status of attempt to send.
  2862. //
  2863. TDI_STATUS
  2864. TdiSend(PTDI_REQUEST Request, ushort Flags, uint SendLength,
  2865. PNDIS_BUFFER SendBuffer)
  2866. {
  2867. TCPConn *Conn;
  2868. TCB *SendTCB;
  2869. TCPSendReq *SendReq;
  2870. CTELockHandle ConnTableHandle;
  2871. TDI_STATUS Error;
  2872. uint EmptyQ;
  2873. #if DBG_VALIDITY_CHECK
  2874. // Check for Mdl sanity in send requests
  2875. // Should be removed for RTM
  2876. uint RealSendSize;
  2877. PNDIS_BUFFER Temp;
  2878. // Loop through the buffer chain, and make sure that the length matches
  2879. // up with SendLength.
  2880. Temp = SendBuffer;
  2881. RealSendSize = 0;
  2882. if (Temp != NULL) {
  2883. do {
  2884. RealSendSize += NdisBufferLength(Temp);
  2885. Temp = NDIS_BUFFER_LINKAGE(Temp);
  2886. } while (Temp != NULL);
  2887. if (RealSendSize < SendLength) {
  2888. PIRP Irp = (PIRP)Request->RequestContext;
  2889. PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
  2890. DbgPrint("Invalid TDI_SEND request issued to \\\\Device\\\\Tcp.\n");
  2891. DbgPrint("Irp: %p Mdl: %p CompletionRoutine: %p\n",
  2892. Irp, Irp->MdlAddress, IrpSp->CompletionRoutine);
  2893. DbgPrint("This is not a bug in tcpip.sys.\n");
  2894. DbgPrint("Please notify the originator of this IRP.\n");
  2895. DbgBreakPoint();
  2896. }
  2897. }
  2898. #endif
  2899. //CTEGetLock(&ConnTableLock, &ConnTableHandle);
  2900. Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext), &ConnTableHandle);
  2901. if (Conn != NULL) {
  2902. CTEStructAssert(Conn, tc);
  2903. SendTCB = Conn->tc_tcb;
  2904. if (SendTCB != NULL) {
  2905. CTEStructAssert(SendTCB, tcb);
  2906. CTEGetLockAtDPC(&SendTCB->tcb_lock);
  2907. CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), DISPATCH_LEVEL);
  2908. if (DATA_SEND_STATE(SendTCB->tcb_state) && !CLOSING(SendTCB)) {
  2909. // We have a TCB, and it's valid. Get a send request now.
  2910. CheckTCBSends(SendTCB);
  2911. if (SendLength == 0) {
  2912. Error = TDI_SUCCESS;
  2913. } else if (((ULONG64)SendTCB->tcb_unacked + SendLength)
  2914. >= MAXULONG) {
  2915. Error = TDI_INVALID_PARAMETER;
  2916. } else {
  2917. SendReq = GetSendReq();
  2918. if (SendReq != NULL) {
  2919. SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
  2920. SendReq->tsr_req.tr_context = Request->RequestContext;
  2921. SendReq->tsr_buffer = SendBuffer;
  2922. SendReq->tsr_size = SendLength;
  2923. SendReq->tsr_unasize = SendLength;
  2924. SendReq->tsr_refcnt = 1; // ACK will decrement this ref
  2925. SendReq->tsr_offset = 0;
  2926. SendReq->tsr_lastbuf = NULL;
  2927. SendReq->tsr_time = TCPTime;
  2928. SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ?
  2929. TSR_FLAG_URG : 0;
  2930. SendTCB->tcb_unacked += SendLength;
  2931. if (Flags & TDI_SEND_AND_DISCONNECT) {
  2932. //move the state to fin_wait and
  2933. //mark the tcb for send and disconnect
  2934. if (SendTCB->tcb_state == TCB_ESTAB) {
  2935. SendTCB->tcb_state = TCB_FIN_WAIT1;
  2936. } else {
  2937. ASSERT(SendTCB->tcb_state == TCB_CLOSE_WAIT);
  2938. SendTCB->tcb_state = TCB_LAST_ACK;
  2939. }
  2940. SendTCB->tcb_slowcount++;
  2941. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2942. SendTCB->tcb_fastchk |= TCP_FLAG_SEND_AND_DISC;
  2943. SendTCB->tcb_flags |= FIN_NEEDED;
  2944. SendReq->tsr_flags |= TSR_FLAG_SEND_AND_DISC;
  2945. //extrac reference to make sure that
  2946. //this request will not be completed until the
  2947. //connection is closed
  2948. SendReq->tsr_refcnt++;
  2949. InterlockedDecrement((PLONG)&TStats.ts_currestab);
  2950. }
  2951. EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
  2952. ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
  2953. if (SendTCB->tcb_cursend == NULL) {
  2954. SendTCB->tcb_cursend = SendReq;
  2955. SendTCB->tcb_sendbuf = SendBuffer;
  2956. SendTCB->tcb_sendofs = 0;
  2957. SendTCB->tcb_sendsize = SendLength;
  2958. }
  2959. if (EmptyQ) {
  2960. REFERENCE_TCB(SendTCB);
  2961. TCPSend(SendTCB, ConnTableHandle);
  2962. } else if (!(SendTCB->tcb_flags & NAGLING) ||
  2963. (SendTCB->tcb_unacked -
  2964. (SendTCB->tcb_sendmax -
  2965. SendTCB->tcb_senduna)) >=
  2966. SendTCB->tcb_mss) {
  2967. REFERENCE_TCB(SendTCB);
  2968. TCPSend(SendTCB, ConnTableHandle);
  2969. } else
  2970. CTEFreeLock(&SendTCB->tcb_lock,
  2971. ConnTableHandle);
  2972. return TDI_PENDING;
  2973. } else
  2974. Error = TDI_NO_RESOURCES;
  2975. }
  2976. } else
  2977. Error = TDI_INVALID_STATE;
  2978. CTEFreeLock(&SendTCB->tcb_lock, ConnTableHandle);
  2979. return Error;
  2980. } else {
  2981. CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), ConnTableHandle);
  2982. Error = TDI_INVALID_STATE;
  2983. }
  2984. } else
  2985. Error = TDI_INVALID_CONNECTION;
  2986. //CTEFreeLock(&ConnTableLock, ConnTableHandle);
  2987. return Error;
  2988. }
  2989. #pragma BEGIN_INIT
  2990. extern void *TLRegisterProtocol(uchar Protocol, void *RcvHandler,
  2991. void *XmitHandler, void *StatusHandler,
  2992. void *RcvCmpltHandler, void *PnPHandler,
  2993. void *ElistHandler);
  2994. extern IP_STATUS TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src,
  2995. IPAddr LocalAddr, IPAddr SrcAddr,
  2996. IPHeader UNALIGNED * IPH, uint IPHLength,
  2997. IPRcvBuf * RcvBuf, uint Size, uchar IsBCast,
  2998. uchar Protocol, IPOptInfo * OptInfo);
  2999. extern void TCPRcvComplete(void);
  3000. uchar SendInited = FALSE;
  3001. //* InitTCPSend - Initialize our send side.
  3002. //
  3003. // Called during init time to initialize our TCP send state.
  3004. //
  3005. // Input: Nothing.
  3006. //
  3007. // Returns: TRUE if we inited, false if we didn't.
  3008. //
  3009. int
  3010. InitTCPSend(void)
  3011. {
  3012. NDIS_STATUS Status;
  3013. TcpHeaderBufferSize =
  3014. (USHORT)(ALIGN_UP(LocalNetInfo.ipi_hsize,PVOID) +
  3015. ALIGN_UP((sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE + ALIGNED_SACK_OPT_SIZE),PVOID) +
  3016. ALIGN_UP(MAX(MSS_OPT_SIZE, sizeof(SendCmpltContext)),PVOID));
  3017. #if BACK_FILL
  3018. TcpHeaderBufferSize += MAX_BACKFILL_HDR_SIZE;
  3019. #endif
  3020. TcpHeaderPool = MdpCreatePool (TcpHeaderBufferSize, 'thCT');
  3021. if (!TcpHeaderPool)
  3022. {
  3023. return FALSE;
  3024. }
  3025. NdisAllocateBufferPool(&Status, &TCPSendBufferPool, NUM_TCP_BUFFERS);
  3026. if (Status != NDIS_STATUS_SUCCESS) {
  3027. MdpDestroyPool(TcpHeaderPool);
  3028. return FALSE;
  3029. }
  3030. TCPProtInfo = TLRegisterProtocol(PROTOCOL_TCP, TCPRcv, TCPSendComplete,
  3031. TCPStatus, TCPRcvComplete,
  3032. TCPPnPPowerRequest, TCPElistChangeHandler);
  3033. if (TCPProtInfo == NULL) {
  3034. MdpDestroyPool(TcpHeaderPool);
  3035. NdisFreeBufferPool(TCPSendBufferPool);
  3036. return FALSE;
  3037. }
  3038. SendInited = TRUE;
  3039. return TRUE;
  3040. }
  3041. //* UnInitTCPSend - UnInitialize our send side.
  3042. //
  3043. // Called during init time if we're going to fail to initialize.
  3044. //
  3045. // Input: Nothing.
  3046. //
  3047. // Returns: TRUE if we inited, false if we didn't.
  3048. //
  3049. void
  3050. UnInitTCPSend(void)
  3051. {
  3052. if (!SendInited)
  3053. return;
  3054. TLRegisterProtocol(PROTOCOL_TCP, NULL, NULL, NULL, NULL, NULL, NULL);
  3055. MdpDestroyPool(TcpHeaderPool);
  3056. NdisFreeBufferPool(TCPSendBufferPool);
  3057. }
  3058. #pragma END_INIT