Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3994 lines
134 KiB

  1. /********************************************************************/
  2. /** Microsoft LAN Manager **/
  3. /** Copyright(c) Microsoft Corp., 1990-2000 **/
  4. /********************************************************************/
  5. /* :ts=4 */
  6. //** TCPSEND.C - TCP send protocol code.
  7. //
  8. // This file contains the code for sending Data and Control segments.
  9. //
  10. #include "precomp.h"
  11. #include "addr.h"
  12. #include "tcp.h"
  13. #include "tcb.h"
  14. #include "tcpconn.h"
  15. #include "tcpsend.h"
  16. #include "tcprcv.h"
  17. #include "tlcommon.h"
  18. #include "info.h"
  19. #include "tcpcfg.h"
  20. #include "secfltr.h"
  21. #include "tcpipbuf.h"
  22. #include "mdlpool.h"
  23. #include "pplasl.h"
  24. #if GPC
  25. #include "qos.h"
  26. #include "traffic.h"
  27. #include "gpcifc.h"
  28. #include "ntddtc.h"
  29. extern GPC_HANDLE hGpcClient[GPC_CF_MAX];
  30. extern ULONG GpcCfCounts[GPC_CF_MAX];
  31. extern GPC_EXPORTED_CALLS GpcEntries;
  32. extern ULONG ServiceTypeOffset;
  33. extern ULONG GPCcfInfo;
  34. #endif
  35. NTSTATUS
  36. GetIFAndLink(void *Rce, UINT * IFIndex, IPAddr * NextHop);
  37. extern ulong DisableUserTOSSetting;
  38. uint MaxSendSegments = 64;
  39. #if MILLEN
  40. uint DisableLargeSendOffload = 1;
  41. #else // MILLEN
  42. uint DisableLargeSendOffload = 0;
  43. #endif // !MILLEN
  44. #if DBG
  45. ulong DbgDcProb = 0;
  46. ulong DbgTcpSendHwChksumCount = 0;
  47. #endif
  48. extern HANDLE TcpRequestPool;
  49. extern CTELock *pTWTCBTableLock;
  50. extern CACHE_LINE_KSPIN_LOCK RequestCompleteListLock;
  51. #if DROP_PKT
  52. //NKS: To simulate packet drops
  53. // For debugging sack options
  54. uint SimPacketDrop = 0, PkttoDrop = 0, DropPackets = 0;
  55. #endif
  56. extern uint TcpHostOpts;
  57. extern uint TcpHostSendOpts;
  // Aligned size of the largest SACK option we build: 4 bytes for the SACK
  // option header itself plus a maximum of 4 SACK blocks of 2 longwords
  // (8 bytes) each. The expansion is parenthesized so the macro can be used
  // safely inside larger expressions (e.g. multiplied or subtracted).
  #define ALIGNED_SACK_OPT_SIZE (4 + 8 * 4)
  59. void
  60. ClassifyPacket(TCB *SendTCB);
  61. void
  62. TCPFastSend(TCB * SendTCB,
  63. PNDIS_BUFFER in_SendBuf,
  64. uint in_SendOfs,
  65. TCPSendReq * in_SendReq,
  66. uint in_SendSize,
  67. SeqNum NextSeq,
  68. int in_ToBeSent);
  69. void *TCPProtInfo; // TCP protocol info for IP.
  70. NDIS_HANDLE TCPSendBufferPool;
  71. USHORT TcpHeaderBufferSize;
  72. HANDLE TcpHeaderPool;
  73. extern IPInfo LocalNetInfo;
  74. //
  75. // All of the init code can be discarded.
  76. //
  77. int InitTCPSend(void);
  78. void UnInitTCPSend(void);
  79. #ifdef ALLOC_PRAGMA
  80. #pragma alloc_text(INIT, InitTCPSend)
  81. #pragma alloc_text(INIT, UnInitTCPSend)
  82. #endif
  83. extern void ResetSendNext(TCB * SeqTCB, SeqNum NewSeq);
  84. extern NTSTATUS
  85. TCPPnPPowerRequest(void *ipContext, IPAddr ipAddr, NDIS_HANDLE handle,
  86. PNET_PNP_EVENT netPnPEvent);
  87. extern void TCPElistChangeHandler(void);
  88. //* GetTCPHeader - Get a TCP header buffer.
  89. //
  90. // Called when we need to get a TCP header buffer. This routine is
  91. // specific to the particular environment (VxD or NT). All we
  92. // need to do is pop the buffer from the free list.
  93. //
  94. // Input: Nothing.
  95. //
  96. // Returns: Pointer to an NDIS buffer, or NULL is none.
  97. //
  98. PNDIS_BUFFER
  99. GetTCPHeaderAtDpcLevel(TCPHeader **Header)
  100. {
  101. PNDIS_BUFFER Buffer;
  102. #if DBG
  103. *Header = NULL;
  104. #endif
  105. Buffer = MdpAllocateAtDpcLevel(TcpHeaderPool, Header);
  106. if (Buffer) {
  107. ASSERT(*Header);
  108. NdisAdjustBufferLength(Buffer, sizeof(TCPHeader));
  109. #if BACK_FILL
  110. ASSERT(Buffer->ByteOffset >= 40);
  111. (ULONG_PTR)(*Header) += MAX_BACKFILL_HDR_SIZE;
  112. (ULONG_PTR)Buffer->MappedSystemVa += MAX_BACKFILL_HDR_SIZE;
  113. Buffer->ByteOffset += MAX_BACKFILL_HDR_SIZE;
  114. Buffer->MdlFlags |= MDL_NETWORK_HEADER;
  115. #endif
  116. }
  117. return Buffer;
  118. }
  119. #if MILLEN
  120. #define GetTCPHeader GetTCPHeaderAtDpcLevel
  121. #else
  122. __inline
  123. PNDIS_BUFFER
  124. GetTCPHeader(TCPHeader **Header)
  125. {
  126. KIRQL OldIrql;
  127. PNDIS_BUFFER Buffer;
  128. OldIrql = KeRaiseIrqlToDpcLevel();
  129. Buffer = GetTCPHeaderAtDpcLevel(Header);
  130. KeLowerIrql(OldIrql);
  131. return Buffer;
  132. }
  133. #endif
  134. //* FreeTCPHeader - Free a TCP header buffer.
  135. //
  136. // Called to free a TCP header buffer.
  137. //
  138. // Input: Buffer to be freed.
  139. //
  140. // Returns: Nothing.
  141. //
  142. __inline
  143. VOID
  144. FreeTCPHeader(PNDIS_BUFFER Buffer)
  145. {
  146. NdisAdjustBufferLength(Buffer, TcpHeaderBufferSize);
  147. #if BACK_FILL
  148. (ULONG_PTR)Buffer->MappedSystemVa -= MAX_BACKFILL_HDR_SIZE;
  149. Buffer->ByteOffset -= MAX_BACKFILL_HDR_SIZE;
  150. #endif
  151. MdpFree(Buffer);
  152. }
  153. //* FreeSendReq - Free a send request structure.
  154. //
  155. // Called to free a send request structure.
  156. //
  157. // Input: FreedReq - Connection request structure to be freed.
  158. //
  159. // Returns: Nothing.
  160. //
  161. __inline
  162. void
  163. FreeSendReq(TCPSendReq *Request)
  164. {
  165. PplFree(TcpRequestPool, Request);
  166. }
  167. //* GetSendReq - Get a send request structure.
  168. //
  169. // Called to get a send request structure.
  170. //
  171. // Input: Nothing.
  172. //
  173. // Returns: Pointer to SendReq structure, or NULL if none.
  174. //
  175. __inline
  176. TCPSendReq *
  177. GetSendReq(VOID)
  178. {
  179. TCPSendReq *Request;
  180. LOGICAL FromList;
  181. Request = PplAllocate(TcpRequestPool, &FromList);
  182. if (Request) {
  183. #if DBG
  184. Request->tsr_req.tr_sig = tr_signature;
  185. Request->tsr_sig = tsr_signature;
  186. #endif
  187. }
  188. return Request;
  189. }
  190. //* TCPSendComplete - Complete a TCP send.
  191. //
  192. // Called by IP when a send we've made is complete. We free the buffer,
  193. // and possibly complete some sends. Each send queued on a TCB has a ref.
  194. // count with it, which is the number of times a pointer to a buffer
  195. // associated with the send has been passed to the underlying IP layer. We
  196. // can't complete a send until that count it 0. If this send was actually
  197. // from a send of data, we'll go down the chain of send and decrement the
  198. // refcount on each one. If we have one going to 0 and the send has already
  199. // been acked we'll complete the send. If it hasn't been acked we'll leave
  200. // it until the ack comes in.
  201. //
  202. // NOTE: We aren't protecting any of this with locks. When we port this to
  203. // NT we'll need to fix this, probably with a global lock. See the comments
  204. // in ACKSend() in TCPRCV.C for more details.
  205. //
  206. // Input: Context - Context we gave to IP.
  207. // BufferChain - BufferChain for send.
  208. //
  209. // Returns: Nothing.
  210. //
  211. void
  212. TCPSendComplete(void *Context, PNDIS_BUFFER BufferChain, IP_STATUS SendStatus)
  213. {
  214. BOOLEAN DoRcvComplete = FALSE;
  215. CTELockHandle SendHandle;
  216. PNDIS_BUFFER CurrentBuffer;
  217. if (Context != NULL) {
  218. SendCmpltContext *SCContext = (SendCmpltContext *) Context;
  219. TCPSendReq *CurrentSend;
  220. uint i;
  221. CTEStructAssert(SCContext, scc);
  222. if (SCContext->scc_LargeSend) {
  223. TCB *LargeSendTCB = SCContext->scc_LargeSend;
  224. CTELockHandle TCBHandle;
  225. CTEGetLock(&LargeSendTCB->tcb_lock, &TCBHandle);
  226. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  227. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: tcb %x sent %d of %d una %u "
  228. "next %u unacked %u\n", LargeSendTCB,
  229. SCContext->scc_ByteSent, SCContext->scc_SendSize,
  230. LargeSendTCB->tcb_senduna, LargeSendTCB->tcb_sendnext,
  231. LargeSendTCB->tcb_unacked));
  232. }
  233. if (SCContext->scc_ByteSent < SCContext->scc_SendSize) {
  234. uint BytesNotSent = SCContext->scc_SendSize -
  235. SCContext->scc_ByteSent;
  236. SeqNum Next = LargeSendTCB->tcb_sendnext;
  237. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  238. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSendComplete: unsent %d\n",
  239. SCContext->scc_SendSize-SCContext->scc_ByteSent));
  240. }
  241. if (SEQ_GTE((Next - BytesNotSent), LargeSendTCB->tcb_senduna) &&
  242. SEQ_LT((Next - BytesNotSent), LargeSendTCB->tcb_sendnext)) {
  243. ResetSendNext(LargeSendTCB, (Next - BytesNotSent));
  244. }
  245. }
  246. #if DBG
  247. LargeSendTCB->tcb_LargeSend--;
  248. #endif
  249. if (LargeSendTCB->tcb_unacked)
  250. DelayAction(LargeSendTCB, NEED_OUTPUT);
  251. DerefTCB(LargeSendTCB, TCBHandle);
  252. }
  253. // First, loop through and free any NDIS buffers here that need to be.
  254. // freed. We'll skip any 'user' buffers, and then free our buffers. We
  255. // need to do this before decrementing the reference count to avoid
  256. // destroying the buffer chain if we have to zap tsr_lastbuf->Next to
  257. // NULL.
  258. CurrentBuffer = NDIS_BUFFER_LINKAGE(BufferChain);
  259. for (i = 0; i < (uint) SCContext->scc_ubufcount; i++) {
  260. ASSERT(CurrentBuffer != NULL);
  261. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  262. }
  263. for (i = 0; i < (uint) SCContext->scc_tbufcount; i++) {
  264. PNDIS_BUFFER TempBuffer;
  265. ASSERT(CurrentBuffer != NULL);
  266. TempBuffer = CurrentBuffer;
  267. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  268. NdisFreeBuffer(TempBuffer);
  269. }
  270. CurrentSend = SCContext->scc_firstsend;
  271. i = 0;
  272. while (i < SCContext->scc_count) {
  273. Queue *TempQ;
  274. long Result;
  275. uint SendReqFlags;
  276. TempQ = QNEXT(&CurrentSend->tsr_req.tr_q);
  277. SendReqFlags = CurrentSend->tsr_flags;
  278. CTEStructAssert(CurrentSend, tsr);
  279. Result = CTEInterlockedDecrementLong(&(CurrentSend->tsr_refcnt));
  280. ASSERT(Result >= 0);
  281. if ((Result <= 0) ||
  282. ((SendReqFlags & TSR_FLAG_SEND_AND_DISC) && (Result == 1))) {
  283. TCPReq *Req;
  284. // Reference count has gone to 0 which means the send has
  285. // been ACK'd or cancelled. Complete it now.
  286. // If we've sent directly from this send, NULL out the next
  287. // pointer for the last buffer in the chain.
  288. if (CurrentSend->tsr_lastbuf != NULL) {
  289. NDIS_BUFFER_LINKAGE(CurrentSend->tsr_lastbuf) = NULL;
  290. CurrentSend->tsr_lastbuf = NULL;
  291. }
  292. Req = &CurrentSend->tsr_req;
  293. (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
  294. Req->tr_status == TDI_SUCCESS
  295. ? CurrentSend->tsr_size : 0);
  296. FreeSendReq(CurrentSend);
  297. DoRcvComplete = TRUE;
  298. }
  299. CurrentSend = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
  300. tsr_req);
  301. i++;
  302. }
  303. }
  304. FreeTCPHeader(BufferChain);
  305. if (DoRcvComplete) {
  306. TCPRcvComplete();
  307. }
  308. }
  309. //* RcvWin - Figure out the receive window to offer in an ack.
  310. //
  311. // A routine to figure out what window to offer on a connection. We
  312. // take into account SWS avoidance, what the default connection window is,
  313. // and what the last window we offered is.
  314. //
  315. // Input: WinTCB - TCB on which to perform calculations.
  316. //
  317. // Returns: Window to be offered.
  318. //
  319. uint
  320. RcvWin(TCB * WinTCB)
  321. {
  322. int CouldOffer; // The window size we could offer.
  323. CTEStructAssert(WinTCB, tcb);
  324. CheckRBList(WinTCB->tcb_pendhead, WinTCB->tcb_pendingcnt);
  325. ASSERT(WinTCB->tcb_rcvwin >= 0);
  326. CouldOffer = WinTCB->tcb_defaultwin - WinTCB->tcb_pendingcnt;
  327. ASSERT(CouldOffer >= 0);
  328. ASSERT(CouldOffer >= WinTCB->tcb_rcvwin);
  329. if ((CouldOffer - WinTCB->tcb_rcvwin) >=
  330. (int)MIN(WinTCB->tcb_defaultwin / 2, WinTCB->tcb_mss))
  331. WinTCB->tcb_rcvwin = CouldOffer;
  332. return WinTCB->tcb_rcvwin;
  333. }
  334. //* SendSYNOnSynTCB - Send a SYN segment for syntcb
  335. //
  336. // This is called during connection establishment time to send a SYN
  337. // segment to the peer. We get a buffer if we can, and then fill
  338. // it in. There's a tricky part here where we have to build the MSS
  339. // option in the header - we find the MSS by finding the MSS offered
  340. // by the net for the local address. After that, we send it.
  341. //
  342. // Input: SYNTcb - TCB from which SYN is to be sent.
  343. //
  344. // Returns: Nothing.
  345. //
  346. void
  347. SendSYNOnSynTCB(SYNTCB * SYNTcb, CTELockHandle TCBHandle)
  348. {
  349. PNDIS_BUFFER HeaderBuffer;
  350. TCPHeader *SYNHeader;
  351. uchar *OptPtr;
  352. IP_STATUS SendStatus;
  353. ushort OptSize = 0, HdrSize = 0, rfc1323opts = 0;
  354. BOOLEAN SackOpt = FALSE;
  355. IPOptInfo OptInfo;
  356. uint phxsum;
  357. CTEStructAssert(SYNTcb, syntcb);
  358. HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
  359. // Go ahead and set the retransmission timer now, in case we didn't get a
  360. // buffer. In the future we might want to queue the connection for
  361. // when we free a buffer.
  362. //initialize send state
  363. SYNTcb->syntcb_senduna = SYNTcb->syntcb_sendnext;
  364. SYNTcb->syntcb_sendmax = SYNTcb->syntcb_sendnext;
  365. phxsum = PHXSUM(SYNTcb->syntcb_saddr, SYNTcb->syntcb_daddr,
  366. PROTOCOL_TCP, 0);
  367. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sendsynonsyn %x\n",SYNTcb));
  368. START_TCB_TIMER(SYNTcb->syntcb_rexmittimer, SYNTcb->syntcb_rexmit);
  369. if (HeaderBuffer != NULL) {
  370. ushort TempWin;
  371. ushort MSS;
  372. uchar FoundMSS;
  373. SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
  374. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  375. if (rfc1323opts & TCP_FLAG_WS) {
  376. OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
  377. }
  378. if (rfc1323opts & TCP_FLAG_TS) {
  379. OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
  380. }
  381. if (SYNTcb->syntcb_tcpopts & TCP_FLAG_SACK){
  382. SackOpt = TRUE;
  383. OptSize += 4; // 2 NOPS, SACK kind and length field
  384. }
  385. NdisAdjustBufferLength(HeaderBuffer,
  386. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
  387. SYNHeader->tcp_src = SYNTcb->syntcb_sport;
  388. SYNHeader->tcp_dest = SYNTcb->syntcb_dport;
  389. SYNHeader->tcp_seq = net_long(SYNTcb->syntcb_sendnext);
  390. SYNTcb->syntcb_sendnext++;
  391. if (SEQ_GT(SYNTcb->syntcb_sendnext, SYNTcb->syntcb_sendmax)) {
  392. TCPSIncrementOutSegCount();
  393. SYNTcb->syntcb_sendmax = SYNTcb->syntcb_sendnext;
  394. } else
  395. TStats.ts_retranssegs++;
  396. SYNHeader->tcp_ack = net_long(SYNTcb->syntcb_rcvnext);
  397. // Reuse OPt size for header size determination
  398. // default is MSS amd tcp header size
  399. HdrSize = 6;
  400. // set size field to reflect TS and WND scale option
  401. // tcp header + windowscale + Timestamp + pad
  402. if (rfc1323opts & TCP_FLAG_WS) {
  403. // WS: Add one more long word
  404. HdrSize += 1;
  405. }
  406. if (rfc1323opts & TCP_FLAG_TS) {
  407. // TS: Add 3 more long words
  408. HdrSize += 3;
  409. }
  410. if (SackOpt) {
  411. // SACK: Add 1 more long word
  412. HdrSize += 1;
  413. }
  414. SYNHeader->tcp_flags =
  415. MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
  416. //
  417. // if this is the second time we are trying to send the SYN-ACK,
  418. // increment the count of retried half-connections
  419. //
  420. if (SynAttackProtect &&
  421. (SYNTcb->syntcb_rexmitcnt ==
  422. ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  423. CTEInterlockedAddUlong(&TCPHalfOpenRetried, 1, &SynAttLock.Lock);
  424. }
  425. // Need to do this check whenever TCPHalfOpenRetried is incremented..
  426. if( (TCPHalfOpen >= TCPMaxHalfOpen) &&
  427. (TCPHalfOpenRetried >= TCPMaxHalfOpenRetried) &&
  428. (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT))
  429. {
  430. MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
  431. }
  432. SYNTcb->syntcb_lastack = SYNTcb->syntcb_rcvnext;
  433. TempWin = (ushort) (SYNTcb->syntcb_rcvwin >> SYNTcb->syntcb_rcvwinscale);
  434. SYNHeader->tcp_window = net_short(TempWin);
  435. SYNHeader->tcp_xsum = 0;
  436. OptPtr = (uchar *) (SYNHeader + 1);
  437. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->syntcb_saddr, &MSS);
  438. if (!FoundMSS) {
  439. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  440. FreeTCPHeader(HeaderBuffer);
  441. return;
  442. }
  443. MSS -= sizeof(TCPHeader);
  444. SYNTcb->syntcb_mss = MSS;
  445. *OptPtr++ = TCP_OPT_MSS;
  446. *OptPtr++ = MSS_OPT_SIZE;
  447. **(ushort **) & OptPtr = net_short(MSS);
  448. OptPtr++;
  449. OptPtr++;
  450. if (rfc1323opts & TCP_FLAG_WS) {
  451. // Fill in the WS option headers and value
  452. *OptPtr++ = TCP_OPT_NOP;
  453. *OptPtr++ = TCP_OPT_WS;
  454. *OptPtr++ = WS_OPT_SIZE;
  455. //Initial window scale factor
  456. *OptPtr++ = (uchar) SYNTcb->syntcb_rcvwinscale;
  457. }
  458. if (rfc1323opts & TCP_FLAG_TS) {
  459. //Start loading time stamp option header and value
  460. *OptPtr++ = TCP_OPT_NOP;
  461. *OptPtr++ = TCP_OPT_NOP;
  462. *OptPtr++ = TCP_OPT_TS;
  463. *OptPtr++ = TS_OPT_SIZE;
  464. // Initialize TS value TSval
  465. *(long *)OptPtr = 0;
  466. OptPtr += 4;
  467. //Initialize TS Echo Reply TSecr
  468. *(long *)OptPtr = 0;
  469. OptPtr += 4;
  470. }
  471. if (SackOpt) {
  472. // Initialize with SACK_PERMITTED option
  473. *(long *)OptPtr = net_long(0x01010402);
  474. IF_TCPDBG(TCP_DEBUG_SACK) {
  475. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
  476. }
  477. }
  478. SYNTcb->syntcb_refcnt++;
  479. //Account for Options.
  480. (*LocalNetInfo.ipi_initopts) (&OptInfo);
  481. SYNHeader->tcp_xsum =
  482. ~XsumSendChain(phxsum +
  483. (uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize),
  484. HeaderBuffer);
  485. //ClassifyPacket(SYNTcb);
  486. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  487. SendStatus =
  488. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
  489. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
  490. SYNTcb->syntcb_daddr,
  491. SYNTcb->syntcb_saddr,
  492. &OptInfo,
  493. NULL,
  494. PROTOCOL_TCP,
  495. NULL);
  496. if (SendStatus != IP_PENDING) {
  497. FreeTCPHeader(HeaderBuffer);
  498. }
  499. CTEGetLock(&SYNTcb->syntcb_lock, &TCBHandle);
  500. DerefSynTCB(SYNTcb, TCBHandle);
  501. } else {
  502. SYNTcb->syntcb_sendnext++;
  503. if (SEQ_GT(SYNTcb->syntcb_sendnext, SYNTcb->syntcb_sendmax))
  504. SYNTcb->syntcb_sendmax = SYNTcb->syntcb_sendnext;
  505. CTEFreeLock(&SYNTcb->syntcb_lock, TCBHandle);
  506. return;
  507. }
  508. }
  509. //* SendSYN - Send a SYN segment.
  510. //
  511. // This is called during connection establishment time to send a SYN
  512. // segment to the peer. We get a buffer if we can, and then fill
  513. // it in. There's a tricky part here where we have to build the MSS
  514. // option in the header - we find the MSS by finding the MSS offered
  515. // by the net for the local address. After that, we send it.
  516. //
  517. // Input: SYNTcb - TCB from which SYN is to be sent.
  518. // TCBHandle - Handle for lock on TCB.
  519. //
  520. // Returns: Nothing.
  521. //
  522. void
  523. SendSYN(TCB * SYNTcb, CTELockHandle TCBHandle)
  524. {
  525. PNDIS_BUFFER HeaderBuffer;
  526. TCPHeader *SYNHeader;
  527. uchar *OptPtr;
  528. IP_STATUS SendStatus;
  529. ushort OptSize = 0, HdrSize = 0, rfc1323opts = 0;
  530. BOOLEAN SackOpt = FALSE;
  531. CTEStructAssert(SYNTcb, tcb);
  532. HeaderBuffer = GetTCPHeaderAtDpcLevel(&SYNHeader);
  533. // Go ahead and set the retransmission timer now, in case we didn't get a
  534. // buffer. In the future we might want to queue the connection for
  535. // when we free a buffer.
  536. START_TCB_TIMER_R(SYNTcb, RXMIT_TIMER, SYNTcb->tcb_rexmit);
  537. if (HeaderBuffer != NULL) {
  538. ushort TempWin;
  539. ushort MSS;
  540. uchar FoundMSS;
  541. SYNHeader = (TCPHeader *) ((PUCHAR)SYNHeader + LocalNetInfo.ipi_hsize);
  542. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  543. // If we are doing active open, check if we are configured to do
  544. // window scaling and time stamp options
  545. if (((TcpHostSendOpts & TCP_FLAG_WS) &&
  546. (SYNTcb->tcb_state == TCB_SYN_SENT)) ||
  547. (SYNTcb->tcb_tcpopts & TCP_FLAG_WS)) {
  548. rfc1323opts |= TCP_FLAG_WS;
  549. IF_TCPDBG(TCP_DEBUG_1323) {
  550. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected WS option TCB %x\n", SYNTcb));
  551. }
  552. }
  553. if (((TcpHostSendOpts & TCP_FLAG_TS) &&
  554. (SYNTcb->tcb_state == TCB_SYN_SENT)) ||
  555. (SYNTcb->tcb_tcpopts & TCP_FLAG_TS)) {
  556. IF_TCPDBG(TCP_DEBUG_1323) {
  557. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Selected TS option TCB %x\n", SYNTcb));
  558. }
  559. rfc1323opts |= TCP_FLAG_TS;
  560. }
  561. FoundMSS = (*LocalNetInfo.ipi_getlocalmtu) (SYNTcb->tcb_saddr, &MSS);
  562. if (!FoundMSS) {
  563. FreeTCPHeader(HeaderBuffer);
  564. goto SendError;
  565. }
  566. MSS -= sizeof(TCPHeader);
  567. if (SYNTcb->tcb_rce && !(DefaultRcvWin || SYNTcb->tcb_rce->rce_TcpWindowSize)) {
  568. if (SYNTcb->tcb_rce->rce_mediaspeed < 100000) {
  569. SYNTcb->tcb_rcvwin = MSS*(8760/MSS);
  570. SYNTcb->tcb_defaultwin = SYNTcb->tcb_rcvwin;
  571. rfc1323opts = 0;
  572. } else if (SYNTcb->tcb_rce->rce_mediaspeed >= 100000000) {
  573. //For Gigabit, window size needs to be 64K
  574. //This will be adjusted based on MSS later on.
  575. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  576. "SendSyn: Gigabit media speed, TCB %x %x\n", SYNTcb,SYNTcb->tcb_rcvwin));
  577. SYNTcb->tcb_rcvwin = MSS*(65535/MSS);
  578. SYNTcb->tcb_defaultwin = SYNTcb->tcb_rcvwin;
  579. }
  580. }
  581. if (rfc1323opts & TCP_FLAG_WS) {
  582. OptSize += WS_OPT_SIZE + 1; // 1 NOP for alignment
  583. }
  584. if (rfc1323opts & TCP_FLAG_TS) {
  585. OptSize += TS_OPT_SIZE + 2; // 2 NOPs for alignment
  586. }
  587. if ((SYNTcb->tcb_tcpopts & TCP_FLAG_SACK) ||
  588. ((SYNTcb->tcb_state == TCB_SYN_SENT) &&
  589. (TcpHostOpts & TCP_FLAG_SACK))) {
  590. SackOpt = TRUE;
  591. OptSize += 4; // 2 NOPS, SACK kind and length field
  592. }
  593. NdisAdjustBufferLength(HeaderBuffer,
  594. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize);
  595. SYNHeader->tcp_src = SYNTcb->tcb_sport;
  596. SYNHeader->tcp_dest = SYNTcb->tcb_dport;
  597. SYNHeader->tcp_seq = net_long(SYNTcb->tcb_sendnext);
  598. SYNTcb->tcb_sendnext++;
  599. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax)) {
  600. TCPSIncrementOutSegCount();
  601. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  602. } else
  603. TStats.ts_retranssegs++;
  604. SYNHeader->tcp_ack = net_long(SYNTcb->tcb_rcvnext);
  605. // Reuse OPt size for header size determination
  606. // default is MSS amd tcp header size
  607. HdrSize = 6;
  608. // set size field to reflect TS and WND scale option
  609. // tcp header + windowscale + Timestamp + pad
  610. if (rfc1323opts & TCP_FLAG_WS) {
  611. // WS: Add one more long word
  612. HdrSize += 1;
  613. }
  614. if (rfc1323opts & TCP_FLAG_TS) {
  615. // TS: Add 3 more long words
  616. HdrSize += 3;
  617. }
  618. if (SackOpt) {
  619. // SACK: Add 1 more long word
  620. HdrSize += 1;
  621. }
  622. if (SYNTcb->tcb_state == TCB_SYN_RCVD) {
  623. SYNHeader->tcp_flags =
  624. MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN | TCP_FLAG_ACK);
  625. //
  626. // if this is the second time we are trying to send the SYN-ACK,
  627. // increment the count of retried half-connections
  628. //
  629. if (SynAttackProtect &&
  630. (SYNTcb->tcb_rexmitcnt ==
  631. ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
  632. CTEInterlockedAddUlong(&TCPHalfOpenRetried, 1, &SynAttLock.Lock);
  633. }
  634. // Need to do this check whenever TCPHalfOpenRetried is incremented..
  635. if( (TCPHalfOpen >= TCPMaxHalfOpen) &&
  636. (TCPHalfOpenRetried >= TCPMaxHalfOpenRetried) &&
  637. (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT))
  638. {
  639. MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
  640. }
  641. } else {
  642. SYNHeader->tcp_flags = MAKE_TCP_FLAGS(HdrSize, TCP_FLAG_SYN);
  643. }
  644. SYNTcb->tcb_lastack = SYNTcb->tcb_rcvnext;
  645. if (SYNTcb->tcb_state == TCB_SYN_RCVD)
  646. TempWin = (ushort) (SYNTcb->tcb_rcvwin >> SYNTcb->tcb_rcvwinscale);
  647. else
  648. TempWin = (ushort) SYNTcb->tcb_rcvwin;
  649. SYNHeader->tcp_window = net_short(TempWin);
  650. SYNHeader->tcp_xsum = 0;
  651. OptPtr = (uchar *) (SYNHeader + 1);
  652. *OptPtr++ = TCP_OPT_MSS;
  653. *OptPtr++ = MSS_OPT_SIZE;
  654. **(ushort **) & OptPtr = net_short(MSS);
  655. OptPtr++;
  656. OptPtr++;
  657. if (rfc1323opts & TCP_FLAG_WS) {
  658. // Fill in the WS option headers and value
  659. *OptPtr++ = TCP_OPT_NOP;
  660. *OptPtr++ = TCP_OPT_WS;
  661. *OptPtr++ = WS_OPT_SIZE;
  662. //Initial window scale factor
  663. *OptPtr++ = (uchar) SYNTcb->tcb_rcvwinscale;
  664. }
  665. if (rfc1323opts & TCP_FLAG_TS) {
  666. //Start loading time stamp option header and value
  667. *OptPtr++ = TCP_OPT_NOP;
  668. *OptPtr++ = TCP_OPT_NOP;
  669. *OptPtr++ = TCP_OPT_TS;
  670. *OptPtr++ = TS_OPT_SIZE;
  671. // Initialize TS value TSval
  672. *(long *)OptPtr = 0;
  673. OptPtr += 4;
  674. //Initialize TS Echo Reply TSecr
  675. *(long *)OptPtr = 0;
  676. OptPtr += 4;
  677. }
  678. if (SackOpt) {
  679. // Initialize with SACK_PERMITTED option
  680. *(long *)OptPtr = net_long(0x01010402);
  681. IF_TCPDBG(TCP_DEBUG_SACK) {
  682. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACK_OPT %x\n", SYNTcb));
  683. }
  684. }
  685. REFERENCE_TCB(SYNTcb);
  686. //Account for Options.
  687. SYNTcb->tcb_opt.ioi_TcpChksum = 0;
  688. SYNHeader->tcp_xsum =
  689. ~XsumSendChain(SYNTcb->tcb_phxsum +
  690. (uint)net_short(sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize),
  691. HeaderBuffer);
  692. ClassifyPacket(SYNTcb);
  693. CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
  694. SendStatus =
  695. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, NULL, HeaderBuffer,
  696. sizeof(TCPHeader) + MSS_OPT_SIZE + OptSize,
  697. SYNTcb->tcb_daddr,
  698. SYNTcb->tcb_saddr,
  699. &SYNTcb->tcb_opt,
  700. SYNTcb->tcb_rce,
  701. PROTOCOL_TCP,
  702. NULL);
  703. SYNTcb->tcb_error = SendStatus;
  704. if (SendStatus != IP_PENDING) {
  705. FreeTCPHeader(HeaderBuffer);
  706. }
  707. CTEGetLock(&SYNTcb->tcb_lock, &TCBHandle);
  708. DerefTCB(SYNTcb, TCBHandle);
  709. } else {
  710. SendError:
  711. SYNTcb->tcb_sendnext++;
  712. if (SEQ_GT(SYNTcb->tcb_sendnext, SYNTcb->tcb_sendmax))
  713. SYNTcb->tcb_sendmax = SYNTcb->tcb_sendnext;
  714. CTEFreeLock(&SYNTcb->tcb_lock, TCBHandle);
  715. return;
  716. }
  717. }
  718. //* SendKA - Send a keep alive segment.
  719. //
  720. // This is called when we want to send a keep alive.
  721. //
  722. // Input: KATcb - TCB from which keep alive is to be sent.
  723. // Handle - Handle for lock on TCB.
  724. //
  725. // Returns: Nothing.
  726. //
  727. void
  728. SendKA(TCB * KATcb, CTELockHandle Handle)
  729. {
  730. PNDIS_BUFFER HeaderBuffer;
  731. TCPHeader *Header;
  732. IP_STATUS SendStatus;
  733. CTEStructAssert(KATcb, tcb);
  734. HeaderBuffer = GetTCPHeaderAtDpcLevel(&Header);
  735. if (HeaderBuffer != NULL) {
  736. ushort TempWin;
  737. SeqNum TempSeq;
  738. Header = (TCPHeader *) ((PUCHAR) Header + LocalNetInfo.ipi_hsize);
  739. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  740. NdisAdjustBufferLength(HeaderBuffer, sizeof(TCPHeader) + 1);
  741. Header->tcp_src = KATcb->tcb_sport;
  742. Header->tcp_dest = KATcb->tcb_dport;
  743. TempSeq = KATcb->tcb_senduna - 1;
  744. Header->tcp_seq = net_long(TempSeq);
  745. TStats.ts_retranssegs++;
  746. Header->tcp_ack = net_long(KATcb->tcb_rcvnext);
  747. Header->tcp_flags = MAKE_TCP_FLAGS(5, TCP_FLAG_ACK);
  748. // We need to scale the rcv window
  749. // Use temprary variable to workaround truncation
  750. // caused by net_short
  751. TempWin = (ushort) (RcvWin(KATcb) >> KATcb->tcb_rcvwinscale);
  752. Header->tcp_window = net_short(TempWin);
  753. KATcb->tcb_lastack = KATcb->tcb_rcvnext;
  754. Header->tcp_xsum = 0;
  755. KATcb->tcb_opt.ioi_TcpChksum = 0;
  756. Header->tcp_xsum =
  757. ~XsumSendChain(KATcb->tcb_phxsum +
  758. (uint)net_short(sizeof(TCPHeader) + 1),
  759. HeaderBuffer);
  760. KATcb->tcb_kacount++;
  761. ClassifyPacket(KATcb);
  762. CTEFreeLock(&KATcb->tcb_lock, Handle);
  763. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  764. NULL,
  765. HeaderBuffer,
  766. sizeof(TCPHeader) + 1,
  767. KATcb->tcb_daddr,
  768. KATcb->tcb_saddr,
  769. &KATcb->tcb_opt,
  770. KATcb->tcb_rce,
  771. PROTOCOL_TCP,
  772. NULL);
  773. if (SendStatus != IP_PENDING) {
  774. FreeTCPHeader(HeaderBuffer);
  775. }
  776. } else {
  777. CTEFreeLock(&KATcb->tcb_lock, Handle);
  778. }
  779. }
  780. //* SendACK - Send an ACK segment.
  781. //
  782. // This is called whenever we need to send an ACK for some reason. Nothing
  783. // fancy, we just do it.
  784. //
  785. // Input: ACKTcb - TCB from which ACK is to be sent.
  786. //
  787. // Returns: Nothing.
  788. //
  789. void
  790. SendACK(TCB * ACKTcb)
  791. {
  792. PNDIS_BUFFER HeaderBuffer;
  793. TCPHeader *ACKHeader;
  794. IP_STATUS SendStatus;
  795. CTELockHandle TCBHandle;
  796. SeqNum SendNext;
  797. ushort Size, SackLength = 0, i, hdrlen = 5;
  798. ulong *ts_opt;
  799. BOOLEAN HWChksum = FALSE;
  800. CTEStructAssert(ACKTcb, tcb);
  801. HeaderBuffer = GetTCPHeader(&ACKHeader);
  802. if (HeaderBuffer != NULL) {
  803. ushort TempWin;
  804. ushort Size;
  805. ACKHeader = (TCPHeader *) ((PUCHAR) ACKHeader + LocalNetInfo.ipi_hsize);
  806. CTEGetLock(&ACKTcb->tcb_lock, &TCBHandle);
  807. // Allow room for filling time stamp option.
  808. // Note that it is 12 bytes and will never ever change
  809. if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
  810. NdisAdjustBufferLength(HeaderBuffer,
  811. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  812. // Header length is multiple of 32bits
  813. hdrlen = 5 + 3; // standard header size +
  814. // header size requirement for TS option
  815. ACKTcb->tcb_lastack = ACKTcb->tcb_rcvnext;
  816. }
  817. if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
  818. ACKTcb->tcb_SackBlock &&
  819. (ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
  820. SackLength++;
  821. for (i = 1; i < 3; i++) {
  822. if (ACKTcb->tcb_SackBlock->Mask[i] == 1)
  823. SackLength++;
  824. }
  825. IF_TCPDBG(TCP_DEBUG_SACK) {
  826. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Sending SACKs!! %x %x\n", ACKTcb, SackLength));
  827. }
  828. NdisAdjustBufferLength(HeaderBuffer,
  829. NdisBufferLength(HeaderBuffer) + SackLength * 8 + 4);
  830. // Sack block is of 2 long words (8 bytes) and 4 bytes
  831. // is for Sack option header.
  832. hdrlen += ((SackLength * 8 + 4) >> 2);
  833. }
  834. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  835. ACKHeader->tcp_src = ACKTcb->tcb_sport;
  836. ACKHeader->tcp_dest = ACKTcb->tcb_dport;
  837. ACKHeader->tcp_ack = net_long(ACKTcb->tcb_rcvnext);
  838. // If the remote peer is advertising a window of zero, we need to
  839. // send this ack with a seq. number of his rcv_next (which in that case
  840. // should be our senduna). We have code here ifdef'd out that makes
  841. // sure that we don't send outside the RWE, but this doesn't work. We
  842. // need to be able to send a pure ACK exactly at the RWE.
  843. if (ACKTcb->tcb_sendwin != 0) {
  844. SeqNum MaxValidSeq;
  845. SendNext = ACKTcb->tcb_sendnext;
  846. } else
  847. SendNext = ACKTcb->tcb_senduna;
  848. if ((ACKTcb->tcb_flags & FIN_SENT) &&
  849. SEQ_EQ(SendNext, ACKTcb->tcb_sendmax - 1)) {
  850. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen,
  851. TCP_FLAG_FIN | TCP_FLAG_ACK);
  852. } else
  853. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
  854. ACKHeader->tcp_seq = net_long(SendNext);
  855. TempWin = (ushort) RcvWin(ACKTcb);
  856. ACKHeader->tcp_window = net_short(TempWin);
  857. ACKHeader->tcp_xsum = 0;
  858. Size = sizeof(TCPHeader);
  859. {
  860. // Point to a place beyond tcp header
  861. (uchar *) ts_opt = (uchar *) ACKHeader + 20;
  862. if (ACKTcb->tcb_tcpopts & TCP_FLAG_TS) {
  863. // Form time stamp header with 2 NOPs for alignment
  864. *ts_opt++ = net_long(0x0101080A);
  865. *ts_opt++ = net_long(TCPTime);
  866. *ts_opt++ = net_long(ACKTcb->tcb_tsrecent);
  867. // Add 12 more bytes to the size to account for TS
  868. Size += ALIGNED_TS_OPT_SIZE;
  869. }
  870. if ((ACKTcb->tcb_tcpopts & TCP_FLAG_SACK) &&
  871. ACKTcb->tcb_SackBlock &&
  872. (ACKTcb->tcb_SackBlock->Mask[0] == 1)) {
  873. *(ushort *) ts_opt = 0x0101;
  874. (uchar *) ts_opt += 2;
  875. *(uchar *) ts_opt = (uchar) 0x05;
  876. (uchar *) ts_opt += 1;
  877. *(uchar *) ts_opt = (uchar) SackLength *8 + 2;
  878. (uchar *) ts_opt += 1;
  879. // Sack option header + the block times times sack length!
  880. Size += 4 + SackLength * 8;
  881. for (i = 0; i < 3; i++) {
  882. if (ACKTcb->tcb_SackBlock->Mask[i] != 0) {
  883. *ts_opt++ =
  884. net_long(ACKTcb->tcb_SackBlock->Block[i].begin);
  885. *ts_opt++ =
  886. net_long(ACKTcb->tcb_SackBlock->Block[i].end);
  887. }
  888. }
  889. }
  890. // Use temprary variable to workaround truncation
  891. // caused by net_short.
  892. TempWin = (ushort) (RcvWin(ACKTcb) >> ACKTcb->tcb_rcvwinscale);
  893. ACKHeader->tcp_window = net_short(TempWin);
  894. if (ACKTcb->tcb_rce &&
  895. (ACKTcb->tcb_rce->rce_OffloadFlags &
  896. TCP_XMT_CHECKSUM_OFFLOAD)) {
  897. HWChksum = TRUE;
  898. if ((Size > sizeof(TCPHeader)) &&
  899. !(ACKTcb->tcb_rce->rce_OffloadFlags &
  900. TCP_CHECKSUM_OPT_OFFLOAD)) {
  901. HWChksum = FALSE;
  902. }
  903. }
  904. if (HWChksum) {
  905. uint PHXsum = ACKTcb->tcb_phxsum + (uint) net_short(Size);
  906. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
  907. ACKHeader->tcp_xsum = (ushort) PHXsum;
  908. ACKTcb->tcb_opt.ioi_TcpChksum = 1;
  909. #if DBG
  910. DbgTcpSendHwChksumCount++;
  911. #endif
  912. } else {
  913. ACKHeader->tcp_xsum =
  914. ~XsumSendChain(ACKTcb->tcb_phxsum +
  915. (uint)net_short(Size), HeaderBuffer);
  916. ACKTcb->tcb_opt.ioi_TcpChksum = 0;
  917. }
  918. }
  919. STOP_TCB_TIMER_R(ACKTcb, DELACK_TIMER);
  920. ACKTcb->tcb_rcvdsegs = 0;
  921. ACKTcb->tcb_flags &= ~(NEED_ACK | ACK_DELAYED);
  922. ClassifyPacket(ACKTcb);
  923. CTEFreeLock(&ACKTcb->tcb_lock, TCBHandle);
  924. TCPSIncrementOutSegCount();
  925. if (ACKTcb->tcb_tcpopts) {
  926. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  927. NULL,
  928. HeaderBuffer,
  929. Size,
  930. ACKTcb->tcb_daddr,
  931. ACKTcb->tcb_saddr,
  932. &ACKTcb->tcb_opt,
  933. ACKTcb->tcb_rce,
  934. PROTOCOL_TCP,
  935. NULL);
  936. } else {
  937. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  938. NULL,
  939. HeaderBuffer,
  940. sizeof(TCPHeader),
  941. ACKTcb->tcb_daddr,
  942. ACKTcb->tcb_saddr,
  943. &ACKTcb->tcb_opt,
  944. ACKTcb->tcb_rce,
  945. PROTOCOL_TCP,
  946. NULL);
  947. }
  948. ACKTcb->tcb_error = SendStatus;
  949. if (SendStatus != IP_PENDING)
  950. FreeTCPHeader(HeaderBuffer);
  951. }
  952. return;
  953. }
  954. //* SendTWtcbACK- Send an ACK segment for a twtcb
  955. //
  956. //
  957. // Input: ACKTcb - TCB from which ACK is to be sent.
  958. //
  959. // Returns: Nothing.
  960. //
  961. void
  962. SendTWtcbACK(TWTCB *ACKTcb, uint Partition, CTELockHandle TCBHandle)
  963. {
  964. PNDIS_BUFFER HeaderBuffer;
  965. TCPHeader *ACKHeader;
  966. IP_STATUS SendStatus;
  967. SeqNum SendNext;
  968. ushort Size, SackLength = 0, i, hdrlen = 5;
  969. ulong *ts_opt;
  970. uint phxsum;
  971. CTEStructAssert(ACKTcb, twtcb);
  972. HeaderBuffer = GetTCPHeaderAtDpcLevel(&ACKHeader);
  973. if (HeaderBuffer != NULL) {
  974. ushort TempWin;
  975. ushort Size;
  976. IPOptInfo NewInfo;
  977. ACKHeader = (TCPHeader *)((PUCHAR)ACKHeader + LocalNetInfo.ipi_hsize);
  978. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  979. ACKHeader->tcp_src = ACKTcb->twtcb_sport;
  980. ACKHeader->tcp_dest = ACKTcb->twtcb_dport;
  981. ACKHeader->tcp_ack = net_long(ACKTcb->twtcb_rcvnext);
  982. SendNext = ACKTcb->twtcb_senduna; // should be same tcb_sendnext
  983. ACKHeader->tcp_flags = MAKE_TCP_FLAGS(hdrlen, TCP_FLAG_ACK);
  984. ACKHeader->tcp_seq = net_long(SendNext);
  985. //Window needs to be zero since we can not rcv anyway.
  986. ACKHeader->tcp_window = 0;
  987. Size = sizeof(TCPHeader);
  988. phxsum = PHXSUM(ACKTcb->twtcb_saddr, ACKTcb->twtcb_daddr,
  989. PROTOCOL_TCP, 0);
  990. ACKHeader->tcp_xsum = 0;
  991. ACKHeader->tcp_xsum =
  992. ~XsumSendChain(phxsum +
  993. (uint)net_short(Size), HeaderBuffer);
  994. //ACKTcb->tcb_opt.ioi_TcpChksum=0;
  995. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TCBHandle);
  996. TCPSIncrementOutSegCount();
  997. (*LocalNetInfo.ipi_initopts) (&NewInfo);
  998. SendStatus =
  999. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  1000. NULL,
  1001. HeaderBuffer,
  1002. sizeof(TCPHeader),
  1003. ACKTcb->twtcb_daddr,
  1004. ACKTcb->twtcb_saddr,
  1005. &NewInfo,
  1006. NULL,
  1007. PROTOCOL_TCP,
  1008. NULL);
  1009. if (SendStatus != IP_PENDING)
  1010. FreeTCPHeader(HeaderBuffer);
  1011. (*LocalNetInfo.ipi_freeopts) (&NewInfo);
  1012. } else {
  1013. CTEFreeLockFromDPC(&pTWTCBTableLock[Partition], TCBHandle);
  1014. }
  1015. }
  1016. //* SendRSTFromTCB - Send a RST from a TCB.
  1017. //
  1018. // This is called during close when we need to send a RST.
  1019. //
  1020. // Input: RSTTcb - TCB from which RST is to be sent.
  1021. // RCE - Optional RCE to be used in sending.
  1022. //
  1023. // Returns: Nothing.
  1024. //
  1025. void
  1026. SendRSTFromTCB(TCB * RSTTcb, RouteCacheEntry* RCE)
  1027. {
  1028. PNDIS_BUFFER HeaderBuffer;
  1029. TCPHeader *RSTHeader;
  1030. IP_STATUS SendStatus;
  1031. CTEStructAssert(RSTTcb, tcb);
  1032. ASSERT(RSTTcb->tcb_state == TCB_CLOSED);
  1033. HeaderBuffer = GetTCPHeader(&RSTHeader);
  1034. if (HeaderBuffer != NULL) {
  1035. SeqNum RSTSeq;
  1036. RSTHeader = (TCPHeader *) ((PUCHAR)RSTHeader + LocalNetInfo.ipi_hsize);
  1037. NDIS_BUFFER_LINKAGE(HeaderBuffer) = NULL;
  1038. RSTHeader->tcp_src = RSTTcb->tcb_sport;
  1039. RSTHeader->tcp_dest = RSTTcb->tcb_dport;
  1040. // If the remote peer has a window of 0, send with a seq. # equal
  1041. // to senduna so he'll accept it. Otherwise send with send max.
  1042. if (RSTTcb->tcb_sendwin != 0)
  1043. RSTSeq = RSTTcb->tcb_sendmax;
  1044. else
  1045. RSTSeq = RSTTcb->tcb_senduna;
  1046. RSTHeader->tcp_seq = net_long(RSTSeq);
  1047. RSTHeader->tcp_flags = MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
  1048. TCP_FLAG_RST);
  1049. RSTHeader->tcp_window = 0;
  1050. RSTHeader->tcp_xsum = 0;
  1051. // Recompute pseudo checksum as this will
  1052. // not be valid when connection is disconnected
  1053. // in pre-accept case.
  1054. RSTHeader->tcp_xsum =
  1055. ~XsumSendChain(PHXSUM(RSTTcb->tcb_saddr,
  1056. RSTTcb->tcb_daddr,
  1057. PROTOCOL_TCP,
  1058. sizeof(TCPHeader)),
  1059. HeaderBuffer);
  1060. RSTTcb->tcb_opt.ioi_TcpChksum = 0;
  1061. TCPSIncrementOutSegCount();
  1062. TStats.ts_outrsts++;
  1063. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  1064. NULL,
  1065. HeaderBuffer,
  1066. sizeof(TCPHeader),
  1067. RSTTcb->tcb_daddr,
  1068. RSTTcb->tcb_saddr,
  1069. &RSTTcb->tcb_opt,
  1070. RCE,
  1071. PROTOCOL_TCP,
  1072. NULL);
  1073. if (SendStatus != IP_PENDING)
  1074. FreeTCPHeader(HeaderBuffer);
  1075. }
  1076. return;
  1077. }
  1078. //* SendRSTFromHeader - Send a RST back, based on a header.
  1079. //
  1080. // Called when we need to send a RST, but don't necessarily have a TCB.
  1081. //
  1082. // Input: TCPH - TCP header to be RST.
  1083. // Length - Length of the incoming segment.
  1084. // Dest - Destination IP address for RST.
  1085. // Src - Source IP address for RST.
  1086. // OptInfo - IP Options to use on RST.
  1087. //
  1088. // Returns: Nothing.
  1089. //
  1090. void
  1091. SendRSTFromHeader(TCPHeader UNALIGNED * TCPH, uint Length, IPAddr Dest,
  1092. IPAddr Src, IPOptInfo * OptInfo)
  1093. {
  1094. PNDIS_BUFFER Buffer;
  1095. TCPHeader *RSTHdr;
  1096. IPOptInfo NewInfo;
  1097. IP_STATUS SendStatus;
  1098. if (TCPH->tcp_flags & TCP_FLAG_RST)
  1099. return;
  1100. Buffer = GetTCPHeader(&RSTHdr);
  1101. if (Buffer != NULL) {
  1102. // Got a buffer. Fill in the header so as to make it believable to
  1103. // the remote guy, and send it.
  1104. RSTHdr = (TCPHeader *) ((PUCHAR)RSTHdr + LocalNetInfo.ipi_hsize);
  1105. NDIS_BUFFER_LINKAGE(Buffer) = NULL;
  1106. if (TCPH->tcp_flags & TCP_FLAG_SYN)
  1107. Length++;
  1108. if (TCPH->tcp_flags & TCP_FLAG_FIN)
  1109. Length++;
  1110. if (TCPH->tcp_flags & TCP_FLAG_ACK) {
  1111. RSTHdr->tcp_seq = TCPH->tcp_ack;
  1112. RSTHdr->tcp_ack = TCPH->tcp_ack;
  1113. RSTHdr->tcp_flags =
  1114. MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong), TCP_FLAG_RST);
  1115. } else {
  1116. SeqNum TempSeq;
  1117. RSTHdr->tcp_seq = 0;
  1118. TempSeq = net_long(TCPH->tcp_seq);
  1119. TempSeq += Length;
  1120. RSTHdr->tcp_ack = net_long(TempSeq);
  1121. RSTHdr->tcp_flags =
  1122. MAKE_TCP_FLAGS(sizeof(TCPHeader) / sizeof(ulong),
  1123. TCP_FLAG_RST | TCP_FLAG_ACK);
  1124. }
  1125. RSTHdr->tcp_window = 0;
  1126. RSTHdr->tcp_dest = TCPH->tcp_src;
  1127. RSTHdr->tcp_src = TCPH->tcp_dest;
  1128. RSTHdr->tcp_xsum = 0;
  1129. RSTHdr->tcp_xsum =
  1130. ~XsumSendChain(PHXSUM(Src, Dest, PROTOCOL_TCP, sizeof(TCPHeader)),
  1131. Buffer);
  1132. (*LocalNetInfo.ipi_initopts) (&NewInfo);
  1133. if (OptInfo->ioi_options != NULL)
  1134. (*LocalNetInfo.ipi_updateopts)(OptInfo, &NewInfo, Dest,
  1135. NULL_IP_ADDR);
  1136. TCPSIncrementOutSegCount();
  1137. TStats.ts_outrsts++;
  1138. SendStatus = (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  1139. NULL,
  1140. Buffer,
  1141. sizeof(TCPHeader),
  1142. Dest,
  1143. Src,
  1144. &NewInfo,
  1145. NULL,
  1146. PROTOCOL_TCP,
  1147. NULL);
  1148. if (SendStatus != IP_PENDING)
  1149. FreeTCPHeader(Buffer);
  1150. (*LocalNetInfo.ipi_freeopts) (&NewInfo);
  1151. }
  1152. }
  1153. //* GoToEstab - Transition to the established state.
  1154. //
  1155. // Called when we are going to the established state and need to finish up
  1156. // initializing things that couldn't be done until now. We assume the TCB
  1157. // lock is held by the caller on the TCB we're called with.
  1158. //
  1159. // Input: EstabTCB - TCB to transition.
  1160. //
  1161. // Returns: Nothing.
  1162. //
  1163. void
  1164. GoToEstab(TCB * EstabTCB)
  1165. {
  1166. uchar DType;
  1167. ushort MSS;
  1168. // Initialize our slow start and congestion control variables.
  1169. EstabTCB->tcb_cwin = 2 * EstabTCB->tcb_mss;
  1170. EstabTCB->tcb_ssthresh = 0xffffffff;
  1171. EstabTCB->tcb_state = TCB_ESTAB;
  1172. if (SynAttackProtect && !EstabTCB->tcb_rce) {
  1173. (*LocalNetInfo.ipi_openrce) (EstabTCB->tcb_daddr, EstabTCB->tcb_saddr,
  1174. &EstabTCB->tcb_rce, &DType, &MSS, &EstabTCB->tcb_opt);
  1175. }
  1176. // We're in established. We'll subtract one from slow count for this fact,
  1177. // and if the slowcount goes to 0 we'll move onto the fast path.
  1178. if (--(EstabTCB->tcb_slowcount) == 0)
  1179. EstabTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
  1180. TStats.ts_currestab++;
  1181. EstabTCB->tcb_flags &= ~ACTIVE_OPEN; // Turn off the active opening flag.
  1182. }
  1183. //* InitSendState - Initialize the send state of a connection.
  1184. //
  1185. // Called during connection establishment to initialize our send state.
  1186. // (In this case, this refers to all information we'll put on the wire as
  1187. // well as pure send state). We pick an ISS, set up a rexmit timer value,
  1188. // etc. We assume the tcb_lock is held on the TCB when we are called.
  1189. //
  1190. // Input: NewTCB - TCB to be set up.
  1191. //
  1192. // Returns: Nothing.
  1193. void
  1194. InitSendState(TCB * NewTCB)
  1195. {
  1196. CTEStructAssert(NewTCB, tcb);
  1197. ASSERT(NewTCB->tcb_sendnext != 0);
  1198. NewTCB->tcb_senduna = NewTCB->tcb_sendnext;
  1199. NewTCB->tcb_sendmax = NewTCB->tcb_sendnext;
  1200. NewTCB->tcb_error = IP_SUCCESS;
  1201. // Initialize pseudo-header xsum.
  1202. NewTCB->tcb_phxsum = PHXSUM(NewTCB->tcb_saddr, NewTCB->tcb_daddr,
  1203. PROTOCOL_TCP, 0);
  1204. // Initialize retransmit and delayed ack stuff.
  1205. NewTCB->tcb_rexmitcnt = 0;
  1206. NewTCB->tcb_rtt = 0;
  1207. NewTCB->tcb_smrtt = 0;
  1208. NewTCB->tcb_delta = MS_TO_TICKS(6000);
  1209. NewTCB->tcb_rexmit = MS_TO_TICKS(3000);
  1210. if (NewTCB->tcb_rce) {
  1211. if (NewTCB->tcb_rce->rce_TcpInitialRTT &&
  1212. NewTCB->tcb_rce->rce_TcpInitialRTT > 3000) {
  1213. NewTCB->tcb_delta =
  1214. MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT * 2);
  1215. NewTCB->tcb_rexmit =
  1216. MS_TO_TICKS(NewTCB->tcb_rce->rce_TcpInitialRTT);
  1217. }
  1218. }
  1219. STOP_TCB_TIMER_R(NewTCB, RXMIT_TIMER);
  1220. STOP_TCB_TIMER_R(NewTCB, DELACK_TIMER);
  1221. }
  1222. //* TCPStatus - Handle a status indication.
  1223. //
  1224. // This is the TCP status handler, called by IP when a status event
  1225. // occurs. For most of these we do nothing. For certain severe status
  1226. // events we will mark the local address as invalid.
  1227. //
  1228. // Entry: StatusType - Type of status (NET or HW). NET status
  1229. // is usually caused by a received ICMP
  1230. // message. HW status indicate a HW
  1231. // problem.
  1232. // StatusCode - Code identifying IP_STATUS.
  1233. // OrigDest - If this is NET status, the original dest. of
  1234. // DG that triggered it.
  1235. // OrigSrc - " " " " " , the original src.
  1236. // Src - IP address of status originator (could be local
  1237. // or remote).
  1238. // Param - Additional information for status - i.e. the
  1239. // param field of an ICMP message.
  1240. // Data - Data pertaining to status - for NET status, this
  1241. // is the first 8 bytes of the original DG.
  1242. //
  1243. // Returns: Nothing
  1244. //
  1245. void
  1246. TCPStatus(uchar StatusType, IP_STATUS StatusCode, IPAddr OrigDest,
  1247. IPAddr OrigSrc, IPAddr Src, ulong Param, void *Data)
  1248. {
  1249. CTELockHandle TableHandle, TCBHandle;
  1250. TCB *StatusTCB;
  1251. TCPHeader UNALIGNED *Header = (TCPHeader UNALIGNED *) Data;
  1252. SeqNum DropSeq;
  1253. uint index;
  1254. // Handle NET status codes differently from HW status codes.
  1255. if (StatusType == IP_NET_STATUS) {
  1256. // It's a NET code. Find a matching TCB.
  1257. StatusTCB = FindTCB(OrigSrc, OrigDest, Header->tcp_dest,
  1258. Header->tcp_src, &TCBHandle, FALSE, &index);
  1259. if (StatusTCB != NULL) {
  1260. // Found one. Get the lock on it, and continue.
  1261. CTEStructAssert(StatusTCB, tcb);
  1262. // Make sure the TCB is in a state that is interesting.
  1263. if (StatusTCB->tcb_state == TCB_CLOSED ||
  1264. StatusTCB->tcb_state == TCB_TIME_WAIT ||
  1265. CLOSING(StatusTCB)) {
  1266. CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
  1267. return;
  1268. }
  1269. switch (StatusCode) {
  1270. // Hard errors - Destination protocol unreachable. We treat
  1271. // these as fatal errors. Close the connection now.
  1272. case IP_DEST_PROT_UNREACHABLE:
  1273. StatusTCB->tcb_error = StatusCode;
  1274. REFERENCE_TCB(StatusTCB);
  1275. TryToCloseTCB(StatusTCB, TCB_CLOSE_UNREACH, TCBHandle);
  1276. RemoveTCBFromConn(StatusTCB);
  1277. NotifyOfDisc(StatusTCB, NULL,
  1278. MapIPError(StatusCode, TDI_DEST_UNREACHABLE));
  1279. CTEGetLock(&StatusTCB->tcb_lock, &TCBHandle);
  1280. DerefTCB(StatusTCB, TCBHandle);
  1281. return;
  1282. break;
  1283. // Soft errors. Save the error in case it time out.
  1284. case IP_DEST_NET_UNREACHABLE:
  1285. case IP_DEST_HOST_UNREACHABLE:
  1286. case IP_DEST_PORT_UNREACHABLE:
  1287. case IP_BAD_ROUTE:
  1288. case IP_TTL_EXPIRED_TRANSIT:
  1289. case IP_TTL_EXPIRED_REASSEM:
  1290. case IP_PARAM_PROBLEM:
  1291. StatusTCB->tcb_error = StatusCode;
  1292. break;
  1293. case IP_PACKET_TOO_BIG:
  1294. // icmp new MTU is in ich_param=1
  1295. Param = net_short(Param >> 16);
  1296. StatusTCB->tcb_error = StatusCode;
  1297. // Fall through mtu change code
  1298. case IP_SPEC_MTU_CHANGE:
  1299. // A TCP datagram has triggered an MTU change. Figure out
  1300. // which connection it is, and update him to retransmit the
  1301. // segment. The Param value is the new MTU. We'll need to
  1302. // retransmit if the new MTU is less than our existing MTU
  1303. // and the sequence of the dropped packet is less than our
  1304. // current send next.
  1305. Param = Param - (sizeof(TCPHeader) +
  1306. StatusTCB->tcb_opt.ioi_optlength + sizeof(IPHeader));
  1307. DropSeq = net_long(Header->tcp_seq);
  1308. if (*(ushort *) & Param <= StatusTCB->tcb_mss &&
  1309. (SEQ_GTE(DropSeq, StatusTCB->tcb_senduna) &&
  1310. SEQ_LT(DropSeq, StatusTCB->tcb_sendnext))) {
  1311. // Need to initiate a retranmsit.
  1312. ResetSendNext(StatusTCB, DropSeq);
  1313. // Set the congestion window to allow only one packet.
  1314. // This may prevent us from sending anything if we
  1315. // didn't just set sendnext to senduna. This is OK,
  1316. // we'll retransmit later, or send when we get an ack.
  1317. StatusTCB->tcb_cwin = Param;
  1318. DelayAction(StatusTCB, NEED_OUTPUT);
  1319. }
  1320. StatusTCB->tcb_mss =
  1321. (ushort) MIN(Param, (ulong) StatusTCB->tcb_remmss);
  1322. ASSERT(StatusTCB->tcb_mss > 0);
  1323. ValidateMSS(StatusTCB);
  1324. //
  1325. // Reset the Congestion Window if necessary
  1326. //
  1327. if (StatusTCB->tcb_cwin < StatusTCB->tcb_mss) {
  1328. StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
  1329. //
  1330. // Make sure the slow start threshold is at least
  1331. // 2 segments
  1332. //
  1333. if (StatusTCB->tcb_ssthresh <
  1334. ((uint) StatusTCB->tcb_mss * 2)
  1335. ) {
  1336. StatusTCB->tcb_ssthresh = StatusTCB->tcb_mss * 2;
  1337. }
  1338. }
  1339. break;
  1340. // Source quench. This will cause us to reinitiate our
  1341. // slow start by resetting our congestion window and
  1342. // adjusting our slow start threshold.
  1343. case IP_SOURCE_QUENCH:
  1344. StatusTCB->tcb_ssthresh =
  1345. MAX(
  1346. MIN(
  1347. StatusTCB->tcb_cwin,
  1348. StatusTCB->tcb_sendwin
  1349. ) / 2,
  1350. (uint) StatusTCB->tcb_mss * 2
  1351. );
  1352. StatusTCB->tcb_cwin = StatusTCB->tcb_mss;
  1353. break;
  1354. default:
  1355. ASSERT(0);
  1356. break;
  1357. }
  1358. CTEFreeLock(&StatusTCB->tcb_lock, TCBHandle);
  1359. } else {
  1360. // Couldn't find a matching TCB. Just free the lock and return.
  1361. }
  1362. } else if (StatusType == IP_RECONFIG_STATUS) {
  1363. if (StatusCode == IP_RECONFIG_SECFLTR) {
  1364. ControlSecurityFiltering(Param);
  1365. }
  1366. } else {
  1367. uint NewMTU;
  1368. // 'Hardware' or 'global' status. Figure out what to do.
  1369. switch (StatusCode) {
  1370. case IP_ADDR_DELETED:
  1371. // Local address has gone away. OrigDest is the IPAddr which is
  1372. // gone.
  1373. //
  1374. // Delete any security filters associated with this address
  1375. //
  1376. DeleteProtocolSecurityFilter(OrigDest, PROTOCOL_TCP);
  1377. break;
  1378. case IP_ADDR_ADDED:
  1379. //
  1380. // An address has materialized. OrigDest identifies the address.
  1381. // Data is a handle to the IP configuration information for the
  1382. // interface on which the address is instantiated.
  1383. //
  1384. AddProtocolSecurityFilter(OrigDest, PROTOCOL_TCP,
  1385. (NDIS_HANDLE) Data);
  1386. break;
  1387. case IP_MTU_CHANGE:
  1388. NewMTU = Param - sizeof(TCPHeader);
  1389. TCBWalk(SetTCBMTU, &OrigDest, &OrigSrc, &NewMTU);
  1390. break;
  1391. default:
  1392. ASSERT(0);
  1393. break;
  1394. }
  1395. }
  1396. }
  1397. //* FillTCPHeader - Fill the TCP header in.
  1398. //
  1399. // A utility routine to fill in the TCP header.
  1400. //
  1401. // Input: SendTCB - TCB to fill from.
  1402. // Header - Header to fill into.
  1403. //
  1404. // Returns: Nothing.
  1405. //
  1406. void
  1407. FillTCPHeader(TCB * SendTCB, TCPHeader * Header)
  1408. {
  1409. ushort S;
  1410. ulong L;
  1411. Header->tcp_src = SendTCB->tcb_sport;
  1412. Header->tcp_dest = SendTCB->tcb_dport;
  1413. L = SendTCB->tcb_sendnext;
  1414. Header->tcp_seq = net_long(L);
  1415. L = SendTCB->tcb_rcvnext;
  1416. Header->tcp_ack = net_long(L);
  1417. Header->tcp_flags = 0x1050;
  1418. *(ulong *) & Header->tcp_xsum = 0;
  1419. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1420. ulong *ts_opt;
  1421. (uchar *) ts_opt = (uchar *) Header + 20;
  1422. //ts_opt = ts_opt + sizeof(TCPHeader);
  1423. *ts_opt++ = net_long(0x0101080A);
  1424. *ts_opt++ = net_long(TCPTime);
  1425. *ts_opt = net_long(SendTCB->tcb_tsrecent);
  1426. // Now the header is 32 bytes!!
  1427. Header->tcp_flags = 0x1080;
  1428. }
  1429. S = (ushort) (RcvWin(SendTCB) >> SendTCB->tcb_rcvwinscale);
  1430. Header->tcp_window = net_short(S);
  1431. }
  1432. //* ClassifyPacket - Classifies packets for GPC flow.
  1433. //
  1434. //
  1435. // Input: SendTCB - TCB of data/control packet to classify.
  1436. //
  1437. // Returns: Nothing.
  1438. //
  1439. void
  1440. ClassifyPacket(
  1441. TCB *SendTCB
  1442. )
  1443. {
  1444. #if GPC
  1445. //
  1446. // clear the precedence bits and get ready to be set
  1447. // according to the service type
  1448. //
  1449. if (DisableUserTOSSetting)
  1450. SendTCB->tcb_opt.ioi_tos &= TOS_MASK;
  1451. if (SendTCB->tcb_rce && GPCcfInfo) {
  1452. ULONG ServiceType = 0;
  1453. GPC_STATUS status = STATUS_SUCCESS;
  1454. GPC_IP_PATTERN Pattern;
  1455. IF_TCPDBG(TCP_DEBUG_GPC)
  1456. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: Classifying packet TCP %x\n", SendTCB));
  1457. Pattern.SrcAddr = SendTCB->tcb_saddr;
  1458. Pattern.DstAddr = SendTCB->tcb_daddr;
  1459. Pattern.ProtocolId = PROTOCOL_TCP;
  1460. Pattern.gpcSrcPort = SendTCB->tcb_sport;
  1461. Pattern.gpcDstPort = SendTCB->tcb_dport;
  1462. if (SendTCB->tcb_GPCCachedRTE != (void *)SendTCB->tcb_rce->rce_rte) {
  1463. //
  1464. // first time we use this RTE, or it has been changed
  1465. // since the last send
  1466. //
  1467. if (GetIFAndLink(SendTCB->tcb_rce, &SendTCB->tcb_GPCCachedIF,
  1468. (IPAddr *) & SendTCB->tcb_GPCCachedLink) ==
  1469. STATUS_SUCCESS) {
  1470. SendTCB->tcb_GPCCachedRTE = (void *)SendTCB->tcb_rce->rce_rte;
  1471. }
  1472. //
  1473. // invaludate the classification handle
  1474. //
  1475. SendTCB->tcb_opt.ioi_GPCHandle = 0;
  1476. }
  1477. Pattern.InterfaceId.InterfaceId = SendTCB->tcb_GPCCachedIF;
  1478. Pattern.InterfaceId.LinkId = SendTCB->tcb_GPCCachedLink;
  1479. IF_TCPDBG(TCP_DEBUG_GPC)
  1480. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: IF=%x Link=%x\n",
  1481. Pattern.InterfaceId.InterfaceId,
  1482. Pattern.InterfaceId.LinkId));
  1483. if (!SendTCB->tcb_opt.ioi_GPCHandle) {
  1484. IF_TCPDBG(TCP_DEBUG_GPC)
  1485. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, getting one now.\n"));
  1486. status =
  1487. GpcEntries.GpcClassifyPatternHandler(
  1488. (GPC_HANDLE)hGpcClient[GPC_CF_QOS],
  1489. GPC_PROTOCOL_TEMPLATE_IP,
  1490. &Pattern,
  1491. NULL, // context
  1492. &SendTCB->tcb_opt.ioi_GPCHandle,
  1493. 0,
  1494. NULL,
  1495. FALSE);
  1496. }
  1497. // Only if QOS patterns exist, we get the TOS bits out.
  1498. if (NT_SUCCESS(status) && GpcCfCounts[GPC_CF_QOS]) {
  1499. status =
  1500. GpcEntries.GpcGetUlongFromCfInfoHandler(
  1501. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1502. SendTCB->tcb_opt.ioi_GPCHandle,
  1503. ServiceTypeOffset,
  1504. &ServiceType);
  1505. // It is likely that the pattern has gone by now
  1506. // and the handle that we are caching is INVALID.
  1507. // We need to pull up a new handle and get the
  1508. // TOS bit again.
  1509. if (STATUS_INVALID_HANDLE == status) {
  1510. IF_TCPDBG(TCP_DEBUG_GPC)
  1511. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: Classification Handle is NULL, "
  1512. "getting one now.\n"));
  1513. SendTCB->tcb_opt.ioi_GPCHandle = 0;
  1514. status =
  1515. GpcEntries.GpcClassifyPatternHandler(
  1516. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1517. GPC_PROTOCOL_TEMPLATE_IP,
  1518. &Pattern,
  1519. NULL, // context
  1520. &SendTCB->tcb_opt.ioi_GPCHandle,
  1521. 0,
  1522. NULL,
  1523. FALSE);
  1524. //
  1525. // Only if QOS patterns exist, we get the TOS bits out.
  1526. //
  1527. if (NT_SUCCESS(status) && GpcCfCounts[GPC_CF_QOS]) {
  1528. status =
  1529. GpcEntries.GpcGetUlongFromCfInfoHandler(
  1530. (GPC_HANDLE) hGpcClient[GPC_CF_QOS],
  1531. SendTCB->tcb_opt.ioi_GPCHandle,
  1532. ServiceTypeOffset,
  1533. &ServiceType);
  1534. }
  1535. }
  1536. }
  1537. //
  1538. // Perhaps something needs to be done if GPC_CF_IPSEC has non-zero patterns.
  1539. //
  1540. //
  1541. // Set the TOS bit now.
  1542. //
  1543. IF_TCPDBG(TCP_DEBUG_GPC)
  1544. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: ServiceType(%d)=%d\n", ServiceTypeOffset,
  1545. ServiceType));
  1546. if (status == STATUS_SUCCESS) {
  1547. //
  1548. // Now we directly get the TOS value from PSched.
  1549. //
  1550. SendTCB->tcb_opt.ioi_tos |= ServiceType;
  1551. }
  1552. IF_TCPDBG(TCP_DEBUG_GPC)
  1553. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPsend: TOS set to 0x%x\n", SendTCB->tcb_opt.ioi_tos));
  1554. }
  1555. #endif
  1556. }
  1557. BOOLEAN
  1558. ProcessSend(TCB *SendTCB, SendCmpltContext *SCC, uint *pSendLength, uint AmtUnsent,
  1559. TCPHeader *Header, int SendWin, PNDIS_BUFFER CurrentBuffer)
  1560. {
  1561. TCPSendReq *CurSend = SCC->scc_firstsend;
  1562. long Result;
  1563. uint AmountLeft = *pSendLength;
  1564. ulong PrevFlags;
  1565. Queue *Next;
  1566. SeqNum OldSeq;
  1567. if (*pSendLength != 0) {
  1568. do {
  1569. BOOLEAN DirectSend = FALSE;
  1570. ASSERT(CurSend->tsr_refcnt > 0);
  1571. Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
  1572. ASSERT(Result > 0);
  1573. SCC->scc_count++;
  1574. if (SendTCB->tcb_sendofs == 0 &&
  1575. (SendTCB->tcb_sendsize <= AmountLeft) &&
  1576. (SCC->scc_tbufcount == 0) &&
  1577. (CurSend->tsr_lastbuf == NULL)) {
  1578. ulong length = 0;
  1579. PNDIS_BUFFER tmp = SendTCB->tcb_sendbuf;
  1580. while (tmp) {
  1581. length += NdisBufferLength(tmp);
  1582. tmp = NDIS_BUFFER_LINKAGE(tmp);
  1583. }
  1584. // If the requested length is
  1585. // more than in this mdl chain
  1586. // we can use fast path
  1587. if (AmountLeft >= length) {
  1588. DirectSend = TRUE;
  1589. }
  1590. }
  1591. if (DirectSend) {
  1592. NDIS_BUFFER_LINKAGE(CurrentBuffer) = SendTCB->tcb_sendbuf;
  1593. do {
  1594. SCC->scc_ubufcount++;
  1595. CurrentBuffer =
  1596. NDIS_BUFFER_LINKAGE(CurrentBuffer);
  1597. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  1598. CurSend->tsr_lastbuf = CurrentBuffer;
  1599. AmountLeft -= SendTCB->tcb_sendsize;
  1600. SendTCB->tcb_sendsize = 0;
  1601. } else {
  1602. uint AmountToDup;
  1603. PNDIS_BUFFER NewBuf, Buf;
  1604. uint Offset;
  1605. NDIS_STATUS NStatus;
  1606. uint Length;
  1607. // Either the current send has more data than
  1608. // or the offset is not zero.
  1609. // In either case we'll need to loop
  1610. // through the current send, allocating buffers.
  1611. Buf = SendTCB->tcb_sendbuf;
  1612. Offset = SendTCB->tcb_sendofs;
  1613. do {
  1614. ASSERT(Buf != NULL);
  1615. Length = NdisBufferLength(Buf);
  1616. ASSERT((Offset < Length) ||
  1617. (Offset == 0 && Length == 0));
  1618. // Adjust the length for the offset into
  1619. // this buffer.
  1620. Length -= Offset;
  1621. AmountToDup = MIN(AmountLeft, Length);
  1622. NdisCopyBuffer(&NStatus, &NewBuf, TCPSendBufferPool, Buf,
  1623. Offset, AmountToDup);
  1624. if (NStatus == NDIS_STATUS_SUCCESS) {
  1625. SCC->scc_tbufcount++;
  1626. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  1627. CurrentBuffer = NewBuf;
  1628. if (AmountToDup >= Length) {
  1629. // Exhausted this buffer.
  1630. Buf = NDIS_BUFFER_LINKAGE(Buf);
  1631. Offset = 0;
  1632. } else {
  1633. Offset += AmountToDup;
  1634. ASSERT(Offset < NdisBufferLength(Buf));
  1635. }
  1636. SendTCB->tcb_sendsize -= AmountToDup;
  1637. AmountLeft -= AmountToDup;
  1638. } else {
  1639. // Couldn't allocate a buffer. If
  1640. // the packet is already partly built,
  1641. // send what we've got, otherwise
  1642. // bail out.
  1643. if (SCC->scc_tbufcount == 0 &&
  1644. SCC->scc_ubufcount == 0) {
  1645. return FALSE;
  1646. }
  1647. *pSendLength -= AmountLeft;
  1648. AmountLeft = 0;
  1649. }
  1650. } while (AmountLeft && SendTCB->tcb_sendsize);
  1651. SendTCB->tcb_sendbuf = Buf;
  1652. SendTCB->tcb_sendofs = Offset;
  1653. }
  1654. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  1655. ushort UP;
  1656. // This send is urgent data. We need to figure
  1657. // out what the urgent data pointer should be.
  1658. // We know sendnext is the starting sequence
  1659. // number of the frame, and that at the top of
  1660. // this do loop sendnext identified a byte in
  1661. // the CurSend at that time. We advanced CurSend
  1662. // at the same rate we've decremented
  1663. // AmountLeft (AmountToSend - AmountLeft ==
  1664. // AmountBuilt), so sendnext +
  1665. // (AmountToSend - AmountLeft) identifies a byte
  1666. // in the current value of CurSend, and that
  1667. // quantity plus tcb_sendsize is the sequence
  1668. // number one beyond the current send.
  1669. UP =
  1670. (ushort) (*pSendLength - AmountLeft) +
  1671. (ushort) SendTCB->tcb_sendsize -
  1672. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  1673. Header->tcp_urgent = net_short(UP);
  1674. Header->tcp_flags |= TCP_FLAG_URG;
  1675. }
  1676. if (SendTCB->tcb_sendsize == 0) {
  1677. // We've exhausted this send. Set the PUSH bit.
  1678. Header->tcp_flags |= TCP_FLAG_PUSH;
  1679. PrevFlags = CurSend->tsr_flags;
  1680. Next = QNEXT(&CurSend->tsr_req.tr_q);
  1681. if (Next != QEND(&SendTCB->tcb_sendq)) {
  1682. CurSend = STRUCT_OF(TCPSendReq,
  1683. QSTRUCT(TCPReq, Next,
  1684. tr_q), tsr_req);
  1685. CTEStructAssert(CurSend, tsr);
  1686. SendTCB->tcb_sendsize =
  1687. CurSend->tsr_unasize;
  1688. SendTCB->tcb_sendofs = CurSend->tsr_offset;
  1689. SendTCB->tcb_sendbuf = CurSend->tsr_buffer;
  1690. SendTCB->tcb_cursend = CurSend;
  1691. // Check the urgent flags. We can't combine
  1692. // new urgent data on to the end of old
  1693. // non-urgent data.
  1694. if ((PrevFlags & TSR_FLAG_URG) && !
  1695. (CurSend->tsr_flags & TSR_FLAG_URG))
  1696. break;
  1697. } else {
  1698. ASSERT(AmountLeft == 0);
  1699. SendTCB->tcb_cursend = NULL;
  1700. SendTCB->tcb_sendbuf = NULL;
  1701. }
  1702. }
  1703. } while (AmountLeft != 0);
  1704. }
  1705. // Update the sequence numbers, and start a RTT
  1706. // measurement if needed.
  1707. // Adjust for what we're really going to send.
  1708. *pSendLength -= AmountLeft;
  1709. OldSeq = SendTCB->tcb_sendnext;
  1710. SendTCB->tcb_sendnext += *pSendLength;
  1711. if (SEQ_EQ(OldSeq, SendTCB->tcb_sendmax)) {
  1712. // We're sending entirely new data.
  1713. // We can't advance sendmax once FIN_SENT is set.
  1714. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1715. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1716. // We've advanced sendmax, so we must be sending
  1717. // some new data, so bump the outsegs counter.
  1718. TCPSIncrementOutSegCount();
  1719. if (SendTCB->tcb_rtt == 0) {
  1720. // No RTT running, so start one.
  1721. SendTCB->tcb_rtt = TCPTime;
  1722. SendTCB->tcb_rttseq = OldSeq;
  1723. }
  1724. } else {
  1725. // We have at least some retransmission.
  1726. if ((SendTCB->tcb_sendmax - OldSeq) > 1) {
  1727. TStats.ts_retranssegs++;
  1728. }
  1729. if (SEQ_GT(SendTCB->tcb_sendnext,
  1730. SendTCB->tcb_sendmax)) {
  1731. // But we also have some new data, so check the rtt stuff.
  1732. TCPSIncrementOutSegCount();
  1733. ASSERT(!(SendTCB->tcb_flags & FIN_SENT));
  1734. SendTCB->tcb_sendmax = SendTCB->tcb_sendnext;
  1735. if (SendTCB->tcb_rtt == 0) {
  1736. // No RTT running, so start one.
  1737. SendTCB->tcb_rtt = TCPTime;
  1738. SendTCB->tcb_rttseq = OldSeq;
  1739. }
  1740. }
  1741. }
  1742. // We've built the frame entirely. If we've send
  1743. // everything we have and there is a FIN pending,
  1744. // OR it in.
  1745. if (AmtUnsent == *pSendLength) {
  1746. if (SendTCB->tcb_flags & FIN_NEEDED) {
  1747. ASSERT(!(SendTCB->tcb_flags & FIN_SENT) ||
  1748. (SendTCB->tcb_sendnext ==
  1749. (SendTCB->tcb_sendmax - 1)));
  1750. // See if we still have room in the window for a FIN.
  1751. if (SendWin > (int)*pSendLength) {
  1752. Header->tcp_flags |= TCP_FLAG_FIN;
  1753. SendTCB->tcb_sendnext++;
  1754. SendTCB->tcb_sendmax =
  1755. SendTCB->tcb_sendnext;
  1756. SendTCB->tcb_flags |=
  1757. (FIN_SENT | FIN_OUTSTANDING);
  1758. SendTCB->tcb_flags &= ~FIN_NEEDED;
  1759. }
  1760. }
  1761. }
  1762. return TRUE;
  1763. }
  1764. //* TCPSend - Send data from a TCP connection.
  1765. //
  1766. // This is the main 'send data' routine. We go into a loop, trying
  1767. // to send data until we can't for some reason. First we compute
  1768. // the useable window, use it to figure the amount we could send. If
  1769. // the amount we could send meets certain criteria we'll build a frame
  1770. // and send it, after setting any appropriate control bits. We assume
  1771. // the caller has put a reference on the TCB.
  1772. //
  1773. // Input: SendTCB - TCB to be sent from.
  1774. // TCBHandle - Lock handle for TCB.
  1775. //
  1776. // Returns: Nothing.
  1777. //
  1778. void
  1779. TCPSend(TCB * SendTCB, CTELockHandle TCBHandle)
  1780. {
  1781. int SendWin; // Useable send window.
  1782. uint AmountToSend; // Amount to send this time.
  1783. uint AmountLeft;
  1784. TCPHeader *Header; // TCP header for a send.
  1785. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  1786. TCPSendReq *CurSend;
  1787. SendCmpltContext *SCC;
  1788. SeqNum OldSeq;
  1789. IP_STATUS SendStatus;
  1790. uint AmtOutstanding, AmtUnsent;
  1791. int ForceWin; // Window we're force to use.
  1792. BOOLEAN FullSegment;
  1793. BOOLEAN MoreToSend = FALSE;
  1794. uint SegmentsSent = 0;
  1795. BOOLEAN LargeSendOffload = FALSE;
  1796. BOOLEAN LargeSendFailed = FALSE;
  1797. uint MSS;
  1798. uint LargeSend, SentBytes;
  1799. void *Irp;
  1800. CTEStructAssert(SendTCB, tcb);
  1801. ASSERT(SendTCB->tcb_refcnt != 0);
  1802. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  1803. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  1804. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  1805. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  1806. if (!(SendTCB->tcb_flags & IN_TCP_SEND) &&
  1807. !(SendTCB->tcb_fastchk & TCP_FLAG_IN_RCV)) {
  1808. SendTCB->tcb_flags |= IN_TCP_SEND;
  1809. // We'll continue this loop until we send a FIN, or we break out
  1810. // internally for some other reason.
  1811. while (!(SendTCB->tcb_flags & FIN_OUTSTANDING)) {
  1812. CheckTCBSends(SendTCB);
  1813. SegmentsSent++;
  1814. if (SegmentsSent > MaxSendSegments) {
  1815. // We are throttled by max segments that can be sent in
  1816. // this loop. Comeback later
  1817. MoreToSend = TRUE;
  1818. break;
  1819. }
  1820. AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
  1821. SendTCB->tcb_senduna);
  1822. AmtUnsent = SendTCB->tcb_unacked - AmtOutstanding;
  1823. ASSERT(*(int *)&AmtUnsent >= 0);
  1824. SendWin = (int)(MIN(SendTCB->tcb_sendwin, SendTCB->tcb_cwin) -
  1825. AmtOutstanding);
  1826. // if this send is after the fast recovery
  1827. // and sendwin is zero because of amt outstanding
  1828. // then, at least force 1 segment to prevent delayed
  1829. // ack timeouts from the remote
  1830. if (SendTCB->tcb_force) {
  1831. SendTCB->tcb_force = 0;
  1832. if (SendWin < SendTCB->tcb_mss) {
  1833. SendWin = SendTCB->tcb_mss;
  1834. }
  1835. }
  1836. // Since the window could have shrank, need to get it to zero at
  1837. // least.
  1838. ForceWin = (int)((SendTCB->tcb_flags & FORCE_OUTPUT) >>
  1839. FORCE_OUT_SHIFT);
  1840. SendWin = MAX(SendWin, ForceWin);
  1841. LargeSend = MIN((uint) SendWin, AmtUnsent);
  1842. LargeSend = MIN(LargeSend, SendTCB->tcb_mss * MaxSendSegments);
  1843. AmountToSend =
  1844. MIN(MIN((uint) SendWin, AmtUnsent), SendTCB->tcb_mss);
  1845. ASSERT(SendTCB->tcb_mss > 0);
  1846. // Time stamp option addition might force us to cut the data
  1847. // to be sent by 12 bytes.
  1848. FullSegment = FALSE;
  1849. if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
  1850. (AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
  1851. AmountToSend = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
  1852. FullSegment = TRUE;
  1853. } else {
  1854. if (AmountToSend == SendTCB->tcb_mss)
  1855. FullSegment = TRUE;
  1856. }
  1857. // We will send a segment if
  1858. //
  1859. // 1. The segment size == mss
  1860. // 2. This is the only segment to be sent
  1861. // 3. FIN is set and this is the last segment
  1862. // 4. FORCE_OUTPUT is set
  1863. // 5. Amount to be sent is >= MSS/2
  1864. if (FullSegment ||
  1865. (AmountToSend != 0 && AmountToSend == AmtUnsent) ||
  1866. (SendWin != 0 &&
  1867. (((SendTCB->tcb_flags & FIN_NEEDED) &&
  1868. (AmtUnsent <= SendTCB->tcb_mss)) ||
  1869. (SendTCB->tcb_flags & FORCE_OUTPUT) ||
  1870. AmountToSend >= (SendTCB->tcb_maxwin / 2)))) {
  1871. // It's OK to send something. Try to get a header buffer now.
  1872. FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
  1873. if (FirstBuffer != NULL) {
  1874. // Got a header buffer. Loop through the sends on the TCB,
  1875. // building a frame.
  1876. CurrentBuffer = FirstBuffer;
  1877. CurSend = SendTCB->tcb_cursend;
  1878. Header =
  1879. (TCPHeader *)((PUCHAR)Header + LocalNetInfo.ipi_hsize);
  1880. // allow room for filling time stamp options (12 bytes)
  1881. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1882. NdisAdjustBufferLength(FirstBuffer,
  1883. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  1884. SCC = (SendCmpltContext *) (Header + 1);
  1885. (uchar *) SCC += ALIGNED_TS_OPT_SIZE;
  1886. } else {
  1887. SCC = (SendCmpltContext *) (Header + 1);
  1888. }
  1889. SCC = ALIGN_UP_POINTER(SCC, PVOID);
  1890. #if DBG
  1891. SCC->scc_sig = scc_signature;
  1892. #endif
  1893. FillTCPHeader(SendTCB, Header);
  1894. SCC->scc_ubufcount = 0;
  1895. SCC->scc_tbufcount = 0;
  1896. SCC->scc_count = 0;
  1897. SCC->scc_LargeSend = 0;
  1898. // Check if RCE has large send capability and, if so,
  1899. // attempt to offload segmentation to the hardware.
  1900. // * only offload if there is more than 1 segment's worth
  1901. // of data.
  1902. // * only offload if the number of segments is greater than
  1903. // the minimum number of segments the adapter is willing
  1904. // to offload.
  1905. if (SendTCB->tcb_rce &&
  1906. (SendTCB->tcb_rce->rce_OffloadFlags &
  1907. TCP_LARGE_SEND_OFFLOAD) &&
  1908. !LargeSendFailed &&
  1909. (SendTCB->tcb_mss < LargeSend) &&
  1910. (SendTCB->tcb_rce->rce_TcpLargeSend.MinSegmentCount <=
  1911. (LargeSend + SendTCB->tcb_mss - 1) / SendTCB->tcb_mss) &&
  1912. (CurSend && (CurSend->tsr_lastbuf == NULL)) && !(CurSend->tsr_flags & TSR_FLAG_URG)) {
  1913. LargeSendOffload = TRUE;
  1914. LargeSend =
  1915. MIN(SendTCB->tcb_rce->rce_TcpLargeSend.MaxOffLoadSize,
  1916. LargeSend);
  1917. // Bypass offload if we need support for TCP options
  1918. // and the adapter doesn't support them, or if we need
  1919. // support for IP options and the adapter doesn't
  1920. // support them.
  1921. if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
  1922. !(SendTCB->tcb_rce->rce_OffloadFlags &
  1923. TCP_LARGE_SEND_TCPOPT_OFFLOAD)) {
  1924. LargeSendOffload = FALSE;
  1925. } else if (SendTCB->tcb_opt.ioi_options &&
  1926. !(SendTCB->tcb_rce->rce_OffloadFlags &
  1927. TCP_LARGE_SEND_IPOPT_OFFLOAD)) {
  1928. LargeSendOffload = FALSE;
  1929. }
  1930. //
  1931. // LargeSend can not be zero.
  1932. //
  1933. if (LargeSend == 0) {
  1934. LargeSendOffload = FALSE;
  1935. }
  1936. } else {
  1937. LargeSendOffload = FALSE;
  1938. }
  1939. if (LargeSendOffload && !DisableLargeSendOffload) {
  1940. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  1941. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x offload %d bytes at "
  1942. "seq %u ack %u win %u\n",
  1943. SendTCB, LargeSend, SendTCB->tcb_sendnext,
  1944. SendTCB->tcb_rcvnext, SendWin));
  1945. }
  1946. OldSeq = SendTCB->tcb_sendnext;
  1947. CTEStructAssert(CurSend, tsr);
  1948. SCC->scc_firstsend = CurSend;
  1949. if (!ProcessSend(SendTCB, SCC, &LargeSend, AmtUnsent, Header,
  1950. SendWin, CurrentBuffer)) {
  1951. goto error_oor1;
  1952. }
  1953. {
  1954. uint PHXsum = SendTCB->tcb_phxsum;
  1955. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  1956. PHXsum) >> 16;
  1957. Header->tcp_xsum = (ushort) PHXsum;
  1958. }
  1959. SCC->scc_SendSize = LargeSend;
  1960. SCC->scc_ByteSent = 0;
  1961. SCC->scc_LargeSend = SendTCB;
  1962. REFERENCE_TCB(SendTCB);
  1963. #if DBG
  1964. SendTCB->tcb_LargeSend++;
  1965. #endif
  1966. SendTCB->tcb_rcvdsegs = 0;
  1967. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  1968. LargeSend +=
  1969. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE;
  1970. MSS = SendTCB->tcb_mss - ALIGNED_TS_OPT_SIZE;
  1971. } else {
  1972. LargeSend += sizeof(TCPHeader);
  1973. MSS = SendTCB->tcb_mss;
  1974. }
  1975. IF_TCPDBG(TCP_DEBUG_OFFLOAD) {
  1976. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TCPSend: tcb %x large-send %d seq %u\n",
  1977. SendTCB, LargeSend, OldSeq));
  1978. }
  1979. ClassifyPacket(SendTCB);
  1980. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  1981. SendStatus =
  1982. (*LocalNetInfo.ipi_largexmit)(TCPProtInfo, SCC,
  1983. FirstBuffer,
  1984. LargeSend,
  1985. SendTCB->tcb_daddr,
  1986. SendTCB->tcb_saddr,
  1987. &SendTCB->tcb_opt,
  1988. SendTCB->tcb_rce,
  1989. PROTOCOL_TCP,
  1990. &SentBytes,
  1991. MSS);
  1992. SendTCB->tcb_error = SendStatus;
  1993. if (SendStatus != IP_PENDING) {
  1994. // Let TCPSendComplete hanlde partial sends
  1995. SCC->scc_ByteSent = SentBytes;
  1996. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  1997. }
  1998. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  1999. if (SendStatus == IP_GENERAL_FAILURE) {
  2000. if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
  2001. SEQ_LT(OldSeq, SendTCB->tcb_sendnext)) {
  2002. ResetSendNext(SendTCB, OldSeq);
  2003. }
  2004. LargeSendFailed = TRUE;
  2005. continue;
  2006. }
  2007. if (SendStatus == IP_PACKET_TOO_BIG) {
  2008. SeqNum NewSeq = OldSeq + SentBytes;
  2009. //Not everything got sent.
  2010. //Adjust for what is sent
  2011. if (SEQ_GTE(NewSeq, SendTCB->tcb_senduna) &&
  2012. SEQ_LT(NewSeq, SendTCB->tcb_sendnext)) {
  2013. ResetSendNext(SendTCB, NewSeq);
  2014. }
  2015. }
  2016. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2017. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2018. }
  2019. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT |
  2020. FORCE_OUTPUT | SEND_AFTER_RCV);
  2021. DerefTCB(SendTCB, TCBHandle);
  2022. return;
  2023. }
  2024. // Normal path
  2025. AmountLeft = AmountToSend;
  2026. if (AmountToSend != 0) {
  2027. CTEStructAssert(CurSend, tsr);
  2028. SCC->scc_firstsend = CurSend;
  2029. } else {
  2030. // We're in the loop, but AmountToSend is 0. This
  2031. // should happen only when we're sending a FIN. Check
  2032. // this, and return if it's not true.
  2033. ASSERT(AmtUnsent == 0);
  2034. if (!(SendTCB->tcb_flags & FIN_NEEDED)) {
  2035. FreeTCPHeader(FirstBuffer);
  2036. break;
  2037. }
  2038. SCC->scc_firstsend = NULL;
  2039. NDIS_BUFFER_LINKAGE(FirstBuffer) = NULL;
  2040. }
  2041. OldSeq = SendTCB->tcb_sendnext;
  2042. if (!ProcessSend(SendTCB, SCC, &AmountToSend, AmtUnsent, Header,
  2043. SendWin, CurrentBuffer)) {
  2044. goto error_oor1;
  2045. }
  2046. AmountToSend += sizeof(TCPHeader);
  2047. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
  2048. FORCE_OUTPUT);
  2049. STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
  2050. STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
  2051. SendTCB->tcb_rcvdsegs = 0;
  2052. if ( (SendTCB->tcb_flags & KEEPALIVE) && ( SendTCB->tcb_conn != NULL) )
  2053. START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
  2054. SendTCB->tcb_kacount = 0;
  2055. // We're all set. Xsum it and send it.
  2056. #if DROP_PKT
  2057. if (SimPacketDrop && DropPackets) {
  2058. PkttoDrop += 1;
  2059. if (PkttoDrop > SimPacketDrop) {
  2060. PkttoDrop = 0;
  2061. SendStatus = IP_SUCCESS;
  2062. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Packet Dropped %d\n", OldSeq));
  2063. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2064. goto fake_sent;
  2065. }
  2066. }
  2067. #endif
  2068. ClassifyPacket(SendTCB);
  2069. // Account for time stamp options
  2070. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  2071. if (SendTCB->tcb_rce &&
  2072. (SendTCB->tcb_rce->rce_OffloadFlags &
  2073. TCP_XMT_CHECKSUM_OFFLOAD) &&
  2074. (SendTCB->tcb_rce->rce_OffloadFlags &
  2075. TCP_CHECKSUM_OPT_OFFLOAD)) {
  2076. uint PHXsum =
  2077. SendTCB->tcb_phxsum +
  2078. (uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE);
  2079. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  2080. PHXsum) >> 16;
  2081. Header->tcp_xsum = (ushort) PHXsum;
  2082. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2083. #if DBG
  2084. DbgTcpSendHwChksumCount++;
  2085. #endif
  2086. } else {
  2087. Header->tcp_xsum =
  2088. ~XsumSendChain(
  2089. SendTCB->tcb_phxsum +
  2090. (uint)net_short(AmountToSend + ALIGNED_TS_OPT_SIZE),
  2091. FirstBuffer);
  2092. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2093. }
  2094. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2095. Irp = NULL;
  2096. if (SCC->scc_firstsend) {
  2097. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2098. }
  2099. SendStatus =
  2100. (*LocalNetInfo.ipi_xmit)(TCPProtInfo, SCC,
  2101. FirstBuffer,
  2102. AmountToSend +
  2103. ALIGNED_TS_OPT_SIZE,
  2104. SendTCB->tcb_daddr,
  2105. SendTCB->tcb_saddr,
  2106. &SendTCB->tcb_opt,
  2107. SendTCB->tcb_rce,
  2108. PROTOCOL_TCP,
  2109. Irp );
  2110. } else {
  2111. if (SendTCB->tcb_rce &&
  2112. (SendTCB->tcb_rce->rce_OffloadFlags &
  2113. TCP_XMT_CHECKSUM_OFFLOAD)) {
  2114. uint PHXsum = SendTCB->tcb_phxsum +
  2115. (uint)net_short(AmountToSend);
  2116. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) +
  2117. PHXsum) >> 16;
  2118. Header->tcp_xsum = (ushort) PHXsum;
  2119. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2120. #if DBG
  2121. DbgTcpSendHwChksumCount++;
  2122. #endif
  2123. } else {
  2124. Header->tcp_xsum =
  2125. ~XsumSendChain(SendTCB->tcb_phxsum +
  2126. (uint)net_short(AmountToSend),
  2127. FirstBuffer);
  2128. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2129. }
  2130. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2131. Irp = NULL;
  2132. if(SCC->scc_firstsend) {
  2133. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2134. }
  2135. SendStatus =
  2136. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  2137. SCC,
  2138. FirstBuffer,
  2139. AmountToSend,
  2140. SendTCB->tcb_daddr,
  2141. SendTCB->tcb_saddr,
  2142. &SendTCB->tcb_opt,
  2143. SendTCB->tcb_rce,
  2144. PROTOCOL_TCP,
  2145. Irp );
  2146. }
  2147. #if DROP_PKT //NKS
  2148. fake_sent:;
  2149. #endif
  2150. SendTCB->tcb_error = SendStatus;
  2151. if (SendStatus != IP_PENDING) {
  2152. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2153. if (SendStatus != IP_SUCCESS) {
  2154. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2155. // This packet didn't get sent. If nothing's
  2156. // changed in the TCB, put sendnext back to
  2157. // what we just tried to send. Depending on
  2158. // the error, we may try again.
  2159. if (SEQ_GTE(OldSeq, SendTCB->tcb_senduna) &&
  2160. SEQ_LT(OldSeq, SendTCB->tcb_sendnext))
  2161. ResetSendNext(SendTCB, OldSeq);
  2162. // We know this packet didn't get sent. Start
  2163. // the retransmit timer now, if it's not already
  2164. // runnimg, in case someone came in while we
  2165. // were in IP and stopped it.
  2166. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2167. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2168. }
  2169. // If it failed because of an MTU problem, get
  2170. // the new MTU and try again.
  2171. if (SendStatus == IP_PACKET_TOO_BIG) {
  2172. uint NewMTU;
  2173. // The MTU has changed. Update it, and try
  2174. // again.
  2175. // if ipsec is adjusting the mtu, rce_newmtu
  2176. // will contain the newmtu.
  2177. if (SendTCB->tcb_rce) {
  2178. if (!SendTCB->tcb_rce->rce_newmtu) {
  2179. SendStatus =
  2180. (*LocalNetInfo.ipi_getpinfo)(
  2181. SendTCB->tcb_daddr,
  2182. SendTCB->tcb_saddr,
  2183. &NewMTU,
  2184. NULL,
  2185. SendTCB->tcb_rce);
  2186. } else {
  2187. NewMTU = SendTCB->tcb_rce->rce_newmtu;
  2188. SendStatus = IP_SUCCESS;
  2189. }
  2190. } else {
  2191. SendStatus =
  2192. (*LocalNetInfo.ipi_getpinfo)(
  2193. SendTCB->tcb_daddr,
  2194. SendTCB->tcb_saddr,
  2195. &NewMTU,
  2196. NULL,
  2197. SendTCB->tcb_rce);
  2198. }
  2199. if (SendStatus != IP_SUCCESS)
  2200. break;
  2201. // We have a new MTU. Make sure it's big enough
  2202. // to use. If not, correct this and turn off
  2203. // MTU discovery on this TCB. Otherwise use the
  2204. // new MTU.
  2205. if (NewMTU <=
  2206. (sizeof(TCPHeader) +
  2207. SendTCB->tcb_opt.ioi_optlength)) {
  2208. // The new MTU is too small to use. Turn off
  2209. // PMTU discovery on this TCB, and drop to
  2210. // our off net MTU size.
  2211. SendTCB->tcb_opt.ioi_flags &= ~IP_FLAG_DF;
  2212. SendTCB->tcb_mss =
  2213. MIN((ushort)MAX_REMOTE_MSS,
  2214. SendTCB->tcb_remmss);
  2215. } else {
  2216. // The new MTU is adequate. Adjust it for
  2217. // the header size and options length, and
  2218. // use it.
  2219. NewMTU -= sizeof(TCPHeader) -
  2220. SendTCB->tcb_opt.ioi_optlength;
  2221. SendTCB->tcb_mss =
  2222. MIN((ushort) NewMTU,
  2223. SendTCB->tcb_remmss);
  2224. }
  2225. ASSERT(SendTCB->tcb_mss > 0);
  2226. ValidateMSS(SendTCB);
  2227. continue;
  2228. }
  2229. break;
  2230. }
  2231. }
  2232. //Start it now, since we know that mac driver accepted it.
  2233. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2234. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2235. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2236. }
  2237. continue;
  2238. } else // FirstBuffer != NULL.
  2239. goto error_oor;
  2240. } else {
  2241. // We've decided we can't send anything now. Figure out why, and
  2242. // see if we need to set a timer.
  2243. if (SendTCB->tcb_sendwin == 0) {
  2244. if (!(SendTCB->tcb_flags & FLOW_CNTLD)) {
  2245. ushort tmp;
  2246. SendTCB->tcb_flags |= FLOW_CNTLD;
  2247. SendTCB->tcb_rexmitcnt = 0;
  2248. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2249. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2250. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2251. SendTCB->tcb_slowcount++;
  2252. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2253. } else if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER))
  2254. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2255. } else if (AmountToSend != 0)
  2256. // We have something to send, but we're not sending
  2257. // it, presumably due to SWS avoidance.
  2258. if (!TCB_TIMER_RUNNING_R(SendTCB, SWS_TIMER))
  2259. START_TCB_TIMER_R(SendTCB, SWS_TIMER, SWS_TO);
  2260. break;
  2261. }
  2262. } // while (!FIN_OUTSTANDING)
  2263. // We're done sending, so we don't need the output flags set.
  2264. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT |
  2265. SEND_AFTER_RCV);
  2266. if (MoreToSend) {
  2267. //just indicate that we need to send more
  2268. DelayAction(SendTCB, NEED_OUTPUT);
  2269. }
  2270. // This is for TS algo
  2271. SendTCB->tcb_lastack = SendTCB->tcb_rcvnext;
  2272. } else
  2273. SendTCB->tcb_flags |= SEND_AFTER_RCV;
  2274. DerefTCB(SendTCB, TCBHandle);
  2275. return;
  2276. // Common case error handling code for out of resource conditions. Start the
  2277. // retransmit timer if it's not already running (so that we try this again
  2278. // later), clean up and return.
  2279. error_oor:
  2280. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2281. ushort tmp;
  2282. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2283. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2284. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2285. }
  2286. // We had an out of resource problem, so clear the OUTPUT flags.
  2287. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  2288. DerefTCB(SendTCB, TCBHandle);
  2289. return;
  2290. error_oor1:
  2291. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2292. ushort tmp;
  2293. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2294. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2295. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2296. }
  2297. // We had an out of resource problem, so clear the OUTPUT flags.
  2298. SendTCB->tcb_flags &= ~(IN_TCP_SEND | NEED_OUTPUT | FORCE_OUTPUT);
  2299. DerefTCB(SendTCB, TCBHandle);
  2300. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2301. return;
  2302. }
  2303. //* ResetSendNextAndFastSend - Set the sendnext value of a TCB.
  2304. //
  2305. // Called to handle fast retransmit of the segment which the reveiver
  2306. // is asking for.
  2307. // We assume the caller has put a reference on the TCB, and the TCB is locked
  2308. // on entry. The reference is dropped and the lock released before returning.
  2309. //
  2310. // Input: SeqTCB - Pointer to TCB to be updated.
  2311. // NewSeq - Sequence number to set.
  2312. // NewCWin - new value for congestion window.
  2313. //
  2314. // Returns: Nothing.
  2315. //
  2316. void
  2317. ResetAndFastSend(TCB * SeqTCB, SeqNum NewSeq, uint NewCWin)
  2318. {
  2319. TCPSendReq *SendReq;
  2320. uint AmtForward;
  2321. Queue *CurQ;
  2322. PNDIS_BUFFER Buffer;
  2323. uint Offset;
  2324. uint SendSize;
  2325. CTELockHandle TCBHandle;
  2326. int ToBeSent;
  2327. CTEStructAssert(SeqTCB, tcb);
  2328. ASSERT(SEQ_GTE(NewSeq, SeqTCB->tcb_senduna));
  2329. // The new seq must be less than send max, or NewSeq, senduna, sendnext,
  2330. // and sendmax must all be equal. (The latter case happens when we're
  2331. // called exiting TIME_WAIT, or possibly when we're retransmitting
  2332. // during a flow controlled situation).
  2333. ASSERT(SEQ_LT(NewSeq, SeqTCB->tcb_sendmax) ||
  2334. (SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendnext) &&
  2335. SEQ_EQ(SeqTCB->tcb_senduna, SeqTCB->tcb_sendmax) &&
  2336. SEQ_EQ(SeqTCB->tcb_senduna, NewSeq)));
  2337. if (SYNC_STATE(SeqTCB->tcb_state) && SeqTCB->tcb_state != TCB_TIME_WAIT) {
  2338. // In these states we need to update the send queue.
  2339. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  2340. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  2341. SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
  2342. // SendReq points to the first send request on the send queue.
  2343. // We're pointing at the proper send req now. We need to go down
  2344. // SendReq points to the cursend
  2345. // SendSize point to sendsize in the cursend
  2346. SendSize = SendReq->tsr_unasize;
  2347. Buffer = SendReq->tsr_buffer;
  2348. Offset = SendReq->tsr_offset;
  2349. // Call the fast retransmit send now
  2350. if ((SeqTCB->tcb_tcpopts & TCP_FLAG_SACK)) {
  2351. SackListEntry *Prev, *Current;
  2352. SeqNum CurBegin, CurEnd;
  2353. Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2354. Current = Prev->next;
  2355. // There is a hole from Newseq to Currentbeg
  2356. // try to retransmit whole hole size!!
  2357. if (Current && SEQ_LT(NewSeq, Current->begin)) {
  2358. ToBeSent = Current->begin - NewSeq;
  2359. CurBegin = Current->begin;
  2360. CurEnd = Current->end;
  2361. } else {
  2362. ToBeSent = SeqTCB->tcb_mss;
  2363. }
  2364. IF_TCPDBG(TCP_DEBUG_SACK) {
  2365. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2366. "In Sack Reset and send rexmiting %d %d\n",
  2367. NewSeq, SendSize));
  2368. }
  2369. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  2370. ToBeSent);
  2371. // If we have not been already acked for the missing segments
  2372. // and if we know where to start retransmitting do so now.
  2373. // Also, re-validate SackListentry
  2374. Prev = STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2375. Current = Prev->next;
  2376. if (Current && Current->begin != CurBegin) {
  2377. // The SACK list changed while we were in a transmission.
  2378. // Just bail out, and wait for the next ACK to continue
  2379. // if necessary.
  2380. Current = NULL;
  2381. }
  2382. while (Current && Current->next &&
  2383. (SEQ_GTE(NewSeq, SeqTCB->tcb_senduna)) &&
  2384. (SEQ_LT(SeqTCB->tcb_senduna, Current->next->end))) {
  2385. SeqNum NextSeq;
  2386. ASSERT(SEQ_LTE(Current->begin, Current->end));
  2387. // There can be multiple dropped packets till
  2388. // Current->begin.
  2389. IF_TCPDBG(TCP_DEBUG_SACK) {
  2390. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2391. "Scanning after Current %d %d\n",
  2392. Current->begin, Current->end));
  2393. }
  2394. NextSeq = Current->end;
  2395. CurBegin = Current->begin;
  2396. ASSERT(SEQ_LT(NextSeq, SeqTCB->tcb_sendmax));
  2397. // If we have not yet sent the segment keep quiet now.
  2398. if (SEQ_GTE(NextSeq, SeqTCB->tcb_sendnext) ||
  2399. (SEQ_LTE(NextSeq, SeqTCB->tcb_senduna))) {
  2400. break;
  2401. }
  2402. // Position cursend by following number of bytes
  2403. AmtForward = NextSeq - NewSeq;
  2404. if (!EMPTYQ(&SeqTCB->tcb_sendq)) {
  2405. CurQ = QHEAD(&SeqTCB->tcb_sendq);
  2406. SendReq = (TCPSendReq *) STRUCT_OF(TCPReq, CurQ, tr_q);
  2407. while (AmtForward) {
  2408. if (AmtForward >= SendReq->tsr_unasize) {
  2409. AmtForward -= SendReq->tsr_unasize;
  2410. CurQ = QNEXT(CurQ);
  2411. SendReq =
  2412. (TCPSendReq *)STRUCT_OF(TCPReq, CurQ, tr_q);
  2413. ASSERT(CurQ != QEND(&SeqTCB->tcb_sendq));
  2414. } else {
  2415. break;
  2416. }
  2417. }
  2418. SendSize = SendReq->tsr_unasize - AmtForward;
  2419. Buffer = SendReq->tsr_buffer;
  2420. Offset = SendReq->tsr_offset;
  2421. while (AmtForward) {
  2422. uint Length;
  2423. ASSERT((Offset < NdisBufferLength(Buffer)) ||
  2424. ((Offset == 0) &&
  2425. (NdisBufferLength(Buffer) == 0)));
  2426. Length = NdisBufferLength(Buffer) - Offset;
  2427. if (AmtForward >= Length) {
  2428. // We're moving past this one. Skip over him,
  2429. // and 0 the Offset we're keeping.
  2430. AmtForward -= Length;
  2431. Offset = 0;
  2432. Buffer = NDIS_BUFFER_LINKAGE(Buffer);
  2433. ASSERT(Buffer != NULL);
  2434. } else {
  2435. break;
  2436. }
  2437. }
  2438. Offset = Offset + AmtForward;
  2439. // Okay. Now retransmit this seq too.
  2440. if (Current->next) {
  2441. ToBeSent = Current->next->begin - Current->end;
  2442. } else {
  2443. ToBeSent = SeqTCB->tcb_mss;
  2444. }
  2445. IF_TCPDBG(TCP_DEBUG_SACK) {
  2446. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  2447. "SACK inner loop rexmiting %d %d %d\n",
  2448. Current->end, SendSize, ToBeSent));
  2449. }
  2450. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize,
  2451. NextSeq, ToBeSent);
  2452. } else {
  2453. break;
  2454. }
  2455. // Also, re-validate Current Sack list in SackListentry
  2456. Prev =
  2457. STRUCT_OF(SackListEntry, &SeqTCB->tcb_SackRcvd, next);
  2458. Current = Prev->next;
  2459. while (Current && Current->begin != CurBegin) {
  2460. // The SACK list changed while in TCPFastSend.
  2461. // Just bail out.
  2462. Current = Current->next;
  2463. }
  2464. if (Current) {
  2465. Current = Current->next;
  2466. } else {
  2467. break;
  2468. }
  2469. }
  2470. } else {
  2471. ToBeSent = SeqTCB->tcb_mss;
  2472. TCPFastSend(SeqTCB, Buffer, Offset, SendReq, SendSize, NewSeq,
  2473. ToBeSent);
  2474. }
  2475. } else {
  2476. ASSERT(SeqTCB->tcb_cursend == NULL);
  2477. }
  2478. }
  2479. SeqTCB->tcb_cwin = NewCWin;
  2480. TCBHandle = DISPATCH_LEVEL;
  2481. DerefTCB(SeqTCB, TCBHandle);
  2482. return;
  2483. }
  2484. //* TCPFastSend - To send a segment without changing TCB state
  2485. //
  2486. // Called to handle fast retransmit of the segment
  2487. // tcb_lock will be held while entering (called by TCPRcv)
  2488. //
  2489. // Input: SendTCB - Pointer to TCB
  2490. // in_sendBuf - Pointer to ndis_buffer
  2491. // in_sendofs - Send Offset
  2492. // in_sendreq - current send request
  2493. // in_sendsize - size of this send
  2494. //
  2495. // Returns: Nothing.
  2496. //
  2497. void
  2498. TCPFastSend(TCB * SendTCB, PNDIS_BUFFER in_SendBuf, uint in_SendOfs,
  2499. TCPSendReq * in_SendReq, uint in_SendSize, SeqNum NextSeq,
  2500. int in_ToBeSent)
  2501. {
  2502. int SendWin; // Useable send window.
  2503. uint AmountToSend; // Amount to send this time.
  2504. uint AmountLeft;
  2505. TCPHeader *Header; // TCP header for a send.
  2506. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  2507. TCPSendReq *CurSend;
  2508. SendCmpltContext *SCC;
  2509. SeqNum OldSeq;
  2510. SeqNum SendNext;
  2511. IP_STATUS SendStatus;
  2512. uint AmtOutstanding, AmtUnsent;
  2513. int ForceWin; // Window we're force to use.
  2514. CTELockHandle TCBHandle;
  2515. void *Irp;
  2516. uint TSLen=0;
  2517. uint SendOfs = in_SendOfs;
  2518. uint SendSize = in_SendSize;
  2519. PNDIS_BUFFER SendBuf = in_SendBuf;
  2520. SendNext = NextSeq;
  2521. CurSend = in_SendReq;
  2522. TCBHandle = DISPATCH_LEVEL;
  2523. CTEStructAssert(SendTCB, tcb);
  2524. ASSERT(SendTCB->tcb_refcnt != 0);
  2525. ASSERT(*(int *)&SendTCB->tcb_sendwin >= 0);
  2526. ASSERT(*(int *)&SendTCB->tcb_cwin >= SendTCB->tcb_mss);
  2527. ASSERT(!(SendTCB->tcb_flags & FIN_OUTSTANDING) ||
  2528. (SendTCB->tcb_sendnext == SendTCB->tcb_sendmax));
  2529. AmtOutstanding = (uint) (SendTCB->tcb_sendnext -
  2530. SendTCB->tcb_senduna);
  2531. AmtUnsent = MIN(MIN(in_ToBeSent, (int)SendSize), (int)SendTCB->tcb_sendwin);
  2532. while (AmtUnsent > 0) {
  2533. if (SEQ_GT(SendTCB->tcb_senduna, SendNext)) {
  2534. // Since tcb_lock is releasd in this loop
  2535. // it is possible that delayed ack acked
  2536. // what we are trying to retransmit.
  2537. goto error_oor;
  2538. }
  2539. //This was minimum of sendwin and amtunsent
  2540. AmountToSend = MIN(AmtUnsent, SendTCB->tcb_mss);
  2541. // Time stamp option addition might force us to cut the data
  2542. // to be sent by 12 bytes.
  2543. if ((SendTCB->tcb_tcpopts & TCP_FLAG_TS) &&
  2544. (AmountToSend + ALIGNED_TS_OPT_SIZE >= SendTCB->tcb_mss)) {
  2545. AmountToSend -= ALIGNED_TS_OPT_SIZE;
  2546. }
  2547. // See if we have enough to send. We'll send if we have at least a
  2548. // segment, or if we really have some data to send and we can send
  2549. // all that we have, or the send window is > 0 and we need to force
  2550. // output or send a FIN (note that if we need to force output
  2551. // SendWin will be at least 1 from the check above), or if we can
  2552. // send an amount == to at least half the maximum send window
  2553. // we've seen.
  2554. ASSERT((int)AmtUnsent >= 0);
  2555. // It's OK to send something. Try to get a header buffer now.
  2556. // Mark the TCB for debugging.
  2557. // This should be removed for shipping version.
  2558. SendTCB->tcb_fastchk |= TCP_FLAG_FASTREC;
  2559. FirstBuffer = GetTCPHeaderAtDpcLevel(&Header);
  2560. if (FirstBuffer != NULL) {
  2561. // Got a header buffer. Loop through the sends on the TCB,
  2562. // building a frame.
  2563. CurrentBuffer = FirstBuffer;
  2564. Header = (TCPHeader *) ((PUCHAR)Header + LocalNetInfo.ipi_hsize);
  2565. // allow room for filling time stamp options.
  2566. if (SendTCB->tcb_tcpopts & TCP_FLAG_TS) {
  2567. // Account for time stamp options
  2568. TSLen = ALIGNED_TS_OPT_SIZE;
  2569. NdisAdjustBufferLength(FirstBuffer,
  2570. sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE);
  2571. SCC = ALIGN_UP_POINTER((SendCmpltContext *) (Header + 1),PVOID);
  2572. (uchar *) SCC += ALIGNED_TS_OPT_SIZE;
  2573. } else {
  2574. SCC = (SendCmpltContext *) (Header + 1);
  2575. }
  2576. SCC = ALIGN_UP_POINTER(SCC, PVOID);
  2577. #if DBG
  2578. SCC->scc_sig = scc_signature;
  2579. #endif
  2580. FillTCPHeader(SendTCB, Header);
  2581. {
  2582. ulong L = SendNext;
  2583. Header->tcp_seq = net_long(L);
  2584. }
  2585. SCC->scc_ubufcount = 0;
  2586. SCC->scc_tbufcount = 0;
  2587. SCC->scc_count = 0;
  2588. SCC->scc_LargeSend = 0;
  2589. AmountLeft = AmountToSend;
  2590. if (AmountToSend != 0) {
  2591. long Result;
  2592. CTEStructAssert(CurSend, tsr);
  2593. SCC->scc_firstsend = CurSend;
  2594. do {
  2595. BOOLEAN DirectSend = FALSE;
  2596. ASSERT(CurSend->tsr_refcnt > 0);
  2597. Result = CTEInterlockedIncrementLong(&(CurSend->tsr_refcnt));
  2598. ASSERT(Result > 0);
  2599. SCC->scc_count++;
  2600. // If the current send offset is 0 and the current
  2601. // send is less than or equal to what we have left
  2602. // to send, we haven't already put a transport
  2603. // buffer on this send, and nobody else is using
  2604. // the buffer chain directly, just use the input
  2605. // buffers. We check for other people using them
  2606. // by looking at tsr_lastbuf. If it's NULL,
  2607. // nobody else is using the buffers. If it's not
  2608. // NULL, somebody is.
  2609. if (SendOfs == 0 &&
  2610. (SendSize <= AmountLeft) &&
  2611. (SCC->scc_tbufcount == 0) &&
  2612. CurSend->tsr_lastbuf == NULL) {
  2613. ulong length = 0;
  2614. PNDIS_BUFFER tmp = in_SendBuf;
  2615. while (tmp) {
  2616. length += NdisBufferLength(tmp);
  2617. tmp = NDIS_BUFFER_LINKAGE(tmp);
  2618. }
  2619. // If sum of mdl lengths is > request length
  2620. // use slow path.
  2621. if (AmountLeft >= length) {
  2622. DirectSend = TRUE;
  2623. }
  2624. }
  2625. if (DirectSend) {
  2626. NDIS_BUFFER_LINKAGE(CurrentBuffer) = in_SendBuf;
  2627. do {
  2628. SCC->scc_ubufcount++;
  2629. CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
  2630. } while (NDIS_BUFFER_LINKAGE(CurrentBuffer) != NULL);
  2631. CurSend->tsr_lastbuf = CurrentBuffer;
  2632. AmountLeft -= SendSize;
  2633. } else {
  2634. uint AmountToDup;
  2635. PNDIS_BUFFER NewBuf, Buf;
  2636. uint Offset;
  2637. NDIS_STATUS NStatus;
  2638. uchar *VirtualAddress;
  2639. uint Length;
  2640. // Either the current send has more data than
  2641. // we want to send, or the starting offset is
  2642. // not 0. In either case we'll need to loop
  2643. // through the current send, allocating buffers.
  2644. Buf = SendBuf;
  2645. Offset = SendOfs;
  2646. do {
  2647. ASSERT(Buf != NULL);
  2648. TcpipQueryBuffer(Buf, &VirtualAddress, &Length,
  2649. NormalPagePriority);
  2650. if (VirtualAddress == NULL) {
  2651. if (SCC->scc_tbufcount == 0 &&
  2652. SCC->scc_ubufcount == 0) {
  2653. //TCPSendComplete(SCC, FirstBuffer,IP_SUCCESS);
  2654. goto error_oor1;
  2655. }
  2656. AmountToSend -= AmountLeft;
  2657. AmountLeft = 0;
  2658. break;
  2659. }
  2660. ASSERT((Offset < Length) ||
  2661. (Offset == 0 && Length == 0));
  2662. // Adjust the length for the offset into
  2663. // this buffer.
  2664. Length -= Offset;
  2665. AmountToDup = MIN(AmountLeft, Length);
  2666. NdisAllocateBuffer(&NStatus, &NewBuf,
  2667. TCPSendBufferPool,
  2668. VirtualAddress + Offset,
  2669. AmountToDup);
  2670. if (NStatus == NDIS_STATUS_SUCCESS) {
  2671. SCC->scc_tbufcount++;
  2672. NDIS_BUFFER_LINKAGE(CurrentBuffer) = NewBuf;
  2673. CurrentBuffer = NewBuf;
  2674. if (AmountToDup >= Length) {
  2675. // Exhausted this buffer.
  2676. Buf = NDIS_BUFFER_LINKAGE(Buf);
  2677. Offset = 0;
  2678. } else {
  2679. Offset += AmountToDup;
  2680. ASSERT(Offset < NdisBufferLength(Buf));
  2681. }
  2682. SendSize -= AmountToDup;
  2683. AmountLeft -= AmountToDup;
  2684. } else {
  2685. // Couldn't allocate a buffer. If
  2686. // the packet is already partly built,
  2687. // send what we've got, otherwise
  2688. // bail out.
  2689. if (SCC->scc_tbufcount == 0 &&
  2690. SCC->scc_ubufcount == 0) {
  2691. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2692. //TCPSendComplete(SCC, FirstBuffer,IP_SUCCESS);
  2693. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2694. goto error_oor1;
  2695. }
  2696. AmountToSend -= AmountLeft;
  2697. AmountLeft = 0;
  2698. }
  2699. } while (AmountLeft && SendSize);
  2700. SendBuf = Buf;
  2701. SendOfs = Offset;
  2702. }
  2703. if (CurSend->tsr_flags & TSR_FLAG_URG) {
  2704. ushort UP;
  2705. // This send is urgent data. We need to figure
  2706. // out what the urgent data pointer should be.
  2707. // We know sendnext is the starting sequence
  2708. // number of the frame, and that at the top of
  2709. // this do loop sendnext identified a byte in
  2710. // the CurSend at that time. We advanced CurSend
  2711. // at the same rate we've decremented
  2712. // AmountLeft (AmountToSend - AmountLeft ==
  2713. // AmountBuilt), so sendnext +
  2714. // (AmountToSend - AmountLeft) identifies a byte
  2715. // in the current value of CurSend, and that
  2716. // quantity plus tcb_sendsize is the sequence
  2717. // number one beyond the current send.
  2718. UP =
  2719. (ushort) (AmountToSend - AmountLeft) +
  2720. (ushort) SendTCB->tcb_sendsize -
  2721. ((SendTCB->tcb_flags & BSD_URGENT) ? 0 : 1);
  2722. Header->tcp_urgent = net_short(UP);
  2723. Header->tcp_flags |= TCP_FLAG_URG;
  2724. }
  2725. // See if we've exhausted this send. If we have,
  2726. // set the PUSH bit in this frame and move on to
  2727. // the next send. We also need to check the
  2728. // urgent data bit.
  2729. if (SendSize == 0) {
  2730. Queue *Next;
  2731. ulong PrevFlags;
  2732. // We've exhausted this send. Set the PUSH bit.
  2733. Header->tcp_flags |= TCP_FLAG_PUSH;
  2734. PrevFlags = CurSend->tsr_flags;
  2735. Next = QNEXT(&CurSend->tsr_req.tr_q);
  2736. if (Next != QEND(&SendTCB->tcb_sendq)) {
  2737. CurSend = STRUCT_OF(TCPSendReq,
  2738. QSTRUCT(TCPReq, Next, tr_q),
  2739. tsr_req);
  2740. CTEStructAssert(CurSend, tsr);
  2741. SendSize = CurSend->tsr_unasize;
  2742. SendOfs = CurSend->tsr_offset;
  2743. SendBuf = CurSend->tsr_buffer;
  2744. // Check the urgent flags. We can't combine
  2745. // new urgent data on to the end of old
  2746. // non-urgent data.
  2747. if ((PrevFlags & TSR_FLAG_URG) && !
  2748. (CurSend->tsr_flags & TSR_FLAG_URG))
  2749. break;
  2750. } else {
  2751. ASSERT(AmountLeft == 0);
  2752. CurSend = NULL;
  2753. SendBuf = NULL;
  2754. }
  2755. }
  2756. } while (AmountLeft != 0);
  2757. } else {
  2758. // Amt to send is 0.
  2759. // Just bail out and start the timer.
  2760. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2761. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2762. }
  2763. SendTCB->tcb_fastchk &= ~TCP_FLAG_FASTREC;
  2764. FreeTCPHeader(FirstBuffer);
  2765. return;
  2766. }
  2767. // Adjust for what we're really going to send.
  2768. AmountToSend -= AmountLeft;
  2769. OldSeq = SendNext;
  2770. SendNext += AmountToSend;
  2771. AmtUnsent -= AmountToSend;
  2772. TStats.ts_retranssegs++;
  2773. // We've built the frame entirely. If we've sent everything
  2774. // we have and there's a FIN pending, OR it in.
  2775. AmountToSend += sizeof(TCPHeader);
  2776. SendTCB->tcb_flags &= ~(NEED_ACK | ACK_DELAYED |
  2777. FORCE_OUTPUT);
  2778. STOP_TCB_TIMER_R(SendTCB, DELACK_TIMER);
  2779. STOP_TCB_TIMER_R(SendTCB, SWS_TIMER);
  2780. SendTCB->tcb_rcvdsegs = 0;
  2781. if ( (SendTCB->tcb_flags & KEEPALIVE) && (SendTCB->tcb_conn != NULL) )
  2782. START_TCB_TIMER_R(SendTCB, KA_TIMER, SendTCB->tcb_conn->tc_tcbkatime);
  2783. SendTCB->tcb_kacount = 0;
  2784. SendTCB->tcb_fastchk &= ~TCP_FLAG_FASTREC;
  2785. CTEFreeLock(&SendTCB->tcb_lock, TCBHandle);
  2786. Irp = NULL;
  2787. if (SCC->scc_firstsend) {
  2788. Irp = SCC->scc_firstsend->tsr_req.tr_context;
  2789. }
  2790. // We're all set. Xsum it and send it.
  2791. if (SendTCB->tcb_rce &&
  2792. (SendTCB->tcb_rce->rce_OffloadFlags &
  2793. TCP_XMT_CHECKSUM_OFFLOAD) &&
  2794. (SendTCB->tcb_rce->rce_OffloadFlags &
  2795. TCP_CHECKSUM_OPT_OFFLOAD) ){
  2796. uint PHXsum =
  2797. SendTCB->tcb_phxsum +
  2798. (uint)net_short(AmountToSend + TSLen);
  2799. PHXsum = (((PHXsum << 16) | (PHXsum >> 16)) + PHXsum) >> 16;
  2800. Header->tcp_xsum = (ushort) PHXsum;
  2801. SendTCB->tcb_opt.ioi_TcpChksum = 1;
  2802. } else {
  2803. Header->tcp_xsum =
  2804. ~XsumSendChain(
  2805. SendTCB->tcb_phxsum +
  2806. (uint)net_short(AmountToSend + TSLen),
  2807. FirstBuffer);
  2808. SendTCB->tcb_opt.ioi_TcpChksum = 0;
  2809. }
  2810. SendStatus =
  2811. (*LocalNetInfo.ipi_xmit)(TCPProtInfo,
  2812. SCC,
  2813. FirstBuffer,
  2814. AmountToSend + TSLen,
  2815. SendTCB->tcb_daddr,
  2816. SendTCB->tcb_saddr,
  2817. &SendTCB->tcb_opt,
  2818. SendTCB->tcb_rce,
  2819. PROTOCOL_TCP,
  2820. Irp);
  2821. //Reacquire Lock to keep DerefTCB happy
  2822. //Bug #63904
  2823. if (SendStatus != IP_PENDING) {
  2824. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2825. }
  2826. CTEGetLock(&SendTCB->tcb_lock, &TCBHandle);
  2827. SendTCB->tcb_error = SendStatus;
  2828. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2829. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, SendTCB->tcb_rexmit);
  2830. }
  2831. } else { // FirstBuffer != NULL.
  2832. goto error_oor;
  2833. }
  2834. } //while AmtUnsent > 0
  2835. return;
  2836. // Common case error handling code for out of resource conditions. Start the
  2837. // retransmit timer if it's not already running (so that we try this again
  2838. // later), clean up and return.
  2839. error_oor:
  2840. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2841. ushort tmp;
  2842. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2843. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2844. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2845. }
  2846. SendTCB->tcb_fastchk &= ~TCP_FLAG_FASTREC;
  2847. return;
  2848. error_oor1:
  2849. if (!TCB_TIMER_RUNNING_R(SendTCB, RXMIT_TIMER)) {
  2850. ushort tmp;
  2851. tmp = MIN(MAX(REXMIT_TO(SendTCB),
  2852. MIN_RETRAN_TICKS), MAX_REXMIT_TO);
  2853. START_TCB_TIMER_R(SendTCB, RXMIT_TIMER, tmp);
  2854. }
  2855. SendTCB->tcb_fastchk &= ~TCP_FLAG_FASTREC;
  2856. TCPSendComplete(SCC, FirstBuffer, IP_SUCCESS);
  2857. return;
  2858. }
  2859. //* TDISend - Send data on a connection.
  2860. //
  2861. // The main TDI send entry point. We take the input parameters, validate them,
  2862. // allocate a send request, etc. We then put the send request on the queue.
  2863. // If we have no other sends on the queue or Nagling is disabled we'll
  2864. // call TCPSend to send the data.
  2865. //
  2866. // Input: Request - The TDI request for the call.
  2867. // Flags - Flags for this send.
  2868. // SendLength - Length in bytes of send.
  2869. // SendBuffer - Pointer to buffer chain to be sent.
  2870. //
  2871. // Returns: Status of attempt to send.
  2872. //
  2873. TDI_STATUS
  2874. TdiSend(PTDI_REQUEST Request, ushort Flags, uint SendLength,
  2875. PNDIS_BUFFER SendBuffer)
  2876. {
  2877. TCPConn *Conn;
  2878. TCB *SendTCB;
  2879. TCPSendReq *SendReq;
  2880. CTELockHandle ConnTableHandle, TCBHandle;
  2881. TDI_STATUS Error;
  2882. uint EmptyQ;
  2883. #if DBG_VALIDITY_CHECK
  2884. // Check for Mdl sanity in send requests
  2885. // Should be removed for RTM
  2886. uint RealSendSize;
  2887. PNDIS_BUFFER Temp;
  2888. // Loop through the buffer chain, and make sure that the length matches
  2889. // up with SendLength.
  2890. Temp = SendBuffer;
  2891. RealSendSize = 0;
  2892. if (Temp != 0) {
  2893. do {
  2894. if (Temp == NULL) {
  2895. DbgPrint("BAD TCP Send Request. NULL MDL\n");
  2896. DbgPrint("This is not a TCPIP issue.\n");
  2897. DbgPrint("Please have originator of this IRP debug this.\n");
  2898. DbgBreakPoint();
  2899. }
  2900. RealSendSize += NdisBufferLength(Temp);
  2901. Temp = NDIS_BUFFER_LINKAGE(Temp);
  2902. } while (Temp != NULL);
  2903. if (RealSendSize < SendLength) {
  2904. DbgPrint("BAD TCP Send Request. Length Mismatch.\n");
  2905. DbgPrint("This is not a TCPIP issue.\n");
  2906. DbgPrint("Please have originator of this IRP debug this.\n");
  2907. DbgBreakPoint();
  2908. }
  2909. }
  2910. #endif
  2911. #if DROP_PKT
  2912. // Do not forget to remove this code!!!
  2913. if (SimPacketDrop) {
  2914. if (SendLength > 8000) {
  2915. DropPackets = 1;
  2916. }
  2917. }
  2918. #endif
  2919. //CTEGetLock(&ConnTableLock, &ConnTableHandle);
  2920. Conn = GetConnFromConnID(PtrToUlong(Request->Handle.ConnectionContext), &ConnTableHandle);
  2921. if (Conn != NULL) {
  2922. CTEStructAssert(Conn, tc);
  2923. SendTCB = Conn->tc_tcb;
  2924. if (SendTCB != NULL) {
  2925. CTEStructAssert(SendTCB, tcb);
  2926. CTEGetLockAtDPC(&SendTCB->tcb_lock, &TCBHandle);
  2927. CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), DISPATCH_LEVEL);
  2928. if (DATA_SEND_STATE(SendTCB->tcb_state) && !CLOSING(SendTCB)) {
  2929. // We have a TCB, and it's valid. Get a send request now.
  2930. CheckTCBSends(SendTCB);
  2931. if ((SendLength != 0) && ((SendTCB->tcb_unacked + SendLength) >= SendLength)) {
  2932. SendReq = GetSendReq();
  2933. if (SendReq != NULL) {
  2934. SendReq->tsr_req.tr_rtn = Request->RequestNotifyObject;
  2935. SendReq->tsr_req.tr_context = Request->RequestContext;
  2936. SendReq->tsr_buffer = SendBuffer;
  2937. SendReq->tsr_size = SendLength;
  2938. SendReq->tsr_unasize = SendLength;
  2939. SendReq->tsr_refcnt = 1; // ACK will decrement this ref
  2940. SendReq->tsr_offset = 0;
  2941. SendReq->tsr_lastbuf = NULL;
  2942. SendReq->tsr_time = TCPTime;
  2943. SendReq->tsr_flags = (Flags & TDI_SEND_EXPEDITED) ?
  2944. TSR_FLAG_URG : 0;
  2945. SendTCB->tcb_unacked += SendLength;
  2946. #if ACK_DEBUG
  2947. SendTCB->tcb_ack_history[SendTCB->tcb_history_index].sequence = SendTCB->tcb_senduna;
  2948. SendTCB->tcb_ack_history[SendTCB->tcb_history_index].unacked = SendTCB->tcb_unacked;
  2949. SendTCB->tcb_history_index++;
  2950. if (SendTCB->tcb_history_index >= NUM_ACK_HISTORY_ITEMS) {
  2951. SendTCB->tcb_history_index = 0;
  2952. }
  2953. #endif // ACK_DEBUG
  2954. if (Flags & TDI_SEND_AND_DISCONNECT) {
  2955. //move the state to fin_wait and
  2956. //mark the tcb for send and disconnect
  2957. if (SendTCB->tcb_state == TCB_ESTAB) {
  2958. SendTCB->tcb_state = TCB_FIN_WAIT1;
  2959. } else {
  2960. ASSERT(SendTCB->tcb_state == TCB_CLOSE_WAIT);
  2961. SendTCB->tcb_state = TCB_LAST_ACK;
  2962. }
  2963. SendTCB->tcb_slowcount++;
  2964. SendTCB->tcb_fastchk |= TCP_FLAG_SLOW;
  2965. SendTCB->tcb_fastchk |= TCP_FLAG_SEND_AND_DISC;
  2966. SendTCB->tcb_flags |= FIN_NEEDED;
  2967. //SendTCB->tcb_flags |= DISC_NOTIFIED;
  2968. SendReq->tsr_flags |= TSR_FLAG_SEND_AND_DISC;
  2969. //extrac reference to make sure that
  2970. //this request will not be completed until the
  2971. //connection is closed
  2972. SendReq->tsr_refcnt++;
  2973. TStats.ts_currestab--;
  2974. }
  2975. EmptyQ = EMPTYQ(&SendTCB->tcb_sendq);
  2976. ENQUEUE(&SendTCB->tcb_sendq, &SendReq->tsr_req.tr_q);
  2977. if (SendTCB->tcb_cursend == NULL) {
  2978. SendTCB->tcb_cursend = SendReq;
  2979. SendTCB->tcb_sendbuf = SendBuffer;
  2980. SendTCB->tcb_sendofs = 0;
  2981. SendTCB->tcb_sendsize = SendLength;
  2982. }
  2983. if (EmptyQ) {
  2984. REFERENCE_TCB(SendTCB);
  2985. TCPSend(SendTCB, ConnTableHandle);
  2986. } else if (!(SendTCB->tcb_flags & NAGLING) ||
  2987. (SendTCB->tcb_unacked -
  2988. (SendTCB->tcb_sendmax -
  2989. SendTCB->tcb_senduna)) >=
  2990. SendTCB->tcb_mss) {
  2991. REFERENCE_TCB(SendTCB);
  2992. TCPSend(SendTCB, ConnTableHandle);
  2993. } else
  2994. CTEFreeLock(&SendTCB->tcb_lock,
  2995. ConnTableHandle);
  2996. return TDI_PENDING;
  2997. } else
  2998. Error = TDI_NO_RESOURCES;
  2999. } else
  3000. Error = TDI_SUCCESS;
  3001. } else
  3002. Error = TDI_INVALID_STATE;
  3003. CTEFreeLock(&SendTCB->tcb_lock, ConnTableHandle);
  3004. return Error;
  3005. } else {
  3006. CTEFreeLock(&(Conn->tc_ConnBlock->cb_lock), ConnTableHandle);
  3007. Error = TDI_INVALID_STATE;
  3008. }
  3009. } else
  3010. Error = TDI_INVALID_CONNECTION;
  3011. //CTEFreeLock(&ConnTableLock, ConnTableHandle);
  3012. return Error;
  3013. }
  3014. #pragma BEGIN_INIT
  3015. extern void *TLRegisterProtocol(uchar Protocol, void *RcvHandler,
  3016. void *XmitHandler, void *StatusHandler,
  3017. void *RcvCmpltHandler, void *PnPHandler,
  3018. void *ElistHandler);
  3019. extern IP_STATUS TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src,
  3020. IPAddr LocalAddr, IPAddr SrcAddr,
  3021. IPHeader UNALIGNED * IPH, uint IPHLength,
  3022. IPRcvBuf * RcvBuf, uint Size, uchar IsBCast,
  3023. uchar Protocol, IPOptInfo * OptInfo);
  3024. extern void TCPRcvComplete(void);
  3025. uchar SendInited = FALSE;
  3026. //* InitTCPSend - Initialize our send side.
  3027. //
  3028. // Called during init time to initialize our TCP send state.
  3029. //
  3030. // Input: Nothing.
  3031. //
  3032. // Returns: TRUE if we inited, false if we didn't.
  3033. //
  3034. int
  3035. InitTCPSend(void)
  3036. {
  3037. PNDIS_BUFFER Buffer;
  3038. NDIS_STATUS Status;
  3039. TcpHeaderBufferSize =
  3040. (USHORT)(ALIGN_UP(LocalNetInfo.ipi_hsize,PVOID) +
  3041. ALIGN_UP((sizeof(TCPHeader) + ALIGNED_TS_OPT_SIZE + ALIGNED_SACK_OPT_SIZE),PVOID) +
  3042. ALIGN_UP(MAX(MSS_OPT_SIZE, sizeof(SendCmpltContext)),PVOID));
  3043. #if BACK_FILL
  3044. TcpHeaderBufferSize += MAX_BACKFILL_HDR_SIZE;
  3045. #endif
  3046. TcpHeaderPool = MdpCreatePool (TcpHeaderBufferSize, 'thCT');
  3047. if (!TcpHeaderPool)
  3048. {
  3049. return FALSE;
  3050. }
  3051. NdisAllocateBufferPool(&Status, &TCPSendBufferPool, NUM_TCP_BUFFERS);
  3052. if (Status != NDIS_STATUS_SUCCESS) {
  3053. MdpDestroyPool(TcpHeaderPool);
  3054. return FALSE;
  3055. }
  3056. TCPProtInfo = TLRegisterProtocol(PROTOCOL_TCP, TCPRcv, TCPSendComplete,
  3057. TCPStatus, TCPRcvComplete,
  3058. TCPPnPPowerRequest, TCPElistChangeHandler);
  3059. if (TCPProtInfo == NULL) {
  3060. MdpDestroyPool(TcpHeaderPool);
  3061. NdisFreeBufferPool(TCPSendBufferPool);
  3062. return FALSE;
  3063. }
  3064. SendInited = TRUE;
  3065. return TRUE;
  3066. }
  3067. //* UnInitTCPSend - UnInitialize our send side.
  3068. //
  3069. // Called during init time if we're going to fail to initialize.
  3070. //
  3071. // Input: Nothing.
  3072. //
  3073. // Returns: TRUE if we inited, false if we didn't.
  3074. //
  3075. void
  3076. UnInitTCPSend(void)
  3077. {
  3078. if (!SendInited)
  3079. return;
  3080. TLRegisterProtocol(PROTOCOL_TCP, NULL, NULL, NULL, NULL, NULL, NULL);
  3081. MdpDestroyPool(TcpHeaderPool);
  3082. NdisFreeBufferPool(TCPSendBufferPool);
  3083. }
  3084. #pragma END_INIT