Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

8026 lines
272 KiB

  1. /*++
  2. Copyright (c) 1990-2000 Microsoft Corporation
  3. Module Name:
  4. IPROUTE.C
  5. Abstract:
  6. This file contains all the route table manipulation code
  7. Author:
  8. [Environment:]
  9. kernel mode only
  10. [Notes:]
  11. optional-notes
  12. Revision History:
  13. --*/
  14. //*** iproute.c - IP routing routines.
  15. //
  16. // This file contains all the routines related to IP routing, including
  17. // routing table lookup and management routines.
  18. #include "precomp.h"
  19. #include "info.h"
  20. #include "iproute.h"
  21. #include "iprtdef.h"
  22. #include "lookup.h"
  23. #include "ipxmit.h"
  24. #include "igmp.h"
  25. #include "mdlpool.h"
  26. #include "pplasl.h"
  27. #include "tcpipbuf.h"
  28. extern uint LoopIndex;
  29. extern uint IPSecStatus;
  30. typedef struct ChangeNotifyEvent {
  31. CTEEvent cne_event;
  32. IPNotifyOutput cne_info;
  33. LIST_ENTRY *cne_queue;
  34. void *cne_lock;
  35. } ChangeNotifyEvent;
  36. void ChangeNotifyAsync(CTEEvent *Event, PVOID Context);
  37. void InvalidateRCEChain(RouteTableEntry * RTE);
  38. extern IPAddr g_ValidAddr;
  39. extern uint TotalFreeInterfaces;
  40. extern uint MaxFreeInterfaces;
  41. extern Interface *FrontFreeList;
  42. extern Interface *RearFreeList;
  43. RouteCacheEntry *RCEFreeList = NULL;
  44. extern void DampCheck(void);
  #if IPMCAST

  #define MCAST_STARTED 1

  extern uint g_dwMcastState;

  // Multicast forwarding entry points, defined elsewhere.
  extern BOOLEAN IPMForwardAfterRcv(NetTableEntry *PrimarySrcNTE,
                                    IPHeader UNALIGNED *Header,
                                    uint HeaderLength,
                                    PVOID Data,
                                    uint BufferLength,
                                    NDIS_HANDLE LContext1,
                                    uint LContext2,
                                    uchar DestType,
                                    LinkEntry *LinkCtxt);

  extern BOOLEAN IPMForwardAfterRcvPkt(NetTableEntry *PrimarySrcNTE,
                                       IPHeader UNALIGNED *Header,
                                       uint HeaderLength,
                                       PVOID Data,
                                       uint BufferLength,
                                       NDIS_HANDLE LContext1,
                                       uint LContext2,
                                       uchar DestType,
                                       uint MacHeaderSize,
                                       PNDIS_BUFFER NdisBuffer,
                                       uint *pClientCnt,
                                       LinkEntry *LinkCtxt);

  #endif
  62. ulong DbgNumPktFwd = 0;
  63. ulong UnConnected = 0;
  64. RouteCacheEntry *UnConnectedRCE;
  65. ulong Rcefailures = 0;
  66. extern NetTableEntry **NewNetTableList; // hash table for NTEs
  67. extern uint NET_TABLE_SIZE;
  68. extern RefPtr DHCPRefPtr; // Referenced pointer to NTE being DHCP'd.
  69. extern NetTableEntry *LoopNTE; // Pointer to loopback NTE.
  70. extern Interface LoopInterface; // Pointer to loopback interface.
  71. extern IP_STATUS SendICMPErr(IPAddr, IPHeader UNALIGNED *, uchar, uchar, ulong, uchar);
  72. extern IP_STATUS SendICMPIPSecErr(IPAddr, IPHeader UNALIGNED *, uchar, uchar, ulong);
  73. extern uchar ParseRcvdOptions(IPOptInfo *, OptIndex *);
  74. extern void ULMTUNotify(IPAddr Dest, IPAddr Src, uchar Prot, void *Ptr,
  75. uint NewMTU);
  76. void EnableRouter();
  77. void DisableRouter();
  78. IPHeader *GetFWPacket(PNDIS_PACKET *ReturnedPacket);
  79. void FreeFWPacket(PNDIS_PACKET Packet);
  80. PNDIS_BUFFER GetFWBufferChain(uint DataLength, PNDIS_PACKET Packet,
  81. PNDIS_BUFFER *TailPointer);
  82. BOOLEAN InitForwardingPools();
  83. PVOID
  84. NTAPI
  85. FwPacketAllocate (
  86. IN POOL_TYPE PoolType,
  87. IN SIZE_T NumberOfBytes,
  88. IN ULONG Tag
  89. );
  90. VOID
  91. NTAPI
  92. FwPacketFree (
  93. IN PVOID Buffer
  94. );
  95. extern Interface *IFList;
  96. extern NDIS_HANDLE BufferPool;
  97. extern CTEBlockStruc TcpipUnloadBlock; // Structure for blocking at time of unload
  98. extern BOOLEAN fRouteTimerStopping;
  99. void IPDelNTE(NetTableEntry * NTE, CTELockHandle * RouteTableHandle);
  100. CACHE_LINE_KSPIN_LOCK RouteTableLock;
  101. LIST_ENTRY RtChangeNotifyQueue;
  102. LIST_ENTRY RtChangeNotifyQueueEx;
  103. extern HANDLE IpHeaderPool;
  104. NDIS_HANDLE IpForwardPacketPool;
  105. HANDLE IpForwardLargePool;
  106. HANDLE IpForwardSmallPool;
  // Buffer size calculation, based on the MDL pool's implementation:
  //   sizeof(POOL_HEADER) + N * ALIGN_UP(sizeof(MDL) + BufSize, PVOID) == PAGE_SIZE
  // where N is the number of buffers per page. BufSize is chosen to minimize
  // the space wasted per page.
  //
  #ifdef _WIN64
  // Chosen to get 5 buffers per pool page with minimal space wasted.
  #define BUFSIZE_LARGE_POOL 1576
  // Chosen to get 9 buffers per pool page with no space wasted.
  #define BUFSIZE_SMALL_POOL 856
  #else
  // Chosen to get 3 buffers per pool page with 8 bytes wasted.
  #define BUFSIZE_LARGE_POOL 1320
  // Chosen to get 8 buffers per pool page with no space wasted.
  #define BUFSIZE_SMALL_POOL 476
  #endif

  // Parenthesized so the value stays 16384 when the macro is embedded in a
  // larger expression (e.g. x / PACKET_POOL_SIZE or PACKET_POOL_SIZE % y).
  #define PACKET_POOL_SIZE (16 * 1024)
  124. uchar ForwardBCast; // Flag indicating if we should forward bcasts.
  125. uchar ForwardPackets; // Flag indicating whether we should forward.
  126. uchar RouterConfigured; // TRUE if we were initially configured as a
  127. // router.
  128. int IPEnableRouterRefCount; // Tracks enables/disables of
  129. // routing by various services
  130. RouteSendQ *BCastRSQ;
  131. uint DefGWConfigured; // Number of default gateways configed.
  132. uint DefGWActive; // Number of def. gateways active.
  133. uint DeadGWDetect;
  134. uint PMTUDiscovery;
  135. ProtInfo *RtPI = NULL;
  136. IPMask IPMaskTable[] =
  137. {
  138. CLASSA_MASK,
  139. CLASSA_MASK,
  140. CLASSA_MASK,
  141. CLASSA_MASK,
  142. CLASSA_MASK,
  143. CLASSA_MASK,
  144. CLASSA_MASK,
  145. CLASSA_MASK,
  146. CLASSB_MASK,
  147. CLASSB_MASK,
  148. CLASSB_MASK,
  149. CLASSB_MASK,
  150. CLASSC_MASK,
  151. CLASSC_MASK,
  152. CLASSD_MASK,
  153. CLASSE_MASK};
  154. extern void TransmitFWPacket(PNDIS_PACKET, uint);
  155. uint MTUTable[] =
  156. {
  157. 65535 - sizeof(IPHeader),
  158. 32000 - sizeof(IPHeader),
  159. 17914 - sizeof(IPHeader),
  160. 8166 - sizeof(IPHeader),
  161. 4352 - sizeof(IPHeader),
  162. 2002 - sizeof(IPHeader),
  163. 1492 - sizeof(IPHeader),
  164. 1006 - sizeof(IPHeader),
  165. 508 - sizeof(IPHeader),
  166. 296 - sizeof(IPHeader),
  167. MIN_VALID_MTU - sizeof(IPHeader)
  168. };
  169. uint DisableIPSourceRouting = 1;
  170. CTETimer IPRouteTimer;
  171. // Referenced pointer to callout routine for dial on demand.
  172. RefPtr DODRefPtr;
  173. // Referenced pointer to packet filter callout routine.
  174. RefPtr FilterRefPtr;
  175. RouteInterface DummyInterface; // Dummy interface.
  176. #if FFP_SUPPORT
  177. ULONG FFPRegFastForwardingCacheSize; // FFP Configuration Params
  178. ULONG FFPRegControlFlags; // from the System Registry
  179. ULONG FFPFlushRequired; // Whether an FFP Cache Flush is needed
  180. #endif // if FFP_SUPPORT
  181. ULONG RouteTimerTicks; // To simulate 2 timers with different granularity
  182. ULONG FlushIFTimerTicks; // To simulate 2 timers with different granularity
  183. #ifdef ALLOC_PRAGMA
  184. //
  185. // Make init code disposable.
  186. //
  187. int InitRouting(IPConfigInfo * ci);
  188. #pragma alloc_text(INIT, InitRouting)
  189. #endif // ALLOC_PRAGMA
  // CleanupP2MP_RTE - invoked whenever a route is deleted; takes care of
  // routes that hang off a link.
  //
  // If the route has a link (rte_link != NULL), the route is unlinked from the
  // chain that starts at link_rte and is threaded through rte_nextlinkrte.
  // If the route is not found on that chain the macro asserts. Routes without
  // a link are left untouched.
  //
  // Note: _RTE is evaluated more than once, so pass an expression without
  // side effects.
  #define CleanupP2MP_RTE(_RTE) {                                         \
      if ((_RTE)->rte_link) {                                             \
          LinkEntry *Link = (_RTE)->rte_link;                             \
          RouteTableEntry **Slot = &Link->link_rte;                       \
          while (*Slot != NULL && *Slot != (_RTE)) {                      \
              Slot = &(*Slot)->rte_nextlinkrte;                           \
          }                                                               \
          if (*Slot != NULL) {                                            \
              *Slot = (_RTE)->rte_nextlinkrte;                            \
          } else {                                                        \
              ASSERT((FALSE));                                            \
          }                                                               \
      }                                                                   \
  }
  214. //** GetIfConstraint - Decide whether to constrain a lookup
  215. //
  216. // Arguments: Dest - destination address
  217. // Src - source address
  218. // OptInfo - options to use for a lookup
  219. // fIpsec - IPsec reinjected packet
  220. //
  221. // Returns: IfIndex to constrain lookup to,
  222. // 0 if unconstrained
  223. // INVALID_IF_INDEX if constrained by source address only
  224. //
  225. uint
  226. GetIfConstraint(IPAddr Dest, IPAddr Src, IPOptInfo *OptInfo, BOOLEAN fIpsec)
  227. {
  228. uint ConstrainIF=0;
  229. if (CLASSD_ADDR(Dest)) {
  230. ConstrainIF = (OptInfo)? OptInfo->ioi_mcastif : 0;
  231. if (!ConstrainIF && Src && !fIpsec) {
  232. ConstrainIF = INVALID_IF_INDEX;
  233. }
  234. } else {
  235. ConstrainIF = (OptInfo)? OptInfo->ioi_ucastif : 0;
  236. }
  237. return ConstrainIF;
  238. }
  239. VOID
  240. InvalidateRCEContext(RouteCacheEntry *RCE)
  241. {
  242. Interface *IF, *tmpIF = NULL;
  243. ASSERT(RCE->rce_flags & RCE_CONNECTED);
  244. IF = (Interface *) RCE->rce_rte;
  245. if (RCE->rce_flags & RCE_REFERENCED) {
  246. //
  247. // If we hold a reference on the interface,
  248. // it is guaranteed the interface won't go away.
  249. //
  250. (*(IF->if_invalidate)) (IF->if_lcontext, RCE);
  251. LockedDerefIF(IF);
  252. RCE->rce_flags &= ~RCE_REFERENCED;
  253. } else {
  254. //
  255. // In the case we do not hold a reference on the interface,
  256. // we need to make sure the IF is still there.
  257. //
  258. for (tmpIF = IFList; tmpIF != NULL; tmpIF = tmpIF->if_next) {
  259. if (tmpIF == IF) break;
  260. }
  261. if (tmpIF) {
  262. (*(IF->if_invalidate)) (IF->if_lcontext, RCE);
  263. } else {
  264. RtlZeroMemory(RCE->rce_context, RCE_CONTEXT_SIZE);
  265. }
  266. }
  267. }
  268. //** DummyFilterPtr - Dummy filter-driver callout-routine
  269. //
  270. // A dummy routine installed while a real callout is in the process of being
  271. // deregistered.
  272. //
  273. // Entry: no arguments used.
  274. //
  275. // Returns: FORWARD.
  276. //
  277. FORWARD_ACTION
  278. DummyFilterPtr(struct IPHeader UNALIGNED* PacketHeader,
  279. uchar* Packet, uint PacketLength,
  280. uint RecvInterfaceIndex, uint SendInterfaceIndex,
  281. IPAddr RecvLinkNextHop, IPAddr SendLinkNextHop)
  282. {
  283. UNREFERENCED_PARAMETER(PacketHeader);
  284. UNREFERENCED_PARAMETER(Packet);
  285. UNREFERENCED_PARAMETER(PacketLength);
  286. UNREFERENCED_PARAMETER(RecvInterfaceIndex);
  287. UNREFERENCED_PARAMETER(SendInterfaceIndex);
  288. UNREFERENCED_PARAMETER(RecvLinkNextHop);
  289. UNREFERENCED_PARAMETER(SendLinkNextHop);
  290. return FORWARD;
  291. }
  292. //** DummyDODCallout - Dummy dial-on-demand callout-routine
  293. //
  294. // A dummy routine installed while a real callout is in the process of being
  295. // deregistered.
  296. //
  297. // Entry: no arguments used.
  298. //
  299. // Returns: INVALID_IF_INDEX.
  300. //
  301. uint
  302. DummyDODCallout(ROUTE_CONTEXT Context, IPAddr Destination, IPAddr Source,
  303. uchar Protocol, uchar *Buffer, uint Length, IPAddr HdrSrc)
  304. {
  305. UNREFERENCED_PARAMETER(Context);
  306. UNREFERENCED_PARAMETER(Destination);
  307. UNREFERENCED_PARAMETER(Source);
  308. UNREFERENCED_PARAMETER(Protocol);
  309. UNREFERENCED_PARAMETER(Buffer);
  310. UNREFERENCED_PARAMETER(Length);
  311. UNREFERENCED_PARAMETER(HdrSrc);
  312. return INVALID_IF_INDEX;
  313. }
  314. //** NotifyFilterOfDiscard - notify the filter before discarding a packet
  315. //
  316. // Called when a packet is to be dropped before the filtering step is done.
  317. // This allows the dropped packet to be logged, if necessary.
  318. //
  319. // Entry: NTE - receiving NTE
  320. // IPH - header of dropped packet
  321. // Data - payload of dropped packet
  322. // DataSize - length of bytes at 'Data'.
  323. //
  324. // Returns: TRUE if IP filter-driver returned 'FORWARD', FALSE otherwise.
  325. //
  326. BOOLEAN
  327. NotifyFilterOfDiscard(NetTableEntry* NTE, IPHeader UNALIGNED* IPH, uchar* Data,
  328. uint DataSize)
  329. {
  330. FORWARD_ACTION Action;
  331. IPPacketFilterPtr FilterPtr;
  332. FilterPtr = AcquireRefPtr(&FilterRefPtr);
  333. Action = (*FilterPtr)(IPH, Data, DataSize, NTE->nte_if->if_index,
  334. INVALID_IF_INDEX, IPADDR_LOCAL, NULL_IP_ADDR);
  335. ReleaseRefPtr(&FilterRefPtr);
  336. return ((BOOLEAN) (Action == FORWARD));
  337. }
  338. //** DuumyXmit - Dummy interface transmit handler.
  339. //
  340. // A dummy routine that should never be called.
  341. //
  342. // Entry: Context - NULL.
  343. // Packet - Pointer to packet to be transmitted.
  344. // Dest - Destination addres of packet.
  345. // RCE - Pointer to RCE (should be NULL).
  346. //
  347. // Returns: NDIS_STATUS_PENDING
  348. //
  349. NDIS_STATUS
  350. __stdcall
  351. DummyXmit(void *Context, PNDIS_PACKET *PacketArray, uint NumberOfPackets,
  352. IPAddr Dest, RouteCacheEntry * RCE, void *LinkCtxt)
  353. {
  354. UNREFERENCED_PARAMETER(Context);
  355. UNREFERENCED_PARAMETER(PacketArray);
  356. UNREFERENCED_PARAMETER(NumberOfPackets);
  357. UNREFERENCED_PARAMETER(Dest);
  358. UNREFERENCED_PARAMETER(RCE);
  359. UNREFERENCED_PARAMETER(LinkCtxt);
  360. ASSERT(FALSE);
  361. return NDIS_STATUS_SUCCESS;
  362. }
  363. //* DummyXfer - Dummy interface transfer data routine.
  364. //
  365. // A dummy routine that should never be called.
  366. //
  367. // Entry: Context - NULL.
  368. // TDContext - Original packet that was sent.
  369. // Dummy - Unused
  370. // Offset - Offset in frame from which to start copying.
  371. // BytesToCopy - Number of bytes to copy.
  372. // DestPacket - Packet describing buffer to copy into.
  373. // BytesCopied - Place to return bytes copied.
  374. //
  375. // Returns: NDIS_STATUS_SUCCESS
  376. //
  377. NDIS_STATUS
  378. __stdcall
  379. DummyXfer(void *Context, NDIS_HANDLE TDContext, uint Dummy, uint Offset, uint BytesToCopy,
  380. PNDIS_PACKET DestPacket, uint * BytesCopied)
  381. {
  382. UNREFERENCED_PARAMETER(Context);
  383. UNREFERENCED_PARAMETER(TDContext);
  384. UNREFERENCED_PARAMETER(Dummy);
  385. UNREFERENCED_PARAMETER(Offset);
  386. UNREFERENCED_PARAMETER(BytesToCopy);
  387. UNREFERENCED_PARAMETER(DestPacket);
  388. UNREFERENCED_PARAMETER(BytesCopied);
  389. ASSERT(FALSE);
  390. return NDIS_STATUS_FAILURE;
  391. }
  392. //* DummyClose - Dummy close routine.
  393. //
  394. // A dummy routine that should never be called.
  395. //
  396. // Entry: Context - Unused.
  397. //
  398. // Returns: Nothing.
  399. //
  400. void
  401. __stdcall
  402. DummyClose(void *Context)
  403. {
  404. UNREFERENCED_PARAMETER(Context);
  405. ASSERT(FALSE);
  406. }
  407. //* DummyInvalidate - .
  408. //
  409. // A dummy routine that should never be called.
  410. //
  411. // Entry: Context - Unused.
  412. // RCE - Pointer to RCE to be invalidated.
  413. //
  414. // Returns: Nothing.
  415. //
  416. void
  417. __stdcall
  418. DummyInvalidate(void *Context, RouteCacheEntry * RCE)
  419. {
  420. UNREFERENCED_PARAMETER(Context);
  421. UNREFERENCED_PARAMETER(RCE);
  422. }
  423. //* DummyQInfo - Dummy query information handler.
  424. //
  425. // A dummy routine that should never be called.
  426. //
  427. // Input: IFContext - Interface context (unused).
  428. // ID - TDIObjectID for object.
  429. // Buffer - Buffer to put data into.
  430. // Size - Pointer to size of buffer. On return, filled with
  431. // bytes copied.
  432. // Context - Pointer to context block.
  433. //
  434. // Returns: Status of attempt to query information.
  435. //
  436. int
  437. __stdcall
  438. DummyQInfo(void *IFContext, TDIObjectID * ID, PNDIS_BUFFER Buffer, uint * Size,
  439. void *Context)
  440. {
  441. UNREFERENCED_PARAMETER(IFContext);
  442. UNREFERENCED_PARAMETER(ID);
  443. UNREFERENCED_PARAMETER(Buffer);
  444. UNREFERENCED_PARAMETER(Size);
  445. UNREFERENCED_PARAMETER(Context);
  446. ASSERT(FALSE);
  447. return TDI_INVALID_REQUEST;
  448. }
  449. //* DummySetInfo - Dummy query information handler.
  450. //
  451. // A dummy routine that should never be called.
  452. //
  453. // Input: IFContext - Interface context (unused).
  454. // ID - TDIObjectID for object.
  455. // Buffer - Buffer to put data into.
  456. // Size - Pointer to size of buffer. On return, filled with
  457. // bytes copied.
  458. //
  459. // Returns: Status of attempt to query information.
  460. //
  461. int
  462. __stdcall
  463. DummySetInfo(void *IFContext, TDIObjectID * ID, void *Buffer, uint Size)
  464. {
  465. UNREFERENCED_PARAMETER(IFContext);
  466. UNREFERENCED_PARAMETER(ID);
  467. UNREFERENCED_PARAMETER(Buffer);
  468. UNREFERENCED_PARAMETER(Size);
  469. ASSERT(FALSE);
  470. return TDI_INVALID_REQUEST;
  471. }
  472. //* DummyAddAddr - Dummy add address routine.
  473. //
  474. // Called at init time when we need to initialize ourselves.
  475. //
  476. uint
  477. __stdcall
  478. DummyAddAddr(void *Context, uint Type, IPAddr Address, IPMask Mask,
  479. void *Context2)
  480. {
  481. UNREFERENCED_PARAMETER(Context);
  482. UNREFERENCED_PARAMETER(Type);
  483. UNREFERENCED_PARAMETER(Address);
  484. UNREFERENCED_PARAMETER(Mask);
  485. UNREFERENCED_PARAMETER(Context2);
  486. ASSERT(FALSE);
  487. return TRUE;
  488. }
  489. //* DummyDelAddr - Dummy del address routine.
  490. //
  491. // Called at init time when we need to initialize ourselves.
  492. //
  493. uint
  494. __stdcall
  495. DummyDelAddr(void *Context, uint Type, IPAddr Address, IPMask Mask)
  496. {
  497. UNREFERENCED_PARAMETER(Context);
  498. UNREFERENCED_PARAMETER(Type);
  499. UNREFERENCED_PARAMETER(Address);
  500. UNREFERENCED_PARAMETER(Mask);
  501. ASSERT(FALSE);
  502. return TRUE;
  503. }
  504. //* DummyGetEList - Dummy get entity list.
  505. //
  506. // A dummy routine that should never be called.
  507. //
  508. // Input: Context - Unused.
  509. // EntityList - Pointer to entity list to be filled in.
  510. // Count - Pointer to number of entries in the list.
  511. //
  512. // Returns Status of attempt to get the info.
  513. //
  514. int
  515. __stdcall
  516. DummyGetEList(void *Context, TDIEntityID * EntityList, uint * Count)
  517. {
  518. UNREFERENCED_PARAMETER(Context);
  519. UNREFERENCED_PARAMETER(EntityList);
  520. UNREFERENCED_PARAMETER(Count);
  521. ASSERT(FALSE);
  522. return FALSE;
  523. }
  524. //* DummyDoNdisReq - Dummy send NDIS request
  525. //
  526. // A dummy routine that should never be called.
  527. //
  528. // Input: Context - Interface context (unused).
  529. // RT - NDIS Request Type
  530. // OID - NDIS Request OID
  531. // Info - Information Buffer.
  532. // Length - Pointer to size of buffer
  533. // Needed - Pointer to required size
  534. // Blocking - Call is Sync or Async
  535. //
  536. // Returns Status of attempt to get the info.
  537. //
  538. NDIS_STATUS
  539. __stdcall
  540. DummyDoNdisReq(void *Context, NDIS_REQUEST_TYPE RT,
  541. NDIS_OID OID, void *Info, uint Length,
  542. uint * Needed, BOOLEAN Blocking)
  543. {
  544. UNREFERENCED_PARAMETER(Context);
  545. UNREFERENCED_PARAMETER(RT);
  546. UNREFERENCED_PARAMETER(OID);
  547. UNREFERENCED_PARAMETER(Info);
  548. UNREFERENCED_PARAMETER(Length);
  549. UNREFERENCED_PARAMETER(Needed);
  550. UNREFERENCED_PARAMETER(Blocking);
  551. ASSERT(FALSE);
  552. return NDIS_STATUS_FAILURE;
  553. }
  554. #if FFP_SUPPORT
  // Upper bound on the number of FFP enabled NIC drivers tracked in the
  // system at any time. This also serves to limit the total cache memory
  // used for FFP support.
  //
  #define MAXFFPDRVS 8
  559. //* IPGetFFPDriverList - Lists unique FFP enabled drivers in the system
  560. //
  561. // Called by functions that dispatch requests to FFP enabled drivers
  562. //
  563. // Input: arrIF - Array of IFs to reach all FFP enabled drivers
  564. //
  565. // Returns: Number of FFP enabled drivers in the system
  566. //
  567. uint
  568. IPGetFFPDriverList(Interface ** arrIF)
  569. {
  570. ULONG numIF;
  571. Interface *IF;
  572. UINT i;
  573. CTELockHandle Handle;
  574. CTEGetLock(&RouteTableLock.Lock, &Handle);
  575. numIF = 0;
  576. // Take a lock to protect the list of all interfaces
  577. // Go over the interface list to pick FFP drivers
  578. for (IF = IFList; IF != NULL; IF = IF->if_next) {
  579. // Does this interface's driver support FFP ?
  580. if (IF->if_ffpversion == 0)
  581. continue;
  582. // FFP supported; was driver already picked ?
  583. for (i = 0; i < numIF; i++) {
  584. if (IF->if_ffpdriver == arrIF[i]->if_ffpdriver)
  585. break;
  586. }
  587. if (i == numIF) {
  588. ASSERT(numIF < MAXFFPDRVS);
  589. arrIF[numIF++] = IF;
  590. }
  591. }
  592. // Release lock to protect the list of all interfaces
  593. CTEFreeLock(&RouteTableLock.Lock, Handle);
  594. return numIF;
  595. }
  596. //* IPReclaimRequestMem - Post processing upon request completion
  597. //
  598. // ARP calls back upon completion of async requests IP sends ARP
  599. //
  600. // Input: pRequestInfo - Points to request IP sends ARP
  601. //
  602. // Returns: None
  603. //
  604. void
  605. IPReclaimRequestMem(PVOID pRequestInfo)
  606. {
  607. // Decrement ref count, and reclaim memory if it drops to zero
  608. if (InterlockedDecrement(
  609. (PLONG) &((ReqInfoBlock *) pRequestInfo)->RequestRefs) == 0) {
  610. // TCPTRACE(("IPReclaimRequestMem: Freeing mem at pReqInfo = %08X\n",
  611. // pRequestInfo));
  612. CTEFreeMem(pRequestInfo);
  613. }
  614. }
  615. //* IPFlushFFPCaches - Flush all FFP Caches
  616. //
  617. // Call ARP to flush FFP caches in layer 2
  618. //
  619. // Input: None
  620. //
  621. // Returns None
  622. //
  623. void
  624. IPFlushFFPCaches(void)
  625. {
  626. Interface *arrIF[MAXFFPDRVS];
  627. ULONG numIF;
  628. ReqInfoBlock *pRequestInfo;
  629. FFPFlushParams *pFlushInfo;
  630. UINT i;
  631. // Check if any requests need to be posted at all
  632. numIF = IPGetFFPDriverList(arrIF);
  633. if (numIF) {
  634. // Allocate the request block - For General and Request Specific Parts
  635. pRequestInfo = CTEAllocMemNBoot(sizeof(ReqInfoBlock) + sizeof(FFPFlushParams), '7iCT');
  636. // TCPTRACE(("IPFlushFFPCaches: Allocated mem at pReqInfo = %08X\n",
  637. // pRequestInfo));
  638. if (pRequestInfo == NULL) {
  639. return;
  640. }
  641. // Prepare the params for the request [ Part common to all requests ]
  642. pRequestInfo->RequestType = OID_FFP_FLUSH;
  643. pRequestInfo->ReqCompleteCallback = IPReclaimRequestMem;
  644. // Prepare the params for the request [ Part specific to this request ]
  645. pRequestInfo->RequestLength = sizeof(FFPFlushParams);
  646. pFlushInfo = (FFPFlushParams *) pRequestInfo->RequestInfo;
  647. pFlushInfo->NdisProtocolType = NDIS_PROTOCOL_ID_TCP_IP;
  648. // Assign Initial Ref Count to total num of requests
  649. pRequestInfo->RequestRefs = numIF;
  650. // CTEGetLock(&FFPIFsLock, &lhandle);
  651. for (i = 0; i < numIF; i++) {
  652. // Dispatch the request block to the ARP layer
  653. ASSERT(arrIF[i]->if_dondisreq != NULL);
  654. arrIF[i]->if_dondisreq(arrIF[i]->if_lcontext,
  655. NdisRequestSetInformation,
  656. OID_FFP_FLUSH,
  657. pRequestInfo->RequestInfo,
  658. pRequestInfo->RequestLength,
  659. NULL, FALSE);
  660. }
  661. // CTEFreeLock(&FFPIFsLock, lhandle);
  662. }
  663. }
  664. //* IPSetInFFPCaches - Set an entry in all FFP Caches
  665. //
  666. // Call ARP to set -ve FFP entries in caches, (or)
  667. // Invalidate existing +ve or -ve FFP cache entries
  668. //
  669. // Input: PacketHeader - Header of the IP Packet
  670. // Packet - Rest of the IP Packet
  671. // PacketLength - Length of "Packet" param
  672. // CacheEntryType - DISCARD (-ve) or INVALID
  673. //
  674. // Returns None
  675. //
  676. void
  677. IPSetInFFPCaches(struct IPHeader UNALIGNED * PacketHeader, uchar * Packet,
  678. uint PacketLength, ulong CacheEntryType)
  679. {
  680. Interface *arrIF[MAXFFPDRVS];
  681. ULONG numIF;
  682. ReqInfoBlock *pRequestInfo;
  683. FFPDataParams *pSetInInfo;
  684. UINT i;
  685. // Check if any requests need to be posted at all
  686. numIF = IPGetFFPDriverList(arrIF);
  687. if (numIF) {
  688. if (PacketLength < sizeof(ULONG)) {
  689. return;
  690. }
  691. // Allocate the request block - For General and Request Specific Parts
  692. pRequestInfo = CTEAllocMemNBoot(sizeof(ReqInfoBlock) + sizeof(FFPDataParams), '8iCT');
  693. // TCPTRACE(("IPSetInFFPCaches: Allocated mem at pReqInfo = %08X\n",
  694. // pRequestInfo));
  695. if (pRequestInfo == NULL) {
  696. return;
  697. }
  698. // Prepare the params for the request [ Part common to all requests ]
  699. pRequestInfo->RequestType = OID_FFP_DATA;
  700. pRequestInfo->ReqCompleteCallback = IPReclaimRequestMem;
  701. // Prepare the params for the request [ Part specific to this request ]
  702. pRequestInfo->RequestLength = sizeof(FFPDataParams);
  703. pSetInInfo = (FFPDataParams *) pRequestInfo->RequestInfo;
  704. pSetInInfo->NdisProtocolType = NDIS_PROTOCOL_ID_TCP_IP;
  705. pSetInInfo->CacheEntryType = CacheEntryType;
  706. pSetInInfo->HeaderSize = sizeof(IPHeader) + sizeof(ULONG);
  707. RtlCopyMemory(&pSetInInfo->Header, PacketHeader, sizeof(IPHeader));
  708. pSetInInfo->IpHeader.DwordAfterHeader = *(ULONG *) Packet;
  709. // Assign Initial Ref Count to total num of requests
  710. pRequestInfo->RequestRefs = numIF;
  711. // CTEGetLock(&FFPIFsLock, &lhandle);
  712. for (i = 0; i < numIF; i++) {
  713. // Dispatch the request block to the ARP layer
  714. ASSERT(arrIF[i]->if_dondisreq != NULL);
  715. arrIF[i]->if_dondisreq(arrIF[i]->if_lcontext,
  716. NdisRequestSetInformation,
  717. OID_FFP_DATA,
  718. pRequestInfo->RequestInfo,
  719. pRequestInfo->RequestLength,
  720. NULL, FALSE);
  721. }
  722. // CTEFreeLock(&FFPIFsLock, lhandle);
  723. }
  724. }
  725. //* IPStatsFromFFPCaches - Sum Stats from all FFP Caches
  726. //
  727. // Call ARP to get FFP Stats in layer 2
  728. //
  729. // Input: Pointer to the buffer that is filled with statistics
  730. //
  731. // Returns None
  732. //
  733. void
  734. IPStatsFromFFPCaches(FFPDriverStats * pCumulStats)
  735. {
  736. Interface *arrIF[MAXFFPDRVS];
  737. ULONG numIF;
  738. UINT i;
  739. FFPDriverStats DriverStatsInfo =
  740. {
  741. NDIS_PROTOCOL_ID_TCP_IP,
  742. 0, 0, 0, 0, 0, 0
  743. };
  744. RtlZeroMemory(pCumulStats, sizeof(FFPDriverStats));
  745. numIF = IPGetFFPDriverList(arrIF);
  746. if (numIF) {
  747. // CTEGetLock(&FFPIFsLock, &lhandle);
  748. for (i = 0; i < numIF; i++) {
  749. // Dispatch the request block to the ARP layer
  750. ASSERT(arrIF[i]->if_dondisreq != NULL);
  751. if (arrIF[i]->if_dondisreq(arrIF[i]->if_lcontext,
  752. NdisRequestQueryInformation,
  753. OID_FFP_DRIVER_STATS,
  754. &DriverStatsInfo,
  755. sizeof(FFPDriverStats),
  756. NULL, TRUE) == NDIS_STATUS_SUCCESS) {
  757. // Consolidate results from all drivers
  758. pCumulStats->PacketsForwarded += DriverStatsInfo.PacketsForwarded;
  759. pCumulStats->OctetsForwarded += DriverStatsInfo.OctetsForwarded;
  760. pCumulStats->PacketsDiscarded += DriverStatsInfo.PacketsDiscarded;
  761. pCumulStats->OctetsDiscarded += DriverStatsInfo.OctetsDiscarded;
  762. pCumulStats->PacketsIndicated += DriverStatsInfo.PacketsIndicated;
  763. pCumulStats->OctetsIndicated += DriverStatsInfo.OctetsIndicated;
  764. }
  765. }
  766. // CTEFreeLock(&FFPIFsLock, lhandle);
  767. }
  768. }
  769. #endif // if FFP_SUPPORT
  770. //* DerefIF - Dereference an interface.
  771. //
  772. // Called when we need to dereference an interface. We decrement the
  773. // refcount, and if it goes to zero we signal whoever is blocked on
  774. // it.
  775. //
  776. // Input: IF - Interfaec to be dereferenced.
  777. //
  778. // Returns: Nothing.
  779. //
  780. #pragma optimize("", off)
  781. void
  782. DerefIF(Interface * IF)
  783. {
  784. uint Original;
  785. Original = DEREFERENCE_IF(IF);
  786. if (Original != 1) {
  787. return;
  788. } else {
  789. // We just decremented the last reference. Wake whoever is
  790. // blocked on it.
  791. ASSERT(IF->if_block != NULL);
  792. CTESignal(IF->if_block, NDIS_STATUS_SUCCESS);
  793. }
  794. }
  795. //* LockedDerefIF - Dereference an interface w/RouteTableLock held.
  796. //
  797. // Called when we need to dereference an interface. We decrement the
  798. // refcount, and if it goes to zero we signal whoever is blocked on
  799. // it. The difference here is that we assume the caller already holds
  800. // the RouteTableLock.
  801. //
  802. // Input: IF - Interfaec to be dereferenced.
  803. //
  804. // Returns: Nothing.
  805. //
  806. void
  807. LockedDerefIF(Interface * IF)
  808. {
  809. LOCKED_DEREFERENCE_IF(IF);
  810. if (IF->if_refcount != 0) {
  811. return;
  812. } else {
  813. // We just decremented the last reference. Wake whoever is
  814. // blocked on it.
  815. ASSERT(IF->if_block != NULL);
  816. CTESignal(IF->if_block, NDIS_STATUS_SUCCESS);
  817. }
  818. }
  819. #pragma optimize("", on)
  820. //* DerefLink - Dereference the Link
  821. //
  822. // Called when we need to dereference a link. We decrement the
  823. // refcount, and if it goes to zero we free the link
  824. //
  825. // Input: Link - Link to be dereferenced.
  826. //
  827. // Returns: Nothing.
  828. //
  829. void
  830. DerefLink(LinkEntry * Link)
  831. {
  832. uint Original;
  833. Original = CTEInterlockedExchangeAdd(&Link->link_refcount, -1);
  834. if (Original != 1) {
  835. return;
  836. } else {
  837. // We just decremented the last reference.
  838. // Call CloseLink to Notify lower layer that link is going down
  839. ASSERT(Link->link_if);
  840. ASSERT(Link->link_if->if_closelink);
  841. #if DBG
  842. // P2MP stuff still needs to be cooked
  843. {
  844. Interface *IF = Link->link_if;
  845. LinkEntry *tmpLink = IF->if_link;
  846. while (tmpLink) {
  847. if (tmpLink == Link) {
  848. // freeing the Link without cleaning up??
  849. DbgBreakPoint();
  850. }
  851. tmpLink = tmpLink->link_next;
  852. }
  853. }
  854. #endif
  855. (*(Link->link_if->if_closelink)) (Link->link_if->if_lcontext, Link->link_arpctxt);
  856. // Free the link
  857. CTEFreeMem(Link);
  858. }
  859. }
  860. //** AddrOnIF - Check to see if a given address is local to an IF
  861. //
  862. // Called when we want to see if a given address is a valid local address
  863. // for an interface. We walk down the chain of NTEs in the interface, and
  864. // see if we get a match. We assume the caller holds the RouteTableLock
  865. // at this point.
  866. //
  867. // Input: IF - Interface to check.
  868. // Addr - Address to check.
  869. //
  870. // Returns: TRUE if Addr is an address for IF, FALSE otherwise.
  871. //
  872. uint
  873. AddrOnIF(Interface * IF, IPAddr Addr)
  874. {
  875. NetTableEntry *NTE;
  876. NTE = IF->if_nte;
  877. while (NTE != NULL) {
  878. if ((NTE->nte_flags & NTE_VALID) && IP_ADDR_EQUAL(NTE->nte_addr, Addr))
  879. return TRUE;
  880. else
  881. NTE = NTE->nte_ifnext;
  882. }
  883. return FALSE;
  884. }
  885. //** BestNTEForIF - Find the 'best match' NTE on a given interface.
  886. //
  887. // This is a utility function that takes an address and tries to find the
  888. // 'best match' NTE on a given interface. This is really only useful when we
  889. // have multiple IP addresses on a single interface.
  890. //
  891. // Input: Address - Source address of packet.
  892. // IF - Pointer to IF to be searched.
  893. // NoTransientAddr - Filter/don't filter out transient address.
  894. // Returns: The 'best match' NTE.
  895. //
  896. NetTableEntry *
  897. BestNTEForIF(IPAddr Address, Interface * IF, BOOLEAN NoTransientAddr)
  898. {
  899. NetTableEntry *CurrentNTE, *FoundNTE;
  900. uint i;
  901. if (IF->if_nte != NULL) {
  902. // Walk the list of NTEs, looking for a valid one.
  903. CurrentNTE = IF->if_nte;
  904. FoundNTE = NULL;
  905. do {
  906. if (CurrentNTE->nte_flags & NTE_VALID) {
  907. if (IP_ADDR_EQUAL(Address & CurrentNTE->nte_mask,
  908. (CurrentNTE->nte_addr &
  909. CurrentNTE->nte_mask))) {
  910. // If the address is a transient one and
  911. // if caller wants us to check if non transient
  912. // address is available then skip this address.
  913. // However, in the event no non-transient address
  914. // is available this will be returned anyway.
  915. if (NoTransientAddr &&
  916. (CurrentNTE->nte_flags & NTE_TRANSIENT_ADDR)) {
  917. FoundNTE = CurrentNTE;
  918. } else {
  919. return CurrentNTE;
  920. }
  921. } else if (FoundNTE == NULL) {
  922. FoundNTE = CurrentNTE;
  923. }
  924. }
  925. CurrentNTE = CurrentNTE->nte_ifnext;
  926. } while (CurrentNTE != NULL);
  927. // If we found a match, or we didn't and the destination is not
  928. // a broadcast, return the result. We have special case code to
  929. // handle broadcasts, since the interface doesn't really matter there.
  930. if (FoundNTE != NULL || (!IP_ADDR_EQUAL(Address, IP_LOCAL_BCST) &&
  931. !IP_ADDR_EQUAL(Address, IP_ZERO_BCST))) {
  932. return FoundNTE;
  933. }
  934. }
  935. // An 'anonymous' I/F, or the address we're reaching is a broadcast and the
  936. // first interface has no address. Find a valid (non-loopback, non-null ip,
  937. // non-uni) address.
  938. for (i = 0; i < NET_TABLE_SIZE; i++) {
  939. NetTableEntry *NetTableList = NewNetTableList[i];
  940. for (CurrentNTE = NetTableList; CurrentNTE != NULL;
  941. CurrentNTE = CurrentNTE->nte_next) {
  942. if (CurrentNTE != LoopNTE &&
  943. (CurrentNTE->nte_flags & NTE_VALID) &&
  944. !((CurrentNTE->nte_if->if_flags & IF_FLAGS_NOIPADDR) && IP_ADDR_EQUAL(CurrentNTE->nte_addr, NULL_IP_ADDR)) &&
  945. !(CurrentNTE->nte_if->if_flags & IF_FLAGS_UNI)) {
  946. return CurrentNTE;
  947. }
  948. }
  949. }
  950. return NULL;
  951. }
  952. //** IsBCastonNTE - Determine if the specified addr. is a bcast on a spec. NTE.
  953. //
  954. // This routine is called when we need to know if an address is a broadcast
  955. // on a particular net. We check in the order we expect to be most common - a
  956. // subnet bcast, an all ones broadcast, and then an all subnets broadcast. We
  957. // return the type of broadcast it is, or return DEST_LOCAL if it's not a
  958. // broadcast.
  959. //
  960. // Entry: Address - Address in question.
  961. // NTE - NetTableEntry to check Address against.
  962. //
  963. // Returns: Type of broadcast.
  964. //
  965. uchar
  966. IsBCastOnNTE(IPAddr Address, NetTableEntry * NTE)
  967. {
  968. IPMask Mask;
  969. IPAddr BCastAddr;
  970. if (NTE->nte_flags & NTE_VALID) {
  971. BCastAddr = NTE->nte_if->if_bcast;
  972. Mask = NTE->nte_mask;
  973. if (Mask != 0xFFFFFFFF) {
  974. if (IP_ADDR_EQUAL(Address,
  975. (NTE->nte_addr & Mask) | (BCastAddr & ~Mask)))
  976. return DEST_SN_BCAST;
  977. }
  978. // See if it's an all subnet's broadcast.
  979. if (!CLASSD_ADDR(Address)) {
  980. Mask = IPNetMask(Address);
  981. if (IP_ADDR_EQUAL(Address,
  982. (NTE->nte_addr & Mask) | (BCastAddr & ~Mask)))
  983. return DEST_BCAST;
  984. } else {
  985. // This is a class D address. If we're allowed to receive
  986. // mcast datagrams, check our list.
  987. return DEST_MCAST;
  988. }
  989. // A global bcast is certainly a bcast on this net.
  990. if (IP_ADDR_EQUAL(Address, BCastAddr))
  991. return DEST_BCAST;
  992. } else if (RefPtrValid(&DHCPRefPtr)) {
  993. if (AcquireRefPtr(&DHCPRefPtr) == NTE) {
  994. BCastAddr = NTE->nte_if->if_bcast;
  995. ReleaseRefPtr(&DHCPRefPtr);
  996. if ((IP_ADDR_EQUAL(Address, BCastAddr))) {
  997. return (DEST_BCAST);
  998. }
  999. } else {
  1000. ReleaseRefPtr(&DHCPRefPtr);
  1001. }
  1002. }
  1003. return DEST_LOCAL;
  1004. }
  1005. //** InvalidSourceAddress - Check to see if a source address is invalid.
  1006. //
  1007. // This function takes an input address and checks to see if it is valid
  1008. // if used as the source address of an incoming packet. An address is invalid
  1009. // if it's 0, -1, a Class D or Class E address, is a net or subnet broadcast,
  1010. // or has a 0 subnet or host part.
  1011. //
  1012. // Input: Address - Address to be check.
  1013. //
  1014. // Returns: FALSE if the address is not invalid, TRUE if it is invalid.
  1015. //
  1016. uint
  1017. InvalidSourceAddress(IPAddr Address)
  1018. {
  1019. NetTableEntry *NTE; // Pointer to current NTE.
  1020. IPMask Mask; // Mask for address.
  1021. IPAddr MaskedAddress;
  1022. IPAddr LocalAddress;
  1023. uint i;
  1024. if (!CLASSD_ADDR(Address) &&
  1025. !CLASSE_ADDR(Address) &&
  1026. !IP_ADDR_EQUAL(Address, IP_ZERO_BCST) &&
  1027. !IP_ADDR_EQUAL(Address, IP_LOCAL_BCST)
  1028. ) {
  1029. // It's not an obvious broadcast. See if it's an all subnets
  1030. // broadcast, or has a zero host part.
  1031. Mask = IPNetMask(Address);
  1032. MaskedAddress = Address & Mask;
  1033. if (!IP_ADDR_EQUAL(Address, MaskedAddress) &&
  1034. !IP_ADDR_EQUAL(Address, (MaskedAddress | ~Mask))
  1035. ) {
  1036. // It's not an all subnet's broadcast, and it has a non-zero
  1037. // host/subnet part. Walk our local IP addresses, and see if it's
  1038. // a subnet broadcast.
  1039. for (i = 0; i < NET_TABLE_SIZE; i++) {
  1040. NetTableEntry *NetTableList = NewNetTableList[i];
  1041. NTE = NetTableList;
  1042. while (NTE) {
  1043. LocalAddress = NTE->nte_addr;
  1044. if ((NTE->nte_flags & NTE_VALID) &&
  1045. !IP_LOOPBACK(LocalAddress)) {
  1046. Mask = NTE->nte_mask;
  1047. MaskedAddress = LocalAddress & Mask;
  1048. if (!IP_ADDR_EQUAL(Mask, HOST_MASK)) {
  1049. if (IP_ADDR_EQUAL(Address, MaskedAddress) ||
  1050. IP_ADDR_EQUAL(Address,
  1051. (MaskedAddress |
  1052. (NTE->nte_if->if_bcast & ~Mask)))) {
  1053. return TRUE;
  1054. }
  1055. }
  1056. }
  1057. NTE = NTE->nte_next;
  1058. }
  1059. }
  1060. return FALSE;
  1061. }
  1062. }
  1063. return TRUE;
  1064. }
  1065. // 8 regions of 31 cache elements.
  1066. // Each region is indexed by the 3 most significant bits of the IP address.
  1067. // Each cache element within a region is indexed by a hash of the IP address.
  1068. // Each cache element is composed of 29 least significant bits of the IP
  1069. // address plus the three bit address type code.
  1070. // (31 is prime and works well with our hash.)
  1071. //
  1072. #define ATC_BITS 3
  1073. #define ATC_ELEMENTS_PER_REGION 31
  1074. #define ATC_REGIONS (1 << ATC_BITS)
  1075. #define ATC_CODE_MASK (ULONG32)(ATC_REGIONS - 1)
  1076. #define ATC_ADDR_MASK (ULONG32)(~ATC_CODE_MASK)
  1077. // sanity check for 3 bits of address type code
  1078. C_ASSERT(ATC_REGIONS == 8);
  1079. C_ASSERT(ATC_CODE_MASK == 0x00000007);
  1080. C_ASSERT(ATC_ADDR_MASK == 0xFFFFFFF8);
  1081. // Each cache element is 32 bits to support atomic reading and writing.
  1082. //
  1083. ULONG32 AddrTypeCache [ATC_REGIONS * ATC_ELEMENTS_PER_REGION];
  1084. #if DBG
  1085. ULONG DbgAddrTypeCacheHits;
  1086. ULONG DbgAddrTypeCacheMisses;
  1087. ULONG DbgAddrTypeCacheCollisions;
  1088. ULONG DbgAddrTypeCacheFlushes;
  1089. ULONG DbgAddrTypeCacheNoUpdates;
  1090. ULONG DbgAddrTypeCacheLastNoUpdateDestType;
  1091. #endif
  1092. // The following type codes must fit within ATC_BITS of information.
  1093. //
  1094. typedef enum _ADDRESS_TYPE_CODE {
  1095. ATC_LOCAL = 0,
  1096. ATC_BCAST,
  1097. ATC_MCAST,
  1098. ATC_REMOTE,
  1099. ATC_REMOTE_BCAST,
  1100. ATC_REMOTE_MCAST,
  1101. ATC_SUBNET_BCAST,
  1102. ATC_NUM_CODES
  1103. } ADDRESS_TYPE_CODE;
  1104. // The following array is indexed by ADDRESS_TYPE_CODE values.
  1105. //
  1106. const char MapAddrTypeCodeToDestType [] = {
  1107. DEST_LOCAL,
  1108. DEST_BCAST,
  1109. DEST_MCAST,
  1110. DEST_REMOTE,
  1111. DEST_REM_BCAST,
  1112. DEST_REM_MCAST,
  1113. DEST_SN_BCAST,
  1114. };
  1115. //** ComputeAddrTypeCacheIndex - Given an IP address, compute the index
  1116. // of its corresponding entry in the address type cache.
  1117. //
  1118. // Input: Address - IP Address to compute the index of.
  1119. //
  1120. // Returns: Valid index into the address type cache.
  1121. //
  1122. __forceinline
  1123. ULONG
  1124. ComputeAddrTypeCacheIndex(IPAddr Address)
  1125. {
  1126. ULONG Region;
  1127. ULONG Offset;
  1128. ULONG Index;
  1129. // Locate the region of the cache where this Address would reside.
  1130. //
  1131. Region = Address >> (32 - ATC_BITS);
  1132. ASSERT(Region < ATC_REGIONS);
  1133. // Locate the offset into the region where this address would reside.
  1134. // This is done by hashing the address.
  1135. //
  1136. Offset = (1103515245 * Address + 12345) % ATC_ELEMENTS_PER_REGION;
  1137. // Compute the cache index and return it.
  1138. //
  1139. Index = (Region * ATC_ELEMENTS_PER_REGION) + Offset;
  1140. ASSERT(Index < (sizeof(AddrTypeCache) / sizeof(AddrTypeCache[0])));
  1141. return Index;
  1142. }
  1143. //** AddrTypeCacheFlush - Flush the cache entry associated with an address.
  1144. //
  1145. // Input: Address - Address to remove from the cache.
  1146. //
  1147. // Returns: nothing.
  1148. //
  1149. void
  1150. AddrTypeCacheFlush(IPAddr Address)
  1151. {
  1152. ULONG CacheIndex;
  1153. CacheIndex = ComputeAddrTypeCacheIndex(Address);
  1154. AddrTypeCache [CacheIndex] = 0;
  1155. #if DBG
  1156. DbgAddrTypeCacheFlushes++;
  1157. #endif
  1158. }
  1159. //** AddrTypeCacheLookup - Lookup an address from the address type cache.
  1160. //
  1161. // Input: Address - Address to be lookup.
  1162. // Output: CacheIndex - Pointer to cache index corresponding to the Address.
  1163. // DestType - Pointer to destination type to be filled in if
  1164. // the address is found in the cache.
  1165. //
  1166. // Returns: TRUE if the address was found in the cache.
  1167. //
  1168. // N.B. The output parameter DestType is only initialized if TRUE is returned.
  1169. //
  1170. __forceinline
  1171. BOOLEAN
  1172. AddrTypeCacheLookup(IPAddr Address, ULONG *CacheIndex, uchar *DestType)
  1173. {
  1174. ULONG32 CacheValue;
  1175. // Read the value of the cache corresponding to this address.
  1176. //
  1177. *CacheIndex = ComputeAddrTypeCacheIndex(Address);
  1178. CacheValue = AddrTypeCache [*CacheIndex];
  1179. // If the cached value is non-zero and matches the relevent portion of
  1180. // the address, then get the type code and translate it to the proper
  1181. // destination type.
  1182. //
  1183. if ((CacheValue != 0) &&
  1184. (((Address << ATC_BITS) ^ CacheValue) & ATC_ADDR_MASK) == 0) {
  1185. ADDRESS_TYPE_CODE TypeCode = CacheValue & ATC_CODE_MASK;
  1186. ASSERT(TypeCode < ATC_NUM_CODES);
  1187. *DestType = MapAddrTypeCodeToDestType[TypeCode];
  1188. #if DBG
  1189. DbgAddrTypeCacheHits++;
  1190. #endif
  1191. return TRUE;
  1192. }
  1193. #if DBG
  1194. DbgAddrTypeCacheMisses++;
  1195. #endif
  1196. return FALSE;
  1197. }
  1198. //** AddrTypeCacheUpdate - Add or update the destination type for an Address.
  1199. // in the cache.
  1200. //
  1201. // Input: Address - Address to be add or update.
  1202. // CacheIndex - Cache index corresponding to the Address.
  1203. // DestType - Destination type to cache for the Address.
  1204. //
  1205. // Returns: nothing.
  1206. //
  1207. __forceinline
  1208. void
  1209. AddrTypeCacheUpdate(IPAddr Address, ULONG CacheIndex, uchar DestType)
  1210. {
  1211. ADDRESS_TYPE_CODE TypeCode = ATC_LOCAL;
  1212. BOOLEAN Update = TRUE;
  1213. ASSERT(CacheIndex < (sizeof(AddrTypeCache) / sizeof(AddrTypeCache[0])));
  1214. switch (DestType) {
  1215. case DEST_LOCAL:
  1216. TypeCode = ATC_LOCAL;
  1217. break;
  1218. case DEST_BCAST:
  1219. TypeCode = ATC_BCAST;
  1220. break;
  1221. case DEST_MCAST:
  1222. TypeCode = ATC_MCAST;
  1223. break;
  1224. case DEST_REMOTE:
  1225. TypeCode = ATC_REMOTE;
  1226. break;
  1227. case DEST_REM_BCAST:
  1228. TypeCode = ATC_REMOTE_BCAST;
  1229. break;
  1230. case DEST_REM_MCAST:
  1231. TypeCode = ATC_REMOTE_MCAST;
  1232. break;
  1233. case DEST_SN_BCAST:
  1234. TypeCode = ATC_SUBNET_BCAST;
  1235. break;
  1236. default:
  1237. Update = FALSE;
  1238. #if DBG
  1239. DbgAddrTypeCacheNoUpdates++;
  1240. DbgAddrTypeCacheLastNoUpdateDestType = DestType;
  1241. #endif
  1242. }
  1243. if (Update) {
  1244. #if DBG
  1245. ULONG32 CacheValue = AddrTypeCache [CacheIndex];
  1246. if (CacheValue != 0) {
  1247. DbgAddrTypeCacheCollisions++;
  1248. }
  1249. #endif
  1250. AddrTypeCache [CacheIndex] = (Address << ATC_BITS) | TypeCode;
  1251. }
  1252. }
  1253. //** GetAddrType - Return the destination type of a specified address.
  1254. //
  1255. // Input: Address - Address to get the destination type of.
  1256. //
  1257. // Returns: Destination type.
  1258. //
  1259. uchar
  1260. GetAddrType(IPAddr Address)
  1261. {
  1262. ULONG CacheIndex;
  1263. NetTableEntry *NTE; // Pointer to current NTE.
  1264. IPMask Mask; // Mask for address.
  1265. IPMask SNMask;
  1266. uint i;
  1267. uchar Result; // Result of broadcast check.
  1268. // Check the cache and return if we got a hit.
  1269. //
  1270. if (AddrTypeCacheLookup(Address, &CacheIndex, &Result)) {
  1271. return Result;
  1272. }
  1273. // We don't cache, nor do we need to cache, these types of invalid
  1274. // addresses.
  1275. //
  1276. if (CLASSE_ADDR(Address)) {
  1277. return DEST_INVALID;
  1278. }
  1279. // See if it's one of our local addresses, or a broadcast
  1280. // on a local address.
  1281. // optimize it for the DEST_LOCAL case
  1282. //
  1283. for (NTE = NewNetTableList[NET_TABLE_HASH(Address)];
  1284. NTE; NTE = NTE->nte_next) {
  1285. if (IP_ADDR_EQUAL(NTE->nte_addr, Address) &&
  1286. (NTE->nte_flags & NTE_VALID) &&
  1287. !((IP_ADDR_EQUAL(Address, NULL_IP_ADDR) && (NTE->nte_if->if_flags & IF_FLAGS_NOIPADDR)))) {
  1288. Result = DEST_LOCAL;
  1289. goto gat_exit;
  1290. }
  1291. }
  1292. // go thru the whole table for other cases
  1293. //
  1294. for (i = 0; i < NET_TABLE_SIZE; i++) {
  1295. for (NTE = NewNetTableList[i]; NTE; NTE = NTE->nte_next) {
  1296. if (!(NTE->nte_flags & NTE_VALID)) {
  1297. continue;
  1298. }
  1299. if ((Result = IsBCastOnNTE(Address, NTE)) != DEST_LOCAL) {
  1300. goto gat_exit;
  1301. }
  1302. // See if the destination has a valid host part.
  1303. SNMask = NTE->nte_mask;
  1304. if (IP_ADDR_EQUAL(Address & SNMask, NTE->nte_addr & SNMask)) {
  1305. // On this subnet. See if the host part is invalid.
  1306. if (IP_ADDR_EQUAL(Address & SNMask, Address)) {
  1307. Result = DEST_INVALID; // Invalid 0 host part.
  1308. goto gat_exit;
  1309. }
  1310. }
  1311. }
  1312. }
  1313. // It's not a local address, see if it's loopback.
  1314. if (IP_LOOPBACK(Address)) {
  1315. Result = DEST_LOCAL;
  1316. goto gat_exit;
  1317. }
  1318. // If we're doing IGMP, see if it's a Class D address. If it is,
  1319. // return that.
  1320. if (CLASSD_ADDR(Address)) {
  1321. if (IGMPLevel != 0) {
  1322. Result = DEST_REM_MCAST;
  1323. goto gat_exit;
  1324. } else {
  1325. Result = DEST_INVALID;
  1326. goto gat_exit;
  1327. }
  1328. }
  1329. Mask = IPNetMask(Address);
  1330. // Now check remote broadcast. When we get here we know that the
  1331. // address is not a global broadcast, a subnet broadcast for a subnet
  1332. // of which we're a member, or an all-subnets broadcast for a net of
  1333. // which we're a member. Since we're avoiding making assumptions about
  1334. // all subnet of a net having the same mask, we can't really check for
  1335. // a remote subnet broadcast. We'll use the net mask and see if it's
  1336. // a remote all-subnet's broadcast.
  1337. if (IP_ADDR_EQUAL(Address, (Address & Mask) | (IP_LOCAL_BCST & ~Mask))) {
  1338. Result = DEST_REM_BCAST;
  1339. goto gat_exit;
  1340. }
  1341. // Check for invalid 0 parts. All we can do from here is see if he's
  1342. // sending to a remote net with all zero subnet and host parts. We
  1343. // can't check to see if he's sending to a remote subnet with an all
  1344. // zero host part.
  1345. if (IP_ADDR_EQUAL(Address, NULL_IP_ADDR)) {
  1346. Result = DEST_INVALID;
  1347. goto gat_exit;
  1348. }
  1349. #if DBG
  1350. if (IP_ADDR_EQUAL(Address, Address & Mask)) {
  1351. //This is a remote address with null host part per classfull address
  1352. //But may be a supernetted address, where the prefix len is less than the
  1353. //class mask prefix len for the metid.
  1354. //We should let this address go out.
  1355. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," GAT: zero host part %x?\n", Address));
  1356. }
  1357. #endif
  1358. // Must be remote.
  1359. Result = DEST_REMOTE;
  1360. gat_exit:
  1361. AddrTypeCacheUpdate(Address, CacheIndex, Result);
  1362. return Result;
  1363. }
  1364. //** GetLocalNTE - Get the local NTE for an incoming packet.
  1365. //
  1366. // Called during receive processing to find a matching NTE for a packet.
  1367. // First we check against the NTE we received it on, then against any NTE.
  1368. //
  1369. // Input: Address - The dest. address of the packet.
  1370. // NTE - Pointer to NTE packet was received on - filled in on
  1371. // exit w/correct NTE.
  1372. //
  1373. // Returns: DEST_LOCAL if the packet is destined for this host,
  1374. // DEST_REMOTE if it needs to be routed,
  1375. // DEST_SN_BCAST or DEST_BCAST if it's some sort of a broadcast.
  1376. //
  1377. uchar
  1378. GetLocalNTE(IPAddr Address, NetTableEntry ** NTE)
  1379. {
  1380. NetTableEntry *LocalNTE = *NTE;
  1381. IPMask Mask;
  1382. uchar Result;
  1383. uint i;
  1384. Interface *LocalIF;
  1385. NetTableEntry *OriginalNTE;
  1386. // Quick check to see if it is for the NTE it came in on (the common case).
  1387. if (IP_ADDR_EQUAL(Address, LocalNTE->nte_addr) &&
  1388. (LocalNTE->nte_flags & NTE_VALID))
  1389. return DEST_LOCAL; // For us, just return.
  1390. // Now check to see if it's a broadcast of some sort on the interface it
  1391. // came in on.
  1392. if ((Result = IsBCastOnNTE(Address, LocalNTE)) != DEST_LOCAL)
  1393. return Result;
  1394. //Is this a mcast on a loop interface
  1395. if ((LocalNTE == LoopNTE) && CLASSD_ADDR(Address)) {
  1396. return DEST_MCAST;
  1397. }
  1398. // The common cases failed us. Loop through the NetTable and see if
  1399. // it is either a valid local address or is a broadcast on one of the NTEs
  1400. // on the incoming interface. We won't check the NTE we've already looked
  1401. // at. We look at all NTEs, including the loopback NTE, because a loopback
  1402. // frame could come through here. Also, frames from ourselves to ourselves
  1403. // will come in on the loopback NTE.
  1404. i = 0;
  1405. LocalIF = LocalNTE->nte_if;
  1406. OriginalNTE = LocalNTE;
  1407. // optimize it for the DEST_LOCAL case
  1408. LocalNTE = NewNetTableList[NET_TABLE_HASH(Address)];
  1409. while (LocalNTE) {
  1410. if (LocalNTE != OriginalNTE) {
  1411. if (IP_ADDR_EQUAL(Address, LocalNTE->nte_addr) &&
  1412. (LocalNTE->nte_flags & NTE_VALID) &&
  1413. !((IP_ADDR_EQUAL(Address, NULL_IP_ADDR) && (LocalNTE->nte_if->if_flags & IF_FLAGS_NOIPADDR)))) {
  1414. *NTE = LocalNTE;
  1415. return DEST_LOCAL; // For us, just return.
  1416. }
  1417. }
  1418. LocalNTE = LocalNTE->nte_next;
  1419. }
  1420. // go thru the whole table for other cases
  1421. for (i = 0; i < NET_TABLE_SIZE; i++) {
  1422. NetTableEntry *NetTableList = NewNetTableList[i];
  1423. LocalNTE = NetTableList;
  1424. while (LocalNTE) {
  1425. if (LocalNTE != OriginalNTE) {
  1426. // If this NTE is on the same interface as the NTE it arrived on,
  1427. // see if it's a broadcast.
  1428. if (LocalIF == LocalNTE->nte_if)
  1429. if ((Result = IsBCastOnNTE(Address, LocalNTE)) != DEST_LOCAL) {
  1430. *NTE = LocalNTE;
  1431. return Result;
  1432. }
  1433. }
  1434. LocalNTE = LocalNTE->nte_next;
  1435. }
  1436. }
  1437. // It's not a local address, see if it's loopback.
  1438. if (IP_LOOPBACK(Address)) {
  1439. *NTE = LoopNTE;
  1440. return DEST_LOCAL;
  1441. }
  1442. // If it's a class D address and we're receiveing multicasts, handle it
  1443. // here.
  1444. if (CLASSD_ADDR(Address)) {
  1445. if (IGMPLevel != 0)
  1446. return DEST_REM_MCAST;
  1447. else
  1448. return DEST_INVALID;
  1449. }
  1450. // It's not local. Check to see if maybe it's a net broadcast for a net
  1451. // of which we're not a member. If so, return remote bcast. We can't check
  1452. // for subnet broadcast of subnets for which we're not a member, since we're
  1453. // not making assumptions about all subnets of a single net having the
  1454. // same mask. If we're here it's not a subnet broadcast for a net of which
  1455. // we're a member, so we don't know a subnet mask for it. We'll just use
  1456. // the net mask.
  1457. Mask = IPNetMask(Address);
  1458. if (((*NTE)->nte_flags & NTE_VALID) &&
  1459. (IP_ADDR_EQUAL(Address, (Address & Mask) |
  1460. ((*NTE)->nte_if->if_bcast & ~Mask))))
  1461. return DEST_REM_BCAST;
  1462. // If it's to the 0 address, or a Class E address, or has an all-zero
  1463. // subnet and net part, it's invalid.
  1464. if (IP_ADDR_EQUAL(Address, IP_ZERO_BCST) ||
  1465. IP_ADDR_EQUAL(Address, (Address & Mask)) ||
  1466. CLASSE_ADDR(Address))
  1467. return DEST_INVALID;
  1468. // If we're DHCPing the interface on which this came in we'll accept this.
  1469. // If it came in as a broadcast a check in IPRcv() will reject it. If it's
  1470. // a unicast to us we'll pass it up.
  1471. if ((*NTE)->nte_flags & NTE_DHCP) {
  1472. ASSERT(!((*NTE)->nte_flags & NTE_VALID));
  1473. return DEST_LOCAL;
  1474. }
  1475. return DEST_REMOTE;
  1476. }
  1477. //** IsRouteICMP - This function is used by Router Discovery to determine
  1478. // how we learned about the route. We are not allowed to update or timeout
  1479. // routes that were not learned about via icmp. If the route is new then
  1480. // we treat it as icmp and add a new entry.
  1481. // Input: Dest - Destination to search for.
  1482. // Mask - Mask for destination.
  1483. // FirstHop - FirstHop to Dest.
  1484. // OutIF - Pointer to outgoing interface structure.
  1485. //
  1486. // Returns: TRUE if learned via ICMP, FALSE otherwise.
  1487. //
  1488. uint
  1489. IsRouteICMP(IPAddr Dest, IPMask Mask, IPAddr FirstHop, Interface * OutIF)
  1490. {
  1491. RouteTableEntry *RTE;
  1492. RouteTableEntry *TempRTE;
  1493. RTE = FindSpecificRTE(Dest, Mask, FirstHop, OutIF, &TempRTE, FALSE);
  1494. if (RTE == NULL)
  1495. return (TRUE);
  1496. if (RTE->rte_proto == IRE_PROTO_ICMP) {
  1497. return (TRUE);
  1498. } else {
  1499. return (FALSE);
  1500. }
  1501. }
  1502. void
  1503. UpdateDeadGWState( )
  1504. {
  1505. uint Active = 0;
  1506. uint Configured = 0;
  1507. RouteTableEntry* RTE;
  1508. RTE = GetDefaultGWs(&RTE);
  1509. while (RTE) {
  1510. ++Configured;
  1511. if (RTE->rte_flags & RTE_VALID)
  1512. ++Active;
  1513. RTE = RTE->rte_next;
  1514. }
  1515. DefGWActive = Active;
  1516. DefGWConfigured = Configured;
  1517. }
  1518. //* ValidateDefaultGWs - Mark all default gateways as valid.
  1519. //
  1520. // Called to one or all of our default gateways as up. The caller specifies
  1521. // the IP address of the one to mark as up, or NULL_IP_ADDR if they're all
  1522. // supposed to be marked up. We return a count of how many we marked as
  1523. // valid.
  1524. //
  1525. // Input: IP address of G/W to mark as up.
  1526. //
  1527. // Returns: Count of gateways marked as up.
  1528. //
  1529. uint
  1530. ValidateDefaultGWs(IPAddr Addr)
  1531. {
  1532. RouteTableEntry *RTE;
  1533. uint Count = 0;
  1534. uint Now = CTESystemUpTime() / 1000L;
  1535. RTE = GetDefaultGWs(&RTE);
  1536. while (RTE != NULL) {
  1537. if (RTE->rte_mask == DEFAULT_MASK && !(RTE->rte_flags & RTE_VALID) &&
  1538. (IP_ADDR_EQUAL(Addr, NULL_IP_ADDR) ||
  1539. IP_ADDR_EQUAL(Addr, RTE->rte_addr))) {
  1540. RTE->rte_flags |= RTE_VALID;
  1541. RTE->rte_valid = Now;
  1542. Count++;
  1543. }
  1544. RTE->rte_todg = RTE->rte_fromdg = NULL;
  1545. // To ensure that RCEs get switched to a lower-metric gateway
  1546. // if one exists, invalidate all RCEs on this RTE.
  1547. InvalidateRCEChain(RTE);
  1548. RTE = RTE->rte_next;
  1549. }
  1550. DefGWActive += Count;
  1551. UpdateDeadGWState();
  1552. return Count;
  1553. }
  1554. //* InvalidateRCE - Invalidate an RCE.
  1555. //
  1556. // Called to invalidate the RCE
  1557. //
  1558. //
  1559. // Input: RCE
  1560. //
  1561. // Returns: usecnt on the RCE.
  1562. //
  1563. uint
  1564. InvalidateRCE(RouteCacheEntry * CurrentRCE)
  1565. {
  1566. CTELockHandle RCEHandle; // Lock handle for RCE being updated.
  1567. Interface *OutIF;
  1568. RouteTableEntry *RTE;
  1569. RouteCacheEntry *PrevRCE;
  1570. uint RCE_usecnt = 0;
  1571. if (CurrentRCE != NULL) {
  1572. CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);
  1573. RCE_usecnt = CurrentRCE->rce_usecnt;
  1574. if ((CurrentRCE->rce_flags & RCE_VALID) && !(CurrentRCE->rce_flags & RCE_LINK_DELETED)) {
  1575. ASSERT(CurrentRCE->rce_rte != NULL);
  1576. OutIF = CurrentRCE->rce_rte->rte_if;
  1577. RTE = CurrentRCE->rce_rte;
  1578. CurrentRCE->rce_rte->rte_rces -= CurrentRCE->rce_cnt;
  1579. CurrentRCE->rce_flags &= ~RCE_VALID;
  1580. CurrentRCE->rce_rte = (RouteTableEntry *) OutIF;
  1581. if ((CurrentRCE->rce_flags & RCE_CONNECTED) &&
  1582. (RCE_usecnt == 0)) {
  1583. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"InvalidateRCE %x\n", CurrentRCE));
  1584. InvalidateRCEContext(CurrentRCE);
  1585. }
  1586. PrevRCE = STRUCT_OF(RouteCacheEntry, &RTE->rte_rcelist, rce_next);
  1587. // Walk down the list until we find him.
  1588. while (PrevRCE != NULL) {
  1589. if (PrevRCE->rce_next == CurrentRCE)
  1590. break;
  1591. PrevRCE = PrevRCE->rce_next;
  1592. }
  1593. //ASSERT(PrevRCE != NULL);
  1594. if (PrevRCE != NULL) {
  1595. PrevRCE->rce_next = CurrentRCE->rce_next;
  1596. }
  1597. }
  1598. CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);
  1599. }
  1600. return RCE_usecnt;
  1601. }
  1602. //* InvalidateRCEChain - Invalidate the RCEs on an RCE.
  1603. //
  1604. // Called to invalidate the RCE chain on an RTE. We assume the caller holds
  1605. // the route table lock.
  1606. //
  1607. // Input: RTE - RTE on which to invalidate RCEs.
  1608. //
  1609. // Returns: Nothing.
  1610. //
  1611. void
  1612. InvalidateRCEChain(RouteTableEntry * RTE)
  1613. {
  1614. CTELockHandle RCEHandle; // Lock handle for RCE being updated.
  1615. RouteCacheEntry *TempRCE, *CurrentRCE;
  1616. Interface *OutIF;
  1617. OutIF = RTE->rte_if;
  1618. // If there is an RCE chain on this RCE, invalidate the RCEs on it. We still
  1619. // hold the RouteTableLock, so RCE closes can't happen.
  1620. CurrentRCE = RTE->rte_rcelist;
  1621. RTE->rte_rcelist = NULL;
  1622. // Walk down the list, nuking each RCE.
  1623. while (CurrentRCE != NULL) {
  1624. CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);
  1625. if ((CurrentRCE->rce_flags & RCE_VALID) && !(CurrentRCE->rce_flags & RCE_LINK_DELETED)) {
  1626. ASSERT(CurrentRCE->rce_rte == RTE);
  1627. RTE->rte_rces -= CurrentRCE->rce_cnt;
  1628. CurrentRCE->rce_flags &= ~RCE_VALID;
  1629. CurrentRCE->rce_rte = (RouteTableEntry *) OutIF;
  1630. if ((CurrentRCE->rce_flags & RCE_CONNECTED) &&
  1631. CurrentRCE->rce_usecnt == 0) {
  1632. InvalidateRCEContext(CurrentRCE);
  1633. }
  1634. } else
  1635. ASSERT(FALSE);
  1636. TempRCE = CurrentRCE->rce_next;
  1637. CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);
  1638. CurrentRCE = TempRCE;
  1639. }
  1640. }
  1641. //* InvalidateRCELinks - Invalidate the RCEs on RTE when the link goes away
  1642. //
  1643. // Called to invalidate the RCE chain on an RTE. We assume the caller holds
  1644. // the route table lock.
  1645. //
  1646. // Input: RTE - RTE on which to invalidate RCEs.
  1647. //
  1648. // Returns: Nothing.
  1649. //
  1650. void
  1651. InvalidateRCELinks(RouteTableEntry * RTE)
  1652. {
  1653. CTELockHandle RCEHandle; // Lock handle for RCE being updated.
  1654. RouteCacheEntry *TempRCE, *CurrentRCE;
  1655. Interface *OutIF;
  1656. InvalidateRCEChain(RTE);
  1657. OutIF = RTE->rte_if;
  1658. ASSERT(OutIF->if_flags & IF_FLAGS_P2MP);
  1659. ASSERT(RTE->rte_link);
  1660. // If there is an RCE chain on this RCE, invalidate the RCEs on it. We still
  1661. // hold the RouteTableLock, so RCE closes can't happen.
  1662. CurrentRCE = RTE->rte_rcelist;
  1663. RTE->rte_rcelist = NULL;
  1664. // Walk down the list, nuking each RCE.
  1665. while (CurrentRCE != NULL) {
  1666. CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);
  1667. // mark the RCE as link deleted so that this rce is not selected in iptransmit
  1668. CurrentRCE->rce_flags |= RCE_LINK_DELETED;
  1669. TempRCE = CurrentRCE->rce_next;
  1670. CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);
  1671. CurrentRCE = TempRCE;
  1672. }
  1673. }
  1674. //* GetNextHopForRTE - determines the next-hop address for a route.
  1675. //
  1676. // Called when we need an actual next-hop for a route, typically so
  1677. // we can pass it to an external client. For local routes that have
  1678. // an rte_addr field set to IPADDR_LOCAL, this means figuring out
  1679. // the source NTE for the route and using its IP address.
  1680. //
  1681. // Entry: RTE - the entry whose next-hop is required
  1682. //
  1683. // Returns: IPAddr containing the next-hop
  1684. //
  1685. IPAddr
  1686. GetNextHopForRTE(RouteTableEntry* RTE)
  1687. {
  1688. if (IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
  1689. Interface *IF = RTE->rte_if;
  1690. NetTableEntry *SrcNTE = BestNTEForIF(RTE->rte_dest, IF, FALSE);
  1691. if (IF->if_nte != NULL && SrcNTE != NULL)
  1692. return SrcNTE->nte_addr;
  1693. else
  1694. return RTE->rte_dest;
  1695. }
  1696. return RTE->rte_addr;
  1697. }
  1698. //** FindValidIFForRTE - Find a valid inteface for an RTE.
  1699. //
  1700. // Called when we're going to send a packet out a route that currently marked
  1701. // as disconnected. If we have a valid callout routine we'll call it to find
  1702. // the outgoing interface index, and set up the RTE to point at that interface.
  1703. // This routine is called with the RouteTableLock held.
  1704. //
  1705. // Input: RTE - A pointer to the RTE for the route being used.
  1706. // Destination - Destination IP address we're trying to reach.
  1707. // Source - Source IP address we're sending from.
  1708. // Protocol - Protocol type of packet that caused send.
  1709. // Buffer - Pointer to first part of packet that caused send.
  1710. // Length - Length of buffer.
  1711. // HdrSrc - Src Address in header
  1712. //
  1713. // Returns: A pointer to the RTE, or NULL if that RTE couldn't be connected.
  1714. //
  1715. RouteTableEntry *
  1716. FindValidIFForRTE(RouteTableEntry * RTE, IPAddr Destination, IPAddr Source,
  1717. uchar Protocol, uchar * Buffer, uint Length, IPAddr HdrSrc)
  1718. {
  1719. uint NewIFIndex;
  1720. Interface *NewIF;
  1721. NetTableEntry *NewNTE;
  1722. if (RefPtrValid(&DODRefPtr)) {
  1723. IPMapRouteToInterfacePtr DODCallout;
  1724. // There is a callout. See if it can help us.
  1725. DODCallout = AcquireRefPtr(&DODRefPtr);
  1726. NewIFIndex = (*DODCallout) (RTE->rte_context, Destination, Source,
  1727. Protocol, Buffer, Length, HdrSrc);
  1728. ReleaseRefPtr(&DODRefPtr);
  1729. if (NewIFIndex != INVALID_IF_INDEX) {
  1730. // We got what should be a valid index. Walk our interface table list
  1731. // and see if we can find a matching interface structure.
  1732. for (NewIF = IFList; NewIF != NULL; NewIF = NewIF->if_next) {
  1733. if (NewIF->if_index == NewIFIndex) {
  1734. // Found one.
  1735. break;
  1736. }
  1737. }
  1738. if ((NewIF != NULL) && (NewIF->if_ntecount)) {
  1739. // We found a matching structure. Set the RTE interface to point
  1740. // to this, and mark as connected.
  1741. if (RTE->rte_addr != IPADDR_LOCAL) {
  1742. // See if the first hop of the route is a local address on this
  1743. // new interface. If it is, mark it as local.
  1744. for (NewNTE = NewIF->if_nte; NewNTE != NULL;
  1745. NewNTE = NewNTE->nte_ifnext) {
  1746. // Don't look at him if he's not valid.
  1747. if (!(NewNTE->nte_flags & NTE_VALID)) {
  1748. continue;
  1749. }
  1750. // See if the first hop in the RTE is equal to this IP
  1751. // address.
  1752. if (IP_ADDR_EQUAL(NewNTE->nte_addr, RTE->rte_addr)) {
  1753. // It is, so mark as local and quit looking.
  1754. RTE->rte_addr = IPADDR_LOCAL;
  1755. RTE->rte_type = IRE_TYPE_DIRECT;
  1756. break;
  1757. }
  1758. }
  1759. }
  1760. // Set the RTE to the new interface, and mark him as valid.
  1761. RTE->rte_if = NewIF;
  1762. RTE->rte_flags |= RTE_IF_VALID;
  1763. SortRoutesInDestByRTE(RTE);
  1764. RTE->rte_mtu = NewIF->if_mtu - sizeof(IPHeader);
  1765. return RTE;
  1766. } else {
  1767. // ASSERT(FALSE);
  1768. return NULL;
  1769. }
  1770. }
  1771. }
  1772. // Either the callout is NULL, or the callout couldn't map a inteface index.
  1773. return NULL;
  1774. }
  1775. //* GetRouteContext - Routine to get the route context for a specific route.
  1776. //
  1777. // Called when we need to get the route context for a path, usually when we're
  1778. // adding a route derived from an existing route. We return the route context
  1779. // for the existing route, or NULL if we can't find one.
  1780. //
  1781. // Input: Destination - Destination address of path.
  1782. // Source - Source address of path.
  1783. //
  1784. // Returns: A ROUTE_CONTEXT, or 0.
  1785. //
  1786. ROUTE_CONTEXT
  1787. GetRouteContext(IPAddr Destination, IPAddr Source)
  1788. {
  1789. CTELockHandle Handle;
  1790. RouteTableEntry *RTE;
  1791. ROUTE_CONTEXT Context;
  1792. CTEGetLock(&RouteTableLock.Lock, &Handle);
  1793. RTE = LookupRTE(Destination, Source, HOST_ROUTE_PRI, FALSE);
  1794. if (RTE != NULL) {
  1795. Context = RTE->rte_context;
  1796. } else
  1797. Context = 0;
  1798. CTEFreeLock(&RouteTableLock.Lock, Handle);
  1799. return (Context);
  1800. }
  1801. //** LookupNextHop - Look up the next hop
  1802. //
  1803. // Called when we need to find the next hop on our way to a destination. We
  1804. // call LookupRTE to find it, and return the appropriate information.
  1805. //
  1806. // In a PnP build, the interface is referenced here.
  1807. //
  1808. // Entry: Destination - IP address we're trying to reach.
  1809. // Src - Source address of datagram being routed.
  1810. // NextHop - Pointer to IP address of next hop (returned).
  1811. // MTU - Pointer to where to return max MTU used on the
  1812. // route.
  1813. //
  1814. // Returns: Pointer to outgoing interface if we found one, NULL otherwise.
  1815. //
  1816. Interface *
  1817. LookupNextHop(IPAddr Destination, IPAddr Src, IPAddr * NextHop, uint * MTU)
  1818. {
  1819. CTELockHandle TableLock; // Lock handle for routing table.
  1820. RouteTableEntry *Route; // Pointer to route table entry for route.
  1821. Interface *IF;
  1822. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  1823. Route = LookupRTE(Destination, Src, HOST_ROUTE_PRI, FALSE);
  1824. if (Route != (RouteTableEntry *) NULL) {
  1825. IF = Route->rte_if;
  1826. // If this is a direct route, send straight to the destination.
  1827. *NextHop = IP_ADDR_EQUAL(Route->rte_addr, IPADDR_LOCAL) ? Destination :
  1828. Route->rte_addr;
  1829. // if the route is on a P2MP interface get the mtu from the link associated with the route
  1830. if (Route->rte_link)
  1831. *MTU = Route->rte_link->link_mtu;
  1832. else
  1833. *MTU = Route->rte_mtu;
  1834. LOCKED_REFERENCE_IF(IF);
  1835. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1836. return IF;
  1837. } else { // Couldn't find a route.
  1838. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1839. return NULL;
  1840. }
  1841. }
  1842. //** LookupNextHopWithBuffer - Look up the next hop, with packet information.
  1843. //
  1844. // Called when we need to find the next hop on our way to a destination and we
  1845. // have packet information that we may use for dial on demand support. We call
  1846. // LookupRTE to find it, and return the appropriate information. We may bring
  1847. // up the link if neccessary.
  1848. //
  1849. // In a PnP build, the interface is referenced here.
  1850. //
  1851. // Entry: Destination - IP address we're trying to reach.
  1852. // Src - Source address of datagram being routed.
  1853. // NextHop - Pointer to IP address of next hop (returned).
  1854. // MTU - Pointer to where to return max MTU used on the
  1855. // route.
  1856. // Protocol - Protocol type for packet that's causing this
  1857. // lookup.
  1858. // Buffer - Pointer to first part of packet causing lookup.
  1859. // Length - Length of Buffer.
  1860. // HdrSrc - source addres from header
  1861. // UnicastIf - Iface to constrain lookup to, 0 if unconstrained
  1862. //
  1863. // Returns: Pointer to outgoing interface if we found one, NULL otherwise.
  1864. //
  1865. Interface *
  1866. LookupNextHopWithBuffer(IPAddr Destination, IPAddr Src, IPAddr *NextHop,
  1867. uint * MTU, uchar Protocol, uchar *Buffer, uint Length,
  1868. RouteCacheEntry **fwdRCE, LinkEntry **Link,
  1869. IPAddr HdrSrc, uint UnicastIf)
  1870. {
  1871. CTELockHandle TableLock; // Lock handle for routing table.
  1872. RouteTableEntry *Route; // Pointer to route table entry for route.
  1873. Interface *IF;
  1874. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  1875. Route = LookupRTE(Destination, Src, HOST_ROUTE_PRI, UnicastIf);
  1876. if (Route != (RouteTableEntry *) NULL) {
  1877. // If this is a direct route, send straight to the destination.
  1878. *NextHop = IP_ADDR_EQUAL(Route->rte_addr, IPADDR_LOCAL) ? Destination :
  1879. Route->rte_addr;
  1880. // If this is an indirect route, we can use the forwarding RCE
  1881. if (fwdRCE) {
  1882. #if REM_OPT
  1883. *fwdRCE = IP_ADDR_EQUAL(Route->rte_addr, IPADDR_LOCAL) ? NULL :
  1884. #else
  1885. *fwdRCE =
  1886. #endif
  1887. (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
  1888. &Route->rte_arpcontext,
  1889. rce_context);
  1890. }
  1891. // See if the route we found is connected. If not, try to connect it.
  1892. if (!(Route->rte_flags & RTE_IF_VALID)) {
  1893. Route = FindValidIFForRTE(Route, Destination, Src, Protocol, Buffer,
  1894. Length, HdrSrc);
  1895. if (Route == NULL) {
  1896. // Couldn't bring it up.
  1897. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1898. return NULL;
  1899. } else
  1900. IF = Route->rte_if;
  1901. } else
  1902. IF = Route->rte_if;
  1903. // if the route is on a P2MP interface get the mtu from the
  1904. // link associated with the route
  1905. if (Route->rte_link)
  1906. *MTU = Route->rte_link->link_mtu;
  1907. else
  1908. *MTU = Route->rte_mtu;
  1909. if (Link) {
  1910. *Link = Route->rte_link;
  1911. if (Route->rte_link) {
  1912. CTEInterlockedIncrementLong(&Route->rte_link->link_refcount);
  1913. }
  1914. }
  1915. LOCKED_REFERENCE_IF(IF);
  1916. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1917. return IF;
  1918. } else { // Couldn't find a route.
  1919. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1920. return NULL;
  1921. }
  1922. }
  1923. //** LookupForwardingNextHop - Look up the next hop on which to forward packet on.
  1924. //
  1925. // Called when we need to find the next hop on our way to a destination and we
  1926. // have packet information that we may use for dial on demand support. We call
  1927. // LookupRTE to find it, and return the appropriate information. We may bring
  1928. // up the link if neccessary.
  1929. //
  1930. // In a PnP build, the interface is referenced here.
  1931. //
  1932. // Entry: Destination - IP address we're trying to reach.
  1933. // Src - Source address of datagram being routed.
  1934. // NextHop - Pointer to IP address of next hop (returned).
  1935. // MTU - Pointer to where to return max MTU used on the
  1936. // route.
  1937. // Protocol - Protocol type for packet that's causing this
  1938. // lookup.
  1939. // Buffer - Pointer to first part of packet causing lookup.
  1940. // Length - Length of Buffer.
  1941. // HdrSrc - source addres from header
  1942. //
  1943. // Returns: Pointer to outgoing interface if we found one, NULL otherwise.
  1944. //
  1945. Interface *
  1946. LookupForwardingNextHop(IPAddr Destination, IPAddr Src, IPAddr *NextHop,
  1947. uint * MTU, uchar Protocol, uchar *Buffer, uint Length,
  1948. RouteCacheEntry **fwdRCE, LinkEntry **Link,
  1949. IPAddr HdrSrc)
  1950. {
  1951. CTELockHandle TableLock; // Lock handle for routing table.
  1952. RouteTableEntry *Route; // Pointer to route table entry for route.
  1953. Interface *IF;
  1954. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  1955. Route = LookupForwardRTE(Destination, Src, TRUE);
  1956. if (Route != (RouteTableEntry *) NULL) {
  1957. // If this is a direct route, send straight to the destination.
  1958. *NextHop = IP_ADDR_EQUAL(Route->rte_addr, IPADDR_LOCAL) ? Destination :
  1959. Route->rte_addr;
  1960. // If this is an indirect route, we can use the forwarding RCE
  1961. if (fwdRCE) {
  1962. #if REM_OPT
  1963. *fwdRCE = IP_ADDR_EQUAL(Route->rte_addr, IPADDR_LOCAL) ? NULL :
  1964. #else
  1965. *fwdRCE =
  1966. #endif
  1967. (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
  1968. &Route->rte_arpcontext,
  1969. rce_context);
  1970. }
  1971. // See if the route we found is connected. If not, try to connect it.
  1972. if (!(Route->rte_flags & RTE_IF_VALID)) {
  1973. Route = FindValidIFForRTE(Route, Destination, Src, Protocol, Buffer,
  1974. Length, HdrSrc);
  1975. if (Route == NULL) {
  1976. // Couldn't bring it up.
  1977. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1978. return NULL;
  1979. } else
  1980. IF = Route->rte_if;
  1981. } else
  1982. IF = Route->rte_if;
  1983. // if the route is on a P2MP interface get the mtu from the
  1984. // link associated with the route
  1985. if (Route->rte_link)
  1986. *MTU = Route->rte_link->link_mtu;
  1987. else
  1988. *MTU = Route->rte_mtu;
  1989. if (Link) {
  1990. *Link = Route->rte_link;
  1991. if (Route->rte_link) {
  1992. CTEInterlockedIncrementLong(&Route->rte_link->link_refcount);
  1993. }
  1994. }
  1995. LOCKED_REFERENCE_IF(IF);
  1996. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  1997. return IF;
  1998. } else { // Couldn't find a route.
  1999. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  2000. return NULL;
  2001. }
  2002. }
  2003. //* RTReadNext - Read the next route in the table.
  2004. //
  2005. // Called by the GetInfo code to read the next route in the table. We assume
  2006. // the context passed in is valid, and the caller has the RouteTableLock.
  2007. //
  2008. // Input: Context - Pointer to a RouteEntryContext.
  2009. // Buffer - Pointer to an IPRouteEntry structure.
  2010. //
  2011. // Returns: TRUE if more data is available to be read, FALSE is not.
  2012. //
  2013. uint
  2014. RTReadNext(void *Context, void *Buffer)
  2015. {
  2016. IPRouteEntry *IPREntry = (IPRouteEntry *) Buffer;
  2017. RouteTableEntry *CurrentRTE=NULL;
  2018. uint Now = CTESystemUpTime() / 1000L;
  2019. Interface *IF;
  2020. UINT retVal = GetNextRoute(Context, &CurrentRTE);
  2021. // Should always have the rte because we don't have empty route tables.
  2022. //
  2023. ASSERT(CurrentRTE);
  2024. // Fill in the buffer.
  2025. IF = CurrentRTE->rte_if;
  2026. IPREntry->ire_dest = CurrentRTE->rte_dest;
  2027. IPREntry->ire_index = IF->if_index;
  2028. IPREntry->ire_metric1 = CurrentRTE->rte_metric;
  2029. IPREntry->ire_metric2 = IRE_METRIC_UNUSED;
  2030. IPREntry->ire_metric3 = IRE_METRIC_UNUSED;
  2031. IPREntry->ire_metric4 = IRE_METRIC_UNUSED;
  2032. IPREntry->ire_metric5 = IRE_METRIC_UNUSED;
  2033. IPREntry->ire_nexthop = GetNextHopForRTE(CurrentRTE);
  2034. IPREntry->ire_type = (CurrentRTE->rte_flags & RTE_VALID ?
  2035. CurrentRTE->rte_type : IRE_TYPE_INVALID);
  2036. IPREntry->ire_proto = CurrentRTE->rte_proto;
  2037. IPREntry->ire_age = Now - CurrentRTE->rte_valid;
  2038. IPREntry->ire_mask = CurrentRTE->rte_mask;
  2039. IPREntry->ire_context = CurrentRTE->rte_context;
  2040. return retVal;
  2041. }
  2042. //* RTRead - Read the next route in the table.
  2043. //
  2044. // Called by the GetInfo code to read the next route in the table. We assume
  2045. // the context passed in is valid, and the caller has the RouteTableLock.
  2046. //
  2047. // Input: Context - Pointer to a RouteEntryContext.
  2048. // Buffer - Pointer to an IPRouteEntry structure.
  2049. //
  2050. // Returns:
  2051. //
  2052. //* RtRead - Read a route
  2053. //
  2054. // Returns: Status of attempt to add route.
  2055. //
  2056. uint
  2057. RTRead(void *pContext, void *pBuffer)
  2058. {
  2059. IPRouteLookupData *pRLData = (IPRouteLookupData *) pContext;
  2060. IPRouteEntry *pIPREntry = (IPRouteEntry *) pBuffer;
  2061. RouteTableEntry *pCurrentRTE;
  2062. uint Now = CTESystemUpTime() / 1000L;
  2063. Interface *pIF;
  2064. ASSERT((pContext != NULL) && (pBuffer != NULL));
  2065. pCurrentRTE = LookupRTE(pRLData->DestAdd, pRLData->SrcAdd,
  2066. HOST_ROUTE_PRI, FALSE);
  2067. if (pCurrentRTE == NULL) {
  2068. pIPREntry->ire_index = 0xffffffff;
  2069. return (uint) TDI_DEST_HOST_UNREACH;
  2070. }
  2071. // Fill in the buffer.
  2072. pIF = pCurrentRTE->rte_if;
  2073. pIPREntry->ire_dest = pCurrentRTE->rte_dest;
  2074. pIPREntry->ire_index = pIF->if_index;
  2075. pIPREntry->ire_metric1 = pCurrentRTE->rte_metric;
  2076. pIPREntry->ire_metric2 = IRE_METRIC_UNUSED;
  2077. pIPREntry->ire_metric3 = IRE_METRIC_UNUSED;
  2078. pIPREntry->ire_metric4 = IRE_METRIC_UNUSED;
  2079. pIPREntry->ire_metric5 = IRE_METRIC_UNUSED;
  2080. pIPREntry->ire_nexthop = GetNextHopForRTE(pCurrentRTE);
  2081. pIPREntry->ire_type = (pCurrentRTE->rte_flags & RTE_VALID ?
  2082. pCurrentRTE->rte_type : IRE_TYPE_INVALID);
  2083. pIPREntry->ire_proto = pCurrentRTE->rte_proto;
  2084. pIPREntry->ire_age = Now - pCurrentRTE->rte_valid;
  2085. pIPREntry->ire_mask = pCurrentRTE->rte_mask;
  2086. pIPREntry->ire_context = pCurrentRTE->rte_context;
  2087. return TDI_SUCCESS;
  2088. }
  2089. void
  2090. LookupRoute(IPRouteLookupData * pRLData, IPRouteEntry * pIpRTE)
  2091. {
  2092. CTELockHandle Handle;
  2093. CTEGetLock(&RouteTableLock.Lock, &Handle);
  2094. RTRead(pRLData, pIpRTE);
  2095. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2096. return;
  2097. }
  2098. NTSTATUS
  2099. LookupRouteInformation(void *pRouteLookupData, void *pIpRTE,
  2100. IPROUTEINFOCLASS RouteInfoClass, void *RouteInformation,
  2101. uint * RouteInfoLength)
  2102. {
  2103. return LookupRouteInformationWithBuffer(pRouteLookupData, NULL, 0, pIpRTE,
  2104. RouteInfoClass, RouteInformation,
  2105. RouteInfoLength);
  2106. }
  2107. NTSTATUS
  2108. LookupRouteInformationWithBuffer(void *pRouteLookupData, uchar * Buffer,
  2109. uint Length, void *pIpRTE,
  2110. IPROUTEINFOCLASS RouteInfoClass,
  2111. void *RouteInformation, uint * RouteInfoLength)
  2112. {
  2113. IPRouteLookupData *pRLData = (IPRouteLookupData *) pRouteLookupData;
  2114. IPRouteEntry *pIPREntry = (IPRouteEntry *) pIpRTE;
  2115. RouteTableEntry *pCurrentRTE;
  2116. uint Now = CTESystemUpTime() / 1000L;
  2117. Interface *pIF;
  2118. CTELockHandle Handle;
  2119. CTEGetLock(&RouteTableLock.Lock, &Handle);
  2120. ASSERT(pRouteLookupData != NULL);
  2121. pCurrentRTE = LookupRTE(pRLData->DestAdd, pRLData->SrcAdd, HOST_ROUTE_PRI, FALSE);
  2122. if (pCurrentRTE == NULL) {
  2123. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2124. return STATUS_UNSUCCESSFUL;
  2125. }
  2126. // see if the RTE is for a demand-dial route,
  2127. if (!(pCurrentRTE->rte_flags & RTE_IF_VALID)) {
  2128. pCurrentRTE = FindValidIFForRTE(pCurrentRTE, pRLData->DestAdd,
  2129. pRLData->SrcAdd, pRLData->Info[0],
  2130. Buffer, Length, pRLData->SrcAdd);
  2131. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2132. if (pCurrentRTE == NULL) {
  2133. // Couldn't bring it up.
  2134. return STATUS_UNSUCCESSFUL;
  2135. }
  2136. return STATUS_PENDING;
  2137. }
  2138. // Fill in the buffer.
  2139. pIF = pCurrentRTE->rte_if;
  2140. if (pIPREntry) {
  2141. pIPREntry->ire_dest = pCurrentRTE->rte_dest;
  2142. pIPREntry->ire_index = pIF->if_index;
  2143. pIPREntry->ire_metric1 = pCurrentRTE->rte_metric;
  2144. pIPREntry->ire_metric2 = IRE_METRIC_UNUSED;
  2145. pIPREntry->ire_metric3 = IRE_METRIC_UNUSED;
  2146. pIPREntry->ire_metric4 = IRE_METRIC_UNUSED;
  2147. pIPREntry->ire_metric5 = IRE_METRIC_UNUSED;
  2148. pIPREntry->ire_nexthop = GetNextHopForRTE(pCurrentRTE);
  2149. pIPREntry->ire_type = (pCurrentRTE->rte_flags & RTE_VALID ?
  2150. pCurrentRTE->rte_type : IRE_TYPE_INVALID);
  2151. pIPREntry->ire_proto = pCurrentRTE->rte_proto;
  2152. pIPREntry->ire_age = Now - pCurrentRTE->rte_valid;
  2153. pIPREntry->ire_mask = pCurrentRTE->rte_mask;
  2154. pIPREntry->ire_context = pCurrentRTE->rte_context;
  2155. }
  2156. switch (RouteInfoClass) {
  2157. case IPRouteOutgoingFirewallContext:
  2158. *(PULONG) RouteInformation = pIF->if_index;
  2159. *(PULONG) RouteInfoLength = sizeof(PVOID);
  2160. break;
  2161. case IPRouteOutgoingFilterContext:
  2162. *(PVOID *) RouteInformation = NULL;
  2163. *(PULONG) RouteInfoLength = sizeof(PVOID);
  2164. break;
  2165. }
  2166. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2167. return STATUS_SUCCESS;
  2168. }
  2169. //* DeleteRTE - Delete an RTE.
  2170. //
  2171. // Called when we need to delete an RTE. We assume the caller has the
  2172. // RouteTableLock. We'll splice out the RTE, invalidate his RCEs, and
  2173. // free the memory.
  2174. //
  2175. // Input: PrevRTE - RTE in 'front' of one being deleted.
  2176. // RTE - RTE to be deleted.
  2177. //
  2178. // Returns: Nothing.
  2179. //
  2180. void
  2181. DeleteRTE(RouteTableEntry * PrevRTE, RouteTableEntry * RTE)
  2182. {
  2183. UNREFERENCED_PARAMETER(PrevRTE);
  2184. IPSInfo.ipsi_numroutes--;
  2185. if (RTE->rte_mask == DEFAULT_MASK) {
  2186. // We're deleting a default route.
  2187. DefGWConfigured--;
  2188. if (RTE->rte_flags & RTE_VALID)
  2189. DefGWActive--;
  2190. UpdateDeadGWState();
  2191. if (DefGWActive == 0)
  2192. ValidateDefaultGWs(NULL_IP_ADDR);
  2193. }
  2194. if (RTE->rte_todg) {
  2195. RTE->rte_todg->rte_fromdg = NULL;
  2196. }
  2197. if (RTE->rte_fromdg) {
  2198. RTE->rte_fromdg->rte_todg = NULL;
  2199. }
  2200. {
  2201. RouteTableEntry *tmpRTE = NULL;
  2202. tmpRTE = GetDefaultGWs(&tmpRTE);
  2203. while (tmpRTE) {
  2204. if (tmpRTE->rte_todg == RTE) {
  2205. tmpRTE->rte_todg = NULL;
  2206. }
  2207. tmpRTE = tmpRTE->rte_next;
  2208. }
  2209. }
  2210. InvalidateRCEChain(RTE);
  2211. // Make sure RTE's IF is valid
  2212. ASSERT(RTE->rte_if != NULL);
  2213. // Invalidate the fwding rce
  2214. if (RTE->rte_if != (Interface *) & DummyInterface) {
  2215. (*(RTE->rte_if->if_invalidate)) (RTE->rte_if->if_lcontext,
  2216. (RouteCacheEntry *) STRUCT_OF(RouteCacheEntry,
  2217. &RTE->rte_arpcontext,
  2218. rce_context));
  2219. }
  2220. // Free the old route.
  2221. FreeRoute(RTE);
  2222. }
  2223. //* DeleteRTEOnIF - Delete all address-dependent RTEs on a particular IF.
  2224. //
  2225. // A function called by RTWalk when we want to delete all RTEs on a particular
  2226. // inteface, except those that are present for the lifetime of the interface.
  2227. // We just check the I/F of each RTE, and if it matches we return FALSE.
  2228. //
  2229. // Input: RTE - RTE to check.
  2230. // Context - Interface on which we're deleting.
  2231. //
  2232. // Returns: FALSE if we want to delete it, TRUE otherwise.
  2233. //
  2234. uint
  2235. DeleteRTEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
  2236. {
  2237. Interface *IF = (Interface *) Context;
  2238. UNREFERENCED_PARAMETER(Context1);
  2239. if (RTE->rte_if == IF && !IP_ADDR_EQUAL(RTE->rte_dest, IF->if_bcast))
  2240. return FALSE;
  2241. else
  2242. return TRUE;
  2243. }
  2244. //* DeleteAllRTEOnIF - Delete all RTEs on a particular IF.
  2245. //
  2246. // A function called by RTWalk when we want to delete all RTEs on a particular
  2247. // inteface. We just check the I/F of each RTE, and if it matches we return
  2248. // FALSE.
  2249. //
  2250. // Input: RTE - RTE to check.
  2251. // Context - Interface on which we're deleting.
  2252. //
  2253. // Returns: FALSE if we want to delete it, TRUE otherwise.
  2254. //
  2255. uint
  2256. DeleteAllRTEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
  2257. {
  2258. Interface *IF = (Interface *) Context;
  2259. UNREFERENCED_PARAMETER(Context1);
  2260. if (RTE->rte_if == IF)
  2261. return FALSE;
  2262. else
  2263. return TRUE;
  2264. }
  2265. //* ConvertRTEType -Change RTE type from DIRECT INDIRECT to DIRECT.
  2266. //
  2267. // A function called by RTWalk when an address is added to chnage
  2268. // P2P/P2MP plumbed route to DIRECT type.
  2269. //
  2270. // Input: RTE - RTE to check.
  2271. // Context - Interface on which we're invalidating.
  2272. //
  2273. // Returns: TRUE.
  2274. //
  2275. uint
  2276. ConvertRTEType(RouteTableEntry * RTE, void *Context, void *Context1)
  2277. {
  2278. NetTableEntry *NTE = (NetTableEntry *) Context;
  2279. UNREFERENCED_PARAMETER(Context1);
  2280. if ((RTE->rte_addr == NTE->nte_addr) &&
  2281. (RTE->rte_if == NTE->nte_if) &&
  2282. (RTE->rte_type == IRE_TYPE_DIRECT)) {
  2283. RTE->rte_addr = IPADDR_LOCAL;
  2284. RTE->rte_type = IRE_TYPE_INDIRECT;
  2285. }
  2286. return TRUE;
  2287. }
  2288. //* InvalidateRCEOnIF - Invalidate all RCEs on a particular IF.
  2289. //
  2290. // A function called by RTWalk when we want to invalidate all RCEs on a
  2291. // particular inteface. We just check the I/F of each RTE, and if it
  2292. // matches we call InvalidateRCEChain to invalidate the RCEs.
  2293. //
  2294. // Input: RTE - RTE to check.
  2295. // Context - Interface on which we're invalidating.
  2296. //
  2297. // Returns: TRUE.
  2298. //
  2299. uint
  2300. InvalidateRCEOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
  2301. {
  2302. Interface *IF = (Interface *) Context;
  2303. UNREFERENCED_PARAMETER(Context1);
  2304. if (RTE->rte_if == IF)
  2305. InvalidateRCEChain(RTE);
  2306. return TRUE;
  2307. }
  2308. //* SetMTUOnIF - Set the MTU on an interface.
  2309. //
  2310. // Called when we need to set the MTU on an interface.
  2311. //
  2312. // Input: RTE - RTE to check.
  2313. // Context - Pointer to a context.
  2314. // Context1 - Pointer to the new MTU.
  2315. //
  2316. // Returns: TRUE.
  2317. //
  2318. uint
  2319. SetMTUOnIF(RouteTableEntry * RTE, void *Context, void *Context1)
  2320. {
  2321. uint NewMTU = *(uint *) Context1;
  2322. Interface *IF = (Interface *) Context;
  2323. if (RTE->rte_if == IF)
  2324. RTE->rte_mtu = NewMTU;
  2325. return TRUE;
  2326. }
  2327. //* SetMTUToAddr - Set the MTU to a specific address.
  2328. //
  2329. // Called when we need to set the MTU to a specific address. We set the MTU
  2330. // for all routes that use the specified address as a first hop to the new
  2331. // MTU.
  2332. //
  2333. // Input: RTE - RTE to check.
  2334. // Context - Pointer to a context.
  2335. // Context1 - Pointer to the new MTU.
  2336. //
  2337. // Returns: TRUE.
  2338. //
  2339. uint
  2340. SetMTUToAddr(RouteTableEntry * RTE, void *Context, void *Context1)
  2341. {
  2342. uint NewMTU = *(uint *) Context1;
  2343. IPAddr Addr = *(IPAddr *) Context;
  2344. if (IP_ADDR_EQUAL(RTE->rte_addr, Addr))
  2345. RTE->rte_mtu = NewMTU;
  2346. return TRUE;
  2347. }
  2348. //** FreeRtChangeList - Frees a route-change notification list.
  2349. //
  2350. // Called to clean up a list of route-change notifications in the failure path
  2351. // of 'RTWalk' and 'IPRouteTimeout'.
  2352. //
  2353. // Entry: RtChangeList - The list to be freed.
  2354. //
  2355. // Returns: Nothing.
  2356. //
  2357. void
  2358. FreeRtChangeList(RtChangeList* CurrentRtChangeList)
  2359. {
  2360. RtChangeList *TmpRtChangeList;
  2361. while (CurrentRtChangeList) {
  2362. TmpRtChangeList = CurrentRtChangeList->rt_next;
  2363. CTEFreeMem(CurrentRtChangeList);
  2364. CurrentRtChangeList = TmpRtChangeList;
  2365. }
  2366. }
  2367. //* RTWalk - Routine to walk the route table.
  2368. //
  2369. // This routine walks the route table, calling the specified function
  2370. // for each entry. If the called function returns FALSE, the RTE is
  2371. // deleted.
  2372. //
  2373. // Input: CallFunc - Function to call for each entry.
  2374. // Context - Context value to pass to each call.
  2375. //
  2376. // Returns: Nothing.
  2377. //
  2378. void
  2379. RTWalk(uint(*CallFunc) (struct RouteTableEntry *, void *, void *),
  2380. void *Context, void *Context1)
  2381. {
  2382. CTELockHandle Handle;
  2383. RouteTableEntry *RTE, *PrevRTE;
  2384. RouteTableEntry *pOldBestRTE, *pNewBestRTE;
  2385. UINT IsDataLeft, IsValid;
  2386. UCHAR IteratorContext[CONTEXT_SIZE];
  2387. RtChangeList *CurrentRtChangeList = NULL;
  2388. CTEGetLock(&RouteTableLock.Lock, &Handle);
  2389. // Zero the context the first time it is used
  2390. RtlZeroMemory(IteratorContext, CONTEXT_SIZE);
  2391. // Do we have any routes in the table ?
  2392. IsDataLeft = RTValidateContext(IteratorContext, &IsValid);
  2393. if (IsDataLeft) {
  2394. // Get the first route in the table
  2395. IsDataLeft = GetNextRoute(IteratorContext, &RTE);
  2396. while (IsDataLeft) {
  2397. // Keep copy of current route and advance to next
  2398. PrevRTE = RTE;
  2399. // Read next route, before operating on current
  2400. IsDataLeft = GetNextRoute(IteratorContext, &RTE);
  2401. // Work on current route (already got next one)
  2402. if (!(*CallFunc) (PrevRTE, Context, Context1)) {
  2403. IPRouteNotifyOutput RNO = {0};
  2404. RtChangeList *NewRtChange;
  2405. // Retrieve information about the route for change-notification
  2406. // before proceeding with deletion.
  2407. RNO.irno_dest = PrevRTE->rte_dest;
  2408. RNO.irno_mask = PrevRTE->rte_mask;
  2409. RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
  2410. RNO.irno_proto = PrevRTE->rte_proto;
  2411. RNO.irno_ifindex = PrevRTE->rte_if->if_index;
  2412. RNO.irno_metric = PrevRTE->rte_metric;
  2413. RNO.irno_flags = IRNO_FLAG_DELETE;
  2414. // Delete the route and perform cleanup.
  2415. DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask,
  2416. PrevRTE->rte_addr, PrevRTE->rte_if, MATCH_FULL,
  2417. &PrevRTE, &pOldBestRTE, &pNewBestRTE);
  2418. CleanupP2MP_RTE(PrevRTE);
  2419. CleanupRTE(PrevRTE);
  2420. // Allocate, initialize and queue a change-notification entry
  2421. // for the deleted route.
  2422. NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), '9iCT');
  2423. if (NewRtChange != NULL) {
  2424. NewRtChange->rt_next = CurrentRtChangeList;
  2425. NewRtChange->rt_info = RNO;
  2426. CurrentRtChangeList = NewRtChange;
  2427. }
  2428. #if FFP_SUPPORT
  2429. FFPFlushRequired = TRUE;
  2430. #endif
  2431. }
  2432. }
  2433. // Work on last route [it was not processed in the loop]
  2434. PrevRTE = RTE;
  2435. if (!(*CallFunc) (PrevRTE, Context, Context1)) {
  2436. IPRouteNotifyOutput RNO = {0};
  2437. RtChangeList *NewRtChange;
  2438. // Retrieve information about the route for change-notification
  2439. // before proceeding with deletion.
  2440. RNO.irno_dest = PrevRTE->rte_dest;
  2441. RNO.irno_mask = PrevRTE->rte_mask;
  2442. RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
  2443. RNO.irno_proto = PrevRTE->rte_proto;
  2444. RNO.irno_ifindex = PrevRTE->rte_if->if_index;
  2445. RNO.irno_metric = PrevRTE->rte_metric;
  2446. RNO.irno_flags = IRNO_FLAG_DELETE;
  2447. // Delete the route and perform cleanup.
  2448. DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask, PrevRTE->rte_addr,
  2449. PrevRTE->rte_if, MATCH_FULL, &PrevRTE, &pOldBestRTE,
  2450. &pNewBestRTE);
  2451. CleanupP2MP_RTE(PrevRTE);
  2452. CleanupRTE(PrevRTE);
  2453. // Allocate, initialize and queue a change-notification entry
  2454. // for the deleted route.
  2455. NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), '0iCT');
  2456. if (NewRtChange != NULL) {
  2457. NewRtChange->rt_next = CurrentRtChangeList;
  2458. NewRtChange->rt_info = RNO;
  2459. CurrentRtChangeList = NewRtChange;
  2460. }
  2461. #if FFP_SUPPORT
  2462. FFPFlushRequired = TRUE;
  2463. #endif
  2464. }
  2465. }
  2466. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2467. // Call RtChangeNotify for each of the entries in the change-notification
  2468. // list that we've built up so far. In the process, free each entry.
  2469. if (CurrentRtChangeList) {
  2470. RtChangeList *TmpRtChangeList;
  2471. do {
  2472. TmpRtChangeList = CurrentRtChangeList->rt_next;
  2473. RtChangeNotify(&CurrentRtChangeList->rt_info);
  2474. CTEFreeMem(CurrentRtChangeList);
  2475. CurrentRtChangeList = TmpRtChangeList;
  2476. } while(CurrentRtChangeList);
  2477. }
  2478. }
  2479. uint
  2480. AttachRCEToNewRTE(RouteTableEntry *NewRTE, RouteCacheEntry *RCE,
  2481. RouteTableEntry *OldRTE)
  2482. {
  2483. CTELockHandle RCEHandle;
  2484. RouteCacheEntry *tempRCE, *CurrentRCE;
  2485. NetTableEntry *NTE;
  2486. uint Status = 1;
  2487. uint RCE_usecnt;
  2488. if (RCE == NULL) {
  2489. CurrentRCE = OldRTE->rte_rcelist;
  2490. } else {
  2491. CurrentRCE = RCE;
  2492. }
  2493. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"AttachRCETonewRTE %x %x %x\n", NewRTE, RCE, OldRTE));
  2494. // OldRTE = RCE->rce_rte;
  2495. //associate all the RCEs with this RTE
  2496. while (CurrentRCE != NULL) {
  2497. RCE_usecnt = InvalidateRCE(CurrentRCE);
  2498. CTEGetLock(&CurrentRCE->rce_lock, &RCEHandle);
  2499. tempRCE = CurrentRCE->rce_next;
  2500. // if no one is using this go ahead and
  2501. // mark this as valid
  2502. if (RCE_usecnt == 0) {
  2503. //Make sure that the src address for RCE is valid
  2504. //for this RTE
  2505. NTE = NewRTE->rte_if->if_nte;
  2506. while (NTE) {
  2507. if ((NTE->nte_flags & NTE_VALID) &&
  2508. IP_ADDR_EQUAL(CurrentRCE->rce_src, NTE->nte_addr))
  2509. break;
  2510. NTE = NTE->nte_ifnext;
  2511. }
  2512. if (NTE != NULL) {
  2513. if (CurrentRCE->rce_flags & RCE_CONNECTED) {
  2514. InvalidateRCEContext(CurrentRCE);
  2515. } else {
  2516. ASSERT(!(CurrentRCE->rce_flags & RCE_REFERENCED));
  2517. }
  2518. // Link the RCE on the RTE, and set up the back pointer.
  2519. CurrentRCE->rce_rte = NewRTE;
  2520. CurrentRCE->rce_flags |= RCE_VALID;
  2521. CurrentRCE->rce_next = NewRTE->rte_rcelist;
  2522. NewRTE->rte_rcelist = CurrentRCE;
  2523. NewRTE->rte_rces += CurrentRCE->rce_cnt;
  2524. if ((NewRTE->rte_flags & RTE_IF_VALID)) {
  2525. CurrentRCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
  2526. LOCKED_REFERENCE_IF(NewRTE->rte_if);
  2527. } else {
  2528. ASSERT(FALSE);
  2529. CurrentRCE->rce_flags &= ~RCE_CONNECTED;
  2530. Status = FALSE;
  2531. }
  2532. } //if NTE!=NULL
  2533. } else {
  2534. // In use. Mark it as in dead gw transit mmode
  2535. // so that attachtorte will do the right thing
  2536. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"AttachRCETonewRTE RCE busy\n"));
  2537. // CurrentRCE->rce_rte = NewRTE;
  2538. CurrentRCE->rce_flags |= RCE_DEADGW;
  2539. } //in use
  2540. CTEFreeLock(&CurrentRCE->rce_lock, RCEHandle);
  2541. //if there is only one RCE to be switched, break.
  2542. if (RCE)
  2543. break;
  2544. CurrentRCE = tempRCE;
  2545. } //while
  2546. return (Status);
  2547. }
  2548. //** AttachRCEToRTE - Attach an RCE to an RTE.
  2549. //
  2550. // This procedure takes an RCE, finds the appropriate RTE, and attaches it.
  2551. // We check to make sure that the source address is still valid.
  2552. //
  2553. // Entry: RCE - RCE to be attached.
  2554. // Protocol - Protocol type for packet causing this call.
  2555. // Buffer - Pointer to buffer for packet causing this
  2556. // call.
  2557. // Length - Length of buffer.
  2558. //
  2559. // Returns: TRUE if we attach it, false if we don't.
  2560. //
  2561. uint
  2562. AttachRCEToRTE(RouteCacheEntry *RCE, uchar Protocol, uchar *Buffer, uint Length)
  2563. {
  2564. CTELockHandle TableHandle, RCEHandle;
  2565. RouteTableEntry *RTE;
  2566. NetTableEntry *NTE;
  2567. uint Status;
  2568. NetTableEntry *NetTableList;
  2569. CTEGetLock(&RouteTableLock.Lock, &TableHandle);
  2570. NetTableList = NewNetTableList[NET_TABLE_HASH(RCE->rce_src)];
  2571. for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next)
  2572. if ((NTE->nte_flags & NTE_VALID) &&
  2573. IP_ADDR_EQUAL(RCE->rce_src, NTE->nte_addr))
  2574. break;
  2575. if (NTE == NULL) {
  2576. // Didn't find a match.
  2577. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  2578. return FALSE;
  2579. }
  2580. if ((RCE->rce_flags == RCE_VALID) && (RCE->rce_rte->rte_flags != RTE_IF_VALID)) {
  2581. RTE = RCE->rce_rte;
  2582. } else {
  2583. RTE = LookupRTE(RCE->rce_dest, RCE->rce_src, HOST_ROUTE_PRI, FALSE);
  2584. }
  2585. if (RTE == NULL) {
  2586. // No route! Fail the call.
  2587. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  2588. return FALSE;
  2589. }
  2590. // Check if this RCE is in transition (usecnt did not permit
  2591. // to swicthover earlier)
  2592. if ((RCE->rce_flags & RCE_DEADGW) && (RCE->rce_rte != RTE)) {
  2593. RouteTableEntry *tmpRTE = NULL;
  2594. // Scan through DefaultGWs checking
  2595. // for a GW that is in the process of
  2596. // taking over from the current one.
  2597. if (RTE->rte_todg) {
  2598. tmpRTE = GetDefaultGWs(&tmpRTE);
  2599. while (tmpRTE) {
  2600. if (tmpRTE == RTE->rte_todg) {
  2601. break;
  2602. }
  2603. tmpRTE = tmpRTE->rte_next;
  2604. }
  2605. }
  2606. if (tmpRTE) {
  2607. // Remove references to GW
  2608. // in transition and the current one
  2609. ASSERT(tmpRTE->rte_fromdg == RTE);
  2610. tmpRTE->rte_fromdg = NULL;
  2611. RTE->rte_todg = NULL;
  2612. }
  2613. Rcefailures++;
  2614. }
  2615. Status = TRUE;
  2616. // Yep, we found one. Get the lock on the RCE, and make sure he's
  2617. // not pointing at an RTE already. We also need to make sure that the usecnt
  2618. // is 0, so that we can invalidate the RCE at the low level. If we set valid
  2619. // to TRUE without doing this we may get into a wierd situation where we
  2620. // link the RCE onto an RTE but the lower layer information is wrong, so we
  2621. // send to IP address X at mac address Y. So to be safe we don't set valid
  2622. // to TRUE until both usecnt is 0 and valid is FALSE. We'll keep coming
  2623. // through this routine on every send until that happens.
  2624. CTEGetLock(&RCE->rce_lock, &RCEHandle);
  2625. if (RCE->rce_usecnt == 0) {
  2626. // Nobody is using him, so we can link him up.
  2627. if (!(RCE->rce_flags & RCE_VALID)) {
  2628. // He's not valid. Invalidate the lower layer info, just in
  2629. // case. Make sure he's connected before we try to do this. If
  2630. // he's not marked as connected, don't bother to try and invalidate
  2631. // him as there is no interface.
  2632. if (RCE->rce_flags & RCE_CONNECTED) {
  2633. // invalidating this IF can fail in PNP world. An invalid RCE can not be found on on RTE list
  2634. // to be invalidated if Interface decides to take off!
  2635. // So, check the sanity of the interface
  2636. InvalidateRCEContext(RCE);
  2637. } else {
  2638. ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
  2639. }
  2640. // Link the RCE on the RTE, and set up the back pointer.
  2641. RCE->rce_rte = RTE;
  2642. RCE->rce_flags |= RCE_VALID;
  2643. RCE->rce_next = RTE->rte_rcelist;
  2644. RTE->rte_rcelist = RCE;
  2645. RTE->rte_rces += RCE->rce_cnt;
  2646. RCE->rce_flags &= ~RCE_DEADGW;
  2647. // Make sure the RTE is connected. If not, try to connect him.
  2648. if (!(RTE->rte_flags & RTE_IF_VALID)) {
  2649. // Not connected. Try to connect him.
  2650. RTE = FindValidIFForRTE(RTE, RCE->rce_dest, RCE->rce_src,
  2651. Protocol, Buffer, Length, RCE->rce_src);
  2652. if (RTE != NULL) {
  2653. // Got one, so mark as connected.
  2654. ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
  2655. RCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
  2656. LOCKED_REFERENCE_IF(RTE->rte_if);
  2657. } else {
  2658. // Couldn't get a valid i/f. Mark the RCE as not connected,
  2659. // and set up to fail this call.
  2660. RCE->rce_flags &= ~RCE_CONNECTED;
  2661. Status = FALSE;
  2662. }
  2663. } else {
  2664. // The RTE is connected, mark the RCE as connected.
  2665. ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
  2666. RCE->rce_flags |= (RCE_CONNECTED | RCE_REFERENCED);
  2667. LOCKED_REFERENCE_IF(RTE->rte_if);
  2668. }
  2669. } else {
  2670. // The RCE is valid. See if it's connected.
  2671. if (!(RCE->rce_flags & RCE_CONNECTED)) {
  2672. // Not connected, try to get a valid i/f.
  2673. if (!(RTE->rte_flags & RTE_IF_VALID)) {
  2674. RTE = FindValidIFForRTE(RTE, RCE->rce_dest, RCE->rce_src,
  2675. Protocol, Buffer, Length, RCE->rce_src);
  2676. if (RTE != NULL) {
  2677. RCE->rce_flags |= RCE_CONNECTED;
  2678. ASSERT(!(RCE->rce_flags & RCE_REFERENCED));
  2679. ASSERT(RTE == RCE->rce_rte);
  2680. RCE->rce_flags |= RCE_REFERENCED;
  2681. LOCKED_REFERENCE_IF(RTE->rte_if);
  2682. } else {
  2683. // Couldn't connect, so fail.
  2684. Status = FALSE;
  2685. }
  2686. } else { // Already connected, just mark as valid.
  2687. RCE->rce_flags |= RCE_CONNECTED;
  2688. if (!(RCE->rce_flags & RCE_REFERENCED)) {
  2689. RCE->rce_flags |= RCE_REFERENCED;
  2690. LOCKED_REFERENCE_IF(RTE->rte_if);
  2691. }
  2692. }
  2693. }
  2694. }
  2695. }
  2696. // Free the locks and we're done.
  2697. CTEFreeLock(&RCE->rce_lock, RCEHandle);
  2698. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  2699. return Status;
  2700. }
  2701. //** IPGetPInfo - Get information..
  2702. //
  2703. // Called by an upper layer to get information about a path. We return the
  2704. // MTU of the path and the maximum link speed to be expected on the path.
  2705. //
  2706. // Input: Dest - Destination address.
  2707. // Src - Src address.
  2708. // NewMTU - Where to store path MTU (may be NULL).
  2709. // MaxPathSpeed - Where to store maximum path speed (may be NULL).
  2710. // RCE - RCE to be used to find the route
  2711. //
  2712. // Returns: Status of attempt to get new MTU.
  2713. //
  2714. IP_STATUS
  2715. IPGetPInfo(IPAddr Dest, IPAddr Src, uint * NewMTU, uint *MaxPathSpeed,
  2716. RouteCacheEntry *RCE)
  2717. {
  2718. CTELockHandle Handle;
  2719. RouteTableEntry *RTE = NULL;
  2720. IP_STATUS Status;
  2721. CTEGetLock(&RouteTableLock.Lock, &Handle);
  2722. if (RCE) {
  2723. CTEGetLockAtDPC(&RCE->rce_lock);
  2724. if (RCE->rce_flags == RCE_ALL_VALID) {
  2725. RTE = RCE->rce_rte;
  2726. }
  2727. CTEFreeLockFromDPC(&RCE->rce_lock);
  2728. }
  2729. if (!RTE) {
  2730. RTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, FALSE);
  2731. }
  2732. if (RTE != NULL) {
  2733. if (NewMTU != NULL) {
  2734. // if the route is on a P2MP interface get the mtu from the link associated with the route
  2735. if (RTE->rte_link)
  2736. *NewMTU = RTE->rte_link->link_mtu;
  2737. else
  2738. *NewMTU = RTE->rte_mtu;
  2739. }
  2740. if (MaxPathSpeed != NULL)
  2741. *MaxPathSpeed = RTE->rte_if->if_speed;
  2742. Status = IP_SUCCESS;
  2743. } else
  2744. Status = IP_DEST_HOST_UNREACHABLE;
  2745. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2746. return Status;
  2747. }
  2748. //** IPCheckRoute - Check that a route is valid.
  2749. //
  2750. // Called by an upper layer when it believes a route might be invalid.
  2751. // We'll check if we can. If the upper layer is getting there through a
  2752. // route derived via ICMP (presumably a redirect) we'll check to see
  2753. // if it's been learned within the last minute. If it has, it's assumed
  2754. // to still be valid. Otherwise, we'll mark it as down and try to find
  2755. // another route there. If we can, we'll delete the old route. Otherwise
  2756. // we'll leave it. If the route is through a default gateway we'll switch
  2757. // to another one if we can. Otherwise, we'll just leave - we don't mess
  2758. // with manually configured routes.
  2759. //
  2760. // Input: Dest - Destination to be reached.
  2761. // Src - Src we're sending from.
  2762. // RCE - route-cache-entry to be updated.
  2763. // OptInfo - options to use if recreating the RCE
  2764. // CheckRouteFlag - modifies this routine's behavior
  2765. //
  2766. // Returns: Nothing.
  2767. //
  2768. void
  2769. IPCheckRoute(IPAddr Dest, IPAddr Src, RouteCacheEntry * RCE, IPOptInfo *OptInfo,
  2770. uint CheckRouteFlag)
  2771. {
  2772. RouteTableEntry *RTE;
  2773. RouteTableEntry *NewRTE;
  2774. CTELockHandle Handle;
  2775. uint Now = CTESystemUpTime() / 1000L;
  2776. if (DeadGWDetect) {
  2777. uint UnicastIf;
  2778. // We are doing dead G/W detection. Get the lock, and try and
  2779. // find the route.
  2780. // Decide whether to do a strong or weak host lookup.
  2781. UnicastIf = GetIfConstraint(Dest, Src, OptInfo, FALSE);
  2782. CTEGetLock(&RouteTableLock.Lock, &Handle);
  2783. RTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, UnicastIf);
  2784. if (RTE != NULL && ((Now - RTE->rte_valid) > MIN_RT_VALID)) {
  2785. // Found a route, and it's older than the minimum valid time. If it
  2786. // goes through a G/W, and is a route we learned via ICMP or is a
  2787. // default route, do something with it.
  2788. if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL)) {
  2789. // It is through a G/W.
  2790. if (RTE->rte_proto == IRE_PROTO_ICMP) {
  2791. // Came from ICMP. Mark as invalid, and then make sure
  2792. // we have another route there.
  2793. RTE->rte_flags &= ~RTE_VALID;
  2794. NewRTE = LookupRTE(Dest, Src, HOST_ROUTE_PRI, UnicastIf);
  2795. if (NewRTE == NULL) {
  2796. // Can't get there any other way so leave this
  2797. // one alone.
  2798. RTE->rte_flags |= RTE_VALID;
  2799. // Re validate all the other gateways
  2800. InvalidateRCEChain(RTE);
  2801. ValidateDefaultGWs(NULL_IP_ADDR);
  2802. }
  2803. // The discovered route under the
  2804. // NTE is not cleaned up.
  2805. // Since deleting the route itself does not serve any purpose and
  2806. // the route will time out eventually, let us leave this
  2807. // as invalid.
  2808. } else {
  2809. if (RTE->rte_mask == DEFAULT_MASK) {
  2810. // This is a default gateway. If we have more than one
  2811. // configured move to the next one.
  2812. if (DefGWConfigured > 1) {
  2813. // Have more than one. Try the next one. First
  2814. // invalidate any RCEs on this G/W.
  2815. if (DefGWActive == 1) {
  2816. // No more active. Revalidate all of them,
  2817. // and try again.
  2818. ValidateDefaultGWs(NULL_IP_ADDR);
  2819. // ASSERT(DefGWActive == DefGWConfigured);
  2820. } else {
  2821. //Make sure that we do not switch all the
  2822. //connections just because of a spurious
  2823. //dead gate way event.
  2824. //switch only when % of number of connections are
  2825. // failed over to the other gateway.
  2826. // if we have already found the next default gateway
  2827. // check if it is time to switch all the connections
  2828. // to it.
  2829. if (RTE->rte_todg) {
  2830. #if DBG
  2831. {
  2832. RouteTableEntry *tmpRTE = NULL;
  2833. tmpRTE = GetDefaultGWs(&tmpRTE);
  2834. while (tmpRTE) {
  2835. if (tmpRTE == RTE->rte_todg) {
  2836. break;
  2837. }
  2838. tmpRTE = tmpRTE->rte_next;
  2839. }
  2840. if (tmpRTE == NULL) {
  2841. DbgBreakPoint();
  2842. }
  2843. }
  2844. #endif
  2845. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"to todg %lx\n", RTE));
  2846. // If the alternate gateway now has 25%
  2847. // as many as the active gateway
  2848. // and the caller has not requested
  2849. // a switch for this RCE only,
  2850. // invalidate the active gateway and
  2851. // select the alternate as the new default.
  2852. // Try different GW if there is only one RCE.
  2853. // This will help udp sessions.
  2854. //
  2855. if ((RTE->rte_rcelist == RCE &&
  2856. RCE->rce_next == NULL) ||
  2857. (RTE->rte_todg->rte_rces >=
  2858. (RTE->rte_rces >> 2) &&
  2859. !(CheckRouteFlag & CHECK_RCE_ONLY))) {
  2860. //Switch every one.
  2861. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," Switching every one %x to %x\n", RTE->rte_todg, RTE));
  2862. --DefGWActive;
  2863. RTE->rte_flags &= ~RTE_VALID;
  2864. UpdateDeadGWState();
  2865. RTE->rte_todg->rte_fromdg = NULL;
  2866. RTE->rte_todg = NULL;
  2867. if (RTE->rte_fromdg) {
  2868. RTE->rte_fromdg->rte_todg = NULL;
  2869. }
  2870. RTE->rte_fromdg = NULL;
  2871. InvalidateRCEChain(RTE);
  2872. //ASSERT(RTE->rte_rces == 0);
  2873. } else {
  2874. //Switch this particular connection to the new one.
  2875. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," attaching RCE %x to newrte %x\n", RCE, RTE->rte_todg));
  2876. AttachRCEToNewRTE(RTE->rte_todg, RCE, RTE);
  2877. }
  2878. } else if (RTE->rte_fromdg) {
  2879. // find if there are any other gateways other than
  2880. // fromdg and switch to that.
  2881. // Note that if we have more than 3 default gateways
  2882. // configured, this algorithm does not do a god job
  2883. RouteTableEntry *OldRTE = RTE;
  2884. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"GW %x goofed. RTEfromdg %x\n",RTE,RTE->rte_fromdg));
  2885. --DefGWActive;
  2886. UpdateDeadGWState();
  2887. // turn on dead gw flag to tell findrte not to consider this rte
  2888. RTE->rte_flags |= RTE_DEADGW;
  2889. RTE->rte_fromdg->rte_flags |= RTE_DEADGW;
  2890. RTE = FindRTE(Dest, Src, 0,
  2891. DEFAULT_ROUTE_PRI,
  2892. DEFAULT_ROUTE_PRI, UnicastIf);
  2893. OldRTE->rte_flags &= ~RTE_DEADGW;
  2894. OldRTE->rte_fromdg->rte_flags &= ~RTE_DEADGW;
  2895. if (RTE == NULL) {
  2896. // No more default gateways! This is bad.
  2897. //ASSERT(FALSE);
  2898. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"No more def routes!\n"));
  2899. OldRTE->rte_fromdg->rte_todg = NULL;
  2900. OldRTE->rte_fromdg->rte_fromdg = NULL;
  2901. OldRTE->rte_fromdg = NULL;
  2902. OldRTE->rte_todg = NULL;
  2903. ValidateDefaultGWs(NULL_IP_ADDR);
  2904. //ASSERT(DefGWActive == DefGWConfigured);
  2905. } else {
  2906. // we have a third gateway to try!
  2907. // ASSERT(RTE->rte_mask == DEFAULT_MASK);
  2908. //Treat OldRTE as dead!
  2909. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Trying next def route %x\n",RTE));
  2910. OldRTE->rte_flags &= ~RTE_VALID;
  2911. RTE->rte_fromdg = OldRTE->rte_fromdg;
  2912. RTE->rte_fromdg->rte_todg = RTE;
  2913. if (OldRTE->rte_todg)
  2914. OldRTE->rte_todg->rte_fromdg = NULL;
  2915. OldRTE->rte_todg = NULL;
  2916. OldRTE->rte_fromdg = NULL;
  2917. //Attach all the RCEs to the new one
  2918. AttachRCEToNewRTE(RTE, NULL, OldRTE);
  2919. RTE->rte_valid = Now;
  2920. }
  2921. } else {
  2922. //find the next potential default gateway
  2923. RouteTableEntry *OldRTE = RTE;
  2924. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Finding potential GW\n" ));
  2925. OldRTE->rte_flags |= RTE_DEADGW;
  2926. RTE = FindRTE(Dest, Src, 0,
  2927. DEFAULT_ROUTE_PRI,
  2928. DEFAULT_ROUTE_PRI, UnicastIf);
  2929. OldRTE->rte_flags &= ~RTE_DEADGW;
  2930. if (RTE == NULL) {
  2931. // No more default gateways! This is bad.
  2932. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL," ---No more def routes!\n"));
  2933. // ASSERT(FALSE);
  2934. ValidateDefaultGWs(NULL_IP_ADDR);
  2935. //ASSERT(DefGWActive == DefGWConfigured);
  2936. } else {
  2937. ASSERT(RTE->rte_mask == DEFAULT_MASK);
  2938. //remember the new gw until we transition fully
  2939. OldRTE->rte_todg = RTE;
  2940. RTE->rte_fromdg = OldRTE;
  2941. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"FoundGW %x\n",RTE));
  2942. //Attach this RCE to use the new RTE
  2943. AttachRCEToNewRTE(RTE, RCE, OldRTE);
  2944. RTE->rte_valid = Now;
  2945. }
  2946. }
  2947. }
  2948. }
  2949. }
  2950. }
  2951. }
  2952. }
  2953. CTEFreeLock(&RouteTableLock.Lock, Handle);
  2954. }
  2955. }
  2956. //** FindRCE - Find an RCE on an RTE.
  2957. //
  2958. // A routine to find an RCE that's chained on an RTE. We assume the lock
  2959. // is held on the RTE.
  2960. //
  2961. // Entry: RTE - RTE to search.
  2962. // Dest - Destination address of RTE to find.
  2963. // Src - Source address of RTE to find.
  2964. //
  2965. // Returns: Pointer to RTE found, or NULL.
  2966. //
  2967. RouteCacheEntry *
  2968. FindRCE(RouteTableEntry * RTE, IPAddr Dest, IPAddr Src)
  2969. {
  2970. RouteCacheEntry *CurrentRCE;
  2971. ASSERT(!IP_ADDR_EQUAL(Src, NULL_IP_ADDR));
  2972. for (CurrentRCE = RTE->rte_rcelist; CurrentRCE != NULL;
  2973. CurrentRCE = CurrentRCE->rce_next) {
  2974. if (IP_ADDR_EQUAL(CurrentRCE->rce_dest, Dest) &&
  2975. IP_ADDR_EQUAL(CurrentRCE->rce_src, Src)) {
  2976. break;
  2977. }
  2978. }
  2979. return CurrentRCE;
  2980. }
  2981. //** OpenRCE - Open an RCE for a specific route.
  2982. //
  2983. // Called by the upper layer to open an RCE. We look up the type of the address
  2984. // - if it's invalid, we return 'Destination invalid'. If not, we look up the
  2985. // route, fill in the RCE, and link it on the correct RTE.
  2986. //
  2987. // As an added bonus, this routine will return the local address to use
  2988. // to reach the destination.
  2989. //
  2990. // Entry: Address - Address for which we are to open an RCE.
  2991. // Src - Preferred source address to use.
  2992. // RCE - Pointer to where to return pointer to RCE.
  2993. // Type - Pointer to where to return destination type.
  2994. // MSS - Pointer to where to return MSS for route.
  2995. // OptInfo - Pointer to option information, such as TOS and
  2996. // any source routing info.
  2997. //
  2998. // Returns: Source IP address to use. This will be NULL_IP_ADDR if the
  2999. // specified destination is unreachable for any reason.
  3000. //
  3001. IPAddr
  3002. OpenRCE(IPAddr Address, IPAddr Src, RouteCacheEntry ** RCE, uchar * Type,
  3003. ushort * MSS, IPOptInfo * OptInfo)
  3004. {
  3005. RouteTableEntry *RTE; // Pointer to RTE to put RCE on.
  3006. CTELockHandle TableLock;
  3007. uchar LocalType;
  3008. NetTableEntry *RealNTE = NULL;
  3009. uint ConstrainIF = 0;
  3010. if (!IP_ADDR_EQUAL(OptInfo->ioi_addr, NULL_IP_ADDR))
  3011. Address = OptInfo->ioi_addr;
  3012. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  3013. LocalType = GetAddrType(Address);
  3014. *Type = LocalType;
  3015. // If the specified address isn't invalid, continue.
  3016. if (LocalType != DEST_INVALID) {
  3017. RouteCacheEntry *NewRCE;
  3018. // If he's specified a source address, loop through the NTE table
  3019. // now and make sure it's valid.
  3020. if (!IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {
  3021. NetTableEntry *NTE;
  3022. NetTableEntry *NetTableList = NewNetTableList[NET_TABLE_HASH(Src)];
  3023. for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next)
  3024. if ((NTE->nte_flags & NTE_VALID) &&
  3025. IP_ADDR_EQUAL(Src, NTE->nte_addr))
  3026. break;
  3027. if (NTE == NULL) {
  3028. // Didn't find a match.
  3029. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3030. return NULL_IP_ADDR;
  3031. }
  3032. // Decide whether to do a strong or weak host lookup
  3033. // No need to do this in case of unidirectional adapter.
  3034. // On unidirectional adapter sends are not permitted.
  3035. // If this openrce is called before setting specific mcast
  3036. // Address (ioi_mcastif) GetIfConstraint for mcast will fail.
  3037. // For W9x backward compatibility reasons, we will let
  3038. // OpenRce succeed even if ioi_mcast if is not set, as an
  3039. // exception in the case of unidirectional adapter. Side effect
  3040. // of this will be - when a send is attempted on this endpoint
  3041. // with this cached rce, it will go out on a random interface.
  3042. //
  3043. if (!(NTE->nte_if->if_flags & IF_FLAGS_UNI)) {
  3044. ConstrainIF = GetIfConstraint(Address, Src, OptInfo, FALSE);
  3045. }
  3046. if ((ConstrainIF != 0) && (ConstrainIF != INVALID_IF_INDEX) &&
  3047. (NTE->nte_if->if_index != ConstrainIF)) {
  3048. //
  3049. // The caller requested a strong host lookup, but passed
  3050. // an address on a different interface as the preferred
  3051. // source address. Since we cannot honor this preference
  3052. // for a strong host lookup, we'll ignore the preferred
  3053. // source address, and just choose one from the outgoing
  3054. // interface.
  3055. //
  3056. Src = NULL_IP_ADDR;
  3057. }
  3058. } else {
  3059. ConstrainIF = GetIfConstraint(Address, Src, OptInfo, FALSE);
  3060. }
  3061. // Find the route for this guy. If we can't find one, return NULL.
  3062. if (IP_LOOPBACK_ADDR(Src)) {
  3063. RTE = LookupRTE(Src, Src, HOST_ROUTE_PRI, ConstrainIF);
  3064. if (RTE) {
  3065. ASSERT(RTE->rte_if == &LoopInterface);
  3066. } else {
  3067. KdPrint(("No Loopback rte!\n"));
  3068. ASSERT(0);
  3069. }
  3070. } else {
  3071. RTE = LookupRTE(Address, Src, HOST_ROUTE_PRI, ConstrainIF);
  3072. }
  3073. if (RTE != (RouteTableEntry *) NULL) {
  3074. CTELockHandle RCEHandle;
  3075. RouteCacheEntry *OldRCE;
  3076. //
  3077. // Make sure interface is not shutting down.
  3078. //
  3079. if (IS_IF_INVALID(RTE->rte_if) && RTE->rte_if->if_ntecount) {
  3080. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3081. return NULL_IP_ADDR;
  3082. }
  3083. if (OptInfo->ioi_uni) {
  3084. //LookupRTE returns first route n the chain of
  3085. //unnumbered ifs.
  3086. //if this is not the one desired, scan further
  3087. RouteTableEntry *tmpRTE = RTE;
  3088. while (tmpRTE && (tmpRTE->rte_if->if_index != OptInfo->ioi_uni)) {
  3089. tmpRTE = tmpRTE->rte_next;
  3090. }
  3091. if (!tmpRTE) {
  3092. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"OpenRCE:No matching unnumbered interface %d\n", OptInfo->ioi_uni));
  3093. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3094. return NULL_IP_ADDR;
  3095. } else
  3096. RTE = tmpRTE;
  3097. }
  3098. // We found one.
  3099. // if the route is on a P2MP interface get the mtu from the link associated with the route
  3100. if (RTE->rte_link)
  3101. *MSS = (ushort) MIN(RTE->rte_mtu,RTE->rte_link->link_mtu);
  3102. else
  3103. *MSS = (ushort) RTE->rte_mtu; // Return the route MTU.
  3104. if (IP_LOOPBACK_ADDR(Src) && (RTE->rte_if != &LoopInterface)) {
  3105. // The upper layer is sending from a loopback address, but the
  3106. // destination isn't reachable through the loopback interface.
  3107. // Fail the request.
  3108. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3109. return NULL_IP_ADDR;
  3110. }
  3111. // We have the RTE. Fill in the RCE, and link it on the RTE.
  3112. if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL))
  3113. *Type |= DEST_OFFNET_BIT; // Tell upper layer it's off
  3114. // net.
  3115. //
  3116. // If no source address was specified, then use the best address
  3117. // for the interface. This will generally prevent dynamic NTE's from
  3118. // being chosen as the source for wildcard binds.
  3119. //
  3120. if (IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {
  3121. if (LocalType == DEST_LOCAL) {
  3122. Src = Address;
  3123. RealNTE = LoopNTE;
  3124. } else {
  3125. NetTableEntry *SrcNTE;
  3126. if ((RTE->rte_if->if_flags & IF_FLAGS_NOIPADDR) && (IP_ADDR_EQUAL(RTE->rte_if->if_nte->nte_addr, NULL_IP_ADDR))) {
  3127. Src = g_ValidAddr;
  3128. if (IP_ADDR_EQUAL(Src, NULL_IP_ADDR)) {
  3129. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3130. return NULL_IP_ADDR;
  3131. }
  3132. } else {
  3133. // This is routelookup for outgoing packet
  3134. // Check for non-transient address availability
  3135. SrcNTE = BestNTEForIF(
  3136. ADDR_FROM_RTE(RTE, Address),
  3137. RTE->rte_if,
  3138. TRUE
  3139. );
  3140. if (SrcNTE == NULL) {
  3141. // Can't find an address! Fail the request.
  3142. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3143. return NULL_IP_ADDR;
  3144. }
  3145. Src = SrcNTE->nte_addr;
  3146. }
  3147. }
  3148. }
  3149. // Now, see if an RCE already exists for this.
  3150. if (RCE == NULL) {
  3151. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Openrce with null RCE!! %x\n",Src));
  3152. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3153. return Src;
  3154. }
  3155. if ((OldRCE = FindRCE(RTE, Address, Src)) == NULL) {
  3156. // Don't have an existing RCE. See if we can get a new one,
  3157. // and fill it in.
  3158. NewRCE = CTEAllocMemNBoot(sizeof(RouteCacheEntry), 'AiCT');
  3159. *RCE = NewRCE;
  3160. if (NewRCE != NULL) {
  3161. RtlZeroMemory(NewRCE, sizeof(RouteCacheEntry));
  3162. NewRCE->rce_src = Src;
  3163. NewRCE->rce_dtype = LocalType;
  3164. NewRCE->rce_cnt = 1;
  3165. CTEInitLock(&NewRCE->rce_lock);
  3166. NewRCE->rce_dest = Address;
  3167. NewRCE->rce_rte = RTE;
  3168. NewRCE->rce_flags = RCE_VALID;
  3169. if (RTE->rte_flags & RTE_IF_VALID) {
  3170. NewRCE->rce_flags |= RCE_CONNECTED;
  3171. //* Update the ref. count for this interface.
  3172. NewRCE->rce_flags |= RCE_REFERENCED;
  3173. LOCKED_REFERENCE_IF(RTE->rte_if);
  3174. // We register the chksum capability of the interface
  3175. // associated with this RCE, because interface definitions
  3176. // are transparent to TCP or UDP.
  3177. if (!IPSecStatus) {
  3178. NewRCE->rce_OffloadFlags = RTE->rte_if->if_OffloadFlags;
  3179. } else {
  3180. NewRCE->rce_OffloadFlags = 0;
  3181. }
  3182. NewRCE->rce_TcpLargeSend.MaxOffLoadSize = RTE->rte_if->if_MaxOffLoadSize;
  3183. NewRCE->rce_TcpLargeSend.MinSegmentCount = RTE->rte_if->if_MaxSegments;
  3184. NewRCE->rce_TcpWindowSize = RTE->rte_if->if_TcpWindowSize;
  3185. NewRCE->rce_TcpInitialRTT = RTE->rte_if->if_TcpInitialRTT;
  3186. NewRCE->rce_TcpDelAckTicks = RTE->rte_if->if_TcpDelAckTicks;
  3187. NewRCE->rce_TcpAckFrequency = RTE->rte_if->if_TcpAckFrequency;
  3188. NewRCE->rce_mediaspeed = RTE->rte_if->if_speed;
  3189. } //RTE_IF_VALID
  3190. NewRCE->rce_next = RTE->rte_rcelist;
  3191. RTE->rte_rcelist = NewRCE;
  3192. RTE->rte_rces++;
  3193. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3194. return Src;
  3195. } else {
  3196. // alloc failed
  3197. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3198. return NULL_IP_ADDR;
  3199. }
  3200. } else {
  3201. // We have an existing RCE. We'll return his source as the
  3202. // valid source, bump the reference count, free the locks
  3203. // and return.
  3204. CTEGetLock(&OldRCE->rce_lock, &RCEHandle);
  3205. OldRCE->rce_cnt++;
  3206. *RCE = OldRCE;
  3207. if (OldRCE->rce_newmtu) {
  3208. *MSS = (USHORT) OldRCE->rce_newmtu;
  3209. }
  3210. OldRCE->rce_rte->rte_rces++;
  3211. CTEFreeLock(&OldRCE->rce_lock, RCEHandle);
  3212. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3213. return Src;
  3214. }
  3215. } else {
  3216. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3217. return NULL_IP_ADDR;
  3218. }
  3219. }
  3220. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3221. return NULL_IP_ADDR;
  3222. }
  3223. void
  3224. FreeRCEToList(RouteCacheEntry * RCE)
  3225. /*++
  3226. Routine Description:
  3227. Free RCE to the RCEFreeList (since the use_cnt on it is non zero)
  3228. Called with routetable lock held
  3229. Arguments:
  3230. RCE : RCE to free
  3231. Return Value:
  3232. None
  3233. --*/
  3234. {
  3235. // link this new interface at the front of the list
  3236. RCE->rce_next = RCEFreeList;
  3237. RCEFreeList = RCE;
  3238. return;
  3239. }
  3240. //* CloseRCE - Close an RCE.
  3241. //
  3242. // Called by the upper layer when it wants to close the RCE. We unlink it from
  3243. // the RTE.
  3244. //
  3245. // Entry: RCE - Pointer to the RCE to be closed.
  3246. //
  3247. // Exit: Nothing.
  3248. //
  3249. void
  3250. CloseRCE(RouteCacheEntry * RCE)
  3251. {
  3252. RouteTableEntry *RTE; // Route on which RCE is linked.
  3253. RouteCacheEntry *PrevRCE;
  3254. CTELockHandle TableLock; // Lock handles used.
  3255. Interface *IF;
  3256. Interface *tmpif = NULL;
  3257. uint FreetoRCEFreeList = 0;
  3258. if (RCE != NULL) {
  3259. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  3260. CTEGetLockAtDPC(&RCE->rce_lock);
  3261. if ((RCE->rce_flags & RCE_VALID) && !(RCE->rce_flags & RCE_LINK_DELETED)) {
  3262. RCE->rce_rte->rte_rces--;
  3263. }
  3264. if (--RCE->rce_cnt == 0) {
  3265. // ASSERT(RCE->rce_usecnt == 0);
  3266. ASSERT(*(int *)&(RCE->rce_usecnt) >= 0);
  3267. if ((RCE->rce_flags & RCE_VALID) && !(RCE->rce_flags & RCE_LINK_DELETED)) {
  3268. // The RCE is valid, so we have a valid RTE in the pointer
  3269. // field. Walk down the RTE rcelist, looking for this guy.
  3270. RTE = RCE->rce_rte;
  3271. tmpif = IF = RTE->rte_if;
  3272. PrevRCE = STRUCT_OF(RouteCacheEntry, &RTE->rte_rcelist,
  3273. rce_next);
  3274. // Walk down the list until we find him.
  3275. while (PrevRCE != NULL) {
  3276. if (PrevRCE->rce_next == RCE)
  3277. break;
  3278. PrevRCE = PrevRCE->rce_next;
  3279. }
  3280. ASSERT(PrevRCE != NULL);
  3281. if(PrevRCE) {
  3282. PrevRCE->rce_next = RCE->rce_next;
  3283. }
  3284. } else {
  3285. //Make sure if the interface pointed by RCE
  3286. //is still there
  3287. tmpif = IFList;
  3288. IF = (Interface *) RCE->rce_rte;
  3289. while (tmpif) {
  3290. if (tmpif == IF)
  3291. break;
  3292. tmpif = tmpif->if_next;
  3293. }
  3294. }
  3295. if (tmpif) {
  3296. if (RCE->rce_flags & RCE_CONNECTED) {
  3297. (*(IF->if_invalidate)) (IF->if_lcontext, RCE);
  3298. } else {
  3299. UnConnected++;
  3300. UnConnectedRCE = RCE;
  3301. (*(IF->if_invalidate)) (IF->if_lcontext, RCE);
  3302. }
  3303. if (RCE->rce_usecnt != 0) {
  3304. // free to the free list
  3305. // and check in timer if the usecnt has fallen to 0, if yes free it
  3306. FreetoRCEFreeList = 1;
  3307. } else {
  3308. if (RCE->rce_flags & RCE_REFERENCED) {
  3309. LockedDerefIF(IF);
  3310. }
  3311. }
  3312. CTEFreeLockFromDPC(&RCE->rce_lock);
  3313. if (FreetoRCEFreeList) {
  3314. RCE->rce_rte = (RouteTableEntry *) IF;
  3315. FreeRCEToList(RCE);
  3316. } else {
  3317. CTEFreeMem(RCE);
  3318. }
  3319. } else { //tmpif==NULL
  3320. CTEFreeLockFromDPC(&RCE->rce_lock);
  3321. }
  3322. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3323. } else {
  3324. CTEFreeLockFromDPC(&RCE->rce_lock);
  3325. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  3326. }
  3327. }
  3328. }
  3329. //* LockedAddRoute - Add a route to the routing table.
  3330. //
  3331. // Called by AddRoute to add a route to the routing table. We assume the
  3332. // route table lock is already held. If the route to be added already exists
  3333. // we update it. Routes are identified by a (Destination, Mask, FirstHop,
  3334. // Interface) tuple. If an exact match exists we'll update the metric, which
  3335. // may cause us to promote RCEs from other RTEs, or we may be demoted in which
  3336. // case we'll invalidate our RCEs and let them be reassigned at transmission
  3337. // time.
  3338. //
  3339. // If we have to create a new RTE we'll do so, and find the best previous
  3340. // RTE, and promote RCEs from that one to the new one.
  3341. //
  3342. // The route table is an open hash structure. Within each hash chain the
  3343. // RTEs with the longest masks (the 'priority') come first, and within
  3344. // each priority the RTEs with the smallest metric come first.
  3345. //
  3346. // Entry: Destination - Destination address for which route is being added.
  3347. // Mask - Mask for destination.
  3348. // FirstHop - First hop for address. Could be IPADDR_LOCAL.
  3349. // OutIF - Pointer to outgoing I/F.
  3350. // MTU - Maximum MTU for this route.
  3351. // Metric - Metric for this route.
  3352. // Proto - Protocol type to store in route.
  3353. // AType - Administrative type of route.
  3354. // Context - context to be associated with the route
  3355. // SetWithRefcnt - indicates the route should be referenced
  3356. // on the creator's behalf.
  3357. // RNO - optionally supplies a route-notification structure
  3358. // to be filled on output with details for the new route
  3359. //
  3360. // Returns: Status of attempt to add route.
  3361. //
  3362. IP_STATUS
  3363. LockedAddRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
  3364. Interface * OutIF, uint MTU, uint Metric, uint Proto, uint AType,
  3365. ROUTE_CONTEXT Context, BOOLEAN SetWithRefcnt,
  3366. IPRouteNotifyOutput* RNO)
  3367. {
  3368. uint RouteType; // SNMP route type.
  3369. RouteTableEntry *NewRTE; // Entries for new and previous RTEs.
  3370. uint OldMetric; // Previous metric in use.
  3371. uint OldPriority; // Priority of previous route to destination.
  3372. uint Now = CTESystemUpTime() / 1000L; // System up time,
  3373. // in seconds.
  3374. ushort OldFlags;
  3375. Interface *OldIF = NULL;
  3376. ULONG status;
  3377. ULONG matchFlags;
  3378. RouteTableEntry *pOldBestRTE;
  3379. RouteTableEntry *pNewBestRTE;
  3380. LinkEntry *Link;
  3381. IPAddr AllSNBCast;
  3382. IPMask TmpMask;
  3383. // OutIF is ref'd so it can't go away
  3384. Link = OutIF->if_link;
  3385. // If Metric is 0, set the metric to interface metric
  3386. if (Metric == 0) {
  3387. Metric = OutIF->if_metric;
  3388. }
  3389. // Do the following only if the interface is not a dummy interface
  3390. if (OutIF != (Interface *) & DummyInterface) {
  3391. // Check we are adding a multicast route
  3392. if (IP_ADDR_EQUAL(Destination, MCAST_DEST) &&
  3393. (OutIF->if_iftype & DONT_ALLOW_MCAST))
  3394. return IP_SUCCESS;
  3395. if (OutIF->if_iftype & DONT_ALLOW_UCAST) {
  3396. // Check whether we are adding a ucast route
  3397. TmpMask = IPNetMask(OutIF->if_nte->nte_addr);
  3398. AllSNBCast =
  3399. (OutIF->if_nte->nte_addr & TmpMask) |
  3400. (OutIF->if_bcast & ~TmpMask);
  3401. if (!(IP_ADDR_EQUAL(Destination, OutIF->if_bcast) ||
  3402. IP_ADDR_EQUAL(Destination, AllSNBCast) ||
  3403. IP_ADDR_EQUAL(Destination, MCAST_DEST))) {
  3404. // this is not a bcast/mcast route: this is a ucast route
  3405. return IP_SUCCESS;
  3406. }
  3407. }
  3408. }
  3409. // First do some consistency checks. Make sure that the Mask and
  3410. // Destination agree.
  3411. if (!IP_ADDR_EQUAL(Destination & Mask, Destination))
  3412. return IP_BAD_DESTINATION;
  3413. if (AType != ATYPE_PERM && AType != ATYPE_OVERRIDE && AType != ATYPE_TEMP)
  3414. return IP_BAD_REQ;
  3415. // If the interface is marked as going away, fail this.
  3416. if (OutIF->if_flags & IF_FLAGS_DELETING) {
  3417. return IP_BAD_REQ;
  3418. }
  3419. RouteType = IP_ADDR_EQUAL(FirstHop, IPADDR_LOCAL) ? IRE_TYPE_DIRECT :
  3420. IRE_TYPE_INDIRECT;
  3421. // If this is a route that is being added on an interface that has no
  3422. // IP address, mark this as IRE_TYPE_DIRECT. This is true only for
  3423. // P2P or P2MP interface, where route is plumbed and then address
  3424. // is added due to a perf reason.
  3425. if (((OutIF->if_flags & IF_FLAGS_P2P) ||
  3426. (OutIF->if_flags & IF_FLAGS_P2MP)) &&
  3427. OutIF->if_nte && (OutIF->if_nte->nte_flags & NTE_VALID) &&
  3428. (IP_ADDR_EQUAL(OutIF->if_nte->nte_addr,NULL_IP_ADDR))) {
  3429. RouteType = IRE_TYPE_DIRECT;
  3430. }
  3431. MTU = MAX(MTU, MIN_VALID_MTU);
  3432. // If the outgoing interface has NTEs attached but none are valid, fail
  3433. // this request unless it's a request to add the broadcast route.
  3434. if (OutIF != (Interface *) & DummyInterface) {
  3435. if (OutIF->if_ntecount == 0 && OutIF->if_nte != NULL &&
  3436. !IP_ADDR_EQUAL(Destination, OutIF->if_bcast) &&
  3437. !(OutIF->if_flags & IF_FLAGS_NOIPADDR)) {
  3438. // This interface has NTEs attached, but none are valid. Fail the
  3439. // request.
  3440. return IP_BAD_REQ;
  3441. }
  3442. }
  3443. if (OutIF->if_flags & IF_FLAGS_P2MP) {
  3444. while (Link) {
  3445. if ((Link->link_NextHop == FirstHop) ||
  3446. ((Link->link_NextHop == Destination) &&
  3447. (FirstHop == IPADDR_LOCAL))) {
  3448. break;
  3449. }
  3450. Link = Link->link_next;
  3451. }
  3452. if (!Link)
  3453. return IP_GENERAL_FAILURE;
  3454. }
  3455. DEBUGMSG(DBG_INFO && DBG_IP && DBG_ROUTE,
  3456. (DTEXT("LockedAddRoute: D = %08x, M = %08x, NH = %08x, IF = %08x\n")
  3457. DTEXT("\t\tMTU = %x, Met = %08x, Prot = %08x, AT = %08x, C = %08x\n"),
  3458. Destination, Mask, FirstHop, OutIF, MTU, Metric, Proto, AType,
  3459. Context));
  3460. // Insert the route in the proper place depending on the dest, metric
  3461. // Match next-hop (and interface if not a demand-dial route)
  3462. matchFlags = MATCH_NHOP;
  3463. if (!Context) {
  3464. matchFlags |= MATCH_INTF;
  3465. }
  3466. status = InsRoute(Destination, Mask, FirstHop, OutIF, Metric,
  3467. matchFlags, &NewRTE, &pOldBestRTE, &pNewBestRTE);
  3468. if (status != IP_SUCCESS) {
  3469. return status;
  3470. }
  3471. // Has a best route been replaced
  3472. if ((pOldBestRTE) && (pOldBestRTE != pNewBestRTE)) {
  3473. InvalidateRCEChain(pOldBestRTE);
  3474. // If the replaced route is a default gateway,
  3475. // we may need to switch connections to the new entry.
  3476. // To do so, we retrieve the current default gateway,
  3477. // invalidate all its RCEs, and revalidate all gateways
  3478. // to restart the dead-gateway detection procedure.
  3479. if (pOldBestRTE->rte_mask == DEFAULT_MASK) {
  3480. ValidateDefaultGWs(NULL_IP_ADDR);
  3481. }
  3482. }
  3483. // Copy old route's parameters now
  3484. OldFlags = NewRTE->rte_flags;
  3485. if (!(NewRTE->rte_flags & RTE_NEW)) {
  3486. OldMetric = NewRTE->rte_metric;
  3487. OldPriority = NewRTE->rte_priority;
  3488. OldIF = NewRTE->rte_if;
  3489. if (Metric >= OldMetric && (OldFlags & RTE_VALID)) {
  3490. InvalidateRCEChain(NewRTE);
  3491. }
  3492. if (SetWithRefcnt) {
  3493. ASSERT(NewRTE->rte_refcnt > 0);
  3494. NewRTE->rte_refcnt++;
  3495. }
  3496. } else {
  3497. // this is a new RTE
  3498. NewRTE->rte_refcnt = 1;
  3499. }
  3500. // If this is P2MP, chain this RTE on link
  3501. if (Link && (NewRTE->rte_link == NULL)) {
  3502. //
  3503. // This RTE is not on the link
  3504. // Insert the route in the linkrte chain
  3505. //
  3506. NewRTE->rte_nextlinkrte = Link->link_rte;
  3507. Link->link_rte = NewRTE;
  3508. NewRTE->rte_link = Link;
  3509. }
  3510. // Update fields in the new/old route
  3511. NewRTE->rte_addr = FirstHop;
  3512. NewRTE->rte_mtu = MTU;
  3513. NewRTE->rte_metric = Metric;
  3514. NewRTE->rte_type = (ushort) RouteType;
  3515. NewRTE->rte_if = OutIF;
  3516. NewRTE->rte_flags &= ~RTE_NEW;
  3517. NewRTE->rte_flags |= RTE_VALID;
  3518. NewRTE->rte_flags &= ~RTE_INCREASE;
  3519. if (OutIF != (Interface *) & DummyInterface) {
  3520. NewRTE->rte_flags |= RTE_IF_VALID;
  3521. SortRoutesInDestByRTE(NewRTE);
  3522. } else
  3523. NewRTE->rte_flags &= ~RTE_IF_VALID;
  3524. NewRTE->rte_admintype = AType;
  3525. NewRTE->rte_proto = Proto;
  3526. NewRTE->rte_valid = Now;
  3527. NewRTE->rte_mtuchange = Now;
  3528. NewRTE->rte_context = Context;
  3529. // Check if this is a new route or an old one
  3530. if (OldFlags & RTE_NEW) {
  3531. // Reset few fields in new route
  3532. NewRTE->rte_todg = NULL;
  3533. NewRTE->rte_fromdg = NULL;
  3534. NewRTE->rte_rces = 0;
  3535. RtlZeroMemory(NewRTE->rte_arpcontext, sizeof(RCE_CONTEXT_SIZE));
  3536. IPSInfo.ipsi_numroutes++;
  3537. if (NewRTE->rte_mask == DEFAULT_MASK) {
  3538. // A default route.
  3539. DefGWConfigured++;
  3540. DefGWActive++;
  3541. UpdateDeadGWState();
  3542. }
  3543. } else {
  3544. // If the RTE is for a default gateway and the old flags indicate
  3545. // he wasn't valid then we're essentially creating a new active
  3546. // default gateway. So bump up the active default gateway count.
  3547. if (NewRTE->rte_mask == DEFAULT_MASK) {
  3548. if (!(OldFlags & RTE_VALID)) {
  3549. DefGWActive++;
  3550. UpdateDeadGWState();
  3551. // Reset few fields in this route
  3552. NewRTE->rte_todg = NULL;
  3553. NewRTE->rte_fromdg = NULL;
  3554. NewRTE->rte_rces = 0;
  3555. }
  3556. }
  3557. }
  3558. // If a route-notification structure was supplied, fill it in.
  3559. if (RNO) {
  3560. RNO->irno_dest = NewRTE->rte_dest;
  3561. RNO->irno_mask = NewRTE->rte_mask;
  3562. RNO->irno_nexthop = GetNextHopForRTE(NewRTE);
  3563. RNO->irno_proto = NewRTE->rte_proto;
  3564. RNO->irno_ifindex = OutIF->if_index;
  3565. RNO->irno_metric = NewRTE->rte_metric;
  3566. if (OldFlags & RTE_NEW) {
  3567. RNO->irno_flags = IRNO_FLAG_ADD;
  3568. }
  3569. }
  3570. return IP_SUCCESS;
  3571. }
  3572. //* AddRoute - Add a route to the routing table.
  3573. //
  3574. // This is just a shell for the real add route routine. All we do is take
  3575. // the route table lock, and call the LockedAddRoute routine to deal with
  3576. // the request. This is done this way because there are certain routines that
  3577. // need to be able to atomically examine and add routes.
  3578. //
  3579. // Entry: Destination - Destination address for which route is being
  3580. // added.
  3581. // Mask - Mask for destination.
  3582. // FirstHop - First hop for address. Could be IPADDR_LOCAL.
  3583. // OutIF - Pointer to outgoing I/F.
  3584. // MTU - Maximum MTU for this route.
  3585. // Metric - Metric for this route.
  3586. // Proto - Protocol type to store in route.
  3587. // AType - Administrative type of route.
  3588. // Context - Context for this route.
  3589. //
  3590. // Returns: Status of attempt to add route.
  3591. //
  3592. IP_STATUS
  3593. AddRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
  3594. Interface * OutIF, uint MTU, uint Metric, uint Proto, uint AType,
  3595. ROUTE_CONTEXT Context, uint Flags)
  3596. {
  3597. CTELockHandle TableHandle;
  3598. IP_STATUS Status;
  3599. BOOLEAN SkipExNotifyQ = FALSE;
  3600. IPRouteNotifyOutput RNO = {0};
  3601. if ((Flags & RT_EXCLUDE_LOCAL) && Proto == IRE_PROTO_LOCAL) {
  3602. return IP_BAD_REQ;
  3603. }
  3604. CTEGetLock(&RouteTableLock.Lock, &TableHandle);
  3605. if (Flags & RT_NO_NOTIFY) {
  3606. SkipExNotifyQ = TRUE;
  3607. }
  3608. Status = LockedAddRoute(Destination, Mask, FirstHop, OutIF, MTU, Metric,
  3609. Proto, AType, Context,
  3610. (BOOLEAN)((Flags & RT_REFCOUNT) ? TRUE : FALSE),
  3611. &RNO);
  3612. if (Status == IP_SUCCESS) {
  3613. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  3614. #if FFP_SUPPORT
  3615. FFPFlushRequired = TRUE;
  3616. #endif
  3617. // Under certain conditions, LockedAddRoute returns IP_SUCCESS
  3618. // even though no route was added. We catch such cases by examining
  3619. // the interface index on output which, for true additions, should
  3620. // always be non-zero.
  3621. if (RNO.irno_ifindex) {
  3622. if (!SkipExNotifyQ) {
  3623. RtChangeNotifyEx(&RNO);
  3624. }
  3625. RtChangeNotify(&RNO);
  3626. }
  3627. } else {
  3628. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  3629. }
  3630. return Status;
  3631. }
  3632. //* RtChangeNotify - Supply a route-change for notification to any clients
  3633. //
  3634. // This routine is a shell around the address-/route-change notification
  3635. // handler. It unpacks information about the changed route, and passes it
  3636. // to the common handler specifying the route-change notification queue
  3637. // as the source for pending client-requests.
  3638. //
  3639. // Entry: RNO - describes the route-notification event
  3640. //
  3641. // Returns: nothing.
  3642. //
  3643. void
  3644. RtChangeNotify(IPRouteNotifyOutput *RNO)
  3645. {
  3646. ChangeNotify((IPNotifyOutput *)RNO, &RtChangeNotifyQueue,
  3647. &RouteTableLock.Lock);
  3648. }
  3649. //* RtChangeNotifyEx - Supply a route-change for notification to any clients
  3650. //
  3651. // This routine is a shell around the address-/route-change notification
  3652. // handler. It unpacks information about the changed route, and passes it
  3653. // to the common handler specifying the extended route-change notification
  3654. // queue as the source for pending client-requests.
  3655. //
  3656. // Entry: RNO - describes the route-notification event
  3657. //
  3658. // Returns: nothing.
  3659. //
  3660. void
  3661. RtChangeNotifyEx(IPRouteNotifyOutput *RNO)
  3662. {
  3663. ChangeNotify((IPNotifyOutput *)RNO, &RtChangeNotifyQueueEx,
  3664. &RouteTableLock.Lock);
  3665. }
  3666. //* ChangeNotifyAsync - Supply a change for notification
  3667. //
  3668. // This routine is a handler for a deferred change-notification. It unpacks
  3669. // information about the change, and passes it to the common handler.
  3670. //
  3671. // Entry: Event - CTEEvent for the deferred call
  3672. // Context - context containing information about the change
  3673. //
  3674. // Returns: nothing.
  3675. //
  3676. void
  3677. ChangeNotifyAsync(CTEEvent *Event, PVOID Context)
  3678. {
  3679. ChangeNotifyEvent *CNE = (ChangeNotifyEvent *)Context;
  3680. UNREFERENCED_PARAMETER(Event);
  3681. ChangeNotify(&CNE->cne_info, CNE->cne_queue, CNE->cne_lock);
  3682. CTEFreeMem(Context);
  3683. }
  3684. //* ChangeNotifyClientInQueue - See if a client is in a notification queue
  3685. //
  3686. // This is a utility routine called by ChangeNotify to determine
  3687. // if a given client, identified by a file object, has a request
  3688. // in a given notification queue.
  3689. //
  3690. // Entry: FileObject - identifies the client
  3691. // NotifyQueue - contains a list of requests to be searched
  3692. //
  3693. // Returns: TRUE if the client is present, FALSE otherwise.
  3694. //
  3695. BOOLEAN
  3696. ChangeNotifyClientInQueue(PFILE_OBJECT FileObject, PLIST_ENTRY NotifyQueue)
  3697. {
  3698. PLIST_ENTRY ListEntry;
  3699. PIRP Irp;
  3700. PIO_STACK_LOCATION IrpSp;
  3701. for (ListEntry = NotifyQueue->Flink; ListEntry != NotifyQueue;
  3702. ListEntry = ListEntry->Flink) {
  3703. Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
  3704. IrpSp = IoGetCurrentIrpStackLocation(Irp);
  3705. if (FileObject == IrpSp->FileObject) {
  3706. return TRUE;
  3707. }
  3708. }
  3709. return FALSE;
  3710. }
  3711. //* ChangeNotify - Notify about a route change
  3712. //
  3713. // This routine is the common handler for change notifications.
  3714. // It takes a description of a change, and searches the specified queue
  3715. // for a pending client-request that corresponds to the changed item.
  3716. //
  3717. // Entry: NotifyOutput - contains information about the change event
  3718. // NotifyQueue - supplies the queue in which to search for clients
  3719. // Lock - supplies the lock protecting 'NotifyQueue'.
  3720. //
  3721. // Returns: nothing.
  3722. //
  3723. void
  3724. ChangeNotify(IPNotifyOutput* NotifyOutput, PLIST_ENTRY NotifyQueue, PVOID Lock)
  3725. {
  3726. IPAddr Add = NotifyOutput->ino_addr;
  3727. IPMask Mask = NotifyOutput->ino_mask;
  3728. PIRP Irp;
  3729. CTELockHandle LockHandle;
  3730. PLIST_ENTRY ListEntry;
  3731. PIPNotifyData NotifyData;
  3732. LIST_ENTRY LocalNotifyQueue;
  3733. PIO_STACK_LOCATION IrpSp;
  3734. BOOLEAN synchronizeWithCancelRoutine = FALSE;
  3735. // See if we're being invoked it dispatch IRQL and, if so,
  3736. // defer the notification to a worker thread.
  3737. //
  3738. // N.B. We do this *without* touching 'Lock' which might already
  3739. // be held by the caller.
  3740. if (KeGetCurrentIrql() >= DISPATCH_LEVEL) {
  3741. ChangeNotifyEvent *CNE;
  3742. CNE = CTEAllocMemNBoot(sizeof(ChangeNotifyEvent), 'xiCT');
  3743. if (CNE) {
  3744. CNE->cne_info = *NotifyOutput;
  3745. CNE->cne_queue = NotifyQueue;
  3746. CNE->cne_lock = Lock;
  3747. CTEInitEvent(&CNE->cne_event, ChangeNotifyAsync);
  3748. CTEScheduleDelayedEvent(&CNE->cne_event, CNE);
  3749. }
  3750. return;
  3751. }
  3752. // Examine the list of pending change-notification requeusts
  3753. // to see if any of them match the parameters of the current event.
  3754. InitializeListHead(&LocalNotifyQueue);
  3755. CTEGetLock(Lock, &LockHandle);
  3756. for (ListEntry = NotifyQueue->Flink; ListEntry != NotifyQueue; ) {
  3757. Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
  3758. IrpSp = IoGetCurrentIrpStackLocation(Irp);
  3759. // Determine whether an input buffer was supplied and, if so,
  3760. // pick it up to see if the event matches the notification request.
  3761. if (IrpSp->Parameters.DeviceIoControl.InputBufferLength >=
  3762. sizeof(IPNotifyData)) {
  3763. NotifyData = Irp->AssociatedIrp.SystemBuffer;
  3764. } else {
  3765. NotifyData = NULL;
  3766. }
  3767. // Now determine whether we should consider this IRP at all.
  3768. // We'll normally complete all matching IRPs when an event occurs,
  3769. // but certain clients want only one matching IRP to be completed,
  3770. // so they can maintain a backlog of IRPs to make sure that they don't
  3771. // miss any events. Such clients set 'Synchronization' as the version
  3772. // in their requests.
  3773. if (NotifyData &&
  3774. NotifyData->Version == IPNotifySynchronization &&
  3775. ChangeNotifyClientInQueue(IrpSp->FileObject, &LocalNotifyQueue)) {
  3776. ListEntry = ListEntry->Flink;
  3777. continue;
  3778. }
  3779. // If no data was passed or it contains NULL address or an Address that
  3780. // matches the address that was added or deleted, complete the irp
  3781. if ((NotifyData == NULL) ||
  3782. (NotifyData->Add == 0) ||
  3783. ((NotifyData->Add & Mask) == (Add & Mask))) {
  3784. //
  3785. // We are going to remove the LE, so first save the Flink
  3786. //
  3787. ListEntry = ListEntry->Flink;
  3788. RemoveEntryList(&Irp->Tail.Overlay.ListEntry);
  3789. if (IoSetCancelRoutine(Irp, NULL) == NULL) {
  3790. synchronizeWithCancelRoutine = TRUE;
  3791. }
  3792. #if !MILLEN
  3793. if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength >=
  3794. sizeof(IPNotifyOutput)) {
  3795. RtlCopyMemory(Irp->AssociatedIrp.SystemBuffer, NotifyOutput,
  3796. sizeof(IPNotifyOutput));
  3797. Irp->IoStatus.Information = sizeof(IPNotifyOutput);
  3798. } else {
  3799. Irp->IoStatus.Information = 0;
  3800. }
  3801. #else // !MILLEN
  3802. // For Millennium, this is only called for RtChange queues now.
  3803. //
  3804. ASSERT(NotifyQueue == &RtChangeNotifyQueue);
  3805. if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength >=
  3806. sizeof(IP_RTCHANGE_NOTIFY)) {
  3807. PIP_RTCHANGE_NOTIFY pReply = Irp->AssociatedIrp.SystemBuffer;
  3808. pReply->Addr = Add;
  3809. pReply->Mask = Mask;
  3810. Irp->IoStatus.Information = sizeof(IP_RTCHANGE_NOTIFY);
  3811. } else {
  3812. Irp->IoStatus.Information = 0;
  3813. }
  3814. #endif // MILLEN
  3815. InsertTailList(&LocalNotifyQueue, &Irp->Tail.Overlay.ListEntry);
  3816. } else {
  3817. ListEntry = ListEntry->Flink;
  3818. }
  3819. }
  3820. CTEFreeLock(Lock, LockHandle);
  3821. if (!IsListEmpty(&LocalNotifyQueue)) {
  3822. if (synchronizeWithCancelRoutine) {
  3823. IoAcquireCancelSpinLock(&LockHandle);
  3824. IoReleaseCancelSpinLock(LockHandle);
  3825. }
  3826. do {
  3827. ListEntry = RemoveHeadList(&LocalNotifyQueue);
  3828. Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
  3829. Irp->IoStatus.Status = STATUS_SUCCESS;
  3830. IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);
  3831. } while (!IsListEmpty(&LocalNotifyQueue));
  3832. }
  3833. }
  3834. //* RtChangeNotifyCancel - cancels a route-change notification request.
  3835. //
  3836. // This routine is a wrapper around the common request-cancelation handler
  3837. // for change-notification requests.
  3838. //
  3839. // Returns: nothing.
  3840. //
  3841. void
  3842. RtChangeNotifyCancel(PDEVICE_OBJECT DeviceObject, PIRP Irp)
  3843. {
  3844. UNREFERENCED_PARAMETER(DeviceObject);
  3845. CancelNotify(Irp, &RtChangeNotifyQueue, &RouteTableLock.Lock);
  3846. }
  3847. //* RtChangeNotifyCancelEx - cancels a route-change notification request.
  3848. //
  3849. // This routine is a wrapper around the common request-cancelation handler
  3850. // for change-notification requests.
  3851. //
  3852. // Returns: nothing.
  3853. //
  3854. void
  3855. RtChangeNotifyCancelEx(PDEVICE_OBJECT DeviceObject, PIRP Irp)
  3856. {
  3857. UNREFERENCED_PARAMETER(DeviceObject);
  3858. CancelNotify(Irp, &RtChangeNotifyQueueEx, &RouteTableLock.Lock);
  3859. }
  3860. //* CancelNotify - cancels a change-notification request.
  3861. //
  3862. // This routine is the common handler for cancelation of change-notification
  3863. // requests. It searches for the given request in the qiven queue and,
  3864. // if found, completes it immediately with a cancelation status.
  3865. //
  3866. // It is invoked with the I/O cancel spin-lock held by the caller,
  3867. // and frees the cancel spin-lock before returning.
  3868. //
  3869. // Entry: Irp - the I/O request packet for the request
  3870. // NotifyQueue - change-notification queue containing the request
  3871. // Lock - lock protecting 'NotifyQueue'.
  3872. //
  3873. // Returns: nothing.
  3874. //
  3875. void
  3876. CancelNotify(PIRP Irp, PLIST_ENTRY NotifyQueue, PVOID Lock)
  3877. {
  3878. CTELockHandle LockHandle;
  3879. PLIST_ENTRY ListEntry;
  3880. BOOLEAN Found = FALSE;
  3881. CTEGetLock(Lock, &LockHandle);
  3882. for (ListEntry = NotifyQueue->Flink; ListEntry != NotifyQueue;
  3883. ListEntry = ListEntry->Flink) {
  3884. if (CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry) == Irp) {
  3885. RemoveEntryList(&Irp->Tail.Overlay.ListEntry);
  3886. Found = TRUE;
  3887. break;
  3888. }
  3889. }
  3890. CTEFreeLock(Lock, LockHandle);
  3891. IoReleaseCancelSpinLock(Irp->CancelIrql);
  3892. if (Found) {
  3893. Irp->IoStatus.Information = 0;
  3894. Irp->IoStatus.Status = STATUS_CANCELLED;
  3895. IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);
  3896. }
  3897. }
  3898. //* CancelNotifyByContext - cancels a change-notification request.
  3899. //
  3900. // This routine handles user-initiated cancellation of change-notification
  3901. // requests. It searches for a request with the given context in the
  3902. // given queue and, if found, completes it with a cancellation status.
  3903. //
  3904. // It is invoked with the I/O cancel spin-lock held by the caller and,
  3905. // if the request is found, it frees the cancel spin-lock before returning.
  3906. //
  3907. // Entry: FileObject - the file-object on which the user-initiated
  3908. // cancellation was received.
  3909. // Context - the I/O request packet for the request
  3910. // NotifyQueue - change-notification queue containing the request
  3911. // Lock - lock protecting 'NotifyQueue'.
  3912. //
  3913. // Returns: TRUE if the request was found, FALSE otherwise.
  3914. //
  3915. BOOLEAN
  3916. CancelNotifyByContext(PFILE_OBJECT FileObject, PVOID ApcContext,
  3917. PLIST_ENTRY NotifyQueue, PVOID Lock)
  3918. {
  3919. PIRP Irp;
  3920. PLIST_ENTRY ListEntry;
  3921. CTEGetLockAtDPC(Lock);
  3922. for (ListEntry = NotifyQueue->Flink; ListEntry != NotifyQueue;
  3923. ListEntry = ListEntry->Flink) {
  3924. Irp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
  3925. if (Irp->Tail.Overlay.DriverContext[0] == FileObject &&
  3926. Irp->Overlay.AsynchronousParameters.UserApcContext == ApcContext) {
  3927. RemoveEntryList(&Irp->Tail.Overlay.ListEntry);
  3928. IoSetCancelRoutine(Irp, NULL);
  3929. CTEFreeLockFromDPC(Lock);
  3930. IoReleaseCancelSpinLock(DISPATCH_LEVEL);
  3931. Irp->IoStatus.Information = 0;
  3932. Irp->IoStatus.Status = STATUS_CANCELLED;
  3933. IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);
  3934. return TRUE;
  3935. }
  3936. }
  3937. CTEFreeLockFromDPC(Lock);
  3938. return FALSE;
  3939. }
  3940. //* DeleteRoute - Delete a route from the routing table.
  3941. //
  3942. // Called by upper layer or management code to delete a route from the routing
  3943. // table. If we can't find the route we return an error. If we do find it, we
  3944. // remove it, and invalidate any RCEs associated with it. These RCEs will be
  3945. // reassigned the next time they're used. A route is uniquely identified by
  3946. // a (Destination, Mask, FirstHop, Interface) tuple.
  3947. //
  3948. // Entry: Destination - Destination address for which route is being
  3949. // deleted.
  3950. // Mask - Mask for destination.
  3951. // FirstHop - First hop on way to Destination.
  3952. // -1 means route is local.
  3953. // OutIF - Outgoing interface for route.
  3954. // Flags - selects various semantics for deletion.
  3955. //
  3956. // Returns: Status of attempt to delete route.
  3957. //
  3958. IP_STATUS
  3959. DeleteRoute(IPAddr Destination, IPMask Mask, IPAddr FirstHop,
  3960. Interface * OutIF, uint Flags)
  3961. {
  3962. RouteTableEntry *RTE; // RTE being deleted.
  3963. CTELockHandle TableLock; // Lock handle for table.
  3964. UINT retval;
  3965. RouteTableEntry *pOldBestRTE;
  3966. RouteTableEntry *pNewBestRTE;
  3967. BOOLEAN DeleteDone = FALSE;
  3968. IPRouteNotifyOutput RNO = {0};
  3969. uint MatchFlags = MATCH_FULL;
  3970. // Look up the route by calling FindSpecificRTE. If we can't find it,
  3971. // fail the call.
  3972. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  3973. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  3974. "DeleteRoute: D = %08x, M = %08x, NH = %08x, IF = %08x\n",
  3975. Destination, Mask, FirstHop, OutIF));
  3976. if (Flags & RT_EXCLUDE_LOCAL) {
  3977. MatchFlags |= MATCH_EXCLUDE_LOCAL;
  3978. }
  3979. if (Flags & RT_REFCOUNT) {
  3980. RouteTableEntry *TempRTE;
  3981. RTE = FindSpecificRTE(Destination, Mask, FirstHop, OutIF, &TempRTE,
  3982. FALSE);
  3983. if (RTE) {
  3984. ASSERT(RTE->rte_refcnt > 0);
  3985. RTE->rte_refcnt--;
  3986. if (!RTE->rte_refcnt) {
  3987. retval = DelRoute(Destination, Mask, FirstHop, OutIF,
  3988. MatchFlags, &RTE, &pOldBestRTE, &pNewBestRTE);
  3989. } else {
  3990. retval = IP_SUCCESS;
  3991. }
  3992. } else {
  3993. retval = IP_BAD_ROUTE;
  3994. }
  3995. } else {
  3996. retval = DelRoute(Destination, Mask, FirstHop, OutIF, MatchFlags,
  3997. &RTE, &pOldBestRTE, &pNewBestRTE);
  3998. }
  3999. if (retval == IP_SUCCESS) {
  4000. if (!((Flags & RT_REFCOUNT) && RTE->rte_refcnt)) {
  4001. RNO.irno_dest = RTE->rte_dest;
  4002. RNO.irno_mask = RTE->rte_mask;
  4003. RNO.irno_nexthop = GetNextHopForRTE(RTE);
  4004. RNO.irno_proto = RTE->rte_proto;
  4005. RNO.irno_ifindex = OutIF->if_index;
  4006. RNO.irno_metric = RTE->rte_metric;
  4007. RNO.irno_flags = IRNO_FLAG_DELETE;
  4008. DeleteDone = TRUE;
  4009. CleanupP2MP_RTE(RTE);
  4010. CleanupRTE(RTE);
  4011. }
  4012. }
  4013. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  4014. #if FFP_SUPPORT
  4015. FFPFlushRequired = TRUE;
  4016. #endif
  4017. if (DeleteDone) {
  4018. if (!(Flags & RT_NO_NOTIFY)) {
  4019. RtChangeNotifyEx(&RNO);
  4020. }
  4021. RtChangeNotify(&RNO);
  4022. }
  4023. return retval;
  4024. }
  4025. //* DeleteRouteWithNoLock - utility routine called by DeleteDest
  4026. //
  4027. // Called to remove a single route for a given destination.
  4028. // It's assumed that this routine is called with the routing table lock held,
  4029. // and that it doesn't release the route-table-lock as part of its operation.
  4030. //
  4031. // Entry: IRE - describes the entry to be deleted
  4032. // DeletedRTE - contains a pointer to the deleted entry on output
  4033. // Flags - selects various semantics for deletion.
  4034. //
  4035. // Returns: IP_SUCCESS if the entry to be deleted was found
  4036. //
  4037. IP_STATUS
  4038. DeleteRouteWithNoLock(IPRouteEntry * IRE, RouteTableEntry **DeletedRTE,
  4039. uint Flags)
  4040. {
  4041. NetTableEntry *OutNTE, *LocalNTE, *TempNTE;
  4042. IPAddr FirstHop, Dest, NextHop;
  4043. uint MTU;
  4044. Interface *OutIF;
  4045. uint Status;
  4046. uint i;
  4047. RouteTableEntry *RTE, *RTE1, *RTE2;
  4048. IPRouteNotifyOutput RNO = {0};
  4049. uint MatchFlags = MATCH_FULL;
  4050. *DeletedRTE = NULL;
  4051. OutNTE = NULL;
  4052. LocalNTE = NULL;
  4053. Dest = IRE->ire_dest;
  4054. NextHop = IRE->ire_nexthop;
  4055. // Make sure that the nexthop is sensible. We don't allow nexthops
  4056. // to be broadcast or invalid or loopback addresses.
  4057. if (IP_LOOPBACK(NextHop) || CLASSD_ADDR(NextHop) || CLASSE_ADDR(NextHop))
  4058. return IP_BAD_REQ;
  4059. // Also make sure that the destination we're routing to is sensible.
  4060. // Don't allow routes to be added to Class D or E or loopback
  4061. // addresses.
  4062. if (IP_LOOPBACK(Dest) || CLASSD_ADDR(Dest) || CLASSE_ADDR(Dest))
  4063. return IP_BAD_REQ;
  4064. if (IRE->ire_index == LoopIndex)
  4065. return IP_BAD_REQ;
  4066. if (IRE->ire_index != INVALID_IF_INDEX) {
  4067. // First thing to do is to find the outgoing NTE for specified
  4068. // interface, and also make sure that it matches the destination
  4069. // if the destination is one of my addresses.
  4070. for (i = 0; i < NET_TABLE_SIZE; i++) {
  4071. NetTableEntry *NetTableList = NewNetTableList[i];
  4072. for (TempNTE = NetTableList; TempNTE != NULL;
  4073. TempNTE = TempNTE->nte_next) {
  4074. if ((OutNTE == NULL) && (TempNTE->nte_flags & NTE_VALID) && (IRE->ire_index == TempNTE->nte_if->if_index))
  4075. OutNTE = TempNTE;
  4076. if (!IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR) &&
  4077. IP_ADDR_EQUAL(NextHop, TempNTE->nte_addr) &&
  4078. (TempNTE->nte_flags & NTE_VALID))
  4079. LocalNTE = TempNTE;
  4080. // Don't let a route be set through a broadcast address.
  4081. if (IsBCastOnNTE(NextHop, TempNTE) != DEST_LOCAL)
  4082. return (IP_STATUS) STATUS_INVALID_PARAMETER;
  4083. // Don't let a route to a broadcast address be added or deleted.
  4084. if (IsBCastOnNTE(Dest, TempNTE) != DEST_LOCAL)
  4085. return IP_BAD_REQ;
  4086. }
  4087. }
  4088. // At this point OutNTE points to the outgoing NTE, and LocalNTE
  4089. // points to the NTE for the local address, if this is a direct route.
  4090. // Make sure they point to the same interface, and that the type is
  4091. // reasonable.
  4092. if (OutNTE == NULL)
  4093. return IP_BAD_REQ;
  4094. if (LocalNTE != NULL) {
  4095. // He's routing straight out a local interface. The interface for
  4096. // the local address must match the interface passed in, and the
  4097. // type must be DIRECT (if we're adding) or INVALID (if we're
  4098. // deleting).
  4099. if (LocalNTE->nte_if->if_index != IRE->ire_index)
  4100. return IP_BAD_REQ;
  4101. if (IRE->ire_type != IRE_TYPE_DIRECT &&
  4102. IRE->ire_type != IRE_TYPE_INVALID)
  4103. return IP_BAD_REQ;
  4104. OutNTE = LocalNTE;
  4105. }
  4106. // Figure out what the first hop should be. If he's routing straight
  4107. // through a local interface, or the next hop is equal to the
  4108. // destination, then the first hop is IPADDR_LOCAL. Otherwise it's the
  4109. // address of the gateway.
  4110. if ((LocalNTE != NULL) || IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR))
  4111. FirstHop = IPADDR_LOCAL;
  4112. else if (IP_ADDR_EQUAL(Dest, NextHop))
  4113. FirstHop = IPADDR_LOCAL;
  4114. else
  4115. FirstHop = NextHop;
  4116. MTU = OutNTE->nte_mss;
  4117. OutIF = OutNTE->nte_if;
  4118. if (IP_ADDR_EQUAL(NextHop, NULL_IP_ADDR)) {
  4119. if (!(OutIF->if_flags & IF_FLAGS_P2P)) {
  4120. return IP_BAD_REQ;
  4121. }
  4122. }
  4123. } else {
  4124. OutIF = (Interface *) & DummyInterface;
  4125. MTU = DummyInterface.ri_if.if_mtu - sizeof(IPHeader);
  4126. if (IP_ADDR_EQUAL(Dest, NextHop))
  4127. FirstHop = IPADDR_LOCAL;
  4128. else
  4129. FirstHop = NextHop;
  4130. }
  4131. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Calling DelRoute On :\n"));
  4132. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"\tDest = %p\n", Dest));
  4133. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4134. "\tMask = %p\n", IRE->ire_mask));
  4135. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"\tIntf = %p\n", OutIF));
  4136. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"\tNhop = %p\n\n", FirstHop));
  4137. if (Flags & RT_EXCLUDE_LOCAL) {
  4138. MatchFlags |= MATCH_EXCLUDE_LOCAL;
  4139. }
  4140. Status = DelRoute(Dest, IRE->ire_mask, FirstHop, OutIF, MatchFlags,
  4141. &RTE, &RTE1, &RTE2);
  4142. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Status = %08x\n", Status));
  4143. if (Status == IP_SUCCESS) {
  4144. // Queue a route-change notification for the destination-removal.
  4145. //
  4146. // N.B. We are being called with the route-table-lock held;
  4147. // this means we're at DISPATCH_LEVEL, and so the call below
  4148. // to RtChangeNotify will schedule a deferred notification.
  4149. // It definitely *must* not attempt to recursively acquire
  4150. // the route-table-lock, since that would instantly deadlock.
  4151. RNO.irno_dest = RTE->rte_dest;
  4152. RNO.irno_mask = RTE->rte_mask;
  4153. RNO.irno_nexthop = GetNextHopForRTE(RTE);
  4154. RNO.irno_proto = RTE->rte_proto;
  4155. RNO.irno_ifindex = OutIF->if_index;
  4156. RNO.irno_metric = RTE->rte_metric;
  4157. RNO.irno_flags = IRNO_FLAG_DELETE;
  4158. RtChangeNotify(&RNO);
  4159. CleanupP2MP_RTE(RTE);
  4160. CleanupRTE(RTE);
  4161. *DeletedRTE = RTE;
  4162. return IP_SUCCESS;
  4163. }
  4164. return IP_BAD_REQ;
  4165. }
  4166. //* DeleteDest - delete all routes to a destination
  4167. //
  4168. // Called to remove all routes to a given destination. This results
  4169. // in the entry for the destination itself being removed.
  4170. //
  4171. // Entry: Dest - identifies the destination to be removed
  4172. // Mask - supplies the mask for the destination
  4173. //
  4174. // Returns: IP_SUCCESS if the destination was found
  4175. //
  4176. IP_STATUS
  4177. DeleteDest(IPAddr Dest, IPMask Mask)
  4178. {
  4179. CTELockHandle TableLock;
  4180. RouteTableEntry *RTE, *NextRTE, *DeletedRTE;
  4181. IP_STATUS retval;
  4182. IPRouteEntry IRE;
  4183. BOOLEAN DeleteDone = FALSE;
  4184. CTEGetLock(&RouteTableLock.Lock, &TableLock);
  4185. for (;;) {
  4186. // Begin by locating the first entry for the destination in question.
  4187. // Once we find that, we'll use it to begin a loop in which all the
  4188. // entries for the destination will be deleted.
  4189. retval = SearchRouteInSTrie(RouteTable->sTrie, Dest, Mask, 0, NULL,
  4190. MATCH_NONE, &RTE);
  4191. if (retval != IP_SUCCESS) {
  4192. break;
  4193. }
  4194. // Iteratively remove all routes on the destination.
  4195. // Initialize the fields that are common to all the destination's
  4196. // routes, and then iterate over the routes removing each one.
  4197. IRE.ire_type = IRE_TYPE_INVALID;
  4198. IRE.ire_dest = Dest;
  4199. IRE.ire_mask = Mask;
  4200. do {
  4201. // Set the fields which are specific to the current entry
  4202. // for the destination (the interface index and nexthop),
  4203. // and pick up the entry *after* this entry (since we're about
  4204. // to delete this entry) so we can continue our enumeration
  4205. // once the current entry is removed.
  4206. IRE.ire_index = RTE->rte_if->if_index;
  4207. IRE.ire_nexthop = GetNextHopForRTE(RTE);
  4208. NextRTE = RTE->rte_next;
  4209. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4210. "Deleting RTE @ %p:\n", RTE));
  4211. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4212. "Next in List = %p:\n", NextRTE));
  4213. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4214. "Using an IRE @ %p\n", IRE));
  4215. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4216. "\tDest = %08x\n", IRE.ire_dest));
  4217. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4218. "\tMask = %08x\n", IRE.ire_mask));
  4219. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4220. "\tIntf = %08x\n", IRE.ire_index));
  4221. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4222. "\tNhop = %08x\n\n", IRE.ire_nexthop));
  4223. // Delete the current entry. The deletion routine
  4224. // takes care of notification, if any.
  4225. retval = DeleteRouteWithNoLock(&IRE, &DeletedRTE, RT_EXCLUDE_LOCAL);
  4226. if (retval == IP_SUCCESS) {
  4227. DeleteDone = TRUE;
  4228. }
  4229. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4230. "Status = %08x, RTE = %p, DeletedRTE = %p\n",
  4231. retval, RTE, DeletedRTE));
  4232. // Attempt to continue the enumeration by picking up
  4233. // the next entry.
  4234. if ((retval != IP_SUCCESS) || (RTE == DeletedRTE)) {
  4235. // Either we are not allowed to delete this route
  4236. // Or we deleted what we were expecting to delete
  4237. RTE = NextRTE;
  4238. } else {
  4239. // We deleted an RTE thats further down the list
  4240. // NextRTE might be pointing to this deleted RTE
  4241. // Try to delete again and skip over RTE if cant
  4242. }
  4243. } while (RTE);
  4244. retval = IP_SUCCESS;
  4245. break;
  4246. }
  4247. CTEFreeLock(&RouteTableLock.Lock, TableLock);
  4248. if (DeleteDone) {
  4249. #if FFP_SUPPORT
  4250. FFPFlushRequired = TRUE;
  4251. #endif
  4252. }
  4253. return retval;
  4254. }
  4255. //* Redirect - Process a redirect request.
  4256. //
  4257. // This is the redirect handler . We treat all redirects as host redirects as
  4258. // per the host requirements RFC. We make a few sanity checks on the new first
  4259. // hop address, and then we look up the current route. If it's not through the
  4260. // source of the redirect, just return.
  4261. // If the current route to the destination is a host route, update the first
  4262. // hop and return.
  4263. // If the route is not a host route, remove any RCE for this route from the
  4264. // RTE, create a host route and place the RCE (if any) on the new RTE.
  4265. //
  4266. // Entry: NTE - Pointer to NetTableEntry for net on which Redirect
  4267. // arrived.
  4268. // RDSrc - IPAddress of source of redirect.
  4269. // Target - IPAddress being redirected.
  4270. // Src - Src IP address of DG that triggered RD.
  4271. // FirstHop - New first hop for Target.
  4272. //
  4273. // Returns: Nothing.
  4274. //
  4275. void
  4276. Redirect(NetTableEntry * NTE, IPAddr RDSrc, IPAddr Target, IPAddr Src,
  4277. IPAddr FirstHop)
  4278. {
  4279. uint MTU;
  4280. RouteTableEntry *RTE;
  4281. CTELockHandle Handle;
  4282. IP_STATUS Status;
  4283. IPRouteNotifyOutput RNO = {0};
  4284. if (IP_ADDR_EQUAL(FirstHop, NULL_IP_ADDR) ||
  4285. IP_LOOPBACK(FirstHop) ||
  4286. IP_ADDR_EQUAL(FirstHop, RDSrc) ||
  4287. !(NTE->nte_flags & NTE_VALID)) {
  4288. // Invalid FirstHop
  4289. return;
  4290. }
  4291. if (GetAddrType(FirstHop) == DEST_LOCAL) {
  4292. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4293. "Redirect: Local firsthop %x\n", FirstHop));
  4294. return;
  4295. }
  4296. // If the redirect is received on a loopback interface, drop it.
  4297. // This can happen in case of NAT, where it sends a packet to an addr in
  4298. // its local pool.
  4299. // These addresses are local but not bound to any interface and IP doesn't
  4300. // know about them
  4301. if (NTE == LoopNTE)
  4302. return;
  4303. // First make sure that this came from the gateway we're currently using to
  4304. // get to Target, and then lookup up the route to the new first hop. The new
  4305. // firsthop must be directly reachable, and on the same subnetwork or
  4306. // physical interface on which we received the redirect.
  4307. CTEGetLock(&RouteTableLock.Lock, &Handle);
  4308. // Make sure the source of the redirect is the current first hop gateway.
  4309. RTE = LookupRTE(Target, Src, HOST_ROUTE_PRI, FALSE);
  4310. if (RTE == NULL || IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL) ||
  4311. !IP_ADDR_EQUAL(RTE->rte_addr, RDSrc)) {
  4312. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4313. return; // A bad redirect.
  4314. }
  4315. ASSERT(RTE->rte_flags & RTE_IF_VALID);
  4316. // If the current first hop gateway is a default gateway, see if we have
  4317. // another default gateway at FirstHop that is down. If so, mark him as
  4318. // up and invalidate the RCEs on this guy.
  4319. if (RTE->rte_mask == DEFAULT_MASK && ValidateDefaultGWs(FirstHop) != 0) {
  4320. // Have a default gateway that's been newly activated. Invalidate RCEs
  4321. // on the route, and we're done.
  4322. InvalidateRCEChain(RTE);
  4323. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4324. return;
  4325. }
  4326. // We really need to add a host route through FirstHop. Make sure he's
  4327. // a valid first hop.
  4328. RTE = LookupRTE(FirstHop, Src, HOST_ROUTE_PRI, FALSE);
  4329. if (RTE == NULL) {
  4330. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4331. return; // Can't get there from here.
  4332. }
  4333. ASSERT(RTE->rte_flags & RTE_IF_VALID);
  4334. // Check to make sure the new first hop is directly reachable, and is on the
  4335. // same subnet or physical interface we received the redirect on.
  4336. if (!IP_ADDR_EQUAL(RTE->rte_addr, IPADDR_LOCAL) || // Not directly reachable
  4337. // or wrong subnet.
  4338. ((NTE->nte_addr & NTE->nte_mask) != (FirstHop & NTE->nte_mask))) {
  4339. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4340. return;
  4341. }
  4342. if (RTE->rte_link)
  4343. MTU = RTE->rte_link->link_mtu;
  4344. else
  4345. MTU = RTE->rte_mtu;
  4346. // Now add a host route. AddRoute will do the correct things with shifting
  4347. // RCEs around. We know that FirstHop is on the same subnet as NTE (from
  4348. // the check above), so it's valid to add the route to FirstHop as out
  4349. // going through NTE.
  4350. Status = LockedAddRoute(Target, HOST_MASK,
  4351. IP_ADDR_EQUAL(FirstHop, Target)
  4352. ? IPADDR_LOCAL : FirstHop,
  4353. NTE->nte_if, MTU, 1, IRE_PROTO_ICMP, ATYPE_OVERRIDE,
  4354. RTE->rte_context, FALSE, &RNO);
  4355. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4356. if (Status == IP_SUCCESS && RNO.irno_ifindex) {
  4357. RtChangeNotifyEx(&RNO);
  4358. RtChangeNotify(&RNO);
  4359. }
  4360. //
  4361. // Bug: #67333: delete the old route thru' RDSrc, now that we have a new one.
  4362. //
  4363. // KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4364. // "Re-direct: deleting old route thru: %lx, to Target: %lx\n",
  4365. // RDSrc, Target));
  4366. DeleteRoute(Target, HOST_MASK, RDSrc, NTE->nte_if, 0);
  4367. }
  4368. //* GetRaisedMTU - Get the next largest MTU in table.
  4369. //
  4370. // A utility function to search the MTU table for a larger value.
  4371. //
  4372. // Input: PrevMTU - MTU we're currently using. We want the next largest one.
  4373. //
  4374. // Returns: New MTU size.
  4375. //
  4376. uint
  4377. GetRaisedMTU(uint PrevMTU)
  4378. {
  4379. uint i;
  4380. for (i = (sizeof(MTUTable) / sizeof(uint)) - 1; i != 0; i--) {
  4381. if (MTUTable[i] > PrevMTU)
  4382. break;
  4383. }
  4384. return MTUTable[i];
  4385. }
  4386. //* GuessNewMTU - Guess a new MTU, giving a DG size too big.
  4387. //
  4388. // A utility function to search the MTU table. As input we take in an MTU
  4389. // size we believe to be too large, and search the table looking for the
  4390. // next smallest one.
  4391. //
  4392. // Input: TooBig - Size that's too big.
  4393. //
  4394. // Returns: New MTU size.
  4395. //
  4396. uint
  4397. GuessNewMTU(uint TooBig)
  4398. {
  4399. uint i;
  4400. for (i = 0; i < ((sizeof(MTUTable) / sizeof(uint)) - 1); i++)
  4401. if (MTUTable[i] < TooBig)
  4402. break;
  4403. return MTUTable[i];
  4404. }
  4405. //* RouteFragNeeded - Handle being told we need to fragment.
  4406. //
  4407. // Called when we receive some external indication that we need to fragment
  4408. // along a particular path. If we're doing MTU discovery we'll try to
  4409. // update the route, if we can. We'll also notify the upper layers about
  4410. // the new MTU.
  4411. //
  4412. // Input: IPH - Pointer to IP Header of datagram needing
  4413. // fragmentation.
  4414. // NewMTU - New MTU to be used (may be 0).
  4415. //
  4416. // Returns: Nothing.
  4417. //
  4418. void
  4419. RouteFragNeeded(IPHeader UNALIGNED * IPH, ushort NewMTU)
  4420. {
  4421. uint OldMTU;
  4422. CTELockHandle Handle;
  4423. RouteTableEntry *RTE;
  4424. ushort HeaderLength;
  4425. ushort mtu;
  4426. IP_STATUS Status;
  4427. IPRouteNotifyOutput RNO = {0};
  4428. // If we're not doing PMTU discovery, don't do anything.
  4429. if (!PMTUDiscovery) {
  4430. return;
  4431. }
  4432. // We're doing PMTU discovery. Before doing any work, make sure this is
  4433. // an acceptable message.
  4434. if (GetAddrType(IPH->iph_dest) != DEST_REMOTE) {
  4435. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  4436. "RouteFragNeeded: non-remote dest %x\n", IPH->iph_dest));
  4437. return;
  4438. }
  4439. // Correct the given new MTU for the IP header size, which we don't save
  4440. // as we track MTUs.
  4441. if (NewMTU != 0) {
  4442. // Make sure the new MTU we got is at least the minimum valid size.
  4443. NewMTU = MAX(NewMTU, MIN_VALID_MTU);
  4444. NewMTU -= sizeof(IPHeader);
  4445. }
  4446. HeaderLength = (IPH->iph_verlen & (uchar) ~ IP_VER_FLAG) << 2;
  4447. // Get the current routing information.
  4448. CTEGetLock(&RouteTableLock.Lock, &Handle);
  4449. // Find an RTE for the destination.
  4450. RTE = LookupRTE(IPH->iph_dest, IPH->iph_src, HOST_ROUTE_PRI, FALSE);
  4451. // If we couldn't find one, give up now.
  4452. if (RTE == NULL) {
  4453. // No RTE. Just bail out now.
  4454. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4455. return;
  4456. }
  4457. if (RTE->rte_link)
  4458. mtu = (ushort) RTE->rte_link->link_mtu;
  4459. else
  4460. mtu = (ushort) RTE->rte_mtu;
  4461. // If the existing MTU is less than the new
  4462. // MTU, give up now.
  4463. if ((OldMTU = mtu) < NewMTU) {
  4464. // No RTE, or an invalid new MTU. Just bail out now.
  4465. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4466. return;
  4467. }
  4468. // If the new MTU is zero, figure out what the new MTU should be.
  4469. if (NewMTU == 0) {
  4470. ushort DGLength;
  4471. // The new MTU is zero. We'll make a best guess what the new
  4472. // MTU should be. We have the RTE for this route already.
  4473. // Get the length of the datagram that triggered this. Since we'll
  4474. // be comparing it against MTU values that we track without the
  4475. // IP header size included, subtract off that amount.
  4476. DGLength = (ushort) net_short(IPH->iph_length) - sizeof(IPHeader);
  4477. // We may need to correct this as per RFC 1191 for dealing with
  4478. // old style routers.
  4479. if (DGLength >= OldMTU) {
  4480. // The length of the datagram sent is not less than our
  4481. // current MTU estimate, so we need to back it down (assuming
  4482. // that the sending route has incorrectly added in the header
  4483. // length).
  4484. DGLength = DGLength - (USHORT) HeaderLength;
  4485. }
  4486. // If it's still larger than our current MTU, use the current
  4487. // MTU. This could happen if the upper layer sends a burst of
  4488. // packets which generate a sequence of ICMP discard messages. The
  4489. // first one we receive will cause us to lower our MTU. We then
  4490. // want to discard subsequent messages to avoid lowering it
  4491. // too much. This could conceivably be a problem if our
  4492. // first adjustment still results in an MTU that's too big,
  4493. // but we should converge adequately fast anyway, and it's
  4494. // better than accidentally underestimating the MTU.
  4495. if (DGLength > OldMTU)
  4496. NewMTU = (ushort) OldMTU;
  4497. else
  4498. // Move down the table to the next lowest MTU.
  4499. NewMTU = (ushort) GuessNewMTU(DGLength);
  4500. }
  4501. // We have the new MTU. Now add it to the table as a host route.
  4502. Status = IP_GENERAL_FAILURE;
  4503. if (NewMTU != OldMTU) {
  4504. // Use ICMP protocol type only when adding a new host route;
  4505. // otherwise, an existing static entry might get overwritten and,
  4506. // later on, timed out as though it were an ICMP route.
  4507. if (IP_ADDR_EQUAL(RTE->rte_dest,IPH->iph_dest)) {
  4508. Status = LockedAddRoute(IPH->iph_dest, HOST_MASK, RTE->rte_addr,
  4509. RTE->rte_if, NewMTU, RTE->rte_metric,
  4510. RTE->rte_proto, ATYPE_OVERRIDE,
  4511. RTE->rte_context, FALSE, &RNO);
  4512. } else {
  4513. Status = LockedAddRoute(IPH->iph_dest, HOST_MASK, RTE->rte_addr,
  4514. RTE->rte_if, NewMTU, RTE->rte_metric,
  4515. IRE_PROTO_ICMP, ATYPE_OVERRIDE,
  4516. RTE->rte_context, FALSE, &RNO);
  4517. }
  4518. }
  4519. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4520. // We've added the route. Now notify the upper layers of the change.
  4521. ULMTUNotify(IPH->iph_dest, IPH->iph_src, IPH->iph_protocol,
  4522. (void *)((uchar *) IPH + HeaderLength), NewMTU);
  4523. if (Status == IP_SUCCESS && RNO.irno_ifindex) {
  4524. RtChangeNotifyEx(&RNO);
  4525. RtChangeNotify(&RNO);
  4526. }
  4527. }
  4528. //** IPRouteTimeout - IP routeing timeout handler.
  4529. //
  4530. // The IP routeing timeout routine, called once a minute. We look at all
  4531. // host routes, and if we raise the MTU on them we do so.
  4532. //
  4533. // Entry: Timer - Timer being fired.
  4534. // Context - Pointer to NTE being time out.
  4535. //
  4536. // Returns: Nothing.
  4537. //
  4538. void
  4539. IPRouteTimeout(CTEEvent * Timer, void *Context)
  4540. {
  4541. uint Now = CTESystemUpTime() / 1000L;
  4542. CTELockHandle Handle;
  4543. RouteTableEntry *RTE, *PrevRTE;
  4544. uint RaiseMTU, Delta;
  4545. Interface *IF;
  4546. IPAddr Dest;
  4547. uint NewMTU;
  4548. NetTableEntry *NTE;
  4549. RouteTableEntry *pOldBestRTE, *pNewBestRTE;
  4550. UINT IsDataLeft, IsValid;
  4551. UCHAR IteratorContext[CONTEXT_SIZE];
  4552. RtChangeList *CurrentRtChangeList = NULL;
  4553. UNREFERENCED_PARAMETER(Timer);
  4554. UNREFERENCED_PARAMETER(Context);
  4555. DampCheck();
  4556. if ((CTEInterlockedIncrementLong(&RouteTimerTicks) * IP_ROUTE_TIMEOUT) ==
  4557. IP_RTABL_TIMEOUT) {
  4558. RouteTimerTicks = 0;
  4559. CTEGetLock(&RouteTableLock.Lock, &Handle);
  4560. // First we set up an iterator over all routes
  4561. RtlZeroMemory(IteratorContext, CONTEXT_SIZE);
  4562. // Do we have any routes at all in the table ?
  4563. IsDataLeft = RTValidateContext(IteratorContext, &IsValid);
  4564. PrevRTE = NULL;
  4565. while (IsDataLeft) {
  4566. // Advance context by getting the next route
  4567. IsDataLeft = GetNextRoute(IteratorContext, &RTE);
  4568. // Do we have to delete the previous route ?
  4569. if (PrevRTE != NULL) {
  4570. IPRouteNotifyOutput RNO = {0};
  4571. RtChangeList *NewRtChange;
  4572. // Retrieve information about the route for change-notification
  4573. // before proceeding with deletion.
  4574. RNO.irno_dest = PrevRTE->rte_dest;
  4575. RNO.irno_mask = PrevRTE->rte_mask;
  4576. RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
  4577. RNO.irno_proto = PrevRTE->rte_proto;
  4578. RNO.irno_ifindex = PrevRTE->rte_if->if_index;
  4579. RNO.irno_metric = PrevRTE->rte_metric;
  4580. RNO.irno_flags = IRNO_FLAG_DELETE;
  4581. DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask,
  4582. PrevRTE->rte_addr, PrevRTE->rte_if, MATCH_FULL,
  4583. &PrevRTE, &pOldBestRTE, &pNewBestRTE);
  4584. CleanupP2MP_RTE(PrevRTE);
  4585. CleanupRTE(PrevRTE);
  4586. //... so we don't delete same route again
  4587. PrevRTE = NULL;
  4588. // Allocate, initialize and queue a change-notification entry
  4589. // for the deleted route.
  4590. NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), 'XICT');
  4591. if (NewRtChange != NULL) {
  4592. NewRtChange->rt_next = CurrentRtChangeList;
  4593. NewRtChange->rt_info = RNO;
  4594. CurrentRtChangeList = NewRtChange;
  4595. }
  4596. }
  4597. // Make sure this route is a valid host route
  4598. if (!(RTE->rte_flags & RTE_VALID))
  4599. continue;
  4600. if (RTE->rte_mask != HOST_MASK)
  4601. continue;
  4602. // We have valid host route here
  4603. if (PMTUDiscovery) {
  4604. // Check to see if we can raise the MTU on this guy.
  4605. Delta = Now - RTE->rte_mtuchange;
  4606. if (RTE->rte_flags & RTE_INCREASE)
  4607. RaiseMTU = (Delta >= MTU_INCREASE_TIME ? 1 : 0);
  4608. else
  4609. RaiseMTU = (Delta >= MTU_DECREASE_TIME ? 1 : 0);
  4610. if (RaiseMTU) {
  4611. // We need to raise this MTU. Set his change time to
  4612. // Now, so we don't do this again, and figure out
  4613. // what the new MTU should be.
  4614. RTE->rte_mtuchange = Now;
  4615. IF = RTE->rte_if;
  4616. if (RTE->rte_mtu < IF->if_mtu) {
  4617. uint RaisedMTU;
  4618. RTE->rte_flags |= RTE_INCREASE;
  4619. // This is a candidate for change. Figure out
  4620. // what it should be.
  4621. RaisedMTU = GetRaisedMTU(RTE->rte_mtu);
  4622. NewMTU = MIN(RaisedMTU,
  4623. IF->if_mtu);
  4624. RTE->rte_mtu = NewMTU;
  4625. Dest = RTE->rte_dest;
  4626. // We have the new MTU. Free the lock, and walk
  4627. // down the NTEs on the I/F. For each NTE,
  4628. // call up to the upper layer and tell him what
  4629. // his new MTU is.
  4630. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4631. NTE = IF->if_nte;
  4632. while (NTE != NULL) {
  4633. if (NTE->nte_flags & NTE_VALID) {
  4634. ULMTUNotify(Dest, NTE->nte_addr, 0, NULL,
  4635. MIN(NewMTU, NTE->nte_mss));
  4636. }
  4637. NTE = NTE->nte_ifnext;
  4638. }
  4639. // We've notified everyone. Get the lock again,
  4640. // and validate context in case something changed
  4641. // after we freed the lock. In case it's invalid,
  4642. // start from first. We've updated the mtuchange
  4643. // time of this RTE, so we won't hit him again.
  4644. CTEGetLock(&RouteTableLock.Lock, &Handle);
  4645. RTValidateContext(IteratorContext, &IsValid);
  4646. if (!IsValid) {
  4647. RtlZeroMemory(IteratorContext, CONTEXT_SIZE);
  4648. IsDataLeft = RTValidateContext(IteratorContext, &IsValid);
  4649. continue;
  4650. }
  4651. // We still have a valid iterator context here
  4652. } else {
  4653. RTE->rte_flags &= ~RTE_INCREASE;
  4654. }
  4655. }
  4656. }
  4657. // If this route came in via ICMP, and we have no RCEs on it,
  4658. // and it's at least 10 minutes old, delete it.
  4659. if (RTE->rte_proto == IRE_PROTO_ICMP &&
  4660. RTE->rte_rcelist == NULL &&
  4661. (Now - RTE->rte_valid) > MAX_ICMP_ROUTE_VALID) {
  4662. // He needs to be deleted. Call DelRoute to do this.
  4663. // But after you have updated the context to next RTE
  4664. // Route for deletion in next iteration
  4665. PrevRTE = RTE;
  4666. continue;
  4667. }
  4668. }
  4669. // Did we have to delete the previous route ?
  4670. if (PrevRTE != NULL) {
  4671. IPRouteNotifyOutput RNO = {0};
  4672. RtChangeList *NewRtChange;
  4673. // Retrieve information about the route for change-notification
  4674. // before proceeding with deletion.
  4675. RNO.irno_dest = PrevRTE->rte_dest;
  4676. RNO.irno_mask = PrevRTE->rte_mask;
  4677. RNO.irno_nexthop = GetNextHopForRTE(PrevRTE);
  4678. RNO.irno_proto = PrevRTE->rte_proto;
  4679. RNO.irno_ifindex = PrevRTE->rte_if->if_index;
  4680. RNO.irno_metric = PrevRTE->rte_metric;
  4681. RNO.irno_flags = IRNO_FLAG_DELETE;
  4682. // Delete the route and perform cleanup.
  4683. DelRoute(PrevRTE->rte_dest, PrevRTE->rte_mask, PrevRTE->rte_addr,
  4684. PrevRTE->rte_if, MATCH_FULL, &PrevRTE, &pOldBestRTE,
  4685. &pNewBestRTE);
  4686. CleanupP2MP_RTE(PrevRTE);
  4687. CleanupRTE(PrevRTE);
  4688. // Allocate, initialize and queue a change-notification entry
  4689. // for the deleted route.
  4690. NewRtChange = CTEAllocMemNBoot(sizeof(RtChangeList), 'DiCT');
  4691. if (NewRtChange != NULL) {
  4692. NewRtChange->rt_next = CurrentRtChangeList;
  4693. NewRtChange->rt_info = RNO;
  4694. CurrentRtChangeList = NewRtChange;
  4695. }
  4696. }
  4697. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4698. }
  4699. #if FFP_SUPPORT
  4700. if (FFPFlushRequired) {
  4701. FFPFlushRequired = FALSE;
  4702. IPFlushFFPCaches();
  4703. }
  4704. #endif
  4705. if ((CTEInterlockedIncrementLong(&FlushIFTimerTicks) * IP_ROUTE_TIMEOUT) ==
  4706. FLUSH_IFLIST_TIMEOUT) {
  4707. Interface *TmpIF;
  4708. RouteCacheEntry *RCE, *PrevRCE;
  4709. FlushIFTimerTicks = 0;
  4710. CTEGetLock(&RouteTableLock.Lock, &Handle);
  4711. // check whether FreeIFList is non empty
  4712. if (FrontFreeList) {
  4713. ASSERT(*(int *)&TotalFreeInterfaces > 0);
  4714. // free the first interface in the list
  4715. TmpIF = FrontFreeList;
  4716. FrontFreeList = FrontFreeList->if_next;
  4717. CTEFreeMem(TmpIF);
  4718. TotalFreeInterfaces--;
  4719. // check whether the list became empty
  4720. if (FrontFreeList == NULL) {
  4721. RearFreeList = NULL;
  4722. ASSERT(TotalFreeInterfaces == 0);
  4723. }
  4724. }
  4725. // use the same timer to scan the RCEFreeList
  4726. PrevRCE = STRUCT_OF(RouteCacheEntry, &RCEFreeList, rce_next);
  4727. RCE = RCEFreeList;
  4728. while (RCE) {
  4729. if (RCE->rce_usecnt == 0) {
  4730. RouteCacheEntry *nextRCE;
  4731. // time to free this RCE
  4732. // remove it from the list
  4733. PrevRCE->rce_next = RCE->rce_next;
  4734. if (RCE->rce_flags & RCE_REFERENCED) {
  4735. // IF is ref'd so it better be in the IFList
  4736. LockedDerefIF((Interface *) RCE->rce_rte);
  4737. }
  4738. nextRCE = RCE->rce_next;
  4739. CTEFreeMem(RCE);
  4740. RCE = nextRCE;
  4741. } else {
  4742. PrevRCE = RCE;
  4743. RCE = RCE->rce_next;
  4744. }
  4745. }
  4746. CTEFreeLock(&RouteTableLock.Lock, Handle);
  4747. }
  4748. // Call RtChangeNotify for each of the entries in the change-notification
  4749. // list that we've built up so far. In the process, free each entry.
  4750. if (CurrentRtChangeList) {
  4751. RtChangeList *TmpRtChangeList;
  4752. do {
  4753. TmpRtChangeList = CurrentRtChangeList->rt_next;
  4754. RtChangeNotify(&CurrentRtChangeList->rt_info);
  4755. CTEFreeMem(CurrentRtChangeList);
  4756. CurrentRtChangeList = TmpRtChangeList;
  4757. } while(CurrentRtChangeList);
  4758. }
  4759. // If the driver is unloading, dont restart the timer
  4760. if (fRouteTimerStopping) {
  4761. CTESignal(&TcpipUnloadBlock, NDIS_STATUS_SUCCESS);
  4762. } else {
  4763. CTEStartTimer(&IPRouteTimer, IP_ROUTE_TIMEOUT, IPRouteTimeout, NULL);
  4764. }
  4765. }
  4766. //* FreeFWPacket - Free a fowarding packet to its pool.
  4767. //
  4768. // Input: Packet - Packet to be freed.
  4769. //
  4770. // Returns: nothing.
  4771. //
  4772. void
  4773. FreeFWPacket(PNDIS_PACKET Packet)
  4774. {
  4775. FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
  4776. ASSERT(FWC->fc_pc.pc_common.pc_IpsecCtx == NULL);
  4777. // Return any buffers to their respective pools.
  4778. //
  4779. if (FWC->fc_buffhead) {
  4780. PNDIS_BUFFER Head, Mdl;
  4781. Head = FWC->fc_buffhead;
  4782. do {
  4783. Mdl = Head;
  4784. Head = Head->Next;
  4785. MdpFree(Mdl);
  4786. } while (Head);
  4787. FWC->fc_buffhead = NULL;
  4788. }
  4789. if (FWC->fc_options) {
  4790. CTEFreeMem(FWC->fc_options);
  4791. FWC->fc_options = NULL;
  4792. FWC->fc_optlength = 0;
  4793. FWC->fc_pc.pc_common.pc_flags &= ~PACKET_FLAG_OPTIONS;
  4794. }
  4795. if (FWC->fc_iflink) {
  4796. DerefLink(FWC->fc_iflink);
  4797. FWC->fc_iflink = NULL;
  4798. }
  4799. if (FWC->fc_if) {
  4800. DerefIF(FWC->fc_if);
  4801. FWC->fc_if = NULL;
  4802. }
  4803. NdisReinitializePacket(Packet);
  4804. #if MCAST_BUG_TRACKING
  4805. FWC->fc_pc.pc_common.pc_owner = 0;
  4806. #endif
  4807. FwPacketFree(Packet);
  4808. }
  4809. //* FWSendComplete - Complete the transmission of a forwarded packet.
  4810. //
  4811. // This is called when the send of a forwarded packet is done. We'll free the
  4812. // resources and get the next send going, if there is one. If there isn't,
  4813. // we'll decrement the pending count.
  4814. //
  4815. // Input: Packet - Packet being completed.
  4816. // Buffer - Pointer to buffer chain being completed.
  4817. //
  4818. // Returns: Nothing.
  4819. //
  4820. void
  4821. FWSendComplete(void *SendContext, PNDIS_BUFFER Buffer, IP_STATUS SendStatus)
  4822. {
  4823. PNDIS_PACKET Packet = (PNDIS_PACKET) SendContext;
  4824. FWContext *FWC = (FWContext *) Packet->ProtocolReserved;
  4825. RouteSendQ *RSQ;
  4826. CTELockHandle Handle;
  4827. FWQ *NewFWQ;
  4828. PNDIS_PACKET NewPacket;
  4829. UNREFERENCED_PARAMETER(SendStatus);
  4830. #if MCAST_BUG_TRACKING
  4831. FWC->fc_MacHdrSize = SendStatus;
  4832. #endif
  4833. if (Buffer && FWC->fc_bufown) {
  4834. //Undo the offset manipulation
  4835. //which was done in super fast path
  4836. int MacHeaderSize = FWC->fc_MacHdrSize;
  4837. PNDIS_PACKET RtnPacket = FWC->fc_bufown;
  4838. NdisAdjustBuffer(
  4839. Buffer,
  4840. (PCHAR) NdisBufferVirtualAddress(Buffer) - MacHeaderSize,
  4841. NdisBufferLength(Buffer) + MacHeaderSize);
  4842. Packet->Private.Head = NULL;
  4843. Packet->Private.Tail = NULL;
  4844. NdisReturnPackets(&RtnPacket, 1);
  4845. FWC->fc_bufown = NULL;
  4846. #if MCAST_BUG_TRACKING
  4847. FWC->fc_sos = __LINE__;
  4848. #endif
  4849. FreeFWPacket(Packet);
  4850. return;
  4851. }
  4852. if (!IS_BCAST_DEST(FWC->fc_dtype))
  4853. RSQ = &((RouteInterface *) FWC->fc_if)->ri_q;
  4854. else
  4855. RSQ = BCastRSQ;
  4856. if (IS_MCAST_DEST(FWC->fc_dtype)) {
  4857. RSQ = NULL;
  4858. }
  4859. #if MCAST_BUG_TRACKING
  4860. FWC->fc_sos = __LINE__;
  4861. #endif
  4862. FreeFWPacket(Packet);
  4863. if (RSQ == NULL) {
  4864. return;
  4865. }
  4866. CTEGetLock(&RSQ->rsq_lock, &Handle);
  4867. ASSERT(RSQ->rsq_pending <= RSQ->rsq_maxpending);
  4868. RSQ->rsq_pending--;
  4869. ASSERT(*(int *)&RSQ->rsq_pending >= 0);
  4870. if (RSQ->rsq_qlength != 0) { // Have more to send.
  4871. // Make sure we're not already running through this. If we are, quit.
  4872. if (!RSQ->rsq_running) {
  4873. // We could schedule this off for an event, but under NT that
  4874. // could me a context switch for every completing packet in the
  4875. // normal case. For now, just do it in a loop guarded with
  4876. // rsq_running.
  4877. RSQ->rsq_running = TRUE;
  4878. // Loop while we haven't hit our send limit and we still have
  4879. // stuff to send.
  4880. while (RSQ->rsq_pending < RSQ->rsq_maxpending &&
  4881. RSQ->rsq_qlength != 0) {
  4882. ASSERT(RSQ->rsq_qh.fq_next != &RSQ->rsq_qh);
  4883. // Pull one off the queue, and update qlength.
  4884. NewFWQ = RSQ->rsq_qh.fq_next;
  4885. RSQ->rsq_qh.fq_next = NewFWQ->fq_next;
  4886. NewFWQ->fq_next->fq_prev = NewFWQ->fq_prev;
  4887. RSQ->rsq_qlength--;
  4888. // Update pending before we send.
  4889. RSQ->rsq_pending++;
  4890. CTEFreeLock(&RSQ->rsq_lock, Handle);
  4891. NewPacket = PACKET_FROM_FWQ(NewFWQ);
  4892. TransmitFWPacket(NewPacket,
  4893. ((FWContext *) NewPacket->ProtocolReserved)->fc_datalength);
  4894. CTEGetLock(&RSQ->rsq_lock, &Handle);
  4895. }
  4896. RSQ->rsq_running = FALSE;
  4897. }
  4898. }
  4899. CTEFreeLock(&RSQ->rsq_lock, Handle);
  4900. }
  4901. //* TransmitFWPacket - Transmit a forwarded packet on a link.
  4902. //
  4903. // Called when we know we can send a packet. We fix up the header, and send it.
  4904. // The packet is re-chained so that our header buffer (fc_hndisbuff) is at the front; if an IPSEC handler is registered it is called first, and the packet is then sent directly, fragmented, or handed to the broadcast send routine depending on fc_dtype and fc_mtu.
  4905. // Input: Packet - Packet to be sent. Its ProtocolReserved area is used as the FWContext.
  4906. // DataLength - Length of data. Together with fc_optlength it is compared against fc_mtu.
  4907. // Every early return below completes the packet first (FWSendComplete or FreeIPPacket); at the end the packet is completed here unless the send returned IP_PENDING.
  4908. // Returns: Nothing.
  4909. //
  4910. void
  4911. TransmitFWPacket(PNDIS_PACKET Packet, uint DataLength)
  4912. {
  4913. FWContext *FC = (FWContext *) Packet->ProtocolReserved;
  4914. PNDIS_BUFFER HBuffer, Buffer; // HBuffer: our header buffer; Buffer: head of the packet's original buffer chain
  4915. IP_STATUS Status;
  4916. ULONG ipsecByteCount = 0; // stays 0 unless the IPSEC block below runs
  4917. ULONG ipsecMTU;
  4918. ULONG ipsecFlags;
  4919. IPHeader *IPH;
  4920. ULONG len;
  4921. IPAddr SrcAddr = 0;
  4922. PNDIS_BUFFER OptBuffer = NULL;
  4923. PNDIS_BUFFER newBuf = NULL; // replacement buffer chain, if the IPSEC handler returns one
  4924. IPHeader *pSaveIPH = NULL; // points into saveIPH once the IPSEC block runs
  4925. UCHAR saveIPH[MAX_IP_HDR_SIZE + ICMP_HEADER_SIZE]; // copy of header (+ options + start of transport header) kept for ICMP errors
  4926. void *ArpCtxt = NULL;
  4927. //
  4928. // Fix up the packet. Remove the existing buffer chain, and put our
  4929. // header on the front. The old chain head is remembered in Buffer.
  4930. //
  4931. Buffer = Packet->Private.Head;
  4932. HBuffer = FC->fc_hndisbuff;
  4933. Packet->Private.Head = HBuffer;
  4934. Packet->Private.Tail = HBuffer;
  4935. NDIS_BUFFER_LINKAGE(HBuffer) = (PNDIS_BUFFER) NULL;
  4936. Packet->Private.TotalLength = sizeof(IPHeader);
  4937. Packet->Private.Count = 1;
  4938. TcpipQueryBuffer(HBuffer, (PVOID *) &IPH, (PUINT)&len, NormalPagePriority);
  4939. if (IPH == NULL) { // could not get an address for the header buffer: complete the packet and count a discard
  4940. #if MCAST_BUG_TRACKING
  4941. FC->fc_mtu = __LINE__;
  4942. #endif
  4943. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  4944. IPSInfo.ipsi_outdiscards++;
  4945. return;
  4946. }
  4947. Packet->Private.PhysicalCount =
  4948. ADDRESS_AND_SIZE_TO_SPAN_PAGES(IPH,
  4949. sizeof(IPHeader));
  4950. if (IPSecHandlerPtr) {
  4951. //
  4952. // See if IPSEC is enabled, see if it needs to do anything with this
  4953. // packet - we need to construct the full IP header in the first MDL
  4954. // before we call out to IPSEC.
  4955. //
  4956. IPSEC_ACTION Action;
  4957. ulong csum;
  4958. PUCHAR pTpt;
  4959. ULONG tptLen;
  4960. pSaveIPH = (IPHeader *) saveIPH;
  4961. *pSaveIPH = *IPH; // keep a copy of the header for any ICMP error sent later
  4962. csum = xsum(IPH, sizeof(IPHeader)); // checksum of the fixed header; the options' sum is added below when present
  4963. //
  4964. // Link the header buffer to the options buffer before we indicate
  4965. // to IPSEC
  4966. //
  4967. if (FC->fc_options) {
  4968. //
  4969. // Allocate the MDL for options too
  4970. //
  4971. NdisAllocateBuffer((PNDIS_STATUS) &Status, // Status (declared IP_STATUS) is reused to receive the NDIS status
  4972. &OptBuffer,
  4973. BufferPool,
  4974. FC->fc_options,
  4975. (uint) FC->fc_optlength);
  4976. if (Status != NDIS_STATUS_SUCCESS) {
  4977. //
  4978. // Couldn't get the needed option buffer.
  4979. //
  4980. #if MCAST_BUG_TRACKING
  4981. FC->fc_mtu = __LINE__;
  4982. #endif
  4983. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  4984. IPSInfo.ipsi_outdiscards++;
  4985. return;
  4986. }
  4987. NDIS_BUFFER_LINKAGE(HBuffer) = OptBuffer; // chain becomes: header -> options -> original data
  4988. NDIS_BUFFER_LINKAGE(OptBuffer) = Buffer;
  4989. //
  4990. // update the xsum in the IP header
  4991. //
  4992. FC->fc_pc.pc_common.pc_flags |= PACKET_FLAG_OPTIONS;
  4993. NdisChainBufferAtBack(Packet, OptBuffer);
  4994. csum += xsum(FC->fc_options, (uint) FC->fc_optlength);
  4995. csum = (csum >> 16) + (csum & 0xffff); // fold the carry back into the low 16 bits
  4996. csum += (csum >> 16);
  4997. } else {
  4998. NDIS_BUFFER_LINKAGE(HBuffer) = Buffer; // no options: header -> original data
  4999. }
  5000. //
  5001. // Prepare ourselves for sending an ICMP dont frag in case
  5002. // IPSEC bloats beyond the MTU on this interface.
  5003. //
  5004. // SendICMPErr expects the next transport header in the same
  5005. // contiguous buffer as the IPHeader, with or without options.
  5006. // We need to ensure that this is satisfied if in fact we need to
  5007. // fragment on account of IPSEC. So, setup the buffer right here.
  5008. //
  5009. //
  5010. // If this is a zero-payload packet (i.e. just a header), then Buffer
  5011. // is NULL and there is nothing for IPSEC to bloat. We only have to
  5012. // deal with the don't fragment flag if there is a Buffer.
  5013. //
  5014. if (Buffer && (pSaveIPH->iph_offset & IP_DF_FLAG)) {
  5015. TcpipQueryBuffer(Buffer, &pTpt, (PUINT) &tptLen,
  5016. NormalPagePriority);
  5017. if (pTpt == NULL) {
  5018. #if MCAST_BUG_TRACKING
  5019. FC->fc_mtu = __LINE__;
  5020. #endif
  5021. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  5022. IPSInfo.ipsi_outdiscards++;
  5023. return;
  5024. }
  5025. if (FC->fc_options) {
  5026. RtlCopyMemory(((PUCHAR) (pSaveIPH + 1)), // options go right after the saved fixed header
  5027. FC->fc_options, FC->fc_optlength);
  5028. }
  5029. RtlCopyMemory(((PUCHAR) (pSaveIPH + 1)) + FC->fc_optlength, // then at most ICMP_HEADER_SIZE bytes from the first data buffer
  5030. pTpt,
  5031. MIN(tptLen,ICMP_HEADER_SIZE));
  5032. }
  5033. IPH->iph_xsum = ~(ushort) csum;
  5034. SrcAddr = FC->fc_if->if_nte->nte_addr;
  5035. ipsecMTU = FC->fc_mtu;
  5036. if ((DataLength + (uint) FC->fc_optlength) < FC->fc_mtu) {
  5037. ipsecByteCount = FC->fc_mtu - (DataLength + (uint) FC->fc_optlength); // room left below the MTU. NOTE(review): looks like an in/out value (room in, bytes added out) - confirm against the IPSEC handler contract
  5038. }
  5039. ipsecFlags = IPSEC_FLAG_FORWARD;
  5040. Action = (*IPSecHandlerPtr) ((PUCHAR) IPH,
  5041. (PVOID) HBuffer,
  5042. FC->fc_if,
  5043. Packet,
  5044. &ipsecByteCount,
  5045. &ipsecMTU,
  5046. (PVOID) & newBuf,
  5047. &ipsecFlags,
  5048. FC->fc_dtype);
  5049. if (Action != eFORWARD) { // anything other than eFORWARD means the packet is dropped here
  5050. #if MCAST_BUG_TRACKING
  5051. FC->fc_mtu = __LINE__;
  5052. #endif
  5053. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  5054. IPSInfo.ipsi_outdiscards++;
  5055. //
  5056. // We can get the MTU reduced also when forwarding because in the nested
  5057. // tunneling configuration, the tunnel that starts from this machine
  5058. // can get an ICMP PMTU packet. We can't reduce the MTU on the interface
  5059. // but we can send back to the sender (which can be a router with yet
  5060. // another tunnel for this packet) a PMTU packet asking it to reduce its
  5061. // MTU even further. If the sender is an end-station, this PMTU info
  5062. // will eventually propagate back to the TCP stack. If it is a router, the
  5063. // same logic used here will be applied. The MTU info will thus be
  5064. // relayed all the way back to the original sender (TCP stack).
  5065. // Of course the more common case is that a packet with the added IPSec
  5066. // header exceeds the link MTU. Whichever case applies, we send the
  5067. // new MTU information back to the sender.
  5068. //
  5069. if (ipsecMTU) {
  5070. SendICMPIPSecErr(SrcAddr,
  5071. pSaveIPH,
  5072. ICMP_DEST_UNREACH,
  5073. FRAG_NEEDED,
  5074. net_long((ulong) (ipsecMTU + sizeof(IPHeader))));
  5075. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TransmitFWPacket: Sent ICMP frag_needed to %lx, from src: %lx\n", pSaveIPH->iph_src, SrcAddr));
  5076. }
  5077. return;
  5078. } else {
  5079. //
  5080. // Use the new buffer chain - IPSEC will restore the old one
  5081. // on send complete
  5082. //
  5083. if (newBuf) {
  5084. NdisReinitializePacket(Packet);
  5085. NdisChainBufferAtBack(Packet, newBuf);
  5086. }
  5087. DataLength += ipsecByteCount;
  5088. }
  5089. }
  5090. //
  5091. // Figure out how to send it. If it's not a broadcast we'll either
  5092. // send it or have it fragmented. If it is a broadcast we'll let our
  5093. // send broadcast routine handle it.
  5094. //
  5095. if (FC->fc_dtype != DEST_BCAST) {
  5096. if ((DataLength + (uint) FC->fc_optlength) <= FC->fc_mtu) { // fits within the MTU: send as a single packet
  5097. if (FC->fc_iflink) {
  5098. ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
  5099. ArpCtxt = FC->fc_iflink->link_arpctxt;
  5100. }
  5101. //
  5102. // In case of synchronous completion though
  5103. // FreeIPPacket is called, which will not
  5104. // free the FW packet.
  5105. //
  5106. Status = SendIPPacket(FC->fc_if,
  5107. FC->fc_nexthop,
  5108. Packet,
  5109. Buffer,
  5110. FC->fc_hbuff,
  5111. FC->fc_options,
  5112. (uint) FC->fc_optlength,
  5113. (BOOLEAN) (IPSecHandlerPtr != NULL),
  5114. ArpCtxt,
  5115. FALSE);
  5116. } else {
  5117. //
  5118. // Need to fragment this.
  5119. //
  5120. BufferReference *BR = CTEAllocMemN(sizeof(BufferReference), 'GiCT');
  5121. if (BR == (BufferReference *) NULL) {
  5122. //
  5123. // Couldn't get a BufferReference
  5124. //
  5125. #if MCAST_BUG_TRACKING
  5126. FC->fc_mtu = __LINE__;
  5127. #endif
  5128. if (!IPSecHandlerPtr) { // without IPSEC we can give up right here; with IPSEC the cleanup below still runs and the BR == NULL case is handled further down
  5129. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  5130. return;
  5131. }
  5132. } else {
  5133. BR->br_buffer = Buffer;
  5134. BR->br_refcount = 0;
  5135. CTEInitLock(&BR->br_lock);
  5136. FC->fc_pc.pc_br = BR; // the packet context now carries the buffer reference
  5137. BR->br_userbuffer = 0;
  5138. }
  5139. if (IPSecHandlerPtr) {
  5140. Buffer = NDIS_BUFFER_LINKAGE(HBuffer); // whatever currently follows the header buffer
  5141. //
  5142. // This is to ensure that options are freed appropriately.
  5143. // In the fragment code, the first fragment inherits the
  5144. // options of the entire packet; but these packets have
  5145. // no IPSEC context, hence cannot be freed appropriately.
  5146. // So, we allocate temporary options here and use these
  5147. // to represent the real options. These are freed when the
  5148. // first fragment is freed and the real options are freed here.
  5149. //
  5150. if (FC->fc_options) {
  5151. if (newBuf) {
  5152. //
  5153. // if a new buffer chain was returned above by IPSEC,
  5154. // then it is most probably a tunnel => options were
  5155. // copied, hence get rid of ours.
  5156. //
  5157. NdisFreeBuffer(OptBuffer);
  5158. CTEFreeMem(FC->fc_options);
  5159. FC->fc_options = NULL;
  5160. FC->fc_optlength = 0;
  5161. } else {
  5162. Buffer = NDIS_BUFFER_LINKAGE(OptBuffer); // skip past the options MDL before it is freed
  5163. NdisFreeBuffer(OptBuffer);
  5164. }
  5165. FC->fc_pc.pc_common.pc_flags &= ~PACKET_FLAG_OPTIONS;
  5166. }
  5167. NDIS_BUFFER_LINKAGE(HBuffer) = NULL; // reduce the packet to just the header buffer again
  5168. NdisReinitializePacket(Packet);
  5169. NdisChainBufferAtBack(Packet, HBuffer);
  5170. IPH->iph_xsum = 0;
  5171. //
  5172. // If the DF flag is set, make sure the packet doesn't need
  5173. // fragmentation. If this is the case, send an ICMP error
  5174. // now while we still have the original IP header. The ICMP
  5175. // message includes the MTU so the source host can perform
  5176. // Path MTU discovery.
  5177. //
  5178. // IPSEC headers might have caused this to happen.
  5179. // Send an ICMP to the source so it can adjust its MTU.
  5180. //
  5181. if (IPH->iph_offset & IP_DF_FLAG) {
  5182. IPSInfo.ipsi_fragfails++;
  5183. SendICMPIPSecErr(SrcAddr,
  5184. pSaveIPH,
  5185. ICMP_DEST_UNREACH,
  5186. FRAG_NEEDED,
  5187. net_long((ulong) (FC->fc_mtu - ipsecByteCount + sizeof(IPHeader))));
  5188. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"TransmitFWPacket: Sent ICMP frag_needed to %lx, from src: %lx\n", pSaveIPH->iph_src, SrcAddr));
  5189. // FreeIPPacket will do header fix up if
  5190. // original header chain was modified by ipsec/firewall/hdrincl
  5191. Status = IP_PACKET_TOO_BIG;
  5192. FreeIPPacket(Packet, TRUE, Status);
  5193. // Don't want to fall through and complete packet after
  5194. // we have freed it.
  5195. return;
  5196. } else {
  5197. if (BR == NULL) { // deferred handling of the BufferReference allocation failure above
  5198. FreeIPPacket(Packet, TRUE, IP_NO_RESOURCES);
  5199. return;
  5200. }
  5201. //
  5202. // DF bit is not set, ok to fragment
  5203. //
  5204. if (FC->fc_iflink) {
  5205. ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
  5206. ArpCtxt = FC->fc_iflink->link_arpctxt;
  5207. }
  5208. Status = IPFragment(FC->fc_if,
  5209. FC->fc_mtu - ipsecByteCount,
  5210. FC->fc_nexthop,
  5211. Packet,
  5212. FC->fc_hbuff,
  5213. Buffer,
  5214. DataLength,
  5215. FC->fc_options,
  5216. (uint) FC->fc_optlength,
  5217. (int *)NULL,
  5218. FALSE,
  5219. ArpCtxt);
  5220. //
  5221. // Fragmentation needed with the DF flag set should have
  5222. // been handled in IPForward. We don't have the original
  5223. // header any longer, so silently drop the packet.
  5224. //
  5225. ASSERT(Status != IP_PACKET_TOO_BIG);
  5226. }
  5227. } else {
  5228. //
  5229. // No IPSec handler. No need to check for DF bit here
  5230. // because unlike in the IPSec case, we are not messing
  5231. // with the MTUs so the DF check done in IPForwardPkt is
  5232. // valid
  5233. //
  5234. if (FC->fc_iflink) {
  5235. ASSERT(FC->fc_if->if_flags & IF_FLAGS_P2MP);
  5236. ArpCtxt = FC->fc_iflink->link_arpctxt;
  5237. }
  5238. Status = IPFragment(FC->fc_if,
  5239. FC->fc_mtu - ipsecByteCount,
  5240. FC->fc_nexthop,
  5241. Packet,
  5242. FC->fc_hbuff,
  5243. Buffer,
  5244. DataLength,
  5245. FC->fc_options,
  5246. (uint) FC->fc_optlength,
  5247. (int *)NULL,
  5248. FALSE,
  5249. ArpCtxt);
  5250. //
  5251. // Fragmentation needed with the DF flag set should have been
  5252. // handled in IPForward. We don't have the original header
  5253. // any longer, so silently drop the packet.
  5254. //
  5255. ASSERT(Status != IP_PACKET_TOO_BIG);
  5256. }
  5257. }
  5258. } else {
  5259. //
  5260. // Dest type is bcast
  5261. //
  5262. Status = SendIPBCast(FC->fc_srcnte,
  5263. FC->fc_nexthop,
  5264. Packet,
  5265. FC->fc_hbuff,
  5266. Buffer,
  5267. DataLength,
  5268. FC->fc_options,
  5269. (uint) FC->fc_optlength,
  5270. FC->fc_sos,
  5271. &FC->fc_index);
  5272. }
  5273. if (Status != IP_PENDING) { // the send did not pend, so the packet is completed here
  5274. #if MCAST_BUG_TRACKING
  5275. FC->fc_mtu = __LINE__;
  5276. #endif
  5277. FWSendComplete(Packet, Buffer, IP_SUCCESS);
  5278. }
  5279. }
  5280. //* SendFWPacket - Send a packet that needs to be forwarded.
  5281. //
  5282. // This routine is invoked when we actually get around to sending a packet.
  5283. // We look and see if we can give another queued send to the outgoing link,
  5284. // and if so we send on that link. Otherwise we put it on the outgoing queue
  5285. // and remove it later.
  5286. //
  5287. // Input: SrcNTE - Source NTE of packet.
  5288. // Packet - Packet to be send, containg all needed context info.
  5289. // Status - Status of transfer data.
  5290. // DataLength - Length in bytes of data to be send.
  5291. //
  5292. // Returns: Nothing.
  5293. //
  5294. void
  5295. SendFWPacket(PNDIS_PACKET Packet, NDIS_STATUS Status, uint DataLength)
  5296. {
  5297. FWContext *FC = (FWContext *) Packet->ProtocolReserved;
  5298. Interface *IF = FC->fc_if;
  5299. RouteSendQ *RSQ;
  5300. CTELockHandle Handle;
  5301. if (Status == NDIS_STATUS_SUCCESS) {
  5302. // Figure out which logical queue it belongs on, and if we don't already
  5303. // have too many things going there, send it. If we can't send it now we'll
  5304. // queue it for later.
  5305. if (IS_BCAST_DEST(FC->fc_dtype))
  5306. RSQ = BCastRSQ;
  5307. else
  5308. RSQ = &((RouteInterface *) IF)->ri_q;
  5309. CTEGetLock(&RSQ->rsq_lock, &Handle);
  5310. if ((RSQ->rsq_pending < RSQ->rsq_maxpending) && (RSQ->rsq_qlength == 0)) {
  5311. // We can send on this interface.
  5312. RSQ->rsq_pending++;
  5313. CTEFreeLock(&RSQ->rsq_lock, Handle);
  5314. TransmitFWPacket(Packet, DataLength);
  5315. } else { // Need to queue this packet for later.
  5316. FC->fc_datalength = DataLength;
  5317. FC->fc_q.fq_next = &RSQ->rsq_qh;
  5318. FC->fc_q.fq_prev = RSQ->rsq_qh.fq_prev;
  5319. RSQ->rsq_qh.fq_prev->fq_next = &FC->fc_q;
  5320. RSQ->rsq_qh.fq_prev = &FC->fc_q;
  5321. RSQ->rsq_qlength++;
  5322. CTEFreeLock(&RSQ->rsq_lock, Handle);
  5323. }
  5324. } else {
  5325. IPSInfo.ipsi_outdiscards++;
  5326. #if MCAST_BUG_TRACKING
  5327. FC->fc_mtu = __LINE__;
  5328. #endif
  5329. FreeFWPacket(Packet);
  5330. }
  5331. }
  5332. //* GetFWBufferChain - Get a buffer chain from our buffer pools
  5333. // sufficiently long enough to be able to copy DataLength bytes into it.
  5334. //
  5335. // Input: DataLength - Length in bytes that the buffer chain must be able
  5336. // to describe.
  5337. // Packet - Forwarding packet to link the buffer chain into.
  5338. // TailPointer - Returned pointer to the tail of the buffer chain.
  5339. //
  5340. // Returns: Pointer to the head of the buffer chain on success, NULL
  5341. // on failure.
  5342. //
  5343. PNDIS_BUFFER
  5344. GetFWBufferChain(uint DataLength, PNDIS_PACKET Packet,
  5345. PNDIS_BUFFER *TailPointer)
  5346. {
  5347. KIRQL OldIrql;
  5348. PNDIS_BUFFER Head, Tail, Mdl;
  5349. HANDLE PoolHandle;
  5350. PVOID Buffer;
  5351. uint Remaining, Length;
  5352. // Raise to dispatch level to make multiple calls to MdpAllocate
  5353. // more efficient. This is no less efficient in the single call case
  5354. // either.
  5355. //
  5356. #if !MILLEN
  5357. OldIrql = KeRaiseIrqlToDpcLevel();
  5358. #endif
  5359. // Loop allocating buffers until we have enough to describe DataLength.
  5360. //
  5361. Head = NULL;
  5362. Tail = NULL;
  5363. for (Remaining = DataLength; Remaining != 0; Remaining -= Length) {
  5364. // Figure out which buffer pool to use based on the length
  5365. // of data remaining. Use "large" buffers unless the remaining
  5366. // data will fit in a "small" buffer.
  5367. //
  5368. if (Remaining >= BUFSIZE_LARGE_POOL) {
  5369. PoolHandle = IpForwardLargePool;
  5370. Length = BUFSIZE_LARGE_POOL;
  5371. } else if (Remaining > BUFSIZE_SMALL_POOL) {
  5372. PoolHandle = IpForwardLargePool;
  5373. Length = Remaining;
  5374. } else {
  5375. PoolHandle = IpForwardSmallPool;
  5376. Length = Remaining;
  5377. }
  5378. // Allocate a buffer from the chosen pool and link it at the tail.
  5379. //
  5380. Mdl = MdpAllocateAtDpcLevel(PoolHandle, &Buffer);
  5381. if (Mdl) {
  5382. // Expect MdpAllocate to initialize Mdl->Next.
  5383. //
  5384. ASSERT(!Mdl->Next);
  5385. NdisAdjustBufferLength(Mdl, Length);
  5386. if (!Head) {
  5387. Head = Mdl;
  5388. } else {
  5389. Tail->Next = Mdl;
  5390. }
  5391. Tail = Mdl;
  5392. } else {
  5393. // Free what we allocated so far and quit the loop.
  5394. //
  5395. while (Head) {
  5396. Mdl = Head;
  5397. Head = Head->Next;
  5398. MdpFree(Mdl);
  5399. }
  5400. // Need to leave the loop with Head == NULL in the error
  5401. // case for the remaining logic to work correctly.
  5402. //
  5403. ASSERT(!Head);
  5404. break;
  5405. }
  5406. }
  5407. #if !MILLEN
  5408. KeLowerIrql(OldIrql);
  5409. #endif
  5410. // If we've succeeded, put the buffer chain in the packet and
  5411. // adjust our forwarding context.
  5412. //
  5413. if (Head) {
  5414. FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
  5415. ASSERT(Tail);
  5416. NdisChainBufferAtFront(Packet, Head);
  5417. FWC->fc_buffhead = Head;
  5418. FWC->fc_bufftail = Tail;
  5419. *TailPointer = Tail;
  5420. }
  5421. return Head;
  5422. }
  5423. //* AllocateCopyBuffers - Get a buffer chain from our buffer pools
  5424. // sufficiently long enough to be able to copy DataLength bytes into it.
  5425. //
  5426. // Input: Packet - Forwarding packet to link the buffer chain into.
  5427. // DataLength - Length in bytes that the buffer chain must be able
  5428. // to describe.
  5429. // Head - Returned pointer to the head of the buffer chain.
  5430. // CountBuffers - Returned count of buffers in the chain.
  5431. //
  5432. // Returns: NDIS_STATUS_SUCCESS or NDIS_STATUS_RESOURCES
  5433. //
  5434. NDIS_STATUS
  5435. AllocateCopyBuffers(PNDIS_PACKET Packet, uint DataLength, PNDIS_BUFFER *Head,
  5436. uint *CountBuffers)
  5437. {
  5438. PNDIS_BUFFER Tail, Mdl;
  5439. uint Count = 0;
  5440. *Head = GetFWBufferChain(DataLength, Packet, &Tail);
  5441. if (*Head) {
  5442. for (Count = 1, Mdl = *Head; Mdl != Tail; Mdl = Mdl->Next, Count++);
  5443. *CountBuffers = Count;
  5444. return NDIS_STATUS_SUCCESS;
  5445. }
  5446. return NDIS_STATUS_RESOURCES;
  5447. }
  5448. //* GetFWBuffer - Get a list of buffers for forwarding.
  5449. //
  5450. // This routine gets a list of buffers for forwarding, and puts the data into
  5451. // it. This may involve calling TransferData, or we may be able to copy
  5452. // directly into them ourselves.
  5453. //
  5454. // Input: SrcNTE - Pointer to NTE on which packet was received.
  5455. // Packet - Packet being forwarded, used for TD.
  5456. // Data - Pointer to data buffer being forwarded.
  5457. // DataLength - Length in bytes of Data.
  5458. // BufferLength - Length in bytes available in buffer pointer to
  5459. // by Data.
  5460. // Offset - Offset into original data from which to transfer.
  5461. // LContext1, LContext2 - Context values for the link layer.
  5462. //
  5463. // Returns: NDIS_STATUS of attempt to get buffer.
  5464. //
  5465. NDIS_STATUS
  5466. GetFWBuffer(NetTableEntry * SrcNTE, PNDIS_PACKET Packet, uchar * Data,
  5467. uint DataLength, uint BufferLength, uint Offset,
  5468. NDIS_HANDLE LContext1, uint LContext2)
  5469. {
  5470. PNDIS_BUFFER FirstBuffer, CurrentBuffer;
  5471. void *DestPtr;
  5472. Interface *SrcIF;
  5473. uint FirewallMode = 0;
  5474. FirstBuffer = GetFWBufferChain(DataLength, Packet, &CurrentBuffer);
  5475. if (!FirstBuffer) {
  5476. return NDIS_STATUS_RESOURCES;
  5477. }
  5478. #if DBG
  5479. {
  5480. uint TotalBufferSize;
  5481. PNDIS_BUFFER TempBuffer;
  5482. // Sanity check the buffer chain and packet.
  5483. TempBuffer = FirstBuffer;
  5484. TotalBufferSize = 0;
  5485. while (TempBuffer != NULL) {
  5486. TotalBufferSize += NdisBufferLength(TempBuffer);
  5487. TempBuffer = NDIS_BUFFER_LINKAGE(TempBuffer);
  5488. }
  5489. ASSERT(TotalBufferSize == DataLength);
  5490. #pragma warning(push)
  5491. #pragma warning(disable:4127) // conditional expression is constant
  5492. NdisQueryPacket(Packet, NULL, NULL, NULL, &TotalBufferSize);
  5493. #pragma warning(pop)
  5494. ASSERT(TotalBufferSize == DataLength);
  5495. }
  5496. #endif
  5497. // First buffer points to the list of buffers we have. If we can copy the
  5498. // data here, do so, otherwise invoke the link's transfer data routine.
  5499. // if ((DataLength <= BufferLength) && (SrcNTE->nte_flags & NTE_COPY))
  5500. // change because of firewall
  5501. FirewallMode = ProcessFirewallQ();
  5502. // If DataLength is more than Lookahead size, we may need to
  5503. // call transfer data handler. If IpSec is enabled, make sure that this
  5504. // instance is not from loopback interface.
  5505. if (((DataLength <= BufferLength) && (SrcNTE->nte_flags & NTE_COPY)) ||
  5506. (FirewallMode) || (SrcNTE->nte_if->if_promiscuousmode) ||
  5507. ((SrcNTE != LoopNTE) && IPSecHandlerPtr &&
  5508. RefPtrValid(&FilterRefPtr))) {
  5509. while (DataLength) {
  5510. uint CopyLength;
  5511. TcpipQueryBuffer(FirstBuffer, &DestPtr, &CopyLength, NormalPagePriority);
  5512. if (DestPtr == NULL) {
  5513. return NDIS_STATUS_RESOURCES;
  5514. }
  5515. RtlCopyMemory(DestPtr, Data, CopyLength);
  5516. Data += CopyLength;
  5517. DataLength -= CopyLength;
  5518. FirstBuffer = NDIS_BUFFER_LINKAGE(FirstBuffer);
  5519. }
  5520. return NDIS_STATUS_SUCCESS;
  5521. }
  5522. // We need to call transfer data for this.
  5523. SrcIF = SrcNTE->nte_if;
  5524. return (*(SrcIF->if_transfer)) (SrcIF->if_lcontext, LContext1, LContext2,
  5525. Offset, DataLength, Packet, &DataLength);
  5526. }
  5527. //* GetFWPacket - Get a packet for forwarding.
  5528. //
  5529. // Called when we need to get a packet to forward a datagram.
  5530. //
  5531. // Input: ReturnedPacket - Pointer to where to return a packet.
  5532. //
  5533. // Returns: Pointer to IP header buffer.
  5534. //
  5535. IPHeader *
  5536. GetFWPacket(PNDIS_PACKET *ReturnedPacket)
  5537. {
  5538. PNDIS_PACKET Packet;
  5539. Packet = FwPacketAllocate(0, 0, 0);
  5540. if (Packet) {
  5541. FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
  5542. PNDIS_PACKET_EXTENSION PktExt =
  5543. NDIS_PACKET_EXTENSION_FROM_PACKET(Packet);
  5544. #if MCAST_BUG_TRACKING
  5545. if (FWC->fc_pc.pc_common.pc_owner == PACKET_OWNER_IP) {
  5546. DbgPrint("Packet %x",Packet);
  5547. DbgBreakPoint();
  5548. }
  5549. FWC->fc_pc.pc_common.pc_owner = PACKET_OWNER_IP;
  5550. #else
  5551. ASSERT(FWC->fc_pc.pc_common.pc_owner == PACKET_OWNER_IP);
  5552. #endif
  5553. ASSERT(FWC->fc_hndisbuff);
  5554. ASSERT(FWC->fc_hbuff);
  5555. ASSERT(FWC->fc_pc.pc_pi == RtPI);
  5556. ASSERT(FWC->fc_pc.pc_context == Packet);
  5557. FWC->fc_pc.pc_common.pc_flags |= PACKET_FLAG_IPHDR;
  5558. FWC->fc_pc.pc_common.pc_IpsecCtx = NULL;
  5559. FWC->fc_pc.pc_br = NULL;
  5560. FWC->fc_pc.pc_ipsec_flags = 0;
  5561. PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(Packet);
  5562. PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo] = NULL;
  5563. PktExt->NdisPacketInfo[IpSecPacketInfo] = NULL;
  5564. PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] = NULL;
  5565. // Make sure that fwpackets cancel ids are initialized.
  5566. #if !MILLEN
  5567. NDIS_SET_PACKET_CANCEL_ID(Packet, NULL);
  5568. #endif
  5569. *ReturnedPacket = Packet;
  5570. return FWC->fc_hbuff;
  5571. }
  5572. return NULL;
  5573. }
  5574. //* IPForwardPkt - Forward a packet.
  5575. //
  5576. // The routine called when we need to forward a packet. We check if we're
  5577. // supposed to act as a gateway, and if we are and the incoming packet is a
  5578. // bcast we check and see if we're supposed to forward broadcasts. Assuming
  5579. // we're supposed to forward it, we will process any options. If we find some,
  5580. // we do some validation to make sure everything is good. After that, we look
  5581. // up the next hop. If we can't find one, we'll issue an error. Then we get
  5582. // a packet and buffers, and send it.
  5583. //
  5584. // Input: SrcNTE - NTE for net on which we received this.
  5585. // Header - Pointer to received IPheader.
  5586. // HeaderLength - Length of header.
  5587. // Data - Pointer to data to be forwarded.
  5588. // BufferLength - Length in bytes available in the buffer.
  5589. // LContext1 - lower-layer context supplied upon reception
  5590. // LContext2 - lower-layer context supplied upon reception
  5591. // DestType - Type of destination.
  5592. // MacHeadersize - Media header size
  5593. // pNdisBuffer - Pointer to NDIS_BUFFER describing the frame
  5594. // pClientCnt - Ndis return variable indicating
  5595. // if miniport buffer is pended
  5596. // LinkCtxt - contains per-link context for link-receptions
  5597. //
  5598. // Returns: Nothing.
  5599. //
  5600. void
  5601. IPForwardPkt(NetTableEntry *SrcNTE, IPHeader UNALIGNED *Header,
  5602. uint HeaderLength, void *Data, uint BufferLength,
  5603. NDIS_HANDLE LContext1, uint LContext2, uchar DestType,
  5604. uint MacHeaderSize, PNDIS_BUFFER pNdisBuffer, uint *pClientCnt,
  5605. LinkEntry *LinkCtxt)
  5606. {
  5607. uchar *Options;
  5608. uchar OptLength;
  5609. OptIndex Index;
  5610. IPAddr DestAddr; // IP address we're routing towards.
  5611. uchar SendOnSource = DisableSendOnSource;
  5612. IPAddr NextHop; // Next hop IP address.
  5613. PNDIS_PACKET Packet;
  5614. FWContext *FWC;
  5615. IPHeader *NewHeader; // New header.
  5616. NDIS_STATUS Status;
  5617. uint DataLength;
  5618. CTELockHandle TableHandle;
  5619. uchar ErrIndex;
  5620. IPAddr OutAddr; // Address of interface we're send out on.
  5621. Interface *IF; // Interface we're sending out on.
  5622. uint MTU;
  5623. BOOLEAN HoldPkt = TRUE;
  5624. RouteCacheEntry *FwdRce;
  5625. uint FirewallMode = 0;
  5626. void *ArpCtxt = NULL;
  5627. LinkEntry *Link = NULL;
  5628. DEBUGMSG(DBG_TRACE && DBG_FWD,
  5629. (DTEXT("IPForwardPkt(%x, %x, %d, %x, %d,...)\n"),
  5630. SrcNTE, Header, HeaderLength, Data, BufferLength));
  5631. if (ForwardPackets) {
  5632. DestAddr = Header->iph_dest;
  5633. // If it's a broadcast, see if we can forward it. We won't forward it if broadcast
  5634. // forwarding is turned off, or the destination if the local (all one's) broadcast,
  5635. // or it's a multicast (Class D address). We'll pass through subnet broadcasts in
  5636. // case there's a source route. This would be odd - maybe we should disable this?
  5637. if (IS_BCAST_DEST(DestType)) {
  5638. #if IPMCAST
  5639. if (((DestType == DEST_REM_MCAST) ||
  5640. (DestType == DEST_MCAST)) &&
  5641. (g_dwMcastState == MCAST_STARTED)) {
  5642. BOOLEAN Filter;
  5643. //
  5644. // Dont forward local groups
  5645. //
  5646. if (((Header->iph_dest & 0x00FFFFFF) == 0x000000E0) ||
  5647. (Header->iph_ttl <= 1) ||
  5648. !(SrcNTE->nte_if->if_mcastflags & IPMCAST_IF_ENABLED)) {
  5649. return;
  5650. }
  5651. if (pNdisBuffer) {
  5652. Filter = IPMForwardAfterRcvPkt(SrcNTE, Header, HeaderLength,
  5653. Data, BufferLength,
  5654. LContext1, LContext2,
  5655. DestType, MacHeaderSize,
  5656. pNdisBuffer, pClientCnt,
  5657. LinkCtxt);
  5658. } else {
  5659. Filter = IPMForwardAfterRcv(SrcNTE, Header, HeaderLength,
  5660. Data, BufferLength, LContext1,
  5661. LContext2, DestType, LinkCtxt);
  5662. }
  5663. if (Filter && RefPtrValid(&FilterRefPtr)) {
  5664. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
  5665. }
  5666. return;
  5667. }
  5668. #endif
  5669. if (!ForwardBCast) {
  5670. if (DestType > DEST_REMOTE)
  5671. IPSInfo.ipsi_inaddrerrors++;
  5672. if (RefPtrValid(&FilterRefPtr)) {
  5673. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
  5674. }
  5675. return;
  5676. }
  5677. if ((DestAddr == IP_LOCAL_BCST) ||
  5678. (DestAddr == IP_ZERO_BCST) ||
  5679. (DestType == DEST_SN_BCAST) ||
  5680. CLASSD_ADDR(DestAddr)) {
  5681. if (RefPtrValid(&FilterRefPtr)) {
  5682. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
  5683. }
  5684. return;
  5685. }
  5686. // broad cast
  5687. HoldPkt = FALSE;
  5688. } else {
  5689. FirewallMode = ProcessFirewallQ();
  5690. if ((DestType == DEST_REMOTE) && (!FirewallMode)) {
  5691. NetTableEntry* OrigNTE = SrcNTE;
  5692. SrcNTE = BestNTEForIF(Header->iph_src, SrcNTE->nte_if, FALSE);
  5693. if (SrcNTE == NULL) {
  5694. // Something bad happened.
  5695. if (RefPtrValid(&FilterRefPtr)) {
  5696. NotifyFilterOfDiscard(OrigNTE, Header, Data,
  5697. BufferLength);
  5698. }
  5699. return;
  5700. }
  5701. }
  5702. }
  5703. // If the TTL would expire, send a message.
  5704. if (Header->iph_ttl <= 1) {
  5705. IPSInfo.ipsi_inhdrerrors++;
  5706. if (!RefPtrValid(&FilterRefPtr) ||
  5707. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
  5708. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_TIME_EXCEED,
  5709. TTL_IN_TRANSIT, 0, 0);
  5710. }
  5711. return;
  5712. }
  5713. DataLength = net_short(Header->iph_length) - HeaderLength;
  5714. Index.oi_srtype = NO_SR; // So we know we don't have a source route.
  5715. Index.oi_srindex = MAX_OPT_SIZE;
  5716. Index.oi_rrindex = MAX_OPT_SIZE;
  5717. Index.oi_tsindex = MAX_OPT_SIZE;
  5718. // Now check for options, and process any we find.
  5719. if (HeaderLength != sizeof(IPHeader)) {
  5720. IPOptInfo OptInfo;
  5721. RtlZeroMemory(&OptInfo, sizeof(OptInfo));
  5722. // Options and possible SR . No buffer ownership opt
  5723. HoldPkt = FALSE;
  5724. OptInfo.ioi_options = (uchar *) (Header + 1);
  5725. OptInfo.ioi_optlength = (uchar) (HeaderLength - sizeof(IPHeader));
  5726. // Validate options, and set up indices.
  5727. if ((ErrIndex = ParseRcvdOptions(&OptInfo, &Index)) < MAX_OPT_SIZE) {
  5728. IPSInfo.ipsi_inhdrerrors++;
  5729. if (!RefPtrValid(&FilterRefPtr) ||
  5730. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
  5731. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_PARAM_PROBLEM,
  5732. PTR_VALID, ((uint)ErrIndex + sizeof(IPHeader)), 0);
  5733. }
  5734. return;
  5735. }
  5736. // If source routing option was set, and source routing is disabled,
  5737. // then drop the packet.
  5738. if ((OptInfo.ioi_flags & IP_FLAG_SSRR) && DisableIPSourceRouting) {
  5739. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,"Pkt dropped - Source routing disabled\n"));
  5740. if (RefPtrValid(&FilterRefPtr)) {
  5741. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength);
  5742. }
  5743. return;
  5744. }
  5745. Options = CTEAllocMemN(OptInfo.ioi_optlength, 'IiCT');
  5746. if (!Options) {
  5747. IPSInfo.ipsi_outdiscards++;
  5748. return; // Couldn't get an
  5749. } // option buffer, return;
  5750. // Now copy into our buffer.
  5751. RtlCopyMemory(Options, OptInfo.ioi_options, OptLength = OptInfo.ioi_optlength);
  5752. // See if we have a source routing option, and if so we may need to process it. If
  5753. // we have one, and the destination in the header is us, we need to update the
  5754. // route and the header.
  5755. if (Index.oi_srindex != MAX_OPT_SIZE) {
  5756. if (DestType >= DEST_REMOTE) { // Not for us.
  5757. if (Index.oi_srtype == IP_OPT_SSRR) {
  5758. // This packet is strict source routed, but we're not
  5759. // the destination! We can't continue from here -
  5760. // perhaps we should send an ICMP, but I'm not sure
  5761. // which one it would be.
  5762. CTEFreeMem(Options);
  5763. IPSInfo.ipsi_inaddrerrors++;
  5764. if (RefPtrValid(&FilterRefPtr)) {
  5765. NotifyFilterOfDiscard(SrcNTE, Header, Data,
  5766. BufferLength);
  5767. }
  5768. return;
  5769. }
  5770. Index.oi_srindex = MAX_OPT_SIZE; // Don't need to update this.
  5771. } else { // This came here, we need to update the destination address.
  5772. uchar *SROpt = Options + Index.oi_srindex;
  5773. uchar Pointer;
  5774. Pointer = SROpt[IP_OPT_PTR] - 1; // Index starts from one.
  5775. // Get the next hop address, and see if it's a broadcast.
  5776. DestAddr = *(IPAddr UNALIGNED *) & SROpt[Pointer];
  5777. DestType = GetAddrType(DestAddr); // Find address type.
  5778. if (IS_BCAST_DEST(DestType)) {
  5779. if (!RefPtrValid(&FilterRefPtr) ||
  5780. NotifyFilterOfDiscard(SrcNTE, Header, Data,
  5781. BufferLength)) {
  5782. SendICMPErr(SrcNTE->nte_addr, Header,
  5783. ICMP_DEST_UNREACH, SR_FAILED, 0, 0);
  5784. }
  5785. IPSInfo.ipsi_inhdrerrors++;
  5786. CTEFreeMem(Options);
  5787. return;
  5788. }
  5789. // If we came through here, any sort of broadcast needs
  5790. // to be sent out the way it came, so update that flag.
  5791. SendOnSource = EnableSendOnSource;
  5792. }
  5793. }
  5794. } else { // No options.
  5795. Options = (uchar *) NULL;
  5796. OptLength = 0;
  5797. }
  5798. IPSInfo.ipsi_forwdatagrams++;
  5799. // We've processed the options. Now look up the next hop. If we can't
  5800. // find one, send back an error.
  5801. IF = LookupForwardingNextHop(DestAddr, Header->iph_src, &NextHop, &MTU,
  5802. Header->iph_protocol, (uchar *) Data,
  5803. BufferLength, &FwdRce, &Link,
  5804. Header->iph_src);
  5805. if (IF == NULL) {
  5806. // Couldn't find an outgoing route.
  5807. IPSInfo.ipsi_outnoroutes++;
  5808. if (!RefPtrValid(&FilterRefPtr) ||
  5809. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
  5810. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
  5811. HOST_UNREACH, 0, 0);
  5812. }
  5813. if (Options)
  5814. CTEFreeMem(Options);
  5815. return;
  5816. } else {
  5817. if (IF->if_flags & IF_FLAGS_P2MP) {
  5818. ASSERT(Link);
  5819. if (Link) {
  5820. ArpCtxt = Link->link_arpctxt;
  5821. }
  5822. }
  5823. }
  5824. //
  5825. // If the DF flag is set, make sure the packet doesn't need
  5826. // fragmentation. If this is the case, send an ICMP error
  5827. // now while we still have the original IP header. The ICMP
  5828. // message includes the MTU so the source host can perform
  5829. // Path MTU discovery.
  5830. //
  5831. if ((Header->iph_offset & IP_DF_FLAG) &&
  5832. ((DataLength + (uint) OptLength) > MTU)) {
  5833. ASSERT((MTU + sizeof(IPHeader)) >= 68);
  5834. ASSERT((MTU + sizeof(IPHeader)) <= 0xFFFF);
  5835. IPSInfo.ipsi_fragfails++;
  5836. if (!RefPtrValid(&FilterRefPtr) ||
  5837. NotifyFilterOfDiscard(SrcNTE, Header, Data, BufferLength)) {
  5838. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
  5839. FRAG_NEEDED,
  5840. net_long((ulong)(MTU + sizeof(IPHeader))), 0);
  5841. }
  5842. if (Options)
  5843. CTEFreeMem(Options);
  5844. if (Link) {
  5845. DerefLink(Link);
  5846. }
  5847. DerefIF(IF);
  5848. return;
  5849. }
  5850. if (DataLength > MTU) {
  5851. HoldPkt = FALSE;
  5852. }
  5853. // If there is no ipsec policy, it is safe to
  5854. // reuse the indicated mdl chain.
  5855. if (IPSecStatus) {
  5856. HoldPkt = FALSE;
  5857. }
  5858. // See if we need to filter this packet. If we do, call the filter routine
  5859. // to see if it's OK to forward it.
  5860. if (RefPtrValid(&FilterRefPtr)) {
  5861. Interface *InIF = SrcNTE->nte_if;
  5862. uint InIFIndex;
  5863. IPAddr InLinkNextHop;
  5864. IPAddr OutLinkNextHop;
  5865. FORWARD_ACTION Action;
  5866. IPPacketFilterPtr FilterPtr;
  5867. uint FirewallMode = 0;
  5868. FirewallMode = ProcessFirewallQ();
  5869. if (FirewallMode) {
  5870. InIFIndex = INVALID_IF_INDEX;
  5871. InLinkNextHop = NULL_IP_ADDR;
  5872. } else {
  5873. InIFIndex = InIF->if_index;
  5874. if ((InIF->if_flags & IF_FLAGS_P2MP) && LinkCtxt) {
  5875. InLinkNextHop = LinkCtxt->link_NextHop;
  5876. } else {
  5877. InLinkNextHop = NULL_IP_ADDR;
  5878. }
  5879. }
  5880. if ((IF->if_flags & IF_FLAGS_P2MP) && Link) {
  5881. OutLinkNextHop = Link->link_NextHop;
  5882. } else {
  5883. OutLinkNextHop = NULL_IP_ADDR;
  5884. }
  5885. FilterPtr = AcquireRefPtr(&FilterRefPtr);
  5886. Action = (*FilterPtr) (Header, Data, BufferLength,
  5887. InIFIndex, IF->if_index,
  5888. InLinkNextHop, OutLinkNextHop);
  5889. ReleaseRefPtr(&FilterRefPtr);
  5890. if (Action != FORWARD) {
  5891. IPSInfo.ipsi_outdiscards++;
  5892. if (Options)
  5893. CTEFreeMem(Options);
  5894. if (Link) {
  5895. DerefLink(Link);
  5896. }
  5897. DerefIF(IF);
  5898. #if FFP_SUPPORT
  5899. // Seed a -ve FFP entry; Packet henceforth dropped in NIC Driver
  5900. TCPTRACE(("Filter dropped a packet, Seeding -ve cache entry\n"));
  5901. IPSetInFFPCaches(Header, Data, BufferLength, (ULONG) FFP_DISCARD_PACKET);
  5902. #endif
  5903. return;
  5904. }
  5905. }
  5906. // If we have a strict source route and the next hop is not the one
  5907. // specified, send back an error.
  5908. if (Index.oi_srtype == IP_OPT_SSRR) {
  5909. if (DestAddr != NextHop) {
  5910. IPSInfo.ipsi_outnoroutes++;
  5911. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_DEST_UNREACH,
  5912. SR_FAILED, 0, 0);
  5913. CTEFreeMem(Options);
  5914. if (Link) {
  5915. DerefLink(Link);
  5916. }
  5917. DerefIF(IF);
  5918. return;
  5919. }
  5920. }
  5921. // Update the options, if we can and we need to.
  5922. if ((DestType != DEST_BCAST) && Options != NULL) {
  5923. NetTableEntry *OutNTE;
  5924. // Need to find a valid source address for the outgoing interface.
  5925. CTEGetLock(&RouteTableLock.Lock, &TableHandle);
  5926. OutNTE = BestNTEForIF(DestAddr, IF, FALSE);
  5927. if (OutNTE == NULL) {
  5928. // No NTE for this IF. Something's wrong, just bail out.
  5929. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  5930. CTEFreeMem(Options);
  5931. if (Link) {
  5932. DerefLink(Link);
  5933. }
  5934. DerefIF(IF);
  5935. return;
  5936. } else {
  5937. OutAddr = OutNTE->nte_addr;
  5938. CTEFreeLock(&RouteTableLock.Lock, TableHandle);
  5939. }
  5940. ErrIndex = UpdateOptions(Options, &Index,
  5941. (IP_LOOPBACK(OutAddr) ? DestAddr : OutAddr));
  5942. if (ErrIndex != MAX_OPT_SIZE) {
  5943. IPSInfo.ipsi_inhdrerrors++;
  5944. SendICMPErr(OutAddr, Header, ICMP_PARAM_PROBLEM, PTR_VALID,
  5945. ((ulong) ErrIndex + sizeof(IPHeader)), 0);
  5946. CTEFreeMem(Options);
  5947. if (Link) {
  5948. DerefLink(Link);
  5949. }
  5950. DerefIF(IF);
  5951. return;
  5952. }
  5953. }
  5954. // Send a redirect, if we need to. We'll send a redirect if the packet
  5955. // is going out on the interface it came in on and the next hop address
  5956. // is on the same subnet as the NTE we received it on, and if there
  5957. // are no source route options. We also need to make sure that the
  5958. // source of the datagram is on the I/F we received it on, so we don't
  5959. // send a redirect to another gateway.
  5960. // SendICMPErr will check and not send a redirect if this is a broadcast.
  5961. if ((SrcNTE->nte_if == IF) &&
  5962. IP_ADDR_EQUAL(SrcNTE->nte_addr & SrcNTE->nte_mask,
  5963. NextHop & SrcNTE->nte_mask) &&
  5964. IP_ADDR_EQUAL(SrcNTE->nte_addr & SrcNTE->nte_mask,
  5965. Header->iph_src & SrcNTE->nte_mask)) {
  5966. if (Index.oi_srindex == MAX_OPT_SIZE) {
  5967. #ifdef REDIRECT_DEBUG
  5968. #define PR_IP_ADDR(x) \
  5969. ((x)&0x000000ff),(((x)&0x0000ff00)>>8),(((x)&0x00ff0000)>>16),(((x)&0xff000000)>>24)
  5970. DbgPrint("IP: Sending Redirect. IF = %x SRC_NTE = %x SrcNteIF = %x\n",
  5971. IF, SrcNTE, SrcNTE->nte_if);
  5972. DbgPrint("IP: SrcNteAddr = %d.%d.%d.%d Mask = %d.%d.%d.%d\n",
  5973. PR_IP_ADDR(SrcNTE->nte_addr), PR_IP_ADDR(SrcNTE->nte_mask));
  5974. DbgPrint("IP: NextHop = %d.%d.%d.%d Header Src = %d.%d.%d.%d, Dst = %d.%d.%d.%d\n",
  5975. PR_IP_ADDR(NextHop),
  5976. PR_IP_ADDR(Header->iph_src),
  5977. PR_IP_ADDR(Header->iph_dest));
  5978. #endif
  5979. SendICMPErr(SrcNTE->nte_addr, Header, ICMP_REDIRECT,
  5980. REDIRECT_HOST, NextHop, 0);
  5981. }
  5982. }
  5983. // We have the next hop. Now get a forwarding packet.
  5984. if ((NewHeader = GetFWPacket(&Packet)) != NULL) {
  5985. Packet->Private.Flags |= NDIS_PROTOCOL_ID_TCP_IP;
  5986. // Save the packet forwarding context info.
  5987. FWC = (FWContext *) Packet->ProtocolReserved;
  5988. FWC->fc_options = Options;
  5989. FWC->fc_optlength = OptLength;
  5990. FWC->fc_if = IF;
  5991. FWC->fc_mtu = MTU;
  5992. FWC->fc_srcnte = SrcNTE;
  5993. FWC->fc_nexthop = NextHop;
  5994. FWC->fc_sos = SendOnSource;
  5995. FWC->fc_dtype = DestType;
  5996. FWC->fc_index = Index;
  5997. FWC->fc_iflink = Link;
  5998. if (pNdisBuffer && HoldPkt &&
  5999. (NDIS_GET_PACKET_STATUS((PNDIS_PACKET) LContext1) != NDIS_STATUS_RESOURCES)) {
  6000. uint xsum;
  6001. DEBUGMSG(DBG_INFO && DBG_FWD,
  6002. (DTEXT("IPForwardPkt: bufown %x\n"), pNdisBuffer));
  6003. // Buffer transfer possible!
  6004. //ASSERT(LContext2 <= 8);
  6005. MacHeaderSize += LContext2;
  6006. // remember the original Packet and mac hdr size
  6007. FWC->fc_bufown = LContext1;
  6008. FWC->fc_MacHdrSize = MacHeaderSize;
  6009. //Munge ttl and xsum fields
  6010. Header->iph_ttl = Header->iph_ttl - 1;
  6011. xsum = Header->iph_xsum + 1;
  6012. //add carry
  6013. Header->iph_xsum = (ushort)(xsum + (xsum >> 16));
  6014. // Adjust incoming mdl pointer and counts
  6015. NdisAdjustBuffer(
  6016. pNdisBuffer,
  6017. (PCHAR) NdisBufferVirtualAddress(pNdisBuffer) + MacHeaderSize,
  6018. NdisBufferLength(pNdisBuffer) - MacHeaderSize);
  6019. //Now link this mdl to the packet
  6020. Packet->Private.Head = pNdisBuffer;
  6021. Packet->Private.Tail = pNdisBuffer;
  6022. Packet->Private.TotalLength = DataLength + HeaderLength;
  6023. Packet->Private.Count = 1;
  6024. // We never loopback the packet
  6025. // except if we are in promiscuous mode
  6026. if (!IF->if_promiscuousmode) {
  6027. NdisSetPacketFlags(Packet, NDIS_FLAGS_DONT_LOOPBACK);
  6028. }
  6029. Status = (*(IF->if_xmit)) (IF->if_lcontext, &Packet, 1,
  6030. NextHop, FwdRce, ArpCtxt);
  6031. DbgNumPktFwd++;
  6032. if (Status != NDIS_STATUS_PENDING) {
  6033. NdisAdjustBuffer(
  6034. pNdisBuffer,
  6035. (PCHAR) NdisBufferVirtualAddress(pNdisBuffer) - MacHeaderSize,
  6036. NdisBufferLength(pNdisBuffer) + MacHeaderSize);
  6037. Packet->Private.Head = NULL;
  6038. Packet->Private.Tail = NULL;
  6039. FWC->fc_bufown = NULL;
  6040. #if MCAST_BUG_TRACKING
  6041. FWC->fc_mtu = __LINE__;
  6042. #endif
  6043. FreeFWPacket(Packet);
  6044. *pClientCnt = 0;
  6045. } else {
  6046. //Okay, the xmit is pending indicate this to ndis.
  6047. *pClientCnt = 1;
  6048. }
  6049. return;
  6050. } else {
  6051. FWC->fc_bufown = NULL;
  6052. }
  6053. // Fill in the header in the forwarding context
  6054. NewHeader->iph_verlen = Header->iph_verlen;
  6055. NewHeader->iph_tos = Header->iph_tos;
  6056. NewHeader->iph_length = Header->iph_length;
  6057. NewHeader->iph_id = Header->iph_id;
  6058. NewHeader->iph_offset = Header->iph_offset;
  6059. NewHeader->iph_protocol = Header->iph_protocol;
  6060. NewHeader->iph_src = Header->iph_src;
  6061. NewHeader->iph_dest = DestAddr;
  6062. NewHeader->iph_ttl = Header->iph_ttl - 1;
  6063. NewHeader->iph_xsum = 0;
  6064. // Now that we have a packet, go ahead and transfer data the
  6065. // data in if we need to.
  6066. if (DataLength == 0) {
  6067. Status = NDIS_STATUS_SUCCESS;
  6068. } else {
  6069. Status = GetFWBuffer(SrcNTE, Packet, Data, DataLength,
  6070. BufferLength, HeaderLength, LContext1,
  6071. LContext2);
  6072. }
  6073. // If the status is pending, don't do anything now. Otherwise,
  6074. // if the status is success send the packet.
  6075. if (Status != NDIS_STATUS_PENDING)
  6076. if (Status == NDIS_STATUS_SUCCESS) {
  6077. if (!IF->if_promiscuousmode) {
  6078. NdisSetPacketFlags(Packet, NDIS_FLAGS_DONT_LOOPBACK);
  6079. }
  6080. SendFWPacket(Packet, Status, DataLength);
  6081. } else {
  6082. // Some sort of failure. Free the packet.
  6083. IPSInfo.ipsi_outdiscards++;
  6084. #if MCAST_BUG_TRACKING
  6085. FWC->fc_mtu = __LINE__;
  6086. #endif
  6087. FreeFWPacket(Packet);
  6088. }
  6089. } else { // Couldn't get a packet, so drop this.
  6090. DEBUGMSG(DBG_ERROR && DBG_FWD,
  6091. (DTEXT("IPForwardPkt: failed to get a forwarding packet!\n")));
  6092. IPSInfo.ipsi_outdiscards++;
  6093. if (Options)
  6094. CTEFreeMem(Options);
  6095. if (Link) {
  6096. DerefLink(Link);
  6097. }
  6098. DerefIF(IF);
  6099. }
  6100. } else { // Forward called, but forwarding turned off.
  6101. DEBUGMSG(DBG_WARN && DBG_FWD,
  6102. (DTEXT("IPForwardPkt: Forwarding called but is actually OFF.\n")));
  6103. if (DestType != DEST_BCAST && DestType != DEST_SN_BCAST) {
  6104. // No need to go through here for strictly broadcast packets,
  6105. // although we want to bump the counters for remote bcast stuff.
  6106. IPSInfo.ipsi_inaddrerrors++;
  6107. if (!IS_BCAST_DEST(DestType)) {
  6108. if (DestType == DEST_LOCAL) // Called when local, must be SR.
  6109. SendICMPErr(SrcNTE->nte_addr, Header,
  6110. ICMP_DEST_UNREACH, SR_FAILED, 0, 0);
  6111. }
  6112. }
  6113. }
  6114. }
  6115. //* AddNTERoutes - Add the routes for an NTE.
  6116. //
  6117. // Called during initalization or during DHCP address assignment to add
  6118. // routes. We add routes for the address of the NTE, including routes
  6119. // to the subnet and the address itself.
  6120. //
  6121. // Input: NTE - NTE for which to add routes.
  6122. //
  6123. // Returns: TRUE if they were all added, FALSE if not.
  6124. //
  6125. uint
  6126. AddNTERoutes(NetTableEntry * NTE)
  6127. {
  6128. IPMask Mask, SNMask;
  6129. Interface *IF;
  6130. CTELockHandle Handle;
  6131. IPAddr AllSNBCast;
  6132. IP_STATUS Status;
  6133. IPRouteNotifyOutput RNO = {0};
  6134. // First, add the route to the address itself. This is a route through
  6135. // the loopback interface.
  6136. #if DBG
  6137. IF_IPDBG(IP_DEBUG_ADDRESS)
  6138. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  6139. " AddNTE: Adding host route for %x\n", NTE->nte_addr));
  6140. #endif
  6141. IF = NTE->nte_if;
  6142. if (AddRoute(NTE->nte_addr, HOST_MASK, IPADDR_LOCAL, LoopNTE->nte_if,
  6143. LOOPBACK_MSS, IF->if_metric, IRE_PROTO_LOCAL, ATYPE_OVERRIDE,
  6144. 0, 0) != IP_SUCCESS)
  6145. return FALSE;
  6146. Mask = IPNetMask(NTE->nte_addr);
  6147. // Now add the route for the all-subnet's broadcast, if one doesn't already
  6148. // exist. There is special case code to handle this in SendIPBCast, so the
  6149. // exact interface we add this on doesn't really matter.
  6150. CTEGetLock(&RouteTableLock.Lock, &Handle);
  6151. AllSNBCast = (NTE->nte_addr & Mask) | (IF->if_bcast & ~Mask);
  6152. #if DBG
  6153. IF_IPDBG(IP_DEBUG_ADDRESS)
  6154. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  6155. " AddNTE: SNBCast address %x\n", AllSNBCast));
  6156. #endif
  6157. Status = LockedAddRoute(AllSNBCast, HOST_MASK, IPADDR_LOCAL, IF,
  6158. NTE->nte_mss, IF->if_metric, IRE_PROTO_LOCAL,
  6159. ATYPE_PERM, 0, FALSE, &RNO);
  6160. CTEFreeLock(&RouteTableLock.Lock, Handle);
  6161. if (Status != IP_SUCCESS) {
  6162. return FALSE;
  6163. } else if (RNO.irno_ifindex) {
  6164. RtChangeNotifyEx(&RNO);
  6165. RtChangeNotify(&RNO);
  6166. }
  6167. // If we're doing IGMP, add the route to the multicast address.
  6168. if (IGMPLevel != 0) {
  6169. #if DBG
  6170. IF_IPDBG(IP_DEBUG_ADDRESS)
  6171. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  6172. " AddNTE: Adding classD address\n"));
  6173. #endif
  6174. if (AddRoute(MCAST_DEST, CLASSD_MASK, IPADDR_LOCAL, NTE->nte_if,
  6175. NTE->nte_mss, IF->if_metric, IRE_PROTO_LOCAL, ATYPE_PERM,
  6176. 0, 0) != IP_SUCCESS)
  6177. return FALSE;
  6178. }
  6179. if (NTE->nte_mask != HOST_MASK) {
  6180. // And finally the route to the subnet.
  6181. SNMask = NTE->nte_mask;
  6182. #if DBG
  6183. IF_IPDBG(IP_DEBUG_ADDRESS)
  6184. KdPrintEx((DPFLTR_TCPIP_ID, DPFLTR_INFO_LEVEL,
  6185. " AddNTE: Adding subnet route %x\n",
  6186. NTE->nte_addr & SNMask));
  6187. #endif
  6188. if (AddRoute(NTE->nte_addr & SNMask, SNMask, IPADDR_LOCAL, NTE->nte_if,
  6189. NTE->nte_mss, IF->if_metric, IRE_PROTO_LOCAL, ATYPE_PERM,
  6190. 0, 0) != IP_SUCCESS)
  6191. return FALSE;
  6192. }
  6193. return TRUE;
  6194. }
  6195. //* DelNTERoutes - Add the routes for an NTE.
  6196. //
  6197. // Called when we receive media disconnect indication.
  6198. // routes.
  6199. //
  6200. // Input: NTE - NTE for which to delete routes.
  6201. //
  6202. // Returns: TRUE if they were all deleted, FALSE if not.
  6203. //
  6204. uint
  6205. DelNTERoutes(NetTableEntry * NTE)
  6206. {
  6207. IPMask SNMask;
  6208. uint retVal;
  6209. retVal = TRUE;
  6210. // First, delete the route to the address itself. This is a route through
  6211. // the loopback interface.
  6212. if (DeleteRoute(NTE->nte_addr, HOST_MASK, IPADDR_LOCAL, LoopNTE->nte_if, 0) != IP_SUCCESS)
  6213. retVal = FALSE;
  6214. // If we're doing IGMP, add the route to the multicast address.
  6215. if (IGMPLevel != 0) {
  6216. if (!(NTE->nte_flags & NTE_IF_DELETING) &&
  6217. (NTE->nte_if->if_ntecount == 0)) { // this is the last NTE on this if
  6218. if (DeleteRoute(MCAST_DEST, CLASSD_MASK, IPADDR_LOCAL, NTE->nte_if, 0) != IP_SUCCESS)
  6219. retVal = FALSE;
  6220. }
  6221. }
  6222. if (NTE->nte_mask != HOST_MASK) {
  6223. // And finally the route to the subnet.
  6224. // if there are no other NTEs on IF for the same subnet route
  6225. NetTableEntry *tmpNTE = NTE->nte_if->if_nte;
  6226. while (tmpNTE) {
  6227. if ((tmpNTE != NTE) && (tmpNTE->nte_flags & NTE_VALID) && ((tmpNTE->nte_addr & tmpNTE->nte_mask) == (NTE->nte_addr & NTE->nte_mask))) {
  6228. break;
  6229. }
  6230. tmpNTE = tmpNTE->nte_ifnext;
  6231. }
  6232. if (!tmpNTE) {
  6233. SNMask = NTE->nte_mask;
  6234. if (DeleteRoute(NTE->nte_addr & SNMask, SNMask, IPADDR_LOCAL, NTE->nte_if, 0) != IP_SUCCESS)
  6235. retVal = FALSE;
  6236. }
  6237. }
  6238. if (!(NTE->nte_flags & NTE_IF_DELETING)) {
  6239. Interface *IF = NTE->nte_if;
  6240. NetTableEntry *tmpNTE = IF->if_nte;
  6241. IPMask Mask;
  6242. IPAddr AllSNBCast;
  6243. Mask = IPNetMask(NTE->nte_addr);
  6244. AllSNBCast = (NTE->nte_addr & Mask) | (IF->if_bcast & ~Mask);
  6245. while (tmpNTE) {
  6246. IPMask tmpMask;
  6247. IPAddr tmpAllSNBCast;
  6248. tmpMask = IPNetMask(tmpNTE->nte_addr);
  6249. tmpAllSNBCast = (tmpNTE->nte_addr & tmpMask) | (IF->if_bcast & ~tmpMask);
  6250. if ((tmpNTE != NTE) && (tmpNTE->nte_flags & NTE_VALID) && IP_ADDR_EQUAL(AllSNBCast, tmpAllSNBCast)) {
  6251. break;
  6252. }
  6253. tmpNTE = tmpNTE->nte_ifnext;
  6254. }
  6255. if (!tmpNTE) {
  6256. // Delete the route for the all-subnet's broadcast.
  6257. if (DeleteRoute(AllSNBCast, HOST_MASK, IPADDR_LOCAL, IF, 0) != IP_SUCCESS)
  6258. retVal = FALSE;
  6259. }
  6260. }
  6261. return retVal;
  6262. }
  6263. //* DelIFRoutes - Delete the routes for an interface.
  6264. //
  6265. // Called when we receive media disconnect indication.
  6266. // routes.
  6267. //
  6268. // Input: IF - IF for which to delete routes.
  6269. //
  6270. // Returns: TRUE if they were all deleted, FALSE if not.
  6271. //
  6272. uint
  6273. DelIFRoutes(Interface * IF)
  6274. {
  6275. NetTableEntry *NTE;
  6276. uint i;
  6277. for (i = 0; i < NET_TABLE_SIZE; i++) {
  6278. NetTableEntry *NetTableList = NewNetTableList[i];
  6279. for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next) {
  6280. if ((NTE->nte_flags & NTE_VALID) && NTE->nte_if == IF) {
  6281. // This guy is on the interface, and needs to be deleted.
  6282. if (!DelNTERoutes(NTE)) {
  6283. return FALSE;
  6284. }
  6285. }
  6286. }
  6287. }
  6288. return TRUE;
  6289. }
  6290. //* AddIFRoutes - Add the routes for an interface.
  6291. //
  6292. // Called when we receive media disconnect indication.
  6293. // routes.
  6294. //
  6295. // Input: IF - IF for which to Add routes.
  6296. //
  6297. // Returns: TRUE if they were all Added, FALSE if not.
  6298. //
  6299. uint
  6300. AddIFRoutes(Interface * IF)
  6301. {
  6302. NetTableEntry *NTE;
  6303. uint i;
  6304. for (i = 0; i < NET_TABLE_SIZE; i++) {
  6305. NetTableEntry *NetTableList = NewNetTableList[i];
  6306. for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next) {
  6307. if ((NTE->nte_flags & NTE_VALID) && NTE->nte_if == IF) {
  6308. // This guy is on the interface, and needs to be added.
  6309. if (!AddNTERoutes(NTE)) {
  6310. return FALSE;
  6311. }
  6312. }
  6313. }
  6314. }
  6315. return TRUE;
  6316. }
  6317. #pragma BEGIN_INIT
  6318. uint BCastMinMTU = 0xffff;
  6319. //* InitNTERouting - do per NTE route initialization.
  6320. //
  6321. // Called when we need to initialize per-NTE routing. For the specified NTE,
  6322. // call AddNTERoutes to add a route for a net bcast, subnet bcast, and local
  6323. // attached subnet. The net bcast entry is sort of a filler - net and
  6324. // global bcasts are always handled specially. For this reason we specify
  6325. // the FirstInterface when adding the route. Subnet bcasts are assumed to
  6326. // only go out on one interface, so the actual interface to be used is
  6327. // specifed. If two interfaces are on the same subnet the last interface is
  6328. // the one that will be used.
  6329. //
  6330. // Input: NTE - NTE for which routing is to be initialized.
  6331. // NumGWs - Number of default gateways to add.
  6332. // GWList - List of default gateways.
  6333. // GWMetricList - the metric for each gateway.
  6334. //
  6335. // Returns: TRUE if we succeed, FALSE if we don't.
  6336. //
  6337. uint
  6338. InitNTERouting(NetTableEntry * NTE, uint NumGWs, IPAddr * GWList,
  6339. uint * GWMetricList)
  6340. {
  6341. uint i;
  6342. Interface *IF;
  6343. if (NTE != LoopNTE) {
  6344. BCastMinMTU = MIN(BCastMinMTU, NTE->nte_mss);
  6345. IF = NTE->nte_if;
  6346. AddRoute(IF->if_bcast, HOST_MASK, IPADDR_LOCAL, IF,
  6347. BCastMinMTU, 1, IRE_PROTO_LOCAL, ATYPE_OVERRIDE,
  6348. 0, 0); // Route for local
  6349. // bcast.
  6350. if (NTE->nte_flags & NTE_VALID) {
  6351. if (!AddNTERoutes(NTE))
  6352. return FALSE;
  6353. // Now add the default routes that are present on this net. We
  6354. // don't check for errors here, but we should probably
  6355. // log an error.
  6356. for (i = 0; i < NumGWs; i++) {
  6357. IPAddr GWAddr;
  6358. GWAddr = net_long(GWList[i]);
  6359. if (IP_ADDR_EQUAL(GWAddr, NTE->nte_addr)) {
  6360. GWAddr = IPADDR_LOCAL;
  6361. }
  6362. AddRoute(NULL_IP_ADDR, DEFAULT_MASK,
  6363. GWAddr, NTE->nte_if, NTE->nte_mss,
  6364. GWMetricList[i] ? GWMetricList[i] : IF->if_metric,
  6365. IRE_PROTO_NETMGMT, ATYPE_OVERRIDE, 0, 0);
  6366. }
  6367. }
  6368. }
  6369. return TRUE;
  6370. }
  6371. //* EnableRouter - enables forwarding.
  6372. //
  6373. // This routine configures this node to enable packet-forwarding.
  6374. // It must be called with the route table lock held.
  6375. //
  6376. // Entry:
  6377. //
  6378. // Returns: nothing.
  6379. //
  6380. void
  6381. EnableRouter()
  6382. {
  6383. RouterConfigured = TRUE;
  6384. ForwardBCast = FALSE;
  6385. ForwardPackets = TRUE;
  6386. }
  6387. //* DisableRouter - disables forwarding.
  6388. //
  6389. // This routine configures this node to disable packet-forwarding.
  6390. // It must be called with the route table lock held.
  6391. //
  6392. // Entry:
  6393. //
  6394. // Returns: nothing.
  6395. //
  6396. void
  6397. DisableRouter()
  6398. {
  6399. RouterConfigured = FALSE;
  6400. ForwardBCast = FALSE;
  6401. ForwardPackets = FALSE;
  6402. }
  6403. //* IPEnableRouterWithRefCount - acquires or releases a reference to forwarding
  6404. //
  6405. // This routine increments or decrements the reference-count on forwarding
  6406. // functionality. When the first reference is acquired, forwarding is enabled.
  6407. // When the last reference is released, forwarding is disabled.
  6408. // It must be called with the route table lock held.
  6409. //
  6410. // Entry: Enable - indicates whether to acquire or release a reference
  6411. //
  6412. // Return: the number of remaining references.
  6413. //
  6414. int
  6415. IPEnableRouterWithRefCount(LOGICAL Enable)
  6416. {
  6417. if (Enable) {
  6418. if (++IPEnableRouterRefCount == 1 && !RouterConfigured) {
  6419. EnableRouter();
  6420. }
  6421. } else {
  6422. if (--IPEnableRouterRefCount == 0 && RouterConfigured) {
  6423. DisableRouter();
  6424. }
  6425. }
  6426. return IPEnableRouterRefCount;
  6427. }
  6428. //* InitRouting - Initialize our routing table.
  6429. //
  6430. // Called during initialization to initialize the routing table.
  6431. //
  6432. // Entry: Nothing.
  6433. //
  6434. // Returns: True if we succeeded, False if we didn't.
  6435. //
  6436. int
  6437. InitRouting(IPConfigInfo * ci)
  6438. {
  6439. UINT initStatus;
  6440. ULONG initFlags;
  6441. CTEInitLock(&RouteTableLock.Lock);
  6442. InitRefPtr(&FilterRefPtr, &RouteTableLock.Lock, DummyFilterPtr);
  6443. InitRefPtr(&DODRefPtr, &RouteTableLock.Lock, DummyDODCallout);
  6444. DefGWConfigured = 0;
  6445. DefGWActive = 0;
  6446. RtlZeroMemory(&DummyInterface, sizeof(DummyInterface));
  6447. DummyInterface.ri_if.if_xmit = DummyXmit;
  6448. DummyInterface.ri_if.if_transfer = DummyXfer;
  6449. DummyInterface.ri_if.if_close = DummyClose;
  6450. DummyInterface.ri_if.if_invalidate = DummyInvalidate;
  6451. DummyInterface.ri_if.if_qinfo = DummyQInfo;
  6452. DummyInterface.ri_if.if_setinfo = DummySetInfo;
  6453. DummyInterface.ri_if.if_getelist = DummyGetEList;
  6454. DummyInterface.ri_if.if_addaddr = DummyAddAddr;
  6455. DummyInterface.ri_if.if_deladdr = DummyDelAddr;
  6456. DummyInterface.ri_if.if_dondisreq = DummyDoNdisReq;
  6457. DummyInterface.ri_if.if_bcast = IP_LOCAL_BCST;
  6458. DummyInterface.ri_if.if_speed = 10000000;
  6459. DummyInterface.ri_if.if_mtu = 1500;
  6460. DummyInterface.ri_if.if_index = INVALID_IF_INDEX;
  6461. LOCKED_REFERENCE_IF(&DummyInterface.ri_if);
  6462. DummyInterface.ri_if.if_pnpcontext = 0;
  6463. initFlags = ci->ici_fastroutelookup ? TFLAG_FAST_TRIE_ENABLED : 0;
  6464. if ((initStatus = InitRouteTable(initFlags,
  6465. ci->ici_fastlookuplevels,
  6466. ci->ici_maxfastlookupmemory,
  6467. ci->ici_maxnormlookupmemory))
  6468. != STATUS_SUCCESS) {
  6469. TCPTRACE(("Init Route Table Failed: %08x\n", initStatus));
  6470. return FALSE;
  6471. }
  6472. // We've created at least one net. We need to add routing table entries for
  6473. // the global broadcast address, as well as for subnet and net broadcasts,
  6474. // and routing entries for the local subnet. We alse need to add a loopback
  6475. // route for the loopback net. Below, we'll add a host route for ourselves
  6476. // through the loopback net.
  6477. AddRoute(LOOPBACK_ADDR & CLASSA_MASK, CLASSA_MASK, IPADDR_LOCAL,
  6478. LoopNTE->nte_if, LOOPBACK_MSS, 1, IRE_PROTO_LOCAL, ATYPE_PERM,
  6479. 0, 0);
  6480. // Route for loopback.
  6481. if ((uchar) ci->ici_gateway) {
  6482. EnableRouter();
  6483. }
  6484. CTEInitTimer(&IPRouteTimer);
  6485. RouteTimerTicks = 0;
  6486. #if FFP_SUPPORT
  6487. FFPFlushRequired = FALSE;
  6488. #endif
  6489. FlushIFTimerTicks = 0;
  6490. CTEStartTimer(&IPRouteTimer, IP_ROUTE_TIMEOUT, IPRouteTimeout, NULL);
  6491. return TRUE;
  6492. }
  6493. PVOID
  6494. NTAPI
  6495. FwPacketAllocate (
  6496. IN POOL_TYPE PoolType,
  6497. IN SIZE_T NumberOfBytes,
  6498. IN ULONG Tag
  6499. )
  6500. {
  6501. NDIS_STATUS Status;
  6502. PNDIS_PACKET Packet;
  6503. UNREFERENCED_PARAMETER(PoolType);
  6504. UNREFERENCED_PARAMETER(NumberOfBytes);
  6505. UNREFERENCED_PARAMETER(Tag);
  6506. // Get a packet from our forwarding packet pool.
  6507. //
  6508. NdisAllocatePacket(&Status, &Packet, IpForwardPacketPool);
  6509. if (Status == NDIS_STATUS_SUCCESS) {
  6510. PNDIS_BUFFER Buffer;
  6511. IPHeader *Header;
  6512. // Get an IP header buffer from our IP header pool.
  6513. //
  6514. Buffer = MdpAllocate(IpHeaderPool, &Header);
  6515. if (Buffer) {
  6516. FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
  6517. // Intialize the fowarding context area of the packet.
  6518. //
  6519. RtlZeroMemory(FWC, sizeof(FWContext));
  6520. FWC->fc_hndisbuff = Buffer;
  6521. FWC->fc_hbuff = Header;
  6522. FWC->fc_pc.pc_common.pc_flags = PACKET_FLAG_FW | PACKET_FLAG_IPHDR;
  6523. #if MCAST_BUG_TRACKING
  6524. FWC->fc_pc.pc_common.pc_owner = 0;
  6525. #else
  6526. FWC->fc_pc.pc_common.pc_owner = PACKET_OWNER_IP;
  6527. #endif
  6528. FWC->fc_pc.pc_pi = RtPI;
  6529. FWC->fc_pc.pc_context = Packet;
  6530. return Packet;
  6531. }
  6532. NdisFreePacket(Packet);
  6533. }
  6534. return NULL;
  6535. }
  6536. VOID
  6537. NTAPI
  6538. FwPacketFree (
  6539. IN PVOID Buffer
  6540. )
  6541. {
  6542. PNDIS_PACKET Packet = (PNDIS_PACKET)Buffer;
  6543. FWContext *FWC = (FWContext *)Packet->ProtocolReserved;
  6544. // Return any IP header to its pool.
  6545. //
  6546. if (FWC->fc_hndisbuff) {
  6547. MdpFree(FWC->fc_hndisbuff);
  6548. }
  6549. NdisFreePacket(Packet);
  6550. }
  6551. //* InitForwardingPools - Initialize the packet and buffer pools used
  6552. // for forwarding operations.
  6553. //
  6554. // Returns: TRUE if the operations succeeded.
  6555. //
  6556. BOOLEAN InitForwardingPools()
  6557. {
  6558. NDIS_STATUS Status;
  6559. // Create our "large" forwarding buffer pool.
  6560. //
  6561. IpForwardLargePool = MdpCreatePool(BUFSIZE_LARGE_POOL, 'lfCT');
  6562. if (!IpForwardLargePool) {
  6563. return FALSE;
  6564. }
  6565. // Create our "small" forwarding buffer pool.
  6566. //
  6567. IpForwardSmallPool = MdpCreatePool(BUFSIZE_SMALL_POOL, 'sfCT');
  6568. if (!IpForwardSmallPool) {
  6569. MdpDestroyPool(IpForwardLargePool);
  6570. IpForwardLargePool = NULL;
  6571. return FALSE;
  6572. }
  6573. // Create our forwarding packet pool.
  6574. //
  6575. NdisAllocatePacketPoolEx(&Status, &IpForwardPacketPool,
  6576. PACKET_POOL_SIZE, 0, sizeof(FWContext));
  6577. if (Status != NDIS_STATUS_SUCCESS) {
  6578. MdpDestroyPool(IpForwardSmallPool);
  6579. IpForwardSmallPool = NULL;
  6580. MdpDestroyPool(IpForwardLargePool);
  6581. IpForwardLargePool = NULL;
  6582. return FALSE;
  6583. }
  6584. NdisSetPacketPoolProtocolId(IpForwardPacketPool, NDIS_PROTOCOL_ID_TCP_IP);
  6585. return TRUE;
  6586. }
  6587. //* InitGateway - Initialize our gateway functionality.
  6588. //
  6589. // Called during init. time to initialize our gateway functionality. If we're
  6590. // not connfigured as a router, we do nothing. If we are, we allocate the
  6591. // resources we need and do other router initialization.
  6592. //
  6593. // Input: ci - Config info.
  6594. //
  6595. // Returns: TRUE if we succeed, FALSE if don't.
  6596. //
  6597. uint
  6598. InitGateway(IPConfigInfo * ci)
  6599. {
  6600. IPHeader *HeaderPtr = NULL;
  6601. uchar *FWBuffer = NULL;
  6602. RouteInterface *RtIF;
  6603. NetTableEntry *NTE;
  6604. uint i;
  6605. // If we're going to be a router, allocate and initialize the resources we
  6606. // need for that.
  6607. BCastRSQ = NULL;
  6608. RtPI = CTEAllocMemNBoot(sizeof(ProtInfo), 'JiCT');
  6609. if (RtPI == (ProtInfo *) NULL)
  6610. goto failure;
  6611. RtPI->pi_xmitdone = FWSendComplete;
  6612. for (i = 0; i < NET_TABLE_SIZE; i++) {
  6613. NetTableEntry *NetTableList = NewNetTableList[i];
  6614. for (NTE = NetTableList; NTE != NULL; NTE = NTE->nte_next) {
  6615. RtIF = (RouteInterface *) NTE->nte_if;
  6616. RtIF->ri_q.rsq_qh.fq_next = &RtIF->ri_q.rsq_qh;
  6617. RtIF->ri_q.rsq_qh.fq_prev = &RtIF->ri_q.rsq_qh;
  6618. RtIF->ri_q.rsq_running = FALSE;
  6619. RtIF->ri_q.rsq_pending = 0;
  6620. RtIF->ri_q.rsq_qlength = 0;
  6621. CTEInitLock(&RtIF->ri_q.rsq_lock);
  6622. }
  6623. }
  6624. BCastRSQ = CTEAllocMemNBoot(sizeof(RouteSendQ), 'KiCT');
  6625. if (BCastRSQ == (RouteSendQ *) NULL)
  6626. goto failure;
  6627. BCastRSQ->rsq_qh.fq_next = &BCastRSQ->rsq_qh;
  6628. BCastRSQ->rsq_qh.fq_prev = &BCastRSQ->rsq_qh;
  6629. BCastRSQ->rsq_pending = 0;
  6630. BCastRSQ->rsq_maxpending = DEFAULT_MAX_PENDING;
  6631. BCastRSQ->rsq_qlength = 0;
  6632. BCastRSQ->rsq_running = FALSE;
  6633. CTEInitLock(&BCastRSQ->rsq_lock);
  6634. RtIF = (RouteInterface *) &LoopInterface;
  6635. RtIF->ri_q.rsq_maxpending = DEFAULT_MAX_PENDING;
  6636. if (!InitForwardingPools()) {
  6637. goto failure;
  6638. }
  6639. return TRUE;
  6640. failure:
  6641. if (RtPI != NULL)
  6642. CTEFreeMem(RtPI);
  6643. if (BCastRSQ != NULL)
  6644. CTEFreeMem(BCastRSQ);
  6645. if (HeaderPtr != NULL)
  6646. CTEFreeMem(HeaderPtr);
  6647. if (FWBuffer != NULL)
  6648. CTEFreeMem(FWBuffer);
  6649. ForwardBCast = FALSE;
  6650. ForwardPackets = FALSE;
  6651. RouterConfigured = FALSE;
  6652. IPEnableRouterRefCount = (ci->ici_gateway ? 1 : 0);
  6653. return FALSE;
  6654. }
  6655. NTSTATUS
  6656. GetIFAndLink(void *Rce, ULONG * IFIndex, IPAddr * NextHop)
  6657. {
  6658. RouteTableEntry *RTE = NULL;
  6659. RouteCacheEntry *RCE = (RouteCacheEntry *) Rce;
  6660. Interface *IF;
  6661. KIRQL rtlIrql;
  6662. CTEGetLock(&RouteTableLock.Lock, &rtlIrql);
  6663. if (RCE && (RCE->rce_flags & RCE_VALID) &&
  6664. !(RCE->rce_flags & RCE_LINK_DELETED))
  6665. RTE = RCE->rce_rte;
  6666. if (RTE) {
  6667. if ((IF = IF_FROM_RTE(RTE)) == NULL) {
  6668. CTEFreeLock(&RouteTableLock.Lock, rtlIrql);
  6669. return IP_GENERAL_FAILURE;
  6670. }
  6671. *IFIndex = IF->if_index;
  6672. if (RTE->rte_link) {
  6673. ASSERT(IF->if_flags & IF_FLAGS_P2MP);
  6674. *NextHop = RTE->rte_link->link_NextHop;
  6675. } else
  6676. *NextHop = NULL_IP_ADDR;
  6677. CTEFreeLock(&RouteTableLock.Lock, rtlIrql);
  6678. return IP_SUCCESS;
  6679. }
  6680. CTEFreeLock(&RouteTableLock.Lock, rtlIrql);
  6681. return IP_GENERAL_FAILURE;
  6682. }
  6683. #pragma END_INIT