Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

560 lines
18 KiB

  1. // -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs)
  2. //
  3. // Copyright (c) 1998-2000 Microsoft Corporation
  4. //
  5. // This file is part of the Microsoft Research IPv6 Network Protocol Stack.
  6. // You should have received a copy of the Microsoft End-User License Agreement
  7. // for this software along with this release; see the file "license.txt".
  8. // If not, please see http://www.research.microsoft.com/msripv6/license.htm,
  9. // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399.
  10. //
  11. // Abstract:
  12. //
  13. // Routing code external definitions for Internet Protocol Version 6.
  14. //
  15. #ifndef ROUTE_INCLUDED
  16. #define ROUTE_INCLUDED 1
  17. #ifndef IPINFO_INCLUDED
  18. # include <ipinfo.h>
  19. #endif
  20. typedef struct BindingCacheEntry BindingCacheEntry;
  21. typedef struct RouteTableEntry RouteTableEntry;
  22. typedef struct SitePrefixEntry SitePrefixEntry;
  23. extern void InitRouting(void);
  24. extern void UnloadRouting(void);
  25. //
  26. // Structure of a route cache entry.
  27. //
  28. // A route cache entry (RCE) primarily caches two computations:
  29. // next-hop determination and source address selection.
  30. // An RCE also caches other information related to the destination,
  31. // like path MTU.
  32. //
  33. // An RCE can also be created as a result of receiving an Redirect
  34. // ICMP message.
  35. //
  36. // There is at most one RCE per destination address / interface pair.
  37. // Our route cache corresponds to the destination cache
  38. // mentioned in RFC 1970's conceptual data structures,
  39. // with the addition of support for multi-homed nodes.
  40. //
  41. // The primary lookup key for RCEs is the destination address.
  42. // The current implementation just searches a list of all RCEs,
  43. // but a hash table or tree data structure would be preferable.
  44. //
  45. // Some nodes (like busy servers) might have many thousands of RCEs
  46. // but only tens of NCEs, because most destinations are reached
  47. // through only a few neighbor routers. Some nodes (like busy routers)
  48. // will have relatively few RCEs and hundreds of NCEs, because
  49. // forwarding does not use an RCE.
  50. //
  51. // The three major components of an RCE are the destination address,
  52. // NTE (indicates both the interface, and the best source address
  53. // on that interface to use for this destination), and NCE
  54. // (neighbor to which to send packets for this destination).
  55. //
  56. // Once an RCE is created, these three components are read-only
  57. // and anyone who holds a reference for the RCE can rely on
  58. // them not changing. The RCE holds references for the NTE and NCE.
  59. // This allows code that holds an RCE to access the important
  60. // fields without acquiring any locks. Fields like the path MTU
  61. // can also be safely read without a lock.
  62. //
  63. // When an RCE becomes invalid, it is removed from the route cache
  64. // but it is not deallocated until it has zero references.
  65. // The route cache itself holds one reference on RCEs in the cache.
  66. //
  67. // Because an RCE caches the result of two computations, RCEs can
  68. // become invalid (stale) for two reasons: the preferred source
  69. // address should be recomputed, or the next-hop neighbor should be
  70. // recomputed.
  71. //
  72. // Source addresses need to be recomputed or checked when the NTEs
  73. // on the RCE's interface change state - for example a new address
  74. // is created, a preferred address becomes deprecated, etc.
  75. // In practice, these should be relatively infrequent situations.
  76. //
  77. // Next-hop determination needs to be redone in several situations:
  78. // a neighbor is not reachable, a neighbor stops being a router,
  79. // a route in the routing table is removed or added, etc.
  80. // Again, these should be relatively infrequent situations.
  81. //
  82. // To avoid undue time & memory overheads (for example maintaining
  83. // a linked list of all RCEs that point to an NCE and a linked list
  84. // of all RCEs on a given interface, so that the right RCEs can
  85. // be immediately found when something changes), we use a "lazy" approach
  86. // based on a validation counter.
  87. //
  88. // There is a single global validation counter and when any state
  89. // changes that might potentially invalidate an RCE, this counter
  90. // is incremented. Each RCE has a snapshot of the counter that
  91. // can be quickly checked to validate the RCE.
  92. //
  93. // If the RCE is invalid, then it's contents (best source address,
  94. // next hop neighbor) are recomputed. If they are still correct,
  95. // then the RCE's validation counter snapshot is updated.
  96. // Otherwise the RCE's contents are updated (if nobody is using the RCE)
  97. // or a new RCE is created and the invalid RCE is removed from the cache.
  98. // Because the important fields in an RCE are read-only,
  99. // an RCE can only be updated in-place if it has no external references.
  100. //
  101. // For efficiency, some code may cache an RCE reference for a "long"
  102. // time, for example in a connection control block. Before using
  103. // the cached RCE, such code should check the invalidation counter
  104. // to ensure that the RCE is still valid. The ValidateRCE function
  105. // performs this check.
  106. //
  107. // Some RCEs are "constrained" (RCE_FLAG_CONSTRAINED). This means
  108. // that they can only be found in RouteToDestination if the caller
  109. // explicitly specifies an outgoing interface (RCE_FLAG_CONSTRAINED_IF)
  110. // or scopeid (RCE_FLAG_CONSTRAINED_SCOPEID). Consider
  111. // a multi-homed node which can reach a destination via two interfaces,
  112. // one of which is preferred (has a longer-matching-prefix route)
  113. // over the other. An RCE for reaching the destination via the non-preferred
  114. // interface will be marked as "constrained", to prevent its use
  115. // when RouteToDestination is called without a constraining NTEorIF.
  116. //
  117. // Because specifying an interface implicitly specifies a scopeid,
  118. // RCEs with RCE_FLAG_CONSTRAINED_IF also have RCE_FLAG_CONSTRAINED_SCOPEID.
  119. //
  120. // For a given destination address, all or all but one RCE for that
  121. // destination should be "constrained". Or put another way, at most one RCE
  122. // should not be "constrained". Or put another way, a destination address
  123. // sans scopeid can only have one preferred outgoing interface.
  124. // For a destination address / scopeid pair, all or all but one RCE
  125. // for that pair should be "interface constrained".
  126. //
  127. // The BCE field is non-NULL if this is a home address.
  128. // It does not hold a reference (Binding Cache Entries are not refcounted)
  129. // and it can only be non-NULL if the RCE is in the cache.
  130. // Access to the BCE field requires the route cache lock.
  131. //
  132. struct RouteCacheEntry {
  133. RouteCacheEntry *Next; // Next RCE in cache list.
  134. RouteCacheEntry *Prev; // Previous entry in cache list.
  135. long RefCnt;
  136. ushort Flags; // Peculiarities about this entry.
  137. ushort Type; // See below.
  138. ulong Valid; // Validation counter value.
  139. IPv6Addr Destination; // Where this route is to.
  140. struct NetTableEntry *NTE; // Preferred source address/interface.
  141. NeighborCacheEntry *NCE; // First-hop neighbor.
  142. uint LastError; // Time of last ICMP error (IPv6 ticks).
  143. uint PathMTU; // MTU of path to destination.
  144. uint PMTULastSet; // Time of last PMTU reduction.
  145. BindingCacheEntry *BCE; // If this is a home address.
  146. };
  147. //
  148. // These flag bits indicate whether the IF or ScopeId arguments
  149. // to FindOrCreateRoute affected the choice of RCE.
  150. // NB: FindOrCreateRoute assumes that these are the only flag bits.
  151. //
  152. #define RCE_FLAG_CONSTRAINED_IF 0x1
  153. #define RCE_FLAG_CONSTRAINED_SCOPEID 0x2
  154. #define RCE_FLAG_CONSTRAINED 0x3
  155. #define RCE_TYPE_COMPUTED 1
  156. #define RCE_TYPE_REDIRECT 2
  157. __inline void
  158. AddRefRCE(RouteCacheEntry *RCE)
  159. {
  160. InterlockedIncrement(&RCE->RefCnt);
  161. }
  162. extern ulong RouteCacheValidationCounter;
  163. __inline void
  164. InvalidateRouteCache(void)
  165. {
  166. InterlockedIncrement((PLONG)&RouteCacheValidationCounter);
  167. }
  168. __inline void
  169. InvalidateRCE(RouteCacheEntry *RCE)
  170. {
  171. InterlockedDecrement((PLONG)&RCE->Valid);
  172. }
  173. //
  174. // Structure of an entry in the route table.
  175. //
  176. // SitePrefixLength and PreferredLifetime
  177. // are only used when generating a Prefix Information Option
  178. // based on the route.
  179. //
  180. // If the route is published, then it does not disappear
  181. // even when the lifetime goes to zero. It is still present
  182. // for use in generating Router Advertisements.
  183. // But it doesn't get used for routing.
  184. // Similarly, system routes (RTE_TYPE_SYSTEM) are kept
  185. // in the route table even when their lifetime is zero.
  186. // This allows a loopback route to be allocated for an NTE/AAE
  187. // up front, but not be enabled until the address is valid.
  188. //
  189. struct RouteTableEntry {
  190. struct RouteTableEntry *Next; // Next entry on prefix list.
  191. Interface *IF; // Relevant interface.
  192. NeighborCacheEntry *NCE; // Next-hop neighbor (may be NULL).
  193. IPv6Addr Prefix; // Prefix (note not all bits are valid!).
  194. uint PrefixLength; // Number of bits in above to use as prefix.
  195. uint SitePrefixLength; // If non-zero, indicates a site subprefix.
  196. uint ValidLifetime; // In ticks.
  197. uint PreferredLifetime; // In ticks.
  198. uint Preference; // Smaller is better.
  199. ushort Flags;
  200. ushort Type;
  201. };
  202. //
  203. // The Type field indicates where the route came from.
  204. // These are RFC 2465 ipv6RouteProtocol values.
  205. // Routing protocols are free to define new values.
  206. // Only these three values are built-in.
  207. // ntddip6.h also defines these values, as well as others.
  208. //
  209. #define RTE_TYPE_SYSTEM 2
  210. #define RTE_TYPE_MANUAL 3
  211. #define RTE_TYPE_AUTOCONF 4
  212. __inline int
  213. IsValidRouteTableType(uint Type)
  214. {
  215. return Type < (1 << 16);
  216. }
  217. //
  218. // If the NCE is NULL, then the RTE specifies an on-link prefix.
  219. // Otherwise the RTE specifies a route to the neighbor.
  220. // As you would expect, generally the neighbor is on the interface.
  221. // Loopback routes are an exception.
  222. //
  223. // The PUBLISH bit indicates that the RTE can be visible
  224. // to RouterAdvertSend. That is, it is a "public" route.
  225. // The IMMORTAL bit indicates that the RTE's lifetime
  226. // does not age or countdown. It is useful in PUBLISHed RTEs,
  227. // where the RTE's lifetime affects the lifetime in RAs.
  228. // In non-PUBLISHed RTEs it is equivalent to an infinite lifetime.
  229. //
  230. #define RTE_FLAG_PUBLISH 0x00000001 // Used to create RAs.
  231. #define RTE_FLAG_IMMORTAL 0x00000002 // Lifetime does not decrease.
  232. //
  233. // These values are also defined in ntddip6.h.
  234. // Zero preference is reserved for administrative configuration.
  235. // Smaller is more preferred than larger.
  236. // We call these numbers preferences instead of metrics
  237. // in an attempt to prevent confusion with the metrics
  238. // employed by routing protocols. Routing protocol metrics
  239. // need to be mapped into our routing table preferences.
  240. // The largest preference value is 2^31-1, so that
  241. // we can add a route preference and an interface preference
  242. // without overflow.
  243. //
  244. #define ROUTE_PREF_LOW (16*16*16)
  245. #define ROUTE_PREF_MEDIUM (16*16)
  246. #define ROUTE_PREF_HIGH 16
  247. #define ROUTE_PREF_ON_LINK 8
  248. #define ROUTE_PREF_LOOPBACK 4
  249. #define ROUTE_PREF_HIGHEST 0
  250. //
  251. // Extract a route preference value
  252. // from the Flags field in a Router Advertisement.
  253. //
  254. __inline int
  255. ExtractRoutePreference(uchar Flags)
  256. {
  257. switch (Flags & 0x18) {
  258. case 0x08:
  259. return ROUTE_PREF_HIGH;
  260. case 0x00:
  261. return ROUTE_PREF_MEDIUM;
  262. case 0x18:
  263. return ROUTE_PREF_LOW;
  264. default:
  265. return 0; // Invalid.
  266. }
  267. }
  268. //
  269. // Encode a route preference value
  270. // for use in a Flags field in a Router Advertisement.
  271. //
  272. __inline uchar
  273. EncodeRoutePreference(uint Preference)
  274. {
  275. if (Preference <= ROUTE_PREF_HIGH)
  276. return 0x08;
  277. else if (Preference <= ROUTE_PREF_MEDIUM)
  278. return 0x00;
  279. else
  280. return 0x18;
  281. }
  282. __inline int
  283. IsValidPreference(uint Preference)
  284. {
  285. return Preference < (1 << 31);
  286. }
  287. __inline int
  288. IsOnLinkRTE(RouteTableEntry *RTE)
  289. {
  290. return (RTE->NCE == NULL);
  291. }
  292. //
  293. // Binding cache structure. Holds references to care-of RCE's.
  294. //
  295. struct BindingCacheEntry {
  296. struct BindingCacheEntry *Next;
  297. struct BindingCacheEntry *Prev;
  298. RouteCacheEntry *CareOfRCE;
  299. IPv6Addr HomeAddr;
  300. uint BindingLifetime; // Remaining lifetime (IPv6 ticks).
  301. ushort BindingSeqNumber;
  302. };
  303. //
  304. // Site prefix entry.
  305. // Used for filtering site-local addresses returned by DNS.
  306. //
  307. struct SitePrefixEntry {
  308. struct SitePrefixEntry *Next;
  309. Interface *IF;
  310. uint ValidLifetime; // In ticks.
  311. uint SitePrefixLength;
  312. IPv6Addr Prefix;
  313. };
  314. //
  315. // Global data structures.
  316. //
  317. //
  318. // RouteCacheLock protects the route cache and the binding cache.
  319. // RouteTableLock protects the route table and the site-prefix table.
  320. //
  321. // Lock acquisition order is:
  322. // RouteCacheLock before interface locks
  323. // interface locks before RouteTableLock
  324. // IoCancelSpinLock before RouteTableLock
  325. // RouteTableLock before neighbor cache locks
  326. //
  327. extern KSPIN_LOCK RouteCacheLock;
  328. extern KSPIN_LOCK RouteTableLock;
  329. //
  330. // The Route Cache contains RCEs. RCEs with reference count of one
  331. // can still be cached, but they may also be reclaimed.
  332. // (The lone reference is from the cache itself.)
  333. //
  334. // The current implementation is a simple circular linked-list of RCEs.
  335. //
  336. extern struct RouteCache {
  337. uint Limit;
  338. uint Count;
  339. RouteCacheEntry *First;
  340. RouteCacheEntry *Last;
  341. } RouteCache;
  342. #define SentinelRCE ((RouteCacheEntry *)&RouteCache.First)
  343. extern struct RouteTable {
  344. RouteTableEntry *First;
  345. RouteTableEntry **Last;
  346. } RouteTable;
  347. extern struct BindingCache {
  348. uint Limit;
  349. uint Count;
  350. BindingCacheEntry *First;
  351. BindingCacheEntry *Last;
  352. } BindingCache;
  353. #define SentinelBCE ((BindingCacheEntry *)&BindingCache.First)
  354. extern SitePrefixEntry *SitePrefixTable;
  355. //
  356. // Set to TRUE when the routing table changes
  357. // (for example adding/removing/changing published routes)
  358. // so that it's a good idea to send Router Advertisements
  359. // very promptly.
  360. //
  361. extern int ForceRouterAdvertisements;
  362. //
  363. // Contains a queue of IRPs that represent
  364. // route notification requests.
  365. //
  366. extern LIST_ENTRY RouteNotifyQueue;
  367. //
  368. // Exported function declarations.
  369. //
  370. int
  371. IsLoopbackRCE(RouteCacheEntry *RCE);
  372. int
  373. IsDisconnectedAndNotLoopbackRCE(RouteCacheEntry *RCE);
  374. extern IPAddr
  375. GetV4Destination(RouteCacheEntry *RCE);
  376. uint
  377. GetPathMTUFromRCE(RouteCacheEntry *RCE);
  378. uint
  379. GetEffectivePathMTUFromRCE(RouteCacheEntry *RCE);
  380. void
  381. ConfirmForwardReachability(RouteCacheEntry *RCE);
  382. void
  383. ForwardReachabilityInDoubt(RouteCacheEntry *RCE);
  384. uint
  385. GetInitialRTTFromRCE(RouteCacheEntry *RCE);
  386. extern void
  387. ReleaseRCE(RouteCacheEntry *RCE);
  388. extern RouteCacheEntry *
  389. ValidateRCE(RouteCacheEntry *RCE, NetTableEntry *NTE);
  390. #define RTD_FLAG_STRICT 0 // Must use specified IF.
  391. #define RTD_FLAG_NORMAL 1 // Must use specified IF unless it forwards.
  392. #define RTD_FLAG_LOOSE 2 // Only use IF to determine/check ScopeId.
  393. extern IP_STATUS
  394. RouteToDestination(const IPv6Addr *Destination, uint ScopeId,
  395. NetTableEntryOrInterface *NTEorIF, uint Flags,
  396. RouteCacheEntry **RCE);
  397. extern void
  398. FlushRouteCache(Interface *IF, const IPv6Addr *Addr);
  399. extern NetTableEntry *
  400. FindNetworkWithAddress(const IPv6Addr *Source, uint ScopeId);
  401. extern NTSTATUS
  402. RouteTableUpdate(PFILE_OBJECT FileObject,
  403. Interface *IF, NeighborCacheEntry *NCE,
  404. const IPv6Addr *Prefix, uint PrefixLength,
  405. uint SitePrefixLength,
  406. uint ValidLifetime, uint PreferredLifetime,
  407. uint Pref, uint Type, int Publish, int Immortal);
  408. extern void
  409. SitePrefixUpdate(Interface *IF,
  410. const IPv6Addr *Prefix, uint SitePrefixLength,
  411. uint ValidLifetime);
  412. extern uint
  413. SitePrefixMatch(const IPv6Addr *Destination);
  414. extern void
  415. RouteTableRemove(Interface *IF);
  416. extern void
  417. RouteTableResetAutoConfig(Interface *IF, uint MaxLifetime);
  418. extern void
  419. RouteTableReset(void);
  420. extern IP_STATUS
  421. FindOrCreateRoute(const IPv6Addr *Dest, uint ScopeId,
  422. Interface *IF, RouteCacheEntry **ReturnRCE);
  423. extern IP_STATUS
  424. FindNextHop(Interface *IF, const IPv6Addr *Dest, uint ScopeId,
  425. NeighborCacheEntry **ReturnNCE, ushort *ReturnConstrained);
  426. extern void
  427. RouteTableTimeout(void);
  428. extern void
  429. SitePrefixTimeout(void);
  430. extern void
  431. InvalidateRouter(NeighborCacheEntry *NCE);
  432. extern int
  433. UpdatePathMTU(Interface *IF, const IPv6Addr *Dest, uint MTU);
  434. extern IP_STATUS
  435. RedirectRouteCache(const IPv6Addr *Source, const IPv6Addr *Dest,
  436. Interface *IF, NeighborCacheEntry *NCE);
  437. extern void
  438. MoveToFrontBCE(BindingCacheEntry *BCE);
  439. extern BindingCacheEntry *
  440. FindBindingCacheEntry(const IPv6Addr *HomeAddr);
  441. extern BindingUpdateDisposition
  442. CacheBindingUpdate(IPv6BindingUpdateOption UNALIGNED *BindingUpdate,
  443. const IPv6Addr *CareOfAddr,
  444. NetTableEntryOrInterface *NTEorIF,
  445. const IPv6Addr *HomeAddr);
  446. extern void
  447. BindingCacheTimeout(void);
  448. extern void
  449. RouterAdvertSend(Interface *IF, const IPv6Addr *Source, const IPv6Addr *Dest);
  450. extern void
  451. RemoveRTE(RouteTableEntry **PrevRTE, RouteTableEntry *RTE);
  452. extern void
  453. InsertRTEAtFront(RouteTableEntry *RTE);
  454. extern void
  455. InsertRTEAtBack(RouteTableEntry *RTE);
  456. extern IP_STATUS
  457. GetBestRouteInfo(const IPv6Addr *Addr, ulong ScopeId, IP6RouteEntry *Ire);
  458. typedef struct {
  459. PIO_WORKITEM WorkItem;
  460. PIRP RequestList;
  461. } CompleteRtChangeContext;
  462. typedef struct {
  463. KIRQL OldIrql;
  464. PIRP RequestList;
  465. PIRP *LastRequest;
  466. CompleteRtChangeContext *Context;
  467. } CheckRtChangeContext;
  468. __inline void
  469. InitCheckRtChangeContext(CheckRtChangeContext *Context)
  470. {
  471. // Context->OldIrql must be initialized separately.
  472. Context->RequestList = NULL;
  473. Context->LastRequest = &Context->RequestList;
  474. Context->Context = NULL;
  475. }
  476. extern void
  477. CheckRtChangeNotifyRequests(
  478. CheckRtChangeContext *Context,
  479. PFILE_OBJECT FileObject,
  480. RouteTableEntry *RTE);
  481. extern void
  482. CompleteRtChangeNotifyRequests(CheckRtChangeContext *Context);
  483. #endif // ROUTE_INCLUDED