// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil -*- (for GNU Emacs) // // Copyright (c) 1985-2000 Microsoft Corporation // // This file is part of the Microsoft Research IPv6 Network Protocol Stack. // You should have received a copy of the Microsoft End-User License Agreement // for this software along with this release; see the file "license.txt". // If not, please see http://www.research.microsoft.com/msripv6/license.htm, // or write to Microsoft Research, One Microsoft Way, Redmond, WA 98052-6399. // // Abstract: // // Transmit routines for Internet Protocol Version 6. // #include "oscfg.h" #include "ndis.h" #include "ip6imp.h" #include "ip6def.h" #include "route.h" #include "select.h" #include "icmp.h" #include "neighbor.h" #include "fragment.h" #include "security.h" #include "ipsec.h" #include "md5.h" #include "info.h" // // Structure of completion data for "Care Of" packets. // typedef struct CareOfCompletionInfo { void (*SavedCompletionHandler)(PNDIS_PACKET Packet, IP_STATUS Status); // Original handler. void *SavedCompletionData; // Original data. PNDIS_BUFFER SavedFirstBuffer; uint NumESPTrailers; } CareOfCompletionInfo; ulong FragmentId = 0; //* NewFragmentId - generate a unique fragment identifier. // // Returns a fragment id. // __inline ulong NewFragmentId(void) { return InterlockedIncrement((PLONG)&FragmentId); } //* IPv6AllocatePacket // // Allocates a single-buffer packet. // // The completion handler for the packet is set to IPv6PacketComplete, // although the caller can easily change that if desired. // NDIS_STATUS IPv6AllocatePacket( uint Length, PNDIS_PACKET *pPacket, void **pMemory) { PNDIS_PACKET Packet; PNDIS_BUFFER Buffer; void *Memory; NDIS_STATUS Status; NdisAllocatePacket(&Status, &Packet, IPv6PacketPool); if (Status != NDIS_STATUS_SUCCESS) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6AllocatePacket - couldn't allocate header!?!\n")); return Status; } Memory = ExAllocatePoolWithTagPriority(NonPagedPool, Length, IP6_TAG, LowPoolPriority); if (Memory == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6AllocatePacket - couldn't allocate pool!?!\n")); NdisFreePacket(Packet); return NDIS_STATUS_RESOURCES; } NdisAllocateBuffer(&Status, &Buffer, IPv6BufferPool, Memory, Length); if (Status != NDIS_STATUS_SUCCESS) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6AllocatePacket - couldn't allocate buffer!?!\n")); ExFreePool(Memory); NdisFreePacket(Packet); return Status; } InitializeNdisPacket(Packet); PC(Packet)->CompletionHandler = IPv6PacketComplete; NdisChainBufferAtFront(Packet, Buffer); *pPacket = Packet; *pMemory = Memory; return NDIS_STATUS_SUCCESS; } //* IPv6FreePacket - free an IPv6 packet. // // Frees a packet whose buffers were allocated from the IPv6BufferPool. // void IPv6FreePacket(PNDIS_PACKET Packet) { PNDIS_BUFFER Buffer, NextBuffer; // // Free all the buffers in the packet. // Start with the first buffer in the packet and follow the chain. // NdisQueryPacket(Packet, NULL, NULL, &Buffer, NULL); for (; Buffer != NULL; Buffer = NextBuffer) { VOID *Mem; UINT Unused; // // Free the buffer descriptor back to IPv6BufferPool and its // associated memory back to the heap. Not clear if it would be // safe to free the memory before the buffer (because the buffer // references the memory), but this order should definitely be safe. // NdisGetNextBuffer(Buffer, &NextBuffer); NdisQueryBuffer(Buffer, &Mem, &Unused); NdisFreeBuffer(Buffer); ExFreePool(Mem); } // // Free the packet back to IPv6PacketPool. // NdisFreePacket(Packet); } //* IPv6PacketComplete // // Generic packet completion handler. // Just frees the packet. // void IPv6PacketComplete( PNDIS_PACKET Packet, IP_STATUS Status) { UNREFERENCED_PARAMETER(Status); IPv6FreePacket(Packet); } //* IPv6CareOfComplete - Completion handler for "Care Of" packets. // // Completion handler for packets that had a routing header inserted // because of a Binding Cache Entry. // void // Returns: Nothing. IPv6CareOfComplete( PNDIS_PACKET Packet, IP_STATUS Status) { PNDIS_BUFFER Buffer; uchar *Memory; uint Length; CareOfCompletionInfo *CareOfInfo = (CareOfCompletionInfo *)PC(Packet)->CompletionData; ASSERT(CareOfInfo->SavedFirstBuffer != NULL); // // Remove the first buffer that IPv6Send created, re-chain // the original first buffer, and restore the original packet // completion info. // NdisUnchainBufferAtFront(Packet, &Buffer); NdisChainBufferAtFront(Packet, CareOfInfo->SavedFirstBuffer); PC(Packet)->CompletionHandler = CareOfInfo->SavedCompletionHandler; PC(Packet)->CompletionData = CareOfInfo->SavedCompletionData; // // Now free the removed buffer and its memory. // NdisQueryBuffer(Buffer, &Memory, &Length); NdisFreeBuffer(Buffer); ExFreePool(Memory); // // Check if there are any ESP trailers that need to be freed. // for ( ; CareOfInfo->NumESPTrailers > 0; CareOfInfo->NumESPTrailers--) { // Remove the ESP Trailer. NdisUnchainBufferAtBack(Packet, &Buffer); // // Free the removed buffer and its memory. // NdisQueryBuffer(Buffer, &Memory, &Length); NdisFreeBuffer(Buffer); ExFreePool(Memory); } // // Free care-of completion data. // ExFreePool(CareOfInfo); // // The packet should now have it's original completion handler // specified for us to call. // ASSERT(PC(Packet)->CompletionHandler != NULL); // // Call the packet's designated completion handler. // (*PC(Packet)->CompletionHandler)(Packet, Status); } //* IPv6SendComplete - IP send complete handler. // // Called by the link layer when a send completes. We're given a pointer to // a net structure, as well as the completing send packet and the final status // of the send. // // The Context argument is NULL if and only if the Packet has not // actually been handed via IPv6SendLL to a link. // // The Status argument is usually one of three values: // IP_SUCCESS // IP_PACKET_TOO_BIG // IP_GENERAL_FAILURE // // May be called in a DPC or thread context. // // To prevent recursion, send-completion routines should // avoid sending packets directly. Schedule a DPC instead. // void // Returns: Nothing. IPv6SendComplete( void *Context, // Context we gave to the link layer on registration. PNDIS_PACKET Packet, // Packet completing send. IP_STATUS Status) // Final status of send. { Interface *IF = PC(Packet)->IF; ASSERT(Context == IF); UNREFERENCED_PARAMETER(Context); if ((IF != NULL) && !(PC(Packet)->Flags & NDIS_FLAGS_DONT_LOOPBACK)) { // // Send the packet via loopback also. // The loopback code will call IPv6SendComplete again, // after setting NDIS_FLAGS_DONT_LOOPBACK. // LoopQueueTransmit(Packet); return; } // // The packet should have a completion handler specified for us to call. // ASSERT(PC(Packet)->CompletionHandler != NULL); // // Call the packet's designated completion handler. // This should free the packet. // (*PC(Packet)->CompletionHandler)(Packet, Status); // // Release the packet's reference for the sending interface, // if this packet has actually been sent. // If the packet is completed before transmission, // it does not hold a reference for the interface. // if (IF != NULL) ReleaseIF(IF); } //* IPv6SendLL // // Hands a packet down to the link-layer and/or the loopback module. // // Callable from thread or DPC context. // Must be called with no locks held. // void IPv6SendLL( Interface *IF, PNDIS_PACKET Packet, uint Offset, const void *LinkAddress) { // // The packet needs to hold a reference to the sending interface, // because the transmit is asynchronous. // AddRefIF(IF); ASSERT(PC(Packet)->IF == NULL); PC(Packet)->IF = IF; PC(Packet)->pc_offset = Offset; // // Are we sending the packet via loopback or via the link? // NDIS_FLAGS_LOOPBACK_ONLY means do NOT send via the link. // NDIS_FLAGS_DONT_LOOPBACK means do NOT send via loopback. // Finalize these flag bits here. // NB: One or both may already be set. // if (PC(Packet)->Flags & NDIS_FLAGS_MULTICAST_PACKET) { // // Multicast packets are sent both ways by default. // If the interface is not receiving this address, // then don't bother with loopback. // if (! CheckLinkLayerMulticastAddress(IF, LinkAddress)) PC(Packet)->Flags |= NDIS_FLAGS_DONT_LOOPBACK; } else { // // Unicast packets are either sent via loopback // or via the link, but not both. // if (RtlCompareMemory(IF->LinkAddress, LinkAddress, IF->LinkAddressLength) == IF->LinkAddressLength) PC(Packet)->Flags |= NDIS_FLAGS_LOOPBACK_ONLY; else PC(Packet)->Flags |= NDIS_FLAGS_DONT_LOOPBACK; } // // If a packet is both looped-back and sent via the link, // we hand it to the link first and then IPv6SendComplete // handles the loopback. // if (!(PC(Packet)->Flags & NDIS_FLAGS_LOOPBACK_ONLY)) { // // Send it via the link. // (*IF->Transmit)(IF->LinkContext, Packet, Offset, LinkAddress); } else if (!(PC(Packet)->Flags & NDIS_FLAGS_DONT_LOOPBACK)) { // // Send it via loopback. // LoopQueueTransmit(Packet); } else { // // We do not send this packet. // IPv6SendComplete(IF, Packet, IP_SUCCESS); } } // // We store the Interface in our own field // instead of using PC(Packet)->IF to maintain // an invariant for IPv6SendLL and IPv6SendComplete: // PC(Packet)->IF is only set when the packet // is actually transmitted. // typedef struct IPv6SendLaterInfo { KDPC Dpc; KTIMER Timer; Interface *IF; PNDIS_PACKET Packet; uchar LinkAddress[]; } IPv6SendLaterInfo; //* IPv6SendLaterWorker // // Finishes the work of IPv6SendLater by calling IPv6SendLL. // // Called in a DPC context. // void IPv6SendLaterWorker( PKDPC MyDpcObject, // The DPC object describing this routine. void *Context, // The argument we asked to be called with. void *Unused1, void *Unused2) { IPv6SendLaterInfo *Info = (IPv6SendLaterInfo *) Context; Interface *IF = Info->IF; NDIS_PACKET *Packet = Info->Packet; UNREFERENCED_PARAMETER(MyDpcObject); UNREFERENCED_PARAMETER(Unused1); UNREFERENCED_PARAMETER(Unused2); // // Finally, transmit the packet. // IPv6SendLL(IF, Packet, PC(Packet)->pc_offset, Info->LinkAddress); ReleaseIF(IF); ExFreePool(Info); } //* IPv6SendLater // // Like IPv6SendLL, but defers the actual transmit until later. // This is useful in two scenarios. First, the caller // may hold a spinlock (like an interface lock), preventing // direct use of IPv6SendLL. Second, our caller may wish // to delay the transmit for a small period of time. // // Because this function performs memory allocation, it can fail. // If it fails, the caller must dispose of the packet. // // Callable from thread or DPC context. // May be called with locks held. // NDIS_STATUS IPv6SendLater( LARGE_INTEGER Time, // Zero means immediately. Interface *IF, PNDIS_PACKET Packet, uint Offset, const void *LinkAddress) { IPv6SendLaterInfo *Info; Info = ExAllocatePoolWithTagPriority( NonPagedPool, sizeof *Info + IF->LinkAddressLength, IP6_TAG, LowPoolPriority); if (Info == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6SendLater: no pool\n")); return NDIS_STATUS_RESOURCES; } AddRefIF(IF); Info->IF = IF; PC(Packet)->pc_offset = Offset; Info->Packet = Packet; RtlCopyMemory(Info->LinkAddress, LinkAddress, IF->LinkAddressLength); KeInitializeDpc(&Info->Dpc, IPv6SendLaterWorker, Info); if (Time.QuadPart == 0) { // // Queue the DPC for immediate execution. // KeInsertQueueDpc(&Info->Dpc, NULL, NULL); } else { // // Initialize a timer that will queue the DPC later. // KeInitializeTimer(&Info->Timer); KeSetTimer(&Info->Timer, Time, &Info->Dpc); } return NDIS_STATUS_SUCCESS; } //* IPv6SendND // // IPv6 primitive for sending via Neighbor Discovery. // We already know the first-hop destination and have a completed // packet ready to send. All we really do here is check & update the // NCE's neighbor discovery state. // // Discovery Address is the source address to use in neighbor // discovery solicitations. // // If DiscoveryAddress is not NULL, it must NOT be the address // of the packet's source address, because that memory might // be gone might by the time we reference it in NeighborSolicitSend. // It must point to memory that will remain valid across // IPv6SendND's entire execution. // // If DiscoveryAddress is NULL, then the Packet must be well-formed. // It must have a valid IPv6 header. For example, the raw header-include // path can NOT pass in NULL. // // Whether the Packet is well-formed or not, the first 40 bytes // of data must be accessible in the kernel. This is because // an ND failure will lead to IPv6SendAbort, which uses GetIPv6Header, // which calls GetDataFromNdis, which calls NdisQueryBuffer, // which bugchecks when the buffer can not be mapped. // // REVIEW - Should IPv6SendND live in send.c or neighbor.c? // // Callable from thread or DPC context. // void IPv6SendND( PNDIS_PACKET Packet, // Packet to send. uint Offset, // Offset from start of Packet to IP header. NeighborCacheEntry *NCE, // First-hop neighbor information. const IPv6Addr *DiscoveryAddress) // Address to use for neighbor discovery. { NDIS_PACKET *PacketList; IPv6Addr DiscoveryAddressBuffer; KIRQL OldIrql; // For locking the interface's neighbor cache. Interface *IF; // Interface to send via. ASSERT(NCE != NULL); IF = NCE->IF; // // Are we sending to a multicast IPv6 destination? // Pass this information to IPv6SendLL. // if (IsMulticast(&NCE->NeighborAddress)) PC(Packet)->Flags |= NDIS_FLAGS_MULTICAST_PACKET; RetryRequest: KeAcquireSpinLock(&IF->LockNC, &OldIrql); // // If the interface is disabled, we can't send packets. // if (IsDisabledIF(IF)) { KeReleaseSpinLock(&IF->LockNC, OldIrql); AbortRequest: IPSInfo.ipsi_outdiscards++; IPv6SendComplete(NULL, Packet, IP_GENERAL_FAILURE); return; } // // Check the Neighbor Discovery Protocol state of our Neighbor to // insure that we have current information to work with. We don't // have a timer going off to drive this in the common case, but // instead check the reachability timestamp directly here. // switch (NCE->NDState) { case ND_STATE_PERMANENT: // // This neighbor is always valid. // break; case ND_STATE_REACHABLE: // // Common case. We've verified neighbor reachability within // the last 'ReachableTime' ticks of the system interval timer. // If the time limit hasn't expired, we're free to go. // // Note that the following arithmetic will correctly handle wraps // of the IPv6 tick counter. // if ((uint)(IPv6TickCount - NCE->LastReachability) <= IF->ReachableTime) { // // Got here within the time limit. Just send it. // break; } // // Too long since last send. Entry went stale. Conceptually, // we've been in the STALE state since the above quantity went // positive. So just drop on into it now... // case ND_STATE_STALE: // // We have a stale entry in our neighbor cache. Go into DELAY // state, start the delay timer, and send the packet anyway. // NB: Internally we use PROBE state instead of DELAY. // NCE->NDState = ND_STATE_PROBE; NCE->NSTimer = DELAY_FIRST_PROBE_TIME; NCE->NSLimit = MAX_UNICAST_SOLICIT; NCE->NSCount = 0; break; case ND_STATE_PROBE: // // While in the PROBE state, we continue to send to our // cached address and hope for the best. // // First, check NSLimit. It might be MAX_UNREACH_SOLICIT or // MAX_UNICAST_SOLICIT. Ensure it's at least MAX_UNICAST_SOLICIT. // if (NCE->NSLimit < MAX_UNICAST_SOLICIT) NCE->NSLimit = MAX_UNICAST_SOLICIT; // // Second, if we have not started actively probing yet, ensure // we do not wait longer than DELAY_FIRST_PROBE_TIME to start. // if ((NCE->NSCount == 0) && (NCE->NSTimer > DELAY_FIRST_PROBE_TIME)) NCE->NSTimer = DELAY_FIRST_PROBE_TIME; break; case ND_STATE_INCOMPLETE: { PNDIS_PACKET OldPacket; int SendSolicit; if (!(IF->Flags & IF_FLAG_NEIGHBOR_DISCOVERS)) { // // This interface does not support Neighbor Discovery. // We can not resolve the address. // Mark the neighbor unreachable and invalidate the route cache. // This gives FindNextHop an opportunity to round-robin. // NCE->IsUnreachable = TRUE; NCE->LastReachability = IPv6TickCount; // Timestamp it. NCE->DoRoundRobin = TRUE; InvalidateRouteCache(); KeReleaseSpinLock(&IF->LockNC, OldIrql); IPSInfo.ipsi_outnoroutes++; IPv6SendAbort(CastFromIF(IF), Packet, Offset, ICMPv6_DESTINATION_UNREACHABLE, ICMPv6_ADDRESS_UNREACHABLE, 0, FALSE); return; } // // Get DiscoveryAddress from the packet // if we don't already have it. // We SHOULD use the packet's source address if possible. // if (DiscoveryAddress == NULL) { IPv6Header UNALIGNED *IP; IPv6Header HdrBuffer; NetTableEntry *NTE; int IsValid; KeReleaseSpinLock(&IF->LockNC, OldIrql); DiscoveryAddress = &DiscoveryAddressBuffer; // // Get the packet's source address. // Anyone sending possibly-malformed packets (eg RawSend) // must specify DiscoveryAddress, so GetIPv6Header // will always succeed. // IP = GetIPv6Header(Packet, Offset, &HdrBuffer); ASSERT(IP != NULL); DiscoveryAddressBuffer = IP->Source; // // Check that the address is a valid unicast address // assigned to the outgoing interface. // KeAcquireSpinLock(&IF->Lock, &OldIrql); NTE = (NetTableEntry *) *FindADE(IF, DiscoveryAddress); IsValid = ((NTE != NULL) && (NTE->Type == ADE_UNICAST) && IsValidNTE(NTE)); KeReleaseSpinLock(&IF->Lock, OldIrql); if (! IsValid) { // // Can't use the packet's source address. // Try the interface's link-local address. // if (! GetLinkLocalAddress(IF, &DiscoveryAddressBuffer)) { // // Without a valid link-local address, give up. // goto AbortRequest; } } // // Now that we have a valid DiscoveryAddress, // start over. // goto RetryRequest; } // // We do not have a valid link-layer address for the neighbor. // We must queue the packet, pending neighbor discovery. // Remember the packet's offset in the Packet6Context area. // REVIEW: For now, wait queue is just one packet deep. // OldPacket = NCE->WaitQueue; PC(Packet)->pc_offset = Offset; PC(Packet)->DiscoveryAddress = *DiscoveryAddress; NCE->WaitQueue = Packet; // // If we have not started neighbor discovery yet, // do so now by sending the first solicit. // It would be simpler to let NeighborCacheEntryTimeout // send the first solicit but that would introduce latency. // SendSolicit = (NCE->NSCount == 0); if (SendSolicit) { // // We send the first solicit below. // NCE->NSCount = 1; // // If NSTimer is zero, we need to initialize NSLimit. // if (NCE->NSTimer == 0) NCE->NSLimit = MAX_MULTICAST_SOLICIT; NCE->NSTimer = (ushort)IF->RetransTimer; } // // NSLimit might be MAX_MULTICAST_SOLICIT or MAX_UNREACH_SOLICIT. // Ensure that it is at least MAX_MULTICAST_SOLICIT. // if (NCE->NSLimit < MAX_MULTICAST_SOLICIT) NCE->NSLimit = MAX_MULTICAST_SOLICIT; // // If there are any packets waiting to be completed, take // this opportunity. With an active DoS attack, we want // to do this more frequently than NeighborCacheTimeout will. // PacketList = IF->PacketList; IF->PacketList = NULL; KeReleaseSpinLock(&IF->LockNC, OldIrql); NeighborCacheCompletePackets(IF, PacketList); if (SendSolicit) NeighborSolicitSend(NCE, DiscoveryAddress); if (OldPacket != NULL) { // // This queue overflow is congestion of a sort, // so we must not send an ICMPv6 error. // IPSInfo.ipsi_outdiscards++; IPv6SendComplete(NULL, OldPacket, IP_GENERAL_FAILURE); } return; } default: // // Should never happen. // ABORTMSG("IPv6SendND: Invalid Neighbor Cache NDState field!\n"); } // // Move the NCE to the head of the LRU list, // because we are using it to send a packet. // if (NCE != IF->FirstNCE) { // // Remove NCE from the list. // NCE->Next->Prev = NCE->Prev; NCE->Prev->Next = NCE->Next; // // Add NCE to the head of the list. // NCE->Next = IF->FirstNCE; NCE->Next->Prev = NCE; NCE->Prev = SentinelNCE(IF); NCE->Prev->Next = NCE; ASSERT(IF->FirstNCE == NCE); } // // Unlock before transmitting the packet. // This means that there is a very small chance that NCE->LinkAddress // could change out from underneath us. (For example, if we process // an advertisement changing the link-layer address.) // In practice this won't happen, and if it does the worst that // will happen is that we'll send a packet somewhere strange. // The best alternative is copying the LinkAddress. // KeReleaseSpinLock(&IF->LockNC, OldIrql); IPv6SendLL(IF, Packet, Offset, NCE->LinkAddress); } // // Context information that is used for fragmentation. // This information is carried between calls to IPv6SendFragment. // typedef struct FragmentationInfo { PNDIS_PACKET Packet; // Unfragmented packet. long NumLeft; // Number of uncompleted fragments. IP_STATUS Status; // Current status. } FragmentationInfo; //* IPv6SendFragmentComplete // // Completion handler, called when a fragment has been sent. // void IPv6SendFragmentComplete( PNDIS_PACKET Packet, IP_STATUS Status) { FragmentationInfo *Info = PC(Packet)->CompletionData; // // Free the fragment packet. // IPv6FreePacket(Packet); // // Update the current cumulative status. // InterlockedCompareExchange((PLONG)&Info->Status, Status, IP_SUCCESS); if (InterlockedDecrement(&Info->NumLeft) == 0) { // // This is the last fragment to complete. // IPv6SendComplete(NULL, Info->Packet, Info->Status); ExFreePool(Info); } } //* IPv6SendFragments - Fragment an IPv6 datagram. // // Helper routine for creating and sending IPv6 fragments. // Called from IPv6Send when the datagram is bigger than the path MTU. // // The PathMTU is passed separately so that we use a consistent value. // The value in the RCE is subject to change. // // NB: We assume that the packet has well-formed, contiguous headers. // void IPv6SendFragments( PNDIS_PACKET Packet, // Packet to send. uint Offset, // Offset from start of Packet to IP header. IPv6Header UNALIGNED *IP, // Pointer to Packet's IPv6 header. uint PayloadLength, // Packet payload length. RouteCacheEntry *RCE, // First-hop neighbor information. uint PathMTU) // PathMTU to use when fragmenting. { FragmentationInfo *Info; NeighborCacheEntry *NCE = RCE->NCE; NDIS_STATUS NdisStatus; IP_STATUS IPStatus; PNDIS_PACKET FragPacket; FragmentHeader FragHdr; uchar *Mem; uint MemLen; uint PktOffset; uint UnfragBytes; uint BytesLeft; uint BytesSent; uchar HdrType; uchar *tbuf; PNDIS_BUFFER SrcBuffer; uint SrcOffset; uint NextHeaderOffset; uint FragPayloadLength; // // A PathMTU value of zero is special - // it means that we should use the minimum MTU // and always include a fragment header. // if (PathMTU == 0) PathMTU = IPv6_MINIMUM_MTU; else ASSERT(PathMTU >= IPv6_MINIMUM_MTU); // // Determine the 'unfragmentable' portion of this packet. // We do this by scanning through all extension headers, // and noting the last occurrence, if any, of // a routing or hop-by-hop header. // We do not assume the extension headers are in recommended order, // but otherwise we assume that the headers are well-formed. // We also assume that they are contiguous. // UnfragBytes = sizeof *IP; HdrType = IP->NextHeader; NextHeaderOffset = (uint)((uchar *)&IP->NextHeader - (uchar *)IP); tbuf = (uchar *)(IP + 1); while ((HdrType == IP_PROTOCOL_HOP_BY_HOP) || (HdrType == IP_PROTOCOL_ROUTING) || (HdrType == IP_PROTOCOL_DEST_OPTS)) { ExtensionHeader *EHdr = (ExtensionHeader *) tbuf; uint EHdrLen = (EHdr->HeaderExtLength + 1) * 8; tbuf += EHdrLen; if (HdrType != IP_PROTOCOL_DEST_OPTS) { UnfragBytes = (uint)(tbuf - (uchar *)IP); NextHeaderOffset = (uint)((uchar *)&EHdr->NextHeader - (uchar *)IP); } HdrType = EHdr->NextHeader; } // // Suppose we have a routing header followed by // a destination-options header. Then the routing header // is unfragmentable but the destination options are // fragmentable, so HdrType should be IP_PROTOCOL_DEST_OPTS. // HdrType = *((uchar *)IP + NextHeaderOffset); // // Check that we can actually fragment this packet. // If the unfragmentable part is too large, we can't. // We need to send at least 8 bytes of fragmentable data // in each fragment. // if (UnfragBytes + sizeof(FragmentHeader) + 8 > PathMTU) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_USER_ERROR, "IPv6SendFragments: can't fragment\n")); IPStatus = IP_GENERAL_FAILURE; goto ErrorExit; } FragHdr.NextHeader = HdrType; FragHdr.Reserved = 0; FragHdr.Id = net_long(NewFragmentId()); // // Initialize SrcBuffer and SrcOffset, which point // to the fragmentable data in the packet. // SrcOffset is the offset into SrcBuffer's data, // NOT an offset into the packet. // SrcBuffer = NdisFirstBuffer(Packet); SrcOffset = Offset + UnfragBytes; // // Create new packets of MTU size until all data is sent. // BytesLeft = sizeof *IP + PayloadLength - UnfragBytes; PktOffset = 0; // relative to fragmentable part of original packet // // We need a completion context for the fragments. // Info = ExAllocatePoolWithTagPriority(NonPagedPool, sizeof *Info, IP6_TAG, LowPoolPriority); if (Info == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6SendFragments: no pool\n")); IPStatus = IP_NO_RESOURCES; goto ErrorExit; } Info->Packet = Packet; Info->NumLeft = 1; // A reference for our own processing. Info->Status = IP_SUCCESS; while (BytesLeft != 0) { // // Determine new IP payload length (a multiple of 8) and // and set the Fragment Header offset. // if ((BytesLeft + UnfragBytes + sizeof(FragmentHeader)) > PathMTU) { BytesSent = (PathMTU - UnfragBytes - sizeof(FragmentHeader)) &~ 7; // Not the last fragment, so turn on the M bit. FragHdr.OffsetFlag = net_short((ushort)(PktOffset | 1)); } else { BytesSent = BytesLeft; FragHdr.OffsetFlag = net_short((ushort)PktOffset); } // // Allocate packet (and a buffer) and Memory for new fragment // MemLen = Offset + UnfragBytes + sizeof(FragmentHeader) + BytesSent; NdisStatus = IPv6AllocatePacket(MemLen, &FragPacket, &Mem); if (NdisStatus != NDIS_STATUS_SUCCESS) { InterlockedCompareExchange((PLONG)&Info->Status, IP_NO_RESOURCES, IP_SUCCESS); break; } // // Copy IP header, Frag Header, and a portion of data to fragment. // RtlCopyMemory(Mem + Offset, IP, UnfragBytes); RtlCopyMemory(Mem + Offset + UnfragBytes, &FragHdr, sizeof FragHdr); if (! CopyNdisToFlat(Mem + Offset + UnfragBytes + sizeof FragHdr, SrcBuffer, SrcOffset, BytesSent, &SrcBuffer, &SrcOffset)) { IPv6FreePacket(FragPacket); InterlockedCompareExchange((PLONG)&Info->Status, IP_NO_RESOURCES, IP_SUCCESS); break; } // // Correct the PayloadLength and NextHeader fields. // FragPayloadLength = UnfragBytes + sizeof(FragmentHeader) + BytesSent - sizeof(IPv6Header); ASSERT(FragPayloadLength <= MAX_IPv6_PAYLOAD); ((IPv6Header UNALIGNED *)(Mem + Offset))->PayloadLength = net_short((ushort) FragPayloadLength); ASSERT(Mem[Offset + NextHeaderOffset] == HdrType); Mem[Offset + NextHeaderOffset] = IP_PROTOCOL_FRAGMENT; BytesLeft -= BytesSent; PktOffset += BytesSent; // // Pick up any flags (like loopback-only) from the original packet. // PC(FragPacket)->Flags = PC(Packet)->Flags; // // Setup our completion handler and increment // the number of outstanding users of the completion data. // PC(FragPacket)->CompletionHandler = IPv6SendFragmentComplete; PC(FragPacket)->CompletionData = Info; InterlockedIncrement(&Info->NumLeft); // // Send the fragment. // IPSInfo.ipsi_fragcreates++; IPv6SendND(FragPacket, Offset, NCE, NULL); } if (InterlockedDecrement(&Info->NumLeft) == 0) { // // Amazingly, the fragments have already completed. // Complete the original packet now. // IPv6SendComplete(NULL, Packet, Info->Status); ExFreePool(Info); } else { // // IPv6SendFragmentComplete will complete the original packet // when all the fragments are completed. // } IPSInfo.ipsi_fragoks++; return; ErrorExit: IPSInfo.ipsi_fragfails++; IPv6SendComplete(NULL, Packet, IPStatus); } //* IPv6Send // // High-level IPv6 send routine. We have a completed datagram and a // RCE indicating where to direct it to. Here we deal with any packetization // issues (inserting a Jumbo Payload option, fragmentation, etc.) that are // necessary, and pick a NCE for the first hop. // // We also add any additional extension headers to the packet that may be // required for mobility (routing header) or security (AH, ESP header). // TBD: This design may change to move those header inclusions elsewhere. // // Note that this routine expects a properly formatted IPv6 packet, and // also that all of the headers are contained within the first NDIS buffer. // It performs no checking of these requirements. // void IPv6Send( PNDIS_PACKET Packet, // Packet to send. uint Offset, // Offset from start of Packet to IP header. IPv6Header UNALIGNED *IP, // Pointer to Packet's IPv6 header. uint PayloadLength, // Packet payload length. RouteCacheEntry *RCE, // First-hop neighbor information. uint Flags, // Flags for special handling. ushort TransportProtocol, ushort SourcePort, ushort DestPort) { uint PacketLength; // Size of complete IP packet in bytes. NeighborCacheEntry *NCE; // First-hop neighbor information. uint PathMTU; PNDIS_BUFFER OrigBuffer1, NewBuffer1; uchar *OrigMemory, *NewMemory, *EndOrigMemory, *EndNewMemory, *InsertPoint; uint OrigBufSize, NewBufSize, TotalPacketSize, Size, RtHdrSize = 0; IPv6RoutingHeader *SavedRtHdr = NULL, *RtHdr = NULL; IPv6Header UNALIGNED *IPNew; uint BytesToInsert = 0; uchar *BufPtr, *PrevNextHdr; ExtensionHeader *EHdr; uint EHdrLen; uchar HdrType; NDIS_STATUS Status; RouteCacheEntry *CareOfRCE = NULL; RouteCacheEntry *TunnelRCE = NULL; CareOfCompletionInfo *CareOfInfo; KIRQL OldIrql; IPSecProc *IPSecToDo; uint Action; uint i; uint TunnelStart = NO_TUNNEL; uint JUST_ESP = FALSE; uint IPSEC_TUNNEL = FALSE; uint NumESPTrailers = 0; IPSIncrementOutRequestCount(); // // Find the Security Policy for this outbound traffic. // Current Mobile IPv6 draft says to use a mobile node's home address // and not its care-of address as the selector for security policy lookup. // IPSecToDo = OutboundSPLookup(AlignAddr(&IP->Source), AlignAddr(&IP->Dest), TransportProtocol, SourcePort, DestPort, RCE->NTE->IF, &Action); if (IPSecToDo == NULL) { // // Check Action. // Just fall through for LOOKUP_BYPASS. // if (Action == LOOKUP_DROP) { // Drop packet. goto AbortSend; } if (Action == LOOKUP_IKE_NEG) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR, "IPv6Send: IKE not supported yet.\n")); goto AbortSend; } } else { // // Calculate the space needed for the IPSec headers. // BytesToInsert = IPSecBytesToInsert(IPSecToDo, &TunnelStart, NULL); if (TunnelStart != NO_TUNNEL) { IPSEC_TUNNEL = TRUE; } } // // If this packet is being sent to a mobile node's care-of address, // then we'll use the CareOfRCE instead of the one our caller gave us. // if ((RCE->BCE != NULL) && !(Flags & SEND_FLAG_BYPASS_BINDING_CACHE)) { KeAcquireSpinLock(&RouteCacheLock, &OldIrql); if (RCE->BCE != NULL) { MoveToFrontBCE(RCE->BCE); CareOfRCE = RCE->BCE->CareOfRCE; AddRefRCE(CareOfRCE); KeReleaseSpinLock(&RouteCacheLock, OldIrql); RCE = CareOfRCE; } else KeReleaseSpinLock(&RouteCacheLock, OldIrql); } // // Step through headers. // HdrType = IP->NextHeader; PrevNextHdr = &IP->NextHeader; BufPtr = (uchar *)(IP + 1); // // Skip the hop-by-hop header if it exists. Don't skip // dest options, since dest options (e.g. BindAck) usually // want IPsec and need to go after the RH/AH/ESP. As a result, // the only current way to get intermediate destination options // is to compose the packet before calling IPv6Send. // while (HdrType == IP_PROTOCOL_HOP_BY_HOP) { EHdr = (ExtensionHeader *) BufPtr; EHdrLen = (EHdr->HeaderExtLength + 1) * 8; BufPtr += EHdrLen; HdrType = EHdr->NextHeader; PrevNextHdr = &EHdr->NextHeader; } // // Check if there is a routing header. If this packet is being sent // to a care-of address, then it must contain a routing extension header. // If one already exists then add the destination address as the last // entry. If no routing header exists insert one with the home address as // the first (and only) address. // // This code assumes that the packet is contiguous at least up to the // insertion point. // if (HdrType == IP_PROTOCOL_ROUTING) { EHdr = (ExtensionHeader *) BufPtr; EHdrLen = (EHdr->HeaderExtLength + 1) * 8; RtHdrSize = EHdrLen; PrevNextHdr = &EHdr->NextHeader; // // Check if this header will be modified due to mobility. // if (CareOfRCE) { // Save Routing Header location for later use. RtHdr = (IPv6RoutingHeader *)BufPtr; // // Check if there is room to store the Home Address. // REVIEW: Is this necessary, what should happen // REVIEW: if the routing header is full? // if (RtHdr->HeaderExtLength / 2 < 23) { BytesToInsert += sizeof (IPv6Addr); } } else { // Adjust BufPtr to end of routing header. BufPtr += EHdrLen; } } else { // // No routing header present, but check if one needs to be // inserted due to mobility. // if (CareOfRCE) { BytesToInsert += (sizeof (IPv6RoutingHeader) + sizeof (IPv6Addr)); } } // Only will happen for IPSec bypass mode with no mobility. if (BytesToInsert == 0) { // // Nothing to do. // Action = LOOKUP_CONT; goto ContinueSend; } // // We have something to insert. We will replace the packet's // first NDIS_BUFFER with a new buffer that we allocate to hold the // all data from the existing first buffer plus the inserted data. // // // We get the first buffer and determine its size, then // allocate memory for the new buffer. // NdisGetFirstBufferFromPacket(Packet, &OrigBuffer1, &OrigMemory, &OrigBufSize, &TotalPacketSize); TotalPacketSize -= Offset; NewBufSize = (OrigBufSize - Offset) + MAX_LINK_HEADER_SIZE + BytesToInsert; Offset = MAX_LINK_HEADER_SIZE; NewMemory = ExAllocatePoolWithTagPriority(NonPagedPool, NewBufSize, IP6_TAG, LowPoolPriority); if (NewMemory == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6Send: - couldn't allocate pool!?!\n")); goto AbortSend; } NdisAllocateBuffer(&Status, &NewBuffer1, IPv6BufferPool, NewMemory, NewBufSize); if (Status != NDIS_STATUS_SUCCESS) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6Send - couldn't allocate buffer!?!\n")); ExFreePool(NewMemory); goto AbortSend; } // // We've sucessfully allocated a new buffer. Now copy the data from // the existing buffer to the new one. First we copy all data after // the insertion point. This is essentially the transport layer data // (no Extension headers). // // // Calculate Insertion Point for upper layer data. // EndOrigMemory = OrigMemory + OrigBufSize; EndNewMemory = NewMemory + NewBufSize; Size = (uint)(EndOrigMemory - BufPtr); InsertPoint = EndNewMemory - Size; // Copy upper layer data to end of new buffer. RtlCopyMemory(InsertPoint, BufPtr, Size); BytesToInsert = 0; // // Insert Transport IPSec headers. // if (IPSecToDo) { Action = IPSecInsertHeaders(TRANSPORT, IPSecToDo, &InsertPoint, NewMemory, Packet, &TotalPacketSize, PrevNextHdr, TunnelStart, &BytesToInsert, &NumESPTrailers, &JUST_ESP); if (Action == LOOKUP_DROP) { NdisFreeBuffer(NewBuffer1); ExFreePool(NewMemory); goto AbortSend; } } // end of if (IPSecToDo). // // Check if mobility needs to be done. // if (CareOfRCE) { // Check if routing header is already present in original buffer.. if (RtHdr != NULL) { // // Need to insert the home address in the routing header. // RtHdrSize += sizeof (IPv6Addr); // Move insert point up to start of routing header. InsertPoint -= RtHdrSize; BytesToInsert += sizeof(IPv6Addr); // Insert the routing header. RtlCopyMemory(InsertPoint, RtHdr, RtHdrSize - sizeof(IPv6Addr)); // Insert the Home address. RtlCopyMemory(InsertPoint + RtHdrSize - sizeof (IPv6Addr), &IP->Dest, sizeof (IPv6Addr)); RtHdr = (IPv6RoutingHeader *)InsertPoint; // Adjust size of routing header. RtHdr->HeaderExtLength += 2; } else { // // No routing header present - need to create new Routing header. // RtHdrSize = sizeof (IPv6RoutingHeader) + sizeof(IPv6Addr); // Move insert point up to start of routing header. InsertPoint -= RtHdrSize; BytesToInsert += RtHdrSize; // // Insert an entire routing header. // RtHdr = (IPv6RoutingHeader *)InsertPoint; RtHdr->NextHeader = *PrevNextHdr; RtHdr->HeaderExtLength = 2; RtHdr->RoutingType = 0; RtlZeroMemory(&RtHdr->Reserved, sizeof RtHdr->Reserved); RtHdr->SegmentsLeft = 1; // Insert the home address. RtlCopyMemory(RtHdr + 1, &IP->Dest, sizeof (IPv6Addr)); // // Fix the previous NextHeader field to indicate that it now points // to a routing header. // *(PrevNextHdr) = IP_PROTOCOL_ROUTING; } // Change the destination IPv6 address to the care-of address. RtlCopyMemory(&IP->Dest, &CareOfRCE->Destination, sizeof (IPv6Addr)); } // end of if (CareOfRCE) // // Copy original IP plus any extension headers. // If a care-of address was added, the Routing header is not part // of this copy because it has already been copied. // Size = (uint)(BufPtr - (uchar *)IP); // Move insert point up to start of IP. InsertPoint -= Size; // Adjust length of payload. PayloadLength += BytesToInsert; // Set the new IP payload length. IP->PayloadLength = net_short((ushort)PayloadLength); RtlCopyMemory(InsertPoint, (uchar *)IP, Size); IPNew = (IPv6Header UNALIGNED *)InsertPoint; // // Check if any Transport mode IPSec was performed and // if mutable fields need to be adjusted. // if (TunnelStart != 0 && IPSecToDo && !JUST_ESP) { if (RtHdr) { // // Save the new routing header so it can be restored after // authenticating. // SavedRtHdr = ExAllocatePoolWithTagPriority( NonPagedPool, RtHdrSize, IP6_TAG, LowPoolPriority); if (SavedRtHdr == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6Send: - couldn't allocate SavedRtHdr!?!\n")); NdisFreeBuffer(NewBuffer1); ExFreePool(NewMemory); goto AbortSend; } RtlCopyMemory(SavedRtHdr, RtHdr, RtHdrSize); } // // Adjust mutable fields before doing Authentication. // Action = IPSecAdjustMutableFields(InsertPoint, SavedRtHdr); if (Action == LOOKUP_DROP) { NdisFreeBuffer(NewBuffer1); ExFreePool(NewMemory); goto AbortSend; } } // end of if(IPSecToDo && !JUST_ESP) // // We need to save the existing completion handler & data. We'll // use these fields here, and restore them in IPv6CareOfComplete. // CareOfInfo = ExAllocatePoolWithTagPriority( NonPagedPool, sizeof(*CareOfInfo), IP6_TAG, LowPoolPriority); if (CareOfInfo == NULL) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NTOS_ERROR, "IPv6Send - couldn't allocate completion info!?!\n")); NdisFreeBuffer(NewBuffer1); ExFreePool(NewMemory); goto AbortSend; } CareOfInfo->SavedCompletionHandler = PC(Packet)->CompletionHandler; CareOfInfo->SavedCompletionData = PC(Packet)->CompletionData; CareOfInfo->SavedFirstBuffer = OrigBuffer1; CareOfInfo->NumESPTrailers = NumESPTrailers; PC(Packet)->CompletionHandler = IPv6CareOfComplete; PC(Packet)->CompletionData = CareOfInfo; // Unchain the original first buffer from the packet. NdisUnchainBufferAtFront(Packet, &OrigBuffer1); // Chain the new buffer to the front of the packet. NdisChainBufferAtFront(Packet, NewBuffer1); // // Do authentication for transport mode IPSec. // if (IPSecToDo) { IPSecAuthenticatePacket(TRANSPORT, IPSecToDo, InsertPoint, &TunnelStart, NewMemory, EndNewMemory, NewBuffer1); if (!JUST_ESP) { // // Reset the mutable fields to correct values. // Just copy from old packet to new packet for IP and // unmodified Ext. headers. // RtlCopyMemory(InsertPoint, (uchar *)IP, Size); // Check if the Routing header needs to be restored. if (CareOfRCE) { // Copy the saved routing header to the new buffer. RtlCopyMemory(RtHdr, SavedRtHdr, RtHdrSize); } } } // end of if (IPSecToDo) // // We're done with the transport copy. // // // Insert tunnel IPSec headers. // if (IPSEC_TUNNEL) { i = 0; // Loop through the different Tunnels. while (TunnelStart < IPSecToDo->BundleSize) { uchar NextHeader = IP_PROTOCOL_V6; NumESPTrailers = 0; i++; // Reset byte count. BytesToInsert = 0; Action = IPSecInsertHeaders(TUNNEL, IPSecToDo, &InsertPoint, NewMemory, Packet, &TotalPacketSize, &NextHeader, TunnelStart, &BytesToInsert, &NumESPTrailers, &JUST_ESP); if (Action == LOOKUP_DROP) { goto AbortSend; } // Add the ESP trailer header number. CareOfInfo->NumESPTrailers += NumESPTrailers; // Move insert point up to start of IP. InsertPoint -= sizeof(IPv6Header); // // Adjust length of payload. // PayloadLength = BytesToInsert + PayloadLength + sizeof(IPv6Header); // Insert IP header fields. IPNew = (IPv6Header UNALIGNED *)InsertPoint; IPNew->PayloadLength = net_short((ushort)PayloadLength); IPNew->NextHeader = NextHeader; if (!JUST_ESP) { // Adjust mutable fields. IPNew->VersClassFlow = IP_VERSION; IPNew->HopLimit = 0; } else { IPNew->VersClassFlow = IP->VersClassFlow; IPNew->HopLimit = IP->HopLimit - i; } // Source address same as inner header. RtlCopyMemory(&IPNew->Source, &IP->Source, sizeof (IPv6Addr)); // Dest address to the tunnel end point. RtlCopyMemory(&IPNew->Dest, &IPSecToDo[TunnelStart].SA->SADestAddr, sizeof (IPv6Addr)); // // Do authentication for tunnel mode IPSec. // IPSecAuthenticatePacket(TUNNEL, IPSecToDo, InsertPoint, &TunnelStart, NewMemory, EndNewMemory, NewBuffer1); if (!JUST_ESP) { // // Reset the mutable fields to correct values. // IPNew->VersClassFlow = IP->VersClassFlow; IPNew->HopLimit = IP->HopLimit - i; } } // end of while (TunnelStart < IPSecToDo->BundleSize) // // Check if a new RCE is needed due to the tunnel. // if (!(IP6_ADDR_EQUAL(AlignAddr(&IPNew->Dest), AlignAddr(&IP->Dest)))) { // Get a new route to the tunnel end point. Status = RouteToDestination(AlignAddr(&IPNew->Dest), 0, NULL, RTD_FLAG_NORMAL, &TunnelRCE); if (Status != IP_SUCCESS) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR, "IPv6Send: No route to IPSec tunnel dest.")); IPv6SendAbort(CastFromNTE(RCE->NTE), Packet, Offset, ICMPv6_DESTINATION_UNREACHABLE, ICMPv6_NO_ROUTE_TO_DESTINATION, 0, FALSE); goto AbortSend; } // Set new RCE; RCE = TunnelRCE; } } // end of if (IPSEC_TUNNEL) // Set the IP pointer to the new IP pointer. IP = IPNew; if (IPSecToDo) { // Free IPSecToDo. FreeIPSecToDo(IPSecToDo, IPSecToDo->BundleSize); if (SavedRtHdr) { // Free the saved routing header. ExFreePool(SavedRtHdr); } } ContinueSend: if (Action == LOOKUP_DROP) { AbortSend: // Error occured. IPSInfo.ipsi_outdiscards++; KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR, "IPv6Send: Drop packet.\n")); IPv6SendComplete(NULL, Packet, IP_GENERAL_FAILURE); if (CareOfRCE) { ReleaseRCE(CareOfRCE); } if (TunnelRCE) ReleaseRCE(TunnelRCE); if (IPSecToDo) { // Free IPSecToDo. FreeIPSecToDo(IPSecToDo, IPSecToDo->BundleSize); if (SavedRtHdr) { // Free the saved routing header. ExFreePool(SavedRtHdr); } } return; } // // We only have one NCE per RCE for now, // so picking one is really easy... // NCE = RCE->NCE; // // Prevent the packet from actually going out onto a link, // in several situations. Also see IsLoopbackAddress. // if ((IP->HopLimit == 0) || IsLoopback(AlignAddr(&IP->Dest)) || IsInterfaceLocalMulticast(AlignAddr(&IP->Dest))) { PC(Packet)->Flags |= NDIS_FLAGS_LOOPBACK_ONLY; } // // See if we need to insert a Jumbo Payload option. // if (PayloadLength > MAX_IPv6_PAYLOAD) { // Add code to insert a Jumbo Payload hop-by-hop option here. IPSInfo.ipsi_outdiscards++; KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_USER_ERROR, "IPv6Send: attempted to send a Jumbo Payload!\n")); IPv6SendComplete(NULL, Packet, IP_PACKET_TOO_BIG); return; } // // Check the path's MTU. If we're larger, fragment. // PacketLength = PayloadLength + sizeof(IPv6Header); PathMTU = GetPathMTUFromRCE(RCE); if (PacketLength > PathMTU) { IPv6SendFragments(Packet, Offset, IP, PayloadLength, RCE, PathMTU); } else { // // Fill in packet's PayloadLength field. // We already set the IP->PayloadLength if IPSec was done. // if (!IPSecToDo) { IP->PayloadLength = net_short((ushort)PayloadLength); } IPv6SendND(Packet, Offset, NCE, NULL); } if (CareOfRCE) ReleaseRCE(CareOfRCE); if (TunnelRCE) ReleaseRCE(TunnelRCE); } //* IPv6Forward - Forward a packet onto a new link. // // Somewhat like IPv6Send, but for forwarding packets // instead of sending freshly-generated packets. // // We are given ownership of the packet. The packet data // must be writable and the IP header must be contiguous. // // We can generate several possible ICMP errors: // Time Limit Exceeded, Destination Unreachable, Packet Too Big. // We decrement the hop limit. // We do not fragment the packet. // // We assume that our caller has already sanity-checked // the packet's destination address. Routing-header forwarding // may allow some cases (like link-local or loopback destinations) // that normal router forwarding does not permit. // Our caller provides the NCE of the next hop for the packet. // void IPv6Forward( NetTableEntryOrInterface *RecvNTEorIF, PNDIS_PACKET Packet, uint Offset, IPv6Header UNALIGNED *IP, uint PayloadLength, int Redirect, IPSecProc *IPSecToDo, RouteCacheEntry *RCE) { uint PacketLength; uint LinkMTU, IPSecBytesInserted = 0; IP_STATUS Status; uint IPSecOffset = Offset; NeighborCacheEntry *NCE = RCE->NCE; RouteCacheEntry *TunnelRCE = NULL; ushort SrcScope; IPSIncrementForwDatagramCount(); ASSERT(IP == GetIPv6Header(Packet, Offset, NULL)); // // Check for "scope" errors. We can't allow a packet with a scoped // source address to leave its scope. // SrcScope = AddressScope(AlignAddr(&IP->Source)); if (NCE->IF->ZoneIndices[SrcScope] != RecvNTEorIF->IF->ZoneIndices[SrcScope]) { IPv6SendAbort(RecvNTEorIF, Packet, Offset, ICMPv6_DESTINATION_UNREACHABLE, ICMPv6_SCOPE_MISMATCH, 0, FALSE); return; } // // Are we forwarding the packet out the link on which it arrived, // and we should consider a Redirect? Redirect will be false // if the forwarding is happening because of source-routing. // if ((NCE->IF == RecvNTEorIF->IF) && Redirect) { Interface *IF = NCE->IF; // // We do not want to forward a packet back onto a p2p link, // because it will very often lead to a loop. // One example: a prefix is on-link to a p2p link between routers // and someone sends a packet to an address in the prefix // that is not assigned to either end of the link. // if (IF->Flags & IF_FLAG_P2P) { IPv6SendAbort(RecvNTEorIF, Packet, Offset, ICMPv6_DESTINATION_UNREACHABLE, (IP6_ADDR_EQUAL(&NCE->NeighborAddress, &RCE->Destination) ? ICMPv6_ADDRESS_UNREACHABLE : ICMPv6_NO_ROUTE_TO_DESTINATION), 0, FALSE); return; } // // We SHOULD send a Redirect, whenever // 1. The Source address of the packet specifies a neighbor, and // 2. A better first-hop resides on the same link, and // 3. The Destination address is not multicast. // See Section 8.2 of the ND spec. // if ((IF->Flags & IF_FLAG_ROUTER_DISCOVERS) && !IsMulticast(AlignAddr(&IP->Dest))) { RouteCacheEntry *SrcRCE; NeighborCacheEntry *SrcNCE; // // Get an RCE for the Source of this packet. // Status = RouteToDestination(AlignAddr(&IP->Source), 0, RecvNTEorIF, RTD_FLAG_STRICT, &SrcRCE); if (Status == IP_SUCCESS) { // // Because of RTD_FLAG_STRICT. // ASSERT(SrcRCE->NTE->IF == IF); SrcNCE = SrcRCE->NCE; if (IP6_ADDR_EQUAL(&SrcNCE->NeighborAddress, AlignAddr(&IP->Source))) { // // The source of this packet is on-link, // so send a Redirect to the source. // Unless rate-limiting prevents it. // if (ICMPv6RateLimit(SrcRCE)) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR, "RedirectSend - rate limit %s\n", FormatV6Address(&SrcRCE->Destination))); } else { RedirectSend(SrcNCE, NCE, AlignAddr(&IP->Dest), RecvNTEorIF, Packet, Offset, PayloadLength); } } ReleaseRCE(SrcRCE); } } } // // Check that the hop limit allows the packet to be forwarded. // if (IP->HopLimit <= 1) { // // It seems to be customary in this case to have the hop limit // in the ICMP error's payload be zero. // IP->HopLimit = 0; IPv6SendAbort(RecvNTEorIF, Packet, Offset, ICMPv6_TIME_EXCEEDED, ICMPv6_HOP_LIMIT_EXCEEDED, 0, FALSE); return; } // // Note that subsequent ICMP errors (Packet Too Big, Address Unreachable) // will show the decremented hop limit. They are also generated // from the perspective of the outgoing link. That is, the source address // in the ICMP error is an address assigned to the outgoing link. // IP->HopLimit--; // Check if there is IPSec to be done. if (IPSecToDo) { PNDIS_BUFFER Buffer; uchar *Memory, *EndMemory, *InsertPoint; uint BufSize, TotalPacketSize, BytesInserted; IPv6Header UNALIGNED *IPNew = NULL; uint JUST_ESP, Action, TunnelStart = 0, i = 0; NetTableEntry *NTE; uint NumESPTrailers = 0; // not used here. // Set the insert point to the start of the IP header. InsertPoint = (uchar *)IP; // Get the first buffer. NdisGetFirstBufferFromPacket(Packet, &Buffer, &Memory, &BufSize, &TotalPacketSize); TotalPacketSize -= Offset; // End of this buffer. EndMemory = Memory + BufSize; // Loop through the different Tunnels. while (TunnelStart < IPSecToDo->BundleSize) { uchar NextHeader = IP_PROTOCOL_V6; BytesInserted = 0; i++; // // Insert Tunnel mode IPSec. // Action = IPSecInsertHeaders(TUNNEL, IPSecToDo, &InsertPoint, Memory, Packet, &TotalPacketSize, &NextHeader, TunnelStart, &BytesInserted, &NumESPTrailers, &JUST_ESP); if (Action == LOOKUP_DROP) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_NET_ERROR, "IPv6Forward: IPSec drop packet.\n")); return; } // Move insert point up to start of IP. InsertPoint -= sizeof(IPv6Header); // Reset the Offset value to the correct link-layer size. IPSecOffset = (uint)(InsertPoint - Memory); // Adjust length of payload. PayloadLength = BytesInserted + PayloadLength + sizeof(IPv6Header); // Insert IP header fields. IPNew = (IPv6Header UNALIGNED *)InsertPoint; IPNew->PayloadLength = net_short((ushort)PayloadLength); IPNew->NextHeader = NextHeader; if (!JUST_ESP) { // Adjust mutable fields. IPNew->VersClassFlow = IP_VERSION; IPNew->HopLimit = 0; } else { IPNew->VersClassFlow = IP->VersClassFlow; IPNew->HopLimit = IP->HopLimit - i; } // Dest address to the tunnel end point. RtlCopyMemory(&IPNew->Dest, &IPSecToDo[TunnelStart].SA->SADestAddr, sizeof (IPv6Addr)); // Figure out what source address to use. NTE = FindBestSourceAddress(NCE->IF, AlignAddr(&IPNew->Dest)); if (NTE == NULL) { // // We have no valid source address to use! // return; } // Source address is the address of the forwarding interface. RtlCopyMemory(&IPNew->Source, &NTE->Address, sizeof (IPv6Addr)); // Release NTE. ReleaseNTE(NTE); // // Do authentication for tunnel mode IPSec. // IPSecAuthenticatePacket(TUNNEL, IPSecToDo, InsertPoint, &TunnelStart, Memory, EndMemory, Buffer); if (!JUST_ESP) { // // Reset the mutable fields to correct values. // IPNew->VersClassFlow = IP->VersClassFlow; IPNew->HopLimit = IP->HopLimit - i; } IPSecBytesInserted += (BytesInserted + sizeof(IPv6Header)); } // end of while (TunnelStart < IPSecToDo->BundleSize) // // Check if a new RCE is needed. // if (!(IP6_ADDR_EQUAL(AlignAddr(&IPNew->Dest), AlignAddr(&IP->Dest)))) { // Get a new route to the tunnel end point. Status = RouteToDestination(AlignAddr(&IPNew->Dest), 0, NULL, RTD_FLAG_NORMAL, &TunnelRCE); if (Status != IP_SUCCESS) { KdPrintEx((DPFLTR_TCPIP6_ID, DPFLTR_INTERNAL_ERROR, "IPv6Forward: No route to IPSec tunnel dest.")); IPv6SendAbort(RecvNTEorIF, Packet, Offset, ICMPv6_DESTINATION_UNREACHABLE, ICMPv6_NO_ROUTE_TO_DESTINATION, 0, FALSE); return; } // Set the new RCE. RCE = TunnelRCE; // Set new NCE; NCE = RCE->NCE; } } // end of if (IPSecToDo) // // Check that the packet is not too big for the outgoing link. // Note that IF->LinkMTU is volatile, so we capture // it in a local variable for consistency. // PacketLength = PayloadLength + sizeof(IPv6Header); LinkMTU = NCE->IF->LinkMTU; if (PacketLength > LinkMTU) { // Change the LinkMTU to account for the IPSec headers. LinkMTU -= IPSecBytesInserted; // // Note that MulticastOverride is TRUE for Packet Too Big errors. // This allows Path MTU Discovery to work for multicast. // IPv6SendAbort(RecvNTEorIF, Packet, Offset, ICMPv6_PACKET_TOO_BIG, 0, LinkMTU, TRUE); // MulticastOverride. } else { IPv6SendND(Packet, IPSecOffset, NCE, NULL); IPSInfo.ipsi_forwdatagrams++; } if (TunnelRCE) ReleaseRCE(TunnelRCE); } //* IPv6SendAbort // // Abort an attempt to send a packet and instead // generate an ICMP error. In most situations this function // is called before the packet has been sent (so PC(Packet)->IF is NULL) // but it can also be used after sending the packet, if the link layer // reports failure. // // Disposes of the aborted packet. // // The caller can specify the source address of the ICMP error, // by specifying an NTE, or the caller can provide an interface // from which which the best source address is selected. // // Callable from thread or DPC context. // Must be called with no locks held. // void IPv6SendAbort( NetTableEntryOrInterface *NTEorIF, PNDIS_PACKET Packet, // Aborted packet. uint Offset, // Offset of IPv6 header in aborted packet. uchar ICMPType, // ICMP error type. uchar ICMPCode, // ICMP error code pertaining to type. ulong ErrorParameter, // Parameter included in the error. int MulticastOverride) // Allow replies to multicast packets? { IPv6Header UNALIGNED *IP; IPv6Packet DummyPacket; IPv6Header HdrBuffer; // // It's possible for GetIPv6Header to fail // when we are sending "raw" packets. // IP = GetIPv6Header(Packet, Offset, &HdrBuffer); if (IP != NULL) { InitializePacketFromNdis(&DummyPacket, Packet, Offset); DummyPacket.IP = IP; DummyPacket.SrcAddr = AlignAddr(&IP->Source); DummyPacket.IPPosition = Offset; AdjustPacketParams(&DummyPacket, sizeof *IP); DummyPacket.NTEorIF = NTEorIF; ICMPv6SendError(&DummyPacket, ICMPType, ICMPCode, ErrorParameter, IP->NextHeader, MulticastOverride); } IPv6SendComplete(PC(Packet)->IF, Packet, IP_GENERAL_FAILURE); }